From d2d56f38da01001c92a09afc6b52b5acbd9bc13c Mon Sep 17 00:00:00 2001 From: tappro Date: Mon, 30 Jul 2007 21:08:59 +0000 Subject: [PATCH] - make HEAD from b_post_cmd3 --- lustre/ChangeLog | 433 +- lustre/Makefile.in | 7 +- lustre/autoMakefile.am | 16 +- lustre/autoconf/Makefile.am | 2 +- lustre/autoconf/kerberos5.m4 | 105 + lustre/autoconf/lustre-core.m4 | 470 +- lustre/autoconf/lustre-version.ac | 4 +- lustre/{ldiskfs => cmm}/.cvsignore | 8 +- lustre/cmm/Makefile.in | 6 + lustre/cmm/autoMakefile.am | 11 + lustre/cmm/cmm_device.c | 534 + lustre/cmm/cmm_internal.h | 235 + lustre/cmm/cmm_lproc.c | 108 + lustre/cmm/cmm_object.c | 1263 + lustre/cmm/cmm_split.c | 731 + lustre/cmm/mdc_device.c | 347 + lustre/cmm/mdc_internal.h | 109 + lustre/cmm/mdc_object.c | 579 + lustre/contrib/mpich2-1.0.3.patch | 1831 + lustre/doc/lustre.7 | 2 +- lustre/doc/mkfs.lustre.8 | 12 +- lustre/{ldiskfs2 => fid}/.cvsignore | 8 +- lustre/fid/Makefile.in | 4 + lustre/fid/autoMakefile.am | 20 + lustre/fid/fid_handler.c | 601 + lustre/fid/fid_internal.h | 80 + lustre/fid/fid_lib.c | 141 + lustre/fid/fid_request.c | 389 + lustre/fid/fid_store.c | 182 + lustre/fid/lproc_fid.c | 343 + lustre/fld/.cvsignore | 15 + lustre/fld/Makefile.in | 6 + lustre/fld/autoMakefile.am | 18 + lustre/fld/fld_cache.c | 442 + lustre/fld/fld_handler.c | 428 + lustre/fld/fld_index.c | 206 + lustre/fld/fld_internal.h | 125 + lustre/fld/fld_request.c | 635 + lustre/fld/lproc_fld.c | 153 + lustre/include/Makefile.am | 10 +- lustre/include/dt_object.h | 506 + lustre/include/liblustre.h | 149 +- lustre/include/linux/Makefile.am | 11 +- lustre/include/linux/lustre_acl.h | 39 + lustre/include/linux/lustre_compat25.h | 206 +- lustre/include/linux/lustre_fsfilt.h | 31 +- lustre/include/linux/lustre_handles.h | 12 +- lustre/include/linux/lustre_intent.h | 18 +- lustre/include/linux/lustre_lite.h | 6 +- lustre/include/linux/lustre_mds.h | 6 +- lustre/include/linux/lustre_patchless_compat.h | 33 +- lustre/include/linux/lvfs.h | 16 +- lustre/include/linux/lvfs_linux.h | 2 + lustre/include/linux/obd.h | 4 +- lustre/include/linux/obd_class.h | 3 +- lustre/include/linux/obd_support.h | 23 +- lustre/include/lprocfs_status.h | 6 +- lustre/include/lu_object.h | 1074 + lustre/include/lu_time.h | 47 + lustre/include/lustre/Makefile.am | 4 +- lustre/include/lustre/libiam.h | 127 + lustre/include/lustre/liblustreapi.h | 4 +- lustre/include/lustre/lustre_idl.h | 753 +- lustre/include/lustre/lustre_user.h | 55 +- lustre/include/lustre_capa.h | 272 + lustre/include/lustre_cfg.h | 8 + lustre/include/lustre_disk.h | 19 +- lustre/include/lustre_dlm.h | 70 +- lustre/include/lustre_export.h | 33 +- lustre/include/lustre_fid.h | 263 + lustre/include/lustre_fld.h | 237 + lustre/include/lustre_ha.h | 3 + lustre/include/lustre_handles.h | 10 + lustre/include/lustre_import.h | 6 +- lustre/include/lustre_lib.h | 60 +- lustre/include/lustre_lite.h | 2 +- lustre/include/lustre_log.h | 31 +- lustre/include/lustre_mdc.h | 80 + lustre/include/lustre_mds.h | 151 +- lustre/include/lustre_mdt.h | 85 + lustre/include/lustre_net.h | 188 +- lustre/include/lustre_param.h | 4 + lustre/include/lustre_quota.h | 6 +- lustre/include/lustre_req_layout.h | 183 + lustre/include/lustre_sec.h | 572 + lustre/include/lustre_ucache.h | 72 +- lustre/include/lustre_ver.h.in | 1 - lustre/include/md_object.h | 658 + lustre/include/obd.h | 440 +- lustre/include/obd_class.h | 870 +- lustre/include/obd_ost.h | 14 +- lustre/include/obd_support.h | 267 +- .../config-linux-2.4.18-p4smp-61chaos | 1035 - .../kernel_configs/config-linux-2.4.20-i386-rh | 1849 - .../kernel-2.4.20-hp_pnnl-2.4-ia64-smp.config | 1047 - .../kernel-2.4.20-hp_pnnl-2.4-ia64.config | 1047 - .../kernel-2.4.20-rh-2.4-i686-smp.config | 1866 - .../kernel-2.4.20-rh-2.4-i686.config | 1866 - .../kernel-2.4.21-rhel-2.4-i686-smp.config | 2139 - .../kernel-2.4.21-rhel-2.4-i686.config | 2139 - .../kernel-2.4.21-rhel-2.4-ia64-smp.config | 1451 - .../kernel-2.4.21-rhel-2.4-ia64.config | 1451 - .../kernel-2.4.21-rhel-2.4-x86_64-smp.config | 1787 - .../kernel-2.4.21-rhel-2.4-x86_64.config | 1787 - .../kernel-2.4.21-sles-2.4-i686-smp.config | 2383 - .../kernel-2.4.21-sles-2.4-i686.config | 2383 - .../kernel-2.4.21-suse-2.4.21-2-x86_64.config | 2042 - .../kernel-2.6.16-2.6-sles10-i686-bigsmp.config | 140 +- .../kernel-2.6.16-2.6-sles10-i686.config | 140 +- .../kernel-2.6.16-2.6-sles10-x86_64-smp.config | 134 +- .../kernel-2.6.16-2.6-sles10-x86_64.config | 134 +- .../kernel-2.6.5-2.6-suse-x86_64-smp.config | 2 +- .../kernel-2.6.9-2.6-rhel4-i686-smp.config | 56 +- .../kernel-2.6.9-2.6-rhel4-i686.config | 80 +- .../kernel-2.6.9-2.6-rhel4-ia64-smp.config | 51 +- .../kernel-2.6.9-2.6-rhel4-ia64.config | 51 +- .../kernel-2.6.9-2.6-rhel4-x86_64-smp.config | 69 +- .../kernel-2.6.9-2.6-rhel4-x86_64.config | 55 +- .../kernel_configs/uml-vanilla-2.4.24.config | 413 - .../kernel_patches/patches/2.6-rhel4-kgdb-ga.patch | 9445 +- .../3.5G-address-space-2.4.22-vanilla.patch | 352 - .../patches/__find_get_block_slow-scale.patch | 43 + .../kernel_patches/patches/add_page_private.patch | 23 - .../patches/atomic_add_return-sles9.patch | 104 + .../patches/blkdev_tunables-2.4.21-chaos.patch | 52 - .../patches/blkdev_tunables-2.6-sles10.patch | 13 + .../patches/compile-fixes-2.4.21-rhel.patch | 90 - .../patches/configurable-x86-stack-2.4.20.patch | 318 - .../configurable-x86-stack-2.4.21-chaos.patch | 468 - .../configurable-x86-stack-2.4.21-suse-171.patch | 317 - .../configurable-x86-stack-2.4.21-suse2.patch | 318 - .../patches/configurable-x86_64-2.4.21.patch | 122 - .../patches/dcache_refcount_debug.patch | 24 - .../patches/debugging-fields-in-current.patch | 14 + .../patches/dev_read_only-2.6-fc5.patch | 45 +- .../patches/dev_read_only-2.6-suse.patch | 53 +- .../patches/dev_read_only-2.6.18-vanilla.patch | 41 +- .../patches/dev_read_only_2.4.20-rh.patch | 125 - .../patches/dev_read_only_2.4.21-chaos.patch | 122 - .../patches/dynamic-locks-2.6-fc3.patch | 288 + .../patches/dynamic-locks-2.6.9.patch | 311 + lustre/kernel_patches/patches/elevator-cfq.patch | 20 - lustre/kernel_patches/patches/export-2.6-fc5.patch | 12 + .../patches/export-ext3-2.6-rhel4.patch | 33 - .../patches/export-ext3-2.6-suse.patch | 33 - .../patches/export-nr_free_buffer_pages.patch | 12 + .../patches/export-show_task-2.4-cray.patch | 33 - .../patches/export-show_task-2.4-rh.patch | 171 - .../patches/export-show_task-2.4-rhel.patch | 20 - .../patches/export-show_task-2.4-vanilla.patch | 34 - .../kernel_patches/patches/export-truncate.patch | 35 - .../patches/export-zap-page-range.patch | 12 - .../patches/export_num_siblings.patch | 10 - .../patches/export_symbol_numa-2.6.18.patch | 24 + .../patches/export_symbols-2.6-rhel4.patch | 13 + .../patches/export_symbols-2.6.12.patch | 13 + .../patches/export_symbols-ext3-2.6-suse.patch | 17 - .../patches/exports-2.4.21-chaos.patch | 59 - .../patches/exports_2.4.19-suse.patch | 53 - .../patches/exports_2.4.19-suse2.patch | 59 - .../patches/exports_2.4.20-rh-hp.patch | 53 - .../kernel_patches/patches/ext-2.4-patch-1.patch | 2536 - .../kernel_patches/patches/ext-2.4-patch-2.patch | 34 - .../kernel_patches/patches/ext-2.4-patch-3.patch | 96 - .../kernel_patches/patches/ext-2.4-patch-4.patch | 52 - lustre/kernel_patches/patches/ext3-2.4-ino_t.patch | 144 - .../kernel_patches/patches/ext3-2.4.20-fixes.patch | 118 - .../patches/ext3-check-jbd-errors-2.6-sles10.patch | 83 + .../patches/ext3-check-jbd-errors-2.6.5.patch | 113 - .../patches/ext3-check-jbd-errors-2.6.9.patch | 113 - .../patches/ext3-delete_thread-2.4.20-hp.patch | 499 - .../patches/ext3-delete_thread-2.4.21-chaos.patch | 449 - .../ext3-delete_thread-2.4.21-suse-171.patch | 496 - .../patches/ext3-delete_thread-2.4.24.patch | 449 - .../patches/ext3-delete_thread-2.4.29.patch | 442 - ...3-disable-write-bar-by-default-2.6-sles10.patch | 15 - .../patches/ext3-ea-in-inode-2.4.20.patch | 747 - .../patches/ext3-ea-in-inode-2.4.21-chaos.patch | 758 - .../patches/ext3-ea-in-inode-2.4.21-sles.patch | 758 - .../patches/ext3-ea-in-inode-2.4.21-suse2.patch | 758 - .../patches/ext3-ea-in-inode-2.4.22-rh.patch | 755 - .../patches/ext3-ea-in-inode-2.4.29.patch | 731 - .../patches/ext3-ea-in-inode-2.6-rhel4.patch | 840 - .../patches/ext3-ea-in-inode-2.6-suse.patch | 840 - .../kernel_patches/patches/ext3-error-export.patch | 16 - .../patches/ext3-extents-2.4.21-chaos.patch | 2877 - .../patches/ext3-extents-2.4.21-suse2.patch | 2875 - .../patches/ext3-extents-2.4.24.patch | 2863 - .../patches/ext3-extents-2.4.29.patch | 2858 - .../patches/ext3-extents-2.6.12.patch | 2940 - .../patches/ext3-extents-2.6.15.patch | 2947 - .../patches/ext3-extents-2.6.16-sles10.patch | 2947 - .../patches/ext3-extents-2.6.18-vanilla.patch | 2950 - .../patches/ext3-extents-2.6.5.patch | 2951 - .../patches/ext3-extents-2.6.9-rhel4.patch | 2925 - .../ext3-extents-asyncdel-2.4.21-chaos.patch | 31 - .../patches/ext3-extents-asyncdel-2.4.24.patch | 31 - .../patches/ext3-extents-bug11324.patch | 252 - .../patches/ext3-extents-fixes-2.6.9-rhel4.patch | 86 + ...t3-extents-multiblock-directio-2.6.5-suse.patch | 157 + ...3-extents-multiblock-directio-2.6.9-rhel4.patch | 149 + .../patches/ext3-extents-search-2.6.9-rhel4.patch | 168 + .../patches/ext3-external-journal-2.6.12.patch | 148 - .../patches/ext3-external-journal-2.6.9.patch | 150 - .../patches/ext3-filterdata-2.6.15.patch | 25 - .../patches/ext3-filterdata-sles10.patch | 25 + .../patches/ext3-htree-2.4.21-chaos.patch | 2593 - .../patches/ext3-htree-2.4.21-rhel.patch | 2531 - .../patches/ext3-htree-2.4.22-rh.patch | 2581 - .../kernel_patches/patches/ext3-htree-2.4.29.patch | 2496 - .../patches/ext3-htree-dot-2.6.5-suse.patch | 23 - .../patches/ext3-htree-dot-2.6.patch | 23 - .../patches/ext3-htree-path-ops.patch | 894 - .../patches/ext3-ialloc-2.4.21-suse2.patch | 237 - .../patches/ext3-ialloc-2.4.24.patch | 238 - .../kernel_patches/patches/ext3-ialloc-2.6.patch | 128 - .../patches/ext3-include-fixes-2.6-rhel4.patch | 20 - .../patches/ext3-include-fixes-2.6-suse.patch | 20 - .../patches/ext3-ino_sb_macro-2.4.21-chaos.patch | 1514 - .../patches/ext3-inode-version-2.6-sles10.patch | 426 + .../ext3-inode-version-2.6.18-vanilla.patch | 426 + lustre/kernel_patches/patches/ext3-largefile.patch | 16 - .../patches/ext3-lookup-dotdot-2.4.20.patch | 63 - .../patches/ext3-lookup-dotdot-2.6.9.patch | 63 - .../patches/ext3-map_inode_page-2.4.21-suse2.patch | 119 - .../patches/ext3-map_inode_page-2.6-suse.patch | 86 - .../patches/ext3-map_inode_page.patch | 110 - .../patches/ext3-map_inode_page_2.4.18.patch | 110 - .../patches/ext3-mballoc2-2.6-fc5.patch | 3105 - .../patches/ext3-mballoc2-2.6-suse.patch | 3111 - .../patches/ext3-mballoc2-2.6.12.patch | 3105 - .../patches/ext3-mballoc2-2.6.18-vanilla.patch | 3140 - .../patches/ext3-mballoc2-2.6.9-rhel4.patch | 3124 - .../patches/ext3-mballoc3-core.patch | 4528 + .../patches/ext3-mballoc3-rhel4.patch | 396 + .../patches/ext3-mballoc3-sles10.patch | 377 + .../patches/ext3-mballoc3-suse.patch | 397 + .../ext3-multi-mount-protection-2.6-fc5.patch | 381 - ...xt3-multi-mount-protection-2.6.18-vanilla.patch | 381 - .../patches/ext3-nanosecond-2.6-rhel4.patch | 401 - .../patches/ext3-nanosecond-2.6-sles10.patch | 404 - .../patches/ext3-nanosecond-2.6-suse.patch | 195 - .../patches/ext3-nanosecond-2.6.18-vanilla.patch | 403 - .../patches/ext3-nlinks-2.4.20-hp_pnnl.patch | 172 - .../patches/ext3-nlinks-2.4.21-chaos.patch | 172 - .../patches/ext3-nlinks-2.4.24.patch | 172 - .../kernel_patches/patches/ext3-nlinks-2.6.7.patch | 156 - .../kernel_patches/patches/ext3-nlinks-2.6.9.patch | 158 - .../patches/ext3-no-write-super-chaos.patch | 15 - .../patches/ext3-noread-2.4.20.patch | 218 - .../patches/ext3-noread-2.4.21-chaos.patch | 223 - .../patches/ext3-noread-2.4.21-suse2.patch | 218 - .../patches/ext3-o_direct-2.4.21-chaos.patch | 23 - .../patches/ext3-orphan_lock-2.4.22-rh.patch | 82 - .../kernel_patches/patches/ext3-orphan_lock.patch | 79 - .../kernel_patches/patches/ext3-raw-lookup.patch | 61 - .../ext3-remove-cond_resched-calls-2.6.12.patch | 29 - .../patches/ext3-rename-reserve-2.6-suse.patch | 263 - .../kernel_patches/patches/ext3-san-2.4.20.patch | 117 - .../patches/ext3-san-jdike-2.6-suse.patch | 106 - .../patches/ext3-sector_t-overflow-2.4.patch | 41 - .../patches/ext3-sector_t-overflow-2.6.12.patch | 64 - .../ext3-sector_t-overflow-2.6.5-suse.patch | 44 - .../ext3-sector_t-overflow-2.6.9-rhel4.patch | 64 - .../patches/ext3-statfs-2.6.12.patch | 177 - .../patches/ext3-truncate-buffer-head.patch | 10 - .../patches/ext3-truncate_blocks.patch | 92 - .../patches/ext3-trusted_ea-2.4.20.patch | 180 - .../patches/ext3-uninit-2.6-sles10.patch | 674 + .../patches/ext3-uninit-2.6-suse.patch | 653 + .../kernel_patches/patches/ext3-uninit-2.6.9.patch | 779 - .../patches/ext3-use-after-free.patch | 53 - .../patches/ext3-wantedi-2.6-rhel4.patch | 193 - .../patches/ext3-wantedi-2.6-suse.patch | 192 - .../patches/ext3-wantedi-2.6.15.patch | 174 - .../patches/ext3-wantedi-misc-2.6-suse.patch | 16 + .../patches/ext3-wantedi-misc-2.6.18-vanilla.patch | 16 + .../patches/ext3-xattr-ptr-arith-fix.patch | 18 - .../patches/extN-2.4.18-ino_sb_fixup.patch | 33 - .../kernel_patches/patches/extN-misc-fixup.patch | 20 - .../patches/extN-wantedi-2.4.21-chaos.patch | 213 - .../patches/extN-wantedi-2.4.21-suse2.patch | 226 - lustre/kernel_patches/patches/extN-wantedi.patch | 213 - .../patches/fsprivate-2.4-suse.patch | 10 - lustre/kernel_patches/patches/fsprivate-2.4.patch | 10 - .../patches/gfp_debug-2.4.21-rhel.patch | 77 - .../grab_cache_page_nowait_gfp-2.4.21-suse2.patch | 85 - .../grab_cache_page_nowait_gfp-rh-2.4.patch | 65 - .../patches/highmem-split-2.6-rhel4.patch | 96 + lustre/kernel_patches/patches/i_filter_data.patch | 12 + .../patches/increase-BH_LRU_SIZE.patch | 13 + .../patches/inode-max-readahead-2.4.24.patch | 22 - .../patches/inode-nr_unused-2.6.9-rhel4.patch | 29 +- .../patches/invalidate_show-2.4.20-hp.patch | 123 - .../patches/invalidate_show-2.4.20-rh.patch | 114 - .../patches/invalidate_show-2.4.29.patch | 107 - .../kernel_patches/patches/invalidate_show.patch | 112 - .../patches/iod-rmap-exports-2.4.21-chaos.patch | 94 - .../patches/iod-stock-24-exports_hp.patch | 48 - .../patches/iod-stock-exports-2.4.22.patch | 52 - lustre/kernel_patches/patches/iopen-2.4.20.patch | 495 - .../patches/iopen-2.4.21-chaos.patch | 497 - lustre/kernel_patches/patches/iopen-2.6-fc5.patch | 448 - .../kernel_patches/patches/iopen-2.6-rhel4.patch | 471 - lustre/kernel_patches/patches/iopen-2.6-suse.patch | 472 - lustre/kernel_patches/patches/iopen-2.6.12.patch | 471 - lustre/kernel_patches/patches/ipoib_tcpdump.patch | 18 + .../patches/jbd-checkpoint-on-commit.patch | 138 + .../patches/jbd-commit-tricks-rhel3.patch | 132 - .../kernel_patches/patches/jbd-commit-tricks.patch | 132 - .../patches/jbd-copy-out-everything.patch | 52 + lustre/kernel_patches/patches/jbd-ctx_switch.patch | 13 - .../patches/jbd-dont-account-blocks-twice.patch | 17 - lustre/kernel_patches/patches/jbd-flushtime.patch | 34 - .../patches/jbd-get_write_access.patch | 56 - .../kernel_patches/patches/jbd-stats-2.6.9.patch | 199 +- .../kernel_patches/patches/kallsyms-2.4.29.patch | 689 - .../patches/kjournald_affinity.patch | 52 - .../patches/linux-2.4.20-xattr-0.8.54-hp.patch | 4875 - .../patches/linux-2.4.21-xattr-0.8.54-chaos.patch | 2172 - .../linux-2.4.21-xattr-0.8.54-suse-171.patch | 276 - .../patches/linux-2.4.21-xattr-0.8.54-suse2.patch | 258 - .../linux-2.4.24-jbd-handle-EIO-rhel3.patch | 23 - .../patches/linux-2.4.24-jbd-handle-EIO.patch | 51 - .../patches/linux-2.4.24-xattr-0.8.54.patch | 5474 -- .../patches/linux-2.4.29-xattr-0.8.54.patch | 5362 - .../linux-2.6.9-network_driver-for-sk98.patch | 39833 ++++++++ lustre/kernel_patches/patches/listman-2.4.20.patch | 22 - .../patches/listman-2.4.21-chaos.patch | 26 - .../llnl-frame-pointer-walk-2.4.21-rhel.patch | 120 - .../llnl-frame-pointer-walk-fix-2.4.21-rhel.patch | 249 - lustre/kernel_patches/patches/lockmeter.patch | 3096 + .../lookup-stack-symbols-2.4.21-suse-171.patch | 234 - .../patches/loop-sync-2.4.21-suse.patch | 11 - lustre/kernel_patches/patches/lustre_build.patch | 33 - .../patches/netconsole-2.4.24-ppc.patch | 489 - .../patches/nfs_export_kernel-2.4.20-hp.patch | 740 - .../patches/nfs_export_kernel-2.4.21-chaos.patch | 756 - .../patches/nfs_export_kernel-2.4.21-suse2.patch | 756 - .../patches/nfs_export_kernel-2.4.22.patch | 745 - .../patches/nfs_export_kernel-2.4.29.patch | 744 - .../patches/nfs_export_kernel-2.4.29.patch-1 | 730 - .../nfs_export_kernel-getattr_on_lookup-2.4.patch | 64 - .../patches/nfs_statfs-toomanyfiles-rhel-2.4.patch | 30 - lustre/kernel_patches/patches/nfsd_iallocsem.patch | 19 - .../patches/pagecache-lock-2.4.21-chaos.patch | 21 - .../kernel_patches/patches/proc-sleep-2.6.9.patch | 758 + .../patches/procfs-ndynamic-2.4.21-suse2.patch | 16 - .../patches/procfs-ndynamic-2.4.patch | 13 - lustre/kernel_patches/patches/qsnet-rhel-2.4.patch | 93733 ------------------ .../kernel_patches/patches/qsnet-rhel4-2.6.patch | 98259 +------------------ lustre/kernel_patches/patches/qsnet-suse-2.6.patch | 94105 +----------------- .../patches/quota-deadlock-on-pagelock-core.patch | 85 +- lustre/kernel_patches/patches/raid5-zerocopy.patch | 374 + .../patches/remove-suid-2.4-rhel.patch | 23 - .../kernel_patches/patches/removepage-2.4.20.patch | 28 - .../patches/sd_iostats-2.4.21-chaos.patch | 442 - .../patches/sd_iostats-2.6-rhel4.patch | 78 +- .../patches/sd_iostats-2.6-suse.patch | 456 - .../patches/slab-use-after-free-debug-2.4.24.patch | 748 - .../patches/socket-exports-vanilla.patch | 42 - .../patches/statfs64-cast-unsigned-2.4-rhel.patch | 28 - .../patches/uml-2.4.20-do_mmap_pgoff-fix.patch | 16 - lustre/kernel_patches/patches/uml-2.6.10-fc3.patch | 2 +- .../patches/uml-patch-2.4.24-1.patch | 41972 -------- .../patches/uml-patch-2.4.29-1.patch | 46719 --------- .../patches/uml-sigusr1-2.4-vanilla.patch | 22 - .../patches/vfs_intent-2.4.20-hp.patch | 1948 - .../patches/vfs_intent-2.4.20-vanilla.patch | 1854 - .../patches/vfs_intent-2.4.21-rhel.patch | 1920 - .../patches/vfs_intent-2.4.21-suse-171.patch | 1877 - .../patches/vfs_intent-2.4.21-suse2.patch | 1878 - .../patches/vfs_intent-2.4.29-vanilla.patch | 1833 - .../patches/vfs_intent-2.6-fc3.patch | 127 +- .../patches/vfs_intent-2.6-rhel4.patch | 935 +- .../patches/vfs_intent-2.6-sles10.patch | 1312 +- .../kernel_patches/patches/vfs_intent-2.6.12.patch | 129 +- .../patches/vfs_races-2.6-rhel4.patch | 20 +- lustre/kernel_patches/prepare_tree.sh | 88 + lustre/kernel_patches/scripts/added-by-patch | 14 + lustre/kernel_patches/scripts/apatch | 97 + lustre/kernel_patches/scripts/cat-series | 17 + lustre/kernel_patches/scripts/combine-applied | 45 + lustre/kernel_patches/scripts/combine-series | 43 + lustre/kernel_patches/scripts/cvs-take-patch | 78 + lustre/kernel_patches/scripts/export_patch | 55 + lustre/kernel_patches/scripts/extract_description | 87 + lustre/kernel_patches/scripts/forkpatch | 76 + lustre/kernel_patches/scripts/fpatch | 53 + lustre/kernel_patches/scripts/import_patch | 102 + lustre/kernel_patches/scripts/inpatch | 27 + lustre/kernel_patches/scripts/join-patch | 28 + lustre/kernel_patches/scripts/linus-patch | 26 + lustre/kernel_patches/scripts/mpatch | 101 + lustre/kernel_patches/scripts/new-kernel | 82 + lustre/kernel_patches/scripts/p0-2-p1 | 10 + lustre/kernel_patches/scripts/p_diff | 60 + lustre/kernel_patches/scripts/patchdesc | 21 + lustre/kernel_patches/scripts/patchfns | 256 + lustre/kernel_patches/scripts/pcpatch | 45 + lustre/kernel_patches/scripts/poppatch | 72 + lustre/kernel_patches/scripts/prep-patch | 18 + lustre/kernel_patches/scripts/pstatus | 156 + lustre/kernel_patches/scripts/ptkdiff | 46 + lustre/kernel_patches/scripts/pushpatch | 86 + lustre/kernel_patches/scripts/refpatch | 32 + lustre/kernel_patches/scripts/removed-by-patch | 14 + lustre/kernel_patches/scripts/rename-patch | 20 + lustre/kernel_patches/scripts/rolled-up-patch | 30 + lustre/kernel_patches/scripts/rpatch | 90 + lustre/kernel_patches/scripts/split-patch | 29 + lustre/kernel_patches/scripts/sum-series | 41 + lustre/kernel_patches/scripts/tag-series | 41 + lustre/kernel_patches/scripts/toppatch | 27 + lustre/kernel_patches/scripts/touched-by-patch | 32 + lustre/kernel_patches/scripts/trypatch | 72 + lustre/kernel_patches/scripts/unitdiff.py | 223 + lustre/kernel_patches/scripts/unused-patches | 39 + lustre/kernel_patches/series/2.6-fc3.series | 2 +- lustre/kernel_patches/series/2.6-fc5.series | 2 +- lustre/kernel_patches/series/2.6-rhel4-cmd3.series | 42 + .../kernel_patches/series/2.6-rhel4-titech.series | 7 +- lustre/kernel_patches/series/2.6-rhel4.series | 10 +- lustre/kernel_patches/series/2.6-sles10.series | 5 +- lustre/kernel_patches/series/2.6-suse-newer.series | 2 +- lustre/kernel_patches/series/2.6-suse.series | 1 + lustre/kernel_patches/series/2.6.12-vanilla.series | 3 +- lustre/kernel_patches/series/2.6.18-vanilla.series | 8 +- lustre/kernel_patches/series/hp-pnnl-2.4.20 | 49 - .../kernel_patches/series/ldiskfs-2.6-fc3.series | 13 - .../kernel_patches/series/ldiskfs-2.6-fc5.series | 12 - .../kernel_patches/series/ldiskfs-2.6-rhel4.series | 17 - .../series/ldiskfs-2.6-sles10.series | 14 - .../kernel_patches/series/ldiskfs-2.6-suse.series | 17 - .../series/ldiskfs-2.6.12-vanilla.series | 15 - .../series/ldiskfs-2.6.18-vanilla.series | 13 - .../kernel_patches/series/ldiskfs2-2.6-fc3.series | 13 - .../kernel_patches/series/ldiskfs2-2.6-fc5.series | 12 - .../series/ldiskfs2-2.6-rhel4.series | 16 - .../series/ldiskfs2-2.6-sles10.series | 14 - .../kernel_patches/series/ldiskfs2-2.6-suse.series | 17 - .../series/ldiskfs2-2.6.12-vanilla.series | 15 - .../series/ldiskfs2-2.6.18-vanilla.series | 13 - lustre/kernel_patches/series/rhel-2.4.21 | 54 - lustre/kernel_patches/series/suse-2.4.21-cray | 43 - lustre/kernel_patches/series/vanilla-2.4.24 | 50 - lustre/kernel_patches/series/vanilla-2.4.29 | 45 - lustre/kernel_patches/series/vanilla-2.4.29-uml | 47 - lustre/kernel_patches/targets/2.6-rhel4.target.in | 2 +- lustre/kernel_patches/targets/2.6-sles10.target.in | 2 +- lustre/kernel_patches/targets/2.6-suse.target.in | 2 +- .../kernel_patches/targets/hp_pnnl-2.4.target.in | 17 - lustre/kernel_patches/targets/rh-2.4.target.in | 24 - lustre/kernel_patches/targets/rhel-2.4.target.in | 24 - lustre/kernel_patches/targets/sles-2.4.target.in | 26 - .../kernel_patches/targets/suse-2.4.21-2.target.in | 15 - lustre/kernel_patches/txt/dev_read_only.txt | 3 + lustre/kernel_patches/txt/exports.txt | 3 + lustre/kernel_patches/txt/exports_hp.txt | 3 + lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt | 3 + lustre/kernel_patches/txt/ext3-map_inode_page.txt | 3 + .../txt/ext3-map_inode_page_2.4.18.txt | 3 + lustre/kernel_patches/txt/invalidate_show.txt | 3 + lustre/kernel_patches/txt/kmem_cache_validate.txt | 3 + lustre/kernel_patches/txt/lustre_version.txt | 3 + lustre/kernel_patches/txt/uml_check_get_page.txt | 3 + lustre/kernel_patches/txt/uml_no_panic.txt | 3 + lustre/kernel_patches/which_patch | 19 +- lustre/ldiskfs/Makefile.in | 21 - lustre/ldiskfs/autoMakefile.am | 80 - lustre/ldiskfs2/Makefile.in | 21 - lustre/ldiskfs2/autoMakefile.am | 80 - lustre/ldlm/ldlm_extent.c | 14 +- lustre/ldlm/ldlm_flock.c | 7 +- lustre/ldlm/ldlm_inodebits.c | 4 +- lustre/ldlm/ldlm_internal.h | 3 +- lustre/ldlm/ldlm_lib.c | 1091 +- lustre/ldlm/ldlm_lock.c | 282 +- lustre/ldlm/ldlm_lockd.c | 324 +- lustre/ldlm/ldlm_plain.c | 1 + lustre/ldlm/ldlm_request.c | 68 +- lustre/ldlm/ldlm_resource.c | 55 +- lustre/liblustre/Makefile.am | 7 +- lustre/liblustre/dir.c | 43 +- lustre/liblustre/file.c | 155 +- lustre/liblustre/genlib.sh | 3 + lustre/liblustre/llite_fid.c | 62 + lustre/liblustre/llite_lib.c | 7 +- lustre/liblustre/llite_lib.h | 69 +- lustre/liblustre/lutil.c | 1 + lustre/liblustre/namei.c | 115 +- lustre/liblustre/rw.c | 67 +- lustre/liblustre/super.c | 512 +- lustre/liblustre/tests/Makefile.am | 4 + lustre/liblustre/tests/echo_test.c | 1 + lustre/liblustre/tests/recovery_small.c | 2 +- lustre/liblustre/tests/sanity.c | 19 +- lustre/llite/Makefile.in | 9 +- lustre/llite/autoMakefile.am | 2 +- lustre/llite/dcache.c | 180 +- lustre/llite/dir.c | 728 +- lustre/llite/file.c | 761 +- lustre/llite/llite_capa.c | 638 + lustre/llite/llite_close.c | 315 +- lustre/llite/llite_fid.c | 52 + lustre/llite/llite_internal.h | 282 +- lustre/llite/llite_lib.c | 1187 +- lustre/llite/llite_mmap.c | 18 +- lustre/llite/llite_nfs.c | 298 +- lustre/llite/lproc_llite.c | 205 +- lustre/llite/namei.c | 533 +- lustre/llite/remote_perm.c | 310 + lustre/llite/rw.c | 144 +- lustre/llite/rw24.c | 149 - lustre/llite/rw26.c | 74 +- lustre/llite/super.c | 127 - lustre/llite/super25.c | 95 +- lustre/llite/symlink.c | 30 +- lustre/llite/xattr.c | 62 +- lustre/lmv/.cvsignore | 15 + lustre/lmv/Makefile.in | 4 + lustre/lmv/autoMakefile.am | 18 + lustre/lmv/lmv_fld.c | 72 + lustre/lmv/lmv_intent.c | 1043 + lustre/lmv/lmv_internal.h | 225 + lustre/lmv/lmv_obd.c | 2792 + lustre/lmv/lmv_object.c | 426 + lustre/lmv/lproc_lmv.c | 151 + lustre/lov/lov_ea.c | 6 +- lustre/lov/lov_internal.h | 17 +- lustre/lov/lov_log.c | 22 +- lustre/lov/lov_merge.c | 9 +- lustre/lov/lov_obd.c | 245 +- lustre/lov/lov_pack.c | 131 +- lustre/lov/lov_qos.c | 218 +- lustre/lov/lov_request.c | 72 +- lustre/lov/lproc_lov.c | 2 +- lustre/lvfs/.cvsignore | 4 +- lustre/lvfs/Makefile.in | 4 +- lustre/lvfs/autoMakefile.am | 22 +- lustre/lvfs/fsfilt_ext3.c | 310 +- lustre/lvfs/fsfilt_reiserfs.c | 2 + lustre/lvfs/lvfs_linux.c | 248 +- lustre/lvfs/quotafmt_test.c | 7 +- lustre/lvfs/upcall_cache.c | 344 +- lustre/mdc/lproc_mdc.c | 1 + lustre/mdc/mdc_internal.h | 185 +- lustre/mdc/mdc_lib.c | 366 +- lustre/mdc/mdc_locks.c | 453 +- lustre/mdc/mdc_reint.c | 151 +- lustre/mdc/mdc_request.c | 1076 +- lustre/mdd/.cvsignore | 15 + lustre/mdd/Makefile.in | 7 + lustre/mdd/autoMakefile.am | 11 + lustre/mdd/mdd_device.c | 503 + lustre/mdd/mdd_dir.c | 1636 + lustre/mdd/mdd_internal.h | 575 + lustre/mdd/mdd_lock.c | 105 + lustre/mdd/mdd_lov.c | 717 + lustre/mdd/mdd_lproc.c | 102 + lustre/mdd/mdd_object.c | 1509 + lustre/mdd/mdd_orphans.c | 219 + lustre/mdd/mdd_permission.c | 654 + lustre/mdd/mdd_trans.c | 206 + lustre/mds/handler.c | 441 +- lustre/mds/lproc_mds.c | 4 + lustre/mds/mds_fs.c | 142 +- lustre/mds/mds_internal.h | 30 +- lustre/mds/mds_join.c | 10 +- lustre/mds/mds_lib.c | 18 + lustre/mds/mds_log.c | 20 +- lustre/mds/mds_lov.c | 111 +- lustre/mds/mds_open.c | 58 +- lustre/mds/mds_reint.c | 187 +- lustre/mds/mds_unlink_open.c | 17 +- lustre/mds/mds_xattr.c | 16 +- lustre/mdt/.cvsignore | 15 + lustre/mdt/Makefile.in | 5 + lustre/mdt/autoMakefile.am | 11 + lustre/mdt/mdt_capa.c | 302 + lustre/mdt/mdt_handler.c | 4860 + lustre/mdt/mdt_identity.c | 272 + lustre/mdt/mdt_idmap.c | 800 + lustre/mdt/mdt_internal.h | 835 + lustre/mdt/mdt_lib.c | 1143 + lustre/mdt/mdt_lproc.c | 763 + lustre/mdt/mdt_open.c | 1277 + lustre/mdt/mdt_recovery.c | 1131 + lustre/mdt/mdt_reint.c | 983 + lustre/mdt/mdt_rmtacl.c | 260 + lustre/mdt/mdt_xattr.c | 363 + lustre/mgc/libmgc.c | 18 +- lustre/mgc/mgc_request.c | 214 +- lustre/mgs/lproc_mgs.c | 1 - lustre/mgs/mgs_fs.c | 30 +- lustre/mgs/mgs_handler.c | 99 +- lustre/mgs/mgs_internal.h | 11 +- lustre/mgs/mgs_llog.c | 1054 +- lustre/obdclass/Makefile.in | 5 +- lustre/obdclass/autoMakefile.am | 9 +- lustre/obdclass/capa.c | 290 + lustre/obdclass/class_obd.c | 28 +- lustre/obdclass/darwin/.cvsignore | 1 - lustre/obdclass/darwin/darwin-sysctl.c | 4 +- lustre/obdclass/dt_object.c | 214 + lustre/obdclass/genops.c | 178 +- lustre/obdclass/hash.c | 241 + lustre/obdclass/linux/.cvsignore | 5 - lustre/obdclass/linux/linux-module.c | 9 +- lustre/obdclass/linux/linux-obdo.c | 137 +- lustre/obdclass/linux/linux-sysctl.c | 4 +- lustre/obdclass/llog.c | 5 +- lustre/obdclass/llog_cat.c | 29 +- lustre/obdclass/llog_lvfs.c | 15 +- lustre/obdclass/llog_obd.c | 65 +- lustre/obdclass/llog_swab.c | 17 +- lustre/obdclass/llog_test.c | 24 +- lustre/obdclass/lprocfs_status.c | 135 +- lustre/obdclass/lu_object.c | 1214 + lustre/obdclass/lu_time.c | 214 + lustre/obdclass/lustre_handles.c | 135 +- lustre/obdclass/lustre_peer.c | 10 +- lustre/obdclass/mea.c | 180 + lustre/obdclass/obd_config.c | 133 +- lustre/obdclass/obd_mount.c | 300 +- lustre/obdclass/obdo.c | 154 + lustre/obdclass/prng.c | 1 + lustre/obdclass/uuid.c | 70 - lustre/obdecho/echo.c | 23 +- lustre/obdecho/echo_client.c | 69 +- lustre/obdfilter/Makefile.in | 7 +- lustre/obdfilter/autoMakefile.am | 2 +- lustre/obdfilter/filter.c | 1181 +- lustre/obdfilter/filter_capa.c | 303 + lustre/obdfilter/filter_internal.h | 58 +- lustre/obdfilter/filter_io.c | 78 +- lustre/obdfilter/filter_io_24.c | 544 - lustre/obdfilter/filter_io_26.c | 44 +- lustre/obdfilter/filter_log.c | 56 +- lustre/obdfilter/filter_lvb.c | 8 +- lustre/obdfilter/lproc_obdfilter.c | 296 +- lustre/osc/lproc_osc.c | 15 +- lustre/osc/osc_create.c | 19 +- lustre/osc/osc_request.c | 504 +- lustre/osd/.cvsignore | 15 + lustre/osd/Makefile.in | 6 + lustre/osd/autoMakefile.am | 11 + lustre/osd/osd_handler.c | 2630 + lustre/osd/osd_igif.c | 70 + lustre/osd/osd_igif.h | 43 + lustre/osd/osd_internal.h | 83 + lustre/osd/osd_oi.c | 215 + lustre/osd/osd_oi.h | 92 + lustre/ost/ost_handler.c | 247 +- lustre/ptlrpc/Makefile.in | 5 +- lustre/ptlrpc/autoMakefile.am | 10 +- lustre/ptlrpc/client.c | 235 +- lustre/ptlrpc/connection.c | 3 - lustre/ptlrpc/events.c | 74 +- lustre/ptlrpc/gss/.cvsignore | 15 + lustre/ptlrpc/gss/Makefile.in | 9 + lustre/ptlrpc/gss/autoMakefile.am | 15 + lustre/ptlrpc/gss/gss_api.h | 152 + lustre/ptlrpc/gss/gss_asn1.h | 85 + lustre/ptlrpc/gss/gss_cli_upcall.c | 981 + lustre/ptlrpc/gss/gss_err.h | 194 + lustre/ptlrpc/gss/gss_generic_token.c | 290 + lustre/ptlrpc/gss/gss_internal.h | 352 + lustre/ptlrpc/gss/gss_krb5.h | 166 + lustre/ptlrpc/gss/gss_krb5_mech.c | 1263 + lustre/ptlrpc/gss/gss_mech_switch.c | 344 + lustre/ptlrpc/gss/gss_rawobj.c | 195 + lustre/ptlrpc/gss/gss_svc_upcall.c | 998 + lustre/ptlrpc/gss/lproc_gss.c | 165 + lustre/ptlrpc/gss/sec_gss.c | 2608 + lustre/ptlrpc/import.c | 113 +- lustre/ptlrpc/layout.c | 1058 + lustre/ptlrpc/llog_server.c | 2 +- lustre/ptlrpc/lproc_ptlrpc.c | 46 +- lustre/ptlrpc/niobuf.c | 118 +- lustre/ptlrpc/pack_generic.c | 461 +- lustre/ptlrpc/pinger.c | 187 +- lustre/ptlrpc/ptlrpc_internal.h | 80 +- lustre/ptlrpc/ptlrpc_module.c | 47 +- lustre/ptlrpc/ptlrpcd.c | 1 + lustre/ptlrpc/recov_thread.c | 19 +- lustre/ptlrpc/recover.c | 6 +- lustre/ptlrpc/sec.c | 2204 + lustre/ptlrpc/sec_bulk.c | 1012 + lustre/ptlrpc/sec_lproc.c | 182 + lustre/ptlrpc/sec_null.c | 361 + lustre/ptlrpc/sec_plain.c | 553 + lustre/ptlrpc/service.c | 191 +- lustre/ptlrpc/wiretest.c | 76 +- lustre/quota/quota_context.c | 148 +- lustre/quota/quota_interface.c | 51 +- lustre/quota/quota_master.c | 18 +- lustre/quota/quotacheck_test.c | 12 +- lustre/quota/quotactl_test.c | 9 +- lustre/scripts/.cvsignore | 16 +- lustre/scripts/lc_cluman.in | 2 +- lustre/scripts/lc_common | 69 +- lustre/scripts/lc_md.in | 2 +- lustre/scripts/lc_modprobe.in | 2 +- lustre/scripts/lc_servip | 2 +- lustre/scripts/lmc2csv.pl | 186 +- lustre/scripts/lustre | 2 +- lustre/scripts/lustre_config.in | 37 +- lustre/scripts/lustre_rmmod | 12 +- lustre/scripts/version_tag.pl.in | 7 +- lustre/tests/Makefile.am | 31 +- lustre/tests/acceptance-small.sh | 1 + lustre/tests/acl/getfacl-noacl.test | 6 +- lustre/tests/acl/permissions.test | 3 + lustre/tests/cfg/insanity-lmv.sh | 84 + lustre/tests/cfg/insanity-local.sh | 15 +- lustre/tests/cfg/insanity-ltest.sh | 1 + lustre/tests/cfg/insanity-mdev.sh | 1 + lustre/tests/cfg/lmv.sh | 88 + lustre/tests/cfg/local.sh | 26 +- lustre/tests/cfg/lov.sh | 22 +- lustre/tests/cfg/mdev.sh | 32 + lustre/tests/checkstack.pl | 83 + lustre/tests/conf-sanity.sh | 29 +- lustre/tests/directio.c | 4 +- lustre/tests/disk1_4.zip | Bin 170785 -> 0 bytes lustre/tests/fsx.c | 10 +- lustre/tests/gensymmap.c | 96 + lustre/tests/iam_ut | Bin 0 -> 63499 bytes lustre/tests/iam_ut.c | 423 + lustre/tests/insanity.sh | 34 +- lustre/tests/krb5_login.sh | 53 + lustre/tests/lfscktest.sh | 2 + lustre/tests/ll_dirstripe_verify.c | 10 +- lustre/tests/ll_sparseness_write.c | 2 + lustre/tests/mmap_sanity.c | 2 +- lustre/tests/multiop.c | 70 +- lustre/tests/o_directory.c | 2 + lustre/tests/oos.sh | 11 +- lustre/tests/oos2.sh | 1 + lustre/tests/openclose.c | 2 + lustre/tests/opendevunlink.c | 2 + lustre/tests/opendirunlink.c | 20 +- lustre/tests/openfile.c | 2 + lustre/tests/openfilleddirunlink.c | 2 + lustre/tests/qos.sh | 142 + lustre/tests/recovery-small.sh | 132 +- lustre/tests/replay-dual.sh | 90 +- lustre/tests/replay-ost-single.sh | 9 +- lustre/tests/replay-single-lmv.sh | 113 + lustre/tests/replay-single.sh | 434 +- lustre/tests/run-llog.sh | 6 +- lustre/tests/run-quotafmt.sh | 2 +- lustre/tests/runas.c | 60 +- lustre/tests/rundbench | 2 - lustre/tests/runfailure-mds | 63 - lustre/tests/runfailure-net | 66 - lustre/tests/runfailure-ost | 51 - lustre/tests/runregression-brw.sh | 111 - lustre/tests/runregression-net.sh | 99 - lustre/tests/runtests | 26 +- lustre/tests/sanity-gss.sh | 390 + lustre/tests/sanity-lmv.sh | 397 + lustre/tests/sanity-quota.sh | 4 +- lustre/tests/sanity-sec.sh | 370 + lustre/tests/sanity.sh | 537 +- lustre/tests/sanityN.sh | 86 +- lustre/tests/test-framework.sh | 287 +- lustre/tests/test_brw.c | 4 +- lustre/tests/testreq.c | 141 - lustre/utils/.cvsignore | 3 + lustre/utils/Lustre/.cvsignore | 4 + lustre/utils/Lustre/Makefile.am | 4 + lustre/utils/Lustre/__init__.py | 7 + lustre/utils/Lustre/cmdline.py | 194 + lustre/utils/Lustre/error.py | 10 + lustre/utils/Lustre/lustredb.py | 551 + lustre/utils/Makefile.am | 32 +- lustre/utils/automatic-reconnect-sample | 34 + lustre/utils/create_iam.c | 352 + lustre/utils/gss/.cvsignore | 11 + lustre/utils/gss/Makefile.am | 67 + lustre/utils/gss/README | 12 + lustre/utils/gss/cacheio.c | 296 + lustre/utils/gss/cacheio.h | 48 + lustre/utils/gss/context.c | 57 + lustre/utils/gss/context.h | 47 + lustre/utils/gss/context_heimdal.c | 267 + lustre/utils/gss/context_lucid.c | 604 + lustre/utils/gss/context_mit.c | 392 + lustre/utils/gss/context_spkm3.c | 176 + lustre/utils/gss/err_util.c | 132 + lustre/utils/gss/err_util.h | 38 + lustre/utils/gss/gss_oids.c | 39 + lustre/utils/gss/gss_oids.h | 45 + lustre/utils/gss/gss_util.c | 402 + lustre/utils/gss/gss_util.h | 45 + lustre/utils/gss/gssd.c | 259 + lustre/utils/gss/gssd.h | 99 + lustre/utils/gss/gssd_main_loop.c | 165 + lustre/utils/gss/gssd_proc.c | 1101 + lustre/utils/gss/krb5_util.c | 1124 + lustre/utils/gss/krb5_util.h | 30 + lustre/utils/gss/l_idmap.c | 37 + lustre/utils/gss/lsupport.c | 783 + lustre/utils/gss/lsupport.h | 89 + lustre/utils/gss/nfs-utils-1.0.10-lustre.diff | 3962 + lustre/utils/gss/svcgssd.c | 265 + lustre/utils/gss/svcgssd.h | 54 + lustre/utils/gss/svcgssd_main_loop.c | 111 + lustre/utils/gss/svcgssd_mech2file.c | 73 + lustre/utils/gss/svcgssd_proc.c | 545 + lustre/utils/gss/write_bytes.h | 158 + lustre/utils/l_facl.c | 268 + lustre/utils/l_getgroups.c | 249 - lustre/utils/l_getidentity.c | 440 + lustre/utils/lconf | 2910 + lustre/utils/lfs.c | 277 +- lustre/utils/libiam.c | 605 + lustre/utils/liblustreapi.c | 107 +- lustre/utils/llanalyze | 12 +- lustre/utils/llog_reader.c | 15 +- lustre/utils/lmc | 1238 + lustre/utils/loadgen.c | 3 +- lustre/utils/loadmod_all.sh | 47 + lustre/utils/lustre_cfg.c | 3 +- lustre/utils/mds-failover-sample | 20 + lustre/utils/mkfs_lustre.c | 521 +- lustre/utils/module_cleanup.sh | 3 +- lustre/utils/module_setup.sh | 42 +- lustre/utils/mount_lustre.c | 32 +- lustre/utils/obd.c | 5 +- lustre/utils/req-layout.c | 149 + lustre/utils/rmmod_all.sh | 14 + lustre/utils/wirecheck.c | 13 +- lustre/utils/wiretest.c | 76 +- 836 files changed, 162930 insertions(+), 541010 deletions(-) create mode 100644 lustre/autoconf/kerberos5.m4 rename lustre/{ldiskfs => cmm}/.cvsignore (79%) create mode 100644 lustre/cmm/Makefile.in create mode 100644 lustre/cmm/autoMakefile.am create mode 100644 lustre/cmm/cmm_device.c create mode 100644 lustre/cmm/cmm_internal.h create mode 100644 lustre/cmm/cmm_lproc.c create mode 100644 lustre/cmm/cmm_object.c create mode 100644 lustre/cmm/cmm_split.c create mode 100644 lustre/cmm/mdc_device.c create mode 100644 lustre/cmm/mdc_internal.h create mode 100644 lustre/cmm/mdc_object.c create mode 100644 lustre/contrib/mpich2-1.0.3.patch rename lustre/{ldiskfs2 => fid}/.cvsignore (79%) create mode 100644 lustre/fid/Makefile.in create mode 100644 lustre/fid/autoMakefile.am create mode 100644 lustre/fid/fid_handler.c create mode 100644 lustre/fid/fid_internal.h create mode 100644 lustre/fid/fid_lib.c create mode 100644 lustre/fid/fid_request.c create mode 100644 lustre/fid/fid_store.c create mode 100644 lustre/fid/lproc_fid.c create mode 100644 lustre/fld/.cvsignore create mode 100644 lustre/fld/Makefile.in create mode 100644 lustre/fld/autoMakefile.am create mode 100644 lustre/fld/fld_cache.c create mode 100644 lustre/fld/fld_handler.c create mode 100644 lustre/fld/fld_index.c create mode 100644 lustre/fld/fld_internal.h create mode 100644 lustre/fld/fld_request.c create mode 100644 lustre/fld/lproc_fld.c create mode 100644 lustre/include/dt_object.h create mode 100644 lustre/include/linux/lustre_acl.h create mode 100644 lustre/include/lu_object.h create mode 100644 lustre/include/lu_time.h create mode 100644 lustre/include/lustre/libiam.h create mode 100644 lustre/include/lustre_capa.h create mode 100644 lustre/include/lustre_fid.h create mode 100644 lustre/include/lustre_fld.h create mode 100644 lustre/include/lustre_mdc.h create mode 100644 lustre/include/lustre_mdt.h create mode 100644 lustre/include/lustre_req_layout.h create mode 100644 lustre/include/lustre_sec.h create mode 100644 lustre/include/md_object.h delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686-smp.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686.config delete mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config delete mode 100644 lustre/kernel_patches/kernel_configs/uml-vanilla-2.4.24.config delete mode 100644 lustre/kernel_patches/patches/3.5G-address-space-2.4.22-vanilla.patch create mode 100644 lustre/kernel_patches/patches/__find_get_block_slow-scale.patch delete mode 100644 lustre/kernel_patches/patches/add_page_private.patch create mode 100644 lustre/kernel_patches/patches/atomic_add_return-sles9.patch delete mode 100644 lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch create mode 100644 lustre/kernel_patches/patches/blkdev_tunables-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/compile-fixes-2.4.21-rhel.patch delete mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse-171.patch delete mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/configurable-x86_64-2.4.21.patch delete mode 100644 lustre/kernel_patches/patches/dcache_refcount_debug.patch create mode 100644 lustre/kernel_patches/patches/debugging-fields-in-current.patch delete mode 100644 lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/dev_read_only_2.4.21-chaos.patch create mode 100644 lustre/kernel_patches/patches/dynamic-locks-2.6-fc3.patch create mode 100644 lustre/kernel_patches/patches/dynamic-locks-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/elevator-cfq.patch delete mode 100644 lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/export-ext3-2.6-suse.patch create mode 100644 lustre/kernel_patches/patches/export-nr_free_buffer_pages.patch delete mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-cray.patch delete mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-rh.patch delete mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch delete mode 100644 lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/export-truncate.patch delete mode 100644 lustre/kernel_patches/patches/export-zap-page-range.patch delete mode 100644 lustre/kernel_patches/patches/export_num_siblings.patch create mode 100644 lustre/kernel_patches/patches/export_symbol_numa-2.6.18.patch delete mode 100644 lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/exports-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/exports_2.4.19-suse.patch delete mode 100644 lustre/kernel_patches/patches/exports_2.4.19-suse2.patch delete mode 100644 lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-1.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-2.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-3.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4-ino_t.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch create mode 100644 lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch delete mode 100644 lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-suse-171.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/ext3-disable-write-bar-by-default-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-sles.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.22-rh.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-error-export.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.15.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.16-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.5.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-bug11324.patch create mode 100644 lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch create mode 100644 lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch create mode 100644 lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch create mode 100644 lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-external-journal-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-filterdata-2.6.15.patch create mode 100644 lustre/kernel_patches/patches/ext3-filterdata-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-2.4.21-rhel.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-dot-2.6.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-path-ops.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ialloc-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ialloc-2.6.patch delete mode 100644 lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-ino_sb_macro-2.4.21-chaos.patch create mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch create mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-largefile.patch delete mode 100644 lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-map_inode_page-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-map_inode_page.patch delete mode 100644 lustre/kernel_patches/patches/ext3-map_inode_page_2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-core.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch delete mode 100644 lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nanosecond-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nanosecond-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nanosecond-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nanosecond-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch delete mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-noread-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-noread-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-noread-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/ext3-o_direct-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext3-orphan_lock-2.4.22-rh.patch delete mode 100644 lustre/kernel_patches/patches/ext3-orphan_lock.patch delete mode 100644 lustre/kernel_patches/patches/ext3-raw-lookup.patch delete mode 100644 lustre/kernel_patches/patches/ext3-remove-cond_resched-calls-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-san-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-sector_t-overflow-2.4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.5-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-truncate-buffer-head.patch delete mode 100644 lustre/kernel_patches/patches/ext3-truncate_blocks.patch delete mode 100644 lustre/kernel_patches/patches/ext3-trusted_ea-2.4.20.patch create mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch create mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-use-after-free.patch delete mode 100644 lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-wantedi-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch create mode 100644 lustre/kernel_patches/patches/ext3-wantedi-misc-2.6-suse.patch create mode 100644 lustre/kernel_patches/patches/ext3-wantedi-misc-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-xattr-ptr-arith-fix.patch delete mode 100644 lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch delete mode 100644 lustre/kernel_patches/patches/extN-misc-fixup.patch delete mode 100644 lustre/kernel_patches/patches/extN-wantedi-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/extN-wantedi-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/extN-wantedi.patch delete mode 100644 lustre/kernel_patches/patches/fsprivate-2.4-suse.patch delete mode 100644 lustre/kernel_patches/patches/fsprivate-2.4.patch delete mode 100644 lustre/kernel_patches/patches/gfp_debug-2.4.21-rhel.patch delete mode 100644 lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-rh-2.4.patch create mode 100644 lustre/kernel_patches/patches/highmem-split-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/i_filter_data.patch create mode 100644 lustre/kernel_patches/patches/increase-BH_LRU_SIZE.patch delete mode 100644 lustre/kernel_patches/patches/inode-max-readahead-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show-2.4.20-hp.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show.patch delete mode 100644 lustre/kernel_patches/patches/iod-rmap-exports-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch delete mode 100644 lustre/kernel_patches/patches/iod-stock-exports-2.4.22.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.6-fc5.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.6.12.patch create mode 100644 lustre/kernel_patches/patches/ipoib_tcpdump.patch create mode 100644 lustre/kernel_patches/patches/jbd-checkpoint-on-commit.patch delete mode 100644 lustre/kernel_patches/patches/jbd-commit-tricks-rhel3.patch delete mode 100644 lustre/kernel_patches/patches/jbd-commit-tricks.patch create mode 100644 lustre/kernel_patches/patches/jbd-copy-out-everything.patch delete mode 100644 lustre/kernel_patches/patches/jbd-ctx_switch.patch delete mode 100644 lustre/kernel_patches/patches/jbd-dont-account-blocks-twice.patch delete mode 100644 lustre/kernel_patches/patches/jbd-flushtime.patch delete mode 100644 lustre/kernel_patches/patches/jbd-get_write_access.patch delete mode 100644 lustre/kernel_patches/patches/kallsyms-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/kjournald_affinity.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse-171.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse2.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO-rhel3.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.29-xattr-0.8.54.patch create mode 100644 lustre/kernel_patches/patches/linux-2.6.9-network_driver-for-sk98.patch delete mode 100644 lustre/kernel_patches/patches/listman-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/listman-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/llnl-frame-pointer-walk-2.4.21-rhel.patch delete mode 100644 lustre/kernel_patches/patches/llnl-frame-pointer-walk-fix-2.4.21-rhel.patch create mode 100644 lustre/kernel_patches/patches/lockmeter.patch delete mode 100644 lustre/kernel_patches/patches/lookup-stack-symbols-2.4.21-suse-171.patch delete mode 100644 lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch delete mode 100644 lustre/kernel_patches/patches/lustre_build.patch delete mode 100644 lustre/kernel_patches/patches/netconsole-2.4.24-ppc.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.20-hp.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.22.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch-1 delete mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-getattr_on_lookup-2.4.patch delete mode 100644 lustre/kernel_patches/patches/nfs_statfs-toomanyfiles-rhel-2.4.patch delete mode 100644 lustre/kernel_patches/patches/nfsd_iallocsem.patch delete mode 100644 lustre/kernel_patches/patches/pagecache-lock-2.4.21-chaos.patch create mode 100644 lustre/kernel_patches/patches/proc-sleep-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/procfs-ndynamic-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/procfs-ndynamic-2.4.patch delete mode 100644 lustre/kernel_patches/patches/qsnet-rhel-2.4.patch create mode 100644 lustre/kernel_patches/patches/raid5-zerocopy.patch delete mode 100644 lustre/kernel_patches/patches/remove-suid-2.4-rhel.patch delete mode 100644 lustre/kernel_patches/patches/removepage-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch delete mode 100644 lustre/kernel_patches/patches/sd_iostats-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/slab-use-after-free-debug-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/socket-exports-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/statfs64-cast-unsigned-2.4-rhel.patch delete mode 100644 lustre/kernel_patches/patches/uml-2.4.20-do_mmap_pgoff-fix.patch delete mode 100644 lustre/kernel_patches/patches/uml-patch-2.4.24-1.patch delete mode 100644 lustre/kernel_patches/patches/uml-patch-2.4.29-1.patch delete mode 100644 lustre/kernel_patches/patches/uml-sigusr1-2.4-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.21-rhel.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.21-suse-171.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.29-vanilla.patch create mode 100755 lustre/kernel_patches/prepare_tree.sh create mode 100755 lustre/kernel_patches/scripts/added-by-patch create mode 100755 lustre/kernel_patches/scripts/apatch create mode 100755 lustre/kernel_patches/scripts/cat-series create mode 100755 lustre/kernel_patches/scripts/combine-applied create mode 100755 lustre/kernel_patches/scripts/combine-series create mode 100755 lustre/kernel_patches/scripts/cvs-take-patch create mode 100755 lustre/kernel_patches/scripts/export_patch create mode 100755 lustre/kernel_patches/scripts/extract_description create mode 100755 lustre/kernel_patches/scripts/forkpatch create mode 100755 lustre/kernel_patches/scripts/fpatch create mode 100755 lustre/kernel_patches/scripts/import_patch create mode 100755 lustre/kernel_patches/scripts/inpatch create mode 100755 lustre/kernel_patches/scripts/join-patch create mode 100755 lustre/kernel_patches/scripts/linus-patch create mode 100755 lustre/kernel_patches/scripts/mpatch create mode 100755 lustre/kernel_patches/scripts/new-kernel create mode 100755 lustre/kernel_patches/scripts/p0-2-p1 create mode 100755 lustre/kernel_patches/scripts/p_diff create mode 100755 lustre/kernel_patches/scripts/patchdesc create mode 100644 lustre/kernel_patches/scripts/patchfns create mode 100755 lustre/kernel_patches/scripts/pcpatch create mode 100755 lustre/kernel_patches/scripts/poppatch create mode 100755 lustre/kernel_patches/scripts/prep-patch create mode 100755 lustre/kernel_patches/scripts/pstatus create mode 100755 lustre/kernel_patches/scripts/ptkdiff create mode 100755 lustre/kernel_patches/scripts/pushpatch create mode 100755 lustre/kernel_patches/scripts/refpatch create mode 100755 lustre/kernel_patches/scripts/removed-by-patch create mode 100755 lustre/kernel_patches/scripts/rename-patch create mode 100755 lustre/kernel_patches/scripts/rolled-up-patch create mode 100755 lustre/kernel_patches/scripts/rpatch create mode 100755 lustre/kernel_patches/scripts/split-patch create mode 100755 lustre/kernel_patches/scripts/sum-series create mode 100755 lustre/kernel_patches/scripts/tag-series create mode 100755 lustre/kernel_patches/scripts/toppatch create mode 100755 lustre/kernel_patches/scripts/touched-by-patch create mode 100755 lustre/kernel_patches/scripts/trypatch create mode 100755 lustre/kernel_patches/scripts/unitdiff.py create mode 100755 lustre/kernel_patches/scripts/unused-patches create mode 100644 lustre/kernel_patches/series/2.6-rhel4-cmd3.series delete mode 100644 lustre/kernel_patches/series/hp-pnnl-2.4.20 delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-fc3.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-fc5.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-sles10.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-suse.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series delete mode 100644 lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6-fc3.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6-fc5.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6-rhel4.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6-suse.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6.12-vanilla.series delete mode 100644 lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series delete mode 100644 lustre/kernel_patches/series/rhel-2.4.21 delete mode 100644 lustre/kernel_patches/series/suse-2.4.21-cray delete mode 100644 lustre/kernel_patches/series/vanilla-2.4.24 delete mode 100644 lustre/kernel_patches/series/vanilla-2.4.29 delete mode 100644 lustre/kernel_patches/series/vanilla-2.4.29-uml delete mode 100644 lustre/kernel_patches/targets/hp_pnnl-2.4.target.in delete mode 100644 lustre/kernel_patches/targets/rh-2.4.target.in delete mode 100644 lustre/kernel_patches/targets/rhel-2.4.target.in delete mode 100644 lustre/kernel_patches/targets/sles-2.4.target.in delete mode 100644 lustre/kernel_patches/targets/suse-2.4.21-2.target.in create mode 100644 lustre/kernel_patches/txt/dev_read_only.txt create mode 100644 lustre/kernel_patches/txt/exports.txt create mode 100644 lustre/kernel_patches/txt/exports_hp.txt create mode 100644 lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt create mode 100644 lustre/kernel_patches/txt/ext3-map_inode_page.txt create mode 100644 lustre/kernel_patches/txt/ext3-map_inode_page_2.4.18.txt create mode 100644 lustre/kernel_patches/txt/invalidate_show.txt create mode 100644 lustre/kernel_patches/txt/kmem_cache_validate.txt create mode 100644 lustre/kernel_patches/txt/lustre_version.txt create mode 100644 lustre/kernel_patches/txt/uml_check_get_page.txt create mode 100644 lustre/kernel_patches/txt/uml_no_panic.txt delete mode 100644 lustre/ldiskfs/Makefile.in delete mode 100644 lustre/ldiskfs/autoMakefile.am delete mode 100644 lustre/ldiskfs2/Makefile.in delete mode 100644 lustre/ldiskfs2/autoMakefile.am create mode 100644 lustre/liblustre/llite_fid.c create mode 100644 lustre/llite/llite_capa.c create mode 100644 lustre/llite/llite_fid.c create mode 100644 lustre/llite/remote_perm.c delete mode 100644 lustre/llite/rw24.c delete mode 100644 lustre/llite/super.c create mode 100644 lustre/lmv/.cvsignore create mode 100644 lustre/lmv/Makefile.in create mode 100644 lustre/lmv/autoMakefile.am create mode 100644 lustre/lmv/lmv_fld.c create mode 100644 lustre/lmv/lmv_intent.c create mode 100644 lustre/lmv/lmv_internal.h create mode 100644 lustre/lmv/lmv_obd.c create mode 100644 lustre/lmv/lmv_object.c create mode 100644 lustre/lmv/lproc_lmv.c create mode 100644 lustre/mdd/.cvsignore create mode 100644 lustre/mdd/Makefile.in create mode 100644 lustre/mdd/autoMakefile.am create mode 100644 lustre/mdd/mdd_device.c create mode 100644 lustre/mdd/mdd_dir.c create mode 100644 lustre/mdd/mdd_internal.h create mode 100644 lustre/mdd/mdd_lock.c create mode 100644 lustre/mdd/mdd_lov.c create mode 100644 lustre/mdd/mdd_lproc.c create mode 100644 lustre/mdd/mdd_object.c create mode 100644 lustre/mdd/mdd_orphans.c create mode 100644 lustre/mdd/mdd_permission.c create mode 100644 lustre/mdd/mdd_trans.c create mode 100644 lustre/mdt/.cvsignore create mode 100644 lustre/mdt/Makefile.in create mode 100644 lustre/mdt/autoMakefile.am create mode 100644 lustre/mdt/mdt_capa.c create mode 100644 lustre/mdt/mdt_handler.c create mode 100644 lustre/mdt/mdt_identity.c create mode 100644 lustre/mdt/mdt_idmap.c create mode 100644 lustre/mdt/mdt_internal.h create mode 100644 lustre/mdt/mdt_lib.c create mode 100644 lustre/mdt/mdt_lproc.c create mode 100644 lustre/mdt/mdt_open.c create mode 100644 lustre/mdt/mdt_recovery.c create mode 100644 lustre/mdt/mdt_reint.c create mode 100644 lustre/mdt/mdt_rmtacl.c create mode 100644 lustre/mdt/mdt_xattr.c create mode 100644 lustre/obdclass/capa.c delete mode 100644 lustre/obdclass/darwin/.cvsignore create mode 100644 lustre/obdclass/dt_object.c create mode 100644 lustre/obdclass/hash.c delete mode 100644 lustre/obdclass/linux/.cvsignore create mode 100644 lustre/obdclass/lu_object.c create mode 100644 lustre/obdclass/lu_time.c create mode 100644 lustre/obdclass/mea.c create mode 100644 lustre/obdfilter/filter_capa.c delete mode 100644 lustre/obdfilter/filter_io_24.c create mode 100644 lustre/osd/.cvsignore create mode 100644 lustre/osd/Makefile.in create mode 100644 lustre/osd/autoMakefile.am create mode 100644 lustre/osd/osd_handler.c create mode 100644 lustre/osd/osd_igif.c create mode 100644 lustre/osd/osd_igif.h create mode 100644 lustre/osd/osd_internal.h create mode 100644 lustre/osd/osd_oi.c create mode 100644 lustre/osd/osd_oi.h create mode 100644 lustre/ptlrpc/gss/.cvsignore create mode 100644 lustre/ptlrpc/gss/Makefile.in create mode 100644 lustre/ptlrpc/gss/autoMakefile.am create mode 100644 lustre/ptlrpc/gss/gss_api.h create mode 100644 lustre/ptlrpc/gss/gss_asn1.h create mode 100644 lustre/ptlrpc/gss/gss_cli_upcall.c create mode 100644 lustre/ptlrpc/gss/gss_err.h create mode 100644 lustre/ptlrpc/gss/gss_generic_token.c create mode 100644 lustre/ptlrpc/gss/gss_internal.h create mode 100644 lustre/ptlrpc/gss/gss_krb5.h create mode 100644 lustre/ptlrpc/gss/gss_krb5_mech.c create mode 100644 lustre/ptlrpc/gss/gss_mech_switch.c create mode 100644 lustre/ptlrpc/gss/gss_rawobj.c create mode 100644 lustre/ptlrpc/gss/gss_svc_upcall.c create mode 100644 lustre/ptlrpc/gss/lproc_gss.c create mode 100644 lustre/ptlrpc/gss/sec_gss.c create mode 100644 lustre/ptlrpc/layout.c create mode 100644 lustre/ptlrpc/sec.c create mode 100644 lustre/ptlrpc/sec_bulk.c create mode 100644 lustre/ptlrpc/sec_lproc.c create mode 100644 lustre/ptlrpc/sec_null.c create mode 100644 lustre/ptlrpc/sec_plain.c create mode 100644 lustre/tests/cfg/insanity-lmv.sh create mode 100644 lustre/tests/cfg/lmv.sh create mode 100644 lustre/tests/cfg/mdev.sh create mode 100644 lustre/tests/checkstack.pl delete mode 100644 lustre/tests/disk1_4.zip create mode 100644 lustre/tests/gensymmap.c create mode 100755 lustre/tests/iam_ut create mode 100644 lustre/tests/iam_ut.c create mode 100755 lustre/tests/krb5_login.sh create mode 100644 lustre/tests/qos.sh create mode 100755 lustre/tests/replay-single-lmv.sh delete mode 100755 lustre/tests/runfailure-mds delete mode 100755 lustre/tests/runfailure-net delete mode 100755 lustre/tests/runfailure-ost delete mode 100644 lustre/tests/runregression-brw.sh delete mode 100644 lustre/tests/runregression-net.sh create mode 100644 lustre/tests/sanity-gss.sh create mode 100644 lustre/tests/sanity-lmv.sh create mode 100644 lustre/tests/sanity-sec.sh delete mode 100644 lustre/tests/testreq.c create mode 100644 lustre/utils/Lustre/.cvsignore create mode 100644 lustre/utils/Lustre/Makefile.am create mode 100644 lustre/utils/Lustre/__init__.py create mode 100644 lustre/utils/Lustre/cmdline.py create mode 100644 lustre/utils/Lustre/error.py create mode 100644 lustre/utils/Lustre/lustredb.py create mode 100755 lustre/utils/automatic-reconnect-sample create mode 100644 lustre/utils/create_iam.c create mode 100644 lustre/utils/gss/.cvsignore create mode 100644 lustre/utils/gss/Makefile.am create mode 100644 lustre/utils/gss/README create mode 100644 lustre/utils/gss/cacheio.c create mode 100644 lustre/utils/gss/cacheio.h create mode 100644 lustre/utils/gss/context.c create mode 100644 lustre/utils/gss/context.h create mode 100644 lustre/utils/gss/context_heimdal.c create mode 100644 lustre/utils/gss/context_lucid.c create mode 100644 lustre/utils/gss/context_mit.c create mode 100644 lustre/utils/gss/context_spkm3.c create mode 100644 lustre/utils/gss/err_util.c create mode 100644 lustre/utils/gss/err_util.h create mode 100644 lustre/utils/gss/gss_oids.c create mode 100644 lustre/utils/gss/gss_oids.h create mode 100644 lustre/utils/gss/gss_util.c create mode 100644 lustre/utils/gss/gss_util.h create mode 100644 lustre/utils/gss/gssd.c create mode 100644 lustre/utils/gss/gssd.h create mode 100644 lustre/utils/gss/gssd_main_loop.c create mode 100644 lustre/utils/gss/gssd_proc.c create mode 100644 lustre/utils/gss/krb5_util.c create mode 100644 lustre/utils/gss/krb5_util.h create mode 100644 lustre/utils/gss/l_idmap.c create mode 100644 lustre/utils/gss/lsupport.c create mode 100644 lustre/utils/gss/lsupport.h create mode 100644 lustre/utils/gss/nfs-utils-1.0.10-lustre.diff create mode 100644 lustre/utils/gss/svcgssd.c create mode 100644 lustre/utils/gss/svcgssd.h create mode 100644 lustre/utils/gss/svcgssd_main_loop.c create mode 100644 lustre/utils/gss/svcgssd_mech2file.c create mode 100644 lustre/utils/gss/svcgssd_proc.c create mode 100644 lustre/utils/gss/write_bytes.h create mode 100644 lustre/utils/l_facl.c delete mode 100644 lustre/utils/l_getgroups.c create mode 100644 lustre/utils/l_getidentity.c create mode 100755 lustre/utils/lconf create mode 100644 lustre/utils/libiam.c create mode 100755 lustre/utils/lmc create mode 100755 lustre/utils/loadmod_all.sh create mode 100755 lustre/utils/mds-failover-sample create mode 100644 lustre/utils/req-layout.c create mode 100755 lustre/utils/rmmod_all.sh diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 58f4891..91cda98 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -7,13 +7,60 @@ tbd Cluster File Systems, Inc. 2.6.12.6 vanilla (kernel.org) 2.6.16.27-0.9 (SLES 10) * Client support for unpatched kernels: - (see https://mail.clusterfs.com/wikis/lustre/PatchlessClient) - 2.6.16 - 2.6.19 vanilla (kernel.org) - 2.6.9-42.0.8.EL (RHEL 4) + (see http://wiki.lustre.org/index.php?title=Patchless_Client) + 2.6.9-42.0.10.EL (RHEL 4) + 2.6.16 - 2.6.21 vanilla (kernel.org) * Recommended e2fsprogs version: 1.39.cfs7 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. * bug fixes +Severity : enhancement +Bugzilla : 12194 +Description: add optional extra BUILD_VERSION info +Details : add a new environment variable (namely LUSTRE_VERS) which allows + to override the lustre version. + +Severity : normal +Frequency : 2.6.18 servers only +Bugzilla : 12546 +Description: ll_kern_mount() doesn't release the module reference +Details : The ldiskfs module reference count never drops down to 0 + because ll_kern_mount() doesn't release the module reference. + +Severity : normal +Frequency : rare +Bugzilla : 12470 +Description: server LBUG when using old ost_num_threads parameter +Details : Accept the old ost_num_threads parameter but warn that it + is deprecated, and fix an off-by-one error that caused an LBUG. + +Severity : normal +Frequency : rare +Bugzilla : 11722 +Description: Transient SCSI error results in persistent IO issue +Details : iobuf->dr_error is not reinitialized to 0 between two + uses. + +Severity : normal +Frequency : sometimes when underlying device returns I/O errors +Bugzilla : 11743 +Description: OSTs not going read-only during write failures +Details : OSTs are not remounted read-only when the journal commit threads + get I/O errors because fsfilt_ext3 calls journal_start/stop() + instead of the ext3 wrappers. + +Severity : minor +Bugzilla : 12364 +Description: poor connect scaling with increasing client count +Details : Don't run filter_grant_sanity_check for more than 100 exports + to improve scaling for large numbers of clients. + +Severity : normal +Frequency : SLES10 only +Bugzilla : 12538 +Description: sanity-quota.sh quotacheck failed: rc = -22 +Details : Quotas cannot be enabled on SLES10. + Severity : normal Frequency : liblustre clients only Bugzilla : 12229 @@ -88,6 +135,189 @@ Details : the default for reading the health_check proc file has changed is possible to return to the previous behaviour during configure with --enable-health-write. +Severity : enhancement +Bugzilla : 10768 +Description: 64-bit inode version +Details: : Add a on-disk 64-bit inode version for ext3 to track changes made + to the inode. This will be required for version-based recovery. + +Severity : normal +Frequency : rare +Bugzilla : 11818 +Description: MDS fails to start if a duplicate client export is detected +Details : in some rare cases it was possible for a client to connect to + an MDS multiple times. Upon recovery the MDS would detect this + and fail during startup. Handle this more gracefully. + +Severity : enhancement +Bugzilla : 11563 +Description: Add -o localflock option to simulate old noflock +behaviour. +Details : This will achieve local-only flock/fcntl locks + coherentness. + +Severity : minor +Frequency : rare +Bugzilla : 11658 +Description: log_commit_thread vs filter_destroy race leads to crash +Details : Take import reference before releasing llog record semaphore + +Severity : normal +Frequency : rare +Bugzilla : 12477 +Description: Wrong request locking in request set processing +Details : ptlrpc_check_set wrongly uses req->rq_lock for proctect add to + imp_delayed_list, in this place should be used imp_lock. + +Severity : normal +Frequency : when reconnection +Bugzilla : 11662 +Description: Grant Leak when osc reconnect to OST +Details : When osc reconnect ost, OST(filter) should check whether it + should grant more space to client by comparing fed_grant and + cl_avail_grant, and return the granted space to client instead + of "new granted" space, because client will call osc_init_grant + to update the client grant space info. + +Severity : normal +Frequency : when client reconnect to OST +Bugzilla : 11662 +Description: Grant Leak when osc do resend and replay bulk write +Details : When osc reconnect to OST, OST(filter)should clear grant info of + bulk write request, because the grant info while be sync between + OSC and OST when reconnect, and we should ignore the grant info + these of resend/replay write req. + +Severity : normal +Frequency : rare +Bugzilla : 11662 +Description: Grant space more than avaiable left space sometimes. +Details : When then OST is about to be full, if two bulk writing from + different clients came to OST. Accord the avaliable space of the + OST, the first req should be permitted, and the second one + should be denied by ENOSPC. But if the seconde arrived before + the first one is commited. The OST might wrongly permit second + writing, which will cause grant space > avaiable space. + +Severity : normal +Frequency : when client is evicted +Bugzilla : 12371 +Description: Grant might be wrongly erased when osc is evicted by OST +Details : when the import is evicted by server, it will fork another + thread ptlrpc_invalidate_import_thread to invalidate the + import, where the grant will be set to 0. While the original + thread will update the grant it got when connecting. So if + the former happened latter, the grant will be wrongly errased + because of this race. + +Severity : normal +Frequency : rare +Bugzilla : 12401 +Description: Checking Stale with correct fid +Details : ll_revalidate_it should uses de_inode instead of op_data.fid2 + to check whether it is stale, because sometimes, we want the + enqueue happened anyway, and op_data.fid2 will not be initialized. + +Severity : enhancement +Bugzilla : 11647 +Description: update patchless client +Details : Add support for patchless client with 2.6.20, 2.6.21 and RHEL 5 + +Severity : normal +Frequency : only with 2.4 kernel +Bugzilla : 12134 +Description: random memory corruption +Details : size of struct ll_inode_info is to big for union inode.u and this + can be cause of random memory corruption. + +Severity : normal +Frequency : rare +Bugzilla : 10818 +Description: Memory leak in recovery +Details : Lov_mds_md was not free in an error handler in mds_create_object. + It should also check obd_fail before fsfilt_start, otherwise if + fsfilt_start return -EROFS,(failover mds during mds recovery). + then the req will return with repmsg->transno = 0 and rc = EROFS. + and we met hit the assert LASSERT(req->rq_reqmsg->transno == + req->rq_repmsg->transno) in ptlrpc_replay_interpret. Fcc should + be freed no matter whether fsfilt_commit success or not. + +Severity : minor +Frequency : only with huge count clients +Bugzilla : 11817 +Description: Prevents from taking the superblock lock in llap_from_page for + a soon died page. +Details : using LL_ORIGIN_REMOVEPAGE origin flag instead of LL_ORIGIN_UNKNOW + for llap_from_page call in ll_removepage prevents from taking the + superblock lock for a soon died page. + +Severity : normal +Frequency : rare +Bugzilla : 11935 +Description: Not check open intent error before release open handle +Details : in some rare cases, the open intent error is not checked before + release open handle, which may cause + ASSERTION(open_req->rq_transno != 0), because it tries to release + the failed open handle. + +Severity : normal +Frequency : rare +Bugzilla : 12556 +Description: Set cat log bitmap only after create log success. +Details : in some rare cases, the cat log bitmap is set too early. and it + should be set only after create log success. + +Severity : major +Bugzilla : 11971 +Description: Accessing a block bevice can re-enable I/O when Lustre is + tearing down a device. +Details : dev_clear_rdonly(bdev) must be called in kill_bdev() instead of + blkdev_put(). + +Severity : minor +Bugzilla : 11706 +Description: service threads may hog cpus when there are a lot of requests + coming +Details : Insert cond_resched to give other threads a chance to use some of + the cpu + +Severity : normal +Frequency : rare +Bugzilla : 12086 +Description: the cat log was not initialized in recovery +Details : When mds(mgs) do recovery, the tgt_count might be zero, so the + unlink log on mds will not be initialized until mds post + recovery. And also in mds post recovery, the unlink log will + initialization will be done asynchronausly, so there will be race + between add unlink log and unlink log initialization. + +Severity : normal +Bugzilla : 12597 +Description: brw_stats were being printed incorrectly +Details : brw_stats were being printed as log2 but all of them were not + recorded as log2. Also remove some code duplication arising from + filter_tally_{read,write}. + +Severity : normal +Bugzilla : 11674 +Frequency : rare, only in recovery. +Description: ASSERTION(req->rq_type != LI_POISON) failed +Details : imp_lock should be held while iterating over imp_sending_list for + prevent destroy request after get timeout in ptlrpc_queue_wait. + +Severity : normal +Bugzilla : 12689 +Description: replay-single.sh test 52 fails +Details : A lock's skiplist need to be cleanup when it being unlinked + from its resource list. + +Severity : normal +Bugzilla : 11737 +Description: Short directio read returns full requested size rather than + actual amount read. +Details : Direct I/O operations should return actual amount of bytes + transferred rather than requested size. + -------------------------------------------------------------------------------- 2007-05-03 Cluster File Systems, Inc. @@ -117,24 +347,15 @@ Details : The __iget() symbol export is missing. To avoid the need for * CONFIGURATION CHANGE. This version of Lustre WILL NOT INTEROPERATE with older versions automatically. In many cases a special upgrade step is needed. Please read the - user documentation before upgrading any part of a 1.4.x system. + user documentation before upgrading any part of a live system. + * WIRE PROTOCOL CHANGE from previous 1.6 beta versions. This + version will not interoperate with 1.6 betas before beta5 (1.5.95). * WARNING: Lustre configuration and startup changes are required with this release. See https://mail.clusterfs.com/wikis/lustre/MountConf for details. - * Support for kernels: - 2.4.21-47.0.1.EL (RHEL 3) - 2.6.5-7.283 (SLES 9) - 2.6.9-42.0.10.EL (RHEL 4) - 2.6.12.6 vanilla (kernel.org) - 2.6.16.27-0.9 (SLES10) - * Client support for unpatched kernels: - (see https://mail.clusterfs.com/wikis/lustre/PatchlessClient) - 2.6.16 - 2.6.19 vanilla (kernel.org) - 2.6.9-42.0.8EL (RHEL 4) - * Recommended e2fsprogs version: 1.39.cfs6 - * Note that reiserfs quotas are disabled on SLES 10 in this kernel * bug fixes + Severity : enhancement Bugzilla : 8007 Description: MountConf @@ -154,7 +375,7 @@ Bugzilla : 9851 Description: startup order invariance Details : MDTs and OSTs can be started in any order. Clients only require the MDT to complete startup. - + Severity : enhancement Bugzilla : 4899 Description: parallel, asynchronous orphan cleanup @@ -166,15 +387,15 @@ Bugzilla : 9862 Description: optimized stripe assignment Details : stripe assignments are now made based on ost space available, ost previous usage, and OSS previous usage, in order to try - to optimize storage space and networking resources. - + to optimize storage space and networking resources. + Severity : enhancement Bugzilla : 4226 Description: Permanently set tunables Details : All writable /proc/fs/lustre tunables can now be permanently - set on a per-server basis, at mkfs time or on a live + set on a per-server basis, at mkfs time or on a live system. - + Severity : enhancement Bugzilla : 10547 Description: Lustre message v2 @@ -186,81 +407,37 @@ Description: client OST exclusion list Details : Clients can be started with a list of OSTs that should be declared "inactive" for known non-responsive OSTs. -Severity : normal -Bugzilla : 12123 -Description: ENOENT returned for valid filehandle during dbench. -Details : Check if a directory has children when invalidating dentries - associated with an inode during lock cancellation. This fixes - an incorrect ENOENT sometimes seen for valid filehandles during - testing with dbench. - Severity : minor -Frequency : SFS test only (otherwise harmless) Bugzilla : 6062 Description: SPEC SFS validation failure on NFS v2 over lustre. Details : Changes the blocksize for regular files to be 2x RPC size, - and not depend on stripe size. - -Severity : enhancement -Bugzilla : 10088 -Description: fine-grained SMP locking inside DLM -Details : Improve DLM performance on SMP systems by removing the single - per-namespace lock and replace it with per-resource locks. - -Severity : enhancement -Bugzilla : 9332 -Description: don't hold multiple extent locks at one time -Details : To avoid client eviction during large writes, locks are not - held on multiple stripes at one time or for very large writes. - Otherwise, clients can block waiting for a lock on a failed OST - while holding locks on other OSTs and be evicted. - + and not depend on stripe size. + Severity : enhancement Bugzilla : 9293 Description: Multiple MD RPCs in flight. -Details : Further unserialise some read-only MDT RPCs - learn about intents. - To avoid overly-overloading MDT, introduce a limit on number of - MDT RPCs in flight for a single client and add /proc controls - to adjust this limit. +Details : Further unserialise some read-only MDS RPCs - learn about intents. + To avoid overly-overloading MDS, introduce a limit on number of + MDS RPCs in flight for a single client and add /proc controls + to adjust this limit. Severity : enhancement Bugzilla : 22484 Description: client read/write statistics Details : Add client read/write call usage stats for performance - analysis of user processes. + analysis of user processes. /proc/fs/lustre/llite/*/offset_stats shows non-sequential file access. extents_stats shows chunk size distribution. extents_stats_per_process show chunk size distribution per - user process. - -Severity : enhancement -Bugzilla : 22485 -Description: per-client statistics on server -Details : Add ldlm and operations statistics for each client in - /proc/fs/lustre/mds|obdfilter/*/exports/ + user process. Severity : enhancement Bugzilla : 22486 -Description: improved MDT statistics -Details : Add detailed MDT operations statistics in - /proc/fs/lustre/mds/*/stats - -Severity : enhancement -Bugzilla : 10968 -Description: VFS operations stats -Details : Add client VFS call stats, trackable by pid, ppid, or gid - /proc/fs/lustre/llite/*/vfs_ops_stats - /proc/fs/lustre/llite/*/vfs_track_[pid|ppid|gid] +Description: mds statistics +Details : Add detailed mds operations statistics in + /proc/fs/lustre/mds/*/stats. Severity : minor -Frequency : always -Bugzilla : 6380 -Description: Fix client-side osc byte counters -Details : The osc read/write byte counters in - /proc/fs/lustre/osc/*/stats are now working - -Severity : minor -Frequency : always as root on SLES Bugzilla : 10667 Description: Failure of copying files with lustre special EAs. Details : Client side always return success for setxattr call for lustre @@ -780,7 +957,6 @@ Details : the performance loss is caused by using of write barriers in the ext3 code. The SLES10 kernel turns barrier support on by default. The fix is to undo that change for ldiskfs. - ------------------------------------------------------------------------------ 2006-12-09 Cluster File Systems, Inc. @@ -997,13 +1173,13 @@ Details : The number of open files grows over time, whether or not ------------------------------------------------------------------------------ -2006-08-20 Cluster File Systems, Inc. +08-20-2006 Cluster File Systems, Inc. * version 1.4.7 * Support for kernels: - 2.6.9-42.EL (RHEL 4) - 2.6.5-7.267 (SLES 9) - 2.4.21-40.EL (RHEL 3) - 2.6.12.6 vanilla (kernel.org) + 2.6.9-42.EL (RHEL 4) + 2.6.5-7.276 (SLES 9) + 2.4.21-40.EL (RHEL 3) + 2.6.12.6 vanilla (kernel.org) * bug fixes Severity : major @@ -1034,14 +1210,6 @@ Details : If setting attributes on a file created under NFS that had never been opened it would be possible to oops the client if the file had no objects. -Severity : major -Frequency : rare -Bugzilla : 9326, 10402, 10897 -Description: client crash in ptlrpcd_wake() thread when sending async RPC -Details : It is possible that ptlrpcd_wake() dereferences a freed async - RPC. In rare cases the ptlrpcd thread alread processed the RPC - before ptlrpcd_wake() was called and the request was freed. - Severity : minor Frequency : always for liblustre Bugzilla : 10290 @@ -1071,7 +1239,7 @@ Details : If an OST is started on a device that has never been formatted or if the filesystem is corrupt and cannot even mount then the error handling cleanup routines would dereference a NULL pointer. -Severity : normal +Severity : medium Frequency : rare Bugzilla : 10047 Description: NULL pointer deref in llap_from_page. @@ -1090,7 +1258,7 @@ Details : sendfile called ll_readpage without right page locks present. around call to generic_file_sendfile() much like we do in ll_file_read(). -Severity : normal +Severity : medium Frequency : with certain MDS communication failures at client mount time Bugzilla : 10268 Description: NULL pointer deref after failed client mount @@ -1101,7 +1269,7 @@ Details : a client connection request may delayed by the network layer reference from the request import to the obd device and delay the cleanup until the network drops the request. -Severity : normal +Severity : medium Frequency : occasionally during client (re)connect Bugzilla : 9387 Description: assertion failure during client (re)connect @@ -1112,7 +1280,7 @@ Details : processing a client connection request may be delayed by the client may trip an assertion failure in ptlrpc_connect_interpret() which thought it would be the only running connect process. -Severity : normal +Severity : medium Frequency : only with obd_echo servers and clients that are rebooted Bugzilla : 10140 Description: kernel BUG accessing uninitialized data structure @@ -1149,7 +1317,7 @@ Description: Support NFS exporting on 2.6 kernels. Details : Implement non-rawops metadata methods for NFS server to use without changing NFS server code. -Severity : normal +Severity : medium Frequency : very rare (synthetic metadata workload only) Bugzilla : 9974 Description: two racing renames might cause an MDS thread to deadlock @@ -1278,7 +1446,7 @@ Details : Kenrel kmalloc limits allocations to 128kB and this prevents Severity : critical Frequency : Always, for 32-bit kernel without CONFIG_LBD and filesystem > 2TB Bugzilla : 6191 -Description: filesystem corruption for non-standard kernels and very large OSTs +Description: ldiskfs crash at mount for filesystem larger than 2TB with mballoc Details : If a 32-bit kernel is compiled without CONFIG_LBD enabled and a filesystems larger than 2TB is mounted then the kernel will silently corrupt the start of the filesystem. CONFIG_LBD is @@ -1336,6 +1504,15 @@ Details : protect inode from truncation within vfs_unlink() context just take a reference before calling vfs_unlink() and release it when parent's i_sem is free. +Severity : major +Frequency : rare +Bugzilla : 4778 +Description: last_id value checked outside lock on OST caused LASSERT failure +Details : If there were multiple MDS->OST object precreate requests in + flight, it was possible that the OST's last object id was checked + outside a lock and incorrectly tripped an assertion. Move checks + inside locks, and discard old precreate requests. + Severity : minor Frequency : always, if extents are used on OSTs Bugzilla : 10703 @@ -1355,14 +1532,6 @@ Details : Using a 32-bit jiffies timer with HZ=1000 may cause backup import connections to be ignored if the 32-bit jiffies counter wraps. Use a 64-bit jiffies counter. -Severity : major -Frequency : during server recovery -Bugzilla : 10479 -Description: crash after server is denying duplicate export -Details : If clients are resending connect requests to the server, the - server refuses to allow a client to connect multiple times. - Fixed a bug in the handling of this case. - Severity : minor Frequency : very large clusters immediately after boot Bugzilla : 10083 @@ -1378,11 +1547,11 @@ Frequency : Sometimes during replay Bugzilla : 9314 Description: Assertion failure in ll_local_open after replay. Details : If replay happened on an open request reply before we were able - to set replay handler, reply will become not swabbed tripping the - assertion in ll_local_open. Now we set the handler right after - recognising of open request + to set replay handler, reply will become not swabbed tripping the + assertion in ll_local_open. Now we set the handler right after + recognising of open request -Severity : minor +Severity : trivial Frequency : very rare Bugzilla : 10584 Description: kernel reports "badness in vsnprintf" @@ -1409,7 +1578,7 @@ Severity : minor Frequency : always Bugzilla : 10611 Description: Inability to activate failout mode -Details : lconf script incorrectly assumed that in python string's numeric +Details : lconf script incorrectly assumed that in pythong string's numeric value is used in comparisons. Severity : minor @@ -1421,7 +1590,7 @@ Details : When selecting which OSTs to stripe files over, for files with the MDS is always picking the same starting OST for each file. Return the OST selection heuristic to the original design. -Severity : minor +Severity : trivial Frequency : rare Bugzilla : 10673 Description: mount failures may take full timeout to return an error @@ -1429,10 +1598,46 @@ Details : Under some heavy load conditions it is possible that a failed mount can wait for the full obd_timeout interval, possibly several minutes, before reporting an error. Instead return an error as soon as the status is known. +Severity : major +Frequency : quota enabled and large files being deleted +Bugzilla : 10707 +Description: releasing more than 4GB of quota at once hangs OST +Details : If a user deletes more than 4GB of files on a single OST it + will cause the OST to spin in an infinite loop. Release + quota in < 4GB chunks, or use a 64-bit value for 1.4.7.1+. + +Severity : trivial +Frequency : rare +Bugzilla : 10845 +Description: statfs data retrieved from /proc may be stale or zero +Details : When reading per-device statfs data from /proc, in the + {kbytes,files}_{total,free,avail} files, it may appear + as zero or be out of date. + +Severity : trivial +Frequency : systems with MD RAID1 external journal devices +Bugzilla : 10832 +Description: lconf's call to blkid is confused by RAID1 journal devices +Details : Use the "blkid -l" flag to locate the MD RAID device instead + of returning all block devices that match the journal UUID. + +Severity : normal +Frequency : always, for aggregate stripe size over 4GB +Bugzilla : 10725 +Description: assertion fails when trying to use 4GB stripe size +Details : Use "setstripe" to set stripe size over 4GB will fail the kernel, + complaining "ASSERTION(lsm->lsm_xfersize != 0)" + +Severity : normal +Frequency : always on ppc64 +Bugzilla : 10634 +Description: the first write on an ext3 filesystem with mballoc got stuck +Details : ext3_mb_generate_buddy() uses find_next_bit() which does not + perform endianness conversion. ------------------------------------------------------------------------------ -2006-02-14 Cluster File Systems, Inc. +02-14-2006 Cluster File Systems, Inc. * version 1.4.6 * WIRE PROTOCOL CHANGE. This version of Lustre networking WILL NOT INTEROPERATE with older versions automatically. Please read the @@ -1476,7 +1681,7 @@ Description: Configuration change for the XT3 Rather --with-portals= is used to enable building on the XT3. In addition to enable XT3 specific features the option --enable-cray-xt3 must be used. - + Severity : major Frequency : rare Bugzilla : 7407 @@ -2057,7 +2262,7 @@ Details : Add sub-command 'df' on 'lfs' to report the disk space usage of ------------------------------------------------------------------------------ -2005-08-26 Cluster File Systems, Inc. +08-26-2005 Cluster File Systems, Inc. * version 1.4.5 * bug fixes @@ -3408,7 +3613,7 @@ tbd Cluster File Systems, Inc. * add hard link support * change obdfile creation method * kernel patch changed - + 2002-09-19 Peter Braam * version 0_5_9 * bug fix diff --git a/lustre/Makefile.in b/lustre/Makefile.in index cca2b21..82c5433 100644 --- a/lustre/Makefile.in +++ b/lustre/Makefile.in @@ -1,5 +1,4 @@ -@LDISKFS_TRUE@subdir-m += ldiskfs ldiskfs2 - +subdir-m += fid subdir-m += lvfs subdir-m += obdclass subdir-m += lov @@ -8,8 +7,8 @@ subdir-m += osc subdir-m += obdecho subdir-m += mgc -@SERVER_TRUE@subdir-m += mds obdfilter ost mgs -@CLIENT_TRUE@subdir-m += mdc llite +@SERVER_TRUE@subdir-m += mds obdfilter ost mgs mdt cmm mdd osd +@CLIENT_TRUE@subdir-m += mdc lmv llite fld @QUOTA_TRUE@subdir-m += quota @INCLUDE_RULES@ diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index b2097ea..6be5e69 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -7,11 +7,11 @@ AUTOMAKE_OPTIONS = foreign # also update lustre/autoconf/lustre-core.m4 AC_CONFIG_FILES ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \ - mgc doc utils tests scripts autoconf contrib + mgc fid fld doc utils tests scripts autoconf contrib -SERVER_SUBDIRS := ldiskfs ldiskfs2 obdfilter ost mds mgs +SERVER_SUBDIRS := obdfilter ost mds mgs mdt cmm mdd osd -CLIENT_SUBDIRS := mdc llite +CLIENT_SUBDIRS := mdc lmv llite QUOTA_SUBDIRS := quota @@ -44,14 +44,6 @@ DIST_SUBDIRS := $(ALWAYS_SUBDIRS) $(SERVER_SUBDIRS) $(CLIENT_SUBDIRS) \ EXTRA_DIST = BUGS FDL kernel_patches -if LDISKFS -LDISKFS = ldiskfs-sources ldiskfs2-sources -ldiskfs-sources: - $(MAKE) sources -C ldiskfs -ldiskfs2-sources: - $(MAKE) sources -C ldiskfs2 -endif - lvfs-sources: $(MAKE) sources -C lvfs obdclass-sources: @@ -61,7 +53,7 @@ sources: $(LDISKFS) lvfs-sources obdclass-sources lustre_build_version all-recursive: lustre_build_version -BUILD_VER_H=$(top_builddir)/lustre/include/linux/lustre_build_version.h +BUILD_VER_H=$(top_builddir)/lustre/include/lustre/lustre_build_version.h lustre_build_version: perl $(top_builddir)/lustre/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver diff --git a/lustre/autoconf/Makefile.am b/lustre/autoconf/Makefile.am index 7a747da..78a6511 100644 --- a/lustre/autoconf/Makefile.am +++ b/lustre/autoconf/Makefile.am @@ -1 +1 @@ -EXTRA_DIST := lustre-core.m4 lustre-version.ac +EXTRA_DIST := lustre-core.m4 lustre-version.ac kerberos5.m4 diff --git a/lustre/autoconf/kerberos5.m4 b/lustre/autoconf/kerberos5.m4 new file mode 100644 index 0000000..1dac9f0 --- /dev/null +++ b/lustre/autoconf/kerberos5.m4 @@ -0,0 +1,105 @@ +dnl Checks for Kerberos +dnl NOTE: while we intend to do generic gss-api, currently we +dnl have a requirement to get an initial Kerberos machine +dnl credential. Thus, the requirement for Kerberos. +dnl The Kerberos gssapi library will be dynamically loaded? +AC_DEFUN([AC_KERBEROS_V5],[ + AC_MSG_CHECKING(for Kerberos v5) + AC_ARG_WITH(krb5, + [AC_HELP_STRING([--with-krb5=DIR], [use Kerberos v5 installation in DIR])], + [ case "$withval" in + yes|no) + krb5_with="" + ;; + *) + krb5_with="$withval" + ;; + esac ] + ) + + for dir in $krb5_with /usr /usr/kerberos /usr/local /usr/local/krb5 \ + /usr/krb5 /usr/heimdal /usr/local/heimdal /usr/athena ; do + dnl This ugly hack brought on by the split installation of + dnl MIT Kerberos on Fedora Core 1 + K5CONFIG="" + if test -f $dir/bin/krb5-config; then + K5CONFIG=$dir/bin/krb5-config + elif test -f "/usr/kerberos/bin/krb5-config"; then + K5CONFIG="/usr/kerberos/bin/krb5-config" + elif test -f "/usr/lib/mit/bin/krb5-config"; then + K5CONFIG="/usr/lib/mit/bin/krb5-config" + fi + if test "$K5CONFIG" != ""; then + KRBCFLAGS=`$K5CONFIG --cflags` + KRBLIBS=`$K5CONFIG --libs gssapi` + K5VERS=`$K5CONFIG --version | head -n 1 | awk '{split($(4),v,"."); if (v@<:@"3"@:>@ == "") v@<:@"3"@:>@ = "0"; print v@<:@"1"@:>@v@<:@"2"@:>@v@<:@"3"@:>@ }'` + AC_DEFINE_UNQUOTED(KRB5_VERSION, $K5VERS, [Define this as the Kerberos version number]) + if test -f $dir/include/gssapi/gssapi_krb5.h -a \ + \( -f $dir/lib/libgssapi_krb5.a -o \ + -f $dir/lib/libgssapi_krb5.so \) ; then + AC_DEFINE(HAVE_KRB5, 1, [Define this if you have MIT Kerberos libraries]) + KRBDIR="$dir" + dnl If we are using MIT K5 1.3.1 and before, we *MUST* use the + dnl private function (gss_krb5_ccache_name) to get correct + dnl behavior of changing the ccache used by gssapi. + dnl Starting in 1.3.2, we *DO NOT* want to use + dnl gss_krb5_ccache_name, instead we want to set KRB5CCNAME + dnl to get gssapi to use a different ccache + if test $K5VERS -le 131; then + AC_DEFINE(USE_GSS_KRB5_CCACHE_NAME, 1, [Define this if the private function, gss_krb5_cache_name, must be used to tell the Kerberos library which credentials cache to use. Otherwise, this is done by setting the KRB5CCNAME environment variable]) + fi + gssapi_lib=gssapi_krb5 + break + dnl The following ugly hack brought on by the split installation + dnl of Heimdal Kerberos on SuSe + elif test \( -f $dir/include/heim_err.h -o\ + -f $dir/include/heimdal/heim_err.h \) -a \ + -f $dir/lib/libroken.a; then + AC_DEFINE(HAVE_HEIMDAL, 1, [Define this if you have Heimdal Kerberos libraries]) + KRBDIR="$dir" + gssapi_lib=gssapi + break + fi + fi + done + dnl We didn't find a usable Kerberos environment + if test "x$KRBDIR" = "x"; then + if test "x$krb5_with" = "x"; then + AC_MSG_ERROR(Kerberos v5 with GSS support not found: consider --disable-gss or --with-krb5=) + else + AC_MSG_ERROR(Kerberos v5 with GSS support not found at $krb5_with) + fi + fi + AC_MSG_RESULT($KRBDIR) + + dnl Check if -rpath=$(KRBDIR)/lib is needed + echo "The current KRBDIR is $KRBDIR" + if test "$KRBDIR/lib" = "/lib" -o "$KRBDIR/lib" = "/usr/lib" \ + -o "$KRBDIR/lib" = "//lib" -o "$KRBDIR/lib" = "/usr//lib" ; then + KRBLDFLAGS=""; + elif /sbin/ldconfig -p | grep > /dev/null "=> $KRBDIR/lib/"; then + KRBLDFLAGS=""; + else + KRBLDFLAGS="-Wl,-rpath=$KRBDIR/lib" + fi + + dnl Now check for functions within gssapi library + AC_CHECK_LIB($gssapi_lib, gss_krb5_export_lucid_sec_context, + AC_DEFINE(HAVE_LUCID_CONTEXT_SUPPORT, 1, [Define this if the Kerberos GSS library supports gss_krb5_export_lucid_sec_context]), ,$KRBLIBS) + AC_CHECK_LIB($gssapi_lib, gss_krb5_set_allowable_enctypes, + AC_DEFINE(HAVE_SET_ALLOWABLE_ENCTYPES, 1, [Define this if the Kerberos GSS library supports gss_krb5_set_allowable_enctypes]), ,$KRBLIBS) + AC_CHECK_LIB($gssapi_lib, gss_krb5_ccache_name, + AC_DEFINE(HAVE_GSS_KRB5_CCACHE_NAME, 1, [Define this if the Kerberos GSS library supports gss_krb5_ccache_name]), ,$KRBLIBS) + + dnl If they specified a directory and it didn't work, give them a warning + if test "x$krb5_with" != "x" -a "$krb5_with" != "$KRBDIR"; then + AC_MSG_WARN(Using $KRBDIR instead of requested value of $krb5_with for Kerberos!) + fi + + AC_SUBST([KRBDIR]) + AC_SUBST([KRBLIBS]) + AC_SUBST([KRBCFLAGS]) + AC_SUBST([KRBLDFLAGS]) + AC_SUBST([K5VERS]) + +]) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index cce0a4b..be52ff9 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1,4 +1,5 @@ #* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- +#* vim:expandtab:shiftwidth=8:tabstop=8: # # LC_CONFIG_SRCDIR # @@ -7,7 +8,7 @@ AC_DEFUN([LC_CONFIG_SRCDIR], [AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c]) ]) - + # # LC_PATH_DEFAULTS # @@ -303,82 +304,34 @@ kernel patches from Lustre version 1.4.3 or above.]) # # LC_CONFIG_BACKINGFS # -# whether to use ldiskfs instead of ext3 +# setup, check the backing filesystem # AC_DEFUN([LC_CONFIG_BACKINGFS], [ -BACKINGFS='ext3' - -# 2.6 gets ldiskfs -AC_MSG_CHECKING([whether to enable ldiskfs]) -AC_ARG_ENABLE([ldiskfs], - AC_HELP_STRING([--enable-ldiskfs], - [use ldiskfs for the Lustre backing FS]), - [],[enable_ldiskfs="$linux25"]) -AC_MSG_RESULT([$enable_ldiskfs]) - -if test x$enable_ldiskfs = xyes ; then - BACKINGFS="ldiskfs" +BACKINGFS="ldiskfs" - AC_MSG_CHECKING([whether to enable quilt for making ldiskfs]) - AC_ARG_ENABLE([quilt], - AC_HELP_STRING([--disable-quilt],[disable use of quilt for ldiskfs]), - [],[enable_quilt='yes']) - AC_MSG_RESULT([$enable_quilt]) +if test x$with_ldiskfs = xno ; then + BACKINGFS="ext3" - AC_PATH_PROG(PATCH, patch, [no]) - - if test x$enable_quilt = xno ; then - QUILT="no" - else - AC_PATH_PROG(QUILT, quilt, [no]) + if test x$linux25$enable_server = xyesyes ; then + AC_MSG_ERROR([ldiskfs is required for 2.6-based servers.]) fi - if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then - AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)]) - fi - - AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module]) - AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs]) - AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls for ldiskfs]) - AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security for ldiskfs]) - - AC_DEFINE(CONFIG_LDISKFS2_FS_XATTR, 1, [enable extended attributes for ldiskfs2]) - AC_DEFINE(CONFIG_LDISKFS2_FS_POSIX_ACL, 1, [enable posix acls for ldiskfs2]) - AC_DEFINE(CONFIG_LDISKFS2_FS_SECURITY, 1, [enable fs security for ldiskfs2]) -fi + # --- Check that ext3 and ext3 xattr are enabled in the kernel + LC_CONFIG_EXT3([],[ + AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel]) + ],[ + AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel]) + AC_MSG_WARN([This build may fail.]) + ]) +else + # ldiskfs is enabled + LB_DEFINE_LDISKFS_OPTIONS +fi #ldiskfs AC_MSG_CHECKING([which backing filesystem to use]) AC_MSG_RESULT([$BACKINGFS]) AC_SUBST(BACKINGFS) - -case $BACKINGFS in - ext3) - # --- Check that ext3 and ext3 xattr are enabled in the kernel - LC_CONFIG_EXT3([],[ - AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel]) - ],[ - AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel]) - AC_MSG_WARN([This build may fail.]) - ]) - ;; - ldiskfs) - AC_MSG_CHECKING([which ldiskfs series to use]) - case $LINUXRELEASE in - 2.6.5*) LDISKFS_SERIES="2.6-suse.series" ;; - 2.6.9*) LDISKFS_SERIES="2.6-rhel4.series" ;; - 2.6.10-ac*) LDISKFS_SERIES="2.6-fc3.series" ;; - 2.6.10*) LDISKFS_SERIES="2.6-rhel4.series" ;; - 2.6.12*) LDISKFS_SERIES="2.6.12-vanilla.series" ;; - 2.6.15*) LDISKFS_SERIES="2.6-fc5.series";; - 2.6.16*) LDISKFS_SERIES="2.6-sles10.series";; - 2.6.18*) LDISKFS_SERIES="2.6.18-vanilla.series";; - *) AC_MSG_WARN([Unknown kernel version $LINUXRELEASE, fix lustre/autoconf/lustre-core.m4]) - esac - AC_MSG_RESULT([$LDISKFS_SERIES]) - AC_SUBST(LDISKFS_SERIES) - ;; -esac # $BACKINGFS ]) # @@ -401,16 +354,16 @@ fi # # LC_CONFIG_HEALTH_CHECK_WRITE # -# Turn on the actual write to the disk +# Turn off the actual write to the disk # AC_DEFUN([LC_CONFIG_HEALTH_CHECK_WRITE], [AC_MSG_CHECKING([whether to enable a write with the health check]) -AC_ARG_ENABLE([health-write], - AC_HELP_STRING([--enable-health-write], +AC_ARG_ENABLE([health_write], + AC_HELP_STRING([--enable-health_write], [enable disk writes when doing health check]), [],[enable_health_write='no']) AC_MSG_RESULT([$enable_health_write]) -if test x$enable_health_write == xyes ; then +if test x$enable_health_write != xno ; then AC_DEFINE(USE_HEALTH_CHECK_WRITE, 1, Write when Checking Health) fi ]) @@ -521,7 +474,7 @@ LB_LINUX_TRY_COMPILE([ #include #include ],[ - filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0); + filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0); ],[ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate]) @@ -614,22 +567,21 @@ $1 ],[ AC_MSG_RESULT([no]) ]) +]) # # LC_EXPORT___IGET # starting from 2.6.19 linux kernel exports __iget() # AC_DEFUN([LC_EXPORT___IGET], -[AC_MSG_CHECKING([if kernel exports __iget]) - if grep -q "EXPORT_SYMBOL(__iget)" $LINUX/fs/inode.c 2>/dev/null ; then - AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi +[LB_CHECK_SYMBOL_EXPORT([__iget], +[fs/inode.c],[ + AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget]) +],[ ]) ]) + AC_DEFUN([LC_LUSTRE_VERSION_H], [LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[ rm -f "$LUSTRE/include/linux/lustre_version.h" @@ -639,25 +591,84 @@ AC_DEFUN([LC_LUSTRE_VERSION_H], AC_MSG_WARN([Unpatched kernel detected.]) AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;]) AC_MSG_WARN([disabling server build]) - enable_server='no' + enable_server='no' fi ]) ]) AC_DEFUN([LC_FUNC_SET_FS_PWD], -[AC_MSG_CHECKING([if kernel exports show_task]) -have_show_task=0 - if grep -q "EXPORT_SYMBOL(show_task)" \ - "$LINUX/fs/namespace.c" 2>/dev/null ; then - AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi +[LB_CHECK_SYMBOL_EXPORT([set_fs_pwd], +[fs/namespace.c],[ + AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported]) +],[ +]) ]) +# +# LC_CAPA_CRYPTO +# +AC_DEFUN([LC_CAPA_CRYPTO], +[LB_LINUX_CONFIG_IM([CRYPTO],[],[ + AC_MSG_ERROR([Lustre capability require that CONFIG_CRYPTO is enabled in your kernel.]) +]) +LB_LINUX_CONFIG_IM([CRYPTO_HMAC],[],[ + AC_MSG_ERROR([Lustre capability require that CONFIG_CRYPTO_HMAC is enabled in your kernel.]) +]) +LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],[ + AC_MSG_ERROR([Lustre capability require that CONFIG_CRYPTO_SHA1 is enabled in your kernel.]) +]) +]) + +m4_pattern_allow(AC_KERBEROS_V5) # +# LC_CONFIG_GSS +# +# Build gss and related tools of Lustre. Currently both kernel and user space +# parts are depend on linux platform. +# +AC_DEFUN([LC_CONFIG_GSS], +[AC_MSG_CHECKING([whether to enable gss/krb5 support]) +AC_ARG_ENABLE([gss], + AC_HELP_STRING([--enable-gss], [enable gss/krb5 support]), + [],[enable_gss='no']) +AC_MSG_RESULT([$enable_gss]) + +if test x$enable_gss == xyes; then + LB_LINUX_CONFIG_IM([SUNRPC],[],[ + AC_MSG_ERROR([GSS require that CONFIG_SUNRPC is enabled in your kernel.]) + ]) + LB_LINUX_CONFIG_IM([CRYPTO_DES],[],[ + AC_MSG_WARN([DES support is recommended by using GSS.]) + ]) + LB_LINUX_CONFIG_IM([CRYPTO_MD5],[],[ + AC_MSG_WARN([MD5 support is recommended by using GSS.]) + ]) + LB_LINUX_CONFIG_IM([CRYPTO_SHA256],[],[ + AC_MSG_WARN([SHA256 support is recommended by using GSS.]) + ]) + LB_LINUX_CONFIG_IM([CRYPTO_SHA512],[],[ + AC_MSG_WARN([SHA512 support is recommended by using GSS.]) + ]) + LB_LINUX_CONFIG_IM([CRYPTO_ARC4],[],[ + AC_MSG_WARN([ARC4 support is recommended by using GSS.]) + ]) + # + # AES symbol is uncertain (optimized & depend on arch) + # + + AC_CHECK_LIB(gssapi, gss_init_sec_context, [ + GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi" + ], [ + AC_MSG_ERROR([libgssapi is not found, consider --disable-gss.]) + ], + ) + + AC_SUBST(GSSAPI_LIBS) + AC_KERBEROS_V5 +fi +]) + # LC_FUNC_MS_FLOCK_LOCK # # SLES9 kernel has MS_FLOCK_LOCK sb flag @@ -723,50 +734,6 @@ LB_LINUX_TRY_COMPILE([ ]) # -# LC_FUNC_MS_FLOCK_LOCK -# -# SLES9 kernel has MS_FLOCK_LOCK sb flag -# -AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK], -[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag]) -LB_LINUX_TRY_COMPILE([ - #include -],[ - int flags = MS_FLOCK_LOCK; -],[ - AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1, - [kernel has MS_FLOCK_LOCK flag]) - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LC_FUNC_HAVE_CAN_SLEEP_ARG -# -# SLES9 kernel has third arg can_sleep -# in fs/locks.c: flock_lock_file_wait() -# -AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG], -[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()]) -LB_LINUX_TRY_COMPILE([ - #include -],[ - int cansleep; - struct file *file; - struct file_lock *file_lock; - flock_lock_file_wait(file, file_lock, cansleep); -],[ - AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1, - [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()]) - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# # LC_TASK_PPTR # # task struct has p_pptr instead of parent @@ -849,7 +816,7 @@ LB_LINUX_TRY_COMPILE([ AC_MSG_RESULT(NO) ]) ]) - + # # LC_STATFS_DENTRY_PARAM # starting from 2.6.18 linux kernel uses dentry instead of @@ -872,7 +839,7 @@ LB_LINUX_TRY_COMPILE([ # # LC_VFS_KERN_MOUNT -# starting from 2.6.18 kernel doesn't export do_kern_mount +# starting from 2.6.18 kernel don't export do_kern_mount # and want to use vfs_kern_mount instead. # AC_DEFUN([LC_VFS_KERN_MOUNT], @@ -941,7 +908,7 @@ EXTRA_KCFLAGS="$tmp_flags" ]) # 2.6.19 API changes -# inode doesn't have i_blksize field +# inode don't have i_blksize field AC_DEFUN([LC_INODE_BLKSIZE], [AC_MSG_CHECKING([inode has i_blksize field]) LB_LINUX_TRY_COMPILE([ @@ -1023,7 +990,7 @@ LB_LINUX_TRY_COMPILE([ ]) # LC_NR_PAGECACHE -# 2.6.18 doesn't export nr_pagecahe +# 2.6.18 don't export nr_pagecahe AC_DEFUN([LC_NR_PAGECACHE], [AC_MSG_CHECKING([kernel export nr_pagecache]) LB_LINUX_TRY_COMPILE([ @@ -1039,27 +1006,118 @@ LB_LINUX_TRY_COMPILE([ ]) ]) -# LC_WB_RANGE_START -# 2.6.20 rename struct writeback fields -AC_DEFUN([LC_WB_RANGE_START], -[AC_MSG_CHECKING([kernel has range_start in struct writeback_control]) +# LC_CANCEL_DIRTY_PAGE +# 2.6.20 introduse cancel_dirty_page instead of +# clear_page_dirty. +AC_DEFUN([LC_CANCEL_DIRTY_PAGE], +[AC_MSG_CHECKING([kernel has cancel_dirty_page]) LB_LINUX_TRY_COMPILE([ - #include - #include - #include + #include +],[ + cancel_dirty_page(NULL, 0); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CANCEL_DIRTY_PAGE, 1, + [kernel has cancel_dirty_page instead of clear_page_dirty]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# +# LC_PAGE_CONSTANT +# +# In order to support raid5 zerocopy patch, we have to patch the kernel to make +# it support constant page, which means the page won't be modified during the +# IO. +# +AC_DEFUN([LC_PAGE_CONSTANT], +[AC_MSG_CHECKING([if kernel have PageConstant defined]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + #ifndef PG_constant + #error "Have no raid5 zcopy patch" + #endif +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PAGE_CONSTANT, 1, [kernel have PageConstant supported]) ],[ - struct writeback_control wb; + AC_MSG_RESULT(no); +]) +]) - wb.range_start = 0; +# RHEL5 in FS-cache patch rename PG_checked flag +# into PG_fs_misc +AC_DEFUN([LC_PG_FS_MISC], +[AC_MSG_CHECKING([kernel has PG_fs_misc]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + #ifndef PG_fs_misc + #error PG_fs_misc not defined in kernel + #endif ],[ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WB_RANGE_START, 1, - [writeback control has range_start field]) + AC_DEFINE(HAVE_PG_FS_MISC, 1, + [is kernel have PG_fs_misc]) ],[ AC_MSG_RESULT(NO) ]) ]) +AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE], +[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page], +[mm/truncate.c],[ +AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1, + [kernel export truncate_complete_page]) +],[ +]) +]) + +AC_DEFUN([LC_EXPORT_D_REHASH_COND], +[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond], +[fs/dcache.c],[ +AC_DEFINE(HAVE_D_REHASH_COND, 1, + [d_rehash_cond is exported by the kernel]) +],[ +]) +]) + +AC_DEFUN([LC_EXPORT___D_REHASH], +[LB_CHECK_SYMBOL_EXPORT([__d_rehash], +[fs/dcache.c],[ +AC_DEFINE(HAVE___D_REHASH, 1, + [__d_rehash is exported by the kernel]) +],[ +]) +]) + +# +# LC_VFS_INTENT_PATCHES +# +# check if the kernel has the VFS intent patches +AC_DEFUN([LC_VFS_INTENT_PATCHES], +[AC_MSG_CHECKING([if the kernel has the VFS intent patches]) +LB_LINUX_TRY_COMPILE([ + #include + #include +],[ + struct nameidata nd; + struct lookup_intent *it; + + it = &nd.intent; + intent_init(it, IT_OPEN); + it->d.lustre.it_disposition = 0; + it->d.lustre.it_data = NULL; +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied]) +],[ + AC_MSG_RESULT([no]) +]) +]) + # # LC_PROG_LINUX # @@ -1068,7 +1126,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFUN([LC_PROG_LINUX], [ LC_LUSTRE_VERSION_H if test x$enable_server = xyes ; then - LC_CONFIG_BACKINGFS + LC_CONFIG_BACKINGFS fi LC_CONFIG_PINGER LC_CONFIG_LIBLUSTRE_RECOVERY @@ -1076,6 +1134,10 @@ LC_CONFIG_QUOTA LC_CONFIG_HEALTH_CHECK_WRITE LC_TASK_PPTR +# RHEL4 pachess +LC_EXPORT_TRUNCATE_COMPLETE +LC_EXPORT_D_REHASH_COND +LC_EXPORT___D_REHASH LC_STRUCT_KIOBUF LC_FUNC_COND_RESCHED @@ -1097,13 +1159,18 @@ LC_BIT_SPINLOCK_H LC_XATTR_ACL LC_STRUCT_INTENT_FILE LC_POSIX_ACL_XATTR_H -LC_EXPORT___IGET LC_FUNC_SET_FS_PWD +LC_CAPA_CRYPTO +LC_CONFIG_GSS LC_FUNC_MS_FLOCK_LOCK LC_FUNC_HAVE_CAN_SLEEP_ARG LC_FUNC_F_OP_FLOCK LC_QUOTA_READ LC_COOKIE_FOLLOW_LINK +LC_FUNC_RCU + +# does the kernel have VFS intent patches? +LC_VFS_INTENT_PATCHES # 2.6.15 LC_INODE_I_MUTEX @@ -1117,13 +1184,21 @@ LC_STATFS_DENTRY_PARAM LC_VFS_KERN_MOUNT LC_INVALIDATEPAGE_RETURN_INT LC_UMOUNTBEGIN_HAS_VFSMOUNT -LC_WB_RANGE_START + +#2.6.18 + RHEL5 (fc6) +LC_PG_FS_MISC # 2.6.19 LC_INODE_BLKSIZE LC_VFS_READDIR_U64_INO LC_GENERIC_FILE_READ LC_GENERIC_FILE_WRITE + +# 2.6.20 +LC_CANCEL_DIRTY_PAGE + +# raid5-zerocopy patch +LC_PAGE_CONSTANT ]) # @@ -1191,10 +1266,10 @@ LC_CONFIG_LIBLUSTRE_RECOVERY # whether to enable quota support # AC_DEFUN([LC_CONFIG_QUOTA], -[AC_MSG_CHECKING([whether to enable quota support]) +[AC_MSG_CHECKING([whether to disable quota support]) AC_ARG_ENABLE([quota], - AC_HELP_STRING([--enable-quota], - [enable quota support]), + AC_HELP_STRING([--disable-quota], + [disable quota support]), [],[enable_quota='yes']) AC_MSG_RESULT([$enable_quota]) if test x$linux25 != xyes; then @@ -1204,7 +1279,24 @@ if test x$enable_quota != xno; then AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support]) fi ]) - + +# +# LC_CONFIG_SPLIT +# +# whether to enable split support +# +AC_DEFUN([LC_CONFIG_SPLIT], +[AC_MSG_CHECKING([whether to enable split support]) +AC_ARG_ENABLE([split], + AC_HELP_STRING([--enable-split], + [enable split support]), + [],[enable_split='no']) +AC_MSG_RESULT([$enable_split]) +if test x$enable_split != xno; then + AC_DEFINE(HAVE_SPLIT_SUPPORT, 1, [enable split support]) +fi +]) + AC_DEFUN([LC_QUOTA_READ], [AC_MSG_CHECKING([if kernel supports quota_read]) LB_LINUX_TRY_COMPILE([ @@ -1245,6 +1337,37 @@ LB_LINUX_TRY_COMPILE([ ]) # +# LC_FUNC_RCU +# +# kernels prior than 2.6.0(?) have no RCU supported; in kernel 2.6.5(SUSE), +# call_rcu takes three parameters. +# +AC_DEFUN([LC_FUNC_RCU], +[AC_MSG_CHECKING([if kernel have RCU supported]) +LB_LINUX_TRY_COMPILE([ + #include +],[],[ + AC_DEFINE(HAVE_RCU, 1, [have RCU defined]) + AC_MSG_RESULT([yes]) + + AC_MSG_CHECKING([if call_rcu takes three parameters]) + LB_LINUX_TRY_COMPILE([ + #include + ],[ + struct rcu_head rh; + call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL); + ],[ + AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters]) + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + ]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# # LC_CONFIGURE # # other configure checks @@ -1284,6 +1407,16 @@ if test "$enable_mindf" = "yes" ; then AC_DEFINE([MIN_DF], 1, [Report minimum OST free space]) fi +AC_ARG_ENABLE([fail_alloc], + AC_HELP_STRING([--disable-fail-alloc], + [disable randomly alloc failure]), + [],[enable_fail_alloc=yes]) +AC_MSG_CHECKING([whether to randomly failing memory alloc]) +AC_MSG_RESULT([$enable_fail_alloc]) +if test x$enable_fail_alloc != xno ; then + AC_DEFINE([RANDOM_FAIL_ALLOC], 1, [enable randomly alloc failure]) +fi + ]) # @@ -1293,15 +1426,16 @@ fi # AC_DEFUN([LC_CONDITIONALS], [AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes) -AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes) AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno) AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes) AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) AM_CONDITIONAL(CLIENT, test x$enable_client = xyes) AM_CONDITIONAL(SERVER, test x$enable_server = xyes) AM_CONDITIONAL(QUOTA, test x$enable_quota = xyes) +AM_CONDITIONAL(SPLIT, test x$enable_split = xyes) AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes) AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes) +AM_CONDITIONAL(GSS, test x$enable_gss = xyes) AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes) ]) @@ -1327,16 +1461,9 @@ lustre/kernel_patches/targets/2.6-rhel4.target lustre/kernel_patches/targets/2.6-fc5.target lustre/kernel_patches/targets/2.6-patchless.target lustre/kernel_patches/targets/2.6-sles10.target -lustre/kernel_patches/targets/hp_pnnl-2.4.target -lustre/kernel_patches/targets/rh-2.4.target -lustre/kernel_patches/targets/rhel-2.4.target -lustre/kernel_patches/targets/suse-2.4.21-2.target -lustre/kernel_patches/targets/sles-2.4.target -lustre/ldiskfs/Makefile -lustre/ldiskfs/autoMakefile -lustre/ldiskfs2/Makefile -lustre/ldiskfs2/autoMakefile lustre/ldlm/Makefile +lustre/fid/Makefile +lustre/fid/autoMakefile lustre/liblustre/Makefile lustre/liblustre/tests/Makefile lustre/llite/Makefile @@ -1347,8 +1474,18 @@ lustre/lvfs/Makefile lustre/lvfs/autoMakefile lustre/mdc/Makefile lustre/mdc/autoMakefile +lustre/lmv/Makefile +lustre/lmv/autoMakefile lustre/mds/Makefile lustre/mds/autoMakefile +lustre/mdt/Makefile +lustre/mdt/autoMakefile +lustre/cmm/Makefile +lustre/cmm/autoMakefile +lustre/mdd/Makefile +lustre/mdd/autoMakefile +lustre/fld/Makefile +lustre/fld/autoMakefile lustre/obdclass/Makefile lustre/obdclass/autoMakefile lustre/obdclass/linux/Makefile @@ -1360,18 +1497,23 @@ lustre/osc/Makefile lustre/osc/autoMakefile lustre/ost/Makefile lustre/ost/autoMakefile +lustre/osd/Makefile +lustre/osd/autoMakefile lustre/mgc/Makefile lustre/mgc/autoMakefile lustre/mgs/Makefile lustre/mgs/autoMakefile lustre/ptlrpc/Makefile lustre/ptlrpc/autoMakefile +lustre/ptlrpc/gss/Makefile +lustre/ptlrpc/gss/autoMakefile lustre/quota/Makefile lustre/quota/autoMakefile lustre/scripts/Makefile lustre/scripts/version_tag.pl lustre/tests/Makefile lustre/utils/Makefile +lustre/utils/gss/Makefile ]) case $lb_target_os in darwin) diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac index 5eee07b..13785d4 100644 --- a/lustre/autoconf/lustre-version.ac +++ b/lustre/autoconf/lustre-version.ac @@ -1,7 +1,7 @@ m4_define([LUSTRE_MAJOR],[1]) -m4_define([LUSTRE_MINOR],[6]) +m4_define([LUSTRE_MINOR],[8]) m4_define([LUSTRE_PATCH],[0]) -m4_define([LUSTRE_FIX],[90]) +m4_define([LUSTRE_FIX],[0]) dnl # liblustre delta is 0.0.1.32 , next version with fixes is ok, but dnl # after following release candidate/beta would spill this warning already. diff --git a/lustre/ldiskfs/.cvsignore b/lustre/cmm/.cvsignore similarity index 79% rename from lustre/ldiskfs/.cvsignore rename to lustre/cmm/.cvsignore index 9ad7f07..5d26f00 100644 --- a/lustre/ldiskfs/.cvsignore +++ b/lustre/cmm/.cvsignore @@ -5,15 +5,11 @@ configure Makefile .deps TAGS +.*.cmd autoMakefile.in autoMakefile *.ko *.mod.c -.*.cmd .*.flags .tmp_versions -linux-stage -linux -*.c -*.h -sources +.depend diff --git a/lustre/cmm/Makefile.in b/lustre/cmm/Makefile.in new file mode 100644 index 0000000..befc9ea5 --- /dev/null +++ b/lustre/cmm/Makefile.in @@ -0,0 +1,6 @@ +MODULES := cmm +cmm-objs := cmm_device.o cmm_object.o cmm_lproc.o mdc_device.o mdc_object.o + +@SPLIT_TRUE@cmm-objs += cmm_split.o + +@INCLUDE_RULES@ diff --git a/lustre/cmm/autoMakefile.am b/lustre/cmm/autoMakefile.am new file mode 100644 index 0000000..53fe374 --- /dev/null +++ b/lustre/cmm/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +modulefs_DATA = cmm$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES = $(cmm-objs:%.o=%.c) cmm_internal.h mdc_internal.h diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c new file mode 100644 index 0000000..717271a --- /dev/null +++ b/lustre/cmm/cmm_device.c @@ -0,0 +1,534 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_device.c + * Lustre Cluster Metadata Manager (cmm) + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include + +#include +#include +#include +#include +#include "cmm_internal.h" +#include "mdc_internal.h" + +static struct obd_ops cmm_obd_device_ops = { + .o_owner = THIS_MODULE +}; + +static struct lu_device_operations cmm_lu_ops; + +static inline int lu_device_is_cmm(struct lu_device *d) +{ + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops); +} + +int cmm_root_get(const struct lu_env *env, struct md_device *md, + struct lu_fid *fid) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + /* valid only on master MDS */ + if (cmm_dev->cmm_local_num == 0) + return cmm_child_ops(cmm_dev)->mdo_root_get(env, + cmm_dev->cmm_child, fid); + else + return -EINVAL; +} + +static int cmm_statfs(const struct lu_env *env, struct md_device *md, + struct kstatfs *sfs) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + + ENTRY; + rc = cmm_child_ops(cmm_dev)->mdo_statfs(env, + cmm_dev->cmm_child, sfs); + RETURN (rc); +} + +static int cmm_maxsize_get(const struct lu_env *env, struct md_device *md, + int *md_size, int *cookie_size) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + ENTRY; + rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(env, cmm_dev->cmm_child, + md_size, cookie_size); + RETURN(rc); +} + +static int cmm_init_capa_ctxt(const struct lu_env *env, struct md_device *md, + int mode , unsigned long timeout, __u32 alg, + struct lustre_capa_key *keys) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + ENTRY; + LASSERT(cmm_child_ops(cmm_dev)->mdo_init_capa_ctxt); + rc = cmm_child_ops(cmm_dev)->mdo_init_capa_ctxt(env, cmm_dev->cmm_child, + mode, timeout, alg, + keys); + RETURN(rc); +} + +static int cmm_update_capa_key(const struct lu_env *env, + struct md_device *md, + struct lustre_capa_key *key) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + ENTRY; + rc = cmm_child_ops(cmm_dev)->mdo_update_capa_key(env, + cmm_dev->cmm_child, + key); + RETURN(rc); +} + +static struct md_device_operations cmm_md_ops = { + .mdo_statfs = cmm_statfs, + .mdo_root_get = cmm_root_get, + .mdo_maxsize_get = cmm_maxsize_get, + .mdo_init_capa_ctxt = cmm_init_capa_ctxt, + .mdo_update_capa_key = cmm_update_capa_key, +}; + +extern struct lu_device_type mdc_device_type; + +static int cmm_post_init_mdc(const struct lu_env *env, + struct cmm_device *cmm) +{ + int max_mdsize, max_cookiesize, rc; + struct mdc_device *mc, *tmp; + + /* get the max mdsize and cookiesize from lower layer */ + rc = cmm_maxsize_get(env, &cmm->cmm_md_dev, &max_mdsize, + &max_cookiesize); + if (rc) + RETURN(rc); + + spin_lock(&cmm->cmm_tgt_guard); + list_for_each_entry_safe(mc, tmp, &cmm->cmm_targets, + mc_linkage) { + mdc_init_ea_size(env, mc, max_mdsize, max_cookiesize); + } + spin_unlock(&cmm->cmm_tgt_guard); + RETURN(rc); +} + +/* --- cmm_lu_operations --- */ +/* add new MDC to the CMM, create MDC lu_device and connect it to mdc_obd */ +static int cmm_add_mdc(const struct lu_env *env, + struct cmm_device *cm, struct lustre_cfg *cfg) +{ + struct lu_device_type *ldt = &mdc_device_type; + char *p, *num = lustre_cfg_string(cfg, 2); + struct mdc_device *mc, *tmp; + struct lu_fld_target target; + struct lu_device *ld; + mdsno_t mdc_num; + int rc; + ENTRY; + + /* find out that there is no such mdc */ + LASSERT(num); + mdc_num = simple_strtol(num, &p, 10); + if (*p) { + CERROR("Invalid index in lustre_cgf, offset 2\n"); + RETURN(-EINVAL); + } + + spin_lock(&cm->cmm_tgt_guard); + list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, + mc_linkage) { + if (mc->mc_num == mdc_num) { + spin_unlock(&cm->cmm_tgt_guard); + RETURN(-EEXIST); + } + } + spin_unlock(&cm->cmm_tgt_guard); + ld = ldt->ldt_ops->ldto_device_alloc(env, ldt, cfg); + ld->ld_site = cmm2lu_dev(cm)->ld_site; + + rc = ldt->ldt_ops->ldto_device_init(env, ld, NULL, NULL); + if (rc) { + ldt->ldt_ops->ldto_device_free(env, ld); + RETURN (rc); + } + /* pass config to the just created MDC */ + rc = ld->ld_ops->ldo_process_config(env, ld, cfg); + if (rc) + RETURN(rc); + + spin_lock(&cm->cmm_tgt_guard); + list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, + mc_linkage) { + if (mc->mc_num == mdc_num) { + spin_unlock(&cm->cmm_tgt_guard); + ldt->ldt_ops->ldto_device_fini(env, ld); + ldt->ldt_ops->ldto_device_free(env, ld); + RETURN(-EEXIST); + } + } + mc = lu2mdc_dev(ld); + list_add_tail(&mc->mc_linkage, &cm->cmm_targets); + cm->cmm_tgt_count++; + spin_unlock(&cm->cmm_tgt_guard); + + lu_device_get(cmm2lu_dev(cm)); + + target.ft_srv = NULL; + target.ft_idx = mc->mc_num; + target.ft_exp = mc->mc_desc.cl_exp; + fld_client_add_target(cm->cmm_fld, &target); + + /* Set max md size for the mdc. */ + rc = cmm_post_init_mdc(env, cm); + RETURN(rc); +} + +static void cmm_device_shutdown(const struct lu_env *env, + struct cmm_device *cm, + struct lustre_cfg *cfg) +{ + struct mdc_device *mc, *tmp; + ENTRY; + + /* Remove local target from FLD. */ + fld_client_del_target(cm->cmm_fld, cm->cmm_local_num); + + /* Finish all mdc devices. */ + spin_lock(&cm->cmm_tgt_guard); + list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, mc_linkage) { + struct lu_device *ld_m = mdc2lu_dev(mc); + fld_client_del_target(cm->cmm_fld, mc->mc_num); + ld_m->ld_ops->ldo_process_config(env, ld_m, cfg); + } + spin_unlock(&cm->cmm_tgt_guard); + + /* remove upcall device*/ + md_upcall_fini(&cm->cmm_md_dev); + + EXIT; +} + +static int cmm_device_mount(const struct lu_env *env, + struct cmm_device *m, struct lustre_cfg *cfg) +{ + const char *index = lustre_cfg_string(cfg, 2); + char *p; + + LASSERT(index != NULL); + + m->cmm_local_num = simple_strtol(index, &p, 10); + if (*p) { + CERROR("Invalid index in lustre_cgf\n"); + RETURN(-EINVAL); + } + + RETURN(0); +} + +static int cmm_process_config(const struct lu_env *env, + struct lu_device *d, struct lustre_cfg *cfg) +{ + struct cmm_device *m = lu2cmm_dev(d); + struct lu_device *next = md2lu_dev(m->cmm_child); + int err; + ENTRY; + + switch(cfg->lcfg_command) { + case LCFG_ADD_MDC: + /* On first ADD_MDC add also local target. */ + if (!(m->cmm_flags & CMM_INITIALIZED)) { + struct lu_site *ls = cmm2lu_dev(m)->ld_site; + struct lu_fld_target target; + + target.ft_srv = ls->ls_server_fld; + target.ft_idx = m->cmm_local_num; + target.ft_exp = NULL; + + fld_client_add_target(m->cmm_fld, &target); + } + err = cmm_add_mdc(env, m, cfg); + + /* The first ADD_MDC can be counted as setup is finished. */ + if (!(m->cmm_flags & CMM_INITIALIZED)) + m->cmm_flags |= CMM_INITIALIZED; + + break; + case LCFG_SETUP: + { + /* lower layers should be set up at first */ + err = next->ld_ops->ldo_process_config(env, next, cfg); + if (err == 0) + err = cmm_device_mount(env, m, cfg); + break; + } + case LCFG_CLEANUP: + { + cmm_device_shutdown(env, m, cfg); + } + default: + err = next->ld_ops->ldo_process_config(env, next, cfg); + } + RETURN(err); +} + +static int cmm_recovery_complete(const struct lu_env *env, + struct lu_device *d) +{ + struct cmm_device *m = lu2cmm_dev(d); + struct lu_device *next = md2lu_dev(m->cmm_child); + int rc; + ENTRY; + rc = next->ld_ops->ldo_recovery_complete(env, next); + RETURN(rc); +} + +static struct lu_device_operations cmm_lu_ops = { + .ldo_object_alloc = cmm_object_alloc, + .ldo_process_config = cmm_process_config, + .ldo_recovery_complete = cmm_recovery_complete +}; + +/* --- lu_device_type operations --- */ +int cmm_upcall(const struct lu_env *env, struct md_device *md, + enum md_upcall_event ev) +{ + int rc; + ENTRY; + + switch (ev) { + case MD_LOV_SYNC: + rc = cmm_post_init_mdc(env, md2cmm_dev(md)); + if (rc) + CERROR("can not init md size %d\n", rc); + /* fall through */ + default: + rc = md_do_upcall(env, md, ev); + } + RETURN(rc); +} + +static struct lu_device *cmm_device_alloc(const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *cfg) +{ + struct lu_device *l; + struct cmm_device *m; + ENTRY; + + OBD_ALLOC_PTR(m); + if (m == NULL) { + l = ERR_PTR(-ENOMEM); + } else { + md_device_init(&m->cmm_md_dev, t); + m->cmm_md_dev.md_ops = &cmm_md_ops; + md_upcall_init(&m->cmm_md_dev, cmm_upcall); + l = cmm2lu_dev(m); + l->ld_ops = &cmm_lu_ops; + + OBD_ALLOC_PTR(m->cmm_fld); + if (!m->cmm_fld) + GOTO(out_free_cmm, l = ERR_PTR(-ENOMEM)); + } + + RETURN(l); +out_free_cmm: + OBD_FREE_PTR(m); + return l; +} + +static void cmm_device_free(const struct lu_env *env, struct lu_device *d) +{ + struct cmm_device *m = lu2cmm_dev(d); + + LASSERT(m->cmm_tgt_count == 0); + LASSERT(list_empty(&m->cmm_targets)); + if (m->cmm_fld != NULL) { + OBD_FREE_PTR(m->cmm_fld); + m->cmm_fld = NULL; + } + md_device_fini(&m->cmm_md_dev); + OBD_FREE_PTR(m); +} + +/* context key constructor/destructor */ +static void *cmm_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct cmm_thread_info *info; + + CLASSERT(CFS_PAGE_SIZE >= sizeof *info); + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + return info; +} + +static void cmm_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct cmm_thread_info *info = data; + OBD_FREE_PTR(info); +} + +static struct lu_context_key cmm_thread_key = { + .lct_tags = LCT_MD_THREAD, + .lct_init = cmm_key_init, + .lct_fini = cmm_key_fini +}; + +struct cmm_thread_info *cmm_env_info(const struct lu_env *env) +{ + struct cmm_thread_info *info; + + info = lu_context_key_get(&env->le_ctx, &cmm_thread_key); + LASSERT(info != NULL); + return info; +} + +static int cmm_type_init(struct lu_device_type *t) +{ + LU_CONTEXT_KEY_INIT(&cmm_thread_key); + return lu_context_key_register(&cmm_thread_key); +} + +static void cmm_type_fini(struct lu_device_type *t) +{ + lu_context_key_degister(&cmm_thread_key); +} + +static int cmm_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + struct cmm_device *m = lu2cmm_dev(d); + struct lu_site *ls; + int err = 0; + ENTRY; + + spin_lock_init(&m->cmm_tgt_guard); + INIT_LIST_HEAD(&m->cmm_targets); + m->cmm_tgt_count = 0; + m->cmm_child = lu2md_dev(next); + + err = fld_client_init(m->cmm_fld, name, + LUSTRE_CLI_FLD_HASH_DHT); + if (err) { + CERROR("Can't init FLD, err %d\n", err); + RETURN(err); + } + + /* Assign site's fld client ref, needed for asserts in osd. */ + ls = cmm2lu_dev(m)->ld_site; + ls->ls_client_fld = m->cmm_fld; + err = cmm_procfs_init(m, name); + + RETURN(err); +} + +static struct lu_device *cmm_device_fini(const struct lu_env *env, + struct lu_device *ld) +{ + struct cmm_device *cm = lu2cmm_dev(ld); + struct mdc_device *mc, *tmp; + struct lu_site *ls; + ENTRY; + + /* Finish all mdc devices */ + spin_lock(&cm->cmm_tgt_guard); + list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, mc_linkage) { + struct lu_device *ld_m = mdc2lu_dev(mc); + + list_del_init(&mc->mc_linkage); + lu_device_put(cmm2lu_dev(cm)); + ld_m->ld_type->ldt_ops->ldto_device_fini(env, ld_m); + ld_m->ld_type->ldt_ops->ldto_device_free(env, ld_m); + cm->cmm_tgt_count--; + } + spin_unlock(&cm->cmm_tgt_guard); + + fld_client_fini(cm->cmm_fld); + ls = cmm2lu_dev(cm)->ld_site; + ls->ls_client_fld = NULL; + cmm_procfs_fini(cm); + + RETURN (md2lu_dev(cm->cmm_child)); +} + +static struct lu_device_type_operations cmm_device_type_ops = { + .ldto_init = cmm_type_init, + .ldto_fini = cmm_type_fini, + + .ldto_device_alloc = cmm_device_alloc, + .ldto_device_free = cmm_device_free, + + .ldto_device_init = cmm_device_init, + .ldto_device_fini = cmm_device_fini +}; + +static struct lu_device_type cmm_device_type = { + .ldt_tags = LU_DEVICE_MD, + .ldt_name = LUSTRE_CMM_NAME, + .ldt_ops = &cmm_device_type_ops, + .ldt_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD +}; + +struct lprocfs_vars lprocfs_cmm_obd_vars[] = { + { 0 } +}; + +struct lprocfs_vars lprocfs_cmm_module_vars[] = { + { 0 } +}; + +LPROCFS_INIT_VARS(cmm, lprocfs_cmm_module_vars, lprocfs_cmm_obd_vars); + +static int __init cmm_mod_init(void) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(cmm, &lvars); + return class_register_type(&cmm_obd_device_ops, NULL, lvars.module_vars, + LUSTRE_CMM_NAME, &cmm_device_type); +} + +static void __exit cmm_mod_exit(void) +{ + class_unregister_type(LUSTRE_CMM_NAME); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre Clustered Metadata Manager ("LUSTRE_CMM_NAME")"); +MODULE_LICENSE("GPL"); + +cfs_module(cmm, "0.1.0", cmm_mod_init, cmm_mod_exit); diff --git a/lustre/cmm/cmm_internal.h b/lustre/cmm/cmm_internal.h new file mode 100644 index 0000000..ab07974 --- /dev/null +++ b/lustre/cmm/cmm_internal.h @@ -0,0 +1,235 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_internal.h + * Lustre Cluster Metadata Manager (cmm) + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _CMM_INTERNAL_H +#define _CMM_INTERNAL_H + +#if defined(__KERNEL__) + +#include +#include +#include +#include + + +struct cmm_device { + struct md_device cmm_md_dev; + /* device flags, taken from enum cmm_flags */ + __u32 cmm_flags; + /* underlaying device in MDS stack, usually MDD */ + struct md_device *cmm_child; + /* FLD client to talk to FLD */ + struct lu_client_fld *cmm_fld; + /* other MD servers in cluster */ + mdsno_t cmm_local_num; + __u32 cmm_tgt_count; + struct list_head cmm_targets; + spinlock_t cmm_tgt_guard; + cfs_proc_dir_entry_t *cmm_proc_entry; + struct lprocfs_stats *cmm_stats; +}; + +enum cmm_flags { + /* + * Device initialization complete. + */ + CMM_INITIALIZED = 1 << 0 +}; + +static inline struct md_device_operations *cmm_child_ops(struct cmm_device *d) +{ + return (d->cmm_child->md_ops); +} + +static inline struct cmm_device *md2cmm_dev(struct md_device *m) +{ + return container_of0(m, struct cmm_device, cmm_md_dev); +} + +static inline struct cmm_device *lu2cmm_dev(struct lu_device *d) +{ + return container_of0(d, struct cmm_device, cmm_md_dev.md_lu_dev); +} + +static inline struct lu_device *cmm2lu_dev(struct cmm_device *d) +{ + return (&d->cmm_md_dev.md_lu_dev); +} + +#ifdef HAVE_SPLIT_SUPPORT +enum cmm_split_state { + CMM_SPLIT_UNKNOWN, + CMM_SPLIT_NONE, + CMM_SPLIT_NEEDED, + CMM_SPLIT_DONE, + CMM_SPLIT_DENIED +}; +#endif + +struct cmm_object { + struct md_object cmo_obj; +}; + +/* local CMM object */ +struct cml_object { + struct cmm_object cmm_obj; +#ifdef HAVE_SPLIT_SUPPORT + /* split state of object (for dirs only)*/ + enum cmm_split_state clo_split; +#endif +}; + +/* remote CMM object */ +struct cmr_object { + struct cmm_object cmm_obj; + /* mds number where object is placed */ + mdsno_t cmo_num; +}; + +enum { + CMM_SPLIT_PAGE_COUNT = 1 +}; + +struct cmm_thread_info { + struct md_attr cmi_ma; + struct lu_buf cmi_buf; + struct lu_fid cmi_fid; /* used for le/cpu conversions */ + struct lu_rdpg cmi_rdpg; + /* pointers to pages for readpage. */ + struct page *cmi_pages[CMM_SPLIT_PAGE_COUNT]; + struct md_op_spec cmi_spec; + struct lmv_stripe_md cmi_lmv; + char cmi_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE]; + + /* Ops object filename */ + struct lu_name cti_name; +}; + +static inline struct cmm_device *cmm_obj2dev(struct cmm_object *c) +{ + return (md2cmm_dev(md_obj2dev(&c->cmo_obj))); +} + +static inline struct cmm_object *lu2cmm_obj(struct lu_object *o) +{ + //LASSERT(lu_device_is_cmm(o->lo_dev)); + return container_of0(o, struct cmm_object, cmo_obj.mo_lu); +} + +/* get cmm object from md_object */ +static inline struct cmm_object *md2cmm_obj(struct md_object *o) +{ + return container_of0(o, struct cmm_object, cmo_obj); +} +/* get lower-layer object */ +static inline struct md_object *cmm2child_obj(struct cmm_object *o) +{ + return (o ? lu2md(lu_object_next(&o->cmo_obj.mo_lu)) : NULL); +} + +static inline struct lu_fid* cmm2fid(struct cmm_object *obj) +{ + return &(obj->cmo_obj.mo_lu.lo_header->loh_fid); +} + +struct cmm_thread_info *cmm_env_info(const struct lu_env *env); + +/* cmm_object.c */ +struct lu_object *cmm_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *); + +/* + * local CMM object operations. cml_... + */ +static inline struct cml_object *lu2cml_obj(struct lu_object *o) +{ + return container_of0(o, struct cml_object, cmm_obj.cmo_obj.mo_lu); +} +static inline struct cml_object *md2cml_obj(struct md_object *mo) +{ + return container_of0(mo, struct cml_object, cmm_obj.cmo_obj); +} +static inline struct cml_object *cmm2cml_obj(struct cmm_object *co) +{ + return container_of0(co, struct cml_object, cmm_obj); +} + +int cmm_upcall(const struct lu_env *env, struct md_device *md, + enum md_upcall_event ev); + +#ifdef HAVE_SPLIT_SUPPORT + +#define CMM_MD_SIZE(stripes) (sizeof(struct lmv_stripe_md) + \ + (stripes) * sizeof(struct lu_fid)) + +/* cmm_split.c */ +static inline struct lu_buf *cmm_buf_get(const struct lu_env *env, + void *area, ssize_t len) +{ + struct lu_buf *buf; + + buf = &cmm_env_info(env)->cmi_buf; + buf->lb_buf = area; + buf->lb_len = len; + return buf; +} + +int cmm_split_check(const struct lu_env *env, struct md_object *mp, + const char *name); + +int cmm_split_expect(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma, int *split); + +int cmm_split_dir(const struct lu_env *env, struct md_object *mo); + +int cmm_split_access(const struct lu_env *env, struct md_object *mo, + mdl_mode_t lm); +#endif + +int cmm_fld_lookup(struct cmm_device *cm, const struct lu_fid *fid, + mdsno_t *mds, const struct lu_env *env); + +int cmm_procfs_init(struct cmm_device *cmm, const char *name); +int cmm_procfs_fini(struct cmm_device *cmm); + +void cmm_lprocfs_time_start(const struct lu_env *env); +void cmm_lprocfs_time_end(const struct lu_env *env, struct cmm_device *cmm, + int idx); + +enum { + LPROC_CMM_SPLIT_CHECK, + LPROC_CMM_SPLIT, + LPROC_CMM_LOOKUP, + LPROC_CMM_CREATE, + LPROC_CMM_NR +}; + +#endif /* __KERNEL__ */ +#endif /* _CMM_INTERNAL_H */ + diff --git a/lustre/cmm/cmm_lproc.c b/lustre/cmm/cmm_lproc.c new file mode 100644 index 0000000..806cac2 --- /dev/null +++ b/lustre/cmm/cmm_lproc.c @@ -0,0 +1,108 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * cmm/cmm_lproc.c + * CMM lprocfs stuff + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cmm_internal.h" + +static const char *cmm_counter_names[LPROC_CMM_NR] = { + [LPROC_CMM_SPLIT_CHECK] = "split_check", + [LPROC_CMM_SPLIT] = "split", + [LPROC_CMM_LOOKUP] = "lookup", + [LPROC_CMM_CREATE] = "create" +}; + +int cmm_procfs_init(struct cmm_device *cmm, const char *name) +{ + struct lu_device *ld = &cmm->cmm_md_dev.md_lu_dev; + struct obd_type *type; + int rc; + ENTRY; + + type = ld->ld_type->ldt_obd_type; + + LASSERT(name != NULL); + LASSERT(type != NULL); + + /* Find the type procroot and add the proc entry for this device. */ + cmm->cmm_proc_entry = lprocfs_register(name, type->typ_procroot, + NULL, NULL); + if (IS_ERR(cmm->cmm_proc_entry)) { + rc = PTR_ERR(cmm->cmm_proc_entry); + CERROR("Error %d setting up lprocfs for %s\n", + rc, name); + cmm->cmm_proc_entry = NULL; + GOTO(out, rc); + } + + rc = lu_time_init(&cmm->cmm_stats, + cmm->cmm_proc_entry, + cmm_counter_names, ARRAY_SIZE(cmm_counter_names)); + + EXIT; +out: + if (rc) + cmm_procfs_fini(cmm); + return rc; +} + +int cmm_procfs_fini(struct cmm_device *cmm) +{ + if (cmm->cmm_stats) + lu_time_fini(&cmm->cmm_stats); + + if (cmm->cmm_proc_entry) { + lprocfs_remove(&cmm->cmm_proc_entry); + cmm->cmm_proc_entry = NULL; + } + RETURN(0); +} + +void cmm_lprocfs_time_start(const struct lu_env *env) +{ + lu_lprocfs_time_start(env); +} + +void cmm_lprocfs_time_end(const struct lu_env *env, struct cmm_device *cmm, + int idx) +{ + lu_lprocfs_time_end(env, cmm->cmm_stats, idx); +} diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c new file mode 100644 index 0000000..88cf0fd --- /dev/null +++ b/lustre/cmm/cmm_object.c @@ -0,0 +1,1263 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_object.c + * Lustre Cluster Metadata Manager (cmm) + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include "cmm_internal.h" +#include "mdc_internal.h" + +int cmm_fld_lookup(struct cmm_device *cm, const struct lu_fid *fid, + mdsno_t *mds, const struct lu_env *env) +{ + int rc = 0; + ENTRY; + + LASSERT(fid_is_sane(fid)); + + rc = fld_client_lookup(cm->cmm_fld, fid_seq(fid), mds, env); + if (rc) { + CERROR("Can't find mds by seq "LPX64", rc %d\n", + fid_seq(fid), rc); + RETURN(rc); + } + + if (*mds > cm->cmm_tgt_count) { + CERROR("Got invalid mdsno: "LPU64" (max: %u)\n", + *mds, cm->cmm_tgt_count); + rc = -EINVAL; + } else { + CDEBUG(D_INFO, "CMM: got MDS "LPU64" for sequence: " + LPU64"\n", *mds, fid_seq(fid)); + } + + RETURN (rc); +} + +static struct md_object_operations cml_mo_ops; +static struct md_dir_operations cml_dir_ops; +static struct lu_object_operations cml_obj_ops; + +static struct md_object_operations cmr_mo_ops; +static struct md_dir_operations cmr_dir_ops; +static struct lu_object_operations cmr_obj_ops; + +struct lu_object *cmm_object_alloc(const struct lu_env *env, + const struct lu_object_header *loh, + struct lu_device *ld) +{ + const struct lu_fid *fid = &loh->loh_fid; + struct lu_object *lo = NULL; + struct cmm_device *cd; + mdsno_t mds; + int rc = 0; + + ENTRY; + + cd = lu2cmm_dev(ld); + if (cd->cmm_flags & CMM_INITIALIZED) { + /* get object location */ + rc = cmm_fld_lookup(lu2cmm_dev(ld), fid, &mds, env); + if (rc) + RETURN(NULL); + } else + /* + * Device is not yet initialized, cmm_object is being created + * as part of early bootstrap procedure (it is /ROOT, or /fld, + * etc.). Such object *has* to be local. + */ + mds = cd->cmm_local_num; + + /* select the proper set of operations based on object location */ + if (mds == cd->cmm_local_num) { + struct cml_object *clo; + + OBD_ALLOC_PTR(clo); + if (clo != NULL) { + lo = &clo->cmm_obj.cmo_obj.mo_lu; + lu_object_init(lo, NULL, ld); + clo->cmm_obj.cmo_obj.mo_ops = &cml_mo_ops; + clo->cmm_obj.cmo_obj.mo_dir_ops = &cml_dir_ops; + lo->lo_ops = &cml_obj_ops; + } + } else { + struct cmr_object *cro; + + OBD_ALLOC_PTR(cro); + if (cro != NULL) { + lo = &cro->cmm_obj.cmo_obj.mo_lu; + lu_object_init(lo, NULL, ld); + cro->cmm_obj.cmo_obj.mo_ops = &cmr_mo_ops; + cro->cmm_obj.cmo_obj.mo_dir_ops = &cmr_dir_ops; + lo->lo_ops = &cmr_obj_ops; + cro->cmo_num = mds; + } + } + RETURN(lo); +} + +/* + * CMM has two types of objects - local and remote. They have different set + * of operations so we are avoiding multiple checks in code. + */ + +/* get local child device */ +static struct lu_device *cml_child_dev(struct cmm_device *d) +{ + return &d->cmm_child->md_lu_dev; +} + +/* lu_object operations */ +static void cml_object_free(const struct lu_env *env, + struct lu_object *lo) +{ + struct cml_object *clo = lu2cml_obj(lo); + lu_object_fini(lo); + OBD_FREE_PTR(clo); +} + +static int cml_object_init(const struct lu_env *env, struct lu_object *lo) +{ + struct cmm_device *cd = lu2cmm_dev(lo->lo_dev); + struct lu_device *c_dev; + struct lu_object *c_obj; + int rc; + + ENTRY; + +#ifdef HAVE_SPLIT_SUPPORT + if (cd->cmm_tgt_count == 0) + lu2cml_obj(lo)->clo_split = CMM_SPLIT_DENIED; + else + lu2cml_obj(lo)->clo_split = CMM_SPLIT_UNKNOWN; +#endif + c_dev = cml_child_dev(cd); + if (c_dev == NULL) { + rc = -ENOENT; + } else { + c_obj = c_dev->ld_ops->ldo_object_alloc(env, + lo->lo_header, c_dev); + if (c_obj != NULL) { + lu_object_add(lo, c_obj); + rc = 0; + } else { + rc = -ENOMEM; + } + } + + RETURN(rc); +} + +static int cml_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *lo) +{ + return (*p)(env, cookie, LUSTRE_CMM_NAME"-local@%p", lo); +} + +static struct lu_object_operations cml_obj_ops = { + .loo_object_init = cml_object_init, + .loo_object_free = cml_object_free, + .loo_object_print = cml_object_print +}; + +/* CMM local md_object operations */ +static int cml_object_create(const struct lu_env *env, + struct md_object *mo, + const struct md_op_spec *spec, + struct md_attr *attr) +{ + int rc; + ENTRY; + rc = mo_object_create(env, md_object_next(mo), spec, attr); + RETURN(rc); +} + +static int cml_permission(const struct lu_env *env, + struct md_object *p, struct md_object *c, + struct md_attr *attr, int mask) +{ + int rc; + ENTRY; + rc = mo_permission(env, md_object_next(p), md_object_next(c), + attr, mask); + RETURN(rc); +} + +static int cml_attr_get(const struct lu_env *env, struct md_object *mo, + struct md_attr *attr) +{ + int rc; + ENTRY; + rc = mo_attr_get(env, md_object_next(mo), attr); + RETURN(rc); +} + +static int cml_attr_set(const struct lu_env *env, struct md_object *mo, + const struct md_attr *attr) +{ + int rc; + ENTRY; + rc = mo_attr_set(env, md_object_next(mo), attr); + RETURN(rc); +} + +static int cml_xattr_get(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf, const char *name) +{ + int rc; + ENTRY; + rc = mo_xattr_get(env, md_object_next(mo), buf, name); + RETURN(rc); +} + +static int cml_readlink(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf) +{ + int rc; + ENTRY; + rc = mo_readlink(env, md_object_next(mo), buf); + RETURN(rc); +} + +static int cml_xattr_list(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf) +{ + int rc; + ENTRY; + rc = mo_xattr_list(env, md_object_next(mo), buf); + RETURN(rc); +} + +static int cml_xattr_set(const struct lu_env *env, struct md_object *mo, + const struct lu_buf *buf, + const char *name, int fl) +{ + int rc; + ENTRY; + rc = mo_xattr_set(env, md_object_next(mo), buf, name, fl); + RETURN(rc); +} + +static int cml_xattr_del(const struct lu_env *env, struct md_object *mo, + const char *name) +{ + int rc; + ENTRY; + rc = mo_xattr_del(env, md_object_next(mo), name); + RETURN(rc); +} + +static int cml_ref_add(const struct lu_env *env, struct md_object *mo, + const struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mo_ref_add(env, md_object_next(mo), ma); + RETURN(rc); +} + +static int cml_ref_del(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mo_ref_del(env, md_object_next(mo), ma); + RETURN(rc); +} + +static int cml_open(const struct lu_env *env, struct md_object *mo, + int flags) +{ + int rc; + ENTRY; + rc = mo_open(env, md_object_next(mo), flags); + RETURN(rc); +} + +static int cml_close(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mo_close(env, md_object_next(mo), ma); + RETURN(rc); +} + +static int cml_readpage(const struct lu_env *env, struct md_object *mo, + const struct lu_rdpg *rdpg) +{ + int rc; + ENTRY; + rc = mo_readpage(env, md_object_next(mo), rdpg); + RETURN(rc); +} + +static int cml_capa_get(const struct lu_env *env, struct md_object *mo, + struct lustre_capa *capa, int renewal) +{ + int rc; + ENTRY; + rc = mo_capa_get(env, md_object_next(mo), capa, renewal); + RETURN(rc); +} + +static struct md_object_operations cml_mo_ops = { + .moo_permission = cml_permission, + .moo_attr_get = cml_attr_get, + .moo_attr_set = cml_attr_set, + .moo_xattr_get = cml_xattr_get, + .moo_xattr_list = cml_xattr_list, + .moo_xattr_set = cml_xattr_set, + .moo_xattr_del = cml_xattr_del, + .moo_object_create = cml_object_create, + .moo_ref_add = cml_ref_add, + .moo_ref_del = cml_ref_del, + .moo_open = cml_open, + .moo_close = cml_close, + .moo_readpage = cml_readpage, + .moo_readlink = cml_readlink, + .moo_capa_get = cml_capa_get +}; + +/* md_dir operations */ +static int cml_lookup(const struct lu_env *env, struct md_object *mo_p, + const struct lu_name *lname, struct lu_fid *lf, + struct md_op_spec *spec) +{ + int rc; + ENTRY; + +#ifdef HAVE_SPLIT_SUPPORT + if (spec != NULL && spec->sp_ck_split) { + rc = cmm_split_check(env, mo_p, lname->ln_name); + if (rc) + RETURN(rc); + } +#endif + rc = mdo_lookup(env, md_object_next(mo_p), lname, lf, spec); + RETURN(rc); + +} + +static mdl_mode_t cml_lock_mode(const struct lu_env *env, + struct md_object *mo, mdl_mode_t lm) +{ + int rc = MDL_MINMODE; + ENTRY; + +#ifdef HAVE_SPLIT_SUPPORT + rc = cmm_split_access(env, mo, lm); +#endif + + RETURN(rc); +} + +static int cml_create(const struct lu_env *env, struct md_object *mo_p, + const struct lu_name *lname, struct md_object *mo_c, + struct md_op_spec *spec, struct md_attr *ma) +{ + int rc; + ENTRY; + +#ifdef HAVE_SPLIT_SUPPORT + /* Lock mode always should be sane. */ + LASSERT(spec->sp_cr_mode != MDL_MINMODE); + + /* + * Sigh... This is long story. MDT may have race with detecting if split + * is possible in cmm. We know this race and let it live, because + * getting it rid (with some sem or spinlock) will also mean that + * PDIROPS for create will not work because we kill parallel work, what + * is really bad for performance and makes no sense having PDIROPS. So, + * we better allow the race to live, but split dir only if some of + * concurrent threads takes EX lock, not matter which one. So that, say, + * two concurrent threads may have different lock modes on directory (CW + * and EX) and not first one which comes here and see that split is + * possible should split the dir, but only that one which has EX + * lock. And we do not care that in this case, split may happen a bit + * later (when dir size will not be necessarily 64K, but may be a bit + * larger). So that, we allow concurrent creates and protect split by EX + * lock. + */ + if (spec->sp_cr_mode == MDL_EX) { + /* + * Try to split @mo_p. If split is ok, -ERESTART is returned and + * current thread will not peoceed with create. Instead it sends + * -ERESTART to client to let it know that correct MDT should be + * choosen. + */ + rc = cmm_split_dir(env, mo_p); + if (rc) + /* + * -ERESTART or some split error is returned, we can't + * proceed with create. + */ + GOTO(out, rc); + } + + if (spec != NULL && spec->sp_ck_split) { + /* + * Check for possible split directory and let caller know that + * it should tell client that directory is split and operation + * should repeat to correct MDT. + */ + rc = cmm_split_check(env, mo_p, lname->ln_name); + if (rc) + GOTO(out, rc); + } +#endif + + rc = mdo_create(env, md_object_next(mo_p), lname, md_object_next(mo_c), + spec, ma); + + EXIT; +out: + return rc; +} + +static int cml_create_data(const struct lu_env *env, struct md_object *p, + struct md_object *o, + const struct md_op_spec *spec, + struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mdo_create_data(env, md_object_next(p), md_object_next(o), + spec, ma); + RETURN(rc); +} + +static int cml_link(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_s, const struct lu_name *lname, + struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mdo_link(env, md_object_next(mo_p), md_object_next(mo_s), + lname, ma); + RETURN(rc); +} + +static int cml_unlink(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_c, const struct lu_name *lname, + struct md_attr *ma) +{ + int rc; + ENTRY; + rc = mdo_unlink(env, md_object_next(mo_p), md_object_next(mo_c), + lname, ma); + RETURN(rc); +} + +/* rename is split to local/remote by location of new parent dir */ +struct md_object *md_object_find(const struct lu_env *env, + struct md_device *md, + const struct lu_fid *f) +{ + struct lu_object *o; + struct md_object *m; + ENTRY; + + o = lu_object_find(env, md2lu_dev(md)->ld_site, f); + if (IS_ERR(o)) + m = (struct md_object *)o; + else { + o = lu_object_locate(o->lo_header, md2lu_dev(md)->ld_type); + m = o ? lu2md(o) : NULL; + } + RETURN(m); +} + +static int cmm_mode_get(const struct lu_env *env, struct md_device *md, + const struct lu_fid *lf, struct md_attr *ma, + int *remote) +{ + struct md_object *mo_s = md_object_find(env, md, lf); + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma; + int rc; + ENTRY; + + if (IS_ERR(mo_s)) + RETURN(PTR_ERR(mo_s)); + + if (remote && (lu_object_exists(&mo_s->mo_lu) < 0)) + *remote = 1; + + cmi = cmm_env_info(env); + tmp_ma = &cmi->cmi_ma; + tmp_ma->ma_need = MA_INODE; + tmp_ma->ma_valid = 0; + /* get type from src, can be remote req */ + rc = mo_attr_get(env, md_object_next(mo_s), tmp_ma); + if (rc == 0) { + ma->ma_attr.la_mode = tmp_ma->ma_attr.la_mode; + ma->ma_attr.la_uid = tmp_ma->ma_attr.la_uid; + ma->ma_attr.la_gid = tmp_ma->ma_attr.la_gid; + ma->ma_attr.la_flags = tmp_ma->ma_attr.la_flags; + ma->ma_attr.la_valid |= LA_MODE | LA_UID | LA_GID | LA_FLAGS; + } + lu_object_put(env, &mo_s->mo_lu); + RETURN(rc); +} + +static int cmm_rename_ctime(const struct lu_env *env, struct md_device *md, + const struct lu_fid *lf, struct md_attr *ma) +{ + struct md_object *mo_s = md_object_find(env, md, lf); + int rc; + ENTRY; + + if (IS_ERR(mo_s)) + RETURN(PTR_ERR(mo_s)); + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + /* set ctime to obj, can be remote req */ + rc = mo_attr_set(env, md_object_next(mo_s), ma); + lu_object_put(env, &mo_s->mo_lu); + RETURN(rc); +} + +static inline void cml_rename_warn(const char *fname, + struct md_object *mo_po, + struct md_object *mo_pn, + const struct lu_fid *lf, + const char *s_name, + struct md_object *mo_t, + const char *t_name, + int err) +{ + if (mo_t) + CWARN("cml_rename failed for %s, should revoke: [mo_po "DFID"] " + "[mo_pn "DFID"] [lf "DFID"] [sname %s] [mo_t "DFID"] " + "[tname %s] [err %d]\n", fname, + PFID(lu_object_fid(&mo_po->mo_lu)), + PFID(lu_object_fid(&mo_pn->mo_lu)), + PFID(lf), s_name, + PFID(lu_object_fid(&mo_t->mo_lu)), + t_name, err); + else + CWARN("cml_rename failed for %s, should revoke: [mo_po "DFID"] " + "[mo_pn "DFID"] [lf "DFID"] [sname %s] [mo_t NULL] " + "[tname %s] [err %d]\n", fname, + PFID(lu_object_fid(&mo_po->mo_lu)), + PFID(lu_object_fid(&mo_pn->mo_lu)), + PFID(lf), s_name, + t_name, err); +} + +static int cml_rename(const struct lu_env *env, struct md_object *mo_po, + struct md_object *mo_pn, const struct lu_fid *lf, + const struct lu_name *ls_name, struct md_object *mo_t, + const struct lu_name *lt_name, struct md_attr *ma) +{ + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma = NULL; + struct md_object *tmp_t = mo_t; + int remote = 0, rc; + ENTRY; + + rc = cmm_mode_get(env, md_obj2dev(mo_po), lf, ma, &remote); + if (rc) + RETURN(rc); + + if (mo_t && lu_object_exists(&mo_t->mo_lu) < 0) { + /* XXX: mo_t is remote object and there is RPC to unlink it. + * before that, do local sanity check for rename first. */ + if (!remote) { + struct md_object *mo_s = md_object_find(env, + md_obj2dev(mo_po), lf); + if (IS_ERR(mo_s)) + RETURN(PTR_ERR(mo_s)); + + LASSERT(lu_object_exists(&mo_s->mo_lu) > 0); + rc = mo_permission(env, md_object_next(mo_po), + md_object_next(mo_s), + ma, MAY_RENAME_SRC); + lu_object_put(env, &mo_s->mo_lu); + if (rc) + RETURN(rc); + } else { + rc = mo_permission(env, NULL, md_object_next(mo_po), + ma, MAY_UNLINK | MAY_VTX_FULL); + if (rc) + RETURN(rc); + } + + rc = mo_permission(env, NULL, md_object_next(mo_pn), ma, + MAY_UNLINK | MAY_VTX_PART); + if (rc) + RETURN(rc); + + /* + * XXX: @ma will be changed after mo_ref_del, but we will use + * it for mdo_rename later, so save it before mo_ref_del. + */ + cmi = cmm_env_info(env); + tmp_ma = &cmi->cmi_ma; + *tmp_ma = *ma; + rc = mo_ref_del(env, md_object_next(mo_t), ma); + if (rc) + RETURN(rc); + + tmp_ma->ma_attr_flags |= MDS_PERM_BYPASS; + mo_t = NULL; + } + + /* XXX: for src on remote MDS case, change its ctime before local + * rename. Firstly, do local sanity check for rename if necessary. */ + if (remote) { + if (!tmp_ma) { + rc = mo_permission(env, NULL, md_object_next(mo_po), + ma, MAY_UNLINK | MAY_VTX_FULL); + if (rc) + RETURN(rc); + + if (mo_t) { + LASSERT(lu_object_exists(&mo_t->mo_lu) > 0); + rc = mo_permission(env, md_object_next(mo_pn), + md_object_next(mo_t), + ma, MAY_RENAME_TAR); + if (rc) + RETURN(rc); + } else { + int mask; + + if (mo_po != mo_pn) + mask = (S_ISDIR(ma->ma_attr.la_mode) ? + MAY_LINK : MAY_CREATE); + else + mask = MAY_CREATE; + rc = mo_permission(env, NULL, + md_object_next(mo_pn), + NULL, mask); + if (rc) + RETURN(rc); + } + + ma->ma_attr_flags |= MDS_PERM_BYPASS; + } else { + LASSERT(tmp_ma->ma_attr_flags & MDS_PERM_BYPASS); + } + + rc = cmm_rename_ctime(env, md_obj2dev(mo_po), lf, + tmp_ma ? tmp_ma : ma); + if (rc) { + /* TODO: revoke mo_t if necessary. */ + cml_rename_warn("cmm_rename_ctime", mo_po, + mo_pn, lf, ls_name->ln_name, + tmp_t, lt_name->ln_name, rc); + RETURN(rc); + } + } + + /* local rename, mo_t can be NULL */ + rc = mdo_rename(env, md_object_next(mo_po), + md_object_next(mo_pn), lf, ls_name, + md_object_next(mo_t), lt_name, tmp_ma ? tmp_ma : ma); + if (rc) + /* TODO: revoke all cml_rename */ + cml_rename_warn("mdo_rename", mo_po, mo_pn, lf, + ls_name->ln_name, tmp_t, lt_name->ln_name, rc); + + RETURN(rc); +} + +static int cml_rename_tgt(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_t, const struct lu_fid *lf, + const struct lu_name *lname, struct md_attr *ma) +{ + int rc; + ENTRY; + + rc = mdo_rename_tgt(env, md_object_next(mo_p), + md_object_next(mo_t), lf, lname, ma); + RETURN(rc); +} +/* used only in case of rename_tgt() when target is not exist */ +static int cml_name_insert(const struct lu_env *env, struct md_object *p, + const struct lu_name *lname, const struct lu_fid *lf, + const struct md_attr *ma) +{ + int rc; + ENTRY; + + rc = mdo_name_insert(env, md_object_next(p), lname, lf, ma); + + RETURN(rc); +} + +static int cmm_is_subdir(const struct lu_env *env, struct md_object *mo, + const struct lu_fid *fid, struct lu_fid *sfid) +{ + struct cmm_thread_info *cmi; + int rc; + ENTRY; + + cmi = cmm_env_info(env); + rc = cmm_mode_get(env, md_obj2dev(mo), fid, &cmi->cmi_ma, NULL); + if (rc) + RETURN(rc); + + if (!S_ISDIR(cmi->cmi_ma.ma_attr.la_mode)) + RETURN(0); + + rc = mdo_is_subdir(env, md_object_next(mo), fid, sfid); + RETURN(rc); +} + +static struct md_dir_operations cml_dir_ops = { + .mdo_is_subdir = cmm_is_subdir, + .mdo_lookup = cml_lookup, + .mdo_lock_mode = cml_lock_mode, + .mdo_create = cml_create, + .mdo_link = cml_link, + .mdo_unlink = cml_unlink, + .mdo_name_insert = cml_name_insert, + .mdo_rename = cml_rename, + .mdo_rename_tgt = cml_rename_tgt, + .mdo_create_data = cml_create_data +}; + +/* ------------------------------------------------------------------- + * remote CMM object operations. cmr_... + */ +static inline struct cmr_object *lu2cmr_obj(struct lu_object *o) +{ + return container_of0(o, struct cmr_object, cmm_obj.cmo_obj.mo_lu); +} +static inline struct cmr_object *md2cmr_obj(struct md_object *mo) +{ + return container_of0(mo, struct cmr_object, cmm_obj.cmo_obj); +} +static inline struct cmr_object *cmm2cmr_obj(struct cmm_object *co) +{ + return container_of0(co, struct cmr_object, cmm_obj); +} + +/* get proper child device from MDCs */ +static struct lu_device *cmr_child_dev(struct cmm_device *d, __u32 num) +{ + struct lu_device *next = NULL; + struct mdc_device *mdc; + + spin_lock(&d->cmm_tgt_guard); + list_for_each_entry(mdc, &d->cmm_targets, mc_linkage) { + if (mdc->mc_num == num) { + next = mdc2lu_dev(mdc); + break; + } + } + spin_unlock(&d->cmm_tgt_guard); + return next; +} + +/* lu_object operations */ +static void cmr_object_free(const struct lu_env *env, + struct lu_object *lo) +{ + struct cmr_object *cro = lu2cmr_obj(lo); + lu_object_fini(lo); + OBD_FREE_PTR(cro); +} + +static int cmr_object_init(const struct lu_env *env, struct lu_object *lo) +{ + struct cmm_device *cd = lu2cmm_dev(lo->lo_dev); + struct lu_device *c_dev; + struct lu_object *c_obj; + int rc; + + ENTRY; + + c_dev = cmr_child_dev(cd, lu2cmr_obj(lo)->cmo_num); + if (c_dev == NULL) { + rc = -ENOENT; + } else { + c_obj = c_dev->ld_ops->ldo_object_alloc(env, + lo->lo_header, c_dev); + if (c_obj != NULL) { + lu_object_add(lo, c_obj); + rc = 0; + } else { + rc = -ENOMEM; + } + } + + RETURN(rc); +} + +static int cmr_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *lo) +{ + return (*p)(env, cookie, LUSTRE_CMM_NAME"-remote@%p", lo); +} + +static struct lu_object_operations cmr_obj_ops = { + .loo_object_init = cmr_object_init, + .loo_object_free = cmr_object_free, + .loo_object_print = cmr_object_print +}; + +/* CMM remote md_object operations. All are invalid */ +static int cmr_object_create(const struct lu_env *env, + struct md_object *mo, + const struct md_op_spec *spec, + struct md_attr *ma) +{ + return -EFAULT; +} + +static int cmr_permission(const struct lu_env *env, + struct md_object *p, struct md_object *c, + struct md_attr *attr, int mask) +{ + return -EREMOTE; +} + +static int cmr_attr_get(const struct lu_env *env, struct md_object *mo, + struct md_attr *attr) +{ + return -EREMOTE; +} + +static int cmr_attr_set(const struct lu_env *env, struct md_object *mo, + const struct md_attr *attr) +{ + return -EFAULT; +} + +static int cmr_xattr_get(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf, const char *name) +{ + return -EFAULT; +} + +static int cmr_readlink(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf) +{ + return -EFAULT; +} + +static int cmr_xattr_list(const struct lu_env *env, struct md_object *mo, + struct lu_buf *buf) +{ + return -EFAULT; +} + +static int cmr_xattr_set(const struct lu_env *env, struct md_object *mo, + const struct lu_buf *buf, const char *name, int fl) +{ + return -EFAULT; +} + +static int cmr_xattr_del(const struct lu_env *env, struct md_object *mo, + const char *name) +{ + return -EFAULT; +} + +static int cmr_ref_add(const struct lu_env *env, struct md_object *mo, + const struct md_attr *ma) +{ + return -EFAULT; +} + +static int cmr_ref_del(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + return -EFAULT; +} + +static int cmr_open(const struct lu_env *env, struct md_object *mo, + int flags) +{ + return -EREMOTE; +} + +static int cmr_close(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + return -EFAULT; +} + +static int cmr_readpage(const struct lu_env *env, struct md_object *mo, + const struct lu_rdpg *rdpg) +{ + return -EREMOTE; +} + +static int cmr_capa_get(const struct lu_env *env, struct md_object *mo, + struct lustre_capa *capa, int renewal) +{ + return -EFAULT; +} + +static struct md_object_operations cmr_mo_ops = { + .moo_permission = cmr_permission, + .moo_attr_get = cmr_attr_get, + .moo_attr_set = cmr_attr_set, + .moo_xattr_get = cmr_xattr_get, + .moo_xattr_set = cmr_xattr_set, + .moo_xattr_list = cmr_xattr_list, + .moo_xattr_del = cmr_xattr_del, + .moo_object_create = cmr_object_create, + .moo_ref_add = cmr_ref_add, + .moo_ref_del = cmr_ref_del, + .moo_open = cmr_open, + .moo_close = cmr_close, + .moo_readpage = cmr_readpage, + .moo_readlink = cmr_readlink, + .moo_capa_get = cmr_capa_get +}; + +/* remote part of md_dir operations */ +static int cmr_lookup(const struct lu_env *env, struct md_object *mo_p, + const struct lu_name *lname, struct lu_fid *lf, + struct md_op_spec *spec) +{ + /* + * This can happens while rename() If new parent is remote dir, lookup + * will happen here. + */ + + return -EREMOTE; +} + +static mdl_mode_t cmr_lock_mode(const struct lu_env *env, + struct md_object *mo, mdl_mode_t lm) +{ + return MDL_MINMODE; +} + +/* + * All methods below are cross-ref by nature. They consist of remote call and + * local operation. Due to future rollback functionality there are several + * limitations for such methods: + * 1) remote call should be done at first to do epoch negotiation between all + * MDS involved and to avoid the RPC inside transaction. + * 2) only one RPC can be sent - also due to epoch negotiation. + * For more details see rollback HLD/DLD. + */ +static int cmr_create(const struct lu_env *env, struct md_object *mo_p, + const struct lu_name *lchild_name, struct md_object *mo_c, + struct md_op_spec *spec, + struct md_attr *ma) +{ + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma; + int rc; + ENTRY; + + /* Make sure that name isn't exist before doing remote call. */ + rc = mdo_lookup(env, md_object_next(mo_p), lchild_name, + &cmm_env_info(env)->cmi_fid, NULL); + if (rc == 0) + RETURN(-EEXIST); + else if (rc != -ENOENT) + RETURN(rc); + + /* check the SGID attr */ + cmi = cmm_env_info(env); + LASSERT(cmi); + tmp_ma = &cmi->cmi_ma; + tmp_ma->ma_valid = 0; + tmp_ma->ma_need = MA_INODE; + +#ifdef CONFIG_FS_POSIX_ACL + if (!S_ISLNK(ma->ma_attr.la_mode)) { + tmp_ma->ma_acl = cmi->cmi_xattr_buf; + tmp_ma->ma_acl_size = sizeof(cmi->cmi_xattr_buf); + tmp_ma->ma_need |= MA_ACL_DEF; + } +#endif + rc = mo_attr_get(env, md_object_next(mo_p), tmp_ma); + if (rc) + RETURN(rc); + + if (tmp_ma->ma_attr.la_mode & S_ISGID) { + ma->ma_attr.la_gid = tmp_ma->ma_attr.la_gid; + if (S_ISDIR(ma->ma_attr.la_mode)) { + ma->ma_attr.la_mode |= S_ISGID; + ma->ma_attr.la_valid |= LA_MODE; + } + } + +#ifdef CONFIG_FS_POSIX_ACL + if (tmp_ma->ma_valid & MA_ACL_DEF) { + spec->u.sp_ea.fid = spec->u.sp_pfid; + spec->u.sp_ea.eadata = tmp_ma->ma_acl; + spec->u.sp_ea.eadatalen = tmp_ma->ma_acl_size; + spec->sp_cr_flags |= MDS_CREATE_RMT_ACL; + } +#endif + + /* Local permission check for name_insert before remote ops. */ + rc = mo_permission(env, NULL, md_object_next(mo_p), NULL, + (S_ISDIR(ma->ma_attr.la_mode) ? + MAY_LINK : MAY_CREATE)); + if (rc) + RETURN(rc); + + /* Remote object creation and local name insert. */ + /* + * XXX: @ma will be changed after mo_object_create, but we will use + * it for mdo_name_insert later, so save it before mo_object_create. + */ + *tmp_ma = *ma; + rc = mo_object_create(env, md_object_next(mo_c), spec, ma); + if (rc == 0) { + tmp_ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_name_insert(env, md_object_next(mo_p), lchild_name, + lu_object_fid(&mo_c->mo_lu), tmp_ma); + if (unlikely(rc)) { + /* TODO: remove object mo_c on remote MDS */ + CWARN("cmr_create failed, should revoke: [mo_p "DFID"]" + " [name %s] [mo_c "DFID"] [err %d]\n", + PFID(lu_object_fid(&mo_p->mo_lu)), + lchild_name->ln_name, + PFID(lu_object_fid(&mo_c->mo_lu)), rc); + } + } + + RETURN(rc); +} + +static int cmr_link(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_s, const struct lu_name *lname, + struct md_attr *ma) +{ + int rc; + ENTRY; + + /* Make sure that name isn't exist before doing remote call. */ + rc = mdo_lookup(env, md_object_next(mo_p), lname, + &cmm_env_info(env)->cmi_fid, NULL); + if (rc == 0) { + rc = -EEXIST; + } else if (rc == -ENOENT) { + /* Local permission check for name_insert before remote ops. */ + rc = mo_permission(env, NULL, md_object_next(mo_p), NULL, + MAY_CREATE); + if (rc) + RETURN(rc); + + rc = mo_ref_add(env, md_object_next(mo_s), ma); + if (rc == 0) { + ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_name_insert(env, md_object_next(mo_p), lname, + lu_object_fid(&mo_s->mo_lu), ma); + if (unlikely(rc)) { + /* TODO: ref_del from mo_s on remote MDS */ + CWARN("cmr_link failed, should revoke: " + "[mo_p "DFID"] [mo_s "DFID"] " + "[name %s] [err %d]\n", + PFID(lu_object_fid(&mo_p->mo_lu)), + PFID(lu_object_fid(&mo_s->mo_lu)), + lname->ln_name, rc); + } + } + } + RETURN(rc); +} + +static int cmr_unlink(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_c, const struct lu_name *lname, + struct md_attr *ma) +{ + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma; + int rc; + ENTRY; + + /* Local permission check for name_remove before remote ops. */ + rc = mo_permission(env, NULL, md_object_next(mo_p), ma, + MAY_UNLINK | MAY_VTX_PART); + if (rc) + RETURN(rc); + + /* + * XXX: @ma will be changed after mo_ref_del, but we will use + * it for mdo_name_remove later, so save it before mo_ref_del. + */ + cmi = cmm_env_info(env); + tmp_ma = &cmi->cmi_ma; + *tmp_ma = *ma; + rc = mo_ref_del(env, md_object_next(mo_c), ma); + if (rc == 0) { + tmp_ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_name_remove(env, md_object_next(mo_p), lname, tmp_ma); + if (unlikely(rc)) { + /* TODO: ref_add to mo_c on remote MDS */ + CWARN("cmr_unlink failed, should revoke: [mo_p "DFID"]" + " [mo_c "DFID"] [name %s] [err %d]\n", + PFID(lu_object_fid(&mo_p->mo_lu)), + PFID(lu_object_fid(&mo_c->mo_lu)), + lname->ln_name, rc); + } + } + + RETURN(rc); +} + +static inline void cmr_rename_warn(const char *fname, + struct md_object *mo_po, + struct md_object *mo_pn, + const struct lu_fid *lf, + const char *s_name, + const char *t_name, + int err) +{ + CWARN("cmr_rename failed for %s, should revoke: " + "[mo_po "DFID"] [mo_pn "DFID"] [lf "DFID"] " + "[sname %s] [tname %s] [err %d]\n", fname, + PFID(lu_object_fid(&mo_po->mo_lu)), + PFID(lu_object_fid(&mo_pn->mo_lu)), + PFID(lf), s_name, t_name, err); +} + +static int cmr_rename(const struct lu_env *env, + struct md_object *mo_po, struct md_object *mo_pn, + const struct lu_fid *lf, const struct lu_name *ls_name, + struct md_object *mo_t, const struct lu_name *lt_name, + struct md_attr *ma) +{ + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma; + int rc; + ENTRY; + + LASSERT(mo_t == NULL); + + /* get real type of src */ + rc = cmm_mode_get(env, md_obj2dev(mo_po), lf, ma, NULL); + if (rc) + RETURN(rc); + + /* Local permission check for name_remove before remote ops. */ + rc = mo_permission(env, NULL, md_object_next(mo_po), ma, + MAY_UNLINK | MAY_VTX_FULL); + if (rc) + RETURN(rc); + + /* + * XXX: @ma maybe changed after mdo_rename_tgt, but we will use it + * for mdo_name_remove later, so save it before mdo_rename_tgt. + */ + cmi = cmm_env_info(env); + tmp_ma = &cmi->cmi_ma; + *tmp_ma = *ma; + /* the mo_pn is remote directory, so we cannot even know if there is + * mo_t or not. Therefore mo_t is NULL here but remote server should do + * lookup and process this further */ + rc = mdo_rename_tgt(env, md_object_next(mo_pn), + NULL/* mo_t */, lf, lt_name, ma); + if (rc) + RETURN(rc); + + tmp_ma->ma_attr_flags |= MDS_PERM_BYPASS; + + /* src object maybe on remote MDS, do remote ops first. */ + rc = cmm_rename_ctime(env, md_obj2dev(mo_po), lf, tmp_ma); + if (unlikely(rc)) { + /* TODO: revoke mdo_rename_tgt */ + cmr_rename_warn("cmm_rename_ctime", mo_po, mo_pn, lf, + ls_name->ln_name, lt_name->ln_name, rc); + RETURN(rc); + } + + /* only old name is removed localy */ + rc = mdo_name_remove(env, md_object_next(mo_po), ls_name, tmp_ma); + if (unlikely(rc)) + /* TODO: revoke all cmr_rename */ + cmr_rename_warn("mdo_name_remove", mo_po, mo_pn, lf, + ls_name->ln_name, lt_name->ln_name, rc); + + RETURN(rc); +} + +/* part of cross-ref rename(). Used to insert new name in new parent + * and unlink target */ +static int cmr_rename_tgt(const struct lu_env *env, + struct md_object *mo_p, struct md_object *mo_t, + const struct lu_fid *lf, const struct lu_name *lname, + struct md_attr *ma) +{ + struct cmm_thread_info *cmi; + struct md_attr *tmp_ma; + int rc; + ENTRY; + + /* target object is remote one */ + /* Local permission check for rename_tgt before remote ops. */ + rc = mo_permission(env, NULL, md_object_next(mo_p), ma, + MAY_UNLINK | MAY_VTX_PART); + if (rc) + RETURN(rc); + + /* + * XXX: @ma maybe changed after mo_ref_del, but we will use + * it for mdo_rename_tgt later, so save it before mo_ref_del. + */ + cmi = cmm_env_info(env); + tmp_ma = &cmi->cmi_ma; + *tmp_ma = *ma; + rc = mo_ref_del(env, md_object_next(mo_t), ma); + /* continue locally with name handling only */ + if (rc == 0) { + tmp_ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_rename_tgt(env, md_object_next(mo_p), + NULL, lf, lname, tmp_ma); + if (unlikely(rc)) { + /* TODO: ref_add to mo_t on remote MDS */ + CWARN("cmr_rename_tgt failed, should revoke: " + "[mo_p "DFID"] [mo_t "DFID"] [lf "DFID"] " + "[name %s] [err %d]\n", + PFID(lu_object_fid(&mo_p->mo_lu)), + PFID(lu_object_fid(&mo_t->mo_lu)), + PFID(lf), + lname->ln_name, rc); + } + } + RETURN(rc); +} + +static struct md_dir_operations cmr_dir_ops = { + .mdo_is_subdir = cmm_is_subdir, + .mdo_lookup = cmr_lookup, + .mdo_lock_mode = cmr_lock_mode, + .mdo_create = cmr_create, + .mdo_link = cmr_link, + .mdo_unlink = cmr_unlink, + .mdo_rename = cmr_rename, + .mdo_rename_tgt = cmr_rename_tgt, +}; diff --git a/lustre/cmm/cmm_split.c b/lustre/cmm/cmm_split.c new file mode 100644 index 0000000..193a8b0 --- /dev/null +++ b/lustre/cmm/cmm_split.c @@ -0,0 +1,731 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_split.c + * Lustre splitting dir + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Alex Thomas + * Wang Di + * Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include "cmm_internal.h" +#include "mdc_internal.h" + +enum { + CMM_SPLIT_SIZE = 128 * 1024 +}; + +/* + * This function checks if passed @name come to correct server (local MDT). If + * not - return -ERESTART and let client know that dir was split and client + * needs to chose correct stripe. + */ +int cmm_split_check(const struct lu_env *env, struct md_object *mp, + const char *name) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mp)); + struct md_attr *ma = &cmm_env_info(env)->cmi_ma; + struct cml_object *clo = md2cml_obj(mp); + int rc, lmv_size; + ENTRY; + + cmm_lprocfs_time_start(env); + + /* Not split yet */ + if (clo->clo_split == CMM_SPLIT_NONE || + clo->clo_split == CMM_SPLIT_DENIED) + GOTO(out, rc = 0); + + lmv_size = CMM_MD_SIZE(cmm->cmm_tgt_count + 1); + + /* Try to get the LMV EA */ + memset(ma, 0, sizeof(*ma)); + + ma->ma_need = MA_LMV; + ma->ma_lmv_size = lmv_size; + OBD_ALLOC(ma->ma_lmv, lmv_size); + if (ma->ma_lmv == NULL) + GOTO(out, rc = -ENOMEM); + + /* Get LMV EA, Note: refresh valid here for getting LMV_EA */ + rc = mo_attr_get(env, mp, ma); + if (rc) + GOTO(cleanup, rc); + + /* No LMV just return */ + if (!(ma->ma_valid & MA_LMV)) { + /* update split state if unknown */ + if (clo->clo_split == CMM_SPLIT_UNKNOWN) + clo->clo_split = CMM_SPLIT_NONE; + GOTO(cleanup, rc = 0); + } + + /* Skip checking the slave dirs (mea_count is 0) */ + if (ma->ma_lmv->mea_count != 0) { + int idx; + + /* + * Get stripe by name to check the name belongs to master dir, + * otherwise return the -ERESTART + */ + idx = mea_name2idx(ma->ma_lmv, name, strlen(name)); + + /* + * Check if name came to correct MDT server. We suppose that if + * client does not know about split, it sends create operation + * to master MDT. And this is master job to say it that dir got + * split and client should orward request to correct MDT. This + * is why we check here if stripe zero or not. Zero stripe means + * master stripe. If stripe calculated from name is not zero - + * return -ERESTART. + */ + if (idx != 0) + rc = -ERESTART; + + /* update split state to DONE if unknown */ + if (clo->clo_split == CMM_SPLIT_UNKNOWN) + clo->clo_split = CMM_SPLIT_DONE; + } else { + /* split is denied for slave dir */ + clo->clo_split = CMM_SPLIT_DENIED; + } + EXIT; +cleanup: + OBD_FREE(ma->ma_lmv, lmv_size); +out: + cmm_lprocfs_time_end(env, cmm, LPROC_CMM_SPLIT_CHECK); + return rc; +} + +/* + * Return preferable access mode to caller taking into account possible split + * and the fact of existing not splittable dirs in principle. + */ +int cmm_split_access(const struct lu_env *env, struct md_object *mo, + mdl_mode_t lm) +{ + struct md_attr *ma = &cmm_env_info(env)->cmi_ma; + int rc, split; + ENTRY; + + memset(ma, 0, sizeof(*ma)); + + /* + * Check only if we need protection from split. If not - mdt handles + * other cases. + */ + rc = cmm_split_expect(env, mo, ma, &split); + if (rc) { + CERROR("Can't check for possible split, rc %d\n", rc); + RETURN(MDL_MINMODE); + } + + /* + * Do not take PDO lock on non-splittable objects if this is not PW, + * this should speed things up a bit. + */ + if (split == CMM_SPLIT_DONE && lm != MDL_PW) + RETURN(MDL_NL); + + /* Protect splitting by exclusive lock. */ + if (split == CMM_SPLIT_NEEDED && lm == MDL_PW) + RETURN(MDL_EX); + + /* + * Have no idea about lock mode, let it be what higher layer wants. + */ + RETURN(MDL_MINMODE); +} + +/* Check if split is expected for current thread. */ +int cmm_split_expect(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma, int *split) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct cml_object *clo = md2cml_obj(mo); + struct lu_fid root_fid; + int rc; + ENTRY; + + if (clo->clo_split == CMM_SPLIT_DONE || + clo->clo_split == CMM_SPLIT_DENIED) { + *split = clo->clo_split; + RETURN(0); + } + /* CMM_SPLIT_UNKNOWN case below */ + + /* No need to split root object. */ + rc = cmm_child_ops(cmm)->mdo_root_get(env, cmm->cmm_child, + &root_fid); + if (rc) + RETURN(rc); + + if (lu_fid_eq(&root_fid, cmm2fid(md2cmm_obj(mo)))) { + /* update split state */ + *split = clo->clo_split == CMM_SPLIT_DENIED; + RETURN(0); + } + + /* + * Assumption: ma_valid = 0 here, we only need get inode and lmv_size + * for this get_attr. + */ + LASSERT(ma->ma_valid == 0); + ma->ma_need = MA_INODE | MA_LMV; + rc = mo_attr_get(env, mo, ma); + if (rc) + RETURN(rc); + + /* No need split for already split object */ + if (ma->ma_valid & MA_LMV) { + LASSERT(ma->ma_lmv_size > 0); + *split = clo->clo_split = CMM_SPLIT_DONE; + RETURN(0); + } + + /* No need split for object whose size < CMM_SPLIT_SIZE */ + if (ma->ma_attr.la_size < CMM_SPLIT_SIZE) { + *split = clo->clo_split = CMM_SPLIT_NONE; + RETURN(0); + } + + *split = clo->clo_split = CMM_SPLIT_NEEDED; + RETURN(0); +} + +struct cmm_object *cmm_object_find(const struct lu_env *env, + struct cmm_device *d, + const struct lu_fid *f) +{ + struct lu_object *o; + struct cmm_object *m; + ENTRY; + + o = lu_object_find(env, d->cmm_md_dev.md_lu_dev.ld_site, f); + if (IS_ERR(o)) + m = (struct cmm_object *)o; + else + m = lu2cmm_obj(lu_object_locate(o->lo_header, + d->cmm_md_dev.md_lu_dev.ld_type)); + RETURN(m); +} + +static inline void cmm_object_put(const struct lu_env *env, + struct cmm_object *o) +{ + lu_object_put(env, &o->cmo_obj.mo_lu); +} + +/* + * Allocate new on passed @mc for slave object which is going to create there + * soon. + */ +static int cmm_split_fid_alloc(const struct lu_env *env, + struct cmm_device *cmm, + struct mdc_device *mc, + struct lu_fid *fid) +{ + int rc; + ENTRY; + + LASSERT(cmm != NULL && mc != NULL && fid != NULL); + + down(&mc->mc_fid_sem); + + /* Alloc new fid on @mc. */ + rc = obd_fid_alloc(mc->mc_desc.cl_exp, fid, NULL); + if (rc > 0) { + /* Setup FLD for new sequenceif needed. */ + rc = fld_client_create(cmm->cmm_fld, fid_seq(fid), + mc->mc_num, env); + if (rc) + CERROR("Can't create fld entry, rc %d\n", rc); + } + up(&mc->mc_fid_sem); + + RETURN(rc); +} + +/* Allocate new slave object on passed @mc */ +static int cmm_split_slave_create(const struct lu_env *env, + struct cmm_device *cmm, + struct mdc_device *mc, + struct lu_fid *fid, + struct md_attr *ma, + struct lmv_stripe_md *lmv, + int lmv_size) +{ + struct md_op_spec *spec = &cmm_env_info(env)->cmi_spec; + struct cmm_object *obj; + int rc; + ENTRY; + + /* Allocate new fid and store it to @fid */ + rc = cmm_split_fid_alloc(env, cmm, mc, fid); + if (rc) { + CERROR("Can't alloc new fid on "LPU64 + ", rc %d\n", mc->mc_num, rc); + RETURN(rc); + } + + /* Allocate new object on @mc */ + obj = cmm_object_find(env, cmm, fid); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + + memset(spec, 0, sizeof *spec); + spec->u.sp_ea.fid = fid; + spec->u.sp_ea.eadata = lmv; + spec->u.sp_ea.eadatalen = lmv_size; + spec->sp_cr_flags |= MDS_CREATE_SLAVE_OBJ; + rc = mo_object_create(env, md_object_next(&obj->cmo_obj), + spec, ma); + cmm_object_put(env, obj); + RETURN(rc); +} + +/* + * Create so many slaves as number of stripes. This is called in split time + * before sending pages to slaves. + */ +static int cmm_split_slaves_create(const struct lu_env *env, + struct md_object *mo, + struct md_attr *ma) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct lu_fid *lf = cmm2fid(md2cmm_obj(mo)); + struct lmv_stripe_md *slave_lmv = &cmm_env_info(env)->cmi_lmv; + struct mdc_device *mc, *tmp; + struct lmv_stripe_md *lmv; + int i = 1, rc = 0; + ENTRY; + + /* Init the split MEA */ + lmv = ma->ma_lmv; + lmv->mea_master = cmm->cmm_local_num; + lmv->mea_magic = MEA_MAGIC_HASH_SEGMENT; + lmv->mea_count = cmm->cmm_tgt_count + 1; + + /* + * Store master FID to local node idx number. Local node is always + * master and its stripe number if 0. + */ + lmv->mea_ids[0] = *lf; + + memset(slave_lmv, 0, sizeof *slave_lmv); + slave_lmv->mea_master = cmm->cmm_local_num; + slave_lmv->mea_magic = MEA_MAGIC_HASH_SEGMENT; + slave_lmv->mea_count = 0; + + list_for_each_entry_safe(mc, tmp, &cmm->cmm_targets, mc_linkage) { + rc = cmm_split_slave_create(env, cmm, mc, &lmv->mea_ids[i], + ma, slave_lmv, sizeof(*slave_lmv)); + if (rc) + GOTO(cleanup, rc); + i++; + } + + ma->ma_valid |= MA_LMV; + EXIT; +cleanup: + return rc; +} + +static inline int cmm_split_special_entry(struct lu_dirent *ent) +{ + if (!strncmp(ent->lde_name, ".", le16_to_cpu(ent->lde_namelen)) || + !strncmp(ent->lde_name, "..", le16_to_cpu(ent->lde_namelen))) + return 1; + return 0; +} + +static inline struct lu_name *cmm_name(const struct lu_env *env, + char *name, int buflen) +{ + struct lu_name *lname; + struct cmm_thread_info *cmi; + + LASSERT(buflen > 0); + LASSERT(name[buflen - 1] == '\0'); + + cmi = cmm_env_info(env); + lname = &cmi->cti_name; + lname->ln_name = name; + /* NOT count the terminating '\0' of name for length */ + lname->ln_namelen = buflen - 1; + return lname; +} + +/* + * Remove one entry from local MDT. Do not corrupt byte order in page, it will + * be sent to remote MDT. + */ +static int cmm_split_remove_entry(const struct lu_env *env, + struct md_object *mo, + struct lu_dirent *ent) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct cmm_thread_info *cmi; + struct md_attr *ma; + struct cmm_object *obj; + int is_dir, rc; + char *name; + struct lu_name *lname; + ENTRY; + + if (cmm_split_special_entry(ent)) + RETURN(0); + + fid_le_to_cpu(&cmm_env_info(env)->cmi_fid, &ent->lde_fid); + obj = cmm_object_find(env, cmm, &cmm_env_info(env)->cmi_fid); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + + cmi = cmm_env_info(env); + ma = &cmi->cmi_ma; + + if (lu_object_exists(&obj->cmo_obj.mo_lu) > 0) + is_dir = S_ISDIR(lu_object_attr(&obj->cmo_obj.mo_lu)); + else + /* + * XXX: These days only cross-ref dirs are possible, so for the + * sake of simplicity, in split, we suppose that all cross-ref + * names pint to directory and do not do additional getattr to + * remote MDT. + */ + is_dir = 1; + + OBD_ALLOC(name, le16_to_cpu(ent->lde_namelen) + 1); + if (!name) + GOTO(cleanup, rc = -ENOMEM); + + memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen)); + lname = cmm_name(env, name, le16_to_cpu(ent->lde_namelen) + 1); + /* + * When split, no need update parent's ctime, + * and no permission check for name_remove. + */ + ma->ma_attr.la_ctime = 0; + if (is_dir) + ma->ma_attr.la_mode = S_IFDIR; + else + ma->ma_attr.la_mode = 0; + ma->ma_attr.la_valid = LA_MODE; + ma->ma_valid = MA_INODE; + + ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_name_remove(env, md_object_next(mo), lname, ma); + OBD_FREE(name, le16_to_cpu(ent->lde_namelen) + 1); + if (rc) + GOTO(cleanup, rc); + + /* + * This @ent will be transferred to slave MDS and insert there, so in + * the slave MDS, we should know whether this object is dir or not, so + * use the highest bit of the hash to indicate that (because we do not + * use highest bit of hash). + */ + if (is_dir) { + ent->lde_hash = le32_to_cpu(ent->lde_hash); + ent->lde_hash = cpu_to_le32(ent->lde_hash | MAX_HASH_HIGHEST_BIT); + } + EXIT; +cleanup: + cmm_object_put(env, obj); + return rc; +} + +/* + * Remove all entries from passed page. These entries are going to remote MDT + * and thus should be removed locally. + */ +static int cmm_split_remove_page(const struct lu_env *env, + struct md_object *mo, + struct lu_rdpg *rdpg, + __u32 hash_end, __u32 *len) +{ + struct lu_dirpage *dp; + struct lu_dirent *ent; + int rc = 0; + ENTRY; + + *len = 0; + kmap(rdpg->rp_pages[0]); + dp = page_address(rdpg->rp_pages[0]); + for (ent = lu_dirent_start(dp); + ent != NULL && le32_to_cpu(ent->lde_hash) < hash_end; + ent = lu_dirent_next(ent)) { + rc = cmm_split_remove_entry(env, mo, ent); + if (rc) { + /* + * XXX: Error handler to insert remove name back, + * currently we assumed it will success anyway in + * verfication test. + */ + CERROR("Can not del %*.*s, rc %d\n", + le16_to_cpu(ent->lde_namelen), + le16_to_cpu(ent->lde_namelen), + ent->lde_name, rc); + GOTO(unmap, rc); + } + *len += lu_dirent_size(ent); + } + + if (ent != lu_dirent_start(dp)) + *len += sizeof(struct lu_dirpage); + EXIT; +unmap: + kunmap(rdpg->rp_pages[0]); + return rc; +} + +/* Send one page to remote MDT for creating entries there. */ +static int cmm_split_send_page(const struct lu_env *env, + struct md_object *mo, + struct lu_rdpg *rdpg, + struct lu_fid *fid, int len) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct cmm_object *obj; + int rc = 0; + ENTRY; + + obj = cmm_object_find(env, cmm, fid); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + + rc = mdc_send_page(cmm, env, md_object_next(&obj->cmo_obj), + rdpg->rp_pages[0], len); + cmm_object_put(env, obj); + RETURN(rc); +} + +/* Read one page of entries from local MDT. */ +static int cmm_split_read_page(const struct lu_env *env, + struct md_object *mo, + struct lu_rdpg *rdpg) +{ + int rc; + ENTRY; + memset(cfs_kmap(rdpg->rp_pages[0]), 0, CFS_PAGE_SIZE); + cfs_kunmap(rdpg->rp_pages[0]); + rc = mo_readpage(env, md_object_next(mo), rdpg); + RETURN(rc); +} + +/* + * This function performs migration of all pages with entries which fit into one + * stripe and one hash segment. + */ +static int cmm_split_process_stripe(const struct lu_env *env, + struct md_object *mo, + struct lu_rdpg *rdpg, + struct lu_fid *lf, + __u32 end) +{ + int rc, done = 0; + ENTRY; + + LASSERT(rdpg->rp_npages == 1); + do { + struct lu_dirpage *ldp; + __u32 len = 0; + + /* Read one page from local MDT. */ + rc = cmm_split_read_page(env, mo, rdpg); + if (rc) { + CERROR("Error in readpage: %d\n", rc); + RETURN(rc); + } + + /* Remove local entries which are going to remite MDT. */ + rc = cmm_split_remove_page(env, mo, rdpg, end, &len); + if (rc) { + CERROR("Error in remove stripe entries: %d\n", rc); + RETURN(rc); + } + + /* Send entries page to slave MDT. */ + if (len > 0) { + rc = cmm_split_send_page(env, mo, rdpg, lf, len); + if (rc) { + CERROR("Error in sending page: %d\n", rc); + RETURN(rc); + } + } + + kmap(rdpg->rp_pages[0]); + ldp = page_address(rdpg->rp_pages[0]); + if (le32_to_cpu(ldp->ldp_hash_end) >= end) + done = 1; + + rdpg->rp_hash = le32_to_cpu(ldp->ldp_hash_end); + kunmap(rdpg->rp_pages[0]); + } while (!done); + + RETURN(rc); +} + +static int cmm_split_process_dir(const struct lu_env *env, + struct md_object *mo, + struct md_attr *ma) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct lu_rdpg *rdpg = &cmm_env_info(env)->cmi_rdpg; + __u32 hash_segement; + int rc = 0, i; + ENTRY; + + memset(rdpg, 0, sizeof *rdpg); + rdpg->rp_npages = CMM_SPLIT_PAGE_COUNT; + rdpg->rp_count = CFS_PAGE_SIZE * rdpg->rp_npages; + rdpg->rp_pages = cmm_env_info(env)->cmi_pages; + + for (i = 0; i < rdpg->rp_npages; i++) { + rdpg->rp_pages[i] = alloc_pages(GFP_KERNEL, 0); + if (rdpg->rp_pages[i] == NULL) + GOTO(cleanup, rc = -ENOMEM); + } + + LASSERT(ma->ma_valid & MA_LMV); + hash_segement = MAX_HASH_SIZE / (cmm->cmm_tgt_count + 1); + for (i = 1; i < cmm->cmm_tgt_count + 1; i++) { + struct lu_fid *lf; + __u32 hash_end; + + lf = &ma->ma_lmv->mea_ids[i]; + + rdpg->rp_hash = i * hash_segement; + if (i == cmm->cmm_tgt_count) + hash_end = MAX_HASH_SIZE; + else + hash_end = rdpg->rp_hash + hash_segement; + rc = cmm_split_process_stripe(env, mo, rdpg, lf, hash_end); + if (rc) { + CERROR("Error (rc = %d) while splitting for %d: fid=" + DFID", %08x:%08x\n", rc, i, PFID(lf), + rdpg->rp_hash, hash_end); + GOTO(cleanup, rc); + } + } + EXIT; +cleanup: + for (i = 0; i < rdpg->rp_npages; i++) + if (rdpg->rp_pages[i] != NULL) + __free_pages(rdpg->rp_pages[i], 0); + return rc; +} + +int cmm_split_dir(const struct lu_env *env, struct md_object *mo) +{ + struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); + struct md_attr *ma = &cmm_env_info(env)->cmi_ma; + int rc = 0, split; + struct lu_buf *buf; + ENTRY; + + cmm_lprocfs_time_start(env); + + LASSERT(S_ISDIR(lu_object_attr(&mo->mo_lu))); + memset(ma, 0, sizeof(*ma)); + + /* Step1: Checking whether the dir needs to be split. */ + rc = cmm_split_expect(env, mo, ma, &split); + if (rc) + GOTO(out, rc); + + if (split != CMM_SPLIT_NEEDED) { + /* No split is needed, caller may proceed with create. */ + GOTO(out, rc = 0); + } + + /* Split should be done now, let's do it. */ + CWARN("Dir "DFID" is going to split (size: "LPU64")\n", + PFID(lu_object_fid(&mo->mo_lu)), ma->ma_attr.la_size); + + /* + * Disable transacrions for split, since there will be so many trans in + * this one ops, conflict with current recovery design. + */ + rc = cmm_upcall(env, &cmm->cmm_md_dev, MD_NO_TRANS); + if (rc) { + CERROR("Can't disable trans for split, rc %d\n", rc); + GOTO(out, rc); + } + + /* Step2: Prepare the md memory */ + ma->ma_lmv_size = CMM_MD_SIZE(cmm->cmm_tgt_count + 1); + OBD_ALLOC(ma->ma_lmv, ma->ma_lmv_size); + if (ma->ma_lmv == NULL) + GOTO(out, rc = -ENOMEM); + + /* Step3: Create slave objects and fill the ma->ma_lmv */ + rc = cmm_split_slaves_create(env, mo, ma); + if (rc) { + CERROR("Can't create slaves for split, rc %d\n", rc); + GOTO(cleanup, rc); + } + + /* Step4: Scan and split the object. */ + rc = cmm_split_process_dir(env, mo, ma); + if (rc) { + CERROR("Can't scan and split, rc %d\n", rc); + GOTO(cleanup, rc); + } + + /* Step5: Set mea to the master object. */ + LASSERT(ma->ma_valid & MA_LMV); + buf = cmm_buf_get(env, ma->ma_lmv, ma->ma_lmv_size); + rc = mo_xattr_set(env, md_object_next(mo), buf, + MDS_LMV_MD_NAME, 0); + if (rc) { + CERROR("Can't set MEA to master dir, " "rc %d\n", rc); + GOTO(cleanup, rc); + } + + /* set flag in cmm_object */ + md2cml_obj(mo)->clo_split = CMM_SPLIT_DONE; + + /* + * Finally, split succeed, tell client to repeat opetartion on correct + * MDT. + */ + CWARN("Dir "DFID" has been split\n", PFID(lu_object_fid(&mo->mo_lu))); + rc = -ERESTART; + EXIT; +cleanup: + OBD_FREE(ma->ma_lmv, ma->ma_lmv_size); +out: + cmm_lprocfs_time_end(env, cmm, LPROC_CMM_SPLIT); + return rc; +} diff --git a/lustre/cmm/mdc_device.c b/lustre/cmm/mdc_device.c new file mode 100644 index 0000000..480a73a --- /dev/null +++ b/lustre/cmm/mdc_device.c @@ -0,0 +1,347 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_mdc.c + * Lustre Metadata Client (mdc) + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include "cmm_internal.h" +#include "mdc_internal.h" + +static struct lu_device_operations mdc_lu_ops; + +static inline int lu_device_is_mdc(struct lu_device *ld) +{ + return ergo(ld != NULL && ld->ld_ops != NULL, + ld->ld_ops == &mdc_lu_ops); +} + +static struct md_device_operations mdc_md_ops = { 0 }; + +static int mdc_obd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner) +{ + struct mdc_device *mc = owner; + int rc = 0; + ENTRY; + + LASSERT(mc != NULL); + CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); + if (ev == OBD_NOTIFY_ACTIVE) { + CDEBUG(D_INFO|D_WARNING, "Device %s is active now\n", + watched->obd_name); + } else if (ev == OBD_NOTIFY_INACTIVE) { + CDEBUG(D_INFO|D_WARNING, "Device %s is inactive now\n", + watched->obd_name); + } else if (ev == OBD_NOTIFY_OCD) { + struct obd_connect_data *conn_data = + &watched->u.cli.cl_import->imp_connect_data; + /* + * Update exp_connect_flags. + */ + mc->mc_desc.cl_exp->exp_connect_flags = + conn_data->ocd_connect_flags; + CDEBUG(D_INFO, "Update connect_flags: "LPX64"\n", + conn_data->ocd_connect_flags); + } + + RETURN(rc); +} +/* MDC OBD is set up already and connected to the proper MDS + * mdc_add_obd() find that obd by uuid and connects to it. + * Local MDT uuid is used for connection + * */ +static int mdc_obd_add(const struct lu_env *env, + struct mdc_device *mc, struct lustre_cfg *cfg) +{ + struct mdc_cli_desc *desc = &mc->mc_desc; + struct obd_device *mdc; + const char *uuid_str = lustre_cfg_string(cfg, 1); + const char *index = lustre_cfg_string(cfg, 2); + const char *mdc_uuid_str = lustre_cfg_string(cfg, 4); + struct lu_site *ls = mdc2lu_dev(mc)->ld_site; + char *p; + int rc = 0; + + ENTRY; + LASSERT(uuid_str); + LASSERT(index); + + mc->mc_num = simple_strtol(index, &p, 10); + if (*p) { + CERROR("Invalid index in lustre_cgf, offset 2\n"); + RETURN(-EINVAL); + } + + obd_str2uuid(&desc->cl_srv_uuid, uuid_str); + obd_str2uuid(&desc->cl_cli_uuid, mdc_uuid_str); + /* try to find MDC OBD connected to the needed MDT */ + mdc = class_find_client_obd(&desc->cl_srv_uuid, LUSTRE_MDC_NAME, + &desc->cl_cli_uuid); + if (!mdc) { + CERROR("Cannot find MDC OBD connected to %s\n", uuid_str); + rc = -ENOENT; + } else if (!mdc->obd_set_up) { + CERROR("target %s not set up\n", mdc->obd_name); + rc = -EINVAL; + } else { + struct lustre_handle *conn = &desc->cl_conn; + struct obd_connect_data *ocd; + + CDEBUG(D_CONFIG, "connect to %s(%s)\n", + mdc->obd_name, mdc->obd_uuid.uuid); + + OBD_ALLOC_PTR(ocd); + if (!ocd) + RETURN(-ENOMEM); + /* + * The connection between MDS must be local, + * IBITS are needed for rename_lock (INODELOCK_UPDATE) + */ + ocd->ocd_ibits_known = MDS_INODELOCK_UPDATE; + ocd->ocd_connect_flags = OBD_CONNECT_VERSION | + OBD_CONNECT_ACL | + OBD_CONNECT_LCL_CLIENT | + OBD_CONNECT_MDS_CAPA | + OBD_CONNECT_OSS_CAPA | + OBD_CONNECT_IBITS | + OBD_CONNECT_MDS_MDS; + rc = obd_connect(env, conn, mdc, &mdc->obd_uuid, ocd); + OBD_FREE_PTR(ocd); + if (rc) { + CERROR("target %s connect error %d\n", + mdc->obd_name, rc); + } else { + desc->cl_exp = class_conn2export(conn); + /* set seq controller export for MDC0 if exists */ + if (mc->mc_num == 0) + ls->ls_control_exp = + class_export_get(desc->cl_exp); + rc = obd_fid_init(desc->cl_exp); + if (rc) + CERROR("fid init error %d \n", rc); + else { + /* obd notify mechanism */ + mdc->obd_upcall.onu_owner = mc; + mdc->obd_upcall.onu_upcall = mdc_obd_update; + } + } + + if (rc) { + obd_disconnect(desc->cl_exp); + desc->cl_exp = NULL; + } + } + + RETURN(rc); +} + +static int mdc_obd_del(const struct lu_env *env, struct mdc_device *mc, + struct lustre_cfg *cfg) +{ + struct mdc_cli_desc *desc = &mc->mc_desc; + const char *dev = lustre_cfg_string(cfg, 0); + struct obd_device *mdc_obd = class_exp2obd(desc->cl_exp); + struct obd_device *mdt_obd; + int rc; + + ENTRY; + + CDEBUG(D_CONFIG, "Disconnect from %s\n", + mdc_obd->obd_name); + + /* Set mdt_obd flags in shutdown. */ + mdt_obd = class_name2obd(dev); + LASSERT(mdt_obd != NULL); + if (mdc_obd) { + mdc_obd->obd_no_recov = mdt_obd->obd_no_recov; + mdc_obd->obd_force = mdt_obd->obd_force; + mdc_obd->obd_fail = 0; + } + + rc = obd_fid_fini(desc->cl_exp); + if (rc) + CERROR("Fid fini error %d\n", rc); + + obd_register_observer(mdc_obd, NULL); + mdc_obd->obd_upcall.onu_owner = NULL; + mdc_obd->obd_upcall.onu_upcall = NULL; + rc = obd_disconnect(desc->cl_exp); + if (rc) { + CERROR("Target %s disconnect error %d\n", + mdc_obd->obd_name, rc); + } + class_manual_cleanup(mdc_obd); + desc->cl_exp = NULL; + + RETURN(0); +} + +static int mdc_process_config(const struct lu_env *env, + struct lu_device *ld, + struct lustre_cfg *cfg) +{ + struct mdc_device *mc = lu2mdc_dev(ld); + int rc; + + ENTRY; + switch (cfg->lcfg_command) { + case LCFG_ADD_MDC: + rc = mdc_obd_add(env, mc, cfg); + break; + case LCFG_CLEANUP: + rc = mdc_obd_del(env, mc, cfg); + break; + default: + rc = -EOPNOTSUPP; + } + RETURN(rc); +} + +static struct lu_device_operations mdc_lu_ops = { + .ldo_object_alloc = mdc_object_alloc, + .ldo_process_config = mdc_process_config +}; + +void mdc_init_ea_size(const struct lu_env *env, struct mdc_device *mc, + int max_mdsize, int max_cookiesize) +{ + struct obd_device *obd = class_exp2obd(mc->mc_desc.cl_exp); + + obd->u.cli.cl_max_mds_easize = max_mdsize; + obd->u.cli.cl_max_mds_cookiesize = max_cookiesize; +} + +static int mdc_device_init(const struct lu_env *env, struct lu_device *ld, + const char *name, struct lu_device *next) +{ + return 0; +} + +static struct lu_device *mdc_device_fini(const struct lu_env *env, + struct lu_device *ld) +{ + ENTRY; + RETURN (NULL); +} + +struct lu_device *mdc_device_alloc(const struct lu_env *env, + struct lu_device_type *ldt, + struct lustre_cfg *cfg) +{ + struct lu_device *ld; + struct mdc_device *mc; + ENTRY; + + OBD_ALLOC_PTR(mc); + if (mc == NULL) { + ld = ERR_PTR(-ENOMEM); + } else { + md_device_init(&mc->mc_md_dev, ldt); + mc->mc_md_dev.md_ops = &mdc_md_ops; + ld = mdc2lu_dev(mc); + ld->ld_ops = &mdc_lu_ops; + sema_init(&mc->mc_fid_sem, 1); + + } + + RETURN (ld); +} +void mdc_device_free(const struct lu_env *env, struct lu_device *ld) +{ + struct mdc_device *mc = lu2mdc_dev(ld); + + LASSERTF(atomic_read(&ld->ld_ref) == 0, + "Refcount = %i\n", atomic_read(&ld->ld_ref)); + LASSERT(list_empty(&mc->mc_linkage)); + md_device_fini(&mc->mc_md_dev); + OBD_FREE_PTR(mc); +} + +/* context key constructor/destructor */ + +static void *mdc_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct mdc_thread_info *info; + + CLASSERT(CFS_PAGE_SIZE >= sizeof *info); + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + return info; +} + +static void mdc_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct mdc_thread_info *info = data; + OBD_FREE_PTR(info); +} + +struct lu_context_key mdc_thread_key = { + .lct_tags = LCT_MD_THREAD|LCT_CL_THREAD, + .lct_init = mdc_key_init, + .lct_fini = mdc_key_fini +}; + +int mdc_type_init(struct lu_device_type *ldt) +{ + LU_CONTEXT_KEY_INIT(&mdc_thread_key); + return lu_context_key_register(&mdc_thread_key); +} + +void mdc_type_fini(struct lu_device_type *ldt) +{ + lu_context_key_degister(&mdc_thread_key); +} + +static struct lu_device_type_operations mdc_device_type_ops = { + .ldto_init = mdc_type_init, + .ldto_fini = mdc_type_fini, + + .ldto_device_alloc = mdc_device_alloc, + .ldto_device_free = mdc_device_free, + + .ldto_device_init = mdc_device_init, + .ldto_device_fini = mdc_device_fini +}; + +struct lu_device_type mdc_device_type = { + .ldt_tags = LU_DEVICE_MD, + .ldt_name = LUSTRE_CMM_MDC_NAME, + .ldt_ops = &mdc_device_type_ops, + .ldt_ctx_tags = LCT_MD_THREAD|LCT_CL_THREAD +}; + diff --git a/lustre/cmm/mdc_internal.h b/lustre/cmm/mdc_internal.h new file mode 100644 index 0000000..b5a2ae6 --- /dev/null +++ b/lustre/cmm/mdc_internal.h @@ -0,0 +1,109 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/cmm_internal.h + * Lustre Cluster Metadata Manager (cmm), + * MDC device + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _CMM_MDC_INTERNAL_H +#define _CMM_MDC_INTERNAL_H + +#if defined(__KERNEL__) + +#include +#include +#include + +struct mdc_cli_desc { + struct lustre_handle cl_conn; + /* uuid of remote MDT to connect */ + struct obd_uuid cl_srv_uuid; + /* mdc uuid */ + struct obd_uuid cl_cli_uuid; + /* export of mdc obd */ + struct obd_export *cl_exp; +}; + +struct mdc_device { + struct md_device mc_md_dev; + /* other MD servers in cluster */ + struct list_head mc_linkage; + mdsno_t mc_num; + struct mdc_cli_desc mc_desc; + struct semaphore mc_fid_sem; +}; + +struct mdc_thread_info { + struct md_op_data mci_opdata; + struct ptlrpc_request *mci_req; +}; + +struct mdc_object { + struct md_object mco_obj; +}; + +static inline struct lu_device *mdc2lu_dev(struct mdc_device *mc) +{ + return (&mc->mc_md_dev.md_lu_dev); +} + +static inline struct mdc_device *md2mdc_dev(struct md_device *md) +{ + return container_of0(md, struct mdc_device, mc_md_dev); +} + +static inline struct mdc_device *mdc_obj2dev(struct mdc_object *mco) +{ + return (md2mdc_dev(md_obj2dev(&mco->mco_obj))); +} + +static inline struct mdc_object *lu2mdc_obj(struct lu_object *lo) +{ + return container_of0(lo, struct mdc_object, mco_obj.mo_lu); +} + +static inline struct mdc_object *md2mdc_obj(struct md_object *mo) +{ + return container_of0(mo, struct mdc_object, mco_obj); +} + +static inline struct mdc_device *lu2mdc_dev(struct lu_device *ld) +{ + return container_of0(ld, struct mdc_device, mc_md_dev.md_lu_dev); +} + +struct lu_object *mdc_object_alloc(const struct lu_env *, + const struct lu_object_header *, + struct lu_device *); + +void mdc_init_ea_size(const struct lu_env *env, struct mdc_device *mc, + int max_mdsize, int max_cookiesize); +#ifdef HAVE_SPLIT_SUPPORT +int mdc_send_page(struct cmm_device *cmm, const struct lu_env *env, + struct md_object *mo, struct page *page, __u32 end); +#endif + +#endif /* __KERNEL__ */ +#endif /* _CMM_MDC_INTERNAL_H */ diff --git a/lustre/cmm/mdc_object.c b/lustre/cmm/mdc_object.c new file mode 100644 index 0000000..1ac12c9 --- /dev/null +++ b/lustre/cmm/mdc_object.c @@ -0,0 +1,579 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/cmm/mdc_object.c + * Lustre Cluster Metadata Manager (cmm) + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_MDS +#include +#include +#include +#include +#include "cmm_internal.h" +#include "mdc_internal.h" + +static struct md_object_operations mdc_mo_ops; +static struct md_dir_operations mdc_dir_ops; +static struct lu_object_operations mdc_obj_ops; + +extern struct lu_context_key mdc_thread_key; + +struct lu_object *mdc_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *ld) +{ + struct mdc_object *mco; + ENTRY; + + OBD_ALLOC_PTR(mco); + if (mco != NULL) { + struct lu_object *lo; + + lo = &mco->mco_obj.mo_lu; + lu_object_init(lo, NULL, ld); + mco->mco_obj.mo_ops = &mdc_mo_ops; + mco->mco_obj.mo_dir_ops = &mdc_dir_ops; + lo->lo_ops = &mdc_obj_ops; + RETURN(lo); + } else + RETURN(NULL); +} + +static void mdc_object_free(const struct lu_env *env, struct lu_object *lo) +{ + struct mdc_object *mco = lu2mdc_obj(lo); + lu_object_fini(lo); + OBD_FREE_PTR(mco); +} + +static int mdc_object_init(const struct lu_env *env, struct lu_object *lo) +{ + ENTRY; + lo->lo_header->loh_attr |= LOHA_REMOTE; + RETURN(0); +} + +static int mdc_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *lo) +{ + return (*p)(env, cookie, LUSTRE_CMM_MDC_NAME"-object@%p", lo); +} + +static struct lu_object_operations mdc_obj_ops = { + .loo_object_init = mdc_object_init, + .loo_object_free = mdc_object_free, + .loo_object_print = mdc_object_print, +}; + +/* md_object_operations */ +static +struct mdc_thread_info *mdc_info_get(const struct lu_env *env) +{ + struct mdc_thread_info *mci; + + mci = lu_context_key_get(&env->le_ctx, &mdc_thread_key); + LASSERT(mci); + return mci; +} + +static +struct mdc_thread_info *mdc_info_init(const struct lu_env *env) +{ + struct mdc_thread_info *mci = mdc_info_get(env); + memset(mci, 0, sizeof(*mci)); + return mci; +} + +static void mdc_body2attr(struct mdt_body *body, struct md_attr *ma) +{ + struct lu_attr *la = &ma->ma_attr; + /* update time */ + if (body->valid & OBD_MD_FLCTIME && body->ctime >= la->la_ctime) { + la->la_ctime = body->ctime; + if (body->valid & OBD_MD_FLMTIME) + la->la_mtime = body->mtime; + } + + if (body->valid & OBD_MD_FLMODE) + la->la_mode = body->mode; + if (body->valid & OBD_MD_FLSIZE) + la->la_size = body->size; + if (body->valid & OBD_MD_FLBLOCKS) + la->la_blocks = body->blocks; + if (body->valid & OBD_MD_FLUID) + la->la_uid = body->uid; + if (body->valid & OBD_MD_FLGID) + la->la_gid = body->gid; + if (body->valid & OBD_MD_FLFLAGS) + la->la_flags = body->flags; + if (body->valid & OBD_MD_FLNLINK) + la->la_nlink = body->nlink; + if (body->valid & OBD_MD_FLRDEV) + la->la_rdev = body->rdev; + + la->la_valid = body->valid; + ma->ma_valid = MA_INODE; +} + +static int mdc_req2attr_update(const struct lu_env *env, + struct md_attr *ma) +{ + struct mdc_thread_info *mci; + struct ptlrpc_request *req; + struct mdt_body *body; + struct lov_mds_md *lov; + struct llog_cookie *cookie; + + ENTRY; + mci = mdc_info_get(env); + req = mci->mci_req; + LASSERT(req); + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); + LASSERT(body); + mdc_body2attr(body, ma); + + if (body->valid & OBD_MD_FLMDSCAPA) { + struct lustre_capa *capa; + + /* create for cross-ref will fetch mds capa from remote obj */ + capa = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, + sizeof(*capa)); + LASSERT(capa != NULL); + LASSERT(ma->ma_capa != NULL); + *ma->ma_capa = *capa; + } + + if (!(body->valid & OBD_MD_FLEASIZE)) + RETURN(0); + + if (body->eadatasize == 0) { + CERROR("OBD_MD_FLEASIZE is set but eadatasize is zero\n"); + RETURN(-EPROTO); + } + + lov = lustre_swab_repbuf(req, REPLY_REC_OFF + 1, + body->eadatasize, NULL); + if (lov == NULL) { + CERROR("Can't unpack MDS EA data\n"); + RETURN(-EPROTO); + } + + LASSERT(ma->ma_lmm != NULL); + LASSERT(ma->ma_lmm_size >= body->eadatasize); + ma->ma_lmm_size = body->eadatasize; + memcpy(ma->ma_lmm, lov, ma->ma_lmm_size); + ma->ma_valid |= MA_LOV; + + if (!(body->valid & OBD_MD_FLCOOKIE)) + RETURN(0); + + if (body->aclsize == 0) { + CERROR("OBD_MD_FLCOOKIE is set but cookie size is zero\n"); + RETURN(-EPROTO); + } + + cookie = lustre_msg_buf(req->rq_repmsg, + REPLY_REC_OFF + 2, body->aclsize); + if (cookie == NULL) { + CERROR("Can't unpack unlink cookie data\n"); + RETURN(-EPROTO); + } + + LASSERT(ma->ma_cookie != NULL); + LASSERT(ma->ma_cookie_size == body->aclsize); + memcpy(ma->ma_cookie, cookie, ma->ma_cookie_size); + ma->ma_valid |= MA_COOKIE; + RETURN(0); +} + +static int mdc_attr_get(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + struct mdc_thread_info *mci; + int rc; + ENTRY; + + mci = lu_context_key_get(&env->le_ctx, &mdc_thread_key); + LASSERT(mci); + + memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata)); + + rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), + NULL, OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID | + OBD_MD_FLFLAGS | OBD_MD_FLCROSSREF, 0, &mci->mci_req); + if (rc == 0) { + /* get attr from request */ + rc = mdc_req2attr_update(env, ma); + } + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} + +static inline struct timespec *mdc_attr_time(struct timespec *t, __u64 seconds) +{ + t->tv_sec = seconds; + t->tv_nsec = 0; + return t; +} + +/* + * XXX: It is only used for set ctime when rename's source on remote MDS. + */ +static int mdc_attr_set(const struct lu_env *env, struct md_object *mo, + const struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + const struct lu_attr *la = &ma->ma_attr; + struct mdc_thread_info *mci; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + + mci = lu_context_key_get(&env->le_ctx, &mdc_thread_key); + LASSERT(mci); + + memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata)); + + mci->mci_opdata.op_fid1 = *lu_object_fid(&mo->mo_lu); + mdc_attr_time(&mci->mci_opdata.op_attr.ia_ctime, la->la_ctime); + mci->mci_opdata.op_attr.ia_mode = la->la_mode; + mci->mci_opdata.op_attr.ia_valid = ATTR_CTIME_SET; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.op_fsuid = uc->mu_fsuid; + mci->mci_opdata.op_fsgid = uc->mu_fsgid; + mci->mci_opdata.op_cap = uc->mu_cap; + if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD)) { + mci->mci_opdata.op_suppgids[0] = uc->mu_suppgids[0]; + mci->mci_opdata.op_suppgids[1] = uc->mu_suppgids[1]; + } else { + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + } else { + mci->mci_opdata.op_fsuid = la->la_uid; + mci->mci_opdata.op_fsgid = la->la_gid; + mci->mci_opdata.op_cap = current->cap_effective; + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + + rc = md_setattr(mc->mc_desc.cl_exp, &mci->mci_opdata, + NULL, 0, NULL, 0, &mci->mci_req); + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} + +static int mdc_object_create(const struct lu_env *env, + struct md_object *mo, + const struct md_op_spec *spec, + struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + struct lu_attr *la = &ma->ma_attr; + struct mdc_thread_info *mci; + const void *symname; + struct md_ucred *uc = md_ucred(env); + int rc, symlen; + uid_t uid; + gid_t gid; + __u32 cap; + ENTRY; + + LASSERT(S_ISDIR(la->la_mode)); + LASSERT(spec->u.sp_pfid != NULL); + + mci = mdc_info_init(env); + mci->mci_opdata.op_bias = MDS_CROSS_REF; + mci->mci_opdata.op_fid2 = *lu_object_fid(&mo->mo_lu); + + /* Parent fid is needed to create dotdot on the remote node. */ + mci->mci_opdata.op_fid1 = *(spec->u.sp_pfid); + mci->mci_opdata.op_mod_time = la->la_ctime; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + uid = uc->mu_fsuid; + if (la->la_mode & S_ISGID) + gid = la->la_gid; + else + gid = uc->mu_fsgid; + cap = uc->mu_cap; + if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD)) + mci->mci_opdata.op_suppgids[0] = uc->mu_suppgids[0]; + else + mci->mci_opdata.op_suppgids[0] = -1; + } else { + uid = la->la_uid; + gid = la->la_gid; + cap = 0; + mci->mci_opdata.op_suppgids[0] = -1; + } + + /* get data from spec */ + if (spec->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { + symname = spec->u.sp_ea.eadata; + symlen = spec->u.sp_ea.eadatalen; + mci->mci_opdata.op_fid1 = *(spec->u.sp_ea.fid); + mci->mci_opdata.op_flags |= MDS_CREATE_SLAVE_OBJ; +#ifdef CONFIG_FS_POSIX_ACL + } else if (spec->sp_cr_flags & MDS_CREATE_RMT_ACL) { + symname = spec->u.sp_ea.eadata; + symlen = spec->u.sp_ea.eadatalen; + mci->mci_opdata.op_fid1 = *(spec->u.sp_ea.fid); + mci->mci_opdata.op_flags |= MDS_CREATE_RMT_ACL; +#endif + } else { + symname = spec->u.sp_symname; + symlen = symname ? strlen(symname) + 1 : 0; + } + + rc = md_create(mc->mc_desc.cl_exp, &mci->mci_opdata, + symname, symlen, la->la_mode, uid, gid, + cap, la->la_rdev, &mci->mci_req); + + if (rc == 0) { + /* get attr from request */ + rc = mdc_req2attr_update(env, ma); + } + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} + +static int mdc_ref_add(const struct lu_env *env, struct md_object *mo, + const struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + const struct lu_attr *la = &ma->ma_attr; + struct mdc_thread_info *mci; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + mci = lu_context_key_get(&env->le_ctx, &mdc_thread_key); + LASSERT(mci); + + memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata)); + mci->mci_opdata.op_bias = MDS_CROSS_REF; + mci->mci_opdata.op_fid1 = *lu_object_fid(&mo->mo_lu); + mci->mci_opdata.op_mod_time = la->la_ctime; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.op_fsuid = uc->mu_fsuid; + mci->mci_opdata.op_fsgid = uc->mu_fsgid; + mci->mci_opdata.op_cap = uc->mu_cap; + if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD)) { + mci->mci_opdata.op_suppgids[0] = uc->mu_suppgids[0]; + mci->mci_opdata.op_suppgids[1] = uc->mu_suppgids[1]; + } else { + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + } else { + mci->mci_opdata.op_fsuid = la->la_uid; + mci->mci_opdata.op_fsgid = la->la_gid; + mci->mci_opdata.op_cap = current->cap_effective; + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + + + rc = md_link(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req); + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} + +static int mdc_ref_del(const struct lu_env *env, struct md_object *mo, + struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + struct lu_attr *la = &ma->ma_attr; + struct mdc_thread_info *mci; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + mci = mdc_info_init(env); + mci->mci_opdata.op_bias = MDS_CROSS_REF; + if (ma->ma_attr_flags & MDS_VTX_BYPASS) + mci->mci_opdata.op_bias |= MDS_VTX_BYPASS; + else + mci->mci_opdata.op_bias &= ~MDS_VTX_BYPASS; + mci->mci_opdata.op_fid1 = *lu_object_fid(&mo->mo_lu); + mci->mci_opdata.op_mode = la->la_mode; + mci->mci_opdata.op_mod_time = la->la_ctime; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.op_fsuid = uc->mu_fsuid; + mci->mci_opdata.op_fsgid = uc->mu_fsgid; + mci->mci_opdata.op_cap = uc->mu_cap; + if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD)) + mci->mci_opdata.op_suppgids[0] = uc->mu_suppgids[0]; + else + mci->mci_opdata.op_suppgids[0] = -1; + } else { + mci->mci_opdata.op_fsuid = la->la_uid; + mci->mci_opdata.op_fsgid = la->la_gid; + mci->mci_opdata.op_cap = current->cap_effective; + mci->mci_opdata.op_suppgids[0] = -1; + } + + rc = md_unlink(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req); + if (rc == 0) { + /* get attr from request */ + rc = mdc_req2attr_update(env, ma); + } + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} + +#ifdef HAVE_SPLIT_SUPPORT +int mdc_send_page(struct cmm_device *cm, const struct lu_env *env, + struct md_object *mo, struct page *page, __u32 offset) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + int rc; + ENTRY; + + rc = mdc_sendpage(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), + page, offset); + CDEBUG(rc ? D_ERROR : D_INFO, "send page %p offset %d fid "DFID + " rc %d \n", page, offset, PFID(lu_object_fid(&mo->mo_lu)), rc); + RETURN(rc); +} +#endif + +static struct md_object_operations mdc_mo_ops = { + .moo_attr_get = mdc_attr_get, + .moo_attr_set = mdc_attr_set, + .moo_object_create = mdc_object_create, + .moo_ref_add = mdc_ref_add, + .moo_ref_del = mdc_ref_del, +}; + +/* md_dir_operations */ +static int mdc_rename_tgt(const struct lu_env *env, struct md_object *mo_p, + struct md_object *mo_t, const struct lu_fid *lf, + const struct lu_name *lname, struct md_attr *ma) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo_p)); + struct lu_attr *la = &ma->ma_attr; + struct mdc_thread_info *mci; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + mci = mdc_info_init(env); + mci->mci_opdata.op_bias = MDS_CROSS_REF; + if (ma->ma_attr_flags & MDS_VTX_BYPASS) + mci->mci_opdata.op_bias |= MDS_VTX_BYPASS; + else + mci->mci_opdata.op_bias &= ~MDS_VTX_BYPASS; + mci->mci_opdata.op_fid1 = *lu_object_fid(&mo_p->mo_lu); + mci->mci_opdata.op_fid2 = *lf; + mci->mci_opdata.op_mode = la->la_mode; + mci->mci_opdata.op_mod_time = la->la_ctime; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.op_fsuid = uc->mu_fsuid; + mci->mci_opdata.op_fsgid = uc->mu_fsgid; + mci->mci_opdata.op_cap = uc->mu_cap; + if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD)) { + mci->mci_opdata.op_suppgids[0] = uc->mu_suppgids[0]; + mci->mci_opdata.op_suppgids[1] = uc->mu_suppgids[1]; + } else { + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + } else { + mci->mci_opdata.op_fsuid = la->la_uid; + mci->mci_opdata.op_fsgid = la->la_gid; + mci->mci_opdata.op_cap = current->cap_effective; + mci->mci_opdata.op_suppgids[0] = + mci->mci_opdata.op_suppgids[1] = -1; + } + + rc = md_rename(mc->mc_desc.cl_exp, &mci->mci_opdata, NULL, 0, + lname->ln_name, lname->ln_namelen, &mci->mci_req); + if (rc == 0) { + /* get attr from request */ + mdc_req2attr_update(env, ma); + } + + ptlrpc_req_finished(mci->mci_req); + + RETURN(rc); +} +/* + * Return resulting fid in sfid + * 0: fids are not relatives + * fid: fid at which search stopped + */ +static int mdc_is_subdir(const struct lu_env *env, struct md_object *mo, + const struct lu_fid *fid, struct lu_fid *sfid) +{ + struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); + struct mdc_thread_info *mci; + struct mdt_body *body; + int rc; + ENTRY; + + mci = mdc_info_init(env); + + rc = md_is_subdir(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), + fid, &mci->mci_req); + if (rc == 0 || rc == -EREMOTE) { + body = lustre_msg_buf(mci->mci_req->rq_repmsg, REPLY_REC_OFF, + sizeof(*body)); + LASSERT(body->valid & OBD_MD_FLID); + + CDEBUG(D_INFO, "Remote mdo_is_subdir(), new src "DFID"\n", + PFID(&body->fid1)); + *sfid = body->fid1; + } + ptlrpc_req_finished(mci->mci_req); + RETURN(rc); +} + +static struct md_dir_operations mdc_dir_ops = { + .mdo_is_subdir = mdc_is_subdir, + .mdo_rename_tgt = mdc_rename_tgt +}; diff --git a/lustre/contrib/mpich2-1.0.3.patch b/lustre/contrib/mpich2-1.0.3.patch new file mode 100644 index 0000000..78dda9b --- /dev/null +++ b/lustre/contrib/mpich2-1.0.3.patch @@ -0,0 +1,1831 @@ +Date: Fri, 08 Jun 2007 14:04:34 -0400 +From: Weikuan Yu +To: Weikuan Yu +Subject: Re: [Lustre-discuss] MPI-IO for Lustre +Cc: lustre-discuss@clusterfs.com + + +This is the MPICH2 patch I originally started as a base for some ROMIO +optimizations over Lustre. It should work fine for MPICH2-1.0.3 on +experimental systems. However, use it as your risk :) + +Given time, I will try to push out my optimizations after some cleanup. I +would very happy to hear feedbacks on what features people would need most +at the time. + +-- +Weikuan + + +diff -ruN romio-orig/adio/ad_lustre/ad_lustre.c romio/adio/ad_lustre/ad_lustre.c +--- romio-orig/adio/ad_lustre/ad_lustre.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre.c 2006-09-06 18:40:56.000844619 -0400 +@@ -0,0 +1,37 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 2001 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++/* adioi.h has the ADIOI_Fns_struct define */ ++#include "adioi.h" ++ ++struct ADIOI_Fns_struct ADIO_LUSTRE_operations = { ++ ADIOI_LUSTRE_Open, /* Open */ ++ ADIOI_LUSTRE_ReadContig, /* ReadContig */ ++ ADIOI_LUSTRE_WriteContig, /* WriteContig */ ++ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ ++ ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */ ++ ADIOI_GEN_SeekIndividual, /* SeekIndividual */ ++ ADIOI_LUSTRE_Fcntl, /* Fcntl */ ++ ADIOI_LUSTRE_SetInfo, /* SetInfo */ ++ ADIOI_GEN_ReadStrided, /* ReadStrided */ ++ ADIOI_GEN_WriteStrided, /* WriteStrided */ ++ ADIOI_LUSTRE_Close, /* Close */ ++ ADIOI_LUSTRE_IreadContig, /* IreadContig */ ++ ADIOI_LUSTRE_IwriteContig, /* IwriteContig */ ++ ADIOI_LUSTRE_ReadDone, /* ReadDone */ ++ ADIOI_LUSTRE_WriteDone, /* WriteDone */ ++ ADIOI_LUSTRE_ReadComplete, /* ReadComplete */ ++ ADIOI_LUSTRE_WriteComplete, /* WriteComplete */ ++ ADIOI_LUSTRE_IreadStrided, /* IreadStrided */ ++ ADIOI_LUSTRE_IwriteStrided, /* IwriteStrided */ ++ ADIOI_GEN_Flush, /* Flush */ ++ ADIOI_LUSTRE_Resize, /* Resize */ ++ ADIOI_GEN_Delete, /* Delete */ ++}; +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_close.c romio/adio/ad_lustre/ad_lustre_close.c +--- romio-orig/adio/ad_lustre/ad_lustre_close.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_close.c 2006-09-06 17:10:35.000683211 -0400 +@@ -0,0 +1,32 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_close.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code) ++{ ++ int err; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_CLOSE"; ++#endif ++ ++ err = close(fd->fd_sys); ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_done.c romio/adio/ad_lustre/ad_lustre_done.c +--- romio-orig/adio/ad_lustre/ad_lustre_done.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_done.c 2006-09-06 17:10:35.000692922 -0400 +@@ -0,0 +1,188 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_done.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++#ifndef NO_AIO ++ int done=0; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READDONE"; ++#endif ++#ifdef AIO_SUN ++ aio_result_t *result=0, *tmp; ++#else ++ int err; ++#endif ++#ifdef AIO_HANDLE_IN_AIOCB ++ struct aiocb *tmp1; ++#endif ++#endif ++ ++ if (*request == ADIO_REQUEST_NULL) { ++ *error_code = MPI_SUCCESS; ++ return 1; ++ } ++ ++#ifdef NO_AIO ++/* HP, FreeBSD, Linux */ ++#ifdef HAVE_STATUS_SET_BYTES ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ (*request)->fd->async_count--; ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ *error_code = MPI_SUCCESS; ++ return 1; ++#endif ++ ++#ifdef AIO_SUN ++ if ((*request)->queued) { ++ tmp = (aio_result_t *) (*request)->handle; ++ if (tmp->aio_return == AIO_INPROGRESS) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else if (tmp->aio_return != -1) { ++ result = (aio_result_t *) aiowait(0); /* dequeue any one request */ ++ done = 1; ++ (*request)->nbytes = tmp->aio_return; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(tmp->aio_errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(tmp->aio_errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ } /* if ((*request)->queued) ... */ ++ else { ++ /* ADIOI_Complete_Async completed this request, but request object ++ was not freed. */ ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifdef AIO_HANDLE_IN_AIOCB ++/* IBM */ ++ if ((*request)->queued) { ++ tmp1 = (struct aiocb *) (*request)->handle; ++ errno = aio_error(tmp1->aio_handle); ++ if (errno == EINPROG) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++ err = aio_return(tmp1->aio_handle); ++ (*request)->nbytes = err; ++ errno = aio_error(tmp1->aio_handle); ++ ++ done = 1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } ++ } /* if ((*request)->queued) */ ++ else { ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ if ((*request)->queued) { ++ errno = aio_error((const struct aiocb *) (*request)->handle); ++ if (errno == EINPROGRESS) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++ err = aio_return((struct aiocb *) (*request)->handle); ++ (*request)->nbytes = err; ++ errno = aio_error((struct aiocb *) (*request)->handle); ++ ++ done = 1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } ++ } /* if ((*request)->queued) */ ++ else { ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifndef NO_AIO ++ if (done) { ++ /* if request is still queued in the system, it is also there ++ on ADIOI_Async_list. Delete it from there. */ ++ if ((*request)->queued) ADIOI_Del_req_from_list(request); ++ ++ (*request)->fd->async_count--; ++ if ((*request)->handle) ADIOI_Free((*request)->handle); ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ } ++ return done; ++#endif ++ ++} ++ ++ ++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++ return ADIOI_LUSTRE_ReadDone(request, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_fcntl.c romio/adio/ad_lustre/ad_lustre_fcntl.c +--- romio-orig/adio/ad_lustre/ad_lustre_fcntl.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_fcntl.c 2006-09-06 18:43:11.000365177 -0400 +@@ -0,0 +1,127 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_fcntl.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++#include "adio_extern.h" ++/* #ifdef MPISGI ++#include "mpisgi2.h" ++#endif */ ++ ++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) ++{ ++ int i, ntimes; ++ ADIO_Offset curr_fsize, alloc_size, size, len, done; ++ ADIO_Status status; ++ char *buf; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_FCNTL"; ++#endif ++ ++ switch(flag) { ++ case ADIO_FCNTL_GET_FSIZE: ++ fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END); ++ if (fd->fp_sys_posn != -1) ++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); ++ if (fcntl_struct->fsize == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ break; ++ ++ case ADIO_FCNTL_SET_DISKSPACE: ++ /* will be called by one process only */ ++ /* On file systems with no preallocation function, I have to ++ explicitly write ++ to allocate space. Since there could be holes in the file, ++ I need to read up to the current file size, write it back, ++ and then write beyond that depending on how much ++ preallocation is needed. ++ read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */ ++ ++ curr_fsize = lseek(fd->fd_sys, 0, SEEK_END); ++ alloc_size = fcntl_struct->diskspace; ++ ++ size = ADIOI_MIN(curr_fsize, alloc_size); ++ ++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; ++ buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ); ++ done = 0; ++ ++ for (i=0; i curr_fsize) { ++ memset(buf, 0, ADIOI_PREALLOC_BUFSZ); ++ size = alloc_size - curr_fsize; ++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; ++ for (i=0; ifp_sys_posn != -1) ++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); ++ *error_code = MPI_SUCCESS; ++ break; ++ ++#if 0 ++ case ADIO_FCNTL_SET_IOMODE: ++ /* for implementing PFS I/O modes. will not occur in MPI-IO ++ implementation.*/ ++ if (fd->iomode != fcntl_struct->iomode) { ++ fd->iomode = fcntl_struct->iomode; ++ MPI_Barrier(MPI_COMM_WORLD); ++ } ++ *error_code = MPI_SUCCESS; ++ break; ++#endif ++ ++ case ADIO_FCNTL_SET_ATOMICITY: ++ fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1; ++ *error_code = MPI_SUCCESS; ++ break; ++ ++ default: ++ FPRINTF(stderr, "Unknown flag passed to ADIOI_LUSTRE_Fcntl\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_flush.c romio/adio/ad_lustre/ad_lustre_flush.c +--- romio-orig/adio/ad_lustre/ad_lustre_flush.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_flush.c 2006-09-06 17:10:35.000711888 -0400 +@@ -0,0 +1,14 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_flush.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code) ++{ ++ ADIOI_GEN_Flush(fd, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre.h romio/adio/ad_lustre/ad_lustre.h +--- romio-orig/adio/ad_lustre/ad_lustre.h 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre.h 2006-09-06 17:10:35.000722616 -0400 +@@ -0,0 +1,36 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre.h,v 1.2 2005/07/07 14:38:17 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#ifndef AD_UNIX_INCLUDE ++#define AD_UNIX_INCLUDE ++ ++/* temp*/ ++#define HAVE_ASM_TYPES_H 1 ++ ++#include ++#include ++#include ++#include ++#include "lustre/lustre_user.h" ++#include "adio.h" ++ ++#ifndef NO_AIO ++#ifdef AIO_SUN ++#include ++#else ++#include ++#ifdef NEEDS_ADIOCB_T ++typedef struct adiocb adiocb_t; ++#endif ++#endif ++#endif ++ ++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, ++ int wr, void *handle); ++ ++#endif +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_hints.c romio/adio/ad_lustre/ad_lustre_hints.c +--- romio-orig/adio/ad_lustre/ad_lustre_hints.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_hints.c 2006-09-06 17:10:35.000741994 -0400 +@@ -0,0 +1,130 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_hints.c,v 1.2 2005/07/07 14:38:17 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ++{ ++ char *value, *value_in_fd; ++ int flag, tmp_val, str_factor=-1, str_unit=0, start_iodev=-1; ++ struct lov_user_md lum = { 0 }; ++ int err, myrank, fd_sys, perm, amode, old_mask; ++ ++ if ( (fd->info) == MPI_INFO_NULL) { ++ /* This must be part of the open call. can set striping parameters ++ if necessary. */ ++ MPI_Info_create(&(fd->info)); ++ ++ /* has user specified striping or server buffering parameters ++ and do they have the same value on all processes? */ ++ if (users_info != MPI_INFO_NULL) { ++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ++ ++ MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ str_factor=atoi(value); ++ tmp_val = str_factor; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != str_factor) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_factor\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ str_unit=atoi(value); ++ tmp_val = str_unit; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != str_unit) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_unit\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ start_iodev=atoi(value); ++ tmp_val = start_iodev; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != start_iodev) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"start_iodevice\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ /* if user has specified striping info, process 0 tries to set it */ ++ if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { ++ MPI_Comm_rank(fd->comm, &myrank); ++ if (!myrank) { ++ if (fd->perm == ADIO_PERM_NULL) { ++ old_mask = umask(022); ++ umask(old_mask); ++ perm = old_mask ^ 0666; ++ } ++ else perm = fd->perm; ++ ++ amode = 0; ++ if (fd->access_mode & ADIO_CREATE) ++ amode = amode | O_CREAT; ++ if (fd->access_mode & ADIO_RDONLY) ++ amode = amode | O_RDONLY; ++ if (fd->access_mode & ADIO_WRONLY) ++ amode = amode | O_WRONLY; ++ if (fd->access_mode & ADIO_RDWR) ++ amode = amode | O_RDWR; ++ if (fd->access_mode & ADIO_EXCL) ++ amode = amode | O_EXCL; ++ ++ /* we need to create file so ensure this is set */ ++ amode = amode | O_LOV_DELAY_CREATE | O_CREAT; ++ ++ fd_sys = open(fd->filename, amode, perm); ++ if (fd_sys == -1) { ++ if (errno != EEXIST) ++ printf("Failure to open file %s %d %d\n",strerror(errno), amode, perm); ++ } else { ++ lum.lmm_magic = LOV_USER_MAGIC; ++ lum.lmm_pattern = 0; ++ lum.lmm_stripe_size = str_unit; ++ lum.lmm_stripe_count = str_factor; ++ lum.lmm_stripe_offset = start_iodev; ++ ++ err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum); ++ if (err == -1 && errno != EEXIST) { ++ printf("Failure to set stripe info %s \n",strerror(errno)); ++ } ++ ++ close(fd_sys); ++ } ++ ++ } ++ MPI_Barrier(fd->comm); ++ } ++ ++ ADIOI_Free(value); ++ } ++ ++ /* set the values for collective I/O and data sieving parameters */ ++ ADIOI_GEN_SetInfo(fd, users_info, error_code); ++ } ++ ++ else { ++ /* The file has been opened previously and fd->fd_sys is a valid ++ file descriptor. cannot set striping parameters now. */ ++ ++ /* set the values for collective I/O and data sieving parameters */ ++ ADIOI_GEN_SetInfo(fd, users_info, error_code); ++ ++ } ++ ++ *error_code = MPI_SUCCESS; ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_iread.c romio/adio/ad_lustre/ad_lustre_iread.c +--- romio-orig/adio/ad_lustre/ad_lustre_iread.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_iread.c 2006-09-06 17:10:35.000751765 -0400 +@@ -0,0 +1,106 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_iread.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int *error_code) ++{ ++ int len, typesize; ++#ifdef NO_AIO ++ ADIO_Status status; ++#else ++ int err=-1; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_IREADCONTIG"; ++#endif ++#endif ++ ++ (*request) = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_READ; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ ++ MPI_Type_size(datatype, &typesize); ++ len = count * typesize; ++ ++#ifdef NO_AIO ++ /* HP, FreeBSD, Linux */ ++ /* no support for nonblocking I/O. Use blocking I/O. */ ++ ++ ADIOI_LUSTRE_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, ++ &status, error_code); ++ (*request)->queued = 0; ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Get_elements(&status, MPI_BYTE, &len); ++ (*request)->nbytes = len; ++ } ++#endif ++ ++#else ++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; ++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 0, &((*request)->handle)); ++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; ++ ++ (*request)->queued = 1; ++ ADIOI_Add_req_to_list(request); ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++#endif /* NO_AIO */ ++ ++ fd->fp_sys_posn = -1; /* set it to null. */ ++ fd->async_count++; ++} ++ ++ ++ ++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code) ++{ ++ ADIO_Status status; ++#ifdef HAVE_STATUS_SET_BYTES ++ int typesize; ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_READ; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ (*request)->queued = 0; ++ (*request)->handle = 0; ++ ++/* call the blocking version. It is faster because it does data sieving. */ ++ ADIOI_LUSTRE_ReadStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, &status, error_code); ++ ++ fd->async_count++; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Type_size(datatype, &typesize); ++ (*request)->nbytes = count * typesize; ++ } ++#endif ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_iwrite.c romio/adio/ad_lustre/ad_lustre_iwrite.c +--- romio-orig/adio/ad_lustre/ad_lustre_iwrite.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_iwrite.c 2006-09-06 17:10:35.000761678 -0400 +@@ -0,0 +1,268 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_iwrite.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int *error_code) ++{ ++ int len, typesize; ++#ifdef NO_AIO ++ ADIO_Status status; ++#else ++ int err=-1; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_IWRITECONTIG"; ++#endif ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_WRITE; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ ++ MPI_Type_size(datatype, &typesize); ++ len = count * typesize; ++ ++#ifdef NO_AIO ++ /* HP, FreeBSD, Linux */ ++ /* no support for nonblocking I/O. Use blocking I/O. */ ++ ++ ADIOI_LUSTRE_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, ++ &status, error_code); ++ (*request)->queued = 0; ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Get_elements(&status, MPI_BYTE, &len); ++ (*request)->nbytes = len; ++ } ++#endif ++ ++#else ++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; ++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 1, &((*request)->handle)); ++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; ++ ++ (*request)->queued = 1; ++ ADIOI_Add_req_to_list(request); ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++#endif /* NO_AIO */ ++ ++ fd->fp_sys_posn = -1; /* set it to null. */ ++ fd->async_count++; ++} ++ ++ ++ ++ ++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code) ++{ ++ ADIO_Status status; ++#ifdef HAVE_STATUS_SET_BYTES ++ int typesize; ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_WRITE; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ (*request)->queued = 0; ++ (*request)->handle = 0; ++ ++/* call the blocking version. It is faster because it does data sieving. */ ++ ADIOI_LUSTRE_WriteStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, &status, error_code); ++ ++ fd->async_count++; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Type_size(datatype, &typesize); ++ (*request)->nbytes = count * typesize; ++ } ++#endif ++} ++ ++ ++/* This function is for implementation convenience. It is not user-visible. ++ It takes care of the differences in the interface for nonblocking I/O ++ on various Unix machines! If wr==1 write, wr==0 read. */ ++ ++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, ++ int wr, void *handle) ++{ ++ int err=-1, fd_sys; ++ ++#ifndef NO_AIO ++ int error_code; ++#ifdef AIO_SUN ++ aio_result_t *result; ++#else ++ struct aiocb *aiocbp; ++#endif ++#endif ++ ++ fd_sys = fd->fd_sys; ++ ++#ifdef AIO_SUN ++ result = (aio_result_t *) ADIOI_Malloc(sizeof(aio_result_t)); ++ result->aio_return = AIO_INPROGRESS; ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* the man pages say EPROCLIM, but in reality errno is set to EAGAIN! */ ++ ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again.*/ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((aio_result_t **) handle) = result; ++#endif ++ ++#ifdef NO_FD_IN_AIOCB ++/* IBM */ ++ aiocbp = (struct aiocb *) ADIOI_Malloc(sizeof(struct aiocb)); ++ aiocbp->aio_whence = SEEK_SET; ++ aiocbp->aio_offset = offset; ++ aiocbp->aio_buf = buf; ++ aiocbp->aio_nbytes = len; ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again. */ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((struct aiocb **) handle) = aiocbp; ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ ++ aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1); ++ aiocbp->aio_fildes = fd_sys; ++ aiocbp->aio_offset = offset; ++ aiocbp->aio_buf = buf; ++ aiocbp->aio_nbytes = len; ++ ++#ifdef AIO_PRIORITY_DEFAULT ++/* DEC */ ++ aiocbp->aio_reqprio = AIO_PRIO_DFL; /* not needed in DEC Unix 4.0 */ ++ aiocbp->aio_sigevent.sigev_signo = 0; ++#else ++ aiocbp->aio_reqprio = 0; ++#endif ++ ++#ifdef AIO_SIGNOTIFY_NONE ++/* SGI IRIX 6 */ ++ aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE; ++#else ++ aiocbp->aio_sigevent.sigev_signo = 0; ++#endif ++ ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again. */ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((struct aiocb **) handle) = aiocbp; ++#endif ++ ++ return err; ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_open.c romio/adio/ad_lustre/ad_lustre_open.c +--- romio-orig/adio/ad_lustre/ad_lustre_open.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_open.c 2006-09-06 17:10:35.000771351 -0400 +@@ -0,0 +1,100 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_open.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) ++{ ++ int perm, old_mask, amode; ++ struct lov_user_md lum = { 0 }; ++ char *value; ++ ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_OPEN"; ++#endif ++ ++ if (fd->perm == ADIO_PERM_NULL) { ++ old_mask = umask(022); ++ umask(old_mask); ++ perm = old_mask ^ 0666; ++ } ++ else perm = fd->perm; ++ ++ amode = 0; ++ if (fd->access_mode & ADIO_CREATE) ++ amode = amode | O_CREAT; ++ if (fd->access_mode & ADIO_RDONLY) ++ amode = amode | O_RDONLY; ++ if (fd->access_mode & ADIO_WRONLY) ++ amode = amode | O_WRONLY; ++ if (fd->access_mode & ADIO_RDWR) ++ amode = amode | O_RDWR; ++ if (fd->access_mode & ADIO_EXCL) ++ amode = amode | O_EXCL; ++ ++ fd->fd_sys = open(fd->filename, amode, perm); ++ ++ if (fd->fd_sys != -1) { ++ int err; ++ ++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ++ ++ /* get file striping information and set it in info */ ++ lum.lmm_magic = LOV_USER_MAGIC; ++ err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum); ++ ++ if (!err) { ++ sprintf(value, "%d", lum.lmm_stripe_size); ++ MPI_Info_set(fd->info, "striping_unit", value); ++ ++ sprintf(value, "%d", lum.lmm_stripe_count); ++ MPI_Info_set(fd->info, "striping_factor", value); ++ ++ sprintf(value, "%d", lum.lmm_stripe_offset); ++ MPI_Info_set(fd->info, "start_iodevice", value); ++ } ++ ADIOI_Free(value); ++ ++ if (fd->access_mode & ADIO_APPEND) ++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); ++ } ++ ++ ++ if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) ++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); ++ ++ if (fd->fd_sys == -1) { ++#ifdef MPICH2 ++ if (errno == ENAMETOOLONG) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamelong", "**filenamelong %s %d", fd->filename, strlen(fd->filename)); ++ else if (errno == ENOENT) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_NO_SUCH_FILE, "**filenoexist", "**filenoexist %s", fd->filename); ++ else if (errno == ENOTDIR || errno == ELOOP) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamedir", "**filenamedir %s", fd->filename); ++ else if (errno == EACCES) { ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ACCESS, "**fileaccess", "**fileaccess %s", ++ fd->filename); ++ } ++ else if (errno == EROFS) { ++ /* Read only file or file system and write access requested */ ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_READ_ONLY, "**ioneedrd", 0); ++ } ++ else { ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ } ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_rdcoll.c romio/adio/ad_lustre/ad_lustre_rdcoll.c +--- romio-orig/adio/ad_lustre/ad_lustre_rdcoll.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_rdcoll.c 2006-09-06 17:10:35.000780880 -0400 +@@ -0,0 +1,18 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_rdcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_ReadStridedColl(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_read.c romio/adio/ad_lustre/ad_lustre_read.c +--- romio-orig/adio/ad_lustre/ad_lustre_read.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_read.c 2006-09-06 17:10:35.000790846 -0400 +@@ -0,0 +1,67 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_read.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int *error_code) ++{ ++ int err=-1, datatype_size, len; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READCONTIG"; ++#endif ++ ++ MPI_Type_size(datatype, &datatype_size); ++ len = datatype_size * count; ++ ++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { ++ if (fd->fp_sys_posn != offset) ++ lseek(fd->fd_sys, offset, SEEK_SET); ++ err = read(fd->fd_sys, buf, len); ++ fd->fp_sys_posn = offset + len; ++ /* individual file pointer not updated */ ++ } ++ else { /* read from curr. location of ind. file pointer */ ++ if (fd->fp_sys_posn != fd->fp_ind) ++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); ++ err = read(fd->fd_sys, buf, len); ++ fd->fp_ind += err; ++ fd->fp_sys_posn = fd->fp_ind; ++ } ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (err != -1) MPIR_Status_set_bytes(status, datatype, err); ++#endif ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} ++ ++ ++ ++ ++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_resize.c romio/adio/ad_lustre/ad_lustre_resize.c +--- romio-orig/adio/ad_lustre/ad_lustre_resize.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_resize.c 2006-09-06 17:10:35.000807397 -0400 +@@ -0,0 +1,32 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_resize.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) ++{ ++ int err; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_RESIZE"; ++#endif ++ ++ err = ftruncate(fd->fd_sys, size); ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_seek.c romio/adio/ad_lustre/ad_lustre_seek.c +--- romio-orig/adio/ad_lustre/ad_lustre_seek.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_seek.c 2006-09-06 17:10:35.000816583 -0400 +@@ -0,0 +1,15 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_seek.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ++ int whence, int *error_code) ++{ ++ return ADIOI_GEN_SeekIndividual(fd, offset, whence, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_wait.c romio/adio/ad_lustre/ad_lustre_wait.c +--- romio-orig/adio/ad_lustre/ad_lustre_wait.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_wait.c 2006-09-06 18:45:39.000190529 -0400 +@@ -0,0 +1,188 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_wait.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++#ifndef NO_AIO ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READCOMPLETE"; ++#endif ++#ifdef AIO_SUN ++ aio_result_t *result=0, *tmp; ++#else ++ int err; ++#endif ++#ifdef AIO_HANDLE_IN_AIOCB ++ struct aiocb *tmp1; ++#endif ++#endif ++ ++ if (*request == ADIO_REQUEST_NULL) { ++ *error_code = MPI_SUCCESS; ++ return; ++ } ++ ++#ifdef AIO_SUN ++ if ((*request)->queued) { /* dequeue it */ ++ tmp = (aio_result_t *) (*request)->handle; ++ while (tmp->aio_return == AIO_INPROGRESS) usleep(1000); ++ /* sleep for 1 ms., until done. Is 1 ms. a good number? */ ++ /* when done, dequeue any one request */ ++ result = (aio_result_t *) aiowait(0); ++ ++ (*request)->nbytes = tmp->aio_return; ++ ++ if (tmp->aio_return == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(tmp->aio_errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(tmp->aio_errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ ++/* aiowait only dequeues a request. The completion of a request can be ++ checked by just checking the aio_return flag in the handle passed ++ to the original aioread()/aiowrite(). Therefore, I need to ensure ++ that aiowait() is called exactly once for each previous ++ aioread()/aiowrite(). This is also taken care of in ADIOI_xxxDone */ ++ } ++ else *error_code = MPI_SUCCESS; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifdef AIO_HANDLE_IN_AIOCB ++/* IBM */ ++ if ((*request)->queued) { ++ do { ++ err = aio_suspend(1, (struct aiocb **) &((*request)->handle)); ++ } while ((err == -1) && (errno == EINTR)); ++ ++ tmp1 = (struct aiocb *) (*request)->handle; ++ if (err != -1) { ++ err = aio_return(tmp1->aio_handle); ++ (*request)->nbytes = err; ++ errno = aio_error(tmp1->aio_handle); ++ } ++ else (*request)->nbytes = -1; ++ ++/* on DEC, it is required to call aio_return to dequeue the request. ++ IBM man pages don't indicate what function to use for dequeue. ++ I'm assuming it is aio_return! POSIX says aio_return may be called ++ only once on a given handle. */ ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } /* if ((*request)->queued) */ ++ else *error_code = MPI_SUCCESS; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ if ((*request)->queued) { ++ do { ++ err = aio_suspend((const struct aiocb_t **) &((*request)->handle), 1, 0); ++ } while ((err == -1) && (errno == EINTR)); ++ ++ if (err != -1) { ++ err = aio_return((struct aiocb *) (*request)->handle); ++ (*request)->nbytes = err; ++ errno = aio_error((struct aiocb *) (*request)->handle); ++ } ++ else (*request)->nbytes = -1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } /* if ((*request)->queued) */ ++ else *error_code = MPI_SUCCESS; ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++#endif ++ ++#ifndef NO_AIO ++ if ((*request)->queued != -1) { ++ ++ /* queued = -1 is an internal hack used when the request must ++ be completed, but the request object should not be ++ freed. This is used in ADIOI_Complete_async, because the user ++ will call MPI_Wait later, which would require status to ++ be filled. Ugly but works. queued = -1 should be used only ++ in ADIOI_Complete_async. ++ This should not affect the user in any way. */ ++ ++ /* if request is still queued in the system, it is also there ++ on ADIOI_Async_list. Delete it from there. */ ++ if ((*request)->queued) ADIOI_Del_req_from_list(request); ++ ++ (*request)->fd->async_count--; ++ if ((*request)->handle) ADIOI_Free((*request)->handle); ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ } ++ ++#else ++/* HP, FreeBSD, Linux */ ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ (*request)->fd->async_count--; ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ *error_code = MPI_SUCCESS; ++#endif ++} ++ ++ ++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++ ADIOI_LUSTRE_ReadComplete(request, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_wrcoll.c romio/adio/ad_lustre/ad_lustre_wrcoll.c +--- romio-orig/adio/ad_lustre/ad_lustre_wrcoll.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_wrcoll.c 2006-09-06 17:10:35.000835460 -0400 +@@ -0,0 +1,18 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_wrcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_WriteStridedColl(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/ad_lustre_write.c romio/adio/ad_lustre/ad_lustre_write.c +--- romio-orig/adio/ad_lustre/ad_lustre_write.c 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/ad_lustre_write.c 2006-09-06 17:10:35.000844658 -0400 +@@ -0,0 +1,66 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_write.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int *error_code) ++{ ++ int err=-1, datatype_size, len; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_WRITECONTIG"; ++#endif ++ ++ MPI_Type_size(datatype, &datatype_size); ++ len = datatype_size * count; ++ ++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { ++ if (fd->fp_sys_posn != offset) ++ lseek(fd->fd_sys, offset, SEEK_SET); ++ err = write(fd->fd_sys, buf, len); ++ fd->fp_sys_posn = offset + err; ++ /* individual file pointer not updated */ ++ } ++ else { /* write from curr. location of ind. file pointer */ ++ if (fd->fp_sys_posn != fd->fp_ind) ++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); ++ err = write(fd->fd_sys, buf, len); ++ fd->fp_ind += err; ++ fd->fp_sys_posn = fd->fp_ind; ++ } ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err); ++#endif ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} ++ ++ ++ ++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -ruN romio-orig/adio/ad_lustre/Makefile.in romio/adio/ad_lustre/Makefile.in +--- romio-orig/adio/ad_lustre/Makefile.in 1969-12-31 19:00:00.000000000 -0500 ++++ romio/adio/ad_lustre/Makefile.in 2006-09-06 18:48:56.000800829 -0400 +@@ -0,0 +1,51 @@ ++CC = @CC@ ++AR = @AR@ ++RANLIB = @RANLIB@ ++LIBNAME = @LIBNAME@ ++srcdir = @srcdir@ ++CC_SHL = @CC_SHL@ ++SHLIBNAME = @SHLIBNAME@ ++ ++INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include -I../../include -I${srcdir}/../../../../include -I../../../../include ++CFLAGS = @CPPFLAGS@ @CFLAGS@ $(INCLUDE_DIR) ++ ++top_builddir = @master_topbuild_dir@ ++LIBTOOL = @LIBTOOL@ ++C_COMPILE_SHL = $(CC_SHL) @CFLAGS@ $(INCLUDE_DIR) ++ ++@VPATH@ ++ ++AD_LUSTRE_OBJECTS = ad_lustre.o ad_lustre_close.o ad_lustre_read.o \ ++ ad_lustre_open.o ad_lustre_write.o ad_lustre_done.o \ ++ ad_lustre_fcntl.o ad_lustre_iread.o ad_lustre_iwrite.o ad_lustre_wait.o \ ++ ad_lustre_resize.o ad_lustre_hints.o ++ ++default: $(LIBNAME) ++ @if [ "@ENABLE_SHLIB@" != "none" ] ; then \ ++ $(MAKE) $(SHLIBNAME).la ;\ ++ fi ++ ++.SUFFIXES: $(SUFFIXES) .p .lo ++ ++.c.o: ++ $(CC) $(CFLAGS) -c $< ++.c.lo: ++ $(C_COMPILE_SHL) -c $< -o _s$*.o ++ @mv -f _s$*.o $*.lo ++# $(C_COMPILE_SHL) -c $< ++# @mv -f $*.o $*.lo ++ ++$(LIBNAME): $(AD_LUSTRE_OBJECTS) ++ $(AR) $(LIBNAME) $(AD_LUSTRE_OBJECTS) ++ $(RANLIB) $(LIBNAME) ++ ++AD_LUSTRE_LOOBJECTS=$(AD_LUSTRE_OBJECTS:.o=.lo) ++$(SHLIBNAME).la: $(AD_LUSTRE_LOOBJECTS) ++ $(AR) $(SHLIBNAME).la $(AD_LUSTRE_LOOBJECTS) ++ ++coverage: ++ -@for file in ${AD_LUSTRE_OBJECTS:.o=.c} ; do \ ++ gcov -b -f $$file ; done ++ ++clean: ++ @rm -f *.o *.lo +diff -ruN romio-orig/adio/common/ad_fstype.c romio/adio/common/ad_fstype.c +--- romio-orig/adio/common/ad_fstype.c 2005-08-11 19:33:46.000000000 -0400 ++++ romio/adio/common/ad_fstype.c 2006-09-06 17:41:20.000830936 -0400 +@@ -265,6 +265,9 @@ + /* if UFS support is enabled, default to that */ + *fstype = ADIO_UFS; + return; ++# elif defined(LINUX) && defined(ROMIO_LUSTRE) ++# warning use correct include ++# define LL_SUPER_MAGIC 0x0BD00BD0 + # endif + + /* --BEGIN ERROR HANDLING-- */ +@@ -308,6 +311,13 @@ + } + # endif + ++# ifdef LL_SUPER_MAGIC ++ if (fsbuf.f_type == LL_SUPER_MAGIC) { ++ *fstype = ADIO_LUSTRE; ++ return; ++ } ++# endif ++ + # ifdef PAN_KERNEL_FS_CLIENT_SUPER_MAGIC + if (fsbuf.f_type == PAN_KERNEL_FS_CLIENT_SUPER_MAGIC) { + *fstype = ADIO_PANFS; +@@ -458,6 +468,11 @@ + { + *fstype = ADIO_GRIDFTP; + } ++ else if (!strncmp(filename, "lustre:", 7) ++ || !strncmp(filename, "LUSTRE:", 7)) ++ { ++ *fstype = ADIO_LUSTRE; ++ } + else { + #ifdef ROMIO_NTFS + *fstype = ADIO_NTFS; +@@ -657,6 +672,14 @@ + *ops = &ADIO_GRIDFTP_operations; + #endif + } ++ if (file_system == ADIO_LUSTRE) { ++#ifndef ROMIO_LUSTRE ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**iofstypeunsupported", 0); ++ return; ++#else ++ *ops = &ADIO_LUSTRE_operations; ++#endif ++ } + *error_code = MPI_SUCCESS; + *fstype = file_system; + return; +diff -ruN romio-orig/adio/include/adio.h romio/adio/include/adio.h +--- romio-orig/adio/include/adio.h 2006-06-09 17:45:04.000000000 -0400 ++++ romio/adio/include/adio.h 2006-09-06 17:44:16.000614058 -0400 +@@ -302,6 +302,7 @@ + #define ADIO_PVFS2 160 /* PVFS2: 2nd generation PVFS */ + #define ADIO_PANFS 161 /* Panasas FS */ + #define ADIO_GRIDFTP 162 /* Globus GridFTP */ ++#define ADIO_LUSTRE 163 /* Lustre */ + + #define ADIO_SEEK_SET SEEK_SET + #define ADIO_SEEK_CUR SEEK_CUR +diff -ruN romio-orig/adio/include/adioi_fs_proto.h romio/adio/include/adioi_fs_proto.h +--- romio-orig/adio/include/adioi_fs_proto.h 2005-06-08 17:16:39.000000000 -0400 ++++ romio/adio/include/adioi_fs_proto.h 2006-09-06 17:48:11.000523566 -0400 +@@ -49,6 +49,68 @@ + /* prototypes are in adio/ad_sfs/ad_sfs.h */ + #endif + ++#ifdef ROMIO_LUSTRE ++extern struct ADIOI_Fns_struct ADIO_LUSTRE_operations; ++ ++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status, ++ int *error_code); ++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int ++ *error_code); ++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); ++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ++ int whence, int *error_code); ++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); ++#endif ++ + #ifdef ROMIO_NTFS + extern struct ADIOI_Fns_struct ADIO_NTFS_operations; + /* prototypes are in adio/ad_ntfs/ad_ntfs.h */ +diff -ruN romio-orig/adio/include/mpio_error.h romio/adio/include/mpio_error.h +--- romio-orig/adio/include/mpio_error.h 2005-05-23 19:27:50.000000000 -0400 ++++ romio/adio/include/mpio_error.h 2006-09-06 17:10:35.000984078 -0400 +@@ -63,6 +63,7 @@ + #define MPIR_ERR_FILETYPE 33 + #define MPIR_ERR_NO_NTFS 35 + #define MPIR_ERR_NO_TESTFS 36 ++#define MPIR_ERR_NO_LUSTRE 37 + + /* MPI_ERR_COMM */ + #ifndef MPIR_ERR_COMM_NULL +diff -ruN romio-orig/adio/include/romioconf.h.in romio/adio/include/romioconf.h.in +--- romio-orig/adio/include/romioconf.h.in 2006-08-11 09:48:44.000000000 -0400 ++++ romio/adio/include/romioconf.h.in 2006-09-06 17:43:08.000599274 -0400 +@@ -276,6 +276,9 @@ + /* Define for ROMIO with PVFS2 */ + #undef ROMIO_PVFS2 + ++/* Define for ROMIO with LUSTRE */ ++#undef ROMIO_LUSTRE ++ + /* Define if int64_t must be defined for PVFS */ + #undef ROMIO_PVFS_NEEDS_INT64_DEFINITION + +diff -ruN romio-orig/configure romio/configure +--- romio-orig/configure 2006-08-11 09:48:45.000000000 -0400 ++++ romio/configure 2006-09-06 17:20:57.000555513 -0400 +@@ -1400,7 +1400,7 @@ + # + have_aio=no + # +-known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp" ++known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre" + known_mpi_impls="mpich2_mpi mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi" + # + # Defaults +@@ -7490,6 +7490,14 @@ + + fi + ++if test -n "$file_system_lustre"; then ++ ++cat >>confdefs.h <<\_ACEOF ++#define ROMIO_LUSTRE 1 ++_ACEOF ++ ++fi ++ + # + # Check for presence and characteristics of async. I/O calls if + # not disabled. +@@ -11977,7 +11985,7 @@ + # are active will be called by the top level ROMIO make + ac_config_commands="$ac_config_commands default-1" + +- ac_config_files="$ac_config_files Makefile localdefs mpi-io/Makefile mpi2-other/info/Makefile mpi2-other/array/Makefile adio/common/Makefile test/Makefile test/misc.c test/large_file.c test/runtests util/romioinstall include/mpio.h include/mpiof.h adio/ad_nfs/Makefile adio/ad_ufs/Makefile adio/ad_panfs/Makefile adio/ad_xfs/Makefile adio/ad_sfs/Makefile adio/ad_pfs/Makefile adio/ad_testfs/Makefile adio/ad_pvfs/Makefile adio/ad_pvfs2/Makefile adio/ad_gridftp/Makefile mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile mpi2-other/array/fortran/Makefile test/fmisc.f test/fcoll_test.f test/pfcoll_test.f test/fperf.f mpi-io/glue/mpich2/Makefile mpi-io/glue/mpich1/Makefile mpi-io/glue/default/Makefile" ++ ac_config_files="$ac_config_files Makefile localdefs mpi-io/Makefile mpi2-other/info/Makefile mpi2-other/array/Makefile adio/common/Makefile test/Makefile test/misc.c test/large_file.c test/runtests util/romioinstall include/mpio.h include/mpiof.h adio/ad_nfs/Makefile adio/ad_ufs/Makefile adio/ad_panfs/Makefile adio/ad_xfs/Makefile adio/ad_sfs/Makefile adio/ad_pfs/Makefile adio/ad_testfs/Makefile adio/ad_pvfs/Makefile adio/ad_pvfs2/Makefile adio/ad_gridftp/Makefile adio/ad_lustre/Makefile mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile mpi2-other/array/fortran/Makefile test/fmisc.f test/fcoll_test.f test/pfcoll_test.f test/fperf.f mpi-io/glue/mpich2/Makefile mpi-io/glue/mpich1/Makefile mpi-io/glue/default/Makefile" + cat >confcache <<\_ACEOF + # This file is a shell script that caches the results of configure + # tests run on this system so they can be shared between configure +@@ -12535,6 +12543,7 @@ + "adio/ad_pvfs/Makefile" ) CONFIG_FILES="$CONFIG_FILES adio/ad_pvfs/Makefile" ;; + "adio/ad_pvfs2/Makefile" ) CONFIG_FILES="$CONFIG_FILES adio/ad_pvfs2/Makefile" ;; + "adio/ad_gridftp/Makefile" ) CONFIG_FILES="$CONFIG_FILES adio/ad_gridftp/Makefile" ;; ++ "adio/ad_lustre/Makefile" ) CONFIG_FILES="$CONFIG_FILES adio/ad_lustre/Makefile" ;; + "mpi-io/fortran/Makefile" ) CONFIG_FILES="$CONFIG_FILES mpi-io/fortran/Makefile" ;; + "mpi2-other/info/fortran/Makefile" ) CONFIG_FILES="$CONFIG_FILES mpi2-other/info/fortran/Makefile" ;; + "mpi2-other/array/fortran/Makefile" ) CONFIG_FILES="$CONFIG_FILES mpi2-other/array/fortran/Makefile" ;; +diff -ruN romio-orig/configure.in romio/configure.in +--- romio-orig/configure.in 2006-07-24 17:55:57.000000000 -0400 ++++ romio/configure.in 2006-09-06 17:16:13.000525117 -0400 +@@ -93,7 +93,7 @@ + # + have_aio=no + # +-known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp" ++known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre" + known_mpi_impls="mpich2_mpi mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi" + # + # Defaults +@@ -1062,6 +1062,9 @@ + if test -n "$file_system_testfs"; then + AC_DEFINE(ROMIO_TESTFS,1,[Define for ROMIO with TESTFS]) + fi ++if test -n "$file_system_lustre"; then ++ AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE]) ++fi + + if test -n "$file_system_xfs"; then + AC_DEFINE(ROMIO_XFS,1,[Define for ROMIO with XFS]) +@@ -2024,6 +2027,7 @@ + adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \ + adio/ad_pvfs2/Makefile \ + adio/ad_gridftp/Makefile \ ++ adio/ad_lustre/Makefile \ + mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \ + mpi2-other/array/fortran/Makefile test/fmisc.f \ + test/fcoll_test.f test/pfcoll_test.f test/fperf.f \ +diff -ruN romio-orig/Makefile.in romio/Makefile.in +--- romio-orig/Makefile.in 2005-05-24 18:53:11.000000000 -0400 ++++ romio/Makefile.in 2006-09-06 17:13:25.000393429 -0400 +@@ -14,7 +14,7 @@ + MPIO_DIRS = mpi-io + EXTRA_SRC_DIRS = @EXTRA_SRC_DIRS@ + FILE_SYS_DIRS = @FILE_SYS_DIRS@ +-ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 adio/ad_panfs adio/ad_gridftp test ++ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 adio/ad_panfs adio/ad_gridftp adio/ad_lustre test + SHELL = /bin/sh + + @VPATH@ diff --git a/lustre/doc/lustre.7 b/lustre/doc/lustre.7 index fbcf375..460df98 100644 --- a/lustre/doc/lustre.7 +++ b/lustre/doc/lustre.7 @@ -57,7 +57,7 @@ A low-level interface to control various aspects of Lustre .B lfs(1) A user-level interface to control Lustre-specific information for individual files. -.B lustre_config +.B lustre_config.sh Format multiple Lustre targets simultaneously from definitions in a CSV file. .SH BUGS diff --git a/lustre/doc/mkfs.lustre.8 b/lustre/doc/mkfs.lustre.8 index 67329da..b469034 100644 --- a/lustre/doc/mkfs.lustre.8 +++ b/lustre/doc/mkfs.lustre.8 @@ -43,9 +43,6 @@ Set user comment about this disk, ignored by Lustre. .BI \--device-size= KB Set device size for loop devices .TP -.BI \--dryrun -Only print what would be done; does not affect the disk -.TP .BI \--failnode= nid,... Set the NID(s) of a failover partner. This option can be repeated as desired. .TP @@ -64,6 +61,9 @@ Set permanent mount options, equivalent to setting in /etc/fstab .BI \--mgsnode= nid,... Set the NID(s) of the MGS node, required for all targets other than the MGS. .TP +.BI \--noformat +Only print what would be done; does not affect the disk +.TP .BI \--param " key=value" Set permanent parameter .I key @@ -71,15 +71,15 @@ to value .I value. This option can be repeated as desired. Typical options might include: .RS -.I \--param sys.timeout=40 +.I \--param sys.timeout=40 .RS System obd timeout .RE -.I \--param lov.stripesize=2M +.I \--param lov.stripe.size=2097152 .RS Default stripe size .RE -.I \--param lov.stripecount=2 +.I \--param lov.stripe.count=2 .RS Default stripe count .RE diff --git a/lustre/ldiskfs2/.cvsignore b/lustre/fid/.cvsignore similarity index 79% rename from lustre/ldiskfs2/.cvsignore rename to lustre/fid/.cvsignore index 9ad7f07..5d26f00 100644 --- a/lustre/ldiskfs2/.cvsignore +++ b/lustre/fid/.cvsignore @@ -5,15 +5,11 @@ configure Makefile .deps TAGS +.*.cmd autoMakefile.in autoMakefile *.ko *.mod.c -.*.cmd .*.flags .tmp_versions -linux-stage -linux -*.c -*.h -sources +.depend diff --git a/lustre/fid/Makefile.in b/lustre/fid/Makefile.in new file mode 100644 index 0000000..e4908e5 --- /dev/null +++ b/lustre/fid/Makefile.in @@ -0,0 +1,4 @@ +MODULES := fid +fid-objs := fid_handler.o fid_store.o fid_request.o lproc_fid.o fid_lib.o + +@INCLUDE_RULES@ diff --git a/lustre/fid/autoMakefile.am b/lustre/fid/autoMakefile.am new file mode 100644 index 0000000..c007f43 --- /dev/null +++ b/lustre/fid/autoMakefile.am @@ -0,0 +1,20 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if LIBLUSTRE +noinst_LIBRARIES = libfid.a +libfid_a_SOURCES = fid_handler.c fid_store.c fid_request.c lproc_fid.c fid_lib.c fid_internal.h +libfid_a_CPPFLAGS = $(LLCPPFLAGS) +libfid_a_CFLAGS = $(LLCFLAGS) +endif + +if MODULES +modulefs_DATA = fid$(KMODEXT) +endif + +install-data-hook: $(install_data_hook) + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES = $(fid-objs:%.o=%.c) fid_internal.h diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c new file mode 100644 index 0000000..0aa64b2 --- /dev/null +++ b/lustre/fid/fid_handler.c @@ -0,0 +1,601 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fid/fid_handler.c + * Lustre Sequence Manager + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FID + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "fid_internal.h" + +#ifdef __KERNEL__ +/* Assigns client to sequence controller node. */ +int seq_server_set_cli(struct lu_server_seq *seq, + struct lu_client_seq *cli, + const struct lu_env *env) +{ + int rc = 0; + ENTRY; + + /* + * Ask client for new range, assign that range to ->seq_space and write + * seq state to backing store should be atomic. + */ + down(&seq->lss_sem); + + if (cli == NULL) { + CDEBUG(D_INFO, "%s: Detached sequence client %s\n", + seq->lss_name, cli->lcs_name); + seq->lss_cli = cli; + GOTO(out_up, rc = 0); + } + + if (seq->lss_cli != NULL) { + CERROR("%s: Sequence controller is already " + "assigned\n", seq->lss_name); + GOTO(out_up, rc = -EINVAL); + } + + CDEBUG(D_INFO, "%s: Attached sequence controller %s\n", + seq->lss_name, cli->lcs_name); + + seq->lss_cli = cli; + EXIT; +out_up: + up(&seq->lss_sem); + return rc; +} +EXPORT_SYMBOL(seq_server_set_cli); + +/* + * On controller node, allocate new super sequence for regular sequence server. + */ +static int __seq_server_alloc_super(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env) +{ + struct lu_range *space = &seq->lss_space; + int rc; + ENTRY; + + LASSERT(range_is_sane(space)); + + if (in != NULL) { + CDEBUG(D_INFO, "%s: Input seq range: " + DRANGE"\n", seq->lss_name, PRANGE(in)); + + if (in->lr_end > space->lr_start) + space->lr_start = in->lr_end; + *out = *in; + + CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", + seq->lss_name, PRANGE(space)); + } else { + if (range_space(space) < seq->lss_width) { + CWARN("%s: Sequences space to be exhausted soon. " + "Only "LPU64" sequences left\n", seq->lss_name, + range_space(space)); + *out = *space; + space->lr_start = space->lr_end; + } else if (range_is_exhausted(space)) { + CERROR("%s: Sequences space is exhausted\n", + seq->lss_name); + RETURN(-ENOSPC); + } else { + range_alloc(out, space, seq->lss_width); + } + } + + rc = seq_store_write(seq, env); + if (rc) { + CERROR("%s: Can't write space data, rc %d\n", + seq->lss_name, rc); + RETURN(rc); + } + + CDEBUG(D_INFO, "%s: Allocated super-sequence " + DRANGE"\n", seq->lss_name, PRANGE(out)); + + RETURN(rc); +} + +int seq_server_alloc_super(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env) +{ + int rc; + ENTRY; + + down(&seq->lss_sem); + rc = __seq_server_alloc_super(seq, in, out, env); + up(&seq->lss_sem); + + RETURN(rc); +} + +static int __seq_server_alloc_meta(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env) +{ + struct lu_range *space = &seq->lss_space; + int rc = 0; + ENTRY; + + LASSERT(range_is_sane(space)); + + /* + * This is recovery case. Adjust super range if input range looks like + * it is allocated from new super. + */ + if (in != NULL) { + CDEBUG(D_INFO, "%s: Input seq range: " + DRANGE"\n", seq->lss_name, PRANGE(in)); + + if (range_is_exhausted(space)) { + /* + * Server cannot send empty range to client, this is why + * we check here that range from client is "newer" than + * exhausted super. + */ + LASSERT(in->lr_end > space->lr_start); + + /* + * Start is set to end of last allocated, because it + * *is* already allocated so we take that into account + * and do not use for other allocations. + */ + space->lr_start = in->lr_end; + + /* + * End is set to in->lr_start + super sequence + * allocation unit. That is because in->lr_start is + * first seq in new allocated range from controller + * before failure. + */ + space->lr_end = in->lr_start + LUSTRE_SEQ_SUPER_WIDTH; + + if (!seq->lss_cli) { + CERROR("%s: No sequence controller " + "is attached.\n", seq->lss_name); + RETURN(-ENODEV); + } + + /* + * Let controller know that this is recovery and last + * obtained range from it was @space. + */ + rc = seq_client_replay_super(seq->lss_cli, space, env); + if (rc) { + CERROR("%s: Can't replay super-sequence, " + "rc %d\n", seq->lss_name, rc); + RETURN(rc); + } + } else { + /* + * Update super start by end from client's range. Super + * end should not be changed if range was not exhausted. + */ + if (in->lr_end > space->lr_start) + space->lr_start = in->lr_end; + } + + *out = *in; + + CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", + seq->lss_name, PRANGE(space)); + } else { + /* + * XXX: Avoid cascading RPCs using kind of async preallocation + * when meta-sequence is close to exhausting. + */ + if (range_is_exhausted(space)) { + if (!seq->lss_cli) { + CERROR("%s: No sequence controller " + "is attached.\n", seq->lss_name); + RETURN(-ENODEV); + } + + rc = seq_client_alloc_super(seq->lss_cli, env); + if (rc) { + CERROR("%s: Can't allocate super-sequence, " + "rc %d\n", seq->lss_name, rc); + RETURN(rc); + } + + /* Saving new range to allocation space. */ + *space = seq->lss_cli->lcs_space; + LASSERT(range_is_sane(space)); + } + + range_alloc(out, space, seq->lss_width); + } + + rc = seq_store_write(seq, env); + if (rc) { + CERROR("%s: Can't write space data, rc %d\n", + seq->lss_name, rc); + } + + if (rc == 0) { + CDEBUG(D_INFO, "%s: Allocated meta-sequence " + DRANGE"\n", seq->lss_name, PRANGE(out)); + } + + RETURN(rc); +} + +int seq_server_alloc_meta(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env) +{ + int rc; + ENTRY; + + down(&seq->lss_sem); + rc = __seq_server_alloc_meta(seq, in, out, env); + up(&seq->lss_sem); + + RETURN(rc); +} +EXPORT_SYMBOL(seq_server_alloc_meta); + +static int seq_server_handle(struct lu_site *site, + const struct lu_env *env, + __u32 opc, struct lu_range *in, + struct lu_range *out) +{ + int rc; + ENTRY; + + switch (opc) { + case SEQ_ALLOC_META: + if (!site->ls_server_seq) { + CERROR("Sequence server is not " + "initialized\n"); + RETURN(-EINVAL); + } + rc = seq_server_alloc_meta(site->ls_server_seq, + in, out, env); + break; + case SEQ_ALLOC_SUPER: + if (!site->ls_control_seq) { + CERROR("Sequence controller is not " + "initialized\n"); + RETURN(-EINVAL); + } + rc = seq_server_alloc_super(site->ls_control_seq, + in, out, env); + break; + default: + rc = -EINVAL; + break; + } + + RETURN(rc); +} + +static int seq_req_handle(struct ptlrpc_request *req, + const struct lu_env *env, + struct seq_thread_info *info) +{ + struct lu_range *out, *in = NULL; + struct lu_site *site; + int rc = -EPROTO; + __u32 *opc; + ENTRY; + + site = req->rq_export->exp_obd->obd_lu_dev->ld_site; + LASSERT(site != NULL); + + rc = req_capsule_pack(&info->sti_pill); + if (rc) + RETURN(err_serious(rc)); + + opc = req_capsule_client_get(&info->sti_pill, + &RMF_SEQ_OPC); + if (opc != NULL) { + out = req_capsule_server_get(&info->sti_pill, + &RMF_SEQ_RANGE); + if (out == NULL) + RETURN(err_serious(-EPROTO)); + + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + in = req_capsule_client_get(&info->sti_pill, + &RMF_SEQ_RANGE); + + LASSERT(!range_is_zero(in) && range_is_sane(in)); + } + + rc = seq_server_handle(site, env, *opc, in, out); + } else + rc = err_serious(-EPROTO); + + RETURN(rc); +} + +static void *seq_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct seq_thread_info *info; + + /* + * check that no high order allocations are incurred. + */ + CLASSERT(CFS_PAGE_SIZE >= sizeof *info); + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + return info; +} + +static void seq_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct seq_thread_info *info = data; + OBD_FREE_PTR(info); +} + +struct lu_context_key seq_thread_key = { + .lct_tags = LCT_MD_THREAD, + .lct_init = seq_key_init, + .lct_fini = seq_key_fini +}; + +static void seq_thread_info_init(struct ptlrpc_request *req, + struct seq_thread_info *info) +{ + int i; + + /* Mark rep buffer as req-layout stuff expects */ + for (i = 0; i < ARRAY_SIZE(info->sti_rep_buf_size); i++) + info->sti_rep_buf_size[i] = -1; + + /* Init request capsule */ + req_capsule_init(&info->sti_pill, req, RCL_SERVER, + info->sti_rep_buf_size); + + req_capsule_set(&info->sti_pill, &RQF_SEQ_QUERY); +} + +static void seq_thread_info_fini(struct seq_thread_info *info) +{ + req_capsule_fini(&info->sti_pill); +} + +static int seq_handle(struct ptlrpc_request *req) +{ + const struct lu_env *env; + struct seq_thread_info *info; + int rc; + + env = req->rq_svc_thread->t_env; + LASSERT(env != NULL); + + info = lu_context_key_get(&env->le_ctx, &seq_thread_key); + LASSERT(info != NULL); + + seq_thread_info_init(req, info); + rc = seq_req_handle(req, env, info); + seq_thread_info_fini(info); + + return rc; +} + +/* + * Entry point for handling FLD RPCs called from MDT. + */ +int seq_query(struct com_thread_info *info) +{ + return seq_handle(info->cti_pill.rc_req); +} +EXPORT_SYMBOL(seq_query); + +static void seq_server_proc_fini(struct lu_server_seq *seq); + +#ifdef LPROCFS +static int seq_server_proc_init(struct lu_server_seq *seq) +{ + int rc; + ENTRY; + + seq->lss_proc_dir = lprocfs_register(seq->lss_name, + seq_type_proc_dir, + NULL, NULL); + if (IS_ERR(seq->lss_proc_dir)) { + rc = PTR_ERR(seq->lss_proc_dir); + RETURN(rc); + } + + rc = lprocfs_add_vars(seq->lss_proc_dir, + seq_server_proc_list, seq); + if (rc) { + CERROR("%s: Can't init sequence manager " + "proc, rc %d\n", seq->lss_name, rc); + GOTO(out_cleanup, rc); + } + + RETURN(0); + +out_cleanup: + seq_server_proc_fini(seq); + return rc; +} + +static void seq_server_proc_fini(struct lu_server_seq *seq) +{ + ENTRY; + if (seq->lss_proc_dir != NULL) { + if (!IS_ERR(seq->lss_proc_dir)) + lprocfs_remove(&seq->lss_proc_dir); + seq->lss_proc_dir = NULL; + } + EXIT; +} +#else +static int seq_server_proc_init(struct lu_server_seq *seq) +{ + return 0; +} + +static void seq_server_proc_fini(struct lu_server_seq *seq) +{ + return; +} +#endif + +int seq_server_init(struct lu_server_seq *seq, + struct dt_device *dev, + const char *prefix, + enum lu_mgr_type type, + const struct lu_env *env) +{ + int rc, is_srv = (type == LUSTRE_SEQ_SERVER); + ENTRY; + + LASSERT(dev != NULL); + LASSERT(prefix != NULL); + + seq->lss_cli = NULL; + seq->lss_type = type; + range_zero(&seq->lss_space); + sema_init(&seq->lss_sem, 1); + + seq->lss_width = is_srv ? + LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH; + + snprintf(seq->lss_name, sizeof(seq->lss_name), + "%s-%s", (is_srv ? "srv" : "ctl"), prefix); + + rc = seq_store_init(seq, env, dev); + if (rc) + GOTO(out, rc); + + /* Request backing store for saved sequence info. */ + rc = seq_store_read(seq, env); + if (rc == -ENODATA) { + + /* Nothing is read, init by default value. */ + seq->lss_space = is_srv ? + LUSTRE_SEQ_ZERO_RANGE: + LUSTRE_SEQ_SPACE_RANGE; + + CDEBUG(D_INFO, "%s: No data found " + "on store. Initialize space\n", + seq->lss_name); + + /* Save default controller value to store. */ + rc = seq_store_write(seq, env); + if (rc) { + CERROR("%s: Can't write space data, " + "rc %d\n", seq->lss_name, rc); + } + } else if (rc) { + CERROR("%s: Can't read space data, rc %d\n", + seq->lss_name, rc); + GOTO(out, rc); + } + + if (is_srv) { + LASSERT(range_is_sane(&seq->lss_space)); + } else { + LASSERT(!range_is_zero(&seq->lss_space) && + range_is_sane(&seq->lss_space)); + } + + rc = seq_server_proc_init(seq); + if (rc) + GOTO(out, rc); + + EXIT; +out: + if (rc) + seq_server_fini(seq, env); + return rc; +} +EXPORT_SYMBOL(seq_server_init); + +void seq_server_fini(struct lu_server_seq *seq, + const struct lu_env *env) +{ + ENTRY; + + seq_server_proc_fini(seq); + seq_store_fini(seq, env); + + EXIT; +} +EXPORT_SYMBOL(seq_server_fini); + +cfs_proc_dir_entry_t *seq_type_proc_dir = NULL; + +static int __init fid_mod_init(void) +{ + seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME, + proc_lustre_root, + NULL, NULL); + if (IS_ERR(seq_type_proc_dir)) + return PTR_ERR(seq_type_proc_dir); + + LU_CONTEXT_KEY_INIT(&seq_thread_key); + lu_context_key_register(&seq_thread_key); + return 0; +} + +static void __exit fid_mod_exit(void) +{ + lu_context_key_degister(&seq_thread_key); + if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) { + lprocfs_remove(&seq_type_proc_dir); + seq_type_proc_dir = NULL; + } +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre FID Module"); +MODULE_LICENSE("GPL"); + +cfs_module(fid, "0.1.0", fid_mod_init, fid_mod_exit); +#endif diff --git a/lustre/fid/fid_internal.h b/lustre/fid/fid_internal.h new file mode 100644 index 0000000..73d5959 --- /dev/null +++ b/lustre/fid/fid_internal.h @@ -0,0 +1,80 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * fid/fid_internal.h + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef __FID_INTERNAL_H +#define __FID_INTERNAL_H + +#include +#include + +#include +#include + +#include + +#ifdef __KERNEL__ +struct seq_thread_info { + struct txn_param sti_txn; + struct req_capsule sti_pill; + struct lu_range sti_space; + int sti_rep_buf_size[REQ_MAX_FIELD_NR]; + struct lu_buf sti_buf; +}; + +extern struct lu_context_key seq_thread_key; + +/* Functions used internally in module. */ +int seq_client_alloc_super(struct lu_client_seq *seq, + const struct lu_env *env); + +int seq_client_replay_super(struct lu_client_seq *seq, + struct lu_range *range, + const struct lu_env *env); + +/* Store API functions. */ +int seq_store_init(struct lu_server_seq *seq, + const struct lu_env *env, + struct dt_device *dt); + +void seq_store_fini(struct lu_server_seq *seq, + const struct lu_env *env); + +int seq_store_write(struct lu_server_seq *seq, + const struct lu_env *env); + +int seq_store_read(struct lu_server_seq *seq, + const struct lu_env *env); + +#ifdef LPROCFS +extern struct lprocfs_vars seq_server_proc_list[]; +extern struct lprocfs_vars seq_client_proc_list[]; +#endif + +#endif + +extern cfs_proc_dir_entry_t *seq_type_proc_dir; + +#endif /* __FID_INTERNAL_H */ diff --git a/lustre/fid/fid_lib.c b/lustre/fid/fid_lib.c new file mode 100644 index 0000000..5074393 --- /dev/null +++ b/lustre/fid/fid_lib.c @@ -0,0 +1,141 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fid/fid_lib.c + * Miscellaneous fid functions. + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov + * Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FID + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include + +/* + * Sequence space, starts from 0x400 to have first 0x400 sequences used for + * special purposes. This means that if we have seq-with 10000 fids, we have + * ~10M fids reserved for special purposes (igifs, etc.). + */ +const struct lu_range LUSTRE_SEQ_SPACE_RANGE = { + (0x400), + ((__u64)~0ULL) +}; +EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE); + +/* Zero range, used for init and other purposes. */ +const struct lu_range LUSTRE_SEQ_ZERO_RANGE = { + 0, + 0 +}; +EXPORT_SYMBOL(LUSTRE_SEQ_ZERO_RANGE); + +/* Lustre Big Fs Lock fid. */ +const struct lu_fid LUSTRE_BFL_FID = { .f_seq = 0x0000000000000003, + .f_oid = 0x0000000000000001, + .f_ver = 0x0000000000000000 }; +EXPORT_SYMBOL(LUSTRE_BFL_FID); + +void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); + dst->f_seq = cpu_to_le64(fid_seq(src)); + dst->f_oid = cpu_to_le32(fid_oid(src)); + dst->f_ver = cpu_to_le32(fid_ver(src)); +} +EXPORT_SYMBOL(fid_cpu_to_le); + +void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = le64_to_cpu(fid_seq(src)); + dst->f_oid = le32_to_cpu(fid_oid(src)); + dst->f_ver = le32_to_cpu(fid_ver(src)); + LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); +} +EXPORT_SYMBOL(fid_le_to_cpu); + +void range_cpu_to_le(struct lu_range *dst, const struct lu_range *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof src->lr_start + + sizeof src->lr_end); + dst->lr_start = cpu_to_le64(src->lr_start); + dst->lr_end = cpu_to_le64(src->lr_end); +} +EXPORT_SYMBOL(range_cpu_to_le); + +void range_le_to_cpu(struct lu_range *dst, const struct lu_range *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof src->lr_start + + sizeof src->lr_end); + dst->lr_start = le64_to_cpu(src->lr_start); + dst->lr_end = le64_to_cpu(src->lr_end); +} +EXPORT_SYMBOL(range_le_to_cpu); + +#ifdef __KERNEL__ +void range_cpu_to_be(struct lu_range *dst, const struct lu_range *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof src->lr_start + + sizeof src->lr_end); + dst->lr_start = cpu_to_be64(src->lr_start); + dst->lr_end = cpu_to_be64(src->lr_end); +} +EXPORT_SYMBOL(range_cpu_to_be); + +void range_be_to_cpu(struct lu_range *dst, const struct lu_range *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof src->lr_start + + sizeof src->lr_end); + dst->lr_start = be64_to_cpu(src->lr_start); + dst->lr_end = be64_to_cpu(src->lr_end); +} +EXPORT_SYMBOL(range_be_to_cpu); + +#endif diff --git a/lustre/fid/fid_request.c b/lustre/fid/fid_request.c new file mode 100644 index 0000000..5c40363 --- /dev/null +++ b/lustre/fid/fid_request.c @@ -0,0 +1,389 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fid/fid_request.c + * Lustre Sequence Manager + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FID + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +/* mdc RPC locks */ +#include +#include "fid_internal.h" + +static int seq_client_rpc(struct lu_client_seq *seq, struct lu_range *input, + struct lu_range *output, __u32 opc, + const char *opcname) +{ + int rc, size[3] = { sizeof(struct ptlrpc_body), + sizeof(__u32), + sizeof(struct lu_range) }; + struct obd_export *exp = seq->lcs_exp; + struct ptlrpc_request *req; + struct lu_range *out, *in; + struct req_capsule pill; + __u32 *op; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + SEQ_QUERY, 3, size, NULL); + if (req == NULL) + RETURN(-ENOMEM); + + req_capsule_init(&pill, req, RCL_CLIENT, NULL); + req_capsule_set(&pill, &RQF_SEQ_QUERY); + + /* Init operation code */ + op = req_capsule_client_get(&pill, &RMF_SEQ_OPC); + *op = opc; + + /* Zero out input range, this is not recovery yet. */ + in = req_capsule_client_get(&pill, &RMF_SEQ_RANGE); + if (input != NULL) + *in = *input; + else + range_zero(in); + + size[1] = sizeof(struct lu_range); + ptlrpc_req_set_repsize(req, 2, size); + + if (seq->lcs_type == LUSTRE_SEQ_METADATA) { + req->rq_request_portal = (opc == SEQ_ALLOC_SUPER) ? + SEQ_CONTROLLER_PORTAL : SEQ_METADATA_PORTAL; + } else { + req->rq_request_portal = (opc == SEQ_ALLOC_SUPER) ? + SEQ_CONTROLLER_PORTAL : SEQ_DATA_PORTAL; + } + + mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + + if (rc) + GOTO(out_req, rc); + + out = req_capsule_server_get(&pill, &RMF_SEQ_RANGE); + *output = *out; + + if (!range_is_sane(output)) { + CERROR("%s: Invalid range received from server: " + DRANGE"\n", seq->lcs_name, PRANGE(output)); + GOTO(out_req, rc = -EINVAL); + } + + if (range_is_exhausted(output)) { + CERROR("%s: Range received from server is exhausted: " + DRANGE"]\n", seq->lcs_name, PRANGE(output)); + GOTO(out_req, rc = -EINVAL); + } + *in = *out; + + CDEBUG(D_INFO, "%s: Allocated %s-sequence "DRANGE"]\n", + seq->lcs_name, opcname, PRANGE(output)); + + EXIT; +out_req: + req_capsule_fini(&pill); + ptlrpc_req_finished(req); + return rc; +} + +/* Request sequence-controller node to allocate new super-sequence. */ +int seq_client_replay_super(struct lu_client_seq *seq, + struct lu_range *range, + const struct lu_env *env) +{ + int rc; + ENTRY; + + down(&seq->lcs_sem); + +#ifdef __KERNEL__ + if (seq->lcs_srv) { + LASSERT(env != NULL); + rc = seq_server_alloc_super(seq->lcs_srv, range, + &seq->lcs_space, env); + } else { +#endif + rc = seq_client_rpc(seq, range, &seq->lcs_space, + SEQ_ALLOC_SUPER, "super"); +#ifdef __KERNEL__ + } +#endif + up(&seq->lcs_sem); + RETURN(rc); +} + +/* Request sequence-controller node to allocate new super-sequence. */ +int seq_client_alloc_super(struct lu_client_seq *seq, + const struct lu_env *env) +{ + ENTRY; + RETURN(seq_client_replay_super(seq, NULL, env)); +} + +/* Request sequence-controller node to allocate new meta-sequence. */ +static int seq_client_alloc_meta(struct lu_client_seq *seq, + const struct lu_env *env) +{ + int rc; + ENTRY; + +#ifdef __KERNEL__ + if (seq->lcs_srv) { + LASSERT(env != NULL); + rc = seq_server_alloc_meta(seq->lcs_srv, NULL, + &seq->lcs_space, env); + } else { +#endif + rc = seq_client_rpc(seq, NULL, &seq->lcs_space, + SEQ_ALLOC_META, "meta"); +#ifdef __KERNEL__ + } +#endif + RETURN(rc); +} + +/* Allocate new sequence for client. */ +static int seq_client_alloc_seq(struct lu_client_seq *seq, seqno_t *seqnr) +{ + int rc; + ENTRY; + + LASSERT(range_is_sane(&seq->lcs_space)); + + if (range_is_exhausted(&seq->lcs_space)) { + rc = seq_client_alloc_meta(seq, NULL); + if (rc) { + CERROR("%s: Can't allocate new meta-sequence, " + "rc %d\n", seq->lcs_name, rc); + RETURN(rc); + } else { + CDEBUG(D_INFO, "%s: New range - "DRANGE"\n", + seq->lcs_name, PRANGE(&seq->lcs_space)); + } + } else { + rc = 0; + } + + LASSERT(!range_is_exhausted(&seq->lcs_space)); + *seqnr = seq->lcs_space.lr_start; + seq->lcs_space.lr_start += 1; + + CDEBUG(D_INFO, "%s: Allocated sequence ["LPX64"]\n", seq->lcs_name, + *seqnr); + + RETURN(rc); +} + +/* Allocate new fid on passed client @seq and save it to @fid. */ +int seq_client_alloc_fid(struct lu_client_seq *seq, struct lu_fid *fid) +{ + int rc; + ENTRY; + + LASSERT(seq != NULL); + LASSERT(fid != NULL); + + down(&seq->lcs_sem); + + if (fid_is_zero(&seq->lcs_fid) || + fid_oid(&seq->lcs_fid) >= seq->lcs_width) + { + seqno_t seqnr; + + rc = seq_client_alloc_seq(seq, &seqnr); + if (rc) { + CERROR("%s: Can't allocate new sequence, " + "rc %d\n", seq->lcs_name, rc); + up(&seq->lcs_sem); + RETURN(rc); + } + + CDEBUG(D_INFO, "%s: Switch to sequence " + "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr); + + seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID; + seq->lcs_fid.f_seq = seqnr; + seq->lcs_fid.f_ver = 0; + + /* + * Inform caller that sequence switch is performed to allow it + * to setup FLD for it. + */ + rc = 1; + } else { + /* Just bump last allocated fid and return to caller. */ + seq->lcs_fid.f_oid += 1; + rc = 0; + } + + *fid = seq->lcs_fid; + up(&seq->lcs_sem); + + CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid)); + RETURN(rc); +} +EXPORT_SYMBOL(seq_client_alloc_fid); + +/* + * Finish the current sequence due to disconnect. + * See mdc_import_event() + */ +void seq_client_flush(struct lu_client_seq *seq) +{ + LASSERT(seq != NULL); + down(&seq->lcs_sem); + fid_zero(&seq->lcs_fid); + range_zero(&seq->lcs_space); + up(&seq->lcs_sem); +} +EXPORT_SYMBOL(seq_client_flush); + +static void seq_client_proc_fini(struct lu_client_seq *seq); + +#ifdef LPROCFS +static int seq_client_proc_init(struct lu_client_seq *seq) +{ + int rc; + ENTRY; + + seq->lcs_proc_dir = lprocfs_register(seq->lcs_name, + seq_type_proc_dir, + NULL, NULL); + + if (IS_ERR(seq->lcs_proc_dir)) { + CERROR("%s: LProcFS failed in seq-init\n", + seq->lcs_name); + rc = PTR_ERR(seq->lcs_proc_dir); + RETURN(rc); + } + + rc = lprocfs_add_vars(seq->lcs_proc_dir, + seq_client_proc_list, seq); + if (rc) { + CERROR("%s: Can't init sequence manager " + "proc, rc %d\n", seq->lcs_name, rc); + GOTO(out_cleanup, rc); + } + + RETURN(0); + +out_cleanup: + seq_client_proc_fini(seq); + return rc; +} + +static void seq_client_proc_fini(struct lu_client_seq *seq) +{ + ENTRY; + if (seq->lcs_proc_dir) { + if (!IS_ERR(seq->lcs_proc_dir)) + lprocfs_remove(&seq->lcs_proc_dir); + seq->lcs_proc_dir = NULL; + } + EXIT; +} +#else +static int seq_client_proc_init(struct lu_client_seq *seq) +{ + return 0; +} + +static void seq_client_proc_fini(struct lu_client_seq *seq) +{ + return; +} +#endif + +int seq_client_init(struct lu_client_seq *seq, + struct obd_export *exp, + enum lu_cli_type type, + const char *prefix, + struct lu_server_seq *srv) +{ + int rc; + ENTRY; + + LASSERT(seq != NULL); + LASSERT(prefix != NULL); + + seq->lcs_exp = exp; + seq->lcs_srv = srv; + seq->lcs_type = type; + sema_init(&seq->lcs_sem, 1); + seq->lcs_width = LUSTRE_SEQ_MAX_WIDTH; + + /* Make sure that things are clear before work is started. */ + seq_client_flush(seq); + + if (exp == NULL) { + LASSERT(seq->lcs_srv != NULL); + } else { + LASSERT(seq->lcs_exp != NULL); + seq->lcs_exp = class_export_get(seq->lcs_exp); + } + + snprintf(seq->lcs_name, sizeof(seq->lcs_name), + "cli-%s", prefix); + + rc = seq_client_proc_init(seq); + if (rc) + seq_client_fini(seq); + RETURN(rc); +} +EXPORT_SYMBOL(seq_client_init); + +void seq_client_fini(struct lu_client_seq *seq) +{ + ENTRY; + + seq_client_proc_fini(seq); + + if (seq->lcs_exp != NULL) { + class_export_put(seq->lcs_exp); + seq->lcs_exp = NULL; + } + + seq->lcs_srv = NULL; + EXIT; +} +EXPORT_SYMBOL(seq_client_fini); diff --git a/lustre/fid/fid_store.c b/lustre/fid/fid_store.c new file mode 100644 index 0000000..b208e59 --- /dev/null +++ b/lustre/fid/fid_store.c @@ -0,0 +1,182 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fid/fid_store.c + * Lustre Sequence Manager + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FID + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "fid_internal.h" + +#ifdef __KERNEL__ +enum { + SEQ_TXN_STORE_CREDITS = 20 +}; + +static struct lu_buf *seq_store_buf(struct seq_thread_info *info) +{ + struct lu_buf *buf; + + buf = &info->sti_buf; + buf->lb_buf = &info->sti_space; + buf->lb_len = sizeof(info->sti_space); + return buf; +} + +/* This function implies that caller takes care about locking. */ +int seq_store_write(struct lu_server_seq *seq, + const struct lu_env *env) +{ + struct dt_object *dt_obj = seq->lss_obj; + struct seq_thread_info *info; + struct dt_device *dt_dev; + struct thandle *th; + loff_t pos = 0; + int rc; + ENTRY; + + dt_dev = lu2dt_dev(seq->lss_obj->do_lu.lo_dev); + info = lu_context_key_get(&env->le_ctx, &seq_thread_key); + LASSERT(info != NULL); + + /* Stub here, will fix it later. */ + txn_param_init(&info->sti_txn, SEQ_TXN_STORE_CREDITS); + + th = dt_dev->dd_ops->dt_trans_start(env, dt_dev, &info->sti_txn); + if (!IS_ERR(th)) { + /* Store ranges in le format. */ + range_cpu_to_le(&info->sti_space, &seq->lss_space); + + rc = dt_obj->do_body_ops->dbo_write(env, dt_obj, + seq_store_buf(info), + &pos, th, BYPASS_CAPA); + if (rc == sizeof(info->sti_space)) { + CDEBUG(D_INFO, "%s: Space - "DRANGE"\n", + seq->lss_name, PRANGE(&seq->lss_space)); + rc = 0; + } else if (rc >= 0) { + rc = -EIO; + } + + dt_dev->dd_ops->dt_trans_stop(env, th); + } else { + rc = PTR_ERR(th); + } + + RETURN(rc); +} + +/* + * This function implies that caller takes care about locking or locking is not + * needed (init time). + */ +int seq_store_read(struct lu_server_seq *seq, + const struct lu_env *env) +{ + struct dt_object *dt_obj = seq->lss_obj; + struct seq_thread_info *info; + loff_t pos = 0; + int rc; + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &seq_thread_key); + LASSERT(info != NULL); + + rc = dt_obj->do_body_ops->dbo_read(env, dt_obj, seq_store_buf(info), + &pos, BYPASS_CAPA); + + if (rc == sizeof(info->sti_space)) { + range_le_to_cpu(&seq->lss_space, &info->sti_space); + CDEBUG(D_INFO, "%s: Space - "DRANGE"\n", + seq->lss_name, PRANGE(&seq->lss_space)); + rc = 0; + } else if (rc == 0) { + rc = -ENODATA; + } else if (rc >= 0) { + CERROR("%s: Read only %d bytes of %d\n", seq->lss_name, + rc, sizeof(info->sti_space)); + rc = -EIO; + } + + RETURN(rc); +} + +int seq_store_init(struct lu_server_seq *seq, + const struct lu_env *env, + struct dt_device *dt) +{ + struct dt_object *dt_obj; + struct lu_fid fid; + const char *name; + int rc; + ENTRY; + + name = seq->lss_type == LUSTRE_SEQ_SERVER ? + LUSTRE_SEQ_SRV_NAME : LUSTRE_SEQ_CTL_NAME; + + dt_obj = dt_store_open(env, dt, name, &fid); + if (!IS_ERR(dt_obj)) { + seq->lss_obj = dt_obj; + rc = 0; + } else { + CERROR("%s: Can't find \"%s\" obj %d\n", + seq->lss_name, name, (int)PTR_ERR(dt_obj)); + rc = PTR_ERR(dt_obj); + } + + RETURN(rc); +} + +void seq_store_fini(struct lu_server_seq *seq, + const struct lu_env *env) +{ + ENTRY; + + if (seq->lss_obj != NULL) { + if (!IS_ERR(seq->lss_obj)) + lu_object_put(env, &seq->lss_obj->do_lu); + seq->lss_obj = NULL; + } + + EXIT; +} +#endif diff --git a/lustre/fid/lproc_fid.c b/lustre/fid/lproc_fid.c new file mode 100644 index 0000000..cf1896f --- /dev/null +++ b/lustre/fid/lproc_fid.c @@ -0,0 +1,343 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fid/lproc_fid.c + * Lustre Sequence Manager + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FID + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "fid_internal.h" + +#ifdef LPROCFS +/* + * Note: this function is only used for testing, it is no safe for production + * use. + */ +static int +seq_proc_write_common(struct file *file, const char *buffer, + unsigned long count, void *data, + struct lu_range *range) +{ + struct lu_range tmp; + int rc; + ENTRY; + + LASSERT(range != NULL); + + rc = sscanf(buffer, "[%Lx - %Lx]\n", &tmp.lr_start, &tmp.lr_end); + if (rc != 2 || !range_is_sane(&tmp) || range_is_zero(&tmp)) + RETURN(-EINVAL); + *range = tmp; + RETURN(0); +} + +static int +seq_proc_read_common(char *page, char **start, off_t off, + int count, int *eof, void *data, + struct lu_range *range) +{ + int rc; + ENTRY; + + *eof = 1; + rc = snprintf(page, count, "[%Lx - %Lx]\n", + PRANGE(range)); + RETURN(rc); +} + +/* + * Server side procfs stuff. + */ +static int +seq_server_proc_write_space(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_server_seq *seq = (struct lu_server_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lss_sem); + rc = seq_proc_write_common(file, buffer, count, + data, &seq->lss_space); + if (rc == 0) { + CDEBUG(D_INFO, "%s: Space: "DRANGE"\n", + seq->lss_name, PRANGE(&seq->lss_space)); + } + + up(&seq->lss_sem); + + RETURN(count); +} + +static int +seq_server_proc_read_space(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_server_seq *seq = (struct lu_server_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lss_sem); + rc = seq_proc_read_common(page, start, off, count, eof, + data, &seq->lss_space); + up(&seq->lss_sem); + + RETURN(rc); +} + +static int +seq_server_proc_read_server(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_server_seq *seq = (struct lu_server_seq *)data; + struct client_obd *cli; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + *eof = 1; + if (seq->lss_cli) { + if (seq->lss_cli->lcs_exp != NULL) { + cli = &seq->lss_cli->lcs_exp->exp_obd->u.cli; + rc = snprintf(page, count, "%s\n", + cli->cl_target_uuid.uuid); + } else { + rc = snprintf(page, count, "%s\n", + seq->lss_cli->lcs_srv->lss_name); + } + } else { + rc = snprintf(page, count, "\n"); + } + + RETURN(rc); +} + +static int +seq_server_proc_write_width(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_server_seq *seq = (struct lu_server_seq *)data; + int rc, val; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lss_sem); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + RETURN(rc); + + seq->lss_width = val; + + if (rc == 0) { + CDEBUG(D_INFO, "%s: Width: "LPU64"\n", + seq->lss_name, seq->lss_width); + } + + up(&seq->lss_sem); + + RETURN(count); +} + +static int +seq_server_proc_read_width(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_server_seq *seq = (struct lu_server_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lss_sem); + rc = snprintf(page, count, LPU64"\n", seq->lss_width); + up(&seq->lss_sem); + + RETURN(rc); +} + +/* Client side procfs stuff */ +static int +seq_client_proc_write_space(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lcs_sem); + rc = seq_proc_write_common(file, buffer, count, + data, &seq->lcs_space); + + if (rc == 0) { + CDEBUG(D_INFO, "%s: Space: "DRANGE"\n", + seq->lcs_name, PRANGE(&seq->lcs_space)); + } + + up(&seq->lcs_sem); + + RETURN(count); +} + +static int +seq_client_proc_read_space(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lcs_sem); + rc = seq_proc_read_common(page, start, off, count, eof, + data, &seq->lcs_space); + up(&seq->lcs_sem); + + RETURN(rc); +} + +static int +seq_client_proc_write_width(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + int rc, val; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lcs_sem); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + RETURN(rc); + + if (val <= LUSTRE_SEQ_MAX_WIDTH && val > 0) { + seq->lcs_width = val; + + if (rc == 0) { + CDEBUG(D_INFO, "%s: Sequence size: "LPU64"\n", + seq->lcs_name, seq->lcs_width); + } + } + + up(&seq->lcs_sem); + + RETURN(count); +} + +static int +seq_client_proc_read_width(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lcs_sem); + rc = snprintf(page, count, LPU64"\n", seq->lcs_width); + up(&seq->lcs_sem); + + RETURN(rc); +} + +static int +seq_client_proc_read_fid(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + down(&seq->lcs_sem); + rc = snprintf(page, count, DFID"\n", PFID(&seq->lcs_fid)); + up(&seq->lcs_sem); + + RETURN(rc); +} + +static int +seq_client_proc_read_server(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_seq *seq = (struct lu_client_seq *)data; + struct client_obd *cli; + int rc; + ENTRY; + + LASSERT(seq != NULL); + + if (seq->lcs_exp != NULL) { + cli = &seq->lcs_exp->exp_obd->u.cli; + rc = snprintf(page, count, "%s\n", cli->cl_target_uuid.uuid); + } else { + rc = snprintf(page, count, "%s\n", seq->lcs_srv->lss_name); + } + RETURN(rc); +} + +struct lprocfs_vars seq_server_proc_list[] = { + { "space", seq_server_proc_read_space, seq_server_proc_write_space, NULL }, + { "width", seq_server_proc_read_width, seq_server_proc_write_width, NULL }, + { "server", seq_server_proc_read_server, NULL, NULL }, + { NULL }}; + +struct lprocfs_vars seq_client_proc_list[] = { + { "space", seq_client_proc_read_space, seq_client_proc_write_space, NULL }, + { "width", seq_client_proc_read_width, seq_client_proc_write_width, NULL }, + { "server", seq_client_proc_read_server, NULL, NULL }, + { "fid", seq_client_proc_read_fid, NULL, NULL }, + { NULL }}; +#endif diff --git a/lustre/fld/.cvsignore b/lustre/fld/.cvsignore new file mode 100644 index 0000000..5d26f00 --- /dev/null +++ b/lustre/fld/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lustre/fld/Makefile.in b/lustre/fld/Makefile.in new file mode 100644 index 0000000..2887277 --- /dev/null +++ b/lustre/fld/Makefile.in @@ -0,0 +1,6 @@ +MODULES := fld +fld-objs := fld_handler.o fld_request.o fld_cache.o fld_index.o lproc_fld.o + +EXTRA_PRE_CFLAGS := -I@LUSTRE@ -I@LUSTRE@/ldiskfs + +@INCLUDE_RULES@ diff --git a/lustre/fld/autoMakefile.am b/lustre/fld/autoMakefile.am new file mode 100644 index 0000000..bb6c212 --- /dev/null +++ b/lustre/fld/autoMakefile.am @@ -0,0 +1,18 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if LIBLUSTRE +noinst_LIBRARIES = libfld.a +libfld_a_SOURCES = fld_handler.c fld_request.c fld_cache.c lproc_fld.c fld_internal.h +libfld_a_CPPFLAGS = $(LLCPPFLAGS) +libfld_a_CFLAGS = $(LLCFLAGS) +endif + +if MODULES +modulefs_DATA = fld$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES := $(fld-objs:%.o=%.c) fld_internal.h diff --git a/lustre/fld/fld_cache.c b/lustre/fld/fld_cache.c new file mode 100644 index 0000000..192d81d --- /dev/null +++ b/lustre/fld/fld_cache.c @@ -0,0 +1,442 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fld/fld_cache.c + * FLD (Fids Location Database) + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FLD + +#ifdef __KERNEL__ +# include +# include +# include +# include +#else /* __KERNEL__ */ +# include +# include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "fld_internal.h" + +#ifdef __KERNEL__ +static inline __u32 fld_cache_hash(seqno_t seq) +{ + return (__u32)seq; +} + +void fld_cache_flush(struct fld_cache *cache) +{ + struct fld_cache_entry *flde; + struct hlist_head *bucket; + struct hlist_node *scan; + struct hlist_node *next; + int i; + ENTRY; + + /* Free all cache entries. */ + spin_lock(&cache->fci_lock); + for (i = 0; i < cache->fci_hash_size; i++) { + bucket = cache->fci_hash_table + i; + hlist_for_each_entry_safe(flde, scan, next, bucket, fce_list) { + hlist_del_init(&flde->fce_list); + list_del_init(&flde->fce_lru); + cache->fci_cache_count--; + OBD_FREE_PTR(flde); + } + } + spin_unlock(&cache->fci_lock); + EXIT; +} + +struct fld_cache *fld_cache_init(const char *name, int hash_size, + int cache_size, int cache_threshold) +{ + struct fld_cache *cache; + int i; + ENTRY; + + LASSERT(name != NULL); + LASSERT(IS_PO2(hash_size)); + LASSERT(cache_threshold < cache_size); + + OBD_ALLOC_PTR(cache); + if (cache == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + INIT_LIST_HEAD(&cache->fci_lru); + + cache->fci_cache_count = 0; + spin_lock_init(&cache->fci_lock); + + strncpy(cache->fci_name, name, + sizeof(cache->fci_name)); + + cache->fci_hash_size = hash_size; + cache->fci_cache_size = cache_size; + cache->fci_threshold = cache_threshold; + + /* Init fld cache info. */ + cache->fci_hash_mask = hash_size - 1; + OBD_ALLOC(cache->fci_hash_table, + hash_size * sizeof(*cache->fci_hash_table)); + if (cache->fci_hash_table == NULL) { + OBD_FREE_PTR(cache); + RETURN(ERR_PTR(-ENOMEM)); + } + + for (i = 0; i < hash_size; i++) + INIT_HLIST_HEAD(&cache->fci_hash_table[i]); + memset(&cache->fci_stat, 0, sizeof(cache->fci_stat)); + + CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n", + cache->fci_name, cache_size, cache_threshold); + + RETURN(cache); +} +EXPORT_SYMBOL(fld_cache_init); + +void fld_cache_fini(struct fld_cache *cache) +{ + __u64 pct; + ENTRY; + + LASSERT(cache != NULL); + fld_cache_flush(cache); + + if (cache->fci_stat.fst_count > 0) { + pct = cache->fci_stat.fst_cache * 100; + do_div(pct, cache->fci_stat.fst_count); + } else { + pct = 0; + } + + printk("FLD cache statistics (%s):\n", cache->fci_name); + printk(" Total reqs: "LPU64"\n", cache->fci_stat.fst_count); + printk(" Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache); + printk(" Saved RPCs: "LPU64"\n", cache->fci_stat.fst_inflight); + printk(" Cache hits: "LPU64"%%\n", pct); + + OBD_FREE(cache->fci_hash_table, cache->fci_hash_size * + sizeof(*cache->fci_hash_table)); + OBD_FREE_PTR(cache); + + EXIT; +} +EXPORT_SYMBOL(fld_cache_fini); + +static inline struct hlist_head * +fld_cache_bucket(struct fld_cache *cache, seqno_t seq) +{ + return cache->fci_hash_table + (fld_cache_hash(seq) & + cache->fci_hash_mask); +} + +/* + * Check if cache needs to be shrinked. If so - do it. Tries to keep all + * collision lists well balanced. That is, check all of them and remove one + * entry in list and so on until cache is shrinked enough. + */ +static int fld_cache_shrink(struct fld_cache *cache) +{ + struct fld_cache_entry *flde; + struct list_head *curr; + int num = 0; + ENTRY; + + LASSERT(cache != NULL); + + if (cache->fci_cache_count < cache->fci_cache_size) + RETURN(0); + + curr = cache->fci_lru.prev; + + while (cache->fci_cache_count + cache->fci_threshold > + cache->fci_cache_size && curr != &cache->fci_lru) + { + flde = list_entry(curr, struct fld_cache_entry, fce_lru); + curr = curr->prev; + + /* keep inflights */ + if (flde->fce_inflight) + continue; + + hlist_del_init(&flde->fce_list); + list_del_init(&flde->fce_lru); + cache->fci_cache_count--; + OBD_FREE_PTR(flde); + num++; + } + + CDEBUG(D_INFO, "%s: FLD cache - Shrinked by " + "%d entries\n", cache->fci_name, num); + + RETURN(0); +} + +int fld_cache_insert_inflight(struct fld_cache *cache, seqno_t seq) +{ + struct fld_cache_entry *flde, *fldt; + struct hlist_head *bucket; + struct hlist_node *scan; + ENTRY; + + spin_lock(&cache->fci_lock); + + /* Check if cache already has the entry with such a seq. */ + bucket = fld_cache_bucket(cache, seq); + hlist_for_each_entry(fldt, scan, bucket, fce_list) { + if (fldt->fce_seq == seq) { + spin_unlock(&cache->fci_lock); + RETURN(-EEXIST); + } + } + spin_unlock(&cache->fci_lock); + + /* Allocate new entry. */ + OBD_ALLOC_PTR(flde); + if (!flde) + RETURN(-ENOMEM); + + /* + * Check if cache has the entry with such a seq again. It could be added + * while we were allocating new entry. + */ + spin_lock(&cache->fci_lock); + hlist_for_each_entry(fldt, scan, bucket, fce_list) { + if (fldt->fce_seq == seq) { + spin_unlock(&cache->fci_lock); + OBD_FREE_PTR(flde); + RETURN(0); + } + } + + /* Add new entry to cache and lru list. */ + INIT_HLIST_NODE(&flde->fce_list); + flde->fce_inflight = 1; + flde->fce_invalid = 1; + cfs_waitq_init(&flde->fce_waitq); + flde->fce_seq = seq; + + hlist_add_head(&flde->fce_list, bucket); + list_add(&flde->fce_lru, &cache->fci_lru); + cache->fci_cache_count++; + + spin_unlock(&cache->fci_lock); + + RETURN(0); +} +EXPORT_SYMBOL(fld_cache_insert_inflight); + +int fld_cache_insert(struct fld_cache *cache, + seqno_t seq, mdsno_t mds) +{ + struct fld_cache_entry *flde, *fldt; + struct hlist_head *bucket; + struct hlist_node *scan; + int rc; + ENTRY; + + spin_lock(&cache->fci_lock); + + /* Check if need to shrink cache. */ + rc = fld_cache_shrink(cache); + if (rc) { + spin_unlock(&cache->fci_lock); + RETURN(rc); + } + + /* Check if cache already has the entry with such a seq. */ + bucket = fld_cache_bucket(cache, seq); + hlist_for_each_entry(fldt, scan, bucket, fce_list) { + if (fldt->fce_seq == seq) { + if (fldt->fce_inflight) { + /* set mds for inflight entry */ + fldt->fce_mds = mds; + fldt->fce_inflight = 0; + fldt->fce_invalid = 0; + cfs_waitq_signal(&fldt->fce_waitq); + rc = 0; + } else + rc = -EEXIST; + spin_unlock(&cache->fci_lock); + RETURN(rc); + } + } + spin_unlock(&cache->fci_lock); + + /* Allocate new entry. */ + OBD_ALLOC_PTR(flde); + if (!flde) + RETURN(-ENOMEM); + + /* + * Check if cache has the entry with such a seq again. It could be added + * while we were allocating new entry. + */ + spin_lock(&cache->fci_lock); + hlist_for_each_entry(fldt, scan, bucket, fce_list) { + if (fldt->fce_seq == seq) { + spin_unlock(&cache->fci_lock); + OBD_FREE_PTR(flde); + RETURN(0); + } + } + + /* Add new entry to cache and lru list. */ + INIT_HLIST_NODE(&flde->fce_list); + flde->fce_mds = mds; + flde->fce_seq = seq; + flde->fce_inflight = 0; + flde->fce_invalid = 0; + + hlist_add_head(&flde->fce_list, bucket); + list_add(&flde->fce_lru, &cache->fci_lru); + cache->fci_cache_count++; + + spin_unlock(&cache->fci_lock); + + RETURN(0); +} +EXPORT_SYMBOL(fld_cache_insert); + +void fld_cache_delete(struct fld_cache *cache, seqno_t seq) +{ + struct fld_cache_entry *flde; + struct hlist_node *scan, *n; + struct hlist_head *bucket; + ENTRY; + + bucket = fld_cache_bucket(cache, seq); + + spin_lock(&cache->fci_lock); + hlist_for_each_entry_safe(flde, scan, n, bucket, fce_list) { + if (flde->fce_seq == seq) { + hlist_del_init(&flde->fce_list); + list_del_init(&flde->fce_lru); + if (flde->fce_inflight) { + flde->fce_inflight = 0; + flde->fce_invalid = 1; + cfs_waitq_signal(&flde->fce_waitq); + } + cache->fci_cache_count--; + OBD_FREE_PTR(flde); + GOTO(out_unlock, 0); + } + } + + EXIT; +out_unlock: + spin_unlock(&cache->fci_lock); +} +EXPORT_SYMBOL(fld_cache_delete); + +static int fld_check_inflight(struct fld_cache_entry *flde) +{ + return (flde->fce_inflight); +} + +int fld_cache_lookup(struct fld_cache *cache, + seqno_t seq, mdsno_t *mds) +{ + struct fld_cache_entry *flde; + struct hlist_node *scan, *n; + struct hlist_head *bucket; + ENTRY; + + bucket = fld_cache_bucket(cache, seq); + + spin_lock(&cache->fci_lock); + cache->fci_stat.fst_count++; + hlist_for_each_entry_safe(flde, scan, n, bucket, fce_list) { + if (flde->fce_seq == seq) { + if (flde->fce_inflight) { + /* lookup RPC is inflight need to wait */ + struct l_wait_info lwi; + spin_unlock(&cache->fci_lock); + lwi = LWI_TIMEOUT(0, NULL, NULL); + l_wait_event(flde->fce_waitq, + !fld_check_inflight(flde), &lwi); + LASSERT(!flde->fce_inflight); + if (flde->fce_invalid) + RETURN(-ENOENT); + + *mds = flde->fce_mds; + cache->fci_stat.fst_inflight++; + } else { + LASSERT(!flde->fce_invalid); + *mds = flde->fce_mds; + list_del(&flde->fce_lru); + list_add(&flde->fce_lru, &cache->fci_lru); + cache->fci_stat.fst_cache++; + spin_unlock(&cache->fci_lock); + } + RETURN(0); + } + } + spin_unlock(&cache->fci_lock); + RETURN(-ENOENT); +} +EXPORT_SYMBOL(fld_cache_lookup); +#else +int fld_cache_insert_inflight(struct fld_cache *cache, seqno_t seq) +{ + return -ENOTSUPP; +} +EXPORT_SYMBOL(fld_cache_insert_inflight); + +int fld_cache_insert(struct fld_cache *cache, + seqno_t seq, mdsno_t mds) +{ + return -ENOTSUPP; +} +EXPORT_SYMBOL(fld_cache_insert); + +void fld_cache_delete(struct fld_cache *cache, + seqno_t seq) +{ + return; +} +EXPORT_SYMBOL(fld_cache_delete); + +int fld_cache_lookup(struct fld_cache *cache, + seqno_t seq, mdsno_t *mds) +{ + return -ENOTSUPP; +} +EXPORT_SYMBOL(fld_cache_lookup); +#endif + diff --git a/lustre/fld/fld_handler.c b/lustre/fld/fld_handler.c new file mode 100644 index 0000000..d34ec0c --- /dev/null +++ b/lustre/fld/fld_handler.c @@ -0,0 +1,428 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fld/fld_handler.c + * FLD (Fids Location Database) + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * WangDi + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FLD + +#ifdef __KERNEL__ +# include +# include +# include +# include +#else /* __KERNEL__ */ +# include +# include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include "fld_internal.h" + +#ifdef __KERNEL__ +static void *fld_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct fld_thread_info *info; + ENTRY; + + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + RETURN(info); +} + +static void fld_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct fld_thread_info *info = data; + ENTRY; + OBD_FREE_PTR(info); + EXIT; +} + +struct lu_context_key fld_thread_key = { + .lct_tags = LCT_MD_THREAD|LCT_DT_THREAD, + .lct_init = fld_key_init, + .lct_fini = fld_key_fini +}; + +cfs_proc_dir_entry_t *fld_type_proc_dir = NULL; + +static int __init fld_mod_init(void) +{ + fld_type_proc_dir = lprocfs_register(LUSTRE_FLD_NAME, + proc_lustre_root, + NULL, NULL); + if (IS_ERR(fld_type_proc_dir)) + return PTR_ERR(fld_type_proc_dir); + + LU_CONTEXT_KEY_INIT(&fld_thread_key); + lu_context_key_register(&fld_thread_key); + return 0; +} + +static void __exit fld_mod_exit(void) +{ + lu_context_key_degister(&fld_thread_key); + if (fld_type_proc_dir != NULL && !IS_ERR(fld_type_proc_dir)) { + lprocfs_remove(&fld_type_proc_dir); + fld_type_proc_dir = NULL; + } +} + +/* Insert index entry and update cache. */ +int fld_server_create(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t mds) +{ + int rc; + ENTRY; + + rc = fld_index_create(fld, env, seq, mds); + + if (rc == 0) { + /* + * Do not return result of calling fld_cache_insert() + * here. First of all because it may return -EEXISTS. Another + * reason is that, we do not want to stop proceeding even after + * cache errors. + */ + fld_cache_insert(fld->lsf_cache, seq, mds); + } + + RETURN(rc); +} +EXPORT_SYMBOL(fld_server_create); + +/* Delete index entry. */ +int fld_server_delete(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq) +{ + int rc; + ENTRY; + + fld_cache_delete(fld->lsf_cache, seq); + rc = fld_index_delete(fld, env, seq); + + RETURN(rc); +} +EXPORT_SYMBOL(fld_server_delete); + +/* Lookup mds by seq. */ +int fld_server_lookup(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t *mds) +{ + int rc; + ENTRY; + + /* Lookup it in the cache. */ + rc = fld_cache_lookup(fld->lsf_cache, seq, mds); + if (rc == 0) + RETURN(0); + + rc = fld_index_lookup(fld, env, seq, mds); + if (rc == 0) { + /* + * Do not return error here as well. See previous comment in + * same situation in function fld_server_create(). + */ + fld_cache_insert(fld->lsf_cache, seq, *mds); + } + RETURN(rc); +} +EXPORT_SYMBOL(fld_server_lookup); + +static int fld_server_handle(struct lu_server_fld *fld, + const struct lu_env *env, + __u32 opc, struct md_fld *mf, + struct fld_thread_info *info) +{ + int rc; + ENTRY; + + switch (opc) { + case FLD_CREATE: + rc = fld_server_create(fld, env, + mf->mf_seq, mf->mf_mds); + + /* Do not return -EEXIST error for resent case */ + if ((info->fti_flags & MSG_RESENT) && rc == -EEXIST) + rc = 0; + break; + case FLD_DELETE: + rc = fld_server_delete(fld, env, mf->mf_seq); + + /* Do not return -ENOENT error for resent case */ + if ((info->fti_flags & MSG_RESENT) && rc == -ENOENT) + rc = 0; + break; + case FLD_LOOKUP: + rc = fld_server_lookup(fld, env, + mf->mf_seq, &mf->mf_mds); + break; + default: + rc = -EINVAL; + break; + } + + CDEBUG(D_INFO, "%s: FLD req handle: error %d (opc: %d, seq: " + LPX64", mds: "LPU64")\n", fld->lsf_name, rc, opc, + mf->mf_seq, mf->mf_mds); + + RETURN(rc); + +} + +static int fld_req_handle(struct ptlrpc_request *req, + struct fld_thread_info *info) +{ + struct lu_site *site; + struct md_fld *in; + struct md_fld *out; + int rc; + __u32 *opc; + ENTRY; + + site = req->rq_export->exp_obd->obd_lu_dev->ld_site; + + rc = req_capsule_pack(&info->fti_pill); + if (rc) + RETURN(err_serious(rc)); + + opc = req_capsule_client_get(&info->fti_pill, &RMF_FLD_OPC); + if (opc != NULL) { + in = req_capsule_client_get(&info->fti_pill, &RMF_FLD_MDFLD); + if (in == NULL) + RETURN(err_serious(-EPROTO)); + out = req_capsule_server_get(&info->fti_pill, &RMF_FLD_MDFLD); + if (out == NULL) + RETURN(err_serious(-EPROTO)); + *out = *in; + + rc = fld_server_handle(site->ls_server_fld, + req->rq_svc_thread->t_env, + *opc, out, info); + } else + rc = err_serious(-EPROTO); + + RETURN(rc); +} + +static void fld_thread_info_init(struct ptlrpc_request *req, + struct fld_thread_info *info) +{ + int i; + + info->fti_flags = lustre_msg_get_flags(req->rq_reqmsg); + + /* Mark rep buffer as req-layout stuff expects. */ + for (i = 0; i < ARRAY_SIZE(info->fti_rep_buf_size); i++) + info->fti_rep_buf_size[i] = -1; + + /* Init request capsule. */ + req_capsule_init(&info->fti_pill, req, RCL_SERVER, + info->fti_rep_buf_size); + + req_capsule_set(&info->fti_pill, &RQF_FLD_QUERY); +} + +static void fld_thread_info_fini(struct fld_thread_info *info) +{ + req_capsule_fini(&info->fti_pill); +} + +static int fld_handle(struct ptlrpc_request *req) +{ + struct fld_thread_info *info; + const struct lu_env *env; + int rc; + + env = req->rq_svc_thread->t_env; + LASSERT(env != NULL); + + info = lu_context_key_get(&env->le_ctx, &fld_thread_key); + LASSERT(info != NULL); + + fld_thread_info_init(req, info); + rc = fld_req_handle(req, info); + fld_thread_info_fini(info); + + return rc; +} + +/* + * Entry point for handling FLD RPCs called from MDT. + */ +int fld_query(struct com_thread_info *info) +{ + return fld_handle(info->cti_pill.rc_req); +} +EXPORT_SYMBOL(fld_query); + +/* + * Returns true, if fid is local to this server node. + * + * WARNING: this function is *not* guaranteed to return false if fid is + * remote: it makes an educated conservative guess only. + * + * fid_is_local() is supposed to be used in assertion checks only. + */ +int fid_is_local(struct lu_site *site, const struct lu_fid *fid) +{ + int result; + + result = 1; /* conservatively assume fid is local */ + if (site->ls_client_fld != NULL) { + mdsno_t mds; + int rc; + + rc = fld_cache_lookup(site->ls_client_fld->lcf_cache, + fid_seq(fid), &mds); + if (rc == 0) + result = (mds == site->ls_node_id); + } + return result; +} +EXPORT_SYMBOL(fid_is_local); + +static void fld_server_proc_fini(struct lu_server_fld *fld); + +#ifdef LPROCFS +static int fld_server_proc_init(struct lu_server_fld *fld) +{ + int rc = 0; + ENTRY; + + fld->lsf_proc_dir = lprocfs_register(fld->lsf_name, + fld_type_proc_dir, + fld_server_proc_list, fld); + if (IS_ERR(fld->lsf_proc_dir)) { + rc = PTR_ERR(fld->lsf_proc_dir); + RETURN(rc); + } + + RETURN(rc); +} + +static void fld_server_proc_fini(struct lu_server_fld *fld) +{ + ENTRY; + if (fld->lsf_proc_dir != NULL) { + if (!IS_ERR(fld->lsf_proc_dir)) + lprocfs_remove(&fld->lsf_proc_dir); + fld->lsf_proc_dir = NULL; + } + EXIT; +} +#else +static int fld_server_proc_init(struct lu_server_fld *fld) +{ + return 0; +} + +static void fld_server_proc_fini(struct lu_server_fld *fld) +{ + return; +} +#endif + +int fld_server_init(struct lu_server_fld *fld, struct dt_device *dt, + const char *prefix, const struct lu_env *env) +{ + int cache_size, cache_threshold; + int rc; + ENTRY; + + snprintf(fld->lsf_name, sizeof(fld->lsf_name), + "srv-%s", prefix); + + cache_size = FLD_SERVER_CACHE_SIZE / + sizeof(struct fld_cache_entry); + + cache_threshold = cache_size * + FLD_SERVER_CACHE_THRESHOLD / 100; + + fld->lsf_cache = fld_cache_init(fld->lsf_name, + FLD_SERVER_HTABLE_SIZE, + cache_size, cache_threshold); + if (IS_ERR(fld->lsf_cache)) { + rc = PTR_ERR(fld->lsf_cache); + fld->lsf_cache = NULL; + GOTO(out, rc); + } + + rc = fld_index_init(fld, env, dt); + if (rc) + GOTO(out, rc); + + rc = fld_server_proc_init(fld); + if (rc) + GOTO(out, rc); + + EXIT; +out: + if (rc) + fld_server_fini(fld, env); + return rc; +} +EXPORT_SYMBOL(fld_server_init); + +void fld_server_fini(struct lu_server_fld *fld, + const struct lu_env *env) +{ + ENTRY; + + fld_server_proc_fini(fld); + fld_index_fini(fld, env); + + if (fld->lsf_cache != NULL) { + if (!IS_ERR(fld->lsf_cache)) + fld_cache_fini(fld->lsf_cache); + fld->lsf_cache = NULL; + } + + EXIT; +} +EXPORT_SYMBOL(fld_server_fini); + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre FLD"); +MODULE_LICENSE("GPL"); + +cfs_module(mdd, "0.1.0", fld_mod_init, fld_mod_exit); +#endif diff --git a/lustre/fld/fld_index.c b/lustre/fld/fld_index.c new file mode 100644 index 0000000..4d33514 --- /dev/null +++ b/lustre/fld/fld_index.c @@ -0,0 +1,206 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * fld/fld_index.c + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: WangDi + * Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FLD + +#ifdef __KERNEL__ +# include +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "fld_internal.h" + +const char fld_index_name[] = "fld"; + +static const struct dt_index_features fld_index_features = { + .dif_flags = DT_IND_UPDATE, + .dif_keysize_min = sizeof(seqno_t), + .dif_keysize_max = sizeof(seqno_t), + .dif_recsize_min = sizeof(mdsno_t), + .dif_recsize_max = sizeof(mdsno_t) +}; + +/* + * number of blocks to reserve for particular operations. Should be function of + * ... something. Stub for now. + */ +enum { + FLD_TXN_INDEX_INSERT_CREDITS = 20, + FLD_TXN_INDEX_DELETE_CREDITS = 20, +}; + +extern struct lu_context_key fld_thread_key; + +static struct dt_key *fld_key(const struct lu_env *env, + const seqno_t seq) +{ + struct fld_thread_info *info; + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &fld_thread_key); + LASSERT(info != NULL); + + info->fti_key = cpu_to_be64(seq); + RETURN((void *)&info->fti_key); +} + +static struct dt_rec *fld_rec(const struct lu_env *env, + const mdsno_t mds) +{ + struct fld_thread_info *info; + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &fld_thread_key); + LASSERT(info != NULL); + + info->fti_rec = cpu_to_be64(mds); + RETURN((void *)&info->fti_rec); +} + +int fld_index_create(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t mds) +{ + struct dt_object *dt_obj = fld->lsf_obj; + struct dt_device *dt_dev; + struct txn_param txn; + struct thandle *th; + int rc; + ENTRY; + + dt_dev = lu2dt_dev(fld->lsf_obj->do_lu.lo_dev); + + /* stub here, will fix it later */ + txn_param_init(&txn, FLD_TXN_INDEX_INSERT_CREDITS); + + th = dt_dev->dd_ops->dt_trans_start(env, dt_dev, &txn); + if (!IS_ERR(th)) { + rc = dt_obj->do_index_ops->dio_insert(env, dt_obj, + fld_rec(env, mds), + fld_key(env, seq), + th, BYPASS_CAPA); + dt_dev->dd_ops->dt_trans_stop(env, th); + } else + rc = PTR_ERR(th); + RETURN(rc); +} + +int fld_index_delete(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq) +{ + struct dt_object *dt_obj = fld->lsf_obj; + struct dt_device *dt_dev; + struct txn_param txn; + struct thandle *th; + int rc; + ENTRY; + + dt_dev = lu2dt_dev(fld->lsf_obj->do_lu.lo_dev); + txn_param_init(&txn, FLD_TXN_INDEX_DELETE_CREDITS); + th = dt_dev->dd_ops->dt_trans_start(env, dt_dev, &txn); + if (!IS_ERR(th)) { + rc = dt_obj->do_index_ops->dio_delete(env, dt_obj, + fld_key(env, seq), th, + BYPASS_CAPA); + dt_dev->dd_ops->dt_trans_stop(env, th); + } else + rc = PTR_ERR(th); + RETURN(rc); +} + +int fld_index_lookup(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t *mds) +{ + struct dt_object *dt_obj = fld->lsf_obj; + struct dt_rec *rec = fld_rec(env, 0); + int rc; + ENTRY; + + rc = dt_obj->do_index_ops->dio_lookup(env, dt_obj, rec, + fld_key(env, seq), BYPASS_CAPA); + if (rc == 0) + *mds = be64_to_cpu(*(__u64 *)rec); + RETURN(rc); +} + +int fld_index_init(struct lu_server_fld *fld, + const struct lu_env *env, + struct dt_device *dt) +{ + struct dt_object *dt_obj; + struct lu_fid fid; + int rc; + ENTRY; + + dt_obj = dt_store_open(env, dt, fld_index_name, &fid); + if (!IS_ERR(dt_obj)) { + fld->lsf_obj = dt_obj; + rc = dt_obj->do_ops->do_index_try(env, dt_obj, + &fld_index_features); + if (rc == 0) + LASSERT(dt_obj->do_index_ops != NULL); + else + CERROR("%s: File \"%s\" is not an index!\n", + fld->lsf_name, fld_index_name); + } else { + CERROR("%s: Can't find \"%s\" obj %d\n", + fld->lsf_name, fld_index_name, (int)PTR_ERR(dt_obj)); + rc = PTR_ERR(dt_obj); + } + + RETURN(rc); +} + +void fld_index_fini(struct lu_server_fld *fld, + const struct lu_env *env) +{ + ENTRY; + if (fld->lsf_obj != NULL) { + if (!IS_ERR(fld->lsf_obj)) + lu_object_put(env, &fld->lsf_obj->do_lu); + fld->lsf_obj = NULL; + } + EXIT; +} diff --git a/lustre/fld/fld_internal.h b/lustre/fld/fld_internal.h new file mode 100644 index 0000000..421b5c9 --- /dev/null +++ b/lustre/fld/fld_internal.h @@ -0,0 +1,125 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * fld/fld_internal.h + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * Tom WangDi + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef __FLD_INTERNAL_H +#define __FLD_INTERNAL_H + +#include +#include + +#include +#include + +#include +#include +#include + +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2 +}; + +enum { + /* 4M of FLD cache will not hurt client a lot. */ + FLD_SERVER_CACHE_SIZE = (4 * 0x100000), + + /* 1M of FLD cache will not hurt client a lot. */ + FLD_CLIENT_CACHE_SIZE = (1 * 0x100000) +}; + +enum { + /* Cache threshold is 10 percent of size. */ + FLD_SERVER_CACHE_THRESHOLD = 10, + + /* Cache threshold is 10 percent of size. */ + FLD_CLIENT_CACHE_THRESHOLD = 10 +}; + +enum { + /* + * One page is used for hashtable. That is sizeof(struct hlist_head) * + * 1024. + */ + FLD_CLIENT_HTABLE_SIZE = (1024 * 1), + + /* + * Here 4 pages are used for hashtable of server cache. This is is + * because cache it self is 4 times bugger. + */ + FLD_SERVER_HTABLE_SIZE = (1024 * 4) +}; + +extern struct lu_fld_hash fld_hash[]; + +#ifdef __KERNEL__ +struct fld_thread_info { + struct req_capsule fti_pill; + int fti_rep_buf_size[REQ_MAX_FIELD_NR]; + __u64 fti_key; + __u64 fti_rec; + __u32 fti_flags; +}; + +int fld_index_init(struct lu_server_fld *fld, + const struct lu_env *env, + struct dt_device *dt); + +void fld_index_fini(struct lu_server_fld *fld, + const struct lu_env *env); + +int fld_index_create(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t mds); + +int fld_index_delete(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq); + +int fld_index_lookup(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t *mds); + +#ifdef LPROCFS +extern struct lprocfs_vars fld_server_proc_list[]; +extern struct lprocfs_vars fld_client_proc_list[]; +#endif + +#endif + +static inline const char * +fld_target_name(struct lu_fld_target *tar) +{ + if (tar->ft_srv != NULL) + return tar->ft_srv->lsf_name; + + return (const char *)tar->ft_exp->exp_obd->obd_name; +} + +extern cfs_proc_dir_entry_t *fld_type_proc_dir; + +#endif /* __FLD_INTERNAL_H */ diff --git a/lustre/fld/fld_request.c b/lustre/fld/fld_request.c new file mode 100644 index 0000000..ba57d51f --- /dev/null +++ b/lustre/fld/fld_request.c @@ -0,0 +1,635 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fld/fld_request.c + * FLD (Fids Location Database) + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FLD + +#ifdef __KERNEL__ +# include +# include +# include +# include +#else /* __KERNEL__ */ +# include +# include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "fld_internal.h" + +/* TODO: these 3 functions are copies of flow-control code from mdc_lib.c + * It should be common thing. The same about mdc RPC lock */ +static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) +{ + int rc; + ENTRY; + spin_lock(&cli->cl_loi_list_lock); + rc = list_empty(&mcw->mcw_entry); + spin_unlock(&cli->cl_loi_list_lock); + RETURN(rc); +}; + +static void fld_enter_request(struct client_obd *cli) +{ + struct mdc_cache_waiter mcw; + struct l_wait_info lwi = { 0 }; + + spin_lock(&cli->cl_loi_list_lock); + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); + init_waitqueue_head(&mcw.mcw_waitq); + spin_unlock(&cli->cl_loi_list_lock); + l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi); + } else { + cli->cl_r_in_flight++; + spin_unlock(&cli->cl_loi_list_lock); + } +} + +static void fld_exit_request(struct client_obd *cli) +{ + struct list_head *l, *tmp; + struct mdc_cache_waiter *mcw; + + spin_lock(&cli->cl_loi_list_lock); + cli->cl_r_in_flight--; + list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { + + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + /* No free request slots anymore */ + break; + } + + mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry); + list_del_init(&mcw->mcw_entry); + cli->cl_r_in_flight++; + wake_up(&mcw->mcw_waitq); + } + spin_unlock(&cli->cl_loi_list_lock); +} + +static int fld_rrb_hash(struct lu_client_fld *fld, + seqno_t seq) +{ + LASSERT(fld->lcf_count > 0); + return do_div(seq, fld->lcf_count); +} + +static struct lu_fld_target * +fld_rrb_scan(struct lu_client_fld *fld, seqno_t seq) +{ + struct lu_fld_target *target; + int hash; + ENTRY; + + hash = fld_rrb_hash(fld, seq); + + list_for_each_entry(target, &fld->lcf_targets, ft_chain) { + if (target->ft_idx == hash) + RETURN(target); + } + + CERROR("%s: Can't find target by hash %d (seq "LPX64"). " + "Targets (%d):\n", fld->lcf_name, hash, seq, + fld->lcf_count); + + list_for_each_entry(target, &fld->lcf_targets, ft_chain) { + const char *srv_name = target->ft_srv != NULL ? + target->ft_srv->lsf_name : ""; + const char *exp_name = target->ft_exp != NULL ? + (char *)target->ft_exp->exp_obd->obd_uuid.uuid : + ""; + + CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: "LPU64"\n", + target->ft_exp, exp_name, target->ft_srv, + srv_name, target->ft_idx); + } + + /* + * If target is not found, there is logical error anyway, so here is + * LBUG() to catch this situation. + */ + LBUG(); + RETURN(NULL); +} + +static int fld_dht_hash(struct lu_client_fld *fld, + seqno_t seq) +{ + /* XXX: here should be DHT hash */ + return fld_rrb_hash(fld, seq); +} + +static struct lu_fld_target * +fld_dht_scan(struct lu_client_fld *fld, seqno_t seq) +{ + /* XXX: here should be DHT scan code */ + return fld_rrb_scan(fld, seq); +} + +struct lu_fld_hash fld_hash[3] = { + { + .fh_name = "DHT", + .fh_hash_func = fld_dht_hash, + .fh_scan_func = fld_dht_scan + }, + { + .fh_name = "RRB", + .fh_hash_func = fld_rrb_hash, + .fh_scan_func = fld_rrb_scan + }, + { + 0, + } +}; + +static struct lu_fld_target * +fld_client_get_target(struct lu_client_fld *fld, + seqno_t seq) +{ + struct lu_fld_target *target; + ENTRY; + + LASSERT(fld->lcf_hash != NULL); + + spin_lock(&fld->lcf_lock); + target = fld->lcf_hash->fh_scan_func(fld, seq); + spin_unlock(&fld->lcf_lock); + + if (target != NULL) { + CDEBUG(D_INFO, "%s: Found target (idx "LPU64 + ") by seq "LPX64"\n", fld->lcf_name, + target->ft_idx, seq); + } + + RETURN(target); +} + +/* + * Add export to FLD. This is usually done by CMM and LMV as they are main users + * of FLD module. + */ +int fld_client_add_target(struct lu_client_fld *fld, + struct lu_fld_target *tar) +{ + const char *name = fld_target_name(tar); + struct lu_fld_target *target, *tmp; + ENTRY; + + LASSERT(tar != NULL); + LASSERT(name != NULL); + LASSERT(tar->ft_srv != NULL || tar->ft_exp != NULL); + + if (fld->lcf_flags != LUSTRE_FLD_INIT) { + CERROR("%s: Attempt to add target %s (idx "LPU64") " + "on fly - skip it\n", fld->lcf_name, name, + tar->ft_idx); + RETURN(0); + } else { + CDEBUG(D_INFO, "%s: Adding target %s (idx " + LPU64")\n", fld->lcf_name, name, tar->ft_idx); + } + + OBD_ALLOC_PTR(target); + if (target == NULL) + RETURN(-ENOMEM); + + spin_lock(&fld->lcf_lock); + list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) { + if (tmp->ft_idx == tar->ft_idx) { + spin_unlock(&fld->lcf_lock); + OBD_FREE_PTR(target); + CERROR("Target %s exists in FLD and known as %s:#"LPU64"\n", + name, fld_target_name(tmp), tmp->ft_idx); + RETURN(-EEXIST); + } + } + + target->ft_exp = tar->ft_exp; + if (target->ft_exp != NULL) + class_export_get(target->ft_exp); + target->ft_srv = tar->ft_srv; + target->ft_idx = tar->ft_idx; + + list_add_tail(&target->ft_chain, + &fld->lcf_targets); + + fld->lcf_count++; + spin_unlock(&fld->lcf_lock); + + RETURN(0); +} +EXPORT_SYMBOL(fld_client_add_target); + +/* Remove export from FLD */ +int fld_client_del_target(struct lu_client_fld *fld, + __u64 idx) +{ + struct lu_fld_target *target, *tmp; + ENTRY; + + spin_lock(&fld->lcf_lock); + list_for_each_entry_safe(target, tmp, + &fld->lcf_targets, ft_chain) { + if (target->ft_idx == idx) { + fld->lcf_count--; + list_del(&target->ft_chain); + spin_unlock(&fld->lcf_lock); + + if (target->ft_exp != NULL) + class_export_put(target->ft_exp); + + OBD_FREE_PTR(target); + RETURN(0); + } + } + spin_unlock(&fld->lcf_lock); + RETURN(-ENOENT); +} +EXPORT_SYMBOL(fld_client_del_target); + +static void fld_client_proc_fini(struct lu_client_fld *fld); + +#ifdef LPROCFS +static int fld_client_proc_init(struct lu_client_fld *fld) +{ + int rc; + ENTRY; + + fld->lcf_proc_dir = lprocfs_register(fld->lcf_name, + fld_type_proc_dir, + NULL, NULL); + + if (IS_ERR(fld->lcf_proc_dir)) { + CERROR("%s: LProcFS failed in fld-init\n", + fld->lcf_name); + rc = PTR_ERR(fld->lcf_proc_dir); + RETURN(rc); + } + + rc = lprocfs_add_vars(fld->lcf_proc_dir, + fld_client_proc_list, fld); + if (rc) { + CERROR("%s: Can't init FLD proc, rc %d\n", + fld->lcf_name, rc); + GOTO(out_cleanup, rc); + } + + RETURN(0); + +out_cleanup: + fld_client_proc_fini(fld); + return rc; +} + +static void fld_client_proc_fini(struct lu_client_fld *fld) +{ + ENTRY; + if (fld->lcf_proc_dir) { + if (!IS_ERR(fld->lcf_proc_dir)) + lprocfs_remove(&fld->lcf_proc_dir); + fld->lcf_proc_dir = NULL; + } + EXIT; +} +#else +static int fld_client_proc_init(struct lu_client_fld *fld) +{ + return 0; +} + +static void fld_client_proc_fini(struct lu_client_fld *fld) +{ + return; +} +#endif + +static inline int hash_is_sane(int hash) +{ + return (hash >= 0 && hash < ARRAY_SIZE(fld_hash)); +} + +int fld_client_init(struct lu_client_fld *fld, + const char *prefix, int hash) +{ +#ifdef __KERNEL__ + int cache_size, cache_threshold; +#endif + int rc; + ENTRY; + + LASSERT(fld != NULL); + + snprintf(fld->lcf_name, sizeof(fld->lcf_name), + "cli-%s", prefix); + + if (!hash_is_sane(hash)) { + CERROR("%s: Wrong hash function %#x\n", + fld->lcf_name, hash); + RETURN(-EINVAL); + } + + fld->lcf_count = 0; + spin_lock_init(&fld->lcf_lock); + fld->lcf_hash = &fld_hash[hash]; + fld->lcf_flags = LUSTRE_FLD_INIT; + INIT_LIST_HEAD(&fld->lcf_targets); + +#ifdef __KERNEL__ + cache_size = FLD_CLIENT_CACHE_SIZE / + sizeof(struct fld_cache_entry); + + cache_threshold = cache_size * + FLD_CLIENT_CACHE_THRESHOLD / 100; + + fld->lcf_cache = fld_cache_init(fld->lcf_name, + FLD_CLIENT_HTABLE_SIZE, + cache_size, cache_threshold); + if (IS_ERR(fld->lcf_cache)) { + rc = PTR_ERR(fld->lcf_cache); + fld->lcf_cache = NULL; + GOTO(out, rc); + } +#endif + + rc = fld_client_proc_init(fld); + if (rc) + GOTO(out, rc); + EXIT; +out: + if (rc) + fld_client_fini(fld); + else + CDEBUG(D_INFO, "%s: Using \"%s\" hash\n", + fld->lcf_name, fld->lcf_hash->fh_name); + return rc; +} +EXPORT_SYMBOL(fld_client_init); + +void fld_client_fini(struct lu_client_fld *fld) +{ + struct lu_fld_target *target, *tmp; + ENTRY; + + fld_client_proc_fini(fld); + + spin_lock(&fld->lcf_lock); + list_for_each_entry_safe(target, tmp, + &fld->lcf_targets, ft_chain) { + fld->lcf_count--; + list_del(&target->ft_chain); + if (target->ft_exp != NULL) + class_export_put(target->ft_exp); + OBD_FREE_PTR(target); + } + spin_unlock(&fld->lcf_lock); + +#ifdef __KERNEL__ + if (fld->lcf_cache != NULL) { + if (!IS_ERR(fld->lcf_cache)) + fld_cache_fini(fld->lcf_cache); + fld->lcf_cache = NULL; + } +#endif + + EXIT; +} +EXPORT_SYMBOL(fld_client_fini); + +static int fld_client_rpc(struct obd_export *exp, + struct md_fld *mf, __u32 fld_op) +{ + int size[3] = { sizeof(struct ptlrpc_body), + sizeof(__u32), + sizeof(struct md_fld) }; + struct ptlrpc_request *req; + struct req_capsule pill; + struct md_fld *pmf; + __u32 *op; + int rc; + ENTRY; + + LASSERT(exp != NULL); + + req = ptlrpc_prep_req(class_exp2cliimp(exp), + LUSTRE_MDS_VERSION, + FLD_QUERY, 3, size, + NULL); + if (req == NULL) + RETURN(-ENOMEM); + + req_capsule_init(&pill, req, RCL_CLIENT, NULL); + req_capsule_set(&pill, &RQF_FLD_QUERY); + + op = req_capsule_client_get(&pill, &RMF_FLD_OPC); + *op = fld_op; + + pmf = req_capsule_client_get(&pill, &RMF_FLD_MDFLD); + *pmf = *mf; + + size[1] = sizeof(struct md_fld); + ptlrpc_req_set_repsize(req, 2, size); + req->rq_request_portal = FLD_REQUEST_PORTAL; + + if (fld_op != FLD_LOOKUP) + mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + fld_enter_request(&exp->exp_obd->u.cli); + rc = ptlrpc_queue_wait(req); + fld_exit_request(&exp->exp_obd->u.cli); + if (fld_op != FLD_LOOKUP) + mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + if (rc) + GOTO(out_req, rc); + + pmf = req_capsule_server_get(&pill, &RMF_FLD_MDFLD); + if (pmf == NULL) + GOTO(out_req, rc = -EFAULT); + *mf = *pmf; + EXIT; +out_req: + req_capsule_fini(&pill); + ptlrpc_req_finished(req); + return rc; +} + +int fld_client_create(struct lu_client_fld *fld, + seqno_t seq, mdsno_t mds, + const struct lu_env *env) +{ + struct md_fld md_fld = { .mf_seq = seq, .mf_mds = mds }; + struct lu_fld_target *target; + int rc; + ENTRY; + + fld->lcf_flags |= LUSTRE_FLD_RUN; + target = fld_client_get_target(fld, seq); + LASSERT(target != NULL); + + CDEBUG(D_INFO, "%s: Create fld entry (seq: "LPX64"; mds: " + LPU64") on target %s (idx "LPU64")\n", fld->lcf_name, + seq, mds, fld_target_name(target), target->ft_idx); + +#ifdef __KERNEL__ + if (target->ft_srv != NULL) { + LASSERT(env != NULL); + rc = fld_server_create(target->ft_srv, env, seq, mds); + } else { +#endif + rc = fld_client_rpc(target->ft_exp, &md_fld, FLD_CREATE); +#ifdef __KERNEL__ + } +#endif + + if (rc == 0) { + /* + * Do not return result of calling fld_cache_insert() + * here. First of all because it may return -EEXISTS. Another + * reason is that, we do not want to stop proceeding because of + * cache errors. + */ + fld_cache_insert(fld->lcf_cache, seq, mds); + } else { + CERROR("%s: Can't create FLD entry, rc %d\n", + fld->lcf_name, rc); + } + + RETURN(rc); +} +EXPORT_SYMBOL(fld_client_create); + +int fld_client_delete(struct lu_client_fld *fld, seqno_t seq, + const struct lu_env *env) +{ + struct md_fld md_fld = { .mf_seq = seq, .mf_mds = 0 }; + struct lu_fld_target *target; + int rc; + ENTRY; + + fld->lcf_flags |= LUSTRE_FLD_RUN; + fld_cache_delete(fld->lcf_cache, seq); + + target = fld_client_get_target(fld, seq); + LASSERT(target != NULL); + + CDEBUG(D_INFO, "%s: Delete fld entry (seq: "LPX64") on " + "target %s (idx "LPU64")\n", fld->lcf_name, seq, + fld_target_name(target), target->ft_idx); + +#ifdef __KERNEL__ + if (target->ft_srv != NULL) { + LASSERT(env != NULL); + rc = fld_server_delete(target->ft_srv, + env, seq); + } else { +#endif + rc = fld_client_rpc(target->ft_exp, + &md_fld, FLD_DELETE); +#ifdef __KERNEL__ + } +#endif + + RETURN(rc); +} +EXPORT_SYMBOL(fld_client_delete); + +int fld_client_lookup(struct lu_client_fld *fld, + seqno_t seq, mdsno_t *mds, + const struct lu_env *env) +{ + struct md_fld md_fld = { .mf_seq = seq, .mf_mds = 0 }; + struct lu_fld_target *target; + int rc; + ENTRY; + + fld->lcf_flags |= LUSTRE_FLD_RUN; + + rc = fld_cache_lookup(fld->lcf_cache, seq, mds); + if (rc == 0) + RETURN(0); + + /* Can not find it in the cache */ + target = fld_client_get_target(fld, seq); + LASSERT(target != NULL); + + CDEBUG(D_INFO, "%s: Lookup fld entry (seq: "LPX64") on " + "target %s (idx "LPU64")\n", fld->lcf_name, seq, + fld_target_name(target), target->ft_idx); + +#ifdef __KERNEL__ + if (target->ft_srv != NULL) { + LASSERT(env != NULL); + rc = fld_server_lookup(target->ft_srv, + env, seq, &md_fld.mf_mds); + } else { +#endif + /* + * insert the 'inflight' sequence. No need to protect that, + * we are trying to reduce numbers of RPC but not restrict + * to them exactly one + */ + fld_cache_insert_inflight(fld->lcf_cache, seq); + rc = fld_client_rpc(target->ft_exp, + &md_fld, FLD_LOOKUP); +#ifdef __KERNEL__ + } +#endif + if (rc == 0) { + *mds = md_fld.mf_mds; + + /* + * Do not return error here as well. See previous comment in + * same situation in function fld_client_create(). + */ + fld_cache_insert(fld->lcf_cache, seq, *mds); + } else { + /* remove 'inflight' seq if it exists */ + fld_cache_delete(fld->lcf_cache, seq); + } + RETURN(rc); +} +EXPORT_SYMBOL(fld_client_lookup); + +void fld_client_flush(struct lu_client_fld *fld) +{ +#ifdef __KERNEL__ + fld_cache_flush(fld->lcf_cache); +#endif +} +EXPORT_SYMBOL(fld_client_flush); + diff --git a/lustre/fld/lproc_fld.c b/lustre/fld/lproc_fld.c new file mode 100644 index 0000000..c0dfba2 --- /dev/null +++ b/lustre/fld/lproc_fld.c @@ -0,0 +1,153 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/fld/lproc_fld.c + * FLD (FIDs Location Database) + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_FLD + +#ifdef __KERNEL__ +# include +# include +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include "fld_internal.h" + +#ifdef LPROCFS +static int +fld_proc_read_targets(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)data; + struct lu_fld_target *target; + int total = 0, rc; + ENTRY; + + LASSERT(fld != NULL); + + spin_lock(&fld->lcf_lock); + list_for_each_entry(target, + &fld->lcf_targets, ft_chain) + { + rc = snprintf(page, count, "%s\n", + fld_target_name(target)); + page += rc; + count -= rc; + total += rc; + if (count == 0) + break; + } + spin_unlock(&fld->lcf_lock); + RETURN(total); +} + +static int +fld_proc_read_hash(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)data; + int rc; + ENTRY; + + LASSERT(fld != NULL); + + spin_lock(&fld->lcf_lock); + rc = snprintf(page, count, "%s\n", + fld->lcf_hash->fh_name); + spin_unlock(&fld->lcf_lock); + + RETURN(rc); +} + +static int +fld_proc_write_hash(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)data; + struct lu_fld_hash *hash = NULL; + int i; + ENTRY; + + LASSERT(fld != NULL); + + for (i = 0; fld_hash[i].fh_name != NULL; i++) { + if (count != strlen(fld_hash[i].fh_name)) + continue; + + if (!strncmp(fld_hash[i].fh_name, buffer, count)) { + hash = &fld_hash[i]; + break; + } + } + + if (hash != NULL) { + spin_lock(&fld->lcf_lock); + fld->lcf_hash = hash; + spin_unlock(&fld->lcf_lock); + + CDEBUG(D_INFO, "%s: Changed hash to \"%s\"\n", + fld->lcf_name, hash->fh_name); + } + + RETURN(count); +} + +static int +fld_proc_write_cache_flush(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct lu_client_fld *fld = (struct lu_client_fld *)data; + ENTRY; + + LASSERT(fld != NULL); + + fld_cache_flush(fld->lcf_cache); + + CDEBUG(D_INFO, "%s: Lookup cache is flushed\n", fld->lcf_name); + + RETURN(count); +} + +struct lprocfs_vars fld_server_proc_list[] = { + { NULL }}; + +struct lprocfs_vars fld_client_proc_list[] = { + { "targets", fld_proc_read_targets, NULL, NULL }, + { "hash", fld_proc_read_hash, fld_proc_write_hash, NULL }, + { "cache_flush", NULL, fld_proc_write_cache_flush, NULL }, + { NULL }}; +#endif diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index dc4306a..0a5f452 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -9,9 +9,11 @@ SUBDIRS = linux lustre EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h \ lustre_commit_confd.h lustre_debug.h lustre_disk.h \ lustre_dlm.h lustre_export.h lustre_fsfilt.h lustre_ha.h \ - lustre_handles.h lustre_import.h lustre_lib.h \ - lustre_lite.h lustre_log.h lustre_mds.h lustre_net.h \ - lustre_param.h lustre_quota.h lustre_ucache.h lvfs.h \ + lustre_handles.h lustre_import.h lustre_lib.h lustre_sec.h \ + lustre_lite.h lustre_log.h lustre_mds.h lustre_mdc.h \ + lustre_net.h lustre_quota.h lustre_ucache.h lvfs.h \ obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \ - obd_ost.h obd_support.h lustre_ver.h + obd_ost.h obd_support.h lustre_ver.h lu_object.h lu_time.h \ + md_object.h dt_object.h lustre_param.h lustre_mdt.h \ + lustre_fid.h lustre_fld.h lustre_req_layout.h lustre_capa.h diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h new file mode 100644 index 0000000..294f992 --- /dev/null +++ b/lustre/include/dt_object.h @@ -0,0 +1,506 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LUSTRE_DT_OBJECT_H +#define __LUSTRE_DT_OBJECT_H + +/* + * Sub-class of lu_object with methods common for "data" objects in OST stack. + * + * Data objects behave like regular files: you can read/write them, get and + * set their attributes. Implementation of dt interface is supposed to + * implement some form of garbage collection, normally reference counting + * (nlink) based one. + * + * Examples: osd (lustre/osd) is an implementation of dt interface. + */ + + +/* + * super-class definitions. + */ +#include + +#include +#include + +struct seq_file; +struct proc_dir_entry; +struct lustre_cfg; + +struct thandle; +struct txn_param; +struct dt_device; +struct dt_object; +struct dt_index_features; + +struct dt_device_param { + unsigned ddp_max_name_len; + unsigned ddp_max_nlink; + unsigned ddp_block_shift; +}; + +/* + * Basic transaction credit op + */ +enum dt_txn_op { + DTO_INDEX_INSERT, + DTO_INDEX_DELETE, + DTO_IDNEX_UPDATE, + DTO_OBJECT_CREATE, + DTO_OBJECT_DELETE, + DTO_ATTR_SET, + DTO_XATTR_SET, + DTO_LOG_REC, /* XXX temporary: dt layer knows nothing about llog. */ + + DTO_NR +}; + +/* + * Operations on dt device. + */ +struct dt_device_operations { + /* + * Return device-wide statistics. + */ + int (*dt_statfs)(const struct lu_env *env, + struct dt_device *dev, struct kstatfs *sfs); + /* + * Start transaction, described by @param. + */ + struct thandle *(*dt_trans_start)(const struct lu_env *env, + struct dt_device *dev, + struct txn_param *param); + /* + * Finish previously started transaction. + */ + void (*dt_trans_stop)(const struct lu_env *env, + struct thandle *th); + /* + * Return fid of root index object. + */ + int (*dt_root_get)(const struct lu_env *env, + struct dt_device *dev, struct lu_fid *f); + /* + * Return device configuration data. + */ + void (*dt_conf_get)(const struct lu_env *env, + const struct dt_device *dev, + struct dt_device_param *param); + /* + * handling device state, mostly for tests + */ + int (*dt_sync)(const struct lu_env *env, struct dt_device *dev); + void (*dt_ro)(const struct lu_env *env, struct dt_device *dev); + /* + * Initialize capability context. + */ + int (*dt_init_capa_ctxt)(const struct lu_env *env, + struct dt_device *dev, + int mode, unsigned long timeout, + __u32 alg, struct lustre_capa_key *keys); + + /* + * get transaction credits for given @op. + */ + int (*dt_credit_get)(const struct lu_env *env, struct dt_device *dev, + enum dt_txn_op); +}; + +struct dt_index_features { + /* required feature flags from enum dt_index_flags */ + __u32 dif_flags; + /* minimal required key size */ + size_t dif_keysize_min; + /* maximal required key size, 0 if no limit */ + size_t dif_keysize_max; + /* minimal required record size */ + size_t dif_recsize_min; + /* maximal required record size, 0 if no limit */ + size_t dif_recsize_max; +}; + +enum dt_index_flags { + /* index supports variable sized keys */ + DT_IND_VARKEY = 1 << 0, + /* index supports variable sized records */ + DT_IND_VARREC = 1 << 1, + /* index can be modified */ + DT_IND_UPDATE = 1 << 2, + /* index supports records with non-unique (duplicate) keys */ + DT_IND_NONUNQ = 1 << 3 +}; + +/* + * Features, required from index to support file system directories (mapping + * names to fids). + */ +extern const struct dt_index_features dt_directory_features; + +/* + * This is a general purpose dt allocation hint. + * It now contains the parent object. + * It can contain any allocation hint in the future. + */ +struct dt_allocation_hint { + struct dt_object *dah_parent; + __u32 dah_mode; +}; + +/* + * Per-dt-object operations. + */ +struct dt_object_operations { + void (*do_read_lock)(const struct lu_env *env, + struct dt_object *dt); + void (*do_write_lock)(const struct lu_env *env, + struct dt_object *dt); + void (*do_read_unlock)(const struct lu_env *env, + struct dt_object *dt); + void (*do_write_unlock)(const struct lu_env *env, + struct dt_object *dt); + /* + * Note: following ->do_{x,}attr_{set,get}() operations are very + * similar to ->moo_{x,}attr_{set,get}() operations in struct + * md_object_operations (see md_object.h). These operations are not in + * lu_object_operations, because ->do_{x,}attr_set() versions take + * transaction handle as an argument (this transaction is started by + * caller). We might factor ->do_{x,}attr_get() into + * lu_object_operations, but that would break existing symmetry. + */ + + /* + * Return standard attributes. + * + * precondition: lu_object_exists(&dt->do_lu); + */ + int (*do_attr_get)(const struct lu_env *env, + struct dt_object *dt, struct lu_attr *attr, + struct lustre_capa *capa); + /* + * Set standard attributes. + * + * precondition: dt_object_exists(dt); + */ + int (*do_attr_set)(const struct lu_env *env, + struct dt_object *dt, + const struct lu_attr *attr, + struct thandle *handle, + struct lustre_capa *capa); + /* + * Return a value of an extended attribute. + * + * precondition: dt_object_exists(dt); + */ + int (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt, + struct lu_buf *buf, const char *name, + struct lustre_capa *capa); + /* + * Set value of an extended attribute. + * + * @fl - flags from enum lu_xattr_flags + * + * precondition: dt_object_exists(dt); + */ + int (*do_xattr_set)(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *buf, + const char *name, int fl, struct thandle *handle, + struct lustre_capa *capa); + /* + * Delete existing extended attribute. + * + * precondition: dt_object_exists(dt); + */ + int (*do_xattr_del)(const struct lu_env *env, + struct dt_object *dt, + const char *name, struct thandle *handle, + struct lustre_capa *capa); + /* + * Place list of existing extended attributes into @buf (which has + * length len). + * + * precondition: dt_object_exists(dt); + */ + int (*do_xattr_list)(const struct lu_env *env, + struct dt_object *dt, struct lu_buf *buf, + struct lustre_capa *capa); + /* + * Init allocation hint using parent object and child mode. + * (1) The @parent might be NULL if this is a partial creation for + * remote object. + * (2) The type of child is in @child_mode. + * (3) The result hint is stored in @ah; + */ + void (*do_ah_init)(const struct lu_env *env, + struct dt_allocation_hint *ah, + struct dt_object *parent, + umode_t child_mode); + /* + * Create new object on this device. + * + * precondition: !dt_object_exists(dt); + * postcondition: ergo(result == 0, dt_object_exists(dt)); + */ + int (*do_create)(const struct lu_env *env, struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th); + + /* + * Announce that this object is going to be used as an index. This + * operation check that object supports indexing operations and + * installs appropriate dt_index_operations vector on success. + * + * Also probes for features. Operation is successful if all required + * features are supported. + */ + int (*do_index_try)(const struct lu_env *env, + struct dt_object *dt, + const struct dt_index_features *feat); + /* + * Add nlink of the object + * precondition: dt_object_exists(dt); + */ + void (*do_ref_add)(const struct lu_env *env, + struct dt_object *dt, struct thandle *th); + /* + * Del nlink of the object + * precondition: dt_object_exists(dt); + */ + void (*do_ref_del)(const struct lu_env *env, + struct dt_object *dt, struct thandle *th); + + struct obd_capa *(*do_capa_get)(const struct lu_env *env, + struct dt_object *dt, + struct lustre_capa *old, + __u32 uid, __u64 opc); +}; + +/* + * Per-dt-object operations on "file body". + */ +struct dt_body_operations { + /* + * precondition: dt_object_exists(dt); + */ + ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt, + struct lu_buf *buf, loff_t *pos, + struct lustre_capa *capa); + /* + * precondition: dt_object_exists(dt); + */ + ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, loff_t *pos, + struct thandle *handle, struct lustre_capa *capa); +}; + +/* + * Incomplete type of index record. + */ +struct dt_rec; + +/* + * Incomplete type of index key. + */ +struct dt_key; + +/* + * Incomplete type of dt iterator. + */ +struct dt_it; + +/* + * Per-dt-object operations on object as index. + */ +struct dt_index_operations { + /* + * precondition: dt_object_exists(dt); + */ + int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt, + struct dt_rec *rec, const struct dt_key *key, + struct lustre_capa *capa); + /* + * precondition: dt_object_exists(dt); + */ + int (*dio_insert)(const struct lu_env *env, struct dt_object *dt, + const struct dt_rec *rec, const struct dt_key *key, + struct thandle *handle, struct lustre_capa *capa); + /* + * precondition: dt_object_exists(dt); + */ + int (*dio_delete)(const struct lu_env *env, struct dt_object *dt, + const struct dt_key *key, struct thandle *handle, + struct lustre_capa *capa); + /* + * Iterator interface + */ + struct dt_it_ops { + /* + * Allocate and initialize new iterator. + * + * precondition: dt_object_exists(dt); + */ + struct dt_it *(*init)(const struct lu_env *env, + struct dt_object *dt, int writable, + struct lustre_capa *capa); + void (*fini)(const struct lu_env *env, + struct dt_it *di); + int (*get)(const struct lu_env *env, + struct dt_it *di, + const struct dt_key *key); + void (*put)(const struct lu_env *env, + struct dt_it *di); + int (*del)(const struct lu_env *env, + struct dt_it *di, struct thandle *th); + int (*next)(const struct lu_env *env, + struct dt_it *di); + struct dt_key *(*key)(const struct lu_env *env, + const struct dt_it *di); + int (*key_size)(const struct lu_env *env, + const struct dt_it *di); + struct dt_rec *(*rec)(const struct lu_env *env, + const struct dt_it *di); + __u32 (*store)(const struct lu_env *env, + const struct dt_it *di); + int (*load)(const struct lu_env *env, + const struct dt_it *di, __u32 hash); + } dio_it; +}; + +struct dt_device { + struct lu_device dd_lu_dev; + struct dt_device_operations *dd_ops; + + /* + * List of dt_txn_callback (see below). This is not protected in any + * way, because callbacks are supposed to be added/deleted only during + * single-threaded start-up shut-down procedures. + */ + struct list_head dd_txn_callbacks; +}; + +int dt_device_init(struct dt_device *dev, struct lu_device_type *t); +void dt_device_fini(struct dt_device *dev); + +static inline int lu_device_is_dt(const struct lu_device *d) +{ + return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT); +} + +static inline struct dt_device * lu2dt_dev(struct lu_device *l) +{ + LASSERT(lu_device_is_dt(l)); + return container_of0(l, struct dt_device, dd_lu_dev); +} + +struct dt_object { + struct lu_object do_lu; + struct dt_object_operations *do_ops; + struct dt_body_operations *do_body_ops; + struct dt_index_operations *do_index_ops; +}; + +int dt_object_init(struct dt_object *obj, + struct lu_object_header *h, struct lu_device *d); + +void dt_object_fini(struct dt_object *obj); + +static inline int dt_object_exists(const struct dt_object *dt) +{ + return lu_object_exists(&dt->do_lu); +} + +struct txn_param { + /* number of blocks this transaction will modify */ + unsigned int tp_credits; + /* sync transaction is needed */ + __u32 tp_sync:1; +}; + +static inline void txn_param_init(struct txn_param *p, unsigned int credits) +{ + memset(p, 0, sizeof(*p)); + p->tp_credits = credits; +} + +/* + * This is the general purpose transaction handle. + * 1. Transaction Life Cycle + * This transaction handle is allocated upon starting a new transaction, + * and deallocated after this transaction is committed. + * 2. Transaction Nesting + * We do _NOT_ support nested transaction. So, every thread should only + * have one active transaction, and a transaction only belongs to one + * thread. Due to this, transaction handle need no reference count. + * 3. Transaction & dt_object locking + * dt_object locks should be taken inside transaction. + * 4. Transaction & RPC + * No RPC request should be issued inside transaction. + */ +struct thandle { + /* the dt device on which the transactions are executed */ + struct dt_device *th_dev; + + /* context for this transaction, tag is LCT_TX_HANDLE */ + struct lu_context th_ctx; + + /* the last operation result in this transaction. + * this value is used in recovery */ + __s32 th_result; +}; + +/* + * Transaction call-backs. + * + * These are invoked by osd (or underlying transaction engine) when + * transaction changes state. + * + * Call-backs are used by upper layers to modify transaction parameters and to + * perform some actions on for each transaction state transition. Typical + * example is mdt registering call-back to write into last-received file + * before each transaction commit. + */ +struct dt_txn_callback { + int (*dtc_txn_start)(const struct lu_env *env, + struct txn_param *param, void *cookie); + int (*dtc_txn_stop)(const struct lu_env *env, + struct thandle *txn, void *cookie); + int (*dtc_txn_commit)(const struct lu_env *env, + struct thandle *txn, void *cookie); + void *dtc_cookie; + struct list_head dtc_linkage; +}; + +void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb); +void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb); + +int dt_txn_hook_start(const struct lu_env *env, + struct dt_device *dev, struct txn_param *param); +int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn); +int dt_txn_hook_commit(const struct lu_env *env, struct thandle *txn); + +int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj); +struct dt_object *dt_store_open(const struct lu_env *env, + struct dt_device *dt, const char *name, + struct lu_fid *fid); + +#endif /* __LUSTRE_DT_OBJECT_H */ diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index b7a7d59..1f99cbc 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -62,6 +62,7 @@ #endif #include #include +#include #include #include @@ -71,9 +72,9 @@ #ifdef __CYGWIN__ -#define CFS_PAGE_SHIFT 12 -#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) +#define CFS_PAGE_SHIFT 12 +#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) +#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) #define loff_t long long #define ERESTART 2001 typedef unsigned short umode_t; @@ -84,6 +85,10 @@ typedef unsigned short umode_t; # define CURRENT_SECONDS time(0) #endif +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) ((sizeof (a))/(sizeof ((a)[0]))) +#endif + /* This is because lprocfs_status.h gets included here indirectly. It would * be much better to just avoid lprocfs being included into liblustre entirely * but that requires more header surgery than I can handle right now. @@ -145,14 +150,10 @@ static inline void *kmalloc(int size, int prot) #define GFP_HIGHUSER 1 #define GFP_ATOMIC 1 #define GFP_NOFS 1 -#define IS_ERR(a) ((unsigned long)(a) < 1000) +#define IS_ERR(a) ((unsigned long)(a) > (unsigned long)-1000L) #define PTR_ERR(a) ((long)(a)) #define ERR_PTR(a) ((void*)((long)(a))) -typedef struct { - void *cwd; -}mm_segment_t; - typedef int (read_proc_t)(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -208,7 +209,7 @@ static __inline__ int clear_bit(int nr, long * addr) return nr; } -static __inline__ int test_bit(int nr, long * addr) +static __inline__ int test_bit(int nr, const long * addr) { return ((1UL << (nr & (BITS_PER_LONG - 1))) & ((addr)[nr / BITS_PER_LONG])) != 0; } @@ -286,6 +287,7 @@ extern int ldlm_init(void); extern int osc_init(void); extern int lov_init(void); extern int mdc_init(void); +extern int lmv_init(void); extern int mgc_init(void); extern int echo_client_init(void); @@ -295,6 +297,8 @@ extern int echo_client_init(void); #define EXPORT_SYMBOL(S) +struct rcu_head { }; + typedef struct { } spinlock_t; typedef __u64 kdev_t; @@ -314,6 +318,14 @@ static inline void spin_unlock_bh(spinlock_t *l) {} static inline void spin_lock_irqsave(spinlock_t *a, unsigned long b) {} static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {} +typedef spinlock_t rwlock_t; +#define RW_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED +#define read_lock(l) spin_lock(l) +#define read_unlock(l) spin_unlock(l) +#define write_lock(l) spin_lock(l) +#define write_unlock(l) spin_unlock(l) + + #define min(x,y) ((x)<(y) ? (x) : (y)) #define max(x,y) ((x)>(y) ? (x) : (y)) @@ -463,6 +475,7 @@ static inline cfs_page_t* __grab_cache_page(unsigned long index) #define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ #define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ #define ATTR_CTIME_SET 0x2000 +#define ATTR_BLOCKS 0x4000 struct iattr { unsigned int ia_valid; @@ -475,7 +488,8 @@ struct iattr { time_t ia_ctime; unsigned int ia_attr_flags; }; -#define ll_iattr_struct iattr + +#define ll_iattr iattr #define IT_OPEN 0x0001 #define IT_CREAT 0x0002 @@ -519,7 +533,6 @@ static inline void intent_init(struct lookup_intent *it, int op, int flags) it->it_flags = flags; } - struct dentry { int d_count; }; @@ -579,6 +592,8 @@ struct task_struct { int state; struct signal pending; char comm[32]; + int uid; + int gid; int pid; int fsuid; int fsgid; @@ -615,7 +630,7 @@ static inline int capable(int cap) .sleepers = LIST_HEAD_INIT(HEAD.sleepers) \ } #define init_waitqueue_head(l) INIT_LIST_HEAD(&(l)->sleepers) -#define wake_up(l) do { int a = 0; a++; } while (0) +#define wake_up(l) do { int a; a++; } while (0) #define TASK_INTERRUPTIBLE 0 #define TASK_UNINTERRUPTIBLE 1 #define TASK_RUNNING 2 @@ -701,6 +716,7 @@ static inline void del_timer(struct timer_list *l) typedef struct { volatile int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } #define atomic_read(a) ((a)->counter) #define atomic_set(a,b) do {(a)->counter = b; } while (0) #define atomic_dec_and_test(a) ((--((a)->counter)) == 0) @@ -709,6 +725,7 @@ typedef struct { volatile int counter; } atomic_t; #define atomic_dec(a) do { (a)->counter--; } while (0) #define atomic_add(b,a) do {(a)->counter += b;} while (0) #define atomic_sub(b,a) do {(a)->counter -= b;} while (0) +#define ATOMIC_INIT(i) { i } #ifndef likely #define likely(exp) (exp) @@ -717,9 +734,43 @@ typedef struct { volatile int counter; } atomic_t; #define unlikely(exp) (exp) #endif +#define might_sleep() +#define might_sleep_if(c) +#define smp_mb() + +static inline +int test_and_set_bit(int nr, unsigned long *addr) +{ + int oldbit; + + while (nr >= sizeof(long)) { + nr -= sizeof(long); + addr++; + } + + oldbit = (*addr) & (1 << nr); + *addr |= (1 << nr); + return oldbit; +} + +static inline +int test_and_clear_bit(int nr, unsigned long *addr) +{ + int oldbit; + + while (nr >= sizeof(long)) { + nr -= sizeof(long); + addr++; + } + + oldbit = (*addr) & (1 << nr); + *addr &= ~(1 << nr); + return oldbit; +} + /* FIXME sys/capability will finally included linux/fs.h thus * cause numerous trouble on x86-64. as temporary solution for - * build broken at cary, we copy definition we need from capability.h + * build broken at Cray, we copy definition we need from capability.h * FIXME */ struct _cap_struct; @@ -815,7 +866,7 @@ typedef struct file_lock { unsigned long fl_break_time; /* for nonblocking lease breaks */ union { - struct nfs_lock_info nfs_fl; + struct nfs_lock_info nfs_fl; } fl_u; } cfs_flock_t; @@ -891,6 +942,20 @@ void posix_acl_release(struct posix_acl *acl) #define ENOTSUPP ENOTSUP #endif +typedef int mm_segment_t; +enum { + KERNEL_DS, + USER_DS +}; +static inline mm_segment_t get_fs(void) +{ + return USER_DS; +} + +static inline void set_fs(mm_segment_t seg) +{ +} + #include #include #include @@ -898,4 +963,60 @@ void posix_acl_release(struct posix_acl *acl) #include #include +/* Fast hashing routine for a long. + (C) 2002 William Lee Irwin III, IBM */ + +/* + * Knuth recommends primes in approximately golden ratio to the maximum + * integer representable by a machine word for multiplicative hashing. + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * + * These primes are chosen to be bit-sparse, that is operations on + * them can use shifts and additions instead of multiplications for + * machines where multiplications are slow. + */ +#if BITS_PER_LONG == 32 +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL +#elif BITS_PER_LONG == 64 +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL +#else +#error Define GOLDEN_RATIO_PRIME for your wordsize. +#endif + +static inline unsigned long hash_long(unsigned long val, unsigned int bits) +{ + unsigned long hash = val; + +#if BITS_PER_LONG == 64 + /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ + unsigned long n = hash; + n <<= 18; + hash -= n; + n <<= 33; + hash -= n; + n <<= 3; + hash += n; + n <<= 3; + hash -= n; + n <<= 4; + hash += n; + n <<= 2; + hash += n; +#else + /* On some cpus multiply is faster, on others gcc will do shifts */ + hash *= GOLDEN_RATIO_PRIME; +#endif + + /* High bits are more random, so use them. */ + return hash >> (BITS_PER_LONG - bits); +} + +static inline unsigned long hash_ptr(void *ptr, unsigned int bits) +{ + return hash_long((unsigned long)ptr, bits); +} + #endif diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index 9604a6a..96bb17c 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -9,9 +9,8 @@ if UTILS linux_HEADERS = lustre_types.h lustre_user.h endif -EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_lib.h \ - lustre_dlm.h lustre_handles.h lustre_net.h obd_class.h obd_support.h \ - lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h obd.h \ - lvfs.h lvfs_linux.h lustre_lite.h lustre_quota.h \ - lustre_user.h lustre_types.h lustre_patchless_compat.h lustre_intent.h - +EXTRA_DIST = lprocfs_status.h lustre_acl.h lustre_debug.h lustre_lib.h \ + lustre_dlm.h lustre_handles.h lustre_net.h obd_class.h obd_support.h \ + lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h \ + obd.h lvfs.h lvfs_linux.h lustre_lite.h lustre_quota.h \ + lustre_user.h lustre_types.h lustre_patchless_compat.h lustre_intent.h diff --git a/lustre/include/linux/lustre_acl.h b/lustre/include/linux/lustre_acl.h new file mode 100644 index 0000000..e028891 --- /dev/null +++ b/lustre/include/linux/lustre_acl.h @@ -0,0 +1,39 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * MDS data structures. + * See also lustre_idl.h for wire formats of requests. + */ + +#ifndef _LUSTRE_ACL_H +#define _LUSTRE_ACL_H + +#ifdef __KERNEL__ +# include +# include +# ifdef CONFIG_FS_POSIX_ACL +# include +# endif +#endif + +/* ACL */ +#ifdef CONFIG_FS_POSIX_ACL +#ifdef HAVE_XATTR_ACL +# define MDS_XATTR_NAME_ACL_ACCESS XATTR_NAME_ACL_ACCESS +# define mds_xattr_acl_size(entry) xattr_acl_size(entry) +# else +# define MDS_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS +# define mds_xattr_acl_size(entry) posix_acl_xattr_size(entry) +# endif +# define LUSTRE_POSIX_ACL_MAX_ENTRIES (32) +# define LUSTRE_POSIX_ACL_MAX_SIZE \ + (sizeof(xattr_acl_header) + \ + LUSTRE_POSIX_ACL_MAX_ENTRIES * \ + sizeof(xattr_acl_entry)) +#else +# define LUSTRE_POSIX_ACL_MAX_SIZE 0 +#endif + +#endif diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 7e69bce..d5b97fd 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -34,12 +34,12 @@ #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) -struct ll_iattr_struct { +struct ll_iattr { struct iattr iattr; unsigned int ia_attr_flags; }; #else -#define ll_iattr_struct iattr +#define ll_iattr iattr #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */ #ifndef HAVE_SET_FS_PWD @@ -65,7 +65,9 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, #define ll_set_fs_pwd set_fs_pwd #endif /* HAVE_SET_FS_PWD */ -#ifdef HAVE_INODE_I_MUTEX +#define ATTR_BLOCKS 0x4000 + +#if HAVE_INODE_I_MUTEX #define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0) #define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0) #define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex) @@ -216,205 +218,8 @@ static inline int cleanup_group_info(void) #define CheckWriteback(page, cmd) \ (!(!PageWriteback(page) && cmd == OBD_BRW_WRITE)) -#else /* 2.4.. */ - -#define ll_flock_lock_file_wait(file, lock, can_sleep) \ - do {} while(0) - -#define lock_dentry(___dentry) -#define unlock_dentry(___dentry) - -#define lock_24kernel() lock_kernel() -#define unlock_24kernel() unlock_kernel() -#define ll_kernel_locked() (current->lock_depth >= 0) - -/* 2.4 kernels have HZ=100 on i386/x86_64, this should be reasonably safe */ -#define get_jiffies_64() (__u64)jiffies - -#ifdef HAVE_MM_INLINE -#include -#endif - -#ifndef pgoff_t -#define pgoff_t unsigned long -#endif - -#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c) -#define ll_permission(inode,mask,nd) permission(inode,mask) -#define ILOOKUP(sb, ino, test, data) ilookup4(sb, ino, test, data); -#define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED -#define ll_dev_t int -#define old_encode_dev(dev) (dev) - -/* 2.5 uses hlists for some things, like the d_hash. we'll treat them - * as 2.5 and let macros drop back.. */ -#ifndef HLIST_HEAD /* until we get a kernel newer than l28 */ -#define hlist_entry list_entry -#define hlist_head list_head -#define hlist_node list_head -#define HLIST_HEAD LIST_HEAD -#define INIT_HLIST_HEAD INIT_LIST_HEAD -#define hlist_del_init list_del_init -#define hlist_add_head list_add -#endif - -#ifndef INIT_HLIST_NODE -#define INIT_HLIST_NODE(p) ((p)->next = NULL, (p)->prev = NULL) -#endif - -#ifndef hlist_for_each -#define hlist_for_each list_for_each -#endif - -#ifndef hlist_for_each_safe -#define hlist_for_each_safe list_for_each_safe -#endif - -#define KDEVT_INIT(val) (val) -#define ext3_xattr_set_handle ext3_xattr_set -#define try_module_get __MOD_INC_USE_COUNT -#define module_put __MOD_DEC_USE_COUNT -#define LTIME_S(time) (time) - -#if !defined(CONFIG_RH_2_4_20) && !defined(cpu_online) -#define cpu_online(cpu) test_bit(cpu, &(cpu_online_map)) #endif -static inline int ll_path_lookup(const char *path, unsigned flags, - struct nameidata *nd) -{ - int error = 0; - if (path_init(path, flags, nd)) - error = path_walk(path, nd); - return error; -} -#define ll_permission(inode,mask,nd) permission(inode,mask) -typedef long sector_t; - -#define ll_pgcache_lock(mapping) spin_lock(&pagecache_lock) -#define ll_pgcache_unlock(mapping) spin_unlock(&pagecache_lock) -#define ll_call_writepage(inode, page) \ - (inode)->i_mapping->a_ops->writepage(page) -#define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) -#define ll_truncate_complete_page(page) truncate_complete_page(page) - -static inline void clear_page_dirty(struct page *page) -{ - if (PageDirty(page)) - ClearPageDirty(page); -} - -static inline int clear_page_dirty_for_io(struct page *page) -{ - struct address_space *mapping = page->mapping; - - if (page->mapping && PageDirty(page)) { - ClearPageDirty(page); - ll_pgcache_lock(mapping); - list_del(&page->list); - list_add(&page->list, &mapping->locked_pages); - ll_pgcache_unlock(mapping); - return 1; - } - return 0; -} - -static inline void ll_redirty_page(struct page *page) -{ - SetPageDirty(page); - ClearPageLaunder(page); -} - -static inline void __d_drop(struct dentry *dentry) -{ - list_del_init(&dentry->d_hash); -} - -static inline int cleanup_group_info(void) -{ - /* Get rid of unneeded supplementary groups */ - current->ngroups = 0; - memset(current->groups, 0, sizeof(current->groups)); - return 0; -} - -#ifndef HAVE_COND_RESCHED -static inline void cond_resched(void) -{ - if (unlikely(need_resched())) { - set_current_state(TASK_RUNNING); - schedule(); - } -} -#endif - -/* to find proc_dir_entry from inode. 2.6 has native one -bzzz */ -#ifndef HAVE_PDE -#define PDE(ii) ((ii)->u.generic_ip) -#endif - -#define __set_page_ll_data(page, llap) set_page_private(page, (unsigned long)llap) -#define __clear_page_ll_data(page) set_page_private(page, 0) -#define PageWriteback(page) 0 -#define CheckWriteback(page, cmd) 1 -#define set_page_writeback(page) do {} while (0) -#define end_page_writeback(page) do {} while (0) - -static inline int mapping_mapped(struct address_space *mapping) -{ - if (mapping->i_mmap_shared) - return 1; - if (mapping->i_mmap) - return 1; - return 0; -} - -#ifdef ZAP_PAGE_RANGE_VMA -#define ll_zap_page_range(vma, addr, len) zap_page_range(vma, addr, len) -#else -#define ll_zap_page_range(vma, addr, len) zap_page_range(vma->vm_mm, addr, len) -#endif - -#ifndef HAVE_PAGE_MAPPED -/* Poor man's page_mapped. substract from page count, counts from - buffers/pagecache and our own count (we are supposed to hold one reference). - What is left are user mappings and also others who work with this page now, - but there are supposedly none. */ -static inline int page_mapped(struct page *page) -{ - return page_count(page) - !!page->mapping - !!page->buffers - 1; -} -#endif /* !HAVE_PAGE_MAPPED */ - -static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry) -{ - update_atime(dentry->d_inode); -} - -static inline void file_accessed(struct file *file) -{ -#ifdef O_NOATIME - if (file->f_flags & O_NOATIME) - return; -#endif - touch_atime(file->f_vfsmnt, file->f_dentry); -} - -#ifndef typecheck -/* - * Check at compile time that something is of a particular type. - * Always evaluates to 1 so you may use it easily in comparisons. - */ -#define typecheck(type,x) \ -({ type __dummy; \ - typeof(x) __dummy2; \ - (void)(&__dummy == &__dummy2); \ - 1; \ -}) -#endif - -#endif /* end of 2.4 compat macros */ - #ifdef HAVE_PAGE_LIST static inline int mapping_has_pages(struct address_space *mapping) { @@ -497,6 +302,7 @@ ll_kern_mount(const char *fstype, int flags, const char *name, void *data) if (!type) return ERR_PTR(-ENODEV); mnt = vfs_kern_mount(type, flags, name, data); + module_put(type->owner); return mnt; } #else diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 70a4bc8..a091f45 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -92,17 +92,17 @@ struct fsfilt_operations { int pages, unsigned long *blocks, int *created, int create, struct semaphore *sem); - int (* fs_prep_san_write)(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); int (* fs_write_record)(struct file *, void *, int size, loff_t *, int force_sync); int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct super_block *sb); int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); - int (* fs_quotacheck)(struct super_block *sb, - struct obd_quotactl *oqctl); int (* fs_quotactl)(struct super_block *sb, struct obd_quotactl *oqctl); + int (* fs_quotacheck)(struct super_block *sb, + struct obd_quotactl *oqctl); + __u64 (* fs_get_version) (struct inode *inode); + __u64 (* fs_set_version) (struct inode *inode, __u64 new_version); int (* fs_quotainfo)(struct lustre_quota_info *lqi, int type, int cmd); int (* fs_qids)(struct file *file, struct inode *inode, int type, @@ -430,13 +430,6 @@ static inline int fsfilt_map_inode_pages(struct obd_device *obd, created, create, sem); } -static inline int fs_prep_san_write(struct obd_device *obd, struct inode *inode, - long *blocks, int nblocks, loff_t newsize) -{ - return obd->obd_fsops->fs_prep_san_write(inode, blocks, - nblocks, newsize); -} - static inline int fsfilt_read_record(struct obd_device *obd, struct file *file, void *buf, loff_t size, loff_t *offs) { @@ -457,6 +450,22 @@ static inline int fsfilt_setup(struct obd_device *obd, struct super_block *fs) return 0; } +static inline __u64 fsfilt_set_version(struct obd_device *obd, + struct inode *inode, __u64 new_version) +{ + if (obd->obd_fsops->fs_set_version) + return obd->obd_fsops->fs_set_version(inode, new_version); + return -EOPNOTSUPP; +} + +static inline __u64 fsfilt_get_version(struct obd_device *obd, + struct inode *inode) +{ + if (obd->obd_fsops->fs_set_version) + return obd->obd_fsops->fs_get_version(inode); + return -EOPNOTSUPP; +} + #endif /* __KERNEL__ */ #endif diff --git a/lustre/include/linux/lustre_handles.h b/lustre/include/linux/lustre_handles.h index 21eb047..166beb3 100644 --- a/lustre/include/linux/lustre_handles.h +++ b/lustre/include/linux/lustre_handles.h @@ -10,6 +10,16 @@ #include #include #include -#endif +#include +#include +#include + +# ifdef HAVE_RCU +# include /* for rcu_head{} */ +# else +struct rcu_head { }; +# endif + +#endif /* ifdef __KERNEL__ */ #endif diff --git a/lustre/include/linux/lustre_intent.h b/lustre/include/linux/lustre_intent.h index 3d8cb2c..84629e1 100644 --- a/lustre/include/linux/lustre_intent.h +++ b/lustre/include/linux/lustre_intent.h @@ -4,14 +4,15 @@ #include #ifndef LUSTRE_KERNEL_VERSION -#define IT_OPEN (1) -#define IT_CREAT (1<<1) -#define IT_READDIR (1<<2) -#define IT_GETATTR (1<<3) -#define IT_LOOKUP (1<<4) -#define IT_UNLINK (1<<5) -#define IT_TRUNC (1<<6) -#define IT_GETXATTR (1<<7) + +#define IT_OPEN (1 << 0) +#define IT_CREAT (1 << 1) +#define IT_READDIR (1 << 2) +#define IT_GETATTR (1 << 3) +#define IT_LOOKUP (1 << 4) +#define IT_UNLINK (1 << 5) +#define IT_TRUNC (1 << 6) +#define IT_GETXATTR (1 << 7) struct lustre_intent_data { int it_disposition; @@ -30,6 +31,5 @@ struct lookup_intent { } d; }; - #endif #endif diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 9e09a06..41c670e 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -23,7 +23,6 @@ #include #include -#include #include #include @@ -50,6 +49,7 @@ enum { LPROC_LL_FSYNC, LPROC_LL_SETATTR, LPROC_LL_TRUNC, + LPROC_LL_FLOCK, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) LPROC_LL_GETATTR, @@ -60,7 +60,9 @@ enum { LPROC_LL_ALLOC_INODE, LPROC_LL_SETXATTR, LPROC_LL_GETXATTR, - + LPROC_LL_LISTXATTR, + LPROC_LL_REMOVEXATTR, + LPROC_LL_INODE_PERM, LPROC_LL_DIRECT_READ, LPROC_LL_DIRECT_WRITE, LPROC_LL_FILE_OPCODES diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 8e4c08e..c418603 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -30,11 +30,7 @@ # endif #endif -struct ldlm_lock_desc; struct mds_obd; -struct ptlrpc_connection; -struct ptlrpc_client; -struct obd_export; struct ptlrpc_request; struct obd_device; struct ll_file_data; @@ -44,7 +40,7 @@ struct ll_file_data; struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, struct vfsmount **mnt, int lock_mode, struct lustre_handle *lockh, - char *name, int namelen, __u64 lockpart); + __u64 lockpart); struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, struct vfsmount **mnt); int mds_update_server_data(struct obd_device *, int force_sync); diff --git a/lustre/include/linux/lustre_patchless_compat.h b/lustre/include/linux/lustre_patchless_compat.h index 6199c61..5de494e 100644 --- a/lustre/include/linux/lustre_patchless_compat.h +++ b/lustre/include/linux/lustre_patchless_compat.h @@ -1,3 +1,25 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + #ifndef LUSTRE_PATCHLESS_COMPAT_H #define LUSTRE_PATCHLESS_COMPAT_H @@ -26,10 +48,11 @@ static inline void ll_remove_from_page_cache(struct page *page) page->mapping = NULL; mapping->nrpages--; #ifdef HAVE_NR_PAGECACHE - atomic_add(-1, &nr_pagecache); // XXX pagecache_acct(-1); + atomic_add(-1, &nr_pagecache); // XXX pagecache_acct(-1); #else - __dec_zone_page_state(page, NR_FILE_PAGES); + __dec_zone_page_state(page, NR_FILE_PAGES); #endif + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)) write_unlock_irq(&mapping->tree_lock); #else @@ -46,7 +69,11 @@ truncate_complete_page(struct address_space *mapping, struct page *page) if (PagePrivate(page)) page->mapping->a_ops->invalidatepage(page, 0); +#ifdef HAVE_CANCEL_DIRTY_PAGE + cancel_dirty_page(page, PAGE_SIZE); +#else clear_page_dirty(page); +#endif ClearPageUptodate(page); ClearPageMappedToDisk(page); ll_remove_from_page_cache(page); @@ -67,7 +94,7 @@ static inline void d_rehash_cond(struct dentry * entry, int lock) } #define __d_rehash(dentry, lock) d_rehash_cond(dentry, lock) - + #define LUSTRE_PATCHLESS #ifndef ATTR_FROM_OPEN diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index 085ffb5..51d549a 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -33,6 +33,7 @@ #include #else struct group_info { /* unused */ }; +#include #endif #define LLOG_LVFS @@ -40,13 +41,14 @@ struct group_info { /* unused */ }; /* simple.c */ struct lvfs_ucred { - struct upcall_cache_entry *luc_uce; - __u32 luc_fsuid; - __u32 luc_fsgid; - __u32 luc_cap; - __u32 luc_suppgid1; - __u32 luc_suppgid2; - __u32 luc_umask; + __u32 luc_uid; + __u32 luc_gid; + __u32 luc_fsuid; + __u32 luc_fsgid; + __u32 luc_cap; + __u32 luc_umask; + struct group_info *luc_ginfo; + struct mdt_identity *luc_identity; }; struct lvfs_callback_ops { diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index c7b0f7d..6df9c36 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -64,6 +64,8 @@ struct lvfs_dentry_params __lvfs_set_rdonly(lvfs_sbdev(sb), fsfilt_journal_sbdev(obd, sb)) void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev); + int lvfs_check_rdonly(lvfs_sbdev_type dev); +void lvfs_clear_rdonly(lvfs_sbdev_type dev); #endif /* __LVFS_LINUX_H__ */ diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 3b58632..ba0d89e 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -12,8 +12,8 @@ #ifdef __KERNEL__ # include # include -# include /* for struct task_struct, for current.h */ -# include /* for smp_lock.h */ +# include /* for struct task_struct, for current.h */ +# include /* for smp_lock.h */ # include # include # include diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index cb032a4..d210f91 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -45,9 +45,8 @@ extern void (*obd_zombie_impexp_notify)(void); /* obdo.c */ #ifdef __KERNEL__ -void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid); -void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid); void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); +void obdo_from_la(struct obdo *dst, struct lu_attr *la, obd_flag valid); void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid); void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); #endif diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 5c466c3..be35c26 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -75,16 +75,19 @@ static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len) # include # include -#define OBD_FAIL_WRITE(obd, id, sb) \ -{ \ - if (OBD_FAIL_CHECK(id)) { \ - BDEVNAME_DECLARE_STORAGE(tmp); \ - CERROR("obd_fail_loc=%x, fail write operation on %s\n", \ - id, ll_bdevname(sb, tmp)); \ - lvfs_set_rdonly(obd, sb); \ - /* We set FAIL_ONCE because we never "un-fail" a device */ \ - obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; \ - } \ +static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) +{ + if (OBD_FAIL_CHECK(id)) { +#ifdef LIBCFS_DEBUG + BDEVNAME_DECLARE_STORAGE(tmp); + CERROR("obd_fail_loc=%x, fail write operation on %s\n", + id, ll_bdevname(sb, tmp)); +#endif + /* TODO-CMD: fix getting jdev */ + __lvfs_set_rdonly(lvfs_sbdev(sb), (lvfs_sbdev_type)0); + /* We set FAIL_ONCE because we never "un-fail" a device */ + obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; + } } #define OBD_SLEEP_ON(wq, state) wait_event_interruptible(wq, state) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index da1dc16..f9aaac7 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -164,7 +164,7 @@ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, { struct lprocfs_counter *percpu_cntr; - if (!stats) + if (stats == NULL) return; percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); atomic_inc(&percpu_cntr->lc_cntl.la_entry); @@ -186,7 +186,7 @@ static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) { struct lprocfs_counter *percpu_cntr; - if (!stats) + if (stats == NULL) return; percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); atomic_inc(&percpu_cntr->lc_cntl.la_entry); @@ -403,7 +403,7 @@ static inline void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) { return; } static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats) + unsigned int num_private_stats) { return 0; } static inline void lprocfs_free_obd_stats(struct obd_device *obddev) { return; } diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h new file mode 100644 index 0000000..e9761de --- /dev/null +++ b/lustre/include/lu_object.h @@ -0,0 +1,1074 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LUSTRE_LU_OBJECT_H +#define __LUSTRE_LU_OBJECT_H + +/* + * struct lu_fid + */ +#include + +#include +#include + +/* + * Layered objects support for CMD3/C5. + */ + +struct seq_file; +struct proc_dir_entry; +struct lustre_cfg; +struct lprocfs_stats; + +/* + * lu_* data-types represent server-side entities shared by data and meta-data + * stacks. + * + * Design goals: + * + * 0. support for layering. + * + * Server side object is split into layers, one per device in the + * corresponding device stack. Individual layer is represented by struct + * lu_object. Compound layered object --- by struct lu_object_header. Most + * interface functions take lu_object as an argument and operate on the + * whole compound object. This decision was made due to the following + * reasons: + * + * - it's envisaged that lu_object will be used much more often than + * lu_object_header; + * + * - we want lower (non-top) layers to be able to initiate operations + * on the whole object. + * + * Generic code supports layering more complex than simple stacking, e.g., + * it is possible that at some layer object "spawns" multiple sub-objects + * on the lower layer. + * + * 1. fid-based identification. + * + * Compound object is uniquely identified by its fid. Objects are indexed + * by their fids (hash table is used for index). + * + * 2. caching and life-cycle management. + * + * Object's life-time is controlled by reference counting. When reference + * count drops to 0, object is returned to cache. Cached objects still + * retain their identity (i.e., fid), and can be recovered from cache. + * + * Objects are kept in the global LRU list, and lu_site_purge() function + * can be used to reclaim given number of unused objects from the tail of + * the LRU. + * + * 3. avoiding recursion. + * + * Generic code tries to replace recursion through layers by iterations + * where possible. Additionally to the end of reducing stack consumption, + * data, when practically possible, are allocated through lu_context_key + * interface rather than on stack. + * + */ + +struct lu_site; +struct lu_object; +struct lu_device; +struct lu_object_header; +struct lu_context; +struct lu_env; + +/* + * Operations common for data and meta-data devices. + */ +struct lu_device_operations { + /* + * Object creation protocol. + * + * Due to design goal of avoiding recursion, object creation (see + * lu_object_alloc()) is somewhat involved: + * + * - first, ->ldo_object_alloc() method of the top-level device + * in the stack is called. It should allocate top level object + * (including lu_object_header), but without any lower-layer + * sub-object(s). + * + * - then lu_object_alloc() sets fid in the header of newly created + * object. + * + * - then ->loo_object_init() (a method from struct + * lu_object_operations) is called. It has to allocate lower-layer + * object(s). To do this, ->loo_object_init() calls + * ldo_object_alloc() of the lower-layer device(s). + * + * - for all new objects allocated by ->loo_object_init() (and + * inserted into object stack), ->loo_object_init() is called again + * repeatedly, until no new objects are created. + * + */ + + /* + * Allocate object for the given device (without lower-layer + * parts). This is called by ->loo_object_init() from the parent + * layer, and should setup at least ->lo_dev and ->lo_ops fields of + * resulting lu_object. + * + * postcondition: ergo(!IS_ERR(result), result->lo_dev == d && + * result->lo_ops != NULL); + */ + struct lu_object *(*ldo_object_alloc)(const struct lu_env *env, + const struct lu_object_header *h, + struct lu_device *d); + /* + * process config specific for device + */ + int (*ldo_process_config)(const struct lu_env *env, + struct lu_device *, struct lustre_cfg *); + int (*ldo_recovery_complete)(const struct lu_env *, + struct lu_device *); + +}; + +/* + * Type of "printer" function used by ->loo_object_print() method. + * + * Printer function is needed to provide some flexibility in (semi-)debugging + * output: possible implementations: printk, CDEBUG, sysfs/seq_file + */ +typedef int (*lu_printer_t)(const struct lu_env *env, + void *cookie, const char *format, ...) + __attribute__ ((format (printf, 3, 4))); + +/* + * Operations specific for particular lu_object. + */ +struct lu_object_operations { + + /* + * Allocate lower-layer parts of the object by calling + * ->ldo_object_alloc() of the corresponding underlying device. + * + * This method is called once for each object inserted into object + * stack. It's responsibility of this method to insert lower-layer + * object(s) it create into appropriate places of object stack. + */ + int (*loo_object_init)(const struct lu_env *env, + struct lu_object *o); + /* + * Called (in top-to-bottom order) during object allocation after all + * layers were allocated and initialized. Can be used to perform + * initialization depending on lower layers. + */ + int (*loo_object_start)(const struct lu_env *env, + struct lu_object *o); + /* + * Called before ->loo_object_free() to signal that object is being + * destroyed. Dual to ->loo_object_init(). + */ + void (*loo_object_delete)(const struct lu_env *env, + struct lu_object *o); + + /* + * Dual to ->ldo_object_alloc(). Called when object is removed from + * memory. + */ + void (*loo_object_free)(const struct lu_env *env, + struct lu_object *o); + + /* + * Called when last active reference to the object is released (and + * object returns to the cache). This method is optional. + */ + void (*loo_object_release)(const struct lu_env *env, + struct lu_object *o); + /* + * Debugging helper. Print given object. + */ + int (*loo_object_print)(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *o); + /* + * Optional debugging method. Returns true iff method is internally + * consistent. + */ + int (*loo_object_invariant)(const struct lu_object *o); +}; + +/* + * Type of lu_device. + */ +struct lu_device_type; + +/* + * Device: a layer in the server side abstraction stacking. + */ +struct lu_device { + /* + * reference count. This is incremented, in particular, on each object + * created at this layer. + * + * XXX which means that atomic_t is probably too small. + */ + atomic_t ld_ref; + /* + * Pointer to device type. Never modified once set. + */ + struct lu_device_type *ld_type; + /* + * Operation vector for this device. + */ + struct lu_device_operations *ld_ops; + /* + * Stack this device belongs to. + */ + struct lu_site *ld_site; + struct proc_dir_entry *ld_proc_entry; + + /* XXX: temporary back pointer into obd. */ + struct obd_device *ld_obd; +}; + +struct lu_device_type_operations; + +/* + * Tag bits for device type. They are used to distinguish certain groups of + * device types. + */ +enum lu_device_tag { + /* this is meta-data device */ + LU_DEVICE_MD = (1 << 0), + /* this is data device */ + LU_DEVICE_DT = (1 << 1) +}; + +/* + * Type of device. + */ +struct lu_device_type { + /* + * Tag bits. Taken from enum lu_device_tag. Never modified once set. + */ + __u32 ldt_tags; + /* + * Name of this class. Unique system-wide. Never modified once set. + */ + char *ldt_name; + /* + * Operations for this type. + */ + struct lu_device_type_operations *ldt_ops; + /* + * XXX: temporary pointer to associated obd_type. + */ + struct obd_type *ldt_obd_type; + /* + * XXX: temporary: context tags used by obd_*() calls. + */ + __u32 ldt_ctx_tags; +}; + +/* + * Operations on a device type. + */ +struct lu_device_type_operations { + /* + * Allocate new device. + */ + struct lu_device *(*ldto_device_alloc)(const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *lcfg); + /* + * Free device. Dual to ->ldto_device_alloc(). + */ + void (*ldto_device_free)(const struct lu_env *, + struct lu_device *); + + /* + * Initialize the devices after allocation + */ + int (*ldto_device_init)(const struct lu_env *env, + struct lu_device *, const char *, + struct lu_device *); + /* + * Finalize device. Dual to ->ldto_device_init(). Returns pointer to + * the next device in the stack. + */ + struct lu_device *(*ldto_device_fini)(const struct lu_env *env, + struct lu_device *); + + /* + * Initialize device type. This is called on module load. + */ + int (*ldto_init)(struct lu_device_type *t); + /* + * Finalize device type. Dual to ->ldto_init(). Called on module + * unload. + */ + void (*ldto_fini)(struct lu_device_type *t); +}; + +/* + * Flags for the object layers. + */ +enum lu_object_flags { + /* + * this flags is set if ->loo_object_init() has been called for this + * layer. Used by lu_object_alloc(). + */ + LU_OBJECT_ALLOCATED = (1 << 0) +}; + +/* + * Common object attributes. + */ +/* valid flags */ +enum la_valid { + LA_ATIME = 1 << 0, + LA_MTIME = 1 << 1, + LA_CTIME = 1 << 2, + LA_SIZE = 1 << 3, + LA_MODE = 1 << 4, + LA_UID = 1 << 5, + LA_GID = 1 << 6, + LA_BLOCKS = 1 << 7, + LA_TYPE = 1 << 8, + LA_FLAGS = 1 << 9, + LA_NLINK = 1 << 10, + LA_RDEV = 1 << 11, + LA_BLKSIZE = 1 << 12, +}; + +struct lu_attr { + __u64 la_size; /* size in bytes */ + __u64 la_mtime; /* modification time in seconds since Epoch */ + __u64 la_atime; /* access time in seconds since Epoch */ + __u64 la_ctime; /* change time in seconds since Epoch */ + __u64 la_blocks; /* 512-byte blocks allocated to object */ + __u32 la_mode; /* permission bits and file type */ + __u32 la_uid; /* owner id */ + __u32 la_gid; /* group id */ + __u32 la_flags; /* object flags */ + __u32 la_nlink; /* number of persistent references to this + * object */ + __u32 la_blksize; /* blk size of the object*/ + + __u32 la_rdev; /* real device */ + __u64 la_valid; /* valid bits */ +}; + + +/* + * Layer in the layered object. + */ +struct lu_object { + /* + * Header for this object. + */ + struct lu_object_header *lo_header; + /* + * Device for this layer. + */ + struct lu_device *lo_dev; + /* + * Operations for this object. + */ + struct lu_object_operations *lo_ops; + /* + * Linkage into list of all layers. + */ + struct list_head lo_linkage; + /* + * Depth. Top level layer depth is 0. + */ + int lo_depth; + /* + * Flags from enum lu_object_flags. + */ + unsigned long lo_flags; +}; + +enum lu_object_header_flags { + /* + * Don't keep this object in cache. Object will be destroyed as soon + * as last reference to it is released. This flag cannot be cleared + * once set. + */ + LU_OBJECT_HEARD_BANSHEE = 0 +}; + +enum lu_object_header_attr { + LOHA_EXISTS = 1 << 0, + LOHA_REMOTE = 1 << 1, + /* + * UNIX file type is stored in S_IFMT bits. + */ + LOHA_FT_START = 1 << 12, /* S_IFIFO */ + LOHA_FT_END = 1 << 15, /* S_IFREG */ +}; + +/* + * "Compound" object, consisting of multiple layers. + * + * Compound object with given fid is unique with given lu_site. + * + * Note, that object does *not* necessary correspond to the real object in the + * persistent storage: object is an anchor for locking and method calling, so + * it is created for things like not-yet-existing child created by mkdir or + * create calls. ->loo_exists() can be used to check whether object is backed + * by persistent storage entity. + */ +struct lu_object_header { + /* + * Object flags from enum lu_object_header_flags. Set and checked + * atomically. + */ + unsigned long loh_flags; + /* + * Object reference count. Protected by site guard lock. + */ + atomic_t loh_ref; + /* + * Fid, uniquely identifying this object. + */ + struct lu_fid loh_fid; + /* + * Common object attributes, cached for efficiency. From enum + * lu_object_header_attr. + */ + __u32 loh_attr; + /* + * Linkage into per-site hash table. Protected by site guard lock. + */ + struct hlist_node loh_hash; + /* + * Linkage into per-site LRU list. Protected by site guard lock. + */ + struct list_head loh_lru; + /* + * Linkage into list of layers. Never modified once set (except lately + * during object destruction). No locking is necessary. + */ + struct list_head loh_layers; +}; + +struct fld; + +/* + * lu_site is a "compartment" within which objects are unique, and LRU + * discipline is maintained. + * + * lu_site exists so that multiple layered stacks can co-exist in the same + * address space. + * + * lu_site has the same relation to lu_device as lu_object_header to + * lu_object. + */ +struct lu_site { + /* + * lock protecting: + * + * - ->ls_hash hash table (and its linkages in objects); + * + * - ->ls_lru list (and its linkages in objects); + * + * - 0/1 transitions of object ->loh_ref reference count; + * + * yes, it's heavy. + */ + rwlock_t ls_guard; + /* + * Hash-table where objects are indexed by fid. + */ + struct hlist_head *ls_hash; + /* + * Bit-mask for hash-table size. + */ + int ls_hash_mask; + /* + * Order of hash-table. + */ + int ls_hash_bits; + /* + * Number of buckets in the hash-table. + */ + int ls_hash_size; + + /* + * LRU list, updated on each access to object. Protected by + * ->ls_guard. + * + * "Cold" end of LRU is ->ls_lru.next. Accessed object are moved to + * the ->ls_lru.prev (this is due to the non-existence of + * list_for_each_entry_safe_reverse()). + */ + struct list_head ls_lru; + /* + * Total number of objects in this site. Protected by ->ls_guard. + */ + unsigned ls_total; + /* + * Total number of objects in this site with reference counter greater + * than 0. Protected by ->ls_guard. + */ + unsigned ls_busy; + + /* + * Top-level device for this stack. + */ + struct lu_device *ls_top_dev; + /* + * mds number of this site. + */ + mdsno_t ls_node_id; + /* + * Fid location database + */ + struct lu_server_fld *ls_server_fld; + struct lu_client_fld *ls_client_fld; + + /* + * Server Seq Manager + */ + struct lu_server_seq *ls_server_seq; + + /* + * Controller Seq Manager + */ + struct lu_server_seq *ls_control_seq; + struct obd_export *ls_control_exp; + + /* + * Client Seq Manager + */ + struct lu_client_seq *ls_client_seq; + + /* statistical counters. Protected by nothing, races are accepted. */ + struct { + __u32 s_created; + __u32 s_cache_hit; + __u32 s_cache_miss; + /* + * Number of hash-table entry checks made. + * + * ->s_cache_check / (->s_cache_miss + ->s_cache_hit) + * + * is an average number of hash slots inspected during single + * lookup. + */ + __u32 s_cache_check; + /* raced cache insertions */ + __u32 s_cache_race; + __u32 s_lru_purged; + } ls_stats; + + /* + * Linkage into global list of sites. + */ + struct list_head ls_linkage; + struct lprocfs_stats *ls_time_stats; +}; + +/* + * Constructors/destructors. + */ + +/* + * Initialize site @s, with @d as the top level device. + */ +int lu_site_init(struct lu_site *s, struct lu_device *d); +/* + * Finalize @s and release its resources. + */ +void lu_site_fini(struct lu_site *s); + +/* + * Called when initialization of stack for this site is completed. + */ +int lu_site_init_finish(struct lu_site *s); + +/* + * Acquire additional reference on device @d + */ +void lu_device_get(struct lu_device *d); +/* + * Release reference on device @d. + */ +void lu_device_put(struct lu_device *d); + +/* + * Initialize device @d of type @t. + */ +int lu_device_init(struct lu_device *d, struct lu_device_type *t); +/* + * Finalize device @d. + */ +void lu_device_fini(struct lu_device *d); + +/* + * Initialize compound object. + */ +int lu_object_header_init(struct lu_object_header *h); +/* + * Finalize compound object. + */ +void lu_object_header_fini(struct lu_object_header *h); + +/* + * Initialize object @o that is part of compound object @h and was created by + * device @d. + */ +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +/* + * Finalize object and release its resources. + */ +void lu_object_fini(struct lu_object *o); +/* + * Add object @o as first layer of compound object @h. + * + * This is typically called by the ->ldo_object_alloc() method of top-level + * device. + */ +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +/* + * Add object @o as a layer of compound object, going after @before.1 + * + * This is typically called by the ->ldo_object_alloc() method of + * @before->lo_dev. + */ +void lu_object_add(struct lu_object *before, struct lu_object *o); + +/* + * Caching and reference counting. + */ + +/* + * Acquire additional reference to the given object. This function is used to + * attain additional reference. To acquire initial reference use + * lu_object_find(). + */ +static inline void lu_object_get(struct lu_object *o) +{ + LASSERT(atomic_read(&o->lo_header->loh_ref) > 0); + atomic_inc(&o->lo_header->loh_ref); +} + +/* + * Return true of object will not be cached after last reference to it is + * released. + */ +static inline int lu_object_is_dying(const struct lu_object_header *h) +{ + return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); +} + +/* + * Decrease reference counter on object. If last reference is freed, return + * object to the cache, unless lu_object_is_dying(o) holds. In the latter + * case, free object immediately. + */ +void lu_object_put(const struct lu_env *env, + struct lu_object *o); + +/* + * Free @nr objects from the cold end of the site LRU list. + */ +int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr); + +/* + * Print all objects in @s. + */ +void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, + lu_printer_t printer); +/* + * Search cache for an object with the fid @f. If such object is found, return + * it. Otherwise, create new object, insert it into cache and return it. In + * any case, additional reference is acquired on the returned object. + */ +struct lu_object *lu_object_find(const struct lu_env *env, + struct lu_site *s, const struct lu_fid *f); + +/* + * Helpers. + */ + +/* + * First (topmost) sub-object of given compound object + */ +static inline struct lu_object *lu_object_top(struct lu_object_header *h) +{ + LASSERT(!list_empty(&h->loh_layers)); + return container_of0(h->loh_layers.next, struct lu_object, lo_linkage); +} + +/* + * Next sub-object in the layering + */ +static inline struct lu_object *lu_object_next(const struct lu_object *o) +{ + return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage); +} + +/* + * Pointer to the fid of this object. + */ +static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) +{ + return &o->lo_header->loh_fid; +} + +/* + * return device operations vector for this object + */ +static inline struct lu_device_operations * +lu_object_ops(const struct lu_object *o) +{ + return o->lo_dev->ld_ops; +} + +/* + * Given a compound object, find its slice, corresponding to the device type + * @dtype. + */ +struct lu_object *lu_object_locate(struct lu_object_header *h, + struct lu_device_type *dtype); + +struct lu_cdebug_print_info { + int lpi_subsys; + int lpi_mask; + const char *lpi_file; + const char *lpi_fn; + int lpi_line; +}; + +/* + * Printer function emitting messages through libcfs_debug_msg(). + */ +int lu_cdebug_printer(const struct lu_env *env, + void *cookie, const char *format, ...); + +#define DECLARE_LU_CDEBUG_PRINT_INFO(var, mask) \ + struct lu_cdebug_print_info var = { \ + .lpi_subsys = DEBUG_SUBSYSTEM, \ + .lpi_mask = (mask), \ + .lpi_file = __FILE__, \ + .lpi_fn = __FUNCTION__, \ + .lpi_line = __LINE__ \ + }; + +/* + * Print object description followed by user-supplied message. + */ +#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ +({ \ + static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask); \ + \ + lu_object_print(env, &__info, lu_cdebug_printer, object); \ + CDEBUG(mask, format , ## __VA_ARGS__); \ +}) + +/* + * Print human readable representation of the @o to the @f. + */ +void lu_object_print(const struct lu_env *env, void *cookie, + lu_printer_t printer, const struct lu_object *o); + +/* + * Check object consistency. + */ +int lu_object_invariant(const struct lu_object *o); + +/* + * Returns 1 iff object @o exists on the stable storage, + * returns -1 iff object @o is on remote server. + */ +static inline int lu_object_exists(const struct lu_object *o) +{ + __u32 attr; + + attr = o->lo_header->loh_attr; + if (attr & LOHA_REMOTE) + return -1; + else if (attr & LOHA_EXISTS) + return +1; + else + return 0; +} + +static inline int lu_object_assert_exists(const struct lu_object *o) +{ + return lu_object_exists(o) != 0; +} + +static inline int lu_object_assert_not_exists(const struct lu_object *o) +{ + return lu_object_exists(o) <= 0; +} + +/* + * Attr of this object. + */ +static inline const __u32 lu_object_attr(const struct lu_object *o) +{ + LASSERT(lu_object_exists(o) > 0); + return o->lo_header->loh_attr; +} + +struct lu_rdpg { + /* input params, should be filled out by mdt */ + __u32 rp_hash; /* hash */ + int rp_count; /* count in bytes */ + int rp_npages; /* number of pages */ + struct page **rp_pages; /* pointers to pages */ +}; + +enum lu_xattr_flags { + LU_XATTR_REPLACE = (1 << 0), + LU_XATTR_CREATE = (1 << 1) +}; + +/* + * lu_context. Execution context for lu_object methods. Currently associated + * with thread. + * + * All lu_object methods, except device and device type methods (called during + * system initialization and shutdown) are executed "within" some + * lu_context. This means, that pointer to some "current" lu_context is passed + * as an argument to all methods. + * + * All service ptlrpc threads create lu_context as part of their + * initialization. It is possible to create "stand-alone" context for other + * execution environments (like system calls). + * + * lu_object methods mainly use lu_context through lu_context_key interface + * that allows each layer to associate arbitrary pieces of data with each + * context (see pthread_key_create(3) for similar interface). + * + */ +struct lu_context { + /* + * Theoretically we'd want to use lu_objects and lu_contexts on the + * client side too. On the other hand, we don't want to allocate + * values of server-side keys for the client contexts and vice versa. + * + * To achieve this, set of tags in introduced. Contexts and keys are + * marked with tags. Key value are created only for context whose set + * of tags has non-empty intersection with one for key. Tags are taken + * from enum lu_context_tag. + */ + __u32 lc_tags; + /* + * Pointer to the home service thread. NULL for other execution + * contexts. + */ + struct ptlrpc_thread *lc_thread; + /* + * Pointer to an array with key values. Internal implementation + * detail. + */ + void **lc_value; +}; + +/* + * lu_context_key interface. Similar to pthread_key. + */ + +enum lu_context_tag { + /* + * Thread on md server + */ + LCT_MD_THREAD = 1 << 0, + /* + * Thread on dt server + */ + LCT_DT_THREAD = 1 << 1, + /* + * Context for transaction handle + */ + LCT_TX_HANDLE = 1 << 2, + /* + * Thread on client + */ + LCT_CL_THREAD = 1 << 3, + /* + * Per-request session on server + */ + LCT_SESSION = 1 << 4, + /* + * Don't add references for modules creating key values in that context. + * This is only for contexts used internally by lu_object framework. + */ + LCT_NOREF = 1 << 30, + /* + * Contexts usable in cache shrinker thread. + */ + LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF +}; + +/* + * Key. Represents per-context value slot. + */ +struct lu_context_key { + /* + * Set of tags for which values of this key are to be instantiated. + */ + __u32 lct_tags; + /* + * Value constructor. This is called when new value is created for a + * context. Returns pointer to new value of error pointer. + */ + void *(*lct_init)(const struct lu_context *ctx, + struct lu_context_key *key); + /* + * Value destructor. Called when context with previously allocated + * value of this slot is destroyed. @data is a value that was returned + * by a matching call to ->lct_init(). + */ + void (*lct_fini)(const struct lu_context *ctx, + struct lu_context_key *key, void *data); + /* + * Optional method called on lu_context_exit() for all allocated + * keys. Can be used by debugging code checking that locks are + * released, etc. + */ + void (*lct_exit)(const struct lu_context *ctx, + struct lu_context_key *key, void *data); + /* + * Internal implementation detail: index within ->lc_value[] reserved + * for this key. + */ + int lct_index; + /* + * Internal implementation detail: number of values created for this + * key. + */ + atomic_t lct_used; + /* + * Internal implementation detail: module for this key. + */ + struct module *lct_owner; +}; + +#define LU_CONTEXT_KEY_INIT(key) \ +do { \ + (key)->lct_owner = THIS_MODULE; \ +} while (0) + +/* + * Register new key. + */ +int lu_context_key_register(struct lu_context_key *key); +/* + * Deregister key. + */ +void lu_context_key_degister(struct lu_context_key *key); +/* + * Return value associated with key @key in context @ctx. + */ +void *lu_context_key_get(const struct lu_context *ctx, + struct lu_context_key *key); + +/* + * Initialize context data-structure. Create values for all keys. + */ +int lu_context_init(struct lu_context *ctx, __u32 tags); +/* + * Finalize context data-structure. Destroy key values. + */ +void lu_context_fini(struct lu_context *ctx); + +/* + * Called before entering context. + */ +void lu_context_enter(struct lu_context *ctx); +/* + * Called after exiting from @ctx + */ +void lu_context_exit(struct lu_context *ctx); + +/* + * Allocate for context all missing keys that were registered after context + * creation. + */ +int lu_context_refill(const struct lu_context *ctx); + +/* + * Environment. + */ +struct lu_env { + /* + * "Local" context, used to store data instead of stack. + */ + struct lu_context le_ctx; + /* + * "Session" context for per-request data. + */ + struct lu_context *le_ses; +}; + +int lu_env_init(struct lu_env *env, struct lu_context *ses, __u32 tags); +void lu_env_fini(struct lu_env *env); + +/* + * Common name structure to be passed around for various name related methods. + */ +struct lu_name { + char *ln_name; + int ln_namelen; +}; + +/* + * Common buffer structure to be passed around for various xattr_{s,g}et() + * methods. + */ +struct lu_buf { + void *lb_buf; + ssize_t lb_len; +}; + +extern struct lu_buf LU_BUF_NULL; /* null buffer */ + +#define DLUBUF "(%p %z)" +#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len +/* + * One-time initializers, called at obdclass module initialization, not + * exported. + */ + +/* + * Initialization of global lu_* data. + */ +int lu_global_init(void); + +/* + * Dual to lu_global_init(). + */ +void lu_global_fini(void); + +enum { + LU_TIME_FIND_LOOKUP, + LU_TIME_FIND_ALLOC, + LU_TIME_FIND_INSERT, + LU_TIME_NR +}; + +extern const char *lu_time_names[LU_TIME_NR]; + +#endif /* __LUSTRE_LU_OBJECT_H */ diff --git a/lustre/include/lu_time.h b/lustre/include/lu_time.h new file mode 100644 index 0000000..e5d3eda --- /dev/null +++ b/lustre/include/lu_time.h @@ -0,0 +1,47 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LUSTRE_LU_TIME_H +#define __LUSTRE_LU_TIME_H + +struct lprocfs_stats; +struct lu_env; + +int lu_time_global_init(void); +void lu_time_global_fini(void); + +int lu_time_named_init(struct lprocfs_stats **stats, const char *name, + cfs_proc_dir_entry_t *entry, + const char **names, int nr); +int lu_time_init(struct lprocfs_stats **stats, + cfs_proc_dir_entry_t *entry, + const char **names, int nr); +void lu_time_fini(struct lprocfs_stats **stats); + +void lu_lprocfs_time_start(const struct lu_env *env); +void lu_lprocfs_time_end(const struct lu_env *env, + struct lprocfs_stats *stats, int idx); + +int lu_time_is_clean(const struct lu_env *env); + +#endif /* __LUSTRE_LU_TIME_H */ diff --git a/lustre/include/lustre/Makefile.am b/lustre/include/lustre/Makefile.am index 0acd90f..09b9290 100644 --- a/lustre/include/lustre/Makefile.am +++ b/lustre/include/lustre/Makefile.am @@ -4,7 +4,7 @@ # See the file COPYING in this distribution if UTILS -pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h types.h +pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h libiam.h types.h endif -EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h types.h +EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h libiam.h types.h diff --git a/lustre/include/lustre/libiam.h b/lustre/include/lustre/libiam.h new file mode 100644 index 0000000..5196388 --- /dev/null +++ b/lustre/include/lustre/libiam.h @@ -0,0 +1,127 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * libiam.h + * iam user level library + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Wang Di + * Author: Nikita Danilov + * Author: Fan Yong + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +/* + * lustre/libiam.h + */ + +#ifndef __IAM_ULIB_H__ +#define __IAM_ULIB_H__ + + +#define DX_FMT_NAME_LEN 16 + +enum iam_fmt_t { + FMT_LFIX, + FMT_LVAR +}; + +struct iam_uapi_info { + __u16 iui_keysize; + __u16 iui_recsize; + __u16 iui_ptrsize; + __u16 iui_height; + char iui_fmt_name[DX_FMT_NAME_LEN]; +}; + +/* + * Creat an iam file, but do NOT open it. + * Return 0 if success, else -1. + */ +int iam_creat(char *filename, enum iam_fmt_t fmt, + int blocksize, int keysize, int recsize, int ptrsize); + +/* + * Open an iam file, but do NOT creat it if the file doesn't exist. + * Please use iam_creat for creating the file before use iam_open. + * Return file id (fd) if success, else -1. + */ +int iam_open(char *filename, struct iam_uapi_info *ua); + +/* + * Close file opened by iam_open. + */ +int iam_close(int fd); + +/* + * Please use iam_open before use this function. + */ +int iam_insert(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *keybuf, + int rec_need_convert, char *recbuf); + +/* + * Please use iam_open before use this function. + */ +int iam_lookup(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec); + +/* + * Please use iam_open before use this function. + */ +int iam_delete(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *keybuf, + int rec_need_convert, char *recbuf); + +/* + * Please use iam_open before use this function. + */ +int iam_it_start(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec); + +/* + * Please use iam_open before use this function. + */ +int iam_it_next(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec); + +/* + * Please use iam_open before use this function. + */ +int iam_it_stop(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *keybuf, + int rec_need_convert, char *recbuf); + +/* + * Change iam file mode. + */ +int iam_polymorph(char *filename, unsigned long mode); + + +#endif diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index ea1c9d0..5371b87 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -17,7 +17,7 @@ extern int llapi_file_create(const char *name, unsigned long stripe_size, extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum); #define HAVE_LLAPI_FILE_LOOKUP extern int llapi_file_lookup(int dirfd, const char *name); - + struct find_param { unsigned int maxdepth; time_t atime; @@ -67,4 +67,6 @@ extern int llapi_quotacheck(char *mnt, int check_type); extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk); extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl); extern int llapi_target_iterate(int type_num, char **obd_type, void *args, llapi_cb_t cb); +extern int llapi_getfacl(char *fname, char *cmd); +extern int llapi_setfacl(char *fname, char *cmd); #endif diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 38acc4d..86c6321 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -5,6 +5,11 @@ * * Lustre wire protocol definitions. * + * + * We assume all nodes are either little-endian or big-endian, and we + * always send messages in the sender's native format. The receiver + * detects the message format by checking the 'magic' field of the message + * (see lustre_msg_swabbed() below). * ALL structs passing over the wire should be declared here. Structs * that are used in interfaces with userspace should go in lustre_user.h. * @@ -31,11 +36,6 @@ * in the code to ensure that new/old clients that see this larger struct * do not fail, otherwise you need to implement protocol compatibility). * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, * implemented either here, inline (trivial implementations) or in * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" @@ -52,6 +52,8 @@ #ifndef _LUSTRE_IDL_H_ #define _LUSTRE_IDL_H_ +#include + #if defined(__linux__) #include #elif defined(__APPLE__) @@ -66,14 +68,6 @@ #include /* - * this file contains all data structures used in Lustre interfaces: - * - obdo and obd_request records - * - mds_request records - * - ldlm data - * - ioctl's - */ - -/* * GENERAL STUFF */ /* FOO_REQUEST_PORTAL is for incoming requests on the FOO @@ -104,11 +98,16 @@ //#define PTLBD_BULK_PORTAL 21 #define MDS_SETATTR_PORTAL 22 #define MDS_READPAGE_PORTAL 23 +#define MDS_MDS_PORTAL 24 #define MGC_REPLY_PORTAL 25 #define MGS_REQUEST_PORTAL 26 #define MGS_REPLY_PORTAL 27 #define OST_REQUEST_PORTAL 28 +#define FLD_REQUEST_PORTAL 29 +#define SEQ_METADATA_PORTAL 30 +#define SEQ_DATA_PORTAL 31 +#define SEQ_CONTROLLER_PORTAL 32 #define SVC_KILLED 1 #define SVC_EVENT 2 @@ -140,6 +139,243 @@ #define LUSTRE_LOG_VERSION 0x00050000 #define LUSTRE_MGS_VERSION 0x00060000 +typedef __u64 mdsno_t; +typedef __u64 seqno_t; + +struct lu_range { + __u64 lr_start; + __u64 lr_end; +}; + +static inline __u64 range_space(struct lu_range *r) +{ + return r->lr_end - r->lr_start; +} + +static inline void range_zero(struct lu_range *r) +{ + r->lr_start = r->lr_end = 0; +} + +static inline int range_within(struct lu_range *r, + __u64 s) +{ + return s >= r->lr_start && s < r->lr_end; +} + +static inline void range_alloc(struct lu_range *r, + struct lu_range *s, + __u64 w) +{ + r->lr_start = s->lr_start; + r->lr_end = s->lr_start + w; + s->lr_start += w; +} + +static inline int range_is_sane(struct lu_range *r) +{ + return (r->lr_end >= r->lr_start); +} + +static inline int range_is_zero(struct lu_range *r) +{ + return (r->lr_start == 0 && r->lr_end == 0); +} + +static inline int range_is_exhausted(struct lu_range *r) +{ + return range_space(r) == 0; +} + +#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x]" + +#define PRANGE(range) \ + (range)->lr_start, \ + (range)->lr_end + +struct lu_fid { + __u64 f_seq; /* holds fid sequence. Lustre should support 2 ^ 64 + * objects, thus even if one sequence has one object we + * reach this value. */ + __u32 f_oid; /* fid number within its sequence. */ + __u32 f_ver; /* holds fid version. */ +}; + +/* + * fid constants + */ +enum { + LUSTRE_ROOT_FID_SEQ = 1ULL, /* XXX: should go into mkfs. */ + + /* initial fid id value */ + LUSTRE_FID_INIT_OID = 1UL +}; + +/* get object sequence */ +static inline __u64 fid_seq(const struct lu_fid *fid) +{ + return fid->f_seq; +} + +/* get object id */ +static inline __u32 fid_oid(const struct lu_fid *fid) +{ + return fid->f_oid; +} + +/* get object version */ +static inline __u32 fid_ver(const struct lu_fid *fid) +{ + return fid->f_ver; +} + +static inline int fid_seq_is_sane(__u64 seq) +{ + return seq != 0; +} + +static inline void fid_zero(struct lu_fid *fid) +{ + memset(fid, 0, sizeof(*fid)); +} + +static inline int fid_is_igif(const struct lu_fid *fid) +{ + return fid_seq(fid) == LUSTRE_ROOT_FID_SEQ; +} + +#define DFID "[0x%16.16"LPF64"x/0x%8.8x:0x%8.8x]" + +#define PFID(fid) \ + fid_seq(fid), \ + fid_oid(fid), \ + fid_ver(fid) + +#ifdef __KERNEL__ +static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); + dst->f_seq = cpu_to_be64(fid_seq(src)); + dst->f_oid = cpu_to_be32(fid_oid(src)); + dst->f_ver = cpu_to_be32(fid_ver(src)); +} + +static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = be64_to_cpu(fid_seq(src)); + dst->f_oid = be32_to_cpu(fid_oid(src)); + dst->f_ver = be32_to_cpu(fid_ver(src)); + LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); +} + +/* + * Storage representation for fids. + * + * Variable size, first byte contains the length of the whole record. + */ + +struct lu_fid_pack { + char fp_len; + char fp_area[sizeof(struct lu_fid)]; +}; + +void fid_pack(struct lu_fid_pack *pack, const struct lu_fid *fid, + struct lu_fid *befider); +void fid_unpack(const struct lu_fid_pack *pack, struct lu_fid *fid); + +/* __KERNEL__ */ +#endif + +static inline int fid_is_sane(const struct lu_fid *fid) +{ + return + fid != NULL && + ((fid_seq_is_sane(fid_seq(fid)) && fid_oid(fid) != 0 + && fid_ver(fid) == 0) || + fid_is_igif(fid)); +} + +static inline int fid_is_zero(const struct lu_fid *fid) +{ + return fid_seq(fid) == 0 && fid_oid(fid) == 0; +} + +extern void lustre_swab_lu_fid(struct lu_fid *fid); +extern void lustre_swab_lu_range(struct lu_range *range); + +static inline int lu_fid_eq(const struct lu_fid *f0, + const struct lu_fid *f1) +{ + /* Check that there is no alignment padding. */ + CLASSERT(sizeof *f0 == + sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); + LASSERTF(fid_is_igif(f0) || fid_ver(f0) == 0, DFID, PFID(f0)); + LASSERTF(fid_is_igif(f1) || fid_ver(f1) == 0, DFID, PFID(f1)); + return memcmp(f0, f1, sizeof *f0) == 0; +} + +/* + * Layout of readdir pages, as transmitted on wire. + */ +struct lu_dirent { + struct lu_fid lde_fid; + __u32 lde_hash; + __u16 lde_reclen; + __u16 lde_namelen; + char lde_name[0]; +}; + +struct lu_dirpage { + __u32 ldp_hash_start; + __u32 ldp_hash_end; + __u16 ldp_flags; + __u32 ldp_pad0; + struct lu_dirent ldp_entries[0]; +}; + +enum lu_dirpage_flags { + LDF_EMPTY = 1 << 0 +}; + +static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp) +{ + if (le16_to_cpu(dp->ldp_flags) & LDF_EMPTY) + return NULL; + else + return dp->ldp_entries; +} + +static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent) +{ + struct lu_dirent *next; + + if (le16_to_cpu(ent->lde_reclen) != 0) + next = ((void *)ent) + le16_to_cpu(ent->lde_reclen); + else + next = NULL; + + return next; +} + +static inline int lu_dirent_size(struct lu_dirent *ent) +{ + if (le16_to_cpu(ent->lde_reclen) == 0) { + return (sizeof(*ent) + + le16_to_cpu(ent->lde_namelen) + 3) & ~3; + } + return le16_to_cpu(ent->lde_reclen); +} + +#define DIR_END_OFF 0xfffffffeUL + struct lustre_handle { __u64 cookie; }; @@ -195,7 +431,7 @@ struct lustre_msg_v2 { __u32 lm_buflens[0]; }; -/* without security, ptlrpc_body is put in the first buffer. */ +/* without gss, ptlrpc_body is put at the first buffer. */ struct ptlrpc_body { struct lustre_handle pb_handle; __u32 pb_type; @@ -247,6 +483,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define MSG_LAST_REPLAY 1 #define MSG_RESENT 2 #define MSG_REPLAY 4 +#define MSG_REQ_REPLAY_DONE 8 +#define MSG_LOCK_REPLAY_DONE 16 /* * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) @@ -259,41 +497,48 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define MSG_CONNECT_LIBCLIENT 0x10 #define MSG_CONNECT_INITIAL 0x20 #define MSG_CONNECT_ASYNC 0x40 -#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */ +#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */ +#define MSG_CONNECT_TRANSNO 0x100 /* report transno */ /* Connect flags */ -#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */ -#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */ -#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */ -#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */ -#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */ -#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */ -#define OBD_CONNECT_ACL 0x80ULL /* client uses access control lists */ -#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/ -#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST create objects on write */ -#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* locks on server for punch b=9528 */ -#define OBD_CONNECT_TRANSNO 0x800ULL /* replay sends initial transno */ -#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ -#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ -#define OBD_CONNECT_ATTRFID 0x4000ULL /* Server supports GetAttr By Fid */ -#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */ -#define OBD_CONNECT_LCL_CLIENT 0x10000ULL /* local 1.8 client */ -#define OBD_CONNECT_RMT_CLIENT 0x20000ULL /* Remote 1.8 client */ -#define OBD_CONNECT_BRW_SIZE 0x40000ULL /* Max bytes per rpc */ -#define OBD_CONNECT_QUOTA64 0x80000ULL /* 64bit qunit_data.qd_count b=10707*/ -#define OBD_CONNECT_FID_CAPA 0x100000ULL /* fid capability */ -#define OBD_CONNECT_OSS_CAPA 0x200000ULL /* OSS capability */ +#define OBD_CONNECT_RDONLY 0x00000001ULL /* client allowed read-only access */ +#define OBD_CONNECT_INDEX 0x00000002ULL /* connect to specific LOV idx */ +#define OBD_CONNECT_GRANT 0x00000008ULL /* OSC acquires grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x00000010ULL /* server takes locks for client */ +#define OBD_CONNECT_VERSION 0x00000020ULL /* Server supports versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x00000040ULL /* Separate portal for non-IO reqs */ +#define OBD_CONNECT_ACL 0x00000080ULL /* client uses access control lists */ +#define OBD_CONNECT_XATTR 0x00000100ULL /* client using extended attributes*/ +#define OBD_CONNECT_TRUNCLOCK 0x00000400ULL /* locks on server for punch b=9528 */ +#define OBD_CONNECT_IBITS 0x00001000ULL /* support for inodebits locks */ +#define OBD_CONNECT_JOIN 0x00002000ULL /* files can be concatenated */ +#define OBD_CONNECT_ATTRFID 0x00004000ULL /* Server supports GetAttr By Fid */ +#define OBD_CONNECT_NODEVOH 0x00008000ULL /* No open handle for special nodes */ +#define OBD_CONNECT_LCL_CLIENT 0x00010000ULL /* local 1.8 client */ +#define OBD_CONNECT_RMT_CLIENT 0x00020000ULL /* Remote 1.8 client */ +#define OBD_CONNECT_BRW_SIZE 0x00040000ULL /* Max bytes per rpc */ +#define OBD_CONNECT_QUOTA64 0x00080000ULL /* 64bit qunit_data.qd_count b=10707*/ +#define OBD_CONNECT_MDS_CAPA 0x00100000ULL /* MDS capability */ +#define OBD_CONNECT_OSS_CAPA 0x00200000ULL /* OSS capability */ +#define OBD_CONNECT_MDS_MDS 0x00400000ULL /* MDS-MDS connection*/ +#define OBD_CONNECT_SOM 0x00800000ULL /* SOM feature */ +#define OBD_CONNECT_REAL 0x00000200ULL /* real connection */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ -#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ +#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ - OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID) + OBD_CONNECT_NODEVOH |/* OBD_CONNECT_ATTRFID |*/\ + OBD_CONNECT_LCL_CLIENT | \ + OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \ + OBD_CONNECT_MDS_MDS) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ - OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64) + OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \ + OBD_CONNECT_OSS_CAPA) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION) @@ -311,18 +556,19 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); * If we eventually have separate connect data for different types, which we * almost certainly will, then perhaps we stick a union in here. */ struct obd_connect_data { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_brw_size; /* Maximum BRW size in bytes */ - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u32 ocd_nllu; /* non-local-lustre-user */ - __u32 ocd_nllg; /* non-local-lustre-group */ - __u64 padding1; /* also fix lustre_swab_connect */ - __u64 padding2; /* also fix lustre_swab_connect */ - __u64 padding3; /* also fix lustre_swab_connect */ - __u64 padding4; /* also fix lustre_swab_connect */ + __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ + __u32 ocd_version; /* lustre release version number */ + __u32 ocd_grant; /* initial cache grant amount (bytes) */ + __u32 ocd_index; /* LOV index to connect to */ + __u32 ocd_brw_size; /* Maximum BRW size in bytes */ + __u64 ocd_ibits_known; /* inode bits this client understands */ + __u32 ocd_nllu; /* non-local-lustre-user */ + __u32 ocd_nllg; /* non-local-lustre-group */ + __u64 ocd_transno; /* first transno from client to be replayed */ + __u32 ocd_group; /* MDS group on OST */ + __u32 padding1; /* also fix lustre_swab_connect */ + __u64 padding2; /* also fix lustre_swab_connect */ + __u64 padding3; /* also fix lustre_swab_connect */ }; extern void lustre_swab_connect(struct obd_connect_data *ocd); @@ -347,8 +593,6 @@ typedef enum { OST_OPEN = 11, OST_CLOSE = 12, OST_STATFS = 13, -/* OST_SAN_READ = 14, deprecated */ -/* OST_SAN_WRITE = 15, deprecated */ OST_SYNC = 16, OST_SET_INFO = 17, OST_QUOTACHECK = 18, @@ -380,18 +624,19 @@ typedef uint32_t obd_count; #define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */ #define OBD_FL_NO_USRQUOTA (0x00000100) /* the object's owner is over quota */ #define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */ -#define OBD_FL_CREATE_CROW (0x00000400) /* object should be create on write */ /* - * set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs + * Set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs * that declared OBD_CONNECT_TRUNCLOCK in their connect flags support this * functionality. */ #define OBD_FL_TRUNCLOCK (0x00000800) -/* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct - * llog_cookie) + sizeof(ll_fid). Nevertheless struct ll_fid is not longer - * stored in o_inline, we keep this just for case. */ +/* + * This should not be smaller than sizeof(struct lustre_handle) + sizeof(struct + * llog_cookie) + sizeof(struct ll_fid). Nevertheless struct ll_fid is not + * longer stored in o_inline, we keep this just for case. + */ #define OBD_INLINESZ 80 /* Note: 64-bit types are 64-bit aligned in structure */ @@ -460,6 +705,18 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; +extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); + +#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) +#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) + +#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" +#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" +#define XATTR_NAME_LOV "trusted.lov" + +/* remote ACL */ +#define XATTR_NAME_LUSTRE_ACL "system.lustre_acl" + #define OBD_MD_FLID (0x00000001ULL) /* object ID */ #define OBD_MD_FLATIME (0x00000002ULL) /* access time */ #define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ @@ -486,6 +743,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ #define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write easize is epoch */ + /* ->mds if epoch opens or closes */ #define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ #define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ #define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ @@ -494,11 +752,17 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ #define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ +#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */ #define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */ #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ +#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */ +#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ +#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ +#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ +#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ #define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ @@ -625,6 +889,8 @@ typedef enum { MDS_QUOTACTL = 48, MDS_GETXATTR = 49, MDS_SETXATTR = 50, + MDS_WRITEPAGE = 51, + MDS_IS_SUBDIR = 52, MDS_LAST_OPC } mds_cmd_t; @@ -641,10 +907,10 @@ typedef enum { REINT_UNLINK = 4, REINT_RENAME = 5, REINT_OPEN = 6, -// REINT_CLOSE = 7, -// REINT_WRITE = 8, REINT_MAX -} mds_reint_t; +} mds_reint_t, mdt_reint_t; + +extern void lustre_swab_generic_32s (__u32 *val); /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 0x00000001 @@ -688,6 +954,15 @@ struct mds_status_req { extern void lustre_swab_mds_status_req (struct mds_status_req *r); +/* mdt_thread_info.mti_flags. */ +enum mdt_ioepoch_flags { + /* The flag indicates Size-on-MDS attributes are changed. */ + MF_SOM_CHANGE = (1 << 0), + /* Flags indicates an epoch opens or closes. */ + MF_EPOCH_OPEN = (1 << 1), + MF_EPOCH_CLOSE = (1 << 2), +}; + #define MDS_BFLAG_UNCOMMITTED_WRITES 0x1 #define MDS_BFLAG_EXT_FLAGS 0x80000000 /* == EXT3_RESERVED_FL */ @@ -732,6 +1007,33 @@ static inline int ll_inode_to_ext_flags(int oflags, int iflags) } #endif +struct mdt_body { + struct lu_fid fid1; + struct lu_fid fid2; + struct lustre_handle handle; + __u64 valid; + __u64 size; /* Offset, in the case of MDS_READPAGE */ + __u64 mtime; + __u64 atime; + __u64 ctime; + __u64 blocks; /* XID, in the case of MDS_READPAGE */ + __u64 ioepoch; + __u32 fsuid; + __u32 fsgid; + __u32 capability; + __u32 mode; + __u32 uid; + __u32 gid; + __u32 flags; /* from vfs for pin/unpin, MDS_BFLAG for close */ + __u32 rdev; + __u32 nlink; /* #bytes to read in the case of MDS_READPAGE */ + __u32 suppgid; + __u32 eadatasize; + __u32 aclsize; + __u32 max_mdsize; + __u32 max_cookiesize; /* also fix lustre_swab_mdt_body */ +}; + struct mds_body { struct ll_fid fid1; struct ll_fid fid2; @@ -763,6 +1065,16 @@ struct mds_body { }; extern void lustre_swab_mds_body (struct mds_body *b); +extern void lustre_swab_mdt_body (struct mdt_body *b); + +struct mdt_epoch { + struct lustre_handle handle; + __u64 ioepoch; + __u32 flags; + __u32 padding; +}; + +extern void lustre_swab_mdt_epoch (struct mdt_epoch *b); #define Q_QUOTACHECK 0x800100 #define Q_INITQUOTA 0x800101 /* init slave limits */ @@ -786,6 +1098,33 @@ struct obd_quotactl { extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); +/* inode access permission for remote user, the inode info are omitted, + * for client knows them. */ +struct mds_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +/* setxid permissions for mds_setxid_perm.mp_perm */ +#define LUSTRE_SETUID_PERM 0x01 +#define LUSTRE_SETGID_PERM 0x02 +#define LUSTRE_SETGRP_PERM 0x04 + +extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p); + +struct mdt_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p); + struct mds_rec_setattr { __u32 sa_opcode; __u32 sa_fsuid; @@ -805,17 +1144,48 @@ struct mds_rec_setattr { __u32 sa_padding; /* also fix lustre_swab_mds_rec_setattr */ }; +extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); + +struct mdt_rec_setattr { + __u32 sa_opcode; + __u32 sa_fsuid; + __u32 sa_fsgid; + __u32 sa_cap; + __u32 sa_suppgid; + __u32 sa_mode; + struct lu_fid sa_fid; + __u64 sa_valid; + __u64 sa_size; + __u64 sa_blocks; + __u64 sa_mtime; + __u64 sa_atime; + __u64 sa_ctime; + __u32 sa_uid; + __u32 sa_gid; + __u32 sa_attr_flags; + __u32 sa_padding; /* also fix lustre_swab_mds_rec_setattr */ +}; + +extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa); + /* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */ #ifndef ATTR_CTIME_SET #define ATTR_CTIME_SET 0x2000 #endif -extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); - #ifndef FMODE_READ #define FMODE_READ 00000001 #define FMODE_WRITE 00000002 #endif + +#define FMODE_EPOCH 01000000 +#define FMODE_EPOCHLCK 02000000 +#define FMODE_SOM 04000000 +#define FMODE_CLOSED 0 + +#define MDS_OPEN_CREATED 00000010 +#define MDS_OPEN_CROSS 00000020 + #define MDS_FMODE_EXEC 00000004 #define MDS_OPEN_CREAT 00000100 #define MDS_OPEN_EXCL 00000200 @@ -827,10 +1197,51 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ #define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file*/ +#define MDS_CREATE_RMT_ACL 01000000000 /* indicate create on remote server + * with default ACL */ +#define MDS_CREATE_SLAVE_OBJ 02000000000 /* indicate create slave object + * actually, this is for create, not + * conflict with other open flags */ #define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ #define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ +/* permission for create non-directory file */ +#define MAY_CREATE (1 << 7) +/* permission for create directory file */ +#define MAY_LINK (1 << 8) +/* permission for delete from the directory */ +#define MAY_UNLINK (1 << 9) +/* source's permission for rename */ +#define MAY_RENAME_SRC (1 << 10) +/* target's permission for rename */ +#define MAY_RENAME_TAR (1 << 11) +/* part (parent's) VTX permission check */ +#define MAY_VTX_PART (1 << 12) +/* full VTX permission check */ +#define MAY_VTX_FULL (1 << 13) + +enum { + MDS_CHECK_SPLIT = 1 << 0, + MDS_CROSS_REF = 1 << 1, + MDS_VTX_BYPASS = 1 << 2, + MDS_PERM_BYPASS = 1 << 3 +}; + +struct mds_rec_join { + struct ll_fid jr_fid; + __u64 jr_headsize; +}; + +extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr); + +struct mdt_rec_join { + struct lu_fid jr_fid; + __u64 jr_headsize; +}; + +extern void lustre_swab_mdt_rec_join (struct mdt_rec_join *jr); + struct mds_rec_create { __u32 cr_opcode; __u32 cr_fsuid; @@ -852,12 +1263,26 @@ struct mds_rec_create { extern void lustre_swab_mds_rec_create (struct mds_rec_create *cr); -struct mds_rec_join { - struct ll_fid jr_fid; - __u64 jr_headsize; +struct mdt_rec_create { + __u32 cr_opcode; + __u32 cr_fsuid; + __u32 cr_fsgid; + __u32 cr_cap; + __u32 cr_flags; /* for use with open */ + __u32 cr_mode; + struct lustre_handle cr_old_handle; /* u64 handle in case of open replay */ + struct lu_fid cr_fid1; + struct lu_fid cr_fid2; + __u64 cr_time; + __u64 cr_rdev; + __u64 cr_ioepoch; + __u32 cr_suppgid1; + __u32 cr_suppgid2; + __u32 cr_bias; + __u32 cr_padding_1; /* pad for 64 bits*/ }; -extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr); +extern void lustre_swab_mdt_rec_create (struct mdt_rec_create *cr); struct mds_rec_link { __u32 lk_opcode; @@ -877,6 +1302,24 @@ struct mds_rec_link { extern void lustre_swab_mds_rec_link (struct mds_rec_link *lk); +struct mdt_rec_link { + __u32 lk_opcode; + __u32 lk_fsuid; + __u32 lk_fsgid; + __u32 lk_cap; + __u32 lk_suppgid1; + __u32 lk_suppgid2; + struct lu_fid lk_fid1; + struct lu_fid lk_fid2; + __u64 lk_time; + __u32 lk_bias; + __u32 lk_padding_2; /* also fix lustre_swab_mds_rec_link */ + __u32 lk_padding_3; /* also fix lustre_swab_mds_rec_link */ + __u32 lk_padding_4; /* also fix lustre_swab_mds_rec_link */ +}; + +extern void lustre_swab_mdt_rec_link (struct mdt_rec_link *lk); + struct mds_rec_unlink { __u32 ul_opcode; __u32 ul_fsuid; @@ -895,6 +1338,24 @@ struct mds_rec_unlink { extern void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul); +struct mdt_rec_unlink { + __u32 ul_opcode; + __u32 ul_fsuid; + __u32 ul_fsgid; + __u32 ul_cap; + __u32 ul_suppgid; + __u32 ul_mode; + struct lu_fid ul_fid1; + struct lu_fid ul_fid2; + __u64 ul_time; + __u32 ul_bias; + __u32 ul_padding_2; /* also fix lustre_swab_mds_rec_unlink */ + __u32 ul_padding_3; /* also fix lustre_swab_mds_rec_unlink */ + __u32 ul_padding_4; /* also fix lustre_swab_mds_rec_unlink */ +}; + +extern void lustre_swab_mdt_rec_unlink (struct mdt_rec_unlink *ul); + struct mds_rec_rename { __u32 rn_opcode; __u32 rn_fsuid; @@ -913,6 +1374,58 @@ struct mds_rec_rename { extern void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn); +struct mdt_rec_rename { + __u32 rn_opcode; + __u32 rn_fsuid; + __u32 rn_fsgid; + __u32 rn_cap; + __u32 rn_suppgid1; + __u32 rn_suppgid2; + struct lu_fid rn_fid1; + struct lu_fid rn_fid2; + __u64 rn_time; + __u32 rn_mode; /* cross-ref rename has mode */ + __u32 rn_bias; /* some operation flags */ + __u32 rn_padding_3; /* also fix lustre_swab_mdt_rec_rename */ + __u32 rn_padding_4; /* also fix lustre_swab_mdt_rec_rename */ +}; + +extern void lustre_swab_mdt_rec_rename (struct mdt_rec_rename *rn); + +/* begin adding MDT by huanghua@clusterfs.com */ +struct lmv_desc { + __u32 ld_tgt_count; /* how many MDS's */ + __u32 ld_active_tgt_count; /* how many active */ + struct obd_uuid ld_uuid; +}; + +extern void lustre_swab_lmv_desc (struct lmv_desc *ld); +/* end adding MDT by huanghua@clusterfs.com */ + +struct md_fld { + seqno_t mf_seq; + mdsno_t mf_mds; +}; + +extern void lustre_swab_md_fld (struct md_fld *mf); + +enum fld_rpc_opc { + FLD_QUERY = 600, + FLD_LAST_OPC, + FLD_FIRST_OPC = FLD_QUERY +}; + +enum seq_rpc_opc { + SEQ_QUERY = 700, + SEQ_LAST_OPC, + SEQ_FIRST_OPC = SEQ_QUERY +}; + +enum seq_op { + SEQ_ALLOC_SUPER = 0, + SEQ_ALLOC_META = 1 +}; + /* * LOV data structures */ @@ -971,13 +1484,13 @@ extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id); /* lock types */ typedef enum { LCK_MINMODE = 0, - LCK_EX = 1, - LCK_PW = 2, - LCK_PR = 4, - LCK_CW = 8, - LCK_CR = 16, - LCK_NL = 32, - LCK_GROUP = 64, + LCK_EX = 1, + LCK_PW = 2, + LCK_PR = 4, + LCK_CW = 8, + LCK_CR = 16, + LCK_NL = 32, + LCK_GROUP = 64, LCK_MAXMODE } ldlm_mode_t; @@ -1079,6 +1592,7 @@ typedef enum { MGS_TARGET_DEL, MGS_LAST_OPC } mgs_cmd_t; +#define MGS_FIRST_OPC MGS_CONNECT /* We pass this info to the MGS so it can write config logs */ #define MTI_NAME_MAXLEN 64 @@ -1267,7 +1781,7 @@ struct llog_setattr_rec { struct llog_size_change_rec { struct llog_rec_hdr lsc_hdr; struct ll_fid lsc_fid; - __u32 lsc_io_epoch; + __u32 lsc_ioepoch; __u32 padding; struct llog_rec_tail lsc_tail; } __attribute__((packed)); @@ -1332,6 +1846,8 @@ enum llogd_rpc_ops { LLOG_CATINFO = 507, /* for lfs catinfo */ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ + LLOG_LAST_OPC, + LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE }; struct llogd_body { @@ -1417,6 +1933,93 @@ typedef enum { #define JOIN_FILE_ALIGN 4096 +/* security opcodes */ +typedef enum { + SEC_CTX_INIT = 801, + SEC_CTX_INIT_CONT = 802, + SEC_CTX_FINI = 803, + SEC_LAST_OPC, + SEC_FIRST_OPC = SEC_CTX_INIT +} sec_cmd_t; + +/* + * capa related definitions + */ +#define CAPA_HMAC_MAX_LEN 64 +#define CAPA_HMAC_KEY_MAX_LEN 56 + +/* NB take care when changing the sequence of elements this struct, + * because the offset info is used in find_capa() */ +struct lustre_capa { + struct lu_fid lc_fid; /* fid */ + __u64 lc_opc; /* operations allowed */ + __u32 lc_uid; /* uid, this is legacy and for OSS only */ + __u32 lc_flags; /* HMAC algorithm & flags */ + __u32 lc_keyid; /* key used for the capability */ + __u32 lc_timeout; /* capa timeout value (sec) */ + __u64 lc_expiry; /* expiry time (sec) */ + __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /* HMAC */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa(struct lustre_capa *c); + +/* lustre_capa.lc_opc */ +enum { + CAPA_OPC_BODY_WRITE = 1<<0, /* write object data */ + CAPA_OPC_BODY_READ = 1<<1, /* read object data */ + CAPA_OPC_INDEX_LOOKUP = 1<<2, /* lookup object fid */ + CAPA_OPC_INDEX_INSERT = 1<<3, /* insert object fid */ + CAPA_OPC_INDEX_DELETE = 1<<4, /* delete object fid */ + CAPA_OPC_OSS_WRITE = 1<<5, /* write oss object data */ + CAPA_OPC_OSS_READ = 1<<6, /* read oss object data */ + CAPA_OPC_OSS_TRUNC = 1<<7, /* truncate oss object */ + CAPA_OPC_META_WRITE = 1<<8, /* write object meta data */ + CAPA_OPC_META_READ = 1<<9, /* read object meta data */ + +}; + +#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE) +#define CAPA_OPC_MDS_ONLY \ + (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \ + CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) +#define CAPA_OPC_OSS_ONLY \ + (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC) +#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY +#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) + +/* MDS capability covers object capability for operations of body r/w + * (dir readpage/sendpage), index lookup/insert/delete and meta data r/w, + * while OSS capability only covers object capability for operations of + * oss data(file content) r/w/truncate. + */ +static inline int capa_for_mds(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) != 0; +} + +static inline int capa_for_oss(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) == 0; +} + +/* lustre_capa.lc_hmac_alg */ +enum { + CAPA_HMAC_ALG_SHA1 = 1, /* sha1 algorithm */ + CAPA_HMAC_ALG_MAX, +}; + +#define CAPA_FL_MASK 0x00ffffff +#define CAPA_HMAC_ALG_MASK 0xff000000 + +struct lustre_capa_key { + __u64 lk_mdsid; /* mds# */ + __u32 lk_keyid; /* key# */ + __u32 lk_padding; + __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /* key */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); + /* quota check function */ #define QUOTA_RET_OK 0 /* return successfully */ #define QUOTA_RET_NOQUOTA 1 /* not support quota */ diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index c306dfa..cb51975 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -58,6 +58,9 @@ struct obd_statfs; #define LL_IOC_JOIN _IOW ('f', 163, long) #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) +#define LL_IOC_FLUSHCTX _IOW ('f', 166, long) +#define LL_IOC_GETFACL _IOWR('f', 167, struct rmtacl_ioctl_data *) +#define LL_IOC_SETFACL _IOWR('f', 168, struct rmtacl_ioctl_data *) #define LL_STATFS_MDC 1 #define LL_STATFS_LOV 2 @@ -72,6 +75,9 @@ struct obd_statfs; #define LL_IOC_OBD_STATFS IOC_OBD_STATFS #define IOC_MDC_GETSTRIPE IOC_MDC_GETFILESTRIPE +/* Do not define O_CHECK_STALE as 0200000000, + * which is conflict with MDS_OPEN_OWNEROVERRIDE */ +#define O_CHECK_STALE 020000000 /* hopefully this does not conflict */ #define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ #define O_JOIN_FILE 0400000000 /* hopefully this does not conflict */ @@ -121,6 +127,7 @@ struct lov_user_mds_data_v1 { struct ll_recreate_obj { __u64 lrc_id; + __u64 lrc_group; __u32 lrc_ost_idx; }; @@ -138,7 +145,7 @@ static inline int obd_uuid_empty(struct obd_uuid *uuid) return uuid->uuid[0] == '\0'; } -static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) +static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp) { strncpy((char *)uuid->uuid, tmp, sizeof(*uuid)); uuid->uuid[sizeof(*uuid) - 1] = '\0'; @@ -174,15 +181,37 @@ struct if_quotacheck { struct obd_uuid obd_uuid; }; -#define MDS_GRP_DOWNCALL_MAGIC 0x6d6dd620 +#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd620 -struct mds_grp_downcall_data { - __u32 mgd_magic; - __u32 mgd_err; - __u32 mgd_uid; - __u32 mgd_gid; - __u32 mgd_ngroups; - __u32 mgd_groups[0]; +/* setxid permission */ +#define N_SETXID_PERMS_MAX 64 + +struct setxid_perm_downcall_data { + __u64 pdd_nid; + __u32 pdd_perm; +}; + +struct identity_downcall_data { + __u32 idd_magic; + __u32 idd_err; + __u32 idd_uid; + __u32 idd_gid; + __u32 idd_nperms; + struct setxid_perm_downcall_data idd_perms[N_SETXID_PERMS_MAX]; + __u32 idd_ngroups; + __u32 idd_groups[0]; +}; + +#define RMTACL_DOWNCALL_MAGIC 0x6d6dd620 +#define RMTACL_SIZE_MAX (4096) + +struct rmtacl_downcall_data { + __u32 add_magic; + __u32 add_handle; + __u64 add_key; + __u32 add_buflen; + __u32 add_padding; + __u8 add_buf[0]; }; #ifdef NEED_QUOTA_DEFS @@ -250,4 +279,12 @@ struct if_quotactl { # define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif +/* remote acl ioctl */ +struct rmtacl_ioctl_data { + char *cmd; /* IN */ + unsigned long cmd_len; + char *res; /* OUT */ + unsigned long res_len; +}; + #endif /* _LUSTRE_USER_H */ diff --git a/lustre/include/lustre_capa.h b/lustre/include/lustre_capa.h new file mode 100644 index 0000000..add2982 --- /dev/null +++ b/lustre/include/lustre_capa.h @@ -0,0 +1,272 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Lustre capability support. + */ + +#ifndef __LINUX_CAPA_H_ +#define __LINUX_CAPA_H_ + +/* + * capability + */ +#ifdef __KERNEL__ +#include +#endif +#include + +#define CAPA_TIMEOUT 1800 /* sec, == 30 min */ +#define CAPA_KEY_TIMEOUT (24 * 60 * 60) /* sec, == 1 days */ + +struct capa_hmac_alg { + const char *ha_name; + int ha_len; + int ha_keylen; +}; + +#define DEF_CAPA_HMAC_ALG(name, type, len, keylen) \ +[CAPA_HMAC_ALG_ ## type] = { \ + .ha_name = name, \ + .ha_len = len, \ + .ha_keylen = keylen, \ +} + +struct client_capa { + struct inode *inode; + struct list_head lli_list; /* link to lli_oss_capas */ +}; + +struct target_capa { + struct hlist_node c_hash; /* link to capa hash */ +}; + +struct obd_capa { + struct list_head c_list; /* link to capa_list */ + + struct lustre_capa c_capa; /* capa */ + atomic_t c_refc; /* ref count */ + cfs_time_t c_expiry; /* jiffies */ + spinlock_t c_lock; /* protect capa content */ + int c_site; + + union { + struct client_capa cli; + struct target_capa tgt; + } u; +}; + +enum { + CAPA_SITE_CLIENT = 0, + CAPA_SITE_SERVER, + CAPA_SITE_MAX +}; + +static inline __u64 capa_opc(struct lustre_capa *capa) +{ + return capa->lc_opc; +} + +static inline __u32 capa_uid(struct lustre_capa *capa) +{ + return capa->lc_uid; +} + +static inline struct lu_fid *capa_fid(struct lustre_capa *capa) +{ + return &capa->lc_fid; +} + +static inline __u32 capa_keyid(struct lustre_capa *capa) +{ + return capa->lc_keyid; +} + +static inline __u64 capa_expiry(struct lustre_capa *capa) +{ + return capa->lc_expiry; +} + +static inline __u32 capa_flags(struct lustre_capa *capa) +{ + return capa->lc_flags & 0xffffff; +} + +static inline __u32 capa_alg(struct lustre_capa *capa) +{ + __u32 alg = capa->lc_flags; + + return alg >> 24; +} + +static inline __u64 capa_key_mdsid(struct lustre_capa_key *key) +{ + return key->lk_mdsid; +} + +static inline __u32 capa_key_keyid(struct lustre_capa_key *key) +{ + return key->lk_keyid; +} + +#define DEBUG_CAPA(level, c, fmt, args...) \ +do { \ +CDEBUG(level, fmt " capability@%p uid %u opc "LPX64" fid "DFID" keyid %u " \ + "expiry "LPU64" flags %u alg %d\n", \ + ##args, c, capa_uid(c), capa_opc(c), PFID(capa_fid(c)), capa_keyid(c), \ + capa_expiry(c), capa_flags(c), capa_alg(c)); \ +} while (0) + +#define DEBUG_CAPA_KEY(level, k, fmt, args...) \ +do { \ +CDEBUG(level, fmt " capability key@%p mdsid "LPU64" keyid %u\n", \ + ##args, k, capa_key_mdsid(k), capa_key_keyid(k)); \ +} while (0) + +typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *); + +/* obdclass/capa.c */ +extern struct list_head capa_list[]; +extern spinlock_t capa_lock; +extern int capa_count[]; +extern cfs_mem_cache_t *capa_cachep; + +struct hlist_head *init_capa_hash(void); +void cleanup_capa_hash(struct hlist_head *hash); + +struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa); +struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa, + int alive); + +int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key); +void capa_cpy(void *dst, struct obd_capa *ocapa); + +char *dump_capa_content(char *buf, char *key, int len); + +static inline struct obd_capa *alloc_capa(int site) +{ +#ifdef __KERNEL__ + struct obd_capa *ocapa; + + OBD_SLAB_ALLOC(ocapa, capa_cachep, SLAB_KERNEL, sizeof(*ocapa)); + if (ocapa) { + atomic_set(&ocapa->c_refc, 0); + spin_lock_init(&ocapa->c_lock); + INIT_LIST_HEAD(&ocapa->c_list); + ocapa->c_site = site; + } + return ocapa; +#else + return NULL; +#endif +} + +static inline void free_capa(struct obd_capa *ocapa) +{ +#ifdef __KERNEL__ + if (atomic_read(&ocapa->c_refc)) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc %d for", + atomic_read(&ocapa->c_refc)); + LBUG(); + } + OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa)); +#else +#endif +} + +static inline struct obd_capa *capa_get(struct obd_capa *ocapa) +{ + if (!ocapa) + return NULL; + + atomic_inc(&ocapa->c_refc); + return ocapa; +} + +static inline void capa_put(struct obd_capa *ocapa) +{ + if (!ocapa) + return; + + if (atomic_read(&ocapa->c_refc) == 0) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc is 0 for"); + LBUG(); + } + atomic_dec(&ocapa->c_refc); +} + +static inline int open_flags_to_accmode(int flags) +{ + int mode = flags; + + if ((mode + 1) & O_ACCMODE) + mode++; + if (mode & O_TRUNC) + mode |= 2; + + return mode; +} + +static inline __u64 capa_open_opc(int mode) +{ + return mode & FMODE_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_READ; +} + +static inline void set_capa_expiry(struct obd_capa *ocapa) +{ + cfs_time_t expiry = cfs_time_sub((cfs_time_t)ocapa->c_capa.lc_expiry, + cfs_time_current_sec()); + ocapa->c_expiry = cfs_time_add(cfs_time_current(), + cfs_time_seconds(expiry)); +} + +static inline int capa_is_expired(struct obd_capa *ocapa) +{ + return cfs_time_beforeq(ocapa->c_expiry, cfs_time_current()); +} + +static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc) +{ + return (capa_opc(capa) & opc) == opc; +} + +static inline struct lustre_capa * +lustre_unpack_capa(struct lustre_msg *msg, unsigned int offset) +{ + struct lustre_capa *capa; + + capa = lustre_swab_buf(msg, offset, sizeof(*capa), + lustre_swab_lustre_capa); + if (capa == NULL) + CERROR("bufcount %u, bufsize %u\n", + lustre_msg_bufcount(msg), + (lustre_msg_bufcount(msg) <= offset) ? + -1 : lustre_msg_buflen(msg, offset)); + + return capa; +} + +struct filter_capa_key { + struct list_head k_list; + struct lustre_capa_key k_key; +}; + +#define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT) +#endif /* __LINUX_CAPA_H_ */ diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index de0ddc2..13fa4e5 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -56,6 +56,9 @@ enum lcfg_command_type { LCFG_LOG_START = 0x00ce011, LCFG_LOG_END = 0x00ce012, LCFG_LOV_ADD_INA = 0x00ce013, + LCFG_ADD_MDC = 0x00cf014, + LCFG_DEL_MDC = 0x00cf015, + LCFG_SEC_FLAVOR = 0x00ce016, }; struct lustre_cfg_bufs { @@ -253,4 +256,9 @@ static inline int lustre_cfg_sanity_check(void *buf, int len) RETURN(0); } +/* default value for nllu/nllg for llite */ +#define NOBODY_UID 99 +#define NOBODY_GID 99 +#define INVALID_UID (-1) + #endif // _LUSTRE_CFG_H diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 0cc200a..2393080 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -34,10 +34,10 @@ #define MOUNT_CONFIGS_DIR "CONFIGS" /* Persistent mount data are stored on the disk in this file. */ #define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/mountdata" -#define LAST_RCVD "last_rcvd" +#define LAST_RCVD "last_received" #define LOV_OBJID "lov_objid" #define HEALTH_CHECK "health_check" - +#define CAPA_KEYS "capa_keys" /****************** persistent mount data *********************/ @@ -46,7 +46,7 @@ #define LDD_F_SV_TYPE_MGS 0x0004 #define LDD_F_NEED_INDEX 0x0010 /* need an index assignment */ #define LDD_F_VIRGIN 0x0020 /* never registered */ -#define LDD_F_UPDATE 0x0040 /* update all related config logs */ +#define LDD_F_UPDATE 0x0040 /* update the config logs for this server*/ #define LDD_F_REWRITE_LDD 0x0080 /* rewrite the LDD */ #define LDD_F_WRITECONF 0x0100 /* regenerate all logs for this fs */ #define LDD_F_UPGRADE14 0x0200 /* COMPAT_14 */ @@ -68,7 +68,7 @@ static inline char *mt_str(enum ldd_mount_type mt) "ldiskfs", "smfs", "reiserfs", - "ldiskfs2", + "ldiskfs2" }; return mount_type_string[mt]; } @@ -141,6 +141,10 @@ struct lustre_mount_data { int lmd_exclude_count; char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ + char *lmd_sec_mdt; /* sec from mdt (to ost/mdt) */ + char *lmd_sec_cli; /* sec from client (to ost/mdt) */ + uid_t lmd_nllu; /* non-lustre-local-user id */ + gid_t lmd_nllg; /* non-lustre-local-group id */ char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ @@ -174,7 +178,6 @@ struct lustre_mount_data { /* end COMPAT_146 */ #define OBD_ROCOMPAT_LOVOBJID 0x00000001 /* MDS handles LOV_OBJID file */ -#define OBD_ROCOMPAT_CROW 0x00000002 /* OST will CROW create objects */ #define OBD_INCOMPAT_GROUPS 0x00000001 /* OST handles group subdirs */ #define OBD_INCOMPAT_OST 0x00000002 /* this is an OST */ @@ -271,8 +274,10 @@ int lustre_process_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); int lustre_end_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); -struct lustre_mount_info *server_get_mount(char *name); -int server_put_mount(char *name, struct vfsmount *mnt); +struct lustre_mount_info *server_get_mount(const char *name); +struct lustre_mount_info *server_get_mount_2(const char *name); +int server_put_mount(const char *name, struct vfsmount *mnt); +int server_put_mount_2(const char *name, struct vfsmount *mnt); int server_register_target(struct super_block *sb); struct mgs_target_info; int server_mti_print(char *title, struct mgs_target_info *mti); diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 632b7bb..b263623 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -332,7 +332,7 @@ struct ldlm_lock { /* for ldlm_add_ast_work_item() */ struct list_head l_bl_ast; struct list_head l_cp_ast; - struct ldlm_lock *l_blocking_lock; + struct ldlm_lock *l_blocking_lock; int l_bl_ast_run; }; @@ -381,8 +381,8 @@ extern struct obd_ops ldlm_obd_ops; extern char *ldlm_lockname[]; extern char *ldlm_typename[]; extern char *ldlm_it2str(int it); - -#define ldlm_lock_debug(cdls, level, lock, file, func, line, fmt, a...) do { \ +#ifdef LIBCFS_DEBUG +#define ldlm_lock_debug(cdls, level, lock, file, func, line, fmt, a...) do { \ CHECK_STACK(); \ \ if (((level) & D_CANTMASK) != 0 || \ @@ -401,11 +401,11 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 mask, ...) __attribute__ ((format (printf, 4, 5))); -#define LDLM_ERROR(lock, fmt, a...) do { \ - static cfs_debug_limit_state_t _ldlm_cdls; \ - ldlm_lock_debug(&_ldlm_cdls, D_ERROR, lock, \ - __FILE__, __FUNCTION__, __LINE__, \ - "### " fmt , ##a); \ +#define LDLM_ERROR(lock, fmt, a...) do { \ + static cfs_debug_limit_state_t _ldlm_cdls; \ + ldlm_lock_debug(&_ldlm_cdls, D_ERROR, lock, \ + __FILE__, __FUNCTION__, __LINE__, \ + "### " fmt , ##a); \ } while (0) #define LDLM_DEBUG(lock, fmt, a...) do { \ @@ -413,8 +413,12 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 mask, __FILE__, __FUNCTION__, __LINE__, \ "### " fmt , ##a); \ } while (0) +#else +#define LDLM_DEBUG(lock, fmt, a...) ((void)0) +#define LDLM_ERROR(lock, fmt, a...) ((void)0) +#endif -#define LDLM_DEBUG_NOLOCK(format, a...) \ +#define LDLM_DEBUG_NOLOCK(format, a...) \ CDEBUG(D_DLMTRACE, "### " format "\n" , ##a) typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags, @@ -439,7 +443,7 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, ldlm_res_iterator_t iter, void *closure); int ldlm_replay_locks(struct obd_import *imp); -void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *, +void ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *, ldlm_iterator_t iter, void *data); @@ -461,21 +465,23 @@ int ldlm_handle_convert(struct ptlrpc_request *req); int ldlm_handle_cancel(struct ptlrpc_request *req); int ldlm_del_waiting_lock(struct ldlm_lock *lock); int ldlm_refresh_waiting_lock(struct ldlm_lock *lock); +void ldlm_revoke_export_locks(struct obd_export *exp); int ldlm_get_ref(void); void ldlm_put_ref(int force); /* ldlm_lock.c */ ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg); -void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); -struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags); +void ldlm_lock2handle(const struct ldlm_lock *lock, + struct lustre_handle *lockh); +struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, int flags); void ldlm_cancel_callback(struct ldlm_lock *); int ldlm_lock_set_data(struct lustre_handle *, void *data); int ldlm_lock_remove_from_lru(struct ldlm_lock *); struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *, - struct lustre_handle *); + const struct lustre_handle *); -static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h) +static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h) { return __ldlm_handle2lock(h, 0); } @@ -501,7 +507,8 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); void ldlm_lock_allow_match(struct ldlm_lock *lock); -int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, +int ldlm_lock_match(struct ldlm_namespace *ns, int flags, + const struct ldlm_res_id *, ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode, struct lustre_handle *); struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, @@ -512,6 +519,7 @@ void ldlm_reprocess_all(struct ldlm_resource *res); void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos); void ldlm_lock_dump_handle(int level, struct lustre_handle *); +void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); /* resource.c */ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 local); @@ -527,8 +535,8 @@ static inline void ldlm_proc_cleanup(void) {} /* resource.c - internal */ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id, ldlm_type_t type, - int create); + const struct ldlm_res_id *, + ldlm_type_t type, int create); struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); int ldlm_resource_putref(struct ldlm_resource *res); void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, @@ -539,7 +547,13 @@ void ldlm_dump_all_namespaces(int level); void ldlm_namespace_dump(int level, struct ldlm_namespace *); void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, - struct ldlm_res_id); + const struct ldlm_res_id *); + +struct ldlm_callback_suite { + ldlm_completion_callback lcs_completion; + ldlm_blocking_callback lcs_blocking; + ldlm_glimpse_callback lcs_glimpse; +}; /* ldlm_request.c */ int ldlm_expired_completion_wait(void *data); @@ -547,20 +561,25 @@ int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp); int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data); -int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **req, - struct ldlm_res_id res_id, ldlm_type_t type, - ldlm_policy_data_t *policy, ldlm_mode_t mode, int *flags, +int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, + const struct ldlm_res_id *res_id, + ldlm_type_t type, ldlm_policy_data_t *policy, + ldlm_mode_t mode, int *flags, ldlm_blocking_callback blocking, ldlm_completion_callback completion, ldlm_glimpse_callback glimpse, void *data, void *lvb, __u32 lvb_len, void *lvb_swabber, struct lustre_handle *lockh, int async); +int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req, + const struct ldlm_request *dlm_req, + const struct ldlm_callback_suite *cbs); int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode, int *flags, void *lvb, __u32 lvb_len, void *lvb_swabber, struct lustre_handle *lockh, int rc); -int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, struct ldlm_res_id res_id, +int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, + const struct ldlm_res_id *res_id, ldlm_type_t type, ldlm_policy_data_t *policy, ldlm_mode_t mode, int *flags, ldlm_blocking_callback blocking, @@ -571,10 +590,13 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, struct ldlm_res_id res_id, int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, void *data, __u32 data_len); int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags); +int ldlm_handle_convert0(struct ptlrpc_request *req, + const struct ldlm_request *dlm_req); int ldlm_cli_cancel(struct lustre_handle *lockh); -int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *, +int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, int flags, void *opaque); -int ldlm_cli_join_lru(struct ldlm_namespace *, struct ldlm_res_id *, int join); +int ldlm_cli_join_lru(struct ldlm_namespace *, + const struct ldlm_res_id *, int join); /* mds/handler.c */ /* This has to be here because recursive inclusion sucks. */ diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index b3c675a..a405fae 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -11,6 +11,9 @@ /* Data stored per client in the last_rcvd file. In le32 order. */ struct mds_client_data; +struct mdt_client_data; +struct mds_idmap_table; +struct mdt_idmap_table; struct mds_export_data { struct list_head med_open_head; @@ -19,6 +22,24 @@ struct mds_export_data { __u64 med_ibits_known; loff_t med_lr_off; int med_lr_idx; + unsigned int med_rmtclient:1; /* remote client? */ + __u32 med_nllu; + __u32 med_nllg; + struct mds_idmap_table *med_idmap; +}; + +struct mdt_export_data { + struct list_head med_open_head; + spinlock_t med_open_lock; /* lock med_open_head, mfd_list*/ + struct semaphore med_mcd_lock; + struct mdt_client_data *med_mcd; + __u64 med_ibits_known; + loff_t med_lr_off; + int med_lr_idx; + unsigned int med_rmtclient:1; /* remote client? */ + __u32 med_nllu; + __u32 med_nllg; + struct mdt_idmap_table *med_idmap; }; struct osc_creator { @@ -34,7 +55,7 @@ struct osc_creator { }; struct ldlm_export_data { - struct list_head led_held_locks; /* protected by led_lock below */ + struct list_head led_held_locks; /* protected by led_lock */ spinlock_t led_lock; }; @@ -54,6 +75,7 @@ struct filter_export_data { struct list_head fed_mod_list; /* files being modified */ int fed_mod_count;/* items in fed_writing list */ long fed_pending; /* bytes just being written */ + __u32 fed_group; struct brw_stats fed_brw_stats; }; @@ -69,6 +91,7 @@ struct obd_export { struct obd_import *exp_imp_reverse; /* to make RPCs backwards */ struct proc_dir_entry *exp_proc; struct lprocfs_stats *exp_ops_stats; + struct lprocfs_stats *exp_md_stats; struct lprocfs_stats *exp_ldlm_stats; struct ptlrpc_connection *exp_connection; __u32 exp_conn_cnt; @@ -80,19 +103,23 @@ struct obd_export { __u64 exp_connect_flags; int exp_flags; unsigned int exp_failed:1, + exp_in_recovery:1, exp_disconnected:1, exp_connecting:1, - exp_replay_needed:1, - exp_need_sync:1, /* needs sync from connect */ + exp_req_replay_needed:1, + exp_lock_replay_needed:1, + exp_need_sync:1, exp_libclient:1; /* liblustre client? */ union { struct mds_export_data eu_mds_data; + struct mdt_export_data eu_mdt_data; struct filter_export_data eu_filter_data; struct ec_export_data eu_ec_data; } u; }; #define exp_mds_data u.eu_mds_data +#define exp_mdt_data u.eu_mdt_data #define exp_lov_data u.eu_lov_data #define exp_filter_data u.eu_filter_data #define exp_ec_data u.eu_ec_data diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h new file mode 100644 index 0000000..a4dec46 --- /dev/null +++ b/lustre/include/lustre_fid.h @@ -0,0 +1,263 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Yury Umanets + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_FID_H +#define __LINUX_FID_H + +/* + * struct lu_fid + */ +#include +#include +#include + +#include +#include + +struct lu_site; +struct lu_context; + +/* Whole sequences space range and zero range definitions */ +extern const struct lu_range LUSTRE_SEQ_SPACE_RANGE; +extern const struct lu_range LUSTRE_SEQ_ZERO_RANGE; +extern const struct lu_fid LUSTRE_BFL_FID; + +enum { + /* + * This is how may FIDs may be allocated in one sequence. 16384 for + * now. + */ + LUSTRE_SEQ_MAX_WIDTH = 0x0000000000004000ULL, + + /* + * How many sequences may be allocate for meta-sequence (this is 128 + * sequences). + */ + /* changed to 16 to avoid overflow in test11 */ + LUSTRE_SEQ_META_WIDTH = 0x0000000000000010ULL, + + /* + * This is how many sequences may be in one super-sequence allocated to + * MDTs. + */ + LUSTRE_SEQ_SUPER_WIDTH = (LUSTRE_SEQ_META_WIDTH * LUSTRE_SEQ_META_WIDTH) +}; + +enum lu_mgr_type { + LUSTRE_SEQ_SERVER, + LUSTRE_SEQ_CONTROLLER +}; + +enum lu_cli_type { + LUSTRE_SEQ_METADATA, + LUSTRE_SEQ_DATA +}; + +struct lu_server_seq; + +/* Client sequence manager interface. */ +struct lu_client_seq { + /* Sequence-controller export. */ + struct obd_export *lcs_exp; + struct semaphore lcs_sem; + + /* + * Range of allowed for allocation sequeces. When using lu_client_seq on + * clients, this contains meta-sequence range. And for servers this + * contains super-sequence range. + */ + struct lu_range lcs_space; + + /* Seq related proc */ + cfs_proc_dir_entry_t *lcs_proc_dir; + + /* This holds last allocated fid in last obtained seq */ + struct lu_fid lcs_fid; + + /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */ + enum lu_cli_type lcs_type; + + /* + * Service uuid, passed from MDT + seq name to form unique seq name to + * use it with procfs. + */ + char lcs_name[80]; + + /* + * Sequence width, that is how many objects may be allocated in one + * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH. + */ + __u64 lcs_width; + + /* Seq-server for direct talking */ + struct lu_server_seq *lcs_srv; +}; + +/* server sequence manager interface */ +struct lu_server_seq { + /* Available sequences space */ + struct lu_range lss_space; + + /* + * Device for server side seq manager needs (saving sequences to backing + * store). + */ + struct dt_device *lss_dev; + + /* /seq file object device */ + struct dt_object *lss_obj; + + /* Seq related proc */ + cfs_proc_dir_entry_t *lss_proc_dir; + + /* LUSTRE_SEQ_SERVER or LUSTRE_SEQ_CONTROLLER */ + enum lu_mgr_type lss_type; + + /* Client interafce to request controller */ + struct lu_client_seq *lss_cli; + + /* Semaphore for protecting allocation */ + struct semaphore lss_sem; + + /* + * Service uuid, passed from MDT + seq name to form unique seq name to + * use it with procfs. + */ + char lss_name[80]; + + /* + * Allocation chunks for super and meta sequences. Default values are + * LUSTRE_SEQ_SUPER_WIDTH and LUSTRE_SEQ_META_WIDTH. + */ + __u64 lss_width; +}; + +int seq_query(struct com_thread_info *info); + +/* Server methods */ +int seq_server_init(struct lu_server_seq *seq, + struct dt_device *dev, + const char *prefix, + enum lu_mgr_type type, + const struct lu_env *env); + +void seq_server_fini(struct lu_server_seq *seq, + const struct lu_env *env); + +int seq_server_alloc_super(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env); + +int seq_server_alloc_meta(struct lu_server_seq *seq, + struct lu_range *in, + struct lu_range *out, + const struct lu_env *env); + +int seq_server_set_cli(struct lu_server_seq *seq, + struct lu_client_seq *cli, + const struct lu_env *env); + +/* Client methods */ +int seq_client_init(struct lu_client_seq *seq, + struct obd_export *exp, + enum lu_cli_type type, + const char *prefix, + struct lu_server_seq *srv); + +void seq_client_fini(struct lu_client_seq *seq); + +void seq_client_flush(struct lu_client_seq *seq); + +int seq_client_alloc_fid(struct lu_client_seq *seq, + struct lu_fid *fid); + +/* Fids common stuff */ +int fid_is_local(struct lu_site *site, const struct lu_fid *fid); + +void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src); +void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src); + +/* fid locking */ + +struct ldlm_namespace; + +enum { + LUSTRE_RES_ID_SEQ_OFF = 0, + LUSTRE_RES_ID_OID_OFF = 1, + LUSTRE_RES_ID_VER_OFF = 2, + LUSTRE_RES_ID_HSH_OFF = 3 +}; + +/* + * Build (DLM) resource name from fid. + */ +static inline struct ldlm_res_id * +fid_build_reg_res_name(const struct lu_fid *f, + struct ldlm_res_id *name) +{ + memset(name, 0, sizeof *name); + name->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(f); + name->name[LUSTRE_RES_ID_OID_OFF] = fid_oid(f); + name->name[LUSTRE_RES_ID_VER_OFF] = fid_ver(f); + return name; +} + +/* + * Return true if resource is for object identified by fid. + */ +static inline int fid_res_name_eq(const struct lu_fid *f, + const struct ldlm_res_id *name) +{ + return + name->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(f) && + name->name[LUSTRE_RES_ID_OID_OFF] == fid_oid(f) && + name->name[LUSTRE_RES_ID_VER_OFF] == fid_ver(f); +} + + +static inline struct ldlm_res_id * +fid_build_pdo_res_name(const struct lu_fid *f, + unsigned int hash, + struct ldlm_res_id *name) +{ + fid_build_reg_res_name(f, name); + name->name[LUSTRE_RES_ID_HSH_OFF] = hash; + return name; +} + +static inline __u64 fid_flatten(const struct lu_fid *fid) +{ + return (fid_seq(fid) - 1) * LUSTRE_SEQ_MAX_WIDTH + fid_oid(fid); +} + +#define LUSTRE_SEQ_SRV_NAME "seq_srv" +#define LUSTRE_SEQ_CTL_NAME "seq_ctl" + +/* Range common stuff */ +void range_cpu_to_le(struct lu_range *dst, const struct lu_range *src); +void range_cpu_to_be(struct lu_range *dst, const struct lu_range *src); +void range_le_to_cpu(struct lu_range *dst, const struct lu_range *src); +void range_be_to_cpu(struct lu_range *dst, const struct lu_range *src); + +#endif /* __LINUX_FID_H */ diff --git a/lustre/include/lustre_fld.h b/lustre/include/lustre_fld.h new file mode 100644 index 0000000..4202836 --- /dev/null +++ b/lustre/include/lustre_fld.h @@ -0,0 +1,237 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_FLD_H +#define __LINUX_FLD_H + +#include +#include +#include + +#include +#include + +struct lu_client_fld; +struct lu_server_fld; + +struct fld_stats { + __u64 fst_count; + __u64 fst_cache; + __u64 fst_inflight; +}; + +/* + * FLD (Fid Location Database) interface. + */ +enum { + LUSTRE_CLI_FLD_HASH_DHT = 0, + LUSTRE_CLI_FLD_HASH_RRB +}; + +struct lu_server_fld; + +struct lu_fld_target { + struct list_head ft_chain; + struct obd_export *ft_exp; + struct lu_server_fld *ft_srv; + __u64 ft_idx; +}; + +typedef int +(*fld_hash_func_t) (struct lu_client_fld *, __u64); + +typedef struct lu_fld_target * +(*fld_scan_func_t) (struct lu_client_fld *, __u64); + +struct lu_fld_hash { + const char *fh_name; + fld_hash_func_t fh_hash_func; + fld_scan_func_t fh_scan_func; +}; + +struct fld_cache_entry { + struct hlist_node fce_list; + struct list_head fce_lru; + mdsno_t fce_mds; + seqno_t fce_seq; + cfs_waitq_t fce_waitq; + __u32 fce_inflight:1, + fce_invalid:1; +}; + +struct fld_cache { + /* + * Cache guard, protects fci_hash mostly because others immutable after + * init is finished. + */ + spinlock_t fci_lock; + + /* Cache shrink threshold */ + int fci_threshold; + + /* Prefered number of cached entries */ + int fci_cache_size; + + /* Current number of cached entries. Protected by @fci_lock */ + int fci_cache_count; + + /* Hash table size (number of collision lists) */ + int fci_hash_size; + + /* Hash table mask */ + int fci_hash_mask; + + /* Hash table for all collision lists */ + struct hlist_head *fci_hash_table; + + /* Lru list */ + struct list_head fci_lru; + + /* Cache statistics. */ + struct fld_stats fci_stat; + + /* Cache name used for debug and messages. */ + char fci_name[80]; +}; + +struct lu_server_fld { + /* Fld dir proc entry. */ + cfs_proc_dir_entry_t *lsf_proc_dir; + + /* /fld file object device */ + struct dt_object *lsf_obj; + + /* Client FLD cache. */ + struct fld_cache *lsf_cache; + + /* Protect index modifications */ + struct semaphore lsf_sem; + + /* Fld service name in form "fld-srv-lustre-MDTXXX" */ + char lsf_name[80]; +}; + +enum { + LUSTRE_FLD_INIT = 1 << 0, + LUSTRE_FLD_RUN = 1 << 1 +}; + +struct lu_client_fld { + /* Client side proc entry. */ + cfs_proc_dir_entry_t *lcf_proc_dir; + + /* List of exports client FLD knows about. */ + struct list_head lcf_targets; + + /* Current hash to be used to chose an export. */ + struct lu_fld_hash *lcf_hash; + + /* Exports count. */ + int lcf_count; + + /* Lock protecting exports list and fld_hash. */ + spinlock_t lcf_lock; + + /* Client FLD cache. */ + struct fld_cache *lcf_cache; + + /* Client fld proc entry name. */ + char lcf_name[80]; + + const struct lu_context *lcf_ctx; + + int lcf_flags; +}; + +int fld_query(struct com_thread_info *info); + +/* Server methods */ +int fld_server_init(struct lu_server_fld *fld, + struct dt_device *dt, + const char *prefix, + const struct lu_env *env); + +void fld_server_fini(struct lu_server_fld *fld, + const struct lu_env *env); + +int fld_server_create(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t mds); + +int fld_server_delete(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq); + +int fld_server_lookup(struct lu_server_fld *fld, + const struct lu_env *env, + seqno_t seq, mdsno_t *mds); + +/* Client methods */ +int fld_client_init(struct lu_client_fld *fld, + const char *prefix, int hash); + +void fld_client_fini(struct lu_client_fld *fld); + +void fld_client_flush(struct lu_client_fld *fld); + +int fld_client_lookup(struct lu_client_fld *fld, + seqno_t seq, mdsno_t *mds, + const struct lu_env *env); + +int fld_client_create(struct lu_client_fld *fld, + seqno_t seq, mdsno_t mds, + const struct lu_env *env); + +int fld_client_delete(struct lu_client_fld *fld, + seqno_t seq, + const struct lu_env *env); + +int fld_client_add_target(struct lu_client_fld *fld, + struct lu_fld_target *tar); + +int fld_client_del_target(struct lu_client_fld *fld, + __u64 idx); + +/* Cache methods */ +struct fld_cache *fld_cache_init(const char *name, + int hash_size, + int cache_size, + int cache_threshold); + +void fld_cache_fini(struct fld_cache *cache); + +void fld_cache_flush(struct fld_cache *cache); + +int fld_cache_insert(struct fld_cache *cache, + seqno_t seq, mdsno_t mds); + +int fld_cache_insert_inflight(struct fld_cache *cache, + seqno_t seq); + +void fld_cache_delete(struct fld_cache *cache, + seqno_t seq); + +int +fld_cache_lookup(struct fld_cache *cache, + seqno_t seq, mdsno_t *mds); + +#endif diff --git a/lustre/include/lustre_ha.h b/lustre/include/lustre_ha.h index 43071ba..3879126 100644 --- a/lustre/include/lustre_ha.h +++ b/lustre/include/lustre_ha.h @@ -21,5 +21,8 @@ void ptlrpc_activate_import(struct obd_import *imp); void ptlrpc_deactivate_import(struct obd_import *imp); void ptlrpc_invalidate_import(struct obd_import *imp); void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt); +int ptlrpc_check_suspend(void); +void ptlrpc_activate_timeouts(struct obd_import *imp); +void ptlrpc_deactivate_timeouts(struct obd_import *imp); #endif diff --git a/lustre/include/lustre_handles.h b/lustre/include/lustre_handles.h index bbd2fcd..cf6b403 100644 --- a/lustre/include/lustre_handles.h +++ b/lustre/include/lustre_handles.h @@ -29,14 +29,24 @@ struct portals_handle { struct list_head h_link; __u64 h_cookie; portals_handle_addref_cb h_addref; + + /* newly added fields to handle the RCU issue. -jxiong */ + spinlock_t h_lock; + unsigned int h_size; + void *h_ptr; + void (*h_free_cb)(void *, size_t); + struct rcu_head h_rcu; }; +#define RCU2HANDLE(rcu) container_of(rcu, struct portals_handle, h_rcu) /* handles.c */ /* Add a handle to the hash table */ void class_handle_hash(struct portals_handle *, portals_handle_addref_cb); void class_handle_unhash(struct portals_handle *); +void class_handle_hash_back(struct portals_handle *); void *class_handle2object(__u64 cookie); +void class_handle_free_cb(struct rcu_head *); int class_handle_init(void); void class_handle_cleanup(void); diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 4eceda9..b9dcf85 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -65,6 +65,7 @@ struct obd_import { struct list_head imp_delayed_list; struct obd_device *imp_obd; + struct ptlrpc_sec *imp_sec; cfs_waitq_t imp_recovery_waitq; atomic_t imp_inflight; @@ -79,6 +80,7 @@ struct obd_import { struct lustre_handle imp_remote_handle; cfs_time_t imp_next_ping; /* jiffies */ __u64 imp_last_success_conn; /* jiffies, 64-bit */ + cfs_time_t imp_next_reconnect; /* seconds */ /* all available obd_import_conn linked here */ struct list_head imp_conn_list; @@ -88,7 +90,8 @@ struct obd_import { spinlock_t imp_lock; /* flags */ - unsigned int imp_invalid:1, /* evicted */ + unsigned int imp_no_timeout:1, /* timeouts are disabled */ + imp_invalid:1, /* evicted */ imp_deactive:1, /* administratively disabled */ imp_replayable:1, /* try to recover the import */ imp_dlm_fake:1, /* don't run recovery (timeout instead) */ @@ -103,6 +106,7 @@ struct obd_import { __u32 imp_connect_op; struct obd_connect_data imp_connect_data; __u64 imp_connect_flags_orig; + int imp_connect_error; __u32 imp_msg_magic; diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 3e929d1..206a1bf 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -55,11 +55,11 @@ struct obd_export; void target_client_add_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error); -int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler); +int target_handle_connect(struct ptlrpc_request *req); int target_handle_disconnect(struct ptlrpc_request *req); void target_destroy_export(struct obd_export *exp); int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid); + struct obd_uuid *cluuid, int); int target_handle_ping(struct ptlrpc_request *req); void target_committed_to_req(struct ptlrpc_request *req); @@ -75,8 +75,10 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req); void target_cancel_recovery_timer(struct obd_device *obd); #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) /* *waves hands* */ -void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler); -void target_abort_recovery(void *data); +void target_start_recovery_timer(struct obd_device *obd); +int target_start_recovery_thread(struct obd_device *obd, + svc_handler_t handler); +void target_stop_recovery_thread(struct obd_device *obd); void target_cleanup_recovery(struct obd_device *obd); int target_queue_recovery_request(struct ptlrpc_request *req, struct obd_device *obd); @@ -85,15 +87,17 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); /* client.c */ -int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf); +int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); struct client_obd *client_conn2cli(struct lustre_handle *conn); struct mdc_open_data; struct obd_client_handle { - struct lustre_handle och_fh; - struct llog_cookie och_cookie; + struct lustre_handle och_fh; + struct lu_fid och_fid; + struct llog_cookie och_cookie; struct mdc_open_data *och_mod; __u32 och_magic; + int och_flags; }; #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed @@ -131,37 +135,37 @@ struct obd_ioctl_data { struct obdo ioc_obdo1; struct obdo ioc_obdo2; - obd_size ioc_count; - obd_off ioc_offset; - __u32 ioc_dev; - __u32 ioc_command; + obd_size ioc_count; + obd_off ioc_offset; + __u32 ioc_dev; + __u32 ioc_command; __u64 ioc_nid; __u32 ioc_nal; __u32 ioc_type; /* buffers the kernel will treat as user pointers */ - __u32 ioc_plen1; - char *ioc_pbuf1; - __u32 ioc_plen2; - char *ioc_pbuf2; + __u32 ioc_plen1; + char *ioc_pbuf1; + __u32 ioc_plen2; + char *ioc_pbuf2; /* inline buffers for various arguments */ - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - __u32 ioc_inllen3; - char *ioc_inlbuf3; - __u32 ioc_inllen4; - char *ioc_inlbuf4; + __u32 ioc_inllen1; + char *ioc_inlbuf1; + __u32 ioc_inllen2; + char *ioc_inlbuf2; + __u32 ioc_inllen3; + char *ioc_inlbuf3; + __u32 ioc_inllen4; + char *ioc_inlbuf4; char ioc_bulk[0]; }; struct obd_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; + __u32 ioc_len; + __u32 ioc_version; }; static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) @@ -452,8 +456,12 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) #define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) + #define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) + #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) @@ -688,7 +696,7 @@ do { \ } while (0) #else /* !__KERNEL__ */ -#define __l_wait_event(wq, condition, info, ret, excl) \ +#define __l_wait_event(wq, condition, info, ret, excl) \ do { \ long __timeout = info->lwi_timeout; \ long __now; \ diff --git a/lustre/include/lustre_lite.h b/lustre/include/lustre_lite.h index e4e8ee8..8821a75 100644 --- a/lustre/include/lustre_lite.h +++ b/lustre/include/lustre_lite.h @@ -32,7 +32,7 @@ (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \ (struct ll_async_page *)(c)) -// 4*1024*1024 +/* 4UL * 1024 * 1024 */ #define LL_MAX_BLKSIZE_BITS (22) #define LL_MAX_BLKSIZE (1UL<obd_llog_ctxt[index]; } +static inline struct llog_ctxt * +llog_get_context_from_llogs(struct obd_llogs *llogs, int index) +{ + if (index < 0 || index >= LLOG_MAX_CTXTS) + return NULL; + return llogs->llog_ctxt[index]; +} + static inline int llog_write_rec(struct llog_handle *handle, struct llog_rec_hdr *rec, struct llog_cookie *logcookies, @@ -288,6 +298,7 @@ static inline int llog_write_rec(struct llog_handle *handle, rc = llog_handle2ops(handle, &lop); if (rc) RETURN(rc); + LASSERT(lop); if (lop->lop_write_rec == NULL) RETURN(-EOPNOTSUPP); diff --git a/lustre/include/lustre_mdc.h b/lustre/include/lustre_mdc.h new file mode 100644 index 0000000..d2c30d3 --- /dev/null +++ b/lustre/include/lustre_mdc.h @@ -0,0 +1,80 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * MDS data structures. + * See also lustre_idl.h for wire formats of requests. + */ + +#ifndef _LUSTRE_MDC_H +#define _LUSTRE_MDC_H + +#ifdef __KERNEL__ +# include +# include +# ifdef CONFIG_FS_POSIX_ACL +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include + +struct ptlrpc_client; +struct obd_export; +struct ptlrpc_request; +struct obd_device; + +struct mdc_rpc_lock { + struct semaphore rpcl_sem; + struct lookup_intent *rpcl_it; +}; + +static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) +{ + sema_init(&lck->rpcl_sem, 1); + lck->rpcl_it = NULL; +} + +static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, + struct lookup_intent *it) +{ + ENTRY; + if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { + down(&lck->rpcl_sem); + LASSERT(lck->rpcl_it == NULL); + lck->rpcl_it = it; + } +} + +static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, + struct lookup_intent *it) +{ + if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { + LASSERT(it == lck->rpcl_it); + lck->rpcl_it = NULL; + up(&lck->rpcl_sem); + } + EXIT; +} + +struct mdc_cache_waiter { + struct list_head mcw_entry; + wait_queue_head_t mcw_waitq; +}; + +/* mdc/mdc_locks.c */ +int it_disposition(struct lookup_intent *it, int flag); +void it_clear_disposition(struct lookup_intent *it, int flag); +void it_set_disposition(struct lookup_intent *it, int flag); +int it_open_error(int phase, struct lookup_intent *it); +#ifdef HAVE_SPLIT_SUPPORT +int mdc_sendpage(struct obd_export *exp, const struct lu_fid *fid, + const struct page *page, int offset); +#endif +#endif diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index 85da7a4..c36024d 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -37,24 +37,6 @@ struct ptlrpc_request; struct obd_device; struct ll_file_data; -struct lustre_md { - struct mds_body *body; - struct lov_stripe_md *lsm; -#ifdef CONFIG_FS_POSIX_ACL - struct posix_acl *posix_acl; -#endif -}; - -struct mdc_op_data { - struct ll_fid fid1; - struct ll_fid fid2; - __u64 mod_time; - const char *name; - int namelen; - __u32 create_mode; - __u32 suppgids[2]; -}; - struct mds_update_record { __u32 ur_opcode; struct ll_fid *ur_fid1; @@ -86,116 +68,32 @@ struct mds_file_data { struct dentry *mfd_dentry; }; -/* ACL */ -#ifdef CONFIG_FS_POSIX_ACL -#ifdef HAVE_XATTR_ACL -# define MDS_XATTR_NAME_ACL_ACCESS XATTR_NAME_ACL_ACCESS -# define mds_xattr_acl_size(entry) xattr_acl_size(entry) -# else -# define MDS_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS -# define mds_xattr_acl_size(entry) posix_acl_xattr_size(entry) -# endif -# define LUSTRE_POSIX_ACL_MAX_ENTRIES (32) -# define LUSTRE_POSIX_ACL_MAX_SIZE \ - (mds_xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES)) -#else -# define LUSTRE_POSIX_ACL_MAX_SIZE 0 -#endif +struct mds_group_info { + struct obd_uuid *uuid; + int group; +}; /* mds/mds_reint.c */ +struct inode; + int mds_reint_rec(struct mds_update_record *r, int offset, struct ptlrpc_request *req, struct lustre_handle *); -/* mds/mds_lov.c */ - -/* mdc/mdc_locks.c */ -int it_disposition(struct lookup_intent *it, int flag); -void it_set_disposition(struct lookup_intent *it, int flag); -void it_clear_disposition(struct lookup_intent *it, int flag); -int it_open_error(int phase, struct lookup_intent *it); -void mdc_set_lock_data(__u64 *lockh, void *data); -int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, - ldlm_iterator_t it, void *data); -int mdc_intent_lock(struct obd_export *exp, - struct mdc_op_data *, - void *lmm, int lmmsize, - struct lookup_intent *, int, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, int extra_lock_flags); -int mdc_enqueue(struct obd_export *exp, - int lock_type, - struct lookup_intent *it, - int lock_mode, - struct mdc_op_data *data, - struct lustre_handle *lockh, - void *lmm, - int lmmlen, - ldlm_completion_callback cb_completion, - ldlm_blocking_callback cb_blocking, - void *cb_data, int extra_lock_flags); - -/* mdc/mdc_request.c */ -int mdc_init_ea_size(struct obd_export *mdc_exp, struct obd_export *lov_exp); -int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, - struct obd_export *exp, struct lustre_md *md); -void mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md); -int mdc_getstatus(struct obd_export *exp, struct ll_fid *rootfid); -int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, unsigned int ea_size, - struct ptlrpc_request **request); -int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid, - const char *filename, int namelen, unsigned long valid, - unsigned int ea_size, struct ptlrpc_request **request); -int mdc_setattr(struct obd_export *exp, struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request); -int mdc_setxattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, - struct ptlrpc_request **request); -int mdc_getxattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, struct ptlrpc_request **request); -int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags, - struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, - struct ptlrpc_request **); -struct obd_client_handle; -void mdc_set_open_replay_data(struct obd_client_handle *och, - struct ptlrpc_request *open_req); -void mdc_clear_open_replay_data(struct obd_client_handle *och); -int mdc_close(struct obd_export *, struct obdo *, struct obd_client_handle *, - struct ptlrpc_request **); -int mdc_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, __u64 offset, - struct page *, struct ptlrpc_request **); -int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data, - const void *data, int datalen, int mode, __u32 uid, __u32 gid, - __u32 cap_effective, __u64 rdev,struct ptlrpc_request **request); -int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data, - struct ptlrpc_request **request); -int mdc_link(struct obd_export *exp, struct mdc_op_data *data, - struct ptlrpc_request **); -int mdc_rename(struct obd_export *exp, struct mdc_op_data *data, - const char *old, int oldlen, const char *new, int newlen, - struct ptlrpc_request **request); -int mdc_sync(struct obd_export *exp, struct ll_fid *fid, - struct ptlrpc_request **); -int mdc_create_client(struct obd_uuid uuid, struct ptlrpc_client *cl); - -/* Store the generation of a newly-created inode in |req| for replay. */ -void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, - int repoff); -int mdc_llog_process(struct obd_export *, char *logname, llog_cb_t, void *data); -int mdc_done_writing(struct obd_export *exp, struct obdo *); - -static inline void mdc_pack_fid(struct ll_fid *fid, obd_id ino, __u32 gen, - int type) -{ - fid->id = ino; - fid->generation = gen; - fid->f_type = type; -} +int mds_osc_setattr_async(struct obd_device *obd, __u32 uid, __u32 gid, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, __u64 id, __u32 gen, + struct obd_capa *oc); + +int mds_log_op_unlink(struct obd_device *obd, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size); +int mds_log_op_setattr(struct obd_device *obd, __u32 uid, __u32 gid, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size); + +int mds_lov_write_objids(struct obd_device *obd); +void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); +void mds_objids_from_lmm(obd_id *, struct lov_mds_md *, struct lov_desc *); /* ioctls for trying requests */ #define IOC_REQUEST_TYPE 'f' @@ -209,4 +107,11 @@ static inline void mdc_pack_fid(struct ll_fid *fid, obd_id ino, __u32 gen, #define IOC_REQUEST_CLOSE _IOWR('f', 35, long) #define IOC_REQUEST_MAX_NR 35 +#define MDS_LOV_MD_NAME "trusted.lov" +#define MDS_LMV_MD_NAME "trusted.lmv" +#define MDD_OBD_NAME "mdd_obd" +#define MDD_OBD_UUID "mdd_obd_uuid" +#define MDD_OBD_TYPE "mds" +#define MDD_OBD_PROFILE "lustre-MDT0000" + #endif diff --git a/lustre/include/lustre_mdt.h b/lustre/include/lustre_mdt.h new file mode 100644 index 0000000..861a077 --- /dev/null +++ b/lustre/include/lustre_mdt.h @@ -0,0 +1,85 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_MDT_H +#define __LINUX_MDT_H + +#include +#include +#include +#include +#include +#include + +/* + * Common thread info for mdt, seq and fld + */ +struct com_thread_info { + /* + * for req-layout interface. + */ + struct req_capsule cti_pill; +}; + +/* id map */ +#define MDT_IDMAP_HASHSIZE (32) +#define MDT_IDMAP_HASHFUNC(id) ((id) & (MDT_IDMAP_HASHSIZE - 1)) + +enum mdt_idmap_idx { + RMT_UIDMAP_IDX, + LCL_UIDMAP_IDX, + RMT_GIDMAP_IDX, + LCL_GIDMAP_IDX, + MDT_IDMAP_N_HASHES +}; + +struct mdt_idmap_table { + spinlock_t mit_lock; + struct list_head mit_idmaps[MDT_IDMAP_N_HASHES] + [MDT_IDMAP_HASHSIZE]; +}; + +enum { + ESERIOUS = 0x0001000 +}; + +static inline int err_serious(int rc) +{ + LASSERT(rc < 0); + LASSERT(-rc < ESERIOUS); + return -(-rc | ESERIOUS); +} + +static inline int clear_serious(int rc) +{ + if (rc < 0) + rc = -(-rc & ~ESERIOUS); + return rc; +} + +static inline int is_serious(int rc) +{ + return (rc < 0 && -rc & ESERIOUS); +} + + +#endif diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index e6e7d21..bbe3c01 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -38,8 +38,10 @@ #include #include #include +#include #include #include +#include /* MD flags we _always_ use */ #define PTLRPC_MD_OPTIONS 0 @@ -93,6 +95,15 @@ #define LDLM_MAXREQSIZE (5 * 1024) #define LDLM_MAXREPSIZE (1024) +#define MDT_MIN_THREADS 2UL +#define MDT_MAX_THREADS 512UL +#define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ + num_physpages >> (25 - PAGE_SHIFT)), 2UL) +#define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ + num_physpages >> (25 - PAGE_SHIFT)), 2UL) +#define SEQ_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ + num_physpages >> (25 - PAGE_SHIFT)), 2UL) + /* Absolute limits */ #define MDS_THREADS_MIN 2 #define MDS_THREADS_MAX 512 @@ -119,11 +130,24 @@ #define MDS_MAXREQSIZE (5 * 1024) #define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) +/* FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + md_fld */ +#define FLD_MAXREQSIZE (160) + +/* FLD_MAXREPSIZE == lustre_msg + ptlrpc_body + md_fld */ +#define FLD_MAXREPSIZE (152) + +/* SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range + + * __u32 padding */ +#define SEQ_MAXREQSIZE (160) + +/* SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */ +#define SEQ_MAXREPSIZE (152) + #define MGS_THREADS_AUTO_MIN 2 #define MGS_THREADS_AUTO_MAX 32 #define MGS_NBUFS (64 * smp_num_cpus) #define MGS_BUFSIZE (8 * 1024) -#define MGS_MAXREQSIZE (8 * 1024) +#define MGS_MAXREQSIZE (7 * 1024) #define MGS_MAXREPSIZE (9 * 1024) /* Absolute limits */ @@ -181,6 +205,8 @@ struct ptlrpc_request_set { struct list_head set_requests; set_interpreter_func set_interpret; /* completion callback */ void *set_arg; /* completion context */ + void *set_countp; /* pointer to NOB counter in case + * of directIO (bug11737) */ /* locked so that any old caller can communicate requests to * the set holder who can then fold them into the lock-free set */ spinlock_t set_new_req_lock; @@ -224,12 +250,16 @@ struct ptlrpc_reply_state { lnet_handle_md_t rs_md_h; atomic_t rs_refcount; + struct ptlrpc_svc_ctx *rs_svc_ctx; + struct lustre_msg *rs_repbuf; /* wrapper */ + int rs_repbuf_len; /* wrapper buf length */ + int rs_repdata_len; /* wrapper msg length */ + struct lustre_msg *rs_msg; /* reply message */ + /* locks awaiting client reply ACK */ int rs_nlocks; struct lustre_handle rs_locks[RS_MAX_LOCKS]; ldlm_mode_t rs_modes[RS_MAX_LOCKS]; - /* last member: variable sized reply message */ - struct lustre_msg *rs_msg; }; struct ptlrpc_thread; @@ -249,6 +279,9 @@ struct ptlrpc_request_pool { void (*prp_populate)(struct ptlrpc_request_pool *, int); }; +struct lu_context; +struct lu_env; + struct ptlrpc_request { int rq_type; /* one of PTL_RPC_MSG_* */ struct list_head rq_list; @@ -270,7 +303,7 @@ struct ptlrpc_request { */ rq_replay:1, rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1; + rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ atomic_t rq_refcount; /* client-side refcount for SENT race */ @@ -291,6 +324,38 @@ struct ptlrpc_request { __u64 rq_xid; struct list_head rq_replay_list; + struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */ + struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */ + struct list_head rq_ctx_chain; /* link to waited ctx */ + ptlrpc_sec_flavor_t rq_sec_flavor; /* client & server */ + /* client security flags */ + unsigned int rq_ctx_init:1, /* context initiation */ + rq_ctx_fini:1, /* context destroy */ + rq_bulk_read:1, /* request bulk read */ + rq_bulk_write:1, /* request bulk write */ + /* server authentication flags */ + rq_auth_gss:1, /* authenticated by gss */ + rq_auth_remote:1, /* authed as remote user */ + rq_auth_usr_root:1, /* authed as root */ + rq_auth_usr_mdt:1; /* authed as mdt */ + + uid_t rq_auth_uid; /* authed uid */ + uid_t rq_auth_mapped_uid; /* authed uid mapped to */ + + /* (server side), pointed directly into req buffer */ + struct ptlrpc_user_desc *rq_user_desc; + + /* various buffer pointers */ + struct lustre_msg *rq_reqbuf; /* req wrapper */ + int rq_reqbuf_len; /* req wrapper buf len */ + int rq_reqdata_len; /* req wrapper msg len */ + struct lustre_msg *rq_repbuf; /* rep wrapper */ + int rq_repbuf_len; /* rep wrapper buf len */ + int rq_repdata_len; /* rep wrapper msg len */ + struct lustre_msg *rq_clrbuf; /* only in priv mode */ + int rq_clrbuf_len; /* only in priv mode */ + int rq_clrdata_len; /* only in priv mode */ + __u32 rq_req_swab_mask; __u32 rq_rep_swab_mask; @@ -334,6 +399,7 @@ struct ptlrpc_request { void *rq_ptlrpcd_data; struct ptlrpc_request_pool *rq_pool; /* Pool if request from preallocated list */ + struct lu_context rq_session; }; static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index) @@ -381,7 +447,7 @@ static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index) #endif static inline const char * -ptlrpc_rqphase2str(struct ptlrpc_request *req) +ptlrpc_rqphase2str(const struct ptlrpc_request *req) { switch (req->rq_phase) { case RQ_PHASE_NEW: @@ -402,16 +468,17 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) /* Spare the preprocessor, spoil the bugs. */ #define FLAG(field, str) (field ? str : "") -#define DEBUG_REQ_FLAGS(req) \ - ptlrpc_rqphase2str(req), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W") +#define DEBUG_REQ_FLAGS(req) \ + ptlrpc_rqphase2str(req), \ + FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ + FLAG(req->rq_err, "E"), \ + FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ + FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ + FLAG(req->rq_no_resend, "N"), \ + FLAG(req->rq_waiting, "W"), \ + FLAG(req->rq_wait_ctx, "C") -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s" +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s" void _debug_req(struct ptlrpc_request *req, __u32 mask, struct libcfs_debug_msg_data *data, const char *fmt, ...) @@ -434,19 +501,19 @@ do { \ #define DEBUG_REQ(level, req, fmt, args...) \ do { \ if ((level) & (D_ERROR | D_WARNING)) { \ - static cfs_debug_limit_state_t cdls; \ - debug_req(&cdls, level, req, __FILE__, __func__, __LINE__, \ - "@@@ "fmt" ", ## args); \ + static cfs_debug_limit_state_t cdls; \ + debug_req(&cdls, level, req, __FILE__, __func__, __LINE__, \ + "@@@ "fmt" ", ## args); \ } else \ - debug_req(NULL, level, req, __FILE__, __func__, __LINE__, \ - "@@@ "fmt" ", ## args); \ + debug_req(NULL, level, req, __FILE__, __func__, __LINE__, \ + "@@@ "fmt" ", ## args); \ } while (0) struct ptlrpc_bulk_page { struct list_head bp_link; - int bp_buflen; - int bp_pageoffset; /* offset within a page */ - struct page *bp_page; + int bp_buflen; + int bp_pageoffset; /* offset within a page */ + struct page *bp_page; }; #define BULK_GET_SOURCE 0 @@ -477,10 +544,11 @@ struct ptlrpc_bulk_desc { lnet_handle_md_t bd_md_h; /* associated MD */ lnet_nid_t bd_sender; /* stash event::sender */ + cfs_page_t **bd_enc_pages; #if defined(__KERNEL__) - lnet_kiov_t bd_iov[0]; + lnet_kiov_t bd_iov[0]; #else - lnet_md_iovec_t bd_iov[0]; + lnet_md_iovec_t bd_iov[0]; #endif }; @@ -493,6 +561,7 @@ struct ptlrpc_thread { unsigned int t_id; /* service thread index, from ptlrpc_start_threads */ cfs_waitq_t t_ctl_waitq; + struct lu_env *t_env; }; struct ptlrpc_request_buffer_desc { @@ -523,7 +592,7 @@ struct ptlrpc_service { int srv_n_difficult_replies; /* # 'difficult' replies */ int srv_n_active_reqs; /* # reqs being served */ cfs_duration_t srv_rqbd_timeout; /* timeout before re-posting reqs, in tick */ - int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ + int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ unsigned srv_cpu_affinity:1; /* bind threads to CPUs */ __u32 srv_req_portal; @@ -556,8 +625,8 @@ struct ptlrpc_service { struct list_head srv_threads; /* service thread list */ svc_handler_t srv_handler; - char *srv_name; /* only statically allocated strings here; we don't clean them */ - char *srv_thread_name; /* only statically allocated strings here; we don't clean them */ + char *srv_name; /* only statically allocated strings here; we don't clean them */ + char *srv_thread_name; /* only statically allocated strings here; we don't clean them */ spinlock_t srv_lock; @@ -568,7 +637,12 @@ struct ptlrpc_service { struct list_head srv_free_rs_list; /* waitq to run, when adding stuff to srv_free_rs_list */ cfs_waitq_t srv_free_rs_waitq; - + + /* + * Tags for lu_context associated with this thread, see struct + * lu_context. + */ + __u32 srv_ctx_tags; /* * if non-NULL called during thread creation (ptlrpc_start_thread()) * to initialize service specific per-thread state. @@ -693,7 +767,8 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs, - struct ptlrpc_request_pool *pool); + struct ptlrpc_request_pool *pool, + struct ptlrpc_cli_ctx *ctx); void ptlrpc_free_req(struct ptlrpc_request *request); void ptlrpc_req_finished(struct ptlrpc_request *request); void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request); @@ -711,20 +786,39 @@ __u64 ptlrpc_next_xid(void); __u64 ptlrpc_sample_next_xid(void); __u64 ptlrpc_req_xid(struct ptlrpc_request *request); +struct ptlrpc_service_conf { + int psc_nbufs; + int psc_bufsize; + int psc_max_req_size; + int psc_max_reply_size; + int psc_req_portal; + int psc_rep_portal; + int psc_watchdog_timeout; /* in ms */ + int psc_min_threads; + int psc_max_threads; + __u32 psc_ctx_tags; +}; + /* ptlrpc/service.c */ void ptlrpc_save_lock (struct ptlrpc_request *req, struct lustre_handle *lock, int mode); void ptlrpc_commit_replies (struct obd_device *obd); void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); +struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, + svc_handler_t h, char *name, + struct proc_dir_entry *proc_entry, + svcreq_printfn_t prntfn, + char *threadname); + struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, int req_portal, int rep_portal, int watchdog_timeout, /* in ms */ svc_handler_t, char *name, cfs_proc_dir_entry_t *proc_entry, - svcreq_printfn_t, + svcreq_printfn_t, int min_threads, int max_threads, - char *threadname); + char *threadname, __u32 ctx_tags); void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc); @@ -751,15 +845,22 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp); /* ptlrpc/pack_generic.c */ int lustre_msg_swabbed(struct lustre_msg *msg); int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); +void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, + char **bufs); int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count, int *lens, char **bufs); int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens, char **bufs); -void lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data); +int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, + int *lens, char **bufs); +int lustre_shrink_msg(struct lustre_msg *msg, int segment, + unsigned int newlen, int move_data); void lustre_free_reply_state(struct ptlrpc_reply_state *rs); int lustre_msg_size(__u32 magic, int count, int *lengths); +int lustre_msg_size_v2(int count, int *lengths); int lustre_unpack_msg(struct lustre_msg *m, int len); +void *lustre_msg_buf_v1(void *msg, int n, int min_size); +void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size); void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); int lustre_msg_buflen(struct lustre_msg *m, int n); void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len); @@ -785,7 +886,7 @@ __u32 lustre_msg_get_opc(struct lustre_msg *msg); __u64 lustre_msg_get_last_xid(struct lustre_msg *msg); __u64 lustre_msg_get_last_committed(struct lustre_msg *msg); __u64 lustre_msg_get_transno(struct lustre_msg *msg); -int lustre_msg_get_status(struct lustre_msg *msg); +int lustre_msg_get_status(struct lustre_msg *msg); __u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg); __u32 lustre_msg_get_magic(struct lustre_msg *msg); void lustre_msg_set_handle(struct lustre_msg *msg,struct lustre_handle *handle); @@ -798,6 +899,16 @@ void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt); static inline void +lustre_shrink_reply(struct ptlrpc_request *req, int segment, + unsigned int newlen, int move_data) +{ + LASSERT(req->rq_reply_state); + LASSERT(req->rq_repmsg); + req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment, + newlen, move_data); +} + +static inline void ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) { LASSERT(atomic_read(&rs->rs_refcount) > 0); @@ -841,9 +952,10 @@ ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, int *lens) } /* ldlm/ldlm_lib.c */ -int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf); -int client_obd_cleanup(struct obd_device * obddev); -int client_connect_import(struct lustre_handle *conn, struct obd_device *obd, +int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); +int client_obd_cleanup(struct obd_device *obddev); +int client_connect_import(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *); int client_disconnect_export(struct obd_export *exp); int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, @@ -861,6 +973,7 @@ void ping_evictor_stop(void); #define ping_evictor_start() do {} while (0) #define ping_evictor_stop() do {} while (0) #endif +int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req); /* ptlrpc/ptlrpcd.c */ void ptlrpcd_wake(struct ptlrpc_request *req); @@ -869,6 +982,7 @@ int ptlrpcd_addref(void); void ptlrpcd_decref(void); /* ptlrpc/lproc_ptlrpc.c */ +const char* ll_opcode2str(__u32 opcode); #ifdef LPROCFS void ptlrpc_lprocfs_register_obd(struct obd_device *obd); void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index 429c9e9..8acf23e 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -60,5 +60,9 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, #define PARAM_MDC "mdc." #define PARAM_LLITE "llite." #define PARAM_LOV "lov." +#define PARAM_SEC "security." +#define PARAM_SEC_RPC PARAM_SEC"rpc." +#define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt=" +#define PARAM_SEC_RPC_CLI PARAM_SEC_RPC"cli=" #endif /* _LUSTRE_PARAM_H */ diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h index cf81f60..d52f661 100644 --- a/lustre/include/lustre_quota.h +++ b/lustre/include/lustre_quota.h @@ -203,12 +203,12 @@ typedef struct { /* For quota slave, acquire/release quota from master if needed */ int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int); - + /* For quota slave, check whether specified uid/gid's remaining quota * can finish a write rpc */ int (*quota_chkquota) (struct obd_device *, unsigned int, unsigned int, int); - + /* For quota client, poll if the quota check done */ int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *); @@ -255,7 +255,7 @@ static inline int lquota_exit(quota_interface_t *interface) } static inline int lquota_setup(quota_interface_t *interface, - struct obd_device *obd) + struct obd_device *obd) { int rc; ENTRY; diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h new file mode 100644 index 0000000..a087e27 --- /dev/null +++ b/lustre/include/lustre_req_layout.h @@ -0,0 +1,183 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/include/linux/lustre_req_layout.h + * Lustre Metadata Target (mdt) request handler + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _LUSTRE_REQ_LAYOUT_H__ +#define _LUSTRE_REQ_LAYOUT_H__ + +struct req_msg_field; +struct req_format; +struct req_capsule; + +struct ptlrpc_request; + +enum req_location { + RCL_CLIENT, + RCL_SERVER, + RCL_NR +}; + +struct req_capsule { + struct ptlrpc_request *rc_req; + const struct req_format *rc_fmt; + __u32 rc_swabbed; + enum req_location rc_loc; + int *rc_area; +}; + +enum { + /* + * Maximal number of fields (buffers) in a request message. + */ + REQ_MAX_FIELD_NR = 8 +}; + +#if !defined(__REQ_LAYOUT_USER__) + +/* struct ptlrpc_request, lustre_msg* */ +#include + +void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req, + enum req_location location, int *area); +void req_capsule_fini(struct req_capsule *pill); + +void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt); +int req_capsule_pack(struct req_capsule *pill); + +void *req_capsule_client_get(struct req_capsule *pill, + const struct req_msg_field *field); +void *req_capsule_server_get(struct req_capsule *pill, + const struct req_msg_field *field); +const void *req_capsule_other_get(struct req_capsule *pill, + const struct req_msg_field *field); + +void req_capsule_set_size(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc, int size); +int req_capsule_get_size(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc); +void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt); + +int req_capsule_has_field(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc); +int req_capsule_field_present(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc); + +int req_capsule_shrink(const struct req_capsule *pill, + const struct req_msg_field *field, + const unsigned int newlen, + const int adjust, + const int move_data); + +int req_layout_init(void); +void req_layout_fini(void); + +/* __REQ_LAYOUT_USER__ */ +#endif + +extern const struct req_format RQF_SEQ_QUERY; +extern const struct req_format RQF_FLD_QUERY; +extern const struct req_format RQF_MDS_GETSTATUS; +extern const struct req_format RQF_MDS_STATFS; +extern const struct req_format RQF_MDS_SYNC; +extern const struct req_format RQF_MDS_GETXATTR; +extern const struct req_format RQF_MDS_SETXATTR; +extern const struct req_format RQF_MDS_GETATTR; +extern const struct req_format RQF_MDS_CLOSE; +extern const struct req_format RQF_MDS_PIN; +extern const struct req_format RQF_MDS_CONNECT; +extern const struct req_format RQF_MDS_DISCONNECT; +extern const struct req_format RQF_MDS_SET_INFO; +extern const struct req_format RQF_MDS_READPAGE; +extern const struct req_format RQF_MDS_WRITEPAGE; +extern const struct req_format RQF_MDS_IS_SUBDIR; +extern const struct req_format RQF_MDS_DONE_WRITING; + +/* + * This is format of direct (non-intent) MDS_GETATTR_NAME request. + */ +extern const struct req_format RQF_MDS_GETATTR_NAME; +extern const struct req_format RQF_MDS_REINT; +extern const struct req_format RQF_MDS_REINT_CREATE; +extern const struct req_format RQF_MDS_REINT_CREATE_RMT_ACL; +extern const struct req_format RQF_MDS_REINT_CREATE_SLAVE; +extern const struct req_format RQF_MDS_REINT_CREATE_SYM; +extern const struct req_format RQF_MDS_REINT_OPEN; +extern const struct req_format RQF_MDS_REINT_UNLINK; +extern const struct req_format RQF_MDS_REINT_LINK; +extern const struct req_format RQF_MDS_REINT_RENAME; +extern const struct req_format RQF_MDS_REINT_SETATTR; +extern const struct req_format RQF_LDLM_ENQUEUE; +extern const struct req_format RQF_LDLM_INTENT; +extern const struct req_format RQF_LDLM_INTENT_GETATTR; +extern const struct req_format RQF_LDLM_INTENT_OPEN; +extern const struct req_format RQF_LDLM_INTENT_CREATE; +extern const struct req_format RQF_LDLM_INTENT_UNLINK; + +extern const struct req_msg_field RMF_PTLRPC_BODY; +extern const struct req_msg_field RMF_MDT_BODY; +extern const struct req_msg_field RMF_MDT_EPOCH; +extern const struct req_msg_field RMF_OBD_STATFS; +extern const struct req_msg_field RMF_NAME; +extern const struct req_msg_field RMF_SYMTGT; +extern const struct req_msg_field RMF_TGTUUID; +extern const struct req_msg_field RMF_CLUUID; +extern const struct req_msg_field RMF_SETINFO_VAL; +extern const struct req_msg_field RMF_SETINFO_KEY; +/* + * connection handle received in MDS_CONNECT request. + */ +extern const struct req_msg_field RMF_CONN; +extern const struct req_msg_field RMF_CONNECT_DATA; +extern const struct req_msg_field RMF_DLM_REQ; +extern const struct req_msg_field RMF_DLM_REP; +extern const struct req_msg_field RMF_LDLM_INTENT; +extern const struct req_msg_field RMF_MDT_MD; +extern const struct req_msg_field RMF_REC_CREATE; +extern const struct req_msg_field RMF_REC_LINK; +extern const struct req_msg_field RMF_REC_UNLINK; +extern const struct req_msg_field RMF_REC_RENAME; +extern const struct req_msg_field RMF_REC_SETATTR; +extern const struct req_msg_field RMF_EADATA; +extern const struct req_msg_field RMF_ACL; +extern const struct req_msg_field RMF_LOGCOOKIES; +extern const struct req_msg_field RMF_REINT_OPC; +extern const struct req_msg_field RMF_CAPA1; +extern const struct req_msg_field RMF_CAPA2; + +/* seq-mgr fields */ +extern const struct req_msg_field RMF_SEQ_OPC; +extern const struct req_msg_field RMF_SEQ_RANGE; + +/* FLD fields */ +extern const struct req_msg_field RMF_FLD_OPC; +extern const struct req_msg_field RMF_FLD_MDFLD; + +#endif /* _LUSTRE_REQ_LAYOUT_H__ */ diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h new file mode 100644 index 0000000..be0904e --- /dev/null +++ b/lustre/include/lustre_sec.h @@ -0,0 +1,572 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LUSTRE_SEC_H_ +#define _LUSTRE_SEC_H_ + +/* + * to avoid include + */ +struct obd_import; +struct ptlrpc_request; +struct ptlrpc_reply_state; +struct ptlrpc_bulk_desc; +struct brw_page; + +/* + * forward declaration + */ +struct ptlrpc_sec_policy; +struct ptlrpc_sec_cops; +struct ptlrpc_sec_sops; +struct ptlrpc_sec; +struct ptlrpc_svc_ctx; +struct ptlrpc_cli_ctx; +struct ptlrpc_ctx_ops; + +/* + * flavor constants + */ +enum sptlrpc_policies { + SPTLRPC_POLICY_NULL = 0, + SPTLRPC_POLICY_PLAIN = 1, + SPTLRPC_POLICY_GSS = 2, + SPTLRPC_POLICY_MAX, +}; + +enum sptlrpc_subpolicy_null { + SPTLRPC_SUBPOLICY_NULL = 0, + SPTLRPC_SUBPOLICY_NULL_MAX, +}; + +enum sptlrpc_subpolicy_plain { + SPTLRPC_SUBPOLICY_PLAIN = 0, + SPTLRPC_SUBPOLICY_PLAIN_MAX, +}; + +enum sptlrpc_subpolicy_gss { + SPTLRPC_SUBPOLICY_GSS_NONE = 0, + SPTLRPC_SUBPOLICY_GSS_KRB5 = 1, + SPTLRPC_SUBPOLICY_GSS_MAX, +}; + +enum sptlrpc_service_type { + SPTLRPC_SVC_NONE = 0, /* no security */ + SPTLRPC_SVC_AUTH = 1, /* authentication */ + SPTLRPC_SVC_PRIV = 2, /* privacy */ + SPTLRPC_SVC_MAX, +}; + +/* + * flavor compose/extract + */ + +typedef __u32 ptlrpc_sec_flavor_t; + +/* + * 8b (reserved) | 8b (flags) | 6b (policy) | 6b (subpolicy) | 4b (svc) + */ +#define SEC_FLAVOR_FLAGS_OFFSET (16) +#define SEC_FLAVOR_POLICY_OFFSET (10) +#define SEC_FLAVOR_SUBPOLICY_OFFSET (4) +#define SEC_FLAVOR_SVC_OFFSET (0) + +#define SEC_MAKE_RPC_FLAVOR(policy, subpolicy, svc) \ + (((__u32)(policy) << SEC_FLAVOR_POLICY_OFFSET) | \ + ((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \ + ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) + +#define SEC_MAKE_RPC_SUBFLAVOR(subpolicy, svc) \ + (((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \ + ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) + +#define SEC_FLAVOR_POLICY(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_POLICY_OFFSET) & 0x3F) +#define SEC_FLAVOR_SUBPOLICY(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_SUBPOLICY_OFFSET) & 0x3F) +#define SEC_FLAVOR_SVC(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0xF) +#define SEC_FLAVOR_SUB(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0x3FF) + +#define SEC_FLAVOR_RPC(f) \ + (((__u32) f) & ((1 << SEC_FLAVOR_FLAGS_OFFSET) - 1)) + +/* + * general gss flavors + */ +#define SPTLRPC_FLVR_GSS_NONE \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_GSS_AUTH \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_FLVR_GSS_PRIV \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_PRIV) + +/* + * gss subflavors + */ +#define SPTLRPC_SUBFLVR_KRB5 \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_SUBFLVR_KRB5I \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_SUBFLVR_KRB5P \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_PRIV) + +/* + * "end user" flavors + */ +#define SPTLRPC_FLVR_NULL \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_NULL, \ + SPTLRPC_SUBPOLICY_NULL, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_PLAIN \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_PLAIN, \ + SPTLRPC_SUBPOLICY_PLAIN, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_KRB5 \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_KRB5I \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_FLVR_KRB5P \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_PRIV) + +#define SPTLRPC_FLVR_INVALID (-1) + +#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL + +/* + * flavor flags (maximum 8 flags) + */ +#define SEC_FLAVOR_FL_BULK (1 << (0 + SEC_FLAVOR_FLAGS_OFFSET)) +#define SEC_FLAVOR_FL_USER (1 << (1 + SEC_FLAVOR_FLAGS_OFFSET)) + +#define SEC_FLAVOR_HAS_BULK(flavor) \ + (((flavor) & SEC_FLAVOR_FL_BULK) != 0) +#define SEC_FLAVOR_HAS_USER(flavor) \ + (((flavor) & SEC_FLAVOR_FL_USER) != 0) + + +struct sec_flavor_config { + __u32 sfc_rpc_flavor; /* main rpc flavor */ + __u32 sfc_bulk_priv; /* bulk encryption algorithm */ + __u32 sfc_bulk_csum; /* bulk checksum algorithm */ + __u32 sfc_flags; /* extra flags */ +}; + +enum lustre_part { + LUSTRE_CLI = 0, + LUSTRE_MDT, + LUSTRE_OST, + LUSTRE_MGC, + LUSTRE_MGS, +}; + +/* The maximum length of security payload. 1024 is enough for Kerberos 5, + * and should be enough for other future mechanisms but not sure. + * Only used by pre-allocated request/reply pool. + */ +#define SPTLRPC_MAX_PAYLOAD (1024) + + +struct vfs_cred { + uint32_t vc_uid; + uint32_t vc_gid; +}; + +struct ptlrpc_ctx_ops { + int (*match) (struct ptlrpc_cli_ctx *ctx, + struct vfs_cred *vcred); + int (*refresh) (struct ptlrpc_cli_ctx *ctx); + int (*display) (struct ptlrpc_cli_ctx *ctx, + char *buf, int bufsize); + /* + * rpc data transform + */ + int (*sign) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*verify) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*seal) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*unseal) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + /* + * bulk transform + */ + int (*wrap_bulk) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + int (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +}; + +#define PTLRPC_CTX_UPTODATE_BIT (0) /* uptodate */ +#define PTLRPC_CTX_DEAD_BIT (1) /* mark expired gracefully */ +#define PTLRPC_CTX_ERROR_BIT (2) /* fatal error (refresh, etc.) */ +#define PTLRPC_CTX_HASHED_BIT (8) /* in hash table */ +#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */ + +#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT) +#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT) +#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT) +#define PTLRPC_CTX_HASHED (1 << PTLRPC_CTX_HASHED_BIT) +#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT) + +#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_UPTODATE | \ + PTLRPC_CTX_DEAD | \ + PTLRPC_CTX_ERROR) + +struct ptlrpc_cli_ctx { + struct hlist_node cc_hash; /* linked into hash table */ + atomic_t cc_refcount; + struct ptlrpc_sec *cc_sec; + struct ptlrpc_ctx_ops *cc_ops; + cfs_time_t cc_expire; /* in seconds */ + unsigned long cc_flags; + struct vfs_cred cc_vcred; + spinlock_t cc_lock; + struct list_head cc_req_list; /* waiting reqs linked here */ +}; + +struct ptlrpc_sec_cops { + /* + * ptlrpc_sec constructor/destructor + */ + struct ptlrpc_sec * (*create_sec) (struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags); + void (*destroy_sec) (struct ptlrpc_sec *sec); + /* + * search ctx for a certain user, if this function is missing, + * a generic function will be invoked by caller. implement this + * for any special need. + */ + struct ptlrpc_cli_ctx * (*lookup_ctx) (struct ptlrpc_sec *sec, + struct vfs_cred *vcred); + /* + * ptlrpc_cli_ctx constructor/destructor + */ + struct ptlrpc_cli_ctx * (*create_ctx) (struct ptlrpc_sec *sec, + struct vfs_cred *vcred); + void (*destroy_ctx) (struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); + /* reverse service */ + int (*install_rctx)(struct obd_import *imp, + struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); + /* + * request/reply buffer manipulation + */ + int (*alloc_reqbuf)(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int lustre_msg_size); + void (*free_reqbuf) (struct ptlrpc_sec *sec, + struct ptlrpc_request *req); + int (*alloc_repbuf)(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int lustre_msg_size); + void (*free_repbuf) (struct ptlrpc_sec *sec, + struct ptlrpc_request *req); + int (*enlarge_reqbuf) + (struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize); +}; + +struct ptlrpc_sec_sops { + int (*accept) (struct ptlrpc_request *req); + int (*authorize) (struct ptlrpc_request *req); + void (*invalidate_ctx) + (struct ptlrpc_svc_ctx *ctx); + /* buffer manipulation */ + int (*alloc_rs) (struct ptlrpc_request *req, + int msgsize); + void (*free_rs) (struct ptlrpc_reply_state *rs); + void (*free_ctx) (struct ptlrpc_svc_ctx *ctx); + /* reverse credential */ + int (*install_rctx)(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx); + /* bulk transform */ + int (*unwrap_bulk) (struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + int (*wrap_bulk) (struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +}; + +struct ptlrpc_sec_policy { + struct module *sp_owner; + char *sp_name; + __u32 sp_policy; /* policy number */ + struct ptlrpc_sec_cops *sp_cops; /* client ops */ + struct ptlrpc_sec_sops *sp_sops; /* server ops */ +}; + +#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */ +#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */ +#define PTLRPC_SEC_FL_BULK 0x0004 /* intensive bulk i/o expected */ + +struct ptlrpc_sec { + struct ptlrpc_sec_policy *ps_policy; + atomic_t ps_refcount; + __u32 ps_flavor; /* rpc flavor */ + unsigned long ps_flags; /* PTLRPC_SEC_FL_XX */ + struct obd_import *ps_import; /* owning import */ + spinlock_t ps_lock; /* protect ccache */ + int ps_ccache_size; /* must be 2^n */ + struct hlist_head *ps_ccache; /* ctx cache hash */ + atomic_t ps_busy; /* busy count */ + cfs_time_t ps_gc_interval; /* in seconds */ + cfs_time_t ps_gc_next; /* in seconds */ +}; + +struct ptlrpc_svc_ctx { + atomic_t sc_refcount; + struct ptlrpc_sec_policy *sc_policy; +}; + +/* + * user identity descriptor + */ +#define LUSTRE_MAX_GROUPS (128) + +struct ptlrpc_user_desc { + __u32 pud_uid; + __u32 pud_gid; + __u32 pud_fsuid; + __u32 pud_fsgid; + __u32 pud_cap; + __u32 pud_ngroups; + __u32 pud_groups[0]; +}; + +/* + * bulk flavors + */ +enum bulk_checksum_alg { + BULK_CSUM_ALG_NULL = 0, + BULK_CSUM_ALG_CRC32, + BULK_CSUM_ALG_MD5, + BULK_CSUM_ALG_SHA1, + BULK_CSUM_ALG_SHA256, + BULK_CSUM_ALG_SHA384, + BULK_CSUM_ALG_SHA512, + BULK_CSUM_ALG_MAX +}; + +enum bulk_encrypt_alg { + BULK_PRIV_ALG_NULL = 0, + BULK_PRIV_ALG_ARC4, + BULK_PRIV_ALG_MAX +}; + +struct ptlrpc_bulk_sec_desc { + __u32 bsd_version; + __u32 bsd_pad; + __u32 bsd_csum_alg; /* checksum algorithm */ + __u32 bsd_priv_alg; /* encrypt algorithm */ + __u8 bsd_iv[16]; /* encrypt iv */ + __u8 bsd_csum[0]; +}; + +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg); +const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg); + +/* + * lprocfs + */ +struct proc_dir_entry; +extern struct proc_dir_entry *sptlrpc_proc_root; + +/* + * round size up to next power of 2, for slab allocation. + * @size must be sane (can't overflow after round up) + */ +static inline int size_roundup_power2(int size) +{ + size--; + size |= size >> 1; + size |= size >> 2; + size |= size >> 4; + size |= size >> 8; + size |= size >> 16; + size++; + return size; +} + +/* + * internal support libraries + */ +void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, + int segment, int newsize); + +/* + * security type + */ +int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy); +int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy); + +__u32 sptlrpc_name2flavor(const char *name); +char *sptlrpc_flavor2name(__u32 flavor); + +static inline +struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy) +{ + __module_get(policy->sp_owner); + return policy; +} + +static inline +void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy) +{ + module_put(policy->sp_owner); +} + +/* + * client credential + */ +struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx); +void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync); +void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx); +void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new); +void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx); +int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize); + +/* + * client wrap/buffers + */ +int sptlrpc_cli_wrap_request(struct ptlrpc_request *req); +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req); +int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize); +void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req); +int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize); +void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req); +int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, + int segment, int newsize); +void sptlrpc_request_out_callback(struct ptlrpc_request *req); + +/* + * higher interface of import & request + */ +int sptlrpc_import_get_sec(struct obd_import *imp, struct ptlrpc_svc_ctx *svc_ctx, + __u32 flavor, unsigned long flags); +void sptlrpc_import_put_sec(struct obd_import *imp); +int sptlrpc_import_check_ctx(struct obd_import *imp); +void sptlrpc_import_flush_root_ctx(struct obd_import *imp); +void sptlrpc_import_flush_my_ctx(struct obd_import *imp); +void sptlrpc_import_flush_all_ctx(struct obd_import *imp); +int sptlrpc_req_get_ctx(struct ptlrpc_request *req); +void sptlrpc_req_put_ctx(struct ptlrpc_request *req); +int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout); +int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req); +void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode); + +int sptlrpc_parse_flavor(enum lustre_part from, enum lustre_part to, + char *str, struct sec_flavor_config *conf); +/* misc */ +const char * sec2target_str(struct ptlrpc_sec *sec); +int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, + int *eof, void *data); + +/* + * server side + */ +enum secsvc_accept_res { + SECSVC_OK = 0, + SECSVC_COMPLETE, + SECSVC_DROP, +}; + +int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req); +int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen); +int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req); +void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs); +void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req); +void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req); +void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req); + +/* + * reverse context + */ +int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx); +int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_cli_ctx *ctx); + +/* bulk security api */ +int sptlrpc_enc_pool_add_user(void); +int sptlrpc_enc_pool_del_user(void); +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc); +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); + +int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, + int nob, obd_count pg_count, + struct brw_page **pga); +int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + +/* user descriptor helpers */ +static inline int sptlrpc_user_desc_size(int ngroups) +{ + return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32); +} + +int sptlrpc_current_user_desc_size(void); +int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset); +int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset); + +/* bulk helpers (internal use only by policies) */ +int bulk_sec_desc_size(__u32 csum_alg, int request, int read); +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset); + +int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, + __u32 alg, struct lustre_msg *rmsg, int roff); +int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *rmsg, int roff, + struct lustre_msg *vmsg, int voff); +int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *vmsg, int voff, + struct lustre_msg *rmsg, int roff); + + +#endif /* _LUSTRE_SEC_H_ */ diff --git a/lustre/include/lustre_ucache.h b/lustre/include/lustre_ucache.h index 16b5c1a..0cba634 100644 --- a/lustre/include/lustre_ucache.h +++ b/lustre/include/lustre_ucache.h @@ -27,22 +27,74 @@ #define UC_CACHE_CLEAR_INVALID(i) (i)->ue_flags &= ~UC_CACHE_INVALID #define UC_CACHE_CLEAR_EXPIRED(i) (i)->ue_flags &= ~UC_CACHE_EXPIRED +struct upcall_cache_entry; + +struct mdt_setxid_perm { + lnet_nid_t mp_nid; + __u32 mp_perm; +}; + +struct mdt_identity { + struct upcall_cache_entry *mi_uc_entry; + uid_t mi_uid; + gid_t mi_gid; + struct group_info *mi_ginfo; + int mi_nperms; + struct mdt_setxid_perm *mi_perms; +}; + +struct rmtacl_upcall_data { + uid_t aud_uid; + gid_t aud_gid; + char *aud_cmd; +}; + +struct mdt_rmtacl { + uid_t ra_uid; + gid_t ra_gid; + __u32 ra_handle; + char *ra_cmd; + char *ra_buf; +}; + struct upcall_cache_entry { struct list_head ue_hash; __u64 ue_key; - __u64 ue_primary; - struct group_info *ue_group_info; +// __u64 ue_primary; +// struct group_info *ue_group_info; atomic_t ue_refcount; int ue_flags; cfs_waitq_t ue_waitq; cfs_time_t ue_acquire_expire; cfs_time_t ue_expire; + union { + struct mdt_identity identity; + struct mdt_rmtacl acl; + } u; }; #define UC_CACHE_HASH_SIZE (128) #define UC_CACHE_HASH_INDEX(id) ((id) & (UC_CACHE_HASH_SIZE - 1)) #define UC_CACHE_UPCALL_MAXPATH (1024UL) +struct upcall_cache; + +struct upcall_cache_ops { + void (*init_entry)(struct upcall_cache_entry *, void *args); + void (*free_entry)(struct upcall_cache *, + struct upcall_cache_entry *); + int (*upcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*downcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*do_upcall)(struct upcall_cache *, + struct upcall_cache_entry *); + int (*parse_downcall)(struct upcall_cache *, + struct upcall_cache_entry *, void *); +}; + struct upcall_cache { struct list_head uc_hashtable[UC_CACHE_HASH_SIZE]; spinlock_t uc_lock; @@ -51,8 +103,23 @@ struct upcall_cache { char uc_upcall[UC_CACHE_UPCALL_MAXPATH]; cfs_time_t uc_acquire_expire; /* jiffies */ cfs_time_t uc_entry_expire; /* jiffies */ + struct upcall_cache_ops *uc_ops; }; +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache, + __u64 key, void *args); +void upcall_cache_put_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry); +int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, + void *args); +void upcall_cache_flush_idle(struct upcall_cache *cache); +void upcall_cache_flush_all(struct upcall_cache *cache); +void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args); +struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, + struct upcall_cache_ops *ops); +void upcall_cache_cleanup(struct upcall_cache *cache); + +#if 0 struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, __u64 key, __u32 primary, __u32 ngroups, __u32 *groups); @@ -65,4 +132,5 @@ void upcall_cache_flush_all(struct upcall_cache *cache); struct upcall_cache *upcall_cache_init(const char *name); void upcall_cache_cleanup(struct upcall_cache *hash); +#endif #endif /* _UPCALL_CACHE_H */ diff --git a/lustre/include/lustre_ver.h.in b/lustre/include/lustre_ver.h.in index 1c635109..583447e 100644 --- a/lustre/include/lustre_ver.h.in +++ b/lustre/include/lustre_ver.h.in @@ -28,5 +28,4 @@ #define LUSTRE_VERSION_OFFSET_WARN @AC_LUSTRE_LIB_VER_OFFSET_WARN@ #endif - #endif diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h new file mode 100644 index 0000000..c4ebbb0 --- /dev/null +++ b/lustre/include/md_object.h @@ -0,0 +1,658 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Extention of lu_object.h for metadata objects + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _LUSTRE_MD_OBJECT_H +#define _LUSTRE_MD_OBJECT_H + +/* + * Sub-class of lu_object with methods common for "meta-data" objects in MDT + * stack. + * + * Meta-data objects implement namespace operations: you can link, unlink + * them, and treat them as directories. + * + * Examples: mdt, cmm, and mdt are implementations of md interface. + */ + + +/* + * super-class definitions. + */ +#include +#include + +struct md_device; +struct md_device_operations; +struct md_object; + + +typedef enum { + UCRED_INVALID = -1, + UCRED_INIT = 0, + UCRED_OLD = 1, + UCRED_NEW = 2, +} ucred_t; + +#define SQUASH_NONE 0x00 +#define SQUASH_UID 0x01 +#define SQUASH_GID 0x02 + +struct md_ucred { + ucred_t mu_valid; + __u32 mu_squash; + __u32 mu_o_uid; + __u32 mu_o_gid; + __u32 mu_o_fsuid; + __u32 mu_o_fsgid; + __u32 mu_uid; + __u32 mu_gid; + __u32 mu_fsuid; + __u32 mu_fsgid; + __u32 mu_suppgids[2]; + __u32 mu_cap; + __u32 mu_umask; + struct group_info *mu_ginfo; + struct mdt_identity *mu_identity; +}; + +#define MD_CAPAINFO_MAX 5 + +/* there are at most 5 fids in one operation, see rename, NOTE the last one + * is a temporary one used for is_subdir() */ +struct md_capainfo { + const struct lu_fid *mc_fid[MD_CAPAINFO_MAX]; + struct lustre_capa *mc_capa[MD_CAPAINFO_MAX]; +}; + +/* + * Implemented in mdd/mdd_handler.c. + * + * XXX should be moved into separate .h/.c together with all md security + * related definitions. + */ +struct md_ucred *md_ucred(const struct lu_env *env); +struct md_capainfo *md_capainfo(const struct lu_env *env); + +/* metadata attributes */ +enum ma_valid { + MA_INODE = (1 << 0), + MA_LOV = (1 << 1), + MA_COOKIE = (1 << 2), + MA_FLAGS = (1 << 3), + MA_LMV = (1 << 4), + MA_ACL_DEF = (1 << 5) +}; + +typedef enum { + MDL_MINMODE = 0, + MDL_EX = 1, + MDL_PW = 2, + MDL_PR = 4, + MDL_CW = 8, + MDL_CR = 16, + MDL_NL = 32, + MDL_GROUP = 64, + MDL_MAXMODE +} mdl_mode_t; + +typedef enum { + MDT_NUL_LOCK = 0, + MDT_REG_LOCK = (1 << 0), + MDT_PDO_LOCK = (1 << 1) +} mdl_type_t; + +struct md_attr { + __u64 ma_valid; + __u64 ma_need; + __u64 ma_attr_flags; + struct lu_attr ma_attr; + struct lov_mds_md *ma_lmm; + int ma_lmm_size; + struct lmv_stripe_md *ma_lmv; + int ma_lmv_size; + void *ma_acl; + int ma_acl_size; + struct llog_cookie *ma_cookie; + int ma_cookie_size; + struct lustre_capa *ma_capa; +}; + +/* Additional parameters for create */ +struct md_op_spec { + union { + /* symlink target */ + const char *sp_symname; + /* parent FID for cross-ref mkdir */ + const struct lu_fid *sp_pfid; + /* eadata for regular files */ + struct md_spec_reg { + /* lov objs exist already */ + const struct lu_fid *fid; + int no_lov_create; + const void *eadata; + int eadatalen; + } sp_ea; + } u; + + /* Create flag from client: such as MDS_OPEN_CREAT, and others. */ + __u32 sp_cr_flags; + + /* Should mdd do lookup sanity check or not. */ + int sp_cr_lookup; + + /* Current lock mode for parent dir where create is performing. */ + mdl_mode_t sp_cr_mode; + + /* Check for split */ + int sp_ck_split; +}; + +/* + * Operations implemented for each md object (both directory and leaf). + */ +struct md_object_operations { + int (*moo_permission)(const struct lu_env *env, + struct md_object *pobj, struct md_object *cobj, + struct md_attr *attr, int mask); + + int (*moo_attr_get)(const struct lu_env *env, struct md_object *obj, + struct md_attr *attr); + + int (*moo_attr_set)(const struct lu_env *env, struct md_object *obj, + const struct md_attr *attr); + + int (*moo_xattr_get)(const struct lu_env *env, struct md_object *obj, + struct lu_buf *buf, const char *name); + + int (*moo_xattr_list)(const struct lu_env *env, struct md_object *obj, + struct lu_buf *buf); + + int (*moo_xattr_set)(const struct lu_env *env, struct md_object *obj, + const struct lu_buf *buf, const char *name, + int fl); + + int (*moo_xattr_del)(const struct lu_env *env, struct md_object *obj, + const char *name); + + int (*moo_readpage)(const struct lu_env *env, struct md_object *obj, + const struct lu_rdpg *rdpg); + + int (*moo_readlink)(const struct lu_env *env, struct md_object *obj, + struct lu_buf *buf); + + /* part of cross-ref operation */ + int (*moo_object_create)(const struct lu_env *env, + struct md_object *obj, + const struct md_op_spec *spec, + struct md_attr *ma); + + int (*moo_ref_add)(const struct lu_env *env, + struct md_object *obj, + const struct md_attr *ma); + + int (*moo_ref_del)(const struct lu_env *env, + struct md_object *obj, + struct md_attr *ma); + + int (*moo_open)(const struct lu_env *env, + struct md_object *obj, int flag); + + int (*moo_close)(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma); + + int (*moo_capa_get)(const struct lu_env *, struct md_object *, + struct lustre_capa *, int renewal); +}; + +/* + * Operations implemented for each directory object. + */ +struct md_dir_operations { + int (*mdo_is_subdir) (const struct lu_env *env, struct md_object *obj, + const struct lu_fid *fid, struct lu_fid *sfid); + + int (*mdo_lookup)(const struct lu_env *env, struct md_object *obj, + const struct lu_name *lname, struct lu_fid *fid, + struct md_op_spec *spec); + + mdl_mode_t (*mdo_lock_mode)(const struct lu_env *env, + struct md_object *obj, + mdl_mode_t mode); + + int (*mdo_create)(const struct lu_env *env, struct md_object *pobj, + const struct lu_name *lname, struct md_object *child, + struct md_op_spec *spec, + struct md_attr *ma); + + /* This method is used for creating data object for this meta object*/ + int (*mdo_create_data)(const struct lu_env *env, struct md_object *p, + struct md_object *o, + const struct md_op_spec *spec, + struct md_attr *ma); + + int (*mdo_rename)(const struct lu_env *env, struct md_object *spobj, + struct md_object *tpobj, const struct lu_fid *lf, + const struct lu_name *lsname, struct md_object *tobj, + const struct lu_name *ltname, struct md_attr *ma); + + int (*mdo_link)(const struct lu_env *env, struct md_object *tgt_obj, + struct md_object *src_obj, const struct lu_name *lname, + struct md_attr *ma); + + int (*mdo_unlink)(const struct lu_env *env, struct md_object *pobj, + struct md_object *cobj, const struct lu_name *lname, + struct md_attr *ma); + + /* partial ops for cross-ref case */ + int (*mdo_name_insert)(const struct lu_env *env, + struct md_object *obj, + const struct lu_name *lname, + const struct lu_fid *fid, + const struct md_attr *ma); + + int (*mdo_name_remove)(const struct lu_env *env, + struct md_object *obj, + const struct lu_name *lname, + const struct md_attr *ma); + + int (*mdo_rename_tgt)(const struct lu_env *env, struct md_object *pobj, + struct md_object *tobj, const struct lu_fid *fid, + const struct lu_name *lname, struct md_attr *ma); +}; + +struct md_device_operations { + /* meta-data device related handlers. */ + int (*mdo_root_get)(const struct lu_env *env, struct md_device *m, + struct lu_fid *f); + + int (*mdo_maxsize_get)(const struct lu_env *env, struct md_device *m, + int *md_size, int *cookie_size); + + int (*mdo_statfs)(const struct lu_env *env, struct md_device *m, + struct kstatfs *sfs); + + int (*mdo_init_capa_ctxt)(const struct lu_env *env, struct md_device *m, + int mode, unsigned long timeout, __u32 alg, + struct lustre_capa_key *keys); + + int (*mdo_update_capa_key)(const struct lu_env *env, + struct md_device *m, + struct lustre_capa_key *key); +}; + +enum md_upcall_event { + /*sync the md layer*/ + MD_LOV_SYNC = (1 << 0), + MD_NO_TRANS = (1 << 1), /* Just for split, no need trans, for replay */ +}; + +struct md_upcall { + /* this lock protects upcall using against its removal + * read lock is for usage the upcall, write - for init/fini */ + struct rw_semaphore mu_upcall_sem; + /* device to call, upper layer normally */ + struct md_device *mu_upcall_dev; + /* upcall function */ + int (*mu_upcall)(const struct lu_env *env, struct md_device *md, + enum md_upcall_event ev); +}; + +struct md_device { + struct lu_device md_lu_dev; + struct md_device_operations *md_ops; + struct md_upcall md_upcall; +}; + +static inline void md_upcall_init(struct md_device *m, void *upcl) +{ + init_rwsem(&m->md_upcall.mu_upcall_sem); + m->md_upcall.mu_upcall_dev = NULL; + m->md_upcall.mu_upcall = upcl; +} + +static inline void md_upcall_dev_set(struct md_device *m, struct md_device *up) +{ + down_write(&m->md_upcall.mu_upcall_sem); + m->md_upcall.mu_upcall_dev = up; + up_write(&m->md_upcall.mu_upcall_sem); +} + +static inline void md_upcall_fini(struct md_device *m) +{ + down_write(&m->md_upcall.mu_upcall_sem); + m->md_upcall.mu_upcall_dev = NULL; + m->md_upcall.mu_upcall = NULL; + up_write(&m->md_upcall.mu_upcall_sem); +} + +static inline int md_do_upcall(const struct lu_env *env, struct md_device *m, + enum md_upcall_event ev) +{ + int rc = 0; + down_read(&m->md_upcall.mu_upcall_sem); + if (m->md_upcall.mu_upcall_dev != NULL && + m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall != NULL) { + rc = m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall(env, + m->md_upcall.mu_upcall_dev, ev); + } + up_read(&m->md_upcall.mu_upcall_sem); + return rc; +} + +struct md_object { + struct lu_object mo_lu; + struct md_object_operations *mo_ops; + struct md_dir_operations *mo_dir_ops; +}; + +static inline int lu_device_is_md(const struct lu_device *d) +{ + return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD); +} + +static inline struct md_device *lu2md_dev(const struct lu_device *d) +{ + LASSERT(lu_device_is_md(d)); + return container_of0(d, struct md_device, md_lu_dev); +} + +static inline struct lu_device *md2lu_dev(struct md_device *d) +{ + return &d->md_lu_dev; +} + +static inline struct md_object *lu2md(const struct lu_object *o) +{ + LASSERT(lu_device_is_md(o->lo_dev)); + return container_of0(o, struct md_object, mo_lu); +} + +static inline struct md_object *md_object_next(const struct md_object *obj) +{ + return (obj ? lu2md(lu_object_next(&obj->mo_lu)) : NULL); +} + +static inline struct md_device *md_obj2dev(const struct md_object *o) +{ + LASSERT(lu_device_is_md(o->mo_lu.lo_dev)); + return container_of0(o->mo_lu.lo_dev, struct md_device, md_lu_dev); +} + +static inline int md_device_init(struct md_device *md, struct lu_device_type *t) +{ + return lu_device_init(&md->md_lu_dev, t); +} + +static inline void md_device_fini(struct md_device *md) +{ + lu_device_fini(&md->md_lu_dev); +} + +/* md operations */ +static inline int mo_permission(const struct lu_env *env, + struct md_object *p, + struct md_object *c, + struct md_attr *at, + int mask) +{ + LASSERT(c->mo_ops->moo_permission); + return c->mo_ops->moo_permission(env, p, c, at, mask); +} + +static inline int mo_attr_get(const struct lu_env *env, + struct md_object *m, + struct md_attr *at) +{ + LASSERT(m->mo_ops->moo_attr_get); + return m->mo_ops->moo_attr_get(env, m, at); +} + +static inline int mo_readlink(const struct lu_env *env, + struct md_object *m, + struct lu_buf *buf) +{ + LASSERT(m->mo_ops->moo_readlink); + return m->mo_ops->moo_readlink(env, m, buf); +} + +static inline int mo_attr_set(const struct lu_env *env, + struct md_object *m, + const struct md_attr *at) +{ + LASSERT(m->mo_ops->moo_attr_set); + return m->mo_ops->moo_attr_set(env, m, at); +} + +static inline int mo_xattr_get(const struct lu_env *env, + struct md_object *m, + struct lu_buf *buf, + const char *name) +{ + LASSERT(m->mo_ops->moo_xattr_get); + return m->mo_ops->moo_xattr_get(env, m, buf, name); +} + +static inline int mo_xattr_del(const struct lu_env *env, + struct md_object *m, + const char *name) +{ + LASSERT(m->mo_ops->moo_xattr_del); + return m->mo_ops->moo_xattr_del(env, m, name); +} + +static inline int mo_xattr_set(const struct lu_env *env, + struct md_object *m, + const struct lu_buf *buf, + const char *name, + int flags) +{ + LASSERT(m->mo_ops->moo_xattr_set); + return m->mo_ops->moo_xattr_set(env, m, buf, name, flags); +} + +static inline int mo_xattr_list(const struct lu_env *env, + struct md_object *m, + struct lu_buf *buf) +{ + LASSERT(m->mo_ops->moo_xattr_list); + return m->mo_ops->moo_xattr_list(env, m, buf); +} + +static inline int mo_open(const struct lu_env *env, + struct md_object *m, + int flags) +{ + LASSERT(m->mo_ops->moo_open); + return m->mo_ops->moo_open(env, m, flags); +} + +static inline int mo_close(const struct lu_env *env, + struct md_object *m, + struct md_attr *ma) +{ + LASSERT(m->mo_ops->moo_close); + return m->mo_ops->moo_close(env, m, ma); +} + +static inline int mo_readpage(const struct lu_env *env, + struct md_object *m, + const struct lu_rdpg *rdpg) +{ + LASSERT(m->mo_ops->moo_readpage); + return m->mo_ops->moo_readpage(env, m, rdpg); +} + +static inline int mo_object_create(const struct lu_env *env, + struct md_object *m, + const struct md_op_spec *spc, + struct md_attr *at) +{ + LASSERT(m->mo_ops->moo_object_create); + return m->mo_ops->moo_object_create(env, m, spc, at); +} + +static inline int mo_ref_add(const struct lu_env *env, + struct md_object *m, + const struct md_attr *ma) +{ + LASSERT(m->mo_ops->moo_ref_add); + return m->mo_ops->moo_ref_add(env, m, ma); +} + +static inline int mo_ref_del(const struct lu_env *env, + struct md_object *m, + struct md_attr *ma) +{ + LASSERT(m->mo_ops->moo_ref_del); + return m->mo_ops->moo_ref_del(env, m, ma); +} + +static inline int mo_capa_get(const struct lu_env *env, + struct md_object *m, + struct lustre_capa *c, + int renewal) +{ + LASSERT(m->mo_ops->moo_capa_get); + return m->mo_ops->moo_capa_get(env, m, c, renewal); +} + +static inline int mdo_lookup(const struct lu_env *env, + struct md_object *p, + const struct lu_name *lname, + struct lu_fid *f, + struct md_op_spec *spec) +{ + LASSERT(p->mo_dir_ops->mdo_lookup); + return p->mo_dir_ops->mdo_lookup(env, p, lname, f, spec); +} + +static inline mdl_mode_t mdo_lock_mode(const struct lu_env *env, + struct md_object *mo, + mdl_mode_t lm) +{ + if (mo->mo_dir_ops->mdo_lock_mode == NULL) + return MDL_MINMODE; + return mo->mo_dir_ops->mdo_lock_mode(env, mo, lm); +} + +static inline int mdo_create(const struct lu_env *env, + struct md_object *p, + const struct lu_name *lchild_name, + struct md_object *c, + struct md_op_spec *spc, + struct md_attr *at) +{ + LASSERT(c->mo_dir_ops->mdo_create); + return c->mo_dir_ops->mdo_create(env, p, lchild_name, c, spc, at); +} + +static inline int mdo_create_data(const struct lu_env *env, + struct md_object *p, + struct md_object *c, + const struct md_op_spec *spec, + struct md_attr *ma) +{ + LASSERT(c->mo_dir_ops->mdo_create_data); + return c->mo_dir_ops->mdo_create_data(env, p, c, spec, ma); +} + +static inline int mdo_rename(const struct lu_env *env, + struct md_object *sp, + struct md_object *tp, + const struct lu_fid *lf, + const struct lu_name *lsname, + struct md_object *t, + const struct lu_name *ltname, + struct md_attr *ma) +{ + LASSERT(tp->mo_dir_ops->mdo_rename); + return tp->mo_dir_ops->mdo_rename(env, sp, tp, lf, lsname, t, ltname, + ma); +} + +static inline int mdo_is_subdir(const struct lu_env *env, + struct md_object *mo, + const struct lu_fid *fid, + struct lu_fid *sfid) +{ + LASSERT(mo->mo_dir_ops->mdo_is_subdir); + return mo->mo_dir_ops->mdo_is_subdir(env, mo, fid, sfid); +} + +static inline int mdo_link(const struct lu_env *env, + struct md_object *p, + struct md_object *s, + const struct lu_name *lname, + struct md_attr *ma) +{ + LASSERT(s->mo_dir_ops->mdo_link); + return s->mo_dir_ops->mdo_link(env, p, s, lname, ma); +} + +static inline int mdo_unlink(const struct lu_env *env, + struct md_object *p, + struct md_object *c, + const struct lu_name *lname, + struct md_attr *ma) +{ + LASSERT(c->mo_dir_ops->mdo_unlink); + return c->mo_dir_ops->mdo_unlink(env, p, c, lname, ma); +} + +static inline int mdo_name_insert(const struct lu_env *env, + struct md_object *p, + const struct lu_name *lname, + const struct lu_fid *f, + const struct md_attr *ma) +{ + LASSERT(p->mo_dir_ops->mdo_name_insert); + return p->mo_dir_ops->mdo_name_insert(env, p, lname, f, ma); +} + +static inline int mdo_name_remove(const struct lu_env *env, + struct md_object *p, + const struct lu_name *lname, + const struct md_attr *ma) +{ + LASSERT(p->mo_dir_ops->mdo_name_remove); + return p->mo_dir_ops->mdo_name_remove(env, p, lname, ma); +} + +static inline int mdo_rename_tgt(const struct lu_env *env, + struct md_object *p, + struct md_object *t, + const struct lu_fid *lf, + const struct lu_name *lname, + struct md_attr *ma) +{ + if (t) { + LASSERT(t->mo_dir_ops->mdo_rename_tgt); + return t->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma); + } else { + LASSERT(p->mo_dir_ops->mdo_rename_tgt); + return p->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma); + } +} + +#endif /* _LINUX_MD_OBJECT_H */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 85073a3..1dd41e4 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -27,10 +27,13 @@ #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */ #define IOC_MDC_MAX_NR 50 -#include #include +#include +#include #include #include +#include +#include #define MAX_OBD_DEVICES 8192 @@ -166,7 +169,10 @@ struct obd_info { * level. E.g. it is used for update lsm->lsm_oinfo at every recieved * request in osc level for enqueue requests. It is also possible to * update some caller data from LOV layer if needed. */ - obd_enqueue_update_f oi_cb_up; + obd_enqueue_update_f oi_cb_up; + /* oss capability, its type is obd_capa in client to avoid copy. + * in contrary its type is lustre_capa in OSS. */ + void *oi_capa; }; /* compare all relevant fields. */ @@ -185,10 +191,12 @@ void lov_stripe_unlock(struct lov_stripe_md *md); struct obd_type { struct list_head typ_chain; - struct obd_ops *typ_ops; + struct obd_ops *typ_dt_ops; + struct md_ops *typ_md_ops; cfs_proc_dir_entry_t *typ_procroot; char *typ_name; int typ_refcnt; + struct lu_device_type *typ_lu; spinlock_t obd_type_lock; }; @@ -219,6 +227,7 @@ struct obd_async_page_ops { void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa, obd_valid valid); int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); + struct obd_capa *(*ap_lookup_capa)(void *data, int cmd); }; /* the `oig' is passed down from a caller of obd rw methods. the callee @@ -255,22 +264,66 @@ struct obd_device_target { struct lustre_quota_ctxt obt_qctxt; }; +/* llog contexts */ +enum llog_ctxt_id { + LLOG_CONFIG_ORIG_CTXT = 0, + LLOG_CONFIG_REPL_CTXT = 1, + LLOG_MDS_OST_ORIG_CTXT = 2, + LLOG_MDS_OST_REPL_CTXT = 3, + LLOG_SIZE_ORIG_CTXT = 4, + LLOG_SIZE_REPL_CTXT = 5, + LLOG_MD_ORIG_CTXT = 6, + LLOG_MD_REPL_CTXT = 7, + LLOG_RD1_ORIG_CTXT = 8, + LLOG_RD1_REPL_CTXT = 9, + LLOG_TEST_ORIG_CTXT = 10, + LLOG_TEST_REPL_CTXT = 11, + LLOG_LOVEA_ORIG_CTXT = 12, + LLOG_LOVEA_REPL_CTXT = 13, + LLOG_MAX_CTXTS +}; + +#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ + #define FILTER_GROUP_LLOG 1 #define FILTER_GROUP_ECHO 2 +#define FILTER_GROUP_MDS0 3 + +struct filter_subdirs { + cfs_dentry_t *dentry[FILTER_SUBDIR_COUNT]; +}; + struct filter_ext { __u64 fe_start; __u64 fe_end; }; +struct obd_llogs { + struct llog_ctxt *llog_ctxt[LLOG_MAX_CTXTS]; +}; + +struct filter_group_llog { + struct list_head list; + int group; + struct obd_llogs *llogs; + struct obd_export *exp; +}; + struct filter_obd { /* NB this field MUST be first */ struct obd_device_target fo_obt; const char *fo_fstype; struct vfsmount *fo_vfsmnt; + + int fo_group_count; cfs_dentry_t *fo_dentry_O; cfs_dentry_t **fo_dentry_O_groups; - cfs_dentry_t **fo_dentry_O_sub; + struct filter_subdirs *fo_dentry_O_sub; + struct semaphore fo_init_lock; /* group initialization lock */ + int fo_committed_group; + + spinlock_t fo_objidlock; /* protect fo_lastobjid */ spinlock_t fo_translock; /* protect fsd_last_transno */ struct file *fo_rcvd_filp; @@ -279,8 +332,8 @@ struct filter_obd { unsigned long *fo_last_rcvd_slots; __u64 fo_mount_count; - int fo_destroy_in_progress; - struct semaphore fo_create_lock; + unsigned long fo_destroys_in_progress; + struct semaphore fo_create_locks[FILTER_SUBDIR_COUNT]; struct list_head fo_export_list; int fo_subdir_count; @@ -318,6 +371,9 @@ struct filter_obd { struct filter_iobuf **fo_iobuf_pool; int fo_iobuf_count; + struct list_head fo_llog_list; + spinlock_t fo_llog_list_lock; + struct brw_stats fo_filter_stats; struct lustre_quota_ctxt fo_quota_ctxt; spinlock_t fo_quotacheck_lock; @@ -325,6 +381,11 @@ struct filter_obd { int fo_fmd_max_num; /* per exp filter_mod_data */ int fo_fmd_max_age; /* jiffies to fmd expiry */ + + /* capability related */ + unsigned int fo_fl_oss_capa; + struct list_head fo_capa_keys; + struct hlist_head *fo_capa_hash; }; #define OSC_MAX_RIF_DEFAULT 8 @@ -348,6 +409,9 @@ struct client_obd { int cl_max_mds_easize; int cl_max_mds_cookiesize; + /* security configuration */ + struct sec_flavor_config cl_sec_conf; + //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ void *cl_llcd_offset; @@ -414,6 +478,9 @@ struct client_obd { /* used by quotacheck */ int cl_qchk_stat; /* quotacheck stat of the peer */ + + /* sequence manager */ + struct lu_client_seq *cl_seq; }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) @@ -430,6 +497,15 @@ struct mgs_obd { cfs_proc_dir_entry_t *mgs_proc_live; }; +/* hah, upper limit 64 should be enough */ +#define N_NOSQUASH_NIDS 64 +struct rootsquash_info { + uid_t rsi_uid; + gid_t rsi_gid; + int rsi_n_nosquash_nids; + lnet_nid_t rsi_nosquash_nids[N_NOSQUASH_NIDS]; +}; + struct mds_obd { /* NB this field MUST be first */ struct obd_device_target mds_obt; @@ -459,6 +535,7 @@ struct mds_obd { char *mds_profile; struct obd_export *mds_osc_exp; /* XXX lov_exp */ struct lov_desc mds_lov_desc; + __u32 mds_id; obd_id *mds_lov_objids; int mds_lov_objids_size; __u32 mds_lov_objids_in_file; @@ -467,7 +544,7 @@ struct mds_obd { struct file *mds_lov_objid_filp; struct file *mds_health_check_filp; unsigned long *mds_client_bitmap; - struct upcall_cache *mds_group_hash; +// struct upcall_cache *mds_group_hash; struct lustre_quota_info mds_quota_info; struct semaphore mds_qonoff_sem; @@ -475,6 +552,16 @@ struct mds_obd { unsigned long mds_lov_objids_valid:1, mds_fl_user_xattr:1, mds_fl_acl:1; + + + struct upcall_cache *mds_identity_cache; + struct upcall_cache *mds_rmtacl_cache; + + /* root squash */ + struct rootsquash_info *mds_rootsquash_info; + + /* for capability keys update */ + struct lustre_capa_key *mds_capa_keys; }; struct echo_obd { @@ -559,6 +646,38 @@ struct lov_obd { int lov_connects; }; +struct lmv_tgt_desc { + struct obd_uuid ltd_uuid; + struct obd_export *ltd_exp; + int ltd_active; /* is this target up for requests */ + int ltd_idx; + struct semaphore ltd_fid_sem; +}; + +struct lmv_obd { + int refcount; + struct lu_client_fld lmv_fld; + spinlock_t lmv_lock; + struct lmv_desc desc; + struct obd_uuid cluuid; + struct obd_export *exp; + + int connected; + int max_easize; + int max_def_easize; + int max_cookiesize; + int server_timeout; + struct semaphore init_sem; + + struct lmv_tgt_desc *tgts; + int tgts_size; + + struct obd_connect_data *datas; + int datas_size; + + struct obd_connect_data conn_data; +}; + struct niobuf_local { __u64 offset; __u32 len; @@ -569,13 +688,22 @@ struct niobuf_local { int rc; }; +#define LUSTRE_FLD_NAME "fld" +#define LUSTRE_SEQ_NAME "seq" + +#define LUSTRE_CMM_NAME "cmm" +#define LUSTRE_MDD_NAME "mdd" +#define LUSTRE_OSD_NAME "osd" +#define LUSTRE_LMV_NAME "lmv" +#define LUSTRE_CMM_MDC_NAME "cmm-mdc" + /* obd device type names */ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */ #define LUSTRE_MDS_NAME "mds" #define LUSTRE_MDT_NAME "mdt" #define LUSTRE_MDC_NAME "mdc" -#define LUSTRE_OSS_NAME "ost" /*FIXME change name to oss*/ -#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost*/ +#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */ +#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */ #define LUSTRE_OSC_NAME "osc" #define LUSTRE_LOV_NAME "lov" #define LUSTRE_MGS_NAME "mgs" @@ -659,25 +787,6 @@ static inline void oti_free_cookies(struct obd_trans_info *oti) oti->oti_numcookies = 0; } -/* llog contexts */ -enum llog_ctxt_id { - LLOG_CONFIG_ORIG_CTXT = 0, - LLOG_CONFIG_REPL_CTXT = 1, - LLOG_MDS_OST_ORIG_CTXT = 2, - LLOG_MDS_OST_REPL_CTXT = 3, - LLOG_SIZE_ORIG_CTXT = 4, - LLOG_SIZE_REPL_CTXT = 5, - LLOG_MD_ORIG_CTXT = 6, - LLOG_MD_REPL_CTXT = 7, - LLOG_RD1_ORIG_CTXT = 8, - LLOG_RD1_REPL_CTXT = 9, - LLOG_TEST_ORIG_CTXT = 10, - LLOG_TEST_REPL_CTXT = 11, - LLOG_LOVEA_ORIG_CTXT = 12, - LLOG_LOVEA_REPL_CTXT = 13, - LLOG_MAX_CTXTS -}; - /* * Events signalled through obd_notify() upcall-chain. */ @@ -686,6 +795,8 @@ enum obd_notify_event { OBD_NOTIFY_ACTIVE, /* Device deactivated */ OBD_NOTIFY_INACTIVE, + /* Device disconnected */ + OBD_NOTIFY_DISCON, /* Connect data for import were changed */ OBD_NOTIFY_OCD, /* Sync request */ @@ -706,6 +817,13 @@ struct obd_notify_upcall { void *onu_owner; }; +struct target_recovery_data { + svc_handler_t trd_recovery_handler; + pid_t trd_processing_task; + struct completion trd_starting; + struct completion trd_finishing; +}; + /* corresponds to one of the obd's */ #define MAX_OBD_NAME 128 #define OBD_DEVICE_MAGIC 0XAB5CD6EF @@ -717,14 +835,17 @@ struct obd_device { char obd_name[MAX_OBD_NAME]; struct obd_uuid obd_uuid; + struct lu_device *obd_lu_dev; + int obd_minor; unsigned int obd_attached:1, /* finished attach */ obd_set_up:1, /* finished setup */ obd_recovering:1, /* there are recoverable clients */ - obd_abort_recovery:1,/* somebody ioctl'ed us to abort */ + obd_abort_recovery:1,/* somebody ioctl'ed us to abort */ obd_replayable:1, /* recovery is enabled; inform clients */ obd_no_transno:1, /* no committed-transno notification */ obd_no_recov:1, /* fail instead of retry messages */ + obd_req_replaying:1, /* replaying requests */ obd_stopping:1, /* started cleanup */ obd_starting:1, /* started setup */ obd_force:1, /* cleanup with > 0 obd refcount */ @@ -744,7 +865,7 @@ struct obd_device { struct fsfilt_operations *obd_fsops; spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ - __u64 obd_osfs_age; + __u64 obd_osfs_age; struct lvfs_run_ctxt obd_lvfs_ctxt; struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; struct obd_device *obd_observer; @@ -756,11 +877,12 @@ struct obd_device { /* XXX encapsulate all this recovery data into one struct */ svc_handler_t obd_recovery_handler; + pid_t obd_processing_task; + int obd_max_recoverable_clients; int obd_connected_clients; int obd_recoverable_clients; spinlock_t obd_processing_task_lock; /* BH lock (timer) */ - pid_t obd_processing_task; __u64 obd_next_recovery_transno; int obd_replayed_requests; int obd_requests_queued_for_recovery; @@ -768,10 +890,18 @@ struct obd_device { struct list_head obd_uncommitted_replies; spinlock_t obd_uncommitted_replies_lock; cfs_timer_t obd_recovery_timer; - struct list_head obd_recovery_queue; - struct list_head obd_delayed_reply_queue; time_t obd_recovery_start; time_t obd_recovery_end; + + /* new recovery stuff from CMD2 */ + struct target_recovery_data obd_recovery_data; + int obd_replayed_locks; + atomic_t obd_req_replay_clients; + atomic_t obd_lock_replay_clients; + struct list_head obd_req_replay_queue; + struct list_head obd_lock_replay_queue; + struct list_head obd_final_req_queue; + int obd_recovery_stage; union { struct obd_device_target obt; @@ -782,15 +912,20 @@ struct obd_device { struct echo_client_obd echo_client; struct echo_obd echo; struct lov_obd lov; + struct lmv_obd lmv; struct mgs_obd mgs; } u; /* Fields used by LProcFS */ + unsigned int obd_cntr_base; + struct lprocfs_stats *obd_stats; + + unsigned int md_cntr_base; + struct lprocfs_stats *md_stats; + cfs_proc_dir_entry_t *obd_proc_entry; cfs_proc_dir_entry_t *obd_proc_exports; cfs_proc_dir_entry_t *obd_svc_procroot; - struct lprocfs_stats *obd_stats; struct lprocfs_stats *obd_svc_stats; - unsigned int obd_cntr_base; struct semaphore obd_proc_exp_sem; }; @@ -814,11 +949,59 @@ enum obd_cleanup_stage { }; /* get/set_info keys */ -#define KEY_MDS_CONN "mds_conn" -#define KEY_NEXT_ID "next_id" -#define KEY_LOVDESC "lovdesc" -#define KEY_INIT_RECOV "initial_recov" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_READ_ONLY "read-only" +#define KEY_MDS_CONN "mds_conn" +#define KEY_NEXT_ID "next_id" +#define KEY_LOVDESC "lovdesc" +#define KEY_INIT_RECOV "initial_recov" +#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_FLUSH_CTX "flush_ctx" +#define KEY_CAPA_KEY "capa_key" +#define KEY_CONN_DATA "conn_data" +#define KEY_MAX_EASIZE "max_easize" +#define KEY_REVIMP_UPD "revimp_update" + +struct lu_context; + +struct md_op_data { + struct lu_fid op_fid1; /* operation fid1 (usualy parent) */ + struct lu_fid op_fid2; /* operation fid2 (usualy child) */ + mdsno_t op_mds; /* what mds server open will go to */ + struct lustre_handle op_handle; + __u64 op_mod_time; + const char *op_name; + int op_namelen; + __u32 op_mode; + struct lmv_stripe_md *op_mea1; + struct lmv_stripe_md *op_mea2; + __u32 op_suppgids[2]; + __u32 op_fsuid; + __u32 op_fsgid; + __u32 op_cap; + + /* iattr fields and blocks. */ + struct iattr op_attr; +#ifdef __KERNEL__ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) + unsigned int op_attr_flags; +#endif +#endif + loff_t op_attr_blocks; + + /* Size-on-MDS epoch and flags. */ + __u64 op_ioepoch; + __u32 op_flags; + + /* Capa fields */ + struct obd_capa *op_capa1; + struct obd_capa *op_capa2; + + /* Various operation flags. */ + __u32 op_bias; + + /* Operation type */ + __u32 op_opc; +}; struct obd_ops { struct module *o_owner; @@ -831,7 +1014,7 @@ struct obd_ops { struct ptlrpc_request_set *set); int (*o_attach)(struct obd_device *dev, obd_count len, void *data); int (*o_detach)(struct obd_device *dev); - int (*o_setup) (struct obd_device *dev, obd_count len, void *data); + int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); int (*o_precleanup)(struct obd_device *dev, enum obd_cleanup_stage cleanup_stage); int (*o_cleanup)(struct obd_device *dev); @@ -845,13 +1028,28 @@ struct obd_ops { * data. @ocd->ocd_connect_flags is modified to reflect flags actually * granted by the target, which are guaranteed to be a subset of flags * asked for. If @ocd == NULL, use default parameters. */ - int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, + int (*o_connect)(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *ocd); int (*o_reconnect)(struct obd_export *exp, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *ocd); int (*o_disconnect)(struct obd_export *exp); + /* Initialize/finalize fids infrastructure. */ + int (*o_fid_init)(struct obd_export *exp); + int (*o_fid_fini)(struct obd_export *exp); + + /* Allocate new fid according to passed @hint. */ + int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data); + + /* + * Object with @fid is getting deleted, we may want to do something + * about this. + */ + int (*o_fid_delete)(struct obd_export *exp, const struct lu_fid *fid); + int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs, __u64 max_age); int (*o_statfs_async)(struct obd_device *obd, struct obd_info *oinfo, @@ -864,6 +1062,7 @@ struct obd_ops { struct lov_stripe_md *mem_tgt); int (*o_preallocate)(struct lustre_handle *, obd_count *req, obd_id *ids); + /* FIXME: add fid capability support for create & destroy! */ int (*o_create)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); int (*o_destroy)(struct obd_export *exp, struct obdo *oa, @@ -887,7 +1086,7 @@ struct obd_ops { int (*o_prep_async_page)(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, - cfs_page_t *page, obd_off offset, + cfs_page_t *page, obd_off offset, struct obd_async_page_ops *ops, void *data, void **res); int (*o_queue_async_io)(struct obd_export *exp, @@ -920,7 +1119,8 @@ struct obd_ops { struct obd_trans_info *oti, struct ptlrpc_request_set *rqset); int (*o_sync)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, obd_size end); + struct lov_stripe_md *ea, obd_size start, obd_size end, + void *capa); int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, struct lov_stripe_md *src, obd_size start, obd_size end, struct obd_trans_info *oti); @@ -933,7 +1133,8 @@ struct obd_ops { int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, struct obd_trans_info *oti); + struct niobuf_local *local, struct obd_trans_info *oti, + struct lustre_capa *capa); int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, @@ -957,14 +1158,15 @@ struct obd_ops { int cmd, obd_off *); /* llog related obd_methods */ - int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid, - struct obd_uuid *uuid); + int (*o_llog_init)(struct obd_device *obd, struct obd_llogs *llog, + struct obd_device *disk_obd, int count, + struct llog_catid *logid, struct obd_uuid *uuid); int (*o_llog_finish)(struct obd_device *obd, int count); + int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *); /* metadata-only methods */ - int (*o_pin)(struct obd_export *, obd_id ino, __u32 gen, int type, - struct obd_client_handle *, int flag); + int (*o_pin)(struct obd_export *, const struct lu_fid *fid, + struct obd_capa *, struct obd_client_handle *, int flag); int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int); int (*o_import_event)(struct obd_device *, struct obd_import *, @@ -986,6 +1188,132 @@ struct obd_ops { * Also, add a wrapper function in include/linux/obd_class.h. */ }; +/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */ +struct lmv_stripe_md { + __u32 mea_magic; + __u32 mea_count; + __u32 mea_master; + __u32 mea_padding; + struct lu_fid mea_ids[0]; +}; + +enum { + LUSTRE_OPC_MKDIR = (1 << 0), + LUSTRE_OPC_SYMLINK = (1 << 1), + LUSTRE_OPC_MKNOD = (1 << 2), + LUSTRE_OPC_CREATE = (1 << 3), + LUSTRE_OPC_ANY = (1 << 4) +}; + +/* lmv structures */ +#define MEA_MAGIC_LAST_CHAR 0xb2221ca1 +#define MEA_MAGIC_ALL_CHARS 0xb222a11c +#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b + +#define MAX_HASH_SIZE 0x7fffffffUL +#define MAX_HASH_HIGHEST_BIT 0x10000000 + +struct lustre_md { + struct mdt_body *body; + struct lov_stripe_md *lsm; + struct lmv_stripe_md *mea; +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *posix_acl; +#endif + struct mdt_remote_perm *remote_perm; + struct obd_capa *mds_capa; + struct obd_capa *oss_capa; +}; + +struct md_ops { + int (*m_getstatus)(struct obd_export *, struct lu_fid *, + struct obd_capa **); + int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *, + ldlm_iterator_t, void *); + int (*m_close)(struct obd_export *, struct md_op_data *, + struct obd_client_handle *, struct ptlrpc_request **); + int (*m_create)(struct obd_export *, struct md_op_data *, + const void *, int, int, __u32, __u32, __u32, + __u64, struct ptlrpc_request **); + int (*m_done_writing)(struct obd_export *, struct md_op_data *, + struct obd_client_handle *); + int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *, + int, struct md_op_data *, struct lustre_handle *, + void *, int, ldlm_completion_callback, + ldlm_blocking_callback, void *, int); + int (*m_getattr)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, obd_valid, int, + struct ptlrpc_request **); + int (*m_getattr_name)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, const char *, int, obd_valid, + int, struct ptlrpc_request **); + int (*m_intent_lock)(struct obd_export *, struct md_op_data *, + void *, int, struct lookup_intent *, int, + struct ptlrpc_request **, + ldlm_blocking_callback, int); + int (*m_link)(struct obd_export *, struct md_op_data *, + struct ptlrpc_request **); + int (*m_rename)(struct obd_export *, struct md_op_data *, + const char *, int, const char *, int, + struct ptlrpc_request **); + int (*m_is_subdir)(struct obd_export *, const struct lu_fid *, + const struct lu_fid *, + struct ptlrpc_request **); + int (*m_setattr)(struct obd_export *, struct md_op_data *, void *, + int , void *, int, struct ptlrpc_request **); + int (*m_sync)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, struct ptlrpc_request **); + int (*m_readpage)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, __u64, struct page *, + struct ptlrpc_request **); + + int (*m_unlink)(struct obd_export *, struct md_op_data *, + struct ptlrpc_request **); + + int (*m_setxattr)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, obd_valid, const char *, + const char *, int, int, int, + struct ptlrpc_request **); + + int (*m_getxattr)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, obd_valid, const char *, + const char *, int, int, int, + struct ptlrpc_request **); + + int (*m_init_ea_size)(struct obd_export *, int, int, int); + + int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, + int, struct obd_export *, struct obd_export *, + struct lustre_md *); + + int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *); + + int (*m_set_open_replay_data)(struct obd_export *, + struct obd_client_handle *, + struct ptlrpc_request *); + int (*m_clear_open_replay_data)(struct obd_export *, + struct obd_client_handle *); + int (*m_set_lock_data)(struct obd_export *, __u64 *, void *); + + int (*m_lock_match)(struct obd_export *, int, const struct lu_fid *, + ldlm_type_t, ldlm_policy_data_t *, ldlm_mode_t, + struct lustre_handle *); + + int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, + int flags, void *opaque); + int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, + renew_capa_cb_t cb); + + int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, + struct obd_capa *, struct ptlrpc_request **); + + /* + * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to + * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a + * wrapper function in include/linux/obd_class.h. + */ +}; + struct lsm_operations { void (*lsm_free)(struct lov_stripe_md *); int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, @@ -1014,7 +1342,7 @@ static inline struct lsm_operations *lsm_op_find(int magic) case LOV_MAGIC_JOIN: return &lsm_join_ops; default: - CERROR("Cannot recognize lsm_magic %d", magic); + CERROR("Cannot recognize lsm_magic %d\n", magic); return NULL; } } @@ -1052,4 +1380,14 @@ static inline void init_obd_quota_ops(quota_interface_t *interface, obd_ops->o_quotactl = QUOTA_OP(interface, ctl); } +static inline __u64 oinfo_mdsno(struct obd_info *oinfo) +{ + return oinfo->oi_oa->o_gr - FILTER_GROUP_MDS0; +} + +static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo) +{ + return oinfo->oi_capa; +} + #endif /* __OBD_H */ diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index ac668cf..e5e0ff8 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -23,6 +23,10 @@ #ifndef __CLASS_OBD_H #define __CLASS_OBD_H +#ifndef __KERNEL__ +# include +#endif + #include #include #include @@ -49,10 +53,13 @@ extern spinlock_t obd_dev_lock; extern struct obd_device *class_conn2obd(struct lustre_handle *); extern struct obd_device *class_exp2obd(struct obd_export *); +struct lu_device_type; + /* genops.c */ struct obd_export *class_conn2export(struct lustre_handle *); -int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, - const char *nm); +int class_register_type(struct obd_ops *, struct md_ops *, + struct lprocfs_vars *, const char *nm, + struct lu_device_type *ldt); int class_unregister_type(const char *nm); struct obd_device *class_newdev(const char *type_name, const char *name); @@ -81,12 +88,12 @@ int oig_wait(struct obd_io_group *oig); char *obd_export_nid2str(struct obd_export *exp); -int obd_export_evict_by_nid(struct obd_device *obd, char *nid); -int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid); +int obd_export_evict_by_nid(struct obd_device *obd, const char *nid); +int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid); /* obd_config.c */ int class_process_config(struct lustre_cfg *lcfg); -int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, +int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, struct lustre_cfg *lcfg, void *data); int class_attach(struct lustre_cfg *lcfg); int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg); @@ -100,7 +107,6 @@ void class_decref(struct obd_device *obd); #define CFG_F_SKIP 0x04 /* We should ignore this cfg command */ #define CFG_F_COMPAT146 0x08 /* Allow old-style logs */ #define CFG_F_EXCLUDE 0x10 /* OST exclusion list */ -#define CFG_F_SERVER146 0x20 /* Using old server */ /* Passed as data param to class_config_parse_llog */ struct config_llog_instance { @@ -108,7 +114,7 @@ struct config_llog_instance { struct super_block *cfg_sb; struct obd_uuid cfg_uuid; int cfg_last_idx; /* for partial llog processing */ - int cfg_flags; + int cfg_flags; }; int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); @@ -123,19 +129,19 @@ struct config_llog_data { struct list_head cld_list_chain; atomic_t cld_refcount; struct obd_export *cld_mgcexp; - unsigned int cld_stopping:1; /* we were told to stop watching */ - unsigned int cld_lostlock:1; /* lock not requeued */ + unsigned int cld_stopping:1, /* we were told to stop watching */ + cld_lostlock:1; /* lock not requeued */ }; struct lustre_profile { struct list_head lp_list; - char * lp_profile; - char * lp_osc; - char * lp_mdc; + char *lp_profile; + char *lp_dt; + char *lp_md; }; -struct lustre_profile *class_get_profile(char * prof); -void class_del_profile(char *prof); +struct lustre_profile *class_get_profile(const char * prof); +void class_del_profile(const char *prof); void class_del_profiles(void); #define class_export_rpc_get(exp) \ @@ -192,16 +198,22 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd, int class_disconnect(struct obd_export *exp); void class_fail_export(struct obd_export *exp); void class_disconnect_exports(struct obd_device *obddev); -void class_disconnect_stale_exports(struct obd_device *obddev); +int class_disconnect_stale_exports(struct obd_device *, + int (*test_export)(struct obd_export *)); int class_manual_cleanup(struct obd_device *obd); -/* obdo.c */ void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid); void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj); - +void obdo_from_iattr(struct obdo *oa, struct iattr *attr, + unsigned int ia_valid); +void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid); +void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid); +void obdo_from_md(struct obdo *oa, struct md_op_data *op_data, + unsigned int valid); #define OBT(dev) (dev)->obd_type -#define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op +#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->o_ ## op +#define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op #define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op /* Ensure obd_setup: used for cleanup which must be called @@ -253,23 +265,51 @@ do { \ (export)->exp_ops_stats, coffset); \ } +#define MD_COUNTER_OFFSET(op) \ + ((offsetof(struct md_ops, m_ ## op) - \ + offsetof(struct md_ops, m_getstatus)) \ + / sizeof(((struct md_ops *)(0))->m_getstatus)) + +#define MD_COUNTER_INCREMENT(obdx, op) \ + if ((obd)->md_stats != NULL) { \ + unsigned int coffset; \ + coffset = (unsigned int)((obdx)->md_cntr_base) + \ + MD_COUNTER_OFFSET(op); \ + LASSERT(coffset < (obdx)->md_stats->ls_num); \ + lprocfs_counter_incr((obdx)->md_stats, coffset); \ + } + +#define EXP_MD_COUNTER_INCREMENT(export, op) \ + if ((export)->exp_obd->obd_stats != NULL) { \ + unsigned int coffset; \ + coffset = (unsigned int)((export)->exp_obd->md_cntr_base) + \ + MD_COUNTER_OFFSET(op); \ + LASSERT(coffset < (export)->exp_obd->md_stats->ls_num); \ + lprocfs_counter_incr((export)->exp_obd->md_stats, coffset); \ + if ((export)->exp_md_stats != NULL) \ + lprocfs_counter_incr( \ + (export)->exp_md_stats, coffset); \ + } + #else #define OBD_COUNTER_OFFSET(op) #define OBD_COUNTER_INCREMENT(obd, op) -#define EXP_COUNTER_INCREMENT(exp, op); +#define EXP_COUNTER_INCREMENT(exp, op) +#define MD_COUNTER_INCREMENT(obd, op) +#define EXP_MD_COUNTER_INCREMENT(exp, op) #endif -#define OBD_CHECK_OP(obd, op, err) \ +#define OBD_CHECK_MD_OP(obd, op, err) \ do { \ - if (!OBT(obd) || !OBP((obd), op)) {\ + if (!OBT(obd) || !MDP((obd), op)) { \ if (err) \ - CERROR("obd_" #op ": dev %d no operation\n", \ - obd->obd_minor); \ + CERROR("md_" #op ": dev %s/%d no operation\n", \ + obd->obd_name, obd->obd_minor); \ RETURN(err); \ } \ } while (0) -#define EXP_CHECK_OP(exp, op) \ +#define EXP_CHECK_MD_OP(exp, op) \ do { \ if ((exp) == NULL) { \ CERROR("obd_" #op ": NULL export\n"); \ @@ -279,23 +319,52 @@ do { \ CERROR("obd_" #op ": cleaned up obd\n"); \ RETURN(-EOPNOTSUPP); \ } \ - if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - (exp)->exp_obd->obd_minor); \ + if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \ + CERROR("obd_" #op ": dev %s/%d no operation\n", \ + (exp)->exp_obd->obd_name, \ + (exp)->exp_obd->obd_minor); \ RETURN(-EOPNOTSUPP); \ } \ } while (0) -#define CTXT_CHECK_OP(ctxt, op, err) \ + +#define OBD_CHECK_DT_OP(obd, op, err) \ do { \ - if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \ + if (!OBT(obd) || !OBP((obd), op)) { \ if (err) \ - CERROR("lop_" #op ": dev %d no operation\n", \ - ctxt->loc_obd->obd_minor); \ + CERROR("obd_" #op ": dev %d no operation\n", \ + obd->obd_minor); \ RETURN(err); \ } \ } while (0) +#define EXP_CHECK_DT_OP(exp, op) \ +do { \ + if ((exp) == NULL) { \ + CERROR("obd_" #op ": NULL export\n"); \ + RETURN(-ENODEV); \ + } \ + if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \ + CERROR("obd_" #op ": cleaned up obd\n"); \ + RETURN(-EOPNOTSUPP); \ + } \ + if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ + CERROR("obd_" #op ": dev %d no operation\n", \ + (exp)->exp_obd->obd_minor); \ + RETURN(-EOPNOTSUPP); \ + } \ +} while (0) + +#define CTXT_CHECK_OP(ctxt, op, err) \ +do { \ + if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \ + if (err) \ + CERROR("lop_" #op ": dev %d no operation\n", \ + ctxt->loc_obd->obd_minor); \ + RETURN(err); \ + } \ +} while (0) + static inline int class_devno_max(void) { return MAX_OBD_DEVICES; @@ -307,7 +376,7 @@ static inline int obd_get_info(struct obd_export *exp, __u32 keylen, int rc; ENTRY; - EXP_CHECK_OP(exp, get_info); + EXP_CHECK_DT_OP(exp, get_info); EXP_COUNTER_INCREMENT(exp, get_info); rc = OBP(exp->exp_obd, get_info)(exp, keylen, key, vallen, val); @@ -321,49 +390,117 @@ static inline int obd_set_info_async(struct obd_export *exp, obd_count keylen, int rc; ENTRY; - EXP_CHECK_OP(exp, set_info_async); + EXP_CHECK_DT_OP(exp, set_info_async); EXP_COUNTER_INCREMENT(exp, set_info_async); - rc = OBP(exp->exp_obd, set_info_async)(exp, keylen, key, vallen, val, + rc = OBP(exp->exp_obd, set_info_async)(exp, keylen, key, vallen, val, set); RETURN(rc); } -static inline int obd_setup(struct obd_device *obd, int datalen, void *data) +#ifdef __KERNEL__ +#define DECLARE_LU_VARS(ldt, d) \ + struct lu_device_type *ldt; \ + struct lu_device *d +#else +#define DECLARE_LU_VARS(ldt, d) \ + extern void __placeholder_to_put_a_semicolon(void) +#endif +static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) { int rc; + DECLARE_LU_VARS(ldt, d); ENTRY; - OBD_CHECK_OP(obd, setup, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, setup); +#ifdef __KERNEL__ + ldt = obd->obd_type->typ_lu; + if (ldt != NULL) { + struct lu_env env; - rc = OBP(obd, setup)(obd, datalen, data); + rc = lu_env_init(&env, NULL, ldt->ldt_ctx_tags); + if (rc == 0) { + d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg); + lu_env_fini(&env); + if (!IS_ERR(d)) { + obd->obd_lu_dev = d; + d->ld_obd = obd; + rc = 0; + } else + rc = PTR_ERR(d); + } + } else +#endif + { + OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, setup); + rc = OBP(obd, setup)(obd, cfg); + } RETURN(rc); } -static inline int obd_precleanup(struct obd_device *obd, +static inline int obd_precleanup(struct obd_device *obd, enum obd_cleanup_stage cleanup_stage) { int rc; + DECLARE_LU_VARS(ldt, d); ENTRY; - OBD_CHECK_OP(obd, precleanup, 0); - OBD_COUNTER_INCREMENT(obd, precleanup); + OBD_CHECK_DEV(obd); +#ifdef __KERNEL__ + ldt = obd->obd_type->typ_lu; + d = obd->obd_lu_dev; + if (ldt != NULL) { + LASSERT(d != NULL); + if (cleanup_stage == OBD_CLEANUP_EXPORTS) { + struct lu_env env; + + rc = lu_env_init(&env, NULL, ldt->ldt_ctx_tags); + if (rc == 0) { + ldt->ldt_ops->ldto_device_fini(&env, d); + lu_env_fini(&env); + } + } else { + rc = 0; + } + } else +#endif + { + OBD_CHECK_DT_OP(obd, precleanup, 0); + rc = OBP(obd, precleanup)(obd, cleanup_stage); + } - rc = OBP(obd, precleanup)(obd, cleanup_stage); + OBD_COUNTER_INCREMENT(obd, precleanup); RETURN(rc); } static inline int obd_cleanup(struct obd_device *obd) { int rc; + DECLARE_LU_VARS(ldt, d); ENTRY; OBD_CHECK_DEV(obd); - OBD_CHECK_OP(obd, cleanup, 0); - OBD_COUNTER_INCREMENT(obd, cleanup); - rc = OBP(obd, cleanup)(obd); +#ifdef __KERNEL__ + ldt = obd->obd_type->typ_lu; + d = obd->obd_lu_dev; + if (ldt != NULL) { + struct lu_env env; + LASSERT(d != NULL); + + rc = lu_env_init(&env, NULL, ldt->ldt_ctx_tags); + if (rc == 0) { + ldt->ldt_ops->ldto_device_free(&env, d); + lu_env_fini(&env); + obd->obd_lu_dev = NULL; + } + } else +#endif + { + OBD_CHECK_DT_OP(obd, cleanup, 0); + rc = OBP(obd, cleanup)(obd); + } + OBD_COUNTER_INCREMENT(obd, cleanup); RETURN(rc); } @@ -371,12 +508,30 @@ static inline int obd_process_config(struct obd_device *obd, int datalen, void *data) { int rc; + DECLARE_LU_VARS(ldt, d); ENTRY; - OBD_CHECK_OP(obd, process_config, -EOPNOTSUPP); + OBD_CHECK_DEV(obd); + +#ifdef __KERNEL__ + ldt = obd->obd_type->typ_lu; + d = obd->obd_lu_dev; + if (ldt != NULL && d != NULL) { + struct lu_env env; + + rc = lu_env_init(&env, NULL, ldt->ldt_ctx_tags); + if (rc == 0) { + rc = d->ld_ops->ldo_process_config(&env, d, data); + lu_env_fini(&env); + } + } else +#endif + { + OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP); + rc = OBP(obd, process_config)(obd, datalen, data); + } OBD_COUNTER_INCREMENT(obd, process_config); - rc = OBP(obd, process_config)(obd, datalen, data); RETURN(rc); } @@ -394,7 +549,7 @@ static inline int obd_packmd(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, packmd); + EXP_CHECK_DT_OP(exp, packmd); EXP_COUNTER_INCREMENT(exp, packmd); rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src); @@ -439,7 +594,7 @@ static inline int obd_unpackmd(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, unpackmd); + EXP_CHECK_DT_OP(exp, unpackmd); EXP_COUNTER_INCREMENT(exp, unpackmd); rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len); @@ -470,7 +625,7 @@ static inline int obd_checkmd(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, checkmd); + EXP_CHECK_DT_OP(exp, checkmd); EXP_COUNTER_INCREMENT(exp, checkmd); rc = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt); @@ -484,7 +639,7 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo, int rc; ENTRY; - EXP_CHECK_OP(exp, create); + EXP_CHECK_DT_OP(exp, create); EXP_COUNTER_INCREMENT(exp, create); rc = OBP(exp->exp_obd, create)(exp, obdo, ea, oti); @@ -499,7 +654,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, int rc; ENTRY; - EXP_CHECK_OP(exp, destroy); + EXP_CHECK_DT_OP(exp, destroy); EXP_COUNTER_INCREMENT(exp, destroy); rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); @@ -511,7 +666,7 @@ static inline int obd_getattr(struct obd_export *exp, struct obd_info *oinfo) int rc; ENTRY; - EXP_CHECK_OP(exp, getattr); + EXP_CHECK_DT_OP(exp, getattr); EXP_COUNTER_INCREMENT(exp, getattr); rc = OBP(exp->exp_obd, getattr)(exp, oinfo); @@ -525,7 +680,7 @@ static inline int obd_getattr_async(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, getattr_async); + EXP_CHECK_DT_OP(exp, getattr_async); EXP_COUNTER_INCREMENT(exp, getattr_async); rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set); @@ -538,7 +693,7 @@ static inline int obd_setattr(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; - EXP_CHECK_OP(exp, setattr); + EXP_CHECK_DT_OP(exp, setattr); EXP_COUNTER_INCREMENT(exp, setattr); rc = OBP(exp->exp_obd, setattr)(exp, oinfo, oti); @@ -554,7 +709,7 @@ static inline int obd_setattr_rqset(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, setattr_async); + EXP_CHECK_DT_OP(exp, setattr_async); EXP_COUNTER_INCREMENT(exp, setattr_async); set = ptlrpc_prep_set(); @@ -578,7 +733,7 @@ static inline int obd_setattr_async(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, setattr_async); + EXP_CHECK_DT_OP(exp, setattr_async); EXP_COUNTER_INCREMENT(exp, setattr_async); rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set); @@ -593,7 +748,7 @@ static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, ENTRY; OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_OP(obd, add_conn, -EOPNOTSUPP); + OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(obd, add_conn); rc = OBP(obd, add_conn)(imp, uuid, priority); @@ -607,26 +762,30 @@ static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) ENTRY; OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_OP(obd, del_conn, -EOPNOTSUPP); + OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(obd, del_conn); rc = OBP(obd, del_conn)(imp, uuid); RETURN(rc); } -static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd, +static inline int obd_connect(const struct lu_env *env, + struct lustre_handle *conn,struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *d) { int rc; - __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ +#ifdef LIBCFS_DEBUG + __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition + * check */ +#endif ENTRY; OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_OP(obd, connect, -EOPNOTSUPP); + OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(obd, connect); - rc = OBP(obd, connect)(conn, obd, cluuid, d); + rc = OBP(obd, connect)(env, conn, obd, cluuid, d); /* check that only subset is granted */ LASSERT(ergo(d != NULL, (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); @@ -639,11 +798,15 @@ static inline int obd_reconnect(struct obd_export *exp, struct obd_connect_data *d) { int rc; - __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ +#ifdef LIBCFS_DEBUG + __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition + * check */ +#endif + ENTRY; OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_OP(obd, reconnect, 0); + OBD_CHECK_DT_OP(obd, reconnect, 0); OBD_COUNTER_INCREMENT(obd, reconnect); rc = OBP(obd, reconnect)(exp, obd, cluuid, d); @@ -658,19 +821,70 @@ static inline int obd_disconnect(struct obd_export *exp) int rc; ENTRY; - EXP_CHECK_OP(exp, disconnect); + EXP_CHECK_DT_OP(exp, disconnect); EXP_COUNTER_INCREMENT(exp, disconnect); rc = OBP(exp->exp_obd, disconnect)(exp); RETURN(rc); } +static inline int obd_fid_init(struct obd_export *exp) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, fid_init, 0); + EXP_COUNTER_INCREMENT(exp, fid_init); + + rc = OBP(exp->exp_obd, fid_init)(exp); + RETURN(rc); +} + +static inline int obd_fid_fini(struct obd_export *exp) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, fid_fini, 0); + EXP_COUNTER_INCREMENT(exp, fid_fini); + + rc = OBP(exp->exp_obd, fid_fini)(exp); + RETURN(rc); +} + +static inline int obd_fid_alloc(struct obd_export *exp, + struct lu_fid *fid, + struct md_op_data *op_data) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, fid_alloc); + EXP_COUNTER_INCREMENT(exp, fid_alloc); + + rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, op_data); + RETURN(rc); +} + +static inline int obd_fid_delete(struct obd_export *exp, + const struct lu_fid *fid) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, fid_delete); + EXP_COUNTER_INCREMENT(exp, fid_delete); + + rc = OBP(exp->exp_obd, fid_delete)(exp, fid); + RETURN(rc); +} + static inline int obd_ping(struct obd_export *exp) { int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, ping, 0); + OBD_CHECK_DT_OP(exp->exp_obd, ping, 0); EXP_COUNTER_INCREMENT(exp, ping); rc = OBP(exp->exp_obd, ping)(exp); @@ -703,7 +917,7 @@ static inline int obd_extent_calc(struct obd_export *exp, { int rc; ENTRY; - EXP_CHECK_OP(exp, extent_calc); + EXP_CHECK_DT_OP(exp, extent_calc); rc = OBP(exp->exp_obd, extent_calc)(exp, md, cmd, offset); RETURN(rc); } @@ -717,6 +931,18 @@ obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr) exp->exp_obd); } +static inline int +obd_lvfs_open_llog(struct obd_export *exp, __u64 id_ino, struct dentry *dentry) +{ + LASSERT(exp->exp_obd); + CERROR("FIXME what's the story here? This needs to be an obd fn?\n"); +#if 0 + return lvfs_open_llog(&exp->exp_obd->obd_lvfs_ctxt, id_ino, + dentry, exp->exp_obd); +#endif + return 0; +} + #ifndef time_before #define time_before(t1, t2) ((long)t2 - (long)t1 > 0) #endif @@ -735,7 +961,7 @@ static inline int obd_statfs_async(struct obd_device *obd, if (obd == NULL) RETURN(-EINVAL); - OBD_CHECK_OP(obd, statfs, -EOPNOTSUPP); + OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(obd, statfs); CDEBUG(D_SUPER, "%s: osfs %p age "LPU64", max_age "LPU64"\n", @@ -765,7 +991,7 @@ static inline int obd_statfs_rqset(struct obd_device *obd, int rc = 0; ENTRY; - set = ptlrpc_prep_set(); + set = ptlrpc_prep_set(); if (set == NULL) RETURN(-ENOMEM); @@ -789,7 +1015,7 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, if (obd == NULL) RETURN(-EINVAL); - OBD_CHECK_OP(obd, statfs, -EOPNOTSUPP); + OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(obd, statfs); CDEBUG(D_SUPER, "osfs "LPU64", max_age "LPU64"\n", @@ -817,15 +1043,15 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, static inline int obd_sync(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, obd_size start, - obd_size end) + obd_size end, void *capa) { int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, sync, -EOPNOTSUPP); + OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP); EXP_COUNTER_INCREMENT(exp, sync); - rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end); + rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end, capa); RETURN(rc); } @@ -837,7 +1063,7 @@ static inline int obd_punch_rqset(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, punch); + EXP_CHECK_DT_OP(exp, punch); EXP_COUNTER_INCREMENT(exp, punch); set = ptlrpc_prep_set(); @@ -858,7 +1084,7 @@ static inline int obd_punch(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; - EXP_CHECK_OP(exp, punch); + EXP_CHECK_DT_OP(exp, punch); EXP_COUNTER_INCREMENT(exp, punch); rc = OBP(exp->exp_obd, punch)(exp, oinfo, oti, rqset); @@ -872,7 +1098,7 @@ static inline int obd_brw(int cmd, struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, brw); + EXP_CHECK_DT_OP(exp, brw); EXP_COUNTER_INCREMENT(exp, brw); if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) { @@ -893,7 +1119,7 @@ static inline int obd_brw_async(int cmd, struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, brw_async); + EXP_CHECK_DT_OP(exp, brw_async); EXP_COUNTER_INCREMENT(exp, brw_async); if (!(cmd & OBD_BRW_RWMASK)) { @@ -908,24 +1134,31 @@ static inline int obd_brw_async(int cmd, struct obd_export *exp, static inline int obd_brw_rqset(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pg, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct obd_capa *ocapa) { struct ptlrpc_request_set *set = NULL; struct obd_info oinfo = { { { 0 } } }; + atomic_t nob; int rc = 0; ENTRY; set = ptlrpc_prep_set(); if (set == NULL) RETURN(-ENOMEM); + atomic_set(&nob, 0); + set->set_countp = &nob; oinfo.oi_oa = oa; oinfo.oi_md = lsm; + oinfo.oi_capa = ocapa; rc = obd_brw_async(cmd, exp, &oinfo, oa_bufs, pg, oti, set); if (rc == 0) { rc = ptlrpc_set_wait(set); if (rc) CERROR("error from callback: rc = %d\n", rc); + else + rc = atomic_read(&nob); } else { CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, "error from obd_brw_async: rc = %d\n", rc); @@ -944,7 +1177,7 @@ static inline int obd_prep_async_page(struct obd_export *exp, int ret; ENTRY; - OBD_CHECK_OP(exp->exp_obd, prep_async_page, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, prep_async_page); EXP_COUNTER_INCREMENT(exp, prep_async_page); ret = OBP(exp->exp_obd, prep_async_page)(exp, lsm, loi, page, offset, @@ -961,7 +1194,7 @@ static inline int obd_queue_async_io(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, queue_async_io, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, queue_async_io); EXP_COUNTER_INCREMENT(exp, queue_async_io); LASSERT(cmd & OBD_BRW_RWMASK); @@ -978,7 +1211,7 @@ static inline int obd_set_async_flags(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, set_async_flags); EXP_COUNTER_INCREMENT(exp, set_async_flags); rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie, @@ -997,7 +1230,7 @@ static inline int obd_queue_group_io(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, queue_group_io); EXP_COUNTER_INCREMENT(exp, queue_group_io); LASSERT(cmd & OBD_BRW_RWMASK); @@ -1015,7 +1248,7 @@ static inline int obd_trigger_group_io(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, trigger_group_io); EXP_COUNTER_INCREMENT(exp, trigger_group_io); rc = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig); @@ -1029,7 +1262,7 @@ static inline int obd_teardown_async_page(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, teardown_async_page, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, teardown_async_page); EXP_COUNTER_INCREMENT(exp, teardown_async_page); rc = OBP(exp->exp_obd, teardown_async_page)(exp, lsm, loi, cookie); @@ -1040,16 +1273,17 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, struct niobuf_local *local, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, preprw, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, preprw); EXP_COUNTER_INCREMENT(exp, preprw); rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount, - remote, local, oti); + remote, local, oti, capa); RETURN(rc); } @@ -1060,7 +1294,7 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, { ENTRY; - OBD_CHECK_OP(exp->exp_obd, commitrw, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, commitrw); EXP_COUNTER_INCREMENT(exp, commitrw); rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount, @@ -1075,7 +1309,7 @@ static inline int obd_merge_lvb(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, merge_lvb); EXP_COUNTER_INCREMENT(exp, merge_lvb); rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only); @@ -1089,7 +1323,7 @@ static inline int obd_adjust_kms(struct obd_export *exp, int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP); + EXP_CHECK_DT_OP(exp, adjust_kms); EXP_COUNTER_INCREMENT(exp, adjust_kms); rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink); @@ -1102,7 +1336,7 @@ static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, iocontrol); + EXP_CHECK_DT_OP(exp, iocontrol); EXP_COUNTER_INCREMENT(exp, iocontrol); rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg); @@ -1116,7 +1350,7 @@ static inline int obd_enqueue_rqset(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, enqueue); + EXP_CHECK_DT_OP(exp, enqueue); EXP_COUNTER_INCREMENT(exp, enqueue); einfo->ei_rqset = ptlrpc_prep_set(); @@ -1139,7 +1373,7 @@ static inline int obd_enqueue(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, enqueue); + EXP_CHECK_DT_OP(exp, enqueue); EXP_COUNTER_INCREMENT(exp, enqueue); rc = OBP(exp->exp_obd, enqueue)(exp, oinfo, einfo); @@ -1153,7 +1387,7 @@ static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea, int rc; ENTRY; - EXP_CHECK_OP(exp, match); + EXP_CHECK_DT_OP(exp, match); EXP_COUNTER_INCREMENT(exp, match); rc = OBP(exp->exp_obd, match)(exp, ea, type, policy, mode, flags, data, @@ -1168,7 +1402,7 @@ static inline int obd_change_cbdata(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, change_cbdata); + EXP_CHECK_DT_OP(exp, change_cbdata); EXP_COUNTER_INCREMENT(exp, change_cbdata); rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data); @@ -1182,7 +1416,7 @@ static inline int obd_cancel(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, cancel); + EXP_CHECK_DT_OP(exp, cancel); EXP_COUNTER_INCREMENT(exp, cancel); rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh); @@ -1190,13 +1424,13 @@ static inline int obd_cancel(struct obd_export *exp, } static inline int obd_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *ea, int flags, - void *opaque) + struct lov_stripe_md *ea, + int flags, void *opaque) { int rc; ENTRY; - EXP_CHECK_OP(exp, cancel_unused); + EXP_CHECK_DT_OP(exp, cancel_unused); EXP_COUNTER_INCREMENT(exp, cancel_unused); rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque); @@ -1209,23 +1443,24 @@ static inline int obd_join_lru(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, join_lru); + EXP_CHECK_DT_OP(exp, join_lru); EXP_COUNTER_INCREMENT(exp, join_lru); rc = OBP(exp->exp_obd, join_lru)(exp, ea, join); RETURN(rc); } -static inline int obd_pin(struct obd_export *exp, obd_id ino, __u32 gen, - int type, struct obd_client_handle *handle, int flag) +static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct obd_client_handle *handle, + int flag) { int rc; ENTRY; - EXP_CHECK_OP(exp, pin); + EXP_CHECK_DT_OP(exp, pin); EXP_COUNTER_INCREMENT(exp, pin); - rc = OBP(exp->exp_obd, pin)(exp, ino, gen, type, handle, flag); + rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag); RETURN(rc); } @@ -1235,7 +1470,7 @@ static inline int obd_unpin(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, unpin); + EXP_CHECK_DT_OP(exp, unpin); EXP_COUNTER_INCREMENT(exp, unpin); rc = OBP(exp->exp_obd, unpin)(exp, handle, flag); @@ -1260,32 +1495,52 @@ static inline void obd_import_event(struct obd_device *obd, EXIT; } +static inline int obd_llog_connect(struct obd_export *exp, + struct llogd_conn_body *body) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, llog_connect, 0); + EXP_COUNTER_INCREMENT(exp, llog_connect); + + rc = OBP(exp->exp_obd, llog_connect)(exp, body); + RETURN(rc); +} + + static inline int obd_notify(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data) + enum obd_notify_event ev, + void *data) { + int rc; ENTRY; OBD_CHECK_DEV(obd); /* the check for async_recov is a complete hack - I'm hereby overloading the meaning to also mean "this was called from mds_postsetup". I know that my mds is able to handle notifies - by this point, and it needs to get them to execute mds_postrecov. */ + by this point, and it needs to get them to execute mds_postrecov. */ if (!obd->obd_set_up && !obd->obd_async_recov) { CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name); RETURN(-EINVAL); } - if (!OBP(obd, notify)) + if (!OBP(obd, notify)) { + CERROR("obd %s has no notify handler\n", obd->obd_name); RETURN(-ENOSYS); + } OBD_COUNTER_INCREMENT(obd, notify); - RETURN(OBP(obd, notify)(obd, watched, ev, data)); + rc = OBP(obd, notify)(obd, watched, ev, data); + RETURN(rc); } static inline int obd_notify_observer(struct obd_device *observer, struct obd_device *observed, - enum obd_notify_event ev, void *data) + enum obd_notify_event ev, + void *data) { int rc1; int rc2; @@ -1305,8 +1560,8 @@ static inline int obd_notify_observer(struct obd_device *observer, else rc2 = 0; - return rc1 ?: rc2; - } + return rc1 ? rc1 : rc2; +} static inline int obd_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) @@ -1314,7 +1569,7 @@ static inline int obd_quotacheck(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, quotacheck); + EXP_CHECK_DT_OP(exp, quotacheck); EXP_COUNTER_INCREMENT(exp, quotacheck); rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl); @@ -1327,7 +1582,7 @@ static inline int obd_quotactl(struct obd_export *exp, int rc; ENTRY; - EXP_CHECK_OP(exp, quotactl); + EXP_CHECK_DT_OP(exp, quotactl); EXP_COUNTER_INCREMENT(exp, quotactl); rc = OBP(exp->exp_obd, quotactl)(exp, oqctl); @@ -1346,7 +1601,7 @@ static inline int obd_health_check(struct obd_device *obd) int rc; ENTRY; - /* don't use EXP_CHECK_OP, because NULL method is normal here */ + /* don't use EXP_CHECK_DT_OP, because NULL method is normal here */ if (obd == NULL || !OBT(obd)) { CERROR("cleaned up obd\n"); RETURN(-EOPNOTSUPP); @@ -1371,6 +1626,357 @@ static inline int obd_register_observer(struct obd_device *obd, RETURN(0); } +/* metadata helpers */ +static inline int md_getstatus(struct obd_export *exp, + struct lu_fid *fid, struct obd_capa **pc) +{ + int rc; + ENTRY; + + EXP_CHECK_MD_OP(exp, getstatus); + EXP_MD_COUNTER_INCREMENT(exp, getstatus); + rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc); + RETURN(rc); +} + +static inline int md_getattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, int ea_size, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, getattr); + EXP_MD_COUNTER_INCREMENT(exp, getattr); + rc = MDP(exp->exp_obd, getattr)(exp, fid, oc, valid, + ea_size, request); + RETURN(rc); +} + +static inline int md_change_cbdata(struct obd_export *exp, + const struct lu_fid *fid, + ldlm_iterator_t it, void *data) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, change_cbdata); + EXP_MD_COUNTER_INCREMENT(exp, change_cbdata); + rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data); + RETURN(rc); +} + +static inline int md_close(struct obd_export *exp, struct md_op_data *op_data, + struct obd_client_handle *och, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, close); + EXP_MD_COUNTER_INCREMENT(exp, close); + rc = MDP(exp->exp_obd, close)(exp, op_data, och, request); + RETURN(rc); +} + +static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, + const void *data, int datalen, int mode, __u32 uid, + __u32 gid, __u32 cap_effective, __u64 rdev, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, create); + EXP_MD_COUNTER_INCREMENT(exp, create); + rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode, + uid, gid, cap_effective, rdev, request); + RETURN(rc); +} + +static inline int md_done_writing(struct obd_export *exp, + struct md_op_data *op_data, + struct obd_client_handle *och) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, done_writing); + EXP_MD_COUNTER_INCREMENT(exp, done_writing); + rc = MDP(exp->exp_obd, done_writing)(exp, op_data, och); + RETURN(rc); +} + +static inline int md_enqueue(struct obd_export *exp, int lock_type, + struct lookup_intent *it, int lock_mode, + struct md_op_data *op_data, + struct lustre_handle *lockh, + void *lmm, int lmmsize, + ldlm_completion_callback cb_completion, + ldlm_blocking_callback cb_blocking, + void *cb_data, int extra_lock_flags) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, enqueue); + EXP_MD_COUNTER_INCREMENT(exp, enqueue); + rc = MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode, + op_data, lockh, lmm, lmmsize, + cb_completion, cb_blocking, + cb_data, extra_lock_flags); + RETURN(rc); +} + +static inline int md_getattr_name(struct obd_export *exp, + const struct lu_fid *fid, struct obd_capa *oc, + const char *name, int namelen, + obd_valid valid, int ea_size, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, getattr_name); + EXP_MD_COUNTER_INCREMENT(exp, getattr_name); + rc = MDP(exp->exp_obd, getattr_name)(exp, fid, oc, name, namelen, + valid, ea_size, request); + RETURN(rc); +} + +static inline int md_intent_lock(struct obd_export *exp, + struct md_op_data *op_data, void *lmm, + int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, intent_lock); + EXP_MD_COUNTER_INCREMENT(exp, intent_lock); + rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize, + it, flags, reqp, cb_blocking, + extra_lock_flags); + RETURN(rc); +} + +static inline int md_link(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, link); + EXP_MD_COUNTER_INCREMENT(exp, link); + rc = MDP(exp->exp_obd, link)(exp, op_data, request); + RETURN(rc); +} + +static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, + const char *old, int oldlen, const char *new, + int newlen, struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, rename); + EXP_MD_COUNTER_INCREMENT(exp, rename); + rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new, + newlen, request); + RETURN(rc); +} + +static inline int md_is_subdir(struct obd_export *exp, + const struct lu_fid *pfid, + const struct lu_fid *cfid, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, is_subdir); + EXP_MD_COUNTER_INCREMENT(exp, is_subdir); + rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request); + RETURN(rc); +} + +static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, setattr); + EXP_MD_COUNTER_INCREMENT(exp, setattr); + rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, + ea2, ea2len, request); + RETURN(rc); +} + +static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, sync); + EXP_MD_COUNTER_INCREMENT(exp, sync); + rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request); + RETURN(rc); +} + +static inline int md_readpage(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, __u64 offset, + struct page *page, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, readpage); + EXP_MD_COUNTER_INCREMENT(exp, readpage); + rc = MDP(exp->exp_obd, readpage)(exp, fid, oc, offset, page, request); + RETURN(rc); +} + +static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, unlink); + EXP_MD_COUNTER_INCREMENT(exp, unlink); + rc = MDP(exp->exp_obd, unlink)(exp, op_data, request); + RETURN(rc); +} + +static inline int md_get_lustre_md(struct obd_export *exp, + struct ptlrpc_request *req, + int offset, struct obd_export *dt_exp, + struct obd_export *md_exp, + struct lustre_md *md) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, get_lustre_md); + EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md); + RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, offset, + dt_exp, md_exp, md)); +} + +static inline int md_free_lustre_md(struct obd_export *exp, + struct lustre_md *md) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, free_lustre_md); + EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md); + RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md)); +} + +static inline int md_setxattr(struct obd_export *exp, + const struct lu_fid *fid, struct obd_capa *oc, + obd_valid valid, const char *name, + const char *input, int input_size, + int output_size, int flags, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, setxattr); + EXP_MD_COUNTER_INCREMENT(exp, setxattr); + RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input, + input_size, output_size, flags, + request)); +} + +static inline int md_getxattr(struct obd_export *exp, + const struct lu_fid *fid, struct obd_capa *oc, + obd_valid valid, const char *name, + const char *input, int input_size, + int output_size, int flags, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, getxattr); + EXP_MD_COUNTER_INCREMENT(exp, getxattr); + RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input, + input_size, output_size, flags, + request)); +} + +static inline int md_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_open_replay_data); + EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data); + RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req)); +} + +static inline int md_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, clear_open_replay_data); + EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data); + RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och)); +} + +static inline int md_set_lock_data(struct obd_export *exp, + __u64 *lockh, void *data) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_lock_data); + EXP_MD_COUNTER_INCREMENT(exp, set_lock_data); + RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data)); +} + +static inline int md_cancel_unused(struct obd_export *exp, + const struct lu_fid *fid, + int flags, void *opaque) +{ + int rc; + ENTRY; + + EXP_CHECK_MD_OP(exp, cancel_unused); + EXP_MD_COUNTER_INCREMENT(exp, cancel_unused); + + rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, flags, opaque); + RETURN(rc); +} + +static inline int md_lock_match(struct obd_export *exp, int flags, + const struct lu_fid *fid, ldlm_type_t type, + ldlm_policy_data_t *policy, ldlm_mode_t mode, + struct lustre_handle *lockh) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, lock_match); + EXP_MD_COUNTER_INCREMENT(exp, lock_match); + RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, + policy, mode, lockh)); +} + +static inline int md_init_ea_size(struct obd_export *exp, int easize, + int def_asize, int cookiesize) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, init_ea_size); + EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); + RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, + cookiesize)); +} + +static inline int md_get_remote_perm(struct obd_export *exp, + const struct lu_fid *fid, + struct obd_capa *oc, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, get_remote_perm); + EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm); + RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, request)); +} + +static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa, + renew_capa_cb_t cb) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, renew_capa); + EXP_MD_COUNTER_INCREMENT(exp, renew_capa); + rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb); + RETURN(rc); +} + /* OBD Metadata Support */ extern int obd_init_caches(void); @@ -1378,18 +1984,26 @@ extern void obd_cleanup_caches(void); /* support routines */ extern cfs_mem_cache_t *obdo_cachep; -static inline struct obdo *obdo_alloc(void) -{ - struct obdo *oa; - OBD_SLAB_ALLOC(oa, obdo_cachep, CFS_ALLOC_STD, sizeof(*oa)); +#define OBDO_ALLOC(ptr) \ +do { \ + OBD_SLAB_ALLOC_PTR((ptr), obdo_cachep); \ +} while(0) - return oa; +#define OBDO_FREE(ptr) \ +do { \ + OBD_SLAB_FREE_PTR((ptr), obdo_cachep); \ +} while(0) + + +static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid) +{ + /* something here */ } -static inline void obdo_free(struct obdo *oa) +static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa) { - OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); + /* something here */ } /* I'm as embarrassed about this as you are. @@ -1407,12 +2021,16 @@ typedef __u8 class_uuid_t[16]; void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); /* lustre_peer.c */ -int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index); -int class_add_uuid(char *uuid, __u64 nid); -int class_del_uuid (char *uuid); +int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index); +int class_add_uuid(const char *uuid, __u64 nid); +int class_del_uuid (const char *uuid); void class_init_uuidlist(void); void class_exit_uuidlist(void); +/* mea.c */ +int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen); +int raw_name2idx(int hashtype, int count, const char *name, int namelen); + /* prng.c */ void ll_generate_random_uuid(class_uuid_t uuid_out); diff --git a/lustre/include/obd_ost.h b/lustre/include/obd_ost.h index 12ea558..cfc9e5d 100644 --- a/lustre/include/obd_ost.h +++ b/lustre/include/obd_ost.h @@ -14,14 +14,14 @@ #include struct osc_brw_async_args { - struct obdo *aa_oa; - int aa_requested_nob; - int aa_nio_count; - obd_count aa_page_count; - int aa_retries; - struct brw_page **aa_ppga; + struct obdo *aa_oa; + int aa_requested_nob; + int aa_nio_count; + obd_count aa_page_count; + int aa_retries; + struct brw_page **aa_ppga; struct client_obd *aa_cli; - struct list_head aa_oaps; + struct list_head aa_oaps; }; struct osc_async_args { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ee2c6ba..a2ddd27 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -44,7 +44,6 @@ extern atomic_t obd_dirty_pages; extern cfs_waitq_t obd_race_waitq; extern int obd_race_state; - #define OBD_FAIL_MDS 0x100 #define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 #define OBD_FAIL_MDS_GETATTR_NET 0x102 @@ -100,6 +99,12 @@ extern int obd_race_state; #define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 #define OBD_FAIL_MDS_FS_SETUP 0x135 #define OBD_FAIL_MDS_RESEND 0x136 +#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x137 +#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x138 +#define OBD_FAIL_MDS_SET_INFO_NET 0x139 +#define OBD_FAIL_MDS_WRITEPAGE_NET 0x13a +#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x13b +#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x13c #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -169,6 +174,7 @@ extern int obd_race_state; #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 #define OBD_FAIL_PTLRPC_DROP_RPC 0x505 #define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 +#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507 #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 @@ -193,9 +199,20 @@ extern int obd_race_state; #define OBD_FAIL_MGS_SLOW_REQUEST_NET 0x904 #define OBD_FAIL_MGS_SLOW_TARGET_REG 0x905 -#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 +#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xa00 + +#define OBD_FAIL_LPROC_REMOVE 0xb00 + +#define OBD_FAIL_SEQ 0x1000 +#define OBD_FAIL_SEQ_QUERY_NET 0x1001 + +#define OBD_FAIL_FLD 0x1100 +#define OBD_FAIL_FLD_QUERY_NET 0x1101 -#define OBD_FAIL_LPROC_REMOVE 0xB00 +#define OBD_FAIL_SEC_CTX 0x1200 +#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 +#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202 +#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 @@ -210,8 +227,8 @@ extern int obd_race_state; #define OBD_FAIL_CHECK_ONCE(id) \ ({ int _ret_ = 0; \ - if (OBD_FAIL_CHECK(id)) { \ - CERROR("*** obd_fail_loc=%x ***\n", id); \ + if (unlikely(OBD_FAIL_CHECK(id))) { \ + CERROR("*** obd_fail_loc=0x%x ***\n", id); \ obd_fail_loc |= OBD_FAILED; \ if ((id) & OBD_FAIL_ONCE) \ obd_fail_loc |= OBD_FAIL_ONCE; \ @@ -222,14 +239,14 @@ extern int obd_race_state; #define OBD_FAIL_RETURN(id, ret) \ do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ + if (unlikely(OBD_FAIL_CHECK_ONCE(id))) { \ RETURN(ret); \ } \ } while(0) #define OBD_FAIL_TIMEOUT(id, secs) \ do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ + if (unlikely(OBD_FAIL_CHECK_ONCE(id))) { \ CERROR("obd_fail_timeout id %x sleeping for %d secs\n", \ (id), (secs)); \ set_current_state(TASK_UNINTERRUPTIBLE); \ @@ -247,7 +264,7 @@ do { \ * the first and continues. */ #define OBD_RACE(id) \ do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ + if (unlikely(OBD_FAIL_CHECK_ONCE(id))) { \ obd_race_state = 0; \ CERROR("obd_race id %x sleeping\n", (id)); \ OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0); \ @@ -268,11 +285,148 @@ do { \ extern atomic_t libcfs_kmemory; +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + +#define OBD_MT_WRONG_SIZE (1 << 0) +#define OBD_MT_ALREADY_FREED (1 << 1) +#define OBD_MT_LOC_LEN 128 + +struct obd_mem_track { + struct hlist_node mt_hash; + char mt_loc[OBD_MT_LOC_LEN]; + int mt_flags; + void *mt_ptr; + int mt_size; +}; + +void lvfs_memdbg_show(void); +void lvfs_memdbg_insert(struct obd_mem_track *mt); +void lvfs_memdbg_remove(struct obd_mem_track *mt); +struct obd_mem_track *lvfs_memdbg_find(void *ptr); + +int lvfs_memdbg_check_insert(struct obd_mem_track *mt); +struct obd_mem_track *lvfs_memdbg_check_remove(void *ptr); + +static inline struct obd_mem_track * +__new_mem_track(void *ptr, int size, + char *file, int line) +{ + struct obd_mem_track *mt; + + mt = kmalloc(sizeof(*mt), GFP_KERNEL); + if (unlikely(!mt)) + return NULL; + + snprintf(mt->mt_loc, sizeof(mt->mt_loc) - 1, + "%s:%d", file, line); + + mt->mt_size = size; + mt->mt_ptr = ptr; + mt->mt_flags = 0; + return mt; +} + +static inline void +__free_mem_track(struct obd_mem_track *mt) +{ + kfree(mt); +} + +static inline int +__get_mem_track(void *ptr, int size, + char *file, int line) +{ + struct obd_mem_track *mt; + + mt = __new_mem_track(ptr, size, file, line); + if (unlikely(!mt)) { + CWARN("Can't allocate new memory track\n"); + return 0; + } + + if (!lvfs_memdbg_check_insert(mt)) + __free_mem_track(mt); + + return 1; +} + +static inline int +__put_mem_track(void *ptr, int size, + char *file, int line) +{ + struct obd_mem_track *mt; + + if (unlikely(!(mt = lvfs_memdbg_check_remove(ptr)))) { + CWARN("Ptr 0x%p is not allocated. Attempt to free " + "not allocated memory at %s:%d\n", ptr, + file, line); + LBUG(); + return 0; + } else { + if (unlikely(mt->mt_size != size)) { + if (!(mt->mt_flags & OBD_MT_ALREADY_FREED)) { + mt->mt_flags |= (OBD_MT_WRONG_SIZE | + OBD_MT_ALREADY_FREED); + + CWARN("Freeing memory chunk (at 0x%p) of " + "different size than allocated " + "(%d != %d) at %s:%d, allocated at %s\n", + ptr, mt->mt_size, size, file, line, + mt->mt_loc); + } + } else { + __free_mem_track(mt); + } + return 1; + } +} + +#define get_mem_track(ptr, size, file, line) \ + __get_mem_track((ptr), (size), (file), (line)) + +#define put_mem_track(ptr, size, file, line) \ + __put_mem_track((ptr), (size), (file), (line)) + +#else /* !CONFIG_DEBUG_MEMORY */ + +#define get_mem_track(ptr, size, file, line) \ + do {} while (0) + +#define put_mem_track(ptr, size, file, line) \ + do {} while (0) +#endif /* !CONFIG_DEBUG_MEMORY */ + +#define OBD_DEBUG_MEMUSAGE (1) + +#if OBD_DEBUG_MEMUSAGE +#define OBD_ALLOC_POST(ptr, size, name) \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + get_mem_track((ptr), (size), __FILE__, __LINE__); \ + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)) + +#define OBD_FREE_PRE(ptr, size, name) \ + LASSERT(ptr); \ + put_mem_track((ptr), (size), __FILE__, __LINE__); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size) + +#else /* !OBD_DEBUG_MEMUSAGE */ + +#define OBD_ALLOC_POST(ptr, size, name) ((void)0) +#define OBD_FREE_PRE(ptr, size, name) ((void)0) + +#endif /* !OBD_DEBUG_MEMUSAGE */ + #if defined(LUSTRE_UTILS) /* this version is for utils only */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ + if (unlikely((ptr) == NULL)) { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ } else { \ @@ -285,18 +439,14 @@ do { \ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ + if (unlikely((ptr) == NULL)) { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "kmalloced"); \ } \ } while (0) #endif @@ -316,26 +466,24 @@ do { \ # define OBD_VMALLOC(ptr, size) \ do { \ (ptr) = cfs_alloc_large(size); \ - if ((ptr) == NULL) { \ + if (unlikely((ptr) == NULL)) { \ CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "vmalloced"); \ } \ -} while (0) +} while(0) #endif #ifdef CONFIG_DEBUG_SLAB #define POISON(ptr, c, s) do {} while (0) +#define POISON_PTR(ptr) ((void)0) #else #define POISON(ptr, c, s) memset(ptr, c, s) +#define POISON_PTR(ptr) (ptr) = (void *)0xdeadbeef #endif #ifdef POISON_BULK @@ -348,31 +496,49 @@ do { \ #ifdef __KERNEL__ #define OBD_FREE(ptr, size) \ do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "kfreed"); \ cfs_free(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) + + +#ifdef HAVE_RCU +# ifdef HAVE_CALL_RCU_PARAM +# define my_call_rcu(rcu, cb) call_rcu(rcu, cb, rcu) +# else +# define my_call_rcu(rcu, cb) call_rcu(rcu, cb) +# endif #else -#define OBD_FREE(ptr, size) ((void)(size), free((ptr))) +# define my_call_rcu(rcu, cb) (cb)(rcu) #endif +#define OBD_FREE_RCU_CB(ptr, size, handle, free_cb) \ +do { \ + struct portals_handle *__h = (handle); \ + LASSERT(handle); \ + __h->h_ptr = (ptr); \ + __h->h_size = (size); \ + __h->h_free_cb = (void (*)(void *, size_t))(free_cb); \ + my_call_rcu(&__h->h_rcu, class_handle_free_cb); \ + POISON_PTR(ptr); \ +} while(0) +#define OBD_FREE_RCU(ptr, size, handle) OBD_FREE_RCU_CB(ptr, size, handle, NULL) + +#else +#define OBD_FREE(ptr, size) ((void)(size), free((ptr))) +#define OBD_FREE_RCU(ptr, size, handle) (OBD_FREE(ptr, size)) +#define OBD_FREE_RCU_CB(ptr, size, handle, cb) ((*(cb))(ptr, size)) +#endif /* ifdef __KERNEL__ */ + #ifdef __arch_um__ # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) #else # define OBD_VFREE(ptr, size) \ do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "vfreed"); \ cfs_free_large(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) #endif /* we memset() the slab object to 0 when allocation succeeds, so DO NOT @@ -382,35 +548,32 @@ do { \ do { \ LASSERT(!in_interrupt()); \ (ptr) = cfs_mem_cache_alloc(slab, (type)); \ - if ((ptr) == NULL) { \ + if (unlikely((ptr) == NULL)) { \ CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "slab-alloced"); \ } \ -} while (0) +} while(0) #define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr)) #define OBD_SLAB_FREE(ptr, slab, size) \ do { \ - LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "slab-freed"); \ cfs_mem_cache_free(slab, ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) + +#define OBD_SLAB_ALLOC_PTR(ptr, slab) \ + OBD_SLAB_ALLOC((ptr), (slab), CFS_ALLOC_STD, sizeof *(ptr)) +#define OBD_SLAB_FREE_PTR(ptr, slab) \ + OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr)) -#define KEY_IS(str) (keylen >= strlen(key) && strcmp(key, str) == 0) +#define KEY_IS(str) (keylen >= strlen(str) && strcmp(key, str) == 0) #if defined(__linux__) #include diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos b/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos deleted file mode 100644 index 0de1146..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos +++ /dev/null @@ -1,1035 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_LOLAT_SYSCTL is not set -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -# CONFIG_M686 is not set -# CONFIG_MPENTIUMIII is not set -CONFIG_MPENTIUM4=y -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_MCE=y -# CONFIG_CPU_FREQ is not set -# CONFIG_TOSHIBA is not set -# CONFIG_I8K is not set -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_MULTIQUAD is not set -CONFIG_HAVE_DEC_LOCK=y - -# -# General setup -# -CONFIG_HZ=100 -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_NAMES=y - -# -# Performance-monitoring counters support -# -CONFIG_PERFCTR=m -CONFIG_KPERFCTR=y -# CONFIG_PERFCTR_DEBUG is not set -# CONFIG_PERFCTR_INIT_TESTS is not set -CONFIG_PERFCTR_VIRTUAL=y -CONFIG_PERFCTR_GLOBAL=y -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_HOTPLUG is not set -# CONFIG_PCMCIA is not set -# CONFIG_HOTPLUG_PCI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_IKCONFIG is not set -CONFIG_PM=y - -# -# Additional device driver support -# -# CONFIG_CIPE is not set -# CONFIG_CRYPTO_AEP is not set -# CONFIG_MEGARAC is not set -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -# CONFIG_SCSI_ISCSI is not set -CONFIG_IBMASM=m -CONFIG_IBMSER=m -# CONFIG_ACPI is not set -CONFIG_APM=y -CONFIG_APM_IGNORE_USER_SUSPEND=y -# CONFIG_APM_DO_ENABLE is not set -# CONFIG_APM_CPU_IDLE is not set -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Binary emulation of other systems -# -# CONFIG_ABI is not set -# CONFIG_ABI_SVR4 is not set -# CONFIG_BINFMT_COFF is not set -# CONFIG_BINFMT_XOUT is not set -# CONFIG_BINFMT_XOUT_X286 is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=y -# CONFIG_MTD_DEBUG is not set -# CONFIG_MTD_PARTITIONS is not set -# CONFIG_MTD_CONCAT is not set -# CONFIG_MTD_REDBOOT_PARTS is not set -# CONFIG_MTD_CMDLINE_PARTS is not set -CONFIG_MTD_CHAR=m -# CONFIG_MTD_BLOCK is not set -# CONFIG_MTD_BLOCK_RO is not set -# CONFIG_FTL is not set -# CONFIG_NFTL is not set - -# -# RAM/ROM/Flash chip drivers -# -# CONFIG_MTD_CFI is not set -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set -CONFIG_MTD_CFI_GEOMETRY=y -CONFIG_MTD_CFI_B1=y -# CONFIG_MTD_CFI_B2 is not set -# CONFIG_MTD_CFI_B4 is not set -# CONFIG_MTD_CFI_B8 is not set -CONFIG_MTD_CFI_I1=y -# CONFIG_MTD_CFI_I2 is not set -# CONFIG_MTD_CFI_I4 is not set -# CONFIG_MTD_CFI_I8 is not set -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -# CONFIG_MTD_RAM is not set -CONFIG_MTD_ROM=y -# CONFIG_MTD_ABSENT is not set -# CONFIG_MTD_OBSOLETE_CHIPS is not set -# CONFIG_MTD_AMDSTD is not set -# CONFIG_MTD_SHARP is not set -# CONFIG_MTD_JEDEC is not set - -# -# Mapping drivers for chip access -# -# CONFIG_MTD_PHYSMAP is not set -# CONFIG_MTD_PNC2000 is not set -# CONFIG_MTD_SC520CDP is not set -# CONFIG_MTD_NETSC520 is not set -# CONFIG_MTD_SBC_GXX is not set -# CONFIG_MTD_ELAN_104NC is not set -# CONFIG_MTD_DILNETPC is not set -# CONFIG_MTD_MIXMEM is not set -# CONFIG_MTD_OCTAGON is not set -# CONFIG_MTD_VMAX is not set -# CONFIG_MTD_L440GX is not set -# CONFIG_MTD_AMD766ROM is not set -CONFIG_MTD_ICH2ROM=m -# CONFIG_MTD_PCI is not set - -# -# Self-contained MTD device drivers -# -# CONFIG_MTD_PMC551 is not set -# CONFIG_MTD_SLRAM is not set -# CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set -# CONFIG_MTD_DOC1000 is not set -# CONFIG_MTD_DOC2000 is not set -# CONFIG_MTD_DOC2001 is not set -# CONFIG_MTD_DOCPROBE is not set - -# -# NAND Flash Device Drivers -# -# CONFIG_MTD_NAND is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_ENBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -# CONFIG_MD_LINEAR is not set -# CONFIG_MD_RAID0 is not set -# CONFIG_MD_RAID1 is not set -# CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -# CONFIG_CRYPTO is not set -# CONFIG_CIPHERS is not set -# CONFIG_CRYPTODEV is not set -# CONFIG_CRYPTOLOOP is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_TUX is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -# CONFIG_IP_NF_FTP is not set -# CONFIG_IP_NF_IRC is not set -# CONFIG_IP_NF_QUEUE is not set -# CONFIG_IP_NF_IPTABLES is not set -# CONFIG_IP_NF_ARPTABLES is not set -# CONFIG_IP_NF_COMPAT_IPCHAINS is not set -# CONFIG_IP_NF_COMPAT_IPFWADM is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -CONFIG_KHTTPD=m -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -CONFIG_NET_PKTGEN=m - -# -# Telephony Support -# -# CONFIG_PHONE is not set -# CONFIG_PHONE_IXJ is not set -# CONFIG_PHONE_IXJ_PCMCIA is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set -# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set -# CONFIG_BLK_DEV_IDEDISK_IBM is not set -# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set -# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set -# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set -# CONFIG_BLK_DEV_IDEDISK_WD is not set -# CONFIG_BLK_DEV_COMMERIAL is not set -# CONFIG_BLK_DEV_TIVO is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -# CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y -# CONFIG_BLK_DEV_IDESCSI is not set -# CONFIG_IDE_TASK_IOCTL is not set -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set -# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_AEC62XX_TUNING=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_CMD680=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_PIIX_TUNING=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_ADMA100 is not set -CONFIG_BLK_DEV_PDC202XX=y -# CONFIG_PDC202XX_BURST is not set -# CONFIG_PDC202XX_FORCE is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_BLK_DEV_CENATEK=y -# CONFIG_IDE_CHIPSETS is not set -# CONFIG_BLK_DEV_ELEVATOR_NOOP is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_IDE_MODES=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set - -# -# SCSI support -# -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -# CONFIG_SCSI_AACRAID is not set -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -# CONFIG_SCSI_MEGARAID is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_NCR53C8XX is not set -# CONFIG_SCSI_SYM53C8XX is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_NEWISP is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -CONFIG_SCSI_DEBUG=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_BOOT=y -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -# CONFIG_FUSION_LAN is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set -# CONFIG_NET_SB1000 is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_AT1700 is not set -# CONFIG_DEPCA is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_AC3200 is not set -# CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TC35815 is not set -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -CONFIG_EEPRO100=m -CONFIG_NET_E100=m -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_NEW_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_SIS900_OLD is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -# CONFIG_DL2K is not set -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_SK98LIN is not set -CONFIG_NET_BROADCOM=m -CONFIG_TIGON3=m -CONFIG_NET_E1000=m -# CONFIG_FDDI is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -# CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Quadrics Supercomputers -# - -# -# QsNet -# -CONFIG_QUADRICS=y -CONFIG_QSNETMOD=m -CONFIG_ELAN3MOD=m -CONFIG_EPMOD=m -CONFIG_EIPMOD=m -CONFIG_RMSMOD=m -CONFIG_JTAG=m - -# -# QsNet II -# - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -# CONFIG_INPUT is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set - -# -# Character devices -# -CONFIG_ECC=m -CONFIG_CHAOSTEST=m -CONFIG_P4THERM=m -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -# CONFIG_SERIAL_MANY_PORTS is not set -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -# CONFIG_SERIAL_MULTIPORT is not set -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set - -# -# I2C support -# -CONFIG_I2C=y -# CONFIG_I2C_ALGOBIT is not set -# CONFIG_I2C_ALGOPCF is not set -CONFIG_I2C_MAINBOARD=y -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_HYDRA is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -# CONFIG_I2C_I810 is not set -# CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_VIA is not set -# CONFIG_I2C_VIAPRO is not set -# CONFIG_I2C_VOODOO3 is not set -CONFIG_I2C_ISA=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_PROC=y - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -# CONFIG_SENSORS_ADM1024 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_FSCPOS is not set -# CONFIG_SENSORS_FSCSCY is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -# CONFIG_SENSORS_MAXILIFE is not set -# CONFIG_SENSORS_IT87 is not set -# CONFIG_SENSORS_MTP008 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -CONFIG_SENSORS_LM87=m -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_THMC50 is not set -# CONFIG_SENSORS_VIA686A is not set -CONFIG_SENSORS_W83781D=y -# CONFIG_SENSORS_OTHER is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_QIC02_TAPE is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -CONFIG_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -# CONFIG_AGP_I810 is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_AMD is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_BATTERY_GERICOM is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Crypto Hardware support -# -# CONFIG_CRYPTO is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXTN_FS=m -CONFIG_JBD=y -CONFIG_JBD_DEBUG=y -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=y -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_PFS_FS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=m -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=m -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -# CONFIG_MDA_CONSOLE is not set - -# -# Frame-buffer support -# -# CONFIG_FB is not set -# CONFIG_SPEAKUP is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_FRAME_POINTER=y -CONFIG_STACK_TRACE_SCAN=y -CONFIG_STACK_TRACE_FPTR=y -CONFIG_STACK_TRACE_PARAM_COUNT=4 -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_MCL_COREDUMP=y -CONFIG_BOOTIMG=y -# CONFIG_OPROFILE is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh b/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh deleted file mode 100644 index dec210a..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh +++ /dev/null @@ -1,1849 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -# CONFIG_CPU_FREQ_24_API is not set -CONFIG_X86_POWERNOW_K6=m -# CONFIG_X86_LONGHAUL is not set -CONFIG_X86_SPEEDSTEP=m -# CONFIG_X86_P4_CLOCKMOD is not set -# CONFIG_X86_LONGRUN is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -# CONFIG_SMP is not set -CONFIG_X86_UP_APIC=y -CONFIG_X86_UP_IOAPIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -# CONFIG_X86_TSC_DISABLE is not set -CONFIG_X86_TSC=y - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_H2999 is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -# CONFIG_ACPI is not set -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=m -CONFIG_CIPHERS=m -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_IDENTITY=m -CONFIG_CRYPTODEV=m -CONFIG_CRYPTOLOOP=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IPV6=m - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NFORCE=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=253 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -CONFIG_AIC79XX_ENABLE_RD_STRM=y -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_NEWISP=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -# CONFIG_IEEE1394_PCILYNX is not set -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -CONFIG_SUNDANCE_MMIO=y -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_BATTERY_GERICOM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Crypto Hardware support -# -CONFIG_CRYPTO=m -CONFIG_CRYPTO_BROADCOM=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -CONFIG_AFS_FS=m -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -# CONFIG_FB_MATROX_G450 is not set -CONFIG_FB_MATROX_G100A=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_POWERMATE=m -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_MCL_COREDUMP=y -CONFIG_BOOTIMG=y - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64-smp.config deleted file mode 100644 index 4446f20..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64-smp.config +++ /dev/null @@ -1,1047 +0,0 @@ -# -# Automatically generated make config: don't edit -# - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# General setup -# -CONFIG_IA64=y -# CONFIG_ISA is not set -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_SBUS is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ITANIUM is not set -CONFIG_MCKINLEY=y -# CONFIG_IA64_GENERIC is not set -# CONFIG_IA64_DIG is not set -# CONFIG_IA64_HP_SIM is not set -CONFIG_IA64_HP_ZX1=y -# CONFIG_IA64_SGI_SN1 is not set -# CONFIG_IA64_SGI_SN2 is not set -# CONFIG_IA64_PAGE_SIZE_4KB is not set -CONFIG_IA64_PAGE_SIZE_8KB=y -# CONFIG_IA64_PAGE_SIZE_16KB is not set -# CONFIG_IA64_PAGE_SIZE_64KB is not set -CONFIG_IA64_L1_CACHE_SHIFT=7 -CONFIG_IA64_MCA=y -CONFIG_PM=y -CONFIG_KCORE_ELF=y -CONFIG_IA64_PAL_IDLE=y -CONFIG_SMP=y -CONFIG_IA32_SUPPORT=y -CONFIG_PERFMON=y -CONFIG_IA64_PALINFO=y -CONFIG_EFI_VARS=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m - -# -# Misc devices -# -# CONFIG_BMC is not set -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_KERNEL_CONFIG=y - -# -# ACPI Support -# -CONFIG_ACPI_PCI=y -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_THERMAL=y -# CONFIG_ACPI_DEBUG is not set -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK_DEV=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -CONFIG_NET_IPIP=m -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -# CONFIG_IP_NF_MATCH_AH_ESP is not set -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -# CONFIG_IP_NF_TARGET_ULOG is not set -CONFIG_IP_NF_TARGET_TCPMSS=m -# CONFIG_IP_NF_ARPTABLES is not set -# CONFIG_IP_NF_COMPAT_IPCHAINS is not set -# CONFIG_IP_NF_COMPAT_IPFWADM is not set - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play configuration -# -# CONFIG_PNP is not set -# CONFIG_ISAPNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_NBD is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_MD_RAID0=y -CONFIG_MD_RAID1=y -CONFIG_MD_RAID5=y -CONFIG_MD_MULTIPATH=y -CONFIG_BLK_DEV_LVM=y - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_BOOT=y -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set -# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set -# CONFIG_BLK_DEV_IDEDISK_IBM is not set -# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set -# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set -# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set -# CONFIG_BLK_DEV_IDEDISK_WD is not set -# CONFIG_BLK_DEV_COMMERIAL is not set -# CONFIG_BLK_DEV_TIVO is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=y -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set -# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set -CONFIG_BLK_DEV_ADMA=y -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_AEC62XX_TUNING is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_WDC_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -# CONFIG_BLK_DEV_CMD680 is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_HPT34X_AUTODMA is not set -# CONFIG_BLK_DEV_HPT366 is not set -CONFIG_BLK_DEV_PIIX=y -CONFIG_PIIX_TUNING=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_PDC202XX is not set -# CONFIG_PDC202XX_BURST is not set -# CONFIG_PDC202XX_FORCE is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_IDE_MODES=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set - -# -# SCSI support -# -CONFIG_SCSI=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=2 -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=y -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -CONFIG_SCSI_QLOGIC_QLA2XXX=y -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set -# CONFIG_TULIP is not set -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -CONFIG_EEPRO100=y -# CONFIG_E100 is not set -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -# CONFIG_TLAN is not set -# CONFIG_TC35815 is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -# CONFIG_DL2K is not set -CONFIG_E1000=y -# CONFIG_MYRI_SBUS is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_SK98LIN is not set -CONFIG_TIGON3=y - -# -# Quadrics QsNet -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_EP3=m -CONFIG_EIP=m -CONFIG_ELAN4=m -CONFIG_RMS=m -CONFIG_JTAG=m -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=y -CONFIG_INPUT_KEYBDEV=y -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_HCDP=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_ACPI=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set -# CONFIG_I2C_ALGOPCF is not set -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -# CONFIG_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set - -# -# Joysticks -# -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_PANIC_EVENT=y -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V1 is not set -# CONFIG_QFMT_V2 is not set -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -# CONFIG_UMSDOS_FS is not set -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=y -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -CONFIG_XFS_FS=y -# CONFIG_XFS_POSIX_ACL is not set -CONFIG_XFS_RT=y -# CONFIG_XFS_QUOTA is not set -CONFIG_XFS_DMAPI=y -# CONFIG_XFS_DEBUG is not set -CONFIG_PAGEBUF_DEBUG=y - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="cp437" -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_ZISOFS_FS is not set -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -# CONFIG_NLS_CODEPAGE_1250 is not set -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y - -# -# Frame-buffer support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -CONFIG_USB=y -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -# CONFIG_USB_LONG_TIMEOUT is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_EHCI_HCD is not set -CONFIG_USB_UHCI_ALT=y -CONFIG_USB_OHCI=y - -# -# USB Device Class drivers -# -# CONFIG_USB_AUDIO is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_BLUETOOTH is not set -# CONFIG_USB_MIDI is not set -# CONFIG_USB_STORAGE is not set -# CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_ACM is not set -# CONFIG_USB_PRINTER is not set - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT is not set -# CONFIG_USB_HIDDEV is not set -# CONFIG_USB_AIPTEK is not set -# CONFIG_USB_WACOM is not set - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_SCANNER is not set -# CONFIG_USB_MICROTEK is not set -# CONFIG_USB_HPUSBSCSI is not set - -# -# USB Multimedia devices -# - -# -# Video4Linux support is needed for USB Multimedia device support -# - -# -# USB Network adaptors -# -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_CATC is not set -# CONFIG_USB_CDCETHER is not set -# CONFIG_USB_USBNET is not set - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set - -# -# USB Serial Converter support -# -# CONFIG_USB_SERIAL is not set - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_AUERSWALD is not set -# CONFIG_USB_TIGL is not set -# CONFIG_USB_BRLVGER is not set -# CONFIG_USB_LCD is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -# CONFIG_ZLIB_DEFLATE is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -# CONFIG_IA64_GRANULE_16MB is not set -CONFIG_IA64_GRANULE_64MB=y -CONFIG_DUMP=m -CONFIG_DUMP_COMPRESS_RLE=m -CONFIG_DUMP_COMPRESS_GZIP=m -CONFIG_DEBUG_KERNEL=y -# CONFIG_IA64_PRINT_HAZARDS is not set -# CONFIG_DISABLE_VHPT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_IA64_EARLY_PRINTK=y -CONFIG_IA64_EARLY_PRINTK_UART=y -CONFIG_IA64_EARLY_PRINTK_UART_BASE=0 -# CONFIG_IA64_EARLY_PRINTK_VGA is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_IA64_DEBUG_CMPXCHG is not set -# CONFIG_IA64_DEBUG_IRQ is not set -CONFIG_KALLSYMS=y -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64.config deleted file mode 100644 index 8ba58df..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-hp_pnnl-2.4-ia64.config +++ /dev/null @@ -1,1047 +0,0 @@ -# -# Automatically generated make config: don't edit -# - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# General setup -# -CONFIG_IA64=y -# CONFIG_ISA is not set -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_SBUS is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ITANIUM is not set -CONFIG_MCKINLEY=y -# CONFIG_IA64_GENERIC is not set -# CONFIG_IA64_DIG is not set -# CONFIG_IA64_HP_SIM is not set -CONFIG_IA64_HP_ZX1=y -# CONFIG_IA64_SGI_SN1 is not set -# CONFIG_IA64_SGI_SN2 is not set -# CONFIG_IA64_PAGE_SIZE_4KB is not set -CONFIG_IA64_PAGE_SIZE_8KB=y -# CONFIG_IA64_PAGE_SIZE_16KB=y -# CONFIG_IA64_PAGE_SIZE_64KB is not set -CONFIG_IA64_L1_CACHE_SHIFT=7 -CONFIG_IA64_MCA=y -CONFIG_PM=y -CONFIG_KCORE_ELF=y -CONFIG_IA64_PAL_IDLE=y -# CONFIG_SMP is not set -CONFIG_IA32_SUPPORT=y -CONFIG_PERFMON=y -CONFIG_IA64_PALINFO=y -CONFIG_EFI_VARS=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m - -# -# Misc devices -# -# CONFIG_BMC is not set -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_KERNEL_CONFIG=y - -# -# ACPI Support -# -CONFIG_ACPI_PCI=y -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_BUTTON=y -CONFIG_ACPI_FAN=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_THERMAL=y -# CONFIG_ACPI_DEBUG is not set -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK_DEV=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -CONFIG_NET_IPIP=m -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -# CONFIG_IP_NF_MATCH_AH_ESP is not set -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -# CONFIG_IP_NF_TARGET_ULOG is not set -CONFIG_IP_NF_TARGET_TCPMSS=m -# CONFIG_IP_NF_ARPTABLES is not set -# CONFIG_IP_NF_COMPAT_IPCHAINS is not set -# CONFIG_IP_NF_COMPAT_IPFWADM is not set - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play configuration -# -# CONFIG_PNP is not set -# CONFIG_ISAPNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_NBD is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_MD_RAID0=y -CONFIG_MD_RAID1=y -CONFIG_MD_RAID5=y -CONFIG_MD_MULTIPATH=y -CONFIG_BLK_DEV_LVM=y - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_BOOT=y -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set -# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set -# CONFIG_BLK_DEV_IDEDISK_IBM is not set -# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set -# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set -# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set -# CONFIG_BLK_DEV_IDEDISK_WD is not set -# CONFIG_BLK_DEV_COMMERIAL is not set -# CONFIG_BLK_DEV_TIVO is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=y -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set -# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set -CONFIG_BLK_DEV_ADMA=y -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_AEC62XX_TUNING is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_WDC_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -# CONFIG_BLK_DEV_CMD680 is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_HPT34X_AUTODMA is not set -# CONFIG_BLK_DEV_HPT366 is not set -CONFIG_BLK_DEV_PIIX=y -CONFIG_PIIX_TUNING=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_PDC202XX is not set -# CONFIG_PDC202XX_BURST is not set -# CONFIG_PDC202XX_FORCE is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_IDE_MODES=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set - -# -# SCSI support -# -CONFIG_SCSI=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=2 -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=y -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -CONFIG_SCSI_QLOGIC_QLA2XXX=y -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set -# CONFIG_TULIP is not set -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -CONFIG_EEPRO100=y -# CONFIG_E100 is not set -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -# CONFIG_TLAN is not set -# CONFIG_TC35815 is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -# CONFIG_DL2K is not set -CONFIG_E1000=y -# CONFIG_MYRI_SBUS is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_SK98LIN is not set -CONFIG_TIGON3=y - -# -# Quadrics QsNet -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_EP3=m -CONFIG_EIP=m -CONFIG_ELAN4=m -CONFIG_RMS=m -CONFIG_JTAG=m -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=y -CONFIG_INPUT_KEYBDEV=y -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_HCDP=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_ACPI=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set -# CONFIG_I2C_ALGOPCF is not set -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -# CONFIG_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set - -# -# Joysticks -# -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_PANIC_EVENT=y -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V1 is not set -# CONFIG_QFMT_V2 is not set -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -# CONFIG_UMSDOS_FS is not set -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=y -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -CONFIG_XFS_FS=y -# CONFIG_XFS_POSIX_ACL is not set -CONFIG_XFS_RT=y -# CONFIG_XFS_QUOTA is not set -CONFIG_XFS_DMAPI=y -# CONFIG_XFS_DEBUG is not set -CONFIG_PAGEBUF_DEBUG=y - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="cp437" -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_ZISOFS_FS is not set -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -# CONFIG_NLS_CODEPAGE_1250 is not set -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y - -# -# Frame-buffer support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -CONFIG_USB=y -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -# CONFIG_USB_LONG_TIMEOUT is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_EHCI_HCD is not set -CONFIG_USB_UHCI_ALT=y -CONFIG_USB_OHCI=y - -# -# USB Device Class drivers -# -# CONFIG_USB_AUDIO is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_BLUETOOTH is not set -# CONFIG_USB_MIDI is not set -# CONFIG_USB_STORAGE is not set -# CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_ACM is not set -# CONFIG_USB_PRINTER is not set - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT is not set -# CONFIG_USB_HIDDEV is not set -# CONFIG_USB_AIPTEK is not set -# CONFIG_USB_WACOM is not set - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_SCANNER is not set -# CONFIG_USB_MICROTEK is not set -# CONFIG_USB_HPUSBSCSI is not set - -# -# USB Multimedia devices -# - -# -# Video4Linux support is needed for USB Multimedia device support -# - -# -# USB Network adaptors -# -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_CATC is not set -# CONFIG_USB_CDCETHER is not set -# CONFIG_USB_USBNET is not set - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set - -# -# USB Serial Converter support -# -# CONFIG_USB_SERIAL is not set - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_AUERSWALD is not set -# CONFIG_USB_TIGL is not set -# CONFIG_USB_BRLVGER is not set -# CONFIG_USB_LCD is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -# CONFIG_ZLIB_DEFLATE is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -# CONFIG_IA64_GRANULE_16MB is not set -CONFIG_IA64_GRANULE_64MB=y -CONFIG_DUMP=m -CONFIG_DUMP_COMPRESS_RLE=m -CONFIG_DUMP_COMPRESS_GZIP=m -CONFIG_DEBUG_KERNEL=y -# CONFIG_IA64_PRINT_HAZARDS is not set -# CONFIG_DISABLE_VHPT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_IA64_EARLY_PRINTK=y -CONFIG_IA64_EARLY_PRINTK_UART=y -CONFIG_IA64_EARLY_PRINTK_UART_BASE=0 -# CONFIG_IA64_EARLY_PRINTK_VGA is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_IA64_DEBUG_CMPXCHG is not set -# CONFIG_IA64_DEBUG_IRQ is not set -CONFIG_KALLSYMS=y -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config deleted file mode 100644 index 1103250..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config +++ /dev/null @@ -1,1866 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_X86_NUMA is not set -# CONFIG_X86_TSC_DISABLE is not set -CONFIG_X86_TSC=y -CONFIG_HAVE_DEC_LOCK=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=y -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_HOTPLUG_PCI_COMPAQ=m -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_IBM=m -# CONFIG_HOTPLUG_PCI_H2999 is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -# CONFIG_ACPI is not set -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=m -CONFIG_CIPHERS=m -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_IDENTITY=m -CONFIG_CRYPTODEV=m -CONFIG_CRYPTOLOOP=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IPV6=m - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_NEWISP=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -# CONFIG_EWRK3 is not set -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -# CONFIG_I2C_I810 is not set -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_BATTERY_GERICOM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Crypto Hardware support -# -CONFIG_CRYPTO=m -CONFIG_CRYPTO_BROADCOM=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -CONFIG_AFS_FS=m -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -# CONFIG_FB_MATROX_G450 is not set -CONFIG_FB_MATROX_G100A=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_FRAME_POINTER=y -# CONFIG_MCL_COREDUMP is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config deleted file mode 100644 index 1103250..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config +++ /dev/null @@ -1,1866 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_X86_NUMA is not set -# CONFIG_X86_TSC_DISABLE is not set -CONFIG_X86_TSC=y -CONFIG_HAVE_DEC_LOCK=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=y -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_HOTPLUG_PCI_COMPAQ=m -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_IBM=m -# CONFIG_HOTPLUG_PCI_H2999 is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -# CONFIG_ACPI is not set -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=m -CONFIG_CIPHERS=m -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_IDENTITY=m -CONFIG_CRYPTODEV=m -CONFIG_CRYPTOLOOP=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IPV6=m - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_NEWISP=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -# CONFIG_EWRK3 is not set -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -# CONFIG_I2C_I810 is not set -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_BATTERY_GERICOM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Crypto Hardware support -# -CONFIG_CRYPTO=m -CONFIG_CRYPTO_BROADCOM=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -CONFIG_AFS_FS=m -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -# CONFIG_FB_MATROX_G450 is not set -CONFIG_FB_MATROX_G100A=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_FRAME_POINTER=y -# CONFIG_MCL_COREDUMP is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686-smp.config deleted file mode 100644 index 7d0ceb4..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686-smp.config +++ /dev/null @@ -1,2139 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -# CONFIG_HIGHMEM4G is not set -CONFIG_HIGHMEM64G=y -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_X86_PAE=y -CONFIG_HIGHIO=y -# CONFIG_X86_4G is not set -CONFIG_3GB=y -# CONFIG_2GB is not set -# CONFIG_1GB is not set -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_NR_SIBLINGS_0 is not set -CONFIG_NR_SIBLINGS_2=y -CONFIG_SHARE_RUNQUEUE=y -CONFIG_MAX_NR_SIBLINGS=2 -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_X86_NUMA=y -# CONFIG_X86_NUMAQ is not set -CONFIG_X86_SUMMIT=y -CONFIG_X86_CLUSTERED_APIC=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 -CONFIG_HAVE_DEC_LOCK=y - -# -# General setup -# -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -# CONFIG_SCx200 is not set -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_COMPAQ=m -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_IBM=m -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_IKCONFIG is not set -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -# CONFIG_BLK_DEV_XD is not set -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m - -# -# Parallel IDE high-level drivers -# -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m - -# -# Parallel IDE protocol modules -# -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -# CONFIG_EWRK3 is not set -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_B44=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m - -# -# Wireless Pcmcia cards support -# -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y - -# -# Wireless Pcmcia cards support -# -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -# CONFIG_TMSISA is not set -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# low-level hardware drivers -# - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y - -# -# D-channel protocol features -# -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 - -# -# HiSax supported cards -# -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ATI_CD1865=m -# CONFIG_COMPUTONE is not set -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -# CONFIG_ICOM is not set -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -# CONFIG_NVRAM is not set -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 - -# -# The compressor will be built as a module only! -# -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set - -# -# Hardware configuration -# -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m - -# -# Video Adapters -# -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -# CONFIG_VIDEO_MEYE is not set - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_INTEL=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m -CONFIG_SOUND_AD1980=m -CONFIG_SOUND_WM97XX=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_CRYPTO_BROADCOM=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -CONFIG_FUSION_ISENSE=m -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_FRAME_POINTER is not set -CONFIG_DEBUG_HIGHMEM=y -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_PANIC_MORSE=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_SMALL is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686.config deleted file mode 100644 index 2e0d417..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-i686.config +++ /dev/null @@ -1,2139 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -# CONFIG_HIGHMEM4G is not set -CONFIG_HIGHMEM64G=y -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_X86_PAE=y -CONFIG_HIGHIO=y -# CONFIG_X86_4G is not set -CONFIG_3GB=y -# CONFIG_2GB is not set -# CONFIG_1GB is not set -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_NR_SIBLINGS_0 is not set -CONFIG_NR_SIBLINGS_2=y -CONFIG_SHARE_RUNQUEUE=y -CONFIG_MAX_NR_SIBLINGS=2 -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_X86_NUMA=y -# CONFIG_X86_NUMAQ is not set -CONFIG_X86_SUMMIT=y -CONFIG_X86_CLUSTERED_APIC=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 -CONFIG_HAVE_DEC_LOCK=y - -# -# General setup -# -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -# CONFIG_SCx200 is not set -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_COMPAQ=m -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_IBM=m -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_IKCONFIG is not set -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -# CONFIG_BLK_DEV_XD is not set -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m - -# -# Parallel IDE high-level drivers -# -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m - -# -# Parallel IDE protocol modules -# -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -# CONFIG_EWRK3 is not set -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_B44=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m - -# -# Wireless Pcmcia cards support -# -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y - -# -# Wireless Pcmcia cards support -# -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -# CONFIG_TMSISA is not set -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y - -# -# Packet Radio protocols -# -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# low-level hardware drivers -# - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y - -# -# D-channel protocol features -# -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 - -# -# HiSax supported cards -# -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ATI_CD1865=m -# CONFIG_COMPUTONE is not set -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -# CONFIG_ICOM is not set -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -# CONFIG_NVRAM is not set -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 - -# -# The compressor will be built as a module only! -# -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set - -# -# Hardware configuration -# -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m - -# -# Video Adapters -# -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -# CONFIG_VIDEO_MEYE is not set - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_INTEL=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m -CONFIG_SOUND_AD1980=m -CONFIG_SOUND_WM97XX=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_CRYPTO_BROADCOM=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -# CONFIG_FUSION_ISENSE is not set -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_FRAME_POINTER is not set -CONFIG_DEBUG_HIGHMEM=y -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_PANIC_MORSE=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_SMALL is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config deleted file mode 100644 index ff4b0d1..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config +++ /dev/null @@ -1,1451 +0,0 @@ -# -# Automatically generated make config: don't edit -# - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# General setup -# -CONFIG_IA64=y -# CONFIG_HIGHPTE is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHIO=y -# CONFIG_ISA is not set -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_SBUS is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ITANIUM is not set -CONFIG_MCKINLEY=y -CONFIG_IA64_GENERIC=y -# CONFIG_IA64_DIG is not set -# CONFIG_IA64_HP_SIM is not set -# CONFIG_IA64_HP_ZX1 is not set -# CONFIG_IA64_SGI_SN1 is not set -# CONFIG_IA64_SGI_SN2 is not set -# CONFIG_IA64_PAGE_SIZE_4KB is not set -# CONFIG_IA64_PAGE_SIZE_8KB is not set -CONFIG_IA64_PAGE_SIZE_16KB=y -# CONFIG_IA64_PAGE_SIZE_64KB is not set -CONFIG_IA64_L1_CACHE_SHIFT=7 -CONFIG_IA64_MCA=y -CONFIG_PM=y -CONFIG_KCORE_ELF=y -CONFIG_FORCE_MAX_ZONEORDER=15 -# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set -# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set -CONFIG_HUGETLB_PAGE_SIZE_256MB=y -# CONFIG_HUGETLB_PAGE_SIZE_64MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_256KB is not set -# CONFIG_IA64_PAL_IDLE is not set -CONFIG_SMP=y -CONFIG_IA32_SUPPORT=y -CONFIG_COMPAT=y -CONFIG_PERFMON=y -CONFIG_IA64_PALINFO=y -CONFIG_EFI_VARS=y -CONFIG_IA64_CYCLONE=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_KERNEL_CONFIG=y - -# -# ACPI Support -# -CONFIG_ACPI_PCI=y -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=m -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_ACPI=m - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play configuration -# -# CONFIG_PNP is not set -# CONFIG_ISAPNP is not set -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# - -# -# Texas Instruments PCILynx requires I2C bit-banging -# -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -# CONFIG_BLK_DEV_RZ1000 is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -CONFIG_SCSI_DEBUG_QUEUES=y -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -CONFIG_SCSI_GDTH=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -CONFIG_SCSI_IPS=m -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -CONFIG_SCSI_NSP32=m -# CONFIG_SCSI_DEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_APRICOT is not set -CONFIG_B44=m -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -# CONFIG_FDDI is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -# CONFIG_TMS380TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -# CONFIG_ECC is not set -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_HCDP=y -CONFIG_SERIAL_ACPI=y -CONFIG_HP_DIVA=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_INTEL_RNG=m -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -# CONFIG_AGP_I810 is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_AMD is not set -CONFIG_AGP_AMD_8151=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_SWORKS is not set -CONFIG_AGP_I460=y -CONFIG_AGP_HP_ZX1=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -# CONFIG_DRM_I810 is not set -# CONFIG_DRM_I810_XFREE_41 is not set -# CONFIG_DRM_I830 is not set -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -# CONFIG_FB_RIVA is not set -# CONFIG_FB_CLGEN is not set -# CONFIG_FB_PM2 is not set -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VGA16=m -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -# CONFIG_FB_ATY is not set -# CONFIG_FB_RADEON is not set -CONFIG_FB_ATY128=m -# CONFIG_FB_INTEL is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_CFB8=m -CONFIG_FBCON_CFB16=m -CONFIG_FBCON_CFB24=m -CONFIG_FBCON_CFB32=m -CONFIG_FBCON_VGA_PLANES=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PCI_CONSOLE=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -# CONFIG_SOUND_BT878 is not set -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -# CONFIG_SOUND_AD1816 is not set -CONFIG_SOUND_AD1889=m -# CONFIG_SOUND_SGALAXY is not set -# CONFIG_SOUND_ADLIB is not set -# CONFIG_SOUND_ACI_MIXER is not set -# CONFIG_SOUND_CS4232 is not set -# CONFIG_SOUND_SSCAPE is not set -# CONFIG_SOUND_GUS is not set -CONFIG_SOUND_VMIDI=m -# CONFIG_SOUND_TRIX is not set -# CONFIG_SOUND_MSS is not set -# CONFIG_SOUND_MPU401 is not set -# CONFIG_SOUND_NM256 is not set -# CONFIG_SOUND_MAD16 is not set -# CONFIG_SOUND_PAS is not set -# CONFIG_PAS_JOYSTICK is not set -# CONFIG_SOUND_PSS is not set -# CONFIG_SOUND_SB is not set -# CONFIG_SOUND_AWE32_SYNTH is not set -# CONFIG_SOUND_KAHLUA is not set -# CONFIG_SOUND_WAVEFRONT is not set -# CONFIG_SOUND_MAUI is not set -# CONFIG_SOUND_YM3812 is not set -# CONFIG_SOUND_OPL3SA1 is not set -# CONFIG_SOUND_OPL3SA2 is not set -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -# CONFIG_SOUND_UART6850 is not set -# CONFIG_SOUND_AEDSP16 is not set -# CONFIG_SOUND_TVMIXER is not set -CONFIG_SOUND_AD1980=m -CONFIG_SOUND_WM97XX=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# - -# -# Video4Linux support is needed for USB Multimedia device support -# - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -# CONFIG_BLUEZ_HCIDTL1 is not set -# CONFIG_BLUEZ_HCIBT3C is not set -# CONFIG_BLUEZ_HCIBLUECARD is not set -# CONFIG_BLUEZ_HCIBTUART is not set -CONFIG_BLUEZ_HCIVHCI=m - -# -# Simulated drivers -# -# CONFIG_HP_SIMETH is not set -# CONFIG_HP_SIMSERIAL is not set -# CONFIG_HP_SIMSCSI is not set - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -# CONFIG_CRYPTO_AEP is not set -CONFIG_CRYPTO_BROADCOM=m -# CONFIG_MEGARAC is not set -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -# CONFIG_FUSION_ISENSE is not set -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Kernel hacking -# -CONFIG_IA64_GRANULE_16MB=y -# CONFIG_IA64_GRANULE_64MB is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_IA64_PRINT_HAZARDS=y -# CONFIG_DISABLE_VHPT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_IA64_EARLY_PRINTK is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_IA64_DEBUG_CMPXCHG is not set -# CONFIG_IA64_DEBUG_IRQ is not set -CONFIG_KALLSYMS=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config deleted file mode 100644 index ff4b0d1..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config +++ /dev/null @@ -1,1451 +0,0 @@ -# -# Automatically generated make config: don't edit -# - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# General setup -# -CONFIG_IA64=y -# CONFIG_HIGHPTE is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHIO=y -# CONFIG_ISA is not set -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_SBUS is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ITANIUM is not set -CONFIG_MCKINLEY=y -CONFIG_IA64_GENERIC=y -# CONFIG_IA64_DIG is not set -# CONFIG_IA64_HP_SIM is not set -# CONFIG_IA64_HP_ZX1 is not set -# CONFIG_IA64_SGI_SN1 is not set -# CONFIG_IA64_SGI_SN2 is not set -# CONFIG_IA64_PAGE_SIZE_4KB is not set -# CONFIG_IA64_PAGE_SIZE_8KB is not set -CONFIG_IA64_PAGE_SIZE_16KB=y -# CONFIG_IA64_PAGE_SIZE_64KB is not set -CONFIG_IA64_L1_CACHE_SHIFT=7 -CONFIG_IA64_MCA=y -CONFIG_PM=y -CONFIG_KCORE_ELF=y -CONFIG_FORCE_MAX_ZONEORDER=15 -# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set -# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set -CONFIG_HUGETLB_PAGE_SIZE_256MB=y -# CONFIG_HUGETLB_PAGE_SIZE_64MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set -# CONFIG_HUGETLB_PAGE_SIZE_256KB is not set -# CONFIG_IA64_PAL_IDLE is not set -CONFIG_SMP=y -CONFIG_IA32_SUPPORT=y -CONFIG_COMPAT=y -CONFIG_PERFMON=y -CONFIG_IA64_PALINFO=y -CONFIG_EFI_VARS=y -CONFIG_IA64_CYCLONE=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_KERNEL_CONFIG=y - -# -# ACPI Support -# -CONFIG_ACPI_PCI=y -CONFIG_ACPI=y -CONFIG_ACPI_EFI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=m -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -CONFIG_HOTPLUG_PCI_ACPI=m - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play configuration -# -# CONFIG_PNP is not set -# CONFIG_ISAPNP is not set -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# - -# -# Texas Instruments PCILynx requires I2C bit-banging -# -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -# CONFIG_BLK_DEV_RZ1000 is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -CONFIG_SCSI_DEBUG_QUEUES=y -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -CONFIG_SCSI_GDTH=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -CONFIG_SCSI_IPS=m -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -CONFIG_SCSI_NSP32=m -# CONFIG_SCSI_DEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_APRICOT is not set -CONFIG_B44=m -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -# CONFIG_FDDI is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -# CONFIG_TMS380TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -# CONFIG_ECC is not set -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_HCDP=y -CONFIG_SERIAL_ACPI=y -CONFIG_HP_DIVA=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_INTEL_RNG=m -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -# CONFIG_AGP_I810 is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_AMD is not set -CONFIG_AGP_AMD_8151=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_SWORKS is not set -CONFIG_AGP_I460=y -CONFIG_AGP_HP_ZX1=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -# CONFIG_DRM_I810 is not set -# CONFIG_DRM_I810_XFREE_41 is not set -# CONFIG_DRM_I830 is not set -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -# CONFIG_FB_RIVA is not set -# CONFIG_FB_CLGEN is not set -# CONFIG_FB_PM2 is not set -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VGA16=m -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -# CONFIG_FB_ATY is not set -# CONFIG_FB_RADEON is not set -CONFIG_FB_ATY128=m -# CONFIG_FB_INTEL is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_CFB8=m -CONFIG_FBCON_CFB16=m -CONFIG_FBCON_CFB24=m -CONFIG_FBCON_CFB32=m -CONFIG_FBCON_VGA_PLANES=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PCI_CONSOLE=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -# CONFIG_SOUND_BT878 is not set -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -# CONFIG_SOUND_AD1816 is not set -CONFIG_SOUND_AD1889=m -# CONFIG_SOUND_SGALAXY is not set -# CONFIG_SOUND_ADLIB is not set -# CONFIG_SOUND_ACI_MIXER is not set -# CONFIG_SOUND_CS4232 is not set -# CONFIG_SOUND_SSCAPE is not set -# CONFIG_SOUND_GUS is not set -CONFIG_SOUND_VMIDI=m -# CONFIG_SOUND_TRIX is not set -# CONFIG_SOUND_MSS is not set -# CONFIG_SOUND_MPU401 is not set -# CONFIG_SOUND_NM256 is not set -# CONFIG_SOUND_MAD16 is not set -# CONFIG_SOUND_PAS is not set -# CONFIG_PAS_JOYSTICK is not set -# CONFIG_SOUND_PSS is not set -# CONFIG_SOUND_SB is not set -# CONFIG_SOUND_AWE32_SYNTH is not set -# CONFIG_SOUND_KAHLUA is not set -# CONFIG_SOUND_WAVEFRONT is not set -# CONFIG_SOUND_MAUI is not set -# CONFIG_SOUND_YM3812 is not set -# CONFIG_SOUND_OPL3SA1 is not set -# CONFIG_SOUND_OPL3SA2 is not set -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -# CONFIG_SOUND_UART6850 is not set -# CONFIG_SOUND_AEDSP16 is not set -# CONFIG_SOUND_TVMIXER is not set -CONFIG_SOUND_AD1980=m -CONFIG_SOUND_WM97XX=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# - -# -# Video4Linux support is needed for USB Multimedia device support -# - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -# CONFIG_BLUEZ_HCIDTL1 is not set -# CONFIG_BLUEZ_HCIBT3C is not set -# CONFIG_BLUEZ_HCIBLUECARD is not set -# CONFIG_BLUEZ_HCIBTUART is not set -CONFIG_BLUEZ_HCIVHCI=m - -# -# Simulated drivers -# -# CONFIG_HP_SIMETH is not set -# CONFIG_HP_SIMSERIAL is not set -# CONFIG_HP_SIMSCSI is not set - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -# CONFIG_CRYPTO_AEP is not set -CONFIG_CRYPTO_BROADCOM=m -# CONFIG_MEGARAC is not set -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -# CONFIG_FUSION_ISENSE is not set -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Kernel hacking -# -CONFIG_IA64_GRANULE_16MB=y -# CONFIG_IA64_GRANULE_64MB is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_IA64_PRINT_HAZARDS=y -# CONFIG_DISABLE_VHPT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_IA64_EARLY_PRINTK is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_IA64_DEBUG_CMPXCHG is not set -# CONFIG_IA64_DEBUG_IRQ is not set -CONFIG_KALLSYMS=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config deleted file mode 100644 index 0d9818a..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config +++ /dev/null @@ -1,1787 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_X86_64=y -CONFIG_X86=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -# CONFIG_UID16 is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_MK8 is not set -CONFIG_IA32E=y -# CONFIG_GENERIC_CPU is not set -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_GOOD_APIC=y -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_MATH_EMULATION is not set -# CONFIG_MCA is not set -# CONFIG_EISA is not set -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_HPET_TIMER=y -CONFIG_GART_IOMMU=y -CONFIG_SWIOTLB=y -CONFIG_NR_SIBLINGS_0=y -# CONFIG_NR_SIBLINGS_2 is not set -CONFIG_HAVE_DEC_LOCK=y -CONFIG_MCE=y -CONFIG_K8_NUMA=y -CONFIG_DISCONTIGMEM=y -CONFIG_NUMA=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_IA32_EMULATION=y -CONFIG_COMPAT=y -CONFIG_ACPI=y - -# -# ACPI Support -# -CONFIG_ACPI=y -# CONFIG_ACPI_HT_ONLY is not set -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -# CONFIG_ACPI_SLEEP is not set -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_NUMA is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set -# CONFIG_ACPI_DEBUG is not set -# CONFIG_ACPI_PMTMR is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -# CONFIG_BLK_DEV_XD is not set -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m - -# -# Parallel IDE high-level drivers -# -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m - -# -# Parallel IDE protocol modules -# -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set -# CONFIG_PHONE_IXJ is not set -# CONFIG_PHONE_IXJ_PCMCIA is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -# CONFIG_BLK_DEV_RZ1000 is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -CONFIG_SCSI_GDTH=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -CONFIG_SCSI_IPS=m -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_SEAGATE=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -# CONFIG_SCSI_DEBUG is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# - -# -# Texas Instruments PCILynx requires I2C bit-banging -# -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -CONFIG_NET_VENDOR_SMC=y -# CONFIG_WD80x3 is not set -# CONFIG_ULTRAMCA is not set -# CONFIG_ULTRA is not set -# CONFIG_ULTRA32 is not set -# CONFIG_SMC9194 is not set -CONFIG_NET_VENDOR_RACAL=y -# CONFIG_NI5010 is not set -# CONFIG_NI52 is not set -# CONFIG_NI65 is not set -CONFIG_HP100=m -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_APRICOT is not set -CONFIG_B44=m -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -# CONFIG_DE600 is not set -# CONFIG_DE620 is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_NET_WIRELESS=y - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -# CONFIG_TMS380TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -# CONFIG_CYCLADES_SYNC is not set -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# low-level hardware drivers -# - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y - -# -# D-channel protocol features -# -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 - -# -# HiSax supported cards -# -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -# CONFIG_HISAX_AVM_A1_PCMCIA is not set -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -# CONFIG_HISAX_SEDLBAUER_CS is not set -# CONFIG_HISAX_ELSA_CS is not set -# CONFIG_HISAX_AVM_A1_CS is not set -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -# CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON is not set -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -# CONFIG_ISDN_DRV_AVMB1_B1ISA is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCI is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCIV4 is not set -# CONFIG_ISDN_DRV_AVMB1_T1ISA is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCMCIA is not set -# CONFIG_ISDN_DRV_AVMB1_AVM_CS is not set -# CONFIG_ISDN_DRV_AVMB1_T1PCI is not set -# CONFIG_ISDN_DRV_AVMB1_C4 is not set -# CONFIG_HYSDN is not set -# CONFIG_HYSDN_CAPI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -# CONFIG_ECC is not set -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_ACPI=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ATI_CD1865=m -# CONFIG_COMPUTONE is not set -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -# CONFIG_ESPSERIAL is not set -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -# CONFIG_ICOM is not set -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -# CONFIG_AMD7XX_TCO is not set -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -# CONFIG_AMD_PM768 is not set -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set -CONFIG_MWAVE=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -# CONFIG_I2C_PARPORT is not set - -# -# Video Adapters -# -# CONFIG_VIDEO_BT848 is not set -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -# CONFIG_VIDEO_SAA5249 is not set -# CONFIG_TUNER_3036 is not set -CONFIG_VIDEO_STRADIS=m -# CONFIG_VIDEO_ZORAN is not set -# CONFIG_VIDEO_ZORAN_BUZ is not set -# CONFIG_VIDEO_ZORAN_DC10 is not set -# CONFIG_VIDEO_ZORAN_LML33 is not set -# CONFIG_VIDEO_ZR36120 is not set -# CONFIG_VIDEO_MEYE is not set - -# -# Radio Adapters -# -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -# CONFIG_RADIO_MIROPCM20 is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -# CONFIG_FB_INTEL is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -# CONFIG_SOUND_BT878 is not set -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -# CONFIG_SOUND_AD1816 is not set -CONFIG_SOUND_AD1889=m -# CONFIG_SOUND_SGALAXY is not set -# CONFIG_SOUND_ADLIB is not set -# CONFIG_SOUND_ACI_MIXER is not set -# CONFIG_SOUND_CS4232 is not set -# CONFIG_SOUND_SSCAPE is not set -# CONFIG_SOUND_GUS is not set -CONFIG_SOUND_VMIDI=m -# CONFIG_SOUND_TRIX is not set -# CONFIG_SOUND_MSS is not set -# CONFIG_SOUND_MPU401 is not set -# CONFIG_SOUND_NM256 is not set -# CONFIG_SOUND_MAD16 is not set -# CONFIG_SOUND_PAS is not set -# CONFIG_PAS_JOYSTICK is not set -# CONFIG_SOUND_PSS is not set -# CONFIG_SOUND_SB is not set -# CONFIG_SOUND_AWE32_SYNTH is not set -# CONFIG_SOUND_KAHLUA is not set -# CONFIG_SOUND_WAVEFRONT is not set -# CONFIG_SOUND_MAUI is not set -# CONFIG_SOUND_YM3812 is not set -# CONFIG_SOUND_OPL3SA1 is not set -# CONFIG_SOUND_OPL3SA2 is not set -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -# CONFIG_SOUND_UART6850 is not set -# CONFIG_SOUND_AEDSP16 is not set -# CONFIG_SOUND_TVMIXER is not set -# CONFIG_SOUND_AD1980 is not set -# CONFIG_SOUND_WM97XX is not set - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -# CONFIG_USB_RTL8150 is not set -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -# CONFIG_BLUEZ_HCIDTL1 is not set -# CONFIG_BLUEZ_HCIBT3C is not set -# CONFIG_BLUEZ_HCIBLUECARD is not set -# CONFIG_BLUEZ_HCIBTUART is not set -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -# CONFIG_CRYPTO_AEP is not set -CONFIG_CRYPTO_BROADCOM=m -# CONFIG_MEGARAC is not set -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -# CONFIG_FUSION_ISENSE is not set -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_SLAB is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_CHECKING=y -CONFIG_INIT_DEBUG=y -# CONFIG_IOMMU_DEBUG is not set -# CONFIG_IOMMU_LEAK is not set -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_KALLSYMS=y - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config deleted file mode 100644 index 0d9818a..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config +++ /dev/null @@ -1,1787 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_X86_64=y -CONFIG_X86=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -# CONFIG_UID16 is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_MK8 is not set -CONFIG_IA32E=y -# CONFIG_GENERIC_CPU is not set -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_GOOD_APIC=y -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_MATH_EMULATION is not set -# CONFIG_MCA is not set -# CONFIG_EISA is not set -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_HPET_TIMER=y -CONFIG_GART_IOMMU=y -CONFIG_SWIOTLB=y -CONFIG_NR_SIBLINGS_0=y -# CONFIG_NR_SIBLINGS_2 is not set -CONFIG_HAVE_DEC_LOCK=y -CONFIG_MCE=y -CONFIG_K8_NUMA=y -CONFIG_DISCONTIGMEM=y -CONFIG_NUMA=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_IA32_EMULATION=y -CONFIG_COMPAT=y -CONFIG_ACPI=y - -# -# ACPI Support -# -CONFIG_ACPI=y -# CONFIG_ACPI_HT_ONLY is not set -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -# CONFIG_ACPI_SLEEP is not set -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_NUMA is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set -# CONFIG_ACPI_DEBUG is not set -# CONFIG_ACPI_PMTMR is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -# CONFIG_BLK_DEV_XD is not set -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m - -# -# Parallel IDE high-level drivers -# -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m - -# -# Parallel IDE protocol modules -# -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -# CONFIG_CISS_MONITOR_THREAD is not set -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y -CONFIG_DISKDUMP=m -CONFIG_BLOCKDUMP=m - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_TUNNEL=m -CONFIG_XFRM=y -CONFIG_XFRM_USER=y -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -CONFIG_EDP2=m -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set -# CONFIG_PHONE_IXJ is not set -# CONFIG_PHONE_IXJ_PCMCIA is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set - -# -# IDE chipset support/bugfixes -# -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -# CONFIG_BLK_DEV_RZ1000 is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=256 -CONFIG_SD_IOSTATS=y -CONFIG_SCSI_DUMP=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_BLK_DEV_3W_9XXX_RAID=m -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -# CONFIG_SCSI_ADP94XX is not set -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -CONFIG_SCSI_GDTH=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -CONFIG_SCSI_IPS=m -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_SEAGATE=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -# CONFIG_SCSI_DEBUG is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m - -# -# Device Drivers -# - -# -# Texas Instruments PCILynx requires I2C bit-banging -# -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -CONFIG_NET_VENDOR_SMC=y -# CONFIG_WD80x3 is not set -# CONFIG_ULTRAMCA is not set -# CONFIG_ULTRA is not set -# CONFIG_ULTRA32 is not set -# CONFIG_SMC9194 is not set -CONFIG_NET_VENDOR_RACAL=y -# CONFIG_NI5010 is not set -# CONFIG_NI52 is not set -# CONFIG_NI65 is not set -CONFIG_HP100=m -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_APRICOT is not set -CONFIG_B44=m -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_FORCEDETH=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -# CONFIG_DE600 is not set -# CONFIG_DE620 is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_E1000_DISABLE_PACKET_SPLIT=y -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_BNX2=m - -# -# Quadrics QsNet device support -# -CONFIG_QSNET=m -CONFIG_ELAN3=m -CONFIG_ELAN4=m -CONFIG_EP=m -CONFIG_EIP=m -CONFIG_RMS=m -CONFIG_JTAG=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_NET_WIRELESS=y - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -# CONFIG_TMS380TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -# CONFIG_RCPCI is not set -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -# CONFIG_CYCLADES_SYNC is not set -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# low-level hardware drivers -# - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y - -# -# D-channel protocol features -# -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 - -# -# HiSax supported cards -# -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -# CONFIG_HISAX_AVM_A1_PCMCIA is not set -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -# CONFIG_HISAX_SEDLBAUER_CS is not set -# CONFIG_HISAX_ELSA_CS is not set -# CONFIG_HISAX_AVM_A1_CS is not set -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -# CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON is not set -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -# CONFIG_ISDN_DRV_AVMB1_B1ISA is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCI is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCIV4 is not set -# CONFIG_ISDN_DRV_AVMB1_T1ISA is not set -# CONFIG_ISDN_DRV_AVMB1_B1PCMCIA is not set -# CONFIG_ISDN_DRV_AVMB1_AVM_CS is not set -# CONFIG_ISDN_DRV_AVMB1_T1PCI is not set -# CONFIG_ISDN_DRV_AVMB1_C4 is not set -# CONFIG_HYSDN is not set -# CONFIG_HYSDN_CAPI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -# CONFIG_ECC is not set -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_ACPI=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_ATI_CD1865=m -# CONFIG_COMPUTONE is not set -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -# CONFIG_ESPSERIAL is not set -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -# CONFIG_ICOM is not set -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m - -# -# Joysticks -# -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_PANIC_STRING is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -# CONFIG_AMD7XX_TCO is not set -CONFIG_HANGCHECK_TIMER=m -CONFIG_HANGCHECK_DELAY=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -# CONFIG_AMD_PM768 is not set -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set - -# -# DRM 4.1 drivers -# -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set -CONFIG_MWAVE=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -# CONFIG_I2C_PARPORT is not set - -# -# Video Adapters -# -# CONFIG_VIDEO_BT848 is not set -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -# CONFIG_VIDEO_SAA5249 is not set -# CONFIG_TUNER_3036 is not set -CONFIG_VIDEO_STRADIS=m -# CONFIG_VIDEO_ZORAN is not set -# CONFIG_VIDEO_ZORAN_BUZ is not set -# CONFIG_VIDEO_ZORAN_DC10 is not set -# CONFIG_VIDEO_ZORAN_LML33 is not set -# CONFIG_VIDEO_ZR36120 is not set -# CONFIG_VIDEO_MEYE is not set - -# -# Radio Adapters -# -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -# CONFIG_RADIO_MIROPCM20 is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_DIRECTIO=y -CONFIG_NFS_ACL=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -# CONFIG_FB_INTEL is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_TEST=m - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -# CONFIG_SOUND_BT878 is not set -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -# CONFIG_SOUND_AD1816 is not set -CONFIG_SOUND_AD1889=m -# CONFIG_SOUND_SGALAXY is not set -# CONFIG_SOUND_ADLIB is not set -# CONFIG_SOUND_ACI_MIXER is not set -# CONFIG_SOUND_CS4232 is not set -# CONFIG_SOUND_SSCAPE is not set -# CONFIG_SOUND_GUS is not set -CONFIG_SOUND_VMIDI=m -# CONFIG_SOUND_TRIX is not set -# CONFIG_SOUND_MSS is not set -# CONFIG_SOUND_MPU401 is not set -# CONFIG_SOUND_NM256 is not set -# CONFIG_SOUND_MAD16 is not set -# CONFIG_SOUND_PAS is not set -# CONFIG_PAS_JOYSTICK is not set -# CONFIG_SOUND_PSS is not set -# CONFIG_SOUND_SB is not set -# CONFIG_SOUND_AWE32_SYNTH is not set -# CONFIG_SOUND_KAHLUA is not set -# CONFIG_SOUND_WAVEFRONT is not set -# CONFIG_SOUND_MAUI is not set -# CONFIG_SOUND_YM3812 is not set -# CONFIG_SOUND_OPL3SA1 is not set -# CONFIG_SOUND_OPL3SA2 is not set -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -# CONFIG_SOUND_UART6850 is not set -# CONFIG_SOUND_AEDSP16 is not set -# CONFIG_SOUND_TVMIXER is not set -# CONFIG_SOUND_AD1980 is not set -# CONFIG_SOUND_WM97XX is not set - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m - -# -# USB Device Class drivers -# -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set - -# -# USB Bluetooth can only be used with disabled Bluetooth subsystem -# -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m - -# -# USB Human Interface Devices (HID) -# -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m - -# -# USB Imaging devices -# -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m - -# -# USB Multimedia devices -# -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m - -# -# USB Network adaptors -# -CONFIG_USB_PEGASUS=m -# CONFIG_USB_RTL8150 is not set -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -# CONFIG_USB_AX8817X is not set -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m - -# -# USB port drivers -# -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -# CONFIG_BLUEZ_HCIDTL1 is not set -# CONFIG_BLUEZ_HCIBT3C is not set -# CONFIG_BLUEZ_HCIBLUECARD is not set -# CONFIG_BLUEZ_HCIBTUART is not set -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -# CONFIG_CRYPTO_AEP is not set -CONFIG_CRYPTO_BROADCOM=m -# CONFIG_MEGARAC is not set -CONFIG_FC_QLA2100=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_SCSI_IPR is not set -CONFIG_SCSI_LPFC=m -# CONFIG_FUSION_ISENSE is not set -# CONFIG_DCDBAS is not set -CONFIG_DELL_RBU=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_SLAB is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_CHECKING=y -CONFIG_INIT_DEBUG=y -# CONFIG_IOMMU_DEBUG is not set -# CONFIG_IOMMU_LEAK is not set -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_KALLSYMS=y - -# -# Library routines -# -CONFIG_CRC32=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_QSORT=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686-smp.config deleted file mode 100644 index c369622..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686-smp.config +++ /dev/null @@ -1,2383 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -# CONFIG_M586NOCX8 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -CONFIG_X86_CMPXCHG8=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -# CONFIG_OMNIBOOK is not set -CONFIG_I8K=m -CONFIG_THINKPAD=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -# CONFIG_HIGHMEM4G is not set -CONFIG_HIGHMEM64G=y -CONFIG_HIGHMEM=y -CONFIG_X86_PAE=y -CONFIG_FORCE_MAX_ZONEORDER=10 -CONFIG_1GB=y -# CONFIG_2GB is not set -# CONFIG_3GB is not set -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_ES7000=y -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_X86_NUMA=y -# CONFIG_X86_NUMAQ is not set -CONFIG_X86_SUMMIT=y -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_HAVE_DEC_LOCK=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_EVLOG=y -CONFIG_EVLOG_BUFSIZE=128 -CONFIG_EVLOG_FWPRINTK=y -# CONFIG_EVLOG_PRINTKWLOC is not set -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=m -CONFIG_HOTPLUG_PCI_COMPAQ=m -CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y -CONFIG_HOTPLUG_PCI_IBM=m -CONFIG_HOTPLUG_PCI_ACPI=m -CONFIG_HOTPLUG_PCI_AMD=m -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_MAX_USER_RT_PRIO=100 -CONFIG_MAX_RT_PRIO=0 -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -CONFIG_APM_DO_ENABLE=y -# CONFIG_APM_CPU_IDLE is not set -CONFIG_APM_DISPLAY_BLANK=y -# CONFIG_APM_RTC_IS_GMT is not set -CONFIG_APM_ALLOW_INTS=y -CONFIG_SISBUG=m - -# -# ACPI Support -# -CONFIG_ACPI=y -# CONFIG_ACPI_HT_ONLY is not set -CONFIG_ACPI_INITRD=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -CONFIG_ACPI_ASUS=m -CONFIG_ACPI_TOSHIBA=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_RELAXED_AML=y -CONFIG_PROC_MM=y - -# -# Binary emulation of other systems -# -CONFIG_ABI=m -CONFIG_ABI_SVR4=m -CONFIG_ABI_UW7=m -CONFIG_ABI_SOLARIS=m -CONFIG_ABI_IBCS=m -CONFIG_ABI_ISC=m -CONFIG_ABI_SCO=m -CONFIG_ABI_WYSE=m -CONFIG_BINFMT_COFF=m -CONFIG_BINFMT_XOUT=m -CONFIG_BINFMT_XOUT_X286=y -CONFIG_ABI_SPX=y -CONFIG_ABI_XTI=y -CONFIG_ABI_TLI_OPTMGMT=y -# CONFIG_ABI_XTI_OPTMGMT is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=m -# CONFIG_MTD_DEBUG is not set -CONFIG_MTD_PARTITIONS=m -CONFIG_MTD_CONCAT=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -# CONFIG_NFTL_RW is not set - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_GEOMETRY is not set -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -# CONFIG_MTD_RAM is not set -# CONFIG_MTD_ROM is not set -CONFIG_MTD_ABSENT=m -CONFIG_MTD_OBSOLETE_CHIPS=y -CONFIG_MTD_AMDSTD=m -CONFIG_MTD_SHARP=m -CONFIG_MTD_JEDEC=m - -# -# Mapping drivers for chip access -# -CONFIG_MTD_PHYSMAP=m -CONFIG_MTD_PHYSMAP_START=8000000 -CONFIG_MTD_PHYSMAP_LEN=4000000 -CONFIG_MTD_PHYSMAP_BUSWIDTH=2 -CONFIG_MTD_PNC2000=m -CONFIG_MTD_SC520CDP=m -CONFIG_MTD_NETSC520=m -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_ELAN_104NC=m -CONFIG_MTD_DILNETPC=m -CONFIG_MTD_DILNETPC_BOOTSIZE=80000 -CONFIG_MTD_MIXMEM=m -CONFIG_MTD_OCTAGON=m -CONFIG_MTD_VMAX=m -CONFIG_MTD_SCx200_DOCFLASH=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICH2ROM=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -CONFIG_MTD_PMC551_BUGFIX=y -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_SLRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLKMTD=m -CONFIG_MTD_DOC1000=m -CONFIG_MTD_DOC2000=m -CONFIG_MTD_DOC2001=m -CONFIG_MTD_DOCPROBE=m -CONFIG_MTD_DOCPROBE_ADVANCED=y -CONFIG_MTD_DOCPROBE_ADDRESS=0000 -CONFIG_MTD_DOCPROBE_HIGH=y -CONFIG_MTD_DOCPROBE_55AA=y - -# -# NAND Flash Device Drivers -# -CONFIG_MTD_NAND=m -# CONFIG_MTD_NAND_VERIFY_WRITE is not set -CONFIG_MTD_NAND_IDS=m - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -CONFIG_PARPORT_OTHER=y -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=m - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_CIPHER_TWOFISH=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=64000 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Enterprise Volume Management System -# -CONFIG_EVMS=m -CONFIG_EVMS_LOCAL_DEV_MGR=m -CONFIG_EVMS_DOS_SEGMENT_MGR=m -CONFIG_EVMS_GPT_SEGMENT_MGR=m -CONFIG_EVMS_SNAPSHOT=m -CONFIG_EVMS_DRIVELINK=m -CONFIG_EVMS_BBR=m -CONFIG_EVMS_LVM=m -CONFIG_EVMS_MD=m -CONFIG_EVMS_MD_LINEAR=m -CONFIG_EVMS_MD_RAID0=m -CONFIG_EVMS_MD_RAID1=m -CONFIG_EVMS_MD_RAID5=m -CONFIG_EVMS_AIX=m -CONFIG_EVMS_OS2=m -# CONFIG_EVMS_INFO_CRITICAL is not set -# CONFIG_EVMS_INFO_SERIOUS is not set -# CONFIG_EVMS_INFO_ERROR is not set -# CONFIG_EVMS_INFO_WARNING is not set -CONFIG_EVMS_INFO_DEFAULT=y -# CONFIG_EVMS_INFO_DETAILS is not set -# CONFIG_EVMS_INFO_DEBUG is not set -# CONFIG_EVMS_INFO_EXTRA is not set -# CONFIG_EVMS_INFO_ENTRY_EXIT is not set -# CONFIG_EVMS_INFO_EVERYTHING is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m -CONFIG_BLK_DEV_DM=m -CONFIG_BLK_DEV_DM_MIRROR=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=y -CONFIG_CIPHERS=y -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_3DES=m -CONFIG_CIPHER_NULL=m -CONFIG_CIPHER_DES=m -CONFIG_DIGESTS=y -CONFIG_DIGEST_MD5=m -CONFIG_DIGEST_SHA1=m -CONFIG_CRYPTODEV=y -CONFIG_CRYPTOLOOP=m -CONFIG_CRYPTOLOOP_ATOMIC=y -# CONFIG_CRYPTOLOOP_IV_HACK is not set -# CONFIG_CRYPTOLOOP_DEBUG is not set - -# -# Networking options -# -CONFIG_PACKET=m -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IPSEC=m -CONFIG_IPSEC_DEBUG=y -CONFIG_IPSEC_DEBUG_DISABLE_DEFAULT=y -CONFIG_IPSEC_TUNNEL=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -CONFIG_TUX_EXTENDED_LOG=y -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -CONFIG_INET_ECN=y -CONFIG_SYN_COOKIES=y -CONFIG_IP_IPSEC=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_PSD=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_IPLIMIT=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_STRING=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -# CONFIG_IPV6_DEBUG is not set -CONFIG_IPV6_IM=y -CONFIG_IPV6_MODULE_IP_GRE=y -CONFIG_IPV6_ISATAP=y -CONFIG_IPV6_PREFIXLIST=y -CONFIG_IPV6_6TO4_NEXTHOP=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_SUBTREES=y -# CONFIG_IPV6_MLD6_ALL_DONE is not set -# CONFIG_IPV6_NODEINFO is not set -CONFIG_IPV6_ZONE=y -# CONFIG_IPV6_ZONE_SITELOCAL is not set -CONFIG_IPV6_DROP_FAKE_V4MAPPED=y - -# -# IPv6: Netfilter Configuration -# -CONFIG_IP6_NF_QUEUE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_IPV6_IPSEC=y -CONFIG_IPV6_IPSEC_TUNNEL=y -CONFIG_IPV6_IPV6_TUNNEL=m -CONFIG_IPV6_MOBILITY=m -CONFIG_IPV6_MOBILITY_CN=m -CONFIG_IPV6_MOBILITY_MN=m -CONFIG_IPV6_MOBILITY_HA=m -# CONFIG_IPV6_MOBILITY_DEBUG is not set -# CONFIG_SHARED_IPV6_CARDS is not set -CONFIG_KHTTPD=m -CONFIG_KHTTPD_IPV6=y -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -CONFIG_ATM_CLIP_NO_ICMP=y -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -# CONFIG_DECNET_ROUTER is not set -CONFIG_BRIDGE=m -CONFIG_X25=m -CONFIG_LAPB=m -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -CONFIG_ECONET=m -# CONFIG_ECONET_AUNUDP is not set -# CONFIG_ECONET_NATIVE is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -CONFIG_NET_SCH_ATM=y -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_IDEDISK_STROKE=y -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -CONFIG_BLK_DEV_CMD640_ENHANCED=y -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -CONFIG_BLK_DEV_OFFBOARD=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -CONFIG_IDEDMA_ONLYDISK=y -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=m -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NS87415=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_SC1200=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_BLK_DEV_CENATEK=y -CONFIG_IDE_CHIPSETS=y -CONFIG_BLK_DEV_4DRIVES=y -CONFIG_BLK_DEV_ALI14XX=y -CONFIG_BLK_DEV_DTC2278=y -CONFIG_BLK_DEV_HT6560B=y -# CONFIG_BLK_DEV_PDC4030 is not set -CONFIG_BLK_DEV_QD65XX=y -CONFIG_BLK_DEV_UMC8672=y -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_MAX_MAJORS=144 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -# CONFIG_BLK_DEV_SR_VENDOR is not set -CONFIG_SR_EXTRA_DEVS=32 -CONFIG_CHR_DEV_SCH=m -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_PROBE_EISA_VL=y -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=24 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_ATA=y -# CONFIG_SCSI_ATA_PATA is not set -# CONFIG_SCSI_ATA_ATAPI is not set -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_ATA_VIA=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -CONFIG_SCSI_GENERIC_NCR53C400=y -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS_OLD=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_PPSCSI=m -CONFIG_PPSCSI_T348=m -CONFIG_PPSCSI_T358=m -CONFIG_PPSCSI_VPI0=m -CONFIG_PPSCSI_VPI2=m -CONFIG_PPSCSI_ONSCSI=m -CONFIG_PPSCSI_SPARCSI=m -CONFIG_PPSCSI_EPSA2=m -CONFIG_PPSCSI_EPST=m -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=80 -CONFIG_SCSI_NCR53C8XX_PROFILE=y -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX_PQS_PDS=y -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLOGIC_QLA2XXX=y -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m -CONFIG_SCSI_QLA2XXX_60500=y -CONFIG_SCSI_QLA2XXX_QLA2100_60500=m -CONFIG_SCSI_QLA2XXX_QLA2200_60500=m -CONFIG_SCSI_QLA2XXX_QLA2300_60500=m -CONFIG_SCSI_QLA2XXX_60650=y -CONFIG_SCSI_QLA2XXX_QLA2100_60650=m -CONFIG_SCSI_QLA2XXX_QLA2200_60650=m -CONFIG_SCSI_QLA2XXX_QLA2300_60650=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC395x_TRMS1040=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_PDC_ULTRA=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -CONFIG_IEEE1394_OUI_DB=y - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -CONFIG_ARCNET=m -CONFIG_ARCNET_1201=m -CONFIG_ARCNET_1051=m -CONFIG_ARCNET_RAW=m -CONFIG_ARCNET_COM90xx=m -CONFIG_ARCNET_COM90xxIO=m -CONFIG_ARCNET_RIM_I=m -CONFIG_ARCNET_COM20020=m -CONFIG_ARCNET_COM20020_ISA=m -CONFIG_ARCNET_COM20020_PCI=m -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_PCNET32_OLD=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_NET_BCM4400=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NETGEAR_GA621=m -CONFIG_NETGEAR_GA622=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NEW=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_SK98LIN_T1=y -CONFIG_SK98LIN_T3=y -CONFIG_SK98LIN_T8=y -CONFIG_SK98LIN_T6=y -CONFIG_SK98LIN_T9=y -CONFIG_SK98LIN_T4=y -CONFIG_SK98LIN_T7=y -CONFIG_SK98LIN_T2=y -CONFIG_SK98LIN_T5=y -CONFIG_SK9DLIN=m -CONFIG_TIGON3=m -CONFIG_NET_BROADCOM=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -CONFIG_HIPPI=y -CONFIG_ROADRUNNER=m -CONFIG_ROADRUNNER_LARGE_RINGS=y -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -CONFIG_COMX=m -CONFIG_COMX_HW_COMX=m -CONFIG_COMX_HW_LOCOMX=m -CONFIG_COMX_HW_MIXCOM=m -CONFIG_COMX_HW_MUNICH=m -CONFIG_COMX_PROTO_PPP=m -CONFIG_COMX_PROTO_LAPB=m -CONFIG_COMX_PROTO_FR=m -CONFIG_DSCC4=m -CONFIG_LANMEDIA=m -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -CONFIG_SYNCLINK_SYNCPPP=m -CONFIG_HDLC=m -CONFIG_HDLC_RAW=y -CONFIG_HDLC_CISCO=y -CONFIG_HDLC_FR=y -CONFIG_HDLC_PPP=y -CONFIG_HDLC_X25=y -CONFIG_N2=m -CONFIG_C101=m -CONFIG_FARSYNC=m -# CONFIG_HDLC_DEBUG_PKT is not set -# CONFIG_HDLC_DEBUG_HARD_HEADER is not set -# CONFIG_HDLC_DEBUG_ECN is not set -# CONFIG_HDLC_DEBUG_RINGS is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -# CONFIG_WANPIPE_FR is not set -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -CONFIG_LAPBETHER=m -CONFIG_X25_ASY=m -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_ARCNET_COM20020_CS=m -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -CONFIG_ATM_IDT77252_RCV_ALL=y -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_DMASCC=m -CONFIG_SCC=m -CONFIG_SCC_DELAY=y -CONFIG_SCC_TRXECHO=y -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_BAYCOM_EPP=m -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -CONFIG_YAM=m - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_IPPP_FILTER=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y -CONFIG_ISDN_X25=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -CONFIG_ISDN_DIVERSION=m - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -# CONFIG_HISAX_DEBUG is not set -CONFIG_HISAX_TELES_CS=m -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -CONFIG_ISDN_DRV_SC=m -CONFIG_ISDN_DRV_ACT2000=m -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -CONFIG_ISDN_DRV_EICON_OLD=m -CONFIG_ISDN_DRV_EICON_PCI=y -CONFIG_ISDN_DRV_EICON_ISA=y -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -CONFIG_CD_NO_IDESCSI=y -CONFIG_AZTCD=m -CONFIG_GSCD=m -CONFIG_SBPCD=m -CONFIG_MCD=m -CONFIG_MCD_IRQ=11 -CONFIG_MCD_BASE=300 -CONFIG_MCDX=m -CONFIG_OPTCD=m -CONFIG_CM206=m -CONFIG_SJCD=m -CONFIG_ISP16_CDI=m -CONFIG_CDU31A=m -CONFIG_CDU535=m - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -CONFIG_HUB6=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -CONFIG_RIO=m -CONFIG_RIO_OLDPCI=y -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -CONFIG_SCx200_I2C=m -CONFIG_SCx200_I2C_SCL=12 -CONFIG_SCx200_I2C_SDA=13 -CONFIG_SCx200_ACB=m -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS645=m -CONFIG_I2C_SAVAGE4=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=y -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_FM801=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -CONFIG_QIC02_TAPE=m -CONFIG_QIC02_DYNCONF=y -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_PANIC_EVENT=y -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -CONFIG_MIXCOMWD=m -CONFIG_60XX_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_SCx200_WDT=m -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -CONFIG_WDT_501=y -CONFIG_WDT_501_FAN=y -CONFIG_MACHZ_WDT=m -CONFIG_DEADMAN=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_AMD7XX_TCO=m -CONFIG_SCx200_GPIO=m -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -CONFIG_APPLICOM=m -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_IBMASM=y -CONFIG_IBMASM_ASM=m -CONFIG_IBMASM_SER=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m -CONFIG_DXR3=y -CONFIG_EM8300=m -# CONFIG_EM8300_LOOPBACK is not set -# CONFIG_EM8300_UCODETIMEOUT is not set -# CONFIG_EM8300_DICOMFIX is not set -# CONFIG_EM8300_DICOMCTRL is not set -CONFIG_EM8300_DICOMPAL=y -CONFIG_ADV717X=m -# CONFIG_ADV717X_SWAP is not set -# CONFIG_ADV717X_PIXELPORT16BIT is not set -# CONFIG_ADV717X_PIXELPORTPAL is not set -CONFIG_BT865=m - -# -# File systems -# -CONFIG_QUOTA=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QIFACE_COMPAT=y -# CONFIG_QIFACE_V1 is not set -CONFIG_QIFACE_V2=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_XATTR_USER=y -CONFIG_REISERFS_FS_XATTR_TRUSTED=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_ADFS_FS=m -# CONFIG_ADFS_FS_RW is not set -CONFIG_AFFS_FS=m -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -CONFIG_JBD_DEBUG=y -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FATX_FS=m -CONFIG_EFS_FS=m -CONFIG_JFFS_FS=m -CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_MINIX_FS=y -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_DEBUG is not set -# CONFIG_NTFS_RW is not set -CONFIG_HPFS_FS=m -CONFIG_PROC_FS=y -CONFIG_PROC_CONFIG=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -CONFIG_QNX4FS_FS=m -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -CONFIG_EXT2_FS_XATTR_TRUSTED=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -CONFIG_XFS_FS=m -CONFIG_XFS_POSIX_ACL=y -# CONFIG_XFS_RT is not set -CONFIG_XFS_QUOTA=y -CONFIG_XFS_DMAPI=y -# CONFIG_XFS_DEBUG is not set -# CONFIG_PAGEBUF_DEBUG is not set -CONFIG_OCFS_FS=m - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_ACL=y -CONFIG_NFS_DIRECTIO=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_NFSD_FHALIAS=y -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_CIFS=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="cp437" -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -CONFIG_ATARI_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_XBOX_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -# CONFIG_MINIX_SUBPARTITION is not set -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -CONFIG_ULTRIX_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -CONFIG_FB_PM2_FIFO_DISCONNECT=y -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -CONFIG_UNICON=y -CONFIG_UNICON_GB=m -CONFIG_UNICON_GBK=m -CONFIG_UNICON_BIG5=m -CONFIG_UNICON_JIS=m -CONFIG_UNICON_KSCM=m -CONFIG_FB_CYBER2000=m -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_MATROX_PROC=m -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_INTEL=m -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_VMWARE_SVGA=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -CONFIG_FB_TRIDENT=m -# CONFIG_FB_VIRTUAL is not set -CONFIG_FBCON_SPLASHSCREEN=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_ADVANCED=y -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB2=m -CONFIG_FBCON_CFB4=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_AFB=m -CONFIG_FBCON_ILBM=m -CONFIG_FBCON_IPLAN2P2=m -CONFIG_FBCON_IPLAN2P4=m -CONFIG_FBCON_IPLAN2P8=m -CONFIG_FBCON_MAC=m -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_VGA=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -CONFIG_FBCON_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -CONFIG_SOUND_TRACEINIT=y -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -# CONFIG_SOUND_GUS16 is not set -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -CONFIG_PSS_MIXER=y -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# Advanced Linux Sound Architecture -# -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -CONFIG_SND_DEBUG_MEMORY=y -# CONFIG_SND_DEBUG_DETECT is not set - -# -# Generic devices -# -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_SERIALMIDI=m - -# -# ISA devices -# -CONFIG_SND_AD1816A=m -CONFIG_SND_AD1848=m -CONFIG_SND_CS4231=m -CONFIG_SND_CS4232=m -CONFIG_SND_CS4236=m -CONFIG_SND_ES968=m -CONFIG_SND_ES1688=m -CONFIG_SND_ES18XX=m -CONFIG_SND_GUSCLASSIC=m -CONFIG_SND_GUSEXTREME=m -CONFIG_SND_GUSMAX=m -CONFIG_SND_INTERWAVE=m -CONFIG_SND_INTERWAVE_STB=m -CONFIG_SND_OPTI92X_AD1848=m -CONFIG_SND_OPTI92X_CS4231=m -CONFIG_SND_OPTI93X=m -CONFIG_SND_SB8=m -CONFIG_SND_SB16=m -CONFIG_SND_SBAWE=m -CONFIG_SND_SB16_CSP=y -CONFIG_SND_WAVEFRONT=m -CONFIG_SND_ALS100=m -CONFIG_SND_AZT2320=m -CONFIG_SND_CMI8330=m -CONFIG_SND_DT019X=m -CONFIG_SND_OPL3SA2=m -CONFIG_SND_SGALAXY=m -CONFIG_SND_SSCAPE=m -CONFIG_SND_MSND_PINNACLE=m - -# -# PCI devices -# -CONFIG_SND_ALI5451=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS4281=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_PDPLUS=m -CONFIG_SND_KORG1212=m -CONFIG_SND_NM256=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_HDSP=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_ALS4000=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_FM801=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VX222=m -CONFIG_SND_MIXART=m -CONFIG_SND_AZT3328=m - -# -# ALSA USB devices -# -CONFIG_SND_USB_AUDIO=m - -# -# ALSA PCMCIA devices -# -CONFIG_SND_VXPOCKET=m -CONFIG_SND_VXP440=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -CONFIG_USB_EMI26=m -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -CONFIG_USB_DC2XX=m -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_LOGITECH_CAM=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_AX8817X=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USBDNET=m -CONFIG_USB_USBDNET_VENDOR=0000 -CONFIG_USB_USBDNET_PRODUCT=0000 -CONFIG_USB_USBDNET_CLASS=0000 -CONFIG_USB_USBDNET_SUBCLASS=0000 -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SAFE_SERIAL_VENDOR=0000 -CONFIG_USB_SAFE_SERIAL_PRODUCT=0000 -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m -CONFIG_USB_SPEEDTOUCH=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y -CONFIG_BLUEZ_CMTP=m - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -# CONFIG_BLUEZ_USB_ZERO_PACKET is not set -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIBFUSB=m -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Kernel hacking -# -CONFIG_DUMP=m -CONFIG_DUMP_COMPRESS_RLE=m -CONFIG_DUMP_COMPRESS_GZIP=m -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_HZ is not set -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_KMSGDUMP is not set -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_FRAME_POINTER=y -# CONFIG_HIGHMEM_EMULATION is not set -# CONFIG_X86_REMOTE_DEBUG is not set -# CONFIG_KDB is not set -# CONFIG_KDB_MODULES is not set -CONFIG_KALLSYMS=y -# CONFIG_HOOK is not set -CONFIG_VTUNE=m - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_QSORT=y -CONFIG_FW_LOADER=m - -# -# Build options -# -# CONFIG_SUSE_KERNEL is not set -CONFIG_UNITEDLINUX_KERNEL=y -CONFIG_CFGNAME="smp" -CONFIG_RELEASE="273" diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686.config deleted file mode 100644 index c369622..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-sles-2.4-i686.config +++ /dev/null @@ -1,2383 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -# CONFIG_M586NOCX8 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -CONFIG_X86_CMPXCHG8=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -# CONFIG_OMNIBOOK is not set -CONFIG_I8K=m -CONFIG_THINKPAD=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -# CONFIG_HIGHMEM4G is not set -CONFIG_HIGHMEM64G=y -CONFIG_HIGHMEM=y -CONFIG_X86_PAE=y -CONFIG_FORCE_MAX_ZONEORDER=10 -CONFIG_1GB=y -# CONFIG_2GB is not set -# CONFIG_3GB is not set -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -CONFIG_ES7000=y -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_X86_NUMA=y -# CONFIG_X86_NUMAQ is not set -CONFIG_X86_SUMMIT=y -CONFIG_X86_CLUSTERED_APIC=y -CONFIG_HAVE_DEC_LOCK=y -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -CONFIG_NET=y -CONFIG_EVLOG=y -CONFIG_EVLOG_BUFSIZE=128 -CONFIG_EVLOG_FWPRINTK=y -# CONFIG_EVLOG_PRINTKWLOC is not set -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=m -CONFIG_HOTPLUG_PCI_COMPAQ=m -CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y -CONFIG_HOTPLUG_PCI_IBM=m -CONFIG_HOTPLUG_PCI_ACPI=m -CONFIG_HOTPLUG_PCI_AMD=m -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_MAX_USER_RT_PRIO=100 -CONFIG_MAX_RT_PRIO=0 -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_AUDIT=m -CONFIG_PM=y -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -CONFIG_APM_DO_ENABLE=y -# CONFIG_APM_CPU_IDLE is not set -CONFIG_APM_DISPLAY_BLANK=y -# CONFIG_APM_RTC_IS_GMT is not set -CONFIG_APM_ALLOW_INTS=y -CONFIG_SISBUG=m - -# -# ACPI Support -# -CONFIG_ACPI=y -# CONFIG_ACPI_HT_ONLY is not set -CONFIG_ACPI_INITRD=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -CONFIG_ACPI_ASUS=m -CONFIG_ACPI_TOSHIBA=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_RELAXED_AML=y -CONFIG_PROC_MM=y - -# -# Binary emulation of other systems -# -CONFIG_ABI=m -CONFIG_ABI_SVR4=m -CONFIG_ABI_UW7=m -CONFIG_ABI_SOLARIS=m -CONFIG_ABI_IBCS=m -CONFIG_ABI_ISC=m -CONFIG_ABI_SCO=m -CONFIG_ABI_WYSE=m -CONFIG_BINFMT_COFF=m -CONFIG_BINFMT_XOUT=m -CONFIG_BINFMT_XOUT_X286=y -CONFIG_ABI_SPX=y -CONFIG_ABI_XTI=y -CONFIG_ABI_TLI_OPTMGMT=y -# CONFIG_ABI_XTI_OPTMGMT is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=m -# CONFIG_MTD_DEBUG is not set -CONFIG_MTD_PARTITIONS=m -CONFIG_MTD_CONCAT=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -# CONFIG_NFTL_RW is not set - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_GEOMETRY is not set -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -# CONFIG_MTD_RAM is not set -# CONFIG_MTD_ROM is not set -CONFIG_MTD_ABSENT=m -CONFIG_MTD_OBSOLETE_CHIPS=y -CONFIG_MTD_AMDSTD=m -CONFIG_MTD_SHARP=m -CONFIG_MTD_JEDEC=m - -# -# Mapping drivers for chip access -# -CONFIG_MTD_PHYSMAP=m -CONFIG_MTD_PHYSMAP_START=8000000 -CONFIG_MTD_PHYSMAP_LEN=4000000 -CONFIG_MTD_PHYSMAP_BUSWIDTH=2 -CONFIG_MTD_PNC2000=m -CONFIG_MTD_SC520CDP=m -CONFIG_MTD_NETSC520=m -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_ELAN_104NC=m -CONFIG_MTD_DILNETPC=m -CONFIG_MTD_DILNETPC_BOOTSIZE=80000 -CONFIG_MTD_MIXMEM=m -CONFIG_MTD_OCTAGON=m -CONFIG_MTD_VMAX=m -CONFIG_MTD_SCx200_DOCFLASH=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICH2ROM=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -CONFIG_MTD_PMC551_BUGFIX=y -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_SLRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLKMTD=m -CONFIG_MTD_DOC1000=m -CONFIG_MTD_DOC2000=m -CONFIG_MTD_DOC2001=m -CONFIG_MTD_DOCPROBE=m -CONFIG_MTD_DOCPROBE_ADVANCED=y -CONFIG_MTD_DOCPROBE_ADDRESS=0000 -CONFIG_MTD_DOCPROBE_HIGH=y -CONFIG_MTD_DOCPROBE_55AA=y - -# -# NAND Flash Device Drivers -# -CONFIG_MTD_NAND=m -# CONFIG_MTD_NAND_VERIFY_WRITE is not set -CONFIG_MTD_NAND_IDS=m - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -CONFIG_PARPORT_OTHER=y -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=m - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_CIPHER_TWOFISH=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=64000 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Enterprise Volume Management System -# -CONFIG_EVMS=m -CONFIG_EVMS_LOCAL_DEV_MGR=m -CONFIG_EVMS_DOS_SEGMENT_MGR=m -CONFIG_EVMS_GPT_SEGMENT_MGR=m -CONFIG_EVMS_SNAPSHOT=m -CONFIG_EVMS_DRIVELINK=m -CONFIG_EVMS_BBR=m -CONFIG_EVMS_LVM=m -CONFIG_EVMS_MD=m -CONFIG_EVMS_MD_LINEAR=m -CONFIG_EVMS_MD_RAID0=m -CONFIG_EVMS_MD_RAID1=m -CONFIG_EVMS_MD_RAID5=m -CONFIG_EVMS_AIX=m -CONFIG_EVMS_OS2=m -# CONFIG_EVMS_INFO_CRITICAL is not set -# CONFIG_EVMS_INFO_SERIOUS is not set -# CONFIG_EVMS_INFO_ERROR is not set -# CONFIG_EVMS_INFO_WARNING is not set -CONFIG_EVMS_INFO_DEFAULT=y -# CONFIG_EVMS_INFO_DETAILS is not set -# CONFIG_EVMS_INFO_DEBUG is not set -# CONFIG_EVMS_INFO_EXTRA is not set -# CONFIG_EVMS_INFO_ENTRY_EXIT is not set -# CONFIG_EVMS_INFO_EVERYTHING is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m -CONFIG_BLK_DEV_DM=m -CONFIG_BLK_DEV_DM_MIRROR=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=y -CONFIG_CIPHERS=y -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_3DES=m -CONFIG_CIPHER_NULL=m -CONFIG_CIPHER_DES=m -CONFIG_DIGESTS=y -CONFIG_DIGEST_MD5=m -CONFIG_DIGEST_SHA1=m -CONFIG_CRYPTODEV=y -CONFIG_CRYPTOLOOP=m -CONFIG_CRYPTOLOOP_ATOMIC=y -# CONFIG_CRYPTOLOOP_IV_HACK is not set -# CONFIG_CRYPTOLOOP_DEBUG is not set - -# -# Networking options -# -CONFIG_PACKET=m -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IPSEC=m -CONFIG_IPSEC_DEBUG=y -CONFIG_IPSEC_DEBUG_DISABLE_DEFAULT=y -CONFIG_IPSEC_TUNNEL=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -CONFIG_TUX_EXTENDED_LOG=y -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -CONFIG_INET_ECN=y -CONFIG_SYN_COOKIES=y -CONFIG_IP_IPSEC=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_PSD=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_IPLIMIT=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_STRING=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -# CONFIG_IPV6_DEBUG is not set -CONFIG_IPV6_IM=y -CONFIG_IPV6_MODULE_IP_GRE=y -CONFIG_IPV6_ISATAP=y -CONFIG_IPV6_PREFIXLIST=y -CONFIG_IPV6_6TO4_NEXTHOP=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_SUBTREES=y -# CONFIG_IPV6_MLD6_ALL_DONE is not set -# CONFIG_IPV6_NODEINFO is not set -CONFIG_IPV6_ZONE=y -# CONFIG_IPV6_ZONE_SITELOCAL is not set -CONFIG_IPV6_DROP_FAKE_V4MAPPED=y - -# -# IPv6: Netfilter Configuration -# -CONFIG_IP6_NF_QUEUE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_IPV6_IPSEC=y -CONFIG_IPV6_IPSEC_TUNNEL=y -CONFIG_IPV6_IPV6_TUNNEL=m -CONFIG_IPV6_MOBILITY=m -CONFIG_IPV6_MOBILITY_CN=m -CONFIG_IPV6_MOBILITY_MN=m -CONFIG_IPV6_MOBILITY_HA=m -# CONFIG_IPV6_MOBILITY_DEBUG is not set -# CONFIG_SHARED_IPV6_CARDS is not set -CONFIG_KHTTPD=m -CONFIG_KHTTPD_IPV6=y -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -CONFIG_ATM_CLIP_NO_ICMP=y -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -# CONFIG_DECNET_ROUTER is not set -CONFIG_BRIDGE=m -CONFIG_X25=m -CONFIG_LAPB=m -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -CONFIG_ECONET=m -# CONFIG_ECONET_AUNUDP is not set -# CONFIG_ECONET_NATIVE is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -CONFIG_NET_SCH_ATM=y -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_IDEDISK_STROKE=y -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -CONFIG_BLK_DEV_CMD640_ENHANCED=y -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -CONFIG_BLK_DEV_OFFBOARD=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -CONFIG_IDEDMA_ONLYDISK=y -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=m -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NS87415=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_SC1200=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_BLK_DEV_CENATEK=y -CONFIG_IDE_CHIPSETS=y -CONFIG_BLK_DEV_4DRIVES=y -CONFIG_BLK_DEV_ALI14XX=y -CONFIG_BLK_DEV_DTC2278=y -CONFIG_BLK_DEV_HT6560B=y -# CONFIG_BLK_DEV_PDC4030 is not set -CONFIG_BLK_DEV_QD65XX=y -CONFIG_BLK_DEV_UMC8672=y -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_MAX_MAJORS=144 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -# CONFIG_BLK_DEV_SR_VENDOR is not set -CONFIG_SR_EXTRA_DEVS=32 -CONFIG_CHR_DEV_SCH=m -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_PROBE_EISA_VL=y -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=24 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_ATA=y -# CONFIG_SCSI_ATA_PATA is not set -# CONFIG_SCSI_ATA_ATAPI is not set -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_ATA_VIA=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -CONFIG_SCSI_GENERIC_NCR53C400=y -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS_OLD=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_PPSCSI=m -CONFIG_PPSCSI_T348=m -CONFIG_PPSCSI_T358=m -CONFIG_PPSCSI_VPI0=m -CONFIG_PPSCSI_VPI2=m -CONFIG_PPSCSI_ONSCSI=m -CONFIG_PPSCSI_SPARCSI=m -CONFIG_PPSCSI_EPSA2=m -CONFIG_PPSCSI_EPST=m -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=80 -CONFIG_SCSI_NCR53C8XX_PROFILE=y -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX_PQS_PDS=y -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLOGIC_QLA2XXX=y -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m -CONFIG_SCSI_QLA2XXX_60500=y -CONFIG_SCSI_QLA2XXX_QLA2100_60500=m -CONFIG_SCSI_QLA2XXX_QLA2200_60500=m -CONFIG_SCSI_QLA2XXX_QLA2300_60500=m -CONFIG_SCSI_QLA2XXX_60650=y -CONFIG_SCSI_QLA2XXX_QLA2100_60650=m -CONFIG_SCSI_QLA2XXX_QLA2200_60650=m -CONFIG_SCSI_QLA2XXX_QLA2300_60650=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC395x_TRMS1040=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_PDC_ULTRA=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -CONFIG_IEEE1394_OUI_DB=y - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -CONFIG_ARCNET=m -CONFIG_ARCNET_1201=m -CONFIG_ARCNET_1051=m -CONFIG_ARCNET_RAW=m -CONFIG_ARCNET_COM90xx=m -CONFIG_ARCNET_COM90xxIO=m -CONFIG_ARCNET_RIM_I=m -CONFIG_ARCNET_COM20020=m -CONFIG_ARCNET_COM20020_ISA=m -CONFIG_ARCNET_COM20020_PCI=m -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_PCNET32_OLD=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_NET_BCM4400=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NETGEAR_GA621=m -CONFIG_NETGEAR_GA622=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NEW=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_SK98LIN_T1=y -CONFIG_SK98LIN_T3=y -CONFIG_SK98LIN_T8=y -CONFIG_SK98LIN_T6=y -CONFIG_SK98LIN_T9=y -CONFIG_SK98LIN_T4=y -CONFIG_SK98LIN_T7=y -CONFIG_SK98LIN_T2=y -CONFIG_SK98LIN_T5=y -CONFIG_SK9DLIN=m -CONFIG_TIGON3=m -CONFIG_NET_BROADCOM=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -CONFIG_HIPPI=y -CONFIG_ROADRUNNER=m -CONFIG_ROADRUNNER_LARGE_RINGS=y -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -CONFIG_COMX=m -CONFIG_COMX_HW_COMX=m -CONFIG_COMX_HW_LOCOMX=m -CONFIG_COMX_HW_MIXCOM=m -CONFIG_COMX_HW_MUNICH=m -CONFIG_COMX_PROTO_PPP=m -CONFIG_COMX_PROTO_LAPB=m -CONFIG_COMX_PROTO_FR=m -CONFIG_DSCC4=m -CONFIG_LANMEDIA=m -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -CONFIG_SYNCLINK_SYNCPPP=m -CONFIG_HDLC=m -CONFIG_HDLC_RAW=y -CONFIG_HDLC_CISCO=y -CONFIG_HDLC_FR=y -CONFIG_HDLC_PPP=y -CONFIG_HDLC_X25=y -CONFIG_N2=m -CONFIG_C101=m -CONFIG_FARSYNC=m -# CONFIG_HDLC_DEBUG_PKT is not set -# CONFIG_HDLC_DEBUG_HARD_HEADER is not set -# CONFIG_HDLC_DEBUG_ECN is not set -# CONFIG_HDLC_DEBUG_RINGS is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -# CONFIG_WANPIPE_FR is not set -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -CONFIG_LAPBETHER=m -CONFIG_X25_ASY=m -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_ARCNET_COM20020_CS=m -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -CONFIG_ATM_IDT77252_RCV_ALL=y -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_DMASCC=m -CONFIG_SCC=m -CONFIG_SCC_DELAY=y -CONFIG_SCC_TRXECHO=y -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_BAYCOM_EPP=m -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -CONFIG_YAM=m - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_IPPP_FILTER=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y -CONFIG_ISDN_X25=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -CONFIG_ISDN_DIVERSION=m - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -# CONFIG_HISAX_DEBUG is not set -CONFIG_HISAX_TELES_CS=m -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -CONFIG_ISDN_DRV_SC=m -CONFIG_ISDN_DRV_ACT2000=m -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -CONFIG_ISDN_DRV_EICON_OLD=m -CONFIG_ISDN_DRV_EICON_PCI=y -CONFIG_ISDN_DRV_EICON_ISA=y -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -CONFIG_CD_NO_IDESCSI=y -CONFIG_AZTCD=m -CONFIG_GSCD=m -CONFIG_SBPCD=m -CONFIG_MCD=m -CONFIG_MCD_IRQ=11 -CONFIG_MCD_BASE=300 -CONFIG_MCDX=m -CONFIG_OPTCD=m -CONFIG_CM206=m -CONFIG_SJCD=m -CONFIG_ISP16_CDI=m -CONFIG_CDU31A=m -CONFIG_CDU535=m - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -CONFIG_HUB6=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -CONFIG_RIO=m -CONFIG_RIO_OLDPCI=y -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -CONFIG_SCx200_I2C=m -CONFIG_SCx200_I2C_SCL=12 -CONFIG_SCx200_I2C_SDA=13 -CONFIG_SCx200_ACB=m -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS645=m -CONFIG_I2C_SAVAGE4=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=y -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_FM801=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -CONFIG_QIC02_TAPE=m -CONFIG_QIC02_DYNCONF=y -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_PANIC_EVENT=y -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -CONFIG_MIXCOMWD=m -CONFIG_60XX_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_SCx200_WDT=m -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -CONFIG_WDT_501=y -CONFIG_WDT_501_FAN=y -CONFIG_MACHZ_WDT=m -CONFIG_DEADMAN=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_AMD7XX_TCO=m -CONFIG_SCx200_GPIO=m -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -CONFIG_APPLICOM=m -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_AGP_NVIDIA=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_IBMASM=y -CONFIG_IBMASM_ASM=m -CONFIG_IBMASM_SER=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m -CONFIG_DXR3=y -CONFIG_EM8300=m -# CONFIG_EM8300_LOOPBACK is not set -# CONFIG_EM8300_UCODETIMEOUT is not set -# CONFIG_EM8300_DICOMFIX is not set -# CONFIG_EM8300_DICOMCTRL is not set -CONFIG_EM8300_DICOMPAL=y -CONFIG_ADV717X=m -# CONFIG_ADV717X_SWAP is not set -# CONFIG_ADV717X_PIXELPORT16BIT is not set -# CONFIG_ADV717X_PIXELPORTPAL is not set -CONFIG_BT865=m - -# -# File systems -# -CONFIG_QUOTA=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QIFACE_COMPAT=y -# CONFIG_QIFACE_V1 is not set -CONFIG_QIFACE_V2=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_XATTR_USER=y -CONFIG_REISERFS_FS_XATTR_TRUSTED=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_ADFS_FS=m -# CONFIG_ADFS_FS_RW is not set -CONFIG_AFFS_FS=m -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=m -CONFIG_JBD_DEBUG=y -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FATX_FS=m -CONFIG_EFS_FS=m -CONFIG_JFFS_FS=m -CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_MINIX_FS=y -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_DEBUG is not set -# CONFIG_NTFS_RW is not set -CONFIG_HPFS_FS=m -CONFIG_PROC_FS=y -CONFIG_PROC_CONFIG=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -CONFIG_QNX4FS_FS=m -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -CONFIG_EXT2_FS_XATTR_TRUSTED=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -CONFIG_XFS_FS=m -CONFIG_XFS_POSIX_ACL=y -# CONFIG_XFS_RT is not set -CONFIG_XFS_QUOTA=y -CONFIG_XFS_DMAPI=y -# CONFIG_XFS_DEBUG is not set -# CONFIG_PAGEBUF_DEBUG is not set -CONFIG_OCFS_FS=m - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_ACL=y -CONFIG_NFS_DIRECTIO=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_NFSD_FHALIAS=y -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_CIFS=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="cp437" -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -CONFIG_ATARI_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_XBOX_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -# CONFIG_MINIX_SUBPARTITION is not set -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -CONFIG_ULTRIX_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -CONFIG_FB_PM2_FIFO_DISCONNECT=y -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -CONFIG_UNICON=y -CONFIG_UNICON_GB=m -CONFIG_UNICON_GBK=m -CONFIG_UNICON_BIG5=m -CONFIG_UNICON_JIS=m -CONFIG_UNICON_KSCM=m -CONFIG_FB_CYBER2000=m -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_MATROX_PROC=m -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_INTEL=m -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_VMWARE_SVGA=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -CONFIG_FB_TRIDENT=m -# CONFIG_FB_VIRTUAL is not set -CONFIG_FBCON_SPLASHSCREEN=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_ADVANCED=y -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB2=m -CONFIG_FBCON_CFB4=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_AFB=m -CONFIG_FBCON_ILBM=m -CONFIG_FBCON_IPLAN2P2=m -CONFIG_FBCON_IPLAN2P4=m -CONFIG_FBCON_IPLAN2P8=m -CONFIG_FBCON_MAC=m -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_VGA=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -CONFIG_FBCON_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -CONFIG_SOUND_TRACEINIT=y -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -# CONFIG_SOUND_GUS16 is not set -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -CONFIG_PSS_MIXER=y -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# Advanced Linux Sound Architecture -# -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -CONFIG_SND_DEBUG_MEMORY=y -# CONFIG_SND_DEBUG_DETECT is not set - -# -# Generic devices -# -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_SERIALMIDI=m - -# -# ISA devices -# -CONFIG_SND_AD1816A=m -CONFIG_SND_AD1848=m -CONFIG_SND_CS4231=m -CONFIG_SND_CS4232=m -CONFIG_SND_CS4236=m -CONFIG_SND_ES968=m -CONFIG_SND_ES1688=m -CONFIG_SND_ES18XX=m -CONFIG_SND_GUSCLASSIC=m -CONFIG_SND_GUSEXTREME=m -CONFIG_SND_GUSMAX=m -CONFIG_SND_INTERWAVE=m -CONFIG_SND_INTERWAVE_STB=m -CONFIG_SND_OPTI92X_AD1848=m -CONFIG_SND_OPTI92X_CS4231=m -CONFIG_SND_OPTI93X=m -CONFIG_SND_SB8=m -CONFIG_SND_SB16=m -CONFIG_SND_SBAWE=m -CONFIG_SND_SB16_CSP=y -CONFIG_SND_WAVEFRONT=m -CONFIG_SND_ALS100=m -CONFIG_SND_AZT2320=m -CONFIG_SND_CMI8330=m -CONFIG_SND_DT019X=m -CONFIG_SND_OPL3SA2=m -CONFIG_SND_SGALAXY=m -CONFIG_SND_SSCAPE=m -CONFIG_SND_MSND_PINNACLE=m - -# -# PCI devices -# -CONFIG_SND_ALI5451=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS4281=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_PDPLUS=m -CONFIG_SND_KORG1212=m -CONFIG_SND_NM256=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_HDSP=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_ALS4000=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_FM801=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VX222=m -CONFIG_SND_MIXART=m -CONFIG_SND_AZT3328=m - -# -# ALSA USB devices -# -CONFIG_SND_USB_AUDIO=m - -# -# ALSA PCMCIA devices -# -CONFIG_SND_VXPOCKET=m -CONFIG_SND_VXP440=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -CONFIG_USB_EMI26=m -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -CONFIG_USB_DC2XX=m -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_LOGITECH_CAM=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_AX8817X=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USBDNET=m -CONFIG_USB_USBDNET_VENDOR=0000 -CONFIG_USB_USBDNET_PRODUCT=0000 -CONFIG_USB_USBDNET_CLASS=0000 -CONFIG_USB_USBDNET_SUBCLASS=0000 -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SAFE_SERIAL_VENDOR=0000 -CONFIG_USB_SAFE_SERIAL_PRODUCT=0000 -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m -CONFIG_USB_SPEEDTOUCH=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y -CONFIG_BLUEZ_CMTP=m - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -# CONFIG_BLUEZ_USB_ZERO_PACKET is not set -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIBFUSB=m -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Kernel hacking -# -CONFIG_DUMP=m -CONFIG_DUMP_COMPRESS_RLE=m -CONFIG_DUMP_COMPRESS_GZIP=m -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_HZ is not set -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_KMSGDUMP is not set -# CONFIG_DEBUG_SPINLOCK is not set -CONFIG_FRAME_POINTER=y -# CONFIG_HIGHMEM_EMULATION is not set -# CONFIG_X86_REMOTE_DEBUG is not set -# CONFIG_KDB is not set -# CONFIG_KDB_MODULES is not set -CONFIG_KALLSYMS=y -# CONFIG_HOOK is not set -CONFIG_VTUNE=m - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_QSORT=y -CONFIG_FW_LOADER=m - -# -# Build options -# -# CONFIG_SUSE_KERNEL is not set -CONFIG_UNITEDLINUX_KERNEL=y -CONFIG_CFGNAME="smp" -CONFIG_RELEASE="273" diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config deleted file mode 100644 index d8c0697..0000000 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config +++ /dev/null @@ -1,2042 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86_64=y -CONFIG_X86=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -CONFIG_UID16=y -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -# CONFIG_MK8 is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_TSC=y -CONFIG_X86_GOOD_APIC=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y -CONFIG_CPU_FREQ_PROC_INTF=y -CONFIG_CPU_FREQ_GOV_USERSPACE=y -CONFIG_CPU_FREQ_24_API=y -CONFIG_X86_POWERNOW_K8=m -# CONFIG_X86_POWERNOW_K8_DBG is not set -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_MATH_EMULATION is not set -# CONFIG_MCA is not set -# CONFIG_EISA is not set -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -# CONFIG_SMP is not set -CONFIG_HPET_TIMER=y -CONFIG_GART_IOMMU=y -CONFIG_X86_UP_IOAPIC=y -CONFIG_MCE=y -# CONFIG_K8_NUMA is not set -# CONFIG_NOBIGSTACK is not set -CONFIG_STACK_SIZE_16KB=y -# CONFIG_STACK_SIZE_32KB is not set -# CONFIG_STACK_SIZE_64KB is not set -CONFIG_STACK_SIZE_SHIFT=2 - -# -# General setup -# -# CONFIG_DESKTOP is not set -CONFIG_NET=y -CONFIG_EVLOG=y -CONFIG_EVLOG_BUFSIZE=128 -CONFIG_EVLOG_FWPRINTK=y -# CONFIG_EVLOG_PRINTKWLOC is not set -CONFIG_PCI=y -CONFIG_PCI_DIRECT=y -# CONFIG_PCI_NAMES is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -CONFIG_HOTPLUG_PCI=m -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -CONFIG_HOTPLUG_PCI_ACPI=m -CONFIG_HOTPLUG_PCI_AMD=m -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_MAX_USER_RT_PRIO=100 -CONFIG_MAX_RT_PRIO=0 -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -CONFIG_IA32_EMULATION=y - -# -# ACPI Support -# -CONFIG_ACPI=y -# CONFIG_ACPI_HT_ONLY is not set -CONFIG_ACPI_INITRD=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SYSTEM=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_THERMAL=m -CONFIG_ACPI_ASUS=m -CONFIG_ACPI_TOSHIBA=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_RELAXED_AML=y - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=m -# CONFIG_MTD_DEBUG is not set -CONFIG_MTD_PARTITIONS=m -CONFIG_MTD_CONCAT=m -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_CMDLINE_PARTS=m -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -# CONFIG_NFTL_RW is not set - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_GEOMETRY is not set -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -# CONFIG_MTD_RAM is not set -# CONFIG_MTD_ROM is not set -CONFIG_MTD_ABSENT=m -CONFIG_MTD_OBSOLETE_CHIPS=y -CONFIG_MTD_AMDSTD=m -CONFIG_MTD_SHARP=m -CONFIG_MTD_JEDEC=m - -# -# Mapping drivers for chip access -# -CONFIG_MTD_PHYSMAP=m -CONFIG_MTD_PHYSMAP_START=8000000 -CONFIG_MTD_PHYSMAP_LEN=4000000 -CONFIG_MTD_PHYSMAP_BUSWIDTH=2 -CONFIG_MTD_PNC2000=m -CONFIG_MTD_SC520CDP=m -CONFIG_MTD_NETSC520=m -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_ELAN_104NC=m -CONFIG_MTD_DILNETPC=m -CONFIG_MTD_DILNETPC_BOOTSIZE=80000 -CONFIG_MTD_MIXMEM=m -CONFIG_MTD_OCTAGON=m -CONFIG_MTD_VMAX=m -CONFIG_MTD_SCx200_DOCFLASH=m -CONFIG_MTD_L440GX=m -# CONFIG_MTD_AMD76XROM is not set -CONFIG_MTD_ICH2ROM=m -CONFIG_MTD_NETtel=m -# CONFIG_MTD_SCB2_FLASH is not set -CONFIG_MTD_PCI=m -CONFIG_MTD_PCMCIA=m - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -CONFIG_MTD_PMC551_BUGFIX=y -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_SLRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLKMTD=m -CONFIG_MTD_DOC1000=m -CONFIG_MTD_DOC2000=m -CONFIG_MTD_DOC2001=m -CONFIG_MTD_DOCPROBE=m -CONFIG_MTD_DOCPROBE_ADVANCED=y -CONFIG_MTD_DOCPROBE_ADDRESS=0000 -CONFIG_MTD_DOCPROBE_HIGH=y -CONFIG_MTD_DOCPROBE_55AA=y - -# -# NAND Flash Device Drivers -# -CONFIG_MTD_NAND=m -# CONFIG_MTD_NAND_VERIFY_WRITE is not set -CONFIG_MTD_NAND_IDS=m - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -CONFIG_PARPORT_PC_FIFO=y -CONFIG_PARPORT_PC_SUPERIO=y -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -CONFIG_PARPORT_OTHER=y -CONFIG_PARPORT_1284=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -# CONFIG_BLK_DEV_XD is not set -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_CIPHER_TWOFISH=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=128000 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m -CONFIG_BLK_DEV_DM=m -CONFIG_BLK_DEV_DM_MIRROR=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=y -CONFIG_CIPHERS=y -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_3DES=m -CONFIG_CIPHER_NULL=m -CONFIG_CIPHER_DES=m -CONFIG_DIGESTS=y -CONFIG_DIGEST_MD5=m -CONFIG_DIGEST_SHA1=m -CONFIG_CRYPTODEV=y -CONFIG_CRYPTOLOOP=m -CONFIG_CRYPTOLOOP_ATOMIC=y -# CONFIG_CRYPTOLOOP_IV_HACK is not set -# CONFIG_CRYPTOLOOP_DEBUG is not set - -# -# Networking options -# -CONFIG_PACKET=m -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=m -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IPSEC=m -CONFIG_IPSEC_DEBUG=y -CONFIG_IPSEC_DEBUG_DISABLE_DEFAULT=y -CONFIG_IPSEC_TUNNEL=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -CONFIG_TUX_EXTENDED_LOG=y -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -CONFIG_INET_ECN=y -CONFIG_SYN_COOKIES=y -CONFIG_IP_IPSEC=m - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_PSD=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_IPLIMIT=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_STRING=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_AMANDA=m -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m -# CONFIG_IPV6_DEBUG is not set -CONFIG_IPV6_IM=y -CONFIG_IPV6_MODULE_IP_GRE=y -CONFIG_IPV6_ISATAP=y -CONFIG_IPV6_PREFIXLIST=y -CONFIG_IPV6_6TO4_NEXTHOP=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_SUBTREES=y -# CONFIG_IPV6_MLD6_ALL_DONE is not set -# CONFIG_IPV6_NODEINFO is not set -# CONFIG_IPV6_ZONE is not set -CONFIG_IPV6_DROP_FAKE_V4MAPPED=y - -# -# IPv6: Netfilter Configuration -# -CONFIG_IP6_NF_QUEUE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_AHESP=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -CONFIG_IPV6_IPSEC=y -CONFIG_IPV6_IPSEC_TUNNEL=y -CONFIG_IPV6_IPV6_TUNNEL=m -CONFIG_IPV6_MOBILITY=m -CONFIG_IPV6_MOBILITY_CN=m -CONFIG_IPV6_MOBILITY_MN=m -CONFIG_IPV6_MOBILITY_HA=m -# CONFIG_IPV6_MOBILITY_DEBUG is not set -# CONFIG_SHARED_IPV6_CARDS is not set -CONFIG_KHTTPD=m -CONFIG_KHTTPD_IPV6=y -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -# CONFIG_DECNET_ROUTER is not set -CONFIG_BRIDGE=m -CONFIG_X25=m -CONFIG_LAPB=m -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -CONFIG_ECONET=m -# CONFIG_ECONET_AUNUDP is not set -# CONFIG_ECONET_NATIVE is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -CONFIG_NET_PKTGEN=m - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_IDEDISK_STROKE=y -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -CONFIG_BLK_DEV_CMD640_ENHANCED=y -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -CONFIG_BLK_DEV_OFFBOARD=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -CONFIG_IDEDMA_ONLYDISK=y -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_ADMA100 is not set -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -# CONFIG_BLK_DEV_PIIX is not set -CONFIG_BLK_DEV_NS87415=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_SC1200=y -# CONFIG_BLK_DEV_SVWKS is not set -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -CONFIG_BLK_DEV_TRM290=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_BLK_DEV_CENATEK=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_MAX_MAJORS=144 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -# CONFIG_BLK_DEV_SR_VENDOR is not set -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SCH=m -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -# CONFIG_SCSI_7000FASST is not set -CONFIG_SCSI_ACARD=m -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_PROBE_EISA_VL=y -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=24 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_MEGARAID2=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -# CONFIG_SCSI_DTC3280 is not set -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -CONFIG_SCSI_GENERIC_NCR53C400=y -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS_OLD=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_PPSCSI=m -CONFIG_PPSCSI_T348=m -CONFIG_PPSCSI_T358=m -CONFIG_PPSCSI_VPI0=m -CONFIG_PPSCSI_VPI2=m -CONFIG_PPSCSI_ONSCSI=m -CONFIG_PPSCSI_SPARCSI=m -CONFIG_PPSCSI_EPSA2=m -CONFIG_PPSCSI_EPST=m -# CONFIG_SCSI_NCR53C406A is not set -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=80 -CONFIG_SCSI_NCR53C8XX_PROFILE=y -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX_PQS_PDS=y -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -# CONFIG_SCSI_PAS16 is not set -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLOGIC_QLA2XXX=y -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m -CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC395x_TRMS1040=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -# CONFIG_SCSI_T128 is not set -CONFIG_SCSI_U14_34F=m -CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -# CONFIG_IEEE1394_OUI_DB is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -CONFIG_ARCNET=m -CONFIG_ARCNET_1201=m -CONFIG_ARCNET_1051=m -CONFIG_ARCNET_RAW=m -CONFIG_ARCNET_COM90xx=m -CONFIG_ARCNET_COM90xxIO=m -CONFIG_ARCNET_RIM_I=m -CONFIG_ARCNET_COM20020=m -# CONFIG_ARCNET_COM20020_ISA is not set -CONFIG_ARCNET_COM20020_PCI=m -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -# CONFIG_LANCE is not set -CONFIG_NET_VENDOR_SMC=y -# CONFIG_WD80x3 is not set -# CONFIG_ULTRAMCA is not set -# CONFIG_ULTRA is not set -# CONFIG_ULTRA32 is not set -# CONFIG_SMC9194 is not set -CONFIG_NET_VENDOR_RACAL=y -# CONFIG_NI5010 is not set -# CONFIG_NI52 is not set -# CONFIG_NI65 is not set -CONFIG_HP100=m -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_PCNET32_OLD=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -# CONFIG_APRICOT is not set -CONFIG_NET_BCM4400=m -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -# CONFIG_LNE390 is not set -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NETGEAR_GA621=m -CONFIG_NETGEAR_GA622=m -CONFIG_NE2K_PCI=m -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -# CONFIG_DE600 is not set -# CONFIG_DE620 is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_SK98LIN_T1=y -CONFIG_SK98LIN_T3=y -CONFIG_SK98LIN_T8=y -CONFIG_SK98LIN_T6=y -CONFIG_SK98LIN_T9=y -CONFIG_SK98LIN_T4=y -CONFIG_SK98LIN_T7=y -CONFIG_SK98LIN_T2=y -CONFIG_SK98LIN_T5=y -CONFIG_SK9DLIN=m -CONFIG_TIGON3=m -CONFIG_NET_BROADCOM=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -CONFIG_HIPPI=y -CONFIG_ROADRUNNER=m -CONFIG_ROADRUNNER_LARGE_RINGS=y -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y - -# -# Token Ring devices -# -# CONFIG_TR is not set -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_ARCNET_COM20020_CS=m -# CONFIG_PCMCIA_IBMTR is not set -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_AIRONET4500_CS=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -CONFIG_AX25_DAMA_SLAVE=y -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -CONFIG_MKISS=m -CONFIG_6PACK=m -CONFIG_BPQETHER=m -CONFIG_SCC_DELAY=y -CONFIG_SCC_TRXECHO=y -CONFIG_BAYCOM_SER_FDX=m -CONFIG_BAYCOM_SER_HDX=m -CONFIG_BAYCOM_PAR=m -CONFIG_BAYCOM_EPP=m -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -CONFIG_YAM=m - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -# CONFIG_TOSHIBA_OLD is not set -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_IPPP_FILTER=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y -CONFIG_ISDN_X25=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -CONFIG_ISDN_DIVERSION=m - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -# CONFIG_HISAX_DEBUG is not set -# CONFIG_HISAX_TELES_CS is not set -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -# CONFIG_HISAX_AVM_A1_CS is not set -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m -CONFIG_USB_AUERISDN=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -CONFIG_ISDN_DRV_SC=m -CONFIG_ISDN_DRV_ACT2000=m -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -CONFIG_ISDN_DRV_EICON_OLD=m -CONFIG_ISDN_DRV_EICON_PCI=y -CONFIG_ISDN_DRV_EICON_ISA=y -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -# CONFIG_ISDN_DRV_AVMB1_B1ISA is not set -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -# CONFIG_ISDN_DRV_AVMB1_T1ISA is not set -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -CONFIG_HUB6=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -# CONFIG_ESPSERIAL is not set -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set -CONFIG_SCx200_I2C=m -CONFIG_SCx200_I2C_SCL=12 -CONFIG_SCx200_I2C_SDA=13 -CONFIG_SCx200_ACB=m -CONFIG_I2C_ALGOPCF=m -# CONFIG_I2C_ELEKTOR is not set -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -# CONFIG_I2C_I810 is not set -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS645=m -CONFIG_I2C_SAVAGE4=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -# CONFIG_SENSORS_LM92 is not set -CONFIG_SENSORS_SIS5595=m -# CONFIG_SENSORS_SMSC47M1 is not set -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -# CONFIG_SENSORS_PCF8574 is not set -# CONFIG_SENSORS_PCF8591 is not set - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=y -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_FM801=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -CONFIG_QIC02_TAPE=m -CONFIG_QIC02_DYNCONF=y -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_PANIC_EVENT=y -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -# CONFIG_I810_TCO is not set -CONFIG_MIXCOMWD=m -CONFIG_60XX_WDT=m -CONFIG_SC1200_WDT=m -CONFIG_SCx200_WDT=m -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -CONFIG_WDT_501=y -CONFIG_WDT_501_FAN=y -CONFIG_MACHZ_WDT=m -CONFIG_DEADMAN=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_AMD7XX_TCO=m -CONFIG_SCx200_GPIO=m -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -CONFIG_APPLICOM=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -CONFIG_FT_PROC_FS=y -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=y -CONFIG_AGP_AMD_8151=y -# CONFIG_AGP_INTEL is not set -# CONFIG_AGP_I810 is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_AMD is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_AGP_NVIDIA is not set -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -# CONFIG_DRM_SIS is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -# CONFIG_VIDEO_MEYE is not set - -# -# Radio Adapters -# -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -# CONFIG_DXR3 is not set - -# -# File systems -# -CONFIG_QUOTA=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QIFACE_COMPAT=y -# CONFIG_QIFACE_V1 is not set -CONFIG_QIFACE_V2=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_XATTR_USER=y -CONFIG_REISERFS_FS_XATTR_TRUSTED=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_ADFS_FS=m -# CONFIG_ADFS_FS_RW is not set -CONFIG_AFFS_FS=m -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_EXT3_FS_XATTR_TRUSTED=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_JBD=y -CONFIG_JBD_DEBUG=y -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FATX_FS=m -CONFIG_EFS_FS=m -CONFIG_JFFS_FS=m -CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_MINIX_FS=y -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_DEBUG is not set -# CONFIG_NTFS_RW is not set -CONFIG_HPFS_FS=m -CONFIG_PROC_FS=y -CONFIG_PROC_CONFIG=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -CONFIG_QNX4FS_FS=m -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -CONFIG_EXT2_FS_XATTR_TRUSTED=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -CONFIG_XFS_FS=m -CONFIG_XFS_POSIX_ACL=y -# CONFIG_XFS_RT is not set -CONFIG_XFS_QUOTA=y -CONFIG_XFS_DMAPI=y -# CONFIG_XFS_DEBUG is not set -# CONFIG_PAGEBUF_DEBUG is not set -# CONFIG_OCFS_FS is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_ACL=y -CONFIG_NFS_DIRECTIO=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_ACL=y -CONFIG_NFSD_TCP=y -CONFIG_NFSD_FHALIAS=y -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_CIFS=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y -CONFIG_SMB_NLS_REMOTE="cp437" -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y -CONFIG_FS_POSIX_ACL=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -CONFIG_ATARI_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_XBOX_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -# CONFIG_MINIX_SUBPARTITION is not set -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -CONFIG_ULTRIX_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -CONFIG_FB_PM2_FIFO_DISCONNECT=y -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_UNICON is not set -# CONFIG_UNICON_GB is not set -# CONFIG_UNICON_GBK is not set -# CONFIG_UNICON_BIG5 is not set -# CONFIG_UNICON_JIS is not set -# CONFIG_UNICON_KSCM is not set -CONFIG_FB_CYBER2000=m -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_MATROX_PROC=m -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -# CONFIG_FB_INTEL is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_VMWARE_SVGA=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -CONFIG_FB_TRIDENT=m -# CONFIG_FB_VIRTUAL is not set -CONFIG_FBCON_SPLASHSCREEN=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_ADVANCED=y -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB2=m -CONFIG_FBCON_CFB4=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_AFB=m -CONFIG_FBCON_ILBM=m -CONFIG_FBCON_IPLAN2P2=m -CONFIG_FBCON_IPLAN2P4=m -CONFIG_FBCON_IPLAN2P8=m -CONFIG_FBCON_MAC=m -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_VGA=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -CONFIG_FBCON_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -CONFIG_SOUND_TRACEINIT=y -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -# CONFIG_SOUND_GUS16 is not set -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -CONFIG_PSS_MIXER=y -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_KAHLUA=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# Advanced Linux Sound Architecture -# -CONFIG_SND=m -CONFIG_SND_BIT32_EMUL=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DEBUG=y -CONFIG_SND_DEBUG_MEMORY=y -# CONFIG_SND_DEBUG_DETECT is not set - -# -# Generic devices -# -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_SERIALMIDI=m - -# -# PCI devices -# -CONFIG_SND_ALI5451=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS4281=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_PDPLUS=m -CONFIG_SND_KORG1212=m -CONFIG_SND_NM256=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_HDSP=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_ALS4000=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_FM801=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VX222=m -CONFIG_SND_MIXART=m -CONFIG_SND_AZT3328=m - -# -# ALSA USB devices -# -CONFIG_SND_USB_AUDIO=m - -# -# ALSA PCMCIA devices -# -CONFIG_SND_VXPOCKET=m -CONFIG_SND_VXP440=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -CONFIG_USB_EMI26=m -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_KBTAB=m -CONFIG_USB_POWERMATE=m -CONFIG_USB_DC2XX=m -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_KONICAWC=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -# CONFIG_USB_LOGITECH_CAM is not set -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_AX8817X=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USBDNET=m -CONFIG_USB_USBDNET_VENDOR=0000 -CONFIG_USB_USBDNET_PRODUCT=0000 -CONFIG_USB_USBDNET_CLASS=0000 -CONFIG_USB_USBDNET_SUBCLASS=0000 -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SAFE_SERIAL_VENDOR=0000 -CONFIG_USB_SAFE_SERIAL_PRODUCT=0000 -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m -# CONFIG_USB_SPEEDTOUCH is not set - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y -CONFIG_BLUEZ_CMTP=m - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_SCO=y -# CONFIG_BLUEZ_USB_ZERO_PACKET is not set -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIBFUSB=m -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_SLAB is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_CHECKING is not set -# CONFIG_INIT_DEBUG is not set -# CONFIG_X86_REMOTE_DEBUG is not set -# CONFIG_IOMMU_DEBUG is not set -# CONFIG_IOMMU_LEAK is not set -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_KALLSYMS=y - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_QSORT=y -CONFIG_FW_LOADER=m - -# -# Build options -# -CONFIG_SUSE_KERNEL=y -CONFIG_CFGNAME="default" -CONFIG_RELEASE=171 -CONFIG_SWIOTLB=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686-bigsmp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686-bigsmp.config index b35a129..013ad43 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686-bigsmp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686-bigsmp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.27 -# Mon Mar 12 22:26:47 2007 +# Linux kernel version: 2.6.16.46 +# Tue Jul 3 17:56:03 2007 # CONFIG_X86_32=y CONFIG_SEMAPHORE_SLEEPERS=y @@ -25,6 +25,9 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SUSE_KERNEL=y +CONFIG_SLE_VERSION=10 +CONFIG_SLE_SP=1 +CONFIG_SLE_SP_SUBLEVEL=0 CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -32,12 +35,14 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y CONFIG_SYSCTL=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -79,6 +84,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y # # IO Schedulers @@ -205,6 +211,7 @@ CONFIG_KEXEC=y CONFIG_PHYSICAL_START=0x100000 CONFIG_HOTPLUG_CPU=y CONFIG_DOUBLEFAULT=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # # Power management options (ACPI, APM) @@ -858,6 +865,7 @@ CONFIG_IEEE80211_CRYPT_TKIP=m CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -1163,6 +1171,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -1185,15 +1194,15 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y # -# SCSI Transports +# SCSI Transport Attributes # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set CONFIG_ISCSI_TARGET=m -CONFIG_SCSI_SAS_DOMAIN_ATTRS=m -CONFIG_SCSI_SAS_DOMAIN_DEBUG=y # # SCSI low-level drivers @@ -1223,29 +1232,15 @@ CONFIG_AIC79XX_REG_PRETTY_PRINT=y CONFIG_SCSI_AIC94XX=m CONFIG_AIC94XX_DEBUG=y CONFIG_SCSI_DPT_I2O=m +CONFIG_SCSI_ADVANSYS=m CONFIG_SCSI_IN2000=m +CONFIG_SCSI_ARCMSR=m CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -# CONFIG_SCSI_SATA_ULI is not set -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_ACPI=y +CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -1271,7 +1266,7 @@ CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y CONFIG_SCSI_IPR=m CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y @@ -1282,8 +1277,7 @@ CONFIG_SCSI_QLOGIC_FC=m CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y CONFIG_SCSI_QLOGIC_1280=m CONFIG_SCSI_QLA_FC=m -# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set -CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_SYM53C416=m CONFIG_SCSI_DC395x=m @@ -1300,13 +1294,76 @@ CONFIG_SCSI_DEBUG=m # # PCMCIA SCSI adapter support # -CONFIG_PCMCIA_AHA152X=m +# CONFIG_PCMCIA_AHA152X is not set CONFIG_PCMCIA_FDOMAIN=m CONFIG_PCMCIA_NINJA_SCSI=m CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_ATA_ACPI=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +CONFIG_PATA_ARTOP=m +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +CONFIG_PATA_EFAR=m +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_ISAPNP is not set +CONFIG_PATA_IT821X=m +CONFIG_PATA_IT8213=m +CONFIG_PATA_JMICRON=m +# CONFIG_PATA_LEGACY is not set +# CONFIG_PATA_TRIFLEX is not set +CONFIG_PATA_MARVELL=m +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_NETCELL=m +# CONFIG_PATA_NS87410 is not set +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +# CONFIG_PATA_PCMCIA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_QDI is not set +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +CONFIG_PATA_SIL680=m +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_WINBOND_VLB is not set + +# # Old CD-ROM drivers (not SCSI, not IDE) # CONFIG_CD_NO_IDESCSI=y @@ -1338,6 +1395,8 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_MULTIPATH_HP_SW=m +CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_NL_EVT=y # @@ -1348,6 +1407,7 @@ CONFIG_FUSION_SPI=m CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=m CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_MAX_FC_SGE=256 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m @@ -1548,6 +1608,7 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) @@ -1557,6 +1618,7 @@ CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -2004,6 +2066,7 @@ CONFIG_SPECIALIX=m # CONFIG_SPECIALIX_RTSCTS is not set CONFIG_SX=m CONFIG_STALDRV=y +CONFIG_NOZOMI=m # # Serial drivers @@ -2154,6 +2217,7 @@ CONFIG_HANGCHECK_TIMER=m # TPM devices # CONFIG_TCG_TPM=m +CONFIG_TCG_TIS=m CONFIG_TCG_NSC=m CONFIG_TCG_ATMEL=m CONFIG_TCG_INFINEON=m @@ -2260,6 +2324,7 @@ CONFIG_SENSORS_ADM1025=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_ADM9240=m +CONFIG_SENSORS_K8TEMP=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ATXP1=m CONFIG_SENSORS_DS1621=m @@ -2281,12 +2346,15 @@ CONFIG_SENSORS_LM90=m CONFIG_SENSORS_LM92=m CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_PC87360=m +CONFIG_SENSORS_PC87427=m CONFIG_SENSORS_SIS5595=m CONFIG_SENSORS_SMSC47M1=m +CONFIG_SENSORS_SMSC47M192=m CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_VT8231=m CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83791D=m CONFIG_SENSORS_W83792D=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83627HF=m @@ -2683,6 +2751,18 @@ CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m CONFIG_SND_CS46XX_NEW_DSP=y CONFIG_SND_CS5535AUDIO=m +CONFIG_SND_DARLA20=m +CONFIG_SND_GINA20=m +CONFIG_SND_LAYLA20=m +CONFIG_SND_DARLA24=m +CONFIG_SND_GINA24=m +CONFIG_SND_LAYLA24=m +CONFIG_SND_MONA=m +CONFIG_SND_MIA=m +CONFIG_SND_ECHO3G=m +CONFIG_SND_INDIGO=m +CONFIG_SND_INDIGOIO=m +CONFIG_SND_INDIGODJ=m CONFIG_SND_EMU10K1=m CONFIG_SND_EMU10K1X=m CONFIG_SND_ENS1370=m @@ -2690,6 +2770,7 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m @@ -2985,6 +3066,7 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_HP4X=m CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_SIERRAWIRELESS=m CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m @@ -3001,6 +3083,7 @@ CONFIG_USB_AUERSWALD=m CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m +CONFIG_USB_BERRY_CHARGE=m CONFIG_USB_LED=m CONFIG_USB_CYTHERM=m CONFIG_USB_PHIDGETKIT=m @@ -3089,7 +3172,6 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=m CONFIG_XFS_DMAPI=m CONFIG_XFS_SECURITY=y @@ -3102,6 +3184,7 @@ CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m CONFIG_MINIX_FS=y CONFIG_ROMFS_FS=m CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y CONFIG_DMAPI=m # CONFIG_DMAPI_DEBUG is not set CONFIG_QUOTA=y @@ -3146,7 +3229,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=m CONFIG_CONFIGFS_FS=m # @@ -3395,6 +3477,7 @@ CONFIG_CRC_CCITT=m CONFIG_CRC16=m CONFIG_CRC32=y CONFIG_LIBCRC32C=m +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m CONFIG_REED_SOLOMON=m @@ -3410,5 +3493,4 @@ CONFIG_X86_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y -CONFIG_X86_SYSENTER=y CONFIG_KTIME_SCALAR=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686.config index 1f4b63c..3d04f51 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-i686.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.27 -# Mon Mar 12 22:26:27 2007 +# Linux kernel version: 2.6.16.46 +# Tue Jul 3 17:46:57 2007 # CONFIG_X86_32=y CONFIG_SEMAPHORE_SLEEPERS=y @@ -25,6 +25,9 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SUSE_KERNEL=y +CONFIG_SLE_VERSION=10 +CONFIG_SLE_SP=1 +CONFIG_SLE_SP_SUBLEVEL=0 CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -32,12 +35,14 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y CONFIG_SYSCTL=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -79,6 +84,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y # # IO Schedulers @@ -205,6 +211,7 @@ CONFIG_KEXEC=y CONFIG_PHYSICAL_START=0x100000 CONFIG_HOTPLUG_CPU=y CONFIG_DOUBLEFAULT=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # # Power management options (ACPI, APM) @@ -858,6 +865,7 @@ CONFIG_IEEE80211_CRYPT_TKIP=m CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -1163,6 +1171,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -1185,15 +1194,15 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y # -# SCSI Transports +# SCSI Transport Attributes # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set CONFIG_ISCSI_TARGET=m -CONFIG_SCSI_SAS_DOMAIN_ATTRS=m -CONFIG_SCSI_SAS_DOMAIN_DEBUG=y # # SCSI low-level drivers @@ -1223,29 +1232,15 @@ CONFIG_AIC79XX_REG_PRETTY_PRINT=y CONFIG_SCSI_AIC94XX=m CONFIG_AIC94XX_DEBUG=y CONFIG_SCSI_DPT_I2O=m +CONFIG_SCSI_ADVANSYS=m CONFIG_SCSI_IN2000=m +CONFIG_SCSI_ARCMSR=m CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -# CONFIG_SCSI_SATA_ULI is not set -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_ACPI=y +CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -1271,7 +1266,7 @@ CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y CONFIG_SCSI_IPR=m CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y @@ -1282,8 +1277,7 @@ CONFIG_SCSI_QLOGIC_FC=m CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y CONFIG_SCSI_QLOGIC_1280=m CONFIG_SCSI_QLA_FC=m -# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set -CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_SYM53C416=m CONFIG_SCSI_DC395x=m @@ -1300,13 +1294,76 @@ CONFIG_SCSI_DEBUG=m # # PCMCIA SCSI adapter support # -CONFIG_PCMCIA_AHA152X=m +# CONFIG_PCMCIA_AHA152X is not set CONFIG_PCMCIA_FDOMAIN=m CONFIG_PCMCIA_NINJA_SCSI=m CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_ATA_ACPI=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +CONFIG_PATA_ARTOP=m +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +CONFIG_PATA_EFAR=m +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_ISAPNP is not set +CONFIG_PATA_IT821X=m +CONFIG_PATA_IT8213=m +CONFIG_PATA_JMICRON=m +# CONFIG_PATA_LEGACY is not set +# CONFIG_PATA_TRIFLEX is not set +CONFIG_PATA_MARVELL=m +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_NETCELL=m +# CONFIG_PATA_NS87410 is not set +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +# CONFIG_PATA_PCMCIA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_QDI is not set +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +CONFIG_PATA_SIL680=m +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_WINBOND_VLB is not set + +# # Old CD-ROM drivers (not SCSI, not IDE) # CONFIG_CD_NO_IDESCSI=y @@ -1338,6 +1395,8 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_MULTIPATH_HP_SW=m +CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_NL_EVT=y # @@ -1348,6 +1407,7 @@ CONFIG_FUSION_SPI=m CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=m CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_MAX_FC_SGE=256 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m @@ -1548,6 +1608,7 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) @@ -1557,6 +1618,7 @@ CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -2004,6 +2066,7 @@ CONFIG_SPECIALIX=m # CONFIG_SPECIALIX_RTSCTS is not set CONFIG_SX=m CONFIG_STALDRV=y +CONFIG_NOZOMI=m # # Serial drivers @@ -2154,6 +2217,7 @@ CONFIG_HANGCHECK_TIMER=m # TPM devices # CONFIG_TCG_TPM=m +CONFIG_TCG_TIS=m CONFIG_TCG_NSC=m CONFIG_TCG_ATMEL=m CONFIG_TCG_INFINEON=m @@ -2260,6 +2324,7 @@ CONFIG_SENSORS_ADM1025=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_ADM9240=m +CONFIG_SENSORS_K8TEMP=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ATXP1=m CONFIG_SENSORS_DS1621=m @@ -2281,12 +2346,15 @@ CONFIG_SENSORS_LM90=m CONFIG_SENSORS_LM92=m CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_PC87360=m +CONFIG_SENSORS_PC87427=m CONFIG_SENSORS_SIS5595=m CONFIG_SENSORS_SMSC47M1=m +CONFIG_SENSORS_SMSC47M192=m CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_VT8231=m CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83791D=m CONFIG_SENSORS_W83792D=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83627HF=m @@ -2683,6 +2751,18 @@ CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m CONFIG_SND_CS46XX_NEW_DSP=y CONFIG_SND_CS5535AUDIO=m +CONFIG_SND_DARLA20=m +CONFIG_SND_GINA20=m +CONFIG_SND_LAYLA20=m +CONFIG_SND_DARLA24=m +CONFIG_SND_GINA24=m +CONFIG_SND_LAYLA24=m +CONFIG_SND_MONA=m +CONFIG_SND_MIA=m +CONFIG_SND_ECHO3G=m +CONFIG_SND_INDIGO=m +CONFIG_SND_INDIGOIO=m +CONFIG_SND_INDIGODJ=m CONFIG_SND_EMU10K1=m CONFIG_SND_EMU10K1X=m CONFIG_SND_ENS1370=m @@ -2690,6 +2770,7 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m @@ -2985,6 +3066,7 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_HP4X=m CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_SIERRAWIRELESS=m CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m @@ -3001,6 +3083,7 @@ CONFIG_USB_AUERSWALD=m CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m +CONFIG_USB_BERRY_CHARGE=m CONFIG_USB_LED=m CONFIG_USB_CYTHERM=m CONFIG_USB_PHIDGETKIT=m @@ -3089,7 +3172,6 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=m CONFIG_XFS_DMAPI=m CONFIG_XFS_SECURITY=y @@ -3102,6 +3184,7 @@ CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m CONFIG_MINIX_FS=y CONFIG_ROMFS_FS=m CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y CONFIG_DMAPI=m # CONFIG_DMAPI_DEBUG is not set CONFIG_QUOTA=y @@ -3146,7 +3229,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=m CONFIG_CONFIGFS_FS=m # @@ -3395,6 +3477,7 @@ CONFIG_CRC_CCITT=m CONFIG_CRC16=m CONFIG_CRC32=y CONFIG_LIBCRC32C=m +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m CONFIG_REED_SOLOMON=m @@ -3410,5 +3493,4 @@ CONFIG_X86_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y -CONFIG_X86_SYSENTER=y CONFIG_KTIME_SCALAR=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64-smp.config index 7592103..7391827 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64-smp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.27 -# Mon Mar 12 22:27:05 2007 +# Linux kernel version: 2.6.16.46 +# Tue Jul 3 18:35:09 2007 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -16,6 +16,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_AUDIT_ARCH=y # # Code maturity level options @@ -30,6 +31,9 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SUSE_KERNEL=y +CONFIG_SLE_VERSION=10 +CONFIG_SLE_SP=1 +CONFIG_SLE_SP_SUBLEVEL=0 CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,12 +41,14 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y CONFIG_SYSCTL=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CPUSETS=y +CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -84,6 +90,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y # # IO Schedulers @@ -145,9 +152,12 @@ CONFIG_MIGRATION=y CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NR_CPUS=128 CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_GART_IOMMU=y +CONFIG_CALGARY_IOMMU=y +# CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT is not set CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y @@ -776,6 +786,7 @@ CONFIG_IEEE80211_CRYPT_TKIP=m CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -1055,6 +1066,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -1077,15 +1089,15 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y # -# SCSI Transports +# SCSI Transport Attributes # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set CONFIG_ISCSI_TARGET=m -CONFIG_SCSI_SAS_DOMAIN_ATTRS=m -CONFIG_SCSI_SAS_DOMAIN_DEBUG=y # # SCSI low-level drivers @@ -1111,28 +1123,13 @@ CONFIG_AIC79XX_DEBUG_MASK=0 CONFIG_AIC79XX_REG_PRETTY_PRINT=y CONFIG_SCSI_AIC94XX=m CONFIG_AIC94XX_DEBUG=y +CONFIG_SCSI_ARCMSR=m CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_ACPI=y +CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -1153,14 +1150,13 @@ CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y # CONFIG_SCSI_IPR is not set CONFIG_SCSI_QLOGIC_FC=m CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y CONFIG_SCSI_QLOGIC_1280=m CONFIG_SCSI_QLA_FC=m -# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set -CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_DC395x=m CONFIG_SCSI_DC390T=m @@ -1174,6 +1170,64 @@ CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_ATA_ACPI=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +CONFIG_PATA_ARTOP=m +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +CONFIG_PATA_EFAR=m +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +# CONFIG_PATA_HPT3X3 is not set +CONFIG_PATA_IT821X=m +CONFIG_PATA_IT8213=m +CONFIG_PATA_JMICRON=m +# CONFIG_PATA_TRIFLEX is not set +CONFIG_PATA_MARVELL=m +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_NETCELL=m +# CONFIG_PATA_NS87410 is not set +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +# CONFIG_PATA_PCMCIA is not set +# CONFIG_PATA_PDC_OLD is not set +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +CONFIG_PATA_SIL680=m +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1193,6 +1247,8 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_MULTIPATH_HP_SW=m +CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_NL_EVT=y # @@ -1203,6 +1259,7 @@ CONFIG_FUSION_SPI=m CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=m CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_MAX_FC_SGE=256 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m @@ -1365,6 +1422,7 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) @@ -1374,6 +1432,7 @@ CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -1718,6 +1777,7 @@ CONFIG_SPECIALIX=m # CONFIG_SPECIALIX_RTSCTS is not set CONFIG_SX=m CONFIG_STALDRV=y +CONFIG_NOZOMI=m # # Serial drivers @@ -1840,7 +1900,7 @@ CONFIG_HANGCHECK_TIMER=m # TPM devices # CONFIG_TCG_TPM=m -CONFIG_TCG_NSC=m +CONFIG_TCG_TIS=m CONFIG_TCG_ATMEL=m CONFIG_TELCLOCK=m CONFIG_CRASHER=m @@ -1942,6 +2002,7 @@ CONFIG_SENSORS_ADM1025=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_ADM9240=m +CONFIG_SENSORS_K8TEMP=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ATXP1=m CONFIG_SENSORS_DS1621=m @@ -1963,12 +2024,15 @@ CONFIG_SENSORS_LM90=m CONFIG_SENSORS_LM92=m CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_PC87360=m +CONFIG_SENSORS_PC87427=m CONFIG_SENSORS_SIS5595=m CONFIG_SENSORS_SMSC47M1=m +CONFIG_SENSORS_SMSC47M192=m CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_VT8231=m CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83791D=m CONFIG_SENSORS_W83792D=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83627HF=m @@ -2305,6 +2369,18 @@ CONFIG_SND_CMIPCI=m CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_DARLA20=m +CONFIG_SND_GINA20=m +CONFIG_SND_LAYLA20=m +CONFIG_SND_DARLA24=m +CONFIG_SND_GINA24=m +CONFIG_SND_LAYLA24=m +CONFIG_SND_MONA=m +CONFIG_SND_MIA=m +CONFIG_SND_ECHO3G=m +CONFIG_SND_INDIGO=m +CONFIG_SND_INDIGOIO=m +CONFIG_SND_INDIGODJ=m CONFIG_SND_EMU10K1=m CONFIG_SND_EMU10K1X=m CONFIG_SND_ENS1370=m @@ -2312,6 +2388,7 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m @@ -2571,6 +2648,7 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_HP4X=m CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_SIERRAWIRELESS=m CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m @@ -2587,6 +2665,7 @@ CONFIG_USB_AUERSWALD=m CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m +CONFIG_USB_BERRY_CHARGE=m CONFIG_USB_LED=m CONFIG_USB_CYTHERM=m CONFIG_USB_PHIDGETKIT=m @@ -2677,7 +2756,6 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=m CONFIG_XFS_DMAPI=m CONFIG_XFS_SECURITY=y @@ -2690,6 +2768,7 @@ CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m CONFIG_MINIX_FS=y CONFIG_ROMFS_FS=m CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y CONFIG_DMAPI=m # CONFIG_DMAPI_DEBUG is not set CONFIG_QUOTA=y @@ -2734,7 +2813,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=m CONFIG_CONFIGFS_FS=m # diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64.config index 0020d17..d5e00e7 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.16-2.6-sles10-x86_64.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.27 -# Mon Mar 12 22:27:14 2007 +# Linux kernel version: 2.6.16.46 +# Tue Jul 3 18:30:50 2007 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -16,6 +16,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_AUDIT_ARCH=y # # Code maturity level options @@ -30,6 +31,9 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SUSE_KERNEL=y +CONFIG_SLE_VERSION=10 +CONFIG_SLE_SP=1 +CONFIG_SLE_SP_SUBLEVEL=0 CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -37,11 +41,13 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y CONFIG_SYSCTL=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -82,6 +88,7 @@ CONFIG_KMOD=y # Block layer # CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y # # IO Schedulers @@ -129,9 +136,12 @@ CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_GART_IOMMU=y +CONFIG_CALGARY_IOMMU=y +# CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT is not set CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y @@ -757,6 +767,7 @@ CONFIG_IEEE80211_CRYPT_TKIP=m CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -1036,6 +1047,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y # @@ -1058,15 +1070,15 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y # -# SCSI Transports +# SCSI Transport Attributes # CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set CONFIG_ISCSI_TARGET=m -CONFIG_SCSI_SAS_DOMAIN_ATTRS=m -CONFIG_SCSI_SAS_DOMAIN_DEBUG=y # # SCSI low-level drivers @@ -1092,28 +1104,13 @@ CONFIG_AIC79XX_DEBUG_MASK=0 CONFIG_AIC79XX_REG_PRETTY_PRINT=y CONFIG_SCSI_AIC94XX=m CONFIG_AIC94XX_DEBUG=y +CONFIG_SCSI_ARCMSR=m CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=m -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_ACPI=y +CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m # CONFIG_SCSI_OMIT_FLASHPOINT is not set CONFIG_SCSI_DMX3191D=m @@ -1134,14 +1131,13 @@ CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_SYM53C8XX_MMIO=y # CONFIG_SCSI_IPR is not set CONFIG_SCSI_QLOGIC_FC=m CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y CONFIG_SCSI_QLOGIC_1280=m CONFIG_SCSI_QLA_FC=m -# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set -CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_DC395x=m CONFIG_SCSI_DC390T=m @@ -1155,6 +1151,64 @@ CONFIG_PCMCIA_QLOGIC=m CONFIG_PCMCIA_SYM53C500=m # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=m +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y +CONFIG_ATA_ACPI=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +CONFIG_PATA_ARTOP=m +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +CONFIG_PATA_EFAR=m +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +CONFIG_PATA_HPT37X=m +CONFIG_PATA_HPT3X2N=m +# CONFIG_PATA_HPT3X3 is not set +CONFIG_PATA_IT821X=m +CONFIG_PATA_IT8213=m +CONFIG_PATA_JMICRON=m +# CONFIG_PATA_TRIFLEX is not set +CONFIG_PATA_MARVELL=m +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_NETCELL=m +# CONFIG_PATA_NS87410 is not set +CONFIG_PATA_OPTI=m +CONFIG_PATA_OPTIDMA=m +# CONFIG_PATA_PCMCIA is not set +# CONFIG_PATA_PDC_OLD is not set +CONFIG_PATA_RADISYS=m +CONFIG_PATA_RZ1000=m +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +CONFIG_PATA_SIL680=m +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1174,6 +1228,8 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_MULTIPATH_HP_SW=m +CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_NL_EVT=y # @@ -1184,6 +1240,7 @@ CONFIG_FUSION_SPI=m CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=m CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_MAX_FC_SGE=256 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m @@ -1347,6 +1404,7 @@ CONFIG_SK98LIN=m CONFIG_VIA_VELOCITY=m CONFIG_TIGON3=m CONFIG_BNX2=m +CONFIG_QLA3XXX=m # # Ethernet (10000 Mbit) @@ -1356,6 +1414,7 @@ CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y +CONFIG_NETXEN_NIC=m # # Token Ring devices @@ -1705,6 +1764,7 @@ CONFIG_SX=m CONFIG_STALDRV=y # CONFIG_STALLION is not set # CONFIG_ISTALLION is not set +CONFIG_NOZOMI=m # # Serial drivers @@ -1828,7 +1888,7 @@ CONFIG_HANGCHECK_TIMER=m # TPM devices # CONFIG_TCG_TPM=m -CONFIG_TCG_NSC=m +CONFIG_TCG_TIS=m CONFIG_TCG_ATMEL=m CONFIG_TELCLOCK=m CONFIG_CRASHER=m @@ -1930,6 +1990,7 @@ CONFIG_SENSORS_ADM1025=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_ADM9240=m +CONFIG_SENSORS_K8TEMP=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ATXP1=m CONFIG_SENSORS_DS1621=m @@ -1951,12 +2012,15 @@ CONFIG_SENSORS_LM90=m CONFIG_SENSORS_LM92=m CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_PC87360=m +CONFIG_SENSORS_PC87427=m CONFIG_SENSORS_SIS5595=m CONFIG_SENSORS_SMSC47M1=m +CONFIG_SENSORS_SMSC47M192=m CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_VT8231=m CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83791D=m CONFIG_SENSORS_W83792D=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83627HF=m @@ -2293,6 +2357,18 @@ CONFIG_SND_CMIPCI=m CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_DARLA20=m +CONFIG_SND_GINA20=m +CONFIG_SND_LAYLA20=m +CONFIG_SND_DARLA24=m +CONFIG_SND_GINA24=m +CONFIG_SND_LAYLA24=m +CONFIG_SND_MONA=m +CONFIG_SND_MIA=m +CONFIG_SND_ECHO3G=m +CONFIG_SND_INDIGO=m +CONFIG_SND_INDIGOIO=m +CONFIG_SND_INDIGODJ=m CONFIG_SND_EMU10K1=m CONFIG_SND_EMU10K1X=m CONFIG_SND_ENS1370=m @@ -2300,6 +2376,7 @@ CONFIG_SND_ENS1371=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m @@ -2559,6 +2636,7 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_HP4X=m CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_SIERRAWIRELESS=m CONFIG_USB_SERIAL_TI=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m @@ -2575,6 +2653,7 @@ CONFIG_USB_AUERSWALD=m CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m +CONFIG_USB_BERRY_CHARGE=m CONFIG_USB_LED=m CONFIG_USB_CYTHERM=m CONFIG_USB_PHIDGETKIT=m @@ -2665,7 +2744,6 @@ CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m -CONFIG_XFS_EXPORT=y CONFIG_XFS_QUOTA=m CONFIG_XFS_DMAPI=m CONFIG_XFS_SECURITY=y @@ -2678,6 +2756,7 @@ CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m CONFIG_MINIX_FS=y CONFIG_ROMFS_FS=m CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y CONFIG_DMAPI=m # CONFIG_DMAPI_DEBUG is not set CONFIG_QUOTA=y @@ -2722,7 +2801,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=m CONFIG_CONFIGFS_FS=m # diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64-smp.config index 81f3823..edd2e0a 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64-smp.config @@ -102,7 +102,7 @@ CONFIG_K8_NUMA=y CONFIG_DISCONTIGMEM=y CONFIG_NUMA=y CONFIG_HAVE_DEC_LOCK=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=128 CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config index a74b21a..25ec2ea 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config @@ -128,6 +128,7 @@ CONFIG_I8K=m CONFIG_MICROCODE=m CONFIG_X86_MSR=m CONFIG_X86_CPUID=m +# CONFIG_HOTPLUG_CPU is not set # # Firmware Drivers @@ -227,7 +228,7 @@ CONFIG_X86_P4_CLOCKMOD=m CONFIG_X86_SPEEDSTEP_LIB=y # CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK is not set CONFIG_X86_LONGRUN=y -CONFIG_X86_LONGHAUL=y +# CONFIG_X86_LONGHAUL is not set # # Bus options (PCI, PCMCIA, EISA, MCA, ISA) @@ -571,23 +572,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_DTC3280 is not set @@ -628,6 +612,8 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m # CONFIG_SCSI_SYM53C416 is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -647,6 +633,27 @@ CONFIG_PCMCIA_FDOMAIN=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + +# # Old CD-ROM drivers (not SCSI, not IDE) # # CONFIG_CD_NO_IDESCSI is not set @@ -1521,7 +1528,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m # CONFIG_SERIAL_8250_ACPI is not set -CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_MANY_PORTS is not set CONFIG_SERIAL_8250_SHARE_IRQ=y @@ -1726,6 +1733,7 @@ CONFIG_HWMON=m CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -2171,8 +2179,9 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m +# CONFIG_INFINIBAND_RDS # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -2191,6 +2200,7 @@ CONFIG_EDAC_I82875P=m CONFIG_EDAC_I82860=m CONFIG_EDAC_R82600=m CONFIG_EDAC_POLL=y +CONFIG_EDAC_K8=m # # File systems @@ -2290,7 +2300,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -2402,8 +2412,12 @@ CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_4KSTACKS is not set # CONFIG_SCHEDSTATS is not set +CONFIG_LOCKMETER=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +# CONFIG_KGDB is not set +# CONFIG_PROC_SLEEP is not set + # # Security options diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686.config index bcd56a0..f967ea2 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet -# Mon Mar 12 20:11:45 2007 +# Tue Nov 7 10:22:12 2006 # CONFIG_X86=y CONFIG_MMU=y @@ -111,7 +111,7 @@ CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y # CONFIG_SMP is not set # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_X86_UP_APIC is not set CONFIG_X86_TSC=y CONFIG_X86_MCE=y @@ -121,6 +121,7 @@ CONFIG_I8K=m CONFIG_MICROCODE=m CONFIG_X86_MSR=m CONFIG_X86_CPUID=m +# CONFIG_HOTPLUG_CPU is not set # # Firmware Drivers @@ -128,11 +129,12 @@ CONFIG_X86_CPUID=m CONFIG_EDD=m CONFIG_DELL_RBU=m CONFIG_DCDBAS=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set # CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y +# CONFIG_USER_3GB is not set +# CONFIG_USER_2GB is not set +CONFIG_USER_1GB=y # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y # CONFIG_EFI is not set @@ -216,7 +218,7 @@ CONFIG_X86_P4_CLOCKMOD=m CONFIG_X86_SPEEDSTEP_LIB=y # CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK is not set CONFIG_X86_LONGRUN=y -CONFIG_X86_LONGHAUL=y +# CONFIG_X86_LONGHAUL is not set # # Bus options (PCI, PCMCIA, EISA, MCA, ISA) @@ -437,7 +439,7 @@ CONFIG_BLK_DEV_IDEDISK=y CONFIG_IDEDISK_MULTI_MODE=y CONFIG_BLK_DEV_IDECS=m CONFIG_BLK_DEV_DELKIN=m -CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set CONFIG_BLK_DEV_IDEFLOPPY=y CONFIG_BLK_DEV_IDESCSI=m @@ -558,23 +560,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_DTC3280 is not set @@ -615,6 +600,8 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m # CONFIG_SCSI_SYM53C416 is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -634,6 +621,27 @@ CONFIG_PCMCIA_FDOMAIN=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + +# # Old CD-ROM drivers (not SCSI, not IDE) # # CONFIG_CD_NO_IDESCSI is not set @@ -1526,7 +1534,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m # CONFIG_SERIAL_8250_ACPI is not set -CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_MANY_PORTS is not set CONFIG_SERIAL_8250_SHARE_IRQ=y @@ -1733,6 +1741,7 @@ CONFIG_HWMON=m CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -1991,11 +2000,9 @@ CONFIG_USB_SUSPEND=y # # USB Host Controller Drivers # -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_SPLIT_ISO=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=m +# CONFIG_USB_EHCI_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +# CONFIG_USB_UHCI_HCD is not set # # USB Device Class drivers @@ -2180,8 +2187,9 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m +# CONFIG_INFINIBAND_RDS # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -2200,6 +2208,7 @@ CONFIG_EDAC_I82875P=m CONFIG_EDAC_I82860=m CONFIG_EDAC_R82600=m CONFIG_EDAC_POLL=y +CONFIG_EDAC_K8=m # # File systems @@ -2299,7 +2308,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -2401,9 +2410,8 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_SLAB is not set CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_HIGHMEM=y CONFIG_DEBUG_INFO=y -# CONFIG_FRAME_POINTER is not set +CONFIG_FRAME_POINTER=y CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_KPROBES=y @@ -2411,6 +2419,8 @@ CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_4KSTACKS is not set # CONFIG_SCHEDSTATS is not set +# CONFIG_KGDB is not set +CONFIG_PROC_SLEEP=y # # Security options diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config index 2672ec3..9c8c057 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet -# Mon Mar 12 20:21:12 2007 +# Mon Jul 31 19:41:42 2006 # # @@ -93,6 +93,7 @@ CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y CONFIG_IOPROC=y CONFIG_PTRACK=y +CONFIG_IA64_LOCAL_APIC_INFO_MOD=m # # Firmware Drivers @@ -339,23 +340,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -389,6 +373,8 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -401,6 +387,27 @@ CONFIG_SCSI_QLA24XX=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1194,7 +1201,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_ACPI=y -CONFIG_SERIAL_8250_NR_UARTS=20 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y @@ -1361,6 +1368,7 @@ CONFIG_HWMON=m CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -1757,8 +1765,8 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -1855,7 +1863,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -1981,6 +1989,7 @@ CONFIG_IA64_GRANULE_16MB=y # CONFIG_DISABLE_VHPT is not set # CONFIG_IA64_DEBUG_CMPXCHG is not set # CONFIG_IA64_DEBUG_IRQ is not set +# CONFIG_KGDB is not set # # Security options diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config index c27cc50..8f2a26a 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet -# Mon Mar 12 20:21:18 2007 +# Mon Jul 31 19:44:37 2006 # # @@ -93,6 +93,7 @@ CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y CONFIG_IOPROC=y CONFIG_PTRACK=y +CONFIG_IA64_LOCAL_APIC_INFO_MOD=m # # Firmware Drivers @@ -339,23 +340,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -389,6 +373,8 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -401,6 +387,27 @@ CONFIG_SCSI_QLA24XX=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1194,7 +1201,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_ACPI=y -CONFIG_SERIAL_8250_NR_UARTS=20 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y @@ -1361,6 +1368,7 @@ CONFIG_HWMON=y CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -1757,8 +1765,8 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -1855,7 +1863,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -1981,6 +1989,7 @@ CONFIG_IA64_GRANULE_16MB=y # CONFIG_DISABLE_VHPT is not set # CONFIG_IA64_DEBUG_CMPXCHG is not set # CONFIG_IA64_DEBUG_IRQ is not set +# CONFIG_KGDB is not set # # Security options diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config index ea0569e..bfb5994 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet -# Mon Mar 12 20:21:34 2007 +# Mon Jul 31 19:52:59 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -98,7 +98,8 @@ CONFIG_X86_MCE=y CONFIG_X86_MCE_AMD=y CONFIG_IOPROC=y CONFIG_PTRACK=y - +# CONFIG_X86_64_XEN is not set + # # Power management options # @@ -161,6 +162,7 @@ CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y # CONFIG_UNORDERED_IO is not set +# CONFIG_HOTPLUG_CPU is not set CONFIG_PCI_MSI=y CONFIG_PCI_LEGACY_PROC=y # CONFIG_PCI_NAMES is not set @@ -471,23 +473,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -521,6 +506,9 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m + # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -533,6 +521,28 @@ CONFIG_SCSI_QLA24XX=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1337,7 +1347,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m # CONFIG_SERIAL_8250_ACPI is not set -CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_MANY_PORTS is not set CONFIG_SERIAL_8250_SHARE_IRQ=y @@ -1524,6 +1534,7 @@ CONFIG_HWMON=m CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -1913,10 +1924,7 @@ CONFIG_USB_SPEEDTOUCH=m # # InfiniBand support # -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_ADDR_TRANS=y +# CONFIG_INFINIBAND is not set CONFIG_IPATH_CORE=m CONFIG_INFINIBAND_IPATH=m CONFIG_INFINIBAND_MTHCA=m @@ -1928,8 +1936,8 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -1948,6 +1956,7 @@ CONFIG_EDAC_I82875P=m CONFIG_EDAC_I82860=m CONFIG_EDAC_R82600=m CONFIG_EDAC_POLL=y +CONFIG_EDAC_K8=m # # Firmware Drivers @@ -2054,7 +2063,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -2154,13 +2163,17 @@ CONFIG_OPROFILE=m CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_SLAB is not set -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_INFO is not set +CONFIG_FRAME_POINTER=y +CONFIG_FRAME_POINTER_FORCE=y CONFIG_INIT_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_IOMMU_DEBUG is not set CONFIG_KPROBES=y +# CONFIG_LOCKMETER is not set +CONFIG_PROC_SLEEP=y # # Security options diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config index 818e8c6..c449c4e 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet -# Mon Mar 12 20:21:30 2007 +# Mon Jul 31 19:51:03 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -98,7 +98,8 @@ CONFIG_X86_MCE=y CONFIG_X86_MCE_AMD=y CONFIG_IOPROC=y CONFIG_PTRACK=y - +# CONFIG_X86_64_XEN is not set + # # Power management options # @@ -471,23 +472,6 @@ CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m CONFIG_MEGARAID_MAILBOX=m CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_AHCI=m -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_MV=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_PDC_ADMA=m -CONFIG_SCSI_SATA_QSTOR=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIL24=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_ULI=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -521,6 +505,9 @@ CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m CONFIG_SCSI_QLA6312=m CONFIG_SCSI_QLA24XX=m +CONFIG_QLA3XXX=m +CONFIG_SCSI_QLA4XXX=m + # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -533,6 +520,27 @@ CONFIG_SCSI_QLA24XX=m # CONFIG_PCMCIA_SYM53C500 is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=m +CONFIG_SATA_SVW=m +CONFIG_ATA_PIIX=m +CONFIG_SATA_MV=m +CONFIG_SATA_NV=m +CONFIG_PDC_ADMA=m +CONFIG_SATA_QSTOR=m +CONFIG_SATA_PROMISE=m +CONFIG_SATA_SX4=m +CONFIG_SATA_SIL=m +CONFIG_SATA_SIL24=m +CONFIG_SATA_SIS=m +CONFIG_SATA_ULI=m +CONFIG_SATA_VIA=m +CONFIG_SATA_VITESSE=m +CONFIG_SATA_INTEL_COMBINED=y + +# # Multi-device support (RAID and LVM) # CONFIG_MD=y @@ -1337,7 +1345,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m # CONFIG_SERIAL_8250_ACPI is not set -CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_NR_UARTS=64 CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_MANY_PORTS is not set CONFIG_SERIAL_8250_SHARE_IRQ=y @@ -1524,6 +1532,7 @@ CONFIG_HWMON=m CONFIG_HWMON_VID=m CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_LM87=m +CONFIG_SENSORS_SMSC47B397=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -1928,8 +1937,8 @@ CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_SRP=m # CONFIG_INFINIBAND_ISER is not set -CONFIG_INFINIBAND_RDS=m # CONFIG_INFINIBAND_RDS_DEBUG is not set +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) @@ -1948,6 +1957,7 @@ CONFIG_EDAC_I82875P=m CONFIG_EDAC_I82860=m CONFIG_EDAC_R82600=m CONFIG_EDAC_POLL=y +CONFIG_EDAC_K8=m # # Firmware Drivers @@ -2054,7 +2064,7 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y +# CONFIG_NFSD_V2_ACL is not set CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y @@ -2161,6 +2171,7 @@ CONFIG_INIT_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_IOMMU_DEBUG is not set CONFIG_KPROBES=y +# CONFIG_KGDB is not set # # Security options diff --git a/lustre/kernel_patches/kernel_configs/uml-vanilla-2.4.24.config b/lustre/kernel_patches/kernel_configs/uml-vanilla-2.4.24.config deleted file mode 100644 index 2803866..0000000 --- a/lustre/kernel_patches/kernel_configs/uml-vanilla-2.4.24.config +++ /dev/null @@ -1,413 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_USERMODE=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -# CONFIG_PCI is not set -CONFIG_UID16=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General Setup -# -CONFIG_MODE_SKAS=y -CONFIG_MODE_TT=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=y -CONFIG_HOSTFS=y -CONFIG_HPPFS=y -CONFIG_MCONSOLE=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_HOST_2G_2G is not set -# CONFIG_UML_SMP is not set -# CONFIG_SMP is not set -CONFIG_NEST_LEVEL=0 -CONFIG_KERNEL_HALF_GIGS=1 -# CONFIG_HIGHMEM is not set -CONFIG_PROC_MM=y -CONFIG_KERNEL_STACK_ORDER=4 -CONFIG_UML_REAL_TIME_CLOCK=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_KMOD=y - -# -# Character Devices -# -CONFIG_STDIO_CONSOLE=y -CONFIG_SSL=y -CONFIG_FD_CHAN=y -CONFIG_NULL_CHAN=y -CONFIG_PORT_CHAN=y -CONFIG_PTY_CHAN=y -CONFIG_TTY_CHAN=y -CONFIG_XTERM_CHAN=y -CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -CONFIG_CON_CHAN="xterm" -CONFIG_SSL_CHAN="pty" -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_WATCHDOG is not set -# CONFIG_WATCHDOG_NOWAYOUT is not set -# CONFIG_SOFT_WATCHDOG is not set -# CONFIG_UML_WATCHDOG is not set -# CONFIG_UML_SOUND is not set -# CONFIG_SOUND is not set -# CONFIG_HOSTAUDIO is not set -# CONFIG_TTY_LOG is not set - -# -# Block Devices -# -CONFIG_BLK_DEV_UBD=y -# CONFIG_BLK_DEV_UBD_SYNC is not set -CONFIG_COW=y -CONFIG_COW_COMMON=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_MMAPPER is not set -CONFIG_NETDEVICES=y - -# -# Network Devices -# -CONFIG_UML_NET=y -CONFIG_UML_NET_ETHERTAP=y -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -CONFIG_UML_NET_SLIRP=y -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -# CONFIG_UML_NET_PCAP is not set -CONFIG_DUMMY=y -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=y -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_FILTER is not set -# CONFIG_PPP_ASYNC is not set -# CONFIG_PPP_SYNC_TTY is not set -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_SLIP=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -# CONFIG_FILTER is not set -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set -CONFIG_IPV6=m -# CONFIG_KHTTPD is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -CONFIG_IPV6_SCTP__=y -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -CONFIG_JFFS_FS=y -CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_DEBUG=0 -# CONFIG_CRAMFS is not set -# CONFIG_TMPFS is not set -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=y -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_XATTR_SHARING=y -CONFIG_EXT2_FS_XATTR_USER=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -# CONFIG_NFS_FS is not set -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_ROOT_NFS is not set -# CONFIG_NFSD is not set -# CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set -# CONFIG_SUNRPC is not set -# CONFIG_LOCKD is not set -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_ZISOFS_FS is not set -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -# CONFIG_NLS_ISO8859_1 is not set -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set -# CONFIG_BLK_DEV_MD is not set -# CONFIG_MD_LINEAR is not set -# CONFIG_MD_RAID0 is not set -# CONFIG_MD_RAID1 is not set -# CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -# CONFIG_BLK_DEV_LVM is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=y -# CONFIG_MTD_DEBUG is not set -# CONFIG_MTD_PARTITIONS is not set -# CONFIG_MTD_CONCAT is not set -# CONFIG_MTD_REDBOOT_PARTS is not set -# CONFIG_MTD_CMDLINE_PARTS is not set - -# -# User Modules And Translation Layers -# -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -# CONFIG_FTL is not set -# CONFIG_NFTL is not set - -# -# RAM/ROM/Flash chip drivers -# -# CONFIG_MTD_CFI is not set -# CONFIG_MTD_JEDECPROBE is not set -# CONFIG_MTD_GEN_PROBE is not set -# CONFIG_MTD_CFI_INTELEXT is not set -# CONFIG_MTD_CFI_AMDSTD is not set -# CONFIG_MTD_CFI_STAA is not set -# CONFIG_MTD_RAM is not set -# CONFIG_MTD_ROM is not set -# CONFIG_MTD_ABSENT is not set -# CONFIG_MTD_OBSOLETE_CHIPS is not set -# CONFIG_MTD_AMDSTD is not set -# CONFIG_MTD_SHARP is not set -# CONFIG_MTD_JEDEC is not set - -# -# Mapping drivers for chip access -# -# CONFIG_MTD_PHYSMAP is not set -# CONFIG_MTD_PCI is not set -# CONFIG_MTD_PCMCIA is not set - -# -# Self-contained MTD device drivers -# -# CONFIG_MTD_PMC551 is not set -# CONFIG_MTD_SLRAM is not set -# CONFIG_MTD_MTDRAM is not set -CONFIG_MTD_BLKMTD=y - -# -# Disk-On-Chip Device Drivers -# -# CONFIG_MTD_DOC1000 is not set -# CONFIG_MTD_DOC2000 is not set -# CONFIG_MTD_DOC2001 is not set -# CONFIG_MTD_DOCPROBE is not set - -# -# NAND Flash Device Drivers -# -# CONFIG_MTD_NAND is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y - -# -# Kernel hacking -# -# CONFIG_DEBUG_SLAB is not set -CONFIG_DEBUGSYM=y -CONFIG_PT_PROXY=y -# CONFIG_GPROF is not set -# CONFIG_GCOV is not set diff --git a/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch b/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch index f3067fa..ceaaa20 100644 --- a/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch +++ b/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch @@ -188,3204 +188,1619 @@ Kernel Hacking menu. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton ---- - - 25-akpm/Documentation/i386/kgdb/andthen | 100 + - 25-akpm/Documentation/i386/kgdb/debug-nmi.txt | 37 - 25-akpm/Documentation/i386/kgdb/gdb-globals.txt | 71 - 25-akpm/Documentation/i386/kgdb/gdbinit | 14 - 25-akpm/Documentation/i386/kgdb/gdbinit-modules | 146 + - 25-akpm/Documentation/i386/kgdb/gdbinit.hw | 117 + - 25-akpm/Documentation/i386/kgdb/kgdb.txt | 775 +++++++ - 25-akpm/Documentation/i386/kgdb/loadmodule.sh | 78 - 25-akpm/MAINTAINERS | 6 - 25-akpm/arch/i386/Kconfig | 8 - 25-akpm/arch/i386/Kconfig.debug | 2 - 25-akpm/arch/i386/Kconfig.kgdb | 175 + - 25-akpm/arch/i386/Makefile | 3 - 25-akpm/arch/i386/kernel/Makefile | 1 - 25-akpm/arch/i386/kernel/entry.S | 29 - 25-akpm/arch/i386/kernel/kgdb_stub.c | 2330 ++++++++++++++++++++++++ - 25-akpm/arch/i386/kernel/nmi.c | 25 - 25-akpm/arch/i386/kernel/smp.c | 12 - 25-akpm/arch/i386/kernel/traps.c | 77 - 25-akpm/arch/i386/lib/Makefile | 1 - 25-akpm/arch/i386/lib/kgdb_serial.c | 485 ++++ - 25-akpm/arch/i386/mm/fault.c | 6 - 25-akpm/arch/x86_64/boot/compressed/head.S | 1 - 25-akpm/arch/x86_64/boot/compressed/misc.c | 1 - 25-akpm/drivers/char/keyboard.c | 3 - 25-akpm/drivers/char/sysrq.c | 23 - 25-akpm/drivers/serial/8250.c | 40 - 25-akpm/drivers/serial/serial_core.c | 5 - 25-akpm/include/asm-i386/bugs.h | 21 - 25-akpm/include/asm-i386/kgdb.h | 59 - 25-akpm/include/asm-i386/kgdb_local.h | 102 + - 25-akpm/include/linux/config.h | 3 - 25-akpm/include/linux/dwarf2-lang.h | 132 + - 25-akpm/include/linux/dwarf2.h | 738 +++++++ - 25-akpm/include/linux/serial_core.h | 4 - 25-akpm/include/linux/spinlock.h | 12 - 25-akpm/kernel/pid.c | 6 - 25-akpm/kernel/sched.c | 7 - 38 files changed, 5645 insertions(+), 10 deletions(-) - -diff -puN arch/i386/Kconfig~kgdb-ga arch/i386/Kconfig ---- 25/arch/i386/Kconfig~kgdb-ga 2004-10-21 14:54:15.256604136 -0700 -+++ 25-akpm/arch/i386/Kconfig 2004-10-21 14:54:15.295598208 -0700 -@@ -1184,6 +1184,14 @@ menu "Executable file formats" - - source "fs/Kconfig.binfmt" - -+config TRAP_BAD_SYSCALL_EXITS -+ bool "Debug bad system call exits" -+ depends on KGDB -+ help -+ If you say Y here the kernel will check for system calls which -+ return without clearing preempt. -+ default n +Index: linux/Documentation/i386/kgdb/andthen +=================================================================== +--- linux.orig/Documentation/i386/kgdb/andthen ++++ linux/Documentation/i386/kgdb/andthen +@@ -0,0 +1,100 @@ + - endmenu - - source "drivers/Kconfig" -diff -puN arch/i386/kernel/entry.S~kgdb-ga arch/i386/kernel/entry.S ---- 25/arch/i386/kernel/entry.S~kgdb-ga 2004-10-21 14:54:15.257603984 -0700 -+++ 25-akpm/arch/i386/kernel/entry.S 2004-10-21 14:54:15.296598056 -0700 -@@ -48,6 +48,18 @@ - #include - #include - #include "irq_vectors.h" -+ /* We do not recover from a stack overflow, but at least -+ * we know it happened and should be able to track it down. -+ */ -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#define STACK_OVERFLOW_TEST \ -+ testl $(THREAD_SIZE - 512),%esp; \ -+ jnz 10f; \ -+ call stack_overflow; \ -+10: -+#else -+#define STACK_OVERFLOW_TEST -+#endif - - #define nr_syscalls ((syscall_table_size)/4) - -@@ -94,7 +106,8 @@ VM_MASK = 0x00020000 - pushl %ebx; \ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ -- movl %edx, %es; -+ movl %edx, %es; \ -+ STACK_OVERFLOW_TEST - - #define RESTORE_INT_REGS \ - popl %ebx; \ -@@ -198,6 +211,7 @@ need_resched: - # sysenter call handler stub - ENTRY(sysenter_entry) - movl TSS_sysenter_esp0(%esp),%esp -+ .globl sysenter_past_esp - sysenter_past_esp: - sti - pushl $(__USER_DS) -@@ -260,6 +274,19 @@ syscall_exit: - testw $_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - restore_all: -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS -+ movb CS(%esp), %al -+ testl $(VM_MASK | 3), %eax -+ jz resume_kernelX # returning to kernel or vm86-space ++define set_andthen ++ set var $thp=0 ++ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] ++ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) ++ set var $at_oc=kgdb_and_then_count ++ set var $at_cc=$at_oc ++end + -+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? -+ jz resume_kernelX ++define andthen_next ++ set var $at_cc=$arg0 ++end + -+ int $3 ++define andthen ++ andthen_set_edge ++ if ($at_cc >= $at_oc) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc ++ output *($thp+($at_cc++ % $at_size )) ++ printf "\n" ++ end ++end ++define andthen_set_edge ++ set var $at_oc=kgdb_and_then_count ++ set var $at_low = $at_oc - $at_size ++ if ($at_low < 0 ) ++ set var $at_low = 0 ++ end ++ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) ++ printf "Count outside of window, setting count to " ++ if ($at_cc >= $at_oc) ++ set var $at_cc = $at_oc ++ else ++ set var $at_cc = $at_low ++ end ++ printf "%d\n",$at_cc ++ end ++end + -+resume_kernelX: -+#endif - RESTORE_ALL - - # perform work that needs to be done immediately before resumption -diff -puN /dev/null arch/i386/kernel/kgdb_stub.c ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/kgdb_stub.c 2004-10-21 14:54:15.307596384 -0700 -@@ -0,0 +1,2330 @@ -+/* -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2, or (at your option) any -+ * later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ */ ++define beforethat ++ andthen_set_edge ++ if ($at_cc <= $at_low) ++ printf "Outside window. Window size is %d\n",($at_oc-$at_low) ++ else ++ printf "%d: ",$at_cc-1 ++ output *($thp+(--$at_cc % $at_size )) ++ printf "\n" ++ end ++end + -+/* -+ * Copyright (c) 2000 VERITAS Software Corporation. -+ * -+ */ -+/**************************************************************************** -+ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ -+ * -+ * Module name: remcom.c $ -+ * Revision: 1.34 $ -+ * Date: 91/03/09 12:29:49 $ -+ * Contributor: Lake Stevens Instrument Division$ -+ * -+ * Description: low level support for gdb debugger. $ -+ * -+ * Considerations: only works on target hardware $ -+ * -+ * Written by: Glenn Engel $ -+ * Updated by: David Grothe -+ * ModuleState: Experimental $ -+ * -+ * NOTES: See Below $ -+ * -+ * Modified for 386 by Jim Kingdon, Cygnus Support. -+ * Compatibility with 2.1.xx kernel by David Grothe -+ * -+ * Changes to allow auto initilization. All that is needed is that it -+ * be linked with the kernel and a break point (int 3) be executed. -+ * The header file defines BREAKPOINT to allow one to do -+ * this. It should also be possible, once the interrupt system is up, to -+ * call putDebugChar("+"). Once this is done, the remote debugger should -+ * get our attention by sending a ^C in a packet. George Anzinger -+ * -+ * Integrated into 2.2.5 kernel by Tigran Aivazian -+ * Added thread support, support for multiple processors, -+ * support for ia-32(x86) hardware debugging. -+ * Amit S. Kale ( akale@veritas.com ) -+ * -+ * -+ * To enable debugger support, two things need to happen. One, a -+ * call to set_debug_traps() is necessary in order to allow any breakpoints -+ * or error conditions to be properly intercepted and reported to gdb. -+ * Two, a breakpoint needs to be generated to begin communication. This -+ * is most easily accomplished by a call to breakpoint(). Breakpoint() -+ * simulates a breakpoint by executing an int 3. -+ * -+ ************* -+ * -+ * The following gdb commands are supported: -+ * -+ * command function Return value -+ * -+ * g return the value of the CPU registers hex data or ENN -+ * G set the value of the CPU registers OK or ENN -+ * -+ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN -+ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN -+ * -+ * c Resume at current address SNN ( signal NN) -+ * cAA..AA Continue at address AA..AA SNN -+ * -+ * s Step one instruction SNN -+ * sAA..AA Step one instruction from AA..AA SNN -+ * -+ * k kill -+ * -+ * ? What was the last sigval ? SNN (signal NN) -+ * -+ * All commands and responses are sent with a packet which includes a -+ * checksum. A packet consists of -+ * -+ * $#. -+ * -+ * where -+ * :: -+ * :: < two hex digits computed as modulo 256 sum of > -+ * -+ * When a packet is received, it is first acknowledged with either '+' or '-'. -+ * '+' indicates a successful transfer. '-' indicates a failed transfer. -+ * -+ * Example: -+ * -+ * Host: Reply: -+ * $m0,10#2a +$00010203040506070809101112131415#42 -+ * -+ ****************************************************************************/ -+#define KGDB_VERSION "<20030915.1651.33>" -+#include -+#include -+#include /* for strcpy */ -+#include -+#include -+#include -+#include -+#include /* for linux pt_regs struct */ -+#include -+#include -+#include -+#include -+#include -+#include ++document andthen_next ++ andthen_next ++ . sets the number of the event to display next. If this event ++ . is not in the event pool, either andthen or beforethat will ++ . correct it to the nearest event pool edge. The event pool ++ . ends at the last event recorded and begins ++ . prior to that. If beforethat is used next, it will display ++ . event -1. ++. ++ andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end + -+/************************************************************************ -+ * -+ * external low-level support routines -+ */ -+typedef void (*Function) (void); /* pointer to a function */ + -+/* Thread reference */ -+typedef unsigned char threadref[8]; ++document andthen ++ andthen ++. displays the next event in the list. sets up to display ++. the oldest saved event first. ++. (optional) count of the event to display. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to andthen the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end + -+extern void putDebugChar(int); /* write a single character */ -+extern int getDebugChar(void); /* read and return a single char */ ++document set_andthen ++ set_andthen ++. sets up to use the and commands. ++. if you have defined your own struct, use the above and ++. then enter the following: ++. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] ++. where is the name of your structure. ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end + -+/************************************************************************/ -+/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ -+/* at least NUMREGBYTES*2 are needed for register packets */ -+/* Longer buffer is needed to list all threads */ -+#define BUFMAX 400 ++document beforethat ++ beforethat ++. displays the next prior event in the list. sets up to ++. display the last occuring event first. ++. ++. note the number of events saved is specified at configure time. ++. if events are saved between calls to beforethat the index will change ++. but the displayed event will be the next one (unless the event buffer ++. is overrun). ++. ++. andthen commands are: set_andthen, andthen_next, andthen and beforethat ++end +Index: linux/Documentation/i386/kgdb/debug-nmi.txt +=================================================================== +--- linux.orig/Documentation/i386/kgdb/debug-nmi.txt ++++ linux/Documentation/i386/kgdb/debug-nmi.txt +@@ -0,0 +1,37 @@ ++Subject: Debugging with NMI ++Date: Mon, 12 Jul 1999 11:28:31 -0500 ++From: David Grothe ++Organization: Gcom, Inc ++To: David Grothe + -+char *kgdb_version = KGDB_VERSION; ++Kernel hackers: + -+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ -+int debug_regs = 0; /* set to non-zero to print registers */ ++Maybe this is old hat, but it is new to me -- + -+/* filled in by an external module */ -+char *gdb_module_offsets; ++On an ISA bus machine, if you short out the A1 and B1 pins of an ISA ++slot you will generate an NMI to the CPU. This interrupts even a ++machine that is hung in a loop with interrupts disabled. Used in ++conjunction with kgdb < ++ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can ++gain debugger control of a machine that is hung in the kernel! Even ++without kgdb the kernel will print a stack trace so you can find out ++where it was hung. + -+static const char hexchars[] = "0123456789abcdef"; ++The A1/B1 pins are directly opposite one another and the farthest pins ++towards the bracket end of the ISA bus socket. You can stick a paper ++clip or multi-meter probe between them to short them out. + -+/* Number of bytes of registers. */ -+#define NUMREGBYTES 64 -+/* -+ * Note that this register image is in a different order than -+ * the register image that Linux produces at interrupt time. -+ * -+ * Linux's register image is defined by struct pt_regs in ptrace.h. -+ * Just why GDB uses a different order is a historical mystery. -+ */ -+enum regnames { _EAX, /* 0 */ -+ _ECX, /* 1 */ -+ _EDX, /* 2 */ -+ _EBX, /* 3 */ -+ _ESP, /* 4 */ -+ _EBP, /* 5 */ -+ _ESI, /* 6 */ -+ _EDI, /* 7 */ -+ _PC /* 8 also known as eip */ , -+ _PS /* 9 also known as eflags */ , -+ _CS, /* 10 */ -+ _SS, /* 11 */ -+ _DS, /* 12 */ -+ _ES, /* 13 */ -+ _FS, /* 14 */ -+ _GS /* 15 */ -+}; ++I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the ++board consists of two rows of wire wrap pins. So I wired a push button ++between the A1/B1 pins and now have an ISA board that I can stick into ++any ISA bus slot for debugger entry. + -+/*************************** ASSEMBLY CODE MACROS *************************/ -+/* -+ * Put the error code here just in case the user cares. -+ * Likewise, the vector number here (since GDB only gets the signal -+ * number through the usual means, and that's not very specific). -+ * The called_from is the return address so he can tell how we entered kgdb. -+ * This will allow him to seperate out the various possible entries. -+ */ -+#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ ++Microsoft has a circuit diagram of a PCI card at ++http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to ++build one you will have to mail them and ask for the PAL equations. ++Nobody makes one comercially. + -+#define PID_MAX PID_MAX_DEFAULT ++[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if ++your machine catches fire, it is your problem, not mine.] + -+#ifdef CONFIG_SMP -+void smp_send_nmi_allbutself(void); -+#define IF_SMP(x) x -+#undef MAX_NO_CPUS -+#ifndef CONFIG_NO_KGDB_CPUS -+#define CONFIG_NO_KGDB_CPUS 2 -+#endif -+#if CONFIG_NO_KGDB_CPUS > NR_CPUS -+#define MAX_NO_CPUS NR_CPUS -+#else -+#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS -+#endif -+#define hold_init hold_on_sstep: 1, -+#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) -+#define NUM_CPUS num_online_cpus() -+#else -+#define IF_SMP(x) -+#define hold_init -+#undef MAX_NO_CPUS -+#define MAX_NO_CPUS 1 -+#define NUM_CPUS 1 -+#endif -+#define NOCPU (struct task_struct *)0xbad1fbad -+/* *INDENT-OFF* */ -+struct kgdb_info { -+ int used_malloc; -+ void *called_from; -+ long long entry_tsc; -+ int errcode; -+ int vector; -+ int print_debug_info; -+#ifdef CONFIG_SMP -+ int hold_on_sstep; -+ struct { -+ volatile struct task_struct *task; -+ int pid; -+ int hold; -+ struct pt_regs *regs; -+ } cpus_waiting[MAX_NO_CPUS]; -+#endif -+} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; ++-- Dave (the kgdb guy) +Index: linux/Documentation/i386/kgdb/gdb-globals.txt +=================================================================== +--- linux.orig/Documentation/i386/kgdb/gdb-globals.txt ++++ linux/Documentation/i386/kgdb/gdb-globals.txt +@@ -0,0 +1,71 @@ ++Sender: akale@veritas.com ++Date: Fri, 23 Jun 2000 19:26:35 +0530 ++From: "Amit S. Kale" ++Organization: Veritas Software (India) ++To: Dave Grothe , linux-kernel@vger.rutgers.edu ++CC: David Milburn , ++ "Edouard G. Parmelan" , ++ ezannoni@cygnus.com, Keith Owens ++Subject: Re: Module debugging using kgdb + -+/* *INDENT-ON* */ ++Dave Grothe wrote: ++> ++> Amit: ++> ++> There is a 2.4.0 version of kgdb on our ftp site: ++> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb ++> and loadmodule.sh there. ++> ++> Have a look at the README file and see if I go it right. If not, send ++> me some corrections and I will update it. ++> ++> Does your version of gdb solve the global variable problem? + -+#define used_m kgdb_info.used_malloc -+/* -+ * This is little area we set aside to contain the stack we -+ * need to build to allow gdb to call functions. We use one -+ * per cpu to avoid locking issues. We will do all this work -+ * with interrupts off so that should take care of the protection -+ * issues. -+ */ -+#define LOOKASIDE_SIZE 200 /* should be more than enough */ -+#define MALLOC_MAX 200 /* Max malloc size */ -+struct { -+ unsigned int esp; -+ int array[LOOKASIDE_SIZE]; -+} fn_call_lookaside[MAX_NO_CPUS]; ++Yes. ++Thanks to Elena Zanoni, gdb (developement version) can now calculate ++correctly addresses of dynamically loaded object files. I have not been ++following gdb developement for sometime and am not sure when symbol ++address calculation fix is going to appear in a gdb stable version. + -+static int trap_cpu; -+static unsigned int OLD_esp; ++Elena, any idea when the fix will make it to a prebuilt gdb from a ++redhat release? + -+#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] -+#define IF_BIT 0x200 -+#define TF_BIT 0x100 ++For the time being I have built a gdb developement version. It can be ++used for module debugging with loadmodule.sh script. + -+#define MALLOC_ROUND 8-1 ++The problem with calculating of module addresses with previous versions ++of gdb was as follows: ++gdb did not use base address of a section while calculating address of ++a symbol in the section in an object file loaded via 'add-symbol-file'. ++It used address of .text segment instead. Due to this addresses of ++symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. + -+static char malloc_array[MALLOC_MAX]; -+IF_SMP(static void to_gdb(const char *mess)); -+void * -+malloc(int size) -+{ ++Above mentioned fix allow gdb to use base address of a segment while ++calculating address of a symbol in it. It adds a parameter '-s' to ++'add-symbol-file' command for specifying base address of a segment. + -+ if (size <= (MALLOC_MAX - used_m)) { -+ int old_used = used_m; -+ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); -+ return &malloc_array[old_used]; -+ } else { -+ return NULL; -+ } -+} ++loadmodule.sh script works as follows. + -+/* -+ * Gdb calls functions by pushing agruments, including a return address -+ * on the stack and the adjusting EIP to point to the function. The -+ * whole assumption in GDB is that we are on a different stack than the -+ * one the "user" i.e. code that hit the break point, is on. This, of -+ * course is not true in the kernel. Thus various dodges are needed to -+ * do the call without directly messing with EIP (which we can not change -+ * as it is just a location and not a register. To adjust it would then -+ * require that we move every thing below EIP up or down as needed. This -+ * will not work as we may well have stack relative pointer on the stack -+ * (such as the pointer to regs, for example). ++1. Copy a module file to target machine. ++2. Load the module on the target machine using insmod with -m parameter. ++insmod produces a module load map which contains base addresses of all ++sections in the module and addresses of symbols in the module file. ++3. Find all sections and their base addresses in the module from ++the module map. ++4. Generate a script that loads the module file. The script uses ++'add-symbol-file' and specifies address of text segment followed by ++addresses of all segments in the module. + -+ * So here is what we do: -+ * We detect gdb attempting to store into the stack area and instead, store -+ * into the fn_call_lookaside.array at the same relative location as if it -+ * were the area ESP pointed at. We also trap ESP modifications -+ * and uses these to adjust fn_call_lookaside.esp. On entry -+ * fn_call_lookaside.esp will be set to point at the last entry in -+ * fn_call_lookaside.array. This allows us to check if it has changed, and -+ * if so, on exit, we add the registers we will use to do the move and a -+ * trap/ interrupt return exit sequence. We then adjust the eflags in the -+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to -+ * kill the interrupt bit, AND we change EIP to point at our set up stub. -+ * As part of the register set up we preset the registers to point at the -+ * begining and end of the fn_call_lookaside.array, so all the stub needs to -+ * do is move words from the array to the stack until ESP= the desired value -+ * then do the rti. This will then transfer to the desired function with -+ * all the correct registers. Nifty huh? -+ */ -+extern asmlinkage void fn_call_stub(void); -+extern asmlinkage void fn_rtn_stub(void); -+/* *INDENT-OFF* */ -+__asm__("fn_rtn_stub:\n\t" -+ "movl %eax,%esp\n\t" -+ "fn_call_stub:\n\t" -+ "1:\n\t" -+ "addl $-4,%ebx\n\t" -+ "movl (%ebx), %eax\n\t" -+ "pushl %eax\n\t" -+ "cmpl %esp,%ecx\n\t" -+ "jne 1b\n\t" -+ "popl %eax\n\t" -+ "popl %ebx\n\t" -+ "popl %ecx\n\t" -+ "iret \n\t"); -+/* *INDENT-ON* */ -+#define gdb_i386vector kgdb_info.vector -+#define gdb_i386errcode kgdb_info.errcode -+#define waiting_cpus kgdb_info.cpus_waiting -+#define remote_debug kgdb_info.print_debug_info -+#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold -+/* gdb locks */ -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_called; -+static spinlock_t waitlocks[MAX_NO_CPUS] = -+ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; -+/* -+ * The following array has the thread pointer of each of the "other" -+ * cpus. We make it global so it can be seen by gdb. -+ */ -+volatile int in_kgdb_entry_log[MAX_NO_CPUS]; -+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; -+/* -+static spinlock_t continuelocks[MAX_NO_CPUS]; -+*/ -+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; -+/* waiters on our spinlock plus us */ -+static atomic_t spinlock_waiters = ATOMIC_INIT(1); -+static int spinlock_count = 0; -+static int spinlock_cpu = 0; -+/* -+ * Note we use nested spin locks to account for the case where a break -+ * point is encountered when calling a function by user direction from -+ * kgdb. Also there is the memory exception recursion to account for. -+ * Well, yes, but this lets other cpus thru too. Lets add a -+ * cpu id to the lock. -+ */ -+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ -+ spinlock_cpu != smp_processor_id()){\ -+ atomic_inc(&spinlock_waiters); \ -+ while (! spin_trylock(x)) {\ -+ in_kgdb(®s);\ -+ }\ -+ atomic_dec(&spinlock_waiters); \ -+ spinlock_count = 1; \ -+ spinlock_cpu = smp_processor_id(); \ -+ }else{ \ -+ spinlock_count++; \ -+ } -+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) -+#else -+unsigned kgdb_spinlock = 0; -+#define KGDB_SPIN_LOCK(x) --*x -+#define KGDB_SPIN_UNLOCK(x) ++*x -+#endif -+ -+int -+hex(char ch) -+{ -+ if ((ch >= 'a') && (ch <= 'f')) -+ return (ch - 'a' + 10); -+ if ((ch >= '0') && (ch <= '9')) -+ return (ch - '0'); -+ if ((ch >= 'A') && (ch <= 'F')) -+ return (ch - 'A' + 10); -+ return (-1); -+} -+ -+/* scan for the sequence $# */ -+void -+getpacket(char *buffer) -+{ -+ unsigned char checksum; -+ unsigned char xmitcsum; -+ int i; -+ int count; -+ char ch; -+ -+ do { -+ /* wait around for the start character, ignore all other characters */ -+ while ((ch = (getDebugChar() & 0x7f)) != '$') ; -+ checksum = 0; -+ xmitcsum = -1; -+ -+ count = 0; -+ -+ /* now, read until a # or end of buffer is found */ -+ while (count < BUFMAX) { -+ ch = getDebugChar() & 0x7f; -+ if (ch == '#') -+ break; -+ checksum = checksum + ch; -+ buffer[count] = ch; -+ count = count + 1; -+ } -+ buffer[count] = 0; -+ -+ if (ch == '#') { -+ xmitcsum = hex(getDebugChar() & 0x7f) << 4; -+ xmitcsum += hex(getDebugChar() & 0x7f); -+ if ((remote_debug) && (checksum != xmitcsum)) { -+ printk -+ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", -+ checksum, xmitcsum, buffer); -+ } -+ -+ if (checksum != xmitcsum) -+ putDebugChar('-'); /* failed checksum */ -+ else { -+ putDebugChar('+'); /* successful transfer */ -+ /* if a sequence char is present, reply the sequence ID */ -+ if (buffer[2] == ':') { -+ putDebugChar(buffer[0]); -+ putDebugChar(buffer[1]); -+ /* remove sequence chars from buffer */ -+ count = strlen(buffer); -+ for (i = 3; i <= count; i++) -+ buffer[i - 3] = buffer[i]; -+ } -+ } -+ } -+ } while (checksum != xmitcsum); -+ -+ if (remote_debug) -+ printk("R:%s\n", buffer); -+} -+ -+/* send the packet in buffer. */ -+ -+void -+putpacket(char *buffer) -+{ -+ unsigned char checksum; -+ int count; -+ char ch; -+ -+ /* $#. */ -+ do { -+ if (remote_debug) -+ printk("T:%s\n", buffer); -+ putDebugChar('$'); -+ checksum = 0; -+ count = 0; -+ -+ while ((ch = buffer[count])) { -+ putDebugChar(ch); -+ checksum += ch; -+ count += 1; -+ } -+ -+ putDebugChar('#'); -+ putDebugChar(hexchars[checksum >> 4]); -+ putDebugChar(hexchars[checksum % 16]); -+ -+ } while ((getDebugChar() & 0x7f) != '+'); -+ -+} -+ -+static char remcomInBuffer[BUFMAX]; -+static char remcomOutBuffer[BUFMAX]; -+static short error; -+ -+void -+debug_error(char *format, char *parm) -+{ -+ if (remote_debug) -+ printk(format, parm); -+} -+ -+static void -+print_regs(struct pt_regs *regs) -+{ -+ printk("EAX=%08lx ", regs->eax); -+ printk("EBX=%08lx ", regs->ebx); -+ printk("ECX=%08lx ", regs->ecx); -+ printk("EDX=%08lx ", regs->edx); -+ printk("\n"); -+ printk("ESI=%08lx ", regs->esi); -+ printk("EDI=%08lx ", regs->edi); -+ printk("EBP=%08lx ", regs->ebp); -+ printk("ESP=%08lx ", (long) ®s->esp); -+ printk("\n"); -+ printk(" DS=%08x ", regs->xds); -+ printk(" ES=%08x ", regs->xes); -+ printk(" SS=%08x ", __KERNEL_DS); -+ printk(" FL=%08lx ", regs->eflags); -+ printk("\n"); -+ printk(" CS=%08x ", regs->xcs); -+ printk(" IP=%08lx ", regs->eip); -+#if 0 -+ printk(" FS=%08x ", regs->fs); -+ printk(" GS=%08x ", regs->gs); -+#endif -+ printk("\n"); -+ -+} /* print_regs */ -+ -+#define NEW_esp fn_call_lookaside[trap_cpu].esp -+ -+static void -+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ gdb_regs[_EAX] = regs->eax; -+ gdb_regs[_EBX] = regs->ebx; -+ gdb_regs[_ECX] = regs->ecx; -+ gdb_regs[_EDX] = regs->edx; -+ gdb_regs[_ESI] = regs->esi; -+ gdb_regs[_EDI] = regs->edi; -+ gdb_regs[_EBP] = regs->ebp; -+ gdb_regs[_DS] = regs->xds; -+ gdb_regs[_ES] = regs->xes; -+ gdb_regs[_PS] = regs->eflags; -+ gdb_regs[_CS] = regs->xcs; -+ gdb_regs[_PC] = regs->eip; -+ /* Note, as we are a debugging the kernel, we will always -+ * trap in kernel code, this means no priviledge change, -+ * and so the pt_regs structure is not completely valid. In a non -+ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, -+ * SS and ESP are not stacked, this means that the last 2 elements of -+ * pt_regs is not valid (they would normally refer to the user stack) -+ * also, using regs+1 is no good because you end up will a value that is -+ * 2 longs (8) too high. This used to cause stepping over functions -+ * to fail, so my fix is to use the address of regs->esp, which -+ * should point at the end of the stack frame. Note I have ignored -+ * completely exceptions that cause an error code to be stacked, such -+ * as double fault. Stuart Hughes, Zentropix. -+ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; -+ -+ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). -+ */ -+ gdb_regs[_ESP] = NEW_esp; -+ gdb_regs[_SS] = __KERNEL_DS; -+ gdb_regs[_FS] = 0xFFFF; -+ gdb_regs[_GS] = 0xFFFF; -+} /* regs_to_gdb_regs */ -+ -+static void -+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ regs->eax = gdb_regs[_EAX]; -+ regs->ebx = gdb_regs[_EBX]; -+ regs->ecx = gdb_regs[_ECX]; -+ regs->edx = gdb_regs[_EDX]; -+ regs->esi = gdb_regs[_ESI]; -+ regs->edi = gdb_regs[_EDI]; -+ regs->ebp = gdb_regs[_EBP]; -+ regs->xds = gdb_regs[_DS]; -+ regs->xes = gdb_regs[_ES]; -+ regs->eflags = gdb_regs[_PS]; -+ regs->xcs = gdb_regs[_CS]; -+ regs->eip = gdb_regs[_PC]; -+ NEW_esp = gdb_regs[_ESP]; /* keep the value */ -+#if 0 /* can't change these */ -+ regs->esp = gdb_regs[_ESP]; -+ regs->xss = gdb_regs[_SS]; -+ regs->fs = gdb_regs[_FS]; -+ regs->gs = gdb_regs[_GS]; -+#endif -+ -+} /* gdb_regs_to_regs */ -+ -+int thread_list = 0; -+ -+void -+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) -+{ -+ unsigned long stack_page; -+ int count = 0; -+ IF_SMP(int i); -+ if (!p || p == current) { -+ regs_to_gdb_regs(gdb_regs, regs); -+ return; -+ } -+#ifdef CONFIG_SMP -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (p == kgdb_info.cpus_waiting[i].task) { -+ regs_to_gdb_regs(gdb_regs, -+ kgdb_info.cpus_waiting[i].regs); -+ gdb_regs[_ESP] = -+ (int) &kgdb_info.cpus_waiting[i].regs->esp; -+ -+ return; -+ } -+ } -+#endif -+ memset(gdb_regs, 0, NUMREGBYTES); -+ gdb_regs[_ESP] = p->thread.esp; -+ gdb_regs[_PC] = p->thread.eip; -+ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; -+ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); -+ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); -+ -+/* -+ * This code is to give a more informative notion of where a process -+ * is waiting. It is used only when the user asks for a thread info -+ * list. If he then switches to the thread, s/he will find the task -+ * is in schedule, but a back trace should show the same info we come -+ * up with. This code was shamelessly purloined from process.c. It was -+ * then enhanced to provide more registers than simply the program -+ * counter. -+ */ -+ -+ if (!thread_list) { -+ return; -+ } -+ -+ if (p->state == TASK_RUNNING) -+ return; -+ stack_page = (unsigned long) p->thread_info; -+ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > -+ THREAD_SIZE - sizeof(long) + stack_page) -+ return; -+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ -+ do { -+ if (gdb_regs[_EBP] < stack_page || -+ gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page) -+ return; -+ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); -+ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; -+ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; -+ if (!in_sched_functions(gdb_regs[_PC])) -+ return; -+ } while (count++ < 16); -+ return; -+} -+ -+/* Indicate to caller of mem2hex or hex2mem that there has been an -+ error. */ -+static volatile int mem_err = 0; -+static volatile int mem_err_expected = 0; -+static volatile int mem_err_cnt = 0; -+static int garbage_loc = -1; -+ -+int -+get_char(char *addr) -+{ -+ return *addr; -+} -+ -+void -+set_char(char *addr, int val, int may_fault) -+{ -+ /* -+ * This code traps references to the area mapped to the kernel -+ * stack as given by the regs and, instead, stores to the -+ * fn_call_lookaside[cpu].array -+ */ -+ if (may_fault && -+ (unsigned int) addr < OLD_esp && -+ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { -+ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); -+ } -+ *addr = val; -+} -+ -+/* convert the memory pointed to by mem into hex, placing result in buf */ -+/* return a pointer to the last char put in buf (null) */ -+/* If MAY_FAULT is non-zero, then we should set mem_err in response to -+ a fault; if zero treat a fault like any other fault in the stub. */ -+char * -+mem2hex(char *mem, char *buf, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ /* printk("%lx = ", mem) ; */ -+ -+ ch = get_char(mem++); -+ -+ /* printk("%02x\n", ch & 0xFF) ; */ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault fetching from addr %lx\n", -+ (long) (mem - 1)); -+ *buf = 0; /* truncate buffer */ -+ return (buf); -+ } -+ *buf++ = hexchars[ch >> 4]; -+ *buf++ = hexchars[ch % 16]; -+ } -+ *buf = 0; -+ if (may_fault) -+ mem_err_expected = 0; -+ return (buf); -+} -+ -+/* convert the hex array pointed to by buf into binary to be placed in mem */ -+/* return a pointer to the character AFTER the last byte written */ -+/* NOTE: We use the may fault flag to also indicate if the write is to -+ * the registers (0) or "other" memory (!=0) -+ */ -+char * -+hex2mem(char *buf, char *mem, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; ++Here is an example gdb script produced by loadmodule.sh script. + -+ if (may_fault) { -+ mem_err_expected = 1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ ch = hex(*buf++) << 4; -+ ch = ch + hex(*buf++); -+ set_char(mem++, ch, may_fault); ++add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 ++-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 ++-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 + -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault storing to addr %lx\n", -+ (long) (mem - 1)); -+ return (mem); -+ } -+ } -+ if (may_fault) -+ mem_err_expected = 0; -+ return (mem); -+} ++With this command gdb can calculate addresses of symbols in ANY segment ++in a module file. + -+/**********************************************/ -+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ -+/* RETURN NUMBER OF CHARS PROCESSED */ -+/**********************************************/ -+int -+hexToInt(char **ptr, int *intValue) -+{ -+ int numChars = 0; -+ int hexValue; ++Regards. ++-- ++Amit Kale ++Veritas Software ( http://www.veritas.com ) +Index: linux/Documentation/i386/kgdb/gdbinit +=================================================================== +--- linux.orig/Documentation/i386/kgdb/gdbinit ++++ linux/Documentation/i386/kgdb/gdbinit +@@ -0,0 +1,14 @@ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 ++target remote /dev/ttyS0 ++define si ++stepi ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip ++end ++define ni ++nexti ++printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx ++printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp ++x/i $eip +Index: linux/Documentation/i386/kgdb/gdbinit-modules +=================================================================== +--- linux.orig/Documentation/i386/kgdb/gdbinit-modules ++++ linux/Documentation/i386/kgdb/gdbinit-modules +@@ -0,0 +1,149 @@ ++# ++# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. ++# ++# This don't work for Linux-2.0 or older. ++# ++# Author Edouard G. Parmelan ++# ++# ++# Fri Apr 30 20:33:29 CEST 1999 ++# First public release. ++# ++# Major cleanup after experiment Linux-2.0 kernel without success. ++# Symbols of a module are not in the correct order, I can't explain ++# why :( ++# ++# Fri Mar 19 15:41:40 CET 1999 ++# Initial version. ++# ++# Thu Jan 6 16:29:03 CST 2000 ++# A little fixing by Dave Grothe ++# ++# Mon Jun 19 09:33:13 CDT 2000 ++# Alignment changes from Edouard Parmelan ++# ++# The basic idea is to find where insmod load the module and inform ++# GDB to load the symbol table of the module with the GDB command ++# ``add-symbol-file
''. ++# ++# The Linux kernel holds the list of all loaded modules in module_list, ++# this list end with &kernel_module (exactly with module->next == NULL, ++# but the last module is not a real module). ++# ++# Insmod allocates the struct module before the object file. Since ++# Linux-2.1, this structure contain his size. The real address of ++# the object file is then (char*)module + module->size_of_struct. ++# ++# You can use three user functions ``mod-list'', ``mod-print-symbols'' ++# and ``add-module-symbols''. ++# ++# mod-list list all loaded modules with the format: ++# ++# ++# As soon as you have found the address of your module, you can ++# print its exported symbols (mod-print-symbols) or inform GDB to add ++# symbols from your module file (mod-add-symbols). ++# ++# The argument that you give to mod-print-symbols or mod-add-symbols ++# is the from the mod-list command. ++# ++# When using the mod-add-symbols command you must also give the full ++# pathname of the modules object code file. ++# ++# The command mod-add-lis is an example of how to make this easier. ++# You can edit this macro to contain the path name of your own ++# favorite module and then use it as a shorthand to load it. You ++# still need the module-address, however. ++# ++# The internal function ``mod-validate'' set the GDB variable $mod ++# as a ``struct module*'' if the kernel known the module otherwise ++# $mod is set to NULL. This ensure to not add symbols for a wrong ++# address. ++# ++# ++# Sat Feb 12 20:05:47 CET 2005 ++# ++# Adapted to the 2.6.* module data structure. ++# (Getting miffed at gdb for not having "offsetof" in the process :-/ ) ++# ++# Autogenerate add-symbol-file statements from the module list instead ++# of relying on a no-longer-working loadmodule.sh program. ++# ++# Matthias Urlichs ++# ++# ++# Have a nice hacking day ! ++# ++# ++define mod-list ++ set $lmod = modules->next ++ # This is a circular data structure ++ while $lmod != &modules ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ printf "%p\t%s\n", $mod, $mod->name ++ set $lmod = $lmod->next ++ end ++end ++document mod-list ++mod-list ++List all modules in the form: ++Use the as the argument for the other ++mod-commands: mod-print-symbols, mod-add-symbols. ++end + -+ *intValue = 0; ++define mod-list-syms ++ set $lmod = modules->next ++ # This is a circular data structure ++ while $lmod != &modules ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ printf "add-symbol-file %s.ko %p\n", $mod->name, $mod->module_core ++ set $lmod = $lmod->next ++ end ++end ++document mod-list-syms ++mod-list-syms ++List all modules in the form: add-symbol-file ++for adding modules' symbol tables without loadmodule.sh. ++end + -+ while (**ptr) { -+ hexValue = hex(**ptr); -+ if (hexValue >= 0) { -+ *intValue = (*intValue << 4) | hexValue; -+ numChars++; -+ } else -+ break; ++define mod-validate ++ set $lmod = modules->next ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ while ($lmod != &modules) && ($mod != $arg0) ++ set $lmod = $lmod->next ++ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) ++ end ++ if $lmod == &modules ++ set $mod = 0 ++ printf "%p is not a module\n", $arg0 ++ end ++end ++document mod-validate ++mod-validate ++Internal user-command used to validate the module parameter. ++If is a real loaded module, set $mod to it, otherwise set $mod ++to 0. ++end + -+ (*ptr)++; -+ } ++define mod-print-symbols ++ mod-validate $arg0 ++ if $mod != 0 ++ set $i = 0 ++ while $i < $mod->num_syms ++ set $sym = $mod->syms[$i] ++ printf "%p\t%s\n", $sym->value, $sym->name ++ set $i = $i + 1 ++ end ++ set $i = 0 ++ while $i < $mod->num_gpl_syms ++ set $sym = $mod->gpl_syms[$i] ++ printf "%p\t%s\n", $sym->value, $sym->name ++ set $i = $i + 1 ++ end ++ end ++end ++document mod-print-symbols ++mod-print-symbols ++Print all exported symbols of the module. See mod-list ++end + -+ return (numChars); -+} +Index: linux/Documentation/i386/kgdb/gdbinit.hw +=================================================================== +--- linux.orig/Documentation/i386/kgdb/gdbinit.hw ++++ linux/Documentation/i386/kgdb/gdbinit.hw +@@ -0,0 +1,117 @@ + -+#define stubhex(h) hex(h) -+#ifdef old_thread_list ++#Using ia-32 hardware breakpoints. ++# ++#4 hardware breakpoints are available in ia-32 processors. These breakpoints ++#do not need code modification. They are set using debug registers. ++# ++#Each hardware breakpoint can be of one of the ++#three types: execution, write, access. ++#1. An Execution breakpoint is triggered when code at the breakpoint address is ++#executed. ++#2. A write breakpoint ( aka watchpoints ) is triggered when memory location ++#at the breakpoint address is written. ++#3. An access breakpoint is triggered when memory location at the breakpoint ++#address is either read or written. ++# ++#As hardware breakpoints are available in limited number, use software ++#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. ++# ++#Length of an access or a write breakpoint defines length of the datatype to ++#be watched. Length is 1 for char, 2 short , 3 int. ++# ++#For placing execution, write and access breakpoints, use commands ++#hwebrk, hwwbrk, hwabrk ++#To remove a breakpoint use hwrmbrk command. ++# ++#These commands take following types of arguments. For arguments associated ++#with each command, use help command. ++#1. breakpointno: 0 to 3 ++#2. length: 1 to 3 ++#3. address: Memory location in hex ( without 0x ) e.g c015e9bc ++# ++#Use the command exinfo to find which hardware breakpoint occured. + -+static int -+stub_unpack_int(char *buff, int fieldlength) -+{ -+ int nibble; -+ int retval = 0; ++#hwebrk breakpointno address ++define hwebrk ++ maintenance packet Y$arg0,0,0,$arg1 ++end ++document hwebrk ++ hwebrk
++ Places a hardware execution breakpoint ++ = 0 - 3 ++
= Hex digits without leading "0x". ++end + -+ while (fieldlength) { -+ nibble = stubhex(*buff++); -+ retval |= nibble; -+ fieldlength--; -+ if (fieldlength) -+ retval = retval << 4; -+ } -+ return retval; -+} -+#endif -+static char * -+pack_hex_byte(char *pkt, int byte) -+{ -+ *pkt++ = hexchars[(byte >> 4) & 0xf]; -+ *pkt++ = hexchars[(byte & 0xf)]; -+ return pkt; -+} ++#hwwbrk breakpointno length address ++define hwwbrk ++ maintenance packet Y$arg0,1,$arg1,$arg2 ++end ++document hwwbrk ++ hwwbrk
++ Places a hardware write breakpoint ++ = 0 - 3 ++ = 1 (1 byte), 2 (2 byte), 3 (4 byte) ++
= Hex digits without leading "0x". ++end + -+#define BUF_THREAD_ID_SIZE 16 ++#hwabrk breakpointno length address ++define hwabrk ++ maintenance packet Y$arg0,1,$arg1,$arg2 ++end ++document hwabrk ++ hwabrk
++ Places a hardware access breakpoint ++ = 0 - 3 ++ = 1 (1 byte), 2 (2 byte), 3 (4 byte) ++
= Hex digits without leading "0x". ++end + -+static char * -+pack_threadid(char *pkt, threadref * id) -+{ -+ char *limit; -+ unsigned char *altid; ++#hwrmbrk breakpointno ++define hwrmbrk ++ maintenance packet y$arg0 ++end ++document hwrmbrk ++ hwrmbrk ++ = 0 - 3 ++ Removes a hardware breakpoint ++end + -+ altid = (unsigned char *) id; -+ limit = pkt + BUF_THREAD_ID_SIZE; -+ while (pkt < limit) -+ pkt = pack_hex_byte(pkt, *altid++); -+ return pkt; -+} ++define reboot ++ maintenance packet r ++end ++#exinfo ++define exinfo ++ maintenance packet qE ++end ++document exinfo ++ exinfo ++ Gives information about a breakpoint. ++end ++define get_th ++ p $th=(struct thread_info *)((int)$esp & ~8191) ++end ++document get_th ++ get_tu ++ Gets and prints the current thread_info pointer, Defines th to be it. ++end ++define get_cu ++ p $cu=((struct thread_info *)((int)$esp & ~8191))->task ++end ++document get_cu ++ get_cu ++ Gets and print the "current" value. Defines $cu to be it. ++end ++define int_off ++ set var $flags=$eflags ++ set $eflags=$eflags&~0x200 ++ end ++define int_on ++ set var $eflags|=$flags&0x200 ++ end ++document int_off ++ saves the current interrupt state and clears the processor interrupt ++ flag. Use int_on to restore the saved flag. ++end ++document int_on ++ Restores the interrupt flag saved by int_off. ++end +Index: linux/Documentation/i386/kgdb/kgdb.txt +=================================================================== +--- linux.orig/Documentation/i386/kgdb/kgdb.txt ++++ linux/Documentation/i386/kgdb/kgdb.txt +@@ -0,0 +1,775 @@ ++Last edit: <20030806.1637.12> ++This file has information specific to the i386 kgdb option. Other ++platforms with the kgdb option may behave in a similar fashion. + -+#ifdef old_thread_list -+static char * -+unpack_byte(char *buf, int *value) -+{ -+ *value = stub_unpack_int(buf, 2); -+ return buf + 2; -+} ++New features: ++============ ++20030806.1557.37 ++This version was made against the 2.6.0-test2 kernel. We have made the ++following changes: + -+static char * -+unpack_threadid(char *inbuf, threadref * id) -+{ -+ char *altref; -+ char *limit = inbuf + BUF_THREAD_ID_SIZE; -+ int x, y; ++- The getthread() code in the stub calls find_task_by_pid(). It fails ++ if we are early in the bring up such that the pid arrays have yet to ++ be allocated. We have added a line to kernel/pid.c to make ++ "kgdb_pid_init_done" true once the arrays are allocated. This way the ++ getthread() code knows not to call. This is only used by the thread ++ debugging stuff and threads will not yet exist at this point in the ++ boot. + -+ altref = (char *) id; ++- For some reason, gdb was not asking for a new thread list when the ++ "info thread" command was given. We changed to the newer version of ++ the thread info command and gdb now seems to ask when needed. Result, ++ we now get all threads in the thread list. + -+ while (inbuf < limit) { -+ x = stubhex(*inbuf++); -+ y = stubhex(*inbuf++); -+ *altref++ = (x << 4) | y; -+ } -+ return inbuf; -+} -+#endif -+void -+int_to_threadref(threadref * id, int value) -+{ -+ unsigned char *scan; ++- We now respond to the ThreadExtraInfo request from gdb with the thread ++ name from task_struct .comm. This then appears in the thread list. ++ Thoughts on additional options for this are welcome. Things such as ++ "has BKL" and "Preempted" come to mind. I think we could have a flag ++ word that could enable different bits of info here. + -+ scan = (unsigned char *) id; -+ { -+ int i = 4; -+ while (i--) -+ *scan++ = 0; -+ } -+ *scan++ = (value >> 24) & 0xff; -+ *scan++ = (value >> 16) & 0xff; -+ *scan++ = (value >> 8) & 0xff; -+ *scan++ = (value & 0xff); -+} -+int -+int_to_hex_v(unsigned char * id, int value) -+{ -+ unsigned char *start = id; -+ int shift; -+ int ch; ++- We now honor, sort of, the C and S commands. These are continue and ++ single set after delivering a signal. We ignore the signal and do the ++ requested action. This only happens when we told gdb that a signal ++ was the reason for entry, which is only done on memory faults. The ++ result is that you can now continue into the Oops. + -+ for (shift = 28; shift >= 0; shift -= 4) { -+ if ((ch = (value >> shift) & 0xf) || (id != start)) { -+ *id = hexchars[ch]; -+ id++; -+ } -+ } -+ if (id == start) -+ *id++ = '0'; -+ return id - start; -+} -+#ifdef old_thread_list ++- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, ++ but it is more exact on what language to use. + -+static int -+threadref_to_int(threadref * ref) -+{ -+ int i, value = 0; -+ unsigned char *scan; ++- We added two dwarf2 include files and a bit of code at the end of ++ entry.S. This does not yet work, so it is disabled. Still we want to ++ keep track of the code and "maybe" someone out there can fix it. + -+ scan = (char *) ref; -+ scan += 4; -+ i = 4; -+ while (i-- > 0) -+ value = (value << 8) | ((*scan++) & 0xff); -+ return value; -+} -+#endif -+static int -+cmp_str(char *s1, char *s2, int count) -+{ -+ while (count--) { -+ if (*s1++ != *s2++) -+ return 0; -+ } -+ return 1; -+} ++- Randy Dunlap sent some fix ups for this file which are now merged. + -+#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ -+extern struct task_struct *kgdb_get_idle(int cpu); -+#define idle_task(cpu) kgdb_get_idle(cpu) -+#else -+#define idle_task(cpu) init_tasks[cpu] -+#endif ++- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a ++ compiler warning if CONFIG_KGDB is off (now who would do that :). + -+extern int kgdb_pid_init_done; ++- Andrew Morton sent a fix for the serial driver which is now merged. + -+struct task_struct * -+getthread(int pid) -+{ -+ struct task_struct *thread; -+ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { ++- Andrew also sent a change to the stub around the cpu managment code ++ which is also merged. + -+ return idle_task(pid - PID_MAX); -+ } else { -+ /* -+ * find_task_by_pid is relatively safe all the time -+ * Other pid functions require lock downs which imply -+ * that we may be interrupting them (as we get here -+ * in the middle of most any lock down). -+ * Still we don't want to call until the table exists! -+ */ -+ if (kgdb_pid_init_done){ -+ thread = find_task_by_pid(pid); -+ if (thread) { -+ return thread; -+ } -+ } -+ } -+ return NULL; -+} -+/* *INDENT-OFF* */ -+struct hw_breakpoint { -+ unsigned enabled; -+ unsigned type; -+ unsigned len; -+ unsigned addr; -+} breakinfo[4] = { {enabled:0}, -+ {enabled:0}, -+ {enabled:0}, -+ {enabled:0}}; -+/* *INDENT-ON* */ -+unsigned hw_breakpoint_status; -+void -+correct_hw_break(void) -+{ -+ int breakno; -+ int correctit; -+ int breakbit; -+ unsigned dr7; ++- Andrew also sent a patch to make "f" as well as "g" work as SysRq ++ commands to enter kgdb, merged. + -+ asm volatile ("movl %%db7, %0\n":"=r" (dr7) -+ :); -+ /* *INDENT-OFF* */ -+ do { -+ unsigned addr0, addr1, addr2, addr3; -+ asm volatile ("movl %%db0, %0\n" -+ "movl %%db1, %1\n" -+ "movl %%db2, %2\n" -+ "movl %%db3, %3\n" -+ :"=r" (addr0), "=r"(addr1), -+ "=r"(addr2), "=r"(addr3) -+ :); -+ } while (0); -+ /* *INDENT-ON* */ -+ correctit = 0; -+ for (breakno = 0; breakno < 3; breakno++) { -+ breakbit = 2 << (breakno << 1); -+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 |= breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ dr7 |= (((breakinfo[breakno].len << 2) | -+ breakinfo[breakno].type) << 16) << -+ (breakno << 2); -+ switch (breakno) { -+ case 0: -+ asm volatile ("movl %0, %%dr0\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; ++- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a ++ "who" field to the spinlock data struct. This is filled with ++ "current" when ever the spinlock suceeds. Useful if you want to know ++ who has the lock. + -+ case 1: -+ asm volatile ("movl %0, %%dr1\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; ++_ And last, but not least, we fixed the "get_cu" macro to properly get ++ the current value of "current". + -+ case 2: -+ asm volatile ("movl %0, %%dr2\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; ++New features: ++============ ++20030505.1827.27 ++We are starting to align with the sourceforge version, at least in ++commands. To this end, the boot command string to start kgdb at ++boot time has been changed from "kgdb" to "gdb". + -+ case 3: -+ asm volatile ("movl %0, %%dr3\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ } -+ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 &= ~breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ } -+ } -+ if (correctit) { -+ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); -+ } -+} ++Andrew Morton sent a couple of patches which are now included as follows: ++1.) We now return a flag to the interrupt handler. ++2.) We no longer use smp_num_cpus (a conflict with the lock meter). ++3.) And from William Lee Irwin III code to make ++ sure high-mem is set up before we attempt to register our interrupt ++ handler. ++We now include asm/kgdb.h from config.h so you will most likely never ++have to include it. It also 'NULLS' the kgdb macros you might have in ++your code when CONFIG_KGDB is not defined. This allows you to just ++turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. ++This include is conditioned on the machine being an x86 so as to not ++mess with other archs. + -+int -+remove_hw_break(unsigned breakno) -+{ -+ if (!breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 0; -+ return 0; -+} ++20020801.1129.03 ++This is currently the version for the 2.4.18 (and beyond?) kernel. + -+int -+set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) -+{ -+ if (breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 1; -+ breakinfo[breakno].type = type; -+ breakinfo[breakno].len = len; -+ breakinfo[breakno].addr = addr; -+ return 0; -+} ++We have several new "features" beginning with this version: + -+#ifdef CONFIG_SMP -+static int in_kgdb_console = 0; ++1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more ++ waiting and it will pull that guy out of an IRQ off spin lock :) + -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ unsigned flags; -+ int cpu = smp_processor_id(); -+ in_kgdb_called = 1; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ -+ in_kgdb_console) { /* or we are doing slow i/o */ -+ return 1; -+ } -+ return 0; -+ } ++2.) We doctored up the code that tells where a task is waiting and ++ included it so that the "info thread" command will show a bit more ++ than "schedule()". Try it... ++ ++3.) Added the ability to call a function from gdb. All the standard gdb ++ issues apply, i.e. if you hit a breakpoint in the function, you are ++ not allowed to call another (gdb limitation, not kgdb). To help ++ this capability we added a memory allocation function. Gdb does not ++ return this memory (it is used for strings that you pass to that function ++ you are calling from gdb) so we fixed up a way to allow you to ++ manually return the memory (see below). ++ ++4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the ++ interrupt flag to now also include the preemption count and the ++ "in_interrupt" info. The flag is now called "with_pif" to indicate ++ the order, preempt_count, in_interrupt, flag. The preempt_count is ++ shifted left by 4 bits so you can read the count in hex by dropping ++ the low order digit. In_interrupt is in bit 1, and the flag is in ++ bit 0. ++ ++5.) The command: "p kgdb_info" is now expanded and prints something ++ like: ++(gdb) p kgdb_info ++$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, ++ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, ++ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, ++ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} ++ ++ Things to note here: a.) used_malloc is the amount of memory that ++ has been malloc'ed to do calls from gdb. You can reclaim this ++ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) ++ cpus_waiting is now "sized" by the number of CPUs you enter at ++ configure time in the kgdb configure section. This is NOT used ++ anywhere else in the system, but it is "nice" here. c.) The task's ++ "pid" is now in the structure. This is the pid you will need to use ++ to decode to the thread id to get gdb to look at that thread. ++ Remember that the "info thread" command prints a list of threads ++ wherein it numbers each thread with its reference number followed ++ by the thread's pid. Note that the per-CPU idle threads actually ++ have pids of 0 (yes, there is more than one pid 0 in an SMP system). ++ To avoid confusion, kgdb numbers these threads with numbers beyond ++ the MAX_PID. That is why you see 32768 and above. + -+ /* As I see it the only reason not to let all cpus spin on -+ * the same spin_lock is to allow selected ones to proceed. -+ * This would be a good thing, so we leave it this way. -+ * Maybe someday.... Done ! ++6.) A subtle change, we now provide the complete register set for tasks ++ that are active on the other CPUs. This allows better trace back on ++ those tasks. + -+ * in_kgdb() is called from an NMI so we don't pretend -+ * to have any resources, like printk() for example. -+ */ ++ And, let's mention what we could not fix. Back-trace from all but the ++ thread that we trapped will, most likely, have a bogus entry in it. ++ The problem is that gdb does not recognize the entry code for ++ functions that use "current" near (at all?) the entry. The compiler ++ is putting the "current" decode as the first two instructions of the ++ function where gdb expects to find %ebp changing code. Back trace ++ also has trouble with interrupt frames. I am talking with Daniel ++ Jacobowitz about some way to fix this, but don't hold your breath. + -+ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ -+ /* -+ * log arival of this cpu -+ * The NMI keeps on ticking. Protect against recurring more -+ * than once, and ignor the cpu that has the kgdb lock -+ */ -+ in_kgdb_entry_log[cpu]++; -+ in_kgdb_here_log[cpu] = regs; -+ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { -+ goto exit_in_kgdb; -+ } -+ /* -+ * For protection of the initilization of the spin locks by kgdb -+ * it locks the kgdb spinlock before it gets the wait locks set -+ * up. We wait here for the wait lock to be taken. If the -+ * kgdb lock goes away first?? Well, it could be a slow exit -+ * sequence where the wait lock is removed prior to the kgdb lock -+ * so if kgdb gets unlocked, we just exit. -+ */ -+ while (spin_is_locked(&kgdb_spinlock) && -+ !spin_is_locked(waitlocks + cpu)) ; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ goto exit_in_kgdb; -+ } -+ waiting_cpus[cpu].task = current; -+ waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); -+ waiting_cpus[cpu].regs = regs; ++20011220.0050.35 ++Major enhancement with this version is the ability to hold one or more ++CPUs in an SMP system while allowing the others to continue. Also, by ++default only the current CPU is enabled on single-step commands (please ++note that gdb issues single-step commands at times other than when you ++use the si command). + -+ spin_unlock_wait(waitlocks + cpu); -+ /* -+ * log departure of this cpu -+ */ -+ waiting_cpus[cpu].task = 0; -+ waiting_cpus[cpu].pid = 0; -+ waiting_cpus[cpu].regs = 0; -+ correct_hw_break(); -+ exit_in_kgdb: -+ in_kgdb_here_log[cpu] = 0; -+ kgdb_local_irq_restore(flags); -+ return 1; -+ /* -+ spin_unlock(continuelocks + smp_processor_id()); -+ */ -+} ++Another change is to collect some useful information in ++a global structure called "kgdb_info". You should be able to just: + -+void -+smp__in_kgdb(struct pt_regs regs) -+{ -+ ack_APIC_irq(); -+ in_kgdb(®s); -+} -+#else -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ return (kgdb_spinlock); -+} -+#endif ++p kgdb_info + -+void -+printexceptioninfo(int exceptionNo, int errorcode, char *buffer) -+{ -+ unsigned dr6; -+ int i; -+ switch (exceptionNo) { -+ case 1: /* debug exception */ -+ break; -+ case 3: /* breakpoint */ -+ sprintf(buffer, "Software breakpoint"); -+ return; -+ default: -+ sprintf(buffer, "Details not available"); -+ return; -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (dr6 & 0x4000) { -+ sprintf(buffer, "Single step"); -+ return; -+ } -+ for (i = 0; i < 4; ++i) { -+ if (dr6 & (1 << i)) { -+ sprintf(buffer, "Hardware breakpoint %d", i); -+ return; -+ } -+ } -+ sprintf(buffer, "Unknown trap"); -+ return; -+} ++although I have seen cases where the first time this is done gdb just ++prints the first member but prints the whole structure if you then enter ++CR (carriage return or enter). This also works: + -+/* -+ * This function does all command procesing for interfacing to gdb. -+ * -+ * NOTE: The INT nn instruction leaves the state of the interrupt -+ * enable flag UNCHANGED. That means that when this routine -+ * is entered via a breakpoint (INT 3) instruction from code -+ * that has interrupts enabled, then interrupts will STILL BE -+ * enabled when this routine is entered. The first thing that -+ * we do here is disable interrupts so as to prevent recursive -+ * entries and bothersome serial interrupts while we are -+ * trying to run the serial port in polled mode. -+ * -+ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so -+ * it is always necessary to do a restore_flags before returning -+ * so as to let go of that lock. -+ */ -+int -+kgdb_handle_exception(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs) -+{ -+ struct task_struct *usethread = NULL; -+ struct task_struct *thread_list_start = 0, *thread = NULL; -+ int addr, length; -+ int breakno, breaktype; -+ char *ptr; -+ int newPC; -+ threadref thref; -+ int threadid; -+ int thread_min = PID_MAX + MAX_NO_CPUS; -+#ifdef old_thread_list -+ int maxthreads; -+#endif -+ int nothreads; -+ unsigned long flags; -+ int gdb_regs[NUMREGBYTES / 4]; -+ int dr6; -+ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ -+#define NO_NMI 1 -+#define NO_SYNC 2 -+#define regs (*linux_regs) -+#define NUMREGS NUMREGBYTES/4 -+ /* -+ * If the entry is not from the kernel then return to the Linux -+ * trap handler and let it process the interrupt normally. -+ */ -+ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { -+ printk("ignoring non-kernel exception\n"); -+ print_regs(®s); -+ return (0); -+ } ++p *&kgdb_info + -+ kgdb_local_irq_save(flags); ++Here is a sample: ++(gdb) p kgdb_info ++$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, ++ vector = 3, print_debug_info = 0} + -+ /* Get kgdb spinlock */ ++"Called_from" is the return address from the current entry into kgdb. ++Sometimes it is useful to know why you are in kgdb, for example, was ++it an NMI or a real breakpoint? The simple way to interrogate this ++return address is: + -+ KGDB_SPIN_LOCK(&kgdb_spinlock); -+ rdtscll(kgdb_info.entry_tsc); -+ /* -+ * We depend on this spinlock and the NMI watch dog to control the -+ * other cpus. They will arrive at "in_kgdb()" as a result of the -+ * NMI and will wait there for the following spin locks to be -+ * released. -+ */ -+#ifdef CONFIG_SMP ++l *0xc010732c + -+#if 0 -+ if (cpu_callout_map & ~MAX_CPU_MASK) { -+ printk("kgdb : too many cpus, possibly not mapped" -+ " in contiguous space, change MAX_NO_CPUS" -+ " in kgdb_stub and make new kernel.\n" -+ " cpu_callout_map is %lx\n", cpu_callout_map); -+ goto exit_just_unlock; -+ } -+#endif -+ if (spinlock_count == 1) { -+ int time = 0, end_time, dum = 0; -+ int i; -+ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) -+ }; -+ if (remote_debug) { -+ printk("kgdb : cpu %d entry, syncing others\n", -+ smp_processor_id()); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ /* -+ * Use trylock as we may already hold the lock if -+ * we are holding the cpu. Net result is all -+ * locked. -+ */ -+ spin_trylock(&waitlocks[i]); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) -+ cpu_logged_in[i] = 0; -+ /* -+ * Wait for their arrival. We know the watch dog is active if -+ * in_kgdb() has ever been called, as it is always called on a -+ * watchdog tick. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; /* Note: we use the High order bits! */ -+ i = 1; -+ if (num_online_cpus() > 1) { -+ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; -+ smp_send_nmi_allbutself(); -+ while (i < num_online_cpus() && time != end_time) { -+ int j; -+ for (j = 0; j < MAX_NO_CPUS; j++) { -+ if (waiting_cpus[j].task && -+ !cpu_logged_in[j]) { -+ i++; -+ cpu_logged_in[j] = 1; -+ if (remote_debug) { -+ printk -+ ("kgdb : cpu %d arrived at kgdb\n", -+ j); -+ } -+ break; -+ } else if (!waiting_cpus[j].task && -+ !cpu_online(j)) { -+ waiting_cpus[j].task = NOCPU; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].hold = 1; -+ break; -+ } -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { ++which will print the surrounding few lines of source code. ++ ++"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the ++kgdb_ts entries). ++ ++"errcode" and "vector" are other entry parameters which may be helpful on ++some traps. + -+ int wait = 100000; -+ while (wait--) ; -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ printk -+ ("kgdb : cpu %d stall" -+ " in in_kgdb\n", -+ j); -+ i++; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].task = -+ (struct task_struct -+ *) 1; -+ } -+ } -+ } ++"print_debug_info" is the internal debugging kgdb print enable flag. Yes, ++you can modify it. + -+ if (in_kgdb_entry_log[smp_processor_id()] > -+ (me_in_kgdb + 10)) { -+ break; -+ } ++In SMP systems kgdb_info also includes the "cpus_waiting" structure and ++"hold_on_step": + -+ rdtsc(dum, time); -+ } -+ if (i < num_online_cpus()) { -+ printk -+ ("kgdb : time out, proceeding without sync\n"); -+#if 0 -+ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", -+ waiting_cpus[0].task != 0, -+ waiting_cpus[1].task != 0); -+ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", -+ cpu_logged_in[0], cpu_logged_in[1]); -+ printk -+ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", -+ in_kgdb_here_log[0] != 0, -+ in_kgdb_here_log[1] != 0); -+#endif -+ entry_state = NO_SYNC; -+ } else { -+#if 0 -+ int ent = -+ in_kgdb_entry_log[smp_processor_id()] - -+ me_in_kgdb; -+ printk("kgdb : sync after %d entries\n", ent); -+#endif -+ } -+ } else { -+ if (remote_debug) { -+ printk -+ ("kgdb : %d cpus, but watchdog not active\n" -+ "proceeding without locking down other cpus\n", -+ num_online_cpus()); -+ entry_state = NO_NMI; -+ } -+ } -+ } -+#endif ++(gdb) p kgdb_info ++$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, ++ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ ++ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, ++ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, ++ hold = 0, regs = 0x0}}} + -+ if (remote_debug) { -+ unsigned long *lp = (unsigned long *) &linux_regs; ++"Cpus_waiting" has an entry for each CPU other than the current one that ++has been stopped. Each entry contains the task_struct address for that ++CPU, the address of the regs for that task and a hold flag. All these ++have the proper typing so that, for example: + -+ printk("handle_exception(exceptionVector=%d, " -+ "signo=%d, err_code=%d, linux_regs=%p)\n", -+ exceptionVector, signo, err_code, linux_regs); -+ if (debug_regs) { -+ print_regs(®s); -+ printk("Stk: %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[0], lp[1], lp[2], lp[3], -+ lp[4], lp[5], lp[6], lp[7]); -+ printk(" %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[8], lp[9], lp[10], lp[11], -+ lp[12], lp[13], lp[14], lp[15]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[16], lp[17], lp[18], lp[19], -+ lp[20], lp[21], lp[22], lp[23]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[24], lp[25], lp[26], lp[27], -+ lp[28], lp[29], lp[30], lp[31]); -+ } -+ } ++p *kgdb_info.cpus_waiting[1].regs + -+ /* Disable hardware debugging while we are in kgdb */ -+ /* Get the debug register status register */ -+/* *INDENT-OFF* */ -+ __asm__("movl %0,%%db7" -+ : /* no output */ -+ :"r"(0)); ++will print the registers for CPU 1. + -+ asm volatile ("movl %%db6, %0\n" -+ :"=r" (hw_breakpoint_status) -+ :); ++"Hold_on_sstep" is a new feature with this version and comes up set or ++true. What this means is that whenever kgdb is asked to single-step all ++other CPUs are held (i.e. not allowed to execute). The flag applies to ++all but the current CPU and, again, can be changed: + -+/* *INDENT-ON* */ -+ switch (exceptionVector) { -+ case 0: /* divide error */ -+ case 1: /* debug exception */ -+ case 2: /* NMI */ -+ case 3: /* breakpoint */ -+ case 4: /* overflow */ -+ case 5: /* bounds check */ -+ case 6: /* invalid opcode */ -+ case 7: /* device not available */ -+ case 8: /* double fault (errcode) */ -+ case 10: /* invalid TSS (errcode) */ -+ case 12: /* stack fault (errcode) */ -+ case 16: /* floating point error */ -+ case 17: /* alignment check (errcode) */ -+ default: /* any undocumented */ -+ break; -+ case 11: /* segment not present (errcode) */ -+ case 13: /* general protection (errcode) */ -+ case 14: /* page fault (special errcode) */ -+ case 19: /* cache flush denied */ -+ if (mem_err_expected) { -+ /* -+ * This fault occured because of the -+ * get_char or set_char routines. These -+ * two routines use either eax of edx to -+ * indirectly reference the location in -+ * memory that they are working with. -+ * For a page fault, when we return the -+ * instruction will be retried, so we -+ * have to make sure that these -+ * registers point to valid memory. -+ */ -+ mem_err = 1; /* set mem error flag */ -+ mem_err_expected = 0; -+ mem_err_cnt++; /* helps in debugging */ -+ /* make valid address */ -+ regs.eax = (long) &garbage_loc; -+ /* make valid address */ -+ regs.edx = (long) &garbage_loc; -+ if (remote_debug) -+ printk("Return after memory error: " -+ "mem_err_cnt=%d\n", mem_err_cnt); -+ if (debug_regs) -+ print_regs(®s); -+ goto exit_kgdb; -+ } -+ break; -+ } -+ if (remote_debug) -+ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); ++p kgdb_info.hold_on_sstep=0 + -+ gdb_i386vector = exceptionVector; -+ gdb_i386errcode = err_code; -+ kgdb_info.called_from = __builtin_return_address(0); -+#ifdef CONFIG_SMP -+ /* -+ * OK, we can now communicate, lets tell gdb about the sync. -+ * but only if we had a problem. -+ */ -+ switch (entry_state) { -+ case NO_NMI: -+ to_gdb("NMI not active, other cpus not stopped\n"); -+ break; -+ case NO_SYNC: -+ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); -+ default:; -+ } ++restores the old behavior of letting all CPUs run during single-stepping. + -+#endif -+/* -+ * Set up the gdb function call area. -+ */ -+ trap_cpu = smp_processor_id(); -+ OLD_esp = NEW_esp = (int) (&linux_regs->esp); ++Likewise, each CPU has a "hold" flag, which if set, locks that CPU out ++of execution. Note that this has some risk in cases where the CPUs need ++to communicate with each other. If kgdb finds no CPU available on exit, ++it will push a message thru gdb and stay in kgdb. Note that it is legal ++to hold the current CPU as long as at least one CPU can execute. + -+ IF_SMP(once_again:) -+ /* reply to host that an exception has occurred */ -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; ++20010621.1117.09 ++This version implements an event queue. Events are signaled by calling ++a function in the kgdb stub and may be examined from gdb. See EVENTS ++below for details. This version also tightens up the interrupt and SMP ++handling to not allow interrupts on the way to kgdb from a breakpoint ++trap. It is fine to allow these interrupts for user code, but not ++system debugging. + -+ putpacket(remcomOutBuffer); ++Version ++======= + -+ while (1 == 1) { -+ error = 0; -+ remcomOutBuffer[0] = 0; -+ getpacket(remcomInBuffer); -+ switch (remcomInBuffer[0]) { -+ case '?': -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ break; -+ case 'd': -+ remote_debug = !(remote_debug); /* toggle debug flag */ -+ printk("Remote debug %s\n", -+ remote_debug ? "on" : "off"); -+ break; -+ case 'g': /* return the value of the CPU registers */ -+ get_gdb_regs(usethread, ®s, gdb_regs); -+ mem2hex((char *) gdb_regs, -+ remcomOutBuffer, NUMREGBYTES, 0); -+ break; -+ case 'G': /* set the value of the CPU registers - return OK */ -+ hex2mem(&remcomInBuffer[1], -+ (char *) gdb_regs, NUMREGBYTES, 0); -+ if (!usethread || usethread == current) { -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "E00"); -+ } -+ break; ++This version of the kgdb package was developed and tested on ++kernel version 2.4.16. It will not install on any earlier kernels. ++It is possible that it will continue to work on later versions ++of 2.4 and then versions of 2.5 (I hope). ++ ++ ++Debugging Setup ++=============== ++ ++Designate one machine as the "development" machine. This is the ++machine on which you run your compiles and which has your source ++code for the kernel. Designate a second machine as the "target" ++machine. This is the machine that will run your experimental ++kernel. ++ ++The two machines will be connected together via a serial line out ++one or the other of the COM ports of the PC. You will need the ++appropriate modem eliminator (null modem) cable(s) for this. + -+ case 'P':{ /* set the value of a single CPU register - -+ return OK */ -+ /* -+ * For some reason, gdb wants to talk about psudo -+ * registers (greater than 15). These may have -+ * meaning for ptrace, but for us it is safe to -+ * ignor them. We do this by dumping them into -+ * _GS which we also ignor, but do have memory for. -+ */ -+ int regno; ++Decide on which tty port you want the machines to communicate, then ++connect them up back-to-back using the null modem cable. COM1 is ++/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection ++with the two machines prior to trying to debug a kernel. Once you ++have it working, on the TARGET machine, enter: + -+ ptr = &remcomInBuffer[1]; -+ regs_to_gdb_regs(gdb_regs, ®s); -+ if ((!usethread || usethread == current) && -+ hexToInt(&ptr, ®no) && -+ *ptr++ == '=' && (regno >= 0)) { -+ regno = -+ (regno >= NUMREGS ? _GS : regno); -+ hex2mem(ptr, (char *) &gdb_regs[regno], -+ 4, 0); -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ break; -+ } -+ strcpy(remcomOutBuffer, "E01"); -+ break; -+ } ++setserial /dev/ttyS0 (or what ever tty you are using) + -+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ -+ case 'm': -+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { -+ ptr = 0; -+ /* -+ * hex doubles the byte count -+ */ -+ if (length > (BUFMAX / 2)) -+ length = BUFMAX / 2; -+ mem2hex((char *) addr, -+ remcomOutBuffer, length, 1); -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } -+ } ++and record the port address and the IRQ number. + -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E01"); -+ debug_error -+ ("malformed read memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; ++On the DEVELOPMENT machine you need to apply the patch for the kgdb ++hooks. You have probably already done that if you are reading this ++file. + -+ /* MAA..AA,LLLL: -+ Write LLLL bytes at address AA.AA return OK */ -+ case 'M': -+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && -+ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { -+ hex2mem(ptr, (char *) addr, length, 1); ++On your DEVELOPMENT machine, go to your kernel source directory and do ++"make Xconfig" where X is one of "x", "menu", or "". If you are ++configuring in the standard serial driver, it must not be a module. ++Either yes or no is ok, but making the serial driver a module means it ++will initialize after kgdb has set up the UART interrupt code and may ++cause a failure of the control-C option discussed below. The configure ++question for the serial driver is under the "Character devices" heading ++and is: + -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } else { -+ strcpy(remcomOutBuffer, "OK"); -+ } ++"Standard/generic (8250/16550 and compatible UARTs) serial support" + -+ ptr = 0; -+ } -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E02"); -+ debug_error -+ ("malformed write memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ case 'S': -+ remcomInBuffer[0] = 's'; -+ case 'C': -+ /* Csig;AA..AA where ;AA..AA is optional -+ * continue with signal -+ * Since signals are meaning less to us, delete that -+ * part and then fall into the 'c' code. -+ */ -+ ptr = &remcomInBuffer[1]; -+ length = 2; -+ while (*ptr && *ptr != ';') { -+ length++; -+ ptr++; -+ } -+ if (*ptr) { -+ do { -+ ptr++; -+ *(ptr - length++) = *ptr; -+ } while (*ptr); -+ } else { -+ remcomInBuffer[1] = 0; -+ } ++Go down to the kernel debugging menu item and open it up. Enable the ++kernel kgdb stub code by selecting that item. You can also choose to ++turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler ++to put more debug info (like local symbols) in the object file. On the ++i386 -g and -ggdb are the same so this option just reduces to "O1". The ++-O1 reduces the optimization level. This may be helpful in some cases, ++be aware, however, that this may also mask the problem you are looking ++for. + -+ /* cAA..AA Continue at address AA..AA(optional) */ -+ /* sAA..AA Step one instruction from AA..AA(optional) */ -+ /* D detach, reply OK and then continue */ -+ case 'c': -+ case 's': -+ case 'D': ++The baud rate. Default is 115200. What ever you choose be sure that ++the host machine is set to the same speed. I recommend the default. + -+ /* try to read optional parameter, -+ pc unchanged if no parm */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr)) { -+ if (remote_debug) -+ printk("Changing EIP to 0x%x\n", addr); ++The port. This is the I/O address of the serial UART that you should ++have gotten using setserial as described above. The standard COM1 port ++(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ ++3. + -+ regs.eip = addr; -+ } ++The port IRQ (see above). + -+ newPC = regs.eip; ++Stack overflow test. This option makes a minor change in the trap, ++system call and interrupt code to detect stack overflow and transfer ++control to kgdb if it happens. (Some platforms have this in the ++baseline code, but the i386 does not.) + -+ /* clear the trace bit */ -+ regs.eflags &= 0xfffffeff; ++You can also configure the system to recognize the boot option ++"console=kgdb" which if given will cause all console output during ++booting to be put thru gdb as well as other consoles. This option ++requires that gdb and kgdb be connected prior to sending console output ++so, if they are not, a breakpoint is executed to force the connection. ++This will happen before any kernel output (it is going thru gdb, right), ++and will stall the boot until the connection is made. + -+ /* set the trace bit if we're stepping */ -+ if (remcomInBuffer[0] == 's') -+ regs.eflags |= 0x100; ++You can also configure in a patch to SysRq to enable the kGdb SysRq. ++This request generates a breakpoint. Since the serial port IRQ line is ++set up after any serial drivers, it is possible that this command will ++work when the control-C will not. + -+ /* detach is a friendly version of continue. Note that -+ debugging is still enabled (e.g hit control C) -+ */ -+ if (remcomInBuffer[0] == 'D') { -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ } ++Save and exit the Xconfig program. Then do "make clean" , "make dep" ++and "make bzImage" (or whatever target you want to make). This gets the ++kernel compiled with the "-g" option set -- necessary for debugging. + -+ if (remote_debug) { -+ printk("Resuming execution\n"); -+ print_regs(®s); -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (!(dr6 & 0x4000)) { -+ for (breakno = 0; breakno < 4; ++breakno) { -+ if (dr6 & (1 << breakno) && -+ (breakinfo[breakno].type == 0)) { -+ /* Set restore flag */ -+ regs.eflags |= 0x10000; -+ break; -+ } -+ } -+ } -+ correct_hw_break(); -+ asm volatile ("movl %0, %%db6\n"::"r" (0)); -+ goto exit_kgdb; ++You have just built the kernel on your DEVELOPMENT machine that you ++intend to run on your TARGET machine. + -+ /* kill the program */ -+ case 'k': /* do nothing */ -+ break; ++To install this new kernel, use the following installation procedure. ++Remember, you are on the DEVELOPMENT machine patching the kernel source ++for the kernel that you intend to run on the TARGET machine. + -+ /* query */ -+ case 'q': -+ nothreads = 0; -+ switch (remcomInBuffer[1]) { -+ case 'f': -+ threadid = 1; -+ thread_list = 2; -+ thread_list_start = (usethread ? : current); -+ case 's': -+ if (!cmp_str(&remcomInBuffer[2], -+ "ThreadInfo", 10)) -+ break; ++Copy this kernel to your target machine using your usual procedures. I ++usually arrange to copy development: ++/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine ++via a LAN based NFS access. That is, I run the cp command on the target ++and copy from the development machine via the LAN. Run Lilo (see "man ++lilo" for details on how to set this up) on the new kernel on the target ++machine so that it will boot! Then boot the kernel on the target ++machine. + -+ remcomOutBuffer[nothreads++] = 'm'; -+ for (; threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ nothreads += int_to_hex_v( -+ &remcomOutBuffer[ -+ nothreads], -+ threadid); -+ if (thread_min > threadid) -+ thread_min = threadid; -+ remcomOutBuffer[ -+ nothreads] = ','; -+ nothreads++; -+ if (nothreads > BUFMAX - 10) -+ break; -+ } -+ } -+ if (remcomOutBuffer[nothreads - 1] == 'm') { -+ remcomOutBuffer[nothreads - 1] = 'l'; -+ } else { -+ nothreads--; -+ } -+ remcomOutBuffer[nothreads] = 0; -+ break; ++On the DEVELOPMENT machine, create a file called .gdbinit in the ++directory /usr/src/linux. An example .gdbinit file looks like this: ++ ++shell echo -e "\003" >/dev/ttyS0 ++set remotebaud 38400 (or what ever speed you have chosen) ++target remote /dev/ttyS0 + -+#ifdef old_thread_list /* Old thread info request */ -+ case 'L': -+ /* List threads */ -+ thread_list = 2; -+ thread_list_start = (usethread ? : current); -+ unpack_byte(remcomInBuffer + 3, &maxthreads); -+ unpack_threadid(remcomInBuffer + 5, &thref); -+ do { -+ int buf_thread_limit = -+ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; -+ if (maxthreads > buf_thread_limit) { -+ maxthreads = buf_thread_limit; -+ } -+ } while (0); -+ remcomOutBuffer[0] = 'q'; -+ remcomOutBuffer[1] = 'M'; -+ remcomOutBuffer[4] = '0'; -+ pack_threadid(remcomOutBuffer + 5, &thref); + -+ threadid = threadref_to_int(&thref); -+ for (nothreads = 0; -+ nothreads < maxthreads && -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ int_to_threadref(&thref, -+ threadid); -+ pack_threadid(remcomOutBuffer + -+ 21 + -+ nothreads * 16, -+ &thref); -+ nothreads++; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } -+ } ++Change the "echo" and "target" definition so that it specifies the tty ++port that you intend to use. Change the "remotebaud" definition to ++match the data rate that you are going to use for the com line. + -+ if (threadid == PID_MAX + MAX_NO_CPUS) { -+ remcomOutBuffer[4] = '1'; -+ } -+ pack_hex_byte(remcomOutBuffer + 2, nothreads); -+ remcomOutBuffer[21 + nothreads * 16] = '\0'; -+ break; -+#endif -+ case 'C': -+ /* Current thread id */ -+ remcomOutBuffer[0] = 'Q'; -+ remcomOutBuffer[1] = 'C'; -+ threadid = current->pid; -+ if (!threadid) { -+ /* -+ * idle thread -+ */ -+ for (threadid = PID_MAX; -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ if (current == -+ idle_task(threadid - -+ PID_MAX)) -+ break; -+ } -+ } -+ int_to_threadref(&thref, threadid); -+ pack_threadid(remcomOutBuffer + 2, &thref); -+ remcomOutBuffer[18] = '\0'; -+ break; ++You are now ready to try it out. + -+ case 'E': -+ /* Print exception info */ -+ printexceptioninfo(exceptionVector, -+ err_code, remcomOutBuffer); -+ break; -+ case 'T':{ -+ char * nptr; -+ /* Thread extra info */ -+ if (!cmp_str(&remcomInBuffer[2], -+ "hreadExtraInfo,", 15)) { -+ break; -+ } -+ ptr = &remcomInBuffer[17]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ nptr = &thread->comm[0]; -+ length = 0; -+ ptr = &remcomOutBuffer[0]; -+ do { -+ length++; -+ ptr = pack_hex_byte(ptr, *nptr++); -+ } while (*nptr && length < 16); -+ /* -+ * would like that 16 to be the size of -+ * task_struct.comm but don't know the -+ * syntax.. -+ */ -+ *ptr = 0; -+ } -+ } -+ break; ++Boot your target machine with "kgdb" in the boot command i.e. something ++like: + -+ /* task related */ -+ case 'H': -+ switch (remcomInBuffer[1]) { -+ case 'g': -+ ptr = &remcomInBuffer[2]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (!thread) { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ break; -+ } -+ /* -+ * Just in case I forget what this is all about, -+ * the "thread info" command to gdb causes it -+ * to ask for a thread list. It then switches -+ * to each thread and asks for the registers. -+ * For this (and only this) usage, we want to -+ * fudge the registers of tasks not on the run -+ * list (i.e. waiting) to show the routine that -+ * called schedule. Also, gdb, is a minimalist -+ * in that if the current thread is the last -+ * it will not re-read the info when done. -+ * This means that in this case we must show -+ * the real registers. So here is how we do it: -+ * Each entry we keep track of the min -+ * thread in the list (the last that gdb will) -+ * get info for. We also keep track of the -+ * starting thread. -+ * "thread_list" is cleared when switching back -+ * to the min thread if it is was current, or -+ * if it was not current, thread_list is set -+ * to 1. When the switch to current comes, -+ * if thread_list is 1, clear it, else do -+ * nothing. -+ */ -+ usethread = thread; -+ if ((thread_list == 1) && -+ (thread == thread_list_start)) { -+ thread_list = 0; -+ } -+ if (thread_list && (threadid == thread_min)) { -+ if (thread == thread_list_start) { -+ thread_list = 0; -+ } else { -+ thread_list = 1; -+ } -+ } -+ /* follow through */ -+ case 'c': -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ break; -+ } -+ break; ++lilo> test kgdb + -+ /* Query thread status */ -+ case 'T': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (thread) { -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } else { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ } -+ break; ++or if you also want console output thru gdb: + -+ case 'Y': /* set up a hardware breakpoint */ -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ ptr++; -+ hexToInt(&ptr, &breaktype); -+ ptr++; -+ hexToInt(&ptr, &length); -+ ptr++; -+ hexToInt(&ptr, &addr); -+ if (set_hw_break(breakno & 0x3, -+ breaktype & 0x3, -+ length & 0x3, addr) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; ++lilo> test kgdb console=kgdb + -+ /* Remove hardware breakpoint */ -+ case 'y': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ if (remove_hw_break(breakno & 0x3) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; ++You should see the lilo message saying it has loaded the kernel and then ++all output stops. The kgdb stub is trying to connect with gdb. Start ++gdb something like this: ++ ++ ++On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". ++When gdb gets the symbols loaded it will read your .gdbinit file and, if ++everything is working correctly, you should see gdb print out a few ++lines indicating that a breakpoint has been taken. It will actually ++show a line of code in the target kernel inside the kgdb activation ++code. ++ ++The gdb interaction should look something like this: ++ ++ linux-dev:/usr/src/linux# gdb vmlinux ++ GDB is free software and you are welcome to distribute copies of it ++ under certain conditions; type "show copying" to see the conditions. ++ There is absolutely no warranty for GDB; type "show warranty" for details. ++ GDB 4.15.1 (i486-slackware-linux), ++ Copyright 1995 Free Software Foundation, Inc... ++ breakpoint () at i386-stub.c:750 ++ 750 } ++ (gdb) ++ ++You can now use whatever gdb commands you like to set breakpoints. ++Enter "continue" to start your target machine executing again. At this ++point the target system will run at full speed until it encounters ++your breakpoint or gets a segment violation in the kernel, or whatever. ++ ++If you have the kgdb console enabled when you continue, gdb will print ++out all the console messages. ++ ++The above example caused a breakpoint relatively early in the boot ++process. For the i386 kgdb it is possible to code a break instruction ++as the first C-language point in init/main.c, i.e. as the first instruction ++in start_kernel(). This could be done as follows: ++ ++#include ++ breakpoint(); ++ ++This breakpoint() is really a function that sets up the breakpoint and ++single-step hardware trap cells and then executes a breakpoint. Any ++early hard coded breakpoint will need to use this function. Once the ++trap cells are set up they need not be set again, but doing it again ++does not hurt anything, so you don't need to be concerned about which ++breakpoint is hit first. Once the trap cells are set up (and the kernel ++sets them up in due course even if breakpoint() is never called) the ++macro: ++ ++BREAKPOINT; + -+ case 'r': /* reboot */ -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ /*to_gdb("Rebooting\n"); */ -+ /* triplefault no return from here */ -+ { -+ static long no_idt[2]; -+ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); -+ BREAKPOINT; -+ } ++will generate an inline breakpoint. This may be more useful as it stops ++the processor at the instruction instead of in a function a step removed ++from the location of interest. In either case must be ++included to define both breakpoint() and BREAKPOINT. + -+ } /* switch */ ++Triggering kgdbstub at other times ++================================== + -+ /* reply to the request */ -+ putpacket(remcomOutBuffer); -+ } /* while(1==1) */ -+ /* -+ * reached by goto only. -+ */ -+ exit_kgdb: -+ /* -+ * Here is where we set up to trap a gdb function call. NEW_esp -+ * will be changed if we are trying to do this. We handle both -+ * adding and subtracting, thus allowing gdb to put grung on -+ * the stack which it removes later. -+ */ -+ if (NEW_esp != OLD_esp) { -+ int *ptr = END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) -+ ptr -= (OLD_esp - NEW_esp) / sizeof (int); -+ *--ptr = linux_regs->eflags; -+ *--ptr = linux_regs->xcs; -+ *--ptr = linux_regs->eip; -+ *--ptr = linux_regs->ecx; -+ *--ptr = linux_regs->ebx; -+ *--ptr = linux_regs->eax; -+ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); -+ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) { -+ linux_regs->eip = (unsigned int) fn_call_stub; -+ } else { -+ linux_regs->eip = (unsigned int) fn_rtn_stub; -+ linux_regs->eax = NEW_esp; -+ } -+ linux_regs->eflags &= ~(IF_BIT | TF_BIT); -+ } -+#ifdef CONFIG_SMP -+ /* -+ * Release gdb wait locks -+ * Sanity check time. Must have at least one cpu to run. Also single -+ * step must not be done if the current cpu is on hold. -+ */ -+ if (spinlock_count == 1) { -+ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; -+ int cpu_avail = 0; -+ int i; ++Often you don't need to enter the debugger until much later in the boot ++or even after the machine has been running for some time. Once the ++kernel is booted and interrupts are on, you can force the system to ++enter the debugger by sending a control-C to the debug port. This is ++what the first line of the recommended .gdbinit file does. This allows ++you to start gdb any time after the system is up as well as when the ++system is already at a breakpoint. (In the case where the system is ++already at a breakpoint the control-C is not needed, however, it will ++be ignored by the target so no harm is done. Also note the the echo ++command assumes that the port speed is already set. This will be true ++once gdb has connected, but it is best to set the port speed before you ++run gdb.) + -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!cpu_online(i)) -+ break; -+ if (!hold_cpu(i)) { -+ cpu_avail = 1; -+ } -+ } -+ /* -+ * Early in the bring up there will be NO cpus on line... -+ */ -+ if (!cpu_avail && !cpus_empty(cpu_online_map)) { -+ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); -+ goto once_again; -+ } -+ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { -+ to_gdb -+ ("Current cpu must be unblocked to single step\n"); -+ goto once_again; -+ } -+ if (!(ss_hold)) { -+ int i; -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!hold_cpu(i)) { -+ spin_unlock(&waitlocks[i]); -+ } -+ } -+ } else { -+ spin_unlock(&waitlocks[smp_processor_id()]); -+ } -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ /* -+ * If this cpu is on hold, this is where we -+ * do it. Note, the NMI will pull us out of here, -+ * but will return as the above lock is not held. -+ * We will stay here till another cpu releases the lock for us. -+ */ -+ spin_unlock_wait(waitlocks + smp_processor_id()); -+ kgdb_local_irq_restore(flags); -+ return (0); -+ } -+#if 0 -+exit_just_unlock: -+#endif -+#endif -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ kgdb_local_irq_restore(flags); -+ return (0); -+} ++Another simple way to do this is to put the following file in you ~/bin ++directory: + -+/* this function is used to set up exception handlers for tracing and -+ * breakpoints. -+ * This function is not needed as the above line does all that is needed. -+ * We leave it for backward compatitability... -+ */ -+void -+set_debug_traps(void) -+{ -+ /* -+ * linux_debug_hook is defined in traps.c. We store a pointer -+ * to our own exception handler into it. ++#!/bin/bash ++echo -e "\003" > /dev/ttyS0 + -+ * But really folks, every hear of labeled common, an old Fortran -+ * concept. Lots of folks can reference it and it is define if -+ * anyone does. Only one can initialize it at link time. We do -+ * this with the hook. See the statement above. No need for any -+ * executable code and it is ready as soon as the kernel is -+ * loaded. Very desirable in kernel debugging. ++Here, the ttyS0 should be replaced with what ever port you are using. ++The "\003" is control-C. Once you are connected with gdb, you can enter ++control-C at the command prompt. + -+ linux_debug_hook = handle_exception ; -+ */ ++An alternative way to get control to the debugger is to enable the kGdb ++SysRq command. Then you would enter Alt-SysRq-g (all three keys at the ++same time, but push them down in the order given). To refresh your ++memory of the available SysRq commands try Alt-SysRq-=. Actually any ++undefined command could replace the "=", but I like to KNOW that what I ++am pushing will never be defined. + -+ /* In case GDB is started before us, ack any packets (presumably -+ "$?#xx") sitting there. -+ putDebugChar ('+'); ++Debugging hints ++=============== + -+ initialized = 1; -+ */ -+} ++You can break into the target machine at any time from the development ++machine by typing ^C (see above paragraph). If the target machine has ++interrupts enabled this will stop it in the kernel and enter the ++debugger. + -+/* This function will generate a breakpoint exception. It is used at the -+ beginning of a program to sync up with a debugger and can be used -+ otherwise as a quick means to stop program execution and "break" into -+ the debugger. */ -+/* But really, just use the BREAKPOINT macro. We will handle the int stuff -+ */ ++There is unfortunately no way of breaking into the kernel if it is ++in a loop with interrupts disabled, so if this happens to you then ++you need to place exploratory breakpoints or printk's into the kernel ++to find out where it is looping. The exploratory breakpoints can be ++entered either thru gdb or hard coded into the source. This is very ++handy if you do something like: + -+#ifdef later -+/* -+ * possibly we should not go thru the traps.c code at all? Someday. -+ */ -+void -+do_kgdb_int3(struct pt_regs *regs, long error_code) -+{ -+ kgdb_handle_exception(3, 5, error_code, regs); -+ return; -+} -+#endif -+#undef regs -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+asmlinkage void -+bad_sys_call_exit(int stuff) -+{ -+ struct pt_regs *regs = (struct pt_regs *) &stuff; -+ printk("Sys call %d return with %x preempt_count\n", -+ (int) regs->orig_eax, preempt_count()); -+} -+#endif -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#include -+asmlinkage void -+stack_overflow(void) -+{ -+#ifdef BREAKPOINT -+ BREAKPOINT; -+#else -+ printk("Kernel stack overflow, looping forever\n"); -+#endif -+ while (1) { -+ } -+} -+#endif ++if () BREAKPOINT; + -+#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) -+char gdbconbuf[BUFMAX]; + -+static void -+kgdb_gdb_message(const char *s, unsigned count) -+{ -+ int i; -+ int wcount; -+ char *bufptr; -+ /* -+ * This takes care of NMI while spining out chars to gdb -+ */ -+ IF_SMP(in_kgdb_console = 1); -+ gdbconbuf[0] = 'O'; -+ bufptr = gdbconbuf + 1; -+ while (count > 0) { -+ if ((count << 1) > (BUFMAX - 2)) { -+ wcount = (BUFMAX - 2) >> 1; -+ } else { -+ wcount = count; -+ } -+ count -= wcount; -+ for (i = 0; i < wcount; i++) { -+ bufptr = pack_hex_byte(bufptr, s[i]); -+ } -+ *bufptr = '\0'; -+ s += wcount; ++There is a copy of an e-mail in the Documentation/i386/kgdb/ directory ++(debug-nmi.txt) which describes how to create an NMI on an ISA bus ++machine using a paper clip. I have a sophisticated version of this made ++by wiring a push button switch into a PC104/ISA bus adapter card. The ++adapter card nicely furnishes wire wrap pins for all the ISA bus ++signals. + -+ putpacket(gdbconbuf); ++When you are done debugging the kernel on the target machine it is a ++good idea to leave it in a running state. This makes reboots faster, ++bypassing the fsck. So do a gdb "continue" as the last gdb command if ++this is possible. To terminate gdb itself on the development machine ++and leave the target machine running, first clear all breakpoints and ++continue, then type ^Z to suspend gdb and then kill it with "kill %1" or ++something similar. ++ ++If gdbstub Does Not Work ++======================== ++ ++If it doesn't work, you will have to troubleshoot it. Do the easy ++things first like double checking your cabling and data rates. You ++might try some non-kernel based programs to see if the back-to-back ++connection works properly. Just something simple like cat /etc/hosts ++>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you ++if you can send data from one machine to the other. Make sure it works ++in both directions. There is no point in tearing out your hair in the ++kernel if the line doesn't work. + -+ } -+ IF_SMP(in_kgdb_console = 0); -+} -+#endif -+#ifdef CONFIG_SMP -+static void -+to_gdb(const char *s) -+{ -+ int count = 0; -+ while (s[count] && (count++ < BUFMAX)) ; -+ kgdb_gdb_message(s, count); -+} -+#endif -+#ifdef CONFIG_KGDB_CONSOLE -+#include -+#include -+#include -+#include -+#include ++All of the real action takes place in the file ++/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target ++machine that interacts with gdb on the development machine. In gdb you can ++turn on a debug switch with the following command: + -+void -+kgdb_console_write(struct console *co, const char *s, unsigned count) -+{ ++ set remotedebug + -+ if (gdb_i386vector == -1) { -+ /* -+ * We have not yet talked to gdb. What to do... -+ * lets break, on continue we can do the write. -+ * But first tell him whats up. Uh, well no can do, -+ * as this IS the console. Oh well... -+ * We do need to wait or the messages will be lost. -+ * Other option would be to tell the above code to -+ * ignore this breakpoint and do an auto return, -+ * but that might confuse gdb. Also this happens -+ * early enough in boot up that we don't have the traps -+ * set up yet, so... -+ */ -+ breakpoint(); -+ } -+ kgdb_gdb_message(s, count); -+} ++This will print out the protocol messages that gdb is exchanging with ++the target machine. + -+/* -+ * ------------------------------------------------------------ -+ * Serial KGDB driver -+ * ------------------------------------------------------------ -+ */ ++Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is ++the code that talks to the serial port on the target side. There might ++be a problem there. In particular there is a section of this code that ++tests the UART which will tell you what UART you have if you define ++"PRNT" (just remove "_off" from the #define PRNT_off). To view this ++report you will need to boot the system without any beakpoints. This ++allows the kernel to run to the point where it calls kgdb to set up ++interrupts. At this time kgdb will test the UART and print out the type ++it finds. (You need to wait so that the printks are actually being ++printed. Early in the boot they are cached, waiting for the console to ++be enabled. Also, if kgdb is entered thru a breakpoint it is possible ++to cause a dead lock by calling printk when the console is locked. The ++stub thus avoids doing printks from breakpoints, especially in the ++serial code.) At this time, if the UART fails to do the expected thing, ++kgdb will print out (using printk) information on what failed. (These ++messages will be buried in all the other boot up messages. Look for ++lines that start with "gdb_hook_interrupt:". You may want to use dmesg ++once the system is up to view the log. If this fails or if you still ++don't connect, review your answers for the port address. Use: + -+static struct console kgdbcons = { -+ name:"kgdb", -+ write:kgdb_console_write, -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ device:kgdb_console_device, -+#endif -+ flags:CON_PRINTBUFFER | CON_ENABLED, -+ index:-1, -+}; ++setserial /dev/ttyS0 + -+/* -+ * The trick here is that this file gets linked before printk.o -+ * That means we get to peer at the console info in the command -+ * line before it does. If we are up, we register, otherwise, -+ * do nothing. By returning 0, we allow printk to look also. -+ */ -+static int kgdb_console_enabled; ++to get the current port and IRQ information. This command will also ++tell you what the system found for the UART type. The stub recognizes ++the following UART types: + -+int __init -+kgdb_console_init(char *str) -+{ -+ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { -+ register_console(&kgdbcons); -+ kgdb_console_enabled = 1; -+ } -+ return 0; /* let others look at the string */ -+} ++16450, 16550, and 16550A + -+__setup("console=", kgdb_console_init); ++If you are really desperate you can use printk debugging in the ++kgdbstub code in the target kernel until you get it working. In particular, ++there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c ++named "remote_debug". Compile your kernel with this set to 1, rather ++than 0 and the debug stub will print out lots of stuff as it does ++what it does. Likewise there are debug printks in the kgdb_serial.c ++code that can be turned on with simple changes in the macro defines. + -+#ifdef CONFIG_KGDB_USER_CONSOLE -+static kdev_t kgdb_console_device(struct console *c); -+/* This stuff sort of works, but it knocks out telnet devices -+ * we are leaving it here in case we (or you) find time to figure it out -+ * better.. -+ */ + -+/* -+ * We need a real char device as well for when the console is opened for user -+ * space activities. -+ */ ++Debugging Loadable Modules ++========================== + -+static int -+kgdb_consdev_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} ++This technique comes courtesy of Edouard Parmelan ++ + -+static ssize_t -+kgdb_consdev_write(struct file *file, const char *buf, -+ size_t count, loff_t * ppos) -+{ -+ int size, ret = 0; -+ static char kbuf[128]; -+ static DECLARE_MUTEX(sem); ++When you run gdb, enter the command + -+ /* We are not reentrant... */ -+ if (down_interruptible(&sem)) -+ return -ERESTARTSYS; ++source gdbinit-modules + -+ while (count > 0) { -+ /* need to copy the data from user space */ -+ size = count; -+ if (size > sizeof (kbuf)) -+ size = sizeof (kbuf); -+ if (copy_from_user(kbuf, buf, size)) { -+ ret = -EFAULT; -+ break;; -+ } -+ kgdb_console_write(&kgdbcons, kbuf, size); -+ count -= size; -+ ret += size; -+ buf += size; -+ } ++This will read in a file of gdb macros that was installed in your ++kernel source directory when kgdb was installed. This file implements ++the following commands: + -+ up(&sem); ++mod-list ++ Lists the loaded modules in the form + -+ return ret; -+} ++mod-print-symbols ++ Prints all the symbols in the indicated module. + -+struct file_operations kgdb_consdev_fops = { -+ open:kgdb_consdev_open, -+ write:kgdb_consdev_write -+}; -+static kdev_t -+kgdb_console_device(struct console *c) -+{ -+ return MKDEV(TTYAUX_MAJOR, 1); -+} ++mod-add-symbols ++ Loads the symbols from the object file and associates them ++ with the indicated module. + -+/* -+ * This routine gets called from the serial stub in the i386/lib -+ * This is so it is done late in bring up (just before the console open). -+ */ -+void -+kgdb_console_finit(void) -+{ -+ if (kgdb_console_enabled) { -+ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); -+ char *cp = cptr; -+ while (*cptr && *cptr != '(') -+ cptr++; -+ *cptr = 0; -+ unregister_chrdev(TTYAUX_MAJOR, cp); -+ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); -+ } -+} -+#endif -+#endif -+#ifdef CONFIG_KGDB_TS -+#include /* time stamp code */ -+#include /* in_interrupt */ -+#ifdef CONFIG_KGDB_TS_64 -+#define DATA_POINTS 64 -+#endif -+#ifdef CONFIG_KGDB_TS_128 -+#define DATA_POINTS 128 -+#endif -+#ifdef CONFIG_KGDB_TS_256 -+#define DATA_POINTS 256 -+#endif -+#ifdef CONFIG_KGDB_TS_512 -+#define DATA_POINTS 512 -+#endif -+#ifdef CONFIG_KGDB_TS_1024 -+#define DATA_POINTS 1024 -+#endif -+#ifndef DATA_POINTS -+#define DATA_POINTS 128 /* must be a power of two */ -+#endif -+#define INDEX_MASK (DATA_POINTS - 1) -+#if (INDEX_MASK & DATA_POINTS) -+#error "CONFIG_KGDB_TS_COUNT must be a power of 2" -+#endif -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ int data0; -+ int data1; -+}; -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; ++After you have loaded the module that you want to debug, use the command ++mod-list to find the of your module. Then use that ++address in the mod-add-symbols command to load your module's symbols. ++From that point onward you can debug your module as if it were a part ++of the kernel. + -+struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; -+int kgdb_and_then_count; ++The file gdbinit-modules also contains a command named mod-add-lis as ++an example of how to construct a command of your own to load your ++favorite module. The idea is to "can" the pathname of the module ++in the command so you don't have to type so much. + -+void -+kgdb_tstamp(int line, char *source, int data0, int data1) -+{ -+ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; -+ int flags; -+ kgdb_local_irq_save(flags); -+ spin_lock(&ts_spin); -+ rdtscll(kgdb_and_then->at_time); -+#ifdef CONFIG_SMP -+ kgdb_and_then->on_cpu = smp_processor_id(); -+#endif -+ kgdb_and_then->task = current; -+ kgdb_and_then->from_ln = line; -+ kgdb_and_then->in_src = source; -+ kgdb_and_then->from = __builtin_return_address(0); -+ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | -+ (preempt_count() << 8)); -+ kgdb_and_then->data0 = data0; -+ kgdb_and_then->data1 = data1; -+ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; -+ spin_unlock(&ts_spin); -+ kgdb_local_irq_restore(flags); -+#ifdef CONFIG_PREEMPT ++Threads ++======= + -+#endif -+ return; -+} -+#endif -+typedef int gdb_debug_hook(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs); -+gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ -diff -puN arch/i386/kernel/Makefile~kgdb-ga arch/i386/kernel/Makefile ---- 25/arch/i386/kernel/Makefile~kgdb-ga 2004-10-21 14:54:15.259603680 -0700 -+++ 25-akpm/arch/i386/kernel/Makefile 2004-10-21 14:54:15.308596232 -0700 -@@ -14,6 +14,7 @@ obj-y += timers/ - obj-$(CONFIG_ACPI_BOOT) += acpi/ - obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o - obj-$(CONFIG_MCA) += mca.o -+obj-$(CONFIG_KGDB) += kgdb_stub.o - obj-$(CONFIG_X86_MSR) += msr.o - obj-$(CONFIG_X86_CPUID) += cpuid.o - obj-$(CONFIG_MICROCODE) += microcode.o -diff -puN arch/i386/kernel/nmi.c~kgdb-ga arch/i386/kernel/nmi.c ---- 25/arch/i386/kernel/nmi.c~kgdb-ga 2004-10-21 14:54:15.261603376 -0700 -+++ 25-akpm/arch/i386/kernel/nmi.c 2004-10-21 14:54:15.308596232 -0700 -@@ -34,7 +34,17 @@ - - #include "mach_traps.h" - -+#ifdef CONFIG_KGDB -+#include -+#ifdef CONFIG_SMP -+unsigned int nmi_watchdog = NMI_IO_APIC; -+#else -+unsigned int nmi_watchdog = NMI_LOCAL_APIC; -+#endif -+#else - unsigned int nmi_watchdog = NMI_NONE; -+#endif ++Each process in a target machine is seen as a gdb thread. gdb thread ++related commands (info threads, thread n) can be used. + - extern int unknown_nmi_panic; - static unsigned int nmi_hz = HZ; - static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -@@ -466,6 +476,9 @@ void touch_nmi_watchdog (void) - for (i = 0; i < NR_CPUS; i++) - alert_counter[i] = 0; - } -+#ifdef CONFIG_KGDB -+int tune_watchdog = 5*HZ; -+#endif - - extern void die_nmi(struct pt_regs *, const char *msg); - -@@ -481,12 +494,24 @@ void nmi_watchdog_tick (struct pt_regs * - */ - sum = irq_stat[cpu].apic_timer_irqs; - -+#ifdef CONFIG_KGDB -+ if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) { ++ia-32 hardware breakpoints ++========================== + -+#else - if (last_irq_sums[cpu] == sum) { -+#endif - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; -+#ifdef CONFIG_KGDB -+ if (alert_counter[cpu] == tune_watchdog) { -+ kgdb_handle_exception(2, SIGPWR, 0, regs); -+ last_irq_sums[cpu] = sum; -+ alert_counter[cpu] = 0; -+ } -+#endif - if (alert_counter[cpu] == 30*nmi_hz) - die_nmi(regs, "NMI Watchdog detected LOCKUP"); - } else { -diff -puN arch/i386/kernel/smp.c~kgdb-ga arch/i386/kernel/smp.c ---- 25/arch/i386/kernel/smp.c~kgdb-ga 2004-10-21 14:54:15.262603224 -0700 -+++ 25-akpm/arch/i386/kernel/smp.c 2004-10-21 14:54:15.309596080 -0700 -@@ -466,7 +466,17 @@ void flush_tlb_all(void) - { - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); - } -- -+#ifdef CONFIG_KGDB -+/* -+ * By using the NMI code instead of a vector we just sneak thru the -+ * word generator coming out with just what we want. AND it does -+ * not matter if clustered_apic_mode is set or not. -+ */ -+void smp_send_nmi_allbutself(void) -+{ -+ send_IPI_allbutself(APIC_DM_NMI); -+} -+#endif - /* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing -diff -puN arch/i386/kernel/traps.c~kgdb-ga arch/i386/kernel/traps.c ---- 25/arch/i386/kernel/traps.c~kgdb-ga 2004-10-21 14:54:15.264602920 -0700 -+++ 25-akpm/arch/i386/kernel/traps.c 2004-10-21 14:54:15.311595776 -0700 -@@ -105,6 +105,39 @@ int register_die_notifier(struct notifie - return err; - } - -+#ifdef CONFIG_KGDB -+extern void sysenter_past_esp(void); -+#include -+#include -+void set_intr_gate(unsigned int n, void *addr); -+static void set_intr_usr_gate(unsigned int n, void *addr); -+/* -+ * Should be able to call this breakpoint() very early in -+ * bring up. Just hard code the call where needed. -+ * The breakpoint() code is here because set_?_gate() functions -+ * are local (static) to trap.c. They need be done only once, -+ * but it does not hurt to do them over. -+ */ -+void breakpoint(void) -+{ -+ set_intr_usr_gate(3,&int3); /* disable ints on trap */ -+ set_intr_gate(1,&debug); -+ set_intr_gate(14,&page_fault); ++kgdb stub contains support for hardware breakpoints using debugging features ++of ia-32(x86) processors. These breakpoints do not need code modification. ++They use debugging registers. 4 hardware breakpoints are available in ia-32 ++processors. + -+ BREAKPOINT; -+} -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ -+ { \ -+ if (!user_mode(regs) ) \ -+ { \ -+ kgdb_handle_exception(trapnr, signr, error_code, regs); \ -+ after; \ -+ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ -+ } -+#else -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) -+#endif ++Each hardware breakpoint can be of one of the following three types. + - static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) - { - return p > (void *)tinfo && -@@ -332,6 +365,15 @@ void die(const char * str, struct pt_reg - #endif - if (nl) - printk("\n"); -+#ifdef CONFIG_KGDB -+ /* This is about the only place we want to go to kgdb even if in -+ * user mode. But we must go in via a trap so within kgdb we will -+ * always be in kernel mode. -+ */ -+ if (user_mode(regs)) -+ BREAKPOINT; -+#endif -+ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); - show_registers(regs); - } else -@@ -406,6 +448,7 @@ static inline void do_trap(int trapnr, i - #define DO_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,) \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ -@@ -429,6 +472,7 @@ asmlinkage void do_##name(struct pt_regs - #define DO_VM86_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return) \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ -@@ -512,7 +556,8 @@ gp_in_vm86: - - gp_in_kernel: - if (!fixup_exception(regs)) { - die: -+ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; -@@ -721,8 +766,18 @@ asmlinkage void do_debug(struct pt_regs - * allowing programs to debug themselves without the ptrace() - * interface. - */ -+#ifdef CONFIG_KGDB -+ /* -+ * I think this is the only "real" case of a TF in the kernel -+ * that really belongs to user space. Others are -+ * "Ours all ours!" -+ */ -+ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_past_esp)) -+ goto clear_TF_reenable; -+#else - if ((regs->xcs & 3) == 0) - goto clear_TF_reenable; -+#endif - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } -@@ -734,6 +789,17 @@ asmlinkage void do_debug(struct pt_regs - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - -+#ifdef CONFIG_KGDB -+ /* -+ * If this is a kernel mode trap, we need to reset db7 to allow us -+ * to continue sanely ALSO skip the signal delivery -+ */ -+ if ((regs->xcs & 3) == 0) -+ goto clear_dr7; ++1. Execution breakpoint - An Execution breakpoint is triggered when code ++ at the breakpoint address is executed. ++ ++ As limited number of hardware breakpoints are available, it is ++ advisable to use software breakpoints ( break command ) instead ++ of execution hardware breakpoints, unless modification of code ++ is to be avoided. ++ ++2. Write breakpoint - A write breakpoint is triggered when memory ++ location at the breakpoint address is written. ++ ++ A write or can be placed for data of variable length. Length of ++ a write breakpoint indicates length of the datatype to be ++ watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for ++ 4 byte data. ++ ++3. Access breakpoint - An access breakpoint is triggered when memory ++ location at the breakpoint address is either read or written. ++ ++ Access breakpoints also have lengths similar to write breakpoints. ++ ++IO breakpoints in ia-32 are not supported. ++ ++Since gdb stub at present does not use the protocol used by gdb for hardware ++breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros ++for hardware breakpoints are described below. ++ ++hwebrk - Places an execution breakpoint ++ hwebrk breakpointno address ++hwwbrk - Places a write breakpoint ++ hwwbrk breakpointno length address ++hwabrk - Places an access breakpoint ++ hwabrk breakpointno length address ++hwrmbrk - Removes a breakpoint ++ hwrmbrk breakpointno ++exinfo - Tells whether a software or hardware breakpoint has occurred. ++ Prints number of the hardware breakpoint if a hardware breakpoint has ++ occurred. + -+ /* if not kernel, allow ints but only if they were on */ -+ if ( regs->eflags & 0x200) local_irq_enable(); -+#endif - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ -@@ -748,6 +814,7 @@ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); -+ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) - return; - - debug_vm86: -@@ -1004,6 +1071,12 @@ static void __init set_task_gate(unsigne - { - _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); - } -+#ifdef CONFIG_KGDB -+void set_intr_usr_gate(unsigned int n, void *addr) -+{ -+ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); -+} -+#endif - - - void __init trap_init(void) -@@ -1021,7 +1094,11 @@ void __init trap_init(void) - set_trap_gate(0,÷_error); - set_intr_gate(1,&debug); - set_intr_gate(2,&nmi); -+#ifndef CONFIG_KGDB - set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ -+#else -+ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ -+#endif - set_system_gate(4,&overflow); - set_system_gate(5,&bounds); - set_trap_gate(6,&invalid_op); -diff -puN /dev/null arch/i386/lib/kgdb_serial.c ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/lib/kgdb_serial.c 2004-10-21 14:54:15.313595472 -0700 -@@ -0,0 +1,485 @@ -+/* -+ * Serial interface GDB stub -+ * -+ * Written (hacked together) by David Grothe (dave@gcom.com) -+ * Modified to allow invokation early in boot see also -+ * kgdb.h for instructions by George Anzinger(george@mvista.com) -+ * -+ */ ++Arguments required by these commands are as follows ++breakpointno - 0 to 3 ++length - 1 to 3 ++address - Memory location in hex digits ( without 0x ) e.g c015e9bc + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_KGDB_USER_CONSOLE -+extern void kgdb_console_finit(void); -+#endif -+#define PRNT_off -+#define TEST_EXISTANCE -+#ifdef PRNT -+#define dbprintk(s) printk s -+#else -+#define dbprintk(s) -+#endif -+#define TEST_INTERRUPT_off -+#ifdef TEST_INTERRUPT -+#define intprintk(s) printk s -+#else -+#define intprintk(s) -+#endif ++SMP support ++========== + -+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) ++When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb ++client, all the processors are forced to enter the debugger. Current ++thread corresponds to the thread running on the processor where ++breakpoint occurred. Threads running on other processor(s) appear ++similar to other non-running threads in the 'info threads' output. ++Within the kgdb stub there is a structure "waiting_cpus" in which kgdb ++records the values of "current" and "regs" for each CPU other than the ++one that hit the breakpoint. "current" is a pointer to the task ++structure for the task that CPU is running, while "regs" points to the ++saved registers for the task. This structure can be examined with the ++gdb "p" command. + -+#define GDB_BUF_SIZE 512 /* power of 2, please */ ++ia-32 hardware debugging registers on all processors are set to same ++values. Hence any hardware breakpoints may occur on any processor. + -+static char gdb_buf[GDB_BUF_SIZE]; -+static int gdb_buf_in_inx; -+static atomic_t gdb_buf_in_cnt; -+static int gdb_buf_out_inx; ++gdb troubleshooting ++=================== + -+struct async_struct *gdb_async_info; -+static int gdb_async_irq; ++1. gdb hangs ++Kill it. restart gdb. Connect to target machine. + -+#define outb_px(a,b) outb_p(b,a) ++2. gdb cannot connect to target machine (after killing a gdb and ++restarting another) If the target machine was not inside debugger when ++you killed gdb, gdb cannot connect because the target machine won't ++respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. ++e.g. echo -e "\003" > /dev/ttyS1 ++This forces that target machine into the debugger, after which you ++can connect. + -+static void program_uart(struct async_struct *info); -+static void write_char(struct async_struct *info, int chr); -+/* -+ * Get a byte from the hardware data buffer and return it -+ */ -+static int -+read_data_bfr(struct async_struct *info) -+{ -+ char it = inb_p(info->port + UART_LSR); ++3. gdb cannot connect even after echoing Ctrl+C into serial line ++Try changing serial line settings min to 1 and time to 0 ++e.g. stty min 1 time 0 < /dev/ttyS1 ++Try echoing again + -+ if (it & UART_LSR_DR) -+ return (inb_p(info->port + UART_RX)); -+ /* -+ * If we have a framing error assume somebody messed with -+ * our uart. Reprogram it and send '-' both ways... -+ */ -+ if (it & 0xc) { -+ program_uart(info); -+ write_char(info, '-'); -+ return ('-'); -+ } -+ return (-1); ++Check serial line speed and set it to correct value if required ++e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + -+} /* read_data_bfr */ ++EVENTS ++====== + -+/* -+ * Get a char if available, return -1 if nothing available. -+ * Empty the receive buffer first, then look at the interface hardware. ++Ever want to know the order of things happening? Which CPU did what and ++when? How did the spinlock get the way it is? Then events are for ++you. Events are defined by calls to an event collection interface and ++saved for later examination. In this case, kgdb events are saved by a ++very fast bit of code in kgdb which is fully SMP and interrupt protected ++and they are examined by using gdb to display them. Kgdb keeps only ++the last N events, where N must be a power of two and is defined at ++configure time. + -+ * Locking here is a bit of a problem. We MUST not lock out communication -+ * if we are trying to talk to gdb about a kgdb entry. ON the other hand -+ * we can loose chars in the console pass thru if we don't lock. It is also -+ * possible that we could hold the lock or be waiting for it when kgdb -+ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. -+ * We do, of course have possible issues with interrupting a uart operation, -+ * but we will just depend on the uart status to help keep that straight. + -+ */ -+static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; -+#ifdef CONFIG_SMP -+extern spinlock_t kgdb_spinlock; -+#endif ++Events are signaled to kgdb by calling: + -+static int -+read_char(struct async_struct *info) -+{ -+ int chr; -+ unsigned long flags; -+ local_irq_save(flags); ++kgdb_ts(data0,data1) ++ ++For each call kgdb records each call in an array along with other info. ++Here is the array definition: ++ ++struct kgdb_and_then_struct { +#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_lock(&uart_interrupt_lock); -+ } ++ int on_cpu; +#endif -+ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ -+ chr = gdb_buf[gdb_buf_out_inx++]; -+ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); -+ atomic_dec(&gdb_buf_in_cnt); -+ } else { -+ chr = read_data_bfr(info); -+ } ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ int data0; ++ int data1; ++}; ++ ++For SMP machines the CPU is recorded, for all machines the TSC is ++recorded (gets a time stamp) as well as the line number and source file ++the call was made from. The address of the (from), the "if" (interrupt ++flag) and the two data items are also recorded. The macro kgdb_ts casts ++the types to int, so you can put any 32-bit values here. There is a ++configure option to select the number of events you want to keep. A ++nice number might be 128, but you can keep up to 1024 if you want. The ++number must be a power of two. An "andthen" macro library is provided ++for gdb to help you look at these events. It is also possible to define ++a different structure for the event storage and cast the data to this ++structure. For example the following structure is defined in kgdb: ++ ++struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_unlock(&uart_interrupt_lock); -+ } ++ int on_cpu; +#endif -+ local_irq_restore(flags); -+ return (chr); -+} ++ long long at_time; ++ int from_ln; ++ char * in_src; ++ void *from; ++ int with_if; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++ ++If you use this for display, the data elements will be displayed as ++pointers to task_struct entries. You may want to define your own ++structure to use in casting. You should only change the last two items ++and you must keep the structure size the same. Kgdb will handle these ++as 32-bit ints, but within that constraint you can define a structure to ++cast to any 32-bit quantity. This need only be available to gdb and is ++only used for casting in the display code. ++ ++Final Items ++=========== + -+/* -+ * Wait until the interface can accept a char, then write it. -+ */ -+static void -+write_char(struct async_struct *info, int chr) -+{ -+ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; ++I picked up this code from Amit S. Kale and enhanced it. + -+ outb_p(chr, info->port + UART_TX); ++If you make some really cool modification to this stuff, or if you ++fix a bug, please let me know. + -+} /* write_char */ ++George Anzinger ++ + -+/* -+ * Mostly we don't need a spinlock, but since the console goes -+ * thru here with interrutps on, well, we need to catch those -+ * chars. -+ */ -+/* -+ * This is the receiver interrupt routine for the GDB stub. -+ * It will receive a limited number of characters of input -+ * from the gdb host machine and save them up in a buffer. -+ * -+ * When the gdb stub routine getDebugChar() is called it -+ * draws characters out of the buffer until it is empty and -+ * then reads directly from the serial port. -+ * -+ * We do not attempt to write chars from the interrupt routine -+ * since the stubs do all of that via putDebugChar() which -+ * writes one byte after waiting for the interface to become -+ * ready. -+ * -+ * The debug stubs like to run with interrupts disabled since, -+ * after all, they run as a consequence of a breakpoint in -+ * the kernel. -+ * -+ * Perhaps someone who knows more about the tty driver than I -+ * care to learn can make this work for any low level serial -+ * driver. -+ */ -+static irqreturn_t -+gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct async_struct *info; -+ unsigned long flags; ++Amit S. Kale ++ + -+ info = gdb_async_info; -+ if (!info || !info->tty || irq != gdb_async_irq) -+ return IRQ_NONE; ++(First kgdb by David Grothe ) + -+ local_irq_save(flags); -+ spin_lock(&uart_interrupt_lock); -+ do { -+ int chr = read_data_bfr(info); -+ intprintk(("Debug char on int: %x hex\n", chr)); -+ if (chr < 0) -+ continue; ++(modified by Tigran Aivazian ) ++ Putting gdbstub into the kernel config menu. + -+ if (chr == 3) { /* Ctrl-C means remote interrupt */ -+ BREAKPOINT; -+ continue; -+ } ++(modified by Scott Foehner ) ++ Hooks for entering gdbstub at boot time. + -+ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { -+ /* buffer overflow tosses early char */ -+ read_char(info); -+ } -+ gdb_buf[gdb_buf_in_inx++] = chr; -+ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); -+ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); -+ spin_unlock(&uart_interrupt_lock); -+ local_irq_restore(flags); -+ return IRQ_HANDLED; -+} /* gdb_interrupt */ ++(modified by Amit S. Kale ) ++ Threads, ia-32 hw debugging, mp support, console support, ++ nmi watchdog handling. + -+/* -+ * Just a NULL routine for testing. -+ */ -+void -+gdb_null(void) -+{ -+} /* gdb_null */ ++(modified by George Anzinger ) ++ Extended threads to include the idle threads. ++ Enhancements to allow breakpoint() at first C code. ++ Use of module_init() and __setup() to automate the configure. ++ Enhanced the cpu "collection" code to work in early bring-up. ++ Added ability to call functions from gdb ++ Print info thread stuff without going back to schedule() ++ Now collect the "other" cpus with an IPI/ NMI. +Index: linux/Documentation/i386/kgdb/loadmodule.sh +=================================================================== +--- linux.orig/Documentation/i386/kgdb/loadmodule.sh ++++ linux/Documentation/i386/kgdb/loadmodule.sh +@@ -0,0 +1,78 @@ ++#/bin/sh ++# This script loads a module on a target machine and generates a gdb script. ++# source generated gdb script to load the module file at appropriate addresses ++# in gdb. ++# ++# Usage: ++# Loading the module on target machine and generating gdb script) ++# [foo]$ loadmodule.sh ++# ++# Loading the module file into gdb ++# (gdb) source ++# ++# Modify following variables according to your setup. ++# TESTMACHINE - Name of the target machine ++# GDBSCRIPTS - The directory where a gdb script will be generated ++# ++# Author: Amit S. Kale (akale@veritas.com). ++# ++# If you run into problems, please check files pointed to by following ++# variables. ++# ERRFILE - /tmp/.errs contains stderr output of insmod ++# MAPFILE - /tmp/.map contains stdout output of insmod ++# GDBSCRIPT - $GDBSCRIPTS/load gdb script. + -+/* These structure are filled in with values defined in asm/kgdb_local.h -+ */ -+static struct serial_state state = SB_STATE; -+static struct async_struct local_info = SB_INFO; -+static int ok_to_enable_ints = 0; -+static void kgdb_enable_ints_now(void); ++TESTMACHINE=foo ++GDBSCRIPTS=/home/bar + -+extern char *kgdb_version; -+/* -+ * Hook an IRQ for KGDB. -+ * -+ * This routine is called from putDebugChar, below. -+ */ -+static int ints_disabled = 1; -+int -+gdb_hook_interrupt(struct async_struct *info, int verb) -+{ -+ struct serial_state *state = info->state; -+ unsigned long flags; -+ int port; -+#ifdef TEST_EXISTANCE -+ int scratch, scratch2; -+#endif ++if [ $# -lt 1 ] ; then { ++ echo Usage: $0 modulefile ++ exit ++} ; fi + -+ /* The above fails if memory managment is not set up yet. -+ * Rather than fail the set up, just keep track of the fact -+ * and pick up the interrupt thing later. -+ */ -+ gdb_async_info = info; -+ port = gdb_async_info->port; -+ gdb_async_irq = state->irq; -+ if (verb) { -+ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", -+ kgdb_version, -+ port, -+ gdb_async_irq, gdb_async_info->state->custom_divisor); -+ } -+ local_irq_save(flags); -+#ifdef TEST_EXISTANCE -+ /* Existance test */ -+ /* Should not need all this, but just in case.... */ ++MODULEFILE=$1 ++MODULEFILEBASENAME=`basename $1` + -+ scratch = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, 0); -+ outb_px(0xff, 0x080); -+ scratch2 = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, scratch); -+ if (scratch2) { -+ printk -+ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); -+ local_irq_restore(flags); -+ return 1; /* We failed; there's nothing here */ -+ } -+ scratch2 = inb_p(port + UART_LCR); -+ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ -+ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ -+ outb_px(port + UART_LCR, 0); -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); -+ scratch = inb_p(port + UART_IIR) >> 6; -+ if (scratch == 1) { -+ printk("gdb_hook_interrupt: Undefined UART type!" -+ " Not a UART! \n"); -+ local_irq_restore(flags); -+ return 1; -+ } else { -+ dbprintk(("gdb_hook_interrupt: UART type " -+ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); -+ } -+ scratch = inb_p(port + UART_MCR); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); -+ scratch2 = inb_p(port + UART_MSR) & 0xF0; -+ outb_px(port + UART_MCR, scratch); -+ if (scratch2 != 0x90) { -+ printk("gdb_hook_interrupt: " -+ "Loop back test failed! Not a UART!\n"); -+ local_irq_restore(flags); -+ return scratch2 + 1000; /* force 0 to fail */ ++if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { ++ MODULEFILE=`pwd`/$MODULEFILE ++} fi ++ ++ERRFILE=/tmp/$MODULEFILEBASENAME.errs ++MAPFILE=/tmp/$MODULEFILEBASENAME.map ++GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME ++ ++function findaddr() { ++ local ADDR=0x$(echo "$SEGMENTS" | \ ++ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ ++ sed 's/[ ]*[^ ]*$//') ++ echo $ADDR ++} ++ ++function checkerrs() { ++ if [ "`cat $ERRFILE`" != "" ] ; then { ++ cat $ERRFILE ++ exit ++ } fi ++} ++ ++#load the module ++echo Copying $MODULEFILE to $TESTMACHINE ++rcp $MODULEFILE root@${TESTMACHINE}: ++ ++echo Loading module $MODULEFILE ++rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ ++ > $MAPFILE 2> $ERRFILE ++checkerrs ++ ++SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` ++TEXTADDR=$(findaddr "\\.text[^.]") ++LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" ++SEGADDRS=`echo "$SEGMENTS" | awk '//{ ++ if ($1 != ".text" && $1 != ".this" && ++ $1 != ".kstrtab" && $1 != ".kmodtab") { ++ print " -s " $1 " 0x" $3 " " + } -+#endif /* test existance */ -+ program_uart(info); -+ local_irq_restore(flags); ++}'` ++LOADSTRING="$LOADSTRING $SEGADDRS" ++echo Generating script $GDBSCRIPT ++echo $LOADSTRING > $GDBSCRIPT +Index: linux/MAINTAINERS +=================================================================== +--- linux.orig/MAINTAINERS ++++ linux/MAINTAINERS +@@ -1242,6 +1242,12 @@ W: http://sf.net/projects/kernel-janitor + W: http://developer.osdl.org/rddunlap/kj-patches/ + S: Maintained + ++KGDB FOR I386 PLATFORM ++P: George Anzinger ++M: george@mvista.com ++L: linux-net@vger.kernel.org ++S: Supported + -+ return (0); + KERNEL NFSD + P: Neil Brown + M: neilb@cse.unsw.edu.au +Index: linux/arch/i386/Kconfig +=================================================================== +--- linux.orig/arch/i386/Kconfig ++++ linux/arch/i386/Kconfig +@@ -1250,6 +1250,14 @@ menu "Executable file formats" + + source "fs/Kconfig.binfmt" + ++config TRAP_BAD_SYSCALL_EXITS ++ bool "Debug bad system call exits" ++ depends on KGDB ++ help ++ If you say Y here the kernel will check for system calls which ++ return without clearing preempt. ++ default n + -+} /* gdb_hook_interrupt */ + endmenu + + source "drivers/Kconfig" +Index: linux/arch/i386/Kconfig.debug +=================================================================== +--- linux.orig/arch/i386/Kconfig.debug ++++ linux/arch/i386/Kconfig.debug +@@ -77,4 +77,6 @@ config X86_MPPARSE + depends on X86_LOCAL_APIC && !X86_VISWS + default y + ++source "arch/i386/Kconfig.kgdb" + -+static void -+program_uart(struct async_struct *info) -+{ -+ int port = info->port; + endmenu +Index: linux/arch/i386/Kconfig.kgdb +=================================================================== +--- linux.orig/arch/i386/Kconfig.kgdb ++++ linux/arch/i386/Kconfig.kgdb +@@ -0,0 +1,175 @@ ++config KGDB ++ bool "Include kgdb kernel debugger" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, the system will be compiled with the debug ++ option (-g) and a debugging stub will be included in the ++ kernel. This stub communicates with gdb on another (host) ++ computer via a serial port. The host computer should have ++ access to the kernel binary file (vmlinux) and a serial port ++ that is connected to the target machine. Gdb can be made to ++ configure the serial port or you can use stty and setserial to ++ do this. See the 'target' command in gdb. This option also ++ configures in the ability to request a breakpoint early in the ++ boot process. To request the breakpoint just include 'kgdb' ++ as a boot option when booting the target machine. The system ++ will then break as soon as it looks at the boot options. This ++ option also installs a breakpoint in panic and sends any ++ kernel faults to the debugger. For more information see the ++ Documentation/i386/kgdb/kgdb.txt file. + -+ (void) inb_p(port + UART_RX); -+ outb_px(port + UART_IER, 0); ++choice ++ depends on KGDB ++ prompt "Debug serial port BAUD" ++ default KGDB_115200BAUD ++ help ++ Gdb and the kernel stub need to agree on the baud rate to be ++ used. Some systems (x86 family at this writing) allow this to ++ be configured. + -+ (void) inb_p(port + UART_RX); /* serial driver comments say */ -+ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ -+ (void) inb_p(port + UART_MSR); -+ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); -+ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ -+ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ -+ outb_px(port + UART_MCR, info->MCR); ++config KGDB_9600BAUD ++ bool "9600" + -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ -+ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ return; -+} ++config KGDB_19200BAUD ++ bool "19200" + -+/* -+ * getDebugChar -+ * -+ * This is a GDB stub routine. It waits for a character from the -+ * serial interface and then returns it. If there is no serial -+ * interface connection then it returns a bogus value which will -+ * almost certainly cause the system to hang. In the -+ */ -+int kgdb_in_isr = 0; -+int kgdb_in_lsr = 0; -+extern spinlock_t kgdb_spinlock; ++config KGDB_38400BAUD ++ bool "38400" + -+/* Caller takes needed protections */ ++config KGDB_57600BAUD ++ bool "57600" + -+int -+getDebugChar(void) -+{ -+ volatile int chr, dum, time, end_time; ++config KGDB_115200BAUD ++ bool "115200" ++endchoice + -+ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); ++config KGDB_PORT ++ hex "hex I/O port address of the debug serial port" ++ depends on KGDB ++ default 3f8 ++ help ++ Some systems (x86 family at this writing) allow the port ++ address to be configured. The number entered is assumed to be ++ hex, don't put 0x in front of it. The standard address are: ++ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx ++ will tell you what you have. It is good to test the serial ++ connection with a live system before trying to debug. + -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } -+ /* -+ * This trick says if we wait a very long time and get -+ * no char, return the -1 and let the upper level deal -+ * with it. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; -+ while (((chr = read_char(gdb_async_info)) == -1) && -+ (end_time - time) > 0) { -+ rdtsc(dum, time); -+ }; -+ /* -+ * This covers our butts if some other code messes with -+ * our uart, hay, it happens :o) -+ */ -+ if (chr == -1) -+ program_uart(gdb_async_info); ++config KGDB_IRQ ++ int "IRQ of the debug serial port" ++ depends on KGDB ++ default 4 ++ help ++ This is the irq for the debug port. If everything is working ++ correctly and the kernel has interrupts on a control C to the ++ port should cause a break into the kernel debug stub. + -+ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); -+ return (chr); ++config DEBUG_INFO ++ bool ++ depends on KGDB ++ default y + -+} /* getDebugChar */ ++config KGDB_MORE ++ bool "Add any additional compile options" ++ depends on KGDB ++ default n ++ help ++ Saying yes here turns on the ability to enter additional ++ compile options. + -+static int count = 3; -+static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + -+static int __init -+kgdb_enable_ints(void) -+{ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 1); -+ } -+ ok_to_enable_ints = 1; -+ kgdb_enable_ints_now(); -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ kgdb_console_finit(); -+#endif -+ return 0; -+} ++config KGDB_OPTIONS ++ depends on KGDB_MORE ++ string "Additional compile arguments" ++ default "-O1" ++ help ++ This option allows you enter additional compile options for ++ the whole kernel compile. Each platform will have a default ++ that seems right for it. For example on PPC "-ggdb -O1", and ++ for i386 "-O1". Note that by configuring KGDB "-g" is already ++ turned on. In addition, on i386 platforms ++ "-fomit-frame-pointer" is deleted from the standard compile ++ options. + -+#ifdef CONFIG_SERIAL_8250 -+void shutdown_for_kgdb(struct async_struct *gdb_async_info); -+#endif ++config NO_KGDB_CPUS ++ int "Number of CPUs" ++ depends on KGDB && SMP ++ default NR_CPUS ++ help + -+#ifdef CONFIG_DISCONTIGMEM -+static inline int kgdb_mem_init_done(void) -+{ -+ return highmem_start_page != NULL; -+} -+#else -+static inline int kgdb_mem_init_done(void) -+{ -+ return max_mapnr != 0; -+} -+#endif ++ This option sets the number of cpus for kgdb ONLY. It is used ++ to prune some internal structures so they look "nice" when ++ displayed with gdb. This is to overcome possibly larger ++ numbers that may have been entered above. Enter the real ++ number to get nice clean kgdb_info displays. + -+static void -+kgdb_enable_ints_now(void) -+{ -+ if (!spin_trylock(&one_at_atime)) -+ return; -+ if (!ints_disabled) -+ goto exit; -+ if (kgdb_mem_init_done() && -+ ints_disabled) { /* don't try till mem init */ -+#ifdef CONFIG_SERIAL_8250 -+ /* -+ * The ifdef here allows the system to be configured -+ * without the serial driver. -+ * Don't make it a module, however, it will steal the port -+ */ -+ shutdown_for_kgdb(gdb_async_info); -+#endif -+ ints_disabled = request_irq(gdb_async_info->state->irq, -+ gdb_interrupt, -+ IRQ_T(gdb_async_info), -+ "KGDB-stub", NULL); -+ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); -+ } -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ exit: -+ spin_unlock(&one_at_atime); -+} ++config KGDB_TS ++ bool "Enable kgdb time stamp macros?" ++ depends on KGDB ++ default n ++ help ++ Kgdb event macros allow you to instrument your code with calls ++ to the kgdb event recording function. The event log may be ++ examined with gdb at a break point. Turning on this ++ capability also allows you to choose how many events to ++ keep. Kgdb always keeps the lastest events. ++ ++choice ++ depends on KGDB_TS ++ prompt "Max number of time stamps to save?" ++ default KGDB_TS_128 + -+/* -+ * putDebugChar -+ * -+ * This is a GDB stub routine. It waits until the interface is ready -+ * to transmit a char and then sends it. If there is no serial -+ * interface connection then it simply returns to its caller, having -+ * pretended to send the char. Caller takes needed protections. -+ */ -+void -+putDebugChar(int chr) -+{ -+ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", -+ gdb_async_info->port, -+ chr, -+ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); ++config KGDB_TS_64 ++ bool "64" + -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } ++config KGDB_TS_128 ++ bool "128" + -+ write_char(gdb_async_info, chr); /* this routine will wait */ -+ count = (chr == '#') ? 0 : count + 1; -+ if ((count == 2)) { /* try to enable after */ -+ if (ints_disabled & ok_to_enable_ints) -+ kgdb_enable_ints_now(); /* try to enable after */ ++config KGDB_TS_256 ++ bool "256" + -+ /* We do this a lot because, well we really want to get these -+ * interrupts. The serial driver will clear these bits when it -+ * initializes the chip. Every thing else it does is ok, -+ * but this. -+ */ -+ if (!ints_disabled) { -+ outb_px(gdb_async_info->port + UART_IER, -+ gdb_async_info->IER); -+ } -+ } ++config KGDB_TS_512 ++ bool "512" + -+} /* putDebugChar */ ++config KGDB_TS_1024 ++ bool "1024" + -+module_init(kgdb_enable_ints); -diff -puN arch/i386/lib/Makefile~kgdb-ga arch/i386/lib/Makefile ---- 25/arch/i386/lib/Makefile~kgdb-ga 2004-10-21 14:54:15.265602768 -0700 -+++ 25-akpm/arch/i386/lib/Makefile 2004-10-21 14:54:15.313595472 -0700 -@@ -8,3 +8,4 @@ lib-y = checksum.o delay.o usercopy.o ge - - lib-$(CONFIG_X86_USE_3DNOW) += mmx.o - lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -+lib-$(CONFIG_KGDB) += kgdb_serial.o -diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile ---- 25/arch/i386/Makefile~kgdb-ga 2004-10-21 14:54:15.266602616 -0700 -+++ 25-akpm/arch/i386/Makefile 2004-10-21 14:54:15.314595320 -0700 -@@ -99,6 +99,9 @@ core-$(CONFIG_X86_ES7000) := arch/i386/m ++endchoice ++ ++config STACK_OVERFLOW_TEST ++ bool "Turn on kernel stack overflow testing?" ++ depends on KGDB ++ default n ++ help ++ This option enables code in the front line interrupt handlers ++ to check for kernel stack overflow on interrupts and system ++ calls. This is part of the kgdb code on x86 systems. ++ ++config KGDB_CONSOLE ++ bool "Enable serial console thru kgdb port" ++ depends on KGDB ++ default n ++ help ++ This option enables the command line "console=kgdb" option. ++ When the system is booted with this option in the command line ++ all kernel printk output is sent to gdb (as well as to other ++ consoles). For this to work gdb must be connected. For this ++ reason, this command line option will generate a breakpoint if ++ gdb has not yet connected. After the gdb continue command is ++ given all pent up console output will be printed by gdb on the ++ host machine. Neither this option, nor KGDB require the ++ serial driver to be configured. ++ ++config KGDB_SYSRQ ++ bool "Turn on SysRq 'G' command to do a break?" ++ depends on KGDB ++ default y ++ help ++ This option includes an option in the SysRq code that allows ++ you to enter SysRq G which generates a breakpoint to the KGDB ++ stub. This will work if the keyboard is alive and can ++ interrupt the system. Because of constraints on when the ++ serial port interrupt can be enabled, this code may allow you ++ to interrupt the system before the serial port control C is ++ available. Just say yes here. ++ +Index: linux/arch/i386/Makefile +=================================================================== +--- linux.orig/arch/i386/Makefile ++++ linux/arch/i386/Makefile +@@ -98,6 +98,9 @@ core-$(CONFIG_X86_ES7000) := arch/i386/m # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default @@ -3395,1421 +1810,3193 @@ diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ -diff -puN arch/i386/mm/fault.c~kgdb-ga arch/i386/mm/fault.c ---- 25/arch/i386/mm/fault.c~kgdb-ga 2004-10-21 14:54:15.268602312 -0700 -+++ 25-akpm/arch/i386/mm/fault.c 2004-10-21 14:54:15.314595320 -0700 -@@ -430,6 +430,12 @@ no_context: - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ -+#ifdef CONFIG_KGDB -+ if (!user_mode(regs)){ -+ kgdb_handle_exception(14,SIGBUS, error_code, regs); -+ return; -+ } +Index: linux/arch/i386/kernel/Makefile +=================================================================== +--- linux.orig/arch/i386/kernel/Makefile ++++ linux/arch/i386/kernel/Makefile +@@ -14,6 +14,7 @@ obj-y += timers/ + obj-$(CONFIG_ACPI_BOOT) += acpi/ + obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o + obj-$(CONFIG_MCA) += mca.o ++obj-$(CONFIG_KGDB) += kgdb_stub.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_MICROCODE) += microcode.o +Index: linux/arch/i386/kernel/entry.S +=================================================================== +--- linux.orig/arch/i386/kernel/entry.S ++++ linux/arch/i386/kernel/entry.S +@@ -50,6 +50,18 @@ + #include + #include + #include "irq_vectors.h" ++ /* We do not recover from a stack overflow, but at least ++ * we know it happened and should be able to track it down. ++ */ ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#define STACK_OVERFLOW_TEST \ ++ testl $(THREAD_SIZE - 512),%esp; \ ++ jnz 10f; \ ++ call stack_overflow; \ ++10: ++#else ++#define STACK_OVERFLOW_TEST +#endif - bust_spinlocks(1); - -diff -puN arch/x86_64/boot/compressed/head.S~kgdb-ga arch/x86_64/boot/compressed/head.S ---- 25/arch/x86_64/boot/compressed/head.S~kgdb-ga 2004-10-21 14:54:15.269602160 -0700 -+++ 25-akpm/arch/x86_64/boot/compressed/head.S 2004-10-21 14:54:15.315595168 -0700 -@@ -26,6 +26,7 @@ - .code32 - .text - -+#define IN_BOOTLOADER - #include - #include + #define nr_syscalls ((syscall_table_size)/4) -diff -puN arch/x86_64/boot/compressed/misc.c~kgdb-ga arch/x86_64/boot/compressed/misc.c ---- 25/arch/x86_64/boot/compressed/misc.c~kgdb-ga 2004-10-21 14:54:15.270602008 -0700 -+++ 25-akpm/arch/x86_64/boot/compressed/misc.c 2004-10-21 14:54:15.315595168 -0700 -@@ -9,6 +9,7 @@ - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - */ +@@ -191,7 +203,8 @@ int80_ret_end_marker: \ + pushl %ebx; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ +- movl %edx, %es; ++ movl %edx, %es; \ ++ STACK_OVERFLOW_TEST -+#define IN_BOOTLOADER - #include "miscsetup.h" - #include + #define __RESTORE_INT_REGS \ + popl %ebx; \ +@@ -357,6 +370,7 @@ need_resched: + # sysenter call handler stub + ENTRY(sysenter_entry) + movl TSS_sysenter_esp0(%esp),%esp ++ .globl sysenter_past_esp + sysenter_past_esp: + sti + pushl $(__USER_DS) +@@ -437,6 +451,19 @@ syscall_exit: + testw $_TIF_ALLWORK_MASK, %cx # current->work + jne syscall_exit_work + restore_all: ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++ movl EFLAGS(%esp), %eax # mix EFLAGS and CS ++ movb CS(%esp), %al ++ testl $(VM_MASK | 3), %eax ++ jz resume_kernelX # returning to kernel or vm86-space ++ ++ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? ++ jz resume_kernelX ++ ++ int $3 ++ ++resume_kernelX: ++#endif + RESTORE_ALL -diff -puN /dev/null Documentation/i386/kgdb/andthen ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/andthen 2004-10-21 14:54:15.316595016 -0700 -@@ -0,0 +1,100 @@ + # perform work that needs to be done immediately before resumption +Index: linux/arch/i386/kernel/kgdb_stub.c +=================================================================== +--- linux.orig/arch/i386/kernel/kgdb_stub.c ++++ linux/arch/i386/kernel/kgdb_stub.c +@@ -0,0 +1,2330 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ */ ++ ++/* ++ * Copyright (c) 2000 VERITAS Software Corporation. ++ * ++ */ ++/**************************************************************************** ++ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ ++ * ++ * Module name: remcom.c $ ++ * Revision: 1.34 $ ++ * Date: 91/03/09 12:29:49 $ ++ * Contributor: Lake Stevens Instrument Division$ ++ * ++ * Description: low level support for gdb debugger. $ ++ * ++ * Considerations: only works on target hardware $ ++ * ++ * Written by: Glenn Engel $ ++ * Updated by: David Grothe ++ * ModuleState: Experimental $ ++ * ++ * NOTES: See Below $ ++ * ++ * Modified for 386 by Jim Kingdon, Cygnus Support. ++ * Compatibility with 2.1.xx kernel by David Grothe ++ * ++ * Changes to allow auto initilization. All that is needed is that it ++ * be linked with the kernel and a break point (int 3) be executed. ++ * The header file defines BREAKPOINT to allow one to do ++ * this. It should also be possible, once the interrupt system is up, to ++ * call putDebugChar("+"). Once this is done, the remote debugger should ++ * get our attention by sending a ^C in a packet. George Anzinger ++ * ++ * Integrated into 2.2.5 kernel by Tigran Aivazian ++ * Added thread support, support for multiple processors, ++ * support for ia-32(x86) hardware debugging. ++ * Amit S. Kale ( akale@veritas.com ) ++ * ++ * ++ * To enable debugger support, two things need to happen. One, a ++ * call to set_debug_traps() is necessary in order to allow any breakpoints ++ * or error conditions to be properly intercepted and reported to gdb. ++ * Two, a breakpoint needs to be generated to begin communication. This ++ * is most easily accomplished by a call to breakpoint(). Breakpoint() ++ * simulates a breakpoint by executing an int 3. ++ * ++ ************* ++ * ++ * The following gdb commands are supported: ++ * ++ * command function Return value ++ * ++ * g return the value of the CPU registers hex data or ENN ++ * G set the value of the CPU registers OK or ENN ++ * ++ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN ++ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN ++ * ++ * c Resume at current address SNN ( signal NN) ++ * cAA..AA Continue at address AA..AA SNN ++ * ++ * s Step one instruction SNN ++ * sAA..AA Step one instruction from AA..AA SNN ++ * ++ * k kill ++ * ++ * ? What was the last sigval ? SNN (signal NN) ++ * ++ * All commands and responses are sent with a packet which includes a ++ * checksum. A packet consists of ++ * ++ * $#. ++ * ++ * where ++ * :: ++ * :: < two hex digits computed as modulo 256 sum of > ++ * ++ * When a packet is received, it is first acknowledged with either '+' or '-'. ++ * '+' indicates a successful transfer. '-' indicates a failed transfer. ++ * ++ * Example: ++ * ++ * Host: Reply: ++ * $m0,10#2a +$00010203040506070809101112131415#42 ++ * ++ ****************************************************************************/ ++#define KGDB_VERSION "<20030915.1651.33>" ++#include ++#include ++#include /* for strcpy */ ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include + -+define set_andthen -+ set var $thp=0 -+ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] -+ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) -+ set var $at_oc=kgdb_and_then_count -+ set var $at_cc=$at_oc -+end ++/************************************************************************ ++ * ++ * external low-level support routines ++ */ ++typedef void (*Function) (void); /* pointer to a function */ + -+define andthen_next -+ set var $at_cc=$arg0 -+end ++/* Thread reference */ ++typedef unsigned char threadref[8]; + -+define andthen -+ andthen_set_edge -+ if ($at_cc >= $at_oc) -+ printf "Outside window. Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc -+ output *($thp+($at_cc++ % $at_size )) -+ printf "\n" -+ end -+end -+define andthen_set_edge -+ set var $at_oc=kgdb_and_then_count -+ set var $at_low = $at_oc - $at_size -+ if ($at_low < 0 ) -+ set var $at_low = 0 -+ end -+ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) -+ printf "Count outside of window, setting count to " -+ if ($at_cc >= $at_oc) -+ set var $at_cc = $at_oc -+ else -+ set var $at_cc = $at_low -+ end -+ printf "%d\n",$at_cc -+ end -+end ++extern void putDebugChar(int); /* write a single character */ ++extern int getDebugChar(void); /* read and return a single char */ + -+define beforethat -+ andthen_set_edge -+ if ($at_cc <= $at_low) -+ printf "Outside window. Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc-1 -+ output *($thp+(--$at_cc % $at_size )) -+ printf "\n" -+ end -+end ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 400 + -+document andthen_next -+ andthen_next -+ . sets the number of the event to display next. If this event -+ . is not in the event pool, either andthen or beforethat will -+ . correct it to the nearest event pool edge. The event pool -+ . ends at the last event recorded and begins -+ . prior to that. If beforethat is used next, it will display -+ . event -1. -+. -+ andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end ++char *kgdb_version = KGDB_VERSION; + ++/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ ++int debug_regs = 0; /* set to non-zero to print registers */ + -+document andthen -+ andthen -+. displays the next event in the list. sets up to display -+. the oldest saved event first. -+. (optional) count of the event to display. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to andthen the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end ++/* filled in by an external module */ ++char *gdb_module_offsets; + -+document set_andthen -+ set_andthen -+. sets up to use the and commands. -+. if you have defined your own struct, use the above and -+. then enter the following: -+. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] -+. where is the name of your structure. -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end ++static const char hexchars[] = "0123456789abcdef"; + -+document beforethat -+ beforethat -+. displays the next prior event in the list. sets up to -+. display the last occuring event first. -+. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to beforethat the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -diff -puN /dev/null Documentation/i386/kgdb/debug-nmi.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/debug-nmi.txt 2004-10-21 14:54:15.316595016 -0700 -@@ -0,0 +1,37 @@ -+Subject: Debugging with NMI -+Date: Mon, 12 Jul 1999 11:28:31 -0500 -+From: David Grothe -+Organization: Gcom, Inc -+To: David Grothe ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. ++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC /* 8 also known as eip */ , ++ _PS /* 9 also known as eflags */ , ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; + -+Kernel hackers: ++/*************************** ASSEMBLY CODE MACROS *************************/ ++/* ++ * Put the error code here just in case the user cares. ++ * Likewise, the vector number here (since GDB only gets the signal ++ * number through the usual means, and that's not very specific). ++ * The called_from is the return address so he can tell how we entered kgdb. ++ * This will allow him to seperate out the various possible entries. ++ */ ++#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + -+Maybe this is old hat, but it is new to me -- ++#define PID_MAX PID_MAX_DEFAULT + -+On an ISA bus machine, if you short out the A1 and B1 pins of an ISA -+slot you will generate an NMI to the CPU. This interrupts even a -+machine that is hung in a loop with interrupts disabled. Used in -+conjunction with kgdb < -+ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can -+gain debugger control of a machine that is hung in the kernel! Even -+without kgdb the kernel will print a stack trace so you can find out -+where it was hung. ++#ifdef CONFIG_SMP ++void smp_send_nmi_allbutself(void); ++#define IF_SMP(x) x ++#undef MAX_NO_CPUS ++#ifndef CONFIG_NO_KGDB_CPUS ++#define CONFIG_NO_KGDB_CPUS 2 ++#endif ++#if CONFIG_NO_KGDB_CPUS > NR_CPUS ++#define MAX_NO_CPUS NR_CPUS ++#else ++#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS ++#endif ++#define hold_init hold_on_sstep: 1, ++#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) ++#define NUM_CPUS num_online_cpus() ++#else ++#define IF_SMP(x) ++#define hold_init ++#undef MAX_NO_CPUS ++#define MAX_NO_CPUS 1 ++#define NUM_CPUS 1 ++#endif ++#define NOCPU (struct task_struct *)0xbad1fbad ++/* *INDENT-OFF* */ ++struct kgdb_info { ++ int used_malloc; ++ void *called_from; ++ long long entry_tsc; ++ int errcode; ++ int vector; ++ int print_debug_info; ++#ifdef CONFIG_SMP ++ int hold_on_sstep; ++ struct { ++ volatile struct task_struct *task; ++ int pid; ++ int hold; ++ struct pt_regs *regs; ++ } cpus_waiting[MAX_NO_CPUS]; ++#endif ++} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + -+The A1/B1 pins are directly opposite one another and the farthest pins -+towards the bracket end of the ISA bus socket. You can stick a paper -+clip or multi-meter probe between them to short them out. ++/* *INDENT-ON* */ + -+I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the -+board consists of two rows of wire wrap pins. So I wired a push button -+between the A1/B1 pins and now have an ISA board that I can stick into -+any ISA bus slot for debugger entry. ++#define used_m kgdb_info.used_malloc ++/* ++ * This is little area we set aside to contain the stack we ++ * need to build to allow gdb to call functions. We use one ++ * per cpu to avoid locking issues. We will do all this work ++ * with interrupts off so that should take care of the protection ++ * issues. ++ */ ++#define LOOKASIDE_SIZE 200 /* should be more than enough */ ++#define MALLOC_MAX 200 /* Max malloc size */ ++struct { ++ unsigned int esp; ++ int array[LOOKASIDE_SIZE]; ++} fn_call_lookaside[MAX_NO_CPUS]; ++ ++static int trap_cpu; ++static unsigned int OLD_esp; + -+Microsoft has a circuit diagram of a PCI card at -+http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to -+build one you will have to mail them and ask for the PAL equations. -+Nobody makes one comercially. ++#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] ++#define IF_BIT 0x200 ++#define TF_BIT 0x100 + -+[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if -+your machine catches fire, it is your problem, not mine.] ++#define MALLOC_ROUND 8-1 + -+-- Dave (the kgdb guy) -diff -puN /dev/null Documentation/i386/kgdb/gdb-globals.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdb-globals.txt 2004-10-21 14:54:15.317594864 -0700 -@@ -0,0 +1,71 @@ -+Sender: akale@veritas.com -+Date: Fri, 23 Jun 2000 19:26:35 +0530 -+From: "Amit S. Kale" -+Organization: Veritas Software (India) -+To: Dave Grothe , linux-kernel@vger.rutgers.edu -+CC: David Milburn , -+ "Edouard G. Parmelan" , -+ ezannoni@cygnus.com, Keith Owens -+Subject: Re: Module debugging using kgdb ++static char malloc_array[MALLOC_MAX]; ++IF_SMP(static void to_gdb(const char *mess)); ++void * ++malloc(int size) ++{ + -+Dave Grothe wrote: -+> -+> Amit: -+> -+> There is a 2.4.0 version of kgdb on our ftp site: -+> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb -+> and loadmodule.sh there. -+> -+> Have a look at the README file and see if I go it right. If not, send -+> me some corrections and I will update it. -+> -+> Does your version of gdb solve the global variable problem? ++ if (size <= (MALLOC_MAX - used_m)) { ++ int old_used = used_m; ++ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); ++ return &malloc_array[old_used]; ++ } else { ++ return NULL; ++ } ++} + -+Yes. -+Thanks to Elena Zanoni, gdb (developement version) can now calculate -+correctly addresses of dynamically loaded object files. I have not been -+following gdb developement for sometime and am not sure when symbol -+address calculation fix is going to appear in a gdb stable version. ++/* ++ * Gdb calls functions by pushing agruments, including a return address ++ * on the stack and the adjusting EIP to point to the function. The ++ * whole assumption in GDB is that we are on a different stack than the ++ * one the "user" i.e. code that hit the break point, is on. This, of ++ * course is not true in the kernel. Thus various dodges are needed to ++ * do the call without directly messing with EIP (which we can not change ++ * as it is just a location and not a register. To adjust it would then ++ * require that we move every thing below EIP up or down as needed. This ++ * will not work as we may well have stack relative pointer on the stack ++ * (such as the pointer to regs, for example). + -+Elena, any idea when the fix will make it to a prebuilt gdb from a -+redhat release? ++ * So here is what we do: ++ * We detect gdb attempting to store into the stack area and instead, store ++ * into the fn_call_lookaside.array at the same relative location as if it ++ * were the area ESP pointed at. We also trap ESP modifications ++ * and uses these to adjust fn_call_lookaside.esp. On entry ++ * fn_call_lookaside.esp will be set to point at the last entry in ++ * fn_call_lookaside.array. This allows us to check if it has changed, and ++ * if so, on exit, we add the registers we will use to do the move and a ++ * trap/ interrupt return exit sequence. We then adjust the eflags in the ++ * regs array (remember we now have a copy in the fn_call_lookaside.array) to ++ * kill the interrupt bit, AND we change EIP to point at our set up stub. ++ * As part of the register set up we preset the registers to point at the ++ * begining and end of the fn_call_lookaside.array, so all the stub needs to ++ * do is move words from the array to the stack until ESP= the desired value ++ * then do the rti. This will then transfer to the desired function with ++ * all the correct registers. Nifty huh? ++ */ ++extern asmlinkage void fn_call_stub(void); ++extern asmlinkage void fn_rtn_stub(void); ++/* *INDENT-OFF* */ ++__asm__("fn_rtn_stub:\n\t" ++ "movl %eax,%esp\n\t" ++ "fn_call_stub:\n\t" ++ "1:\n\t" ++ "addl $-4,%ebx\n\t" ++ "movl (%ebx), %eax\n\t" ++ "pushl %eax\n\t" ++ "cmpl %esp,%ecx\n\t" ++ "jne 1b\n\t" ++ "popl %eax\n\t" ++ "popl %ebx\n\t" ++ "popl %ecx\n\t" ++ "iret \n\t"); ++/* *INDENT-ON* */ ++#define gdb_i386vector kgdb_info.vector ++#define gdb_i386errcode kgdb_info.errcode ++#define waiting_cpus kgdb_info.cpus_waiting ++#define remote_debug kgdb_info.print_debug_info ++#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold ++/* gdb locks */ + -+For the time being I have built a gdb developement version. It can be -+used for module debugging with loadmodule.sh script. ++#ifdef CONFIG_SMP ++static int in_kgdb_called; ++static spinlock_t waitlocks[MAX_NO_CPUS] = ++ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; ++/* ++ * The following array has the thread pointer of each of the "other" ++ * cpus. We make it global so it can be seen by gdb. ++ */ ++volatile int in_kgdb_entry_log[MAX_NO_CPUS]; ++volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; ++/* ++static spinlock_t continuelocks[MAX_NO_CPUS]; ++*/ ++spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; ++/* waiters on our spinlock plus us */ ++static atomic_t spinlock_waiters = ATOMIC_INIT(1); ++static int spinlock_count = 0; ++static int spinlock_cpu = 0; ++/* ++ * Note we use nested spin locks to account for the case where a break ++ * point is encountered when calling a function by user direction from ++ * kgdb. Also there is the memory exception recursion to account for. ++ * Well, yes, but this lets other cpus thru too. Lets add a ++ * cpu id to the lock. ++ */ ++#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ ++ spinlock_cpu != smp_processor_id()){\ ++ atomic_inc(&spinlock_waiters); \ ++ while (! spin_trylock(x)) {\ ++ in_kgdb(®s);\ ++ }\ ++ atomic_dec(&spinlock_waiters); \ ++ spinlock_count = 1; \ ++ spinlock_cpu = smp_processor_id(); \ ++ }else{ \ ++ spinlock_count++; \ ++ } ++#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) ++#else ++unsigned kgdb_spinlock = 0; ++#define KGDB_SPIN_LOCK(x) --*x ++#define KGDB_SPIN_UNLOCK(x) ++*x ++#endif + -+The problem with calculating of module addresses with previous versions -+of gdb was as follows: -+gdb did not use base address of a section while calculating address of -+a symbol in the section in an object file loaded via 'add-symbol-file'. -+It used address of .text segment instead. Due to this addresses of -+symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. ++int ++hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} + -+Above mentioned fix allow gdb to use base address of a segment while -+calculating address of a symbol in it. It adds a parameter '-s' to -+'add-symbol-file' command for specifying base address of a segment. ++/* scan for the sequence $# */ ++void ++getpacket(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int i; ++ int count; ++ char ch; + -+loadmodule.sh script works as follows. ++ do { ++ /* wait around for the start character, ignore all other characters */ ++ while ((ch = (getDebugChar() & 0x7f)) != '$') ; ++ checksum = 0; ++ xmitcsum = -1; + -+1. Copy a module file to target machine. -+2. Load the module on the target machine using insmod with -m parameter. -+insmod produces a module load map which contains base addresses of all -+sections in the module and addresses of symbols in the module file. -+3. Find all sections and their base addresses in the module from -+the module map. -+4. Generate a script that loads the module file. The script uses -+'add-symbol-file' and specifies address of text segment followed by -+addresses of all segments in the module. ++ count = 0; + -+Here is an example gdb script produced by loadmodule.sh script. ++ /* now, read until a # or end of buffer is found */ ++ while (count < BUFMAX) { ++ ch = getDebugChar() & 0x7f; ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; + -+add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 -+-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 -+-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 ++ if (ch == '#') { ++ xmitcsum = hex(getDebugChar() & 0x7f) << 4; ++ xmitcsum += hex(getDebugChar() & 0x7f); ++ if ((remote_debug) && (checksum != xmitcsum)) { ++ printk ++ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", ++ checksum, xmitcsum, buffer); ++ } + -+With this command gdb can calculate addresses of symbols in ANY segment -+in a module file. ++ if (checksum != xmitcsum) ++ putDebugChar('-'); /* failed checksum */ ++ else { ++ putDebugChar('+'); /* successful transfer */ ++ /* if a sequence char is present, reply the sequence ID */ ++ if (buffer[2] == ':') { ++ putDebugChar(buffer[0]); ++ putDebugChar(buffer[1]); ++ /* remove sequence chars from buffer */ ++ count = strlen(buffer); ++ for (i = 3; i <= count; i++) ++ buffer[i - 3] = buffer[i]; ++ } ++ } ++ } ++ } while (checksum != xmitcsum); + -+Regards. -+-- -+Amit Kale -+Veritas Software ( http://www.veritas.com ) -diff -puN /dev/null Documentation/i386/kgdb/gdbinit ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit 2004-10-21 14:54:15.317594864 -0700 -@@ -0,0 +1,14 @@ -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 -+target remote /dev/ttyS0 -+define si -+stepi -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -+end -+define ni -+nexti -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -diff -puN /dev/null Documentation/i386/kgdb/gdbinit.hw ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit.hw 2004-10-21 14:54:15.318594712 -0700 -@@ -0,0 +1,117 @@ ++ if (remote_debug) ++ printk("R:%s\n", buffer); ++} + -+#Using ia-32 hardware breakpoints. -+# -+#4 hardware breakpoints are available in ia-32 processors. These breakpoints -+#do not need code modification. They are set using debug registers. -+# -+#Each hardware breakpoint can be of one of the -+#three types: execution, write, access. -+#1. An Execution breakpoint is triggered when code at the breakpoint address is -+#executed. -+#2. A write breakpoint ( aka watchpoints ) is triggered when memory location -+#at the breakpoint address is written. -+#3. An access breakpoint is triggered when memory location at the breakpoint -+#address is either read or written. -+# -+#As hardware breakpoints are available in limited number, use software -+#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. -+# -+#Length of an access or a write breakpoint defines length of the datatype to -+#be watched. Length is 1 for char, 2 short , 3 int. -+# -+#For placing execution, write and access breakpoints, use commands -+#hwebrk, hwwbrk, hwabrk -+#To remove a breakpoint use hwrmbrk command. -+# -+#These commands take following types of arguments. For arguments associated -+#with each command, use help command. -+#1. breakpointno: 0 to 3 -+#2. length: 1 to 3 -+#3. address: Memory location in hex ( without 0x ) e.g c015e9bc -+# -+#Use the command exinfo to find which hardware breakpoint occured. ++/* send the packet in buffer. */ + -+#hwebrk breakpointno address -+define hwebrk -+ maintenance packet Y$arg0,0,0,$arg1 -+end -+document hwebrk -+ hwebrk
-+ Places a hardware execution breakpoint -+ = 0 - 3 -+
= Hex digits without leading "0x". -+end ++void ++putpacket(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; + -+#hwwbrk breakpointno length address -+define hwwbrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwwbrk -+ hwwbrk
-+ Places a hardware write breakpoint -+ = 0 - 3 -+ = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+
= Hex digits without leading "0x". -+end ++ /* $#. */ ++ do { ++ if (remote_debug) ++ printk("T:%s\n", buffer); ++ putDebugChar('$'); ++ checksum = 0; ++ count = 0; + -+#hwabrk breakpointno length address -+define hwabrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwabrk -+ hwabrk
-+ Places a hardware access breakpoint -+ = 0 - 3 -+ = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+
= Hex digits without leading "0x". -+end ++ while ((ch = buffer[count])) { ++ putDebugChar(ch); ++ checksum += ch; ++ count += 1; ++ } + -+#hwrmbrk breakpointno -+define hwrmbrk -+ maintenance packet y$arg0 -+end -+document hwrmbrk -+ hwrmbrk -+ = 0 - 3 -+ Removes a hardware breakpoint -+end ++ putDebugChar('#'); ++ putDebugChar(hexchars[checksum >> 4]); ++ putDebugChar(hexchars[checksum % 16]); + -+define reboot -+ maintenance packet r -+end -+#exinfo -+define exinfo -+ maintenance packet qE -+end -+document exinfo -+ exinfo -+ Gives information about a breakpoint. -+end -+define get_th -+ p $th=(struct thread_info *)((int)$esp & ~8191) -+end -+document get_th -+ get_tu -+ Gets and prints the current thread_info pointer, Defines th to be it. -+end -+define get_cu -+ p $cu=((struct thread_info *)((int)$esp & ~8191))->task -+end -+document get_cu -+ get_cu -+ Gets and print the "current" value. Defines $cu to be it. -+end -+define int_off -+ set var $flags=$eflags -+ set $eflags=$eflags&~0x200 -+ end -+define int_on -+ set var $eflags|=$flags&0x200 -+ end -+document int_off -+ saves the current interrupt state and clears the processor interrupt -+ flag. Use int_on to restore the saved flag. -+end -+document int_on -+ Restores the interrupt flag saved by int_off. -+end -diff -puN /dev/null Documentation/i386/kgdb/gdbinit-modules ---- /dev/null Thu Apr 11 07:25:15 2002 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit-modules Fri Jan 13 17:54:25 2006 -@@ -0,0 +1,149 @@ -+# -+# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. -+# -+# This don't work for Linux-2.0 or older. -+# -+# Author Edouard G. Parmelan -+# -+# -+# Fri Apr 30 20:33:29 CEST 1999 -+# First public release. -+# -+# Major cleanup after experiment Linux-2.0 kernel without success. -+# Symbols of a module are not in the correct order, I can't explain -+# why :( -+# -+# Fri Mar 19 15:41:40 CET 1999 -+# Initial version. -+# -+# Thu Jan 6 16:29:03 CST 2000 -+# A little fixing by Dave Grothe -+# -+# Mon Jun 19 09:33:13 CDT 2000 -+# Alignment changes from Edouard Parmelan -+# -+# The basic idea is to find where insmod load the module and inform -+# GDB to load the symbol table of the module with the GDB command -+# ``add-symbol-file
''. -+# -+# The Linux kernel holds the list of all loaded modules in module_list, -+# this list end with &kernel_module (exactly with module->next == NULL, -+# but the last module is not a real module). -+# -+# Insmod allocates the struct module before the object file. Since -+# Linux-2.1, this structure contain his size. The real address of -+# the object file is then (char*)module + module->size_of_struct. -+# -+# You can use three user functions ``mod-list'', ``mod-print-symbols'' -+# and ``add-module-symbols''. -+# -+# mod-list list all loaded modules with the format: -+# -+# -+# As soon as you have found the address of your module, you can -+# print its exported symbols (mod-print-symbols) or inform GDB to add -+# symbols from your module file (mod-add-symbols). -+# -+# The argument that you give to mod-print-symbols or mod-add-symbols -+# is the from the mod-list command. -+# -+# When using the mod-add-symbols command you must also give the full -+# pathname of the modules object code file. -+# -+# The command mod-add-lis is an example of how to make this easier. -+# You can edit this macro to contain the path name of your own -+# favorite module and then use it as a shorthand to load it. You -+# still need the module-address, however. -+# -+# The internal function ``mod-validate'' set the GDB variable $mod -+# as a ``struct module*'' if the kernel known the module otherwise -+# $mod is set to NULL. This ensure to not add symbols for a wrong -+# address. -+# -+# -+# Sat Feb 12 20:05:47 CET 2005 -+# -+# Adapted to the 2.6.* module data structure. -+# (Getting miffed at gdb for not having "offsetof" in the process :-/ ) -+# -+# Autogenerate add-symbol-file statements from the module list instead -+# of relying on a no-longer-working loadmodule.sh program. -+# -+# Matthias Urlichs -+# -+# -+# Have a nice hacking day ! -+# -+# -+define mod-list -+ set $lmod = modules->next -+ # This is a circular data structure -+ while $lmod != &modules -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ printf "%p\t%s\n", $mod, $mod->name -+ set $lmod = $lmod->next -+ end -+end -+document mod-list -+mod-list -+List all modules in the form: -+Use the as the argument for the other -+mod-commands: mod-print-symbols, mod-add-symbols. -+end ++ } while ((getDebugChar() & 0x7f) != '+'); ++ ++} ++ ++static char remcomInBuffer[BUFMAX]; ++static char remcomOutBuffer[BUFMAX]; ++static short error; ++ ++void ++debug_error(char *format, char *parm) ++{ ++ if (remote_debug) ++ printk(format, parm); ++} ++ ++static void ++print_regs(struct pt_regs *regs) ++{ ++ printk("EAX=%08lx ", regs->eax); ++ printk("EBX=%08lx ", regs->ebx); ++ printk("ECX=%08lx ", regs->ecx); ++ printk("EDX=%08lx ", regs->edx); ++ printk("\n"); ++ printk("ESI=%08lx ", regs->esi); ++ printk("EDI=%08lx ", regs->edi); ++ printk("EBP=%08lx ", regs->ebp); ++ printk("ESP=%08lx ", (long) ®s->esp); ++ printk("\n"); ++ printk(" DS=%08x ", regs->xds); ++ printk(" ES=%08x ", regs->xes); ++ printk(" SS=%08x ", __KERNEL_DS); ++ printk(" FL=%08lx ", regs->eflags); ++ printk("\n"); ++ printk(" CS=%08x ", regs->xcs); ++ printk(" IP=%08lx ", regs->eip); ++#if 0 ++ printk(" FS=%08x ", regs->fs); ++ printk(" GS=%08x ", regs->gs); ++#endif ++ printk("\n"); + -+define mod-list-syms -+ set $lmod = modules->next -+ # This is a circular data structure -+ while $lmod != &modules -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ printf "add-symbol-file %s.ko %p\n", $mod->name, $mod->module_core -+ set $lmod = $lmod->next -+ end -+end -+document mod-list-syms -+mod-list-syms -+List all modules in the form: add-symbol-file -+for adding modules' symbol tables without loadmodule.sh. -+end ++} /* print_regs */ + -+define mod-validate -+ set $lmod = modules->next -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ while ($lmod != &modules) && ($mod != $arg0) -+ set $lmod = $lmod->next -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ end -+ if $lmod == &modules -+ set $mod = 0 -+ printf "%p is not a module\n", $arg0 -+ end -+end -+document mod-validate -+mod-validate -+Internal user-command used to validate the module parameter. -+If is a real loaded module, set $mod to it, otherwise set $mod -+to 0. -+end ++#define NEW_esp fn_call_lookaside[trap_cpu].esp + -+define mod-print-symbols -+ mod-validate $arg0 -+ if $mod != 0 -+ set $i = 0 -+ while $i < $mod->num_syms -+ set $sym = $mod->syms[$i] -+ printf "%p\t%s\n", $sym->value, $sym->name -+ set $i = $i + 1 -+ end -+ set $i = 0 -+ while $i < $mod->num_gpl_syms -+ set $sym = $mod->gpl_syms[$i] -+ printf "%p\t%s\n", $sym->value, $sym->name -+ set $i = $i + 1 -+ end -+ end -+end -+document mod-print-symbols -+mod-print-symbols -+Print all exported symbols of the module. See mod-list -+end ++static void ++regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_EAX] = regs->eax; ++ gdb_regs[_EBX] = regs->ebx; ++ gdb_regs[_ECX] = regs->ecx; ++ gdb_regs[_EDX] = regs->edx; ++ gdb_regs[_ESI] = regs->esi; ++ gdb_regs[_EDI] = regs->edi; ++ gdb_regs[_EBP] = regs->ebp; ++ gdb_regs[_DS] = regs->xds; ++ gdb_regs[_ES] = regs->xes; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_CS] = regs->xcs; ++ gdb_regs[_PC] = regs->eip; ++ /* Note, as we are a debugging the kernel, we will always ++ * trap in kernel code, this means no priviledge change, ++ * and so the pt_regs structure is not completely valid. In a non ++ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, ++ * SS and ESP are not stacked, this means that the last 2 elements of ++ * pt_regs is not valid (they would normally refer to the user stack) ++ * also, using regs+1 is no good because you end up will a value that is ++ * 2 longs (8) too high. This used to cause stepping over functions ++ * to fail, so my fix is to use the address of regs->esp, which ++ * should point at the end of the stack frame. Note I have ignored ++ * completely exceptions that cause an error code to be stacked, such ++ * as double fault. Stuart Hughes, Zentropix. ++ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + -diff -puN /dev/null Documentation/i386/kgdb/kgdb.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2004-10-21 14:54:15.324593800 -0700 -@@ -0,0 +1,775 @@ -+Last edit: <20030806.1637.12> -+This file has information specific to the i386 kgdb option. Other -+platforms with the kgdb option may behave in a similar fashion. ++ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). ++ */ ++ gdb_regs[_ESP] = NEW_esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} /* regs_to_gdb_regs */ + -+New features: -+============ -+20030806.1557.37 -+This version was made against the 2.6.0-test2 kernel. We have made the -+following changes: ++static void ++gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++ NEW_esp = gdb_regs[_ESP]; /* keep the value */ ++#if 0 /* can't change these */ ++ regs->esp = gdb_regs[_ESP]; ++ regs->xss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif + -+- The getthread() code in the stub calls find_task_by_pid(). It fails -+ if we are early in the bring up such that the pid arrays have yet to -+ be allocated. We have added a line to kernel/pid.c to make -+ "kgdb_pid_init_done" true once the arrays are allocated. This way the -+ getthread() code knows not to call. This is only used by the thread -+ debugging stuff and threads will not yet exist at this point in the -+ boot. ++} /* gdb_regs_to_regs */ + -+- For some reason, gdb was not asking for a new thread list when the -+ "info thread" command was given. We changed to the newer version of -+ the thread info command and gdb now seems to ask when needed. Result, -+ we now get all threads in the thread list. ++int thread_list = 0; + -+- We now respond to the ThreadExtraInfo request from gdb with the thread -+ name from task_struct .comm. This then appears in the thread list. -+ Thoughts on additional options for this are welcome. Things such as -+ "has BKL" and "Preempted" come to mind. I think we could have a flag -+ word that could enable different bits of info here. ++void ++get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) ++{ ++ unsigned long stack_page; ++ int count = 0; ++ IF_SMP(int i); ++ if (!p || p == current) { ++ regs_to_gdb_regs(gdb_regs, regs); ++ return; ++ } ++#ifdef CONFIG_SMP ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (p == kgdb_info.cpus_waiting[i].task) { ++ regs_to_gdb_regs(gdb_regs, ++ kgdb_info.cpus_waiting[i].regs); ++ gdb_regs[_ESP] = ++ (int) &kgdb_info.cpus_waiting[i].regs->esp; + -+- We now honor, sort of, the C and S commands. These are continue and -+ single set after delivering a signal. We ignore the signal and do the -+ requested action. This only happens when we told gdb that a signal -+ was the reason for entry, which is only done on memory faults. The -+ result is that you can now continue into the Oops. ++ return; ++ } ++ } ++#endif ++ memset(gdb_regs, 0, NUMREGBYTES); ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; ++ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); ++ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + -+- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, -+ but it is more exact on what language to use. ++/* ++ * This code is to give a more informative notion of where a process ++ * is waiting. It is used only when the user asks for a thread info ++ * list. If he then switches to the thread, s/he will find the task ++ * is in schedule, but a back trace should show the same info we come ++ * up with. This code was shamelessly purloined from process.c. It was ++ * then enhanced to provide more registers than simply the program ++ * counter. ++ */ + -+- We added two dwarf2 include files and a bit of code at the end of -+ entry.S. This does not yet work, so it is disabled. Still we want to -+ keep track of the code and "maybe" someone out there can fix it. ++ if (!thread_list) { ++ return; ++ } ++ ++ if (p->state == TASK_RUNNING) ++ return; ++ stack_page = (unsigned long) p->thread_info; ++ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > ++ THREAD_SIZE - sizeof(long) + stack_page) ++ return; ++ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ++ do { ++ if (gdb_regs[_EBP] < stack_page || ++ gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page) ++ return; ++ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); ++ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; ++ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; ++ if (!in_sched_functions(gdb_regs[_PC])) ++ return; ++ } while (count++ < 16); ++ return; ++} ++ ++/* Indicate to caller of mem2hex or hex2mem that there has been an ++ error. */ ++static volatile int mem_err = 0; ++static volatile int mem_err_expected = 0; ++static volatile int mem_err_cnt = 0; ++static int garbage_loc = -1; ++ ++int ++get_char(char *addr) ++{ ++ return *addr; ++} ++ ++void ++set_char(char *addr, int val, int may_fault) ++{ ++ /* ++ * This code traps references to the area mapped to the kernel ++ * stack as given by the regs and, instead, stores to the ++ * fn_call_lookaside[cpu].array ++ */ ++ if (may_fault && ++ (unsigned int) addr < OLD_esp && ++ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { ++ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); ++ } ++ *addr = val; ++} ++ ++/* convert the memory pointed to by mem into hex, placing result in buf */ ++/* return a pointer to the last char put in buf (null) */ ++/* If MAY_FAULT is non-zero, then we should set mem_err in response to ++ a fault; if zero treat a fault like any other fault in the stub. */ ++char * ++mem2hex(char *mem, char *buf, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; + -+- Randy Dunlap sent some fix ups for this file which are now merged. ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ /* printk("%lx = ", mem) ; */ + -+- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a -+ compiler warning if CONFIG_KGDB is off (now who would do that :). ++ ch = get_char(mem++); + -+- Andrew Morton sent a fix for the serial driver which is now merged. ++ /* printk("%02x\n", ch & 0xFF) ; */ ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault fetching from addr %lx\n", ++ (long) (mem - 1)); ++ *buf = 0; /* truncate buffer */ ++ return (buf); ++ } ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch % 16]; ++ } ++ *buf = 0; ++ if (may_fault) ++ mem_err_expected = 0; ++ return (buf); ++} + -+- Andrew also sent a change to the stub around the cpu managment code -+ which is also merged. ++/* convert the hex array pointed to by buf into binary to be placed in mem */ ++/* return a pointer to the character AFTER the last byte written */ ++/* NOTE: We use the may fault flag to also indicate if the write is to ++ * the registers (0) or "other" memory (!=0) ++ */ ++char * ++hex2mem(char *buf, char *mem, int count, int may_fault) ++{ ++ int i; ++ unsigned char ch; + -+- Andrew also sent a patch to make "f" as well as "g" work as SysRq -+ commands to enter kgdb, merged. ++ if (may_fault) { ++ mem_err_expected = 1; ++ mem_err = 0; ++ } ++ for (i = 0; i < count; i++) { ++ ch = hex(*buf++) << 4; ++ ch = ch + hex(*buf++); ++ set_char(mem++, ch, may_fault); + -+- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a -+ "who" field to the spinlock data struct. This is filled with -+ "current" when ever the spinlock suceeds. Useful if you want to know -+ who has the lock. ++ if (may_fault && mem_err) { ++ if (remote_debug) ++ printk("Mem fault storing to addr %lx\n", ++ (long) (mem - 1)); ++ return (mem); ++ } ++ } ++ if (may_fault) ++ mem_err_expected = 0; ++ return (mem); ++} + -+_ And last, but not least, we fixed the "get_cu" macro to properly get -+ the current value of "current". ++/**********************************************/ ++/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ ++/* RETURN NUMBER OF CHARS PROCESSED */ ++/**********************************************/ ++int ++hexToInt(char **ptr, int *intValue) ++{ ++ int numChars = 0; ++ int hexValue; + -+New features: -+============ -+20030505.1827.27 -+We are starting to align with the sourceforge version, at least in -+commands. To this end, the boot command string to start kgdb at -+boot time has been changed from "kgdb" to "gdb". ++ *intValue = 0; + -+Andrew Morton sent a couple of patches which are now included as follows: -+1.) We now return a flag to the interrupt handler. -+2.) We no longer use smp_num_cpus (a conflict with the lock meter). -+3.) And from William Lee Irwin III code to make -+ sure high-mem is set up before we attempt to register our interrupt -+ handler. -+We now include asm/kgdb.h from config.h so you will most likely never -+have to include it. It also 'NULLS' the kgdb macros you might have in -+your code when CONFIG_KGDB is not defined. This allows you to just -+turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. -+This include is conditioned on the machine being an x86 so as to not -+mess with other archs. ++ while (**ptr) { ++ hexValue = hex(**ptr); ++ if (hexValue >= 0) { ++ *intValue = (*intValue << 4) | hexValue; ++ numChars++; ++ } else ++ break; + -+20020801.1129.03 -+This is currently the version for the 2.4.18 (and beyond?) kernel. ++ (*ptr)++; ++ } + -+We have several new "features" beginning with this version: ++ return (numChars); ++} + -+1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more -+ waiting and it will pull that guy out of an IRQ off spin lock :) ++#define stubhex(h) hex(h) ++#ifdef old_thread_list + -+2.) We doctored up the code that tells where a task is waiting and -+ included it so that the "info thread" command will show a bit more -+ than "schedule()". Try it... ++static int ++stub_unpack_int(char *buff, int fieldlength) ++{ ++ int nibble; ++ int retval = 0; + -+3.) Added the ability to call a function from gdb. All the standard gdb -+ issues apply, i.e. if you hit a breakpoint in the function, you are -+ not allowed to call another (gdb limitation, not kgdb). To help -+ this capability we added a memory allocation function. Gdb does not -+ return this memory (it is used for strings that you pass to that function -+ you are calling from gdb) so we fixed up a way to allow you to -+ manually return the memory (see below). ++ while (fieldlength) { ++ nibble = stubhex(*buff++); ++ retval |= nibble; ++ fieldlength--; ++ if (fieldlength) ++ retval = retval << 4; ++ } ++ return retval; ++} ++#endif ++static char * ++pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} + -+4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the -+ interrupt flag to now also include the preemption count and the -+ "in_interrupt" info. The flag is now called "with_pif" to indicate -+ the order, preempt_count, in_interrupt, flag. The preempt_count is -+ shifted left by 4 bits so you can read the count in hex by dropping -+ the low order digit. In_interrupt is in bit 1, and the flag is in -+ bit 0. ++#define BUF_THREAD_ID_SIZE 16 + -+5.) The command: "p kgdb_info" is now expanded and prints something -+ like: -+(gdb) p kgdb_info -+$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, -+ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, -+ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, -+ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} ++static char * ++pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; + -+ Things to note here: a.) used_malloc is the amount of memory that -+ has been malloc'ed to do calls from gdb. You can reclaim this -+ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) -+ cpus_waiting is now "sized" by the number of CPUs you enter at -+ configure time in the kgdb configure section. This is NOT used -+ anywhere else in the system, but it is "nice" here. c.) The task's -+ "pid" is now in the structure. This is the pid you will need to use -+ to decode to the thread id to get gdb to look at that thread. -+ Remember that the "info thread" command prints a list of threads -+ wherein it numbers each thread with its reference number followed -+ by the thread's pid. Note that the per-CPU idle threads actually -+ have pids of 0 (yes, there is more than one pid 0 in an SMP system). -+ To avoid confusion, kgdb numbers these threads with numbers beyond -+ the MAX_PID. That is why you see 32768 and above. ++ altid = (unsigned char *) id; ++ limit = pkt + BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ return pkt; ++} + -+6.) A subtle change, we now provide the complete register set for tasks -+ that are active on the other CPUs. This allows better trace back on -+ those tasks. ++#ifdef old_thread_list ++static char * ++unpack_byte(char *buf, int *value) ++{ ++ *value = stub_unpack_int(buf, 2); ++ return buf + 2; ++} + -+ And, let's mention what we could not fix. Back-trace from all but the -+ thread that we trapped will, most likely, have a bogus entry in it. -+ The problem is that gdb does not recognize the entry code for -+ functions that use "current" near (at all?) the entry. The compiler -+ is putting the "current" decode as the first two instructions of the -+ function where gdb expects to find %ebp changing code. Back trace -+ also has trouble with interrupt frames. I am talking with Daniel -+ Jacobowitz about some way to fix this, but don't hold your breath. ++static char * ++unpack_threadid(char *inbuf, threadref * id) ++{ ++ char *altref; ++ char *limit = inbuf + BUF_THREAD_ID_SIZE; ++ int x, y; + -+20011220.0050.35 -+Major enhancement with this version is the ability to hold one or more -+CPUs in an SMP system while allowing the others to continue. Also, by -+default only the current CPU is enabled on single-step commands (please -+note that gdb issues single-step commands at times other than when you -+use the si command). ++ altref = (char *) id; + -+Another change is to collect some useful information in -+a global structure called "kgdb_info". You should be able to just: ++ while (inbuf < limit) { ++ x = stubhex(*inbuf++); ++ y = stubhex(*inbuf++); ++ *altref++ = (x << 4) | y; ++ } ++ return inbuf; ++} ++#endif ++void ++int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; + -+p kgdb_info ++ scan = (unsigned char *) id; ++ { ++ int i = 4; ++ while (i--) ++ *scan++ = 0; ++ } ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++int ++int_to_hex_v(unsigned char * id, int value) ++{ ++ unsigned char *start = id; ++ int shift; ++ int ch; + -+although I have seen cases where the first time this is done gdb just -+prints the first member but prints the whole structure if you then enter -+CR (carriage return or enter). This also works: ++ for (shift = 28; shift >= 0; shift -= 4) { ++ if ((ch = (value >> shift) & 0xf) || (id != start)) { ++ *id = hexchars[ch]; ++ id++; ++ } ++ } ++ if (id == start) ++ *id++ = '0'; ++ return id - start; ++} ++#ifdef old_thread_list + -+p *&kgdb_info ++static int ++threadref_to_int(threadref * ref) ++{ ++ int i, value = 0; ++ unsigned char *scan; + -+Here is a sample: -+(gdb) p kgdb_info -+$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, -+ vector = 3, print_debug_info = 0} ++ scan = (char *) ref; ++ scan += 4; ++ i = 4; ++ while (i-- > 0) ++ value = (value << 8) | ((*scan++) & 0xff); ++ return value; ++} ++#endif ++static int ++cmp_str(char *s1, char *s2, int count) ++{ ++ while (count--) { ++ if (*s1++ != *s2++) ++ return 0; ++ } ++ return 1; ++} + -+"Called_from" is the return address from the current entry into kgdb. -+Sometimes it is useful to know why you are in kgdb, for example, was -+it an NMI or a real breakpoint? The simple way to interrogate this -+return address is: ++#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ ++extern struct task_struct *kgdb_get_idle(int cpu); ++#define idle_task(cpu) kgdb_get_idle(cpu) ++#else ++#define idle_task(cpu) init_tasks[cpu] ++#endif + -+l *0xc010732c ++extern int kgdb_pid_init_done; + -+which will print the surrounding few lines of source code. ++struct task_struct * ++getthread(int pid) ++{ ++ struct task_struct *thread; ++ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + -+"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the -+kgdb_ts entries). ++ return idle_task(pid - PID_MAX); ++ } else { ++ /* ++ * find_task_by_pid is relatively safe all the time ++ * Other pid functions require lock downs which imply ++ * that we may be interrupting them (as we get here ++ * in the middle of most any lock down). ++ * Still we don't want to call until the table exists! ++ */ ++ if (kgdb_pid_init_done){ ++ thread = find_task_by_pid(pid); ++ if (thread) { ++ return thread; ++ } ++ } ++ } ++ return NULL; ++} ++/* *INDENT-OFF* */ ++struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned addr; ++} breakinfo[4] = { {enabled:0}, ++ {enabled:0}, ++ {enabled:0}, ++ {enabled:0}}; ++/* *INDENT-ON* */ ++unsigned hw_breakpoint_status; ++void ++correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned dr7; + -+"errcode" and "vector" are other entry parameters which may be helpful on -+some traps. ++ asm volatile ("movl %%db7, %0\n":"=r" (dr7) ++ :); ++ /* *INDENT-OFF* */ ++ do { ++ unsigned addr0, addr1, addr2, addr3; ++ asm volatile ("movl %%db0, %0\n" ++ "movl %%db1, %1\n" ++ "movl %%db2, %2\n" ++ "movl %%db3, %3\n" ++ :"=r" (addr0), "=r"(addr1), ++ "=r"(addr2), "=r"(addr3) ++ :); ++ } while (0); ++ /* *INDENT-ON* */ ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ asm volatile ("movl %0, %%dr0\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; + -+"print_debug_info" is the internal debugging kgdb print enable flag. Yes, -+you can modify it. ++ case 1: ++ asm volatile ("movl %0, %%dr1\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; + -+In SMP systems kgdb_info also includes the "cpus_waiting" structure and -+"hold_on_step": ++ case 2: ++ asm volatile ("movl %0, %%dr2\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; + -+(gdb) p kgdb_info -+$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, -+ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ -+ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, -+ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}}} ++ case 3: ++ asm volatile ("movl %0, %%dr3\n"::"r" ++ (breakinfo[breakno].addr)); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) { ++ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); ++ } ++} + -+"Cpus_waiting" has an entry for each CPU other than the current one that -+has been stopped. Each entry contains the task_struct address for that -+CPU, the address of the regs for that task and a hold flag. All these -+have the proper typing so that, for example: ++int ++remove_hw_break(unsigned breakno) ++{ ++ if (!breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 0; ++ return 0; ++} + -+p *kgdb_info.cpus_waiting[1].regs ++int ++set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) ++{ ++ if (breakinfo[breakno].enabled) { ++ return -1; ++ } ++ breakinfo[breakno].enabled = 1; ++ breakinfo[breakno].type = type; ++ breakinfo[breakno].len = len; ++ breakinfo[breakno].addr = addr; ++ return 0; ++} + -+will print the registers for CPU 1. ++#ifdef CONFIG_SMP ++static int in_kgdb_console = 0; + -+"Hold_on_sstep" is a new feature with this version and comes up set or -+true. What this means is that whenever kgdb is asked to single-step all -+other CPUs are held (i.e. not allowed to execute). The flag applies to -+all but the current CPU and, again, can be changed: ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ unsigned flags; ++ int cpu = smp_processor_id(); ++ in_kgdb_called = 1; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ ++ in_kgdb_console) { /* or we are doing slow i/o */ ++ return 1; ++ } ++ return 0; ++ } + -+p kgdb_info.hold_on_sstep=0 ++ /* As I see it the only reason not to let all cpus spin on ++ * the same spin_lock is to allow selected ones to proceed. ++ * This would be a good thing, so we leave it this way. ++ * Maybe someday.... Done ! + -+restores the old behavior of letting all CPUs run during single-stepping. ++ * in_kgdb() is called from an NMI so we don't pretend ++ * to have any resources, like printk() for example. ++ */ + -+Likewise, each CPU has a "hold" flag, which if set, locks that CPU out -+of execution. Note that this has some risk in cases where the CPUs need -+to communicate with each other. If kgdb finds no CPU available on exit, -+it will push a message thru gdb and stay in kgdb. Note that it is legal -+to hold the current CPU as long as at least one CPU can execute. ++ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ ++ /* ++ * log arival of this cpu ++ * The NMI keeps on ticking. Protect against recurring more ++ * than once, and ignor the cpu that has the kgdb lock ++ */ ++ in_kgdb_entry_log[cpu]++; ++ in_kgdb_here_log[cpu] = regs; ++ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { ++ goto exit_in_kgdb; ++ } ++ /* ++ * For protection of the initilization of the spin locks by kgdb ++ * it locks the kgdb spinlock before it gets the wait locks set ++ * up. We wait here for the wait lock to be taken. If the ++ * kgdb lock goes away first?? Well, it could be a slow exit ++ * sequence where the wait lock is removed prior to the kgdb lock ++ * so if kgdb gets unlocked, we just exit. ++ */ ++ while (spin_is_locked(&kgdb_spinlock) && ++ !spin_is_locked(waitlocks + cpu)) ; ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ goto exit_in_kgdb; ++ } ++ waiting_cpus[cpu].task = current; ++ waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); ++ waiting_cpus[cpu].regs = regs; + -+20010621.1117.09 -+This version implements an event queue. Events are signaled by calling -+a function in the kgdb stub and may be examined from gdb. See EVENTS -+below for details. This version also tightens up the interrupt and SMP -+handling to not allow interrupts on the way to kgdb from a breakpoint -+trap. It is fine to allow these interrupts for user code, but not -+system debugging. ++ spin_unlock_wait(waitlocks + cpu); ++ /* ++ * log departure of this cpu ++ */ ++ waiting_cpus[cpu].task = 0; ++ waiting_cpus[cpu].pid = 0; ++ waiting_cpus[cpu].regs = 0; ++ correct_hw_break(); ++ exit_in_kgdb: ++ in_kgdb_here_log[cpu] = 0; ++ kgdb_local_irq_restore(flags); ++ return 1; ++ /* ++ spin_unlock(continuelocks + smp_processor_id()); ++ */ ++} + -+Version -+======= ++void ++smp__in_kgdb(struct pt_regs regs) ++{ ++ ack_APIC_irq(); ++ in_kgdb(®s); ++} ++#else ++int ++in_kgdb(struct pt_regs *regs) ++{ ++ return (kgdb_spinlock); ++} ++#endif + -+This version of the kgdb package was developed and tested on -+kernel version 2.4.16. It will not install on any earlier kernels. -+It is possible that it will continue to work on later versions -+of 2.4 and then versions of 2.5 (I hope). ++void ++printexceptioninfo(int exceptionNo, int errorcode, char *buffer) ++{ ++ unsigned dr6; ++ int i; ++ switch (exceptionNo) { ++ case 1: /* debug exception */ ++ break; ++ case 3: /* breakpoint */ ++ sprintf(buffer, "Software breakpoint"); ++ return; ++ default: ++ sprintf(buffer, "Details not available"); ++ return; ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (dr6 & 0x4000) { ++ sprintf(buffer, "Single step"); ++ return; ++ } ++ for (i = 0; i < 4; ++i) { ++ if (dr6 & (1 << i)) { ++ sprintf(buffer, "Hardware breakpoint %d", i); ++ return; ++ } ++ } ++ sprintf(buffer, "Unknown trap"); ++ return; ++} + ++/* ++ * This function does all command procesing for interfacing to gdb. ++ * ++ * NOTE: The INT nn instruction leaves the state of the interrupt ++ * enable flag UNCHANGED. That means that when this routine ++ * is entered via a breakpoint (INT 3) instruction from code ++ * that has interrupts enabled, then interrupts will STILL BE ++ * enabled when this routine is entered. The first thing that ++ * we do here is disable interrupts so as to prevent recursive ++ * entries and bothersome serial interrupts while we are ++ * trying to run the serial port in polled mode. ++ * ++ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so ++ * it is always necessary to do a restore_flags before returning ++ * so as to let go of that lock. ++ */ ++int ++kgdb_handle_exception(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs) ++{ ++ struct task_struct *usethread = NULL; ++ struct task_struct *thread_list_start = 0, *thread = NULL; ++ int addr, length; ++ int breakno, breaktype; ++ char *ptr; ++ int newPC; ++ threadref thref; ++ int threadid; ++ int thread_min = PID_MAX + MAX_NO_CPUS; ++#ifdef old_thread_list ++ int maxthreads; ++#endif ++ int nothreads; ++ unsigned long flags; ++ int gdb_regs[NUMREGBYTES / 4]; ++ int dr6; ++ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ ++#define NO_NMI 1 ++#define NO_SYNC 2 ++#define regs (*linux_regs) ++#define NUMREGS NUMREGBYTES/4 ++ /* ++ * If the entry is not from the kernel then return to the Linux ++ * trap handler and let it process the interrupt normally. ++ */ ++ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { ++ printk("ignoring non-kernel exception\n"); ++ print_regs(®s); ++ return (0); ++ } + -+Debugging Setup -+=============== ++ kgdb_local_irq_save(flags); + -+Designate one machine as the "development" machine. This is the -+machine on which you run your compiles and which has your source -+code for the kernel. Designate a second machine as the "target" -+machine. This is the machine that will run your experimental -+kernel. ++ /* Get kgdb spinlock */ + -+The two machines will be connected together via a serial line out -+one or the other of the COM ports of the PC. You will need the -+appropriate modem eliminator (null modem) cable(s) for this. ++ KGDB_SPIN_LOCK(&kgdb_spinlock); ++ rdtscll(kgdb_info.entry_tsc); ++ /* ++ * We depend on this spinlock and the NMI watch dog to control the ++ * other cpus. They will arrive at "in_kgdb()" as a result of the ++ * NMI and will wait there for the following spin locks to be ++ * released. ++ */ ++#ifdef CONFIG_SMP + -+Decide on which tty port you want the machines to communicate, then -+connect them up back-to-back using the null modem cable. COM1 is -+/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection -+with the two machines prior to trying to debug a kernel. Once you -+have it working, on the TARGET machine, enter: ++#if 0 ++ if (cpu_callout_map & ~MAX_CPU_MASK) { ++ printk("kgdb : too many cpus, possibly not mapped" ++ " in contiguous space, change MAX_NO_CPUS" ++ " in kgdb_stub and make new kernel.\n" ++ " cpu_callout_map is %lx\n", cpu_callout_map); ++ goto exit_just_unlock; ++ } ++#endif ++ if (spinlock_count == 1) { ++ int time = 0, end_time, dum = 0; ++ int i; ++ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) ++ }; ++ if (remote_debug) { ++ printk("kgdb : cpu %d entry, syncing others\n", ++ smp_processor_id()); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ /* ++ * Use trylock as we may already hold the lock if ++ * we are holding the cpu. Net result is all ++ * locked. ++ */ ++ spin_trylock(&waitlocks[i]); ++ } ++ for (i = 0; i < MAX_NO_CPUS; i++) ++ cpu_logged_in[i] = 0; ++ /* ++ * Wait for their arrival. We know the watch dog is active if ++ * in_kgdb() has ever been called, as it is always called on a ++ * watchdog tick. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; /* Note: we use the High order bits! */ ++ i = 1; ++ if (num_online_cpus() > 1) { ++ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; ++ smp_send_nmi_allbutself(); ++ while (i < num_online_cpus() && time != end_time) { ++ int j; ++ for (j = 0; j < MAX_NO_CPUS; j++) { ++ if (waiting_cpus[j].task && ++ !cpu_logged_in[j]) { ++ i++; ++ cpu_logged_in[j] = 1; ++ if (remote_debug) { ++ printk ++ ("kgdb : cpu %d arrived at kgdb\n", ++ j); ++ } ++ break; ++ } else if (!waiting_cpus[j].task && ++ !cpu_online(j)) { ++ waiting_cpus[j].task = NOCPU; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].hold = 1; ++ break; ++ } ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { + -+setserial /dev/ttyS0 (or what ever tty you are using) ++ int wait = 100000; ++ while (wait--) ; ++ if (!waiting_cpus[j].task && ++ in_kgdb_here_log[j]) { ++ printk ++ ("kgdb : cpu %d stall" ++ " in in_kgdb\n", ++ j); ++ i++; ++ cpu_logged_in[j] = 1; ++ waiting_cpus[j].task = ++ (struct task_struct ++ *) 1; ++ } ++ } ++ } + -+and record the port address and the IRQ number. ++ if (in_kgdb_entry_log[smp_processor_id()] > ++ (me_in_kgdb + 10)) { ++ break; ++ } + -+On the DEVELOPMENT machine you need to apply the patch for the kgdb -+hooks. You have probably already done that if you are reading this -+file. ++ rdtsc(dum, time); ++ } ++ if (i < num_online_cpus()) { ++ printk ++ ("kgdb : time out, proceeding without sync\n"); ++#if 0 ++ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", ++ waiting_cpus[0].task != 0, ++ waiting_cpus[1].task != 0); ++ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", ++ cpu_logged_in[0], cpu_logged_in[1]); ++ printk ++ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", ++ in_kgdb_here_log[0] != 0, ++ in_kgdb_here_log[1] != 0); ++#endif ++ entry_state = NO_SYNC; ++ } else { ++#if 0 ++ int ent = ++ in_kgdb_entry_log[smp_processor_id()] - ++ me_in_kgdb; ++ printk("kgdb : sync after %d entries\n", ent); ++#endif ++ } ++ } else { ++ if (remote_debug) { ++ printk ++ ("kgdb : %d cpus, but watchdog not active\n" ++ "proceeding without locking down other cpus\n", ++ num_online_cpus()); ++ entry_state = NO_NMI; ++ } ++ } ++ } ++#endif + -+On your DEVELOPMENT machine, go to your kernel source directory and do -+"make Xconfig" where X is one of "x", "menu", or "". If you are -+configuring in the standard serial driver, it must not be a module. -+Either yes or no is ok, but making the serial driver a module means it -+will initialize after kgdb has set up the UART interrupt code and may -+cause a failure of the control-C option discussed below. The configure -+question for the serial driver is under the "Character devices" heading -+and is: ++ if (remote_debug) { ++ unsigned long *lp = (unsigned long *) &linux_regs; + -+"Standard/generic (8250/16550 and compatible UARTs) serial support" ++ printk("handle_exception(exceptionVector=%d, " ++ "signo=%d, err_code=%d, linux_regs=%p)\n", ++ exceptionVector, signo, err_code, linux_regs); ++ if (debug_regs) { ++ print_regs(®s); ++ printk("Stk: %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[0], lp[1], lp[2], lp[3], ++ lp[4], lp[5], lp[6], lp[7]); ++ printk(" %8lx %8lx %8lx %8lx" ++ " %8lx %8lx %8lx %8lx\n", ++ lp[8], lp[9], lp[10], lp[11], ++ lp[12], lp[13], lp[14], lp[15]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[16], lp[17], lp[18], lp[19], ++ lp[20], lp[21], lp[22], lp[23]); ++ printk(" %8lx %8lx %8lx %8lx " ++ "%8lx %8lx %8lx %8lx\n", ++ lp[24], lp[25], lp[26], lp[27], ++ lp[28], lp[29], lp[30], lp[31]); ++ } ++ } + -+Go down to the kernel debugging menu item and open it up. Enable the -+kernel kgdb stub code by selecting that item. You can also choose to -+turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler -+to put more debug info (like local symbols) in the object file. On the -+i386 -g and -ggdb are the same so this option just reduces to "O1". The -+-O1 reduces the optimization level. This may be helpful in some cases, -+be aware, however, that this may also mask the problem you are looking -+for. ++ /* Disable hardware debugging while we are in kgdb */ ++ /* Get the debug register status register */ ++/* *INDENT-OFF* */ ++ __asm__("movl %0,%%db7" ++ : /* no output */ ++ :"r"(0)); + -+The baud rate. Default is 115200. What ever you choose be sure that -+the host machine is set to the same speed. I recommend the default. ++ asm volatile ("movl %%db6, %0\n" ++ :"=r" (hw_breakpoint_status) ++ :); + -+The port. This is the I/O address of the serial UART that you should -+have gotten using setserial as described above. The standard COM1 port -+(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ -+3. ++/* *INDENT-ON* */ ++ switch (exceptionVector) { ++ case 0: /* divide error */ ++ case 1: /* debug exception */ ++ case 2: /* NMI */ ++ case 3: /* breakpoint */ ++ case 4: /* overflow */ ++ case 5: /* bounds check */ ++ case 6: /* invalid opcode */ ++ case 7: /* device not available */ ++ case 8: /* double fault (errcode) */ ++ case 10: /* invalid TSS (errcode) */ ++ case 12: /* stack fault (errcode) */ ++ case 16: /* floating point error */ ++ case 17: /* alignment check (errcode) */ ++ default: /* any undocumented */ ++ break; ++ case 11: /* segment not present (errcode) */ ++ case 13: /* general protection (errcode) */ ++ case 14: /* page fault (special errcode) */ ++ case 19: /* cache flush denied */ ++ if (mem_err_expected) { ++ /* ++ * This fault occured because of the ++ * get_char or set_char routines. These ++ * two routines use either eax of edx to ++ * indirectly reference the location in ++ * memory that they are working with. ++ * For a page fault, when we return the ++ * instruction will be retried, so we ++ * have to make sure that these ++ * registers point to valid memory. ++ */ ++ mem_err = 1; /* set mem error flag */ ++ mem_err_expected = 0; ++ mem_err_cnt++; /* helps in debugging */ ++ /* make valid address */ ++ regs.eax = (long) &garbage_loc; ++ /* make valid address */ ++ regs.edx = (long) &garbage_loc; ++ if (remote_debug) ++ printk("Return after memory error: " ++ "mem_err_cnt=%d\n", mem_err_cnt); ++ if (debug_regs) ++ print_regs(®s); ++ goto exit_kgdb; ++ } ++ break; ++ } ++ if (remote_debug) ++ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + -+The port IRQ (see above). ++ gdb_i386vector = exceptionVector; ++ gdb_i386errcode = err_code; ++ kgdb_info.called_from = __builtin_return_address(0); ++#ifdef CONFIG_SMP ++ /* ++ * OK, we can now communicate, lets tell gdb about the sync. ++ * but only if we had a problem. ++ */ ++ switch (entry_state) { ++ case NO_NMI: ++ to_gdb("NMI not active, other cpus not stopped\n"); ++ break; ++ case NO_SYNC: ++ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); ++ default:; ++ } + -+Stack overflow test. This option makes a minor change in the trap, -+system call and interrupt code to detect stack overflow and transfer -+control to kgdb if it happens. (Some platforms have this in the -+baseline code, but the i386 does not.) ++#endif ++/* ++ * Set up the gdb function call area. ++ */ ++ trap_cpu = smp_processor_id(); ++ OLD_esp = NEW_esp = (int) (&linux_regs->esp); + -+You can also configure the system to recognize the boot option -+"console=kgdb" which if given will cause all console output during -+booting to be put thru gdb as well as other consoles. This option -+requires that gdb and kgdb be connected prior to sending console output -+so, if they are not, a breakpoint is executed to force the connection. -+This will happen before any kernel output (it is going thru gdb, right), -+and will stall the boot until the connection is made. ++ IF_SMP(once_again:) ++ /* reply to host that an exception has occurred */ ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; + -+You can also configure in a patch to SysRq to enable the kGdb SysRq. -+This request generates a breakpoint. Since the serial port IRQ line is -+set up after any serial drivers, it is possible that this command will -+work when the control-C will not. ++ putpacket(remcomOutBuffer); + -+Save and exit the Xconfig program. Then do "make clean" , "make dep" -+and "make bzImage" (or whatever target you want to make). This gets the -+kernel compiled with the "-g" option set -- necessary for debugging. ++ while (1 == 1) { ++ error = 0; ++ remcomOutBuffer[0] = 0; ++ getpacket(remcomInBuffer); ++ switch (remcomInBuffer[0]) { ++ case '?': ++ remcomOutBuffer[0] = 'S'; ++ remcomOutBuffer[1] = hexchars[signo >> 4]; ++ remcomOutBuffer[2] = hexchars[signo % 16]; ++ remcomOutBuffer[3] = 0; ++ break; ++ case 'd': ++ remote_debug = !(remote_debug); /* toggle debug flag */ ++ printk("Remote debug %s\n", ++ remote_debug ? "on" : "off"); ++ break; ++ case 'g': /* return the value of the CPU registers */ ++ get_gdb_regs(usethread, ®s, gdb_regs); ++ mem2hex((char *) gdb_regs, ++ remcomOutBuffer, NUMREGBYTES, 0); ++ break; ++ case 'G': /* set the value of the CPU registers - return OK */ ++ hex2mem(&remcomInBuffer[1], ++ (char *) gdb_regs, NUMREGBYTES, 0); ++ if (!usethread || usethread == current) { ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "E00"); ++ } ++ break; + -+You have just built the kernel on your DEVELOPMENT machine that you -+intend to run on your TARGET machine. ++ case 'P':{ /* set the value of a single CPU register - ++ return OK */ ++ /* ++ * For some reason, gdb wants to talk about psudo ++ * registers (greater than 15). These may have ++ * meaning for ptrace, but for us it is safe to ++ * ignor them. We do this by dumping them into ++ * _GS which we also ignor, but do have memory for. ++ */ ++ int regno; + -+To install this new kernel, use the following installation procedure. -+Remember, you are on the DEVELOPMENT machine patching the kernel source -+for the kernel that you intend to run on the TARGET machine. ++ ptr = &remcomInBuffer[1]; ++ regs_to_gdb_regs(gdb_regs, ®s); ++ if ((!usethread || usethread == current) && ++ hexToInt(&ptr, ®no) && ++ *ptr++ == '=' && (regno >= 0)) { ++ regno = ++ (regno >= NUMREGS ? _GS : regno); ++ hex2mem(ptr, (char *) &gdb_regs[regno], ++ 4, 0); ++ gdb_regs_to_regs(gdb_regs, ®s); ++ strcpy(remcomOutBuffer, "OK"); ++ break; ++ } ++ strcpy(remcomOutBuffer, "E01"); ++ break; ++ } + -+Copy this kernel to your target machine using your usual procedures. I -+usually arrange to copy development: -+/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine -+via a LAN based NFS access. That is, I run the cp command on the target -+and copy from the development machine via the LAN. Run Lilo (see "man -+lilo" for details on how to set this up) on the new kernel on the target -+machine so that it will boot! Then boot the kernel on the target -+machine. ++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ ++ case 'm': ++ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { ++ ptr = 0; ++ /* ++ * hex doubles the byte count ++ */ ++ if (length > (BUFMAX / 2)) ++ length = BUFMAX / 2; ++ mem2hex((char *) addr, ++ remcomOutBuffer, length, 1); ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } ++ } + -+On the DEVELOPMENT machine, create a file called .gdbinit in the -+directory /usr/src/linux. An example .gdbinit file looks like this: ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E01"); ++ debug_error ++ ("malformed read memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; + -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 (or what ever speed you have chosen) -+target remote /dev/ttyS0 ++ /* MAA..AA,LLLL: ++ Write LLLL bytes at address AA.AA return OK */ ++ case 'M': ++ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr) && ++ (*(ptr++) == ',') && ++ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { ++ hex2mem(ptr, (char *) addr, length, 1); + ++ if (mem_err) { ++ strcpy(remcomOutBuffer, "E03"); ++ debug_error("memory fault\n", NULL); ++ } else { ++ strcpy(remcomOutBuffer, "OK"); ++ } + -+Change the "echo" and "target" definition so that it specifies the tty -+port that you intend to use. Change the "remotebaud" definition to -+match the data rate that you are going to use for the com line. ++ ptr = 0; ++ } ++ if (ptr) { ++ strcpy(remcomOutBuffer, "E02"); ++ debug_error ++ ("malformed write memory command: %s\n", ++ remcomInBuffer); ++ } ++ break; ++ case 'S': ++ remcomInBuffer[0] = 's'; ++ case 'C': ++ /* Csig;AA..AA where ;AA..AA is optional ++ * continue with signal ++ * Since signals are meaning less to us, delete that ++ * part and then fall into the 'c' code. ++ */ ++ ptr = &remcomInBuffer[1]; ++ length = 2; ++ while (*ptr && *ptr != ';') { ++ length++; ++ ptr++; ++ } ++ if (*ptr) { ++ do { ++ ptr++; ++ *(ptr - length++) = *ptr; ++ } while (*ptr); ++ } else { ++ remcomInBuffer[1] = 0; ++ } + -+You are now ready to try it out. ++ /* cAA..AA Continue at address AA..AA(optional) */ ++ /* sAA..AA Step one instruction from AA..AA(optional) */ ++ /* D detach, reply OK and then continue */ ++ case 'c': ++ case 's': ++ case 'D': + -+Boot your target machine with "kgdb" in the boot command i.e. something -+like: ++ /* try to read optional parameter, ++ pc unchanged if no parm */ ++ ptr = &remcomInBuffer[1]; ++ if (hexToInt(&ptr, &addr)) { ++ if (remote_debug) ++ printk("Changing EIP to 0x%x\n", addr); + -+lilo> test kgdb ++ regs.eip = addr; ++ } + -+or if you also want console output thru gdb: ++ newPC = regs.eip; + -+lilo> test kgdb console=kgdb ++ /* clear the trace bit */ ++ regs.eflags &= 0xfffffeff; + -+You should see the lilo message saying it has loaded the kernel and then -+all output stops. The kgdb stub is trying to connect with gdb. Start -+gdb something like this: ++ /* set the trace bit if we're stepping */ ++ if (remcomInBuffer[0] == 's') ++ regs.eflags |= 0x100; + ++ /* detach is a friendly version of continue. Note that ++ debugging is still enabled (e.g hit control C) ++ */ ++ if (remcomInBuffer[0] == 'D') { ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ } + -+On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". -+When gdb gets the symbols loaded it will read your .gdbinit file and, if -+everything is working correctly, you should see gdb print out a few -+lines indicating that a breakpoint has been taken. It will actually -+show a line of code in the target kernel inside the kgdb activation -+code. ++ if (remote_debug) { ++ printk("Resuming execution\n"); ++ print_regs(®s); ++ } ++ asm volatile ("movl %%db6, %0\n":"=r" (dr6) ++ :); ++ if (!(dr6 & 0x4000)) { ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno) && ++ (breakinfo[breakno].type == 0)) { ++ /* Set restore flag */ ++ regs.eflags |= 0x10000; ++ break; ++ } ++ } ++ } ++ correct_hw_break(); ++ asm volatile ("movl %0, %%db6\n"::"r" (0)); ++ goto exit_kgdb; + -+The gdb interaction should look something like this: ++ /* kill the program */ ++ case 'k': /* do nothing */ ++ break; + -+ linux-dev:/usr/src/linux# gdb vmlinux -+ GDB is free software and you are welcome to distribute copies of it -+ under certain conditions; type "show copying" to see the conditions. -+ There is absolutely no warranty for GDB; type "show warranty" for details. -+ GDB 4.15.1 (i486-slackware-linux), -+ Copyright 1995 Free Software Foundation, Inc... -+ breakpoint () at i386-stub.c:750 -+ 750 } -+ (gdb) ++ /* query */ ++ case 'q': ++ nothreads = 0; ++ switch (remcomInBuffer[1]) { ++ case 'f': ++ threadid = 1; ++ thread_list = 2; ++ thread_list_start = (usethread ? : current); ++ case 's': ++ if (!cmp_str(&remcomInBuffer[2], ++ "ThreadInfo", 10)) ++ break; + -+You can now use whatever gdb commands you like to set breakpoints. -+Enter "continue" to start your target machine executing again. At this -+point the target system will run at full speed until it encounters -+your breakpoint or gets a segment violation in the kernel, or whatever. ++ remcomOutBuffer[nothreads++] = 'm'; ++ for (; threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ nothreads += int_to_hex_v( ++ &remcomOutBuffer[ ++ nothreads], ++ threadid); ++ if (thread_min > threadid) ++ thread_min = threadid; ++ remcomOutBuffer[ ++ nothreads] = ','; ++ nothreads++; ++ if (nothreads > BUFMAX - 10) ++ break; ++ } ++ } ++ if (remcomOutBuffer[nothreads - 1] == 'm') { ++ remcomOutBuffer[nothreads - 1] = 'l'; ++ } else { ++ nothreads--; ++ } ++ remcomOutBuffer[nothreads] = 0; ++ break; + -+If you have the kgdb console enabled when you continue, gdb will print -+out all the console messages. ++#ifdef old_thread_list /* Old thread info request */ ++ case 'L': ++ /* List threads */ ++ thread_list = 2; ++ thread_list_start = (usethread ? : current); ++ unpack_byte(remcomInBuffer + 3, &maxthreads); ++ unpack_threadid(remcomInBuffer + 5, &thref); ++ do { ++ int buf_thread_limit = ++ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; ++ if (maxthreads > buf_thread_limit) { ++ maxthreads = buf_thread_limit; ++ } ++ } while (0); ++ remcomOutBuffer[0] = 'q'; ++ remcomOutBuffer[1] = 'M'; ++ remcomOutBuffer[4] = '0'; ++ pack_threadid(remcomOutBuffer + 5, &thref); + -+The above example caused a breakpoint relatively early in the boot -+process. For the i386 kgdb it is possible to code a break instruction -+as the first C-language point in init/main.c, i.e. as the first instruction -+in start_kernel(). This could be done as follows: ++ threadid = threadref_to_int(&thref); ++ for (nothreads = 0; ++ nothreads < maxthreads && ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ thread = getthread(threadid); ++ if (thread) { ++ int_to_threadref(&thref, ++ threadid); ++ pack_threadid(remcomOutBuffer + ++ 21 + ++ nothreads * 16, ++ &thref); ++ nothreads++; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } ++ } + -+#include -+ breakpoint(); ++ if (threadid == PID_MAX + MAX_NO_CPUS) { ++ remcomOutBuffer[4] = '1'; ++ } ++ pack_hex_byte(remcomOutBuffer + 2, nothreads); ++ remcomOutBuffer[21 + nothreads * 16] = '\0'; ++ break; ++#endif ++ case 'C': ++ /* Current thread id */ ++ remcomOutBuffer[0] = 'Q'; ++ remcomOutBuffer[1] = 'C'; ++ threadid = current->pid; ++ if (!threadid) { ++ /* ++ * idle thread ++ */ ++ for (threadid = PID_MAX; ++ threadid < PID_MAX + MAX_NO_CPUS; ++ threadid++) { ++ if (current == ++ idle_task(threadid - ++ PID_MAX)) ++ break; ++ } ++ } ++ int_to_threadref(&thref, threadid); ++ pack_threadid(remcomOutBuffer + 2, &thref); ++ remcomOutBuffer[18] = '\0'; ++ break; + -+This breakpoint() is really a function that sets up the breakpoint and -+single-step hardware trap cells and then executes a breakpoint. Any -+early hard coded breakpoint will need to use this function. Once the -+trap cells are set up they need not be set again, but doing it again -+does not hurt anything, so you don't need to be concerned about which -+breakpoint is hit first. Once the trap cells are set up (and the kernel -+sets them up in due course even if breakpoint() is never called) the -+macro: ++ case 'E': ++ /* Print exception info */ ++ printexceptioninfo(exceptionVector, ++ err_code, remcomOutBuffer); ++ break; ++ case 'T':{ ++ char * nptr; ++ /* Thread extra info */ ++ if (!cmp_str(&remcomInBuffer[2], ++ "hreadExtraInfo,", 15)) { ++ break; ++ } ++ ptr = &remcomInBuffer[17]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ nptr = &thread->comm[0]; ++ length = 0; ++ ptr = &remcomOutBuffer[0]; ++ do { ++ length++; ++ ptr = pack_hex_byte(ptr, *nptr++); ++ } while (*nptr && length < 16); ++ /* ++ * would like that 16 to be the size of ++ * task_struct.comm but don't know the ++ * syntax.. ++ */ ++ *ptr = 0; ++ } ++ } ++ break; + -+BREAKPOINT; ++ /* task related */ ++ case 'H': ++ switch (remcomInBuffer[1]) { ++ case 'g': ++ ptr = &remcomInBuffer[2]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (!thread) { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ break; ++ } ++ /* ++ * Just in case I forget what this is all about, ++ * the "thread info" command to gdb causes it ++ * to ask for a thread list. It then switches ++ * to each thread and asks for the registers. ++ * For this (and only this) usage, we want to ++ * fudge the registers of tasks not on the run ++ * list (i.e. waiting) to show the routine that ++ * called schedule. Also, gdb, is a minimalist ++ * in that if the current thread is the last ++ * it will not re-read the info when done. ++ * This means that in this case we must show ++ * the real registers. So here is how we do it: ++ * Each entry we keep track of the min ++ * thread in the list (the last that gdb will) ++ * get info for. We also keep track of the ++ * starting thread. ++ * "thread_list" is cleared when switching back ++ * to the min thread if it is was current, or ++ * if it was not current, thread_list is set ++ * to 1. When the switch to current comes, ++ * if thread_list is 1, clear it, else do ++ * nothing. ++ */ ++ usethread = thread; ++ if ((thread_list == 1) && ++ (thread == thread_list_start)) { ++ thread_list = 0; ++ } ++ if (thread_list && (threadid == thread_min)) { ++ if (thread == thread_list_start) { ++ thread_list = 0; ++ } else { ++ thread_list = 1; ++ } ++ } ++ /* follow through */ ++ case 'c': ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ break; ++ } ++ break; + -+will generate an inline breakpoint. This may be more useful as it stops -+the processor at the instruction instead of in a function a step removed -+from the location of interest. In either case must be -+included to define both breakpoint() and BREAKPOINT. ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &threadid); ++ thread = getthread(threadid); ++ if (thread) { ++ remcomOutBuffer[0] = 'O'; ++ remcomOutBuffer[1] = 'K'; ++ remcomOutBuffer[2] = '\0'; ++ if (thread_min > threadid) ++ thread_min = threadid; ++ } else { ++ remcomOutBuffer[0] = 'E'; ++ remcomOutBuffer[1] = '\0'; ++ } ++ break; + -+Triggering kgdbstub at other times -+================================== ++ case 'Y': /* set up a hardware breakpoint */ ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ ptr++; ++ hexToInt(&ptr, &breaktype); ++ ptr++; ++ hexToInt(&ptr, &length); ++ ptr++; ++ hexToInt(&ptr, &addr); ++ if (set_hw_break(breakno & 0x3, ++ breaktype & 0x3, ++ length & 0x3, addr) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; + -+Often you don't need to enter the debugger until much later in the boot -+or even after the machine has been running for some time. Once the -+kernel is booted and interrupts are on, you can force the system to -+enter the debugger by sending a control-C to the debug port. This is -+what the first line of the recommended .gdbinit file does. This allows -+you to start gdb any time after the system is up as well as when the -+system is already at a breakpoint. (In the case where the system is -+already at a breakpoint the control-C is not needed, however, it will -+be ignored by the target so no harm is done. Also note the the echo -+command assumes that the port speed is already set. This will be true -+once gdb has connected, but it is best to set the port speed before you -+run gdb.) ++ /* Remove hardware breakpoint */ ++ case 'y': ++ ptr = &remcomInBuffer[1]; ++ hexToInt(&ptr, &breakno); ++ if (remove_hw_break(breakno & 0x3) == 0) { ++ strcpy(remcomOutBuffer, "OK"); ++ } else { ++ strcpy(remcomOutBuffer, "ERROR"); ++ } ++ break; + -+Another simple way to do this is to put the following file in you ~/bin -+directory: ++ case 'r': /* reboot */ ++ strcpy(remcomOutBuffer, "OK"); ++ putpacket(remcomOutBuffer); ++ /*to_gdb("Rebooting\n"); */ ++ /* triplefault no return from here */ ++ { ++ static long no_idt[2]; ++ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); ++ BREAKPOINT(); ++ } + -+#!/bin/bash -+echo -e "\003" > /dev/ttyS0 ++ } /* switch */ + -+Here, the ttyS0 should be replaced with what ever port you are using. -+The "\003" is control-C. Once you are connected with gdb, you can enter -+control-C at the command prompt. ++ /* reply to the request */ ++ putpacket(remcomOutBuffer); ++ } /* while(1==1) */ ++ /* ++ * reached by goto only. ++ */ ++ exit_kgdb: ++ /* ++ * Here is where we set up to trap a gdb function call. NEW_esp ++ * will be changed if we are trying to do this. We handle both ++ * adding and subtracting, thus allowing gdb to put grung on ++ * the stack which it removes later. ++ */ ++ if (NEW_esp != OLD_esp) { ++ int *ptr = END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) ++ ptr -= (OLD_esp - NEW_esp) / sizeof (int); ++ *--ptr = linux_regs->eflags; ++ *--ptr = linux_regs->xcs; ++ *--ptr = linux_regs->eip; ++ *--ptr = linux_regs->ecx; ++ *--ptr = linux_regs->ebx; ++ *--ptr = linux_regs->eax; ++ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); ++ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; ++ if (NEW_esp < OLD_esp) { ++ linux_regs->eip = (unsigned int) fn_call_stub; ++ } else { ++ linux_regs->eip = (unsigned int) fn_rtn_stub; ++ linux_regs->eax = NEW_esp; ++ } ++ linux_regs->eflags &= ~(IF_BIT | TF_BIT); ++ } ++#ifdef CONFIG_SMP ++ /* ++ * Release gdb wait locks ++ * Sanity check time. Must have at least one cpu to run. Also single ++ * step must not be done if the current cpu is on hold. ++ */ ++ if (spinlock_count == 1) { ++ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; ++ int cpu_avail = 0; ++ int i; + -+An alternative way to get control to the debugger is to enable the kGdb -+SysRq command. Then you would enter Alt-SysRq-g (all three keys at the -+same time, but push them down in the order given). To refresh your -+memory of the available SysRq commands try Alt-SysRq-=. Actually any -+undefined command could replace the "=", but I like to KNOW that what I -+am pushing will never be defined. ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!cpu_online(i)) ++ break; ++ if (!hold_cpu(i)) { ++ cpu_avail = 1; ++ } ++ } ++ /* ++ * Early in the bring up there will be NO cpus on line... ++ */ ++ if (!cpu_avail && !cpus_empty(cpu_online_map)) { ++ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); ++ goto once_again; ++ } ++ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { ++ to_gdb ++ ("Current cpu must be unblocked to single step\n"); ++ goto once_again; ++ } ++ if (!(ss_hold)) { ++ int i; ++ for (i = 0; i < MAX_NO_CPUS; i++) { ++ if (!hold_cpu(i)) { ++ spin_unlock(&waitlocks[i]); ++ } ++ } ++ } else { ++ spin_unlock(&waitlocks[smp_processor_id()]); ++ } ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ /* ++ * If this cpu is on hold, this is where we ++ * do it. Note, the NMI will pull us out of here, ++ * but will return as the above lock is not held. ++ * We will stay here till another cpu releases the lock for us. ++ */ ++ spin_unlock_wait(waitlocks + smp_processor_id()); ++ kgdb_local_irq_restore(flags); ++ return (0); ++ } ++#if 0 ++exit_just_unlock: ++#endif ++#endif ++ /* Release kgdb spinlock */ ++ KGDB_SPIN_UNLOCK(&kgdb_spinlock); ++ kgdb_local_irq_restore(flags); ++ return (0); ++} + -+Debugging hints -+=============== ++/* this function is used to set up exception handlers for tracing and ++ * breakpoints. ++ * This function is not needed as the above line does all that is needed. ++ * We leave it for backward compatitability... ++ */ ++void ++set_debug_traps(void) ++{ ++ /* ++ * linux_debug_hook is defined in traps.c. We store a pointer ++ * to our own exception handler into it. + -+You can break into the target machine at any time from the development -+machine by typing ^C (see above paragraph). If the target machine has -+interrupts enabled this will stop it in the kernel and enter the -+debugger. ++ * But really folks, every hear of labeled common, an old Fortran ++ * concept. Lots of folks can reference it and it is define if ++ * anyone does. Only one can initialize it at link time. We do ++ * this with the hook. See the statement above. No need for any ++ * executable code and it is ready as soon as the kernel is ++ * loaded. Very desirable in kernel debugging. + -+There is unfortunately no way of breaking into the kernel if it is -+in a loop with interrupts disabled, so if this happens to you then -+you need to place exploratory breakpoints or printk's into the kernel -+to find out where it is looping. The exploratory breakpoints can be -+entered either thru gdb or hard coded into the source. This is very -+handy if you do something like: ++ linux_debug_hook = handle_exception ; ++ */ + -+if () BREAKPOINT; ++ /* In case GDB is started before us, ack any packets (presumably ++ "$?#xx") sitting there. ++ putDebugChar ('+'); + ++ initialized = 1; ++ */ ++} + -+There is a copy of an e-mail in the Documentation/i386/kgdb/ directory -+(debug-nmi.txt) which describes how to create an NMI on an ISA bus -+machine using a paper clip. I have a sophisticated version of this made -+by wiring a push button switch into a PC104/ISA bus adapter card. The -+adapter card nicely furnishes wire wrap pins for all the ISA bus -+signals. ++/* This function will generate a breakpoint exception. It is used at the ++ beginning of a program to sync up with a debugger and can be used ++ otherwise as a quick means to stop program execution and "break" into ++ the debugger. */ ++/* But really, just use the BREAKPOINT macro. We will handle the int stuff ++ */ + -+When you are done debugging the kernel on the target machine it is a -+good idea to leave it in a running state. This makes reboots faster, -+bypassing the fsck. So do a gdb "continue" as the last gdb command if -+this is possible. To terminate gdb itself on the development machine -+and leave the target machine running, first clear all breakpoints and -+continue, then type ^Z to suspend gdb and then kill it with "kill %1" or -+something similar. ++#ifdef later ++/* ++ * possibly we should not go thru the traps.c code at all? Someday. ++ */ ++void ++do_kgdb_int3(struct pt_regs *regs, long error_code) ++{ ++ kgdb_handle_exception(3, 5, error_code, regs); ++ return; ++} ++#endif ++#undef regs ++#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS ++asmlinkage void ++bad_sys_call_exit(int stuff) ++{ ++ struct pt_regs *regs = (struct pt_regs *) &stuff; ++ printk("Sys call %d return with %x preempt_count\n", ++ (int) regs->orig_eax, preempt_count()); ++} ++#endif ++#ifdef CONFIG_STACK_OVERFLOW_TEST ++#include ++asmlinkage void ++stack_overflow(void) ++{ ++#ifdef BREAKPOINT ++ BREAKPOINT(); ++#else ++ printk("Kernel stack overflow, looping forever\n"); ++#endif ++ while (1) { ++ } ++} ++#endif + -+If gdbstub Does Not Work -+======================== ++#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) ++char gdbconbuf[BUFMAX]; + -+If it doesn't work, you will have to troubleshoot it. Do the easy -+things first like double checking your cabling and data rates. You -+might try some non-kernel based programs to see if the back-to-back -+connection works properly. Just something simple like cat /etc/hosts -+>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you -+if you can send data from one machine to the other. Make sure it works -+in both directions. There is no point in tearing out your hair in the -+kernel if the line doesn't work. ++static void ++kgdb_gdb_message(const char *s, unsigned count) ++{ ++ int i; ++ int wcount; ++ char *bufptr; ++ /* ++ * This takes care of NMI while spining out chars to gdb ++ */ ++ IF_SMP(in_kgdb_console = 1); ++ gdbconbuf[0] = 'O'; ++ bufptr = gdbconbuf + 1; ++ while (count > 0) { ++ if ((count << 1) > (BUFMAX - 2)) { ++ wcount = (BUFMAX - 2) >> 1; ++ } else { ++ wcount = count; ++ } ++ count -= wcount; ++ for (i = 0; i < wcount; i++) { ++ bufptr = pack_hex_byte(bufptr, s[i]); ++ } ++ *bufptr = '\0'; ++ s += wcount; + -+All of the real action takes place in the file -+/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target -+machine that interacts with gdb on the development machine. In gdb you can -+turn on a debug switch with the following command: ++ putpacket(gdbconbuf); + -+ set remotedebug ++ } ++ IF_SMP(in_kgdb_console = 0); ++} ++#endif ++#ifdef CONFIG_SMP ++static void ++to_gdb(const char *s) ++{ ++ int count = 0; ++ while (s[count] && (count++ < BUFMAX)) ; ++ kgdb_gdb_message(s, count); ++} ++#endif ++#ifdef CONFIG_KGDB_CONSOLE ++#include ++#include ++#include ++#include ++#include + -+This will print out the protocol messages that gdb is exchanging with -+the target machine. ++void ++kgdb_console_write(struct console *co, const char *s, unsigned count) ++{ + -+Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is -+the code that talks to the serial port on the target side. There might -+be a problem there. In particular there is a section of this code that -+tests the UART which will tell you what UART you have if you define -+"PRNT" (just remove "_off" from the #define PRNT_off). To view this -+report you will need to boot the system without any beakpoints. This -+allows the kernel to run to the point where it calls kgdb to set up -+interrupts. At this time kgdb will test the UART and print out the type -+it finds. (You need to wait so that the printks are actually being -+printed. Early in the boot they are cached, waiting for the console to -+be enabled. Also, if kgdb is entered thru a breakpoint it is possible -+to cause a dead lock by calling printk when the console is locked. The -+stub thus avoids doing printks from breakpoints, especially in the -+serial code.) At this time, if the UART fails to do the expected thing, -+kgdb will print out (using printk) information on what failed. (These -+messages will be buried in all the other boot up messages. Look for -+lines that start with "gdb_hook_interrupt:". You may want to use dmesg -+once the system is up to view the log. If this fails or if you still -+don't connect, review your answers for the port address. Use: ++ if (gdb_i386vector == -1) { ++ /* ++ * We have not yet talked to gdb. What to do... ++ * lets break, on continue we can do the write. ++ * But first tell him whats up. Uh, well no can do, ++ * as this IS the console. Oh well... ++ * We do need to wait or the messages will be lost. ++ * Other option would be to tell the above code to ++ * ignore this breakpoint and do an auto return, ++ * but that might confuse gdb. Also this happens ++ * early enough in boot up that we don't have the traps ++ * set up yet, so... ++ */ ++ breakpoint(); ++ } ++ kgdb_gdb_message(s, count); ++} + -+setserial /dev/ttyS0 ++/* ++ * ------------------------------------------------------------ ++ * Serial KGDB driver ++ * ------------------------------------------------------------ ++ */ + -+to get the current port and IRQ information. This command will also -+tell you what the system found for the UART type. The stub recognizes -+the following UART types: ++static struct console kgdbcons = { ++ name:"kgdb", ++ write:kgdb_console_write, ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ device:kgdb_console_device, ++#endif ++ flags:CON_PRINTBUFFER | CON_ENABLED, ++ index:-1, ++}; + -+16450, 16550, and 16550A ++/* ++ * The trick here is that this file gets linked before printk.o ++ * That means we get to peer at the console info in the command ++ * line before it does. If we are up, we register, otherwise, ++ * do nothing. By returning 0, we allow printk to look also. ++ */ ++static int kgdb_console_enabled; + -+If you are really desperate you can use printk debugging in the -+kgdbstub code in the target kernel until you get it working. In particular, -+there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c -+named "remote_debug". Compile your kernel with this set to 1, rather -+than 0 and the debug stub will print out lots of stuff as it does -+what it does. Likewise there are debug printks in the kgdb_serial.c -+code that can be turned on with simple changes in the macro defines. ++int __init ++kgdb_console_init(char *str) ++{ ++ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { ++ register_console(&kgdbcons); ++ kgdb_console_enabled = 1; ++ } ++ return 0; /* let others look at the string */ ++} + ++__setup("console=", kgdb_console_init); + -+Debugging Loadable Modules -+========================== ++#ifdef CONFIG_KGDB_USER_CONSOLE ++static kdev_t kgdb_console_device(struct console *c); ++/* This stuff sort of works, but it knocks out telnet devices ++ * we are leaving it here in case we (or you) find time to figure it out ++ * better.. ++ */ + -+This technique comes courtesy of Edouard Parmelan -+ ++/* ++ * We need a real char device as well for when the console is opened for user ++ * space activities. ++ */ + -+When you run gdb, enter the command ++static int ++kgdb_consdev_open(struct inode *inode, struct file *file) ++{ ++ return 0; ++} + -+source gdbinit-modules ++static ssize_t ++kgdb_consdev_write(struct file *file, const char *buf, ++ size_t count, loff_t * ppos) ++{ ++ int size, ret = 0; ++ static char kbuf[128]; ++ static DECLARE_MUTEX(sem); + -+This will read in a file of gdb macros that was installed in your -+kernel source directory when kgdb was installed. This file implements -+the following commands: ++ /* We are not reentrant... */ ++ if (down_interruptible(&sem)) ++ return -ERESTARTSYS; + -+mod-list -+ Lists the loaded modules in the form ++ while (count > 0) { ++ /* need to copy the data from user space */ ++ size = count; ++ if (size > sizeof (kbuf)) ++ size = sizeof (kbuf); ++ if (copy_from_user(kbuf, buf, size)) { ++ ret = -EFAULT; ++ break;; ++ } ++ kgdb_console_write(&kgdbcons, kbuf, size); ++ count -= size; ++ ret += size; ++ buf += size; ++ } + -+mod-print-symbols -+ Prints all the symbols in the indicated module. ++ up(&sem); + -+mod-add-symbols -+ Loads the symbols from the object file and associates them -+ with the indicated module. ++ return ret; ++} + -+After you have loaded the module that you want to debug, use the command -+mod-list to find the of your module. Then use that -+address in the mod-add-symbols command to load your module's symbols. -+From that point onward you can debug your module as if it were a part -+of the kernel. ++struct file_operations kgdb_consdev_fops = { ++ open:kgdb_consdev_open, ++ write:kgdb_consdev_write ++}; ++static kdev_t ++kgdb_console_device(struct console *c) ++{ ++ return MKDEV(TTYAUX_MAJOR, 1); ++} + -+The file gdbinit-modules also contains a command named mod-add-lis as -+an example of how to construct a command of your own to load your -+favorite module. The idea is to "can" the pathname of the module -+in the command so you don't have to type so much. ++/* ++ * This routine gets called from the serial stub in the i386/lib ++ * This is so it is done late in bring up (just before the console open). ++ */ ++void ++kgdb_console_finit(void) ++{ ++ if (kgdb_console_enabled) { ++ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); ++ char *cp = cptr; ++ while (*cptr && *cptr != '(') ++ cptr++; ++ *cptr = 0; ++ unregister_chrdev(TTYAUX_MAJOR, cp); ++ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); ++ } ++} ++#endif ++#endif ++#ifdef CONFIG_KGDB_TS ++#include /* time stamp code */ ++#include /* in_interrupt */ ++#ifdef CONFIG_KGDB_TS_64 ++#define DATA_POINTS 64 ++#endif ++#ifdef CONFIG_KGDB_TS_128 ++#define DATA_POINTS 128 ++#endif ++#ifdef CONFIG_KGDB_TS_256 ++#define DATA_POINTS 256 ++#endif ++#ifdef CONFIG_KGDB_TS_512 ++#define DATA_POINTS 512 ++#endif ++#ifdef CONFIG_KGDB_TS_1024 ++#define DATA_POINTS 1024 ++#endif ++#ifndef DATA_POINTS ++#define DATA_POINTS 128 /* must be a power of two */ ++#endif ++#define INDEX_MASK (DATA_POINTS - 1) ++#if (INDEX_MASK & DATA_POINTS) ++#error "CONFIG_KGDB_TS_COUNT must be a power of 2" ++#endif ++struct kgdb_and_then_struct { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ int data0; ++ int data1; ++}; ++struct kgdb_and_then_struct2 { ++#ifdef CONFIG_SMP ++ int on_cpu; ++#endif ++ struct task_struct *task; ++ long long at_time; ++ int from_ln; ++ char *in_src; ++ void *from; ++ int *with_shpf; ++ struct task_struct *t1; ++ struct task_struct *t2; ++}; ++struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + -+Threads -+======= ++struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; ++int kgdb_and_then_count; + -+Each process in a target machine is seen as a gdb thread. gdb thread -+related commands (info threads, thread n) can be used. ++void ++kgdb_tstamp(int line, char *source, int data0, int data1) ++{ ++ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; ++ int flags; ++ kgdb_local_irq_save(flags); ++ spin_lock(&ts_spin); ++ rdtscll(kgdb_and_then->at_time); ++#ifdef CONFIG_SMP ++ kgdb_and_then->on_cpu = smp_processor_id(); ++#endif ++ kgdb_and_then->task = current; ++ kgdb_and_then->from_ln = line; ++ kgdb_and_then->in_src = source; ++ kgdb_and_then->from = __builtin_return_address(0); ++ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | ++ (preempt_count() << 8)); ++ kgdb_and_then->data0 = data0; ++ kgdb_and_then->data1 = data1; ++ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; ++ spin_unlock(&ts_spin); ++ kgdb_local_irq_restore(flags); ++#ifdef CONFIG_PREEMPT + -+ia-32 hardware breakpoints -+========================== ++#endif ++ return; ++} ++#endif ++typedef int gdb_debug_hook(int exceptionVector, ++ int signo, int err_code, struct pt_regs *linux_regs); ++gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ +Index: linux/arch/i386/kernel/nmi.c +=================================================================== +--- linux.orig/arch/i386/kernel/nmi.c ++++ linux/arch/i386/kernel/nmi.c +@@ -34,7 +34,17 @@ + + #include "mach_traps.h" + ++#ifdef CONFIG_KGDB ++#include ++#ifdef CONFIG_SMP ++unsigned int nmi_watchdog = NMI_IO_APIC; ++#else ++unsigned int nmi_watchdog = NMI_LOCAL_APIC; ++#endif ++#else + unsigned int nmi_watchdog = NMI_NONE; ++#endif + -+kgdb stub contains support for hardware breakpoints using debugging features -+of ia-32(x86) processors. These breakpoints do not need code modification. -+They use debugging registers. 4 hardware breakpoints are available in ia-32 -+processors. + extern int unknown_nmi_panic; + static unsigned int nmi_hz = HZ; + static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +@@ -466,6 +476,9 @@ void touch_nmi_watchdog (void) + for (i = 0; i < NR_CPUS; i++) + alert_counter[i] = 0; + } ++#ifdef CONFIG_KGDB ++int tune_watchdog = 5*HZ; ++#endif + + extern void die_nmi(struct pt_regs *, const char *msg); + +@@ -481,12 +494,24 @@ void nmi_watchdog_tick (struct pt_regs * + + sum = irq_stat[cpu].apic_timer_irqs; + ++#ifdef CONFIG_KGDB ++ if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) { + -+Each hardware breakpoint can be of one of the following three types. ++#else + if (last_irq_sums[cpu] == sum) { ++#endif + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + alert_counter[cpu]++; ++#ifdef CONFIG_KGDB ++ if (alert_counter[cpu] == tune_watchdog) { ++ kgdb_handle_exception(2, SIGPWR, 0, regs); ++ last_irq_sums[cpu] = sum; ++ alert_counter[cpu] = 0; ++ } ++#endif + if (alert_counter[cpu] == 30*nmi_hz) + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } else { +Index: linux/arch/i386/kernel/smp.c +=================================================================== +--- linux.orig/arch/i386/kernel/smp.c ++++ linux/arch/i386/kernel/smp.c +@@ -459,7 +459,17 @@ void flush_tlb_all(void) + { + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); + } +- ++#ifdef CONFIG_KGDB ++/* ++ * By using the NMI code instead of a vector we just sneak thru the ++ * word generator coming out with just what we want. AND it does ++ * not matter if clustered_apic_mode is set or not. ++ */ ++void smp_send_nmi_allbutself(void) ++{ ++ send_IPI_allbutself(APIC_DM_NMI); ++} ++#endif + /* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing +Index: linux/arch/i386/kernel/traps.c +=================================================================== +--- linux.orig/arch/i386/kernel/traps.c ++++ linux/arch/i386/kernel/traps.c +@@ -103,6 +103,39 @@ int register_die_notifier(struct notifie + return err; + } + ++#ifdef CONFIG_KGDB ++extern void sysenter_past_esp(void); ++#include ++#include ++void set_intr_gate(unsigned int n, void *addr); ++static void set_intr_usr_gate(unsigned int n, void *addr); ++/* ++ * Should be able to call this breakpoint() very early in ++ * bring up. Just hard code the call where needed. ++ * The breakpoint() code is here because set_?_gate() functions ++ * are local (static) to trap.c. They need be done only once, ++ * but it does not hurt to do them over. ++ */ ++void breakpoint(void) ++{ ++ set_intr_usr_gate(3,&int3); /* disable ints on trap */ ++ set_intr_gate(1,&debug); ++ set_intr_gate(14,&page_fault); + -+1. Execution breakpoint - An Execution breakpoint is triggered when code -+ at the breakpoint address is executed. ++ BREAKPOINT(); ++} ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ ++ { \ ++ if (!user_mode(regs) ) \ ++ { \ ++ kgdb_handle_exception(trapnr, signr, error_code, regs); \ ++ after; \ ++ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ ++ } ++#else ++#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) ++#endif + -+ As limited number of hardware breakpoints are available, it is -+ advisable to use software breakpoints ( break command ) instead -+ of execution hardware breakpoints, unless modification of code -+ is to be avoided. + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) + { + return p > (void *)tinfo && +@@ -328,6 +361,15 @@ void die(const char * str, struct pt_reg + #endif + if (nl) + printk("\n"); ++#ifdef CONFIG_KGDB ++ /* This is about the only place we want to go to kgdb even if in ++ * user mode. But we must go in via a trap so within kgdb we will ++ * always be in kernel mode. ++ */ ++ if (user_mode(regs)) ++ BREAKPOINT(); ++#endif ++ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) + notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); + show_registers(regs); + try_crashdump(regs); +@@ -453,6 +495,7 @@ static inline void do_trap(int trapnr, i + #define DO_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,) \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ +@@ -476,6 +519,7 @@ asmlinkage void do_##name(struct pt_regs + #define DO_VM86_ERROR(trapnr, signr, str, name) \ + asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ + { \ ++ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return) \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ +@@ -605,6 +649,7 @@ gp_in_vm86: + gp_in_kernel: + if (!fixup_exception(regs)) { + die: ++ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; +@@ -829,8 +874,18 @@ asmlinkage void do_debug(struct pt_regs + * allowing programs to debug themselves without the ptrace() + * interface. + */ ++#ifdef CONFIG_KGDB ++ /* ++ * I think this is the only "real" case of a TF in the kernel ++ * that really belongs to user space. Others are ++ * "Ours all ours!" ++ */ ++ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_past_esp)) ++ goto clear_TF_reenable; ++#else + if ((regs->xcs & 3) == 0) + goto clear_TF_reenable; ++#endif + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } +@@ -842,6 +897,17 @@ asmlinkage void do_debug(struct pt_regs + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + ++#ifdef CONFIG_KGDB ++ /* ++ * If this is a kernel mode trap, we need to reset db7 to allow us ++ * to continue sanely ALSO skip the signal delivery ++ */ ++ if ((regs->xcs & 3) == 0) ++ goto clear_dr7; + -+2. Write breakpoint - A write breakpoint is triggered when memory -+ location at the breakpoint address is written. ++ /* if not kernel, allow ints but only if they were on */ ++ if ( regs->eflags & 0x200) local_irq_enable(); ++#endif + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ +@@ -856,6 +922,7 @@ clear_dr7: + __asm__("movl %0,%%db7" + : /* no output */ + : "r" (0)); ++ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) + return; + + debug_vm86: +@@ -1151,6 +1218,12 @@ static void __init set_task_gate(unsigne + { + _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); + } ++#ifdef CONFIG_KGDB ++void set_intr_usr_gate(unsigned int n, void *addr) ++{ ++ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); ++} ++#endif + + + void __init trap_init(void) +@@ -1169,7 +1242,11 @@ void __init trap_init(void) + set_trap_gate(0,÷_error); + set_intr_gate(1,&debug); + set_intr_gate(2,&nmi); ++#ifndef CONFIG_KGDB + set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ ++#else ++ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ ++#endif + set_system_gate(4,&overflow); + set_system_gate(5,&bounds); + set_trap_gate(6,&invalid_op); +Index: linux/arch/i386/lib/Makefile +=================================================================== +--- linux.orig/arch/i386/lib/Makefile ++++ linux/arch/i386/lib/Makefile +@@ -8,3 +8,4 @@ lib-y = checksum.o delay.o usercopy.o ge + + lib-$(CONFIG_X86_USE_3DNOW) += mmx.o + lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o ++lib-$(CONFIG_KGDB) += kgdb_serial.o +Index: linux/arch/i386/lib/kgdb_serial.c +=================================================================== +--- linux.orig/arch/i386/lib/kgdb_serial.c ++++ linux/arch/i386/lib/kgdb_serial.c +@@ -0,0 +1,485 @@ ++/* ++ * Serial interface GDB stub ++ * ++ * Written (hacked together) by David Grothe (dave@gcom.com) ++ * Modified to allow invokation early in boot see also ++ * kgdb.h for instructions by George Anzinger(george@mvista.com) ++ * ++ */ + -+ A write or can be placed for data of variable length. Length of -+ a write breakpoint indicates length of the datatype to be -+ watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for -+ 4 byte data. ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_KGDB_USER_CONSOLE ++extern void kgdb_console_finit(void); ++#endif ++#define PRNT_off ++#define TEST_EXISTANCE ++#ifdef PRNT ++#define dbprintk(s) printk s ++#else ++#define dbprintk(s) ++#endif ++#define TEST_INTERRUPT_off ++#ifdef TEST_INTERRUPT ++#define intprintk(s) printk s ++#else ++#define intprintk(s) ++#endif + -+3. Access breakpoint - An access breakpoint is triggered when memory -+ location at the breakpoint address is either read or written. ++#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + -+ Access breakpoints also have lengths similar to write breakpoints. ++#define GDB_BUF_SIZE 512 /* power of 2, please */ + -+IO breakpoints in ia-32 are not supported. ++static char gdb_buf[GDB_BUF_SIZE]; ++static int gdb_buf_in_inx; ++static atomic_t gdb_buf_in_cnt; ++static int gdb_buf_out_inx; + -+Since gdb stub at present does not use the protocol used by gdb for hardware -+breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros -+for hardware breakpoints are described below. ++struct async_struct *gdb_async_info; ++static int gdb_async_irq; + -+hwebrk - Places an execution breakpoint -+ hwebrk breakpointno address -+hwwbrk - Places a write breakpoint -+ hwwbrk breakpointno length address -+hwabrk - Places an access breakpoint -+ hwabrk breakpointno length address -+hwrmbrk - Removes a breakpoint -+ hwrmbrk breakpointno -+exinfo - Tells whether a software or hardware breakpoint has occurred. -+ Prints number of the hardware breakpoint if a hardware breakpoint has -+ occurred. ++#define outb_px(a,b) outb_p(b,a) + -+Arguments required by these commands are as follows -+breakpointno - 0 to 3 -+length - 1 to 3 -+address - Memory location in hex digits ( without 0x ) e.g c015e9bc ++static void program_uart(struct async_struct *info); ++static void write_char(struct async_struct *info, int chr); ++/* ++ * Get a byte from the hardware data buffer and return it ++ */ ++static int ++read_data_bfr(struct async_struct *info) ++{ ++ char it = inb_p(info->port + UART_LSR); + -+SMP support -+========== ++ if (it & UART_LSR_DR) ++ return (inb_p(info->port + UART_RX)); ++ /* ++ * If we have a framing error assume somebody messed with ++ * our uart. Reprogram it and send '-' both ways... ++ */ ++ if (it & 0xc) { ++ program_uart(info); ++ write_char(info, '-'); ++ return ('-'); ++ } ++ return (-1); + -+When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb -+client, all the processors are forced to enter the debugger. Current -+thread corresponds to the thread running on the processor where -+breakpoint occurred. Threads running on other processor(s) appear -+similar to other non-running threads in the 'info threads' output. -+Within the kgdb stub there is a structure "waiting_cpus" in which kgdb -+records the values of "current" and "regs" for each CPU other than the -+one that hit the breakpoint. "current" is a pointer to the task -+structure for the task that CPU is running, while "regs" points to the -+saved registers for the task. This structure can be examined with the -+gdb "p" command. ++} /* read_data_bfr */ + -+ia-32 hardware debugging registers on all processors are set to same -+values. Hence any hardware breakpoints may occur on any processor. ++/* ++ * Get a char if available, return -1 if nothing available. ++ * Empty the receive buffer first, then look at the interface hardware. + -+gdb troubleshooting -+=================== ++ * Locking here is a bit of a problem. We MUST not lock out communication ++ * if we are trying to talk to gdb about a kgdb entry. ON the other hand ++ * we can loose chars in the console pass thru if we don't lock. It is also ++ * possible that we could hold the lock or be waiting for it when kgdb ++ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. ++ * We do, of course have possible issues with interrupting a uart operation, ++ * but we will just depend on the uart status to help keep that straight. + -+1. gdb hangs -+Kill it. restart gdb. Connect to target machine. ++ */ ++static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; ++#ifdef CONFIG_SMP ++extern spinlock_t kgdb_spinlock; ++#endif + -+2. gdb cannot connect to target machine (after killing a gdb and -+restarting another) If the target machine was not inside debugger when -+you killed gdb, gdb cannot connect because the target machine won't -+respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. -+e.g. echo -e "\003" > /dev/ttyS1 -+This forces that target machine into the debugger, after which you -+can connect. ++static int ++read_char(struct async_struct *info) ++{ ++ int chr; ++ unsigned long flags; ++ local_irq_save(flags); ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_lock(&uart_interrupt_lock); ++ } ++#endif ++ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ ++ chr = gdb_buf[gdb_buf_out_inx++]; ++ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); ++ atomic_dec(&gdb_buf_in_cnt); ++ } else { ++ chr = read_data_bfr(info); ++ } ++#ifdef CONFIG_SMP ++ if (!spin_is_locked(&kgdb_spinlock)) { ++ spin_unlock(&uart_interrupt_lock); ++ } ++#endif ++ local_irq_restore(flags); ++ return (chr); ++} + -+3. gdb cannot connect even after echoing Ctrl+C into serial line -+Try changing serial line settings min to 1 and time to 0 -+e.g. stty min 1 time 0 < /dev/ttyS1 -+Try echoing again ++/* ++ * Wait until the interface can accept a char, then write it. ++ */ ++static void ++write_char(struct async_struct *info, int chr) ++{ ++ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + -+Check serial line speed and set it to correct value if required -+e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 ++ outb_p(chr, info->port + UART_TX); + -+EVENTS -+====== ++} /* write_char */ + -+Ever want to know the order of things happening? Which CPU did what and -+when? How did the spinlock get the way it is? Then events are for -+you. Events are defined by calls to an event collection interface and -+saved for later examination. In this case, kgdb events are saved by a -+very fast bit of code in kgdb which is fully SMP and interrupt protected -+and they are examined by using gdb to display them. Kgdb keeps only -+the last N events, where N must be a power of two and is defined at -+configure time. ++/* ++ * Mostly we don't need a spinlock, but since the console goes ++ * thru here with interrutps on, well, we need to catch those ++ * chars. ++ */ ++/* ++ * This is the receiver interrupt routine for the GDB stub. ++ * It will receive a limited number of characters of input ++ * from the gdb host machine and save them up in a buffer. ++ * ++ * When the gdb stub routine getDebugChar() is called it ++ * draws characters out of the buffer until it is empty and ++ * then reads directly from the serial port. ++ * ++ * We do not attempt to write chars from the interrupt routine ++ * since the stubs do all of that via putDebugChar() which ++ * writes one byte after waiting for the interface to become ++ * ready. ++ * ++ * The debug stubs like to run with interrupts disabled since, ++ * after all, they run as a consequence of a breakpoint in ++ * the kernel. ++ * ++ * Perhaps someone who knows more about the tty driver than I ++ * care to learn can make this work for any low level serial ++ * driver. ++ */ ++static irqreturn_t ++gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct async_struct *info; ++ unsigned long flags; + ++ info = gdb_async_info; ++ if (!info || !info->tty || irq != gdb_async_irq) ++ return IRQ_NONE; + -+Events are signaled to kgdb by calling: ++ local_irq_save(flags); ++ spin_lock(&uart_interrupt_lock); ++ do { ++ int chr = read_data_bfr(info); ++ intprintk(("Debug char on int: %x hex\n", chr)); ++ if (chr < 0) ++ continue; + -+kgdb_ts(data0,data1) ++ if (chr == 3) { /* Ctrl-C means remote interrupt */ ++ BREAKPOINT(); ++ continue; ++ } + -+For each call kgdb records each call in an array along with other info. -+Here is the array definition: ++ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { ++ /* buffer overflow tosses early char */ ++ read_char(info); ++ } ++ gdb_buf[gdb_buf_in_inx++] = chr; ++ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); ++ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); ++ spin_unlock(&uart_interrupt_lock); ++ local_irq_restore(flags); ++ return IRQ_HANDLED; ++} /* gdb_interrupt */ + -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ int data0; -+ int data1; -+}; ++/* ++ * Just a NULL routine for testing. ++ */ ++void ++gdb_null(void) ++{ ++} /* gdb_null */ + -+For SMP machines the CPU is recorded, for all machines the TSC is -+recorded (gets a time stamp) as well as the line number and source file -+the call was made from. The address of the (from), the "if" (interrupt -+flag) and the two data items are also recorded. The macro kgdb_ts casts -+the types to int, so you can put any 32-bit values here. There is a -+configure option to select the number of events you want to keep. A -+nice number might be 128, but you can keep up to 1024 if you want. The -+number must be a power of two. An "andthen" macro library is provided -+for gdb to help you look at these events. It is also possible to define -+a different structure for the event storage and cast the data to this -+structure. For example the following structure is defined in kgdb: ++/* These structure are filled in with values defined in asm/kgdb_local.h ++ */ ++static struct serial_state state = SB_STATE; ++static struct async_struct local_info = SB_INFO; ++static int ok_to_enable_ints = 0; ++static void kgdb_enable_ints_now(void); + -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; ++extern char *kgdb_version; ++/* ++ * Hook an IRQ for KGDB. ++ * ++ * This routine is called from putDebugChar, below. ++ */ ++static int ints_disabled = 1; ++int ++gdb_hook_interrupt(struct async_struct *info, int verb) ++{ ++ struct serial_state *state = info->state; ++ unsigned long flags; ++ int port; ++#ifdef TEST_EXISTANCE ++ int scratch, scratch2; +#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; + -+If you use this for display, the data elements will be displayed as -+pointers to task_struct entries. You may want to define your own -+structure to use in casting. You should only change the last two items -+and you must keep the structure size the same. Kgdb will handle these -+as 32-bit ints, but within that constraint you can define a structure to -+cast to any 32-bit quantity. This need only be available to gdb and is -+only used for casting in the display code. ++ /* The above fails if memory managment is not set up yet. ++ * Rather than fail the set up, just keep track of the fact ++ * and pick up the interrupt thing later. ++ */ ++ gdb_async_info = info; ++ port = gdb_async_info->port; ++ gdb_async_irq = state->irq; ++ if (verb) { ++ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", ++ kgdb_version, ++ port, ++ gdb_async_irq, gdb_async_info->state->custom_divisor); ++ } ++ local_irq_save(flags); ++#ifdef TEST_EXISTANCE ++ /* Existance test */ ++ /* Should not need all this, but just in case.... */ ++ ++ scratch = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, 0); ++ outb_px(0xff, 0x080); ++ scratch2 = inb_p(port + UART_IER); ++ outb_px(port + UART_IER, scratch); ++ if (scratch2) { ++ printk ++ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); ++ local_irq_restore(flags); ++ return 1; /* We failed; there's nothing here */ ++ } ++ scratch2 = inb_p(port + UART_LCR); ++ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ ++ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ ++ outb_px(port + UART_LCR, 0); ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); ++ scratch = inb_p(port + UART_IIR) >> 6; ++ if (scratch == 1) { ++ printk("gdb_hook_interrupt: Undefined UART type!" ++ " Not a UART! \n"); ++ local_irq_restore(flags); ++ return 1; ++ } else { ++ dbprintk(("gdb_hook_interrupt: UART type " ++ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); ++ } ++ scratch = inb_p(port + UART_MCR); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); ++ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); ++ scratch2 = inb_p(port + UART_MSR) & 0xF0; ++ outb_px(port + UART_MCR, scratch); ++ if (scratch2 != 0x90) { ++ printk("gdb_hook_interrupt: " ++ "Loop back test failed! Not a UART!\n"); ++ local_irq_restore(flags); ++ return scratch2 + 1000; /* force 0 to fail */ ++ } ++#endif /* test existance */ ++ program_uart(info); ++ local_irq_restore(flags); + -+Final Items -+=========== ++ return (0); + -+I picked up this code from Amit S. Kale and enhanced it. ++} /* gdb_hook_interrupt */ + -+If you make some really cool modification to this stuff, or if you -+fix a bug, please let me know. ++static void ++program_uart(struct async_struct *info) ++{ ++ int port = info->port; + -+George Anzinger -+ ++ (void) inb_p(port + UART_RX); ++ outb_px(port + UART_IER, 0); + -+Amit S. Kale -+ ++ (void) inb_p(port + UART_RX); /* serial driver comments say */ ++ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ ++ (void) inb_p(port + UART_MSR); ++ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); ++ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ ++ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ ++ outb_px(port + UART_MCR, info->MCR); + -+(First kgdb by David Grothe ) ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ ++ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ ++ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ return; ++} + -+(modified by Tigran Aivazian ) -+ Putting gdbstub into the kernel config menu. ++/* ++ * getDebugChar ++ * ++ * This is a GDB stub routine. It waits for a character from the ++ * serial interface and then returns it. If there is no serial ++ * interface connection then it returns a bogus value which will ++ * almost certainly cause the system to hang. In the ++ */ ++int kgdb_in_isr = 0; ++int kgdb_in_lsr = 0; ++extern spinlock_t kgdb_spinlock; + -+(modified by Scott Foehner ) -+ Hooks for entering gdbstub at boot time. ++/* Caller takes needed protections */ + -+(modified by Amit S. Kale ) -+ Threads, ia-32 hw debugging, mp support, console support, -+ nmi watchdog handling. ++int ++getDebugChar(void) ++{ ++ volatile int chr, dum, time, end_time; + -+(modified by George Anzinger ) -+ Extended threads to include the idle threads. -+ Enhancements to allow breakpoint() at first C code. -+ Use of module_init() and __setup() to automate the configure. -+ Enhanced the cpu "collection" code to work in early bring-up. -+ Added ability to call functions from gdb -+ Print info thread stuff without going back to schedule() -+ Now collect the "other" cpus with an IPI/ NMI. -diff -puN /dev/null Documentation/i386/kgdb/loadmodule.sh ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/loadmodule.sh 2004-10-21 14:54:15.325593648 -0700 -@@ -0,0 +1,78 @@ -+#/bin/sh -+# This script loads a module on a target machine and generates a gdb script. -+# source generated gdb script to load the module file at appropriate addresses -+# in gdb. -+# -+# Usage: -+# Loading the module on target machine and generating gdb script) -+# [foo]$ loadmodule.sh -+# -+# Loading the module file into gdb -+# (gdb) source -+# -+# Modify following variables according to your setup. -+# TESTMACHINE - Name of the target machine -+# GDBSCRIPTS - The directory where a gdb script will be generated -+# -+# Author: Amit S. Kale (akale@veritas.com). -+# -+# If you run into problems, please check files pointed to by following -+# variables. -+# ERRFILE - /tmp/.errs contains stderr output of insmod -+# MAPFILE - /tmp/.map contains stdout output of insmod -+# GDBSCRIPT - $GDBSCRIPTS/load gdb script. ++ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); + -+TESTMACHINE=foo -+GDBSCRIPTS=/home/bar ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } ++ /* ++ * This trick says if we wait a very long time and get ++ * no char, return the -1 and let the upper level deal ++ * with it. ++ */ ++ rdtsc(dum, time); ++ end_time = time + 2; ++ while (((chr = read_char(gdb_async_info)) == -1) && ++ (end_time - time) > 0) { ++ rdtsc(dum, time); ++ }; ++ /* ++ * This covers our butts if some other code messes with ++ * our uart, hay, it happens :o) ++ */ ++ if (chr == -1) ++ program_uart(gdb_async_info); + -+if [ $# -lt 1 ] ; then { -+ echo Usage: $0 modulefile -+ exit -+} ; fi ++ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); ++ return (chr); + -+MODULEFILE=$1 -+MODULEFILEBASENAME=`basename $1` ++} /* getDebugChar */ + -+if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { -+ MODULEFILE=`pwd`/$MODULEFILE -+} fi ++static int count = 3; ++static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + -+ERRFILE=/tmp/$MODULEFILEBASENAME.errs -+MAPFILE=/tmp/$MODULEFILEBASENAME.map -+GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME ++static int __init ++kgdb_enable_ints(void) ++{ ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 1); ++ } ++ ok_to_enable_ints = 1; ++ kgdb_enable_ints_now(); ++#ifdef CONFIG_KGDB_USER_CONSOLE ++ kgdb_console_finit(); ++#endif ++ return 0; ++} + -+function findaddr() { -+ local ADDR=0x$(echo "$SEGMENTS" | \ -+ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ -+ sed 's/[ ]*[^ ]*$//') -+ echo $ADDR ++#ifdef CONFIG_SERIAL_8250 ++void shutdown_for_kgdb(struct async_struct *gdb_async_info); ++#endif ++ ++#ifdef CONFIG_DISCONTIGMEM ++static inline int kgdb_mem_init_done(void) ++{ ++ return highmem_start_page != NULL; ++} ++#else ++static inline int kgdb_mem_init_done(void) ++{ ++ return max_mapnr != 0; +} ++#endif + -+function checkerrs() { -+ if [ "`cat $ERRFILE`" != "" ] ; then { -+ cat $ERRFILE -+ exit -+ } fi ++static void ++kgdb_enable_ints_now(void) ++{ ++ if (!spin_trylock(&one_at_atime)) ++ return; ++ if (!ints_disabled) ++ goto exit; ++ if (kgdb_mem_init_done() && ++ ints_disabled) { /* don't try till mem init */ ++#ifdef CONFIG_SERIAL_8250 ++ /* ++ * The ifdef here allows the system to be configured ++ * without the serial driver. ++ * Don't make it a module, however, it will steal the port ++ */ ++ shutdown_for_kgdb(gdb_async_info); ++#endif ++ ints_disabled = request_irq(gdb_async_info->state->irq, ++ gdb_interrupt, ++ IRQ_T(gdb_async_info), ++ "KGDB-stub", NULL); ++ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); ++ } ++ if (!ints_disabled) { ++ intprintk(("KGDB: Sending %d to port %x offset %d\n", ++ gdb_async_info->IER, ++ (int) gdb_async_info->port, UART_IER)); ++ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); ++ } ++ exit: ++ spin_unlock(&one_at_atime); +} + -+#load the module -+echo Copying $MODULEFILE to $TESTMACHINE -+rcp $MODULEFILE root@${TESTMACHINE}: ++/* ++ * putDebugChar ++ * ++ * This is a GDB stub routine. It waits until the interface is ready ++ * to transmit a char and then sends it. If there is no serial ++ * interface connection then it simply returns to its caller, having ++ * pretended to send the char. Caller takes needed protections. ++ */ ++void ++putDebugChar(int chr) ++{ ++ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", ++ gdb_async_info->port, ++ chr, ++ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + -+echo Loading module $MODULEFILE -+rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ -+ > $MAPFILE 2> $ERRFILE -+checkerrs ++ if (gdb_async_info == NULL) { ++ gdb_hook_interrupt(&local_info, 0); ++ } + -+SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` -+TEXTADDR=$(findaddr "\\.text[^.]") -+LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" -+SEGADDRS=`echo "$SEGMENTS" | awk '//{ -+ if ($1 != ".text" && $1 != ".this" && -+ $1 != ".kstrtab" && $1 != ".kmodtab") { -+ print " -s " $1 " 0x" $3 " " ++ write_char(gdb_async_info, chr); /* this routine will wait */ ++ count = (chr == '#') ? 0 : count + 1; ++ if ((count == 2)) { /* try to enable after */ ++ if (ints_disabled & ok_to_enable_ints) ++ kgdb_enable_ints_now(); /* try to enable after */ ++ ++ /* We do this a lot because, well we really want to get these ++ * interrupts. The serial driver will clear these bits when it ++ * initializes the chip. Every thing else it does is ok, ++ * but this. ++ */ ++ if (!ints_disabled) { ++ outb_px(gdb_async_info->port + UART_IER, ++ gdb_async_info->IER); ++ } + } -+}'` -+LOADSTRING="$LOADSTRING $SEGADDRS" -+echo Generating script $GDBSCRIPT -+echo $LOADSTRING > $GDBSCRIPT -diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c ---- 25/drivers/char/keyboard.c~kgdb-ga 2004-10-21 14:54:15.273601552 -0700 -+++ 25-akpm/drivers/char/keyboard.c 2004-10-21 14:54:15.326593496 -0700 -@@ -1081,6 +1081,9 @@ void kbd_keycode(unsigned int keycode, i ++ ++} /* putDebugChar */ ++ ++module_init(kgdb_enable_ints); +Index: linux/arch/i386/mm/fault.c +=================================================================== +--- linux.orig/arch/i386/mm/fault.c ++++ linux/arch/i386/mm/fault.c +@@ -453,6 +453,12 @@ no_context: + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ ++#ifdef CONFIG_KGDB ++ if (!user_mode(regs)){ ++ kgdb_handle_exception(14,SIGBUS, error_code, regs); ++ return; ++ } ++#endif + + bust_spinlocks(1); + +Index: linux/arch/x86_64/boot/compressed/head.S +=================================================================== +--- linux.orig/arch/x86_64/boot/compressed/head.S ++++ linux/arch/x86_64/boot/compressed/head.S +@@ -26,6 +26,7 @@ + .code32 + .text + ++#define IN_BOOTLOADER + #include + #include + +Index: linux/arch/x86_64/boot/compressed/misc.c +=================================================================== +--- linux.orig/arch/x86_64/boot/compressed/misc.c ++++ linux/arch/x86_64/boot/compressed/misc.c +@@ -9,6 +9,7 @@ + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + ++#define IN_BOOTLOADER + #include "miscsetup.h" + #include + +Index: linux/drivers/char/keyboard.c +=================================================================== +--- linux.orig/drivers/char/keyboard.c ++++ linux/drivers/char/keyboard.c +@@ -1078,6 +1078,9 @@ void kbd_keycode(unsigned int keycode, i } if (sysrq_down && down && !rep) { handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); @@ -4819,9 +5006,10 @@ diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c return; } #endif -diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c ---- 25/drivers/char/sysrq.c~kgdb-ga 2004-10-21 14:54:15.275601248 -0700 -+++ 25-akpm/drivers/char/sysrq.c 2004-10-21 14:54:15.326593496 -0700 +Index: linux/drivers/char/sysrq.c +=================================================================== +--- linux.orig/drivers/char/sysrq.c ++++ linux/drivers/char/sysrq.c @@ -35,6 +35,25 @@ #include @@ -4848,8 +5036,8 @@ diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c extern void reset_vc(unsigned int); -@@ -238,8 +257,8 @@ static struct sysrq_key_op *sysrq_key_ta - /* c */ NULL, +@@ -271,8 +290,8 @@ static struct sysrq_key_op *sysrq_key_ta + /* c */ &sysrq_crash_op, /* d */ NULL, /* e */ &sysrq_term_op, -/* f */ NULL, @@ -4859,10 +5047,11 @@ diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c /* h */ NULL, /* i */ &sysrq_kill_op, /* j */ NULL, -diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c ---- 25/drivers/serial/8250.c~kgdb-ga 2004-10-21 14:54:15.276601096 -0700 -+++ 25-akpm/drivers/serial/8250.c 2004-10-21 14:54:15.328593192 -0700 -@@ -983,7 +983,7 @@ receive_chars(struct uart_8250_port *up, +Index: linux/drivers/serial/8250.c +=================================================================== +--- linux.orig/drivers/serial/8250.c ++++ linux/drivers/serial/8250.c +@@ -880,7 +880,7 @@ receive_chars(struct uart_8250_port *up, if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { tty->flip.work.func((void *)tty); if (tty->flip.count >= TTY_FLIPBUF_SIZE) @@ -4871,7 +5060,7 @@ diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c } ch = serial_inp(up, UART_RX); *tty->flip.char_buf_ptr = ch; -@@ -1348,12 +1348,21 @@ static void serial8250_break_ctl(struct +@@ -1241,12 +1241,21 @@ static void serial8250_break_ctl(struct spin_unlock_irqrestore(&up->port.lock, flags); } @@ -4893,7 +5082,7 @@ diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c up->capabilities = uart_config[up->port.type].flags; up->mcr = 0; -@@ -1990,6 +1999,10 @@ serial8250_register_ports(struct uart_dr +@@ -1877,6 +1886,10 @@ static void __init serial8250_register_p for (i = 0; i < UART_NR; i++) { struct uart_8250_port *up = &serial8250_ports[i]; @@ -4903,10 +5092,10 @@ diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c +#endif up->port.line = i; up->port.ops = &serial8250_pops; - up->port.dev = dev; -@@ -2376,6 +2389,31 @@ void serial8250_unregister_port(int line + init_timer(&up->timer); +@@ -2160,6 +2173,31 @@ void serial8250_resume_port(int line) + uart_resume_port(&serial8250_reg, &serial8250_ports[line].port); } - EXPORT_SYMBOL(serial8250_unregister_port); +#ifdef CONFIG_KGDB +/* @@ -4936,10 +5125,11 @@ diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c static int __init serial8250_init(void) { int ret, i; -diff -puN drivers/serial/serial_core.c~kgdb-ga drivers/serial/serial_core.c ---- 25/drivers/serial/serial_core.c~kgdb-ga 2004-10-21 14:54:15.278600792 -0700 -+++ 25-akpm/drivers/serial/serial_core.c 2004-10-21 14:54:15.330592888 -0700 -@@ -1976,6 +1976,11 @@ uart_configure_port(struct uart_driver * +Index: linux/drivers/serial/serial_core.c +=================================================================== +--- linux.orig/drivers/serial/serial_core.c ++++ linux/drivers/serial/serial_core.c +@@ -1981,6 +1981,11 @@ uart_configure_port(struct uart_driver * { unsigned int flags; @@ -4951,23 +5141,10 @@ diff -puN drivers/serial/serial_core.c~kgdb-ga drivers/serial/serial_core.c /* * If there isn't a port here, don't do anything further. */ -diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h ---- 25/include/asm-i386/bugs.h~kgdb-ga 2004-10-21 14:54:15.279600640 -0700 -+++ 25-akpm/include/asm-i386/bugs.h 2004-10-21 14:54:15.331592736 -0700 -@@ -1,11 +1,11 @@ - /* - * include/asm-i386/bugs.h - * -- * Copyright (C) 1994 Linus Torvalds -+ * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), -- * -+ * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - * +Index: linux/include/asm-i386/bugs.h +=================================================================== +--- linux.orig/include/asm-i386/bugs.h ++++ linux/include/asm-i386/bugs.h @@ -25,7 +25,20 @@ #include #include @@ -4990,18 +5167,10 @@ diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h static int __init no_halt(char *s) { boot_cpu_data.hlt_works_ok = 0; -@@ -140,7 +153,7 @@ static void __init check_popad(void) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); -- else printk( "OK.\n" ); -+ else printk( "OK.\n" ); - #endif - } - -diff -puN /dev/null include/asm-i386/kgdb.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb.h 2004-10-21 14:54:15.331592736 -0700 +Index: linux/include/asm-i386/kgdb.h +=================================================================== +--- linux.orig/include/asm-i386/kgdb.h ++++ linux/include/asm-i386/kgdb.h @@ -0,0 +1,59 @@ +#ifndef __KGDB +#define __KGDB @@ -5021,7 +5190,7 @@ diff -puN /dev/null include/asm-i386/kgdb.h +#define INIT_KGDB_INTS kgdb_enable_ints() + +#ifndef BREAKPOINT -+#define BREAKPOINT asm(" int $3") ++#define BREAKPOINT() asm(" int $3") +#endif +/* + * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' @@ -5053,7 +5222,7 @@ diff -puN /dev/null include/asm-i386/kgdb.h +#endif +#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... */ +#ifndef BREAKPOINT -+#define BREAKPOINT ++#define BREAKPOINT() +#endif +#define kgdb_ts(data0,data1) +#define in_kgdb @@ -5062,9 +5231,10 @@ diff -puN /dev/null include/asm-i386/kgdb.h +#define INIT_KGDB_INTS +#endif +#endif /* __KGDB */ -diff -puN /dev/null include/asm-i386/kgdb_local.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb_local.h 2004-10-21 14:54:15.332592584 -0700 +Index: linux/include/asm-i386/kgdb_local.h +=================================================================== +--- linux.orig/include/asm-i386/kgdb_local.h ++++ linux/include/asm-i386/kgdb_local.h @@ -0,0 +1,102 @@ +#ifndef __KGDB_LOCAL +#define ___KGDB_LOCAL @@ -5077,113 +5247,280 @@ diff -puN /dev/null include/asm-i386/kgdb_local.h +#include +#include + -+#define PORT 0x3f8 -+#ifdef CONFIG_KGDB_PORT -+#undef PORT -+#define PORT CONFIG_KGDB_PORT -+#endif -+#define IRQ 4 -+#ifdef CONFIG_KGDB_IRQ -+#undef IRQ -+#define IRQ CONFIG_KGDB_IRQ -+#endif -+#define SB_CLOCK 1843200 -+#define SB_BASE (SB_CLOCK/16) -+#define SB_BAUD9600 SB_BASE/9600 -+#define SB_BAUD192 SB_BASE/19200 -+#define SB_BAUD384 SB_BASE/38400 -+#define SB_BAUD576 SB_BASE/57600 -+#define SB_BAUD1152 SB_BASE/115200 -+#ifdef CONFIG_KGDB_9600BAUD -+#define SB_BAUD SB_BAUD9600 -+#endif -+#ifdef CONFIG_KGDB_19200BAUD -+#define SB_BAUD SB_BAUD192 -+#endif -+#ifdef CONFIG_KGDB_38400BAUD -+#define SB_BAUD SB_BAUD384 -+#endif -+#ifdef CONFIG_KGDB_57600BAUD -+#define SB_BAUD SB_BAUD576 -+#endif -+#ifdef CONFIG_KGDB_115200BAUD -+#define SB_BAUD SB_BAUD1152 -+#endif -+#ifndef SB_BAUD -+#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ -+#endif ++#define PORT 0x3f8 ++#ifdef CONFIG_KGDB_PORT ++#undef PORT ++#define PORT CONFIG_KGDB_PORT ++#endif ++#define IRQ 4 ++#ifdef CONFIG_KGDB_IRQ ++#undef IRQ ++#define IRQ CONFIG_KGDB_IRQ ++#endif ++#define SB_CLOCK 1843200 ++#define SB_BASE (SB_CLOCK/16) ++#define SB_BAUD9600 SB_BASE/9600 ++#define SB_BAUD192 SB_BASE/19200 ++#define SB_BAUD384 SB_BASE/38400 ++#define SB_BAUD576 SB_BASE/57600 ++#define SB_BAUD1152 SB_BASE/115200 ++#ifdef CONFIG_KGDB_9600BAUD ++#define SB_BAUD SB_BAUD9600 ++#endif ++#ifdef CONFIG_KGDB_19200BAUD ++#define SB_BAUD SB_BAUD192 ++#endif ++#ifdef CONFIG_KGDB_38400BAUD ++#define SB_BAUD SB_BAUD384 ++#endif ++#ifdef CONFIG_KGDB_57600BAUD ++#define SB_BAUD SB_BAUD576 ++#endif ++#ifdef CONFIG_KGDB_115200BAUD ++#define SB_BAUD SB_BAUD1152 ++#endif ++#ifndef SB_BAUD ++#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ ++#endif ++ ++#ifndef CONFIG_X86_TSC ++#undef rdtsc ++#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} ++#undef rdtscll ++#define rdtscll(s) s++ ++#endif ++ ++#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ ++#undef spin_lock ++#undef spin_trylock ++#undef spin_unlock ++#define spin_lock _raw_spin_lock ++#define spin_trylock _raw_spin_trylock ++#define spin_unlock _raw_spin_unlock ++#else ++#endif ++#undef spin_unlock_wait ++#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ ++ while(spin_is_locked(x)) ++ ++#define SB_IER 1 ++#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS ++ ++#define FLAGS 0 ++#define SB_STATE { \ ++ magic: SSTATE_MAGIC, \ ++ baud_base: SB_BASE, \ ++ port: PORT, \ ++ irq: IRQ, \ ++ flags: FLAGS, \ ++ custom_divisor:SB_BAUD} ++#define SB_INFO { \ ++ magic: SERIAL_MAGIC, \ ++ port: PORT,0,FLAGS, \ ++ state: &state, \ ++ tty: (struct tty_struct *)&state, \ ++ IER: SB_IER, \ ++ MCR: SB_MCR} ++extern void putDebugChar(int); ++/* RTAI support needs us to really stop/start interrupts */ ++ ++#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") ++#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") ++#define kgdb_local_save_flags(x) __asm__ __volatile__(\ ++ "pushfl ; popl %0":"=g" (x): /* no input */) ++#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ ++ "pushl %0 ; popfl": \ ++ /* no output */ :"g" (x):"memory", "cc") ++#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() ++ ++#ifdef CONFIG_SERIAL ++extern void shutdown_for_kgdb(struct async_struct *info); ++#endif ++#define INIT_KDEBUG putDebugChar("+"); ++#endif /* __KGDB_LOCAL */ +Index: linux/include/asm-x86_64/kgdb.h +=================================================================== +--- linux.orig/include/asm-x86_64/kgdb.h ++++ linux/include/asm-x86_64/kgdb.h +@@ -0,0 +1,23 @@ ++#ifndef __KGDB ++#define __KGDB ++ ++/* ++ * This file should not include ANY others. This makes it usable ++ * most anywhere without the fear of include order or inclusion. ++ * Make it so! ++ * ++ * This file may be included all the time. It is only active if ++ * CONFIG_KGDB is defined, otherwise it stubs out all the macros ++ * and entry points. ++ */ ++#ifndef BREAKPOINT ++#define BREAKPOINT() ++#endif ++ ++#define kgdb_ts(data0,data1) ++#define in_kgdb ++#define kgdb_handle_exception ++#define breakpoint ++#define INIT_KGDB_INTS ++ ++#endif /* __KGDB */ +Index: linux/include/linux/config.h +=================================================================== +--- linux.orig/include/linux/config.h ++++ linux/include/linux/config.h +@@ -2,6 +2,9 @@ + #define _LINUX_CONFIG_H + + #include ++#if defined(__i386__) && !defined(IN_BOOTLOADER) ++#include ++#endif + #if !defined (__KERNEL__) && !defined(__KERNGLUE__) + #error including kernel header in userspace; use the glibc headers instead! + #endif +Index: linux/include/linux/dwarf2-lang.h +=================================================================== +--- linux.orig/include/linux/dwarf2-lang.h ++++ linux/include/linux/dwarf2-lang.h +@@ -0,0 +1,132 @@ ++#ifndef DWARF2_LANG ++#define DWARF2_LANG ++#include ++ ++/* ++ * This is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License as published by the Free ++ * Software Foundation; either version 2, or (at your option) any later ++ * version. ++ */ ++/* ++ * This file defines macros that allow generation of DWARF debug records ++ * for asm files. This file is platform independent. Register numbers ++ * (which are about the only thing that is platform dependent) are to be ++ * supplied by a platform defined file. ++ */ ++#define DWARF_preamble() .section .debug_frame,"",@progbits ++/* ++ * This macro starts a debug frame section. The debug_frame describes ++ * where to find the registers that the enclosing function saved on ++ * entry. ++ * ++ * ORD is use by the label generator and should be the same as what is ++ * passed to CFI_postamble. ++ * ++ * pc, pc register gdb ordinal. ++ * ++ * code_align this is the factor used to define locations or regions ++ * where the given definitions apply. If you use labels to define these ++ * this should be 1. ++ * ++ * data_align this is the factor used to define register offsets. If ++ * you use struct offset, this should be the size of the register in ++ * bytes or the negative of that. This is how it is used: you will ++ * define a register as the reference register, say the stack pointer, ++ * then you will say where a register is located relative to this ++ * reference registers value, say 40 for register 3 (the gdb register ++ * number). The <40> will be multiplied by to define the ++ * byte offset of the given register (3, in this example). So if your ++ * <40> is the byte offset and the reference register points at the ++ * begining, you would want 1 for the data_offset. If <40> was the 40th ++ * 4-byte element in that structure you would want 4. And if your ++ * reference register points at the end of the structure you would want ++ * a negative data_align value(and you would have to do other math as ++ * well). ++ */ ++ ++#define CFI_preamble(ORD, pc, code_align, data_align) \ ++.section .debug_frame,"",@progbits ; \ ++frame/**/_/**/ORD: \ ++ .long end/**/_/**/ORD-start/**/_/**/ORD; \ ++start/**/_/**/ORD: \ ++ .long DW_CIE_ID; \ ++ .byte DW_CIE_VERSION; \ ++ .byte 0 ; \ ++ .uleb128 code_align; \ ++ .sleb128 data_align; \ ++ .byte pc; ++ ++/* ++ * After the above macro and prior to the CFI_postamble, you need to ++ * define the initial state. This starts with defining the reference ++ * register and, usually the pc. Here are some helper macros: ++ */ ++ ++#define CFA_define_reference(reg, offset) \ ++ .byte DW_CFA_def_cfa; \ ++ .uleb128 reg; \ ++ .uleb128 (offset); ++ ++#define CFA_define_offset(reg, offset) \ ++ .byte (DW_CFA_offset + reg); \ ++ .uleb128 (offset); ++ ++#define CFI_postamble(ORD) \ ++ .align 4; \ ++end/**/_/**/ORD: ++/* ++ * So now your code pushs stuff on the stack, you need a new location ++ * and the rules for what to do. This starts a running description of ++ * the call frame. You need to describe what changes with respect to ++ * the call registers as the location of the pc moves through the code. ++ * The following builds an FDE (fram descriptor entry?). Like the ++ * above, it has a preamble and a postamble. It also is tied to the CFI ++ * above. ++ * The first entry after the preamble must be the location in the code ++ * that the call frame is being described for. ++ */ ++#define FDE_preamble(ORD, fde_no, initial_address, length) \ ++ .long FDE_end/**/_/**/fde_no-FDE_start/**/_/**/fde_no; \ ++FDE_start/**/_/**/fde_no: \ ++ .long frame/**/_/**/ORD; \ ++ .long initial_address; \ ++ .long length; ++ ++#define FDE_postamble(fde_no) \ ++ .align 4; \ ++FDE_end/**/_/**/fde_no: ++/* ++ * That done, you can now add registers, subtract registers, move the ++ * reference and even change the reference. You can also define a new ++ * area of code the info applies to. For discontinuous bits you should ++ * start a new FDE. You may have as many as you like. ++ */ ++ ++/* ++ * To advance the address by ++ */ + -+#ifndef CONFIG_X86_TSC -+#undef rdtsc -+#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} -+#undef rdtscll -+#define rdtscll(s) s++ -+#endif ++#define FDE_advance(bytes) \ ++ .byte DW_CFA_advance_loc4 \ ++ .long bytes + -+#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ -+#undef spin_lock -+#undef spin_trylock -+#undef spin_unlock -+#define spin_lock _raw_spin_lock -+#define spin_trylock _raw_spin_trylock -+#define spin_unlock _raw_spin_unlock -+#else -+#endif -+#undef spin_unlock_wait -+#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ -+ while(spin_is_locked(x)) + -+#define SB_IER 1 -+#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS + -+#define FLAGS 0 -+#define SB_STATE { \ -+ magic: SSTATE_MAGIC, \ -+ baud_base: SB_BASE, \ -+ port: PORT, \ -+ irq: IRQ, \ -+ flags: FLAGS, \ -+ custom_divisor:SB_BAUD} -+#define SB_INFO { \ -+ magic: SERIAL_MAGIC, \ -+ port: PORT,0,FLAGS, \ -+ state: &state, \ -+ tty: (struct tty_struct *)&state, \ -+ IER: SB_IER, \ -+ MCR: SB_MCR} -+extern void putDebugChar(int); -+/* RTAI support needs us to really stop/start interrupts */ ++/* ++ * With the above you can define all the register locations. But ++ * suppose the reference register moves... Takes the new offset NOT an ++ * increment. This is how esp is tracked if it is not saved. ++ */ + -+#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") -+#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") -+#define kgdb_local_save_flags(x) __asm__ __volatile__(\ -+ "pushfl ; popl %0":"=g" (x): /* no input */) -+#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ -+ "pushl %0 ; popfl": \ -+ /* no output */ :"g" (x):"memory", "cc") -+#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() ++#define CFA_define_cfa_offset(offset) \ ++ .byte $DW_CFA_def_cfa_offset; \ ++ .uleb128 (offset); ++/* ++ * Or suppose you want to use a different reference register... ++ */ ++#define CFA_define_cfa_register(reg) \ ++ .byte DW_CFA_def_cfa_register; \ ++ .uleb128 reg; + -+#ifdef CONFIG_SERIAL -+extern void shutdown_for_kgdb(struct async_struct *info); -+#endif -+#define INIT_KDEBUG putDebugChar("+"); -+#endif /* __KGDB_LOCAL */ -diff -puN include/linux/config.h~kgdb-ga include/linux/config.h ---- 25/include/linux/config.h~kgdb-ga 2004-10-21 14:54:15.281600336 -0700 -+++ 25-akpm/include/linux/config.h 2004-10-21 14:54:15.332592584 -0700 -@@ -2,6 +2,9 @@ - #define _LINUX_CONFIG_H - - #include -+#if defined(__i386__) && !defined(IN_BOOTLOADER) -+#include +#endif - #if !defined (__KERNEL__) && !defined(__KERNGLUE__) - #error including kernel header in userspace; use the glibc headers instead! - #endif -diff -puN /dev/null include/linux/dwarf2.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/linux/dwarf2.h 2004-10-21 14:54:15.336591976 -0700 +Index: linux/include/linux/dwarf2.h +=================================================================== +--- linux.orig/include/linux/dwarf2.h ++++ linux/include/linux/dwarf2.h @@ -0,0 +1,738 @@ +/* Declarations and definitions of codes relating to the DWARF2 symbolic + debugging information format. @@ -5884,185 +6221,50 @@ diff -puN /dev/null include/linux/dwarf2.h + DW_LANG_Mips_Assembler = 0x8001 COMMA + /* UPC. */ + DW_LANG_Upc = 0x8765 -+IF_NOT_ASM(};) -+ -+#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ -+#define DW_LANG_hi_user 0xffff /* Implementation-defined range start. */ -+ -+/* Names and codes for macro information. */ -+ENUM(dwarf_macinfo_record_type) -+ -+ DW_MACINFO_define = 1 COMMA -+ DW_MACINFO_undef = 2 COMMA -+ DW_MACINFO_start_file = 3 COMMA -+ DW_MACINFO_end_file = 4 COMMA -+ DW_MACINFO_vendor_ext = 255 -+IF_NOT_ASM(};) -+ -+/* @@@ For use with GNU frame unwind information. */ -+ -+#define DW_EH_PE_absptr 0x00 -+#define DW_EH_PE_omit 0xff -+ -+#define DW_EH_PE_uleb128 0x01 -+#define DW_EH_PE_udata2 0x02 -+#define DW_EH_PE_udata4 0x03 -+#define DW_EH_PE_udata8 0x04 -+#define DW_EH_PE_sleb128 0x09 -+#define DW_EH_PE_sdata2 0x0A -+#define DW_EH_PE_sdata4 0x0B -+#define DW_EH_PE_sdata8 0x0C -+#define DW_EH_PE_signed 0x08 -+ -+#define DW_EH_PE_pcrel 0x10 -+#define DW_EH_PE_textrel 0x20 -+#define DW_EH_PE_datarel 0x30 -+#define DW_EH_PE_funcrel 0x40 -+#define DW_EH_PE_aligned 0x50 -+ -+#define DW_EH_PE_indirect 0x80 -+ -+#endif /* _ELF_DWARF2_H */ -diff -puN /dev/null include/linux/dwarf2-lang.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/linux/dwarf2-lang.h 2004-10-21 14:54:15.337591824 -0700 -@@ -0,0 +1,132 @@ -+#ifndef DWARF2_LANG -+#define DWARF2_LANG -+#include -+ -+/* -+ * This is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License as published by the Free -+ * Software Foundation; either version 2, or (at your option) any later -+ * version. -+ */ -+/* -+ * This file defines macros that allow generation of DWARF debug records -+ * for asm files. This file is platform independent. Register numbers -+ * (which are about the only thing that is platform dependent) are to be -+ * supplied by a platform defined file. -+ */ -+#define DWARF_preamble() .section .debug_frame,"",@progbits -+/* -+ * This macro starts a debug frame section. The debug_frame describes -+ * where to find the registers that the enclosing function saved on -+ * entry. -+ * -+ * ORD is use by the label generator and should be the same as what is -+ * passed to CFI_postamble. -+ * -+ * pc, pc register gdb ordinal. -+ * -+ * code_align this is the factor used to define locations or regions -+ * where the given definitions apply. If you use labels to define these -+ * this should be 1. -+ * -+ * data_align this is the factor used to define register offsets. If -+ * you use struct offset, this should be the size of the register in -+ * bytes or the negative of that. This is how it is used: you will -+ * define a register as the reference register, say the stack pointer, -+ * then you will say where a register is located relative to this -+ * reference registers value, say 40 for register 3 (the gdb register -+ * number). The <40> will be multiplied by to define the -+ * byte offset of the given register (3, in this example). So if your -+ * <40> is the byte offset and the reference register points at the -+ * begining, you would want 1 for the data_offset. If <40> was the 40th -+ * 4-byte element in that structure you would want 4. And if your -+ * reference register points at the end of the structure you would want -+ * a negative data_align value(and you would have to do other math as -+ * well). -+ */ -+ -+#define CFI_preamble(ORD, pc, code_align, data_align) \ -+.section .debug_frame,"",@progbits ; \ -+frame/**/_/**/ORD: \ -+ .long end/**/_/**/ORD-start/**/_/**/ORD; \ -+start/**/_/**/ORD: \ -+ .long DW_CIE_ID; \ -+ .byte DW_CIE_VERSION; \ -+ .byte 0 ; \ -+ .uleb128 code_align; \ -+ .sleb128 data_align; \ -+ .byte pc; -+ -+/* -+ * After the above macro and prior to the CFI_postamble, you need to -+ * define the initial state. This starts with defining the reference -+ * register and, usually the pc. Here are some helper macros: -+ */ -+ -+#define CFA_define_reference(reg, offset) \ -+ .byte DW_CFA_def_cfa; \ -+ .uleb128 reg; \ -+ .uleb128 (offset); -+ -+#define CFA_define_offset(reg, offset) \ -+ .byte (DW_CFA_offset + reg); \ -+ .uleb128 (offset); -+ -+#define CFI_postamble(ORD) \ -+ .align 4; \ -+end/**/_/**/ORD: -+/* -+ * So now your code pushs stuff on the stack, you need a new location -+ * and the rules for what to do. This starts a running description of -+ * the call frame. You need to describe what changes with respect to -+ * the call registers as the location of the pc moves through the code. -+ * The following builds an FDE (fram descriptor entry?). Like the -+ * above, it has a preamble and a postamble. It also is tied to the CFI -+ * above. -+ * The first entry after the preamble must be the location in the code -+ * that the call frame is being described for. -+ */ -+#define FDE_preamble(ORD, fde_no, initial_address, length) \ -+ .long FDE_end/**/_/**/fde_no-FDE_start/**/_/**/fde_no; \ -+FDE_start/**/_/**/fde_no: \ -+ .long frame/**/_/**/ORD; \ -+ .long initial_address; \ -+ .long length; ++IF_NOT_ASM(};) + -+#define FDE_postamble(fde_no) \ -+ .align 4; \ -+FDE_end/**/_/**/fde_no: -+/* -+ * That done, you can now add registers, subtract registers, move the -+ * reference and even change the reference. You can also define a new -+ * area of code the info applies to. For discontinuous bits you should -+ * start a new FDE. You may have as many as you like. -+ */ ++#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ ++#define DW_LANG_hi_user 0xffff /* Implementation-defined range start. */ + -+/* -+ * To advance the address by -+ */ ++/* Names and codes for macro information. */ ++ENUM(dwarf_macinfo_record_type) + -+#define FDE_advance(bytes) \ -+ .byte DW_CFA_advance_loc4 \ -+ .long bytes ++ DW_MACINFO_define = 1 COMMA ++ DW_MACINFO_undef = 2 COMMA ++ DW_MACINFO_start_file = 3 COMMA ++ DW_MACINFO_end_file = 4 COMMA ++ DW_MACINFO_vendor_ext = 255 ++IF_NOT_ASM(};) ++ ++/* @@@ For use with GNU frame unwind information. */ + ++#define DW_EH_PE_absptr 0x00 ++#define DW_EH_PE_omit 0xff + ++#define DW_EH_PE_uleb128 0x01 ++#define DW_EH_PE_udata2 0x02 ++#define DW_EH_PE_udata4 0x03 ++#define DW_EH_PE_udata8 0x04 ++#define DW_EH_PE_sleb128 0x09 ++#define DW_EH_PE_sdata2 0x0A ++#define DW_EH_PE_sdata4 0x0B ++#define DW_EH_PE_sdata8 0x0C ++#define DW_EH_PE_signed 0x08 + -+/* -+ * With the above you can define all the register locations. But -+ * suppose the reference register moves... Takes the new offset NOT an -+ * increment. This is how esp is tracked if it is not saved. -+ */ ++#define DW_EH_PE_pcrel 0x10 ++#define DW_EH_PE_textrel 0x20 ++#define DW_EH_PE_datarel 0x30 ++#define DW_EH_PE_funcrel 0x40 ++#define DW_EH_PE_aligned 0x50 + -+#define CFA_define_cfa_offset(offset) \ -+ .byte $DW_CFA_def_cfa_offset; \ -+ .uleb128 (offset); -+/* -+ * Or suppose you want to use a different reference register... -+ */ -+#define CFA_define_cfa_register(reg) \ -+ .byte DW_CFA_def_cfa_register; \ -+ .uleb128 reg; ++#define DW_EH_PE_indirect 0x80 + -+#endif -diff -puN include/linux/serial_core.h~kgdb-ga include/linux/serial_core.h ---- 25/include/linux/serial_core.h~kgdb-ga 2004-10-21 14:54:15.282600184 -0700 -+++ 25-akpm/include/linux/serial_core.h 2004-10-21 14:54:15.338591672 -0700 -@@ -172,7 +172,9 @@ struct uart_port { ++#endif /* _ELF_DWARF2_H */ +Index: linux/include/linux/serial_core.h +=================================================================== +--- linux.orig/include/linux/serial_core.h ++++ linux/include/linux/serial_core.h +@@ -174,7 +174,9 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ @@ -6073,9 +6275,10 @@ diff -puN include/linux/serial_core.h~kgdb-ga include/linux/serial_core.h #define UPIO_PORT (0) #define UPIO_HUB6 (1) #define UPIO_MEM (2) -diff -puN include/linux/spinlock.h~kgdb-ga include/linux/spinlock.h ---- 25/include/linux/spinlock.h~kgdb-ga 2004-10-21 14:54:15.284599880 -0700 -+++ 25-akpm/include/linux/spinlock.h 2004-10-21 14:54:15.338591672 -0700 +Index: linux/include/linux/spinlock.h +=================================================================== +--- linux.orig/include/linux/spinlock.h ++++ linux/include/linux/spinlock.h @@ -15,6 +15,12 @@ #include /* for cpu relax */ @@ -6123,10 +6326,11 @@ diff -puN include/linux/spinlock.h~kgdb-ga include/linux/spinlock.h 1; \ }) -diff -puN kernel/pid.c~kgdb-ga kernel/pid.c ---- 25/kernel/pid.c~kgdb-ga 2004-10-21 14:54:15.285599728 -0700 -+++ 25-akpm/kernel/pid.c 2004-10-21 14:54:15.339591520 -0700 -@@ -252,6 +252,9 @@ void switch_exec_pids(task_t *leader, ta +Index: linux/kernel/pid.c +=================================================================== +--- linux.orig/kernel/pid.c ++++ linux/kernel/pid.c +@@ -276,6 +276,9 @@ int pid_alive(struct task_struct *p) * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or * more. */ @@ -6136,7 +6340,7 @@ diff -puN kernel/pid.c~kgdb-ga kernel/pid.c void __init pidhash_init(void) { int i, j, pidhash_size; -@@ -273,6 +276,9 @@ void __init pidhash_init(void) +@@ -297,6 +300,9 @@ void __init pidhash_init(void) for (j = 0; j < pidhash_size; j++) INIT_HLIST_HEAD(&pid_hash[i][j]); } @@ -6146,10 +6350,11 @@ diff -puN kernel/pid.c~kgdb-ga kernel/pid.c } void __init pidmap_init(void) -diff -puN kernel/sched.c~kgdb-ga kernel/sched.c ---- 25/kernel/sched.c~kgdb-ga 2004-10-21 14:54:15.287599424 -0700 -+++ 25-akpm/kernel/sched.c 2004-10-21 14:54:15.342591064 -0700 -@@ -2931,6 +2931,13 @@ out_unlock: +Index: linux/kernel/sched.c +=================================================================== +--- linux.orig/kernel/sched.c ++++ linux/kernel/sched.c +@@ -3190,6 +3190,13 @@ out_unlock: EXPORT_SYMBOL(set_user_nice); @@ -6163,209 +6368,3 @@ diff -puN kernel/sched.c~kgdb-ga kernel/sched.c #ifdef __ARCH_WANT_SYS_NICE /* -diff -puN MAINTAINERS~kgdb-ga MAINTAINERS ---- 25/MAINTAINERS~kgdb-ga 2004-10-21 14:54:15.288599272 -0700 -+++ 25-akpm/MAINTAINERS 2004-10-21 14:54:15.344590760 -0700 -@@ -1242,6 +1242,12 @@ W: http://sf.net/projects/kernel-janitor - W: http://developer.osdl.org/rddunlap/kj-patches/ - S: Maintained - -+KGDB FOR I386 PLATFORM -+P: George Anzinger -+M: george@mvista.com -+L: linux-net@vger.kernel.org -+S: Supported -+ - KERNEL NFSD - P: Neil Brown - M: neilb@cse.unsw.edu.au -diff -puN arch/i386/Kconfig.debug~kgdb-ga arch/i386/Kconfig.debug ---- 25/arch/i386/Kconfig.debug~kgdb-ga 2004-10-21 14:54:15.290598968 -0700 -+++ 25-akpm/arch/i386/Kconfig.debug 2004-10-21 14:54:15.344590760 -0700 -@@ -65,4 +65,6 @@ config X86_MPPARSE - depends on X86_LOCAL_APIC && !X86_VISWS - default y - -+source "arch/i386/Kconfig.kgdb" -+ - endmenu -diff -puN /dev/null arch/i386/Kconfig.kgdb ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/Kconfig.kgdb 2004-10-21 14:54:15.345590608 -0700 -@@ -0,0 +1,175 @@ -+config KGDB -+ bool "Include kgdb kernel debugger" -+ depends on DEBUG_KERNEL -+ help -+ If you say Y here, the system will be compiled with the debug -+ option (-g) and a debugging stub will be included in the -+ kernel. This stub communicates with gdb on another (host) -+ computer via a serial port. The host computer should have -+ access to the kernel binary file (vmlinux) and a serial port -+ that is connected to the target machine. Gdb can be made to -+ configure the serial port or you can use stty and setserial to -+ do this. See the 'target' command in gdb. This option also -+ configures in the ability to request a breakpoint early in the -+ boot process. To request the breakpoint just include 'kgdb' -+ as a boot option when booting the target machine. The system -+ will then break as soon as it looks at the boot options. This -+ option also installs a breakpoint in panic and sends any -+ kernel faults to the debugger. For more information see the -+ Documentation/i386/kgdb/kgdb.txt file. -+ -+choice -+ depends on KGDB -+ prompt "Debug serial port BAUD" -+ default KGDB_115200BAUD -+ help -+ Gdb and the kernel stub need to agree on the baud rate to be -+ used. Some systems (x86 family at this writing) allow this to -+ be configured. -+ -+config KGDB_9600BAUD -+ bool "9600" -+ -+config KGDB_19200BAUD -+ bool "19200" -+ -+config KGDB_38400BAUD -+ bool "38400" -+ -+config KGDB_57600BAUD -+ bool "57600" -+ -+config KGDB_115200BAUD -+ bool "115200" -+endchoice -+ -+config KGDB_PORT -+ hex "hex I/O port address of the debug serial port" -+ depends on KGDB -+ default 3f8 -+ help -+ Some systems (x86 family at this writing) allow the port -+ address to be configured. The number entered is assumed to be -+ hex, don't put 0x in front of it. The standard address are: -+ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx -+ will tell you what you have. It is good to test the serial -+ connection with a live system before trying to debug. -+ -+config KGDB_IRQ -+ int "IRQ of the debug serial port" -+ depends on KGDB -+ default 4 -+ help -+ This is the irq for the debug port. If everything is working -+ correctly and the kernel has interrupts on a control C to the -+ port should cause a break into the kernel debug stub. -+ -+config DEBUG_INFO -+ bool -+ depends on KGDB -+ default y -+ -+config KGDB_MORE -+ bool "Add any additional compile options" -+ depends on KGDB -+ default n -+ help -+ Saying yes here turns on the ability to enter additional -+ compile options. -+ -+ -+config KGDB_OPTIONS -+ depends on KGDB_MORE -+ string "Additional compile arguments" -+ default "-O1" -+ help -+ This option allows you enter additional compile options for -+ the whole kernel compile. Each platform will have a default -+ that seems right for it. For example on PPC "-ggdb -O1", and -+ for i386 "-O1". Note that by configuring KGDB "-g" is already -+ turned on. In addition, on i386 platforms -+ "-fomit-frame-pointer" is deleted from the standard compile -+ options. -+ -+config NO_KGDB_CPUS -+ int "Number of CPUs" -+ depends on KGDB && SMP -+ default NR_CPUS -+ help -+ -+ This option sets the number of cpus for kgdb ONLY. It is used -+ to prune some internal structures so they look "nice" when -+ displayed with gdb. This is to overcome possibly larger -+ numbers that may have been entered above. Enter the real -+ number to get nice clean kgdb_info displays. -+ -+config KGDB_TS -+ bool "Enable kgdb time stamp macros?" -+ depends on KGDB -+ default n -+ help -+ Kgdb event macros allow you to instrument your code with calls -+ to the kgdb event recording function. The event log may be -+ examined with gdb at a break point. Turning on this -+ capability also allows you to choose how many events to -+ keep. Kgdb always keeps the lastest events. -+ -+choice -+ depends on KGDB_TS -+ prompt "Max number of time stamps to save?" -+ default KGDB_TS_128 -+ -+config KGDB_TS_64 -+ bool "64" -+ -+config KGDB_TS_128 -+ bool "128" -+ -+config KGDB_TS_256 -+ bool "256" -+ -+config KGDB_TS_512 -+ bool "512" -+ -+config KGDB_TS_1024 -+ bool "1024" -+ -+endchoice -+ -+config STACK_OVERFLOW_TEST -+ bool "Turn on kernel stack overflow testing?" -+ depends on KGDB -+ default n -+ help -+ This option enables code in the front line interrupt handlers -+ to check for kernel stack overflow on interrupts and system -+ calls. This is part of the kgdb code on x86 systems. -+ -+config KGDB_CONSOLE -+ bool "Enable serial console thru kgdb port" -+ depends on KGDB -+ default n -+ help -+ This option enables the command line "console=kgdb" option. -+ When the system is booted with this option in the command line -+ all kernel printk output is sent to gdb (as well as to other -+ consoles). For this to work gdb must be connected. For this -+ reason, this command line option will generate a breakpoint if -+ gdb has not yet connected. After the gdb continue command is -+ given all pent up console output will be printed by gdb on the -+ host machine. Neither this option, nor KGDB require the -+ serial driver to be configured. -+ -+config KGDB_SYSRQ -+ bool "Turn on SysRq 'G' command to do a break?" -+ depends on KGDB -+ default y -+ help -+ This option includes an option in the SysRq code that allows -+ you to enter SysRq G which generates a breakpoint to the KGDB -+ stub. This will work if the keyboard is alive and can -+ interrupt the system. Because of constraints on when the -+ serial port interrupt can be enabled, this code may allow you -+ to interrupt the system before the serial port control C is -+ available. Just say yes here. -+ -_ diff --git a/lustre/kernel_patches/patches/3.5G-address-space-2.4.22-vanilla.patch b/lustre/kernel_patches/patches/3.5G-address-space-2.4.22-vanilla.patch deleted file mode 100644 index 0276131..0000000 --- a/lustre/kernel_patches/patches/3.5G-address-space-2.4.22-vanilla.patch +++ /dev/null @@ -1,352 +0,0 @@ - Rules.make | 17 +++++++ - arch/i386/Makefile | 4 + - arch/i386/config.in | 10 ++++ - arch/i386/vmlinux.lds | 99 ----------------------------------------- - arch/i386/vmlinux.lds.S | 99 +++++++++++++++++++++++++++++++++++++++++ - include/asm-i386/page.h | 4 + - include/asm-i386/page_offset.h | 10 ++++ - include/asm-i386/processor.h | 4 + - mm/memory.c | 23 +++++++-- - 9 files changed, 165 insertions(+), 105 deletions(-) - -Index: linux-2.4.22-vanilla/arch/i386/config.in -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/config.in 2003-12-02 23:55:28.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/config.in 2003-12-15 23:09:28.000000000 +0300 -@@ -212,6 +212,16 @@ - fi - if [ "$CONFIG_HIGHMEM64G" = "y" ]; then - define_bool CONFIG_X86_PAE y -+ choice 'User address space size' \ -+ "3GB CONFIG_1GB \ -+ 2GB CONFIG_2GB \ -+ 1GB CONFIG_3GB" 3GB -+else -+ choice 'User address space size' \ -+ "3GB CONFIG_1GB \ -+ 2GB CONFIG_2GB \ -+ 1GB CONFIG_3GB \ -+ 3.5GB CONFIG_05GB" 3GB - fi - - if [ "$CONFIG_HIGHMEM" = "y" ]; then -Index: linux-2.4.22-vanilla/arch/i386/Makefile -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/Makefile 2003-11-03 22:50:58.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/Makefile 2003-12-15 23:09:28.000000000 +0300 -@@ -114,6 +114,9 @@ - - MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -+arch/i386/vmlinux.lds: arch/i386/vmlinux.lds.S FORCE -+ $(CPP) -C -P -I$(HPATH) -imacros $(HPATH)/asm-i386/page_offset.h -Ui386 arch/i386/vmlinux.lds.S >arch/i386/vmlinux.lds -+ - vmlinux: arch/i386/vmlinux.lds - - FORCE: ; -@@ -150,6 +153,7 @@ - @$(MAKEBOOT) clean - - archmrproper: -+ rm -f arch/i386/vmlinux.lds - - archdep: - @$(MAKEBOOT) dep -Index: linux-2.4.22-vanilla/arch/i386/vmlinux.lds.S -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/vmlinux.lds.S 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/vmlinux.lds.S 2003-12-15 23:10:46.000000000 +0300 -@@ -0,0 +1,83 @@ -+/* ld script to make i386 Linux kernel -+ * Written by Martin Mares ; -+ */ -+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -+OUTPUT_ARCH(i386) -+ENTRY(_start) -+SECTIONS -+{ -+ . = PAGE_OFFSET_RAW + 0x100000; -+ _text = .; /* Text and read-only data */ -+ .text : { -+ *(.text) -+ *(.fixup) -+ *(.gnu.warning) -+ } = 0x9090 -+ -+ _etext = .; /* End of text section */ -+ -+ .rodata : { *(.rodata) *(.rodata.*) } -+ .kstrtab : { *(.kstrtab) } -+ -+ . = ALIGN(16); /* Exception table */ -+ __start___ex_table = .; -+ __ex_table : { *(__ex_table) } -+ __stop___ex_table = .; -+ -+ __start___ksymtab = .; /* Kernel symbol table */ -+ __ksymtab : { *(__ksymtab) } -+ __stop___ksymtab = .; -+ -+ .data : { /* Data */ -+ *(.data) -+ CONSTRUCTORS -+ } -+ -+ _edata = .; /* End of data section */ -+ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ -+ .data.init_task : { *(.data.init_task) } -+ -+ . = ALIGN(4096); /* Init code and data */ -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ .data.init : { *(.data.init) } -+ . = ALIGN(16); -+ __setup_start = .; -+ .setup.init : { *(.setup.init) } -+ __setup_end = .; -+ __initcall_start = .; -+ .initcall.init : { *(.initcall.init) } -+ __initcall_end = .; -+ . = ALIGN(4096); -+ __init_end = .; -+ -+ . = ALIGN(4096); -+ .data.page_aligned : { *(.data.idt) } -+ -+ . = ALIGN(32); -+ .data.cacheline_aligned : { *(.data.cacheline_aligned) } -+ -+ __bss_start = .; /* BSS */ -+ .bss : { -+ *(.bss) -+ } -+ _end = . ; -+ -+ /* Sections to be discarded */ -+ /DISCARD/ : { -+ *(.text.exit) -+ *(.data.exit) -+ *(.exitcall.exit) -+ } -+ -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+} -Index: linux-2.4.22-vanilla/arch/i386/vmlinux.lds -=================================================================== ---- linux-2.4.22-vanilla.orig/arch/i386/vmlinux.lds 2003-12-15 23:17:09.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/vmlinux.lds 2003-01-30 13:24:37.000000000 +0300 -@@ -1,83 +0,0 @@ --/* ld script to make i386 Linux kernel -- * Written by Martin Mares ; -- */ --OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") --OUTPUT_ARCH(i386) --ENTRY(_start) --SECTIONS --{ -- . = 0xC0000000 + 0x100000; -- _text = .; /* Text and read-only data */ -- .text : { -- *(.text) -- *(.fixup) -- *(.gnu.warning) -- } = 0x9090 -- -- _etext = .; /* End of text section */ -- -- .rodata : { *(.rodata) *(.rodata.*) } -- .kstrtab : { *(.kstrtab) } -- -- . = ALIGN(16); /* Exception table */ -- __start___ex_table = .; -- __ex_table : { *(__ex_table) } -- __stop___ex_table = .; -- -- __start___ksymtab = .; /* Kernel symbol table */ -- __ksymtab : { *(__ksymtab) } -- __stop___ksymtab = .; -- -- .data : { /* Data */ -- *(.data) -- CONSTRUCTORS -- } -- -- _edata = .; /* End of data section */ -- --/* chose the biggest of the possible stack sizes here? */ -- . = ALIGN(65536); /* init_task */ -- .data.init_task : { *(.data.init_task) } -- -- . = ALIGN(4096); /* Init code and data */ -- __init_begin = .; -- .text.init : { *(.text.init) } -- .data.init : { *(.data.init) } -- . = ALIGN(16); -- __setup_start = .; -- .setup.init : { *(.setup.init) } -- __setup_end = .; -- __initcall_start = .; -- .initcall.init : { *(.initcall.init) } -- __initcall_end = .; -- . = ALIGN(4096); -- __init_end = .; -- -- . = ALIGN(4096); -- .data.page_aligned : { *(.data.idt) } -- -- . = ALIGN(32); -- .data.cacheline_aligned : { *(.data.cacheline_aligned) } -- -- __bss_start = .; /* BSS */ -- .bss : { -- *(.bss) -- } -- _end = . ; -- -- /* Sections to be discarded */ -- /DISCARD/ : { -- *(.text.exit) -- *(.data.exit) -- *(.exitcall.exit) -- } -- -- /* Stabs debugging sections. */ -- .stab 0 : { *(.stab) } -- .stabstr 0 : { *(.stabstr) } -- .stab.excl 0 : { *(.stab.excl) } -- .stab.exclstr 0 : { *(.stab.exclstr) } -- .stab.index 0 : { *(.stab.index) } -- .stab.indexstr 0 : { *(.stab.indexstr) } -- .comment 0 : { *(.comment) } --} -Index: linux-2.4.22-vanilla/include/asm-i386/page.h -=================================================================== ---- linux-2.4.22-vanilla.orig/include/asm-i386/page.h 2003-11-03 23:51:46.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-i386/page.h 2003-12-15 23:09:28.000000000 +0300 -@@ -78,7 +78,9 @@ - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ - --#define __PAGE_OFFSET (0xC0000000) -+#include -+ -+#define __PAGE_OFFSET (PAGE_OFFSET_RAW) - - /* - * This much address space is reserved for vmalloc() and iomap() -Index: linux-2.4.22-vanilla/include/asm-i386/page_offset.h -=================================================================== ---- linux-2.4.22-vanilla.orig/include/asm-i386/page_offset.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-i386/page_offset.h 2003-12-15 23:09:28.000000000 +0300 -@@ -0,0 +1,10 @@ -+#include -+#ifdef CONFIG_05GB -+#define PAGE_OFFSET_RAW 0xE0000000 -+#elif defined(CONFIG_1GB) -+#define PAGE_OFFSET_RAW 0xC0000000 -+#elif defined(CONFIG_2GB) -+#define PAGE_OFFSET_RAW 0x80000000 -+#elif defined(CONFIG_3GB) -+#define PAGE_OFFSET_RAW 0x40000000 -+#endif -Index: linux-2.4.22-vanilla/include/asm-i386/processor.h -=================================================================== ---- linux-2.4.22-vanilla.orig/include/asm-i386/processor.h 2003-12-02 23:55:28.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-i386/processor.h 2003-12-15 23:09:28.000000000 +0300 -@@ -265,7 +265,11 @@ - /* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -+#ifndef CONFIG_05GB - #define TASK_UNMAPPED_BASE (TASK_SIZE / 3) -+#else -+#define TASK_UNMAPPED_BASE (TASK_SIZE / 16) -+#endif - - /* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. -Index: linux-2.4.22-vanilla/mm/memory.c -=================================================================== ---- linux-2.4.22-vanilla.orig/mm/memory.c 2003-05-16 05:29:15.000000000 +0400 -+++ linux-2.4.22-vanilla/mm/memory.c 2003-12-15 23:09:28.000000000 +0300 -@@ -108,8 +108,7 @@ - - static inline void free_one_pgd(pgd_t * dir) - { -- int j; -- pmd_t * pmd; -+ pmd_t * pmd, * md, * emd; - - if (pgd_none(*dir)) - return; -@@ -120,9 +119,23 @@ - } - pmd = pmd_offset(dir, 0); - pgd_clear(dir); -- for (j = 0; j < PTRS_PER_PMD ; j++) { -- prefetchw(pmd+j+(PREFETCH_STRIDE/16)); -- free_one_pmd(pmd+j); -+ -+ /* -+ * Beware if changing the loop below. It once used int j, -+ * for (j = 0; j < PTRS_PER_PMD; j++) -+ * free_one_pmd(pmd+j); -+ * but some older i386 compilers (e.g. egcs-2.91.66, gcc-2.95.3) -+ * terminated the loop with a _signed_ address comparison -+ * using "jle", when configured for HIGHMEM64GB (X86_PAE). -+ * If also configured for 3GB of kernel virtual address space, -+ * if page at physical 0x3ffff000 virtual 0x7ffff000 is used as -+ * a pmd, when that mm exits the loop goes on to free "entries" -+ * found at 0x80000000 onwards. The loop below compiles instead -+ * to be terminated by unsigned address comparison using "jb". -+ */ -+ for (md = pmd, emd = pmd + PTRS_PER_PMD; md < emd; md++) { -+ prefetchw(md+(PREFETCH_STRIDE/16)); -+ free_one_pmd(md); - } - pmd_free(pmd); - } -Index: linux-2.4.22-vanilla/Rules.make -=================================================================== ---- linux-2.4.22-vanilla.orig/Rules.make 2003-05-16 05:28:27.000000000 +0400 -+++ linux-2.4.22-vanilla/Rules.make 2003-12-15 23:09:28.000000000 +0300 -@@ -215,6 +215,7 @@ - # - # Added the SMP separator to stop module accidents between uniprocessor - # and SMP Intel boxes - AC - from bits by Michael Chastain -+# Added separator for different PAGE_OFFSET memory models - Ingo. - # - - ifdef CONFIG_SMP -@@ -223,6 +224,22 @@ - genksyms_smp_prefix := - endif - -+ifdef CONFIG_2GB -+ifdef CONFIG_SMP -+ genksyms_smp_prefix := -p smp_2gig_ -+else -+ genksyms_smp_prefix := -p 2gig_ -+endif -+endif -+ -+ifdef CONFIG_3GB -+ifdef CONFIG_SMP -+ genksyms_smp_prefix := -p smp_3gig_ -+else -+ genksyms_smp_prefix := -p 3gig_ -+endif -+endif -+ - $(MODINCL)/%.ver: %.c - @if [ ! -r $(MODINCL)/$*.stamp -o $(MODINCL)/$*.stamp -ot $< ]; then \ - echo '$(CC) $(CFLAGS) $(EXTRA_CFLAGS_nostdinc) -E -D__GENKSYMS__ $<'; \ diff --git a/lustre/kernel_patches/patches/__find_get_block_slow-scale.patch b/lustre/kernel_patches/patches/__find_get_block_slow-scale.patch new file mode 100644 index 0000000..1da92d4 --- /dev/null +++ b/lustre/kernel_patches/patches/__find_get_block_slow-scale.patch @@ -0,0 +1,43 @@ +Index: linux/fs/buffer.c +=================================================================== +--- linux.orig/fs/buffer.c ++++ linux/fs/buffer.c +@@ -509,13 +509,18 @@ __find_get_block_slow(struct block_devic + struct buffer_head *head; + struct page *page; + int all_mapped = 1; ++ int spinlock = 0; + + index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); + page = find_get_page(bd_mapping, index); + if (!page) + goto out; + ++ if (unlikely(TestSetPageLocked(page))) { + spin_lock(&bd_mapping->private_lock); ++ spinlock = 1; ++ } ++ + if (!page_has_buffers(page)) + goto out_unlock; + head = page_buffers(page); +@@ -536,7 +541,7 @@ __find_get_block_slow(struct block_devic + * file io on the block device and getblk. It gets dealt with + * elsewhere, don't buffer_error if we had some unmapped buffers + */ +- if (all_mapped) { ++ if (unlikely(all_mapped)) { + printk("__find_get_block_slow() failed. " + "block=%llu, b_blocknr=%llu\n", + (unsigned long long)block, (unsigned long long)bh->b_blocknr); +@@ -544,7 +549,10 @@ __find_get_block_slow(struct block_devic + printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); + } + out_unlock: ++ if (unlikely(spinlock)) + spin_unlock(&bd_mapping->private_lock); ++ else ++ unlock_page(page); + page_cache_release(page); + out: + return ret; diff --git a/lustre/kernel_patches/patches/add_page_private.patch b/lustre/kernel_patches/patches/add_page_private.patch deleted file mode 100644 index 93005b66..0000000 --- a/lustre/kernel_patches/patches/add_page_private.patch +++ /dev/null @@ -1,23 +0,0 @@ -Index: linux-ia64/include/linux/mm.h -=================================================================== ---- linux-ia64.orig/include/linux/mm.h 2004-04-27 12:39:16.000000000 -0700 -+++ linux-ia64/include/linux/mm.h 2004-04-27 12:42:10.000000000 -0700 -@@ -205,6 +205,7 @@ typedef struct page { - struct page **pprev_hash; /* Complement to *next_hash. */ - struct buffer_head * buffers; /* Buffer maps us to a disk block. */ -+ unsigned long private; - - /* - * On machines where all RAM is mapped into kernel address space, - -_ -Index: linux-ia64/mm/filemap.c -=================================================================== ---- linux-ia64.orig/mm/filemap.c 2004-04-27 12:35:18.000000000 -0700 -+++ linux-ia64/mm/filemap.c 2004-04-27 12:42:47.000000000 -0700 -@@ -676,6 +676,7 @@ static inline void __add_to_page_cache(s - page_cache_get(page); - page->index = offset; -+ page->private = 0; - add_page_to_inode_queue(mapping, page); - add_page_to_hash_queue(page, hash); diff --git a/lustre/kernel_patches/patches/atomic_add_return-sles9.patch b/lustre/kernel_patches/patches/atomic_add_return-sles9.patch new file mode 100644 index 0000000..3c2555b --- /dev/null +++ b/lustre/kernel_patches/patches/atomic_add_return-sles9.patch @@ -0,0 +1,104 @@ +Index: linux-2.6.5-7.283/include/asm-i386/atomic.h +=================================================================== +--- linux-2.6.5-7.283.orig/include/asm-i386/atomic.h ++++ linux-2.6.5-7.283/include/asm-i386/atomic.h +@@ -2,6 +2,7 @@ + #define __ARCH_I386_ATOMIC__ + + #include ++#include + + /* + * Atomic operations that C can't guarantee us. Useful for +@@ -176,6 +177,47 @@ static __inline__ int atomic_add_negativ + return c; + } + ++/** ++ * atomic_add_return - add and return ++ * @v: pointer of type atomic_t ++ * @i: integer value to add ++ * ++ * Atomically adds @i to @v and returns @i + @v ++ */ ++static __inline__ int atomic_add_return(int i, atomic_t *v) ++{ ++ int __i; ++#ifdef CONFIG_M386 ++ unsigned long flags; ++ if(unlikely(boot_cpu_data.x86==3)) ++ goto no_xadd; ++#endif ++ /* Modern 486+ processor */ ++ __i = i; ++ __asm__ __volatile__( ++ LOCK_PREFIX "xaddl %0, %1" ++ :"+r" (i), "+m" (v->counter) ++ : : "memory"); ++ return i + __i; ++ ++#ifdef CONFIG_M386 ++no_xadd: /* Legacy 386 processor */ ++ local_irq_save(flags); ++ __i = atomic_read(v); ++ atomic_set(v, i + __i); ++ local_irq_restore(flags); ++ return i + __i; ++#endif ++} ++ ++static __inline__ int atomic_sub_return(int i, atomic_t *v) ++{ ++ return atomic_add_return(-i,v); ++} ++ ++#define atomic_inc_return(v) (atomic_add_return(1,v)) ++#define atomic_dec_return(v) (atomic_sub_return(1,v)) ++ + /* These are x86-specific, used by some header files */ + #define atomic_clear_mask(mask, addr) \ + __asm__ __volatile__(LOCK "andl %0,%1" \ +Index: linux-2.6.5-7.283/include/asm-x86_64/atomic.h +=================================================================== +--- linux-2.6.5-7.283.orig/include/asm-x86_64/atomic.h ++++ linux-2.6.5-7.283/include/asm-x86_64/atomic.h +@@ -2,6 +2,7 @@ + #define __ARCH_X86_64_ATOMIC__ + + #include ++#include + + /* atomic_t should be 32 bit signed type */ + +@@ -178,6 +179,31 @@ static __inline__ int atomic_add_negativ + return c; + } + ++/** ++ * atomic_add_return - add and return ++ * @i: integer value to add ++ * @v: pointer of type atomic_t ++ * ++ * Atomically adds @i to @v and returns @i + @v ++ */ ++static __inline__ int atomic_add_return(int i, atomic_t *v) ++{ ++ int __i = i; ++ __asm__ __volatile__( ++ LOCK_PREFIX "xaddl %0, %1" ++ :"+r" (i), "+m" (v->counter) ++ : : "memory"); ++ return i + __i; ++} ++ ++static __inline__ int atomic_sub_return(int i, atomic_t *v) ++{ ++ return atomic_add_return(-i,v); ++} ++ ++#define atomic_inc_return(v) (atomic_add_return(1,v)) ++#define atomic_dec_return(v) (atomic_sub_return(1,v)) ++ + /* These are x86-specific, used by some header files */ + #define atomic_clear_mask(mask, addr) \ + __asm__ __volatile__(LOCK "andl %0,%1" \ diff --git a/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch b/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch deleted file mode 100644 index e863bd6..0000000 --- a/lustre/kernel_patches/patches/blkdev_tunables-2.4.21-chaos.patch +++ /dev/null @@ -1,52 +0,0 @@ -Index: linux-2.4.21/drivers/addon/qla2200/qla2x00.h -=================================================================== ---- linux-2.4.21.orig/drivers/addon/qla2200/qla2x00.h 2005-06-01 22:51:57.000000000 -0400 -+++ linux-2.4.21/drivers/addon/qla2200/qla2x00.h 2005-06-01 23:06:10.592857440 -0400 -@@ -3275,7 +3275,7 @@ - /* Kernel version specific template additions */ - - /* Number of segments 1 - 65535 */ --#define SG_SEGMENTS 32 /* Cmd entry + 6 continuations */ -+#define SG_SEGMENTS 512 /* Cmd entry + 6 continuations */ - - /* - * Scsi_Host_template (see hosts.h) -@@ -3289,7 +3289,7 @@ - * - */ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,8) --#define TEMPLATE_MAX_SECTORS max_sectors: 512, -+#define TEMPLATE_MAX_SECTORS max_sectors: 2048, - #else - #define TEMPLATE_MAX_SECTORS - #endif -Index: linux-2.4.21/include/linux/blkdev.h -=================================================================== ---- linux-2.4.21.orig/include/linux/blkdev.h 2005-06-01 22:51:55.000000000 -0400 -+++ linux-2.4.21/include/linux/blkdev.h 2005-06-01 23:07:26.186365480 -0400 -@@ -262,10 +262,10 @@ - - extern char * blkdev_varyio[MAX_BLKDEV]; - --#define MAX_SEGMENTS 128 -+#define MAX_SEGMENTS 256 - #define MAX_SECTORS 255 - /* General-case limit for superbh size: */ --#define MAX_SUPERBH 32768 /* must fit info ->b_size right now */ -+#define MAX_SUPERBH (1<<20) /* must fit info ->b_size right now */ - - /* Limit for superbh when we're certain it cannot be bounce-buffered: */ - #define MAX_SUPERBH_NOBOUNCE (1024*1024) /* must fit info ->b_size right now */ -Index: linux-2.4.21/mm/highmem.c -=================================================================== ---- linux-2.4.21.orig/mm/highmem.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/mm/highmem.c 2005-06-01 23:06:10.594857136 -0400 -@@ -474,7 +474,7 @@ - /* - * FIXME: assuming PAGE_SIZE buffer_heads - */ --#define SUPERBH_MAX_USERS (POOL_SIZE * PAGE_SIZE / MAX_SUPERBH) -+#define SUPERBH_MAX_USERS max(POOL_SIZE * PAGE_SIZE / MAX_SUPERBH, 1) - - static int superbh_users; - static DECLARE_WAIT_QUEUE_HEAD(superbh_wait); diff --git a/lustre/kernel_patches/patches/blkdev_tunables-2.6-sles10.patch b/lustre/kernel_patches/patches/blkdev_tunables-2.6-sles10.patch new file mode 100644 index 0000000..a2d7da2 --- /dev/null +++ b/lustre/kernel_patches/patches/blkdev_tunables-2.6-sles10.patch @@ -0,0 +1,13 @@ +Index: linux-2616-46014/include/scsi/scsi_host.h +=================================================================== +--- linux-2616-46014.orig/include/scsi/scsi_host.h ++++ linux-2616-46014/include/scsi/scsi_host.h +@@ -28,7 +28,7 @@ struct scsi_transport_template; + * used in one scatter-gather request. + */ + #define SG_NONE 0 +-#define SG_ALL 0xff ++#define SG_ALL 256 + + + #define DISABLE_CLUSTERING 0 diff --git a/lustre/kernel_patches/patches/compile-fixes-2.4.21-rhel.patch b/lustre/kernel_patches/patches/compile-fixes-2.4.21-rhel.patch deleted file mode 100644 index dae8a47..0000000 --- a/lustre/kernel_patches/patches/compile-fixes-2.4.21-rhel.patch +++ /dev/null @@ -1,90 +0,0 @@ -diff -X dontdiff -urp kern_oldest/drivers/i2c/i2c-ali1535.c kern_fix/drivers/i2c/i2c-ali1535.c ---- kern_oldest/drivers/i2c/i2c-ali1535.c 2006-05-01 11:55:43.000000000 -0700 -+++ kern_fix/drivers/i2c/i2c-ali1535.c 2006-05-01 15:25:34.000000000 -0700 -@@ -672,8 +672,8 @@ EXPORT_NO_SYMBOLS; - #ifdef MODULE - - MODULE_AUTHOR -- ("Frodo Looijaard , Philip Edelbrock , -- Mark D. Studebaker and Dan Eaton "); -+ ("Frodo Looijaard , Philip Edelbrock , " -+ "Mark D. Studebaker and Dan Eaton "); - MODULE_DESCRIPTION("ALI1535 SMBus driver"); - - int init_module(void) -diff -X dontdiff -urp kern_oldest/drivers/net/pcmcia/wvlan_cs.c kern_fix/drivers/net/pcmcia/wvlan_cs.c ---- kern_oldest/drivers/net/pcmcia/wvlan_cs.c 2006-05-01 11:55:40.000000000 -0700 -+++ kern_fix/drivers/net/pcmcia/wvlan_cs.c 2006-05-01 15:25:34.000000000 -0700 -@@ -1084,9 +1084,9 @@ static int wvlan_hw_config (struct net_d - /* This is a PrismII card. It is is *very* similar - * to the Lucent, and the driver work 95%, - * therefore, we attempt to support it... */ -- printk(KERN_NOTICE "%s: This is a PrismII card, not a Wavelan IEEE card :-( --You may want report firmare revision (0x%X) and what the card support. --I will try to make it work, but you should look for a better driver.\n", dev_info, firmware); -+ printk(KERN_NOTICE "%s: This is a PrismII card, not a Wavelan IEEE card :-(\n" -+"You may want report firmare revision (0x%X) and what the card support.\n" -+"I will try to make it work, but you should look for a better driver.\n", dev_info, firmware); - local->has_port3 = 1; - local->has_ibssid = 0; - local->has_mwo = 0; -diff -X dontdiff -urp kern_oldest/drivers/net/wan/sbni.c kern_fix/drivers/net/wan/sbni.c ---- kern_oldest/drivers/net/wan/sbni.c 2006-05-01 11:55:40.000000000 -0700 -+++ kern_fix/drivers/net/wan/sbni.c 2006-05-01 15:25:34.000000000 -0700 -@@ -1552,13 +1552,13 @@ __setup( "sbni=", sbni_setup ); - static u32 - calc_crc32( u32 crc, u8 *p, u32 len ) - { -- register u32 _crc __asm ( "ax" ); -+ register u32 _crc; - _crc = crc; - - __asm __volatile ( - "xorl %%ebx, %%ebx\n" -- "movl %1, %%esi\n" -- "movl %2, %%ecx\n" -+ "movl %2, %%esi\n" -+ "movl %3, %%ecx\n" - "movl $crc32tab, %%edi\n" - "shrl $2, %%ecx\n" - "jz 1f\n" -@@ -1594,7 +1594,7 @@ calc_crc32( u32 crc, u8 *p, u32 len - "jnz 0b\n" - - "1:\n" -- "movl %2, %%ecx\n" -+ "movl %3, %%ecx\n" - "andl $3, %%ecx\n" - "jz 2f\n" - -@@ -1619,9 +1619,9 @@ calc_crc32( u32 crc, u8 *p, u32 len - "xorb 2(%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - "2:\n" -- : -- : "a" (_crc), "g" (p), "g" (len) -- : "ax", "bx", "cx", "dx", "si", "di" -+ : "=a" (_crc) -+ : "0" (_crc), "g" (p), "g" (len) -+ : "bx", "cx", "dx", "si", "di" - ); - - return _crc; -diff -X dontdiff -urp kern_oldest/drivers/sensors/lm87.c kern_fix/drivers/sensors/lm87.c ---- kern_oldest/drivers/sensors/lm87.c 2006-05-01 11:55:44.000000000 -0700 -+++ kern_fix/drivers/sensors/lm87.c 2006-05-01 15:25:34.000000000 -0700 -@@ -1060,10 +1060,10 @@ MODULE_LICENSE("GPL"); - #endif - - MODULE_AUTHOR -- ("Frodo Looijaard , -- Philip Edelbrock , -- Mark Studebaker , -- and Stephen Rousset "); -+ ("Frodo Looijaard , " -+ "Philip Edelbrock , " -+ "Mark Studebaker , " -+ "and Stephen Rousset "); - - MODULE_DESCRIPTION("LM87 driver"); - diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch deleted file mode 100644 index 38bafff..0000000 --- a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch +++ /dev/null @@ -1,318 +0,0 @@ -Index: kernel-2.4.21/arch/i386/kernel/entry.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/entry.S 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/entry.S 2003-12-04 11:57:01.000000000 -0800 -@@ -45,6 +45,7 @@ - #include - #include - #include -+#include - - EBX = 0x00 - ECX = 0x04 -@@ -130,10 +131,6 @@ - .long 3b,6b; \ - .previous - --#define GET_CURRENT(reg) \ -- movl $-8192, reg; \ -- andl %esp, reg -- - ENTRY(lcall7) - pushfl # We get a different stack layout with call gates, - pushl %eax # which has to be cleaned up later.. -@@ -149,7 +146,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 -@@ -173,7 +170,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 -Index: kernel-2.4.21/arch/i386/kernel/smpboot.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/smpboot.c 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/smpboot.c 2003-12-04 11:57:01.000000000 -0800 -@@ -819,7 +819,7 @@ - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); -- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); -+ stack_start.esp = (void *)idle->thread.esp; - - /* - * This grunge runs the startup process for -@@ -892,7 +892,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -915,7 +915,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned long *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; - - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { - printk("Restoring NMI vector\n"); -Index: kernel-2.4.21/arch/i386/kernel/traps.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/traps.c 2002-11-28 15:53:09.000000000 -0800 -+++ kernel-2.4.21/arch/i386/kernel/traps.c 2003-12-04 11:57:01.000000000 -0800 -@@ -158,7 +158,7 @@ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ -- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)esp); - } -Index: kernel-2.4.21/arch/i386/kernel/head.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/head.S 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/head.S 2003-12-04 11:57:01.000000000 -0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - #define OLD_CL_MAGIC_ADDR 0x90020 - #define OLD_CL_MAGIC 0xA33F -@@ -320,7 +321,7 @@ - ret - - ENTRY(stack_start) -- .long SYMBOL_NAME(init_task_union)+8192 -+ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE - .long __KERNEL_DS - - /* This is the default interrupt "handler" :-) */ -Index: kernel-2.4.21/arch/i386/kernel/irq.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/irq.c 2002-11-28 15:53:09.000000000 -0800 -+++ kernel-2.4.21/arch/i386/kernel/irq.c 2003-12-04 11:57:01.000000000 -0800 -@@ -581,7 +581,10 @@ - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ -- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); -+ __asm__ __volatile__( -+ "andl %%esp,%0" -+ : "=r" (esp) : "0" (THREAD_SIZE-1)); -+ - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - -Index: kernel-2.4.21/arch/i386/lib/getuser.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/lib/getuser.S 1998-01-12 13:42:52.000000000 -0800 -+++ kernel-2.4.21/arch/i386/lib/getuser.S 2003-12-04 11:57:01.000000000 -0800 -@@ -21,6 +21,10 @@ - * as they get called from within inline assembly. - */ - -+/* Duplicated from asm/processor.h */ -+#include -+#include -+ - addr_limit = 12 - - .text -@@ -28,7 +32,7 @@ - .globl __get_user_1 - __get_user_1: - movl %esp,%edx -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 1: movzbl (%eax),%edx -@@ -41,7 +45,7 @@ - addl $1,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 2: movzwl -1(%eax),%edx -@@ -54,7 +58,7 @@ - addl $3,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 3: movl -3(%eax),%edx -Index: kernel-2.4.21/arch/i386/config.in -=================================================================== ---- kernel-2.4.21.orig/arch/i386/config.in 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/config.in 2003-12-04 11:57:01.000000000 -0800 -@@ -256,6 +256,29 @@ - if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y - fi -+ -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - endmenu - - mainmenu_option next_comment -Index: kernel-2.4.21/arch/i386/vmlinux.lds -=================================================================== ---- kernel-2.4.21.orig/arch/i386/vmlinux.lds 2002-02-25 11:37:53.000000000 -0800 -+++ kernel-2.4.21/arch/i386/vmlinux.lds 2003-12-04 11:57:01.000000000 -0800 -@@ -35,7 +35,8 @@ - - _edata = .; /* End of data section */ - -- . = ALIGN(8192); /* init_task */ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ -Index: kernel-2.4.21/include/asm-i386/current.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/current.h 1998-08-14 16:35:22.000000000 -0700 -+++ kernel-2.4.21/include/asm-i386/current.h 2003-12-04 11:57:01.000000000 -0800 -@@ -1,15 +1,43 @@ - #ifndef _I386_CURRENT_H - #define _I386_CURRENT_H -+#include -+ -+/* -+ * Configurable page sizes on i386, mainly for debugging purposes. -+ * (c) Balbir Singh -+ */ -+ -+#ifdef __ASSEMBLY__ -+ -+#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+ -+#define GET_CURRENT(reg) \ -+ movl $-THREAD_SIZE, reg; \ -+ andl %esp, reg -+ -+#else /* __ASSEMBLY__ */ -+ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+#define alloc_task_struct() \ -+ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) -+ -+#define free_task_struct(p) \ -+ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) -+ -+#define INIT_TASK_SIZE THREAD_SIZE - - struct task_struct; - - static inline struct task_struct * get_current(void) - { - struct task_struct *current; -- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); -+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); - return current; - } - - #define current get_current() - -+#endif /* __ASSEMBLY__ */ -+ - #endif /* !(_I386_CURRENT_H) */ -Index: kernel-2.4.21/include/asm-i386/hw_irq.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/hw_irq.h 2001-11-22 11:46:18.000000000 -0800 -+++ kernel-2.4.21/include/asm-i386/hw_irq.h 2003-12-04 11:57:01.000000000 -0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - /* - * IDT vectors usable for external interrupt sources start -@@ -113,10 +114,6 @@ - #define IRQ_NAME2(nr) nr##_interrupt(void) - #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - --#define GET_CURRENT \ -- "movl %esp, %ebx\n\t" \ -- "andl $-8192, %ebx\n\t" -- - /* - * SMP has a few special interrupts for IPI messages - */ -Index: kernel-2.4.21/include/asm-i386/processor.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/processor.h 2003-06-13 07:51:38.000000000 -0700 -+++ kernel-2.4.21/include/asm-i386/processor.h 2003-12-04 11:57:01.000000000 -0800 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -453,9 +454,6 @@ - #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) - #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - --#define THREAD_SIZE (2*PAGE_SIZE) --#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) --#define free_task_struct(p) free_pages((unsigned long) (p), 1) - #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - - #define init_task (init_task_union.task) -Index: kernel-2.4.21/include/linux/sched.h -=================================================================== ---- kernel-2.4.21.orig/include/linux/sched.h 2003-06-13 15:26:52.000000000 -0700 -+++ kernel-2.4.21/include/linux/sched.h 2003-12-04 12:00:14.000000000 -0800 -@@ -2,6 +2,7 @@ - #define _LINUX_SCHED_H - - #include /* for HZ */ -+#include /* maybe for INIT_TASK_SIZE */ - - extern unsigned long event; - diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch deleted file mode 100644 index 271ee19..0000000 --- a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-chaos.patch +++ /dev/null @@ -1,468 +0,0 @@ -Index: linux-2.4.21-27.EL/include/asm-i386/hw_irq.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-i386/hw_irq.h 2004-12-21 13:51:09.000000000 -0500 -+++ linux-2.4.21-27.EL/include/asm-i386/hw_irq.h 2005-01-07 10:55:45.367690072 -0500 -@@ -135,21 +135,17 @@ - " \ - /* load the real stack - keep the offset */ \ - \ -- movl $-8192, %ebx; \ -+ movl $- " STR(THREAD_SIZE) ", %ebx; \ - andl %esp, %ebx; \ - movl 36(%ebx), %edx; \ - movl %esp, %ebx; \ -- andl $0x1fff, %ebx; \ -+ andl $( " STR(THREAD_SIZE) "-1), %ebx; \ - orl %ebx, %edx; \ - movl %edx, %esp;" - - #define IRQ_NAME2(nr) nr##_interrupt(void) - #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - --#define GET_CURRENT \ -- "movl %esp, %ebx\n\t" \ -- "andl $-8192, %ebx\n\t" -- - /* - * SMP has a few special interrupts for IPI messages - */ -Index: linux-2.4.21-27.EL/include/asm-i386/processor.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-i386/processor.h 2004-12-21 13:51:31.000000000 -0500 -+++ linux-2.4.21-27.EL/include/asm-i386/processor.h 2005-01-07 10:55:45.376688704 -0500 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -490,10 +491,6 @@ - #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) - #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - --#define THREAD_SIZE (2*PAGE_SIZE) --#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) --#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0) -- - #define init_task (init_task_union.task) - #define init_stack (init_task_union.stack) - -Index: linux-2.4.21-27.EL/include/asm-i386/current.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-i386/current.h 1998-08-14 19:35:22.000000000 -0400 -+++ linux-2.4.21-27.EL/include/asm-i386/current.h 2005-01-07 10:55:45.356691744 -0500 -@@ -1,15 +1,64 @@ - #ifndef _I386_CURRENT_H - #define _I386_CURRENT_H -+#include -+ -+/* -+ * Configurable page sizes on i386, mainly for debugging purposes. -+ * (c) Balbir Singh -+ */ -+ -+/* enumerate the values, include/asm-i386/hw_irq.h in particular needs this */ -+#if (PAGE_SIZE != 4096) -+#error PAGE_SIZE != 4096 unsupported -+#endif -+ -+#if (CONFIG_STACK_SIZE_SHIFT == 0) -+#define THREAD_SIZE 4096 -+#elif (CONFIG_STACK_SIZE_SHIFT == 1) -+#define THREAD_SIZE 8192 -+#elif (CONFIG_STACK_SIZE_SHIFT == 2) -+#define THREAD_SIZE 16384 -+#elif (CONFIG_STACK_SIZE_SHIFT == 3) -+#define THREAD_SIZE 32768 -+#elif (CONFIG_STACK_SIZE_SHIFT == 4) -+#define THREAD_SIZE 65536 -+#else -+#error CONFIG_STACK_SIZE_SHIFT > 4 unsupported -+#endif -+ -+#if (CONFIG_STACK_SIZE_SHIFT != 1) && defined(CONFIG_X86_4G) -+#error Large stacks with 4G/4G split unsupported -+#endif -+ -+#ifdef __ASSEMBLY__ -+ -+#define GET_CURRENT(reg) \ -+ movl $-THREAD_SIZE, reg; \ -+ andl %esp, reg -+ -+#else /* __ASSEMBLY__ */ -+ -+#define __alloc_task_struct() \ -+ ((struct task_struct *) __get_free_pages(GFP_KERNEL, CONFIG_STACK_SIZE_SHIFT)) -+ -+#define __free_task_struct(p) do { \ -+ BUG_ON((p)->state < TASK_ZOMBIE); \ -+ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT); \ -+} while(0) -+ -+#define INIT_TASK_SIZE THREAD_SIZE - - struct task_struct; - - static inline struct task_struct * get_current(void) - { - struct task_struct *current; -- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); -+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); - return current; - } - - #define current get_current() - -+#endif /* __ASSEMBLY__ */ -+ - #endif /* !(_I386_CURRENT_H) */ -Index: linux-2.4.21-27.EL/include/asm-x86_64/processor.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-x86_64/processor.h 2004-12-21 13:51:31.000000000 -0500 -+++ linux-2.4.21-27.EL/include/asm-x86_64/processor.h 2005-01-07 10:58:24.167548824 -0500 -@@ -407,8 +407,8 @@ - /* Note: most of the infrastructure to separate stack and task_struct - are already there. When you run out of stack try this first. */ - --#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) --#define __free_task_struct(p) free_pages((unsigned long) (p), 1) -+#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL, THREAD_ORDER)) -+#define __free_task_struct(p) free_pages((unsigned long) (p), THREAD_ORDER) - - #define init_task (init_task_union.task) - #define init_stack (init_task_union.stack) -Index: linux-2.4.21-27.EL/include/asm-x86_64/page.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-x86_64/page.h 2004-12-21 13:51:10.000000000 -0500 -+++ linux-2.4.21-27.EL/include/asm-x86_64/page.h 2005-01-07 10:55:45.404684448 -0500 -@@ -27,8 +27,8 @@ - /* We still hope 8K is enough, but ... */ - /* Currently it is actually ~6k. This would change when task_struct moves into - an own slab. */ --#define THREAD_ORDER 1 --#define THREAD_SIZE (2*PAGE_SIZE) -+#define THREAD_ORDER CONFIG_STACK_SIZE_SHIFT -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) - - #define INIT_TASK_SIZE THREAD_SIZE - #define CURRENT_MASK (~(THREAD_SIZE-1)) -Index: linux-2.4.21-27.EL/include/asm-x86_64/current.h -=================================================================== ---- linux-2.4.21-27.EL.orig/include/asm-x86_64/current.h 2002-11-28 18:53:15.000000000 -0500 -+++ linux-2.4.21-27.EL/include/asm-x86_64/current.h 2005-01-07 10:55:45.394685968 -0500 -@@ -5,6 +5,7 @@ - struct task_struct; - - #include -+#include - - static inline struct task_struct *get_current(void) - { -Index: linux-2.4.21-27.EL/arch/x86_64/config.in -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/x86_64/config.in 2004-12-21 13:51:30.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/x86_64/config.in 2005-01-07 10:55:45.324696608 -0500 -@@ -90,6 +90,28 @@ - define_bool CONFIG_NUMA y - fi - -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - endmenu - - mainmenu_option next_comment -Index: linux-2.4.21-27.EL/arch/x86_64/kernel/smpboot.c -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/x86_64/kernel/smpboot.c 2004-12-21 13:51:01.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/x86_64/kernel/smpboot.c 2005-01-07 10:55:45.325696456 -0500 -@@ -753,7 +753,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_status = 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -771,7 +771,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned int *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned int *)phys_to_virt(THREAD_SIZE)) = 0; - - return cpu; - } -Index: linux-2.4.21-27.EL/arch/x86_64/kernel/traps.c -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/x86_64/kernel/traps.c 2004-12-21 13:51:15.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/x86_64/kernel/traps.c 2005-01-07 10:55:45.326696304 -0500 -@@ -240,7 +240,7 @@ - unsigned long rsp = tsk->thread.rsp; - - /* User space on another CPU? */ -- if ((rsp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((rsp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)rsp); - } -Index: linux-2.4.21-27.EL/arch/x86_64/vmlinux.lds -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/x86_64/vmlinux.lds 2003-06-13 10:51:32.000000000 -0400 -+++ linux-2.4.21-27.EL/arch/x86_64/vmlinux.lds 2005-01-07 10:55:45.327696152 -0500 -@@ -72,7 +72,8 @@ - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) } - . = LOADADDR(.vsyscall_0) + 4096; - -- . = ALIGN(8192); /* init_task */ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); -Index: linux-2.4.21-27.EL/arch/i386/config.in -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/config.in 2004-12-21 13:51:31.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/config.in 2005-01-07 10:55:45.324696608 -0500 -@@ -306,6 +306,28 @@ - fi - fi - -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y - fi -Index: linux-2.4.21-27.EL/arch/i386/kernel/entry.S -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/kernel/entry.S 2004-12-21 13:51:31.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/kernel/entry.S 2005-01-07 10:55:45.271704664 -0500 -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - #include - - EBX = 0x00 -@@ -94,10 +95,6 @@ - - ENOSYS = 38 - --#define GET_CURRENT(reg) \ -- movl $-8192, reg; \ -- andl %esp, reg -- - #if CONFIG_X86_HIGH_ENTRY - - #define call_SYMBOL_NAME_ABS(X) movl $X, %ebp; call *%ebp -@@ -193,7 +190,7 @@ - GET_CURRENT(%ebx); \ - movl real_stack(%ebx), %edx; \ - movl %esp, %ebx; \ -- andl $0x1fff, %ebx; \ -+ andl $(THREAD_SIZE-1), %ebx; \ - orl %ebx, %edx; \ - movl %edx, %esp; - -@@ -228,7 +225,7 @@ - return_path_start_marker: \ - nop; \ - movl %esp, %ebx; \ -- andl $0x1fff, %ebx; \ -+ andl $(THREAD_SIZE-1), %ebx; \ - orl %ebx, %edx; \ - movl %esp, %eax; \ - movl %edx, %esp; \ -Index: linux-2.4.21-27.EL/arch/i386/kernel/irq.c -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/kernel/irq.c 2004-12-21 13:51:22.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/kernel/irq.c 2005-01-07 10:55:45.307699192 -0500 -@@ -45,6 +45,7 @@ - #include - #include - #include -+#include - - - -@@ -585,7 +586,7 @@ - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ -- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); -+ __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE-1)); - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - -Index: linux-2.4.21-27.EL/arch/i386/kernel/smpboot.c -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/kernel/smpboot.c 2004-12-21 13:51:01.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/kernel/smpboot.c 2005-01-07 10:55:45.305699496 -0500 -@@ -814,7 +814,7 @@ - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); -- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); -+ stack_start.esp = (void *)idle->thread.esp; - - /* - * This grunge runs the startup process for -@@ -887,7 +887,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -910,7 +910,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned long *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; - - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { - printk("Restoring NMI vector\n"); -Index: linux-2.4.21-27.EL/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/kernel/traps.c 2004-12-21 13:51:15.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/kernel/traps.c 2005-01-07 10:55:45.306699344 -0500 -@@ -180,7 +180,7 @@ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ -- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)esp); - } -Index: linux-2.4.21-27.EL/arch/i386/kernel/head.S -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/kernel/head.S 2004-12-21 13:51:07.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/kernel/head.S 2005-01-07 10:55:45.307699192 -0500 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - #define OLD_CL_MAGIC_ADDR 0x90020 - #define OLD_CL_MAGIC 0xA33F -@@ -343,7 +344,7 @@ - ret - - ENTRY(stack_start) -- .long SYMBOL_NAME(init_task_union)+8192 -+ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE - .long __KERNEL_DS - - /* This is the default interrupt "handler" :-) */ -Index: linux-2.4.21-27.EL/arch/i386/lib/getuser.S -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/lib/getuser.S 1998-01-12 16:42:52.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/lib/getuser.S 2005-01-07 10:55:45.323696760 -0500 -@@ -21,6 +21,10 @@ - * as they get called from within inline assembly. - */ - -+/* Duplicated from asm/processor.h */ -+#include -+#include -+ - addr_limit = 12 - - .text -@@ -28,7 +32,7 @@ - .globl __get_user_1 - __get_user_1: - movl %esp,%edx -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 1: movzbl (%eax),%edx -@@ -41,7 +45,7 @@ - addl $1,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 2: movzwl -1(%eax),%edx -@@ -54,7 +58,7 @@ - addl $3,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 3: movl -3(%eax),%edx -Index: linux-2.4.21-27.EL/arch/i386/vmlinux.lds.in -=================================================================== ---- linux-2.4.21-27.EL.orig/arch/i386/vmlinux.lds.in 2004-12-21 13:51:08.000000000 -0500 -+++ linux-2.4.21-27.EL/arch/i386/vmlinux.lds.in 2005-01-07 10:55:45.324696608 -0500 -@@ -1,6 +1,7 @@ - - #define __ASSEMBLY__ - #include -+#include - - /* ld script to make i386 Linux kernel - * Written by Martin Mares ; -@@ -51,7 +52,7 @@ - - _edata = .; /* End of data section */ - -- . = ALIGN(8192); /* init_task */ -+ . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : { *(.data.init_task) } - - entry_tramp_start = .; diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse-171.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse-171.patch deleted file mode 100644 index 6e708bf..0000000 --- a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse-171.patch +++ /dev/null @@ -1,317 +0,0 @@ -Index: linux-2.4.21-171/arch/i386/kernel/entry.S -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/kernel/entry.S 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/kernel/entry.S 2004-04-03 16:02:32.000000000 -0500 -@@ -45,6 +45,7 @@ - #include - #include - #include -+#include - - EBX = 0x00 - ECX = 0x04 -@@ -130,10 +131,6 @@ - .long 3b,6b; \ - .previous - --#define GET_CURRENT(reg) \ -- movl $-8192, reg; \ -- andl %esp, reg -- - ENTRY(lcall7) - pushfl # We get a different stack layout with call gates, - pushl %eax # which has to be cleaned up later.. -@@ -149,7 +146,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 -@@ -173,7 +170,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 -Index: linux-2.4.21-171/arch/i386/kernel/smpboot.c -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/kernel/smpboot.c 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/kernel/smpboot.c 2004-04-03 16:02:32.000000000 -0500 -@@ -833,7 +833,7 @@ - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); -- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); -+ stack_start.esp = (void *)idle->thread.esp; - - /* - * This grunge runs the startup process for -@@ -914,7 +914,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -937,7 +937,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned long *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; - - #ifdef CONFIG_ES7000 - if (!es7000_plat) -Index: linux-2.4.21-171/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/kernel/traps.c 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/kernel/traps.c 2004-04-03 16:02:32.000000000 -0500 -@@ -193,7 +193,7 @@ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ -- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)esp); - } -Index: linux-2.4.21-171/arch/i386/kernel/head.S -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/kernel/head.S 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/kernel/head.S 2004-04-03 16:02:32.000000000 -0500 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - #define OLD_CL_MAGIC_ADDR 0x90020 - #define OLD_CL_MAGIC 0xA33F -@@ -326,7 +327,7 @@ - ret - - ENTRY(stack_start) -- .long SYMBOL_NAME(init_task_union)+8192 -+ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE - .long __KERNEL_DS - - /* This is the default interrupt "handler" :-) */ -Index: linux-2.4.21-171/arch/i386/kernel/irq.c -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/kernel/irq.c 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/kernel/irq.c 2004-04-03 16:05:17.000000000 -0500 -@@ -597,7 +597,9 @@ - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ -- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); -+ __asm__ __volatile__( -+ "andl %%esp,%0" -+ : "=r" (esp) : "0" (THREAD_SIZE-1)); - if (unlikely(esp < (sizeof(struct task_struct) + sysctl_stackwarn))) { - static unsigned long next_jiffies; /* ratelimiting */ - static long least_esp = THREAD_SIZE; -Index: linux-2.4.21-171/arch/i386/lib/getuser.S -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/lib/getuser.S 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/lib/getuser.S 2004-04-03 16:02:32.000000000 -0500 -@@ -21,6 +21,10 @@ - * as they get called from within inline assembly. - */ - -+/* Duplicated from asm/processor.h */ -+#include -+#include -+ - addr_limit = 12 - - .text -@@ -28,7 +32,7 @@ - .globl __get_user_1 - __get_user_1: - movl %esp,%edx -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 1: movzbl (%eax),%edx -@@ -41,7 +45,7 @@ - addl $1,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 2: movzwl -1(%eax),%edx -@@ -54,7 +58,7 @@ - addl $3,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 3: movl -3(%eax),%edx -Index: linux-2.4.21-171/arch/i386/config.in -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/config.in 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/config.in 2004-04-03 16:02:32.000000000 -0500 -@@ -325,6 +325,29 @@ - if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y - fi -+ -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - endmenu - - mainmenu_option next_comment -Index: linux-2.4.21-171/arch/i386/vmlinux.lds.S -=================================================================== ---- linux-2.4.21-171.orig/arch/i386/vmlinux.lds.S 2004-02-24 13:43:40.000000000 -0500 -+++ linux-2.4.21-171/arch/i386/vmlinux.lds.S 2004-04-03 16:02:32.000000000 -0500 -@@ -39,7 +39,8 @@ - - _edata = .; /* End of data section */ - -- . = ALIGN(8192); /* init_task */ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ -Index: linux-2.4.21-171/include/asm-i386/current.h -=================================================================== ---- linux-2.4.21-171.orig/include/asm-i386/current.h 2004-02-24 13:42:29.000000000 -0500 -+++ linux-2.4.21-171/include/asm-i386/current.h 2004-04-03 16:02:32.000000000 -0500 -@@ -1,15 +1,43 @@ - #ifndef _I386_CURRENT_H - #define _I386_CURRENT_H -+#include -+ -+/* -+ * Configurable page sizes on i386, mainly for debugging purposes. -+ * (c) Balbir Singh -+ */ -+ -+#ifdef __ASSEMBLY__ -+ -+#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+ -+#define GET_CURRENT(reg) \ -+ movl $-THREAD_SIZE, reg; \ -+ andl %esp, reg -+ -+#else /* __ASSEMBLY__ */ -+ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+#define alloc_task_struct() \ -+ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) -+ -+#define free_task_struct(p) \ -+ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) -+ -+#define INIT_TASK_SIZE THREAD_SIZE - - struct task_struct; - - static inline struct task_struct * get_current(void) - { - struct task_struct *current; -- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); -+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); - return current; - } - - #define current get_current() - -+#endif /* __ASSEMBLY__ */ -+ - #endif /* !(_I386_CURRENT_H) */ -Index: linux-2.4.21-171/include/asm-i386/hw_irq.h -=================================================================== ---- linux-2.4.21-171.orig/include/asm-i386/hw_irq.h 2004-02-24 13:42:29.000000000 -0500 -+++ linux-2.4.21-171/include/asm-i386/hw_irq.h 2004-04-03 16:02:32.000000000 -0500 -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - - /* - * IDT vectors usable for external interrupt sources start -@@ -116,10 +117,6 @@ - #define IRQ_NAME2(nr) nr##_interrupt(void) - #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - --#define GET_CURRENT \ -- "movl %esp, %ebx\n\t" \ -- "andl $-8192, %ebx\n\t" -- - /* - * SMP has a few special interrupts for IPI messages - */ -Index: linux-2.4.21-171/include/asm-i386/processor.h -=================================================================== ---- linux-2.4.21-171.orig/include/asm-i386/processor.h 2004-02-24 13:42:29.000000000 -0500 -+++ linux-2.4.21-171/include/asm-i386/processor.h 2004-04-03 16:02:32.000000000 -0500 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -464,9 +465,6 @@ - #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) - #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - --#define THREAD_SIZE (2*PAGE_SIZE) --#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) --#define free_task_struct(p) free_pages((unsigned long) (p), 1) - #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - - #define init_task (init_task_union.task) -Index: linux-2.4.21-171/include/linux/sched.h -=================================================================== ---- linux-2.4.21-171.orig/include/linux/sched.h 2004-03-31 14:58:26.000000000 -0500 -+++ linux-2.4.21-171/include/linux/sched.h 2004-04-03 16:02:32.000000000 -0500 -@@ -2,6 +2,7 @@ - #define _LINUX_SCHED_H - - #include /* for HZ */ -+#include /* maybe for INIT_TASK_SIZE */ - - extern unsigned long event; - diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse2.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse2.patch deleted file mode 100644 index dc68b3e..0000000 --- a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.21-suse2.patch +++ /dev/null @@ -1,318 +0,0 @@ -Index: kernel-2.4.21/arch/i386/kernel/entry.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/entry.S 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/entry.S 2003-12-04 11:57:01.000000000 -0800 -@@ -45,6 +45,7 @@ - #include - #include - #include -+#include - - EBX = 0x00 - ECX = 0x04 -@@ -130,10 +131,6 @@ - .long 3b,6b; \ - .previous - --#define GET_CURRENT(reg) \ -- movl $-8192, reg; \ -- andl %esp, reg -- - ENTRY(lcall7) - pushfl # We get a different stack layout with call gates, - pushl %eax # which has to be cleaned up later.. -@@ -149,7 +146,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 -@@ -173,7 +170,7 @@ - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx -- andl $-8192,%ebx # GET_CURRENT -+ andl $-THREAD_SIZE,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 -Index: kernel-2.4.21/arch/i386/kernel/smpboot.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/smpboot.c 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/smpboot.c 2003-12-04 11:57:01.000000000 -0800 -@@ -819,7 +819,7 @@ - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); -- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); -+ stack_start.esp = (void *)idle->thread.esp; - - /* - * This grunge runs the startup process for -@@ -892,7 +892,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -915,7 +915,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned long *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; - - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { - printk("Restoring NMI vector\n"); -Index: kernel-2.4.21/arch/i386/kernel/traps.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/traps.c 2002-11-28 15:53:09.000000000 -0800 -+++ kernel-2.4.21/arch/i386/kernel/traps.c 2003-12-04 11:57:01.000000000 -0800 -@@ -158,7 +158,7 @@ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ -- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)esp); - } -Index: kernel-2.4.21/arch/i386/kernel/head.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/head.S 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/kernel/head.S 2003-12-04 11:57:01.000000000 -0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - #define OLD_CL_MAGIC_ADDR 0x90020 - #define OLD_CL_MAGIC 0xA33F -@@ -320,7 +321,7 @@ - ret - - ENTRY(stack_start) -- .long SYMBOL_NAME(init_task_union)+8192 -+ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE - .long __KERNEL_DS - - /* This is the default interrupt "handler" :-) */ -Index: kernel-2.4.21/arch/i386/kernel/irq.c -=================================================================== ---- kernel-2.4.21.orig/arch/i386/kernel/irq.c 2002-11-28 15:53:09.000000000 -0800 -+++ kernel-2.4.21/arch/i386/kernel/irq.c 2003-12-04 11:57:01.000000000 -0800 -@@ -581,7 +581,10 @@ - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ -- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); -+ __asm__ __volatile__( -+ "andl %%esp,%0" -+ : "=r" (esp) : "0" (THREAD_SIZE-1)); -+ - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - -Index: kernel-2.4.21/arch/i386/lib/getuser.S -=================================================================== ---- kernel-2.4.21.orig/arch/i386/lib/getuser.S 1998-01-12 13:42:52.000000000 -0800 -+++ kernel-2.4.21/arch/i386/lib/getuser.S 2003-12-04 11:57:01.000000000 -0800 -@@ -21,6 +21,10 @@ - * as they get called from within inline assembly. - */ - -+/* Duplicated from asm/processor.h */ -+#include -+#include -+ - addr_limit = 12 - - .text -@@ -28,7 +32,7 @@ - .globl __get_user_1 - __get_user_1: - movl %esp,%edx -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 1: movzbl (%eax),%edx -@@ -41,7 +45,7 @@ - addl $1,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 2: movzwl -1(%eax),%edx -@@ -54,7 +58,7 @@ - addl $3,%eax - movl %esp,%edx - jc bad_get_user -- andl $0xffffe000,%edx -+ andl $~(THREAD_SIZE - 1),%edx - cmpl addr_limit(%edx),%eax - jae bad_get_user - 3: movl -3(%eax),%edx -Index: kernel-2.4.21/arch/i386/config.in -=================================================================== ---- kernel-2.4.21.orig/arch/i386/config.in 2003-06-13 07:51:29.000000000 -0700 -+++ kernel-2.4.21/arch/i386/config.in 2003-12-04 11:57:01.000000000 -0800 -@@ -256,6 +256,29 @@ - if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y - fi -+ -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - endmenu - - mainmenu_option next_comment -Index: kernel-2.4.21/arch/i386/vmlinux.lds -=================================================================== ---- kernel-2.4.21.orig/arch/i386/vmlinux.lds.S 2002-02-25 11:37:53.000000000 -0800 -+++ kernel-2.4.21/arch/i386/vmlinux.lds.S 2003-12-04 11:57:01.000000000 -0800 -@@ -35,7 +35,8 @@ - - _edata = .; /* End of data section */ - -- . = ALIGN(8192); /* init_task */ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ -Index: kernel-2.4.21/include/asm-i386/current.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/current.h 1998-08-14 16:35:22.000000000 -0700 -+++ kernel-2.4.21/include/asm-i386/current.h 2003-12-04 11:57:01.000000000 -0800 -@@ -1,15 +1,43 @@ - #ifndef _I386_CURRENT_H - #define _I386_CURRENT_H -+#include -+ -+/* -+ * Configurable page sizes on i386, mainly for debugging purposes. -+ * (c) Balbir Singh -+ */ -+ -+#ifdef __ASSEMBLY__ -+ -+#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+ -+#define GET_CURRENT(reg) \ -+ movl $-THREAD_SIZE, reg; \ -+ andl %esp, reg -+ -+#else /* __ASSEMBLY__ */ -+ -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) -+#define alloc_task_struct() \ -+ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) -+ -+#define free_task_struct(p) \ -+ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) -+ -+#define INIT_TASK_SIZE THREAD_SIZE - - struct task_struct; - - static inline struct task_struct * get_current(void) - { - struct task_struct *current; -- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); -+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); - return current; - } - - #define current get_current() - -+#endif /* __ASSEMBLY__ */ -+ - #endif /* !(_I386_CURRENT_H) */ -Index: kernel-2.4.21/include/asm-i386/hw_irq.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/hw_irq.h 2001-11-22 11:46:18.000000000 -0800 -+++ kernel-2.4.21/include/asm-i386/hw_irq.h 2003-12-04 11:57:01.000000000 -0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - - /* - * IDT vectors usable for external interrupt sources start -@@ -113,10 +114,6 @@ - #define IRQ_NAME2(nr) nr##_interrupt(void) - #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - --#define GET_CURRENT \ -- "movl %esp, %ebx\n\t" \ -- "andl $-8192, %ebx\n\t" -- - /* - * SMP has a few special interrupts for IPI messages - */ -Index: kernel-2.4.21/include/asm-i386/processor.h -=================================================================== ---- kernel-2.4.21.orig/include/asm-i386/processor.h 2003-06-13 07:51:38.000000000 -0700 -+++ kernel-2.4.21/include/asm-i386/processor.h 2003-12-04 11:57:01.000000000 -0800 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -453,9 +454,6 @@ - #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) - #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - --#define THREAD_SIZE (2*PAGE_SIZE) --#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) --#define free_task_struct(p) free_pages((unsigned long) (p), 1) - #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - - #define init_task (init_task_union.task) -Index: kernel-2.4.21/include/linux/sched.h -=================================================================== ---- kernel-2.4.21.orig/include/linux/sched.h 2003-06-13 15:26:52.000000000 -0700 -+++ kernel-2.4.21/include/linux/sched.h 2003-12-04 12:00:14.000000000 -0800 -@@ -2,6 +2,7 @@ - #define _LINUX_SCHED_H - - #include /* for HZ */ -+#include /* maybe for INIT_TASK_SIZE */ - - extern unsigned long event; - diff --git a/lustre/kernel_patches/patches/configurable-x86_64-2.4.21.patch b/lustre/kernel_patches/patches/configurable-x86_64-2.4.21.patch deleted file mode 100644 index 757ee71..0000000 --- a/lustre/kernel_patches/patches/configurable-x86_64-2.4.21.patch +++ /dev/null @@ -1,122 +0,0 @@ -Index: linux-2.4.21-drop2/arch/x86_64/kernel/smpboot.c -=================================================================== ---- linux-2.4.21-drop2.orig/arch/x86_64/kernel/smpboot.c 2003-10-28 10:34:20.000000000 -0800 -+++ linux-2.4.21-drop2/arch/x86_64/kernel/smpboot.c 2004-08-06 06:18:39.000000000 -0700 -@@ -751,7 +751,7 @@ - Dprintk("CPU has booted.\n"); - } else { - boot_status = 1; -- if (*((volatile unsigned char *)phys_to_virt(8192)) -+ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); -@@ -770,7 +770,7 @@ - } - - /* mark "stuck" area as not stuck */ -- *((volatile unsigned int *)phys_to_virt(8192)) = 0; -+ *((volatile unsigned int *)phys_to_virt(THREAD_SIZE)) = 0; - - return cpu; - } -Index: linux-2.4.21-drop2/arch/x86_64/kernel/traps.c -=================================================================== ---- linux-2.4.21-drop2.orig/arch/x86_64/kernel/traps.c 2003-11-06 15:52:41.000000000 -0800 -+++ linux-2.4.21-drop2/arch/x86_64/kernel/traps.c 2004-08-06 06:18:39.000000000 -0700 -@@ -239,7 +239,7 @@ - unsigned long rsp = tsk->thread.rsp; - - /* User space on another CPU? */ -- if ((rsp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ if ((rsp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) - return; - show_trace((unsigned long *)rsp); - } -Index: linux-2.4.21-drop2/arch/x86_64/config.in -=================================================================== ---- linux-2.4.21-drop2.orig/arch/x86_64/config.in 2003-10-28 10:34:25.000000000 -0800 -+++ linux-2.4.21-drop2/arch/x86_64/config.in 2004-08-06 06:20:20.000000000 -0700 -@@ -91,6 +91,28 @@ - define_bool CONFIG_NUMA y - fi - -+choice 'Bigger Stack Size Support' \ -+ "off CONFIG_NOBIGSTACK \ -+ 16KB CONFIG_STACK_SIZE_16KB \ -+ 32KB CONFIG_STACK_SIZE_32KB \ -+ 64KB CONFIG_STACK_SIZE_64KB" off -+ -+if [ "$CONFIG_NOBIGSTACK" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 1 -+else -+ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 2 -+ else -+ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 3 -+ else -+ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then -+ define_int CONFIG_STACK_SIZE_SHIFT 4 -+ fi -+ fi -+ fi -+fi -+ - endmenu - - mainmenu_option next_comment -Index: linux-2.4.21-drop2/arch/x86_64/vmlinux.lds -=================================================================== ---- linux-2.4.21-drop2.orig/arch/x86_64/vmlinux.lds 2003-06-13 07:51:32.000000000 -0700 -+++ linux-2.4.21-drop2/arch/x86_64/vmlinux.lds 2004-08-06 06:18:39.000000000 -0700 -@@ -72,7 +72,8 @@ - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) } - . = LOADADDR(.vsyscall_0) + 4096; - -- . = ALIGN(8192); /* init_task */ -+/* chose the biggest of the possible stack sizes here? */ -+ . = ALIGN(65536); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); -Index: linux-2.4.21-drop2/include/asm-x86_64/current.h -=================================================================== ---- linux-2.4.21-drop2.orig/include/asm-x86_64/current.h 2003-11-10 16:44:28.000000000 -0800 -+++ linux-2.4.21-drop2/include/asm-x86_64/current.h 2004-08-06 06:24:33.000000000 -0700 -@@ -5,6 +5,7 @@ - struct task_struct; - - #include -+#include - - static inline struct task_struct *get_current(void) - { -Index: linux-2.4.21-drop2/include/asm-x86_64/page.h -=================================================================== ---- linux-2.4.21-drop2.orig/include/asm-x86_64/page.h 2003-10-28 10:34:00.000000000 -0800 -+++ linux-2.4.21-drop2/include/asm-x86_64/page.h 2004-08-06 06:24:33.000000000 -0700 -@@ -27,8 +27,8 @@ - /* We still hope 8K is enough, but ... */ - /* Currently it is actually ~6k. This would change when task_struct moves into - an own slab. */ --#define THREAD_ORDER 1 --#define THREAD_SIZE (2*PAGE_SIZE) -+#define THREAD_ORDER CONFIG_STACK_SIZE_SHIFT -+#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) - - #define INIT_TASK_SIZE THREAD_SIZE - #define CURRENT_MASK (~(THREAD_SIZE-1)) -Index: linux-2.4.21-drop2/include/asm-x86_64/processor.h -=================================================================== ---- linux-2.4.21-drop2.orig/include/asm-x86_64/processor.h 2003-11-10 16:44:28.000000000 -0800 -+++ linux-2.4.21-drop2/include/asm-x86_64/processor.h 2004-08-06 06:24:33.000000000 -0700 -@@ -385,7 +385,7 @@ - are already there. When you run out of stack try this first. */ - #define alloc_task_struct() \ - ((struct task_struct *) __get_free_pages(GFP_KERNEL,THREAD_ORDER)) --#define free_task_struct(p) free_pages((unsigned long) (p), 1) -+#define free_task_struct(p) free_pages((unsigned long) (p), THREAD_ORDER) - #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - - #define init_task (init_task_union.task) diff --git a/lustre/kernel_patches/patches/dcache_refcount_debug.patch b/lustre/kernel_patches/patches/dcache_refcount_debug.patch deleted file mode 100644 index 0eddc23..0000000 --- a/lustre/kernel_patches/patches/dcache_refcount_debug.patch +++ /dev/null @@ -1,24 +0,0 @@ ---- ./fs/dcache.c.orig 2004-01-30 14:54:45.000000000 -0700 -+++ ./fs/dcache.c 2004-02-20 14:49:18.000000000 -0700 -@@ -348,8 +348,20 @@ - dentry_stat.nr_unused--; - - /* Unused dentry with a count? */ -- if (atomic_read(&dentry->d_count)) -+ if (atomic_read(&dentry->d_count)) { -+ struct inode *inode = dentry->d_inode; -+ printk(KERN_CRIT "dentry %*s %p->%lu/%u(%p/%s) ct %d\n", -+ dentry->d_name.len, dentry->d_name.name, dentry, -+ inode ? inode->i_ino : 0, -+ inode ? inode->i_generation : 0, inode, -+ inode ? inode->i_sb ? -+ inode->i_sb->s_type->name : "" : "", -+ atomic_read(&dentry->d_count)); -+ spin_unlock(&dcache_lock); -+ set_task_state(current, TASK_UNINTERRUPTIBLE); -+ schedule(); - BUG(); -+ } - - prune_one_dentry(dentry); - if (!--count) diff --git a/lustre/kernel_patches/patches/debugging-fields-in-current.patch b/lustre/kernel_patches/patches/debugging-fields-in-current.patch new file mode 100644 index 0000000..5bbba37 --- /dev/null +++ b/lustre/kernel_patches/patches/debugging-fields-in-current.patch @@ -0,0 +1,14 @@ +Index: linux/include/linux/sched.h +=================================================================== +--- linux.orig/include/linux/sched.h ++++ linux/include/linux/sched.h +@@ -650,6 +650,9 @@ struct task_struct { + #endif + /* where this task blocked */ + struct sleep_info sinfo; ++ ++ __u64 debugging0; ++ __u64 debugging1; + }; + + static inline pid_t process_group(struct task_struct *tsk) diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6-fc5.patch b/lustre/kernel_patches/patches/dev_read_only-2.6-fc5.patch index 808f0b7..5cab63e 100644 --- a/lustre/kernel_patches/patches/dev_read_only-2.6-fc5.patch +++ b/lustre/kernel_patches/patches/dev_read_only-2.6-fc5.patch @@ -1,20 +1,7 @@ -Index: linux-2.6.16.i686/fs/block_dev.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/block_dev.c 2006-03-20 13:53:29.000000000 +0800 -+++ linux-2.6.16.i686/fs/block_dev.c 2006-05-30 21:15:22.000000000 +0800 -@@ -747,6 +747,7 @@ - } - unlock_kernel(); - up(&bdev->bd_sem); -+ dev_clear_rdonly(bdev); - bdput(bdev); - return ret; - } -Index: linux-2.6.16.i686/block/ll_rw_blk.c -=================================================================== ---- linux-2.6.16.i686.orig/block/ll_rw_blk.c 2006-05-30 15:47:03.000000000 +0800 -+++ linux-2.6.16.i686/block/ll_rw_blk.c 2006-05-30 21:18:49.000000000 +0800 -@@ -2940,6 +2940,8 @@ +diff -rup linux-2.6.16.i686.orig/block/ll_rw_blk.c linux-2.6.16.i686/block/ll_rw_blk.c +--- linux-2.6.16.i686.orig/block/ll_rw_blk.c 2007-05-29 15:24:36.000000000 +0300 ++++ linux-2.6.16.i686/block/ll_rw_blk.c 2007-05-29 15:33:50.000000000 +0300 +@@ -2940,6 +2940,8 @@ static void handle_bad_sector(struct bio set_bit(BIO_EOF, &bio->bi_flags); } @@ -23,7 +10,7 @@ Index: linux-2.6.16.i686/block/ll_rw_blk.c /** * generic_make_request: hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. -@@ -3020,6 +3022,12 @@ +@@ -3020,6 +3022,12 @@ end_io: if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; @@ -36,7 +23,7 @@ Index: linux-2.6.16.i686/block/ll_rw_blk.c /* * If this device has partitions, remap block n -@@ -3593,6 +3601,91 @@ +@@ -3593,6 +3601,91 @@ void swap_io_context(struct io_context * *ioc2 = temp; } EXPORT_SYMBOL(swap_io_context); @@ -128,11 +115,21 @@ Index: linux-2.6.16.i686/block/ll_rw_blk.c /* * sysfs parts below -Index: linux-2.6.16.i686/include/linux/fs.h -=================================================================== ---- linux-2.6.16.i686.orig/include/linux/fs.h 2006-05-30 21:12:27.000000000 +0800 -+++ linux-2.6.16.i686/include/linux/fs.h 2006-05-30 21:15:22.000000000 +0800 -@@ -1558,6 +1558,10 @@ +diff -rup linux-2.6.16.i686.orig/fs/block_dev.c linux-2.6.16.i686/fs/block_dev.c +--- linux-2.6.16.i686.orig/fs/block_dev.c 2006-03-20 07:53:29.000000000 +0200 ++++ linux-2.6.16.i686/fs/block_dev.c 2007-05-29 15:35:00.000000000 +0300 +@@ -60,6 +60,7 @@ static void kill_bdev(struct block_devic + { + invalidate_bdev(bdev, 1); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); ++ dev_clear_rdonly(bdev); + } + + int set_blocksize(struct block_device *bdev, int size) +diff -rup linux-2.6.16.i686.orig/include/linux/fs.h linux-2.6.16.i686/include/linux/fs.h +--- linux-2.6.16.i686.orig/include/linux/fs.h 2007-05-29 15:24:38.000000000 +0300 ++++ linux-2.6.16.i686/include/linux/fs.h 2007-05-29 15:33:50.000000000 +0300 +@@ -1541,6 +1541,10 @@ extern void file_kill(struct file *f); struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6-suse.patch b/lustre/kernel_patches/patches/dev_read_only-2.6-suse.patch index d5a5ac4..e486944 100644 --- a/lustre/kernel_patches/patches/dev_read_only-2.6-suse.patch +++ b/lustre/kernel_patches/patches/dev_read_only-2.6-suse.patch @@ -1,16 +1,17 @@ -diff -ur linux-2.6.5-lnxi.orig/drivers/block/ll_rw_blk.c linux-2.6.5-lnxi/drivers/block/ll_rw_blk.c ---- linux-2.6.5-lnxi.orig/drivers/block/ll_rw_blk.c 2005-04-11 10:16:14.278505679 -0700 -+++ linux-2.6.5-lnxi/drivers/block/ll_rw_blk.c 2005-04-11 09:42:22.750936924 -0700 -@@ -2458,6 +2458,8 @@ static inline void blk_partition_remap(s - } +Index: linux-2.6.9/drivers/block/ll_rw_blk.c +=================================================================== +--- linux-2.6.9.orig/drivers/block/ll_rw_blk.c ++++ linux-2.6.9/drivers/block/ll_rw_blk.c +@@ -2326,6 +2326,8 @@ static inline int attempt_front_merge(re + return 0; } +int dev_check_rdonly(struct block_device *bdev); + /** - * generic_make_request: hand a buffer to its device driver for I/O - * @bio: The bio describing the location in memory and on the device. -@@ -2546,6 +2548,13 @@ end_io: + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device +@@ -2631,6 +2633,13 @@ end_io: if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) goto end_io; @@ -24,8 +25,8 @@ diff -ur linux-2.6.5-lnxi.orig/drivers/block/ll_rw_blk.c linux-2.6.5-lnxi/driver /* * If this device has partitions, remap block n * of partition p to block n+start(p) of the disk. -@@ -3078,6 +3087,92 @@ void swap_io_context(struct io_context * - } +@@ -3180,6 +3189,92 @@ void swap_io_context(struct io_context * + /* + * Debug code for turning block devices "read-only" (will discard writes @@ -117,23 +118,25 @@ diff -ur linux-2.6.5-lnxi.orig/drivers/block/ll_rw_blk.c linux-2.6.5-lnxi/driver * sysfs parts below */ struct queue_sysfs_entry { -diff -ur linux-2.6.5-lnxi.orig/fs/block_dev.c linux-2.6.5-lnxi/fs/block_dev.c ---- linux-2.6.5-lnxi.orig/fs/block_dev.c 2004-11-11 07:28:30.000000000 -0800 -+++ linux-2.6.5-lnxi/fs/block_dev.c 2005-04-11 09:49:01.891407856 -0700 -@@ -739,6 +739,7 @@ int blkdev_put(struct block_device *bdev - } - unlock_kernel(); - up(&bdev->bd_sem); +Index: linux-2.6.9/fs/block_dev.c +=================================================================== +--- linux-2.6.9.orig/fs/block_dev.c ++++ linux-2.6.9/fs/block_dev.c +@@ -60,6 +60,7 @@ static void kill_bdev(struct block_devic + { + invalidate_bdev(bdev, 1); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); + dev_clear_rdonly(bdev); - bdput(bdev); - return ret; - } -diff -ur linux-2.6.5-lnxi.orig/include/linux/fs.h linux-2.6.5-lnxi/include/linux/fs.h ---- linux-2.6.5-lnxi.orig/include/linux/fs.h 2004-11-11 07:28:45.000000000 -0800 -+++ linux-2.6.5-lnxi/include/linux/fs.h 2005-04-11 09:43:27.423116140 -0700 -@@ -1385,6 +1385,10 @@ extern void file_kill(struct file *f); + } + + int set_blocksize(struct block_device *bdev, int size) +Index: linux-2.6.9/include/linux/fs.h +=================================================================== +--- linux-2.6.9.orig/include/linux/fs.h ++++ linux-2.6.9/include/linux/fs.h +@@ -1492,6 +1492,10 @@ extern void file_kill(struct file *f); struct bio; - extern int submit_bio(int, struct bio *); + extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); +#define HAVE_CLEAR_RDONLY_ON_PUT +void dev_set_rdonly(struct block_device *bdev); diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch index 253efb8..ff6cf91 100644 --- a/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch @@ -1,19 +1,6 @@ -Index: linux-2.6/fs/block_dev.c -=================================================================== ---- linux-2.6.orig/fs/block_dev.c 2006-07-06 23:41:48.000000000 +0800 -+++ linux-2.6/fs/block_dev.c 2006-07-15 16:20:25.000000000 +0800 -@@ -1118,6 +1118,7 @@ static int __blkdev_put(struct block_dev - } - unlock_kernel(); - mutex_unlock(&bdev->bd_mutex); -+ dev_clear_rdonly(bdev); - bdput(bdev); - return ret; - } -Index: linux-2.6/block/ll_rw_blk.c -=================================================================== ---- linux-2.6.orig/block/ll_rw_blk.c 2006-07-10 22:30:08.000000000 +0800 -+++ linux-2.6/block/ll_rw_blk.c 2006-07-15 16:15:14.000000000 +0800 +diff -urp linux-2.6.18.1.orig/block/ll_rw_blk.c linux-2.6.18.1/block/ll_rw_blk.c +--- linux-2.6.18.1.orig/block/ll_rw_blk.c 2006-10-14 06:34:03.000000000 +0300 ++++ linux-2.6.18.1/block/ll_rw_blk.c 2007-05-29 14:50:46.000000000 +0300 @@ -2993,6 +2993,8 @@ static void handle_bad_sector(struct bio set_bit(BIO_EOF, &bio->bi_flags); } @@ -36,7 +23,7 @@ Index: linux-2.6/block/ll_rw_blk.c /* * If this device has partitions, remap block n -@@ -3673,6 +3681,91 @@ void swap_io_context(struct io_context * +@@ -3675,6 +3683,91 @@ void swap_io_context(struct io_context * *ioc2 = temp; } EXPORT_SYMBOL(swap_io_context); @@ -128,11 +115,21 @@ Index: linux-2.6/block/ll_rw_blk.c /* * sysfs parts below -Index: linux-2.6/include/linux/fs.h -=================================================================== ---- linux-2.6.orig/include/linux/fs.h 2006-07-15 16:14:58.000000000 +0800 -+++ linux-2.6/include/linux/fs.h 2006-07-15 16:15:14.000000000 +0800 -@@ -1648,6 +1648,10 @@ extern void file_kill(struct file *f); +diff -urp linux-2.6.18.1.orig/fs/block_dev.c linux-2.6.18.1/fs/block_dev.c +--- linux-2.6.18.1.orig/fs/block_dev.c 2006-10-14 06:34:03.000000000 +0300 ++++ linux-2.6.18.1/fs/block_dev.c 2007-05-29 14:53:38.000000000 +0300 +@@ -58,6 +58,7 @@ static void kill_bdev(struct block_devic + { + invalidate_bdev(bdev, 1); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); ++ dev_clear_rdonly(bdev); + } + + int set_blocksize(struct block_device *bdev, int size) +diff -urp linux-2.6.18.1.orig/include/linux/fs.h linux-2.6.18.1/include/linux/fs.h +--- linux-2.6.18.1.orig/include/linux/fs.h 2006-10-14 06:34:03.000000000 +0300 ++++ linux-2.6.18.1/include/linux/fs.h 2007-05-29 14:50:46.000000000 +0300 +@@ -1632,6 +1632,10 @@ extern void file_kill(struct file *f); struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch deleted file mode 100644 index c7650fd..0000000 --- a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch +++ /dev/null @@ -1,125 +0,0 @@ -diff -ur linux-2.4.20-rh.orig/drivers/block/ll_rw_blk.c linux-2.4.20-rh/drivers/block/ll_rw_blk.c ---- linux-2.4.20-rh.orig/drivers/block/ll_rw_blk.c 2004-05-27 11:25:09.000000000 -0700 -+++ linux-2.4.20-rh/drivers/block/ll_rw_blk.c 2005-04-08 09:02:14.734804881 -0700 -@@ -645,6 +645,84 @@ void set_device_ro(kdev_t dev,int flag) - else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); - } - -+/* -+ * Debug code for turning block devices read-only *silently* (will -+ * discard writes silently). This is only for filesystem crash/recovery -+ * testing. -+ */ -+struct deventry { -+ kdev_t dev; -+ struct deventry *next; -+}; -+ -+static struct deventry *devlist = NULL; -+static spinlock_t devlock = SPIN_LOCK_UNLOCKED; -+ -+int dev_check_rdonly(kdev_t dev) { -+ struct deventry *cur; -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ spin_unlock(&devlock); -+ return 1; -+ } -+ cur = cur->next; -+ } -+ spin_unlock(&devlock); -+ return 0; -+} -+ -+void dev_set_rdonly(kdev_t dev) -+{ -+ struct deventry *newdev, *cur; -+ newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL); -+ if (!newdev) return; -+ -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ spin_unlock(&devlock); -+ kfree(newdev); -+ return; -+ } -+ cur = cur->next; -+ } -+ newdev->dev = dev; -+ newdev->next = devlist; -+ devlist = newdev; -+ spin_unlock(&devlock); -+ printk(KERN_WARNING "Turning device %s read-only\n", bdevname(dev)); -+} -+ -+void dev_clear_rdonly(kdev_t dev) { -+ struct deventry *cur, *last = NULL; -+ -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ if (last) -+ last->next = cur->next; -+ else -+ devlist = cur->next; -+ spin_unlock(&devlock); -+ kfree(cur); -+ printk(KERN_WARNING "Removing read-only on %s\n", -+ bdevname(dev)); -+ return; -+ } -+ last = cur; -+ cur = cur->next; -+ } -+ spin_unlock(&devlock); -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); -+ - inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) - { -@@ -1183,6 +1263,10 @@ void generic_make_request (int rw, struc - buffer_IO_error(bh); - break; - } -+ if ((rw & WRITE)&&(dev_check_rdonly(bh->b_rdev))) { -+ bh->b_end_io(bh, 0); -+ break; -+ } - } while (q->make_request_fn(q, rw, bh)); - } - -diff -ur linux-2.4.20-rh.orig/fs/block_dev.c linux-2.4.20-rh/fs/block_dev.c ---- linux-2.4.20-rh.orig/fs/block_dev.c 2002-08-02 17:39:45.000000000 -0700 -+++ linux-2.4.20-rh/fs/block_dev.c 2005-04-08 09:02:15.005789333 -0700 -@@ -645,6 +645,7 @@ int blkdev_put(struct block_device *bdev - bdev->bd_op = NULL; - unlock_kernel(); - up(&bdev->bd_sem); -+ dev_clear_rdonly(to_kdev_t(bdev->bd_dev)); - bdput(bdev); - return ret; - } -diff -ur linux-2.4.20-rh.orig/include/linux/fs.h linux-2.4.20-rh/include/linux/fs.h ---- linux-2.4.20-rh.orig/include/linux/fs.h 2004-05-27 11:25:09.000000000 -0700 -+++ linux-2.4.20-rh/include/linux/fs.h 2005-04-08 09:02:14.915794496 -0700 -@@ -1425,6 +1425,10 @@ extern struct buffer_head * getblk(kdev_ - extern void ll_rw_block(int, int, struct buffer_head * bh[]); - extern void submit_bh(int, struct buffer_head *); - extern int is_read_only(kdev_t); -+#define HAVE_CLEAR_RDONLY_ON_PUT -+void dev_set_rdonly(kdev_t dev); -+int dev_check_rdonly(kdev_t dev); -+void dev_clear_rdonly(kdev_t dev); - extern void __brelse(struct buffer_head *); - static inline void brelse(struct buffer_head *buf) - { - diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.21-chaos.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.21-chaos.patch deleted file mode 100644 index 3902db5..0000000 --- a/lustre/kernel_patches/patches/dev_read_only_2.4.21-chaos.patch +++ /dev/null @@ -1,122 +0,0 @@ ---- linux-2.4.24.orig/drivers/block/ll_rw_blk.c 2005-04-07 17:30:58.978035892 -0700 -+++ linux-2.4.24/drivers/block/ll_rw_blk.c 2005-04-07 17:22:04.354867801 -0700 -@@ -691,6 +691,85 @@ void set_device_ro(kdev_t dev,int flag) - else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); - } - -+ -+/* -+ * Debug code for turning block devices read-only *silently* (will -+ * discard writes silently). This is only for filesystem crash/recovery -+ * testing. -+ */ -+struct deventry { -+ kdev_t dev; -+ struct deventry *next; -+}; -+ -+static struct deventry *devlist = NULL; -+static spinlock_t devlock = SPIN_LOCK_UNLOCKED; -+ -+int dev_check_rdonly(kdev_t dev) { -+ struct deventry *cur; -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ spin_unlock(&devlock); -+ return 1; -+ } -+ cur = cur->next; -+ } -+ spin_unlock(&devlock); -+ return 0; -+} -+ -+void dev_set_rdonly(kdev_t dev) -+{ -+ struct deventry *newdev, *cur; -+ newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL); -+ if (!newdev) return; -+ -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ spin_unlock(&devlock); -+ kfree(newdev); -+ return; -+ } -+ cur = cur->next; -+ } -+ newdev->dev = dev; -+ newdev->next = devlist; -+ devlist = newdev; -+ spin_unlock(&devlock); -+ printk(KERN_WARNING "Turning device %s read-only\n", bdevname(dev)); -+} -+ -+void dev_clear_rdonly(kdev_t dev) { -+ struct deventry *cur, *last = NULL; -+ spin_lock(&devlock); -+ cur = devlist; -+ while(cur) { -+ if (dev == cur->dev) { -+ if (last) -+ last->next = cur->next; -+ else -+ devlist = cur->next; -+ spin_unlock(&devlock); -+ kfree(cur); -+ printk(KERN_WARNING "Removing read-only on %s\n", -+ bdevname(dev)); -+ return; -+ } -+ last = cur; -+ cur = cur->next; -+ } -+ spin_unlock(&devlock); -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); -+ -+ - inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) - { -@@ -1256,6 +1336,10 @@ void generic_make_request (int rw, struc - break; - } - -+ if ((rw & WRITE)&&(dev_check_rdonly(bh->b_rdev))) { -+ bh->b_end_io(bh, 0); -+ break; -+ } - } while (q->make_request_fn(q, rw, bh)); - } - ---- linux-2.4.24.orig/include/linux/fs.h 2003-11-28 10:26:21.000000000 -0800 -+++ linux-2.4.24/include/linux/fs.h 2005-04-07 17:03:36.810595409 -0700 -@@ -1418,6 +1418,10 @@ extern struct buffer_head * getblk(kdev_ - extern void ll_rw_block(int, int, struct buffer_head * bh[]); - extern void submit_bh(int, struct buffer_head *); - extern int is_read_only(kdev_t); -+#define HAVE_CLEAR_RDONLY_ON_PUT -+void dev_set_rdonly(kdev_t dev); -+int dev_check_rdonly(kdev_t dev); -+void dev_clear_rdonly(kdev_t dev); - extern void __brelse(struct buffer_head *); - static inline void brelse(struct buffer_head *buf) - { ---- linux-2.4.24.orig/fs/block_dev.c 2003-06-13 07:51:37.000000000 -0700 -+++ linux-2.4.24/fs/block_dev.c 2005-04-07 14:01:49.000000000 -0700 -@@ -645,6 +645,7 @@ int blkdev_put(struct block_device *bdev - bdev->bd_op = NULL; - unlock_kernel(); - up(&bdev->bd_sem); -+ dev_clear_rdonly(to_kdev_t(bdev->bd_dev)); - bdput(bdev); - return ret; - } diff --git a/lustre/kernel_patches/patches/dynamic-locks-2.6-fc3.patch b/lustre/kernel_patches/patches/dynamic-locks-2.6-fc3.patch new file mode 100644 index 0000000..426af25 --- /dev/null +++ b/lustre/kernel_patches/patches/dynamic-locks-2.6-fc3.patch @@ -0,0 +1,288 @@ + include/linux/dynlocks.h | 33 ++++++++++ + lib/Makefile | 4 - + lib/dynlocks.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 187 insertions(+), 2 deletions(-) + +Index: linux-2.6.10/fs/dcache.c +=================================================================== +--- linux-2.6.10.orig/fs/dcache.c 2006-10-21 11:52:54.000000000 +0800 ++++ linux-2.6.10/fs/dcache.c 2006-10-21 13:04:55.000000000 +0800 +@@ -1664,6 +1664,7 @@ + + extern void bdev_cache_init(void); + extern void chrdev_init(void); ++extern void dynlock_cache_init(void); + + void __init vfs_caches_init_early(void) + { +@@ -1693,6 +1694,7 @@ + mnt_init(mempages); + bdev_cache_init(); + chrdev_init(); ++ dynlock_cache_init(); + } + + EXPORT_SYMBOL(d_alloc); +Index: linux-2.6.10/include/linux/dynlocks.h +=================================================================== +--- linux-2.6.10.orig/include/linux/dynlocks.h 2006-05-31 09:15:07.000000000 +0800 ++++ linux-2.6.10/include/linux/dynlocks.h 2006-10-21 13:04:55.000000000 +0800 +@@ -0,0 +1,37 @@ ++#ifndef _LINUX_DYNLOCKS_H ++#define _LINUX_DYNLOCKS_H ++ ++#include ++#include ++ ++#define DYNLOCK_MAGIC 0xd19a10c ++#define DYNLOCK_MAGIC2 0xd1956ee ++ ++struct dynlock; ++struct dynlock_handle; ++ ++/* ++ * lock's namespace: ++ * - list of locks ++ * - lock to protect this list ++ */ ++ ++struct dynlock { ++ unsigned dl_magic; ++ struct list_head dl_list; ++ spinlock_t dl_list_lock; ++}; ++ ++enum dynlock_type { ++ DLT_NONE, ++ DLT_WRITE, ++ DLT_READ ++}; ++ ++void dynlock_init(struct dynlock *dl); ++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, ++ enum dynlock_type lt, int gfp); ++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock); ++ ++#endif ++ +Index: linux-2.6.10/lib/Makefile +=================================================================== +--- linux-2.6.10.orig/lib/Makefile 2004-12-25 05:33:50.000000000 +0800 ++++ linux-2.6.10/lib/Makefile 2006-10-21 13:08:20.000000000 +0800 +@@ -5,7 +5,7 @@ + lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ + bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ + kobject.o kref.o idr.o div64.o parser.o int_sqrt.o \ +- bitmap.o extable.o kobject_uevent.o ++ bitmap.o extable.o kobject_uevent.o dynlocks.o + + ifeq ($(CONFIG_DEBUG_KOBJECT),y) + CFLAGS_kobject.o += -DDEBUG +Index: linux-2.6.10/lib/dynlocks.c +=================================================================== +--- linux-2.6.10.orig/lib/dynlocks.c 2006-05-31 09:15:07.000000000 +0800 ++++ linux-2.6.10/lib/dynlocks.c 2006-10-21 13:04:55.000000000 +0800 +@@ -0,0 +1,203 @@ ++/* ++ * Dynamic Locks ++ * ++ * struct dynlock is lockspace ++ * one may request lock (exclusive or shared) for some value ++ * in that lockspace ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++static kmem_cache_t * dynlock_cachep = NULL; ++ ++struct dynlock_handle { ++ unsigned dl_magic; ++ struct list_head dl_list; ++ unsigned long dl_value; /* lock value */ ++ int dl_refcount; /* number of users */ ++ int dl_readers; ++ int dl_writers; ++ int dl_pid; /* holder of the lock */ ++ wait_queue_head_t dl_wait; ++}; ++ ++#define DYNLOCK_LIST_MAGIC 0x11ee91e6 ++ ++void __init dynlock_cache_init(void) ++{ ++ printk(KERN_INFO "init dynlocks cache\n"); ++ dynlock_cachep = kmem_cache_create("dynlock_cache", ++ sizeof(struct dynlock_handle), ++ 0, ++ SLAB_HWCACHE_ALIGN, ++ NULL, NULL); ++ if (dynlock_cachep == NULL) ++ panic("Can't create dynlock cache"); ++} ++ ++/* ++ * dynlock_init ++ * ++ * initialize lockspace ++ * ++ */ ++void dynlock_init(struct dynlock *dl) ++{ ++ spin_lock_init(&dl->dl_list_lock); ++ INIT_LIST_HEAD(&dl->dl_list); ++ dl->dl_magic = DYNLOCK_LIST_MAGIC; ++} ++ ++/* ++ * dynlock_lock ++ * ++ * acquires lock (exclusive or shared) in specified lockspace ++ * each lock in lockspace is allocated separately, so user have ++ * to specify GFP flags. ++ * routine returns pointer to lock. this pointer is intended to ++ * be passed to dynlock_unlock ++ * ++ */ ++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, ++ enum dynlock_type lt, int gfp) ++{ ++ struct dynlock_handle *nhl = NULL; ++ struct dynlock_handle *hl; ++ struct list_head *cur; ++ int num = 0; ++ ++ BUG_ON(dl == NULL); ++ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); ++ ++ if (lt == DLT_NONE) ++ return NULL; ++repeat: ++ /* find requested lock in lockspace */ ++ spin_lock(&dl->dl_list_lock); ++ BUG_ON(dl->dl_list.next == NULL); ++ BUG_ON(dl->dl_list.prev == NULL); ++ list_for_each(cur, &dl->dl_list) { ++ BUG_ON(cur->next == NULL); ++ BUG_ON(cur->prev == NULL); ++ hl = list_entry(cur, struct dynlock_handle, dl_list); ++ BUG_ON(hl->dl_magic != DYNLOCK_MAGIC); ++ if (hl->dl_value == value) { ++ /* lock is found */ ++ if (nhl) { ++ /* someone else just allocated ++ * lock we didn't find and just created ++ * so, we drop our lock ++ */ ++ kmem_cache_free(dynlock_cachep, nhl); ++ nhl = NULL; ++ } ++ hl->dl_refcount++; ++ goto found; ++ } ++ num++; ++ } ++ /* lock not found */ ++ if (nhl) { ++ /* we already have allocated lock. use it */ ++ hl = nhl; ++ nhl = NULL; ++ list_add(&hl->dl_list, &dl->dl_list); ++ goto found; ++ } ++ spin_unlock(&dl->dl_list_lock); ++ ++ /* lock not found and we haven't allocated lock yet. allocate it */ ++ nhl = kmem_cache_alloc(dynlock_cachep, gfp); ++ if (nhl == NULL) ++ return NULL; ++ nhl->dl_refcount = 1; ++ nhl->dl_value = value; ++ nhl->dl_readers = 0; ++ nhl->dl_writers = 0; ++ nhl->dl_magic = DYNLOCK_MAGIC; ++ init_waitqueue_head(&nhl->dl_wait); ++ ++ /* while lock is being allocated, someone else may allocate it ++ * and put onto to list. check this situation ++ */ ++ goto repeat; ++ ++found: ++ if (lt == DLT_WRITE) { ++ /* exclusive lock: user don't want to share lock at all ++ * NOTE: one process may take the same lock several times ++ * this functionaly is useful for rename operations */ ++ while ((hl->dl_writers && hl->dl_pid != current->pid) || ++ hl->dl_readers) { ++ spin_unlock(&dl->dl_list_lock); ++ wait_event(hl->dl_wait, ++ hl->dl_writers == 0 && hl->dl_readers == 0); ++ spin_lock(&dl->dl_list_lock); ++ } ++ hl->dl_writers++; ++ } else { ++ /* shared lock: user do not want to share lock with writer */ ++ while (hl->dl_writers) { ++ spin_unlock(&dl->dl_list_lock); ++ wait_event(hl->dl_wait, hl->dl_writers == 0); ++ spin_lock(&dl->dl_list_lock); ++ } ++ hl->dl_readers++; ++ } ++ hl->dl_pid = current->pid; ++ spin_unlock(&dl->dl_list_lock); ++ ++ return hl; ++} ++ ++ ++/* ++ * dynlock_unlock ++ * ++ * user have to specify lockspace (dl) and pointer to lock structure ++ * returned by dynlock_lock() ++ * ++ */ ++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl) ++{ ++ int wakeup = 0; ++ ++ BUG_ON(dl == NULL); ++ BUG_ON(hl == NULL); ++ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); ++ BUG_ON(hl->dl_magic != DYNLOCK_MAGIC); ++ BUG_ON(current->pid != hl->dl_pid); ++ ++ spin_lock(&dl->dl_list_lock); ++ if (hl->dl_writers) { ++ BUG_ON(hl->dl_readers > 0 || hl->dl_readers < 0); ++ hl->dl_writers--; ++ if (hl->dl_writers == 0) ++ wakeup = 1; ++ } else if (hl->dl_readers) { ++ hl->dl_readers--; ++ if (hl->dl_readers == 0) ++ wakeup = 1; ++ } else { ++ BUG_ON(1); ++ } ++ if (wakeup) { ++ hl->dl_pid = 0; ++ wake_up(&hl->dl_wait); ++ } ++ if (--(hl->dl_refcount) == 0) { ++ hl->dl_magic = DYNLOCK_MAGIC2; ++ list_del(&hl->dl_list); ++ kmem_cache_free(dynlock_cachep, hl); ++ } ++ spin_unlock(&dl->dl_list_lock); ++} ++ ++EXPORT_SYMBOL(dynlock_init); ++EXPORT_SYMBOL(dynlock_lock); ++EXPORT_SYMBOL(dynlock_unlock); ++ diff --git a/lustre/kernel_patches/patches/dynamic-locks-2.6.9.patch b/lustre/kernel_patches/patches/dynamic-locks-2.6.9.patch new file mode 100644 index 0000000..e5889b7 --- /dev/null +++ b/lustre/kernel_patches/patches/dynamic-locks-2.6.9.patch @@ -0,0 +1,311 @@ + include/linux/dynlocks.h | 33 ++++++++++ + lib/Makefile | 4 - + lib/dynlocks.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 187 insertions(+), 2 deletions(-) + +Index: linux/fs/dcache.c +=================================================================== +--- linux.orig/fs/dcache.c ++++ linux/fs/dcache.c +@@ -1678,6 +1678,7 @@ EXPORT_SYMBOL(d_genocide); + + extern void bdev_cache_init(void); + extern void chrdev_init(void); ++extern void dynlock_cache_init(void); + + void __init vfs_caches_init_early(void) + { +@@ -1707,6 +1708,7 @@ void __init vfs_caches_init(unsigned lon + mnt_init(mempages); + bdev_cache_init(); + chrdev_init(); ++ dynlock_cache_init(); + } + + EXPORT_SYMBOL(d_alloc); +Index: linux/include/linux/dynlocks.h +=================================================================== +--- linux.orig/include/linux/dynlocks.h ++++ linux/include/linux/dynlocks.h +@@ -0,0 +1,37 @@ ++#ifndef _LINUX_DYNLOCKS_H ++#define _LINUX_DYNLOCKS_H ++ ++#include ++#include ++ ++#define DYNLOCK_MAGIC 0xd19a10c ++#define DYNLOCK_MAGIC2 0xd1956ee ++ ++struct dynlock; ++struct dynlock_handle; ++ ++/* ++ * lock's namespace: ++ * - list of locks ++ * - lock to protect this list ++ */ ++ ++struct dynlock { ++ unsigned dl_magic; ++ struct list_head dl_list; ++ spinlock_t dl_list_lock; ++}; ++ ++enum dynlock_type { ++ DLT_WRITE, ++ DLT_READ ++}; ++ ++void dynlock_init(struct dynlock *dl); ++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, ++ enum dynlock_type lt, int gfp); ++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock); ++ ++int dynlock_is_locked(struct dynlock *dl, unsigned long value); ++#endif ++ +Index: linux/lib/Makefile +=================================================================== +--- linux.orig/lib/Makefile ++++ linux/lib/Makefile +@@ -6,7 +6,7 @@ + lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ + bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ + kobject.o kref.o idr.o div64.o parser.o int_sqrt.o \ +- bitmap.o extable.o ++ bitmap.o extable.o dynlocks.o + + obj-y := sort.o + +Index: linux/lib/dynlocks.c +=================================================================== +--- linux.orig/lib/dynlocks.c ++++ linux/lib/dynlocks.c +@@ -0,0 +1,226 @@ ++/* ++ * Dynamic Locks ++ * ++ * struct dynlock is lockspace ++ * one may request lock (exclusive or shared) for some value ++ * in that lockspace ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++static kmem_cache_t * dynlock_cachep = NULL; ++ ++struct dynlock_handle { ++ unsigned dl_magic; ++ struct list_head dl_list; ++ unsigned long dl_value; /* lock value */ ++ int dl_refcount; /* number of users */ ++ int dl_readers; ++ int dl_writers; ++ int dl_pid; /* holder of the lock */ ++ wait_queue_head_t dl_wait; ++}; ++ ++#define DYNLOCK_LIST_MAGIC 0x11ee91e6 ++ ++void __init dynlock_cache_init(void) ++{ ++ printk(KERN_INFO "init dynlocks cache\n"); ++ dynlock_cachep = kmem_cache_create("dynlock_cache", ++ sizeof(struct dynlock_handle), ++ 0, ++ SLAB_HWCACHE_ALIGN, ++ NULL, NULL); ++ if (dynlock_cachep == NULL) ++ panic("Can't create dynlock cache"); ++} ++ ++/* ++ * dynlock_init ++ * ++ * initialize lockspace ++ * ++ */ ++void dynlock_init(struct dynlock *dl) ++{ ++ spin_lock_init(&dl->dl_list_lock); ++ INIT_LIST_HEAD(&dl->dl_list); ++ dl->dl_magic = DYNLOCK_LIST_MAGIC; ++} ++ ++/* ++ * dynlock_lock ++ * ++ * acquires lock (exclusive or shared) in specified lockspace ++ * each lock in lockspace is allocated separately, so user have ++ * to specify GFP flags. ++ * routine returns pointer to lock. this pointer is intended to ++ * be passed to dynlock_unlock ++ * ++ */ ++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, ++ enum dynlock_type lt, int gfp) ++{ ++ struct dynlock_handle *nhl = NULL; ++ struct dynlock_handle *hl; ++ ++ BUG_ON(dl == NULL); ++ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); ++ ++repeat: ++ /* find requested lock in lockspace */ ++ spin_lock(&dl->dl_list_lock); ++ BUG_ON(dl->dl_list.next == NULL); ++ BUG_ON(dl->dl_list.prev == NULL); ++ list_for_each_entry(hl, &dl->dl_list, dl_list) { ++ BUG_ON(hl->dl_list.next == NULL); ++ BUG_ON(hl->dl_list.prev == NULL); ++ BUG_ON(hl->dl_magic != DYNLOCK_MAGIC); ++ if (hl->dl_value == value) { ++ /* lock is found */ ++ if (nhl) { ++ /* someone else just allocated ++ * lock we didn't find and just created ++ * so, we drop our lock ++ */ ++ kmem_cache_free(dynlock_cachep, nhl); ++ nhl = NULL; ++ } ++ hl->dl_refcount++; ++ goto found; ++ } ++ } ++ /* lock not found */ ++ if (nhl) { ++ /* we already have allocated lock. use it */ ++ hl = nhl; ++ nhl = NULL; ++ list_add(&hl->dl_list, &dl->dl_list); ++ goto found; ++ } ++ spin_unlock(&dl->dl_list_lock); ++ ++ /* lock not found and we haven't allocated lock yet. allocate it */ ++ nhl = kmem_cache_alloc(dynlock_cachep, gfp); ++ if (nhl == NULL) ++ return NULL; ++ nhl->dl_refcount = 1; ++ nhl->dl_value = value; ++ nhl->dl_readers = 0; ++ nhl->dl_writers = 0; ++ nhl->dl_magic = DYNLOCK_MAGIC; ++ init_waitqueue_head(&nhl->dl_wait); ++ ++ /* while lock is being allocated, someone else may allocate it ++ * and put onto to list. check this situation ++ */ ++ goto repeat; ++ ++found: ++ if (lt == DLT_WRITE) { ++ /* exclusive lock: user don't want to share lock at all ++ * NOTE: one process may take the same lock several times ++ * this functionaly is useful for rename operations */ ++ while ((hl->dl_writers && hl->dl_pid != current->pid) || ++ hl->dl_readers) { ++ spin_unlock(&dl->dl_list_lock); ++ wait_event(hl->dl_wait, ++ hl->dl_writers == 0 && hl->dl_readers == 0); ++ spin_lock(&dl->dl_list_lock); ++ } ++ hl->dl_writers++; ++ } else { ++ /* shared lock: user do not want to share lock with writer */ ++ while (hl->dl_writers) { ++ spin_unlock(&dl->dl_list_lock); ++ wait_event(hl->dl_wait, hl->dl_writers == 0); ++ spin_lock(&dl->dl_list_lock); ++ } ++ hl->dl_readers++; ++ } ++ hl->dl_pid = current->pid; ++ spin_unlock(&dl->dl_list_lock); ++ ++ return hl; ++} ++ ++ ++/* ++ * dynlock_unlock ++ * ++ * user have to specify lockspace (dl) and pointer to lock structure ++ * returned by dynlock_lock() ++ * ++ */ ++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl) ++{ ++ int wakeup = 0; ++ ++ BUG_ON(dl == NULL); ++ BUG_ON(hl == NULL); ++ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); ++ ++ if (hl->dl_magic != DYNLOCK_MAGIC) ++ printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dl_magic); ++ ++ BUG_ON(hl->dl_magic != DYNLOCK_MAGIC); ++ BUG_ON(hl->dl_writers != 0 && current->pid != hl->dl_pid); ++ ++ spin_lock(&dl->dl_list_lock); ++ if (hl->dl_writers) { ++ BUG_ON(hl->dl_readers != 0); ++ hl->dl_writers--; ++ if (hl->dl_writers == 0) ++ wakeup = 1; ++ } else if (hl->dl_readers) { ++ hl->dl_readers--; ++ if (hl->dl_readers == 0) ++ wakeup = 1; ++ } else { ++ BUG(); ++ } ++ if (wakeup) { ++ hl->dl_pid = 0; ++ wake_up(&hl->dl_wait); ++ } ++ if (--(hl->dl_refcount) == 0) { ++ hl->dl_magic = DYNLOCK_MAGIC2; ++ list_del(&hl->dl_list); ++ kmem_cache_free(dynlock_cachep, hl); ++ } ++ spin_unlock(&dl->dl_list_lock); ++} ++ ++int dynlock_is_locked(struct dynlock *dl, unsigned long value) ++{ ++ struct dynlock_handle *hl; ++ int result; ++ ++ result = 0; ++ /* find requested lock in lockspace */ ++ spin_lock(&dl->dl_list_lock); ++ BUG_ON(dl->dl_list.next == NULL); ++ BUG_ON(dl->dl_list.prev == NULL); ++ list_for_each_entry(hl, &dl->dl_list, dl_list) { ++ BUG_ON(hl->dl_list.next == NULL); ++ BUG_ON(hl->dl_list.prev == NULL); ++ BUG_ON(hl->dl_magic != DYNLOCK_MAGIC); ++ if (hl->dl_value == value && hl->dl_pid == current->pid) { ++ /* lock is found */ ++ result = 1; ++ break; ++ } ++ } ++ spin_unlock(&dl->dl_list_lock); ++ return result; ++} ++ ++EXPORT_SYMBOL(dynlock_init); ++EXPORT_SYMBOL(dynlock_lock); ++EXPORT_SYMBOL(dynlock_unlock); ++EXPORT_SYMBOL(dynlock_is_locked); ++ diff --git a/lustre/kernel_patches/patches/elevator-cfq.patch b/lustre/kernel_patches/patches/elevator-cfq.patch deleted file mode 100644 index a13194e..0000000 --- a/lustre/kernel_patches/patches/elevator-cfq.patch +++ /dev/null @@ -1,20 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/block/ll_rw_blk.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/block/ll_rw_blk.c 2005-06-28 01:53:39.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/block/ll_rw_blk.c 2005-06-28 01:58:45.000000000 -0600 -@@ -1380,12 +1380,12 @@ - static int __make_request(request_queue_t *, struct bio *); - - static elevator_t *chosen_elevator = --#if defined(CONFIG_IOSCHED_AS) -+#if defined(CONFIG_IOSCHED_CFQ) -+ &iosched_cfq; -+#elif defined(CONFIG_IOSCHED_AS) - &iosched_as; - #elif defined(CONFIG_IOSCHED_DEADLINE) - &iosched_deadline; --#elif defined(CONFIG_IOSCHED_CFQ) -- &iosched_cfq; - #elif defined(CONFIG_IOSCHED_NOOP) - &elevator_noop; - #else diff --git a/lustre/kernel_patches/patches/export-2.6-fc5.patch b/lustre/kernel_patches/patches/export-2.6-fc5.patch index fdfb2f7..3eb7817 100644 --- a/lustre/kernel_patches/patches/export-2.6-fc5.patch +++ b/lustre/kernel_patches/patches/export-2.6-fc5.patch @@ -1,3 +1,15 @@ +Index: linux-2.6.16.i686/fs/open.c +=================================================================== +--- linux-2.6.16.i686.orig/fs/open.c 2006-05-30 22:10:15.000000000 +0800 ++++ linux-2.6.16.i686/fs/open.c 2006-05-30 22:17:45.000000000 +0800 +@@ -811,7 +811,6 @@ + return error; + } + +- + asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) + { + struct file * file; Index: linux-2.6.16.i686/fs/jbd/journal.c =================================================================== --- linux-2.6.16.i686.orig/fs/jbd/journal.c 2006-05-30 22:10:16.000000000 +0800 diff --git a/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch b/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch deleted file mode 100644 index 7b4f0c8..0000000 --- a/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch +++ /dev/null @@ -1,33 +0,0 @@ -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:50:46.077845320 +0200 -+++ linux-stage/fs/ext3/super.c 2005-02-25 14:51:32.241827328 +0200 -@@ -123,6 +123,8 @@ - journal_abort_handle(handle); - } - -+EXPORT_SYMBOL(ext3_journal_abort_handle); -+ - /* Deal with the reporting of failure conditions on a filesystem such as - * inconsistencies detected or read IO failures. - * -@@ -2002,6 +2004,8 @@ - return ret; - } - -+EXPORT_SYMBOL(ext3_force_commit); -+ - /* - * Ext3 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this -@@ -2433,6 +2437,10 @@ - unsigned long *blocks, int *created, int create); - EXPORT_SYMBOL(ext3_map_inode_page); - -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_set_handle); -+EXPORT_SYMBOL(ext3_bread); -+ - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); diff --git a/lustre/kernel_patches/patches/export-ext3-2.6-suse.patch b/lustre/kernel_patches/patches/export-ext3-2.6-suse.patch deleted file mode 100644 index 638b4bf..0000000 --- a/lustre/kernel_patches/patches/export-ext3-2.6-suse.patch +++ /dev/null @@ -1,33 +0,0 @@ -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-04-02 16:40:18.000000000 -0500 -+++ linux-stage/fs/ext3/super.c 2004-04-02 16:40:18.000000000 -0500 -@@ -115,6 +115,8 @@ - handle->h_err = err; - } - -+EXPORT_SYMBOL(ext3_journal_abort_handle); -+ - static char error_buf[1024]; - - /* Deal with the reporting of failure conditions on a filesystem such as -@@ -1774,6 +1776,8 @@ - return ret; - } - -+EXPORT_SYMBOL(ext3_force_commit); -+ - /* - * Ext3 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this -@@ -2088,6 +2092,10 @@ - unsigned long *blocks, int *created, int create); - EXPORT_SYMBOL(ext3_map_inode_page); - -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_set_handle); -+EXPORT_SYMBOL(ext3_bread); -+ - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); diff --git a/lustre/kernel_patches/patches/export-nr_free_buffer_pages.patch b/lustre/kernel_patches/patches/export-nr_free_buffer_pages.patch new file mode 100644 index 0000000..1d6712a --- /dev/null +++ b/lustre/kernel_patches/patches/export-nr_free_buffer_pages.patch @@ -0,0 +1,12 @@ +Index: linux/mm/page_alloc.c +=================================================================== +--- linux.orig/mm/page_alloc.c ++++ linux/mm/page_alloc.c +@@ -875,6 +875,7 @@ unsigned int nr_free_buffer_pages(void) + { + return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK); + } ++EXPORT_SYMBOL(nr_free_buffer_pages); + + /* + * Amount of free RAM allocatable within all zones diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-cray.patch b/lustre/kernel_patches/patches/export-show_task-2.4-cray.patch deleted file mode 100644 index 8211401..0000000 --- a/lustre/kernel_patches/patches/export-show_task-2.4-cray.patch +++ /dev/null @@ -1,33 +0,0 @@ -Index: kernel-l0405/kernel/sched.c -=================================================================== ---- kernel-l0405.orig/kernel/sched.c 2003-11-06 16:15:20.000000000 -0800 -+++ kernel-l0405/kernel/sched.c 2005-04-05 14:44:27.000000000 -0700 -@@ -1627,7 +1627,7 @@ - return retval; - } - --static void show_task(task_t * p) -+void show_task(task_t * p) - { - unsigned long free = 0; - int state; -Index: kernel-l0405/kernel/ksyms.c -=================================================================== ---- kernel-l0405.orig/kernel/ksyms.c 2005-04-05 14:44:15.000000000 -0700 -+++ kernel-l0405/kernel/ksyms.c 2005-04-05 14:44:50.000000000 -0700 -@@ -55,6 +55,7 @@ - #include - #include - -+extern void show_task(task_t *); - - #if defined(CONFIG_PROC_FS) - #include -@@ -684,6 +685,7 @@ - - /* debug */ - EXPORT_SYMBOL(dump_stack); -+EXPORT_SYMBOL(show_task); - - #if defined(CONFIG_KDB_USB) - #include diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch b/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch deleted file mode 100644 index 3799348..0000000 --- a/lustre/kernel_patches/patches/export-show_task-2.4-rh.patch +++ /dev/null @@ -1,171 +0,0 @@ -Index: linux-2.4.20/kernel/ksyms.c -=================================================================== ---- linux-2.4.20.orig/kernel/ksyms.c 2004-10-21 21:30:14.000000000 -0400 -+++ linux-2.4.20/kernel/ksyms.c 2004-10-21 21:32:00.000000000 -0400 -@@ -75,6 +75,7 @@ - extern spinlock_t dma_spin_lock; - extern int panic_timeout; - -+extern void show_task(task_t *); - - #ifdef CONFIG_MODVERSIONS - const struct module_symbol __export_Using_Versions -@@ -636,3 +637,4 @@ - extern void check_tasklist_locked(void); - EXPORT_SYMBOL_GPL(check_tasklist_locked); - EXPORT_SYMBOL(dump_stack); -+EXPORT_SYMBOL(show_task); -Index: linux-2.4.20/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.20.orig/arch/i386/kernel/traps.c 2004-10-21 21:30:15.000000000 -0400 -+++ linux-2.4.20/arch/i386/kernel/traps.c 2004-10-25 14:34:41.000000000 -0400 -@@ -137,29 +137,141 @@ - - #endif - --void show_trace(unsigned long * stack) -+void scan_stack (unsigned long *stack) - { - int i; - unsigned long addr; -- /* static to not take up stackspace; if we race here too bad */ -- static char buffer[512]; -+ /* static to not take up stackspace */ -+ static char buffer[NR_CPUS][512], *bufp; - -- if (!stack) -- stack = (unsigned long*)&stack; -+ bufp = buffer[smp_processor_id()]; - -- printk("Call Trace: "); -+ /* -+ * If we have frame pointers then use them to get -+ * a 100% exact backtrace, up until the entry frame: -+ */ - i = 1; - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { -- lookup_symbol(addr, buffer, 512); -- printk("[<%08lx>] %s (0x%x))\n", addr,buffer,stack-1); -+ lookup_symbol(addr, bufp, 512); -+ printk("[<%08lx>] %s (0x%p)\n", addr,bufp,stack-1); - i++; - } - } -+} -+ -+#if CONFIG_FRAME_POINTER -+void show_stack_frame_params (int param_count, unsigned long params[]) -+{ -+ int i; -+ unsigned long *p, task_addr, stack_base; -+ -+ if (param_count <= 0) -+ return; -+ -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ -+ printk(" ("); -+ -+ for (i = 0, p = params; -+ ((param_count - i) > 1) && (p >= task_addr) && (p <= stack_base); -+ i++, p++) { -+ printk("0x%x, ", *p); -+ -+ if ((i % 4) == 3) -+ printk("\n "); -+ } -+ -+ if ((p >= task_addr) && (p <= stack_base)) -+ printk("0x%x)\n", *p); -+} -+ -+/* Display a stack trace for the currently executing task. The 'dummy' -+ * parameter serves a purpose although its value is unused. We use the -+ * address of 'dummy' as a reference point for finding the saved %ebp register -+ * value on the stack. -+ */ -+void frame_pointer_walk (void *dummy) -+{ -+ int i; -+ unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, -+ eip, stack_base; -+ /* static to not take up stackspace */ -+ static char buffer[NR_CPUS][512], *bufp; -+ -+ bufp = buffer[smp_processor_id()]; -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ frame_ptr = (unsigned long *) (&dummy - 2); -+ -+ for (; ; ) { -+ next_frame_ptr = (unsigned long *) (*frame_ptr); -+ addr = (unsigned long) next_frame_ptr; -+ -+ /* Stop when we reach a frame pointer that points to a -+ * location clearly outside our own kernel stack. -+ */ -+ if ((addr < task_addr) || (addr > stack_base)) -+ break; -+ -+ eip_ptr = frame_ptr + 1; -+ eip = *eip_ptr; -+ -+ if (kernel_text_address(eip)) { -+ lookup_symbol(eip, bufp, 512); -+ show_stack_frame_params(4, frame_ptr + 2); -+ printk("[<%08lx>] %s (0x%x)\n", eip, bufp, -+ eip_ptr); -+ } -+ -+ frame_ptr = next_frame_ptr; -+ } -+} -+ -+typedef void (*stack_trace_fn_t) (unsigned long *stack); -+ -+void show_trace(unsigned long * stack) -+{ -+ static const stack_trace_fn_t trace_fn_vector[] = -+ { scan_stack, frame_pointer_walk }; -+ unsigned long addr, task_addr, stack_base; -+ int task_is_current; -+ -+ if (!stack) -+ stack = (unsigned long*)&stack; -+ -+ printk("Call Trace:\n"); -+ addr = (unsigned long) stack; -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ task_is_current = (addr >= task_addr) && (addr <= stack_base); -+ -+ /* We may use frame pointers to do a stack trace only if the current -+ * task is being traced. Tracing some other task in this manner -+ * would require a saved %ebp register value. Perhaps in the future -+ * I'll consider providing a means of obtaining this. -+ */ -+ trace_fn_vector[task_is_current](stack); -+ - printk("\n"); - } - -+#else /* CONFIG_FRAME_POINTER */ -+ -+void show_trace(unsigned long * stack) -+{ -+ if (!stack) -+ stack = (unsigned long*)&stack; -+ -+ printk("Call Trace:\n"); -+ scan_stack(stack); -+ printk("\n"); -+} -+ -+#endif /* CONFIG_FRAME_POINTER */ -+ - void show_trace_task(struct task_struct *tsk) - { - unsigned long esp = tsk->thread.esp; diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch b/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch deleted file mode 100644 index a1937d3..0000000 --- a/lustre/kernel_patches/patches/export-show_task-2.4-rhel.patch +++ /dev/null @@ -1,20 +0,0 @@ -Index: linux-2.4.21/kernel/ksyms.c -=================================================================== ---- linux-2.4.21.orig/kernel/ksyms.c 2004-11-01 22:00:03.000000000 -0500 -+++ linux-2.4.21/kernel/ksyms.c 2004-11-01 22:07:07.000000000 -0500 -@@ -86,6 +86,7 @@ - }; - #endif - -+extern void show_task(task_t *); - - EXPORT_SYMBOL(inter_module_register); - EXPORT_SYMBOL(inter_module_unregister); -@@ -670,6 +671,7 @@ - extern void check_tasklist_locked(void); - EXPORT_SYMBOL_GPL(check_tasklist_locked); - EXPORT_SYMBOL(dump_stack); -+EXPORT_SYMBOL(show_task); - - EXPORT_SYMBOL_GPL(netdump_func); - EXPORT_SYMBOL_GPL(netdump_mode); diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch b/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch deleted file mode 100644 index 4e05709..0000000 --- a/lustre/kernel_patches/patches/export-show_task-2.4-vanilla.patch +++ /dev/null @@ -1,34 +0,0 @@ -Index: linux-2.4.24/kernel/ksyms.c -=================================================================== ---- linux-2.4.24.orig/kernel/ksyms.c 2004-10-22 14:00:00.000000000 -0400 -+++ linux-2.4.24/kernel/ksyms.c 2004-10-22 14:03:54.000000000 -0400 -@@ -70,6 +70,8 @@ - extern spinlock_t dma_spin_lock; - extern int panic_timeout; - -+extern void show_task(struct task_struct *); -+ - #ifdef CONFIG_MODVERSIONS - const struct module_symbol __export_Using_Versions - __attribute__((section("__ksymtab"))) = { -@@ -619,6 +621,7 @@ - - /* debug */ - EXPORT_SYMBOL(dump_stack); -+EXPORT_SYMBOL(show_task); - - /* To match ksyms with System.map */ - extern const char _end[]; -Index: linux-2.4.24/kernel/sched.c -=================================================================== ---- linux-2.4.24.orig/kernel/sched.c 2003-11-28 13:26:21.000000000 -0500 -+++ linux-2.4.24/kernel/sched.c 2004-10-22 14:10:10.000000000 -0400 -@@ -1177,7 +1177,7 @@ - return retval; - } - --static void show_task(struct task_struct * p) -+void show_task(struct task_struct * p) - { - unsigned long free = 0; - int state; diff --git a/lustre/kernel_patches/patches/export-truncate.patch b/lustre/kernel_patches/patches/export-truncate.patch deleted file mode 100644 index 2cd96b9..0000000 --- a/lustre/kernel_patches/patches/export-truncate.patch +++ /dev/null @@ -1,35 +0,0 @@ - include/linux/mm.h | 1 + - mm/filemap.c | 3 ++- - 2 files changed, 3 insertions(+), 1 deletion(-) - ---- linux-2.4.18-18/include/linux/mm.h~export-truncate 2003-04-05 02:54:55.000000000 -0700 -+++ linux-2.4.18-18-braam/include/linux/mm.h 2003-04-09 17:37:46.000000000 -0600 -@@ -650,6 +650,7 @@ struct zone_t; - /* filemap.c */ - extern void remove_inode_page(struct page *); - extern unsigned long page_unuse(struct page *); -+extern void truncate_complete_page(struct page *); - extern void truncate_inode_pages(struct address_space *, loff_t); - - /* generic vm_area_ops exported for stackable file systems */ ---- linux-2.4.18-18/mm/filemap.c~export-truncate 2003-04-05 02:54:55.000000000 -0700 -+++ linux-2.4.18-18-braam/mm/filemap.c 2003-04-09 17:37:46.000000000 -0600 -@@ -245,7 +245,7 @@ static inline void truncate_partial_page - do_flushpage(page, partial); - } - --static void truncate_complete_page(struct page *page) -+void truncate_complete_page(struct page *page) - { - /* - * Leave it on the LRU if it gets converted into anonymous buffers -@@ -266,6 +266,7 @@ static void truncate_complete_page(struc - remove_inode_page(page); - page_cache_release(page); - } -+EXPORT_SYMBOL_GPL(truncate_complete_page); - - static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *)); - static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial) - -_ diff --git a/lustre/kernel_patches/patches/export-zap-page-range.patch b/lustre/kernel_patches/patches/export-zap-page-range.patch deleted file mode 100644 index 9b9d48f..0000000 --- a/lustre/kernel_patches/patches/export-zap-page-range.patch +++ /dev/null @@ -1,12 +0,0 @@ -Index: linux-2.4.24-l36mmap/mm/memory.c -=================================================================== ---- linux-2.4.24-l36mmap.orig/mm/memory.c 2004-05-27 17:44:13.000000000 -0700 -+++ linux-2.4.24-l36mmap/mm/memory.c 2004-05-27 17:45:07.000000000 -0700 -@@ -411,6 +411,7 @@ - mm->rss = 0; - spin_unlock(&mm->page_table_lock); - } -+EXPORT_SYMBOL_GPL(zap_page_range); - - /* - * Do a quick page-table lookup for a single page. diff --git a/lustre/kernel_patches/patches/export_num_siblings.patch b/lustre/kernel_patches/patches/export_num_siblings.patch deleted file mode 100644 index 0fdefb5..0000000 --- a/lustre/kernel_patches/patches/export_num_siblings.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- linux/arch/i386/kernel/i386_ksyms.c.orig 2003-11-28 11:26:19.000000000 -0700 -+++ linux/arch/i386/kernel/i386_ksyms.c 2004-08-13 11:31:33.000000000 -0600 -@@ -131,6 +131,7 @@ - EXPORT_SYMBOL(cpu_data); - EXPORT_SYMBOL(kernel_flag_cacheline); - EXPORT_SYMBOL(smp_num_cpus); -+EXPORT_SYMBOL(smp_num_siblings); - EXPORT_SYMBOL(cpu_online_map); - EXPORT_SYMBOL_NOVERS(__write_lock_failed); - EXPORT_SYMBOL_NOVERS(__read_lock_failed); diff --git a/lustre/kernel_patches/patches/export_symbol_numa-2.6.18.patch b/lustre/kernel_patches/patches/export_symbol_numa-2.6.18.patch new file mode 100644 index 0000000..a15b287 --- /dev/null +++ b/lustre/kernel_patches/patches/export_symbol_numa-2.6.18.patch @@ -0,0 +1,24 @@ +Index: linux-2.6.18.8/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-2.6.18.8.orig/arch/i386/kernel/smpboot.c 2007-06-05 13:20:25.000000000 +0200 ++++ linux-2.6.18.8/arch/i386/kernel/smpboot.c 2007-06-05 13:20:33.000000000 +0200 +@@ -607,6 +607,7 @@ extern struct { + /* which logical CPUs are on which nodes */ + cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = + { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; ++EXPORT_SYMBOL(node_2_cpu_mask); + /* which node each logical CPU is on */ + int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; + EXPORT_SYMBOL(cpu_2_node); +Index: linux-2.6.18.8/arch/ia64/kernel/numa.c +=================================================================== +--- linux-2.6.18.8.orig/arch/ia64/kernel/numa.c 2007-06-05 13:21:04.000000000 +0200 ++++ linux-2.6.18.8/arch/ia64/kernel/numa.c 2007-06-05 13:21:28.000000000 +0200 +@@ -28,6 +28,7 @@ u16 cpu_to_node_map[NR_CPUS] __cacheline + EXPORT_SYMBOL(cpu_to_node_map); + + cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; ++EXPORT_SYMBOL(node_to_cpu_mask); + + void __cpuinit map_cpu_to_node(int cpu, int nid) + { diff --git a/lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch b/lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch index 0561e65..2a08192 100644 --- a/lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch +++ b/lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch @@ -79,3 +79,16 @@ Index: linux-2.6.9-5.0.3.EL/fs/dcache.c void d_genocide(struct dentry *root) { +Index: linux-2.6.12-rc6/net/sunrpc/sunrpc_syms.c +=================================================================== +--- linux-2.6.12.orig/net/sunrpc/sunrpc_syms.c 2005-12-14 23:20:39.000000000 -0700 ++++ linux-2.6.12/net/sunrpc/sunrpc_syms.c 2005-12-14 23:21:47.000000000 -0700 +@@ -58,6 +58,8 @@ EXPORT_SYMBOL(rpc_unlink); + EXPORT_SYMBOL(rpc_wake_up); + EXPORT_SYMBOL(rpc_queue_upcall); + EXPORT_SYMBOL(rpc_mkpipe); ++EXPORT_SYMBOL(rpc_mkdir); ++EXPORT_SYMBOL(rpc_rmdir); + + /* Client transport */ + EXPORT_SYMBOL(xprt_create_proto); diff --git a/lustre/kernel_patches/patches/export_symbols-2.6.12.patch b/lustre/kernel_patches/patches/export_symbols-2.6.12.patch index e21fcf4..6521703 100644 --- a/lustre/kernel_patches/patches/export_symbols-2.6.12.patch +++ b/lustre/kernel_patches/patches/export_symbols-2.6.12.patch @@ -62,3 +62,16 @@ Index: linux-2.6.12-rc6/fs/dcache.c void d_genocide(struct dentry *root) { +Index: linux-2.6.12-rc6/net/sunrpc/sunrpc_syms.c +=================================================================== +--- linux-2.6.12.orig/net/sunrpc/sunrpc_syms.c 2005-12-14 23:20:39.000000000 -0700 ++++ linux-2.6.12/net/sunrpc/sunrpc_syms.c 2005-12-14 23:21:47.000000000 -0700 +@@ -58,6 +58,8 @@ EXPORT_SYMBOL(rpc_unlink); + EXPORT_SYMBOL(rpc_wake_up); + EXPORT_SYMBOL(rpc_queue_upcall); + EXPORT_SYMBOL(rpc_mkpipe); ++EXPORT_SYMBOL(rpc_mkdir); ++EXPORT_SYMBOL(rpc_rmdir); + + /* Client transport */ + EXPORT_SYMBOL(xprt_create_proto); diff --git a/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch b/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch deleted file mode 100644 index 294a9cd..0000000 --- a/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch +++ /dev/null @@ -1,17 +0,0 @@ -Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-03 08:36:51.000000000 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 -@@ -19,9 +19,12 @@ - #ifdef __KERNEL__ - #include - #include -+#ifndef EXT_INCLUDE -+#define EXT_INCLUDE - #include - #include - #endif -+#endif - #include - - /* diff --git a/lustre/kernel_patches/patches/exports-2.4.21-chaos.patch b/lustre/kernel_patches/patches/exports-2.4.21-chaos.patch deleted file mode 100644 index 61a3f5c..0000000 --- a/lustre/kernel_patches/patches/exports-2.4.21-chaos.patch +++ /dev/null @@ -1,59 +0,0 @@ - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 4 ++++ - 4 files changed, 8 insertions(+), 1 deletion(-) - -Index: 57chaos/fs/ext3/Makefile -=================================================================== ---- 57chaos.orig/fs/ext3/Makefile 2004-06-21 13:39:47.000000000 -0700 -+++ 57chaos/fs/ext3/Makefile 2004-06-21 13:59:16.000000000 -0700 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) -Index: 57chaos/fs/ext3/super.c -=================================================================== ---- 57chaos.orig/fs/ext3/super.c 2004-06-21 13:39:47.000000000 -0700 -+++ 57chaos/fs/ext3/super.c 2004-06-21 13:59:16.000000000 -0700 -@@ -1938,7 +1938,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); -Index: 57chaos/include/linux/fs.h -=================================================================== ---- 57chaos.orig/include/linux/fs.h 2004-06-21 13:39:47.000000000 -0700 -+++ 57chaos/include/linux/fs.h 2004-06-21 13:59:57.000000000 -0700 -@@ -1073,6 +1073,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - - #define kern_umount mntput - -Index: 57chaos/kernel/ksyms.c -=================================================================== ---- 57chaos.orig/kernel/ksyms.c 2004-06-21 13:39:47.000000000 -0700 -+++ 57chaos/kernel/ksyms.c 2004-06-21 13:59:16.000000000 -0700 -@@ -336,6 +336,10 @@ EXPORT_SYMBOL(dcache_dir_fsync); - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); diff --git a/lustre/kernel_patches/patches/exports_2.4.19-suse.patch b/lustre/kernel_patches/patches/exports_2.4.19-suse.patch deleted file mode 100644 index feaeec6..0000000 --- a/lustre/kernel_patches/patches/exports_2.4.19-suse.patch +++ /dev/null @@ -1,53 +0,0 @@ - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 4 ++++ - 4 files changed, 8 insertions(+), 1 deletion(-) - ---- linux/fs/ext3/Makefile~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/Makefile Wed Apr 9 10:19:53 2003 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) ---- linux/fs/ext3/super.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/super.c Wed Apr 9 10:19:53 2003 -@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- linux/include/linux/fs.h~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/include/linux/fs.h Wed Apr 9 10:19:53 2003 -@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - - #define kern_umount mntput - ---- linux/kernel/ksyms.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/kernel/ksyms.c Wed Apr 9 10:19:53 2003 -@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync); - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); - -_ diff --git a/lustre/kernel_patches/patches/exports_2.4.19-suse2.patch b/lustre/kernel_patches/patches/exports_2.4.19-suse2.patch deleted file mode 100644 index 41744b9..0000000 --- a/lustre/kernel_patches/patches/exports_2.4.19-suse2.patch +++ /dev/null @@ -1,59 +0,0 @@ - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 4 ++++ - 4 files changed, 8 insertions(+), 1 deletion(-) - -Index: linux-2.4.19.SuSE/fs/ext3/Makefile -=================================================================== ---- linux-2.4.19.SuSE.orig/fs/ext3/Makefile 2004-04-29 16:18:08.000000000 -0700 -+++ linux-2.4.19.SuSE/fs/ext3/Makefile 2004-04-29 16:36:09.000000000 -0700 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) -Index: linux-2.4.19.SuSE/fs/ext3/super.c -=================================================================== ---- linux-2.4.19.SuSE.orig/fs/ext3/super.c 2004-04-29 16:18:08.000000000 -0700 -+++ linux-2.4.19.SuSE/fs/ext3/super.c 2004-04-29 16:36:09.000000000 -0700 -@@ -1821,7 +1821,7 @@ - exit_ext3_xattr(); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); -Index: linux-2.4.19.SuSE/include/linux/fs.h -=================================================================== ---- linux-2.4.19.SuSE.orig/include/linux/fs.h 2004-04-29 16:19:41.000000000 -0700 -+++ linux-2.4.19.SuSE/include/linux/fs.h 2004-04-29 16:36:52.000000000 -0700 -@@ -1174,6 +1174,7 @@ - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - extern void umount_tree(struct vfsmount *); - - #define kern_umount mntput -Index: linux-2.4.19.SuSE/kernel/ksyms.c -=================================================================== ---- linux-2.4.19.SuSE.orig/kernel/ksyms.c 2004-04-29 16:19:35.000000000 -0700 -+++ linux-2.4.19.SuSE/kernel/ksyms.c 2004-04-29 16:36:09.000000000 -0700 -@@ -330,6 +330,10 @@ - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); diff --git a/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch b/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch deleted file mode 100644 index feaeec6..0000000 --- a/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch +++ /dev/null @@ -1,53 +0,0 @@ - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 4 ++++ - 4 files changed, 8 insertions(+), 1 deletion(-) - ---- linux/fs/ext3/Makefile~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/Makefile Wed Apr 9 10:19:53 2003 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) ---- linux/fs/ext3/super.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/super.c Wed Apr 9 10:19:53 2003 -@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- linux/include/linux/fs.h~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/include/linux/fs.h Wed Apr 9 10:19:53 2003 -@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - - #define kern_umount mntput - ---- linux/kernel/ksyms.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/kernel/ksyms.c Wed Apr 9 10:19:53 2003 -@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync); - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); - -_ diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1.patch deleted file mode 100644 index c5e1ee0..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch +++ /dev/null @@ -1,2536 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 299 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1387 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1886 insertions(+), 160 deletions(-) - ---- linux-2.4.20/fs/ext3/Makefile~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/Makefile Sat Apr 5 03:57:05 2003 -@@ -12,7 +12,7 @@ O_TARGET := ext3.o - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-2.4.20/fs/ext3/dir.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/dir.c Sat Apr 5 03:56:31 2003 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ struct file_operations ext3_dir_operatio - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ static int ext3_readdir(struct file * fi - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ revalidate: - * during the copy operation. - */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,269 @@ revalidate: - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_get_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_get_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_get_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif ---- linux-2.4.20/fs/ext3/file.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/file.c Sat Apr 5 03:56:31 2003 -@@ -35,6 +35,9 @@ static int ext3_release_file (struct ino - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - ---- /dev/null Fri Aug 30 17:31:37 2002 -+++ linux-2.4.20-braam/fs/ext3/hash.c Sat Apr 5 03:56:31 2003 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. -+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} ---- linux-2.4.20/fs/ext3/namei.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/namei.c Sat Apr 5 03:56:31 2003 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -38,6 +44,630 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ ext3_htree_store_dirent(dir_file, hinfo.hash, -+ hinfo.minor_hash, de); -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -94,6 +724,7 @@ static int inline search_dirblock(struct - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -105,6 +736,8 @@ static int inline search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,12 +752,32 @@ static struct buffer_head * ext3_find_en - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -165,7 +818,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -196,6 +849,74 @@ cleanup_and_exit: - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *)((char *)de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -212,8 +925,9 @@ static struct dentry *ext3_lookup(struct - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -237,6 +951,301 @@ static inline void ext3_set_de_type(stru - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -247,127 +1255,198 @@ static inline void ext3_set_de_type(stru - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. -- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -451,9 +1530,11 @@ static int ext3_create (struct inode * d - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -478,9 +1559,11 @@ static int ext3_mknod (struct inode * di - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -507,9 +1590,11 @@ static int ext3_mkdir(struct inode * dir - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -521,7 +1606,7 @@ static int ext3_mkdir(struct inode * dir - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -@@ -554,21 +1639,19 @@ static int ext3_mkdir(struct inode * dir - inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -655,7 +1738,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -696,7 +1779,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -714,25 +1797,26 @@ out_unlock: - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - ino_t ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -793,8 +1877,9 @@ static int ext3_rmdir (struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -832,7 +1917,7 @@ static int ext3_rmdir (struct inode * di - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -850,8 +1935,9 @@ static int ext3_unlink(struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -878,7 +1964,7 @@ static int ext3_unlink(struct inode * di - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -904,9 +1990,11 @@ static int ext3_symlink (struct inode * - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -916,7 +2004,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -925,25 +2013,23 @@ static int ext3_symlink (struct inode * - * i_size in generic_commit_write(). - */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -956,12 +2042,15 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2084,11 @@ static int ext3_rename (struct inode * o - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1077,7 +2168,7 @@ static int ext3_rename (struct inode * o - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2180,7 @@ static int ext3_rename (struct inode * o - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- linux-2.4.20/fs/ext3/super.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/super.c Sat Apr 5 03:56:31 2003 -@@ -707,6 +707,7 @@ static int ext3_setup_super(struct super - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -717,6 +718,7 @@ static int ext3_setup_super(struct super - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -890,6 +892,7 @@ static loff_t ext3_max_size(int bits) - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1066,6 +1069,9 @@ struct super_block * ext3_read_super (st - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1769,6 +1775,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); ---- linux-2.4.20/include/linux/ext3_fs.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h Sat Apr 5 03:56:31 2003 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -437,8 +442,11 @@ struct ext3_super_block { - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -575,9 +583,46 @@ struct ext3_dir_entry_2 { - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -587,6 +632,27 @@ struct ext3_iloc - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -614,11 +680,20 @@ extern struct ext3_group_desc * ext3_get - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -650,6 +725,8 @@ extern int ext3_ioctl (struct inode *, s - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) ---- linux-2.4.20/include/linux/ext3_fs_sb.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_fs_sb.h Sat Apr 5 03:56:31 2003 -@@ -62,6 +62,8 @@ struct ext3_sb_info { - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; ---- linux-2.4.20/include/linux/ext3_jbd.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_jbd.h Sat Apr 5 03:56:31 2003 -@@ -63,6 +63,8 @@ extern int ext3_writepage_trans_blocks(s - - #define EXT3_RESERVE_TRANS_BLOCKS 12 - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, ---- linux-2.4.20/include/linux/rbtree.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/rbtree.h Sat Apr 5 03:56:31 2003 -@@ -120,6 +120,8 @@ rb_root_t; - - extern void rb_insert_color(rb_node_t *, rb_root_t *); - extern void rb_erase(rb_node_t *, rb_root_t *); -+extern rb_node_t *rb_get_first(rb_root_t *root); -+extern rb_node_t *rb_get_next(rb_node_t *n); - - static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) - { ---- linux-2.4.20/lib/rbtree.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/lib/rbtree.c Sat Apr 5 03:56:31 2003 -@@ -17,6 +17,8 @@ - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -+ -+ rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002 - */ - - #include -@@ -294,3 +296,43 @@ void rb_erase(rb_node_t * node, rb_root_ - __rb_erase_color(child, parent, root); - } - EXPORT_SYMBOL(rb_erase); -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+rb_node_t *rb_get_first(rb_root_t *root) -+{ -+ rb_node_t *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return 0; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+EXPORT_SYMBOL(rb_get_first); -+ -+/* -+ * Given a node, this function will return the next node in the tree. -+ */ -+rb_node_t *rb_get_next(rb_node_t *n) -+{ -+ rb_node_t *parent; -+ -+ if (n->rb_right) { -+ n = n->rb_right; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+ } else { -+ while ((parent = n->rb_parent)) { -+ if (n == parent->rb_left) -+ return parent; -+ n = parent; -+ } -+ return 0; -+ } -+} -+EXPORT_SYMBOL(rb_get_next); -+ - -_ diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-2.patch b/lustre/kernel_patches/patches/ext-2.4-patch-2.patch deleted file mode 100644 index 689d33b..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-2.patch +++ /dev/null @@ -1,34 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# namei.c | 9 +++++++++ -# 1 files changed, 9 insertions(+) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.777 -# Add '.' and '..' entries to be returned by readdir of htree directories -# -# This patch from Chris Li adds '.' and '..' to the rbtree so that they -# are properly returned by readdir. -# -------------------------------------------- -# -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:30 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:30 2002 -@@ -546,6 +546,15 @@ - if (!frame) - return err; - -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ ext3_htree_store_dirent(dir_file, 0, 0, de); -+ de = ext3_next_entry(de); -+ ext3_htree_store_dirent(dir_file, 0, 0, de); -+ count += 2; -+ } -+ - while (1) { - block = dx_get_block(frame->at); - dxtrace(printk("Reading block %d\n", block)); diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-3.patch b/lustre/kernel_patches/patches/ext-2.4-patch-3.patch deleted file mode 100644 index 2600b02..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-3.patch +++ /dev/null @@ -1,96 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# fs/ext3/dir.c | 7 +++++-- -# fs/ext3/namei.c | 11 +++++++---- -# include/linux/ext3_fs.h | 2 +- -# 3 files changed, 13 insertions(+), 7 deletions(-) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.778 -# Check for failed kmalloc() in ext3_htree_store_dirent() -# -# This patch checks for a failed kmalloc() in ext3_htree_store_dirent(), -# and passes the error up to its caller, ext3_htree_fill_tree(). -# -------------------------------------------- -# -diff -Nru a/fs/ext3/dir.c b/fs/ext3/dir.c ---- a/fs/ext3/dir.c Thu Nov 7 10:57:34 2002 -+++ b/fs/ext3/dir.c Thu Nov 7 10:57:34 2002 -@@ -308,7 +308,7 @@ - /* - * Given a directory entry, enter it into the fname rb tree. - */ --void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent) - { -@@ -323,6 +323,8 @@ - /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; - new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; - memset(new_fn, 0, len); - new_fn->hash = hash; - new_fn->minor_hash = minor_hash; -@@ -344,7 +346,7 @@ - (new_fn->minor_hash == fname->minor_hash)) { - new_fn->next = fname->next; - fname->next = new_fn; -- return; -+ return 0; - } - - if (new_fn->hash < fname->hash) -@@ -359,6 +361,7 @@ - - rb_link_node(&new_fn->rb_hash, parent, p); - rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; - } - - -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:34 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:34 2002 -@@ -549,9 +549,11 @@ - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -- ext3_htree_store_dirent(dir_file, 0, 0, de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; - de = ext3_next_entry(de); -- ext3_htree_store_dirent(dir_file, 0, 0, de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; - count += 2; - } - -@@ -570,8 +572,9 @@ - ((hinfo.hash == start_hash) && - (hinfo.minor_hash < start_minor_hash))) - continue; -- ext3_htree_store_dirent(dir_file, hinfo.hash, -- hinfo.minor_hash, de); -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; - count++; - } - brelse (bh); -diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h ---- a/include/linux/ext3_fs.h Thu Nov 7 10:57:34 2002 -+++ b/include/linux/ext3_fs.h Thu Nov 7 10:57:34 2002 -@@ -682,7 +682,7 @@ - extern int ext3_check_dir_entry(const char *, struct inode *, - struct ext3_dir_entry_2 *, - struct buffer_head *, unsigned long); --extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent); - extern void ext3_htree_free_dir_info(struct dir_private_info *p); diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-4.patch b/lustre/kernel_patches/patches/ext-2.4-patch-4.patch deleted file mode 100644 index 4c8d4fa..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-4.patch +++ /dev/null @@ -1,52 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# namei.c | 21 ++++++++++++++++++++- -# 1 files changed, 20 insertions(+), 1 deletion(-) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.779 -# Fix ext3 htree rename bug. -# -# This fixes an ext3 htree bug pointed out by Christopher Li; if -# adding the new name to the directory causes a split, this can cause -# the directory entry containing the old name to move to another -# block, and then the removal of the old name will fail. -# -------------------------------------------- -# -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:49 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:49 2002 -@@ -2173,7 +2173,30 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; diff --git a/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch b/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch deleted file mode 100644 index 1786d0f..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch +++ /dev/null @@ -1,144 +0,0 @@ - fs/ext3/ialloc.c | 20 ++++++++++---------- - fs/ext3/namei.c | 16 ++++++++-------- - include/linux/ext3_fs.h | 2 +- - 3 files changed, 19 insertions(+), 19 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/ialloc.c 2003-04-08 23:35:24.000000000 -0600 -@@ -65,8 +65,8 @@ static int read_inode_bitmap (struct sup - if (!bh) { - ext3_error (sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -- "block_group = %lu, inode_bitmap = %lu", -- block_group, (unsigned long) gdp->bg_inode_bitmap); -+ "block_group = %lu, inode_bitmap = %u", -+ block_group, gdp->bg_inode_bitmap); - retval = -EIO; - } - /* -@@ -533,19 +533,19 @@ out: - } - - /* Verify that we are loading a valid orphan from disk */ --struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino) -+struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) - { -- ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); -+ unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); - unsigned long block_group; - int bit; - int bitmap_nr; - struct buffer_head *bh; - struct inode *inode = NULL; -- -+ - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext3_warning(sb, __FUNCTION__, -- "bad orphan ino %ld! e2fsck was run?\n", ino); -+ "bad orphan ino %lu! e2fsck was run?\n", ino); - return NULL; - } - -@@ -554,7 +554,7 @@ struct inode *ext3_orphan_get (struct su - if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 || - !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) { - ext3_warning(sb, __FUNCTION__, -- "inode bitmap error for orphan %ld\n", ino); -+ "inode bitmap error for orphan %lu\n", ino); - return NULL; - } - -@@ -565,16 +565,16 @@ struct inode *ext3_orphan_get (struct su - if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) || - is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { - ext3_warning(sb, __FUNCTION__, -- "bad orphan inode %ld! e2fsck was run?\n", ino); -+ "bad orphan inode %lu! e2fsck was run?\n", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n", - bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); -- printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%d\n", -+ printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); -- printk(KERN_NOTICE "max_ino=%ld\n", max_ino); -+ printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } - /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) ---- linux-2.4.20/fs/ext3/namei.c~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-04-08 23:35:24.000000000 -0600 -@@ -1808,10 +1808,10 @@ int ext3_orphan_del(handle_t *handle, st - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; -- ino_t ino_next; -+ unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; -- -+ - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); -@@ -1822,7 +1822,7 @@ int ext3_orphan_del(handle_t *handle, st - prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - -- jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino); -+ jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - - list_del(&ei->i_orphan); - INIT_LIST_HEAD(&ei->i_orphan); -@@ -1833,13 +1833,13 @@ int ext3_orphan_del(handle_t *handle, st - * list in memory. */ - if (!handle) - goto out; -- -+ - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_err; - - if (prev == &sbi->s_orphan) { -- jbd_debug(4, "superblock will point to %ld\n", ino_next); -+ jbd_debug(4, "superblock will point to %lu\n", ino_next); - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) -@@ -1850,8 +1850,8 @@ int ext3_orphan_del(handle_t *handle, st - struct ext3_iloc iloc2; - struct inode *i_prev = - list_entry(prev, struct inode, u.ext3_i.i_orphan); -- -- jbd_debug(4, "orphan inode %ld will point to %ld\n", -+ -+ jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); - err = ext3_reserve_inode_write(handle, i_prev, &iloc2); - if (err) -@@ -1866,7 +1866,7 @@ int ext3_orphan_del(handle_t *handle, st - if (err) - goto out_brelse; - --out_err: -+out_err: - ext3_std_error(inode->i_sb, err); - out: - unlock_super(inode->i_sb); ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-04-08 23:35:24.000000000 -0600 -@@ -673,7 +673,7 @@ extern int ext3fs_dirhash(const char *na - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); --extern struct inode * ext3_orphan_get (struct super_block *, ino_t); -+extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); - extern void ext3_check_inodes_bitmap (struct super_block *); - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - -_ diff --git a/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch b/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch deleted file mode 100644 index 5f566de..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch +++ /dev/null @@ -1,118 +0,0 @@ - - - - fs/ext3/balloc.c | 53 +++++++++++++++++++++++++++++++---------------------- - 1 files changed, 31 insertions(+), 22 deletions(-) - ---- linux-2.4.20/fs/ext3/balloc.c~ext3-2.4.20-fixes 2003-04-08 23:35:17.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/balloc.c 2003-04-08 23:35:17.000000000 -0600 -@@ -276,7 +276,8 @@ void ext3_free_blocks (handle_t *handle, - } - lock_super (sb); - es = sb->u.ext3_sb.s_es; -- if (block < le32_to_cpu(es->s_first_data_block) || -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { - ext3_error (sb, "ext3_free_blocks", - "Freeing blocks not in datazone - " -@@ -309,17 +310,6 @@ do_more: - if (!gdp) - goto error_return; - -- if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -- in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -- in_range (block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group) || -- in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_free_blocks", -- "Freeing blocks in system zones - " -- "Block = %lu, count = %lu", -- block, count); -- - /* - * We are about to start releasing blocks in the bitmap, - * so we need undo access. -@@ -345,14 +335,24 @@ do_more: - if (err) - goto error_return; - -- for (i = 0; i < count; i++) { -+ for (i = 0; i < count; i++, block++) { -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Freeing block in system zone - block = %lu", -+ block); -+ continue; -+ } -+ - /* - * An HJ special. This is expensive... - */ - #ifdef CONFIG_JBD_DEBUG - { - struct buffer_head *debug_bh; -- debug_bh = sb_get_hash_table(sb, block + i); -+ debug_bh = sb_get_hash_table(sb, block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "Deleted!"); - if (!bh2jh(bitmap_bh)->b_committed_data) -@@ -365,9 +365,8 @@ do_more: - #endif - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) { -- ext3_error (sb, __FUNCTION__, -- "bit already cleared for block %lu", -- block + i); -+ ext3_error(sb, __FUNCTION__, -+ "bit already cleared for block %lu", block); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - dquot_freed_blocks++; -@@ -415,7 +414,6 @@ do_more: - if (!err) err = ret; - - if (overflow && !err) { -- block += count; - count = overflow; - goto do_more; - } -@@ -576,6 +574,7 @@ int ext3_new_block (handle_t *handle, st - - ext3_debug ("goal=%lu.\n", goal); - -+repeat: - /* - * First, test whether the goal block is free. - */ -@@ -684,10 +683,20 @@ got_block: - if (tmp == le32_to_cpu(gdp->bg_block_bitmap) || - tmp == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range (tmp, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_new_block", -- "Allocating block in system zone - " -- "block = %u", tmp); -+ EXT3_SB(sb)->s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Allocating block in system zone - block = %u", tmp); -+ -+ /* Note: This will potentially use up one of the handle's -+ * buffer credits. Normally we have way too many credits, -+ * so that is OK. In _very_ rare cases it might not be OK. -+ * We will trigger an assertion if we run out of credits, -+ * and we will have to do a full fsck of the filesystem - -+ * better than randomly corrupting filesystem metadata. -+ */ -+ ext3_set_bit(j, bh->b_data); -+ goto repeat; -+ } - - /* The superblock lock should guard against anybody else beating - * us to this point! */ - -_ diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch new file mode 100644 index 0000000..3724fd9 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch @@ -0,0 +1,83 @@ +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h ++++ linux-stage/include/linux/ext3_fs.h +@@ -921,6 +921,7 @@ extern unsigned ext3_list_backups(struct + unsigned *five, unsigned *seven); + + /* super.c */ ++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); + extern void ext3_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); + extern void __ext3_std_error (struct super_block *, const char *, int); +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c ++++ linux-stage/fs/ext3/super.c +@@ -47,9 +47,6 @@ static int ext3_load_journal(struct supe + unsigned long journal_devnum); + static int ext3_create_journal(struct super_block *, struct ext3_super_block *, + int); +-static void ext3_commit_super (struct super_block * sb, +- struct ext3_super_block * es, +- int sync); + static void ext3_mark_recovery_complete(struct super_block * sb, + struct ext3_super_block * es); + static void ext3_clear_journal_err(struct super_block * sb, +@@ -2175,7 +2172,7 @@ static int ext3_create_journal(struct su + return 0; + } + +-static void ext3_commit_super (struct super_block * sb, ++void ext3_commit_super (struct super_block * sb, + struct ext3_super_block * es, + int sync) + { +Index: linux-stage/fs/ext3/namei.c +=================================================================== +--- linux-stage.orig/fs/ext3/namei.c ++++ linux-stage/fs/ext3/namei.c +@@ -1591,7 +1591,7 @@ static int ext3_delete_entry (handle_t * + struct buffer_head * bh) + { + struct ext3_dir_entry_2 * de, * pde; +- int i; ++ int i, err; + + i = 0; + pde = NULL; +@@ -1601,7 +1601,9 @@ static int ext3_delete_entry (handle_t * + return -EIO; + if (de == de_del) { + BUFFER_TRACE(bh, "get_write_access"); +- ext3_journal_get_write_access(handle, bh); ++ err = ext3_journal_get_write_access(handle, bh); ++ if (err) ++ return err; + if (pde) + pde->rec_len = + cpu_to_le16(le16_to_cpu(pde->rec_len) + +Index: linux-stage/fs/ext3/inode.c +=================================================================== +--- linux-stage.orig/fs/ext3/inode.c ++++ linux-stage/fs/ext3/inode.c +@@ -1838,8 +1838,18 @@ ext3_clear_blocks(handle_t *handle, stru + ext3_mark_inode_dirty(handle, inode); + ext3_journal_test_restart(handle, inode); + if (bh) { ++ int err; + BUFFER_TRACE(bh, "retaking write access"); +- ext3_journal_get_write_access(handle, bh); ++ err = ext3_journal_get_write_access(handle, bh); ++ if (err) { ++ struct super_block *sb = inode->i_sb; ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ printk (KERN_CRIT"EXT3-fs: can't continue truncate\n"); ++ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; ++ es->s_state |= cpu_to_le16(EXT3_ERROR_FS); ++ ext3_commit_super(sb, es, 1); ++ return; ++ } + } + } + diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch deleted file mode 100644 index e54774f..0000000 --- a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch +++ /dev/null @@ -1,113 +0,0 @@ -Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h 2006-08-09 17:59:34.000000000 +0400 -+++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h 2006-08-22 12:35:55.000000000 +0400 -@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super - extern void ext3_write_super (struct super_block *); - extern void ext3_write_super_lockfs (struct super_block *); - extern void ext3_unlockfs (struct super_block *); -+extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); - extern int ext3_remount (struct super_block *, int *, char *); - extern int ext3_statfs (struct super_block *, struct kstatfs *); - -Index: linux-2.6.5-7.201-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-7.201-full.orig/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 -+++ linux-2.6.5-7.201-full/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 -@@ -39,7 +39,7 @@ - static int ext3_load_journal(struct super_block *, struct ext3_super_block *); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); --static void ext3_commit_super (struct super_block * sb, -+void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, - int sync); - static void ext3_mark_recovery_complete(struct super_block * sb, -@@ -1781,7 +1781,7 @@ static int ext3_create_journal(struct su - return 0; - } - --static void ext3_commit_super (struct super_block * sb, -+void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, - int sync) - { -Index: linux-2.6.5-7.201-full/fs/ext3/namei.c -=================================================================== ---- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 -+++ linux-2.6.5-7.201-full/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 -@@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t * - struct buffer_head * bh) - { - struct ext3_dir_entry_2 * de, * pde; -- int i; -+ int i, err; - - i = 0; - pde = NULL; -@@ -1608,7 +1608,9 @@ static int ext3_delete_entry (handle_t * - return -EIO; - if (de == de_del) { - BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ return err; - if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + -Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c 2006-07-14 01:53:23.000000000 +0400 -+++ linux-2.6.5-7.201-full/fs/ext3/xattr.c 2006-08-09 17:59:37.000000000 +0400 -@@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru - { - int error = -EINVAL; - -- if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX) { - write_lock(&ext3_handler_lock); - if (!ext3_xattr_handlers[name_index-1]) { - ext3_xattr_handlers[name_index-1] = handler; -Index: linux-2.6.5-7.201-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c 2006-07-14 01:53:22.000000000 +0400 -+++ linux-2.6.5-7.201-full/fs/ext3/inode.c 2006-08-22 12:35:28.000000000 +0400 -@@ -1517,9 +1517,14 @@ out_stop: - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); -- err = ext3_mark_inode_dirty(handle, inode); -- if (!ret) -- ret = err; -+ /* -+ * We're going to return a positive `ret' -+ * here due to non-zero-length I/O, so there's -+ * no way of reporting error returns from -+ * ext3_mark_inode_dirty() to userspace. So -+ * ignore it. -+ */ -+ ext3_mark_inode_dirty(handle, inode); - } - } - err = ext3_journal_stop(handle); -@@ -1811,8 +1816,18 @@ ext3_clear_blocks(handle_t *handle, stru - ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); - if (bh) { -+ int err; - BUFFER_TRACE(bh, "retaking write access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_super_block *es = EXT3_SB(sb)->s_es; -+ printk (KERN_CRIT"EXT3-fs: can't continue truncate\n"); -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; -+ es->s_state |= cpu_to_le16(EXT3_ERROR_FS); -+ ext3_commit_super(sb, es, 1); -+ return; -+ } - } - } - diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch deleted file mode 100644 index f6904f2..0000000 --- a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch +++ /dev/null @@ -1,113 +0,0 @@ -Index: linux-2.6.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-08-09 17:56:39.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-08-22 12:36:22.000000000 +0400 -@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super - extern void ext3_write_super (struct super_block *); - extern void ext3_write_super_lockfs (struct super_block *); - extern void ext3_unlockfs (struct super_block *); -+extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); - extern int ext3_remount (struct super_block *, int *, char *); - extern int ext3_statfs (struct super_block *, struct kstatfs *); - -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 -@@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe - unsigned long journal_devnum); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); --static void ext3_commit_super (struct super_block * sb, -+void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, - int sync); - static void ext3_mark_recovery_complete(struct super_block * sb, -@@ -1991,7 +1991,7 @@ static int ext3_create_journal(struct su - return 0; - } - --static void ext3_commit_super (struct super_block * sb, -+void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, - int sync) - { -Index: linux-2.6.9-full/fs/ext3/namei.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 -@@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t * - struct buffer_head * bh) - { - struct ext3_dir_entry_2 * de, * pde; -- int i; -+ int i, err; - - i = 0; - pde = NULL; -@@ -1609,7 +1609,9 @@ static int ext3_delete_entry (handle_t * - return -EIO; - if (de == de_del) { - BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ return err; - if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + -Index: linux-2.6.9-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-06-01 14:58:48.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-08-09 17:56:40.000000000 +0400 -@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index) - { - struct xattr_handler *handler = NULL; - -- if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) -+ if (name_index > 0 && name_index < EXT3_XATTR_INDEX_MAX) - handler = ext3_xattr_handler_map[name_index]; - return handler; - } -Index: linux-2.6.9-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-02 23:37:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/inode.c 2006-08-22 12:34:28.000000000 +0400 -@@ -1513,9 +1513,14 @@ out_stop: - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); -- err = ext3_mark_inode_dirty(handle, inode); -- if (!ret) -- ret = err; -+ /* -+ * We're going to return a positive `ret' -+ * here due to non-zero-length I/O, so there's -+ * no way of reporting error returns from -+ * ext3_mark_inode_dirty() to userspace. So -+ * ignore it. -+ */ -+ ext3_mark_inode_dirty(handle, inode); - } - } - err = ext3_journal_stop(handle); -@@ -1807,8 +1812,18 @@ ext3_clear_blocks(handle_t *handle, stru - ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); - if (bh) { -+ int err; - BUFFER_TRACE(bh, "retaking write access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_super_block *es = EXT3_SB(sb)->s_es; -+ printk (KERN_CRIT"EXT3-fs: can't continue truncate\n"); -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; -+ es->s_state |= cpu_to_le16(EXT3_ERROR_FS); -+ ext3_commit_super(sb, es, 1); -+ return; -+ } - } - } - diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch deleted file mode 100644 index 619b845..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch +++ /dev/null @@ -1,499 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 365 insertions(+) - -Index: linux-2.4.20-hp_pnnl39/fs/ext3/super.c -=================================================================== ---- linux-2.4.20-hp_pnnl39.orig/fs/ext3/super.c 2004-11-02 22:16:42.000000000 +0300 -+++ linux-2.4.20-hp_pnnl39/fs/ext3/super.c 2004-11-02 23:47:18.513035128 +0300 -@@ -400,6 +400,221 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_delete; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) -+ goto out_delete; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_delete; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. -+ */ -+ down(&sbi->s_orphan_lock); -+ -+ sbi->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ goto out_delete; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&oei->i_orphan)); -+ -+ nei = EXT3_I(new_inode); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ nei->i_orphan = oei->i_orphan; -+ nei->i_orphan.next->prev = &nei->i_orphan; -+ nei->i_orphan.prev->next = &nei->i_orphan; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_delete: -+ ext3_delete_inode(old_inode); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -407,6 +622,9 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+#ifdef EXT3_DELETE_THREAD -+ J_ASSERT(sbi->s_delete_inodes == 0); -+#endif - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,7 +673,11 @@ - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -- delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else -+ delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -524,6 +746,13 @@ - clear_opt (*mount_options, XATTR_USER); - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1223,6 +1452,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1614,7 +1844,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1678,6 +1913,9 @@ - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.20-hp_pnnl39/fs/ext3/inode.c -=================================================================== ---- linux-2.4.20-hp_pnnl39.orig/fs/ext3/inode.c 2004-11-02 22:16:41.000000000 +0300 -+++ linux-2.4.20-hp_pnnl39/fs/ext3/inode.c 2004-11-02 22:16:42.000000000 +0300 -@@ -2500,6 +2500,118 @@ - return err; - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 0; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. -Index: linux-2.4.20-hp_pnnl39/fs/ext3/file.c -=================================================================== ---- linux-2.4.20-hp_pnnl39.orig/fs/ext3/file.c 2004-11-02 22:16:41.000000000 +0300 -+++ linux-2.4.20-hp_pnnl39/fs/ext3/file.c 2004-11-02 22:16:42.132490592 +0300 -@@ -125,7 +125,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.20-hp_pnnl39/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.20-hp_pnnl39.orig/include/linux/ext3_fs.h 2004-11-02 22:16:41.000000000 +0300 -+++ linux-2.4.20-hp_pnnl39/include/linux/ext3_fs.h 2004-11-02 22:16:42.000000000 +0300 -@@ -193,6 +193,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -320,6 +321,7 @@ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -694,6 +696,9 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -Index: linux-2.4.20-hp_pnnl39/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.20-hp_pnnl39.orig/include/linux/ext3_fs_sb.h 2004-11-02 22:16:42.000000000 +0300 -+++ linux-2.4.20-hp_pnnl39/include/linux/ext3_fs_sb.h 2004-11-02 23:43:11.521583536 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 8 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch deleted file mode 100644 index 6d6720d..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch +++ /dev/null @@ -1,449 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 365 insertions(+) - -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2004-01-12 19:20:07.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2004-01-13 17:25:49.000000000 +0300 -@@ -425,6 +425,127 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_devices); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_devices); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ J_ASSERT(EXT3_I(inode)->i_state & EXT3_STATE_DELETE); -+ J_ASSERT(inode->i_nlink == 1); -+ inode->i_nlink = 0; -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -432,6 +647,9 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+#ifdef EXT3_DELETE_THREAD -+ J_ASSERT(sbi->s_delete_inodes == 0); -+#endif - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -579,6 +799,13 @@ - *mount_flags &= ~MS_POSIXACL; - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1283,6 +1510,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1676,7 +1904,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1746,6 +1979,9 @@ - if (!parse_options(data, &tmp, sbi, &mount_flags, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2004-01-12 19:20:07.000000000 +0300 -@@ -2179,6 +2179,118 @@ - return; /* AKPM: return what? */ - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 1; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_devices)); -+ list_add_tail(&new_inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. -Index: linux-2.4.21-chaos/fs/ext3/file.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/file.c 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/file.c 2004-01-12 19:20:07.000000000 +0300 -@@ -132,7 +132,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.21-chaos/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/namei.c 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/namei.c 2004-01-12 20:36:32.000000000 +0300 -@@ -1936,6 +1936,40 @@ - return retval; - } - -+#ifdef EXT3_DELETE_THREAD -+static int ext3_try_to_delay_deletion(struct inode *inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long blocks; -+ -+ if (!test_opt(inode->i_sb, ASYNCDEL)) -+ return 0; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ blocks = inode->i_blocks >> (inode->i_blkbits - 9); -+ if (IS_SYNC(inode) || blocks <= EXT3_NDIR_BLOCKS) -+ return 0; -+ -+ inode->i_nlink = 1; -+ atomic_inc(&inode->i_count); -+ ei->i_state |= EXT3_STATE_DELETE; -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&inode->i_devices)); -+ list_add_tail(&inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ -+ return 0; -+} -+#else -+#define ext3_try_to_delay_deletion(inode) do {} while (0) -+#endif -+ - static int ext3_unlink(struct inode * dir, struct dentry *dentry) - { - int retval; -@@ -1977,8 +2007,10 @@ - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; -- if (!inode->i_nlink) -+ if (!inode->i_nlink) { -+ ext3_try_to_delay_deletion(inode); - ext3_orphan_add(handle, inode); -+ } - inode->i_ctime = dir->i_ctime; - ext3_mark_inode_dirty(handle, inode); - retval = 0; -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2004-01-12 19:20:07.000000000 +0300 -@@ -195,6 +195,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -323,6 +324,7 @@ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -697,6 +699,9 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - extern void ext3_set_inode_flags(struct inode *); - - /* ioctl.c */ -Index: linux-2.4.21-chaos/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-12 19:20:07.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h 2004-01-12 20:53:51.000000000 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-suse-171.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-suse-171.patch deleted file mode 100644 index 7eb6442..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-suse-171.patch +++ /dev/null @@ -1,496 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 365 insertions(+) - -Index: linux-2.4.21-241/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/super.c 2004-10-04 02:48:16.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/super.c 2004-10-04 02:48:18.000000000 -0400 -@@ -401,6 +401,221 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_delete; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) -+ goto out_delete; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_delete; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. -+ */ -+ down(&sbi->s_orphan_lock); -+ -+ sbi->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ goto out_delete; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&oei->i_orphan)); -+ -+ nei = EXT3_I(new_inode); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ nei->i_orphan = oei->i_orphan; -+ nei->i_orphan.next->prev = &nei->i_orphan; -+ nei->i_orphan.prev->next = &nei->i_orphan; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_delete: -+ ext3_delete_inode(old_inode); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -408,6 +623,7 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ J_ASSERT(sbi->s_delete_inodes == 0); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -479,7 +695,11 @@ - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -557,6 +777,13 @@ - *mount_flags &= ~MS_POSIXACL; - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1261,6 +1488,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1652,7 +1880,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1716,6 +1949,9 @@ - if (!parse_options(data, &tmp, sbi, &mount_flags, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.21-241/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/inode.c 2004-10-04 02:48:17.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/inode.c 2004-10-04 02:48:18.000000000 -0400 -@@ -2694,6 +2694,118 @@ - return err; - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 0; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. -Index: linux-2.4.21-241/fs/ext3/file.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/file.c 2004-10-04 02:48:13.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/file.c 2004-10-04 02:48:18.000000000 -0400 -@@ -132,7 +132,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.21-241/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-241.orig/include/linux/ext3_fs.h 2004-10-04 02:48:17.000000000 -0400 -+++ linux-2.4.21-241/include/linux/ext3_fs.h 2004-10-04 02:48:18.000000000 -0400 -@@ -193,6 +193,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -321,6 +322,7 @@ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -699,6 +701,9 @@ - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); - extern void ext3_set_inode_flags(struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -Index: linux-2.4.21-241/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21-241.orig/include/linux/ext3_fs_sb.h 2004-10-04 02:48:16.000000000 -0400 -+++ linux-2.4.21-241/include/linux/ext3_fs_sb.h 2004-10-04 02:48:18.000000000 -0400 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 8 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch deleted file mode 100644 index 61bad1b..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch +++ /dev/null @@ -1,449 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 365 insertions(+) - -Index: linux-2.4.24/fs/ext3/super.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/super.c 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.24/fs/ext3/super.c 2004-01-13 16:27:43.000000000 +0300 -@@ -400,6 +400,127 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_devices); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_devices); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ J_ASSERT(EXT3_I(inode)->i_state & EXT3_STATE_DELETE); -+ J_ASSERT(inode->i_nlink == 1); -+ inode->i_nlink = 0; -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -407,6 +529,9 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+#ifdef EXT3_DELETE_THREAD -+ J_ASSERT(sbi->s_delete_inodes == 0); -+#endif - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -527,6 +650,13 @@ - clear_opt (*mount_options, XATTR_USER); - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1227,6 +1357,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1618,7 +1749,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1682,6 +1818,9 @@ - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.24/fs/ext3/inode.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/inode.c 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.24/fs/ext3/inode.c 2004-01-12 20:36:32.000000000 +0300 -@@ -2551,6 +2551,118 @@ - return err; - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 1; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_devices)); -+ list_add_tail(&new_inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. -Index: linux-2.4.24/fs/ext3/file.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/file.c 2004-01-12 20:36:29.000000000 +0300 -+++ linux-2.4.24/fs/ext3/file.c 2004-01-12 20:36:32.000000000 +0300 -@@ -126,7 +126,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.24/fs/ext3/namei.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/namei.c 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.24/fs/ext3/namei.c 2004-01-12 20:36:32.000000000 +0300 -@@ -1936,6 +1936,40 @@ - return retval; - } - -+#ifdef EXT3_DELETE_THREAD -+static int ext3_try_to_delay_deletion(struct inode *inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long blocks; -+ -+ if (!test_opt(inode->i_sb, ASYNCDEL)) -+ return 0; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ blocks = inode->i_blocks >> (inode->i_blkbits - 9); -+ if (IS_SYNC(inode) || blocks <= EXT3_NDIR_BLOCKS) -+ return 0; -+ -+ inode->i_nlink = 1; -+ atomic_inc(&inode->i_count); -+ ei->i_state |= EXT3_STATE_DELETE; -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&inode->i_devices)); -+ list_add_tail(&inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ -+ return 0; -+} -+#else -+#define ext3_try_to_delay_deletion(inode) do {} while (0) -+#endif -+ - static int ext3_unlink(struct inode * dir, struct dentry *dentry) - { - int retval; -@@ -1977,8 +2007,10 @@ - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; -- if (!inode->i_nlink) -+ if (!inode->i_nlink) { -+ ext3_try_to_delay_deletion(inode); - ext3_orphan_add(handle, inode); -+ } - inode->i_ctime = dir->i_ctime; - ext3_mark_inode_dirty(handle, inode); - retval = 0; -Index: linux-2.4.24/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_fs.h 2004-01-12 20:36:32.000000000 +0300 -@@ -193,6 +193,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -320,6 +321,7 @@ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -697,6 +699,9 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - extern void ext3_set_inode_flags(struct inode *); - - /* ioctl.c */ -Index: linux-2.4.24/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs_sb.h 2004-01-12 20:36:31.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_fs_sb.h 2004-01-12 20:36:32.000000000 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 8 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.29.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.29.patch deleted file mode 100644 index 39c47a7..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.29.patch +++ /dev/null @@ -1,442 +0,0 @@ -Index: linux-2.4.29/fs/ext3/super.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/super.c 2005-05-03 15:53:33.047533872 +0300 -+++ linux-2.4.29/fs/ext3/super.c 2005-05-03 15:54:47.192262160 +0300 -@@ -400,6 +400,127 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_devices); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_devices); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ J_ASSERT(EXT3_I(inode)->i_state & EXT3_STATE_DELETE); -+ J_ASSERT(inode->i_nlink == 1); -+ inode->i_nlink = 0; -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -407,6 +528,9 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+#ifdef EXT3_DELETE_THREAD -+ J_ASSERT(sbi->s_delete_inodes == 0); -+#endif - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -526,6 +650,13 @@ - clear_opt (*mount_options, XATTR_USER); - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1244,6 +1375,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; - ext3_orphan_cleanup(sb, es); - EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; -@@ -1626,7 +1758,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1690,6 +1827,9 @@ - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.29/fs/ext3/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/inode.c 2005-05-03 15:53:36.555000656 +0300 -+++ linux-2.4.29/fs/ext3/inode.c 2005-05-03 15:53:56.901907456 +0300 -@@ -2562,6 +2562,118 @@ - return err; - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 1; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_devices)); -+ list_add_tail(&new_inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. -Index: linux-2.4.29/fs/ext3/file.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/file.c 2005-04-07 19:31:00.000000000 +0300 -+++ linux-2.4.29/fs/ext3/file.c 2005-05-03 15:53:56.902907304 +0300 -@@ -123,7 +123,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.29/fs/ext3/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/namei.c 2005-05-03 15:53:33.044534328 +0300 -+++ linux-2.4.29/fs/ext3/namei.c 2005-05-03 15:53:56.905906848 +0300 -@@ -838,6 +838,40 @@ - return retval; - } - -+#ifdef EXT3_DELETE_THREAD -+static int ext3_try_to_delay_deletion(struct inode *inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long blocks; -+ -+ if (!test_opt(inode->i_sb, ASYNCDEL)) -+ return 0; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ blocks = inode->i_blocks >> (inode->i_blkbits - 9); -+ if (IS_SYNC(inode) || blocks <= EXT3_NDIR_BLOCKS) -+ return 0; -+ -+ inode->i_nlink = 1; -+ atomic_inc(&inode->i_count); -+ ei->i_state |= EXT3_STATE_DELETE; -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&inode->i_devices)); -+ list_add_tail(&inode->i_devices, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ -+ return 0; -+} -+#else -+#define ext3_try_to_delay_deletion(inode) do {} while (0) -+#endif -+ - static int ext3_unlink(struct inode * dir, struct dentry *dentry) - { - int retval; -@@ -878,8 +912,10 @@ - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; -- if (!inode->i_nlink) -+ if (!inode->i_nlink) { -+ ext3_try_to_delay_deletion(inode); - ext3_orphan_add(handle, inode); -+ } - inode->i_ctime = dir->i_ctime; - ext3_mark_inode_dirty(handle, inode); - retval = 0; -Index: linux-2.4.29/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-05-03 15:53:37.124914016 +0300 -+++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 15:53:56.907906544 +0300 -@@ -188,6 +188,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -315,6 +316,7 @@ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -639,6 +641,9 @@ - extern void ext3_dirty_inode(struct inode *); - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - extern void ext3_set_inode_flags(struct inode *); - - /* ioctl.c */ -Index: linux-2.4.29/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs_sb.h 2005-05-03 15:53:33.048533720 +0300 -+++ linux-2.4.29/include/linux/ext3_fs_sb.h 2005-05-03 15:53:56.909906240 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 8 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -74,6 +76,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-disable-write-bar-by-default-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-disable-write-bar-by-default-2.6-sles10.patch deleted file mode 100644 index 9b8d331..0000000 --- a/lustre/kernel_patches/patches/ext3-disable-write-bar-by-default-2.6-sles10.patch +++ /dev/null @@ -1,15 +0,0 @@ ---- - fs/ext3/super.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- linux-2.6.16.21-0.8.orig/fs/ext3/super.c -+++ linux-2.6.16.21-0.8/fs/ext3/super.c -@@ -1425,7 +1425,7 @@ static int ext3_fill_super (struct super - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); - - /* enable barriers by default */ -- set_opt(sbi->s_mount_opt, BARRIER); -+ /* set_opt(sbi->s_mount_opt, BARRIER); */ - set_opt(sbi->s_mount_opt, RESERVATION); - - if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.20.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.20.patch deleted file mode 100644 index 0e93ced..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.20.patch +++ /dev/null @@ -1,747 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 12 - fs/ext3/super.c | 6 - fs/ext3/xattr.c | 597 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 615 insertions(+), 11 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-ea-in-inode-2.4.20 2003-10-08 23:18:08.000000000 +0400 -+++ linux-2.4.20-alexey/fs/ext3/ialloc.c 2003-10-12 16:25:21.000000000 +0400 -@@ -577,6 +577,10 @@ repeat: - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ inode->u.ext3_i.i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; - BUFFER_TRACE(iloc->bh, "get_write_access"); ---- linux-2.4.20/fs/ext3/inode.c~ext3-ea-in-inode-2.4.20 2003-10-08 23:18:08.000000000 +0400 -+++ linux-2.4.20-alexey/fs/ext3/inode.c 2003-10-12 16:25:21.000000000 +0400 -@@ -2209,6 +2209,12 @@ void ext3_read_inode(struct inode * inod - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ inode->u.ext3_i.i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ inode->u.ext3_i.i_extra_isize = 0; -+ - brelse (iloc.bh); - - if (S_ISREG(inode->i_mode)) { -@@ -2274,6 +2280,8 @@ static int ext3_do_update_inode(handle_t - if (err) - goto out_brelse; - } -+ if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2357,6 +2365,10 @@ static int ext3_do_update_inode(handle_t - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) ---- linux-2.4.20/fs/ext3/xattr.c~ext3-ea-in-inode-2.4.20 2003-10-08 23:18:06.000000000 +0400 -+++ linux-2.4.20-alexey/fs/ext3/xattr.c 2003-10-12 16:26:31.000000000 +0400 -@@ -100,6 +100,9 @@ - static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -348,17 +351,12 @@ ext3_removexattr(struct dentry *dentry, - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -447,6 +445,94 @@ cleanup: - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -457,7 +543,7 @@ cleanup: - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -530,6 +616,131 @@ cleanup: - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -553,6 +764,279 @@ static void ext3_xattr_update_super_bloc - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set() - * - * Create, replace or remove an extended attribute for this inode. Buffer -@@ -566,6 +1050,101 @@ static void ext3_xattr_update_super_bloc - */ - int - ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, int flags) - { - struct super_block *sb = inode->i_sb; ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-ea-in-inode-2.4.20 2003-10-08 23:18:08.000000000 +0400 -+++ linux-2.4.20-alexey/include/linux/ext3_fs.h 2003-10-12 16:35:46.000000000 +0400 -@@ -264,6 +264,8 @@ struct ext3_inode { - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl ---- linux-2.4.20/include/linux/ext3_fs_i.h~ext3-ea-in-inode-2.4.20 2001-11-22 22:46:19.000000000 +0300 -+++ linux-2.4.20-alexey/include/linux/ext3_fs_i.h 2003-10-12 16:34:14.000000000 +0400 -@@ -62,6 +62,9 @@ struct ext3_inode_info { - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's ---- linux-2.4.20/fs/ext3/super.c~ext3-ea-in-inode-2.4.20 2003-10-08 23:18:09.000000000 +0400 -+++ linux-2.4.20-alexey/fs/ext3/super.c 2003-10-12 16:25:21.000000000 +0400 -@@ -1292,8 +1292,10 @@ struct super_block * ext3_read_super (st - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -- printk (KERN_ERR -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { -+ printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - goto failed_mount; - -_ diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch deleted file mode 100644 index 491a2df..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch +++ /dev/null @@ -1,758 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 12 - fs/ext3/super.c | 6 - fs/ext3/xattr.c | 597 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 615 insertions(+), 11 deletions(-) - -Index: linux-2.4.21-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c 2003-12-12 17:39:10.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 17:39:55.000000000 +0300 -@@ -580,6 +580,10 @@ - inode->i_generation = sbi->s_next_generation++; - - ei->i_state = EXT3_STATE_NEW; -+ ei->i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; - BUFFER_TRACE(iloc->bh, "get_write_access"); -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 17:39:11.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 17:39:55.000000000 +0300 -@@ -2502,6 +2502,12 @@ - ei->i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ EXT3_I(inode)->i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ EXT3_I(inode)->i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2564,6 +2570,8 @@ - if (err) - goto out_brelse; - } -+ if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2646,6 +2654,10 @@ - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -Index: linux-2.4.21-chaos/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr.c 2003-12-12 17:38:44.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/xattr.c 2003-12-12 17:42:58.000000000 +0300 -@@ -88,6 +88,9 @@ - struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -256,17 +259,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -359,6 +357,94 @@ - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -369,7 +455,7 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -446,6 +532,131 @@ - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -480,6 +691,102 @@ - */ - int - ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) - { -@@ -868,6 +1174,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-12 17:39:11.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-12 17:39:55.000000000 +0300 -@@ -1354,8 +1354,10 @@ - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -- printk (KERN_ERR -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { -+ printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - goto failed_mount; -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 17:39:10.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 17:39:55.000000000 +0300 -@@ -268,6 +268,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -Index: linux-2.4.21-chaos/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_i.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs_i.h 2003-12-12 17:39:55.000000000 +0300 -@@ -76,6 +76,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-sles.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-sles.patch deleted file mode 100644 index 5cc683f..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-sles.patch +++ /dev/null @@ -1,758 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 12 - fs/ext3/super.c | 6 - fs/ext3/xattr.c | 597 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 615 insertions(+), 11 deletions(-) - -Index: linux-2.4.21-273/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-273.orig/fs/ext3/ialloc.c 2005-04-05 20:00:00.732329548 -0400 -+++ linux-2.4.21-273/fs/ext3/ialloc.c 2005-04-05 20:01:09.836317838 -0400 -@@ -576,6 +576,12 @@ - insert_inode_hash(inode); - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { -+ EXT3_I(inode)->i_extra_isize = sizeof(__u16) /* i_extra_isize */ -+ + sizeof(__u16); /* i_pad1 */ -+ } else -+ EXT3_I(inode)->i_extra_isize = 0; -+ - inode->u.ext3_i.i_state = EXT3_STATE_NEW; - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; -Index: linux-2.4.21-273/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-273.orig/fs/ext3/inode.c 2005-04-05 20:00:02.455079957 -0400 -+++ linux-2.4.21-273/fs/ext3/inode.c 2005-04-05 20:00:11.012840119 -0400 -@@ -2341,6 +2341,12 @@ - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ EXT3_I(inode)->i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ EXT3_I(inode)->i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2401,6 +2407,8 @@ - if (err) - goto out_brelse; - } -+ if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2484,6 +2492,10 @@ - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -Index: linux-2.4.21-273/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-273.orig/fs/ext3/xattr.c 2005-04-05 19:59:59.134561031 -0400 -+++ linux-2.4.21-273/fs/ext3/xattr.c 2005-04-05 20:00:11.014839829 -0400 -@@ -88,6 +88,9 @@ - struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -256,17 +259,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -359,6 +357,94 @@ - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -369,7 +455,7 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -446,6 +532,131 @@ - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -480,6 +691,102 @@ - */ - int - ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) - { -@@ -870,6 +1177,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended -Index: linux-2.4.21-273/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-273.orig/fs/ext3/super.c 2005-04-05 20:00:04.146834858 -0400 -+++ linux-2.4.21-273/fs/ext3/super.c 2005-04-05 20:00:11.016839539 -0400 -@@ -1334,7 +1334,9 @@ - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { - printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); -Index: linux-2.4.21-273/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/ext3_fs.h 2005-04-05 20:00:02.461079088 -0400 -+++ linux-2.4.21-273/include/linux/ext3_fs.h 2005-04-05 20:00:11.017839394 -0400 -@@ -264,6 +264,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -Index: linux-2.4.21-273/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/ext3_fs_i.h 2005-01-17 07:09:09.000000000 -0500 -+++ linux-2.4.21-273/include/linux/ext3_fs_i.h 2005-04-05 20:00:11.017839394 -0400 -@@ -76,6 +76,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-suse2.patch deleted file mode 100644 index 90ff24a..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-suse2.patch +++ /dev/null @@ -1,758 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 12 - fs/ext3/super.c | 6 - fs/ext3/xattr.c | 597 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 615 insertions(+), 11 deletions(-) - -Index: linux-2.4.21-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c 2003-12-12 17:39:10.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 17:39:55.000000000 +0300 -@@ -580,6 +580,10 @@ - inode->i_generation = sbi->s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ inode->u.ext3_i.i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; - BUFFER_TRACE(iloc->bh, "get_write_access"); -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 17:39:11.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 17:39:55.000000000 +0300 -@@ -2502,6 +2502,12 @@ - ei->i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ EXT3_I(inode)->i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ EXT3_I(inode)->i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2564,6 +2570,8 @@ - if (err) - goto out_brelse; - } -+ if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2646,6 +2654,10 @@ - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -Index: linux-2.4.21-chaos/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr.c 2003-12-12 17:38:44.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/xattr.c 2003-12-12 17:42:58.000000000 +0300 -@@ -88,6 +88,9 @@ - struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -256,17 +259,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -359,6 +357,94 @@ - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -369,7 +455,7 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -446,6 +532,131 @@ - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -480,6 +691,102 @@ - */ - int - ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) - { -@@ -868,6 +1174,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-12 17:39:11.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-12 17:39:55.000000000 +0300 -@@ -1354,8 +1354,10 @@ - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -- printk (KERN_ERR -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { -+ printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - goto failed_mount; -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 17:39:10.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 17:39:55.000000000 +0300 -@@ -268,6 +268,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -Index: linux-2.4.21-chaos/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_i.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs_i.h 2003-12-12 17:39:55.000000000 +0300 -@@ -76,6 +76,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.22-rh.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.22-rh.patch deleted file mode 100644 index 6b22a71..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.22-rh.patch +++ /dev/null @@ -1,755 +0,0 @@ - fs/ext3/ialloc.c | 6 - fs/ext3/inode.c | 12 - fs/ext3/super.c | 6 - fs/ext3/xattr.c | 597 +++++++++++++++++++++++++++++++++++++++++++++- - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 615 insertions(+), 11 deletions(-) - ---- linux-2.4.22-ac1/fs/ext3/ialloc.c~ext3-ea-in-inode-2.4.22-rh 2003-10-08 13:57:56.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/ialloc.c 2003-10-08 15:13:31.000000000 +0400 -@@ -715,6 +715,10 @@ have_bit_and_group: - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ inode->u.ext3_i.i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; - BUFFER_TRACE(iloc->bh, "get_write_access"); ---- linux-2.4.22-ac1/fs/ext3/inode.c~ext3-ea-in-inode-2.4.22-rh 2003-10-08 13:57:57.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/inode.c 2003-10-08 15:14:57.000000000 +0400 -@@ -2229,6 +2229,12 @@ void ext3_read_inode(struct inode * inod - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ inode->u.ext3_i.i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ inode->u.ext3_i.i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2277,6 +2283,8 @@ static int ext3_do_update_inode(handle_t - if (err) - goto out_brelse; - } -+ if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) -+ memset(raw_inode, 0, EXT3_INODE_SIZE(inode->i_sb)); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -@@ -2360,6 +2368,10 @@ static int ext3_do_update_inode(handle_t - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) ---- linux-2.4.22-ac1/fs/ext3/super.c~ext3-ea-in-inode-2.4.22-rh 2003-10-08 13:57:57.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/super.c 2003-10-08 15:13:31.000000000 +0400 -@@ -1299,8 +1299,10 @@ struct super_block * ext3_read_super (st - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); -- if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) { -- printk (KERN_ERR -+ if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || -+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) || -+ (sbi->s_inode_size > blocksize)) { -+ printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", - sbi->s_inode_size); - goto failed_mount; ---- linux-2.4.22-ac1/fs/ext3/xattr.c~ext3-ea-in-inode-2.4.22-rh 2003-10-08 13:57:56.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/xattr.c 2003-10-12 16:36:07.000000000 +0400 -@@ -100,6 +100,9 @@ - static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -348,17 +351,12 @@ ext3_removexattr(struct dentry *dentry, - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -447,6 +445,94 @@ cleanup: - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -457,7 +543,7 @@ cleanup: - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -530,6 +616,131 @@ cleanup: - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -553,6 +764,279 @@ static void ext3_xattr_update_super_bloc - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set() - * - * Create, replace or remove an extended attribute for this inode. Buffer -@@ -566,6 +1050,101 @@ static void ext3_xattr_update_super_bloc - */ - int - ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, int flags) - { - struct super_block *sb = inode->i_sb; -@@ -603,6 +1181,7 @@ ext3_xattr_set(handle_t *handle, struct - name_len = strlen(name); - if (name_len > 255 || value_len > sb->s_blocksize) - return -ERANGE; -+ - down(&ext3_xattr_sem); - - if (block) { ---- linux-2.4.22-ac1/include/linux/ext3_fs.h~ext3-ea-in-inode-2.4.22-rh 2003-10-08 13:57:57.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_fs.h 2003-10-08 15:13:31.000000000 +0400 -@@ -265,6 +265,8 @@ struct ext3_inode { - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl ---- linux-2.4.22-ac1/include/linux/ext3_fs_i.h~ext3-ea-in-inode-2.4.22-rh 2003-09-26 00:54:44.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_fs_i.h 2003-10-08 15:13:31.000000000 +0400 -@@ -62,6 +62,9 @@ struct ext3_inode_info { - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's - -_ diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.29.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.29.patch deleted file mode 100644 index 2376ffa..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.29.patch +++ /dev/null @@ -1,731 +0,0 @@ -Index: linux-2.4.29/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/ialloc.c 2005-05-03 15:56:43.831530296 +0300 -+++ linux-2.4.29/fs/ext3/ialloc.c 2005-05-03 16:07:32.990843080 +0300 -@@ -576,6 +576,10 @@ - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ inode->u.ext3_i.i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ - err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; - BUFFER_TRACE(iloc->bh, "get_write_access"); -Index: linux-2.4.29/fs/ext3/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/inode.c 2005-05-03 15:58:30.758274960 +0300 -+++ linux-2.4.29/fs/ext3/inode.c 2005-05-03 16:07:32.995842320 +0300 -@@ -2240,6 +2240,12 @@ - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ inode->u.ext3_i.i_extra_isize = -+ le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ inode->u.ext3_i.i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2367,6 +2373,10 @@ - else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = -+ cpu_to_le16(EXT3_I(inode)->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -Index: linux-2.4.29/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/xattr.c 2005-04-07 19:31:00.000000000 +0300 -+++ linux-2.4.29/fs/ext3/xattr.c 2005-05-03 16:07:33.007840496 +0300 -@@ -100,6 +100,9 @@ - static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, - struct ext3_xattr_header *); - -+int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); -+ - #ifdef CONFIG_EXT3_FS_XATTR_SHARING - - static int ext3_xattr_cache_insert(struct buffer_head *); -@@ -348,17 +351,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -447,6 +445,94 @@ - } - - /* -+ * ext3_xattr_ibody_get() -+ * -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+int ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ return err; -+} -+ -+/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer -@@ -457,7 +543,7 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -530,6 +616,131 @@ - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) -+ return ret; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -553,6 +764,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return -EIO; -+ raw_inode = iloc.raw_inode; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc); -+ if (err) -+ return err; -+ raw_inode = iloc.raw_inode; -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %d > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set() - * - * Create, replace or remove an extended attribute for this inode. Buffer -@@ -566,6 +1050,101 @@ - */ - int - ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, int flags) - { - struct super_block *sb = inode->i_sb; -@@ -603,6 +1182,7 @@ - name_len = strlen(name); - if (name_len > 255 || value_len > sb->s_blocksize) - return -ERANGE; -+ - down(&ext3_xattr_sem); - - if (block) { -Index: linux-2.4.29/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-05-03 15:58:30.767273592 +0300 -+++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 16:07:33.009840192 +0300 -@@ -259,6 +259,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -Index: linux-2.4.29/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs_i.h 2005-04-07 18:52:18.000000000 +0300 -+++ linux-2.4.29/include/linux/ext3_fs_i.h 2005-05-03 16:07:33.010840040 +0300 -@@ -62,6 +62,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch deleted file mode 100644 index 89cc1b5..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch +++ /dev/null @@ -1,840 +0,0 @@ -Index: linux-stage/fs/ext3/ialloc.c -=================================================================== ---- linux-stage.orig/fs/ext3/ialloc.c 2005-10-04 16:53:24.000000000 -0600 -+++ linux-stage/fs/ext3/ialloc.c 2005-10-04 17:07:25.000000000 -0600 -@@ -629,6 +629,9 @@ - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT3_STATE_NEW; -+ ei->i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2005-10-04 17:00:22.000000000 -0600 -+++ linux-stage/fs/ext3/inode.c 2005-10-04 17:07:25.000000000 -0600 -@@ -2274,7 +2274,7 @@ - * trying to determine the inode's location on-disk and no read need be - * performed. - */ --static int ext3_get_inode_loc(struct inode *inode, -+int ext3_get_inode_loc(struct inode *inode, - struct ext3_iloc *iloc, int in_mem) - { - unsigned long block; -@@ -2484,6 +2484,11 @@ void ext3_read_inode(struct inode * inod - ei->i_data[block] = raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ ei->i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2619,6 +2624,9 @@ static int ext3_do_update_inode(handle_t - } else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -@@ -2849,7 +2857,8 @@ ext3_reserve_inode_write(handle_t *handl - { - int err = 0; - if (handle) { -- err = ext3_get_inode_loc(inode, iloc, 1); -+ err = ext3_get_inode_loc(inode, iloc, EXT3_I(inode)->i_state & -+ EXT3_STATE_NEW); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); -Index: linux-stage/fs/ext3/xattr.c -=================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2005-10-04 16:50:11.000000000 -0600 -+++ linux-stage/fs/ext3/xattr.c 2005-10-04 17:19:43.000000000 -0600 -@@ -149,17 +149,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -173,7 +168,6 @@ - - if (name == NULL) - return -EINVAL; -- down_read(&EXT3_I(inode)->xattr_sem); - error = -ENODATA; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; -@@ -246,15 +240,87 @@ - - cleanup: - brelse(bh); -- up_read(&EXT3_I(inode)->xattr_sem); - - return error; - } - - /* -- * ext3_xattr_list() -+ * ext3_xattr_ibody_get() - * -- * Copy a list of attribute names into the buffer -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc, 0); -+ if (ret) -+ return ret; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * -@@ -262,7 +328,31 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ down_read(&EXT3_I(inode)->xattr_sem); -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ up_read(&EXT3_I(inode)->xattr_sem); -+ -+ return err; -+} -+ -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in EA block -+ */ -+int -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -273,7 +363,6 @@ - ea_idebug(inode, "buffer=%p, buffer_size=%ld", - buffer, (long)buffer_size); - -- down_read(&EXT3_I(inode)->xattr_sem); - error = 0; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; -@@ -330,11 +419,149 @@ - - cleanup: - brelse(bh); -- up_read(&EXT3_I(inode)->xattr_sem); - - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ size_t rest = buffer_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc, 0); -+ if (ret) -+ return ret; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(inode, NULL, 0, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) { -+ size_t size = handler->list(inode, buffer, rest, -+ last->e_name, -+ last->e_name_len); -+ if (buffer) { -+ if (size > rest) { -+ ret = -ERANGE; -+ goto cleanup; -+ } -+ buffer += size; -+ } -+ rest -= size; -+ } -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ down_read(&EXT3_I(inode)->xattr_sem); -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ up_read(&EXT3_I(inode)->xattr_sem); -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -356,6 +583,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc, 0); -+ if (err) -+ return -EIO; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc, 0); -+ if (err) -+ return err; -+ raw_inode = ext3_raw_inode(&iloc); -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set_handle() - * - * Create, replace or remove an extended attribute for this inode. Buffer -@@ -369,6 +869,104 @@ - */ - int - ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ down_write(&EXT3_I(inode)->xattr_sem); -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) - { -@@ -391,22 +989,7 @@ - * towards the end of the block). - * end -- Points right after the block pointed to by header. - */ -- -- ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -- name_index, name, value, (long)value_len); -- -- if (IS_RDONLY(inode)) -- return -EROFS; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- return -EPERM; -- if (value == NULL) -- value_len = 0; -- if (name == NULL) -- return -EINVAL; - name_len = strlen(name); -- if (name_len > 255 || value_len > sb->s_blocksize) -- return -ERANGE; -- down_write(&EXT3_I(inode)->xattr_sem); - if (EXT3_I(inode)->i_file_acl) { - /* The inode already has an extended attribute block. */ - bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); -@@ -638,7 +1221,6 @@ - brelse(bh); - if (!(bh && header == HDR(bh))) - kfree(header); -- up_write(&EXT3_I(inode)->xattr_sem); - - return error; - } -Index: linux-stage/fs/ext3/xattr.h -=================================================================== ---- linux-stage.orig/fs/ext3/xattr.h 2005-10-04 16:50:11.000000000 -0600 -+++ linux-stage/fs/ext3/xattr.h 2005-10-04 17:07:25.000000000 -0600 -@@ -67,7 +67,8 @@ - extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); - extern int ext3_xattr_list(struct inode *, char *, size_t); - extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); --extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); -+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int); -+extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int); - - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2005-10-04 16:53:29.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs.h 2005-10-04 17:07:25.000000000 -0600 -@@ -293,6 +293,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -@@ -757,6 +759,7 @@ - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc, int in_mem); - - extern void ext3_read_inode (struct inode *); - extern int ext3_write_inode (struct inode *, int); -Index: linux-stage/include/linux/ext3_fs_i.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs_i.h 2005-10-04 16:50:11.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs_i.h 2005-10-04 17:07:25.000000000 -0600 -@@ -113,6 +113,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch deleted file mode 100644 index 72c25a4..0000000 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch +++ /dev/null @@ -1,840 +0,0 @@ -%patch -Index: linux-2.6.0/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.0.orig/fs/ext3/ialloc.c 2004-01-14 18:54:11.000000000 +0300 -+++ linux-2.6.0/fs/ext3/ialloc.c 2004-01-14 18:54:12.000000000 +0300 -@@ -627,6 +627,9 @@ - inode->i_generation = EXT3_SB(sb)->s_next_generation++; - - ei->i_state = EXT3_STATE_NEW; -+ ei->i_extra_isize = -+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-2.6.0/fs/ext3/inode.c -=================================================================== ---- linux-2.6.0.orig/fs/ext3/inode.c 2004-01-14 18:54:12.000000000 +0300 -+++ linux-2.6.0/fs/ext3/inode.c 2004-01-14 19:09:46.000000000 +0300 -@@ -2339,7 +2339,7 @@ - * trying to determine the inode's location on-disk and no read need be - * performed. - */ --static int ext3_get_inode_loc(struct inode *inode, -+int ext3_get_inode_loc(struct inode *inode, - struct ext3_iloc *iloc, int in_mem) - { - unsigned long block; -@@ -2547,6 +2547,11 @@ - ei->i_data[block] = raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); -+ else -+ ei->i_extra_isize = 0; -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2682,6 +2687,9 @@ - } else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) -+ raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) -@@ -2849,7 +2857,8 @@ ext3_reserve_inode_write(handle_t *handl - { - int err = 0; - if (handle) { -- err = ext3_get_inode_loc(inode, iloc, 1); -+ err = ext3_get_inode_loc(inode, iloc, EXT3_I(inode)->i_state & -+ EXT3_STATE_NEW); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); -Index: linux-2.6.0/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.0.orig/fs/ext3/xattr.c 2003-12-30 08:33:13.000000000 +0300 -+++ linux-2.6.0/fs/ext3/xattr.c 2004-01-14 18:54:12.000000000 +0300 -@@ -246,17 +246,12 @@ - } - - /* -- * ext3_xattr_get() -- * -- * Copy an extended attribute into the buffer -- * provided, or compute the buffer size required. -- * Buffer is NULL to compute the size of the buffer required. -+ * ext3_xattr_block_get() - * -- * Returns a negative error number on failure, or the number of bytes -- * used / required on success. -+ * routine looks for attribute in EA block and returns it's value and size - */ - int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; -@@ -270,7 +265,6 @@ - - if (name == NULL) - return -EINVAL; -- down_read(&EXT3_I(inode)->xattr_sem); - error = -ENODATA; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; -@@ -343,15 +337,87 @@ - - cleanup: - brelse(bh); -- up_read(&EXT3_I(inode)->xattr_sem); - - return error; - } - - /* -- * ext3_xattr_list() -+ * ext3_xattr_ibody_get() - * -- * Copy a list of attribute names into the buffer -+ * routine looks for attribute in inode body and returns it's value and size -+ */ -+int -+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int size, name_len = strlen(name), storage_size; -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOENT; -+ -+ ret = ext3_get_inode_loc(inode, &iloc, 0); -+ if (ret) -+ return ret; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_get", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ goto found; -+ last = next; -+ } -+ -+ /* can't find EA */ -+ brelse(iloc.bh); -+ return -ENOENT; -+ -+found: -+ size = le32_to_cpu(last->e_value_size); -+ if (buffer) { -+ ret = -ERANGE; -+ if (buffer_size >= size) { -+ memcpy(buffer, start + le16_to_cpu(last->e_value_offs), -+ size); -+ ret = size; -+ } -+ } else -+ ret = size; -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * -@@ -359,7 +425,31 @@ - * used / required on success. - */ - int --ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ int err; -+ -+ down_read(&EXT3_I(inode)->xattr_sem); -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_get(inode, name_index, name, -+ buffer, buffer_size); -+ if (err < 0) -+ /* search was unsuccessful, try to find EA in dedicated block */ -+ err = ext3_xattr_block_get(inode, name_index, name, -+ buffer, buffer_size); -+ up_read(&EXT3_I(inode)->xattr_sem); -+ -+ return err; -+} -+ -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in EA block -+ */ -+int -+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) - { - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; -@@ -370,7 +460,6 @@ - ea_idebug(inode, "buffer=%p, buffer_size=%ld", - buffer, (long)buffer_size); - -- down_read(&EXT3_I(inode)->xattr_sem); - error = 0; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; -@@ -431,11 +520,138 @@ - - cleanup: - brelse(bh); -- up_read(&EXT3_I(inode)->xattr_sem); - - return error; - } - -+/* ext3_xattr_ibody_list() -+ * -+ * generate list of attributes stored in inode body -+ */ -+int -+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ char *start, *end, *buf; -+ struct ext3_iloc iloc; -+ int storage_size; -+ int ret; -+ int size = 0; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return 0; -+ -+ ret = ext3_get_inode_loc(inode, &iloc, 0); -+ if (ret) -+ return ret; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return 0; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_list", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ -+ if (!buffer) { -+ ret = size; -+ goto cleanup; -+ } else { -+ ret = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ last = (struct ext3_xattr_entry *) start; -+ buf = buffer; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ struct ext3_xattr_handler *handler; -+ handler = ext3_xattr_handler(last->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, last->e_name, -+ last->e_name_len); -+ last = next; -+ } -+ ret = size; -+cleanup: -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ int error; -+ int size = buffer_size; -+ -+ down_read(&EXT3_I(inode)->xattr_sem); -+ -+ /* get list of attributes stored in inode body */ -+ error = ext3_xattr_ibody_list(inode, buffer, buffer_size); -+ if (error < 0) { -+ /* some error occured while collecting -+ * attributes in inode body */ -+ size = 0; -+ goto cleanup; -+ } -+ size = error; -+ -+ /* get list of attributes stored in dedicated block */ -+ if (buffer) { -+ buffer_size -= error; -+ if (buffer_size <= 0) { -+ buffer = NULL; -+ buffer_size = 0; -+ } else -+ buffer += error; -+ } -+ -+ error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) -+ /* listing was successful, so we return len */ -+ size = 0; -+ -+cleanup: -+ up_read(&EXT3_I(inode)->xattr_sem); -+ return error + size; -+} -+ - /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. -@@ -457,6 +673,279 @@ - } - - /* -+ * ext3_xattr_ibody_find() -+ * -+ * search attribute and calculate free space in inode body -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, -+ const char *name, struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct ext3_xattr_entry *last; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ char *start, *end; -+ int ret = -ENOENT; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return ret; -+ -+ err = ext3_get_inode_loc(inode, &iloc, 0); -+ if (err) -+ return -EIO; -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ *free = storage_size - sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) { -+ brelse(iloc.bh); -+ return -ENOENT; -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_find", -+ "inode %ld", inode->i_ino); -+ brelse(iloc.bh); -+ return -EIO; -+ } -+ -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) { -+ memcpy(rentry, last, sizeof(struct ext3_xattr_entry)); -+ ret = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(last->e_name_len); -+ *free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ brelse(iloc.bh); -+ return ret; -+} -+ -+/* -+ * ext3_xattr_block_find() -+ * -+ * search attribute and calculate free space in EA block (if it allocated) -+ * NOTE: free space includes space our attribute hold -+ */ -+int -+ext3_xattr_block_find(struct inode *inode, int name_index, const char *name, -+ struct ext3_xattr_entry *rentry, int *free) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ char *end; -+ int name_len, error = -ENOENT; -+ -+ if (!EXT3_I(inode)->i_file_acl) { -+ *free = inode->i_sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - -+ sizeof(__u32); -+ return -ENOENT; -+ } -+ ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ brelse(bh); -+ return -EIO; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ *free = bh->b_size - sizeof(__u32); -+ -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) { -+ memcpy(rentry, entry, sizeof(struct ext3_xattr_entry)); -+ error = 0; -+ } else { -+ *free -= EXT3_XATTR_LEN(entry->e_name_len); -+ *free -= le32_to_cpu(entry->e_value_size); -+ } -+ entry = next; -+ } -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_inode_set() -+ * -+ * this routine add/remove/replace attribute in inode body -+ */ -+int -+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry *last, *next, *here = NULL; -+ struct ext3_inode *raw_inode; -+ int name_len = strlen(name); -+ int esize = EXT3_XATTR_LEN(name_len); -+ struct buffer_head *bh; -+ int err, storage_size; -+ struct ext3_iloc iloc; -+ int free, min_offs; -+ char *start, *end; -+ -+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) -+ return -ENOSPC; -+ -+ err = ext3_get_inode_loc(inode, &iloc, 0); -+ if (err) -+ return err; -+ raw_inode = ext3_raw_inode(&iloc); -+ bh = iloc.bh; -+ -+ storage_size = EXT3_SB(inode->i_sb)->s_inode_size - -+ EXT3_GOOD_OLD_INODE_SIZE - -+ EXT3_I(inode)->i_extra_isize - -+ sizeof(__u32); -+ start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE + -+ EXT3_I(inode)->i_extra_isize; -+ if ((*(__u32*) start) != EXT3_XATTR_MAGIC) { -+ /* inode had no attributes before */ -+ *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC); -+ } -+ start += sizeof(__u32); -+ end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = storage_size; -+ free = storage_size - sizeof(__u32); -+ -+ last = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(last)) { -+ next = EXT3_XATTR_NEXT(last); -+ if (le32_to_cpu(last->e_value_size) > storage_size || -+ (char *) next >= end) { -+ ext3_error(inode->i_sb, "ext3_xattr_ibody_set", -+ "inode %ld", inode->i_ino); -+ brelse(bh); -+ return -EIO; -+ } -+ -+ if (last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ if (name_index == last->e_name_index && -+ name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) -+ here = last; -+ else { -+ /* we calculate all but our attribute -+ * because it will be removed before changing */ -+ free -= EXT3_XATTR_LEN(last->e_name_len); -+ free -= le32_to_cpu(last->e_value_size); -+ } -+ last = next; -+ } -+ -+ if (value && (esize + value_len > free)) { -+ brelse(bh); -+ return -ENOSPC; -+ } -+ -+ err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (err) { -+ brelse(bh); -+ return err; -+ } -+ -+ if (here) { -+ /* time to remove old value */ -+ struct ext3_xattr_entry *e; -+ int size = le32_to_cpu(here->e_value_size); -+ int border = le16_to_cpu(here->e_value_offs); -+ char *src; -+ -+ /* move tail */ -+ memmove(start + min_offs + size, start + min_offs, -+ border - min_offs); -+ -+ /* recalculate offsets */ -+ e = (struct ext3_xattr_entry *) start; -+ while (!IS_LAST_ENTRY(e)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = -+ cpu_to_le16(offs + size); -+ e = next; -+ } -+ min_offs += size; -+ -+ /* remove entry */ -+ border = EXT3_XATTR_LEN(here->e_name_len); -+ src = (char *) here + EXT3_XATTR_LEN(here->e_name_len); -+ size = (char *) last - src; -+ if ((char *) here + size > end) -+ printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n", -+ __FILE__, __LINE__, here, size, end); -+ memmove(here, src, size); -+ last = (struct ext3_xattr_entry *) ((char *) last - border); -+ *((__u32 *) last) = 0; -+ } -+ -+ if (value) { -+ int offs = min_offs - value_len; -+ /* use last to create new entry */ -+ last->e_name_len = strlen(name); -+ last->e_name_index = name_index; -+ last->e_value_offs = cpu_to_le16(offs); -+ last->e_value_size = cpu_to_le32(value_len); -+ last->e_hash = last->e_value_block = 0; -+ memset(last->e_name, 0, esize); -+ memcpy(last->e_name, name, last->e_name_len); -+ if (start + offs + value_len > end) -+ printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n", -+ __FILE__, __LINE__, start, offs, -+ value_len, end); -+ memcpy(start + offs, value, value_len); -+ last = EXT3_XATTR_NEXT(last); -+ *((__u32 *) last) = 0; -+ } -+ -+ ext3_mark_iloc_dirty(handle, inode, &iloc); -+ brelse(bh); -+ -+ return 0; -+} -+ -+/* - * ext3_xattr_set_handle() - * - * Create, replace or remove an extended attribute for this inode. Buffer -@@ -470,6 +959,104 @@ - */ - int - ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ struct ext3_xattr_entry entry; -+ int err, where = 0, found = 0, total; -+ int free1 = -1, free2 = -1; -+ int name_len; -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > inode->i_sb->s_blocksize) -+ return -ERANGE; -+ down_write(&EXT3_I(inode)->xattr_sem); -+ -+ /* try to find attribute in inode body */ -+ err = ext3_xattr_ibody_find(inode, name_index, name, &entry, &free1); -+ if (err == 0) { -+ /* found EA in inode */ -+ found = 1; -+ where = 0; -+ } else if (err == -ENOENT) { -+ /* there is no such attribute in inode body */ -+ /* try to find attribute in dedicated block */ -+ err = ext3_xattr_block_find(inode, name_index, name, -+ &entry, &free2); -+ if (err != 0 && err != -ENOENT) { -+ /* not found EA in block */ -+ goto finish; -+ } else if (err == 0) { -+ /* found EA in block */ -+ where = 1; -+ found = 1; -+ } -+ } else -+ goto finish; -+ -+ /* check flags: may replace? may create ? */ -+ if (found && (flags & XATTR_CREATE)) { -+ err = -EEXIST; -+ goto finish; -+ } else if (!found && (flags & XATTR_REPLACE)) { -+ err = -ENODATA; -+ goto finish; -+ } -+ -+ /* check if we have enough space to store attribute */ -+ total = EXT3_XATTR_LEN(strlen(name)) + value_len; -+ if (free1 >= 0 && total > free1 && free2 >= 0 && total > free2) { -+ /* have no enough space */ -+ err = -ENOSPC; -+ goto finish; -+ } -+ -+ /* time to remove attribute */ -+ if (found) { -+ if (where == 0) { -+ /* EA is stored in inode body */ -+ ext3_xattr_ibody_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } else { -+ /* EA is stored in separated block */ -+ ext3_xattr_block_set(handle, inode, name_index, name, -+ NULL, 0, flags); -+ } -+ } -+ -+ /* try to store EA in inode body */ -+ err = ext3_xattr_ibody_set(handle, inode, name_index, name, -+ value, value_len, flags); -+ if (err) { -+ /* can't store EA in inode body */ -+ /* try to store in block */ -+ err = ext3_xattr_block_set(handle, inode, name_index, -+ name, value, value_len, flags); -+ } -+ -+finish: -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return err; -+} -+ -+/* -+ * ext3_xattr_block_set() -+ * -+ * this routine add/remove/replace attribute in EA block -+ */ -+int -+ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) - { -@@ -492,22 +1078,7 @@ - * towards the end of the block). - * end -- Points right after the block pointed to by header. - */ -- -- ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -- name_index, name, value, (long)value_len); -- -- if (IS_RDONLY(inode)) -- return -EROFS; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- return -EPERM; -- if (value == NULL) -- value_len = 0; -- if (name == NULL) -- return -EINVAL; - name_len = strlen(name); -- if (name_len > 255 || value_len > sb->s_blocksize) -- return -ERANGE; -- down_write(&EXT3_I(inode)->xattr_sem); - if (EXT3_I(inode)->i_file_acl) { - /* The inode already has an extended attribute block. */ - bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); -@@ -733,7 +1304,6 @@ - brelse(bh); - if (!(bh && header == HDR(bh))) - kfree(header); -- up_write(&EXT3_I(inode)->xattr_sem); - - return error; - } -Index: linux-2.6.0/fs/ext3/xattr.h -=================================================================== ---- linux-2.6.0.orig/fs/ext3/xattr.h 2003-06-24 18:04:43.000000000 +0400 -+++ linux-2.6.0/fs/ext3/xattr.h 2004-01-14 18:54:12.000000000 +0300 -@@ -77,7 +77,8 @@ - extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); - extern int ext3_xattr_list(struct inode *, char *, size_t); - extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); --extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); -+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int); -+extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int); - - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); -Index: linux-2.6.0/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.0.orig/include/linux/ext3_fs.h 2004-01-14 18:54:11.000000000 +0300 -+++ linux-2.6.0/include/linux/ext3_fs.h 2004-01-14 18:54:12.000000000 +0300 -@@ -265,6 +265,8 @@ - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ -+ __u16 i_extra_isize; -+ __u16 i_pad1; - }; - - #define i_size_high i_dir_acl -@@ -721,6 +723,7 @@ - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc, int in_mem); - - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); -Index: linux-2.6.0/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.0.orig/include/linux/ext3_fs_i.h 2003-12-30 08:32:44.000000000 +0300 -+++ linux-2.6.0/include/linux/ext3_fs_i.h 2004-01-14 18:54:12.000000000 +0300 -@@ -96,6 +96,9 @@ - */ - loff_t i_disksize; - -+ /* on-disk additional length */ -+ __u16 i_extra_isize; -+ - /* - * truncate_sem is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's - -%diffstat - fs/ext3/ialloc.c | 5 - fs/ext3/inode.c | 10 - fs/ext3/xattr.c | 634 +++++++++++++++++++++++++++++++++++++++++++--- - fs/ext3/xattr.h | 3 - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_i.h | 3 - 6 files changed, 623 insertions(+), 34 deletions(-) - diff --git a/lustre/kernel_patches/patches/ext3-error-export.patch b/lustre/kernel_patches/patches/ext3-error-export.patch deleted file mode 100644 index c52fc18..0000000 --- a/lustre/kernel_patches/patches/ext3-error-export.patch +++ /dev/null @@ -1,16 +0,0 @@ - fs/ext3/ext3-exports.c | 5 +++++ - 1 files changed, 5 insertions(+) - ---- linux/fs/ext3/ext3-exports.c~ext3-error-export Mon Jul 14 19:30:50 2003 -+++ linux-mmonroe/fs/ext3/ext3-exports.c Mon Jul 14 19:32:37 2003 -@@ -21,3 +21,8 @@ EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); - EXPORT_SYMBOL(ext3_prep_san_write); - EXPORT_SYMBOL(ext3_map_inode_page); -+ -+EXPORT_SYMBOL(ext3_abort); -+EXPORT_SYMBOL(ext3_decode_error); -+EXPORT_SYMBOL(__ext3_std_error); -+ - -_ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch deleted file mode 100644 index e1bb92c..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch +++ /dev/null @@ -1,2877 +0,0 @@ -Index: linux-2.4.21-rhel/fs/ext3/extents.c -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/extents.c 2005-03-02 22:42:20.659360368 +0300 -+++ linux-2.4.21-rhel/fs/ext3/extents.c 2005-03-04 02:34:52.000000000 +0300 -@@ -0,0 +1,2324 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); -+ unlock_kernel(); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle, inode); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle, inode); -+ -+ return err; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_get_hash_table(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle, tree->inode); -+ return 0; -+} -+ -+int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ lock_kernel(); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); -+ ex->ee_start_hi = 0; -+ unlock_kernel(); -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, int create) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_bit(BH_New, &bh_result->b_state); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ unlock_kernel(); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_bit(BH_New, &bh_result->b_state); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ set_bit(BH_Mapped, &bh_result->b_state); -+ bh_result->b_dev = inode->i_sb->s_dev; -+ bh_result->b_blocknr = newblock; -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up_write(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) -+ goto out_unlock; -+ -+ if (page) -+ ext3_block_truncate_page(handle, mapping, inode->i_size, page, -+ inode->i_sb->s_blocksize); -+ -+ down_write(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up_write(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle, inode); -+ return; -+ -+out_unlock: -+ if (page) { -+ UnlockPage(page); -+ page_cache_release(page); -+ } -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.4.21-rhel/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/ialloc.c 2005-03-04 00:44:34.000000000 +0300 -+++ linux-2.4.21-rhel/fs/ext3/ialloc.c 2005-03-04 00:44:35.000000000 +0300 -@@ -553,7 +553,7 @@ repeat: - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -@@ -596,6 +596,19 @@ - iloc.bh = NULL; - goto fail; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (err) goto fail; - -Index: linux-2.4.21-rhel/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/inode.c 2005-03-04 00:44:34.000000000 +0300 -+++ linux-2.4.21-rhel/fs/ext3/inode.c 2005-03-04 00:44:35.000000000 +0300 -@@ -859,6 +859,16 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - /* - * The BKL is not held on entry here. - */ -@@ -872,7 +882,7 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 1); - return ret; - } -@@ -921,7 +931,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1564,7 +1574,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from, - struct page *page, unsigned blocksize) - { -@@ -2049,6 +2059,9 @@ - (inode->i_mapping, inode->i_size, blocksize); - if (IS_ERR(page)) - return; -+ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); - - handle = start_transaction(inode); - if (IS_ERR(handle)) -@@ -2502,6 +2515,7 @@ - for (block = 0; block < EXT3_N_BLOCKS; block++) - ei->i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); - - if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) - EXT3_I(inode)->i_extra_isize = -@@ -2842,6 +2856,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -3166,7 +3183,7 @@ - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1, 1); - if (ret) - break; -@@ -3242,7 +3259,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); -Index: linux-2.4.21-rhel/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/Makefile 2005-03-04 00:44:33.000000000 +0300 -+++ linux-2.4.21-rhel/fs/ext3/Makefile 2005-03-04 00:44:35.000000000 +0300 -@@ -9,10 +9,11 @@ - - O_TARGET := ext3.o - --export-objs := ext3-exports.o -+export-objs := ext3-exports.o extents.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ -+ extents.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-rhel/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/super.c 2005-03-04 00:44:34.000000000 +0300 -+++ linux-2.4.21-rhel/fs/ext3/super.c 2005-03-04 00:44:35.000000000 +0300 -@@ -556,6 +556,7 @@ - #ifdef EXT3_DELETE_THREAD - J_ASSERT(sbi->s_delete_inodes == 0); - #endif -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -755,6 +756,12 @@ - return 0; - } - } -+ else if (!strcmp (this_char, "extents")) -+ set_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "noextents")) -+ clear_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "extdebug")) -+ set_opt (*mount_options, EXTDEBUG); - else if (!strcmp (this_char, "grpid") || - !strcmp (this_char, "bsdgroups")) - set_opt (*mount_options, GRPID); -@@ -1450,6 +1455,8 @@ - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - -+ ext3_ext_init(sb); -+ - return sb; - - failed_mount3: -Index: linux-2.4.21-rhel/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.21-rhel.orig/fs/ext3/ioctl.c 2005-03-04 00:44:32.000000000 +0300 -+++ linux-2.4.21-rhel/fs/ext3/ioctl.c 2005-03-04 00:44:35.000000000 +0300 -@@ -173,6 +173,10 @@ - return ret; - } - #endif -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } -Index: linux-2.4.21-rhel/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/ext3_fs.h 2005-03-04 00:44:34.000000000 +0300 -+++ linux-2.4.21-rhel/include/linux/ext3_fs.h 2005-03-04 00:44:35.000000000 +0300 -@@ -188,8 +188,9 @@ - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - --#define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x00085FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ - - /* -@@ -212,6 +213,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) - #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -332,6 +336,8 @@ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -504,10 +510,12 @@ - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ -- EXT3_FEATURE_INCOMPAT_RECOVER) -+ EXT3_FEATURE_INCOMPAT_RECOVER| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -689,6 +697,9 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct address_space *, loff_t, -+ struct page *, unsigned); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -770,6 +780,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.4.21-rhel/include/linux/ext3_extents.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/ext3_extents.h 2005-03-02 22:42:20.659360368 +0300 -+++ linux-2.4.21-rhel/include/linux/ext3_extents.h 2005-03-04 02:34:52.000000000 +0300 -@@ -0,0 +1,261 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.4.21-rhel/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/ext3_fs_i.h 2005-03-04 00:44:34.000000000 +0300 -+++ linux-2.4.21-rhel/include/linux/ext3_fs_i.h 2005-03-04 01:56:36.000000000 +0300 -@@ -90,6 +90,8 @@ - * by other means, so we have truncate_sem. - */ - struct rw_semaphore truncate_sem; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch deleted file mode 100644 index 34a0b9d..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch +++ /dev/null @@ -1,2875 +0,0 @@ -Index: linux-2.4.21-suse2/fs/ext3/extents.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/extents.c 2004-11-03 00:34:45.404241880 +0300 -@@ -0,0 +1,2315 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); -+ unlock_kernel(); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle, inode); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle, inode); -+ -+ return err; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_get_hash_table(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle, tree->inode); -+ return 0; -+} -+ -+int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ lock_kernel(); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); -+ ex->ee_start_hi = 0; -+ unlock_kernel(); -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, int create) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_bit(BH_New, &bh_result->b_state); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ unlock_kernel(); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_bit(BH_New, &bh_result->b_state); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ set_bit(BH_Mapped, &bh_result->b_state); -+ bh_result->b_dev = inode->i_sb->s_dev; -+ bh_result->b_blocknr = newblock; -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up_write(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) -+ return; -+ -+ ext3_block_truncate_page(handle, mapping, inode->i_size); -+ -+ down_write(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up_write(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle, inode); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.4.21-suse2/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ialloc.c 2004-11-02 20:31:37.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/ialloc.c 2004-11-02 20:34:00.000000000 +0300 -@@ -553,7 +553,8 @@ repeat: - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & -+ ~(EXT3_INDEX_FL | EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -@@ -592,6 +592,19 @@ - iloc.bh = NULL; - goto fail; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (err) goto fail; - -Index: linux-2.4.21-suse2/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/inode.c 2004-11-02 20:31:38.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/inode.c 2004-11-02 20:34:00.000000000 +0300 -@@ -853,6 +853,16 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - /* - * The BKL is not held on entry here. - */ -@@ -866,7 +876,7 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 1); - return ret; - } -@@ -893,7 +903,7 @@ - } - } - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); -@@ -915,7 +925,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1502,7 +1512,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -1988,6 +1998,9 @@ - - ext3_discard_prealloc(inode); - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? */ -@@ -2324,6 +2337,7 @@ - for (block = 0; block < EXT3_N_BLOCKS; block++) - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); - - brelse (iloc.bh); - -@@ -2664,6 +2678,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -3100,7 +3117,7 @@ - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1, 1); - if (ret) - break; -@@ -3176,7 +3193,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); -Index: linux-2.4.21-suse2/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/Makefile 2004-11-02 20:31:33.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/Makefile 2004-11-02 20:31:39.000000000 +0300 -@@ -12,7 +12,10 @@ - export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ -+ extents.o -+export-objs += extents.o -+ - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-suse2/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/super.c 2005-04-04 05:44:58.000000000 -0600 -+++ linux-2.4.21-suse2/fs/ext3/super.c 2005-04-04 05:45:43.000000000 -0600 -@@ -532,6 +532,7 @@ - #ifdef EXT3_DELETE_THREAD - J_ASSERT(sbi->s_delete_inodes == 0); - #endif -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -733,6 +734,12 @@ - return 0; - } - } -+ else if (!strcmp (this_char, "extents")) -+ set_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "noextents")) -+ clear_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "extdebug")) -+ set_opt (*mount_options, EXTDEBUG); - else if (!strcmp (this_char, "grpid") || - !strcmp (this_char, "bsdgroups")) - set_opt (*mount_options, GRPID); -@@ -1428,6 +1433,8 @@ - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - -+ ext3_ext_init(sb); -+ - return sb; - - failed_mount3: -Index: linux-2.4.21-suse2/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ioctl.c 2004-11-02 20:31:32.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/ioctl.c 2004-11-02 20:31:39.000000000 +0300 -@@ -174,6 +174,10 @@ - return ret; - } - #endif -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } -Index: linux-2.4.21-suse2/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_fs.h 2004-11-02 20:31:37.000000000 +0300 -+++ linux-2.4.21-suse2/include/linux/ext3_fs.h 2004-11-02 20:31:39.000000000 +0300 -@@ -184,8 +184,9 @@ - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - --#define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x00085FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ - - /* -@@ -208,6 +209,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) - #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -328,6 +332,8 @@ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -504,10 +510,12 @@ - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ -- EXT3_FEATURE_INCOMPAT_RECOVER) -+ EXT3_FEATURE_INCOMPAT_RECOVER| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -689,6 +697,8 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -770,6 +779,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int); -+extern void ext3_ext_truncate(struct inode *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.4.21-suse2/include/linux/ext3_extents.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-suse2/include/linux/ext3_extents.h 2004-11-02 20:34:00.000000000 +0300 -@@ -0,0 +1,261 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.4.21-suse2/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h 2004-11-02 20:31:37.000000000 +0300 -+++ linux-2.4.21-suse2/include/linux/ext3_fs_i.h 2004-11-02 20:45:16.000000000 +0300 -@@ -90,6 +90,8 @@ - * by other means, so we have truncate_sem. - */ - struct rw_semaphore truncate_sem; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch deleted file mode 100644 index 067e6df..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch +++ /dev/null @@ -1,2863 +0,0 @@ -Index: linux-2.4.24/fs/ext3/extents.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24/fs/ext3/extents.c 2004-11-03 00:36:44.894076664 +0300 -@@ -0,0 +1,2314 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); -+ unlock_kernel(); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle, inode); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle, inode); -+ -+ return err; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_get_hash_table(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle, tree->inode); -+ return 0; -+} -+ -+int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ lock_kernel(); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); -+ ex->ee_start_hi = 0; -+ unlock_kernel(); -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, int create) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_bit(BH_New, &bh_result->b_state); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ unlock_kernel(); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_bit(BH_New, &bh_result->b_state); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ set_bit(BH_Mapped, &bh_result->b_state); -+ bh_result->b_dev = inode->i_sb->s_dev; -+ bh_result->b_blocknr = newblock; -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up_write(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) -+ return; -+ -+ ext3_block_truncate_page(handle, mapping, inode->i_size); -+ -+ down_write(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up_write(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle, inode); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.4.24/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-11-02 20:28:32.000000000 +0300 -+++ linux-2.4.24/fs/ext3/ialloc.c 2004-11-02 20:32:17.000000000 +0300 -@@ -553,7 +553,8 @@ repeat: - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & -+ ~(EXT3_INDEX_FL | EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -@@ -592,6 +592,19 @@ - iloc.bh = NULL; - goto fail; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (err) goto fail; - -Index: linux-2.4.24/fs/ext3/inode.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/inode.c 2004-11-02 20:28:33.000000000 +0300 -+++ linux-2.4.24/fs/ext3/inode.c 2004-11-02 20:32:17.000000000 +0300 -@@ -848,6 +848,15 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create); -+ return ext3_get_block_handle(handle, inode, block, bh, create); -+} -+ - /* - * The BKL is not held on entry here. - */ -@@ -861,7 +870,7 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create); -+ ret = ext3_get_block_wrap(handle, inode, iblock, bh_result, create); - return ret; - } - -@@ -879,7 +888,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1403,7 +1412,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -1889,6 +1898,9 @@ - - ext3_discard_prealloc(inode); - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? */ -@@ -2225,6 +2237,7 @@ - for (block = 0; block < EXT3_N_BLOCKS; block++) - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); - - if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) - inode->u.ext3_i.i_extra_isize = -@@ -2537,6 +2550,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -2973,7 +2989,7 @@ - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1); - if (ret) - break; -@@ -3049,7 +3065,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); -Index: linux-2.4.24/fs/ext3/Makefile -=================================================================== ---- linux-2.4.24.orig/fs/ext3/Makefile 2004-11-02 20:28:32.000000000 +0300 -+++ linux-2.4.24/fs/ext3/Makefile 2004-11-02 20:32:17.000000000 +0300 -@@ -13,7 +13,9 @@ - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ -- xattr_trusted.o -+ xattr_trusted.o extents.o -+export-objs += extents.o -+ - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.24/fs/ext3/super.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/super.c 2004-11-02 20:28:32.000000000 +0300 -+++ linux-2.4.24/fs/ext3/super.c 2004-11-02 20:32:17.000000000 +0300 -@@ -532,6 +532,7 @@ - #ifdef EXT3_DELETE_THREAD - J_ASSERT(sbi->s_delete_inodes == 0); - #endif -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -704,6 +705,12 @@ - return 0; - } - } -+ else if (!strcmp (this_char, "extents")) -+ set_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "noextents")) -+ clear_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "extdebug")) -+ set_opt (*mount_options, EXTDEBUG); - else if (!strcmp (this_char, "grpid") || - !strcmp (this_char, "bsdgroups")) - set_opt (*mount_options, GRPID); -@@ -1395,6 +1400,8 @@ - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - -+ ext3_ext_init(sb); -+ - return sb; - - failed_mount3: -Index: linux-2.4.24/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/ioctl.c 2004-11-02 20:28:29.000000000 +0300 -+++ linux-2.4.24/fs/ext3/ioctl.c 2004-11-02 20:32:17.000000000 +0300 -@@ -174,6 +174,10 @@ - return ret; - } - #endif -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } -Index: linux-2.4.24/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-11-02 20:28:32.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_fs.h 2004-11-02 20:32:17.000000000 +0300 -@@ -184,8 +184,9 @@ - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - --#define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x00085FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ - - /* -@@ -208,6 +209,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) - #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -327,6 +331,8 @@ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -503,10 +509,12 @@ - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ -- EXT3_FEATURE_INCOMPAT_RECOVER) -+ EXT3_FEATURE_INCOMPAT_RECOVER| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -688,6 +696,8 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -769,6 +778,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int); -+extern void ext3_ext_truncate(struct inode *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.4.24/include/linux/ext3_extents.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_extents.h 2004-11-02 20:32:17.000000000 +0300 -@@ -0,0 +1,261 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.4.24/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-11-02 20:28:32.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-11-02 20:43:45.000000000 +0300 -@@ -76,6 +76,8 @@ - * by other means, so we have truncate_sem. - */ - struct rw_semaphore truncate_sem; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch deleted file mode 100644 index f3b1fb5..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch +++ /dev/null @@ -1,2858 +0,0 @@ -Index: linux-2.4.29/fs/ext3/extents.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/extents.c 2005-05-03 16:52:08.723069952 +0300 -+++ linux-2.4.29/fs/ext3/extents.c 2005-05-03 16:52:08.802057944 +0300 -@@ -0,0 +1,2308 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); -+ unlock_kernel(); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ mark_buffer_uptodate(bh, 1); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle, inode); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle, inode); -+ -+ return err; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_get_hash_table(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle, tree->inode); -+ return 0; -+} -+ -+int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ lock_kernel(); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); -+ unlock_kernel(); -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, int create) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_bit(BH_New, &bh_result->b_state); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ lock_kernel(); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); -+ unlock_kernel(); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_bit(BH_New, &bh_result->b_state); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ set_bit(BH_Mapped, &bh_result->b_state); -+ bh_result->b_dev = inode->i_sb->s_dev; -+ bh_result->b_blocknr = newblock; -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up_write(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) -+ return; -+ -+ ext3_block_truncate_page(handle, mapping, inode->i_size); -+ -+ down_write(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up_write(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle, inode); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up_write(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.4.29/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/ialloc.c 2005-05-03 16:50:30.216045296 +0300 -+++ linux-2.4.29/fs/ext3/ialloc.c 2005-05-03 16:52:08.804057640 +0300 -@@ -553,7 +553,8 @@ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & -+ ~(EXT3_INDEX_FL | EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -@@ -592,6 +593,19 @@ - iloc.bh = NULL; - goto fail; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (err) goto fail; - -Index: linux-2.4.29/fs/ext3/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/inode.c 2005-05-03 16:51:50.331865840 +0300 -+++ linux-2.4.29/fs/ext3/inode.c 2005-05-03 16:52:08.808057032 +0300 -@@ -861,6 +861,15 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create); -+ return ext3_get_block_handle(handle, inode, block, bh, create); -+} -+ - /* - * The BKL is not held on entry here. - */ -@@ -874,7 +883,7 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create); -+ ret = ext3_get_block_wrap(handle, inode, iblock, bh_result, create); - return ret; - } - -@@ -892,7 +901,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1416,7 +1425,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, -+int ext3_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -1904,6 +1913,9 @@ - - ext3_discard_prealloc(inode); - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? */ -@@ -2240,6 +2252,7 @@ - for (block = 0; block < EXT3_N_BLOCKS; block++) - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ memset(&EXT3_I(inode)->i_cached_extent, 0, sizeof(__u32) * 4); - - if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) - inode->u.ext3_i.i_extra_isize = -@@ -2546,6 +2559,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -2982,7 +2998,7 @@ - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_block_wrap(handle, inode, blocks[i], - &bh_tmp, 1); - if (ret) - break; -@@ -3058,7 +3074,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1); -+ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); -Index: linux-2.4.29/fs/ext3/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/ext3/Makefile 2005-05-03 16:51:32.127633304 +0300 -+++ linux-2.4.29/fs/ext3/Makefile 2005-05-03 16:53:38.634401352 +0300 -@@ -13,7 +13,9 @@ - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ -- xattr_trusted.o -+ xattr_trusted.o extents.o -+export-objs += extents.o -+ - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.29/fs/ext3/super.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/super.c 2005-05-03 16:50:14.750396432 +0300 -+++ linux-2.4.29/fs/ext3/super.c 2005-05-03 16:52:08.813056272 +0300 -@@ -531,6 +531,7 @@ - #ifdef EXT3_DELETE_THREAD - J_ASSERT(sbi->s_delete_inodes == 0); - #endif -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -702,6 +703,12 @@ - return 0; - } - } -+ else if (!strcmp (this_char, "extents")) -+ set_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "noextents")) -+ clear_opt (*mount_options, EXTENTS); -+ else if (!strcmp (this_char, "extdebug")) -+ set_opt (*mount_options, EXTDEBUG); - else if (!strcmp (this_char, "grpid") || - !strcmp (this_char, "bsdgroups")) - set_opt (*mount_options, GRPID); -@@ -1405,6 +1410,8 @@ - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - -+ ext3_ext_init(sb); -+ - return sb; - - failed_mount3: -Index: linux-2.4.29/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/ioctl.c 2005-05-03 16:49:36.825161944 +0300 -+++ linux-2.4.29/fs/ext3/ioctl.c 2005-05-03 16:52:08.814056120 +0300 -@@ -174,6 +174,10 @@ - return ret; - } - #endif -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } -Index: linux-2.4.29/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-05-03 16:50:30.228043472 +0300 -+++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 16:52:08.817055664 +0300 -@@ -184,8 +184,9 @@ - #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ - #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - --#define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x00085FFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ - - /* -@@ -208,6 +209,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) - #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -327,6 +331,8 @@ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -506,11 +512,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -702,6 +711,8 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -783,6 +793,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int); -+extern void ext3_ext_truncate(struct inode *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.4.29/include/linux/ext3_extents.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_extents.h 2005-05-03 16:52:08.724069800 +0300 -+++ linux-2.4.29/include/linux/ext3_extents.h 2005-05-03 16:52:08.819055360 +0300 -@@ -0,0 +1,261 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.4.29/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs_i.h 2005-05-03 16:50:30.229043320 +0300 -+++ linux-2.4.29/include/linux/ext3_fs_i.h 2005-05-03 16:52:08.823054752 +0300 -@@ -76,6 +76,8 @@ - * by other means, so we have truncate_sem. - */ - struct rw_semaphore truncate_sem; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch deleted file mode 100644 index f421f88..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch +++ /dev/null @@ -1,2940 +0,0 @@ -Index: linux-2.6.12-rc6/fs/ext3/extents.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/extents.c 2005-06-14 16:31:25.756503133 +0200 -+++ linux-2.6.12-rc6/fs/ext3/extents.c 2005-06-14 16:31:25.836581257 +0200 -@@ -0,0 +1,2359 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.6.12-rc6/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/ialloc.c 2005-06-14 16:31:08.634433030 +0200 -+++ linux-2.6.12-rc6/fs/ext3/ialloc.c 2005-06-14 16:31:25.846346882 +0200 -@@ -598,7 +598,7 @@ - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -639,6 +639,18 @@ - DQUOT_FREE_INODE(inode); - goto fail2; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.12-rc6/fs/ext3/inode.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/inode.c 2005-06-14 16:31:09.701815830 +0200 -+++ linux-2.6.12-rc6/fs/ext3/inode.c 2005-06-14 16:31:25.861971882 +0200 -@@ -40,7 +40,7 @@ - #include "iopen.h" - #include "acl.h" - --static int ext3_writepage_trans_blocks(struct inode *inode); -+int ext3_writepage_trans_blocks(struct inode *inode); - - /* - * Test whether an inode is a fast symlink. -@@ -784,6 +784,17 @@ - return err; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -794,8 +805,8 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -839,7 +850,7 @@ - - get_block: - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -859,7 +870,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1593,7 +1604,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2104,6 +2115,9 @@ - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2850,12 +2864,15 @@ - * block and work out the exact number of indirects which are touched. Pah. - */ - --static int ext3_writepage_trans_blocks(struct inode *inode) -+int ext3_writepage_trans_blocks(struct inode *inode) - { - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.12-rc6/fs/ext3/Makefile -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/Makefile 2005-06-14 16:31:09.179354899 +0200 -+++ linux-2.6.12-rc6/fs/ext3/Makefile 2005-06-14 16:31:25.872714069 +0200 -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o -+ ioctl.o namei.o super.o symlink.o hash.o resize.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.12-rc6/fs/ext3/super.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/super.c 2005-06-14 16:31:09.950839264 +0200 -+++ linux-2.6.12-rc6/fs/ext3/super.c 2005-06-14 16:31:25.886385944 +0200 -@@ -387,6 +387,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -451,6 +452,8 @@ - #endif - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -593,6 +596,7 @@ - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - }; - - static match_table_t tokens = { -@@ -644,6 +647,9 @@ - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -953,6 +958,15 @@ - case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1668,6 +1681,7 @@ - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); - lock_kernel(); - return 0; - -Index: linux-2.6.12-rc6/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/ioctl.c 2005-06-14 16:31:08.646151780 +0200 -+++ linux-2.6.12-rc6/fs/ext3/ioctl.c 2005-06-14 16:31:25.897128131 +0200 -@@ -124,6 +124,10 @@ - err = ext3_change_inode_journal_flag(inode, jflag); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); -Index: linux-2.6.12-rc6/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12-rc6.orig/include/linux/ext3_fs.h 2005-06-14 16:31:10.185214261 +0200 -+++ linux-2.6.12-rc6/include/linux/ext3_fs.h 2005-06-14 16:31:52.859041864 +0200 -@@ -186,8 +186,9 @@ - #define EXT3_NOTAIL_FL 0x00008000 /* don't merge file tail */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -@@ -237,6 +238,9 @@ - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -360,6 +364,8 @@ - #define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -548,11 +554,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -759,6 +767,9 @@ - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, -+ struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -828,6 +837,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.12-rc6/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.12-rc6.orig/include/linux/ext3_extents.h 2005-06-14 16:31:25.780917195 +0200 -+++ linux-2.6.12-rc6/include/linux/ext3_extents.h 2005-06-14 16:31:25.932284381 +0200 -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.6.12-rc6/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.12-rc6.orig/include/linux/ext3_fs_i.h 2005-06-06 17:22:29.000000000 +0200 -+++ linux-2.6.12-rc6/include/linux/ext3_fs_i.h 2005-06-14 16:31:25.941073443 +0200 -@@ -133,6 +133,8 @@ - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.15.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.15.patch deleted file mode 100644 index ea69e84..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.15.patch +++ /dev/null @@ -1,2947 +0,0 @@ -Index: linux-2.6.16.21-0.8/fs/ext3/extents.c -=================================================================== ---- /dev/null -+++ linux-2.6.16.21-0.8/fs/ext3/extents.c -@@ -0,0 +1,2359 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.6.16.21-0.8/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/ext3/ialloc.c -+++ linux-2.6.16.21-0.8/fs/ext3/ialloc.c -@@ -598,7 +598,7 @@ got: - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -642,6 +642,18 @@ got: - if (err) - goto fail_free_drop; - -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.16.21-0.8/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/ext3/inode.c -+++ linux-2.6.16.21-0.8/fs/ext3/inode.c -@@ -40,7 +40,7 @@ - #include "iopen.h" - #include "acl.h" - --static int ext3_writepage_trans_blocks(struct inode *inode); -+int ext3_writepage_trans_blocks(struct inode *inode); - - /* - * Test whether an inode is a fast symlink. -@@ -788,6 +788,17 @@ out: - return err; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -798,8 +809,8 @@ static int ext3_get_block(struct inode * - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -843,7 +854,7 @@ ext3_direct_io_get_blocks(struct inode * - - get_block: - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -863,7 +874,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1606,7 +1617,7 @@ void ext3_set_aops(struct inode *inode) - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2116,6 +2127,9 @@ void ext3_truncate(struct inode * inode) - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2863,12 +2877,15 @@ err_out: - * block and work out the exact number of indirects which are touched. Pah. - */ - --static int ext3_writepage_trans_blocks(struct inode *inode) -+int ext3_writepage_trans_blocks(struct inode *inode) - { - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.16.21-0.8/fs/ext3/Makefile -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/ext3/Makefile -+++ linux-2.6.16.21-0.8/fs/ext3/Makefile -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o -+ ioctl.o namei.o super.o symlink.o hash.o resize.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.16.21-0.8/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/ext3/super.c -+++ linux-2.6.16.21-0.8/fs/ext3/super.c -@@ -392,6 +392,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -456,6 +457,8 @@ static struct inode *ext3_alloc_inode(st - #endif - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -638,6 +641,7 @@ enum { - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - Opt_grpquota - }; - -@@ -689,6 +693,9 @@ static match_table_t tokens = { - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -1030,6 +1036,15 @@ clear_qf_name: - case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1756,6 +1768,7 @@ static int ext3_fill_super (struct super - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); - lock_kernel(); - return 0; - -Index: linux-2.6.16.21-0.8/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/ext3/ioctl.c -+++ linux-2.6.16.21-0.8/fs/ext3/ioctl.c -@@ -125,6 +125,10 @@ flags_err: - err = ext3_change_inode_journal_flag(inode, jflag); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); -Index: linux-2.6.16.21-0.8/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.21-0.8.orig/include/linux/ext3_fs.h -+++ linux-2.6.16.21-0.8/include/linux/ext3_fs.h -@@ -185,9 +185,10 @@ struct ext3_group_desc - #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - - /* -@@ -237,6 +238,9 @@ struct ext3_new_group_data { - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Mount options -@@ -377,6 +381,8 @@ struct ext3_inode { - #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ - #define EXT3_MOUNT_IOPEN 0x400000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -565,11 +571,13 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -776,6 +784,7 @@ extern unsigned long ext3_count_free (st - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -792,6 +801,7 @@ extern int ext3_get_inode_loc(struct ino - extern void ext3_truncate (struct inode *); - extern void ext3_set_inode_flags(struct inode *); - extern void ext3_set_aops(struct inode *inode); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -@@ -845,6 +855,16 @@ extern struct inode_operations ext3_spec - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.16.21-0.8/include/linux/ext3_extents.h -=================================================================== ---- /dev/null -+++ linux-2.6.16.21-0.8/include/linux/ext3_extents.h -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.6.16.21-0.8/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16.21-0.8.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.16.21-0.8/include/linux/ext3_fs_i.h -@@ -133,6 +133,8 @@ struct ext3_inode_info { - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.16-sles10.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.16-sles10.patch deleted file mode 100644 index fd17dab..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.16-sles10.patch +++ /dev/null @@ -1,2947 +0,0 @@ -Index: linux-2.6.16.27-0.9/fs/ext3/extents.c -=================================================================== ---- /dev/null -+++ linux-2.6.16.27-0.9/fs/ext3/extents.c -@@ -0,0 +1,2359 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.6.16.27-0.9/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/ialloc.c -+++ linux-2.6.16.27-0.9/fs/ext3/ialloc.c -@@ -601,7 +601,7 @@ got: - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -645,6 +645,18 @@ got: - if (err) - goto fail_free_drop; - -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.16.27-0.9/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/inode.c -+++ linux-2.6.16.27-0.9/fs/ext3/inode.c -@@ -40,7 +40,7 @@ - #include "iopen.h" - #include "acl.h" - --static int ext3_writepage_trans_blocks(struct inode *inode); -+int ext3_writepage_trans_blocks(struct inode *inode); - - /* - * Test whether an inode is a fast symlink. -@@ -788,6 +788,17 @@ out: - return err; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -798,8 +809,8 @@ static int ext3_get_block(struct inode * - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -843,7 +854,7 @@ ext3_direct_io_get_blocks(struct inode * - - get_block: - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -863,7 +874,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1606,7 +1617,7 @@ void ext3_set_aops(struct inode *inode) - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2116,6 +2127,9 @@ void ext3_truncate(struct inode * inode) - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2863,12 +2877,15 @@ err_out: - * block and work out the exact number of indirects which are touched. Pah. - */ - --static int ext3_writepage_trans_blocks(struct inode *inode) -+int ext3_writepage_trans_blocks(struct inode *inode) - { - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.16.27-0.9/fs/ext3/Makefile -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/Makefile -+++ linux-2.6.16.27-0.9/fs/ext3/Makefile -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o -+ ioctl.o namei.o super.o symlink.o hash.o resize.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.16.27-0.9/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/super.c -+++ linux-2.6.16.27-0.9/fs/ext3/super.c -@@ -392,6 +392,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -456,6 +457,8 @@ static struct inode *ext3_alloc_inode(st - #endif - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -681,6 +684,7 @@ enum { - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - Opt_grpquota - }; - -@@ -732,6 +736,9 @@ static match_table_t tokens = { - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -1073,6 +1080,15 @@ clear_qf_name: - case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1799,6 +1815,7 @@ static int ext3_fill_super (struct super - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); - lock_kernel(); - return 0; - -Index: linux-2.6.16.27-0.9/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/ioctl.c -+++ linux-2.6.16.27-0.9/fs/ext3/ioctl.c -@@ -125,6 +125,10 @@ flags_err: - err = ext3_change_inode_journal_flag(inode, jflag); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); -Index: linux-2.6.16.27-0.9/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.27-0.9.orig/include/linux/ext3_fs.h -+++ linux-2.6.16.27-0.9/include/linux/ext3_fs.h -@@ -185,9 +185,10 @@ struct ext3_group_desc - #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - - /* -@@ -237,6 +238,9 @@ struct ext3_new_group_data { - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Mount options -@@ -377,6 +381,8 @@ struct ext3_inode { - #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ - #define EXT3_MOUNT_IOPEN 0x400000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -565,11 +571,13 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -776,6 +784,7 @@ extern unsigned long ext3_count_free (st - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); - int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -795,6 +804,7 @@ extern int ext3_get_inode_loc(struct ino - extern void ext3_truncate (struct inode *); - extern void ext3_set_inode_flags(struct inode *); - extern void ext3_set_aops(struct inode *inode); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -@@ -848,6 +858,16 @@ extern struct inode_operations ext3_spec - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.16.27-0.9/include/linux/ext3_extents.h -=================================================================== ---- /dev/null -+++ linux-2.6.16.27-0.9/include/linux/ext3_extents.h -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.6.16.27-0.9/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16.27-0.9.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.16.27-0.9/include/linux/ext3_fs_i.h -@@ -133,6 +133,8 @@ struct ext3_inode_info { - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch deleted file mode 100644 index 7bc712e..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch +++ /dev/null @@ -1,2950 +0,0 @@ -Index: linux-stage/fs/ext3/extents.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ linux-stage/fs/ext3/extents.c 2006-07-16 14:10:21.000000000 +0800 -@@ -0,0 +1,2359 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ mutex_lock(&EXT3_I(inode)->truncate_mutex); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ mutex_unlock(&EXT3_I(inode)->truncate_mutex); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ mutex_lock(&EXT3_I(inode)->truncate_mutex); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ mutex_unlock(&EXT3_I(inode)->truncate_mutex); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ mutex_lock(&EXT3_I(inode)->truncate_mutex); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ mutex_unlock(&EXT3_I(inode)->truncate_mutex); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ mutex_lock(&EXT3_I(inode)->truncate_mutex); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ mutex_unlock(&EXT3_I(inode)->truncate_mutex); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ mutex_lock(&EXT3_I(inode)->truncate_mutex); -+ err = EXT_DEPTH(&tree); -+ mutex_unlock(&EXT3_I(inode)->truncate_mutex); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-stage/fs/ext3/ialloc.c -=================================================================== ---- linux-stage.orig/fs/ext3/ialloc.c 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/fs/ext3/ialloc.c 2006-07-16 14:10:20.000000000 +0800 -@@ -600,7 +600,7 @@ got: - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -644,6 +644,18 @@ got: - if (err) - goto fail_free_drop; - -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/fs/ext3/inode.c 2006-07-16 14:11:28.000000000 +0800 -@@ -40,7 +40,7 @@ - #include "iopen.h" - #include "acl.h" - --static int ext3_writepage_trans_blocks(struct inode *inode); -+int ext3_writepage_trans_blocks(struct inode *inode); - - /* - * Test whether an inode is a fast symlink. -@@ -944,6 +944,17 @@ out: - - #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_blocks_handle(handle, inode, block, 1, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -984,8 +995,8 @@ static int ext3_get_block(struct inode * - - get_block: - if (ret == 0) { -- ret = ext3_get_blocks_handle(handle, inode, iblock, -- max_blocks, bh_result, create, 0); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; -@@ -1008,7 +1019,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- err = ext3_get_blocks_handle(handle, inode, block, 1, -+ err = ext3_get_block_wrap(handle, inode, block, - &dummy, create, 1); - if (err == 1) { - err = 0; -@@ -1756,7 +1767,7 @@ void ext3_set_aops(struct inode *inode) - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; -@@ -2260,6 +2271,9 @@ void ext3_truncate(struct inode *inode) - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -3004,12 +3018,15 @@ err_out: - * block and work out the exact number of indirects which are touched. Pah. - */ - --static int ext3_writepage_trans_blocks(struct inode *inode) -+int ext3_writepage_trans_blocks(struct inode *inode) - { - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -@@ -3277,7 +3294,7 @@ int ext3_prep_san_write(struct inode *in - - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { -- ret = ext3_get_block_handle(handle, inode, blocks[i], -+ ret = ext3_get_blocks_handle(handle, inode, blocks[i], 1, - &bh_tmp, 1, 1); - if (ret) - break; -@@ -3337,7 +3354,7 @@ int ext3_map_inode_page(struct inode *in - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1); -+ rc = ext3_get_blocks_handle(handle, inode, iblock, 1, &dummy, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error reading " - "block %ld\n", iblock); -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/fs/ext3/Makefile 2006-07-16 14:10:21.000000000 +0800 -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o -+ ioctl.o namei.o super.o symlink.o hash.o resize.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/fs/ext3/super.c 2006-07-16 14:10:21.000000000 +0800 -@@ -391,6 +391,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,6 +456,8 @@ static struct inode *ext3_alloc_inode(st - #endif - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -638,6 +641,7 @@ enum { - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - Opt_grpquota - }; - -@@ -690,6 +694,9 @@ static match_table_t tokens = { - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -1035,6 +1041,15 @@ clear_qf_name: - case Opt_bh: - clear_opt(sbi->s_mount_opt, NOBH); - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1760,6 +1772,7 @@ static int ext3_fill_super (struct super - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); - -+ ext3_ext_init(sb); - lock_kernel(); - return 0; - -Index: linux-stage/fs/ext3/ioctl.c -=================================================================== ---- linux-stage.orig/fs/ext3/ioctl.c 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/fs/ext3/ioctl.c 2006-07-16 13:55:31.000000000 +0800 -@@ -135,6 +135,10 @@ flags_err: - mutex_unlock(&inode->i_mutex); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2006-07-16 13:55:31.000000000 +0800 -+++ linux-stage/include/linux/ext3_fs.h 2006-07-16 14:10:21.000000000 +0800 -@@ -181,9 +181,10 @@ struct ext3_group_desc - #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - - /* -@@ -233,6 +234,9 @@ struct ext3_new_group_data { - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Mount options -@@ -373,6 +377,8 @@ struct ext3_inode { - #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ - #define EXT3_MOUNT_IOPEN 0x400000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -563,11 +569,13 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -787,6 +795,9 @@ extern unsigned long ext3_count_free (st - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, -+ struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr); - struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); -@@ -860,6 +870,16 @@ extern struct inode_operations ext3_spec - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-stage/include/linux/ext3_extents.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ linux-stage/include/linux/ext3_extents.h 2006-07-16 13:55:31.000000000 +0800 -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-stage/include/linux/ext3_fs_i.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs_i.h 2006-07-16 13:55:30.000000000 +0800 -+++ linux-stage/include/linux/ext3_fs_i.h 2006-07-16 14:10:20.000000000 +0800 -@@ -142,6 +142,8 @@ struct ext3_inode_info { - */ - struct mutex truncate_mutex; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch deleted file mode 100644 index b6c37c1..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch +++ /dev/null @@ -1,2951 +0,0 @@ -%patch -Index: linux-2.6.5-sles9/fs/ext3/extents.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2005-02-17 22:07:57.023609040 +0300 -+++ linux-2.6.5-sles9/fs/ext3/extents.c 2005-02-23 01:02:37.396435640 +0300 -@@ -0,0 +1,2361 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ __clear_bit(BH_New, &bh_result->b_state); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ __set_bit(BH_New, &bh_result->b_state); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ __set_bit(BH_Mapped, &bh_result->b_state); -+ bh_result->b_bdev = inode->i_sb->s_bdev; -+ bh_result->b_blocknr = newblock; -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-2.6.5-sles9/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2005-02-23 01:01:52.366281264 +0300 -+++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2005-02-23 01:02:37.398435336 +0300 -@@ -566,7 +566,7 @@ repeat: - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -647,6 +647,18 @@ - DQUOT_FREE_INODE(inode); - goto fail2; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.5-sles9/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2005-02-23 01:01:52.373280200 +0300 -+++ linux-2.6.5-sles9/fs/ext3/inode.c 2005-02-23 01:02:37.404434424 +0300 -@@ -796,6 +796,17 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -806,8 +817,8 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -833,8 +844,8 @@ - } - } - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 0); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 0); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -855,7 +866,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1587,7 +1598,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2083,6 +2094,9 @@ - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2789,6 +2803,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.5-sles9/fs/ext3/Makefile -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2005-02-23 01:01:46.501172896 +0300 -+++ linux-2.6.5-sles9/fs/ext3/Makefile 2005-02-23 01:02:37.405434272 +0300 -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.5-sles9/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/super.c 2005-02-23 01:02:34.072940888 +0300 -+++ linux-2.6.5-sles9/fs/ext3/super.c 2005-02-23 01:47:15.291333736 +0300 -@@ -389,6 +389,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -447,6 +448,8 @@ - #endif - ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -537,6 +540,7 @@ - Opt_ignore, Opt_barrier, - Opt_err, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - }; - - static match_table_t tokens = { -@@ -582,6 +585,9 @@ - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL} - }; -@@ -797,6 +802,15 @@ - break; - case Opt_ignore: - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1449,6 +1460,8 @@ - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); -+ - return 0; - - failed_mount3: -Index: linux-2.6.5-sles9/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2005-02-23 01:01:42.887722224 +0300 -+++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2005-02-23 01:02:37.412433208 +0300 -@@ -124,6 +124,10 @@ - err = ext3_change_inode_journal_flag(inode, jflag); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int *) arg); -Index: linux-2.6.5-sles9/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2005-02-23 01:02:35.823674736 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2005-02-23 01:02:37.414432904 +0300 -@@ -186,8 +186,9 @@ - #define EXT3_NOTAIL_FL 0x00008000 /* don't merge file tail */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -@@ -211,6 +212,9 @@ - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -333,6 +337,8 @@ - #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -503,11 +509,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -729,6 +735,9 @@ - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, -+ struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -802,6 +809,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.5-sles9/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2005-02-17 22:07:57.023609040 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2005-02-23 01:02:37.416432600 +0300 -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2005-02-23 01:01:52.425272296 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2005-02-23 01:45:55.611446920 +0300 -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - - struct reserve_window { - __u32 _rsv_start; /* First byte reserved */ -@@ -128,6 +129,8 @@ - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ struct ext3_ext_cache i_cached_extent; - }; - - #endif /* _LINUX_EXT3_FS_I */ - -%diffstat - fs/ext3/Makefile | 2 - fs/ext3/extents.c | 2356 +++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/ialloc.c | 4 - fs/ext3/inode.c | 29 - fs/ext3/ioctl.c | 4 - fs/ext3/super.c | 15 - include/linux/ext3_extents.h | 265 ++++ - include/linux/ext3_fs.h | 17 - include/linux/ext3_fs_i.h | 3 - 9 files changed, 2687 insertions(+), 8 deletions(-) - diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch deleted file mode 100644 index 5b5558c..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch +++ /dev/null @@ -1,2925 +0,0 @@ -Index: linux-stage/fs/ext3/extents.c -=================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2005-02-25 15:33:48.890198160 +0200 -+++ linux-stage/fs/ext3/extents.c 2005-02-25 15:33:48.917194056 +0200 -@@ -0,0 +1,2359 @@ -+/* -+ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static inline int ext3_ext_check_header(struct ext3_extent_header *eh) -+{ -+ if (eh->eh_magic != EXT3_EXT_MAGIC) { -+ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -+ (unsigned)eh->eh_magic); -+ return -EIO; -+ } -+ if (eh->eh_max == 0) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -+ (unsigned)eh->eh_max); -+ return -EIO; -+ } -+ if (eh->eh_entries > eh->eh_max) { -+ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -+ (unsigned)eh->eh_entries); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; -+ -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} -+ -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} -+ -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); -+ } -+ return err; -+} -+ -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO -+ */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} -+ -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; -+ struct inode *inode; -+ -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); -+ -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; -+ } -+ -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} -+ -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_HDR_GEN(neh) + 1); -+} -+ -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 6; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / -+ sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) { -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ -+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; -+ -+ BUG_ON(tree->buffer_len == 0); -+ ext3_ext_get_access_for_root(handle, tree); -+ eh = EXT_ROOT_HDR(tree); -+ eh->eh_depth = 0; -+ eh->eh_entries = 0; -+ eh->eh_magic = EXT3_EXT_MAGIC; -+ eh->eh_max = ext3_ext_space_root(tree); -+ ext3_ext_mark_root_dirty(handle, tree); -+ ext3_ext_invalidate_cache(tree); -+ return 0; -+} -+ -+struct ext3_ext_path * -+ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ struct buffer_head *bh; -+ int depth, i, ppos = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ eh = EXT_ROOT_HDR(tree); -+ EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) { -+ /* don't free previously allocated path -+ * -- caller should take care */ -+ path = NULL; -+ goto err; -+ } -+ -+ i = depth = EXT_DEPTH(tree); -+ EXT_ASSERT(eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* account possible depth increase */ -+ if (!path) { -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); -+ if (!path) -+ return ERR_PTR(-ENOMEM); -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[0].p_hdr = eh; -+ -+ /* walk through the tree */ -+ while (i) { -+ ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); -+ ext3_ext_binsearch_idx(tree, path + ppos, block); -+ path[ppos].p_block = path[ppos].p_idx->ei_leaf; -+ path[ppos].p_depth = i; -+ path[ppos].p_ext = NULL; -+ -+ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) -+ goto err; -+ -+ eh = EXT_BLOCK_HDR(bh); -+ ppos++; -+ EXT_ASSERT(ppos <= depth); -+ path[ppos].p_bh = bh; -+ path[ppos].p_hdr = eh; -+ i--; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ } -+ -+ path[ppos].p_depth = i; -+ path[ppos].p_hdr = eh; -+ path[ppos].p_ext = NULL; -+ path[ppos].p_idx = NULL; -+ -+ if (ext3_ext_check_header(eh)) -+ goto err; -+ -+ /* find extent */ -+ ext3_ext_binsearch(tree, path + ppos, block); -+ -+ ext3_ext_show_path(tree, path); -+ -+ return path; -+ -+err: -+ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ return ERR_PTR(-EIO); -+} -+ -+/* -+ * insert new index [logical;ptr] into the block at cupr -+ * it check where to insert: before curp or after curp -+ */ -+static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) -+{ -+ struct ext3_extent_idx *ix; -+ int len, err; -+ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ return err; -+ -+ EXT_ASSERT(logical != curp->p_idx->ei_block); -+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; -+ if (logical > curp->p_idx->ei_block) { -+ /* insert after */ -+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { -+ len = (len - 1) * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); -+ memmove(curp->p_idx + 2, curp->p_idx + 1, len); -+ } -+ ix = curp->p_idx + 1; -+ } else { -+ /* insert before */ -+ len = len * sizeof(struct ext3_extent_idx); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); -+ memmove(curp->p_idx + 1, curp->p_idx, len); -+ ix = curp->p_idx; -+ } -+ -+ ix->ei_block = logical; -+ ix->ei_leaf = ptr; -+ ix->ei_leaf_hi = ix->ei_unused = 0; -+ curp->p_hdr->eh_entries++; -+ -+ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); -+ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); -+ -+ err = ext3_ext_dirty(handle, tree, curp); -+ ext3_std_error(tree->inode->i_sb, err); -+ -+ return err; -+} -+ -+/* -+ * routine inserts new subtree into the path, using free index entry -+ * at depth 'at: -+ * - allocates all needed blocks (new leaf and all intermediate index blocks) -+ * - makes decision where to split -+ * - moves remaining extens and index entries (right to the split point) -+ * into the newly allocated blocks -+ * - initialize subtree -+ */ -+static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) -+{ -+ struct buffer_head *bh = NULL; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct ext3_extent *ex; -+ int i = at, k, m, a; -+ unsigned long newblock, oldblock, border; -+ int *ablocks = NULL; /* array of allocated blocks */ -+ int err = 0; -+ -+ /* make decision: where to split? */ -+ /* FIXME: now desicion is simplest: at current extent */ -+ -+ /* if current leaf will be splitted, then we should use -+ * border from split point */ -+ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); -+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ border = path[depth].p_ext[1].ee_block; -+ ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); -+ } else { -+ border = newext->ee_block; -+ ext_debug(tree, "leaf will be added." -+ " next leaf starts at %d\n", -+ (int)border); -+ } -+ -+ /* -+ * if error occurs, then we break processing -+ * and turn filesystem read-only. so, index won't -+ * be inserted and tree will be in consistent -+ * state. next mount will repair buffers too -+ */ -+ -+ /* -+ * get array to track all allocated blocks -+ * we need this to handle errors and free blocks -+ * upon them -+ */ -+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); -+ if (!ablocks) -+ return -ENOMEM; -+ memset(ablocks, 0, sizeof(unsigned long) * depth); -+ -+ /* allocate all needed blocks */ -+ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); -+ for (a = 0; a < depth - at; a++) { -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ goto cleanup; -+ ablocks[a] = newblock; -+ } -+ -+ /* initialize new leaf */ -+ newblock = ablocks[--a]; -+ EXT_ASSERT(newblock); -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 0; -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_depth = 0; -+ ex = EXT_FIRST_EXTENT(neh); -+ -+ /* move remain of path[depth] to the new leaf */ -+ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); -+ /* start copy from next extent */ -+ /* TODO: we could do it by single memmove */ -+ m = 0; -+ path[depth].p_ext++; -+ while (path[depth].p_ext <= -+ EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); -+ neh->eh_entries++; -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old leaf */ -+ if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ path[depth].p_hdr->eh_entries -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path + depth))) -+ goto cleanup; -+ -+ } -+ -+ /* create intermediate indexes */ -+ k = depth - at - 1; -+ EXT_ASSERT(k >= 0); -+ if (k) -+ ext_debug(tree, "create %d intermediate indices\n", k); -+ /* insert new index into current index block */ -+ /* current depth stored in i var */ -+ i = depth - 1; -+ while (k--) { -+ oldblock = newblock; -+ newblock = ablocks[--a]; -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) -+ goto cleanup; -+ -+ neh = EXT_BLOCK_HDR(bh); -+ neh->eh_entries = 1; -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ neh->eh_depth = depth - i; -+ fidx = EXT_FIRST_INDEX(neh); -+ fidx->ei_block = border; -+ fidx->ei_leaf = oldblock; -+ fidx->ei_leaf_hi = fidx->ei_unused = 0; -+ -+ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); -+ /* copy indexes */ -+ m = 0; -+ path[i].p_idx++; -+ -+ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); -+ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); -+ memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); -+ neh->eh_entries++; -+ EXT_ASSERT(neh->eh_entries <= neh->eh_max); -+ m++; -+ } -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto cleanup; -+ brelse(bh); -+ bh = NULL; -+ -+ /* correct old index */ -+ if (m) { -+ err = ext3_ext_get_access(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ path[i].p_hdr->eh_entries -= m; -+ err = ext3_ext_dirty(handle, tree, path + i); -+ if (err) -+ goto cleanup; -+ } -+ -+ i--; -+ } -+ -+ /* insert new index */ -+ if (!err) -+ err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); -+ -+cleanup: -+ if (bh) { -+ if (buffer_locked(bh)) -+ unlock_buffer(bh); -+ brelse(bh); -+ } -+ -+ if (err) { -+ /* free all allocated blocks in error case */ -+ for (i = 0; i < depth; i++) { -+ if (!ablocks[i]) -+ continue; -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ } -+ } -+ kfree(ablocks); -+ -+ return err; -+} -+ -+/* -+ * routine implements tree growing procedure: -+ * - allocates new block -+ * - moves top-level data (index block or leaf) into the new block -+ * - initialize new top-level, creating index that points to the -+ * just created block -+ */ -+static int ext3_ext_grow_indepth(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp = path; -+ struct ext3_extent_header *neh; -+ struct ext3_extent_idx *fidx; -+ struct buffer_head *bh; -+ unsigned long newblock; -+ int err = 0; -+ -+ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); -+ if (newblock == 0) -+ return err; -+ -+ bh = sb_getblk(tree->inode->i_sb, newblock); -+ if (!bh) { -+ err = -EIO; -+ ext3_std_error(tree->inode->i_sb, err); -+ return err; -+ } -+ lock_buffer(bh); -+ -+ if ((err = ext3_journal_get_create_access(handle, bh))) { -+ unlock_buffer(bh); -+ goto out; -+ } -+ -+ /* move top-level index/leaf into new block */ -+ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); -+ -+ /* set size of new block */ -+ neh = EXT_BLOCK_HDR(bh); -+ /* old root could have indexes or leaves -+ * so calculate eh_max right way */ -+ if (EXT_DEPTH(tree)) -+ neh->eh_max = ext3_ext_space_block_idx(tree); -+ else -+ neh->eh_max = ext3_ext_space_block(tree); -+ neh->eh_magic = EXT3_EXT_MAGIC; -+ set_buffer_uptodate(bh); -+ unlock_buffer(bh); -+ -+ if ((err = ext3_journal_dirty_metadata(handle, bh))) -+ goto out; -+ -+ /* create index in new top-level index: num,max,pointer */ -+ if ((err = ext3_ext_get_access(handle, tree, curp))) -+ goto out; -+ -+ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; -+ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); -+ curp->p_hdr->eh_entries = 1; -+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); -+ /* FIXME: it works, but actually path[0] can be index */ -+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; -+ curp->p_idx->ei_leaf = newblock; -+ curp->p_idx->ei_leaf_hi = curp->p_idx->ei_unused = 0; -+ -+ neh = EXT_ROOT_HDR(tree); -+ fidx = EXT_FIRST_INDEX(neh); -+ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); -+ -+ neh->eh_depth = path->p_depth + 1; -+ err = ext3_ext_dirty(handle, tree, curp); -+out: -+ brelse(bh); -+ -+ return err; -+} -+ -+/* -+ * routine finds empty index and adds new leaf. if no free index found -+ * then it requests in-depth growing -+ */ -+static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_ext_path *curp; -+ int depth, i, err = 0; -+ -+repeat: -+ i = depth = EXT_DEPTH(tree); -+ -+ /* walk up to the tree and look for free index entry */ -+ curp = path + depth; -+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { -+ i--; -+ curp--; -+ } -+ -+ /* we use already allocated block for index block -+ * so, subsequent data blocks should be contigoues */ -+ if (EXT_HAS_FREE_INDEX(curp)) { -+ /* if we found index with free entry, then use that -+ * entry: create all needed subtree and add new leaf */ -+ err = ext3_ext_split(handle, tree, path, newext, i); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ } else { -+ /* tree is full, time to grow in depth */ -+ err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ -+ /* refill path */ -+ ext3_ext_drop_refs(path); -+ path = ext3_ext_find_extent(tree, newext->ee_block, path); -+ if (IS_ERR(path)) -+ err = PTR_ERR(path); -+ -+ /* -+ * only first (depth 0 -> 1) produces free space -+ * in all other cases we have to split growed tree -+ */ -+ depth = EXT_DEPTH(tree); -+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { -+ /* now we need split */ -+ goto repeat; -+ } -+ } -+ -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK -+ * NOTE: it consider block number from index entry as -+ * allocated block. thus, index entries have to be consistent -+ * with leafs -+ */ -+static unsigned long -+ext3_ext_next_allocated_block(struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return EXT_MAX_BLOCK; -+ -+ /* FIXME: what if index isn't full ?! */ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ /* leaf */ -+ if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) -+ return path[depth].p_ext[1].ee_block; -+ } else { -+ /* index */ -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ } -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * returns first allocated block from next leaf or EXT_MAX_BLOCK -+ */ -+static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth; -+ -+ EXT_ASSERT(path != NULL); -+ depth = path->p_depth; -+ -+ /* zero-tree has no leaf blocks at all */ -+ if (depth == 0) -+ return EXT_MAX_BLOCK; -+ -+ /* go to index block */ -+ depth--; -+ -+ while (depth >= 0) { -+ if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) -+ return path[depth].p_idx[1].ei_block; -+ depth--; -+ } -+ -+ return EXT_MAX_BLOCK; -+} -+ -+/* -+ * if leaf gets modified and modified extent is first in the leaf -+ * then we have to correct all indexes above -+ * TODO: do we need to correct tree in all cases? -+ */ -+int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct ext3_extent_header *eh; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex; -+ unsigned long border; -+ int k, err = 0; -+ -+ eh = path[depth].p_hdr; -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(eh); -+ -+ if (depth == 0) { -+ /* there is no tree at all */ -+ return 0; -+ } -+ -+ if (ex != EXT_FIRST_EXTENT(eh)) { -+ /* we correct tree if first leaf got modified only */ -+ return 0; -+ } -+ -+ /* -+ * TODO: we need correction if border is smaller then current one -+ */ -+ k = depth - 1; -+ border = path[depth].p_ext->ee_block; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ return err; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ return err; -+ -+ while (k--) { -+ /* change all left-side indexes */ -+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) -+ break; -+ if ((err = ext3_ext_get_access(handle, tree, path + k))) -+ break; -+ path[k].p_idx->ei_block = border; -+ if ((err = ext3_ext_dirty(handle, tree, path + k))) -+ break; -+ } -+ -+ return err; -+} -+ -+static int inline -+ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) -+ return 0; -+ -+#ifdef AGRESSIVE_TEST -+ if (ex1->ee_len >= 4) -+ return 0; -+#endif -+ -+ if (!tree->ops->mergable) -+ return 1; -+ -+ return tree->ops->mergable(ex1, ex2); -+} -+ -+/* -+ * this routine tries to merge requsted extent into the existing -+ * extent or inserts requested extent as new one into the tree, -+ * creating new leaf in no-space case -+ */ -+int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) -+{ -+ struct ext3_extent_header * eh; -+ struct ext3_extent *ex, *fex; -+ struct ext3_extent *nearex; /* nearest extent */ -+ struct ext3_ext_path *npath = NULL; -+ int depth, len, err, next; -+ -+ EXT_ASSERT(newext->ee_len > 0); -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(path[depth].p_hdr); -+ -+ /* try to insert block into found extent and return */ -+ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { -+ ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ return err; -+ ex->ee_len += newext->ee_len; -+ eh = path[depth].p_hdr; -+ nearex = ex; -+ goto merge; -+ } -+ -+repeat: -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) -+ goto has_space; -+ -+ /* probably next leaf has space for us? */ -+ fex = EXT_LAST_EXTENT(eh); -+ next = ext3_ext_next_leaf_block(tree, path); -+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->eh_entries < eh->eh_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); -+ } -+ -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ depth = EXT_DEPTH(tree); -+ eh = path[depth].p_hdr; -+ -+has_space: -+ nearex = path[depth].p_ext; -+ -+ if ((err = ext3_ext_get_access(handle, tree, path + depth))) -+ goto cleanup; -+ -+ if (!nearex) { -+ /* there is no extent in this leaf, create first one */ -+ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); -+ path[depth].p_ext = EXT_FIRST_EXTENT(eh); -+ } else if (newext->ee_block > nearex->ee_block) { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ if (nearex != EXT_LAST_EXTENT(eh)) { -+ len = EXT_MAX_EXTENT(eh) - nearex; -+ len = (len - 1) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 2, nearex + 1, len); -+ } -+ path[depth].p_ext = nearex + 1; -+ } else { -+ EXT_ASSERT(newext->ee_block != nearex->ee_block); -+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); -+ len = len < 0 ? 0 : len; -+ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); -+ memmove(nearex + 1, nearex, len); -+ path[depth].p_ext = nearex; -+ } -+ -+ eh->eh_entries++; -+ nearex = path[depth].p_ext; -+ nearex->ee_block = newext->ee_block; -+ nearex->ee_start = newext->ee_start; -+ nearex->ee_len = newext->ee_len; -+ /* FIXME: support for large fs */ -+ nearex->ee_start_hi = 0; -+ -+merge: -+ /* try to merge extents to the right */ -+ while (nearex < EXT_LAST_EXTENT(eh)) { -+ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) -+ break; -+ /* merge with next extent! */ -+ nearex->ee_len += nearex[1].ee_len; -+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * -+ sizeof(struct ext3_extent); -+ memmove(nearex + 1, nearex + 2, len); -+ } -+ eh->eh_entries--; -+ EXT_ASSERT(eh->eh_entries > 0); -+ } -+ -+ /* try to merge extents to the left */ -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ if (err) -+ goto cleanup; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ -+cleanup: -+ if (npath) { -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ } -+ ext3_ext_tree_changed(tree); -+ ext3_ext_invalidate_cache(tree); -+ return err; -+} -+ -+int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, -+ unsigned long num, ext_prepare_callback func) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_ext_cache cbex; -+ struct ext3_extent *ex; -+ unsigned long next, start = 0, end = 0; -+ unsigned long last = block + num; -+ int depth, exists, err = 0; -+ -+ EXT_ASSERT(tree); -+ EXT_ASSERT(func); -+ EXT_ASSERT(tree->inode); -+ EXT_ASSERT(tree->root); -+ -+ while (block < last && block != EXT_MAX_BLOCK) { -+ num = last - block; -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(tree, block, path); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ break; -+ } -+ -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(path[depth].p_hdr); -+ ex = path[depth].p_ext; -+ next = ext3_ext_next_allocated_block(path); -+ -+ exists = 0; -+ if (!ex) { -+ /* there is no extent yet, so try to allocate -+ * all requested space */ -+ start = block; -+ end = block + num; -+ } else if (ex->ee_block > block) { -+ /* need to allocate space before found extent */ -+ start = block; -+ end = ex->ee_block; -+ if (block + num < end) -+ end = block + num; -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ /* need to allocate space after found extent */ -+ start = block; -+ end = block + num; -+ if (end >= next) -+ end = next; -+ } else if (block >= ex->ee_block) { -+ /* -+ * some part of requested space is covered -+ * by found extent -+ */ -+ start = block; -+ end = ex->ee_block + ex->ee_len; -+ if (block + num < end) -+ end = block + num; -+ exists = 1; -+ } else { -+ BUG(); -+ } -+ EXT_ASSERT(end > start); -+ -+ if (!exists) { -+ cbex.ec_block = start; -+ cbex.ec_len = end - start; -+ cbex.ec_start = 0; -+ cbex.ec_type = EXT3_EXT_CACHE_GAP; -+ } else { -+ cbex.ec_block = ex->ee_block; -+ cbex.ec_len = ex->ee_len; -+ cbex.ec_start = ex->ee_start; -+ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; -+ } -+ -+ EXT_ASSERT(cbex.ec_len > 0); -+ EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex); -+ ext3_ext_drop_refs(path); -+ -+ if (err < 0) -+ break; -+ if (err == EXT_REPEAT) -+ continue; -+ else if (err == EXT_BREAK) { -+ err = 0; -+ break; -+ } -+ -+ if (EXT_DEPTH(tree) != depth) { -+ /* depth was changed. we have to realloc path */ -+ kfree(path); -+ path = NULL; -+ } -+ -+ block = cbex.ec_block + cbex.ec_len; -+ } -+ -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ -+ return err; -+} -+ -+static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, -+ __u32 len, __u32 start, int type) -+{ -+ EXT_ASSERT(len > 0); -+ if (tree->cex) { -+ tree->cex->ec_type = type; -+ tree->cex->ec_block = block; -+ tree->cex->ec_len = len; -+ tree->cex->ec_start = start; -+ } -+} -+ -+/* -+ * this routine calculate boundaries of the gap requested block fits into -+ * and cache this gap -+ */ -+static inline void -+ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) -+{ -+ int depth = EXT_DEPTH(tree); -+ unsigned long lblock, len; -+ struct ext3_extent *ex; -+ -+ if (!tree->cex) -+ return; -+ -+ ex = path[depth].p_ext; -+ if (ex == NULL) { -+ /* there is no extent yet, so gap is [0;-] */ -+ lblock = 0; -+ len = EXT_MAX_BLOCK; -+ ext_debug(tree, "cache gap(whole file):"); -+ } else if (block < ex->ee_block) { -+ lblock = block; -+ len = ex->ee_block - block; -+ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); -+ } else if (block >= ex->ee_block + ex->ee_len) { -+ lblock = ex->ee_block + ex->ee_len; -+ len = ext3_ext_next_allocated_block(path); -+ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(len > lblock); -+ len = len - lblock; -+ } else { -+ lblock = len = 0; -+ BUG(); -+ } -+ -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); -+ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); -+} -+ -+static inline int -+ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) -+{ -+ struct ext3_ext_cache *cex = tree->cex; -+ -+ /* is there cache storage at all? */ -+ if (!cex) -+ return EXT3_EXT_CACHE_NO; -+ -+ /* has cache valid data? */ -+ if (cex->ec_type == EXT3_EXT_CACHE_NO) -+ return EXT3_EXT_CACHE_NO; -+ -+ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || -+ cex->ec_type == EXT3_EXT_CACHE_EXTENT); -+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { -+ ex->ee_block = cex->ec_block; -+ ex->ee_start = cex->ec_start; -+ ex->ee_start_hi = 0; -+ ex->ee_len = cex->ec_len; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return cex->ec_type; -+ } -+ -+ /* not in cache */ -+ return EXT3_EXT_CACHE_NO; -+} -+ -+/* -+ * routine removes index from the index block -+ * it's used in truncate case only. thus all requests are for -+ * last index in the block only -+ */ -+int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ struct buffer_head *bh; -+ int err; -+ -+ /* free index block */ -+ path--; -+ EXT_ASSERT(path->p_hdr->eh_entries); -+ if ((err = ext3_ext_get_access(handle, tree, path))) -+ return err; -+ path->p_hdr->eh_entries--; -+ if ((err = ext3_ext_dirty(handle, tree, path))) -+ return err; -+ ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); -+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); -+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ return err; -+} -+ -+int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int depth = EXT_DEPTH(tree); -+ int needed; -+ -+ if (path) { -+ /* probably there is space in leaf? */ -+ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) -+ return 1; -+ } -+ -+ /* -+ * the worste case we're expecting is creation of the -+ * new root (growing in depth) with index splitting -+ * for splitting we have to consider depth + 1 because -+ * previous growing could increase it -+ */ -+ depth = depth + 1; -+ -+ /* -+ * growing in depth: -+ * block allocation + new root + old root -+ */ -+ needed = EXT3_ALLOC_NEEDED + 2; -+ -+ /* index split. we may need: -+ * allocate intermediate indexes and new leaf -+ * change two blocks at each level, but root -+ * modify root block (inode) -+ */ -+ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; -+ -+ return needed; -+} -+ -+static int -+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, tex; -+ struct ext3_ext_path *npath; -+ int depth, creds, err; -+ -+ depth = EXT_DEPTH(tree); -+ ex = path[depth].p_ext; -+ EXT_ASSERT(ex); -+ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); -+ EXT_ASSERT(ex->ee_block < start); -+ -+ /* calculate tail extent */ -+ tex.ee_block = end + 1; -+ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); -+ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; -+ -+ creds = ext3_ext_calc_credits_for_insert(tree, path); -+ handle = ext3_ext_journal_restart(handle, creds); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ /* calculate head extent. use primary extent */ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ return err; -+ ex->ee_len = start - ex->ee_block; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ return err; -+ -+ /* FIXME: some callback to free underlying resource -+ * and correct ee_start? */ -+ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); -+ -+ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ depth = EXT_DEPTH(tree); -+ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); -+ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); -+ -+ err = ext3_ext_insert_extent(handle, tree, npath, &tex); -+ ext3_ext_drop_refs(npath); -+ kfree(npath); -+ -+ return err; -+} -+ -+static int -+ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) -+{ -+ struct ext3_extent *ex, *fu = NULL, *lu, *le; -+ int err = 0, correct_index = 0; -+ int depth = EXT_DEPTH(tree), credits; -+ struct ext3_extent_header *eh; -+ unsigned a, b, block, num; -+ -+ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); -+ if (!path[depth].p_hdr) -+ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ eh = path[depth].p_hdr; -+ EXT_ASSERT(eh); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ -+ /* find where to start removing */ -+ le = ex = EXT_LAST_EXTENT(eh); -+ while (ex != EXT_FIRST_EXTENT(eh)) { -+ if (ex->ee_block <= end) -+ break; -+ ex--; -+ } -+ -+ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { -+ /* removal of internal part of the extent requested -+ * tail and head must be placed in different extent -+ * so, we have to insert one more extent */ -+ path[depth].p_ext = ex; -+ return ext3_ext_split_for_rm(handle, tree, path, start, end); -+ } -+ -+ lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { -+ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); -+ path[depth].p_ext = ex; -+ -+ a = ex->ee_block > start ? ex->ee_block : start; -+ b = ex->ee_block + ex->ee_len - 1 < end ? -+ ex->ee_block + ex->ee_len - 1 : end; -+ -+ ext_debug(tree, " border %u:%u\n", a, b); -+ -+ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { -+ block = 0; -+ num = 0; -+ BUG(); -+ } else if (a != ex->ee_block) { -+ /* remove tail of the extent */ -+ block = ex->ee_block; -+ num = a - block; -+ } else if (b != ex->ee_block + ex->ee_len - 1) { -+ /* remove head of the extent */ -+ block = a; -+ num = b - a; -+ } else { -+ /* remove whole extent: excelent! */ -+ block = ex->ee_block; -+ num = 0; -+ EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); -+ } -+ -+ if (ex == EXT_FIRST_EXTENT(eh)) -+ correct_index = 1; -+ -+ credits = 1; -+ if (correct_index) -+ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; -+ if (tree->ops->remove_extent_credits) -+ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); -+ -+ handle = ext3_ext_journal_restart(handle, credits); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out; -+ } -+ -+ err = ext3_ext_get_access(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ if (tree->ops->remove_extent) -+ err = tree->ops->remove_extent(tree, ex, a, b); -+ if (err) -+ goto out; -+ -+ if (num == 0) { -+ /* this extent is removed entirely mark slot unused */ -+ ex->ee_start = ex->ee_start_hi = 0; -+ eh->eh_entries--; -+ fu = ex; -+ } -+ -+ ex->ee_block = block; -+ ex->ee_len = num; -+ -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ if (err) -+ goto out; -+ -+ ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ ex--; -+ } -+ -+ if (fu) { -+ /* reuse unused slots */ -+ while (lu < le) { -+ if (lu->ee_start) { -+ *fu = *lu; -+ lu->ee_start = lu->ee_start_hi = 0; -+ fu++; -+ } -+ lu++; -+ } -+ } -+ -+ if (correct_index && eh->eh_entries) -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ -+ /* if this leaf is free, then we should -+ * remove it from index block above */ -+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) -+ err = ext3_ext_rm_idx(handle, tree, path + depth); -+ -+out: -+ return err; -+} -+ -+ -+static struct ext3_extent_idx * -+ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) -+{ -+ struct ext3_extent_idx *ix; -+ -+ ix = EXT_LAST_INDEX(hdr); -+ while (ix != EXT_FIRST_INDEX(hdr)) { -+ if (ix->ei_block <= block) -+ break; -+ ix--; -+ } -+ return ix; -+} -+ -+/* -+ * returns 1 if current index have to be freed (even partial) -+ */ -+static int inline -+ext3_ext_more_to_rm(struct ext3_ext_path *path) -+{ -+ EXT_ASSERT(path->p_idx); -+ -+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) -+ return 0; -+ -+ /* -+ * if truncate on deeper level happened it it wasn't partial -+ * so we have to consider current index for truncation -+ */ -+ if (path->p_hdr->eh_entries == path->p_block) -+ return 0; -+ return 1; -+} -+ -+int ext3_ext_remove_space(struct ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) -+{ -+ struct inode *inode = tree->inode; -+ struct super_block *sb = inode->i_sb; -+ int depth = EXT_DEPTH(tree); -+ struct ext3_ext_path *path; -+ handle_t *handle; -+ int i = 0, err = 0; -+ -+ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); -+ -+ /* probably first extent we're gonna free will be last in block */ -+ handle = ext3_journal_start(inode, depth + 1); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ ext3_ext_invalidate_cache(tree); -+ -+ /* -+ * we start scanning from right side freeing all the blocks -+ * after i_size and walking into the deep -+ */ -+ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); -+ if (IS_ERR(path)) { -+ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); -+ ext3_journal_stop(handle); -+ return -ENOMEM; -+ } -+ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); -+ path[i].p_hdr = EXT_ROOT_HDR(tree); -+ -+ while (i >= 0 && err == 0) { -+ if (i == depth) { -+ /* this is leaf block */ -+ err = ext3_ext_rm_leaf(handle, tree, path, start, end); -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ continue; -+ } -+ -+ /* this is index block */ -+ if (!path[i].p_hdr) { -+ ext_debug(tree, "initialize header\n"); -+ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); -+ } -+ -+ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -+ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -+ -+ if (!path[i].p_idx) { -+ /* this level hasn't touched yet */ -+ path[i].p_idx = -+ ext3_ext_last_covered(path[i].p_hdr, end); -+ path[i].p_block = path[i].p_hdr->eh_entries + 1; -+ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); -+ } else { -+ /* we've already was here, see at next index */ -+ path[i].p_idx--; -+ } -+ -+ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); -+ if (ext3_ext_more_to_rm(path + i)) { -+ /* go to the next level */ -+ ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); -+ memset(path + i + 1, 0, sizeof(*path)); -+ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); -+ if (!path[i+1].p_bh) { -+ /* should we reset i_size? */ -+ err = -EIO; -+ break; -+ } -+ /* put actual number of indexes to know is this -+ * number got changed at the next iteration */ -+ path[i].p_block = path[i].p_hdr->eh_entries; -+ i++; -+ } else { -+ /* we finish processing this index, go up */ -+ if (path[i].p_hdr->eh_entries == 0 && i > 0) { -+ /* index is empty, remove it -+ * handle must be already prepared by the -+ * truncatei_leaf() */ -+ err = ext3_ext_rm_idx(handle, tree, path + i); -+ } -+ /* root level have p_bh == NULL, brelse() eats this */ -+ brelse(path[i].p_bh); -+ i--; -+ ext_debug(tree, "return to level %d\n", i); -+ } -+ } -+ -+ /* TODO: flexible tree reduction should be here */ -+ if (path->p_hdr->eh_entries == 0) { -+ /* -+ * truncate to zero freed all the tree -+ * so, we need to correct eh_depth -+ */ -+ err = ext3_ext_get_access(handle, tree, path); -+ if (err == 0) { -+ EXT_ROOT_HDR(tree)->eh_depth = 0; -+ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); -+ err = ext3_ext_dirty(handle, tree, path); -+ } -+ } -+ ext3_ext_tree_changed(tree); -+ -+ kfree(path); -+ ext3_journal_stop(handle); -+ -+ return err; -+} -+ -+int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) -+{ -+ int lcap, icap, rcap, leafs, idxs, num; -+ -+ rcap = ext3_ext_space_root(tree); -+ if (blocks <= rcap) { -+ /* all extents fit to the root */ -+ return 0; -+ } -+ -+ rcap = ext3_ext_space_root_idx(tree); -+ lcap = ext3_ext_space_block(tree); -+ icap = ext3_ext_space_block_idx(tree); -+ -+ num = leafs = (blocks + lcap - 1) / lcap; -+ if (leafs <= rcap) { -+ /* all pointers to leafs fit to the root */ -+ return leafs; -+ } -+ -+ /* ok. we need separate index block(s) to link all leaf blocks */ -+ idxs = (leafs + icap - 1) / icap; -+ do { -+ num += idxs; -+ idxs = (idxs + icap - 1) / icap; -+ } while (idxs > rcap); -+ -+ return num; -+} -+ -+/* -+ * called at mount time -+ */ -+void ext3_ext_init(struct super_block *sb) -+{ -+ /* -+ * possible initialization would be here -+ */ -+ -+ if (test_opt(sb, EXTENTS)) { -+ printk("EXT3-fs: file extents enabled"); -+#ifdef AGRESSIVE_TEST -+ printk(", agressive tests"); -+#endif -+#ifdef CHECK_BINSEARCH -+ printk(", check binsearch"); -+#endif -+ printk("\n"); -+ } -+} -+ -+/* -+ * called at umount time -+ */ -+void ext3_ext_release(struct super_block *sb) -+{ -+} -+ -+/************************************************************************ -+ * VFS related routines -+ ************************************************************************/ -+ -+static int ext3_get_inode_write_access(handle_t *handle, void *buffer) -+{ -+ /* we use in-core data, not bh */ -+ return 0; -+} -+ -+static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) -+{ -+ struct inode *inode = buffer; -+ return ext3_mark_inode_dirty(handle, inode); -+} -+ -+static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) -+{ -+ /* FIXME: support for large fs */ -+ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) -+ return 1; -+ return 0; -+} -+ -+static int -+ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed; -+ -+ /* at present, extent can't cross block group */; -+ needed = 4; /* bitmap + group desc + sb + inode */ -+ -+#ifdef CONFIG_QUOTA -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ return needed; -+} -+ -+static int -+ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) -+{ -+ int needed = ext3_remove_blocks_credits(tree, ex, from, to); -+ handle_t *handle = ext3_journal_start(tree->inode, needed); -+ struct buffer_head *bh; -+ int i; -+ -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { -+ /* tail removal */ -+ unsigned long num, start; -+ num = ex->ee_block + ex->ee_len - from; -+ start = ex->ee_start + ex->ee_len - num; -+ ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); -+ for (i = 0; i < num; i++) { -+ bh = sb_find_get_block(tree->inode->i_sb, start + i); -+ ext3_forget(handle, 0, tree->inode, bh, start + i); -+ } -+ ext3_free_blocks(handle, tree->inode, start, num); -+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { -+ printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } else { -+ printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); -+ } -+ ext3_journal_stop(handle); -+ return 0; -+} -+ -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; -+ int depth; -+ -+ if (path) { -+ struct ext3_extent *ex; -+ depth = path->p_depth; -+ -+ /* try to predict block placement */ -+ if ((ex = path[depth].p_ext)) -+ return ex->ee_start + (block - ex->ee_block); -+ -+ /* it looks index is empty -+ * try to find starting from index itself */ -+ if (path[depth].p_bh) -+ return path[depth].p_bh->b_blocknr; -+ } -+ -+ /* OK. use inode's group */ -+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour + block; -+} -+ -+static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) -+{ -+ struct inode *inode = tree->inode; -+ int newblock, goal; -+ -+ EXT_ASSERT(path); -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_start); -+ EXT_ASSERT(ex->ee_len); -+ -+ /* reuse block from the extent to order data/metadata */ -+ newblock = ex->ee_start++; -+ ex->ee_len--; -+ if (ex->ee_len == 0) { -+ ex->ee_len = 1; -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, err); -+ ex->ee_start_hi = 0; -+ if (ex->ee_start == 0) { -+ /* error occured: restore old extent */ -+ ex->ee_start = newblock; -+ return 0; -+ } -+ } -+ return newblock; -+} -+ -+static struct ext3_extents_helpers ext3_blockmap_helpers = { -+ .get_write_access = ext3_get_inode_write_access, -+ .mark_buffer_dirty = ext3_mark_buffer_dirty, -+ .mergable = ext3_ext_mergable, -+ .new_block = ext3_new_block_cb, -+ .remove_extent = ext3_remove_blocks, -+ .remove_extent_credits = ext3_remove_blocks_credits, -+}; -+ -+void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) -+{ -+ tree->inode = inode; -+ tree->root = (void *) EXT3_I(inode)->i_data; -+ tree->buffer = (void *) inode; -+ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; -+ tree->ops = &ext3_blockmap_helpers; -+} -+ -+int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) -+{ -+ struct ext3_ext_path *path = NULL; -+ struct ext3_extent newex; -+ struct ext3_extent *ex; -+ int goal, newblock, err = 0, depth; -+ struct ext3_extents_tree tree; -+ -+ clear_buffer_new(bh_result); -+ ext3_init_tree_desc(&tree, inode); -+ ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); -+ down(&EXT3_I(inode)->truncate_sem); -+ -+ /* check in cache */ -+ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { -+ if (goal == EXT3_EXT_CACHE_GAP) { -+ if (!create) { -+ /* block isn't allocated yet and -+ * user don't want to allocate it */ -+ goto out2; -+ } -+ /* we should allocate requested block */ -+ } else if (goal == EXT3_EXT_CACHE_EXTENT) { -+ /* block is already allocated */ -+ newblock = iblock - newex.ee_block + newex.ee_start; -+ goto out; -+ } else { -+ EXT_ASSERT(0); -+ } -+ } -+ -+ /* find extent for this block */ -+ path = ext3_ext_find_extent(&tree, iblock, NULL); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ path = NULL; -+ goto out2; -+ } -+ -+ depth = EXT_DEPTH(&tree); -+ -+ /* -+ * consistent leaf must not be empty -+ * this situations is possible, though, _during_ tree modification -+ * this is why assert can't be put in ext3_ext_find_extent() -+ */ -+ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); -+ -+ if ((ex = path[depth].p_ext)) { -+ /* if found exent covers block, simple return it */ -+ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { -+ newblock = iblock - ex->ee_block + ex->ee_start; -+ ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ ext3_ext_put_in_cache(&tree, ex->ee_block, -+ ex->ee_len, ex->ee_start, -+ EXT3_EXT_CACHE_EXTENT); -+ goto out; -+ } -+ } -+ -+ /* -+ * requested block isn't allocated yet -+ * we couldn't try to create block if create flag is zero -+ */ -+ if (!create) { -+ /* put just found gap into cache to speedup subsequest reqs */ -+ ext3_ext_put_gap_in_cache(&tree, path, iblock); -+ goto out2; -+ } -+ -+ /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, &err); -+ if (!newblock) -+ goto out2; -+ ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); -+ -+ /* try to insert new extent into found leaf and return */ -+ newex.ee_block = iblock; -+ newex.ee_start = newblock; -+ newex.ee_start_hi = 0; -+ newex.ee_len = 1; -+ err = ext3_ext_insert_extent(handle, &tree, path, &newex); -+ if (err) -+ goto out2; -+ -+ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ -+ /* previous routine could use block we allocated */ -+ newblock = newex.ee_start; -+ set_buffer_new(bh_result); -+ -+ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -+ newex.ee_start, EXT3_EXT_CACHE_EXTENT); -+out: -+ ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); -+out2: -+ if (path) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ } -+ up(&EXT3_I(inode)->truncate_sem); -+ -+ return err; -+} -+ -+void ext3_ext_truncate(struct inode * inode, struct page *page) -+{ -+ struct address_space *mapping = inode->i_mapping; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_extents_tree tree; -+ unsigned long last_block; -+ handle_t *handle; -+ int err = 0; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ /* -+ * probably first extent we're gonna free will be last in block -+ */ -+ err = ext3_writepage_trans_blocks(inode) + 3; -+ handle = ext3_journal_start(inode, err); -+ if (IS_ERR(handle)) { -+ if (page) { -+ clear_highpage(page); -+ flush_dcache_page(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ return; -+ } -+ -+ if (page) -+ ext3_block_truncate_page(handle, page, mapping, inode->i_size); -+ -+ down(&EXT3_I(inode)->truncate_sem); -+ ext3_ext_invalidate_cache(&tree); -+ -+ /* -+ * TODO: optimization is possible here -+ * probably we need not scaning at all, -+ * because page truncation is enough -+ */ -+ if (ext3_orphan_add(handle, inode)) -+ goto out_stop; -+ -+ /* we have to know where to truncate from in crash case */ -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_mark_inode_dirty(handle, inode); -+ -+ last_block = (inode->i_size + sb->s_blocksize - 1) >> -+ EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. -+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); -+ -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} -+ -+/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode -+ */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); -+ -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return needed; -+} -+ -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} -+ -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *newex) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; -+ -+ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; -+ -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} -+ -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_ext_cache *ex) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; -+ -+ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) -+ return EXT_CONTINUE; -+ -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; -+} -+ -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; -+ -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; -+ -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } -+ -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); -Index: linux-stage/fs/ext3/ialloc.c -=================================================================== ---- linux-stage.orig/fs/ext3/ialloc.c 2005-02-25 14:50:50.304202816 +0200 -+++ linux-stage/fs/ext3/ialloc.c 2005-02-25 15:33:48.920193600 +0200 -@@ -566,7 +566,7 @@ repeat: - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - -- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - /* dirsync only applies to directories */ -@@ -646,6 +646,18 @@ - DQUOT_FREE_INODE(inode); - goto fail2; - } -+ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } -+ - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2005-02-25 14:50:50.309202056 +0200 -+++ linux-stage/fs/ext3/inode.c 2005-02-25 15:36:51.846384592 +0200 -@@ -796,6 +796,17 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) -+{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); -+} -+ - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -806,8 +817,8 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -851,7 +862,7 @@ - - get_block: - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, - bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -@@ -871,7 +882,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1589,7 +1600,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2087,6 +2098,9 @@ - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); -+ - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2814,6 +2828,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2005-02-25 14:49:42.168561008 +0200 -+++ linux-stage/fs/ext3/Makefile 2005-02-25 15:39:28.384587168 +0200 -@@ -5,7 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o -+ ioctl.o namei.o super.o symlink.o hash.o resize.o \ -+ extents.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:52:33.550506992 +0200 -+++ linux-stage/fs/ext3/super.c 2005-02-25 15:38:10.474431312 +0200 -@@ -394,6 +394,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -457,6 +458,8 @@ - #endif - ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - ei->vfs_inode.i_version = 1; -+ -+ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - return &ei->vfs_inode; - } - -@@ -589,6 +594,7 @@ - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_noextents, Opt_extdebug, - }; - - static match_table_t tokens = { -@@ -639,6 +644,9 @@ - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_noextents, "noextents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -943,6 +950,15 @@ - match_int(&args[0], &option); - *n_blocks_count = option; - break; -+ case Opt_extents: -+ set_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_noextents: -+ clear_opt (sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1625,6 +1638,8 @@ - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); -+ - return 0; - - failed_mount3: -Index: linux-stage/fs/ext3/ioctl.c -=================================================================== ---- linux-stage.orig/fs/ext3/ioctl.c 2005-02-25 14:37:28.971023976 +0200 -+++ linux-stage/fs/ext3/ioctl.c 2005-02-25 15:33:48.938190864 +0200 -@@ -124,6 +124,10 @@ - err = ext3_change_inode_journal_flag(inode, jflag); - return err; - } -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:53:56.424908168 +0200 -+++ linux-stage/include/linux/ext3_fs.h 2005-02-25 15:39:12.841950008 +0200 -@@ -186,8 +186,9 @@ - #define EXT3_NOTAIL_FL 0x00008000 /* don't merge file tail */ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - --#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -+#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -@@ -237,6 +238,9 @@ - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) - - /* - * Structure of an inode on the disk -@@ -359,6 +363,8 @@ - #define EXT3_MOUNT_RESERVATION 0x20000 /* Preallocation */ - #define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -503,11 +509,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -756,6 +763,9 @@ - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, -+ struct address_space *, loff_t); -+extern int ext3_writepage_trans_blocks(struct inode *inode); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -836,6 +844,16 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); -+extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg); - - #endif /* __KERNEL__ */ - -Index: linux-stage/include/linux/ext3_extents.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_extents.h 2005-02-25 15:33:48.891198008 +0200 -+++ linux-stage/include/linux/ext3_extents.h 2005-02-25 15:33:48.944189952 +0200 -@@ -0,0 +1,262 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ -+/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search -+ */ -+#define CHECK_BINSEARCH_ -+ -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG_ -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) -+#endif -+ -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ -+ -+ -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ -+ -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. all extents in the inode) -+ * - number of alive extents in the inode -+ */ -+ -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; -+ -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; -+ -+/* -+ * each block (leaves and indexes), even inode-stored has header -+ */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ -+}; -+ -+#define EXT3_EXT_MAGIC 0xf30a -+ -+/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking -+ */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; -+ -+/* -+ * structure for external API -+ */ -+ -+/* -+ * storage for cached extent -+ */ -+struct ext3_ext_cache { -+ __u32 ec_start; -+ __u32 ec_block; -+ __u32 ec_len; -+ __u32 ec_type; -+}; -+ -+#define EXT3_EXT_CACHE_NO 0 -+#define EXT3_EXT_CACHE_GAP 1 -+#define EXT3_EXT_CACHE_EXTENT 2 -+ -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_ext_cache *cex;/* last found extent */ -+ struct ext3_extents_helpers *ops; -+}; -+ -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; -+ -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_ext_cache *); -+ -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 -+ -+ -+#define EXT_MAX_BLOCK 0xffffffff -+ -+ -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) -+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) -+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ -+ -+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) -+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) -+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) -+ -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+ -+#define EXT_CHECK_PATH(tree,path) \ -+{ \ -+ int depth = EXT_DEPTH(tree); \ -+ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_idx < \ -+ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_ext < \ -+ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ -+ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ -+ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ -+ && depth != 0); \ -+ BUG_ON((path)[0].p_depth != depth); \ -+} -+ -+ -+/* -+ * this structure is used to gather extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; -+ -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; -+ -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); -+ -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) -+{ -+ if (tree->cex) -+ tree->cex->ec_type = EXT3_EXT_CACHE_NO; -+} -+ -+ -+#endif /* _LINUX_EXT3_EXTENTS */ -Index: linux-stage/include/linux/ext3_fs_i.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs_i.h 2005-02-25 14:50:50.320200384 +0200 -+++ linux-stage/include/linux/ext3_fs_i.h 2005-02-25 15:33:48.945189800 +0200 -@@ -128,6 +128,8 @@ - */ - struct semaphore truncate_sem; - struct inode vfs_inode; -+ -+ __u32 i_cached_extent[4]; - }; - - #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch deleted file mode 100644 index cd37db4..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch +++ /dev/null @@ -1,31 +0,0 @@ -Index: 57chaos/fs/ext3/inode.c -=================================================================== ---- 57chaos.orig/fs/ext3/inode.c 2004-06-21 14:15:31.000000000 -0700 -+++ 57chaos/fs/ext3/inode.c 2004-06-21 14:19:27.000000000 -0700 -@@ -2270,6 +2270,12 @@ void ext3_truncate_thread(struct inode * - - memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); - memset(oei->i_data, 0, sizeof(oei->i_data)); -+ if (EXT3_I(old_inode)->i_flags & EXT3_EXTENTS_FL) { -+ EXT3_I(new_inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, old_inode); -+ } else { -+ EXT3_I(new_inode)->i_flags &= ~EXT3_EXTENTS_FL; -+ } - - nei->i_disksize = oei->i_disksize; - nei->i_state |= EXT3_STATE_DELETE; -@@ -2522,6 +2526,13 @@ void ext3_read_inode(struct inode * inod - else - EXT3_I(inode)->i_extra_isize = 0; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) { -+ inode->u.ext3_i.i_cached_extent[0] = 0; -+ inode->u.ext3_i.i_cached_extent[1] = 0; -+ inode->u.ext3_i.i_cached_extent[2] = 0; -+ inode->u.ext3_i.i_cached_extent[3] = 0; -+ } -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; diff --git a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch deleted file mode 100644 index bc752e5..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch +++ /dev/null @@ -1,31 +0,0 @@ -Index: linux-2.4.24/fs/ext3/inode.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/inode.c 2004-05-18 12:34:48.000000000 -0700 -+++ linux-2.4.24/fs/ext3/inode.c 2004-05-18 12:47:50.000000000 -0700 -@@ -2244,6 +2244,13 @@ - else - inode->u.ext3_i.i_extra_isize = 0; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) { -+ inode->u.ext3_i.i_cached_extent[0] = 0; -+ inode->u.ext3_i.i_cached_extent[1] = 0; -+ inode->u.ext3_i.i_cached_extent[2] = 0; -+ inode->u.ext3_i.i_cached_extent[3] = 0; -+ } -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2659,6 +2665,12 @@ - - memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); - memset(oei->i_data, 0, sizeof(oei->i_data)); -+ if (EXT3_I(old_inode)->i_flags & EXT3_EXTENTS_FL) { -+ EXT3_I(new_inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, old_inode); -+ } else { -+ EXT3_I(new_inode)->i_flags &= ~EXT3_EXTENTS_FL; -+ } - - nei->i_disksize = oei->i_disksize; - nei->i_state |= EXT3_STATE_DELETE; diff --git a/lustre/kernel_patches/patches/ext3-extents-bug11324.patch b/lustre/kernel_patches/patches/ext3-extents-bug11324.patch deleted file mode 100644 index c7ed475..0000000 --- a/lustre/kernel_patches/patches/ext3-extents-bug11324.patch +++ /dev/null @@ -1,252 +0,0 @@ -Index: linux-stage/fs/ext3/extents.c -=================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2007-04-17 22:09:19.000000000 -0700 -+++ linux-stage/fs/ext3/extents.c 2007-04-17 22:12:05.000000000 -0700 -@@ -44,26 +44,49 @@ - #include - - --static inline int ext3_ext_check_header(struct ext3_extent_header *eh) --{ -- if (eh->eh_magic != EXT3_EXT_MAGIC) { -- printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", -- (unsigned)eh->eh_magic); -- return -EIO; -- } -- if (eh->eh_max == 0) { -- printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", -- (unsigned)eh->eh_max); -- return -EIO; -- } -- if (eh->eh_entries > eh->eh_max) { -- printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", -- (unsigned)eh->eh_entries); -- return -EIO; -+static int __ext3_ext_check_header(const char *function, struct inode *inode, -+ struct ext3_extent_header *eh, int depth, -+ int max) -+{ -+ const char *error_msg = NULL; -+ -+ if (unlikely(eh->eh_magic != EXT3_EXT_MAGIC)) { -+ error_msg = "invalid magic"; -+ goto corrupted; -+ } -+ if (unlikely(eh->eh_depth != depth)) { -+ error_msg = "unexpected eh_depth"; -+ goto corrupted; -+ } -+ if (unlikely(eh->eh_max == 0)) { -+ error_msg = "too small eh_max"; -+ goto corrupted; -+ } -+ if (unlikely(eh->eh_max > max)) { -+ error_msg = "too large eh_max"; -+ goto corrupted; -+ } -+ if (unlikely(eh->eh_entries > eh->eh_max)) { -+ error_msg = "invalid eh_entries"; -+ goto corrupted; - } - return 0; -+ -+corrupted: -+ ext3_error(inode->i_sb, function, -+ "bad header in inode #%lu: %s - magic %x, " -+ "entries %u, max %u(%u), depth %u(%u)", -+ inode->i_ino, error_msg, eh->eh_magic, -+ eh->eh_entries, eh->eh_max, max, -+ eh->eh_depth, depth); -+ -+ return -EIO; - } - -+#define ext3_ext_check_header(inode,eh,depth,max) \ -+ __ext3_ext_check_header(__FUNCTION__,inode,eh,depth,max) -+ -+ - static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) - { - int err; -@@ -226,6 +249,26 @@ - return size; - } - -+static inline int -+ext3_ext_max_entries(struct ext3_extents_tree *tree, int root, int depth) -+{ -+ int max; -+ -+ if (root) { -+ if (depth == 0) -+ max = ext3_ext_space_root(tree); -+ else -+ max = ext3_ext_space_root_idx(tree); -+ } else { -+ if (depth == 0) -+ max = ext3_ext_space_block(tree); -+ else -+ max = ext3_ext_space_block_idx(tree); -+ } -+ -+ return max; -+} -+ - static void ext3_ext_show_path(struct ext3_extents_tree *tree, - struct ext3_ext_path *path) - { -@@ -296,10 +339,6 @@ - struct ext3_extent_idx *ix; - int l = 0, k, r; - -- EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -- EXT_ASSERT(eh->eh_entries <= eh->eh_max); -- EXT_ASSERT(eh->eh_entries > 0); -- - ext_debug(tree, "binsearch for %d(idx): ", block); - - path->p_idx = ix = EXT_FIRST_INDEX(eh); -@@ -359,9 +398,6 @@ - struct ext3_extent *ex; - int l = 0, k, r; - -- EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -- EXT_ASSERT(eh->eh_entries <= eh->eh_max); -- - if (eh->eh_entries == 0) { - /* - * this leaf is empty yet: -@@ -436,6 +472,7 @@ - struct ext3_extent_header *eh; - struct buffer_head *bh; - int depth, i, ppos = 0; -+ int max; - - EXT_ASSERT(tree); - EXT_ASSERT(tree->inode); -@@ -443,17 +480,15 @@ - - eh = EXT_ROOT_HDR(tree); - EXT_ASSERT(eh); -- if (ext3_ext_check_header(eh)) { -+ i = depth = EXT_DEPTH(tree); -+ max = ext3_ext_max_entries(tree, 1, i); -+ if (ext3_ext_check_header(tree->inode, eh, i, max)) { - /* don't free previously allocated path - * -- caller should take care */ - path = NULL; - goto err; - } - -- i = depth = EXT_DEPTH(tree); -- EXT_ASSERT(eh->eh_max); -- EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -- - /* account possible depth increase */ - if (!path) { - path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -@@ -484,8 +519,10 @@ - path[ppos].p_hdr = eh; - i--; - -- if (ext3_ext_check_header(eh)) -+ max = ext3_ext_max_entries(tree, 0, i); -+ if (ext3_ext_check_header(tree->inode, eh, i, max)) - goto err; -+ - } - - path[ppos].p_depth = i; -@@ -493,9 +530,6 @@ - path[ppos].p_ext = NULL; - path[ppos].p_idx = NULL; - -- if (ext3_ext_check_header(eh)) -- goto err; -- - /* find extent */ - ext3_ext_binsearch(tree, path + ppos, block); - -@@ -1545,6 +1579,8 @@ - ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); - if (!path[depth].p_hdr) - path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); -+ -+ /* the header must be checked already in ext3_ext_remove_space() */ - eh = path[depth].p_hdr; - EXT_ASSERT(eh); - EXT_ASSERT(eh->eh_entries <= eh->eh_max); -@@ -1707,7 +1743,7 @@ - int depth = EXT_DEPTH(tree); - struct ext3_ext_path *path; - handle_t *handle; -- int i = 0, err = 0; -+ int i = 0, err = 0, max; - - ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); - -@@ -1730,7 +1766,13 @@ - } - memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); - path[i].p_hdr = EXT_ROOT_HDR(tree); -- -+ -+ max = ext3_ext_max_entries(tree, 1, depth); -+ if (ext3_ext_check_header(inode, path[i].p_hdr, depth, max)) { -+ err = -EIO; -+ goto out; -+ } -+ - while (i >= 0 && err == 0) { - if (i == depth) { - /* this is leaf block */ -@@ -1740,16 +1782,13 @@ - i--; - continue; - } -- -+ - /* this is index block */ - if (!path[i].p_hdr) { - ext_debug(tree, "initialize header\n"); - path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); - } - -- EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); -- EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); -- - if (!path[i].p_idx) { - /* this level hasn't touched yet */ - path[i].p_idx = -@@ -1776,6 +1815,14 @@ - err = -EIO; - break; - } -+ BUG_ON(i + 1 > depth); -+ max = ext3_ext_max_entries(tree, 0, depth - i - 1); -+ if (ext3_ext_check_header(inode, -+ EXT_BLOCK_HDR(path[i+1].p_bh), -+ depth - i - 1, max)) { -+ err = -EIO; -+ break; -+ } - /* put actual number of indexes to know is this - * number got changed at the next iteration */ - path[i].p_block = path[i].p_hdr->eh_entries; -@@ -1796,7 +1843,7 @@ - } - - /* TODO: flexible tree reduction should be here */ -- if (path->p_hdr->eh_entries == 0) { -+ if (err == 0 && path->p_hdr->eh_entries == 0) { - /* - * truncate to zero freed all the tree - * so, we need to correct eh_depth -@@ -1810,6 +1857,7 @@ - } - ext3_ext_tree_changed(tree); - -+out: - kfree(path); - ext3_journal_stop(handle); - diff --git a/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch new file mode 100644 index 0000000..ffb9700 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch @@ -0,0 +1,86 @@ + - minor fixes + - multiblock get_block() for direct I/O + +Index: linux-2.6.9-full/include/linux/ext3_extents.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_extents.h 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/include/linux/ext3_extents.h 2007-03-28 00:59:32.000000000 +0400 +@@ -205,7 +205,7 @@ typedef int (*ext_prepare_callback)(stru + #define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + #define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +-#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); ++#define EXT_ASSERT(__x__) if (unlikely(!(__x__))) BUG(); + + #define EXT_CHECK_PATH(tree,path) \ + { \ +Index: linux-2.6.9-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-28 00:59:41.000000000 +0400 +@@ -895,6 +895,8 @@ repeat: + /* if we found index with free entry, then use that + * entry: create all needed subtree and add new leaf */ + err = ext3_ext_split(handle, tree, path, newext, i); ++ if (err) ++ goto out; + + /* refill path */ + ext3_ext_drop_refs(path); +@@ -904,12 +906,16 @@ repeat: + } else { + /* tree is full, time to grow in depth */ + err = ext3_ext_grow_indepth(handle, tree, path, newext); ++ if (err) ++ goto out; + + /* refill path */ + ext3_ext_drop_refs(path); + path = ext3_ext_find_extent(tree, newext->ee_block, path); +- if (IS_ERR(path)) ++ if (IS_ERR(path)) { + err = PTR_ERR(path); ++ goto out; ++ } + + /* + * only first (depth 0 -> 1) produces free space +@@ -922,10 +928,8 @@ repeat: + } + } + +- if (err) +- return err; +- +- return 0; ++out: ++ return err; + } + + /* +@@ -1992,21 +1996,10 @@ static int ext3_new_block_cb(handle_t *h + EXT_ASSERT(ex->ee_start); + EXT_ASSERT(ex->ee_len); + +- /* reuse block from the extent to order data/metadata */ +- newblock = ex->ee_start++; +- ex->ee_len--; +- if (ex->ee_len == 0) { +- ex->ee_len = 1; +- /* allocate new block for the extent */ +- goal = ext3_ext_find_goal(inode, path, ex->ee_block); +- ex->ee_start = ext3_new_block(handle, inode, goal, err); +- ex->ee_start_hi = 0; +- if (ex->ee_start == 0) { +- /* error occured: restore old extent */ +- ex->ee_start = newblock; +- return 0; +- } +- } ++ /* allocate new block for the extent */ ++ goal = ext3_ext_find_goal(inode, path, ex->ee_block); ++ newblock = ext3_new_block(handle, inode, goal, err); ++ + return newblock; + } + diff --git a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch new file mode 100644 index 0000000..744cc45 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch @@ -0,0 +1,157 @@ +Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 02:13:37.000000000 +0400 ++++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 02:21:37.000000000 +0400 +@@ -815,7 +815,7 @@ extern struct inode_operations ext3_fast + + /* extents.c */ + extern int ext3_ext_writepage_trans_blocks(struct inode *, int); +-extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, int, + struct buffer_head *, int, int); + extern void ext3_ext_truncate(struct inode *, struct page *); + extern void ext3_ext_init(struct super_block *); +Index: linux-2.6.5-7.283-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/extents.c 2007-03-28 02:14:25.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/extents.c 2007-03-28 02:21:37.000000000 +0400 +@@ -2024,7 +2024,8 @@ void ext3_init_tree_desc(struct ext3_ext + } + + int ext3_ext_get_block(handle_t *handle, struct inode *inode, +- long iblock, struct buffer_head *bh_result, ++ long iblock, int max_blocks, ++ struct buffer_head *bh_result, + int create, int extend_disksize) + { + struct ext3_ext_path *path = NULL; +@@ -2032,6 +2033,11 @@ int ext3_ext_get_block(handle_t *handle, + struct ext3_extent *ex; + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; ++ unsigned long next; ++ int allocated = 0; ++ ++ /* until we have multiblock allocation */ ++ max_blocks = 1; + + __clear_bit(BH_New, &bh_result->b_state); + ext3_init_tree_desc(&tree, inode); +@@ -2051,6 +2057,9 @@ int ext3_ext_get_block(handle_t *handle, + } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; ++ /* number of remaining blocks in the extent */ ++ EXT_ASSERT(iblock >= newex.ee_block); ++ allocated = newex.ee_len - (iblock - newex.ee_block); + goto out; + } else { + EXT_ASSERT(0); +@@ -2078,6 +2087,8 @@ int ext3_ext_get_block(handle_t *handle, + /* if found exent covers block, simple return it */ + if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { + newblock = iblock - ex->ee_block + ex->ee_start; ++ /* number of remaining blocks in the extent */ ++ allocated = ex->ee_len - (iblock - ex->ee_block); + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); +@@ -2098,6 +2109,15 @@ int ext3_ext_get_block(handle_t *handle, + goto out2; + } + ++ /* find next allocated block so that we know how many ++ * blocks we can allocate without ovelapping next extent */ ++ EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); ++ next = ext3_ext_next_allocated_block(path); ++ EXT_ASSERT(next > iblock); ++ allocated = next - iblock; ++ if (allocated > max_blocks) ++ allocated = max_blocks; ++ + /* allocate new block */ + goal = ext3_ext_find_goal(inode, path, iblock); + newblock = ext3_new_block(handle, inode, goal, &err); +@@ -2112,8 +2132,11 @@ int ext3_ext_get_block(handle_t *handle, + newex.ee_start_hi = 0; + newex.ee_len = 1; + err = ext3_ext_insert_extent(handle, &tree, path, &newex); +- if (err) ++ if (err) { ++ /* free data blocks we just allocated */ ++ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); + goto out2; ++ } + + if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = inode->i_size; +@@ -2125,10 +2148,13 @@ int ext3_ext_get_block(handle_t *handle, + ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, + newex.ee_start, EXT3_EXT_CACHE_EXTENT); + out: ++ if (allocated > max_blocks) ++ allocated = max_blocks; + ext3_ext_show_leaf(&tree, path); + __set_bit(BH_Mapped, &bh_result->b_state); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; ++ bh_result->b_size = (allocated << inode->i_blkbits); + out2: + if (path) { + ext3_ext_drop_refs(path); +Index: linux-2.6.5-7.283-full/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/inode.c 2007-03-28 02:13:37.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/inode.c 2007-03-28 02:50:19.000000000 +0400 +@@ -800,13 +800,17 @@ changed: + + static inline int + ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, +- struct buffer_head *bh, int create, int extend_disksize) ++ int max_blocks, struct buffer_head *bh, int create, ++ int extend_disksize) + { ++ int ret; + if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) +- return ext3_ext_get_block(handle, inode, block, bh, create, +- extend_disksize); +- return ext3_get_block_handle(handle, inode, block, bh, create, ++ return ext3_ext_get_block(handle, inode, block, max_blocks, ++ bh, create, extend_disksize); ++ ret = ext3_get_block_handle(handle, inode, block, bh, create, + extend_disksize); ++ bh->b_size = (1 << inode->i_blkbits); ++ return ret; + } + + static int ext3_get_block(struct inode *inode, sector_t iblock, +@@ -819,7 +823,7 @@ static int ext3_get_block(struct inode * + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_wrap(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, 1, + bh_result, create, 1); + return ret; + } +@@ -847,10 +851,8 @@ ext3_direct_io_get_blocks(struct inode * + } + } + if (ret == 0) +- ret = ext3_get_block_wrap(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, max_blocks, + bh_result, create, 0); +- if (ret == 0) +- bh_result->b_size = (1 << inode->i_blkbits); + return ret; + } + +@@ -869,7 +871,7 @@ struct buffer_head *ext3_getblk(handle_t + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, 1, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); diff --git a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch new file mode 100644 index 0000000..726a473 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch @@ -0,0 +1,149 @@ +--- linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-02-16 17:16:23.000000000 +0300 +@@ -850,7 +850,7 @@ extern struct inode_operations ext3_fast + + /* extents.c */ + extern int ext3_ext_writepage_trans_blocks(struct inode *, int); +-extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, int, + struct buffer_head *, int, int); + extern void ext3_ext_truncate(struct inode *, struct page *); + extern void ext3_ext_init(struct super_block *); + +--- linux-2.6.9-full/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/fs/ext3/extents.c 2007-02-22 17:45:05.000000000 +0300 +@@ -2031,7 +2168,8 @@ void ext3_init_tree_desc(struct ext3_ext + } + + int ext3_ext_get_block(handle_t *handle, struct inode *inode, +- long iblock, struct buffer_head *bh_result, ++ long iblock, int max_blocks, ++ struct buffer_head *bh_result, + int create, int extend_disksize) + { + struct ext3_ext_path *path = NULL; +@@ -2039,6 +2177,11 @@ int ext3_ext_get_block(handle_t *handle, + struct ext3_extent *ex; + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; ++ unsigned long next; ++ int allocated = 0; ++ ++ /* until we have multiblock allocation */ ++ max_blocks = 1; + + clear_buffer_new(bh_result); + ext3_init_tree_desc(&tree, inode); +@@ -2058,6 +2201,9 @@ int ext3_ext_get_block(handle_t *handle, + } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; ++ /* number of remaining blocks in the extent */ ++ EXT_ASSERT(iblock >= newex.ee_block); ++ allocated = newex.ee_len - (iblock - newex.ee_block); + goto out; + } else { + EXT_ASSERT(0); +@@ -2085,6 +2231,8 @@ int ext3_ext_get_block(handle_t *handle, + /* if found exent covers block, simple return it */ + if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { + newblock = iblock - ex->ee_block + ex->ee_start; ++ /* number of remaining blocks in the extent */ ++ allocated = ex->ee_len - (iblock - ex->ee_block); + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); +@@ -2105,6 +2253,15 @@ int ext3_ext_get_block(handle_t *handle, + goto out2; + } + ++ /* find next allocated block so that we know how many ++ * blocks we can allocate without ovelapping next extent */ ++ EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); ++ next = ext3_ext_next_allocated_block(path); ++ EXT_ASSERT(next > iblock); ++ allocated = next - iblock; ++ if (allocated > max_blocks) ++ allocated = max_blocks; ++ + /* allocate new block */ + goal = ext3_ext_find_goal(inode, path, iblock); + newblock = ext3_new_block(handle, inode, goal, &err); +@@ -2119,8 +2276,11 @@ int ext3_ext_get_block(handle_t *handle, + newex.ee_start_hi = 0; + newex.ee_len = 1; + err = ext3_ext_insert_extent(handle, &tree, path, &newex); +- if (err) ++ if (err) { ++ /* free data blocks we just allocated */ ++ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); + goto out2; ++ } + + if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = inode->i_size; +@@ -2132,8 +2292,11 @@ int ext3_ext_get_block(handle_t *handle, + ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, + newex.ee_start, EXT3_EXT_CACHE_EXTENT); + out: ++ if (allocated > max_blocks) ++ allocated = max_blocks; + ext3_ext_show_leaf(&tree, path); + map_bh(bh_result, inode->i_sb, newblock); ++ bh_result->b_size = (allocated << inode->i_blkbits); + out2: + if (path) { + ext3_ext_drop_refs(path); +--- linux-2.6.9-full/fs/ext3/inode.c 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/fs/ext3/inode.c 2007-02-16 17:17:03.000000000 +0300 +@@ -798,13 +798,17 @@ changed: + + static inline int + ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, +- struct buffer_head *bh, int create, int extend_disksize) ++ int max_blocks, struct buffer_head *bh, int create, ++ int extend_disksize) + { ++ int ret; + if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) +- return ext3_ext_get_block(handle, inode, block, bh, create, +- extend_disksize); +- return ext3_get_block_handle(handle, inode, block, bh, create, ++ return ext3_ext_get_block(handle, inode, block, max_blocks, ++ bh, create, extend_disksize); ++ ret = ext3_get_block_handle(handle, inode, block, bh, create, + extend_disksize); ++ bh->b_size = (1 << inode->i_blkbits); ++ return ret; + } + + static int ext3_get_block(struct inode *inode, sector_t iblock, +@@ -817,7 +821,7 @@ static int ext3_get_block(struct inode * + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_wrap(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, 1, + bh_result, create, 1); + return ret; + } +@@ -862,9 +866,8 @@ ext3_direct_io_get_blocks(struct inode * + + get_block: + if (ret == 0) +- ret = ext3_get_block_wrap(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, max_blocks, + bh_result, create, 0); +- bh_result->b_size = (1 << inode->i_blkbits); + return ret; + } + +@@ -882,7 +885,7 @@ struct buffer_head *ext3_getblk(handle_t + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, 1, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); diff --git a/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch new file mode 100644 index 0000000..2ad69c8 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch @@ -0,0 +1,168 @@ +Index: linux-2.6.9-full/include/linux/ext3_extents.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_extents.h 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/include/linux/ext3_extents.h 2007-03-26 22:08:16.000000000 +0400 +@@ -242,6 +242,8 @@ struct ext3_extent_tree_stats { + int leaf_num; + }; + ++extern int ext3_ext_search_left(struct ext3_extents_tree *, struct ext3_ext_path *, unsigned long *, unsigned long *); ++extern int ext3_ext_search_right(struct ext3_extents_tree *, struct ext3_ext_path *, unsigned long *, unsigned long *); + extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); + extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); + extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); +Index: linux-2.6.9-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 ++++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-26 22:07:37.000000000 +0400 +@@ -929,6 +929,150 @@ repeat: + } + + /* ++ * search the closest allocated block to the left for *logical ++ * and returns it at @logical + it's physical address at @phys ++ * if *logical is the smallest allocated block, the function ++ * returns 0 at @phys ++ * return value contains 0 (success) or error code ++ */ ++int ++ext3_ext_search_left(struct ext3_extents_tree *tree, struct ext3_ext_path *path, ++ unsigned long *logical, unsigned long *phys) ++{ ++ struct ext3_extent_idx *ix; ++ struct ext3_extent *ex; ++ int depth; ++ ++ BUG_ON(path == NULL); ++ depth = path->p_depth; ++ *phys = 0; ++ ++ if (depth == 0 && path->p_ext == NULL) ++ return 0; ++ ++ /* usually extent in the path covers blocks smaller ++ * then *logical, but it can be that extent is the ++ * first one in the file */ ++ ++ ex = path[depth].p_ext; ++ if (*logical < ex->ee_block) { ++ BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); ++ while (--depth >= 0) { ++ ix = path[depth].p_idx; ++ BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); ++ } ++ return 0; ++ } ++ ++ BUG_ON(*logical < ex->ee_block + ex->ee_len); ++ ++ *logical = ex->ee_block + ex->ee_len - 1; ++ *phys = ex->ee_start + ex->ee_len - 1; ++ return 0; ++} ++EXPORT_SYMBOL(ext3_ext_search_left); ++ ++/* ++ * search the closest allocated block to the right for *logical ++ * and returns it at @logical + it's physical address at @phys ++ * if *logical is the smallest allocated block, the function ++ * returns 0 at @phys ++ * return value contains 0 (success) or error code ++ */ ++int ++ext3_ext_search_right(struct ext3_extents_tree *tree, struct ext3_ext_path *path, ++ unsigned long *logical, unsigned long *phys) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext3_extent_header *eh; ++ struct ext3_extent_idx *ix; ++ struct ext3_extent *ex; ++ unsigned long block; ++ int depth; ++ ++ BUG_ON(path == NULL); ++ depth = path->p_depth; ++ *phys = 0; ++ ++ if (depth == 0 && path->p_ext == NULL) ++ return 0; ++ ++ /* usually extent in the path covers blocks smaller ++ * then *logical, but it can be that extent is the ++ * first one in the file */ ++ ++ ex = path[depth].p_ext; ++ if (*logical < ex->ee_block) { ++ BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); ++ while (--depth >= 0) { ++ ix = path[depth].p_idx; ++ BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); ++ } ++ *logical = ex->ee_block; ++ *phys = ex->ee_start; ++ return 0; ++ } ++ ++ BUG_ON(*logical < ex->ee_block + ex->ee_len); ++ ++ if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { ++ /* next allocated block in this leaf */ ++ ex++; ++ *logical = ex->ee_block; ++ *phys = ex->ee_start; ++ return 0; ++ } ++ ++ /* go up and search for index to the right */ ++ while (--depth >= 0) { ++ ix = path[depth].p_idx; ++ if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) ++ break; ++ } ++ ++ if (depth < 0) { ++ /* we've gone up to the root and ++ * found no index to the right */ ++ return 0; ++ } ++ ++ /* we've found index to the right, let's ++ * follow it and find the closest allocated ++ * block to the right */ ++ ix++; ++ block = ix->ei_leaf; ++ while (++depth < path->p_depth) { ++ bh = sb_bread(tree->inode->i_sb, block); ++ if (bh == NULL) ++ return -EIO; ++ eh = EXT_BLOCK_HDR(bh); ++ if (ext3_ext_check_header(eh)) { ++ brelse(bh); ++ return -EIO; ++ } ++ ix = EXT_FIRST_INDEX(eh); ++ block = ix->ei_leaf; ++ brelse(bh); ++ } ++ ++ bh = sb_bread(tree->inode->i_sb, block); ++ if (bh == NULL) ++ return -EIO; ++ eh = EXT_BLOCK_HDR(bh); ++ if (ext3_ext_check_header(eh)) { ++ brelse(bh); ++ return -EIO; ++ } ++ ex = EXT_FIRST_EXTENT(eh); ++ *logical = ex->ee_block; ++ *phys = ex->ee_start; ++ brelse(bh); ++ return 0; ++ ++} ++EXPORT_SYMBOL(ext3_ext_search_right); ++ ++/* + * returns allocated block in subsequent extent or EXT_MAX_BLOCK + * NOTE: it consider block number from index entry as + * allocated block. thus, index entries have to be consistent diff --git a/lustre/kernel_patches/patches/ext3-external-journal-2.6.12.patch b/lustre/kernel_patches/patches/ext3-external-journal-2.6.12.patch deleted file mode 100644 index bcfdae2..0000000 --- a/lustre/kernel_patches/patches/ext3-external-journal-2.6.12.patch +++ /dev/null @@ -1,148 +0,0 @@ -Signed-off-by: Johann Lombardi - ---- linux-2.6.12.orig/fs/ext3/super.c 2005-06-17 21:48:29.000000000 +0200 -+++ linux-2.6.12/fs/ext3/super.c 2005-11-07 13:37:30.000000000 +0100 -@@ -39,7 +39,8 @@ - #include "xattr.h" - #include "acl.h" - --static int ext3_load_journal(struct super_block *, struct ext3_super_block *); -+static int ext3_load_journal(struct super_block *, struct ext3_super_block *, -+ unsigned long journal_devnum); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); - static void ext3_commit_super (struct super_block * sb, -@@ -586,7 +587,7 @@ enum { - Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, -- Opt_commit, Opt_journal_update, Opt_journal_inum, -+ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, -@@ -624,6 +625,7 @@ static match_table_t tokens = { - {Opt_commit, "commit=%u"}, - {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, -+ {Opt_journal_dev, "journal_dev=%u"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, -@@ -663,8 +665,9 @@ static unsigned long get_sb_block(void * - return sb_block; - } - --static int parse_options (char * options, struct super_block *sb, -- unsigned long * inum, unsigned long *n_blocks_count, int is_remount) -+static int parse_options (char *options, struct super_block *sb, -+ unsigned long *inum, unsigned long *journal_devnum, -+ unsigned long *n_blocks_count, int is_remount) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); - char * p; -@@ -805,6 +808,16 @@ static int parse_options (char * options - return 0; - *inum = option; - break; -+ case Opt_journal_dev: -+ if (is_remount) { -+ printk(KERN_ERR "EXT3-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option)) -+ return 0; -+ *journal_devnum = option; -+ break; - case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); - break; -@@ -1250,6 +1263,7 @@ static int ext3_fill_super (struct super - unsigned long logic_sb_block; - unsigned long offset = 0; - unsigned long journal_inum = 0; -+ unsigned long journal_devnum = 0; - unsigned long def_mount_opts; - struct inode *root; - int blocksize; -@@ -1330,7 +1344,8 @@ static int ext3_fill_super (struct super - - set_opt(sbi->s_mount_opt, RESERVATION); - -- if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) -+ if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, -+ NULL, 0)) - goto failed_mount; - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | -@@ -1541,7 +1556,7 @@ static int ext3_fill_super (struct super - */ - if (!test_opt(sb, NOLOAD) && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { -- if (ext3_load_journal(sb, es)) -+ if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; - } else if (journal_inum) { - if (ext3_create_journal(sb, es, journal_inum)) -@@ -1821,15 +1836,24 @@ out_bdev: - return NULL; - } - --static int ext3_load_journal(struct super_block * sb, -- struct ext3_super_block * es) -+static int ext3_load_journal(struct super_block *sb, -+ struct ext3_super_block *es, -+ unsigned long journal_devnum) - { - journal_t *journal; - int journal_inum = le32_to_cpu(es->s_journal_inum); -- dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ dev_t journal_dev; - int err = 0; - int really_read_only; - -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ printk(KERN_INFO "EXT3-fs: external journal device major/minor " -+ "numbers have changed\n"); -+ journal_dev = new_decode_dev(journal_devnum); -+ } else -+ journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ - really_read_only = bdev_read_only(sb->s_bdev); - - /* -@@ -1888,6 +1912,16 @@ static int ext3_load_journal(struct supe - - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); -+ -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ es->s_journal_dev = cpu_to_le32(journal_devnum); -+ sb->s_dirt = 1; -+ -+ /* Make sure we flush the recovery flag to disk. */ -+ ext3_commit_super(sb, es, 1); -+ } -+ - return 0; - } - -@@ -2093,13 +2127,13 @@ static int ext3_remount (struct super_bl - { - struct ext3_super_block * es; - struct ext3_sb_info *sbi = EXT3_SB(sb); -- unsigned long tmp; -+ unsigned long tmp1, tmp2; - unsigned long n_blocks_count = 0; - - /* - * Allow the "check" option to be passed as a remount option. - */ -- if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) -+ if (!parse_options(data, sb, &tmp1, &tmp2, &n_blocks_count, 1)) - return -EINVAL; - - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) diff --git a/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch b/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch deleted file mode 100644 index 7cc86f2..0000000 --- a/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch +++ /dev/null @@ -1,150 +0,0 @@ -Signed-off-by: Johann Lombardi - -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-05-20 01:14:14.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-05-20 01:17:10.000000000 +0400 -@@ -39,7 +39,8 @@ - #include "xattr.h" - #include "acl.h" - --static int ext3_load_journal(struct super_block *, struct ext3_super_block *); -+static int ext3_load_journal(struct super_block *, struct ext3_super_block *, -+ unsigned long journal_devnum); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); - static void ext3_commit_super (struct super_block * sb, -@@ -591,7 +592,7 @@ enum { - Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, -- Opt_commit, Opt_journal_update, Opt_journal_inum, -+ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, -@@ -630,6 +631,7 @@ static match_table_t tokens = { - {Opt_commit, "commit=%u"}, - {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, -+ {Opt_journal_dev, "journal_dev=%u"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, -@@ -675,8 +677,9 @@ static unsigned long get_sb_block(void * - return sb_block; - } - --static int parse_options (char * options, struct super_block *sb, -- unsigned long * inum, unsigned long *n_blocks_count, int is_remount) -+static int parse_options (char *options, struct super_block *sb, -+ unsigned long *inum, unsigned long *journal_devnum, -+ unsigned long *n_blocks_count, int is_remount) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); - char * p; -@@ -816,6 +819,16 @@ static int parse_options (char * options - return 0; - *inum = option; - break; -+ case Opt_journal_dev: -+ if (is_remount) { -+ printk(KERN_ERR "EXT3-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option)) -+ return 0; -+ *journal_devnum = option; -+ break; - case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); - break; -@@ -1278,6 +1291,7 @@ static int ext3_fill_super (struct super - unsigned long logic_sb_block; - unsigned long offset = 0; - unsigned long journal_inum = 0; -+ unsigned long journal_devnum = 0; - unsigned long def_mount_opts; - struct inode *root; - int blocksize; -@@ -1361,7 +1375,8 @@ static int ext3_fill_super (struct super - - set_opt(sbi->s_mount_opt, RESERVATION); - -- if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) -+ if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, -+ NULL, 0)) - goto failed_mount; - - set_sb_time_gran(sb, 1000000000U); -@@ -1567,7 +1582,7 @@ static int ext3_fill_super (struct super - */ - if (!test_opt(sb, NOLOAD) && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { -- if (ext3_load_journal(sb, es)) -+ if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; - } else if (journal_inum) { - if (ext3_create_journal(sb, es, journal_inum)) -@@ -1831,15 +1846,24 @@ out_bdev: - return NULL; - } - --static int ext3_load_journal(struct super_block * sb, -- struct ext3_super_block * es) -+static int ext3_load_journal(struct super_block *sb, -+ struct ext3_super_block *es, -+ unsigned long journal_devnum) - { - journal_t *journal; - int journal_inum = le32_to_cpu(es->s_journal_inum); -- dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ dev_t journal_dev; - int err = 0; - int really_read_only; - -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ printk(KERN_INFO "EXT3-fs: external journal device major/minor " -+ "numbers have changed\n"); -+ journal_dev = new_decode_dev(journal_devnum); -+ } else -+ journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ - really_read_only = bdev_read_only(sb->s_bdev); - - /* -@@ -1898,6 +1922,16 @@ static int ext3_load_journal(struct supe - - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); -+ -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ es->s_journal_dev = cpu_to_le32(journal_devnum); -+ sb->s_dirt = 1; -+ -+ /* Make sure we flush the recovery flag to disk. */ -+ ext3_commit_super(sb, es, 1); -+ } -+ - return 0; - } - -@@ -2105,13 +2139,13 @@ int ext3_remount (struct super_block * s - { - struct ext3_super_block * es; - struct ext3_sb_info *sbi = EXT3_SB(sb); -- unsigned long tmp; -+ unsigned long tmp1, tmp2; - unsigned long n_blocks_count = 0; - - /* - * Allow the "check" option to be passed as a remount option. - */ -- if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) -+ if (!parse_options(data, sb, &tmp1, &tmp2, &n_blocks_count, 1)) - return -EINVAL; - - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) diff --git a/lustre/kernel_patches/patches/ext3-filterdata-2.6.15.patch b/lustre/kernel_patches/patches/ext3-filterdata-2.6.15.patch deleted file mode 100644 index e6d431f..0000000 --- a/lustre/kernel_patches/patches/ext3-filterdata-2.6.15.patch +++ /dev/null @@ -1,25 +0,0 @@ -Index: linux-2.6.15/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.15.orig/include/linux/ext3_fs_i.h 2006-02-24 15:41:30.000000000 +0300 -+++ linux-2.6.15/include/linux/ext3_fs_i.h 2006-02-24 15:41:31.000000000 +0300 -@@ -135,6 +135,8 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ -+ void *i_filterdata; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.15/fs/ext3/super.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/super.c 2006-02-24 15:41:30.000000000 +0300 -+++ linux-2.6.15/fs/ext3/super.c 2006-02-24 15:42:02.000000000 +0300 -@@ -459,6 +459,7 @@ static struct inode *ext3_alloc_inode(st - ei->vfs_inode.i_version = 1; - - memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); -+ ei->i_filterdata = NULL; - return &ei->vfs_inode; - } - diff --git a/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch b/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch new file mode 100644 index 0000000..5f7c8c9 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch @@ -0,0 +1,25 @@ +Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 16:03:20.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h 2007-03-28 19:40:53.000000000 +0400 +@@ -139,6 +139,8 @@ struct ext3_inode_info { + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; ++ ++ void *i_filterdata; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 18:20:17.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 19:40:53.000000000 +0400 +@@ -462,6 +462,7 @@ static struct inode *ext3_alloc_inode(st + memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); + INIT_LIST_HEAD(&ei->i_prealloc_list); + spin_lock_init(&ei->i_prealloc_lock); ++ ei->i_filterdata = NULL; + return &ei->vfs_inode; + } + diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch deleted file mode 100644 index 4b445f5..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch +++ /dev/null @@ -1,2593 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 302 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1421 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1922 insertions(+), 161 deletions(-) - -Index: linux-2.4.21-chaos/fs/ext3/dir.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/dir.c 2002-05-08 01:53:46.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/dir.c 2003-12-12 16:18:17.000000000 +0300 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ - * during the copy operation. - */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,272 @@ - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return 0; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_get_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_get_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_get_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif -Index: linux-2.4.21-chaos/fs/ext3/file.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/file.c 2003-12-05 07:55:47.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/file.c 2003-12-12 16:18:17.000000000 +0300 -@@ -38,6 +38,9 @@ - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - -Index: linux-2.4.21-chaos/fs/ext3/hash.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/hash.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/hash.c 2003-12-12 16:18:17.000000000 +0300 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. -+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} -Index: linux-2.4.21-chaos/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/Makefile 2003-12-12 16:17:59.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/Makefile 2003-12-12 16:18:17.000000000 +0300 -@@ -12,7 +12,7 @@ - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-chaos/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/namei.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/namei.c 2003-12-12 16:18:17.000000000 +0300 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -40,6 +46,642 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ de = ext3_next_entry(de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ count += 2; -+ } -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -96,6 +738,7 @@ - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -107,6 +750,8 @@ - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -121,12 +766,32 @@ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -167,7 +832,7 @@ - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -198,6 +863,66 @@ - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -214,8 +939,9 @@ - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -239,6 +965,301 @@ - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3_dir_entry_2 *)((char *) to + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -249,127 +1270,198 @@ - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. -- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -456,9 +1548,11 @@ - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -482,9 +1576,11 @@ - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -513,9 +1609,11 @@ - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -527,7 +1625,7 @@ - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -556,21 +1654,19 @@ - brelse (dir_block); - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -657,7 +1753,7 @@ - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -698,7 +1794,7 @@ - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -716,25 +1812,26 @@ - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -795,8 +1892,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -834,7 +1932,7 @@ - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -852,8 +1950,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -880,7 +1979,7 @@ - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -906,9 +2005,11 @@ - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -918,7 +2019,7 @@ - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -927,24 +2028,22 @@ - * i_size in generic_commit_write(). - */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -957,12 +2056,15 @@ - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2097,11 @@ - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1070,14 +2174,37 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2212,7 @@ - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-12 16:17:59.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-12 16:18:17.000000000 +0300 -@@ -777,6 +777,7 @@ - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -787,6 +788,7 @@ - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -960,6 +962,7 @@ - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1146,6 +1149,9 @@ - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1938,6 +1944,7 @@ - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 16:18:17.000000000 +0300 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -415,8 +420,11 @@ - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -553,9 +561,46 @@ - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -565,6 +610,27 @@ - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -592,11 +658,20 @@ - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -630,6 +705,8 @@ - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) -Index: linux-2.4.21-chaos/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h 2003-12-12 16:18:17.000000000 +0300 -@@ -62,6 +62,8 @@ - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; -Index: linux-2.4.21-chaos/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_jbd.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_jbd.h 2003-12-12 16:18:17.000000000 +0300 -@@ -69,6 +69,8 @@ - - #define EXT3_RESERVE_TRANS_BLOCKS 12U - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, -Index: linux-2.4.21-chaos/include/linux/rbtree.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/rbtree.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/rbtree.h 2003-12-12 16:18:17.000000000 +0300 -@@ -120,6 +120,8 @@ - - extern void rb_insert_color(rb_node_t *, rb_root_t *); - extern void rb_erase(rb_node_t *, rb_root_t *); -+extern rb_node_t *rb_get_first(rb_root_t *root); -+extern rb_node_t *rb_get_next(rb_node_t *n); - - static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) - { -Index: linux-2.4.21-chaos/lib/rbtree.c -=================================================================== ---- linux-2.4.21-chaos.orig/lib/rbtree.c 2002-09-25 21:14:03.000000000 +0400 -+++ linux-2.4.21-chaos/lib/rbtree.c 2003-12-12 16:18:17.000000000 +0300 -@@ -17,6 +17,8 @@ - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -+ -+ rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002 - */ - - #include -@@ -294,3 +296,43 @@ - __rb_erase_color(child, parent, root); - } - EXPORT_SYMBOL(rb_erase); -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+rb_node_t *rb_get_first(rb_root_t *root) -+{ -+ rb_node_t *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return 0; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+EXPORT_SYMBOL(rb_get_first); -+ -+/* -+ * Given a node, this function will return the next node in the tree. -+ */ -+rb_node_t *rb_get_next(rb_node_t *n) -+{ -+ rb_node_t *parent; -+ -+ if (n->rb_right) { -+ n = n->rb_right; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+ } else { -+ while ((parent = n->rb_parent)) { -+ if (n == parent->rb_left) -+ return parent; -+ n = parent; -+ } -+ return 0; -+ } -+} -+EXPORT_SYMBOL(rb_get_next); -+ diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.21-rhel.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.21-rhel.patch deleted file mode 100644 index c42156b..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-2.4.21-rhel.patch +++ /dev/null @@ -1,2531 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 302 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1421 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1922 insertions(+), 161 deletions(-) - -Index: linux-2.4.21/fs/ext3/dir.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/dir.c 2001-11-09 17:25:04.000000000 -0500 -+++ linux-2.4.21/fs/ext3/dir.c 2004-09-16 19:41:03.000000000 -0400 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ - * during the copy operation. - */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,272 @@ - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return 0; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif -Index: linux-2.4.21/fs/ext3/file.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/file.c 2004-09-11 10:16:28.000000000 -0400 -+++ linux-2.4.21/fs/ext3/file.c 2004-09-16 19:40:16.000000000 -0400 -@@ -38,6 +38,9 @@ - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - -Index: linux-2.4.21/fs/ext3/hash.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/hash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.4.21/fs/ext3/hash.c 2004-09-16 19:40:16.000000000 -0400 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. -+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} -Index: linux-2.4.21/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21.orig/fs/ext3/Makefile 2004-09-16 19:21:00.000000000 -0400 -+++ linux-2.4.21/fs/ext3/Makefile 2004-09-16 19:40:16.000000000 -0400 -@@ -12,7 +12,7 @@ - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/namei.c 2004-09-11 10:16:28.000000000 -0400 -+++ linux-2.4.21/fs/ext3/namei.c 2004-09-16 19:40:16.000000000 -0400 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -40,6 +46,642 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ de = ext3_next_entry(de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ count += 2; -+ } -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -96,6 +738,7 @@ - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -107,6 +750,8 @@ - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -121,12 +766,32 @@ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -167,7 +832,7 @@ - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -198,6 +863,74 @@ - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *)((char *)de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -214,8 +939,9 @@ - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -239,6 +965,301 @@ - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3_dir_entry_2 *)((char *) to + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -249,127 +1270,198 @@ - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. -- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -456,9 +1548,11 @@ - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -482,9 +1576,11 @@ - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -513,9 +1609,11 @@ - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -527,7 +1625,7 @@ - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -556,21 +1654,19 @@ - brelse (dir_block); - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -657,7 +1753,7 @@ - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -698,7 +1794,7 @@ - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -716,25 +1812,26 @@ - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -795,8 +1892,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -834,7 +1932,7 @@ - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -852,8 +1950,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -880,7 +1979,7 @@ - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -906,9 +2005,11 @@ - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -918,7 +2019,7 @@ - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -927,24 +2028,22 @@ - * i_size in generic_commit_write(). - */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -957,12 +2056,15 @@ - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2097,11 @@ - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1070,14 +2174,37 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2216,7 @@ - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } -Index: linux-2.4.21/fs/ext3/super.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/super.c 2004-09-16 19:21:00.000000000 -0400 -+++ linux-2.4.21/fs/ext3/super.c 2004-09-16 19:40:16.000000000 -0400 -@@ -777,6 +777,7 @@ - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -787,6 +788,7 @@ - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -960,6 +962,7 @@ - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1146,6 +1149,9 @@ - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1938,6 +1944,7 @@ - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -Index: linux-2.4.21/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21.orig/include/linux/ext3_fs.h 2004-09-11 10:16:28.000000000 -0400 -+++ linux-2.4.21/include/linux/ext3_fs.h 2004-09-16 19:40:16.000000000 -0400 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -415,8 +420,11 @@ - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -553,9 +561,46 @@ - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -565,6 +610,27 @@ - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -592,11 +658,20 @@ - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -630,6 +705,8 @@ - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) -Index: linux-2.4.21/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21.orig/include/linux/ext3_fs_sb.h 2004-09-11 10:16:39.000000000 -0400 -+++ linux-2.4.21/include/linux/ext3_fs_sb.h 2004-09-16 19:40:16.000000000 -0400 -@@ -62,6 +62,8 @@ - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; -Index: linux-2.4.21/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.21.orig/include/linux/ext3_jbd.h 2004-09-11 10:16:39.000000000 -0400 -+++ linux-2.4.21/include/linux/ext3_jbd.h 2004-09-16 19:40:16.000000000 -0400 -@@ -69,6 +69,8 @@ - - #define EXT3_RESERVE_TRANS_BLOCKS 12U - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch deleted file mode 100644 index 7895513..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch +++ /dev/null @@ -1,2581 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 302 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1421 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1922 insertions(+), 161 deletions(-) - ---- linux-2.4.22-ac1/fs/ext3/dir.c~ext3-htree-2.4.22-rh 2001-11-10 01:25:04.000000000 +0300 -+++ linux-2.4.22-ac1-alexey/fs/ext3/dir.c 2003-09-25 14:58:30.000000000 +0400 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ struct file_operations ext3_dir_operatio - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ static int ext3_readdir(struct file * fi - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ revalidate: - * during the copy operation. - */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,272 @@ revalidate: - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return 0; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_get_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_get_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_get_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif ---- linux-2.4.22-ac1/fs/ext3/file.c~ext3-htree-2.4.22-rh 2003-08-25 15:44:43.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/file.c 2003-09-25 14:55:12.000000000 +0400 -@@ -35,6 +35,9 @@ static int ext3_release_file (struct ino - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - ---- /dev/null 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.22-ac1-alexey/fs/ext3/hash.c 2003-09-25 14:55:12.000000000 +0400 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. -+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} ---- linux-2.4.22-ac1/fs/ext3/Makefile~ext3-htree-2.4.22-rh 2003-09-25 14:39:01.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/Makefile 2003-09-25 14:55:12.000000000 +0400 -@@ -12,7 +12,7 @@ O_TARGET := ext3.o - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-2.4.22-ac1/fs/ext3/namei.c~ext3-htree-2.4.22-rh 2003-09-25 14:16:29.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/namei.c 2003-09-25 14:58:37.000000000 +0400 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -38,6 +44,642 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ de = ext3_next_entry(de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ count += 2; -+ } -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -94,6 +736,7 @@ static int inline search_dirblock(struct - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -105,6 +748,8 @@ static int inline search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,12 +764,32 @@ static struct buffer_head * ext3_find_en - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -166,7 +831,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -197,6 +862,74 @@ cleanup_and_exit: - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *)((char *)de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -213,8 +938,9 @@ static struct dentry *ext3_lookup(struct - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -238,6 +964,301 @@ static inline void ext3_set_de_type(stru - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -248,127 +1269,198 @@ static inline void ext3_set_de_type(stru - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. -- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -455,9 +1547,11 @@ static int ext3_create (struct inode * d - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -481,9 +1575,11 @@ static int ext3_mknod (struct inode * di - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -509,9 +1605,11 @@ static int ext3_mkdir(struct inode * dir - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -523,7 +1621,7 @@ static int ext3_mkdir(struct inode * dir - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -@@ -556,21 +1654,19 @@ static int ext3_mkdir(struct inode * dir - inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -657,7 +1753,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -698,7 +1794,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -716,25 +1812,26 @@ out_unlock: - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -795,8 +1892,9 @@ static int ext3_rmdir (struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -834,7 +1932,7 @@ static int ext3_rmdir (struct inode * di - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -852,8 +1950,9 @@ static int ext3_unlink(struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -880,7 +1979,7 @@ static int ext3_unlink(struct inode * di - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -906,9 +2005,11 @@ static int ext3_symlink (struct inode * - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -918,7 +2019,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -927,24 +2028,22 @@ static int ext3_symlink (struct inode * - * i_size in generic_commit_write(). - */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -957,12 +2056,15 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2097,11 @@ static int ext3_rename (struct inode * o - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1070,14 +2174,37 @@ static int ext3_rename (struct inode * o - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2212,7 @@ static int ext3_rename (struct inode * o - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- linux-2.4.22-ac1/fs/ext3/super.c~ext3-htree-2.4.22-rh 2003-09-25 14:39:01.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/super.c 2003-09-25 14:55:12.000000000 +0400 -@@ -714,6 +714,7 @@ static int ext3_setup_super(struct super - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -724,6 +725,7 @@ static int ext3_setup_super(struct super - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -897,6 +899,7 @@ static loff_t ext3_max_size(int bits) - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1073,6 +1076,9 @@ struct super_block * ext3_read_super (st - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1846,6 +1852,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); ---- linux-2.4.22-ac1/include/linux/ext3_fs.h~ext3-htree-2.4.22-rh 2003-09-25 14:16:29.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_fs.h 2003-09-25 14:58:30.000000000 +0400 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -440,8 +445,11 @@ struct ext3_super_block { - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -578,9 +586,46 @@ struct ext3_dir_entry_2 { - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -590,6 +635,27 @@ struct ext3_iloc - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -617,11 +683,20 @@ extern struct ext3_group_desc * ext3_get - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -655,6 +730,8 @@ extern int ext3_ioctl (struct inode *, s - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) ---- linux-2.4.22-ac1/include/linux/ext3_fs_sb.h~ext3-htree-2.4.22-rh 2003-09-25 14:16:34.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_fs_sb.h 2003-09-25 14:55:12.000000000 +0400 -@@ -62,6 +62,8 @@ struct ext3_sb_info { - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - unsigned long s_dir_count; - u8 *s_debts; ---- linux-2.4.22-ac1/include/linux/ext3_jbd.h~ext3-htree-2.4.22-rh 2003-06-13 18:51:38.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_jbd.h 2003-09-25 14:55:12.000000000 +0400 -@@ -63,6 +63,8 @@ extern int ext3_writepage_trans_blocks(s - - #define EXT3_RESERVE_TRANS_BLOCKS 12U - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, ---- linux-2.4.22-ac1/include/linux/rbtree.h~ext3-htree-2.4.22-rh 2001-11-22 22:46:18.000000000 +0300 -+++ linux-2.4.22-ac1-alexey/include/linux/rbtree.h 2003-09-25 14:55:12.000000000 +0400 -@@ -120,6 +120,8 @@ rb_root_t; - - extern void rb_insert_color(rb_node_t *, rb_root_t *); - extern void rb_erase(rb_node_t *, rb_root_t *); -+extern rb_node_t *rb_get_first(rb_root_t *root); -+extern rb_node_t *rb_get_next(rb_node_t *n); - - static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) - { ---- linux-2.4.22-ac1/lib/rbtree.c~ext3-htree-2.4.22-rh 2002-08-03 04:39:46.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/lib/rbtree.c 2003-09-25 14:55:12.000000000 +0400 -@@ -17,6 +17,8 @@ - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -+ -+ rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002 - */ - - #include -@@ -294,3 +296,43 @@ void rb_erase(rb_node_t * node, rb_root_ - __rb_erase_color(child, parent, root); - } - EXPORT_SYMBOL(rb_erase); -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+rb_node_t *rb_get_first(rb_root_t *root) -+{ -+ rb_node_t *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return 0; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+EXPORT_SYMBOL(rb_get_first); -+ -+/* -+ * Given a node, this function will return the next node in the tree. -+ */ -+rb_node_t *rb_get_next(rb_node_t *n) -+{ -+ rb_node_t *parent; -+ -+ if (n->rb_right) { -+ n = n->rb_right; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+ } else { -+ while ((parent = n->rb_parent)) { -+ if (n == parent->rb_left) -+ return parent; -+ n = parent; -+ } -+ return 0; -+ } -+} -+EXPORT_SYMBOL(rb_get_next); -+ - -_ diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.29.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.29.patch deleted file mode 100644 index 259c7b7..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-2.4.29.patch +++ /dev/null @@ -1,2496 +0,0 @@ -Index: linux-2.4.29/fs/ext3/dir.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/dir.c 2005-04-07 18:53:53.000000000 +0300 -+++ linux-2.4.29/fs/ext3/dir.c 2005-05-03 16:34:05.481747664 +0300 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ - * during the copy operation. - */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,272 @@ - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. -+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. -+ */ -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return 0; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) -+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. -+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif -Index: linux-2.4.29/fs/ext3/file.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/file.c 2005-04-07 18:55:11.000000000 +0300 -+++ linux-2.4.29/fs/ext3/file.c 2005-05-03 16:29:50.563501128 +0300 -@@ -35,6 +35,9 @@ - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - -Index: linux-2.4.29/fs/ext3/hash.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/hash.c 2005-05-03 16:29:50.539504776 +0300 -+++ linux-2.4.29/fs/ext3/hash.c 2005-05-03 16:29:50.565500824 +0300 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. -+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ *buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} -Index: linux-2.4.29/fs/ext3/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/ext3/Makefile 2005-04-07 18:59:19.000000000 +0300 -+++ linux-2.4.29/fs/ext3/Makefile 2005-05-03 16:29:50.565500824 +0300 -@@ -12,7 +12,7 @@ - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make -Index: linux-2.4.29/fs/ext3/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/namei.c 2005-04-07 18:53:59.000000000 +0300 -+++ linux-2.4.29/fs/ext3/namei.c 2005-05-03 16:29:50.576499152 +0300 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -38,6 +44,642 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. -+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ /* Add '.' and '..' from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ de = ext3_next_entry(de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; -+ count += 2; -+ } -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -94,6 +736,7 @@ - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -105,6 +748,8 @@ - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,12 +764,32 @@ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. -+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -165,7 +830,7 @@ - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -196,6 +861,66 @@ - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, __FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -212,8 +937,9 @@ - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -237,6 +963,301 @@ - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len); -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = cpu_to_le16(rec_len); -+ prev = to; -+ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? */ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. -+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *)&root->dotdot; -+ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -247,127 +1268,198 @@ - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. -- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -454,9 +1546,11 @@ - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -480,9 +1574,11 @@ - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -508,9 +1604,11 @@ - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -522,7 +1620,7 @@ - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -@@ -555,21 +1653,19 @@ - inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -656,7 +1752,7 @@ - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -697,7 +1793,7 @@ - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -715,25 +1811,26 @@ - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -794,8 +1891,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -833,7 +1931,7 @@ - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -851,8 +1949,9 @@ - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -879,7 +1978,7 @@ - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -905,9 +2004,11 @@ - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -917,7 +2018,7 @@ - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -926,24 +2027,22 @@ - * i_size in generic_commit_write(). - */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -956,12 +2055,15 @@ - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -994,9 +2096,11 @@ - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1069,14 +2173,37 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (le32_to_cpu(old_de->inode) != old_inode->i_ino || -+ old_de->name_len != old_dentry->d_name.len || -+ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || -+ (retval = ext3_delete_entry(handle, old_dir, -+ old_de, old_bh)) == -ENOENT) { -+ /* old_de could have moved from under us during htree split, so -+ * make sure that we are deleting the right entry. We might -+ * also be pointing to a stale entry in the unused part of -+ * old_bh so just checking inum and the name isn't enough. */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1088,7 +2215,7 @@ - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } -Index: linux-2.4.29/fs/ext3/super.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/super.c 2005-04-07 18:59:19.000000000 +0300 -+++ linux-2.4.29/fs/ext3/super.c 2005-05-03 16:29:50.580498544 +0300 -@@ -712,6 +712,7 @@ - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -722,6 +723,7 @@ - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -915,6 +917,7 @@ - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1094,6 +1097,9 @@ - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1845,6 +1851,7 @@ - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -Index: linux-2.4.29/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-04-07 18:52:26.000000000 +0300 -+++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 16:29:50.584497936 +0300 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -593,9 +598,46 @@ - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -605,6 +647,27 @@ - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -632,11 +695,20 @@ - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -669,6 +741,8 @@ - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) -Index: linux-2.4.29/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs_sb.h 2005-04-07 18:54:55.000000000 +0300 -+++ linux-2.4.29/include/linux/ext3_fs_sb.h 2005-05-03 16:29:50.586497632 +0300 -@@ -62,6 +62,8 @@ - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; -Index: linux-2.4.29/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_jbd.h 2005-04-07 18:52:32.000000000 +0300 -+++ linux-2.4.29/include/linux/ext3_jbd.h 2005-05-03 16:29:50.587497480 +0300 -@@ -63,6 +63,8 @@ - - #define EXT3_RESERVE_TRANS_BLOCKS 12U - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch b/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch deleted file mode 100644 index e8ed192..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch +++ /dev/null @@ -1,23 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/namei.c 2005-04-04 05:06:46.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c 2005-04-04 05:09:18.000000000 -0600 -@@ -926,8 +926,16 @@ - struct inode *dir = dentry->d_parent->d_inode; - - sb = dir->i_sb; -- if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -- return NULL; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } - hash = hinfo.hash; - do { - block = dx_get_block(frame->at); diff --git a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.patch b/lustre/kernel_patches/patches/ext3-htree-dot-2.6.patch deleted file mode 100644 index 9192112..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.patch +++ /dev/null @@ -1,23 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/namei.c 2005-04-04 05:06:46.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c 2005-04-04 05:09:18.000000000 -0600 -@@ -926,8 +926,16 @@ - struct inode *dir = dentry->d_parent->d_inode; - - sb = dir->i_sb; -- if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) -- return NULL; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } - hash = hinfo.hash; - do { - block = dx_get_block(frame->at); diff --git a/lustre/kernel_patches/patches/ext3-htree-path-ops.patch b/lustre/kernel_patches/patches/ext3-htree-path-ops.patch deleted file mode 100644 index 9a2edbd..0000000 --- a/lustre/kernel_patches/patches/ext3-htree-path-ops.patch +++ /dev/null @@ -1,894 +0,0 @@ -Index: iam-src/fs/ext3/namei.c -=================================================================== ---- iam-src.orig/fs/ext3/namei.c 2006-02-12 16:43:57.000000000 +0300 -+++ iam-src/fs/ext3/namei.c 2006-02-12 23:22:12.000000000 +0300 -@@ -83,22 +83,21 @@ static struct buffer_head *ext3_append(h - #define dxtrace(command) - #endif - --struct fake_dirent --{ -+struct fake_dirent { - __le32 inode; - __le16 rec_len; - u8 name_len; - u8 file_type; - }; - --struct dx_countlimit --{ -+struct dx_countlimit { - __le16 limit; - __le16 count; - }; - --struct dx_entry --{ -+struct dx_entry; /* incomplete type */ -+ -+struct dx_entry_compat { - __le32 hash; - __le32 block; - }; -@@ -109,8 +108,7 @@ struct dx_entry - * hash version mod 4 should never be 0. Sincerely, the paranoia department. - */ - --struct dx_root --{ -+struct dx_root { - struct fake_dirent dot; - char dot_name[4]; - struct fake_dirent dotdot; -@@ -124,13 +122,13 @@ struct dx_root - u8 unused_flags; - } - info; -- struct dx_entry entries[0]; -+ struct {} entries[0]; - }; - - struct dx_node - { - struct fake_dirent fake; -- struct dx_entry entries[0]; -+ struct {} entries[0]; - }; - - -@@ -147,38 +145,76 @@ struct dx_map_entry - u32 offs; - }; - -+struct dx_path; -+struct dx_param { -+ size_t dpo_key_size; -+ size_t dpo_ptr_size; -+ size_t dpo_node_gap; -+ size_t dpo_root_gap; -+ -+ u32 (*dpo_root_ptr)(struct dx_path *path); -+ int (*dpo_node_check)(struct dx_path *path, -+ struct dx_frame *frame, void *cookie); -+ int (*dpo_node_init)(struct dx_path *path, -+ struct buffer_head *bh, int root); -+}; -+ - /* - * Structure to keep track of a path drilled through htree. - */ - struct dx_path { -- struct inode *dp_object; -- struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT]; -- struct dx_frame *dp_frame; -+ struct inode *dp_object; -+ struct dx_param *dp_param; -+ int dp_indirect; -+ struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT]; -+ struct dx_frame *dp_frame; -+ void *dp_key_target; -+ void *dp_key; - }; - -+static u32 htree_root_ptr(struct dx_path *p); -+static int htree_node_check(struct dx_path *path, -+ struct dx_frame *frame, void *cookie); -+static int htree_node_init(struct dx_path *path, -+ struct buffer_head *bh, int root); -+ -+static struct dx_param htree_compat_param = { -+ .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash, -+ .dpo_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs, -+ .dpo_node_gap = offsetof(struct dx_node, entries), -+ .dpo_root_gap = offsetof(struct dx_root, entries), -+ -+ .dpo_root_ptr = htree_root_ptr, -+ .dpo_node_check = htree_node_check, -+ .dpo_node_init = htree_node_init -+}; -+ -+ - #ifdef CONFIG_EXT3_INDEX --static inline unsigned dx_get_block (struct dx_entry *entry); --static void dx_set_block (struct dx_entry *entry, unsigned value); --static inline unsigned dx_get_hash (struct dx_entry *entry); --static void dx_set_hash (struct dx_entry *entry, unsigned value); --static unsigned dx_get_count (struct dx_entry *entries); --static unsigned dx_get_limit (struct dx_entry *entries); --static void dx_set_count (struct dx_entry *entries, unsigned value); --static void dx_set_limit (struct dx_entry *entries, unsigned value); --static unsigned dx_root_limit (struct inode *dir, unsigned infosize); --static unsigned dx_node_limit (struct inode *dir); --static struct dx_frame *dx_probe(struct dentry *dentry, -- struct inode *dir, -- struct dx_hash_info *hinfo, -- struct dx_path *path, -- int *err); -+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry); -+static void dx_set_block(struct dx_path *p, -+ struct dx_entry *entry, unsigned value); -+static inline void *dx_get_key(struct dx_path *p, -+ struct dx_entry *entry, void *key); -+static void dx_set_key(struct dx_path *p, struct dx_entry *entry, void *key); -+static unsigned dx_get_count(struct dx_entry *entries); -+static unsigned dx_get_limit(struct dx_entry *entries); -+static void dx_set_count(struct dx_entry *entries, unsigned value); -+static void dx_set_limit(struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit(struct dx_path *p); -+static unsigned dx_node_limit(struct dx_path *p); -+static int dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_path *path); - static int dx_make_map (struct ext3_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); - static void dx_sort_map(struct dx_map_entry *map, unsigned count); - static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, - struct dx_map_entry *offsets, int count); - static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); --static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static void dx_insert_block (struct dx_path *path, -+ struct dx_frame *frame, u32 hash, u32 block); - static int ext3_htree_next_block(struct inode *dir, __u32 hash, - struct dx_path *path, __u32 *start_hash); - static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -@@ -186,29 +222,65 @@ static struct buffer_head * ext3_dx_find - static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode); - -+static inline void dx_path_init(struct dx_path *path, struct inode *inode); -+static inline void dx_path_fini(struct dx_path *path); -+ -+ - /* - * Future: use high four bits of block for coalesce-on-delete flags - * Mask them off for now. - */ - --static inline unsigned dx_get_block (struct dx_entry *entry) -+static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off) -+{ -+ return (void *)((char *)entry + off); -+} -+ -+static inline size_t dx_entry_size(struct dx_path *p) - { -- return le32_to_cpu(entry->block) & 0x00ffffff; -+ return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size; - } - --static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+static inline struct dx_entry *dx_entry_shift(struct dx_path *p, -+ struct dx_entry *entry, int shift) - { -- entry->block = cpu_to_le32(value); -+ void *e = entry; -+ return e + shift * dx_entry_size(p); - } - --static inline unsigned dx_get_hash (struct dx_entry *entry) -+static inline ptrdiff_t dx_entry_diff(struct dx_path *p, -+ struct dx_entry *e1, struct dx_entry *e2) - { -- return le32_to_cpu(entry->hash); -+ ptrdiff_t diff; -+ -+ diff = (void *)e1 - (void *)e2; -+ assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff); -+ return diff / dx_entry_size(p); -+} -+ -+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry) -+{ -+ return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size)) -+ & 0x00ffffff; - } - --static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+static inline void dx_set_block(struct dx_path *p, -+ struct dx_entry *entry, unsigned value) - { -- entry->hash = cpu_to_le32(value); -+ *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value); -+} -+ -+static inline void *dx_get_key(struct dx_path *p, -+ struct dx_entry *entry, void *key) -+{ -+ memcpy(key, entry, p->dp_param->dpo_key_size); -+ return key; -+} -+ -+static inline void dx_set_key(struct dx_path *p, -+ struct dx_entry *entry, void *key) -+{ -+ memcpy(entry, key, p->dp_param->dpo_key_size); - } - - static inline unsigned dx_get_count (struct dx_entry *entries) -@@ -231,17 +303,123 @@ static inline void dx_set_limit (struct - ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); - } - --static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+static inline unsigned dx_root_limit(struct dx_path *p) - { -- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -- EXT3_DIR_REC_LEN(2) - infosize; -- return 0? 20: entry_space / sizeof(struct dx_entry); -+ struct dx_param *param = p->dp_param; -+ unsigned entry_space = p->dp_object->i_sb->s_blocksize - -+ param->dpo_root_gap; -+ return entry_space / (param->dpo_key_size + param->dpo_ptr_size); -+} -+ -+static inline unsigned dx_node_limit(struct dx_path *p) -+{ -+ struct dx_param *param = p->dp_param; -+ unsigned entry_space = p->dp_object->i_sb->s_blocksize - -+ param->dpo_node_gap; -+ return entry_space / (param->dpo_key_size + param->dpo_ptr_size); -+} -+ -+static inline int dx_index_is_compat(struct dx_path *path) -+{ -+ return path->dp_param == &htree_compat_param; -+} -+ -+static struct dx_entry *dx_get_entries(struct dx_path *path, void *data, -+ int root) -+{ -+ return data + -+ (root ? -+ path->dp_param->dpo_root_gap : path->dp_param->dpo_node_gap); -+} -+ -+static struct dx_entry *dx_node_get_entries(struct dx_path *path, -+ struct dx_frame *frame) -+{ -+ return dx_get_entries(path, -+ frame->bh->b_data, frame == path->dp_frames); -+} -+ -+static u32 htree_root_ptr(struct dx_path *path) -+{ -+ return 0; -+} -+ -+struct htree_cookie { -+ struct dx_hash_info *hinfo; -+ struct dentry *dentry; -+}; -+ -+static int htree_node_check(struct dx_path *path, struct dx_frame *frame, -+ void *cookie) -+{ -+ void *data; -+ struct dx_entry *entries; -+ struct super_block *sb; -+ -+ data = frame->bh->b_data; -+ entries = dx_node_get_entries(path, frame); -+ sb = path->dp_object->i_sb; -+ if (frame == path->dp_frames) { -+ /* root node */ -+ struct dx_root *root; -+ struct htree_cookie *hc = cookie; -+ -+ root = data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_R5 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ path->dp_indirect = root->info.indirect_levels; -+ if (path->dp_indirect > DX_MAX_TREE_HEIGHT - 1) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ assert((char *)entries == (((char *)&root->info) + -+ root->info.info_length)); -+ assert(dx_get_limit(entries) == dx_root_limit(path)); -+ -+ hc->hinfo->hash_version = root->info.hash_version; -+ hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed; -+ if (hc->dentry) -+ ext3fs_dirhash(hc->dentry->d_name.name, -+ hc->dentry->d_name.len, hc->hinfo); -+ path->dp_key_target = &hc->hinfo->hash; -+ } else { -+ /* non-root index */ -+ assert(entries == data + path->dp_param->dpo_node_gap); -+ assert(dx_get_limit(entries) == dx_node_limit(path)); -+ } -+ frame->entries = frame->at = entries; -+ return 0; - } - --static inline unsigned dx_node_limit (struct inode *dir) -+static int htree_node_init(struct dx_path *path, -+ struct buffer_head *bh, int root) - { -- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -- return 0? 22: entry_space / sizeof(struct dx_entry); -+ struct dx_node *node; -+ -+ assert(!root); -+ -+ node = (void *)bh->b_data; -+ node->fake.rec_len = cpu_to_le16(path->dp_object->i_sb->s_blocksize); -+ node->fake.inode = 0; -+ return 0; - } - - /* -@@ -327,123 +505,101 @@ struct stats dx_show_entries(struct dx_h - } - #endif /* DX_DEBUG */ - --/* -- * Probe for a directory leaf block to search. -- * -- * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -- * error in the directory index, and the caller should fall back to -- * searching the directory normally. The callers of dx_probe **MUST** -- * check for this error code, and make sure it never gets reflected -- * back to userspace. -- */ --static struct dx_frame * --dx_probe(struct dentry *dentry, struct inode *dir, -- struct dx_hash_info *hinfo, struct dx_path *path, int *err) --{ -- unsigned count, indirect; -- struct dx_entry *at, *entries, *p, *q, *m; -- struct dx_root *root; -- struct buffer_head *bh; -- struct dx_frame *frame = path->dp_frames; -- u32 hash; -+static int dx_lookup(struct dx_path *path, void *cookie) -+{ -+ u32 ptr; -+ int err; -+ int i; - -- frame->bh = NULL; -- if (dentry) -- dir = dentry->d_parent->d_inode; -- if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -- goto fail; -- root = (struct dx_root *) bh->b_data; -- if (root->info.hash_version != DX_HASH_TEA && -- root->info.hash_version != DX_HASH_HALF_MD4 && -- root->info.hash_version != DX_HASH_R5 && -- root->info.hash_version != DX_HASH_LEGACY) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unrecognised inode hash code %d", root->info.hash_version); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -- hinfo->hash_version = root->info.hash_version; -- hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; -- if (dentry) -- ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -- hash = hinfo->hash; -- -- if (root->info.unused_flags & 1) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unimplemented inode hash flags: %#06x", -- root->info.unused_flags); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -+ struct dx_param *param; -+ struct dx_frame *frame; - -- if ((indirect = root->info.indirect_levels) > DX_MAX_TREE_HEIGHT - 1) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unimplemented inode hash depth: %#06x", -- root->info.indirect_levels); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -+ param = path->dp_param; - -- entries = (struct dx_entry *) (((char *)&root->info) + -- root->info.info_length); -- assert(dx_get_limit(entries) == dx_root_limit(dir, -- root->info.info_length)); -- dxtrace (printk("Look up %x", hash)); -- while (1) -- { -+ for (frame = path->dp_frames, i = 0, -+ ptr = param->dpo_root_ptr(path); i <= path->dp_indirect; -+ ptr = dx_get_block(path, frame->at), ++frame, ++i) { -+ struct dx_entry *entries; -+ struct dx_entry *p; -+ struct dx_entry *q; -+ struct dx_entry *m; -+ unsigned count; -+ -+ frame->bh = ext3_bread(NULL, path->dp_object, ptr, 0, &err); -+ if (frame->bh == NULL) { -+ err = -EIO; -+ break; -+ } -+ err = param->dpo_node_check(path, frame, cookie); -+ if (err != 0) -+ break; -+ -+ entries = frame->entries; - count = dx_get_count(entries); -- assert (count && count <= dx_get_limit(entries)); -- p = entries + 1; -- q = entries + count - 1; -- while (p <= q) -- { -- m = p + (q - p)/2; -+ assert(count && count <= dx_get_limit(entries)); -+ p = dx_entry_shift(path, entries, 1); -+ q = dx_entry_shift(path, entries, count - 1); -+ while (p <= q) { -+ m = dx_entry_shift(path, -+ p, dx_entry_diff(path, q, p) / 2); - dxtrace(printk(".")); -- if (dx_get_hash(m) > hash) -- q = m - 1; -+ if (memcmp(dx_get_key(path, m, path->dp_key), -+ path->dp_key_target, -+ param->dpo_key_size) > 0) -+ q = dx_entry_shift(path, m, -1); - else -- p = m + 1; -+ p = dx_entry_shift(path, m, +1); - } - -- if (0) // linear search cross check -- { -+ frame->at = dx_entry_shift(path, p, -1); -+ if (1) { // linear search cross check - unsigned n = count - 1; -+ struct dx_entry *at; -+ - at = entries; -- while (n--) -- { -+ while (n--) { - dxtrace(printk(",")); -- if (dx_get_hash(++at) > hash) -- { -- at--; -+ at = dx_entry_shift(path, at, +1); -+ if (memcmp(dx_get_key(path, at, path->dp_key), -+ path->dp_key_target, -+ param->dpo_key_size) > 0) { -+ at = dx_entry_shift(path, at, -1); - break; - } - } -- assert (at == p - 1); -+ assert(at == frame->at); - } -- -- at = p - 1; -- dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -- frame->bh = bh; -- frame->entries = entries; -- frame->at = at; -- if (!indirect--) -- return path->dp_frame = frame; -- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -- goto fail2; -- at = entries = ((struct dx_node *) bh->b_data)->entries; -- assert (dx_get_limit(entries) == dx_node_limit (dir)); -- frame++; -- } --fail2: -- while (frame >= path->dp_frames) { -- brelse(frame->bh); -- frame--; - } --fail: -- return NULL; -+ if (err != 0) -+ dx_path_fini(path); -+ path->dp_frame = --frame; -+ return err; -+} -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static int dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_path *path) -+{ -+ int err; -+ __u32 hash_storage; -+ struct htree_cookie hc = { -+ .dentry = dentry, -+ .hinfo = hinfo -+ }; -+ -+ assert(dx_index_is_compat(path)); -+ path->dp_key = &hash_storage; -+ err = dx_lookup(path, &hc); -+ assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL); -+ return err; - } - - static inline void dx_path_init(struct dx_path *path, struct inode *inode) -@@ -458,8 +614,10 @@ static inline void dx_path_fini(struct d - int i; - - for (i = 0; i < ARRAY_SIZE(path->dp_frames); i--) { -- if (path->dp_frames[i].bh != NULL) -+ if (path->dp_frames[i].bh != NULL) { - brelse(path->dp_frames[i].bh); -+ path->dp_frames[i].bh = NULL; -+ } - } - } - -@@ -488,6 +646,8 @@ static int ext3_htree_next_block(struct - int err, num_frames = 0; - __u32 bhash; - -+ assert(dx_index_is_compat(path)); -+ - p = path->dp_frame; - /* - * Find the next leaf page by incrementing the frame pointer. -@@ -497,7 +657,9 @@ static int ext3_htree_next_block(struct - * nodes need to be read. - */ - while (1) { -- if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ p->at = dx_entry_shift(path, p->at, +1); -+ if (p->at < dx_entry_shift(path, p->entries, -+ dx_get_count(p->entries))) - break; - if (p == path->dp_frames) - return 0; -@@ -512,7 +674,7 @@ static int ext3_htree_next_block(struct - * desired contiuation hash. If it doesn't, return since - * there's no point to read in the successive index pages. - */ -- bhash = dx_get_hash(p->at); -+ dx_get_key(path, p->at, &bhash); - if (start_hash) - *start_hash = bhash; - if ((hash & 1) == 0) { -@@ -524,12 +686,13 @@ static int ext3_htree_next_block(struct - * block so no check is necessary - */ - while (num_frames--) { -- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 0, &err))) -+ if (!(bh = ext3_bread(NULL, dir, -+ dx_get_block(path, p->at), 0, &err))) - return err; /* Failure */ - ++p; - brelse (p->bh); - p->bh = bh; -- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ p->at = p->entries = dx_node_get_entries(path, p); - } - return 1; - } -@@ -609,6 +772,7 @@ int ext3_htree_fill_tree(struct file *di - start_minor_hash)); - dir = dir_file->f_dentry->d_inode; - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; - if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { - hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; -@@ -619,7 +783,8 @@ int ext3_htree_fill_tree(struct file *di - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; -- if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err)) -+ err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path); -+ if (err != 0) - return err; - - /* Add '.' and '..' from the htree header */ -@@ -634,7 +799,7 @@ int ext3_htree_fill_tree(struct file *di - } - - while (1) { -- block = dx_get_block(path.dp_frame->at); -+ block = dx_get_block(&path, path.dp_frame->at); - ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, - start_hash, start_minor_hash); - if (ret < 0) { -@@ -722,17 +887,19 @@ static void dx_sort_map (struct dx_map_e - } while(more); - } - --static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+static void dx_insert_block(struct dx_path *path, -+ struct dx_frame *frame, u32 hash, u32 block) - { - struct dx_entry *entries = frame->entries; -- struct dx_entry *old = frame->at, *new = old + 1; -+ struct dx_entry *old = frame->at, *new = dx_entry_shift(path, old, +1); - int count = dx_get_count(entries); - - assert(count < dx_get_limit(entries)); -- assert(old < entries + count); -- memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -- dx_set_hash(new, hash); -- dx_set_block(new, block); -+ assert(old < dx_entry_shift(path, entries, count)); -+ memmove(dx_entry_shift(path, new, 1), new, -+ (char *)dx_entry_shift(path, entries, count) - (char *)new); -+ dx_set_key(path, new, &hash); -+ dx_set_block(path, new, block); - dx_set_count(entries, count + 1); - } - #endif -@@ -934,7 +1101,9 @@ static struct buffer_head * ext3_dx_find - struct dx_hash_info hinfo; - u32 hash; - struct dx_path path; -- struct dx_entry dummy_dot; -+ struct dx_entry_compat dummy_dot = { -+ .block = 0 -+ }; - struct ext3_dir_entry_2 *de, *top; - struct buffer_head *bh; - unsigned long block; -@@ -944,19 +1113,21 @@ static struct buffer_head * ext3_dx_find - struct inode *dir = dentry->d_parent->d_inode; - - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; -+ - sb = dir->i_sb; - /* NFS may look up ".." - look at dx_root directory block */ - if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -- if (!(dx_probe(dentry, NULL, &hinfo, &path, err))) -+ *err = dx_probe(dentry, NULL, &hinfo, &path); -+ if (*err != 0) - return NULL; - } else { -- path.dp_frame->bh = NULL; /* for dx_path_fini() */ -- path.dp_frame->at = &dummy_dot; /* hack for zero entry*/ -- dx_set_block(path.dp_frame->at, 0); /* dx_root block is 0 */ -+ path.dp_frame->bh = NULL; /* for dx_path_fini() */ -+ path.dp_frame->at = (void *)&dummy_dot; /* hack for zero entry*/ - } - hash = hinfo.hash; - do { -- block = dx_get_block(path.dp_frame->at); -+ block = dx_get_block(&path, path.dp_frame->at); - if (!(bh = ext3_bread (NULL,dir, block, 0, err))) - goto errout; - de = (struct ext3_dir_entry_2 *) bh->b_data; -@@ -1115,10 +1286,11 @@ static struct ext3_dir_entry_2* dx_pack_ - - /* Allocate new node, and split leaf node @bh into it, inserting new pointer - * into parent node identified by @frame */ --static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct dx_path *path, - struct buffer_head **bh,struct dx_frame *frame, - struct dx_hash_info *hinfo, int *error) - { -+ struct inode *dir = path->dp_object; - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; - struct buffer_head *bh2; -@@ -1180,7 +1352,7 @@ static struct ext3_dir_entry_2 *do_split - swap(*bh, bh2); - de = de2; - } -- dx_insert_block (frame, hash2 + continued, newblock); -+ dx_insert_block(path, frame, hash2 + continued, newblock); - err = ext3_journal_dirty_metadata (handle, bh2); - if (err) - goto journal_error; -@@ -1315,6 +1487,7 @@ static int make_indexed_dir(handle_t *ha - struct fake_dirent *fde; - - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; - blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); - retval = ext3_journal_get_write_access(handle, bh); -@@ -1350,10 +1523,10 @@ static int make_indexed_dir(handle_t *ha - root->info.info_length = sizeof(root->info); - root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - root->info.hash_version = DX_HASH_R5; -- entries = root->entries; -- dx_set_block (entries, 1); -+ entries = (void *)root->entries; -+ dx_set_block (&path, entries, 1); - dx_set_count (entries, 1); -- dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ dx_set_limit (entries, dx_root_limit(&path)); - - /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; -@@ -1363,7 +1536,7 @@ static int make_indexed_dir(handle_t *ha - path.dp_frame->at = entries; - path.dp_frame->bh = bh; - bh = bh2; -- de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval); -+ de = do_split(handle, &path, &bh, path.dp_frame, &hinfo, &retval); - dx_path_fini(&path); - if (!de) - return retval; -@@ -1446,8 +1619,8 @@ static int ext3_dx_add_entry(handle_t *h - struct inode *inode) - { - struct dx_path path; -+ struct dx_param *param; - struct dx_frame *frame, *safe; -- struct dx_node *node2; - struct dx_entry *entries; /* old block contents */ - struct dx_entry *entries2; /* new block contents */ - struct dx_hash_info hinfo; -@@ -1463,7 +1636,10 @@ static int ext3_dx_add_entry(handle_t *h - size_t isize; - - dx_path_init(&path, dir); -- if (!dx_probe(dentry, NULL, &hinfo, &path, &err)) -+ param = path.dp_param = &htree_compat_param; -+ -+ err = dx_probe(dentry, NULL, &hinfo, &path); -+ if (err != 0) - return err; - frame = path.dp_frame; - entries = frame->entries; -@@ -1471,7 +1647,8 @@ static int ext3_dx_add_entry(handle_t *h - /* XXX nikita: global serialization! */ - isize = dir->i_size; - -- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ if (!(bh = ext3_bread(handle, dir, -+ dx_get_block(&path, frame->at), 0, &err))) - goto cleanup; - - BUFFER_TRACE(bh, "get_write_access"); -@@ -1519,12 +1696,9 @@ static int ext3_dx_add_entry(handle_t *h - * transaction... */ - for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) { - bh_new[i] = ext3_append (handle, dir, &newblock[i], &err); -- if (!bh_new[i]) -+ if (!bh_new[i] || -+ param->dpo_node_init(&path, bh_new[i], 0) != 0) - goto cleanup; -- node2 = (struct dx_node *)(bh_new[i]->b_data); -- entries2 = node2->entries; -- node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -- node2->fake.inode = 0; - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); - if (err) -@@ -1545,11 +1719,10 @@ static int ext3_dx_add_entry(handle_t *h - - entries = frame->entries; - count = dx_get_count(entries); -- idx = frame->at - entries; -+ idx = dx_entry_diff(&path, frame->at, entries); - - bh2 = bh_new[i]; -- node2 = (struct dx_node *)(bh2->b_data); -- entries2 = node2->entries; -+ entries2 = dx_get_entries(&path, bh2->b_data, 0); - - if (frame == path.dp_frames) { - /* splitting root node. Tricky point: -@@ -1571,19 +1744,19 @@ static int ext3_dx_add_entry(handle_t *h - indirects = root->info.indirect_levels; - dxtrace(printk("Creating new root %d\n", indirects)); - memcpy((char *) entries2, (char *) entries, -- count * sizeof(struct dx_entry)); -- dx_set_limit(entries2, dx_node_limit(dir)); -+ count * dx_entry_size(&path)); -+ dx_set_limit(entries2, dx_node_limit(&path)); - - /* Set up root */ - dx_set_count(entries, 1); -- dx_set_block(entries + 0, newblock[i]); -+ dx_set_block(&path, entries, newblock[i]); - root->info.indirect_levels = indirects + 1; - - /* Shift frames in the path */ - memmove(frames + 2, frames + 1, - (sizeof path.dp_frames) - 2 * sizeof frames[0]); - /* Add new access path frame */ -- frames[1].at = entries2 + idx; -+ frames[1].at = dx_entry_shift(&path, entries2, idx); - frames[1].entries = entries = entries2; - frames[1].bh = bh2; - ++ frame; -@@ -1594,23 +1767,30 @@ static int ext3_dx_add_entry(handle_t *h - } else { - /* splitting non-root index node. */ - unsigned count1 = count/2, count2 = count - count1; -- unsigned hash2 = dx_get_hash(entries + count1); -+ unsigned hash2; -+ -+ dx_get_key(&path, -+ dx_entry_shift(&path, entries, count1), -+ &hash2); -+ - dxtrace(printk("Split index %i/%i\n", count1, count2)); - -- memcpy ((char *) entries2, (char *) (entries + count1), -- count2 * sizeof(struct dx_entry)); -+ memcpy ((char *) entries2, -+ (char *) dx_entry_shift(&path, entries, count1), -+ count2 * dx_entry_size(&path)); - dx_set_count (entries, count1); - dx_set_count (entries2, count2); -- dx_set_limit (entries2, dx_node_limit(dir)); -+ dx_set_limit (entries2, dx_node_limit(&path)); - - /* Which index block gets the new entry? */ - if (idx >= count1) { -- frame->at = entries2 + idx - count1; -+ frame->at = dx_entry_shift(&path, entries2, -+ idx - count1); - frame->entries = entries = entries2; - swap(frame->bh, bh2); - bh_new[i] = bh2; - } -- dx_insert_block (frame - 1, hash2, newblock[i]); -+ dx_insert_block(&path, frame - 1, hash2, newblock[i]); - dxtrace(dx_show_index ("node", frame->entries)); - dxtrace(dx_show_index ("node", - ((struct dx_node *) bh2->b_data)->entries)); -@@ -1619,7 +1799,7 @@ static int ext3_dx_add_entry(handle_t *h - goto journal_error; - } - } -- de = do_split(handle, dir, &bh, --frame, &hinfo, &err); -+ de = do_split(handle, &path, &bh, --frame, &hinfo, &err); - if (!de) - goto cleanup; - err = add_dirent_to_buf(handle, dentry, inode, de, bh); diff --git a/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-suse2.patch deleted file mode 100644 index acd404e..0000000 --- a/lustre/kernel_patches/patches/ext3-ialloc-2.4.21-suse2.patch +++ /dev/null @@ -1,237 +0,0 @@ -Index: linux-2.4.21-suse2/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ialloc.c 2005-08-04 09:14:23.000000000 -0600 -+++ linux-2.4.21-suse2/fs/ext3/ialloc.c 2005-08-04 09:17:49.000000000 -0600 -@@ -328,19 +328,140 @@ - * directories already is chosen. - * - * For other inodes, search forward from the parent directory's block -- * group to find a free inode. -+ * group to find a free inode in a group with some free blocks. - */ -+static int find_group_dir(struct super_block *sb, const struct inode *parent, -+ struct ext3_group_desc **best_desc, -+ struct buffer_head **best_bh) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int ngroups = sbi->s_groups_count; -+ int avefreei; -+ struct ext3_group_desc *desc; -+ struct buffer_head *bh; -+ int group, best_group = -1, ndir_best = 999999999; -+ -+ *best_desc = NULL; -+ *best_bh = NULL; -+ -+ avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ngroups; -+ -+ for (group = 0; group < ngroups; group++) { -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ continue; -+ if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best) -+ continue; -+ if (!*best_desc || -+ (le16_to_cpu(desc->bg_free_blocks_count) > -+ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { -+ *best_bh = bh; -+ *best_desc = desc; -+ best_group = group; -+ ndir_best = le16_to_cpu(desc->bg_used_dirs_count); -+ } -+ } -+ -+ return best_group; -+} -+ -+static int find_group_other(struct super_block *sb, const struct inode *parent, -+ struct ext3_group_desc **best_desc, -+ struct buffer_head **best_bh) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int parent_group = EXT3_I(parent)->i_block_group; -+ int ngroups = sbi->s_groups_count; -+ int avefreeb; -+ struct ext3_group_desc *desc; -+ struct buffer_head *bh; -+ int group, i, best_group = -1; -+ -+ *best_desc = NULL; -+ *best_bh = NULL; -+ -+ /* -+ * Try to place the inode in its parent directory -+ */ -+ group = parent_group; -+ desc = ext3_get_group_desc (sb, group, &bh); -+ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -+ le16_to_cpu(desc->bg_free_blocks_count)) { -+ *best_desc = desc; -+ *best_bh = bh; -+ return group; -+ } -+ -+ /* -+ * We're going to place this inode in a different blockgroup from its -+ * parent. We want to cause files in a common directory to all land in -+ * the same blockgroup if it has space. But we want files which are -+ * in a different directory which shares a blockgroup with our parent -+ * to land in a different blockgroup. -+ * -+ * So add our directory's i_ino into the starting point for the hash. -+ */ -+ group = (group + parent->i_ino) % ngroups; -+ -+ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / -+ sbi->s_groups_count / ngroups; -+ -+ /* -+ * Use a quadratic hash to find a group with a free inode and some free -+ * blocks. -+ */ -+ for (i = 1; i < ngroups; i <<= 1) { -+ group += i; -+ if (group >= ngroups) -+ group -= ngroups; -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) { -+ *best_bh = bh; -+ *best_desc = desc; -+ return group; -+ } -+ } -+ -+ /* -+ * That failed: try linear search for a group with free inodes and -+ * preferrably free blocks, returning as soon as we find a good one. -+ */ -+ group = sbi->s_last_group; -+ for (i = 0; i < ngroups; i++) { -+ if (++group >= ngroups) -+ group = 0; -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (!*best_desc || -+ (le16_to_cpu(desc->bg_free_blocks_count) > -+ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { -+ *best_bh = bh; -+ *best_desc = desc; -+ best_group = group; -+ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) -+ break; -+ } -+ } -+ sbi->s_last_group = best_group; -+ -+ return best_group; -+} -+ - struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir, - int mode, unsigned long goal) - { - struct super_block * sb; - struct buffer_head * bh; - struct buffer_head * bh2; -- int i, j, avefreei; -+ int i, j; - struct inode * inode; - int bitmap_nr; - struct ext3_group_desc * gdp; -- struct ext3_group_desc * tmp; - struct ext3_super_block * es; - struct ext3_iloc iloc; - int err = 0; -@@ -392,72 +513,10 @@ - } - - repeat: -- gdp = NULL; -- i = 0; -- -- if (S_ISDIR(mode)) { -- avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -- if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -- struct buffer_head *temp_buffer; -- tmp = ext3_get_group_desc (sb, j, &temp_buffer); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count) && -- le16_to_cpu(tmp->bg_free_inodes_count) >= -- avefreei) { -- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -- le16_to_cpu(gdp->bg_free_blocks_count))) { -- i = j; -- gdp = tmp; -- bh2 = temp_buffer; -- } -- } -- } -- } -- } else { -- /* -- * Try to place the inode in its parent directory -- */ -- i = dir->u.ext3_i.i_block_group; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -- gdp = tmp; -- else -- { -- /* -- * Use a quadratic hash to find a group with a -- * free inode -- */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -- i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- if (!gdp) { -- /* -- * That failed: try linear search for a free inode -- */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -- i = 0; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- } -+ if (S_ISDIR(mode)) -+ i = find_group_dir(sb, dir, &gdp, &bh2); -+ else -+ i = find_group_other(sb, dir, &gdp, &bh2); - - err = -ENOSPC; - if (!gdp) -Index: linux-2.4.21-suse2/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_fs_sb.h 2005-08-04 09:14:21.000000000 -0600 -+++ linux-2.4.21-suse2/include/linux/ext3_fs_sb.h 2005-08-04 09:19:32.000000000 -0600 -@@ -45,6 +45,7 @@ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - unsigned long s_groups_count; /* Number of groups in the fs */ -+ unsigned long s_last_group; /* Last group used for inode allocation */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; diff --git a/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch b/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch deleted file mode 100644 index 83e25fa..0000000 --- a/lustre/kernel_patches/patches/ext3-ialloc-2.4.24.patch +++ /dev/null @@ -1,238 +0,0 @@ -Index: lum/fs/ext3/ialloc.c -=================================================================== ---- lum.orig/fs/ext3/ialloc.c 2004-08-26 13:14:35.000000000 -0600 -+++ lum/fs/ext3/ialloc.c 2004-08-31 15:00:35.000000000 -0600 -@@ -327,8 +327,130 @@ int ext3_itable_block_used(struct super_ - * directories already is chosen. - * - * For other inodes, search forward from the parent directory's block -- * group to find a free inode. -+ * group to find a free inode in a group with some free blocks. - */ -+static int find_group_dir(struct super_block *sb, const struct inode *parent, -+ struct ext3_group_desc **best_desc, -+ struct buffer_head **best_bh) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int ngroups = sbi->s_groups_count; -+ int avefreei; -+ struct ext3_group_desc *desc; -+ struct buffer_head *bh; -+ int group, best_group = -1, ndir_best = 999999999; -+ -+ *best_desc = NULL; -+ *best_bh = NULL; -+ -+ avefreei = le32_to_cpu(sbi->s_es->s_free_inodes_count) / ngroups; -+ -+ for (group = 0; group < ngroups; group++) { -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ continue; -+ if (le16_to_cpu(desc->bg_used_dirs_count) > ndir_best) -+ continue; -+ if (!*best_desc || -+ (le16_to_cpu(desc->bg_free_blocks_count) > -+ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { -+ *best_bh = bh; -+ *best_desc = desc; -+ best_group = group; -+ ndir_best = le16_to_cpu(desc->bg_used_dirs_count); -+ } -+ } -+ -+ return best_group; -+} -+ -+static int find_group_other(struct super_block *sb, const struct inode *parent, -+ struct ext3_group_desc **best_desc, -+ struct buffer_head **best_bh) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int parent_group = EXT3_I(parent)->i_block_group; -+ int ngroups = sbi->s_groups_count; -+ int avefreeb; -+ struct ext3_group_desc *desc; -+ struct buffer_head *bh; -+ int group, i, best_group = -1; -+ -+ *best_desc = NULL; -+ *best_bh = NULL; -+ -+ /* -+ * Try to place the inode in its parent directory -+ */ -+ group = parent_group; -+ desc = ext3_get_group_desc (sb, group, &bh); -+ if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -+ le16_to_cpu(desc->bg_free_blocks_count)) { -+ *best_desc = desc; -+ *best_bh = bh; -+ return group; -+ } -+ -+ /* -+ * We're going to place this inode in a different blockgroup from its -+ * parent. We want to cause files in a common directory to all land in -+ * the same blockgroup if it has space. But we want files which are -+ * in a different directory which shares a blockgroup with our parent -+ * to land in a different blockgroup. -+ * -+ * So add our directory's i_ino into the starting point for the hash. -+ */ -+ group = (group + parent->i_ino) % ngroups; -+ -+ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / -+ sbi->s_groups_count / ngroups; -+ -+ /* -+ * Use a quadratic hash to find a group with a free inode and some free -+ * blocks. -+ */ -+ for (i = 1; i < ngroups; i <<= 1) { -+ group += i; -+ if (group >= ngroups) -+ group -= ngroups; -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (le16_to_cpu(desc->bg_free_blocks_count) > avefreeb) { -+ *best_bh = bh; -+ *best_desc = desc; -+ return group; -+ } -+ } -+ -+ /* -+ * That failed: try linear search for a group with free inodes and -+ * preferrably free blocks, returning as soon as we find a good one. -+ */ -+ group = sbi->s_last_group; -+ for (i = 0; i < ngroups; i++) { -+ if (++group >= ngroups) -+ group = 0; -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (!*best_desc || -+ (le16_to_cpu(desc->bg_free_blocks_count) > -+ le16_to_cpu((*best_desc)->bg_free_blocks_count))) { -+ *best_bh = bh; -+ *best_desc = desc; -+ best_group = group; -+ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) -+ break; -+ } -+ } -+ sbi->s_last_group = best_group; -+ -+ return best_group; -+} -+ - struct inode * ext3_new_inode (handle_t *handle, - const struct inode * dir, int mode, - unsigned long goal) -@@ -336,11 +459,10 @@ struct inode * ext3_new_inode (handle_t - struct super_block * sb; - struct buffer_head * bh; - struct buffer_head * bh2; -- int i, j, avefreei; -+ int i, j; - struct inode * inode; - int bitmap_nr; - struct ext3_group_desc * gdp; -- struct ext3_group_desc * tmp; - struct ext3_super_block * es; - struct ext3_iloc iloc; - int err = 0; -@@ -392,72 +514,10 @@ struct inode * ext3_new_inode (handle_t - } - - repeat: -- gdp = NULL; -- i = 0; -- -- if (S_ISDIR(mode)) { -- avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -- if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -- struct buffer_head *temp_buffer; -- tmp = ext3_get_group_desc (sb, j, &temp_buffer); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count) && -- le16_to_cpu(tmp->bg_free_inodes_count) >= -- avefreei) { -- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -- le16_to_cpu(gdp->bg_free_blocks_count))) { -- i = j; -- gdp = tmp; -- bh2 = temp_buffer; -- } -- } -- } -- } -- } else { -- /* -- * Try to place the inode in its parent directory -- */ -- i = dir->u.ext3_i.i_block_group; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -- gdp = tmp; -- else -- { -- /* -- * Use a quadratic hash to find a group with a -- * free inode -- */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -- i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- if (!gdp) { -- /* -- * That failed: try linear search for a free inode -- */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -- i = 0; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- } -+ if (S_ISDIR(mode)) -+ i = find_group_dir(sb, dir, &gdp, &bh2); -+ else -+ i = find_group_other(sb, dir, &gdp, &bh2); - - err = -ENOSPC; - if (!gdp) -Index: lum/include/linux/ext3_fs_sb.h -=================================================================== ---- lum.orig/include/linux/ext3_fs_sb.h 2004-08-26 13:28:53.000000000 -0600 -+++ lum/include/linux/ext3_fs_sb.h 2004-08-31 11:04:27.000000000 -0600 -@@ -45,6 +45,7 @@ struct ext3_sb_info { - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - unsigned long s_groups_count; /* Number of groups in the fs */ -+ unsigned long s_last_group; /* Last group used for inode allocation */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; diff --git a/lustre/kernel_patches/patches/ext3-ialloc-2.6.patch b/lustre/kernel_patches/patches/ext3-ialloc-2.6.patch deleted file mode 100644 index 15d37a9..0000000 --- a/lustre/kernel_patches/patches/ext3-ialloc-2.6.patch +++ /dev/null @@ -1,128 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/ialloc.c 2005-05-16 14:10:54.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/ialloc.c 2005-05-16 14:18:29.000000000 -0600 -@@ -352,13 +352,17 @@ - return -1; - } - --static int find_group_other(struct super_block *sb, struct inode *parent) -+static int find_group_other(struct super_block *sb, struct inode *parent, -+ int mode) - { - int parent_group = EXT3_I(parent)->i_block_group; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - int ngroups = EXT3_SB(sb)->s_groups_count; - struct ext3_group_desc *desc; - struct buffer_head *bh; - int group, i; -+ int best_group = -1; -+ int avefreeb, freeb, best_group_freeb = 0; - - /* - * Try to place the inode in its parent directory -@@ -366,9 +370,9 @@ - group = parent_group; - desc = ext3_get_group_desc (sb, group, &bh); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -- le16_to_cpu(desc->bg_free_blocks_count)) -+ (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count))) - return group; -- -+ avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups; - /* - * We're going to place this inode in a different blockgroup from its - * parent. We want to cause files in a common directory to all land in -@@ -381,33 +385,47 @@ - group = (group + parent->i_ino) % ngroups; - - /* -- * Use a quadratic hash to find a group with a free inode and some free -- * blocks. -+ * Use a quadratic hash to find a group with a free inode and -+ * average number of free blocks. - */ - for (i = 1; i < ngroups; i <<= 1) { - group += i; - if (group >= ngroups) - group -= ngroups; - desc = ext3_get_group_desc (sb, group, &bh); -- if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -- le16_to_cpu(desc->bg_free_blocks_count)) -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ if (!S_ISREG(mode)) -+ return group; -+ if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) - return group; - } - - /* -- * That failed: try linear search for a free inode, even if that group -- * has no free blocks. -+ * That failed: start from last group used to allocate inode -+ * try linear search for a free inode and prefereably -+ * free blocks. - */ -- group = parent_group; -+ group = sbi->s_last_alloc_group; -+ if (group == -1) -+ group = parent_group; -+ - for (i = 0; i < ngroups; i++) { - if (++group >= ngroups) - group = 0; - desc = ext3_get_group_desc (sb, group, &bh); -- if (desc && le16_to_cpu(desc->bg_free_inodes_count)) -- return group; -+ if (!desc || !desc->bg_free_inodes_count) -+ continue; -+ freeb = le16_to_cpu(desc->bg_free_blocks_count); -+ if (freeb > best_group_freeb) { -+ best_group_freeb = freeb; -+ best_group = group; -+ if (freeb >= avefreeb || !S_ISREG(mode)) -+ break; -+ } - } -- -- return -1; -+ sbi->s_last_alloc_group = best_group; -+ return best_group; - } - - /* -@@ -454,7 +472,7 @@ - else - group = find_group_orlov(sb, dir); - } else -- group = find_group_other(sb, dir); -+ group = find_group_other(sb, dir, mode); - - err = -ENOSPC; - if (group == -1) -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/super.c 2005-05-16 14:10:54.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/super.c 2005-05-16 14:17:14.000000000 -0600 -@@ -1297,6 +1297,7 @@ - percpu_counter_init(&sbi->s_dirs_counter); - bgl_lock_init(&sbi->s_blockgroup_lock); - -+ sbi->s_last_alloc_group = -1; - for (i = 0; i < db_count; i++) { - block = descriptor_loc(sb, logic_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/ext3_fs_sb.h 2005-05-16 14:10:54.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/ext3_fs_sb.h 2005-05-16 14:17:14.000000000 -0600 -@@ -59,6 +59,8 @@ - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; -+ /* Last group used to allocate inode */ -+ int s_last_alloc_group; - - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; diff --git a/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch b/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch deleted file mode 100644 index 52e5521..0000000 --- a/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch +++ /dev/null @@ -1,20 +0,0 @@ -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:53:56.424908168 +0200 -+++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:53:59.376459464 +0200 -@@ -361,12 +361,13 @@ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ --#ifndef _LINUX_EXT2_FS_H -+#ifndef clear_opt - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt - #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) --#else -+#endif -+#ifndef EXT2_MOUNT_NOLOAD - #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD - #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT - #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS diff --git a/lustre/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch deleted file mode 100644 index 1ac944b..0000000 --- a/lustre/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch +++ /dev/null @@ -1,20 +0,0 @@ -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-04-02 16:43:37.000000000 -0500 -+++ linux-stage/include/linux/ext3_fs.h 2004-04-02 16:43:37.000000000 -0500 -@@ -331,12 +331,13 @@ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ --#ifndef _LINUX_EXT2_FS_H -+#ifndef clear_opt - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt - #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) --#else -+#endif -+#ifndef EXT2_MOUNT_NOLOAD - #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD - #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT - #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS diff --git a/lustre/kernel_patches/patches/ext3-ino_sb_macro-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-ino_sb_macro-2.4.21-chaos.patch deleted file mode 100644 index fb739a5..0000000 --- a/lustre/kernel_patches/patches/ext3-ino_sb_macro-2.4.21-chaos.patch +++ /dev/null @@ -1,1514 +0,0 @@ -Index: linux-2.4.21-chaos/fs/ext3/balloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/balloc.c 2003-07-15 02:09:35.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/balloc.c 2003-12-12 12:50:34.000000000 +0300 -@@ -46,18 +46,18 @@ - unsigned long desc; - struct ext3_group_desc * gdp; - -- if (block_group >= sb->u.ext3_sb.s_groups_count) { -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - - return NULL; - } - - group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); - desc = block_group % EXT3_DESC_PER_BLOCK(sb); -- if (!sb->u.ext3_sb.s_group_desc[group_desc]) { -+ if (!EXT3_SB(sb)->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", -@@ -66,9 +66,9 @@ - } - - gdp = (struct ext3_group_desc *) -- sb->u.ext3_sb.s_group_desc[group_desc]->b_data; -+ EXT3_SB(sb)->s_group_desc[group_desc]->b_data; - if (bh) -- *bh = sb->u.ext3_sb.s_group_desc[group_desc]; -+ *bh = EXT3_SB(sb)->s_group_desc[group_desc]; - return gdp + desc; - } - -@@ -104,8 +104,8 @@ - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -128,16 +128,17 @@ - int i, j, retval = 0; - unsigned long block_bitmap_number; - struct buffer_head * block_bitmap; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - -- if (block_group >= sb->u.ext3_sb.s_groups_count) -+ if (block_group >= sbi->s_groups_count) - ext3_panic (sb, "load_block_bitmap", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - -- if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) { -- if (sb->u.ext3_sb.s_block_bitmap[block_group]) { -- if (sb->u.ext3_sb.s_block_bitmap_number[block_group] == -+ if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) { -+ if (sbi->s_block_bitmap[block_group]) { -+ if (sbi->s_block_bitmap_number[block_group] == - block_group) - return block_group; - ext3_error (sb, "__load_block_bitmap", -@@ -149,21 +150,20 @@ - return block_group; - } - -- for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++) -+ for (i = 0; i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] != block_group; i++) - ; -- if (i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) { -- block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i]; -- block_bitmap = sb->u.ext3_sb.s_block_bitmap[i]; -+ if (i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] == block_group) { -+ block_bitmap_number = sbi->s_block_bitmap_number[i]; -+ block_bitmap = sbi->s_block_bitmap[i]; - for (j = i; j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } -- sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number; -- sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap; -+ sbi->s_block_bitmap_number[0] = block_bitmap_number; -+ sbi->s_block_bitmap[0] = block_bitmap; - - /* - * There's still one special case here --- if block_bitmap == 0 -@@ -173,17 +173,14 @@ - if (!block_bitmap) - retval = read_block_bitmap (sb, block_group, 0); - } else { -- if (sb->u.ext3_sb.s_loaded_block_bitmapsu.ext3_sb.s_loaded_block_bitmaps++; -+ if (sbi->s_loaded_block_bitmapss_loaded_block_bitmaps++; - else -- brelse (sb->u.ext3_sb.s_block_bitmap -- [EXT3_MAX_GROUP_LOADED - 1]); -- for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1; -- j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]); -+ for (j = sbi->s_loaded_block_bitmaps - 1; j > 0; j--) { -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } - retval = read_block_bitmap (sb, block_group, 0); - } -@@ -206,24 +203,25 @@ - static inline int load_block_bitmap (struct super_block * sb, - unsigned int block_group) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - int slot; -- -+ - /* - * Do the lookup for the slot. First of all, check if we're asking - * for the same slot as last time, and did we succeed that last time? - */ -- if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 && -- sb->u.ext3_sb.s_block_bitmap_number[0] == block_group && -- sb->u.ext3_sb.s_block_bitmap[0]) { -+ if (sbi->s_loaded_block_bitmaps > 0 && -+ sbi->s_block_bitmap_number[0] == block_group && -+ sbi->s_block_bitmap[0]) { - return 0; - } - /* - * Or can we do a fast lookup based on a loaded group on a filesystem - * small enough to be mapped directly into the superblock? - */ -- else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && -- sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group -- && sb->u.ext3_sb.s_block_bitmap[block_group]) { -+ else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED && -+ sbi->s_block_bitmap_number[block_group] == block_group -+ && sbi->s_block_bitmap[block_group]) { - slot = block_group; - } - /* -@@ -243,7 +241,7 @@ - * If it's a valid slot, we may still have cached a previous IO error, - * in which case the bh in the superblock cache will be zero. - */ -- if (!sb->u.ext3_sb.s_block_bitmap[slot]) -+ if (!sbi->s_block_bitmap[slot]) - return -EIO; - - /* -@@ -275,7 +273,7 @@ - return; - } - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { -@@ -305,7 +303,7 @@ - if (bitmap_nr < 0) - goto error_return; - -- bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - gdp = ext3_get_group_desc (sb, block_group, &gd_bh); - if (!gdp) - goto error_return; -@@ -330,8 +328,8 @@ - if (err) - goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto error_return; - -@@ -409,8 +407,8 @@ - if (!err) err = ret; - - /* And the superblock */ -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock"); -- ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock"); -+ ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!err) err = ret; - - if (overflow && !err) { -@@ -563,12 +561,12 @@ - } - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (le32_to_cpu(es->s_free_blocks_count) <= - le32_to_cpu(es->s_r_blocks_count) && -- ((sb->u.ext3_sb.s_resuid != current->fsuid) && -- (sb->u.ext3_sb.s_resgid == 0 || -- !in_group_p (sb->u.ext3_sb.s_resgid)) && -+ ((EXT3_SB(sb)->s_resuid != current->fsuid) && -+ (EXT3_SB(sb)->s_resgid == 0 || -+ !in_group_p (EXT3_SB(sb)->s_resgid)) && - !capable(CAP_SYS_RESOURCE))) - goto out; - -@@ -598,7 +596,7 @@ - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - - ext3_debug ("goal is at %d:%d.\n", i, j); - -@@ -621,9 +619,9 @@ - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. - */ -- for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) { -+ for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) { - i++; -- if (i >= sb->u.ext3_sb.s_groups_count) -+ if (i >= EXT3_SB(sb)->s_groups_count) - i = 0; - gdp = ext3_get_group_desc (sb, i, &bh2); - if (!gdp) { -@@ -635,7 +633,7 @@ - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - j = find_next_usable_block(-1, bh, - EXT3_BLOCKS_PER_GROUP(sb)); - if (j >= 0) -@@ -673,8 +671,8 @@ - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto out; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto out; - - tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb) -@@ -807,7 +805,7 @@ - if (!fatal) fatal = err; - - BUFFER_TRACE(bh, "journal_dirty_metadata for superblock"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - - sb->s_dirt = 1; -@@ -845,11 +843,11 @@ - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -858,7 +856,7 @@ - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr], -+ x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr], - sb->s_blocksize); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); -@@ -869,7 +867,7 @@ - unlock_super (sb); - return bitmap_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); -+ return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); - #endif - } - -@@ -878,7 +876,7 @@ - unsigned char * map) - { - return ext3_test_bit ((block - -- le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) % -+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); - } - -@@ -946,11 +944,11 @@ - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -984,7 +982,7 @@ - "Inode bitmap for group %d is marked free", - i); - -- for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++) -+ for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++) - if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j, - sb, bh->b_data)) - ext3_error (sb, "ext3_check_blocks_bitmap", -Index: linux-2.4.21-chaos/fs/ext3/dir.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/dir.c 2003-12-12 11:36:13.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/dir.c 2003-12-12 12:50:34.000000000 +0300 -@@ -67,7 +67,7 @@ - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > -- le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) -Index: linux-2.4.21-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 12:53:11.000000000 +0300 -@@ -75,8 +75,8 @@ - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -228,7 +228,7 @@ - clear_inode (inode); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", - "reserved or nonexistent inode %lu", ino); -@@ -240,7 +240,7 @@ - if (bitmap_nr < 0) - goto error_return; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - - BUFFER_TRACE(bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh); -@@ -258,8 +258,8 @@ - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto error_return; - - if (gdp) { -@@ -274,9 +274,9 @@ - if (!fatal) fatal = err; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, - "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -@@ -307,6 +307,8 @@ - int i, j, avefreei; - struct inode * inode; - int bitmap_nr; -+ struct ext3_inode_info *ei; -+ struct ext3_sb_info *sbi; - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -@@ -320,19 +322,21 @@ - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ sbi = EXT3_SB(sb); -+ ei = EXT3_I(inode); -+ init_rwsem(&ei->truncate_sem); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = sbi->s_es; - repeat: - gdp = NULL; - i = 0; - - if (S_ISDIR(mode)) { - avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -+ sbi->s_groups_count; - if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -+ for (j = 0; j < sbi->s_groups_count; j++) { - struct buffer_head *temp_buffer; - tmp = ext3_get_group_desc (sb, j, &temp_buffer); - if (tmp && -@@ -352,7 +356,7 @@ - /* - * Try to place the inode in its parent directory - */ -- i = dir->u.ext3_i.i_block_group; -+ i = EXT3_I(dir)->i_block_group; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) - gdp = tmp; -@@ -362,10 +366,10 @@ - * Use a quadratic hash to find a group with a - * free inode - */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -+ for (j = 1; j < sbi->s_groups_count; j <<= 1) { - i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -+ if (i >= sbi->s_groups_count) -+ i -= sbi->s_groups_count; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { -@@ -378,9 +382,9 @@ - /* - * That failed: try linear search for a free inode - */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -+ i = EXT3_I(dir)->i_block_group + 1; -+ for (j = 2; j < sbi->s_groups_count; j++) { -+ if (++i >= sbi->s_groups_count) - i = 0; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && -@@ -401,11 +405,11 @@ - if (bitmap_nr < 0) - goto fail; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = sbi->s_inode_bitmap[bitmap_nr]; - - if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- EXT3_INODES_PER_GROUP(sb))) < -- EXT3_INODES_PER_GROUP(sb)) { -+ sbi->s_inodes_per_group)) < -+ sbi->s_inodes_per_group) { - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) goto fail; -@@ -459,13 +463,13 @@ - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) goto fail; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; - if (err) goto fail; - -@@ -485,35 +489,35 @@ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; - if (S_ISLNK(mode)) -- inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); -+ ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = 0; -- inode->u.ext3_i.i_frag_no = 0; -- inode->u.ext3_i.i_frag_size = 0; -+ ei->i_faddr = 0; -+ ei->i_frag_no = 0; -+ ei->i_frag_size = 0; - #endif -- inode->u.ext3_i.i_file_acl = 0; -- inode->u.ext3_i.i_dir_acl = 0; -- inode->u.ext3_i.i_dtime = 0; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_file_acl = 0; -+ ei->i_dir_acl = 0; -+ ei->i_dtime = 0; -+ INIT_LIST_HEAD(&ei->i_orphan); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = i; -+ ei->i_block_group = i; - - ext3_set_inode_flags(inode); - if (IS_SYNC(inode)) - handle->h_sync = 1; - insert_inode_hash(inode); -- inode->i_generation = sb->u.ext3_sb.s_next_generation++; -+ inode->i_generation = sbi->s_next_generation++; - -- inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ ei->i_state = EXT3_STATE_NEW; - err = ext3_mark_inode_dirty(handle, inode); - if (err) goto fail; - - #ifdef CONFIG_EXT3_FS_XATTR -- init_rwsem(&inode->u.ext3_i.xattr_sem); -+ init_rwsem(&EXT3_I(inode)->xattr_sem); - #endif - - unlock_super (sb); -@@ -600,19 +604,19 @@ - - unsigned long ext3_count_free_inodes (struct super_block * sb) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_super_block *es = sbi->s_es; - #ifdef EXT3FS_DEBUG -- struct ext3_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -621,8 +625,8 @@ - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -- EXT3_INODES_PER_GROUP(sb) / 8); -+ x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr], -+ sbi->s_inodes_per_group / 8); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; -@@ -632,7 +636,7 @@ - unlock_super (sb); - return desc_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); -+ return le32_to_cpu(es->s_free_inodes_count); - #endif - } - -@@ -641,16 +645,18 @@ - void ext3_check_inodes_bitmap (struct super_block * sb) - { - struct ext3_super_block * es; -+ struct ext3_sb_info *sbi; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ sbi = EXT3_SB(sb); -+ es = sbi->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -659,7 +665,7 @@ - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -+ x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr], - EXT3_INODES_PER_GROUP(sb) / 8); - if (le16_to_cpu(gdp->bg_free_inodes_count) != x) - ext3_error (sb, "ext3_check_inodes_bitmap", -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-05 07:55:47.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 12:55:41.000000000 +0300 -@@ -47,7 +47,7 @@ - */ - static inline int ext3_inode_is_fast_symlink(struct inode *inode) - { -- int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ int ea_blocks = EXT3_I(inode)->i_file_acl ? - (inode->i_sb->s_blocksize >> 9) : 0; - - return (S_ISLNK(inode->i_mode) && -@@ -224,7 +224,7 @@ - * (Well, we could do this if we need to, but heck - it works) - */ - ext3_orphan_del(handle, inode); -- inode->u.ext3_i.i_dtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = CURRENT_TIME; - - /* - * One subtle ordering requirement: if anything has gone wrong -@@ -248,13 +248,14 @@ - void ext3_discard_prealloc (struct inode * inode) - { - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); - lock_kernel(); - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count) { -- unsigned short total = inode->u.ext3_i.i_prealloc_count; -- unsigned long block = inode->u.ext3_i.i_prealloc_block; -- inode->u.ext3_i.i_prealloc_count = 0; -- inode->u.ext3_i.i_prealloc_block = 0; -+ if (ei->i_prealloc_count) { -+ unsigned short total = ei->i_prealloc_count; -+ unsigned long block = ei->i_prealloc_block; -+ ei->i_prealloc_count = 0; -+ ei->i_prealloc_block = 0; - /* Writer: end */ - ext3_free_blocks (inode, block, total); - } -@@ -271,13 +272,15 @@ - unsigned long result; - - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count && -- (goal == inode->u.ext3_i.i_prealloc_block || -- goal + 1 == inode->u.ext3_i.i_prealloc_block)) -+ if (ei->i_prealloc_count && -+ (goal == ei->i_prealloc_block || -+ goal + 1 == ei->i_prealloc_block)) - { -- result = inode->u.ext3_i.i_prealloc_block++; -- inode->u.ext3_i.i_prealloc_count--; -+ result = ei->i_prealloc_block++; -+ ei->i_prealloc_count--; - /* Writer: end */ - ext3_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); -@@ -287,8 +290,8 @@ - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext3_new_block (inode, goal, -- &inode->u.ext3_i.i_prealloc_count, -- &inode->u.ext3_i.i_prealloc_block, err); -+ &ei->i_prealloc_count, -+ &ei->i_prealloc_block, err); - else - result = ext3_new_block (inode, goal, 0, 0, err); - /* -@@ -422,7 +425,7 @@ - - *err = 0; - /* i_data is not going away, no lock needed */ -- add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets); -+ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { -@@ -466,7 +469,8 @@ - - static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind) - { -- u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; - u32 *p; - - /* Try to find previous block */ -@@ -482,9 +486,8 @@ - * It is going to be refered from inode itself? OK, just put it into - * the same cylinder group then. - */ -- return (inode->u.ext3_i.i_block_group * -- EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -- le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block); -+ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); - } - - /** -@@ -503,14 +506,15 @@ - static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], - Indirect *partial, unsigned long *goal) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_next_alloc* */ -- if (block == inode->u.ext3_i.i_next_alloc_block + 1) { -- inode->u.ext3_i.i_next_alloc_block++; -- inode->u.ext3_i.i_next_alloc_goal++; -+ if (block == ei->i_next_alloc_block + 1) { -+ ei->i_next_alloc_block++; -+ ei->i_next_alloc_goal++; - } - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - /* Reader: pointers, ->i_next_alloc* */ -@@ -519,8 +523,8 @@ - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. - */ -- if (block == inode->u.ext3_i.i_next_alloc_block) -- *goal = inode->u.ext3_i.i_next_alloc_goal; -+ if (block == ei->i_next_alloc_block) -+ *goal = ei->i_next_alloc_goal; - if (!*goal) - *goal = ext3_find_near(inode, partial); - #ifdef SEARCH_FROM_ZERO -@@ -646,6 +650,7 @@ - { - int i; - int err = 0; -+ struct ext3_inode_info *ei = EXT3_I(inode); - - /* - * If we're splicing into a [td]indirect block (as opposed to the -@@ -668,11 +673,11 @@ - /* That's it */ - - *where->p = where->key; -- inode->u.ext3_i.i_next_alloc_block = block; -- inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); -+ ei->i_next_alloc_block = block; -+ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - -@@ -756,6 +761,7 @@ - unsigned long goal; - int left; - int depth = ext3_block_to_path(inode, iblock, offsets); -+ struct ext3_inode_info *ei = EXT3_I(inode); - loff_t new_size; - - J_ASSERT(handle != NULL || create == 0); -@@ -809,7 +815,7 @@ - /* - * Block out ext3_truncate while we alter the tree - */ -- down_read(&inode->u.ext3_i.truncate_sem); -+ down_read(&ei->truncate_sem); - err = ext3_alloc_branch(handle, inode, left, goal, - offsets+(partial-chain), partial); - -@@ -821,7 +827,7 @@ - if (!err) - err = ext3_splice_branch(handle, inode, iblock, chain, - partial, left); -- up_read(&inode->u.ext3_i.truncate_sem); -+ up_read(&ei->truncate_sem); - if (err == -EAGAIN) - goto changed; - if (err) -@@ -981,7 +987,7 @@ - struct buffer_head *tmp_bh; - - for (i = 1; -- inode->u.ext3_i.i_prealloc_count && -+ EXT3_I(inode)->i_prealloc_count && - i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; - i++) { - /* -@@ -1199,8 +1205,8 @@ - kunmap(page); - } - } -- if (inode->i_size > inode->u.ext3_i.i_disksize) { -- inode->u.ext3_i.i_disksize = inode->i_size; -+ if (inode->i_size > EXT3_I(inode)->i_disksize) { -+ EXT3_I(inode)->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; -@@ -2011,7 +2017,8 @@ - void ext3_truncate(struct inode * inode) - { - handle_t *handle; -- u32 *i_data = inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *i_data = EXT3_I(inode)->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int offsets[4]; - Indirect chain[4]; -@@ -2072,13 +2079,13 @@ - * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. - */ -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext3_get_block() callers who want to - * modify the block allocation tree. - */ -- down_write(&inode->u.ext3_i.truncate_sem); -+ down_write(&ei->truncate_sem); - - if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], -@@ -2142,7 +2149,7 @@ - case EXT3_TIND_BLOCK: - ; - } -- up_write(&inode->u.ext3_i.truncate_sem); -+ up_write(&ei->truncate_sem); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - -@@ -2179,6 +2186,8 @@ - - int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) - { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; -@@ -2189,25 +2198,21 @@ - - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ inode->i_ino < EXT3_FIRST_INO(sb)) || -+ inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group; -+ if (block_group >= sbi->s_groups_count) { -+ ext3_error(sb, __FUNCTION__, "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -+ group_desc = block_group >> sbi->s_desc_per_block_bits; -+ desc = block_group & (sbi->s_desc_per_block - 1); -+ bh = sbi->s_group_desc[group_desc]; - if (!bh) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "Descriptor not loaded"); -+ ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -@@ -2215,17 +2220,17 @@ - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -+ sbi->s_inode_size; - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -+ (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -+ if (!(bh = sb_bread(sb, block))) { -+ ext3_error (sb, __FUNCTION__, - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ offset &= (EXT3_BLOCK_SIZE(sb) - 1); - - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -@@ -2239,7 +2244,7 @@ - - void ext3_set_inode_flags(struct inode *inode) - { -- unsigned int flags = inode->u.ext3_i.i_flags; -+ unsigned int flags = EXT3_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME); - if (flags & EXT3_SYNC_FL) -@@ -2257,6 +2262,7 @@ - { - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh; - int block; - -@@ -2264,7 +2270,7 @@ - goto bad_inode; - bh = iloc.bh; - raw_inode = iloc.raw_inode; -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ init_rwsem(&ei->truncate_sem); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -@@ -2277,7 +2283,7 @@ - inode->i_atime = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); -- inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); -+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses -@@ -2285,7 +2291,7 @@ - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || -- !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { -+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - goto bad_inode; -@@ -2300,33 +2306,33 @@ - * size */ - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - inode->i_version = ++event; -- inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags); -+ ei->i_flags = le32_to_cpu(raw_inode->i_flags); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); -- inode->u.ext3_i.i_frag_no = raw_inode->i_frag; -- inode->u.ext3_i.i_frag_size = raw_inode->i_fsize; -+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ ei->i_frag_no = raw_inode->i_frag; -+ ei->i_frag_size = raw_inode->i_fsize; - #endif -- inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); -+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); -+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = iloc.block_group; -+ ei->i_block_group = iloc.block_group; - - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! - */ - for (block = 0; block < EXT3_N_BLOCKS; block++) -- inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_data[block] = iloc.raw_inode->i_block[block]; -+ INIT_LIST_HEAD(&ei->i_orphan); - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -2350,15 +2356,15 @@ - brelse (iloc.bh); - ext3_set_inode_flags(inode); - #ifdef CONFIG_EXT3_FS_XATTR -- init_rwsem(&inode->u.ext3_i.xattr_sem); -+ init_rwsem(&ei->xattr_sem); - #endif - #ifdef CONFIG_EXT3_FS_POSIX_ACL -- if (inode->u.ext3_i.i_file_acl) { -+ if (ei->i_file_acl) { - /* The filesystem is mounted with ACL support, and there - are extended attributes for this inode. However we do - not yet know whether there are actually any ACLs. */ -- inode->u.ext3_i.i_acl = EXT3_ACL_NOT_CACHED; -- inode->u.ext3_i.i_default_acl = EXT3_ACL_NOT_CACHED; -+ ei->i_acl = EXT3_ACL_NOT_CACHED; -+ ei->i_default_acl = EXT3_ACL_NOT_CACHED; - } - #endif - -@@ -2380,6 +2386,7 @@ - struct ext3_iloc *iloc) - { - struct ext3_inode *raw_inode = iloc->raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - -@@ -2397,7 +2404,7 @@ - * Fix up interoperability with old kernels. Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ -- if(!inode->u.ext3_i.i_dtime) { -+ if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); - raw_inode->i_gid_high = -@@ -2415,34 +2422,33 @@ - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); -- raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize); -+ raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); -- raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime); -- raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags); -+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); -+ raw_inode->i_flags = cpu_to_le32(ei->i_flags); - #ifdef EXT3_FRAGMENTS -- raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr); -- raw_inode->i_frag = inode->u.ext3_i.i_frag_no; -- raw_inode->i_fsize = inode->u.ext3_i.i_frag_size; -+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ raw_inode->i_frag = ei->i_frag_no; -+ raw_inode->i_fsize = ei->i_frag_size; - #else - /* If we are not tracking these fields in the in-memory inode, - * then preserve them on disk, but still initialise them to zero - * for new inodes. */ -- if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { -+ if (ei->i_state & EXT3_STATE_NEW) { - raw_inode->i_faddr = 0; - raw_inode->i_frag = 0; - raw_inode->i_fsize = 0; - } - #endif -- raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl); -+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl); -+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { -- raw_inode->i_size_high = -- cpu_to_le32(inode->u.ext3_i.i_disksize >> 32); -- if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) { -+ raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32); -+ if (ei->i_disksize > MAX_NON_LFS) { - struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || -@@ -2452,7 +2458,7 @@ - * created, add a flag to the superblock. - */ - err = ext3_journal_get_write_access(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext3_update_dynamic_rev(sb); -@@ -2461,7 +2467,7 @@ - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - } - } - } -@@ -2470,13 +2476,13 @@ - raw_inode->i_block[0] = - cpu_to_le32(kdev_t_to_nr(inode->i_rdev)); - else for (block = 0; block < EXT3_N_BLOCKS; block++) -- raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; -+ raw_inode->i_block[block] = ei->i_data[block]; - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; -- EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; -+ ei->i_state &= ~EXT3_STATE_NEW; - - out_brelse: - brelse (bh); -@@ -2581,7 +2587,7 @@ - } - - error = ext3_orphan_add(handle, inode); -- inode->u.ext3_i.i_disksize = attr->ia_size; -+ EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); - if (!error) - error = rc; -@@ -2843,9 +2849,9 @@ - */ - - if (val) -- inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - else -- inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - - journal_unlock_updates(journal); - -Index: linux-2.4.21-chaos/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ioctl.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/ioctl.c 2003-12-12 12:50:34.000000000 +0300 -@@ -18,13 +18,14 @@ - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - unsigned int flags; - - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT3_IOC_GETFLAGS: -- flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; -+ flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); - case EXT3_IOC_SETFLAGS: { - handle_t *handle = NULL; -@@ -42,7 +43,7 @@ - if (get_user(flags, (int *) arg)) - return -EFAULT; - -- oldflags = inode->u.ext3_i.i_flags; -+ oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; -@@ -79,7 +80,7 @@ - - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; -- inode->u.ext3_i.i_flags = flags; -+ ei->i_flags = flags; - - ext3_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME; -@@ -138,12 +139,12 @@ - int ret = 0; - - set_current_state(TASK_INTERRUPTIBLE); -- add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -- if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { -+ add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); -+ if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { - schedule(); - ret = 1; - } -- remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -+ remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); - return ret; - } - #endif -Index: linux-2.4.21-chaos/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/namei.c 2003-12-12 11:36:13.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/namei.c 2003-12-12 12:56:27.000000000 +0300 -@@ -343,7 +343,7 @@ - goto fail; - } - hinfo->hash_version = root->info.hash_version; -- hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; - if (dentry) - ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); - hash = hinfo->hash; -@@ -1236,7 +1236,7 @@ - de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); - memset (&root->info, 0, sizeof(root->info)); - root->info.info_length = sizeof(root->info); -- root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - entries = root->entries; - dx_set_block (entries, 1); - dx_set_count (entries, 1); -@@ -1244,7 +1244,7 @@ - - /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; -- hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; - ext3fs_dirhash(name, namelen, &hinfo); - frame = frames; - frame->entries = entries; -@@ -1768,8 +1768,8 @@ - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto out_unlock; - -@@ -1780,7 +1780,7 @@ - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; -@@ -1854,8 +1854,7 @@ - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext3_iloc iloc2; -- struct inode *i_prev = -- list_entry(prev, struct inode, u.ext3_i.i_orphan); -+ struct inode *i_prev = orphan_list_entry(prev); - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-12 12:14:29.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-12 12:50:34.000000000 +0300 -@@ -125,7 +125,7 @@ - /* If no overrides were specified on the mount, then fall back - * to the default behaviour set in the filesystem's superblock - * on disk. */ -- switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { -+ switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) { - case EXT3_ERRORS_PANIC: - return EXT3_ERRORS_PANIC; - case EXT3_ERRORS_RO: -@@ -299,9 +299,9 @@ - return; - - printk (KERN_CRIT "Remounting filesystem read-only\n"); -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - sb->s_flags |= MS_RDONLY; -- sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; -+ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); - } - -@@ -407,8 +407,6 @@ - return ret; - } - --#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan) -- - static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) - { - struct list_head *l; -@@ -889,7 +887,7 @@ - return; - } - -- if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { -+ if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); -@@ -1571,12 +1569,14 @@ - struct ext3_super_block * es, - int sync) - { -+ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; -+ - es->s_wtime = cpu_to_le32(CURRENT_TIME); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); -- mark_buffer_dirty(sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbh, "marking dirty"); -+ mark_buffer_dirty(sbh); - if (sync) { -- ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); -- wait_on_buffer(sb->u.ext3_sb.s_sbh); -+ ll_rw_block(WRITE, 1, &sbh); -+ wait_on_buffer(sbh); - } - } - -@@ -1627,7 +1627,7 @@ - ext3_warning(sb, __FUNCTION__, "Marking fs in need of " - "filesystem check."); - -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); - -Index: linux-2.4.21-chaos/fs/ext3/symlink.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/symlink.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/symlink.c 2003-12-12 12:50:34.000000000 +0300 -@@ -24,14 +24,14 @@ - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_readlink(dentry, buffer, buflen, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data); - } - - static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_follow_link(nd, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_follow_link(nd, (char*)ei->i_data); - } - - struct inode_operations ext3_symlink_inode_operations = { -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 11:36:14.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 12:50:34.000000000 +0300 -@@ -87,22 +87,25 @@ - #define EXT3_MIN_BLOCK_SIZE 1024 - #define EXT3_MAX_BLOCK_SIZE 4096 - #define EXT3_MIN_BLOCK_LOG_SIZE 10 -+ - #ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) --#else --# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) --#endif --#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) --#ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) --#else --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) --#endif --#ifdef __KERNEL__ --#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) --#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) --#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) -+#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) -+#define EXT3_I(inode) (&((inode)->u.ext3_i)) -+ -+#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -+#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -+#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -+#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) - #else -+ -+/* Assume that user mode programs are passing in an ext3fs superblock, not -+ * a kernel struct super_block. This will allow us to call the feature-test -+ * macros from user land. */ -+#define EXT3_SB(sb) (sb) -+ -+#define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) - #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ - EXT3_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -@@ -110,6 +113,7 @@ - EXT3_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) - #endif -+#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - - /* - * Macro-instructions used to manage fragments -@@ -118,8 +122,8 @@ - #define EXT3_MAX_FRAG_SIZE 4096 - #define EXT3_MIN_FRAG_LOG_SIZE 10 - #ifdef __KERNEL__ --# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) --# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) -+# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -+# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) - #else - # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) - # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) -@@ -143,15 +147,13 @@ - /* - * Macro-instructions used to manage group descriptors - */ -+# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -+# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) - #ifdef __KERNEL__ --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) --# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) --# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) --# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) -+# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) -+# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) - #else --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) - # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) --# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) - #endif - - /* -@@ -326,7 +328,7 @@ - #ifndef _LINUX_EXT2_FS_H - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt --#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ -+#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) - #else - #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -@@ -427,17 +429,11 @@ - __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - --#ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) (&((inode)->u.ext3_i)) --#else --/* Assume that user mode programs are passing in an ext3fs superblock, not -- * a kernel struct super_block. This will allow us to call the feature-test -- * macros from user land. */ --#define EXT3_SB(sb) (sb) --#endif -- --#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime -+#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime -+static inline struct inode *orphan_list_entry(struct list_head *l) -+{ -+ return list_entry(l, struct inode, u.ext3_i.i_orphan); -+} - - /* - * Codes for operating systems -Index: linux-2.4.21-chaos/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_jbd.h 2003-12-12 11:36:14.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_jbd.h 2003-12-12 12:50:34.000000000 +0300 -@@ -285,7 +285,7 @@ - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - return 1; -- if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL) -+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 1; - return 0; - } diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch new file mode 100644 index 0000000..7b6b1b8 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch @@ -0,0 +1,426 @@ +Index: linux-2.6.16-sles10/fs/ext3/inode.c +=================================================================== +--- linux-2.6.16-sles10.orig/fs/ext3/inode.c ++++ linux-2.6.16-sles10/fs/ext3/inode.c +@@ -2558,6 +2558,13 @@ void ext3_read_inode(struct inode * inod + EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); + EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); + ++ ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version); ++ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { ++ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) ++ ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) ++ << 32; ++ } ++ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; +@@ -2696,8 +2703,14 @@ static int ext3_do_update_inode(handle_t + } else for (block = 0; block < EXT3_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + +- if (ei->i_extra_isize) ++ raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version); ++ if (ei->i_extra_isize) { ++ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) { ++ raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version ++ >> 32); ++ } + raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); ++ } + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + rc = ext3_journal_dirty_metadata(handle, bh); +@@ -2971,10 +2984,32 @@ ext3_reserve_inode_write(handle_t *handl + int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) + { + struct ext3_iloc iloc; +- int err; ++ int err, ret; ++ static int expand_message; + + might_sleep(); + err = ext3_reserve_inode_write(handle, inode, &iloc); ++ if (EXT3_I(inode)->i_extra_isize < ++ EXT3_SB(inode->i_sb)->s_want_extra_isize && ++ !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) { ++ /* We need extra buffer credits since we may write into EA block ++ * with this same handle */ ++ if ((ext3_journal_extend(handle, ++ EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { ++ ret = ext3_expand_extra_isize(inode, ++ EXT3_SB(inode->i_sb)->s_want_extra_isize, ++ iloc, handle); ++ if (ret) { ++ EXT3_I(inode)->i_state |= EXT3_STATE_NO_EXPAND; ++ if (!expand_message) { ++ ext3_warning(inode->i_sb, __FUNCTION__, ++ "Unable to expand inode %lu. Delete some" ++ " EAs or run e2fsck.", inode->i_ino); ++ expand_message = 1; ++ } ++ } ++ } ++ } + if (!err) + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + return err; +Index: linux-2.6.16-sles10/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.16-sles10.orig/include/linux/ext3_fs.h ++++ linux-2.6.16-sles10/include/linux/ext3_fs.h +@@ -205,6 +205,7 @@ struct ext3_group_desc + #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ + #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ + #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ ++#define EXT3_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ + + /* Used to pass group descriptor data when online resize is done */ + struct ext3_new_group_input { +@@ -281,7 +282,7 @@ struct ext3_inode { + __le32 i_flags; /* File flags */ + union { + struct { +- __u32 l_i_reserved1; ++ __u32 l_i_version; + } linux1; + struct { + __u32 h_i_translator; +@@ -326,6 +327,7 @@ struct ext3_inode { + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ ++ __le32 i_version_hi; /* high 32 bits for 64-bit version */ + }; + + #define i_size_high i_dir_acl +@@ -388,6 +390,8 @@ do { \ + raw_inode->xtime ## _extra); \ + } while (0) + ++#define i_disk_version osd1.linux1.l_i_version ++ + #if defined(__KERNEL__) || defined(__linux__) + #define i_reserved1 osd1.linux1.l_i_reserved1 + #define i_frag osd2.linux2.l_i_frag +Index: linux-2.6.16-sles10/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.16-sles10.orig/include/linux/ext3_fs_i.h ++++ linux-2.6.16-sles10/include/linux/ext3_fs_i.h +@@ -20,6 +20,8 @@ + #include + #include + ++#define HAVE_DISK_INODE_VERSION ++ + struct ext3_reserve_window { + __u32 _rsv_start; /* First byte reserved */ + __u32 _rsv_end; /* Last byte reserved or 0 */ +@@ -138,6 +140,8 @@ struct ext3_inode_info { + __u32 i_cached_extent[4]; + + void *i_filterdata; ++ ++ __u64 i_fs_version; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.16-sles10/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.16-sles10.orig/fs/ext3/xattr.c ++++ linux-2.6.16-sles10/fs/ext3/xattr.c +@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl + } + } + ++static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last, ++ size_t *min_offs, void *base, int *total) ++{ ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ *total += EXT3_XATTR_LEN(last->e_name_len); ++ if (!last->e_value_block && last->e_value_size) { ++ size_t offs = le16_to_cpu(last->e_value_offs); ++ if (offs < *min_offs) ++ *min_offs = offs; ++ } ++ } ++ return (*min_offs - ((void *)last - base) - sizeof(__u32)); ++} ++ + struct ext3_xattr_info { + int name_index; + const char *name; +@@ -1007,6 +1021,8 @@ ext3_xattr_set_handle(handle_t *handle, + if (!error) { + ext3_xattr_update_super_block(handle, inode->i_sb); + inode->i_ctime = ext3_current_time(inode); ++ if (!value) ++ EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND; + ext3_mark_inode_dirty(handle, inode); + /* + * The bh is consumed by ext3_mark_iloc_dirty, even with +@@ -1059,6 +1075,249 @@ retry: + return error; + } + ++static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry, ++ int value_offs_shift, void *to, ++ void *from, size_t n, int blocksize) ++{ ++ struct ext3_xattr_entry *last = entry; ++ int new_offs; ++ ++ /* Adjust the value offsets of the entries */ ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ if (!last->e_value_block && last->e_value_size) { ++ new_offs = le16_to_cpu(last->e_value_offs) + ++ value_offs_shift; ++ BUG_ON(new_offs + le32_to_cpu(last->e_value_size) > ++ blocksize); ++ last->e_value_offs = cpu_to_le16(new_offs); ++ } ++ } ++ /* Shift the entries by n bytes */ ++ memmove(to, from, n); ++} ++ ++/* Expand an inode by new_extra_isize bytes. ++ * Returns 0 on success or negative error number on failure. ++ */ ++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, ++ struct ext3_iloc iloc, handle_t *handle) ++{ ++ struct ext3_inode *raw_inode; ++ struct ext3_xattr_ibody_header *header; ++ struct ext3_xattr_entry *entry, *last, *first; ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_ibody_find *is = NULL; ++ struct ext3_xattr_block_find *bs = NULL; ++ char *buffer = NULL, *b_entry_name = NULL; ++ size_t min_offs, free; ++ int total_ino, total_blk; ++ void *base, *start, *end; ++ int extra_isize = 0, error = 0, tried_min_extra_isize = 0; ++ int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize; ++ ++ down_write(&EXT3_I(inode)->xattr_sem); ++ ++retry: ++ if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) { ++ up_write(&EXT3_I(inode)->xattr_sem); ++ return 0; ++ } ++ ++ raw_inode = ext3_raw_inode(&iloc); ++ ++ header = IHDR(inode, raw_inode); ++ entry = IFIRST(header); ++ ++ /* No extended attributes present */ ++ if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) || ++ header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) { ++ memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0, ++ new_extra_isize); ++ EXT3_I(inode)->i_extra_isize = new_extra_isize; ++ goto cleanup; ++ } ++ ++ /* ++ * Check if enough free space is available in the inode to shift the ++ * entries ahead by new_extra_isize. ++ */ ++ ++ base = start = entry; ++ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; ++ min_offs = end - base; ++ last = entry; ++ total_ino = sizeof(struct ext3_xattr_ibody_header); ++ ++ free = ext3_xattr_free_space(last, &min_offs, base, &total_ino); ++ if (free >= new_extra_isize) { ++ entry = IFIRST(header); ++ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - ++ new_extra_isize, (void *)raw_inode + ++ EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize, ++ (void *)header, total_ino, ++ inode->i_sb->s_blocksize); ++ EXT3_I(inode)->i_extra_isize = new_extra_isize; ++ error = 0; ++ goto cleanup; ++ } ++ ++ /* ++ * Enough free space isn't available in the inode, check if ++ * EA block can hold new_extra_isize bytes. ++ */ ++ if (EXT3_I(inode)->i_file_acl) { ++ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); ++ error = -EIO; ++ if (!bh) ++ goto cleanup; ++ if (ext3_xattr_check_block(bh)) { ++ ext3_error(inode->i_sb, __FUNCTION__, ++ "inode %lu: bad block %d", inode->i_ino, ++ EXT3_I(inode)->i_file_acl); ++ error = -EIO; ++ goto cleanup; ++ } ++ base = BHDR(bh); ++ first = BFIRST(bh); ++ end = bh->b_data + bh->b_size; ++ min_offs = end - base; ++ free = ext3_xattr_free_space(first, &min_offs, base, ++ &total_blk); ++ if (free < new_extra_isize) { ++ if (!tried_min_extra_isize && s_min_extra_isize) { ++ tried_min_extra_isize++; ++ new_extra_isize = s_min_extra_isize; ++ goto retry; ++ } ++ error = -1; ++ goto cleanup; ++ } ++ } else { ++ free = inode->i_sb->s_blocksize; ++ } ++ ++ while (new_extra_isize > 0) { ++ size_t offs, size, entry_size; ++ struct ext3_xattr_entry *small_entry = NULL; ++ struct ext3_xattr_info i = { ++ .value = NULL, ++ .value_len = 0, ++ }; ++ unsigned int total_size, shift_bytes, temp = ~0U; ++ ++ is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct ++ ext3_xattr_ibody_find), GFP_KERNEL); ++ bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct ++ ext3_xattr_block_find), GFP_KERNEL); ++ memset((void *)is, 0, sizeof(struct ext3_xattr_ibody_find)); ++ memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find)); ++ ++ is->s.not_found = bs->s.not_found = -ENODATA; ++ is->iloc.bh = NULL; ++ bs->bh = NULL; ++ ++ last = IFIRST(header); ++ /* Find the entry best suited to be pushed into EA block */ ++ entry = NULL; ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + ++ EXT3_XATTR_LEN(last->e_name_len); ++ if (total_size <= free && total_size < temp) { ++ if (total_size < new_extra_isize) { ++ small_entry = last; ++ } else { ++ entry = last; ++ temp = total_size; ++ } ++ } ++ } ++ ++ if (entry == NULL) { ++ if (small_entry) { ++ entry = small_entry; ++ } else { ++ if (!tried_min_extra_isize && ++ s_min_extra_isize) { ++ tried_min_extra_isize++; ++ new_extra_isize = s_min_extra_isize; ++ goto retry; ++ } ++ error = -1; ++ goto cleanup; ++ } ++ } ++ offs = le16_to_cpu(entry->e_value_offs); ++ size = le32_to_cpu(entry->e_value_size); ++ entry_size = EXT3_XATTR_LEN(entry->e_name_len); ++ i.name_index = entry->e_name_index, ++ buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL); ++ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL); ++ /* Save the entry name and the entry value */ ++ memcpy((void *)buffer, (void *)IFIRST(header) + offs, ++ EXT3_XATTR_SIZE(size)); ++ memcpy((void *)b_entry_name, (void *)entry->e_name, ++ entry->e_name_len); ++ b_entry_name[entry->e_name_len] = '\0'; ++ i.name = b_entry_name; ++ ++ error = ext3_get_inode_loc(inode, &is->iloc); ++ if (error) ++ goto cleanup; ++ ++ error = ext3_xattr_ibody_find(inode, &i, is); ++ if (error) ++ goto cleanup; ++ ++ /* Remove the chosen entry from the inode */ ++ error = ext3_xattr_ibody_set(handle, inode, &i, is); ++ ++ entry = IFIRST(header); ++ if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize) ++ shift_bytes = new_extra_isize; ++ else ++ shift_bytes = entry_size + size; ++ /* Adjust the offsets and shift the remaining entries ahead */ ++ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - ++ shift_bytes, (void *)raw_inode + ++ EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, ++ (void *)header, total_ino - entry_size, ++ inode->i_sb->s_blocksize); ++ ++ extra_isize += shift_bytes; ++ new_extra_isize -= shift_bytes; ++ EXT3_I(inode)->i_extra_isize = extra_isize; ++ ++ i.name = b_entry_name; ++ i.value = buffer; ++ i.value_len = cpu_to_le32(size); ++ error = ext3_xattr_block_find(inode, &i, bs); ++ if (error) ++ goto cleanup; ++ ++ /* Add entry which was removed from the inode into the block */ ++ error = ext3_xattr_block_set(handle, inode, &i, bs); ++ if (error) ++ goto cleanup; ++ } ++ ++cleanup: ++ if (b_entry_name) ++ kfree(b_entry_name); ++ if (buffer) ++ kfree(buffer); ++ if (is) { ++ brelse(is->iloc.bh); ++ kfree(is); ++ } ++ if (bs) ++ kfree(bs); ++ brelse(bh); ++ up_write(&EXT3_I(inode)->xattr_sem); ++ return error; ++} ++ ++ ++ + /* + * ext3_xattr_delete_inode() + * +Index: linux-2.6.16-sles10/fs/ext3/xattr.h +=================================================================== +--- linux-2.6.16-sles10.orig/fs/ext3/xattr.h ++++ linux-2.6.16-sles10/fs/ext3/xattr.h +@@ -75,6 +75,9 @@ extern int ext3_xattr_set_handle(handle_ + extern void ext3_xattr_delete_inode(handle_t *, struct inode *); + extern void ext3_xattr_put_super(struct super_block *); + ++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, ++ struct ext3_iloc iloc, handle_t *handle); ++ + extern int init_ext3_xattr(void); + extern void exit_ext3_xattr(void); + diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch new file mode 100644 index 0000000..26f71ac --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch @@ -0,0 +1,426 @@ +Index: linux-2.6.18/fs/ext3/inode.c +=================================================================== +--- linux-2.6.18.orig/fs/ext3/inode.c ++++ linux-2.6.18/fs/ext3/inode.c +@@ -2703,6 +2703,13 @@ void ext3_read_inode(struct inode * inod + EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); + EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); + ++ ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version); ++ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { ++ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) ++ ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) ++ << 32; ++ } ++ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; +@@ -2841,8 +2848,14 @@ static int ext3_do_update_inode(handle_t + } else for (block = 0; block < EXT3_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + +- if (ei->i_extra_isize) ++ raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version); ++ if (ei->i_extra_isize) { ++ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) { ++ raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version ++ >> 32); ++ } + raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); ++ } + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + rc = ext3_journal_dirty_metadata(handle, bh); +@@ -3116,10 +3129,32 @@ ext3_reserve_inode_write(handle_t *handl + int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) + { + struct ext3_iloc iloc; +- int err; ++ int err, ret; ++ static int expand_message; + + might_sleep(); + err = ext3_reserve_inode_write(handle, inode, &iloc); ++ if (EXT3_I(inode)->i_extra_isize < ++ EXT3_SB(inode->i_sb)->s_want_extra_isize && ++ !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) { ++ /* We need extra buffer credits since we may write into EA block ++ * with this same handle */ ++ if ((ext3_journal_extend(handle, ++ EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { ++ ret = ext3_expand_extra_isize(inode, ++ EXT3_SB(inode->i_sb)->s_want_extra_isize, ++ iloc, handle); ++ if (ret) { ++ EXT3_I(inode)->i_state |= EXT3_STATE_NO_EXPAND; ++ if (!expand_message) { ++ ext3_warning(inode->i_sb, __FUNCTION__, ++ "Unable to expand inode %lu. Delete some" ++ " EAs or run e2fsck.", inode->i_ino); ++ expand_message = 1; ++ } ++ } ++ } ++ } + if (!err) + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + return err; +Index: linux-2.6.18/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.18.orig/include/linux/ext3_fs.h ++++ linux-2.6.18/include/linux/ext3_fs.h +@@ -201,6 +201,7 @@ struct ext3_group_desc + #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ + #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ + #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ ++#define EXT3_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ + + /* Used to pass group descriptor data when online resize is done */ + struct ext3_new_group_input { +@@ -277,7 +278,7 @@ struct ext3_inode { + __le32 i_flags; /* File flags */ + union { + struct { +- __u32 l_i_reserved1; ++ __u32 l_i_version; + } linux1; + struct { + __u32 h_i_translator; +@@ -322,6 +323,7 @@ struct ext3_inode { + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ ++ __le32 i_version_hi; /* high 32 bits for 64-bit version */ + }; + + #define i_size_high i_dir_acl +@@ -384,6 +386,8 @@ do { \ + raw_inode->xtime ## _extra); \ + } while (0) + ++#define i_disk_version osd1.linux1.l_i_version ++ + #if defined(__KERNEL__) || defined(__linux__) + #define i_reserved1 osd1.linux1.l_i_reserved1 + #define i_frag osd2.linux2.l_i_frag +Index: linux-2.6.18/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.18.orig/include/linux/ext3_fs_i.h ++++ linux-2.6.18/include/linux/ext3_fs_i.h +@@ -21,6 +21,8 @@ + #include + #include + ++#define HAVE_DISK_INODE_VERSION ++ + /* data type for block offset of block group */ + typedef int ext3_grpblk_t; + +@@ -147,6 +149,8 @@ struct ext3_inode_info { + struct timespec i_crtime; + + void *i_filterdata; ++ ++ __u64 i_fs_version; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.18/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.18.orig/fs/ext3/xattr.c ++++ linux-2.6.18/fs/ext3/xattr.c +@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl + } + } + ++static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last, ++ size_t *min_offs, void *base, int *total) ++{ ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ *total += EXT3_XATTR_LEN(last->e_name_len); ++ if (!last->e_value_block && last->e_value_size) { ++ size_t offs = le16_to_cpu(last->e_value_offs); ++ if (offs < *min_offs) ++ *min_offs = offs; ++ } ++ } ++ return (*min_offs - ((void *)last - base) - sizeof(__u32)); ++} ++ + struct ext3_xattr_info { + int name_index; + const char *name; +@@ -1008,6 +1022,8 @@ ext3_xattr_set_handle(handle_t *handle, + if (!error) { + ext3_xattr_update_super_block(handle, inode->i_sb); + inode->i_ctime = ext3_current_time(inode); ++ if (!value) ++ EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND; + error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); + /* + * The bh is consumed by ext3_mark_iloc_dirty, even with +@@ -1060,6 +1076,249 @@ retry: + return error; + } + ++static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry, ++ int value_offs_shift, void *to, ++ void *from, size_t n, int blocksize) ++{ ++ struct ext3_xattr_entry *last = entry; ++ int new_offs; ++ ++ /* Adjust the value offsets of the entries */ ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ if (!last->e_value_block && last->e_value_size) { ++ new_offs = le16_to_cpu(last->e_value_offs) + ++ value_offs_shift; ++ BUG_ON(new_offs + le32_to_cpu(last->e_value_size) > ++ blocksize); ++ last->e_value_offs = cpu_to_le16(new_offs); ++ } ++ } ++ /* Shift the entries by n bytes */ ++ memmove(to, from, n); ++} ++ ++/* Expand an inode by new_extra_isize bytes. ++ * Returns 0 on success or negative error number on failure. ++ */ ++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, ++ struct ext3_iloc iloc, handle_t *handle) ++{ ++ struct ext3_inode *raw_inode; ++ struct ext3_xattr_ibody_header *header; ++ struct ext3_xattr_entry *entry, *last, *first; ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_ibody_find *is = NULL; ++ struct ext3_xattr_block_find *bs = NULL; ++ char *buffer = NULL, *b_entry_name = NULL; ++ size_t min_offs, free; ++ int total_ino, total_blk; ++ void *base, *start, *end; ++ int extra_isize = 0, error = 0, tried_min_extra_isize = 0; ++ int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize; ++ ++ down_write(&EXT3_I(inode)->xattr_sem); ++ ++retry: ++ if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) { ++ up_write(&EXT3_I(inode)->xattr_sem); ++ return 0; ++ } ++ ++ raw_inode = ext3_raw_inode(&iloc); ++ ++ header = IHDR(inode, raw_inode); ++ entry = IFIRST(header); ++ ++ /* No extended attributes present */ ++ if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) || ++ header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) { ++ memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0, ++ new_extra_isize); ++ EXT3_I(inode)->i_extra_isize = new_extra_isize; ++ goto cleanup; ++ } ++ ++ /* ++ * Check if enough free space is available in the inode to shift the ++ * entries ahead by new_extra_isize. ++ */ ++ ++ base = start = entry; ++ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; ++ min_offs = end - base; ++ last = entry; ++ total_ino = sizeof(struct ext3_xattr_ibody_header); ++ ++ free = ext3_xattr_free_space(last, &min_offs, base, &total_ino); ++ if (free >= new_extra_isize) { ++ entry = IFIRST(header); ++ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - ++ new_extra_isize, (void *)raw_inode + ++ EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize, ++ (void *)header, total_ino, ++ inode->i_sb->s_blocksize); ++ EXT3_I(inode)->i_extra_isize = new_extra_isize; ++ error = 0; ++ goto cleanup; ++ } ++ ++ /* ++ * Enough free space isn't available in the inode, check if ++ * EA block can hold new_extra_isize bytes. ++ */ ++ if (EXT3_I(inode)->i_file_acl) { ++ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); ++ error = -EIO; ++ if (!bh) ++ goto cleanup; ++ if (ext3_xattr_check_block(bh)) { ++ ext3_error(inode->i_sb, __FUNCTION__, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, ++ EXT3_I(inode)->i_file_acl); ++ error = -EIO; ++ goto cleanup; ++ } ++ base = BHDR(bh); ++ first = BFIRST(bh); ++ end = bh->b_data + bh->b_size; ++ min_offs = end - base; ++ free = ext3_xattr_free_space(first, &min_offs, base, ++ &total_blk); ++ if (free < new_extra_isize) { ++ if (!tried_min_extra_isize && s_min_extra_isize) { ++ tried_min_extra_isize++; ++ new_extra_isize = s_min_extra_isize; ++ goto retry; ++ } ++ error = -1; ++ goto cleanup; ++ } ++ } else { ++ free = inode->i_sb->s_blocksize; ++ } ++ ++ while (new_extra_isize > 0) { ++ size_t offs, size, entry_size; ++ struct ext3_xattr_entry *small_entry = NULL; ++ struct ext3_xattr_info i = { ++ .value = NULL, ++ .value_len = 0, ++ }; ++ unsigned int total_size, shift_bytes, temp = ~0U; ++ ++ is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct ++ ext3_xattr_ibody_find), GFP_KERNEL); ++ bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct ++ ext3_xattr_block_find), GFP_KERNEL); ++ memset((void *)is, 0, sizeof(struct ext3_xattr_ibody_find)); ++ memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find)); ++ ++ is->s.not_found = bs->s.not_found = -ENODATA; ++ is->iloc.bh = NULL; ++ bs->bh = NULL; ++ ++ last = IFIRST(header); ++ /* Find the entry best suited to be pushed into EA block */ ++ entry = NULL; ++ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { ++ total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + ++ EXT3_XATTR_LEN(last->e_name_len); ++ if (total_size <= free && total_size < temp) { ++ if (total_size < new_extra_isize) { ++ small_entry = last; ++ } else { ++ entry = last; ++ temp = total_size; ++ } ++ } ++ } ++ ++ if (entry == NULL) { ++ if (small_entry) { ++ entry = small_entry; ++ } else { ++ if (!tried_min_extra_isize && ++ s_min_extra_isize) { ++ tried_min_extra_isize++; ++ new_extra_isize = s_min_extra_isize; ++ goto retry; ++ } ++ error = -1; ++ goto cleanup; ++ } ++ } ++ offs = le16_to_cpu(entry->e_value_offs); ++ size = le32_to_cpu(entry->e_value_size); ++ entry_size = EXT3_XATTR_LEN(entry->e_name_len); ++ i.name_index = entry->e_name_index, ++ buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL); ++ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL); ++ /* Save the entry name and the entry value */ ++ memcpy((void *)buffer, (void *)IFIRST(header) + offs, ++ EXT3_XATTR_SIZE(size)); ++ memcpy((void *)b_entry_name, (void *)entry->e_name, ++ entry->e_name_len); ++ b_entry_name[entry->e_name_len] = '\0'; ++ i.name = b_entry_name; ++ ++ error = ext3_get_inode_loc(inode, &is->iloc); ++ if (error) ++ goto cleanup; ++ ++ error = ext3_xattr_ibody_find(inode, &i, is); ++ if (error) ++ goto cleanup; ++ ++ /* Remove the chosen entry from the inode */ ++ error = ext3_xattr_ibody_set(handle, inode, &i, is); ++ ++ entry = IFIRST(header); ++ if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize) ++ shift_bytes = new_extra_isize; ++ else ++ shift_bytes = entry_size + size; ++ /* Adjust the offsets and shift the remaining entries ahead */ ++ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - ++ shift_bytes, (void *)raw_inode + ++ EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, ++ (void *)header, total_ino - entry_size, ++ inode->i_sb->s_blocksize); ++ ++ extra_isize += shift_bytes; ++ new_extra_isize -= shift_bytes; ++ EXT3_I(inode)->i_extra_isize = extra_isize; ++ ++ i.name = b_entry_name; ++ i.value = buffer; ++ i.value_len = cpu_to_le32(size); ++ error = ext3_xattr_block_find(inode, &i, bs); ++ if (error) ++ goto cleanup; ++ ++ /* Add entry which was removed from the inode into the block */ ++ error = ext3_xattr_block_set(handle, inode, &i, bs); ++ if (error) ++ goto cleanup; ++ } ++ ++cleanup: ++ if (b_entry_name) ++ kfree(b_entry_name); ++ if (buffer) ++ kfree(buffer); ++ if (is) { ++ brelse(is->iloc.bh); ++ kfree(is); ++ } ++ if (bs) ++ kfree(bs); ++ brelse(bh); ++ up_write(&EXT3_I(inode)->xattr_sem); ++ return error; ++} ++ ++ ++ + /* + * ext3_xattr_delete_inode() + * +Index: linux-2.6.18/fs/ext3/xattr.h +=================================================================== +--- linux-2.6.18.orig/fs/ext3/xattr.h ++++ linux-2.6.18/fs/ext3/xattr.h +@@ -74,6 +74,9 @@ extern int ext3_xattr_set_handle(handle_ + extern void ext3_xattr_delete_inode(handle_t *, struct inode *); + extern void ext3_xattr_put_super(struct super_block *); + ++int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, ++ struct ext3_iloc iloc, handle_t *handle); ++ + extern int init_ext3_xattr(void); + extern void exit_ext3_xattr(void); + diff --git a/lustre/kernel_patches/patches/ext3-largefile.patch b/lustre/kernel_patches/patches/ext3-largefile.patch deleted file mode 100644 index aa7a2f2..0000000 --- a/lustre/kernel_patches/patches/ext3-largefile.patch +++ /dev/null @@ -1,16 +0,0 @@ - fs/ext3/inode.c | 2 +- - 1 files changed, 1 insertion(+), 1 deletion(-) - ---- linux-2.4.20/fs/ext3/inode.c~ext3-largefile 2003-04-08 23:35:36.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-04-08 23:35:36.000000000 -0600 -@@ -2562,7 +2562,7 @@ void ext3_dirty_inode(struct inode *inod - handle_t *handle; - - lock_kernel(); -- handle = ext3_journal_start(inode, 1); -+ handle = ext3_journal_start(inode, 2); - if (IS_ERR(handle)) - goto out; - if (current_handle && - -_ diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch deleted file mode 100644 index 8fc1bd7..0000000 --- a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch +++ /dev/null @@ -1,63 +0,0 @@ -Index: linux-2.4.21/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/namei.c 2006-04-29 20:48:26.000000000 +0400 -+++ linux-2.4.21/fs/ext3/namei.c 2006-05-06 01:31:51.000000000 +0400 -@@ -955,6 +955,38 @@ static struct dentry *ext3_lookup(struct - } - } - -+ /* ".." shouldn't go into dcache to preserve dcache hierarchy -+ * otherwise we'll get parent being a child of actual child. -+ * see bug 10458 for details -bzzz */ -+ if (inode && (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 || -+ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.')))) { -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* first, look for an existing dentry - any one is good */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ if (goal == NULL) { -+ /* there is no alias, we need to make current dentry: -+ * a) inaccessible for __d_lookup() -+ * b) inaccessible for iopen */ -+ J_ASSERT(list_empty(&dentry->d_alias)); -+ dentry->d_flags |= DCACHE_NFSFS_RENAMED; -+ /* this is d_instantiate() ... */ -+ list_add(&dentry->d_alias, &inode->i_dentry); -+ dentry->d_inode = inode; -+ } -+ spin_unlock(&dcache_lock); -+ if (goal) -+ iput(inode); -+ return goal; -+ } -+ - return iopen_connect_dentry(dentry, inode, 1); - } - -Index: linux-2.4.21/fs/ext3/iopen.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/iopen.c 2006-04-29 20:48:23.000000000 +0400 -+++ linux-2.4.21/fs/ext3/iopen.c 2006-04-29 20:59:50.000000000 +0400 -@@ -92,9 +92,12 @@ static struct dentry *iopen_lookup(struc - assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); - } - -- if (!list_empty(&inode->i_dentry)) { -- alternate = list_entry(inode->i_dentry.next, -- struct dentry, d_alias); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ /* ignore dentries created for ".." to preserve -+ * proper dcache hierarchy -- bug 10458 */ -+ if (alternate->d_flags & DCACHE_NFSFS_RENAMED) -+ continue; - dget_locked(alternate); - alternate->d_vfs_flags |= DCACHE_REFERENCED; - iput(inode); diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch deleted file mode 100644 index a05256b..0000000 --- a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch +++ /dev/null @@ -1,63 +0,0 @@ -Index: linux-2.6.9-full/fs/ext3/iopen.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/iopen.c 2006-04-25 08:51:11.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/iopen.c 2006-05-06 01:21:11.000000000 +0400 -@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc - assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); - } - -- if (!list_empty(&inode->i_dentry)) { -- alternate = list_entry(inode->i_dentry.next, -- struct dentry, d_alias); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ /* ignore dentries created for ".." to preserve -+ * proper dcache hierarchy -- bug 10458 */ -+ if (alternate->d_flags & DCACHE_NFSFS_RENAMED) -+ continue; - dget_locked(alternate); - spin_lock(&alternate->d_lock); - alternate->d_flags |= DCACHE_REFERENCED; -Index: linux-2.6.9-full/fs/ext3/namei.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-05-06 01:21:10.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/namei.c 2006-05-06 01:29:30.000000000 +0400 -@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct - return ERR_PTR(-EACCES); - } - -+ /* ".." shouldn't go into dcache to preserve dcache hierarchy -+ * otherwise we'll get parent being a child of actual child. -+ * see bug 10458 for details -bzzz */ -+ if (inode && (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 || -+ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.')))) { -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* first, look for an existing dentry - any one is good */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ if (goal == NULL) { -+ /* there is no alias, we need to make current dentry: -+ * a) inaccessible for __d_lookup() -+ * b) inaccessible for iopen */ -+ J_ASSERT(list_empty(&dentry->d_alias)); -+ dentry->d_flags |= DCACHE_NFSFS_RENAMED; -+ /* this is d_instantiate() ... */ -+ list_add(&dentry->d_alias, &inode->i_dentry); -+ dentry->d_inode = inode; -+ } -+ spin_unlock(&dcache_lock); -+ if (goal) -+ iput(inode); -+ return goal; -+ } -+ - return iopen_connect_dentry(dentry, inode, 1); - } - diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-map_inode_page-2.4.21-suse2.patch deleted file mode 100644 index 76f5e21..0000000 --- a/lustre/kernel_patches/patches/ext3-map_inode_page-2.4.21-suse2.patch +++ /dev/null @@ -1,119 +0,0 @@ - - - - fs/ext3/ext3-exports.c | 3 ++ - fs/ext3/inode.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+) - -Index: linux-2.4.21-suse2/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/inode.c 2004-01-10 15:38:24.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/inode.c 2004-01-10 16:22:45.000000000 +0300 -@@ -3084,7 +3084,7 @@ - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { - ret = ext3_get_block_handle(handle, inode, blocks[i], -- &bh_tmp, 1); -+ &bh_tmp, 1, 1); - if (ret) - break; - -@@ -3105,3 +3105,80 @@ - ret = ret2; - return ret; - } -+ -+/* copied from fs/buffer.c */ -+static void unmap_underlying_metadata(struct buffer_head * bh) -+{ -+ struct buffer_head *old_bh; -+ -+ old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size); -+ if (old_bh) { -+ mark_buffer_clean(old_bh); -+ wait_on_buffer(old_bh); -+ clear_bit(BH_Req, &old_bh->b_state); -+ __brelse(old_bh); -+ } -+} -+ -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *blocks, int *created, int create) -+{ -+ unsigned int blocksize, blocks_per_page; -+ unsigned long iblock; -+ void *handle; -+ int i, rc = 0, failed = 0, needed_blocks; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; -+ iblock = page->index * blocks_per_page; -+ -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ blocks[i] = ext3_bmap(inode->i_mapping, iblock); -+ if (blocks[i] == 0) { -+ failed++; -+ if (created) -+ created[i] = -1; -+ } else if (created) { -+ created[i] = 0; -+ } -+ } -+ -+ if (failed == 0 || create == 0) -+ return 0; -+ -+ needed_blocks = ext3_writepage_trans_blocks(inode); -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ unlock_kernel(); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ iblock = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ struct buffer_head bh; -+ -+ if (blocks[i] != 0) -+ continue; -+ -+ rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); -+ if (rc) { -+ printk(KERN_INFO "ext3_map_inode_page: error %d " -+ "allocating block %ld\n", rc, iblock); -+ goto out; -+ } -+ /* Unmap any metadata buffers from the block mapping, to avoid -+ * data corruption due to direct-write from Lustre being -+ * clobbered by a later flush of the blockdev metadata buffer.*/ -+ if (buffer_new(&bh)) -+ unmap_underlying_metadata(&bh); -+ blocks[i] = bh.b_blocknr; -+ if (created) -+ created[i] = 1; -+ } -+ -+ out: -+ lock_kernel(); -+ ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ return rc; -+} -Index: linux-2.4.21-suse2/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ext3-exports.c 2004-01-10 15:38:24.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/ext3-exports.c 2004-01-10 16:22:09.000000000 +0300 -@@ -9,6 +9,8 @@ - - int ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *block, int *created, int create); - - EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); -@@ -19,3 +21,4 @@ - EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); - EXPORT_SYMBOL(ext3_prep_san_write); -+EXPORT_SYMBOL(ext3_map_inode_page); diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch deleted file mode 100644 index 2b6bcf1..0000000 --- a/lustre/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch +++ /dev/null @@ -1,86 +0,0 @@ - fs/ext3/inode.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/super.c | 3 +++ - 2 files changed, 55 insertions(+) - -Index: linux-2.6.0/fs/ext3/inode.c -=================================================================== ---- linux-2.6.0.orig/fs/ext3/inode.c 2003-12-31 00:33:49.000000000 +0300 -+++ linux-2.6.0/fs/ext3/inode.c 2003-12-31 01:14:17.000000000 +0300 -@@ -3136,3 +3136,62 @@ - ret = ret2; - return ret; - } -+ -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *blocks, int *created, int create) -+{ -+ unsigned int blocksize, blocks_per_page; -+ unsigned long iblock; -+ struct buffer_head dummy; -+ void *handle; -+ int i, rc = 0, failed = 0, needed_blocks; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; -+ iblock = page->index * blocks_per_page; -+ -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ blocks[i] = ext3_bmap(inode->i_mapping, iblock); -+ if (blocks[i] == 0) { -+ failed++; -+ if (created) -+ created[i] = -1; -+ } else if (created) { -+ created[i] = 0; -+ } -+ } -+ -+ if (failed == 0 || create == 0) -+ return 0; -+ -+ needed_blocks = ext3_writepage_trans_blocks(inode); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ iblock = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ if (blocks[i] != 0) -+ continue; -+ -+ rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1); -+ if (rc) { -+ printk(KERN_INFO "ext3_map_inode_page: error reading " -+ "block %ld\n", iblock); -+ goto out; -+ } -+ /* Unmap any metadata buffers from the block mapping, to avoid -+ * data corruption due to direct-write from Lustre being -+ * clobbered by a later flush of the blockdev metadata buffer.*/ -+ if (buffer_new(&dummy)) -+ unmap_underlying_metadata(dummy.b_bdev, -+ dummy.b_blocknr); -+ blocks[i] = dummy.b_blocknr; -+ if (created) -+ created[i] = 1; -+ } -+ -+ out: -+ ext3_journal_stop(handle); -+ return rc; -+} -Index: linux-2.6.0/fs/ext3/super.c -=================================================================== ---- linux-2.6.0.orig/fs/ext3/super.c 2003-12-31 00:33:49.000000000 +0300 -+++ linux-2.6.0/fs/ext3/super.c 2003-12-31 01:10:40.000000000 +0300 -@@ -2051,6 +2051,10 @@ - int nblocks, loff_t newsize); - EXPORT_SYMBOL(ext3_prep_san_write); - -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *blocks, int *created, int create); -+EXPORT_SYMBOL(ext3_map_inode_page); -+ - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page.patch b/lustre/kernel_patches/patches/ext3-map_inode_page.patch deleted file mode 100644 index 4dda8d4..0000000 --- a/lustre/kernel_patches/patches/ext3-map_inode_page.patch +++ /dev/null @@ -1,110 +0,0 @@ - - - - fs/ext3/ext3-exports.c | 3 ++ - fs/ext3/inode.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+) - -Index: lum/fs/ext3/inode.c -=================================================================== ---- lum.orig/fs/ext3/inode.c Sat Nov 22 16:38:51 2003 -+++ lum/fs/ext3/inode.c Fri Nov 28 00:37:18 2003 -@@ -2979,3 +2979,80 @@ - ret = ret2; - return ret; - } -+ -+/* copied from fs/buffer.c */ -+static void unmap_underlying_metadata(struct buffer_head * bh) -+{ -+ struct buffer_head *old_bh; -+ -+ old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size); -+ if (old_bh) { -+ mark_buffer_clean(old_bh); -+ wait_on_buffer(old_bh); -+ clear_bit(BH_Req, &old_bh->b_state); -+ __brelse(old_bh); -+ } -+} -+ -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *blocks, int *created, int create) -+{ -+ unsigned int blocksize, blocks_per_page; -+ unsigned long iblock; -+ void *handle; -+ int i, rc = 0, failed = 0, needed_blocks; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; -+ iblock = page->index * blocks_per_page; -+ -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ blocks[i] = ext3_bmap(inode->i_mapping, iblock); -+ if (blocks[i] == 0) { -+ failed++; -+ if (created) -+ created[i] = -1; -+ } else if (created) { -+ created[i] = 0; -+ } -+ } -+ -+ if (failed == 0 || create == 0) -+ return 0; -+ -+ needed_blocks = ext3_writepage_trans_blocks(inode); -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ unlock_kernel(); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ iblock = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ struct buffer_head bh; -+ -+ if (blocks[i] != 0) -+ continue; -+ -+ rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1); -+ if (rc) { -+ printk(KERN_INFO "ext3_map_inode_page: error %d " -+ "allocating block %ld\n", rc, iblock); -+ goto out; -+ } -+ /* Unmap any metadata buffers from the block mapping, to avoid -+ * data corruption due to direct-write from Lustre being -+ * clobbered by a later flush of the blockdev metadata buffer.*/ -+ if (buffer_new(&bh)) -+ unmap_underlying_metadata(&bh); -+ blocks[i] = bh.b_blocknr; -+ if (created) -+ created[i] = 1; -+ } -+ -+ out: -+ lock_kernel(); -+ ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ return rc; -+} -Index: lum/fs/ext3/ext3-exports.c -=================================================================== ---- lum.orig/fs/ext3/ext3-exports.c Sat Nov 22 16:38:51 2003 -+++ lum/fs/ext3/ext3-exports.c Sat Nov 22 16:38:51 2003 -@@ -9,6 +9,8 @@ - - int ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *block, int *created, int create); - - EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); -@@ -18,3 +20,4 @@ - EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); - EXPORT_SYMBOL(ext3_prep_san_write); -+EXPORT_SYMBOL(ext3_map_inode_page); diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page_2.4.18.patch b/lustre/kernel_patches/patches/ext3-map_inode_page_2.4.18.patch deleted file mode 100644 index ca1c4a7..0000000 --- a/lustre/kernel_patches/patches/ext3-map_inode_page_2.4.18.patch +++ /dev/null @@ -1,110 +0,0 @@ - - - - fs/ext3/ext3-exports.c | 3 ++ - fs/ext3/inode.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+) - -Index: linux-2.4.18-p4smp/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/ext3/ext3-exports.c Thu Nov 27 22:18:40 2003 -+++ linux-2.4.18-p4smp/fs/ext3/ext3-exports.c Thu Nov 27 22:18:40 2003 -@@ -9,6 +9,8 @@ - - int ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *block, int *created, int create); - - EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); -@@ -18,3 +20,4 @@ - EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); - EXPORT_SYMBOL(ext3_prep_san_write); -+EXPORT_SYMBOL(ext3_map_inode_page); -Index: linux-2.4.18-p4smp/fs/ext3/inode.c -=================================================================== ---- linux-2.4.18-p4smp.orig/fs/ext3/inode.c Thu Nov 27 22:18:40 2003 -+++ linux-2.4.18-p4smp/fs/ext3/inode.c Thu Nov 27 22:20:36 2003 -@@ -3004,3 +3004,80 @@ - ret = ret2; - return ret; - } -+ -+/* copied from fs/buffer.c */ -+static void unmap_underlying_metadata(struct buffer_head * bh) -+{ -+ struct buffer_head *old_bh; -+ -+ old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size); -+ if (old_bh) { -+ mark_buffer_clean(old_bh); -+ wait_on_buffer(old_bh); -+ clear_bit(BH_Req, &old_bh->b_state); -+ __brelse(old_bh); -+ } -+} -+ -+int ext3_map_inode_page(struct inode *inode, struct page *page, -+ unsigned long *blocks, int *created, int create) -+{ -+ unsigned int blocksize, blocks_per_page; -+ unsigned long iblock; -+ void *handle; -+ int i, rc = 0, failed = 0, needed_blocks; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; -+ iblock = page->index * blocks_per_page; -+ -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ blocks[i] = ext3_bmap(inode->i_mapping, iblock); -+ if (blocks[i] == 0) { -+ failed++; -+ if (created) -+ created[i] = -1; -+ } else if (created) { -+ created[i] = 0; -+ } -+ } -+ -+ if (failed == 0 || create == 0) -+ return 0; -+ -+ needed_blocks = ext3_writepage_trans_blocks(inode); -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ unlock_kernel(); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ -+ iblock = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++, iblock++) { -+ struct buffer_head bh; -+ -+ if (blocks[i] != 0) -+ continue; -+ -+ rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1); -+ if (rc) { -+ printk(KERN_INFO "ext3_map_inode_page: error %d " -+ "allocating block %ld\n", rc, iblock); -+ goto out; -+ } -+ /* Unmap any metadata buffers from the block mapping, to avoid -+ * data corruption due to direct-write from Lustre being -+ * clobbered by a later flush of the blockdev metadata buffer.*/ -+ if (buffer_new(&bh)) -+ unmap_underlying_metadata(&bh); -+ blocks[i] = bh.b_blocknr; -+ if (created) -+ created[i] = 1; -+ } -+ -+ out: -+ lock_kernel(); -+ ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ return rc; -+} diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch deleted file mode 100644 index 07ce289..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch +++ /dev/null @@ -1,3105 +0,0 @@ -Index: linux-2.6.16.i686/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.i686.orig/include/linux/ext3_fs.h 2006-05-30 22:55:32.000000000 +0800 -+++ linux-2.6.16.i686/include/linux/ext3_fs.h 2006-05-30 23:02:59.000000000 +0800 -@@ -57,6 +57,14 @@ - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 -+#define EXT3_MB_HINT_RESERVED 2 -+#define EXT3_MB_HINT_METADATA 4 -+#define EXT3_MB_HINT_FIRST 8 -+#define EXT3_MB_HINT_BEST 16 -+ - /* - * Special inodes numbers - */ -@@ -383,6 +391,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x4000000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -404,6 +413,14 @@ - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -744,7 +753,7 @@ - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern void ext3_free_blocks_sb (handle_t *, struct super_block *, - unsigned long, unsigned long, int *); - extern unsigned long ext3_count_free_blocks (struct super_block *); -@@ -865,6 +874,17 @@ - extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *, int); -+extern int ext3_mb_release(struct super_block *); -+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *); -+extern int ext3_mb_reserve_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+int __init init_ext3_proc(void); -+void exit_ext3_proc(void); -+ - #endif /* __KERNEL__ */ - - /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -Index: linux-2.6.16.i686/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.16.i686.orig/include/linux/ext3_fs_sb.h 2006-03-20 13:53:29.000000000 +0800 -+++ linux-2.6.16.i686/include/linux/ext3_fs_sb.h 2006-05-30 23:02:59.000000000 +0800 -@@ -21,8 +21,14 @@ - #include - #include - #include -+#include - #endif - #include -+#include -+ -+struct ext3_buddy_group_blocks; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ int s_mb_factor; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ unsigned long s_stripe; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ -+ /* stats for buddy allocator */ -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; - }; -+ -+#define EXT3_GROUP_INFO(sb, group) \ -+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.16.i686/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/super.c 2006-05-30 22:55:32.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/super.c 2006-05-30 23:02:59.000000000 +0800 -@@ -392,6 +392,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -640,6 +641,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - Opt_grpquota - }; - -@@ -694,6 +695,9 @@ static match_table_t tokens = { - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -1041,6 +1043,19 @@ clear_qf_name: - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1766,6 +1771,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - lock_kernel(); - return 0; - -@@ -2699,7 +2705,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2721,6 +2733,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.16.i686/fs/ext3/extents.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/extents.c 2006-05-30 22:55:32.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/extents.c 2006-05-30 23:02:59.000000000 +0800 -@@ -771,7 +771,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.16.i686/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/inode.c 2006-05-30 22:55:32.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/inode.c 2006-05-30 23:02:59.000000000 +0800 -@@ -568,7 +568,7 @@ failed: - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); - return err; - } - -@@ -1862,7 +1862,7 @@ static void ext3_clear_blocks(handle_t * - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 1); - } - - /** -@@ -2035,7 +2035,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* -Index: linux-2.6.16.i686/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/balloc.c 2006-03-20 13:53:29.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/balloc.c 2006-05-30 23:02:59.000000000 +0800 -@@ -80,7 +80,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -491,24 +491,6 @@ error_return: - return; - } - --/* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -- unsigned long block, unsigned long count) --{ -- struct super_block * sb; -- int dquot_freed_blocks; -- -- sb = inode->i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -- return; --} -- - /* - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This -@@ -1154,7 +1136,7 @@ out: - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.16.i686/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/xattr.c 2006-03-20 13:53:29.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/xattr.c 2006-05-30 23:02:59.000000000 +0800 -@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { -@@ -804,7 +804,7 @@ inserted: - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -Index: linux-2.6.16.i686/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/mballoc.c 2006-05-31 04:14:15.752410384 +0800 -+++ linux-2.6.16.i686/fs/ext3/mballoc.c 2006-05-30 23:03:38.000000000 +0800 -@@ -0,0 +1,2729 @@ -+/* -+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ * - tree of groups sorted by number of free blocks -+ * - percpu reservation code (hotpath) -+ * - error handling -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3//mb_history -+ */ -+#define EXT3_MB_HISTORY -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+long ext3_mb_max_to_scan = 500; -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+long ext3_mb_min_to_scan = 30; -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! -+ */ -+ -+long ext3_mb_stats = 1; -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+long ext3_mb_order2_reqs = 8; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ unsigned short bb_counters[]; -+}; -+ -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+struct ext3_free_extent { -+ __u16 fe_start; -+ __u16 fe_len; -+ __u16 fe_group; -+}; -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u8 ac_status; -+ __u8 ac_flags; /* allocation hints */ -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ -+ struct page *ac_buddy_page; -+ struct page *ac_bitmap_page; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u8 cr; /* which phase the result extent was found at */ -+ __u8 merged; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(sb,ino,ac) -+#else -+static void ext3_mb_store_history(struct super_block *, unsigned ino, -+ struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef AGGRESSIVE_CHECK -+ -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ int fragments = 0, fstart; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 300 != 0) -+ return; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ J_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ J_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ J_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ J_ASSERT(e3b->bd_info->bb_fragments == fragments); -+} -+ -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ mb_debug("mark %u/%u free\n", first, len); -+ J_ASSERT(len < EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ mb_debug(" %u/%u -> max %u, min %u\n", -+ first & ((2 << sb->s_blocksize_bits) - 1), -+ len, max, min); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) { -+ mb_debug(" set %u at %u \n", first >> min, -+ sbi->s_mb_offsets[min]); -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ } -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ /* bb_state shouldn't being modified because all -+ * others waits for init completion on page lock */ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, bitmap, group); -+ } else { -+ /* this is block of bitmap */ -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memcpy(data, bitmap, blocksize); -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... */ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* hold in-core structures until allocated -+ * blocks are marked non-free in on-disk bitmap */ -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ page_cache_get(e3b->bd_buddy_page); -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ page_cache_get(e3b->bd_bitmap_page); -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! -+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ J_ASSERT(ex->fe_len > 0); -+ J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chunk is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ /* -+ * Let's scan at least few extents and don't pick up a first one -+ */ -+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > ext3_mb_max_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ J_ASSERT(ex.fe_len > 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ unsigned long start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ J_ASSERT(ac->ac_2order > 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ if (buddy == NULL) { -+ printk(KERN_ALERT "looking for wrong order?\n"); -+ break; -+ } -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ J_ASSERT(k < max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len); -+ -+ if (unlikely(ext3_mb_stats)) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. -+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ J_ASSERT(free > 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ J_ASSERT(free == 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(free >= ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ unsigned long i, max; -+ -+ J_ASSERT(sbi->s_stripe != 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, fragments, i, bits; -+ -+ J_ASSERT(cr >= 0 && cr < 4); -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ J_ASSERT(ac->ac_2order != 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *len, int flags, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, block, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ /* -+ * We can't allocate > group size -+ */ -+ if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ *len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ ac.ac_buddy_page = NULL; -+ ac.ac_bitmap_page = NULL; -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_groups_scanned = 0; -+ ac.ac_ex_scanned = 0; -+ ac.ac_found = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_ex.fe_group = group; -+ ac.ac_g_ex.fe_start = block; -+ ac.ac_g_ex.fe_len = *len; -+ ac.ac_flags = flags; -+ ac.ac_2order = 0; -+ ac.ac_criteria = 0; -+ -+ if (*len == 1 && sbi->s_stripe) { -+ /* looks like a metadata, let's use a dirty hack for raid5 -+ * move all metadata in first groups in hope to hit cached -+ * sectors and thus avoid read-modify cycles in raid5 */ -+ ac.ac_g_ex.fe_group = group = 0; -+ } -+ -+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */ -+ i = ffs(*len); -+ if (i >= ext3_mb_order2_reqs) { -+ i--; -+ if ((*len & (~(1 << i))) == 0) -+ ac.ac_2order = i; -+ } -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(&ac, &e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ goto found; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac.ac_2order ? 0 : 1; -+repeat: -+ for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac.ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac.ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(&ac, &e3b); -+ else if (cr == 1 && *len == sbi->s_stripe) -+ ext3_mb_scan_aligned(&ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(&ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac.ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND && -+ !(ac.ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ /*if (ac.ac_found > ext3_mb_max_to_scan) -+ printk(KERN_DEBUG "EXT3-fs: too long searching at " -+ "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len, -+ ac.ac_g_ex.fe_len);*/ -+ ext3_mb_try_best_found(&ac, &e3b); -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. -+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ goto repeat; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * We aren't lucky definitely -+ */ -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+#if 1 -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac.ac_status, ac.ac_flags); -+ printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n", -+ ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group, -+ ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr); -+ printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n", -+ sbi->s_blocks_reserved, ac.ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free); -+ printk("\n"); -+#endif -+ goto out; -+ } -+ -+found: -+ J_ASSERT(ac.ac_b_ex.fe_len > 0); -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = %u", block); -+#ifdef AGGRESSIVE_CHECK -+ for (i = 0; i < ac.ac_b_ex.fe_len; i++) -+ J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac.ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_ex.fe_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len); -+ -+ *len = ac.ac_b_ex.fe_len; -+ J_ASSERT(*len > 0); -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (ac.ac_buddy_page) -+ page_cache_release(ac.ac_buddy_page); -+ if (ac.ac_bitmap_page) -+ page_cache_release(ac.ac_bitmap_page); -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ -+ if (unlikely(ext3_mb_stats) && ac.ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(*len, &sbi->s_bal_allocated); -+ if (*len >= ac.ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac.ac_found, &sbi->s_bal_ex_scanned); -+ if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start && -+ ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac.ac_found > ext3_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(sb, inode->i_ino, &ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->goal.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct ext3_mb_history *hs = v; -+ char buf[20], buf2[20]; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n", -+ "pid", "inode", "goal", "result", "found", "grps", "cr", -+ "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ sprintf(buf, "%u/%u/%u", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len); -+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", -+ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups, -+ hs->cr, hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v, i; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", "2^0", "2^1", "2^2", -+ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", -+ "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ -+ if (EXT3_MB_GRP_NEED_INIT(&sg.info)) -+ return 0; -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? -+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ remove_proc_entry(name, proc_root_ext3); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ int i; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ sbi->s_mb_proc = proc_mkdir(name, proc_root_ext3); -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void -+ext3_mb_store_history(struct super_block *sb, unsigned ino, -+ struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ h.pid = current->pid; -+ h.ino = ino; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_b_ex; -+ h.found = ac->ac_found; -+ h.cr = ac->ac_criteria; -+ h.groups = ac->ac_groups_scanned; -+ h.tail = ac->ac_tail; -+ h.buddy = ac->ac_buddy; -+ h.merged = 0; -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. */ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *root = sb->s_root->d_inode; -+ unsigned i, offset, max; -+ struct dentry *dentry; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_reserve_lock); -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ /* remove old on-disk buddy file */ -+ mutex_lock(&root->i_mutex); -+ dentry = lookup_one_len(".buddy", sb->s_root, strlen(".buddy")); -+ if (dentry->d_inode != NULL) { -+ i = vfs_unlink(root, dentry); -+ if (i != 0) -+ printk("EXT3-fs: can't remove .buddy file: %d\n", i); -+ } -+ dput(dentry); -+ mutex_unlock(&root->i_mutex); -+ -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ if (ext3_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ } -+ -+ ext3_mb_history_release(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, -+ int metadata, int *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ ext3_unlock_group(sb, block_group); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed = count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *errp) -+{ -+ int ret, len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ struct super_block *sb; -+ int freed; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ return; -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "mb_stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan" -+#define EXT3_MB_ORDER2_REQ "mb_order2_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ ext3_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ struct proc_dir_entry *proc_ext3_mb_stats; -+ struct proc_dir_entry *proc_ext3_mb_max_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_min_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_order2_req; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ return -EIO; -+ } -+ -+ /* Initialize EXT3_MB_STATS_NAME */ -+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_stats == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_STATS_NAME); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_stats->data = NULL; -+ proc_ext3_mb_stats->read_proc = ext3_mb_stats_read; -+ proc_ext3_mb_stats->write_proc = ext3_mb_stats_write; -+ -+ /* Initialize EXT3_MAX_TO_SCAN_NAME */ -+ proc_ext3_mb_max_to_scan = create_proc_entry( -+ EXT3_MB_MAX_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_max_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MAX_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_max_to_scan->data = NULL; -+ proc_ext3_mb_max_to_scan->read_proc = ext3_mb_max_to_scan_read; -+ proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write; -+ -+ /* Initialize EXT3_MIN_TO_SCAN_NAME */ -+ proc_ext3_mb_min_to_scan = create_proc_entry( -+ EXT3_MB_MIN_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_min_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MIN_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_min_to_scan->data = NULL; -+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read; -+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write; -+ -+ /* Initialize EXT3_ORDER2_REQ */ -+ proc_ext3_mb_order2_req = create_proc_entry( -+ EXT3_MB_ORDER2_REQ, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_order2_req == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_ORDER2_REQ); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_order2_req->data = NULL; -+ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read; -+ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write; -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -Index: linux-2.6.16.i686/fs/ext3/Makefile -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/Makefile 2006-05-30 22:55:32.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/Makefile 2006-05-30 23:02:59.000000000 +0800 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch deleted file mode 100644 index 1fb1b60..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ /dev/null @@ -1,3111 +0,0 @@ -Index: linux-2.6.5-7.282-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.282-full.orig/include/linux/ext3_fs.h 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/include/linux/ext3_fs.h 2006-10-24 22:18:28.000000000 +0400 -@@ -57,6 +57,14 @@ struct statfs; - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 -+#define EXT3_MB_HINT_RESERVED 2 -+#define EXT3_MB_HINT_METADATA 4 -+#define EXT3_MB_HINT_FIRST 8 -+#define EXT3_MB_HINT_BEST 16 -+ - /* - * Special inodes numbers - */ -@@ -339,6 +347,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -361,6 +370,14 @@ struct ext3_inode { - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -700,7 +717,9 @@ extern int ext3_bg_has_super(struct supe - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); -+extern void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, -+ unsigned long); - extern unsigned long ext3_count_free_blocks (struct super_block *); - extern void ext3_check_blocks_bitmap (struct super_block *); - extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -@@ -824,6 +843,17 @@ extern void ext3_extents_initialize_bloc - extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *, int); -+extern int ext3_mb_release(struct super_block *); -+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *); -+extern int ext3_mb_reserve_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+int __init init_ext3_proc(void); -+void exit_ext3_proc(void); -+ - #endif /* __KERNEL__ */ - - #define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) -Index: linux-2.6.5-7.282-full/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-7.282-full.orig/include/linux/ext3_fs_sb.h 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/include/linux/ext3_fs_sb.h 2006-10-24 22:18:28.000000000 +0400 -@@ -23,9 +23,15 @@ - #define EXT_INCLUDE - #include - #include -+#include - #endif - #endif - #include -+#include -+ -+struct ext3_buddy_group_blocks; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ int s_mb_factor; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ unsigned long s_stripe; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ -+ /* stats for buddy allocator */ -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; - }; - -+#define EXT3_GROUP_INFO(sb, group) \ -+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] -+ - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.5-7.282-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/super.c 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/super.c 2006-10-24 22:18:28.000000000 +0400 -@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -588,6 +589,7 @@ enum { - Opt_err, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - }; - - static match_table_t tokens = { -@@ -634,6 +636,9 @@ static match_table_t tokens = { - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL} - }; -@@ -859,6 +864,19 @@ static int parse_options (char * options - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1512,6 +1530,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - - return 0; - -@@ -2160,7 +2179,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2189,6 +2214,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.5-7.282-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/extents.c 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/extents.c 2006-10-24 22:18:28.000000000 +0400 -@@ -779,7 +779,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1438,7 +1438,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1923,10 +1923,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1938,7 +1940,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.5-7.282-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/inode.c 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/inode.c 2006-10-24 22:18:28.000000000 +0400 -@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); - return err; - } - -@@ -675,7 +675,7 @@ err_out: - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, -- le32_to_cpu(where[i].key), 1); -+ le32_to_cpu(where[i].key), 1, 1); - return err; - } - -@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 1); - } - - /** -@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* -Index: linux-2.6.5-7.282-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/balloc.c 2006-08-30 18:12:13.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/balloc.c 2006-10-24 22:18:28.000000000 +0400 -@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino - } - - /* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -+void ext3_free_blocks_old(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count) - { - struct buffer_head *bitmap_bh = NULL; -@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.5-7.282-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/xattr.c 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/xattr.c 2006-10-24 22:18:28.000000000 +0400 -@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -@@ -1411,7 +1411,7 @@ getblk_failed: - if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { - /* Free the old block. */ - ea_bdebug(old_bh, "freeing"); -- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); - - /* ext3_forget() calls bforget() for us, but we - let our caller release old_bh, so we need to -@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle - mb_cache_entry_free(ce); - ce = NULL; - } -- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); -+ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); - } else { -Index: linux-2.6.5-7.282-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/mballoc.c 2006-10-23 18:07:54.821533176 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/mballoc.c 2006-10-24 22:20:45.000000000 +0400 -@@ -0,0 +1,2730 @@ -+/* -+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ * - tree of groups sorted by number of free blocks -+ * - percpu reservation code (hotpath) -+ * - error handling -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3//mb_history -+ */ -+#define EXT3_MB_HISTORY -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+long ext3_mb_max_to_scan = 500; -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+long ext3_mb_min_to_scan = 30; -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! -+ */ -+ -+long ext3_mb_stats = 1; -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+long ext3_mb_order2_reqs = 8; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ unsigned short bb_counters[]; -+}; -+ -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+struct ext3_free_extent { -+ __u16 fe_start; -+ __u16 fe_len; -+ __u16 fe_group; -+}; -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u8 ac_status; -+ __u8 ac_flags; /* allocation hints */ -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ -+ struct page *ac_buddy_page; -+ struct page *ac_bitmap_page; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u8 cr; /* which phase the result extent was found at */ -+ __u8 merged; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(sb,ino,ac) -+#else -+static void ext3_mb_store_history(struct super_block *, unsigned ino, -+ struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef AGGRESSIVE_CHECK -+ -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ int fragments = 0, fstart; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 300 != 0) -+ return; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ J_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ J_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ J_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ J_ASSERT(e3b->bd_info->bb_fragments == fragments); -+} -+ -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ mb_debug("mark %u/%u free\n", first, len); -+ J_ASSERT(len < EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ mb_debug(" %u/%u -> max %u, min %u\n", -+ first & ((2 << sb->s_blocksize_bits) - 1), -+ len, max, min); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) { -+ mb_debug(" set %u at %u \n", first >> min, -+ sbi->s_mb_offsets[min]); -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ } -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ /* bb_state shouldn't being modified because all -+ * others waits for init completion on page lock */ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, bitmap, group); -+ } else { -+ /* this is block of bitmap */ -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memcpy(data, bitmap, blocksize); -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... */ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* hold in-core structures until allocated -+ * blocks are marked non-free in on-disk bitmap */ -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ page_cache_get(e3b->bd_buddy_page); -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ page_cache_get(e3b->bd_bitmap_page); -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! -+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ J_ASSERT(ex->fe_len > 0); -+ J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chunk is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ /* -+ * Let's scan at least few extents and don't pick up a first one -+ */ -+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > ext3_mb_max_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ J_ASSERT(ex.fe_len > 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ unsigned long start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ J_ASSERT(ac->ac_2order > 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ if (buddy == NULL) { -+ printk(KERN_ALERT "looking for wrong order?\n"); -+ break; -+ } -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ J_ASSERT(k < max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len); -+ -+ if (unlikely(ext3_mb_stats)) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. -+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ J_ASSERT(free > 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ J_ASSERT(free == 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(free >= ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ unsigned long i, max; -+ -+ J_ASSERT(sbi->s_stripe != 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) -+ % EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, fragments, i, bits; -+ -+ J_ASSERT(cr >= 0 && cr < 4); -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ J_ASSERT(ac->ac_2order != 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *len, int flags, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, block, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ /* -+ * We can't allocate > group size -+ */ -+ if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ *len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ ac.ac_buddy_page = NULL; -+ ac.ac_bitmap_page = NULL; -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_groups_scanned = 0; -+ ac.ac_ex_scanned = 0; -+ ac.ac_found = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_ex.fe_group = group; -+ ac.ac_g_ex.fe_start = block; -+ ac.ac_g_ex.fe_len = *len; -+ ac.ac_flags = flags; -+ ac.ac_2order = 0; -+ ac.ac_criteria = 0; -+ -+ if (*len == 1 && sbi->s_stripe) { -+ /* looks like a metadata, let's use a dirty hack for raid5 -+ * move all metadata in first groups in hope to hit cached -+ * sectors and thus avoid read-modify cycles in raid5 */ -+ ac.ac_g_ex.fe_group = group = 0; -+ } -+ -+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */ -+ i = ffs(*len); -+ if (i >= ext3_mb_order2_reqs) { -+ i--; -+ if ((*len & (~(1 << i))) == 0) -+ ac.ac_2order = i; -+ } -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(&ac, &e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ goto found; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac.ac_2order ? 0 : 1; -+repeat: -+ for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac.ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac.ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(&ac, &e3b); -+ else if (cr == 1 && *len == sbi->s_stripe) -+ ext3_mb_scan_aligned(&ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(&ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac.ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND && -+ !(ac.ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ /*if (ac.ac_found > ext3_mb_max_to_scan) -+ printk(KERN_DEBUG "EXT3-fs: too long searching at " -+ "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len, -+ ac.ac_g_ex.fe_len);*/ -+ ext3_mb_try_best_found(&ac, &e3b); -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. -+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ goto repeat; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * We aren't lucky definitely -+ */ -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+#if 1 -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac.ac_status, ac.ac_flags); -+ printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n", -+ ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group, -+ ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr); -+ printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n", -+ sbi->s_blocks_reserved, ac.ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free); -+ printk("\n"); -+#endif -+ goto out; -+ } -+ -+found: -+ J_ASSERT(ac.ac_b_ex.fe_len > 0); -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = %u", block); -+#ifdef AGGRESSIVE_CHECK -+ for (i = 0; i < ac.ac_b_ex.fe_len; i++) -+ J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac.ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_ex.fe_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len); -+ -+ *len = ac.ac_b_ex.fe_len; -+ J_ASSERT(*len > 0); -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (ac.ac_buddy_page) -+ page_cache_release(ac.ac_buddy_page); -+ if (ac.ac_bitmap_page) -+ page_cache_release(ac.ac_bitmap_page); -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ -+ if (unlikely(ext3_mb_stats) && ac.ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(*len, &sbi->s_bal_allocated); -+ if (*len >= ac.ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac.ac_found, &sbi->s_bal_ex_scanned); -+ if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start && -+ ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac.ac_found > ext3_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(sb, inode->i_ino, &ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->goal.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct ext3_mb_history *hs = v; -+ char buf[20], buf2[20]; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n", -+ "pid", "inode", "goal", "result", "found", "grps", "cr", -+ "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ sprintf(buf, "%u/%u/%u", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len); -+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", -+ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups, -+ hs->cr, hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v, i; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", "2^0", "2^1", "2^2", -+ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", -+ "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ -+ if (EXT3_MB_GRP_NEED_INIT(&sg.info)) -+ return 0; -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? -+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ remove_proc_entry(name, proc_root_ext3); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ int i; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ sbi->s_mb_proc = proc_mkdir(name, proc_root_ext3); -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void -+ext3_mb_store_history(struct super_block *sb, unsigned ino, -+ struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ h.pid = current->pid; -+ h.ino = ino; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_b_ex; -+ h.found = ac->ac_found; -+ h.cr = ac->ac_criteria; -+ h.groups = ac->ac_groups_scanned; -+ h.tail = ac->ac_tail; -+ h.buddy = ac->ac_buddy; -+ h.merged = 0; -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. */ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *root = sb->s_root->d_inode; -+ unsigned i, offset, max; -+ struct dentry *dentry; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_reserve_lock); -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ /* remove old on-disk buddy file */ -+ down(&root->i_sem); -+ dentry = lookup_one_len(".buddy", sb->s_root, strlen(".buddy")); -+ if (dentry->d_inode != NULL) { -+ i = vfs_unlink(root, dentry); -+ if (i != 0) -+ printk("EXT3-fs: can't remove .buddy file: %d\n", i); -+ } -+ dput(dentry); -+ up(&root->i_sem); -+ -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ if (ext3_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ } -+ -+ ext3_mb_history_release(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, -+ int metadata, int *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ ext3_unlock_group(sb, block_group); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed = count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *errp) -+{ -+ int ret, len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ struct super_block *sb; -+ int freed; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_old(handle, inode, block, count); -+ else { -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ } -+ return; -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "mb_stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan" -+#define EXT3_MB_ORDER2_REQ "mb_order2_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ ext3_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ struct proc_dir_entry *proc_ext3_mb_stats; -+ struct proc_dir_entry *proc_ext3_mb_max_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_min_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_order2_req; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ return -EIO; -+ } -+ -+ /* Initialize EXT3_MB_STATS_NAME */ -+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_stats == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_STATS_NAME); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_stats->data = NULL; -+ proc_ext3_mb_stats->read_proc = ext3_mb_stats_read; -+ proc_ext3_mb_stats->write_proc = ext3_mb_stats_write; -+ -+ /* Initialize EXT3_MAX_TO_SCAN_NAME */ -+ proc_ext3_mb_max_to_scan = create_proc_entry( -+ EXT3_MB_MAX_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_max_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MAX_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_max_to_scan->data = NULL; -+ proc_ext3_mb_max_to_scan->read_proc = ext3_mb_max_to_scan_read; -+ proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write; -+ -+ /* Initialize EXT3_MIN_TO_SCAN_NAME */ -+ proc_ext3_mb_min_to_scan = create_proc_entry( -+ EXT3_MB_MIN_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_min_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MIN_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_min_to_scan->data = NULL; -+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read; -+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write; -+ -+ /* Initialize EXT3_ORDER2_REQ */ -+ proc_ext3_mb_order2_req = create_proc_entry( -+ EXT3_MB_ORDER2_REQ, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_order2_req == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_ORDER2_REQ); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_order2_req->data = NULL; -+ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read; -+ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write; -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -Index: linux-2.6.5-7.282-full/fs/ext3/Makefile -=================================================================== ---- linux-2.6.5-7.282-full.orig/fs/ext3/Makefile 2006-10-24 22:18:28.000000000 +0400 -+++ linux-2.6.5-7.282-full/fs/ext3/Makefile 2006-10-24 22:18:28.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch deleted file mode 100644 index 7aaf8cd..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch +++ /dev/null @@ -1,3105 +0,0 @@ -Index: linux-2.6.12.6-bull/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12.6-bull.orig/include/linux/ext3_fs.h 2006-04-29 20:39:09.000000000 +0400 -+++ linux-2.6.12.6-bull/include/linux/ext3_fs.h 2006-04-29 20:39:10.000000000 +0400 -@@ -57,6 +57,14 @@ struct statfs; - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 -+#define EXT3_MB_HINT_RESERVED 2 -+#define EXT3_MB_HINT_METADATA 4 -+#define EXT3_MB_HINT_FIRST 8 -+#define EXT3_MB_HINT_BEST 16 -+ - /* - * Special inodes numbers - */ -@@ -366,6 +374,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -387,6 +396,14 @@ struct ext3_inode { - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -727,7 +736,7 @@ extern int ext3_bg_has_super(struct supe - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern void ext3_free_blocks_sb (handle_t *, struct super_block *, - unsigned long, unsigned long, int *); - extern unsigned long ext3_count_free_blocks (struct super_block *); -@@ -848,6 +857,17 @@ extern void ext3_extents_initialize_bloc - extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *, int); -+extern int ext3_mb_release(struct super_block *); -+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *); -+extern int ext3_mb_reserve_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+int __init init_ext3_proc(void); -+void exit_ext3_proc(void); -+ - #endif /* __KERNEL__ */ - - /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -Index: linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.12.6-bull.orig/include/linux/ext3_fs_sb.h 2005-08-29 20:55:27.000000000 +0400 -+++ linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h 2006-04-29 20:39:10.000000000 +0400 -@@ -21,8 +21,14 @@ - #include - #include - #include -+#include - #endif - #include -+#include -+ -+struct ext3_buddy_group_blocks; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ int s_mb_factor; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ unsigned long s_stripe; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ -+ /* stats for buddy allocator */ -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; - }; -+ -+#define EXT3_GROUP_INFO(sb, group) \ -+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.12.6-bull/fs/ext3/super.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/super.c 2006-04-29 20:39:09.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/super.c 2006-04-29 20:39:10.000000000 +0400 -@@ -387,6 +387,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -597,6 +598,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - }; - - static match_table_t tokens = { -@@ -650,6 +651,9 @@ static match_table_t tokens = { - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -965,6 +967,19 @@ clear_qf_name: - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1670,6 +1675,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - lock_kernel(); - return 0; - -@@ -2549,7 +2555,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2571,6 +2583,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.12.6-bull/fs/ext3/extents.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/extents.c 2006-04-29 20:39:09.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/extents.c 2006-04-29 20:39:10.000000000 +0400 -@@ -777,7 +777,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.12.6-bull/fs/ext3/inode.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/inode.c 2006-04-29 20:39:09.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/inode.c 2006-04-29 20:39:10.000000000 +0400 -@@ -564,7 +564,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); - return err; - } - -@@ -1850,7 +1850,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 1); - } - - /** -@@ -2023,7 +2023,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* -Index: linux-2.6.12.6-bull/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/balloc.c 2005-08-29 20:55:27.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/balloc.c 2006-04-29 20:39:10.000000000 +0400 -@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -490,24 +490,6 @@ error_return: - return; - } - --/* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -- unsigned long block, unsigned long count) --{ -- struct super_block * sb; -- int dquot_freed_blocks; -- -- sb = inode->i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -- return; --} -- - /* - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This -@@ -1162,7 +1144,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.12.6-bull/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/xattr.c 2005-08-29 20:55:27.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/xattr.c 2006-04-29 20:39:10.000000000 +0400 -@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { -@@ -804,7 +804,7 @@ inserted: - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/mballoc.c 2006-04-30 01:24:11.000000000 +0400 -@@ -0,0 +1,2729 @@ -+/* -+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ * - tree of groups sorted by number of free blocks -+ * - percpu reservation code (hotpath) -+ * - error handling -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3//mb_history -+ */ -+#define EXT3_MB_HISTORY -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+long ext3_mb_max_to_scan = 500; -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+long ext3_mb_min_to_scan = 30; -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! -+ */ -+ -+long ext3_mb_stats = 1; -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+long ext3_mb_order2_reqs = 8; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ unsigned short bb_counters[]; -+}; -+ -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+struct ext3_free_extent { -+ __u16 fe_start; -+ __u16 fe_len; -+ __u16 fe_group; -+}; -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u8 ac_status; -+ __u8 ac_flags; /* allocation hints */ -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ -+ struct page *ac_buddy_page; -+ struct page *ac_bitmap_page; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u8 cr; /* which phase the result extent was found at */ -+ __u8 merged; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(sb,ino,ac) -+#else -+static void ext3_mb_store_history(struct super_block *, unsigned ino, -+ struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef AGGRESSIVE_CHECK -+ -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ int fragments = 0, fstart; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 300 != 0) -+ return; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ J_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ J_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ J_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ J_ASSERT(e3b->bd_info->bb_fragments == fragments); -+} -+ -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ mb_debug("mark %u/%u free\n", first, len); -+ J_ASSERT(len < EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ mb_debug(" %u/%u -> max %u, min %u\n", -+ first & ((2 << sb->s_blocksize_bits) - 1), -+ len, max, min); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) { -+ mb_debug(" set %u at %u \n", first >> min, -+ sbi->s_mb_offsets[min]); -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ } -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ /* bb_state shouldn't being modified because all -+ * others waits for init completion on page lock */ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, bitmap, group); -+ } else { -+ /* this is block of bitmap */ -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memcpy(data, bitmap, blocksize); -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... */ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* hold in-core structures until allocated -+ * blocks are marked non-free in on-disk bitmap */ -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ page_cache_get(e3b->bd_buddy_page); -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ page_cache_get(e3b->bd_bitmap_page); -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! -+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ J_ASSERT(ex->fe_len > 0); -+ J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chunk is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ /* -+ * Let's scan at least few extents and don't pick up a first one -+ */ -+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > ext3_mb_max_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ J_ASSERT(ex.fe_len > 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ unsigned long start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ J_ASSERT(ac->ac_2order > 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ if (buddy == NULL) { -+ printk(KERN_ALERT "looking for wrong order?\n"); -+ break; -+ } -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ J_ASSERT(k < max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len); -+ -+ if (unlikely(ext3_mb_stats)) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. -+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ J_ASSERT(free > 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ J_ASSERT(free == 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(free >= ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ unsigned long i, max; -+ -+ J_ASSERT(sbi->s_stripe != 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, fragments, i, bits; -+ -+ J_ASSERT(cr >= 0 && cr < 4); -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ J_ASSERT(ac->ac_2order != 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *len, int flags, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, block, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ /* -+ * We can't allocate > group size -+ */ -+ if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ *len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ ac.ac_buddy_page = NULL; -+ ac.ac_bitmap_page = NULL; -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_groups_scanned = 0; -+ ac.ac_ex_scanned = 0; -+ ac.ac_found = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_ex.fe_group = group; -+ ac.ac_g_ex.fe_start = block; -+ ac.ac_g_ex.fe_len = *len; -+ ac.ac_flags = flags; -+ ac.ac_2order = 0; -+ ac.ac_criteria = 0; -+ -+ if (*len == 1 && sbi->s_stripe) { -+ /* looks like a metadata, let's use a dirty hack for raid5 -+ * move all metadata in first groups in hope to hit cached -+ * sectors and thus avoid read-modify cycles in raid5 */ -+ ac.ac_g_ex.fe_group = group = 0; -+ } -+ -+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */ -+ i = ffs(*len); -+ if (i >= ext3_mb_order2_reqs) { -+ i--; -+ if ((*len & (~(1 << i))) == 0) -+ ac.ac_2order = i; -+ } -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(&ac, &e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ goto found; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac.ac_2order ? 0 : 1; -+repeat: -+ for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac.ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac.ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(&ac, &e3b); -+ else if (cr == 1 && *len == sbi->s_stripe) -+ ext3_mb_scan_aligned(&ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(&ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac.ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND && -+ !(ac.ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ /*if (ac.ac_found > ext3_mb_max_to_scan) -+ printk(KERN_DEBUG "EXT3-fs: too long searching at " -+ "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len, -+ ac.ac_g_ex.fe_len);*/ -+ ext3_mb_try_best_found(&ac, &e3b); -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. -+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ goto repeat; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * We aren't lucky definitely -+ */ -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+#if 1 -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac.ac_status, ac.ac_flags); -+ printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n", -+ ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group, -+ ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr); -+ printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n", -+ sbi->s_blocks_reserved, ac.ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free); -+ printk("\n"); -+#endif -+ goto out; -+ } -+ -+found: -+ J_ASSERT(ac.ac_b_ex.fe_len > 0); -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = %u", block); -+#ifdef AGGRESSIVE_CHECK -+ for (i = 0; i < ac.ac_b_ex.fe_len; i++) -+ J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac.ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_ex.fe_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len); -+ -+ *len = ac.ac_b_ex.fe_len; -+ J_ASSERT(*len > 0); -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (ac.ac_buddy_page) -+ page_cache_release(ac.ac_buddy_page); -+ if (ac.ac_bitmap_page) -+ page_cache_release(ac.ac_bitmap_page); -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ -+ if (unlikely(ext3_mb_stats) && ac.ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(*len, &sbi->s_bal_allocated); -+ if (*len >= ac.ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac.ac_found, &sbi->s_bal_ex_scanned); -+ if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start && -+ ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac.ac_found > ext3_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(sb, inode->i_ino, &ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->goal.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct ext3_mb_history *hs = v; -+ char buf[20], buf2[20]; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n", -+ "pid", "inode", "goal", "result", "found", "grps", "cr", -+ "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ sprintf(buf, "%u/%u/%u", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len); -+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", -+ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups, -+ hs->cr, hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v, i; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", "2^0", "2^1", "2^2", -+ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", -+ "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ -+ if (EXT3_MB_GRP_NEED_INIT(&sg.info)) -+ return 0; -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? -+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ remove_proc_entry(name, proc_root_ext3); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ int i; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ sbi->s_mb_proc = proc_mkdir(name, proc_root_ext3); -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void -+ext3_mb_store_history(struct super_block *sb, unsigned ino, -+ struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ h.pid = current->pid; -+ h.ino = ino; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_b_ex; -+ h.found = ac->ac_found; -+ h.cr = ac->ac_criteria; -+ h.groups = ac->ac_groups_scanned; -+ h.tail = ac->ac_tail; -+ h.buddy = ac->ac_buddy; -+ h.merged = 0; -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. */ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *root = sb->s_root->d_inode; -+ unsigned i, offset, max; -+ struct dentry *dentry; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_reserve_lock); -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ /* remove old on-disk buddy file */ -+ down(&root->i_sem); -+ dentry = lookup_one_len(".buddy", sb->s_root, strlen(".buddy")); -+ if (dentry->d_inode != NULL) { -+ i = vfs_unlink(root, dentry); -+ if (i != 0) -+ printk("EXT3-fs: can't remove .buddy file: %d\n", i); -+ } -+ dput(dentry); -+ up(&root->i_sem); -+ -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ if (ext3_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ } -+ -+ ext3_mb_history_release(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, -+ int metadata, int *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ ext3_unlock_group(sb, block_group); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed = count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *errp) -+{ -+ int ret, len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ struct super_block *sb; -+ int freed; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ return; -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "mb_stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan" -+#define EXT3_MB_ORDER2_REQ "mb_order2_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ ext3_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ struct proc_dir_entry *proc_ext3_mb_stats; -+ struct proc_dir_entry *proc_ext3_mb_max_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_min_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_order2_req; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ return -EIO; -+ } -+ -+ /* Initialize EXT3_MB_STATS_NAME */ -+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_stats == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_STATS_NAME); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_stats->data = NULL; -+ proc_ext3_mb_stats->read_proc = ext3_mb_stats_read; -+ proc_ext3_mb_stats->write_proc = ext3_mb_stats_write; -+ -+ /* Initialize EXT3_MAX_TO_SCAN_NAME */ -+ proc_ext3_mb_max_to_scan = create_proc_entry( -+ EXT3_MB_MAX_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_max_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MAX_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_max_to_scan->data = NULL; -+ proc_ext3_mb_max_to_scan->read_proc = ext3_mb_max_to_scan_read; -+ proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write; -+ -+ /* Initialize EXT3_MIN_TO_SCAN_NAME */ -+ proc_ext3_mb_min_to_scan = create_proc_entry( -+ EXT3_MB_MIN_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_min_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MIN_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_min_to_scan->data = NULL; -+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read; -+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write; -+ -+ /* Initialize EXT3_ORDER2_REQ */ -+ proc_ext3_mb_order2_req = create_proc_entry( -+ EXT3_MB_ORDER2_REQ, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_order2_req == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_ORDER2_REQ); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_order2_req->data = NULL; -+ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read; -+ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write; -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -Index: linux-2.6.12.6-bull/fs/ext3/Makefile -=================================================================== ---- linux-2.6.12.6-bull.orig/fs/ext3/Makefile 2006-04-29 20:39:09.000000000 +0400 -+++ linux-2.6.12.6-bull/fs/ext3/Makefile 2006-04-29 20:39:10.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch deleted file mode 100644 index 702dfcc..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch +++ /dev/null @@ -1,3140 +0,0 @@ -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/include/linux/ext3_fs.h 2006-07-16 02:29:49.000000000 +0800 -@@ -53,6 +53,14 @@ - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 -+#define EXT3_MB_HINT_RESERVED 2 -+#define EXT3_MB_HINT_METADATA 4 -+#define EXT3_MB_HINT_FIRST 8 -+#define EXT3_MB_HINT_BEST 16 -+ - /* - * Special inodes numbers - */ -@@ -379,6 +387,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x4000000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -405,6 +413,14 @@ - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -749,12 +758,12 @@ ext3_group_first_block_no(struct super_b - /* balloc.c */ - extern int ext3_bg_has_super(struct super_block *sb, int group); - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); --extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, -+extern ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp); - extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, unsigned long *count, int *errp); - extern void ext3_free_blocks (handle_t *handle, struct inode *inode, -- ext3_fsblk_t block, unsigned long count); -+ ext3_fsblk_t block, unsigned long count, int metadata); - extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext3_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks); -@@ -881,6 +890,21 @@ extern void ext3_extents_initialize_bloc - extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *sb, int needs_recovery); -+extern int ext3_mb_release(struct super_block *sb); -+extern ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, -+ ext3_fsblk_t goal, int *errp); -+extern ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ ext3_fsblk_t goal, int *len, int flags, -+ int *errp); -+extern int ext3_mb_reserve_blocks(struct super_block *sb, int); -+extern void ext3_mb_release_blocks(struct super_block *sb, int); -+int __init init_ext3_proc(void); -+void exit_ext3_proc(void); -+ - #endif /* __KERNEL__ */ - - /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -Index: linux-stage/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/include/linux/ext3_fs_sb.h 2006-07-16 02:29:49.000000000 +0800 -@@ -21,8 +21,14 @@ - #include - #include - #include -+#include - #endif - #include -+#include -+ -+struct ext3_buddy_group_blocks; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ int s_mb_factor; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ unsigned long s_stripe; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ -+ /* stats for buddy allocator */ -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; - }; -+ -+#define EXT3_GROUP_INFO(sb, group) \ -+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/fs/ext3/super.c 2006-07-16 02:29:49.000000000 +0800 -@@ -391,6 +391,7 @@ static void ext3_put_super (struct super - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -642,6 +643,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - Opt_grpquota - }; - -@@ -696,6 +697,9 @@ static match_table_t tokens = { - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -1047,6 +1049,19 @@ clear_qf_name: - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1773,6 +1778,7 @@ static int ext3_fill_super (struct super - "writeback"); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - lock_kernel(); - return 0; - -@@ -2712,7 +2718,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2734,6 +2746,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-stage/fs/ext3/extents.c -=================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/fs/ext3/extents.c 2006-07-16 02:29:49.000000000 +0800 -@@ -771,7 +771,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/fs/ext3/inode.c 2006-07-16 02:29:49.000000000 +0800 -@@ -562,7 +562,7 @@ static int ext3_alloc_blocks(handle_t *h - return ret; - failed_out: - for (i = 0; i i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -- return; --} -- - /* - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This -@@ -1463,7 +1445,7 @@ out: - return 0; - } - --ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, -+ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp) - { - unsigned long count = 1; -Index: linux-stage/fs/ext3/xattr.c -=================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/fs/ext3/xattr.c 2006-07-16 02:29:49.000000000 +0800 -@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { -@@ -805,7 +805,7 @@ inserted: - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -Index: linux-stage/fs/ext3/mballoc.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ linux-stage/fs/ext3/mballoc.c 2006-07-16 02:29:49.000000000 +0800 -@@ -0,0 +1,2730 @@ -+/* -+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ * - tree of groups sorted by number of free blocks -+ * - percpu reservation code (hotpath) -+ * - error handling -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3//mb_history -+ */ -+#define EXT3_MB_HISTORY -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+long ext3_mb_max_to_scan = 500; -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+long ext3_mb_min_to_scan = 30; -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! -+ */ -+ -+long ext3_mb_stats = 1; -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+long ext3_mb_order2_reqs = 8; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ unsigned short bb_counters[]; -+}; -+ -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+struct ext3_free_extent { -+ __u16 fe_start; -+ __u16 fe_len; -+ __u16 fe_group; -+}; -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u8 ac_status; -+ __u8 ac_flags; /* allocation hints */ -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ -+ struct page *ac_buddy_page; -+ struct page *ac_bitmap_page; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u8 cr; /* which phase the result extent was found at */ -+ __u8 merged; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(sb,ino,ac) -+#else -+static void ext3_mb_store_history(struct super_block *, unsigned ino, -+ struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef AGGRESSIVE_CHECK -+ -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ int fragments = 0, fstart; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 300 != 0) -+ return; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ J_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ J_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ J_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ J_ASSERT(e3b->bd_info->bb_fragments == fragments); -+} -+ -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ mb_debug("mark %u/%u free\n", first, len); -+ J_ASSERT(len < EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ mb_debug(" %u/%u -> max %u, min %u\n", -+ first & ((2 << sb->s_blocksize_bits) - 1), -+ len, max, min); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) { -+ mb_debug(" set %u at %u \n", first >> min, -+ sbi->s_mb_offsets[min]); -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ } -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ /* bb_state shouldn't being modified because all -+ * others waits for init completion on page lock */ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, bitmap, group); -+ } else { -+ /* this is block of bitmap */ -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memcpy(data, bitmap, blocksize); -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... */ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* hold in-core structures until allocated -+ * blocks are marked non-free in on-disk bitmap */ -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ page_cache_get(e3b->bd_buddy_page); -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ page_cache_get(e3b->bd_bitmap_page); -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! -+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ J_ASSERT(ex->fe_len > 0); -+ J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chunk is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ /* -+ * Let's scan at least few extents and don't pick up a first one -+ */ -+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > ext3_mb_max_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ J_ASSERT(ex.fe_len > 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ ext3_fsblk_t start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ J_ASSERT(ac->ac_2order > 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ if (buddy == NULL) { -+ printk(KERN_ALERT "looking for wrong order?\n"); -+ break; -+ } -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ J_ASSERT(k < max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len); -+ -+ if (unlikely(ext3_mb_stats)) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. -+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ J_ASSERT(free > 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ J_ASSERT(free == 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(free >= ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ ext3_fsblk_t i, max; -+ -+ J_ASSERT(sbi->s_stripe != 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, fragments, i, bits; -+ -+ J_ASSERT(cr >= 0 && cr < 4); -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ J_ASSERT(ac->ac_2order != 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ ext3_fsblk_t goal, int *len,int flags,int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ ext3_fsblk_t block; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ /* -+ * We can't allocate > group size -+ */ -+ if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ *len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ ac.ac_buddy_page = NULL; -+ ac.ac_bitmap_page = NULL; -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_groups_scanned = 0; -+ ac.ac_ex_scanned = 0; -+ ac.ac_found = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_ex.fe_group = group; -+ ac.ac_g_ex.fe_start = block; -+ ac.ac_g_ex.fe_len = *len; -+ ac.ac_flags = flags; -+ ac.ac_2order = 0; -+ ac.ac_criteria = 0; -+ -+ if (*len == 1 && sbi->s_stripe) { -+ /* looks like a metadata, let's use a dirty hack for raid5 -+ * move all metadata in first groups in hope to hit cached -+ * sectors and thus avoid read-modify cycles in raid5 */ -+ ac.ac_g_ex.fe_group = group = 0; -+ } -+ -+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */ -+ i = ffs(*len); -+ if (i >= ext3_mb_order2_reqs) { -+ i--; -+ if ((*len & (~(1 << i))) == 0) -+ ac.ac_2order = i; -+ } -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(&ac, &e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ goto found; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac.ac_2order ? 0 : 1; -+repeat: -+ for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac.ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac.ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(&ac, &e3b); -+ else if (cr == 1 && *len == sbi->s_stripe) -+ ext3_mb_scan_aligned(&ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(&ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac.ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND && -+ !(ac.ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ /*if (ac.ac_found > ext3_mb_max_to_scan) -+ printk(KERN_DEBUG "EXT3-fs: too long searching at " -+ "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len, -+ ac.ac_g_ex.fe_len);*/ -+ ext3_mb_try_best_found(&ac, &e3b); -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. -+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ goto repeat; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * We aren't lucky definitely -+ */ -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+#if 1 -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac.ac_status, ac.ac_flags); -+ printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n", -+ ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group, -+ ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr); -+ printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n", -+ sbi->s_blocks_reserved, ac.ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free); -+ printk("\n"); -+#endif -+ goto out; -+ } -+ -+found: -+ J_ASSERT(ac.ac_b_ex.fe_len > 0); -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = "E3FSBLK, block); -+#ifdef AGGRESSIVE_CHECK -+ for (i = 0; i < ac.ac_b_ex.fe_len; i++) -+ J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac.ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_ex.fe_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len); -+ -+ *len = ac.ac_b_ex.fe_len; -+ J_ASSERT(*len > 0); -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (ac.ac_buddy_page) -+ page_cache_release(ac.ac_buddy_page); -+ if (ac.ac_bitmap_page) -+ page_cache_release(ac.ac_bitmap_page); -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ -+ if (unlikely(ext3_mb_stats) && ac.ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(*len, &sbi->s_bal_allocated); -+ if (*len >= ac.ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac.ac_found, &sbi->s_bal_ex_scanned); -+ if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start && -+ ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac.ac_found > ext3_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(sb, inode->i_ino, &ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->goal.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct ext3_mb_history *hs = v; -+ char buf[20], buf2[20]; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n", -+ "pid", "inode", "goal", "result", "found", "grps", "cr", -+ "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ sprintf(buf, "%u/%u/%u", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len); -+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", -+ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups, -+ hs->cr, hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v, i; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", "2^0", "2^1", "2^2", -+ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", -+ "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ -+ if (EXT3_MB_GRP_NEED_INIT(&sg.info)) -+ return 0; -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? -+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ remove_proc_entry(name, proc_root_ext3); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ int i; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ sbi->s_mb_proc = proc_mkdir(name, proc_root_ext3); -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void -+ext3_mb_store_history(struct super_block *sb, unsigned ino, -+ struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ h.pid = current->pid; -+ h.ino = ino; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_b_ex; -+ h.found = ac->ac_found; -+ h.cr = ac->ac_criteria; -+ h.groups = ac->ac_groups_scanned; -+ h.tail = ac->ac_tail; -+ h.buddy = ac->ac_buddy; -+ h.merged = 0; -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. */ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *root = sb->s_root->d_inode; -+ unsigned i, offset, max; -+ struct dentry *dentry; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_reserve_lock); -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ /* remove old on-disk buddy file */ -+ mutex_lock(&root->i_mutex); -+ dentry = lookup_one_len(".buddy", sb->s_root, strlen(".buddy")); -+ if (dentry->d_inode != NULL) { -+ i = vfs_unlink(root, dentry); -+ if (i != 0) -+ printk("EXT3-fs: can't remove .buddy file: %d\n", i); -+ } -+ dput(dentry); -+ mutex_unlock(&root->i_mutex); -+ -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ if (ext3_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ } -+ -+ ext3_mb_history_release(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ ext3_fsblk_t block, unsigned long count, -+ int metadata, unsigned long *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ ext3_unlock_group(sb, block_group); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed = count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, -+ ext3_fsblk_t goal, int *errp) -+{ -+ ext3_fsblk_t ret; -+ int len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ ext3_fsblk_t block, unsigned long count, int metadata) -+{ -+ struct super_block *sb; -+ unsigned long freed; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, -+ &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ return; -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "mb_stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan" -+#define EXT3_MB_ORDER2_REQ "mb_order2_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ ext3_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ struct proc_dir_entry *proc_ext3_mb_stats; -+ struct proc_dir_entry *proc_ext3_mb_max_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_min_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_order2_req; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ return -EIO; -+ } -+ -+ /* Initialize EXT3_MB_STATS_NAME */ -+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_stats == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_STATS_NAME); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_stats->data = NULL; -+ proc_ext3_mb_stats->read_proc = ext3_mb_stats_read; -+ proc_ext3_mb_stats->write_proc = ext3_mb_stats_write; -+ -+ /* Initialize EXT3_MAX_TO_SCAN_NAME */ -+ proc_ext3_mb_max_to_scan = create_proc_entry( -+ EXT3_MB_MAX_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_max_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MAX_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_max_to_scan->data = NULL; -+ proc_ext3_mb_max_to_scan->read_proc = ext3_mb_max_to_scan_read; -+ proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write; -+ -+ /* Initialize EXT3_MIN_TO_SCAN_NAME */ -+ proc_ext3_mb_min_to_scan = create_proc_entry( -+ EXT3_MB_MIN_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_min_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MIN_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_min_to_scan->data = NULL; -+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read; -+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write; -+ -+ /* Initialize EXT3_ORDER2_REQ */ -+ proc_ext3_mb_order2_req = create_proc_entry( -+ EXT3_MB_ORDER2_REQ, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_order2_req == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_ORDER2_REQ); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_order2_req->data = NULL; -+ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read; -+ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write; -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2006-07-16 02:29:43.000000000 +0800 -+++ linux-stage/fs/ext3/Makefile 2006-07-16 02:29:49.000000000 +0800 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch deleted file mode 100644 index 2a4e19b..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch +++ /dev/null @@ -1,3124 +0,0 @@ -Index: linux-2.6.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-06-01 14:58:46.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-10-24 12:54:31.000000000 +0400 -@@ -57,6 +57,14 @@ struct statfs; - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 -+#define EXT3_MB_HINT_RESERVED 2 -+#define EXT3_MB_HINT_METADATA 4 -+#define EXT3_MB_HINT_FIRST 8 -+#define EXT3_MB_HINT_BEST 16 -+ - /* - * Special inodes numbers - */ -@@ -365,6 +373,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -387,6 +396,14 @@ struct ext3_inode { - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -726,7 +743,7 @@ extern int ext3_bg_has_super(struct supe - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern void ext3_free_blocks_sb (handle_t *, struct super_block *, - unsigned long, unsigned long, int *); - extern unsigned long ext3_count_free_blocks (struct super_block *); -@@ -857,6 +874,17 @@ extern void ext3_extents_initialize_bloc - extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *, int); -+extern int ext3_mb_release(struct super_block *); -+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *); -+extern int ext3_mb_reserve_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+int __init init_ext3_proc(void); -+void exit_ext3_proc(void); -+ - #endif /* __KERNEL__ */ - - /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2006-05-18 23:57:04.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2006-10-24 12:54:31.000000000 +0400 -@@ -23,9 +23,15 @@ - #define EXT_INCLUDE - #include - #include -+#include - #endif - #endif - #include -+#include -+ -+struct ext3_buddy_group_blocks; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -@@ -81,6 +87,43 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ int s_mb_factor; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ unsigned long s_stripe; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ -+ /* stats for buddy allocator */ -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; - }; - -+#define EXT3_GROUP_INFO(sb, group) \ -+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] -+ - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-06-01 14:58:46.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-10-24 12:54:31.000000000 +0400 -@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -597,6 +598,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - }; - - static match_table_t tokens = { -@@ -649,6 +651,9 @@ static match_table_t tokens = { - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -962,6 +967,19 @@ clear_qf_name: - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - - return 0; - -@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.9-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/extents.c 2006-06-01 14:58:46.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/extents.c 2006-10-24 12:54:31.000000000 +0400 -@@ -777,7 +777,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.9-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-01 14:58:46.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/inode.c 2006-10-24 12:54:31.000000000 +0400 -@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); - return err; - } - -@@ -673,7 +673,7 @@ err_out: - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, -- le32_to_cpu(where[i].key), 1); -+ le32_to_cpu(where[i].key), 1, 1); - return err; - } - -@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 1); - } - - /** -@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* -Index: linux-2.6.9-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/balloc.c 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/balloc.c 2006-10-24 12:54:31.000000000 +0400 -@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -451,24 +451,6 @@ error_return: - return; - } - --/* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -- unsigned long block, unsigned long count) --{ -- struct super_block * sb; -- int dquot_freed_blocks; -- -- sb = inode->i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -- return; --} -- - /* - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This -@@ -1131,7 +1113,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.9-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-05-18 23:57:04.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-10-24 12:54:31.000000000 +0400 -@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -@@ -1328,7 +1328,7 @@ getblk_failed: - if (ce) - mb_cache_entry_free(ce); - ea_bdebug(old_bh, "freeing"); -- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); - - /* ext3_forget() calls bforget() for us, but we - let our caller release old_bh, so we need to -@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle - if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); -+ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); - } else { -Index: linux-2.6.9-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2006-10-23 18:07:54.821533176 +0400 -+++ linux-2.6.9-full/fs/ext3/mballoc.c 2006-10-24 13:00:56.000000000 +0400 -@@ -0,0 +1,2729 @@ -+/* -+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ * - tree of groups sorted by number of free blocks -+ * - percpu reservation code (hotpath) -+ * - error handling -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3//mb_history -+ */ -+#define EXT3_MB_HISTORY -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+long ext3_mb_max_to_scan = 500; -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+long ext3_mb_min_to_scan = 30; -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! -+ */ -+ -+long ext3_mb_stats = 1; -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+long ext3_mb_order2_reqs = 8; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ unsigned short bb_counters[]; -+}; -+ -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+struct ext3_free_extent { -+ __u16 fe_start; -+ __u16 fe_len; -+ __u16 fe_group; -+}; -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u8 ac_status; -+ __u8 ac_flags; /* allocation hints */ -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ -+ struct page *ac_buddy_page; -+ struct page *ac_bitmap_page; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u8 cr; /* which phase the result extent was found at */ -+ __u8 merged; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(sb,ino,ac) -+#else -+static void ext3_mb_store_history(struct super_block *, unsigned ino, -+ struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef AGGRESSIVE_CHECK -+ -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ int fragments = 0, fstart; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 300 != 0) -+ return; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ J_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(mb_test_bit(i << 1, buddy2)); -+ J_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ J_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ J_ASSERT(e3b->bd_info->bb_fragments == fragments); -+} -+ -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ mb_debug("mark %u/%u free\n", first, len); -+ J_ASSERT(len < EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ mb_debug(" %u/%u -> max %u, min %u\n", -+ first & ((2 << sb->s_blocksize_bits) - 1), -+ len, max, min); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) { -+ mb_debug(" set %u at %u \n", first >> min, -+ sbi->s_mb_offsets[min]); -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ } -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ /* bb_state shouldn't being modified because all -+ * others waits for init completion on page lock */ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, bitmap, group); -+ } else { -+ /* this is block of bitmap */ -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memcpy(data, bitmap, blocksize); -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... */ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page); -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b)); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ mb_check_buddy(e3b); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* hold in-core structures until allocated -+ * blocks are marked non-free in on-disk bitmap */ -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ page_cache_get(e3b->bd_buddy_page); -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ page_cache_get(e3b->bd_bitmap_page); -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! -+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ J_ASSERT(ex->fe_len > 0); -+ J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chunk is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ /* -+ * Let's scan at least few extents and don't pick up a first one -+ */ -+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > ext3_mb_max_to_scan) -+ ac->ac_status = AC_STATUS_BREAK; -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ J_ASSERT(ex.fe_len > 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ unsigned long start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group); -+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ J_ASSERT(ac->ac_2order > 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ if (buddy == NULL) { -+ printk(KERN_ALERT "looking for wrong order?\n"); -+ break; -+ } -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ J_ASSERT(k < max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len); -+ -+ if (unlikely(ext3_mb_stats)) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. -+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ J_ASSERT(free > 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ J_ASSERT(free == 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ J_ASSERT(ex.fe_len > 0); -+ J_ASSERT(free >= ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ unsigned long i, max; -+ -+ J_ASSERT(sbi->s_stripe != 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, fragments, i, bits; -+ -+ J_ASSERT(cr >= 0 && cr < 4); -+ J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ J_ASSERT(ac->ac_2order != 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *len, int flags, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, block, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ /* -+ * We can't allocate > group size -+ */ -+ if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ *len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ ac.ac_buddy_page = NULL; -+ ac.ac_bitmap_page = NULL; -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_groups_scanned = 0; -+ ac.ac_ex_scanned = 0; -+ ac.ac_found = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_ex.fe_group = group; -+ ac.ac_g_ex.fe_start = block; -+ ac.ac_g_ex.fe_len = *len; -+ ac.ac_flags = flags; -+ ac.ac_2order = 0; -+ ac.ac_criteria = 0; -+ -+ if (*len == 1 && sbi->s_stripe) { -+ /* looks like a metadata, let's use a dirty hack for raid5 -+ * move all metadata in first groups in hope to hit cached -+ * sectors and thus avoid read-modify cycles in raid5 */ -+ ac.ac_g_ex.fe_group = group = 0; -+ } -+ -+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */ -+ i = ffs(*len); -+ if (i >= ext3_mb_order2_reqs) { -+ i--; -+ if ((*len & (~(1 << i))) == 0) -+ ac.ac_2order = i; -+ } -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(&ac, &e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ goto found; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac.ac_2order ? 0 : 1; -+repeat: -+ for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac.ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac.ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(&ac, &e3b); -+ else if (cr == 1 && *len == sbi->s_stripe) -+ ext3_mb_scan_aligned(&ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(&ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac.ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND && -+ !(ac.ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ /*if (ac.ac_found > ext3_mb_max_to_scan) -+ printk(KERN_DEBUG "EXT3-fs: too long searching at " -+ "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len, -+ ac.ac_g_ex.fe_len);*/ -+ ext3_mb_try_best_found(&ac, &e3b); -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. -+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac.ac_b_ex.fe_group = 0; -+ ac.ac_b_ex.fe_start = 0; -+ ac.ac_b_ex.fe_len = 0; -+ ac.ac_status = AC_STATUS_CONTINUE; -+ ac.ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ goto repeat; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* -+ * We aren't lucky definitely -+ */ -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+#if 1 -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac.ac_status, ac.ac_flags); -+ printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n", -+ ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group, -+ ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr); -+ printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n", -+ sbi->s_blocks_reserved, ac.ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free); -+ printk("\n"); -+#endif -+ goto out; -+ } -+ -+found: -+ J_ASSERT(ac.ac_b_ex.fe_len > 0); -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = %u", block); -+#ifdef AGGRESSIVE_CHECK -+ for (i = 0; i < ac.ac_b_ex.fe_len; i++) -+ J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac.ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_ex.fe_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len); -+ -+ *len = ac.ac_b_ex.fe_len; -+ J_ASSERT(*len > 0); -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (ac.ac_buddy_page) -+ page_cache_release(ac.ac_buddy_page); -+ if (ac.ac_bitmap_page) -+ page_cache_release(ac.ac_bitmap_page); -+ -+ if (!(flags & EXT3_MB_HINT_RESERVED)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ -+ if (unlikely(ext3_mb_stats) && ac.ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(*len, &sbi->s_bal_allocated); -+ if (*len >= ac.ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac.ac_found, &sbi->s_bal_ex_scanned); -+ if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start && -+ ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac.ac_found > ext3_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(sb, inode->i_ino, &ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->goal.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct ext3_mb_history *hs = v; -+ char buf[20], buf2[20]; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n", -+ "pid", "inode", "goal", "result", "found", "grps", "cr", -+ "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ sprintf(buf, "%u/%u/%u", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len); -+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", -+ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups, -+ hs->cr, hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v, i; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", "2^0", "2^1", "2^2", -+ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", -+ "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ -+ if (EXT3_MB_GRP_NEED_INIT(&sg.info)) -+ return 0; -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? -+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ remove_proc_entry(name, proc_root_ext3); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char name[64]; -+ int i; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name)); -+ sbi->s_mb_proc = proc_mkdir(name, proc_root_ext3); -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void -+ext3_mb_store_history(struct super_block *sb, unsigned ino, -+ struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ h.pid = current->pid; -+ h.ino = ino; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_b_ex; -+ h.found = ac->ac_found; -+ h.cr = ac->ac_criteria; -+ h.groups = ac->ac_groups_scanned; -+ h.tail = ac->ac_tail; -+ h.buddy = ac->ac_buddy; -+ h.merged = 0; -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. */ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *root = sb->s_root->d_inode; -+ unsigned i, offset, max; -+ struct dentry *dentry; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_reserve_lock); -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ /* remove old on-disk buddy file */ -+ down(&root->i_sem); -+ dentry = lookup_one_len(".buddy", sb->s_root, strlen(".buddy")); -+ if (dentry->d_inode != NULL) { -+ i = vfs_unlink(root, dentry); -+ if (i != 0) -+ printk("EXT3-fs: can't remove .buddy file: %d\n", i); -+ } -+ dput(dentry); -+ up(&root->i_sem); -+ -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ if (ext3_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ } -+ -+ ext3_mb_history_release(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ J_ASSERT(e3b->bd_bitmap_page != NULL); -+ J_ASSERT(e3b->bd_buddy_page != NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, -+ int metadata, int *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ ext3_unlock_group(sb, block_group); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed = count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *errp) -+{ -+ int ret, len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ struct super_block *sb; -+ int freed; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ return; -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "mb_stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan" -+#define EXT3_MB_ORDER2_REQ "mb_order2_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ ext3_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ ext3_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ struct proc_dir_entry *proc_ext3_mb_stats; -+ struct proc_dir_entry *proc_ext3_mb_max_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_min_to_scan; -+ struct proc_dir_entry *proc_ext3_mb_order2_req; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ return -EIO; -+ } -+ -+ /* Initialize EXT3_MB_STATS_NAME */ -+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_stats == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_STATS_NAME); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_stats->data = NULL; -+ proc_ext3_mb_stats->read_proc = ext3_mb_stats_read; -+ proc_ext3_mb_stats->write_proc = ext3_mb_stats_write; -+ -+ /* Initialize EXT3_MAX_TO_SCAN_NAME */ -+ proc_ext3_mb_max_to_scan = create_proc_entry( -+ EXT3_MB_MAX_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_max_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MAX_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_max_to_scan->data = NULL; -+ proc_ext3_mb_max_to_scan->read_proc = ext3_mb_max_to_scan_read; -+ proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write; -+ -+ /* Initialize EXT3_MIN_TO_SCAN_NAME */ -+ proc_ext3_mb_min_to_scan = create_proc_entry( -+ EXT3_MB_MIN_TO_SCAN_NAME, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_min_to_scan == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_MIN_TO_SCAN_NAME); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_min_to_scan->data = NULL; -+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read; -+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write; -+ -+ /* Initialize EXT3_ORDER2_REQ */ -+ proc_ext3_mb_order2_req = create_proc_entry( -+ EXT3_MB_ORDER2_REQ, -+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3); -+ if (proc_ext3_mb_order2_req == NULL) { -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", -+ EXT3_MB_ORDER2_REQ); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+ return -EIO; -+ } -+ -+ proc_ext3_mb_order2_req->data = NULL; -+ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read; -+ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write; -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -Index: linux-2.6.9-full/fs/ext3/Makefile -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/Makefile 2006-06-01 14:58:46.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/Makefile 2006-10-24 12:54:31.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-core.patch b/lustre/kernel_patches/patches/ext3-mballoc3-core.patch new file mode 100644 index 0000000..a6033d1 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc3-core.patch @@ -0,0 +1,4528 @@ +Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2006-05-18 23:57:04.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2007-03-28 15:42:16.000000000 +0400 +@@ -81,6 +81,61 @@ struct ext3_sb_info { + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ + #endif ++ ++ /* for buddy allocator */ ++ struct ext3_group_info ***s_group_info; ++ struct inode *s_buddy_cache; ++ long s_blocks_reserved; ++ spinlock_t s_reserve_lock; ++ struct list_head s_active_transaction; ++ struct list_head s_closed_transaction; ++ struct list_head s_committed_transaction; ++ spinlock_t s_md_lock; ++ tid_t s_last_transaction; ++ unsigned short *s_mb_offsets, *s_mb_maxs; ++ ++ /* tunables */ ++ unsigned long s_mb_factor; ++ unsigned long s_stripe; ++ unsigned long s_mb_stream_request; ++ unsigned long s_mb_max_to_scan; ++ unsigned long s_mb_min_to_scan; ++ unsigned long s_mb_max_groups_to_scan; ++ unsigned long s_mb_stats; ++ unsigned long s_mb_order2_reqs; ++ ++ /* history to debug policy */ ++ struct ext3_mb_history *s_mb_history; ++ int s_mb_history_cur; ++ int s_mb_history_max; ++ int s_mb_history_num; ++ struct proc_dir_entry *s_mb_proc; ++ spinlock_t s_mb_history_lock; ++ int s_mb_history_filter; ++ ++ /* stats for buddy allocator */ ++ spinlock_t s_mb_pa_lock; ++ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ ++ atomic_t s_bal_success; /* we found long enough chunks */ ++ atomic_t s_bal_allocated; /* in blocks */ ++ atomic_t s_bal_ex_scanned; /* total extents scanned */ ++ atomic_t s_bal_goals; /* goal hits */ ++ atomic_t s_bal_breaks; /* too long searches */ ++ atomic_t s_bal_2orders; /* 2^order hits */ ++ spinlock_t s_bal_lock; ++ unsigned long s_mb_buddies_generated; ++ unsigned long long s_mb_generation_time; ++ atomic_t s_mb_lost_chunks; ++ atomic_t s_mb_preallocated; ++ atomic_t s_mb_discarded; ++ ++ /* locality groups */ ++ struct ext3_locality_group *s_locality_groups; ++ + }; + ++#define EXT3_GROUP_INFO(sb, group) \ ++ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ ++ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] ++ + #endif /* _LINUX_EXT3_FS_SB */ +Index: linux-2.6.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2007-03-28 01:29:39.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-28 15:45:07.000000000 +0400 +@@ -57,6 +57,30 @@ struct statfs; + #define ext3_debug(f, a...) do {} while (0) + #endif + ++#define EXT3_MULTIBLOCK_ALLOCATOR 1 ++ ++#define EXT3_MB_HINT_MERGE 1 /* prefer goal again. length */ ++#define EXT3_MB_HINT_RESERVED 2 /* blocks already reserved */ ++#define EXT3_MB_HINT_METADATA 4 /* metadata is being allocated */ ++#define EXT3_MB_HINT_FIRST 8 /* first blocks in the file */ ++#define EXT3_MB_HINT_BEST 16 /* search for the best chunk */ ++#define EXT3_MB_HINT_DATA 32 /* data is being allocated */ ++#define EXT3_MB_HINT_NOPREALLOC 64 /* don't preallocate (for tails) */ ++#define EXT3_MB_HINT_GROUP_ALLOC 128 /* allocate for locality group */ ++#define EXT3_MB_HINT_GOAL_ONLY 256 /* allocate goal blocks or none */ ++ ++struct ext3_allocation_request { ++ struct inode *inode; /* target inode for block we're allocating */ ++ unsigned long logical; /* logical block in target inode */ ++ unsigned long goal; /* phys. target (a hint) */ ++ unsigned long lleft; /* the closest logical allocated block to the left */ ++ unsigned long pleft; /* phys. block for ^^^ */ ++ unsigned long lright; /* the closest logical allocated block to the right */ ++ unsigned long pright; /* phys. block for ^^^ */ ++ unsigned long len; /* how many blocks we want to allocate */ ++ unsigned long flags; /* flags. see above EXT3_MB_HINT_* */ ++}; ++ + /* + * Special inodes numbers + */ +@@ -404,6 +413,14 @@ + #define ext3_find_first_zero_bit ext2_find_first_zero_bit + #define ext3_find_next_zero_bit ext2_find_next_zero_bit + ++#ifndef ext2_find_next_le_bit ++#ifdef __LITTLE_ENDIAN ++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) ++#else ++#error "mballoc needs a patch for big-endian systems - CFS bug 10634" ++#endif /* __LITTLE_ENDIAN */ ++#endif /* !ext2_find_next_le_bit */ ++ + /* + * Maximal mount counts between two filesystem checks + */ +@@ -763,6 +787,20 @@ extern unsigned long ext3_count_dirs (st + extern void ext3_check_inodes_bitmap (struct super_block *); + extern unsigned long ext3_count_free (struct buffer_head *, unsigned); + ++/* mballoc.c */ ++extern long ext3_mb_stats; ++extern long ext3_mb_max_to_scan; ++extern int ext3_mb_init(struct super_block *, int); ++extern int ext3_mb_release(struct super_block *); ++extern unsigned long ext3_mb_new_blocks(handle_t *, struct ext3_allocation_request *, int *); ++extern int ext3_mb_reserve_blocks(struct super_block *, int); ++extern void ext3_mb_release_blocks(struct super_block *, int); ++extern void ext3_mb_release_blocks(struct super_block *, int); ++extern void ext3_mb_discard_inode_preallocations(struct inode *); ++extern int __init init_ext3_proc(void); ++extern void exit_ext3_proc(void); ++extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, unsigned long, int, int *); ++ + + /* inode.c */ + extern int ext3_block_truncate_page(handle_t *, struct page *, +Index: linux-2.6.9-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/super.c 2007-03-28 01:29:38.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/super.c 2007-03-28 15:42:16.000000000 +0400 +@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_mb_release(sb); + ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); +@@ -463,6 +464,8 @@ static struct inode *ext3_alloc_inode(st + ei->vfs_inode.i_version = 1; + + memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); ++ INIT_LIST_HEAD(&ei->i_prealloc_list); ++ spin_lock_init(&ei->i_prealloc_lock); + return &ei->vfs_inode; + } + +@@ -2433,7 +2436,13 @@ static struct file_system_type ext3_fs_t + + static int __init init_ext3_fs(void) + { +- int err = init_ext3_xattr(); ++ int err; ++ ++ err = init_ext3_proc(); ++ if (err) ++ return err; ++ ++ err = init_ext3_xattr(); + if (err) + return err; + err = init_inodecache(); +@@ -2455,6 +2464,7 @@ static void __exit exit_ext3_fs(void) + unregister_filesystem(&ext3_fs_type); + destroy_inodecache(); + exit_ext3_xattr(); ++ exit_ext3_proc(); + } + + int ext3_prep_san_write(struct inode *inode, long *blocks, +Index: linux-2.6.9-full/fs/ext3/mballoc.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2007-02-13 18:39:59.640066087 +0300 ++++ linux-2.6.9-full/fs/ext3/mballoc.c 2007-03-29 00:28:40.000000000 +0400 +@@ -0,0 +1,4342 @@ ++/* ++ * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++ ++/* ++ * mballoc.c contains the multiblocks allocation routines ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * MUSTDO: ++ * - test ext3_ext_search_left() and ext3_ext_search_right() ++ * - search for metadata in few groups ++ * ++ * TODO v4: ++ * - normalization should take into account whether file is still open ++ * - discard preallocations if no free space left (policy?) ++ * - don't normalize tails ++ * - quota ++ * - reservation for superuser ++ * ++ * TODO v3: ++ * - bitmap read-ahead (proposed by Oleg Drokin aka green) ++ * - track min/max extents in each group for better group selection ++ * - mb_mark_used() may allocate chunk right after splitting buddy ++ * - tree of groups sorted by number of free blocks ++ * - error handling ++ */ ++ ++/* ++ * mballoc operates on the following data: ++ * - on-disk bitmap ++ * - in-core buddy (actually includes buddy and bitmap) ++ * - preallocation descriptors (PAs) ++ * ++ * there are two types of preallocations: ++ * - inode ++ * assiged to specific inode and can be used for this inode only. ++ * it describes part of inode's space preallocated to specific ++ * physical blocks. any block from that preallocated can be used ++ * independent. the descriptor just tracks number of blocks left ++ * unused. so, before taking some block from descriptor, one must ++ * make sure corresponded logical block isn't allocated yet. this ++ * also means that freeing any block within descriptor's range ++ * must discard all preallocated blocks. ++ * - locality group ++ * assigned to specific locality group which does not translate to ++ * permanent set of inodes: inode can join and leave group. space ++ * from this type of preallocation can be used for any inode. thus ++ * it's consumed from the beginning to the end. ++ * ++ * relation between them can be expressed as: ++ * in-core buddy = on-disk bitmap + preallocation descriptors ++ * ++ * this mean blocks mballoc considers used are: ++ * - allocated blocks (persistent) ++ * - preallocated blocks (non-persistent) ++ * ++ * consistency in mballoc world means that at any time a block is either ++ * free or used in ALL structures. notice: "any time" should not be read ++ * literally -- time is discrete and delimited by locks. ++ * ++ * to keep it simple, we don't use block numbers, instead we count number of ++ * blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA. ++ * ++ * all operations can be expressed as: ++ * - init buddy: buddy = on-disk + PAs ++ * - new PA: buddy += N; PA = N ++ * - use inode PA: on-disk += N; PA -= N ++ * - discard inode PA buddy -= on-disk - PA; PA = 0 ++ * - use locality group PA on-disk += N; PA -= N ++ * - discard locality group PA buddy -= PA; PA = 0 ++ * note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap ++ * is used in real operation because we can't know actual used ++ * bits from PA, only from on-disk bitmap ++ * ++ * if we follow this strict logic, then all operations above should be atomic. ++ * given some of them can block, we'd have to use something like semaphores ++ * killing performance on high-end SMP hardware. let's try to relax it using ++ * the following knowledge: ++ * 1) if buddy is referenced, it's already initialized ++ * 2) while block is used in buddy and the buddy is referenced, ++ * nobody can re-allocate that block ++ * 3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has ++ * bit set and PA claims same block, it's OK. IOW, one can set bit in ++ * on-disk bitmap if buddy has same bit set or/and PA covers corresponded ++ * block ++ * ++ * so, now we're building a concurrency table: ++ * - init buddy vs. ++ * - new PA ++ * blocks for PA are allocated in the buddy, buddy must be referenced ++ * until PA is linked to allocation group to avoid concurrent buddy init ++ * - use inode PA ++ * we need to make sure that either on-disk bitmap or PA has uptodate data ++ * given (3) we care that PA-=N operation doesn't interfere with init ++ * - discard inode PA ++ * the simplest way would be to have buddy initialized by the discard ++ * - use locality group PA ++ * again PA-=N must be serialized with init ++ * - discard locality group PA ++ * the simplest way would be to have buddy initialized by the discard ++ * - new PA vs. ++ * - use inode PA ++ * i_truncate_mutex serializes them ++ * - discard inode PA ++ * discard process must wait until PA isn't used by another process ++ * - use locality group PA ++ * some mutex should serialize them ++ * - discard locality group PA ++ * discard process must wait until PA isn't used by another process ++ * - use inode PA ++ * - use inode PA ++ * i_truncate_mutex or another mutex should serializes them ++ * - discard inode PA ++ * discard process must wait until PA isn't used by another process ++ * - use locality group PA ++ * nothing wrong here -- they're different PAs covering different blocks ++ * - discard locality group PA ++ * discard process must wait until PA isn't used by another process ++ * ++ * now we're ready to make few consequences: ++ * - PA is referenced and while it is no discard is possible ++ * - PA is referenced until block isn't marked in on-disk bitmap ++ * - PA changes only after on-disk bitmap ++ * - discard must not compete with init. either init is done before ++ * any discard or they're serialized somehow ++ * - buddy init as sum of on-disk bitmap and PAs is done atomically ++ * ++ * a special case when we've used PA to emptiness. no need to modify buddy ++ * in this case, but we should care about concurrent init ++ * ++ */ ++ ++ /* ++ * Logic in few words: ++ * ++ * - allocation: ++ * load group ++ * find blocks ++ * mark bits in on-disk bitmap ++ * release group ++ * ++ * - use preallocation: ++ * find proper PA (per-inode or group) ++ * load group ++ * mark bits in on-disk bitmap ++ * release group ++ * release PA ++ * ++ * - free: ++ * load group ++ * mark bits in on-disk bitmap ++ * release group ++ * ++ * - discard preallocations in group: ++ * mark PAs deleted ++ * move them onto local list ++ * load on-disk bitmap ++ * load group ++ * remove PA from object (inode or locality group) ++ * mark free blocks in-core ++ * ++ * - discard inode's preallocations: ++ */ ++ ++/* ++ * Locking rules ++ * ++ * Locks: ++ * - bitlock on a group (group) ++ * - object (inode/locality) (object) ++ * - per-pa lock (pa) ++ * ++ * Paths: ++ * - new pa ++ * object ++ * group ++ * ++ * - find and use pa: ++ * pa ++ * ++ * - release consumed pa: ++ * pa ++ * group ++ * object ++ * ++ * - generate in-core bitmap: ++ * group ++ * pa ++ * ++ * - discard all for given object (inode, locality group): ++ * object ++ * pa ++ * group ++ * ++ * - discard all for given group: ++ * group ++ * pa ++ * group ++ * object ++ * ++ */ ++ ++/* ++ * with AGGRESSIVE_CHECK allocator runs consistency checks over ++ * structures. these checks slow things down a lot ++ */ ++#define AGGRESSIVE_CHECK__ ++ ++/* ++ * with DOUBLE_CHECK defined mballoc creates persistent in-core ++ * bitmaps, maintains and uses them to check for double allocations ++ */ ++#define DOUBLE_CHECK__ ++ ++/* ++ */ ++#define MB_DEBUG__ ++#ifdef MB_DEBUG ++#define mb_debug(fmt,a...) printk(fmt, ##a) ++#else ++#define mb_debug(fmt,a...) ++#endif ++ ++/* ++ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory ++ * and you can monitor it in /proc/fs/ext3//mb_history ++ */ ++#define EXT3_MB_HISTORY ++#define EXT3_MB_HISTORY_ALLOC 1 /* allocation */ ++#define EXT3_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ ++#define EXT3_MB_HISTORY_DISCARD 4 /* preallocation discarded */ ++#define EXT3_MB_HISTORY_FREE 8 /* free */ ++ ++#define EXT3_MB_HISTORY_DEFAULT (EXT3_MB_HISTORY_ALLOC | \ ++ EXT3_MB_HISTORY_PREALLOC | \ ++ EXT3_MB_HISTORY_DISCARD | \ ++ EXT3_MB_HISTORY_FREE) ++ ++/* ++ * How long mballoc can look for a best extent (in found extents) ++ */ ++#define MB_DEFAULT_MAX_TO_SCAN 200 ++ ++/* ++ * How long mballoc must look for a best extent ++ */ ++#define MB_DEFAULT_MIN_TO_SCAN 10 ++ ++/* ++ * How many groups mballoc will scan looking for the best chunk ++ */ ++#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 ++ ++/* ++ * with 'ext3_mb_stats' allocator will collect stats that will be ++ * shown at umount. The collecting costs though! ++ */ ++#define MB_DEFAULT_STATS 1 ++ ++/* ++ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served ++ * by the stream allocator, which purpose is to pack requests ++ * as close each to other as possible to produce smooth I/O traffic ++ */ ++#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ ++ ++/* ++ * for which requests use 2^N search using buddies ++ */ ++#define MB_DEFAULT_ORDER2_REQS 8 ++ ++/* ++ * default stripe size = 1MB ++ */ ++#define MB_DEFAULT_STRIPE 256 ++ ++static kmem_cache_t *ext3_pspace_cachep = NULL; ++ ++#ifdef EXT3_BB_MAX_BLOCKS ++#undef EXT3_BB_MAX_BLOCKS ++#endif ++#define EXT3_BB_MAX_BLOCKS 30 ++ ++struct ext3_free_metadata { ++ unsigned short group; ++ unsigned short num; ++ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; ++ struct list_head list; ++}; ++ ++struct ext3_group_info { ++ unsigned long bb_state; ++ unsigned long bb_tid; ++ struct ext3_free_metadata *bb_md_cur; ++ unsigned short bb_first_free; ++ unsigned short bb_free; ++ unsigned short bb_fragments; ++ struct list_head bb_prealloc_list; ++#ifdef DOUBLE_CHECK ++ void *bb_bitmap; ++#endif ++ unsigned short bb_counters[]; ++}; ++ ++#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 ++#define EXT3_GROUP_INFO_LOCKED_BIT 1 ++ ++#define EXT3_MB_GRP_NEED_INIT(grp) \ ++ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) ++ ++ ++struct ext3_prealloc_space { ++ struct list_head pa_inode_list; ++ struct list_head pa_group_list; ++ union { ++ struct list_head pa_tmp_list; ++ struct rcu_head pa_rcu; ++ } u; ++ spinlock_t pa_lock; ++ atomic_t pa_count; ++ unsigned pa_deleted; ++ unsigned long pa_pstart; /* phys. block */ ++ unsigned long pa_lstart; /* log. block */ ++ unsigned short pa_len; /* len of preallocated chunk */ ++ unsigned short pa_free; /* how many blocks are free */ ++ unsigned short pa_linear; /* consumed in one direction ++ * strictly, for group prealloc */ ++ spinlock_t *pa_obj_lock; ++ struct inode *pa_inode; /* hack, for history only */ ++}; ++ ++ ++struct ext3_free_extent { ++ unsigned long fe_logical; ++ unsigned long fe_start; ++ unsigned long fe_group; ++ unsigned long fe_len; ++}; ++ ++/* ++ * Locality group: ++ * we try to group all related changes together ++ * so that writeback can flush/allocate them together as well ++ */ ++struct ext3_locality_group { ++ /* for allocator */ ++ struct semaphore lg_sem; /* to serialize allocates */ ++ struct list_head lg_prealloc_list;/* list of preallocations */ ++ spinlock_t lg_prealloc_lock; ++}; ++ ++struct ext3_allocation_context { ++ struct inode *ac_inode; ++ struct super_block *ac_sb; ++ ++ /* original request */ ++ struct ext3_free_extent ac_o_ex; ++ ++ /* goal request (after normalization) */ ++ struct ext3_free_extent ac_g_ex; ++ ++ /* the best found extent */ ++ struct ext3_free_extent ac_b_ex; ++ ++ /* copy of the bext found extent taken before preallocation efforts */ ++ struct ext3_free_extent ac_f_ex; ++ ++ /* number of iterations done. we have to track to limit searching */ ++ unsigned long ac_ex_scanned; ++ __u16 ac_groups_scanned; ++ __u16 ac_found; ++ __u16 ac_tail; ++ __u16 ac_buddy; ++ __u16 ac_flags; /* allocation hints */ ++ __u8 ac_status; ++ __u8 ac_criteria; ++ __u8 ac_repeats; ++ __u8 ac_2order; /* if request is to allocate 2^N blocks and ++ * N > 0, the field stores N, otherwise 0 */ ++ __u8 ac_op; /* operation, for history only */ ++ struct page *ac_bitmap_page; ++ struct page *ac_buddy_page; ++ struct ext3_prealloc_space *ac_pa; ++ struct ext3_locality_group *ac_lg; ++}; ++ ++#define AC_STATUS_CONTINUE 1 ++#define AC_STATUS_FOUND 2 ++#define AC_STATUS_BREAK 3 ++ ++struct ext3_mb_history { ++ struct ext3_free_extent orig; /* orig allocation */ ++ struct ext3_free_extent goal; /* goal allocation */ ++ struct ext3_free_extent result; /* result allocation */ ++ unsigned pid; ++ unsigned ino; ++ __u16 found; /* how many extents have been found */ ++ __u16 groups; /* how many groups have been scanned */ ++ __u16 tail; /* what tail broke some buddy */ ++ __u16 buddy; /* buddy the tail ^^^ broke */ ++ __u16 flags; ++ __u8 cr:3; /* which phase the result extent was found at */ ++ __u8 op:4; ++ __u8 merged:1; ++}; ++ ++struct ext3_buddy { ++ struct page *bd_buddy_page; ++ void *bd_buddy; ++ struct page *bd_bitmap_page; ++ void *bd_bitmap; ++ struct ext3_group_info *bd_info; ++ struct super_block *bd_sb; ++ __u16 bd_blkbits; ++ __u16 bd_group; ++}; ++#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) ++#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) ++ ++#ifndef EXT3_MB_HISTORY ++#define ext3_mb_store_history(ac) ++#else ++static void ext3_mb_store_history(struct ext3_allocation_context *ac); ++#endif ++ ++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) ++ ++static struct proc_dir_entry *proc_root_ext3; ++ ++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); ++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); ++unsigned long ext3_new_blocks_old(handle_t *handle, struct inode *inode, ++ unsigned long goal, unsigned long *count, int *errp); ++void ext3_mb_release_blocks(struct super_block *, int); ++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); ++void ext3_mb_free_committed_blocks(struct super_block *); ++void ext3_mb_generate_from_pa(struct super_block *sb, void *bitmap, int group); ++void ext3_mb_free_consumed_preallocations(struct ext3_allocation_context *ac); ++void ext3_mb_return_to_preallocation(struct inode *inode, struct ext3_buddy *e3b, ++ sector_t block, int count); ++void ext3_mb_show_ac(struct ext3_allocation_context *ac); ++void ext3_mb_check_with_pa(struct ext3_buddy *e3b, int first, int count); ++void ext3_mb_put_pa(struct ext3_allocation_context *, struct super_block *, struct ext3_prealloc_space *pa); ++int ext3_mb_init_per_dev_proc(struct super_block *sb); ++int ext3_mb_destroy_per_dev_proc(struct super_block *sb); ++ ++/* ++ * Calculate the block group number and offset, given a block number ++ */ ++static void ext3_get_group_no_and_offset(struct super_block *sb, ++ unsigned long blocknr, ++ unsigned long *blockgrpp, ++ unsigned long *offsetp) ++{ ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ unsigned long offset; ++ ++ blocknr = blocknr - le32_to_cpu(es->s_first_data_block); ++ offset = blocknr % EXT3_BLOCKS_PER_GROUP(sb); ++ blocknr = blocknr / EXT3_BLOCKS_PER_GROUP(sb); ++ if (offsetp) ++ *offsetp = offset; ++ if (blockgrpp) ++ *blockgrpp = blocknr; ++ ++} ++ ++static inline void ++ext3_lock_group(struct super_block *sb, int group) ++{ ++ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, ++ &EXT3_GROUP_INFO(sb, group)->bb_state); ++} ++ ++static inline void ++ext3_unlock_group(struct super_block *sb, int group) ++{ ++ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, ++ &EXT3_GROUP_INFO(sb, group)->bb_state); ++} ++ ++static inline int ++ext3_is_group_locked(struct super_block *sb, int group) ++{ ++ return bit_spin_is_locked(EXT3_GROUP_INFO_LOCKED_BIT, ++ &EXT3_GROUP_INFO(sb, group)->bb_state); ++} ++ ++unsigned long ext3_grp_offs_to_block(struct super_block *sb, ++ struct ext3_free_extent *fex) ++{ ++ unsigned long block; ++ ++ block = (unsigned long) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + fex->fe_start ++ + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); ++ return block; ++} ++ ++#if BITS_PER_LONG == 64 ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ bit += ((unsigned long) addr & 7UL) << 3; \ ++ addr = (void *) ((unsigned long) addr & ~7UL); \ ++} ++#elif BITS_PER_LONG == 32 ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ bit += ((unsigned long) addr & 3UL) << 3; \ ++ addr = (void *) ((unsigned long) addr & ~3UL); \ ++} ++#else ++#error "how many bits you are?!" ++#endif ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return ext2_test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ ext2_set_bit(bit, addr); ++} ++ ++static inline void mb_set_bit_atomic(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ ext2_set_bit_atomic(NULL, bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ ext2_clear_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit_atomic(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ ext2_clear_bit_atomic(NULL, bit, addr); ++} ++ ++static inline int mb_find_next_zero_bit(void *addr, int max, int start) ++{ ++ int fix; ++#if BITS_PER_LONG == 64 ++ fix = ((unsigned long) addr & 7UL) << 3; ++ addr = (void *) ((unsigned long) addr & ~7UL); ++#elif BITS_PER_LONG == 32 ++ fix = ((unsigned long) addr & 3UL) << 3; ++ addr = (void *) ((unsigned long) addr & ~3UL); ++#else ++#error "how many bits you are?!" ++#endif ++ max += fix; ++ start += fix; ++ return ext2_find_next_zero_bit(addr, max, start) - fix; ++} ++ ++static inline int mb_find_next_bit(void *addr, int max, int start) ++{ ++ int fix; ++#if BITS_PER_LONG == 64 ++ fix = ((unsigned long) addr & 7UL) << 3; ++ addr = (void *) ((unsigned long) addr & ~7UL); ++#elif BITS_PER_LONG == 32 ++ fix = ((unsigned long) addr & 3UL) << 3; ++ addr = (void *) ((unsigned long) addr & ~3UL); ++#else ++#error "how many bits you are?!" ++#endif ++ max += fix; ++ start += fix; ++ ++#ifdef __BIG_ENDIAN ++#else ++ return find_next_bit(addr, max, start) - fix; ++#endif ++} ++ ++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) ++{ ++ char *bb; ++ ++ BUG_ON(EXT3_MB_BITMAP(e3b) == EXT3_MB_BUDDY(e3b)); ++ BUG_ON(max == NULL); ++ ++ if (order > e3b->bd_blkbits + 1) { ++ *max = 0; ++ return NULL; ++ } ++ ++ /* at order 0 we see each particular block */ ++ *max = 1 << (e3b->bd_blkbits + 3); ++ if (order == 0) ++ return EXT3_MB_BITMAP(e3b); ++ ++ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; ++ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; ++ ++ return bb; ++} ++ ++#ifdef DOUBLE_CHECK ++void mb_free_blocks_double(struct ext3_buddy *e3b, int first, int count) ++{ ++ int i; ++ if (unlikely(e3b->bd_info->bb_bitmap == NULL)) ++ return; ++ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ for (i = 0; i < count; i++) { ++ BUG_ON(!mb_test_bit(first + i, e3b->bd_info->bb_bitmap)); ++ mb_clear_bit(first + i, e3b->bd_info->bb_bitmap); ++ } ++} ++ ++void mb_mark_used_double(struct ext3_buddy *e3b, int first, int count) ++{ ++ int i; ++ if (unlikely(e3b->bd_info->bb_bitmap == NULL)) ++ return; ++ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ for (i = 0; i < count; i++) { ++ BUG_ON(mb_test_bit(first + i, e3b->bd_info->bb_bitmap)); ++ mb_set_bit(first + i, e3b->bd_info->bb_bitmap); ++ } ++} ++ ++void mb_cmp_bitmaps(struct ext3_buddy *e3b, void *bitmap) ++{ ++ if (memcmp(e3b->bd_info->bb_bitmap, bitmap, e3b->bd_sb->s_blocksize)) { ++ unsigned char *b1, *b2; ++ int i; ++ b1 = (unsigned char *) e3b->bd_info->bb_bitmap; ++ b2 = (unsigned char *) bitmap; ++ for (i = 0; i < e3b->bd_sb->s_blocksize; i++) { ++ if (b1[i] != b2[i]) { ++ printk("corruption in group %u at byte %u(%u): " ++ "%x in copy != %x on disk/prealloc\n", ++ e3b->bd_group, i, i * 8, b1[i], b2[i]); ++ BUG(); ++ } ++ } ++ } ++} ++ ++#else ++#define mb_free_blocks_double(a,b,c) ++#define mb_mark_used_double(a,b,c) ++#define mb_cmp_bitmaps(a,b) ++#endif ++ ++#ifdef AGGRESSIVE_CHECK ++ ++#define MB_CHECK_ASSERT(assert) \ ++do { \ ++ if (!(assert)) { \ ++ printk (KERN_EMERG \ ++ "Assertion failure in %s() at %s:%d: \"%s\"\n", \ ++ function, file, line, # assert); \ ++ BUG(); \ ++ } \ ++} while (0) ++ ++static int __mb_check_buddy(struct ext3_buddy *e3b, char *file, ++ const char *function, int line) ++{ ++ struct super_block *sb = e3b->bd_sb; ++ int order = e3b->bd_blkbits + 1; ++ int max, max2, i, j, k, count; ++ struct ext3_group_info *grp; ++ int fragments = 0, fstart; ++ struct list_head *cur; ++ void *buddy, *buddy2; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ { ++ static int mb_check_counter = 0; ++ if (mb_check_counter++ % 100 != 0) ++ return 0; ++ } ++ ++ while (order > 1) { ++ buddy = mb_find_buddy(e3b, order, &max); ++ MB_CHECK_ASSERT(buddy); ++ buddy2 = mb_find_buddy(e3b, order - 1, &max2); ++ MB_CHECK_ASSERT(buddy2); ++ MB_CHECK_ASSERT(buddy != buddy2); ++ MB_CHECK_ASSERT(max * 2 == max2); ++ ++ count = 0; ++ for (i = 0; i < max; i++) { ++ ++ if (mb_test_bit(i, buddy)) { ++ /* only single bit in buddy2 may be 1 */ ++ if (!mb_test_bit(i << 1, buddy2)) ++ MB_CHECK_ASSERT(mb_test_bit((i<<1)+1, buddy2)); ++ else if (!mb_test_bit((i << 1) + 1, buddy2)) ++ MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); ++ continue; ++ } ++ ++ /* both bits in buddy2 must be 0 */ ++ MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); ++ MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); ++ ++ for (j = 0; j < (1 << order); j++) { ++ k = (i * (1 << order)) + j; ++ MB_CHECK_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); ++ } ++ count++; ++ } ++ MB_CHECK_ASSERT(e3b->bd_info->bb_counters[order] == count); ++ order--; ++ } ++ ++ fstart = -1; ++ buddy = mb_find_buddy(e3b, 0, &max); ++ for (i = 0; i < max; i++) { ++ if (!mb_test_bit(i, buddy)) { ++ MB_CHECK_ASSERT(i >= e3b->bd_info->bb_first_free); ++ if (fstart == -1) { ++ fragments++; ++ fstart = i; ++ } ++ continue; ++ } ++ fstart = -1; ++ /* check used bits only */ ++ for (j = 0; j < e3b->bd_blkbits + 1; j++) { ++ buddy2 = mb_find_buddy(e3b, j, &max2); ++ k = i >> j; ++ MB_CHECK_ASSERT(k < max2); ++ MB_CHECK_ASSERT(mb_test_bit(k, buddy2)); ++ } ++ } ++ MB_CHECK_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); ++ MB_CHECK_ASSERT(e3b->bd_info->bb_fragments == fragments); ++ ++ grp = EXT3_GROUP_INFO(sb, e3b->bd_group); ++ buddy = mb_find_buddy(e3b, 0, &max); ++ list_for_each(cur, &grp->bb_prealloc_list) { ++ unsigned long groupnr; ++ struct ext3_prealloc_space *pa; ++ pa = list_entry(cur, struct ext3_prealloc_space, group_list); ++ ext3_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); ++ MB_CHECK_ASSERT(groupnr == e3b->bd_group); ++ for (i = 0; i < pa->len; i++) ++ MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); ++ } ++ return 0; ++} ++#undef MB_CHECK_ASSERT ++#define mb_check_buddy(e3b) __mb_check_buddy(e3b,__FILE__,__FUNCTION__,__LINE__) ++#else ++#define mb_check_buddy(e3b) ++#endif ++ ++/* find most significant bit */ ++static int inline fmsb(unsigned short word) ++{ ++ int order; ++ ++ if (word > 255) { ++ order = 7; ++ word >>= 8; ++ } else { ++ order = -1; ++ } ++ ++ do { ++ order++; ++ word >>= 1; ++ } while (word != 0); ++ ++ return order; ++} ++ ++static void inline ++ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, ++ int len, struct ext3_group_info *grp) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ unsigned short min, max, chunk, border; ++ ++ BUG_ON(len >= EXT3_BLOCKS_PER_GROUP(sb)); ++ ++ border = 2 << sb->s_blocksize_bits; ++ ++ while (len > 0) { ++ /* find how many blocks can be covered since this position */ ++ max = ffs(first | border) - 1; ++ ++ /* find how many blocks of power 2 we need to mark */ ++ min = fmsb(len); ++ ++ if (max < min) ++ min = max; ++ chunk = 1 << min; ++ ++ /* mark multiblock chunks only */ ++ grp->bb_counters[min]++; ++ if (min > 0) ++ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); ++ ++ len -= chunk; ++ first += chunk; ++ } ++} ++ ++static void ++ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ++ int group) ++{ ++ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); ++ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); ++ unsigned short i = 0, first, len; ++ unsigned free = 0, fragments = 0; ++ unsigned long long period = get_cycles(); ++ ++ /* initialize buddy from bitmap which is aggregation ++ * of on-disk bitmap and preallocations */ ++ i = mb_find_next_zero_bit(bitmap, max, 0); ++ grp->bb_first_free = i; ++ while (i < max) { ++ fragments++; ++ first = i; ++ i = ext2_find_next_le_bit(bitmap, max, i); ++ len = i - first; ++ free += len; ++ if (len > 1) ++ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); ++ else ++ grp->bb_counters[0]++; ++ if (i < max) ++ i = mb_find_next_zero_bit(bitmap, max, i); ++ } ++ grp->bb_fragments = fragments; ++ ++ if (free != grp->bb_free) { ++ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", ++ group, free, grp->bb_free); ++ grp->bb_free = free; ++ } ++ ++ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); ++ ++ period = get_cycles() - period; ++ spin_lock(&EXT3_SB(sb)->s_bal_lock); ++ EXT3_SB(sb)->s_mb_buddies_generated++; ++ EXT3_SB(sb)->s_mb_generation_time += period; ++ spin_unlock(&EXT3_SB(sb)->s_bal_lock); ++} ++ ++static int ext3_mb_init_cache(struct page *page, char *incore) ++{ ++ int blocksize, blocks_per_page, groups_per_page; ++ int err = 0, i, first_group, first_block; ++ struct super_block *sb; ++ struct buffer_head *bhs; ++ struct buffer_head **bh; ++ struct inode *inode; ++ char *data, *bitmap; ++ ++ mb_debug("init page %lu\n", page->index); ++ ++ inode = page->mapping->host; ++ sb = inode->i_sb; ++ blocksize = 1 << inode->i_blkbits; ++ blocks_per_page = PAGE_CACHE_SIZE / blocksize; ++ ++ groups_per_page = blocks_per_page >> 1; ++ if (groups_per_page == 0) ++ groups_per_page = 1; ++ ++ /* allocate buffer_heads to read bitmaps */ ++ if (groups_per_page > 1) { ++ err = -ENOMEM; ++ i = sizeof(struct buffer_head *) * groups_per_page; ++ bh = kmalloc(i, GFP_NOFS); ++ if (bh == NULL) ++ goto out; ++ memset(bh, 0, i); ++ } else ++ bh = &bhs; ++ ++ first_group = page->index * blocks_per_page / 2; ++ ++ /* read all groups the page covers into the cache */ ++ for (i = 0; i < groups_per_page; i++) { ++ struct ext3_group_desc * desc; ++ ++ if (first_group + i >= EXT3_SB(sb)->s_groups_count) ++ break; ++ ++ err = -EIO; ++ desc = ext3_get_group_desc(sb, first_group + i, NULL); ++ if (desc == NULL) ++ goto out; ++ ++ err = -ENOMEM; ++ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (bh[i] == NULL) ++ goto out; ++ ++ if (buffer_uptodate(bh[i])) ++ continue; ++ ++ lock_buffer(bh[i]); ++ if (buffer_uptodate(bh[i])) { ++ unlock_buffer(bh[i]); ++ continue; ++ } ++ ++ get_bh(bh[i]); ++ bh[i]->b_end_io = end_buffer_read_sync; ++ submit_bh(READ, bh[i]); ++ mb_debug("read bitmap for group %u\n", first_group + i); ++ } ++ ++ /* wait for I/O completion */ ++ for (i = 0; i < groups_per_page && bh[i]; i++) ++ wait_on_buffer(bh[i]); ++ ++ err = -EIO; ++ for (i = 0; i < groups_per_page && bh[i]; i++) ++ if (!buffer_uptodate(bh[i])) ++ goto out; ++ ++ first_block = page->index * blocks_per_page; ++ for (i = 0; i < blocks_per_page; i++) { ++ int group; ++ ++ group = (first_block + i) >> 1; ++ if (group >= EXT3_SB(sb)->s_groups_count) ++ break; ++ ++ data = page_address(page) + (i * blocksize); ++ bitmap = bh[group - first_group]->b_data; ++ ++ if ((first_block + i) & 1) { ++ /* this is block of buddy */ ++ BUG_ON(incore == NULL); ++ mb_debug("put buddy for group %u in page %lu/%x\n", ++ group, page->index, i * blocksize); ++ memset(data, 0xff, blocksize); ++ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; ++ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, ++ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); ++ ext3_mb_generate_buddy(sb, data, incore, group); ++ incore = NULL; ++ } else { ++ /* this is block of bitmap */ ++ BUG_ON(incore != NULL); ++ mb_debug("put bitmap for group %u in page %lu/%x\n", ++ group, page->index, i * blocksize); ++ ++ /* see comments in ext3_mb_put_pa() */ ++ ext3_lock_group(sb, group); ++ memcpy(data, bitmap, blocksize); ++ ++ /* mark all preallocated blocks used in in-core bitmap */ ++ ext3_mb_generate_from_pa(sb, data, group); ++ ext3_unlock_group(sb, group); ++ ++ incore = data; ++ } ++ } ++ SetPageUptodate(page); ++ ++out: ++ if (bh) { ++ for (i = 0; i < groups_per_page && bh[i]; i++) ++ brelse(bh[i]); ++ if (bh != &bhs) ++ kfree(bh); ++ } ++ return err; ++} ++ ++static int ext3_mb_load_buddy(struct super_block *sb, int group, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct inode *inode = sbi->s_buddy_cache; ++ int blocks_per_page, block, pnum, poff; ++ struct page *page; ++ ++ mb_debug("load group %u\n", group); ++ ++ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; ++ ++ e3b->bd_blkbits = sb->s_blocksize_bits; ++ e3b->bd_info = EXT3_GROUP_INFO(sb, group); ++ e3b->bd_sb = sb; ++ e3b->bd_group = group; ++ e3b->bd_buddy_page = NULL; ++ e3b->bd_bitmap_page = NULL; ++ ++ block = group * 2; ++ pnum = block / blocks_per_page; ++ poff = block % blocks_per_page; ++ ++ /* we could use find_or_create_page(), but it locks page ++ * what we'd like to avoid in fast path ... */ ++ page = find_get_page(inode->i_mapping, pnum); ++ if (page == NULL || !PageUptodate(page)) { ++ if (page) ++ page_cache_release(page); ++ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); ++ if (page) { ++ BUG_ON(page->mapping != inode->i_mapping); ++ if (!PageUptodate(page)) { ++ ext3_mb_init_cache(page, NULL); ++ mb_cmp_bitmaps(e3b, page_address(page) + ++ (poff * sb->s_blocksize)); ++ } ++ unlock_page(page); ++ } ++ } ++ if (page == NULL || !PageUptodate(page)) ++ goto err; ++ e3b->bd_bitmap_page = page; ++ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); ++ mark_page_accessed(page); ++ ++ block++; ++ pnum = block / blocks_per_page; ++ poff = block % blocks_per_page; ++ ++ page = find_get_page(inode->i_mapping, pnum); ++ if (page == NULL || !PageUptodate(page)) { ++ if (page) ++ page_cache_release(page); ++ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); ++ if (page) { ++ BUG_ON(page->mapping != inode->i_mapping); ++ if (!PageUptodate(page)) ++ ext3_mb_init_cache(page, e3b->bd_bitmap); ++ ++ unlock_page(page); ++ } ++ } ++ if (page == NULL || !PageUptodate(page)) ++ goto err; ++ e3b->bd_buddy_page = page; ++ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); ++ mark_page_accessed(page); ++ ++ BUG_ON(e3b->bd_bitmap_page == NULL); ++ BUG_ON(e3b->bd_buddy_page == NULL); ++ ++ return 0; ++ ++err: ++ if (e3b->bd_bitmap_page) ++ page_cache_release(e3b->bd_bitmap_page); ++ if (e3b->bd_buddy_page) ++ page_cache_release(e3b->bd_buddy_page); ++ e3b->bd_buddy = NULL; ++ e3b->bd_bitmap = NULL; ++ return -EIO; ++} ++ ++static void ext3_mb_release_desc(struct ext3_buddy *e3b) ++{ ++ if (e3b->bd_bitmap_page) ++ page_cache_release(e3b->bd_bitmap_page); ++ if (e3b->bd_buddy_page) ++ page_cache_release(e3b->bd_buddy_page); ++} ++ ++ ++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) ++{ ++ int order = 1; ++ void *bb; ++ ++ BUG_ON(EXT3_MB_BITMAP(e3b) == EXT3_MB_BUDDY(e3b)); ++ BUG_ON(block >= (1 << (e3b->bd_blkbits + 3))); ++ ++ bb = EXT3_MB_BUDDY(e3b); ++ while (order <= e3b->bd_blkbits + 1) { ++ block = block >> 1; ++ if (!mb_test_bit(block, bb)) { ++ /* this block is part of buddy of order 'order' */ ++ return order; ++ } ++ bb += 1 << (e3b->bd_blkbits - order); ++ order++; ++ } ++ return 0; ++} ++ ++static inline void mb_clear_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0; ++ cur += 32; ++ continue; ++ } ++ mb_clear_bit_atomic(cur, bm); ++ cur++; ++ } ++} ++ ++static inline void mb_set_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0xffffffff; ++ cur += 32; ++ continue; ++ } ++ mb_set_bit_atomic(cur, bm); ++ cur++; ++ } ++} ++ ++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) ++{ ++ int block = 0, max = 0, order; ++ void *buddy, *buddy2; ++ ++ BUG_ON(first + count > (e3b->bd_sb->s_blocksize << 3)); ++ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ mb_check_buddy(e3b); ++ mb_free_blocks_double(e3b, first, count); ++ ++ e3b->bd_info->bb_free += count; ++ if (first < e3b->bd_info->bb_first_free) ++ e3b->bd_info->bb_first_free = first; ++ ++ /* let's maintain fragments counter */ ++ if (first != 0) ++ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); ++ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) ++ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); ++ if (block && max) ++ e3b->bd_info->bb_fragments--; ++ else if (!block && !max) ++ e3b->bd_info->bb_fragments++; ++ ++ /* let's maintain buddy itself */ ++ while (count-- > 0) { ++ block = first++; ++ order = 0; ++ ++ BUG_ON(!mb_test_bit(block, EXT3_MB_BITMAP(e3b))); ++ mb_clear_bit(block, EXT3_MB_BITMAP(e3b)); ++ e3b->bd_info->bb_counters[order]++; ++ ++ /* start of the buddy */ ++ buddy = mb_find_buddy(e3b, order, &max); ++ ++ do { ++ block &= ~1UL; ++ if (mb_test_bit(block, buddy) || ++ mb_test_bit(block + 1, buddy)) ++ break; ++ ++ /* both the buddies are free, try to coalesce them */ ++ buddy2 = mb_find_buddy(e3b, order + 1, &max); ++ ++ if (!buddy2) ++ break; ++ ++ if (order > 0) { ++ /* for special purposes, we don't set ++ * free bits in bitmap */ ++ mb_set_bit(block, buddy); ++ mb_set_bit(block + 1, buddy); ++ } ++ e3b->bd_info->bb_counters[order]--; ++ e3b->bd_info->bb_counters[order]--; ++ ++ block = block >> 1; ++ order++; ++ e3b->bd_info->bb_counters[order]++; ++ ++ mb_clear_bit(block, buddy2); ++ buddy = buddy2; ++ } while (1); ++ } ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, ++ int needed, struct ext3_free_extent *ex) ++{ ++ int next = block, max, ord; ++ void *buddy; ++ ++ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ BUG_ON(ex == NULL); ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ BUG_ON(buddy == NULL); ++ BUG_ON(block >= max); ++ if (mb_test_bit(block, buddy)) { ++ ex->fe_len = 0; ++ ex->fe_start = 0; ++ ex->fe_group = 0; ++ return 0; ++ } ++ ++ if (likely(order == 0)) { ++ /* find actual order */ ++ order = mb_find_order_for_block(e3b, block); ++ block = block >> order; ++ } ++ ++ ex->fe_len = 1 << order; ++ ex->fe_start = block << order; ++ ex->fe_group = e3b->bd_group; ++ ++ /* calc difference from given start */ ++ next = next - ex->fe_start; ++ ex->fe_len -= next; ++ ex->fe_start += next; ++ ++ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { ++ ++ if (block + 1 >= max) ++ break; ++ ++ next = (block + 1) * (1 << order); ++ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) ++ break; ++ ++ ord = mb_find_order_for_block(e3b, next); ++ ++ order = ord; ++ block = next >> order; ++ ex->fe_len += 1 << order; ++ } ++ ++ BUG_ON(ex->fe_start + ex->fe_len > (1 << (e3b->bd_blkbits + 3))); ++ return ex->fe_len; ++} ++ ++static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) ++{ ++ int ord, mlen = 0, max = 0, cur; ++ int start = ex->fe_start; ++ int len = ex->fe_len; ++ unsigned ret = 0; ++ int len0 = len; ++ void *buddy; ++ ++ BUG_ON(start + len > (e3b->bd_sb->s_blocksize << 3)); ++ BUG_ON(e3b->bd_group != ex->fe_group); ++ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ mb_check_buddy(e3b); ++ mb_mark_used_double(e3b, start, len); ++ ++ e3b->bd_info->bb_free -= len; ++ if (e3b->bd_info->bb_first_free == start) ++ e3b->bd_info->bb_first_free += len; ++ ++ /* let's maintain fragments counter */ ++ if (start != 0) ++ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); ++ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) ++ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); ++ if (mlen && max) ++ e3b->bd_info->bb_fragments++; ++ else if (!mlen && !max) ++ e3b->bd_info->bb_fragments--; ++ ++ /* let's maintain buddy itself */ ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ ++ if (((start >> ord) << ord) == start && len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! */ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ BUG_ON((start >> ord) >= max); ++ mb_set_bit(start >> ord, buddy); ++ e3b->bd_info->bb_counters[ord]--; ++ start += mlen; ++ len -= mlen; ++ BUG_ON(len < 0); ++ continue; ++ } ++ ++ /* store for history */ ++ if (ret == 0) ++ ret = len | (ord << 16); ++ ++ /* we have to split large buddy */ ++ BUG_ON(ord <= 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(start >> ord, buddy); ++ e3b->bd_info->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(cur, buddy); ++ mb_clear_bit(cur + 1, buddy); ++ e3b->bd_info->bb_counters[ord]++; ++ e3b->bd_info->bb_counters[ord]++; ++ } ++ ++ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); ++ mb_check_buddy(e3b); ++ ++ return ret; ++} ++ ++/* ++ * Must be called under group lock! ++ */ ++static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ unsigned long ret; ++ ++ BUG_ON(ac->ac_b_ex.fe_group != e3b->bd_group); ++ BUG_ON(ac->ac_status == AC_STATUS_FOUND); ++ ++ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); ++ ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical; ++ ret = mb_mark_used(e3b, &ac->ac_b_ex); ++ ++ /* preallocation can change ac_b_ex, thus we store actually ++ * allocated blocks for history */ ++ ac->ac_f_ex = ac->ac_b_ex; ++ ++ ac->ac_status = AC_STATUS_FOUND; ++ ac->ac_tail = ret & 0xffff; ++ ac->ac_buddy = ret >> 16; ++ ++ /* XXXXXXX: SUCH A HORRIBLE **CK */ ++ ac->ac_bitmap_page = e3b->bd_bitmap_page; ++ get_page(ac->ac_bitmap_page); ++ ac->ac_buddy_page = e3b->bd_buddy_page; ++ get_page(ac->ac_buddy_page); ++} ++ ++/* ++ * regular allocator, for general purposes allocation ++ */ ++ ++void ext3_mb_check_limits(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b, ++ int finish_group) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); ++ struct ext3_free_extent *bex = &ac->ac_b_ex; ++ struct ext3_free_extent *gex = &ac->ac_g_ex; ++ struct ext3_free_extent ex; ++ int max; ++ ++ /* ++ * We don't want to scan for a whole year ++ */ ++ if (ac->ac_found > sbi->s_mb_max_to_scan && ++ !(ac->ac_flags & EXT3_MB_HINT_FIRST)) { ++ ac->ac_status = AC_STATUS_BREAK; ++ return; ++ } ++ ++ /* ++ * Haven't found good chunk so far, let's continue ++ */ ++ if (bex->fe_len < gex->fe_len) ++ return; ++ ++ if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan) ++ && bex->fe_group == e3b->bd_group) { ++ /* recheck chunk's availability - we don't know ++ * when it was found (within this lock-unlock ++ * period or not) */ ++ max = mb_find_extent(e3b, 0, bex->fe_start, gex->fe_len, &ex); ++ if (max >= gex->fe_len) { ++ ext3_mb_use_best_found(ac, e3b); ++ return; ++ } ++ } ++} ++ ++/* ++ * The routine checks whether found extent is good enough. If it is, ++ * then the extent gets marked used and flag is set to the context ++ * to stop scanning. Otherwise, the extent is compared with the ++ * previous found extent and if new one is better, then it's stored ++ * in the context. Later, the best found extent will be used, if ++ * mballoc can't find good enough extent. ++ * ++ * FIXME: real allocation policy is to be designed yet! ++ */ ++static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, ++ struct ext3_free_extent *ex, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_free_extent *bex = &ac->ac_b_ex; ++ struct ext3_free_extent *gex = &ac->ac_g_ex; ++ ++ BUG_ON(ex->fe_len <= 0); ++ BUG_ON(ex->fe_len >= (1 << ac->ac_sb->s_blocksize_bits) * 8); ++ BUG_ON(ex->fe_start >= (1 << ac->ac_sb->s_blocksize_bits) * 8); ++ BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); ++ ++ ac->ac_found++; ++ ++ /* ++ * The special case - take what you catch first ++ */ ++ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { ++ *bex = *ex; ++ ext3_mb_use_best_found(ac, e3b); ++ return; ++ } ++ ++ /* ++ * Let's check whether the chuck is good enough ++ */ ++ if (ex->fe_len == gex->fe_len) { ++ *bex = *ex; ++ ext3_mb_use_best_found(ac, e3b); ++ return; ++ } ++ ++ /* ++ * If this is first found extent, just store it in the context ++ */ ++ if (bex->fe_len == 0) { ++ *bex = *ex; ++ return; ++ } ++ ++ /* ++ * If new found extent is better, store it in the context ++ */ ++ if (bex->fe_len < gex->fe_len) { ++ /* if the request isn't satisfied, any found extent ++ * larger than previous best one is better */ ++ if (ex->fe_len > bex->fe_len) ++ *bex = *ex; ++ } else if (ex->fe_len > gex->fe_len) { ++ /* if the request is satisfied, then we try to find ++ * an extent that still satisfy the request, but is ++ * smaller than previous one */ ++ *bex = *ex; ++ } ++ ++ ext3_mb_check_limits(ac, e3b, 0); ++} ++ ++static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_free_extent ex = ac->ac_b_ex; ++ int group = ex.fe_group, max, err; ++ ++ BUG_ON(ex.fe_len <= 0); ++ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); ++ if (err) ++ return err; ++ ++ ext3_lock_group(ac->ac_sb, group); ++ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); ++ ++ if (max > 0) { ++ ac->ac_b_ex = ex; ++ ext3_mb_use_best_found(ac, e3b); ++ } ++ ++ ext3_unlock_group(ac->ac_sb, group); ++ ext3_mb_release_desc(e3b); ++ ++ return 0; ++} ++ ++static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ int group = ac->ac_g_ex.fe_group, max, err; ++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); ++ struct ext3_super_block *es = sbi->s_es; ++ struct ext3_free_extent ex; ++ ++ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); ++ if (err) ++ return err; ++ ++ ext3_lock_group(ac->ac_sb, group); ++ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, ++ ac->ac_g_ex.fe_len, &ex); ++ ++ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { ++ unsigned long start; ++ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + ++ ex.fe_start + le32_to_cpu(es->s_first_data_block)); ++ if (start % sbi->s_stripe == 0) { ++ ac->ac_found++; ++ ac->ac_b_ex = ex; ++ ext3_mb_use_best_found(ac, e3b); ++ } ++ } else if (max >= ac->ac_g_ex.fe_len) { ++ BUG_ON(ex.fe_len <= 0); ++ BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); ++ BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); ++ ac->ac_found++; ++ ac->ac_b_ex = ex; ++ ext3_mb_use_best_found(ac, e3b); ++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { ++ /* Sometimes, caller may want to merge even small ++ * number of blocks to an existing extent */ ++ BUG_ON(ex.fe_len <= 0); ++ BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); ++ BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); ++ ac->ac_found++; ++ ac->ac_b_ex = ex; ++ ext3_mb_use_best_found(ac, e3b); ++ } ++ ext3_unlock_group(ac->ac_sb, group); ++ ext3_mb_release_desc(e3b); ++ ++ return 0; ++} ++ ++/* ++ * The routine scans buddy structures (not bitmap!) from given order ++ * to max order and tries to find big enough chunk to satisfy the req ++ */ ++static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ struct super_block *sb = ac->ac_sb; ++ struct ext3_group_info *grp = e3b->bd_info; ++ void *buddy; ++ int i, k, max; ++ ++ BUG_ON(ac->ac_2order <= 0); ++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { ++ if (grp->bb_counters[i] == 0) ++ continue; ++ ++ buddy = mb_find_buddy(e3b, i, &max); ++ BUG_ON(buddy == NULL); ++ ++ k = mb_find_next_zero_bit(buddy, max, 0); ++ BUG_ON(k >= max); ++ ++ ac->ac_found++; ++ ++ ac->ac_b_ex.fe_len = 1 << i; ++ ac->ac_b_ex.fe_start = k << i; ++ ac->ac_b_ex.fe_group = e3b->bd_group; ++ ++ ext3_mb_use_best_found(ac, e3b); ++ ++ BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len); ++ ++ if (EXT3_SB(sb)->s_mb_stats) ++ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); ++ ++ break; ++ } ++} ++ ++/* ++ * The routine scans the group and measures all found extents. ++ * In order to optimize scanning, caller must pass number of ++ * free blocks in the group, so the routine can know upper limit. ++ */ ++static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ struct super_block *sb = ac->ac_sb; ++ void *bitmap = EXT3_MB_BITMAP(e3b); ++ struct ext3_free_extent ex; ++ int i, free; ++ ++ free = e3b->bd_info->bb_free; ++ BUG_ON(free <= 0); ++ ++ i = e3b->bd_info->bb_first_free; ++ ++ while (free && ac->ac_status == AC_STATUS_CONTINUE) { ++ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); ++ if (i >= sb->s_blocksize * 8) { ++ BUG_ON(free != 0); ++ break; ++ } ++ ++ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); ++ BUG_ON(ex.fe_len <= 0); ++ BUG_ON(free < ex.fe_len); ++ ++ ext3_mb_measure_extent(ac, &ex, e3b); ++ ++ i += ex.fe_len; ++ free -= ex.fe_len; ++ } ++ ++ ext3_mb_check_limits(ac, e3b, 1); ++} ++ ++/* ++ * This is a special case for storages like raid5 ++ * we try to find stripe-aligned chunks for stripe-size requests ++ */ ++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b) ++{ ++ struct super_block *sb = ac->ac_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ void *bitmap = EXT3_MB_BITMAP(e3b); ++ struct ext3_free_extent ex; ++ unsigned long i, max; ++ ++ BUG_ON(sbi->s_stripe == 0); ++ ++ /* find first stripe-aligned block */ ++ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + le32_to_cpu(sbi->s_es->s_first_data_block); ++ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; ++ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) ++ % EXT3_BLOCKS_PER_GROUP(sb); ++ ++ while (i < sb->s_blocksize * 8) { ++ if (!mb_test_bit(i, bitmap)) { ++ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); ++ if (max >= sbi->s_stripe) { ++ ac->ac_found++; ++ ac->ac_b_ex = ex; ++ ext3_mb_use_best_found(ac, e3b); ++ break; ++ } ++ } ++ i += sbi->s_stripe; ++ } ++} ++ ++static int ext3_mb_good_group(struct ext3_allocation_context *ac, ++ int group, int cr) ++{ ++ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); ++ unsigned free, fragments, i, bits; ++ ++ BUG_ON(cr < 0 || cr >= 4); ++ BUG_ON(EXT3_MB_GRP_NEED_INIT(grp)); ++ ++ free = grp->bb_free; ++ fragments = grp->bb_fragments; ++ if (free == 0) ++ return 0; ++ if (fragments == 0) ++ return 0; ++ ++ switch (cr) { ++ case 0: ++ BUG_ON(ac->ac_2order == 0); ++ bits = ac->ac_sb->s_blocksize_bits + 1; ++ for (i = ac->ac_2order; i <= bits; i++) ++ if (grp->bb_counters[i] > 0) ++ return 1; ++ break; ++ case 1: ++ if ((free / fragments) >= ac->ac_g_ex.fe_len) ++ return 1; ++ break; ++ case 2: ++ if (free >= ac->ac_g_ex.fe_len) ++ return 1; ++ break; ++ case 3: ++ return 1; ++ default: ++ BUG(); ++ } ++ ++ return 0; ++} ++ ++int ext3_mb_regular_allocator(struct ext3_allocation_context *ac) ++{ ++ int group, i, cr, err = 0; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ ++ sb = ac->ac_sb; ++ sbi = EXT3_SB(sb); ++ BUG_ON(ac->ac_status == AC_STATUS_FOUND); ++ ++ /* first, try the goal */ ++ err = ext3_mb_find_by_goal(ac, &e3b); ++ if (err || ac->ac_status == AC_STATUS_FOUND) ++ goto out; ++ ++ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) ++ goto out; ++ ++ i = ffs(ac->ac_g_ex.fe_len); ++ ac->ac_2order = 0; ++ if (i >= sbi->s_mb_order2_reqs) { ++ i--; ++ if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0) ++ ac->ac_2order = i; ++ } ++ ++ group = ac->ac_g_ex.fe_group; ++ ++ /* Let's just scan groups to find more-less suitable blocks */ ++ cr = ac->ac_2order ? 0 : 1; ++repeat: ++ for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { ++ ac->ac_criteria = cr; ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { ++ struct ext3_group_info *grp; ++ ++ if (group == EXT3_SB(sb)->s_groups_count) ++ group = 0; ++ ++ /* quick check to skip empty groups */ ++ grp = EXT3_GROUP_INFO(ac->ac_sb, group); ++ if (grp->bb_free == 0) ++ continue; ++ ++ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { ++ /* we need full data about the group ++ * to make a good selection */ ++ err = ext3_mb_load_buddy(sb, group, &e3b); ++ if (err) ++ goto out; ++ ext3_mb_release_desc(&e3b); ++ } ++ ++ /* check is group good for our criteries */ ++ if (!ext3_mb_good_group(ac, group, cr)) ++ continue; ++ ++ err = ext3_mb_load_buddy(sb, group, &e3b); ++ if (err) ++ goto out; ++ ++ ext3_lock_group(sb, group); ++ if (!ext3_mb_good_group(ac, group, cr)) { ++ /* someone did allocation from this group */ ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ continue; ++ } ++ ++ ac->ac_groups_scanned++; ++ if (cr == 0) ++ ext3_mb_simple_scan_group(ac, &e3b); ++ else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) ++ ext3_mb_scan_aligned(ac, &e3b); ++ else ++ ext3_mb_complex_scan_group(ac, &e3b); ++ ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ ++ if (ac->ac_status != AC_STATUS_CONTINUE) ++ break; ++ } ++ } ++ ++ if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && ++ !(ac->ac_flags & EXT3_MB_HINT_FIRST)) { ++ /* ++ * We've been searching too long. Let's try to allocate ++ * the best chunk we've found so far ++ */ ++ ++ ext3_mb_try_best_found(ac, &e3b); ++ if (ac->ac_status != AC_STATUS_FOUND) { ++ /* ++ * Someone more lucky has already allocated it. ++ * The only thing we can do is just take first ++ * found block(s) ++ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); ++ */ ++ ac->ac_b_ex.fe_group = 0; ++ ac->ac_b_ex.fe_start = 0; ++ ac->ac_b_ex.fe_len = 0; ++ ac->ac_status = AC_STATUS_CONTINUE; ++ ac->ac_flags |= EXT3_MB_HINT_FIRST; ++ cr = 3; ++ atomic_inc(&sbi->s_mb_lost_chunks); ++ goto repeat; ++ } ++ } ++out: ++ return err; ++} ++ ++#ifdef EXT3_MB_HISTORY ++struct ext3_mb_proc_session { ++ struct ext3_mb_history *history; ++ struct super_block *sb; ++ int start; ++ int max; ++}; ++ ++static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, ++ struct ext3_mb_history *hs, ++ int first) ++{ ++ if (hs == s->history + s->max) ++ hs = s->history; ++ if (!first && hs == s->history + s->start) ++ return NULL; ++ while (hs->orig.fe_len == 0) { ++ hs++; ++ if (hs == s->history + s->max) ++ hs = s->history; ++ if (hs == s->history + s->start) ++ return NULL; ++ } ++ return hs; ++} ++ ++static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) ++{ ++ struct ext3_mb_proc_session *s = seq->private; ++ struct ext3_mb_history *hs; ++ int l = *pos; ++ ++ if (l == 0) ++ return SEQ_START_TOKEN; ++ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); ++ if (!hs) ++ return NULL; ++ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); ++ return hs; ++} ++ ++static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ struct ext3_mb_proc_session *s = seq->private; ++ struct ext3_mb_history *hs = v; ++ ++ ++*pos; ++ if (v == SEQ_START_TOKEN) ++ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); ++ else ++ return ext3_mb_history_skip_empty(s, ++hs, 0); ++} ++ ++static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) ++{ ++ char buf[25], buf2[25], buf3[25], *fmt; ++ struct ext3_mb_history *hs = v; ++ ++ if (v == SEQ_START_TOKEN) { ++ seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " ++ "%-5s %-2s %-5s %-5s %-5s %-6s\n", ++ "pid", "inode", "original", "goal", "result","found", ++ "grps", "cr", "flags", "merge", "tail", "broken"); ++ return 0; ++ } ++ ++ if (hs->op == EXT3_MB_HISTORY_ALLOC) { ++ fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " ++ "%-5u %-5s %-5u %-6u\n"; ++ sprintf(buf2, "%lu/%lu/%lu@%lu", hs->result.fe_group, ++ hs->result.fe_start, hs->result.fe_len, ++ hs->result.fe_logical); ++ sprintf(buf, "%lu/%lu/%lu@%lu", hs->orig.fe_group, ++ hs->orig.fe_start, hs->orig.fe_len, ++ hs->orig.fe_logical); ++ sprintf(buf3, "%lu/%lu/%lu@%lu", hs->goal.fe_group, ++ hs->goal.fe_start, hs->goal.fe_len, ++ hs->goal.fe_logical); ++ seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, ++ hs->found, hs->groups, hs->cr, hs->flags, ++ hs->merged ? "M" : "", hs->tail, ++ hs->buddy ? 1 << hs->buddy : 0); ++ } else if (hs->op == EXT3_MB_HISTORY_PREALLOC) { ++ fmt = "%-5u %-8u %-23s %-23s %-23s\n"; ++ sprintf(buf2, "%lu/%lu/%lu@%lu", hs->result.fe_group, ++ hs->result.fe_start, hs->result.fe_len, ++ hs->result.fe_logical); ++ sprintf(buf, "%lu/%lu/%lu@%lu", hs->orig.fe_group, ++ hs->orig.fe_start, hs->orig.fe_len, ++ hs->orig.fe_logical); ++ seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); ++ } else if (hs->op == EXT3_MB_HISTORY_DISCARD) { ++ sprintf(buf2, "%lu/%lu/%lu", hs->result.fe_group, ++ hs->result.fe_start, hs->result.fe_len); ++ seq_printf(seq, "%-5u %-8u %-23s discard\n", ++ hs->pid, hs->ino, buf2); ++ } else if (hs->op == EXT3_MB_HISTORY_FREE) { ++ sprintf(buf2, "%lu/%lu/%lu", hs->result.fe_group, ++ hs->result.fe_start, hs->result.fe_len); ++ seq_printf(seq, "%-5u %-8u %-23s free\n", ++ hs->pid, hs->ino, buf2); ++ } ++ return 0; ++} ++ ++static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations ext3_mb_seq_history_ops = { ++ .start = ext3_mb_seq_history_start, ++ .next = ext3_mb_seq_history_next, ++ .stop = ext3_mb_seq_history_stop, ++ .show = ext3_mb_seq_history_show, ++}; ++ ++static int ext3_mb_seq_history_open(struct inode *inode, struct file *file) ++{ ++ struct super_block *sb = PDE(inode)->data; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_mb_proc_session *s; ++ int rc, size; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) ++ return -ENOMEM; ++ s->sb = sb; ++ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; ++ s->history = kmalloc(size, GFP_KERNEL); ++ if (s->history == NULL) { ++ kfree(s); ++ return -ENOMEM; ++ } ++ ++ spin_lock(&sbi->s_mb_history_lock); ++ memcpy(s->history, sbi->s_mb_history, size); ++ s->max = sbi->s_mb_history_max; ++ s->start = sbi->s_mb_history_cur % s->max; ++ spin_unlock(&sbi->s_mb_history_lock); ++ ++ rc = seq_open(file, &ext3_mb_seq_history_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = s; ++ } else { ++ kfree(s->history); ++ kfree(s); ++ } ++ return rc; ++ ++} ++ ++static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct ext3_mb_proc_session *s = seq->private; ++ kfree(s->history); ++ kfree(s); ++ return seq_release(inode, file); ++} ++ ++static ssize_t ext3_mb_seq_history_write(struct file *file, ++ const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct ext3_mb_proc_session *s = seq->private; ++ struct super_block *sb = s->sb; ++ char str[32]; ++ int value; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ "mb_history", (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ value = simple_strtol(str, NULL, 0); ++ if (value < 0) ++ return -ERANGE; ++ EXT3_SB(sb)->s_mb_history_filter = value; ++ ++ return count; ++} ++ ++static struct file_operations ext3_mb_seq_history_fops = { ++ .owner = THIS_MODULE, ++ .open = ext3_mb_seq_history_open, ++ .read = seq_read, ++ .write = ext3_mb_seq_history_write, ++ .llseek = seq_lseek, ++ .release = ext3_mb_seq_history_release, ++}; ++ ++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) ++{ ++ struct super_block *sb = seq->private; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ long group; ++ ++ if (*pos < 0 || *pos >= sbi->s_groups_count) ++ return NULL; ++ ++ group = *pos + 1; ++ return (void *) group; ++} ++ ++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ struct super_block *sb = seq->private; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ long group; ++ ++ ++*pos; ++ if (*pos < 0 || *pos >= sbi->s_groups_count) ++ return NULL; ++ group = *pos + 1; ++ return (void *) group;; ++} ++ ++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) ++{ ++ struct super_block *sb = seq->private; ++ long group = (long) v; ++ int i, err; ++ struct ext3_buddy e3b; ++ struct sg { ++ struct ext3_group_info info; ++ unsigned short counters[16]; ++ } sg; ++ ++ group--; ++ if (group == 0) ++ seq_printf(seq, "#%-5s: %-5s %-5s %-5s " ++ "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " ++ "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", ++ "group", "free", "frags", "first", ++ "2^0", "2^1", "2^2", "2^3", "2^4", "2^5","2^6", ++ "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); ++ ++ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + ++ sizeof(struct ext3_group_info); ++ err = ext3_mb_load_buddy(sb, group, &e3b); ++ if (err) { ++ seq_printf(seq, "#%-5lu: I/O error\n", group); ++ return 0; ++ } ++ ext3_lock_group(sb, group); ++ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ ++ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, ++ sg.info.bb_fragments, sg.info.bb_first_free); ++ for (i = 0; i <= 13; i++) ++ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? ++ sg.info.bb_counters[i] : 0); ++ seq_printf(seq, " ]\n"); ++ ++ return 0; ++} ++ ++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations ext3_mb_seq_groups_ops = { ++ .start = ext3_mb_seq_groups_start, ++ .next = ext3_mb_seq_groups_next, ++ .stop = ext3_mb_seq_groups_stop, ++ .show = ext3_mb_seq_groups_show, ++}; ++ ++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) ++{ ++ struct super_block *sb = PDE(inode)->data; ++ int rc; ++ ++ rc = seq_open(file, &ext3_mb_seq_groups_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = sb; ++ } ++ return rc; ++ ++} ++ ++static struct file_operations ext3_mb_seq_groups_fops = { ++ .owner = THIS_MODULE, ++ .open = ext3_mb_seq_groups_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static void ext3_mb_history_release(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ remove_proc_entry("mb_groups", sbi->s_mb_proc); ++ remove_proc_entry("mb_history", sbi->s_mb_proc); ++ ++ if (sbi->s_mb_history) ++ kfree(sbi->s_mb_history); ++} ++ ++static void ext3_mb_history_init(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int i; ++ ++ if (sbi->s_mb_proc != NULL) { ++ struct proc_dir_entry *p; ++ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); ++ if (p) { ++ p->proc_fops = &ext3_mb_seq_history_fops; ++ p->data = sb; ++ } ++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); ++ if (p) { ++ p->proc_fops = &ext3_mb_seq_groups_fops; ++ p->data = sb; ++ } ++ } ++ ++ sbi->s_mb_history_max = 1000; ++ sbi->s_mb_history_cur = 0; ++ spin_lock_init(&sbi->s_mb_history_lock); ++ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); ++ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); ++ memset(sbi->s_mb_history, 0, i); ++ /* if we can't allocate history, then we simple won't use it */ ++} ++ ++static void ++ext3_mb_store_history(struct ext3_allocation_context *ac) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); ++ struct ext3_mb_history h; ++ ++ if (likely(sbi->s_mb_history == NULL)) ++ return; ++ ++ if (!(ac->ac_op & sbi->s_mb_history_filter)) ++ return; ++ ++ h.op = ac->ac_op; ++ h.pid = current->pid; ++ h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; ++ h.orig = ac->ac_o_ex; ++ h.result = ac->ac_b_ex; ++ h.flags = ac->ac_flags; ++ h.merged = 0; ++ if (ac->ac_op == EXT3_MB_HISTORY_ALLOC) { ++ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && ++ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) ++ h.merged = 1; ++ h.goal = ac->ac_g_ex; ++ h.result = ac->ac_f_ex; ++ } ++ ++ spin_lock(&sbi->s_mb_history_lock); ++ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); ++ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) ++ sbi->s_mb_history_cur = 0; ++ spin_unlock(&sbi->s_mb_history_lock); ++} ++ ++#else ++#define ext3_mb_history_release(sb) ++#define ext3_mb_history_init(sb) ++#endif ++ ++int ext3_mb_init_backend(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int i, j, len, metalen; ++ int num_meta_group_infos = ++ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> ++ EXT3_DESC_PER_BLOCK_BITS(sb); ++ struct ext3_group_info **meta_group_info; ++ ++ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte ++ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. ++ * So a two level scheme suffices for now. */ ++ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * ++ num_meta_group_infos, GFP_KERNEL); ++ if (sbi->s_group_info == NULL) { ++ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); ++ return -ENOMEM; ++ } ++ sbi->s_buddy_cache = new_inode(sb); ++ if (sbi->s_buddy_cache == NULL) { ++ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); ++ goto err_freesgi; ++ } ++ EXT3_I(sbi->s_buddy_cache)->i_disksize = 0; ++ ++ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); ++ for (i = 0; i < num_meta_group_infos; i++) { ++ if ((i + 1) == num_meta_group_infos) ++ metalen = sizeof(*meta_group_info) * ++ (sbi->s_groups_count - ++ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); ++ meta_group_info = kmalloc(metalen, GFP_KERNEL); ++ if (meta_group_info == NULL) { ++ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " ++ "buddy group\n"); ++ goto err_freemeta; ++ } ++ sbi->s_group_info[i] = meta_group_info; ++ } ++ ++ /* ++ * calculate needed size. if change bb_counters size, ++ * don't forget about ext3_mb_generate_buddy() ++ */ ++ len = sizeof(struct ext3_group_info); ++ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); ++ for (i = 0; i < sbi->s_groups_count; i++) { ++ struct ext3_group_desc * desc; ++ ++ meta_group_info = ++ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; ++ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); ++ ++ meta_group_info[j] = kmalloc(len, GFP_KERNEL); ++ if (meta_group_info[j] == NULL) { ++ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); ++ i--; ++ goto err_freebuddy; ++ } ++ desc = ext3_get_group_desc(sb, i, NULL); ++ if (desc == NULL) { ++ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); ++ goto err_freebuddy; ++ } ++ memset(meta_group_info[j], 0, len); ++ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, ++ &meta_group_info[j]->bb_state); ++ ++ /* initialize bb_free to be able to skip ++ * empty groups without initialization */ ++ meta_group_info[j]->bb_free = ++ le16_to_cpu(desc->bg_free_blocks_count); ++ ++ INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); ++ ++#ifdef DOUBLE_CHECK ++ { ++ struct buffer_head *bh; ++ meta_group_info[j]->bb_bitmap = ++ kmalloc(sb->s_blocksize, GFP_KERNEL); ++ BUG_ON(meta_group_info[j]->bb_bitmap == NULL); ++ bh = read_block_bitmap(sb, i); ++ BUG_ON(bh == NULL); ++ memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, ++ sb->s_blocksize); ++ brelse(bh); ++ } ++#endif ++ ++ } ++ ++ return 0; ++ ++err_freebuddy: ++ while (i >= 0) { ++ kfree(EXT3_GROUP_INFO(sb, i)); ++ i--; ++ } ++ i = num_meta_group_infos; ++err_freemeta: ++ while (--i >= 0) ++ kfree(sbi->s_group_info[i]); ++ iput(sbi->s_buddy_cache); ++err_freesgi: ++ kfree(sbi->s_group_info); ++ return -ENOMEM; ++} ++ ++int ext3_mb_init(struct super_block *sb, int needs_recovery) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ unsigned i, offset, max; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); ++ ++ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); ++ if (sbi->s_mb_offsets == NULL) { ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ return -ENOMEM; ++ } ++ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); ++ if (sbi->s_mb_maxs == NULL) { ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ kfree(sbi->s_mb_maxs); ++ return -ENOMEM; ++ } ++ ++ /* order 0 is regular bitmap */ ++ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; ++ sbi->s_mb_offsets[0] = 0; ++ ++ i = 1; ++ offset = 0; ++ max = sb->s_blocksize << 2; ++ do { ++ sbi->s_mb_offsets[i] = offset; ++ sbi->s_mb_maxs[i] = max; ++ offset += 1 << (sb->s_blocksize_bits - i); ++ max = max >> 1; ++ i++; ++ } while (i <= sb->s_blocksize_bits + 1); ++ ++ /* init file for buddy data */ ++ if ((i = ext3_mb_init_backend(sb))) { ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ kfree(sbi->s_mb_offsets); ++ kfree(sbi->s_mb_maxs); ++ return i; ++ } ++ ++ spin_lock_init(&sbi->s_md_lock); ++ INIT_LIST_HEAD(&sbi->s_active_transaction); ++ INIT_LIST_HEAD(&sbi->s_closed_transaction); ++ INIT_LIST_HEAD(&sbi->s_committed_transaction); ++ spin_lock_init(&sbi->s_bal_lock); ++ ++ sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; ++ sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; ++ sbi->s_mb_max_groups_to_scan = MB_DEFAULT_MAX_GROUPS_TO_SCAN; ++ sbi->s_mb_stats = MB_DEFAULT_STATS; ++ sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; ++ sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; ++ sbi->s_mb_history_filter = EXT3_MB_HISTORY_DEFAULT; ++ ++ i = sizeof(struct ext3_locality_group) * NR_CPUS; ++ sbi->s_locality_groups = kmalloc(i, GFP_NOFS); ++ if (sbi->s_locality_groups == NULL) { ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ kfree(sbi->s_mb_offsets); ++ kfree(sbi->s_mb_maxs); ++ return -ENOMEM; ++ } ++ for (i = 0; i < NR_CPUS; i++) { ++ struct ext3_locality_group *lg; ++ lg = &sbi->s_locality_groups[i]; ++ sema_init(&lg->lg_sem, 1); ++ INIT_LIST_HEAD(&lg->lg_prealloc_list); ++ spin_lock_init(&lg->lg_prealloc_lock); ++ } ++ ++ ext3_mb_init_per_dev_proc(sb); ++ ext3_mb_history_init(sb); ++ ++ printk("EXT3-fs: mballoc enabled\n"); ++ return 0; ++} ++ ++void ext3_mb_cleanup_pa(struct ext3_group_info *grp) ++{ ++ struct ext3_prealloc_space *pa; ++ struct list_head *cur, *tmp; ++ int count = 0; ++ ++ list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) { ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_group_list); ++ list_del_rcu(&pa->pa_group_list); ++ count++; ++ kfree(pa); ++ } ++ if (count) ++ mb_debug("mballoc: %u PAs left\n", count); ++ ++} ++ ++int ext3_mb_release(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int i, num_meta_group_infos; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* release freed, non-committed blocks */ ++ spin_lock(&sbi->s_md_lock); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_committed_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ ext3_mb_free_committed_blocks(sb); ++ ++ if (sbi->s_group_info) { ++ for (i = 0; i < sbi->s_groups_count; i++) { ++#ifdef DOUBLE_CHECK ++ if (EXT3_GROUP_INFO(sb, i)->bb_bitmap) ++ kfree(EXT3_GROUP_INFO(sb, i)->bb_bitmap); ++#endif ++ ext3_mb_cleanup_pa(EXT3_GROUP_INFO(sb, i)); ++ kfree(EXT3_GROUP_INFO(sb, i)); ++ } ++ num_meta_group_infos = (sbi->s_groups_count + ++ EXT3_DESC_PER_BLOCK(sb) - 1) >> ++ EXT3_DESC_PER_BLOCK_BITS(sb); ++ for (i = 0; i < num_meta_group_infos; i++) ++ kfree(sbi->s_group_info[i]); ++ kfree(sbi->s_group_info); ++ } ++ if (sbi->s_mb_offsets) ++ kfree(sbi->s_mb_offsets); ++ if (sbi->s_mb_maxs) ++ kfree(sbi->s_mb_maxs); ++ if (sbi->s_buddy_cache) ++ iput(sbi->s_buddy_cache); ++ if (sbi->s_mb_stats) { ++ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", ++ atomic_read(&sbi->s_bal_allocated), ++ atomic_read(&sbi->s_bal_reqs), ++ atomic_read(&sbi->s_bal_success)); ++ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " ++ "%u 2^N hits, %u breaks, %u lost\n", ++ atomic_read(&sbi->s_bal_ex_scanned), ++ atomic_read(&sbi->s_bal_goals), ++ atomic_read(&sbi->s_bal_2orders), ++ atomic_read(&sbi->s_bal_breaks), ++ atomic_read(&sbi->s_mb_lost_chunks)); ++ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", ++ sbi->s_mb_buddies_generated++, ++ sbi->s_mb_generation_time); ++ printk("EXT3-fs: mballoc: %u preallocated, %u discarded\n", ++ atomic_read(&sbi->s_mb_preallocated), ++ atomic_read(&sbi->s_mb_discarded)); ++ } ++ ++ if (sbi->s_locality_groups) ++ kfree(sbi->s_locality_groups); ++ ++ ext3_mb_history_release(sb); ++ ext3_mb_destroy_per_dev_proc(sb); ++ ++ return 0; ++} ++ ++void ext3_mb_free_committed_blocks(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int err, i, count = 0, count2 = 0; ++ struct ext3_free_metadata *md; ++ struct ext3_buddy e3b; ++ ++ if (list_empty(&sbi->s_committed_transaction)) ++ return; ++ ++ /* there is committed blocks to be freed yet */ ++ do { ++ /* get next array of blocks */ ++ md = NULL; ++ spin_lock(&sbi->s_md_lock); ++ if (!list_empty(&sbi->s_committed_transaction)) { ++ md = list_entry(sbi->s_committed_transaction.next, ++ struct ext3_free_metadata, list); ++ list_del(&md->list); ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ if (md == NULL) ++ break; ++ ++ mb_debug("gonna free %u blocks in group %u (0x%p):", ++ md->num, md->group, md); ++ ++ err = ext3_mb_load_buddy(sb, md->group, &e3b); ++ /* we expect to find existing buddy because it's pinned */ ++ BUG_ON(err != 0); ++ ++ /* there are blocks to put in buddy to make them really free */ ++ count += md->num; ++ count2++; ++ ext3_lock_group(sb, md->group); ++ for (i = 0; i < md->num; i++) { ++ mb_debug(" %u", md->blocks[i]); ++ err = mb_free_blocks(&e3b, md->blocks[i], 1); ++ BUG_ON(err != 0); ++ } ++ mb_debug("\n"); ++ ext3_unlock_group(sb, md->group); ++ ++ /* balance refcounts from ext3_mb_free_metadata() */ ++ page_cache_release(e3b.bd_buddy_page); ++ page_cache_release(e3b.bd_bitmap_page); ++ ++ kfree(md); ++ ext3_mb_release_desc(&e3b); ++ ++ } while (md); ++ ++ mb_debug("freed %u blocks in %u structures\n", count, count2); ++} ++ ++#define EXT3_ROOT "ext3" ++#define EXT3_MB_STATS_NAME "stats" ++#define EXT3_MB_MAX_TO_SCAN_NAME "max_to_scan" ++#define EXT3_MB_MIN_TO_SCAN_NAME "min_to_scan" ++#define EXT3_MB_ORDER2_REQ "order2_req" ++#define EXT3_MB_STREAM_REQ "stream_req" ++ ++static int ext3_mb_stats_read(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ int len; ++ ++ *eof = 1; ++ if (off != 0) ++ return 0; ++ ++ len = sprintf(page, "%ld\n", sbi->s_mb_stats); ++ *start = page; ++ return len; ++} ++ ++static int ext3_mb_stats_write(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ char str[32]; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ EXT3_MB_STATS_NAME, (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ ++ sbi->s_mb_stats = (simple_strtol(str, NULL, 0) != 0); ++ return count; ++} ++ ++static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ int len; ++ ++ *eof = 1; ++ if (off != 0) ++ return 0; ++ ++ len = sprintf(page, "%ld\n", sbi->s_mb_max_to_scan); ++ *start = page; ++ return len; ++} ++ ++static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ char str[32]; ++ long value; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ ++ value = simple_strtol(str, NULL, 0); ++ if (value <= 0) ++ return -ERANGE; ++ ++ sbi->s_mb_max_to_scan = value; ++ ++ return count; ++} ++ ++static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ int len; ++ ++ *eof = 1; ++ if (off != 0) ++ return 0; ++ ++ len = sprintf(page, "%ld\n", sbi->s_mb_min_to_scan); ++ *start = page; ++ return len; ++} ++ ++static int ext3_mb_order2_req_write(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ char str[32]; ++ long value; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ ++ value = simple_strtol(str, NULL, 0); ++ if (value <= 0) ++ return -ERANGE; ++ ++ sbi->s_mb_order2_reqs = value; ++ ++ return count; ++} ++ ++static int ext3_mb_order2_req_read(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ int len; ++ ++ *eof = 1; ++ if (off != 0) ++ return 0; ++ ++ len = sprintf(page, "%ld\n", sbi->s_mb_order2_reqs); ++ *start = page; ++ return len; ++} ++ ++static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ char str[32]; ++ long value; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ ++ value = simple_strtol(str, NULL, 0); ++ if (value <= 0) ++ return -ERANGE; ++ ++ sbi->s_mb_min_to_scan = value; ++ ++ return count; ++} ++ ++static int ext3_mb_stream_req_read(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ int len; ++ ++ *eof = 1; ++ if (off != 0) ++ return 0; ++ ++ len = sprintf(page, "%ld\n", sbi->s_mb_stream_request); ++ *start = page; ++ return len; ++} ++ ++static int ext3_mb_stream_req_write(struct file *file, const char *buffer, ++ unsigned long count, void *data) ++{ ++ struct ext3_sb_info *sbi = data; ++ char str[32]; ++ long value; ++ ++ if (count >= sizeof(str)) { ++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", ++ EXT3_MB_STREAM_REQ, (int)sizeof(str)); ++ return -EOVERFLOW; ++ } ++ ++ if (copy_from_user(str, buffer, count)) ++ return -EFAULT; ++ ++ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ ++ value = simple_strtol(str, NULL, 0); ++ if (value <= 0) ++ return -ERANGE; ++ ++ sbi->s_mb_stream_request = value; ++ ++ return count; ++} ++ ++int ext3_mb_init_per_dev_proc(struct super_block *sb) ++{ ++ mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct proc_dir_entry *proc; ++ char devname[64], *name; ++ ++ snprintf(devname, sizeof(devname) - 1, "%s", ++ bdevname(sb->s_bdev, devname)); ++ sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext3); ++ ++ name = EXT3_MB_STATS_NAME; ++ proc = create_proc_entry(name, mode, sbi->s_mb_proc); ++ if (proc == NULL) ++ goto err_out; ++ proc->data = sbi; ++ proc->read_proc = ext3_mb_stats_read; ++ proc->write_proc = ext3_mb_stats_write; ++ ++ name = EXT3_MB_MAX_TO_SCAN_NAME; ++ proc = create_proc_entry(name, mode, sbi->s_mb_proc); ++ if (proc == NULL) ++ goto err_out; ++ proc->data = sbi; ++ proc->read_proc = ext3_mb_max_to_scan_read; ++ proc->write_proc = ext3_mb_max_to_scan_write; ++ ++ name = EXT3_MB_MIN_TO_SCAN_NAME; ++ proc = create_proc_entry(name, mode, sbi->s_mb_proc); ++ if (proc == NULL) ++ goto err_out; ++ proc->data = sbi; ++ proc->read_proc = ext3_mb_min_to_scan_read; ++ proc->write_proc = ext3_mb_min_to_scan_write; ++ ++ name = EXT3_MB_ORDER2_REQ; ++ proc = create_proc_entry(name, mode, sbi->s_mb_proc); ++ if (proc == NULL) ++ goto err_out; ++ proc->data = sbi; ++ proc->read_proc = ext3_mb_order2_req_read; ++ proc->write_proc = ext3_mb_order2_req_write; ++ ++ name = EXT3_MB_STREAM_REQ; ++ proc = create_proc_entry(name, mode, sbi->s_mb_proc); ++ if (proc == NULL) ++ goto err_out; ++ proc->data = sbi; ++ proc->read_proc = ext3_mb_stream_req_read; ++ proc->write_proc = ext3_mb_stream_req_write; ++ ++ return 0; ++ ++err_out: ++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", name); ++ remove_proc_entry(EXT3_MB_STREAM_REQ, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_ORDER2_REQ, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_STATS_NAME, sbi->s_mb_proc); ++ remove_proc_entry(devname, proc_root_ext3); ++ sbi->s_mb_proc = NULL; ++ ++ return -ENOMEM; ++} ++ ++int ext3_mb_destroy_per_dev_proc(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ char devname[64]; ++ ++ if (sbi->s_mb_proc == NULL) ++ return -EINVAL; ++ ++ snprintf(devname, sizeof(devname) - 1, "%s", ++ bdevname(sb->s_bdev, devname)); ++ remove_proc_entry(EXT3_MB_STREAM_REQ, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_ORDER2_REQ, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); ++ remove_proc_entry(EXT3_MB_STATS_NAME, sbi->s_mb_proc); ++ remove_proc_entry(devname, proc_root_ext3); ++ ++ return 0; ++} ++ ++int __init init_ext3_proc(void) ++{ ++ ext3_pspace_cachep = ++ kmem_cache_create("ext3_prealloc_space", ++ sizeof(struct ext3_prealloc_space), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ if (ext3_pspace_cachep == NULL) ++ return -ENOMEM; ++ ++ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); ++ if (proc_root_ext3 == NULL) ++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); ++ ++ return 0; ++} ++ ++void exit_ext3_proc(void) ++{ ++ /* XXX: synchronize_rcu(); */ ++ kmem_cache_destroy(ext3_pspace_cachep); ++ remove_proc_entry(EXT3_ROOT, proc_root_fs); ++} ++ ++ ++/* ++ * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps ++ * Returns 0 if success or error code ++ */ ++int ext3_mb_mark_diskspace_used(struct ext3_allocation_context *ac, handle_t *handle) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_super_block *es; ++ struct ext3_group_desc *gdp; ++ struct buffer_head *gdp_bh; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ sector_t block; ++ int len, err; ++ ++ BUG_ON(ac->ac_status != AC_STATUS_FOUND); ++ BUG_ON(ac->ac_b_ex.fe_len <= 0); ++ ++ sb = ac->ac_sb; ++ sbi = EXT3_SB(sb); ++ es = sbi->s_es; ++ ++ ext3_debug("using block group %d(%d)\n", ac->ac_b_group.group, ++ gdp->bg_free_blocks_count); ++ ++ /* time to check quota, we can't do this before because ++ * having quota spent on preallocated-unused-yet blocks ++ * would be wrong */ ++ len = ac->ac_b_ex.fe_len; ++ while (len && DQUOT_ALLOC_BLOCK(ac->ac_inode, len)) len--; ++ if (ac->ac_b_ex.fe_len != len) { ++ /* some blocks can't be allocated due to quota ++ * we have to return them back */ ++ BUG(); ++ } ++ err = -EDQUOT; ++ if (len == 0) ++ goto out_err; ++ ++ err = -EIO; ++ bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); ++ if (!bitmap_bh) ++ goto out_err; ++ ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) ++ goto out_err; ++ ++ err = -EIO; ++ gdp = ext3_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); ++ if (!gdp) ++ goto out_err; ++ ++ err = ext3_journal_get_write_access(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ block = ac->ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + ac->ac_b_ex.fe_start ++ + le32_to_cpu(es->s_first_data_block); ++ ++ if (block == le32_to_cpu(gdp->bg_block_bitmap) || ++ block == le32_to_cpu(gdp->bg_inode_bitmap) || ++ in_range(block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error(sb, __FUNCTION__, ++ "Allocating block in system zone - block = %lu", ++ (unsigned long) block); ++#ifdef AGGRESSIVE_CHECK ++ { ++ int i; ++ for (i = 0; i < ac->ac_b_ex.fe_len; i++) { ++ BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, ++ bitmap_bh->b_data)); ++ } ++ } ++#endif ++ mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); ++ ++ spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) ++ - ac->ac_b_ex.fe_len); ++ spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); ++ ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) ++ goto out_err; ++ err = ext3_journal_dirty_metadata(handle, gdp_bh); ++ ++out_err: ++ sb->s_dirt = 1; ++ brelse(bitmap_bh); ++ return err; ++} ++ ++/* ++ * here we normalize request for locality group ++ * XXX: should we try to preallocate more than the group has now? ++ */ ++void ext3_mb_normalize_group_request(struct ext3_allocation_context *ac) ++{ ++ struct super_block *sb = ac->ac_sb; ++ struct ext3_locality_group *lg = ac->ac_lg; ++ ++ BUG_ON(lg == NULL); ++ if (EXT3_SB(sb)->s_stripe) ++ ac->ac_g_ex.fe_len = EXT3_SB(sb)->s_stripe; ++ else ++ ac->ac_g_ex.fe_len = (1024 * 1024) >> sb->s_blocksize_bits; ++ ++ mb_debug("#%u: goal %u blocks for locality group\n", ++ current->pid, ac->ac_g_ex.fe_len); ++} ++ ++/* ++ * Normalization means making request better in terms of ++ * size and alignment ++ */ ++void ext3_mb_normalize_request(struct ext3_allocation_context *ac, ++ struct ext3_allocation_request *ar) ++{ ++ struct ext3_inode_info *ei = EXT3_I(ac->ac_inode); ++ loff_t start, end, size, orig_size, orig_start; ++ struct list_head *cur; ++ int bsbits; ++ ++ /* do normalize only data requests, metadata requests ++ do not need preallocation */ ++ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) ++ return; ++ ++ /* sometime caller may want exact blocks */ ++ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) ++ return; ++ ++ /* caller may indicate that preallocation isn't ++ * required (it's a tail, for example) */ ++ if (ac->ac_flags & EXT3_MB_HINT_NOPREALLOC) ++ return; ++ ++ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) ++ return ext3_mb_normalize_group_request(ac); ++ ++ bsbits = ac->ac_sb->s_blocksize_bits; ++ ++ /* first, let's learn actual file size ++ * given current request is allocated */ ++ size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; ++ size = size << bsbits; ++ if (size < i_size_read(ac->ac_inode)) ++ size = i_size_read(ac->ac_inode); ++ ++ /* first, try to predict filesize */ ++ /* XXX: should this table be tunable? */ ++ start = 0; ++ if (size <= 16 * 1024) { ++ size = 16 * 1024; ++ } else if (size <= 32 * 1024) { ++ size = 32 * 1024; ++ } else if (size <= 64 * 1024) { ++ size = 64 * 1024; ++ } else if (size <= 128 * 1024) { ++ size = 128 * 1024; ++ } else if (size <= 256 * 1024) { ++ size = 256 * 1024; ++ } else if (size <= 512 * 1024) { ++ size = 512 * 1024; ++ } else if (size <= 1024 * 1024) { ++ size = 1024 * 1024; ++ } else if (size < 4 * 1024 * 1024) { ++ start = ac->ac_o_ex.fe_logical << bsbits; ++ start = (start / (1024 * 1024)) * (1024 * 1024); ++ size = 1024 * 1024; ++ } else if (size < 8 * 1024 * 1024) { ++ start = ac->ac_o_ex.fe_logical << bsbits; ++ start = (start / (4 * (1024 * 1024))) * 4 * (1024 * 1024); ++ size = 4 * 1024 * 1024; ++ } else if (ac->ac_o_ex.fe_len < ((8 << 20) >> bsbits)) { ++ start = ac->ac_o_ex.fe_logical; ++ start = start << bsbits; ++ start = (start / (8 * (1024 * 1024))) * 8 * (1024 * 1024); ++ size = 8 * 1024 * 1024; ++ } else { ++ start = ac->ac_o_ex.fe_logical; ++ start = start << bsbits; ++ size = ac->ac_o_ex.fe_len << bsbits; ++ } ++ orig_size = size = size >> bsbits; ++ orig_start = start = start >> bsbits; ++ ++ /* don't cover already allocated blocks in selected range */ ++ if (ar->pleft && start <= ar->lleft) { ++ size -= ar->lleft + 1 - start; ++ start = ar->lleft + 1; ++ } ++ if (ar->pright && start + size - 1 >= ar->lright) ++ size -= start + size - ar->lright; ++ ++ end = start + size; ++ ++ /* check we don't cross already preallocated blocks */ ++ rcu_read_lock(); ++ list_for_each_rcu(cur, &ei->i_prealloc_list) { ++ struct ext3_prealloc_space *pa; ++ unsigned long pa_end; ++ ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); ++ pa_end = pa->pa_lstart + pa->pa_len; ++ ++ /* PA must not overlap original request */ ++ BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || ++ ac->ac_o_ex.fe_logical < pa->pa_lstart)); ++ ++ /* skip PA normalized request doesn't overlap with */ ++ if (pa->pa_lstart >= end) ++ continue; ++ if (pa_end <= start) ++ continue; ++ BUG_ON(pa->pa_lstart <= start && pa_end >= end); ++ ++ if (pa_end <= ac->ac_o_ex.fe_logical) { ++ BUG_ON(pa_end < start); ++ start = pa_end; ++ } ++ ++ if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { ++ BUG_ON(pa->pa_lstart > end); ++ end = pa->pa_lstart; ++ } ++ } ++ rcu_read_unlock(); ++ size = end - start; ++ ++ /* XXX: extra loop to check we really don't overlap preallocations */ ++ rcu_read_lock(); ++ list_for_each_rcu(cur, &ei->i_prealloc_list) { ++ struct ext3_prealloc_space *pa; ++ unsigned long pa_end; ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); ++ pa_end = pa->pa_lstart + pa->pa_len; ++ BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); ++ } ++ rcu_read_unlock(); ++ ++ if (start + size <= ac->ac_o_ex.fe_logical && ++ start > ac->ac_o_ex.fe_logical) { ++ printk("start %lu, size %lu, fe_logical %lu\n", ++ (unsigned long) start, (unsigned long) size, ++ (unsigned long) ac->ac_o_ex.fe_logical); ++ } ++ BUG_ON(start + size <= ac->ac_o_ex.fe_logical && ++ start > ac->ac_o_ex.fe_logical); ++ ++ /* now prepare goal request */ ++ BUG_ON(size <= 0 || size >= EXT3_BLOCKS_PER_GROUP(ac->ac_sb)); ++ if (size < ac->ac_o_ex.fe_len) { ++ /* XXX: don't normalize tails? */ ++ } ++ ++ /* XXX: is it better to align blocks WRT to logical placement ++ * or satisfy big request as is */ ++ ac->ac_g_ex.fe_logical = start; ++ ac->ac_g_ex.fe_len = size; ++ ++ mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, ++ (unsigned) orig_size, (unsigned) start); ++} ++ ++void ext3_mb_collect_stats(struct ext3_allocation_context *ac) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); ++ ++ if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { ++ atomic_inc(&sbi->s_bal_reqs); ++ atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); ++ if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) ++ atomic_inc(&sbi->s_bal_success); ++ atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); ++ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && ++ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) ++ atomic_inc(&sbi->s_bal_goals); ++ if (ac->ac_found > sbi->s_mb_max_to_scan) ++ atomic_inc(&sbi->s_bal_breaks); ++ } ++ ++ ext3_mb_store_history(ac); ++} ++ ++/* ++ * use blocks preallocated to inode ++ */ ++void ext3_mb_use_inode_pa(struct ext3_allocation_context *ac, ++ struct ext3_prealloc_space *pa) ++{ ++ unsigned long start, len; ++ ++ /* found preallocated blocks, use them */ ++ start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); ++ len = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); ++ len = len - start; ++ ext3_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, ++ &ac->ac_b_ex.fe_start); ++ ac->ac_b_ex.fe_len = len; ++ ac->ac_status = AC_STATUS_FOUND; ++ ac->ac_pa = pa; ++ ++ BUG_ON(start < pa->pa_pstart); ++ BUG_ON(start + len > pa->pa_pstart + pa->pa_len); ++ BUG_ON(pa->pa_free < len); ++ pa->pa_free -= len; ++ ++ mb_debug("use %lu/%lu from inode pa %p\n", start, len, pa); ++} ++ ++/* ++ * use blocks preallocated to locality group ++ */ ++void ext3_mb_use_group_pa(struct ext3_allocation_context *ac, ++ struct ext3_prealloc_space *pa) ++{ ++ unsigned len = ac->ac_o_ex.fe_len; ++ ++ ext3_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, ++ &ac->ac_b_ex.fe_group, ++ &ac->ac_b_ex.fe_start); ++ ac->ac_b_ex.fe_len = len; ++ ac->ac_status = AC_STATUS_FOUND; ++ ac->ac_pa = pa; ++ ++ /* we don't correct pa_pstart or pa_plen here to avoid ++ * possible race when tte group is being loaded concurrently ++ * instead we correct pa later, after blocks are marked ++ * in on-disk bitmap -- see ext3_mb_release_context() */ ++ mb_debug("use %lu/%lu from group pa %p\n", pa->pa_lstart-len, len, pa); ++} ++ ++/* ++ * search goal blocks in preallocated space ++ */ ++int ext3_mb_use_preallocated(struct ext3_allocation_context *ac) ++{ ++ struct ext3_inode_info *ei = EXT3_I(ac->ac_inode); ++ struct ext3_locality_group *lg; ++ struct ext3_prealloc_space *pa; ++ struct list_head *cur; ++ ++ /* only data can be preallocated */ ++ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) ++ return 0; ++ ++ /* first, try per-file preallocation */ ++ rcu_read_lock(); ++ list_for_each_rcu(cur, &ei->i_prealloc_list) { ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); ++ ++ /* all fields in this condition don't change, ++ * so we can skip locking for them */ ++ if (ac->ac_o_ex.fe_logical < pa->pa_lstart || ++ ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) ++ continue; ++ ++ /* found preallocated blocks, use them */ ++ spin_lock(&pa->pa_lock); ++ if (pa->pa_deleted == 0 && pa->pa_free) { ++ atomic_inc(&pa->pa_count); ++ ext3_mb_use_inode_pa(ac, pa); ++ spin_unlock(&pa->pa_lock); ++ ac->ac_criteria = 10; ++ rcu_read_unlock(); ++ return 1; ++ } ++ spin_unlock(&pa->pa_lock); ++ } ++ rcu_read_unlock(); ++ ++ /* can we use group allocation? */ ++ if (!(ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC)) ++ return 0; ++ ++ /* inode may have no locality group for some reason */ ++ lg = ac->ac_lg; ++ if (lg == NULL) ++ return 0; ++ ++ rcu_read_lock(); ++ list_for_each_rcu(cur, &lg->lg_prealloc_list) { ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); ++ spin_lock(&pa->pa_lock); ++ if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { ++ atomic_inc(&pa->pa_count); ++ ext3_mb_use_group_pa(ac, pa); ++ spin_unlock(&pa->pa_lock); ++ ac->ac_criteria = 20; ++ rcu_read_unlock(); ++ return 1; ++ } ++ spin_unlock(&pa->pa_lock); ++ } ++ rcu_read_unlock(); ++ ++ return 0; ++} ++ ++/* ++ * the function goes through all preallocation in this group and marks them ++ * used in in-core bitmap. buddy must be generated from this bitmap ++ */ ++void ext3_mb_generate_from_pa(struct super_block *sb, void *bitmap, int group) ++{ ++ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); ++ struct ext3_prealloc_space *pa; ++ struct list_head *cur; ++ unsigned long groupnr; ++ unsigned long start; ++ int preallocated = 0, count = 0, len; ++ ++ /* all form of preallocation discards first load group, ++ * so the only competing code is preallocation use. ++ * we don't need any locking here ++ * notice we do NOT ignore preallocations with pa_deleted ++ * otherwise we could leave used blocks available for ++ * allocation in buddy when concurrent ext3_mb_put_pa() ++ * is dropping preallocation ++ */ ++ list_for_each_rcu(cur, &grp->bb_prealloc_list) { ++ pa = list_entry(cur, struct ext3_prealloc_space, pa_group_list); ++ spin_lock(&pa->pa_lock); ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &start); ++ len = pa->pa_len; ++ spin_unlock(&pa->pa_lock); ++ BUG_ON(groupnr != group); ++ mb_set_bits(bitmap, start, len); ++ preallocated += len; ++ count++; ++ } ++ mb_debug("prellocated %u for group %u\n", preallocated, group); ++} ++ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,5) ++static void ext3_mb_pa_callback(struct rcu_head *head) ++{ ++ struct ext3_prealloc_space *pa; ++ pa = container_of(head, struct ext3_prealloc_space, u.pa_rcu); ++ kmem_cache_free(ext3_pspace_cachep, pa); ++} ++#define mb_call_rcu(__pa) call_rcu(&(__pa)->u.pa_rcu, ext3_mb_pa_callback) ++#else ++static void ext3_mb_pa_callback(void *pa) ++{ ++ kmem_cache_free(ext3_pspace_cachep, pa); ++} ++#define mb_call_rcu(__pa) call_rcu(&(__pa)->u.pa_rcu, ext3_mb_pa_callback, pa) ++#endif ++ ++/* ++ * drops a reference to preallocated space descriptor ++ * if this was the last reference and the space is consumed ++ */ ++void ext3_mb_put_pa(struct ext3_allocation_context *ac, ++ struct super_block *sb, struct ext3_prealloc_space *pa) ++{ ++ unsigned long grp; ++ ++ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) ++ return; ++ ++ /* in this short window concurrent discard can set pa_deleted */ ++ spin_lock(&pa->pa_lock); ++ if (pa->pa_deleted == 0) { ++ spin_unlock(&pa->pa_lock); ++ return; ++ } ++ ++ pa->pa_deleted = 1; ++ spin_unlock(&pa->pa_lock); ++ ++ /* -1 is to protect from crossing allocation group */ ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); ++ ++ /* ++ * possible race: ++ * ++ * P1 (buddy init) P2 (regular allocation) ++ * find block B in PA ++ * copy on-disk bitmap to buddy ++ * mark B in on-disk bitmap ++ * drop PA from group ++ * mark all PAs in buddy ++ * ++ * thus, P1 initializes buddy with B available. to prevent this ++ * we make "copy" and "mark all PAs" atomic and serialize "drop PA" ++ * against that pair ++ */ ++ ext3_lock_group(sb, grp); ++ list_del_rcu(&pa->pa_group_list); ++ ext3_unlock_group(sb, grp); ++ ++ spin_lock(pa->pa_obj_lock); ++ list_del_rcu(&pa->pa_inode_list); ++ spin_unlock(pa->pa_obj_lock); ++ ++ mb_call_rcu(pa); ++} ++ ++/* ++ * creates new preallocated space for given inode ++ */ ++int ext3_mb_new_inode_pa(struct ext3_allocation_context *ac) ++{ ++ struct super_block *sb = ac->ac_sb; ++ struct ext3_prealloc_space *pa; ++ struct ext3_group_info *grp; ++ struct ext3_inode_info *ei; ++ ++ /* preallocate only when found space is larger then requested */ ++ BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); ++ BUG_ON(ac->ac_status != AC_STATUS_FOUND); ++ BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); ++ ++ pa = kmem_cache_alloc(ext3_pspace_cachep, SLAB_NOFS); ++ if (pa == NULL) ++ return -ENOMEM; ++ ++ if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { ++ int winl, wins, win, offs; ++ ++ /* we can't allocate as much as normalizer wants. ++ * so, found space must get proper lstart ++ * to cover original request */ ++ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); ++ BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); ++ ++ /* we're limited by original request in that ++ * logical block must be covered any way ++ * winl is window we can move our chunk within */ ++ winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; ++ ++ /* also, we should cover whole original request */ ++ wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; ++ ++ /* the smallest one defines real window */ ++ win = min(winl, wins); ++ ++ offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; ++ if (offs && offs < win) ++ win = offs; ++ ++ ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; ++ BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); ++ BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); ++ } ++ ++ /* preallocation can change ac_b_ex, thus we store actually ++ * allocated blocks for history */ ++ ac->ac_f_ex = ac->ac_b_ex; ++ ++ pa->pa_lstart = ac->ac_b_ex.fe_logical; ++ pa->pa_pstart = ext3_grp_offs_to_block(sb, &ac->ac_b_ex); ++ pa->pa_len = ac->ac_b_ex.fe_len; ++ pa->pa_free = pa->pa_len; ++ atomic_set(&pa->pa_count, 1); ++ spin_lock_init(&pa->pa_lock); ++ pa->pa_deleted = 0; ++ pa->pa_linear = 0; ++ ++ mb_debug("new inode pa %p: %lu/%lu for %lu\n", pa, ++ pa->pa_pstart, pa->pa_len, pa->pa_lstart); ++ ++ ext3_mb_use_inode_pa(ac, pa); ++ atomic_add(pa->pa_free, &EXT3_SB(sb)->s_mb_preallocated); ++ ++ ei = EXT3_I(ac->ac_inode); ++ grp = EXT3_GROUP_INFO(sb, ac->ac_b_ex.fe_group); ++ ++ pa->pa_obj_lock = &ei->i_prealloc_lock; ++ pa->pa_inode = ac->ac_inode; ++ ++ ext3_lock_group(sb, ac->ac_b_ex.fe_group); ++ list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list); ++ ext3_unlock_group(sb, ac->ac_b_ex.fe_group); ++ ++ spin_lock(pa->pa_obj_lock); ++ list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list); ++ spin_unlock(pa->pa_obj_lock); ++ ++ return 0; ++} ++ ++/* ++ * creates new preallocated space for locality group inodes belongs to ++ */ ++int ext3_mb_new_group_pa(struct ext3_allocation_context *ac) ++{ ++ struct super_block *sb = ac->ac_sb; ++ struct ext3_locality_group *lg; ++ struct ext3_prealloc_space *pa; ++ struct ext3_group_info *grp; ++ ++ /* preallocate only when found space is larger then requested */ ++ BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); ++ BUG_ON(ac->ac_status != AC_STATUS_FOUND); ++ BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); ++ ++ BUG_ON(ext3_pspace_cachep == NULL); ++ pa = kmem_cache_alloc(ext3_pspace_cachep, SLAB_NOFS); ++ if (pa == NULL) ++ return -ENOMEM; ++ ++ /* preallocation can change ac_b_ex, thus we store actually ++ * allocated blocks for history */ ++ ac->ac_f_ex = ac->ac_b_ex; ++ ++ pa->pa_pstart = ext3_grp_offs_to_block(sb, &ac->ac_b_ex); ++ pa->pa_lstart = pa->pa_pstart; ++ pa->pa_len = ac->ac_b_ex.fe_len; ++ pa->pa_free = pa->pa_len; ++ atomic_set(&pa->pa_count, 1); ++ spin_lock_init(&pa->pa_lock); ++ pa->pa_deleted = 0; ++ pa->pa_linear = 1; ++ ++ mb_debug("new group pa %p: %lu/%lu for %lu\n", pa, ++ pa->pa_pstart, pa->pa_len, pa->pa_lstart); ++ ++ ext3_mb_use_group_pa(ac, pa); ++ atomic_add(pa->pa_free, &EXT3_SB(sb)->s_mb_preallocated); ++ ++ grp = EXT3_GROUP_INFO(sb, ac->ac_b_ex.fe_group); ++ lg = ac->ac_lg; ++ BUG_ON(lg == NULL); ++ ++ pa->pa_obj_lock = &lg->lg_prealloc_lock; ++ pa->pa_inode = NULL; ++ ++ ext3_lock_group(sb, ac->ac_b_ex.fe_group); ++ list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list); ++ ext3_unlock_group(sb, ac->ac_b_ex.fe_group); ++ ++ spin_lock(pa->pa_obj_lock); ++ list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); ++ spin_unlock(pa->pa_obj_lock); ++ ++ return 0; ++} ++ ++int ext3_mb_new_preallocation(struct ext3_allocation_context *ac) ++{ ++ int err; ++ ++ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) ++ err = ext3_mb_new_group_pa(ac); ++ else ++ err = ext3_mb_new_inode_pa(ac); ++ return err; ++} ++ ++/* ++ * finds all unused blocks in on-disk bitmap, frees them in ++ * in-core bitmap and buddy. ++ * @pa must be unlinked from inode and group lists, so that ++ * nobody else can find/use it. ++ * the caller MUST hold group/inode locks. ++ * TODO: optimize the case when there are no in-core structures yet ++ */ ++int ext3_mb_release_inode_pa(struct ext3_buddy *e3b, ++ struct buffer_head *bitmap_bh, ++ struct ext3_prealloc_space *pa) ++{ ++ struct ext3_allocation_context ac; ++ struct super_block *sb = e3b->bd_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ unsigned long bit, end, next, group; ++ sector_t start; ++ int err = 0, free = 0; ++ ++ BUG_ON(pa->pa_deleted == 0); ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ++ BUG_ON(group != e3b->bd_group); ++ end = bit + pa->pa_len; ++ ++ ac.ac_sb = sb; ++ ac.ac_inode = pa->pa_inode; ++ ac.ac_op = EXT3_MB_HISTORY_DISCARD; ++ ++ while (bit < end) { ++ bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); ++ if (bit >= end) ++ break; ++ next = mb_find_next_bit(bitmap_bh->b_data, end, bit); ++ if (next > end) ++ next = end; ++ start = group * EXT3_BLOCKS_PER_GROUP(sb) + bit + ++ le32_to_cpu(sbi->s_es->s_first_data_block); ++ mb_debug(" free preallocated %u/%u in group %u\n", ++ (unsigned) start, (unsigned) next - bit, ++ (unsigned) group); ++ free += next - bit; ++ ++ ac.ac_b_ex.fe_group = group; ++ ac.ac_b_ex.fe_start = bit; ++ ac.ac_b_ex.fe_len = next - bit; ++ ac.ac_b_ex.fe_logical = 0; ++ ext3_mb_store_history(&ac); ++ ++ mb_free_blocks(e3b, bit, next - bit); ++ bit = next + 1; ++ } ++ if (free != pa->pa_free) { ++ printk("pa %p: logic %lu, phys. %lu, len %lu\n", ++ pa, (unsigned long) pa->pa_lstart, ++ (unsigned long) pa->pa_pstart, ++ (unsigned long) pa->pa_len); ++ printk("free %u, pa_free %u\n", free, pa->pa_free); ++ } ++ BUG_ON(free != pa->pa_free); ++ atomic_add(free, &sbi->s_mb_discarded); ++ ++ return err; ++} ++ ++int ext3_mb_release_group_pa(struct ext3_buddy *e3b, ++ struct ext3_prealloc_space *pa) ++{ ++ struct ext3_allocation_context ac; ++ struct super_block *sb = e3b->bd_sb; ++ unsigned long bit, group; ++ ++ ac.ac_op = EXT3_MB_HISTORY_DISCARD; ++ ++ BUG_ON(pa->pa_deleted == 0); ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ++ BUG_ON(group != e3b->bd_group); ++ mb_free_blocks(e3b, bit, pa->pa_len); ++ atomic_add(pa->pa_len, &EXT3_SB(sb)->s_mb_discarded); ++ ++ ac.ac_sb = sb; ++ ac.ac_inode = NULL; ++ ac.ac_b_ex.fe_group = group; ++ ac.ac_b_ex.fe_start = bit; ++ ac.ac_b_ex.fe_len = pa->pa_len; ++ ac.ac_b_ex.fe_logical = 0; ++ ext3_mb_store_history(&ac); ++ ++ return 0; ++} ++ ++/* ++ * releases all preallocations in given group ++ * ++ * first, we need to decide discard policy: ++ * - when do we discard ++ * 1) ENOSPC ++ * - how many do we discard ++ * 1) how many requested ++ */ ++int ext3_mb_discard_group_preallocations(struct super_block *sb, ++ int group, int needed) ++{ ++ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_prealloc_space *pa, *tmp; ++ struct list_head list; ++ struct ext3_buddy e3b; ++ int err, busy, free = 0; ++ ++ mb_debug("discard preallocation for group %lu\n", group); ++ ++ if (list_empty(&grp->bb_prealloc_list)) ++ return 0; ++ ++ bitmap_bh = read_block_bitmap(sb, group); ++ if (bitmap_bh == NULL) { ++ /* error handling here */ ++ ext3_mb_release_desc(&e3b); ++ BUG_ON(bitmap_bh == NULL); ++ } ++ ++ err = ext3_mb_load_buddy(sb, group, &e3b); ++ BUG_ON(err != 0); /* error handling here */ ++ ++ if (needed == 0) ++ needed = EXT3_BLOCKS_PER_GROUP(sb) + 1; ++ ++ grp = EXT3_GROUP_INFO(sb, group); ++ INIT_LIST_HEAD(&list); ++ ++repeat: ++ busy = 0; ++ ext3_lock_group(sb, group); ++ list_for_each_entry_safe (pa, tmp, &grp->bb_prealloc_list, pa_group_list) { ++ spin_lock(&pa->pa_lock); ++ if (atomic_read(&pa->pa_count)) { ++ spin_unlock(&pa->pa_lock); ++ printk("uh! busy PA\n"); ++ dump_stack(); ++ busy = 1; ++ continue; ++ } ++ if (pa->pa_deleted) { ++ spin_unlock(&pa->pa_lock); ++ continue; ++ } ++ ++ /* seems this one can be freed ... */ ++ pa->pa_deleted = 1; ++ ++ /* we can trust pa_free ... */ ++ free += pa->pa_free; ++ ++ spin_unlock(&pa->pa_lock); ++ ++ list_del_rcu(&pa->pa_group_list); ++ list_add(&pa->u.pa_tmp_list, &list); ++ } ++ ++ /* if we still need more blocks and some PAs were used, try again */ ++ if (free < needed && busy) ++ goto repeat; ++ ++ /* found anything to free? */ ++ if (list_empty(&list)) { ++ BUG_ON(free != 0); ++ goto out; ++ } ++ ++ /* now free all selected PAs */ ++ list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { ++ ++ /* remove from object (inode or locality group) */ ++ spin_lock(pa->pa_obj_lock); ++ list_del_rcu(&pa->pa_inode_list); ++ spin_unlock(pa->pa_obj_lock); ++ ++ if (pa->pa_linear) ++ ext3_mb_release_group_pa(&e3b, pa); ++ else ++ ext3_mb_release_inode_pa(&e3b, bitmap_bh, pa); ++ ++ list_del(&pa->u.pa_tmp_list); ++ mb_call_rcu(pa); ++ } ++ ++out: ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ brelse(bitmap_bh); ++ return free; ++} ++ ++/* ++ * releases all non-used preallocated blocks for given inode ++ */ ++void ext3_mb_discard_inode_preallocations(struct inode *inode) ++{ ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_prealloc_space *pa, *tmp; ++ unsigned long group = 0; ++ struct list_head list; ++ struct ext3_buddy e3b; ++ int err; ++ ++ if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { ++ /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ ++ return; ++ } ++ ++ mb_debug("discard preallocation for inode %lu\n", inode->i_ino); ++ ++ INIT_LIST_HEAD(&list); ++ ++repeat: ++ /* first, collect all pa's in the inode */ ++ spin_lock(&ei->i_prealloc_lock); ++ while (!list_empty(&ei->i_prealloc_list)) { ++ pa = list_entry(ei->i_prealloc_list.next, ++ struct ext3_prealloc_space, pa_inode_list); ++ BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); ++ spin_lock(&pa->pa_lock); ++ if (atomic_read(&pa->pa_count)) { ++ /* this shouldn't happen often - nobody should ++ * use preallocation while we're discarding it */ ++ spin_unlock(&pa->pa_lock); ++ spin_unlock(&ei->i_prealloc_lock); ++ printk("uh-oh! used pa while discarding\n"); ++ dump_stack(); ++ current->state = TASK_UNINTERRUPTIBLE; ++ schedule_timeout(HZ); ++ goto repeat; ++ ++ } ++ if (pa->pa_deleted == 0) { ++ pa->pa_deleted = 1; ++ spin_unlock(&pa->pa_lock); ++ list_del_rcu(&pa->pa_inode_list); ++ list_add(&pa->u.pa_tmp_list, &list); ++ continue; ++ } ++ ++ /* someone is deleting pa right now */ ++ spin_unlock(&pa->pa_lock); ++ spin_unlock(&ei->i_prealloc_lock); ++ ++ /* we have to wait here because pa_deleted ++ * doesn't mean pa is already unlinked from ++ * the list. as we might be called from ++ * ->clear_inode() the inode will get freed ++ * and concurrent thread which is unlinking ++ * pa from inode's list may access already ++ * freed memory, bad-bad-bad */ ++ ++ /* XXX: if this happens too often, we can ++ * add a flag to force wait only in case ++ * of ->clear_inode(), but not in case of ++ * regular truncate */ ++ printk("uh-oh! some one just deleted it\n"); ++ dump_stack(); ++ current->state = TASK_UNINTERRUPTIBLE; ++ schedule_timeout(HZ); ++ goto repeat; ++ } ++ spin_unlock(&ei->i_prealloc_lock); ++ ++ list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { ++ BUG_ON(pa->pa_linear != 0); ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); ++ ++ err = ext3_mb_load_buddy(sb, group, &e3b); ++ BUG_ON(err != 0); /* error handling here */ ++ ++ bitmap_bh = read_block_bitmap(sb, group); ++ if (bitmap_bh == NULL) { ++ /* error handling here */ ++ ext3_mb_release_desc(&e3b); ++ BUG_ON(bitmap_bh == NULL); ++ } ++ ++ ext3_lock_group(sb, group); ++ list_del_rcu(&pa->pa_group_list); ++ ext3_mb_release_inode_pa(&e3b, bitmap_bh, pa); ++ ext3_unlock_group(sb, group); ++ ++ ext3_mb_release_desc(&e3b); ++ brelse(bitmap_bh); ++ ++ list_del(&pa->u.pa_tmp_list); ++ mb_call_rcu(pa); ++ } ++} ++ ++/* ++ * finds all preallocated spaces and return blocks being freed to them ++ * if preallocated space becomes full (no block is used from the space) ++ * then the function frees space in buddy ++ * XXX: at the moment, truncate (which is the only way to free blocks) ++ * discards all preallocations ++ */ ++void ext3_mb_return_to_preallocation(struct inode *inode, struct ext3_buddy *e3b, ++ sector_t block, int count) ++{ ++ BUG_ON(!list_empty(&EXT3_I(inode)->i_prealloc_list)); ++} ++ ++void ext3_mb_show_ac(struct ext3_allocation_context *ac) ++{ ++#if 0 ++ struct super_block *sb = ac->ac_sb; ++ int i; ++ ++ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", ++ ac->ac_status, ac->ac_flags); ++ printk(KERN_ERR "EXT3-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " ++ "best %lu/%lu/%lu@%lu cr %d\n", ++ ac->ac_o_ex.fe_group, ac->ac_o_ex.fe_start, ++ ac->ac_o_ex.fe_len, ac->ac_o_ex.fe_logical, ++ ac->ac_g_ex.fe_group, ac->ac_g_ex.fe_start, ++ ac->ac_g_ex.fe_len, ac->ac_g_ex.fe_logical, ++ ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, ++ ac->ac_b_ex.fe_len, ac->ac_b_ex.fe_logical, ++ ac->ac_criteria); ++ printk(KERN_ERR "EXT3-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, ++ ac->ac_found); ++ printk("EXT3-fs: groups: "); ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { ++ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, i); ++ struct ext3_prealloc_space *pa; ++ unsigned long start; ++ struct list_head *cur; ++ list_for_each_rcu(cur, &grp->bb_prealloc_list) { ++ pa = list_entry(cur, struct ext3_prealloc_space, ++ pa_group_list); ++ spin_lock(&pa->pa_lock); ++ ext3_get_group_no_and_offset(sb, pa->pa_pstart, NULL, &start); ++ spin_unlock(&pa->pa_lock); ++ printk("PA:%u:%lu:%u ", i, start, pa->pa_len); ++ } ++ ++ if (grp->bb_free == 0) ++ continue; ++ printk("%d: %d/%d ", i, grp->bb_free, grp->bb_fragments); ++ } ++ printk("\n"); ++ //dump_stack(); ++#endif ++} ++ ++void ext3_mb_group_or_file(struct ext3_allocation_context *ac) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); ++ int bsbits = ac->ac_sb->s_blocksize_bits; ++ loff_t size, isize; ++ ++ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) ++ return; ++ ++ size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; ++ isize = i_size_read(ac->ac_inode) >> bsbits; ++ if (size < isize) ++ size = isize; ++ ++ /* don't use group allocation for large files */ ++ if (size >= sbi->s_mb_stream_request) ++ return; ++ ++ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) ++ return; ++ ++ BUG_ON(ac->ac_lg != NULL); ++ ac->ac_lg = &sbi->s_locality_groups[smp_processor_id()]; ++ ++ /* we're going to use group allocation */ ++ ac->ac_flags |= EXT3_MB_HINT_GROUP_ALLOC; ++ ++ /* serialize all allocations in the group */ ++ down(&ac->ac_lg->lg_sem); ++} ++ ++int ext3_mb_initialize_context(struct ext3_allocation_context *ac, ++ struct ext3_allocation_request *ar) ++{ ++ struct super_block *sb = ar->inode->i_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_super_block *es = sbi->s_es; ++ unsigned long group, len, goal; ++ unsigned long block; ++ ++ /* we can't allocate > group size */ ++ len = ar->len; ++ if (len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) ++ len = EXT3_BLOCKS_PER_GROUP(sb) - 10; ++ ++ /* start searching from the goal */ ++ goal = ar->goal; ++ if (goal < le32_to_cpu(es->s_first_data_block) || ++ goal >= le32_to_cpu(es->s_blocks_count)) ++ goal = le32_to_cpu(es->s_first_data_block); ++ ext3_get_group_no_and_offset(sb, goal, &group, &block); ++ ++ /* set up allocation goals */ ++ ac->ac_b_ex.fe_logical = ar->logical; ++ ac->ac_b_ex.fe_group = 0; ++ ac->ac_b_ex.fe_start = 0; ++ ac->ac_b_ex.fe_len = 0; ++ ac->ac_status = AC_STATUS_CONTINUE; ++ ac->ac_groups_scanned = 0; ++ ac->ac_ex_scanned = 0; ++ ac->ac_found = 0; ++ ac->ac_sb = sb; ++ ac->ac_inode = ar->inode; ++ ac->ac_o_ex.fe_logical = ar->logical; ++ ac->ac_o_ex.fe_group = group; ++ ac->ac_o_ex.fe_start = block; ++ ac->ac_o_ex.fe_len = len; ++ ac->ac_g_ex.fe_logical = ar->logical; ++ ac->ac_g_ex.fe_group = group; ++ ac->ac_g_ex.fe_start = block; ++ ac->ac_g_ex.fe_len = len; ++ ac->ac_f_ex.fe_len = 0; ++ ac->ac_flags = ar->flags; ++ ac->ac_2order = 0; ++ ac->ac_criteria = 0; ++ ac->ac_pa = NULL; ++ ac->ac_bitmap_page = NULL; ++ ac->ac_buddy_page = NULL; ++ ac->ac_lg = NULL; ++ ++ /* we have to define context: we'll we work with a file or ++ * locality group. this is a policy, actually */ ++ ext3_mb_group_or_file(ac); ++ ++ mb_debug("init ac: %u blocks @ %llu, goal %llu, flags %x, 2^%d, " ++ "left: %llu/%llu, right %llu/%llu to %swritable\n", ++ (unsigned) ar->len, (unsigned) ar->logical, ++ (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, ++ (unsigned) ar->lleft, (unsigned) ar->pleft, ++ (unsigned) ar->lright, (unsigned) ar->pright, ++ atomic_read(&ar->inode->i_writecount) ? "" : "non-"); ++ return 0; ++ ++} ++ ++/* ++ * release all resource we used in allocation ++ */ ++int ext3_mb_release_context(struct ext3_allocation_context *ac) ++{ ++ if (ac->ac_pa) { ++ if (ac->ac_pa->pa_linear) { ++ /* see comment in ext3_mb_use_group_pa() */ ++ spin_lock(&ac->ac_pa->pa_lock); ++ ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; ++ ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; ++ ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; ++ ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; ++ spin_unlock(&ac->ac_pa->pa_lock); ++ } ++ ext3_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); ++ } ++ if (ac->ac_bitmap_page) ++ page_cache_release(ac->ac_bitmap_page); ++ if (ac->ac_buddy_page) ++ page_cache_release(ac->ac_buddy_page); ++ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) ++ up(&ac->ac_lg->lg_sem); ++ ext3_mb_collect_stats(ac); ++ return 0; ++} ++ ++int ext3_mb_discard_preallocations(struct super_block *sb, int needed) ++{ ++ int i, ret, freed = 0; ++ ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count && needed > 0; i++) { ++ ret = ext3_mb_discard_group_preallocations(sb, i, needed); ++ freed += ret; ++ needed -= ret; ++ } ++ ++ return freed; ++} ++ ++/* ++ * Main entry point into mballoc to allocate blocks ++ * it tries to use preallocation first, then falls back ++ * to usual allocation ++ */ ++unsigned long ext3_mb_new_blocks(handle_t *handle, ++ struct ext3_allocation_request *ar, int *errp) ++{ ++ struct ext3_allocation_context ac; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ unsigned long block; ++ int err, freed; ++ ++ sb = ar->inode->i_sb; ++ sbi = EXT3_SB(sb); ++ ++ if (!test_opt(sb, MBALLOC)) { ++ static int ext3_mballoc_warning = 0; ++ if (ext3_mballoc_warning++ == 0) ++ printk(KERN_ERR "EXT3-fs: multiblock request with " ++ "mballoc disabled!\n"); ++ ar->len = 1; ++ err = ext3_new_block_old(handle, ar->inode, ar->goal, errp); ++ return err; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ if ((err = ext3_mb_initialize_context(&ac, ar))) ++ return err; ++ ++ ac.ac_op = EXT3_MB_HISTORY_PREALLOC; ++ if (!ext3_mb_use_preallocated(&ac)) { ++ ++ ac.ac_op = EXT3_MB_HISTORY_ALLOC; ++ ext3_mb_normalize_request(&ac, ar); ++ ++repeat: ++ /* allocate space in core */ ++ ext3_mb_regular_allocator(&ac); ++ ++ /* as we've just preallocated more space than ++ * user requested orinally, we store allocated ++ * space in a special descriptor */ ++ if (ac.ac_status == AC_STATUS_FOUND && ++ ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) ++ ext3_mb_new_preallocation(&ac); ++ } ++ ++ if (likely(ac.ac_status == AC_STATUS_FOUND)) { ++ ext3_mb_mark_diskspace_used(&ac, handle); ++ *errp = 0; ++ block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex); ++ ar->len = ac.ac_b_ex.fe_len; ++ } else { ++ freed = ext3_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); ++ if (freed) ++ goto repeat; ++ *errp = -ENOSPC; ++ ac.ac_b_ex.fe_len = 0; ++ block = 0; ++ ext3_mb_show_ac(&ac); ++ } ++ ++ ext3_mb_release_context(&ac); ++ ++ return block; ++} ++EXPORT_SYMBOL(ext3_mb_new_blocks); ++ ++int ext3_new_block(handle_t *handle, struct inode *inode, ++ unsigned long goal, int *errp) ++{ ++ struct ext3_allocation_request ar; ++ unsigned long ret; ++ ++ if (!test_opt(inode->i_sb, MBALLOC)) { ++ ret = ext3_new_block_old(handle, inode, goal, errp); ++ return ret; ++ } ++ ++ ar.inode = inode; ++ ar.goal = goal; ++ ar.len = 1; ++ ar.logical = 0; ++ ar.lleft = 0; ++ ar.pleft = 0; ++ ar.lright = 0; ++ ar.pright = 0; ++ ar.flags = 0; ++ ret = ext3_mb_new_blocks(handle, &ar, errp); ++ return ret; ++} ++ ++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (sbi->s_last_transaction == handle->h_transaction->t_tid) ++ return; ++ ++ /* new transaction! time to close last one and free blocks for ++ * committed transaction. we know that only transaction can be ++ * active, so previos transaction can be being logged and we ++ * know that transaction before previous is known to be already ++ * logged. this means that now we may free blocks freed in all ++ * transactions before previous one. hope I'm clear enough ... */ ++ ++ spin_lock(&sbi->s_md_lock); ++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { ++ mb_debug("new transaction %lu, old %lu\n", ++ (unsigned long) handle->h_transaction->t_tid, ++ (unsigned long) sbi->s_last_transaction); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_closed_transaction); ++ sbi->s_last_transaction = handle->h_transaction->t_tid; ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ ext3_mb_free_committed_blocks(sb); ++} ++ ++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, ++ int group, int block, int count) ++{ ++ struct ext3_group_info *db = e3b->bd_info; ++ struct super_block *sb = e3b->bd_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_free_metadata *md; ++ int i; ++ ++ BUG_ON(e3b->bd_bitmap_page == NULL); ++ BUG_ON(e3b->bd_buddy_page == NULL); ++ ++ ext3_lock_group(sb, group); ++ for (i = 0; i < count; i++) { ++ md = db->bb_md_cur; ++ if (md && db->bb_tid != handle->h_transaction->t_tid) { ++ db->bb_md_cur = NULL; ++ md = NULL; ++ } ++ ++ if (md == NULL) { ++ ext3_unlock_group(sb, group); ++ md = kmalloc(sizeof(*md), GFP_KERNEL); ++ if (md == NULL) ++ return -ENOMEM; ++ md->num = 0; ++ md->group = group; ++ ++ ext3_lock_group(sb, group); ++ if (db->bb_md_cur == NULL) { ++ spin_lock(&sbi->s_md_lock); ++ list_add(&md->list, &sbi->s_active_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ /* protect buddy cache from being freed, ++ * otherwise we'll refresh it from ++ * on-disk bitmap and lose not-yet-available ++ * blocks */ ++ page_cache_get(e3b->bd_buddy_page); ++ page_cache_get(e3b->bd_bitmap_page); ++ db->bb_md_cur = md; ++ db->bb_tid = handle->h_transaction->t_tid; ++ mb_debug("new md 0x%p for group %u\n", ++ md, md->group); ++ } else { ++ kfree(md); ++ md = db->bb_md_cur; ++ } ++ } ++ ++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); ++ md->blocks[md->num] = block + i; ++ md->num++; ++ if (md->num == EXT3_BB_MAX_BLOCKS) { ++ /* no more space, put full container on a sb's list */ ++ db->bb_md_cur = NULL; ++ } ++ } ++ ext3_unlock_group(sb, group); ++ return 0; ++} ++ ++/* ++ * Main entry point into mballoc to free blocks ++ */ ++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, ++ unsigned long block, unsigned long count, ++ int metadata, int *freed) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct super_block *sb = inode->i_sb; ++ struct ext3_allocation_context ac; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ unsigned long bit, overflow; ++ struct buffer_head *gd_bh; ++ unsigned long block_group; ++ struct ext3_sb_info *sbi; ++ struct ext3_buddy e3b; ++ int err = 0, ret; ++ ++ *freed = 0; ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ if (block < le32_to_cpu(es->s_first_data_block) || ++ block + count < block || ++ block + count > le32_to_cpu(es->s_blocks_count)) { ++ ext3_error (sb, __FUNCTION__, ++ "Freeing blocks not in datazone - " ++ "block = %lu, count = %lu", block, count); ++ goto error_return; ++ } ++ ++ ext3_debug("freeing block %lu\n", block); ++ ++ ac.ac_op = EXT3_MB_HISTORY_FREE; ++ ac.ac_inode = inode; ++ ac.ac_sb = sb; ++ ++do_more: ++ overflow = 0; ++ ext3_get_group_no_and_offset(sb, block, &block_group, &bit); ++ ++ /* ++ * Check to see if we are freeing blocks across a group ++ * boundary. ++ */ ++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { ++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); ++ count -= overflow; ++ } ++ brelse(bitmap_bh); ++ bitmap_bh = read_block_bitmap(sb, block_group); ++ if (!bitmap_bh) ++ goto error_return; ++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); ++ if (!gdp) ++ goto error_return; ++ ++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || ++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || ++ in_range (block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group) || ++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error(sb, __FUNCTION__, ++ "Freeing blocks in system zone - " ++ "Block = %lu, count = %lu", block, count); ++ ++ BUFFER_TRACE(bitmap_bh, "getting write access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) ++ goto error_return; ++ ++ /* ++ * We are about to modify some metadata. Call the journal APIs ++ * to unshare ->b_data if a currently-committing transaction is ++ * using it ++ */ ++ BUFFER_TRACE(gd_bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, gd_bh); ++ if (err) ++ goto error_return; ++ ++ err = ext3_mb_load_buddy(sb, block_group, &e3b); ++ if (err) ++ goto error_return; ++ ++#ifdef AGGRESSIVE_CHECK ++ { ++ int i; ++ for (i = 0; i < count; i++) ++ BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); ++ } ++#endif ++ mb_clear_bits(bitmap_bh->b_data, bit, count); ++ ++ /* We dirtied the bitmap block */ ++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ ++ ac.ac_b_ex.fe_group = block_group; ++ ac.ac_b_ex.fe_start = bit; ++ ac.ac_b_ex.fe_len = count; ++ ext3_mb_store_history(&ac); ++ ++ if (metadata) { ++ /* blocks being freed are metadata. these blocks shouldn't ++ * be used until this transaction is committed */ ++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); ++ } else { ++ ext3_lock_group(sb, block_group); ++ err = mb_free_blocks(&e3b, bit, count); ++ ext3_mb_return_to_preallocation(inode, &e3b, block, count); ++ ext3_unlock_group(sb, block_group); ++ BUG_ON(err != 0); ++ } ++ ++ spin_lock(sb_bgl_lock(sbi, block_group)); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ spin_unlock(sb_bgl_lock(sbi, block_group)); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, count); ++ ++ ext3_mb_release_desc(&e3b); ++ ++ *freed += count; ++ ++ /* And the group descriptor block */ ++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); ++ ret = ext3_journal_dirty_metadata(handle, gd_bh); ++ if (!err) err = ret; ++ ++ if (overflow && !err) { ++ block += count; ++ count = overflow; ++ goto do_more; ++ } ++ sb->s_dirt = 1; ++error_return: ++ brelse(bitmap_bh); ++ ext3_std_error(sb, err); ++ return; ++} diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch b/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch new file mode 100644 index 0000000..910df7c --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch @@ -0,0 +1,396 @@ +Index: linux-2.6.9-full/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 01:29:38.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs_i.h 2007-03-28 15:45:41.000000000 +0400 +@@ -130,6 +130,10 @@ struct ext3_inode_info { + struct inode vfs_inode; + + __u32 i_cached_extent[4]; ++ ++ /* mballoc */ ++ struct list_head i_prealloc_list; ++ spinlock_t i_prealloc_lock; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 15:42:16.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2007-03-28 15:45:41.000000000 +0400 +@@ -23,9 +23,16 @@ + #define EXT_INCLUDE + #include + #include ++#include + #endif + #endif + #include ++#include ++ ++struct ext3_buddy_group_blocks; ++struct ext3_locality_group; ++struct ext3_mb_history; ++#define EXT3_BB_MAX_BLOCKS + + /* + * third extended-fs super-block data in memory +Index: linux-2.6.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2007-03-28 15:45:07.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-28 15:45:41.000000000 +0400 +@@ -389,6 +389,7 @@ struct ext3_inode { + #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -749,8 +750,9 @@ struct dir_private_info { + extern int ext3_bg_has_super(struct super_block *sb, int group); + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); ++extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern void ext3_free_blocks_sb (handle_t *, struct super_block *, + unsigned long, unsigned long, int *); + extern unsigned long ext3_count_free_blocks (struct super_block *); +Index: linux-2.6.9-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/super.c 2007-03-28 15:42:16.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/super.c 2007-03-28 15:45:41.000000000 +0400 +@@ -600,6 +600,7 @@ enum { + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_extents, Opt_noextents, Opt_extdebug, ++ Opt_mballoc, Opt_nomballoc, Opt_stripe, + }; + + static match_table_t tokens = { +@@ -653,6 +654,9 @@ static match_table_t tokens = { + {Opt_noextents, "noextents"}, + {Opt_extdebug, "extdebug"}, + {Opt_barrier, "barrier=%u"}, ++ {Opt_mballoc, "mballoc"}, ++ {Opt_nomballoc, "nomballoc"}, ++ {Opt_stripe, "stripe=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, + }; +@@ -965,6 +969,19 @@ clear_qf_name: + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_nomballoc: ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_stripe: ++ if (match_int(&args[0], &option)) ++ return 0; ++ if (option < 0) ++ return 0; ++ sbi->s_stripe = option; ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1654,6 +1671,7 @@ static int ext3_fill_super (struct super + ext3_count_dirs(sb)); + + ext3_ext_init(sb); ++ ext3_mb_init(sb, needs_recovery); + + return 0; + +Index: linux-2.6.9-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-28 01:29:41.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-28 15:45:41.000000000 +0400 +@@ -779,7 +779,7 @@ cleanup: + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, + struct ext3_extent *ex; + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; +- unsigned long next; +- int allocated = 0; +- +- /* until we have multiblock allocation */ +- max_blocks = 1; ++ unsigned long allocated = 0; ++ struct ext3_allocation_request ar; + + clear_buffer_new(bh_result); + ext3_init_tree_desc(&tree, inode); +@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, + goto out2; + } + ++ /* find neighbour allocated blocks */ ++ ar.lleft = iblock; ++ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); ++ if (err) ++ goto out2; ++ ar.lright = iblock; ++ err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); ++ if (err) ++ goto out2; ++ + /* find next allocated block so that we know how many + * blocks we can allocate without ovelapping next extent */ +- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); +- next = ext3_ext_next_allocated_block(path); +- EXT_ASSERT(next > iblock); +- allocated = next - iblock; ++ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); ++ if (ar.pright == 0) ++ allocated = EXT_MAX_BLOCK - iblock; ++ else ++ allocated = ar.lright - iblock; + if (allocated > max_blocks) + allocated = max_blocks; + + /* allocate new block */ +- goal = ext3_ext_find_goal(inode, path, iblock); +- newblock = ext3_new_block(handle, inode, goal, &err); ++ ar.inode = inode; ++ ar.goal = ext3_ext_find_goal(inode, path, iblock); ++ ar.logical = iblock; ++ ar.len = allocated; ++ ar.flags = EXT3_MB_HINT_DATA; ++ newblock = ext3_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", +@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, + newex.ee_block = iblock; + newex.ee_start = newblock; + newex.ee_start_hi = 0; +- newex.ee_len = 1; ++ newex.ee_len = ar.len; + err = ext3_ext_insert_extent(handle, &tree, path, &newex); + if (err) { + /* free data blocks we just allocated */ +- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); ++ /* not a good idea to call discard here directly, ++ * but otherwise we'd need to call it every free() */ ++ ext3_mb_discard_inode_preallocations(inode); ++ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); + goto out2; + } + +@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, + + /* previous routine could use block we allocated */ + newblock = newex.ee_start; ++ allocated = newex.ee_len; + set_buffer_new(bh_result); + + ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, +@@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in + down(&EXT3_I(inode)->truncate_sem); + ext3_ext_invalidate_cache(&tree); + ++ /* it's important to discard preallocations under truncate_sem */ ++ ext3_mb_discard_inode_preallocations(inode); ++ + /* + * TODO: optimization is possible here + * probably we need not scaning at all, +Index: linux-2.6.9-full/fs/ext3/Makefile +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/Makefile 2007-03-28 01:29:38.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/Makefile 2007-03-28 15:45:41.000000000 +0400 +@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o \ +- extents.o ++ extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.9-full/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-05-18 23:57:04.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/xattr.c 2007-03-28 15:45:41.000000000 +0400 +@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +@@ -1328,7 +1328,7 @@ getblk_failed: + if (ce) + mb_cache_entry_free(ce); + ea_bdebug(old_bh, "freeing"); +- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); + + /* ext3_forget() calls bforget() for us, but we + let our caller release old_bh, so we need to +@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + if (ce) + mb_cache_entry_free(ce); +- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); + } else { +Index: linux-2.6.9-full/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/balloc.c 2006-03-10 18:20:03.000000000 +0300 ++++ linux-2.6.9-full/fs/ext3/balloc.c 2007-03-28 15:45:41.000000000 +0400 +@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -267,6 +267,8 @@ void ext3_discard_reservation(struct ino + struct reserve_window_node *rsv = &ei->i_rsv_window; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ++ ext3_mb_discard_inode_preallocations(inode); ++ + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + if (!rsv_is_empty(&rsv->rsv_window)) +@@ -451,21 +453,25 @@ error_return: + return; + } + +-/* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks(handle_t *handle, struct inode *inode, +- unsigned long block, unsigned long count) ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) + { +- struct super_block * sb; +- int dquot_freed_blocks; ++ struct super_block *sb; ++ int freed; ++ ++ /* this isn't the right place to decide whether block is metadata ++ * inode.c/extents.c knows better, but for safety ... */ ++ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || ++ ext3_should_journal_data(inode)) ++ metadata = 1; + + sb = inode->i_sb; +- if (!sb) { +- printk ("ext3_free_blocks: nonexistent device"); +- return; +- } +- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); +- if (dquot_freed_blocks) +- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) ++ ext3_free_blocks_sb(handle, sb, block, count, &freed); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); ++ if (freed) ++ DQUOT_FREE_BLOCK(inode, freed); + return; + } + +@@ -1131,7 +1137,7 @@ int ext3_should_retry_alloc(struct super + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.9-full/fs/ext3/inode.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/inode.c 2007-03-28 01:29:39.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/inode.c 2007-03-28 15:45:41.000000000 +0400 +@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); + return err; + } + +@@ -673,7 +673,7 @@ err_out: + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, +- le32_to_cpu(where[i].key), 1); ++ le32_to_cpu(where[i].key), 1, 0); + return err; + } + +@@ -1834,7 +1834,7 @@ ext3_clear_blocks(handle_t *handle, stru + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 0); + } + + /** +@@ -2007,7 +2007,7 @@ static void ext3_free_branches(handle_t + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch b/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch new file mode 100644 index 0000000..373f0c6 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch @@ -0,0 +1,377 @@ +Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 05:12:50.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h 2007-03-28 16:03:20.000000000 +0400 +@@ -135,6 +135,10 @@ struct ext3_inode_info { + struct inode vfs_inode; + + __u32 i_cached_extent[4]; ++ ++ /* mballoc */ ++ struct list_head i_prealloc_list; ++ spinlock_t i_prealloc_lock; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 16:03:19.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h 2007-03-28 16:03:20.000000000 +0400 +@@ -21,8 +21,15 @@ + #include + #include + #include ++#include + #endif + #include ++#include ++ ++struct ext3_buddy_group_blocks; ++struct ext3_locality_group; ++struct ext3_mb_history; ++#define EXT3_BB_MAX_BLOCKS + + /* + * third extended-fs super-block data in memory +Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h 2007-03-28 16:03:19.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h 2007-03-28 16:03:20.000000000 +0400 +@@ -407,6 +407,7 @@ struct ext3_inode { + #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x4000000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -767,8 +768,9 @@ struct dir_private_info { + extern int ext3_bg_has_super(struct super_block *sb, int group); + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); ++extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern void ext3_free_blocks_sb (handle_t *, struct super_block *, + unsigned long, unsigned long, int *); + extern unsigned long ext3_count_free_blocks (struct super_block *); +Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 16:03:19.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 16:03:20.000000000 +0400 +@@ -688,6 +688,7 @@ enum { + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_extents, Opt_noextents, Opt_extdebug, ++ Opt_mballoc, Opt_nomballoc, Opt_stripe, + Opt_grpquota + }; + +@@ -743,6 +744,9 @@ static match_table_t tokens = { + {Opt_noextents, "noextents"}, + {Opt_extdebug, "extdebug"}, + {Opt_barrier, "barrier=%u"}, ++ {Opt_mballoc, "mballoc"}, ++ {Opt_nomballoc, "nomballoc"}, ++ {Opt_stripe, "stripe=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, + }; +@@ -1092,6 +1096,19 @@ clear_qf_name: + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_nomballoc: ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_stripe: ++ if (match_int(&args[0], &option)) ++ return 0; ++ if (option < 0) ++ return 0; ++ sbi->s_stripe = option; ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1819,6 +1836,7 @@ static int ext3_fill_super (struct super + ext3_count_dirs(sb)); + + ext3_ext_init(sb); ++ ext3_mb_init(sb, needs_recovery); + lock_kernel(); + return 0; + +Index: linux-2.6.16.27-0.9-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/extents.c 2007-03-28 05:13:39.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/extents.c 2007-03-28 16:03:20.000000000 +0400 +@@ -779,7 +779,7 @@ cleanup: + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, + struct ext3_extent *ex; + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; +- unsigned long next; +- int allocated = 0; +- +- /* until we have multiblock allocation */ +- max_blocks = 1; ++ unsigned long allocated = 0; ++ struct ext3_allocation_request ar; + + clear_buffer_new(bh_result); + ext3_init_tree_desc(&tree, inode); +@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, + goto out2; + } + ++ /* find neighbour allocated blocks */ ++ ar.lleft = iblock; ++ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); ++ if (err) ++ goto out2; ++ ar.lright = iblock; ++ err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); ++ if (err) ++ goto out2; ++ + /* find next allocated block so that we know how many + * blocks we can allocate without ovelapping next extent */ +- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); +- next = ext3_ext_next_allocated_block(path); +- EXT_ASSERT(next > iblock); +- allocated = next - iblock; ++ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); ++ if (ar.pright == 0) ++ allocated = EXT_MAX_BLOCK - iblock; ++ else ++ allocated = ar.lright - iblock; + if (allocated > max_blocks) + allocated = max_blocks; + + /* allocate new block */ +- goal = ext3_ext_find_goal(inode, path, iblock); +- newblock = ext3_new_block(handle, inode, goal, &err); ++ ar.inode = inode; ++ ar.goal = ext3_ext_find_goal(inode, path, iblock); ++ ar.logical = iblock; ++ ar.len = allocated; ++ ar.flags = EXT3_MB_HINT_DATA; ++ newblock = ext3_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", +@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, + newex.ee_block = iblock; + newex.ee_start = newblock; + newex.ee_start_hi = 0; +- newex.ee_len = 1; ++ newex.ee_len = ar.len; + err = ext3_ext_insert_extent(handle, &tree, path, &newex); + if (err) { + /* free data blocks we just allocated */ +- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); ++ /* not a good idea to call discard here directly, ++ * but otherwise we'd need to call it every free() */ ++ ext3_mb_discard_inode_preallocations(inode); ++ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); + goto out2; + } + +@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, + + /* previous routine could use block we allocated */ + newblock = newex.ee_start; ++ allocated = newex.ee_len; + set_buffer_new(bh_result); + + ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, +@@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in + down(&EXT3_I(inode)->truncate_sem); + ext3_ext_invalidate_cache(&tree); + ++ /* it's important to discard preallocations under truncate_sem */ ++ ext3_mb_discard_inode_preallocations(inode); ++ + /* + * TODO: optimization is possible here + * probably we need not scaning at all, +Index: linux-2.6.16.27-0.9-full/fs/ext3/Makefile +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/Makefile 2007-03-28 05:12:50.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/Makefile 2007-03-28 16:03:20.000000000 +0400 +@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o \ +- extents.o ++ extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.16.27-0.9-full/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/xattr.c 2007-03-13 02:56:52.000000000 +0300 ++++ linux-2.6.16.27-0.9-full/fs/ext3/xattr.c 2007-03-28 16:03:20.000000000 +0400 +@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl + ea_bdebug(bh, "refcount now=0; freeing"); + if (ce) + mb_cache_entry_free(ce); +- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + } else { +@@ -804,7 +804,7 @@ inserted: + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c 2007-03-13 02:56:52.000000000 +0300 ++++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c 2007-03-28 16:03:20.000000000 +0400 +@@ -80,7 +80,7 @@ struct ext3_group_desc * ext3_get_group_ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -296,6 +296,8 @@ void ext3_discard_reservation(struct ino + struct ext3_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ++ ext3_mb_discard_inode_preallocations(inode); ++ + if (!block_i) + return; + +@@ -491,21 +493,25 @@ error_return: + return; + } + +-/* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks(handle_t *handle, struct inode *inode, +- unsigned long block, unsigned long count) ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) + { +- struct super_block * sb; +- int dquot_freed_blocks; ++ struct super_block *sb; ++ int freed; ++ ++ /* this isn't the right place to decide whether block is metadata ++ * inode.c/extents.c knows better, but for safety ... */ ++ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || ++ ext3_should_journal_data(inode)) ++ metadata = 1; + + sb = inode->i_sb; +- if (!sb) { +- printk ("ext3_free_blocks: nonexistent device"); +- return; +- } +- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); +- if (dquot_freed_blocks) +- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) ++ ext3_free_blocks_sb(handle, sb, block, count, &freed); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); ++ if (freed) ++ DQUOT_FREE_BLOCK(inode, freed); + return; + } + +@@ -1154,7 +1160,7 @@ int ext3_should_retry_alloc(struct super + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.16.27-0.9-full/fs/ext3/inode.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/inode.c 2007-03-28 05:13:38.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/inode.c 2007-03-28 16:03:20.000000000 +0400 +@@ -568,7 +568,7 @@ static int ext3_alloc_branch(handle_t *h + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); + return err; + } + +@@ -1865,7 +1865,7 @@ ext3_clear_blocks(handle_t *handle, stru + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 0); + } + + /** +@@ -2038,7 +2038,7 @@ static void ext3_free_branches(handle_t + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch new file mode 100644 index 0000000..dd07148 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch @@ -0,0 +1,397 @@ +Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_i.h 2007-03-28 02:13:37.000000000 +0400 ++++ linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h 2007-03-28 15:46:02.000000000 +0400 +@@ -131,6 +131,10 @@ struct ext3_inode_info { + struct inode vfs_inode; + + struct ext3_ext_cache i_cached_extent; ++ ++ /* mballoc */ ++ struct list_head i_prealloc_list; ++ spinlock_t i_prealloc_lock; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 15:46:00.000000000 +0400 ++++ linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h 2007-03-28 15:46:02.000000000 +0400 +@@ -23,9 +23,16 @@ + #define EXT_INCLUDE + #include + #include ++#include + #endif + #endif + #include ++#include ++ ++struct ext3_buddy_group_blocks; ++struct ext3_locality_group; ++struct ext3_mb_history; ++#define EXT3_BB_MAX_BLOCKS + + /* + * third extended-fs super-block data in memory +Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 15:46:00.000000000 +0400 ++++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 15:46:02.000000000 +0400 +@@ -363,6 +363,7 @@ struct ext3_inode { + #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -723,8 +724,9 @@ struct dir_private_info { + extern int ext3_bg_has_super(struct super_block *sb, int group); + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); ++extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern unsigned long ext3_count_free_blocks (struct super_block *); + extern void ext3_check_blocks_bitmap (struct super_block *); + extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +Index: linux-2.6.5-7.283-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 15:46:00.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/super.c 2007-03-28 15:46:02.000000000 +0400 +@@ -622,6 +622,7 @@ enum { + Opt_err, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_extents, Opt_noextents, Opt_extdebug, ++ Opt_mballoc, Opt_nomballoc, Opt_stripe, + }; + + static match_table_t tokens = { +@@ -669,6 +670,9 @@ static match_table_t tokens = { + {Opt_noextents, "noextents"}, + {Opt_extdebug, "extdebug"}, + {Opt_barrier, "barrier=%u"}, ++ {Opt_mballoc, "mballoc"}, ++ {Opt_nomballoc, "nomballoc"}, ++ {Opt_stripe, "stripe=%u"}, + {Opt_err, NULL} + }; + +@@ -893,6 +897,19 @@ static int parse_options (char * options + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_nomballoc: ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_stripe: ++ if (match_int(&args[0], &option)) ++ return 0; ++ if (option < 0) ++ return 0; ++ sbi->s_stripe = option; ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1548,6 +1565,7 @@ static int ext3_fill_super (struct super + ext3_count_dirs(sb)); + + ext3_ext_init(sb); ++ ext3_mb_init(sb, needs_recovery); + + return 0; + +Index: linux-2.6.5-7.283-full/fs/ext3/extents.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/extents.c 2007-03-28 03:18:19.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/extents.c 2007-03-28 15:46:02.000000000 +0400 +@@ -779,7 +779,7 @@ cleanup: + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, + struct ext3_extent *ex; + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; +- unsigned long next; +- int allocated = 0; +- +- /* until we have multiblock allocation */ +- max_blocks = 1; ++ unsigned long allocated = 0; ++ struct ext3_allocation_request ar; + + __clear_bit(BH_New, &bh_result->b_state); + ext3_init_tree_desc(&tree, inode); +@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, + goto out2; + } + ++ /* find neighbour allocated blocks */ ++ ar.lleft = iblock; ++ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); ++ if (err) ++ goto out2; ++ ar.lright = iblock; ++ err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); ++ if (err) ++ goto out2; ++ + /* find next allocated block so that we know how many + * blocks we can allocate without ovelapping next extent */ +- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); +- next = ext3_ext_next_allocated_block(path); +- EXT_ASSERT(next > iblock); +- allocated = next - iblock; ++ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); ++ if (ar.pright == 0) ++ allocated = EXT_MAX_BLOCK - iblock; ++ else ++ allocated = ar.lright - iblock; + if (allocated > max_blocks) + allocated = max_blocks; + + /* allocate new block */ +- goal = ext3_ext_find_goal(inode, path, iblock); +- newblock = ext3_new_block(handle, inode, goal, &err); ++ ar.inode = inode; ++ ar.goal = ext3_ext_find_goal(inode, path, iblock); ++ ar.logical = iblock; ++ ar.len = allocated; ++ ar.flags = EXT3_MB_HINT_DATA; ++ newblock = ext3_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", +@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, + newex.ee_block = iblock; + newex.ee_start = newblock; + newex.ee_start_hi = 0; +- newex.ee_len = 1; ++ newex.ee_len = ar.len; + err = ext3_ext_insert_extent(handle, &tree, path, &newex); + if (err) { + /* free data blocks we just allocated */ +- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); ++ /* not a good idea to call discard here directly, ++ * but otherwise we'd need to call it every free() */ ++ ext3_mb_discard_inode_preallocations(inode); ++ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); + goto out2; + } + +@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, + + /* previous routine could use block we allocated */ + newblock = newex.ee_start; ++ allocated = newex.ee_len; + __set_bit(BH_New, &bh_result->b_state); + + ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, +@@ -2341,6 +2359,9 @@ void ext3_ext_truncate(struct inode * in + down(&EXT3_I(inode)->truncate_sem); + ext3_ext_invalidate_cache(&tree); + ++ /* it's important to discard preallocations under truncate_sem */ ++ ext3_mb_discard_inode_preallocations(inode); ++ + /* + * TODO: optimization is possible here + * probably we need not scaning at all, +Index: linux-2.6.5-7.283-full/fs/ext3/Makefile +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/Makefile 2007-03-28 15:27:39.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/Makefile 2007-03-28 15:46:02.000000000 +0400 +@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o \ +- extents.o ++ extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-7.283-full/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/xattr.c 2007-03-28 02:13:37.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/xattr.c 2007-03-28 15:46:02.000000000 +0400 +@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +@@ -1411,7 +1411,7 @@ getblk_failed: + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + /* Free the old block. */ + ea_bdebug(old_bh, "freeing"); +- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); + + /* ext3_forget() calls bforget() for us, but we + let our caller release old_bh, so we need to +@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle + mb_cache_entry_free(ce); + ce = NULL; + } +- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); + } else { +Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c 2006-12-01 18:39:48.000000000 +0300 ++++ linux-2.6.5-7.283-full/fs/ext3/balloc.c 2007-03-28 15:46:02.000000000 +0400 +@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -266,6 +266,8 @@ void ext3_discard_reservation(struct ino + struct reserve_window_node *rsv = &ei->i_rsv_window; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ++ ext3_mb_discard_inode_preallocations(inode); ++ + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + rsv_window_remove(inode->i_sb, rsv); +@@ -274,7 +276,7 @@ void ext3_discard_reservation(struct ino + } + + /* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks(handle_t *handle, struct inode *inode, ++void ext3_free_blocks_old(handle_t *handle, struct inode *inode, + unsigned long block, unsigned long count) + { + struct buffer_head *bitmap_bh = NULL; +@@ -456,6 +458,29 @@ error_return: + return; + } + ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ struct super_block *sb; ++ int freed; ++ ++ /* this isn't the right place to decide whether block is metadata ++ * inode.c/extents.c knows better, but for safety ... */ ++ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || ++ ext3_should_journal_data(inode)) ++ metadata = 1; ++ ++ sb = inode->i_sb; ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) ++ ext3_free_blocks_old(handle, inode, block, count); ++ else { ++ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); ++ if (freed) ++ DQUOT_FREE_BLOCK(inode, freed); ++ } ++ return; ++} ++ + /* + * For ext3 allocations, we must not reuse any blocks which are + * allocated in the bitmap buffer's "last committed data" copy. This +@@ -1142,7 +1167,7 @@ int ext3_should_retry_alloc(struct super + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.5-7.283-full/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/inode.c 2007-03-28 02:50:19.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/inode.c 2007-03-28 15:46:02.000000000 +0400 +@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); + return err; + } + +@@ -675,7 +675,7 @@ err_out: + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, +- le32_to_cpu(where[i].key), 1); ++ le32_to_cpu(where[i].key), 1, 0); + return err; + } + +@@ -1839,7 +1839,7 @@ ext3_clear_blocks(handle_t *handle, stru + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 0); + } + + /** +@@ -2010,7 +2010,7 @@ static void ext3_free_branches(handle_t + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* diff --git a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch b/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch deleted file mode 100644 index 0c41b47..0000000 --- a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch +++ /dev/null @@ -1,381 +0,0 @@ -Index: mmp/fs/ext3/al.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mmp/fs/ext3/al.h 2006-07-24 10:39:26.000000000 +0800 -@@ -0,0 +1,11 @@ -+/* -+ * (C) 2006 Qi Yong -+ */ -+ -+#define ALIVE_MAGIC 0xA1153C29 -+struct alive_struct { -+ __le32 al_magic; -+ __le32 al_seq; -+ __le32 al_time; -+ char al_nodename[65]; -+}; -Index: mmp/fs/ext3/namei.c -=================================================================== ---- mmp.orig/fs/ext3/namei.c 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/fs/ext3/namei.c 2006-07-24 10:39:26.000000000 +0800 -@@ -805,7 +805,7 @@ static inline int search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ --static struct buffer_head * ext3_find_entry (struct dentry *dentry, -+struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { - struct super_block * sb; -Index: mmp/fs/ext3/super.c -=================================================================== ---- mmp.orig/fs/ext3/super.c 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/fs/ext3/super.c 2006-07-24 10:45:19.000000000 +0800 -@@ -36,12 +36,14 @@ - #include - #include - #include -+#include - - #include - - #include "xattr.h" - #include "acl.h" - #include "namei.h" -+#include "al.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -62,6 +64,8 @@ static int ext3_statfs (struct super_blo - static void ext3_unlockfs(struct super_block *sb); - static void ext3_write_super (struct super_block * sb); - static void ext3_write_super_lockfs(struct super_block *sb); -+struct buffer_head * ext3_find_entry (struct dentry *dentry, -+ struct ext3_dir_entry_2 ** res_dir); - - /* - * Wrappers for journal_start/end. -@@ -435,6 +439,9 @@ static void ext3_put_super (struct super - invalidate_bdev(sbi->journal_bdev, 0); - ext3_blkdev_remove(sbi); - } -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); -+ - sb->s_fs_info = NULL; - kfree(sbi); - return; -@@ -1369,6 +1376,261 @@ static unsigned long descriptor_loc(stru - return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); - } - -+static int write_alive(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_write_sync; -+ get_bh(bh); -+ submit_bh(WRITE, bh); -+ wait_on_buffer(bh); -+ if (unlikely(!buffer_uptodate(bh))) -+ return 1; -+ return 0; -+} -+ -+static int read_alive_again(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_read_sync; -+ get_bh(bh); -+ submit_bh(READ, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ brelse(bh); -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * The caller must have a ref on the buffer_head. -+ */ -+static int kalived(void *data) -+{ -+ struct buffer_head * bh; -+ struct alive_struct * alive; -+ char b[BDEVNAME_SIZE]; -+ u32 seq = 0; -+ -+ bh = (struct buffer_head *)data; -+ bdevname(bh->b_bdev, b); -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ alive->al_magic = cpu_to_le32(ALIVE_MAGIC); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ down_read(&uts_sem); -+ memcpy(alive->al_nodename, system_utsname.nodename, 65); -+ up_read(&uts_sem); -+ -+ while (!kthread_should_stop()) { -+ if (++seq == 0) -+ ++seq; -+ -+ alive->al_seq = cpu_to_le32(seq); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) { -+ /* panic here? */ -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", b); -+ continue; -+ } -+ -+ schedule_timeout_interruptible(5 * HZ); -+ } -+ -+ alive->al_seq = 0; -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) -+ printk(KERN_ERR "Alive (device %s): " -+ "can't reset alive block\n", b); -+ brelse(bh); -+ return 0; -+} -+ -+static unsigned long get_alive_ino(struct super_block *sb) -+{ -+ unsigned long ino = 0; -+ struct dentry alive; -+ struct dentry * root; -+ struct inode * root_inode; -+ struct ext3_dir_entry_2 * de; -+ struct buffer_head * bh; -+ -+ root_inode = iget(sb, EXT3_ROOT_INO); -+ root = d_alloc_root(root_inode); -+ if (!root) { -+ printk(KERN_ERR "Alive (device %s): get root inode failed\n", -+ sb->s_id); -+ iput(root_inode); -+ goto out; -+ } -+ -+ alive.d_name.name = ".alive"; -+ alive.d_name.len = 6; -+ alive.d_parent = root; -+ -+ bh = ext3_find_entry(&alive, &de); -+ dput(root); -+ -+ if (!bh) { -+ printk(KERN_WARNING "Alive (device %s): alive lookup failed\n", -+ sb->s_id); -+ goto out; -+ } -+ -+ ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino); -+out: -+ return ino; -+} -+ -+/* check alive file */ -+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi) -+{ -+ unsigned long ino; -+ struct buffer_head * bh; -+ struct ext3_inode_info * ei; -+ struct inode * alive_inode; -+ struct alive_struct * alive; -+ u32 alive_block; -+ u32 seq; -+ -+ ino = get_alive_ino(sb); -+ if (!ino) -+ goto failed; -+ -+ alive_inode = iget(sb, ino); -+ if (!alive_inode) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): get alive inode failed\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!alive_inode->i_nlink) { -+ make_bad_inode(alive_inode); -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): alive inode is deleted\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!S_ISREG(alive_inode->i_mode)) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode, " -+ "in extents format\n", sb->s_id); -+ goto failed; -+ } -+ -+ ei = EXT3_I(alive_inode); -+ alive_block = ei->i_data[0]; -+ iput(alive_inode); -+ -+ pr_debug("Alive (device %s): read in alive block #%u\n", -+ sb->s_id, alive_block); -+ -+ /* first read */ -+ bh = sb_bread(sb, alive_block); -+ if (!bh) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) { -+ printk(KERN_ERR "Alive (device %s): " -+ "magic mismatch\n", sb->s_id); -+ brelse(bh); -+ goto failed; -+ } -+ -+ seq = le32_to_cpu(alive->al_seq); -+ pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq); -+ pr_info ("Alive (device %s): last touched by node: %s, " -+ "%li seconds ago\n", sb->s_id, alive->al_nodename, -+ get_seconds() - le32_to_cpu(alive->al_time)); -+ -+ if (seq == 0) -+ goto skip; -+ -+ /* wait 8s */ -+ pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 8); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+skip: -+ /* write a new random seq */ -+ get_random_bytes(&seq, sizeof(u32)); -+ alive->al_seq = cpu_to_le32(seq); -+ if (unlikely(write_alive(bh))) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", sb->s_id); -+ goto failed; -+ } -+ pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq); -+ -+ /* wait 6s */ -+ pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 6); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+ -+ /* succeed */ -+ pr_info("Alive (device %s): alive check passed!\n", sb->s_id); -+ sbi->s_alive_tsk = kthread_run(kalived, bh, "kalived"); -+ return 0; -+ -+failed: -+ printk(KERN_WARNING "Alive (device %s): alive check failed!\n", -+ sb->s_id); -+ return 1; -+} -+ - - static int ext3_fill_super (struct super_block *sb, void *data, int silent) - { -@@ -1668,6 +1930,10 @@ static int ext3_fill_super (struct super - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); - -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE)) -+ if (check_alive(sb, sbi)) -+ goto failed_mount2; -+ - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! -@@ -1785,6 +2051,8 @@ cantfind_ext3: - - failed_mount3: - journal_destroy(sbi->s_journal); -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); - failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); -Index: mmp/include/linux/ext3_fs.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs.h 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/include/linux/ext3_fs.h 2006-07-24 10:39:26.000000000 +0800 -@@ -581,12 +581,14 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -+#define EXT3_FEATURE_INCOMPAT_ALIVE 0x0080 - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG| \ -- EXT3_FEATURE_INCOMPAT_EXTENTS) -+ EXT3_FEATURE_INCOMPAT_EXTENTS| \ -+ EXT3_FEATURE_INCOMPAT_ALIVE) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -Index: mmp/include/linux/ext3_fs_sb.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs_sb.h 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/include/linux/ext3_fs_sb.h 2006-07-24 10:39:26.000000000 +0800 -@@ -86,6 +86,7 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ struct task_struct * s_alive_tsk; - - /* for buddy allocator */ - struct ext3_group_info **s_group_info; diff --git a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch deleted file mode 100644 index 989ca26..0000000 --- a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch +++ /dev/null @@ -1,381 +0,0 @@ -Index: mmp/fs/ext3/al.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mmp/fs/ext3/al.h 2006-07-18 20:43:51.000000000 +0800 -@@ -0,0 +1,11 @@ -+/* -+ * (C) 2006 Qi Yong -+ */ -+ -+#define ALIVE_MAGIC 0xA1153C29 -+struct alive_struct { -+ __le32 al_magic; -+ __le32 al_seq; -+ __le32 al_time; -+ char al_nodename[65]; -+}; -Index: mmp/fs/ext3/namei.c -=================================================================== ---- mmp.orig/fs/ext3/namei.c 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/fs/ext3/namei.c 2006-07-18 20:43:51.000000000 +0800 -@@ -805,7 +805,7 @@ static inline int search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ --static struct buffer_head * ext3_find_entry (struct dentry *dentry, -+struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { - struct super_block * sb; -Index: mmp/fs/ext3/super.c -=================================================================== ---- mmp.orig/fs/ext3/super.c 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/fs/ext3/super.c 2006-07-18 23:49:54.000000000 +0800 -@@ -35,12 +35,14 @@ - #include - #include - #include -+#include - - #include - - #include "xattr.h" - #include "acl.h" - #include "namei.h" -+#include "al.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -61,6 +63,8 @@ static int ext3_statfs (struct dentry * - static void ext3_unlockfs(struct super_block *sb); - static void ext3_write_super (struct super_block * sb); - static void ext3_write_super_lockfs(struct super_block *sb); -+struct buffer_head * ext3_find_entry (struct dentry *dentry, -+ struct ext3_dir_entry_2 ** res_dir); - - /* - * Wrappers for journal_start/end. -@@ -434,6 +438,9 @@ static void ext3_put_super (struct super - invalidate_bdev(sbi->journal_bdev, 0); - ext3_blkdev_remove(sbi); - } -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); -+ - sb->s_fs_info = NULL; - kfree(sbi); - return; -@@ -1374,6 +1381,261 @@ static ext3_fsblk_t descriptor_loc(struc - return (has_super + ext3_group_first_block_no(sb, bg)); - } - -+static int write_alive(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_write_sync; -+ get_bh(bh); -+ submit_bh(WRITE, bh); -+ wait_on_buffer(bh); -+ if (unlikely(!buffer_uptodate(bh))) -+ return 1; -+ return 0; -+} -+ -+static int read_alive_again(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_read_sync; -+ get_bh(bh); -+ submit_bh(READ, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ brelse(bh); -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * The caller must have a ref on the buffer_head. -+ */ -+static int kalived(void *data) -+{ -+ struct buffer_head * bh; -+ struct alive_struct * alive; -+ char b[BDEVNAME_SIZE]; -+ u32 seq = 0; -+ -+ bh = (struct buffer_head *)data; -+ bdevname(bh->b_bdev, b); -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ alive->al_magic = cpu_to_le32(ALIVE_MAGIC); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ down_read(&uts_sem); -+ memcpy(alive->al_nodename, system_utsname.nodename, 65); -+ up_read(&uts_sem); -+ -+ while (!kthread_should_stop()) { -+ if (++seq == 0) -+ ++seq; -+ -+ alive->al_seq = cpu_to_le32(seq); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) { -+ /* panic here? */ -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", b); -+ continue; -+ } -+ -+ schedule_timeout_interruptible(5 * HZ); -+ } -+ -+ alive->al_seq = 0; -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) -+ printk(KERN_ERR "Alive (device %s): " -+ "can't reset alive block\n", b); -+ brelse(bh); -+ return 0; -+} -+ -+static unsigned long get_alive_ino(struct super_block *sb) -+{ -+ unsigned long ino = 0; -+ struct dentry alive; -+ struct dentry * root; -+ struct inode * root_inode; -+ struct ext3_dir_entry_2 * de; -+ struct buffer_head * bh; -+ -+ root_inode = iget(sb, EXT3_ROOT_INO); -+ root = d_alloc_root(root_inode); -+ if (!root) { -+ printk(KERN_ERR "Alive (device %s): get root inode failed\n", -+ sb->s_id); -+ iput(root_inode); -+ goto out; -+ } -+ -+ alive.d_name.name = ".alive"; -+ alive.d_name.len = 6; -+ alive.d_parent = root; -+ -+ bh = ext3_find_entry(&alive, &de); -+ dput(root); -+ -+ if (!bh) { -+ printk(KERN_WARNING "Alive (device %s): alive lookup failed\n", -+ sb->s_id); -+ goto out; -+ } -+ -+ ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino); -+out: -+ return ino; -+} -+ -+/* check alive file */ -+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi) -+{ -+ unsigned long ino; -+ struct buffer_head * bh; -+ struct ext3_inode_info * ei; -+ struct inode * alive_inode; -+ struct alive_struct * alive; -+ u32 alive_block; -+ u32 seq; -+ -+ ino = get_alive_ino(sb); -+ if (!ino) -+ goto failed; -+ -+ alive_inode = iget(sb, ino); -+ if (!alive_inode) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): get alive inode failed\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!alive_inode->i_nlink) { -+ make_bad_inode(alive_inode); -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): alive inode is deleted\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!S_ISREG(alive_inode->i_mode)) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode, " -+ "in extents format\n", sb->s_id); -+ goto failed; -+ } -+ -+ ei = EXT3_I(alive_inode); -+ alive_block = ei->i_data[0]; -+ iput(alive_inode); -+ -+ pr_debug("Alive (device %s): read in alive block #%u\n", -+ sb->s_id, alive_block); -+ -+ /* first read */ -+ bh = sb_bread(sb, alive_block); -+ if (!bh) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) { -+ printk(KERN_ERR "Alive (device %s): " -+ "magic mismatch\n", sb->s_id); -+ brelse(bh); -+ goto failed; -+ } -+ -+ seq = le32_to_cpu(alive->al_seq); -+ pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq); -+ pr_info ("Alive (device %s): last touched by node: %s, " -+ "%li seconds ago\n", sb->s_id, alive->al_nodename, -+ get_seconds() - le32_to_cpu(alive->al_time)); -+ -+ if (seq == 0) -+ goto skip; -+ -+ /* wait 8s */ -+ pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 8); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+skip: -+ /* write a new random seq */ -+ get_random_bytes(&seq, sizeof(u32)); -+ alive->al_seq = cpu_to_le32(seq); -+ if (unlikely(write_alive(bh))) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", sb->s_id); -+ goto failed; -+ } -+ pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq); -+ -+ /* wait 6s */ -+ pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 6); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+ -+ /* succeed */ -+ pr_info("Alive (device %s): alive check passed!\n", sb->s_id); -+ sbi->s_alive_tsk = kthread_run(kalived, bh, "kalived"); -+ return 0; -+ -+failed: -+ printk(KERN_WARNING "Alive (device %s): alive check failed!\n", -+ sb->s_id); -+ return 1; -+} -+ - - static int ext3_fill_super (struct super_block *sb, void *data, int silent) - { -@@ -1688,6 +1950,10 @@ static int ext3_fill_super (struct super - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); - -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE)) -+ if (check_alive(sb, sbi)) -+ goto failed_mount2; -+ - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! -@@ -1796,6 +2062,8 @@ failed_mount3: - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); - failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); -Index: mmp/include/linux/ext3_fs.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs.h 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/include/linux/ext3_fs.h 2006-07-18 20:43:52.000000000 +0800 -@@ -579,12 +579,14 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -+#define EXT3_FEATURE_INCOMPAT_ALIVE 0x0080 - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG| \ -- EXT3_FEATURE_INCOMPAT_EXTENTS) -+ EXT3_FEATURE_INCOMPAT_EXTENTS| \ -+ EXT3_FEATURE_INCOMPAT_ALIVE) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -Index: mmp/include/linux/ext3_fs_sb.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs_sb.h 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/include/linux/ext3_fs_sb.h 2006-07-18 20:43:52.000000000 +0800 -@@ -86,6 +86,7 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ struct task_struct * s_alive_tsk; - - /* for buddy allocator */ - struct ext3_group_info **s_group_info; diff --git a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-rhel4.patch b/lustre/kernel_patches/patches/ext3-nanosecond-2.6-rhel4.patch deleted file mode 100644 index 423a399..0000000 --- a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-rhel4.patch +++ /dev/null @@ -1,401 +0,0 @@ -Index: linux-2.6.9-rhel4/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/ialloc.c -+++ linux-2.6.9-rhel4/fs/ext3/ialloc.c -@@ -612,7 +612,8 @@ got: - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = -+ ext3_current_time(inode); - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_next_alloc_block = 0; -@@ -650,9 +651,8 @@ got: - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT3_STATE_NEW; -- ei->i_extra_isize = -- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ -+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-2.6.9-rhel4/fs/ext3/inode.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/inode.c -+++ linux-2.6.9-rhel4/fs/ext3/inode.c -@@ -627,7 +627,7 @@ static int ext3_splice_branch(handle_t * - - /* We are done with atomic stuff, now do the rest of housekeeping */ - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* had we spliced it onto indirect block? */ -@@ -2227,7 +2227,7 @@ do_indirects: - ; - } - up(&ei->truncate_sem); -- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* In a multi-transaction truncate, we only make the final -@@ -2454,10 +2454,6 @@ void ext3_read_inode(struct inode * inod - } - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - inode->i_size = le32_to_cpu(raw_inode->i_size); -- inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); -- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); -- inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); -- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - - ei->i_state = 0; - ei->i_next_alloc_block = 0; -@@ -2518,6 +2514,11 @@ void ext3_read_inode(struct inode * inod - else - ei->i_extra_isize = 0; - -+ EXT3_INODE_GET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2598,9 +2599,12 @@ static int ext3_do_update_inode(handle_t - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); -- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); -- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); -- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); -+ -+ EXT3_INODE_SET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_SET_XTIME(i_crtime, ei, raw_inode); -+ - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -Index: linux-2.6.9-rhel4/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/ioctl.c -+++ linux-2.6.9-rhel4/fs/ext3/ioctl.c -@@ -112,7 +112,7 @@ int ext3_ioctl (struct inode * inode, st - ei->i_flags = flags; - - ext3_set_inode_flags(inode); -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - flags_err: -@@ -150,7 +150,7 @@ flags_err: - return PTR_ERR(handle); - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err == 0) { -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - inode->i_generation = generation; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - } -Index: linux-2.6.9-rhel4/fs/ext3/namei.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/namei.c -+++ linux-2.6.9-rhel4/fs/ext3/namei.c -@@ -1302,7 +1302,7 @@ static int add_dirent_to_buf(handle_t *h - * happen is that the times are slightly out of date - * and/or different from the directory change time. - */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; -+ dir->i_mtime = dir->i_ctime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - dir->i_version++; - ext3_mark_inode_dirty(handle, dir); -@@ -2098,7 +2098,7 @@ static int ext3_rmdir (struct inode * di - inode->i_version++; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - ext3_dec_count(handle, dir); - ext3_update_dx_flag(dir); -@@ -2148,13 +2148,13 @@ static int ext3_unlink(struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; -- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ dir->i_ctime = dir->i_mtime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - retval = 0; - -@@ -2255,7 +2255,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -@@ -2357,7 +2357,7 @@ static int ext3_rename (struct inode * o - * Like most other Unix systems, set the ctime for inodes on a - * rename. - */ -- old_inode->i_ctime = CURRENT_TIME_SEC; -+ old_inode->i_ctime = ext3_current_time(old_inode); - ext3_mark_inode_dirty(handle, old_inode); - - /* -@@ -2390,9 +2390,9 @@ static int ext3_rename (struct inode * o - - if (new_inode) { - ext3_dec_count(handle, new_inode); -- new_inode->i_ctime = CURRENT_TIME_SEC; -+ new_inode->i_ctime = ext3_current_time(new_inode); - } -- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; -+ old_dir->i_ctime = old_dir->i_mtime = ext3_current_time(old_dir); - ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); -Index: linux-2.6.9-rhel4/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/super.c -+++ linux-2.6.9-rhel4/fs/ext3/super.c -@@ -1468,6 +1468,8 @@ static int ext3_fill_super (struct super - sbi->s_inode_size); - goto failed_mount; - } -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) -+ sb->s_time_gran = 1 << (EXT3_EPOCH_BITS - 2); - } - sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); -@@ -1654,6 +1656,32 @@ static int ext3_fill_super (struct super - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ -+ /* determine the minimum size of new large inodes, if present */ -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_want_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_want_extra_isize); -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_min_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_min_extra_isize); -+ } -+ } -+ /* Check if enough inode space is available */ -+ if (EXT3_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > -+ sbi->s_inode_size) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ printk(KERN_INFO "EXT3-fs: required extra inode space not" -+ "available.\n"); -+ } -+ - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -Index: linux-2.6.9-rhel4/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.9-rhel4.orig/fs/ext3/xattr.c -+++ linux-2.6.9-rhel4/fs/ext3/xattr.c -@@ -1305,7 +1305,7 @@ getblk_failed: - - /* Update the inode. */ - EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - if (IS_SYNC(inode)) - handle->h_sync = 1; -Index: linux-2.6.9-rhel4/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-rhel4.orig/include/linux/ext3_fs.h -+++ linux-2.6.9-rhel4/include/linux/ext3_fs.h -@@ -258,7 +258,7 @@ struct ext3_inode { - __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ - __le32 i_atime; /* Access time */ -- __le32 i_ctime; /* Creation time */ -+ __le32 i_ctime; /* Inode Change time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ -@@ -307,10 +307,73 @@ struct ext3_inode { - } osd2; /* OS dependent 2 */ - __u16 i_extra_isize; - __u16 i_pad1; -+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ -+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ -+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ -+ __le32 i_crtime; /* File Creation time */ -+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ - }; - - #define i_size_high i_dir_acl - -+#define EXT3_EPOCH_BITS 2 -+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1) -+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS) -+ -+#define EXT3_FITS_IN_INODE(ext3_inode, einode, field) \ -+ ((offsetof(typeof(*ext3_inode), field) + \ -+ sizeof((ext3_inode)->field)) \ -+ <= (EXT3_GOOD_OLD_INODE_SIZE + \ -+ (einode)->i_extra_isize)) \ -+ -+static inline __le32 ext3_encode_extra_time(struct timespec *time) -+{ -+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ? -+ time->tv_sec >> 32 : 0) | -+ ((time->tv_nsec << 2) & EXT3_NSEC_MASK)); -+} -+ -+static inline void ext3_decode_extra_time(struct timespec *time, __le32 extra) { -+ if (sizeof(time->tv_sec) > 4) -+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT3_EPOCH_MASK) -+ << 32; -+ time->tv_nsec = (le32_to_cpu(extra) & EXT3_NSEC_MASK) >> 2; -+} -+ -+#define EXT3_INODE_SET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra))\ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(inode)->xtime); \ -+} while (0) -+ -+#define EXT3_EINODE_SET_XTIME(xtime, einode, raw_inode) \ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(einode)->xtime); \ -+} while (0) -+ -+#define EXT3_INODE_GET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra))\ -+ ext3_decode_extra_time(&(inode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ -+#define EXT3_EINODE_GET_XTIME(xtime, einode, raw_inode) \ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ ext3_decode_extra_time(&(einode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -@@ -485,11 +548,19 @@ struct ext3_super_block { - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ -- __u8 s_reserved_char_pad; -- __u16 s_reserved_word_pad; -+ __u8 s_jnl_backup_type; /* Default type of journal backup */ -+ __le16 s_desc_size; /* Group desc. size: INCOMPAT_64BIT */ - __le32 s_default_mount_opts; -- __le32 s_first_meta_bg; /* First metablock block group */ -- __u32 s_reserved[190]; /* Padding to the end of the block */ -+ __le32 s_first_meta_bg; /* First metablock block group */ -+ __le32 s_mkfs_time; /* When the filesystem was created */ -+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ -+ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ -+ __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ -+ __le32 s_free_blocks_hi; /* Free blocks count high 32 bits */ -+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */ -+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ -+ __le32 s_flags; /* Miscellaneous flags */ -+ __u32 s_reserved[167]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -565,6 +630,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 -+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -581,6 +647,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* -@@ -708,6 +775,12 @@ static inline struct ext3_inode *ext3_ra - return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); - } - -+static inline struct timespec ext3_current_time(struct inode *inode) -+{ -+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? -+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -+} -+ - /* - * This structure is stuffed into the struct file's private_data field - * for directories. It is where we put information so that we can do -Index: linux-2.6.9-rhel4/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.9-rhel4.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.9-rhel4/include/linux/ext3_fs_i.h -@@ -130,6 +130,7 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ struct timespec i_crtime; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.9-rhel4/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.9-rhel4.orig/include/linux/ext3_fs_sb.h -+++ linux-2.6.9-rhel4/include/linux/ext3_fs_sb.h -@@ -122,6 +122,8 @@ struct ext3_sb_info { - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; -+ -+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - }; - - #define EXT3_GROUP_INFO(sb, group) \ diff --git a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-nanosecond-2.6-sles10.patch deleted file mode 100644 index eb3d0c2..0000000 --- a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-sles10.patch +++ /dev/null @@ -1,404 +0,0 @@ -Index: linux-2.6.16.27-0.9/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/ialloc.c -+++ linux-2.6.16.27-0.9/fs/ext3/ialloc.c -@@ -577,7 +577,8 @@ got: - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = -+ ext3_current_time(inode); - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_dir_start_lookup = 0; -@@ -609,9 +610,8 @@ got: - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT3_STATE_NEW; -- ei->i_extra_isize = -- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ -+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-2.6.16.27-0.9/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/inode.c -+++ linux-2.6.16.27-0.9/fs/ext3/inode.c -@@ -620,7 +620,7 @@ static int ext3_splice_branch(handle_t * - - /* We are done with atomic stuff, now do the rest of housekeeping */ - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* had we spliced it onto indirect block? */ -@@ -2244,7 +2244,7 @@ do_indirects: - ext3_discard_reservation(inode); - - up(&ei->truncate_sem); -- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* In a multi-transaction truncate, we only make the final -@@ -2479,10 +2479,6 @@ void ext3_read_inode(struct inode * inod - } - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - inode->i_size = le32_to_cpu(raw_inode->i_size); -- inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); -- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); -- inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); -- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - - ei->i_state = 0; - ei->i_dir_start_lookup = 0; -@@ -2557,6 +2553,11 @@ void ext3_read_inode(struct inode * inod - } else - ei->i_extra_isize = 0; - -+ EXT3_INODE_GET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2637,9 +2638,12 @@ static int ext3_do_update_inode(handle_t - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); -- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); -- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); -- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); -+ -+ EXT3_INODE_SET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_SET_XTIME(i_crtime, ei, raw_inode); -+ - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -Index: linux-2.6.16.27-0.9/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/ioctl.c -+++ linux-2.6.16.27-0.9/fs/ext3/ioctl.c -@@ -88,7 +88,7 @@ int ext3_ioctl (struct inode * inode, st - ei->i_flags = flags; - - ext3_set_inode_flags(inode); -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - flags_err: -@@ -126,7 +126,7 @@ flags_err: - return PTR_ERR(handle); - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err == 0) { -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - inode->i_generation = generation; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - } -Index: linux-2.6.16.27-0.9/fs/ext3/namei.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/namei.c -+++ linux-2.6.16.27-0.9/fs/ext3/namei.c -@@ -1276,7 +1276,7 @@ static int add_dirent_to_buf(handle_t *h - * happen is that the times are slightly out of date - * and/or different from the directory change time. - */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; -+ dir->i_mtime = dir->i_ctime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - dir->i_version++; - ext3_mark_inode_dirty(handle, dir); -@@ -2056,7 +2056,7 @@ static int ext3_rmdir (struct inode * di - inode->i_version++; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - ext3_dec_count(handle, dir); - ext3_update_dx_flag(dir); -@@ -2106,13 +2106,13 @@ static int ext3_unlink(struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; -- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ dir->i_ctime = dir->i_mtime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - retval = 0; - -@@ -2214,7 +2214,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -@@ -2317,7 +2317,7 @@ static int ext3_rename (struct inode * o - * Like most other Unix systems, set the ctime for inodes on a - * rename. - */ -- old_inode->i_ctime = CURRENT_TIME_SEC; -+ old_inode->i_ctime = ext3_current_time(old_inode); - ext3_mark_inode_dirty(handle, old_inode); - - /* -@@ -2350,9 +2350,9 @@ static int ext3_rename (struct inode * o - - if (new_inode) { - ext3_dec_count(handle, new_inode); -- new_inode->i_ctime = CURRENT_TIME_SEC; -+ new_inode->i_ctime = ext3_current_time(new_inode); - } -- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; -+ old_dir->i_ctime = old_dir->i_mtime = ext3_current_time(old_dir); - ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); -Index: linux-2.6.16.27-0.9/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/super.c -+++ linux-2.6.16.27-0.9/fs/ext3/super.c -@@ -1614,6 +1614,8 @@ static int ext3_fill_super (struct super - sbi->s_inode_size); - goto failed_mount; - } -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) -+ sb->s_time_gran = 1 << (EXT3_EPOCH_BITS - 2); - } - sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); -@@ -1809,6 +1811,32 @@ static int ext3_fill_super (struct super - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ -+ /* determine the minimum size of new large inodes, if present */ -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_want_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_want_extra_isize); -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_min_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_min_extra_isize); -+ } -+ } -+ /* Check if enough inode space is available */ -+ if (EXT3_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > -+ sbi->s_inode_size) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ printk(KERN_INFO "EXT3-fs: required extra inode space not" -+ "available.\n"); -+ } -+ - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -Index: linux-2.6.16.27-0.9/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.16.27-0.9.orig/fs/ext3/xattr.c -+++ linux-2.6.16.27-0.9/fs/ext3/xattr.c -@@ -1006,8 +1006,8 @@ ext3_xattr_set_handle(handle_t *handle, - } - if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); -- inode->i_ctime = CURRENT_TIME_SEC; -- error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); -+ inode->i_ctime = ext3_current_time(inode); -+ ext3_mark_inode_dirty(handle, inode); - /* - * The bh is consumed by ext3_mark_iloc_dirty, even with - * error != 0. -Index: linux-2.6.16.27-0.9/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.27-0.9.orig/include/linux/ext3_fs.h -+++ linux-2.6.16.27-0.9/include/linux/ext3_fs.h -@@ -272,7 +272,7 @@ struct ext3_inode { - __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ - __le32 i_atime; /* Access time */ -- __le32 i_ctime; /* Creation time */ -+ __le32 i_ctime; /* Inode Change time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ -@@ -321,10 +321,73 @@ struct ext3_inode { - } osd2; /* OS dependent 2 */ - __le16 i_extra_isize; - __le16 i_pad1; -+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ -+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ -+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ -+ __le32 i_crtime; /* File Creation time */ -+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ - }; - - #define i_size_high i_dir_acl - -+#define EXT3_EPOCH_BITS 2 -+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1) -+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS) -+ -+#define EXT3_FITS_IN_INODE(ext3_inode, einode, field) \ -+ ((offsetof(typeof(*ext3_inode), field) + \ -+ sizeof((ext3_inode)->field)) \ -+ <= (EXT3_GOOD_OLD_INODE_SIZE + \ -+ (einode)->i_extra_isize)) \ -+ -+static inline __le32 ext3_encode_extra_time(struct timespec *time) -+{ -+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ? -+ time->tv_sec >> 32 : 0) | -+ ((time->tv_nsec << 2) & EXT3_NSEC_MASK)); -+} -+ -+static inline void ext3_decode_extra_time(struct timespec *time, __le32 extra) { -+ if (sizeof(time->tv_sec) > 4) -+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT3_EPOCH_MASK) -+ << 32; -+ time->tv_nsec = (le32_to_cpu(extra) & EXT3_NSEC_MASK) >> 2; -+} -+ -+#define EXT3_INODE_SET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra)) \ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(inode)->xtime); \ -+} while (0) -+ -+#define EXT3_EINODE_SET_XTIME(xtime, einode, raw_inode) \ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(einode)->xtime); \ -+} while (0) -+ -+#define EXT3_INODE_GET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra))\ -+ ext3_decode_extra_time(&(inode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ -+#define EXT3_EINODE_GET_XTIME(xtime, einode, raw_inode) \ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime);\ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ ext3_decode_extra_time(&(einode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -@@ -504,11 +567,19 @@ struct ext3_super_block { - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ -- __u8 s_reserved_char_pad; -- __u16 s_reserved_word_pad; -+ __u8 s_jnl_backup_type; /* Default type of journal backup */ -+ __le16 s_desc_size; /* Group desc. size: INCOMPAT_64BIT */ - __le32 s_default_mount_opts; -- __le32 s_first_meta_bg; /* First metablock block group */ -- __u32 s_reserved[190]; /* Padding to the end of the block */ -+ __le32 s_first_meta_bg; /* First metablock block group */ -+ __le32 s_mkfs_time; /* When the filesystem was created */ -+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ -+ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ -+ __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ -+ __le32 s_free_blocks_hi; /* Free blocks count high 32 bits */ -+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */ -+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ -+ __le32 s_flags; /* Miscellaneous flags */ -+ __u32 s_reserved[167]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -583,6 +648,8 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 -+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 -+ - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -599,6 +666,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* -@@ -726,6 +794,12 @@ static inline struct ext3_inode *ext3_ra - return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); - } - -+static inline struct timespec ext3_current_time(struct inode *inode) -+{ -+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? -+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -+} -+ - /* - * This structure is stuffed into the struct file's private_data field - * for directories. It is where we put information so that we can do -Index: linux-2.6.16.27-0.9/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16.27-0.9.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.16.27-0.9/include/linux/ext3_fs_i.h -@@ -135,6 +135,7 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ struct timespec i_crtime; - - void *i_filterdata; - }; -Index: linux-2.6.16.27-0.9/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.16.27-0.9.orig/include/linux/ext3_fs_sb.h -+++ linux-2.6.16.27-0.9/include/linux/ext3_fs_sb.h -@@ -119,6 +119,8 @@ struct ext3_sb_info { - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; -+ -+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - }; - - #define EXT3_GROUP_INFO(sb, group) \ diff --git a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-nanosecond-2.6-suse.patch deleted file mode 100644 index fa05b0c..0000000 --- a/lustre/kernel_patches/patches/ext3-nanosecond-2.6-suse.patch +++ /dev/null @@ -1,195 +0,0 @@ -Index: linux-2.6.5-7.283/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.5-7.283.orig/fs/ext3/ialloc.c -+++ linux-2.6.5-7.283/fs/ext3/ialloc.c -@@ -613,7 +613,8 @@ got: - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = -+ CURRENT_TIME; - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_next_alloc_block = 0; -@@ -651,9 +652,8 @@ got: - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT3_STATE_NEW; -- ei->i_extra_isize = -- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ -+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-2.6.5-7.283/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-7.283.orig/fs/ext3/inode.c -+++ linux-2.6.5-7.283/fs/ext3/inode.c -@@ -2459,7 +2459,11 @@ void ext3_read_inode(struct inode * inod - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); -- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_crtime)) { -+ ei->i_crtime.tv_sec = le32_to_cpu(raw_inode->i_crtime); -+ } -+ inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = -+ ei->i_crtime.tv_nsec = 0; - - ei->i_state = 0; - ei->i_next_alloc_block = 0; -@@ -2603,6 +2607,10 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_crtime)) { -+ raw_inode->i_crtime = cpu_to_le32(ei->i_crtime.tv_sec); -+ } -+ - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -Index: linux-2.6.5-7.283/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-7.283.orig/fs/ext3/super.c -+++ linux-2.6.5-7.283/fs/ext3/super.c -@@ -1515,6 +1515,32 @@ static int ext3_fill_super (struct super - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ -+ /* determine the minimum size of new large inodes, if present */ -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_want_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_want_extra_isize); -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_min_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_min_extra_isize); -+ } -+ } -+ /* Check if enough inode space is available */ -+ if (EXT3_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > -+ sbi->s_inode_size) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ printk(KERN_INFO "EXT3-fs: required extra inode space not" -+ "available.\n"); -+ } -+ - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -Index: linux-2.6.5-7.283/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.283.orig/include/linux/ext3_fs.h -+++ linux-2.6.5-7.283/include/linux/ext3_fs.h -@@ -232,7 +232,7 @@ struct ext3_inode { - __u16 i_uid; /* Low 16 bits of Owner Uid */ - __u32 i_size; /* Size in bytes */ - __u32 i_atime; /* Access time */ -- __u32 i_ctime; /* Creation time */ -+ __u32 i_ctime; /* Inode Change time */ - __u32 i_mtime; /* Modification time */ - __u32 i_dtime; /* Deletion Time */ - __u16 i_gid; /* Low 16 bits of Group Id */ -@@ -281,10 +281,25 @@ struct ext3_inode { - } osd2; /* OS dependent 2 */ - __u16 i_extra_isize; - __u16 i_pad1; -+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ -+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ -+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ -+ __le32 i_crtime; /* File Creation time */ -+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ - }; - - #define i_size_high i_dir_acl - -+#define EXT3_EPOCH_BITS 2 -+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1) -+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS) -+ -+#define EXT3_FITS_IN_INODE(ext3_inode, einode, field) \ -+ ((offsetof(typeof(*ext3_inode), field) + \ -+ sizeof((ext3_inode)->field)) \ -+ <= (EXT3_GOOD_OLD_INODE_SIZE + \ -+ (einode)->i_extra_isize)) \ -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -@@ -460,11 +475,19 @@ struct ext3_super_block { - __u32 s_last_orphan; /* start of list of inodes to delete */ - __u32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ -- __u8 s_reserved_char_pad; -- __u16 s_reserved_word_pad; -+ __u8 s_jnl_backup_type; /* Default type of journal backup */ -+ __u16 s_desc_size; /* Group desc. size: INCOMPAT_64BIT */ - __u32 s_default_mount_opts; -- __u32 s_first_meta_bg; /* First metablock block group */ -- __u32 s_reserved[190]; /* Padding to the end of the block */ -+ __u32 s_first_meta_bg; /* First metablock block group */ -+ __u32 s_mkfs_time; /* When the filesystem was created */ -+ __u32 s_jnl_blocks[17]; /* Backup of the journal inode */ -+ __u32 s_blocks_count_hi; /* Blocks count high 32 bits */ -+ __u32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ -+ __u32 s_free_blocks_hi; /* Free blocks count high 32 bits */ -+ __u16 s_min_extra_isize; /* All inodes have at least # bytes */ -+ __u16 s_want_extra_isize; /* New inodes should reserve # bytes */ -+ __u32 s_flags; /* Miscellaneous flags */ -+ __u32 s_reserved[167]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -539,6 +556,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 -+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -555,6 +573,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* -Index: linux-2.6.5-7.283/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.5-7.283.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.5-7.283/include/linux/ext3_fs_i.h -@@ -131,6 +131,7 @@ struct ext3_inode_info { - struct inode vfs_inode; - - struct ext3_ext_cache i_cached_extent; -+ struct timespec i_crtime; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.5-7.283/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-7.283.orig/include/linux/ext3_fs_sb.h -+++ linux-2.6.5-7.283/include/linux/ext3_fs_sb.h -@@ -119,6 +119,8 @@ struct ext3_sb_info { - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; -+ -+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - }; - - #define EXT3_GROUP_INFO(sb, group) \ diff --git a/lustre/kernel_patches/patches/ext3-nanosecond-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-nanosecond-2.6.18-vanilla.patch deleted file mode 100644 index 6dd7ad7..0000000 --- a/lustre/kernel_patches/patches/ext3-nanosecond-2.6.18-vanilla.patch +++ /dev/null @@ -1,403 +0,0 @@ -Index: linux-2.6.18/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/ialloc.c -+++ linux-2.6.18/fs/ext3/ialloc.c -@@ -615,7 +615,8 @@ got: - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = -+ ext3_current_time(inode); - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_dir_start_lookup = 0; -@@ -647,9 +648,8 @@ got: - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state = EXT3_STATE_NEW; -- ei->i_extra_isize = -- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? -- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; -+ -+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize; - - ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { -Index: linux-2.6.18/fs/ext3/inode.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/inode.c -+++ linux-2.6.18/fs/ext3/inode.c -@@ -729,7 +729,7 @@ static int ext3_splice_branch(handle_t * - - /* We are done with atomic stuff, now do the rest of housekeeping */ - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* had we spliced it onto indirect block? */ -@@ -2388,7 +2388,7 @@ do_indirects: - ext3_discard_reservation(inode); - - mutex_unlock(&ei->truncate_mutex); -- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_mtime = inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - - /* -@@ -2624,10 +2624,6 @@ void ext3_read_inode(struct inode * inod - } - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - inode->i_size = le32_to_cpu(raw_inode->i_size); -- inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); -- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); -- inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); -- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - - ei->i_state = 0; - ei->i_dir_start_lookup = 0; -@@ -2702,6 +2698,11 @@ void ext3_read_inode(struct inode * inod - } else - ei->i_extra_isize = 0; - -+ EXT3_INODE_GET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2782,9 +2783,12 @@ static int ext3_do_update_inode(handle_t - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); -- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); -- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); -- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); -+ -+ EXT3_INODE_SET_XTIME(i_ctime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_mtime, inode, raw_inode); -+ EXT3_INODE_SET_XTIME(i_atime, inode, raw_inode); -+ EXT3_EINODE_SET_XTIME(i_crtime, ei, raw_inode); -+ - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -Index: linux-2.6.18/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/ioctl.c -+++ linux-2.6.18/fs/ext3/ioctl.c -@@ -120,7 +120,7 @@ int ext3_ioctl (struct inode * inode, st - ei->i_flags = flags; - - ext3_set_inode_flags(inode); -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - flags_err: -@@ -161,7 +161,7 @@ flags_err: - return PTR_ERR(handle); - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err == 0) { -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - inode->i_generation = generation; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - } -Index: linux-2.6.18/fs/ext3/namei.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/namei.c -+++ linux-2.6.18/fs/ext3/namei.c -@@ -1287,7 +1287,7 @@ static int add_dirent_to_buf(handle_t *h - * happen is that the times are slightly out of date - * and/or different from the directory change time. - */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; -+ dir->i_mtime = dir->i_ctime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - dir->i_version++; - ext3_mark_inode_dirty(handle, dir); -@@ -2079,7 +2079,7 @@ static int ext3_rmdir (struct inode * di - inode->i_version++; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - ext3_dec_count(handle, dir); - ext3_update_dx_flag(dir); -@@ -2129,13 +2129,13 @@ static int ext3_unlink(struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; -- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; -+ dir->i_ctime = dir->i_mtime = ext3_current_time(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); -- inode->i_ctime = dir->i_ctime; -+ inode->i_ctime = ext3_current_time(inode); - ext3_mark_inode_dirty(handle, inode); - retval = 0; - -@@ -2237,7 +2237,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -@@ -2340,7 +2340,7 @@ static int ext3_rename (struct inode * o - * Like most other Unix systems, set the ctime for inodes on a - * rename. - */ -- old_inode->i_ctime = CURRENT_TIME_SEC; -+ old_inode->i_ctime = ext3_current_time(old_inode); - ext3_mark_inode_dirty(handle, old_inode); - - /* -@@ -2373,9 +2373,9 @@ static int ext3_rename (struct inode * o - - if (new_inode) { - ext3_dec_count(handle, new_inode); -- new_inode->i_ctime = CURRENT_TIME_SEC; -+ new_inode->i_ctime = ext3_current_time(new_inode); - } -- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; -+ old_dir->i_ctime = old_dir->i_mtime = ext3_current_time(old_dir); - ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); -Index: linux-2.6.18/fs/ext3/super.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/super.c -+++ linux-2.6.18/fs/ext3/super.c -@@ -1615,6 +1615,8 @@ static int ext3_fill_super (struct super - sbi->s_inode_size); - goto failed_mount; - } -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) -+ sb->s_time_gran = 1 << (EXT3_EPOCH_BITS - 2); - } - sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); -@@ -1819,6 +1821,32 @@ static int ext3_fill_super (struct super - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ -+ /* determine the minimum size of new large inodes, if present */ -+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_want_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_want_extra_isize); -+ if (sbi->s_want_extra_isize < -+ le16_to_cpu(es->s_min_extra_isize)) -+ sbi->s_want_extra_isize = -+ le16_to_cpu(es->s_min_extra_isize); -+ } -+ } -+ /* Check if enough inode space is available */ -+ if (EXT3_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > -+ sbi->s_inode_size) { -+ sbi->s_want_extra_isize = sizeof(struct ext3_inode) - -+ EXT3_GOOD_OLD_INODE_SIZE; -+ printk(KERN_INFO "EXT3-fs: required extra inode space not" -+ "available.\n"); -+ } -+ - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -Index: linux-2.6.18/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/xattr.c -+++ linux-2.6.18/fs/ext3/xattr.c -@@ -1007,7 +1007,7 @@ ext3_xattr_set_handle(handle_t *handle, - } - if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); -- inode->i_ctime = CURRENT_TIME_SEC; -+ inode->i_ctime = ext3_current_time(inode); - error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); - /* - * The bh is consumed by ext3_mark_iloc_dirty, even with -Index: linux-2.6.18/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.18.orig/include/linux/ext3_fs.h -+++ linux-2.6.18/include/linux/ext3_fs.h -@@ -268,7 +268,7 @@ struct ext3_inode { - __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ - __le32 i_atime; /* Access time */ -- __le32 i_ctime; /* Creation time */ -+ __le32 i_ctime; /* Inode Change time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ -@@ -317,10 +317,73 @@ struct ext3_inode { - } osd2; /* OS dependent 2 */ - __le16 i_extra_isize; - __le16 i_pad1; -+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ -+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ -+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ -+ __le32 i_crtime; /* File Creation time */ -+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ - }; - - #define i_size_high i_dir_acl - -+#define EXT3_EPOCH_BITS 2 -+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1) -+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS) -+ -+#define EXT3_FITS_IN_INODE(ext3_inode, einode, field) \ -+ ((offsetof(typeof(*ext3_inode), field) + \ -+ sizeof((ext3_inode)->field)) \ -+ <= (EXT3_GOOD_OLD_INODE_SIZE + \ -+ (einode)->i_extra_isize)) \ -+ -+static inline __le32 ext3_encode_extra_time(struct timespec *time) -+{ -+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ? -+ time->tv_sec >> 32 : 0) | -+ ((time->tv_nsec << 2) & EXT3_NSEC_MASK)); -+} -+ -+static inline void ext3_decode_extra_time(struct timespec *time, __le32 extra) { -+ if (sizeof(time->tv_sec) > 4) -+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT3_EPOCH_MASK) -+ << 32; -+ time->tv_nsec = (le32_to_cpu(extra) & EXT3_NSEC_MASK) >> 2; -+} -+ -+#define EXT3_INODE_SET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra)) \ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(inode)->xtime); \ -+} while (0) -+ -+#define EXT3_EINODE_SET_XTIME(xtime, einode, raw_inode)\ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ (raw_inode)->xtime ## _extra = \ -+ ext3_encode_extra_time(&(einode)->xtime); \ -+} while (0) -+ -+#define EXT3_INODE_GET_XTIME(xtime, inode, raw_inode) \ -+do { \ -+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, EXT3_I(inode), xtime ## _extra)) \ -+ ext3_decode_extra_time(&(inode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ -+#define EXT3_EINODE_GET_XTIME(xtime, einode, raw_inode) \ -+do { \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime)) \ -+ (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \ -+ if (EXT3_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ -+ ext3_decode_extra_time(&(einode)->xtime, \ -+ raw_inode->xtime ## _extra); \ -+} while (0) -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -@@ -498,11 +562,19 @@ struct ext3_super_block { - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ -- __u8 s_reserved_char_pad; -- __u16 s_reserved_word_pad; -+ __u8 s_jnl_backup_type; /* Default type of journal backup */ -+ __le16 s_desc_size; /* Group desc. size: INCOMPAT_64BIT */ - __le32 s_default_mount_opts; -- __le32 s_first_meta_bg; /* First metablock block group */ -- __u32 s_reserved[190]; /* Padding to the end of the block */ -+ __le32 s_first_meta_bg; /* First metablock block group */ -+ __le32 s_mkfs_time; /* When the filesystem was created */ -+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ -+ __le32 s_blocks_count_hi; /* Blocks count high 32 bits */ -+ __le32 s_r_blocks_count_hi; /* Reserved blocks count high 32 bits*/ -+ __le32 s_free_blocks_count_hi; /* Free blocks count high 32 bits */ -+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */ -+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ -+ __le32 s_flags; /* Miscellaneous flags */ -+ __u32 s_reserved[167]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -519,6 +584,13 @@ static inline struct ext3_inode_info *EX - return container_of(inode, struct ext3_inode_info, vfs_inode); - } - -+static inline struct timespec ext3_current_time(struct inode *inode) -+{ -+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? -+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -+} -+ -+ - static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino) - { - return ino == EXT3_ROOT_INO || -@@ -590,6 +662,8 @@ static inline int ext3_valid_inum(struct - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 -+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 -+ - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -606,6 +680,7 @@ static inline int ext3_valid_inum(struct - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ -+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* -Index: linux-2.6.18/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.18.orig/include/linux/ext3_fs_sb.h -+++ linux-2.6.18/include/linux/ext3_fs_sb.h -@@ -119,6 +119,8 @@ struct ext3_sb_info { - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; -+ -+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - }; - - #define EXT3_GROUP_INFO(sb, group) \ -Index: linux-2.6.18/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.18.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.18/include/linux/ext3_fs_i.h -@@ -144,6 +144,7 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ struct timespec i_crtime; - - void *i_filterdata; - }; diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch deleted file mode 100644 index 56ea50a..0000000 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch +++ /dev/null @@ -1,172 +0,0 @@ -Index: linux/fs/ext3/namei.c -=================================================================== ---- linux.orig/fs/ext3/namei.c -+++ linux/fs/ext3/namei.c -@@ -1549,11 +1549,17 @@ static int ext3_delete_entry (handle_t * - static inline void ext3_inc_count(handle_t *handle, struct inode *inode) - { - inode->i_nlink++; -+ if (is_dx(inode) && inode->i_nlink > 1) { -+ /* limit is 16-bit i_links_count */ -+ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) -+ inode->i_nlink = 1; -+ } - } - - static inline void ext3_dec_count(handle_t *handle, struct inode *inode) - { -- inode->i_nlink--; -+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) -+ inode->i_nlink--; - } - - static int ext3_add_nondir(handle_t *handle, -@@ -1654,7 +1660,7 @@ static int ext3_mkdir(struct inode * dir - struct ext3_dir_entry_2 * de; - int err; - -- if (dir->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(dir)) - return -EMLINK; - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -@@ -1676,7 +1682,7 @@ static int ext3_mkdir(struct inode * dir - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -- inode->i_nlink--; /* is this nlink == 0? */ -+ ext3_dec_count(handle, inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; -@@ -1708,7 +1714,7 @@ static int ext3_mkdir(struct inode * dir - iput (inode); - goto out_stop; - } -- dir->i_nlink++; -+ ext3_inc_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -@@ -1769,10 +1775,11 @@ static int empty_dir (struct inode * ino - } - de = (struct ext3_dir_entry_2 *) bh->b_data; - } -- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh, -- offset)) { -- brelse (bh); -- return 1; -+ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { -+ /* On error skip the de and offset to the next block. */ -+ de = (void *)(bh->b_data + sb->s_blocksize); -+ offset = (offset | (sb->s_blocksize - 1)) + 1; -+ continue; - } - if (le32_to_cpu(de->inode)) { - brelse (bh); -@@ -1965,14 +1972,14 @@ static int ext3_rmdir (struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; -- if (inode->i_nlink != 2) -- ext3_warning (inode->i_sb, "ext3_rmdir", -- "empty directory has nlink!=2 (%d)", -- inode->i_nlink); -+ if (!EXT3_DIR_LINK_EMPTY(inode)) -+ ext3_warning(inode->i_sb, __FUNCTION__, -+ "empty directory has too many links (%d)", -+ inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- dir->i_nlink--; -+ ext3_dec_count(handle, dir); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - ext3_update_dx_flag(dir); -@@ -2024,7 +2031,7 @@ static int ext3_unlink(struct inode * di - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); -- inode->i_nlink--; -+ ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; -@@ -2116,9 +2123,8 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) { -+ if (EXT3_DIR_LINK_MAX(inode)) - return -EMLINK; -- } - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS); -@@ -2202,8 +2208,8 @@ static int ext3_rename (struct inode * o - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; -- if (!new_inode && new_dir!=old_dir && -- new_dir->i_nlink >= EXT3_LINK_MAX) -+ if (!new_inode && new_dir != old_dir && -+ EXT3_DIR_LINK_MAX(new_dir)) - goto end_rename; - } - if (!new_bh) { -@@ -2261,7 +2267,7 @@ static int ext3_rename (struct inode * o - } - - if (new_inode) { -- new_inode->i_nlink--; -+ ext3_dec_count(handle, new_inode); - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -@@ -2272,11 +2278,13 @@ static int ext3_rename (struct inode * o - PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); -- old_dir->i_nlink--; -+ ext3_dec_count(handle, old_dir); - if (new_inode) { -- new_inode->i_nlink--; -+ /* checked empty_dir above, can't have another parent, -+ * ext3_dec_count() won't work for many-linked dirs */ -+ new_inode->i_nlink = 0; - } else { -- new_dir->i_nlink++; -+ ext3_inc_count(handle, new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } -Index: linux/include/linux/ext3_fs.h -=================================================================== ---- linux.orig/include/linux/ext3_fs.h -+++ linux/include/linux/ext3_fs.h -@@ -79,7 +79,7 @@ - /* - * Maximal count of links to a file - */ --#define EXT3_LINK_MAX 32000 -+#define EXT3_LINK_MAX 65000 - - /* - * Macro-instructions used to manage several block sizes -@@ -503,6 +503,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -516,6 +517,7 @@ struct ext3_super_block { - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch deleted file mode 100644 index bed994b..0000000 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch +++ /dev/null @@ -1,172 +0,0 @@ -Index: linux-2.4.21/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21.orig/fs/ext3/namei.c -+++ linux-2.4.21/fs/ext3/namei.c -@@ -1550,11 +1550,17 @@ static int ext3_delete_entry (handle_t * - static inline void ext3_inc_count(handle_t *handle, struct inode *inode) - { - inode->i_nlink++; -+ if (is_dx(inode) && inode->i_nlink > 1) { -+ /* limit is 16-bit i_links_count */ -+ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) -+ inode->i_nlink = 1; -+ } - } - - static inline void ext3_dec_count(handle_t *handle, struct inode *inode) - { -- inode->i_nlink--; -+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) -+ inode->i_nlink--; - } - - static int ext3_add_nondir(handle_t *handle, -@@ -1658,7 +1664,7 @@ static int ext3_mkdir(struct inode * dir - struct ext3_dir_entry_2 * de; - int err; - -- if (dir->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(dir)) - return -EMLINK; - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -@@ -1680,7 +1686,7 @@ static int ext3_mkdir(struct inode * dir - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -- inode->i_nlink--; /* is this nlink == 0? */ -+ ext3_dec_count(handle, inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; -@@ -1712,7 +1718,7 @@ static int ext3_mkdir(struct inode * dir - iput (inode); - goto out_stop; - } -- dir->i_nlink++; -+ ext3_inc_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -@@ -1773,10 +1779,11 @@ static int empty_dir (struct inode * ino - } - de = (struct ext3_dir_entry_2 *) bh->b_data; - } -- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh, -- offset)) { -- brelse (bh); -- return 1; -+ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { -+ /* On error skip the de and offset to the next block. */ -+ de = (void *)(bh->b_data + sb->s_blocksize); -+ offset = (offset | (sb->s_blocksize - 1)) + 1; -+ continue; - } - if (le32_to_cpu(de->inode)) { - brelse (bh); -@@ -1968,14 +1975,14 @@ static int ext3_rmdir (struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; -- if (inode->i_nlink != 2) -- ext3_warning (inode->i_sb, "ext3_rmdir", -- "empty directory has nlink!=2 (%d)", -- inode->i_nlink); -+ if (!EXT3_DIR_LINK_EMPTY(inode)) -+ ext3_warning(inode->i_sb, __FUNCTION__, -+ "empty directory has too many links (%d)", -+ inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- dir->i_nlink--; -+ ext3_dec_count(handle, dir); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - ext3_update_dx_flag(dir); -@@ -2061,7 +2068,7 @@ static int ext3_unlink(struct inode * di - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); -- inode->i_nlink--; -+ ext3_dec_count(handle, inode); - if (!inode->i_nlink) { - ext3_try_to_delay_deletion(inode); - ext3_orphan_add(handle, inode); -@@ -2155,9 +2162,8 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) { -+ if (EXT3_DIR_LINK_MAX(inode)) - return -EMLINK; -- } - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS); -@@ -2241,8 +2247,8 @@ static int ext3_rename (struct inode * o - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; -- if (!new_inode && new_dir!=old_dir && -- new_dir->i_nlink >= EXT3_LINK_MAX) -+ if (!new_inode && new_dir != old_dir && -+ EXT3_DIR_LINK_MAX(new_dir)) - goto end_rename; - } - if (!new_bh) { -@@ -2300,7 +2306,7 @@ static int ext3_rename (struct inode * o - } - - if (new_inode) { -- new_inode->i_nlink--; -+ ext3_dec_count(handle, new_inode); - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -@@ -2311,11 +2317,13 @@ static int ext3_rename (struct inode * o - PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); -- old_dir->i_nlink--; -+ ext3_dec_count(handle, old_dir); - if (new_inode) { -- new_inode->i_nlink--; -+ /* checked empty_dir above, can't have another parent, -+ * ext3_dec_count() won't work for many-linked dirs */ -+ new_inode->i_nlink = 0; - } else { -- new_dir->i_nlink++; -+ ext3_inc_count(handle, new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } -Index: linux-2.4.21/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21.orig/include/linux/ext3_fs.h -+++ linux-2.4.21/include/linux/ext3_fs.h -@@ -81,7 +81,7 @@ - /* - * Maximal count of links to a file - */ --#define EXT3_LINK_MAX 32000 -+#define EXT3_LINK_MAX 65000 - - /* - * Macro-instructions used to manage several block sizes -@@ -505,6 +505,7 @@ static inline struct inode *orphan_list_ - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -518,6 +519,7 @@ static inline struct inode *orphan_list_ - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch deleted file mode 100644 index 0e47e28..0000000 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch +++ /dev/null @@ -1,172 +0,0 @@ -Index: linux-2.4.24/fs/ext3/namei.c -=================================================================== ---- linux-2.4.24.orig/fs/ext3/namei.c -+++ linux-2.4.24/fs/ext3/namei.c -@@ -1549,11 +1549,17 @@ static int ext3_delete_entry (handle_t * - static inline void ext3_inc_count(handle_t *handle, struct inode *inode) - { - inode->i_nlink++; -+ if (is_dx(inode) && inode->i_nlink > 1) { -+ /* limit is 16-bit i_links_count */ -+ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) -+ inode->i_nlink = 1; -+ } - } - - static inline void ext3_dec_count(handle_t *handle, struct inode *inode) - { -- inode->i_nlink--; -+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) -+ inode->i_nlink--; - } - - static int ext3_add_nondir(handle_t *handle, -@@ -1654,7 +1660,7 @@ static int ext3_mkdir(struct inode * dir - struct ext3_dir_entry_2 * de; - int err; - -- if (dir->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(dir)) - return -EMLINK; - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -@@ -1676,7 +1682,7 @@ static int ext3_mkdir(struct inode * dir - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -- inode->i_nlink--; /* is this nlink == 0? */ -+ ext3_dec_count(handle, inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; -@@ -1708,7 +1714,7 @@ static int ext3_mkdir(struct inode * dir - iput (inode); - goto out_stop; - } -- dir->i_nlink++; -+ ext3_inc_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -@@ -1769,10 +1775,11 @@ static int empty_dir (struct inode * ino - } - de = (struct ext3_dir_entry_2 *) bh->b_data; - } -- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh, -- offset)) { -- brelse (bh); -- return 1; -+ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { -+ /* On error skip the de and offset to the next block. */ -+ de = (void *)(bh->b_data + sb->s_blocksize); -+ offset = (offset | (sb->s_blocksize - 1)) + 1; -+ continue; - } - if (le32_to_cpu(de->inode)) { - brelse (bh); -@@ -1965,14 +1972,14 @@ static int ext3_rmdir (struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; -- if (inode->i_nlink != 2) -- ext3_warning (inode->i_sb, "ext3_rmdir", -- "empty directory has nlink!=2 (%d)", -- inode->i_nlink); -+ if (!EXT3_DIR_LINK_EMPTY(inode)) -+ ext3_warning(inode->i_sb, __FUNCTION__, -+ "empty directory has too many links (%d)", -+ inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); -- dir->i_nlink--; -+ ext3_dec_count(handle, dir); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - ext3_update_dx_flag(dir); -@@ -2058,7 +2065,7 @@ static int ext3_unlink(struct inode * di - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); -- inode->i_nlink--; -+ ext3_dec_count(handle, inode); - if (!inode->i_nlink) { - ext3_try_to_delay_deletion(inode); - ext3_orphan_add(handle, inode); -@@ -2152,9 +2159,8 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) { -+ if (EXT3_DIR_LINK_MAX(inode)) - return -EMLINK; -- } - - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS); -@@ -2238,8 +2244,8 @@ static int ext3_rename (struct inode * o - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; -- if (!new_inode && new_dir!=old_dir && -- new_dir->i_nlink >= EXT3_LINK_MAX) -+ if (!new_inode && new_dir != old_dir && -+ EXT3_DIR_LINK_MAX(new_dir)) - goto end_rename; - } - if (!new_bh) { -@@ -2297,7 +2303,7 @@ static int ext3_rename (struct inode * o - } - - if (new_inode) { -- new_inode->i_nlink--; -+ ext3_dec_count(handle, new_inode); - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -@@ -2308,11 +2314,13 @@ static int ext3_rename (struct inode * o - PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); -- old_dir->i_nlink--; -+ ext3_dec_count(handle, old_dir); - if (new_inode) { -- new_inode->i_nlink--; -+ /* checked empty_dir above, can't have another parent, -+ * ext3_dec_count() won't work for many-linked dirs */ -+ new_inode->i_nlink = 0; - } else { -- new_dir->i_nlink++; -+ ext3_inc_count(handle, new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } -Index: linux-2.4.24/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs.h -+++ linux-2.4.24/include/linux/ext3_fs.h -@@ -79,7 +79,7 @@ - /* - * Maximal count of links to a file - */ --#define EXT3_LINK_MAX 32000 -+#define EXT3_LINK_MAX 65000 - - /* - * Macro-instructions used to manage several block sizes -@@ -504,6 +504,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -517,6 +518,7 @@ struct ext3_super_block { - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch deleted file mode 100644 index 5054b0c..0000000 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch +++ /dev/null @@ -1,156 +0,0 @@ -Index: linux-2.6.5-7.283/fs/ext3/namei.c -=================================================================== ---- linux-2.6.5-7.283.orig/fs/ext3/namei.c -+++ linux-2.6.5-7.283/fs/ext3/namei.c -@@ -1613,11 +1613,17 @@ static int ext3_delete_entry (handle_t * - static inline void ext3_inc_count(handle_t *handle, struct inode *inode) - { - inode->i_nlink++; -+ if (is_dx(inode) && inode->i_nlink > 1) { -+ /* limit is 16-bit i_links_count */ -+ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) -+ inode->i_nlink = 1; -+ } - } - - static inline void ext3_dec_count(handle_t *handle, struct inode *inode) - { -- inode->i_nlink--; -+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) -+ inode->i_nlink--; - } - - static int ext3_add_nondir(handle_t *handle, -@@ -1730,7 +1736,7 @@ static int ext3_mkdir(struct inode * dir - int retries = 0; - int err; - -- if (dir->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(dir)) - return -EMLINK; - - retry: -@@ -1752,7 +1758,7 @@ retry: - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -- inode->i_nlink--; /* is this nlink == 0? */ -+ ext3_dec_count(handle, inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; -@@ -1784,7 +1790,7 @@ retry: - iput (inode); - goto out_stop; - } -- dir->i_nlink++; -+ ext3_inc_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -@@ -2042,16 +2048,16 @@ static int ext3_rmdir (struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; -- if (inode->i_nlink != 2) -- ext3_warning (inode->i_sb, "ext3_rmdir", -- "empty directory has nlink!=2 (%d)", -- inode->i_nlink); -+ if (!EXT3_DIR_LINK_EMPTY(inode)) -+ ext3_warning(inode->i_sb, "ext3_rmdir", -+ "empty directory has too many links (%d)", -+ inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->i_nlink--; -+ ext3_dec_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - -@@ -2100,7 +2106,7 @@ static int ext3_unlink(struct inode * di - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); -- inode->i_nlink--; -+ ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; -@@ -2191,7 +2197,7 @@ static int ext3_link (struct dentry * ol - struct inode *inode = old_dentry->d_inode; - int err, retries = 0; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(inode)) - return -EMLINK; - - retry: -@@ -2277,8 +2283,8 @@ static int ext3_rename (struct inode * o - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; -- if (!new_inode && new_dir!=old_dir && -- new_dir->i_nlink >= EXT3_LINK_MAX) -+ if (!new_inode && new_dir != old_dir && -+ EXT3_DIR_LINK_MAX(new_dir)) - goto end_rename; - } - if (!new_bh) { -@@ -2335,7 +2341,7 @@ static int ext3_rename (struct inode * o - } - - if (new_inode) { -- new_inode->i_nlink--; -+ ext3_dec_count(handle, new_inode); - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -@@ -2346,11 +2352,13 @@ static int ext3_rename (struct inode * o - PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); -- old_dir->i_nlink--; -+ ext3_dec_count(handle, old_dir); - if (new_inode) { -- new_inode->i_nlink--; -+ /* checked empty_dir above, can't have another parent, -+ * ext3_dec_count() won't work for many-linked dirs */ -+ new_inode->i_nlink = 0; - } else { -- new_dir->i_nlink++; -+ ext3_inc_count(handle, new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } -Index: linux-2.6.5-7.283/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.283.orig/include/linux/ext3_fs.h -+++ linux-2.6.5-7.283/include/linux/ext3_fs.h -@@ -86,7 +86,7 @@ struct statfs; - /* - * Maximal count of links to a file - */ --#define EXT3_LINK_MAX 32000 -+#define EXT3_LINK_MAX 65000 - - /* - * Macro-instructions used to manage several block sizes -@@ -538,6 +538,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -553,6 +554,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.6.9.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.6.9.patch deleted file mode 100644 index d572c8f..0000000 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.6.9.patch +++ /dev/null @@ -1,158 +0,0 @@ -Index: linux-2.6.12/fs/ext3/namei.c -=================================================================== ---- linux-2.6.12.orig/fs/ext3/namei.c -+++ linux-2.6.12/fs/ext3/namei.c -@@ -1600,11 +1600,17 @@ static int ext3_delete_entry (handle_t * - static inline void ext3_inc_count(handle_t *handle, struct inode *inode) - { - inode->i_nlink++; -+ if (is_dx(inode) && inode->i_nlink > 1) { -+ /* limit is 16-bit i_links_count */ -+ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) -+ inode->i_nlink = 1; -+ } - } - - static inline void ext3_dec_count(handle_t *handle, struct inode *inode) - { -- inode->i_nlink--; -+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) -+ inode->i_nlink--; - } - - static int ext3_add_nondir(handle_t *handle, -@@ -1703,7 +1709,7 @@ static int ext3_mkdir(struct inode * dir - struct ext3_dir_entry_2 * de; - int err, retries = 0; - -- if (dir->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(dir)) - return -EMLINK; - - retry: -@@ -1726,7 +1732,7 @@ retry: - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -- inode->i_nlink--; /* is this nlink == 0? */ -+ ext3_dec_count(handle, inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; -@@ -1758,7 +1764,7 @@ retry: - iput (inode); - goto out_stop; - } -- dir->i_nlink++; -+ ext3_inc_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); -@@ -2023,10 +2029,10 @@ static int ext3_rmdir (struct inode * di - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_rmdir; -- if (inode->i_nlink != 2) -- ext3_warning (inode->i_sb, "ext3_rmdir", -- "empty directory has nlink!=2 (%d)", -- inode->i_nlink); -+ if (!EXT3_DIR_LINK_EMPTY(inode)) -+ ext3_warning(inode->i_sb, "ext3_rmdir", -+ "empty directory has too many links (%d)", -+ inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; - /* There's no need to set i_disksize: the fact that i_nlink is -@@ -2036,7 +2042,7 @@ static int ext3_rmdir (struct inode * di - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); -- dir->i_nlink--; -+ ext3_dec_count(handle, dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - -@@ -2087,7 +2093,7 @@ static int ext3_unlink(struct inode * di - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); -- inode->i_nlink--; -+ ext3_dec_count(handle, inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; -@@ -2162,7 +2168,7 @@ static int ext3_link (struct dentry * ol - struct inode *inode = old_dentry->d_inode; - int err, retries = 0; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (EXT3_DIR_LINK_MAX(inode)) - return -EMLINK; - - retry: -@@ -2249,8 +2255,8 @@ static int ext3_rename (struct inode * o - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; -- if (!new_inode && new_dir!=old_dir && -- new_dir->i_nlink >= EXT3_LINK_MAX) -+ if (!new_inode && new_dir != old_dir && -+ EXT3_DIR_LINK_MAX(new_dir)) - goto end_rename; - } - if (!new_bh) { -@@ -2307,7 +2313,7 @@ static int ext3_rename (struct inode * o - } - - if (new_inode) { -- new_inode->i_nlink--; -+ ext3_dec_count(handle, new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; -@@ -2318,11 +2324,13 @@ static int ext3_rename (struct inode * o - PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); -- old_dir->i_nlink--; -+ ext3_dec_count(handle, old_dir); - if (new_inode) { -- new_inode->i_nlink--; -+ /* checked empty_dir above, can't have another parent, -+ * ext3_dec_count() won't work for many-linked dirs */ -+ new_inode->i_nlink = 0; - } else { -- new_dir->i_nlink++; -+ ext3_inc_count(handle, new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } -Index: linux-2.6.12/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12.orig/include/linux/ext3_fs.h -+++ linux-2.6.12/include/linux/ext3_fs.h -@@ -78,7 +78,7 @@ struct statfs; - /* - * Maximal count of links to a file - */ --#define EXT3_LINK_MAX 32000 -+#define EXT3_LINK_MAX 65000 - - /* - * Macro-instructions used to manage several block sizes -@@ -539,6 +539,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 - #define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -@@ -552,6 +553,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_META_BG) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - - /* diff --git a/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch b/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch deleted file mode 100644 index 37a5d7a..0000000 --- a/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch +++ /dev/null @@ -1,15 +0,0 @@ - fs/ext3/super.c | 1 - - 1 files changed, 1 deletion(-) - ---- linux-2.4.18-chaos/fs/ext3/super.c~ext3-no-write-super-chaos 2003-08-24 21:34:53.000000000 +0400 -+++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-08-24 21:40:47.000000000 +0400 -@@ -1818,7 +1818,6 @@ void ext3_write_super (struct super_bloc - if (down_trylock(&sb->s_lock) == 0) - BUG(); - sb->s_dirt = 0; -- log_start_commit(EXT3_SB(sb)->s_journal, NULL); - } - - static int ext3_sync_fs(struct super_block *sb) - -_ diff --git a/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch deleted file mode 100644 index f8270b2..0000000 --- a/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch +++ /dev/null @@ -1,218 +0,0 @@ - fs/ext3/ialloc.c | 47 ++++++++++++++++++++++- - fs/ext3/inode.c | 96 +++++++++++++++++++++++++++++++++++++----------- - include/linux/ext3_fs.h | 2 + - 3 files changed, 121 insertions(+), 24 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-16 12:21:46.000000000 +0800 -@@ -289,6 +289,37 @@ error_return: - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ int bitmap_nr = load_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ struct buffer_head *ibitmap; -+ -+ if (bitmap_nr < 0) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -310,6 +341,7 @@ struct inode * ext3_new_inode (handle_t - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -+ struct ext3_iloc iloc; - int err = 0; - - /* Cannot create files in a deleted directory */ -@@ -510,8 +542,19 @@ repeat: - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) goto fail; -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) goto fail; -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); -+ if (err) { -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) goto fail; -+ -+ - - unlock_super (sb); - if(DQUOT_ALLOC_INODE(inode)) { ---- linux-2.4.20/fs/ext3/inode.c~ext3-noread-2.4.20 2003-05-16 12:21:41.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-16 12:22:15.000000000 +0800 -@@ -2013,14 +2013,19 @@ out_stop: - ext3_journal_stop(handle, inode); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -- --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+#define NUM_INODE_PREREAD 16 -+ -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; -@@ -2045,31 +2050,73 @@ int ext3_get_inode_loc (struct inode *in - } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -- if (!bh) { -+ if (!(inode->i_sb->u.ext3_sb.s_group_desc[group_desc])) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(inode->i_sb->u.ext3_sb.s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)); -+ - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -- } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ (offset * EXT3_INODE_SIZE(inode->i_sb) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ bh[0] = sb_getblk(inode->i_sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. -+ */ -+ if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ mark_buffer_uptodate(bh[0], 1); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ inode->i_sb->u.ext3_sb.s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(inode->i_sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, -+ bh[0]->b_blocknr); -+ goto bad_inode; -+ } -+ } -+done: -+ offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; - - return 0; -@@ -2078,6 +2125,11 @@ int ext3_get_inode_loc (struct inode *in - return -EIO; - } - -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); -+} -+ - void ext3_read_inode(struct inode * inode) - { - struct ext3_iloc iloc; ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-16 12:21:46.000000000 +0800 -@@ -683,6 +683,8 @@ extern int ext3_forget(handle_t *, int, - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); - -_ diff --git a/lustre/kernel_patches/patches/ext3-noread-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-noread-2.4.21-chaos.patch deleted file mode 100644 index 1903baa..0000000 --- a/lustre/kernel_patches/patches/ext3-noread-2.4.21-chaos.patch +++ /dev/null @@ -1,223 +0,0 @@ - fs/ext3/ialloc.c | 47 ++++++++++++++++++++++- - fs/ext3/inode.c | 96 +++++++++++++++++++++++++++++++++++++----------- - include/linux/ext3_fs.h | 2 + - 3 files changed, 121 insertions(+), 24 deletions(-) - -Index: linux-2.4.21-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c 2003-12-12 12:56:39.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 13:21:50.000000000 +0300 -@@ -290,6 +290,37 @@ - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ int bitmap_nr = load_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ struct buffer_head *ibitmap; -+ -+ if (bitmap_nr < 0) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -312,6 +343,7 @@ - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -+ struct ext3_iloc iloc; - int err = 0; - - /* Cannot create files in a deleted directory */ -@@ -513,8 +545,19 @@ - inode->i_generation = sbi->s_next_generation++; - - ei->i_state = EXT3_STATE_NEW; -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) goto fail; -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) goto fail; -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); -+ if (err) { -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) goto fail; -+ -+ - - #ifdef CONFIG_EXT3_FS_XATTR - init_rwsem(&EXT3_I(inode)->xattr_sem); -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 13:01:48.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 13:22:45.000000000 +0300 -@@ -2291,16 +2291,21 @@ - } - #endif /* EXT3_DELETE_THREAD */ - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -- --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+#define NUM_INODE_PREREAD 16 -+ -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { - struct super_block *sb = inode->i_sb; - struct ext3_sb_info *sbi = EXT3_SB(sb); -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; -@@ -2322,30 +2327,72 @@ - } - group_desc = block_group >> sbi->s_desc_per_block_bits; - desc = block_group & (sbi->s_desc_per_block - 1); -- bh = sbi->s_group_desc[group_desc]; -- if (!bh) { -+ if (!(sbi->s_group_desc[group_desc])) { - ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -- sbi->s_inode_size; -+ offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)); -+ - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -- if (!(bh = sb_bread(sb, block))) { -- ext3_error (sb, __FUNCTION__, -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -- } -- offset &= (EXT3_BLOCK_SIZE(sb) - 1); -+ (offset * EXT3_INODE_SIZE(sb) >> EXT3_BLOCK_SIZE_BITS(sb)); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ bh[0] = sb_getblk(inode->i_sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. -+ */ -+ if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ mark_buffer_uptodate(bh[0], 1); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ inode->i_sb->u.ext3_sb.s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(inode->i_sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, -+ bh[0]->b_blocknr); -+ goto bad_inode; -+ } -+ } -+ done: -+ offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; - - return 0; -@@ -2370,6 +2417,11 @@ - } - - -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); -+} -+ - void ext3_read_inode(struct inode * inode) - { - struct ext3_iloc iloc; -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 13:01:48.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 13:21:50.000000000 +0300 -@@ -683,6 +683,8 @@ - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); diff --git a/lustre/kernel_patches/patches/ext3-noread-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-noread-2.4.21-suse2.patch deleted file mode 100644 index 66274d8..0000000 --- a/lustre/kernel_patches/patches/ext3-noread-2.4.21-suse2.patch +++ /dev/null @@ -1,218 +0,0 @@ - fs/ext3/ialloc.c | 47 ++++++++++++++++++++++- - fs/ext3/inode.c | 96 +++++++++++++++++++++++++++++++++++++----------- - include/linux/ext3_fs.h | 2 + - 3 files changed, 121 insertions(+), 24 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-16 12:21:46.000000000 +0800 -@@ -289,6 +289,37 @@ error_return: - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ int bitmap_nr = load_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ struct buffer_head *ibitmap; -+ -+ if (bitmap_nr < 0) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -310,6 +341,7 @@ struct inode * ext3_new_inode (handle_t - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -+ struct ext3_iloc iloc; - int err = 0; - - /* Cannot create files in a deleted directory */ -@@ -510,8 +542,19 @@ repeat: - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) goto fail; -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) goto fail; -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); -+ if (err) { -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) goto fail; -+ -+ - - #ifdef CONFIG_EXT3_FS_XATTR - init_rwsem(&inode->u.ext3_i.xattr_sem); ---- linux-2.4.20/fs/ext3/inode.c~ext3-noread-2.4.20 2003-05-16 12:21:41.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-16 12:22:15.000000000 +0800 -@@ -2013,14 +2013,19 @@ out_stop: - ext3_journal_stop(handle, inode); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -- --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+#define NUM_INODE_PREREAD 16 -+ -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; -@@ -2045,31 +2050,73 @@ int ext3_get_inode_loc (struct inode *in - } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -- if (!bh) { -+ if (!(inode->i_sb->u.ext3_sb.s_group_desc[group_desc])) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(inode->i_sb->u.ext3_sb.s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)); -+ - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -- } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ (offset * EXT3_INODE_SIZE(inode->i_sb) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ bh[0] = sb_getblk(inode->i_sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. -+ */ -+ if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ mark_buffer_uptodate(bh[0], 1); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ inode->i_sb->u.ext3_sb.s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(inode->i_sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, -+ bh[0]->b_blocknr); -+ goto bad_inode; -+ } -+ } -+ done: -+ offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; - - return 0; -@@ -2078,6 +2125,11 @@ int ext3_get_inode_loc (struct inode *in - return -EIO; - } - -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); -+} -+ - void ext3_read_inode(struct inode * inode) - { - struct ext3_iloc iloc; ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-16 12:21:46.000000000 +0800 -@@ -683,6 +683,8 @@ extern int ext3_forget(handle_t *, int, - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); - -_ diff --git a/lustre/kernel_patches/patches/ext3-o_direct-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-o_direct-2.4.21-chaos.patch deleted file mode 100644 index 6f7bf17..0000000 --- a/lustre/kernel_patches/patches/ext3-o_direct-2.4.21-chaos.patch +++ /dev/null @@ -1,23 +0,0 @@ - -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 16:19:13.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 16:19:16.000000000 +0300 -@@ -3154,7 +3154,7 @@ - /* alloc blocks one by one */ - for (i = 0; i < nblocks; i++) { - ret = ext3_get_block_handle(handle, inode, blocks[i], -- &bh_tmp, 1); -+ &bh_tmp, 1, 1); - if (ret) - break; - -@@ -3229,7 +3229,7 @@ - if (blocks[i] != 0) - continue; - -- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1); -+ rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); - if (rc) { - printk(KERN_INFO "ext3_map_inode_page: error %d " - "allocating block %ld\n", rc, iblock); diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.22-rh.patch b/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.22-rh.patch deleted file mode 100644 index f2d4889..0000000 --- a/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.22-rh.patch +++ /dev/null @@ -1,82 +0,0 @@ - fs/ext3/namei.c | 15 +++++++-------- - fs/ext3/namei.c.orig | 21 +++++++++++++++------ - fs/ext3/super.c | 1 + - include/linux/ext3_fs_sb.h | 1 + - include/linux/ext3_fs_sb.h.orig | 2 ++ - 5 files changed, 26 insertions(+), 14 deletions(-) - ---- linux-2.4.22-ac1/fs/ext3/namei.c~ext3-orphan_lock-2.4.22-rh 2003-09-26 00:24:09.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/namei.c 2003-09-26 00:26:36.000000000 +0400 -@@ -1748,8 +1748,8 @@ int ext3_orphan_add(handle_t *handle, st - struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; - int err = 0, rc; -- -- lock_super(sb); -+ -+ down(&EXT3_SB(sb)->s_orphan_lock); - if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - -@@ -1797,7 +1797,7 @@ int ext3_orphan_add(handle_t *handle, st - jbd_debug(4, "orphan inode %ld will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); - out_unlock: -- unlock_super(sb); -+ up(&EXT3_SB(sb)->s_orphan_lock); - ext3_std_error(inode->i_sb, err); - return err; - } -@@ -1810,20 +1810,19 @@ int ext3_orphan_del(handle_t *handle, st - { - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); -- struct ext3_sb_info *sbi; -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - -- lock_super(inode->i_sb); -+ down(&sbi->s_orphan_lock); - if (list_empty(&ei->i_orphan)) { -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; -- sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -@@ -1872,7 +1871,7 @@ int ext3_orphan_del(handle_t *handle, st - out_err: - ext3_std_error(inode->i_sb, err); - out: -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return err; - - out_brelse: ---- linux-2.4.22-ac1/fs/ext3/super.c~ext3-orphan_lock-2.4.22-rh 2003-09-26 00:24:09.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/fs/ext3/super.c 2003-09-26 00:25:22.000000000 +0400 -@@ -1164,6 +1164,7 @@ struct super_block * ext3_read_super (st - sb->s_op = &ext3_sops; - sb->dq_op = &ext3_qops; - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ -+ sema_init(&sbi->s_orphan_lock, 1); - - sb->s_root = 0; - ---- linux-2.4.22-ac1/include/linux/ext3_fs_sb.h~ext3-orphan_lock-2.4.22-rh 2003-09-26 00:24:08.000000000 +0400 -+++ linux-2.4.22-ac1-alexey/include/linux/ext3_fs_sb.h 2003-09-26 00:25:22.000000000 +0400 -@@ -72,6 +72,7 @@ struct ext3_sb_info { - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; -+ struct semaphore s_orphan_lock; - unsigned long s_commit_interval; - struct block_device *journal_bdev; - #ifdef CONFIG_JBD_DEBUG diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock.patch b/lustre/kernel_patches/patches/ext3-orphan_lock.patch deleted file mode 100644 index d1e5c8d..0000000 --- a/lustre/kernel_patches/patches/ext3-orphan_lock.patch +++ /dev/null @@ -1,79 +0,0 @@ ---- linux/fs/ext3/namei.c.orig Fri Mar 14 14:11:58 2003 -+++ linux/fs/ext3/namei.c Fri Mar 14 14:39:48 2003 -@@ -1406,8 +1409,8 @@ - struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; - int err = 0, rc; -- -- lock_super(sb); -+ -+ down(&EXT3_SB(sb)->s_orphan_lock); - if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - -@@ -1455,7 +1458,7 @@ - jbd_debug(4, "orphan inode %ld will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); - out_unlock: -- unlock_super(sb); -+ up(&EXT3_SB(sb)->s_orphan_lock); - ext3_std_error(inode->i_sb, err); - return err; - } -@@ -1468,20 +1471,19 @@ - { - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); -- struct ext3_sb_info *sbi; -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - -- lock_super(inode->i_sb); -+ down(&sbi->s_orphan_lock); - if (list_empty(&ei->i_orphan)) { -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; -- sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -@@ -1525,10 +1527,10 @@ - if (err) - goto out_brelse; - --out_err: -+out_err: - ext3_std_error(inode->i_sb, err); - out: -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return err; - - out_brelse: ---- linux/fs/ext3/super.c.orig Fri Mar 14 14:11:58 2003 -+++ linux/fs/ext3/super.c Fri Mar 14 14:36:00 2003 -@@ -1134,6 +1314,7 @@ - */ - sb->s_op = &ext3_sops; - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ -+ sema_init(&sbi->s_orphan_lock, 1); - - sb->s_root = 0; - ---- linux/include/linux/ext3_fs_sb.h.orig Tue Feb 11 16:34:33 2003 -+++ linux/include/linux/ext3_fs_sb.h Fri Mar 14 14:30:11 2003 -@@ -67,6 +69,7 @@ - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; -+ struct semaphore s_orphan_lock; - unsigned long s_commit_interval; - struct block_device *journal_bdev; - #ifdef CONFIG_JBD_DEBUG diff --git a/lustre/kernel_patches/patches/ext3-raw-lookup.patch b/lustre/kernel_patches/patches/ext3-raw-lookup.patch deleted file mode 100644 index 216accc..0000000 --- a/lustre/kernel_patches/patches/ext3-raw-lookup.patch +++ /dev/null @@ -1,61 +0,0 @@ - fs/ext3/namei.c | 29 +++++++++++++++++++++++++++++ - include/linux/fs.h | 1 + - 2 files changed, 30 insertions(+) - ---- linux-2.4.20-vanilla/include/linux/fs.h~ext3-raw-lookup 2003-09-13 17:03:05.000000000 +0400 -+++ linux-2.4.20-vanilla-alexey/include/linux/fs.h 2003-09-15 10:16:38.000000000 +0400 -@@ -865,6 +865,7 @@ struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ int (*lookup_raw) (struct inode *, const char *, int, ino_t *); - struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); - int (*link_raw) (struct nameidata *,struct nameidata *); ---- linux-2.4.20-vanilla/fs/ext3/namei.c~ext3-raw-lookup 2003-09-13 17:03:05.000000000 +0400 -+++ linux-2.4.20-vanilla-alexey/fs/ext3/namei.c 2003-09-15 10:18:52.000000000 +0400 -@@ -957,6 +957,34 @@ static struct dentry *ext3_lookup(struct - return NULL; - } - -+static int ext3_lookup_raw(struct inode *dir, const char *name, -+ int len, ino_t *data) -+{ -+ struct ext3_dir_entry_2 *de; -+ struct buffer_head *bh; -+ struct dentry parent; -+ struct dentry dentry; -+ -+ if (len > EXT3_NAME_LEN) -+ return -ENAMETOOLONG; -+ -+ parent.d_inode = dir; -+ dentry.d_parent = &parent; -+ dentry.d_name.name = name; -+ dentry.d_name.len = len; -+ -+ bh = ext3_find_entry(&dentry, &de); -+ if (bh) { -+ unsigned long ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ if (data) -+ *data = ino; -+ return 0; /* found name */ -+ } -+ -+ return -ENOENT; -+} -+ - #define S_SHIFT 12 - static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] EXT3_FT_REG_FILE, -@@ -2247,6 +2275,7 @@ end_rename: - struct inode_operations ext3_dir_inode_operations = { - create: ext3_create, /* BKL held */ - lookup: ext3_lookup, /* BKL held */ -+ lookup_raw: ext3_lookup_raw, /* BKL held */ - link: ext3_link, /* BKL held */ - unlink: ext3_unlink, /* BKL held */ - symlink: ext3_symlink, /* BKL held */ - -_ diff --git a/lustre/kernel_patches/patches/ext3-remove-cond_resched-calls-2.6.12.patch b/lustre/kernel_patches/patches/ext3-remove-cond_resched-calls-2.6.12.patch deleted file mode 100644 index 57898d5..0000000 --- a/lustre/kernel_patches/patches/ext3-remove-cond_resched-calls-2.6.12.patch +++ /dev/null @@ -1,29 +0,0 @@ -Index: linux-stage/fs/ext3/ialloc.c -=================================================================== ---- linux-stage.orig/fs/ext3/ialloc.c 2005-06-26 10:59:43.048185981 +0200 -+++ linux-stage/fs/ext3/ialloc.c 2005-06-26 11:01:21.317716027 +0200 -@@ -775,7 +775,6 @@ - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); -- cond_resched(); - } - return desc_count; - #endif -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2005-06-26 10:59:43.205412542 +0200 -+++ linux-stage/fs/ext3/super.c 2005-06-26 11:02:29.599941754 +0200 -@@ -2236,11 +2232,9 @@ - * block group descriptors. If the sparse superblocks - * feature is turned on, then not all groups have this. - */ -- for (i = 0; i < ngroups; i++) { -+ for (i = 0; i < ngroups; i++) - overhead += ext3_bg_has_super(sb, i) + - ext3_bg_num_gdb(sb, i); -- cond_resched(); -- } - - /* - * Every block group has an inode bitmap, a block diff --git a/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch deleted file mode 100644 index f323584..0000000 --- a/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch +++ /dev/null @@ -1,263 +0,0 @@ -Index: linux-2.6.5-sles9/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:29:14.878513832 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:32:14.151260232 +0300 -@@ -709,7 +709,7 @@ - unsigned int block_group, - struct buffer_head ** bh); - extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); --extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv); -+extern void rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:32:27.996155488 +0300 -@@ -86,7 +86,7 @@ - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; - struct rb_root s_rsv_window_root; -- struct reserve_window_node s_rsv_window_head; -+ struct ext3_reserve_window_node s_rsv_window_head; - - /* Journaling */ - struct inode * s_journal_inode; -Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 -+++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:32:08.752081032 +0300 -@@ -20,17 +20,17 @@ - #include - #include - --struct reserve_window { -+struct ext3_reserve_window { - __u32 _rsv_start; /* First byte reserved */ - __u32 _rsv_end; /* Last byte reserved or 0 */ - }; - --struct reserve_window_node { -+struct ext3_reserve_window_node { - struct rb_node rsv_node; - atomic_t rsv_goal_size; - atomic_t rsv_alloc_hit; - seqlock_t rsv_seqlock; -- struct reserve_window rsv_window; -+ struct ext3_reserve_window rsv_window; - }; - - #define rsv_start rsv_window._rsv_start -@@ -76,7 +76,7 @@ - */ - __u32 i_next_alloc_goal; - /* block reservation window */ -- struct reserve_window_node i_rsv_window; -+ struct ext3_reserve_window_node i_rsv_window; - - __u32 i_dir_start_lookup; - #ifdef CONFIG_EXT3_FS_XATTR -Index: linux-2.6.5-sles9/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 -+++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:32:43.108858008 +0300 -@@ -115,7 +115,7 @@ - const char *fn) - { - struct rb_node *n; -- struct reserve_window_node *rsv, *prev; -+ struct ext3_reserve_window_node *rsv, *prev; - int bad; - - restart: -@@ -125,7 +125,7 @@ - - printk("Block Allocation Reservation Windows Map (%s):\n", fn); - while (n) { -- rsv = list_entry(n, struct reserve_window_node, rsv_node); -+ rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); - if (verbose) - printk("reservation window 0x%p " - "start: %d, end: %d\n", -@@ -161,7 +161,7 @@ - #endif - - static int --goal_in_my_reservation(struct reserve_window *rsv, int goal, -+goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, - unsigned int group, struct super_block * sb) - { - unsigned long group_first_block, group_last_block; -@@ -184,18 +184,18 @@ - * if the goal is not in any window. - * Returns NULL if there are no windows or if all windows start after the goal. - */ --static struct reserve_window_node *search_reserve_window(struct rb_root *root, -+static struct ext3_reserve_window_node *search_ext3_reserve_window(struct rb_root *root, - unsigned long goal) - { - struct rb_node *n = root->rb_node; -- struct reserve_window_node *rsv; -+ struct ext3_reserve_window_node *rsv; - - if (!n) - return NULL; - - while (n) - { -- rsv = rb_entry(n, struct reserve_window_node, rsv_node); -+ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); - - if (goal < rsv->rsv_start) - n = n->rb_left; -@@ -212,13 +212,13 @@ - */ - if (rsv->rsv_start > goal) { - n = rb_prev(&rsv->rsv_node); -- rsv = rb_entry(n, struct reserve_window_node, rsv_node); -+ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); - } - return rsv; - } - - void rsv_window_add(struct super_block *sb, -- struct reserve_window_node *rsv) -+ struct ext3_reserve_window_node *rsv) - { - struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; - struct rb_node *node = &rsv->rsv_node; -@@ -226,12 +226,12 @@ - - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; -- struct reserve_window_node *this; -+ struct ext3_reserve_window_node *this; - - while (*p) - { - parent = *p; -- this = rb_entry(parent, struct reserve_window_node, rsv_node); -+ this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); - - if (start < this->rsv_start) - p = &(*p)->rb_left; -@@ -246,7 +246,7 @@ - } - - static void rsv_window_remove(struct super_block *sb, -- struct reserve_window_node *rsv) -+ struct ext3_reserve_window_node *rsv) - { - rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; -@@ -254,7 +254,7 @@ - rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); - } - --static inline int rsv_is_empty(struct reserve_window *rsv) -+static inline int rsv_is_empty(struct ext3_reserve_window *rsv) - { - /* a valid reservation end block could not be 0 */ - return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); -@@ -263,7 +263,7 @@ - void ext3_discard_reservation(struct inode *inode) - { - struct ext3_inode_info *ei = EXT3_I(inode); -- struct reserve_window_node *rsv = &ei->i_rsv_window; -+ struct ext3_reserve_window_node *rsv = &ei->i_rsv_window; - spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; - - if (!rsv_is_empty(&rsv->rsv_window)) { -@@ -600,7 +600,7 @@ - */ - static int - ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, -- struct buffer_head *bitmap_bh, int goal, struct reserve_window *my_rsv) -+ struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv) - { - int group_first_block, start, end; - -@@ -700,13 +700,13 @@ - * on succeed, it returns the reservation window to be appended to. - * failed, return NULL. - */ --static struct reserve_window_node *find_next_reservable_window( -- struct reserve_window_node *search_head, -+static struct ext3_reserve_window_node *find_next_reservable_window( -+ struct ext3_reserve_window_node *search_head, - unsigned long size, int *start_block, - int last_block) - { - struct rb_node *next; -- struct reserve_window_node *rsv, *prev; -+ struct ext3_reserve_window_node *rsv, *prev; - int cur; - - /* TODO: make the start of the reservation window byte-aligned */ -@@ -734,7 +734,7 @@ - - prev = rsv; - next = rb_next(&rsv->rsv_node); -- rsv = list_entry(next, struct reserve_window_node, rsv_node); -+ rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); - - /* - * Reached the last reservation, we can just append to the -@@ -801,15 +801,15 @@ - * @group: the group we are trying to allocate in - * @bitmap_bh: the block group block bitmap - */ --static int alloc_new_reservation(struct reserve_window_node *my_rsv, -+static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, - int goal, struct super_block *sb, - unsigned int group, struct buffer_head *bitmap_bh) - { -- struct reserve_window_node *search_head; -+ struct ext3_reserve_window_node *search_head; - int group_first_block, group_end_block, start_block; - int first_free_block; - int reservable_space_start; -- struct reserve_window_node *prev_rsv; -+ struct ext3_reserve_window_node *prev_rsv; - struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; - unsigned long size; - -@@ -859,7 +859,7 @@ - /* - * shift the search start to the window near the goal block - */ -- search_head = search_reserve_window(fs_rsv_root, start_block); -+ search_head = search_ext3_reserve_window(fs_rsv_root, start_block); - - /* - * find_next_reservable_window() simply finds a reservable window -@@ -968,7 +968,7 @@ - static int - ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, - unsigned int group, struct buffer_head *bitmap_bh, -- int goal, struct reserve_window_node * my_rsv, -+ int goal, struct ext3_reserve_window_node * my_rsv, - int *errp) - { - spinlock_t *rsv_lock; -@@ -1027,7 +1027,7 @@ - * then we could go to allocate from the reservation window directly. - */ - while (1) { -- struct reserve_window rsv_copy; -+ struct ext3_reserve_window rsv_copy; - unsigned int seq; - - do { -@@ -1159,8 +1159,8 @@ - struct ext3_group_desc *gdp; - struct ext3_super_block *es; - struct ext3_sb_info *sbi; -- struct reserve_window_node *my_rsv = NULL; -- struct reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; -+ struct ext3_reserve_window_node *my_rsv = NULL; -+ struct ext3_reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; - unsigned short windowsz = 0; - #ifdef EXT3FS_DEBUG - static int goal_hits, goal_attempts; diff --git a/lustre/kernel_patches/patches/ext3-san-2.4.20.patch b/lustre/kernel_patches/patches/ext3-san-2.4.20.patch deleted file mode 100644 index 148f4e3..0000000 --- a/lustre/kernel_patches/patches/ext3-san-2.4.20.patch +++ /dev/null @@ -1,117 +0,0 @@ - fs/ext3/ext3-exports.c | 9 ++++- - fs/ext3/inode.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 89 insertions(+), 1 deletion(-) - ---- linux/fs/ext3/inode.c~ext3-san-2.4.20-hp Tue Apr 29 11:01:52 2003 -+++ linux-mmonroe/fs/ext3/inode.c Tue Apr 29 11:01:53 2003 -@@ -2734,3 +2734,84 @@ int ext3_change_inode_journal_flag(struc - * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we - * need to extend" test in ext3_prepare_write() succeeds. - */ -+ -+/* for each block: 1 ind + 1 dind + 1 tind -+ * for each block: 3 bitmap blocks -+ * for each block: 3 group descriptor blocks -+ * i inode block -+ * 1 superblock -+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ * -+ * XXX assuming: -+ * (1) fs logic block size == page size -+ * (2) ext3 in writeback mode -+ */ -+static inline int ext3_san_write_trans_blocks(int nblocks) -+{ -+ int ret; -+ -+ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; -+ -+#ifdef CONFIG_QUOTA -+ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return ret; -+} -+ -+/* Alloc blocks for an inode, while don't create any buffer/page -+ * for data I/O; set the inode size if file is extended. -+ * -+ * @inode: target inode -+ * @blocks: array of logic block number -+ * @nblocks: how many blocks need be alloced -+ * @newsize: new filesize we should set -+ * -+ * return: 0 success, otherwise failed -+ * (*blocks) contains physical block number alloced -+ * -+ * XXX this assume the fs block size == page size -+ */ -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize) -+{ -+ handle_t *handle; -+ struct buffer_head bh_tmp; -+ int needed_blocks; -+ int i, ret = 0, ret2; -+ -+ needed_blocks = ext3_san_write_trans_blocks(nblocks); -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) { -+ unlock_kernel(); -+ return PTR_ERR(handle); -+ } -+ unlock_kernel(); -+ -+ /* alloc blocks one by one */ -+ for (i = 0; i < nblocks; i++) { -+ ret = ext3_get_block_handle(handle, inode, blocks[i], -+ &bh_tmp, 1); -+ if (ret) -+ break; -+ -+ blocks[i] = bh_tmp.b_blocknr; -+ } -+ -+ /* set inode size if needed */ -+ if (!ret && (newsize > inode->i_size)) { -+ inode->i_size = newsize; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ lock_kernel(); -+ ret2 = ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ -+ if (!ret) -+ ret = ret2; -+ return ret; -+} ---- linux/fs/ext3/ext3-exports.c~ext3-san-2.4.20-hp Tue Apr 29 11:01:51 2003 -+++ linux-mmonroe/fs/ext3/ext3-exports.c Tue Apr 29 11:07:19 2003 -@@ -1,9 +1,15 @@ - #include - #include --#include -+#include -+#include -+#include - #include -+#include - #include - -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize); -+ - EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - EXPORT_SYMBOL(ext3_xattr_register); -@@ -11,3 +17,4 @@ EXPORT_SYMBOL(ext3_xattr_unregister); - EXPORT_SYMBOL(ext3_xattr_get); - EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); -+EXPORT_SYMBOL(ext3_prep_san_write); - -_ diff --git a/lustre/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch deleted file mode 100644 index afda0bd..0000000 --- a/lustre/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch +++ /dev/null @@ -1,106 +0,0 @@ - fs/ext3/inode.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/super.c | 4 ++ - 2 files changed, 85 insertions(+) - ---- linux-2.5.73/fs/ext3/inode.c~ext3-san-jdike-2.5.73 2003-06-22 12:32:58.000000000 -0600 -+++ linux-2.5.73-braam/fs/ext3/inode.c 2003-06-30 12:19:21.000000000 -0600 -@@ -2945,3 +2945,84 @@ int ext3_change_inode_journal_flag(struc - - return err; - } -+ -+/* for each block: 1 ind + 1 dind + 1 tind -+ * for each block: 3 bitmap blocks -+ * for each block: 3 group descriptor blocks -+ * i inode block -+ * 1 superblock -+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ * -+ * XXX assuming: -+ * (1) fs logic block size == page size -+ * (2) ext3 in writeback mode -+ */ -+static inline int ext3_san_write_trans_blocks(int nblocks) -+{ -+ int ret; -+ -+ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; -+ -+#ifdef CONFIG_QUOTA -+ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return ret; -+} -+ -+/* Alloc blocks for an inode, while don't create any buffer/page -+ * for data I/O; set the inode size if file is extended. -+ * -+ * @inode: target inode -+ * @blocks: array of logic block number -+ * @nblocks: how many blocks need be alloced -+ * @newsize: new filesize we should set -+ * -+ * return: 0 success, otherwise failed -+ * (*blocks) contains physical block number alloced -+ * -+ * XXX this assume the fs block size == page size -+ */ -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize) -+{ -+ handle_t *handle; -+ struct buffer_head bh_tmp; -+ int needed_blocks; -+ int i, ret = 0, ret2; -+ -+ needed_blocks = ext3_san_write_trans_blocks(nblocks); -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) { -+ unlock_kernel(); -+ return PTR_ERR(handle); -+ } -+ unlock_kernel(); -+ -+ /* alloc blocks one by one */ -+ for (i = 0; i < nblocks; i++) { -+ ret = ext3_get_block_handle(handle, inode, blocks[i], -+ &bh_tmp, 1, 1); -+ if (ret) -+ break; -+ -+ blocks[i] = bh_tmp.b_blocknr; -+ } -+ -+ /* set inode size if needed */ -+ if (!ret && (newsize > inode->i_size)) { -+ inode->i_size = newsize; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ lock_kernel(); -+ ret2 = ext3_journal_stop(handle); -+ unlock_kernel(); -+ -+ if (!ret) -+ ret = ret2; -+ return ret; -+} ---- linux-2.5.73/fs/ext3/super.c~ext3-san-jdike-2.5.73 2003-06-22 12:33:16.000000000 -0600 -+++ linux-2.5.73-braam/fs/ext3/super.c 2003-06-30 12:16:36.000000000 -0600 -@@ -2080,6 +2080,10 @@ static void __exit exit_ext3_fs(void) - exit_ext3_xattr(); - } - -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize); -+EXPORT_SYMBOL(ext3_prep_san_write); -+ - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); - -_ diff --git a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.4.patch b/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.4.patch deleted file mode 100644 index 950ec9a..0000000 --- a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.4.patch +++ /dev/null @@ -1,41 +0,0 @@ -Subject: Avoid disk sector_t overflow for >2TB ext3 filesystem -From: Mingming Cao - - -If ext3 filesystem is larger than 2TB, and sector_t is a u32 (i.e. -CONFIG_LBD not defined in the kernel), the calculation of the disk sector -will overflow. Add check at ext3_fill_super() and ext3_group_extend() to -prevent mount/remount/resize >2TB ext3 filesystem if sector_t size is 4 -bytes. - -Verified this patch on a 32 bit platform without CONFIG_LBD defined -(sector_t is 32 bits long), mount refuse to mount a 10TB ext3. - -Signed-off-by: Mingming Cao -Acked-by: Andreas Dilger -Signed-off-by: Andrew Morton ---- - - fs/ext3/resize.c | 10 ++++++++++ - fs/ext3/super.c | 10 ++++++++++ - 2 files changed, 20 insertions(+) - -diff -puN fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/super.c ---- devel/fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/super.c 2006-05-22 14:11:10.000000000 -0700 -@@ -1565,6 +1565,14 @@ static int ext3_fill_super (struct super - goto failed_mount; - } - -+ if (le32_to_cpu(es->s_blocks_count) > -+ (unsigned long)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to mount safely - %u blocks\n", -+ bdevname(sb->s_dev), le32_to_cpu(es->s_blocks_count)); -+ goto failed_mount; -+ } -+ - sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) + - EXT3_BLOCKS_PER_GROUP(sb) - 1) / -_ diff --git a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.12.patch b/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.12.patch deleted file mode 100644 index ef0f4a4..0000000 --- a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.12.patch +++ /dev/null @@ -1,64 +0,0 @@ -Subject: Avoid disk sector_t overflow for >2TB ext3 filesystem -From: Mingming Cao - - -If ext3 filesystem is larger than 2TB, and sector_t is a u32 (i.e. -CONFIG_LBD not defined in the kernel), the calculation of the disk sector -will overflow. Add check at ext3_fill_super() and ext3_group_extend() to -prevent mount/remount/resize >2TB ext3 filesystem if sector_t size is 4 -bytes. - -Verified this patch on a 32 bit platform without CONFIG_LBD defined -(sector_t is 32 bits long), mount refuse to mount a 10TB ext3. - -Signed-off-by: Mingming Cao -Acked-by: Andreas Dilger -Signed-off-by: Andrew Morton ---- - - fs/ext3/resize.c | 10 ++++++++++ - fs/ext3/super.c | 10 ++++++++++ - 2 files changed, 20 insertions(+) - -diff -puN fs/ext3/resize.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/resize.c ---- devel/fs/ext3/resize.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/resize.c 2006-05-22 14:10:56.000000000 -0700 -@@ -926,6 +926,16 @@ int ext3_group_extend(struct super_block - if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) - return 0; - -+ if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to resize to %lu blocks safely\n", -+ sb->s_id, n_blocks_count); -+ if (sizeof(sector_t) < 8) -+ ext3_warning(sb, __FUNCTION__, -+ "CONFIG_LBD not enabled\n"); -+ return -EINVAL; -+ } -+ - if (n_blocks_count < o_blocks_count) { - ext3_warning(sb, __FUNCTION__, - "can't shrink FS - resize aborted"); -diff -puN fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/super.c ---- devel/fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/super.c 2006-05-22 14:11:10.000000000 -0700 -@@ -1565,6 +1565,17 @@ static int ext3_fill_super (struct super - goto failed_mount; - } - -+ if (le32_to_cpu(es->s_blocks_count) > -+ (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to mount safely - %u blocks\n", sb->s_id, -+ le32_to_cpu(es->s_blocks_count)); -+ if (sizeof(sector_t) < 8) -+ printk(KERN_WARNING -+ "EXT3-fs: CONFIG_LBD not enabled\n"); -+ goto failed_mount; -+ } -+ - if (EXT3_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext3; - sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - -_ diff --git a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.5-suse.patch b/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.5-suse.patch deleted file mode 100644 index fe655da..0000000 --- a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.5-suse.patch +++ /dev/null @@ -1,44 +0,0 @@ -Subject: Avoid disk sector_t overflow for >2TB ext3 filesystem -From: Mingming Cao - - -If ext3 filesystem is larger than 2TB, and sector_t is a u32 (i.e. -CONFIG_LBD not defined in the kernel), the calculation of the disk sector -will overflow. Add check at ext3_fill_super() and ext3_group_extend() to -prevent mount/remount/resize >2TB ext3 filesystem if sector_t size is 4 -bytes. - -Verified this patch on a 32 bit platform without CONFIG_LBD defined -(sector_t is 32 bits long), mount refuse to mount a 10TB ext3. - -Signed-off-by: Mingming Cao -Acked-by: Andreas Dilger -Signed-off-by: Andrew Morton ---- - - fs/ext3/resize.c | 10 ++++++++++ - fs/ext3/super.c | 10 ++++++++++ - 2 files changed, 20 insertions(+) - -diff -puN fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/super.c ---- devel/fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/super.c 2006-05-22 14:11:10.000000000 -0700 -@@ -1565,6 +1565,17 @@ static int ext3_fill_super (struct super - goto failed_mount; - } - -+ if (le32_to_cpu(es->s_blocks_count) > -+ (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to mount safely - %u blocks\n", sb->s_id, -+ le32_to_cpu(es->s_blocks_count)); -+ if (sizeof(sector_t) < 8) -+ printk(KERN_WARNING -+ "EXT3-fs: CONFIG_LBD not enabled\n"); -+ goto failed_mount; -+ } -+ - sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) + - EXT3_BLOCKS_PER_GROUP(sb) - 1) / -_ diff --git a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.9-rhel4.patch deleted file mode 100644 index 9bfdf80..0000000 --- a/lustre/kernel_patches/patches/ext3-sector_t-overflow-2.6.9-rhel4.patch +++ /dev/null @@ -1,64 +0,0 @@ -Subject: Avoid disk sector_t overflow for >2TB ext3 filesystem -From: Mingming Cao - - -If ext3 filesystem is larger than 2TB, and sector_t is a u32 (i.e. -CONFIG_LBD not defined in the kernel), the calculation of the disk sector -will overflow. Add check at ext3_fill_super() and ext3_group_extend() to -prevent mount/remount/resize >2TB ext3 filesystem if sector_t size is 4 -bytes. - -Verified this patch on a 32 bit platform without CONFIG_LBD defined -(sector_t is 32 bits long), mount refuse to mount a 10TB ext3. - -Signed-off-by: Mingming Cao -Acked-by: Andreas Dilger -Signed-off-by: Andrew Morton ---- - - fs/ext3/resize.c | 10 ++++++++++ - fs/ext3/super.c | 10 ++++++++++ - 2 files changed, 20 insertions(+) - -diff -puN fs/ext3/resize.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/resize.c ---- devel/fs/ext3/resize.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/resize.c 2006-05-22 14:10:56.000000000 -0700 -@@ -926,6 +926,16 @@ int ext3_group_extend(struct super_block - if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) - return 0; - -+ if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to resize to %lu blocks safely\n", -+ sb->s_id, n_blocks_count); -+ if (sizeof(sector_t) < 8) -+ ext3_warning(sb, __FUNCTION__, -+ "CONFIG_LBD not enabled\n"); -+ return -EINVAL; -+ } -+ - if (n_blocks_count < o_blocks_count) { - ext3_warning(sb, __FUNCTION__, - "can't shrink FS - resize aborted"); -diff -puN fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem fs/ext3/super.c ---- devel/fs/ext3/super.c~avoid-disk-sector_t-overflow-for-2tb-ext3-filesystem 2006-05-22 14:09:53.000000000 -0700 -+++ devel-akpm/fs/ext3/super.c 2006-05-22 14:11:10.000000000 -0700 -@@ -1565,6 +1565,17 @@ static int ext3_fill_super (struct super - goto failed_mount; - } - -+ if (le32_to_cpu(es->s_blocks_count) > -+ (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { -+ printk(KERN_ERR "EXT3-fs: filesystem on %s: " -+ "too large to mount safely - %u blocks\n", sb->s_id, -+ le32_to_cpu(es->s_blocks_count)); -+ if (sizeof(sector_t) < 8) -+ printk(KERN_WARNING -+ "EXT3-fs: CONFIG_LBD not enabled\n"); -+ goto failed_mount; -+ } -+ - sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) + - EXT3_BLOCKS_PER_GROUP(sb) - 1) / -_ diff --git a/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch b/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch deleted file mode 100644 index ad7d79b..0000000 --- a/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch +++ /dev/null @@ -1,177 +0,0 @@ -Index: linux-2.6.12/fs/ext3/super.c -=================================================================== ---- linux-2.6.12.orig/fs/ext3/super.c 2005-06-17 13:48:29.000000000 -0600 -+++ linux-2.6.12/fs/ext3/super.c 2005-11-25 05:59:47.000000000 -0700 -@@ -2165,13 +2165,13 @@ - { - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - unsigned long overhead; -- int i; - - if (test_opt (sb, MINIX_DF)) - overhead = 0; - else { -- unsigned long ngroups; -- ngroups = EXT3_SB(sb)->s_groups_count; -+ unsigned long ngroups = EXT3_SB(sb)->s_groups_count, group; -+ unsigned long three = 1, five = 5, seven = 7; -+ unsigned long metabg = -1UL; - smp_rmb(); - - /* -@@ -2189,11 +2188,14 @@ - * block group descriptors. If the sparse superblocks - * feature is turned on, then not all groups have this. - */ -- for (i = 0; i < ngroups; i++) { -- overhead += ext3_bg_has_super(sb, i) + -- ext3_bg_num_gdb(sb, i); -- cond_resched(); -- } -+ overhead += 1 + EXT3_SB(sb)->s_gdb_count; /* group 0 */ -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG)) -+ metabg =le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); -+ -+ while ((group = ext3_list_backups(sb, &three, &five, &seven)) < -+ ngroups) /* sb + group descriptors backups */ -+ overhead += 1 + (group >= metabg ? 1 : -+ EXT3_SB(sb)->s_gdb_count); - - /* - * Every block group has an inode bitmap, a block -@@ -2205,12 +2204,16 @@ - buf->f_type = EXT3_SUPER_MAGIC; - buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; -- buf->f_bfree = ext3_count_free_blocks (sb); -+ buf->f_bfree = percpu_counter_read(&EXT3_SB(sb)->s_freeblocks_counter); -+ if (buf->f_bfree < 0) -+ buf->f_bfree = 0; - buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) - buf->f_bavail = 0; - buf->f_files = le32_to_cpu(es->s_inodes_count); -- buf->f_ffree = ext3_count_free_inodes (sb); -+ buf->f_ffree = percpu_counter_read(&EXT3_SB(sb)->s_freeinodes_counter); -+ if (buf->f_ffree < 0) -+ buf->f_ffree = 0; - buf->f_namelen = EXT3_NAME_LEN; - return 0; - } -Index: linux-2.6.12/fs/ext3/resize.c -=================================================================== ---- linux-2.6.12.orig/fs/ext3/resize.c 2005-11-24 15:17:06.000000000 -0700 -+++ linux-2.6.12/fs/ext3/resize.c 2005-11-25 06:01:01.000000000 -0700 -@@ -285,17 +285,17 @@ - * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... - * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... - */ --static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, -- unsigned *five, unsigned *seven) -+unsigned long ext3_list_backups(struct super_block *sb, unsigned long *three, -+ unsigned long *five, unsigned long *seven) - { -- unsigned *min = three; -+ unsigned long metabg = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); -+ unsigned long *min = three, ret; - int mult = 3; -- unsigned ret; - - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { -- ret = *min; -- *min += 1; -+ ret = *three; -+ *three += 1; - return ret; - } - -@@ -308,8 +307,26 @@ - mult = 7; - } - -- ret = *min; -- *min *= mult; -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) && -+ *min >= metabg * EXT3_DESC_PER_BLOCK(sb)) { -+ ret = *min; -+ switch (ret & (EXT3_DESC_PER_BLOCK(sb) - 1)) { -+ case 0: -+ *three = ret + 1; -+ break; -+ case 1: -+ *three = ret + EXT3_DESC_PER_BLOCK(sb) - 2; -+ break; -+ default: -+ *three = (ret | (EXT3_DESC_PER_BLOCK(sb) - 1)) + 1; -+ break; -+ } -+ *five = -1UL; -+ *seven = -1UL; -+ } else { -+ ret = *min; -+ *min *= mult; -+ } - - return ret; - } -@@ -324,17 +337,17 @@ - { - const unsigned long blk = primary->b_blocknr; - const unsigned long end = EXT3_SB(sb)->s_groups_count; -- unsigned three = 1; -- unsigned five = 5; -- unsigned seven = 7; -- unsigned grp; -+ unsigned long three = 1; -+ unsigned long five = 5; -+ unsigned long seven = 7; -+ unsigned long grp; - __u32 *p = (__u32 *)primary->b_data; - int gdbackups = 0; - - while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ - ext3_warning(sb, __FUNCTION__, -- "reserved GDT %ld missing grp %d (%ld)\n", -+ "reserved GDT %ld missing grp %ld (%ld)\n", - blk, grp, - grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); - return -EINVAL; -@@ -618,10 +631,8 @@ - struct ext3_sb_info *sbi = EXT3_SB(sb); - const unsigned long last = sbi->s_groups_count; - const int bpg = EXT3_BLOCKS_PER_GROUP(sb); -- unsigned three = 1; -- unsigned five = 5; -- unsigned seven = 7; -- unsigned group; -+ unsigned long three = 1, five = 5, seven = 7; -+ unsigned long group; - int rest = sb->s_blocksize - size; - handle_t *handle; - int err = 0, err2; -@@ -672,7 +683,7 @@ - exit_err: - if (err) { - ext3_warning(sb, __FUNCTION__, -- "can't update backup for group %d (err %d), " -+ "can't update backup for group %ld (err %d), " - "forcing fsck on next reboot\n", group, err); - sbi->s_mount_state &= ~EXT3_VALID_FS; - sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); -Index: linux-2.6.12/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12.orig/include/linux/ext3_fs.h 2005-06-17 13:48:29.000000000 -0600 -+++ linux-2.6.12/include/linux/ext3_fs.h 2005-11-25 05:59:47.000000000 -0700 -@@ -788,6 +788,10 @@ - extern int ext3_group_extend(struct super_block *sb, - struct ext3_super_block *es, - unsigned long n_blocks_count); -+extern unsigned long ext3_list_backups(struct super_block *sb, -+ unsigned long *three, -+ unsigned long *five, -+ unsigned long *seven); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) diff --git a/lustre/kernel_patches/patches/ext3-truncate-buffer-head.patch b/lustre/kernel_patches/patches/ext3-truncate-buffer-head.patch deleted file mode 100644 index 3741213..0000000 --- a/lustre/kernel_patches/patches/ext3-truncate-buffer-head.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- lum/fs/ext3/inode.c~ 2004-01-30 16:47:12.000000000 -0800 -+++ lum/fs/ext3/inode.c 2004-01-30 16:46:14.000000000 -0800 -@@ -1485,6 +1485,7 @@ - if (ext3_should_order_data(inode)) - err = ext3_journal_dirty_data(handle, bh, 0); - __mark_buffer_dirty(bh); -+ buffer_insert_inode_data_queue(bh, inode); - } - - unlock: diff --git a/lustre/kernel_patches/patches/ext3-truncate_blocks.patch b/lustre/kernel_patches/patches/ext3-truncate_blocks.patch deleted file mode 100644 index ce3928d..0000000 --- a/lustre/kernel_patches/patches/ext3-truncate_blocks.patch +++ /dev/null @@ -1,92 +0,0 @@ ---- ./fs/ext3/inode.c.orig Wed Mar 12 02:44:06 2003 -+++ ./fs/ext3/inode.c Wed Mar 12 11:55:20 2003 -@@ -99,7 +99,35 @@ int ext3_forget(handle_t *handle, int is - return err; - } - --/* -+/* -+ * Work out how many blocks we need to progress with the next chunk of a -+ * truncate transaction. -+ */ -+ -+static unsigned long blocks_for_truncate(struct inode *inode) -+{ -+ unsigned long needed; -+ -+ needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); -+ -+ /* Give ourselves just enough room to cope with inodes in which -+ * i_blocks is corrupt: we've seen disk corruptions in the past -+ * which resulted in random data in an inode which looked enough -+ * like a regular file for ext3 to try to delete it. Things -+ * will go a bit crazy if that happens, but at least we should -+ * try not to panic the whole kernel. */ -+ if (needed < 2) -+ needed = 2; -+ -+ /* But we need to bound the transaction so we don't overflow the -+ * journal. */ -+ if (needed > EXT3_MAX_TRANS_DATA) -+ needed = EXT3_MAX_TRANS_DATA; -+ -+ return EXT3_DATA_TRANS_BLOCKS + needed; -+} -+ -+/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. -@@ -110,19 +138,14 @@ int ext3_forget(handle_t *handle, int is - * transaction in the top-level truncate loop. --sct - */ - --static handle_t *start_transaction(struct inode *inode) -+static handle_t *start_transaction(struct inode *inode) - { -- long needed; - handle_t *result; -- -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- -- result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed); -+ -+ result = ext3_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; -- -+ - ext3_std_error(inode->i_sb, PTR_ERR(result)); - return result; - } -@@ -135,14 +158,9 @@ static handle_t *start_transaction(struc - */ - static int try_to_extend_transaction(handle_t *handle, struct inode *inode) - { -- long needed; -- - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) - return 0; -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed)) -+ if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; - } -@@ -154,11 +172,8 @@ static int try_to_extend_transaction(han - */ - static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) - { -- long needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; - jbd_debug(2, "restarting handle %p\n", handle); -- return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed); -+ return ext3_journal_restart(handle, blocks_for_truncate(inode)); - } - - /* diff --git a/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.20.patch b/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.20.patch deleted file mode 100644 index 0a3bdb8..0000000 --- a/lustre/kernel_patches/patches/ext3-trusted_ea-2.4.20.patch +++ /dev/null @@ -1,180 +0,0 @@ - fs/ext3/xattr.c | 12 +++++- - fs/ext3/xattr_trusted.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_xattr.h | 6 +++ - 3 files changed, 102 insertions(+), 2 deletions(-) - -Index: linux-2.4.20/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.20.orig/fs/ext3/xattr.c 2003-10-22 02:29:40.000000000 +0400 -+++ linux-2.4.20/fs/ext3/xattr.c 2003-10-24 01:03:22.000000000 +0400 -@@ -1771,18 +1771,25 @@ - int __init - init_ext3_xattr(void) - { -+ int error; -+ - ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(struct mb_cache_entry_index), 1, 61); - if (!ext3_xattr_cache) - return -ENOMEM; - -- return 0; -+ error = init_ext3_xattr_trusted(); -+ if (error) -+ mb_cache_destroy(ext3_xattr_cache); -+ -+ return error; - } - - void - exit_ext3_xattr(void) - { -+ exit_ext3_xattr_trusted(); - if (ext3_xattr_cache) - mb_cache_destroy(ext3_xattr_cache); - ext3_xattr_cache = NULL; -@@ -1793,12 +1800,13 @@ - int __init - init_ext3_xattr(void) - { -- return 0; -+ return init_ext3_xattr_trusted(); - } - - void - exit_ext3_xattr(void) - { -+ exit_ext3_xattr_trusted(); - } - - #endif /* CONFIG_EXT3_FS_XATTR_SHARING */ -Index: linux-2.4.20/fs/ext3/xattr_trusted.c -=================================================================== ---- linux-2.4.20.orig/fs/ext3/xattr_trusted.c 2003-10-24 01:03:22.000000000 +0400 -+++ linux-2.4.20/fs/ext3/xattr_trusted.c 2003-10-24 01:03:22.000000000 +0400 -@@ -0,0 +1,86 @@ -+/* -+ * linux/fs/ext3/xattr_trusted.c -+ * Handler for trusted extended attributes. -+ * -+ * Copyright (C) 2003 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define XATTR_TRUSTED_PREFIX "trusted." -+ -+static size_t -+ext3_xattr_trusted_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_trusted_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_trusted_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = ext3_xattr_trusted_list, -+ .get = ext3_xattr_trusted_get, -+ .set = ext3_xattr_trusted_set, -+}; -+ -+int __init -+init_ext3_xattr_trusted(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED, -+ &ext3_xattr_trusted_handler); -+} -+ -+void -+exit_ext3_xattr_trusted(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED, -+ &ext3_xattr_trusted_handler); -+} -Index: linux-2.4.20/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.20.orig/include/linux/ext3_xattr.h 2003-10-22 02:29:39.000000000 +0400 -+++ linux-2.4.20/include/linux/ext3_xattr.h 2003-10-24 01:03:22.000000000 +0400 -@@ -21,6 +21,9 @@ - #define EXT3_XATTR_INDEX_USER 1 - #define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 - #define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+#define EXT3_XATTR_INDEX_TRUSTED 4 -+#define EXT3_XATTR_INDEX_LUSTRE 5 -+#define EXT3_XATTR_INDEX_SECURITY 6 - - struct ext3_xattr_header { - __u32 h_magic; /* magic number for identification */ -@@ -84,6 +87,9 @@ - extern int init_ext3_xattr(void) __init; - extern void exit_ext3_xattr(void); - -+extern int init_ext3_xattr_trusted(void) __init; -+extern void exit_ext3_xattr_trusted(void); -+ - # else /* CONFIG_EXT3_FS_XATTR */ - # define ext3_setxattr NULL - # define ext3_getxattr NULL -Index: linux-2.4.20/fs/ext3/Makefile -=================================================================== ---- linux-2.4.20.orig/fs/ext3/Makefile 2003-10-22 02:29:40.000000000 +0400 -+++ linux-2.4.20/fs/ext3/Makefile 2003-10-24 01:03:47.000000000 +0400 -@@ -12,7 +12,8 @@ - export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ -+ xattr_trusted.o - obj-m := $(O_TARGET) - - export-objs += xattr.o diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch new file mode 100644 index 0000000..62b1f50 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch @@ -0,0 +1,674 @@ +Add support for the uninit_groups feature to the kernel. + +Keep a high water mark of used inodes for each group to improve e2fsck time. +Block and inode bitmaps can be uninitialized on disk via a flag in the +group descriptor to avoid reading or scanning them at e2fsck time. +A checksum of each group descriptor is used to ensure that corruption in +the group descriptor's bit flags does not cause incorrect operation. + +Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h 2007-03-28 18:20:16.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h 2007-03-28 18:30:06.000000000 +0400 +@@ -153,16 +153,22 @@ struct ext3_allocation_request { + */ + struct ext3_group_desc + { +- __le32 bg_block_bitmap; /* Blocks bitmap block */ +- __le32 bg_inode_bitmap; /* Inodes bitmap block */ ++ __le32 bg_block_bitmap; /* Blocks bitmap block */ ++ __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_inode_table; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ +- __u16 bg_pad; +- __le32 bg_reserved[3]; ++ __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ ++ __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ ++ __le16 bg_itable_unused; /* Unused inodes count */ ++ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + }; + ++#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ ++#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ ++#define EXT3_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ ++ + /* + * Macro-instructions used to manage group descriptors + */ +@@ -590,6 +596,7 @@ static inline struct ext3_inode_info *EX + #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 + #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 + #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 ++#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 + #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 + + #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 +@@ -606,6 +613,7 @@ static inline struct ext3_inode_info *EX + EXT3_FEATURE_INCOMPAT_EXTENTS) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ ++ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) + +Index: linux-2.6.16.27-0.9-full/fs/ext3/resize.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/resize.c 2007-03-13 02:56:52.000000000 +0300 ++++ linux-2.6.16.27-0.9-full/fs/ext3/resize.c 2007-03-28 18:30:06.000000000 +0400 +@@ -19,6 +19,7 @@ + #include + #include + ++#include "group.h" + + #define outside(b, first, last) ((b) < (first) || (b) >= (last)) + #define inside(b, first, last) ((b) >= (first) && (b) < (last)) +@@ -818,6 +819,7 @@ int ext3_group_add(struct super_block *s + gdp->bg_inode_table = cpu_to_le32(input->inode_table); + gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); + gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); + + /* + * Make the new blocks and inodes valid next. We do this before +Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 18:25:51.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 18:30:06.000000000 +0400 +@@ -42,6 +42,7 @@ + #include "xattr.h" + #include "acl.h" + #include "namei.h" ++#include "group.h" + + static int ext3_load_journal(struct super_block *, struct ext3_super_block *, + unsigned long journal_devnum); +@@ -1221,6 +1222,90 @@ static int ext3_setup_super(struct super + return res; + } + ++#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE) ++/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ ++__u16 const crc16_table[256] = { ++ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, ++ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, ++ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, ++ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, ++ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, ++ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, ++ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, ++ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, ++ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, ++ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, ++ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, ++ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, ++ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, ++ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, ++ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, ++ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, ++ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, ++ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, ++ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, ++ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, ++ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, ++ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, ++ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, ++ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, ++ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, ++ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, ++ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, ++ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, ++ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, ++ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, ++ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, ++ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 ++}; ++ ++static inline __u16 crc16_byte(__u16 crc, const __u8 data) ++{ ++ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; ++} ++ ++__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) ++{ ++ while (len--) ++ crc = crc16_byte(crc, *buffer++); ++ return crc; ++} ++#endif ++ ++__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, ++ struct ext3_group_desc *gdp) ++{ ++ __u16 crc = 0; ++ ++ if (sbi->s_es->s_feature_ro_compat & ++ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { ++ int offset = offsetof(struct ext3_group_desc, bg_checksum); ++ __le32 le_group = cpu_to_le32(block_group); ++ ++ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); ++ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); ++ crc = crc16(crc, (__u8 *)gdp, offset); ++ offset += sizeof(gdp->bg_checksum); /* skip checksum */ ++ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ ++ /* for checksum of struct ext4_group_desc do the rest... ++ if (offset < sbi->s_es->s_desc_size) { ++ crc = crc16(crc, (__u8 *)gdp + offset, ++ sbi->s_es->s_desc_size - offset); ++ */ ++ } ++ ++ return cpu_to_le16(crc); ++} ++ ++int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, ++ struct ext3_group_desc *gdp) ++{ ++ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) ++ return 0; ++ ++ return 1; ++} ++ + /* Called at mount-time, super-block is locked */ + static int ext3_check_descriptors (struct super_block * sb) + { +@@ -1270,6 +1355,13 @@ static int ext3_check_descriptors (struc + le32_to_cpu(gdp->bg_inode_table)); + return 0; + } ++ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { ++ ext3_error(sb, __FUNCTION__, ++ "Checksum for group %d failed (%u!=%u)\n", i, ++ le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)), ++ le16_to_cpu(gdp->bg_checksum)); ++ return 0; ++ } + block += EXT3_BLOCKS_PER_GROUP(sb); + gdp++; + } +Index: linux-2.6.16.27-0.9-full/fs/ext3/group.h +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300 ++++ linux-2.6.16.27-0.9-full/fs/ext3/group.h 2007-03-28 18:30:06.000000000 +0400 +@@ -0,0 +1,29 @@ ++/* ++ * linux/fs/ext3/group.h ++ * ++ * Copyright (C) 2007 Cluster File Systems, Inc ++ * ++ * Author: Andreas Dilger ++ */ ++ ++#ifndef _LINUX_EXT3_GROUP_H ++#define _LINUX_EXT3_GROUP_H ++#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE) ++#include ++#endif ++ ++extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++struct buffer_head *read_block_bitmap(struct super_block *sb, ++ unsigned int block_group); ++extern unsigned ext3_init_block_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int group, ++ struct ext3_group_desc *desc); ++#define ext3_free_blocks_after_init(sb, group, desc) \ ++ ext3_init_block_bitmap(sb, NULL, group, desc) ++extern unsigned ext3_init_inode_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int group, ++ struct ext3_group_desc *desc); ++#endif /* _LINUX_EXT3_GROUP_H */ +Index: linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/ialloc.c 2007-03-28 18:20:17.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c 2007-03-28 18:30:06.000000000 +0400 +@@ -28,6 +28,7 @@ + + #include "xattr.h" + #include "acl.h" ++#include "group.h" + + /* + * ialloc.c contains the inodes allocation and deallocation routines +@@ -43,6 +44,52 @@ + * the free blocks count in the block. + */ + ++/* ++ * To avoid calling the atomic setbit hundreds or thousands of times, we only ++ * need to use it within a single byte (to ensure we get endianness right). ++ * We can use memset for the rest of the bitmap as there are no other users. ++ */ ++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) ++{ ++ int i; ++ ++ if (start_bit >= end_bit) ++ return; ++ ++ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); ++ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) ++ ext3_set_bit(i, bitmap); ++ if (i < end_bit) ++ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); ++} ++ ++/* Initializes an uninitialized inode bitmap */ ++unsigned ext3_init_inode_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int block_group, ++ struct ext3_group_desc *gdp) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ J_ASSERT_BH(bh, buffer_locked(bh)); ++ ++ /* If checksum is bad mark all blocks and inodes use to prevent ++ * allocation, essentially implementing a per-group read-only flag. */ ++ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { ++ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", ++ block_group); ++ gdp->bg_free_blocks_count = 0; ++ gdp->bg_free_inodes_count = 0; ++ gdp->bg_itable_unused = 0; ++ memset(bh->b_data, 0xff, sb->s_blocksize); ++ return 0; ++ } ++ ++ memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); ++ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), ++ bh->b_data); ++ ++ return EXT3_INODES_PER_GROUP(sb); ++} + + /* + * Read the inode allocation bitmap for a given block_group, reading +@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s + desc = ext3_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; +- +- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { ++ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ if (!buffer_uptodate(bh)) { ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ ext3_init_inode_bitmap(sb, bh,block_group,desc); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); ++ } ++ } else { ++ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ } + if (!bh) + ext3_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " +@@ -169,6 +227,8 @@ void ext3_free_inode (handle_t *handle, + if (is_directory) + gdp->bg_used_dirs_count = cpu_to_le16( + le16_to_cpu(gdp->bg_used_dirs_count) - 1); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, ++ gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_inc(&sbi->s_freeinodes_counter); + if (is_directory) +@@ -453,7 +513,7 @@ struct inode *ext3_new_inode(handle_t *h + struct ext3_sb_info *sbi; + int err = 0; + struct inode *ret; +- int i; ++ int i, free = 0; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) +@@ -570,11 +630,13 @@ repeat_in_this_group: + goto out; + + got: +- ino += group * EXT3_INODES_PER_GROUP(sb) + 1; +- if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { +- ext3_error (sb, "ext3_new_inode", +- "reserved inode or inode > inodes count - " +- "block_group = %d, inode=%lu", group, ino); ++ ino++; ++ if ((group == 0 && ino < EXT3_FIRST_INO(sb)) || ++ ino > EXT3_INODES_PER_GROUP(sb)) { ++ ext3_error(sb, __FUNCTION__, ++ "reserved inode or inode > inodes count - " ++ "block_group = %d, inode=%lu", group, ++ ino + group * EXT3_INODES_PER_GROUP(sb)); + err = -EIO; + goto fail; + } +@@ -582,13 +644,65 @@ got: + BUFFER_TRACE(bh2, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; ++ ++ /* We may have to initialize the block bitmap if it isn't already */ ++ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && ++ gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ struct buffer_head *block_bh = read_block_bitmap(sb, group); ++ ++ BUFFER_TRACE(block_bh, "get block bitmap access"); ++ err = ext3_journal_get_write_access(handle, block_bh); ++ if (err) { ++ brelse(block_bh); ++ goto fail; ++ } ++ ++ free = 0; ++ spin_lock(sb_bgl_lock(sbi, group)); ++ /* recheck and clear flag under lock if we still need to */ ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ free = ext3_free_blocks_after_init(sb, group, gdp); ++ gdp->bg_free_blocks_count = cpu_to_le16(free); ++ } ++ spin_unlock(sb_bgl_lock(sbi, group)); ++ ++ /* Don't need to dirty bitmap block if we didn't change it */ ++ if (free) { ++ BUFFER_TRACE(block_bh, "dirty block bitmap"); ++ err = ext3_journal_dirty_metadata(handle, block_bh); ++ } ++ ++ brelse(block_bh); ++ if (err) ++ goto fail; ++ } ++ + spin_lock(sb_bgl_lock(sbi, group)); ++ /* If we didn't allocate from within the initialized part of the inode ++ * table then we need to initialize up to this inode. */ ++ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); ++ free = EXT3_INODES_PER_GROUP(sb); ++ } else { ++ free = EXT3_INODES_PER_GROUP(sb) - ++ le16_to_cpu(gdp->bg_itable_unused); ++ } ++ ++ if (ino > free) { ++ gdp->bg_itable_unused = ++ cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino); ++ } ++ } ++ + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) { + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); + spin_unlock(sb_bgl_lock(sbi, group)); + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); +@@ -610,7 +724,7 @@ got: + inode->i_gid = current->fsgid; + inode->i_mode = mode; + +- inode->i_ino = ino; ++ inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = 0; +Index: linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/mballoc.c 2007-03-28 16:03:19.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c 2007-03-28 18:30:36.000000000 +0400 +@@ -36,6 +36,8 @@ + #include + #include + ++#include "group.h" ++ + /* + * MUSTDO: + * - test ext3_ext_search_left() and ext3_ext_search_right() +@@ -323,6 +325,7 @@ struct ext3_group_info { + unsigned long bb_state; + unsigned long bb_tid; + struct ext3_free_metadata *bb_md_cur; ++ struct ext3_group_desc *bb_gdp; + unsigned short bb_first_free; + unsigned short bb_free; + unsigned short bb_fragments; +@@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag + if (first_group + i >= EXT3_SB(sb)->s_groups_count) + break; + +- err = -EIO; +- desc = ext3_get_group_desc(sb, first_group + i, NULL); +- if (desc == NULL) +- goto out; ++ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; + + err = -ENOMEM; + bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); +@@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag + unlock_buffer(bh[i]); + continue; + } +- ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); ++ set_buffer_uptodate(bh[i]); ++ unlock_buffer(bh[i]); ++ continue; ++ } + get_bh(bh[i]); + bh[i]->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh[i]); +@@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext + switch (cr) { + case 0: + BUG_ON(ac->ac_2order == 0); ++ /* If this group is uninitialized, skip it initially */ ++ if (grp->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) ++ return 0; + bits = ac->ac_sb->s_blocksize_bits + 1; + for (i = ac->ac_2order; i <= bits; i++) + if (grp->bb_counters[i] > 0) +@@ -1796,7 +1805,9 @@ repeat: + } + + ac->ac_groups_scanned++; +- if (cr == 0) ++ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) && ++ ac->ac_2order != 0)) + ext3_mb_simple_scan_group(ac, &e3b); + else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) + ext3_mb_scan_aligned(ac, &e3b); +@@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl + i--; + goto err_freebuddy; + } ++ memset(meta_group_info[j], 0, len); + desc = ext3_get_group_desc(sb, i, NULL); ++ meta_group_info[j]->bb_gdp = desc; + if (desc == NULL) { + printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); + goto err_freebuddy; + } +- memset(meta_group_info[j], 0, len); + set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, + &meta_group_info[j]->bb_state); + +@@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + + spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(ext3_free_blocks_after_init(sb, ++ ac->ac_b_ex.fe_group, ++ gdp)); ++ } + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - ac->ac_b_ex.fe_len); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); + spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); + +@@ -4303,6 +4323,7 @@ do_more: + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c 2007-03-28 16:03:20.000000000 +0400 ++++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c 2007-03-28 18:30:06.000000000 +0400 +@@ -21,6 +21,7 @@ + #include + #include + ++#include "group.h" + /* + * balloc.c contains the blocks allocation and deallocation routines + */ +@@ -74,6 +75,75 @@ struct ext3_group_desc * ext3_get_group_ + return desc + offset; + } + ++/* Initializes an uninitialized block bitmap if given, and returns the ++ * number of blocks free in the group. */ ++unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ++ int block_group, struct ext3_group_desc *gdp) ++{ ++ unsigned long start; ++ int bit, bit_max; ++ unsigned free_blocks; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (bh) { ++ J_ASSERT_BH(bh, buffer_locked(bh)); ++ ++ /* If checksum is bad mark all blocks use to prevent allocation, ++ * essentially implementing a per-group read-only flag. */ ++ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { ++ ext3_error(sb, __FUNCTION__, ++ "Checksum bad for group %u\n", block_group); ++ gdp->bg_free_blocks_count = 0; ++ gdp->bg_free_inodes_count = 0; ++ gdp->bg_itable_unused = 0; ++ memset(bh->b_data, 0xff, sb->s_blocksize); ++ return 0; ++ } ++ memset(bh->b_data, 0, sb->s_blocksize); ++ } ++ ++ /* Check for superblock and gdt backups in this group */ ++ bit_max = ext3_bg_has_super(sb, block_group); ++ ++ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || ++ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * ++ sbi->s_desc_per_block) { ++ if (bit_max) { ++ bit_max += ext3_bg_num_gdb(sb, block_group); ++ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); ++ } ++ } else { /* For META_BG_BLOCK_GROUPS */ ++ int group_rel = (block_group - ++ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % ++ EXT3_DESC_PER_BLOCK(sb); ++ if (group_rel == 0 || group_rel == 1 || ++ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) ++ bit_max += 1; ++ } ++ ++ /* Last and first groups are always initialized */ ++ free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max; ++ ++ if (bh) { ++ for (bit = 0; bit < bit_max; bit++) ++ ext3_set_bit(bit, bh->b_data); ++ ++ start = block_group * EXT3_BLOCKS_PER_GROUP(sb) + ++ le32_to_cpu(sbi->s_es->s_first_data_block); ++ ++ /* Set bits for block and inode bitmaps, and inode table */ ++ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start, ++ bh->b_data); ++ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start, ++ bh->b_data); ++ for (bit = le32_to_cpu(gdp->bg_inode_table) - start, ++ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) ++ ext3_set_bit(bit, bh->b_data); ++ } ++ ++ return free_blocks - sbi->s_itb_per_group - 2; ++} ++ + /* + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. +@@ -89,7 +159,19 @@ read_block_bitmap(struct super_block *sb + desc = ext3_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; +- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (!buffer_uptodate(bh)) { ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ ext3_init_block_bitmap(sb, bh,block_group,desc); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); ++ } ++ } else { ++ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ } + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " +@@ -468,6 +550,7 @@ do_more: + desc->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + + group_freed); ++ desc->bg_checksum = ext3_group_desc_csum(sbi, block_group, desc); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +@@ -1378,8 +1461,11 @@ allocated: + ret_block, goal_hits, goal_attempts); + + spin_lock(sb_bgl_lock(sbi, group_no)); ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); + percpu_counter_mod(&sbi->s_freeblocks_counter, -1); + + +%diffstat + fs/ext3/balloc.c | 88 +++++++++++++++++++++++++++++ + fs/ext3/group.h | 38 ++++++++++++ + fs/ext3/ialloc.c | 144 +++++++++++++++++++++++++++++++++++++++++++----- + fs/ext3/mballoc.c | 35 +++++++++-- + fs/ext3/resize.c | 2 + fs/ext3/super.c | 92 ++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 16 ++++- + 7 files changed, 388 insertions(+), 27 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch new file mode 100644 index 0000000..8a34ea5 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch @@ -0,0 +1,653 @@ +Add support for the uninit_groups feature to the kernel. + +Keep a high water mark of used inodes for each group to improve e2fsck time. +Block and inode bitmaps can be uninitialized on disk via a flag in the +group descriptor to avoid reading or scanning them at e2fsck time. +A checksum of each group descriptor is used to ensure that corruption in +the group descriptor's bit flags does not cause incorrect operation. + +Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 17:33:05.000000000 +0400 ++++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 18:33:35.000000000 +0400 +@@ -153,16 +153,22 @@ struct ext3_allocation_request { + */ + struct ext3_group_desc + { +- __u32 bg_block_bitmap; /* Blocks bitmap block */ +- __u32 bg_inode_bitmap; /* Inodes bitmap block */ ++ __u32 bg_block_bitmap; /* Blocks bitmap block */ ++ __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ +- __u16 bg_pad; +- __u32 bg_reserved[3]; ++ __u16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ ++ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ ++ __u16 bg_itable_unused; /* Unused inodes count */ ++ __u16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + }; + ++#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ ++#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ ++#define EXT3_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ ++ + /* + * Macro-instructions used to manage group descriptors + */ +@@ -458,7 +464,7 @@ struct ext3_super_block { + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ +- __u16 s_padding1; ++ __u16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ +@@ -546,6 +552,7 @@ static inline struct ext3_inode_info *EX + #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 + #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 + #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 ++#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 + #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 + + #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 +@@ -562,6 +569,7 @@ static inline struct ext3_inode_info *EX + EXT3_FEATURE_INCOMPAT_EXTENTS) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ ++ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) + +Index: linux-2.6.5-7.283-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 17:33:05.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/super.c 2007-03-28 18:33:35.000000000 +0400 +@@ -36,6 +36,7 @@ + #include + #include "xattr.h" + #include "acl.h" ++#include "group.h" + + static int ext3_load_journal(struct super_block *, struct ext3_super_block *); + static int ext3_create_journal(struct super_block *, struct ext3_super_block *, +@@ -996,6 +997,90 @@ static int ext3_setup_super(struct super + return res; + } + ++#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE) ++/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ ++__u16 const crc16_table[256] = { ++ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, ++ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, ++ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, ++ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, ++ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, ++ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, ++ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, ++ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, ++ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, ++ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, ++ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, ++ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, ++ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, ++ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, ++ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, ++ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, ++ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, ++ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, ++ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, ++ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, ++ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, ++ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, ++ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, ++ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, ++ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, ++ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, ++ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, ++ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, ++ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, ++ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, ++ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, ++ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 ++}; ++ ++static inline __u16 crc16_byte(__u16 crc, const __u8 data) ++{ ++ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; ++} ++ ++__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) ++{ ++ while (len--) ++ crc = crc16_byte(crc, *buffer++); ++ return crc; ++} ++#endif ++ ++__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, ++ struct ext3_group_desc *gdp) ++{ ++ __u16 crc = 0; ++ ++ if (sbi->s_es->s_feature_ro_compat & ++ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { ++ int offset = offsetof(struct ext3_group_desc, bg_checksum); ++ __le32 le_group = cpu_to_le32(block_group); ++ ++ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); ++ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); ++ crc = crc16(crc, (__u8 *)gdp, offset); ++ offset += sizeof(gdp->bg_checksum); /* skip checksum */ ++ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ ++ /* for checksum of struct ext4_group_desc do the rest... ++ if (offset < sbi->s_es->s_desc_size) { ++ crc = crc16(crc, (__u8 *)gdp + offset, ++ sbi->s_es->s_desc_size - offset); ++ */ ++ } ++ ++ return cpu_to_le16(crc); ++} ++ ++int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, ++ struct ext3_group_desc *gdp) ++{ ++ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) ++ return 0; ++ ++ return 1; ++} ++ + static int ext3_check_descriptors (struct super_block * sb) + { + struct ext3_sb_info *sbi = EXT3_SB(sb); +@@ -1044,6 +1129,13 @@ static int ext3_check_descriptors (struc + le32_to_cpu(gdp->bg_inode_table)); + return 0; + } ++ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { ++ ext3_error(sb, __FUNCTION__, ++ "Checksum for group %d failed (%u!=%u)\n", i, ++ le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)), ++ le16_to_cpu(gdp->bg_checksum)); ++ return 0; ++ } + block += EXT3_BLOCKS_PER_GROUP(sb); + gdp++; + } +Index: linux-2.6.5-7.283-full/fs/ext3/group.h +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300 ++++ linux-2.6.5-7.283-full/fs/ext3/group.h 2007-03-28 18:33:35.000000000 +0400 +@@ -0,0 +1,29 @@ ++/* ++ * linux/fs/ext3/group.h ++ * ++ * Copyright (C) 2007 Cluster File Systems, Inc ++ * ++ * Author: Andreas Dilger ++ */ ++ ++#ifndef _LINUX_EXT3_GROUP_H ++#define _LINUX_EXT3_GROUP_H ++#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE) ++#include ++#endif ++ ++extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++struct buffer_head *read_block_bitmap(struct super_block *sb, ++ unsigned int block_group); ++extern unsigned ext3_init_block_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int group, ++ struct ext3_group_desc *desc); ++#define ext3_free_blocks_after_init(sb, group, desc) \ ++ ext3_init_block_bitmap(sb, NULL, group, desc) ++extern unsigned ext3_init_inode_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int group, ++ struct ext3_group_desc *desc); ++#endif /* _LINUX_EXT3_GROUP_H */ +Index: linux-2.6.5-7.283-full/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/ialloc.c 2007-03-28 17:33:03.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/ialloc.c 2007-03-28 18:33:35.000000000 +0400 +@@ -28,6 +28,7 @@ + + #include "xattr.h" + #include "acl.h" ++#include "group.h" + + /* + * ialloc.c contains the inodes allocation and deallocation routines +@@ -43,6 +44,52 @@ + * the free blocks count in the block. + */ + ++/* ++ * To avoid calling the atomic setbit hundreds or thousands of times, we only ++ * need to use it within a single byte (to ensure we get endianness right). ++ * We can use memset for the rest of the bitmap as there are no other users. ++ */ ++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) ++{ ++ int i; ++ ++ if (start_bit >= end_bit) ++ return; ++ ++ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); ++ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) ++ ext3_set_bit(i, bitmap); ++ if (i < end_bit) ++ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); ++} ++ ++/* Initializes an uninitialized inode bitmap */ ++unsigned ext3_init_inode_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int block_group, ++ struct ext3_group_desc *gdp) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ J_ASSERT_BH(bh, buffer_locked(bh)); ++ ++ /* If checksum is bad mark all blocks and inodes use to prevent ++ * allocation, essentially implementing a per-group read-only flag. */ ++ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { ++ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", ++ block_group); ++ gdp->bg_free_blocks_count = 0; ++ gdp->bg_free_inodes_count = 0; ++ gdp->bg_itable_unused = 0; ++ memset(bh->b_data, 0xff, sb->s_blocksize); ++ return 0; ++ } ++ ++ memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); ++ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), ++ bh->b_data); ++ ++ return EXT3_INODES_PER_GROUP(sb); ++} + + /* + * Read the inode allocation bitmap for a given block_group, reading +@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s + desc = ext3_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; +- +- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { ++ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ if (!buffer_uptodate(bh)) { ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ ext3_init_inode_bitmap(sb, bh,block_group,desc); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); ++ } ++ } else { ++ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); ++ } + if (!bh) + ext3_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " +@@ -168,6 +226,8 @@ void ext3_free_inode (handle_t *handle, + if (is_directory) + gdp->bg_used_dirs_count = cpu_to_le16( + le16_to_cpu(gdp->bg_used_dirs_count) - 1); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, ++ gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_inc(&sbi->s_freeinodes_counter); + if (is_directory) +@@ -454,7 +514,7 @@ struct inode *ext3_new_inode(handle_t *h + struct ext3_sb_info *sbi; + int err = 0; + struct inode *ret; +- int i; ++ int i, free = 0; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) +@@ -570,11 +630,13 @@ repeat_in_this_group: + goto out; + + got: +- ino += group * EXT3_INODES_PER_GROUP(sb) + 1; +- if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { +- ext3_error (sb, "ext3_new_inode", +- "reserved inode or inode > inodes count - " +- "block_group = %d, inode=%lu", group, ino); ++ ino++; ++ if ((group == 0 && ino < EXT3_FIRST_INO(sb)) || ++ ino > EXT3_INODES_PER_GROUP(sb)) { ++ ext3_error(sb, __FUNCTION__, ++ "reserved inode or inode > inodes count - " ++ "block_group = %d, inode=%lu", group, ++ ino + group * EXT3_INODES_PER_GROUP(sb)); + err = -EIO; + goto fail; + } +@@ -582,13 +644,65 @@ got: + BUFFER_TRACE(bh2, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; ++ ++ /* We may have to initialize the block bitmap if it isn't already */ ++ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && ++ gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ struct buffer_head *block_bh = read_block_bitmap(sb, group); ++ ++ BUFFER_TRACE(block_bh, "get block bitmap access"); ++ err = ext3_journal_get_write_access(handle, block_bh); ++ if (err) { ++ brelse(block_bh); ++ goto fail; ++ } ++ ++ free = 0; ++ spin_lock(sb_bgl_lock(sbi, group)); ++ /* recheck and clear flag under lock if we still need to */ ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ free = ext3_free_blocks_after_init(sb, group, gdp); ++ gdp->bg_free_blocks_count = cpu_to_le16(free); ++ } ++ spin_unlock(sb_bgl_lock(sbi, group)); ++ ++ /* Don't need to dirty bitmap block if we didn't change it */ ++ if (free) { ++ BUFFER_TRACE(block_bh, "dirty block bitmap"); ++ err = ext3_journal_dirty_metadata(handle, block_bh); ++ } ++ ++ brelse(block_bh); ++ if (err) ++ goto fail; ++ } ++ + spin_lock(sb_bgl_lock(sbi, group)); ++ /* If we didn't allocate from within the initialized part of the inode ++ * table then we need to initialize up to this inode. */ ++ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); ++ free = EXT3_INODES_PER_GROUP(sb); ++ } else { ++ free = EXT3_INODES_PER_GROUP(sb) - ++ le16_to_cpu(gdp->bg_itable_unused); ++ } ++ ++ if (ino > free) { ++ gdp->bg_itable_unused = ++ cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino); ++ } ++ } ++ + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) { + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); + spin_unlock(sb_bgl_lock(sbi, group)); + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); +@@ -610,7 +724,7 @@ got: + inode->i_gid = current->fsgid; + inode->i_mode = mode; + +- inode->i_ino = ino; ++ inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = 0; +Index: linux-2.6.5-7.283-full/fs/ext3/mballoc.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/mballoc.c 2007-03-28 15:46:00.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/mballoc.c 2007-03-28 18:33:35.000000000 +0400 +@@ -36,6 +36,8 @@ + #include + #include + ++#include "group.h" ++ + /* + * MUSTDO: + * - test ext3_ext_search_left() and ext3_ext_search_right() +@@ -323,6 +325,7 @@ struct ext3_group_info { + unsigned long bb_state; + unsigned long bb_tid; + struct ext3_free_metadata *bb_md_cur; ++ struct ext3_group_desc *bb_gdp; + unsigned short bb_first_free; + unsigned short bb_free; + unsigned short bb_fragments; +@@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag + if (first_group + i >= EXT3_SB(sb)->s_groups_count) + break; + +- err = -EIO; +- desc = ext3_get_group_desc(sb, first_group + i, NULL); +- if (desc == NULL) +- goto out; ++ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; + + err = -ENOMEM; + bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); +@@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag + unlock_buffer(bh[i]); + continue; + } +- ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); ++ set_buffer_uptodate(bh[i]); ++ unlock_buffer(bh[i]); ++ continue; ++ } + get_bh(bh[i]); + bh[i]->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh[i]); +@@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext + switch (cr) { + case 0: + BUG_ON(ac->ac_2order == 0); ++ /* If this group is uninitialized, skip it initially */ ++ if (grp->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) ++ return 0; + bits = ac->ac_sb->s_blocksize_bits + 1; + for (i = ac->ac_2order; i <= bits; i++) + if (grp->bb_counters[i] > 0) +@@ -1796,7 +1805,9 @@ repeat: + } + + ac->ac_groups_scanned++; +- if (cr == 0) ++ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) && ++ ac->ac_2order != 0)) + ext3_mb_simple_scan_group(ac, &e3b); + else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) + ext3_mb_scan_aligned(ac, &e3b); +@@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl + i--; + goto err_freebuddy; + } ++ memset(meta_group_info[j], 0, len); + desc = ext3_get_group_desc(sb, i, NULL); ++ meta_group_info[j]->bb_gdp = desc; + if (desc == NULL) { + printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); + goto err_freebuddy; + } +- memset(meta_group_info[j], 0, len); + set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, + &meta_group_info[j]->bb_state); + +@@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + + spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(ext3_free_blocks_after_init(sb, ++ ac->ac_b_ex.fe_group, ++ gdp)); ++ } + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - ac->ac_b_ex.fe_len); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); + spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); + +@@ -4303,6 +4323,7 @@ do_more: + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c 2007-03-28 17:33:02.000000000 +0400 ++++ linux-2.6.5-7.283-full/fs/ext3/balloc.c 2007-03-28 18:33:35.000000000 +0400 +@@ -20,6 +20,7 @@ + #include + #include + ++#include "group.h" + /* + * balloc.c contains the blocks allocation and deallocation routines + */ +@@ -72,6 +73,75 @@ struct ext3_group_desc * ext3_get_group_ + return gdp + desc; + } + ++/* Initializes an uninitialized block bitmap if given, and returns the ++ * number of blocks free in the group. */ ++unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ++ int block_group, struct ext3_group_desc *gdp) ++{ ++ unsigned long start; ++ int bit, bit_max; ++ unsigned free_blocks; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (bh) { ++ J_ASSERT_BH(bh, buffer_locked(bh)); ++ ++ /* If checksum is bad mark all blocks use to prevent allocation, ++ * essentially implementing a per-group read-only flag. */ ++ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { ++ ext3_error(sb, __FUNCTION__, ++ "Checksum bad for group %u\n", block_group); ++ gdp->bg_free_blocks_count = 0; ++ gdp->bg_free_inodes_count = 0; ++ gdp->bg_itable_unused = 0; ++ memset(bh->b_data, 0xff, sb->s_blocksize); ++ return 0; ++ } ++ memset(bh->b_data, 0, sb->s_blocksize); ++ } ++ ++ /* Check for superblock and gdt backups in this group */ ++ bit_max = ext3_bg_has_super(sb, block_group); ++ ++ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || ++ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * ++ sbi->s_desc_per_block) { ++ if (bit_max) { ++ bit_max += ext3_bg_num_gdb(sb, block_group); ++ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); ++ } ++ } else { /* For META_BG_BLOCK_GROUPS */ ++ int group_rel = (block_group - ++ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % ++ EXT3_DESC_PER_BLOCK(sb); ++ if (group_rel == 0 || group_rel == 1 || ++ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) ++ bit_max += 1; ++ } ++ ++ /* Last and first groups are always initialized */ ++ free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max; ++ ++ if (bh) { ++ for (bit = 0; bit < bit_max; bit++) ++ ext3_set_bit(bit, bh->b_data); ++ ++ start = block_group * EXT3_BLOCKS_PER_GROUP(sb) + ++ le32_to_cpu(sbi->s_es->s_first_data_block); ++ ++ /* Set bits for block and inode bitmaps, and inode table */ ++ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start, ++ bh->b_data); ++ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start, ++ bh->b_data); ++ for (bit = le32_to_cpu(gdp->bg_inode_table) - start, ++ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) ++ ext3_set_bit(bit, bh->b_data); ++ } ++ ++ return free_blocks - sbi->s_itb_per_group - 2; ++} ++ + /* + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. +@@ -87,7 +157,19 @@ read_block_bitmap(struct super_block *sb + desc = ext3_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; +- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (!buffer_uptodate(bh)) { ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ ext3_init_block_bitmap(sb, bh,block_group,desc); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); ++ } ++ } else { ++ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ } + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " +@@ -432,6 +514,7 @@ do_more: + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + + dquot_freed_blocks); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +@@ -1372,8 +1455,11 @@ allocated: + ret_block, goal_hits, goal_attempts); + + spin_lock(sb_bgl_lock(sbi, group_no)); ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); + percpu_counter_mod(&sbi->s_freeblocks_counter, -1); + diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch deleted file mode 100644 index f66d713..0000000 --- a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch +++ /dev/null @@ -1,779 +0,0 @@ -Keep a high water mark of used inodes for each group to improve e2fsck time. -Block and inode bitmaps can be uninitialized on disk via a flag in the -group descriptor to avoid reading or scanning them at e2fsck time. -A checksum of each group descriptor is used to ensure that corruption in -the group descriptor's bit flags does not cause incorrect operation. - -Index: linux-stage/fs/ext3/balloc.c -=================================================================== ---- linux-stage.orig/fs/ext3/balloc.c 2007-03-14 04:44:12.000000000 -0400 -+++ linux-stage/fs/ext3/balloc.c 2007-03-14 16:23:48.000000000 -0400 -@@ -73,6 +73,95 @@ struct ext3_group_desc * ext3_get_group_ - return gdp + desc; - } - -+unsigned long ext3_free_blocks_after_init(struct super_block *sb, -+ int block_group, -+ struct ext3_group_desc *gdp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned long blks; -+ int has_sb; -+ -+ /* Last and first groups are always initialized */ -+ blks = le32_to_cpu(EXT3_BLOCKS_PER_GROUP(sb)); -+ /* Account for for sb, gdt */ -+ has_sb = ext3_bg_has_super(sb, block_group); -+ if (has_sb) -+ blks--; -+ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || -+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) { -+ if (has_sb) { -+ blks -= le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); -+ blks -= ext3_bg_num_gdb(sb, block_group); -+ } -+ } else { /* For META_BG BLOCK GROUPS */ -+ int group_rel = (block_group - -+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % -+ EXT3_DESC_PER_BLOCK(sb); -+ if (group_rel == 0 || group_rel == 1 || -+ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) -+ blks--; -+ } -+ -+ /* Account for bitmaps and inode table */ -+ blks -= sbi->s_itb_per_group + 2; -+ return blks; -+} -+ -+/* Initializes an uninitialized block bitmap */ -+void ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, -+ int block_group, struct ext3_group_desc *gdp) -+{ -+ unsigned long startblk; -+ int bit, bit_max; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If the checksum is bad, then just mark all bits in use for safety */ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", -+ block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return; -+ } -+ memset(bh->b_data, 0, bh->b_size); -+ -+ /* Set bits for sb, gdt */ -+ startblk = block_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ -+ bit = 0; -+ bit_max = ext3_bg_has_super(sb, block_group); -+ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || -+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) { -+ if (bit_max) { -+ bit_max += ext3_bg_num_gdb(sb, block_group); -+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); -+ } -+ } else { /* For META_BG_BLOCK_GROUPS */ -+ int group_rel = (block_group - -+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % -+ EXT3_DESC_PER_BLOCK(sb); -+ if (group_rel == 0 || group_rel == 1 || -+ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) -+ bit_max += 1; -+ } -+ for (; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ -+ /* Set bits for bitmaps and inode table */ -+ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - startblk, bh->b_data); -+ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - startblk, bh->b_data); -+ bit_max = bit + sbi->s_itb_per_group; -+ for (bit = le32_to_cpu(gdp->bg_inode_table) - startblk; -+ bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+} -+ - /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. -@@ -88,7 +170,19 @@ read_block_bitmap(struct super_block *sb - desc = ext3_get_group_desc (sb, block_group, NULL); - if (!desc) - goto error_out; -- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_block_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ } - if (!bh) - ext3_error (sb, "read_block_bitmap", - "Cannot read block bitmap - " -@@ -427,6 +520,7 @@ do_more: - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - *pdquot_freed_blocks); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -@@ -1175,7 +1270,7 @@ int ext3_new_block_old(handle_t *handle, - - goal_group = group_no; - retry: -- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); -+ free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); - if (free_blocks > 0) { - ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)); -@@ -1206,7 +1304,7 @@ retry: - *errp = -EIO; - goto out; - } -- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); -+ free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); - if (free_blocks <= 0) - continue; - -@@ -1306,10 +1407,17 @@ allocated: - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); -+ free_blocks = 0; -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ free_blocks = ext3_free_blocks_after_init(sb, group_no, gdp); -+ gdp->bg_free_blocks_count = cpu_to_le16(free_blocks); -+ } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); - spin_unlock(sb_bgl_lock(sbi, group_no)); -- percpu_counter_mod(&sbi->s_freeblocks_counter, -1); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, free_blocks); - - BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); - err = ext3_journal_dirty_metadata(handle, gdp_bh); -@@ -1379,7 +1479,7 @@ unsigned long ext3_count_free_blocks(str - gdp = ext3_get_group_desc(sb, i, NULL); - if (!gdp) - continue; -- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); -+ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, i); - if (bitmap_bh == NULL) -@@ -1387,7 +1487,7 @@ unsigned long ext3_count_free_blocks(str - - x = ext3_count_free(bitmap_bh, sb->s_blocksize); - printk("group %d: stored = %d, counted = %lu\n", -- i, le16_to_cpu(gdp->bg_free_blocks_count), x); -+ i, EXT3_BG_BLOCKS_FREE(sb, i, gdp), x); - bitmap_count += x; - } - brelse(bitmap_bh); -@@ -1403,7 +1503,7 @@ unsigned long ext3_count_free_blocks(str - gdp = ext3_get_group_desc(sb, i, NULL); - if (!gdp) - continue; -- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); -+ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); - } - - return desc_count; -@@ -1590,7 +1587,7 @@ void ext3_check_blocks_bitmap (struct su - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); -+ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, i); - if (bitmap_bh == NULL) -@@ -1628,11 +1625,11 @@ void ext3_check_blocks_bitmap (struct su - "group %d is marked free", j, i); - - x = ext3_count_free(bitmap_bh, sb->s_blocksize); -- if (le16_to_cpu(gdp->bg_free_blocks_count) != x) -+ if (EXT3_BG_BLOCKS_FREE(sb, i, gdp) != x) - ext3_error (sb, "ext3_check_blocks_bitmap", - "Wrong free blocks count for group %d, " - "stored = %d, counted = %lu", i, -- le16_to_cpu(gdp->bg_free_blocks_count), x); -+ EXT3_BG_BLOCKS_FREE(sb, i, gdp), x); - bitmap_count += x; - } - brelse(bitmap_bh); -Index: linux-stage/fs/ext3/ialloc.c ---- linux-stage.orig/fs/ext3/ialloc.c 2007-03-14 04:44:13.000000000 -0400 -+++ linux-stage/fs/ext3/ialloc.c 2007-03-14 16:13:29.000000000 -0400 -@@ -43,6 +43,47 @@ - * the free blocks count in the block. - */ - -+/* -+ * To avoid calling the atomic setbit hundreds or thousands of times, we only -+ * need to use it within a single byte (to ensure we get endianness right). -+ * We can use memset for the rest of the bitmap as there are no other users. -+ */ -+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -+{ -+ int i; -+ -+ if (start_bit >= end_bit) -+ return; -+ -+ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); -+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) -+ ext3_set_bit(i, bitmap); -+ if (i < end_bit) -+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -+} -+ -+/* Initializes an uninitialized inode bitmap */ -+void ext3_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, -+ int block_group, struct ext3_group_desc *gdp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If the checksum is bad, then just mark all bits in use for safety */ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", -+ block_group); -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ return; -+ } -+ -+ memset(bh->b_data, 0, EXT3_INODES_PER_GROUP(sb) + 7 / 8); -+ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+} - - /* - * Read the inode allocation bitmap for a given block_group, reading -@@ -59,8 +103,15 @@ read_inode_bitmap(struct super_block * s - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; -- -- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_inode_bitmap(sb, bh, block_group, desc); -+ set_buffer_uptodate(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ } - if (!bh) - ext3_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -@@ -169,6 +175,8 @@ void ext3_free_inode (handle_t *handle, - if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, -+ gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) -@@ -202,8 +210,8 @@ error_return: - static int find_group_dir(struct super_block *sb, struct inode *parent) - { - int ngroups = EXT3_SB(sb)->s_groups_count; -- int freei, avefreei; -- struct ext3_group_desc *desc, *best_desc = NULL; -+ int freei, avefreei, freeb, best_freeb = 0; -+ struct ext3_group_desc *desc; - struct buffer_head *bh; - int group, best_group = -1; - -@@ -212,15 +220,14 @@ static int find_group_dir(struct super_b - - for (group = 0; group < ngroups; group++) { - desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) - continue; -- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ if (freei < avefreei) - continue; -- if (!best_desc || -- (le16_to_cpu(desc->bg_free_blocks_count) > -- le16_to_cpu(best_desc->bg_free_blocks_count))) { -+ freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc); -+ if (freeb > best_freeb) { - best_group = group; -- best_desc = desc; -+ best_freeb = freeb; - } - } - return best_group; -@@ -284,14 +294,15 @@ static int find_group_orlov(struct super - parent_group = (unsigned)group % ngroups; - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; -- desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ desc = ext3_get_group_desc(sb, group, &bh); -+ if (!desc || -+ (freei = EXT3_BG_INODES_FREE(sb, groups, desc)) ==0) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) - continue; -- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) -+ if (freei < avefreei) - continue; -- if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) -+ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < avefreeb) - continue; - best_group = group; - best_ndir = le16_to_cpu(desc->bg_used_dirs_count); -@@ -318,13 +330,13 @@ static int find_group_orlov(struct super - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) - continue; -- if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) -+ if (freei < min_inodes) - continue; -- if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) -+ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < min_blocks) - continue; - return group; - } -@@ -333,9 +347,9 @@ fallback: - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) - continue; -- if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) -+ if (freei >= avefreei) - return group; - } - -@@ -362,6 +377,7 @@ static int find_group_other(struct super - int group, i; - int best_group = -1; - int avefreeb, freeb, best_group_freeb = 0; -+ int freei; - - /* - * Try to place the inode in its parent directory -@@ -380,8 +396,8 @@ static int find_group_other(struct super - */ - group = parent_group; - desc = ext3_get_group_desc (sb, group, &bh); -- if (desc && le16_to_cpu(desc->bg_free_inodes_count) && -- (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count))) -+ if (desc && EXT3_BG_INODES_FREE(sb, group, desc) && -+ (!S_ISREG(mode) || EXT3_BG_BLOCKS_PNLT(sb, group, desc))) - return group; - avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups; - /* -@@ -392,11 +408,11 @@ static int find_group_other(struct super - if (group >= ngroups) - group -= ngroups; - desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ if (!desc || EXT3_BG_INODES_FREE(sb, group, desc) == 0) - continue; - if (!S_ISREG(mode)) - return group; -- if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) -+ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) >= avefreeb) - return group; - } - -@@ -413,9 +431,10 @@ static int find_group_other(struct super - if (++group >= ngroups) - group = 0; - desc = ext3_get_group_desc (sb, group, &bh); -- if (!desc || !desc->bg_free_inodes_count) -+ freei = EXT3_BG_INODES_FREE(sb, group, desc); -+ if (!desc || !freei) - continue; -- freeb = le16_to_cpu(desc->bg_free_blocks_count); -+ freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc); - if (freeb > best_group_freeb) { - best_group_freeb = freeb; - best_group = group; -@@ -453,6 +472,7 @@ struct inode *ext3_new_inode(handle_t *h - int err = 0; - struct inode *ret; - int i; -+ int unused_flag = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -581,18 +601,34 @@ got: - err = ext3_journal_get_write_access(handle, bh2); - if (err) goto fail; - spin_lock(sb_bgl_lock(sbi, group)); -- gdp->bg_free_inodes_count = -- cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); -+ if (gdp->bg_free_inodes_count == 0) { -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ gdp->bg_itable_unused = -+ cpu_to_le16(le32_to_cpu(es->s_inodes_per_group)); -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); -+ } -+ /* If we didn't allocate from free initialized inodes, -+ * then we allocated from uninitialized inodes. In this -+ * case initialize one inode. */ -+ gdp->bg_itable_unused = -+ cpu_to_le16(le16_to_cpu(gdp->bg_itable_unused) - 1); -+ unused_flag = 1; -+ } else { -+ gdp->bg_free_inodes_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) -1); -+ } - if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - } -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - -- percpu_counter_dec(&sbi->s_freeinodes_counter); -+ if (!unused_flag) -+ percpu_counter_dec(&sbi->s_freeinodes_counter); - if (S_ISDIR(mode)) - percpu_counter_inc(&sbi->s_dirs_counter); - sb->s_dirt = 1; -Index: linux-stage/fs/ext3/mballoc.c -=================================================================== ---- linux-stage.orig/fs/ext3/mballoc.c 2007-03-14 04:44:12.000000000 -0400 -+++ linux-stage/fs/ext3/mballoc.c 2007-03-14 16:13:29.000000000 -0400 -@@ -107,6 +107,7 @@ struct ext3_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext3_free_metadata *bb_md_cur; -+ struct ext3_group_desc *bb_gdp; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; -@@ -511,10 +512,7 @@ static int ext3_mb_init_cache(struct pag - if (first_group + i >= EXT3_SB(sb)->s_groups_count) - break; - -- err = -EIO; -- desc = ext3_get_group_desc(sb, first_group + i, NULL); -- if (desc == NULL) -- goto out; -+ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -@@ -529,7 +530,12 @@ static int ext3_mb_init_cache(struct pag - unlock_buffer(bh[i]); - continue; - } -- -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); -+ set_buffer_uptodate(bh[i]); -+ unlock_buffer(bh[i]); -+ continue; -+ } - get_bh(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); -@@ -1246,6 +1244,10 @@ static int ext3_mb_good_group(struct ext - switch (cr) { - case 0: - J_ASSERT(ac->ac_2order != 0); -+ /* If this group is uninitialized, skip it initially */ -+ if (grp->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ return 0; - bits = ac->ac_sb->s_blocksize_bits + 1; - for (i = ac->ac_2order; i <= bits; i++) - if (grp->bb_counters[i] > 0) -@@ -1421,7 +1421,8 @@ repeat: - } - - ac.ac_groups_scanned++; -- if (cr == 0) -+ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT))) - ext3_mb_simple_scan_group(&ac, &e3b); - else if (cr == 1 && *len == sbi->s_stripe) - ext3_mb_scan_aligned(&ac, &e3b); -@@ -1500,7 +1499,8 @@ found: - * bitmap to be journaled */ - - ext3_debug("using block group %d(%d)\n", -- ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ ac.ac_b_group.group, -+ EXT3_BG_BLOCKS_FREE(sb, ac.ac_b_group.group, gdp)); - - bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); - if (!bitmap_bh) { -@@ -1537,9 +1543,17 @@ found: - mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); - - spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(ext3_free_blocks_after_init(sb, -+ ac.ac_b_ex.fe_group, -+ gdp)); -+ } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac.ac_b_ex.fe_len); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac.ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); - -@@ -1992,16 +1991,16 @@ int ext3_mb_init_backend(struct super_bl - i--; - goto err_freebuddy; - } -+ memset(meta_group_info[j], 0, len); - desc = ext3_get_group_desc(sb, i, NULL); -+ meta_group_info[j]->bb_gdp = desc; - if (desc == NULL) { - printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); - goto err_freebuddy; - } -- memset(meta_group_info[j], 0, len); - set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, - &meta_group_info[j]->bb_state); -- meta_group_info[j]->bb_free = -- le16_to_cpu(desc->bg_free_blocks_count); -+ meta_group_info[j]->bb_free = EXT3_BG_BLOCKS_FREE(sb, j, desc); - } - - return 0; -@@ -2397,6 +2410,7 @@ do_more: - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -Index: linux-stage/fs/ext3/resize.c -=================================================================== ---- linux-stage.orig/fs/ext3/resize.c 2007-03-14 04:44:13.000000000 -0400 -+++ linux-stage/fs/ext3/resize.c 2007-03-14 16:13:29.000000000 -0400 -@@ -807,6 +807,9 @@ int ext3_group_add(struct super_block *s - gdp->bg_inode_table = cpu_to_le32(input->inode_table); - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); -+ spin_lock(sb_bgl_lock(sbi, input->group)); /* not strictly needed */ -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); -+ spin_unlock(sb_bgl_lock(sbi, input->group)); - - /* - * Make the new blocks and inodes valid next. We do this before -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2007-03-14 04:44:13.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2007-03-14 16:13:29.000000000 -0400 -@@ -1084,6 +1084,56 @@ static int ext3_setup_super(struct super - return res; - } - -+static __u16 crc16(__u16 crc, const u8 *buf, size_t len) -+{ -+ __u16 tmp; -+ -+ while (len--) { -+ crc ^= *buf++; -+ crc ^= (u8)crc >> 4; -+ tmp = (u8)crc; -+ crc ^= (tmp ^ (tmp << 1)) << 4; -+ } -+ return crc; -+} -+ -+__u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ __u16 crc = 0; -+ -+#ifdef CONFIG_SMP -+ J_ASSERT(spin_is_locked(sb_bgl_lock(sbi, block_group))); -+#endif -+ if (sbi->s_es->s_feature_ro_compat & -+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ int offset = offsetof(struct ext3_group_desc, bg_checksum); -+ -+ block_group = cpu_to_le32(block_group); -+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -+ crc = crc16(crc, (__u8 *)&block_group, sizeof(block_group)); -+ crc = crc16(crc, (__u8 *)gdp, offset); -+ offset += sizeof(gdp->bg_checksum); /* skip checksum */ -+ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ -+ /* for checksum of struct ext4_group_desc do the rest... -+ if (offset < sbi->s_es->s_desc_size) { -+ crc = crc16(crc, (__u8 *)gdp + offset, -+ sbi->s_es->s_desc_size - offset); -+ */ -+ } -+ -+ return cpu_to_le16(crc); -+} -+ -+int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) -+ return 0; -+ -+ return 1; -+} -+ - /* Called at mount-time, super-block is locked */ - static int ext3_check_descriptors (struct super_block * sb) - { -@@ -1133,6 +1179,13 @@ static int ext3_check_descriptors (struc - le32_to_cpu(gdp->bg_inode_table)); - return 0; - } -+ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum for group %d failed (%u != %u)\n", -+ i, ext3_group_desc_csum(sbi, i, gdp), -+ gdp->bg_checksum); -+ return 0; -+ } - block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; - } -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2007-03-14 16:29:47.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs.h 2007-03-14 16:37:23.000000000 -0400 -@@ -118,6 +118,26 @@ struct statfs; - (s)->s_first_ino) - #endif - -+/* Macro-instructions used to calculate Free inodes and blocks count. -+ * Value is approximate for the blocks count until it is initialized. */ -+#define EXT3_BG_INODES_FREE(sb,gr,gdp) ((gdp)->bg_flags & \ -+ cpu_to_le16(EXT3_BG_INODE_UNINIT) ? \ -+ EXT3_INODES_PER_GROUP(sb) : \ -+ le16_to_cpu((gdp)->bg_itable_unused) + \ -+ le16_to_cpu((gdp)->bg_free_inodes_count)) -+#define EXT3_BG_BLOCKS_FREE(sb,gr,gdp) ((gdp)->bg_flags & \ -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \ -+ ext3_free_blocks_after_init(sb,gr,gdp) :\ -+ le16_to_cpu((gdp)->bg_free_blocks_count)) -+ -+/* When looking for groups to allocate from, we reduce the free blocks count -+ * to avoid always picking uninitialized groups. Essentially, we don't start -+ * allocating from uninitialized groups until other groups are 1/2 full. */ -+#define EXT3_BG_BLOCKS_PNLT(sb,gr,gdp) ((gdp)->bg_flags & \ -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \ -+ EXT3_BLOCKS_PER_GROUP(sb) / 2 : \ -+ le16_to_cpu((gdp)->bg_free_blocks_count)) -+ - /* - * Macro-instructions used to manage fragments - */ -@@ -138,16 +148,22 @@ struct statfs; - */ - struct ext3_group_desc - { -- __le32 bg_block_bitmap; /* Blocks bitmap block */ -- __le32 bg_inode_bitmap; /* Inodes bitmap block */ -+ __le32 bg_block_bitmap; /* Blocks bitmap block */ -+ __le32 bg_inode_bitmap; /* Inodes bitmap block */ - __le32 bg_inode_table; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ -- __u16 bg_pad; -- __le32 bg_reserved[3]; -+ __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ -+ __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ -+ __le16 bg_itable_unused; /* Unused inodes count */ -+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ - }; - -+#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -+#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ -+#define EXT3_BG_BLOCK_WRITTEN 0x0004 /* On-disk structures were overwritten */ -+ - /* - * Macro-instructions used to manage group descriptors - */ -@@ -564,6 +581,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -@@ -580,6 +598,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - -@@ -841,6 +860,16 @@ extern void ext3_unlockfs (struct super_ - extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); - extern int ext3_remount (struct super_block *, int *, char *); - extern int ext3_statfs (struct super_block *, struct kstatfs *); -+extern __u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+extern unsigned long ext3_free_blocks_after_init(struct super_block *sb, -+ int block_group, -+ struct ext3_group_desc *gdp); -+extern void ext3_init_block_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); - - #define ext3_std_error(sb, errno) \ - do { \ - -%diffstat - fs/ext3/balloc.c | 116 +++++++++++++++++++++++++++++++++++++++++-- - fs/ext3/ialloc.c | 85 ++++++++++++++++++++++--------- - fs/ext3/mballoc.c | 18 +++++- - fs/ext3/resize.c | 1 - fs/ext3/super.c | 53 +++++++++++++++++++ - include/linux/ext3_fs.h | 36 +++++++++++-- - include/linux/ext3_fs.h.orig | 23 +++++++- - 7 files changed, 294 insertions(+), 38 deletions(-) - diff --git a/lustre/kernel_patches/patches/ext3-use-after-free.patch b/lustre/kernel_patches/patches/ext3-use-after-free.patch deleted file mode 100644 index dd999bf..0000000 --- a/lustre/kernel_patches/patches/ext3-use-after-free.patch +++ /dev/null @@ -1,53 +0,0 @@ - ./fs/ext3/namei.c | 11 +++++------ - 1 files changed, 5 insertions(+), 6 deletions(-) - ---- linux-2.4.20/./fs/ext3/namei.c~ext3-use-after-free 2003-04-08 23:35:51.000000000 -0600 -+++ linux-2.4.20-braam/./fs/ext3/namei.c 2003-04-08 23:35:51.000000000 -0600 -@@ -1521,8 +1521,11 @@ static int ext3_add_nondir(handle_t *han - { - int err = ext3_add_entry(handle, dentry, inode); - if (!err) { -- d_instantiate(dentry, inode); -- return 0; -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ d_instantiate(dentry, inode); -+ return 0; -+ } - } - ext3_dec_count(handle, inode); - iput(inode); -@@ -1559,7 +1562,6 @@ static int ext3_create (struct inode * d - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -1586,7 +1588,6 @@ static int ext3_mknod (struct inode * di - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -2035,7 +2036,6 @@ static int ext3_symlink (struct inode * - } - EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -@@ -2069,7 +2069,6 @@ static int ext3_link (struct dentry * ol - atomic_inc(&inode->i_count); - - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } - -_ diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch b/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch deleted file mode 100644 index f71e470005..0000000 --- a/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch +++ /dev/null @@ -1,193 +0,0 @@ -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c RH_2_6_9_42_0_3/fs/ext3/ialloc.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/ialloc.c 2007-02-16 07:22:28.000000000 +0200 -@@ -419,7 +419,8 @@ static int find_group_other(struct super - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) -+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; -@@ -447,6 +448,41 @@ struct inode *ext3_new_inode(handle_t *h - - sbi = EXT3_SB(sb); - es = sbi->s_es; -+ if (goal) { -+ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ err = -EIO; -+ -+ gdp = ext3_get_group_desc(sb, group, &bh2); -+ if (!gdp) -+ goto fail; -+ -+ bitmap_bh = read_inode_bitmap (sb, group); -+ if (!bitmap_bh) -+ goto fail; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), -+ ino, bitmap_bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto continue_allocation; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto got; -+ } -+ -+continue_allocation: - if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c RH_2_6_9_42_0_3/fs/ext3/ioctl.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/ioctl.c 2007-02-16 07:22:28.000000000 +0200 -@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644, NULL); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/namei.c 2006-10-23 13:32:59.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/namei.c 2007-02-22 18:58:13.000000000 +0200 -@@ -97,6 +97,7 @@ struct dx_entry - __le32 block; - }; - -+ - /* - * dx_root_info is laid out so that if it should somehow get overlaid by a - * dirent the two low bits of the hash version will be zero. Therefore, the -@@ -141,6 +142,14 @@ struct dx_map_entry - u32 offs; - }; - -+#define LVFS_DENTRY_PARAM_MAGIC 20070216UL -+struct lvfs_dentry_params -+{ -+ unsigned long p_inum; -+ void *p_ptr; -+ u32 magic; -+}; -+ - #ifdef CONFIG_EXT3_INDEX - static inline unsigned dx_get_block (struct dx_entry *entry); - static void dx_set_block (struct dx_entry *entry, unsigned value); -@@ -1624,6 +1633,20 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) { -+ struct lvfs_dentry_params *param = dentry->d_fsdata; -+ -+ if (param->magic == LVFS_DENTRY_PARAM_MAGIC) -+ inum = param->p_inum; -+ } -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1649,7 +1672,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1683,7 +1706,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1719,7 +1742,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2124,7 +2147,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -diff -urp RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h RH_2_6_9_42_0_3/include/linux/ext3_fs.h ---- RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/include/linux/ext3_fs.h 2007-02-16 07:22:28.000000000 +0200 -@@ -741,7 +741,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -833,4 +834,6 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-wantedi-2.6-suse.patch deleted file mode 100644 index e38bedb..0000000 --- a/lustre/kernel_patches/patches/ext3-wantedi-2.6-suse.patch +++ /dev/null @@ -1,192 +0,0 @@ -diff -urp linux-2.6.5-7.282.orig/fs/ext3/ialloc.c linux-2.6.5-7.282/fs/ext3/ialloc.c ---- linux-2.6.5-7.282.orig/fs/ext3/ialloc.c 2006-08-30 17:12:13.000000000 +0300 -+++ linux-2.6.5-7.282/fs/ext3/ialloc.c 2007-02-16 07:43:08.000000000 +0200 -@@ -420,7 +420,8 @@ static int find_group_other(struct super - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) -+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; -@@ -448,6 +449,41 @@ struct inode *ext3_new_inode(handle_t *h - - sbi = EXT3_SB(sb); - es = sbi->s_es; -+ if (goal) { -+ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ err = -EIO; -+ -+ gdp = ext3_get_group_desc(sb, group, &bh2); -+ if (!gdp) -+ goto fail; -+ -+ bitmap_bh = read_inode_bitmap (sb, group); -+ if (!bitmap_bh) -+ goto fail; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), -+ ino, bitmap_bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto continue_allocation; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto got; -+ } -+ -+continue_allocation: - if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); -diff -urp linux-2.6.5-7.282.orig/fs/ext3/ioctl.c linux-2.6.5-7.282/fs/ext3/ioctl.c ---- linux-2.6.5-7.282.orig/fs/ext3/ioctl.c 2006-08-30 17:12:13.000000000 +0300 -+++ linux-2.6.5-7.282/fs/ext3/ioctl.c 2007-02-16 07:43:08.000000000 +0200 -@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644, NULL); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); -diff -urp linux-2.6.5-7.282.orig/fs/ext3/namei.c linux-2.6.5-7.282/fs/ext3/namei.c ---- linux-2.6.5-7.282.orig/fs/ext3/namei.c 2006-08-30 17:12:34.000000000 +0300 -+++ linux-2.6.5-7.282/fs/ext3/namei.c 2007-02-16 07:46:13.000000000 +0200 -@@ -144,6 +144,14 @@ struct dx_map_entry - u32 offs; - }; - -+#define LVFS_DENTRY_PARAM_MAGIC 20070216UL -+struct lvfs_dentry_params -+{ -+ unsigned long p_inum; -+ void *p_ptr; -+ u32 magic; -+}; -+ - #ifdef CONFIG_EXT3_INDEX - static inline unsigned dx_get_block (struct dx_entry *entry); - static void dx_set_block (struct dx_entry *entry, unsigned value); -@@ -1625,6 +1633,20 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) { -+ struct lvfs_dentry_params *param = dentry->d_fsdata; -+ -+ if (param->magic == LVFS_DENTRY_PARAM_MAGIC) -+ inum = param->p_inum; -+ } -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1649,7 +1671,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1682,7 +1704,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1718,7 +1740,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2113,7 +2135,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -diff -urp linux-2.6.5-7.282.orig/include/linux/ext3_fs.h linux-2.6.5-7.282/include/linux/ext3_fs.h ---- linux-2.6.5-7.282.orig/include/linux/ext3_fs.h 2006-08-30 17:12:13.000000000 +0300 -+++ linux-2.6.5-7.282/include/linux/ext3_fs.h 2007-02-16 07:43:08.000000000 +0200 -@@ -203,6 +203,7 @@ struct ext3_group_desc - #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) - #define EXT3_IOC_GETVERSION _IOR('f', 3, long) - #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ - #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) - #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) - #ifdef CONFIG_JBD_DEBUG -@@ -712,7 +713,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -797,4 +799,5 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch b/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch deleted file mode 100644 index 9ed150e..0000000 --- a/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch +++ /dev/null @@ -1,174 +0,0 @@ - fs/ext3/ialloc.c | 35 ++++++++++++++++++++++++++++++++++- - fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ - fs/ext3/namei.c | 21 +++++++++++++++++---- - include/linux/dcache.h | 5 +++++ - include/linux/ext3_fs.h | 5 ++++- - 5 files changed, 85 insertions(+), 6 deletions(-) - -Index: linux-2.6.15/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/ialloc.c 2006-02-19 15:23:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/ialloc.c 2006-02-21 00:26:52.000000000 +0300 -@@ -420,7 +420,8 @@ static int find_group_other(struct super - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) -+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; -@@ -448,6 +449,38 @@ struct inode *ext3_new_inode(handle_t *h - - sbi = EXT3_SB(sb); - es = sbi->s_es; -+ if (goal) { -+ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, group, &bh2); -+ -+ err = -EIO; -+ bitmap_bh = read_inode_bitmap (sb, group); -+ if (!bitmap_bh) -+ goto fail; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), -+ ino, bitmap_bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto continue_allocation; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto got; -+ } -+ -+continue_allocation: - if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); -Index: linux-2.6.15/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/ioctl.c 2005-11-11 08:33:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/ioctl.c 2006-02-21 00:26:52.000000000 +0300 -@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644, NULL); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); -Index: linux-2.6.15/fs/ext3/namei.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/namei.c 2006-02-19 15:23:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/namei.c 2006-02-21 00:28:17.000000000 +0300 -@@ -1631,6 +1631,16 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) -+ inum = (unsigned long) dentry->d_fsdata; -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1656,7 +1666,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1690,7 +1700,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1726,7 +1736,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2131,7 +2141,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -Index: linux-2.6.15/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.15.orig/include/linux/ext3_fs.h 2005-11-11 08:33:12.000000000 +0300 -+++ linux-2.6.15/include/linux/ext3_fs.h 2006-02-21 00:26:52.000000000 +0300 -@@ -762,7 +762,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -844,4 +845,6 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6-suse.patch new file mode 100644 index 0000000..0f52cf6 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6-suse.patch @@ -0,0 +1,16 @@ +Index: uml-2.6.3/include/linux/dcache.h +=================================================================== +--- uml-2.6.3.orig/include/linux/dcache.h 2004-02-21 00:19:14.365988600 +0800 ++++ uml-2.6.3/include/linux/dcache.h 2004-02-21 00:21:04.612228624 +0800 +@@ -25,6 +25,11 @@ + + #define IS_ROOT(x) ((x) == (x)->d_parent) + ++struct dentry_params { ++ unsigned long p_inum; ++ void *p_ptr; ++}; ++ + /* + * "quick string" -- eases parameter passing, but more importantly + * saves "metadata" about the string (ie length and the hash). diff --git a/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6.18-vanilla.patch new file mode 100644 index 0000000..11c6ada --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-wantedi-misc-2.6.18-vanilla.patch @@ -0,0 +1,16 @@ +Index: linux-2.6/include/linux/dcache.h +=================================================================== +--- linux-2.6.orig/include/linux/dcache.h 2006-07-15 16:11:52.000000000 +0800 ++++ linux-2.6/include/linux/dcache.h 2006-07-15 16:12:04.000000000 +0800 +@@ -24,6 +24,11 @@ struct vfsmount; + + #define IS_ROOT(x) ((x) == (x)->d_parent) + ++struct dentry_params { ++ unsigned long p_inum; ++ void *p_ptr; ++}; ++ + /* + * "quick string" -- eases parameter passing, but more importantly + * saves "metadata" about the string (ie length and the hash). diff --git a/lustre/kernel_patches/patches/ext3-xattr-ptr-arith-fix.patch b/lustre/kernel_patches/patches/ext3-xattr-ptr-arith-fix.patch deleted file mode 100644 index 818596c..0000000 --- a/lustre/kernel_patches/patches/ext3-xattr-ptr-arith-fix.patch +++ /dev/null @@ -1,18 +0,0 @@ -Index: linux-2.4.20/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.20.orig/fs/ext3/xattr.c 2003-11-13 10:59:33.000000000 +0800 -+++ linux-2.4.20/fs/ext3/xattr.c 2003-11-25 21:16:51.000000000 +0800 -@@ -1293,9 +1293,10 @@ - goto cleanup; - memcpy(header, HDR(bh), bh->b_size); - header->h_refcount = cpu_to_le32(1); -- offset = (char *)header - bh->b_data; -- here = ENTRY((char *)here + offset); -- last = ENTRY((char *)last + offset); -+ offset = (char *)here - bh->b_data; -+ here = ENTRY((char *)header + offset); -+ offset = (char *)last - bh->b_data; -+ last = ENTRY((char *)header + offset); - } - } else { - /* Allocate a buffer where we construct the new block. */ diff --git a/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch b/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch deleted file mode 100644 index df46643..0000000 --- a/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch +++ /dev/null @@ -1,33 +0,0 @@ ---- ./include/linux/ext3_fs.h.orig Tue May 7 17:06:03 2002 -+++ ./include/linux/ext3_fs.h Tue May 7 17:07:11 2002 -@@ -17,6 +17,8 @@ - #define _LINUX_EXT3_FS_H - - #include -+#include -+#include - - /* - * The second extended filesystem constants/structures -@@ -86,8 +88,8 @@ - #define EXT3_MIN_BLOCK_LOG_SIZE 10 - - #ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) (&((inode)->u.ext3_i)) -+#define EXT3_SB(sb) ((struct ext3_sb_info *)&((sb)->u.generic_sbp)) -+#define EXT3_I(inode) ((struct ext3_inode_info *)&((inode)->u.generic_ip)) - - #define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) - #define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -447,7 +447,9 @@ - #define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime - static inline struct inode *orphan_list_entry(struct list_head *l) - { -- return list_entry(l, struct inode, u.ext3_i.i_orphan); -+ return ((struct inode *)((char *)l - -+ (unsigned long)(offsetof(struct inode, u.generic_ip) + -+ offsetof(struct ext3_inode_info, i_orphan)))); - } - - /* diff --git a/lustre/kernel_patches/patches/extN-misc-fixup.patch b/lustre/kernel_patches/patches/extN-misc-fixup.patch deleted file mode 100644 index 65d9347..0000000 --- a/lustre/kernel_patches/patches/extN-misc-fixup.patch +++ /dev/null @@ -1,20 +0,0 @@ - fs/ext3/super.c | 4 ++-- - 1 files changed, 2 insertions(+), 2 deletions(-) - ---- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600 -+++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 23:08:06.000000000 -0600 -@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s - printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); - goto out_journal; - } -- if (ntohl(journal->j_superblock->s_nr_users) != 1) { -+ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT3-fs: External journal has more than one " - "user (unsupported) - %d\n", -- ntohl(journal->j_superblock->s_nr_users)); -+ be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXT3_SB(sb)->journal_bdev = bdev; - -_ diff --git a/lustre/kernel_patches/patches/extN-wantedi-2.4.21-chaos.patch b/lustre/kernel_patches/patches/extN-wantedi-2.4.21-chaos.patch deleted file mode 100644 index 3dba64d..0000000 --- a/lustre/kernel_patches/patches/extN-wantedi-2.4.21-chaos.patch +++ /dev/null @@ -1,213 +0,0 @@ -diff -urp linux-2.4.21.orig/fs/ext3/ialloc.c linux-2.4.21/fs/ext3/ialloc.c ---- linux-2.4.21.orig/fs/ext3/ialloc.c 2007-02-16 10:23:09.000000000 +0200 -+++ linux-2.4.21/fs/ext3/ialloc.c 2007-02-16 10:30:28.000000000 +0200 -@@ -330,7 +330,8 @@ int ext3_itable_block_used(struct super_ - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode * ext3_new_inode (handle_t *handle, struct inode * dir, int mode) -+struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir, -+ int mode, unsigned long goal) - { - struct super_block * sb; - struct buffer_head * bh; -@@ -359,7 +360,41 @@ struct inode * ext3_new_inode (handle_t - init_rwsem(&ei->truncate_sem); - - lock_super (sb); -- es = sbi->s_es; -+ es = EXT3_SB(sb)->s_es; -+ -+ if (goal) { -+ i = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ j = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, i, &bh2); -+ -+ bitmap_nr = load_inode_bitmap (sb, i); -+ if (bitmap_nr < 0) { -+ err = bitmap_nr; -+ goto fail; -+ } -+ -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto repeat; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto have_bit_and_group; -+ } -+ - repeat: - gdp = NULL; - i = 0; -@@ -474,6 +509,7 @@ repeat: - } - goto repeat; - } -+ have_bit_and_group: - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", -diff -urp linux-2.4.21.orig/fs/ext3/inode.c linux-2.4.21/fs/ext3/inode.c ---- linux-2.4.21.orig/fs/ext3/inode.c 2007-02-16 10:23:09.000000000 +0200 -+++ linux-2.4.21/fs/ext3/inode.c 2007-02-16 10:30:28.000000000 +0200 -@@ -2235,7 +2235,7 @@ void ext3_truncate_thread(struct inode * - if (IS_ERR(handle)) - goto out_truncate; - -- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0); - if (IS_ERR(new_inode)) { - ext3_debug("truncate inode %lu directly (no new inodes)\n", - old_inode->i_ino); -diff -urp linux-2.4.21.orig/fs/ext3/ioctl.c linux-2.4.21/fs/ext3/ioctl.c ---- linux-2.4.21.orig/fs/ext3/ioctl.c 2007-02-16 10:23:09.000000000 +0200 -+++ linux-2.4.21/fs/ext3/ioctl.c 2007-02-16 10:30:28.000000000 +0200 -@@ -24,6 +24,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); -diff -urp linux-2.4.21.orig/fs/ext3/namei.c linux-2.4.21/fs/ext3/namei.c ---- linux-2.4.21.orig/fs/ext3/namei.c 2007-02-16 10:23:09.000000000 +0200 -+++ linux-2.4.21/fs/ext3/namei.c 2007-02-16 10:32:17.000000000 +0200 -@@ -143,6 +143,14 @@ struct dx_map_entry - u32 offs; - }; - -+#define LVFS_DENTRY_PARAM_MAGIC 20070216UL -+struct lvfs_dentry_params -+{ -+ unsigned long p_inum; -+ void *p_ptr; -+ u32 magic; -+}; -+ - #ifdef CONFIG_EXT3_INDEX - static inline unsigned dx_get_block (struct dx_entry *entry); - static void dx_set_block (struct dx_entry *entry, unsigned value); -@@ -1542,6 +1550,20 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) { -+ struct lvfs_dentry_params *param = -+ dentry->d_fsdata; -+ if (param->magic == LVFS_DENTRY_PARAM_MAGIC) -+ inum = param->p_inum; -+ } -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1565,7 +1587,7 @@ static int ext3_create (struct inode * d - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1593,7 +1615,7 @@ static int ext3_mknod (struct inode * di - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1626,7 +1648,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2056,7 +2078,7 @@ static int ext3_symlink (struct inode * - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -diff -urp linux-2.4.21.orig/include/linux/ext3_fs.h linux-2.4.21/include/linux/ext3_fs.h ---- linux-2.4.21.orig/include/linux/ext3_fs.h 2007-02-16 10:23:09.000000000 +0200 -+++ linux-2.4.21/include/linux/ext3_fs.h 2007-02-16 10:30:28.000000000 +0200 -@@ -204,6 +204,7 @@ struct ext3_group_desc - #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) - #define EXT3_IOC_GETVERSION _IOR('f', 3, long) - #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ - #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) - #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) - #ifdef CONFIG_JBD_DEBUG -@@ -671,7 +672,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -763,4 +765,5 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/extN-wantedi-2.4.21-suse2.patch b/lustre/kernel_patches/patches/extN-wantedi-2.4.21-suse2.patch deleted file mode 100644 index 400f7a8..0000000 --- a/lustre/kernel_patches/patches/extN-wantedi-2.4.21-suse2.patch +++ /dev/null @@ -1,226 +0,0 @@ - fs/ext3/ialloc.c | 40 ++++++++++++++++++++++++++++++++++++++-- - fs/ext3/inode.c | 2 +- - fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ - fs/ext3/namei.c | 21 +++++++++++++++++---- - include/linux/dcache.h | 5 +++++ - include/linux/ext3_fs.h | 5 ++++- - 6 files changed, 90 insertions(+), 8 deletions(-) - -Index: linux-2.4.21-chaos/fs/ext3/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/namei.c 2003-12-12 16:18:41.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/namei.c 2003-12-12 16:18:53.000000000 +0300 -@@ -1534,6 +1534,19 @@ - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) { -+ struct dentry_params *param = -+ (struct dentry_params *) dentry->d_fsdata; -+ inum = param->p_inum; -+ } -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1557,7 +1570,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1585,7 +1598,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1618,7 +1631,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2012,7 +2025,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -Index: linux-2.4.21-chaos/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c 2003-12-12 16:18:52.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ialloc.c 2003-12-12 16:18:53.000000000 +0300 -@@ -330,7 +330,8 @@ - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode * ext3_new_inode (handle_t *handle, struct inode * dir, int mode) -+struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir, -+ int mode, unsigned long goal) - { - struct super_block * sb; - struct buffer_head * bh; -@@ -359,7 +360,41 @@ - init_rwsem(&ei->truncate_sem); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; -+ -+ if (goal) { -+ i = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ j = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, i, &bh2); -+ -+ bitmap_nr = load_inode_bitmap (sb, i); -+ if (bitmap_nr < 0) { -+ err = bitmap_nr; -+ goto fail; -+ } -+ -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto repeat; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto have_bit_and_group; -+ } -+ - repeat: - gdp = NULL; - i = 0; -@@ -474,6 +509,7 @@ - } - goto repeat; - } -+ have_bit_and_group: - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 16:18:52.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 16:18:53.000000000 +0300 -@@ -2233,7 +2233,7 @@ - if (IS_ERR(handle)) - goto out_truncate; - -- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0); - if (IS_ERR(new_inode)) { - ext3_debug("truncate inode %lu directly (no new inodes)\n", - old_inode->i_ino); -Index: linux-2.4.21-chaos/fs/ext3/ioctl.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ioctl.c 2003-12-12 16:18:40.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ioctl.c 2003-12-12 16:18:53.000000000 +0300 -@@ -24,6 +24,31 @@ - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 16:18:52.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 16:18:53.000000000 +0300 -@@ -204,6 +204,7 @@ - #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) - #define EXT3_IOC_GETVERSION _IOR('f', 3, long) - #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ - #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) - #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) - #ifdef CONFIG_JBD_DEBUG -@@ -671,7 +672,8 @@ - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -763,4 +765,5 @@ - - #endif /* __KERNEL__ */ - -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ -Index: linux-2.4.21-chaos/include/linux/dcache.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/dcache.h 2003-12-12 16:18:06.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/dcache.h 2003-12-12 16:18:53.000000000 +0300 -@@ -63,6 +63,11 @@ - - #define IS_ROOT(x) ((x) == (x)->d_parent) - -+struct dentry_params { -+ unsigned long p_inum; -+ void *p_ptr; -+}; -+ - /* - * "quick string" -- eases parameter passing, but more importantly - * saves "metadata" about the string (ie length and the hash). diff --git a/lustre/kernel_patches/patches/extN-wantedi.patch b/lustre/kernel_patches/patches/extN-wantedi.patch deleted file mode 100644 index e9cb3bc..0000000 --- a/lustre/kernel_patches/patches/extN-wantedi.patch +++ /dev/null @@ -1,213 +0,0 @@ -diff -urp linux-2.4.24.orig/fs/ext3/ialloc.c linux-2.4.24/fs/ext3/ialloc.c ---- linux-2.4.24.orig/fs/ext3/ialloc.c 2007-02-16 10:54:31.000000000 +0200 -+++ linux-2.4.24/fs/ext3/ialloc.c 2007-02-16 10:55:30.000000000 +0200 -@@ -330,7 +330,8 @@ int ext3_itable_block_used(struct super_ - * group to find a free inode. - */ - struct inode * ext3_new_inode (handle_t *handle, -- const struct inode * dir, int mode) -+ const struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block * sb; - struct buffer_head * bh; -@@ -355,7 +356,41 @@ struct inode * ext3_new_inode (handle_t - init_rwsem(&inode->u.ext3_i.truncate_sem); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; -+ -+ if (goal) { -+ i = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ j = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, i, &bh2); -+ -+ bitmap_nr = load_inode_bitmap (sb, i); -+ if (bitmap_nr < 0) { -+ err = bitmap_nr; -+ goto fail; -+ } -+ -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto repeat; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. -+ */ -+ goto have_bit_and_group; -+ } -+ - repeat: - gdp = NULL; - i = 0; -@@ -470,6 +505,7 @@ repeat: - } - goto repeat; - } -+ have_bit_and_group: - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", -diff -urp linux-2.4.24.orig/fs/ext3/inode.c linux-2.4.24/fs/ext3/inode.c ---- linux-2.4.24.orig/fs/ext3/inode.c 2007-02-16 10:54:31.000000000 +0200 -+++ linux-2.4.24/fs/ext3/inode.c 2007-02-16 10:55:30.000000000 +0200 -@@ -2605,7 +2605,7 @@ void ext3_truncate_thread(struct inode * - if (IS_ERR(handle)) - goto out_truncate; - -- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0); - if (IS_ERR(new_inode)) { - ext3_debug("truncate inode %lu directly (no new inodes)\n", - old_inode->i_ino); -diff -urp linux-2.4.24.orig/fs/ext3/ioctl.c linux-2.4.24/fs/ext3/ioctl.c ---- linux-2.4.24.orig/fs/ext3/ioctl.c 2003-06-13 17:51:37.000000000 +0300 -+++ linux-2.4.24/fs/ext3/ioctl.c 2007-02-16 10:55:30.000000000 +0200 -@@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); -diff -urp linux-2.4.24.orig/fs/ext3/namei.c linux-2.4.24/fs/ext3/namei.c ---- linux-2.4.24.orig/fs/ext3/namei.c 2007-02-16 10:54:31.000000000 +0200 -+++ linux-2.4.24/fs/ext3/namei.c 2007-02-16 10:56:22.000000000 +0200 -@@ -142,6 +142,14 @@ struct dx_map_entry - u32 offs; - }; - -+#define LVFS_DENTRY_PARAM_MAGIC 20070216UL -+struct lvfs_dentry_params -+{ -+ unsigned long p_inum; -+ void *p_ptr; -+ u32 magic; -+}; -+ - #ifdef CONFIG_EXT3_INDEX - static inline unsigned dx_get_block (struct dx_entry *entry); - static void dx_set_block (struct dx_entry *entry, unsigned value); -@@ -1541,6 +1549,20 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) { -+ struct lvfs_dentry_params *param = dentry->d_fsdata; -+ -+ if (param->magic == LVFS_DENTRY_PARAM_MAGIC) -+ inum = param->p_inum; -+ } -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it -@@ -1564,7 +1586,7 @@ static int ext3_create (struct inode * d - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1592,7 +1614,7 @@ static int ext3_mknod (struct inode * di - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); -@@ -1622,7 +1644,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2053,7 +2075,7 @@ static int ext3_symlink (struct inode * - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -diff -urp linux-2.4.24.orig/include/linux/ext3_fs.h linux-2.4.24/include/linux/ext3_fs.h ---- linux-2.4.24.orig/include/linux/ext3_fs.h 2007-02-16 10:54:31.000000000 +0200 -+++ linux-2.4.24/include/linux/ext3_fs.h 2007-02-16 10:55:30.000000000 +0200 -@@ -202,6 +202,7 @@ struct ext3_group_desc - #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) - #define EXT3_IOC_GETVERSION _IOR('f', 3, long) - #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ - #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) - #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) - #ifdef CONFIG_JBD_DEBUG -@@ -674,7 +675,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -766,4 +768,5 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/fsprivate-2.4-suse.patch b/lustre/kernel_patches/patches/fsprivate-2.4-suse.patch deleted file mode 100644 index fc3a72a..0000000 --- a/lustre/kernel_patches/patches/fsprivate-2.4-suse.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- uml-2.4.24/include/linux/fs.h.orig 2005-05-18 21:27:58.120742112 +0300 -+++ uml-2.4.24/include/linux/fs.h 2005-05-18 22:48:12.081908776 +0300 -@@ -592,6 +592,7 @@ - /* needed for tty driver, and maybe others */ - void *private_data; - struct lookup_intent *f_it; -+ void *fs_private; - struct list_head f_ep_links; - spinlock_t f_ep_lock; - }; diff --git a/lustre/kernel_patches/patches/fsprivate-2.4.patch b/lustre/kernel_patches/patches/fsprivate-2.4.patch deleted file mode 100644 index 4e83dbe..0000000 --- a/lustre/kernel_patches/patches/fsprivate-2.4.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- uml-2.4.24/include/linux/fs.h.orig 2005-05-18 21:27:58.120742112 +0300 -+++ uml-2.4.24/include/linux/fs.h 2005-05-18 22:48:12.081908776 +0300 -@@ -592,6 +592,7 @@ - /* needed for tty driver, and maybe others */ - void *private_data; - struct lookup_intent *f_it; -+ void *fs_private; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; diff --git a/lustre/kernel_patches/patches/gfp_debug-2.4.21-rhel.patch b/lustre/kernel_patches/patches/gfp_debug-2.4.21-rhel.patch deleted file mode 100644 index 6686b15..0000000 --- a/lustre/kernel_patches/patches/gfp_debug-2.4.21-rhel.patch +++ /dev/null @@ -1,77 +0,0 @@ -Index: linux-2.4.21-rhel/kernel/sysctl.c -=================================================================== ---- linux-2.4.21-rhel.orig/kernel/sysctl.c 2005-01-20 18:52:39.000000000 -0700 -+++ linux-2.4.21-rhel/kernel/sysctl.c 2005-05-31 18:47:22.000000000 -0600 -@@ -355,6 +355,8 @@ - extern int skip_mapped_pages; - - static ctl_table vm_table[] = { -+ {VM_GFP_DEBUG, "vm_gfp_debug", -+ &vm_gfp_debug, sizeof(int), 0644, NULL, &proc_dointvec}, - {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, - &proc_dointvec_minmax, &sysctl_intvec, NULL, - &bdflush_min, &bdflush_max}, -Index: linux-2.4.21-rhel/mm/page_alloc.c -=================================================================== ---- linux-2.4.21-rhel.orig/mm/page_alloc.c 2005-05-31 18:29:37.000000000 -0600 -+++ linux-2.4.21-rhel/mm/page_alloc.c 2005-05-31 18:49:27.000000000 -0600 -@@ -58,6 +58,8 @@ - static int zone_extrafree_max[MAX_NR_ZONES] __initdata = { 1024 , 1024, 0, }; - #endif - -+int vm_gfp_debug = 0; -+ - /* - * Temporary debugging check. - */ -@@ -773,8 +775,12 @@ - } - - out_failed: -- /* No luck.. */ --// printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order); -+ if (!(gfp_mask & __GFP_NOWARN)) { -+ printk(KERN_ERR "__alloc_pages: %lu-order allocation failed (gfp=%#x/%i).\n", -+ order, gfp_mask, !!(current->flags & PF_MEMALLOC)); -+ if (unlikely(vm_gfp_debug)) -+ dump_stack(); -+ } - return NULL; - } - -Index: linux-2.4.21-rhel/include/linux/sysctl.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/sysctl.h 2005-03-18 18:25:26.000000000 -0700 -+++ linux-2.4.21-rhel/include/linux/sysctl.h 2005-05-31 18:48:17.000000000 -0600 -@@ -153,6 +153,7 @@ - VM_MIN_READAHEAD=12, /* Min file readahead */ - VM_MAX_READAHEAD=13, /* Max file readahead */ - VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ -+ VM_GFP_DEBUG=18, /* debug GFP failures */ - VM_PAGEBUF=22, /* struct: Control pagebuf parameters */ - VM_HUGETLB_POOL=23, /* int: size of the hugetlb pool, in MB */ - VM_DCACHE_PRIORITY=24, /* int: priority of the dcache pool */ -Index: linux-2.4.21-rhel/include/linux/mm.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/mm.h 2005-05-31 18:50:32.000000000 -0600 -+++ linux-2.4.21-rhel/include/linux/mm.h 2005-05-31 18:50:45.000000000 -0600 -@@ -846,6 +846,7 @@ - #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ - #define __GFP_FS 0x100 /* Can call down to low-level FS? */ - #define __GFP_WIRED 0x200 /* Highmem bias and wired */ -+#define __GFP_NOWARN 0x400 /* Don't report error on allocation failure */ - - #define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) - #define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -Index: linux-2.4.21-rhel/include/linux/swap.h -=================================================================== ---- linux-2.4.21-rhel.orig/include/linux/swap.h 2005-03-18 18:25:26.000000000 -0700 -+++ linux-2.4.21-rhel/include/linux/swap.h 2005-05-31 18:52:44.000000000 -0600 -@@ -178,6 +178,7 @@ - extern int rebalance_inactive(int); - extern void wakeup_kswapd(unsigned int); - extern void rss_free_pages(unsigned int); -+extern int vm_gfp_debug; - - /* - * Limits, in percent, on how large the cache can be and how to do diff --git a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.4.21-suse2.patch b/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.4.21-suse2.patch deleted file mode 100644 index 94c50d0..0000000 --- a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.4.21-suse2.patch +++ /dev/null @@ -1,85 +0,0 @@ -Index: linux-2.4.21-suse2/mm/filemap.c -=================================================================== ---- linux-2.4.21-suse2.orig/mm/filemap.c 2005-04-04 05:58:21.000000000 -0600 -+++ linux-2.4.21-suse2/mm/filemap.c 2005-04-04 06:18:57.000000000 -0600 -@@ -1022,6 +1022,14 @@ - */ - struct page *grab_cache_page_nowait(struct address_space *mapping, unsigned long index) - { -+ return grab_cache_page_nowait_gfp(mapping, index, mapping->gfp_mask); -+} -+ -+ -+struct page *grab_cache_page_nowait_gfp(struct address_space *mapping, -+ unsigned long index, -+ unsigned int gfp_mask) -+{ - struct page *page, **hash; - - hash = page_hash(mapping, index); -@@ -1046,7 +1054,7 @@ - } - } - -- page = page_cache_alloc(mapping); -+ page = alloc_pages(gfp_mask, 0); - if ( unlikely(!page) ) - return NULL; /* Failed to allocate a page */ - -Index: linux-2.4.21-suse2/mm/page_alloc.c -=================================================================== ---- linux-2.4.21-suse2.orig/mm/page_alloc.c 2005-04-04 05:58:04.000000000 -0600 -+++ linux-2.4.21-suse2/mm/page_alloc.c 2005-04-04 06:12:11.000000000 -0600 -@@ -435,7 +435,7 @@ - break; - - min = z->watermarks[class_idx].min; -- if (!(gfp_mask & __GFP_WAIT)) -+ if (!(gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_HIGH)) - min >>= 2; - else if (current->rt_priority) - min >>= 1; -@@ -504,6 +504,7 @@ - } - - out: -+ if (!(gfp_mask & __GFP_NOWARN)) - printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n", - order, gfp_mask, !!(current->flags & PF_MEMALLOC)); - if (unlikely(vm_gfp_debug)) -Index: linux-2.4.21-suse2/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-suse2.orig/kernel/ksyms.c 2005-04-04 06:01:39.000000000 -0600 -+++ linux-2.4.21-suse2/kernel/ksyms.c 2005-04-04 06:01:43.000000000 -0600 -@@ -326,6 +326,7 @@ - EXPORT_SYMBOL(__find_lock_page); - EXPORT_SYMBOL(find_or_create_page); - EXPORT_SYMBOL(grab_cache_page_nowait); -+EXPORT_SYMBOL(grab_cache_page_nowait_gfp); - EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); -Index: linux-2.4.21-suse2/include/linux/pagemap.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/pagemap.h 2003-11-10 17:44:33.000000000 -0700 -+++ linux-2.4.21-suse2/include/linux/pagemap.h 2005-04-04 06:01:43.000000000 -0600 -@@ -110,6 +110,7 @@ - - - extern struct page * grab_cache_page_nowait (struct address_space *, unsigned long); -+extern struct page * grab_cache_page_nowait_gfp (struct address_space *, unsigned long, unsigned int); - - typedef int filler_t(void *, struct page*); - -Index: linux-2.4.21-suse2/include/linux/mm.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/mm.h 2005-04-04 05:58:14.000000000 -0600 -+++ linux-2.4.21-suse2/include/linux/mm.h 2005-04-04 06:17:10.000000000 -0600 -@@ -661,6 +661,7 @@ - #define __GFP_IO 0x40 /* Can start low memory physical IO? */ - #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ - #define __GFP_FS 0x100 /* Can call down to low-level FS? */ -+#define __GFP_NOWARN 0x200 /* Don't warn on allocation failure */ - - #define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) - #define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) diff --git a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-rh-2.4.patch b/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-rh-2.4.patch deleted file mode 100644 index c7b00f7..0000000 --- a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-rh-2.4.patch +++ /dev/null @@ -1,65 +0,0 @@ -Index: linux-2.4.20/mm/filemap.c -=================================================================== ---- linux-2.4.20.orig/mm/filemap.c 2004-12-17 17:51:44.000000000 -0500 -+++ linux-2.4.20/mm/filemap.c 2004-12-17 17:56:54.000000000 -0500 -@@ -1153,6 +1153,14 @@ - */ - struct page *grab_cache_page_nowait(struct address_space *mapping, unsigned long index) - { -+ return grab_cache_page_nowait_gfp(mapping, index, mapping->gfp_mask); -+} -+ -+ -+struct page *grab_cache_page_nowait_gfp(struct address_space *mapping, -+ unsigned long index, -+ unsigned int gfp_mask) -+{ - struct page *page, **hash; - - hash = page_hash(mapping, index); -@@ -1177,7 +1185,7 @@ - } - } - -- page = page_cache_alloc(mapping); -+ page = alloc_pages(gfp_mask, 0); - if ( unlikely(!page) ) - return NULL; /* Failed to allocate a page */ - -Index: linux-2.4.20/mm/page_alloc.c -=================================================================== ---- linux-2.4.20.orig/mm/page_alloc.c 2004-12-17 17:51:30.000000000 -0500 -+++ linux-2.4.20/mm/page_alloc.c 2004-12-17 18:26:45.000000000 -0500 -@@ -547,6 +547,8 @@ - */ - return NULL; - } -+ } else if (!(gfp_mask & __GFP_HIGH)) { -+ return NULL; - } - } - -Index: linux-2.4.20/kernel/ksyms.c -=================================================================== ---- linux-2.4.20.orig/kernel/ksyms.c 2004-12-17 17:51:46.000000000 -0500 -+++ linux-2.4.20/kernel/ksyms.c 2004-12-17 17:52:59.000000000 -0500 -@@ -300,6 +300,7 @@ - EXPORT_SYMBOL(__find_lock_page); - EXPORT_SYMBOL(find_or_create_page); - EXPORT_SYMBOL(grab_cache_page_nowait); -+EXPORT_SYMBOL(grab_cache_page_nowait_gfp); - EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); -Index: linux-2.4.20/include/linux/pagemap.h -=================================================================== ---- linux-2.4.20.orig/include/linux/pagemap.h 2004-05-27 14:25:05.000000000 -0400 -+++ linux-2.4.20/include/linux/pagemap.h 2004-12-17 18:11:09.000000000 -0500 -@@ -110,6 +110,7 @@ - - - extern struct page * grab_cache_page_nowait (struct address_space *, unsigned long); -+extern struct page * grab_cache_page_nowait_gfp (struct address_space *, unsigned long, unsigned int); - - typedef int filler_t(void *, struct page*); - diff --git a/lustre/kernel_patches/patches/highmem-split-2.6-rhel4.patch b/lustre/kernel_patches/patches/highmem-split-2.6-rhel4.patch new file mode 100644 index 0000000..124e09c --- /dev/null +++ b/lustre/kernel_patches/patches/highmem-split-2.6-rhel4.patch @@ -0,0 +1,96 @@ +diff -Naur linux-2.6.10/arch/i386/Kconfig linux-2.6.10/arch/i386/Kconfig +--- linux-2.6.10/arch/i386/Kconfig 2004-08-25 21:38:29.435578760 -0600 ++++ linux-2.6.10/arch/i386/Kconfig 2004-08-25 20:50:36.000000000 -0600 +@@ -789,6 +789,26 @@ + depends on HIGHMEM64G + default y + ++choice ++ depends on !X86_4G ++ depends on NOHIGHMEM ++ prompt "User address space size" ++ ++config USER_3GB ++ depends on X86 ++ bool "3GB User Address Space" ++ ++config USER_2GB ++ depends on X86 ++ bool "2GB User Address Space" ++ ++config USER_1GB ++ depends on X86 ++ bool "1GB User Address Space" ++ ++endchoice ++ ++ + # Common NUMA Features + config NUMA + bool "Numa Memory Allocation and Scheduler Support" +diff -Naur linux-2.6.10/include/asm-generic/page_offset.h linux-2.6.10/include/asm-generic/page_offset.h +--- linux-2.6.10/include/asm-generic/page_offset.h 1969-12-31 17:00:00.000000000 -0700 ++++ linux-2.6.10/include/asm-generic/page_offset.h 2004-08-25 20:51:03.000000000 -0600 +@@ -0,0 +1,24 @@ ++ ++#include ++ ++#ifdef __ASSEMBLY__ ++ ++#if defined(CONFIG_USER_1GB) ++#define PAGE_OFFSET_RAW 0x40000000 ++#elif defined(CONFIG_USER_2GB) ++#define PAGE_OFFSET_RAW 0x80000000 ++#else ++#define PAGE_OFFSET_RAW 0xc0000000 ++#endif ++ ++#else ++ ++#if defined(CONFIG_USER_1GB) ++#define PAGE_OFFSET_RAW 0x40000000UL ++#elif defined(CONFIG_USER_2GB) ++#define PAGE_OFFSET_RAW 0x80000000UL ++#else ++#define PAGE_OFFSET_RAW 0xc0000000UL ++#endif ++ ++#endif +diff -Naur linux-2.6.10/include/asm-generic/vmlinux.lds.h linux-2.6.10/include/asm-generic/vmlinux.lds.h +--- linux-2.6.10/include/asm-generic/vmlinux.lds.h 2004-08-14 04:54:48.000000000 -0600 ++++ linux-2.6.10/include/asm-generic/vmlinux.lds.h 2004-08-25 20:49:32.000000000 -0600 +@@ -1,3 +1,6 @@ ++ ++#include ++ + #ifndef LOAD_OFFSET + #define LOAD_OFFSET 0 + #endif +diff -Naur linux-2.6.10/include/asm-i386/page.h linux-2.6.10/include/asm-i386/page.h +--- linux-2.6.10/include/asm-i386/page.h 2004-08-14 04:54:50.000000000 -0600 ++++ linux-2.6.10/include/asm-i386/page.h 2004-08-25 20:49:32.000000000 -0600 +@@ -103,7 +103,8 @@ + #define __PAGE_OFFSET (0x02000000) + #define TASK_SIZE ((current->personality & 0x8000000) ? 0xc0000000 : 0xff000000) + #else +-#define __PAGE_OFFSET (0xc0000000) +-#define TASK_SIZE (0xc0000000) ++#include ++#define __PAGE_OFFSET (PAGE_OFFSET_RAW) ++#define TASK_SIZE (PAGE_OFFSET_RAW) + #endif + + #ifndef __ASSEMBLY__ +diff -Naur linux-2.6.10/arch/i386/mm/pageattr.c linux-2.6.10/arch/i386/mm/pageattr.c +--- linux-2.6.10/arch/i386/mm/pageattr.c 2006-08-25 12:39:09.000000000 +0400 ++++ linux-2.6.10/arch/i386/mm/pageattr.c 2006-10-19 00:43:49.000000000 +0400 +@@ -15,7 +15,7 @@ + + static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED; + static struct list_head df_list = LIST_HEAD_INIT(df_list); +- ++extern char _stext[], _etext[], _sinittext[], _einittext[]; + + pte_t *lookup_address(unsigned long address) + { + diff --git a/lustre/kernel_patches/patches/i_filter_data.patch b/lustre/kernel_patches/patches/i_filter_data.patch new file mode 100644 index 0000000..8a21a9e --- /dev/null +++ b/lustre/kernel_patches/patches/i_filter_data.patch @@ -0,0 +1,12 @@ +Index: linux-2.6.18.8/include/linux/fs.h +=================================================================== +--- linux-2.6.18.8.orig/include/linux/fs.h 2007-06-05 12:55:19.000000000 +0200 ++++ linux-2.6.18.8/include/linux/fs.h 2007-06-05 12:55:44.000000000 +0200 +@@ -533,6 +533,7 @@ struct inode { + struct block_device *i_bdev; + struct cdev *i_cdev; + int i_cindex; ++ void *i_filterdata; + + __u32 i_generation; + diff --git a/lustre/kernel_patches/patches/increase-BH_LRU_SIZE.patch b/lustre/kernel_patches/patches/increase-BH_LRU_SIZE.patch new file mode 100644 index 0000000..1da2708 --- /dev/null +++ b/lustre/kernel_patches/patches/increase-BH_LRU_SIZE.patch @@ -0,0 +1,13 @@ +Index: linux/fs/buffer.c +=================================================================== +--- linux.orig/fs/buffer.c ++++ linux/fs/buffer.c +@@ -1429,7 +1429,7 @@ static struct buffer_head *__bread_slow( + * a local interrupt disable for that. + */ + +-#define BH_LRU_SIZE 8 ++#define BH_LRU_SIZE 32 + + struct bh_lru { + struct buffer_head *bhs[BH_LRU_SIZE]; diff --git a/lustre/kernel_patches/patches/inode-max-readahead-2.4.24.patch b/lustre/kernel_patches/patches/inode-max-readahead-2.4.24.patch deleted file mode 100644 index 9203859..0000000 --- a/lustre/kernel_patches/patches/inode-max-readahead-2.4.24.patch +++ /dev/null @@ -1,22 +0,0 @@ ---- linux-2.4.24-l32/mm/filemap.c.inode_ramax 2004-03-14 13:16:21.000000000 -0800 -+++ linux-2.4.24-l32/mm/filemap.c 2004-03-16 10:57:14.000000000 -0800 -@@ -1226,6 +1226,8 @@ - - static inline int get_max_readahead(struct inode * inode) - { -+ if (inode->i_mapping->a_ops->max_readahead) -+ return inode->i_mapping->a_ops->max_readahead(inode); - if (!inode->i_dev || !max_readahead[MAJOR(inode->i_dev)]) - return vm_max_readahead; - return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)]; ---- linux-2.4.24-l32/include/linux/fs.h.inode_ramax 2004-03-14 13:15:49.000000000 -0800 -+++ linux-2.4.24-l32/include/linux/fs.h 2004-03-15 11:56:56.000000000 -0800 -@@ -410,6 +410,8 @@ - #define KERNEL_HAS_DIRECT_FILEIO /* Unfortunate kludge due to lack of foresight */ - int (*direct_fileIO)(int, struct file *, struct kiobuf *, unsigned long, int); - void (*removepage)(struct page *); /* called when page gets removed from the inode */ -+#define KERNEL_HAS_AS_MAX_READAHEAD -+ int (*max_readahead)(struct inode *); - }; - - struct address_space { diff --git a/lustre/kernel_patches/patches/inode-nr_unused-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/inode-nr_unused-2.6.9-rhel4.patch index 4f7f591..2508221 100644 --- a/lustre/kernel_patches/patches/inode-nr_unused-2.6.9-rhel4.patch +++ b/lustre/kernel_patches/patches/inode-nr_unused-2.6.9-rhel4.patch @@ -1,7 +1,8 @@ -diff -urp b1_4.RH_2_6_9_42_0_3.old/fs/fs-writeback.c b1_4.RH_2_6_9_42_0_3/fs/fs-writeback.c ---- b1_4.RH_2_6_9_42_0_3.old/fs/fs-writeback.c 2006-10-23 13:33:05.000000000 +0300 -+++ b1_4.RH_2_6_9_42_0_3/fs/fs-writeback.c 2007-04-15 00:31:43.000000000 +0300 -@@ -230,7 +230,6 @@ __sync_single_inode(struct inode *inode, +Index: RH_2_6_9_55/fs/fs-writeback.c +=================================================================== +--- RH_2_6_9_55.orig/fs/fs-writeback.c ++++ RH_2_6_9_55/fs/fs-writeback.c +@@ -230,7 +230,6 @@ * The inode is clean, unused */ list_move(&inode->i_list, &inode_unused); @@ -9,19 +10,19 @@ diff -urp b1_4.RH_2_6_9_42_0_3.old/fs/fs-writeback.c b1_4.RH_2_6_9_42_0_3/fs/fs- } } wake_up_inode(inode); -@@ -244,6 +243,11 @@ static int +@@ -244,6 +243,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { -+ if (!atomic_read(&inode->i_count)) -+ WARN_ON(!(inode->i_state & I_WILL_FREE)); ++ if (!atomic_read(&inode->i_count)) ++ WARN_ON(!(inode->i_state & I_WILL_FREE)); + else + WARN_ON(inode->i_state & I_WILL_FREE); + if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { list_move(&inode->i_list, &inode->i_sb->s_dirty); return 0; -@@ -253,10 +257,8 @@ __writeback_single_inode(struct inode *i +@@ -253,10 +257,8 @@ * It's a data-integrity sync. We must wait. */ while (inode->i_state & I_LOCK) { @@ -32,15 +33,15 @@ diff -urp b1_4.RH_2_6_9_42_0_3.old/fs/fs-writeback.c b1_4.RH_2_6_9_42_0_3/fs/fs- spin_lock(&inode_lock); } return __sync_single_inode(inode, wbc); -ä×ÏÉÞÎÙÅ ÆÁÊÌÙ b1_4.RH_2_6_9_42_0_3.old/fs/fs-writeback.o É b1_4.RH_2_6_9_42_0_3/fs/fs-writeback.o ÒÁÚÌÉÞÁÀÔÓÑ -diff -urp b1_4.RH_2_6_9_42_0_3.old/fs/inode.c b1_4.RH_2_6_9_42_0_3/fs/inode.c ---- b1_4.RH_2_6_9_42_0_3.old/fs/inode.c 2006-12-14 15:20:40.000000000 +0200 -+++ b1_4.RH_2_6_9_42_0_3/fs/inode.c 2007-04-15 00:31:28.000000000 +0300 -@@ -1054,6 +1054,7 @@ static void generic_forget_inode(struct +Index: RH_2_6_9_55/fs/inode.c +=================================================================== +--- RH_2_6_9_55.orig/fs/inode.c ++++ RH_2_6_9_55/fs/inode.c +@@ -1056,6 +1056,7 @@ if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); -+ wake_up_inode(inode); ++ wake_up_inode(inode); destroy_inode(inode); } diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.20-hp.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.20-hp.patch deleted file mode 100644 index fad6233..0000000 --- a/lustre/kernel_patches/patches/invalidate_show-2.4.20-hp.patch +++ /dev/null @@ -1,123 +0,0 @@ - - - - fs/inode.c | 21 ++++++++++++++------- - fs/smbfs/inode.c | 2 +- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 4 files changed, 18 insertions(+), 11 deletions(-) - -Index: linux/fs/inode.c -=================================================================== ---- linux.orig/fs/inode.c Mon Feb 2 21:24:21 2004 -+++ linux/fs/inode.c Mon Feb 2 21:27:53 2004 -@@ -632,7 +632,8 @@ - /* - * Invalidate all inodes for a device. - */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -657,6 +658,11 @@ - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -675,23 +681,24 @@ - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock -+ * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -717,7 +724,7 @@ - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); -Index: linux/fs/super.c -=================================================================== ---- linux.orig/fs/super.c Mon Feb 2 21:24:21 2004 -+++ linux/fs/super.c Mon Feb 2 21:26:08 2004 -@@ -844,7 +844,7 @@ - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -853,7 +853,7 @@ - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } -Index: linux/include/linux/fs.h -=================================================================== ---- linux.orig/include/linux/fs.h Mon Feb 2 21:24:23 2004 -+++ linux/include/linux/fs.h Mon Feb 2 21:26:08 2004 -@@ -1257,7 +1257,7 @@ - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); -Index: linux/fs/smbfs/inode.c -=================================================================== ---- linux.orig/fs/smbfs/inode.c Thu Nov 28 18:53:15 2002 -+++ linux/fs/smbfs/inode.c Mon Feb 2 21:26:08 2004 -@@ -167,7 +167,7 @@ - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch deleted file mode 100644 index 2ff807a..0000000 --- a/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch +++ /dev/null @@ -1,114 +0,0 @@ - fs/inode.c | 23 +++++++++++++++-------- - fs/smbfs/inode.c | 2 +- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 4 files changed, 19 insertions(+), 12 deletions(-) - ---- kernel-2.4.20/fs/inode.c~invalidate_show-2.4.20-rh 2003-05-24 01:56:40.000000000 -0400 -+++ kernel-2.4.20-root/fs/inode.c 2003-06-02 00:35:37.000000000 -0400 -@@ -628,7 +628,8 @@ static void dispose_list(struct list_hea - /* - * Invalidate all inodes for a device. - */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -653,6 +654,11 @@ static int invalidate_list(struct list_h - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -671,23 +677,24 @@ static int invalidate_list(struct list_h - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock -+ * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -713,7 +720,7 @@ int invalidate_device(kdev_t dev, int do - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); ---- kernel-2.4.20/fs/super.c~invalidate_show-2.4.20-rh 2003-05-24 01:56:24.000000000 -0400 -+++ kernel-2.4.20-root/fs/super.c 2003-06-02 00:35:00.000000000 -0400 -@@ -943,7 +943,7 @@ void kill_super(struct super_block *sb) - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -952,7 +952,7 @@ void kill_super(struct super_block *sb) - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } ---- kernel-2.4.20/include/linux/fs.h~invalidate_show-2.4.20-rh 2003-06-02 00:31:47.000000000 -0400 -+++ kernel-2.4.20-root/include/linux/fs.h 2003-06-02 00:35:00.000000000 -0400 -@@ -1284,7 +1284,7 @@ static inline void mark_buffer_dirty_ino - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); ---- kernel-2.4.20/fs/smbfs/inode.c~invalidate_show-2.4.20-rh 2002-11-28 18:53:15.000000000 -0500 -+++ kernel-2.4.20-root/fs/smbfs/inode.c 2003-06-02 00:35:00.000000000 -0400 -@@ -167,7 +167,7 @@ smb_invalidate_inodes(struct smb_sb_info - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* - -_ diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.29.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.29.patch deleted file mode 100644 index dedc90b..0000000 --- a/lustre/kernel_patches/patches/invalidate_show-2.4.29.patch +++ /dev/null @@ -1,107 +0,0 @@ -Index: linux-2.4.29/fs/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/inode.c 2005-04-07 18:55:16.732416736 +0300 -+++ linux-2.4.29/fs/inode.c 2005-04-07 19:16:46.772300864 +0300 -@@ -670,7 +670,8 @@ - /* - * Invalidate all inodes for a device. - */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -695,6 +696,11 @@ - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -719,17 +725,17 @@ - * If the discard is successful all the inodes have been discarded. - */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -755,7 +761,7 @@ - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); -Index: linux-2.4.29/fs/super.c -=================================================================== ---- linux-2.4.29.orig/fs/super.c 2005-04-07 18:53:30.978493776 +0300 -+++ linux-2.4.29/fs/super.c 2005-04-07 19:14:26.187672976 +0300 -@@ -844,7 +844,7 @@ - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -853,7 +853,7 @@ - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-04-07 19:14:06.319693368 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-04-07 19:14:26.190672520 +0300 -@@ -1286,7 +1286,7 @@ - extern int get_buffer_flushtime(void); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); -Index: linux-2.4.29/fs/smbfs/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/smbfs/inode.c 2005-04-07 18:52:37.889564520 +0300 -+++ linux-2.4.29/fs/smbfs/inode.c 2005-04-07 19:14:26.192672216 +0300 -@@ -175,7 +175,7 @@ - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* diff --git a/lustre/kernel_patches/patches/invalidate_show.patch b/lustre/kernel_patches/patches/invalidate_show.patch deleted file mode 100644 index 217e98e..0000000 --- a/lustre/kernel_patches/patches/invalidate_show.patch +++ /dev/null @@ -1,112 +0,0 @@ - fs/inode.c | 21 ++++++++++++++------- - fs/smbfs/inode.c | 2 +- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 4 files changed, 18 insertions(+), 11 deletions(-) - ---- linux-rh-2.4.20-8/fs/inode.c~invalidate_show 2003-04-11 14:04:56.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/inode.c 2003-04-16 20:59:35.000000000 +0800 -@@ -604,7 +604,8 @@ static void dispose_list(struct list_hea - /* - * Invalidate all inodes for a device. - */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -629,6 +630,11 @@ static int invalidate_list(struct list_h - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -647,22 +653,23 @@ static int invalidate_list(struct list_h - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock -+ * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -688,7 +695,7 @@ int invalidate_device(kdev_t dev, int do - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); ---- linux-rh-2.4.20-8/fs/super.c~invalidate_show 2003-04-11 14:04:57.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/super.c 2003-04-16 20:59:35.000000000 +0800 -@@ -943,7 +943,7 @@ void kill_super(struct super_block *sb) - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -952,7 +952,7 @@ void kill_super(struct super_block *sb) - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } ---- linux-rh-2.4.20-8/include/linux/fs.h~invalidate_show 2003-04-16 20:55:35.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/fs.h 2003-04-16 20:59:35.000000000 +0800 -@@ -1283,7 +1283,7 @@ static inline void mark_buffer_dirty_ino - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); ---- linux-rh-2.4.20-8/fs/smbfs/inode.c~invalidate_show 2003-04-16 20:59:48.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/smbfs/inode.c 2003-04-16 21:00:43.000000000 +0800 -@@ -167,7 +167,7 @@ smb_invalidate_inodes(struct smb_sb_info - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* - -_ diff --git a/lustre/kernel_patches/patches/iod-rmap-exports-2.4.21-chaos.patch b/lustre/kernel_patches/patches/iod-rmap-exports-2.4.21-chaos.patch deleted file mode 100644 index 63a6274..0000000 --- a/lustre/kernel_patches/patches/iod-rmap-exports-2.4.21-chaos.patch +++ /dev/null @@ -1,94 +0,0 @@ - fs/Makefile | 4 +++- - fs/inode.c | 4 +++- - mm/Makefile | 2 +- - mm/page_alloc.c | 1 + - mm/vmscan.c | 3 +++ - 5 files changed, 11 insertions(+), 3 deletions(-) - -Index: linux-2.4.21-chaos/fs/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/inode.c 2003-12-12 16:18:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/inode.c 2003-12-12 16:18:08.000000000 +0300 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -69,7 +70,8 @@ - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. -Index: linux-2.4.21-chaos/fs/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/Makefile 2003-07-15 04:41:00.000000000 +0400 -+++ linux-2.4.21-chaos/fs/Makefile 2003-12-12 16:18:08.000000000 +0300 -@@ -1,3 +1,5 @@ -+ -+ - # - # Makefile for the Linux filesystems. - # -@@ -7,7 +9,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o inode.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.21-chaos/mm/vmscan.c -=================================================================== ---- linux-2.4.21-chaos.orig/mm/vmscan.c 2003-12-05 07:55:51.000000000 +0300 -+++ linux-2.4.21-chaos/mm/vmscan.c 2003-12-12 16:18:08.000000000 +0300 -@@ -15,6 +15,8 @@ - * O(1) rmap vm, Arjan van de ven - */ - -+#include -+#include - #include - #include - #include -@@ -1210,6 +1212,7 @@ - set_current_state(TASK_RUNNING); - remove_wait_queue(&kswapd_done, &wait); - } -+EXPORT_SYMBOL(wakeup_kswapd); - - static void wakeup_memwaiters(void) - { -Index: linux-2.4.21-chaos/mm/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/mm/Makefile 2003-07-15 04:41:42.000000000 +0400 -+++ linux-2.4.21-chaos/mm/Makefile 2003-12-12 16:18:08.000000000 +0300 -@@ -9,7 +9,7 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o usercopy.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o usercopy.o vmscan.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ -Index: linux-2.4.21-chaos/mm/page_alloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/mm/page_alloc.c 2003-12-05 07:55:51.000000000 +0300 -+++ linux-2.4.21-chaos/mm/page_alloc.c 2003-12-12 16:18:08.000000000 +0300 -@@ -27,6 +27,7 @@ - - int nr_swap_pages; - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * diff --git a/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch b/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch deleted file mode 100644 index 3035f55..0000000 --- a/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch +++ /dev/null @@ -1,48 +0,0 @@ - fs/Makefile | 2 +- - fs/inode.c | 4 +++- - mm/page_alloc.c | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - ---- linux/fs/inode.c~iod-stock-24-exports_hp Wed Apr 9 10:44:54 2003 -+++ linux-mmonroe/fs/inode.c Wed Apr 9 10:49:50 2003 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ static LIST_HEAD(anon_hash_chain); /* fo - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. ---- linux/fs/Makefile~iod-stock-24-exports_hp Wed Apr 9 10:26:08 2003 -+++ linux-mmonroe/fs/Makefile Wed Apr 9 10:49:50 2003 -@@ -7,7 +7,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o - mod-subdirs := nls xfs - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux/mm/page_alloc.c~iod-stock-24-exports_hp Wed Apr 9 10:26:14 2003 -+++ linux-mmonroe/mm/page_alloc.c Wed Apr 9 10:49:50 2003 -@@ -28,6 +28,7 @@ int nr_inactive_pages; - LIST_HEAD(inactive_list); - LIST_HEAD(active_list); - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * - -_ diff --git a/lustre/kernel_patches/patches/iod-stock-exports-2.4.22.patch b/lustre/kernel_patches/patches/iod-stock-exports-2.4.22.patch deleted file mode 100644 index 47a6ff9..0000000 --- a/lustre/kernel_patches/patches/iod-stock-exports-2.4.22.patch +++ /dev/null @@ -1,52 +0,0 @@ - fs/Makefile | 2 +- - fs/inode.c | 4 +++- - mm/page_alloc.c | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - -Index: linux-2.4.22-vanilla/fs/inode.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/inode.c 2003-11-03 23:22:24.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/inode.c 2003-11-03 23:25:00.000000000 +0300 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. -Index: linux-2.4.22-vanilla/fs/Makefile -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/Makefile 2003-11-03 23:22:11.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Makefile 2003-11-03 23:25:24.000000000 +0300 -@@ -7,7 +7,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.22-vanilla/mm/page_alloc.c -=================================================================== ---- linux-2.4.22-vanilla.orig/mm/page_alloc.c 2003-11-03 23:21:29.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/page_alloc.c 2003-11-03 23:25:00.000000000 +0300 -@@ -28,6 +28,7 @@ - LIST_HEAD(inactive_list); - LIST_HEAD(active_list); - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * diff --git a/lustre/kernel_patches/patches/iopen-2.4.20.patch b/lustre/kernel_patches/patches/iopen-2.4.20.patch deleted file mode 100644 index d5a28e6..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.20.patch +++ /dev/null @@ -1,495 +0,0 @@ - Documentation/filesystems/ext2.txt | 16 ++ - fs/ext3/Makefile | 2 - fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 13 + - fs/ext3/namei.c | 13 + - fs/ext3/super.c | 11 + - include/linux/ext3_fs.h | 2 - 8 files changed, 318 insertions(+), 2 deletions(-) - -Index: lum/Documentation/filesystems/ext2.txt -=================================================================== ---- lum.orig/Documentation/filesystems/ext2.txt 2001-07-11 16:44:45.000000000 -0600 -+++ lum/Documentation/filesystems/ext2.txt 2004-03-09 16:46:38.000000000 -0700 -@@ -35,6 +35,22 @@ - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. - - -Index: lum/fs/ext3/Makefile -=================================================================== ---- lum.orig/fs/ext3/Makefile 2004-01-30 14:54:50.000000000 -0700 -+++ lum/fs/ext3/Makefile 2004-03-09 16:46:39.000000000 -0700 -@@ -11,7 +11,7 @@ - - export-objs := ext3-exports.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -Index: lum/fs/ext3/inode.c -=================================================================== ---- lum.orig/fs/ext3/inode.c 2004-01-30 14:54:55.000000000 -0700 -+++ lum/fs/ext3/inode.c 2004-03-09 16:46:41.000000000 -0700 -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2153,6 +2154,9 @@ - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; -Index: lum/fs/ext3/iopen.c -=================================================================== ---- lum.orig/fs/ext3/iopen.c 2004-03-09 16:46:37.000000000 -0700 -+++ lum/fs/ext3/iopen.c 2004-03-09 16:48:03.000000000 -0700 -@@ -0,0 +1,285 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del_init(&goal->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&dentry->d_child); -+ -+ /* Switch the parents and the names.. */ -+ switch_names(goal, dentry); -+ do_switch(goal->d_parent, dentry->d_parent); -+ do_switch(goal->d_name.len, dentry->d_name.len); -+ do_switch(goal->d_name.hash, dentry->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); -+ __d_rehash(goal, 0); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: lum/fs/ext3/iopen.h -=================================================================== ---- lum.orig/fs/ext3/iopen.h 2004-03-09 16:46:37.000000000 -0700 -+++ lum/fs/ext3/iopen.h 2004-03-09 16:48:03.000000000 -0700 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: lum/fs/ext3/namei.c -=================================================================== ---- lum.orig/fs/ext3/namei.c 2004-01-30 14:54:53.000000000 -0700 -+++ lum/fs/ext3/namei.c 2004-03-09 16:49:25.000000000 -0700 -@@ -35,7 +35,7 @@ - #include - #include - #include -- -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. -@@ -931,6 +931,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -942,8 +945,8 @@ - return ERR_PTR(-EACCES); - } - } -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - #define S_SHIFT 12 -@@ -1932,10 +1935,6 @@ - inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -@@ -2086,6 +2085,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2113,7 +2129,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } -Index: lum/fs/ext3/super.c -=================================================================== ---- lum.orig/fs/ext3/super.c 2004-01-30 14:54:53.000000000 -0700 -+++ lum/fs/ext3/super.c 2004-03-09 16:46:45.000000000 -0700 -@@ -742,6 +742,18 @@ - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create -Index: lum/include/linux/ext3_fs.h -=================================================================== ---- lum.orig/include/linux/ext3_fs.h 2004-01-30 14:54:53.000000000 -0700 -+++ lum/include/linux/ext3_fs.h 2004-03-09 16:46:46.000000000 -0700 -@@ -324,4 +324,6 @@ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ -+#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch b/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch deleted file mode 100644 index 1510c9b..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch +++ /dev/null @@ -1,497 +0,0 @@ - Documentation/filesystems/ext2.txt | 16 ++ - fs/ext3/Makefile | 2 - fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 13 + - fs/ext3/namei.c | 13 + - fs/ext3/super.c | 11 + - include/linux/ext3_fs.h | 2 - 8 files changed, 318 insertions(+), 2 deletions(-) - -Index: linux-ia64/Documentation/filesystems/ext2.txt -=================================================================== ---- linux-ia64.orig/Documentation/filesystems/ext2.txt 2004-03-17 15:47:15.000000000 -0800 -+++ linux-ia64/Documentation/filesystems/ext2.txt 2004-03-17 18:03:15.000000000 -0800 -@@ -35,6 +35,22 @@ resgid=n The group ID which may use th - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. - - -Index: linux-ia64/fs/ext3/Makefile -=================================================================== ---- linux-ia64.orig/fs/ext3/Makefile 2004-03-17 18:03:14.000000000 -0800 -+++ linux-ia64/fs/ext3/Makefile 2004-03-17 18:03:15.000000000 -0800 -@@ -11,7 +11,7 @@ O_TARGET := ext3.o - - export-objs := ext3-exports.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -Index: linux-ia64/fs/ext3/inode.c -=================================================================== ---- linux-ia64.orig/fs/ext3/inode.c 2004-03-17 18:03:15.000000000 -0800 -+++ linux-ia64/fs/ext3/inode.c 2004-03-17 18:10:36.000000000 -0800 -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2430,6 +2431,9 @@ void ext3_read_inode(struct inode * inod - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; -Index: linux-ia64/fs/ext3/iopen.c -=================================================================== ---- linux-ia64.orig/fs/ext3/iopen.c 2004-03-17 18:02:08.000000000 -0800 -+++ linux-ia64/fs/ext3/iopen.c 2004-03-17 18:10:58.000000000 -0800 -@@ -0,0 +1,285 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(list_empty(&dentry->d_hash)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del_init(&goal->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&dentry->d_child); -+ -+ /* Switch the parents and the names.. */ -+ switch_names(goal, dentry); -+ do_switch(goal->d_parent, dentry->d_parent); -+ do_switch(goal->d_name.len, dentry->d_name.len); -+ do_switch(goal->d_name.hash, dentry->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); -+ __d_rehash(goal, 0); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-ia64/fs/ext3/iopen.h -=================================================================== ---- linux-ia64.orig/fs/ext3/iopen.h 2004-03-17 15:47:15.000000000 -0800 -+++ linux-ia64/fs/ext3/iopen.h 2004-03-17 18:03:15.000000000 -0800 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-ia64/fs/ext3/namei.c -=================================================================== ---- linux-ia64.orig/fs/ext3/namei.c 2004-03-17 18:03:15.000000000 -0800 -+++ linux-ia64/fs/ext3/namei.c 2004-03-17 18:10:35.000000000 -0800 -@@ -36,7 +36,7 @@ - #include - #include - #include -- -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. -@@ -932,6 +932,9 @@ static struct dentry *ext3_lookup(struct - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -943,8 +946,8 @@ static struct dentry *ext3_lookup(struct - return ERR_PTR(-EACCES); - } - } -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - #define S_SHIFT 12 -@@ -1935,10 +1938,6 @@ static int ext3_rmdir (struct inode * di - inode->i_nlink); - inode->i_version = ++event; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -@@ -2057,6 +2056,23 @@ out_stop: - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2084,7 +2100,8 @@ static int ext3_link (struct dentry * ol - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } -Index: linux-ia64/fs/ext3/super.c -=================================================================== ---- linux-ia64.orig/fs/ext3/super.c 2004-03-17 18:03:14.000000000 -0800 -+++ linux-ia64/fs/ext3/super.c 2004-03-17 18:10:35.000000000 -0800 -@@ -891,6 +891,18 @@ static int parse_options (char * options - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create -Index: linux-ia64/include/linux/ext3_fs.h -=================================================================== ---- linux-ia64.orig/include/linux/ext3_fs.h 2004-03-17 18:03:15.000000000 -0800 -+++ linux-ia64/include/linux/ext3_fs.h 2004-03-17 18:03:15.000000000 -0800 -@@ -328,6 +328,8 @@ struct ext3_inode { - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ -+#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/iopen-2.6-fc5.patch b/lustre/kernel_patches/patches/iopen-2.6-fc5.patch deleted file mode 100644 index 6bbcec5..0000000 --- a/lustre/kernel_patches/patches/iopen-2.6-fc5.patch +++ /dev/null @@ -1,448 +0,0 @@ -Index: linux-2.6.16.i686/fs/ext3/iopen.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/iopen.c 2006-05-31 04:14:15.752410384 +0800 -+++ linux-2.6.16.i686/fs/ext3/iopen.c 2006-05-30 22:52:38.000000000 +0800 -@@ -0,0 +1,259 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ spin_lock(&alternate->d_lock); -+ alternate->d_flags |= DCACHE_REFERENCED; -+ spin_unlock(&alternate->d_lock); -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ spin_unlock(&dcache_lock); -+ -+ d_rehash(dentry); -+ -+ return NULL; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue */ -+ goal->d_flags &= ~DCACHE_DISCONNECTED; -+ security_d_instantiate(goal, inode); -+ __d_drop(dentry); -+ spin_unlock(&dcache_lock); -+ d_rehash(dentry); -+ d_move(goal, dentry); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ spin_unlock(&dcache_lock); -+ if (rehash) -+ d_rehash(dentry); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-2.6.16.i686/fs/ext3/iopen.h -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/iopen.h 2006-05-31 04:14:15.752410384 +0800 -+++ linux-2.6.16.i686/fs/ext3/iopen.h 2006-05-30 22:52:38.000000000 +0800 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-2.6.16.i686/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/inode.c 2006-05-30 22:52:03.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/inode.c 2006-05-30 22:52:38.000000000 +0800 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - static int ext3_writepage_trans_blocks(struct inode *inode); -@@ -2448,6 +2449,8 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; - #endif - ei->i_block_alloc_info = NULL; -+ if (ext3_iopen_get_inode(inode)) -+ return; - - if (__ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; -Index: linux-2.6.16.i686/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/super.c 2006-05-30 22:52:03.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/super.c 2006-05-30 22:52:38.000000000 +0800 -@@ -634,6 +634,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, -+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_grpquota - }; - -@@ -682,6 +683,9 @@ - {Opt_noquota, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, -+ {Opt_iopen, "iopen"}, -+ {Opt_noiopen, "noiopen"}, -+ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -996,6 +1000,18 @@ - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; -+ case Opt_iopen: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_noiopen: -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_iopen_nopriv: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; - case Opt_ignore: - break; - case Opt_resize: -Index: linux-2.6.16.i686/fs/ext3/namei.c -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/namei.c 2006-05-30 22:52:00.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/namei.c 2006-05-30 22:55:19.000000000 +0800 -@@ -39,6 +39,7 @@ - - #include "namei.h" - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -995,6 +996,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -1005,7 +1009,7 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- return d_splice_alias(inode, dentry); -+ return iopen_connect_dentry(dentry, inode, 1); - } - - -@@ -2046,10 +2050,6 @@ - inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); -@@ -2173,6 +2173,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2196,7 +2213,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; -Index: linux-2.6.16.i686/fs/ext3/Makefile -=================================================================== ---- linux-2.6.16.i686.orig/fs/ext3/Makefile 2006-03-20 13:53:29.000000000 +0800 -+++ linux-2.6.16.i686/fs/ext3/Makefile 2006-05-30 22:52:38.000000000 +0800 -@@ -4,7 +4,7 @@ - - obj-$(CONFIG_EXT3_FS) += ext3.o - --ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-2.6.16.i686/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.i686.orig/include/linux/ext3_fs.h 2006-05-30 22:52:00.000000000 +0800 -+++ linux-2.6.16.i686/include/linux/ext3_fs.h 2006-05-30 22:52:38.000000000 +0800 -@@ -375,6 +375,8 @@ - #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ - #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ - #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -+#define EXT3_MOUNT_IOPEN 0x400000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch b/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch deleted file mode 100644 index 98dbca4..0000000 --- a/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch +++ /dev/null @@ -1,471 +0,0 @@ -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2005-02-25 14:31:53.151076368 +0200 -+++ linux-stage/fs/ext3/Makefile 2005-02-25 14:41:51.259150120 +0200 -@@ -4,7 +4,7 @@ - - obj-$(CONFIG_EXT3_FS) += ext3.o - --ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2005-02-25 14:37:30.983718000 +0200 -+++ linux-stage/fs/ext3/inode.c 2005-02-25 14:47:42.069818792 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -2408,6 +2409,8 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; - #endif - ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; -+ if (ext3_iopen_get_inode(inode)) -+ return; - - if (ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; -Index: linux-stage/fs/ext3/iopen.c -=================================================================== ---- linux-stage.orig/fs/ext3/iopen.c 2005-02-25 14:41:01.017787968 +0200 -+++ linux-stage/fs/ext3/iopen.c 2005-02-25 14:41:01.045783712 +0200 -@@ -0,0 +1,278 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ spin_lock(&alternate->d_lock); -+ alternate->d_flags |= DCACHE_REFERENCED; -+ spin_unlock(&alternate->d_lock); -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue */ -+ goal->d_flags &= ~DCACHE_DISCONNECTED; -+ security_d_instantiate(goal, inode); -+ __d_drop(dentry); -+ __d_rehash(dentry, 0); -+ __d_move(goal, dentry); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-stage/fs/ext3/iopen.h -=================================================================== ---- linux-stage.orig/fs/ext3/iopen.h 2005-02-25 14:41:01.017787968 +0200 -+++ linux-stage/fs/ext3/iopen.h 2005-02-25 14:41:01.045783712 +0200 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-stage/fs/ext3/namei.c -=================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2005-02-25 14:37:28.975023368 +0200 -+++ linux-stage/fs/ext3/namei.c 2005-02-25 14:46:43.090784968 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -980,6 +981,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -990,10 +994,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- if (inode) -- return d_splice_alias(inode, dentry); -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - -@@ -2037,10 +2039,6 @@ - inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -@@ -2163,6 +2161,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2186,7 +2201,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:37:30.987717392 +0200 -+++ linux-stage/fs/ext3/super.c 2005-02-25 14:44:50.495901992 +0200 -@@ -586,6 +586,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, -+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - }; - - static match_table_t tokens = { -@@ -633,6 +634,9 @@ - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, -+ {Opt_iopen, "iopen"}, -+ {Opt_noiopen, "noiopen"}, -+ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -914,6 +918,18 @@ - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; -+ case Opt_iopen: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_noiopen: -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_iopen_nopriv: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; - case Opt_ignore: - break; - case Opt_resize: -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:37:28.977023064 +0200 -+++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:49:00.569884968 +0200 -@@ -355,6 +355,8 @@ - #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ - #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */ - #define EXT3_MOUNT_RESERVATION 0x20000 /* Preallocation */ -+#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/iopen-2.6-suse.patch b/lustre/kernel_patches/patches/iopen-2.6-suse.patch deleted file mode 100644 index 1c5e900..0000000 --- a/lustre/kernel_patches/patches/iopen-2.6-suse.patch +++ /dev/null @@ -1,472 +0,0 @@ -Index: linux-stage/fs/ext3/Makefile -=================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2005-02-25 14:31:53.151076368 +0200 -+++ linux-stage/fs/ext3/Makefile 2005-02-25 14:41:51.259150120 +0200 -@@ -4,7 +4,7 @@ - - obj-$(CONFIG_EXT3_FS) += ext3.o - --ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2005-02-25 14:37:30.983718000 +0200 -+++ linux-stage/fs/ext3/inode.c 2005-02-25 14:47:42.069818792 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -2408,6 +2409,9 @@ - #endif - ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if (ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; - bh = iloc.bh; -Index: linux-stage/fs/ext3/iopen.c -=================================================================== ---- linux-2.6.5-sles9.orig/fs/ext3/iopen.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.5-sles9/fs/ext3/iopen.c 2004-11-09 02:18:27.611913312 +0300 -@@ -0,0 +1,278 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ spin_lock(&alternate->d_lock); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ spin_unlock(&alternate->d_lock); -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue */ -+ goal->d_flags &= ~DCACHE_DISCONNECTED; -+ security_d_instantiate(goal, inode); -+ __d_drop(dentry); -+ __d_rehash(dentry, 0); -+ __d_move(goal, dentry); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ __d_rehash(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-stage/fs/ext3/iopen.h -=================================================================== ---- linux-stage.orig/fs/ext3/iopen.h 2005-02-25 14:41:01.017787968 +0200 -+++ linux-stage/fs/ext3/iopen.h 2005-02-25 14:41:01.045783712 +0200 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-stage/fs/ext3/namei.c -=================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2005-02-25 14:37:28.975023368 +0200 -+++ linux-stage/fs/ext3/namei.c 2005-02-25 14:46:43.090784968 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -980,6 +981,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -990,10 +994,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- if (inode) -- return d_splice_alias(inode, dentry); -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - -@@ -2037,10 +2039,6 @@ - inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -@@ -2163,6 +2161,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2186,7 +2201,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:37:30.987717392 +0200 -+++ linux-stage/fs/ext3/super.c 2005-02-25 14:44:50.495901992 +0200 -@@ -586,6 +586,7 @@ - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_ignore, Opt_barrier, - Opt_err, -+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - }; - - static match_table_t tokens = { -@@ -633,6 +634,9 @@ - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, -+ {Opt_iopen, "iopen"}, -+ {Opt_noiopen, "noiopen"}, -+ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL} - }; -@@ -914,6 +918,18 @@ - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; -+ case Opt_iopen: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_noiopen: -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_iopen_nopriv: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; - case Opt_ignore: - break; - default: -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:37:28.977023064 +0200 -+++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:49:00.569884968 +0200 -@@ -355,6 +355,8 @@ - #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ - #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ - #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ -+#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/iopen-2.6.12.patch b/lustre/kernel_patches/patches/iopen-2.6.12.patch deleted file mode 100644 index 8d456ac..0000000 --- a/lustre/kernel_patches/patches/iopen-2.6.12.patch +++ /dev/null @@ -1,471 +0,0 @@ -Index: linux-2.6.12-rc6/fs/ext3/Makefile -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/Makefile 2005-06-14 16:00:45.206720992 +0200 -+++ linux-2.6.12-rc6/fs/ext3/Makefile 2005-06-14 16:14:33.595382720 +0200 -@@ -4,7 +4,7 @@ - - obj-$(CONFIG_EXT3_FS) += ext3.o - --ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-2.6.12-rc6/fs/ext3/inode.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/inode.c 2005-06-14 16:01:16.272150299 +0200 -+++ linux-2.6.12-rc6/fs/ext3/inode.c 2005-06-14 16:24:55.686195412 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - static int ext3_writepage_trans_blocks(struct inode *inode); -@@ -2437,6 +2438,8 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; - #endif - ei->i_block_alloc_info = NULL; -+ if (ext3_iopen_get_inode(inode)) -+ return; - - if (__ext3_get_inode_loc(inode, &iloc, 0)) - goto bad_inode; -Index: linux-2.6.12-rc6/fs/ext3/iopen.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/iopen.c 2005-06-14 16:14:33.530929595 +0200 -+++ linux-2.6.12-rc6/fs/ext3/iopen.c 2005-06-14 16:14:33.626632719 +0200 -@@ -0,0 +1,278 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ spin_lock(&alternate->d_lock); -+ alternate->d_flags |= DCACHE_REFERENCED; -+ spin_unlock(&alternate->d_lock); -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+ -+ d_rehash_cond(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, -+ int rehash) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* verify this dentry is really new */ -+ assert(dentry->d_inode == NULL); -+ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ -+ if (rehash) -+ assert(d_unhashed(dentry)); /* d_rehash */ -+ assert(list_empty(&dentry->d_subdirs)); -+ -+ spin_lock(&dcache_lock); -+ if (!inode) -+ goto do_rehash; -+ -+ if (!test_opt(inode->i_sb, IOPEN)) -+ goto do_instantiate; -+ -+ /* preferrably return a connected dentry */ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ goto do_instantiate; -+ -+ /* Move the goal to the de hash queue */ -+ goal->d_flags &= ~DCACHE_DISCONNECTED; -+ security_d_instantiate(goal, inode); -+ __d_drop(dentry); -+ d_rehash_cond(dentry, 0); -+ __d_move(goal, dentry); -+ spin_unlock(&dcache_lock); -+ iput(inode); -+ -+ return goal; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+do_instantiate: -+ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ -+ dentry->d_inode = inode; -+do_rehash: -+ if (rehash) -+ d_rehash_cond(dentry, 0); /* d_rehash */ -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. -+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} -Index: linux-2.6.12-rc6/fs/ext3/iopen.h -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/iopen.h 2005-06-14 16:14:33.534835845 +0200 -+++ linux-2.6.12-rc6/fs/ext3/iopen.h 2005-06-14 16:14:33.633468657 +0200 -@@ -0,0 +1,15 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); -+extern struct dentry *iopen_connect_dentry(struct dentry *dentry, -+ struct inode *inode, int rehash); -Index: linux-2.6.12-rc6/fs/ext3/namei.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/namei.c 2005-06-14 16:01:14.701837819 +0200 -+++ linux-2.6.12-rc6/fs/ext3/namei.c 2005-06-14 16:14:33.644210844 +0200 -@@ -37,6 +37,7 @@ - #include - #include - #include "xattr.h" -+#include "iopen.h" - #include "acl.h" - - /* -@@ -985,6 +986,9 @@ - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -995,10 +999,8 @@ - if (!inode) - return ERR_PTR(-EACCES); - } -- if (inode) -- return d_splice_alias(inode, dentry); -- d_add(dentry, inode); -- return NULL; -+ -+ return iopen_connect_dentry(dentry, inode, 1); - } - - -@@ -2042,10 +2044,6 @@ - inode->i_nlink); - inode->i_version++; - inode->i_nlink = 0; -- /* There's no need to set i_disksize: the fact that i_nlink is -- * zero will ensure that the right thing happens during any -- * recovery. */ -- inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); -@@ -2168,6 +2166,23 @@ - return err; - } - -+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ -+static int ext3_add_link(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ int err = ext3_add_entry(handle, dentry, inode); -+ if (!err) { -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ dput(iopen_connect_dentry(dentry, inode, 0)); -+ return 0; -+ } -+ } -+ ext3_dec_count(handle, inode); -+ iput(inode); -+ return err; -+} -+ - static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) - { -@@ -2191,7 +2206,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- err = ext3_add_nondir(handle, dentry, inode); -+ err = ext3_add_link(handle, dentry, inode); -+ ext3_orphan_del(handle, inode); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; -Index: linux-2.6.12-rc6/fs/ext3/super.c -=================================================================== ---- linux-2.6.12-rc6.orig/fs/ext3/super.c 2005-06-14 16:01:16.287775299 +0200 -+++ linux-2.6.12-rc6/fs/ext3/super.c 2005-06-14 16:14:33.656906156 +0200 -@@ -590,6 +590,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, -+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - }; - - static match_table_t tokens = { -@@ -638,6 +639,9 @@ - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, -+ {Opt_iopen, "iopen"}, -+ {Opt_noiopen, "noiopen"}, -+ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, -@@ -921,6 +925,18 @@ - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; -+ case Opt_iopen: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_noiopen: -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; -+ case Opt_iopen_nopriv: -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ break; - case Opt_ignore: - break; - case Opt_resize: -Index: linux-2.6.12-rc6/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12-rc6.orig/include/linux/ext3_fs.h 2005-06-14 16:01:14.709650318 +0200 -+++ linux-2.6.12-rc6/include/linux/ext3_fs.h 2005-06-14 16:28:38.452794245 +0200 -@@ -358,6 +358,8 @@ - #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ - #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ - #define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ -+#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/ipoib_tcpdump.patch b/lustre/kernel_patches/patches/ipoib_tcpdump.patch new file mode 100644 index 0000000..bf9126f --- /dev/null +++ b/lustre/kernel_patches/patches/ipoib_tcpdump.patch @@ -0,0 +1,18 @@ +--- linux/net/core/dev.c.orig 2007-01-22 15:51:34.000000000 +0000 ++++ linux/net/core/dev.c 2007-01-22 16:00:09.000000000 +0000 +@@ -2333,10 +2333,11 @@ static int dev_ifsioc(struct ifreq *ifr, + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCGIFHWADDR: +- if ((size_t) dev->addr_len > sizeof ifr->ifr_hwaddr.sa_data) +- return -EOVERFLOW; +- memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); +- memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, dev->addr_len); ++ if (!dev->addr_len) ++ memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); ++ else ++ memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, ++ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; + return 0; + diff --git a/lustre/kernel_patches/patches/jbd-checkpoint-on-commit.patch b/lustre/kernel_patches/patches/jbd-checkpoint-on-commit.patch new file mode 100644 index 0000000..2ee1829 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-checkpoint-on-commit.patch @@ -0,0 +1,138 @@ +Index: linux/fs/jbd/checkpoint.c +=================================================================== +--- linux.orig/fs/jbd/checkpoint.c ++++ linux/fs/jbd/checkpoint.c +@@ -170,6 +170,15 @@ static int __cleanup_transaction(journal + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); ++ if (journal->j_task == current) { ++ /* ++ * Don't wait for commit if called from ++ * kjournald to avoid deadlock. ++ */ ++ ret = -EAGAIN; ++ spin_lock(&journal->j_list_lock); ++ break; ++ } + log_wait_commit(journal, tid); + goto out_return_1; + } +@@ -286,6 +295,7 @@ static int __flush_buffer(journal_t *jou + int log_do_checkpoint(journal_t *journal) + { + int result; ++ int result2; + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + +@@ -300,6 +310,7 @@ int log_do_checkpoint(journal_t *journal + jbd_debug(1, "cleanup_journal_tail returned %d\n", result); + if (result <= 0) + return result; ++ result = 0; + + /* + * OK, we need to start writing disk blocks. Try to free up a +@@ -314,7 +325,7 @@ int log_do_checkpoint(journal_t *journal + transaction_t *transaction; + struct journal_head *jh, *last_jh, *next_jh; + int drop_count = 0; +- int cleanup_ret, retry = 0; ++ int retry = 0; + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; +@@ -364,17 +375,20 @@ int log_do_checkpoint(journal_t *journal + * finding anything to write to disk. We had better be + * able to make some progress or we are in trouble. + */ +- cleanup_ret = __cleanup_transaction(journal, transaction); +- J_ASSERT(drop_count != 0 || cleanup_ret != 0); ++ result = __cleanup_transaction(journal, transaction); ++ J_ASSERT(drop_count != 0 || result != 0); ++ if (result == -EAGAIN) ++ break; ++ result = 0; + if (journal->j_checkpoint_transactions != transaction) + break; + } + spin_unlock(&journal->j_list_lock); +- result = cleanup_journal_tail(journal); +- if (result < 0) +- return result; ++ result2 = cleanup_journal_tail(journal); ++ if (result == 0 && result2 < 0) ++ result = result2; + +- return 0; ++ return result; + } + + /* +Index: linux/fs/jbd/journal.c +=================================================================== +--- linux.orig/fs/jbd/journal.c ++++ linux/fs/jbd/journal.c +@@ -130,6 +130,8 @@ int kjournald(void *arg) + journal_t *journal = (journal_t *) arg; + transaction_t *transaction; + struct timer_list timer; ++ int transaction_size; ++ int canlock; + + current_journal = journal; + +@@ -154,12 +156,15 @@ int kjournald(void *arg) + */ + spin_lock(&journal->j_state_lock); + ++ transaction_size = 0; + loop: + jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + journal->j_commit_sequence, journal->j_commit_request); + + if (journal->j_commit_sequence != journal->j_commit_request) { + jbd_debug(1, "OK, requests differ\n"); ++ transaction_size = ++ journal->j_running_transaction->t_outstanding_credits; + spin_unlock(&journal->j_state_lock); + del_timer_sync(journal->j_commit_timer); + journal_commit_transaction(journal); +@@ -168,6 +173,36 @@ loop: + } + + wake_up(&journal->j_wait_done_commit); ++ ++ /* ++ * If journal is low on free space, do a checkpoint, pro-actively. ++ * ++ * Do this after wake-up to reduce waiters latency. ++ */ ++ ++ /* ++ * Trylock to avoid deadlock with threads waiting for commit under ++ * journal->j_checkpoint_sem. ++ */ ++ spin_unlock(&journal->j_state_lock); ++ canlock = !down_trylock(&journal->j_checkpoint_sem); ++ spin_lock(&journal->j_state_lock); ++ ++ if (canlock) { ++ while (__log_space_left(journal) < ++ journal->j_max_transaction_buffers + ++ 2 * transaction_size) { ++ int result; ++ ++ spin_unlock(&journal->j_state_lock); ++ result = log_do_checkpoint(journal); ++ spin_lock(&journal->j_state_lock); ++ if (result < 0) ++ break; ++ } ++ up(&journal->j_checkpoint_sem); ++ } ++ + if (current->flags & PF_FREEZE) { + /* + * The simpler the better. Flushing journal isn't a diff --git a/lustre/kernel_patches/patches/jbd-commit-tricks-rhel3.patch b/lustre/kernel_patches/patches/jbd-commit-tricks-rhel3.patch deleted file mode 100644 index 725931c..0000000 --- a/lustre/kernel_patches/patches/jbd-commit-tricks-rhel3.patch +++ /dev/null @@ -1,132 +0,0 @@ - ---- linux-2.4.18/fs/jbd/checkpoint.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/checkpoint.c Mon Jul 28 14:03:59 2003 -@@ -77,19 +77,23 @@ static int __try_to_free_cp_buf(struct j - * to wait for a checkpoint to free up some space in the log. - */ - --void log_wait_for_space(journal_t *journal, int nblocks) -+void log_wait_for_space(journal_t *journal) - { -+ int nblocks; -+ -+ nblocks = jbd_space_needed(journal); - while (log_space_left(journal) < nblocks) { - if (journal->j_flags & JFS_ABORT) - return; - unlock_journal(journal); - down(&journal->j_checkpoint_sem); - lock_journal(journal); -+ nblocks = jbd_space_needed(journal); - - /* Test again, another process may have checkpointed - * while we were waiting for the checkpoint lock */ - if (log_space_left(journal) < nblocks) { -- log_do_checkpoint(journal, nblocks); -+ log_do_checkpoint(journal); - } - up(&journal->j_checkpoint_sem); - } -@@ -260,8 +264,7 @@ static int __flush_buffer(journal_t *jou - * The journal should be locked before calling this function. - */ - --/* @@@ `nblocks' is unused. Should it be used? */ --int log_do_checkpoint (journal_t *journal, int nblocks) -+int log_do_checkpoint (journal_t *journal) - { - transaction_t *transaction, *last_transaction, *next_transaction; - int result; -@@ -315,6 +318,8 @@ repeat: - retry = __flush_buffer(journal, jh, bhs, &batch_count, - &drop_count); - } while (jh != last_jh && !retry); -+ if (journal->j_checkpoint_transactions != transaction) -+ goto done; - if (batch_count) { - __flush_batch(bhs, &batch_count); - goto repeat; -@@ -328,6 +333,8 @@ repeat: - */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); -+ if (journal->j_checkpoint_transactions != transaction) -+ goto done; - goto repeat; /* __cleanup may have dropped lock */ - } while (transaction != last_transaction); - ---- linux-2.4.18/fs/jbd/journal.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/journal.c Mon Jul 28 14:03:59 2003 -@@ -1115,7 +1115,7 @@ void journal_destroy (journal_t *journal - /* Force any old transactions to disk */ - lock_journal(journal); - while (journal->j_checkpoint_transactions != NULL) -- log_do_checkpoint(journal, 1); -+ log_do_checkpoint(journal); - - J_ASSERT(journal->j_running_transaction == NULL); - J_ASSERT(journal->j_committing_transaction == NULL); -@@ -1302,7 +1302,7 @@ int journal_flush (journal_t *journal) - /* ...and flush everything in the log out to disk. */ - lock_journal(journal); - while (!err && journal->j_checkpoint_transactions != NULL) -- err = log_do_checkpoint(journal, journal->j_maxlen); -+ err = log_do_checkpoint(journal); - cleanup_journal_tail(journal); - - /* Finally, mark the journal as really needing no recovery. ---- linux-2.4.18/fs/jbd/transaction.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/transaction.c Mon Jul 28 14:03:59 2003 -@@ -182,14 +182,9 @@ repeat_locked: - * Also, this test is inconsitent with the matching one in - * journal_extend(). - */ -- needed = journal->j_max_transaction_buffers; -- if (journal->j_committing_transaction) -- needed += journal->j_committing_transaction-> -- t_outstanding_credits; -- -- if (log_space_left(journal) < needed) { -+ if (log_space_left(journal) < jbd_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); -- log_wait_for_space(journal, needed); -+ log_wait_for_space(journal); - goto repeat_locked; - } - ---- linux-2.4.18/include/linux/jbd.h~jbd-commit-tricks Mon Jul 28 13:52:17 2003 -+++ linux-2.4.18-alexey/include/linux/jbd.h Mon Jul 28 14:03:59 2003 -@@ -740,9 +740,9 @@ extern void journal_brelse_array(stru - extern int log_space_left (journal_t *); /* Called with journal locked */ - extern tid_t log_start_commit (journal_t *, transaction_t *); - extern int log_wait_commit (journal_t *, tid_t); --extern int log_do_checkpoint (journal_t *, int); -+extern int log_do_checkpoint (journal_t *); - --extern void log_wait_for_space(journal_t *, int nblocks); -+extern void log_wait_for_space(journal_t *); - extern void __journal_drop_transaction(journal_t *, transaction_t *); - extern int cleanup_journal_tail(journal_t *); - -@@ -815,6 +815,19 @@ static inline int tid_geq(tid_t x, tid_t - } - - extern int journal_blocks_per_page(struct inode *inode); -+ -+/* -+ * Return the minimum number of blocks which must be free in the journal -+ * before a new transaction may be started. Must be called under j_state_lock. -+ */ -+static inline int jbd_space_needed(journal_t *journal) -+{ -+ int nblocks = journal->j_max_transaction_buffers; -+ if (journal->j_committing_transaction) -+ nblocks += journal->j_committing_transaction-> -+ t_outstanding_credits; -+ return nblocks; -+} - - /* - * Definitions which augment the buffer_head layer - -_ diff --git a/lustre/kernel_patches/patches/jbd-commit-tricks.patch b/lustre/kernel_patches/patches/jbd-commit-tricks.patch deleted file mode 100644 index 467c29f..0000000 --- a/lustre/kernel_patches/patches/jbd-commit-tricks.patch +++ /dev/null @@ -1,132 +0,0 @@ - ---- linux-2.4.18/fs/jbd/checkpoint.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/checkpoint.c Mon Jul 28 14:03:59 2003 -@@ -77,19 +77,23 @@ static int __try_to_free_cp_buf(struct j - * to wait for a checkpoint to free up some space in the log. - */ - --void log_wait_for_space(journal_t *journal, int nblocks) -+void log_wait_for_space(journal_t *journal) - { -+ int nblocks; -+ -+ nblocks = jbd_space_needed(journal); - while (log_space_left(journal) < nblocks) { - if (journal->j_flags & JFS_ABORT) - return; - unlock_journal(journal); - down(&journal->j_checkpoint_sem); - lock_journal(journal); -+ nblocks = jbd_space_needed(journal); - - /* Test again, another process may have checkpointed - * while we were waiting for the checkpoint lock */ - if (log_space_left(journal) < nblocks) { -- log_do_checkpoint(journal, nblocks); -+ log_do_checkpoint(journal); - } - up(&journal->j_checkpoint_sem); - } -@@ -260,8 +264,7 @@ static int __flush_buffer(journal_t *jou - * The journal should be locked before calling this function. - */ - --/* @@@ `nblocks' is unused. Should it be used? */ --int log_do_checkpoint (journal_t *journal, int nblocks) -+int log_do_checkpoint (journal_t *journal) - { - transaction_t *transaction, *last_transaction, *next_transaction; - int result; -@@ -315,6 +318,8 @@ repeat: - retry = __flush_buffer(journal, jh, bhs, &batch_count, - &drop_count); - } while (jh != last_jh && !retry); -+ if (journal->j_checkpoint_transactions != transaction) -+ goto done; - if (batch_count) { - __flush_batch(bhs, &batch_count); - goto repeat; -@@ -328,6 +333,8 @@ repeat: - */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); -+ if (journal->j_checkpoint_transactions != transaction) -+ goto done; - goto repeat; /* __cleanup may have dropped lock */ - } while (transaction != last_transaction); - ---- linux-2.4.18/fs/jbd/journal.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/journal.c Mon Jul 28 14:03:59 2003 -@@ -1115,7 +1115,7 @@ void journal_destroy (journal_t *journal - /* Force any old transactions to disk */ - lock_journal(journal); - while (journal->j_checkpoint_transactions != NULL) -- log_do_checkpoint(journal, 1); -+ log_do_checkpoint(journal); - - J_ASSERT(journal->j_running_transaction == NULL); - J_ASSERT(journal->j_committing_transaction == NULL); -@@ -1302,7 +1302,7 @@ int journal_flush (journal_t *journal) - /* ...and flush everything in the log out to disk. */ - lock_journal(journal); - while (!err && journal->j_checkpoint_transactions != NULL) -- err = log_do_checkpoint(journal, journal->j_maxlen); -+ err = log_do_checkpoint(journal); - cleanup_journal_tail(journal); - - /* Finally, mark the journal as really needing no recovery. ---- linux-2.4.18/fs/jbd/transaction.c~jbd-commit-tricks Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/transaction.c Mon Jul 28 14:03:59 2003 -@@ -182,14 +182,9 @@ repeat_locked: - * Also, this test is inconsitent with the matching one in - * journal_extend(). - */ -- needed = journal->j_max_transaction_buffers; -- if (journal->j_committing_transaction) -- needed += journal->j_committing_transaction-> -- t_outstanding_credits; -- -- if (log_space_left(journal) < needed) { -+ if (log_space_left(journal) < jbd_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); -- log_wait_for_space(journal, needed); -+ log_wait_for_space(journal); - goto repeat_locked; - } - ---- linux-2.4.18/include/linux/jbd.h~jbd-commit-tricks Mon Jul 28 13:52:17 2003 -+++ linux-2.4.18-alexey/include/linux/jbd.h Mon Jul 28 14:03:59 2003 -@@ -740,9 +740,9 @@ extern void journal_brelse_array(stru - extern int log_space_left (journal_t *); /* Called with journal locked */ - extern tid_t log_start_commit (journal_t *, transaction_t *); - extern void log_wait_commit (journal_t *, tid_t); --extern int log_do_checkpoint (journal_t *, int); -+extern int log_do_checkpoint (journal_t *); - --extern void log_wait_for_space(journal_t *, int nblocks); -+extern void log_wait_for_space(journal_t *); - extern void __journal_drop_transaction(journal_t *, transaction_t *); - extern int cleanup_journal_tail(journal_t *); - -@@ -815,6 +815,19 @@ static inline int tid_geq(tid_t x, tid_t - } - - extern int journal_blocks_per_page(struct inode *inode); -+ -+/* -+ * Return the minimum number of blocks which must be free in the journal -+ * before a new transaction may be started. Must be called under j_state_lock. -+ */ -+static inline int jbd_space_needed(journal_t *journal) -+{ -+ int nblocks = journal->j_max_transaction_buffers; -+ if (journal->j_committing_transaction) -+ nblocks += journal->j_committing_transaction-> -+ t_outstanding_credits; -+ return nblocks; -+} - - /* - * Definitions which augment the buffer_head layer - -_ diff --git a/lustre/kernel_patches/patches/jbd-copy-out-everything.patch b/lustre/kernel_patches/patches/jbd-copy-out-everything.patch new file mode 100644 index 0000000..219c82d --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-copy-out-everything.patch @@ -0,0 +1,52 @@ +Index: linux/fs/jbd/journal.c +=================================================================== +--- linux.orig/fs/jbd/journal.c ++++ linux/fs/jbd/journal.c +@@ -323,7 +323,7 @@ int journal_write_metadata_buffer(transa + struct journal_head **jh_out, + int blocknr) + { +- int need_copy_out = 0; ++ int need_copy_out; + int done_copy_out = 0; + int do_escape = 0; + char *mapped_data; +@@ -369,7 +369,8 @@ repeat: + cpu_to_be32(JFS_MAGIC_NUMBER)) { + need_copy_out = 1; + do_escape = 1; +- } ++ } else ++ need_copy_out = jh_in->b_committed_data == NULL; + kunmap_atomic(mapped_data, KM_USER0); + + /* +Index: linux/fs/jbd/transaction.c +=================================================================== +--- linux.orig/fs/jbd/transaction.c ++++ linux/fs/jbd/transaction.c +@@ -817,6 +817,15 @@ int journal_get_create_access(handle_t * + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); ++ if (!((jh->b_transaction == transaction || ++ jh->b_transaction == NULL || ++ (jh->b_transaction == journal->j_committing_transaction && ++ jh->b_jlist == BJ_Forget)))) { ++ printk("%s: %p %p %p %p %i\n", __FUNCTION__, ++ jh, jh->b_transaction, transaction, ++ journal->j_committing_transaction, jh->b_jlist); ++ ++ } + J_ASSERT_JH(jh, (jh->b_transaction == transaction || + jh->b_transaction == NULL || + (jh->b_transaction == journal->j_committing_transaction && +@@ -1115,6 +1124,8 @@ int journal_dirty_metadata(handle_t *han + journal_t *journal = transaction->t_journal; + struct journal_head *jh = bh2jh(bh); + ++ BUG_ON(transaction == NULL); ++ + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); + if (is_handle_aborted(handle)) diff --git a/lustre/kernel_patches/patches/jbd-ctx_switch.patch b/lustre/kernel_patches/patches/jbd-ctx_switch.patch deleted file mode 100644 index 8d4607f..0000000 --- a/lustre/kernel_patches/patches/jbd-ctx_switch.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff -rup linux-2.4.20-uml/fs/jbd/transaction.c linux-2.4.21-rc2/fs/jbd/transaction.c ---- linux-2.4.20-uml/fs/jbd/transaction.c Thu Nov 28 16:53:15 2002 -+++ linux-2.4.21-rc2/fs/jbd/transaction.c Fri May 16 11:00:40 2003 -@@ -666,7 +673,8 @@ repeat: - spin_unlock(&journal_datalist_lock); - unlock_journal(journal); - /* commit wakes up all shadow buffers after IO */ -- sleep_on(&jh2bh(jh)->b_wait); -+ wait_event(jh2bh(jh)->b_wait, -+ jh->b_jlist != BJ_Shadow); - lock_journal(journal); - goto repeat; - } diff --git a/lustre/kernel_patches/patches/jbd-dont-account-blocks-twice.patch b/lustre/kernel_patches/patches/jbd-dont-account-blocks-twice.patch deleted file mode 100644 index d1be6f02..0000000 --- a/lustre/kernel_patches/patches/jbd-dont-account-blocks-twice.patch +++ /dev/null @@ -1,17 +0,0 @@ - ---- linux-2.4.18/fs/jbd/commit.c~jbd-dont-account-blocks-twice Mon Jul 28 13:52:05 2003 -+++ linux-2.4.18-alexey/fs/jbd/commit.c Mon Jul 28 14:03:53 2003 -@@ -407,6 +407,11 @@ sync_datalist_empty: - continue; - } - -+ /* start_this_handle() accounts t_outstanding_credits -+ * to know free space in log, but this counter is changed -+ * by journal_next_log_block() also. */ -+ commit_transaction->t_outstanding_credits--; -+ - /* Bump b_count to prevent truncate from stumbling over - the shadowed buffer! @@@ This can go if we ever get - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ - -_ diff --git a/lustre/kernel_patches/patches/jbd-flushtime.patch b/lustre/kernel_patches/patches/jbd-flushtime.patch deleted file mode 100644 index 938c142..0000000 --- a/lustre/kernel_patches/patches/jbd-flushtime.patch +++ /dev/null @@ -1,34 +0,0 @@ -diff -rup linux-2.4.20-uml/fs/jbd/transaction.c linux-2.4.21-rc2/fs/jbd/transaction.c ---- linux-2.4.20-uml/fs/jbd/transaction.c Thu Nov 28 16:53:15 2002 -+++ linux-2.4.21-rc2/fs/jbd/transaction.c Fri May 16 11:00:40 2003 -@@ -1109,7 +1142,6 @@ int journal_dirty_metadata (handle_t *ha - - spin_lock(&journal_datalist_lock); - set_bit(BH_JBDDirty, &bh->b_state); -- set_buffer_flushtime(bh); - - J_ASSERT_JH(jh, jh->b_transaction != NULL); - -@@ -2035,6 +2093,13 @@ void journal_file_buffer(struct journal_ - spin_unlock(&journal_datalist_lock); - } - -+static void jbd_refile_buffer(struct buffer_head *bh) -+{ -+ if (buffer_dirty(bh) && (bh->b_list != BUF_DIRTY)) -+ set_buffer_flushtime(bh); -+ refile_buffer(bh); -+} -+ - /* - * Remove a buffer from its current buffer list in preparation for - * dropping it from its current transaction entirely. If the buffer has -@@ -2055,7 +2120,7 @@ void __journal_refile_buffer(struct jour - __journal_unfile_buffer(jh); - jh->b_transaction = NULL; - /* Onto BUF_DIRTY for writeback */ -- refile_buffer(jh2bh(jh)); -+ jbd_refile_buffer(jh2bh(jh)); - return; - } - diff --git a/lustre/kernel_patches/patches/jbd-get_write_access.patch b/lustre/kernel_patches/patches/jbd-get_write_access.patch deleted file mode 100644 index ea569dc..0000000 --- a/lustre/kernel_patches/patches/jbd-get_write_access.patch +++ /dev/null @@ -1,56 +0,0 @@ -diff -rup linux-2.4.20-uml/fs/jbd/transaction.c linux-2.4.21-rc2/fs/jbd/transaction.c ---- linux-2.4.20-uml/fs/jbd/transaction.c Thu Nov 28 16:53:15 2002 -+++ linux-2.4.21-rc2/fs/jbd/transaction.c Fri May 16 11:00:40 2003 -@@ -735,7 +743,8 @@ done_locked: - int offset; - char *source; - -- J_ASSERT_JH(jh, buffer_uptodate(jh2bh(jh))); -+ J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), -+ "Possible IO failure.\n"); - page = jh2bh(jh)->b_page; - offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; - source = kmap(page); -diff -rup linux-2.4.20-uml/include/linux/jbd.h linux-2.4.21-rc2/include/linux/jbd.h ---- linux-2.4.20-uml/include/linux/jbd.h Mon Aug 25 16:16:57 2003 -+++ linux-2.4.21-rc2/include/linux/jbd.h Tue May 13 13:28:29 2003 -@@ -40,6 +40,15 @@ - */ - #undef JBD_PARANOID_WRITES - -+/* -+ * Define JBD_PARANIOD_IOFAIL to cause a kernel BUG() if ext3 finds -+ * certain classes of error which can occur due to failed IOs. Under -+ * normal use we want ext3 to continue after such errors, because -+ * hardware _can_ fail, but for debugging purposes when running tests on -+ * known-good hardware we may want to trap these errors. -+ */ -+#undef JBD_PARANOID_IOFAIL -+ - #ifdef CONFIG_JBD_DEBUG - /* - * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal -@@ -232,6 +272,23 @@ void buffer_assertion_failure(struct buf - #define J_ASSERT(assert) do { } while (0) - #endif /* JBD_ASSERTIONS */ - -+#if defined(JBD_PARANOID_IOFAIL) -+#define J_EXPECT(expr, why...) J_ASSERT(expr) -+#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) -+#define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) -+#else -+#define __journal_expect(expr, why...) \ -+ do { \ -+ if (!(expr)) { \ -+ printk(KERN_ERR "EXT3-fs unexpected failure: %s;\n", # expr); \ -+ printk(KERN_ERR why); \ -+ } \ -+ } while (0) -+#define J_EXPECT(expr, why...) __journal_expect(expr, ## why) -+#define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) -+#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) -+#endif -+ - enum jbd_state_bits { - BH_JWrite - = BH_PrivateStart, /* 1 if being written to log (@@@ DEBUGGING) */ diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch index 7a48375..3847e1e 100644 --- a/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch +++ b/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch @@ -1,7 +1,7 @@ Index: linux-2.6.9/include/linux/jbd.h =================================================================== ---- linux-2.6.9.orig/include/linux/jbd.h 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9/include/linux/jbd.h 2006-07-28 02:32:18.000000000 +0400 +--- linux-2.6.9.orig/include/linux/jbd.h ++++ linux-2.6.9/include/linux/jbd.h @@ -422,6 +422,16 @@ struct handle_s }; @@ -118,14 +118,14 @@ Index: linux-2.6.9/include/linux/jbd.h */ Index: linux-2.6.9/fs/jbd/commit.c =================================================================== ---- linux-2.6.9.orig/fs/jbd/commit.c 2006-03-10 18:20:39.000000000 +0300 -+++ linux-2.6.9/fs/jbd/commit.c 2006-07-28 02:32:18.000000000 +0400 +--- linux-2.6.9.orig/fs/jbd/commit.c ++++ linux-2.6.9/fs/jbd/commit.c @@ -21,6 +21,7 @@ #include #include #include +#include - + /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour @@ -133,66 +133,67 @@ Index: linux-2.6.9/fs/jbd/commit.c void journal_commit_transaction(journal_t *journal) { + struct transaction_stats_s stats; - transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; - struct buffer_head *wbuf[64]; + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; + struct buffer_head *wbuf[64]; @@ -147,6 +149,11 @@ void journal_commit_transaction(journal_ - spin_lock(&journal->j_state_lock); - commit_transaction->t_state = T_LOCKED; - + spin_lock(&journal->j_state_lock); + commit_transaction->t_state = T_LOCKED; + + stats.ts_wait = commit_transaction->t_max_wait; + stats.ts_locked = CURRENT_MSECS; + stats.ts_running = jbd_time_diff(commit_transaction->t_start, + stats.ts_locked); + - spin_lock(&commit_transaction->t_handle_lock); - while (commit_transaction->t_updates) { - DEFINE_WAIT(wait); + spin_lock(&commit_transaction->t_handle_lock); + while (commit_transaction->t_updates) { + DEFINE_WAIT(wait); @@ -219,6 +226,9 @@ void journal_commit_transaction(journal_ - */ - journal_switch_revoke_table(journal); - + */ + journal_switch_revoke_table(journal); + + stats.ts_flushing = CURRENT_MSECS; + stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing); + - commit_transaction->t_state = T_FLUSH; - journal->j_committing_transaction = commit_transaction; - journal->j_running_transaction = NULL; + commit_transaction->t_state = T_FLUSH; + journal->j_committing_transaction = commit_transaction; + journal->j_running_transaction = NULL; @@ -365,6 +375,11 @@ write_out_data: - */ - commit_transaction->t_state = T_COMMIT; - + */ + commit_transaction->t_state = T_COMMIT; + + stats.ts_logging = CURRENT_MSECS; + stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging); + stats.ts_blocks = commit_transaction->t_outstanding_credits; + stats.ts_blocks_logged = 0; + - descriptor = NULL; - bufs = 0; - while (commit_transaction->t_buffers) { + descriptor = NULL; + bufs = 0; + while (commit_transaction->t_buffers) { @@ -513,6 +528,7 @@ start_journal_io: - submit_bh(WRITE, bh); - } - cond_resched(); + submit_bh(WRITE, bh); + } + cond_resched(); + stats.ts_blocks_logged += bufs; - - /* Force a new descriptor to be generated next + + /* Force a new descriptor to be generated next time round the loop. */ -@@ -760,6 +776,7 @@ skip_commit: /* The journal should be un - cp_transaction = jh->b_cp_transaction; - if (cp_transaction) { - JBUFFER_TRACE(jh, "remove from old cp transaction"); +@@ -767,6 +783,7 @@ restart_loop: + cp_transaction = jh->b_cp_transaction; + if (cp_transaction) { + JBUFFER_TRACE(jh, "remove from old cp transaction"); + cp_transaction->t_chp_stats.cs_dropped++; - __journal_remove_checkpoint(jh); - } - -@@ -806,6 +823,36 @@ skip_commit: /* The journal should be un - - J_ASSERT(commit_transaction->t_state == T_COMMIT); - + __journal_remove_checkpoint(jh); + } + +@@ -816,6 +833,37 @@ restart_loop: + } + cond_resched_lock(&journal->j_list_lock); + } ++ + commit_transaction->t_start = CURRENT_MSECS; + stats.ts_logging = jbd_time_diff(stats.ts_logging, -+ commit_transaction->t_start); ++ commit_transaction->t_start); + + /* + * File the transaction for history @@ -202,7 +203,7 @@ Index: linux-2.6.9/fs/jbd/commit.c + stats.ts_handle_count = commit_transaction->t_handle_count; + spin_lock(&journal->j_history_lock); + memcpy(journal->j_history + journal->j_history_cur, &stats, -+ sizeof(stats)); ++ sizeof(stats)); + if (++journal->j_history_cur == journal->j_history_max) + journal->j_history_cur = 0; + @@ -220,70 +221,70 @@ Index: linux-2.6.9/fs/jbd/commit.c + journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged; + spin_unlock(&journal->j_history_lock); + - /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __journal_remove_checkpoint. + spin_unlock(&journal->j_list_lock); + /* + * This is a bit sleazy. We borrow j_list_lock to protect Index: linux-2.6.9/fs/jbd/checkpoint.c =================================================================== ---- linux-2.6.9.orig/fs/jbd/checkpoint.c 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9/fs/jbd/checkpoint.c 2006-07-28 02:35:21.000000000 +0400 +--- linux-2.6.9.orig/fs/jbd/checkpoint.c ++++ linux-2.6.9/fs/jbd/checkpoint.c @@ -166,6 +166,7 @@ static int __cleanup_transaction(journal - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + + transaction->t_chp_stats.cs_forced_to_close++; - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); -@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); +@@ -226,7 +227,7 @@ __flush_batch(journal_t *journal, struct */ static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, + struct buffer_head **bhs, int *batch_count, - int *drop_count) + int *drop_count, transaction_t *transaction) { - struct buffer_head *bh = jh2bh(jh); - int ret = 0; -@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou - set_buffer_jwrite(bh); - bhs[*batch_count] = bh; - jbd_unlock_bh_state(bh); + struct buffer_head *bh = jh2bh(jh); + int ret = 0; +@@ -247,6 +248,7 @@ static int __flush_buffer(journal_t *jou + set_buffer_jwrite(bh); + bhs[*batch_count] = bh; + jbd_unlock_bh_state(bh); + transaction->t_chp_stats.cs_written++; - (*batch_count)++; - if (*batch_count == NR_BATCH) { - __flush_batch(journal, bhs, batch_count); -@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal - tid_t this_tid; - - transaction = journal->j_checkpoint_transactions; + (*batch_count)++; + if (*batch_count == NR_BATCH) { + __flush_batch(journal, bhs, batch_count); +@@ -315,6 +317,8 @@ int log_do_checkpoint(journal_t *journal + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; + if (transaction->t_chp_stats.cs_chp_time == 0) + transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; -@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal - retry = 1; - break; - } + this_tid = transaction->t_tid; + jh = transaction->t_checkpoint_list; + last_jh = jh->b_cpprev; +@@ -331,7 +335,8 @@ int log_do_checkpoint(journal_t *journal + retry = 1; + break; + } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); -+ retry = __flush_buffer(journal, jh, bhs, &batch_count, -+ &drop_count, transaction); - } while (jh != last_jh && !retry); - - if (batch_count) { -@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct - ++ retry = __flush_buffer(journal, jh, bhs, &batch_count, ++ &drop_count, transaction); + } while (jh != last_jh && !retry); + + if (batch_count) { +@@ -597,6 +602,8 @@ void __journal_insert_checkpoint(struct + void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) { + struct transaction_stats_s stats; + - assert_spin_locked(&journal->j_list_lock); - if (transaction->t_cpnext) { - transaction->t_cpnext->t_cpprev = transaction->t_cpprev; -@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_ - J_ASSERT(journal->j_running_transaction != transaction); - - jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); + assert_spin_locked(&journal->j_list_lock); + if (transaction->t_cpnext) { + transaction->t_cpnext->t_cpprev = transaction->t_cpprev; +@@ -622,5 +629,25 @@ void __journal_drop_transaction(journal_ + J_ASSERT(journal->j_running_transaction != transaction); + + jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); + + /* + * File the transaction for history @@ -304,12 +305,12 @@ Index: linux-2.6.9/fs/jbd/checkpoint.c + spin_unlock(&journal->j_history_lock); + } + - kfree(transaction); + kfree(transaction); } Index: linux-2.6.9/fs/jbd/transaction.c =================================================================== ---- linux-2.6.9.orig/fs/jbd/transaction.c 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9/fs/jbd/transaction.c 2006-07-28 02:32:18.000000000 +0400 +--- linux-2.6.9.orig/fs/jbd/transaction.c ++++ linux-2.6.9/fs/jbd/transaction.c @@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran J_ASSERT(journal->j_running_transaction == NULL); @@ -342,8 +343,8 @@ Index: linux-2.6.9/fs/jbd/transaction.c transaction->t_updates++; Index: linux-2.6.9/fs/jbd/journal.c =================================================================== ---- linux-2.6.9.orig/fs/jbd/journal.c 2006-06-19 21:31:57.000000000 +0400 -+++ linux-2.6.9/fs/jbd/journal.c 2006-07-28 02:32:18.000000000 +0400 +--- linux-2.6.9.orig/fs/jbd/journal.c ++++ linux-2.6.9/fs/jbd/journal.c @@ -36,6 +36,7 @@ #include #include @@ -688,7 +689,7 @@ Index: linux-2.6.9/fs/jbd/journal.c if (journal->j_inode) iput(journal->j_inode); if (journal->j_revoke) -@@ -1895,6 +2197,28 @@ static void __exit remove_jbd_proc_entry +@@ -1896,6 +2198,28 @@ static void __exit remove_jbd_proc_entry #endif @@ -717,7 +718,7 @@ Index: linux-2.6.9/fs/jbd/journal.c kmem_cache_t *jbd_handle_cache; static int __init journal_init_handle_cache(void) -@@ -1949,6 +2273,7 @@ static int __init journal_init(void) +@@ -1950,6 +2274,7 @@ static int __init journal_init(void) if (ret != 0) journal_destroy_caches(); create_jbd_proc_entry(); @@ -725,7 +726,7 @@ Index: linux-2.6.9/fs/jbd/journal.c return ret; } -@@ -1960,6 +2285,7 @@ static void __exit journal_exit(void) +@@ -1961,6 +2286,7 @@ static void __exit journal_exit(void) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif remove_jbd_proc_entry(); diff --git a/lustre/kernel_patches/patches/kallsyms-2.4.29.patch b/lustre/kernel_patches/patches/kallsyms-2.4.29.patch deleted file mode 100644 index d270aa4..0000000 --- a/lustre/kernel_patches/patches/kallsyms-2.4.29.patch +++ /dev/null @@ -1,689 +0,0 @@ -Index: linux-2.4.29/arch/arm/vmlinux-armo.lds.in -=================================================================== ---- linux-2.4.29.orig/arch/arm/vmlinux-armo.lds.in 2005-05-08 23:06:26.916055656 +0300 -+++ linux-2.4.29/arch/arm/vmlinux-armo.lds.in 2005-05-08 23:07:11.214321296 +0300 -@@ -62,6 +62,10 @@ - *(__ksymtab) - __stop___ksymtab = .; - -+ __start___kallsyms = .; /* All kernel symbols */ -+ *(__kallsyms) -+ __stop___kallsyms = .; -+ - *(.got) /* Global offset table */ - - _etext = .; /* End of text section */ -Index: linux-2.4.29/arch/arm/vmlinux-armv.lds.in -=================================================================== ---- linux-2.4.29.orig/arch/arm/vmlinux-armv.lds.in 2005-05-08 23:06:26.917055504 +0300 -+++ linux-2.4.29/arch/arm/vmlinux-armv.lds.in 2005-05-08 23:07:11.215321144 +0300 -@@ -67,6 +67,12 @@ - __stop___ksymtab = .; - } - -+ __kallsyms : { /* Kernel debugging table */ -+ __start___kallsyms = .; /* All kernel symbols */ -+ *(__kallsyms) -+ __stop___kallsyms = .; -+ } -+ - . = ALIGN(8192); - - .data : { -Index: linux-2.4.29/arch/ppc/config.in -=================================================================== ---- linux-2.4.29.orig/arch/ppc/config.in 2005-05-08 23:06:26.933053072 +0300 -+++ linux-2.4.29/arch/ppc/config.in 2005-05-08 23:07:11.216320992 +0300 -@@ -655,6 +655,7 @@ - fi - fi - fi -+bool 'Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS - - if [ "$CONFIG_ALL_PPC" = "y" ]; then - bool 'Support for early boot text console (BootX or OpenFirmware only)' CONFIG_BOOTX_TEXT -Index: linux-2.4.29/arch/ppc/vmlinux.lds -=================================================================== ---- linux-2.4.29.orig/arch/ppc/vmlinux.lds 2005-05-08 23:06:26.934052920 +0300 -+++ linux-2.4.29/arch/ppc/vmlinux.lds 2005-05-08 23:07:11.217320840 +0300 -@@ -74,6 +74,10 @@ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - -+ __start___kallsyms = .; /* All kernel symbols */ -+ __kallsyms : { *(__kallsyms) } -+ __stop___kallsyms = .; -+ - . = ALIGN(8); - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } -Index: linux-2.4.29/arch/i386/config.in -=================================================================== ---- linux-2.4.29.orig/arch/i386/config.in 2005-05-08 23:07:09.946514032 +0300 -+++ linux-2.4.29/arch/i386/config.in 2005-05-08 23:33:00.395809912 +0300 -@@ -512,6 +512,7 @@ - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER -+ bool ' Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS - fi - - int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 -Index: linux-2.4.29/arch/ia64/config.in -=================================================================== ---- linux-2.4.29.orig/arch/ia64/config.in 2005-05-08 23:06:26.936052616 +0300 -+++ linux-2.4.29/arch/ia64/config.in 2005-05-08 23:07:11.219320536 +0300 -@@ -318,4 +318,6 @@ - - int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 - -+bool ' Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS -+ - endmenu -Index: linux-2.4.29/arch/alpha/vmlinux.lds.in -=================================================================== ---- linux-2.4.29.orig/arch/alpha/vmlinux.lds.in 2005-05-08 23:06:26.937052464 +0300 -+++ linux-2.4.29/arch/alpha/vmlinux.lds.in 2005-05-08 23:07:11.220320384 +0300 -@@ -28,6 +28,10 @@ - __stop___ksymtab = .; - .kstrtab : { *(.kstrtab) } - -+ __start___kallsyms = .; /* All kernel symbols */ -+ __kallsyms : { *(__kallsyms) } -+ __stop___kallsyms = .; -+ - /* Startup code */ - . = ALIGN(8192); - __init_begin = .; -Index: linux-2.4.29/Makefile -=================================================================== ---- linux-2.4.29.orig/Makefile 2005-05-08 22:59:19.203077912 +0300 -+++ linux-2.4.29/Makefile 2005-05-08 23:07:11.222320080 +0300 -@@ -37,6 +37,7 @@ - MAKEFILES = $(TOPDIR)/.config - GENKSYMS = /sbin/genksyms - DEPMOD = /sbin/depmod -+KALLSYMS = /sbin/kallsyms - MODFLAGS = -DMODULE - CFLAGS_KERNEL = - PERL = perl -@@ -44,6 +45,8 @@ - RPM := $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \ - else echo rpm; fi) - -+TMPPREFIX = -+ - export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ - CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ - CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK -@@ -202,7 +205,7 @@ - CLEAN_FILES = \ - kernel/ksyms.lst include/linux/compile.h \ - vmlinux System.map \ -- .tmp* \ -+ $(TMPPREFIX).tmp* \ - drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ - drivers/char/conmakehash \ - drivers/char/drm/*-mod.c \ -@@ -285,16 +288,39 @@ - boot: vmlinux - @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot - -+LD_VMLINUX := $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \ -+ --start-group \ -+ $(CORE_FILES) \ -+ $(DRIVERS) \ -+ $(NETWORKS) \ -+ $(LIBS) \ -+ --end-group -+ifeq ($(CONFIG_KALLSYMS),y) -+LD_VMLINUX_KALLSYMS := $(TMPPREFIX).tmp_kallsyms3.o -+else -+LD_VMLINUX_KALLSYMS := -+endif -+ - vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs -- $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \ -- --start-group \ -- $(CORE_FILES) \ -- $(DRIVERS) \ -- $(NETWORKS) \ -- $(LIBS) \ -- --end-group \ -- -o vmlinux -+ @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms -+ -+.PHONY: kallsyms -+ -+kallsyms: -+ifeq ($(CONFIG_KALLSYMS),y) -+ @echo kallsyms pass 1 -+ $(LD_VMLINUX) -o $(TMPPREFIX).tmp_vmlinux1 -+ @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux1 > $(TMPPREFIX).tmp_kallsyms1.o -+ @echo kallsyms pass 2 -+ @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms1.o -o $(TMPPREFIX).tmp_vmlinux2 -+ @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux2 > $(TMPPREFIX).tmp_kallsyms2.o -+ @echo kallsyms pass 3 -+ @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms2.o -o $(TMPPREFIX).tmp_vmlinux3 -+ @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux3 > $(TMPPREFIX).tmp_kallsyms3.o -+endif -+ $(LD_VMLINUX) $(LD_VMLINUX_KALLSYMS) -o vmlinux - $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map -+ @rm -f $(TMPPREFIX).tmp_vmlinux* $(TMPPREFIX).tmp_kallsyms* - - symlinks: - rm -f include/asm -Index: linux-2.4.29/kernel/Makefile -=================================================================== ---- linux-2.4.29.orig/kernel/Makefile 2005-05-08 23:06:26.939052160 +0300 -+++ linux-2.4.29/kernel/Makefile 2005-05-08 23:07:11.223319928 +0300 -@@ -19,6 +19,7 @@ - obj-$(CONFIG_UID16) += uid16.o - obj-$(CONFIG_MODULES) += ksyms.o - obj-$(CONFIG_PM) += pm.o -+obj-$(CONFIG_KALLSYMS) += kallsyms.o - - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -Index: linux-2.4.29/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/ksyms.c 2005-05-08 23:07:10.878372368 +0300 -+++ linux-2.4.29/kernel/ksyms.c 2005-05-08 23:07:11.224319776 +0300 -@@ -59,6 +59,9 @@ - #ifdef CONFIG_KMOD - #include - #endif -+#ifdef CONFIG_KALLSYMS -+#include -+#endif - - extern void set_device_ro(kdev_t dev,int flag); - -@@ -87,6 +90,15 @@ - EXPORT_SYMBOL(inter_module_put); - EXPORT_SYMBOL(try_inc_mod_count); - -+#ifdef CONFIG_KALLSYMS -+extern const char __start___kallsyms[]; -+extern const char __stop___kallsyms[]; -+EXPORT_SYMBOL(__start___kallsyms); -+EXPORT_SYMBOL(__stop___kallsyms); -+ -+ -+#endif -+ - /* process memory management */ - EXPORT_SYMBOL(do_mmap_pgoff); - EXPORT_SYMBOL(do_munmap); -Index: linux-2.4.29/kernel/kallsyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/kallsyms.c 2005-05-08 23:07:11.196324032 +0300 -+++ linux-2.4.29/kernel/kallsyms.c 2005-05-08 23:07:11.226319472 +0300 -@@ -0,0 +1,306 @@ -+/* An example of using kallsyms data in a kernel debugger. -+ -+ Copyright 2000 Keith Owens April 2000 -+ -+ This file is part of the Linux modutils. -+ -+ This program is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by the -+ Free Software Foundation; either version 2 of the License, or (at your -+ option) any later version. -+ -+ This program is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software Foundation, -+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ */ -+ -+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $" -+ -+/* -+ This code uses the list of all kernel and module symbols to :- -+ -+ * Find any non-stack symbol in a kernel or module. Symbols do -+ not have to be exported for debugging. -+ -+ * Convert an address to the module (or kernel) that owns it, the -+ section it is in and the nearest symbol. This finds all non-stack -+ symbols, not just exported ones. -+ -+ You need modutils >= 2.3.11 and a kernel with the kallsyms patch -+ which was compiled with CONFIG_KALLSYMS. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+/* These external symbols are only set on kernels compiled with -+ * CONFIG_KALLSYMS. -+ */ -+ -+extern const char __start___kallsyms[]; -+extern const char __stop___kallsyms[]; -+ -+static struct module **kallsyms_module_list; -+ -+static void kallsyms_get_module_list(void) -+{ -+ const struct kallsyms_header *ka_hdr; -+ const struct kallsyms_section *ka_sec; -+ const struct kallsyms_symbol *ka_sym; -+ const char *ka_str; -+ int i; -+ const char *p; -+ -+ if (__start___kallsyms >= __stop___kallsyms) -+ return; -+ ka_hdr = (struct kallsyms_header *)__start___kallsyms; -+ ka_sec = (struct kallsyms_section *) -+ ((char *)(ka_hdr) + ka_hdr->section_off); -+ ka_sym = (struct kallsyms_symbol *) -+ ((char *)(ka_hdr) + ka_hdr->symbol_off); -+ ka_str = -+ ((char *)(ka_hdr) + ka_hdr->string_off); -+ -+ for (i = 0; i < ka_hdr->symbols; kallsyms_next_sym(ka_hdr, ka_sym), ++i) { -+ p = ka_str + ka_sym->name_off; -+ if (strcmp(p, "module_list") == 0) { -+ if (ka_sym->symbol_addr) -+ kallsyms_module_list = (struct module **)(ka_sym->symbol_addr); -+ break; -+ } -+ } -+} -+ -+static inline void kallsyms_do_first_time(void) -+{ -+ static int first_time = 1; -+ if (first_time) -+ kallsyms_get_module_list(); -+ first_time = 0; -+} -+ -+/* A symbol can appear in more than one module. A token is used to -+ * restart the scan at the next module, set the token to 0 for the -+ * first scan of each symbol. -+ */ -+ -+int kallsyms_symbol_to_address( -+ const char *name, /* Name to lookup */ -+ unsigned long *token, /* Which module to start at */ -+ const char **mod_name, /* Set to module name */ -+ unsigned long *mod_start, /* Set to start address of module */ -+ unsigned long *mod_end, /* Set to end address of module */ -+ const char **sec_name, /* Set to section name */ -+ unsigned long *sec_start, /* Set to start address of section */ -+ unsigned long *sec_end, /* Set to end address of section */ -+ const char **sym_name, /* Set to full symbol name */ -+ unsigned long *sym_start, /* Set to start address of symbol */ -+ unsigned long *sym_end /* Set to end address of symbol */ -+ ) -+{ -+ const struct kallsyms_header *ka_hdr = NULL; /* stupid gcc */ -+ const struct kallsyms_section *ka_sec; -+ const struct kallsyms_symbol *ka_sym = NULL; -+ const char *ka_str = NULL; -+ const struct module *m; -+ int i = 0, l; -+ const char *p, *pt_R; -+ char *p2; -+ -+ kallsyms_do_first_time(); -+ if (!kallsyms_module_list) -+ return(0); -+ -+ /* Restart? */ -+ m = *kallsyms_module_list; -+ if (token && *token) { -+ for (; m; m = m->next) -+ if ((unsigned long)m == *token) -+ break; -+ if (m) -+ m = m->next; -+ } -+ -+ for (; m; m = m->next) { -+ if (!mod_member_present(m, kallsyms_start) || -+ !mod_member_present(m, kallsyms_end) || -+ m->kallsyms_start >= m->kallsyms_end) -+ continue; -+ ka_hdr = (struct kallsyms_header *)m->kallsyms_start; -+ ka_sym = (struct kallsyms_symbol *) -+ ((char *)(ka_hdr) + ka_hdr->symbol_off); -+ ka_str = -+ ((char *)(ka_hdr) + ka_hdr->string_off); -+ for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) { -+ p = ka_str + ka_sym->name_off; -+ if (strcmp(p, name) == 0) -+ break; -+ /* Unversioned requests match versioned names */ -+ if (!(pt_R = strstr(p, "_R"))) -+ continue; -+ l = strlen(pt_R); -+ if (l < 10) -+ continue; /* Not _R.*xxxxxxxx */ -+ (void)simple_strtoul(pt_R+l-8, &p2, 16); -+ if (*p2) -+ continue; /* Not _R.*xxxxxxxx */ -+ if (strncmp(p, name, pt_R-p) == 0) -+ break; /* Match with version */ -+ } -+ if (i < ka_hdr->symbols) -+ break; -+ } -+ -+ if (token) -+ *token = (unsigned long)m; -+ if (!m) -+ return(0); /* not found */ -+ -+ ka_sec = (const struct kallsyms_section *) -+ ((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off); -+ *mod_name = *(m->name) ? m->name : "kernel"; -+ *mod_start = ka_hdr->start; -+ *mod_end = ka_hdr->end; -+ *sec_name = ka_sec->name_off + ka_str; -+ *sec_start = ka_sec->start; -+ *sec_end = ka_sec->start + ka_sec->size; -+ *sym_name = ka_sym->name_off + ka_str; -+ *sym_start = ka_sym->symbol_addr; -+ if (i < ka_hdr->symbols-1) { -+ const struct kallsyms_symbol *ka_symn = ka_sym; -+ kallsyms_next_sym(ka_hdr, ka_symn); -+ *sym_end = ka_symn->symbol_addr; -+ } -+ else -+ *sym_end = *sec_end; -+ return(1); -+} -+ -+int kallsyms_address_to_symbol( -+ unsigned long address, /* Address to lookup */ -+ const char **mod_name, /* Set to module name */ -+ unsigned long *mod_start, /* Set to start address of module */ -+ unsigned long *mod_end, /* Set to end address of module */ -+ const char **sec_name, /* Set to section name */ -+ unsigned long *sec_start, /* Set to start address of section */ -+ unsigned long *sec_end, /* Set to end address of section */ -+ const char **sym_name, /* Set to full symbol name */ -+ unsigned long *sym_start, /* Set to start address of symbol */ -+ unsigned long *sym_end /* Set to end address of symbol */ -+ ) -+{ -+ const struct kallsyms_header *ka_hdr = NULL; /* stupid gcc */ -+ const struct kallsyms_section *ka_sec = NULL; -+ const struct kallsyms_symbol *ka_sym; -+ const char *ka_str; -+ const struct module *m; -+ int i; -+ unsigned long end; -+ -+ kallsyms_do_first_time(); -+ if (!kallsyms_module_list) -+ return(0); -+ -+ for (m = *kallsyms_module_list; m; m = m->next) { -+ if (!mod_member_present(m, kallsyms_start) || -+ !mod_member_present(m, kallsyms_end) || -+ m->kallsyms_start >= m->kallsyms_end) -+ continue; -+ ka_hdr = (struct kallsyms_header *)m->kallsyms_start; -+ ka_sec = (const struct kallsyms_section *) -+ ((char *)ka_hdr + ka_hdr->section_off); -+ /* Is the address in any section in this module? */ -+ for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) { -+ if (ka_sec->start <= address && -+ (ka_sec->start + ka_sec->size) > address) -+ break; -+ } -+ if (i < ka_hdr->sections) -+ break; /* Found a matching section */ -+ } -+ -+ if (!m) -+ return(0); /* not found */ -+ -+ ka_sym = (struct kallsyms_symbol *) -+ ((char *)(ka_hdr) + ka_hdr->symbol_off); -+ ka_str = -+ ((char *)(ka_hdr) + ka_hdr->string_off); -+ *mod_name = *(m->name) ? m->name : "kernel"; -+ *mod_start = ka_hdr->start; -+ *mod_end = ka_hdr->end; -+ *sec_name = ka_sec->name_off + ka_str; -+ *sec_start = ka_sec->start; -+ *sec_end = ka_sec->start + ka_sec->size; -+ *sym_name = *sec_name; /* In case we find no matching symbol */ -+ *sym_start = *sec_start; -+ *sym_end = *sec_end; -+ -+ for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) { -+ if (ka_sym->symbol_addr > address) -+ continue; -+ if (i < ka_hdr->symbols-1) { -+ const struct kallsyms_symbol *ka_symn = ka_sym; -+ kallsyms_next_sym(ka_hdr, ka_symn); -+ end = ka_symn->symbol_addr; -+ } -+ else -+ end = *sec_end; -+ if (end <= address) -+ continue; -+ if ((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off -+ != (char *)ka_sec) -+ continue; /* wrong section */ -+ *sym_name = ka_str + ka_sym->name_off; -+ *sym_start = ka_sym->symbol_addr; -+ *sym_end = end; -+ break; -+ } -+ return(1); -+} -+ -+/* List all sections in all modules. The callback routine is invoked with -+ * token, module name, section name, section start, section end, section flags. -+ */ -+int kallsyms_sections(void *token, -+ int (*callback)(void *, const char *, const char *, ElfW(Addr), ElfW(Addr), ElfW(Word))) -+{ -+ const struct kallsyms_header *ka_hdr = NULL; /* stupid gcc */ -+ const struct kallsyms_section *ka_sec = NULL; -+ const char *ka_str; -+ const struct module *m; -+ int i; -+ -+ kallsyms_do_first_time(); -+ if (!kallsyms_module_list) -+ return(0); -+ -+ for (m = *kallsyms_module_list; m; m = m->next) { -+ if (!mod_member_present(m, kallsyms_start) || -+ !mod_member_present(m, kallsyms_end) || -+ m->kallsyms_start >= m->kallsyms_end) -+ continue; -+ ka_hdr = (struct kallsyms_header *)m->kallsyms_start; -+ ka_sec = (const struct kallsyms_section *) ((char *)ka_hdr + ka_hdr->section_off); -+ ka_str = ((char *)(ka_hdr) + ka_hdr->string_off); -+ for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) { -+ if (callback( -+ token, -+ *(m->name) ? m->name : "kernel", -+ ka_sec->name_off + ka_str, -+ ka_sec->start, -+ ka_sec->start + ka_sec->size, -+ ka_sec->flags)) -+ return(0); -+ } -+ } -+ return(1); -+} -Index: linux-2.4.29/include/linux/kallsyms.h -=================================================================== ---- linux-2.4.29.orig/include/linux/kallsyms.h 2005-05-08 23:07:11.196324032 +0300 -+++ linux-2.4.29/include/linux/kallsyms.h 2005-05-08 23:08:04.316248576 +0300 -@@ -0,0 +1,141 @@ -+/* kallsyms headers -+ Copyright 2000 Keith Owens -+ -+ This file is part of the Linux modutils. It is exported to kernel -+ space so debuggers can access the kallsyms data. -+ -+ The kallsyms data contains all the non-stack symbols from a kernel -+ or a module. The kernel symbols are held between __start___kallsyms -+ and __stop___kallsyms. The symbols for a module are accessed via -+ the struct module chain which is based at module_list. -+ -+ This program is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by the -+ Free Software Foundation; either version 2 of the License, or (at your -+ option) any later version. -+ -+ This program is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software Foundation, -+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -+ */ -+ -+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.1 2005/03/24 22:50:28 jacob Exp $" -+ -+#ifndef MODUTILS_KALLSYMS_H -+#define MODUTILS_KALLSYMS_H 1 -+ -+/* Have to (re)define these ElfW entries here because external kallsyms -+ * code does not have access to modutils/include/obj.h. This code is -+ * included from user spaces tools (modutils) and kernel, they need -+ * different includes. -+ */ -+ -+#ifndef ELFCLASS32 -+#ifdef __KERNEL__ -+#include -+#else /* __KERNEL__ */ -+#include -+#endif /* __KERNEL__ */ -+#endif /* ELFCLASS32 */ -+ -+#ifndef ELFCLASSM -+#define ELFCLASSM ELF_CLASS -+#endif -+ -+#ifndef ElfW -+# if ELFCLASSM == ELFCLASS32 -+# define ElfW(x) Elf32_ ## x -+# define ELFW(x) ELF32_ ## x -+# else -+# define ElfW(x) Elf64_ ## x -+# define ELFW(x) ELF64_ ## x -+# endif -+#endif -+ -+/* Format of data in the kallsyms section. -+ * Most of the fields are small numbers but the total size and all -+ * offsets can be large so use the 32/64 bit types for these fields. -+ * -+ * Do not use sizeof() on these structures, modutils may be using extra -+ * fields. Instead use the size fields in the header to access the -+ * other bits of data. -+ */ -+ -+struct kallsyms_header { -+ int size; /* Size of this header */ -+ ElfW(Word) total_size; /* Total size of kallsyms data */ -+ int sections; /* Number of section entries */ -+ ElfW(Off) section_off; /* Offset to first section entry */ -+ int section_size; /* Size of one section entry */ -+ int symbols; /* Number of symbol entries */ -+ ElfW(Off) symbol_off; /* Offset to first symbol entry */ -+ int symbol_size; /* Size of one symbol entry */ -+ ElfW(Off) string_off; /* Offset to first string */ -+ ElfW(Addr) start; /* Start address of first section */ -+ ElfW(Addr) end; /* End address of last section */ -+}; -+ -+struct kallsyms_section { -+ ElfW(Addr) start; /* Start address of section */ -+ ElfW(Word) size; /* Size of this section */ -+ ElfW(Off) name_off; /* Offset to section name */ -+ ElfW(Word) flags; /* Flags from section */ -+}; -+ -+struct kallsyms_symbol { -+ ElfW(Off) section_off; /* Offset to section that owns this symbol */ -+ ElfW(Addr) symbol_addr; /* Address of symbol */ -+ ElfW(Off) name_off; /* Offset to symbol name */ -+}; -+ -+#define KALLSYMS_SEC_NAME "__kallsyms" -+#define KALLSYMS_IDX 2 /* obj_kallsyms creates kallsyms as section 2 */ -+ -+#define kallsyms_next_sec(h,s) \ -+ ((s) = (struct kallsyms_section *)((char *)(s) + (h)->section_size)) -+#define kallsyms_next_sym(h,s) \ -+ ((s) = (struct kallsyms_symbol *)((char *)(s) + (h)->symbol_size)) -+ -+int kallsyms_symbol_to_address( -+ const char *name, /* Name to lookup */ -+ unsigned long *token, /* Which module to start with */ -+ const char **mod_name, /* Set to module name or "kernel" */ -+ unsigned long *mod_start, /* Set to start address of module */ -+ unsigned long *mod_end, /* Set to end address of module */ -+ const char **sec_name, /* Set to section name */ -+ unsigned long *sec_start, /* Set to start address of section */ -+ unsigned long *sec_end, /* Set to end address of section */ -+ const char **sym_name, /* Set to full symbol name */ -+ unsigned long *sym_start, /* Set to start address of symbol */ -+ unsigned long *sym_end /* Set to end address of symbol */ -+ ); -+ -+int kallsyms_address_to_symbol( -+ unsigned long address, /* Address to lookup */ -+ const char **mod_name, /* Set to module name */ -+ unsigned long *mod_start, /* Set to start address of module */ -+ unsigned long *mod_end, /* Set to end address of module */ -+ const char **sec_name, /* Set to section name */ -+ unsigned long *sec_start, /* Set to start address of section */ -+ unsigned long *sec_end, /* Set to end address of section */ -+ const char **sym_name, /* Set to full symbol name */ -+ unsigned long *sym_start, /* Set to start address of symbol */ -+ unsigned long *sym_end /* Set to end address of symbol */ -+ ); -+ -+int kallsyms_sections(void *token, -+ int (*callback)(void *, /* token */ -+ const char *, /* module name */ -+ const char *, /* section name */ -+ ElfW(Addr), /* Section start */ -+ ElfW(Addr), /* Section end */ -+ ElfW(Word) /* Section flags */ -+ ) -+ ); -+ -+#endif /* kallsyms.h */ -Index: linux-2.4.29/arch/i386/vmlinux.lds.S -=================================================================== ---- linux-2.4.29.orig/arch/i386/vmlinux.lds.S 2005-05-08 23:07:09.948513728 +0300 -+++ linux-2.4.29/arch/i386/vmlinux.lds.S 2005-05-08 23:14:24.128508336 +0300 -@@ -28,6 +28,10 @@ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - -+ __start___kallsyms = .; /* All kernel symbols */ -+ __kallsyms : { *(__kallsyms) } -+ __stop___kallsyms = .; -+ - .data : { /* Data */ - *(.data) - CONSTRUCTORS diff --git a/lustre/kernel_patches/patches/kjournald_affinity.patch b/lustre/kernel_patches/patches/kjournald_affinity.patch deleted file mode 100644 index 977676b..0000000 --- a/lustre/kernel_patches/patches/kjournald_affinity.patch +++ /dev/null @@ -1,52 +0,0 @@ ---- linux-2.6.10.orig/fs/ext3/super.c 2004-12-24 22:35:28.000000000 +0100 -+++ linux-2.6.10/fs/ext3/super.c 2005-01-18 12:27:51.896529310 +0100 -@@ -1787,6 +1787,33 @@ - return NULL; - } - -+#ifdef CONFIG_NUMA -+#include -+#include -+ -+static int journal_node_affinity = 0; -+spinlock_t journal_node_affinity_lock = SPIN_LOCK_UNLOCKED; -+ -+static void ext3_bind_journal_thread(journal_t *journal) { -+ int i, node; -+ -+ spin_lock(&journal_node_affinity_lock); -+ for (i = 0; i < MAX_NUMNODES; i++) { -+ node = (journal_node_affinity + i) % MAX_NUMNODES; -+ -+ if (!nr_cpus_node(node) || !node_online(node)) -+ continue; -+ -+ set_cpus_allowed(journal->j_task, node_to_cpumask(node)); -+ journal_node_affinity = (node + 1) % MAX_NUMNODES; -+ break; -+ } -+ spin_unlock(&journal_node_affinity_lock); -+} -+#else -+#define ext3_bind_journal_thread(journal) do {} while (0) -+#endif -+ - static int ext3_load_journal(struct super_block * sb, - struct ext3_super_block * es) - { -@@ -1852,6 +1875,7 @@ - return err; - } - -+ ext3_bind_journal_thread(journal); - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); - return 0; -@@ -1881,6 +1908,7 @@ - return -EIO; - } - -+ ext3_bind_journal_thread(journal); - EXT3_SB(sb)->s_journal = journal; - - ext3_update_dynamic_rev(sb); diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch deleted file mode 100644 index 3c156e8..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch +++ /dev/null @@ -1,4875 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/ia64/defconfig | 7 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 9 - fs/ext3/ext3-exports.c | 13 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - 62 files changed, 4343 insertions(+), 182 deletions(-) - ---- linux/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:23 2003 -+++ linux-mmonroe/Documentation/Configure.help Fri May 16 08:43:00 2003 -@@ -15309,6 +15309,39 @@ CONFIG_EXT2_FS - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -15341,6 +15374,39 @@ CONFIG_EXT3_FS - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is ---- linux/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/ia64/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux/fs/Config.in~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/fs/Config.in Fri May 16 08:43:01 2003 -@@ -35,6 +35,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -98,6 +103,11 @@ dep_mbool ' QNX4FS write support (DANGE - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -176,6 +186,10 @@ else - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in ---- linux/fs/Makefile~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/Makefile Fri May 16 08:43:01 2003 -@@ -80,6 +80,9 @@ obj-y += binfmt_script.o - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - ---- linux/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-hp Thu Oct 11 08:05:18 2001 -+++ linux-mmonroe/fs/ext2/Makefile Fri May 16 08:43:01 2003 -@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-hp Thu Oct 11 08:05:18 2001 -+++ linux-mmonroe/fs/ext2/file.c Fri May 16 08:43:01 2003 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ struct file_operations ext2_file_operati - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/ialloc.c Fri May 16 08:43:01 2003 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ void ext2_free_inode (struct inode * ino - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } ---- linux/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/inode.c Fri May 16 08:43:01 2003 -@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); - static int ext2_update_inode(struct inode * inode, int do_sync); - - /* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext2_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext2_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ -+/* - * Called at each iput() - */ - void ext2_put_inode (struct inode * inode) -@@ -53,9 +65,7 @@ void ext2_delete_inode (struct inode * i - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -801,6 +811,8 @@ void ext2_truncate (struct inode * inode - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -888,8 +900,7 @@ void ext2_read_inode (struct inode * ino - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -974,10 +985,7 @@ void ext2_read_inode (struct inode * ino - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -986,15 +994,17 @@ void ext2_read_inode (struct inode * ino - inode->i_fop = &ext2_dir_operations; - inode->i_mapping->a_ops = &ext2_aops; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = &ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { ---- linux/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-hp Wed Oct 3 22:57:36 2001 -+++ linux-mmonroe/fs/ext2/namei.c Fri May 16 08:43:01 2003 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ static int ext2_symlink (struct inode * - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ struct inode_operations ext2_dir_inode_o - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/super.c Fri May 16 08:43:01 2003 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ void ext2_put_super (struct super_block - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ struct super_block * ext2_read_super (st - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ static DECLARE_FSTYPE_DEV(ext2_fs_type, - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; ---- linux/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-hp Wed Sep 27 13:41:33 2000 -+++ linux-mmonroe/fs/ext2/symlink.c Fri May 16 08:43:01 2003 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ static int ext2_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext2/xattr.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext2/xattr_user.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} ---- linux/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/Makefile Fri May 16 08:43:01 2003 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,13 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/file.c Fri May 16 08:43:01 2003 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -126,5 +127,9 @@ struct file_operations ext3_file_operati - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - ---- linux/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext3/ialloc.c Fri May 16 08:43:01 2003 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext3/inode.c Fri May 16 08:43:01 2003 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. - */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1855,6 +1865,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2002,8 +2014,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2130,10 +2140,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2141,15 +2148,17 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - /* inode->i_attr_flags = 0; unused */ - if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ ---- linux/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:47 2003 -+++ linux-mmonroe/fs/ext3/namei.c Fri May 16 08:43:01 2003 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1611,7 +1612,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1619,7 +1620,6 @@ static int ext3_mkdir(struct inode * dir - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1646,9 +1646,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2017,7 +2014,7 @@ static int ext3_symlink (struct inode * - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. -@@ -2244,4 +2241,16 @@ struct inode_operations ext3_dir_inode_o - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ ---- linux/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/super.c Fri May 16 08:43:01 2003 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -502,6 +504,7 @@ static int parse_options (char * options - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -514,6 +517,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -931,6 +941,12 @@ struct super_block * ext3_read_super (st - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1768,17 +1784,29 @@ static DECLARE_FSTYPE_DEV(ext3_fs_type, - - static int __init init_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); ---- linux/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-hp Fri Nov 9 14:25:04 2001 -+++ linux-mmonroe/fs/ext3/symlink.c Fri May 16 08:43:01 2003 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ static int ext3_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/xattr.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ -+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/xattr_user.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/ext3-exports.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); ---- linux/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/jfs/jfs_xattr.h Fri May 16 08:43:01 2003 -@@ -52,8 +52,10 @@ struct jfs_ea_list { - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); ---- linux/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/jfs/xattr.c Fri May 16 08:43:01 2003 -@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s - } - - static int can_set_xattr(struct inode *inode, const char *name, -- void *value, size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -660,7 +660,7 @@ static int can_set_xattr(struct inode *i - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -799,7 +799,7 @@ int __jfs_setxattr(struct inode *inode, - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/mbcache.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. Entries that cannot be freed immediately are put -+ * back on the lru list. -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. -+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. */ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. -+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. -+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. -+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for (n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). -+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... -+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/cache_def.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,15 @@ -+/* -+ * linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... -+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); ---- linux/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-hp Fri Feb 9 14:46:13 2001 -+++ linux-mmonroe/include/linux/errno.h Fri May 16 08:43:01 2003 -@@ -23,4 +23,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif ---- linux/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-hp Thu Nov 22 11:46:52 2001 -+++ linux-mmonroe/include/linux/ext2_fs.h Fri May 16 08:43:01 2003 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ struct ext2_inode { - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ struct ext2_super_block { - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -466,7 +443,7 @@ struct ext2_super_block { - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -623,8 +600,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/ext2_xattr.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:47 2003 -+++ linux-mmonroe/include/linux/ext3_fs.h Fri May 16 08:43:01 2003 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ struct ext3_inode { - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -520,7 +496,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -771,8 +748,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - ---- linux/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/include/linux/ext3_jbd.h Fri May 16 08:43:01 2003 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/ext3_xattr.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/include/linux/fs.h Fri May 16 08:43:01 2003 -@@ -909,7 +909,7 @@ struct inode_operations { - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/mbcache.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif ---- linux/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:45 2003 -+++ linux-mmonroe/kernel/ksyms.c Fri May 16 08:43:52 2003 -@@ -11,6 +11,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -106,6 +107,7 @@ EXPORT_SYMBOL(exit_mm); - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - EXPORT_SYMBOL_GPL(make_pages_present); - - /* internal kernel memory management */ -@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); ---- linux/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:23 2003 -+++ linux-mmonroe/mm/vmscan.c Fri May 16 08:43:01 2003 -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -35,6 +36,39 @@ - */ - #define DEF_PRIORITY (6) - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * The swap-out function returns 1 if it successfully - * scanned all the pages it was asked to (`count'). -@@ -579,6 +613,7 @@ static int shrink_caches(zone_t * classz - - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); -+ shrink_other_caches(priority, gfp_mask); - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); - #endif - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch deleted file mode 100644 index 686b1ea..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-chaos.patch +++ /dev/null @@ -1,2172 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/ia64/kernel/entry.S | 24 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ia64/unistd.h | 13 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - fs/ext3/ext3-exports.c | 14 + - 64 files changed, 4355 insertions(+), 195 deletions(-) - -Index: linux-2.4.21-chaos/Documentation/Configure.help -=================================================================== ---- linux-2.4.21-chaos.orig/Documentation/Configure.help 2003-10-04 01:18:57.000000000 +0400 -+++ linux-2.4.21-chaos/Documentation/Configure.help 2003-12-14 15:11:46.000000000 +0300 -@@ -15939,6 +15939,39 @@ - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -16008,6 +16041,39 @@ - - If unsure, say N. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is -Index: linux-2.4.21-chaos/fs/Config.in -=================================================================== ---- linux-2.4.21-chaos.orig/fs/Config.in 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/Config.in 2003-12-14 15:11:46.000000000 +0300 -@@ -108,6 +108,11 @@ - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -196,6 +201,10 @@ - bool "POSIX ACL helper functions" CONFIG_FS_POSIX_ACL - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in -Index: linux-2.4.21-chaos/fs/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/Makefile 2003-12-14 15:09:27.000000000 +0300 -+++ linux-2.4.21-chaos/fs/Makefile 2003-12-14 15:11:46.000000000 +0300 -@@ -95,6 +95,9 @@ - obj-$(CONFIG_NFS_ACL) += solaris_acl.o - obj-$(CONFIG_NFSD_ACL) += solaris_acl.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - -Index: linux-2.4.21-chaos/fs/ext2/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/Makefile 2002-05-08 01:53:46.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/Makefile 2003-12-14 15:11:46.000000000 +0300 -@@ -13,4 +13,8 @@ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make -Index: linux-2.4.21-chaos/fs/ext2/file.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/file.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/file.c 2003-12-14 15:11:46.000000000 +0300 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -55,4 +56,8 @@ - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.21-chaos/fs/ext2/ialloc.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/ialloc.c 2003-07-15 02:09:35.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/ialloc.c 2003-12-14 15:11:46.000000000 +0300 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } -Index: linux-2.4.21-chaos/fs/ext2/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/inode.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/inode.c 2003-12-14 15:11:46.000000000 +0300 -@@ -53,9 +65,7 @@ - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -802,6 +812,8 @@ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -904,8 +916,7 @@ - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -990,10 +1001,7 @@ - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -1002,12 +1010,14 @@ - if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = &ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - ext2_set_inode_flags(inode); -Index: linux-2.4.21-chaos/fs/ext2/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/namei.c 2002-05-08 01:53:46.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/namei.c 2003-12-14 15:11:46.000000000 +0300 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.21-chaos/fs/ext2/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/super.c 2003-02-15 01:59:09.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext2/super.c 2003-12-14 15:11:46.000000000 +0300 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; -Index: linux-2.4.21-chaos/fs/ext2/symlink.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/symlink.c 2002-05-08 01:53:46.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext2/symlink.c 2003-12-14 15:11:46.000000000 +0300 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.21-chaos/fs/ext2/xattr.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/xattr.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext2/xattr.c 2003-12-14 15:11:46.000000000 +0300 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ -Index: linux-2.4.21-chaos/fs/ext2/xattr_user.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext2/xattr_user.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext2/xattr_user.c 2003-12-14 15:11:46.000000000 +0300 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -Index: linux-2.4.21-chaos/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/Makefile 2003-12-14 15:09:30.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/Makefile 2003-12-14 15:11:46.000000000 +0300 -@@ -9,10 +9,10 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-14 15:09:30.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-14 15:11:46.000000000 +0300 -@@ -1944,9 +1944,6 @@ - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); -Index: linux-2.4.21-chaos/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ext3-exports.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ext3-exports.c 2003-12-14 15:11:46.000000000 +0300 -@@ -0,0 +1,14 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_journal_abort_handle); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -Index: linux-2.4.21-chaos/fs/ext3/xattr_trusted.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr_trusted.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr_trusted.c 2003-12-14 15:11:46.000000000 +0300 -@@ -50,7 +50,7 @@ - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_TRUSTED, name, - value, size, flags); - } - -Index: linux-2.4.21-chaos/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr.c 2003-07-30 04:11:55.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr.c 2003-12-14 15:11:46.000000000 +0300 -@@ -868,7 +868,7 @@ - } - - /* -- * ext3_xattr_set() -+ * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended - * attribute modification is a filesystem transaction by itself. -@@ -876,7 +876,7 @@ - * Returns 0, or a negative error number on failure. - */ - int --ext3_xattr_set(struct inode *inode, int name_index, const char *name, -+ext3_xattr_set_trans(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) - { - handle_t *handle; -@@ -895,6 +895,20 @@ - return error ? error : error2; - } - -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ int error; -+ -+ lock_kernel(); -+ error = ext3_xattr_set_handle(handle, inode, name_index, name, -+ value, value_len, flags); -+ unlock_kernel(); -+ return error; -+} -+ - /* - * ext3_xattr_delete_inode() - * -Index: linux-2.4.21-chaos/fs/ext3/xattr_user.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr_user.c 2003-07-30 04:11:55.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr_user.c 2003-12-14 15:26:58.000000000 +0300 -@@ -70,7 +70,7 @@ - if (error) - return error; - -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_USER, name, - value, size, flags); - } - -Index: linux-2.4.21-chaos/include/linux/errno.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/errno.h 2003-07-15 04:41:12.000000000 +0400 -+++ linux-2.4.21-chaos/include/linux/errno.h 2003-12-14 15:11:46.000000000 +0300 -@@ -26,4 +26,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif -Index: linux-2.4.21-chaos/include/linux/ext2_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext2_fs.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext2_fs.h 2003-12-14 15:13:54.000000000 +0300 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -466,7 +443,7 @@ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -624,8 +601,10 @@ - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ -Index: linux-2.4.21-chaos/include/linux/ext2_xattr.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext2_xattr.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext2_xattr.h 2003-12-14 15:13:54.000000000 +0300 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ -Index: linux-2.4.21-chaos/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_xattr.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_xattr.h 2003-12-14 15:21:13.000000000 +0300 -@@ -82,8 +82,10 @@ - extern int ext3_xattr_list(struct inode *, char *, size_t); - extern int ext3_xattr_set_handle(handle_t *handle, struct inode *, int, - const char *, const void *, size_t, int); --extern int ext3_xattr_set(struct inode *, int, const char *, const void *, -+extern int ext3_xattr_set_trans(struct inode *, int, const char *, const void *, - size_t, int); -+extern int ext3_xattr_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); - - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse-171.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse-171.patch deleted file mode 100644 index f203b15..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse-171.patch +++ /dev/null @@ -1,276 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/ia64/kernel/entry.S | 24 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ia64/unistd.h | 13 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - fs/ext3/ext3-exports.c | 14 + - 64 files changed, 4355 insertions(+), 195 deletions(-) - -Index: linux-2.4.21-241/Documentation/Configure.help -=================================================================== ---- linux-2.4.21-241.orig/Documentation/Configure.help 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/Documentation/Configure.help 2004-10-04 02:19:55.000000000 -0400 -@@ -16421,6 +16421,39 @@ - - If unsure, say N. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is -Index: linux-2.4.21-241/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/Makefile 2004-10-04 02:19:51.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/Makefile 2004-10-04 02:19:55.000000000 -0400 -@@ -9,10 +9,10 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-241/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/super.c 2004-10-04 02:19:51.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/super.c 2004-10-04 02:19:55.000000000 -0400 -@@ -1914,9 +1914,6 @@ - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); -Index: linux-2.4.21-241/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/ext3-exports.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.4.21-241/fs/ext3/ext3-exports.c 2004-10-04 02:19:55.000000000 -0400 -@@ -0,0 +1,14 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_journal_abort_handle); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -Index: linux-2.4.21-241/fs/ext3/xattr_trusted.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/xattr_trusted.c 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/xattr_trusted.c 2004-10-04 02:19:55.000000000 -0400 -@@ -50,7 +50,7 @@ - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_TRUSTED, name, - value, size, flags); - } - -Index: linux-2.4.21-241/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/xattr.c 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/xattr.c 2004-10-04 02:23:13.000000000 -0400 -@@ -785,7 +785,7 @@ - * don't need to change the reference count. */ - new_bh = old_bh; - get_bh(new_bh); -- ext3_xattr_cache_insert(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); - } else { - /* We need to allocate a new block */ - int goal = le32_to_cpu(EXT3_SB(inode->i_sb)->s_es-> -@@ -814,7 +814,7 @@ - memcpy(new_bh->b_data, header, new_bh->b_size); - mark_buffer_uptodate(new_bh, 1); - unlock_buffer(new_bh); -- ext3_xattr_cache_insert(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); - - ext3_xattr_update_super_block(handle, sb); - } -@@ -870,7 +870,7 @@ - } - - /* -- * ext3_xattr_set() -+ * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended - * attribute modification is a filesystem transaction by itself. -@@ -878,7 +878,7 @@ - * Returns 0, or a negative error number on failure. - */ - int --ext3_xattr_set(struct inode *inode, int name_index, const char *name, -+ext3_xattr_set_trans(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) - { - handle_t *handle; -@@ -900,6 +900,20 @@ - return error; - } - -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ int error; -+ -+ lock_kernel(); -+ error = ext3_xattr_set_handle(handle, inode, name_index, name, -+ value, value_len, flags); -+ unlock_kernel(); -+ return error; -+} -+ - /* - * ext3_xattr_delete_inode() - * -Index: linux-2.4.21-241/fs/ext3/xattr_user.c -=================================================================== ---- linux-2.4.21-241.orig/fs/ext3/xattr_user.c 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/fs/ext3/xattr_user.c 2004-10-04 02:19:55.000000000 -0400 -@@ -70,7 +70,7 @@ - if (error) - return error; - -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_USER, name, - value, size, flags); - } - -Index: linux-2.4.21-241/include/linux/errno.h -=================================================================== ---- linux-2.4.21-241.orig/include/linux/errno.h 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/include/linux/errno.h 2004-10-04 02:19:55.000000000 -0400 -@@ -26,4 +26,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif -Index: linux-2.4.21-241/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.21-241.orig/include/linux/ext3_xattr.h 2004-10-03 17:37:47.000000000 -0400 -+++ linux-2.4.21-241/include/linux/ext3_xattr.h 2004-10-04 02:19:55.000000000 -0400 -@@ -80,8 +80,10 @@ - extern int ext3_xattr_list(struct inode *, char *, size_t); - extern int ext3_xattr_set_handle(handle_t *handle, struct inode *, int, - const char *, const void *, size_t, int); --extern int ext3_xattr_set(struct inode *, int, const char *, const void *, -+extern int ext3_xattr_set_trans(struct inode *, int, const char *, const void *, - size_t, int); -+extern int ext3_xattr_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); - - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); diff --git a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse2.patch b/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse2.patch deleted file mode 100644 index 0a8891f..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.21-xattr-0.8.54-suse2.patch +++ /dev/null @@ -1,258 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/ia64/kernel/entry.S | 24 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ia64/unistd.h | 13 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - fs/ext3/ext3-exports.c | 14 + - 64 files changed, 4355 insertions(+), 195 deletions(-) - -Index: linux-2.4.21-chaos/Documentation/Configure.help -=================================================================== ---- linux-2.4.21-chaos.orig/Documentation/Configure.help 2003-10-04 01:18:57.000000000 +0400 -+++ linux-2.4.21-chaos/Documentation/Configure.help 2003-12-14 15:11:46.000000000 +0300 -@@ -16008,6 +16041,39 @@ - - If unsure, say N. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is -Index: linux-2.4.21-chaos/fs/ext3/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/Makefile 2003-12-14 15:09:30.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/Makefile 2003-12-14 15:11:46.000000000 +0300 -@@ -9,10 +9,10 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - - export-objs += xattr.o -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-14 15:09:30.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-14 15:11:46.000000000 +0300 -@@ -1944,9 +1944,6 @@ - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); -Index: linux-2.4.21-chaos/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/ext3-exports.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/ext3-exports.c 2003-12-14 15:11:46.000000000 +0300 -@@ -0,0 +1,14 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_journal_abort_handle); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -Index: linux-2.4.21-chaos/fs/ext3/xattr_trusted.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr_trusted.c 2003-07-15 04:41:01.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr_trusted.c 2003-12-14 15:11:46.000000000 +0300 -@@ -50,7 +50,7 @@ - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_TRUSTED, name, - value, size, flags); - } - -Index: linux-2.4.21-chaos/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr.c 2003-07-30 04:11:55.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr.c 2003-12-14 15:11:46.000000000 +0300 -@@ -868,7 +868,7 @@ - } - - /* -- * ext3_xattr_set() -+ * ext3_xattr_set_trans() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended - * attribute modification is a filesystem transaction by itself. -@@ -876,7 +876,7 @@ - * Returns 0, or a negative error number on failure. - */ - int --ext3_xattr_set(struct inode *inode, int name_index, const char *name, -+ext3_xattr_set_trans(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) - { - handle_t *handle; -@@ -895,6 +895,20 @@ - return error ? error : error2; - } - -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, -+ int flags) -+{ -+ int error; -+ -+ lock_kernel(); -+ error = ext3_xattr_set_handle(handle, inode, name_index, name, -+ value, value_len, flags); -+ unlock_kernel(); -+ return error; -+} -+ - /* - * ext3_xattr_delete_inode() - * -Index: linux-2.4.21-chaos/fs/ext3/xattr_user.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/xattr_user.c 2003-07-30 04:11:55.000000000 +0400 -+++ linux-2.4.21-chaos/fs/ext3/xattr_user.c 2003-12-14 15:26:58.000000000 +0300 -@@ -70,7 +70,7 @@ - if (error) - return error; - -- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, -+ return ext3_xattr_set_trans(inode, EXT3_XATTR_INDEX_USER, name, - value, size, flags); - } - -Index: linux-2.4.21-chaos/include/linux/errno.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/errno.h 2003-07-15 04:41:12.000000000 +0400 -+++ linux-2.4.21-chaos/include/linux/errno.h 2003-12-14 15:11:46.000000000 +0300 -@@ -26,4 +26,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif -Index: linux-2.4.21-chaos/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_xattr.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_xattr.h 2003-12-14 15:21:13.000000000 +0300 -@@ -82,8 +82,10 @@ - extern int ext3_xattr_list(struct inode *, char *, size_t); - extern int ext3_xattr_set_handle(handle_t *handle, struct inode *, int, - const char *, const void *, size_t, int); --extern int ext3_xattr_set(struct inode *, int, const char *, const void *, -+extern int ext3_xattr_set_trans(struct inode *, int, const char *, const void *, - size_t, int); -+extern int ext3_xattr_set(handle_t *, struct inode *, int, const char *, -+ const void *, size_t, int); - - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); diff --git a/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO-rhel3.patch b/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO-rhel3.patch deleted file mode 100644 index bc66351..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO-rhel3.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff -X /home/nikita/src/linux-git/linux-2.6.git/Documentation/dontdiff -rupbB linux-2.4.24.orig/fs/jbd/commit.c linux-2.4.24/fs/jbd/commit.c ---- linux-2.4.24.orig/fs/jbd/commit.c 2005-06-23 17:39:32.000000000 +0400 -+++ linux-2.4.24/fs/jbd/commit.c 2005-06-23 15:56:05.000000000 +0400 -@@ -505,6 +505,9 @@ start_journal_io: - goto wait_for_iobuf; - } - -+ if (unlikely(!buffer_uptodate(bh))) -+ err = -EIO; -+ - clear_bit(BH_JWrite, &jh2bh(jh)->b_state); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); -@@ -566,6 +569,9 @@ start_journal_io: - goto wait_for_ctlbuf; - } - -+ if (unlikely(!buffer_uptodate(bh))) -+ err = -EIO; -+ - BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); - clear_bit(BH_JWrite, &bh->b_state); - journal_unfile_buffer(jh); diff --git a/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO.patch b/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO.patch deleted file mode 100644 index ff2991f..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.24-jbd-handle-EIO.patch +++ /dev/null @@ -1,51 +0,0 @@ -diff -X /home/nikita/src/linux-git/linux-2.6.git/Documentation/dontdiff -rupbB linux-2.4.24.orig/fs/jbd/commit.c linux-2.4.24/fs/jbd/commit.c ---- linux-2.4.24.orig/fs/jbd/commit.c 2005-06-23 17:39:32.000000000 +0400 -+++ linux-2.4.24/fs/jbd/commit.c 2005-06-23 15:56:05.000000000 +0400 -@@ -47,7 +47,7 @@ void journal_commit_transaction(journal_ - struct buffer_head *wbuf[64]; - int bufs; - int flags; -- int err; -+ int err = 0; - unsigned long blocknr; - char *tagp = NULL; - journal_header_t *header; -@@ -505,6 +505,9 @@ start_journal_io: - goto wait_for_iobuf; - } - -+ if (unlikely(!buffer_uptodate(bh))) -+ err = -EIO; -+ - clear_bit(BH_JWrite, &jh2bh(jh)->b_state); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); -@@ -566,6 +569,9 @@ start_journal_io: - goto wait_for_ctlbuf; - } - -+ if (unlikely(!buffer_uptodate(bh))) -+ err = -EIO; -+ - BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); - clear_bit(BH_JWrite, &bh->b_state); - journal_unfile_buffer(jh); -@@ -610,6 +616,8 @@ start_journal_io: - bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE, bh); - wait_on_buffer(bh); -+ if (unlikely(!buffer_uptodate(bh))) -+ err = -EIO; - put_bh(bh); /* One for getblk() */ - journal_unlock_journal_head(descriptor); - } -@@ -621,6 +629,9 @@ start_journal_io: - - skip_commit: /* The journal should be unlocked by now. */ - -+ if (err) -+ __journal_abort_hard(journal); -+ - /* Call any callbacks that had been registered for handles in this - * transaction. It is up to the callback to free any allocated - * memory. diff --git a/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch deleted file mode 100644 index 5bd7ac4..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.24-xattr-0.8.54.patch +++ /dev/null @@ -1,5474 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/ia64/kernel/entry.S | 24 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ia64/unistd.h | 13 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - fs/ext3/ext3-exports.c | 14 + - 64 files changed, 4355 insertions(+), 195 deletions(-) - -Index: linux-2.4.24-vanilla/Documentation/Configure.help -=================================================================== ---- linux-2.4.24-vanilla.orig/Documentation/Configure.help 2004-01-10 17:05:37.000000000 +0300 -+++ linux-2.4.24-vanilla/Documentation/Configure.help 2004-01-10 17:20:28.000000000 +0300 -@@ -16295,6 +16295,39 @@ - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -16327,6 +16360,39 @@ - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is -Index: linux-2.4.24-vanilla/arch/alpha/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/alpha/defconfig 2004-01-10 17:04:37.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/alpha/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ALPHA=y - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set -Index: linux-2.4.24-vanilla/arch/alpha/kernel/entry.S -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/alpha/kernel/entry.S 2004-01-10 17:04:37.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/alpha/kernel/entry.S 2004-01-10 17:20:28.000000000 +0300 -@@ -1154,6 +1154,18 @@ - .quad sys_readahead - .quad sys_ni_syscall /* 380, sys_security */ - .quad sys_tkill -+ .quad sys_setxattr -+ .quad sys_lsetxattr -+ .quad sys_fsetxattr -+ .quad sys_getxattr /* 385 */ -+ .quad sys_lgetxattr -+ .quad sys_fgetxattr -+ .quad sys_listxattr -+ .quad sys_llistxattr -+ .quad sys_flistxattr /* 390 */ -+ .quad sys_removexattr -+ .quad sys_lremovexattr -+ .quad sys_fremovexattr - - /* Remember to update everything, kids. */ - .ifne (. - sys_call_table) - (NR_SYSCALLS * 8) -Index: linux-2.4.24-vanilla/arch/arm/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/arm/defconfig 2001-05-20 04:43:05.000000000 +0400 -+++ linux-2.4.24-vanilla/arch/arm/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ARM=y - # CONFIG_EISA is not set - # CONFIG_SBUS is not set -Index: linux-2.4.24-vanilla/arch/arm/kernel/calls.S -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/arm/kernel/calls.S 2004-01-10 17:04:58.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/arm/kernel/calls.S 2004-01-10 17:20:28.000000000 +0300 -@@ -240,18 +240,18 @@ - .long SYMBOL_NAME(sys_ni_syscall) /* Security */ - .long SYMBOL_NAME(sys_gettid) - /* 225 */ .long SYMBOL_NAME(sys_readahead) -- .long SYMBOL_NAME(sys_ni_syscall) /* setxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* lsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* getxattr */ --/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* lgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* listxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* llistxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* flistxattr */ --/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* removexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* lremovexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fremovexattr */ -+ .long SYMBOL_NAME(sys_setxattr) -+ .long SYMBOL_NAME(sys_lsetxattr) -+ .long SYMBOL_NAME(sys_fsetxattr) -+ .long SYMBOL_NAME(sys_getxattr) -+/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) -+ .long SYMBOL_NAME(sys_fgetxattr) -+ .long SYMBOL_NAME(sys_listxattr) -+ .long SYMBOL_NAME(sys_llistxattr) -+ .long SYMBOL_NAME(sys_flistxattr) -+/* 235 */ .long SYMBOL_NAME(sys_removexattr) -+ .long SYMBOL_NAME(sys_lremovexattr) -+ .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_ni_syscall) /* sendfile64 */ - /* 240 */ .long SYMBOL_NAME(sys_ni_syscall) /* futex */ -Index: linux-2.4.24-vanilla/arch/i386/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/i386/defconfig 2004-01-10 17:05:45.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/i386/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_X86=y - CONFIG_ISA=y - # CONFIG_SBUS is not set -Index: linux-2.4.24-vanilla/arch/ia64/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/ia64/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/ia64/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options -Index: linux-2.4.24-vanilla/arch/m68k/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/m68k/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/m68k/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - - # -Index: linux-2.4.24-vanilla/arch/mips/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/mips/defconfig 2004-01-10 17:04:59.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/mips/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - CONFIG_MIPS32=y - # CONFIG_MIPS64 is not set -Index: linux-2.4.24-vanilla/arch/mips64/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/mips64/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/mips64/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - # CONFIG_MIPS32 is not set - CONFIG_MIPS64=y -Index: linux-2.4.24-vanilla/arch/s390/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/s390/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/s390/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set -Index: linux-2.4.24-vanilla/arch/s390/kernel/entry.S -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/s390/kernel/entry.S 2004-01-10 17:04:39.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/s390/kernel/entry.S 2004-01-10 17:20:28.000000000 +0300 -@@ -558,18 +558,18 @@ - .long sys_fcntl64 - .long sys_readahead - .long sys_ni_syscall -- .long sys_ni_syscall /* 224 - reserved for setxattr */ -- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ -- .long sys_ni_syscall /* 226 - reserved for fsetxattr */ -- .long sys_ni_syscall /* 227 - reserved for getxattr */ -- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ -- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ -- .long sys_ni_syscall /* 230 - reserved for listxattr */ -- .long sys_ni_syscall /* 231 - reserved for llistxattr */ -- .long sys_ni_syscall /* 232 - reserved for flistxattr */ -- .long sys_ni_syscall /* 233 - reserved for removexattr */ -- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ -- .long sys_ni_syscall /* 235 - reserved for fremovexattr */ -+ .long sys_setxattr -+ .long sys_lsetxattr /* 225 */ -+ .long sys_fsetxattr -+ .long sys_getxattr -+ .long sys_lgetxattr -+ .long sys_fgetxattr -+ .long sys_listxattr /* 230 */ -+ .long sys_llistxattr -+ .long sys_flistxattr -+ .long sys_removexattr -+ .long sys_lremovexattr -+ .long sys_fremovexattr /* 235 */ - .long sys_gettid - .long sys_tkill - .rept 255-237 -Index: linux-2.4.24-vanilla/arch/s390x/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/s390x/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/s390x/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set -Index: linux-2.4.24-vanilla/arch/s390x/kernel/entry.S -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/s390x/kernel/entry.S 2004-01-10 17:05:00.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/s390x/kernel/entry.S 2004-01-10 17:20:28.000000000 +0300 -@@ -591,18 +591,18 @@ - .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) - .long SYSCALL(sys_readahead,sys32_readahead) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for llistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ -+ .long SYSCALL(sys_setxattr,sys32_setxattr_wrapper) -+ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -+ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) -+ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) -+ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) -+ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) -+ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -+ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) -+ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) -+ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) -+ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) -+ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ - .long SYSCALL(sys_gettid,sys_gettid) - .long SYSCALL(sys_tkill,sys_tkill) - .rept 255-237 -Index: linux-2.4.24-vanilla/arch/s390x/kernel/wrapper32.S -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/s390x/kernel/wrapper32.S 2004-01-10 17:05:00.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/s390x/kernel/wrapper32.S 2004-01-10 17:20:28.000000000 +0300 -@@ -1098,6 +1098,98 @@ - llgfr %r4,%r4 # long - jg sys32_fstat64 # branch to system call - -+ .globl sys32_setxattr_wrapper -+sys32_setxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_setxattr -+ -+ .globl sys32_lsetxattr_wrapper -+sys32_lsetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_lsetxattr -+ -+ .globl sys32_fsetxattr_wrapper -+sys32_fsetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_fsetxattr -+ -+ .globl sys32_getxattr_wrapper -+sys32_getxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_getxattr -+ -+ .globl sys32_lgetxattr_wrapper -+sys32_lgetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_lgetxattr -+ -+ .globl sys32_fgetxattr_wrapper -+sys32_fgetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_fgetxattr -+ -+ .globl sys32_listxattr_wrapper -+sys32_listxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_listxattr -+ -+ .globl sys32_llistxattr_wrapper -+sys32_llistxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_llistxattr -+ -+ .globl sys32_flistxattr_wrapper -+sys32_flistxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_flistxattr -+ -+ .globl sys32_removexattr_wrapper -+sys32_removexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_removexattr -+ -+ .globl sys32_lremovexattr_wrapper -+sys32_lremovexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_lremovexattr -+ -+ .globl sys32_fremovexattr_wrapper -+sys32_fremovexattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ jg sys_fremovexattr -+ -+ -+ - .globl sys32_stime_wrapper - sys32_stime_wrapper: - llgtr %r2,%r2 # int * -Index: linux-2.4.24-vanilla/arch/sparc64/defconfig -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/sparc64/defconfig 2004-01-10 17:05:52.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/sparc64/defconfig 2004-01-10 17:20:28.000000000 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options -Index: linux-2.4.24-vanilla/fs/Config.in -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/Config.in 2004-01-10 17:05:55.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/Config.in 2004-01-10 17:20:28.000000000 +0300 -@@ -29,6 +29,11 @@ - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -92,6 +97,11 @@ - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -164,6 +174,10 @@ - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in -Index: linux-2.4.24-vanilla/fs/Makefile -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/Makefile 2004-01-10 17:11:48.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/Makefile 2004-01-10 17:20:28.000000000 +0300 -@@ -77,6 +77,9 @@ - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - -Index: linux-2.4.24-vanilla/fs/ext2/Makefile -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/Makefile 2001-10-11 19:05:18.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/ext2/Makefile 2004-01-10 17:20:28.000000000 +0300 -@@ -13,4 +13,8 @@ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make -Index: linux-2.4.24-vanilla/fs/ext2/file.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/file.c 2001-10-11 19:05:18.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/ext2/file.c 2004-01-10 17:20:28.000000000 +0300 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.24-vanilla/fs/ext2/ialloc.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/ialloc.c 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext2/ialloc.c 2004-01-10 17:20:28.000000000 +0300 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } -Index: linux-2.4.24-vanilla/fs/ext2/inode.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/inode.c 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext2/inode.c 2004-01-10 17:20:28.000000000 +0300 -@@ -39,6 +39,18 @@ - static int ext2_update_inode(struct inode * inode, int do_sync); - - /* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext2_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext2_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ -+/* - * Called at each iput() - */ - void ext2_put_inode (struct inode * inode) -@@ -53,9 +65,7 @@ - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -801,6 +811,8 @@ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -903,8 +915,7 @@ - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -989,10 +1000,7 @@ - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -1001,15 +1009,17 @@ - inode->i_fop = &ext2_dir_operations; - inode->i_mapping->a_ops = &ext2_aops; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = &ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - ext2_set_inode_flags(inode); -Index: linux-2.4.24-vanilla/fs/ext2/namei.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/namei.c 2001-10-04 09:57:36.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/ext2/namei.c 2004-01-10 17:20:28.000000000 +0300 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.24-vanilla/fs/ext2/super.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/super.c 2003-05-16 05:29:12.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/ext2/super.c 2004-01-10 17:20:28.000000000 +0300 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; -Index: linux-2.4.24-vanilla/fs/ext2/symlink.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/symlink.c 2000-09-28 00:41:33.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/ext2/symlink.c 2004-01-10 17:20:28.000000000 +0300 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.24-vanilla/fs/ext2/xattr.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/xattr.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext2/xattr.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ -Index: linux-2.4.24-vanilla/fs/ext2/xattr_user.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext2/xattr_user.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext2/xattr_user.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -Index: linux-2.4.24-vanilla/fs/ext3/Makefile -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/Makefile 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/Makefile 2004-01-10 17:20:28.000000000 +0300 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,14 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make -Index: linux-2.4.24-vanilla/fs/ext3/file.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/file.c 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/file.c 2004-01-10 17:20:28.000000000 +0300 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -127,5 +128,9 @@ - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - -Index: linux-2.4.24-vanilla/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/ialloc.c 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/ialloc.c 2004-01-10 17:20:28.000000000 +0300 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - -Index: linux-2.4.24-vanilla/fs/ext3/inode.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/inode.c 2004-01-10 17:05:05.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/inode.c 2004-01-10 17:20:28.000000000 +0300 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. - */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -179,9 +191,7 @@ - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1870,6 +1880,8 @@ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2017,8 +2029,6 @@ - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2159,10 +2169,7 @@ - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2170,15 +2177,17 @@ - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - brelse(iloc.bh); - ext3_set_inode_flags(inode); - return; -Index: linux-2.4.24-vanilla/fs/ext3/namei.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/namei.c 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/namei.c 2004-01-10 17:20:28.000000000 +0300 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1613,7 +1614,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1621,7 +1622,6 @@ - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1648,9 +1648,6 @@ - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2019,7 +2016,7 @@ - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. -@@ -2244,4 +2241,16 @@ - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ -Index: linux-2.4.24-vanilla/fs/ext3/super.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/super.c 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/super.c 2004-01-10 17:20:28.000000000 +0300 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -505,6 +507,7 @@ - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -517,6 +520,13 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -934,6 +944,12 @@ - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1822,22 +1838,35 @@ - - static int __init init_ext3_fs(void) - { -+ int error; - #ifdef CONFIG_QUOTA - init_dquot_operations(&ext3_qops); - old_sync_dquot = ext3_qops.sync_dquot; - ext3_qops.sync_dquot = ext3_sync_dquot; - #endif -- return register_filesystem(&ext3_fs_type); -+ error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); -Index: linux-2.4.24-vanilla/fs/ext3/symlink.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/symlink.c 2001-11-10 01:25:04.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/symlink.c 2004-01-10 17:20:28.000000000 +0300 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -Index: linux-2.4.24-vanilla/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/xattr.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/xattr.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ -+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ -Index: linux-2.4.24-vanilla/fs/ext3/xattr_user.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/xattr_user.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/xattr_user.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -Index: linux-2.4.24-vanilla/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/ext3/ext3-exports.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/ext3/ext3-exports.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -Index: linux-2.4.24-vanilla/fs/jfs/jfs_xattr.h -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/jfs/jfs_xattr.h 2003-05-16 05:29:12.000000000 +0400 -+++ linux-2.4.24-vanilla/fs/jfs/jfs_xattr.h 2004-01-10 17:20:28.000000000 +0300 -@@ -52,8 +52,10 @@ - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); -Index: linux-2.4.24-vanilla/fs/jfs/xattr.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/jfs/xattr.c 2004-01-10 17:05:55.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/jfs/xattr.c 2004-01-10 17:20:28.000000000 +0300 -@@ -648,7 +648,7 @@ - } - - static int can_set_xattr(struct inode *inode, const char *name, -- void *value, size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -667,7 +667,7 @@ - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -806,7 +806,7 @@ - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ -Index: linux-2.4.24-vanilla/fs/mbcache.c -=================================================================== ---- linux-2.4.24-vanilla.orig/fs/mbcache.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/fs/mbcache.c 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. Entries that cannot be freed immediately are put -+ * back on the lru list. -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. -+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. */ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. -+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. -+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. -+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for (n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). -+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... -+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ -Index: linux-2.4.24-vanilla/include/asm-arm/unistd.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/asm-arm/unistd.h 2004-01-10 17:05:06.000000000 +0300 -+++ linux-2.4.24-vanilla/include/asm-arm/unistd.h 2004-01-10 17:20:28.000000000 +0300 -@@ -250,7 +250,6 @@ - #define __NR_security (__NR_SYSCALL_BASE+223) - #define __NR_gettid (__NR_SYSCALL_BASE+224) - #define __NR_readahead (__NR_SYSCALL_BASE+225) --#if 0 /* allocated in 2.5 */ - #define __NR_setxattr (__NR_SYSCALL_BASE+226) - #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) - #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -@@ -263,7 +262,6 @@ - #define __NR_removexattr (__NR_SYSCALL_BASE+235) - #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) - #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) --#endif - #define __NR_tkill (__NR_SYSCALL_BASE+238) - #if 0 /* allocated in 2.5 */ - #define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -Index: linux-2.4.24-vanilla/include/asm-ppc64/unistd.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/asm-ppc64/unistd.h 2004-01-10 17:05:59.000000000 +0300 -+++ linux-2.4.24-vanilla/include/asm-ppc64/unistd.h 2004-01-10 17:20:28.000000000 +0300 -@@ -218,6 +218,7 @@ - #define __NR_mincore 206 - #define __NR_gettid 207 - #define __NR_tkill 208 -+#endif - #define __NR_setxattr 209 - #define __NR_lsetxattr 210 - #define __NR_fsetxattr 211 -@@ -230,6 +231,7 @@ - #define __NR_removexattr 218 - #define __NR_lremovexattr 219 - #define __NR_fremovexattr 220 -+#if 0 /* Reserved syscalls */ - #define __NR_futex 221 - #define __NR_sched_setaffinity 222 - #define __NR_sched_getaffinity 223 -Index: linux-2.4.24-vanilla/include/asm-s390/unistd.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/asm-s390/unistd.h 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/include/asm-s390/unistd.h 2004-01-10 17:20:28.000000000 +0300 -@@ -213,9 +213,18 @@ - #define __NR_getdents64 220 - #define __NR_fcntl64 221 - #define __NR_readahead 222 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - -Index: linux-2.4.24-vanilla/include/asm-s390x/unistd.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/asm-s390x/unistd.h 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/include/asm-s390x/unistd.h 2004-01-10 17:20:28.000000000 +0300 -@@ -181,9 +181,18 @@ - #define __NR_mincore 218 - #define __NR_madvise 219 - #define __NR_readahead 222 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - -Index: linux-2.4.24-vanilla/include/linux/cache_def.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/cache_def.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/cache_def.h 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,15 @@ -+/* -+ * linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... -+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); -Index: linux-2.4.24-vanilla/include/linux/errno.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/errno.h 2001-02-10 01:46:13.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/errno.h 2004-01-10 17:20:28.000000000 +0300 -@@ -23,4 +23,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif -Index: linux-2.4.24-vanilla/include/linux/ext2_fs.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/ext2_fs.h 2004-01-10 17:04:42.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/ext2_fs.h 2004-01-10 17:20:28.000000000 +0300 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -466,7 +443,7 @@ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -624,8 +601,10 @@ - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ -Index: linux-2.4.24-vanilla/include/linux/ext2_xattr.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/ext2_xattr.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/ext2_xattr.h 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ -Index: linux-2.4.24-vanilla/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/ext3_fs.h 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/ext3_fs.h 2004-01-10 17:20:28.000000000 +0300 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -521,7 +497,7 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -704,6 +680,7 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -773,8 +750,10 @@ - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - -Index: linux-2.4.24-vanilla/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/ext3_jbd.h 2004-01-10 17:11:50.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/ext3_jbd.h 2004-01-10 17:20:28.000000000 +0300 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - -Index: linux-2.4.24-vanilla/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/ext3_xattr.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/ext3_xattr.h 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ -Index: linux-2.4.24-vanilla/include/linux/fs.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/fs.h 2004-01-10 17:11:45.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/fs.h 2004-01-10 17:20:28.000000000 +0300 -@@ -913,7 +913,7 @@ - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); -Index: linux-2.4.24-vanilla/include/linux/mbcache.h -=================================================================== ---- linux-2.4.24-vanilla.orig/include/linux/mbcache.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24-vanilla/include/linux/mbcache.h 2004-01-10 17:20:28.000000000 +0300 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif -Index: linux-2.4.24-vanilla/kernel/ksyms.c -=================================================================== ---- linux-2.4.24-vanilla.orig/kernel/ksyms.c 2004-01-10 17:11:43.000000000 +0300 -+++ linux-2.4.24-vanilla/kernel/ksyms.c 2004-01-10 17:20:28.000000000 +0300 -@@ -11,6 +11,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -92,6 +93,7 @@ - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - - /* internal kernel memory management */ - EXPORT_SYMBOL(_alloc_pages); -@@ -109,6 +111,8 @@ - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); -Index: linux-2.4.24-vanilla/mm/vmscan.c -=================================================================== ---- linux-2.4.24-vanilla.orig/mm/vmscan.c 2004-01-10 17:06:00.000000000 +0300 -+++ linux-2.4.24-vanilla/mm/vmscan.c 2004-01-10 17:21:00.000000000 +0300 -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -34,6 +35,39 @@ - */ - int vm_passes = 60; - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan - * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll -@@ -523,6 +557,7 @@ - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); - #endif -+ shrink_other_caches(vm_vfs_scan_ratio, gfp_mask); - - if (!*failed_swapout) - *failed_swapout = !swap_out(classzone); -@@ -645,6 +680,7 @@ - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); - #endif -+ shrink_other_caches(vm_vfs_scan_ratio, gfp_mask); - if (!failed_swapout) - failed_swapout = !swap_out(classzone); - } while (--tries); diff --git a/lustre/kernel_patches/patches/linux-2.4.29-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.29-xattr-0.8.54.patch deleted file mode 100644 index 8225ea3..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.29-xattr-0.8.54.patch +++ /dev/null @@ -1,5362 +0,0 @@ -Index: linux-2.4.29/Documentation/Configure.help -=================================================================== ---- linux-2.4.29.orig/Documentation/Configure.help 2005-04-07 18:55:00.000000000 +0300 -+++ linux-2.4.29/Documentation/Configure.help 2005-05-03 17:59:40.363127040 +0300 -@@ -16679,6 +16679,39 @@ - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -16711,6 +16744,39 @@ - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is -Index: linux-2.4.29/arch/alpha/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/alpha/defconfig 2005-04-07 18:53:42.000000000 +0300 -+++ linux-2.4.29/arch/alpha/defconfig 2005-05-03 17:59:40.365126736 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ALPHA=y - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set -Index: linux-2.4.29/arch/alpha/kernel/entry.S -=================================================================== ---- linux-2.4.29.orig/arch/alpha/kernel/entry.S 2005-04-07 18:52:17.000000000 +0300 -+++ linux-2.4.29/arch/alpha/kernel/entry.S 2005-05-03 17:59:40.367126432 +0300 -@@ -1154,6 +1154,18 @@ - .quad sys_readahead - .quad sys_ni_syscall /* 380, sys_security */ - .quad sys_tkill -+ .quad sys_setxattr -+ .quad sys_lsetxattr -+ .quad sys_fsetxattr -+ .quad sys_getxattr /* 385 */ -+ .quad sys_lgetxattr -+ .quad sys_fgetxattr -+ .quad sys_listxattr -+ .quad sys_llistxattr -+ .quad sys_flistxattr /* 390 */ -+ .quad sys_removexattr -+ .quad sys_lremovexattr -+ .quad sys_fremovexattr - - /* Remember to update everything, kids. */ - .ifne (. - sys_call_table) - (NR_SYSCALLS * 8) -Index: linux-2.4.29/arch/arm/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/arm/defconfig 2005-04-07 18:53:03.000000000 +0300 -+++ linux-2.4.29/arch/arm/defconfig 2005-05-03 17:59:40.369126128 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ARM=y - # CONFIG_EISA is not set - # CONFIG_SBUS is not set -Index: linux-2.4.29/arch/arm/kernel/calls.S -=================================================================== ---- linux-2.4.29.orig/arch/arm/kernel/calls.S 2005-04-07 18:55:23.000000000 +0300 -+++ linux-2.4.29/arch/arm/kernel/calls.S 2005-05-03 17:59:40.371125824 +0300 -@@ -240,18 +240,18 @@ - .long SYMBOL_NAME(sys_ni_syscall) /* Security */ - .long SYMBOL_NAME(sys_gettid) - /* 225 */ .long SYMBOL_NAME(sys_readahead) -- .long SYMBOL_NAME(sys_ni_syscall) /* setxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* lsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* getxattr */ --/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* lgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* listxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* llistxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* flistxattr */ --/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* removexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* lremovexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* fremovexattr */ -+ .long SYMBOL_NAME(sys_setxattr) -+ .long SYMBOL_NAME(sys_lsetxattr) -+ .long SYMBOL_NAME(sys_fsetxattr) -+ .long SYMBOL_NAME(sys_getxattr) -+/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) -+ .long SYMBOL_NAME(sys_fgetxattr) -+ .long SYMBOL_NAME(sys_listxattr) -+ .long SYMBOL_NAME(sys_llistxattr) -+ .long SYMBOL_NAME(sys_flistxattr) -+/* 235 */ .long SYMBOL_NAME(sys_removexattr) -+ .long SYMBOL_NAME(sys_lremovexattr) -+ .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_ni_syscall) /* sendfile64 */ - /* 240 */ .long SYMBOL_NAME(sys_ni_syscall) /* futex */ -Index: linux-2.4.29/arch/i386/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/i386/defconfig 2005-04-07 18:52:37.000000000 +0300 -+++ linux-2.4.29/arch/i386/defconfig 2005-05-03 17:59:40.372125672 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_X86=y - # CONFIG_SBUS is not set - CONFIG_UID16=y -Index: linux-2.4.29/arch/ia64/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/ia64/defconfig 2005-04-07 18:52:32.000000000 +0300 -+++ linux-2.4.29/arch/ia64/defconfig 2005-05-03 17:59:40.374125368 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options -Index: linux-2.4.29/arch/m68k/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/m68k/defconfig 2005-04-07 18:52:26.000000000 +0300 -+++ linux-2.4.29/arch/m68k/defconfig 2005-05-03 17:59:40.375125216 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - - # -Index: linux-2.4.29/arch/mips/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/mips/defconfig 2005-04-07 18:52:42.000000000 +0300 -+++ linux-2.4.29/arch/mips/defconfig 2005-05-03 17:59:40.376125064 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - CONFIG_MIPS32=y - # CONFIG_MIPS64 is not set -Index: linux-2.4.29/arch/mips64/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/mips64/defconfig 2005-04-07 18:52:47.000000000 +0300 -+++ linux-2.4.29/arch/mips64/defconfig 2005-05-03 17:59:40.378124760 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - # CONFIG_MIPS32 is not set - CONFIG_MIPS64=y -Index: linux-2.4.29/arch/s390/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/s390/defconfig 2005-04-07 18:54:49.000000000 +0300 -+++ linux-2.4.29/arch/s390/defconfig 2005-05-03 17:59:40.379124608 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set -Index: linux-2.4.29/arch/s390/kernel/entry.S -=================================================================== ---- linux-2.4.29.orig/arch/s390/kernel/entry.S 2005-04-07 18:52:47.000000000 +0300 -+++ linux-2.4.29/arch/s390/kernel/entry.S 2005-05-03 17:59:40.381124304 +0300 -@@ -558,18 +558,18 @@ - .long sys_fcntl64 - .long sys_readahead - .long sys_ni_syscall -- .long sys_ni_syscall /* 224 - reserved for setxattr */ -- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ -- .long sys_ni_syscall /* 226 - reserved for fsetxattr */ -- .long sys_ni_syscall /* 227 - reserved for getxattr */ -- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ -- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ -- .long sys_ni_syscall /* 230 - reserved for listxattr */ -- .long sys_ni_syscall /* 231 - reserved for llistxattr */ -- .long sys_ni_syscall /* 232 - reserved for flistxattr */ -- .long sys_ni_syscall /* 233 - reserved for removexattr */ -- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ -- .long sys_ni_syscall /* 235 - reserved for fremovexattr */ -+ .long sys_setxattr -+ .long sys_lsetxattr /* 225 */ -+ .long sys_fsetxattr -+ .long sys_getxattr -+ .long sys_lgetxattr -+ .long sys_fgetxattr -+ .long sys_listxattr /* 230 */ -+ .long sys_llistxattr -+ .long sys_flistxattr -+ .long sys_removexattr -+ .long sys_lremovexattr -+ .long sys_fremovexattr /* 235 */ - .long sys_gettid - .long sys_tkill - .rept 255-237 -Index: linux-2.4.29/arch/s390x/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/s390x/defconfig 2005-04-07 18:52:17.000000000 +0300 -+++ linux-2.4.29/arch/s390x/defconfig 2005-05-03 17:59:40.382124152 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set -Index: linux-2.4.29/arch/s390x/kernel/entry.S -=================================================================== ---- linux-2.4.29.orig/arch/s390x/kernel/entry.S 2005-04-07 18:52:58.000000000 +0300 -+++ linux-2.4.29/arch/s390x/kernel/entry.S 2005-05-03 17:59:40.384123848 +0300 -@@ -591,18 +591,18 @@ - .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) - .long SYSCALL(sys_readahead,sys32_readahead) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for llistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ -+ .long SYSCALL(sys_setxattr,sys32_setxattr_wrapper) -+ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -+ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) -+ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) -+ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) -+ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) -+ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -+ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) -+ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) -+ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) -+ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) -+ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ - .long SYSCALL(sys_gettid,sys_gettid) - .long SYSCALL(sys_tkill,sys_tkill) - .rept 255-237 -Index: linux-2.4.29/arch/s390x/kernel/wrapper32.S -=================================================================== ---- linux-2.4.29.orig/arch/s390x/kernel/wrapper32.S 2005-04-07 18:55:12.000000000 +0300 -+++ linux-2.4.29/arch/s390x/kernel/wrapper32.S 2005-05-03 17:59:40.386123544 +0300 -@@ -1098,6 +1098,98 @@ - llgfr %r4,%r4 # long - jg sys32_fstat64 # branch to system call - -+ .globl sys32_setxattr_wrapper -+sys32_setxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_setxattr -+ -+ .globl sys32_lsetxattr_wrapper -+sys32_lsetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_lsetxattr -+ -+ .globl sys32_fsetxattr_wrapper -+sys32_fsetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_fsetxattr -+ -+ .globl sys32_getxattr_wrapper -+sys32_getxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_getxattr -+ -+ .globl sys32_lgetxattr_wrapper -+sys32_lgetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_lgetxattr -+ -+ .globl sys32_fgetxattr_wrapper -+sys32_fgetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_fgetxattr -+ -+ .globl sys32_listxattr_wrapper -+sys32_listxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_listxattr -+ -+ .globl sys32_llistxattr_wrapper -+sys32_llistxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_llistxattr -+ -+ .globl sys32_flistxattr_wrapper -+sys32_flistxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_flistxattr -+ -+ .globl sys32_removexattr_wrapper -+sys32_removexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_removexattr -+ -+ .globl sys32_lremovexattr_wrapper -+sys32_lremovexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_lremovexattr -+ -+ .globl sys32_fremovexattr_wrapper -+sys32_fremovexattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ jg sys_fremovexattr -+ -+ -+ - .globl sys32_stime_wrapper - sys32_stime_wrapper: - llgtr %r2,%r2 # int * -Index: linux-2.4.29/arch/sparc64/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/sparc64/defconfig 2005-04-07 18:53:09.000000000 +0300 -+++ linux-2.4.29/arch/sparc64/defconfig 2005-05-03 17:59:40.388123240 +0300 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+CONFIG_EXT3_FS_XATTR=y -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options -Index: linux-2.4.29/fs/Config.in -=================================================================== ---- linux-2.4.29.orig/fs/Config.in 2005-04-07 18:54:16.000000000 +0300 -+++ linux-2.4.29/fs/Config.in 2005-05-03 17:59:40.389123088 +0300 -@@ -29,6 +29,11 @@ - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -92,6 +97,11 @@ - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -171,6 +181,10 @@ - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in -Index: linux-2.4.29/fs/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/Makefile 2005-05-03 17:23:53.969428480 +0300 -+++ linux-2.4.29/fs/Makefile 2005-05-03 17:59:40.390122936 +0300 -@@ -77,6 +77,9 @@ - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - -Index: linux-2.4.29/fs/ext2/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/ext2/Makefile 2005-04-07 18:54:32.000000000 +0300 -+++ linux-2.4.29/fs/ext2/Makefile 2005-05-03 17:59:40.391122784 +0300 -@@ -13,4 +13,8 @@ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make -Index: linux-2.4.29/fs/ext2/file.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/file.c 2005-04-07 18:53:14.000000000 +0300 -+++ linux-2.4.29/fs/ext2/file.c 2005-05-03 17:59:40.392122632 +0300 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.29/fs/ext2/ialloc.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/ialloc.c 2005-04-07 18:53:47.000000000 +0300 -+++ linux-2.4.29/fs/ext2/ialloc.c 2005-05-03 17:59:40.393122480 +0300 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } -Index: linux-2.4.29/fs/ext2/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/inode.c 2005-04-07 18:52:32.000000000 +0300 -+++ linux-2.4.29/fs/ext2/inode.c 2005-05-03 17:59:40.396122024 +0300 -@@ -64,9 +64,7 @@ - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -815,6 +813,8 @@ - return; - if (ext2_inode_is_fast_symlink(inode)) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -917,8 +917,7 @@ - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -1004,10 +1003,7 @@ - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -1019,12 +1015,14 @@ - if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = &ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - ext2_set_inode_flags(inode); -Index: linux-2.4.29/fs/ext2/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/namei.c 2005-04-07 18:54:50.000000000 +0300 -+++ linux-2.4.29/fs/ext2/namei.c 2005-05-03 17:59:40.397121872 +0300 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.29/fs/ext2/super.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/super.c 2005-04-07 18:54:16.000000000 +0300 -+++ linux-2.4.29/fs/ext2/super.c 2005-05-03 17:59:40.400121416 +0300 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -446,6 +455,9 @@ - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -840,12 +852,27 @@ - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; -Index: linux-2.4.29/fs/ext2/symlink.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/symlink.c 2005-04-07 18:52:53.000000000 +0300 -+++ linux-2.4.29/fs/ext2/symlink.c 2005-05-03 17:59:40.400121416 +0300 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; -Index: linux-2.4.29/fs/ext2/xattr.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/xattr.c 2005-05-03 17:59:40.233146800 +0300 -+++ linux-2.4.29/fs/ext2/xattr.c 2005-05-03 17:59:40.405120656 +0300 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ -Index: linux-2.4.29/fs/ext2/xattr_user.c -=================================================================== ---- linux-2.4.29.orig/fs/ext2/xattr_user.c 2005-05-03 17:59:40.233146800 +0300 -+++ linux-2.4.29/fs/ext2/xattr_user.c 2005-05-03 17:59:40.407120352 +0300 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -Index: linux-2.4.29/fs/ext3/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/ext3/Makefile 2005-05-03 17:23:54.093409632 +0300 -+++ linux-2.4.29/fs/ext3/Makefile 2005-05-03 17:59:40.408120200 +0300 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,14 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make -Index: linux-2.4.29/fs/ext3/file.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/file.c 2005-05-03 17:23:54.091409936 +0300 -+++ linux-2.4.29/fs/ext3/file.c 2005-05-03 17:59:40.410119896 +0300 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -127,5 +128,9 @@ - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - -Index: linux-2.4.29/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/ialloc.c 2005-04-07 18:53:42.000000000 +0300 -+++ linux-2.4.29/fs/ext3/ialloc.c 2005-05-03 17:59:40.411119744 +0300 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - -Index: linux-2.4.29/fs/ext3/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/inode.c 2005-04-07 18:54:16.000000000 +0300 -+++ linux-2.4.29/fs/ext3/inode.c 2005-05-03 17:59:40.415119136 +0300 -@@ -60,7 +60,7 @@ - * still needs to be revoked. - */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -191,9 +191,7 @@ - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1885,6 +1883,8 @@ - return; - if (ext3_inode_is_fast_symlink(inode)) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2032,8 +2032,6 @@ - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2174,10 +2172,7 @@ - inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; - INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2188,12 +2183,14 @@ - if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - brelse(iloc.bh); - ext3_set_inode_flags(inode); - return; -Index: linux-2.4.29/fs/ext3/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/namei.c 2005-05-03 17:23:54.101408416 +0300 -+++ linux-2.4.29/fs/ext3/namei.c 2005-05-03 17:59:40.419118528 +0300 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1613,7 +1614,7 @@ - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1621,7 +1622,6 @@ - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1648,9 +1648,6 @@ - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2019,7 +2016,7 @@ - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. -@@ -2248,4 +2245,16 @@ - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ -Index: linux-2.4.29/fs/ext3/super.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/super.c 2005-05-03 17:23:54.104407960 +0300 -+++ linux-2.4.29/fs/ext3/super.c 2005-05-03 18:00:16.805586944 +0300 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -504,6 +506,7 @@ - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -516,6 +519,13 @@ - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -954,6 +964,12 @@ - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1838,22 +1854,35 @@ - - static int __init init_ext3_fs(void) - { -+ int error; - #ifdef CONFIG_QUOTA - init_dquot_operations(&ext3_qops); - old_write_dquot = ext3_qops.write_dquot; - ext3_qops.write_dquot = ext3_write_dquot; - #endif -- return register_filesystem(&ext3_fs_type); -+ error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); -Index: linux-2.4.29/fs/ext3/symlink.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/symlink.c 2005-04-07 18:53:53.000000000 +0300 -+++ linux-2.4.29/fs/ext3/symlink.c 2005-05-03 17:59:40.423117920 +0300 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -Index: linux-2.4.29/fs/ext3/xattr.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/xattr.c 2005-05-03 17:59:40.234146648 +0300 -+++ linux-2.4.29/fs/ext3/xattr.c 2005-05-03 17:59:40.428117160 +0300 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. */ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. */ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. */ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. -+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. -+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. -+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. -+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ -+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ -Index: linux-2.4.29/fs/ext3/xattr_user.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/xattr_user.c 2005-05-03 17:59:40.234146648 +0300 -+++ linux-2.4.29/fs/ext3/xattr_user.c 2005-05-03 17:59:40.429117008 +0300 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -Index: linux-2.4.29/fs/ext3/ext3-exports.c -=================================================================== ---- linux-2.4.29.orig/fs/ext3/ext3-exports.c 2005-05-03 17:59:40.234146648 +0300 -+++ linux-2.4.29/fs/ext3/ext3-exports.c 2005-05-03 18:00:08.195895816 +0300 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -Index: linux-2.4.29/fs/jfs/jfs_xattr.h -=================================================================== ---- linux-2.4.29.orig/fs/jfs/jfs_xattr.h 2005-04-07 18:53:29.000000000 +0300 -+++ linux-2.4.29/fs/jfs/jfs_xattr.h 2005-05-03 17:59:40.431116704 +0300 -@@ -52,8 +52,10 @@ - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); -Index: linux-2.4.29/fs/jfs/xattr.c -=================================================================== ---- linux-2.4.29.orig/fs/jfs/xattr.c 2005-04-07 18:52:32.000000000 +0300 -+++ linux-2.4.29/fs/jfs/xattr.c 2005-05-03 17:59:40.433116400 +0300 -@@ -649,7 +649,7 @@ - } - - static int can_set_xattr(struct inode *inode, const char *name, -- void *value, size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -668,7 +668,7 @@ - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -807,7 +807,7 @@ - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ -Index: linux-2.4.29/fs/mbcache.c -=================================================================== ---- linux-2.4.29.orig/fs/mbcache.c 2005-05-03 17:59:40.235146496 +0300 -+++ linux-2.4.29/fs/mbcache.c 2005-05-03 17:59:40.436115944 +0300 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. Entries that cannot be freed immediately are put -+ * back on the lru list. -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. -+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. */ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. -+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. -+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. -+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for (n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). -+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... -+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ -Index: linux-2.4.29/include/asm-arm/unistd.h -=================================================================== ---- linux-2.4.29.orig/include/asm-arm/unistd.h 2005-04-07 18:55:01.000000000 +0300 -+++ linux-2.4.29/include/asm-arm/unistd.h 2005-05-03 17:59:40.438115640 +0300 -@@ -250,7 +250,6 @@ - #define __NR_security (__NR_SYSCALL_BASE+223) - #define __NR_gettid (__NR_SYSCALL_BASE+224) - #define __NR_readahead (__NR_SYSCALL_BASE+225) --#if 0 /* allocated in 2.5 */ - #define __NR_setxattr (__NR_SYSCALL_BASE+226) - #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) - #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -@@ -263,7 +262,6 @@ - #define __NR_removexattr (__NR_SYSCALL_BASE+235) - #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) - #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) --#endif - #define __NR_tkill (__NR_SYSCALL_BASE+238) - #if 0 /* allocated in 2.5 */ - #define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -Index: linux-2.4.29/include/asm-ppc64/unistd.h -=================================================================== ---- linux-2.4.29.orig/include/asm-ppc64/unistd.h 2005-04-07 18:52:47.000000000 +0300 -+++ linux-2.4.29/include/asm-ppc64/unistd.h 2005-05-03 17:59:40.439115488 +0300 -@@ -218,6 +218,7 @@ - #define __NR_mincore 206 - #define __NR_gettid 207 - #define __NR_tkill 208 -+#endif - #define __NR_setxattr 209 - #define __NR_lsetxattr 210 - #define __NR_fsetxattr 211 -@@ -230,6 +231,7 @@ - #define __NR_removexattr 218 - #define __NR_lremovexattr 219 - #define __NR_fremovexattr 220 -+#if 0 /* Reserved syscalls */ - #define __NR_futex 221 - #define __NR_sched_setaffinity 222 - #define __NR_sched_getaffinity 223 -Index: linux-2.4.29/include/asm-s390/unistd.h -=================================================================== ---- linux-2.4.29.orig/include/asm-s390/unistd.h 2005-04-07 18:55:23.000000000 +0300 -+++ linux-2.4.29/include/asm-s390/unistd.h 2005-05-03 17:59:40.440115336 +0300 -@@ -213,9 +213,18 @@ - #define __NR_getdents64 220 - #define __NR_fcntl64 221 - #define __NR_readahead 222 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - -Index: linux-2.4.29/include/asm-s390x/unistd.h -=================================================================== ---- linux-2.4.29.orig/include/asm-s390x/unistd.h 2005-04-07 18:54:22.000000000 +0300 -+++ linux-2.4.29/include/asm-s390x/unistd.h 2005-05-03 17:59:40.441115184 +0300 -@@ -181,9 +181,18 @@ - #define __NR_mincore 218 - #define __NR_madvise 219 - #define __NR_readahead 222 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - -Index: linux-2.4.29/include/linux/cache_def.h -=================================================================== ---- linux-2.4.29.orig/include/linux/cache_def.h 2005-05-03 17:59:40.235146496 +0300 -+++ linux-2.4.29/include/linux/cache_def.h 2005-05-03 17:59:40.442115032 +0300 -@@ -0,0 +1,15 @@ -+/* -+ * linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... -+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); -Index: linux-2.4.29/include/linux/errno.h -=================================================================== ---- linux-2.4.29.orig/include/linux/errno.h 2005-04-07 18:54:43.000000000 +0300 -+++ linux-2.4.29/include/linux/errno.h 2005-05-03 17:59:40.443114880 +0300 -@@ -23,4 +23,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif -Index: linux-2.4.29/include/linux/ext2_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext2_fs.h 2005-04-07 18:52:37.000000000 +0300 -+++ linux-2.4.29/include/linux/ext2_fs.h 2005-05-03 17:59:40.445114576 +0300 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -410,6 +386,7 @@ - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -480,7 +457,7 @@ - #define EXT2_FEATURE_INCOMPAT_META_BG 0x0010 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \ - EXT2_FEATURE_INCOMPAT_META_BG) - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -650,8 +627,10 @@ - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ -Index: linux-2.4.29/include/linux/ext2_xattr.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext2_xattr.h 2005-05-03 17:59:40.236146344 +0300 -+++ linux-2.4.29/include/linux/ext2_xattr.h 2005-05-03 17:59:40.446114424 +0300 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ -Index: linux-2.4.29/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-05-03 17:23:54.107407504 +0300 -+++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 17:59:40.448114120 +0300 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -524,7 +500,7 @@ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG) -@@ -718,6 +694,7 @@ - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -787,8 +764,10 @@ - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - -Index: linux-2.4.29/include/linux/ext3_jbd.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_jbd.h 2005-05-03 17:23:54.109407200 +0300 -+++ linux-2.4.29/include/linux/ext3_jbd.h 2005-05-03 17:59:40.449113968 +0300 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - -Index: linux-2.4.29/include/linux/ext3_xattr.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ext3_xattr.h 2005-05-03 17:59:40.236146344 +0300 -+++ linux-2.4.29/include/linux/ext3_xattr.h 2005-05-03 17:59:40.451113664 +0300 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-05-03 17:23:53.736463896 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-05-03 17:59:40.453113360 +0300 -@@ -915,7 +915,7 @@ - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); -Index: linux-2.4.29/include/linux/mbcache.h -=================================================================== ---- linux-2.4.29.orig/include/linux/mbcache.h 2005-05-03 17:59:40.236146344 +0300 -+++ linux-2.4.29/include/linux/mbcache.h 2005-05-03 17:59:40.454113208 +0300 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif -Index: linux-2.4.29/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/ksyms.c 2005-04-07 19:14:06.000000000 +0300 -+++ linux-2.4.29/kernel/ksyms.c 2005-05-03 17:59:40.456112904 +0300 -@@ -11,6 +11,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -92,6 +93,7 @@ - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - - /* internal kernel memory management */ - EXPORT_SYMBOL(_alloc_pages); -@@ -109,6 +111,8 @@ - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); -Index: linux-2.4.29/mm/vmscan.c -=================================================================== ---- linux-2.4.29.orig/mm/vmscan.c 2005-04-07 18:52:37.000000000 +0300 -+++ linux-2.4.29/mm/vmscan.c 2005-05-03 17:59:40.458112600 +0300 -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -34,6 +35,39 @@ - */ - int vm_passes = 60; - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan - * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll -@@ -544,6 +578,7 @@ - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); - #endif -+ shrink_other_caches(vm_vfs_scan_ratio, gfp_mask); - - if (!*failed_swapout) - *failed_swapout = !swap_out(classzone); -@@ -666,6 +701,7 @@ - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); - #endif -+ shrink_other_caches(vm_vfs_scan_ratio, gfp_mask); - if (!failed_swapout) - failed_swapout = !swap_out(classzone); - } while (--tries); diff --git a/lustre/kernel_patches/patches/linux-2.6.9-network_driver-for-sk98.patch b/lustre/kernel_patches/patches/linux-2.6.9-network_driver-for-sk98.patch new file mode 100644 index 0000000..64439f9 --- /dev/null +++ b/lustre/kernel_patches/patches/linux-2.6.9-network_driver-for-sk98.patch @@ -0,0 +1,39833 @@ +diff -ruN linux-2.6.9.old/Documentation/networking/sk98lin.txt linux-2.6.9.new/Documentation/networking/sk98lin.txt +--- linux-2.6.9.old/Documentation/networking/sk98lin.txt 2004-10-19 05:54:38.000000000 +0800 ++++ linux-2.6.9.new/Documentation/networking/sk98lin.txt 2006-12-07 14:35:03.000000000 +0800 +@@ -1,38 +1,56 @@ +-(C)Copyright 1999-2004 Marvell(R). +-All rights reserved +-=========================================================================== ++(C)Copyright 1999-2005 Marvell(R). ++All rights reserved. ++================================================================================ + +-sk98lin.txt created 13-Feb-2004 ++sk98lin.txt created 20-Jun-2005 + +-Readme File for sk98lin v6.23 +-Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter family driver for LINUX ++Readme File for sk98lin v8.23.1.3 ++Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter driver for LINUX + + This file contains + 1 Overview +- 2 Required Files +- 3 Installation +- 3.1 Driver Installation +- 3.2 Inclusion of adapter at system start +- 4 Driver Parameters +- 4.1 Per-Port Parameters +- 4.2 Adapter Parameters +- 5 Large Frame Support +- 6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad) +- 7 Troubleshooting ++ 2 Supported Functions ++ 3 Required Files ++ 4 Installation ++ 4.1 Driver Installation ++ 4.2 Inclusion of adapter at system start ++ 5 Driver Parameters ++ 5.1 Per-Port Parameters ++ 5.2 Adapter Parameters ++ 6 Ethtool Support ++ 7 Large Frame Support ++ 8 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad) ++ 9 Wake on Lan support ++10 Troubleshooting + +-=========================================================================== ++================================================================================ + + + 1 Overview + =========== + +-The sk98lin driver supports the Marvell Yukon and SysKonnect +-SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. It has +-been tested with Linux on Intel/x86 machines. ++The sk98lin driver supports the Marvell Yukon, Yukon EC/FE, Yukon 2 ++and SysKonnect SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. ++It has been tested with Linux on Intel/x86, x86_64 and IA64 machines. + *** + ++2 Supported Functions ++====================== ++ ++The following functions are supported by the driver: + +-2 Required Files ++ NOTE 1: The hardware support depends on the used card ++ ++ - RX/TX HW Checksum ++ - Hardware interrupt moderation (static/dynamic) ++ - Transmit poll ++ - Zerocopy/Scatter-Gather ++ - Ethtool support ++ - Wake on Lan (Magic Packet only) (From suspend and APM only) ++ - DualNet ++ ++ ++3 Required Files + ================= + + The linux kernel source. +@@ -40,16 +58,14 @@ + *** + + +-3 Installation ++4 Installation + =============== + + It is recommended to download the latest version of the driver from the +-SysKonnect web site www.syskonnect.com. If you have downloaded the latest +-driver, the Linux kernel has to be patched before the driver can be +-installed. For details on how to patch a Linux kernel, refer to the +-patch.txt file. ++SysKonnect web site www.syskonnect.com. For details on Installation ++Instructions for sk98lin Driver, please refer to the README.txt file. + +-3.1 Driver Installation ++4.1 Driver Installation + ------------------------ + + The following steps describe the actions that are required to install +@@ -110,13 +126,13 @@ + + NOTE 1: If you have more than one Marvell Yukon or SysKonnect SK-98xx + adapter installed, the adapters will be listed as 'eth0', +- 'eth1', 'eth2', etc. +- For each adapter, repeat steps 3 and 4 below. ++ 'eth1', 'eth2', etc. ++ For each adapter, repeat steps 3 and 4 below. + + NOTE 2: If you have other Ethernet adapters installed, your Marvell + Yukon or SysKonnect SK-98xx adapter will be mapped to the +- next available number, e.g. 'eth1'. The mapping is executed +- automatically. ++ next available number, e.g. 'eth1'. The mapping is executed ++ automatically. + The module installation message (displayed either in a system + log file or on the console) prints a line for each adapter + found containing the corresponding 'ethX'. +@@ -153,7 +169,7 @@ + 1. Execute the command "ifconfig eth0 down". + 2. Execute the command "rmmod sk98lin". + +-3.2 Inclusion of adapter at system start ++4.2 Inclusion of adapter at system start + ----------------------------------------- + + Since a large number of different Linux distributions are +@@ -165,7 +181,8 @@ + + *** + +-4 Driver Parameters ++ ++5 Driver Parameters + ==================== + + Parameters can be set at the command line after the module has been +@@ -174,7 +191,7 @@ + to the driver module. + + If you use the kernel module loader, you can set driver parameters +-in the file /etc/modprobe.conf (or /etc/modules.conf in 2.4 or earlier). ++in the file /etc/modules.conf (or old name: /etc/conf.modules). + To set the driver parameters in this file, proceed as follows: + + 1. Insert a line of the form : +@@ -208,7 +225,7 @@ + more adapters, adjust this and recompile. + + +-4.1 Per-Port Parameters ++5.1 Per-Port Parameters + ------------------------ + + These settings are available for each port on the adapter. +@@ -282,7 +299,7 @@ + with this parameter. + + +-4.2 Adapter Parameters ++5.2 Adapter Parameters + ----------------------- + + Connection Type (SK-98xx V2.0 copper adapters only) +@@ -379,7 +396,6 @@ + is tremendous. On the other hand, selecting a very short moderation time might + compensate the use of any moderation being applied. + +- + Preferred Port + -------------- + Parameter: PrefPort +@@ -394,7 +410,7 @@ + ------------------------------------------------ + Parameter: RlmtMode + Values: CheckLinkState,CheckLocalPort, CheckSeg, DualNet +-Default: CheckLinkState ++Default: CheckLinkState (DualNet on dual port adapters) + + RLMT monitors the status of the port. If the link of the active port + fails, RLMT switches immediately to the standby link. The virtual link is +@@ -429,10 +445,94 @@ + where a network path between the ports on one adapter exists. + Moreover, they are not designed to work where adapters are connected + back-to-back. ++ ++LowLatency ++---------- ++Parameter: LowLatency ++Values: On, Off ++Default: Off ++ ++This is used to reduce the packet latency time of the adapter. Setting the ++LowLatency parameter to 'On' forces the adapter to pass any received packet ++immediately to upper network layers and to send out any transmit packet as ++fast as possible. ++ ++NOTE 1: The system load increases if LowLatency is set to 'On' and a lot ++ of data packets are transmitted and received. ++ ++NOTE 2: This parameter is only used on adapters which are based on ++ PCI Express compatible chipsets. + *** + + +-5 Large Frame Support ++6 Ethtool Support ++================== ++ ++The sk98lin driver provides built-in ethtool support. The ethtool ++can be used to display or modify interface specific configurations. ++ ++Ethtool commands are invoked using a single parameter which reflects ++the requested ethtool command plus an optional number of parameters ++which belong to the desired command. ++ ++It is not the intention of this section to explain the ethtool command ++line tool and all its options. For further information refer to the ++manpage of the ethtool. This sections describes only the sk98lin ++driver supported ethtool commands. ++ ++Pause Parameters ++---------------- ++Query command: -a ++Set command: -A [autoneg on|off] [rx on|off] [tx on|off] ++Sample: ethtool -A eth0 rx off tx off ++ ++Coalescing Parameters ++--------------------- ++Query command: -c ++Set command: -C [sample-interval I] ++ [rx-usecs N] [tx-usecs N] ++ [rx-usecs-low N] [tx-usecs-low N] ++ [rx-usecs-high N] [tx-usecs-high N] ++Parameter: I = Length of sample interval, in seconds ++ (supported values range from 1...10) ++ N = Length of coalescing interval, in microseconds ++ (supported values range from 25...33,333) ++Sample: ethtool -C eth2 rx-usecs 500 tx-usecs 500 ++ ++NOTE: The sk98lin driver does not support different settings ++ for the rx and tx interrupt coalescing parameters. ++ ++Driver Information ++------------------ ++Query command: -i ++Sample: ethtool -i eth1 ++ ++Checksumming Parameters ++----------------------- ++Query command: -k ++Set command: -K [rx on|off] [tx on|off] [sg on|off] ++Sample: ethtool -K eth0 sg off ++ ++Locate NIC Command ++------------------ ++Query command: -p [N] ++Parameter: N = Amount of time to perform locate NIC command, in seconds ++Sample: ethtool -p 10 eth1 ++ ++Driver-specific Statistics ++-------------------------- ++Query command: -S ++Sample: ethtool -S eth0 ++ ++Setting Parameters ++------------------ ++Set command: -s [speed 10|100|1000] [duplex half|full] ++ [autoneg on|off] [wol gd] ++Sample: ethtool -s eth2 wol d ++*** ++ ++ ++7 Large Frame Support + ====================== + + The driver supports large frames (also called jumbo frames). Using large +@@ -444,10 +544,10 @@ + ifconfig eth0 mtu 9000 + This will only work if you have two adapters connected back-to-back + or if you use a switch that supports large frames. When using a switch, +-it should be configured to allow large frames and auto-negotiation should +-be set to OFF. The setting must be configured on all adapters that can be +-reached by the large frames. If one adapter is not set to receive large +-frames, it will simply drop them. ++it should be configured to allow large frames. The setting must be ++configured on all adapters that can be reached by the large frames. ++If one adapter is not set to receive large frames, it will simply drop ++them. + + You can switch back to the standard ethernet frame size by executing the + following command: +@@ -459,7 +559,7 @@ + *** + + +-6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad) ++8 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad) + ================================================================== + + The Marvell Yukon/SysKonnect Linux drivers are able to support VLAN and +@@ -477,8 +577,21 @@ + cause problems when unloading the driver. + + +-7 Troubleshooting +-================== ++9 Wake on Lan support ++====================== ++ ++The sk98lin driver supports wake up from suspend mode with MagicPacket ++on APM systems. Wake on Lan support is enabled by default. To disable it ++please use the ethtool. ++ ++NOTE 1: APM support has to be enabled in BIOS and in the kernel. ++ ++NOTE 2: Refer to the kernel documentation for additional requirements ++ regarding APM support. ++ ++ ++10 Troubleshooting ++=================== + + If any problems occur during the installation process, check the + following list: +diff -ruN linux-2.6.9.old/drivers/net/Kconfig linux-2.6.9.new/drivers/net/Kconfig +--- linux-2.6.9.old/drivers/net/Kconfig 2006-12-07 14:37:37.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/Kconfig 2006-12-07 14:45:12.000000000 +0800 +@@ -2071,6 +2071,7 @@ + To compile this driver as a module, choose M here: the module + will be called sky2. This is recommended. + ++ + config SK98LIN + tristate "Marvell Yukon Chipset / SysKonnect SK-98xx Support" + depends on PCI +@@ -2080,6 +2081,22 @@ + by this driver: + - 3Com 3C940 Gigabit LOM Ethernet Adapter + - 3Com 3C941 Gigabit LOM Ethernet Adapter ++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Copper ++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Fiber LX ++ - 88E8021 Marvell 1000 Mbit PCI-X, single Port Fiber SX ++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Copper ++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Copper (Gateway) ++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Fiber LX ++ - 88E8022 Marvell 1000 Mbit PCI-X, dual Port Fiber SX ++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Copper ++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Fiber LX ++ - 88E8061 Marvell 1000 Mbit PCI-E, single Port Fiber SX ++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Copper ++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Copper (Gateway) ++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Fiber LX ++ - 88E8062 Marvell 1000 Mbit PCI-E, dual Port Fiber SX ++ - Abocom EFE3K - 10/100 Ethernet Expresscard ++ - Abocom EGE5K - Giga Ethernet Expresscard + - Allied Telesyn AT-2970LX Gigabit Ethernet Adapter + - Allied Telesyn AT-2970LX/2SC Gigabit Ethernet Adapter + - Allied Telesyn AT-2970SX Gigabit Ethernet Adapter +@@ -2090,31 +2107,79 @@ + - Allied Telesyn AT-2971T Gigabit Ethernet Adapter + - Belkin Gigabit Desktop Card 10/100/1000Base-T Adapter, Copper RJ-45 + - DGE-530T Gigabit Ethernet Adapter ++ - DGE-560T Gigabit Ethernet Adapter + - EG1032 v2 Instant Gigabit Network Adapter + - EG1064 v2 Instant Gigabit Network Adapter +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Abit) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Albatron) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Asus) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (ECS) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Epox) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Foxconn) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Gigabyte) +- - Marvell 88E8001 Gigabit LOM Ethernet Adapter (Iwill) +- - Marvell 88E8050 Gigabit LOM Ethernet Adapter (Intel) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Abit) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Albatron) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Asus) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Chaintech) ++ - Marvell 88E8001 Gigabit Ethernet Controller (ECS) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Epox) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Foxconn) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Gigabyte) ++ - Marvell 88E8001 Gigabit Ethernet Controller (Iwill) ++ - Marvell 88E8035 Fast Ethernet Controller (LGE) ++ - Marvell 88E8035 Fast Ethernet Controller (Toshiba) ++ - Marvell 88E8036 Fast Ethernet Controller (Arima) ++ - Marvell 88E8036 Fast Ethernet Controller (Compal) ++ - Marvell 88E8036 Fast Ethernet Controller (Inventec) ++ - Marvell 88E8036 Fast Ethernet Controller (LGE) ++ - Marvell 88E8036 Fast Ethernet Controller (Mitac) ++ - Marvell 88E8036 Fast Ethernet Controller (Panasonic) ++ - Marvell 88E8036 Fast Ethernet Controller (Quanta) ++ - Marvell 88E8036 Fast Ethernet Controller (Toshiba) ++ - Marvell 88E8036 Fast Ethernet Controller (Wistron) ++ - Marvell 88E8050 Gigabit Ethernet Controller (Gateway) ++ - Marvell 88E8050 Gigabit Ethernet Controller (Intel) ++ - Marvell 88E8052 Gigabit Ethernet Controller (ASRock) ++ - Marvell 88E8052 Gigabit Ethernet Controller (Aopen) ++ - Marvell 88E8052 Gigabit Ethernet Controller (Asus) ++ - Marvell 88E8052 Gigabit Ethernet Controller (Gateway) ++ - Marvell 88E8052 Gigabit Ethernet Controller (Gigabyte) ++ - Marvell 88E8052 Gigabit Ethernet Controller (MSI) ++ - Marvell 88E8052 Gigabit Ethernet Controller (Wistron) ++ - Marvell 88E8053 Gigabit Ethernet Controller (ASRock) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Albatron) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Aopen) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Arima) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Asus) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Chaintech) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Clevo) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Compal) ++ - Marvell 88E8053 Gigabit Ethernet Controller (DFI) ++ - Marvell 88E8053 Gigabit Ethernet Controller (ECS) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Epox) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Gigabyte) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Inventec) ++ - Marvell 88E8053 Gigabit Ethernet Controller (LGE) ++ - Marvell 88E8053 Gigabit Ethernet Controller (MSI) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Mitac) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Panasonic) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Quanta) ++ - Marvell 88E8053 Gigabit Ethernet Controller (SOYO) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Toshiba) ++ - Marvell 88E8053 Gigabit Ethernet Controller (Trigem) ++ - Marvell RDK-8001 + - Marvell RDK-8001 Adapter + - Marvell RDK-8002 Adapter ++ - Marvell RDK-8003 + - Marvell RDK-8003 Adapter + - Marvell RDK-8004 Adapter + - Marvell RDK-8006 Adapter + - Marvell RDK-8007 Adapter + - Marvell RDK-8008 Adapter + - Marvell RDK-8009 Adapter +- - Marvell RDK-8010 Adapter ++ - Marvell RDK-8010 + - Marvell RDK-8011 Adapter + - Marvell RDK-8012 Adapter +- - Marvell RDK-8052 Adapter +- - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (32 bit) +- - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (64 bit) ++ - Marvell RDK-8035 ++ - Marvell RDK-8036 ++ - Marvell RDK-8052 ++ - Marvell RDK-8053 ++ - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (32 bit) ++ - Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (64 bit) + - N-Way PCI-Bus Giga-Card 1000/100/10Mbps(L) + - SK-9521 10/100/1000Base-T Adapter + - SK-9521 V2.0 10/100/1000Base-T Adapter +@@ -2134,6 +2199,14 @@ + - SK-9871 Gigabit Ethernet Server Adapter (SK-NET GE-ZX) + - SK-9871 V2.0 Gigabit Ethernet 1000Base-ZX Adapter + - SK-9872 Gigabit Ethernet Server Adapter (SK-NET GE-ZX dual link) ++ - SK-9S21 Server Adapter ++ - SK-9S22 Server Adapter ++ - SK-9S24 Server Adapter ++ - SK-9S34 Server Adapter ++ - SK-9S81 Server Adapter ++ - SK-9S82 Server Adapter ++ - SK-9S91 Server Adapter ++ - SK-9S92 Server Adapter + - SMC EZ Card 1000 (SMC9452TXV.2) + + The adapters support Jumbo Frames. +@@ -2147,8 +2220,9 @@ + + If you want to compile this driver as a module ( = code which can be + inserted in and removed from the running kernel whenever you want), +- say M here and read Documentation/kbuild/modules.txt. The module will +- be called sk98lin. This is recommended. ++ say M here and read Documentation/modules.txt. This is recommended. ++ The module will be called sk98lin. This is recommended. ++ + + config VIA_VELOCITY + tristate "VIA Velocity support" +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/lm80.h linux-2.6.9.new/drivers/net/sk98lin/h/lm80.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/lm80.h 2004-10-19 05:55:06.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/lm80.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: lm80.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.6 $ +- * Date: $Date: 2003/05/13 17:26:52 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:08 $ + * Purpose: Contains all defines for the LM80 Chip + * (National Semiconductor). + * +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skaddr.h linux-2.6.9.new/drivers/net/sk98lin/h/skaddr.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skaddr.h 2004-10-19 05:54:30.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skaddr.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skaddr.h + * Project: Gigabit Ethernet Adapters, ADDR-Modul +- * Version: $Revision: 1.29 $ +- * Date: $Date: 2003/05/13 16:57:24 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:07 $ + * Purpose: Header file for Address Management (MC, UC, Prom). + * + ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skcsum.h linux-2.6.9.new/drivers/net/sk98lin/h/skcsum.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skcsum.h 2004-10-19 05:53:10.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skcsum.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skcsum.h + * Project: GEnesis - SysKonnect SK-NET Gigabit Ethernet (SK-98xx) +- * Version: $Revision: 1.10 $ +- * Date: $Date: 2003/08/20 13:59:57 $ ++ * Version: $Revision: 2.2 $ ++ * Date: $Date: 2003/12/29 15:37:26 $ + * Purpose: Store/verify Internet checksum in send/receive packets. + * + ******************************************************************************/ +@@ -157,9 +157,7 @@ + typedef struct s_Csum { + /* Enabled receive SK_PROTO_XXX bit flags. */ + unsigned ReceiveFlags[SK_MAX_NETS]; +-#ifdef TX_CSUM + unsigned TransmitFlags[SK_MAX_NETS]; +-#endif /* TX_CSUM */ + + /* The protocol statistics structure; one per supported protocol. */ + SKCS_PROTO_STATS ProtoStats[SK_MAX_NETS][SKCS_NUM_PROTOCOLS]; +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skdebug.h linux-2.6.9.new/drivers/net/sk98lin/h/skdebug.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skdebug.h 2004-10-19 05:54:26.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skdebug.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skdebug.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.14 $ +- * Date: $Date: 2003/05/13 17:26:00 $ ++ * Version: $Revision: 2.3 $ ++ * Date: $Date: 2005/01/25 16:44:28 $ + * Purpose: SK specific DEBUG support + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -28,9 +27,9 @@ + #ifdef DEBUG + #ifndef SK_DBG_MSG + #define SK_DBG_MSG(pAC,comp,cat,arg) \ +- if ( ((comp) & SK_DBG_CHKMOD(pAC)) && \ +- ((cat) & SK_DBG_CHKCAT(pAC)) ) { \ +- SK_DBG_PRINTF arg ; \ ++ if ( ((comp) & SK_DBG_CHKMOD(pAC)) && \ ++ ((cat) & SK_DBG_CHKCAT(pAC)) ) { \ ++ SK_DBG_PRINTF arg; \ + } + #endif + #else +@@ -58,6 +57,13 @@ + #define SK_DBGMOD_ADDR 0x00000080L /* ADDR module */ + #define SK_DBGMOD_PECP 0x00000100L /* PECP module */ + #define SK_DBGMOD_POWM 0x00000200L /* Power Management module */ ++#ifdef SK_ASF ++#define SK_DBGMOD_ASF 0x00000400L /* ASF module */ ++#endif ++#ifdef SK_LBFO ++#define SK_DBGMOD_LACP 0x00000800L /* link aggregation control protocol */ ++#define SK_DBGMOD_FD 0x00001000L /* frame distributor (link aggregation) */ ++#endif /* SK_LBFO */ + + /* Debug events */ + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skdrv1st.h linux-2.6.9.new/drivers/net/sk98lin/h/skdrv1st.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skdrv1st.h 2004-10-19 05:53:06.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skdrv1st.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skdrv1st.h + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.4 $ +- * Date: $Date: 2003/11/12 14:28:14 $ ++ * Version: $Revision: 1.5.2.5 $ ++ * Date: $Date: 2005/04/11 09:00:53 $ + * Purpose: First header file for driver and all other modules + * + ******************************************************************************/ +@@ -11,7 +11,7 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -22,20 +22,6 @@ + * + ******************************************************************************/ + +-/****************************************************************************** +- * +- * Description: +- * +- * This is the first include file of the driver, which includes all +- * neccessary system header files and some of the GEnesis header files. +- * It also defines some basic items. +- * +- * Include File Hierarchy: +- * +- * see skge.c +- * +- ******************************************************************************/ +- + #ifndef __INC_SKDRV1ST_H + #define __INC_SKDRV1ST_H + +@@ -58,6 +44,9 @@ + + #define SK_ADDR_EQUAL(a1,a2) (!memcmp(a1,a2,6)) + ++#define SK_STRNCMP(s1,s2,len) strncmp(s1,s2,len) ++#define SK_STRCPY(dest,src) strcpy(dest,src) ++ + #include + #include + #include +@@ -78,11 +67,7 @@ + #include + + #define SK_CS_CALCULATE_CHECKSUM +-#ifndef CONFIG_X86_64 +-#define SkCsCalculateChecksum(p,l) ((~ip_compute_csum(p, l)) & 0xffff) +-#else +-#define SkCsCalculateChecksum(p,l) ((~ip_fast_csum(p, l)) & 0xffff) +-#endif ++#define SkCsCalculateChecksum(p,l) (~csum_fold(csum_partial(p, l, 0))) + + #include "h/sktypes.h" + #include "h/skerror.h" +@@ -90,6 +75,10 @@ + #include "h/lm80.h" + #include "h/xmac_ii.h" + ++#ifndef SK_BMU_RX_WM_PEX ++#define SK_BMU_RX_WM_PEX 0x80 ++#endif ++ + #ifdef __LITTLE_ENDIAN + #define SK_LITTLE_ENDIAN + #else +@@ -188,3 +177,8 @@ + + #endif + ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skdrv2nd.h linux-2.6.9.new/drivers/net/sk98lin/h/skdrv2nd.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skdrv2nd.h 2004-10-19 05:55:29.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skdrv2nd.h 2006-12-07 14:35:03.000000000 +0800 +@@ -1,17 +1,17 @@ + /****************************************************************************** + * +- * Name: skdrv2nd.h +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.10 $ +- * Date: $Date: 2003/12/11 16:04:45 $ +- * Purpose: Second header file for driver and all other modules ++ * Name: skdrv2nd.h ++ * Project: GEnesis, PCI Gigabit Ethernet Adapter ++ * Version: $Revision: 1.29.2.20 $ ++ * Date: $Date: 2005/06/17 14:09:50 $ ++ * Purpose: Second header file for driver and all other modules + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -42,10 +42,11 @@ + #include "h/skqueue.h" + #include "h/skgehwt.h" + #include "h/sktimer.h" +-#include "h/ski2c.h" ++#include "h/sktwsi.h" + #include "h/skgepnmi.h" + #include "h/skvpd.h" + #include "h/skgehw.h" ++#include "h/sky2le.h" + #include "h/skgeinit.h" + #include "h/skaddr.h" + #include "h/skgesirq.h" +@@ -53,104 +54,178 @@ + #include "h/skrlmt.h" + #include "h/skgedrv.h" + ++/* Defines for the poll cotroller */ ++#ifdef HAVE_POLL_CONTROLLER ++#define SK_POLL_CONTROLLER ++#define CONFIG_SK98LIN_NAPI ++#elif CONFIG_NET_POLL_CONTROLLER ++#define SK_POLL_CONTROLLER ++#define CONFIG_SK98LIN_NAPI ++#endif ++ + +-extern SK_MBUF *SkDrvAllocRlmtMbuf(SK_AC*, SK_IOC, unsigned); +-extern void SkDrvFreeRlmtMbuf(SK_AC*, SK_IOC, SK_MBUF*); +-extern SK_U64 SkOsGetTime(SK_AC*); +-extern int SkPciReadCfgDWord(SK_AC*, int, SK_U32*); +-extern int SkPciReadCfgWord(SK_AC*, int, SK_U16*); +-extern int SkPciReadCfgByte(SK_AC*, int, SK_U8*); +-extern int SkPciWriteCfgDWord(SK_AC*, int, SK_U32); +-extern int SkPciWriteCfgWord(SK_AC*, int, SK_U16); +-extern int SkPciWriteCfgByte(SK_AC*, int, SK_U8); +-extern int SkDrvEvent(SK_AC*, SK_IOC IoC, SK_U32, SK_EVPARA); +- +-#ifdef SK_DIAG_SUPPORT +-extern int SkDrvEnterDiagMode(SK_AC *pAc); +-extern int SkDrvLeaveDiagMode(SK_AC *pAc); ++/****************************************************************************** ++ * ++ * Generic driver defines ++ * ++ ******************************************************************************/ ++ ++#define USE_TIST_FOR_RESET /* Use timestamp for reset */ ++#define Y2_RECOVERY /* use specific recovery yukon2 functions */ ++#define Y2_LE_CHECK /* activate check for LE order */ ++#define Y2_SYNC_CHECK /* activate check for receiver in sync */ ++#define SK_YUKON2 /* Enable Yukon2 dual net support */ ++#define USE_SK_TX_CHECKSUM /* use the tx hw checksum driver functionality */ ++#define USE_SK_RX_CHECKSUM /* use the rx hw checksum driver functionality */ ++#define USE_SK_TSO_FEATURE /* use TCP segmentation offload if possible */ ++#define SK_COPY_THRESHOLD 50 /* threshold for copying small RX frames; ++ * 0 avoids copying, 9001 copies all */ ++#define SK_MAX_CARD_PARAM 16 /* number of adapters that can be configured via ++ * command line params */ ++//#define USE_TX_COMPLETE /* use of a transmit complete interrupt */ ++#ifndef CONFIG_SK98LIN_NAPI ++#define Y2_RX_CHECK /* RX Check timestamp */ ++#endif ++ ++/* ++ * use those defines for a compile-in version of the driver instead ++ * of command line parameters ++ */ ++// #define LINK_SPEED_A {"Auto",} ++// #define LINK_SPEED_B {"Auto",} ++// #define AUTO_NEG_A {"Sense",} ++// #define AUTO_NEG_B {"Sense"} ++// #define DUP_CAP_A {"Both",} ++// #define DUP_CAP_B {"Both",} ++// #define FLOW_CTRL_A {"SymOrRem",} ++// #define FLOW_CTRL_B {"SymOrRem",} ++// #define ROLE_A {"Auto",} ++// #define ROLE_B {"Auto",} ++// #define PREF_PORT {"A",} ++// #define CON_TYPE {"Auto",} ++// #define RLMT_MODE {"CheckLinkState",} ++ ++#ifdef Y2_RECOVERY ++#define CHECK_TRANSMIT_TIMEOUT ++#define Y2_RESYNC_WATERMARK 1000000L + #endif + ++ ++/****************************************************************************** ++ * ++ * Generic ISR defines ++ * ++ ******************************************************************************/ ++ ++#define SkIsrRetVar irqreturn_t ++#define SkIsrRetNone IRQ_NONE ++#define SkIsrRetHandled IRQ_HANDLED ++ ++#define DEV_KFREE_SKB(skb) dev_kfree_skb(skb) ++#define DEV_KFREE_SKB_IRQ(skb) dev_kfree_skb_irq(skb) ++#define DEV_KFREE_SKB_ANY(skb) dev_kfree_skb_any(skb) ++ ++/****************************************************************************** ++ * ++ * Global function prototypes ++ * ++ ******************************************************************************/ ++ ++extern SK_MBUF *SkDrvAllocRlmtMbuf(SK_AC*, SK_IOC, unsigned); ++extern void SkDrvFreeRlmtMbuf(SK_AC*, SK_IOC, SK_MBUF*); ++extern SK_U64 SkOsGetTime(SK_AC*); ++extern int SkPciReadCfgDWord(SK_AC*, int, SK_U32*); ++extern int SkPciReadCfgWord(SK_AC*, int, SK_U16*); ++extern int SkPciReadCfgByte(SK_AC*, int, SK_U8*); ++extern int SkPciWriteCfgDWord(SK_AC*, int, SK_U32); ++extern int SkPciWriteCfgWord(SK_AC*, int, SK_U16); ++extern int SkPciWriteCfgByte(SK_AC*, int, SK_U8); ++extern int SkDrvEvent(SK_AC*, SK_IOC IoC, SK_U32, SK_EVPARA); ++extern int SkDrvEnterDiagMode(SK_AC *pAc); ++extern int SkDrvLeaveDiagMode(SK_AC *pAc); ++ ++/****************************************************************************** ++ * ++ * Linux specific RLMT buffer structure (SK_MBUF typedef in skdrv1st)! ++ * ++ ******************************************************************************/ ++ + struct s_DrvRlmtMbuf { +- SK_MBUF *pNext; /* Pointer to next RLMT Mbuf. */ +- SK_U8 *pData; /* Data buffer (virtually contig.). */ +- unsigned Size; /* Data buffer size. */ +- unsigned Length; /* Length of packet (<= Size). */ +- SK_U32 PortIdx; /* Receiving/transmitting port. */ ++ SK_MBUF *pNext; /* Pointer to next RLMT Mbuf. */ ++ SK_U8 *pData; /* Data buffer (virtually contig.). */ ++ unsigned Size; /* Data buffer size. */ ++ unsigned Length; /* Length of packet (<= Size). */ ++ SK_U32 PortIdx; /* Receiving/transmitting port. */ + #ifdef SK_RLMT_MBUF_PRIVATE +- SK_RLMT_MBUF Rlmt; /* Private part for RLMT. */ +-#endif /* SK_RLMT_MBUF_PRIVATE */ +- struct sk_buff *pOs; /* Pointer to message block */ ++ SK_RLMT_MBUF Rlmt; /* Private part for RLMT. */ ++#endif ++ struct sk_buff *pOs; /* Pointer to message block */ + }; + ++/****************************************************************************** ++ * ++ * Linux specific TIME defines ++ * ++ ******************************************************************************/ + +-/* +- * Time macros +- */ + #if SK_TICKS_PER_SEC == 100 + #define SK_PNMI_HUNDREDS_SEC(t) (t) + #else +-#define SK_PNMI_HUNDREDS_SEC(t) ((((unsigned long)t) * 100) / \ +- (SK_TICKS_PER_SEC)) ++#define SK_PNMI_HUNDREDS_SEC(t) ((((unsigned long)t)*100)/(SK_TICKS_PER_SEC)) + #endif + +-/* +- * New SkOsGetTime +- */ + #define SkOsGetTimeCurrent(pAC, pUsec) {\ + struct timeval t;\ + do_gettimeofday(&t);\ + *pUsec = ((((t.tv_sec) * 1000000L)+t.tv_usec)/10000);\ + } + ++/****************************************************************************** ++ * ++ * Linux specific IOCTL defines and typedefs ++ * ++ ******************************************************************************/ + +-/* +- * ioctl definitions +- */ +-#define SK_IOCTL_BASE (SIOCDEVPRIVATE) +-#define SK_IOCTL_GETMIB (SK_IOCTL_BASE + 0) +-#define SK_IOCTL_SETMIB (SK_IOCTL_BASE + 1) +-#define SK_IOCTL_PRESETMIB (SK_IOCTL_BASE + 2) +-#define SK_IOCTL_GEN (SK_IOCTL_BASE + 3) +-#define SK_IOCTL_DIAG (SK_IOCTL_BASE + 4) +- +-typedef struct s_IOCTL SK_GE_IOCTL; ++#define SK_IOCTL_BASE (SIOCDEVPRIVATE) ++#define SK_IOCTL_GETMIB (SK_IOCTL_BASE + 0) ++#define SK_IOCTL_SETMIB (SK_IOCTL_BASE + 1) ++#define SK_IOCTL_PRESETMIB (SK_IOCTL_BASE + 2) ++#define SK_IOCTL_GEN (SK_IOCTL_BASE + 3) ++#define SK_IOCTL_DIAG (SK_IOCTL_BASE + 4) + ++typedef struct s_IOCTL SK_GE_IOCTL; + struct s_IOCTL { + char __user * pData; + unsigned int Len; + }; + ++/****************************************************************************** ++ * ++ * Generic sizes and length definitions ++ * ++ ******************************************************************************/ + +-/* +- * define sizes of descriptor rings in bytes +- */ +- +-#define TX_RING_SIZE (8*1024) +-#define RX_RING_SIZE (24*1024) +- +-/* +- * Buffer size for ethernet packets +- */ +-#define ETH_BUF_SIZE 1540 +-#define ETH_MAX_MTU 1514 +-#define ETH_MIN_MTU 60 +-#define ETH_MULTICAST_BIT 0x01 +-#define SK_JUMBO_MTU 9000 +- +-/* +- * transmit priority selects the queue: LOW=asynchron, HIGH=synchron +- */ +-#define TX_PRIO_LOW 0 +-#define TX_PRIO_HIGH 1 ++#define TX_RING_SIZE (24*1024) /* GEnesis/Yukon */ ++#define RX_RING_SIZE (24*1024) /* GEnesis/Yukon */ ++#define RX_MAX_NBR_BUFFERS 128 /* Yukon-EC/-II */ ++#define TX_MAX_NBR_BUFFERS 128 /* Yukon-EC/-II */ ++ ++#define ETH_BUF_SIZE 1560 /* multiples of 8 bytes */ ++#define ETH_MAX_MTU 1514 ++#define ETH_MIN_MTU 60 ++#define ETH_MULTICAST_BIT 0x01 ++#define SK_JUMBO_MTU 9000 ++ ++#define TX_PRIO_LOW 0 /* asynchronous queue */ ++#define TX_PRIO_HIGH 1 /* synchronous queue */ ++#define DESCR_ALIGN 64 /* alignment of Rx/Tx descriptors */ + +-/* +- * alignment of rx/tx descriptors +- */ +-#define DESCR_ALIGN 64 ++/****************************************************************************** ++ * ++ * PNMI related definitions ++ * ++ ******************************************************************************/ + +-/* +- * definitions for pnmi. TODO +- */ + #define SK_DRIVER_RESET(pAC, IoC) 0 + #define SK_DRIVER_SENDEVENT(pAC, IoC) 0 + #define SK_DRIVER_SELFTEST(pAC, IoC) 0 +@@ -159,20 +234,16 @@ + #define SK_DRIVER_SET_MTU(pAc,IoC,i,v) 0 + #define SK_DRIVER_PRESET_MTU(pAc,IoC,i,v) 0 + +-/* +-** Interim definition of SK_DRV_TIMER placed in this file until +-** common modules have boon finallized +-*/ +-#define SK_DRV_TIMER 11 +-#define SK_DRV_MODERATION_TIMER 1 +-#define SK_DRV_MODERATION_TIMER_LENGTH 1000000 /* 1 second */ +-#define SK_DRV_RX_CLEANUP_TIMER 2 +-#define SK_DRV_RX_CLEANUP_TIMER_LENGTH 1000000 /* 100 millisecs */ + +-/* +-** Definitions regarding transmitting frames +-** any calculating any checksum. +-*/ ++/****************************************************************************** ++ * ++ * Various offsets and sizes ++ * ++ ******************************************************************************/ ++ ++#define SK_DRV_MODERATION_TIMER 1 /* id */ ++#define SK_DRV_MODERATION_TIMER_LENGTH 1 /* 1 second */ ++ + #define C_LEN_ETHERMAC_HEADER_DEST_ADDR 6 + #define C_LEN_ETHERMAC_HEADER_SRC_ADDR 6 + #define C_LEN_ETHERMAC_HEADER_LENTYPE 2 +@@ -198,114 +269,430 @@ + #define C_PROTO_ID_UDP 17 /* refer to RFC 790 or Stevens' */ + #define C_PROTO_ID_TCP 6 /* TCP/IP illustrated for details */ + +-/* TX and RX descriptors *****************************************************/ ++/****************************************************************************** ++ * ++ * Tx and Rx descriptor definitions ++ * ++ ******************************************************************************/ + + typedef struct s_RxD RXD; /* the receive descriptor */ +- + struct s_RxD { +- volatile SK_U32 RBControl; /* Receive Buffer Control */ +- SK_U32 VNextRxd; /* Next receive descriptor,low dword */ +- SK_U32 VDataLow; /* Receive buffer Addr, low dword */ +- SK_U32 VDataHigh; /* Receive buffer Addr, high dword */ +- SK_U32 FrameStat; /* Receive Frame Status word */ +- SK_U32 TimeStamp; /* Time stamp from XMAC */ +- SK_U32 TcpSums; /* TCP Sum 2 / TCP Sum 1 */ +- SK_U32 TcpSumStarts; /* TCP Sum Start 2 / TCP Sum Start 1 */ +- RXD *pNextRxd; /* Pointer to next Rxd */ +- struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */ ++ volatile SK_U32 RBControl; /* Receive Buffer Control */ ++ SK_U32 VNextRxd; /* Next receive descriptor,low dword */ ++ SK_U32 VDataLow; /* Receive buffer Addr, low dword */ ++ SK_U32 VDataHigh; /* Receive buffer Addr, high dword */ ++ SK_U32 FrameStat; /* Receive Frame Status word */ ++ SK_U32 TimeStamp; /* Time stamp from XMAC */ ++ SK_U32 TcpSums; /* TCP Sum 2 / TCP Sum 1 */ ++ SK_U32 TcpSumStarts; /* TCP Sum Start 2 / TCP Sum Start 1 */ ++ RXD *pNextRxd; /* Pointer to next Rxd */ ++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */ + }; + + typedef struct s_TxD TXD; /* the transmit descriptor */ +- + struct s_TxD { +- volatile SK_U32 TBControl; /* Transmit Buffer Control */ +- SK_U32 VNextTxd; /* Next transmit descriptor,low dword */ +- SK_U32 VDataLow; /* Transmit Buffer Addr, low dword */ +- SK_U32 VDataHigh; /* Transmit Buffer Addr, high dword */ +- SK_U32 FrameStat; /* Transmit Frame Status Word */ +- SK_U32 TcpSumOfs; /* Reserved / TCP Sum Offset */ +- SK_U16 TcpSumSt; /* TCP Sum Start */ +- SK_U16 TcpSumWr; /* TCP Sum Write */ +- SK_U32 TcpReserved; /* not used */ +- TXD *pNextTxd; /* Pointer to next Txd */ +- struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */ ++ volatile SK_U32 TBControl; /* Transmit Buffer Control */ ++ SK_U32 VNextTxd; /* Next transmit descriptor,low dword */ ++ SK_U32 VDataLow; /* Transmit Buffer Addr, low dword */ ++ SK_U32 VDataHigh; /* Transmit Buffer Addr, high dword */ ++ SK_U32 FrameStat; /* Transmit Frame Status Word */ ++ SK_U32 TcpSumOfs; /* Reserved / TCP Sum Offset */ ++ SK_U16 TcpSumSt; /* TCP Sum Start */ ++ SK_U16 TcpSumWr; /* TCP Sum Write */ ++ SK_U32 TcpReserved; /* not used */ ++ TXD *pNextTxd; /* Pointer to next Txd */ ++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */ + }; + +-/* Used interrupt bits in the interrupts source register *********************/ ++/****************************************************************************** ++ * ++ * Generic Yukon-II defines ++ * ++ ******************************************************************************/ + +-#define DRIVER_IRQS ((IS_IRQ_SW) | \ +- (IS_R1_F) |(IS_R2_F) | \ +- (IS_XS1_F) |(IS_XA1_F) | \ +- (IS_XS2_F) |(IS_XA2_F)) +- +-#define SPECIAL_IRQS ((IS_HW_ERR) |(IS_I2C_READY) | \ +- (IS_EXT_REG) |(IS_TIMINT) | \ +- (IS_PA_TO_RX1) |(IS_PA_TO_RX2) | \ +- (IS_PA_TO_TX1) |(IS_PA_TO_TX2) | \ +- (IS_MAC1) |(IS_LNK_SYNC_M1)| \ +- (IS_MAC2) |(IS_LNK_SYNC_M2)| \ +- (IS_R1_C) |(IS_R2_C) | \ +- (IS_XS1_C) |(IS_XA1_C) | \ +- (IS_XS2_C) |(IS_XA2_C)) +- +-#define IRQ_MASK ((IS_IRQ_SW) | \ +- (IS_R1_B) |(IS_R1_F) |(IS_R2_B) |(IS_R2_F) | \ +- (IS_XS1_B) |(IS_XS1_F) |(IS_XA1_B)|(IS_XA1_F)| \ +- (IS_XS2_B) |(IS_XS2_F) |(IS_XA2_B)|(IS_XA2_F)| \ +- (IS_HW_ERR) |(IS_I2C_READY)| \ +- (IS_EXT_REG) |(IS_TIMINT) | \ +- (IS_PA_TO_RX1) |(IS_PA_TO_RX2)| \ +- (IS_PA_TO_TX1) |(IS_PA_TO_TX2)| \ +- (IS_MAC1) |(IS_MAC2) | \ +- (IS_R1_C) |(IS_R2_C) | \ +- (IS_XS1_C) |(IS_XA1_C) | \ +- (IS_XS2_C) |(IS_XA2_C)) ++#define LE_SIZE sizeof(SK_HWLE) ++#define MAX_NUM_FRAGS (MAX_SKB_FRAGS + 1) ++#define MIN_LEN_OF_LE_TAB 128 ++#define MAX_LEN_OF_LE_TAB 4096 ++#define MAX_UNUSED_RX_LE_WORKING 8 ++#ifdef MAX_FRAG_OVERHEAD ++#undef MAX_FRAG_OVERHEAD ++#define MAX_FRAG_OVERHEAD 4 ++#endif ++// as we have a maximum of 16 physical fragments, ++// maximum 1 ADDR64 per physical fragment ++// maximum 4 LEs for VLAN, Csum, LargeSend, Packet ++#define MIN_LE_FREE_REQUIRED ((16*2) + 4) ++#define IS_GMAC(pAc) (!pAc->GIni.GIGenesis) ++#ifdef USE_SYNC_TX_QUEUE ++#define TXS_MAX_LE 256 ++#else /* !USE_SYNC_TX_QUEUE */ ++#define TXS_MAX_LE 0 ++#endif ++ ++#define ETHER_MAC_HDR_LEN (6+6+2) // MAC SRC ADDR, MAC DST ADDR, TYPE ++#define IP_HDR_LEN 20 ++#define TCP_CSUM_OFFS 0x10 ++#define UDP_CSUM_OFFS 0x06 ++#define TXA_MAX_LE 256 ++#define RX_MAX_LE 256 ++#define ST_MAX_LE (SK_MAX_MACS)*((3*RX_MAX_LE)+(TXA_MAX_LE)+(TXS_MAX_LE)) ++ ++#if (defined (Y2_RECOVERY) || defined (Y2_LE_CHECK)) ++/* event for recovery from tx hang or rx out of sync */ ++#define SK_DRV_RECOVER 17 ++#endif ++/****************************************************************************** ++ * ++ * Structures specific for Yukon-II ++ * ++ ******************************************************************************/ ++ ++typedef struct s_frag SK_FRAG; ++struct s_frag { ++ SK_FRAG *pNext; ++ char *pVirt; ++ SK_U64 pPhys; ++ unsigned int FragLen; ++}; ++ ++typedef struct s_packet SK_PACKET; ++struct s_packet { ++ /* Common infos: */ ++ SK_PACKET *pNext; /* pointer for packet queues */ ++ unsigned int PacketLen; /* length of packet */ ++ unsigned int NumFrags; /* nbr of fragments (for Rx always 1) */ ++ SK_FRAG *pFrag; /* fragment list */ ++ SK_FRAG FragArray[MAX_NUM_FRAGS]; /* TX fragment array */ ++ unsigned int NextLE; /* next LE to use for the next packet */ ++ ++ /* Private infos: */ ++ struct sk_buff *pMBuf; /* Pointer to Linux' socket buffer */ ++}; ++ ++typedef struct s_queue SK_PKT_QUEUE; ++struct s_queue { ++ SK_PACKET *pHead; ++ SK_PACKET *pTail; ++ spinlock_t QueueLock; /* serialize packet accesses */ ++}; ++ ++/******************************************************************************* ++ * ++ * Macros specific for Yukon-II queues ++ * ++ ******************************************************************************/ ++ ++#define IS_Q_EMPTY(pQueue) ((pQueue)->pHead != NULL) ? SK_FALSE : SK_TRUE ++#define IS_Q_LOCKED(pQueue) spin_is_locked(&((pQueue)->QueueLock)) ++ ++#define PLAIN_POP_FIRST_PKT_FROM_QUEUE(pQueue, pPacket) { \ ++ if ((pQueue)->pHead != NULL) { \ ++ (pPacket) = (pQueue)->pHead; \ ++ (pQueue)->pHead = (pPacket)->pNext; \ ++ if ((pQueue)->pHead == NULL) { \ ++ (pQueue)->pTail = NULL; \ ++ } \ ++ (pPacket)->pNext = NULL; \ ++ } else { \ ++ (pPacket) = NULL; \ ++ } \ ++} ++ ++#define PLAIN_PUSH_PKT_AS_FIRST_IN_QUEUE(pQueue, pPacket) { \ ++ if ((pQueue)->pHead != NULL) { \ ++ (pPacket)->pNext = (pQueue)->pHead; \ ++ } else { \ ++ (pPacket)->pNext = NULL; \ ++ (pQueue)->pTail = (pPacket); \ ++ } \ ++ (pQueue)->pHead = (pPacket); \ ++} ++ ++#define PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(pQueue, pPacket) { \ ++ (pPacket)->pNext = NULL; \ ++ if ((pQueue)->pTail != NULL) { \ ++ (pQueue)->pTail->pNext = (pPacket); \ ++ } else { \ ++ (pQueue)->pHead = (pPacket); \ ++ } \ ++ (pQueue)->pTail = (pPacket); \ ++} ++ ++#define PLAIN_PUSH_MULTIPLE_PKT_AS_LAST_IN_QUEUE(pQueue,pPktGrpStart,pPktGrpEnd) { \ ++ if ((pPktGrpStart) != NULL) { \ ++ if ((pQueue)->pTail != NULL) { \ ++ (pQueue)->pTail->pNext = (pPktGrpStart); \ ++ } else { \ ++ (pQueue)->pHead = (pPktGrpStart); \ ++ } \ ++ (pQueue)->pTail = (pPktGrpEnd); \ ++ } \ ++} ++ ++/* Required: 'Flags' */ ++#define POP_FIRST_PKT_FROM_QUEUE(pQueue, pPacket) { \ ++ spin_lock_irqsave(&((pQueue)->QueueLock), Flags); \ ++ if ((pQueue)->pHead != NULL) { \ ++ (pPacket) = (pQueue)->pHead; \ ++ (pQueue)->pHead = (pPacket)->pNext; \ ++ if ((pQueue)->pHead == NULL) { \ ++ (pQueue)->pTail = NULL; \ ++ } \ ++ (pPacket)->pNext = NULL; \ ++ } else { \ ++ (pPacket) = NULL; \ ++ } \ ++ spin_unlock_irqrestore(&((pQueue)->QueueLock), Flags); \ ++} ++ ++/* Required: 'Flags' */ ++#define PUSH_PKT_AS_FIRST_IN_QUEUE(pQueue, pPacket) { \ ++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \ ++ if ((pQueue)->pHead != NULL) { \ ++ (pPacket)->pNext = (pQueue)->pHead; \ ++ } else { \ ++ (pPacket)->pNext = NULL; \ ++ (pQueue)->pTail = (pPacket); \ ++ } \ ++ (pQueue)->pHead = (pPacket); \ ++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \ ++} + +-#define IRQ_HWE_MASK (IS_ERR_MSK) /* enable all HW irqs */ ++/* Required: 'Flags' */ ++#define PUSH_PKT_AS_LAST_IN_QUEUE(pQueue, pPacket) { \ ++ (pPacket)->pNext = NULL; \ ++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \ ++ if ((pQueue)->pTail != NULL) { \ ++ (pQueue)->pTail->pNext = (pPacket); \ ++ } else { \ ++ (pQueue)->pHead = (pPacket); \ ++ } \ ++ (pQueue)->pTail = (pPacket); \ ++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \ ++} ++ ++/* Required: 'Flags' */ ++#define PUSH_MULTIPLE_PKT_AS_LAST_IN_QUEUE(pQueue,pPktGrpStart,pPktGrpEnd) { \ ++ if ((pPktGrpStart) != NULL) { \ ++ spin_lock_irqsave(&(pQueue)->QueueLock, Flags); \ ++ if ((pQueue)->pTail != NULL) { \ ++ (pQueue)->pTail->pNext = (pPktGrpStart); \ ++ } else { \ ++ (pQueue)->pHead = (pPktGrpStart); \ ++ } \ ++ (pQueue)->pTail = (pPktGrpEnd); \ ++ spin_unlock_irqrestore(&(pQueue)->QueueLock, Flags); \ ++ } \ ++} ++ ++/******************************************************************************* ++ * ++ * Macros specific for Yukon-II queues (tist) ++ * ++ ******************************************************************************/ ++ ++#ifdef USE_TIST_FOR_RESET ++/* port is fully operational */ ++#define SK_PSTATE_NOT_WAITING_FOR_TIST 0 ++/* port in reset until any tist LE */ ++#define SK_PSTATE_WAITING_FOR_ANY_TIST BIT_0 ++/* port in reset until timer reaches pAC->MinTistLo */ ++#define SK_PSTATE_WAITING_FOR_SPECIFIC_TIST BIT_1 ++#define SK_PSTATE_PORT_SHIFT 4 ++#define SK_PSTATE_PORT_MASK ((1 << SK_PSTATE_PORT_SHIFT) - 1) ++ ++/* use this + Port to build OP_MOD_TXINDEX_NO_PORT_A|B */ ++#define OP_MOD_TXINDEX 0x71 ++/* opcode for a TX_INDEX LE in which Port A has to be ignored */ ++#define OP_MOD_TXINDEX_NO_PORT_A 0x71 ++/* opcode for a TX_INDEX LE in which Port B has to be ignored */ ++#define OP_MOD_TXINDEX_NO_PORT_B 0x72 ++/* opcode for LE to be ignored because port is still in reset */ ++#define OP_MOD_LE 0x7F ++ ++/* set tist wait mode Bit for port */ ++#define SK_SET_WAIT_BIT_FOR_PORT(pAC, Bit, Port) \ ++ { \ ++ (pAC)->AdapterResetState |= ((Bit) << (SK_PSTATE_PORT_SHIFT * Port)); \ ++ } ++ ++/* reset tist waiting for specified port */ ++#define SK_CLR_STATE_FOR_PORT(pAC, Port) \ ++ { \ ++ (pAC)->AdapterResetState &= \ ++ ~(SK_PSTATE_PORT_MASK << (SK_PSTATE_PORT_SHIFT * Port)); \ ++ } ++ ++/* return SK_TRUE when port is in reset waiting for tist */ ++#define SK_PORT_WAITING_FOR_TIST(pAC, Port) \ ++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \ ++ SK_PSTATE_PORT_MASK) != SK_PSTATE_NOT_WAITING_FOR_TIST) ++ ++/* return SK_TRUE when port is in reset waiting for any tist */ ++#define SK_PORT_WAITING_FOR_ANY_TIST(pAC, Port) \ ++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \ ++ SK_PSTATE_WAITING_FOR_ANY_TIST) == SK_PSTATE_WAITING_FOR_ANY_TIST) ++ ++/* return SK_TRUE when port is in reset waiting for a specific tist */ ++#define SK_PORT_WAITING_FOR_SPECIFIC_TIST(pAC, Port) \ ++ ((((pAC)->AdapterResetState >> (SK_PSTATE_PORT_SHIFT * Port)) & \ ++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST) == \ ++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST) ++ ++/* return whether adapter is expecting a tist LE */ ++#define SK_ADAPTER_WAITING_FOR_TIST(pAC) ((pAC)->AdapterResetState != 0) ++ ++/* enable timestamp timer and force creation of tist LEs */ ++#define Y2_ENABLE_TIST(IoC) \ ++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8) GMT_ST_START) ++ ++/* disable timestamp timer and stop creation of tist LEs */ ++#define Y2_DISABLE_TIST(IoC) \ ++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8) GMT_ST_STOP) ++ ++/* get current value of timestamp timer */ ++#define Y2_GET_TIST_LOW_VAL(IoC, pVal) \ ++ SK_IN32(IoC, GMAC_TI_ST_VAL, pVal) ++ ++#endif ++ ++ ++/******************************************************************************* ++ * ++ * Used interrupt bits in the interrupts source register ++ * ++ ******************************************************************************/ ++ ++#define DRIVER_IRQS ((IS_IRQ_SW) | \ ++ (IS_R1_F) | (IS_R2_F) | \ ++ (IS_XS1_F) | (IS_XA1_F) | \ ++ (IS_XS2_F) | (IS_XA2_F)) ++ ++#define TX_COMPL_IRQS ((IS_XS1_B) | (IS_XS1_F) | \ ++ (IS_XA1_B) | (IS_XA1_F) | \ ++ (IS_XS2_B) | (IS_XS2_F) | \ ++ (IS_XA2_B) | (IS_XA2_F)) ++ ++#define NAPI_DRV_IRQS ((IS_R1_F) | (IS_R2_F) | \ ++ (IS_XS1_F) | (IS_XA1_F)| \ ++ (IS_XS2_F) | (IS_XA2_F)) ++ ++#define Y2_DRIVER_IRQS ((Y2_IS_STAT_BMU) | (Y2_IS_IRQ_SW) | (Y2_IS_POLL_CHK)) ++ ++#define SPECIAL_IRQS ((IS_HW_ERR) |(IS_I2C_READY) | \ ++ (IS_EXT_REG) |(IS_TIMINT) | \ ++ (IS_PA_TO_RX1) |(IS_PA_TO_RX2) | \ ++ (IS_PA_TO_TX1) |(IS_PA_TO_TX2) | \ ++ (IS_MAC1) |(IS_LNK_SYNC_M1)| \ ++ (IS_MAC2) |(IS_LNK_SYNC_M2)| \ ++ (IS_R1_C) |(IS_R2_C) | \ ++ (IS_XS1_C) |(IS_XA1_C) | \ ++ (IS_XS2_C) |(IS_XA2_C)) ++ ++#define Y2_SPECIAL_IRQS ((Y2_IS_HW_ERR) |(Y2_IS_ASF) | \ ++ (Y2_IS_TWSI_RDY) |(Y2_IS_TIMINT) | \ ++ (Y2_IS_IRQ_PHY2) |(Y2_IS_IRQ_MAC2) | \ ++ (Y2_IS_CHK_RX2) |(Y2_IS_CHK_TXS2) | \ ++ (Y2_IS_CHK_TXA2) |(Y2_IS_IRQ_PHY1) | \ ++ (Y2_IS_IRQ_MAC1) |(Y2_IS_CHK_RX1) | \ ++ (Y2_IS_CHK_TXS1) |(Y2_IS_CHK_TXA1)) ++ ++#define IRQ_MASK ((IS_IRQ_SW) | \ ++ (IS_R1_F) |(IS_R2_F) | \ ++ (IS_XS1_F) |(IS_XA1_F) | \ ++ (IS_XS2_F) |(IS_XA2_F) | \ ++ (IS_HW_ERR) |(IS_I2C_READY)| \ ++ (IS_EXT_REG) |(IS_TIMINT) | \ ++ (IS_PA_TO_RX1) |(IS_PA_TO_RX2)| \ ++ (IS_PA_TO_TX1) |(IS_PA_TO_TX2)| \ ++ (IS_MAC1) |(IS_MAC2) | \ ++ (IS_R1_C) |(IS_R2_C) | \ ++ (IS_XS1_C) |(IS_XA1_C) | \ ++ (IS_XS2_C) |(IS_XA2_C)) ++ ++#define Y2_IRQ_MASK ((Y2_DRIVER_IRQS) | (Y2_SPECIAL_IRQS)) ++ ++#define IRQ_HWE_MASK (IS_ERR_MSK) /* enable all HW irqs */ ++#define Y2_IRQ_HWE_MASK (Y2_HWE_ALL_MSK) /* enable all HW irqs */ + + typedef struct s_DevNet DEV_NET; + + struct s_DevNet { +- struct proc_dir_entry *proc; +- int PortNr; +- int NetNr; +- int Mtu; +- int Up; +- SK_AC *pAC; ++ struct proc_dir_entry *proc; ++ int PortNr; ++ int NetNr; ++ char InitialDevName[20]; ++ SK_BOOL NetConsoleMode; ++#ifdef Y2_RECOVERY ++ struct timer_list KernelTimer; /* Kernel timer struct */ ++ int TransmitTimeoutTimer; /* Transmit timer */ ++ SK_BOOL TimerExpired; /* Transmit timer */ ++ SK_BOOL InRecover; /* Recover flag */ ++#ifdef Y2_RX_CHECK ++ SK_U8 FifoReadPointer; /* Backup of the FRP */ ++ SK_U8 FifoReadLevel; /* Backup of the FRL */ ++ SK_U32 BmuStateMachine; /* Backup of the MBU SM */ ++ SK_U32 LastJiffies; /* Backup of the jiffies */ ++#endif ++#endif ++ SK_AC *pAC; + }; + +-typedef struct s_TxPort TX_PORT; ++/******************************************************************************* ++ * ++ * Rx/Tx Port structures ++ * ++ ******************************************************************************/ + +-struct s_TxPort { +- /* the transmit descriptor rings */ +- caddr_t pTxDescrRing; /* descriptor area memory */ +- SK_U64 VTxDescrRing; /* descr. area bus virt. addr. */ +- TXD *pTxdRingHead; /* Head of Tx rings */ +- TXD *pTxdRingTail; /* Tail of Tx rings */ +- TXD *pTxdRingPrev; /* descriptor sent previously */ +- int TxdRingFree; /* # of free entrys */ +- spinlock_t TxDesRingLock; /* serialize descriptor accesses */ +- caddr_t HwAddr; /* bmu registers address */ +- int PortIndex; /* index number of port (0 or 1) */ ++typedef struct s_TxPort TX_PORT; ++struct s_TxPort { /* the transmit descriptor rings */ ++ caddr_t pTxDescrRing; /* descriptor area memory */ ++ SK_U64 VTxDescrRing; /* descr. area bus virt. addr. */ ++ TXD *pTxdRingHead; /* Head of Tx rings */ ++ TXD *pTxdRingTail; /* Tail of Tx rings */ ++ TXD *pTxdRingPrev; /* descriptor sent previously */ ++ int TxdRingPrevFree;/* previously # of free entrys */ ++ int TxdRingFree; /* # of free entrys */ ++ spinlock_t TxDesRingLock; /* serialize descriptor accesses */ ++ caddr_t HwAddr; /* bmu registers address */ ++ int PortIndex; /* index number of port (0 or 1) */ ++ SK_PACKET *TransmitPacketTable; ++ SK_LE_TABLE TxALET; /* tx (async) list element table */ ++ SK_LE_TABLE TxSLET; /* tx (sync) list element table */ ++ SK_PKT_QUEUE TxQ_free; ++ SK_PKT_QUEUE TxAQ_waiting; ++ SK_PKT_QUEUE TxSQ_waiting; ++ SK_PKT_QUEUE TxAQ_working; ++ SK_PKT_QUEUE TxSQ_working; ++ unsigned LastDone; + }; + +-typedef struct s_RxPort RX_PORT; +- +-struct s_RxPort { +- /* the receive descriptor rings */ +- caddr_t pRxDescrRing; /* descriptor area memory */ +- SK_U64 VRxDescrRing; /* descr. area bus virt. addr. */ +- RXD *pRxdRingHead; /* Head of Rx rings */ +- RXD *pRxdRingTail; /* Tail of Rx rings */ +- RXD *pRxdRingPrev; /* descriptor given to BMU previously */ +- int RxdRingFree; /* # of free entrys */ +- spinlock_t RxDesRingLock; /* serialize descriptor accesses */ +- int RxFillLimit; /* limit for buffers in ring */ +- caddr_t HwAddr; /* bmu registers address */ +- int PortIndex; /* index number of port (0 or 1) */ ++typedef struct s_RxPort RX_PORT; ++struct s_RxPort { /* the receive descriptor rings */ ++ caddr_t pRxDescrRing; /* descriptor area memory */ ++ SK_U64 VRxDescrRing; /* descr. area bus virt. addr. */ ++ RXD *pRxdRingHead; /* Head of Rx rings */ ++ RXD *pRxdRingTail; /* Tail of Rx rings */ ++ RXD *pRxdRingPrev; /* descr given to BMU previously */ ++ int RxdRingFree; /* # of free entrys */ ++ spinlock_t RxDesRingLock; /* serialize descriptor accesses */ ++ int RxFillLimit; /* limit for buffers in ring */ ++ caddr_t HwAddr; /* bmu registers address */ ++ int PortIndex; /* index number of port (0 or 1) */ ++ SK_BOOL UseRxCsum; /* use Rx checksumming (yes/no) */ ++ SK_PACKET *ReceivePacketTable; ++ SK_LE_TABLE RxLET; /* rx list element table */ ++ SK_PKT_QUEUE RxQ_working; ++ SK_PKT_QUEUE RxQ_waiting; ++ int RxBufSize; + }; + +-/* Definitions needed for interrupt moderation *******************************/ ++/******************************************************************************* ++ * ++ * Interrupt masks used in combination with interrupt moderation ++ * ++ ******************************************************************************/ + + #define IRQ_EOF_AS_TX ((IS_XA1_F) | (IS_XA2_F)) + #define IRQ_EOF_SY_TX ((IS_XS1_F) | (IS_XS2_F)) +@@ -317,139 +704,150 @@ + #define IRQ_MASK_SP_TX ((SPECIAL_IRQS) | (IRQ_MASK_TX_ONLY)) + #define IRQ_MASK_RX_TX_SP ((SPECIAL_IRQS) | (IRQ_MASK_TX_RX)) + +-#define C_INT_MOD_NONE 1 +-#define C_INT_MOD_STATIC 2 +-#define C_INT_MOD_DYNAMIC 4 +- +-#define C_CLK_FREQ_GENESIS 53215000 /* shorter: 53.125 MHz */ +-#define C_CLK_FREQ_YUKON 78215000 /* shorter: 78.125 MHz */ +- +-#define C_INTS_PER_SEC_DEFAULT 2000 +-#define C_INT_MOD_ENABLE_PERCENTAGE 50 /* if higher 50% enable */ +-#define C_INT_MOD_DISABLE_PERCENTAGE 50 /* if lower 50% disable */ +-#define C_INT_MOD_IPS_LOWER_RANGE 30 +-#define C_INT_MOD_IPS_UPPER_RANGE 40000 +- +- +-typedef struct s_DynIrqModInfo DIM_INFO; +-struct s_DynIrqModInfo { +- unsigned long PrevTimeVal; +- unsigned int PrevSysLoad; +- unsigned int PrevUsedTime; +- unsigned int PrevTotalTime; +- int PrevUsedDescrRatio; +- int NbrProcessedDescr; +- SK_U64 PrevPort0RxIntrCts; +- SK_U64 PrevPort1RxIntrCts; +- SK_U64 PrevPort0TxIntrCts; +- SK_U64 PrevPort1TxIntrCts; +- SK_BOOL ModJustEnabled; /* Moderation just enabled yes/no */ +- +- int MaxModIntsPerSec; /* Moderation Threshold */ +- int MaxModIntsPerSecUpperLimit; /* Upper limit for DIM */ +- int MaxModIntsPerSecLowerLimit; /* Lower limit for DIM */ +- +- long MaskIrqModeration; /* ModIrqType (eg. 'TxRx') */ +- SK_BOOL DisplayStats; /* Stats yes/no */ +- SK_BOOL AutoSizing; /* Resize DIM-timer on/off */ +- int IntModTypeSelect; /* EnableIntMod (eg. 'dynamic') */ ++#define IRQ_MASK_Y2_TX_ONLY (Y2_IS_STAT_BMU) ++#define IRQ_MASK_Y2_RX_ONLY (Y2_IS_STAT_BMU) ++#define IRQ_MASK_Y2_SP_ONLY (SPECIAL_IRQS) ++#define IRQ_MASK_Y2_TX_RX ((IRQ_MASK_TX_ONLY)| (IRQ_MASK_RX_ONLY)) ++#define IRQ_MASK_Y2_SP_RX ((SPECIAL_IRQS) | (IRQ_MASK_RX_ONLY)) ++#define IRQ_MASK_Y2_SP_TX ((SPECIAL_IRQS) | (IRQ_MASK_TX_ONLY)) ++#define IRQ_MASK_Y2_RX_TX_SP ((SPECIAL_IRQS) | (IRQ_MASK_TX_RX)) + +- SK_TIMER ModTimer; /* just some timer */ +-}; ++/******************************************************************************* ++ * ++ * Defines and typedefs regarding interrupt moderation ++ * ++ ******************************************************************************/ + +-typedef struct s_PerStrm PER_STRM; ++#define C_INT_MOD_NONE 1 ++#define C_INT_MOD_STATIC 2 ++#define C_INT_MOD_DYNAMIC 4 ++ ++#define C_CLK_FREQ_GENESIS 53215000 /* or: 53.125 MHz */ ++#define C_CLK_FREQ_YUKON 78215000 /* or: 78.125 MHz */ ++#define C_CLK_FREQ_YUKON_EC 125000000 /* or: 125.000 MHz */ ++ ++#define C_Y2_INTS_PER_SEC_DEFAULT 5000 ++#define C_INTS_PER_SEC_DEFAULT 2000 ++#define C_INT_MOD_IPS_LOWER_RANGE 30 /* in IRQs/second */ ++#define C_INT_MOD_IPS_UPPER_RANGE 40000 /* in IRQs/second */ ++ ++typedef struct s_DynIrqModInfo { ++ SK_U64 PrevPort0RxIntrCts; ++ SK_U64 PrevPort1RxIntrCts; ++ SK_U64 PrevPort0TxIntrCts; ++ SK_U64 PrevPort1TxIntrCts; ++ SK_U64 PrevPort0StatusLeIntrCts; ++ SK_U64 PrevPort1StatusLeIntrCts; ++ int MaxModIntsPerSec; /* Moderation Threshold */ ++ int MaxModIntsPerSecUpperLimit; /* Upper limit for DIM */ ++ int MaxModIntsPerSecLowerLimit; /* Lower limit for DIM */ ++ long MaskIrqModeration; /* IRQ Mask (eg. 'TxRx') */ ++ int IntModTypeSelect; /* Type (eg. 'dynamic') */ ++ int DynIrqModSampleInterval; /* expressed in seconds! */ ++ SK_TIMER ModTimer; /* Timer for dynamic mod. */ ++} DIM_INFO; + +-#define SK_ALLOC_IRQ 0x00000001 ++/******************************************************************************* ++ * ++ * Defines and typedefs regarding wake-on-lan ++ * ++ ******************************************************************************/ ++ ++typedef struct s_WakeOnLanInfo { ++ SK_U32 SupportedWolOptions; /* e.g. WAKE_PHY... */ ++ SK_U32 ConfiguredWolOptions; /* e.g. WAKE_PHY... */ ++} WOL_INFO; + +-#ifdef SK_DIAG_SUPPORT ++#define SK_ALLOC_IRQ 0x00000001 + #define DIAG_ACTIVE 1 + #define DIAG_NOTACTIVE 0 +-#endif + + /**************************************************************************** ++ * + * Per board structure / Adapter Context structure: +- * Allocated within attach(9e) and freed within detach(9e). +- * Contains all 'per device' necessary handles, flags, locks etc.: +- */ ++ * Contains all 'per device' necessary handles, flags, locks etc.: ++ * ++ ******************************************************************************/ ++ + struct s_AC { +- SK_GEINIT GIni; /* GE init struct */ +- SK_PNMI Pnmi; /* PNMI data struct */ +- SK_VPD vpd; /* vpd data struct */ +- SK_QUEUE Event; /* Event queue */ +- SK_HWT Hwt; /* Hardware Timer control struct */ +- SK_TIMCTRL Tim; /* Software Timer control struct */ +- SK_I2C I2c; /* I2C relevant data structure */ +- SK_ADDR Addr; /* for Address module */ +- SK_CSUM Csum; /* for checksum module */ +- SK_RLMT Rlmt; /* for rlmt module */ +- spinlock_t SlowPathLock; /* Normal IRQ lock */ +- SK_PNMI_STRUCT_DATA PnmiStruct; /* structure to get all Pnmi-Data */ +- int RlmtMode; /* link check mode to set */ +- int RlmtNets; /* Number of nets */ +- +- SK_IOC IoBase; /* register set of adapter */ +- int BoardLevel; /* level of active hw init (0-2) */ +- char DeviceStr[80]; /* adapter string from vpd */ +- SK_U32 AllocFlag; /* flag allocation of resources */ +- struct pci_dev *PciDev; /* for access to pci config space */ +- SK_U32 PciDevId; /* pci device id */ +- struct SK_NET_DEVICE *dev[2]; /* pointer to device struct */ +- char Name[30]; /* driver name */ +- struct SK_NET_DEVICE *Next; /* link all devices (for clearing) */ +- int RxBufSize; /* length of receive buffers */ +- struct net_device_stats stats; /* linux 'netstat -i' statistics */ +- int Index; /* internal board index number */ +- +- /* adapter RAM sizes for queues of active port */ +- int RxQueueSize; /* memory used for receive queue */ +- int TxSQueueSize; /* memory used for sync. tx queue */ +- int TxAQueueSize; /* memory used for async. tx queue */ +- +- int PromiscCount; /* promiscuous mode counter */ +- int AllMultiCount; /* allmulticast mode counter */ +- int MulticCount; /* number of different MC */ +- /* addresses for this board */ +- /* (may be more than HW can)*/ +- +- int HWRevision; /* Hardware revision */ +- int ActivePort; /* the active XMAC port */ +- int MaxPorts; /* number of activated ports */ +- int TxDescrPerRing; /* # of descriptors per tx ring */ +- int RxDescrPerRing; /* # of descriptors per rx ring */ +- +- caddr_t pDescrMem; /* Pointer to the descriptor area */ +- dma_addr_t pDescrMemDMA; /* PCI DMA address of area */ +- +- /* the port structures with descriptor rings */ +- TX_PORT TxPort[SK_MAX_MACS][2]; +- RX_PORT RxPort[SK_MAX_MACS]; +- +- unsigned int CsOfs1; /* for checksum calculation */ +- unsigned int CsOfs2; /* for checksum calculation */ +- SK_U32 CsOfs; /* for checksum calculation */ +- +- SK_BOOL CheckQueue; /* check event queue soon */ +- SK_TIMER DrvCleanupTimer;/* to check for pending descriptors */ +- DIM_INFO DynIrqModInfo; /* all data related to DIM */ +- +- /* Only for tests */ +- int PortUp; +- int PortDown; +- int ChipsetType; /* Chipset family type +- * 0 == Genesis family support +- * 1 == Yukon family support +- */ +-#ifdef SK_DIAG_SUPPORT +- SK_U32 DiagModeActive; /* is diag active? */ +- SK_BOOL DiagFlowCtrl; /* for control purposes */ +- SK_PNMI_STRUCT_DATA PnmiBackup; /* backup structure for all Pnmi-Data */ +- SK_BOOL WasIfUp[SK_MAX_MACS]; /* for OpenClose while +- * DIAG is busy with NIC +- */ ++ SK_GEINIT GIni; /* GE init struct */ ++ SK_PNMI Pnmi; /* PNMI data struct */ ++ SK_VPD vpd; /* vpd data struct */ ++ SK_QUEUE Event; /* Event queue */ ++ SK_HWT Hwt; /* Hardware Timer ctrl struct */ ++ SK_TIMCTRL Tim; /* Software Timer ctrl struct */ ++ SK_I2C I2c; /* I2C relevant data structure*/ ++ SK_ADDR Addr; /* for Address module */ ++ SK_CSUM Csum; /* for checksum module */ ++ SK_RLMT Rlmt; /* for rlmt module */ ++ spinlock_t SlowPathLock; /* Normal IRQ lock */ ++ spinlock_t TxQueueLock; /* TX Queue lock */ ++ SK_PNMI_STRUCT_DATA PnmiStruct; /* struct for all Pnmi-Data */ ++ int RlmtMode; /* link check mode to set */ ++ int RlmtNets; /* Number of nets */ ++ SK_IOC IoBase; /* register set of adapter */ ++ int BoardLevel; /* level of hw init (0-2) */ ++ char DeviceStr[80]; /* adapter string from vpd */ ++ SK_U32 AllocFlag; /* alloc flag of resources */ ++ struct pci_dev *PciDev; /* for access to pci cfg space*/ ++ SK_U32 PciDevId; /* pci device id */ ++ struct SK_NET_DEVICE *dev[2]; /* pointer to device struct */ ++ char Name[30]; /* driver name */ ++ struct SK_NET_DEVICE *Next; /* link all devs for cleanup */ ++ struct net_device_stats stats; /* linux 'netstat -i' stats */ ++ int Index; /* internal board idx number */ ++ int RxQueueSize; /* memory used for RX queue */ ++ int TxSQueueSize; /* memory used for TXS queue */ ++ int TxAQueueSize; /* memory used for TXA queue */ ++ int PromiscCount; /* promiscuous mode counter */ ++ int AllMultiCount; /* allmulticast mode counter */ ++ int MulticCount; /* number of MC addresses used*/ ++ int HWRevision; /* Hardware revision */ ++ int ActivePort; /* the active XMAC port */ ++ int MaxPorts; /* number of activated ports */ ++ int TxDescrPerRing;/* # of descriptors TX ring */ ++ int RxDescrPerRing;/* # of descriptors RX ring */ ++ caddr_t pDescrMem; /* Ptr to the descriptor area */ ++ dma_addr_t pDescrMemDMA; /* PCI DMA address of area */ ++ SK_U32 PciState[16]; /* PCI state */ ++ TX_PORT TxPort[SK_MAX_MACS][2]; ++ RX_PORT RxPort[SK_MAX_MACS]; ++ SK_LE_TABLE StatusLETable; ++ unsigned SizeOfAlignedLETables; ++ spinlock_t SetPutIndexLock; ++ int MaxUnusedRxLeWorking; ++ unsigned int CsOfs1; /* for checksum calculation */ ++ unsigned int CsOfs2; /* for checksum calculation */ ++ SK_U32 CsOfs; /* for checksum calculation */ ++ SK_BOOL CheckQueue; /* check event queue soon */ ++ DIM_INFO DynIrqModInfo; /* all data related to IntMod */ ++ WOL_INFO WolInfo; /* all info regarding WOL */ ++ int ChipsetType; /* 0=GENESIS; 1=Yukon */ ++ SK_BOOL LowLatency; /* LowLatency optimization on?*/ ++ SK_U32 DiagModeActive;/* is diag active? */ ++ SK_BOOL DiagFlowCtrl; /* for control purposes */ ++ SK_PNMI_STRUCT_DATA PnmiBackup; /* backup structure for PNMI */ ++ SK_BOOL WasIfUp[SK_MAX_MACS]; ++#ifdef USE_TIST_FOR_RESET ++ int AdapterResetState; ++ SK_U32 MinTistLo; ++ SK_U32 MinTistHi; ++#endif ++#ifdef Y2_RECOVERY ++ int LastPort; /* port for curr. handled rx */ ++ int LastOpc; /* last rx LEs opcode */ ++#endif ++#ifdef Y2_SYNC_CHECK ++ unsigned long FramesWithoutSyncCheck; /* since last check */ + #endif +- + }; + + +-#endif /* __INC_SKDRV2ND_H */ + ++#endif ++ ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skerror.h linux-2.6.9.new/drivers/net/sk98lin/h/skerror.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skerror.h 2004-10-19 05:54:08.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skerror.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skerror.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.7 $ +- * Date: $Date: 2003/05/13 17:25:13 $ ++ * Version: $Revision: 2.2 $ ++ * Date: $Date: 2004/05/24 15:27:19 $ + * Purpose: SK specific Error log support + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -36,7 +35,6 @@ + #define SK_ERRCL_HW (1L<<4) /* Hardware Failure */ + #define SK_ERRCL_COMM (1L<<5) /* Communication error */ + +- + /* + * Define Error Code Bases + */ +@@ -49,7 +47,9 @@ + #define SK_ERRBASE_I2C 700 /* Base Error number for I2C module */ + #define SK_ERRBASE_QUEUE 800 /* Base Error number for Scheduler */ + #define SK_ERRBASE_ADDR 900 /* Base Error number for Address module */ +-#define SK_ERRBASE_PECP 1000 /* Base Error number for PECP */ ++#define SK_ERRBASE_PECP 1000 /* Base Error number for PECP */ + #define SK_ERRBASE_DRV 1100 /* Base Error number for Driver */ ++#define SK_ERRBASE_ASF 1200 /* Base Error number for ASF */ + + #endif /* _INC_SKERROR_H_ */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgedrv.h linux-2.6.9.new/drivers/net/sk98lin/h/skgedrv.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgedrv.h 2004-10-19 05:54:37.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgedrv.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgedrv.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.10 $ +- * Date: $Date: 2003/07/04 12:25:01 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:08 $ + * Purpose: Interface with the driver + * + ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgehw.h linux-2.6.9.new/drivers/net/sk98lin/h/skgehw.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgehw.h 2004-10-19 05:55:28.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgehw.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgehw.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.56 $ +- * Date: $Date: 2003/09/23 09:01:00 $ ++ * Version: $Revision: 2.49 $ ++ * Date: $Date: 2005/01/20 13:01:35 $ + * Purpose: Defines and Macros for the Gigabit Ethernet Adapter Product Family + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -114,6 +113,16 @@ + #define SHIFT1(x) ((x) << 1) + #define SHIFT0(x) ((x) << 0) + ++/* Macro for arbitrary alignment of a given pointer */ ++#define ALIGN_ADDR( ADDRESS, GRANULARITY ) { \ ++ SK_UPTR addr = (SK_UPTR)(ADDRESS); \ ++ if (addr & ((GRANULARITY)-1)) { \ ++ addr += (GRANULARITY); \ ++ addr &= ~(SK_UPTR)((GRANULARITY)-1); \ ++ ADDRESS = (void *)addr; \ ++ }\ ++} ++ + /* + * Configuration Space header + * Since this module is used for different OS', those may be +@@ -132,34 +141,74 @@ + #define PCI_BIST 0x0f /* 8 bit Built-in selftest */ + #define PCI_BASE_1ST 0x10 /* 32 bit 1st Base address */ + #define PCI_BASE_2ND 0x14 /* 32 bit 2nd Base address */ +- /* Byte 0x18..0x2b: reserved */ ++ /* Bytes 0x18..0x2b: reserved */ + #define PCI_SUB_VID 0x2c /* 16 bit Subsystem Vendor ID */ + #define PCI_SUB_ID 0x2e /* 16 bit Subsystem ID */ + #define PCI_BASE_ROM 0x30 /* 32 bit Expansion ROM Base Address */ +-#define PCI_CAP_PTR 0x34 /* 8 bit Capabilities Ptr */ +- /* Byte 0x35..0x3b: reserved */ ++#define PCI_CAP_PTR 0x34 /* 8 bit Capabilities Pointer */ ++ /* Bytes 0x35..0x3b: reserved */ + #define PCI_IRQ_LINE 0x3c /* 8 bit Interrupt Line */ + #define PCI_IRQ_PIN 0x3d /* 8 bit Interrupt Pin */ + #define PCI_MIN_GNT 0x3e /* 8 bit Min_Gnt */ + #define PCI_MAX_LAT 0x3f /* 8 bit Max_Lat */ + /* Device Dependent Region */ +-#define PCI_OUR_REG_1 0x40 /* 32 bit Our Register 1 */ +-#define PCI_OUR_REG_2 0x44 /* 32 bit Our Register 2 */ ++#define PCI_OUR_REG_1 0x40 /* 32 bit Our Register 1 */ ++#define PCI_OUR_REG_2 0x44 /* 32 bit Our Register 2 */ + /* Power Management Region */ +-#define PCI_PM_CAP_ID 0x48 /* 8 bit Power Management Cap. ID */ +-#define PCI_PM_NITEM 0x49 /* 8 bit Next Item Ptr */ +-#define PCI_PM_CAP_REG 0x4a /* 16 bit Power Management Capabilities */ +-#define PCI_PM_CTL_STS 0x4c /* 16 bit Power Manag. Control/Status */ ++#define PCI_PM_CAP_ID 0x48 /* 8 bit Power Management Cap. ID */ ++#define PCI_PM_NITEM 0x49 /* 8 bit PM Next Item Pointer */ ++#define PCI_PM_CAP_REG 0x4a /* 16 bit Power Management Capabilities */ ++#define PCI_PM_CTL_STS 0x4c /* 16 bit Power Manag. Control/Status */ + /* Byte 0x4e: reserved */ +-#define PCI_PM_DAT_REG 0x4f /* 8 bit Power Manag. Data Register */ ++#define PCI_PM_DAT_REG 0x4f /* 8 bit Power Manag. Data Register */ + /* VPD Region */ +-#define PCI_VPD_CAP_ID 0x50 /* 8 bit VPD Cap. ID */ +-#define PCI_VPD_NITEM 0x51 /* 8 bit Next Item Ptr */ +-#define PCI_VPD_ADR_REG 0x52 /* 16 bit VPD Address Register */ +-#define PCI_VPD_DAT_REG 0x54 /* 32 bit VPD Data Register */ +- /* Byte 0x58..0x59: reserved */ +-#define PCI_SER_LD_CTRL 0x5a /* 16 bit SEEPROM Loader Ctrl (YUKON only) */ +- /* Byte 0x5c..0xff: reserved */ ++#define PCI_VPD_CAP_ID 0x50 /* 8 bit VPD Cap. ID */ ++#define PCI_VPD_NITEM 0x51 /* 8 bit VPD Next Item Pointer */ ++#define PCI_VPD_ADR_REG 0x52 /* 16 bit VPD Address Register */ ++#define PCI_VPD_DAT_REG 0x54 /* 32 bit VPD Data Register */ ++ /* Bytes 0x58..0x59: reserved */ ++#define PCI_SER_LD_CTRL 0x5a /* 16 bit SEEPROM Loader Ctrl (YUKON only) */ ++ /* Bytes 0x5c..0xfc: used by Yukon-2 */ ++#define PCI_MSI_CAP_ID 0x5c /* 8 bit MSI Capability ID Register */ ++#define PCI_MSI_NITEM 0x5d /* 8 bit MSI Next Item Pointer */ ++#define PCI_MSI_CTRL 0x5e /* 16 bit MSI Message Control */ ++#define PCI_MSI_ADR_LO 0x60 /* 32 bit MSI Message Address (Lower) */ ++#define PCI_MSI_ADR_HI 0x64 /* 32 bit MSI Message Address (Upper) */ ++#define PCI_MSI_DATA 0x68 /* 16 bit MSI Message Data */ ++ /* Bytes 0x6a..0x6b: reserved */ ++#define PCI_X_CAP_ID 0x6c /* 8 bit PCI-X Capability ID Register */ ++#define PCI_X_NITEM 0x6d /* 8 bit PCI-X Next Item Pointer */ ++#define PCI_X_COMMAND 0x6e /* 16 bit PCI-X Command */ ++#define PCI_X_PE_STAT 0x70 /* 32 bit PCI-X / PE Status */ ++#define PCI_CAL_CTRL 0x74 /* 16 bit PCI Calibration Control Register */ ++#define PCI_CAL_STAT 0x76 /* 16 bit PCI Calibration Status Register */ ++#define PCI_DISC_CNT 0x78 /* 16 bit PCI Discard Counter */ ++#define PCI_RETRY_CNT 0x7a /* 8 bit PCI Retry Counter */ ++ /* Byte 0x7b: reserved */ ++#define PCI_OUR_STATUS 0x7c /* 32 bit Adapter Status Register */ ++ /* Bytes 0x80..0xdf: reserved */ ++ ++/* PCI Express Capability */ ++#define PEX_CAP_ID 0xe0 /* 8 bit PEX Capability ID */ ++#define PEX_NITEM 0xe1 /* 8 bit PEX Next Item Pointer */ ++#define PEX_CAP_REG 0xe2 /* 16 bit PEX Capability Register */ ++#define PEX_DEV_CAP 0xe4 /* 32 bit PEX Device Capabilities */ ++#define PEX_DEV_CTRL 0xe8 /* 16 bit PEX Device Control */ ++#define PEX_DEV_STAT 0xea /* 16 bit PEX Device Status */ ++#define PEX_LNK_CAP 0xec /* 32 bit PEX Link Capabilities */ ++#define PEX_LNK_CTRL 0xf0 /* 16 bit PEX Link Control */ ++#define PEX_LNK_STAT 0xf2 /* 16 bit PEX Link Status */ ++ /* Bytes 0xf4..0xff: reserved */ ++ ++/* PCI Express Extended Capabilities */ ++#define PEX_ADV_ERR_REP 0x100 /* 32 bit PEX Advanced Error Reporting */ ++#define PEX_UNC_ERR_STAT 0x104 /* 32 bit PEX Uncorr. Errors Status */ ++#define PEX_UNC_ERR_MASK 0x108 /* 32 bit PEX Uncorr. Errors Mask */ ++#define PEX_UNC_ERR_SEV 0x10c /* 32 bit PEX Uncorr. Errors Severity */ ++#define PEX_COR_ERR_STAT 0x110 /* 32 bit PEX Correc. Errors Status */ ++#define PEX_COR_ERR_MASK 0x114 /* 32 bit PEX Correc. Errors Mask */ ++#define PEX_ADV_ERR_CAP_C 0x118 /* 32 bit PEX Advanced Error Cap./Ctrl */ ++#define PEX_HEADER_LOG 0x11c /* 4x32 bit PEX Header Log Register */ + + /* + * I2C Address (PCI Config) +@@ -180,13 +229,13 @@ + #define PCI_ADSTEP BIT_7S /* Address Stepping */ + #define PCI_PERREN BIT_6S /* Parity Report Response enable */ + #define PCI_VGA_SNOOP BIT_5S /* VGA palette snoop */ +-#define PCI_MWIEN BIT_4S /* Memory write an inv cycl ena */ ++#define PCI_MWIEN BIT_4S /* Memory write an inv cycl enable */ + #define PCI_SCYCEN BIT_3S /* Special Cycle enable */ + #define PCI_BMEN BIT_2S /* Bus Master enable */ + #define PCI_MEMEN BIT_1S /* Memory Space Access enable */ + #define PCI_IOEN BIT_0S /* I/O Space Access enable */ + +-#define PCI_COMMAND_VAL (PCI_FBTEN | PCI_SERREN | PCI_PERREN | PCI_MWIEN |\ ++#define PCI_COMMAND_VAL (PCI_INT_DIS | PCI_SERREN | PCI_PERREN | \ + PCI_BMEN | PCI_MEMEN | PCI_IOEN) + + /* PCI_STATUS 16 bit Status */ +@@ -220,7 +269,7 @@ + + /* PCI_HEADER_T 8 bit Header Type */ + #define PCI_HD_MF_DEV BIT_7S /* 0= single, 1= multi-func dev */ +-#define PCI_HD_TYPE 0x7f /* Bit 6..0: Header Layout 0= normal */ ++#define PCI_HD_TYPE 0x7f /* Bit 6..0: Header Layout (0=normal) */ + + /* PCI_BIST 8 bit Built-in selftest */ + /* Built-in Self test not supported (optional) */ +@@ -229,33 +278,42 @@ + #define PCI_MEMSIZE 0x4000L /* use 16 kB Memory Base */ + #define PCI_MEMBASE_MSK 0xffffc000L /* Bit 31..14: Memory Base Address */ + #define PCI_MEMSIZE_MSK 0x00003ff0L /* Bit 13.. 4: Memory Size Req. */ +-#define PCI_PREFEN BIT_3 /* Prefetchable */ +-#define PCI_MEM_TYP (3L<<2) /* Bit 2.. 1: Memory Type */ ++#define PCI_PREFEN BIT_3 /* Prefetch enable */ ++#define PCI_MEM_TYP_MSK (3L<<1) /* Bit 2.. 1: Memory Type Mask */ ++#define PCI_MEMSPACE BIT_0 /* Memory Space Indicator */ ++ + #define PCI_MEM32BIT (0L<<1) /* Base addr anywhere in 32 Bit range */ + #define PCI_MEM1M (1L<<1) /* Base addr below 1 MegaByte */ + #define PCI_MEM64BIT (2L<<1) /* Base addr anywhere in 64 Bit range */ +-#define PCI_MEMSPACE BIT_0 /* Memory Space Indicator */ + + /* PCI_BASE_2ND 32 bit 2nd Base address */ + #define PCI_IOBASE 0xffffff00L /* Bit 31.. 8: I/O Base address */ + #define PCI_IOSIZE 0x000000fcL /* Bit 7.. 2: I/O Size Requirements */ +- /* Bit 1: reserved */ ++ /* Bit 1: reserved */ + #define PCI_IOSPACE BIT_0 /* I/O Space Indicator */ + + /* PCI_BASE_ROM 32 bit Expansion ROM Base Address */ + #define PCI_ROMBASE_MSK 0xfffe0000L /* Bit 31..17: ROM Base address */ + #define PCI_ROMBASE_SIZ (0x1cL<<14) /* Bit 16..14: Treat as Base or Size */ + #define PCI_ROMSIZE (0x38L<<11) /* Bit 13..11: ROM Size Requirements */ +- /* Bit 10.. 1: reserved */ ++ /* Bit 10.. 1: reserved */ + #define PCI_ROMEN BIT_0 /* Address Decode enable */ + + /* Device Dependent Region */ + /* PCI_OUR_REG_1 32 bit Our Register 1 */ +- /* Bit 31..29: reserved */ ++ /* Bit 31..29: reserved */ + #define PCI_PHY_COMA BIT_28 /* Set PHY to Coma Mode (YUKON only) */ + #define PCI_TEST_CAL BIT_27 /* Test PCI buffer calib. (YUKON only) */ + #define PCI_EN_CAL BIT_26 /* Enable PCI buffer calib. (YUKON only) */ + #define PCI_VIO BIT_25 /* PCI I/O Voltage, 0 = 3.3V, 1 = 5V */ ++/* Yukon-2 */ ++#define PCI_Y2_PIG_ENA BIT_31 /* Enable Plug-in-Go (YUKON-2) */ ++#define PCI_Y2_DLL_DIS BIT_30 /* Disable PCI DLL (YUKON-2) */ ++#define PCI_Y2_PHY2_COMA BIT_29 /* Set PHY 2 to Coma Mode (YUKON-2) */ ++#define PCI_Y2_PHY1_COMA BIT_28 /* Set PHY 1 to Coma Mode (YUKON-2) */ ++#define PCI_Y2_PHY2_POWD BIT_27 /* Set PHY 2 to Power Down (YUKON-2) */ ++#define PCI_Y2_PHY1_POWD BIT_26 /* Set PHY 1 to Power Down (YUKON-2) */ ++ /* Bit 25: reserved */ + #define PCI_DIS_BOOT BIT_24 /* Disable BOOT via ROM */ + #define PCI_EN_IO BIT_23 /* Mapping to I/O space */ + #define PCI_EN_FPROM BIT_22 /* Enable FLASH mapping to memory */ +@@ -266,9 +324,10 @@ + #define PCI_PAGE_32K (1L<<20) /* 32 k pages */ + #define PCI_PAGE_64K (2L<<20) /* 64 k pages */ + #define PCI_PAGE_128K (3L<<20) /* 128 k pages */ +- /* Bit 19: reserved */ ++ /* Bit 19: reserved */ + #define PCI_PAGEREG (7L<<16) /* Bit 18..16: Page Register */ + #define PCI_NOTAR BIT_15 /* No turnaround cycle */ ++#define PCI_PEX_LEGNAT BIT_15 /* PEX PM legacy/native mode (YUKON-2) */ + #define PCI_FORCE_BE BIT_14 /* Assert all BEs on MR */ + #define PCI_DIS_MRL BIT_13 /* Disable Mem Read Line */ + #define PCI_DIS_MRM BIT_12 /* Disable Mem Read Multiple */ +@@ -278,13 +337,13 @@ + #define PCI_DIS_PCI_CLK BIT_8 /* Disable PCI clock driving */ + #define PCI_SKEW_DAS (0xfL<<4) /* Bit 7.. 4: Skew Ctrl, DAS Ext */ + #define PCI_SKEW_BASE 0xfL /* Bit 3.. 0: Skew Ctrl, Base */ +- ++#define PCI_CLS_OPT BIT_3 /* Cache Line Size opt. PCI-X (YUKON-2) */ + + /* PCI_OUR_REG_2 32 bit Our Register 2 */ + #define PCI_VPD_WR_THR (0xffL<<24) /* Bit 31..24: VPD Write Threshold */ + #define PCI_DEV_SEL (0x7fL<<17) /* Bit 23..17: EEPROM Device Select */ + #define PCI_VPD_ROM_SZ (7L<<14) /* Bit 16..14: VPD ROM Size */ +- /* Bit 13..12: reserved */ ++ /* Bit 13..12: reserved */ + #define PCI_PATCH_DIR (0xfL<<8) /* Bit 11.. 8: Ext Patches dir 3..0 */ + #define PCI_PATCH_DIR_3 BIT_11 + #define PCI_PATCH_DIR_2 BIT_10 +@@ -297,21 +356,20 @@ + #define PCI_EXT_PATCH_0 BIT_4 + #define PCI_EN_DUMMY_RD BIT_3 /* Enable Dummy Read */ + #define PCI_REV_DESC BIT_2 /* Reverse Desc. Bytes */ +- /* Bit 1: reserved */ ++ /* Bit 1: reserved */ + #define PCI_USEDATA64 BIT_0 /* Use 64Bit Data bus ext */ + +- + /* Power Management Region */ + /* PCI_PM_CAP_REG 16 bit Power Management Capabilities */ + #define PCI_PME_SUP_MSK (0x1f<<11) /* Bit 15..11: PM Event Support Mask */ +-#define PCI_PME_D3C_SUP BIT_15S /* PME from D3cold Support (if Vaux) */ ++#define PCI_PME_D3C_SUP BIT_15S /* PME from D3cold Support (if VAUX) */ + #define PCI_PME_D3H_SUP BIT_14S /* PME from D3hot Support */ + #define PCI_PME_D2_SUP BIT_13S /* PME from D2 Support */ + #define PCI_PME_D1_SUP BIT_12S /* PME from D1 Support */ + #define PCI_PME_D0_SUP BIT_11S /* PME from D0 Support */ + #define PCI_PM_D2_SUP BIT_10S /* D2 Support in 33 MHz mode */ + #define PCI_PM_D1_SUP BIT_9S /* D1 Support */ +- /* Bit 8.. 6: reserved */ ++ /* Bit 8.. 6: reserved */ + #define PCI_PM_DSI BIT_5S /* Device Specific Initialization */ + #define PCI_PM_APS BIT_4S /* Auxialiary Power Source */ + #define PCI_PME_CLOCK BIT_3S /* PM Event Clock */ +@@ -322,7 +380,7 @@ + #define PCI_PM_DAT_SCL (3<<13) /* Bit 14..13: Data Reg. scaling factor */ + #define PCI_PM_DAT_SEL (0xf<<9) /* Bit 12.. 9: PM data selector field */ + #define PCI_PME_EN BIT_8S /* Enable PME# generation (YUKON only) */ +- /* Bit 7.. 2: reserved */ ++ /* Bit 7.. 2: reserved */ + #define PCI_PM_STATE_MSK 3 /* Bit 1.. 0: Power Management State */ + + #define PCI_PM_STATE_D0 0 /* D0: Operational (default) */ +@@ -333,7 +391,67 @@ + /* VPD Region */ + /* PCI_VPD_ADR_REG 16 bit VPD Address Register */ + #define PCI_VPD_FLAG BIT_15S /* starts VPD rd/wr cycle */ +-#define PCI_VPD_ADR_MSK 0x7fffL /* Bit 14.. 0: VPD address mask */ ++#define PCI_VPD_ADR_MSK 0x7fffL /* Bit 14.. 0: VPD Address Mask */ ++ ++/* PCI_OUR_STATUS 32 bit Adapter Status Register (Yukon-2) */ ++#define PCI_OS_PCI64B BIT_31 /* Conventional PCI 64 bits Bus */ ++#define PCI_OS_PCIX BIT_30 /* PCI-X Bus */ ++#define PCI_OS_MODE_MSK (3L<<28) /* Bit 29..28: PCI-X Bus Mode Mask */ ++#define PCI_OS_PCI66M BIT_27 /* PCI 66 MHz Bus */ ++#define PCI_OS_PCI_X BIT_26 /* PCI/PCI-X Bus (0 = PEX) */ ++#define PCI_OS_DLLE_MSK (3L<<24) /* Bit 25..24: DLL Status Indication */ ++#define PCI_OS_DLLR_MSK (0xfL<<20) /* Bit 23..20: DLL Row Counters Values */ ++#define PCI_OS_DLLC_MSK (0xfL<<16) /* Bit 19..16: DLL Col. Counters Values */ ++ /* Bit 15.. 8: reserved */ ++ ++#define PCI_OS_SPEED(val) ((val & PCI_OS_MODE_MSK) >> 28) /* PCI-X Speed */ ++/* possible values for the speed field of the register */ ++#define PCI_OS_SPD_PCI 0 /* PCI Conventional Bus */ ++#define PCI_OS_SPD_X66 1 /* PCI-X 66MHz Bus */ ++#define PCI_OS_SPD_X100 2 /* PCI-X 100MHz Bus */ ++#define PCI_OS_SPD_X133 3 /* PCI-X 133MHz Bus */ ++ ++/* PEX_DEV_CTRL 16 bit PEX Device Control (Yukon-2) */ ++ /* Bit 15 reserved */ ++#define PEX_DC_MAX_RRS_MSK (7<<12) /* Bit 14..12: Max. Read Request Size */ ++#define PEX_DC_EN_NO_SNOOP BIT_11S /* Enable No Snoop */ ++#define PEX_DC_EN_AUX_POW BIT_10S /* Enable AUX Power */ ++#define PEX_DC_EN_PHANTOM BIT_9S /* Enable Phantom Functions */ ++#define PEX_DC_EN_EXT_TAG BIT_8S /* Enable Extended Tag Field */ ++#define PEX_DC_MAX_PLS_MSK (7<<5) /* Bit 7.. 5: Max. Payload Size Mask */ ++#define PEX_DC_EN_REL_ORD BIT_4S /* Enable Relaxed Ordering */ ++#define PEX_DC_EN_UNS_RQ_RP BIT_3S /* Enable Unsupported Request Reporting */ ++#define PEX_DC_EN_FAT_ER_RP BIT_2S /* Enable Fatal Error Reporting */ ++#define PEX_DC_EN_NFA_ER_RP BIT_1S /* Enable Non-Fatal Error Reporting */ ++#define PEX_DC_EN_COR_ER_RP BIT_0S /* Enable Correctable Error Reporting */ ++ ++#define PEX_DC_MAX_RD_RQ_SIZE(x) (SHIFT12(x) & PEX_DC_MAX_RRS_MSK) ++ ++/* PEX_LNK_STAT 16 bit PEX Link Status (Yukon-2) */ ++ /* Bit 15..13 reserved */ ++#define PEX_LS_SLOT_CLK_CFG BIT_12S /* Slot Clock Config */ ++#define PEX_LS_LINK_TRAIN BIT_11S /* Link Training */ ++#define PEX_LS_TRAIN_ERROR BIT_10S /* Training Error */ ++#define PEX_LS_LINK_WI_MSK (0x3f<<4) /* Bit 9.. 4: Neg. Link Width Mask */ ++#define PEX_LS_LINK_SP_MSK 0x0f /* Bit 3.. 0: Link Speed Mask */ ++ ++/* PEX_UNC_ERR_STAT PEX Uncorrectable Errors Status Register (Yukon-2) */ ++ /* Bit 31..21 reserved */ ++#define PEX_UNSUP_REQ BIT_20 /* Unsupported Request Error */ ++ /* ECRC Error (not supported) */ ++#define PEX_MALFOR_TLP BIT_18 /* Malformed TLP */ ++ /* Receiver Overflow (not supported) */ ++#define PEX_UNEXP_COMP BIT_16 /* Unexpected Completion */ ++ /* Completer Abort (not supported) */ ++#define PEX_COMP_TO BIT_14 /* Completion Timeout */ ++#define PEX_FLOW_CTRL_P BIT_13 /* Flow Control Protocol Error */ ++#define PEX_POIS_TLP BIT_12 /* Poisoned TLP */ ++ /* Bit 11.. 5: reserved */ ++#define PEX_DATA_LINK_P BIT_4 /* Data Link Protocol Error */ ++ /* Bit 3.. 1: reserved */ ++ /* Training Error (not supported) */ ++ ++#define PEX_FATAL_ERRORS (PEX_MALFOR_TLP | PEX_FLOW_CTRL_P | PEX_DATA_LINK_P) + + /* Control Register File (Address Map) */ + +@@ -349,8 +467,14 @@ + #define B0_IMSK 0x000c /* 32 bit Interrupt Mask Register */ + #define B0_HWE_ISRC 0x0010 /* 32 bit HW Error Interrupt Src Reg */ + #define B0_HWE_IMSK 0x0014 /* 32 bit HW Error Interrupt Mask Reg */ +-#define B0_SP_ISRC 0x0018 /* 32 bit Special Interrupt Source Reg */ +- /* 0x001c: reserved */ ++#define B0_SP_ISRC 0x0018 /* 32 bit Special Interrupt Source Reg 1 */ ++ ++/* Special ISR registers (Yukon-2 only) */ ++#define B0_Y2_SP_ISRC2 0x001c /* 32 bit Special Interrupt Source Reg 2 */ ++#define B0_Y2_SP_ISRC3 0x0020 /* 32 bit Special Interrupt Source Reg 3 */ ++#define B0_Y2_SP_EISR 0x0024 /* 32 bit Enter ISR Reg */ ++#define B0_Y2_SP_LISR 0x0028 /* 32 bit Leave ISR Reg */ ++#define B0_Y2_SP_ICR 0x002c /* 32 bit Interrupt Control Reg */ + + /* B0 XMAC 1 registers (GENESIS only) */ + #define B0_XM1_IMSK 0x0020 /* 16 bit r/w XMAC 1 Interrupt Mask Register*/ +@@ -400,14 +524,23 @@ + #define B2_CONN_TYP 0x0118 /* 8 bit Connector type */ + #define B2_PMD_TYP 0x0119 /* 8 bit PMD type */ + #define B2_MAC_CFG 0x011a /* 8 bit MAC Configuration / Chip Revision */ +-#define B2_CHIP_ID 0x011b /* 8 bit Chip Identification Number */ +- /* Eprom registers are currently of no use */ ++#define B2_CHIP_ID 0x011b /* 8 bit Chip Identification Number */ ++ /* Eprom registers */ + #define B2_E_0 0x011c /* 8 bit EPROM Byte 0 (ext. SRAM size */ ++/* Yukon and Genesis */ + #define B2_E_1 0x011d /* 8 bit EPROM Byte 1 (PHY type) */ + #define B2_E_2 0x011e /* 8 bit EPROM Byte 2 */ ++/* Yukon-2 */ ++#define B2_Y2_CLK_GATE 0x011d /* 8 bit Clock Gating (Yukon-2) */ ++#define B2_Y2_HW_RES 0x011e /* 8 bit HW Resources (Yukon-2) */ ++ + #define B2_E_3 0x011f /* 8 bit EPROM Byte 3 */ ++ ++/* Yukon and Genesis */ + #define B2_FAR 0x0120 /* 32 bit Flash-Prom Addr Reg/Cnt */ + #define B2_FDP 0x0124 /* 8 bit Flash-Prom Data Port */ ++/* Yukon-2 */ ++#define B2_Y2_CLK_CTRL 0x0120 /* 32 bit Core Clock Frequency Control */ + /* 0x0125 - 0x0127: reserved */ + #define B2_LD_CTRL 0x0128 /* 8 bit EPROM loader control register */ + #define B2_LD_TEST 0x0129 /* 8 bit EPROM loader test register */ +@@ -439,6 +572,10 @@ + #define B2_BSC_CTRL 0x0178 /* 8 bit Blink Source Counter Control */ + #define B2_BSC_STAT 0x0179 /* 8 bit Blink Source Counter Status */ + #define B2_BSC_TST 0x017a /* 16 bit Blink Source Counter Test Reg */ ++ ++/* Yukon-2 */ ++#define Y2_PEX_PHY_DATA 0x0170 /* 16 bit PEX PHY Data Register */ ++#define Y2_PEX_PHY_ADDR 0x0172 /* 16 bit PEX PHY Address Register */ + /* 0x017c - 0x017f: reserved */ + + /* +@@ -448,9 +585,13 @@ + #define B3_RAM_ADDR 0x0180 /* 32 bit RAM Address, to read or write */ + #define B3_RAM_DATA_LO 0x0184 /* 32 bit RAM Data Word (low dWord) */ + #define B3_RAM_DATA_HI 0x0188 /* 32 bit RAM Data Word (high dWord) */ ++ ++#define SELECT_RAM_BUFFER(rb, addr) (addr | (rb << 6)) /* Yukon-2 only */ ++ + /* 0x018c - 0x018f: reserved */ + + /* RAM Interface Registers */ ++/* Yukon-2: use SELECT_RAM_BUFFER() to access the RAM buffer */ + /* + * The HW-Spec. calls this registers Timeout Value 0..11. But this names are + * not usable in SW. Please notice these are NOT real timeouts, these are +@@ -517,8 +658,8 @@ + /* 0x01ea - 0x01eb: reserved */ + #define B3_PA_TOVAL_TX2 0x01ec /* 16 bit Timeout Val Tx Path MAC 2 */ + /* 0x01ee - 0x01ef: reserved */ +-#define B3_PA_CTRL 0x01f0 /* 16 bit Packet Arbiter Ctrl Register */ +-#define B3_PA_TEST 0x01f2 /* 16 bit Packet Arbiter Test Register */ ++#define B3_PA_CTRL 0x01f0 /* 16 bit Packet Arbiter Ctrl Register */ ++#define B3_PA_TEST 0x01f2 /* 16 bit Packet Arbiter Test Register */ + /* 0x01f4 - 0x01ff: reserved */ + + /* +@@ -532,7 +673,16 @@ + #define TXA_CTRL 0x0210 /* 8 bit Tx Arbiter Control Register */ + #define TXA_TEST 0x0211 /* 8 bit Tx Arbiter Test Register */ + #define TXA_STAT 0x0212 /* 8 bit Tx Arbiter Status Register */ +- /* 0x0213 - 0x027f: reserved */ ++ /* 0x0213 - 0x021f: reserved */ ++ ++ /* RSS key registers for Yukon-2 Family */ ++#define B4_RSS_KEY 0x0220 /* 4x32 bit RSS Key register (Yukon-2) */ ++ /* RSS key register offsets */ ++#define KEY_IDX_0 0 /* offset for location of KEY 0 */ ++#define KEY_IDX_1 4 /* offset for location of KEY 1 */ ++#define KEY_IDX_2 8 /* offset for location of KEY 2 */ ++#define KEY_IDX_3 12 /* offset for location of KEY 3 */ ++ + /* 0x0280 - 0x0292: MAC 2 */ + /* 0x0213 - 0x027f: reserved */ + +@@ -570,8 +720,37 @@ + #define Q_T1_SV 0x3f /* 8 bit Test Register 1 Supervisor SM */ + #define Q_T2 0x40 /* 32 bit Test Register 2 */ + #define Q_T3 0x44 /* 32 bit Test Register 3 */ ++ ++/* Yukon-2 */ ++#define Q_DONE 0x24 /* 16 bit Done Index (Yukon-2 only) */ ++#define Q_WM 0x40 /* 16 bit FIFO Watermark */ ++#define Q_AL 0x42 /* 8 bit FIFO Alignment */ ++#define Q_RSP 0x44 /* 16 bit FIFO Read Shadow Pointer */ ++#define Q_RSL 0x46 /* 8 bit FIFO Read Shadow Level */ ++#define Q_RP 0x48 /* 8 bit FIFO Read Pointer */ ++#define Q_RL 0x4a /* 8 bit FIFO Read Level */ ++#define Q_WP 0x4c /* 8 bit FIFO Write Pointer */ ++#define Q_WSP 0x4d /* 8 bit FIFO Write Shadow Pointer */ ++#define Q_WL 0x4e /* 8 bit FIFO Write Level */ ++#define Q_WSL 0x4f /* 8 bit FIFO Write Shadow Level */ + /* 0x48 - 0x7f: reserved */ + ++/* Queue Prefetch Unit Offsets, use Y2_PREF_Q_ADDR() to address (Yukon-2 only)*/ ++#define Y2_B8_PREF_REGS 0x0450 ++ ++#define PREF_UNIT_CTRL_REG 0x00 /* 32 bit Prefetch Control register */ ++#define PREF_UNIT_LAST_IDX_REG 0x04 /* 16 bit Last Index */ ++#define PREF_UNIT_ADDR_LOW_REG 0x08 /* 32 bit List start addr, low part */ ++#define PREF_UNIT_ADDR_HI_REG 0x0c /* 32 bit List start addr, high part*/ ++#define PREF_UNIT_GET_IDX_REG 0x10 /* 16 bit Get Index */ ++#define PREF_UNIT_PUT_IDX_REG 0x14 /* 16 bit Put Index */ ++#define PREF_UNIT_FIFO_WP_REG 0x20 /* 8 bit FIFO write pointer */ ++#define PREF_UNIT_FIFO_RP_REG 0x24 /* 8 bit FIFO read pointer */ ++#define PREF_UNIT_FIFO_WM_REG 0x28 /* 8 bit FIFO watermark */ ++#define PREF_UNIT_FIFO_LEV_REG 0x2c /* 8 bit FIFO level */ ++ ++#define PREF_UNIT_MASK_IDX 0x0fff ++ + /* + * Bank 16 - 23 + */ +@@ -583,17 +762,17 @@ + #define RB_END 0x04 /* 32 bit RAM Buffer End Address */ + #define RB_WP 0x08 /* 32 bit RAM Buffer Write Pointer */ + #define RB_RP 0x0c /* 32 bit RAM Buffer Read Pointer */ +-#define RB_RX_UTPP 0x10 /* 32 bit Rx Upper Threshold, Pause Pack */ +-#define RB_RX_LTPP 0x14 /* 32 bit Rx Lower Threshold, Pause Pack */ ++#define RB_RX_UTPP 0x10 /* 32 bit Rx Upper Threshold, Pause Packet */ ++#define RB_RX_LTPP 0x14 /* 32 bit Rx Lower Threshold, Pause Packet */ + #define RB_RX_UTHP 0x18 /* 32 bit Rx Upper Threshold, High Prio */ + #define RB_RX_LTHP 0x1c /* 32 bit Rx Lower Threshold, High Prio */ + /* 0x10 - 0x1f: reserved at Tx RAM Buffer Registers */ + #define RB_PC 0x20 /* 32 bit RAM Buffer Packet Counter */ + #define RB_LEV 0x24 /* 32 bit RAM Buffer Level Register */ +-#define RB_CTRL 0x28 /* 8 bit RAM Buffer Control Register */ ++#define RB_CTRL 0x28 /* 32 bit RAM Buffer Control Register */ + #define RB_TST1 0x29 /* 8 bit RAM Buffer Test Register 1 */ +-#define RB_TST2 0x2A /* 8 bit RAM Buffer Test Register 2 */ +- /* 0x2c - 0x7f: reserved */ ++#define RB_TST2 0x2a /* 8 bit RAM Buffer Test Register 2 */ ++ /* 0x2b - 0x7f: reserved */ + + /* + * Bank 24 +@@ -603,7 +782,7 @@ + * use MR_ADDR() to access + */ + #define RX_MFF_EA 0x0c00 /* 32 bit Receive MAC FIFO End Address */ +-#define RX_MFF_WP 0x0c04 /* 32 bit Receive MAC FIFO Write Pointer */ ++#define RX_MFF_WP 0x0c04 /* 32 bit Receive MAC FIFO Write Pointer */ + /* 0x0c08 - 0x0c0b: reserved */ + #define RX_MFF_RP 0x0c0c /* 32 bit Receive MAC FIFO Read Pointer */ + #define RX_MFF_PC 0x0c10 /* 32 bit Receive MAC FIFO Packet Cnt */ +@@ -628,20 +807,22 @@ + #define LNK_LED_REG 0x0c3c /* 8 bit Link LED Register */ + /* 0x0c3d - 0x0c3f: reserved */ + +-/* Receive GMAC FIFO (YUKON only), use MR_ADDR() to access */ ++/* Receive GMAC FIFO (YUKON and Yukon-2), use MR_ADDR() to access */ + #define RX_GMF_EA 0x0c40 /* 32 bit Rx GMAC FIFO End Address */ + #define RX_GMF_AF_THR 0x0c44 /* 32 bit Rx GMAC FIFO Almost Full Thresh. */ + #define RX_GMF_CTRL_T 0x0c48 /* 32 bit Rx GMAC FIFO Control/Test */ + #define RX_GMF_FL_MSK 0x0c4c /* 32 bit Rx GMAC FIFO Flush Mask */ + #define RX_GMF_FL_THR 0x0c50 /* 32 bit Rx GMAC FIFO Flush Threshold */ +- /* 0x0c54 - 0x0c5f: reserved */ +-#define RX_GMF_WP 0x0c60 /* 32 bit Rx GMAC FIFO Write Pointer */ ++#define RX_GMF_TR_THR 0x0c54 /* 32 bit Rx Truncation Threshold (Yukon-2) */ ++ /* 0x0c58 - 0x0c5b: reserved */ ++#define RX_GMF_VLAN 0x0c5c /* 32 bit Rx VLAN Type Register (Yukon-2) */ ++#define RX_GMF_WP 0x0c60 /* 32 bit Rx GMAC FIFO Write Pointer */ + /* 0x0c64 - 0x0c67: reserved */ +-#define RX_GMF_WLEV 0x0c68 /* 32 bit Rx GMAC FIFO Write Level */ ++#define RX_GMF_WLEV 0x0c68 /* 32 bit Rx GMAC FIFO Write Level */ + /* 0x0c6c - 0x0c6f: reserved */ +-#define RX_GMF_RP 0x0c70 /* 32 bit Rx GMAC FIFO Read Pointer */ ++#define RX_GMF_RP 0x0c70 /* 32 bit Rx GMAC FIFO Read Pointer */ + /* 0x0c74 - 0x0c77: reserved */ +-#define RX_GMF_RLEV 0x0c78 /* 32 bit Rx GMAC FIFO Read Level */ ++#define RX_GMF_RLEV 0x0c78 /* 32 bit Rx GMAC FIFO Read Level */ + /* 0x0c7c - 0x0c7f: reserved */ + + /* +@@ -658,7 +839,7 @@ + * use MR_ADDR() to access + */ + #define TX_MFF_EA 0x0d00 /* 32 bit Transmit MAC FIFO End Address */ +-#define TX_MFF_WP 0x0d04 /* 32 bit Transmit MAC FIFO WR Pointer */ ++#define TX_MFF_WP 0x0d04 /* 32 bit Transmit MAC FIFO WR Pointer */ + #define TX_MFF_WSP 0x0d08 /* 32 bit Transmit MAC FIFO WR Shadow Ptr */ + #define TX_MFF_RP 0x0d0c /* 32 bit Transmit MAC FIFO RD Pointer */ + #define TX_MFF_PC 0x0d10 /* 32 bit Transmit MAC FIFO Packet Cnt */ +@@ -676,18 +857,19 @@ + #define TX_LED_TST 0x0d29 /* 8 bit Transmit LED Cnt Test Reg */ + /* 0x0d2a - 0x0d3f: reserved */ + +-/* Transmit GMAC FIFO (YUKON only), use MR_ADDR() to access */ ++/* Transmit GMAC FIFO (YUKON and Yukon-2), use MR_ADDR() to access */ + #define TX_GMF_EA 0x0d40 /* 32 bit Tx GMAC FIFO End Address */ + #define TX_GMF_AE_THR 0x0d44 /* 32 bit Tx GMAC FIFO Almost Empty Thresh.*/ + #define TX_GMF_CTRL_T 0x0d48 /* 32 bit Tx GMAC FIFO Control/Test */ +- /* 0x0d4c - 0x0d5f: reserved */ +-#define TX_GMF_WP 0x0d60 /* 32 bit Tx GMAC FIFO Write Pointer */ +-#define TX_GMF_WSP 0x0d64 /* 32 bit Tx GMAC FIFO Write Shadow Ptr. */ +-#define TX_GMF_WLEV 0x0d68 /* 32 bit Tx GMAC FIFO Write Level */ ++ /* 0x0d4c - 0x0d5b: reserved */ ++#define TX_GMF_VLAN 0x0d5c /* 32 bit Tx VLAN Type Register (Yukon-2) */ ++#define TX_GMF_WP 0x0d60 /* 32 bit Tx GMAC FIFO Write Pointer */ ++#define TX_GMF_WSP 0x0d64 /* 32 bit Tx GMAC FIFO Write Shadow Pointer */ ++#define TX_GMF_WLEV 0x0d68 /* 32 bit Tx GMAC FIFO Write Level */ + /* 0x0d6c - 0x0d6f: reserved */ +-#define TX_GMF_RP 0x0d70 /* 32 bit Tx GMAC FIFO Read Pointer */ +-#define TX_GMF_RSTP 0x0d74 /* 32 bit Tx GMAC FIFO Restart Pointer */ +-#define TX_GMF_RLEV 0x0d78 /* 32 bit Tx GMAC FIFO Read Level */ ++#define TX_GMF_RP 0x0d70 /* 32 bit Tx GMAC FIFO Read Pointer */ ++#define TX_GMF_RSTP 0x0d74 /* 32 bit Tx GMAC FIFO Restart Pointer */ ++#define TX_GMF_RLEV 0x0d78 /* 32 bit Tx GMAC FIFO Read Level */ + /* 0x0d7c - 0x0d7f: reserved */ + + /* +@@ -713,12 +895,84 @@ + #define GMAC_TI_ST_CTRL 0x0e18 /* 8 bit Time Stamp Timer Ctrl Reg */ + /* 0x0e19: reserved */ + #define GMAC_TI_ST_TST 0x0e1a /* 8 bit Time Stamp Timer Test Reg */ +- /* 0x0e1b - 0x0e7f: reserved */ ++ /* 0x0e1b - 0x0e1f: reserved */ ++ ++/* Polling Unit Registers (Yukon-2 only) */ ++#define POLL_CTRL 0x0e20 /* 32 bit Polling Unit Control Reg */ ++#define POLL_LAST_IDX 0x0e24 /* 16 bit Polling Unit List Last Index */ ++ /* 0x0e26 - 0x0e27: reserved */ ++#define POLL_LIST_ADDR_LO 0x0e28 /* 32 bit Poll. List Start Addr (low) */ ++#define POLL_LIST_ADDR_HI 0x0e2c /* 32 bit Poll. List Start Addr (high) */ ++ /* 0x0e30 - 0x0e3f: reserved */ ++ ++/* ASF Subsystem Registers (Yukon-2 only) */ ++#define B28_Y2_SMB_CONFIG 0x0e40 /* 32 bit ASF SMBus Config Register */ ++#define B28_Y2_SMB_CSD_REG 0x0e44 /* 32 bit ASF SMB Control/Status/Data */ ++ /* 0x0e48 - 0x0e5f: reserved */ ++#define B28_Y2_ASF_IRQ_V_BASE 0x0e60 /* 32 bit ASF IRQ Vector Base */ ++ /* 0x0e64 - 0x0e67: reserved */ ++#define B28_Y2_ASF_STAT_CMD 0x0e68 /* 32 bit ASF Status and Command Reg */ ++#define B28_Y2_ASF_HOST_COM 0x0e6c /* 32 bit ASF Host Communication Reg */ ++#define B28_Y2_DATA_REG_1 0x0e70 /* 32 bit ASF/Host Data Register 1 */ ++#define B28_Y2_DATA_REG_2 0x0e74 /* 32 bit ASF/Host Data Register 2 */ ++#define B28_Y2_DATA_REG_3 0x0e78 /* 32 bit ASF/Host Data Register 3 */ ++#define B28_Y2_DATA_REG_4 0x0e7c /* 32 bit ASF/Host Data Register 4 */ + + /* + * Bank 29 + */ +- /* 0x0e80 - 0x0efc: reserved */ ++ ++/* Status BMU Registers (Yukon-2 only)*/ ++#define STAT_CTRL 0x0e80 /* 32 bit Status BMU Control Reg */ ++#define STAT_LAST_IDX 0x0e84 /* 16 bit Status BMU Last Index */ ++ /* 0x0e85 - 0x0e86: reserved */ ++#define STAT_LIST_ADDR_LO 0x0e88 /* 32 bit Status List Start Addr (low) */ ++#define STAT_LIST_ADDR_HI 0x0e8c /* 32 bit Status List Start Addr (high) */ ++#define STAT_TXA1_RIDX 0x0e90 /* 16 bit Status TxA1 Report Index Reg */ ++#define STAT_TXS1_RIDX 0x0e92 /* 16 bit Status TxS1 Report Index Reg */ ++#define STAT_TXA2_RIDX 0x0e94 /* 16 bit Status TxA2 Report Index Reg */ ++#define STAT_TXS2_RIDX 0x0e96 /* 16 bit Status TxS2 Report Index Reg */ ++#define STAT_TX_IDX_TH 0x0e98 /* 16 bit Status Tx Index Threshold Reg */ ++ /* 0x0e9a - 0x0e9b: reserved */ ++#define STAT_PUT_IDX 0x0e9c /* 16 bit Status Put Index Reg */ ++ /* 0x0e9e - 0x0e9f: reserved */ ++ ++/* FIFO Control/Status Registers (Yukon-2 only)*/ ++#define STAT_FIFO_WP 0x0ea0 /* 8 bit Status FIFO Write Pointer Reg */ ++ /* 0x0ea1 - 0x0ea3: reserved */ ++#define STAT_FIFO_RP 0x0ea4 /* 8 bit Status FIFO Read Pointer Reg */ ++ /* 0x0ea5: reserved */ ++#define STAT_FIFO_RSP 0x0ea6 /* 8 bit Status FIFO Read Shadow Ptr */ ++ /* 0x0ea7: reserved */ ++#define STAT_FIFO_LEVEL 0x0ea8 /* 8 bit Status FIFO Level Reg */ ++ /* 0x0ea9: reserved */ ++#define STAT_FIFO_SHLVL 0x0eaa /* 8 bit Status FIFO Shadow Level Reg */ ++ /* 0x0eab: reserved */ ++#define STAT_FIFO_WM 0x0eac /* 8 bit Status FIFO Watermark Reg */ ++#define STAT_FIFO_ISR_WM 0x0ead /* 8 bit Status FIFO ISR Watermark Reg */ ++ /* 0x0eae - 0x0eaf: reserved */ ++ ++/* Level and ISR Timer Registers (Yukon-2 only)*/ ++#define STAT_LEV_TIMER_INI 0x0eb0 /* 32 bit Level Timer Init. Value Reg */ ++#define STAT_LEV_TIMER_CNT 0x0eb4 /* 32 bit Level Timer Counter Reg */ ++#define STAT_LEV_TIMER_CTRL 0x0eb8 /* 8 bit Level Timer Control Reg */ ++#define STAT_LEV_TIMER_TEST 0x0eb9 /* 8 bit Level Timer Test Reg */ ++ /* 0x0eba - 0x0ebf: reserved */ ++#define STAT_TX_TIMER_INI 0x0ec0 /* 32 bit Tx Timer Init. Value Reg */ ++#define STAT_TX_TIMER_CNT 0x0ec4 /* 32 bit Tx Timer Counter Reg */ ++#define STAT_TX_TIMER_CTRL 0x0ec8 /* 8 bit Tx Timer Control Reg */ ++#define STAT_TX_TIMER_TEST 0x0ec9 /* 8 bit Tx Timer Test Reg */ ++ /* 0x0eca - 0x0ecf: reserved */ ++#define STAT_ISR_TIMER_INI 0x0ed0 /* 32 bit ISR Timer Init. Value Reg */ ++#define STAT_ISR_TIMER_CNT 0x0ed4 /* 32 bit ISR Timer Counter Reg */ ++#define STAT_ISR_TIMER_CTRL 0x0ed8 /* 8 bit ISR Timer Control Reg */ ++#define STAT_ISR_TIMER_TEST 0x0ed9 /* 8 bit ISR Timer Test Reg */ ++ /* 0x0eda - 0x0eff: reserved */ ++ ++#define ST_LAST_IDX_MASK 0x007f /* Last Index Mask */ ++#define ST_TXRP_IDX_MASK 0x0fff /* Tx Report Index Mask */ ++#define ST_TXTH_IDX_MASK 0x0fff /* Tx Threshold Index Mask */ ++#define ST_WM_IDX_MASK 0x3f /* FIFO Watermark Index Mask */ + + /* + * Bank 30 +@@ -742,11 +996,9 @@ + #define WOL_MATCH_RES 0x0f23 /* 8 bit WOL Match Result Reg */ + #define WOL_MAC_ADDR_LO 0x0f24 /* 32 bit WOL MAC Address Low */ + #define WOL_MAC_ADDR_HI 0x0f28 /* 16 bit WOL MAC Address High */ +-#define WOL_PATT_RPTR 0x0f2c /* 8 bit WOL Pattern Read Ptr */ +- +-/* use this macro to access above registers */ +-#define WOL_REG(Reg) ((Reg) + (pAC->GIni.GIWolOffs)) +- ++#define WOL_PATT_PME 0x0f2a /* 8 bit WOL PME Match Enable (Yukon-2) */ ++#define WOL_PATT_ASFM 0x0f2b /* 8 bit WOL ASF Match Enable (Yukon-2) */ ++#define WOL_PATT_RPTR 0x0f2c /* 8 bit WOL Pattern Read Pointer */ + + /* WOL Pattern Length Registers (YUKON only) */ + +@@ -764,11 +1016,22 @@ + */ + /* 0x0f80 - 0x0fff: reserved */ + ++/* WOL registers link 2 */ ++ ++/* use this macro to access WOL registers */ ++#define WOL_REG(Port, Reg) ((Reg) + ((Port)*0x80) + (pAC->GIni.GIWolOffs)) ++ + /* + * Bank 32 - 33 + */ + #define WOL_PATT_RAM_1 0x1000 /* WOL Pattern RAM Link 1 */ ++#define WOL_PATT_RAM_2 0x1400 /* WOL Pattern RAM Link 2 */ + ++/* use this macro to retrieve the pattern ram base address */ ++#define WOL_PATT_RAM_BASE(Port) (WOL_PATT_RAM_1 + (Port)*0x400) ++ ++/* offset to configuration space on Yukon-2 */ ++#define Y2_CFG_SPC 0x1c00 + /* + * Bank 0x22 - 0x3f + */ +@@ -800,13 +1063,26 @@ + */ + /* B0_RAP 8 bit Register Address Port */ + /* Bit 7: reserved */ +-#define RAP_RAP 0x3f /* Bit 6..0: 0 = block 0,..,6f = block 6f */ ++#define RAP_MSK 0x7f /* Bit 6..0: 0 = block 0,..,6f = block 6f */ ++ ++/* B0_CTST 24 bit Control/Status register */ ++ /* Bit 23..18: reserved */ ++#define Y2_VMAIN_AVAIL BIT_17 /* VMAIN available (YUKON-2 only) */ ++#define Y2_VAUX_AVAIL BIT_16 /* VAUX available (YUKON-2 only) */ ++ /* Bit 15..14: reserved */ ++#define Y2_ASF_ENABLE BIT_13S /* ASF Unit Enable (YUKON-2 only) */ ++#define Y2_ASF_DISABLE BIT_12S /* ASF Unit Disable (YUKON-2 only) */ ++#define Y2_CLK_RUN_ENA BIT_11S /* CLK_RUN Enable (YUKON-2 only) */ ++#define Y2_CLK_RUN_DIS BIT_10S /* CLK_RUN Disable (YUKON-2 only) */ ++#define Y2_LED_STAT_ON BIT_9S /* Status LED On (YUKON-2 only) */ ++#define Y2_LED_STAT_OFF BIT_8S /* Status LED Off (YUKON-2 only) */ ++ /* Bit 7.. 0: same as below */ + + /* B0_CTST 16 bit Control/Status register */ + /* Bit 15..14: reserved */ +-#define CS_CLK_RUN_HOT BIT_13S /* CLK_RUN hot m. (YUKON-Lite only) */ +-#define CS_CLK_RUN_RST BIT_12S /* CLK_RUN reset (YUKON-Lite only) */ +-#define CS_CLK_RUN_ENA BIT_11S /* CLK_RUN enable (YUKON-Lite only) */ ++#define CS_CLK_RUN_HOT BIT_13S /* CLK_RUN Hot m. (YUKON-Lite only) */ ++#define CS_CLK_RUN_RST BIT_12S /* CLK_RUN Reset (YUKON-Lite only) */ ++#define CS_CLK_RUN_ENA BIT_11S /* CLK_RUN Enable (YUKON-Lite only) */ + #define CS_VAUX_AVAIL BIT_10S /* VAUX available (YUKON only) */ + #define CS_BUS_CLOCK BIT_9S /* Bus Clock 0/1 = 33/66 MHz */ + #define CS_BUS_SLOT_SZ BIT_8S /* Slot Size 0/1 = 32/64 bit slot */ +@@ -814,26 +1090,27 @@ + #define CS_CL_SW_IRQ BIT_6S /* Clear IRQ SW Request */ + #define CS_STOP_DONE BIT_5S /* Stop Master is finished */ + #define CS_STOP_MAST BIT_4S /* Command Bit to stop the master */ +-#define CS_MRST_CLR BIT_3S /* Clear Master reset */ +-#define CS_MRST_SET BIT_2S /* Set Master reset */ +-#define CS_RST_CLR BIT_1S /* Clear Software reset */ +-#define CS_RST_SET BIT_0S /* Set Software reset */ ++#define CS_MRST_CLR BIT_3S /* Clear Master Reset */ ++#define CS_MRST_SET BIT_2S /* Set Master Reset */ ++#define CS_RST_CLR BIT_1S /* Clear Software Reset */ ++#define CS_RST_SET BIT_0S /* Set Software Reset */ + +-/* B0_LED 8 Bit LED register */ ++/* B0_LED 8 Bit LED register (GENESIS only)*/ + /* Bit 7.. 2: reserved */ +-#define LED_STAT_ON BIT_1S /* Status LED on */ +-#define LED_STAT_OFF BIT_0S /* Status LED off */ ++#define LED_STAT_ON BIT_1S /* Status LED On */ ++#define LED_STAT_OFF BIT_0S /* Status LED Off */ + + /* B0_POWER_CTRL 8 Bit Power Control reg (YUKON only) */ + #define PC_VAUX_ENA BIT_7 /* Switch VAUX Enable */ +-#define PC_VAUX_DIS BIT_6 /* Switch VAUX Disable */ +-#define PC_VCC_ENA BIT_5 /* Switch VCC Enable */ +-#define PC_VCC_DIS BIT_4 /* Switch VCC Disable */ +-#define PC_VAUX_ON BIT_3 /* Switch VAUX On */ +-#define PC_VAUX_OFF BIT_2 /* Switch VAUX Off */ +-#define PC_VCC_ON BIT_1 /* Switch VCC On */ +-#define PC_VCC_OFF BIT_0 /* Switch VCC Off */ ++#define PC_VAUX_DIS BIT_6 /* Switch VAUX Disable */ ++#define PC_VCC_ENA BIT_5 /* Switch VCC Enable */ ++#define PC_VCC_DIS BIT_4 /* Switch VCC Disable */ ++#define PC_VAUX_ON BIT_3 /* Switch VAUX On */ ++#define PC_VAUX_OFF BIT_2 /* Switch VAUX Off */ ++#define PC_VCC_ON BIT_1 /* Switch VCC On */ ++#define PC_VCC_OFF BIT_0 /* Switch VCC Off */ + ++/* Yukon and Genesis */ + /* B0_ISRC 32 bit Interrupt Source Register */ + /* B0_IMSK 32 bit Interrupt Mask Register */ + /* B0_SP_ISRC 32 bit Special Interrupt Source Reg */ +@@ -879,12 +1156,51 @@ + #define IS_XA2_F BIT_1 /* Q_XA2 End of Frame */ + #define IS_XA2_C BIT_0 /* Q_XA2 Encoding Error */ + ++/* (Yukon-2) */ ++/* B0_ISRC 32 bit Interrupt Source Register */ ++/* B0_IMSK 32 bit Interrupt Mask Register */ ++/* B0_SP_ISRC 32 bit Special Interrupt Source Reg */ ++/* B2_IRQM_MSK 32 bit IRQ Moderation Mask */ ++/* B0_Y2_SP_ISRC2 32 bit Special Interrupt Source Reg 2 */ ++/* B0_Y2_SP_ISRC3 32 bit Special Interrupt Source Reg 3 */ ++/* B0_Y2_SP_EISR 32 bit Enter ISR Reg */ ++/* B0_Y2_SP_LISR 32 bit Leave ISR Reg */ ++#define Y2_IS_PORT_MASK(Port, Mask) ((Mask) << (Port*8)) ++#define Y2_IS_HW_ERR BIT_31 /* Interrupt HW Error */ ++#define Y2_IS_STAT_BMU BIT_30 /* Status BMU Interrupt */ ++#define Y2_IS_ASF BIT_29 /* ASF subsystem Interrupt */ ++ /* Bit 28: reserved */ ++#define Y2_IS_POLL_CHK BIT_27 /* Check IRQ from polling unit */ ++#define Y2_IS_TWSI_RDY BIT_26 /* IRQ on end of TWSI Tx */ ++#define Y2_IS_IRQ_SW BIT_25 /* SW forced IRQ */ ++#define Y2_IS_TIMINT BIT_24 /* IRQ from Timer */ ++ /* Bit 23..16 reserved */ ++ /* Link 2 Interrupts */ ++#define Y2_IS_IRQ_PHY2 BIT_12 /* Interrupt from PHY 2 */ ++#define Y2_IS_IRQ_MAC2 BIT_11 /* Interrupt from MAC 2 */ ++#define Y2_IS_CHK_RX2 BIT_10 /* Descriptor error Rx 2 */ ++#define Y2_IS_CHK_TXS2 BIT_9 /* Descriptor error TXS 2 */ ++#define Y2_IS_CHK_TXA2 BIT_8 /* Descriptor error TXA 2 */ ++ /* Bit 7.. 5 reserved */ ++ /* Link 1 interrupts */ ++#define Y2_IS_IRQ_PHY1 BIT_4 /* Interrupt from PHY 1 */ ++#define Y2_IS_IRQ_MAC1 BIT_3 /* Interrupt from MAC 1 */ ++#define Y2_IS_CHK_RX1 BIT_2 /* Descriptor error Rx 1 */ ++#define Y2_IS_CHK_TXS1 BIT_1 /* Descriptor error TXS 1 */ ++#define Y2_IS_CHK_TXA1 BIT_0 /* Descriptor error TXA 1 */ ++ ++#define Y2_IS_L1_MASK 0x0000001fUL /* IRQ Mask for port 1 */ + ++#define Y2_IS_L2_MASK 0x00001f00UL /* IRQ Mask for port 2 */ ++ ++#define Y2_IS_ALL_MSK 0xef001f1fUL /* All Interrupt bits */ ++ ++/* Yukon and Genesis */ + /* B0_HWE_ISRC 32 bit HW Error Interrupt Src Reg */ + /* B0_HWE_IMSK 32 bit HW Error Interrupt Mask Reg */ + /* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ + #define IS_ERR_MSK 0x00000fffL /* All Error bits */ +- /* Bit 31..14: reserved */ ++ /* Bit 31..14: reserved */ + #define IS_IRQ_TIST_OV BIT_13 /* Time Stamp Timer Overflow (YUKON only) */ + #define IS_IRQ_SENSOR BIT_12 /* IRQ from Sensor (YUKON only) */ + #define IS_IRQ_MST_ERR BIT_11 /* IRQ master error detected */ +@@ -900,6 +1216,43 @@ + #define IS_R1_PAR_ERR BIT_1 /* Queue R1 Parity Error */ + #define IS_R2_PAR_ERR BIT_0 /* Queue R2 Parity Error */ + ++ /* Yukon-2 */ ++/* B0_HWE_ISRC 32 bit HW Error Interrupt Src Reg */ ++/* B0_HWE_IMSK 32 bit HW Error Interrupt Mask Reg */ ++/* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */ ++ /* Bit: 31..30 reserved */ ++#define Y2_IS_TIST_OV BIT_29 /* Time Stamp Timer overflow interrupt */ ++#define Y2_IS_SENSOR BIT_28 /* Sensor interrupt */ ++#define Y2_IS_MST_ERR BIT_27 /* Master error interrupt */ ++#define Y2_IS_IRQ_STAT BIT_26 /* Status exception interrupt */ ++#define Y2_IS_PCI_EXP BIT_25 /* PCI-Express interrupt */ ++#define Y2_IS_PCI_NEXP BIT_24 /* PCI-Express error similar to PCI error */ ++ /* Bit: 23..14 reserved */ ++ /* Link 2 */ ++#define Y2_IS_PAR_RD2 BIT_13 /* Read RAM parity error interrupt */ ++#define Y2_IS_PAR_WR2 BIT_12 /* Write RAM parity error interrupt */ ++#define Y2_IS_PAR_MAC2 BIT_11 /* MAC hardware fault interrupt */ ++#define Y2_IS_PAR_RX2 BIT_10 /* Parity Error Rx Queue 2 */ ++#define Y2_IS_TCP_TXS2 BIT_9 /* TCP length mismatch sync Tx queue IRQ */ ++#define Y2_IS_TCP_TXA2 BIT_8 /* TCP length mismatch async Tx queue IRQ */ ++ /* Bit: 9.. 6 reserved */ ++ /* Link 1 */ ++#define Y2_IS_PAR_RD1 BIT_5 /* Read RAM parity error interrupt */ ++#define Y2_IS_PAR_WR1 BIT_4 /* Write RAM parity error interrupt */ ++#define Y2_IS_PAR_MAC1 BIT_3 /* MAC hardware fault interrupt */ ++#define Y2_IS_PAR_RX1 BIT_2 /* Parity Error Rx Queue 1 */ ++#define Y2_IS_TCP_TXS1 BIT_1 /* TCP length mismatch sync Tx queue IRQ */ ++#define Y2_IS_TCP_TXA1 BIT_0 /* TCP length mismatch async Tx queue IRQ */ ++ ++#define Y2_HWE_L1_MASK (Y2_IS_PAR_RD1 | Y2_IS_PAR_WR1 | Y2_IS_PAR_MAC1 |\ ++ Y2_IS_PAR_RX1 | Y2_IS_TCP_TXS1| Y2_IS_TCP_TXA1) ++#define Y2_HWE_L2_MASK (Y2_IS_PAR_RD2 | Y2_IS_PAR_WR2 | Y2_IS_PAR_MAC2 |\ ++ Y2_IS_PAR_RX2 | Y2_IS_TCP_TXS2| Y2_IS_TCP_TXA2) ++ ++#define Y2_HWE_ALL_MSK (Y2_IS_TIST_OV | /* Y2_IS_SENSOR | */ Y2_IS_MST_ERR |\ ++ Y2_IS_IRQ_STAT | Y2_IS_PCI_EXP | Y2_IS_PCI_NEXP |\ ++ Y2_HWE_L1_MASK | Y2_HWE_L2_MASK) ++ + /* B2_CONN_TYP 8 bit Connector type */ + /* B2_PMD_TYP 8 bit PMD type */ + /* Values of connector and PMD type comply to SysKonnect internal std */ +@@ -908,19 +1261,65 @@ + #define CFG_CHIP_R_MSK (0xf<<4) /* Bit 7.. 4: Chip Revision */ + /* Bit 3.. 2: reserved */ + #define CFG_DIS_M2_CLK BIT_1S /* Disable Clock for 2nd MAC */ +-#define CFG_SNG_MAC BIT_0S /* MAC Config: 0=2 MACs / 1=1 MAC*/ ++#define CFG_SNG_MAC BIT_0S /* MAC Config: 0 = 2 MACs; 1 = 1 MAC */ + +-/* B2_CHIP_ID 8 bit Chip Identification Number */ ++/* B2_CHIP_ID 8 bit Chip Identification Number */ + #define CHIP_ID_GENESIS 0x0a /* Chip ID for GENESIS */ + #define CHIP_ID_YUKON 0xb0 /* Chip ID for YUKON */ + #define CHIP_ID_YUKON_LITE 0xb1 /* Chip ID for YUKON-Lite (Rev. A1-A3) */ + #define CHIP_ID_YUKON_LP 0xb2 /* Chip ID for YUKON-LP */ ++#define CHIP_ID_YUKON_XL 0xb3 /* Chip ID for YUKON-2 XL */ ++#define CHIP_ID_YUKON_EC 0xb6 /* Chip ID for YUKON-2 EC */ ++#define CHIP_ID_YUKON_FE 0xb7 /* Chip ID for YUKON-2 FE */ + + #define CHIP_REV_YU_LITE_A1 3 /* Chip Rev. for YUKON-Lite A1,A2 */ + #define CHIP_REV_YU_LITE_A3 7 /* Chip Rev. for YUKON-Lite A3 */ + ++#define CHIP_REV_YU_EC_A1 0 /* Chip Rev. for Yukon-EC A1/A0 */ ++#define CHIP_REV_YU_EC_A2 1 /* Chip Rev. for Yukon-EC A2 */ ++#define CHIP_REV_YU_EC_A3 2 /* Chip Rev. for Yukon-EC A3 */ ++ ++/* B2_Y2_CLK_GATE 8 bit Clock Gating (Yukon-2 only) */ ++#define Y2_STATUS_LNK2_INAC BIT_7S /* Status Link 2 inactiv (0 = activ) */ ++#define Y2_CLK_GAT_LNK2_DIS BIT_6S /* Disable clock gating Link 2 */ ++#define Y2_COR_CLK_LNK2_DIS BIT_5S /* Disable Core clock Link 2 */ ++#define Y2_PCI_CLK_LNK2_DIS BIT_4S /* Disable PCI clock Link 2 */ ++#define Y2_STATUS_LNK1_INAC BIT_3S /* Status Link 1 inactiv (0 = activ) */ ++#define Y2_CLK_GAT_LNK1_DIS BIT_2S /* Disable clock gating Link 1 */ ++#define Y2_COR_CLK_LNK1_DIS BIT_1S /* Disable Core clock Link 1 */ ++#define Y2_PCI_CLK_LNK1_DIS BIT_0S /* Disable PCI clock Link 1 */ ++ ++/* B2_Y2_HW_RES 8 bit HW Resources (Yukon-2 only) */ ++ /* Bit 7.. 5: reserved */ ++#define CFG_LED_MODE_MSK (7<<2) /* Bit 4.. 2: LED Mode Mask */ ++#define CFG_LINK_2_AVAIL BIT_1S /* Link 2 available */ ++#define CFG_LINK_1_AVAIL BIT_0S /* Link 1 available */ ++ ++#define CFG_LED_MODE(x) (((x) & CFG_LED_MODE_MSK) >> 2) ++#define CFG_DUAL_MAC_MSK (CFG_LINK_2_AVAIL | CFG_LINK_1_AVAIL) ++ ++#define CFG_LED_SING_ACT_LNK 0 /* Single LED ACT/LNK mode */ ++#define CFG_LED_DUAL_ACT_LNK 1 /* Dual LED ACT/LNK mode */ ++ ++/* B2_E_3 8 bit lower 4 bits used for HW self test result */ ++#define B2_E3_RES_MASK 0x0f ++ + /* B2_FAR 32 bit Flash-Prom Addr Reg/Cnt */ +-#define FAR_ADDR 0x1ffffL /* Bit 16.. 0: FPROM Address mask */ ++#define FAR_ADDR 0x1ffffL /* Bit 16.. 0: FPROM Address Mask */ ++ ++/* B2_Y2_CLK_CTRL 32 bit Core Clock Frequency Control Register (Yukon-2/EC) */ ++ /* Bit 31..24: reserved */ ++/* Yukon-EC/FE */ ++#define Y2_CLK_DIV_VAL_MSK (0xffL<<16) /* Bit 23..16: Clock Divisor Value */ ++#define Y2_CLK_DIV_VAL(x) (SHIFT16(x) & Y2_CLK_DIV_VAL_MSK) ++/* Yukon-2 */ ++#define Y2_CLK_DIV_VAL2_MSK (7L<<21) /* Bit 23..21: Clock Divisor Value */ ++#define Y2_CLK_SELECT2_MSK (0x1fL<<16) /* Bit 20..16: Clock Select */ ++#define Y2_CLK_DIV_VAL_2(x) (SHIFT21(x) & Y2_CLK_DIV_VAL2_MSK) ++#define Y2_CLK_SEL_VAL_2(x) (SHIFT16(x) & Y2_CLK_SELECT2_MSK) ++ /* Bit 15.. 2: reserved */ ++#define Y2_CLK_DIV_ENA BIT_1S /* Enable Core Clock Division */ ++#define Y2_CLK_DIV_DIS BIT_0S /* Disable Core Clock Division */ + + /* B2_LD_CTRL 8 bit EPROM loader control register */ + /* Bits are currently reserved */ +@@ -960,9 +1359,6 @@ + #define DPT_START BIT_1S /* Start Descriptor Poll Timer */ + #define DPT_STOP BIT_0S /* Stop Descriptor Poll Timer */ + +-/* B2_E_3 8 bit lower 4 bits used for HW self test result */ +-#define B2_E3_RES_MASK 0x0f +- + /* B2_TST_CTRL1 8 bit Test Control Register 1 */ + #define TST_FRC_DPERR_MR BIT_7S /* force DATAPERR on MST RD */ + #define TST_FRC_DPERR_MW BIT_6S /* force DATAPERR on MST WR */ +@@ -982,7 +1378,7 @@ + #define TST_FRC_APERR_2M64 BIT_0S /* AddrPERR on 2. phase */ + + /* B2_GP_IO 32 bit General Purpose I/O Register */ +- /* Bit 31..26: reserved */ ++ /* Bit 31..26: reserved */ + #define GP_DIR_9 BIT_25 /* IO_9 direct, 0=In/1=Out */ + #define GP_DIR_8 BIT_24 /* IO_8 direct, 0=In/1=Out */ + #define GP_DIR_7 BIT_23 /* IO_7 direct, 0=In/1=Out */ +@@ -1032,10 +1428,8 @@ + #define I2C_DATA BIT_1S /* I2C Data Port */ + #define I2C_CLK BIT_0S /* I2C Clock Port */ + +-/* +- * I2C Address +- */ +-#define I2C_SENS_ADDR LM80_ADDR /* I2C Sensor Address, (Volt and Temp)*/ ++/* I2C Address */ ++#define I2C_SENS_ADDR LM80_ADDR /* I2C Sensor Address (Volt and Temp) */ + + + /* B2_BSC_CTRL 8 bit Blink Source Counter Control */ +@@ -1052,16 +1446,20 @@ + #define BSC_T_OFF BIT_1S /* Test mode off */ + #define BSC_T_STEP BIT_0S /* Test step */ + ++/* Y2_PEX_PHY_ADDR/DATA PEX PHY address and data reg (Yukon-2 only) */ ++#define PEX_RD_ACCESS BIT_31 /* Access Mode Read = 1, Write = 0 */ ++#define PEX_DB_ACCESS BIT_30 /* Access to debug register */ ++ + + /* B3_RAM_ADDR 32 bit RAM Address, to read or write */ + /* Bit 31..19: reserved */ + #define RAM_ADR_RAN 0x0007ffffL /* Bit 18.. 0: RAM Address Range */ + + /* RAM Interface Registers */ +-/* B3_RI_CTRL 16 bit RAM Iface Control Register */ ++/* B3_RI_CTRL 16 bit RAM Interface Control Register */ + /* Bit 15..10: reserved */ +-#define RI_CLR_RD_PERR BIT_9S /* Clear IRQ RAM Read Parity Err */ +-#define RI_CLR_WR_PERR BIT_8S /* Clear IRQ RAM Write Parity Err*/ ++#define RI_CLR_RD_PERR BIT_9S /* Clear IRQ RAM Read Parity Err */ ++#define RI_CLR_WR_PERR BIT_8S /* Clear IRQ RAM Write Parity Err */ + /* Bit 7.. 2: reserved */ + #define RI_RST_CLR BIT_1S /* Clear RAM Interface Reset */ + #define RI_RST_SET BIT_0S /* Set RAM Interface Reset */ +@@ -1171,7 +1569,7 @@ + /* Bit 31..16: reserved */ + #define BC_MAX 0xffff /* Bit 15.. 0: Byte counter */ + +-/* BMU Control Status Registers */ ++/* BMU Control / Status Registers (Yukon and Genesis) */ + /* B0_R1_CSR 32 bit BMU Ctrl/Stat Rx Queue 1 */ + /* B0_R2_CSR 32 bit BMU Ctrl/Stat Rx Queue 2 */ + /* B0_XA1_CSR 32 bit BMU Ctrl/Stat Sync Tx Queue 1 */ +@@ -1212,6 +1610,41 @@ + CSR_SV_RUN | CSR_DREAD_RUN | CSR_DWRITE_RUN |\ + CSR_TRANS_RUN) + ++/* Rx BMU Control / Status Registers (Yukon-2) */ ++#define BMU_IDLE BIT_31 /* BMU Idle State */ ++#define BMU_RX_TCP_PKT BIT_30 /* Rx TCP Packet (when RSS Hash enabled) */ ++#define BMU_RX_IP_PKT BIT_29 /* Rx IP Packet (when RSS Hash enabled) */ ++ /* Bit 28..16: reserved */ ++#define BMU_ENA_RX_RSS_HASH BIT_15 /* Enable Rx RSS Hash */ ++#define BMU_DIS_RX_RSS_HASH BIT_14 /* Disable Rx RSS Hash */ ++#define BMU_ENA_RX_CHKSUM BIT_13 /* Enable Rx TCP/IP Checksum Check */ ++#define BMU_DIS_RX_CHKSUM BIT_12 /* Disable Rx TCP/IP Checksum Check */ ++#define BMU_CLR_IRQ_PAR BIT_11 /* Clear IRQ on Parity errors (Rx) */ ++#define BMU_CLR_IRQ_TCP BIT_11 /* Clear IRQ on TCP segmen. error (Tx) */ ++#define BMU_CLR_IRQ_CHK BIT_10 /* Clear IRQ Check */ ++#define BMU_STOP BIT_9 /* Stop Rx/Tx Queue */ ++#define BMU_START BIT_8 /* Start Rx/Tx Queue */ ++#define BMU_FIFO_OP_ON BIT_7 /* FIFO Operational On */ ++#define BMU_FIFO_OP_OFF BIT_6 /* FIFO Operational Off */ ++#define BMU_FIFO_ENA BIT_5 /* Enable FIFO */ ++#define BMU_FIFO_RST BIT_4 /* Reset FIFO */ ++#define BMU_OP_ON BIT_3 /* BMU Operational On */ ++#define BMU_OP_OFF BIT_2 /* BMU Operational Off */ ++#define BMU_RST_CLR BIT_1 /* Clear BMU Reset (Enable) */ ++#define BMU_RST_SET BIT_0 /* Set BMU Reset */ ++ ++#define BMU_CLR_RESET (BMU_FIFO_RST | BMU_OP_OFF | BMU_RST_CLR) ++#define BMU_OPER_INIT (BMU_CLR_IRQ_PAR | BMU_CLR_IRQ_CHK | BMU_START | \ ++ BMU_FIFO_ENA | BMU_OP_ON) ++ ++/* Tx BMU Control / Status Registers (Yukon-2) */ ++ /* Bit 31: same as for Rx */ ++ /* Bit 30..14: reserved */ ++#define BMU_TX_IPIDINCR_ON BIT_13 /* Enable IP ID Increment */ ++#define BMU_TX_IPIDINCR_OFF BIT_12 /* Disable IP ID Increment */ ++#define BMU_TX_CLR_IRQ_TCP BIT_11 /* Clear IRQ on TCP segm. length mism. */ ++ /* Bit 10..0: same as for Rx */ ++ + /* Q_F 32 bit Flag Register */ + /* Bit 31..28: reserved */ + #define F_ALM_FULL BIT_27 /* Rx FIFO: almost full */ +@@ -1260,6 +1693,13 @@ + /* Bit 3: reserved */ + #define T3_VRAM_MSK 7 /* Bit 2.. 0: Virtual RAM Buffer Address */ + ++/* Queue Prefetch Unit Offsets, use Y2_PREF_Q_ADDR() to address (Yukon-2 only)*/ ++/* PREF_UNIT_CTRL_REG 32 bit Prefetch Control register */ ++#define PREF_UNIT_OP_ON BIT_3 /* prefetch unit operational */ ++#define PREF_UNIT_OP_OFF BIT_2 /* prefetch unit not operational */ ++#define PREF_UNIT_RST_CLR BIT_1 /* Clear Prefetch Unit Reset */ ++#define PREF_UNIT_RST_SET BIT_0 /* Set Prefetch Unit Reset */ ++ + /* RAM Buffer Register Offsets, use RB_ADDR(Queue, Offs) to access */ + /* RB_START 32 bit RAM Buffer Start Address */ + /* RB_END 32 bit RAM Buffer End Address */ +@@ -1275,24 +1715,24 @@ + #define RB_MSK 0x0007ffff /* Bit 18.. 0: RAM Buffer Pointer Bits */ + + /* RB_TST2 8 bit RAM Buffer Test Register 2 */ +- /* Bit 7.. 4: reserved */ +-#define RB_PC_DEC BIT_3S /* Packet Counter Decrem */ ++ /* Bit 7.. 4: reserved */ ++#define RB_PC_DEC BIT_3S /* Packet Counter Decrement */ + #define RB_PC_T_ON BIT_2S /* Packet Counter Test On */ +-#define RB_PC_T_OFF BIT_1S /* Packet Counter Tst Off */ +-#define RB_PC_INC BIT_0S /* Packet Counter Increm */ ++#define RB_PC_T_OFF BIT_1S /* Packet Counter Test Off */ ++#define RB_PC_INC BIT_0S /* Packet Counter Increment */ + + /* RB_TST1 8 bit RAM Buffer Test Register 1 */ + /* Bit 7: reserved */ + #define RB_WP_T_ON BIT_6S /* Write Pointer Test On */ + #define RB_WP_T_OFF BIT_5S /* Write Pointer Test Off */ +-#define RB_WP_INC BIT_4S /* Write Pointer Increm */ ++#define RB_WP_INC BIT_4S /* Write Pointer Increment */ + /* Bit 3: reserved */ + #define RB_RP_T_ON BIT_2S /* Read Pointer Test On */ + #define RB_RP_T_OFF BIT_1S /* Read Pointer Test Off */ +-#define RB_RP_DEC BIT_0S /* Read Pointer Decrement */ ++#define RB_RP_INC BIT_0S /* Read Pointer Increment */ + + /* RB_CTRL 8 bit RAM Buffer Control Register */ +- /* Bit 7.. 6: reserved */ ++ /* Bit 7.. 6: reserved */ + #define RB_ENA_STFWD BIT_5S /* Enable Store & Forward */ + #define RB_DIS_STFWD BIT_4S /* Disable Store & Forward */ + #define RB_ENA_OP_MD BIT_3S /* Enable Operation Mode */ +@@ -1300,16 +1740,31 @@ + #define RB_RST_CLR BIT_1S /* Clear RAM Buf STM Reset */ + #define RB_RST_SET BIT_0S /* Set RAM Buf STM Reset */ + ++/* Yukon-2 */ ++ /* Bit 31..20: reserved */ ++#define RB_CNT_DOWN BIT_19 /* Packet Counter Decrement */ ++#define RB_CNT_TST_ON BIT_18 /* Packet Counter Test On */ ++#define RB_CNT_TST_OFF BIT_17 /* Packet Counter Test Off */ ++#define RB_CNT_UP BIT_16 /* Packet Counter Increment */ ++ /* Bit 15: reserved */ ++#define RB_WP_TST_ON BIT_14 /* Write Pointer Test On */ ++#define RB_WP_TST_OFF BIT_13 /* Write Pointer Test Off */ ++#define RB_WP_UP BIT_12 /* Write Pointer Increment */ ++ /* Bit 11: reserved */ ++#define RB_RP_TST_ON BIT_10 /* Read Pointer Test On */ ++#define RB_RP_TST_OFF BIT_9 /* Read Pointer Test Off */ ++#define RB_RP_UP BIT_8 /* Read Pointer Increment */ ++ + + /* Receive and Transmit MAC FIFO Registers (GENESIS only) */ + + /* RX_MFF_EA 32 bit Receive MAC FIFO End Address */ +-/* RX_MFF_WP 32 bit Receive MAC FIFO Write Pointer */ ++/* RX_MFF_WP 32 bit Receive MAC FIFO Write Pointer */ + /* RX_MFF_RP 32 bit Receive MAC FIFO Read Pointer */ + /* RX_MFF_PC 32 bit Receive MAC FIFO Packet Counter */ + /* RX_MFF_LEV 32 bit Receive MAC FIFO Level */ + /* TX_MFF_EA 32 bit Transmit MAC FIFO End Address */ +-/* TX_MFF_WP 32 bit Transmit MAC FIFO Write Pointer */ ++/* TX_MFF_WP 32 bit Transmit MAC FIFO Write Pointer */ + /* TX_MFF_WSP 32 bit Transmit MAC FIFO WR Shadow Pointer */ + /* TX_MFF_RP 32 bit Transmit MAC FIFO Read Pointer */ + /* TX_MFF_PC 32 bit Transmit MAC FIFO Packet Cnt */ +@@ -1359,9 +1814,9 @@ + /* RX_MFF_TST2 8 bit Receive MAC FIFO Test Register 2 */ + /* TX_MFF_TST2 8 bit Transmit MAC FIFO Test Register 2 */ + /* Bit 7: reserved */ +-#define MFF_WSP_T_ON BIT_6S /* Tx: Write Shadow Ptr TestOn */ +-#define MFF_WSP_T_OFF BIT_5S /* Tx: Write Shadow Ptr TstOff */ +-#define MFF_WSP_INC BIT_4S /* Tx: Write Shadow Ptr Increment */ ++#define MFF_WSP_T_ON BIT_6S /* Tx: Write Shadow Pointer Test On */ ++#define MFF_WSP_T_OFF BIT_5S /* Tx: Write Shadow Pointer Test Off */ ++#define MFF_WSP_INC BIT_4S /* Tx: Write Shadow Pointer Increment */ + #define MFF_PC_DEC BIT_3S /* Packet Counter Decrement */ + #define MFF_PC_T_ON BIT_2S /* Packet Counter Test On */ + #define MFF_PC_T_OFF BIT_1S /* Packet Counter Test Off */ +@@ -1372,7 +1827,7 @@ + /* Bit 7: reserved */ + #define MFF_WP_T_ON BIT_6S /* Write Pointer Test On */ + #define MFF_WP_T_OFF BIT_5S /* Write Pointer Test Off */ +-#define MFF_WP_INC BIT_4S /* Write Pointer Increm */ ++#define MFF_WP_INC BIT_4S /* Write Pointer Increment */ + /* Bit 3: reserved */ + #define MFF_RP_T_ON BIT_2S /* Read Pointer Test On */ + #define MFF_RP_T_OFF BIT_1S /* Read Pointer Test Off */ +@@ -1391,12 +1846,16 @@ + + /* RX_LED_CTRL 8 bit Receive LED Cnt Control Reg */ + /* TX_LED_CTRL 8 bit Transmit LED Cnt Control Reg */ ++ /* Bit 7.. 3: reserved */ ++#define LED_START BIT_2S /* Start Counter */ ++#define LED_STOP BIT_1S /* Stop Counter */ ++#define LED_STATE BIT_0S /* Rx/Tx: LED State, 1=LED On */ ++ + /* LNK_SYNC_CTRL 8 bit Link Sync Cnt Control Register */ + /* Bit 7.. 3: reserved */ +-#define LED_START BIT_2S /* Start Timer */ +-#define LED_STOP BIT_1S /* Stop Timer */ +-#define LED_STATE BIT_0S /* Rx/Tx: LED State, 1=LED on */ +-#define LED_CLR_IRQ BIT_0S /* Lnk: Clear Link IRQ */ ++#define LNK_START BIT_2S /* Start Counter */ ++#define LNK_STOP BIT_1S /* Stop Counter */ ++#define LNK_CLR_IRQ BIT_0S /* Clear Link IRQ */ + + /* RX_LED_TST 8 bit Receive LED Cnt Test Register */ + /* TX_LED_TST 8 bit Transmit LED Cnt Test Register */ +@@ -1407,86 +1866,138 @@ + #define LED_T_STEP BIT_0S /* LED Counter Step */ + + /* LNK_LED_REG 8 bit Link LED Register */ +- /* Bit 7.. 6: reserved */ ++ /* Bit 7.. 6: reserved */ + #define LED_BLK_ON BIT_5S /* Link LED Blinking On */ + #define LED_BLK_OFF BIT_4S /* Link LED Blinking Off */ + #define LED_SYNC_ON BIT_3S /* Use Sync Wire to switch LED */ + #define LED_SYNC_OFF BIT_2S /* Disable Sync Wire Input */ +-#define LED_ON BIT_1S /* switch LED on */ +-#define LED_OFF BIT_0S /* switch LED off */ ++#define LED_ON BIT_1S /* Switch LED On */ ++#define LED_OFF BIT_0S /* Switch LED Off */ + + /* Receive and Transmit GMAC FIFO Registers (YUKON only) */ + + /* RX_GMF_EA 32 bit Rx GMAC FIFO End Address */ + /* RX_GMF_AF_THR 32 bit Rx GMAC FIFO Almost Full Thresh. */ +-/* RX_GMF_WP 32 bit Rx GMAC FIFO Write Pointer */ +-/* RX_GMF_WLEV 32 bit Rx GMAC FIFO Write Level */ +-/* RX_GMF_RP 32 bit Rx GMAC FIFO Read Pointer */ +-/* RX_GMF_RLEV 32 bit Rx GMAC FIFO Read Level */ ++/* RX_GMF_WP 32 bit Rx GMAC FIFO Write Pointer */ ++/* RX_GMF_WLEV 32 bit Rx GMAC FIFO Write Level */ ++/* RX_GMF_RP 32 bit Rx GMAC FIFO Read Pointer */ ++/* RX_GMF_RLEV 32 bit Rx GMAC FIFO Read Level */ + /* TX_GMF_EA 32 bit Tx GMAC FIFO End Address */ + /* TX_GMF_AE_THR 32 bit Tx GMAC FIFO Almost Empty Thresh.*/ +-/* TX_GMF_WP 32 bit Tx GMAC FIFO Write Pointer */ +-/* TX_GMF_WSP 32 bit Tx GMAC FIFO Write Shadow Ptr. */ +-/* TX_GMF_WLEV 32 bit Tx GMAC FIFO Write Level */ +-/* TX_GMF_RP 32 bit Tx GMAC FIFO Read Pointer */ +-/* TX_GMF_RSTP 32 bit Tx GMAC FIFO Restart Pointer */ +-/* TX_GMF_RLEV 32 bit Tx GMAC FIFO Read Level */ ++/* TX_GMF_WP 32 bit Tx GMAC FIFO Write Pointer */ ++/* TX_GMF_WSP 32 bit Tx GMAC FIFO Write Shadow Pointer */ ++/* TX_GMF_WLEV 32 bit Tx GMAC FIFO Write Level */ ++/* TX_GMF_RP 32 bit Tx GMAC FIFO Read Pointer */ ++/* TX_GMF_RSTP 32 bit Tx GMAC FIFO Restart Pointer */ ++/* TX_GMF_RLEV 32 bit Tx GMAC FIFO Read Level */ + + /* RX_GMF_CTRL_T 32 bit Rx GMAC FIFO Control/Test */ +- /* Bits 31..15: reserved */ +-#define GMF_WP_TST_ON BIT_14 /* Write Pointer Test On */ +-#define GMF_WP_TST_OFF BIT_13 /* Write Pointer Test Off */ +-#define GMF_WP_STEP BIT_12 /* Write Pointer Step/Increment */ ++ /* Bit 31..28 reserved */ ++#define RX_TRUNC_ON BIT_27 /* enable packet truncation */ ++#define RX_TRUNC_OFF BIT_26 /* disable packet truncation */ ++#define RX_VLAN_STRIP_ON BIT_25 /* enable VLAN stripping */ ++#define RX_VLAN_STRIP_OFF BIT_24 /* disable VLAN stripping */ ++ /* Bit 23..15 reserved */ ++#define GMF_WP_TST_ON BIT_14 /* Write Pointer Test On */ ++#define GMF_WP_TST_OFF BIT_13 /* Write Pointer Test Off */ ++#define GMF_WP_STEP BIT_12 /* Write Pointer Step/Increment */ + /* Bit 11: reserved */ +-#define GMF_RP_TST_ON BIT_10 /* Read Pointer Test On */ +-#define GMF_RP_TST_OFF BIT_9 /* Read Pointer Test Off */ +-#define GMF_RP_STEP BIT_8 /* Read Pointer Step/Increment */ +-#define GMF_RX_F_FL_ON BIT_7 /* Rx FIFO Flush Mode On */ +-#define GMF_RX_F_FL_OFF BIT_6 /* Rx FIFO Flush Mode Off */ +-#define GMF_CLI_RX_FO BIT_5 /* Clear IRQ Rx FIFO Overrun */ +-#define GMF_CLI_RX_FC BIT_4 /* Clear IRQ Rx Frame Complete */ +-#define GMF_OPER_ON BIT_3 /* Operational Mode On */ +-#define GMF_OPER_OFF BIT_2 /* Operational Mode Off */ +-#define GMF_RST_CLR BIT_1 /* Clear GMAC FIFO Reset */ +-#define GMF_RST_SET BIT_0 /* Set GMAC FIFO Reset */ +- +-/* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test */ +- /* Bits 31..19: reserved */ +-#define GMF_WSP_TST_ON BIT_18 /* Write Shadow Pointer Test On */ +-#define GMF_WSP_TST_OFF BIT_17 /* Write Shadow Pointer Test Off */ +-#define GMF_WSP_STEP BIT_16 /* Write Shadow Pointer Step/Increment */ +- /* Bits 15..7: same as for RX_GMF_CTRL_T */ +-#define GMF_CLI_TX_FU BIT_6 /* Clear IRQ Tx FIFO Underrun */ +-#define GMF_CLI_TX_FC BIT_5 /* Clear IRQ Tx Frame Complete */ +-#define GMF_CLI_TX_PE BIT_4 /* Clear IRQ Tx Parity Error */ ++#define GMF_RP_TST_ON BIT_10 /* Read Pointer Test On */ ++#define GMF_RP_TST_OFF BIT_9 /* Read Pointer Test Off */ ++#define GMF_RP_STEP BIT_8 /* Read Pointer Step/Increment */ ++#define GMF_RX_F_FL_ON BIT_7 /* Rx FIFO Flush Mode On */ ++#define GMF_RX_F_FL_OFF BIT_6 /* Rx FIFO Flush Mode Off */ ++#define GMF_CLI_RX_FO BIT_5 /* Clear IRQ Rx FIFO Overrun */ ++#define GMF_CLI_RX_FC BIT_4 /* Clear IRQ Rx Frame Complete */ ++#define GMF_OPER_ON BIT_3 /* Operational Mode On */ ++#define GMF_OPER_OFF BIT_2 /* Operational Mode Off */ ++#define GMF_RST_CLR BIT_1 /* Clear GMAC FIFO Reset */ ++#define GMF_RST_SET BIT_0 /* Set GMAC FIFO Reset */ ++ ++/* TX_GMF_CTRL_T 32 bit Tx GMAC FIFO Control/Test (YUKON and Yukon-2) */ ++ /* Bits 31..26: reserved */ ++#define TX_VLAN_TAG_ON BIT_25 /* enable VLAN tagging */ ++#define TX_VLAN_TAG_OFF BIT_24 /* disable VLAN tagging */ ++ /* Bits 23..19: reserved */ ++#define GMF_WSP_TST_ON BIT_18 /* Write Shadow Pointer Test On */ ++#define GMF_WSP_TST_OFF BIT_17 /* Write Shadow Pointer Test Off */ ++#define GMF_WSP_STEP BIT_16 /* Write Shadow Pointer Step/Increment */ ++ /* Bits 15..8: same as for RX_GMF_CTRL_T */ ++ /* Bit 7: reserved */ ++#define GMF_CLI_TX_FU BIT_6 /* Clear IRQ Tx FIFO Underrun */ ++#define GMF_CLI_TX_FC BIT_5 /* Clear IRQ Tx Frame Complete */ ++#define GMF_CLI_TX_PE BIT_4 /* Clear IRQ Tx Parity Error */ + /* Bits 3..0: same as for RX_GMF_CTRL_T */ + + #define GMF_RX_CTRL_DEF (GMF_OPER_ON | GMF_RX_F_FL_ON) + #define GMF_TX_CTRL_DEF GMF_OPER_ON + ++#define RX_GMF_AF_THR_MIN 0x0c /* Rx GMAC FIFO Almost Full Thresh. min. */ + #define RX_GMF_FL_THR_DEF 0x0a /* Rx GMAC FIFO Flush Threshold default */ + + /* GMAC_TI_ST_CTRL 8 bit Time Stamp Timer Ctrl Reg (YUKON only) */ +- /* Bit 7.. 3: reserved */ +-#define GMT_ST_START BIT_2S /* Start Time Stamp Timer */ +-#define GMT_ST_STOP BIT_1S /* Stop Time Stamp Timer */ +-#define GMT_ST_CLR_IRQ BIT_0S /* Clear Time Stamp Timer IRQ */ +- ++ /* Bit 7.. 3: reserved */ ++#define GMT_ST_START BIT_2S /* Start Time Stamp Timer */ ++#define GMT_ST_STOP BIT_1S /* Stop Time Stamp Timer */ ++#define GMT_ST_CLR_IRQ BIT_0S /* Clear Time Stamp Timer IRQ */ ++ ++/* POLL_CTRL 32 bit Polling Unit control register (Yukon-2 only) */ ++ /* Bit 31.. 6: reserved */ ++#define PC_CLR_IRQ_CHK BIT_5 /* Clear IRQ Check */ ++#define PC_POLL_RQ BIT_4 /* Poll Request Start */ ++#define PC_POLL_OP_ON BIT_3 /* Operational Mode On */ ++#define PC_POLL_OP_OFF BIT_2 /* Operational Mode Off */ ++#define PC_POLL_RST_CLR BIT_1 /* Clear Polling Unit Reset (Enable) */ ++#define PC_POLL_RST_SET BIT_0 /* Set Polling Unit Reset */ ++ ++ ++/* The bit definition of the following registers is still missing! */ ++/* B28_Y2_SMB_CONFIG 32 bit ASF SMBus Config Register */ ++/* B28_Y2_SMB_CSD_REG 32 bit ASF SMB Control/Status/Data */ ++/* B28_Y2_ASF_IRQ_V_BASE 32 bit ASF IRQ Vector Base */ ++ ++/* B28_Y2_ASF_STAT_CMD 32 bit ASF Status and Command Reg */ ++/* This register is used by the host driver software */ ++ /* Bit 31:5 reserved */ ++#define Y2_ASF_OS_PRES BIT_4S /* ASF operation system present */ ++#define Y2_ASF_RESET BIT_3S /* ASF system in reset state */ ++#define Y2_ASF_RUNNING BIT_2S /* ASF system operational */ ++#define Y2_ASF_CLR_HSTI BIT_1S /* Clear ASF IRQ */ ++#define Y2_ASF_IRQ BIT_0S /* Issue an IRQ to ASF system */ ++ ++#define Y2_ASF_UC_STATE (3<<2) /* ASF uC State */ ++#define Y2_ASF_CLK_HALT 0 /* ASF system clock stopped */ ++ ++/* B28_Y2_ASF_HOST_COM 32 bit ASF Host Communication Reg */ ++/* This register is used by the ASF firmware */ ++ /* Bit 31:2 reserved */ ++#define Y2_ASF_CLR_ASFI BIT_1 /* Clear host IRQ */ ++#define Y2_ASF_HOST_IRQ BIT_0 /* Issue an IRQ to HOST system */ ++ ++ ++/* STAT_CTRL 32 bit Status BMU control register (Yukon-2 only) */ ++ /* Bit 7.. 5: reserved */ ++#define SC_STAT_CLR_IRQ BIT_4 /* Status Burst IRQ clear */ ++#define SC_STAT_OP_ON BIT_3 /* Operational Mode On */ ++#define SC_STAT_OP_OFF BIT_2 /* Operational Mode Off */ ++#define SC_STAT_RST_CLR BIT_1 /* Clear Status Unit Reset (Enable) */ ++#define SC_STAT_RST_SET BIT_0 /* Set Status Unit Reset */ ++ + /* GMAC_CTRL 32 bit GMAC Control Reg (YUKON only) */ + /* Bits 31.. 8: reserved */ +-#define GMC_H_BURST_ON BIT_7 /* Half Duplex Burst Mode On */ +-#define GMC_H_BURST_OFF BIT_6 /* Half Duplex Burst Mode Off */ +-#define GMC_F_LOOPB_ON BIT_5 /* FIFO Loopback On */ +-#define GMC_F_LOOPB_OFF BIT_4 /* FIFO Loopback Off */ +-#define GMC_PAUSE_ON BIT_3 /* Pause On */ +-#define GMC_PAUSE_OFF BIT_2 /* Pause Off */ +-#define GMC_RST_CLR BIT_1 /* Clear GMAC Reset */ +-#define GMC_RST_SET BIT_0 /* Set GMAC Reset */ ++#define GMC_H_BURST_ON BIT_7 /* Half Duplex Burst Mode On */ ++#define GMC_H_BURST_OFF BIT_6 /* Half Duplex Burst Mode Off */ ++#define GMC_F_LOOPB_ON BIT_5 /* FIFO Loopback On */ ++#define GMC_F_LOOPB_OFF BIT_4 /* FIFO Loopback Off */ ++#define GMC_PAUSE_ON BIT_3 /* Pause On */ ++#define GMC_PAUSE_OFF BIT_2 /* Pause Off */ ++#define GMC_RST_CLR BIT_1 /* Clear GMAC Reset */ ++#define GMC_RST_SET BIT_0 /* Set GMAC Reset */ + + /* GPHY_CTRL 32 bit GPHY Control Reg (YUKON only) */ + /* Bits 31..29: reserved */ + #define GPC_SEL_BDT BIT_28 /* Select Bi-Dir. Transfer for MDC/MDIO */ +-#define GPC_INT_POL_HI BIT_27 /* IRQ Polarity is Active HIGH */ ++#define GPC_INT_POL BIT_27 /* IRQ Polarity is Active Low */ + #define GPC_75_OHM BIT_26 /* Use 75 Ohm Termination instead of 50 */ + #define GPC_DIS_FC BIT_25 /* Disable Automatic Fiber/Copper Detection */ + #define GPC_DIS_SLEEP BIT_24 /* Disable Energy Detect */ +@@ -1540,14 +2051,14 @@ + + /* GMAC_IRQ_SRC 8 bit GMAC Interrupt Source Reg (YUKON only) */ + /* GMAC_IRQ_MSK 8 bit GMAC Interrupt Mask Reg (YUKON only) */ +-#define GM_IS_TX_CO_OV BIT_5 /* Transmit Counter Overflow IRQ */ +-#define GM_IS_RX_CO_OV BIT_4 /* Receive Counter Overflow IRQ */ +-#define GM_IS_TX_FF_UR BIT_3 /* Transmit FIFO Underrun */ +-#define GM_IS_TX_COMPL BIT_2 /* Frame Transmission Complete */ +-#define GM_IS_RX_FF_OR BIT_1 /* Receive FIFO Overrun */ +-#define GM_IS_RX_COMPL BIT_0 /* Frame Reception Complete */ ++#define GM_IS_RX_CO_OV BIT_5S /* Receive Counter Overflow IRQ */ ++#define GM_IS_TX_CO_OV BIT_4S /* Transmit Counter Overflow IRQ */ ++#define GM_IS_TX_FF_UR BIT_3S /* Transmit FIFO Underrun */ ++#define GM_IS_TX_COMPL BIT_2S /* Frame Transmission Complete */ ++#define GM_IS_RX_FF_OR BIT_1S /* Receive FIFO Overrun */ ++#define GM_IS_RX_COMPL BIT_0S /* Frame Reception Complete */ + +-#define GMAC_DEF_MSK (GM_IS_TX_CO_OV | GM_IS_RX_CO_OV | \ ++#define GMAC_DEF_MSK (GM_IS_RX_CO_OV | GM_IS_TX_CO_OV | \ + GM_IS_TX_FF_UR) + + /* GMAC_LINK_CTRL 16 bit GMAC Link Control Reg (YUKON only) */ +@@ -1579,15 +2090,19 @@ + + #define WOL_CTL_DEFAULT \ + (WOL_CTL_DIS_PME_ON_LINK_CHG | \ +- WOL_CTL_DIS_PME_ON_PATTERN | \ +- WOL_CTL_DIS_PME_ON_MAGIC_PKT | \ +- WOL_CTL_DIS_LINK_CHG_UNIT | \ +- WOL_CTL_DIS_PATTERN_UNIT | \ +- WOL_CTL_DIS_MAGIC_PKT_UNIT) ++ WOL_CTL_DIS_PME_ON_PATTERN | \ ++ WOL_CTL_DIS_PME_ON_MAGIC_PKT | \ ++ WOL_CTL_DIS_LINK_CHG_UNIT | \ ++ WOL_CTL_DIS_PATTERN_UNIT | \ ++ WOL_CTL_DIS_MAGIC_PKT_UNIT) + + /* WOL_MATCH_CTL 8 bit WOL Match Control Reg */ + #define WOL_CTL_PATT_ENA(x) (BIT_0 << (x)) + ++/* WOL_PATT_PME 8 bit WOL PME Match Enable (Yukon-2) */ ++#define WOL_PATT_FORCE_PME BIT_7 /* Generates a PME */ ++#define WOL_PATT_MATCH_PME_ALL 0x7f ++ + #define SK_NUM_WOL_PATTERN 7 + #define SK_PATTERN_PER_WORD 4 + #define SK_BITMASK_PATTERN 7 +@@ -1606,17 +2121,17 @@ + SK_U32 TxAdrLo; /* Physical Tx Buffer Address lower dword */ + SK_U32 TxAdrHi; /* Physical Tx Buffer Address upper dword */ + SK_U32 TxStat; /* Transmit Frame Status Word */ +-#ifndef SK_USE_REV_DESC ++#ifndef SK_USE_REV_DESC + SK_U16 TxTcpOffs; /* TCP Checksum Calculation Start Value */ + SK_U16 TxRes1; /* 16 bit reserved field */ + SK_U16 TxTcpWp; /* TCP Checksum Write Position */ + SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */ +-#else /* SK_USE_REV_DESC */ ++#else /* SK_USE_REV_DESC */ + SK_U16 TxRes1; /* 16 bit reserved field */ + SK_U16 TxTcpOffs; /* TCP Checksum Calculation Start Value */ + SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */ + SK_U16 TxTcpWp; /* TCP Checksum Write Position */ +-#endif /* SK_USE_REV_DESC */ ++#endif /* SK_USE_REV_DESC */ + SK_U32 TxRes2; /* 32 bit reserved field */ + } SK_HWTXD; + +@@ -1628,29 +2143,262 @@ + SK_U32 RxAdrHi; /* Physical Rx Buffer Address upper dword */ + SK_U32 RxStat; /* Receive Frame Status Word */ + SK_U32 RxTiSt; /* Receive Time Stamp (from XMAC on GENESIS) */ +-#ifndef SK_USE_REV_DESC +- SK_U16 RxTcpSum1; /* TCP Checksum 1 */ +- SK_U16 RxTcpSum2; /* TCP Checksum 2 */ ++#ifndef SK_USE_REV_DESC ++ SK_U16 RxTcpSum1; /* Rx TCP Checksum 1 */ ++ SK_U16 RxTcpSum2; /* Rx TCP Checksum 2 */ + SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */ + SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */ +-#else /* SK_USE_REV_DESC */ +- SK_U16 RxTcpSum2; /* TCP Checksum 2 */ +- SK_U16 RxTcpSum1; /* TCP Checksum 1 */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 RxTcpSum2; /* Rx TCP Checksum 2 */ ++ SK_U16 RxTcpSum1; /* Rx TCP Checksum 1 */ + SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */ + SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */ +-#endif /* SK_USE_REV_DESC */ ++#endif /* SK_USE_REV_DESC */ + } SK_HWRXD; + + /* + * Drivers which use the reverse descriptor feature (PCI_OUR_REG_2) + * should set the define SK_USE_REV_DESC. +- * Structures are 'normaly' not endianess dependent. But in +- * this case the SK_U16 fields are bound to bit positions inside the +- * descriptor. RxTcpSum1 e.g. must start at bit 0 within the 6.th DWord. ++ * Structures are 'normally' not endianess dependent. But in this case ++ * the SK_U16 fields are bound to bit positions inside the descriptor. ++ * RxTcpSum1 e.g. must start at bit 0 within the 7.th DWord. + * The bit positions inside a DWord are of course endianess dependent and +- * swaps if the DWord is swapped by the hardware. ++ * swap if the DWord is swapped by the hardware. + */ + ++/* YUKON-2 descriptors ******************************************************/ ++ ++typedef struct _TxChksum { ++#ifndef SK_USE_REV_DESC ++ SK_U16 TxTcpWp; /* TCP Checksum Write Position */ ++ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 TxTcpSp; /* TCP Checksum Calculation Start Position */ ++ SK_U16 TxTcpWp; /* TCP Checksum Write Position */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWTXCS; ++ ++typedef struct _LargeSend { ++#ifndef SK_USE_REV_DESC ++ SK_U16 Length; /* Large Send Segment Length */ ++ SK_U16 Reserved; /* reserved */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 Reserved; /* reserved */ ++ SK_U16 Length; /* Large Send Segment Length */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWTXLS; ++ ++typedef union u_HwTxBuf { ++ SK_U16 BufLen; /* Tx Buffer Length */ ++ SK_U16 VlanTag; /* VLAN Tag */ ++ SK_U16 InitCsum; /* Init. Checksum */ ++} SK_HWTXBUF; ++ ++/* Tx List Element structure */ ++typedef struct s_HwLeTx { ++ union { ++ SK_U32 BufAddr; /* Tx LE Buffer Address high/low */ ++ SK_HWTXCS ChkSum; /* Tx LE TCP Checksum parameters */ ++ SK_HWTXLS LargeSend;/* Large Send length */ ++ } TxUn; ++#ifndef SK_USE_REV_DESC ++ SK_HWTXBUF Send; ++ SK_U8 ControlFlags; /* Tx LE Control field or Lock Number */ ++ SK_U8 Opcode; /* Tx LE Opcode field */ ++#else /* SK_USE_REV_DESC */ ++ SK_U8 Opcode; /* Tx LE Opcode field */ ++ SK_U8 ControlFlags; /* Tx LE Control field or Lock Number */ ++ SK_HWTXBUF Send; ++#endif /* SK_USE_REV_DESC */ ++} SK_HWLETX; ++ ++typedef struct _RxChkSum{ ++#ifndef SK_USE_REV_DESC ++ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */ ++ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 RxTcpSp2; /* TCP Checksum Calculation Start Position 2 */ ++ SK_U16 RxTcpSp1; /* TCP Checksum Calculation Start Position 1 */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWRXCS; ++ ++/* Rx List Element structure */ ++typedef struct s_HwLeRx { ++ union { ++ SK_U32 BufAddr; /* Rx LE Buffer Address high/low */ ++ SK_HWRXCS ChkSum; /* Rx LE TCP Checksum parameters */ ++ } RxUn; ++#ifndef SK_USE_REV_DESC ++ SK_U16 BufferLength; /* Rx LE Buffer Length field */ ++ SK_U8 ControlFlags; /* Rx LE Control field */ ++ SK_U8 Opcode; /* Rx LE Opcode field */ ++#else /* SK_USE_REV_DESC */ ++ SK_U8 Opcode; /* Rx LE Opcode field */ ++ SK_U8 ControlFlags; /* Rx LE Control field */ ++ SK_U16 BufferLength; /* Rx LE Buffer Length field */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWLERX; ++ ++typedef struct s_StRxTCPChkSum { ++#ifndef SK_USE_REV_DESC ++ SK_U16 RxTCPSum1; /* Rx TCP Checksum 1 */ ++ SK_U16 RxTCPSum2; /* Rx TCP Checksum 2 */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 RxTCPSum2; /* Rx TCP Checksum 2 */ ++ SK_U16 RxTCPSum1; /* Rx TCP Checksum 1 */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWSTCS; ++ ++typedef struct s_StRxRssFlags { ++#ifndef SK_USE_REV_DESC ++ SK_U8 FlagField; /* contains TCP and IP flags */ ++ SK_U8 reserved; /* reserved */ ++#else /* SK_USE_REV_DESC */ ++ SK_U8 reserved; /* reserved */ ++ SK_U8 FlagField; /* contains TCP and IP flags */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWSTRSS; ++ ++/* bit definition of RSS LE bit 32/33 (SK_HWSTRSS.FlagField) */ ++ /* bit 7..2 reserved */ ++#define RSS_TCP_FLAG BIT_1S /* RSS value related to TCP area */ ++#define RSS_IP_FLAG BIT_0S /* RSS value related to IP area */ ++/* StRxRssValue is valid if at least RSS_IP_FLAG is set. */ ++/* For protocol errors or other protocols an empty RSS LE is generated. */ ++ ++typedef union u_HwStBuf { ++ SK_U16 BufLen; /* Rx Buffer Length */ ++ SK_U16 VlanTag; /* VLAN Tag */ ++ SK_U16 StTxStatHi; /* Tx Queue Status (high) */ ++ SK_HWSTRSS Rss; /* Flag Field for TCP and IP protocol */ ++} SK_HWSTBUF; ++ ++/* Status List Element structure */ ++typedef struct s_HwLeSt { ++ union { ++ SK_U32 StRxStatWord; /* Rx Status Dword */ ++ SK_U32 StRxTimeStamp; /* Rx Timestamp */ ++ SK_HWSTCS StRxTCPCSum; /* Rx TCP Checksum */ ++ SK_U32 StTxStatLow; /* Tx Queue Status (low) */ ++ SK_U32 StRxRssValue; /* Rx RSS value */ ++ } StUn; ++#ifndef SK_USE_REV_DESC ++ SK_HWSTBUF Stat; ++ SK_U8 Link; /* Status LE Link field */ ++ SK_U8 Opcode; /* Status LE Opcode field */ ++#else /* SK_USE_REV_DESC */ ++ SK_U8 Opcode; /* Status LE Opcode field */ ++ SK_U8 Link; /* Status LE Link field */ ++ SK_HWSTBUF Stat; ++#endif /* SK_USE_REV_DESC */ ++} SK_HWLEST; ++ ++/* Special Action List Element */ ++typedef struct s_HwLeSa { ++#ifndef SK_USE_REV_DESC ++ SK_U16 TxAIdxVld; /* Special Action LE TxA Put Index field */ ++ SK_U16 TxSIdxVld; /* Special Action LE TxS Put Index field */ ++ SK_U16 RxIdxVld; /* Special Action LE Rx Put Index field */ ++ SK_U8 Link; /* Special Action LE Link field */ ++ SK_U8 Opcode; /* Special Action LE Opcode field */ ++#else /* SK_USE_REV_DESC */ ++ SK_U16 TxSIdxVld; /* Special Action LE TxS Put Index field */ ++ SK_U16 TxAIdxVld; /* Special Action LE TxA Put Index field */ ++ SK_U8 Opcode; /* Special Action LE Opcode field */ ++ SK_U8 Link; /* Special Action LE Link field */ ++ SK_U16 RxIdxVld; /* Special Action LE Rx Put Index field */ ++#endif /* SK_USE_REV_DESC */ ++} SK_HWLESA; ++ ++/* Common List Element union */ ++typedef union u_HwLeTxRxSt { ++ /* Transmit List Element Structure */ ++ SK_HWLETX Tx; ++ /* Receive List Element Structure */ ++ SK_HWLERX Rx; ++ /* Status List Element Structure */ ++ SK_HWLEST St; ++ /* Special Action List Element Structure */ ++ SK_HWLESA Sa; ++ /* Full List Element */ ++ SK_U64 Full; ++} SK_HWLE; ++ ++/* mask and shift value to get Tx async queue status for port 1 */ ++#define STLE_TXA1_MSKL 0x00000fff ++#define STLE_TXA1_SHIFTL 0 ++ ++/* mask and shift value to get Tx sync queue status for port 1 */ ++#define STLE_TXS1_MSKL 0x00fff000 ++#define STLE_TXS1_SHIFTL 12 ++ ++/* mask and shift value to get Tx async queue status for port 2 */ ++#define STLE_TXA2_MSKL 0xff000000 ++#define STLE_TXA2_SHIFTL 24 ++#define STLE_TXA2_MSKH 0x000f ++/* this one shifts up */ ++#define STLE_TXA2_SHIFTH 8 ++ ++/* mask and shift value to get Tx sync queue status for port 2 */ ++#define STLE_TXS2_MSKL 0x00000000 ++#define STLE_TXS2_SHIFTL 0 ++#define STLE_TXS2_MSKH 0xfff0 ++#define STLE_TXS2_SHIFTH 4 ++ ++/* YUKON-2 bit values */ ++#define HW_OWNER BIT_7 ++#define SW_OWNER 0 ++ ++#define PU_PUTIDX_VALID BIT_12 ++ ++/* YUKON-2 Control flags */ ++#define UDPTCP BIT_0S ++#define CALSUM BIT_1S ++#define WR_SUM BIT_2S ++#define INIT_SUM BIT_3S ++#define LOCK_SUM BIT_4S ++#define INS_VLAN BIT_5S ++#define FRC_STAT BIT_6S ++#define EOP BIT_7S ++ ++#define TX_LOCK BIT_8S ++#define BUF_SEND BIT_9S ++#define PACKET_SEND BIT_10S ++ ++#define NO_WARNING BIT_14S ++#define NO_UPDATE BIT_15S ++ ++/* YUKON-2 Rx/Tx opcodes defines */ ++#define OP_TCPWRITE 0x11 ++#define OP_TCPSTART 0x12 ++#define OP_TCPINIT 0x14 ++#define OP_TCPLCK 0x18 ++#define OP_TCPCHKSUM OP_TCPSTART ++#define OP_TCPIS (OP_TCPINIT | OP_TCPSTART) ++#define OP_TCPLW (OP_TCPLCK | OP_TCPWRITE) ++#define OP_TCPLSW (OP_TCPLCK | OP_TCPSTART | OP_TCPWRITE) ++#define OP_TCPLISW (OP_TCPLCK | OP_TCPINIT | OP_TCPSTART | OP_TCPWRITE) ++#define OP_ADDR64 0x21 ++#define OP_VLAN 0x22 ++#define OP_ADDR64VLAN (OP_ADDR64 | OP_VLAN) ++#define OP_LRGLEN 0x24 ++#define OP_LRGLENVLAN (OP_LRGLEN | OP_VLAN) ++#define OP_BUFFER 0x40 ++#define OP_PACKET 0x41 ++#define OP_LARGESEND 0x43 ++ ++/* YUKON-2 STATUS opcodes defines */ ++#define OP_RXSTAT 0x60 ++#define OP_RXTIMESTAMP 0x61 ++#define OP_RXVLAN 0x62 ++#define OP_RXCHKS 0x64 ++#define OP_RXCHKSVLAN (OP_RXCHKS | OP_RXVLAN) ++#define OP_RXTIMEVLAN (OP_RXTIMESTAMP | OP_RXVLAN) ++#define OP_RSS_HASH 0x65 ++#define OP_TXINDEXLE 0x68 ++ ++/* YUKON-2 SPECIAL opcodes defines */ ++#define OP_PUTIDX 0x70 + + /* Descriptor Bit Definition */ + /* TxCtrl Transmit Buffer Control Field */ +@@ -1685,6 +2433,10 @@ + + /* macros ********************************************************************/ + ++/* Macro for accessing the key registers */ ++#define RSS_KEY_ADDR(Port, KeyIndex) \ ++ ((B4_RSS_KEY | ( ((Port) == 0) ? 0 : 0x80)) + (KeyIndex)) ++ + /* Receive and Transmit Queues */ + #define Q_R1 0x0000 /* Receive Queue 1 */ + #define Q_R2 0x0080 /* Receive Queue 2 */ +@@ -1693,6 +2445,10 @@ + #define Q_XS2 0x0300 /* Synchronous Transmit Queue 2 */ + #define Q_XA2 0x0380 /* Asynchronous Transmit Queue 2 */ + ++#define Q_ASF_R1 0x100 /* ASF Rx Queue 1 */ ++#define Q_ASF_R2 0x180 /* ASF Rx Queue 2 */ ++#define Q_ASF_T1 0x140 /* ASF Tx Queue 1 */ ++#define Q_ASF_T2 0x1c0 /* ASF Tx Queue 2 */ + /* + * Macro Q_ADDR() + * +@@ -1704,11 +2460,27 @@ + * Offs Queue register offset. + * Values: Q_D, Q_DA_L ... Q_T2, Q_T3 + * +- * usage SK_IN32(pAC, Q_ADDR(Q_R2, Q_BC), pVal) ++ * usage SK_IN32(IoC, Q_ADDR(Q_R2, Q_BC), pVal) + */ + #define Q_ADDR(Queue, Offs) (B8_Q_REGS + (Queue) + (Offs)) + + /* ++ * Macro Y2_PREF_Q_ADDR() ++ * ++ * Use this macro to access the Prefetch Units of the receive and ++ * transmit queues of Yukon-2. ++ * ++ * para: ++ * Queue Queue to access. ++ * Values: Q_R1, Q_R2, Q_XS1, Q_XA1, Q_XS2, Q_XA2, ++ * Offs Queue register offset. ++ * Values: PREF_UNIT_CTRL_REG ... PREF_UNIT_FIFO_LEV_REG ++ * ++ * usage SK_IN16(IoC, Y2_Q_ADDR(Q_R2, PREF_UNIT_GET_IDX_REG), pVal) ++ */ ++#define Y2_PREF_Q_ADDR(Queue, Offs) (Y2_B8_PREF_REGS + (Queue) + (Offs)) ++ ++/* + * Macro RB_ADDR() + * + * Use this macro to access the RAM Buffer Registers. +@@ -1719,14 +2491,14 @@ + * Offs Queue register offset. + * Values: RB_START, RB_END ... RB_LEV, RB_CTRL + * +- * usage SK_IN32(pAC, RB_ADDR(Q_R2, RB_RP), pVal) ++ * usage SK_IN32(IoC, RB_ADDR(Q_R2, RB_RP), pVal) + */ + #define RB_ADDR(Queue, Offs) (B16_RAM_REGS + (Queue) + (Offs)) + + + /* MAC Related Registers */ +-#define MAC_1 0 /* belongs to the port near the slot */ +-#define MAC_2 1 /* belongs to the port far away from the slot */ ++#define MAC_1 0 /* 1st port */ ++#define MAC_2 1 /* 2nd port */ + + /* + * Macro MR_ADDR() +@@ -1740,19 +2512,10 @@ + * Values: RX_MFF_EA, RX_MFF_WP ... LNK_LED_REG, + * TX_MFF_EA, TX_MFF_WP ... TX_LED_TST + * +- * usage SK_IN32(pAC, MR_ADDR(MAC_1, TX_MFF_EA), pVal) ++ * usage SK_IN32(IoC, MR_ADDR(MAC_1, TX_MFF_EA), pVal) + */ + #define MR_ADDR(Mac, Offs) (((Mac) << 7) + (Offs)) + +-#ifdef SK_LITTLE_ENDIAN +-#define XM_WORD_LO 0 +-#define XM_WORD_HI 1 +-#else /* !SK_LITTLE_ENDIAN */ +-#define XM_WORD_LO 1 +-#define XM_WORD_HI 0 +-#endif /* !SK_LITTLE_ENDIAN */ +- +- + /* + * macros to access the XMAC (GENESIS only) + * +@@ -1777,22 +2540,31 @@ + #define XMA(Mac, Reg) \ + ((BASE_XMAC_1 + (Mac) * (BASE_XMAC_2 - BASE_XMAC_1)) | ((Reg) << 1)) + +-#define XM_IN16(IoC, Mac, Reg, pVal) \ +- SK_IN16((IoC), XMA((Mac), (Reg)), (pVal)) ++#define XM_IN16(IoC, Mac, Reg, pVal) \ ++ SK_IN16(IoC, XMA(Mac, Reg), pVal) ++ ++#define XM_OUT16(IoC, Mac, Reg, Val) \ ++ SK_OUT16(IoC, XMA(Mac, Reg), Val) ++ ++#ifdef SK_LITTLE_ENDIAN ++ ++#define XM_IN32(IoC, Mac, Reg, pVal) { \ ++ SK_IN16(IoC, XMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal)); \ ++ SK_IN16(IoC, XMA(Mac, (Reg) + 2), (SK_U16 SK_FAR *)(pVal) + 1); \ ++} + +-#define XM_OUT16(IoC, Mac, Reg, Val) \ +- SK_OUT16((IoC), XMA((Mac), (Reg)), (Val)) ++#else /* !SK_LITTLE_ENDIAN */ + +-#define XM_IN32(IoC, Mac, Reg, pVal) { \ +- SK_IN16((IoC), XMA((Mac), (Reg)), \ +- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_LO]); \ +- SK_IN16((IoC), XMA((Mac), (Reg+2)), \ +- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_HI]); \ ++#define XM_IN32(IoC, Mac, Reg, pVal) { \ ++ SK_IN16(IoC, XMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal) + 1); \ ++ SK_IN16(IoC, XMA(Mac, (Reg) + 2), (SK_U16 SK_FAR *)(pVal)); \ + } + ++#endif /* !SK_LITTLE_ENDIAN */ ++ + #define XM_OUT32(IoC, Mac, Reg, Val) { \ +- SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16)((Val) & 0xffffL)); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16)(((Val) >> 16) & 0xffffL));\ ++ SK_OUT16(IoC, XMA(Mac, Reg), (SK_U16)((Val) & 0xffffL)); \ ++ SK_OUT16(IoC, XMA(Mac, (Reg) + 2), (SK_U16)(((Val) >> 16) & 0xffffL)); \ + } + + /* Remember: we are always writing to / reading from LITTLE ENDIAN memory */ +@@ -1802,13 +2574,13 @@ + SK_U8 *pByte; \ + pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \ + SK_IN16((IoC), XMA((Mac), (Reg)), &Word); \ +- pByte[0] = (SK_U8)(Word & 0x00ff); \ ++ pByte[0] = (SK_U8)(Word & 0x00ff); \ + pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), XMA((Mac), (Reg+2)), &Word); \ +- pByte[2] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), XMA((Mac), (Reg) + 2), &Word); \ ++ pByte[2] = (SK_U8)(Word & 0x00ff); \ + pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), XMA((Mac), (Reg+4)), &Word); \ +- pByte[4] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), XMA((Mac), (Reg) + 4), &Word); \ ++ pByte[4] = (SK_U8)(Word & 0x00ff); \ + pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \ + } + +@@ -1818,10 +2590,10 @@ + SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16) \ + (((SK_U16)(pByte[0]) & 0x00ff) | \ + (((SK_U16)(pByte[1]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16) \ ++ SK_OUT16((IoC), XMA((Mac), (Reg) + 2), (SK_U16) \ + (((SK_U16)(pByte[2]) & 0x00ff) | \ + (((SK_U16)(pByte[3]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+4)), (SK_U16) \ ++ SK_OUT16((IoC), XMA((Mac), (Reg) + 4), (SK_U16) \ + (((SK_U16)(pByte[4]) & 0x00ff) | \ + (((SK_U16)(pByte[5]) << 8) & 0xff00))); \ + } +@@ -1831,16 +2603,16 @@ + SK_U8 SK_FAR *pByte; \ + pByte = (SK_U8 SK_FAR *)&((SK_U8 SK_FAR *)(pVal))[0]; \ + SK_IN16((IoC), XMA((Mac), (Reg)), &Word); \ +- pByte[0] = (SK_U8)(Word & 0x00ff); \ ++ pByte[0] = (SK_U8)(Word & 0x00ff); \ + pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), XMA((Mac), (Reg+2)), &Word); \ +- pByte[2] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), XMA((Mac), (Reg) + 2), &Word); \ ++ pByte[2] = (SK_U8)(Word & 0x00ff); \ + pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), XMA((Mac), (Reg+4)), &Word); \ +- pByte[4] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), XMA((Mac), (Reg) + 4), &Word); \ ++ pByte[4] = (SK_U8)(Word & 0x00ff); \ + pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), XMA((Mac), (Reg+6)), &Word); \ +- pByte[6] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), XMA((Mac), (Reg) + 6), &Word); \ ++ pByte[6] = (SK_U8)(Word & 0x00ff); \ + pByte[7] = (SK_U8)((Word >> 8) & 0x00ff); \ + } + +@@ -1850,13 +2622,13 @@ + SK_OUT16((IoC), XMA((Mac), (Reg)), (SK_U16) \ + (((SK_U16)(pByte[0]) & 0x00ff)| \ + (((SK_U16)(pByte[1]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+2)), (SK_U16) \ ++ SK_OUT16((IoC), XMA((Mac), (Reg) + 2), (SK_U16) \ + (((SK_U16)(pByte[2]) & 0x00ff)| \ + (((SK_U16)(pByte[3]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+4)), (SK_U16) \ ++ SK_OUT16((IoC), XMA((Mac), (Reg) + 4), (SK_U16) \ + (((SK_U16)(pByte[4]) & 0x00ff)| \ + (((SK_U16)(pByte[5]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), XMA((Mac), (Reg+6)), (SK_U16) \ ++ SK_OUT16((IoC), XMA((Mac), (Reg) + 6), (SK_U16) \ + (((SK_U16)(pByte[6]) & 0x00ff)| \ + (((SK_U16)(pByte[7]) << 8) & 0xff00))); \ + } +@@ -1866,7 +2638,7 @@ + * + * GM_IN16(), to read a 16 bit register (e.g. GM_GP_STAT) + * GM_OUT16(), to write a 16 bit register (e.g. GM_GP_CTRL) +- * GM_IN32(), to read a 32 bit register (e.g. GM_) ++ * GM_IN32(), to read a 32 bit register (e.g. GM_RXF_UC_OK) + * GM_OUT32(), to write a 32 bit register (e.g. GM_) + * GM_INADDR(), to read a network address register (e.g. GM_SRC_ADDR_1L) + * GM_OUTADDR(), to write a network address register (e.g. GM_SRC_ADDR_2L) +@@ -1885,22 +2657,31 @@ + #define GMA(Mac, Reg) \ + ((BASE_GMAC_1 + (Mac) * (BASE_GMAC_2 - BASE_GMAC_1)) | (Reg)) + +-#define GM_IN16(IoC, Mac, Reg, pVal) \ +- SK_IN16((IoC), GMA((Mac), (Reg)), (pVal)) ++#define GM_IN16(IoC, Mac, Reg, pVal) \ ++ SK_IN16(IoC, GMA(Mac, Reg), pVal) ++ ++#define GM_OUT16(IoC, Mac, Reg, Val) \ ++ SK_OUT16(IoC, GMA(Mac, Reg), Val) + +-#define GM_OUT16(IoC, Mac, Reg, Val) \ +- SK_OUT16((IoC), GMA((Mac), (Reg)), (Val)) ++#ifdef SK_LITTLE_ENDIAN + +-#define GM_IN32(IoC, Mac, Reg, pVal) { \ +- SK_IN16((IoC), GMA((Mac), (Reg)), \ +- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_LO]); \ +- SK_IN16((IoC), GMA((Mac), (Reg+4)), \ +- (SK_U16 SK_FAR*)&((SK_U16 SK_FAR*)(pVal))[XM_WORD_HI]); \ ++#define GM_IN32(IoC, Mac, Reg, pVal) { \ ++ SK_IN16(IoC, GMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal)); \ ++ SK_IN16((IoC), GMA(Mac, (Reg) + 4), (SK_U16 SK_FAR *)(pVal) + 1); \ + } + ++#else /* !SK_LITTLE_ENDIAN */ ++ ++#define GM_IN32(IoC, Mac, Reg, pVal) { \ ++ SK_IN16(IoC, GMA(Mac, Reg), (SK_U16 SK_FAR *)(pVal) + 1); \ ++ SK_IN16(IoC, GMA(Mac, (Reg) + 4), (SK_U16 SK_FAR *)(pVal)); \ ++} ++ ++#endif /* !SK_LITTLE_ENDIAN */ ++ + #define GM_OUT32(IoC, Mac, Reg, Val) { \ +- SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16)((Val) & 0xffffL)); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16)(((Val) >> 16) & 0xffffL));\ ++ SK_OUT16(IoC, GMA(Mac, Reg), (SK_U16)((Val) & 0xffffL)); \ ++ SK_OUT16(IoC, GMA(Mac, (Reg) + 4), (SK_U16)(((Val) >> 16) & 0xffffL)); \ + } + + #define GM_INADDR(IoC, Mac, Reg, pVal) { \ +@@ -1908,13 +2689,13 @@ + SK_U8 *pByte; \ + pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \ + SK_IN16((IoC), GMA((Mac), (Reg)), &Word); \ +- pByte[0] = (SK_U8)(Word & 0x00ff); \ ++ pByte[0] = (SK_U8)(Word & 0x00ff); \ + pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), GMA((Mac), (Reg+4)), &Word); \ +- pByte[2] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), GMA((Mac), (Reg) + 4), &Word); \ ++ pByte[2] = (SK_U8)(Word & 0x00ff); \ + pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), GMA((Mac), (Reg+8)), &Word); \ +- pByte[4] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), GMA((Mac), (Reg) + 8), &Word); \ ++ pByte[4] = (SK_U8)(Word & 0x00ff); \ + pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \ + } + +@@ -1924,10 +2705,10 @@ + SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16) \ + (((SK_U16)(pByte[0]) & 0x00ff) | \ + (((SK_U16)(pByte[1]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16) \ ++ SK_OUT16((IoC), GMA((Mac), (Reg) + 4), (SK_U16) \ + (((SK_U16)(pByte[2]) & 0x00ff) | \ + (((SK_U16)(pByte[3]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+8)), (SK_U16) \ ++ SK_OUT16((IoC), GMA((Mac), (Reg) + 8), (SK_U16) \ + (((SK_U16)(pByte[4]) & 0x00ff) | \ + (((SK_U16)(pByte[5]) << 8) & 0xff00))); \ + } +@@ -1937,16 +2718,16 @@ + SK_U8 *pByte; \ + pByte = (SK_U8 *)&((SK_U8 *)(pVal))[0]; \ + SK_IN16((IoC), GMA((Mac), (Reg)), &Word); \ +- pByte[0] = (SK_U8)(Word & 0x00ff); \ ++ pByte[0] = (SK_U8)(Word & 0x00ff); \ + pByte[1] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), GMA((Mac), (Reg+4)), &Word); \ +- pByte[2] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), GMA((Mac), (Reg) + 4), &Word); \ ++ pByte[2] = (SK_U8)(Word & 0x00ff); \ + pByte[3] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), GMA((Mac), (Reg+8)), &Word); \ +- pByte[4] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), GMA((Mac), (Reg) + 8), &Word); \ ++ pByte[4] = (SK_U8)(Word & 0x00ff); \ + pByte[5] = (SK_U8)((Word >> 8) & 0x00ff); \ +- SK_IN16((IoC), GMA((Mac), (Reg+12)), &Word); \ +- pByte[6] = (SK_U8)(Word & 0x00ff); \ ++ SK_IN16((IoC), GMA((Mac), (Reg) + 12), &Word); \ ++ pByte[6] = (SK_U8)(Word & 0x00ff); \ + pByte[7] = (SK_U8)((Word >> 8) & 0x00ff); \ + } + +@@ -1956,13 +2737,13 @@ + SK_OUT16((IoC), GMA((Mac), (Reg)), (SK_U16) \ + (((SK_U16)(pByte[0]) & 0x00ff)| \ + (((SK_U16)(pByte[1]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+4)), (SK_U16) \ ++ SK_OUT16((IoC), GMA((Mac), (Reg) + 4), (SK_U16) \ + (((SK_U16)(pByte[2]) & 0x00ff)| \ + (((SK_U16)(pByte[3]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+8)), (SK_U16) \ ++ SK_OUT16((IoC), GMA((Mac), (Reg) + 8), (SK_U16) \ + (((SK_U16)(pByte[4]) & 0x00ff)| \ + (((SK_U16)(pByte[5]) << 8) & 0xff00))); \ +- SK_OUT16((IoC), GMA((Mac), (Reg+12)), (SK_U16) \ ++ SK_OUT16((IoC), GMA((Mac), (Reg) + 12), (SK_U16) \ + (((SK_U16)(pByte[6]) & 0x00ff)| \ + (((SK_U16)(pByte[7]) << 8) & 0xff00))); \ + } +@@ -2010,30 +2791,30 @@ + * + * usage: PHY_READ(IoC, pPort, MAC_1, PHY_CTRL, Value); + * Warning: a PHY_READ on an uninitialized PHY (PHY still in reset) never +- * comes back. This is checked in DEBUG mode. ++ * comes back. This is checked in DEBUG mode. + */ + #ifndef DEBUG + #define PHY_READ(IoC, pPort, Mac, PhyReg, pVal) { \ +- SK_U16 Mmu; \ ++ SK_U16 Mmu; \ + \ + XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \ + XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \ + if ((pPort)->PhyType != SK_PHY_XMAC) { \ +- do { \ ++ do { \ + XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \ + } while ((Mmu & XM_MMU_PHY_RDY) == 0); \ + XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \ +- } \ ++ } \ + } + #else + #define PHY_READ(IoC, pPort, Mac, PhyReg, pVal) { \ +- SK_U16 Mmu; \ ++ SK_U16 Mmu; \ + int __i = 0; \ + \ + XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \ + XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \ + if ((pPort)->PhyType != SK_PHY_XMAC) { \ +- do { \ ++ do { \ + XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \ + __i++; \ + if (__i > 100000) { \ +@@ -2044,7 +2825,7 @@ + } \ + } while ((Mmu & XM_MMU_PHY_RDY) == 0); \ + XM_IN16((IoC), (Mac), XM_PHY_DATA, (pVal)); \ +- } \ ++ } \ + } + #endif /* DEBUG */ + +@@ -2052,17 +2833,17 @@ + SK_U16 Mmu; \ + \ + if ((pPort)->PhyType != SK_PHY_XMAC) { \ +- do { \ ++ do { \ + XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \ + } while ((Mmu & XM_MMU_PHY_BUSY) != 0); \ +- } \ ++ } \ + XM_OUT16((IoC), (Mac), XM_PHY_ADDR, (PhyReg) | (pPort)->PhyAddr); \ + XM_OUT16((IoC), (Mac), XM_PHY_DATA, (Val)); \ + if ((pPort)->PhyType != SK_PHY_XMAC) { \ +- do { \ ++ do { \ + XM_IN16((IoC), (Mac), XM_MMU_CMD, &Mmu); \ + } while ((Mmu & XM_MMU_PHY_BUSY) != 0); \ +- } \ ++ } \ + } + + /* +@@ -2071,12 +2852,14 @@ + * Use this macro to access PCI config register from the I/O space. + * + * para: ++ * pAC Pointer to adapter context + * Addr PCI configuration register to access. + * Values: PCI_VENDOR_ID ... PCI_VPD_ADR_REG, + * +- * usage SK_IN16(pAC, PCI_C(PCI_VENDOR_ID), pVal); ++ * usage SK_IN16(IoC, PCI_C(pAC, PCI_VENDOR_ID), pVal); + */ +-#define PCI_C(Addr) (B7_CFG_SPC + (Addr)) /* PCI Config Space */ ++#define PCI_C(p, Addr) \ ++ (((CHIP_ID_YUKON_2(p)) ? Y2_CFG_SPC : B7_CFG_SPC) + (Addr)) + + /* + * Macro SK_HW_ADDR(Base, Addr) +@@ -2088,7 +2871,7 @@ + * Addr Address offset + * + * usage: May be used in SK_INxx and SK_OUTxx macros +- * #define SK_IN8(pAC, Addr, pVal) ...\ ++ * #define SK_IN8(IoC, Addr, pVal) ...\ + * *pVal = (SK_U8)inp(SK_HW_ADDR(pAC->Hw.Iop, Addr))) + */ + #ifdef SK_MEM_MAPPED_IO +@@ -2107,12 +2890,27 @@ + * para: + * pAC Pointer to adapter context struct + * IoC I/O context needed for SK I/O macros +- * Port Port number ++ * Port Port number + * Mode Mode to set for this LED + */ + #define SK_HWAC_LINK_LED(pAC, IoC, Port, Mode) \ + SK_OUT8(IoC, MR_ADDR(Port, LNK_LED_REG), Mode); + ++#define SK_SET_GP_IO(IoC, Bit) { \ ++ SK_U32 DWord; \ ++ SK_IN32(IoC, B2_GP_IO, &DWord); \ ++ DWord |= ((GP_DIR_0 | GP_IO_0) << (Bit));\ ++ SK_OUT32(IoC, B2_GP_IO, DWord); \ ++} ++ ++#define SK_CLR_GP_IO(IoC, Bit) { \ ++ SK_U32 DWord; \ ++ SK_IN32(IoC, B2_GP_IO, &DWord); \ ++ DWord &= ~((GP_DIR_0 | GP_IO_0) << (Bit));\ ++ SK_OUT32(IoC, B2_GP_IO, DWord); \ ++} ++ ++#define SK_GE_PCI_FIFO_SIZE 1600 /* PCI FIFO Size */ + + /* typedefs *******************************************************************/ + +@@ -2124,3 +2922,4 @@ + #endif /* __cplusplus */ + + #endif /* __INC_SKGEHW_H */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgehwt.h linux-2.6.9.new/drivers/net/sk98lin/h/skgehwt.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgehwt.h 2004-10-19 05:53:51.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgehwt.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skhwt.h + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.7 $ +- * Date: $Date: 2003/09/16 12:55:08 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:09 $ + * Purpose: Defines for the hardware timer functions + * + ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgei2c.h linux-2.6.9.new/drivers/net/sk98lin/h/skgei2c.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgei2c.h 2004-10-19 05:54:40.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgei2c.h 1970-01-01 08:00:00.000000000 +0800 +@@ -1,210 +0,0 @@ +-/****************************************************************************** +- * +- * Name: skgei2c.h +- * Project: Gigabit Ethernet Adapters, TWSI-Module +- * Version: $Revision: 1.25 $ +- * Date: $Date: 2003/10/20 09:06:05 $ +- * Purpose: Special defines for TWSI +- * +- ******************************************************************************/ +- +-/****************************************************************************** +- * +- * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * The information in this file is provided "AS IS" without warranty. +- * +- ******************************************************************************/ +- +-/* +- * SKGEI2C.H contains all SK-98xx specific defines for the TWSI handling +- */ +- +-#ifndef _INC_SKGEI2C_H_ +-#define _INC_SKGEI2C_H_ +- +-/* +- * Macros to access the B2_I2C_CTRL +- */ +-#define SK_I2C_CTL(IoC, flag, dev, dev_size, reg, burst) \ +- SK_OUT32(IoC, B2_I2C_CTRL,\ +- (flag ? 0x80000000UL : 0x0L) | \ +- (((SK_U32)reg << 16) & I2C_ADDR) | \ +- (((SK_U32)dev << 9) & I2C_DEV_SEL) | \ +- (dev_size & I2C_DEV_SIZE) | \ +- ((burst << 4) & I2C_BURST_LEN)) +- +-#define SK_I2C_STOP(IoC) { \ +- SK_U32 I2cCtrl; \ +- SK_IN32(IoC, B2_I2C_CTRL, &I2cCtrl); \ +- SK_OUT32(IoC, B2_I2C_CTRL, I2cCtrl | I2C_STOP); \ +-} +- +-#define SK_I2C_GET_CTL(IoC, pI2cCtrl) SK_IN32(IoC, B2_I2C_CTRL, pI2cCtrl) +- +-/* +- * Macros to access the TWSI SW Registers +- */ +-#define SK_I2C_SET_BIT(IoC, SetBits) { \ +- SK_U8 OrgBits; \ +- SK_IN8(IoC, B2_I2C_SW, &OrgBits); \ +- SK_OUT8(IoC, B2_I2C_SW, OrgBits | (SK_U8)(SetBits)); \ +-} +- +-#define SK_I2C_CLR_BIT(IoC, ClrBits) { \ +- SK_U8 OrgBits; \ +- SK_IN8(IoC, B2_I2C_SW, &OrgBits); \ +- SK_OUT8(IoC, B2_I2C_SW, OrgBits & ~((SK_U8)(ClrBits))); \ +-} +- +-#define SK_I2C_GET_SW(IoC, pI2cSw) SK_IN8(IoC, B2_I2C_SW, pI2cSw) +- +-/* +- * define the possible sensor states +- */ +-#define SK_SEN_IDLE 0 /* Idle: sensor not read */ +-#define SK_SEN_VALUE 1 /* Value Read cycle */ +-#define SK_SEN_VALEXT 2 /* Extended Value Read cycle */ +- +-/* +- * Conversion factor to convert read Voltage sensor to milli Volt +- * Conversion factor to convert read Temperature sensor to 10th degree Celsius +- */ +-#define SK_LM80_VT_LSB 22 /* 22mV LSB resolution */ +-#define SK_LM80_TEMP_LSB 10 /* 1 degree LSB resolution */ +-#define SK_LM80_TEMPEXT_LSB 5 /* 0.5 degree LSB resolution for ext. val. */ +- +-/* +- * formula: counter = (22500*60)/(rpm * divisor * pulses/2) +- * assuming: 6500rpm, 4 pulses, divisor 1 +- */ +-#define SK_LM80_FAN_FAKTOR ((22500L*60)/(1*2)) +- +-/* +- * Define sensor management data +- * Maximum is reached on Genesis copper dual port and Yukon-64 +- * Board specific maximum is in pAC->I2c.MaxSens +- */ +-#define SK_MAX_SENSORS 8 /* maximal no. of installed sensors */ +-#define SK_MIN_SENSORS 5 /* minimal no. of installed sensors */ +- +-/* +- * To watch the state machine (SM) use the timer in two ways +- * instead of one as hitherto +- */ +-#define SK_TIMER_WATCH_SM 0 /* Watch the SM to finish in a spec. time */ +-#define SK_TIMER_NEW_GAUGING 1 /* Start a new gauging when timer expires */ +- +-/* +- * Defines for the individual thresholds +- */ +- +-/* Temperature sensor */ +-#define SK_SEN_TEMP_HIGH_ERR 800 /* Temperature High Err Threshold */ +-#define SK_SEN_TEMP_HIGH_WARN 700 /* Temperature High Warn Threshold */ +-#define SK_SEN_TEMP_LOW_WARN 100 /* Temperature Low Warn Threshold */ +-#define SK_SEN_TEMP_LOW_ERR 0 /* Temperature Low Err Threshold */ +- +-/* VCC which should be 5 V */ +-#define SK_SEN_PCI_5V_HIGH_ERR 5588 /* Voltage PCI High Err Threshold */ +-#define SK_SEN_PCI_5V_HIGH_WARN 5346 /* Voltage PCI High Warn Threshold */ +-#define SK_SEN_PCI_5V_LOW_WARN 4664 /* Voltage PCI Low Warn Threshold */ +-#define SK_SEN_PCI_5V_LOW_ERR 4422 /* Voltage PCI Low Err Threshold */ +- +-/* +- * VIO may be 5 V or 3.3 V. Initialization takes two parts: +- * 1. Initialize lowest lower limit and highest higher limit. +- * 2. After the first value is read correct the upper or the lower limit to +- * the appropriate C constant. +- * +- * Warning limits are +-5% of the exepected voltage. +- * Error limits are +-10% of the expected voltage. +- */ +- +-/* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */ +- +-#define SK_SEN_PCI_IO_5V_HIGH_ERR 5566 /* + 10% V PCI-IO High Err Threshold */ +-#define SK_SEN_PCI_IO_5V_HIGH_WARN 5324 /* + 5% V PCI-IO High Warn Threshold */ +- /* 5000 mVolt */ +-#define SK_SEN_PCI_IO_5V_LOW_WARN 4686 /* - 5% V PCI-IO Low Warn Threshold */ +-#define SK_SEN_PCI_IO_5V_LOW_ERR 4444 /* - 10% V PCI-IO Low Err Threshold */ +- +-#define SK_SEN_PCI_IO_RANGE_LIMITER 4000 /* 4000 mV range delimiter */ +- +-/* correction values for the second pass */ +-#define SK_SEN_PCI_IO_3V3_HIGH_ERR 3850 /* + 15% V PCI-IO High Err Threshold */ +-#define SK_SEN_PCI_IO_3V3_HIGH_WARN 3674 /* + 10% V PCI-IO High Warn Threshold */ +- /* 3300 mVolt */ +-#define SK_SEN_PCI_IO_3V3_LOW_WARN 2926 /* - 10% V PCI-IO Low Warn Threshold */ +-#define SK_SEN_PCI_IO_3V3_LOW_ERR 2772 /* - 15% V PCI-IO Low Err Threshold */ +- +-/* +- * VDD voltage +- */ +-#define SK_SEN_VDD_HIGH_ERR 3630 /* Voltage ASIC High Err Threshold */ +-#define SK_SEN_VDD_HIGH_WARN 3476 /* Voltage ASIC High Warn Threshold */ +-#define SK_SEN_VDD_LOW_WARN 3146 /* Voltage ASIC Low Warn Threshold */ +-#define SK_SEN_VDD_LOW_ERR 2970 /* Voltage ASIC Low Err Threshold */ +- +-/* +- * PHY PLL 3V3 voltage +- */ +-#define SK_SEN_PLL_3V3_HIGH_ERR 3630 /* Voltage PMA High Err Threshold */ +-#define SK_SEN_PLL_3V3_HIGH_WARN 3476 /* Voltage PMA High Warn Threshold */ +-#define SK_SEN_PLL_3V3_LOW_WARN 3146 /* Voltage PMA Low Warn Threshold */ +-#define SK_SEN_PLL_3V3_LOW_ERR 2970 /* Voltage PMA Low Err Threshold */ +- +-/* +- * VAUX (YUKON only) +- */ +-#define SK_SEN_VAUX_3V3_HIGH_ERR 3630 /* Voltage VAUX High Err Threshold */ +-#define SK_SEN_VAUX_3V3_HIGH_WARN 3476 /* Voltage VAUX High Warn Threshold */ +-#define SK_SEN_VAUX_3V3_LOW_WARN 3146 /* Voltage VAUX Low Warn Threshold */ +-#define SK_SEN_VAUX_3V3_LOW_ERR 2970 /* Voltage VAUX Low Err Threshold */ +-#define SK_SEN_VAUX_0V_WARN_ERR 0 /* if VAUX not present */ +-#define SK_SEN_VAUX_RANGE_LIMITER 1000 /* 1000 mV range delimiter */ +- +-/* +- * PHY 2V5 voltage +- */ +-#define SK_SEN_PHY_2V5_HIGH_ERR 2750 /* Voltage PHY High Err Threshold */ +-#define SK_SEN_PHY_2V5_HIGH_WARN 2640 /* Voltage PHY High Warn Threshold */ +-#define SK_SEN_PHY_2V5_LOW_WARN 2376 /* Voltage PHY Low Warn Threshold */ +-#define SK_SEN_PHY_2V5_LOW_ERR 2222 /* Voltage PHY Low Err Threshold */ +- +-/* +- * ASIC Core 1V5 voltage (YUKON only) +- */ +-#define SK_SEN_CORE_1V5_HIGH_ERR 1650 /* Voltage ASIC Core High Err Threshold */ +-#define SK_SEN_CORE_1V5_HIGH_WARN 1575 /* Voltage ASIC Core High Warn Threshold */ +-#define SK_SEN_CORE_1V5_LOW_WARN 1425 /* Voltage ASIC Core Low Warn Threshold */ +-#define SK_SEN_CORE_1V5_LOW_ERR 1350 /* Voltage ASIC Core Low Err Threshold */ +- +-/* +- * FAN 1 speed +- */ +-/* assuming: 6500rpm +-15%, 4 pulses, +- * warning at: 80 % +- * error at: 70 % +- * no upper limit +- */ +-#define SK_SEN_FAN_HIGH_ERR 20000 /* FAN Speed High Err Threshold */ +-#define SK_SEN_FAN_HIGH_WARN 20000 /* FAN Speed High Warn Threshold */ +-#define SK_SEN_FAN_LOW_WARN 5200 /* FAN Speed Low Warn Threshold */ +-#define SK_SEN_FAN_LOW_ERR 4550 /* FAN Speed Low Err Threshold */ +- +-/* +- * Some Voltages need dynamic thresholds +- */ +-#define SK_SEN_DYN_INIT_NONE 0 /* No dynamic init of thresholds */ +-#define SK_SEN_DYN_INIT_PCI_IO 10 /* Init PCI-IO with new thresholds */ +-#define SK_SEN_DYN_INIT_VAUX 11 /* Init VAUX with new thresholds */ +- +-extern int SkLm80ReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen); +-#endif /* n_INC_SKGEI2C_H */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgeinit.h linux-2.6.9.new/drivers/net/sk98lin/h/skgeinit.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgeinit.h 2004-10-19 05:54:40.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgeinit.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgeinit.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.83 $ +- * Date: $Date: 2003/09/16 14:07:37 $ ++ * Version: $Revision: 2.37 $ ++ * Date: $Date: 2005/05/24 08:42:19 $ + * Purpose: Structures and prototypes for the GE Init Module + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -60,14 +59,17 @@ + #define SK_XMIT_DUR 0x002faf08UL /* 50 ms */ + #define SK_BLK_DUR 0x01dcd650UL /* 500 ms */ + +-#define SK_DPOLL_DEF 0x00ee6b28UL /* 250 ms at 62.5 MHz */ ++#define SK_DPOLL_DEF 0x00ee6b28UL /* 250 ms at 62.5 MHz (Genesis) */ ++#define SK_DPOLL_DEF_Y2 0x0000124fUL /* 75 us (Yukon-2) */ + + #define SK_DPOLL_MAX 0x00ffffffUL /* 268 ms at 62.5 MHz */ +- /* 215 ms at 78.12 MHz */ ++ /* 215 ms at 78.12 MHz (Yukon) */ + + #define SK_FACT_62 100 /* is given in percent */ +-#define SK_FACT_53 85 /* on GENESIS: 53.12 MHz */ ++#define SK_FACT_53 85 /* on GENESIS: 53.12 MHz */ + #define SK_FACT_78 125 /* on YUKON: 78.12 MHz */ ++#define SK_FACT_100 161 /* on YUKON-FE: 100 MHz */ ++#define SK_FACT_125 202 /* on YUKON-EC: 125 MHz */ + + /* Timeout values */ + #define SK_MAC_TO_53 72 /* MAC arbiter timeout */ +@@ -83,10 +85,16 @@ + #define SK_RB_LLPP_B (16 * 1024) /* Lower Level for big Queues */ + + #ifndef SK_BMU_RX_WM +-#define SK_BMU_RX_WM 0x600 /* BMU Rx Watermark */ ++#define SK_BMU_RX_WM 0x600 /* BMU Rx Watermark */ + #endif ++ + #ifndef SK_BMU_TX_WM +-#define SK_BMU_TX_WM 0x600 /* BMU Tx Watermark */ ++#define SK_BMU_TX_WM 0x600 /* BMU Tx Watermark */ ++#endif ++ ++/* performance sensitive drivers should set this define to 0x80 */ ++#ifndef SK_BMU_RX_WM_PEX ++#define SK_BMU_RX_WM_PEX 0x600 /* BMU Rx Watermark for PEX */ + #endif + + /* XMAC II Rx High Watermark */ +@@ -98,37 +106,31 @@ + #define SK_XM_THR_MULL 0x01fb /* .. for multiple link usage */ + #define SK_XM_THR_JUMBO 0x03fc /* .. for jumbo frame usage */ + +-/* values for GIPortUsage */ ++/* values for PortUsage */ + #define SK_RED_LINK 1 /* redundant link usage */ + #define SK_MUL_LINK 2 /* multiple link usage */ + #define SK_JUMBO_LINK 3 /* driver uses jumbo frames */ + + /* Minimum RAM Buffer Rx Queue Size */ +-#define SK_MIN_RXQ_SIZE 16 /* 16 kB */ ++#define SK_MIN_RXQ_SIZE (((pAC)->GIni.GIYukon2) ? 10 : 16) /* 10/16 kB */ + + /* Minimum RAM Buffer Tx Queue Size */ +-#define SK_MIN_TXQ_SIZE 16 /* 16 kB */ ++#define SK_MIN_TXQ_SIZE (((pAC)->GIni.GIYukon2) ? 10 : 16) /* 10/16 kB */ + +-/* Queue Size units */ +-#define QZ_UNITS 0x7 ++/* Queue Size units (Genesis/Yukon) */ ++#define QZ_UNITS 7 + #define QZ_STEP 8 + ++/* Queue Size units (Yukon-2) */ ++#define QZ_STEP_Y2 1 ++ + /* Percentage of queue size from whole memory */ + /* 80 % for receive */ +-#define RAM_QUOTA_RX 80L +-/* 0% for sync transfer */ +-#define RAM_QUOTA_SYNC 0L ++#define RAM_QUOTA_RX 80 ++/* 0 % for sync transfer */ ++#define RAM_QUOTA_SYNC 0 + /* the rest (20%) is taken for async transfer */ + +-/* Get the rounded queue size in Bytes in 8k steps */ +-#define ROUND_QUEUE_SIZE(SizeInBytes) \ +- ((((unsigned long) (SizeInBytes) + (QZ_STEP*1024L)-1) / 1024) & \ +- ~(QZ_STEP-1)) +- +-/* Get the rounded queue size in KBytes in 8k steps */ +-#define ROUND_QUEUE_SIZE_KB(Kilobytes) \ +- ROUND_QUEUE_SIZE((Kilobytes) * 1024L) +- + /* Types of RAM Buffer Queues */ + #define SK_RX_SRAM_Q 1 /* small receive queue */ + #define SK_RX_BRAM_Q 2 /* big receive queue */ +@@ -167,11 +169,11 @@ + + + /* Link Speed Capabilities */ +-#define SK_LSPEED_CAP_AUTO (1<<0) /* Automatic resolution */ +-#define SK_LSPEED_CAP_10MBPS (1<<1) /* 10 Mbps */ +-#define SK_LSPEED_CAP_100MBPS (1<<2) /* 100 Mbps */ +-#define SK_LSPEED_CAP_1000MBPS (1<<3) /* 1000 Mbps */ +-#define SK_LSPEED_CAP_INDETERMINATED (1<<4) /* indeterminated */ ++#define SK_LSPEED_CAP_AUTO BIT_0S /* Automatic resolution */ ++#define SK_LSPEED_CAP_10MBPS BIT_1S /* 10 Mbps */ ++#define SK_LSPEED_CAP_100MBPS BIT_2S /* 100 Mbps */ ++#define SK_LSPEED_CAP_1000MBPS BIT_3S /* 1000 Mbps */ ++#define SK_LSPEED_CAP_INDETERMINATED BIT_4S /* indeterminated */ + + /* Link Speed Parameter */ + #define SK_LSPEED_AUTO 1 /* Automatic resolution */ +@@ -189,11 +191,11 @@ + + + /* Link Capability Parameter */ +-#define SK_LMODE_CAP_HALF (1<<0) /* Half Duplex Mode */ +-#define SK_LMODE_CAP_FULL (1<<1) /* Full Duplex Mode */ +-#define SK_LMODE_CAP_AUTOHALF (1<<2) /* AutoHalf Duplex Mode */ +-#define SK_LMODE_CAP_AUTOFULL (1<<3) /* AutoFull Duplex Mode */ +-#define SK_LMODE_CAP_INDETERMINATED (1<<4) /* indeterminated */ ++#define SK_LMODE_CAP_HALF BIT_0S /* Half Duplex Mode */ ++#define SK_LMODE_CAP_FULL BIT_1S /* Full Duplex Mode */ ++#define SK_LMODE_CAP_AUTOHALF BIT_2S /* AutoHalf Duplex Mode */ ++#define SK_LMODE_CAP_AUTOFULL BIT_3S /* AutoFull Duplex Mode */ ++#define SK_LMODE_CAP_INDETERMINATED BIT_4S /* indeterminated */ + + /* Link Mode Current State */ + #define SK_LMODE_STAT_UNKNOWN 1 /* Unknown Duplex Mode */ +@@ -220,10 +222,10 @@ + #define SK_FLOW_STAT_INDETERMINATED 5 /* indeterminated */ + + /* Master/Slave Mode Capabilities */ +-#define SK_MS_CAP_AUTO (1<<0) /* Automatic resolution */ +-#define SK_MS_CAP_MASTER (1<<1) /* This station is master */ +-#define SK_MS_CAP_SLAVE (1<<2) /* This station is slave */ +-#define SK_MS_CAP_INDETERMINATED (1<<3) /* indeterminated */ ++#define SK_MS_CAP_AUTO BIT_0S /* Automatic resolution */ ++#define SK_MS_CAP_MASTER BIT_1S /* This station is master */ ++#define SK_MS_CAP_SLAVE BIT_2S /* This station is slave */ ++#define SK_MS_CAP_INDETERMINATED BIT_3S /* indeterminated */ + + /* Set Master/Slave Mode Parameter (and capabilities) */ + #define SK_MS_MODE_AUTO 1 /* Automatic resolution */ +@@ -238,25 +240,25 @@ + #define SK_MS_STAT_FAULT 4 /* M/S resolution failed */ + #define SK_MS_STAT_INDETERMINATED 5 /* indeterminated */ + +-/* parameter 'Mode' when calling SkXmSetRxCmd() */ +-#define SK_STRIP_FCS_ON (1<<0) /* Enable FCS stripping of Rx frames */ +-#define SK_STRIP_FCS_OFF (1<<1) /* Disable FCS stripping of Rx frames */ +-#define SK_STRIP_PAD_ON (1<<2) /* Enable pad byte stripping of Rx fr */ +-#define SK_STRIP_PAD_OFF (1<<3) /* Disable pad byte stripping of Rx fr */ +-#define SK_LENERR_OK_ON (1<<4) /* Don't chk fr for in range len error */ +-#define SK_LENERR_OK_OFF (1<<5) /* Check frames for in range len error */ +-#define SK_BIG_PK_OK_ON (1<<6) /* Don't set Rx Error bit for big frames */ +-#define SK_BIG_PK_OK_OFF (1<<7) /* Set Rx Error bit for big frames */ +-#define SK_SELF_RX_ON (1<<8) /* Enable Rx of own packets */ +-#define SK_SELF_RX_OFF (1<<9) /* Disable Rx of own packets */ ++/* parameter 'Mode' when calling SkMacSetRxCmd() */ ++#define SK_STRIP_FCS_ON BIT_0S /* Enable FCS stripping of Rx frames */ ++#define SK_STRIP_FCS_OFF BIT_1S /* Disable FCS stripping of Rx frames */ ++#define SK_STRIP_PAD_ON BIT_2S /* Enable pad byte stripping of Rx fr */ ++#define SK_STRIP_PAD_OFF BIT_3S /* Disable pad byte stripping of Rx fr */ ++#define SK_LENERR_OK_ON BIT_4S /* Don't chk fr for in range len error */ ++#define SK_LENERR_OK_OFF BIT_5S /* Check frames for in range len error */ ++#define SK_BIG_PK_OK_ON BIT_6S /* Don't set Rx Error bit for big frames */ ++#define SK_BIG_PK_OK_OFF BIT_7S /* Set Rx Error bit for big frames */ ++#define SK_SELF_RX_ON BIT_8S /* Enable Rx of own packets */ ++#define SK_SELF_RX_OFF BIT_9S /* Disable Rx of own packets */ + + /* parameter 'Para' when calling SkMacSetRxTxEn() */ +-#define SK_MAC_LOOPB_ON (1<<0) /* Enable MAC Loopback Mode */ +-#define SK_MAC_LOOPB_OFF (1<<1) /* Disable MAC Loopback Mode */ +-#define SK_PHY_LOOPB_ON (1<<2) /* Enable PHY Loopback Mode */ +-#define SK_PHY_LOOPB_OFF (1<<3) /* Disable PHY Loopback Mode */ +-#define SK_PHY_FULLD_ON (1<<4) /* Enable GMII Full Duplex */ +-#define SK_PHY_FULLD_OFF (1<<5) /* Disable GMII Full Duplex */ ++#define SK_MAC_LOOPB_ON BIT_0S /* Enable MAC Loopback Mode */ ++#define SK_MAC_LOOPB_OFF BIT_1S /* Disable MAC Loopback Mode */ ++#define SK_PHY_LOOPB_ON BIT_2S /* Enable PHY Loopback Mode */ ++#define SK_PHY_LOOPB_OFF BIT_3S /* Disable PHY Loopback Mode */ ++#define SK_PHY_FULLD_ON BIT_4S /* Enable GMII Full Duplex */ ++#define SK_PHY_FULLD_OFF BIT_5S /* Disable GMII Full Duplex */ + + /* States of PState */ + #define SK_PRT_RESET 0 /* the port is reset */ +@@ -266,18 +268,24 @@ + + /* PHY power down modes */ + #define PHY_PM_OPERATIONAL_MODE 0 /* PHY operational mode */ +-#define PHY_PM_DEEP_SLEEP 1 /* coma mode --> minimal power */ ++#define PHY_PM_DEEP_SLEEP 1 /* Coma mode --> minimal power */ + #define PHY_PM_IEEE_POWER_DOWN 2 /* IEEE 22.2.4.1.5 compl. power down */ +-#define PHY_PM_ENERGY_DETECT 3 /* energy detect */ +-#define PHY_PM_ENERGY_DETECT_PLUS 4 /* energy detect plus */ ++#define PHY_PM_ENERGY_DETECT 3 /* Energy detect */ ++#define PHY_PM_ENERGY_DETECT_PLUS 4 /* Energy detect plus */ ++ ++/* PCI Bus Types */ ++#define SK_PCI_BUS BIT_0S /* normal PCI bus */ ++#define SK_PCIX_BUS BIT_1S /* PCI-X bus */ ++#define SK_PEX_BUS BIT_2S /* PCI-Express bus */ + + /* Default receive frame limit for Workaround of XMAC Errata */ + #define SK_DEF_RX_WA_LIM SK_CONSTU64(100) + + /* values for GILedBlinkCtrl (LED Blink Control) */ +-#define SK_ACT_LED_BLINK (1<<0) /* Active LED blinking */ +-#define SK_DUP_LED_NORMAL (1<<1) /* Duplex LED normal */ +-#define SK_LED_LINK100_ON (1<<2) /* Link 100M LED on */ ++#define SK_ACT_LED_BLINK BIT_0S /* Active LED blinking */ ++#define SK_DUP_LED_NORMAL BIT_1S /* Duplex LED normal */ ++#define SK_LED_LINK100_ON BIT_2S /* Link 100M LED on */ ++#define SK_DUAL_LED_ACT_LNK BIT_3S /* Dual LED ACT/LNK configuration */ + + /* Link Partner Status */ + #define SK_LIPA_UNKNOWN 0 /* Link partner is in unknown state */ +@@ -290,18 +298,165 @@ + /* Max. Auto-neg. timeouts before link detection in sense mode is reset */ + #define SK_MAX_ANEG_TO 10 /* Max. 10 times the sense mode is reset */ + ++ ++/****************************************************************************** ++ * ++ * HW_FEATURE() macro ++ */ ++ ++/* DWORD 0: Features */ ++#define HWF_RED_CORE_CLK_SUP 0x01000000UL /* Reduced Core Clock supp. */ ++#define HWF_SYNC_TX_SUP 0x00800000UL /* synch Tx queue available */ ++#define HWF_SINGLE_PORT_DEVICE 0x00400000UL /* device has only one LAN IF */ ++#define HWF_JUMBO_FRAMES_SUP 0x00200000UL /* Jumbo frames supported */ ++#define HWF_TX_TCP_CSUM_SUP 0x00100000UL /* TCP Tx checksum supported */ ++#define HWF_TX_UDP_CSUM_SUP 0x00080000UL /* UDP Tx checksum supported */ ++#define HWF_RX_CSUM_SUP 0x00040000UL /* RX checksum supported */ ++#define HWF_TCP_SEGM_SUP 0x00020000UL /* TCP segmentation supported */ ++#define HWF_RSS_HASH_SUP 0x00010000UL /* RSS Hash supported */ ++#define HWF_PORT_VLAN_SUP 0x00008000UL /* VLAN can be config per port*/ ++#define HWF_ROLE_PARAM_SUP 0x00004000UL /* Role parameter supported */ ++#define HWF_LOW_PMODE_SUP 0x00002000UL /* Low Power Mode supported */ ++#define HWF_ENERGIE_DEMO_SUP 0x00001000UL /* Energie detect mode supp. */ ++#define HWF_SPEED1000_SUP 0x00000800UL /* Line Speed 1000 supported */ ++#define HWF_SPEED100_SUP 0x00000400UL /* Line Speed 100 supported */ ++#define HWF_SPEED10_SUP 0x00000200UL /* Line Speed 10 supported */ ++#define HWF_AUTONEGSENSE_SUP 0x00000100UL /* Autoneg Sense supported */ ++#define HWF_PHY_LOOPB_MD_SUP 0x00000080UL /* PHY loopback mode supp. */ ++#define HWF_ASF_SUP 0x00000040UL /* ASF support possible */ ++#define HWF_QS_STEPS_1KB 0x00000020UL /* The Rx/Tx queues can be */ ++ /* configured with 1 kB res. */ ++#define HWF_OWN_RAM_PER_PORT 0x00000010UL /* Each port has a separate */ ++ /* RAM buffer */ ++#define HWF_MIN_LED_IF 0x00000008UL /* Minimal LED interface */ ++ /* (e.g. for Yukon-EC) */ ++#define HWF_LIST_ELEMENTS_USED 0x00000004UL /* HW uses list elements */ ++ /* (otherwise desc. are used) */ ++#define HWF_GMAC_INSIDE 0x00000002UL /* device contains GMAC */ ++#define HWF_TWSI_PRESENT 0x00000001UL /* TWSI sensor bus present */ ++ ++/*-RMV- DWORD 1: Deviations */ ++#define HWF_WA_DEV_4115 0x10010000UL /*-RMV- 4.115 (Rx MAC FIFO) */ ++#define HWF_WA_DEV_4109 0x10008000UL /*-RMV- 4.109 (BIU hang) */ ++#define HWF_WA_DEV_483 0x10004000UL /*-RMV- 4.83 (Rx TCP wrong) */ ++#define HWF_WA_DEV_479 0x10002000UL /*-RMV- 4.79 (Rx BMU hang II) */ ++#define HWF_WA_DEV_472 0x10001000UL /*-RMV- 4.72 (GPHY2 MDC clk) */ ++#define HWF_WA_DEV_463 0x10000800UL /*-RMV- 4.63 (Rx BMU hang I) */ ++#define HWF_WA_DEV_427 0x10000400UL /*-RMV- 4.27 (Tx Done Rep) */ ++#define HWF_WA_DEV_42 0x10000200UL /*-RMV- 4.2 (pref unit burst) */ ++#define HWF_WA_DEV_46 0x10000100UL /*-RMV- 4.6 (CPU crash II) */ ++#define HWF_WA_DEV_43_418 0x10000080UL /*-RMV- 4.3 & 4.18 (PCI unexp */ ++ /*-RMV- compl&Stat BMU deadl) */ ++#define HWF_WA_DEV_420 0x10000040UL /*-RMV- 4.20 (Status BMU ov) */ ++#define HWF_WA_DEV_423 0x10000020UL /*-RMV- 4.23 (TCP Segm Hang) */ ++#define HWF_WA_DEV_424 0x10000010UL /*-RMV- 4.24 (MAC reg overwr) */ ++#define HWF_WA_DEV_425 0x10000008UL /*-RMV- 4.25 (Magic packet */ ++ /*-RMV- with odd offset) */ ++#define HWF_WA_DEV_428 0x10000004UL /*-RMV- 4.28 (Poll-U &BigEndi)*/ ++#define HWF_WA_FIFO_FLUSH_YLA0 0x10000002UL /*-RMV- dis Rx GMAC FIFO Flush*/ ++ /*-RMV- for Yu-L Rev. A0 only */ ++#define HWF_WA_COMA_MODE 0x10000001UL /*-RMV- Coma Mode WA req */ ++ ++/* DWORD 2: still unused */ ++/* DWORD 3: still unused */ ++ ++ ++/* ++ * HW_FEATURE() - returns whether the feature is serviced or not ++ */ ++#define HW_FEATURE(pAC, ReqFeature) \ ++ (((pAC)->GIni.HwF.Features[((ReqFeature) & 0x30000000UL) >> 28] &\ ++ ((ReqFeature) & 0x0fffffffUL)) != 0) ++ ++#define HW_FEAT_LIST 0 ++#define HW_DEV_LIST 1 ++ ++#define SET_HW_FEATURE_MASK(pAC, List, OffMaskValue, OnMaskValue) { \ ++ if ((List) == HW_FEAT_LIST || (List) == HW_DEV_LIST) { \ ++ (pAC)->GIni.HwF.OffMask[List] = (OffMaskValue); \ ++ (pAC)->GIni.HwF.OnMask[List] = (OnMaskValue); \ ++ } \ ++} ++ ++/* driver access macros for GIni structure ***********************************/ ++ ++#define CHIP_ID_YUKON_2(pAC) ((pAC)->GIni.GIYukon2) ++#define HW_SYNC_TX_SUPPORTED(pAC) \ ++ ((pAC)->GIni.GIChipId != CHIP_ID_YUKON_EC && \ ++ (pAC)->GIni.GIChipId != CHIP_ID_YUKON_FE) ++ ++#define HW_MS_TO_TICKS(pAC, MsTime) \ ++ ((MsTime) * (62500L/100) * (pAC)->GIni.GIHstClkFact) ++ ++#ifdef XXX ++/* still under construction */ ++#define HW_IS_SINGLE_PORT(pAC) ((pAC)->GIni.GIMacsFound == 1) ++#define HW_NUMBER_OF_PORTS(pAC) ((pAC)->GIni.GIMacsFound) ++ ++#define HW_TX_UDP_CSUM_SUPPORTED(pAC) \ ++ ((((pAC)->GIni.GIChipId >= CHIP_ID_YUKON) && ((pAC)->GIni.GIChipRev != 0)) ++ ++#define HW_DEFAULT_LINESPEED(pAC) \ ++ ((!(pAC)->GIni.GIGenesis && (pAC)->GIni.GICopperType) ? \ ++ SK_LSPEED_AUTO : SK_LSPEED_1000MBPS) ++ ++#define HW_ROLE_PARAM_SUPPORTED(pAC) ((pAC)->GIni.GICopperType) ++ ++#define HW_SPEED1000_SUPPORTED(pAC, Port) \ ++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) ++ ++#define HW_SPEED100_SUPPORTED(pAC, Port) \ ++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_100MBPS) ++ ++#define HW_SPEED10_SUPPORTED(pAC, Port) \ ++ ((pAC)->GIni.GP[Port].PLinkSpeedCap & SK_LSPEED_CAP_10MBPS) ++ ++#define HW_AUTONEGSENSE_SUPPORTED(pAC) ((pAC)->GIni.GP[0].PhyType==SK_PHY_XMAC) ++ ++#define HW_FREQ_TO_CARD_TICKS(pAC, AdapterClkSpeed, Freq) \ ++ (((AdapterClkSpeed / 100) * (pAC)->GIni.GIHstClkFact) / Freq) ++ ++#define HW_IS_LINK_UP(pAC, Port) ((pAC)->GIni.GP[Port].PHWLinkUp) ++#define HW_LINK_SPEED_USED(pAC, Port) ((pAC)->GIni.GP[Port].PLinkSpeedUsed) ++#define HW_RAM_SIZE(pAC) ((pAC)->GIni.GIRamSize) ++ ++#define HW_PHY_LP_MODE_SUPPORTED(pAC) (pAC0->??? ++#define HW_ASF_ACTIVE(pAC) ??? ++#define RAWIO_OUT32(pAC, pAC->RegIrqMask, pAC->GIni.GIValIrqMask)... ++ ++/* macro to check whether Tx checksum is supported */ ++#define HW_TX_CSUM_SUPPORTED(pAC) ((pAC)->GIni.GIChipId != CHIP_ID_GENESIS) ++ ++BMU_UDP_CHECK : BMU_TCP_CHECK; ++ ++/* macro for - Own Bit mirrored to DWORD7 (Yukon LP receive descriptor) */ ++#endif /* 0 */ ++ ++ + /* structures *****************************************************************/ + + /* ++ * HW Feature structure ++ */ ++typedef struct s_HwFeatures { ++ SK_U32 Features[4]; /* Feature list */ ++ SK_U32 OffMask[4]; /* Off Mask */ ++ SK_U32 OnMask[4]; /* On Mask */ ++} SK_HW_FEATURES; ++ ++/* + * MAC specific functions + */ + typedef struct s_GeMacFunc { +- int (*pFnMacUpdateStats)(SK_AC *pAC, SK_IOC IoC, unsigned int Port); +- int (*pFnMacStatistic)(SK_AC *pAC, SK_IOC IoC, unsigned int Port, +- SK_U16 StatAddr, SK_U32 SK_FAR *pVal); +- int (*pFnMacResetCounter)(SK_AC *pAC, SK_IOC IoC, unsigned int Port); +- int (*pFnMacOverflow)(SK_AC *pAC, SK_IOC IoC, unsigned int Port, +- SK_U16 IStatus, SK_U64 SK_FAR *pVal); ++ int (*pFnMacUpdateStats)(SK_AC *, SK_IOC, unsigned int); ++ int (*pFnMacStatistic)(SK_AC *, SK_IOC, unsigned int, SK_U16, SK_U32 SK_FAR *); ++ int (*pFnMacResetCounter)(SK_AC *, SK_IOC, unsigned int); ++ int (*pFnMacOverflow)(SK_AC *, SK_IOC, unsigned int, SK_U16, SK_U64 SK_FAR *); ++ void (*pSkGeSirqIsr)(SK_AC *, SK_IOC, SK_U32); ++#ifdef SK_DIAG ++ int (*pFnMacPhyRead)(SK_AC *, SK_IOC, int, int, SK_U16 SK_FAR *); ++ int (*pFnMacPhyWrite)(SK_AC *, SK_IOC, int, int, SK_U16); ++#endif /* SK_DIAG */ + } SK_GEMACFUNC; + + /* +@@ -311,7 +466,7 @@ + #ifndef SK_DIAG + SK_TIMER PWaTimer; /* Workaround Timer */ + SK_TIMER HalfDupChkTimer; +-#endif /* SK_DIAG */ ++#endif /* !SK_DIAG */ + SK_U32 PPrevShorts; /* Previous Short Counter checking */ + SK_U32 PPrevFcs; /* Previous FCS Error Counter checking */ + SK_U64 PPrevRx; /* Previous RxOk Counter checking */ +@@ -335,6 +490,7 @@ + int PXaQOff; /* Asynchronous Tx Queue Address Offset */ + int PhyType; /* PHY used on this port */ + int PState; /* Port status (reset, stop, init, run) */ ++ int PPortUsage; /* Driver Port Usage */ + SK_U16 PhyId1; /* PHY Id1 on this port */ + SK_U16 PhyAddr; /* MDIO/MDC PHY address */ + SK_U16 PIsave; /* Saved Interrupt status word */ +@@ -367,6 +523,8 @@ + int PMacJamLen; /* MAC Jam length */ + int PMacJamIpgVal; /* MAC Jam IPG */ + int PMacJamIpgData; /* MAC IPG Jam to Data */ ++ int PMacBackOffLim; /* MAC Back-off Limit */ ++ int PMacDataBlind; /* MAC Data Blinder */ + int PMacIpgData; /* MAC Data IPG */ + SK_BOOL PMacLimit4; /* reset collision counter and backoff algorithm */ + } SK_GEPORT; +@@ -379,27 +537,37 @@ + int GIChipId; /* Chip Identification Number */ + int GIChipRev; /* Chip Revision Number */ + SK_U8 GIPciHwRev; /* PCI HW Revision Number */ ++ SK_U8 GIPciBus; /* PCI Bus Type (PCI / PCI-X / PCI-Express) */ ++ SK_U8 GIPciMode; /* PCI / PCI-X Mode @ Clock */ ++ SK_U8 GIPexWidth; /* PCI-Express Negotiated Link Width */ + SK_BOOL GIGenesis; /* Genesis adapter ? */ +- SK_BOOL GIYukon; /* YUKON-A1/Bx chip */ ++ SK_BOOL GIYukon; /* YUKON family (1 and 2) */ + SK_BOOL GIYukonLite; /* YUKON-Lite chip */ ++ SK_BOOL GIYukon2; /* YUKON-2 chip (-XL, -EC or -FE) */ ++ SK_U8 GIConTyp; /* Connector Type */ ++ SK_U8 GIPmdTyp; /* PMD Type */ + SK_BOOL GICopperType; /* Copper Type adapter ? */ + SK_BOOL GIPciSlot64; /* 64-bit PCI Slot */ + SK_BOOL GIPciClock66; /* 66 MHz PCI Clock */ + SK_BOOL GIVauxAvail; /* VAUX available (YUKON) */ + SK_BOOL GIYukon32Bit; /* 32-Bit YUKON adapter */ ++ SK_BOOL GIAsfEnabled; /* ASF subsystem enabled */ ++ SK_BOOL GIAsfRunning; /* ASF subsystem running */ + SK_U16 GILedBlinkCtrl; /* LED Blink Control */ + int GIMacsFound; /* Number of MACs found on this adapter */ + int GIMacType; /* MAC Type used on this adapter */ +- int GIHstClkFact; /* Host Clock Factor (62.5 / HstClk * 100) */ +- int GIPortUsage; /* Driver Port Usage */ ++ int GIChipCap; /* Adapter's Capabilities */ ++ int GIHstClkFact; /* Host Clock Factor (HstClk / 62.5 * 100) */ + int GILevel; /* Initialization Level completed */ + int GIRamSize; /* The RAM size of the adapter in kB */ + int GIWolOffs; /* WOL Register Offset (HW-Bug in Rev. A) */ + SK_U32 GIRamOffs; /* RAM Address Offset for addr calculation */ + SK_U32 GIPollTimerVal; /* Descr. Poll Timer Init Val (HstClk ticks) */ + SK_U32 GIValIrqMask; /* Value for Interrupt Mask */ ++ SK_U32 GIValHwIrqMask; /* Value for Interrupt Mask */ + SK_U32 GITimeStampCnt; /* Time Stamp High Counter (YUKON only) */ + SK_GEPORT GP[SK_MAX_MACS];/* Port Dependent Information */ ++ SK_HW_FEATURES HwF; /* HW Features struct */ + SK_GEMACFUNC GIFunc; /* MAC depedent functions */ + } SK_GEINIT; + +@@ -417,7 +585,7 @@ + #define SKERR_HWI_E005 (SKERR_HWI_E004+1) + #define SKERR_HWI_E005MSG "SkGeInitPort(): cannot init running ports" + #define SKERR_HWI_E006 (SKERR_HWI_E005+1) +-#define SKERR_HWI_E006MSG "SkGeMacInit(): PState does not match HW state" ++#define SKERR_HWI_E006MSG "unused" + #define SKERR_HWI_E007 (SKERR_HWI_E006+1) + #define SKERR_HWI_E007MSG "SkXmInitDupMd() called with invalid Dup Mode" + #define SKERR_HWI_E008 (SKERR_HWI_E007+1) +@@ -433,11 +601,11 @@ + #define SKERR_HWI_E013 (SKERR_HWI_E012+1) + #define SKERR_HWI_E013MSG "SkGeInitPort(): cfg changed for running queue" + #define SKERR_HWI_E014 (SKERR_HWI_E013+1) +-#define SKERR_HWI_E014MSG "SkGeInitPort(): unknown GIPortUsage specified" ++#define SKERR_HWI_E014MSG "SkGeInitPort(): unknown PortUsage specified" + #define SKERR_HWI_E015 (SKERR_HWI_E014+1) +-#define SKERR_HWI_E015MSG "Illegal Link mode parameter" ++#define SKERR_HWI_E015MSG "Illegal Link Mode parameter" + #define SKERR_HWI_E016 (SKERR_HWI_E015+1) +-#define SKERR_HWI_E016MSG "Illegal Flow control mode parameter" ++#define SKERR_HWI_E016MSG "Illegal Flow Control Mode parameter" + #define SKERR_HWI_E017 (SKERR_HWI_E016+1) + #define SKERR_HWI_E017MSG "Illegal value specified for GIPollTimerVal" + #define SKERR_HWI_E018 (SKERR_HWI_E017+1) +@@ -447,9 +615,9 @@ + #define SKERR_HWI_E020 (SKERR_HWI_E019+1) + #define SKERR_HWI_E020MSG "Illegal Master/Slave parameter" + #define SKERR_HWI_E021 (SKERR_HWI_E020+1) +-#define SKERR_HWI_E021MSG "MacUpdateStats(): cannot update statistic counter" +-#define SKERR_HWI_E022 (SKERR_HWI_E021+1) +-#define SKERR_HWI_E022MSG "MacStatistic(): illegal statistic base address" ++#define SKERR_HWI_E021MSG "MacUpdateStats(): cannot update statistic counter" ++#define SKERR_HWI_E022 (SKERR_HWI_E021+1) ++#define SKERR_HWI_E022MSG "MacStatistic(): illegal statistic base address" + #define SKERR_HWI_E023 (SKERR_HWI_E022+1) + #define SKERR_HWI_E023MSG "SkGeInitPort(): Transmit Queue Size too small" + #define SKERR_HWI_E024 (SKERR_HWI_E023+1) +@@ -464,6 +632,24 @@ + /* + * public functions in skgeinit.c + */ ++extern void SkGePortVlan( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ int Port, ++ SK_BOOL Enable); ++ ++extern void SkGeRxRss( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ int Port, ++ SK_BOOL Enable); ++ ++extern void SkGeRxCsum( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ int Port, ++ SK_BOOL Enable); ++ + extern void SkGePollRxD( + SK_AC *pAC, + SK_IOC IoC, +@@ -601,13 +787,13 @@ + int Port, + SK_U16 IStatus); + +-extern void SkMacSetRxTxEn( ++extern void SkMacSetRxTxEn( + SK_AC *pAC, + SK_IOC IoC, + int Port, + int Para); + +-extern int SkMacRxTxEnable( ++extern int SkMacRxTxEnable( + SK_AC *pAC, + SK_IOC IoC, + int Port); +@@ -624,28 +810,28 @@ + int Port, + SK_BOOL Enable); + +-extern void SkXmPhyRead( ++extern int SkXmPhyRead( + SK_AC *pAC, + SK_IOC IoC, + int Port, + int Addr, + SK_U16 SK_FAR *pVal); + +-extern void SkXmPhyWrite( ++extern int SkXmPhyWrite( + SK_AC *pAC, + SK_IOC IoC, + int Port, + int Addr, + SK_U16 Val); + +-extern void SkGmPhyRead( ++extern int SkGmPhyRead( + SK_AC *pAC, + SK_IOC IoC, + int Port, + int Addr, + SK_U16 SK_FAR *pVal); + +-extern void SkGmPhyWrite( ++extern int SkGmPhyWrite( + SK_AC *pAC, + SK_IOC IoC, + int Port, +@@ -713,7 +899,7 @@ + SK_AC *pAC, + SK_IOC IoC, + unsigned int Port, +- SK_U16 IStatus, ++ SK_U16 IStatus, + SK_U64 SK_FAR *pStatus); + + extern int SkGmOverflowStatus( +@@ -729,6 +915,7 @@ + int Port, + SK_BOOL StartTest); + ++#ifdef SK_PHY_LP_MODE + extern int SkGmEnterLowPowerMode( + SK_AC *pAC, + SK_IOC IoC, +@@ -739,6 +926,7 @@ + SK_AC *pAC, + SK_IOC IoC, + int Port); ++#endif /* SK_PHY_LP_MODE */ + + #ifdef SK_DIAG + extern void SkGePhyRead( +@@ -794,6 +982,9 @@ + extern void SkGeXmitLED(); + extern void SkGeInitRamIface(); + extern int SkGeInitAssignRamToQueues(); ++extern void SkGePortVlan(); ++extern void SkGeRxCsum(); ++extern void SkGeRxRss(); + + /* + * public functions in skxmac2.c +@@ -803,7 +994,7 @@ + extern void SkMacHardRst(); + extern void SkMacClearRst(); + extern void SkMacInitPhy(); +-extern int SkMacRxTxEnable(); ++extern int SkMacRxTxEnable(); + extern void SkMacPromiscMode(); + extern void SkMacHashing(); + extern void SkMacIrqDisable(); +@@ -814,11 +1005,11 @@ + extern void SkMacAutoNegLipaPhy(); + extern void SkMacSetRxTxEn(); + extern void SkXmInitMac(); +-extern void SkXmPhyRead(); +-extern void SkXmPhyWrite(); ++extern int SkXmPhyRead(); ++extern int SkXmPhyWrite(); + extern void SkGmInitMac(); +-extern void SkGmPhyRead(); +-extern void SkGmPhyWrite(); ++extern int SkGmPhyRead(); ++extern int SkGmPhyWrite(); + extern void SkXmClrExactAddr(); + extern void SkXmInitDupMd(); + extern void SkXmInitPauseMd(); +@@ -832,8 +1023,10 @@ + extern int SkXmOverflowStatus(); + extern int SkGmOverflowStatus(); + extern int SkGmCableDiagStatus(); ++#ifdef SK_PHY_LP_MODE + extern int SkGmEnterLowPowerMode(); + extern int SkGmLeaveLowPowerMode(); ++#endif /* SK_PHY_LP_MODE */ + + #ifdef SK_DIAG + extern void SkGePhyRead(); +@@ -844,10 +1037,11 @@ + extern void SkXmSendCont(); + #endif /* SK_DIAG */ + +-#endif /* SK_KR_PROTO */ ++#endif /* SK_KR_PROTO */ + + #ifdef __cplusplus + } + #endif /* __cplusplus */ + + #endif /* __INC_SKGEINIT_H_ */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgepnm2.h linux-2.6.9.new/drivers/net/sk98lin/h/skgepnm2.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgepnm2.h 2004-10-19 05:54:38.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgepnm2.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgepnm2.h + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.36 $ +- * Date: $Date: 2003/05/23 12:45:13 $ ++ * Version: $Revision: 2.4 $ ++ * Date: $Date: 2005/05/03 06:42:43 $ + * Purpose: Defines for Private Network Management Interface + * + ****************************************************************************/ +@@ -28,8 +28,13 @@ + /* + * General definitions + */ +-#define SK_PNMI_CHIPSET_XMAC 1 /* XMAC11800FP */ +-#define SK_PNMI_CHIPSET_YUKON 2 /* YUKON */ ++#define SK_PNMI_CHIPSET_XMAC 1 /* XMAC11800FP */ ++#define SK_PNMI_CHIPSET_YUKON 2 /* YUKON */ ++#define SK_PNMI_CHIPSET_YUKON_LITE 3 /* YUKON-Lite (Rev. A1-A3) */ ++#define SK_PNMI_CHIPSET_YUKON_LP 4 /* YUKON-LP */ ++#define SK_PNMI_CHIPSET_YUKON_XL 5 /* YUKON-2 XL */ ++#define SK_PNMI_CHIPSET_YUKON_EC 6 /* YUKON-2 EC */ ++#define SK_PNMI_CHIPSET_YUKON_FE 7 /* YUKON-2 FE */ + + #define SK_PNMI_BUS_PCI 1 /* PCI bus*/ + +@@ -70,9 +75,9 @@ + /* + * VCT internal status values + */ +-#define SK_PNMI_VCT_PENDING 32 +-#define SK_PNMI_VCT_TEST_DONE 64 +-#define SK_PNMI_VCT_LINK 128 ++#define SK_PNMI_VCT_PENDING 0x20 ++#define SK_PNMI_VCT_TEST_DONE 0x40 ++#define SK_PNMI_VCT_LINK 0x80 + + /* + * Internal table definitions +@@ -323,7 +328,7 @@ + vSt, \ + pAC->Pnmi.MacUpdatedFlag, \ + pAC->Pnmi.RlmtUpdatedFlag, \ +- pAC->Pnmi.SirqUpdatedFlag))}} ++ pAC->Pnmi.SirqUpdatedFlag));}} + + #else /* !DEBUG */ + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgepnmi.h linux-2.6.9.new/drivers/net/sk98lin/h/skgepnmi.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgepnmi.h 2004-10-19 05:53:13.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgepnmi.h 2006-12-07 14:35:03.000000000 +0800 +@@ -1,9 +1,9 @@ + /***************************************************************************** + * + * Name: skgepnmi.h +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.62 $ +- * Date: $Date: 2003/08/15 12:31:52 $ ++ * Project: Gigabit Ethernet Adapters, PNMI-Module ++ * Version: $Revision: 2.9 $ ++ * Date: $Date: 2004/10/26 12:42:39 $ + * Purpose: Defines for Private Network Management Interface + * + ****************************************************************************/ +@@ -31,7 +31,7 @@ + #include "h/sktypes.h" + #include "h/skerror.h" + #include "h/sktimer.h" +-#include "h/ski2c.h" ++#include "h/sktwsi.h" + #include "h/skaddr.h" + #include "h/skrlmt.h" + #include "h/skvpd.h" +@@ -41,7 +41,6 @@ + */ + #define SK_PNMI_MDB_VERSION 0x00030001 /* 3.1 */ + +- + /* + * Event definitions + */ +@@ -54,16 +53,13 @@ + #define SK_PNMI_EVT_UTILIZATION_TIMER 7 /* Timer event for Utiliza. */ + #define SK_PNMI_EVT_CLEAR_COUNTER 8 /* Clear statistic counters */ + #define SK_PNMI_EVT_XMAC_RESET 9 /* XMAC will be reset */ +- + #define SK_PNMI_EVT_RLMT_PORT_UP 10 /* Port came logically up */ + #define SK_PNMI_EVT_RLMT_PORT_DOWN 11 /* Port went logically down */ + #define SK_PNMI_EVT_RLMT_SEGMENTATION 13 /* Two SP root bridges found */ + #define SK_PNMI_EVT_RLMT_ACTIVE_DOWN 14 /* Port went logically down */ + #define SK_PNMI_EVT_RLMT_ACTIVE_UP 15 /* Port came logically up */ +-#define SK_PNMI_EVT_RLMT_SET_NETS 16 /* 1. Parameter is number of nets +- 1 = single net; 2 = dual net */ +-#define SK_PNMI_EVT_VCT_RESET 17 /* VCT port reset timer event started with SET. */ +- ++#define SK_PNMI_EVT_RLMT_SET_NETS 16 /* Number of nets (1 or 2). */ ++#define SK_PNMI_EVT_VCT_RESET 17 /* VCT port reset timer event started with SET. */ + + /* + * Return values +@@ -78,7 +74,6 @@ + #define SK_PNMI_ERR_UNKNOWN_NET 7 + #define SK_PNMI_ERR_NOT_SUPPORTED 10 + +- + /* + * Return values of driver reset function SK_DRIVER_RESET() and + * driver event function SK_DRIVER_EVENT() +@@ -86,19 +81,17 @@ + #define SK_PNMI_ERR_OK 0 + #define SK_PNMI_ERR_FAIL 1 + +- + /* + * Return values of driver test function SK_DRIVER_SELFTEST() + */ + #define SK_PNMI_TST_UNKNOWN (1 << 0) +-#define SK_PNMI_TST_TRANCEIVER (1 << 1) ++#define SK_PNMI_TST_TRANCEIVER (1 << 1) + #define SK_PNMI_TST_ASIC (1 << 2) + #define SK_PNMI_TST_SENSOR (1 << 3) +-#define SK_PNMI_TST_POWERMGMT (1 << 4) ++#define SK_PNMI_TST_POWERMGMT (1 << 4) + #define SK_PNMI_TST_PCI (1 << 5) + #define SK_PNMI_TST_MAC (1 << 6) + +- + /* + * RLMT specific definitions + */ +@@ -352,6 +345,7 @@ + #define OID_SKGE_VCT_GET 0xFF020200 + #define OID_SKGE_VCT_SET 0xFF020201 + #define OID_SKGE_VCT_STATUS 0xFF020202 ++#define OID_SKGE_VCT_CAPABILITIES 0xFF020203 + + #ifdef SK_DIAG_SUPPORT + /* Defines for driver DIAG mode. */ +@@ -367,22 +361,69 @@ + #define OID_SKGE_PHY_TYPE 0xFF020215 + #define OID_SKGE_PHY_LP_MODE 0xFF020216 + ++/* ++ * Added for new DualNet IM driver V2 ++ * these OIDs should later be in pnmi.h ++ */ ++#define OID_SKGE_MAC_COUNT 0xFF020217 ++#define OID_SKGE_DUALNET_MODE 0xFF020218 ++#define OID_SKGE_SET_TAGHEADER 0xFF020219 ++ ++#ifdef SK_ASF ++/* Defines for ASF */ ++#define OID_SKGE_ASF 0xFF02021a ++#define OID_SKGE_ASF_STORE_CONFIG 0xFF02021b ++#define OID_SKGE_ASF_ENA 0xFF02021c ++#define OID_SKGE_ASF_RETRANS 0xFF02021d ++#define OID_SKGE_ASF_RETRANS_INT 0xFF02021e ++#define OID_SKGE_ASF_HB_ENA 0xFF02021f ++#define OID_SKGE_ASF_HB_INT 0xFF020220 ++#define OID_SKGE_ASF_WD_ENA 0xFF020221 ++#define OID_SKGE_ASF_WD_TIME 0xFF020222 ++#define OID_SKGE_ASF_IP_SOURCE 0xFF020223 ++#define OID_SKGE_ASF_MAC_SOURCE 0xFF020224 ++#define OID_SKGE_ASF_IP_DEST 0xFF020225 ++#define OID_SKGE_ASF_MAC_DEST 0xFF020226 ++#define OID_SKGE_ASF_COMMUNITY_NAME 0xFF020227 ++#define OID_SKGE_ASF_RSP_ENA 0xFF020228 ++#define OID_SKGE_ASF_RETRANS_COUNT_MIN 0xFF020229 ++#define OID_SKGE_ASF_RETRANS_COUNT_MAX 0xFF02022a ++#define OID_SKGE_ASF_RETRANS_INT_MIN 0xFF02022b ++#define OID_SKGE_ASF_RETRANS_INT_MAX 0xFF02022c ++#define OID_SKGE_ASF_HB_INT_MIN 0xFF02022d ++#define OID_SKGE_ASF_HB_INT_MAX 0xFF02022e ++#define OID_SKGE_ASF_WD_TIME_MIN 0xFF02022f ++#define OID_SKGE_ASF_WD_TIME_MAX 0xFF020230 ++#define OID_SKGE_ASF_HB_CAP 0xFF020231 ++#define OID_SKGE_ASF_WD_TIMER_RES 0xFF020232 ++#define OID_SKGE_ASF_GUID 0xFF020233 ++#define OID_SKGE_ASF_KEY_OP 0xFF020234 ++#define OID_SKGE_ASF_KEY_ADM 0xFF020235 ++#define OID_SKGE_ASF_KEY_GEN 0xFF020236 ++#define OID_SKGE_ASF_CAP 0xFF020237 ++#define OID_SKGE_ASF_PAR_1 0xFF020238 ++#define OID_SKGE_ASF_OVERALL_OID 0xFF020239 ++#define OID_SKGE_ASF_FWVER_OID 0xFF020240 ++#define OID_SKGE_ASF_ACPI_OID 0xFF020241 ++#define OID_SKGE_ASF_SMBUS_OID 0xFF020242 ++#endif /* SK_ASF */ ++ + /* VCT struct to store a backup copy of VCT data after a port reset. */ + typedef struct s_PnmiVct { + SK_U8 VctStatus; +- SK_U8 PCableLen; +- SK_U32 PMdiPairLen[4]; +- SK_U8 PMdiPairSts[4]; ++ SK_U8 CableLen; ++ SK_U32 MdiPairLen[4]; ++ SK_U8 MdiPairSts[4]; + } SK_PNMI_VCT; + + + /* VCT status values (to be given to CPA via OID_SKGE_VCT_STATUS). */ +-#define SK_PNMI_VCT_NONE 0 +-#define SK_PNMI_VCT_OLD_VCT_DATA 1 +-#define SK_PNMI_VCT_NEW_VCT_DATA 2 +-#define SK_PNMI_VCT_OLD_DSP_DATA 4 +-#define SK_PNMI_VCT_NEW_DSP_DATA 8 +-#define SK_PNMI_VCT_RUNNING 16 ++#define SK_PNMI_VCT_NONE 0x00 ++#define SK_PNMI_VCT_OLD_VCT_DATA 0x01 ++#define SK_PNMI_VCT_NEW_VCT_DATA 0x02 ++#define SK_PNMI_VCT_OLD_DSP_DATA 0x04 ++#define SK_PNMI_VCT_NEW_DSP_DATA 0x08 ++#define SK_PNMI_VCT_RUNNING 0x10 + + + /* VCT cable test status. */ +@@ -390,7 +431,12 @@ + #define SK_PNMI_VCT_SHORT_CABLE 1 + #define SK_PNMI_VCT_OPEN_CABLE 2 + #define SK_PNMI_VCT_TEST_FAIL 3 +-#define SK_PNMI_VCT_IMPEDANCE_MISMATCH 4 ++#define SK_PNMI_VCT_IMPEDANCE_MISMATCH 4 ++#define SK_PNMI_VCT_NOT_PRESENT 5 ++ ++/* VCT capabilities (needed for OID_SKGE_VCT_CAPABILITIES. */ ++#define SK_PNMI_VCT_SUPPORTED 1 ++#define SK_PNMI_VCT_NOT_SUPPORTED 0 + + #define OID_SKGE_TRAP_SEN_WAR_LOW 500 + #define OID_SKGE_TRAP_SEN_WAR_UPP 501 +@@ -419,7 +465,6 @@ + #define SK_SET_FULL_MIB 5 + #define SK_PRESET_FULL_MIB 6 + +- + /* + * Define error numbers and messages for syslog + */ +@@ -452,7 +497,7 @@ + #define SK_PNMI_ERR014 (SK_ERRBASE_PNMI + 14) + #define SK_PNMI_ERR014MSG "Vpd: Cannot read VPD keys" + #define SK_PNMI_ERR015 (SK_ERRBASE_PNMI + 15) +-#define SK_PNMI_ERR015MSG "Vpd: Internal array for VPD keys to small" ++#define SK_PNMI_ERR015MSG "Vpd: Internal array for VPD keys too small" + #define SK_PNMI_ERR016 (SK_ERRBASE_PNMI + 16) + #define SK_PNMI_ERR016MSG "Vpd: Key string too long" + #define SK_PNMI_ERR017 (SK_ERRBASE_PNMI + 17) +@@ -494,9 +539,9 @@ + #define SK_PNMI_ERR036 (SK_ERRBASE_PNMI + 36) + #define SK_PNMI_ERR036MSG "" + #define SK_PNMI_ERR037 (SK_ERRBASE_PNMI + 37) +-#define SK_PNMI_ERR037MSG "Rlmt: SK_RLMT_MODE_CHANGE event return not 0" ++#define SK_PNMI_ERR037MSG "Rlmt: SK_RLMT_MODE_CHANGE event returned not 0" + #define SK_PNMI_ERR038 (SK_ERRBASE_PNMI + 38) +-#define SK_PNMI_ERR038MSG "Rlmt: SK_RLMT_PREFPORT_CHANGE event return not 0" ++#define SK_PNMI_ERR038MSG "Rlmt: SK_RLMT_PREFPORT_CHANGE event returned not 0" + #define SK_PNMI_ERR039 (SK_ERRBASE_PNMI + 39) + #define SK_PNMI_ERR039MSG "RlmtStat: Unknown OID" + #define SK_PNMI_ERR040 (SK_ERRBASE_PNMI + 40) +@@ -514,9 +559,9 @@ + #define SK_PNMI_ERR046 (SK_ERRBASE_PNMI + 46) + #define SK_PNMI_ERR046MSG "Monitor: Unknown OID" + #define SK_PNMI_ERR047 (SK_ERRBASE_PNMI + 47) +-#define SK_PNMI_ERR047MSG "SirqUpdate: Event function returns not 0" ++#define SK_PNMI_ERR047MSG "SirqUpdate: Event function returned not 0" + #define SK_PNMI_ERR048 (SK_ERRBASE_PNMI + 48) +-#define SK_PNMI_ERR048MSG "RlmtUpdate: Event function returns not 0" ++#define SK_PNMI_ERR048MSG "RlmtUpdate: Event function returned not 0" + #define SK_PNMI_ERR049 (SK_ERRBASE_PNMI + 49) + #define SK_PNMI_ERR049MSG "SkPnmiInit: Invalid size of 'CounterOffset' struct!!" + #define SK_PNMI_ERR050 (SK_ERRBASE_PNMI + 50) +@@ -826,23 +871,25 @@ + } SK_PNMI_STRUCT_DATA; + + #define SK_PNMI_STRUCT_SIZE (sizeof(SK_PNMI_STRUCT_DATA)) ++ ++/* The ReturnStatus field must be located before VpdFreeBytes! */ + #define SK_PNMI_MIN_STRUCT_SIZE ((unsigned int)(SK_UPTR)\ + &(((SK_PNMI_STRUCT_DATA *)0)->VpdFreeBytes)) +- /* +- * ReturnStatus field +- * must be located +- * before VpdFreeBytes +- */ + + /* + * Various definitions + */ ++#define SK_PNMI_EVT_TIMER_CHECK 28125000L /* 28125 ms */ ++ ++#define SK_PNMI_VCT_TIMER_CHECK 4000000L /* 4 sec. */ ++ + #define SK_PNMI_MAX_PROTOS 3 + +-#define SK_PNMI_CNT_NO 66 /* Must have the value of the enum +- * SK_PNMI_MAX_IDX. Define SK_PNMI_CHECK +- * for check while init phase 1 +- */ ++/* ++ * SK_PNMI_CNT_NO must have the value of the enum SK_PNMI_MAX_IDX. ++ * Define SK_PNMI_CHECK to check this during init level SK_INIT_IO. ++ */ ++#define SK_PNMI_CNT_NO 66 + + /* + * Estimate data structure +@@ -856,14 +903,6 @@ + + + /* +- * VCT timer data structure +- */ +-typedef struct s_VctTimer { +- SK_TIMER VctTimer; +-} SK_PNMI_VCT_TIMER; +- +- +-/* + * PNMI specific adapter context structure + */ + typedef struct s_PnmiPort { +@@ -933,9 +972,9 @@ + unsigned int TrapQueueEnd; + unsigned int TrapBufPad; + unsigned int TrapUnique; +- SK_U8 VctStatus[SK_MAX_MACS]; +- SK_PNMI_VCT VctBackup[SK_MAX_MACS]; +- SK_PNMI_VCT_TIMER VctTimeout[SK_MAX_MACS]; ++ SK_U8 VctStatus[SK_MAX_MACS]; ++ SK_PNMI_VCT VctBackup[SK_MAX_MACS]; ++ SK_TIMER VctTimeout[SK_MAX_MACS]; + #ifdef SK_DIAG_SUPPORT + SK_U32 DiagAttached; + #endif /* SK_DIAG_SUPPORT */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgesirq.h linux-2.6.9.new/drivers/net/sk98lin/h/skgesirq.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgesirq.h 2004-10-19 05:55:17.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgesirq.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,22 +2,21 @@ + * + * Name: skgesirq.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.30 $ +- * Date: $Date: 2003/07/04 12:34:13 $ +- * Purpose: SK specific Gigabit Ethernet special IRQ functions ++ * Version: $Revision: 2.3 $ ++ * Date: $Date: 2004/05/28 14:42:03 $ ++ * Purpose: Gigabit Ethernet special IRQ functions + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -44,10 +43,10 @@ + #define SK_HWEV_SET_SPEED 9 /* Set Link Speed by PNMI */ + #define SK_HWEV_HALFDUP_CHK 10 /* Half Duplex Hangup Workaround */ + +-#define SK_WA_ACT_TIME (5000000UL) /* 5 sec */ +-#define SK_WA_INA_TIME (100000UL) /* 100 msec */ ++#define SK_WA_ACT_TIME 1000000UL /* 1000 msec (1 sec) */ ++#define SK_WA_INA_TIME 100000UL /* 100 msec */ + +-#define SK_HALFDUP_CHK_TIME (10000UL) /* 10 msec */ ++#define SK_HALFDUP_CHK_TIME 10000UL /* 10 msec */ + + /* + * Define the error numbers and messages +@@ -102,10 +101,35 @@ + #define SKERR_SIRQ_E024MSG "FIFO overflow error" + #define SKERR_SIRQ_E025 (SKERR_SIRQ_E024+1) + #define SKERR_SIRQ_E025MSG "2 Pair Downshift detected" ++#define SKERR_SIRQ_E026 (SKERR_SIRQ_E025+1) ++#define SKERR_SIRQ_E026MSG "Uncorrectable PCI Express error" ++#define SKERR_SIRQ_E027 (SKERR_SIRQ_E026+1) ++#define SKERR_SIRQ_E027MSG "PCI express protocol violation error" ++#define SKERR_SIRQ_E028 (SKERR_SIRQ_E027+1) ++#define SKERR_SIRQ_E028MSG "Parity error on RAM 1 (read)" ++#define SKERR_SIRQ_E029 (SKERR_SIRQ_E028+1) ++#define SKERR_SIRQ_E029MSG "Parity error on RAM 1 (write)" ++#define SKERR_SIRQ_E030 (SKERR_SIRQ_E029+1) ++#define SKERR_SIRQ_E030MSG "Parity error on RAM 2 (read)" ++#define SKERR_SIRQ_E031 (SKERR_SIRQ_E030+1) ++#define SKERR_SIRQ_E031MSG "Parity error on RAM 2 (write)" ++#define SKERR_SIRQ_E032 (SKERR_SIRQ_E031+1) ++#define SKERR_SIRQ_E032MSG "TCP segmentation error async. queue 1" ++#define SKERR_SIRQ_E033 (SKERR_SIRQ_E032+1) ++#define SKERR_SIRQ_E033MSG "TCP segmentation error sync. queue 1" ++#define SKERR_SIRQ_E034 (SKERR_SIRQ_E033+1) ++#define SKERR_SIRQ_E034MSG "TCP segmentation error async. queue 2" ++#define SKERR_SIRQ_E035 (SKERR_SIRQ_E034+1) ++#define SKERR_SIRQ_E035MSG "TCP segmentation error sync. queue 2" ++#define SKERR_SIRQ_E036 (SKERR_SIRQ_E035+1) ++#define SKERR_SIRQ_E036MSG "CHECK failure polling unit" + + extern void SkGeSirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus); + extern int SkGeSirqEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para); + extern void SkHWLinkUp(SK_AC *pAC, SK_IOC IoC, int Port); + extern void SkHWLinkDown(SK_AC *pAC, SK_IOC IoC, int Port); ++extern void SkGeYuSirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus); ++extern void SkYuk2SirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus); + + #endif /* _INC_SKGESIRQ_H_ */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skgetwsi.h linux-2.6.9.new/drivers/net/sk98lin/h/skgetwsi.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skgetwsi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skgetwsi.h 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,241 @@ ++/****************************************************************************** ++ * ++ * Name: skgetwsi.h ++ * Project: Gigabit Ethernet Adapters, TWSI-Module ++ * Version: $Revision: 1.7 $ ++ * Date: $Date: 2004/12/20 14:48:51 $ ++ * Purpose: Special defines for TWSI ++ * ++ ******************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 1998-2002 SysKonnect. ++ * (C)Copyright 2002-2004 Marvell. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ ******************************************************************************/ ++ ++/* ++ * SKGETWSI.H contains all SK-98xx specific defines for the TWSI handling ++ */ ++ ++#ifndef _INC_SKGETWSI_H_ ++#define _INC_SKGETWSI_H_ ++ ++/* ++ * Macros to access the B2_I2C_CTRL ++ */ ++#define SK_I2C_CTL(IoC, flag, dev, dev_size, reg, burst) \ ++ SK_OUT32(IoC, B2_I2C_CTRL,\ ++ (flag ? 0x80000000UL : 0x0L) | \ ++ (((SK_U32)reg << 16) & I2C_ADDR) | \ ++ (((SK_U32)dev << 9) & I2C_DEV_SEL) | \ ++ (dev_size & I2C_DEV_SIZE) | \ ++ ((burst << 4) & I2C_BURST_LEN)) ++ ++#define SK_I2C_STOP(IoC) { \ ++ SK_U32 I2cCtrl; \ ++ SK_IN32(IoC, B2_I2C_CTRL, &I2cCtrl); \ ++ SK_OUT32(IoC, B2_I2C_CTRL, I2cCtrl | I2C_STOP); \ ++} ++ ++#define SK_I2C_GET_CTL(IoC, pI2cCtrl) SK_IN32(IoC, B2_I2C_CTRL, pI2cCtrl) ++ ++/* ++ * Macros to access the TWSI SW Registers ++ */ ++#define SK_I2C_SET_BIT(IoC, SetBits) { \ ++ SK_U8 OrgBits; \ ++ SK_IN8(IoC, B2_I2C_SW, &OrgBits); \ ++ SK_OUT8(IoC, B2_I2C_SW, OrgBits | (SK_U8)(SetBits)); \ ++} ++ ++#define SK_I2C_CLR_BIT(IoC, ClrBits) { \ ++ SK_U8 OrgBits; \ ++ SK_IN8(IoC, B2_I2C_SW, &OrgBits); \ ++ SK_OUT8(IoC, B2_I2C_SW, OrgBits & ~((SK_U8)(ClrBits))); \ ++} ++ ++#define SK_I2C_GET_SW(IoC, pI2cSw) SK_IN8(IoC, B2_I2C_SW, pI2cSw) ++ ++/* ++ * define the possible sensor states ++ */ ++#define SK_SEN_IDLE 0 /* Idle: sensor not read */ ++#define SK_SEN_VALUE 1 /* Value Read cycle */ ++#define SK_SEN_VALEXT 2 /* Extended Value Read cycle */ ++ ++/* ++ * Conversion factor to convert read Voltage sensor to milli Volt ++ * Conversion factor to convert read Temperature sensor to 10th degree Celsius ++ */ ++#define SK_LM80_VT_LSB 22 /* 22mV LSB resolution */ ++#define SK_LM80_TEMP_LSB 10 /* 1 degree LSB resolution */ ++#define SK_LM80_TEMPEXT_LSB 5 /* 0.5 degree LSB resolution for ext. val. */ ++ ++/* ++ * formula: counter = (22500*60)/(rpm * divisor * pulses/2) ++ * assuming: 6500rpm, 4 pulses, divisor 1 ++ */ ++#define SK_LM80_FAN_FAKTOR ((22500L*60)/(1*2)) ++ ++/* ++ * Define sensor management data ++ * Maximum is reached on Genesis copper dual port and Yukon-64 ++ * Board specific maximum is in pAC->I2c.MaxSens ++ */ ++#define SK_MAX_SENSORS 8 /* maximal no. of installed sensors */ ++#define SK_MIN_SENSORS 5 /* minimal no. of installed sensors */ ++ ++/* ++ * To watch the state machine (SM) use the timer in two ways ++ * instead of one as hitherto ++ */ ++#define SK_TIMER_WATCH_SM 0 /* Watch the SM to finish in a spec. time */ ++#define SK_TIMER_NEW_GAUGING 1 /* Start a new gauging when timer expires */ ++ ++/* ++ * Defines for the individual thresholds ++ */ ++ ++#define C_PLUS_20 120 / 100 ++#define C_PLUS_15 115 / 100 ++#define C_PLUS_10 110 / 100 ++#define C_PLUS_5 105 / 100 ++#define C_MINUS_5 95 / 100 ++#define C_MINUS_10 90 / 100 ++#define C_MINUS_15 85 / 100 ++ ++/* Temperature sensor */ ++#define SK_SEN_TEMP_HIGH_ERR 800 /* Temperature High Err Threshold */ ++#define SK_SEN_TEMP_HIGH_WARN 700 /* Temperature High Warn Threshold */ ++#define SK_SEN_TEMP_LOW_WARN 100 /* Temperature Low Warn Threshold */ ++#define SK_SEN_TEMP_LOW_ERR 0 /* Temperature Low Err Threshold */ ++ ++/* VCC which should be 5 V */ ++#define SK_SEN_PCI_5V_HIGH_ERR 5588 /* Voltage PCI High Err Threshold */ ++#define SK_SEN_PCI_5V_HIGH_WARN 5346 /* Voltage PCI High Warn Threshold */ ++#define SK_SEN_PCI_5V_LOW_WARN 4664 /* Voltage PCI Low Warn Threshold */ ++#define SK_SEN_PCI_5V_LOW_ERR 4422 /* Voltage PCI Low Err Threshold */ ++ ++/* ++ * VIO may be 5 V or 3.3 V. Initialization takes two parts: ++ * 1. Initialize lowest lower limit and highest higher limit. ++ * 2. After the first value is read correct the upper or the lower limit to ++ * the appropriate C constant. ++ * ++ * Warning limits are +-5% of the exepected voltage. ++ * Error limits are +-10% of the expected voltage. ++ */ ++ ++/* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */ ++ ++#define SK_SEN_PCI_IO_5V_HIGH_ERR 5566 /* + 10% V PCI-IO High Err Threshold */ ++#define SK_SEN_PCI_IO_5V_HIGH_WARN 5324 /* + 5% V PCI-IO High Warn Threshold */ ++ /* 5000 mVolt */ ++#define SK_SEN_PCI_IO_5V_LOW_WARN 4686 /* - 5% V PCI-IO Low Warn Threshold */ ++#define SK_SEN_PCI_IO_5V_LOW_ERR 4444 /* - 10% V PCI-IO Low Err Threshold */ ++ ++#define SK_SEN_PCI_IO_RANGE_LIMITER 4000 /* 4000 mV range delimiter */ ++ ++/* correction values for the second pass */ ++#define SK_SEN_PCI_IO_3V3_HIGH_ERR 3850 /* + 15% V PCI-IO High Err Threshold */ ++#define SK_SEN_PCI_IO_3V3_HIGH_WARN 3674 /* + 10% V PCI-IO High Warn Threshold */ ++ /* 3300 mVolt */ ++#define SK_SEN_PCI_IO_3V3_LOW_WARN 2926 /* - 10% V PCI-IO Low Warn Threshold */ ++#define SK_SEN_PCI_IO_3V3_LOW_ERR 2772 /* - 15% V PCI-IO Low Err Threshold */ ++ ++/* ++ * VDD voltage ++ */ ++#define SK_SEN_VDD_HIGH_ERR 3630 /* Voltage ASIC High Err Threshold */ ++#define SK_SEN_VDD_HIGH_WARN 3476 /* Voltage ASIC High Warn Threshold */ ++#define SK_SEN_VDD_LOW_WARN 3146 /* Voltage ASIC Low Warn Threshold */ ++#define SK_SEN_VDD_LOW_ERR 2970 /* Voltage ASIC Low Err Threshold */ ++ ++/* ++ * PHY PLL 3V3 voltage ++ */ ++#define SK_SEN_PLL_3V3_HIGH_ERR 3630 /* Voltage PMA High Err Threshold */ ++#define SK_SEN_PLL_3V3_HIGH_WARN 3476 /* Voltage PMA High Warn Threshold */ ++#define SK_SEN_PLL_3V3_LOW_WARN 3146 /* Voltage PMA Low Warn Threshold */ ++#define SK_SEN_PLL_3V3_LOW_ERR 2970 /* Voltage PMA Low Err Threshold */ ++ ++/* ++ * VAUX (YUKON only) ++ */ ++#define SK_SEN_VAUX_3V3_VAL 3300 /* Voltage VAUX 3.3 Volt */ ++ ++#define SK_SEN_VAUX_3V3_HIGH_ERR (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_PLUS_10) ++#define SK_SEN_VAUX_3V3_HIGH_WARN (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_PLUS_5) ++#define SK_SEN_VAUX_3V3_LOW_WARN (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_MINUS_5) ++#define SK_SEN_VAUX_3V3_LOW_ERR (SK_I32)(SK_SEN_VAUX_3V3_VAL * C_MINUS_10) ++ ++#define SK_SEN_VAUX_RANGE_LIMITER 1000 /* 1000 mV range delimiter */ ++ ++/* ++ * PHY 2V5 voltage ++ */ ++#define SK_SEN_PHY_2V5_VAL 2500 /* Voltage PHY 2.5 Volt */ ++ ++#define SK_SEN_PHY_2V5_HIGH_ERR (SK_I32)(SK_SEN_PHY_2V5_VAL * C_PLUS_10) ++#define SK_SEN_PHY_2V5_HIGH_WARN (SK_I32)(SK_SEN_PHY_2V5_VAL * C_PLUS_5) ++#define SK_SEN_PHY_2V5_LOW_WARN (SK_I32)(SK_SEN_PHY_2V5_VAL * C_MINUS_5) ++#define SK_SEN_PHY_2V5_LOW_ERR (SK_I32)(SK_SEN_PHY_2V5_VAL * C_MINUS_10) ++ ++/* ++ * ASIC Core 1V5 voltage (YUKON only) ++ */ ++#define SK_SEN_CORE_1V5_VAL 1500 /* Voltage ASIC Core 1.5 Volt */ ++ ++#define SK_SEN_CORE_1V5_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V5_VAL * C_PLUS_10) ++#define SK_SEN_CORE_1V5_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V5_VAL * C_PLUS_5) ++#define SK_SEN_CORE_1V5_LOW_WARN (SK_I32)(SK_SEN_CORE_1V5_VAL * C_MINUS_5) ++#define SK_SEN_CORE_1V5_LOW_ERR (SK_I32)(SK_SEN_CORE_1V5_VAL * C_MINUS_10) ++ ++/* ++ * ASIC Core 1V2 (1V3) voltage (YUKON-2 only) ++ */ ++#define SK_SEN_CORE_1V2_VAL 1200 /* Voltage ASIC Core 1.2 Volt */ ++ ++#define SK_SEN_CORE_1V2_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V2_VAL * C_PLUS_20) ++#define SK_SEN_CORE_1V2_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V2_VAL * C_PLUS_15) ++#define SK_SEN_CORE_1V2_LOW_WARN (SK_I32)(SK_SEN_CORE_1V2_VAL * C_MINUS_5) ++#define SK_SEN_CORE_1V2_LOW_ERR (SK_I32)(SK_SEN_CORE_1V2_VAL * C_MINUS_10) ++ ++#define SK_SEN_CORE_1V3_VAL 1300 /* Voltage ASIC Core 1.3 Volt */ ++ ++#define SK_SEN_CORE_1V3_HIGH_ERR (SK_I32)(SK_SEN_CORE_1V3_VAL * C_PLUS_15) ++#define SK_SEN_CORE_1V3_HIGH_WARN (SK_I32)(SK_SEN_CORE_1V3_VAL * C_PLUS_10) ++#define SK_SEN_CORE_1V3_LOW_WARN (SK_I32)(SK_SEN_CORE_1V3_VAL * C_MINUS_5) ++#define SK_SEN_CORE_1V3_LOW_ERR (SK_I32)(SK_SEN_CORE_1V3_VAL * C_MINUS_10) ++ ++/* ++ * FAN 1 speed ++ */ ++/* assuming: 6500rpm +-15%, 4 pulses, ++ * warning at: 80 % ++ * error at: 70 % ++ * no upper limit ++ */ ++#define SK_SEN_FAN_HIGH_ERR 20000 /* FAN Speed High Err Threshold */ ++#define SK_SEN_FAN_HIGH_WARN 20000 /* FAN Speed High Warn Threshold */ ++#define SK_SEN_FAN_LOW_WARN 5200 /* FAN Speed Low Warn Threshold */ ++#define SK_SEN_FAN_LOW_ERR 4550 /* FAN Speed Low Err Threshold */ ++ ++/* ++ * Some Voltages need dynamic thresholds ++ */ ++#define SK_SEN_DYN_INIT_NONE 0 /* No dynamic init of thresholds */ ++#define SK_SEN_DYN_INIT_PCI_IO 10 /* Init PCI-IO with new thresholds */ ++#define SK_SEN_DYN_INIT_VAUX 11 /* Init VAUX with new thresholds */ ++ ++extern int SkLm80ReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen); ++#endif /* n_INC_SKGETWSI_H */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/ski2c.h linux-2.6.9.new/drivers/net/sk98lin/h/ski2c.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/ski2c.h 2004-10-19 05:53:45.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/ski2c.h 1970-01-01 08:00:00.000000000 +0800 +@@ -1,177 +0,0 @@ +-/****************************************************************************** +- * +- * Name: ski2c.h +- * Project: Gigabit Ethernet Adapters, TWSI-Module +- * Version: $Revision: 1.35 $ +- * Date: $Date: 2003/10/20 09:06:30 $ +- * Purpose: Defines to access Voltage and Temperature Sensor +- * +- ******************************************************************************/ +- +-/****************************************************************************** +- * +- * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * The information in this file is provided "AS IS" without warranty. +- * +- ******************************************************************************/ +- +-/* +- * SKI2C.H contains all I2C specific defines +- */ +- +-#ifndef _SKI2C_H_ +-#define _SKI2C_H_ +- +-typedef struct s_Sensor SK_SENSOR; +- +-#include "h/skgei2c.h" +- +-/* +- * Define the I2C events. +- */ +-#define SK_I2CEV_IRQ 1 /* IRQ happened Event */ +-#define SK_I2CEV_TIM 2 /* Timeout event */ +-#define SK_I2CEV_CLEAR 3 /* Clear MIB Values */ +- +-/* +- * Define READ and WRITE Constants. +- */ +-#define I2C_READ 0 +-#define I2C_WRITE 1 +-#define I2C_BURST 1 +-#define I2C_SINGLE 0 +- +-#define SKERR_I2C_E001 (SK_ERRBASE_I2C+0) +-#define SKERR_I2C_E001MSG "Sensor index unknown" +-#define SKERR_I2C_E002 (SKERR_I2C_E001+1) +-#define SKERR_I2C_E002MSG "TWSI: transfer does not complete" +-#define SKERR_I2C_E003 (SKERR_I2C_E002+1) +-#define SKERR_I2C_E003MSG "LM80: NAK on device send" +-#define SKERR_I2C_E004 (SKERR_I2C_E003+1) +-#define SKERR_I2C_E004MSG "LM80: NAK on register send" +-#define SKERR_I2C_E005 (SKERR_I2C_E004+1) +-#define SKERR_I2C_E005MSG "LM80: NAK on device (2) send" +-#define SKERR_I2C_E006 (SKERR_I2C_E005+1) +-#define SKERR_I2C_E006MSG "Unknown event" +-#define SKERR_I2C_E007 (SKERR_I2C_E006+1) +-#define SKERR_I2C_E007MSG "LM80 read out of state" +-#define SKERR_I2C_E008 (SKERR_I2C_E007+1) +-#define SKERR_I2C_E008MSG "Unexpected sensor read completed" +-#define SKERR_I2C_E009 (SKERR_I2C_E008+1) +-#define SKERR_I2C_E009MSG "WARNING: temperature sensor out of range" +-#define SKERR_I2C_E010 (SKERR_I2C_E009+1) +-#define SKERR_I2C_E010MSG "WARNING: voltage sensor out of range" +-#define SKERR_I2C_E011 (SKERR_I2C_E010+1) +-#define SKERR_I2C_E011MSG "ERROR: temperature sensor out of range" +-#define SKERR_I2C_E012 (SKERR_I2C_E011+1) +-#define SKERR_I2C_E012MSG "ERROR: voltage sensor out of range" +-#define SKERR_I2C_E013 (SKERR_I2C_E012+1) +-#define SKERR_I2C_E013MSG "ERROR: couldn't init sensor" +-#define SKERR_I2C_E014 (SKERR_I2C_E013+1) +-#define SKERR_I2C_E014MSG "WARNING: fan sensor out of range" +-#define SKERR_I2C_E015 (SKERR_I2C_E014+1) +-#define SKERR_I2C_E015MSG "ERROR: fan sensor out of range" +-#define SKERR_I2C_E016 (SKERR_I2C_E015+1) +-#define SKERR_I2C_E016MSG "TWSI: active transfer does not complete" +- +-/* +- * Define Timeout values +- */ +-#define SK_I2C_TIM_LONG 2000000L /* 2 seconds */ +-#define SK_I2C_TIM_SHORT 100000L /* 100 milliseconds */ +-#define SK_I2C_TIM_WATCH 1000000L /* 1 second */ +- +-/* +- * Define trap and error log hold times +- */ +-#ifndef SK_SEN_ERR_TR_HOLD +-#define SK_SEN_ERR_TR_HOLD (4*SK_TICKS_PER_SEC) +-#endif +-#ifndef SK_SEN_ERR_LOG_HOLD +-#define SK_SEN_ERR_LOG_HOLD (60*SK_TICKS_PER_SEC) +-#endif +-#ifndef SK_SEN_WARN_TR_HOLD +-#define SK_SEN_WARN_TR_HOLD (15*SK_TICKS_PER_SEC) +-#endif +-#ifndef SK_SEN_WARN_LOG_HOLD +-#define SK_SEN_WARN_LOG_HOLD (15*60*SK_TICKS_PER_SEC) +-#endif +- +-/* +- * Defines for SenType +- */ +-#define SK_SEN_UNKNOWN 0 +-#define SK_SEN_TEMP 1 +-#define SK_SEN_VOLT 2 +-#define SK_SEN_FAN 3 +- +-/* +- * Define for the SenErrorFlag +- */ +-#define SK_SEN_ERR_NOT_PRESENT 0 /* Error Flag: Sensor not present */ +-#define SK_SEN_ERR_OK 1 /* Error Flag: O.K. */ +-#define SK_SEN_ERR_WARN 2 /* Error Flag: Warning */ +-#define SK_SEN_ERR_ERR 3 /* Error Flag: Error */ +-#define SK_SEN_ERR_FAULTY 4 /* Error Flag: Faulty */ +- +-/* +- * Define the Sensor struct +- */ +-struct s_Sensor { +- char *SenDesc; /* Description */ +- int SenType; /* Voltage or Temperature */ +- SK_I32 SenValue; /* Current value of the sensor */ +- SK_I32 SenThreErrHigh; /* High error Threshhold of this sensor */ +- SK_I32 SenThreWarnHigh; /* High warning Threshhold of this sensor */ +- SK_I32 SenThreErrLow; /* Lower error Threshold of the sensor */ +- SK_I32 SenThreWarnLow; /* Lower warning Threshold of the sensor */ +- int SenErrFlag; /* Sensor indicated an error */ +- SK_BOOL SenInit; /* Is sensor initialized ? */ +- SK_U64 SenErrCts; /* Error trap counter */ +- SK_U64 SenWarnCts; /* Warning trap counter */ +- SK_U64 SenBegErrTS; /* Begin error timestamp */ +- SK_U64 SenBegWarnTS; /* Begin warning timestamp */ +- SK_U64 SenLastErrTrapTS; /* Last error trap timestamp */ +- SK_U64 SenLastErrLogTS; /* Last error log timestamp */ +- SK_U64 SenLastWarnTrapTS; /* Last warning trap timestamp */ +- SK_U64 SenLastWarnLogTS; /* Last warning log timestamp */ +- int SenState; /* Sensor State (see HW specific include) */ +- int (*SenRead)(SK_AC *pAC, SK_IOC IoC, struct s_Sensor *pSen); +- /* Sensors read function */ +- SK_U16 SenReg; /* Register Address for this sensor */ +- SK_U8 SenDev; /* Device Selection for this sensor */ +-}; +- +-typedef struct s_I2c { +- SK_SENSOR SenTable[SK_MAX_SENSORS]; /* Sensor Table */ +- int CurrSens; /* Which sensor is currently queried */ +- int MaxSens; /* Max. number of sensors */ +- int TimerMode; /* Use the timer also to watch the state machine */ +- int InitLevel; /* Initialized Level */ +-#ifndef SK_DIAG +- int DummyReads; /* Number of non-checked dummy reads */ +- SK_TIMER SenTimer; /* Sensors timer */ +-#endif /* !SK_DIAG */ +-} SK_I2C; +- +-extern int SkI2cInit(SK_AC *pAC, SK_IOC IoC, int Level); +-extern int SkI2cWrite(SK_AC *pAC, SK_IOC IoC, SK_U32 Data, int Dev, int Size, +- int Reg, int Burst); +-extern int SkI2cReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen); +-#ifdef SK_DIAG +-extern SK_U32 SkI2cRead(SK_AC *pAC, SK_IOC IoC, int Dev, int Size, int Reg, +- int Burst); +-#else /* !SK_DIAG */ +-extern int SkI2cEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para); +-extern void SkI2cWaitIrq(SK_AC *pAC, SK_IOC IoC); +-extern void SkI2cIsr(SK_AC *pAC, SK_IOC IoC); +-#endif /* !SK_DIAG */ +-#endif /* n_SKI2C_H */ +- +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skqueue.h linux-2.6.9.new/drivers/net/sk98lin/h/skqueue.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skqueue.h 2004-10-19 05:55:29.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skqueue.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skqueue.h + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.16 $ +- * Date: $Date: 2003/09/16 12:50:32 $ ++ * Version: $Revision: 2.3 $ ++ * Date: $Date: 2004/05/14 13:39:15 $ + * Purpose: Defines for the Event queue + * + ******************************************************************************/ +@@ -45,6 +45,9 @@ + #define SKGE_RSF 11 /* RSF Aggregation Event Class */ + #define SKGE_MARKER 12 /* MARKER Aggregation Event Class */ + #define SKGE_FD 13 /* FD Distributor Event Class */ ++#ifdef SK_ASF ++#define SKGE_ASF 14 /* ASF Event Class */ ++#endif + + /* + * define event queue as circular buffer +@@ -90,5 +93,11 @@ + #define SKERR_Q_E001MSG "Event queue overflow" + #define SKERR_Q_E002 (SKERR_Q_E001+1) + #define SKERR_Q_E002MSG "Undefined event class" ++#define SKERR_Q_E003 (SKERR_Q_E001+2) ++#define SKERR_Q_E003MSG "Event queued in Init Level 0" ++#define SKERR_Q_E004 (SKERR_Q_E001+3) ++#define SKERR_Q_E004MSG "Error Reported from Event Fuction (Queue Blocked)" ++#define SKERR_Q_E005 (SKERR_Q_E001+4) ++#define SKERR_Q_E005MSG "Event scheduler called in Init Level 0 or 1" + #endif /* _SKQUEUE_H_ */ + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skrlmt.h linux-2.6.9.new/drivers/net/sk98lin/h/skrlmt.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skrlmt.h 2004-10-19 05:55:36.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skrlmt.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skrlmt.h + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.37 $ +- * Date: $Date: 2003/04/15 09:43:43 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:09 $ + * Purpose: Header file for Redundant Link ManagemenT. + * + ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/sktimer.h linux-2.6.9.new/drivers/net/sk98lin/h/sktimer.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/sktimer.h 2004-10-19 05:53:35.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/sktimer.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: sktimer.h + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.11 $ +- * Date: $Date: 2003/09/16 12:58:18 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:09 $ + * Purpose: Defines for the timer functions + * + ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/sktwsi.h linux-2.6.9.new/drivers/net/sk98lin/h/sktwsi.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/sktwsi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/sktwsi.h 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,177 @@ ++/****************************************************************************** ++ * ++ * Name: sktwsi.h ++ * Project: Gigabit Ethernet Adapters, TWSI-Module ++ * Version: $Revision: 1.1 $ ++ * Date: $Date: 2003/12/19 14:02:56 $ ++ * Purpose: Defines to access Voltage and Temperature Sensor ++ * ++ ******************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 1998-2002 SysKonnect. ++ * (C)Copyright 2002-2003 Marvell. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ ******************************************************************************/ ++ ++/* ++ * SKTWSI.H contains all TWSI specific defines ++ */ ++ ++#ifndef _SKTWSI_H_ ++#define _SKTWSI_H_ ++ ++typedef struct s_Sensor SK_SENSOR; ++ ++#include "h/skgetwsi.h" ++ ++/* ++ * Define the TWSI events. ++ */ ++#define SK_I2CEV_IRQ 1 /* IRQ happened Event */ ++#define SK_I2CEV_TIM 2 /* Timeout event */ ++#define SK_I2CEV_CLEAR 3 /* Clear MIB Values */ ++ ++/* ++ * Define READ and WRITE Constants. ++ */ ++#define I2C_READ 0 ++#define I2C_WRITE 1 ++#define I2C_BURST 1 ++#define I2C_SINGLE 0 ++ ++#define SKERR_I2C_E001 (SK_ERRBASE_I2C+0) ++#define SKERR_I2C_E001MSG "Sensor index unknown" ++#define SKERR_I2C_E002 (SKERR_I2C_E001+1) ++#define SKERR_I2C_E002MSG "TWSI: transfer does not complete" ++#define SKERR_I2C_E003 (SKERR_I2C_E002+1) ++#define SKERR_I2C_E003MSG "LM80: NAK on device send" ++#define SKERR_I2C_E004 (SKERR_I2C_E003+1) ++#define SKERR_I2C_E004MSG "LM80: NAK on register send" ++#define SKERR_I2C_E005 (SKERR_I2C_E004+1) ++#define SKERR_I2C_E005MSG "LM80: NAK on device (2) send" ++#define SKERR_I2C_E006 (SKERR_I2C_E005+1) ++#define SKERR_I2C_E006MSG "Unknown event" ++#define SKERR_I2C_E007 (SKERR_I2C_E006+1) ++#define SKERR_I2C_E007MSG "LM80 read out of state" ++#define SKERR_I2C_E008 (SKERR_I2C_E007+1) ++#define SKERR_I2C_E008MSG "Unexpected sensor read completed" ++#define SKERR_I2C_E009 (SKERR_I2C_E008+1) ++#define SKERR_I2C_E009MSG "WARNING: temperature sensor out of range" ++#define SKERR_I2C_E010 (SKERR_I2C_E009+1) ++#define SKERR_I2C_E010MSG "WARNING: voltage sensor out of range" ++#define SKERR_I2C_E011 (SKERR_I2C_E010+1) ++#define SKERR_I2C_E011MSG "ERROR: temperature sensor out of range" ++#define SKERR_I2C_E012 (SKERR_I2C_E011+1) ++#define SKERR_I2C_E012MSG "ERROR: voltage sensor out of range" ++#define SKERR_I2C_E013 (SKERR_I2C_E012+1) ++#define SKERR_I2C_E013MSG "ERROR: couldn't init sensor" ++#define SKERR_I2C_E014 (SKERR_I2C_E013+1) ++#define SKERR_I2C_E014MSG "WARNING: fan sensor out of range" ++#define SKERR_I2C_E015 (SKERR_I2C_E014+1) ++#define SKERR_I2C_E015MSG "ERROR: fan sensor out of range" ++#define SKERR_I2C_E016 (SKERR_I2C_E015+1) ++#define SKERR_I2C_E016MSG "TWSI: active transfer does not complete" ++ ++/* ++ * Define Timeout values ++ */ ++#define SK_I2C_TIM_LONG 2000000L /* 2 seconds */ ++#define SK_I2C_TIM_SHORT 100000L /* 100 milliseconds */ ++#define SK_I2C_TIM_WATCH 1000000L /* 1 second */ ++ ++/* ++ * Define trap and error log hold times ++ */ ++#ifndef SK_SEN_ERR_TR_HOLD ++#define SK_SEN_ERR_TR_HOLD (4*SK_TICKS_PER_SEC) ++#endif ++#ifndef SK_SEN_ERR_LOG_HOLD ++#define SK_SEN_ERR_LOG_HOLD (60*SK_TICKS_PER_SEC) ++#endif ++#ifndef SK_SEN_WARN_TR_HOLD ++#define SK_SEN_WARN_TR_HOLD (15*SK_TICKS_PER_SEC) ++#endif ++#ifndef SK_SEN_WARN_LOG_HOLD ++#define SK_SEN_WARN_LOG_HOLD (15*60*SK_TICKS_PER_SEC) ++#endif ++ ++/* ++ * Defines for SenType ++ */ ++#define SK_SEN_UNKNOWN 0 ++#define SK_SEN_TEMP 1 ++#define SK_SEN_VOLT 2 ++#define SK_SEN_FAN 3 ++ ++/* ++ * Define for the SenErrorFlag ++ */ ++#define SK_SEN_ERR_NOT_PRESENT 0 /* Error Flag: Sensor not present */ ++#define SK_SEN_ERR_OK 1 /* Error Flag: O.K. */ ++#define SK_SEN_ERR_WARN 2 /* Error Flag: Warning */ ++#define SK_SEN_ERR_ERR 3 /* Error Flag: Error */ ++#define SK_SEN_ERR_FAULTY 4 /* Error Flag: Faulty */ ++ ++/* ++ * Define the Sensor struct ++ */ ++struct s_Sensor { ++ char *SenDesc; /* Description */ ++ int SenType; /* Voltage or Temperature */ ++ SK_I32 SenValue; /* Current value of the sensor */ ++ SK_I32 SenThreErrHigh; /* High error Threshhold of this sensor */ ++ SK_I32 SenThreWarnHigh; /* High warning Threshhold of this sensor */ ++ SK_I32 SenThreErrLow; /* Lower error Threshold of the sensor */ ++ SK_I32 SenThreWarnLow; /* Lower warning Threshold of the sensor */ ++ int SenErrFlag; /* Sensor indicated an error */ ++ SK_BOOL SenInit; /* Is sensor initialized ? */ ++ SK_U64 SenErrCts; /* Error trap counter */ ++ SK_U64 SenWarnCts; /* Warning trap counter */ ++ SK_U64 SenBegErrTS; /* Begin error timestamp */ ++ SK_U64 SenBegWarnTS; /* Begin warning timestamp */ ++ SK_U64 SenLastErrTrapTS; /* Last error trap timestamp */ ++ SK_U64 SenLastErrLogTS; /* Last error log timestamp */ ++ SK_U64 SenLastWarnTrapTS; /* Last warning trap timestamp */ ++ SK_U64 SenLastWarnLogTS; /* Last warning log timestamp */ ++ int SenState; /* Sensor State (see HW specific include) */ ++ int (*SenRead)(SK_AC *pAC, SK_IOC IoC, struct s_Sensor *pSen); ++ /* Sensors read function */ ++ SK_U16 SenReg; /* Register Address for this sensor */ ++ SK_U8 SenDev; /* Device Selection for this sensor */ ++}; ++ ++typedef struct s_I2c { ++ SK_SENSOR SenTable[SK_MAX_SENSORS]; /* Sensor Table */ ++ int CurrSens; /* Which sensor is currently queried */ ++ int MaxSens; /* Max. number of sensors */ ++ int TimerMode; /* Use the timer also to watch the state machine */ ++ int InitLevel; /* Initialized Level */ ++#ifndef SK_DIAG ++ int DummyReads; /* Number of non-checked dummy reads */ ++ SK_TIMER SenTimer; /* Sensors timer */ ++#endif /* !SK_DIAG */ ++} SK_I2C; ++ ++extern int SkI2cInit(SK_AC *pAC, SK_IOC IoC, int Level); ++extern int SkI2cWrite(SK_AC *pAC, SK_IOC IoC, SK_U32 Data, int Dev, int Size, ++ int Reg, int Burst); ++extern int SkI2cReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen); ++#ifdef SK_DIAG ++extern SK_U32 SkI2cRead(SK_AC *pAC, SK_IOC IoC, int Dev, int Size, int Reg, ++ int Burst); ++#else /* !SK_DIAG */ ++extern int SkI2cEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para); ++extern void SkI2cWaitIrq(SK_AC *pAC, SK_IOC IoC); ++extern void SkI2cIsr(SK_AC *pAC, SK_IOC IoC); ++#endif /* !SK_DIAG */ ++#endif /* n_SKTWSI_H */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/sktypes.h linux-2.6.9.new/drivers/net/sk98lin/h/sktypes.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/sktypes.h 2004-10-19 05:55:35.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/sktypes.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: sktypes.h + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.2 $ +- * Date: $Date: 2003/10/07 08:16:51 $ ++ * Version: $Revision: 1.2.2.1 $ ++ * Date: $Date: 2005/04/11 09:00:53 $ + * Purpose: Define data types for Linux + * + ******************************************************************************/ +@@ -11,7 +11,7 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -22,48 +22,28 @@ + * + ******************************************************************************/ + +-/****************************************************************************** +- * +- * Description: +- * +- * In this file, all data types that are needed by the common modules +- * are mapped to Linux data types. +- * +- * +- * Include File Hierarchy: +- * +- * +- ******************************************************************************/ +- + #ifndef __INC_SKTYPES_H + #define __INC_SKTYPES_H + +- +-/* defines *******************************************************************/ +- +-/* +- * Data types with a specific size. 'I' = signed, 'U' = unsigned. +- */ +-#define SK_I8 s8 +-#define SK_U8 u8 +-#define SK_I16 s16 +-#define SK_U16 u16 +-#define SK_I32 s32 +-#define SK_U32 u32 +-#define SK_I64 s64 +-#define SK_U64 u64 +- +-#define SK_UPTR ulong /* casting pointer <-> integral */ +- +-/* +-* Boolean type. +-*/ +-#define SK_BOOL SK_U8 +-#define SK_FALSE 0 +-#define SK_TRUE (!SK_FALSE) +- +-/* typedefs *******************************************************************/ +- +-/* function prototypes ********************************************************/ ++#define SK_I8 s8 /* 8 bits (1 byte) signed */ ++#define SK_U8 u8 /* 8 bits (1 byte) unsigned */ ++#define SK_I16 s16 /* 16 bits (2 bytes) signed */ ++#define SK_U16 u16 /* 16 bits (2 bytes) unsigned */ ++#define SK_I32 s32 /* 32 bits (4 bytes) signed */ ++#define SK_U32 u32 /* 32 bits (4 bytes) unsigned */ ++#define SK_I64 s64 /* 64 bits (8 bytes) signed */ ++#define SK_U64 u64 /* 64 bits (8 bytes) unsigned */ ++ ++#define SK_UPTR ulong /* casting pointer <-> integral */ ++ ++#define SK_BOOL SK_U8 ++#define SK_FALSE 0 ++#define SK_TRUE (!SK_FALSE) + + #endif /* __INC_SKTYPES_H */ ++ ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skversion.h linux-2.6.9.new/drivers/net/sk98lin/h/skversion.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skversion.h 2004-10-19 05:54:32.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skversion.h 2006-12-07 14:35:03.000000000 +0800 +@@ -1,17 +1,17 @@ + /****************************************************************************** + * +- * Name: version.h ++ * Name: skversion.h + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.5 $ +- * Date: $Date: 2003/10/07 08:16:51 $ +- * Purpose: SK specific Error log support ++ * Version: $Revision: 1.3.2.1 $ ++ * Date: $Date: 2005/04/11 09:00:53 $ ++ * Purpose: specific version strings and numbers + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -22,17 +22,15 @@ + * + ******************************************************************************/ + +-#ifdef lint +-static const char SysKonnectFileId[] = "@(#) (C) SysKonnect GmbH."; +-static const char SysKonnectBuildNumber[] = +- "@(#)SK-BUILD: 6.23 PL: 01"; +-#endif /* !defined(lint) */ +- +-#define BOOT_STRING "sk98lin: Network Device Driver v6.23\n" \ +- "(C)Copyright 1999-2004 Marvell(R)." +- +-#define VER_STRING "6.23" +-#define DRIVER_FILE_NAME "sk98lin" +-#define DRIVER_REL_DATE "Feb-13-2004" +- ++#define BOOT_STRING "sk98lin: Network Device Driver v8.23.1.3\n" \ ++ "(C)Copyright 1999-2005 Marvell(R)." ++#define VER_STRING "8.23.1.3" ++#define PATCHLEVEL "01" ++#define DRIVER_FILE_NAME "sk98lin" ++#define DRIVER_REL_DATE "Jun-20-2005" + ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/skvpd.h linux-2.6.9.new/drivers/net/sk98lin/h/skvpd.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/skvpd.h 2004-10-19 05:53:46.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/skvpd.h 2006-12-07 14:35:03.000000000 +0800 +@@ -1,22 +1,22 @@ + /****************************************************************************** + * + * Name: skvpd.h +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.15 $ +- * Date: $Date: 2003/01/13 10:39:38 $ ++ * Project: Gigabit Ethernet Adapters, VPD-Module ++ * Version: $Revision: 2.6 $ ++ * Date: $Date: 2004/11/09 15:18:00 $ + * Purpose: Defines and Macros for VPD handling + * + ******************************************************************************/ + + /****************************************************************************** + * +- * (C)Copyright 1998-2003 SysKonnect GmbH. ++ * (C)Copyright 1998-2002 SysKonnect. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -31,7 +31,7 @@ + /* + * Define Resource Type Identifiers and VPD keywords + */ +-#define RES_ID 0x82 /* Resource Type ID String (Product Name) */ ++#define RES_ID 0x82 /* Resource Type ID String (Product Name) */ + #define RES_VPD_R 0x90 /* start of VPD read only area */ + #define RES_VPD_W 0x91 /* start of VPD read/write area */ + #define RES_END 0x78 /* Resource Type End Tag */ +@@ -40,14 +40,16 @@ + #define VPD_NAME "Name" /* Product Name, VPD name of RES_ID */ + #endif /* VPD_NAME */ + #define VPD_PN "PN" /* Adapter Part Number */ +-#define VPD_EC "EC" /* Adapter Engineering Level */ ++#define VPD_EC "EC" /* Adapter Engineering Level */ + #define VPD_MN "MN" /* Manufacture ID */ + #define VPD_SN "SN" /* Serial Number */ + #define VPD_CP "CP" /* Extended Capability */ + #define VPD_RV "RV" /* Checksum and Reserved */ +-#define VPD_YA "YA" /* Asset Tag Identifier */ ++#define VPD_YA "YA" /* Asset Tag Identifier */ + #define VPD_VL "VL" /* First Error Log Message (SK specific) */ + #define VPD_VF "VF" /* Second Error Log Message (SK specific) */ ++#define VPD_VB "VB" /* Boot Agent ROM Configuration (SK specific) */ ++#define VPD_VE "VE" /* EFI UNDI Configuration (SK specific) */ + #define VPD_RW "RW" /* Remaining Read / Write Area */ + + /* 'type' values for vpd_setup_para() */ +@@ -55,7 +57,7 @@ + #define VPD_RW_KEY 2 /* RW keys are "Yx", "Vx", and "RW" */ + + /* 'op' values for vpd_setup_para() */ +-#define ADD_KEY 1 /* add the key at the pos "RV" or "RW" */ ++#define ADD_KEY 1 /* add the key at the pos "RV" or "RW" */ + #define OWR_KEY 2 /* overwrite key if already exists */ + + /* +@@ -64,18 +66,18 @@ + + #define VPD_DEV_ID_GENESIS 0x4300 + +-#define VPD_SIZE_YUKON 256 +-#define VPD_SIZE_GENESIS 512 +-#define VPD_SIZE 512 ++#define VPD_SIZE_YUKON 256 ++#define VPD_SIZE_GENESIS 512 ++#define VPD_SIZE 512 + #define VPD_READ 0x0000 + #define VPD_WRITE 0x8000 + + #define VPD_STOP(pAC,IoC) VPD_OUT16(pAC,IoC,PCI_VPD_ADR_REG,VPD_WRITE) + +-#define VPD_GET_RES_LEN(p) ((unsigned int) \ +- (* (SK_U8 *)&(p)[1]) |\ +- ((* (SK_U8 *)&(p)[2]) << 8)) +-#define VPD_GET_VPD_LEN(p) ((unsigned int)(* (SK_U8 *)&(p)[2])) ++#define VPD_GET_RES_LEN(p) ((unsigned int)\ ++ (*(SK_U8 *)&(p)[1]) |\ ++ ((*(SK_U8 *)&(p)[2]) << 8)) ++#define VPD_GET_VPD_LEN(p) ((unsigned int)(*(SK_U8 *)&(p)[2])) + #define VPD_GET_VAL(p) ((char *)&(p)[3]) + + #define VPD_MAX_LEN 50 +@@ -126,7 +128,7 @@ + /* + * System specific VPD macros + */ +-#ifndef SKDIAG ++#ifndef SK_DIAG + #ifndef VPD_DO_IO + #define VPD_OUT8(pAC,IoC,Addr,Val) (void)SkPciWriteCfgByte(pAC,Addr,Val) + #define VPD_OUT16(pAC,IoC,Addr,Val) (void)SkPciWriteCfgWord(pAC,Addr,Val) +@@ -135,61 +137,61 @@ + #define VPD_IN16(pAC,IoC,Addr,pVal) (void)SkPciReadCfgWord(pAC,Addr,pVal) + #define VPD_IN32(pAC,IoC,Addr,pVal) (void)SkPciReadCfgDWord(pAC,Addr,pVal) + #else /* VPD_DO_IO */ +-#define VPD_OUT8(pAC,IoC,Addr,Val) SK_OUT8(IoC,PCI_C(Addr),Val) +-#define VPD_OUT16(pAC,IoC,Addr,Val) SK_OUT16(IoC,PCI_C(Addr),Val) +-#define VPD_OUT32(pAC,IoC,Addr,Val) SK_OUT32(IoC,PCI_C(Addr),Val) +-#define VPD_IN8(pAC,IoC,Addr,pVal) SK_IN8(IoC,PCI_C(Addr),pVal) +-#define VPD_IN16(pAC,IoC,Addr,pVal) SK_IN16(IoC,PCI_C(Addr),pVal) +-#define VPD_IN32(pAC,IoC,Addr,pVal) SK_IN32(IoC,PCI_C(Addr),pVal) ++#define VPD_OUT8(pAC,IoC,Addr,Val) SK_OUT8(IoC,PCI_C(pAC,Addr),Val) ++#define VPD_OUT16(pAC,IoC,Addr,Val) SK_OUT16(IoC,PCI_C(pAC,Addr),Val) ++#define VPD_OUT32(pAC,IoC,Addr,Val) SK_OUT32(IoC,PCI_C(pAC,Addr),Val) ++#define VPD_IN8(pAC,IoC,Addr,pVal) SK_IN8(IoC,PCI_C(pAC,Addr),pVal) ++#define VPD_IN16(pAC,IoC,Addr,pVal) SK_IN16(IoC,PCI_C(pAC,Addr),pVal) ++#define VPD_IN32(pAC,IoC,Addr,pVal) SK_IN32(IoC,PCI_C(pAC,Addr),pVal) + #endif /* VPD_DO_IO */ +-#else /* SKDIAG */ ++#else /* SK_DIAG */ + #define VPD_OUT8(pAC,Ioc,Addr,Val) { \ + if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciWriteCfgByte(pAC,Addr,Val); \ + else \ +- SK_OUT8(pAC,PCI_C(Addr),Val); \ ++ SK_OUT8(pAC,PCI_C(pAC,Addr),Val); \ + } + #define VPD_OUT16(pAC,Ioc,Addr,Val) { \ + if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciWriteCfgWord(pAC,Addr,Val); \ + else \ +- SK_OUT16(pAC,PCI_C(Addr),Val); \ ++ SK_OUT16(pAC,PCI_C(pAC,Addr),Val); \ + } + #define VPD_OUT32(pAC,Ioc,Addr,Val) { \ + if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciWriteCfgDWord(pAC,Addr,Val); \ + else \ +- SK_OUT32(pAC,PCI_C(Addr),Val); \ ++ SK_OUT32(pAC,PCI_C(pAC,Addr),Val); \ + } + #define VPD_IN8(pAC,Ioc,Addr,pVal) { \ +- if ((pAC)->DgT.DgUseCfgCycle) \ ++ if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciReadCfgByte(pAC,Addr,pVal); \ + else \ +- SK_IN8(pAC,PCI_C(Addr),pVal); \ ++ SK_IN8(pAC,PCI_C(pAC,Addr),pVal); \ + } + #define VPD_IN16(pAC,Ioc,Addr,pVal) { \ +- if ((pAC)->DgT.DgUseCfgCycle) \ ++ if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciReadCfgWord(pAC,Addr,pVal); \ + else \ +- SK_IN16(pAC,PCI_C(Addr),pVal); \ ++ SK_IN16(pAC,PCI_C(pAC,Addr),pVal); \ + } + #define VPD_IN32(pAC,Ioc,Addr,pVal) { \ + if ((pAC)->DgT.DgUseCfgCycle) \ + SkPciReadCfgDWord(pAC,Addr,pVal); \ + else \ +- SK_IN32(pAC,PCI_C(Addr),pVal); \ ++ SK_IN32(pAC,PCI_C(pAC,Addr),pVal); \ + } +-#endif /* nSKDIAG */ ++#endif /* SK_DIAG */ + + /* function prototypes ********************************************************/ + + #ifndef SK_KR_PROTO +-#ifdef SKDIAG ++#ifdef SK_DIAG + extern SK_U32 VpdReadDWord( + SK_AC *pAC, + SK_IOC IoC, + int addr); +-#endif /* SKDIAG */ ++#endif /* SK_DIAG */ + + extern int VpdSetupPara( + SK_AC *pAC, +@@ -240,7 +242,12 @@ + SK_IOC IoC, + char *msg); + +-#ifdef SKDIAG ++int VpdInit( ++ SK_AC *pAC, ++ SK_IOC IoC); ++ ++#if defined(SK_DIAG) || defined(SK_ASF) ++ + extern int VpdReadBlock( + SK_AC *pAC, + SK_IOC IoC, +@@ -254,7 +261,9 @@ + char *buf, + int addr, + int len); +-#endif /* SKDIAG */ ++ ++#endif /* SK_DIAG || SK_ASF */ ++ + #else /* SK_KR_PROTO */ + extern SK_U32 VpdReadDWord(); + extern int VpdSetupPara(); +@@ -269,3 +278,4 @@ + #endif /* SK_KR_PROTO */ + + #endif /* __INC_SKVPD_H_ */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/sky2le.h linux-2.6.9.new/drivers/net/sk98lin/h/sky2le.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/sky2le.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/sky2le.h 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,891 @@ ++/****************************************************************************** ++ * ++ * Name: sky2le.h ++ * Project: Gigabit Ethernet Adapters, Common Modules ++ * Version: $Revision: 1.9 $ ++ * Date: $Date: 2005/01/26 10:53:34 $ ++ * Purpose: Common list element definitions and access macros. ++ * ++ ******************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 2003-2004 Marvell ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ ******************************************************************************/ ++ ++#ifndef __INC_SKY2LE_H ++#define __INC_SKY2LE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif /* __cplusplus */ ++ ++/* defines ********************************************************************/ ++ ++#define MIN_LEN_OF_LE_TAB 128 ++#define MAX_LEN_OF_LE_TAB 4096 ++#ifdef USE_POLLING_UNIT ++#define NUM_LE_POLLING_UNIT 2 ++#endif ++#define MAX_FRAG_OVERHEAD 10 ++ ++/* Macro for aligning a given value */ ++#define SK_ALIGN_SIZE(Value, Alignment, AlignedVal) { \ ++ (AlignedVal) = (((Value) + (Alignment) - 1) & (~((Alignment) - 1)));\ ++} ++ ++/****************************************************************************** ++ * ++ * LE2DWord() - Converts the given Little Endian value to machine order value ++ * ++ * Description: ++ * This function converts the Little Endian value received as an argument to ++ * the machine order value. ++ * ++ * Returns: ++ * The converted value ++ * ++ */ ++ ++#ifdef SK_LITTLE_ENDIAN ++ ++#ifndef SK_USE_REV_DESC ++#define LE2DWord(value) (value) ++#else /* SK_USE_REV_DESC */ ++#define LE2DWord(value) \ ++ ((((value)<<24L) & 0xff000000L) + \ ++ (((value)<< 8L) & 0x00ff0000L) + \ ++ (((value)>> 8L) & 0x0000ff00L) + \ ++ (((value)>>24L) & 0x000000ffL)) ++#endif /* SK_USE_REV_DESC */ ++ ++#else /* !SK_LITTLE_ENDIAN */ ++ ++#ifndef SK_USE_REV_DESC ++#define LE2DWord(value) \ ++ ((((value)<<24L) & 0xff000000L) + \ ++ (((value)<< 8L) & 0x00ff0000L) + \ ++ (((value)>> 8L) & 0x0000ff00L) + \ ++ (((value)>>24L) & 0x000000ffL)) ++#else /* SK_USE_REV_DESC */ ++#define LE2DWord(value) (value) ++#endif /* SK_USE_REV_DESC */ ++ ++#endif /* !SK_LITTLE_ENDIAN */ ++ ++/****************************************************************************** ++ * ++ * DWord2LE() - Converts the given value to a Little Endian value ++ * ++ * Description: ++ * This function converts the value received as an argument to a Little Endian ++ * value on Big Endian machines. If the machine running the code is Little ++ * Endian, then no conversion is done. ++ * ++ * Returns: ++ * The converted value ++ * ++ */ ++ ++#ifdef SK_LITTLE_ENDIAN ++ ++#ifndef SK_USE_REV_DESC ++#define DWord2LE(value) (value) ++#else /* SK_USE_REV_DESC */ ++#define DWord2LE(value) \ ++ ((((value)<<24L) & 0xff000000L) + \ ++ (((value)<< 8L) & 0x00ff0000L) + \ ++ (((value)>> 8L) & 0x0000ff00L) + \ ++ (((value)>>24L) & 0x000000ffL)) ++#endif /* SK_USE_REV_DESC */ ++ ++#else /* !SK_LITTLE_ENDIAN */ ++ ++#ifndef SK_USE_REV_DESC ++#define DWord2LE(value) \ ++ ((((value)<<24L) & 0xff000000L) + \ ++ (((value)<< 8L) & 0x00ff0000L) + \ ++ (((value)>> 8L) & 0x0000ff00L) + \ ++ (((value)>>24L) & 0x000000ffL)) ++#else /* SK_USE_REV_DESC */ ++#define DWord2LE(value) (value) ++#endif /* SK_USE_REV_DESC */ ++#endif /* !SK_LITTLE_ENDIAN */ ++ ++/****************************************************************************** ++ * ++ * LE2Word() - Converts the given Little Endian value to machine order value ++ * ++ * Description: ++ * This function converts the Little Endian value received as an argument to ++ * the machine order value. ++ * ++ * Returns: ++ * The converted value ++ * ++ */ ++ ++#ifdef SK_LITTLE_ENDIAN ++#ifndef SK_USE_REV_DESC ++#define LE2Word(value) (value) ++#else /* SK_USE_REV_DESC */ ++#define LE2Word(value) \ ++ ((((value)<< 8L) & 0xff00) + \ ++ (((value)>> 8L) & 0x00ff)) ++#endif /* SK_USE_REV_DESC */ ++ ++#else /* !SK_LITTLE_ENDIAN */ ++#ifndef SK_USE_REV_DESC ++#define LE2Word(value) \ ++ ((((value)<< 8L) & 0xff00) + \ ++ (((value)>> 8L) & 0x00ff)) ++#else /* SK_USE_REV_DESC */ ++#define LE2Word(value) (value) ++#endif /* SK_USE_REV_DESC */ ++#endif /* !SK_LITTLE_ENDIAN */ ++ ++/****************************************************************************** ++ * ++ * Word2LE() - Converts the given value to a Little Endian value ++ * ++ * Description: ++ * This function converts the value received as an argument to a Little Endian ++ * value on Big Endian machines. If the machine running the code is Little ++ * Endian, then no conversion is done. ++ * ++ * Returns: ++ * The converted value ++ * ++ */ ++ ++#ifdef SK_LITTLE_ENDIAN ++#ifndef SK_USE_REV_DESC ++#define Word2LE(value) (value) ++#else /* SK_USE_REV_DESC */ ++#define Word2LE(value) \ ++ ((((value)<< 8L) & 0xff00) + \ ++ (((value)>> 8L) & 0x00ff)) ++#endif /* SK_USE_REV_DESC */ ++ ++#else /* !SK_LITTLE_ENDIAN */ ++#ifndef SK_USE_REV_DESC ++#define Word2LE(value) \ ++ ((((value)<< 8L) & 0xff00) + \ ++ (((value)>> 8L) & 0x00ff)) ++#else /* SK_USE_REV_DESC */ ++#define Word2LE(value) (value) ++#endif /* SK_USE_REV_DESC */ ++#endif /* !SK_LITTLE_ENDIAN */ ++ ++/****************************************************************************** ++ * ++ * Transmit list element macros ++ * ++ */ ++ ++#define TXLE_SET_ADDR(pLE, Addr) \ ++ ((pLE)->Tx.TxUn.BufAddr = DWord2LE(Addr)) ++#define TXLE_SET_LSLEN(pLE, Len) \ ++ ((pLE)->Tx.TxUn.LargeSend.Length = Word2LE(Len)) ++#define TXLE_SET_STACS(pLE, Start) \ ++ ((pLE)->Tx.TxUn.ChkSum.TxTcpSp = Word2LE(Start)) ++#define TXLE_SET_WRICS(pLE, Write) \ ++ ((pLE)->Tx.TxUn.ChkSum.TxTcpWp = Word2LE(Write)) ++#define TXLE_SET_INICS(pLE, Ini) ((pLE)->Tx.Send.InitCsum = Word2LE(Ini)) ++#define TXLE_SET_LEN(pLE, Len) ((pLE)->Tx.Send.BufLen = Word2LE(Len)) ++#define TXLE_SET_VLAN(pLE, Vlan) ((pLE)->Tx.Send.VlanTag = Word2LE(Vlan)) ++#define TXLE_SET_LCKCS(pLE, Lock) ((pLE)->Tx.ControlFlags = (Lock)) ++#define TXLE_SET_CTRL(pLE, Ctrl) ((pLE)->Tx.ControlFlags = (Ctrl)) ++#define TXLE_SET_OPC(pLE, Opc) ((pLE)->Tx.Opcode = (Opc)) ++ ++#define TXLE_GET_ADDR(pLE) LE2DWord((pLE)->Tx.TxUn.BufAddr) ++#define TXLE_GET_LSLEN(pLE) LE2Word((pLE)->Tx.TxUn.LargeSend.Length) ++#define TXLE_GET_STACS(pLE) LE2Word((pLE)->Tx.TxUn.ChkSum.TxTcpSp) ++#define TXLE_GET_WRICS(pLE) LE2Word((pLE)->Tx.TxUn.ChkSum.TxTcpWp) ++#define TXLE_GET_INICS(pLE) LE2Word((pLE)->Tx.Send.InitCsum) ++#define TXLE_GET_LEN(pLE) LE2Word((pLE)->Tx.Send.BufLen) ++#define TXLE_GET_VLAN(pLE) LE2Word((pLE)->Tx.Send.VlanTag) ++#define TXLE_GET_LCKCS(pLE) ((pLE)->Tx.ControlFlags) ++#define TXLE_GET_CTRL(pLE) ((pLE)->Tx.ControlFlags) ++#define TXLE_GET_OPC(pLE) ((pLE)->Tx.Opcode) ++ ++/****************************************************************************** ++ * ++ * Receive list element macros ++ * ++ */ ++ ++#define RXLE_SET_ADDR(pLE, Addr) \ ++ ((pLE)->Rx.RxUn.BufAddr = (SK_U32) DWord2LE(Addr)) ++#define RXLE_SET_STACS2(pLE, Offs) \ ++ ((pLE)->Rx.RxUn.ChkSum.RxTcpSp2 = Word2LE(Offs)) ++#define RXLE_SET_STACS1(pLE, Offs) \ ++ ((pLE)->Rx.RxUn.ChkSum.RxTcpSp1 = Word2LE(Offs)) ++#define RXLE_SET_LEN(pLE, Len) ((pLE)->Rx.BufferLength = Word2LE(Len)) ++#define RXLE_SET_CTRL(pLE, Ctrl) ((pLE)->Rx.ControlFlags = (Ctrl)) ++#define RXLE_SET_OPC(pLE, Opc) ((pLE)->Rx.Opcode = (Opc)) ++ ++#define RXLE_GET_ADDR(pLE) LE2DWord((pLE)->Rx.RxUn.BufAddr) ++#define RXLE_GET_STACS2(pLE) LE2Word((pLE)->Rx.RxUn.ChkSum.RxTcpSp2) ++#define RXLE_GET_STACS1(pLE) LE2Word((pLE)->Rx.RxUn.ChkSum.RxTcpSp1) ++#define RXLE_GET_LEN(pLE) LE2Word((pLE)->Rx.BufferLength) ++#define RXLE_GET_CTRL(pLE) ((pLE)->Rx.ControlFlags) ++#define RXLE_GET_OPC(pLE) ((pLE)->Rx.Opcode) ++ ++/****************************************************************************** ++ * ++ * Status list element macros ++ * ++ */ ++ ++#define STLE_SET_OPC(pLE, Opc) ((pLE)->St.Opcode = (Opc)) ++ ++#define STLE_GET_FRSTATUS(pLE) LE2DWord((pLE)->St.StUn.StRxStatWord) ++#define STLE_GET_TIST(pLE) LE2DWord((pLE)->St.StUn.StRxTimeStamp) ++#define STLE_GET_TCP1(pLE) LE2Word((pLE)->St.StUn.StRxTCPCSum.RxTCPSum1) ++#define STLE_GET_TCP2(pLE) LE2Word((pLE)->St.StUn.StRxTCPCSum.RxTCPSum2) ++#define STLE_GET_LEN(pLE) LE2Word((pLE)->St.Stat.BufLen) ++#define STLE_GET_VLAN(pLE) LE2Word((pLE)->St.Stat.VlanTag) ++#define STLE_GET_LINK(pLE) ((pLE)->St.Link) ++#define STLE_GET_OPC(pLE) ((pLE)->St.Opcode) ++#define STLE_GET_DONE_IDX(pLE,LowVal,HighVal) { \ ++ (LowVal) = LE2DWord((pLE)->St.StUn.StTxStatLow); \ ++ (HighVal) = LE2Word((pLE)->St.Stat.StTxStatHi); \ ++} ++ ++#define STLE_GET_RSS(pLE) LE2DWord((pLE)->St.StUn.StRxRssValue) ++#define STLE_GET_IPBIT(pLE) ((pLE)->St.Stat.Rss.FlagField & RSS_IP_FLAG) ++#define STLE_GET_TCPBIT(pLE) ((pLE)->St.Stat.Rss.FlagField & RSS_TCP_FLAG) ++ ++ ++/* I always take both values as a paramter to avoid typos */ ++#define STLE_GET_DONE_IDX_TXA1(LowVal,HighVal) \ ++ (((LowVal) & STLE_TXA1_MSKL) >> STLE_TXA1_SHIFTL) ++#define STLE_GET_DONE_IDX_TXS1(LowVal,HighVal) \ ++ ((LowVal & STLE_TXS1_MSKL) >> STLE_TXS1_SHIFTL) ++#define STLE_GET_DONE_IDX_TXA2(LowVal,HighVal) \ ++ (((LowVal & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) + \ ++ ((HighVal & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH)) ++#define STLE_GET_DONE_IDX_TXS2(LowVal,HighVal) \ ++ ((HighVal & STLE_TXS2_MSKH) >> STLE_TXS2_SHIFTH) ++ ++ ++#define SK_Y2_RXSTAT_CHECK_PKT(Len, RxStat, IsOk) { \ ++ (IsOk) = (((RxStat) & GMR_FS_RX_OK) != 0) && \ ++ (((RxStat) & GMR_FS_ANY_ERR) == 0); \ ++ \ ++ if ((IsOk) && ((SK_U16)(((RxStat) & GMR_FS_LEN_MSK) >> \ ++ GMR_FS_LEN_SHIFT) != (Len))) { \ ++ /* length in MAC status differs from length in LE */\ ++ (IsOk) = SK_FALSE; \ ++ } \ ++} ++ ++ ++/****************************************************************************** ++ * ++ * Polling unit list element macros ++ * ++ * NOTE: the Idx must be <= 0xfff and PU_PUTIDX_VALID makes them valid ++ * ++ */ ++ ++#ifdef USE_POLLING_UNIT ++ ++#define POLE_SET_OPC(pLE, Opc) ((pLE)->Sa.Opcode = (Opc)) ++#define POLE_SET_LINK(pLE, Port) ((pLE)->Sa.Link = (Port)) ++#define POLE_SET_RXIDX(pLE, Idx) ((pLE)->Sa.RxIdxVld = Word2LE(Idx)) ++#define POLE_SET_TXAIDX(pLE, Idx) ((pLE)->Sa.TxAIdxVld = Word2LE(Idx)) ++#define POLE_SET_TXSIDX(pLE, Idx) ((pLE)->Sa.TxSIdxVld = Word2LE(Idx)) ++ ++#define POLE_GET_OPC(pLE) ((pLE)->Sa.Opcode) ++#define POLE_GET_LINK(pLE) ((pLE)->Sa.Link) ++#define POLE_GET_RXIDX(pLE) LE2Word((pLE)->Sa.RxIdxVld) ++#define POLE_GET_TXAIDX(pLE) LE2Word((pLE)->Sa.TxAIdxVld) ++#define POLE_GET_TXSIDX(pLE) LE2Word((pLE)->Sa.TxSIdxVld) ++ ++#endif /* USE_POLLING_UNIT */ ++ ++/****************************************************************************** ++ * ++ * Debug macros for list elements ++ * ++ */ ++ ++#ifdef DEBUG ++ ++#define SK_DBG_DUMP_RX_LE(pLE) { \ ++ SK_U8 Opcode; \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=== RX_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \ ++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\ ++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \ ++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (16bit) %04x %04x %04x %04x\n", \ ++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \ ++ ((SK_U16 *) pLE)[3])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (32bit) %08x %08x\n", \ ++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \ ++ Opcode = RXLE_GET_OPC(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \ ++ "Hardware" : "Software")); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOpc: 0x%x ",Opcode)); \ ++ switch (Opcode & (~HW_OWNER)) { \ ++ case OP_BUFFER: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_BUFFER\n")); \ ++ break; \ ++ case OP_PACKET: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_PACKET\n")); \ ++ break; \ ++ case OP_ADDR64: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_ADDR64\n")); \ ++ break; \ ++ case OP_TCPSTART: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPPAR\n")); \ ++ break; \ ++ case SW_OWNER: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunused LE\n")); \ ++ break; \ ++ default: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunknown Opcode!!!\n")); \ ++ break; \ ++ } \ ++ if ((Opcode & OP_BUFFER) == OP_BUFFER) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tControl: 0x%x\n", RXLE_GET_CTRL(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tBufLen: 0x%x\n", RXLE_GET_LEN(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tLowAddr: 0x%x\n", RXLE_GET_ADDR(pLE))); \ ++ } \ ++ if ((Opcode & OP_ADDR64) == OP_ADDR64) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tHighAddr: 0x%x\n", RXLE_GET_ADDR(pLE))); \ ++ } \ ++ if ((Opcode & OP_TCPSTART) == OP_TCPSTART) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Start 1 : 0x%x\n", RXLE_GET_STACS1(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Start 2 : 0x%x\n", RXLE_GET_STACS2(pLE))); \ ++ } \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=====================\n")); \ ++} ++ ++#define SK_DBG_DUMP_TX_LE(pLE) { \ ++ SK_U8 Opcode; \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=== TX_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \ ++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\ ++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \ ++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (16bit) %04x %04x %04x %04x\n", \ ++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \ ++ ((SK_U16 *) pLE)[3])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (32bit) %08x %08x\n", \ ++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \ ++ Opcode = TXLE_GET_OPC(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \ ++ "Hardware" : "Software")); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOpc: 0x%x ",Opcode)); \ ++ switch (Opcode & (~HW_OWNER)) { \ ++ case OP_TCPCHKSUM: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPCHKSUM\n")); \ ++ break; \ ++ case OP_TCPIS: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPIS\n")); \ ++ break; \ ++ case OP_TCPLCK: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPLCK\n")); \ ++ break; \ ++ case OP_TCPLW: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPLW\n")); \ ++ break; \ ++ case OP_TCPLSW: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPLSW\n")); \ ++ break; \ ++ case OP_TCPLISW: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TCPLISW\n")); \ ++ break; \ ++ case OP_ADDR64: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_ADDR64\n")); \ ++ break; \ ++ case OP_VLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_VLAN\n")); \ ++ break; \ ++ case OP_ADDR64VLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_ADDR64VLAN\n")); \ ++ break; \ ++ case OP_LRGLEN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_LRGLEN\n")); \ ++ break; \ ++ case OP_LRGLENVLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_LRGLENVLAN\n")); \ ++ break; \ ++ case OP_BUFFER: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_BUFFER\n")); \ ++ break; \ ++ case OP_PACKET: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_PACKET\n")); \ ++ break; \ ++ case OP_LARGESEND: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_LARGESEND\n")); \ ++ break; \ ++ case SW_OWNER: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunused LE\n")); \ ++ break; \ ++ default: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunknown Opcode!!!\n")); \ ++ break; \ ++ } \ ++ if ((Opcode & OP_BUFFER) == OP_BUFFER) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tControl: 0x%x\n", TXLE_GET_CTRL(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tBufLen: 0x%x\n", TXLE_GET_LEN(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tLowAddr: 0x%x\n", TXLE_GET_ADDR(pLE))); \ ++ } \ ++ if ((Opcode & OP_ADDR64) == OP_ADDR64) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tHighAddr: 0x%x\n", TXLE_GET_ADDR(pLE))); \ ++ } \ ++ if ((Opcode & OP_VLAN) == OP_VLAN) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tVLAN Id: 0x%x\n", TXLE_GET_VLAN(pLE))); \ ++ } \ ++ if ((Opcode & OP_LRGLEN) == OP_LRGLEN) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tLarge send length: 0x%x\n", TXLE_GET_LSLEN(pLE))); \ ++ } \ ++ if ((Opcode &(~HW_OWNER)) <= OP_ADDR64) { \ ++ if ((Opcode & OP_TCPWRITE) == OP_TCPWRITE) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Write: 0x%x\n", TXLE_GET_WRICS(pLE))); \ ++ } \ ++ if ((Opcode & OP_TCPSTART) == OP_TCPSTART) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Start: 0x%x\n", TXLE_GET_STACS(pLE))); \ ++ } \ ++ if ((Opcode & OP_TCPINIT) == OP_TCPINIT) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Init: 0x%x\n", TXLE_GET_INICS(pLE))); \ ++ } \ ++ if ((Opcode & OP_TCPLCK) == OP_TCPLCK) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP Sum Lock: 0x%x\n", TXLE_GET_LCKCS(pLE))); \ ++ } \ ++ } \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=====================\n")); \ ++} ++ ++#define SK_DBG_DUMP_ST_LE(pLE) { \ ++ SK_U8 Opcode; \ ++ SK_U16 HighVal; \ ++ SK_U32 LowVal; \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=== ST_LIST_ELEMENT @addr: %p contains: %02x %02x %02x %02x %02x %02x %02x %02x\n",\ ++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\ ++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \ ++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (16bit) %04x %04x %04x %04x\n", \ ++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \ ++ ((SK_U16 *) pLE)[3])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (32bit) %08x %08x\n", \ ++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \ ++ Opcode = STLE_GET_OPC(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == SW_OWNER) ? \ ++ "Hardware" : "Software")); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOpc: 0x%x", Opcode)); \ ++ Opcode &= (~HW_OWNER); \ ++ switch (Opcode) { \ ++ case OP_RXSTAT: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXSTAT\n")); \ ++ break; \ ++ case OP_RXTIMESTAMP: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXTIMESTAMP\n")); \ ++ break; \ ++ case OP_RXVLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXVLAN\n")); \ ++ break; \ ++ case OP_RXCHKS: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXCHKS\n")); \ ++ break; \ ++ case OP_RXCHKSVLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXCHKSVLAN\n")); \ ++ break; \ ++ case OP_RXTIMEVLAN: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RXTIMEVLAN\n")); \ ++ break; \ ++ case OP_RSS_HASH: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_RSS_HASH\n")); \ ++ break; \ ++ case OP_TXINDEXLE: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_TXINDEXLE\n")); \ ++ break; \ ++ case HW_OWNER: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunused LE\n")); \ ++ break; \ ++ default: \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunknown status list element!!!\n")); \ ++ break; \ ++ } \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tPort: %c\n", 'A' + STLE_GET_LINK(pLE))); \ ++ if (Opcode == OP_RXSTAT) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tFrameLen: 0x%x\n", STLE_GET_LEN(pLE))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tFrameStat: 0x%x\n", STLE_GET_FRSTATUS(pLE))); \ ++ } \ ++ if ((Opcode & OP_RXVLAN) == OP_RXVLAN) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tVLAN Id: 0x%x\n", STLE_GET_VLAN(pLE))); \ ++ } \ ++ if ((Opcode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTimestamp: 0x%x\n", STLE_GET_TIST(pLE))); \ ++ } \ ++ if ((Opcode & OP_RXCHKS) == OP_RXCHKS) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTCP: 0x%x 0x%x\n", STLE_GET_TCP1(pLE), \ ++ STLE_GET_TCP2(pLE))); \ ++ } \ ++ if (Opcode == OP_TXINDEXLE) { \ ++ STLE_GET_DONE_IDX(pLE, LowVal, HighVal); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTx Index TxA1: 0x%x\n", \ ++ STLE_GET_DONE_IDX_TXA1(LowVal,HighVal))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTx Index TxS1: 0x%x\n", \ ++ STLE_GET_DONE_IDX_TXS1(LowVal,HighVal))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTx Index TxA2: 0x%x\n", \ ++ STLE_GET_DONE_IDX_TXA2(LowVal,HighVal))); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTx Index TxS2: 0x%x\n", \ ++ STLE_GET_DONE_IDX_TXS2(LowVal,HighVal))); \ ++ } \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=====================\n")); \ ++} ++ ++#ifdef USE_POLLING_UNIT ++#define SK_DBG_DUMP_PO_LE(pLE) { \ ++ SK_U8 Opcode; \ ++ SK_U16 Idx; \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=== PO_LIST_ELEMENT @addr: %p cont: %02x %02x %02x %02x %02x %02x %02x %02x\n", \ ++ pLE, ((SK_U8 *) pLE)[0], ((SK_U8 *) pLE)[1], ((SK_U8 *) pLE)[2],\ ++ ((SK_U8 *) pLE)[3], ((SK_U8 *) pLE)[4], ((SK_U8 *) pLE)[5], \ ++ ((SK_U8 *) pLE)[6], ((SK_U8 *) pLE)[7])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (16bit) %04x %04x %04x %04x\n", \ ++ ((SK_U16 *) pLE)[0], ((SK_U16 *) pLE)[1], ((SK_U16 *) pLE)[2], \ ++ ((SK_U16 *) pLE)[3])); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\t (32bit) %08x %08x\n", \ ++ ((SK_U32 *) pLE)[0], ((SK_U32 *) pLE)[1])); \ ++ Opcode = POLE_GET_OPC(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOwn belongs to %s\n", ((Opcode & HW_OWNER) == HW_OWNER) ? \ ++ "Hardware" : "Software")); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOpc: 0x%x ",Opcode)); \ ++ if ((Opcode & ~HW_OWNER) == OP_PUTIDX) { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tOP_PUTIDX\n")); \ ++ } \ ++ else { \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tunknown Opcode!!!\n")); \ ++ } \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tPort %c\n", 'A' + POLE_GET_LINK(pLE))); \ ++ Idx = POLE_GET_TXAIDX(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTxA Index is 0x%X and %svalid\n", Idx, \ ++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \ ++ Idx = POLE_GET_TXSIDX(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tTxS Index is 0x%X and %svalid\n", Idx, \ ++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \ ++ Idx = POLE_GET_RXIDX(pLE); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("\tRx Index is 0x%X and %svalid\n", Idx, \ ++ (Idx & PU_PUTIDX_VALID) ? "" : "not ")); \ ++ SK_DBG_MSG(pAc, SK_DBGMOD_HWM, SK_DBGCAT_INIT, \ ++ ("=====================\n")); \ ++} ++#endif /* USE_POLLING_UNIT */ ++ ++#else /* !DEBUG */ ++ ++#define SK_DBG_DUMP_RX_LE(pLE) ++#define SK_DBG_DUMP_TX_LE(pLE) ++#define SK_DBG_DUMP_ST_LE(pLE) ++#define SK_DBG_DUMP_PO_LE(pLE) ++ ++#endif /* !DEBUG */ ++ ++/****************************************************************************** ++ * ++ * Macros for listelement tables ++ * ++ * ++ */ ++ ++#define LE_SIZE sizeof(SK_HWLE) ++#define LE_TAB_SIZE(NumElements) ((NumElements) * LE_SIZE) ++ ++/* Number of unused list elements in table ++ * this macro always returns the number of free listelements - 1 ++ * this way we want to guarantee that always one LE remains unused ++ */ ++#define NUM_FREE_LE_IN_TABLE(pTable) \ ++ ( ((pTable)->Put >= (pTable)->Done) ? \ ++ (NUM_LE_IN_TABLE(pTable) - (pTable)->Put + (pTable)->Done - 1) :\ ++ ((pTable)->Done - (pTable)->Put - 1) ) ++ ++/* total number of list elements in table */ ++#define NUM_LE_IN_TABLE(pTable) ((pTable)->Num) ++ ++/* get next unused Rx list element */ ++#define GET_RX_LE(pLE, pTable) { \ ++ pLE = &(pTable)->pLETab[(pTable)->Put]; \ ++ (pTable)->Put = ((pTable)->Put + 1) & (NUM_LE_IN_TABLE(pTable) - 1);\ ++} ++ ++/* get next unused Tx list element */ ++#define GET_TX_LE(pLE, pTable) GET_RX_LE(pLE, pTable) ++ ++/* get next status list element expected to be finished by hw */ ++#define GET_ST_LE(pLE, pTable) { \ ++ pLE = &(pTable)->pLETab[(pTable)->Done]; \ ++ (pTable)->Done = ((pTable)->Done +1) & (NUM_LE_IN_TABLE(pTable) - 1);\ ++} ++ ++#ifdef USE_POLLING_UNIT ++/* get next polling unit list element for port */ ++#define GET_PO_LE(pLE, pTable, Port) { \ ++ pLE = &(pTable)->pLETab[(Port)]; \ ++} ++#endif /* USE_POLLING_UNIT */ ++ ++#define GET_PUT_IDX(pTable) ((pTable)->Put) ++ ++#define UPDATE_HWPUT_IDX(pTable) {(pTable)->HwPut = (pTable)->Put; } ++ ++/* ++ * get own bit of next status LE ++ * if the result is != 0 there has been at least one status LE finished ++ */ ++#define OWN_OF_FIRST_LE(pTable) \ ++ (STLE_GET_OPC(&(pTable)->pLETab[(pTable)->Done]) & HW_OWNER) ++ ++#define SET_DONE_INDEX(pTable, Idx) (pTable)->Done = (Idx); ++ ++#define GET_DONE_INDEX(pTable) ((pTable)->Done) ++ ++#ifdef SAFE_BUT_SLOW ++ ++/* check own bit of LE before current done idx */ ++#define CHECK_STLE_OVERFLOW(pTable, IsOk) { \ ++ unsigned i; \ ++ if ((i = (pTable)->Done) == 0) { \ ++ i = NUM_LE_IN_TABLE(pTable); \ ++ } \ ++ else { \ ++ i = i - 1; \ ++ } \ ++ if (STLE_GET_OPC(&(pTable)->pLETab[i]) == HW_OWNER) { \ ++ (IsOk) = SK_TRUE; \ ++ } \ ++ else { \ ++ (IsOk) = SK_FALSE; \ ++ } \ ++ } ++ ++ ++/* ++ * for Yukon-2 the hardware is not polling the list elements, so it ++ * is not necessary to change the own-bit of Rx or Tx LEs before ++ * reusing them ++ * but it might make debugging easier if one simply can see whether ++ * a LE has been worked on ++ */ ++ ++#define CLEAR_LE_OWN(pTable, Idx) \ ++ STLE_SET_OPC(&(pTable)->pLETab[(Idx)], SW_OWNER) ++ ++/* ++ * clear all own bits starting from old done index up to the LE before ++ * the new done index ++ */ ++#define CLEAR_LE_OWN_FROM_DONE_TO(pTable, To) { \ ++ int i; \ ++ i = (pTable)->Done; \ ++ while (i != To) { \ ++ CLEAR_LE_OWN(pTable, i); \ ++ i = (i + 1) & (NUM_LE_IN_TABLE(pTable) - 1); \ ++ } \ ++ } ++ ++#else /* !SAFE_BUT_SLOW */ ++ ++#define CHECK_STLE_OVERFLOW(pTable, IsOk) ++#define CLEAR_LE_OWN(pTable, Idx) ++#define CLEAR_LE_OWN_FROM_DONE_TO(pTable, To) ++ ++#endif /* !SAFE_BUT_SLOW */ ++ ++ ++/* typedefs *******************************************************************/ ++ ++typedef struct s_LetRxTx { ++ SK_U16 VlanId; /* VLAN Id given down last time */ ++ SK_U16 TcpWp; /* TCP Checksum Write Position */ ++ SK_U16 TcpSp1; /* TCP Checksum Calculation Start Position 1 */ ++ SK_U16 TcpSp2; /* TCP Checksum Calculation Start Position 2 */ ++ SK_U16 MssValue; /* Maximum Segment Size */ ++ SK_U16 Reserved1; /* reserved word for furture extensions */ ++ SK_U16 Reserved2; /* reserved word for furture extensions */ ++ SK_U16 Reserved3; /* reserved word for furture extensions */ ++} SK_LET_RX_TX; ++ ++typedef struct s_LetStat { ++ SK_U32 RxTimeStamp; /* Receive Timestamp */ ++ SK_U32 RssHashValue; /* RSS Hash Value */ ++ SK_BOOL RssIsIp; /* RSS Hash Value: IP packet detected */ ++ SK_BOOL RssIsTcp; /* RSS Hash Value: IP+TCP packet detected */ ++ SK_U16 VlanId; /* VLAN Id given received by Status BMU */ ++ SK_U16 TcpSum1; /* TCP checksum 1 (status BMU) */ ++ SK_U16 TcpSum2; /* TCP checksum 2 (status BMU) */ ++} SK_LET_STAT; ++ ++typedef union s_LetBmuSpec { ++ SK_LET_RX_TX RxTx; /* Rx/Tx BMU specific variables */ ++ SK_LET_STAT Stat; /* Status BMU specific variables */ ++} SK_LET_BMU_S; ++ ++typedef struct s_le_table { ++ /* all LE's between Done and HWPut are owned by the hardware */ ++ /* all LE's between Put and Done can be used from Software */ ++ /* all LE's between HWPut and Put are currently processed in DriverSend */ ++ unsigned Done; /* done index - consumed from HW and available */ ++ unsigned Put; /* put index - to be given to hardware */ ++ unsigned HwPut; /* put index actually given to hardware */ ++ unsigned Num; /* total number of list elements */ ++ SK_HWLE *pLETab; /* virtual address of list element table */ ++ SK_U32 pPhyLETABLow; /* physical address of list element table */ ++ SK_U32 pPhyLETABHigh; /* physical address of list element table */ ++ /* values to remember in order to save some LEs */ ++ SK_U32 BufHighAddr; /* high addr given down last time */ ++ SK_LET_BMU_S Bmu; /* contains BMU specific information */ ++ SK_U32 private; /* driver private variable free usable */ ++ SK_U16 TcpInitCsum; /* Init. Checksum */ ++} SK_LE_TABLE; ++ ++/* function prototypes ********************************************************/ ++ ++#ifndef SK_KR_PROTO ++ ++/* ++ * public functions in sky2le.c ++ */ ++extern void SkGeY2SetPutIndex( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ SK_U32 StartAddrPrefetchUnit, ++ SK_LE_TABLE *pLETab); ++ ++extern void SkGeY2InitPrefetchUnit( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ unsigned int Queue, ++ SK_LE_TABLE *pLETab); ++ ++extern void SkGeY2InitStatBmu( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ SK_LE_TABLE *pLETab); ++ ++extern void SkGeY2InitPollUnit( ++ SK_AC *pAC, ++ SK_IOC IoC, ++ SK_LE_TABLE *pLETab); ++ ++extern void SkGeY2InitSingleLETable( ++ SK_AC *pAC, ++ SK_LE_TABLE *pLETab, ++ unsigned int NumLE, ++ void *pVMem, ++ SK_U32 PMemLowAddr, ++ SK_U32 PMemHighAddr); ++ ++#else /* SK_KR_PROTO */ ++extern void SkGeY2SetPutIndex(); ++extern void SkGeY2InitPrefetchUnit(); ++extern void SkGeY2InitStatBmu(); ++extern void SkGeY2InitPollUnit(); ++extern void SkGeY2InitSingleLETable(); ++#endif /* SK_KR_PROTO */ ++ ++#ifdef __cplusplus ++} ++#endif /* __cplusplus */ ++ ++#endif /* __INC_SKY2LE_H */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/h/xmac_ii.h linux-2.6.9.new/drivers/net/sk98lin/h/xmac_ii.h +--- linux-2.6.9.old/drivers/net/sk98lin/h/xmac_ii.h 2004-10-19 05:54:55.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/h/xmac_ii.h 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: xmac_ii.h + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.52 $ +- * Date: $Date: 2003/10/02 16:35:50 $ ++ * Version: $Revision: 2.11 $ ++ * Date: $Date: 2005/01/04 14:14:20 $ + * Purpose: Defines and Macros for Gigabit Ethernet Controller + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -449,7 +448,7 @@ + /* + * Receive Frame Status Encoding + */ +-#define XMR_FS_LEN (0x3fffUL<<18) /* Bit 31..18: Rx Frame Length */ ++#define XMR_FS_LEN_MSK (0x3fffUL<<18) /* Bit 31..18: Rx Frame Length */ + #define XMR_FS_2L_VLAN (1L<<17) /* Bit 17: tagged wh 2Lev VLAN ID*/ + #define XMR_FS_1L_VLAN (1L<<16) /* Bit 16: tagged wh 1Lev VLAN ID*/ + #define XMR_FS_BC (1L<<15) /* Bit 15: Broadcast Frame */ +@@ -469,6 +468,8 @@ + #define XMR_FS_ERR (1L<<1) /* Bit 1: Frame Error */ + #define XMR_FS_MCTRL (1L<<0) /* Bit 0: MAC Control Packet */ + ++#define XMR_FS_LEN_SHIFT 18 ++ + /* + * XMR_FS_ERR will be set if + * XMR_FS_FCS_ERR, XMR_FS_LNG_ERR, XMR_FS_RUNT, +@@ -510,7 +511,7 @@ + #define PHY_BCOM_NEPG 0x07 /* 16 bit r/w Next Page Register */ + #define PHY_BCOM_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */ + /* Broadcom-specific registers */ +-#define PHY_BCOM_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Ctrl Reg */ ++#define PHY_BCOM_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */ + #define PHY_BCOM_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */ + /* 0x0b - 0x0e: reserved */ + #define PHY_BCOM_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */ +@@ -541,24 +542,32 @@ + #define PHY_MARV_NEPG 0x07 /* 16 bit r/w Next Page Register */ + #define PHY_MARV_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */ + /* Marvel-specific registers */ +-#define PHY_MARV_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Ctrl Reg */ ++#define PHY_MARV_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */ + #define PHY_MARV_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */ + /* 0x0b - 0x0e: reserved */ + #define PHY_MARV_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */ +-#define PHY_MARV_PHY_CTRL 0x10 /* 16 bit r/w PHY Specific Ctrl Reg */ +-#define PHY_MARV_PHY_STAT 0x11 /* 16 bit r/o PHY Specific Stat Reg */ ++#define PHY_MARV_PHY_CTRL 0x10 /* 16 bit r/w PHY Specific Control Reg */ ++#define PHY_MARV_PHY_STAT 0x11 /* 16 bit r/o PHY Specific Status Reg */ + #define PHY_MARV_INT_MASK 0x12 /* 16 bit r/w Interrupt Mask Reg */ + #define PHY_MARV_INT_STAT 0x13 /* 16 bit r/o Interrupt Status Reg */ + #define PHY_MARV_EXT_CTRL 0x14 /* 16 bit r/w Ext. PHY Specific Ctrl */ + #define PHY_MARV_RXE_CNT 0x15 /* 16 bit r/w Receive Error Counter */ + #define PHY_MARV_EXT_ADR 0x16 /* 16 bit r/w Ext. Ad. for Cable Diag. */ +- /* 0x17: reserved */ ++#define PHY_MARV_PORT_IRQ 0x17 /* 16 bit r/o Port 0 IRQ (88E1111 only) */ + #define PHY_MARV_LED_CTRL 0x18 /* 16 bit r/w LED Control Reg */ + #define PHY_MARV_LED_OVER 0x19 /* 16 bit r/w Manual LED Override Reg */ + #define PHY_MARV_EXT_CTRL_2 0x1a /* 16 bit r/w Ext. PHY Specific Ctrl 2 */ + #define PHY_MARV_EXT_P_STAT 0x1b /* 16 bit r/w Ext. PHY Spec. Stat Reg */ + #define PHY_MARV_CABLE_DIAG 0x1c /* 16 bit r/o Cable Diagnostic Reg */ +- /* 0x1d - 0x1f: reserved */ ++#define PHY_MARV_PAGE_ADDR 0x1d /* 16 bit r/w Extended Page Address Reg */ ++#define PHY_MARV_PAGE_DATA 0x1e /* 16 bit r/w Extended Page Data Reg */ ++ ++/* for 10/100 Fast Ethernet PHY (88E3082 only) */ ++#define PHY_MARV_FE_LED_PAR 0x16 /* 16 bit r/w LED Parallel Select Reg. */ ++#define PHY_MARV_FE_LED_SER 0x17 /* 16 bit r/w LED Stream Select S. LED */ ++#define PHY_MARV_FE_VCT_TX 0x1a /* 16 bit r/w VCT Reg. for TXP/N Pins */ ++#define PHY_MARV_FE_VCT_RX 0x1b /* 16 bit r/o VCT Reg. for RXP/N Pins */ ++#define PHY_MARV_FE_SPEC_2 0x1c /* 16 bit r/w Specific Control Reg. 2 */ + + /*----------------------------------------------------------------------------*/ + /* +@@ -574,9 +583,9 @@ + #define PHY_LONE_NEPG 0x07 /* 16 bit r/w Next Page Register */ + #define PHY_LONE_NEPG_LP 0x08 /* 16 bit r/o Next Page Link Partner */ + /* Level One-specific registers */ +-#define PHY_LONE_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg*/ ++#define PHY_LONE_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */ + #define PHY_LONE_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */ +- /* 0x0b -0x0e: reserved */ ++ /* 0x0b - 0x0e: reserved */ + #define PHY_LONE_EXT_STAT 0x0f /* 16 bit r/o Extended Status Reg */ + #define PHY_LONE_PORT_CFG 0x10 /* 16 bit r/w Port Configuration Reg*/ + #define PHY_LONE_Q_STAT 0x11 /* 16 bit r/o Quick Status Reg */ +@@ -585,7 +594,7 @@ + #define PHY_LONE_LED_CFG 0x14 /* 16 bit r/w LED Configuration Reg */ + #define PHY_LONE_PORT_CTRL 0x15 /* 16 bit r/w Port Control Reg */ + #define PHY_LONE_CIM 0x16 /* 16 bit r/o CIM Reg */ +- /* 0x17 -0x1c: reserved */ ++ /* 0x17 - 0x1c: reserved */ + + /*----------------------------------------------------------------------------*/ + /* +@@ -603,14 +612,14 @@ + /* National-specific registers */ + #define PHY_NAT_1000T_CTRL 0x09 /* 16 bit r/w 1000Base-T Control Reg */ + #define PHY_NAT_1000T_STAT 0x0a /* 16 bit r/o 1000Base-T Status Reg */ +- /* 0x0b -0x0e: reserved */ ++ /* 0x0b - 0x0e: reserved */ + #define PHY_NAT_EXT_STAT 0x0f /* 16 bit r/o Extended Status Register */ + #define PHY_NAT_EXT_CTRL1 0x10 /* 16 bit r/o Extended Control Reg1 */ + #define PHY_NAT_Q_STAT1 0x11 /* 16 bit r/o Quick Status Reg1 */ + #define PHY_NAT_10B_OP 0x12 /* 16 bit r/o 10Base-T Operations Reg */ + #define PHY_NAT_EXT_CTRL2 0x13 /* 16 bit r/o Extended Control Reg1 */ + #define PHY_NAT_Q_STAT2 0x14 /* 16 bit r/o Quick Status Reg2 */ +- /* 0x15 -0x18: reserved */ ++ /* 0x15 - 0x18: reserved */ + #define PHY_NAT_PHY_ADDR 0x19 /* 16 bit r/o PHY Address Register */ + + +@@ -618,7 +627,7 @@ + + /* + * PHY bit definitions +- * Bits defined as PHY_X_..., PHY_B_..., PHY_L_... or PHY_N_... are ++ * Bits defined as PHY_X_..., PHY_B_..., PHY_L_..., PHY_N_... or PHY_M_... are + * XMAC/Broadcom/LevelOne/National/Marvell-specific. + * All other are general. + */ +@@ -629,14 +638,14 @@ + /***** PHY_LONE_CTRL 16 bit r/w PHY Control Register *****/ + #define PHY_CT_RESET (1<<15) /* Bit 15: (sc) clear all PHY related regs */ + #define PHY_CT_LOOP (1<<14) /* Bit 14: enable Loopback over PHY */ +-#define PHY_CT_SPS_LSB (1<<13) /* Bit 13: (BC,L1) Speed select, lower bit */ ++#define PHY_CT_SPS_LSB (1<<13) /* Bit 13: Speed select, lower bit */ + #define PHY_CT_ANE (1<<12) /* Bit 12: Auto-Negotiation Enabled */ +-#define PHY_CT_PDOWN (1<<11) /* Bit 11: (BC,L1) Power Down Mode */ +-#define PHY_CT_ISOL (1<<10) /* Bit 10: (BC,L1) Isolate Mode */ +-#define PHY_CT_RE_CFG (1<<9) /* Bit 9: (sc) Restart Auto-Negotiation */ ++#define PHY_CT_PDOWN (1<<11) /* Bit 11: Power Down Mode */ ++#define PHY_CT_ISOL (1<<10) /* Bit 10: Isolate Mode */ ++#define PHY_CT_RE_CFG (1<<9) /* Bit 9: (sc) Restart Auto-Negotiation */ + #define PHY_CT_DUP_MD (1<<8) /* Bit 8: Duplex Mode */ +-#define PHY_CT_COL_TST (1<<7) /* Bit 7: (BC,L1) Collision Test enabled */ +-#define PHY_CT_SPS_MSB (1<<6) /* Bit 6: (BC,L1) Speed select, upper bit */ ++#define PHY_CT_COL_TST (1<<7) /* Bit 7: Collision Test enabled */ ++#define PHY_CT_SPS_MSB (1<<6) /* Bit 6: Speed select, upper bit */ + /* Bit 5..0: reserved */ + + #define PHY_CT_SP1000 PHY_CT_SPS_MSB /* enable speed of 1000 Mbps */ +@@ -649,25 +658,25 @@ + /***** PHY_MARV_STAT 16 bit r/w PHY Status Register *****/ + /***** PHY_LONE_STAT 16 bit r/w PHY Status Register *****/ + /* Bit 15..9: reserved */ +- /* (BC/L1) 100/10 Mbps cap bits ignored*/ ++ /* (BC/L1) 100/10 Mbps cap bits ignored */ + #define PHY_ST_EXT_ST (1<<8) /* Bit 8: Extended Status Present */ + /* Bit 7: reserved */ +-#define PHY_ST_PRE_SUP (1<<6) /* Bit 6: (BC/L1) preamble suppression */ ++#define PHY_ST_PRE_SUP (1<<6) /* Bit 6: Preamble Suppression */ + #define PHY_ST_AN_OVER (1<<5) /* Bit 5: Auto-Negotiation Over */ + #define PHY_ST_REM_FLT (1<<4) /* Bit 4: Remote Fault Condition Occured */ + #define PHY_ST_AN_CAP (1<<3) /* Bit 3: Auto-Negotiation Capability */ + #define PHY_ST_LSYNC (1<<2) /* Bit 2: Link Synchronized */ +-#define PHY_ST_JAB_DET (1<<1) /* Bit 1: (BC/L1) Jabber Detected */ ++#define PHY_ST_JAB_DET (1<<1) /* Bit 1: Jabber Detected */ + #define PHY_ST_EXT_REG (1<<0) /* Bit 0: Extended Register available */ + + +-/***** PHY_XMAC_ID1 16 bit r/o PHY ID1 Register */ +-/***** PHY_BCOM_ID1 16 bit r/o PHY ID1 Register */ +-/***** PHY_MARV_ID1 16 bit r/o PHY ID1 Register */ +-/***** PHY_LONE_ID1 16 bit r/o PHY ID1 Register */ ++/***** PHY_XMAC_ID1 16 bit r/o PHY ID1 Register */ ++/***** PHY_BCOM_ID1 16 bit r/o PHY ID1 Register */ ++/***** PHY_MARV_ID1 16 bit r/o PHY ID1 Register */ ++/***** PHY_LONE_ID1 16 bit r/o PHY ID1 Register */ + #define PHY_I1_OUI_MSK (0x3f<<10) /* Bit 15..10: Organization Unique ID */ + #define PHY_I1_MOD_NUM (0x3f<<4) /* Bit 9.. 4: Model Number */ +-#define PHY_I1_REV_MSK 0x0f /* Bit 3.. 0: Revision Number */ ++#define PHY_I1_REV_MSK 0xf /* Bit 3.. 0: Revision Number */ + + /* different Broadcom PHY Ids */ + #define PHY_BCOM_ID1_A1 0x6041 +@@ -675,11 +684,19 @@ + #define PHY_BCOM_ID1_C0 0x6044 + #define PHY_BCOM_ID1_C5 0x6047 + ++/* different Marvell PHY Ids */ ++#define PHY_MARV_ID0_VAL 0x0141 /* Marvell Unique Identifier */ ++ ++#define PHY_MARV_ID1_B0 0x0C23 /* Yukon (PHY 88E1011) */ ++#define PHY_MARV_ID1_B2 0x0C25 /* Yukon-Plus (PHY 88E1011) */ ++#define PHY_MARV_ID1_C2 0x0CC2 /* Yukon-EC (PHY 88E1111) */ ++#define PHY_MARV_ID1_Y2 0x0C91 /* Yukon-2 (PHY 88E1112) */ ++ + + /***** PHY_XMAC_AUNE_ADV 16 bit r/w Auto-Negotiation Advertisement *****/ + /***** PHY_XMAC_AUNE_LP 16 bit r/o Link Partner Ability Reg *****/ + #define PHY_AN_NXT_PG (1<<15) /* Bit 15: Request Next Page */ +-#define PHY_X_AN_ACK (1<<14) /* Bit 14: (ro) Acknowledge Received */ ++#define PHY_X_AN_ACK (1<<14) /* Bit 14: (ro) Acknowledge Received */ + #define PHY_X_AN_RFB (3<<12) /* Bit 13..12: Remote Fault Bits */ + /* Bit 11.. 9: reserved */ + #define PHY_X_AN_PAUSE (3<<7) /* Bit 8.. 7: Pause Bits */ +@@ -827,7 +844,7 @@ + #define PHY_B_PEC_BY_MLT3 (1<<8) /* Bit 8: Bypass MLT3 Encoder */ + #define PHY_B_PEC_BY_RXA (1<<7) /* Bit 7: Bypass Rx Alignm. */ + #define PHY_B_PEC_RES_SCR (1<<6) /* Bit 6: Reset Scrambler */ +-#define PHY_B_PEC_EN_LTR (1<<5) /* Bit 5: Ena LED Traffic Mode */ ++#define PHY_B_PEC_EN_LTR (1<<5) /* Bit 5: Enable LED Traffic Mode */ + #define PHY_B_PEC_LED_ON (1<<4) /* Bit 4: Force LED's on */ + #define PHY_B_PEC_LED_OFF (1<<3) /* Bit 3: Force LED's off */ + #define PHY_B_PEC_EX_IPG (1<<2) /* Bit 2: Extend Tx IPG Mode */ +@@ -981,7 +998,7 @@ + #define PHY_L_QS_DUP_MOD (1<<9) /* Bit 9: Full/Half Duplex */ + #define PHY_L_QS_AN (1<<8) /* Bit 8: AutoNeg is On */ + #define PHY_L_QS_AN_C (1<<7) /* Bit 7: AN is Complete */ +-#define PHY_L_QS_LLE (7<<4) /* Bit 6: Line Length Estim. */ ++#define PHY_L_QS_LLE (7<<4) /* Bit 6..4: Line Length Estim. */ + #define PHY_L_QS_PAUSE (1<<3) /* Bit 3: LP advertised Pause */ + #define PHY_L_QS_AS_PAUSE (1<<2) /* Bit 2: LP adv. asym. Pause */ + #define PHY_L_QS_ISOLATE (1<<1) /* Bit 1: CIM Isolated */ +@@ -1029,9 +1046,8 @@ + /* Bit 9..0: not described */ + + /***** PHY_LONE_CIM 16 bit r/o CIM Reg *****/ +-#define PHY_L_CIM_ISOL (255<<8)/* Bit 15..8: Isolate Count */ +-#define PHY_L_CIM_FALSE_CAR (255<<0)/* Bit 7..0: False Carrier Count */ +- ++#define PHY_L_CIM_ISOL (0xff<<8) /* Bit 15..8: Isolate Count */ ++#define PHY_L_CIM_FALSE_CAR 0xff /* Bit 7..0: False Carrier Count */ + + /* + * Pause Bits (PHY_L_AN_ASP and PHY_L_AN_PC) encoding +@@ -1041,7 +1057,6 @@ + #define PHY_L_P_ASYM_MD (2<<10) /* Bit 11..10: asymmetric Pause Mode */ + #define PHY_L_P_BOTH_MD (3<<10) /* Bit 11..10: both Pause Mode */ + +- + /* + * National-Specific + */ +@@ -1086,22 +1101,24 @@ + */ + /***** PHY_MARV_AUNE_ADV 16 bit r/w Auto-Negotiation Advertisement *****/ + /***** PHY_MARV_AUNE_LP 16 bit r/w Link Part Ability Reg *****/ +-#define PHY_M_AN_NXT_PG BIT_15 /* Request Next Page */ +-#define PHY_M_AN_ACK BIT_14 /* (ro) Acknowledge Received */ +-#define PHY_M_AN_RF BIT_13 /* Remote Fault */ +- /* Bit 12: reserved */ +-#define PHY_M_AN_ASP BIT_11 /* Asymmetric Pause */ +-#define PHY_M_AN_PC BIT_10 /* MAC Pause implemented */ +-#define PHY_M_AN_100_FD BIT_8 /* Advertise 100Base-TX Full Duplex */ +-#define PHY_M_AN_100_HD BIT_7 /* Advertise 100Base-TX Half Duplex */ +-#define PHY_M_AN_10_FD BIT_6 /* Advertise 10Base-TX Full Duplex */ +-#define PHY_M_AN_10_HD BIT_5 /* Advertise 10Base-TX Half Duplex */ ++#define PHY_M_AN_NXT_PG BIT_15S /* Request Next Page */ ++#define PHY_M_AN_ACK BIT_14S /* (ro) Acknowledge Received */ ++#define PHY_M_AN_RF BIT_13S /* Remote Fault */ ++ /* Bit 12: reserved */ ++#define PHY_M_AN_ASP BIT_11S /* Asymmetric Pause */ ++#define PHY_M_AN_PC BIT_10S /* MAC Pause implemented */ ++#define PHY_M_AN_100_T4 BIT_9S /* Not cap. 100Base-T4 (always 0) */ ++#define PHY_M_AN_100_FD BIT_8S /* Advertise 100Base-TX Full Duplex */ ++#define PHY_M_AN_100_HD BIT_7S /* Advertise 100Base-TX Half Duplex */ ++#define PHY_M_AN_10_FD BIT_6S /* Advertise 10Base-TX Full Duplex */ ++#define PHY_M_AN_10_HD BIT_5S /* Advertise 10Base-TX Half Duplex */ ++#define PHY_M_AN_SEL_MSK (0x1f<<4) /* Bit 4.. 0: Selector Field Mask */ + + /* special defines for FIBER (88E1011S only) */ +-#define PHY_M_AN_ASP_X BIT_8 /* Asymmetric Pause */ +-#define PHY_M_AN_PC_X BIT_7 /* MAC Pause implemented */ +-#define PHY_M_AN_1000X_AHD BIT_6 /* Advertise 10000Base-X Half Duplex */ +-#define PHY_M_AN_1000X_AFD BIT_5 /* Advertise 10000Base-X Full Duplex */ ++#define PHY_M_AN_ASP_X BIT_8S /* Asymmetric Pause */ ++#define PHY_M_AN_PC_X BIT_7S /* MAC Pause implemented */ ++#define PHY_M_AN_1000X_AHD BIT_6S /* Advertise 10000Base-X Half Duplex */ ++#define PHY_M_AN_1000X_AFD BIT_5S /* Advertise 10000Base-X Full Duplex */ + + /* Pause Bits (PHY_M_AN_ASP_X and PHY_M_AN_PC_X) encoding */ + #define PHY_M_P_NO_PAUSE_X (0<<7) /* Bit 8.. 7: no Pause Mode */ +@@ -1111,105 +1128,162 @@ + + /***** PHY_MARV_1000T_CTRL 16 bit r/w 1000Base-T Control Reg *****/ + #define PHY_M_1000C_TEST (7<<13) /* Bit 15..13: Test Modes */ +-#define PHY_M_1000C_MSE (1<<12) /* Bit 12: Manual Master/Slave Enable */ +-#define PHY_M_1000C_MSC (1<<11) /* Bit 11: M/S Configuration (1=Master) */ +-#define PHY_M_1000C_MPD (1<<10) /* Bit 10: Multi-Port Device */ +-#define PHY_M_1000C_AFD (1<<9) /* Bit 9: Advertise Full Duplex */ +-#define PHY_M_1000C_AHD (1<<8) /* Bit 8: Advertise Half Duplex */ ++#define PHY_M_1000C_MSE BIT_12S /* Manual Master/Slave Enable */ ++#define PHY_M_1000C_MSC BIT_11S /* M/S Configuration (1=Master) */ ++#define PHY_M_1000C_MPD BIT_10S /* Multi-Port Device */ ++#define PHY_M_1000C_AFD BIT_9S /* Advertise Full Duplex */ ++#define PHY_M_1000C_AHD BIT_8S /* Advertise Half Duplex */ + /* Bit 7..0: reserved */ + + /***** PHY_MARV_PHY_CTRL 16 bit r/w PHY Specific Ctrl Reg *****/ +-#define PHY_M_PC_TX_FFD_MSK (3<<14) /* Bit 15..14: Tx FIFO Depth Mask */ +-#define PHY_M_PC_RX_FFD_MSK (3<<12) /* Bit 13..12: Rx FIFO Depth Mask */ +-#define PHY_M_PC_ASS_CRS_TX (1<<11) /* Bit 11: Assert CRS on Transmit */ +-#define PHY_M_PC_FL_GOOD (1<<10) /* Bit 10: Force Link Good */ +-#define PHY_M_PC_EN_DET_MSK (3<<8) /* Bit 9.. 8: Energy Detect Mask */ +-#define PHY_M_PC_ENA_EXT_D (1<<7) /* Bit 7: Enable Ext. Distance (10BT) */ +-#define PHY_M_PC_MDIX_MSK (3<<5) /* Bit 6.. 5: MDI/MDIX Config. Mask */ +-#define PHY_M_PC_DIS_125CLK (1<<4) /* Bit 4: Disable 125 CLK */ +-#define PHY_M_PC_MAC_POW_UP (1<<3) /* Bit 3: MAC Power up */ +-#define PHY_M_PC_SQE_T_ENA (1<<2) /* Bit 2: SQE Test Enabled */ +-#define PHY_M_PC_POL_R_DIS (1<<1) /* Bit 1: Polarity Reversal Disabled */ +-#define PHY_M_PC_DIS_JABBER (1<<0) /* Bit 0: Disable Jabber */ ++#define PHY_M_PC_TX_FFD_MSK (3<<14) /* Bit 15..14: Tx FIFO Depth Mask */ ++#define PHY_M_PC_RX_FFD_MSK (3<<12) /* Bit 13..12: Rx FIFO Depth Mask */ ++#define PHY_M_PC_ASS_CRS_TX BIT_11S /* Assert CRS on Transmit */ ++#define PHY_M_PC_FL_GOOD BIT_10S /* Force Link Good */ ++#define PHY_M_PC_EN_DET_MSK (3<<8) /* Bit 9.. 8: Energy Detect Mask */ ++#define PHY_M_PC_ENA_EXT_D BIT_7S /* Enable Ext. Distance (10BT) */ ++#define PHY_M_PC_MDIX_MSK (3<<5) /* Bit 6.. 5: MDI/MDIX Config. Mask */ ++#define PHY_M_PC_DIS_125CLK BIT_4S /* Disable 125 CLK */ ++#define PHY_M_PC_MAC_POW_UP BIT_3S /* MAC Power up */ ++#define PHY_M_PC_SQE_T_ENA BIT_2S /* SQE Test Enabled */ ++#define PHY_M_PC_POL_R_DIS BIT_1S /* Polarity Reversal Disabled */ ++#define PHY_M_PC_DIS_JABBER BIT_0S /* Disable Jabber */ + + #define PHY_M_PC_EN_DET SHIFT8(2) /* Energy Detect (Mode 1) */ + #define PHY_M_PC_EN_DET_PLUS SHIFT8(3) /* Energy Detect Plus (Mode 2) */ + +-#define PHY_M_PC_MDI_XMODE(x) SHIFT5(x) +-#define PHY_M_PC_MAN_MDI 0 /* 00 = Manual MDI configuration */ ++#define PHY_M_PC_MDI_XMODE(x) (SHIFT5(x) & PHY_M_PC_MDIX_MSK) ++ ++#define PHY_M_PC_MAN_MDI 0 /* 00 = Manual MDI configuration */ + #define PHY_M_PC_MAN_MDIX 1 /* 01 = Manual MDIX configuration */ + #define PHY_M_PC_ENA_AUTO 3 /* 11 = Enable Automatic Crossover */ + ++/* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */ ++#define PHY_M_PC_DIS_LINK_P BIT_15S /* Disable Link Pulses */ ++#define PHY_M_PC_DSC_MSK (7<<12) /* Bit 14..12: Downshift Counter */ ++#define PHY_M_PC_DOWN_S_ENA BIT_11S /* Downshift Enable */ ++ /* !!! Errata in spec. (1 = disable) */ ++ ++#define PHY_M_PC_DSC(x) (SHIFT12(x) & PHY_M_PC_DSC_MSK) ++ /* 000=1x; 001=2x; 010=3x; 011=4x */ ++ /* 100=5x; 101=6x; 110=7x; 111=8x */ ++ ++/* for 10/100 Fast Ethernet PHY (88E3082 only) */ ++#define PHY_M_PC_ENA_DTE_DT BIT_15S /* Enable Data Terminal Equ. (DTE) Detect */ ++#define PHY_M_PC_ENA_ENE_DT BIT_14S /* Enable Energy Detect (sense & pulse) */ ++#define PHY_M_PC_DIS_NLP_CK BIT_13S /* Disable Normal Link Puls (NLP) Check */ ++#define PHY_M_PC_ENA_LIP_NP BIT_12S /* Enable Link Partner Next Page Reg. */ ++#define PHY_M_PC_DIS_NLP_GN BIT_11S /* Disable Normal Link Puls Generation */ ++ ++#define PHY_M_PC_DIS_SCRAMB BIT_9S /* Disable Scrambler */ ++#define PHY_M_PC_DIS_FEFI BIT_8S /* Disable Far End Fault Indic. (FEFI) */ ++ ++#define PHY_M_PC_SH_TP_SEL BIT_6S /* Shielded Twisted Pair Select */ ++#define PHY_M_PC_RX_FD_MSK (3<<2) /* Bit 3.. 2: Rx FIFO Depth Mask */ ++ + /***** PHY_MARV_PHY_STAT 16 bit r/o PHY Specific Status Reg *****/ +-#define PHY_M_PS_SPEED_MSK (3<<14) /* Bit 15..14: Speed Mask */ +-#define PHY_M_PS_SPEED_1000 (1<<15) /* 10 = 1000 Mbps */ +-#define PHY_M_PS_SPEED_100 (1<<14) /* 01 = 100 Mbps */ +-#define PHY_M_PS_SPEED_10 0 /* 00 = 10 Mbps */ +-#define PHY_M_PS_FULL_DUP (1<<13) /* Bit 13: Full Duplex */ +-#define PHY_M_PS_PAGE_REC (1<<12) /* Bit 12: Page Received */ +-#define PHY_M_PS_SPDUP_RES (1<<11) /* Bit 11: Speed & Duplex Resolved */ +-#define PHY_M_PS_LINK_UP (1<<10) /* Bit 10: Link Up */ +-#define PHY_M_PS_CABLE_MSK (3<<7) /* Bit 9.. 7: Cable Length Mask */ +-#define PHY_M_PS_MDI_X_STAT (1<<6) /* Bit 6: MDI Crossover Stat (1=MDIX) */ +-#define PHY_M_PS_DOWNS_STAT (1<<5) /* Bit 5: Downshift Status (1=downsh.) */ +-#define PHY_M_PS_ENDET_STAT (1<<4) /* Bit 4: Energy Detect Status (1=act) */ +-#define PHY_M_PS_TX_P_EN (1<<3) /* Bit 3: Tx Pause Enabled */ +-#define PHY_M_PS_RX_P_EN (1<<2) /* Bit 2: Rx Pause Enabled */ +-#define PHY_M_PS_POL_REV (1<<1) /* Bit 1: Polarity Reversed */ +-#define PHY_M_PC_JABBER (1<<0) /* Bit 0: Jabber */ ++#define PHY_M_PS_SPEED_MSK (3<<14) /* Bit 15..14: Speed Mask */ ++#define PHY_M_PS_SPEED_1000 BIT_15S /* 10 = 1000 Mbps */ ++#define PHY_M_PS_SPEED_100 BIT_14S /* 01 = 100 Mbps */ ++#define PHY_M_PS_SPEED_10 0 /* 00 = 10 Mbps */ ++#define PHY_M_PS_FULL_DUP BIT_13S /* Full Duplex */ ++#define PHY_M_PS_PAGE_REC BIT_12S /* Page Received */ ++#define PHY_M_PS_SPDUP_RES BIT_11S /* Speed & Duplex Resolved */ ++#define PHY_M_PS_LINK_UP BIT_10S /* Link Up */ ++#define PHY_M_PS_CABLE_MSK (7<<7) /* Bit 9.. 7: Cable Length Mask */ ++#define PHY_M_PS_MDI_X_STAT BIT_6S /* MDI Crossover Stat (1=MDIX) */ ++#define PHY_M_PS_DOWNS_STAT BIT_5S /* Downshift Status (1=downsh.) */ ++#define PHY_M_PS_ENDET_STAT BIT_4S /* Energy Detect Status (1=act) */ ++#define PHY_M_PS_TX_P_EN BIT_3S /* Tx Pause Enabled */ ++#define PHY_M_PS_RX_P_EN BIT_2S /* Rx Pause Enabled */ ++#define PHY_M_PS_POL_REV BIT_1S /* Polarity Reversed */ ++#define PHY_M_PS_JABBER BIT_0S /* Jabber */ + + #define PHY_M_PS_PAUSE_MSK (PHY_M_PS_TX_P_EN | PHY_M_PS_RX_P_EN) + ++/* for 10/100 Fast Ethernet PHY (88E3082 only) */ ++#define PHY_M_PS_DTE_DETECT BIT_15S /* Data Terminal Equipment (DTE) Detected */ ++#define PHY_M_PS_RES_SPEED BIT_14S /* Resolved Speed (1=100 Mbps, 0=10 Mbps */ ++ + /***** PHY_MARV_INT_MASK 16 bit r/w Interrupt Mask Reg *****/ + /***** PHY_MARV_INT_STAT 16 bit r/o Interrupt Status Reg *****/ +-#define PHY_M_IS_AN_ERROR (1<<15) /* Bit 15: Auto-Negotiation Error */ +-#define PHY_M_IS_LSP_CHANGE (1<<14) /* Bit 14: Link Speed Changed */ +-#define PHY_M_IS_DUP_CHANGE (1<<13) /* Bit 13: Duplex Mode Changed */ +-#define PHY_M_IS_AN_PR (1<<12) /* Bit 12: Page Received */ +-#define PHY_M_IS_AN_COMPL (1<<11) /* Bit 11: Auto-Negotiation Completed */ +-#define PHY_M_IS_LST_CHANGE (1<<10) /* Bit 10: Link Status Changed */ +-#define PHY_M_IS_SYMB_ERROR (1<<9) /* Bit 9: Symbol Error */ +-#define PHY_M_IS_FALSE_CARR (1<<8) /* Bit 8: False Carrier */ +-#define PHY_M_IS_FIFO_ERROR (1<<7) /* Bit 7: FIFO Overflow/Underrun Error */ +-#define PHY_M_IS_MDI_CHANGE (1<<6) /* Bit 6: MDI Crossover Changed */ +-#define PHY_M_IS_DOWNSH_DET (1<<5) /* Bit 5: Downshift Detected */ +-#define PHY_M_IS_END_CHANGE (1<<4) /* Bit 4: Energy Detect Changed */ +- /* Bit 3..2: reserved */ +-#define PHY_M_IS_POL_CHANGE (1<<1) /* Bit 1: Polarity Changed */ +-#define PHY_M_IS_JABBER (1<<0) /* Bit 0: Jabber */ ++#define PHY_M_IS_AN_ERROR BIT_15S /* Auto-Negotiation Error */ ++#define PHY_M_IS_LSP_CHANGE BIT_14S /* Link Speed Changed */ ++#define PHY_M_IS_DUP_CHANGE BIT_13S /* Duplex Mode Changed */ ++#define PHY_M_IS_AN_PR BIT_12S /* Page Received */ ++#define PHY_M_IS_AN_COMPL BIT_11S /* Auto-Negotiation Completed */ ++#define PHY_M_IS_LST_CHANGE BIT_10S /* Link Status Changed */ ++#define PHY_M_IS_SYMB_ERROR BIT_9S /* Symbol Error */ ++#define PHY_M_IS_FALSE_CARR BIT_8S /* False Carrier */ ++#define PHY_M_IS_FIFO_ERROR BIT_7S /* FIFO Overflow/Underrun Error */ ++#define PHY_M_IS_MDI_CHANGE BIT_6S /* MDI Crossover Changed */ ++#define PHY_M_IS_DOWNSH_DET BIT_5S /* Downshift Detected */ ++#define PHY_M_IS_END_CHANGE BIT_4S /* Energy Detect Changed */ ++ /* Bit 3: reserved */ ++#define PHY_M_IS_DTE_CHANGE BIT_2S /* DTE Power Det. Status Changed */ ++ /* (88E1111 only) */ ++#define PHY_M_IS_POL_CHANGE BIT_1S /* Polarity Changed */ ++#define PHY_M_IS_JABBER BIT_0S /* Jabber */ + + #define PHY_M_DEF_MSK (PHY_M_IS_AN_ERROR | PHY_M_IS_AN_PR | \ + PHY_M_IS_LST_CHANGE | PHY_M_IS_FIFO_ERROR) + + /***** PHY_MARV_EXT_CTRL 16 bit r/w Ext. PHY Specific Ctrl *****/ +-#define PHY_M_EC_M_DSC_MSK (3<<10) /* Bit 11..10: Master downshift counter */ +-#define PHY_M_EC_S_DSC_MSK (3<<8) /* Bit 9.. 8: Slave downshift counter */ ++#define PHY_M_EC_ENA_BC_EXT BIT_15S /* Enable Block Carr. Ext. (88E1111 only) */ ++#define PHY_M_EC_ENA_LIN_LB BIT_14S /* Enable Line Loopback (88E1111 only) */ ++ /* Bit 13: reserved */ ++#define PHY_M_EC_DIS_LINK_P BIT_12S /* Disable Link Pulses (88E1111 only) */ ++#define PHY_M_EC_M_DSC_MSK (3<<10) /* Bit 11..10: Master Downshift Counter */ ++ /* (88E1011 only) */ ++#define PHY_M_EC_S_DSC_MSK (3<<8) /* Bit 9.. 8: Slave Downshift Counter */ ++ /* (88E1011 only) */ ++#define PHY_M_EC_DSC_MSK_2 (7<<9) /* Bit 11.. 9: Downshift Counter */ ++ /* (88E1111 only) */ ++#define PHY_M_EC_DOWN_S_ENA BIT_8S /* Downshift Enable (88E1111 only) */ ++ /* !!! Errata in spec. (1 = disable) */ ++#define PHY_M_EC_RX_TIM_CT BIT_7S /* RGMII Rx Timing Control*/ + #define PHY_M_EC_MAC_S_MSK (7<<4) /* Bit 6.. 4: Def. MAC interface speed */ +-#define PHY_M_EC_FIB_AN_ENA (1<<3) /* Bit 3: Fiber Auto-Neg. Enable */ +- +-#define PHY_M_EC_M_DSC(x) SHIFT10(x) /* 00=1x; 01=2x; 10=3x; 11=4x */ +-#define PHY_M_EC_S_DSC(x) SHIFT8(x) /* 00=dis; 01=1x; 10=2x; 11=3x */ +-#define PHY_M_EC_MAC_S(x) SHIFT4(x) /* 01X=0; 110=2.5; 111=25 (MHz) */ +- ++#define PHY_M_EC_FIB_AN_ENA BIT_3S /* Fiber Auto-Neg. Enable (88E1011S only) */ ++#define PHY_M_EC_DTE_D_ENA BIT_2S /* DTE Detect Enable (88E1111 only) */ ++#define PHY_M_EC_TX_TIM_CT BIT_1S /* RGMII Tx Timing Control */ ++#define PHY_M_EC_TRANS_DIS BIT_0S /* Transmitter Disable (88E1111 only) */ ++ ++#define PHY_M_EC_M_DSC(x) (SHIFT10(x) & PHY_M_EC_M_DSC_MSK) ++ /* 00=1x; 01=2x; 10=3x; 11=4x */ ++#define PHY_M_EC_S_DSC(x) (SHIFT8(x) & PHY_M_EC_S_DSC_MSK) ++ /* 00=dis; 01=1x; 10=2x; 11=3x */ ++#define PHY_M_EC_MAC_S(x) (SHIFT4(x) & PHY_M_EC_MAC_S_MSK) ++ /* 01X=0; 110=2.5; 111=25 (MHz) */ ++ ++#define PHY_M_EC_DSC_2(x) (SHIFT9(x) & PHY_M_EC_DSC_MSK_2) ++ /* 000=1x; 001=2x; 010=3x; 011=4x */ ++ /* 100=5x; 101=6x; 110=7x; 111=8x */ + #define MAC_TX_CLK_0_MHZ 2 + #define MAC_TX_CLK_2_5_MHZ 6 + #define MAC_TX_CLK_25_MHZ 7 + + /***** PHY_MARV_LED_CTRL 16 bit r/w LED Control Reg *****/ +-#define PHY_M_LEDC_DIS_LED (1<<15) /* Bit 15: Disable LED */ +-#define PHY_M_LEDC_PULS_MSK (7<<12) /* Bit 14..12: Pulse Stretch Mask */ +-#define PHY_M_LEDC_F_INT (1<<11) /* Bit 11: Force Interrupt */ +-#define PHY_M_LEDC_BL_R_MSK (7<<8) /* Bit 10.. 8: Blink Rate Mask */ +- /* Bit 7.. 5: reserved */ +-#define PHY_M_LEDC_LINK_MSK (3<<3) /* Bit 4.. 3: Link Control Mask */ +-#define PHY_M_LEDC_DP_CTRL (1<<2) /* Bit 2: Duplex Control */ +-#define PHY_M_LEDC_RX_CTRL (1<<1) /* Bit 1: Rx activity / Link */ +-#define PHY_M_LEDC_TX_CTRL (1<<0) /* Bit 0: Tx activity / Link */ ++#define PHY_M_LEDC_DIS_LED BIT_15S /* Disable LED */ ++#define PHY_M_LEDC_PULS_MSK (7<<12) /* Bit 14..12: Pulse Stretch Mask */ ++#define PHY_M_LEDC_F_INT BIT_11S /* Force Interrupt */ ++#define PHY_M_LEDC_BL_R_MSK (7<<8) /* Bit 10.. 8: Blink Rate Mask */ ++#define PHY_M_LEDC_DP_C_LSB BIT_7S /* Duplex Control (LSB, 88E1111 only) */ ++#define PHY_M_LEDC_TX_C_LSB BIT_6S /* Tx Control (LSB, 88E1111 only) */ ++#define PHY_M_LEDC_LK_C_MSK (7<<3) /* Bit 5.. 3: Link Control Mask */ ++ /* (88E1111 only) */ ++ /* Bit 7.. 5: reserved (88E1011 only) */ ++#define PHY_M_LEDC_LINK_MSK (3<<3) /* Bit 4.. 3: Link Control Mask */ ++ /* (88E1011 only) */ ++#define PHY_M_LEDC_DP_CTRL BIT_2S /* Duplex Control */ ++#define PHY_M_LEDC_DP_C_MSB BIT_2S /* Duplex Control (MSB, 88E1111 only) */ ++#define PHY_M_LEDC_RX_CTRL BIT_1S /* Rx Activity / Link */ ++#define PHY_M_LEDC_TX_CTRL BIT_0S /* Tx Activity / Link */ ++#define PHY_M_LEDC_TX_C_MSB BIT_0S /* Tx Control (MSB, 88E1111 only) */ + +-#define PHY_M_LED_PULS_DUR(x) SHIFT12(x) /* Pulse Stretch Duration */ ++#define PHY_M_LED_PULS_DUR(x) (SHIFT12(x) & PHY_M_LEDC_PULS_MSK) + +-#define PULS_NO_STR 0 /* no pulse stretching */ +-#define PULS_21MS 1 /* 21 ms to 42 ms */ ++#define PULS_NO_STR 0 /* no pulse stretching */ ++#define PULS_21MS 1 /* 21 ms to 42 ms */ + #define PULS_42MS 2 /* 42 ms to 84 ms */ + #define PULS_84MS 3 /* 84 ms to 170 ms */ + #define PULS_170MS 4 /* 170 ms to 340 ms */ +@@ -1217,7 +1291,7 @@ + #define PULS_670MS 6 /* 670 ms to 1.3 s */ + #define PULS_1300MS 7 /* 1.3 s to 2.7 s */ + +-#define PHY_M_LED_BLINK_RT(x) SHIFT8(x) /* Blink Rate */ ++#define PHY_M_LED_BLINK_RT(x) (SHIFT8(x) & PHY_M_LEDC_BL_R_MSK) + + #define BLINK_42MS 0 /* 42 ms */ + #define BLINK_84MS 1 /* 84 ms */ +@@ -1227,6 +1301,8 @@ + /* values 5 - 7: reserved */ + + /***** PHY_MARV_LED_OVER 16 bit r/w Manual LED Override Reg *****/ ++#define PHY_M_LED_MO_SGMII(x) SHIFT14(x) /* Bit 15..14: SGMII AN Timer */ ++ /* Bit 13..12: reserved */ + #define PHY_M_LED_MO_DUP(x) SHIFT10(x) /* Bit 11..10: Duplex */ + #define PHY_M_LED_MO_10(x) SHIFT8(x) /* Bit 9.. 8: Link 10 */ + #define PHY_M_LED_MO_100(x) SHIFT6(x) /* Bit 7.. 6: Link 100 */ +@@ -1240,30 +1316,35 @@ + #define MO_LED_ON 3 + + /***** PHY_MARV_EXT_CTRL_2 16 bit r/w Ext. PHY Specific Ctrl 2 *****/ +- /* Bit 15.. 7: reserved */ +-#define PHY_M_EC2_FI_IMPED (1<<6) /* Bit 6: Fiber Input Impedance */ +-#define PHY_M_EC2_FO_IMPED (1<<5) /* Bit 5: Fiber Output Impedance */ +-#define PHY_M_EC2_FO_M_CLK (1<<4) /* Bit 4: Fiber Mode Clock Enable */ +-#define PHY_M_EC2_FO_BOOST (1<<3) /* Bit 3: Fiber Output Boost */ ++ /* Bit 15.. 7: reserved */ ++#define PHY_M_EC2_FI_IMPED BIT_6S /* Fiber Input Impedance */ ++#define PHY_M_EC2_FO_IMPED BIT_5S /* Fiber Output Impedance */ ++#define PHY_M_EC2_FO_M_CLK BIT_4S /* Fiber Mode Clock Enable */ ++#define PHY_M_EC2_FO_BOOST BIT_3S /* Fiber Output Boost */ + #define PHY_M_EC2_FO_AM_MSK 7 /* Bit 2.. 0: Fiber Output Amplitude */ + +-/***** PHY_MARV_EXT_P_STAT 16 bit r/w Ext. PHY Specific Status *****/ +-#define PHY_M_FC_AUTO_SEL (1<<15) /* Bit 15: Fiber/Copper Auto Sel. dis. */ +-#define PHY_M_FC_AN_REG_ACC (1<<14) /* Bit 14: Fiber/Copper Autoneg. reg acc */ +-#define PHY_M_FC_RESULUTION (1<<13) /* Bit 13: Fiber/Copper Resulution */ +-#define PHY_M_SER_IF_AN_BP (1<<12) /* Bit 12: Ser IF autoneg. bypass enable */ +-#define PHY_M_SER_IF_BP_ST (1<<11) /* Bit 11: Ser IF autoneg. bypass status */ +-#define PHY_M_IRQ_POLARITY (1<<10) /* Bit 10: IRQ polarity */ +- /* Bit 9..4: reserved */ +-#define PHY_M_UNDOC1 (1<< 7) /* undocumented bit !! */ +-#define PHY_M_MODE_MASK (0xf<<0)/* Bit 3..0: copy of HWCFG MODE[3:0] */ +- ++/***** PHY_MARV_EXT_P_STAT 16 bit r/w Ext. PHY Specific Status *****/ ++#define PHY_M_FC_AUTO_SEL BIT_15S /* Fiber/Copper Auto Sel. Dis. */ ++#define PHY_M_FC_AN_REG_ACC BIT_14S /* Fiber/Copper AN Reg. Access */ ++#define PHY_M_FC_RESOLUTION BIT_13S /* Fiber/Copper Resolution */ ++#define PHY_M_SER_IF_AN_BP BIT_12S /* Ser. IF AN Bypass Enable */ ++#define PHY_M_SER_IF_BP_ST BIT_11S /* Ser. IF AN Bypass Status */ ++#define PHY_M_IRQ_POLARITY BIT_10S /* IRQ polarity */ ++#define PHY_M_DIS_AUT_MED BIT_9S /* Disable Aut. Medium Reg. Selection */ ++ /* (88E1111 only) */ ++ /* Bit 9.. 4: reserved (88E1011 only) */ ++#define PHY_M_UNDOC1 BIT_7S /* undocumented bit !! */ ++#define PHY_M_DTE_POW_STAT BIT_4S /* DTE Power Status (88E1111 only) */ ++#define PHY_M_MODE_MASK 0xf /* Bit 3.. 0: copy of HWCFG MODE[3:0] */ + + /***** PHY_MARV_CABLE_DIAG 16 bit r/o Cable Diagnostic Reg *****/ +-#define PHY_M_CABD_ENA_TEST (1<<15) /* Bit 15: Enable Test */ +-#define PHY_M_CABD_STAT_MSK (3<<13) /* Bit 14..13: Status */ +- /* Bit 12.. 8: reserved */ +-#define PHY_M_CABD_DIST_MSK 0xff /* Bit 7.. 0: Distance */ ++#define PHY_M_CABD_ENA_TEST BIT_15S /* Enable Test (Page 0) */ ++#define PHY_M_CABD_DIS_WAIT BIT_15S /* Disable Waiting Period (Page 1) */ ++ /* (88E1111 only) */ ++#define PHY_M_CABD_STAT_MSK (3<<13) /* Bit 14..13: Status Mask */ ++#define PHY_M_CABD_AMPL_MSK (0x1f<<8) /* Bit 12.. 8: Amplitude Mask */ ++ /* (88E1111 only) */ ++#define PHY_M_CABD_DIST_MSK 0xff /* Bit 7.. 0: Distance Mask */ + + /* values for Cable Diagnostic Status (11=fail; 00=OK; 10=open; 01=short) */ + #define CABD_STAT_NORMAL 0 +@@ -1271,6 +1352,72 @@ + #define CABD_STAT_OPEN 2 + #define CABD_STAT_FAIL 3 + ++/* for 10/100 Fast Ethernet PHY (88E3082 only) */ ++/***** PHY_MARV_FE_LED_PAR 16 bit r/w LED Parallel Select Reg. *****/ ++ /* Bit 15..12: reserved (used internally) */ ++#define PHY_M_FELP_LED2_MSK (0xf<<8) /* Bit 11.. 8: LED2 Mask (LINK) */ ++#define PHY_M_FELP_LED1_MSK (0xf<<4) /* Bit 7.. 4: LED1 Mask (ACT) */ ++#define PHY_M_FELP_LED0_MSK 0xf /* Bit 3.. 0: LED0 Mask (SPEED) */ ++ ++#define PHY_M_FELP_LED2_CTRL(x) (SHIFT8(x) & PHY_M_FELP_LED2_MSK) ++#define PHY_M_FELP_LED1_CTRL(x) (SHIFT4(x) & PHY_M_FELP_LED1_MSK) ++#define PHY_M_FELP_LED0_CTRL(x) (SHIFT0(x) & PHY_M_FELP_LED0_MSK) ++ ++#define LED_PAR_CTRL_COLX 0x00 ++#define LED_PAR_CTRL_ERROR 0x01 ++#define LED_PAR_CTRL_DUPLEX 0x02 ++#define LED_PAR_CTRL_DP_COL 0x03 ++#define LED_PAR_CTRL_SPEED 0x04 ++#define LED_PAR_CTRL_LINK 0x05 ++#define LED_PAR_CTRL_TX 0x06 ++#define LED_PAR_CTRL_RX 0x07 ++#define LED_PAR_CTRL_ACT 0x08 ++#define LED_PAR_CTRL_LNK_RX 0x09 ++#define LED_PAR_CTRL_LNK_AC 0x0a ++#define LED_PAR_CTRL_ACT_BL 0x0b ++#define LED_PAR_CTRL_TX_BL 0x0c ++#define LED_PAR_CTRL_RX_BL 0x0d ++#define LED_PAR_CTRL_COL_BL 0x0e ++#define LED_PAR_CTRL_INACT 0x0f ++ ++/***** PHY_MARV_FE_SPEC_2 16 bit r/w Specific Control Reg. 2 *****/ ++#define PHY_M_FESC_DIS_WAIT BIT_2S /* Disable TDR Waiting Period */ ++#define PHY_M_FESC_ENA_MCLK BIT_1S /* Enable MAC Rx Clock in sleep mode */ ++#define PHY_M_FESC_SEL_CL_A BIT_0S /* Select Class A driver (100B-TX) */ ++ ++/* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */ ++/***** PHY_MARV_PHY_CTRL (page 2) 16 bit r/w MAC Specific Ctrl *****/ ++#define PHY_M_MAC_MD_MSK (7<<7) /* Bit 9.. 7: Mode Select Mask */ ++#define PHY_M_MAC_MD_AUTO 3 /* Auto Copper/1000Base-X */ ++#define PHY_M_MAC_MD_COPPER 5 /* Copper only */ ++#define PHY_M_MAC_MD_1000BX 7 /* 1000Base-X only */ ++#define PHY_M_MAC_MODE_SEL(x) (SHIFT7(x) & PHY_M_MAC_MD_MSK) ++ ++/***** PHY_MARV_PHY_CTRL (page 3) 16 bit r/w LED Control Reg. *****/ ++#define PHY_M_LEDC_LOS_MSK (0xf<<12) /* Bit 15..12: LOS LED Ctrl. Mask */ ++#define PHY_M_LEDC_INIT_MSK (0xf<<8) /* Bit 11.. 8: INIT LED Ctrl. Mask */ ++#define PHY_M_LEDC_STA1_MSK (0xf<<4) /* Bit 7.. 4: STAT1 LED Ctrl. Mask */ ++#define PHY_M_LEDC_STA0_MSK 0xf /* Bit 3.. 0: STAT0 LED Ctrl. Mask */ ++ ++#define PHY_M_LEDC_LOS_CTRL(x) (SHIFT12(x) & PHY_M_LEDC_LOS_MSK) ++#define PHY_M_LEDC_INIT_CTRL(x) (SHIFT8(x) & PHY_M_LEDC_INIT_MSK) ++#define PHY_M_LEDC_STA1_CTRL(x) (SHIFT4(x) & PHY_M_LEDC_STA1_MSK) ++#define PHY_M_LEDC_STA0_CTRL(x) (SHIFT0(x) & PHY_M_LEDC_STA0_MSK) ++ ++/***** PHY_MARV_PHY_STAT (page 3) 16 bit r/w Polarity Control Reg. *****/ ++#define PHY_M_POLC_LS1M_MSK (0xf<<12) /* Bit 15..12: LOS,STAT1 Mix % Mask */ ++#define PHY_M_POLC_IS0M_MSK (0xf<<8) /* Bit 11.. 8: INIT,STAT0 Mix % Mask */ ++#define PHY_M_POLC_LOS_MSK (0x3<<6) /* Bit 7.. 6: LOS Pol. Ctrl. Mask */ ++#define PHY_M_POLC_INIT_MSK (0x3<<4) /* Bit 5.. 4: INIT Pol. Ctrl. Mask */ ++#define PHY_M_POLC_STA1_MSK (0x3<<2) /* Bit 3.. 2: STAT1 Pol. Ctrl. Mask */ ++#define PHY_M_POLC_STA0_MSK 0x3 /* Bit 1.. 0: STAT0 Pol. Ctrl. Mask */ ++ ++#define PHY_M_POLC_LS1_P_MIX(x) (SHIFT12(x) & PHY_M_POLC_LS1M_MSK) ++#define PHY_M_POLC_IS0_P_MIX(x) (SHIFT8(x) & PHY_M_POLC_IS0M_MSK) ++#define PHY_M_POLC_LOS_CTRL(x) (SHIFT6(x) & PHY_M_POLC_LOS_MSK) ++#define PHY_M_POLC_INIT_CTRL(x) (SHIFT4(x) & PHY_M_POLC_INIT_MSK) ++#define PHY_M_POLC_STA1_CTRL(x) (SHIFT2(x) & PHY_M_POLC_STA1_MSK) ++#define PHY_M_POLC_STA0_CTRL(x) (SHIFT0(x) & PHY_M_POLC_STA0_MSK) + + /* + * GMAC registers +@@ -1431,141 +1578,159 @@ + */ + + /* GM_GP_STAT 16 bit r/o General Purpose Status Register */ +-#define GM_GPSR_SPEED (1<<15) /* Bit 15: Port Speed (1 = 100 Mbps) */ +-#define GM_GPSR_DUPLEX (1<<14) /* Bit 14: Duplex Mode (1 = Full) */ +-#define GM_GPSR_FC_TX_DIS (1<<13) /* Bit 13: Tx Flow-Control Mode Disabled */ +-#define GM_GPSR_LINK_UP (1<<12) /* Bit 12: Link Up Status */ +-#define GM_GPSR_PAUSE (1<<11) /* Bit 11: Pause State */ +-#define GM_GPSR_TX_ACTIVE (1<<10) /* Bit 10: Tx in Progress */ +-#define GM_GPSR_EXC_COL (1<<9) /* Bit 9: Excessive Collisions Occured */ +-#define GM_GPSR_LAT_COL (1<<8) /* Bit 8: Late Collisions Occured */ +- /* Bit 7..6: reserved */ +-#define GM_GPSR_PHY_ST_CH (1<<5) /* Bit 5: PHY Status Change */ +-#define GM_GPSR_GIG_SPEED (1<<4) /* Bit 4: Gigabit Speed (1 = 1000 Mbps) */ +-#define GM_GPSR_PART_MODE (1<<3) /* Bit 3: Partition mode */ +-#define GM_GPSR_FC_RX_DIS (1<<2) /* Bit 2: Rx Flow-Control Mode Disabled */ +-#define GM_GPSR_PROM_EN (1<<1) /* Bit 1: Promiscuous Mode Enabled */ +- /* Bit 0: reserved */ +- ++#define GM_GPSR_SPEED BIT_15S /* Port Speed (1 = 100 Mbps) */ ++#define GM_GPSR_DUPLEX BIT_14S /* Duplex Mode (1 = Full) */ ++#define GM_GPSR_FC_TX_DIS BIT_13S /* Tx Flow-Control Mode Disabled */ ++#define GM_GPSR_LINK_UP BIT_12S /* Link Up Status */ ++#define GM_GPSR_PAUSE BIT_11S /* Pause State */ ++#define GM_GPSR_TX_ACTIVE BIT_10S /* Tx in Progress */ ++#define GM_GPSR_EXC_COL BIT_9S /* Excessive Collisions Occured */ ++#define GM_GPSR_LAT_COL BIT_8S /* Late Collisions Occured */ ++ /* Bit 7.. 6: reserved */ ++#define GM_GPSR_PHY_ST_CH BIT_5S /* PHY Status Change */ ++#define GM_GPSR_GIG_SPEED BIT_4S /* Gigabit Speed (1 = 1000 Mbps) */ ++#define GM_GPSR_PART_MODE BIT_3S /* Partition mode */ ++#define GM_GPSR_FC_RX_DIS BIT_2S /* Rx Flow-Control Mode Disabled */ ++ /* Bit 2.. 0: reserved */ ++ + /* GM_GP_CTRL 16 bit r/w General Purpose Control Register */ +- /* Bit 15: reserved */ +-#define GM_GPCR_PROM_ENA (1<<14) /* Bit 14: Enable Promiscuous Mode */ +-#define GM_GPCR_FC_TX_DIS (1<<13) /* Bit 13: Disable Tx Flow-Control Mode */ +-#define GM_GPCR_TX_ENA (1<<12) /* Bit 12: Enable Transmit */ +-#define GM_GPCR_RX_ENA (1<<11) /* Bit 11: Enable Receive */ +-#define GM_GPCR_BURST_ENA (1<<10) /* Bit 10: Enable Burst Mode */ +-#define GM_GPCR_LOOP_ENA (1<<9) /* Bit 9: Enable MAC Loopback Mode */ +-#define GM_GPCR_PART_ENA (1<<8) /* Bit 8: Enable Partition Mode */ +-#define GM_GPCR_GIGS_ENA (1<<7) /* Bit 7: Gigabit Speed (1000 Mbps) */ +-#define GM_GPCR_FL_PASS (1<<6) /* Bit 6: Force Link Pass */ +-#define GM_GPCR_DUP_FULL (1<<5) /* Bit 5: Full Duplex Mode */ +-#define GM_GPCR_FC_RX_DIS (1<<4) /* Bit 4: Disable Rx Flow-Control Mode */ +-#define GM_GPCR_SPEED_100 (1<<3) /* Bit 3: Port Speed 100 Mbps */ +-#define GM_GPCR_AU_DUP_DIS (1<<2) /* Bit 2: Disable Auto-Update Duplex */ +-#define GM_GPCR_AU_FCT_DIS (1<<1) /* Bit 1: Disable Auto-Update Flow-C. */ +-#define GM_GPCR_AU_SPD_DIS (1<<0) /* Bit 0: Disable Auto-Update Speed */ ++#define GM_GPCR_RMII_PH_ENA BIT_15S /* Enable RMII for PHY (Yukon-FE only) */ ++#define GM_GPCR_RMII_LB_ENA BIT_14S /* Enable RMII Loopback (Yukon-FE only) */ ++#define GM_GPCR_FC_TX_DIS BIT_13S /* Disable Tx Flow-Control Mode */ ++#define GM_GPCR_TX_ENA BIT_12S /* Enable Transmit */ ++#define GM_GPCR_RX_ENA BIT_11S /* Enable Receive */ ++ /* Bit 10: reserved */ ++#define GM_GPCR_LOOP_ENA BIT_9S /* Enable MAC Loopback Mode */ ++#define GM_GPCR_PART_ENA BIT_8S /* Enable Partition Mode */ ++#define GM_GPCR_GIGS_ENA BIT_7S /* Gigabit Speed (1000 Mbps) */ ++#define GM_GPCR_FL_PASS BIT_6S /* Force Link Pass */ ++#define GM_GPCR_DUP_FULL BIT_5S /* Full Duplex Mode */ ++#define GM_GPCR_FC_RX_DIS BIT_4S /* Disable Rx Flow-Control Mode */ ++#define GM_GPCR_SPEED_100 BIT_3S /* Port Speed 100 Mbps */ ++#define GM_GPCR_AU_DUP_DIS BIT_2S /* Disable Auto-Update Duplex */ ++#define GM_GPCR_AU_FCT_DIS BIT_1S /* Disable Auto-Update Flow-C. */ ++#define GM_GPCR_AU_SPD_DIS BIT_0S /* Disable Auto-Update Speed */ + + #define GM_GPCR_SPEED_1000 (GM_GPCR_GIGS_ENA | GM_GPCR_SPEED_100) + #define GM_GPCR_AU_ALL_DIS (GM_GPCR_AU_DUP_DIS | GM_GPCR_AU_FCT_DIS |\ + GM_GPCR_AU_SPD_DIS) +- ++ + /* GM_TX_CTRL 16 bit r/w Transmit Control Register */ +-#define GM_TXCR_FORCE_JAM (1<<15) /* Bit 15: Force Jam / Flow-Control */ +-#define GM_TXCR_CRC_DIS (1<<14) /* Bit 14: Disable insertion of CRC */ +-#define GM_TXCR_PAD_DIS (1<<13) /* Bit 13: Disable padding of packets */ +-#define GM_TXCR_COL_THR_MSK (1<<10) /* Bit 12..10: Collision Threshold */ ++#define GM_TXCR_FORCE_JAM BIT_15S /* Force Jam / Flow-Control */ ++#define GM_TXCR_CRC_DIS BIT_14S /* Disable insertion of CRC */ ++#define GM_TXCR_PAD_DIS BIT_13S /* Disable padding of packets */ ++#define GM_TXCR_COL_THR_MSK (7<<10) /* Bit 12..10: Collision Threshold Mask */ ++ /* Bit 9.. 8: reserved */ ++#define GM_TXCR_PAD_PAT_MSK 0xff /* Bit 7.. 0: Padding Pattern Mask */ ++ /* (Yukon-2 only) */ + + #define TX_COL_THR(x) (SHIFT10(x) & GM_TXCR_COL_THR_MSK) + + #define TX_COL_DEF 0x04 +- ++ + /* GM_RX_CTRL 16 bit r/w Receive Control Register */ +-#define GM_RXCR_UCF_ENA (1<<15) /* Bit 15: Enable Unicast filtering */ +-#define GM_RXCR_MCF_ENA (1<<14) /* Bit 14: Enable Multicast filtering */ +-#define GM_RXCR_CRC_DIS (1<<13) /* Bit 13: Remove 4-byte CRC */ +-#define GM_RXCR_PASS_FC (1<<12) /* Bit 12: Pass FC packets to FIFO */ +- ++#define GM_RXCR_UCF_ENA BIT_15S /* Enable Unicast filtering */ ++#define GM_RXCR_MCF_ENA BIT_14S /* Enable Multicast filtering */ ++#define GM_RXCR_CRC_DIS BIT_13S /* Remove 4-byte CRC */ ++#define GM_RXCR_PASS_FC BIT_12S /* Pass FC packets to FIFO (Yukon-1 only) */ ++ /* Bit 11.. 0: reserved */ ++ + /* GM_TX_PARAM 16 bit r/w Transmit Parameter Register */ +-#define GM_TXPA_JAMLEN_MSK (0x03<<14) /* Bit 15..14: Jam Length */ +-#define GM_TXPA_JAMIPG_MSK (0x1f<<9) /* Bit 13..9: Jam IPG */ +-#define GM_TXPA_JAMDAT_MSK (0x1f<<4) /* Bit 8..4: IPG Jam to Data */ +- /* Bit 3..0: reserved */ ++#define GM_TXPA_JAMLEN_MSK (3<<14) /* Bit 15..14: Jam Length Mask */ ++#define GM_TXPA_JAMIPG_MSK (0x1f<<9) /* Bit 13.. 9: Jam IPG Mask */ ++#define GM_TXPA_JAMDAT_MSK (0x1f<<4) /* Bit 8.. 4: IPG Jam to Data Mask */ ++#define GM_TXPA_BO_LIM_MSK 0x0f /* Bit 3.. 0: Backoff Limit Mask */ ++ /* (Yukon-2 only) */ + + #define TX_JAM_LEN_VAL(x) (SHIFT14(x) & GM_TXPA_JAMLEN_MSK) + #define TX_JAM_IPG_VAL(x) (SHIFT9(x) & GM_TXPA_JAMIPG_MSK) + #define TX_IPG_JAM_DATA(x) (SHIFT4(x) & GM_TXPA_JAMDAT_MSK) ++#define TX_BACK_OFF_LIM(x) ((x) & GM_TXPA_BO_LIM_MSK) + + #define TX_JAM_LEN_DEF 0x03 + #define TX_JAM_IPG_DEF 0x0b + #define TX_IPG_JAM_DEF 0x1c ++#define TX_BOF_LIM_DEF 0x04 + + /* GM_SERIAL_MODE 16 bit r/w Serial Mode Register */ +-#define GM_SMOD_DATABL_MSK (0x1f<<11) /* Bit 15..11: Data Blinder (r/o) */ +-#define GM_SMOD_LIMIT_4 (1<<10) /* Bit 10: 4 consecutive Tx trials */ +-#define GM_SMOD_VLAN_ENA (1<<9) /* Bit 9: Enable VLAN (Max. Frame Len) */ +-#define GM_SMOD_JUMBO_ENA (1<<8) /* Bit 8: Enable Jumbo (Max. Frame Len) */ +- /* Bit 7..5: reserved */ +-#define GM_SMOD_IPG_MSK 0x1f /* Bit 4..0: Inter-Packet Gap (IPG) */ +- ++#define GM_SMOD_DATABL_MSK (0x1f<<11) /* Bit 15..11: Data Blinder */ ++ /* r/o on Yukon, r/w on Yukon-EC */ ++#define GM_SMOD_LIMIT_4 BIT_10S /* 4 consecutive Tx trials */ ++#define GM_SMOD_VLAN_ENA BIT_9S /* Enable VLAN (Max. Frame Len) */ ++#define GM_SMOD_JUMBO_ENA BIT_8S /* Enable Jumbo (Max. Frame Len) */ ++ /* Bit 7.. 5: reserved */ ++#define GM_SMOD_IPG_MSK 0x1f /* Bit 4.. 0: Inter-Packet Gap (IPG) */ ++ + #define DATA_BLIND_VAL(x) (SHIFT11(x) & GM_SMOD_DATABL_MSK) +-#define DATA_BLIND_DEF 0x04 ++#define IPG_DATA_VAL(x) ((x) & GM_SMOD_IPG_MSK) + +-#define IPG_DATA_VAL(x) (x & GM_SMOD_IPG_MSK) ++#define DATA_BLIND_DEF 0x04 + #define IPG_DATA_DEF 0x1e + + /* GM_SMI_CTRL 16 bit r/w SMI Control Register */ + #define GM_SMI_CT_PHY_A_MSK (0x1f<<11) /* Bit 15..11: PHY Device Address */ + #define GM_SMI_CT_REG_A_MSK (0x1f<<6) /* Bit 10.. 6: PHY Register Address */ +-#define GM_SMI_CT_OP_RD (1<<5) /* Bit 5: OpCode Read (0=Write)*/ +-#define GM_SMI_CT_RD_VAL (1<<4) /* Bit 4: Read Valid (Read completed) */ +-#define GM_SMI_CT_BUSY (1<<3) /* Bit 3: Busy (Operation in progress) */ +- /* Bit 2..0: reserved */ +- ++#define GM_SMI_CT_OP_RD BIT_5S /* OpCode Read (0=Write)*/ ++#define GM_SMI_CT_RD_VAL BIT_4S /* Read Valid (Read completed) */ ++#define GM_SMI_CT_BUSY BIT_3S /* Busy (Operation in progress) */ ++ /* Bit 2.. 0: reserved */ ++ + #define GM_SMI_CT_PHY_AD(x) (SHIFT11(x) & GM_SMI_CT_PHY_A_MSK) + #define GM_SMI_CT_REG_AD(x) (SHIFT6(x) & GM_SMI_CT_REG_A_MSK) + + /* GM_PHY_ADDR 16 bit r/w GPHY Address Register */ +- /* Bit 15..6: reserved */ +-#define GM_PAR_MIB_CLR (1<<5) /* Bit 5: Set MIB Clear Counter Mode */ +-#define GM_PAR_MIB_TST (1<<4) /* Bit 4: MIB Load Counter (Test Mode) */ +- /* Bit 3..0: reserved */ +- ++ /* Bit 15.. 6: reserved */ ++#define GM_PAR_MIB_CLR BIT_5S /* Set MIB Clear Counter Mode */ ++#define GM_PAR_MIB_TST BIT_4S /* MIB Load Counter (Test Mode) */ ++ /* Bit 3.. 0: reserved */ ++ + /* Receive Frame Status Encoding */ +-#define GMR_FS_LEN (0xffffUL<<16) /* Bit 31..16: Rx Frame Length */ ++#define GMR_FS_LEN_MSK (0xffffUL<<16) /* Bit 31..16: Rx Frame Length */ + /* Bit 15..14: reserved */ +-#define GMR_FS_VLAN (1L<<13) /* Bit 13: VLAN Packet */ +-#define GMR_FS_JABBER (1L<<12) /* Bit 12: Jabber Packet */ +-#define GMR_FS_UN_SIZE (1L<<11) /* Bit 11: Undersize Packet */ +-#define GMR_FS_MC (1L<<10) /* Bit 10: Multicast Packet */ +-#define GMR_FS_BC (1L<<9) /* Bit 9: Broadcast Packet */ +-#define GMR_FS_RX_OK (1L<<8) /* Bit 8: Receive OK (Good Packet) */ +-#define GMR_FS_GOOD_FC (1L<<7) /* Bit 7: Good Flow-Control Packet */ +-#define GMR_FS_BAD_FC (1L<<6) /* Bit 6: Bad Flow-Control Packet */ +-#define GMR_FS_MII_ERR (1L<<5) /* Bit 5: MII Error */ +-#define GMR_FS_LONG_ERR (1L<<4) /* Bit 4: Too Long Packet */ +-#define GMR_FS_FRAGMENT (1L<<3) /* Bit 3: Fragment */ ++#define GMR_FS_VLAN BIT_13 /* VLAN Packet */ ++#define GMR_FS_JABBER BIT_12 /* Jabber Packet */ ++#define GMR_FS_UN_SIZE BIT_11 /* Undersize Packet */ ++#define GMR_FS_MC BIT_10 /* Multicast Packet */ ++#define GMR_FS_BC BIT_9 /* Broadcast Packet */ ++#define GMR_FS_RX_OK BIT_8 /* Receive OK (Good Packet) */ ++#define GMR_FS_GOOD_FC BIT_7 /* Good Flow-Control Packet */ ++#define GMR_FS_BAD_FC BIT_6 /* Bad Flow-Control Packet */ ++#define GMR_FS_MII_ERR BIT_5 /* MII Error */ ++#define GMR_FS_LONG_ERR BIT_4 /* Too Long Packet */ ++#define GMR_FS_FRAGMENT BIT_3 /* Fragment */ + /* Bit 2: reserved */ +-#define GMR_FS_CRC_ERR (1L<<1) /* Bit 1: CRC Error */ +-#define GMR_FS_RX_FF_OV (1L<<0) /* Bit 0: Rx FIFO Overflow */ ++#define GMR_FS_CRC_ERR BIT_1 /* CRC Error */ ++#define GMR_FS_RX_FF_OV BIT_0 /* Rx FIFO Overflow */ ++ ++#define GMR_FS_LEN_SHIFT 16 + + /* + * GMR_FS_ANY_ERR (analogous to XMR_FS_ANY_ERR) + */ +-#define GMR_FS_ANY_ERR (GMR_FS_CRC_ERR | \ +- GMR_FS_LONG_ERR | \ ++#ifdef SK_DIAG ++#define GMR_FS_ANY_ERR ( \ ++ GMR_FS_RX_FF_OV | \ ++ GMR_FS_CRC_ERR | \ ++ GMR_FS_FRAGMENT | \ + GMR_FS_MII_ERR | \ + GMR_FS_BAD_FC | \ + GMR_FS_GOOD_FC | \ + GMR_FS_JABBER) +- +-/* Rx GMAC FIFO Flush Mask (default) */ +-#define RX_FF_FL_DEF_MSK (GMR_FS_CRC_ERR | \ ++#else ++#define GMR_FS_ANY_ERR ( \ + GMR_FS_RX_FF_OV | \ ++ GMR_FS_CRC_ERR | \ ++ GMR_FS_FRAGMENT | \ ++ GMR_FS_LONG_ERR | \ + GMR_FS_MII_ERR | \ + GMR_FS_BAD_FC | \ + GMR_FS_GOOD_FC | \ + GMR_FS_UN_SIZE | \ + GMR_FS_JABBER) ++#endif ++ ++/* Rx GMAC FIFO Flush Mask (default) */ ++#define RX_FF_FL_DEF_MSK GMR_FS_ANY_ERR + + /* typedefs *******************************************************************/ + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/Makefile linux-2.6.9.new/drivers/net/sk98lin/Makefile +--- linux-2.6.9.old/drivers/net/sk98lin/Makefile 2004-10-19 05:53:05.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/Makefile 2006-12-07 14:35:03.000000000 +0800 +@@ -1,6 +1,59 @@ ++#****************************************************************************** + # +-# Makefile for the SysKonnect SK-98xx device driver. ++# Name: skge.c ++# Project: GEnesis, PCI Gigabit Ethernet Adapter ++# Version: $Revision: 1.9.2.1 $ ++# Date: $Date: 2005/04/11 09:01:18 $ ++# Purpose: The main driver source module + # ++#****************************************************************************** ++ ++#****************************************************************************** ++# ++# (C)Copyright 1998-2002 SysKonnect GmbH. ++# (C)Copyright 2002-2005 Marvell. ++# ++# Makefile for Marvell Yukon chipset and SysKonnect Gigabit Ethernet ++# Server Adapter driver. (Kernel 2.6) ++# ++# Author: Mirko Lindner (mlindner@syskonnect.de) ++# Ralph Roesler (rroesler@syskonnect.de) ++# ++# Address all question to: linux@syskonnect.de ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# The information in this file is provided "AS IS" without warranty. ++# ++#****************************************************************************** ++ ++#****************************************************************************** ++# ++# History: ++# ++# $Log: Makefile2.6,v $ ++# Revision 1.9.2.1 2005/04/11 09:01:18 mlindner ++# Fix: Copyright year changed ++# ++# Revision 1.9 2004/07/13 15:54:50 rroesler ++# Add: file skethtool.c ++# Fix: corrected header regarding copyright ++# Fix: minor typos corrected ++# ++# Revision 1.8 2004/06/08 08:39:38 mlindner ++# Fix: Add CONFIG_SK98LIN_ZEROCOPY as default ++# ++# Revision 1.7 2004/06/03 16:06:56 mlindner ++# Fix: Added compile flag SK_DIAG_SUPPORT ++# ++# Revision 1.6 2004/06/02 08:02:59 mlindner ++# Add: Changed header information and inserted a GPL statement ++# ++# ++#****************************************************************************** + + + # +@@ -13,13 +66,16 @@ + obj-$(CONFIG_SK98LIN) += sk98lin.o + sk98lin-objs := \ + skge.o \ ++ sky2.o \ ++ skethtool.o \ ++ sky2le.o \ + skdim.o \ + skaddr.o \ + skgehwt.o \ + skgeinit.o \ + skgepnmi.o \ + skgesirq.o \ +- ski2c.o \ ++ sktwsi.o \ + sklm80.o \ + skqueue.o \ + skrlmt.o \ +@@ -76,13 +132,11 @@ + # SK_DBGCAT_DRV_INT_SRC 0x04000000 interrupts sources + # SK_DBGCAT_DRV_EVENT 0x08000000 driver events + +-EXTRA_CFLAGS += -Idrivers/net/sk98lin -DSK_DIAG_SUPPORT -DSK_USE_CSUM -DGENESIS -DYUKON $(DBGDEF) $(SKPARAM) ++EXTRA_CFLAGS += -Idrivers/net/sk98lin -DSK_USE_CSUM -DSK_DIAG_SUPPORT \ ++ -DGENESIS -DYUKON -DYUK2 -DCONFIG_SK98LIN_ZEROCOPY \ ++ $(DBGDEF) $(SKPARAM) + + clean: + rm -f core *.o *.a *.s + + +- +- +- +- +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skaddr.c linux-2.6.9.new/drivers/net/sk98lin/skaddr.c +--- linux-2.6.9.old/drivers/net/sk98lin/skaddr.c 2004-10-19 05:54:32.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skaddr.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skaddr.c + * Project: Gigabit Ethernet Adapters, ADDR-Module +- * Version: $Revision: 1.52 $ +- * Date: $Date: 2003/06/02 13:46:15 $ ++ * Version: $Revision: 2.6 $ ++ * Date: $Date: 2005/05/11 10:05:14 $ + * Purpose: Manage Addresses (Multicast and Unicast) and Promiscuous Mode. + * + ******************************************************************************/ +@@ -44,7 +44,7 @@ + + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skaddr.c,v 1.52 2003/06/02 13:46:15 tschilli Exp $ (C) Marvell."; ++ "@(#) $Id: skaddr.c,v 2.6 2005/05/11 10:05:14 tschilli Exp $ (C) Marvell."; + #endif /* DEBUG ||!LINT || !SK_SLIM */ + + #define __SKADDR_C +@@ -191,11 +191,11 @@ + pAC->Addr.Port[pAC->Addr.Net[0].ActivePort].Exact[0] = + pAC->Addr.Net[0].CurrentMacAddress; + #if SK_MAX_NETS > 1 +- /* Set logical MAC address for net 2 to (log | 3). */ ++ /* Set logical MAC address for net 2 to. */ + if (!pAC->Addr.Net[1].CurrentMacAddressSet) { + pAC->Addr.Net[1].PermanentMacAddress = + pAC->Addr.Net[0].PermanentMacAddress; +- pAC->Addr.Net[1].PermanentMacAddress.a[5] |= 3; ++ pAC->Addr.Net[1].PermanentMacAddress.a[5] += 1; + /* Set the current logical MAC address to the permanent one. */ + pAC->Addr.Net[1].CurrentMacAddress = + pAC->Addr.Net[1].PermanentMacAddress; +@@ -213,7 +213,7 @@ + pAC->Addr.Net[i].PermanentMacAddress.a[2], + pAC->Addr.Net[i].PermanentMacAddress.a[3], + pAC->Addr.Net[i].PermanentMacAddress.a[4], +- pAC->Addr.Net[i].PermanentMacAddress.a[5])) ++ pAC->Addr.Net[i].PermanentMacAddress.a[5])); + + SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_INIT, + ("Logical MAC Address (Net%d): %02X %02X %02X %02X %02X %02X\n", +@@ -223,7 +223,7 @@ + pAC->Addr.Net[i].CurrentMacAddress.a[2], + pAC->Addr.Net[i].CurrentMacAddress.a[3], + pAC->Addr.Net[i].CurrentMacAddress.a[4], +- pAC->Addr.Net[i].CurrentMacAddress.a[5])) ++ pAC->Addr.Net[i].CurrentMacAddress.a[5])); + } + #endif /* DEBUG */ + +@@ -266,7 +266,7 @@ + pAPort->PermanentMacAddress.a[2], + pAPort->PermanentMacAddress.a[3], + pAPort->PermanentMacAddress.a[4], +- pAPort->PermanentMacAddress.a[5])) ++ pAPort->PermanentMacAddress.a[5])); + + SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_INIT, + ("SkAddrInit: Physical MAC Address: %02X %02X %02X %02X %02X %02X\n", +@@ -275,7 +275,7 @@ + pAPort->CurrentMacAddress.a[2], + pAPort->CurrentMacAddress.a[3], + pAPort->CurrentMacAddress.a[4], +- pAPort->CurrentMacAddress.a[5])) ++ pAPort->CurrentMacAddress.a[5])); + #endif /* DEBUG */ + } + /* pAC->Addr.InitDone = SK_INIT_IO; */ +@@ -339,10 +339,14 @@ + } + + if (pAC->GIni.GIGenesis) { ++#ifdef GENESIS + ReturnCode = SkAddrXmacMcClear(pAC, IoC, PortNumber, Flags); ++#endif + } + else { ++#ifdef YUKON + ReturnCode = SkAddrGmacMcClear(pAC, IoC, PortNumber, Flags); ++#endif + } + + return (ReturnCode); +@@ -352,7 +356,7 @@ + #endif /* !SK_SLIM */ + + #ifndef SK_SLIM +- ++#ifdef GENESIS + /****************************************************************************** + * + * SkAddrXmacMcClear - clear the multicast table +@@ -404,11 +408,11 @@ + return (SK_ADDR_SUCCESS); + + } /* SkAddrXmacMcClear */ +- ++#endif /* GENESIS */ + #endif /* !SK_SLIM */ + + #ifndef SK_SLIM +- ++#ifdef YUKON + /****************************************************************************** + * + * SkAddrGmacMcClear - clear the multicast table +@@ -447,7 +451,7 @@ + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[4], + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[5], + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[6], +- pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7])) ++ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7])); + #endif /* DEBUG */ + + /* Clear InexactFilter */ +@@ -489,7 +493,7 @@ + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[4], + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[5], + pAC->Addr.Port[PortNumber].InexactFilter.Bytes[6], +- pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7])) ++ pAC->Addr.Port[PortNumber].InexactFilter.Bytes[7])); + #endif /* DEBUG */ + + if (!(Flags & SK_MC_SW_ONLY)) { +@@ -499,9 +503,10 @@ + return (SK_ADDR_SUCCESS); + + } /* SkAddrGmacMcClear */ ++#endif /* YUKON */ + + #ifndef SK_ADDR_CHEAT +- ++#ifdef GENESIS + /****************************************************************************** + * + * SkXmacMcHash - hash multicast address +@@ -538,8 +543,9 @@ + return (Crc & ((1 << HASH_BITS) - 1)); + + } /* SkXmacMcHash */ ++#endif /* GENESIS */ + +- ++#ifdef YUKON + /****************************************************************************** + * + * SkGmacMcHash - hash multicast address +@@ -597,7 +603,7 @@ + return (Crc & ((1 << HASH_BITS) - 1)); + + } /* SkGmacMcHash */ +- ++#endif /* YUKON */ + #endif /* !SK_ADDR_CHEAT */ + + /****************************************************************************** +@@ -638,17 +644,21 @@ + } + + if (pAC->GIni.GIGenesis) { ++#ifdef GENESIS + ReturnCode = SkAddrXmacMcAdd(pAC, IoC, PortNumber, pMc, Flags); ++#endif + } + else { ++#ifdef YUKON + ReturnCode = SkAddrGmacMcAdd(pAC, IoC, PortNumber, pMc, Flags); ++#endif + } + + return (ReturnCode); + + } /* SkAddrMcAdd */ + +- ++#ifdef GENESIS + /****************************************************************************** + * + * SkAddrXmacMcAdd - add a multicast address to a port +@@ -758,8 +768,9 @@ + } + + } /* SkAddrXmacMcAdd */ ++#endif /* GENESIS */ + +- ++#ifdef YUKON + /****************************************************************************** + * + * SkAddrGmacMcAdd - add a multicast address to a port +@@ -821,7 +832,7 @@ + pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[4], + pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[5], + pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[6], +- pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[7])) ++ pAC->Addr.Port[PortNumber].InexactRlmtFilter.Bytes[7])); + #endif /* DEBUG */ + } + else { /* not permanent => DRV */ +@@ -845,7 +856,7 @@ + pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[4], + pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[5], + pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[6], +- pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[7])) ++ pAC->Addr.Port[PortNumber].InexactDrvFilter.Bytes[7])); + #endif /* DEBUG */ + } + +@@ -860,7 +871,7 @@ + return (SK_MC_FILTERING_INEXACT); + + } /* SkAddrGmacMcAdd */ +- ++#endif /* YUKON */ + #endif /* !SK_SLIM */ + + /****************************************************************************** +@@ -892,7 +903,7 @@ + SK_IOC IoC, /* I/O context */ + SK_U32 PortNumber) /* Port Number */ + { +- int ReturnCode = 0; ++ int ReturnCode = SK_ADDR_ILLEGAL_PORT; + #if (!defined(SK_SLIM) || defined(DEBUG)) + if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) { + return (SK_ADDR_ILLEGAL_PORT); +@@ -948,13 +959,13 @@ + SK_ADDR_PORT *pAPort; + + SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL, +- ("SkAddrXmacMcUpdate on Port %u.\n", PortNumber)) ++ ("SkAddrXmacMcUpdate on Port %u.\n", PortNumber)); + + pAPort = &pAC->Addr.Port[PortNumber]; + + #ifdef DEBUG + SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL, +- ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber])) ++ ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber])); + #endif /* DEBUG */ + + /* Start with 0 to also program the logical MAC address. */ +@@ -1043,7 +1054,7 @@ + pAPort->Exact[i].a[2], + pAPort->Exact[i].a[3], + pAPort->Exact[i].a[4], +- pAPort->Exact[i].a[5])) ++ pAPort->Exact[i].a[5])); + } + #endif /* DEBUG */ + +@@ -1095,13 +1106,13 @@ + SK_ADDR_PORT *pAPort; + + SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL, +- ("SkAddrGmacMcUpdate on Port %u.\n", PortNumber)) ++ ("SkAddrGmacMcUpdate on Port %u.\n", PortNumber)); + + pAPort = &pAC->Addr.Port[PortNumber]; + + #ifdef DEBUG + SK_DBG_MSG(pAC,SK_DBGMOD_ADDR, SK_DBGCAT_CTRL, +- ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber])) ++ ("Next0 on Port %d: %d\n", PortNumber, Next0[PortNumber])); + #endif /* DEBUG */ + + #ifndef SK_SLIM +@@ -1157,7 +1168,7 @@ + pAPort->Exact[0].a[2], + pAPort->Exact[0].a[3], + pAPort->Exact[0].a[4], +- pAPort->Exact[0].a[5])) ++ pAPort->Exact[0].a[5])); + + SK_DBG_MSG(pAC, SK_DBGMOD_ADDR, SK_DBGCAT_CTRL, + ("SkAddrGmacMcUpdate: Physical MAC Address: %02X %02X %02X %02X %02X %02X\n", +@@ -1166,7 +1177,7 @@ + pAPort->CurrentMacAddress.a[2], + pAPort->CurrentMacAddress.a[3], + pAPort->CurrentMacAddress.a[4], +- pAPort->CurrentMacAddress.a[5])) ++ pAPort->CurrentMacAddress.a[5])); + #endif /* DEBUG */ + + #ifndef SK_SLIM +@@ -1275,26 +1286,42 @@ + (void) SkAddrMcUpdate(pAC, IoC, PortNumber); + } + else if (Flags & SK_ADDR_PHYSICAL_ADDRESS) { /* Physical MAC address. */ +- if (SK_ADDR_EQUAL(pNewAddr->a, +- pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) { +- return (SK_ADDR_DUPLICATE_ADDRESS); +- } +- + for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) { + if (!pAC->Addr.Port[i].CurrentMacAddressSet) { + return (SK_ADDR_TOO_EARLY); + } ++ } + ++ /* ++ * In dual net mode it should be possible to set all MAC ++ * addresses independently. Therefore the equality checks ++ * against the locical address of the same port and the ++ * physical address of the other port are suppressed here. ++ */ ++ if (pAC->Rlmt.NumNets == 1) { + if (SK_ADDR_EQUAL(pNewAddr->a, +- pAC->Addr.Port[i].CurrentMacAddress.a)) { +- if (i == PortNumber) { +- return (SK_ADDR_SUCCESS); +- } +- else { +- return (SK_ADDR_DUPLICATE_ADDRESS); ++ pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) { ++ return (SK_ADDR_DUPLICATE_ADDRESS); ++ } ++ ++ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) { ++ if (SK_ADDR_EQUAL(pNewAddr->a, ++ pAC->Addr.Port[i].CurrentMacAddress.a)) { ++ if (i == PortNumber) { ++ return (SK_ADDR_SUCCESS); ++ } ++ else { ++ return (SK_ADDR_DUPLICATE_ADDRESS); ++ } + } + } + } ++ else { ++ if (SK_ADDR_EQUAL(pNewAddr->a, ++ pAC->Addr.Port[PortNumber].CurrentMacAddress.a)) { ++ return (SK_ADDR_SUCCESS); ++ } ++ } + + pAC->Addr.Port[PortNumber].PreviousMacAddress = + pAC->Addr.Port[PortNumber].CurrentMacAddress; +@@ -1325,18 +1352,28 @@ + pAC->Addr.Net[NetNumber].CurrentMacAddress.a)) { + return (SK_ADDR_SUCCESS); + } +- ++ + for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) { + if (!pAC->Addr.Port[i].CurrentMacAddressSet) { + return (SK_ADDR_TOO_EARLY); + } ++ } + +- if (SK_ADDR_EQUAL(pNewAddr->a, +- pAC->Addr.Port[i].CurrentMacAddress.a)) { +- return (SK_ADDR_DUPLICATE_ADDRESS); ++ /* ++ * In dual net mode on Yukon-2 adapters the physical address ++ * of port 0 and the logical address of port 1 are equal - in ++ * this case the equality check of the physical address leads ++ * to an error and is suppressed here. ++ */ ++ if (pAC->Rlmt.NumNets == 1) { ++ for (i = 0; i < (SK_U32) pAC->GIni.GIMacsFound; i++) { ++ if (SK_ADDR_EQUAL(pNewAddr->a, ++ pAC->Addr.Port[i].CurrentMacAddress.a)) { ++ return (SK_ADDR_DUPLICATE_ADDRESS); ++ } + } + } +- ++ + /* + * In case that the physical and the logical MAC addresses are equal + * we must also change the physical MAC address here. +@@ -1424,7 +1461,7 @@ + SK_U32 PortNumber, /* port whose promiscuous mode changes */ + int NewPromMode) /* new promiscuous mode */ + { +- int ReturnCode = 0; ++ int ReturnCode = SK_ADDR_ILLEGAL_PORT; + #if (!defined(SK_SLIM) || defined(DEBUG)) + if (PortNumber >= (SK_U32) pAC->GIni.GIMacsFound) { + return (SK_ADDR_ILLEGAL_PORT); +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skcsum.c linux-2.6.9.new/drivers/net/sk98lin/skcsum.c +--- linux-2.6.9.old/drivers/net/sk98lin/skcsum.c 2004-10-19 05:53:07.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skcsum.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skcsum.c + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.12 $ +- * Date: $Date: 2003/08/20 13:55:53 $ ++ * Version: $Revision: 2.1 $ ++ * Date: $Date: 2003/10/27 14:16:08 $ + * Purpose: Store/verify Internet checksum in send/receive packets. + * + ******************************************************************************/ +@@ -25,7 +25,7 @@ + + #ifndef lint + static const char SysKonnectFileId[] = +- "@(#) $Id: skcsum.c,v 1.12 2003/08/20 13:55:53 mschmid Exp $ (C) SysKonnect."; ++ "@(#) $Id: skcsum.c,v 2.1 2003/10/27 14:16:08 amock Exp $ (C) SysKonnect."; + #endif /* !lint */ + + /****************************************************************************** +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skdim.c linux-2.6.9.new/drivers/net/sk98lin/skdim.c +--- linux-2.6.9.old/drivers/net/sk98lin/skdim.c 2004-10-19 05:53:45.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skdim.c 2006-12-07 14:35:03.000000000 +0800 +@@ -1,17 +1,25 @@ + /****************************************************************************** + * +- * Name: skdim.c +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.5 $ +- * Date: $Date: 2003/11/28 12:55:40 $ +- * Purpose: All functions to maintain interrupt moderation ++ * Name: skdim.c ++ * Project: GEnesis, PCI Gigabit Ethernet Adapter ++ * Version: $Revision: 1.5.2.2 $ ++ * Date: $Date: 2005/05/23 13:47:33 $ ++ * Purpose: All functions regardig interrupt moderation + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. ++ * ++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet ++ * Server Adapters. ++ * ++ * Author: Ralph Roesler (rroesler@syskonnect.de) ++ * Mirko Lindner (mlindner@syskonnect.de) ++ * ++ * Address all question to: linux@syskonnect.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -20,723 +28,367 @@ + * + * The information in this file is provided "AS IS" without warranty. + * +- ******************************************************************************/ ++ *****************************************************************************/ + +-/****************************************************************************** +- * +- * Description: +- * +- * This module is intended to manage the dynamic interrupt moderation on both +- * GEnesis and Yukon adapters. +- * +- * Include File Hierarchy: +- * +- * "skdrv1st.h" +- * "skdrv2nd.h" +- * +- ******************************************************************************/ +- +-#ifndef lint +-static const char SysKonnectFileId[] = +- "@(#) $Id: skdim.c,v 1.5 2003/11/28 12:55:40 rroesler Exp $ (C) SysKonnect."; +-#endif +- +-#define __SKADDR_C +- +-#ifdef __cplusplus +-#error C++ is not yet supported. +-extern "C" { +-#endif +- +-/******************************************************************************* +-** +-** Includes +-** +-*******************************************************************************/ +- +-#ifndef __INC_SKDRV1ST_H + #include "h/skdrv1st.h" +-#endif +- +-#ifndef __INC_SKDRV2ND_H + #include "h/skdrv2nd.h" +-#endif + +-#include +- +-/******************************************************************************* +-** +-** Defines +-** +-*******************************************************************************/ +- +-/******************************************************************************* +-** +-** Typedefs +-** +-*******************************************************************************/ ++/****************************************************************************** ++ * ++ * Local Function Prototypes ++ * ++ *****************************************************************************/ + +-/******************************************************************************* +-** +-** Local function prototypes +-** +-*******************************************************************************/ +- +-static unsigned int GetCurrentSystemLoad(SK_AC *pAC); +-static SK_U64 GetIsrCalls(SK_AC *pAC); +-static SK_BOOL IsIntModEnabled(SK_AC *pAC); +-static void SetCurrIntCtr(SK_AC *pAC); +-static void EnableIntMod(SK_AC *pAC); +-static void DisableIntMod(SK_AC *pAC); +-static void ResizeDimTimerDuration(SK_AC *pAC); +-static void DisplaySelectedModerationType(SK_AC *pAC); +-static void DisplaySelectedModerationMask(SK_AC *pAC); +-static void DisplayDescrRatio(SK_AC *pAC); ++static SK_U64 getIsrCalls(SK_AC *pAC); ++static SK_BOOL isIntModEnabled(SK_AC *pAC); ++static void setCurrIntCtr(SK_AC *pAC); ++static void enableIntMod(SK_AC *pAC); ++static void disableIntMod(SK_AC *pAC); + +-/******************************************************************************* +-** +-** Global variables +-** +-*******************************************************************************/ ++#define M_DIMINFO pAC->DynIrqModInfo + +-/******************************************************************************* +-** +-** Local variables +-** +-*******************************************************************************/ ++/****************************************************************************** ++ * ++ * Global Functions ++ * ++ *****************************************************************************/ + +-/******************************************************************************* +-** +-** Global functions +-** +-*******************************************************************************/ ++/***************************************************************************** ++ * ++ * SkDimModerate - Moderates the IRQs depending on the current needs ++ * ++ * Description: ++ * Moderation of IRQs depends on the number of occurred IRQs with ++ * respect to the previous moderation cycle. ++ * ++ * Returns: N/A ++ * ++ */ ++void SkDimModerate( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_U64 IsrCalls = getIsrCalls(pAC); ++ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> SkDimModerate\n")); ++ ++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) { ++ if (isIntModEnabled(pAC)) { ++ if (IsrCalls < M_DIMINFO.MaxModIntsPerSecLowerLimit) { ++ disableIntMod(pAC); ++ } ++ } else { ++ if (IsrCalls > M_DIMINFO.MaxModIntsPerSecUpperLimit) { ++ enableIntMod(pAC); ++ } ++ } ++ } ++ setCurrIntCtr(pAC); + +-/******************************************************************************* +-** Function : SkDimModerate +-** Description : Called in every ISR to check if moderation is to be applied +-** or not for the current number of interrupts +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : void (!) +-** Notes : - +-*******************************************************************************/ +- +-void +-SkDimModerate(SK_AC *pAC) { +- unsigned int CurrSysLoad = 0; /* expressed in percent */ +- unsigned int LoadIncrease = 0; /* expressed in percent */ +- SK_U64 ThresholdInts = 0; +- SK_U64 IsrCallsPerSec = 0; ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== SkDimModerate\n")); ++} + +-#define M_DIMINFO pAC->DynIrqModInfo ++/***************************************************************************** ++ * ++ * SkDimStartModerationTimer - Starts the moderation timer ++ * ++ * Description: ++ * Dynamic interrupt moderation is regularly checked using the ++ * so-called moderation timer. This timer is started with this function. ++ * ++ * Returns: N/A ++ */ ++void SkDimStartModerationTimer( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_EVPARA EventParam; /* Event struct for timer event */ ++ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("==> SkDimStartModerationTimer\n")); + +- if (!IsIntModEnabled(pAC)) { +- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) { +- CurrSysLoad = GetCurrentSystemLoad(pAC); +- if (CurrSysLoad > 75) { +- /* +- ** More than 75% total system load! Enable the moderation +- ** to shield the system against too many interrupts. +- */ +- EnableIntMod(pAC); +- } else if (CurrSysLoad > M_DIMINFO.PrevSysLoad) { +- LoadIncrease = (CurrSysLoad - M_DIMINFO.PrevSysLoad); +- if (LoadIncrease > ((M_DIMINFO.PrevSysLoad * +- C_INT_MOD_ENABLE_PERCENTAGE) / 100)) { +- if (CurrSysLoad > 10) { +- /* +- ** More than 50% increase with respect to the +- ** previous load of the system. Most likely this +- ** is due to our ISR-proc... +- */ +- EnableIntMod(pAC); +- } +- } +- } else { +- /* +- ** Neither too much system load at all nor too much increase +- ** with respect to the previous system load. Hence, we can leave +- ** the ISR-handling like it is without enabling moderation. +- */ +- } +- M_DIMINFO.PrevSysLoad = CurrSysLoad; +- } +- } else { +- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) { +- ThresholdInts = ((M_DIMINFO.MaxModIntsPerSec * +- C_INT_MOD_DISABLE_PERCENTAGE) / 100); +- IsrCallsPerSec = GetIsrCalls(pAC); +- if (IsrCallsPerSec <= ThresholdInts) { +- /* +- ** The number of interrupts within the last second is +- ** lower than the disable_percentage of the desried +- ** maxrate. Therefore we can disable the moderation. +- */ +- DisableIntMod(pAC); +- M_DIMINFO.MaxModIntsPerSec = +- (M_DIMINFO.MaxModIntsPerSecUpperLimit + +- M_DIMINFO.MaxModIntsPerSecLowerLimit) / 2; +- } else { +- /* +- ** The number of interrupts per sec is the same as expected. +- ** Evalulate the descriptor-ratio. If it has changed, a resize +- ** in the moderation timer might be usefull +- */ +- if (M_DIMINFO.AutoSizing) { +- ResizeDimTimerDuration(pAC); +- } +- } +- } +- } +- +- /* +- ** Some information to the log... +- */ +- if (M_DIMINFO.DisplayStats) { +- DisplaySelectedModerationType(pAC); +- DisplaySelectedModerationMask(pAC); +- DisplayDescrRatio(pAC); +- } ++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) { ++ SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam)); ++ EventParam.Para32[0] = SK_DRV_MODERATION_TIMER; ++ SkTimerStart(pAC, pAC->IoBase, ++ &pAC->DynIrqModInfo.ModTimer, ++ pAC->DynIrqModInfo.DynIrqModSampleInterval * 1000000, ++ SKGE_DRV, SK_DRV_TIMER, EventParam); ++ } + +- M_DIMINFO.NbrProcessedDescr = 0; +- SetCurrIntCtr(pAC); ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== SkDimStartModerationTimer\n")); + } + +-/******************************************************************************* +-** Function : SkDimStartModerationTimer +-** Description : Starts the audit-timer for the dynamic interrupt moderation +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : void (!) +-** Notes : - +-*******************************************************************************/ +- +-void +-SkDimStartModerationTimer(SK_AC *pAC) { +- SK_EVPARA EventParam; /* Event struct for timer event */ +- +- SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam)); +- EventParam.Para32[0] = SK_DRV_MODERATION_TIMER; +- SkTimerStart(pAC, pAC->IoBase, &pAC->DynIrqModInfo.ModTimer, +- SK_DRV_MODERATION_TIMER_LENGTH, +- SKGE_DRV, SK_DRV_TIMER, EventParam); +-} ++/***************************************************************************** ++ * ++ * SkDimEnableModerationIfNeeded - Enables or disables any moderationtype ++ * ++ * Description: ++ * This function effectively initializes the IRQ moderation of a network ++ * adapter. Depending on the configuration, this might be either static ++ * or dynamic. If no moderation is configured, this function will do ++ * nothing. ++ * ++ * Returns: N/A ++ */ ++void SkDimEnableModerationIfNeeded( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("==> SkDimEnableModerationIfNeeded\n")); ++ ++ if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_NONE) { ++ if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_STATIC) { ++ enableIntMod(pAC); ++ } else { /* must be C_INT_MOD_DYNAMIC */ ++ SkDimStartModerationTimer(pAC); ++ } ++ } + +-/******************************************************************************* +-** Function : SkDimEnableModerationIfNeeded +-** Description : Either enables or disables moderation +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : void (!) +-** Notes : This function is called when a particular adapter is opened +-** There is no Disable function, because when all interrupts +-** might be disable, the moderation timer has no meaning at all +-******************************************************************************/ +- +-void +-SkDimEnableModerationIfNeeded(SK_AC *pAC) { +- +- if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_STATIC) { +- EnableIntMod(pAC); /* notification print in this function */ +- } else if (M_DIMINFO.IntModTypeSelect == C_INT_MOD_DYNAMIC) { +- SkDimStartModerationTimer(pAC); +- if (M_DIMINFO.DisplayStats) { +- printk("Dynamic moderation has been enabled\n"); +- } +- } else { +- if (M_DIMINFO.DisplayStats) { +- printk("No moderation has been enabled\n"); +- } +- } ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== SkDimEnableModerationIfNeeded\n")); + } + +-/******************************************************************************* +-** Function : SkDimDisplayModerationSettings +-** Description : Displays the current settings regaring interrupt moderation +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : void (!) +-** Notes : - +-*******************************************************************************/ +- +-void +-SkDimDisplayModerationSettings(SK_AC *pAC) { +- DisplaySelectedModerationType(pAC); +- DisplaySelectedModerationMask(pAC); +-} ++/***************************************************************************** ++ * ++ * SkDimDisableModeration - disables moderation if it is enabled ++ * ++ * Description: ++ * Disabling of the moderation requires that is enabled already. ++ * ++ * Returns: N/A ++ */ ++void SkDimDisableModeration( ++SK_AC *pAC, /* pointer to adapter control context */ ++int CurrentModeration) /* type of current moderation */ ++{ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("==> SkDimDisableModeration\n")); ++ ++ if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_NONE) { ++ if (CurrentModeration == C_INT_MOD_STATIC) { ++ disableIntMod(pAC); ++ } else { /* must be C_INT_MOD_DYNAMIC */ ++ SkTimerStop(pAC, pAC->IoBase, &M_DIMINFO.ModTimer); ++ disableIntMod(pAC); ++ } ++ } + +-/******************************************************************************* +-** +-** Local functions +-** +-*******************************************************************************/ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== SkDimDisableModeration\n")); ++} + +-/******************************************************************************* +-** Function : GetCurrentSystemLoad +-** Description : Retrieves the current system load of the system. This load +-** is evaluated for all processors within the system. +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : unsigned int: load expressed in percentage +-** Notes : The possible range being returned is from 0 up to 100. +-** Whereas 0 means 'no load at all' and 100 'system fully loaded' +-** It is impossible to determine what actually causes the system +-** to be in 100%, but maybe that is due to too much interrupts. +-*******************************************************************************/ +- +-static unsigned int +-GetCurrentSystemLoad(SK_AC *pAC) { +- unsigned long jif = jiffies; +- unsigned int UserTime = 0; +- unsigned int SystemTime = 0; +- unsigned int NiceTime = 0; +- unsigned int IdleTime = 0; +- unsigned int TotalTime = 0; +- unsigned int UsedTime = 0; +- unsigned int SystemLoad = 0; ++/****************************************************************************** ++ * ++ * Local Functions ++ * ++ *****************************************************************************/ + +- /* unsigned int NbrCpu = 0; */ ++/***************************************************************************** ++ * ++ * getIsrCalls - evaluate the number of IRQs handled in mod interval ++ * ++ * Description: ++ * Depending on the selected moderation mask, this function will return ++ * the number of interrupts handled in the previous moderation interval. ++ * This evaluated number is based on the current number of interrupts ++ * stored in PNMI-context and the previous stored interrupts. ++ * ++ * Returns: ++ * the number of IRQs handled ++ */ ++static SK_U64 getIsrCalls( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_U64 RxPort0IntDiff = 0, RxPort1IntDiff = 0; ++ SK_U64 TxPort0IntDiff = 0, TxPort1IntDiff = 0; ++ SK_U64 StatusPort0IntDiff = 0, StatusPort1IntDiff = 0; ++ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>getIsrCalls\n")); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if ((M_DIMINFO.MaskIrqModeration == IRQ_MASK_TX_ONLY) || ++ (M_DIMINFO.MaskIrqModeration == IRQ_MASK_SP_TX)) { ++ if (pAC->GIni.GIMacsFound == 2) { ++ TxPort1IntDiff = ++ pAC->Pnmi.Port[1].TxIntrCts - ++ M_DIMINFO.PrevPort1TxIntrCts; ++ } ++ TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts - ++ M_DIMINFO.PrevPort0TxIntrCts; ++ } else if ((M_DIMINFO.MaskIrqModeration == IRQ_MASK_RX_ONLY) || ++ (M_DIMINFO.MaskIrqModeration == IRQ_MASK_SP_RX)) { ++ if (pAC->GIni.GIMacsFound == 2) { ++ RxPort1IntDiff = ++ pAC->Pnmi.Port[1].RxIntrCts - ++ M_DIMINFO.PrevPort1RxIntrCts; ++ } ++ RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts - ++ M_DIMINFO.PrevPort0RxIntrCts; ++ } else { ++ if (pAC->GIni.GIMacsFound == 2) { ++ RxPort1IntDiff = ++ pAC->Pnmi.Port[1].RxIntrCts - ++ M_DIMINFO.PrevPort1RxIntrCts; ++ TxPort1IntDiff = ++ pAC->Pnmi.Port[1].TxIntrCts - ++ M_DIMINFO.PrevPort1TxIntrCts; ++ } ++ RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts - ++ M_DIMINFO.PrevPort0RxIntrCts; ++ TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts - ++ M_DIMINFO.PrevPort0TxIntrCts; ++ } ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("==>getIsrCalls (!CHIP_ID_YUKON_2)\n")); ++ return (RxPort0IntDiff + RxPort1IntDiff + ++ TxPort0IntDiff + TxPort1IntDiff); ++ } + + /* +- ** The following lines have been commented out, because +- ** from kernel 2.5.44 onwards, the kernel-owned structure +- ** +- ** struct kernel_stat kstat +- ** +- ** is not marked as an exported symbol in the file ++ ** We have a Yukon2 compliant chipset if we come up to here + ** +- ** kernel/ksyms.c +- ** +- ** As a consequence, using this driver as KLM is not possible +- ** and any access of the structure kernel_stat via the +- ** dedicated macros kstat_cpu(i).cpustat.xxx is to be avoided. +- ** +- ** The kstat-information might be added again in future +- ** versions of the 2.5.xx kernel, but for the time being, +- ** number of interrupts will serve as indication how much +- ** load we currently have... +- ** +- ** for (NbrCpu = 0; NbrCpu < num_online_cpus(); NbrCpu++) { +- ** UserTime = UserTime + kstat_cpu(NbrCpu).cpustat.user; +- ** NiceTime = NiceTime + kstat_cpu(NbrCpu).cpustat.nice; +- ** SystemTime = SystemTime + kstat_cpu(NbrCpu).cpustat.system; +- ** } ++ if (pAC->GIni.GIMacsFound == 2) { ++ StatusPort1IntDiff = pAC->Pnmi.Port[1].StatusLeIntrCts - ++ M_DIMINFO.PrevPort1StatusIntrCts; ++ } ++ StatusPort0IntDiff = pAC->Pnmi.Port[0].StatusLeIntrCts - ++ M_DIMINFO.PrevPort0StatusIntrCts; + */ +- SK_U64 ThresholdInts = 0; +- SK_U64 IsrCallsPerSec = 0; +- +- ThresholdInts = ((M_DIMINFO.MaxModIntsPerSec * +- C_INT_MOD_ENABLE_PERCENTAGE) + 100); +- IsrCallsPerSec = GetIsrCalls(pAC); +- if (IsrCallsPerSec >= ThresholdInts) { +- /* +- ** We do not know how much the real CPU-load is! +- ** Return 80% as a default in order to activate DIM +- */ +- SystemLoad = 80; +- return (SystemLoad); +- } +- +- UsedTime = UserTime + NiceTime + SystemTime; +- +- IdleTime = jif * num_online_cpus() - UsedTime; +- TotalTime = UsedTime + IdleTime; +- +- SystemLoad = ( 100 * (UsedTime - M_DIMINFO.PrevUsedTime) ) / +- (TotalTime - M_DIMINFO.PrevTotalTime); ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("==>getIsrCalls (CHIP_ID_YUKON_2)\n")); ++ return (StatusPort0IntDiff + StatusPort1IntDiff); ++} + +- if (M_DIMINFO.DisplayStats) { +- printk("Current system load is: %u\n", SystemLoad); ++/***************************************************************************** ++ * ++ * setCurrIntCtr - stores the current number of interrupts ++ * ++ * Description: ++ * Stores the current number of occurred interrupts in the adapter ++ * context. This is needed to evaluate the umber of interrupts within ++ * the moderation interval. ++ * ++ * Returns: N/A ++ * ++ */ ++static void setCurrIntCtr( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>setCurrIntCtr\n")); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIMacsFound == 2) { ++ M_DIMINFO.PrevPort1RxIntrCts = pAC->Pnmi.Port[1].RxIntrCts; ++ M_DIMINFO.PrevPort1TxIntrCts = pAC->Pnmi.Port[1].TxIntrCts; ++ } ++ M_DIMINFO.PrevPort0RxIntrCts = pAC->Pnmi.Port[0].RxIntrCts; ++ M_DIMINFO.PrevPort0TxIntrCts = pAC->Pnmi.Port[0].TxIntrCts; ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== setCurrIntCtr (!CHIP_ID_YUKON_2)\n")); ++ return; + } + +- M_DIMINFO.PrevTotalTime = TotalTime; +- M_DIMINFO.PrevUsedTime = UsedTime; +- +- return (SystemLoad); ++ /* ++ ** We have a Yukon2 compliant chipset if we come up to here ++ ** ++ if (pAC->GIni.GIMacsFound == 2) { ++ M_DIMINFO.PrevPort1StatusIntrCts = pAC->Pnmi.Port[1].StatusLeIntrCts; ++ } ++ M_DIMINFO.PrevPort0StatusIntrCts = pAC->Pnmi.Port[0].StatusLeIntrCts; ++ */ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== setCurrIntCtr (CHIP_ID_YUKON_2)\n")); + } + +-/******************************************************************************* +-** Function : GetIsrCalls +-** Description : Depending on the selected moderation mask, this function will +-** return the number of interrupts handled in the previous time- +-** frame. This evaluated number is based on the current number +-** of interrupts stored in PNMI-context and the previous stored +-** interrupts. +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : int: the number of interrupts being executed in the last +-** timeframe +-** Notes : It makes only sense to call this function, when dynamic +-** interrupt moderation is applied +-*******************************************************************************/ +- +-static SK_U64 +-GetIsrCalls(SK_AC *pAC) { +- SK_U64 RxPort0IntDiff = 0; +- SK_U64 RxPort1IntDiff = 0; +- SK_U64 TxPort0IntDiff = 0; +- SK_U64 TxPort1IntDiff = 0; +- +- if (pAC->DynIrqModInfo.MaskIrqModeration == IRQ_MASK_TX_ONLY) { +- if (pAC->GIni.GIMacsFound == 2) { +- TxPort1IntDiff = pAC->Pnmi.Port[1].TxIntrCts - +- pAC->DynIrqModInfo.PrevPort1TxIntrCts; +- } +- TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts - +- pAC->DynIrqModInfo.PrevPort0TxIntrCts; +- } else if (pAC->DynIrqModInfo.MaskIrqModeration == IRQ_MASK_RX_ONLY) { +- if (pAC->GIni.GIMacsFound == 2) { +- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort1RxIntrCts; +- } +- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort0RxIntrCts; +- } else { +- if (pAC->GIni.GIMacsFound == 2) { +- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort1RxIntrCts; +- TxPort1IntDiff = pAC->Pnmi.Port[1].TxIntrCts - +- pAC->DynIrqModInfo.PrevPort1TxIntrCts; +- } +- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort0RxIntrCts; +- TxPort0IntDiff = pAC->Pnmi.Port[0].TxIntrCts - +- pAC->DynIrqModInfo.PrevPort0TxIntrCts; +- } +- +- return (RxPort0IntDiff + RxPort1IntDiff + TxPort0IntDiff + TxPort1IntDiff); ++/***************************************************************************** ++ * ++ * isIntModEnabled - returns the current state of interrupt moderation ++ * ++ * Description: ++ * This function retrieves the current value of the interrupt moderation ++ * command register. Its content determines whether any moderation is ++ * running or not. ++ * ++ * Returns: ++ * SK_TRUE : IRQ moderation is currently active ++ * SK_FALSE: No IRQ moderation is active ++ */ ++static SK_BOOL isIntModEnabled( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ unsigned long CtrCmd; ++ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==>isIntModEnabled\n")); ++ ++ SK_IN32(pAC->IoBase, B2_IRQM_CTRL, &CtrCmd); ++ if ((CtrCmd & TIM_START) == TIM_START) { ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== isIntModEnabled (SK_TRUE)\n")); ++ return SK_TRUE; ++ } ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG, ++ ("<== isIntModEnabled (SK_FALSE)\n")); ++ return SK_FALSE; + } + +-/******************************************************************************* +-** Function : GetRxCalls +-** Description : This function will return the number of times a receive inter- +-** rupt was processed. This is needed to evaluate any resizing +-** factor. +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : SK_U64: the number of RX-ints being processed +-** Notes : It makes only sense to call this function, when dynamic +-** interrupt moderation is applied +-*******************************************************************************/ +- +-static SK_U64 +-GetRxCalls(SK_AC *pAC) { +- SK_U64 RxPort0IntDiff = 0; +- SK_U64 RxPort1IntDiff = 0; +- +- if (pAC->GIni.GIMacsFound == 2) { +- RxPort1IntDiff = pAC->Pnmi.Port[1].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort1RxIntrCts; +- } +- RxPort0IntDiff = pAC->Pnmi.Port[0].RxIntrCts - +- pAC->DynIrqModInfo.PrevPort0RxIntrCts; +- +- return (RxPort0IntDiff + RxPort1IntDiff); +-} ++/***************************************************************************** ++ * ++ * enableIntMod - enables the interrupt moderation ++ * ++ * Description: ++ * Enabling the interrupt moderation is done by putting the desired ++ * moderation interval in the B2_IRQM_INI register, specifying the ++ * desired maks in the B2_IRQM_MSK register and finally starting the ++ * IRQ moderation timer using the B2_IRQM_CTRL register. ++ * ++ * Returns: N/A ++ * ++ */ ++static void enableIntMod( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ unsigned long ModBase; ++ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> enableIntMod\n")); ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { ++ ModBase = C_CLK_FREQ_GENESIS / M_DIMINFO.MaxModIntsPerSec; ++ } else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) { ++ ModBase = C_CLK_FREQ_YUKON_EC / M_DIMINFO.MaxModIntsPerSec; ++ } else { ++ ModBase = C_CLK_FREQ_YUKON / M_DIMINFO.MaxModIntsPerSec; ++ } + +-/******************************************************************************* +-** Function : SetCurrIntCtr +-** Description : Will store the current number orf occured interrupts in the +-** adapter context. This is needed to evaluated the number of +-** interrupts within a current timeframe. +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : void (!) +-** Notes : - +-*******************************************************************************/ +- +-static void +-SetCurrIntCtr(SK_AC *pAC) { +- if (pAC->GIni.GIMacsFound == 2) { +- pAC->DynIrqModInfo.PrevPort1RxIntrCts = pAC->Pnmi.Port[1].RxIntrCts; +- pAC->DynIrqModInfo.PrevPort1TxIntrCts = pAC->Pnmi.Port[1].TxIntrCts; +- } +- pAC->DynIrqModInfo.PrevPort0RxIntrCts = pAC->Pnmi.Port[0].RxIntrCts; +- pAC->DynIrqModInfo.PrevPort0TxIntrCts = pAC->Pnmi.Port[0].TxIntrCts; +-} ++ SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase); ++ SK_OUT32(pAC->IoBase, B2_IRQM_MSK, M_DIMINFO.MaskIrqModeration); ++ SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_START); + +-/******************************************************************************* +-** Function : IsIntModEnabled() +-** Description : Retrieves the current value of the interrupts moderation +-** command register. Its content determines whether any +-** moderation is running or not. +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : SK_TRUE : if mod timer running +-** SK_FALSE : if no moderation is being performed +-** Notes : - +-*******************************************************************************/ +- +-static SK_BOOL +-IsIntModEnabled(SK_AC *pAC) { +- unsigned long CtrCmd; +- +- SK_IN32(pAC->IoBase, B2_IRQM_CTRL, &CtrCmd); +- if ((CtrCmd & TIM_START) == TIM_START) { +- return SK_TRUE; +- } else { +- return SK_FALSE; +- } ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== enableIntMod\n")); + } + +-/******************************************************************************* +-** Function : EnableIntMod() +-** Description : Enables the interrupt moderation using the values stored in +-** in the pAC->DynIntMod data structure +-** Programmer : Ralph Roesler +-** Last Modified: 22-mar-03 +-** Returns : - +-** Notes : - +-*******************************************************************************/ +- +-static void +-EnableIntMod(SK_AC *pAC) { +- unsigned long ModBase; +- +- if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { +- ModBase = C_CLK_FREQ_GENESIS / pAC->DynIrqModInfo.MaxModIntsPerSec; +- } else { +- ModBase = C_CLK_FREQ_YUKON / pAC->DynIrqModInfo.MaxModIntsPerSec; +- } +- +- SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase); +- SK_OUT32(pAC->IoBase, B2_IRQM_MSK, pAC->DynIrqModInfo.MaskIrqModeration); +- SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_START); +- if (M_DIMINFO.DisplayStats) { +- printk("Enabled interrupt moderation (%i ints/sec)\n", +- M_DIMINFO.MaxModIntsPerSec); +- } +-} ++/***************************************************************************** ++ * ++ * disableIntMod - disables the interrupt moderation ++ * ++ * Description: ++ * Disabling the interrupt moderation is done by stopping the ++ * IRQ moderation timer using the B2_IRQM_CTRL register. ++ * ++ * Returns: N/A ++ * ++ */ ++static void disableIntMod( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("==> disableIntMod\n")); + +-/******************************************************************************* +-** Function : DisableIntMod() +-** Description : Disbles the interrupt moderation independent of what inter- +-** rupts are running or not +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : - +-** Notes : - +-*******************************************************************************/ +- +-static void +-DisableIntMod(SK_AC *pAC) { +- +- SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_STOP); +- if (M_DIMINFO.DisplayStats) { +- printk("Disabled interrupt moderation\n"); +- } +-} ++ SK_OUT32(pAC->IoBase, B2_IRQM_CTRL, TIM_STOP); + +-/******************************************************************************* +-** Function : ResizeDimTimerDuration(); +-** Description : Checks the current used descriptor ratio and resizes the +-** duration timer (longer/smaller) if possible. +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : - +-** Notes : There are both maximum and minimum timer duration value. +-** This function assumes that interrupt moderation is already +-** enabled! +-*******************************************************************************/ +- +-static void +-ResizeDimTimerDuration(SK_AC *pAC) { +- SK_BOOL IncreaseTimerDuration; +- int TotalMaxNbrDescr; +- int UsedDescrRatio; +- int RatioDiffAbs; +- int RatioDiffRel; +- int NewMaxModIntsPerSec; +- int ModAdjValue; +- long ModBase; +- +- /* +- ** Check first if we are allowed to perform any modification +- */ +- if (IsIntModEnabled(pAC)) { +- if (M_DIMINFO.IntModTypeSelect != C_INT_MOD_DYNAMIC) { +- return; +- } else { +- if (M_DIMINFO.ModJustEnabled) { +- M_DIMINFO.ModJustEnabled = SK_FALSE; +- return; +- } +- } +- } +- +- /* +- ** If we got until here, we have to evaluate the amount of the +- ** descriptor ratio change... +- */ +- TotalMaxNbrDescr = pAC->RxDescrPerRing * GetRxCalls(pAC); +- UsedDescrRatio = (M_DIMINFO.NbrProcessedDescr * 100) / TotalMaxNbrDescr; +- +- if (UsedDescrRatio > M_DIMINFO.PrevUsedDescrRatio) { +- RatioDiffAbs = (UsedDescrRatio - M_DIMINFO.PrevUsedDescrRatio); +- RatioDiffRel = (RatioDiffAbs * 100) / UsedDescrRatio; +- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio; +- IncreaseTimerDuration = SK_FALSE; /* in other words: DECREASE */ +- } else if (UsedDescrRatio < M_DIMINFO.PrevUsedDescrRatio) { +- RatioDiffAbs = (M_DIMINFO.PrevUsedDescrRatio - UsedDescrRatio); +- RatioDiffRel = (RatioDiffAbs * 100) / M_DIMINFO.PrevUsedDescrRatio; +- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio; +- IncreaseTimerDuration = SK_TRUE; /* in other words: INCREASE */ +- } else { +- RatioDiffAbs = (M_DIMINFO.PrevUsedDescrRatio - UsedDescrRatio); +- RatioDiffRel = (RatioDiffAbs * 100) / M_DIMINFO.PrevUsedDescrRatio; +- M_DIMINFO.PrevUsedDescrRatio = UsedDescrRatio; +- IncreaseTimerDuration = SK_TRUE; /* in other words: INCREASE */ +- } +- +- /* +- ** Now we can determine the change in percent +- */ +- if ((RatioDiffRel >= 0) && (RatioDiffRel <= 5) ) { +- ModAdjValue = 1; /* 1% change - maybe some other value in future */ +- } else if ((RatioDiffRel > 5) && (RatioDiffRel <= 10) ) { +- ModAdjValue = 1; /* 1% change - maybe some other value in future */ +- } else if ((RatioDiffRel > 10) && (RatioDiffRel <= 15) ) { +- ModAdjValue = 1; /* 1% change - maybe some other value in future */ +- } else { +- ModAdjValue = 1; /* 1% change - maybe some other value in future */ +- } +- +- if (IncreaseTimerDuration) { +- NewMaxModIntsPerSec = M_DIMINFO.MaxModIntsPerSec + +- (M_DIMINFO.MaxModIntsPerSec * ModAdjValue) / 100; +- } else { +- NewMaxModIntsPerSec = M_DIMINFO.MaxModIntsPerSec - +- (M_DIMINFO.MaxModIntsPerSec * ModAdjValue) / 100; +- } +- +- /* +- ** Check if we exceed boundaries... +- */ +- if ( (NewMaxModIntsPerSec > M_DIMINFO.MaxModIntsPerSecUpperLimit) || +- (NewMaxModIntsPerSec < M_DIMINFO.MaxModIntsPerSecLowerLimit)) { +- if (M_DIMINFO.DisplayStats) { +- printk("Cannot change ModTim from %i to %i ints/sec\n", +- M_DIMINFO.MaxModIntsPerSec, NewMaxModIntsPerSec); +- } +- return; +- } else { +- if (M_DIMINFO.DisplayStats) { +- printk("Resized ModTim from %i to %i ints/sec\n", +- M_DIMINFO.MaxModIntsPerSec, NewMaxModIntsPerSec); +- } +- } +- +- M_DIMINFO.MaxModIntsPerSec = NewMaxModIntsPerSec; +- +- if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { +- ModBase = C_CLK_FREQ_GENESIS / pAC->DynIrqModInfo.MaxModIntsPerSec; +- } else { +- ModBase = C_CLK_FREQ_YUKON / pAC->DynIrqModInfo.MaxModIntsPerSec; +- } +- +- /* +- ** We do not need to touch any other registers +- */ +- SK_OUT32(pAC->IoBase, B2_IRQM_INI, ModBase); ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_MSG,("<== disableIntMod\n")); + } + + /******************************************************************************* +-** Function : DisplaySelectedModerationType() +-** Description : Displays what type of moderation we have +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : void! +-** Notes : - +-*******************************************************************************/ +- +-static void +-DisplaySelectedModerationType(SK_AC *pAC) { +- +- if (pAC->DynIrqModInfo.DisplayStats) { +- if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) { +- printk("Static int moderation runs with %i INTS/sec\n", +- pAC->DynIrqModInfo.MaxModIntsPerSec); +- } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) { +- if (IsIntModEnabled(pAC)) { +- printk("Dynamic int moderation runs with %i INTS/sec\n", +- pAC->DynIrqModInfo.MaxModIntsPerSec); +- } else { +- printk("Dynamic int moderation currently not applied\n"); +- } +- } else { +- printk("No interrupt moderation selected!\n"); +- } +- } +-} +- +-/******************************************************************************* +-** Function : DisplaySelectedModerationMask() +-** Description : Displays what interrupts are moderated +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : void! +-** Notes : - +-*******************************************************************************/ +- +-static void +-DisplaySelectedModerationMask(SK_AC *pAC) { +- +- if (pAC->DynIrqModInfo.DisplayStats) { +- if (pAC->DynIrqModInfo.IntModTypeSelect != C_INT_MOD_NONE) { +- switch (pAC->DynIrqModInfo.MaskIrqModeration) { +- case IRQ_MASK_TX_ONLY: +- printk("Only Tx-interrupts are moderated\n"); +- break; +- case IRQ_MASK_RX_ONLY: +- printk("Only Rx-interrupts are moderated\n"); +- break; +- case IRQ_MASK_SP_ONLY: +- printk("Only special-interrupts are moderated\n"); +- break; +- case IRQ_MASK_TX_RX: +- printk("Tx- and Rx-interrupts are moderated\n"); +- break; +- case IRQ_MASK_SP_RX: +- printk("Special- and Rx-interrupts are moderated\n"); +- break; +- case IRQ_MASK_SP_TX: +- printk("Special- and Tx-interrupts are moderated\n"); +- break; +- case IRQ_MASK_RX_TX_SP: +- printk("All Rx-, Tx and special-interrupts are moderated\n"); +- break; +- default: +- printk("Don't know what is moderated\n"); +- break; +- } +- } else { +- printk("No specific interrupts masked for moderation\n"); +- } +- } +-} +- +-/******************************************************************************* +-** Function : DisplayDescrRatio +-** Description : Like the name states... +-** Programmer : Ralph Roesler +-** Last Modified: 23-mar-03 +-** Returns : void! +-** Notes : - +-*******************************************************************************/ +- +-static void +-DisplayDescrRatio(SK_AC *pAC) { +- int TotalMaxNbrDescr = 0; +- +- if (pAC->DynIrqModInfo.DisplayStats) { +- TotalMaxNbrDescr = pAC->RxDescrPerRing * GetRxCalls(pAC); +- printk("Ratio descriptors: %i/%i\n", +- M_DIMINFO.NbrProcessedDescr, TotalMaxNbrDescr); +- } +-} +- +-/******************************************************************************* +-** +-** End of file +-** +-*******************************************************************************/ ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skethtool.c linux-2.6.9.new/drivers/net/sk98lin/skethtool.c +--- linux-2.6.9.old/drivers/net/sk98lin/skethtool.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skethtool.c 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,1333 @@ ++/****************************************************************************** ++ * ++ * Name: skethtool.c ++ * Project: GEnesis, PCI Gigabit Ethernet Adapter ++ * Version: $Revision: 1.3.2.9 $ ++ * Date: $Date: 2005/05/23 13:47:33 $ ++ * Purpose: All functions regarding ethtool handling ++ * ++ ******************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 1998-2002 SysKonnect GmbH. ++ * (C)Copyright 2002-2005 Marvell. ++ * ++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet ++ * Server Adapters. ++ * ++ * Author: Ralph Roesler (rroesler@syskonnect.de) ++ * Mirko Lindner (mlindner@syskonnect.de) ++ * ++ * Address all question to: linux@syskonnect.de ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ *****************************************************************************/ ++ ++#include "h/skdrv1st.h" ++#include "h/skdrv2nd.h" ++#include "h/skversion.h" ++#include ++#include ++#include ++ ++/****************************************************************************** ++ * ++ * External Functions and Data ++ * ++ *****************************************************************************/ ++ ++extern void SkDimDisableModeration(SK_AC *pAC, int CurrentModeration); ++extern void SkDimEnableModerationIfNeeded(SK_AC *pAC); ++ ++/****************************************************************************** ++ * ++ * Defines ++ * ++ *****************************************************************************/ ++ ++#ifndef ETHT_STATSTRING_LEN ++#define ETHT_STATSTRING_LEN 32 ++#endif ++ ++#define SK98LIN_STAT(m) sizeof(((SK_AC *)0)->m),offsetof(SK_AC, m) ++ ++#define SUPP_COPPER_ALL (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \ ++ SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \ ++ SUPPORTED_1000baseT_Half| SUPPORTED_1000baseT_Full| \ ++ SUPPORTED_TP) ++ ++#define ADV_COPPER_ALL (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | \ ++ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | \ ++ ADVERTISED_1000baseT_Half| ADVERTISED_1000baseT_Full| \ ++ ADVERTISED_TP) ++ ++#define SUPP_FIBRE_ALL (SUPPORTED_1000baseT_Full | \ ++ SUPPORTED_FIBRE | \ ++ SUPPORTED_Autoneg) ++ ++#define ADV_FIBRE_ALL (ADVERTISED_1000baseT_Full | \ ++ ADVERTISED_FIBRE | \ ++ ADVERTISED_Autoneg) ++ ++/****************************************************************************** ++ * ++ * Local Function Prototypes ++ * ++ *****************************************************************************/ ++ ++#ifdef ETHTOOL_GSET ++static void getSettings(SK_AC *pAC, int port, struct ethtool_cmd *ecmd); ++#endif ++#ifdef ETHTOOL_SSET ++static int setSettings(SK_AC *pAC, int port, struct ethtool_cmd *ecmd); ++#endif ++#ifdef ETHTOOL_GPAUSEPARAM ++static void getPauseParams(SK_AC *pAC, int port, struct ethtool_pauseparam *epause); ++#endif ++#ifdef ETHTOOL_SPAUSEPARAM ++static int setPauseParams(SK_AC *pAC, int port, struct ethtool_pauseparam *epause); ++#endif ++#ifdef ETHTOOL_GDRVINFO ++static void getDriverInfo(SK_AC *pAC, int port, struct ethtool_drvinfo *edrvinfo); ++#endif ++#ifdef ETHTOOL_PHYS_ID ++static int startLocateNIC(SK_AC *pAC, int port, struct ethtool_value *blinkSecs); ++static void toggleLeds(unsigned long ptr); ++#endif ++#ifdef ETHTOOL_GCOALESCE ++static void getModerationParams(SK_AC *pAC, int port, struct ethtool_coalesce *ecoalesc); ++#endif ++#ifdef ETHTOOL_SCOALESCE ++static int setModerationParams(SK_AC *pAC, int port, struct ethtool_coalesce *ecoalesc); ++#endif ++#ifdef ETHTOOL_GWOL ++static void getWOLsettings(SK_AC *pAC, int port, struct ethtool_wolinfo *ewol); ++#endif ++#ifdef ETHTOOL_SWOL ++static int setWOLsettings(SK_AC *pAC, int port, struct ethtool_wolinfo *ewol); ++#endif ++ ++static int getPortNumber(struct net_device *netdev, struct ifreq *ifr); ++ ++/****************************************************************************** ++ * ++ * Local Variables ++ * ++ *****************************************************************************/ ++ ++struct sk98lin_stats { ++ char stat_string[ETHT_STATSTRING_LEN]; ++ int sizeof_stat; ++ int stat_offset; ++}; ++ ++static struct sk98lin_stats sk98lin_etht_stats_port0[] = { ++ { "rx_packets" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxOkCts) }, ++ { "tx_packets" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxOkCts) }, ++ { "rx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxOctetsOkCts) }, ++ { "tx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxOctetsOkCts) }, ++ { "rx_errors" , SK98LIN_STAT(PnmiStruct.InErrorsCts) }, ++ { "tx_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxSingleCollisionCts) }, ++ { "rx_dropped" , SK98LIN_STAT(PnmiStruct.RxNoBufCts) }, ++ { "tx_dropped" , SK98LIN_STAT(PnmiStruct.TxNoBufCts) }, ++ { "multicasts" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxMulticastOkCts) }, ++ { "collisions" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxSingleCollisionCts) }, ++ { "rx_length_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxRuntCts) }, ++ { "rx_buffer_overflow_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFifoOverflowCts) }, ++ { "rx_crc_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFcsCts) }, ++ { "rx_frame_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxFramingCts) }, ++ { "rx_too_short_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxShortsCts) }, ++ { "rx_too_long_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxTooLongCts) }, ++ { "rx_carrier_extension_errors", SK98LIN_STAT(PnmiStruct.Stat[0].StatRxCextCts) }, ++ { "rx_symbol_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxSymbolCts) }, ++ { "rx_llc_mac_size_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxIRLengthCts) }, ++ { "rx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxCarrierCts) }, ++ { "rx_jabber_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxJabberCts) }, ++ { "rx_missed_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatRxMissedCts) }, ++ { "tx_abort_collision_errors" , SK98LIN_STAT(stats.tx_aborted_errors) }, ++ { "tx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxCarrierCts) }, ++ { "tx_buffer_underrun_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxFifoUnderrunCts) }, ++ { "tx_heartbeat_errors" , SK98LIN_STAT(PnmiStruct.Stat[0].StatTxCarrierCts) } , ++ { "tx_window_errors" , SK98LIN_STAT(stats.tx_window_errors) } ++}; ++ ++static struct sk98lin_stats sk98lin_etht_stats_port1[] = { ++ { "rx_packets" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxOkCts) }, ++ { "tx_packets" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxOkCts) }, ++ { "rx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxOctetsOkCts) }, ++ { "tx_bytes" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxOctetsOkCts) }, ++ { "rx_errors" , SK98LIN_STAT(PnmiStruct.InErrorsCts) }, ++ { "tx_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxSingleCollisionCts) }, ++ { "rx_dropped" , SK98LIN_STAT(PnmiStruct.RxNoBufCts) }, ++ { "tx_dropped" , SK98LIN_STAT(PnmiStruct.TxNoBufCts) }, ++ { "multicasts" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxMulticastOkCts) }, ++ { "collisions" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxSingleCollisionCts) }, ++ { "rx_length_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxRuntCts) }, ++ { "rx_buffer_overflow_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFifoOverflowCts) }, ++ { "rx_crc_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFcsCts) }, ++ { "rx_frame_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxFramingCts) }, ++ { "rx_too_short_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxShortsCts) }, ++ { "rx_too_long_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxTooLongCts) }, ++ { "rx_carrier_extension_errors", SK98LIN_STAT(PnmiStruct.Stat[1].StatRxCextCts) }, ++ { "rx_symbol_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxSymbolCts) }, ++ { "rx_llc_mac_size_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxIRLengthCts) }, ++ { "rx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxCarrierCts) }, ++ { "rx_jabber_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxJabberCts) }, ++ { "rx_missed_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatRxMissedCts) }, ++ { "tx_abort_collision_errors" , SK98LIN_STAT(stats.tx_aborted_errors) }, ++ { "tx_carrier_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxCarrierCts) }, ++ { "tx_buffer_underrun_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxFifoUnderrunCts) }, ++ { "tx_heartbeat_errors" , SK98LIN_STAT(PnmiStruct.Stat[1].StatTxCarrierCts) } , ++ { "tx_window_errors" , SK98LIN_STAT(stats.tx_window_errors) } ++}; ++ ++#define SK98LIN_STATS_LEN sizeof(sk98lin_etht_stats_port0) / sizeof(struct sk98lin_stats) ++ ++static int nbrBlinkQuarterSeconds; ++static int currentPortIndex; ++static SK_BOOL isLocateNICrunning = SK_FALSE; ++static SK_BOOL isDualNetCard = SK_FALSE; ++static SK_BOOL doSwitchLEDsOn = SK_FALSE; ++static SK_BOOL boardWasDown[2] = { SK_FALSE, SK_FALSE }; ++static struct timer_list locateNICtimer; ++ ++/****************************************************************************** ++ * ++ * Global Functions ++ * ++ *****************************************************************************/ ++ ++/***************************************************************************** ++ * ++ * SkEthIoctl - IOCTL entry point for all ethtool queries ++ * ++ * Description: ++ * Any IOCTL request that has to deal with the ethtool command tool is ++ * dispatched via this function. ++ * ++ * Returns: ++ * ==0: everything fine, no error ++ * !=0: the return value is the error code of the failure ++ */ ++int SkEthIoctl( ++struct net_device *netdev, /* the pointer to netdev structure */ ++struct ifreq *ifr) /* what interface the request refers to? */ ++{ ++ DEV_NET *pNet = (DEV_NET*) netdev->priv; ++ SK_AC *pAC = pNet->pAC; ++ void *pAddr = ifr->ifr_data; ++ int port = getPortNumber(netdev, ifr); ++ SK_PNMI_STRUCT_DATA *pPnmiStruct = &pAC->PnmiStruct; ++ SK_U32 Size = sizeof(SK_PNMI_STRUCT_DATA); ++ SK_U32 cmd; ++ struct sk98lin_stats *sk98lin_etht_stats = ++ (port == 0) ? sk98lin_etht_stats_port0 : sk98lin_etht_stats_port1; ++ ++ if (get_user(cmd, (uint32_t *) pAddr)) { ++ return -EFAULT; ++ } ++ ++ switch(cmd) { ++#ifdef ETHTOOL_GSET ++ case ETHTOOL_GSET: { ++ struct ethtool_cmd ecmd = { ETHTOOL_GSET }; ++ getSettings(pAC, port, &ecmd); ++ if(copy_to_user(pAddr, &ecmd, sizeof(ecmd))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++ break; ++#endif ++#ifdef ETHTOOL_SSET ++ case ETHTOOL_SSET: { ++ struct ethtool_cmd ecmd; ++ if(copy_from_user(&ecmd, pAddr, sizeof(ecmd))) { ++ return -EFAULT; ++ } ++ return setSettings(pAC, port, &ecmd); ++ } ++ break; ++#endif ++#ifdef ETHTOOL_GDRVINFO ++ case ETHTOOL_GDRVINFO: { ++ struct ethtool_drvinfo drvinfo = { ETHTOOL_GDRVINFO }; ++ getDriverInfo(pAC, port, &drvinfo); ++ if(copy_to_user(pAddr, &drvinfo, sizeof(drvinfo))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++ break; ++#endif ++#ifdef ETHTOOL_GSTRINGS ++ case ETHTOOL_GSTRINGS: { ++ struct ethtool_gstrings gstrings = { ETHTOOL_GSTRINGS }; ++ char *strings = NULL; ++ int err = 0; ++ if(copy_from_user(&gstrings, pAddr, sizeof(gstrings))) { ++ return -EFAULT; ++ } ++ switch(gstrings.string_set) { ++#ifdef ETHTOOL_GSTATS ++ case ETH_SS_STATS: { ++ int i; ++ gstrings.len = SK98LIN_STATS_LEN; ++ if ((strings = kmalloc(SK98LIN_STATS_LEN*ETHT_STATSTRING_LEN,GFP_KERNEL)) == NULL) { ++ return -ENOMEM; ++ } ++ for(i=0; i < SK98LIN_STATS_LEN; i++) { ++ memcpy(&strings[i * ETHT_STATSTRING_LEN], ++ &(sk98lin_etht_stats[i].stat_string), ++ ETHT_STATSTRING_LEN); ++ } ++ } ++ break; ++#endif ++ default: ++ return -EOPNOTSUPP; ++ } ++ if(copy_to_user(pAddr, &gstrings, sizeof(gstrings))) { ++ err = -EFAULT; ++ } ++ pAddr = (void *) ((unsigned long int) pAddr + offsetof(struct ethtool_gstrings, data)); ++ if(!err && copy_to_user(pAddr, strings, gstrings.len * ETH_GSTRING_LEN)) { ++ err = -EFAULT; ++ } ++ kfree(strings); ++ return err; ++ } ++#endif ++#ifdef ETHTOOL_GSTATS ++ case ETHTOOL_GSTATS: { ++ struct { ++ struct ethtool_stats eth_stats; ++ uint64_t data[SK98LIN_STATS_LEN]; ++ } stats = { {ETHTOOL_GSTATS, SK98LIN_STATS_LEN} }; ++ int i; ++ ++ if (netif_running(pAC->dev[port])) { ++ SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, port); ++ } ++ for(i = 0; i < SK98LIN_STATS_LEN; i++) { ++ if (netif_running(pAC->dev[port])) { ++ stats.data[i] = (sk98lin_etht_stats[i].sizeof_stat == ++ sizeof(uint64_t)) ? ++ *(uint64_t *)((char *)pAC + ++ sk98lin_etht_stats[i].stat_offset) : ++ *(uint32_t *)((char *)pAC + ++ sk98lin_etht_stats[i].stat_offset); ++ } else { ++ stats.data[i] = (sk98lin_etht_stats[i].sizeof_stat == ++ sizeof(uint64_t)) ? (uint64_t) 0 : (uint32_t) 0; ++ } ++ } ++ if(copy_to_user(pAddr, &stats, sizeof(stats))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_PHYS_ID ++ case ETHTOOL_PHYS_ID: { ++ struct ethtool_value blinkSecs; ++ if(copy_from_user(&blinkSecs, pAddr, sizeof(blinkSecs))) { ++ return -EFAULT; ++ } ++ return startLocateNIC(pAC, port, &blinkSecs); ++ } ++#endif ++#ifdef ETHTOOL_GPAUSEPARAM ++ case ETHTOOL_GPAUSEPARAM: { ++ struct ethtool_pauseparam epause = { ETHTOOL_GPAUSEPARAM }; ++ getPauseParams(pAC, port, &epause); ++ if(copy_to_user(pAddr, &epause, sizeof(epause))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_SPAUSEPARAM ++ case ETHTOOL_SPAUSEPARAM: { ++ struct ethtool_pauseparam epause; ++ if(copy_from_user(&epause, pAddr, sizeof(epause))) { ++ return -EFAULT; ++ } ++ return setPauseParams(pAC, port, &epause); ++ } ++#endif ++#ifdef ETHTOOL_GSG ++ case ETHTOOL_GSG: { ++ struct ethtool_value edata = { ETHTOOL_GSG }; ++ edata.data = (netdev->features & NETIF_F_SG) != 0; ++ if (copy_to_user(pAddr, &edata, sizeof(edata))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_SSG ++ case ETHTOOL_SSG: { ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, pAddr, sizeof(edata))) { ++ return -EFAULT; ++ } ++ if (pAC->ChipsetType) { /* Don't handle if Genesis */ ++ if (edata.data) { ++ netdev->features |= NETIF_F_SG; ++ } else { ++ netdev->features &= ~NETIF_F_SG; ++ } ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_GRXCSUM ++ case ETHTOOL_GRXCSUM: { ++ struct ethtool_value edata = { ETHTOOL_GRXCSUM }; ++ edata.data = pAC->RxPort[port].UseRxCsum; ++ if (copy_to_user(pAddr, &edata, sizeof(edata))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_SRXCSUM ++ case ETHTOOL_SRXCSUM: { ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, pAddr, sizeof(edata))) { ++ return -EFAULT; ++ } ++ pAC->RxPort[port].UseRxCsum = edata.data; ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_GTXCSUM ++ case ETHTOOL_GTXCSUM: { ++ struct ethtool_value edata = { ETHTOOL_GTXCSUM }; ++ edata.data = ((netdev->features & NETIF_F_IP_CSUM) != 0); ++ if (copy_to_user(pAddr, &edata, sizeof(edata))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_STXCSUM ++ case ETHTOOL_STXCSUM: { ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, pAddr, sizeof(edata))) { ++ return -EFAULT; ++ } ++ if (pAC->ChipsetType) { /* Don't handle if Genesis */ ++ if (edata.data) { ++ netdev->features |= NETIF_F_IP_CSUM; ++ } else { ++ netdev->features &= ~NETIF_F_IP_CSUM; ++ } ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_NWAY_RST ++ case ETHTOOL_NWAY_RST: { ++ if(netif_running(netdev)) { ++ (*netdev->stop)(netdev); ++ (*netdev->open)(netdev); ++ } ++ return 0; ++ } ++#endif ++#ifdef NETIF_F_TSO ++#ifdef ETHTOOL_GTSO ++ case ETHTOOL_GTSO: { ++ struct ethtool_value edata = { ETHTOOL_GTSO }; ++ edata.data = (netdev->features & NETIF_F_TSO) != 0; ++ if (copy_to_user(pAddr, &edata, sizeof(edata))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_STSO ++ case ETHTOOL_STSO: { ++ struct ethtool_value edata; ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (copy_from_user(&edata, pAddr, sizeof(edata))) { ++ return -EFAULT; ++ } ++ if (edata.data) { ++ netdev->features |= NETIF_F_TSO; ++ } else { ++ netdev->features &= ~NETIF_F_TSO; ++ } ++ return 0; ++ } ++ return -EOPNOTSUPP; ++ } ++#endif ++#endif ++#ifdef ETHTOOL_GCOALESCE ++ case ETHTOOL_GCOALESCE: { ++ struct ethtool_coalesce ecoalesc = { ETHTOOL_GCOALESCE }; ++ getModerationParams(pAC, port, &ecoalesc); ++ if(copy_to_user(pAddr, &ecoalesc, sizeof(ecoalesc))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_SCOALESCE ++ case ETHTOOL_SCOALESCE: { ++ struct ethtool_coalesce ecoalesc; ++ if(copy_from_user(&ecoalesc, pAddr, sizeof(ecoalesc))) { ++ return -EFAULT; ++ } ++ return setModerationParams(pAC, port, &ecoalesc); ++ } ++#endif ++#ifdef ETHTOOL_GWOL ++ case ETHTOOL_GWOL: { ++ struct ethtool_wolinfo ewol = { ETHTOOL_GWOL }; ++ getWOLsettings(pAC, port, &ewol); ++ if(copy_to_user(pAddr, &ewol, sizeof(ewol))) { ++ return -EFAULT; ++ } ++ return 0; ++ } ++#endif ++#ifdef ETHTOOL_SWOL ++ case ETHTOOL_SWOL: { ++ struct ethtool_wolinfo ewol; ++ if(copy_from_user(&ewol, pAddr, sizeof(ewol))) { ++ return -EFAULT; ++ } ++ return setWOLsettings(pAC, port, &ewol); ++ } ++#endif ++ default: ++ return -EOPNOTSUPP; ++ } ++} /* SkEthIoctl() */ ++ ++/****************************************************************************** ++ * ++ * Local Functions ++ * ++ *****************************************************************************/ ++ ++#ifdef ETHTOOL_GSET ++/***************************************************************************** ++ * ++ * getSettings - retrieves the current settings of the selected adapter ++ * ++ * Description: ++ * The current configuration of the selected adapter is returned. ++ * This configuration involves a)speed, b)duplex and c)autoneg plus ++ * a number of other variables. ++ * ++ * Returns: N/A ++ * ++ */ ++static void getSettings( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_cmd *ecmd) /* mandatory command structure for results */ ++{ ++ SK_GEPORT *pPort = &pAC->GIni.GP[port]; ++ ++ static int DuplexAutoNegConfMap[9][3]= { ++ { -1 , -1 , -1 }, ++ { 0 , -1 , -1 }, ++ { SK_LMODE_HALF , DUPLEX_HALF, AUTONEG_DISABLE }, ++ { SK_LMODE_FULL , DUPLEX_FULL, AUTONEG_DISABLE }, ++ { SK_LMODE_AUTOHALF , DUPLEX_HALF, AUTONEG_ENABLE }, ++ { SK_LMODE_AUTOFULL , DUPLEX_FULL, AUTONEG_ENABLE }, ++ { SK_LMODE_AUTOBOTH , DUPLEX_FULL, AUTONEG_ENABLE }, ++ { SK_LMODE_AUTOSENSE , -1 , -1 }, ++ { SK_LMODE_INDETERMINATED, -1 , -1 } ++ }; ++ ++ static int SpeedConfMap[6][2] = { ++ { 0 , -1 }, ++ { SK_LSPEED_AUTO , -1 }, ++ { SK_LSPEED_10MBPS , SPEED_10 }, ++ { SK_LSPEED_100MBPS , SPEED_100 }, ++ { SK_LSPEED_1000MBPS , SPEED_1000 }, ++ { SK_LSPEED_INDETERMINATED, -1 } ++ }; ++ ++ static int AdvSpeedMap[6][2] = { ++ { 0 , -1 }, ++ { SK_LSPEED_AUTO , -1 }, ++ { SK_LSPEED_10MBPS , ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full }, ++ { SK_LSPEED_100MBPS , ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full }, ++ { SK_LSPEED_1000MBPS , ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full}, ++ { SK_LSPEED_INDETERMINATED, -1 } ++ }; ++ ++ ecmd->phy_address = port; ++ ecmd->speed = SpeedConfMap[pPort->PLinkSpeedUsed][1]; ++ ecmd->duplex = DuplexAutoNegConfMap[pPort->PLinkModeStatus][1]; ++ ecmd->autoneg = DuplexAutoNegConfMap[pPort->PLinkModeStatus][2]; ++ ecmd->transceiver = XCVR_INTERNAL; ++ ++ if (pAC->GIni.GICopperType) { ++ ecmd->port = PORT_TP; ++ ecmd->supported = (SUPP_COPPER_ALL|SUPPORTED_Autoneg); ++ if (pAC->GIni.GIGenesis) { ++ ecmd->supported &= ~(SUPPORTED_10baseT_Half); ++ ecmd->supported &= ~(SUPPORTED_10baseT_Full); ++ ecmd->supported &= ~(SUPPORTED_100baseT_Half); ++ ecmd->supported &= ~(SUPPORTED_100baseT_Full); ++ } else { ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) { ++ ecmd->supported &= ~(SUPPORTED_1000baseT_Half); ++ } ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ ecmd->supported &= ~(SUPPORTED_1000baseT_Half); ++ ecmd->supported &= ~(SUPPORTED_1000baseT_Full); ++ } ++ } ++ if (pAC->GIni.GP[0].PLinkSpeed != SK_LSPEED_AUTO) { ++ ecmd->advertising = AdvSpeedMap[pPort->PLinkSpeed][1]; ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) { ++ ecmd->advertising &= ~(SUPPORTED_1000baseT_Half); ++ } ++ } else { ++ ecmd->advertising = ecmd->supported; ++ } ++ if (ecmd->autoneg == AUTONEG_ENABLE) { ++ ecmd->advertising |= ADVERTISED_Autoneg; ++ } ++ } else { ++ ecmd->port = PORT_FIBRE; ++ ecmd->supported = (SUPP_FIBRE_ALL); ++ ecmd->advertising = (ADV_FIBRE_ALL); ++ } ++} ++#endif ++ ++#ifdef ETHTOOL_SSET ++/***************************************************************************** ++ * ++ * setSettings - configures the settings of a selected adapter ++ * ++ * Description: ++ * Possible settings that may be altered are a)speed, b)duplex or ++ * c)autonegotiation. ++ * ++ * Returns: ++ * ==0: everything fine, no error ++ * !=0: the return value is the error code of the failure ++ */ ++static int setSettings( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_cmd *ecmd) /* command structure containing settings */ ++{ ++ DEV_NET *pNet = (DEV_NET *) pAC->dev[port]->priv; ++ SK_U32 Instance; ++ char Buf[4]; ++ unsigned int Len = 1; ++ int Ret; ++ ++ if (port == 0) { ++ Instance = (pAC->RlmtNets == 2) ? 1 : 2; ++ } else { ++ Instance = (pAC->RlmtNets == 2) ? 2 : 3; ++ } ++ ++ if (((ecmd->autoneg == AUTONEG_DISABLE) || (ecmd->autoneg == AUTONEG_ENABLE)) && ++ ((ecmd->duplex == DUPLEX_FULL) || (ecmd->duplex == DUPLEX_HALF))) { ++ if (ecmd->autoneg == AUTONEG_DISABLE) { ++ if (ecmd->duplex == DUPLEX_FULL) { ++ *Buf = (char) SK_LMODE_FULL; ++ } else { ++ *Buf = (char) SK_LMODE_HALF; ++ } ++ } else { ++ if (ecmd->duplex == DUPLEX_FULL) { ++ *Buf = (char) SK_LMODE_AUTOFULL; ++ } else { ++ *Buf = (char) SK_LMODE_AUTOHALF; ++ } ++ } ++ ++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_LINK_MODE, ++ &Buf, &Len, Instance, pNet->NetNr); ++ ++ if (Ret != SK_PNMI_ERR_OK) { ++ return -EINVAL; ++ } ++ } ++ ++ if ((ecmd->speed == SPEED_1000) || ++ (ecmd->speed == SPEED_100) || ++ (ecmd->speed == SPEED_10)) { ++ if (ecmd->speed == SPEED_1000) { ++ *Buf = (char) SK_LSPEED_1000MBPS; ++ } else if (ecmd->speed == SPEED_100) { ++ *Buf = (char) SK_LSPEED_100MBPS; ++ } else { ++ *Buf = (char) SK_LSPEED_10MBPS; ++ } ++ ++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_SPEED_MODE, ++ &Buf, &Len, Instance, pNet->NetNr); ++ ++ if (Ret != SK_PNMI_ERR_OK) { ++ return -EINVAL; ++ } ++ } else { ++ return -EINVAL; ++ } ++ return 0; ++} ++#endif ++ ++#ifdef ETHTOOL_GPAUSEPARAM ++/***************************************************************************** ++ * ++ * getPauseParams - retrieves the pause parameters ++ * ++ * Description: ++ * All current pause parameters of a selected adapter are placed ++ * in the passed ethtool_pauseparam structure and are returned. ++ * ++ * Returns: N/A ++ * ++ */ ++static void getPauseParams( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_pauseparam *epause) /* pause parameter struct for result */ ++{ ++ SK_GEPORT *pPort = &pAC->GIni.GP[port]; ++ ++ epause->rx_pause = 0; ++ epause->tx_pause = 0; ++ ++ if (pPort->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND) { ++ epause->tx_pause = 1; ++ } ++ if ((pPort->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC) || ++ (pPort->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM)) { ++ epause->tx_pause = 1; ++ epause->rx_pause = 1; ++ } ++ ++ if ((epause->rx_pause == 0) && (epause->tx_pause == 0)) { ++ epause->autoneg = SK_FALSE; ++ } else { ++ epause->autoneg = SK_TRUE; ++ } ++} ++#endif ++ ++#ifdef ETHTOOL_SPAUSEPARAM ++/***************************************************************************** ++ * ++ * setPauseParams - configures the pause parameters of an adapter ++ * ++ * Description: ++ * This function sets the Rx or Tx pause parameters ++ * ++ * Returns: ++ * ==0: everything fine, no error ++ * !=0: the return value is the error code of the failure ++ */ ++static int setPauseParams( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_pauseparam *epause) /* pause parameter struct with params */ ++{ ++ SK_GEPORT *pPort = &pAC->GIni.GP[port]; ++ DEV_NET *pNet = (DEV_NET *) pAC->dev[port]->priv; ++ int PrevSpeedVal = pPort->PLinkSpeedUsed; ++ ++ SK_U32 Instance; ++ char Buf[4]; ++ int Ret; ++ SK_BOOL prevAutonegValue = SK_TRUE; ++ int prevTxPause = 0; ++ int prevRxPause = 0; ++ unsigned int Len = 1; ++ ++ if (port == 0) { ++ Instance = (pAC->RlmtNets == 2) ? 1 : 2; ++ } else { ++ Instance = (pAC->RlmtNets == 2) ? 2 : 3; ++ } ++ ++ /* ++ ** we have to determine the current settings to see if ++ ** the operator requested any modification of the flow ++ ** control parameters... ++ */ ++ if (pPort->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND) { ++ prevTxPause = 1; ++ } ++ if ((pPort->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC) || ++ (pPort->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM)) { ++ prevTxPause = 1; ++ prevRxPause = 1; ++ } ++ ++ if ((prevRxPause == 0) && (prevTxPause == 0)) { ++ prevAutonegValue = SK_FALSE; ++ } ++ ++ ++ /* ++ ** perform modifications regarding the changes ++ ** requested by the operator ++ */ ++ if (epause->autoneg != prevAutonegValue) { ++ if (epause->autoneg == AUTONEG_DISABLE) { ++ *Buf = (char) SK_FLOW_MODE_NONE; ++ } else { ++ *Buf = (char) SK_FLOW_MODE_SYMMETRIC; ++ } ++ } else { ++ if(epause->rx_pause && epause->tx_pause) { ++ *Buf = (char) SK_FLOW_MODE_SYMMETRIC; ++ } else if (epause->rx_pause && !epause->tx_pause) { ++ *Buf = (char) SK_FLOW_MODE_SYM_OR_REM; ++ } else if(!epause->rx_pause && epause->tx_pause) { ++ *Buf = (char) SK_FLOW_MODE_LOC_SEND; ++ } else { ++ *Buf = (char) SK_FLOW_MODE_NONE; ++ } ++ } ++ ++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_FLOWCTRL_MODE, ++ &Buf, &Len, Instance, pNet->NetNr); ++ ++ if (Ret != SK_PNMI_ERR_OK) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_CTRL, ++ ("ethtool (sk98lin): error changing rx/tx pause (%i)\n", Ret)); ++ } else { ++ Len = 1; /* set buffer length to correct value */ ++ } ++ ++ /* ++ ** It may be that autoneg has been disabled! Therefore ++ ** set the speed to the previously used value... ++ */ ++ *Buf = (char) PrevSpeedVal; ++ ++ Ret = SkPnmiSetVar(pAC, pAC->IoBase, OID_SKGE_SPEED_MODE, ++ &Buf, &Len, Instance, pNet->NetNr); ++ ++ if (Ret != SK_PNMI_ERR_OK) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_CTRL, ++ ("ethtool (sk98lin): error setting speed (%i)\n", Ret)); ++ } ++ return 0; ++} ++#endif ++ ++#ifdef ETHTOOL_GCOALESCE ++/***************************************************************************** ++ * ++ * getModerationParams - retrieves the IRQ moderation settings ++ * ++ * Description: ++ * All current IRQ moderation settings of a selected adapter are placed ++ * in the passed ethtool_coalesce structure and are returned. ++ * ++ * Returns: N/A ++ * ++ */ ++static void getModerationParams( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_coalesce *ecoalesc) /* IRQ moderation struct for results */ ++{ ++ DIM_INFO *Info = &pAC->DynIrqModInfo; ++ SK_BOOL UseTxIrqModeration = SK_FALSE; ++ SK_BOOL UseRxIrqModeration = SK_FALSE; ++ ++ if (Info->IntModTypeSelect != C_INT_MOD_NONE) { ++ if (CHIP_ID_YUKON_2(pAC)) { ++ UseRxIrqModeration = SK_TRUE; ++ UseTxIrqModeration = SK_TRUE; ++ } else { ++ if ((Info->MaskIrqModeration == IRQ_MASK_RX_ONLY) || ++ (Info->MaskIrqModeration == IRQ_MASK_SP_RX) || ++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) { ++ UseRxIrqModeration = SK_TRUE; ++ } ++ if ((Info->MaskIrqModeration == IRQ_MASK_TX_ONLY) || ++ (Info->MaskIrqModeration == IRQ_MASK_SP_TX) || ++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) { ++ UseTxIrqModeration = SK_TRUE; ++ } ++ } ++ ++ if (UseRxIrqModeration) { ++ ecoalesc->rx_coalesce_usecs = 1000000 / Info->MaxModIntsPerSec; ++ } ++ if (UseTxIrqModeration) { ++ ecoalesc->tx_coalesce_usecs = 1000000 / Info->MaxModIntsPerSec; ++ } ++ if (Info->IntModTypeSelect == C_INT_MOD_DYNAMIC) { ++ ecoalesc->rate_sample_interval = Info->DynIrqModSampleInterval; ++ if (UseRxIrqModeration) { ++ ecoalesc->use_adaptive_rx_coalesce = 1; ++ ecoalesc->rx_coalesce_usecs_low = ++ 1000000 / Info->MaxModIntsPerSecLowerLimit; ++ ecoalesc->rx_coalesce_usecs_high = ++ 1000000 / Info->MaxModIntsPerSecUpperLimit; ++ } ++ if (UseTxIrqModeration) { ++ ecoalesc->use_adaptive_tx_coalesce = 1; ++ ecoalesc->tx_coalesce_usecs_low = ++ 1000000 / Info->MaxModIntsPerSecLowerLimit; ++ ecoalesc->tx_coalesce_usecs_high = ++ 1000000 / Info->MaxModIntsPerSecUpperLimit; ++ } ++ } ++ } ++} ++#endif ++ ++#ifdef ETHTOOL_SCOALESCE ++/***************************************************************************** ++ * ++ * setModerationParams - configures the IRQ moderation of an adapter ++ * ++ * Description: ++ * Depending on the desired IRQ moderation parameters, either a) static, ++ * b) dynamic or c) no moderation is configured. ++ * ++ * Returns: ++ * ==0: everything fine, no error ++ * !=0: the return value is the error code of the failure ++ * ++ * Notes: ++ * The supported timeframe for the coalesced interrupts ranges from ++ * 33.333us (30 IntsPerSec) down to 25us (40.000 IntsPerSec). ++ * Any requested value that is not in this range will abort the request! ++ */ ++static int setModerationParams( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_coalesce *ecoalesc) /* IRQ moderation struct with params */ ++{ ++ DIM_INFO *Info = &pAC->DynIrqModInfo; ++ int PrevModeration = Info->IntModTypeSelect; ++ ++ Info->IntModTypeSelect = C_INT_MOD_NONE; /* initial default */ ++ ++ if ((ecoalesc->rx_coalesce_usecs) || (ecoalesc->tx_coalesce_usecs)) { ++ if (ecoalesc->rx_coalesce_usecs) { ++ if ((ecoalesc->rx_coalesce_usecs < 25) || ++ (ecoalesc->rx_coalesce_usecs > 33333)) { ++ return -EINVAL; ++ } ++ } ++ if (ecoalesc->tx_coalesce_usecs) { ++ if ((ecoalesc->tx_coalesce_usecs < 25) || ++ (ecoalesc->tx_coalesce_usecs > 33333)) { ++ return -EINVAL; ++ } ++ } ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if ((Info->MaskIrqModeration == IRQ_MASK_SP_RX) || ++ (Info->MaskIrqModeration == IRQ_MASK_SP_TX) || ++ (Info->MaskIrqModeration == IRQ_MASK_RX_TX_SP)) { ++ Info->MaskIrqModeration = IRQ_MASK_SP_ONLY; ++ } ++ } ++ Info->IntModTypeSelect = C_INT_MOD_STATIC; ++ if (ecoalesc->rx_coalesce_usecs) { ++ Info->MaxModIntsPerSec = ++ 1000000 / ecoalesc->rx_coalesce_usecs; ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (Info->MaskIrqModeration == IRQ_MASK_TX_ONLY) { ++ Info->MaskIrqModeration = IRQ_MASK_TX_RX; ++ } ++ if (Info->MaskIrqModeration == IRQ_MASK_SP_ONLY) { ++ Info->MaskIrqModeration = IRQ_MASK_SP_RX; ++ } ++ if (Info->MaskIrqModeration == IRQ_MASK_SP_TX) { ++ Info->MaskIrqModeration = IRQ_MASK_RX_TX_SP; ++ } ++ } else { ++ Info->MaskIrqModeration = Y2_IRQ_MASK; ++ } ++ } ++ if (ecoalesc->tx_coalesce_usecs) { ++ Info->MaxModIntsPerSec = ++ 1000000 / ecoalesc->tx_coalesce_usecs; ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (Info->MaskIrqModeration == IRQ_MASK_RX_ONLY) { ++ Info->MaskIrqModeration = IRQ_MASK_TX_RX; ++ } ++ if (Info->MaskIrqModeration == IRQ_MASK_SP_ONLY) { ++ Info->MaskIrqModeration = IRQ_MASK_SP_TX; ++ } ++ if (Info->MaskIrqModeration == IRQ_MASK_SP_RX) { ++ Info->MaskIrqModeration = IRQ_MASK_RX_TX_SP; ++ } ++ } else { ++ Info->MaskIrqModeration = Y2_IRQ_MASK; ++ } ++ } ++ } ++ if ((ecoalesc->rate_sample_interval) || ++ (ecoalesc->rx_coalesce_usecs_low) || ++ (ecoalesc->tx_coalesce_usecs_low) || ++ (ecoalesc->rx_coalesce_usecs_high)|| ++ (ecoalesc->tx_coalesce_usecs_high)) { ++ if (ecoalesc->rate_sample_interval) { ++ if ((ecoalesc->rate_sample_interval < 1) || ++ (ecoalesc->rate_sample_interval > 10)) { ++ return -EINVAL; ++ } ++ } ++ if (ecoalesc->rx_coalesce_usecs_low) { ++ if ((ecoalesc->rx_coalesce_usecs_low < 25) || ++ (ecoalesc->rx_coalesce_usecs_low > 33333)) { ++ return -EINVAL; ++ } ++ } ++ if (ecoalesc->rx_coalesce_usecs_high) { ++ if ((ecoalesc->rx_coalesce_usecs_high < 25) || ++ (ecoalesc->rx_coalesce_usecs_high > 33333)) { ++ return -EINVAL; ++ } ++ } ++ if (ecoalesc->tx_coalesce_usecs_low) { ++ if ((ecoalesc->tx_coalesce_usecs_low < 25) || ++ (ecoalesc->tx_coalesce_usecs_low > 33333)) { ++ return -EINVAL; ++ } ++ } ++ if (ecoalesc->tx_coalesce_usecs_high) { ++ if ((ecoalesc->tx_coalesce_usecs_high < 25) || ++ (ecoalesc->tx_coalesce_usecs_high > 33333)) { ++ return -EINVAL; ++ } ++ } ++ ++ Info->IntModTypeSelect = C_INT_MOD_DYNAMIC; ++ if (ecoalesc->rate_sample_interval) { ++ Info->DynIrqModSampleInterval = ++ ecoalesc->rate_sample_interval; ++ } ++ if (ecoalesc->rx_coalesce_usecs_low) { ++ Info->MaxModIntsPerSecLowerLimit = ++ 1000000 / ecoalesc->rx_coalesce_usecs_low; ++ } ++ if (ecoalesc->tx_coalesce_usecs_low) { ++ Info->MaxModIntsPerSecLowerLimit = ++ 1000000 / ecoalesc->tx_coalesce_usecs_low; ++ } ++ if (ecoalesc->rx_coalesce_usecs_high) { ++ Info->MaxModIntsPerSecUpperLimit = ++ 1000000 / ecoalesc->rx_coalesce_usecs_high; ++ } ++ if (ecoalesc->tx_coalesce_usecs_high) { ++ Info->MaxModIntsPerSecUpperLimit = ++ 1000000 / ecoalesc->tx_coalesce_usecs_high; ++ } ++ } ++ ++ if ((PrevModeration == C_INT_MOD_NONE) && ++ (Info->IntModTypeSelect != C_INT_MOD_NONE)) { ++ SkDimEnableModerationIfNeeded(pAC); ++ } ++ if (PrevModeration != C_INT_MOD_NONE) { ++ SkDimDisableModeration(pAC, PrevModeration); ++ if (Info->IntModTypeSelect != C_INT_MOD_NONE) { ++ SkDimEnableModerationIfNeeded(pAC); ++ } ++ } ++ ++ return 0; ++} ++#endif ++ ++#ifdef ETHTOOL_GWOL ++/***************************************************************************** ++ * ++ * getWOLsettings - retrieves the WOL settings of the selected adapter ++ * ++ * Description: ++ * All current WOL settings of a selected adapter are placed in the ++ * passed ethtool_wolinfo structure and are returned to the caller. ++ * ++ * Returns: N/A ++ * ++ */ ++static void getWOLsettings( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_wolinfo *ewol) /* mandatory WOL structure for results */ ++{ ++ ewol->supported = pAC->WolInfo.SupportedWolOptions; ++ ewol->wolopts = pAC->WolInfo.ConfiguredWolOptions; ++ ++ return; ++} ++#endif ++ ++#ifdef ETHTOOL_SWOL ++/***************************************************************************** ++ * ++ * setWOLsettings - configures the WOL settings of a selected adapter ++ * ++ * Description: ++ * The WOL settings of a selected adapter are configured regarding ++ * the parameters in the passed ethtool_wolinfo structure. ++ * Note that currently only wake on magic packet is supported! ++ * ++ * Returns: ++ * ==0: everything fine, no error ++ * !=0: the return value is the error code of the failure ++ */ ++static int setWOLsettings( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_wolinfo *ewol) /* WOL structure containing settings */ ++{ ++ if (((ewol->wolopts & WAKE_MAGIC) == WAKE_MAGIC) || (ewol->wolopts == 0)) { ++ pAC->WolInfo.ConfiguredWolOptions = ewol->wolopts; ++ return 0; ++ } ++ return -EFAULT; ++} ++#endif ++ ++#ifdef ETHTOOL_GDRVINFO ++/***************************************************************************** ++ * ++ * getDriverInfo - returns generic driver and adapter information ++ * ++ * Description: ++ * Generic driver information is returned via this function, such as ++ * the name of the driver, its version and and firmware version. ++ * In addition to this, the location of the selected adapter is ++ * returned as a bus info string (e.g. '01:05.0'). ++ * ++ * Returns: N/A ++ * ++ */ ++static void getDriverInfo( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_drvinfo *edrvinfo) /* mandatory info structure for results */ ++{ ++ char versionString[32]; ++ ++ snprintf(versionString, 32, "%s (%s)", VER_STRING, PATCHLEVEL); ++ strncpy(edrvinfo->driver, DRIVER_FILE_NAME , 32); ++ strncpy(edrvinfo->version, versionString , 32); ++ strncpy(edrvinfo->fw_version, "N/A", 32); ++ strncpy(edrvinfo->bus_info, pci_name(pAC->PciDev), 32); ++ ++#ifdef ETHTOOL_GSTATS ++ edrvinfo->n_stats = SK98LIN_STATS_LEN; ++#endif ++} ++#endif ++ ++#ifdef ETHTOOL_PHYS_ID ++/***************************************************************************** ++ * ++ * startLocateNIC - start the locate NIC feature of the elected adapter ++ * ++ * Description: ++ * This function is used if the user want to locate a particular NIC. ++ * All LEDs are regularly switched on and off, so the NIC can easily ++ * be identified. ++ * ++ * Returns: ++ * ==0: everything fine, no error, locateNIC test was started ++ * !=0: one locateNIC test runs already ++ * ++ */ ++static int startLocateNIC( ++SK_AC *pAC, /* pointer to adapter control context */ ++int port, /* the port of the selected adapter */ ++struct ethtool_value *blinkSecs) /* how long the LEDs should blink in seconds */ ++{ ++ struct SK_NET_DEVICE *pDev = pAC->dev[port]; ++ int OtherPort = (port) ? 0 : 1; ++ struct SK_NET_DEVICE *pOtherDev = pAC->dev[OtherPort]; ++ ++ if (isLocateNICrunning) { ++ return -EFAULT; ++ } ++ isLocateNICrunning = SK_TRUE; ++ currentPortIndex = port; ++ isDualNetCard = (pDev != pOtherDev) ? SK_TRUE : SK_FALSE; ++ ++ if (netif_running(pAC->dev[port])) { ++ boardWasDown[0] = SK_FALSE; ++ } else { ++ (*pDev->open)(pDev); ++ boardWasDown[0] = SK_TRUE; ++ } ++ ++ if (isDualNetCard) { ++ if (netif_running(pAC->dev[OtherPort])) { ++ boardWasDown[1] = SK_FALSE; ++ } else { ++ (*pOtherDev->open)(pOtherDev); ++ boardWasDown[1] = SK_TRUE; ++ } ++ } ++ ++ if ((blinkSecs->data < 1) || (blinkSecs->data > 30)) { ++ blinkSecs->data = 3; /* three seconds default */ ++ } ++ nbrBlinkQuarterSeconds = 4*blinkSecs->data; ++ ++ init_timer(&locateNICtimer); ++ locateNICtimer.function = toggleLeds; ++ locateNICtimer.data = (unsigned long) pAC; ++ locateNICtimer.expires = jiffies + HZ; /* initially 1sec */ ++ add_timer(&locateNICtimer); ++ ++ return 0; ++} ++ ++/***************************************************************************** ++ * ++ * toggleLeds - Changes the LED state of an adapter ++ * ++ * Description: ++ * This function changes the current state of all LEDs of an adapter so ++ * that it can be located by a user. If the requested time interval for ++ * this test has elapsed, this function cleans up everything that was ++ * temporarily setup during the locate NIC test. This involves of course ++ * also closing or opening any adapter so that the initial board state ++ * is recovered. ++ * ++ * Returns: N/A ++ * ++ */ ++static void toggleLeds( ++unsigned long ptr) /* holds the pointer to adapter control context */ ++{ ++ SK_AC *pAC = (SK_AC *) ptr; ++ int port = currentPortIndex; ++ SK_IOC IoC = pAC->IoBase; ++ struct SK_NET_DEVICE *pDev = pAC->dev[port]; ++ int OtherPort = (port) ? 0 : 1; ++ struct SK_NET_DEVICE *pOtherDev = pAC->dev[OtherPort]; ++ ++ SK_U16 YukLedOn = (PHY_M_LED_MO_DUP(MO_LED_ON) | ++ PHY_M_LED_MO_10(MO_LED_ON) | ++ PHY_M_LED_MO_100(MO_LED_ON) | ++ PHY_M_LED_MO_1000(MO_LED_ON) | ++ PHY_M_LED_MO_RX(MO_LED_ON)); ++ SK_U16 YukLedOff = (PHY_M_LED_MO_DUP(MO_LED_OFF) | ++ PHY_M_LED_MO_10(MO_LED_OFF) | ++ PHY_M_LED_MO_100(MO_LED_OFF) | ++ PHY_M_LED_MO_1000(MO_LED_OFF) | ++ PHY_M_LED_MO_RX(MO_LED_OFF)); ++ ++ nbrBlinkQuarterSeconds--; ++ if (nbrBlinkQuarterSeconds <= 0) { ++ (*pDev->stop)(pDev); ++ if (isDualNetCard) { ++ (*pOtherDev->stop)(pOtherDev); ++ } ++ ++ if (!boardWasDown[0]) { ++ (*pDev->open)(pDev); ++ } ++ if (isDualNetCard) { ++ (*pOtherDev->open)(pOtherDev); ++ } ++ isDualNetCard = SK_FALSE; ++ isLocateNICrunning = SK_FALSE; ++ return; ++ } ++ ++ doSwitchLEDsOn = (doSwitchLEDsOn) ? SK_FALSE : SK_TRUE; ++ if (doSwitchLEDsOn) { ++ if (pAC->GIni.GIGenesis) { ++ SK_OUT8(IoC,MR_ADDR(port,LNK_LED_REG),(SK_U8)SK_LNK_ON); ++ SkGeYellowLED(pAC,IoC,LED_ON >> 1); ++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,RX_LED_INI),SK_LED_TST); ++ if (pAC->GIni.GP[port].PhyType == SK_PHY_BCOM) { ++ SkXmPhyWrite(pAC,IoC,port,PHY_BCOM_P_EXT_CTRL,PHY_B_PEC_LED_ON); ++ } else if (pAC->GIni.GP[port].PhyType == SK_PHY_LONE) { ++ SkXmPhyWrite(pAC,IoC,port,PHY_LONE_LED_CFG,0x0800); ++ } else { ++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,TX_LED_INI),SK_LED_TST); ++ } ++ } else { ++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_CTRL,0); ++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_OVER,YukLedOn); ++ } ++ } else { ++ if (pAC->GIni.GIGenesis) { ++ SK_OUT8(IoC,MR_ADDR(port,LNK_LED_REG),(SK_U8)SK_LNK_OFF); ++ SkGeYellowLED(pAC,IoC,LED_OFF >> 1); ++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,RX_LED_INI),SK_LED_DIS); ++ if (pAC->GIni.GP[port].PhyType == SK_PHY_BCOM) { ++ SkXmPhyWrite(pAC,IoC,port,PHY_BCOM_P_EXT_CTRL,PHY_B_PEC_LED_OFF); ++ } else if (pAC->GIni.GP[port].PhyType == SK_PHY_LONE) { ++ SkXmPhyWrite(pAC,IoC,port,PHY_LONE_LED_CFG,PHY_L_LC_LEDT); ++ } else { ++ SkGeXmitLED(pAC,IoC,MR_ADDR(port,TX_LED_INI),SK_LED_DIS); ++ } ++ } else { ++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_CTRL,0); ++ SkGmPhyWrite(pAC,IoC,port,PHY_MARV_LED_OVER,YukLedOff); ++ } ++ } ++ ++ locateNICtimer.function = toggleLeds; ++ locateNICtimer.data = (unsigned long) pAC; ++ locateNICtimer.expires = jiffies + (HZ/4); /* 250ms */ ++ add_timer(&locateNICtimer); ++} ++#endif ++ ++/***************************************************************************** ++ * ++ * getPortNumber - evaluates the port number of an interface ++ * ++ * Description: ++ * It may be that the current interface refers to one which is located ++ * on a dual net adapter. Hence, this function will return the correct ++ * port for further use. ++ * ++ * Returns: ++ * the port number that corresponds to the selected adapter ++ * ++ */ ++static int getPortNumber( ++struct net_device *netdev, /* the pointer to netdev structure */ ++struct ifreq *ifr) /* what interface the request refers to? */ ++{ ++ DEV_NET *pNet = (DEV_NET*) netdev->priv; ++ SK_AC *pAC = pNet->pAC; ++ ++ if (pAC->dev[1] != pAC->dev[0]) { ++ if (!strcmp(pAC->dev[1]->name, ifr->ifr_name)) { ++ return 1; /* port index 1 */ ++ } ++ } ++ return 0; ++} ++ ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skge.c linux-2.6.9.new/drivers/net/sk98lin/skge.c +--- linux-2.6.9.old/drivers/net/sk98lin/skge.c 2006-08-01 06:23:10.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skge.c 2006-12-07 14:35:03.000000000 +0800 +@@ -1,32 +1,26 @@ + /****************************************************************************** + * +- * Name: skge.c +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.45 $ +- * Date: $Date: 2004/02/12 14:41:02 $ +- * Purpose: The main driver source module ++ * Name: skge.c ++ * Project: GEnesis, PCI Gigabit Ethernet Adapter ++ * Version: $Revision: 1.60.2.51 $ ++ * Date: $Date: 2005/06/17 14:09:14 $ ++ * Purpose: The main driver source module + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * Driver for Marvell Yukon chipset and SysKonnect Gigabit Ethernet + * Server Adapters. + * +- * Created 10-Feb-1999, based on Linux' acenic.c, 3c59x.c and +- * SysKonnects GEnesis Solaris driver +- * Author: Christoph Goos (cgoos@syskonnect.de) +- * Mirko Lindner (mlindner@syskonnect.de) ++ * Author: Mirko Lindner (mlindner@syskonnect.de) ++ * Ralph Roesler (rroesler@syskonnect.de) + * + * Address all question to: linux@syskonnect.de + * +- * The technical manual for the adapters is available from SysKonnect's +- * web pages: www.syskonnect.com +- * Goto "Support" and search Knowledge Base for "manual". +- * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or +@@ -38,71 +32,33 @@ + + /****************************************************************************** + * +- * Possible compiler options (#define xxx / -Dxxx): +- * +- * debugging can be enable by changing SK_DEBUG_CHKMOD and +- * SK_DEBUG_CHKCAT in makefile (described there). +- * +- ******************************************************************************/ +- +-/****************************************************************************** +- * + * Description: + * +- * This is the main module of the Linux GE driver. +- * +- * All source files except skge.c, skdrv1st.h, skdrv2nd.h and sktypes.h +- * are part of SysKonnect's COMMON MODULES for the SK-98xx adapters. +- * Those are used for drivers on multiple OS', so some thing may seem +- * unnecessary complicated on Linux. Please do not try to 'clean up' +- * them without VERY good reasons, because this will make it more +- * difficult to keep the Linux driver in synchronisation with the +- * other versions. +- * +- * Include file hierarchy: +- * +- * +- * +- * "h/skdrv1st.h" +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * +- * those three depending on kernel version used: +- * +- * +- * +- * +- * +- * "h/skerror.h" +- * "h/skdebug.h" +- * "h/sktypes.h" +- * "h/lm80.h" +- * "h/xmac_ii.h" +- * +- * "h/skdrv2nd.h" +- * "h/skqueue.h" +- * "h/skgehwt.h" +- * "h/sktimer.h" +- * "h/ski2c.h" +- * "h/skgepnmi.h" +- * "h/skvpd.h" +- * "h/skgehw.h" +- * "h/skgeinit.h" +- * "h/skaddr.h" +- * "h/skgesirq.h" +- * "h/skcsum.h" +- * "h/skrlmt.h" ++ * All source files in this sk98lin directory except of the sk98lin ++ * Linux specific files ++ * ++ * - skdim.c ++ * - skethtool.c ++ * - skge.c ++ * - skproc.c ++ * - sky2.c ++ * - Makefile ++ * - h/skdrv1st.h ++ * - h/skdrv2nd.h ++ * - h/sktypes.h ++ * - h/skversion.h ++ * ++ * are part of SysKonnect's common modules for the SK-9xxx adapters. ++ * ++ * Those common module files which are not Linux specific are used to ++ * build drivers on different OS' (e.g. Windows, MAC OS) so that those ++ * drivers are based on the same set of files ++ * ++ * At a first glance, this seems to complicate things unnescessarily on ++ * Linux, but please do not try to 'clean up' them without VERY good ++ * reasons, because this will make it more difficult to keep the sk98lin ++ * driver for Linux in synchronisation with the other drivers running on ++ * other operating systems. + * + ******************************************************************************/ + +@@ -110,11 +66,19 @@ + + #include + #include ++#include ++ ++#ifdef CONFIG_PROC_FS + #include ++#endif + + #include "h/skdrv1st.h" + #include "h/skdrv2nd.h" + ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) ++#include ++#endif ++ + /******************************************************************************* + * + * Defines +@@ -124,62 +88,14 @@ + /* for debuging on x86 only */ + /* #define BREAKPOINT() asm(" int $3"); */ + +-/* use the transmit hw checksum driver functionality */ +-#define USE_SK_TX_CHECKSUM +- +-/* use the receive hw checksum driver functionality */ +-#define USE_SK_RX_CHECKSUM +- +-/* use the scatter-gather functionality with sendfile() */ +-#define SK_ZEROCOPY +- +-/* use of a transmit complete interrupt */ +-#define USE_TX_COMPLETE +- +-/* +- * threshold for copying small receive frames +- * set to 0 to avoid copying, set to 9001 to copy all frames +- */ +-#define SK_COPY_THRESHOLD 50 +- +-/* number of adapters that can be configured via command line params */ +-#define SK_MAX_CARD_PARAM 16 +- +- +- +-/* +- * use those defines for a compile-in version of the driver instead +- * of command line parameters +- */ +-// #define LINK_SPEED_A {"Auto", } +-// #define LINK_SPEED_B {"Auto", } +-// #define AUTO_NEG_A {"Sense", } +-// #define AUTO_NEG_B {"Sense", } +-// #define DUP_CAP_A {"Both", } +-// #define DUP_CAP_B {"Both", } +-// #define FLOW_CTRL_A {"SymOrRem", } +-// #define FLOW_CTRL_B {"SymOrRem", } +-// #define ROLE_A {"Auto", } +-// #define ROLE_B {"Auto", } +-// #define PREF_PORT {"A", } +-// #define CON_TYPE {"Auto", } +-// #define RLMT_MODE {"CheckLinkState", } +- +-#define DEV_KFREE_SKB(skb) dev_kfree_skb(skb) +-#define DEV_KFREE_SKB_IRQ(skb) dev_kfree_skb_irq(skb) +-#define DEV_KFREE_SKB_ANY(skb) dev_kfree_skb_any(skb) +- + + /* Set blink mode*/ + #define OEM_CONFIG_VALUE ( SK_ACT_LED_BLINK | \ + SK_DUP_LED_NORMAL | \ + SK_LED_LINK100_ON) + +- +-/* Isr return value */ +-#define SkIsrRetVar irqreturn_t +-#define SkIsrRetNone IRQ_NONE +-#define SkIsrRetHandled IRQ_HANDLED ++#define CLEAR_AND_START_RX(Port) SK_OUT8(pAC->IoBase, RxQueueAddr[(Port)]+Q_CSR, CSR_START | CSR_IRQ_CL_F) ++#define CLEAR_TX_IRQ(Port,Prio) SK_OUT8(pAC->IoBase, TxQueueAddr[(Port)][(Prio)]+Q_CSR, CSR_IRQ_CL_F) + + + /******************************************************************************* +@@ -188,12 +104,23 @@ + * + ******************************************************************************/ + ++static int __devinit sk98lin_init_device(struct pci_dev *pdev, const struct pci_device_id *ent); ++static void sk98lin_remove_device(struct pci_dev *pdev); ++#ifdef CONFIG_PM ++static int sk98lin_suspend(struct pci_dev *pdev, u32 state); ++static int sk98lin_resume(struct pci_dev *pdev); ++static void SkEnableWOMagicPacket(SK_AC *pAC, SK_IOC IoC, SK_MAC_ADDR MacAddr); ++#endif ++#ifdef Y2_RECOVERY ++static void SkGeHandleKernelTimer(unsigned long ptr); ++void SkGeCheckTimer(DEV_NET *pNet); ++#endif + static void FreeResources(struct SK_NET_DEVICE *dev); + static int SkGeBoardInit(struct SK_NET_DEVICE *dev, SK_AC *pAC); + static SK_BOOL BoardAllocMem(SK_AC *pAC); + static void BoardFreeMem(SK_AC *pAC); + static void BoardInitMem(SK_AC *pAC); +-static void SetupRing(SK_AC*, void*, uintptr_t, RXD**, RXD**, RXD**, int*, SK_BOOL); ++static void SetupRing(SK_AC*, void*, uintptr_t, RXD**, RXD**, RXD**, int*, int*, SK_BOOL); + static SkIsrRetVar SkGeIsr(int irq, void *dev_id, struct pt_regs *ptregs); + static SkIsrRetVar SkGeIsrOnePort(int irq, void *dev_id, struct pt_regs *ptregs); + static int SkGeOpen(struct SK_NET_DEVICE *dev); +@@ -209,24 +136,37 @@ + static void FreeTxDescriptors(SK_AC*pAC, TX_PORT*); + static void FillRxRing(SK_AC*, RX_PORT*); + static SK_BOOL FillRxDescriptor(SK_AC*, RX_PORT*); ++#ifdef CONFIG_SK98LIN_NAPI ++static int SkGePoll(struct net_device *dev, int *budget); ++static void ReceiveIrq(SK_AC*, RX_PORT*, SK_BOOL, int*, int); ++#else + static void ReceiveIrq(SK_AC*, RX_PORT*, SK_BOOL); +-static void ClearAndStartRx(SK_AC*, int); +-static void ClearTxIrq(SK_AC*, int, int); ++#endif ++#ifdef SK_POLL_CONTROLLER ++static void SkGeNetPoll(struct SK_NET_DEVICE *dev); ++#endif + static void ClearRxRing(SK_AC*, RX_PORT*); + static void ClearTxRing(SK_AC*, TX_PORT*); + static int SkGeChangeMtu(struct SK_NET_DEVICE *dev, int new_mtu); + static void PortReInitBmu(SK_AC*, int); + static int SkGeIocMib(DEV_NET*, unsigned int, int); + static int SkGeInitPCI(SK_AC *pAC); +-static void StartDrvCleanupTimer(SK_AC *pAC); +-static void StopDrvCleanupTimer(SK_AC *pAC); +-static int XmitFrameSG(SK_AC*, TX_PORT*, struct sk_buff*); +- +-#ifdef SK_DIAG_SUPPORT + static SK_U32 ParseDeviceNbrFromSlotName(const char *SlotName); + static int SkDrvInitAdapter(SK_AC *pAC, int devNbr); + static int SkDrvDeInitAdapter(SK_AC *pAC, int devNbr); +-#endif ++extern void SkLocalEventQueue( SK_AC *pAC, ++ SK_U32 Class, ++ SK_U32 Event, ++ SK_U32 Param1, ++ SK_U32 Param2, ++ SK_BOOL Flag); ++extern void SkLocalEventQueue64( SK_AC *pAC, ++ SK_U32 Class, ++ SK_U32 Event, ++ SK_U64 Param, ++ SK_BOOL Flag); ++ ++static int XmitFrameSG(SK_AC*, TX_PORT*, struct sk_buff*); + + /******************************************************************************* + * +@@ -234,17 +174,33 @@ + * + ******************************************************************************/ + +-#ifdef CONFIG_PROC_FS +-static const char SK_Root_Dir_entry[] = "sk98lin"; +-static struct proc_dir_entry *pSkRootDir; +-extern struct file_operations sk_proc_fops; ++extern SK_BOOL SkY2AllocateResources(SK_AC *pAC); ++extern void SkY2FreeResources(SK_AC *pAC); ++extern void SkY2AllocateRxBuffers(SK_AC *pAC,SK_IOC IoC,int Port); ++extern void SkY2FreeRxBuffers(SK_AC *pAC,SK_IOC IoC,int Port); ++extern void SkY2FreeTxBuffers(SK_AC *pAC,SK_IOC IoC,int Port); ++extern SkIsrRetVar SkY2Isr(int irq,void *dev_id,struct pt_regs *ptregs); ++extern int SkY2Xmit(struct sk_buff *skb,struct SK_NET_DEVICE *dev); ++extern void SkY2PortStop(SK_AC *pAC,SK_IOC IoC,int Port,int Dir,int RstMode); ++extern void SkY2PortStart(SK_AC *pAC,SK_IOC IoC,int Port); ++extern int SkY2RlmtSend(SK_AC *pAC,int PortNr,struct sk_buff *pMessage); ++extern void SkY2RestartStatusUnit(SK_AC *pAC); ++#ifdef CONFIG_SK98LIN_NAPI ++extern int SkY2Poll(struct net_device *dev, int *budget); + #endif + + extern void SkDimEnableModerationIfNeeded(SK_AC *pAC); +-extern void SkDimDisplayModerationSettings(SK_AC *pAC); + extern void SkDimStartModerationTimer(SK_AC *pAC); + extern void SkDimModerate(SK_AC *pAC); + ++extern int SkEthIoctl(struct net_device *netdev, struct ifreq *ifr); ++ ++#ifdef CONFIG_PROC_FS ++static const char SK_Root_Dir_entry[] = "sk98lin"; ++static struct proc_dir_entry *pSkRootDir; ++extern struct file_operations sk_proc_fops; ++#endif ++ + #ifdef DEBUG + static void DumpMsg(struct sk_buff*, char*); + static void DumpData(char*, int); +@@ -252,12 +208,424 @@ + #endif + + /* global variables *********************************************************/ ++static const char *BootString = BOOT_STRING; + struct SK_NET_DEVICE *SkGeRootDev = NULL; + static SK_BOOL DoPrintInterfaceChange = SK_TRUE; + + /* local variables **********************************************************/ + static uintptr_t TxQueueAddr[SK_MAX_MACS][2] = {{0x680, 0x600},{0x780, 0x700}}; + static uintptr_t RxQueueAddr[SK_MAX_MACS] = {0x400, 0x480}; ++static int sk98lin_max_boards_found = 0; ++ ++#ifdef CONFIG_PROC_FS ++static struct proc_dir_entry *pSkRootDir; ++#endif ++ ++ ++ ++static struct pci_device_id sk98lin_pci_tbl[] __devinitdata = { ++/* { pci_vendor_id, pci_device_id, * SAMPLE ENTRY! * ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, */ ++ { 0x10b7, 0x1700, /* 3Com (10b7), Gigabit Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x10b7, 0x80eb, /* 3Com (10b7), 3Com 3C940B Gigabit LOM Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1148, 0x4300, /* SysKonnect (1148), SK-98xx Gigabit Ethernet Server Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1148, 0x4320, /* SysKonnect (1148), SK-98xx V2.0 Gigabit Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1148, 0x9000, /* SysKonnect (1148), SK-9Sxx 10/100/1000Base-T Server Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1148, 0x9E00, /* SysKonnect (1148), SK-9Exx 10/100/1000Base-T Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1186, 0x4b00, /* D-Link (1186), Gigabit Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1186, 0x4b01, /* D-Link (1186), Gigabit Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1186, 0x4c00, /* D-Link (1186), Gigabit Ethernet Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4320, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4340, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4341, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4342, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4343, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4344, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4345, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4346, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4347, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4350, /* Marvell (11ab), Fast Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4351, /* Marvell (11ab), Fast Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4360, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4361, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x4362, /* Marvell (11ab), Gigabit Ethernet Controller */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x11ab, 0x5005, /* Marvell (11ab), Belkin */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1371, 0x434e, /* CNet (1371), GigaCard Network Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1737, 0x1032, /* Linksys (1737), Gigabit Network Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0x1737, 0x1064, /* Linksys (1737), Gigabit Network Adapter */ ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { 0, } ++}; ++ ++MODULE_DEVICE_TABLE(pci, sk98lin_pci_tbl); ++ ++static struct pci_driver sk98lin_driver = { ++ .name = DRIVER_FILE_NAME, ++ .id_table = sk98lin_pci_tbl, ++ .probe = sk98lin_init_device, ++ .remove = __devexit_p(sk98lin_remove_device), ++#ifdef CONFIG_PM ++ .suspend = sk98lin_suspend, ++ .resume = sk98lin_resume ++#endif ++}; ++ ++ ++/***************************************************************************** ++ * ++ * sk98lin_init_device - initialize the adapter ++ * ++ * Description: ++ * This function initializes the adapter. Resources for ++ * the adapter are allocated and the adapter is brought into Init 1 ++ * state. ++ * ++ * Returns: ++ * 0, if everything is ok ++ * !=0, on error ++ */ ++static int __devinit sk98lin_init_device(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++ ++{ ++ static SK_BOOL sk98lin_boot_string = SK_FALSE; ++ static SK_BOOL sk98lin_proc_entry = SK_FALSE; ++ static int sk98lin_boards_found = 0; ++ SK_AC *pAC; ++ DEV_NET *pNet = NULL; ++ struct SK_NET_DEVICE *dev = NULL; ++ int retval; ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *pProcFile; ++#endif ++ ++ retval = pci_enable_device(pdev); ++ if (retval) { ++ printk(KERN_ERR "Cannot enable PCI device, " ++ "aborting.\n"); ++ return retval; ++ } ++ ++ dev = NULL; ++ pNet = NULL; ++ ++ ++ /* INSERT * We have to find the power-management capabilities */ ++ /* Find power-management capability. */ ++ ++ ++ ++ /* Configure DMA attributes. */ ++ retval = pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL); ++ if (!retval) { ++ retval = pci_set_dma_mask(pdev, (u64) 0xffffffff); ++ if (retval) ++ return retval; ++ } else { ++ return retval; ++ } ++ ++ ++ if ((dev = alloc_etherdev(sizeof(DEV_NET))) == NULL) { ++ printk(KERN_ERR "Unable to allocate etherdev " ++ "structure!\n"); ++ return -ENODEV; ++ } ++ ++ pNet = dev->priv; ++ pNet->pAC = kmalloc(sizeof(SK_AC), GFP_KERNEL); ++ if (pNet->pAC == NULL){ ++ free_netdev(dev); ++ printk(KERN_ERR "Unable to allocate adapter " ++ "structure!\n"); ++ return -ENODEV; ++ } ++ ++ ++ /* Print message */ ++ if (!sk98lin_boot_string) { ++ /* set display flag to TRUE so that */ ++ /* we only display this string ONCE */ ++ sk98lin_boot_string = SK_TRUE; ++ printk("%s\n", BootString); ++ } ++ ++ memset(pNet->pAC, 0, sizeof(SK_AC)); ++ pAC = pNet->pAC; ++ pAC->PciDev = pdev; ++ pAC->PciDevId = pdev->device; ++ pAC->dev[0] = dev; ++ pAC->dev[1] = dev; ++ sprintf(pAC->Name, "SysKonnect SK-98xx"); ++ pAC->CheckQueue = SK_FALSE; ++ ++ dev->irq = pdev->irq; ++ retval = SkGeInitPCI(pAC); ++ if (retval) { ++ printk("SKGE: PCI setup failed: %i\n", retval); ++ free_netdev(dev); ++ return -ENODEV; ++ } ++ ++ SET_MODULE_OWNER(dev); ++ ++ dev->open = &SkGeOpen; ++ dev->stop = &SkGeClose; ++ dev->get_stats = &SkGeStats; ++ dev->set_multicast_list = &SkGeSetRxMode; ++ dev->set_mac_address = &SkGeSetMacAddr; ++ dev->do_ioctl = &SkGeIoctl; ++ dev->change_mtu = &SkGeChangeMtu; ++ dev->flags &= ~IFF_RUNNING; ++#ifdef SK_POLL_CONTROLLER ++ dev->poll_controller = SkGeNetPoll; ++#endif ++ SET_NETDEV_DEV(dev, &pdev->dev); ++ ++ pAC->Index = sk98lin_boards_found; ++ ++ if (SkGeBoardInit(dev, pAC)) { ++ free_netdev(dev); ++ return -ENODEV; ++ } else { ++ ProductStr(pAC); ++ } ++ ++ /* shifter to later moment in time... */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ dev->hard_start_xmit = &SkY2Xmit; ++#ifdef CONFIG_SK98LIN_NAPI ++ dev->poll = &SkY2Poll; ++ dev->weight = 64; ++#endif ++ } else { ++ dev->hard_start_xmit = &SkGeXmit; ++#ifdef CONFIG_SK98LIN_NAPI ++ dev->poll = &SkGePoll; ++ dev->weight = 64; ++#endif ++ } ++ ++#ifdef NETIF_F_TSO ++#ifdef USE_SK_TSO_FEATURE ++ if (CHIP_ID_YUKON_2(pAC)) { ++ dev->features |= NETIF_F_TSO; ++ } ++#endif ++#endif ++#ifdef CONFIG_SK98LIN_ZEROCOPY ++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) ++ dev->features |= NETIF_F_SG; ++#endif ++#ifdef USE_SK_TX_CHECKSUM ++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) ++ dev->features |= NETIF_F_IP_CSUM; ++#endif ++#ifdef USE_SK_RX_CHECKSUM ++ pAC->RxPort[0].UseRxCsum = SK_TRUE; ++ if (pAC->GIni.GIMacsFound == 2 ) { ++ pAC->RxPort[1].UseRxCsum = SK_TRUE; ++ } ++#endif ++ ++ /* Save the hardware revision */ ++ pAC->HWRevision = (((pAC->GIni.GIPciHwRev >> 4) & 0x0F)*10) + ++ (pAC->GIni.GIPciHwRev & 0x0F); ++ ++ /* Set driver globals */ ++ pAC->Pnmi.pDriverFileName = DRIVER_FILE_NAME; ++ pAC->Pnmi.pDriverReleaseDate = DRIVER_REL_DATE; ++ ++ SK_MEMSET(&(pAC->PnmiBackup), 0, sizeof(SK_PNMI_STRUCT_DATA)); ++ SK_MEMCPY(&(pAC->PnmiBackup), &(pAC->PnmiStruct), ++ sizeof(SK_PNMI_STRUCT_DATA)); ++ ++ /* Register net device */ ++ retval = register_netdev(dev); ++ if (retval) { ++ printk(KERN_ERR "SKGE: Could not register device.\n"); ++ FreeResources(dev); ++ free_netdev(dev); ++ return retval; ++ } ++ ++ /* Save initial device name */ ++ strcpy(pNet->InitialDevName, dev->name); ++ ++ /* Set network to off */ ++ netif_stop_queue(dev); ++ netif_carrier_off(dev); ++ ++ /* Print adapter specific string from vpd and config settings */ ++ printk("%s: %s\n", pNet->InitialDevName, pAC->DeviceStr); ++ printk(" PrefPort:%c RlmtMode:%s\n", ++ 'A' + pAC->Rlmt.Net[0].Port[pAC->Rlmt.Net[0].PrefPort]->PortNumber, ++ (pAC->RlmtMode==0) ? "Check Link State" : ++ ((pAC->RlmtMode==1) ? "Check Link State" : ++ ((pAC->RlmtMode==3) ? "Check Local Port" : ++ ((pAC->RlmtMode==7) ? "Check Segmentation" : ++ ((pAC->RlmtMode==17) ? "Dual Check Link State" :"Error"))))); ++ ++ SkGeYellowLED(pAC, pAC->IoBase, 1); ++ ++ memcpy((caddr_t) &dev->dev_addr, ++ (caddr_t) &pAC->Addr.Net[0].CurrentMacAddress, 6); ++ ++ /* First adapter... Create proc and print message */ ++#ifdef CONFIG_PROC_FS ++ if (!sk98lin_proc_entry) { ++ sk98lin_proc_entry = SK_TRUE; ++ SK_MEMCPY(&SK_Root_Dir_entry, BootString, ++ sizeof(SK_Root_Dir_entry) - 1); ++ ++ /*Create proc (directory)*/ ++ if(!pSkRootDir) { ++ pSkRootDir = proc_mkdir(SK_Root_Dir_entry, proc_net); ++ if (!pSkRootDir) { ++ printk(KERN_WARNING "%s: Unable to create /proc/net/%s", ++ dev->name, SK_Root_Dir_entry); ++ } else { ++ pSkRootDir->owner = THIS_MODULE; ++ } ++ } ++ } ++ ++ /* Create proc file */ ++ if (pSkRootDir && ++ (pProcFile = create_proc_entry(pNet->InitialDevName, S_IRUGO, ++ pSkRootDir))) { ++ pProcFile->proc_fops = &sk_proc_fops; ++ pProcFile->data = dev; ++ } ++ ++#endif ++ ++ pNet->PortNr = 0; ++ pNet->NetNr = 0; ++ ++ sk98lin_boards_found++; ++ pci_set_drvdata(pdev, dev); ++ ++ /* More then one port found */ ++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { ++ if ((dev = alloc_etherdev(sizeof(DEV_NET))) == 0) { ++ printk(KERN_ERR "Unable to allocate etherdev " ++ "structure!\n"); ++ return -ENODEV; ++ } ++ ++ pAC->dev[1] = dev; ++ pNet = dev->priv; ++ pNet->PortNr = 1; ++ pNet->NetNr = 1; ++ pNet->pAC = pAC; ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ dev->hard_start_xmit = &SkY2Xmit; ++#ifdef CONFIG_SK98LIN_NAPI ++ dev->poll = &SkY2Poll; ++ dev->weight = 64; ++#endif ++ } else { ++ dev->hard_start_xmit = &SkGeXmit; ++#ifdef CONFIG_SK98LIN_NAPI ++ dev->poll = &SkGePoll; ++ dev->weight = 64; ++#endif ++ } ++ dev->open = &SkGeOpen; ++ dev->stop = &SkGeClose; ++ dev->get_stats = &SkGeStats; ++ dev->set_multicast_list = &SkGeSetRxMode; ++ dev->set_mac_address = &SkGeSetMacAddr; ++ dev->do_ioctl = &SkGeIoctl; ++ dev->change_mtu = &SkGeChangeMtu; ++ dev->flags &= ~IFF_RUNNING; ++#ifdef SK_POLL_CONTROLLER ++ dev->poll_controller = SkGeNetPoll; ++#endif ++ ++#ifdef NETIF_F_TSO ++#ifdef USE_SK_TSO_FEATURE ++ if (CHIP_ID_YUKON_2(pAC)) { ++ dev->features |= NETIF_F_TSO; ++ } ++#endif ++#endif ++#ifdef CONFIG_SK98LIN_ZEROCOPY ++ /* Don't handle if Genesis chipset */ ++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) ++ dev->features |= NETIF_F_SG; ++#endif ++#ifdef USE_SK_TX_CHECKSUM ++ /* Don't handle if Genesis chipset */ ++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) ++ dev->features |= NETIF_F_IP_CSUM; ++#endif ++ ++ if (register_netdev(dev)) { ++ printk(KERN_ERR "SKGE: Could not register device.\n"); ++ free_netdev(dev); ++ pAC->dev[1] = pAC->dev[0]; ++ } else { ++ ++ /* Save initial device name */ ++ strcpy(pNet->InitialDevName, dev->name); ++ ++ /* Set network to off */ ++ netif_stop_queue(dev); ++ netif_carrier_off(dev); ++ ++ ++#ifdef CONFIG_PROC_FS ++ if (pSkRootDir ++ && (pProcFile = create_proc_entry(pNet->InitialDevName, ++ S_IRUGO, pSkRootDir))) { ++ pProcFile->proc_fops = &sk_proc_fops; ++ pProcFile->data = dev; ++ } ++#endif ++ ++ memcpy((caddr_t) &dev->dev_addr, ++ (caddr_t) &pAC->Addr.Net[1].CurrentMacAddress, 6); ++ ++ printk("%s: %s\n", pNet->InitialDevName, pAC->DeviceStr); ++ printk(" PrefPort:B RlmtMode:Dual Check Link State\n"); ++ } ++ } ++ ++ pAC->Index = sk98lin_boards_found; ++ sk98lin_max_boards_found = sk98lin_boards_found; ++ return 0; ++} ++ ++ + + /***************************************************************************** + * +@@ -282,7 +650,7 @@ + dev->mem_start = pci_resource_start (pdev, 0); + pci_set_master(pdev); + +- if (pci_request_regions(pdev, pAC->Name) != 0) { ++ if (pci_request_regions(pdev, DRIVER_FILE_NAME) != 0) { + retval = 2; + goto out_disable; + } +@@ -298,26 +666,406 @@ + our2 |= PCI_REV_DESC; + SkPciWriteCfgDWord(pAC, PCI_OUR_REG_2, our2); + } +-#endif ++#endif ++ ++ /* ++ * Remap the regs into kernel space. ++ */ ++ pAC->IoBase = (char*)ioremap_nocache(dev->mem_start, 0x4000); ++ ++ if (!pAC->IoBase){ ++ retval = 3; ++ goto out_release; ++ } ++ ++ return 0; ++ ++ out_release: ++ pci_release_regions(pdev); ++ out_disable: ++ pci_disable_device(pdev); ++ return retval; ++} ++ ++#ifdef Y2_RECOVERY ++/***************************************************************************** ++ * ++ * SkGeHandleKernelTimer - Handle the kernel timer requests ++ * ++ * Description: ++ * If the requested time interval for the timer has elapsed, ++ * this function checks the link state. ++ * ++ * Returns: N/A ++ * ++ */ ++static void SkGeHandleKernelTimer( ++unsigned long ptr) /* holds the pointer to adapter control context */ ++{ ++ DEV_NET *pNet = (DEV_NET*) ptr; ++ ++ pNet->TimerExpired = SK_TRUE; ++} ++ ++/***************************************************************************** ++ * ++ * sk98lin_check_timer - Resume the the card ++ * ++ * Description: ++ * This function checks the kernel timer ++ * ++ * Returns: N/A ++ * ++ */ ++void SkGeCheckTimer( ++DEV_NET *pNet) /* holds the pointer to adapter control context */ ++{ ++ SK_AC *pAC = pNet->pAC; ++ SK_BOOL StartTimer = SK_TRUE; ++#ifdef Y2_RX_CHECK ++ SK_BOOL ZeroRegister = SK_FALSE; ++ SK_U8 FifoReadPointer; ++ SK_U8 FifoReadLevel; ++ SK_U32 BmuStateMachine; ++#endif ++ ++ if (pNet->InRecover) ++ return; ++ ++#define TXPORT pAC->TxPort[pNet->PortNr][TX_PRIO_LOW] ++#define RXPORT pAC->RxPort[pNet->PortNr] ++ ++ if ( (CHIP_ID_YUKON_2(pAC)) && ++ (netif_running(pAC->dev[pNet->PortNr]))) { ++ ++#ifdef Y2_RX_CHECK ++ /* Check the receiver only if link is up*/ ++ if ( (netif_carrier_ok(pAC->dev[pNet->PortNr])) && ++ (pNet->LastJiffies == pAC->dev[pNet->PortNr]->last_rx)) { ++ ++ /* Nothing received */ ++ /* Get the register values */ ++ SK_IN8(pAC->IoBase, 0x0448, &FifoReadPointer); ++ SK_IN8(pAC->IoBase, 0x044a, &FifoReadLevel); ++ SK_IN32(pAC->IoBase, 0x043c, &BmuStateMachine); ++ ++ /* Check the register values */ ++ if ((pNet->FifoReadPointer != FifoReadPointer) || ++ (pNet->FifoReadLevel != FifoReadLevel) || ++ (pNet->BmuStateMachine != BmuStateMachine)) { ++ ++ /* Check the values */ ++ if ((pNet->FifoReadPointer) || ++ (pNet->FifoReadLevel) || ++ (pNet->BmuStateMachine)) { ++ ++ /* Check the jiffies again */ ++ if (pNet->LastJiffies == ++ pAC->dev[pNet->PortNr]->last_rx) { ++ /* Still nothing received */ ++ SkLocalEventQueue(pAC, SKGE_DRV, ++ SK_DRV_RECOVER,pNet->PortNr,-1,SK_FALSE); ++ } else { ++ ZeroRegister = SK_TRUE; ++ } ++ } else { ++ pNet->FifoReadPointer = FifoReadPointer; ++ pNet->FifoReadLevel = FifoReadLevel; ++ pNet->BmuStateMachine = BmuStateMachine; ++ ++ } ++ } else { ++ if ((FifoReadLevel != 0) && ++ (FifoReadPointer > 0)) { ++ /* Check the jiffies again */ ++ if (pNet->LastJiffies == ++ pAC->dev[pNet->PortNr]->last_rx) { ++ /* Still nothing received */ ++ SkLocalEventQueue(pAC, SKGE_DRV, ++ SK_DRV_RECOVER,pNet->PortNr,-1,SK_FALSE); ++ } else { ++ ZeroRegister = SK_TRUE; ++ } ++ } else { ++ ZeroRegister = SK_TRUE; ++ } ++ } ++ } else { ++ /* Clear the values */ ++ if ((pNet->FifoReadPointer) || ++ (pNet->FifoReadLevel) || ++ (pNet->BmuStateMachine)) { ++ ZeroRegister = SK_TRUE; ++ } ++ pNet->LastJiffies = ++ pAC->dev[pNet->PortNr]->last_rx; ++ } ++ ++ /* Clear the register values */ ++ if (ZeroRegister) { ++ pNet->FifoReadPointer = 0; ++ pNet->FifoReadLevel = 0; ++ pNet->BmuStateMachine = 0; ++ } ++#endif ++ ++ /* Checkthe transmitter */ ++ if (!(IS_Q_EMPTY(&TXPORT.TxAQ_working))) { ++ if (TXPORT.LastDone != TXPORT.TxALET.Done) { ++ TXPORT.LastDone = TXPORT.TxALET.Done; ++ pNet->TransmitTimeoutTimer = 0; ++ } else { ++ pNet->TransmitTimeoutTimer++; ++ if (pNet->TransmitTimeoutTimer >= 10) { ++ pNet->TransmitTimeoutTimer = 0; ++#ifdef CHECK_TRANSMIT_TIMEOUT ++ StartTimer = SK_FALSE; ++ SkLocalEventQueue(pAC, SKGE_DRV, ++ SK_DRV_RECOVER,pNet->PortNr,-1,SK_FALSE); ++#endif ++ } ++ } ++ } ++ ++#ifdef CHECK_TRANSMIT_TIMEOUT ++// if (!timer_pending(&pNet->KernelTimer)) { ++ pNet->KernelTimer.expires = jiffies + (HZ/4); /* 250ms */ ++ add_timer(&pNet->KernelTimer); ++ pNet->TimerExpired = SK_FALSE; ++// } ++#endif ++ } ++} ++#endif ++ ++ ++#ifdef CONFIG_PM ++/***************************************************************************** ++ * ++ * sk98lin_resume - Resume the the card ++ * ++ * Description: ++ * This function resumes the card into the D0 state ++ * ++ * Returns: N/A ++ * ++ */ ++static int sk98lin_resume( ++struct pci_dev *pdev) /* the device that is to resume */ ++{ ++ struct net_device *dev = pci_get_drvdata(pdev); ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ SK_U16 PmCtlSts; ++ ++ /* Set the power state to D0 */ ++ pci_set_power_state(pdev, 0); ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) ++ pci_restore_state(pdev); ++#else ++ pci_restore_state(pdev, pAC->PciState); ++#endif ++ ++ /* Set the adapter power state to D0 */ ++ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts); ++ PmCtlSts &= ~(PCI_PM_STATE_D3); /* reset all DState bits */ ++ PmCtlSts |= PCI_PM_STATE_D0; ++ SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PmCtlSts); ++ ++ /* Reinit the adapter and start the port again */ ++ pAC->BoardLevel = SK_INIT_DATA; ++ SkDrvLeaveDiagMode(pAC); ++ ++ netif_device_attach(dev); ++ netif_start_queue(dev); ++ return 0; ++} ++ ++/***************************************************************************** ++ * ++ * sk98lin_suspend - Suspend the card ++ * ++ * Description: ++ * This function suspends the card into a defined state ++ * ++ * Returns: N/A ++ * ++ */ ++static int sk98lin_suspend( ++struct pci_dev *pdev, /* pointer to the device that is to suspend */ ++u32 state) /* what power state is desired by Linux? */ ++{ ++ struct net_device *dev = pci_get_drvdata(pdev); ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ SK_U16 PciPMControlStatus; ++ SK_U16 PciPMCapabilities; ++ SK_MAC_ADDR MacAddr; ++ int i; ++ ++ /* GEnesis and first yukon revs do not support power management */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) { ++ if (pAC->GIni.GIChipRev == 0) { ++ return 0; /* power management not supported */ ++ } ++ } ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { ++ return 0; /* not supported for this chipset */ ++ } ++ ++ if (pAC->WolInfo.ConfiguredWolOptions == 0) { ++ return 0; /* WOL possible, but disabled via ethtool */ ++ } ++ ++ if(netif_running(dev)) { ++ netif_stop_queue(dev); /* stop device if running */ ++ } ++ ++ netif_device_detach(dev); ++ ++ /* read the PM control/status register from the PCI config space */ ++ SK_IN16(pAC->IoBase, PCI_C(pAC, PCI_PM_CTL_STS), &PciPMControlStatus); ++ ++ /* read the power management capabilities from the config space */ ++ SK_IN16(pAC->IoBase, PCI_C(pAC, PCI_PM_CAP_REG), &PciPMCapabilities); ++ ++ /* Enable WakeUp with Magic Packet - get MAC address from adapter */ ++ for (i = 0; i < SK_MAC_ADDR_LEN; i++) { ++ /* virtual address: will be used for data */ ++ SK_IN8(pAC->IoBase, (B2_MAC_1 + i), &MacAddr.a[i]); ++ } ++ ++ SkDrvEnterDiagMode(pAC); ++ SkEnableWOMagicPacket(pAC, pAC->IoBase, MacAddr); ++ ++ pci_enable_wake(pdev, 3, 1); ++ pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) ++ pci_save_state(pdev); ++#else ++ pci_save_state(pdev, pAC->PciState); ++#endif ++ pci_set_power_state(pdev, state); /* set the state */ ++ ++ return 0; ++} ++ ++ ++/****************************************************************************** ++ * ++ * SkEnableWOMagicPacket - Enable Wake on Magic Packet on the adapter ++ * ++ * Context: ++ * init, pageable ++ * the adapter should be de-initialized before calling this function ++ * ++ * Returns: ++ * nothing ++ */ ++ ++static void SkEnableWOMagicPacket( ++SK_AC *pAC, /* Adapter Control Context */ ++SK_IOC IoC, /* I/O control context */ ++SK_MAC_ADDR MacAddr) /* MacAddr expected in magic packet */ ++{ ++ SK_U16 Word; ++ SK_U32 DWord; ++ int i; ++ int HwPortIndex; ++ int Port = 0; ++ ++ /* use Port 0 as long as we do not have any dual port cards which support WOL */ ++ HwPortIndex = 0; ++ DWord = 0; ++ ++ SK_OUT16(IoC, 0x0004, 0x0002); /* clear S/W Reset */ ++ SK_OUT16(IoC, 0x0f10, 0x0002); /* clear Link Reset */ ++ ++ /* ++ * PHY Configuration: ++ * Autonegotioation is enalbed, advertise 10 HD, 10 FD, ++ * 100 HD, and 100 FD. ++ */ ++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON) || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE)) { ++ ++ SK_OUT16(IoC, 0x0004, 0x0800); /* enable CLK_RUN */ ++ SK_OUT8(IoC, 0x0007, 0xa9); /* enable VAUX */ ++ ++ /* WA code for COMA mode */ ++ /* Only for yukon plus based chipsets rev A3 */ ++ if (pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { ++ SK_IN32(IoC, B2_GP_IO, &DWord); ++ DWord |= GP_DIR_9; /* set to output */ ++ DWord &= ~GP_IO_9; /* clear PHY reset (active high) */ ++ SK_OUT32(IoC, B2_GP_IO, DWord); /* clear PHY reset */ ++ } ++ ++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON)) { ++ SK_OUT32(IoC, 0x0f04, 0x01f04001); /* set PHY reset */ ++ SK_OUT32(IoC, 0x0f04, 0x01f04002); /* clear PHY reset */ ++ } else { ++ SK_OUT8(IoC, 0x0f04, 0x02); /* clear PHY reset */ ++ } ++ ++ SK_OUT8(IoC, 0x0f00, 0x02); /* clear MAC reset */ ++ SkGmPhyWrite(pAC, IoC, Port, 4, 0x01e1); /* advertise 10/100 HD/FD */ ++ SkGmPhyWrite(pAC, IoC, Port, 9, 0x0000); /* do not advertise 1000 HD/FD */ ++ SkGmPhyWrite(pAC, IoC, Port, 00, 0xB300); /* 100 MBit, disable Autoneg */ ++ } else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ SK_OUT8(IoC, 0x0007, 0xa9); /* enable VAUX */ ++ SK_OUT8(IoC, 0x0f04, 0x02); /* clear PHY reset */ ++ SK_OUT8(IoC, 0x0f00, 0x02); /* clear MAC reset */ ++ SkGmPhyWrite(pAC, IoC, Port, 16, 0x0130); /* Enable Automatic Crossover */ ++ SkGmPhyWrite(pAC, IoC, Port, 00, 0xB300); /* 100 MBit, disable Autoneg */ ++ } ++ + + /* +- * Remap the regs into kernel space. ++ * MAC Configuration: ++ * Set the MAC to 100 HD and enable the auto update features ++ * for Speed, Flow Control and Duplex Mode. ++ * If autonegotiation completes successfully the ++ * MAC takes the link parameters from the PHY. ++ * If the link partner doesn't support autonegotiation ++ * the MAC can receive magic packets if the link partner ++ * uses 100 HD. + */ +- pAC->IoBase = (char*)ioremap_nocache(dev->mem_start, 0x4000); ++ SK_OUT16(IoC, 0x2804, 0x3832); ++ + +- if (!pAC->IoBase){ +- retval = 3; +- goto out_release; ++ /* ++ * Set Up Magic Packet parameters ++ */ ++ for (i = 0; i < 6; i+=2) { /* set up magic packet MAC address */ ++ SK_IN16(IoC, 0x100 + i, &Word); ++ SK_OUT16(IoC, 0xf24 + i, Word); + } + +- return 0; ++ SK_OUT16(IoC, 0x0f20, 0x0208); /* enable PME on magic packet */ ++ /* and on wake up frame */ + +- out_release: +- pci_release_regions(pdev); +- out_disable: +- pci_disable_device(pdev); +- return retval; +-} ++ /* ++ * Set up PME generation ++ */ ++ /* set PME legacy mode */ ++ /* Only for PCI express based chipsets */ ++ if ((pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE)) { ++ SkPciReadCfgDWord(pAC, 0x40, &DWord); ++ DWord |= 0x8000; ++ SkPciWriteCfgDWord(pAC, 0x40, DWord); ++ } ++ ++ /* clear PME status and switch adapter to DState */ ++ SkPciReadCfgWord(pAC, 0x4c, &Word); ++ Word |= 0x103; ++ SkPciWriteCfgWord(pAC, 0x4c, Word); ++} /* SkEnableWOMagicPacket */ ++#endif + + + /***************************************************************************** +@@ -350,7 +1098,9 @@ + if (pAC->IoBase) { + iounmap(pAC->IoBase); + } +- if (pAC->pDescrMem) { ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2FreeResources(pAC); ++ } else { + BoardFreeMem(pAC); + } + } +@@ -360,25 +1110,6 @@ + MODULE_AUTHOR("Mirko Lindner "); + MODULE_DESCRIPTION("SysKonnect SK-NET Gigabit Ethernet SK-98xx driver"); + MODULE_LICENSE("GPL"); +-MODULE_PARM(Speed_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(Speed_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(AutoNeg_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(AutoNeg_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(DupCap_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(DupCap_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(FlowCtrl_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(FlowCtrl_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(Role_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(Role_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(ConType, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(PrefPort, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(RlmtMode, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-/* used for interrupt moderation */ +-MODULE_PARM(IntsPerSec, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "i"); +-MODULE_PARM(Moderation, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(Stats, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(ModerationMask, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); +-MODULE_PARM(AutoSizing, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); + + + #ifdef LINK_SPEED_A +@@ -462,8 +1193,137 @@ + static int IntsPerSec[SK_MAX_CARD_PARAM]; + static char *Moderation[SK_MAX_CARD_PARAM]; + static char *ModerationMask[SK_MAX_CARD_PARAM]; +-static char *AutoSizing[SK_MAX_CARD_PARAM]; +-static char *Stats[SK_MAX_CARD_PARAM]; ++ ++static char *LowLatency[SK_MAX_CARD_PARAM]; ++ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) ++module_param_array(Speed_A, charp, NULL, 0); ++module_param_array(Speed_B, charp, NULL, 0); ++module_param_array(AutoNeg_A, charp, NULL, 0); ++module_param_array(AutoNeg_B, charp, NULL, 0); ++module_param_array(DupCap_A, charp, NULL, 0); ++module_param_array(DupCap_B, charp, NULL, 0); ++module_param_array(FlowCtrl_A, charp, NULL, 0); ++module_param_array(FlowCtrl_B, charp, NULL, 0); ++module_param_array(Role_A, charp, NULL, 0); ++module_param_array(Role_B, charp, NULL, 0); ++module_param_array(ConType, charp, NULL, 0); ++module_param_array(PrefPort, charp, NULL, 0); ++module_param_array(RlmtMode, charp, NULL, 0); ++/* used for interrupt moderation */ ++module_param_array(IntsPerSec, int, NULL, 0); ++module_param_array(Moderation, charp, NULL, 0); ++module_param_array(ModerationMask, charp, NULL, 0); ++module_param_array(LowLatency, charp, NULL, 0); ++#else ++MODULE_PARM(Speed_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(Speed_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(AutoNeg_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(AutoNeg_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(DupCap_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(DupCap_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(FlowCtrl_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(FlowCtrl_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(Role_A, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(Role_B, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(ConType, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(PrefPort, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(RlmtMode, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(IntsPerSec, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "i"); ++MODULE_PARM(Moderation, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(ModerationMask, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++MODULE_PARM(LowLatency, "1-" __MODULE_STRING(SK_MAX_CARD_PARAM) "s"); ++#endif ++ ++ ++/***************************************************************************** ++ * ++ * sk98lin_remove_device - device deinit function ++ * ++ * Description: ++ * Disable adapter if it is still running, free resources, ++ * free device struct. ++ * ++ * Returns: N/A ++ */ ++ ++static void sk98lin_remove_device(struct pci_dev *pdev) ++{ ++DEV_NET *pNet; ++SK_AC *pAC; ++struct SK_NET_DEVICE *next; ++unsigned long Flags; ++struct net_device *dev = pci_get_drvdata(pdev); ++ ++ ++ /* Device not available. Return. */ ++ if (!dev) ++ return; ++ ++ pNet = (DEV_NET*) dev->priv; ++ pAC = pNet->pAC; ++ next = pAC->Next; ++ ++ netif_stop_queue(dev); ++ SkGeYellowLED(pAC, pAC->IoBase, 0); ++ ++ if(pAC->BoardLevel == SK_INIT_RUN) { ++ /* board is still alive */ ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, ++ 0, -1, SK_FALSE); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, ++ 1, -1, SK_TRUE); ++ ++ /* disable interrupts */ ++ SK_OUT32(pAC->IoBase, B0_IMSK, 0); ++ SkGeDeInit(pAC, pAC->IoBase); ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); ++ pAC->BoardLevel = SK_INIT_DATA; ++ /* We do NOT check here, if IRQ was pending, of course*/ ++ } ++ ++ if(pAC->BoardLevel == SK_INIT_IO) { ++ /* board is still alive */ ++ SkGeDeInit(pAC, pAC->IoBase); ++ pAC->BoardLevel = SK_INIT_DATA; ++ } ++ ++ if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2){ ++ unregister_netdev(pAC->dev[1]); ++ free_netdev(pAC->dev[1]); ++ } ++ ++ FreeResources(dev); ++ ++#ifdef CONFIG_PROC_FS ++ /* Remove the sk98lin procfs device entries */ ++ if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2){ ++ remove_proc_entry(pAC->dev[1]->name, pSkRootDir); ++ } ++ remove_proc_entry(pNet->InitialDevName, pSkRootDir); ++#endif ++ ++ dev->get_stats = NULL; ++ /* ++ * otherwise unregister_netdev calls get_stats with ++ * invalid IO ... :-( ++ */ ++ unregister_netdev(dev); ++ free_netdev(dev); ++ kfree(pAC); ++ sk98lin_max_boards_found--; ++ ++#ifdef CONFIG_PROC_FS ++ /* Remove all Proc entries if last device */ ++ if (sk98lin_max_boards_found == 0) { ++ /* clear proc-dir */ ++ remove_proc_entry(pSkRootDir->name, proc_net); ++ } ++#endif ++ ++} ++ + + /***************************************************************************** + * +@@ -501,7 +1361,10 @@ + spin_lock_init(&pAC->TxPort[i][0].TxDesRingLock); + spin_lock_init(&pAC->RxPort[i].RxDesRingLock); + } ++ + spin_lock_init(&pAC->SlowPathLock); ++ spin_lock_init(&pAC->TxQueueLock); /* for Yukon2 chipsets */ ++ spin_lock_init(&pAC->SetPutIndexLock); /* for Yukon2 chipsets */ + + /* level 0 init common modules here */ + +@@ -520,15 +1383,13 @@ + SkTimerInit(pAC, pAC->IoBase, SK_INIT_DATA); + + pAC->BoardLevel = SK_INIT_DATA; +- pAC->RxBufSize = ETH_BUF_SIZE; ++ pAC->RxPort[0].RxBufSize = ETH_BUF_SIZE; ++ pAC->RxPort[1].RxBufSize = ETH_BUF_SIZE; + + SK_PNMI_SET_DRIVER_DESCR(pAC, DescrString); + SK_PNMI_SET_DRIVER_VER(pAC, VerStr); + +- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +- + /* level 1 init common modules here (HW init) */ +- spin_lock_irqsave(&pAC->SlowPathLock, Flags); + if (SkGeInit(pAC, pAC->IoBase, SK_INIT_IO) != 0) { + printk("sk98lin: HWInit (1) failed.\n"); + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +@@ -540,51 +1401,93 @@ + SkAddrInit( pAC, pAC->IoBase, SK_INIT_IO); + SkRlmtInit( pAC, pAC->IoBase, SK_INIT_IO); + SkTimerInit(pAC, pAC->IoBase, SK_INIT_IO); ++#ifdef Y2_RECOVERY ++ /* mark entries invalid */ ++ pAC->LastPort = 3; ++ pAC->LastOpc = 0xFF; ++#endif + + /* Set chipset type support */ +- pAC->ChipsetType = 0; + if ((pAC->GIni.GIChipId == CHIP_ID_YUKON) || +- (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE)) { +- pAC->ChipsetType = 1; ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_LP)) { ++ pAC->ChipsetType = 1; /* Yukon chipset (descriptor logic) */ ++ } else if (CHIP_ID_YUKON_2(pAC)) { ++ pAC->ChipsetType = 2; /* Yukon2 chipset (list logic) */ ++ } else { ++ pAC->ChipsetType = 0; /* Genesis chipset (descriptor logic) */ ++ } ++ ++ /* wake on lan support */ ++ pAC->WolInfo.SupportedWolOptions = 0; ++#if defined (ETHTOOL_GWOL) && defined (ETHTOOL_SWOL) ++ if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) { ++ pAC->WolInfo.SupportedWolOptions = WAKE_MAGIC; ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON) { ++ if (pAC->GIni.GIChipRev == 0) { ++ pAC->WolInfo.SupportedWolOptions = 0; ++ } ++ } + } ++#endif ++ pAC->WolInfo.ConfiguredWolOptions = pAC->WolInfo.SupportedWolOptions; + + GetConfiguration(pAC); + if (pAC->RlmtNets == 2) { +- pAC->GIni.GIPortUsage = SK_MUL_LINK; ++ pAC->GIni.GP[0].PPortUsage = SK_MUL_LINK; ++ pAC->GIni.GP[1].PPortUsage = SK_MUL_LINK; + } + + pAC->BoardLevel = SK_INIT_IO; + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + +- if (pAC->GIni.GIMacsFound == 2) { +- Ret = request_irq(dev->irq, SkGeIsr, SA_SHIRQ, pAC->Name, dev); +- } else if (pAC->GIni.GIMacsFound == 1) { +- Ret = request_irq(dev->irq, SkGeIsrOnePort, SA_SHIRQ, +- pAC->Name, dev); +- } else { +- printk(KERN_WARNING "sk98lin: Illegal number of ports: %d\n", +- pAC->GIni.GIMacsFound); +- return -EAGAIN; ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIMacsFound == 2) { ++ Ret = request_irq(dev->irq, SkGeIsr, SA_SHIRQ, dev->name, dev); ++ } else if (pAC->GIni.GIMacsFound == 1) { ++ Ret = request_irq(dev->irq, SkGeIsrOnePort, SA_SHIRQ, dev->name, dev); ++ } else { ++ printk(KERN_WARNING "sk98lin: Illegal number of ports: %d\n", ++ pAC->GIni.GIMacsFound); ++ return -EAGAIN; ++ } ++ } ++ else { ++ Ret = request_irq(dev->irq, SkY2Isr, SA_SHIRQ, dev->name, dev); + } + + if (Ret) { + printk(KERN_WARNING "sk98lin: Requested IRQ %d is busy.\n", +- dev->irq); ++ dev->irq); + return -EAGAIN; + } + pAC->AllocFlag |= SK_ALLOC_IRQ; + +- /* Alloc memory for this board (Mem for RxD/TxD) : */ +- if(!BoardAllocMem(pAC)) { +- printk("No memory for descriptor rings.\n"); +- return(-EAGAIN); ++ /* ++ ** Alloc descriptor/LETable memory for this board (both RxD/TxD) ++ */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (!SkY2AllocateResources(pAC)) { ++ printk("No memory for Yukon2 settings\n"); ++ return(-EAGAIN); ++ } ++ } else { ++ if(!BoardAllocMem(pAC)) { ++ printk("No memory for descriptor rings.\n"); ++ return(-EAGAIN); ++ } + } + ++#ifdef SK_USE_CSUM + SkCsSetReceiveFlags(pAC, + SKCS_PROTO_IP | SKCS_PROTO_TCP | SKCS_PROTO_UDP, + &pAC->CsOfs1, &pAC->CsOfs2, 0); + pAC->CsOfs = (pAC->CsOfs2 << 16) | pAC->CsOfs1; ++#endif + ++ /* ++ ** Function BoardInitMem() for Yukon dependent settings... ++ */ + BoardInitMem(pAC); + /* tschilling: New common function with minimum size check. */ + DualNet = SK_FALSE; +@@ -596,7 +1499,12 @@ + pAC, + pAC->ActivePort, + DualNet)) { +- BoardFreeMem(pAC); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2FreeResources(pAC); ++ } else { ++ BoardFreeMem(pAC); ++ } ++ + printk("sk98lin: SkGeInitAssignRamToQueues failed.\n"); + return(-EAGAIN); + } +@@ -696,16 +1604,20 @@ + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("BoardFreeMem\n")); ++ ++ if (pAC->pDescrMem) { ++ + #if (BITS_PER_LONG == 32) +- AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + 8; ++ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + 8; + #else +- AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound +- + RX_RING_SIZE + 8; ++ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound ++ + RX_RING_SIZE + 8; + #endif + +- pci_free_consistent(pAC->PciDev, AllocLength, ++ pci_free_consistent(pAC->PciDev, AllocLength, + pAC->pDescrMem, pAC->pDescrMemDMA); +- pAC->pDescrMem = NULL; ++ pAC->pDescrMem = NULL; ++ } + } /* BoardFreeMem */ + + +@@ -714,7 +1626,7 @@ + * BoardInitMem - initiate the descriptor rings + * + * Description: +- * This function sets the descriptor rings up in memory. ++ * This function sets the descriptor rings or LETables up in memory. + * The adapter is initialized with the descriptor start addresses. + * + * Returns: N/A +@@ -729,34 +1641,37 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("BoardInitMem\n")); + +- RxDescrSize = (((sizeof(RXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN; +- pAC->RxDescrPerRing = RX_RING_SIZE / RxDescrSize; +- TxDescrSize = (((sizeof(TXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN; +- pAC->TxDescrPerRing = TX_RING_SIZE / RxDescrSize; ++ if (!pAC->GIni.GIYukon2) { ++ RxDescrSize = (((sizeof(RXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN; ++ pAC->RxDescrPerRing = RX_RING_SIZE / RxDescrSize; ++ TxDescrSize = (((sizeof(TXD) - 1) / DESCR_ALIGN) + 1) * DESCR_ALIGN; ++ pAC->TxDescrPerRing = TX_RING_SIZE / RxDescrSize; + +- for (i=0; iGIni.GIMacsFound; i++) { +- SetupRing( +- pAC, +- pAC->TxPort[i][0].pTxDescrRing, +- pAC->TxPort[i][0].VTxDescrRing, +- (RXD**)&pAC->TxPort[i][0].pTxdRingHead, +- (RXD**)&pAC->TxPort[i][0].pTxdRingTail, +- (RXD**)&pAC->TxPort[i][0].pTxdRingPrev, +- &pAC->TxPort[i][0].TxdRingFree, +- SK_TRUE); +- SetupRing( +- pAC, +- pAC->RxPort[i].pRxDescrRing, +- pAC->RxPort[i].VRxDescrRing, +- &pAC->RxPort[i].pRxdRingHead, +- &pAC->RxPort[i].pRxdRingTail, +- &pAC->RxPort[i].pRxdRingPrev, +- &pAC->RxPort[i].RxdRingFree, +- SK_FALSE); ++ for (i=0; iGIni.GIMacsFound; i++) { ++ SetupRing( ++ pAC, ++ pAC->TxPort[i][0].pTxDescrRing, ++ pAC->TxPort[i][0].VTxDescrRing, ++ (RXD**)&pAC->TxPort[i][0].pTxdRingHead, ++ (RXD**)&pAC->TxPort[i][0].pTxdRingTail, ++ (RXD**)&pAC->TxPort[i][0].pTxdRingPrev, ++ &pAC->TxPort[i][0].TxdRingFree, ++ &pAC->TxPort[i][0].TxdRingPrevFree, ++ SK_TRUE); ++ SetupRing( ++ pAC, ++ pAC->RxPort[i].pRxDescrRing, ++ pAC->RxPort[i].VRxDescrRing, ++ &pAC->RxPort[i].pRxdRingHead, ++ &pAC->RxPort[i].pRxdRingTail, ++ &pAC->RxPort[i].pRxdRingPrev, ++ &pAC->RxPort[i].RxdRingFree, ++ &pAC->RxPort[i].RxdRingFree, ++ SK_FALSE); ++ } + } + } /* BoardInitMem */ + +- + /***************************************************************************** + * + * SetupRing - create one descriptor ring +@@ -776,6 +1691,7 @@ + RXD **ppRingTail, /* address where the tail should be written */ + RXD **ppRingPrev, /* address where the tail should be written */ + int *pRingFree, /* address where the # of free descr. goes */ ++int *pRingPrevFree, /* address where the # of free descr. goes */ + SK_BOOL IsTx) /* flag: is this a tx ring */ + { + int i; /* loop counter */ +@@ -818,11 +1734,12 @@ + } + pPrevDescr->pNextRxd = (RXD*) pMemArea; + pPrevDescr->VNextRxd = VMemArea; +- pDescr = (RXD*) pMemArea; +- *ppRingHead = (RXD*) pMemArea; +- *ppRingTail = *ppRingHead; +- *ppRingPrev = pPrevDescr; +- *pRingFree = DescrNum; ++ pDescr = (RXD*) pMemArea; ++ *ppRingHead = (RXD*) pMemArea; ++ *ppRingTail = *ppRingHead; ++ *ppRingPrev = pPrevDescr; ++ *pRingFree = DescrNum; ++ *pRingPrevFree = DescrNum; + } /* SetupRing */ + + +@@ -894,10 +1811,28 @@ + * Check and process if its our interrupt + */ + SK_IN32(pAC->IoBase, B0_SP_ISRC, &IntSrc); +- if (IntSrc == 0) { ++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)) { + return SkIsrRetNone; + } + ++#ifdef CONFIG_SK98LIN_NAPI ++ if (netif_rx_schedule_prep(dev)) { ++ pAC->GIni.GIValIrqMask &= ~(NAPI_DRV_IRQS); ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ __netif_rx_schedule(dev); ++ } ++ ++#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ ++ if (IntSrc & IS_XA1_F) { ++ CLEAR_TX_IRQ(0, TX_PRIO_LOW); ++ } ++ if (IntSrc & IS_XA2_F) { ++ CLEAR_TX_IRQ(1, TX_PRIO_LOW); ++ } ++#endif ++ ++ ++#else + while (((IntSrc & IRQ_MASK) & ~SPECIAL_IRQS) != 0) { + #if 0 /* software irq currently not used */ + if (IntSrc & IS_IRQ_SW) { +@@ -911,6 +1846,7 @@ + SK_DBGCAT_DRV_INT_SRC, + ("EOF RX1 IRQ\n")); + ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE); ++ CLEAR_AND_START_RX(0); + SK_PNMI_CNT_RX_INTR(pAC, 0); + } + if (IntSrc & IS_R2_F) { +@@ -918,6 +1854,7 @@ + SK_DBGCAT_DRV_INT_SRC, + ("EOF RX2 IRQ\n")); + ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE); ++ CLEAR_AND_START_RX(1); + SK_PNMI_CNT_RX_INTR(pAC, 1); + } + #ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ +@@ -925,6 +1862,7 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF AS TX1 IRQ\n")); ++ CLEAR_TX_IRQ(0, TX_PRIO_LOW); + SK_PNMI_CNT_TX_INTR(pAC, 0); + spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock); + FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]); +@@ -934,6 +1872,7 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF AS TX2 IRQ\n")); ++ CLEAR_TX_IRQ(1, TX_PRIO_LOW); + SK_PNMI_CNT_TX_INTR(pAC, 1); + spin_lock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock); + FreeTxDescriptors(pAC, &pAC->TxPort[1][TX_PRIO_LOW]); +@@ -944,38 +1883,28 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF SY TX1 IRQ\n")); ++ CLEAR_TX_IRQ(0, TX_PRIO_HIGH); + SK_PNMI_CNT_TX_INTR(pAC, 1); + spin_lock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock); + FreeTxDescriptors(pAC, 0, TX_PRIO_HIGH); + spin_unlock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock); +- ClearTxIrq(pAC, 0, TX_PRIO_HIGH); + } + if (IntSrc & IS_XS2_F) { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF SY TX2 IRQ\n")); ++ CLEAR_TX_IRQ(1, TX_PRIO_HIGH); + SK_PNMI_CNT_TX_INTR(pAC, 1); + spin_lock(&pAC->TxPort[1][TX_PRIO_HIGH].TxDesRingLock); + FreeTxDescriptors(pAC, 1, TX_PRIO_HIGH); + spin_unlock(&pAC->TxPort[1][TX_PRIO_HIGH].TxDesRingLock); +- ClearTxIrq(pAC, 1, TX_PRIO_HIGH); + } + #endif + #endif + +- /* do all IO at once */ +- if (IntSrc & IS_R1_F) +- ClearAndStartRx(pAC, 0); +- if (IntSrc & IS_R2_F) +- ClearAndStartRx(pAC, 1); +-#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ +- if (IntSrc & IS_XA1_F) +- ClearTxIrq(pAC, 0, TX_PRIO_LOW); +- if (IntSrc & IS_XA2_F) +- ClearTxIrq(pAC, 1, TX_PRIO_LOW); +-#endif + SK_IN32(pAC->IoBase, B0_ISRC, &IntSrc); + } /* while (IntSrc & IRQ_MASK != 0) */ ++#endif + + IntSrc &= pAC->GIni.GIValIrqMask; + if ((IntSrc & SPECIAL_IRQS) || pAC->CheckQueue) { +@@ -989,18 +1918,12 @@ + SkEventDispatcher(pAC, pAC->IoBase); + spin_unlock(&pAC->SlowPathLock); + } +- /* +- * do it all again is case we cleared an interrupt that +- * came in after handling the ring (OUTs may be delayed +- * in hardware buffers, but are through after IN) +- * +- * rroesler: has been commented out and shifted to +- * SkGeDrvEvent(), because it is timer +- * guarded now +- * ++ ++#ifndef CONFIG_SK98LIN_NAPI ++ /* Handle interrupts */ + ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE); + ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE); +- */ ++#endif + + if (pAC->CheckQueue) { + pAC->CheckQueue = SK_FALSE; +@@ -1043,10 +1966,25 @@ + * Check and process if its our interrupt + */ + SK_IN32(pAC->IoBase, B0_SP_ISRC, &IntSrc); +- if (IntSrc == 0) { ++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)) { + return SkIsrRetNone; + } + ++#ifdef CONFIG_SK98LIN_NAPI ++ if (netif_rx_schedule_prep(dev)) { ++ // CLEAR_AND_START_RX(0); ++ // CLEAR_TX_IRQ(0, TX_PRIO_LOW); ++ pAC->GIni.GIValIrqMask &= ~(NAPI_DRV_IRQS); ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ __netif_rx_schedule(dev); ++ } ++ ++#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ ++ if (IntSrc & IS_XA1_F) { ++ CLEAR_TX_IRQ(0, TX_PRIO_LOW); ++ } ++#endif ++#else + while (((IntSrc & IRQ_MASK) & ~SPECIAL_IRQS) != 0) { + #if 0 /* software irq currently not used */ + if (IntSrc & IS_IRQ_SW) { +@@ -1060,6 +1998,7 @@ + SK_DBGCAT_DRV_INT_SRC, + ("EOF RX1 IRQ\n")); + ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE); ++ CLEAR_AND_START_RX(0); + SK_PNMI_CNT_RX_INTR(pAC, 0); + } + #ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ +@@ -1067,6 +2006,7 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF AS TX1 IRQ\n")); ++ CLEAR_TX_IRQ(0, TX_PRIO_LOW); + SK_PNMI_CNT_TX_INTR(pAC, 0); + spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock); + FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]); +@@ -1077,24 +2017,18 @@ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_INT_SRC, + ("EOF SY TX1 IRQ\n")); ++ CLEAR_TX_IRQ(0, TX_PRIO_HIGH); + SK_PNMI_CNT_TX_INTR(pAC, 0); + spin_lock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock); + FreeTxDescriptors(pAC, 0, TX_PRIO_HIGH); + spin_unlock(&pAC->TxPort[0][TX_PRIO_HIGH].TxDesRingLock); +- ClearTxIrq(pAC, 0, TX_PRIO_HIGH); + } + #endif + #endif + +- /* do all IO at once */ +- if (IntSrc & IS_R1_F) +- ClearAndStartRx(pAC, 0); +-#ifdef USE_TX_COMPLETE /* only if tx complete interrupt used */ +- if (IntSrc & IS_XA1_F) +- ClearTxIrq(pAC, 0, TX_PRIO_LOW); +-#endif + SK_IN32(pAC->IoBase, B0_ISRC, &IntSrc); + } /* while (IntSrc & IRQ_MASK != 0) */ ++#endif + + IntSrc &= pAC->GIni.GIValIrqMask; + if ((IntSrc & SPECIAL_IRQS) || pAC->CheckQueue) { +@@ -1108,17 +2042,10 @@ + SkEventDispatcher(pAC, pAC->IoBase); + spin_unlock(&pAC->SlowPathLock); + } +- /* +- * do it all again is case we cleared an interrupt that +- * came in after handling the ring (OUTs may be delayed +- * in hardware buffers, but are through after IN) +- * +- * rroesler: has been commented out and shifted to +- * SkGeDrvEvent(), because it is timer +- * guarded now +- * ++ ++#ifndef CONFIG_SK98LIN_NAPI + ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE); +- */ ++#endif + + /* IRQ is processed - Enable IRQs again*/ + SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); +@@ -1126,7 +2053,6 @@ + return SkIsrRetHandled; + } /* SkGeIsrOnePort */ + +- + /**************************************************************************** + * + * SkGeOpen - handle start of initialized adapter +@@ -1144,27 +2070,21 @@ + * != 0 on error + */ + static int SkGeOpen( +-struct SK_NET_DEVICE *dev) ++struct SK_NET_DEVICE *dev) /* the device that is to be opened */ + { +- DEV_NET *pNet; +- SK_AC *pAC; +- unsigned long Flags; /* for spin lock */ +- int i; +- SK_EVPARA EvPara; /* an event parameter union */ ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ unsigned long Flags; /* for the spin locks */ ++ int CurrMac; /* loop ctr for ports */ + +- pNet = (DEV_NET*) dev->priv; +- pAC = pNet->pAC; +- + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeOpen: pAC=0x%lX:\n", (unsigned long)pAC)); + +-#ifdef SK_DIAG_SUPPORT + if (pAC->DiagModeActive == DIAG_ACTIVE) { + if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) { + return (-1); /* still in use by diag; deny actions */ + } + } +-#endif + + if (!try_module_get(THIS_MODULE)) { + return (-1); /* increase of usage count not possible */ +@@ -1188,6 +2108,11 @@ + SkRlmtInit (pAC, pAC->IoBase, SK_INIT_IO); + SkTimerInit (pAC, pAC->IoBase, SK_INIT_IO); + pAC->BoardLevel = SK_INIT_IO; ++#ifdef Y2_RECOVERY ++ /* mark entries invalid */ ++ pAC->LastPort = 3; ++ pAC->LastOpc = 0xFF; ++#endif + } + + if (pAC->BoardLevel != SK_INIT_RUN) { +@@ -1206,45 +2131,61 @@ + pAC->BoardLevel = SK_INIT_RUN; + } + +- for (i=0; iGIni.GIMacsFound; i++) { +- /* Enable transmit descriptor polling. */ +- SkGePollTxD(pAC, pAC->IoBase, i, SK_TRUE); +- FillRxRing(pAC, &pAC->RxPort[i]); ++ for (CurrMac=0; CurrMacGIni.GIMacsFound; CurrMac++) { ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* Enable transmit descriptor polling. */ ++ SkGePollTxD(pAC, pAC->IoBase, CurrMac, SK_TRUE); ++ FillRxRing(pAC, &pAC->RxPort[CurrMac]); ++ SkMacRxTxEnable(pAC, pAC->IoBase, pNet->PortNr); ++ } + } +- SkGeYellowLED(pAC, pAC->IoBase, 1); + +- StartDrvCleanupTimer(pAC); ++ SkGeYellowLED(pAC, pAC->IoBase, 1); + SkDimEnableModerationIfNeeded(pAC); +- SkDimDisplayModerationSettings(pAC); +- +- pAC->GIni.GIValIrqMask &= IRQ_MASK; + +- /* enable Interrupts */ +- SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); +- SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK); ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* ++ ** Has been setup already at SkGeInit(SK_INIT_IO), ++ ** but additional masking added for Genesis & Yukon ++ ** chipsets -> modify it... ++ */ ++ pAC->GIni.GIValIrqMask &= IRQ_MASK; ++#ifndef USE_TX_COMPLETE ++ pAC->GIni.GIValIrqMask &= ~(TX_COMPL_IRQS); ++#endif ++ } + + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + + if ((pAC->RlmtMode != 0) && (pAC->MaxPorts == 0)) { +- EvPara.Para32[0] = pAC->RlmtNets; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS, +- EvPara); +- EvPara.Para32[0] = pAC->RlmtMode; +- EvPara.Para32[1] = 0; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_MODE_CHANGE, +- EvPara); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS, ++ pAC->RlmtNets, -1, SK_FALSE); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_MODE_CHANGE, ++ pAC->RlmtMode, 0, SK_FALSE); + } + +- EvPara.Para32[0] = pNet->NetNr; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara); +- SkEventDispatcher(pAC, pAC->IoBase); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, ++ pNet->NetNr, -1, SK_TRUE); + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + +- pAC->MaxPorts++; +- pNet->Up = 1; ++#ifdef Y2_RECOVERY ++ pNet->TimerExpired = SK_FALSE; ++ pNet->InRecover = SK_FALSE; ++ pNet->NetConsoleMode = SK_FALSE; ++ ++ /* Initialize the kernel timer */ ++ init_timer(&pNet->KernelTimer); ++ pNet->KernelTimer.function = SkGeHandleKernelTimer; ++ pNet->KernelTimer.data = (unsigned long) pNet; ++ pNet->KernelTimer.expires = jiffies + (HZ/4); /* initially 250ms */ ++ add_timer(&pNet->KernelTimer); ++#endif ++ ++ /* enable Interrupts */ ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK); + ++ pAC->MaxPorts++; + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeOpen suceeded\n")); +@@ -1265,24 +2206,26 @@ + * error code - on error + */ + static int SkGeClose( +-struct SK_NET_DEVICE *dev) ++struct SK_NET_DEVICE *dev) /* the device that is to be closed */ + { +- DEV_NET *pNet; +- DEV_NET *newPtrNet; +- SK_AC *pAC; +- +- unsigned long Flags; /* for spin lock */ +- int i; +- int PortIdx; +- SK_EVPARA EvPara; +- ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ DEV_NET *newPtrNet; ++ unsigned long Flags; /* for the spin locks */ ++ int CurrMac; /* loop ctr for the current MAC */ ++ int PortIdx; ++#ifdef CONFIG_SK98LIN_NAPI ++ int WorkToDo = 1; /* min(*budget, dev->quota); */ ++ int WorkDone = 0; ++#endif + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeClose: pAC=0x%lX ", (unsigned long)pAC)); + +- pNet = (DEV_NET*) dev->priv; +- pAC = pNet->pAC; ++#ifdef Y2_RECOVERY ++ pNet->InRecover = SK_TRUE; ++ del_timer(&pNet->KernelTimer); ++#endif + +-#ifdef SK_DIAG_SUPPORT + if (pAC->DiagModeActive == DIAG_ACTIVE) { + if (pAC->DiagFlowCtrl == SK_FALSE) { + module_put(THIS_MODULE); +@@ -1302,7 +2245,6 @@ + pAC->DiagFlowCtrl = SK_FALSE; + } + } +-#endif + + netif_stop_queue(dev); + +@@ -1311,8 +2253,6 @@ + else + PortIdx = pNet->NetNr; + +- StopDrvCleanupTimer(pAC); +- + /* + * Clear multicast table, promiscuous mode .... + */ +@@ -1324,46 +2264,101 @@ + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + /* disable interrupts */ + SK_OUT32(pAC->IoBase, B0_IMSK, 0); +- EvPara.Para32[0] = pNet->NetNr; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- SkEventDispatcher(pAC, pAC->IoBase); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, ++ pNet->NetNr, -1, SK_TRUE); + SK_OUT32(pAC->IoBase, B0_IMSK, 0); + /* stop the hardware */ +- SkGeDeInit(pAC, pAC->IoBase); +- pAC->BoardLevel = SK_INIT_DATA; ++ ++ ++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 1)) { ++ /* RLMT check link state mode */ ++ for (CurrMac=0; CurrMacGIni.GIMacsFound; CurrMac++) { ++ if (CHIP_ID_YUKON_2(pAC)) ++ SkY2PortStop( pAC, ++ pAC->IoBase, ++ CurrMac, ++ SK_STOP_ALL, ++ SK_HARD_RST); ++ else ++ SkGeStopPort( pAC, ++ pAC->IoBase, ++ CurrMac, ++ SK_STOP_ALL, ++ SK_HARD_RST); ++ } /* for */ ++ } else { ++ /* Single link or single port */ ++ if (CHIP_ID_YUKON_2(pAC)) ++ SkY2PortStop( pAC, ++ pAC->IoBase, ++ PortIdx, ++ SK_STOP_ALL, ++ SK_HARD_RST); ++ else ++ SkGeStopPort( pAC, ++ pAC->IoBase, ++ PortIdx, ++ SK_STOP_ALL, ++ SK_HARD_RST); ++ } + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + } else { +- + spin_lock_irqsave(&pAC->SlowPathLock, Flags); +- EvPara.Para32[0] = pNet->NetNr; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- SkPnmiEvent(pAC, pAC->IoBase, SK_PNMI_EVT_XMAC_RESET, EvPara); +- SkEventDispatcher(pAC, pAC->IoBase); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, ++ pNet->NetNr, -1, SK_FALSE); ++ SkLocalEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET, ++ pNet->NetNr, -1, SK_TRUE); + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + + /* Stop port */ + spin_lock_irqsave(&pAC->TxPort[pNet->PortNr] + [TX_PRIO_LOW].TxDesRingLock, Flags); +- SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr, +- SK_STOP_ALL, SK_HARD_RST); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2PortStop(pAC, pAC->IoBase, pNet->PortNr, ++ SK_STOP_ALL, SK_HARD_RST); ++ } ++ else { ++ SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr, ++ SK_STOP_ALL, SK_HARD_RST); ++ } + spin_unlock_irqrestore(&pAC->TxPort[pNet->PortNr] + [TX_PRIO_LOW].TxDesRingLock, Flags); + } + + if (pAC->RlmtNets == 1) { + /* clear all descriptor rings */ +- for (i=0; iGIni.GIMacsFound; i++) { +- ReceiveIrq(pAC, &pAC->RxPort[i], SK_TRUE); +- ClearRxRing(pAC, &pAC->RxPort[i]); +- ClearTxRing(pAC, &pAC->TxPort[i][TX_PRIO_LOW]); ++ for (CurrMac=0; CurrMacGIni.GIMacsFound; CurrMac++) { ++ if (!CHIP_ID_YUKON_2(pAC)) { ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC,&pAC->RxPort[CurrMac], ++ SK_TRUE,&WorkDone,WorkToDo); ++#else ++ ReceiveIrq(pAC,&pAC->RxPort[CurrMac],SK_TRUE); ++#endif ++ ClearRxRing(pAC, &pAC->RxPort[CurrMac]); ++ ClearTxRing(pAC, &pAC->TxPort[CurrMac][TX_PRIO_LOW]); ++ } else { ++ SkY2FreeRxBuffers(pAC, pAC->IoBase, CurrMac); ++ SkY2FreeTxBuffers(pAC, pAC->IoBase, CurrMac); ++ } ++ } ++ } else { ++ /* clear port descriptor rings */ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE, &WorkDone, WorkToDo); ++#else ++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE); ++#endif ++ ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]); ++ ClearTxRing(pAC, &pAC->TxPort[pNet->PortNr][TX_PRIO_LOW]); ++ } ++ else { ++ SkY2FreeRxBuffers(pAC, pAC->IoBase, pNet->PortNr); ++ SkY2FreeTxBuffers(pAC, pAC->IoBase, pNet->PortNr); + } +- } else { +- /* clear port descriptor rings */ +- ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE); +- ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]); +- ClearTxRing(pAC, &pAC->TxPort[pNet->PortNr][TX_PRIO_LOW]); + } + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, +@@ -1374,9 +2369,12 @@ + sizeof(SK_PNMI_STRUCT_DATA)); + + pAC->MaxPorts--; +- pNet->Up = 0; +- + module_put(THIS_MODULE); ++ ++#ifdef Y2_RECOVERY ++ pNet->InRecover = SK_FALSE; ++#endif ++ + return (0); + } /* SkGeClose */ + +@@ -1434,9 +2432,11 @@ + } + + /* Transmitter out of resources? */ ++#ifdef USE_TX_COMPLETE + if (Rc <= 0) { + netif_stop_queue(dev); + } ++#endif + + /* If not taken, give buffer ownership back to the + * queueing layer. +@@ -1448,6 +2448,94 @@ + return (0); + } /* SkGeXmit */ + ++#ifdef CONFIG_SK98LIN_NAPI ++/***************************************************************************** ++ * ++ * SkGePoll - NAPI Rx polling callback for GEnesis and Yukon chipsets ++ * ++ * Description: ++ * Called by the Linux system in case NAPI polling is activated ++ * ++ * Returns: ++ * The number of work data still to be handled ++ */ ++static int SkGePoll(struct net_device *dev, int *budget) ++{ ++SK_AC *pAC = ((DEV_NET*)(dev->priv))->pAC; /* pointer to adapter context */ ++int WorkToDo = min(*budget, dev->quota); ++int WorkDone = 0; ++ ++ if (pAC->dev[0] != pAC->dev[1]) { ++#ifdef USE_TX_COMPLETE ++ spin_lock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock); ++ FreeTxDescriptors(pAC, &pAC->TxPort[1][TX_PRIO_LOW]); ++ spin_unlock(&pAC->TxPort[1][TX_PRIO_LOW].TxDesRingLock); ++#endif ++ ReceiveIrq(pAC, &pAC->RxPort[1], SK_TRUE, &WorkDone, WorkToDo); ++ CLEAR_AND_START_RX(1); ++ } ++#ifdef USE_TX_COMPLETE ++ spin_lock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock); ++ FreeTxDescriptors(pAC, &pAC->TxPort[0][TX_PRIO_LOW]); ++ spin_unlock(&pAC->TxPort[0][TX_PRIO_LOW].TxDesRingLock); ++#endif ++ ReceiveIrq(pAC, &pAC->RxPort[0], SK_TRUE, &WorkDone, WorkToDo); ++ CLEAR_AND_START_RX(0); ++ ++ *budget -= WorkDone; ++ dev->quota -= WorkDone; ++ ++ if(WorkDone < WorkToDo) { ++ netif_rx_complete(dev); ++ /* enable interrupts again */ ++ pAC->GIni.GIValIrqMask |= (NAPI_DRV_IRQS); ++#ifndef USE_TX_COMPLETE ++ pAC->GIni.GIValIrqMask &= ~(TX_COMPL_IRQS); ++#endif ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ } ++ return (WorkDone >= WorkToDo); ++} /* SkGePoll */ ++#endif ++ ++#ifdef SK_POLL_CONTROLLER ++/***************************************************************************** ++ * ++ * SkGeNetPoll - Polling "interrupt" ++ * ++ * Description: ++ * Polling 'interrupt' - used by things like netconsole and netdump ++ * to send skbs without having to re-enable interrupts. ++ * It's not called while the interrupt routine is executing. ++ */ ++static void SkGeNetPoll( ++struct SK_NET_DEVICE *dev) ++{ ++DEV_NET *pNet; ++SK_AC *pAC; ++ ++ pNet = (DEV_NET*) dev->priv; ++ pAC = pNet->pAC; ++ pNet->NetConsoleMode = SK_TRUE; ++ ++ /* Prevent any reconfiguration while handling ++ the 'interrupt' */ ++ SK_OUT32(pAC->IoBase, B0_IMSK, 0); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* Handle the GENESIS Isr */ ++ if (pAC->GIni.GIMacsFound == 2) ++ SkGeIsr(dev->irq, dev, NULL); ++ else ++ SkGeIsrOnePort(dev->irq, dev, NULL); ++ } else { ++ /* Handle the Yukon2 Isr */ ++ SkY2Isr(dev->irq, dev, NULL); ++ } ++ ++} ++#endif ++ + + /***************************************************************************** + * +@@ -1472,7 +2560,7 @@ + * < 0 - on failure: other problems ( -> return failure to upper layers) + */ + static int XmitFrame( +-SK_AC *pAC, /* pointer to adapter context */ ++SK_AC *pAC, /* pointer to adapter context */ + TX_PORT *pTxPort, /* pointer to struct of port to send to */ + struct sk_buff *pMessage) /* pointer to send-message */ + { +@@ -1488,11 +2576,14 @@ + + spin_lock_irqsave(&pTxPort->TxDesRingLock, Flags); + #ifndef USE_TX_COMPLETE +- FreeTxDescriptors(pAC, pTxPort); ++ if ((pTxPort->TxdRingPrevFree - pTxPort->TxdRingFree) > 6) { ++ FreeTxDescriptors(pAC, pTxPort); ++ pTxPort->TxdRingPrevFree = pTxPort->TxdRingFree; ++ } + #endif + if (pTxPort->TxdRingFree == 0) { + /* +- ** no enough free descriptors in ring at the moment. ++ ** not enough free descriptors in ring at the moment. + ** Maybe free'ing some old one help? + */ + FreeTxDescriptors(pAC, pTxPort); +@@ -1578,7 +2669,7 @@ + BMU_IRQ_EOF | + #endif + pMessage->len; +- } else { ++ } else { + pTxd->TBControl = BMU_OWN | BMU_STF | BMU_CHECK | + BMU_SW | BMU_EOF | + #ifdef USE_TX_COMPLETE +@@ -1914,7 +3005,7 @@ + SK_U16 Length; /* data fragment length */ + SK_U64 PhysAddr; /* physical address of a rx buffer */ + +- pMsgBlock = alloc_skb(pAC->RxBufSize, GFP_ATOMIC); ++ pMsgBlock = alloc_skb(pRxPort->RxBufSize, GFP_ATOMIC); + if (pMsgBlock == NULL) { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_ENTRY, +@@ -1928,12 +3019,12 @@ + pRxd = pRxPort->pRxdRingTail; + pRxPort->pRxdRingTail = pRxd->pNextRxd; + pRxPort->RxdRingFree--; +- Length = pAC->RxBufSize; ++ Length = pRxPort->RxBufSize; + PhysAddr = (SK_U64) pci_map_page(pAC->PciDev, + virt_to_page(pMsgBlock->data), + ((unsigned long) pMsgBlock->data & + ~PAGE_MASK), +- pAC->RxBufSize - 2, ++ pRxPort->RxBufSize - 2, + PCI_DMA_FROMDEVICE); + + pRxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff); +@@ -1973,7 +3064,7 @@ + pRxd = pRxPort->pRxdRingTail; + pRxPort->pRxdRingTail = pRxd->pNextRxd; + pRxPort->RxdRingFree--; +- Length = pAC->RxBufSize; ++ Length = pRxPort->RxBufSize; + + pRxd->VDataLow = PhysLow; + pRxd->VDataHigh = PhysHigh; +@@ -1998,33 +3089,40 @@ + * Returns: N/A + */ + static void ReceiveIrq( +- SK_AC *pAC, /* pointer to adapter context */ +- RX_PORT *pRxPort, /* pointer to receive port struct */ +- SK_BOOL SlowPathLock) /* indicates if SlowPathLock is needed */ +-{ +-RXD *pRxd; /* pointer to receive descriptors */ +-SK_U32 Control; /* control field of descriptor */ +-struct sk_buff *pMsg; /* pointer to message holding frame */ +-struct sk_buff *pNewMsg; /* pointer to a new message for copying frame */ +-int FrameLength; /* total length of received frame */ +-int IpFrameLength; +-SK_MBUF *pRlmtMbuf; /* ptr to a buffer for giving a frame to rlmt */ +-SK_EVPARA EvPara; /* an event parameter union */ +-unsigned long Flags; /* for spin lock */ +-int PortIndex = pRxPort->PortIndex; +-unsigned int Offset; +-unsigned int NumBytes; +-unsigned int ForRlmt; +-SK_BOOL IsBc; +-SK_BOOL IsMc; +-SK_BOOL IsBadFrame; /* Bad frame */ +- +-SK_U32 FrameStat; +-unsigned short Csum1; +-unsigned short Csum2; +-unsigned short Type; +-int Result; +-SK_U64 PhysAddr; ++#ifdef CONFIG_SK98LIN_NAPI ++SK_AC *pAC, /* pointer to adapter context */ ++RX_PORT *pRxPort, /* pointer to receive port struct */ ++SK_BOOL SlowPathLock, /* indicates if SlowPathLock is needed */ ++int *WorkDone, ++int WorkToDo) ++#else ++SK_AC *pAC, /* pointer to adapter context */ ++RX_PORT *pRxPort, /* pointer to receive port struct */ ++SK_BOOL SlowPathLock) /* indicates if SlowPathLock is needed */ ++#endif ++{ ++ RXD *pRxd; /* pointer to receive descriptors */ ++ struct sk_buff *pMsg; /* pointer to message holding frame */ ++ struct sk_buff *pNewMsg; /* pointer to new message for frame copy */ ++ SK_MBUF *pRlmtMbuf; /* ptr to buffer for giving frame to RLMT */ ++ SK_EVPARA EvPara; /* an event parameter union */ ++ SK_U32 Control; /* control field of descriptor */ ++ unsigned long Flags; /* for spin lock handling */ ++ int PortIndex = pRxPort->PortIndex; ++ int FrameLength; /* total length of received frame */ ++ int IpFrameLength; /* IP length of the received frame */ ++ unsigned int Offset; ++ unsigned int NumBytes; ++ unsigned int RlmtNotifier; ++ SK_BOOL IsBc; /* we received a broadcast packet */ ++ SK_BOOL IsMc; /* we received a multicast packet */ ++ SK_BOOL IsBadFrame; /* the frame received is bad! */ ++ SK_U32 FrameStat; ++ unsigned short Csum1; ++ unsigned short Csum2; ++ unsigned short Type; ++ int Result; ++ SK_U64 PhysAddr; + + rx_start: + /* do forever; exit if BMU_OWN found */ +@@ -2046,6 +3144,13 @@ + + Control = pRxd->RBControl; + ++#ifdef CONFIG_SK98LIN_NAPI ++ if (*WorkDone >= WorkToDo) { ++ break; ++ } ++ (*WorkDone)++; ++#endif ++ + /* check if this descriptor is ready */ + if ((Control & BMU_OWN) != 0) { + /* this descriptor is not yet ready */ +@@ -2054,11 +3159,10 @@ + FillRxRing(pAC, pRxPort); + return; + } +- pAC->DynIrqModInfo.NbrProcessedDescr++; + + /* get length of frame and check it */ + FrameLength = Control & BMU_BBC; +- if (FrameLength > pAC->RxBufSize) { ++ if (FrameLength > pRxPort->RxBufSize) { + goto rx_failed; + } + +@@ -2073,8 +3177,8 @@ + FrameStat = pRxd->FrameStat; + + /* check for frame length mismatch */ +-#define XMR_FS_LEN_SHIFT 18 +-#define GMR_FS_LEN_SHIFT 16 ++#define XMR_FS_LEN_SHIFT 18 ++#define GMR_FS_LEN_SHIFT 16 + if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { + if (FrameLength != (SK_U32) (FrameStat >> XMR_FS_LEN_SHIFT)) { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, +@@ -2084,8 +3188,7 @@ + (SK_U32) (FrameStat >> XMR_FS_LEN_SHIFT))); + goto rx_failed; + } +- } +- else { ++ } else { + if (FrameLength != (SK_U32) (FrameStat >> GMR_FS_LEN_SHIFT)) { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_RX_PROGRESS, +@@ -2118,9 +3221,6 @@ + /* DumpMsg(pMsg, "Rx"); */ + + if ((Control & BMU_STAT_VAL) != BMU_STAT_VAL || (IsBadFrame)) { +-#if 0 +- (FrameStat & (XMR_FS_ANY_ERR | XMR_FS_2L_VLAN)) != 0) { +-#endif + /* there is a receive error in this frame */ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, + SK_DBGCAT_DRV_RX_PROGRESS, +@@ -2128,6 +3228,20 @@ + "Control: %x\nRxStat: %x\n", + Control, FrameStat)); + ++ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; ++ PhysAddr |= (SK_U64) pRxd->VDataLow; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ++ pci_dma_sync_single(pAC->PciDev, ++ (dma_addr_t) PhysAddr, ++ FrameLength, ++ PCI_DMA_FROMDEVICE); ++#else ++ pci_dma_sync_single_for_cpu(pAC->PciDev, ++ (dma_addr_t) PhysAddr, ++ FrameLength, ++ PCI_DMA_FROMDEVICE); ++#endif + ReQueueRxBuffer(pAC, pRxPort, pMsg, + pRxd->VDataHigh, pRxd->VDataLow); + +@@ -2147,150 +3261,107 @@ + skb_put(pNewMsg, FrameLength); + PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; + PhysAddr |= (SK_U64) pRxd->VDataLow; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ++ pci_dma_sync_single(pAC->PciDev, ++ (dma_addr_t) PhysAddr, ++ FrameLength, ++ PCI_DMA_FROMDEVICE); ++#else ++ pci_dma_sync_single_for_device(pAC->PciDev, ++ (dma_addr_t) PhysAddr, ++ FrameLength, ++ PCI_DMA_FROMDEVICE); ++#endif + +- pci_dma_sync_single_for_cpu(pAC->PciDev, +- (dma_addr_t) PhysAddr, +- FrameLength, +- PCI_DMA_FROMDEVICE); + eth_copy_and_sum(pNewMsg, pMsg->data, + FrameLength, 0); +- pci_dma_sync_single_for_device(pAC->PciDev, +- (dma_addr_t) PhysAddr, +- FrameLength, +- PCI_DMA_FROMDEVICE); + ReQueueRxBuffer(pAC, pRxPort, pMsg, + pRxd->VDataHigh, pRxd->VDataLow); + + pMsg = pNewMsg; + +- } +- else { ++ } else { + /* + * if large frame, or SKB allocation failed, pass + * the SKB directly to the networking + */ +- + PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; + PhysAddr |= (SK_U64) pRxd->VDataLow; + + /* release the DMA mapping */ + pci_unmap_single(pAC->PciDev, + PhysAddr, +- pAC->RxBufSize - 2, ++ pRxPort->RxBufSize - 2, + PCI_DMA_FROMDEVICE); ++ skb_put(pMsg, FrameLength); /* set message len */ ++ pMsg->ip_summed = CHECKSUM_NONE; /* initial default */ + +- /* set length in message */ +- skb_put(pMsg, FrameLength); +- /* hardware checksum */ +- Type = ntohs(*((short*)&pMsg->data[12])); +- +-#ifdef USE_SK_RX_CHECKSUM +- if (Type == 0x800) { +- Csum1=le16_to_cpu(pRxd->TcpSums & 0xffff); +- Csum2=le16_to_cpu((pRxd->TcpSums >> 16) & 0xffff); +- IpFrameLength = (int) ntohs((unsigned short) +- ((unsigned short *) pMsg->data)[8]); +- +- /* +- * Test: If frame is padded, a check is not possible! +- * Frame not padded? Length difference must be 14 (0xe)! +- */ +- if ((FrameLength - IpFrameLength) != 0xe) { +- /* Frame padded => TCP offload not possible! */ +- pMsg->ip_summed = CHECKSUM_NONE; +- } else { +- /* Frame not padded => TCP offload! */ +- if ((((Csum1 & 0xfffe) && (Csum2 & 0xfffe)) && +- (pAC->GIni.GIChipId == CHIP_ID_GENESIS)) || +- (pAC->ChipsetType)) { +- Result = SkCsGetReceiveInfo(pAC, +- &pMsg->data[14], +- Csum1, Csum2, pRxPort->PortIndex); +- if (Result == +- SKCS_STATUS_IP_FRAGMENT || +- Result == +- SKCS_STATUS_IP_CSUM_OK || +- Result == +- SKCS_STATUS_TCP_CSUM_OK || +- Result == +- SKCS_STATUS_UDP_CSUM_OK) { +- pMsg->ip_summed = +- CHECKSUM_UNNECESSARY; +- } +- else if (Result == +- SKCS_STATUS_TCP_CSUM_ERROR || +- Result == +- SKCS_STATUS_UDP_CSUM_ERROR || +- Result == +- SKCS_STATUS_IP_CSUM_ERROR_UDP || +- Result == +- SKCS_STATUS_IP_CSUM_ERROR_TCP || +- Result == +- SKCS_STATUS_IP_CSUM_ERROR ) { +- /* HW Checksum error */ +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, +- SK_DBGCAT_DRV_RX_PROGRESS, +- ("skge: CRC error. Frame dropped!\n")); +- goto rx_failed; +- } else { +- pMsg->ip_summed = +- CHECKSUM_NONE; +- } +- }/* checksumControl calculation valid */ +- } /* Frame length check */ +- } /* IP frame */ +-#else +- pMsg->ip_summed = CHECKSUM_NONE; +-#endif ++ if (pRxPort->UseRxCsum) { ++ Type = ntohs(*((short*)&pMsg->data[12])); ++ if (Type == 0x800) { ++ IpFrameLength = (int) ntohs((unsigned short) ++ ((unsigned short *) pMsg->data)[8]); ++ if ((FrameLength - IpFrameLength) == 0xe) { ++ Csum1=le16_to_cpu(pRxd->TcpSums & 0xffff); ++ Csum2=le16_to_cpu((pRxd->TcpSums >> 16) & 0xffff); ++ if ((((Csum1 & 0xfffe) && (Csum2 & 0xfffe)) && ++ (pAC->GIni.GIChipId == CHIP_ID_GENESIS)) || ++ (pAC->ChipsetType)) { ++ Result = SkCsGetReceiveInfo(pAC, &pMsg->data[14], ++ Csum1, Csum2, PortIndex); ++ if ((Result == SKCS_STATUS_IP_FRAGMENT) || ++ (Result == SKCS_STATUS_IP_CSUM_OK) || ++ (Result == SKCS_STATUS_TCP_CSUM_OK) || ++ (Result == SKCS_STATUS_UDP_CSUM_OK)) { ++ pMsg->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if ((Result == SKCS_STATUS_TCP_CSUM_ERROR) || ++ (Result == SKCS_STATUS_UDP_CSUM_ERROR) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR_UDP) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR_TCP) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR)) { ++ /* HW Checksum error */ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS, ++ ("skge: CRC error. Frame dropped!\n")); ++ goto rx_failed; ++ } else { ++ pMsg->ip_summed = CHECKSUM_NONE; ++ } ++ }/* checksumControl calculation valid */ ++ } /* Frame length check */ ++ } /* IP frame */ ++ } /* pRxPort->UseRxCsum */ + } /* frame > SK_COPY_TRESHOLD */ + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("V")); +- ForRlmt = SK_RLMT_RX_PROTOCOL; +-#if 0 +- IsBc = (FrameStat & XMR_FS_BC)==XMR_FS_BC; +-#endif ++ RlmtNotifier = SK_RLMT_RX_PROTOCOL; + SK_RLMT_PRE_LOOKAHEAD(pAC, PortIndex, FrameLength, +- IsBc, &Offset, &NumBytes); ++ IsBc, &Offset, &NumBytes); + if (NumBytes != 0) { +-#if 0 +- IsMc = (FrameStat & XMR_FS_MC)==XMR_FS_MC; +-#endif +- SK_RLMT_LOOKAHEAD(pAC, PortIndex, +- &pMsg->data[Offset], +- IsBc, IsMc, &ForRlmt); ++ SK_RLMT_LOOKAHEAD(pAC,PortIndex,&pMsg->data[Offset], ++ IsBc,IsMc,&RlmtNotifier); + } +- if (ForRlmt == SK_RLMT_RX_PROTOCOL) { +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("W")); ++ if (RlmtNotifier == SK_RLMT_RX_PROTOCOL) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("W")); + /* send up only frames from active port */ +- if ((PortIndex == pAC->ActivePort) || +- (pAC->RlmtNets == 2)) { +- /* frame for upper layer */ ++ if ((PortIndex == pAC->ActivePort)||(pAC->RlmtNets == 2)) { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, 1,("U")); + #ifdef xDEBUG + DumpMsg(pMsg, "Rx"); + #endif +- SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC, +- FrameLength, pRxPort->PortIndex); +- +- pMsg->dev = pAC->dev[pRxPort->PortIndex]; +- pMsg->protocol = eth_type_trans(pMsg, +- pAC->dev[pRxPort->PortIndex]); +- netif_rx(pMsg); +- pAC->dev[pRxPort->PortIndex]->last_rx = jiffies; +- } +- else { +- /* drop frame */ ++ SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,FrameLength,PortIndex); ++ pMsg->dev = pAC->dev[PortIndex]; ++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[PortIndex]); ++ netif_rx(pMsg); /* frame for upper layer */ ++ pAC->dev[PortIndex]->last_rx = jiffies; ++ } else { + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, +- SK_DBGCAT_DRV_RX_PROGRESS, +- ("D")); +- DEV_KFREE_SKB(pMsg); ++ SK_DBGCAT_DRV_RX_PROGRESS,("D")); ++ DEV_KFREE_SKB(pMsg); /* drop frame */ + } +- +- } /* if not for rlmt */ +- else { +- /* packet for rlmt */ ++ } else { /* packet for RLMT stack */ + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, +- SK_DBGCAT_DRV_RX_PROGRESS, ("R")); ++ SK_DBGCAT_DRV_RX_PROGRESS,("R")); + pRlmtMbuf = SkDrvAllocRlmtMbuf(pAC, + pAC->IoBase, FrameLength); + if (pRlmtMbuf != NULL) { +@@ -2318,32 +3389,22 @@ + } + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, +- SK_DBGCAT_DRV_RX_PROGRESS, +- ("Q")); ++ SK_DBGCAT_DRV_RX_PROGRESS,("Q")); + } +- if ((pAC->dev[pRxPort->PortIndex]->flags & +- (IFF_PROMISC | IFF_ALLMULTI)) != 0 || +- (ForRlmt & SK_RLMT_RX_PROTOCOL) == +- SK_RLMT_RX_PROTOCOL) { +- pMsg->dev = pAC->dev[pRxPort->PortIndex]; +- pMsg->protocol = eth_type_trans(pMsg, +- pAC->dev[pRxPort->PortIndex]); ++ if ((pAC->dev[PortIndex]->flags & (IFF_PROMISC | IFF_ALLMULTI)) || ++ (RlmtNotifier & SK_RLMT_RX_PROTOCOL)) { ++ pMsg->dev = pAC->dev[PortIndex]; ++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[PortIndex]); + netif_rx(pMsg); +- pAC->dev[pRxPort->PortIndex]->last_rx = jiffies; +- } +- else { ++ pAC->dev[PortIndex]->last_rx = jiffies; ++ } else { + DEV_KFREE_SKB(pMsg); + } +- +- } /* if packet for rlmt */ ++ } /* if packet for RLMT stack */ + } /* for ... scanning the RXD ring */ + + /* RXD ring is empty -> fill and restart */ + FillRxRing(pAC, pRxPort); +- /* do not start if called from Close */ +- if (pAC->BoardLevel > SK_INIT_DATA) { +- ClearAndStartRx(pAC, PortIndex); +- } + return; + + rx_failed: +@@ -2357,7 +3418,7 @@ + PhysAddr |= (SK_U64) pRxd->VDataLow; + pci_unmap_page(pAC->PciDev, + PhysAddr, +- pAC->RxBufSize - 2, ++ pRxPort->RxBufSize - 2, + PCI_DMA_FROMDEVICE); + DEV_KFREE_SKB_IRQ(pRxd->pMBuf); + pRxd->pMBuf = NULL; +@@ -2367,49 +3428,6 @@ + + } /* ReceiveIrq */ + +- +-/***************************************************************************** +- * +- * ClearAndStartRx - give a start receive command to BMU, clear IRQ +- * +- * Description: +- * This function sends a start command and a clear interrupt +- * command for one receive queue to the BMU. +- * +- * Returns: N/A +- * none +- */ +-static void ClearAndStartRx( +-SK_AC *pAC, /* pointer to the adapter context */ +-int PortIndex) /* index of the receive port (XMAC) */ +-{ +- SK_OUT8(pAC->IoBase, +- RxQueueAddr[PortIndex]+Q_CSR, +- CSR_START | CSR_IRQ_CL_F); +-} /* ClearAndStartRx */ +- +- +-/***************************************************************************** +- * +- * ClearTxIrq - give a clear transmit IRQ command to BMU +- * +- * Description: +- * This function sends a clear tx IRQ command for one +- * transmit queue to the BMU. +- * +- * Returns: N/A +- */ +-static void ClearTxIrq( +-SK_AC *pAC, /* pointer to the adapter context */ +-int PortIndex, /* index of the transmit port (XMAC) */ +-int Prio) /* priority or normal queue */ +-{ +- SK_OUT8(pAC->IoBase, +- TxQueueAddr[PortIndex][Prio]+Q_CSR, +- CSR_IRQ_CL_F); +-} /* ClearTxIrq */ +- +- + /***************************************************************************** + * + * ClearRxRing - remove all buffers from the receive ring +@@ -2440,7 +3458,7 @@ + PhysAddr |= (SK_U64) pRxd->VDataLow; + pci_unmap_page(pAC->PciDev, + PhysAddr, +- pAC->RxBufSize - 2, ++ pRxPort->RxBufSize - 2, + PCI_DMA_FROMDEVICE); + DEV_KFREE_SKB(pRxd->pMBuf); + pRxd->pMBuf = NULL; +@@ -2500,29 +3518,30 @@ + + DEV_NET *pNet = (DEV_NET*) dev->priv; + SK_AC *pAC = pNet->pAC; ++int Ret; + + struct sockaddr *addr = p; + unsigned long Flags; + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeSetMacAddr starts now...\n")); +- if(netif_running(dev)) +- return -EBUSY; + + memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); + + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + + if (pAC->RlmtNets == 2) +- SkAddrOverride(pAC, pAC->IoBase, pNet->NetNr, ++ Ret = SkAddrOverride(pAC, pAC->IoBase, pNet->NetNr, + (SK_MAC_ADDR*)dev->dev_addr, SK_ADDR_VIRTUAL_ADDRESS); + else +- SkAddrOverride(pAC, pAC->IoBase, pAC->ActivePort, ++ Ret = SkAddrOverride(pAC, pAC->IoBase, pAC->ActivePort, + (SK_MAC_ADDR*)dev->dev_addr, SK_ADDR_VIRTUAL_ADDRESS); +- +- + + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); ++ ++ if (Ret != SK_ADDR_OVERRIDE_SUCCESS) ++ return -EBUSY; ++ + return 0; + } /* SkGeSetMacAddr */ + +@@ -2604,6 +3623,45 @@ + + /***************************************************************************** + * ++ * SkSetMtuBufferSize - set the MTU buffer to another value ++ * ++ * Description: ++ * This function sets the new buffers and is called whenever the MTU ++ * size is changed ++ * ++ * Returns: ++ * N/A ++ */ ++ ++static void SkSetMtuBufferSize( ++SK_AC *pAC, /* pointer to adapter context */ ++int PortNr, /* Port number */ ++int Mtu) /* pointer to tx prt struct */ ++{ ++ pAC->RxPort[PortNr].RxBufSize = Mtu + 32; ++ ++ /* RxBufSize must be a multiple of 8 */ ++ while (pAC->RxPort[PortNr].RxBufSize % 8) { ++ pAC->RxPort[PortNr].RxBufSize = ++ pAC->RxPort[PortNr].RxBufSize + 1; ++ } ++ ++ if (Mtu > 1500) { ++ pAC->GIni.GP[PortNr].PPortUsage = SK_JUMBO_LINK; ++ } else { ++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { ++ pAC->GIni.GP[PortNr].PPortUsage = SK_MUL_LINK; ++ } else { ++ pAC->GIni.GP[PortNr].PPortUsage = SK_RED_LINK; ++ } ++ } ++ ++ return; ++} ++ ++ ++/***************************************************************************** ++ * + * SkGeChangeMtu - set the MTU to another value + * + * Description: +@@ -2617,12 +3675,13 @@ + */ + static int SkGeChangeMtu(struct SK_NET_DEVICE *dev, int NewMtu) + { +-DEV_NET *pNet; +-DEV_NET *pOtherNet; +-SK_AC *pAC; +-unsigned long Flags; +-int i; +-SK_EVPARA EvPara; ++DEV_NET *pNet; ++SK_AC *pAC; ++unsigned long Flags; ++#ifdef CONFIG_SK98LIN_NAPI ++int WorkToDo = 1; // min(*budget, dev->quota); ++int WorkDone = 0; ++#endif + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeChangeMtu starts now...\n")); +@@ -2630,15 +3689,12 @@ + pNet = (DEV_NET*) dev->priv; + pAC = pNet->pAC; + ++ /* MTU size outside the spec */ + if ((NewMtu < 68) || (NewMtu > SK_JUMBO_MTU)) { + return -EINVAL; + } + +- if(pAC->BoardLevel != SK_INIT_RUN) { +- return -EINVAL; +- } +- +-#ifdef SK_DIAG_SUPPORT ++ /* Diag access active */ + if (pAC->DiagModeActive == DIAG_ACTIVE) { + if (pAC->DiagFlowCtrl == SK_FALSE) { + return -1; /* still in use, deny any actions of MTU */ +@@ -2646,200 +3702,74 @@ + pAC->DiagFlowCtrl = SK_FALSE; + } + } +-#endif +- +- pNet->Mtu = NewMtu; +- pOtherNet = (DEV_NET*)pAC->dev[1 - pNet->NetNr]->priv; +- if ((pOtherNet->Mtu>1500) && (NewMtu<=1500) && (pOtherNet->Up==1)) { +- return(0); +- } + +- pAC->RxBufSize = NewMtu + 32; + dev->mtu = NewMtu; ++ SkSetMtuBufferSize(pAC, pNet->PortNr, NewMtu); + +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, +- ("New MTU: %d\n", NewMtu)); ++ if(!netif_running(dev)) { ++ /* Preset MTU size if device not ready/running */ ++ return 0; ++ } + +- /* +- ** Prevent any reconfiguration while changing the MTU +- ** by disabling any interrupts +- */ ++ /* Prevent any reconfiguration while changing the MTU ++ by disabling any interrupts */ + SK_OUT32(pAC->IoBase, B0_IMSK, 0); + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + +- /* +- ** Notify RLMT that any ports are to be stopped +- */ +- EvPara.Para32[0] = 0; +- EvPara.Para32[1] = -1; +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- EvPara.Para32[0] = 1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- } else { +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- } +- +- /* +- ** After calling the SkEventDispatcher(), RLMT is aware about +- ** the stopped ports -> configuration can take place! +- */ +- SkEventDispatcher(pAC, pAC->IoBase); +- +- for (i=0; iGIni.GIMacsFound; i++) { +- spin_lock(&pAC->TxPort[i][TX_PRIO_LOW].TxDesRingLock); +- netif_stop_queue(pAC->dev[i]); ++ /* Notify RLMT that the port has to be stopped */ ++ netif_stop_queue(dev); ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, ++ pNet->PortNr, -1, SK_TRUE); ++ spin_lock(&pAC->TxPort[pNet->PortNr][TX_PRIO_LOW].TxDesRingLock); + +- } + +- /* +- ** Depending on the desired MTU size change, a different number of +- ** RX buffers need to be allocated +- */ +- if (NewMtu > 1500) { +- /* +- ** Use less rx buffers +- */ +- for (i=0; iGIni.GIMacsFound; i++) { +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing - +- (pAC->RxDescrPerRing / 4); +- } else { +- if (i == pAC->ActivePort) { +- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing - +- (pAC->RxDescrPerRing / 4); +- } else { +- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing - +- (pAC->RxDescrPerRing / 10); +- } +- } +- } ++ /* Change RxFillLimit to 1 */ ++ if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { ++ pAC->RxPort[pNet->PortNr].RxFillLimit = 1; + } else { +- /* +- ** Use the normal amount of rx buffers +- */ +- for (i=0; iGIni.GIMacsFound; i++) { +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- pAC->RxPort[i].RxFillLimit = 1; +- } else { +- if (i == pAC->ActivePort) { +- pAC->RxPort[i].RxFillLimit = 1; +- } else { +- pAC->RxPort[i].RxFillLimit = pAC->RxDescrPerRing - +- (pAC->RxDescrPerRing / 4); +- } +- } +- } ++ pAC->RxPort[1 - pNet->PortNr].RxFillLimit = 1; ++ pAC->RxPort[pNet->PortNr].RxFillLimit = pAC->RxDescrPerRing - ++ (pAC->RxDescrPerRing / 4); + } +- +- SkGeDeInit(pAC, pAC->IoBase); + +- /* +- ** enable/disable hardware support for long frames +- */ +- if (NewMtu > 1500) { +-// pAC->JumboActivated = SK_TRUE; /* is never set back !!! */ +- pAC->GIni.GIPortUsage = SK_JUMBO_LINK; ++ /* clear and reinit the rx rings here, because of new MTU size */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2PortStop(pAC, pAC->IoBase, pNet->PortNr, SK_STOP_ALL, SK_SOFT_RST); ++ SkY2AllocateRxBuffers(pAC, pAC->IoBase, pNet->PortNr); ++ SkY2PortStart(pAC, pAC->IoBase, pNet->PortNr); + } else { +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- pAC->GIni.GIPortUsage = SK_MUL_LINK; +- } else { +- pAC->GIni.GIPortUsage = SK_RED_LINK; +- } +- } ++// SkGeStopPort(pAC, pAC->IoBase, pNet->PortNr, SK_STOP_ALL, SK_SOFT_RST); ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE, &WorkDone, WorkToDo); ++#else ++ ReceiveIrq(pAC, &pAC->RxPort[pNet->PortNr], SK_TRUE); ++#endif ++ ClearRxRing(pAC, &pAC->RxPort[pNet->PortNr]); ++ FillRxRing(pAC, &pAC->RxPort[pNet->PortNr]); + +- SkGeInit( pAC, pAC->IoBase, SK_INIT_IO); +- SkI2cInit( pAC, pAC->IoBase, SK_INIT_IO); +- SkEventInit(pAC, pAC->IoBase, SK_INIT_IO); +- SkPnmiInit( pAC, pAC->IoBase, SK_INIT_IO); +- SkAddrInit( pAC, pAC->IoBase, SK_INIT_IO); +- SkRlmtInit( pAC, pAC->IoBase, SK_INIT_IO); +- SkTimerInit(pAC, pAC->IoBase, SK_INIT_IO); +- +- /* +- ** tschilling: +- ** Speed and others are set back to default in level 1 init! +- */ +- GetConfiguration(pAC); +- +- SkGeInit( pAC, pAC->IoBase, SK_INIT_RUN); +- SkI2cInit( pAC, pAC->IoBase, SK_INIT_RUN); +- SkEventInit(pAC, pAC->IoBase, SK_INIT_RUN); +- SkPnmiInit( pAC, pAC->IoBase, SK_INIT_RUN); +- SkAddrInit( pAC, pAC->IoBase, SK_INIT_RUN); +- SkRlmtInit( pAC, pAC->IoBase, SK_INIT_RUN); +- SkTimerInit(pAC, pAC->IoBase, SK_INIT_RUN); ++ /* Enable transmit descriptor polling */ ++ SkGePollTxD(pAC, pAC->IoBase, pNet->PortNr, SK_TRUE); ++ FillRxRing(pAC, &pAC->RxPort[pNet->PortNr]); ++ } + +- /* +- ** clear and reinit the rx rings here +- */ +- for (i=0; iGIni.GIMacsFound; i++) { +- ReceiveIrq(pAC, &pAC->RxPort[i], SK_TRUE); +- ClearRxRing(pAC, &pAC->RxPort[i]); +- FillRxRing(pAC, &pAC->RxPort[i]); ++ netif_start_queue(pAC->dev[pNet->PortNr]); + +- /* +- ** Enable transmit descriptor polling +- */ +- SkGePollTxD(pAC, pAC->IoBase, i, SK_TRUE); +- FillRxRing(pAC, &pAC->RxPort[i]); +- }; ++ spin_unlock(&pAC->TxPort[pNet->PortNr][TX_PRIO_LOW].TxDesRingLock); + +- SkGeYellowLED(pAC, pAC->IoBase, 1); +- SkDimEnableModerationIfNeeded(pAC); +- SkDimDisplayModerationSettings(pAC); + +- netif_start_queue(pAC->dev[pNet->PortNr]); +- for (i=pAC->GIni.GIMacsFound-1; i>=0; i--) { +- spin_unlock(&pAC->TxPort[i][TX_PRIO_LOW].TxDesRingLock); +- } ++ /* Notify RLMT about the changing and restarting one (or more) ports */ ++ SkLocalEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, ++ pNet->PortNr, -1, SK_TRUE); + +- /* +- ** Enable Interrupts again +- */ ++ /* Enable Interrupts again */ + SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); + SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK); + +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara); +- SkEventDispatcher(pAC, pAC->IoBase); +- +- /* +- ** Notify RLMT about the changing and restarting one (or more) ports +- */ +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- EvPara.Para32[0] = pAC->RlmtNets; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_SET_NETS, EvPara); +- EvPara.Para32[0] = pNet->PortNr; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara); +- +- if (pOtherNet->Up) { +- EvPara.Para32[0] = pOtherNet->PortNr; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara); +- } +- } else { +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_START, EvPara); +- } +- +- SkEventDispatcher(pAC, pAC->IoBase); + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +- +- /* +- ** While testing this driver with latest kernel 2.5 (2.5.70), it +- ** seems as if upper layers have a problem to handle a successful +- ** return value of '0'. If such a zero is returned, the complete +- ** system hangs for several minutes (!), which is in acceptable. +- ** +- ** Currently it is not clear, what the exact reason for this problem +- ** is. The implemented workaround for 2.5 is to return the desired +- ** new MTU size if all needed changes for the new MTU size where +- ** performed. In kernels 2.2 and 2.4, a zero value is returned, +- ** which indicates the successful change of the mtu-size. +- */ +- return NewMtu; ++ return 0; + +-} /* SkGeChangeMtu */ ++} + + + /***************************************************************************** +@@ -2857,42 +3787,38 @@ + { + DEV_NET *pNet = (DEV_NET*) dev->priv; + SK_AC *pAC = pNet->pAC; +-SK_PNMI_STRUCT_DATA *pPnmiStruct; /* structure for all Pnmi-Data */ +-SK_PNMI_STAT *pPnmiStat; /* pointer to virtual XMAC stat. data */ +-SK_PNMI_CONF *pPnmiConf; /* pointer to virtual link config. */ +-unsigned int Size; /* size of pnmi struct */ ++SK_PNMI_STRUCT_DATA *pPnmiStruct; /* structure for all Pnmi-Data */ ++SK_PNMI_STAT *pPnmiStat; /* pointer to virtual XMAC stat. data */ ++SK_PNMI_CONF *pPnmiConf; /* pointer to virtual link config. */ ++unsigned int Size; /* size of pnmi struct */ + unsigned long Flags; /* for spin lock */ + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeStats starts now...\n")); + pPnmiStruct = &pAC->PnmiStruct; + +-#ifdef SK_DIAG_SUPPORT +- if ((pAC->DiagModeActive == DIAG_NOTACTIVE) && +- (pAC->BoardLevel == SK_INIT_RUN)) { +-#endif +- SK_MEMSET(pPnmiStruct, 0, sizeof(SK_PNMI_STRUCT_DATA)); +- spin_lock_irqsave(&pAC->SlowPathLock, Flags); +- Size = SK_PNMI_STRUCT_SIZE; +- SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, pNet->NetNr); +- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +-#ifdef SK_DIAG_SUPPORT ++ if ((pAC->DiagModeActive == DIAG_NOTACTIVE) && ++ (pAC->BoardLevel == SK_INIT_RUN)) { ++ SK_MEMSET(pPnmiStruct, 0, sizeof(SK_PNMI_STRUCT_DATA)); ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); ++ Size = SK_PNMI_STRUCT_SIZE; ++ SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, pNet->NetNr); ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + } +-#endif + +- pPnmiStat = &pPnmiStruct->Stat[0]; +- pPnmiConf = &pPnmiStruct->Conf[0]; ++ pPnmiStat = &pPnmiStruct->Stat[0]; ++ pPnmiConf = &pPnmiStruct->Conf[0]; + + pAC->stats.rx_packets = (SK_U32) pPnmiStruct->RxDeliveredCts & 0xFFFFFFFF; + pAC->stats.tx_packets = (SK_U32) pPnmiStat->StatTxOkCts & 0xFFFFFFFF; + pAC->stats.rx_bytes = (SK_U32) pPnmiStruct->RxOctetsDeliveredCts; + pAC->stats.tx_bytes = (SK_U32) pPnmiStat->StatTxOctetsOkCts; + +- if (pNet->Mtu <= 1500) { +- pAC->stats.rx_errors = (SK_U32) pPnmiStruct->InErrorsCts & 0xFFFFFFFF; +- } else { +- pAC->stats.rx_errors = (SK_U32) ((pPnmiStruct->InErrorsCts - +- pPnmiStat->StatRxTooLongCts) & 0xFFFFFFFF); ++ if (dev->mtu <= 1500) { ++ pAC->stats.rx_errors = (SK_U32) pPnmiStruct->InErrorsCts & 0xFFFFFFFF; ++ } else { ++ pAC->stats.rx_errors = (SK_U32) ((pPnmiStruct->InErrorsCts - ++ pPnmiStat->StatRxTooLongCts) & 0xFFFFFFFF); + } + + +@@ -2937,32 +3863,35 @@ + * 0, if everything is ok + * !=0, on error + */ +-static int SkGeIoctl(struct SK_NET_DEVICE *dev, struct ifreq *rq, int cmd) +-{ +-DEV_NET *pNet; +-SK_AC *pAC; +-void *pMemBuf; +-struct pci_dev *pdev = NULL; +-SK_GE_IOCTL Ioctl; +-unsigned int Err = 0; +-int Size = 0; +-int Ret = 0; +-unsigned int Length = 0; +-int HeaderLength = sizeof(SK_U32) + sizeof(SK_U32); ++static int SkGeIoctl( ++struct SK_NET_DEVICE *dev, /* the device the IOCTL is to be performed on */ ++struct ifreq *rq, /* additional request structure containing data */ ++int cmd) /* requested IOCTL command number */ ++{ ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ struct pci_dev *pdev = NULL; ++ void *pMemBuf; ++ SK_GE_IOCTL Ioctl; ++ unsigned long Flags; /* for spin lock */ ++ unsigned int Err = 0; ++ unsigned int Length = 0; ++ int HeaderLength = sizeof(SK_U32) + sizeof(SK_U32); ++ int Size = 0; ++ int Ret = 0; + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeIoctl starts now...\n")); + +- pNet = (DEV_NET*) dev->priv; +- pAC = pNet->pAC; +- + if(copy_from_user(&Ioctl, rq->ifr_data, sizeof(SK_GE_IOCTL))) { + return -EFAULT; + } + + switch(cmd) { +- case SK_IOCTL_SETMIB: +- case SK_IOCTL_PRESETMIB: ++ case SIOCETHTOOL: ++ return SkEthIoctl(dev, rq); ++ case SK_IOCTL_SETMIB: /* FALL THRU */ ++ case SK_IOCTL_PRESETMIB: /* FALL THRU (if capable!) */ + if (!capable(CAP_NET_ADMIN)) return -EPERM; + case SK_IOCTL_GETMIB: + if(copy_from_user(&pAC->PnmiStruct, Ioctl.pData, +@@ -2989,6 +3918,7 @@ + if (NULL == (pMemBuf = kmalloc(Length, GFP_KERNEL))) { + return -ENOMEM; + } ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); + if(copy_from_user(pMemBuf, Ioctl.pData, Length)) { + Err = -EFAULT; + goto fault_gen; +@@ -3007,10 +3937,10 @@ + goto fault_gen; + } + fault_gen: ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); + kfree(pMemBuf); /* cleanup everything */ + break; +-#ifdef SK_DIAG_SUPPORT +- case SK_IOCTL_DIAG: ++ case SK_IOCTL_DIAG: + if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (Ioctl.Len < (sizeof(pAC->PnmiStruct) + HeaderLength)) { + Length = Ioctl.Len; +@@ -3034,7 +3964,7 @@ + */ + * ((SK_U32 *)pMemBuf) = 0; + * ((SK_U32 *)pMemBuf + 1) = pdev->bus->number; +- * ((SK_U32 *)pMemBuf + 2) = ParseDeviceNbrFromSlotName(pdev->slot_name); ++ * ((SK_U32 *)pMemBuf + 2) = ParseDeviceNbrFromSlotName(pci_name(pdev)); + if(copy_to_user(Ioctl.pData, pMemBuf, Length) ) { + Err = -EFAULT; + goto fault_diag; +@@ -3047,7 +3977,6 @@ + fault_diag: + kfree(pMemBuf); /* cleanup everything */ + break; +-#endif + default: + Err = -EOPNOTSUPP; + } +@@ -3079,12 +4008,12 @@ + unsigned int Size, /* length of ioctl data */ + int mode) /* flag for set/preset */ + { +-unsigned long Flags; /* for spin lock */ +-SK_AC *pAC; ++ SK_AC *pAC = pNet->pAC; ++ unsigned long Flags; /* for spin lock */ + + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_ENTRY, + ("SkGeIocMib starts now...\n")); +- pAC = pNet->pAC; ++ + /* access MIB */ + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + switch(mode) { +@@ -3127,17 +4056,18 @@ + SK_I32 Port; /* preferred port */ + SK_BOOL AutoSet; + SK_BOOL DupSet; +-int LinkSpeed = SK_LSPEED_AUTO; /* Link speed */ +-int AutoNeg = 1; /* autoneg off (0) or on (1) */ +-int DuplexCap = 0; /* 0=both,1=full,2=half */ +-int FlowCtrl = SK_FLOW_MODE_SYM_OR_REM; /* FlowControl */ +-int MSMode = SK_MS_MODE_AUTO; /* master/slave mode */ +- +-SK_BOOL IsConTypeDefined = SK_TRUE; +-SK_BOOL IsLinkSpeedDefined = SK_TRUE; +-SK_BOOL IsFlowCtrlDefined = SK_TRUE; +-SK_BOOL IsRoleDefined = SK_TRUE; +-SK_BOOL IsModeDefined = SK_TRUE; ++int LinkSpeed = SK_LSPEED_AUTO; /* Link speed */ ++int AutoNeg = 1; /* autoneg off (0) or on (1) */ ++int DuplexCap = 0; /* 0=both,1=full,2=half */ ++int FlowCtrl = SK_FLOW_MODE_SYM_OR_REM; /* FlowControl */ ++int MSMode = SK_MS_MODE_AUTO; /* master/slave mode */ ++int IrqModMaskOffset = 6; /* all ints moderated=default */ ++ ++SK_BOOL IsConTypeDefined = SK_TRUE; ++SK_BOOL IsLinkSpeedDefined = SK_TRUE; ++SK_BOOL IsFlowCtrlDefined = SK_TRUE; ++SK_BOOL IsRoleDefined = SK_TRUE; ++SK_BOOL IsModeDefined = SK_TRUE; + /* + * The two parameters AutoNeg. and DuplexCap. map to one configuration + * parameter. The mapping is described by this table: +@@ -3155,6 +4085,15 @@ + {SK_LMODE_AUTOBOTH , SK_LMODE_AUTOFULL , SK_LMODE_AUTOHALF }, + {SK_LMODE_AUTOSENSE, SK_LMODE_AUTOSENSE, SK_LMODE_AUTOSENSE} }; + ++SK_U32 IrqModMask[7][2] = ++ { { IRQ_MASK_RX_ONLY , Y2_DRIVER_IRQS }, ++ { IRQ_MASK_TX_ONLY , Y2_DRIVER_IRQS }, ++ { IRQ_MASK_SP_ONLY , Y2_SPECIAL_IRQS }, ++ { IRQ_MASK_SP_RX , Y2_IRQ_MASK }, ++ { IRQ_MASK_TX_RX , Y2_DRIVER_IRQS }, ++ { IRQ_MASK_SP_TX , Y2_IRQ_MASK }, ++ { IRQ_MASK_RX_TX_SP, Y2_IRQ_MASK } }; ++ + #define DC_BOTH 0 + #define DC_FULL 1 + #define DC_HALF 2 +@@ -3194,7 +4133,7 @@ + ** + ** This ConType parameter is used for all ports of the adapter! + */ +- if ( (ConType != NULL) && ++ if ( (ConType != NULL) && + (pAC->Index < SK_MAX_CARD_PARAM) && + (ConType[pAC->Index] != NULL) ) { + +@@ -3220,40 +4159,40 @@ + M_CurrPort.PMSMode = SK_MS_MODE_AUTO; + M_CurrPort.PLinkSpeed = SK_LSPEED_AUTO; + } +- } else if (strcmp(ConType[pAC->Index],"100FD")==0) { ++ } else if (strcmp(ConType[pAC->Index],"100FD")==0) { + for (Port = 0; Port < SK_MAX_MACS; Port++) { + M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_FULL]; + M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE; + M_CurrPort.PMSMode = SK_MS_MODE_AUTO; + M_CurrPort.PLinkSpeed = SK_LSPEED_100MBPS; + } +- } else if (strcmp(ConType[pAC->Index],"100HD")==0) { ++ } else if (strcmp(ConType[pAC->Index],"100HD")==0) { + for (Port = 0; Port < SK_MAX_MACS; Port++) { + M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_HALF]; + M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE; + M_CurrPort.PMSMode = SK_MS_MODE_AUTO; + M_CurrPort.PLinkSpeed = SK_LSPEED_100MBPS; + } +- } else if (strcmp(ConType[pAC->Index],"10FD")==0) { ++ } else if (strcmp(ConType[pAC->Index],"10FD")==0) { + for (Port = 0; Port < SK_MAX_MACS; Port++) { + M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_FULL]; + M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE; + M_CurrPort.PMSMode = SK_MS_MODE_AUTO; + M_CurrPort.PLinkSpeed = SK_LSPEED_10MBPS; + } +- } else if (strcmp(ConType[pAC->Index],"10HD")==0) { ++ } else if (strcmp(ConType[pAC->Index],"10HD")==0) { + for (Port = 0; Port < SK_MAX_MACS; Port++) { + M_CurrPort.PLinkModeConf = Capabilities[AN_OFF][DC_HALF]; + M_CurrPort.PFlowCtrlMode = SK_FLOW_MODE_NONE; + M_CurrPort.PMSMode = SK_MS_MODE_AUTO; + M_CurrPort.PLinkSpeed = SK_LSPEED_10MBPS; + } +- } else { ++ } else { + printk("sk98lin: Illegal value \"%s\" for ConType\n", + ConType[pAC->Index]); + IsConTypeDefined = SK_FALSE; /* Wrong ConType defined */ + } +- } else { ++ } else { + IsConTypeDefined = SK_FALSE; /* No ConType defined */ + } + +@@ -3272,14 +4211,30 @@ + } else if (strcmp(Speed_A[pAC->Index],"100")==0) { + LinkSpeed = SK_LSPEED_100MBPS; + } else if (strcmp(Speed_A[pAC->Index],"1000")==0) { +- LinkSpeed = SK_LSPEED_1000MBPS; ++ if ((pAC->PciDev->vendor == 0x11ab ) && ++ (pAC->PciDev->device == 0x4350)) { ++ LinkSpeed = SK_LSPEED_100MBPS; ++ printk("sk98lin: Illegal value \"%s\" for Speed_A.\n" ++ "Gigabit speed not possible with this chip revision!", ++ Speed_A[pAC->Index]); ++ } else { ++ LinkSpeed = SK_LSPEED_1000MBPS; ++ } + } else { + printk("sk98lin: Illegal value \"%s\" for Speed_A\n", + Speed_A[pAC->Index]); + IsLinkSpeedDefined = SK_FALSE; + } + } else { +- IsLinkSpeedDefined = SK_FALSE; ++ if ((pAC->PciDev->vendor == 0x11ab ) && ++ (pAC->PciDev->device == 0x4350)) { ++ /* Gigabit speed not supported ++ * Swith to speed 100 ++ */ ++ LinkSpeed = SK_LSPEED_100MBPS; ++ } else { ++ IsLinkSpeedDefined = SK_FALSE; ++ } + } + + /* +@@ -3374,9 +4329,6 @@ + } + + if (!AutoSet && DupSet) { +- printk("sk98lin: Port A: Duplex setting not" +- " possible in\n default AutoNegotiation mode" +- " (Sense).\n Using AutoNegotiation On\n"); + AutoNeg = AN_ON; + } + +@@ -3404,7 +4356,7 @@ + FlowCtrl = SK_FLOW_MODE_NONE; + } else { + printk("sk98lin: Illegal value \"%s\" for FlowCtrl_A\n", +- FlowCtrl_A[pAC->Index]); ++ FlowCtrl_A[pAC->Index]); + IsFlowCtrlDefined = SK_FALSE; + } + } else { +@@ -3496,7 +4448,7 @@ + ** Decide whether to set new config value if somethig valid has + ** been received. + */ +- if (IsLinkSpeedDefined) { ++ if (IsLinkSpeedDefined) { + pAC->GIni.GP[1].PLinkSpeed = LinkSpeed; + } + +@@ -3572,9 +4524,6 @@ + } + + if (!AutoSet && DupSet) { +- printk("sk98lin: Port B: Duplex setting not" +- " possible in\n default AutoNegotiation mode" +- " (Sense).\n Using AutoNegotiation On\n"); + AutoNeg = AN_ON; + } + +@@ -3687,11 +4636,15 @@ + } + + pAC->RlmtNets = 1; ++ pAC->RlmtMode = 0; + + if (RlmtMode != NULL && pAC->IndexIndex] != NULL) { + if (strcmp(RlmtMode[pAC->Index], "") == 0) { +- pAC->RlmtMode = 0; ++ if (pAC->GIni.GIMacsFound == 2) { ++ pAC->RlmtMode = SK_RLMT_CHECK_LINK; ++ pAC->RlmtNets = 2; ++ } + } else if (strcmp(RlmtMode[pAC->Index], "CheckLinkState") == 0) { + pAC->RlmtMode = SK_RLMT_CHECK_LINK; + } else if (strcmp(RlmtMode[pAC->Index], "CheckLocalPort") == 0) { +@@ -3712,12 +4665,37 @@ + pAC->RlmtMode = 0; + } + } else { +- pAC->RlmtMode = 0; ++ if (pAC->GIni.GIMacsFound == 2) { ++ pAC->RlmtMode = SK_RLMT_CHECK_LINK; ++ pAC->RlmtNets = 2; ++ } + } +- ++ ++#ifdef SK_YUKON2 ++ /* ++ ** use dualnet config per default ++ * ++ pAC->RlmtMode = SK_RLMT_CHECK_LINK; ++ pAC->RlmtNets = 2; ++ */ ++#endif ++ ++ ++ /* ++ ** Check the LowLatance parameters ++ */ ++ pAC->LowLatency = SK_FALSE; ++ if (LowLatency[pAC->Index] != NULL) { ++ if (strcmp(LowLatency[pAC->Index], "On") == 0) { ++ pAC->LowLatency = SK_TRUE; ++ } ++ } ++ ++ + /* + ** Check the interrupt moderation parameters + */ ++ pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE; + if (Moderation[pAC->Index] != NULL) { + if (strcmp(Moderation[pAC->Index], "") == 0) { + pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE; +@@ -3731,70 +4709,49 @@ + printk("sk98lin: Illegal value \"%s\" for Moderation.\n" + " Disable interrupt moderation.\n", + Moderation[pAC->Index]); +- pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE; +- } +- } else { +- pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_NONE; +- } +- +- if (Stats[pAC->Index] != NULL) { +- if (strcmp(Stats[pAC->Index], "Yes") == 0) { +- pAC->DynIrqModInfo.DisplayStats = SK_TRUE; +- } else { +- pAC->DynIrqModInfo.DisplayStats = SK_FALSE; + } + } else { +- pAC->DynIrqModInfo.DisplayStats = SK_FALSE; ++/* Set interrupt moderation if wished */ ++#ifdef CONFIG_SK98LIN_STATINT ++ pAC->DynIrqModInfo.IntModTypeSelect = C_INT_MOD_STATIC; ++#endif + } + + if (ModerationMask[pAC->Index] != NULL) { + if (strcmp(ModerationMask[pAC->Index], "Rx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_ONLY; ++ IrqModMaskOffset = 0; + } else if (strcmp(ModerationMask[pAC->Index], "Tx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_ONLY; ++ IrqModMaskOffset = 1; + } else if (strcmp(ModerationMask[pAC->Index], "Sp") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_ONLY; ++ IrqModMaskOffset = 2; + } else if (strcmp(ModerationMask[pAC->Index], "RxSp") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_RX; ++ IrqModMaskOffset = 3; + } else if (strcmp(ModerationMask[pAC->Index], "SpRx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_RX; ++ IrqModMaskOffset = 3; + } else if (strcmp(ModerationMask[pAC->Index], "RxTx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX; ++ IrqModMaskOffset = 4; + } else if (strcmp(ModerationMask[pAC->Index], "TxRx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX; ++ IrqModMaskOffset = 4; + } else if (strcmp(ModerationMask[pAC->Index], "TxSp") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_TX; ++ IrqModMaskOffset = 5; + } else if (strcmp(ModerationMask[pAC->Index], "SpTx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_SP_TX; +- } else if (strcmp(ModerationMask[pAC->Index], "RxTxSp") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else if (strcmp(ModerationMask[pAC->Index], "RxSpTx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else if (strcmp(ModerationMask[pAC->Index], "TxRxSp") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else if (strcmp(ModerationMask[pAC->Index], "TxSpRx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else if (strcmp(ModerationMask[pAC->Index], "SpTxRx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else if (strcmp(ModerationMask[pAC->Index], "SpRxTx") == 0) { +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_TX_SP; +- } else { /* some rubbish */ +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_RX_ONLY; +- } +- } else { /* operator has stated nothing */ +- pAC->DynIrqModInfo.MaskIrqModeration = IRQ_MASK_TX_RX; +- } +- +- if (AutoSizing[pAC->Index] != NULL) { +- if (strcmp(AutoSizing[pAC->Index], "On") == 0) { +- pAC->DynIrqModInfo.AutoSizing = SK_FALSE; +- } else { +- pAC->DynIrqModInfo.AutoSizing = SK_FALSE; ++ IrqModMaskOffset = 5; ++ } else { /* some rubbish stated */ ++ // IrqModMaskOffset = 6; ->has been initialized ++ // already at the begin of this function... + } +- } else { /* operator has stated nothing */ +- pAC->DynIrqModInfo.AutoSizing = SK_FALSE; ++ } ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ pAC->DynIrqModInfo.MaskIrqModeration = IrqModMask[IrqModMaskOffset][0]; ++ } else { ++ pAC->DynIrqModInfo.MaskIrqModeration = IrqModMask[IrqModMaskOffset][1]; + } + ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT; ++ } else { ++ pAC->DynIrqModInfo.MaxModIntsPerSec = C_Y2_INTS_PER_SEC_DEFAULT; ++ } + if (IntsPerSec[pAC->Index] != 0) { + if ((IntsPerSec[pAC->Index]< C_INT_MOD_IPS_LOWER_RANGE) || + (IntsPerSec[pAC->Index] > C_INT_MOD_IPS_UPPER_RANGE)) { +@@ -3803,28 +4760,25 @@ + IntsPerSec[pAC->Index], + C_INT_MOD_IPS_LOWER_RANGE, + C_INT_MOD_IPS_UPPER_RANGE, +- C_INTS_PER_SEC_DEFAULT); +- pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT; ++ pAC->DynIrqModInfo.MaxModIntsPerSec); + } else { + pAC->DynIrqModInfo.MaxModIntsPerSec = IntsPerSec[pAC->Index]; + } +- } else { +- pAC->DynIrqModInfo.MaxModIntsPerSec = C_INTS_PER_SEC_DEFAULT; +- } ++ } + + /* + ** Evaluate upper and lower moderation threshold + */ + pAC->DynIrqModInfo.MaxModIntsPerSecUpperLimit = + pAC->DynIrqModInfo.MaxModIntsPerSec + +- (pAC->DynIrqModInfo.MaxModIntsPerSec / 2); ++ (pAC->DynIrqModInfo.MaxModIntsPerSec / 5); + + pAC->DynIrqModInfo.MaxModIntsPerSecLowerLimit = + pAC->DynIrqModInfo.MaxModIntsPerSec - +- (pAC->DynIrqModInfo.MaxModIntsPerSec / 2); +- +- pAC->DynIrqModInfo.PrevTimeVal = jiffies; /* initial value */ ++ (pAC->DynIrqModInfo.MaxModIntsPerSec / 5); + ++ pAC->DynIrqModInfo.DynIrqModSampleInterval = ++ SK_DRV_MODERATION_TIMER_LENGTH; + + } /* GetConfiguration */ + +@@ -3860,45 +4814,6 @@ + } + } /* ProductStr */ + +-/***************************************************************************** +- * +- * StartDrvCleanupTimer - Start timer to check for descriptors which +- * might be placed in descriptor ring, but +- * havent been handled up to now +- * +- * Description: +- * This function requests a HW-timer fo the Yukon card. The actions to +- * perform when this timer expires, are located in the SkDrvEvent(). +- * +- * Returns: N/A +- */ +-static void +-StartDrvCleanupTimer(SK_AC *pAC) { +- SK_EVPARA EventParam; /* Event struct for timer event */ +- +- SK_MEMSET((char *) &EventParam, 0, sizeof(EventParam)); +- EventParam.Para32[0] = SK_DRV_RX_CLEANUP_TIMER; +- SkTimerStart(pAC, pAC->IoBase, &pAC->DrvCleanupTimer, +- SK_DRV_RX_CLEANUP_TIMER_LENGTH, +- SKGE_DRV, SK_DRV_TIMER, EventParam); +-} +- +-/***************************************************************************** +- * +- * StopDrvCleanupTimer - Stop timer to check for descriptors +- * +- * Description: +- * This function requests a HW-timer fo the Yukon card. The actions to +- * perform when this timer expires, are located in the SkDrvEvent(). +- * +- * Returns: N/A +- */ +-static void +-StopDrvCleanupTimer(SK_AC *pAC) { +- SkTimerStop(pAC, pAC->IoBase, &pAC->DrvCleanupTimer); +- SK_MEMSET((char *) &pAC->DrvCleanupTimer, 0, sizeof(SK_TIMER)); +-} +- + /****************************************************************************/ + /* functions for common modules *********************************************/ + /****************************************************************************/ +@@ -3987,7 +4902,9 @@ + SK_U64 SkOsGetTime(SK_AC *pAC) + { + SK_U64 PrivateJiffies; ++ + SkOsGetTimeCurrent(pAC, &PrivateJiffies); ++ + return PrivateJiffies; + } /* SkOsGetTime */ + +@@ -4142,29 +5059,26 @@ + * + */ + int SkDrvEvent( +-SK_AC *pAC, /* pointer to adapter context */ +-SK_IOC IoC, /* io-context */ +-SK_U32 Event, /* event-id */ +-SK_EVPARA Param) /* event-parameter */ +-{ +-SK_MBUF *pRlmtMbuf; /* pointer to a rlmt-mbuf structure */ +-struct sk_buff *pMsg; /* pointer to a message block */ +-int FromPort; /* the port from which we switch away */ +-int ToPort; /* the port we switch to */ +-SK_EVPARA NewPara; /* parameter for further events */ +-int Stat; +-unsigned long Flags; +-SK_BOOL DualNet; ++SK_AC *pAC, /* pointer to adapter context */ ++SK_IOC IoC, /* IO control context */ ++SK_U32 Event, /* event-id */ ++SK_EVPARA Param) /* event-parameter */ ++{ ++ SK_MBUF *pRlmtMbuf; /* pointer to a rlmt-mbuf structure */ ++ struct sk_buff *pMsg; /* pointer to a message block */ ++ SK_BOOL DualNet; ++ SK_U32 Reason; ++ unsigned long Flags; ++ int FromPort; /* the port from which we switch away */ ++ int ToPort; /* the port we switch to */ ++ int Stat; ++ DEV_NET *pNet = NULL; ++#ifdef CONFIG_SK98LIN_NAPI ++ int WorkToDo = 1; /* min(*budget, dev->quota); */ ++ int WorkDone = 0; ++#endif + + switch (Event) { +- case SK_DRV_ADAP_FAIL: +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, +- ("ADAPTER FAIL EVENT\n")); +- printk("%s: Adapter failed.\n", pAC->dev[0]->name); +- /* disable interrupts */ +- SK_OUT32(pAC->IoBase, B0_IMSK, 0); +- /* cgoos */ +- break; + case SK_DRV_PORT_FAIL: + FromPort = Param.Para32[0]; + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, +@@ -4174,219 +5088,294 @@ + } else { + printk("%s: Port B failed.\n", pAC->dev[1]->name); + } +- /* cgoos */ + break; +- case SK_DRV_PORT_RESET: /* SK_U32 PortIdx */ +- /* action list 4 */ ++ case SK_DRV_PORT_RESET: + FromPort = Param.Para32[0]; + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, + ("PORT RESET EVENT, Port: %d ", FromPort)); +- NewPara.Para64 = FromPort; +- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara); ++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET, ++ FromPort, SK_FALSE); + spin_lock_irqsave( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); +- +- SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST); ++ } else { ++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_HARD_RST); ++ } + pAC->dev[Param.Para32[0]]->flags &= ~IFF_RUNNING; + spin_unlock_irqrestore( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + +- /* clear rx ring from received frames */ +- ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); +- +- ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]); ++ if (!CHIP_ID_YUKON_2(pAC)) { ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo); ++#else ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); ++#endif ++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]); ++ } + spin_lock_irqsave( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); +- +- /* tschilling: Handling of return value inserted. */ +- if (SkGeInitPort(pAC, IoC, FromPort)) { +- if (FromPort == 0) { +- printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name); ++ ++#ifdef USE_TIST_FOR_RESET ++ if (pAC->GIni.GIYukon2) { ++#ifdef Y2_RECOVERY ++ /* for Yukon II we want to have tist enabled all the time */ ++ if (!SK_ADAPTER_WAITING_FOR_TIST(pAC)) { ++ Y2_ENABLE_TIST(pAC->IoBase); ++ } ++#else ++ /* make sure that we do not accept any status LEs from now on */ ++ if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) { ++#endif ++ /* port already waiting for tist */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Port %c is now waiting for specific Tist\n", ++ 'A' + FromPort)); ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST, ++ FromPort); ++ /* get current timestamp */ ++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &pAC->MinTistLo); ++ pAC->MinTistHi = pAC->GIni.GITimeStampCnt; ++#ifndef Y2_RECOVERY + } else { +- printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name); ++ /* nobody is waiting yet */ ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ FromPort); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Port %c is now waiting for any Tist (0x%X)\n", ++ 'A' + FromPort, pAC->AdapterResetState)); ++ /* start tist */ ++ Y2_ENABLE_TIST(pAC-IoBase); ++ } ++#endif ++ } ++#endif ++ ++#ifdef Y2_LE_CHECK ++ /* mark entries invalid */ ++ pAC->LastPort = 3; ++ pAC->LastOpc = 0xFF; ++#endif ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2PortStart(pAC, IoC, FromPort); ++ } else { ++ /* tschilling: Handling of return value inserted. */ ++ if (SkGeInitPort(pAC, IoC, FromPort)) { ++ if (FromPort == 0) { ++ printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name); ++ } else { ++ printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name); ++ } + } ++ SkAddrMcUpdate(pAC,IoC, FromPort); ++ PortReInitBmu(pAC, FromPort); ++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE); ++ CLEAR_AND_START_RX(FromPort); + } +- SkAddrMcUpdate(pAC,IoC, FromPort); +- PortReInitBmu(pAC, FromPort); +- SkGePollTxD(pAC, IoC, FromPort, SK_TRUE); +- ClearAndStartRx(pAC, FromPort); + spin_unlock_irqrestore( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + break; +- case SK_DRV_NET_UP: /* SK_U32 PortIdx */ +- /* action list 5 */ ++ case SK_DRV_NET_UP: + FromPort = Param.Para32[0]; + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, +- ("NET UP EVENT, Port: %d ", Param.Para32[0])); +- /* Mac update */ +- SkAddrMcUpdate(pAC,IoC, FromPort); +- ++ ("NET UP EVENT, Port: %d ", FromPort)); ++ SkAddrMcUpdate(pAC,IoC, FromPort); /* Mac update */ + if (DoPrintInterfaceChange) { +- printk("%s: network connection up using" +- " port %c\n", pAC->dev[Param.Para32[0]]->name, 'A'+Param.Para32[0]); ++ printk("%s: network connection up using port %c\n", ++ pAC->dev[FromPort]->name, 'A'+FromPort); + +- /* tschilling: Values changed according to LinkSpeedUsed. */ +- Stat = pAC->GIni.GP[FromPort].PLinkSpeedUsed; +- if (Stat == SK_LSPEED_STAT_10MBPS) { +- printk(" speed: 10\n"); +- } else if (Stat == SK_LSPEED_STAT_100MBPS) { +- printk(" speed: 100\n"); +- } else if (Stat == SK_LSPEED_STAT_1000MBPS) { +- printk(" speed: 1000\n"); +- } else { +- printk(" speed: unknown\n"); +- } ++ /* tschilling: Values changed according to LinkSpeedUsed. */ ++ Stat = pAC->GIni.GP[FromPort].PLinkSpeedUsed; ++ if (Stat == SK_LSPEED_STAT_10MBPS) { ++ printk(" speed: 10\n"); ++ } else if (Stat == SK_LSPEED_STAT_100MBPS) { ++ printk(" speed: 100\n"); ++ } else if (Stat == SK_LSPEED_STAT_1000MBPS) { ++ printk(" speed: 1000\n"); ++ } else { ++ printk(" speed: unknown\n"); ++ } + ++ Stat = pAC->GIni.GP[FromPort].PLinkModeStatus; ++ if ((Stat == SK_LMODE_STAT_AUTOHALF) || ++ (Stat == SK_LMODE_STAT_AUTOFULL)) { ++ printk(" autonegotiation: yes\n"); ++ } else { ++ printk(" autonegotiation: no\n"); ++ } + +- Stat = pAC->GIni.GP[FromPort].PLinkModeStatus; +- if (Stat == SK_LMODE_STAT_AUTOHALF || +- Stat == SK_LMODE_STAT_AUTOFULL) { +- printk(" autonegotiation: yes\n"); +- } +- else { +- printk(" autonegotiation: no\n"); +- } +- if (Stat == SK_LMODE_STAT_AUTOHALF || +- Stat == SK_LMODE_STAT_HALF) { +- printk(" duplex mode: half\n"); +- } +- else { +- printk(" duplex mode: full\n"); +- } +- Stat = pAC->GIni.GP[FromPort].PFlowCtrlStatus; +- if (Stat == SK_FLOW_STAT_REM_SEND ) { +- printk(" flowctrl: remote send\n"); +- } +- else if (Stat == SK_FLOW_STAT_LOC_SEND ){ +- printk(" flowctrl: local send\n"); +- } +- else if (Stat == SK_FLOW_STAT_SYMMETRIC ){ +- printk(" flowctrl: symmetric\n"); +- } +- else { +- printk(" flowctrl: none\n"); +- } +- +- /* tschilling: Check against CopperType now. */ +- if ((pAC->GIni.GICopperType == SK_TRUE) && +- (pAC->GIni.GP[FromPort].PLinkSpeedUsed == +- SK_LSPEED_STAT_1000MBPS)) { +- Stat = pAC->GIni.GP[FromPort].PMSStatus; +- if (Stat == SK_MS_STAT_MASTER ) { +- printk(" role: master\n"); ++ if ((Stat == SK_LMODE_STAT_AUTOHALF) || ++ (Stat == SK_LMODE_STAT_HALF)) { ++ printk(" duplex mode: half\n"); ++ } else { ++ printk(" duplex mode: full\n"); + } +- else if (Stat == SK_MS_STAT_SLAVE ) { +- printk(" role: slave\n"); ++ ++ Stat = pAC->GIni.GP[FromPort].PFlowCtrlStatus; ++ if (Stat == SK_FLOW_STAT_REM_SEND ) { ++ printk(" flowctrl: remote send\n"); ++ } else if (Stat == SK_FLOW_STAT_LOC_SEND ) { ++ printk(" flowctrl: local send\n"); ++ } else if (Stat == SK_FLOW_STAT_SYMMETRIC ) { ++ printk(" flowctrl: symmetric\n"); ++ } else { ++ printk(" flowctrl: none\n"); + } +- else { +- printk(" role: ???\n"); ++ ++ /* tschilling: Check against CopperType now. */ ++ if ((pAC->GIni.GICopperType == SK_TRUE) && ++ (pAC->GIni.GP[FromPort].PLinkSpeedUsed == ++ SK_LSPEED_STAT_1000MBPS)) { ++ Stat = pAC->GIni.GP[FromPort].PMSStatus; ++ if (Stat == SK_MS_STAT_MASTER ) { ++ printk(" role: master\n"); ++ } else if (Stat == SK_MS_STAT_SLAVE ) { ++ printk(" role: slave\n"); ++ } else { ++ printk(" role: ???\n"); ++ } + } +- } + +- /* +- Display dim (dynamic interrupt moderation) +- informations +- */ +- if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) +- printk(" irq moderation: static (%d ints/sec)\n", ++ /* Display interrupt moderation informations */ ++ if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) { ++ printk(" irq moderation: static (%d ints/sec)\n", + pAC->DynIrqModInfo.MaxModIntsPerSec); +- else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) +- printk(" irq moderation: dynamic (%d ints/sec)\n", ++ } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) { ++ printk(" irq moderation: dynamic (%d ints/sec)\n", + pAC->DynIrqModInfo.MaxModIntsPerSec); +- else +- printk(" irq moderation: disabled\n"); ++ } else { ++ printk(" irq moderation: disabled\n"); ++ } ++ ++#ifdef NETIF_F_TSO ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->dev[FromPort]->features & NETIF_F_TSO) { ++ printk(" tcp offload: enabled\n"); ++ } else { ++ printk(" tcp offload: disabled\n"); ++ } ++ } ++#endif + ++ if (pAC->dev[FromPort]->features & NETIF_F_SG) { ++ printk(" scatter-gather: enabled\n"); ++ } else { ++ printk(" scatter-gather: disabled\n"); ++ } + +-#ifdef SK_ZEROCOPY +- if (pAC->ChipsetType) +-#ifdef USE_SK_TX_CHECKSUM +- printk(" scatter-gather: enabled\n"); +-#else +- printk(" tx-checksum: disabled\n"); +-#endif +- else +- printk(" scatter-gather: disabled\n"); +-#else +- printk(" scatter-gather: disabled\n"); +-#endif ++ if (pAC->dev[FromPort]->features & NETIF_F_IP_CSUM) { ++ printk(" tx-checksum: enabled\n"); ++ } else { ++ printk(" tx-checksum: disabled\n"); ++ } + +-#ifndef USE_SK_RX_CHECKSUM +- printk(" rx-checksum: disabled\n"); ++ if (pAC->RxPort[FromPort].UseRxCsum) { ++ printk(" rx-checksum: enabled\n"); ++ } else { ++ printk(" rx-checksum: disabled\n"); ++ } ++#ifdef CONFIG_SK98LIN_NAPI ++ printk(" rx-polling: enabled\n"); + #endif +- ++ if (pAC->LowLatency) { ++ printk(" low latency: enabled\n"); ++ } + } else { +- DoPrintInterfaceChange = SK_TRUE; +- } ++ DoPrintInterfaceChange = SK_TRUE; ++ } + +- if ((Param.Para32[0] != pAC->ActivePort) && +- (pAC->RlmtNets == 1)) { +- NewPara.Para32[0] = pAC->ActivePort; +- NewPara.Para32[1] = Param.Para32[0]; +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_SWITCH_INTERN, +- NewPara); ++ if ((FromPort != pAC->ActivePort)&&(pAC->RlmtNets == 1)) { ++ SkLocalEventQueue(pAC, SKGE_DRV, SK_DRV_SWITCH_INTERN, ++ pAC->ActivePort, FromPort, SK_FALSE); + } + + /* Inform the world that link protocol is up. */ +- pAC->dev[Param.Para32[0]]->flags |= IFF_RUNNING; +- ++ netif_wake_queue(pAC->dev[FromPort]); ++ netif_carrier_on(pAC->dev[FromPort]); ++ pAC->dev[FromPort]->flags |= IFF_RUNNING; + break; +- case SK_DRV_NET_DOWN: /* SK_U32 Reason */ +- /* action list 7 */ ++ case SK_DRV_NET_DOWN: ++ Reason = Param.Para32[0]; ++ FromPort = Param.Para32[1]; + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, + ("NET DOWN EVENT ")); ++ ++ /* Stop queue and carrier */ ++ netif_stop_queue(pAC->dev[FromPort]); ++ netif_carrier_off(pAC->dev[FromPort]); ++ ++ /* Print link change */ + if (DoPrintInterfaceChange) { +- printk("%s: network connection down\n", +- pAC->dev[Param.Para32[1]]->name); ++ if (pAC->dev[FromPort]->flags & IFF_RUNNING) { ++ printk("%s: network connection down\n", ++ pAC->dev[FromPort]->name); ++ } + } else { + DoPrintInterfaceChange = SK_TRUE; + } +- pAC->dev[Param.Para32[1]]->flags &= ~IFF_RUNNING; ++ pAC->dev[FromPort]->flags &= ~IFF_RUNNING; + break; +- case SK_DRV_SWITCH_HARD: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */ +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, +- ("PORT SWITCH HARD ")); +- case SK_DRV_SWITCH_SOFT: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */ +- /* action list 6 */ +- printk("%s: switching to port %c\n", pAC->dev[0]->name, +- 'A'+Param.Para32[1]); +- case SK_DRV_SWITCH_INTERN: /* SK_U32 FromPortIdx SK_U32 ToPortIdx */ ++ case SK_DRV_SWITCH_HARD: /* FALL THRU */ ++ case SK_DRV_SWITCH_SOFT: /* FALL THRU */ ++ case SK_DRV_SWITCH_INTERN: + FromPort = Param.Para32[0]; +- ToPort = Param.Para32[1]; ++ ToPort = Param.Para32[1]; ++ printk("%s: switching from port %c to port %c\n", ++ pAC->dev[0]->name, 'A'+FromPort, 'A'+ToPort); + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, + ("PORT SWITCH EVENT, From: %d To: %d (Pref %d) ", + FromPort, ToPort, pAC->Rlmt.Net[0].PrefPort)); +- NewPara.Para64 = FromPort; +- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara); +- NewPara.Para64 = ToPort; +- SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_XMAC_RESET, NewPara); ++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET, ++ FromPort, SK_FALSE); ++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET, ++ ToPort, SK_FALSE); + spin_lock_irqsave( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + spin_lock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock); +- SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST); +- SkGeStopPort(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST); ++ SkY2PortStop(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST); ++ } ++ else { ++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST); ++ SkGeStopPort(pAC, IoC, ToPort, SK_STOP_ALL, SK_SOFT_RST); ++ } + spin_unlock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock); + spin_unlock_irqrestore( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + +- ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); /* clears rx ring */ +- ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE); /* clears rx ring */ + +- ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]); +- ClearTxRing(pAC, &pAC->TxPort[ToPort][TX_PRIO_LOW]); ++ if (!CHIP_ID_YUKON_2(pAC)) { ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo); ++ ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE, &WorkDone, WorkToDo); ++#else ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); /* clears rx ring */ ++ ReceiveIrq(pAC, &pAC->RxPort[ToPort], SK_FALSE); /* clears rx ring */ ++#endif ++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]); ++ ClearTxRing(pAC, &pAC->TxPort[ToPort][TX_PRIO_LOW]); ++ } ++ + spin_lock_irqsave( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + spin_lock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock); + pAC->ActivePort = ToPort; +-#if 0 +- SetQueueSizes(pAC); +-#else ++ + /* tschilling: New common function with minimum size check. */ + DualNet = SK_FALSE; + if (pAC->RlmtNets == 2) { +@@ -4404,76 +5393,340 @@ + printk("SkGeInitAssignRamToQueues failed.\n"); + break; + } +-#endif +- /* tschilling: Handling of return values inserted. */ +- if (SkGeInitPort(pAC, IoC, FromPort) || +- SkGeInitPort(pAC, IoC, ToPort)) { +- printk("%s: SkGeInitPort failed.\n", pAC->dev[0]->name); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* tschilling: Handling of return values inserted. */ ++ if (SkGeInitPort(pAC, IoC, FromPort) || ++ SkGeInitPort(pAC, IoC, ToPort)) { ++ printk("%s: SkGeInitPort failed.\n", pAC->dev[0]->name); ++ } + } +- if (Event == SK_DRV_SWITCH_SOFT) { +- SkMacRxTxEnable(pAC, IoC, FromPort); ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (Event == SK_DRV_SWITCH_SOFT) { ++ SkMacRxTxEnable(pAC, IoC, FromPort); ++ } ++ SkMacRxTxEnable(pAC, IoC, ToPort); + } +- SkMacRxTxEnable(pAC, IoC, ToPort); ++ + SkAddrSwap(pAC, IoC, FromPort, ToPort); + SkAddrMcUpdate(pAC, IoC, FromPort); + SkAddrMcUpdate(pAC, IoC, ToPort); +- PortReInitBmu(pAC, FromPort); +- PortReInitBmu(pAC, ToPort); +- SkGePollTxD(pAC, IoC, FromPort, SK_TRUE); +- SkGePollTxD(pAC, IoC, ToPort, SK_TRUE); +- ClearAndStartRx(pAC, FromPort); +- ClearAndStartRx(pAC, ToPort); ++ ++#ifdef USE_TIST_FOR_RESET ++ if (pAC->GIni.GIYukon2) { ++ /* make sure that we do not accept any status LEs from now on */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("both Ports now waiting for specific Tist\n")); ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ 0); ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ 1); ++ ++ /* start tist */ ++ Y2_ENABLE_TIST(pAC->IoBase); ++ } ++#endif ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ PortReInitBmu(pAC, FromPort); ++ PortReInitBmu(pAC, ToPort); ++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE); ++ SkGePollTxD(pAC, IoC, ToPort, SK_TRUE); ++ CLEAR_AND_START_RX(FromPort); ++ CLEAR_AND_START_RX(ToPort); ++ } else { ++ SkY2PortStart(pAC, IoC, FromPort); ++ SkY2PortStart(pAC, IoC, ToPort); ++#ifdef SK_YUKON2 ++ /* in yukon-II always port 0 has to be started first */ ++ // SkY2PortStart(pAC, IoC, 0); ++ // SkY2PortStart(pAC, IoC, 1); ++#endif ++ } + spin_unlock(&pAC->TxPort[ToPort][TX_PRIO_LOW].TxDesRingLock); + spin_unlock_irqrestore( + &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, + Flags); + break; + case SK_DRV_RLMT_SEND: /* SK_MBUF *pMb */ +- SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, +- ("RLS ")); ++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV,SK_DBGCAT_DRV_EVENT,("RLS ")); + pRlmtMbuf = (SK_MBUF*) Param.pParaPtr; + pMsg = (struct sk_buff*) pRlmtMbuf->pOs; + skb_put(pMsg, pRlmtMbuf->Length); +- if (XmitFrame(pAC, &pAC->TxPort[pRlmtMbuf->PortIdx][TX_PRIO_LOW], +- pMsg) < 0) +- +- DEV_KFREE_SKB_ANY(pMsg); ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ if (XmitFrame(pAC, &pAC->TxPort[pRlmtMbuf->PortIdx][TX_PRIO_LOW], ++ pMsg) < 0) { ++ DEV_KFREE_SKB_ANY(pMsg); ++ } ++ } else { ++ if (SkY2RlmtSend(pAC, pRlmtMbuf->PortIdx, pMsg) < 0) { ++ DEV_KFREE_SKB_ANY(pMsg); ++ } ++ } + break; + case SK_DRV_TIMER: + if (Param.Para32[0] == SK_DRV_MODERATION_TIMER) { +- /* +- ** expiration of the moderation timer implies that +- ** dynamic moderation is to be applied +- */ ++ /* check what IRQs are to be moderated */ + SkDimStartModerationTimer(pAC); + SkDimModerate(pAC); +- if (pAC->DynIrqModInfo.DisplayStats) { +- SkDimDisplayModerationSettings(pAC); +- } +- } else if (Param.Para32[0] == SK_DRV_RX_CLEANUP_TIMER) { +- /* +- ** check if we need to check for descriptors which +- ** haven't been handled the last millisecs +- */ +- StartDrvCleanupTimer(pAC); +- if (pAC->GIni.GIMacsFound == 2) { +- ReceiveIrq(pAC, &pAC->RxPort[1], SK_FALSE); +- } +- ReceiveIrq(pAC, &pAC->RxPort[0], SK_FALSE); + } else { + printk("Expiration of unknown timer\n"); + } + break; ++ case SK_DRV_ADAP_FAIL: ++#if (!defined (Y2_RECOVERY) && !defined (Y2_LE_CHECK)) ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, ++ ("ADAPTER FAIL EVENT\n")); ++ printk("%s: Adapter failed.\n", pAC->dev[0]->name); ++ SK_OUT32(pAC->IoBase, B0_IMSK, 0); /* disable interrupts */ ++ break; ++#endif ++ ++#if (defined (Y2_RECOVERY) || defined (Y2_LE_CHECK)) ++ case SK_DRV_RECOVER: ++ pNet = (DEV_NET *) pAC->dev[0]->priv; ++ ++ /* Recover already in progress */ ++ if (pNet->InRecover) { ++ break; ++ } ++ ++ netif_stop_queue(pAC->dev[0]); /* stop device if running */ ++ pNet->InRecover = SK_TRUE; ++ ++ FromPort = Param.Para32[0]; ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, ++ ("PORT RESET EVENT, Port: %d ", FromPort)); ++ ++ /* Disable interrupts */ ++ SK_OUT32(pAC->IoBase, B0_IMSK, 0); ++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, 0); ++ ++ SkLocalEventQueue64(pAC, SKGE_PNMI, SK_PNMI_EVT_XMAC_RESET, ++ FromPort, SK_FALSE); ++ spin_lock_irqsave( ++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, ++ Flags); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIMacsFound > 1) { ++ SkY2PortStop(pAC, IoC, 0, SK_STOP_ALL, SK_SOFT_RST); ++ SkY2PortStop(pAC, IoC, 1, SK_STOP_ALL, SK_SOFT_RST); ++ } else { ++ SkY2PortStop(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST); ++ } ++ } else { ++ SkGeStopPort(pAC, IoC, FromPort, SK_STOP_ALL, SK_SOFT_RST); ++ } ++ pAC->dev[Param.Para32[0]]->flags &= ~IFF_RUNNING; ++ spin_unlock_irqrestore( ++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, ++ Flags); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++#ifdef CONFIG_SK98LIN_NAPI ++ WorkToDo = 1; ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE, &WorkDone, WorkToDo); ++#else ++ ReceiveIrq(pAC, &pAC->RxPort[FromPort], SK_FALSE); ++#endif ++ ClearTxRing(pAC, &pAC->TxPort[FromPort][TX_PRIO_LOW]); ++ } ++ spin_lock_irqsave( ++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, ++ Flags); ++ ++#ifdef USE_TIST_FOR_RESET ++ if (pAC->GIni.GIYukon2) { ++#if 0 ++ /* make sure that we do not accept any status LEs from now on */ ++ Y2_ENABLE_TIST(pAC->IoBase); ++ ++ /* get current timestamp */ ++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &pAC->MinTistLo); ++ pAC->MinTistHi = pAC->GIni.GITimeStampCnt; ++ ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_SPECIFIC_TIST, ++ FromPort); ++#endif ++ if (pAC->GIni.GIMacsFound > 1) { ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ 0); ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ 1); ++ } else { ++ SK_SET_WAIT_BIT_FOR_PORT( ++ pAC, ++ SK_PSTATE_WAITING_FOR_ANY_TIST, ++ FromPort); ++ } ++ ++ /* start tist */ ++ Y2_ENABLE_TIST(pAC->IoBase); ++ } ++#endif ++ ++ ++ ++#ifdef Y2_LE_CHECK ++ /* mark entries invalid */ ++ pAC->LastPort = 3; ++ pAC->LastOpc = 0xFF; ++#endif ++ ++#endif ++ /* Restart ports but do not initialize PHY. */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIMacsFound > 1) { ++ SkY2PortStart(pAC, IoC, 0); ++ SkY2PortStart(pAC, IoC, 1); ++ } else { ++ SkY2PortStart(pAC, IoC, FromPort); ++ } ++ } else { ++ /* tschilling: Handling of return value inserted. */ ++ if (SkGeInitPort(pAC, IoC, FromPort)) { ++ if (FromPort == 0) { ++ printk("%s: SkGeInitPort A failed.\n", pAC->dev[0]->name); ++ } else { ++ printk("%s: SkGeInitPort B failed.\n", pAC->dev[1]->name); ++ } ++ } ++ SkAddrMcUpdate(pAC,IoC, FromPort); ++ PortReInitBmu(pAC, FromPort); ++ SkGePollTxD(pAC, IoC, FromPort, SK_TRUE); ++ CLEAR_AND_START_RX(FromPort); ++ } ++ spin_unlock_irqrestore( ++ &pAC->TxPort[FromPort][TX_PRIO_LOW].TxDesRingLock, ++ Flags); ++ ++#if 0 ++ /* restart the kernel timer */ ++ pNet = (DEV_NET *) pAC->dev[FromPort]->priv; ++ if (!timer_pending(&pNet->KernelTimer)) { ++ pNet->KernelTimer.expires = ++ jiffies + (HZ/4); /* 250ms */ ++ add_timer(&pNet->KernelTimer); ++ } ++#endif ++ pNet->InRecover = SK_FALSE; ++ /* enable Interrupts */ ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ SK_OUT32(pAC->IoBase, B0_HWE_IMSK, IRQ_HWE_MASK); ++ netif_wake_queue(pAC->dev[0]); ++ break; + default: + break; + } + SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_EVENT, + ("END EVENT ")); +- ++ + return (0); + } /* SkDrvEvent */ + + ++/****************************************************************************** ++ * ++ * SkLocalEventQueue() - add event to queue ++ * ++ * Description: ++ * This function adds an event to the event queue and run the ++ * SkEventDispatcher. At least Init Level 1 is required to queue events, ++ * but will be scheduled add Init Level 2. ++ * ++ * returns: ++ * nothing ++ */ ++void SkLocalEventQueue( ++SK_AC *pAC, /* Adapters context */ ++SK_U32 Class, /* Event Class */ ++SK_U32 Event, /* Event to be queued */ ++SK_U32 Param1, /* Event parameter 1 */ ++SK_U32 Param2, /* Event parameter 2 */ ++SK_BOOL Dispatcher) /* Dispatcher flag: ++ * TRUE == Call SkEventDispatcher ++ * FALSE == Don't execute SkEventDispatcher ++ */ ++{ ++ SK_EVPARA EvPara; ++ EvPara.Para32[0] = Param1; ++ EvPara.Para32[1] = Param2; ++ ++ ++ if (Class == SKGE_PNMI) { ++ SkPnmiEvent( pAC, ++ pAC->IoBase, ++ Event, ++ EvPara); ++ } else { ++ SkEventQueue( pAC, ++ Class, ++ Event, ++ EvPara); ++ } ++ ++ /* Run the dispatcher */ ++ if (Dispatcher) { ++ SkEventDispatcher(pAC, pAC->IoBase); ++ } ++ ++} ++ ++/****************************************************************************** ++ * ++ * SkLocalEventQueue64() - add event to queue (64bit version) ++ * ++ * Description: ++ * This function adds an event to the event queue and run the ++ * SkEventDispatcher. At least Init Level 1 is required to queue events, ++ * but will be scheduled add Init Level 2. ++ * ++ * returns: ++ * nothing ++ */ ++void SkLocalEventQueue64( ++SK_AC *pAC, /* Adapters context */ ++SK_U32 Class, /* Event Class */ ++SK_U32 Event, /* Event to be queued */ ++SK_U64 Param, /* Event parameter */ ++SK_BOOL Dispatcher) /* Dispatcher flag: ++ * TRUE == Call SkEventDispatcher ++ * FALSE == Don't execute SkEventDispatcher ++ */ ++{ ++ SK_EVPARA EvPara; ++ EvPara.Para64 = Param; ++ ++ ++ if (Class == SKGE_PNMI) { ++ SkPnmiEvent( pAC, ++ pAC->IoBase, ++ Event, ++ EvPara); ++ } else { ++ SkEventQueue( pAC, ++ Class, ++ Event, ++ EvPara); ++ } ++ ++ /* Run the dispatcher */ ++ if (Dispatcher) { ++ SkEventDispatcher(pAC, pAC->IoBase); ++ } ++ ++} ++ ++ + /***************************************************************************** + * + * SkErrorLog - log errors +@@ -4523,8 +5776,6 @@ + + } /* SkErrorLog */ + +-#ifdef SK_DIAG_SUPPORT +- + /***************************************************************************** + * + * SkDrvEnterDiagMode - handles DIAG attach request +@@ -4550,7 +5801,7 @@ + + pAC->DiagModeActive = DIAG_ACTIVE; + if (pAC->BoardLevel > SK_INIT_DATA) { +- if (pNet->Up) { ++ if (netif_running(pAC->dev[0])) { + pAC->WasIfUp[0] = SK_TRUE; + pAC->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ + DoPrintInterfaceChange = SK_FALSE; +@@ -4558,9 +5809,10 @@ + } else { + pAC->WasIfUp[0] = SK_FALSE; + } ++ + if (pNet != (DEV_NET *) pAc->dev[1]->priv) { + pNet = (DEV_NET *) pAc->dev[1]->priv; +- if (pNet->Up) { ++ if (netif_running(pAC->dev[1])) { + pAC->WasIfUp[1] = SK_TRUE; + pAC->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ + DoPrintInterfaceChange = SK_FALSE; +@@ -4592,16 +5844,16 @@ + sizeof(SK_PNMI_STRUCT_DATA)); + pAc->DiagModeActive = DIAG_NOTACTIVE; + pAc->Pnmi.DiagAttached = SK_DIAG_IDLE; +- if (pAc->WasIfUp[0] == SK_TRUE) { +- pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ ++ if (pAc->WasIfUp[0] == SK_TRUE) { ++ pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ + DoPrintInterfaceChange = SK_FALSE; +- SkDrvInitAdapter(pAc, 0); /* first device */ +- } +- if (pAc->WasIfUp[1] == SK_TRUE) { +- pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ ++ SkDrvInitAdapter(pAc, 0); /* first device */ ++ } ++ if (pAc->WasIfUp[1] == SK_TRUE) { ++ pAc->DiagFlowCtrl = SK_TRUE; /* for SkGeClose */ + DoPrintInterfaceChange = SK_FALSE; +- SkDrvInitAdapter(pAc, 1); /* second device */ +- } ++ SkDrvInitAdapter(pAc, 1); /* second device */ ++ } + return(0); + } + +@@ -4746,14 +5998,25 @@ + + } /* SkDrvInitAdapter */ + +-#endif ++static int __init sk98lin_init(void) ++{ ++ return pci_module_init(&sk98lin_driver); ++} ++ ++static void __exit sk98lin_cleanup(void) ++{ ++ pci_unregister_driver(&sk98lin_driver); ++} ++ ++module_init(sk98lin_init); ++module_exit(sk98lin_cleanup); ++ + + #ifdef DEBUG + /****************************************************************************/ + /* "debug only" section *****************************************************/ + /****************************************************************************/ + +- + /***************************************************************************** + * + * DumpMsg - print a frame +@@ -4764,9 +6027,11 @@ + * Returns: N/A + * + */ +-static void DumpMsg(struct sk_buff *skb, char *str) ++static void DumpMsg( ++struct sk_buff *skb, /* linux' socket buffer */ ++char *str) /* additional msg string */ + { +- int msglen; ++ int msglen = (skb->len > 64) ? 64 : skb->len; + + if (skb == NULL) { + printk("DumpMsg(): NULL-Message\n"); +@@ -4778,19 +6043,14 @@ + return; + } + +- msglen = skb->len; +- if (msglen > 64) +- msglen = 64; +- +- printk("--- Begin of message from %s , len %d (from %d) ----\n", str, msglen, skb->len); +- ++ printk("DumpMsg: PhysPage: %p\n", ++ page_address(virt_to_page(skb->data))); ++ printk("--- Begin of message from %s , len %d (from %d) ----\n", ++ str, msglen, skb->len); + DumpData((char *)skb->data, msglen); +- + printk("------- End of message ---------\n"); + } /* DumpMsg */ + +- +- + /***************************************************************************** + * + * DumpData - print a data area +@@ -4802,23 +6062,22 @@ + * Returns: N/A + * + */ +-static void DumpData(char *p, int size) +-{ +-register int i; +-int haddr, addr; +-char hex_buffer[180]; +-char asc_buffer[180]; +-char HEXCHAR[] = "0123456789ABCDEF"; +- +- addr = 0; +- haddr = 0; +- hex_buffer[0] = 0; +- asc_buffer[0] = 0; ++static void DumpData( ++char *p, /* pointer to area containing the data */ ++int size) /* the size of that data area in bytes */ ++{ ++ register int i; ++ int haddr = 0, addr = 0; ++ char hex_buffer[180] = { '\0' }; ++ char asc_buffer[180] = { '\0' }; ++ char HEXCHAR[] = "0123456789ABCDEF"; ++ + for (i=0; i < size; ) { +- if (*p >= '0' && *p <='z') ++ if (*p >= '0' && *p <='z') { + asc_buffer[addr] = *p; +- else ++ } else { + asc_buffer[addr] = '.'; ++ } + addr++; + asc_buffer[addr] = 0; + hex_buffer[haddr] = HEXCHAR[(*p & 0xf0) >> 4]; +@@ -4844,27 +6103,24 @@ + * DumpLong - print a data area as long values + * + * Description: +- * This function prints a area of data to the system logfile/to the ++ * This function prints a long variable to the system logfile/to the + * console. + * + * Returns: N/A + * + */ +-static void DumpLong(char *pc, int size) +-{ +-register int i; +-int haddr, addr; +-char hex_buffer[180]; +-char asc_buffer[180]; +-char HEXCHAR[] = "0123456789ABCDEF"; +-long *p; +-int l; +- +- addr = 0; +- haddr = 0; +- hex_buffer[0] = 0; +- asc_buffer[0] = 0; +- p = (long*) pc; ++static void DumpLong( ++char *pc, /* location of the variable to print */ ++int size) /* how large is the variable? */ ++{ ++ register int i; ++ int haddr = 0, addr = 0; ++ char hex_buffer[180] = { '\0' }; ++ char asc_buffer[180] = { '\0' }; ++ char HEXCHAR[] = "0123456789ABCDEF"; ++ long *p = (long*) pc; ++ int l; ++ + for (i=0; i < size; ) { + l = (long) *p; + hex_buffer[haddr] = HEXCHAR[(l >> 28) & 0xf]; +@@ -4898,330 +6154,9 @@ + + #endif + +-static int __devinit skge_probe_one(struct pci_dev *pdev, +- const struct pci_device_id *ent) +-{ +- SK_AC *pAC; +- DEV_NET *pNet = NULL; +- struct net_device *dev = NULL; +-#ifdef CONFIG_PROC_FS +- struct proc_dir_entry *pProcFile; +-#endif +- static int boards_found = 0; +- int error = -ENODEV; +- +- if (pci_enable_device(pdev)) +- goto out; +- +- /* Configure DMA attributes. */ +- if (pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL) && +- pci_set_dma_mask(pdev, (u64) 0xffffffff)) +- goto out_disable_device; +- +- +- if ((dev = alloc_etherdev(sizeof(DEV_NET))) == NULL) { +- printk(KERN_ERR "Unable to allocate etherdev " +- "structure!\n"); +- goto out_disable_device; +- } +- +- pNet = dev->priv; +- pNet->pAC = kmalloc(sizeof(SK_AC), GFP_KERNEL); +- if (!pNet->pAC) { +- printk(KERN_ERR "Unable to allocate adapter " +- "structure!\n"); +- goto out_free_netdev; +- } +- +- memset(pNet->pAC, 0, sizeof(SK_AC)); +- pAC = pNet->pAC; +- pAC->PciDev = pdev; +- pAC->PciDevId = pdev->device; +- pAC->dev[0] = dev; +- pAC->dev[1] = dev; +- sprintf(pAC->Name, "SysKonnect SK-98xx"); +- pAC->CheckQueue = SK_FALSE; +- +- pNet->Mtu = 1500; +- pNet->Up = 0; +- dev->irq = pdev->irq; +- error = SkGeInitPCI(pAC); +- if (error) { +- printk("SKGE: PCI setup failed: %i\n", error); +- goto out_free_netdev; +- } +- +- SET_MODULE_OWNER(dev); +- dev->open = &SkGeOpen; +- dev->stop = &SkGeClose; +- dev->hard_start_xmit = &SkGeXmit; +- dev->get_stats = &SkGeStats; +- dev->set_multicast_list = &SkGeSetRxMode; +- dev->set_mac_address = &SkGeSetMacAddr; +- dev->do_ioctl = &SkGeIoctl; +- dev->change_mtu = &SkGeChangeMtu; +- dev->flags &= ~IFF_RUNNING; +- SET_NETDEV_DEV(dev, &pdev->dev); +- +-#ifdef SK_ZEROCOPY +-#ifdef USE_SK_TX_CHECKSUM +- if (pAC->ChipsetType) { +- /* Use only if yukon hardware */ +- /* SK and ZEROCOPY - fly baby... */ +- dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; +- } +-#endif +-#endif +- +- pAC->Index = boards_found++; +- +- if (SkGeBoardInit(dev, pAC)) +- goto out_free_netdev; +- +- /* Register net device */ +- if (register_netdev(dev)) { +- printk(KERN_ERR "SKGE: Could not register device.\n"); +- goto out_free_resources; +- } +- +- /* Print adapter specific string from vpd */ +- ProductStr(pAC); +- printk("%s: %s\n", dev->name, pAC->DeviceStr); +- +- /* Print configuration settings */ +- printk(" PrefPort:%c RlmtMode:%s\n", +- 'A' + pAC->Rlmt.Net[0].Port[pAC->Rlmt.Net[0].PrefPort]->PortNumber, +- (pAC->RlmtMode==0) ? "Check Link State" : +- ((pAC->RlmtMode==1) ? "Check Link State" : +- ((pAC->RlmtMode==3) ? "Check Local Port" : +- ((pAC->RlmtMode==7) ? "Check Segmentation" : +- ((pAC->RlmtMode==17) ? "Dual Check Link State" :"Error"))))); +- +- SkGeYellowLED(pAC, pAC->IoBase, 1); +- +- +- memcpy(&dev->dev_addr, &pAC->Addr.Net[0].CurrentMacAddress, 6); +- +-#ifdef CONFIG_PROC_FS +- pProcFile = create_proc_entry(dev->name, S_IRUGO, pSkRootDir); +- if (pProcFile) { +- pProcFile->proc_fops = &sk_proc_fops; +- pProcFile->data = dev; +- pProcFile->owner = THIS_MODULE; +- } +-#endif +- +- pNet->PortNr = 0; +- pNet->NetNr = 0; +- +- boards_found++; +- +- /* More then one port found */ +- if ((pAC->GIni.GIMacsFound == 2 ) && (pAC->RlmtNets == 2)) { +- if ((dev = alloc_etherdev(sizeof(DEV_NET))) == 0) { +- printk(KERN_ERR "Unable to allocate etherdev " +- "structure!\n"); +- goto out; +- } +- +- pAC->dev[1] = dev; +- pNet = dev->priv; +- pNet->PortNr = 1; +- pNet->NetNr = 1; +- pNet->pAC = pAC; +- pNet->Mtu = 1500; +- pNet->Up = 0; +- +- dev->open = &SkGeOpen; +- dev->stop = &SkGeClose; +- dev->hard_start_xmit = &SkGeXmit; +- dev->get_stats = &SkGeStats; +- dev->set_multicast_list = &SkGeSetRxMode; +- dev->set_mac_address = &SkGeSetMacAddr; +- dev->do_ioctl = &SkGeIoctl; +- dev->change_mtu = &SkGeChangeMtu; +- dev->flags &= ~IFF_RUNNING; +- +-#ifdef SK_ZEROCOPY +-#ifdef USE_SK_TX_CHECKSUM +- if (pAC->ChipsetType) { +- /* SG and ZEROCOPY - fly baby... */ +- dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; +- } +-#endif +-#endif +- +- if (register_netdev(dev)) { +- printk(KERN_ERR "SKGE: Could not register device.\n"); +- free_netdev(dev); +- pAC->dev[1] = pAC->dev[0]; +- } else { +-#ifdef CONFIG_PROC_FS +- pProcFile = create_proc_entry(dev->name, S_IRUGO, +- pSkRootDir); +- if (pProcFile) { +- pProcFile->proc_fops = &sk_proc_fops; +- pProcFile->data = dev; +- pProcFile->owner = THIS_MODULE; +- } +-#endif +- +- memcpy(&dev->dev_addr, +- &pAC->Addr.Net[1].CurrentMacAddress, 6); +- +- printk("%s: %s\n", dev->name, pAC->DeviceStr); +- printk(" PrefPort:B RlmtMode:Dual Check Link State\n"); +- } +- } +- +- /* Save the hardware revision */ +- pAC->HWRevision = (((pAC->GIni.GIPciHwRev >> 4) & 0x0F)*10) + +- (pAC->GIni.GIPciHwRev & 0x0F); +- +- /* Set driver globals */ +- pAC->Pnmi.pDriverFileName = DRIVER_FILE_NAME; +- pAC->Pnmi.pDriverReleaseDate = DRIVER_REL_DATE; +- +- memset(&pAC->PnmiBackup, 0, sizeof(SK_PNMI_STRUCT_DATA)); +- memcpy(&pAC->PnmiBackup, &pAC->PnmiStruct, sizeof(SK_PNMI_STRUCT_DATA)); +- +- pci_set_drvdata(pdev, dev); +- return 0; +- +- out_free_resources: +- FreeResources(dev); +- out_free_netdev: +- free_netdev(dev); +- out_disable_device: +- pci_disable_device(pdev); +- out: +- return error; +-} +- +-static void __devexit skge_remove_one(struct pci_dev *pdev) +-{ +- struct net_device *dev = pci_get_drvdata(pdev); +- DEV_NET *pNet = (DEV_NET *) dev->priv; +- SK_AC *pAC = pNet->pAC; +- int have_second_mac = 0; +- +- if ((pAC->GIni.GIMacsFound == 2) && pAC->RlmtNets == 2) +- have_second_mac = 1; +- +- remove_proc_entry(dev->name, pSkRootDir); +- unregister_netdev(dev); +- if (have_second_mac) { +- remove_proc_entry(pAC->dev[1]->name, pSkRootDir); +- unregister_netdev(pAC->dev[1]); +- } +- +- SkGeYellowLED(pAC, pAC->IoBase, 0); +- +- if (pAC->BoardLevel == SK_INIT_RUN) { +- SK_EVPARA EvPara; +- unsigned long Flags; +- +- /* board is still alive */ +- spin_lock_irqsave(&pAC->SlowPathLock, Flags); +- EvPara.Para32[0] = 0; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- EvPara.Para32[0] = 1; +- EvPara.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STOP, EvPara); +- SkEventDispatcher(pAC, pAC->IoBase); +- /* disable interrupts */ +- SK_OUT32(pAC->IoBase, B0_IMSK, 0); +- SkGeDeInit(pAC, pAC->IoBase); +- spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +- pAC->BoardLevel = SK_INIT_DATA; +- /* We do NOT check here, if IRQ was pending, of course*/ +- } +- +- if (pAC->BoardLevel == SK_INIT_IO) { +- /* board is still alive */ +- SkGeDeInit(pAC, pAC->IoBase); +- pAC->BoardLevel = SK_INIT_DATA; +- } +- +- FreeResources(dev); +- free_netdev(dev); +- if (have_second_mac) +- free_netdev(pAC->dev[1]); +- kfree(pAC); +-} +- +-static struct pci_device_id skge_pci_tbl[] = { +- { PCI_VENDOR_ID_3COM, 0x1700, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_3COM, 0x80eb, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_SYSKONNECT, 0x4300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_SYSKONNECT, 0x4320, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_DLINK, 0x4c00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_MARVELL, 0x4320, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +-#if 0 /* don't handle Yukon2 cards at the moment -- mlindner@syskonnect.de */ +- { PCI_VENDOR_ID_MARVELL, 0x4360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_MARVELL, 0x4361, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +-#endif +- { PCI_VENDOR_ID_MARVELL, 0x5005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_CNET, 0x434e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { PCI_VENDOR_ID_LINKSYS, 0x1064, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { 0, } +-}; +- +-/***************************************************************************** ++/******************************************************************************* + * +- * Avoid PCI ID confusion w/ skge by limiting advertised IDs so we don't +- * needlessly overlap... ++ * End of file + * +- */ +-static struct pci_device_id advertised_skge_pci_tbl[] = { +- { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, +- { 0, } +-}; +- +-MODULE_DEVICE_TABLE(pci, advertised_skge_pci_tbl); +- +-static struct pci_driver skge_driver = { +- .name = "skge", +- .id_table = skge_pci_tbl, +- .probe = skge_probe_one, +- .remove = __devexit_p(skge_remove_one), +-}; +- +-static int __init skge_init(void) +-{ +- int error; +- +-#ifdef CONFIG_PROC_FS +- memcpy(&SK_Root_Dir_entry, BOOT_STRING, sizeof(SK_Root_Dir_entry) - 1); +- +- pSkRootDir = proc_mkdir(SK_Root_Dir_entry, proc_net); +- if (!pSkRootDir) { +- printk(KERN_WARNING "Unable to create /proc/net/%s", +- SK_Root_Dir_entry); +- return -ENOMEM; +- } +- pSkRootDir->owner = THIS_MODULE; +-#endif +- +- error = pci_module_init(&skge_driver); +- if (error) { +-#ifdef CONFIG_PROC_FS +- remove_proc_entry(pSkRootDir->name, proc_net); +-#endif +- } +- +- return error; +-} +- +-static void __exit skge_exit(void) +-{ +- pci_unregister_driver(&skge_driver); +-#ifdef CONFIG_PROC_FS +- remove_proc_entry(pSkRootDir->name, proc_net); +-#endif +-} ++ ******************************************************************************/ + +-module_init(skge_init); +-module_exit(skge_exit); +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skgehwt.c linux-2.6.9.new/drivers/net/sk98lin/skgehwt.c +--- linux-2.6.9.old/drivers/net/sk98lin/skgehwt.c 2004-10-19 05:55:07.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skgehwt.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgehwt.c + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.15 $ +- * Date: $Date: 2003/09/16 13:41:23 $ ++ * Version: $Revision: 2.2 $ ++ * Date: $Date: 2004/05/28 13:39:04 $ + * Purpose: Hardware Timer + * + ******************************************************************************/ +@@ -11,7 +11,7 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -27,7 +27,7 @@ + */ + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skgehwt.c,v 1.15 2003/09/16 13:41:23 rschmidt Exp $ (C) Marvell."; ++ "@(#) $Id: skgehwt.c,v 2.2 2004/05/28 13:39:04 rschmidt Exp $ (C) Marvell."; + #endif + + #include "h/skdrv1st.h" /* Driver Specific Definitions */ +@@ -44,10 +44,10 @@ + /* + * Prototypes of local functions. + */ +-#define SK_HWT_MAX (65000) ++#define SK_HWT_MAX 65000UL * 160 /* ca. 10 sec. */ + + /* correction factor */ +-#define SK_HWT_FAC (1000 * (SK_U32)pAC->GIni.GIHstClkFact / 100) ++#define SK_HWT_FAC (10 * (SK_U32)pAC->GIni.GIHstClkFact / 16) + + /* + * Initialize hardware timer. +@@ -73,29 +73,21 @@ + void SkHwtStart( + SK_AC *pAC, /* Adapters context */ + SK_IOC Ioc, /* IoContext */ +-SK_U32 Time) /* Time in units of 16us to load the timer with. */ ++SK_U32 Time) /* Time in usec to load the timer */ + { +- SK_U32 Cnt; +- + if (Time > SK_HWT_MAX) + Time = SK_HWT_MAX; + + pAC->Hwt.TStart = Time; + pAC->Hwt.TStop = 0L; + +- Cnt = Time; +- +- /* +- * if time < 16 us +- * time = 16 us +- */ +- if (!Cnt) { +- Cnt++; ++ if (!Time) { ++ Time = 1L; + } + +- SK_OUT32(Ioc, B2_TI_INI, Cnt * SK_HWT_FAC); +- +- SK_OUT16(Ioc, B2_TI_CTRL, TIM_START); /* Start timer. */ ++ SK_OUT32(Ioc, B2_TI_INI, Time * SK_HWT_FAC); ++ ++ SK_OUT16(Ioc, B2_TI_CTRL, TIM_START); /* Start timer */ + + pAC->Hwt.TActive = SK_TRUE; + } +@@ -109,13 +101,12 @@ + SK_IOC Ioc) /* IoContext */ + { + SK_OUT16(Ioc, B2_TI_CTRL, TIM_STOP); +- ++ + SK_OUT16(Ioc, B2_TI_CTRL, TIM_CLR_IRQ); + + pAC->Hwt.TActive = SK_FALSE; + } + +- + /* + * Stop hardware timer and read time elapsed since last start. + * +@@ -129,6 +120,9 @@ + { + SK_U32 TRead; + SK_U32 IStatus; ++ SK_U32 TimerInt; ++ ++ TimerInt = CHIP_ID_YUKON_2(pAC) ? Y2_IS_TIMINT : IS_TIMINT; + + if (pAC->Hwt.TActive) { + +@@ -139,15 +133,15 @@ + + SK_IN32(Ioc, B0_ISRC, &IStatus); + +- /* Check if timer expired (or wraped around) */ +- if ((TRead > pAC->Hwt.TStart) || (IStatus & IS_TIMINT)) { +- ++ /* Check if timer expired (or wrapped around) */ ++ if ((TRead > pAC->Hwt.TStart) || ((IStatus & TimerInt) != 0)) { ++ + SkHwtStop(pAC, Ioc); +- ++ + pAC->Hwt.TStop = pAC->Hwt.TStart; + } + else { +- ++ + pAC->Hwt.TStop = pAC->Hwt.TStart - TRead; + } + } +@@ -162,9 +156,9 @@ + SK_IOC Ioc) /* IoContext */ + { + SkHwtStop(pAC, Ioc); +- ++ + pAC->Hwt.TStop = pAC->Hwt.TStart; +- ++ + SkTimerDone(pAC, Ioc); + } + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skgeinit.c linux-2.6.9.new/drivers/net/sk98lin/skgeinit.c +--- linux-2.6.9.old/drivers/net/sk98lin/skgeinit.c 2004-10-19 05:54:30.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skgeinit.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgeinit.c + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.97 $ +- * Date: $Date: 2003/10/02 16:45:31 $ ++ * Version: $Revision: 2.73 $ ++ * Date: $Date: 2005/05/24 08:05:45 $ + * Purpose: Contains functions to initialize the adapter + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -31,7 +30,7 @@ + + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skgeinit.c,v 1.97 2003/10/02 16:45:31 rschmidt Exp $ (C) Marvell."; ++ "@(#) $Id: skgeinit.c,v 2.73 2005/05/24 08:05:45 rschmidt Exp $ (C) Marvell."; + #endif + + struct s_QOffTab { +@@ -59,6 +58,101 @@ + + /****************************************************************************** + * ++ * SkGePortVlan() - Enable / Disable VLAN support ++ * ++ * Description: ++ * Enable or disable the VLAN support of the selected port. ++ * The new configuration is *not* saved over any SkGeStopPort() and ++ * SkGeInitPort() calls. ++ * Currently this function is only supported on Yukon-2/EC adapters. ++ * ++ * Returns: ++ * nothing ++ */ ++void SkGePortVlan( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port number */ ++SK_BOOL Enable) /* Flag */ ++{ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (Enable) { ++ SK_OUT32(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); ++ SK_OUT32(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); ++ } ++ else { ++ SK_OUT32(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); ++ SK_OUT32(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), TX_VLAN_TAG_OFF); ++ } ++ } ++} ++ ++ ++/****************************************************************************** ++ * ++ * SkGeRxRss() - Enable / Disable RSS Hash Calculation ++ * ++ * Description: ++ * Enable or disable the RSS hash calculation of the selected port. ++ * The new configuration is *not* saved over any SkGeStopPort() and ++ * SkGeInitPort() calls. ++ * Currently this function is only supported on Yukon-2/EC adapters. ++ * ++ * Returns: ++ * nothing ++ */ ++void SkGeRxRss( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port number */ ++SK_BOOL Enable) /* Flag */ ++{ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (Enable) { ++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR), ++ BMU_ENA_RX_RSS_HASH); ++ } ++ else { ++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR), ++ BMU_DIS_RX_RSS_HASH); ++ } ++ } ++} ++ ++/****************************************************************************** ++ * ++ * SkGeRxCsum() - Enable / Disable Receive Checksum ++ * ++ * Description: ++ * Enable or disable the checksum of the selected port. ++ * The new configuration is *not* saved over any SkGeStopPort() and ++ * SkGeInitPort() calls. ++ * Currently this function is only supported on Yukon-2/EC adapters. ++ * ++ * Returns: ++ * nothing ++ */ ++void SkGeRxCsum( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port number */ ++SK_BOOL Enable) /* Flag */ ++{ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (Enable) { ++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR), ++ BMU_ENA_RX_CHKSUM); ++ } ++ else { ++ SK_OUT32(IoC, Q_ADDR(pAC->GIni.GP[Port].PRxQOff, Q_CSR), ++ BMU_DIS_RX_CHKSUM); ++ } ++ } ++} ++ ++ ++/****************************************************************************** ++ * + * SkGePollRxD() - Enable / Disable Descriptor Polling of RxD Ring + * + * Description: +@@ -71,8 +165,8 @@ + * nothing + */ + void SkGePollRxD( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL PollRxD) /* SK_TRUE (enable pol.), SK_FALSE (disable pol.) */ + { +@@ -80,8 +174,8 @@ + + pPrt = &pAC->GIni.GP[Port]; + +- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), (PollRxD) ? +- CSR_ENA_POL : CSR_DIS_POL); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), (SK_U32)((PollRxD) ? ++ CSR_ENA_POL : CSR_DIS_POL)); + } /* SkGePollRxD */ + + +@@ -99,8 +193,8 @@ + * nothing + */ + void SkGePollTxD( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL PollTxD) /* SK_TRUE (enable pol.), SK_FALSE (disable pol.) */ + { +@@ -114,7 +208,7 @@ + if (pPrt->PXSQSize != 0) { + SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), DWord); + } +- ++ + if (pPrt->PXAQSize != 0) { + SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), DWord); + } +@@ -135,17 +229,27 @@ + * nothing + */ + void SkGeYellowLED( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int State) /* yellow LED state, 0 = OFF, 0 != ON */ + { ++ int LedReg; ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* different mapping on Yukon-2 */ ++ LedReg = B0_CTST + 1; ++ } ++ else { ++ LedReg = B0_LED; ++ } ++ + if (State == 0) { +- /* Switch yellow LED OFF */ +- SK_OUT8(IoC, B0_LED, LED_STAT_OFF); ++ /* Switch state LED OFF */ ++ SK_OUT8(IoC, LedReg, LED_STAT_OFF); + } + else { +- /* Switch yellow LED ON */ +- SK_OUT8(IoC, B0_LED, LED_STAT_ON); ++ /* Switch state LED ON */ ++ SK_OUT8(IoC, LedReg, LED_STAT_ON); + } + } /* SkGeYellowLED */ + +@@ -169,8 +273,8 @@ + * nothing + */ + void SkGeXmitLED( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Led, /* offset to the LED Init Value register */ + int Mode) /* Mode may be SK_LED_DIS, SK_LED_ENA, SK_LED_TST */ + { +@@ -197,13 +301,13 @@ + SK_OUT8(IoC, Led + XMIT_LED_TST, LED_T_OFF); + break; + } +- ++ + /* +- * 1000BT: The Transmit LED is driven by the PHY. ++ * 1000BT: the Transmit LED is driven by the PHY. + * But the default LED configuration is used for + * Level One and Broadcom PHYs. +- * (Broadcom: It may be that PHY_B_PEC_EN_LTR has to be set.) +- * (In this case it has to be added here. But we will see. XXX) ++ * (Broadcom: It may be that PHY_B_PEC_EN_LTR has to be set. ++ * In this case it has to be added here.) + */ + } /* SkGeXmitLED */ + #endif /* !SK_SLIM || GENESIS */ +@@ -227,7 +331,7 @@ + * 1: configuration error + */ + static int DoCalcAddr( +-SK_AC *pAC, /* adapter context */ ++SK_AC *pAC, /* Adapter Context */ + SK_GEPORT SK_FAR *pPrt, /* port index */ + int QuSize, /* size of the queue to configure in kB */ + SK_U32 SK_FAR *StartVal, /* start value for address calculation */ +@@ -264,12 +368,35 @@ + + /****************************************************************************** + * ++ * SkGeRoundQueueSize() - Round the given queue size to the adpaters QZ units ++ * ++ * Description: ++ * This function rounds the given queue size in kBs to adapter specific ++ * queue size units (Genesis and Yukon: 8 kB, Yukon-2/EC: 1 kB). ++ * ++ * Returns: ++ * the rounded queue size in kB ++ */ ++static int SkGeRoundQueueSize( ++SK_AC *pAC, /* Adapter Context */ ++int QueueSizeKB) /* Queue size in kB */ ++{ ++ int QueueSizeSteps; ++ ++ QueueSizeSteps = (CHIP_ID_YUKON_2(pAC)) ? QZ_STEP_Y2 : QZ_STEP; ++ ++ return((QueueSizeKB + QueueSizeSteps - 1) & ~(QueueSizeSteps - 1)); ++} /* SkGeRoundQueueSize */ ++ ++ ++/****************************************************************************** ++ * + * SkGeInitAssignRamToQueues() - allocate default queue sizes + * + * Description: + * This function assigns the memory to the different queues and ports. + * When DualNet is set to SK_TRUE all ports get the same amount of memory. +- * Otherwise the first port gets most of the memory and all the ++ * Otherwise the first port gets most of the memory and all the + * other ports just the required minimum. + * This function can only be called when pAC->GIni.GIRamSize and + * pAC->GIni.GIMacsFound have been initialized, usually this happens +@@ -282,102 +409,141 @@ + */ + + int SkGeInitAssignRamToQueues( +-SK_AC *pAC, /* Adapter context */ ++SK_AC *pAC, /* Adapter Context */ + int ActivePort, /* Active Port in RLMT mode */ +-SK_BOOL DualNet) /* adapter context */ ++SK_BOOL DualNet) /* Dual Net active */ + { + int i; + int UsedKilobytes; /* memory already assigned */ + int ActivePortKilobytes; /* memory available for active port */ +- SK_GEPORT *pGePort; +- +- UsedKilobytes = 0; ++ int MinQueueSize; /* min. memory for queues */ ++ int TotalRamSize; /* total memory for queues */ ++ SK_BOOL DualPortYukon2; ++ SK_GEPORT *pPrt; + + if (ActivePort >= pAC->GIni.GIMacsFound) { ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, + ("SkGeInitAssignRamToQueues: ActivePort (%d) invalid\n", + ActivePort)); + return(1); + } +- if (((pAC->GIni.GIMacsFound * (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE)) + +- ((RAM_QUOTA_SYNC == 0) ? 0 : SK_MIN_TXQ_SIZE)) > pAC->GIni.GIRamSize) { ++ ++ DualPortYukon2 = (CHIP_ID_YUKON_2(pAC) && pAC->GIni.GIMacsFound == 2); ++ ++ TotalRamSize = pAC->GIni.GIRamSize; ++ ++ if (DualPortYukon2) { ++ TotalRamSize *= 2; ++ } ++ ++ MinQueueSize = SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE; ++ ++ if (MinQueueSize > pAC->GIni.GIRamSize) { ++ MinQueueSize = pAC->GIni.GIRamSize; ++ } ++ ++ if ((pAC->GIni.GIMacsFound * MinQueueSize + ++ RAM_QUOTA_SYNC * SK_MIN_TXQ_SIZE) > TotalRamSize) { ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, + ("SkGeInitAssignRamToQueues: Not enough memory (%d)\n", +- pAC->GIni.GIRamSize)); ++ TotalRamSize)); + return(2); + } + + if (DualNet) { + /* every port gets the same amount of memory */ +- ActivePortKilobytes = pAC->GIni.GIRamSize / pAC->GIni.GIMacsFound; ++ ActivePortKilobytes = TotalRamSize / pAC->GIni.GIMacsFound; ++ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { + +- pGePort = &pAC->GIni.GP[i]; +- ++ pPrt = &pAC->GIni.GP[i]; ++ ++ if (DualPortYukon2) { ++ ActivePortKilobytes = pAC->GIni.GIRamSize; ++ } + /* take away the minimum memory for active queues */ +- ActivePortKilobytes -= (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE); ++ ActivePortKilobytes -= MinQueueSize; + + /* receive queue gets the minimum + 80% of the rest */ +- pGePort->PRxQSize = (int) (ROUND_QUEUE_SIZE_KB(( +- ActivePortKilobytes * (unsigned long) RAM_QUOTA_RX) / 100)) ++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC, ++ (int)((long)ActivePortKilobytes * RAM_QUOTA_RX) / 100) + + SK_MIN_RXQ_SIZE; + +- ActivePortKilobytes -= (pGePort->PRxQSize - SK_MIN_RXQ_SIZE); ++ ActivePortKilobytes -= (pPrt->PRxQSize - SK_MIN_RXQ_SIZE); + + /* synchronous transmit queue */ +- pGePort->PXSQSize = 0; ++ pPrt->PXSQSize = 0; + + /* asynchronous transmit queue */ +- pGePort->PXAQSize = (int) ROUND_QUEUE_SIZE_KB(ActivePortKilobytes + +- SK_MIN_TXQ_SIZE); ++ pPrt->PXAQSize = SkGeRoundQueueSize(pAC, ++ ActivePortKilobytes + SK_MIN_TXQ_SIZE); + } + } +- else { +- /* Rlmt Mode or single link adapter */ ++ else { /* RLMT Mode or single link adapter */ + +- /* Set standby queue size defaults for all standby ports */ ++ UsedKilobytes = 0; ++ ++ /* set standby queue size defaults for all standby ports */ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { + + if (i != ActivePort) { +- pGePort = &pAC->GIni.GP[i]; ++ pPrt = &pAC->GIni.GP[i]; + +- pGePort->PRxQSize = SK_MIN_RXQ_SIZE; +- pGePort->PXAQSize = SK_MIN_TXQ_SIZE; +- pGePort->PXSQSize = 0; ++ if (DualPortYukon2) { ++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC, ++ (int)((long)pAC->GIni.GIRamSize * RAM_QUOTA_RX) / 100); ++ pPrt->PXAQSize = pAC->GIni.GIRamSize - pPrt->PRxQSize; ++ } ++ else { ++ pPrt->PRxQSize = SK_MIN_RXQ_SIZE; ++ pPrt->PXAQSize = SK_MIN_TXQ_SIZE; ++ } ++ pPrt->PXSQSize = 0; + + /* Count used RAM */ +- UsedKilobytes += pGePort->PRxQSize + pGePort->PXAQSize; ++ UsedKilobytes += pPrt->PRxQSize + pPrt->PXAQSize; + } + } + /* what's left? */ +- ActivePortKilobytes = pAC->GIni.GIRamSize - UsedKilobytes; ++ ActivePortKilobytes = TotalRamSize - UsedKilobytes; + + /* assign it to the active port */ + /* first take away the minimum memory */ +- ActivePortKilobytes -= (SK_MIN_RXQ_SIZE + SK_MIN_TXQ_SIZE); +- pGePort = &pAC->GIni.GP[ActivePort]; ++ ActivePortKilobytes -= MinQueueSize; ++ pPrt = &pAC->GIni.GP[ActivePort]; + + /* receive queue get's the minimum + 80% of the rest */ +- pGePort->PRxQSize = (int) (ROUND_QUEUE_SIZE_KB((ActivePortKilobytes * +- (unsigned long) RAM_QUOTA_RX) / 100)) + SK_MIN_RXQ_SIZE; ++ pPrt->PRxQSize = SkGeRoundQueueSize(pAC, ++ (int)((long)ActivePortKilobytes * RAM_QUOTA_RX) / 100) + ++ MinQueueSize/2; + +- ActivePortKilobytes -= (pGePort->PRxQSize - SK_MIN_RXQ_SIZE); ++ ActivePortKilobytes -= (pPrt->PRxQSize - MinQueueSize/2); + + /* synchronous transmit queue */ +- pGePort->PXSQSize = 0; ++ pPrt->PXSQSize = 0; + + /* asynchronous transmit queue */ +- pGePort->PXAQSize = (int) ROUND_QUEUE_SIZE_KB(ActivePortKilobytes) + +- SK_MIN_TXQ_SIZE; ++ pPrt->PXAQSize = SkGeRoundQueueSize(pAC, ActivePortKilobytes) + ++ MinQueueSize/2; + } +-#ifdef VCPU +- VCPUprintf(0, "PRxQSize=%u, PXSQSize=%u, PXAQSize=%u\n", +- pGePort->PRxQSize, pGePort->PXSQSize, pGePort->PXAQSize); +-#endif /* VCPU */ ++ ++#ifdef DEBUG ++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ ++ pPrt = &pAC->GIni.GP[i]; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Port %d: RxQSize=%u, TxAQSize=%u, TxSQSize=%u\n", ++ i, pPrt->PRxQSize, pPrt->PXAQSize, pPrt->PXSQSize)); ++ } ++#endif /* DEBUG */ + + return(0); + } /* SkGeInitAssignRamToQueues */ + ++ + /****************************************************************************** + * + * SkGeCheckQSize() - Checks the Adapters Queue Size Configuration +@@ -388,12 +554,12 @@ + * used ports. + * This requirements must be fullfilled to have a valid configuration: + * - The size of all queues must not exceed GIRamSize. +- * - The queue sizes must be specified in units of 8 kB. ++ * - The queue sizes must be specified in units of 8 kB (Genesis & Yukon). + * - The size of Rx queues of available ports must not be +- * smaller than 16 kB. ++ * smaller than 16 kB (Genesis & Yukon) resp. 10 kB (Yukon-2). + * - The size of at least one Tx queue (synch. or asynch.) +- * of available ports must not be smaller than 16 kB +- * when Jumbo Frames are used. ++ * of available ports must not be smaller than 16 kB (Genesis & Yukon), ++ * resp. 10 kB (Yukon-2) when Jumbo Frames are used. + * - The RAM start and end addresses must not be changed + * for ports which are already initialized. + * Furthermore SkGeCheckQSize() defines the Start and End Addresses +@@ -404,7 +570,7 @@ + * 1: Queue Size Configuration invalid + */ + static int SkGeCheckQSize( +-SK_AC *pAC, /* adapter context */ ++SK_AC *pAC, /* Adapter Context */ + int Port) /* port index */ + { + SK_GEPORT *pPrt; +@@ -414,55 +580,68 @@ + SK_U32 StartAddr; + #ifndef SK_SLIM + int UsedMem; /* total memory used (max. found ports) */ +-#endif ++#endif + + Rtv = 0; +- ++ + #ifndef SK_SLIM + + UsedMem = 0; ++ Rtv = 0; + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { + pPrt = &pAC->GIni.GP[i]; + +- if ((pPrt->PRxQSize & QZ_UNITS) != 0 || +- (pPrt->PXSQSize & QZ_UNITS) != 0 || +- (pPrt->PXAQSize & QZ_UNITS) != 0) { ++ if (CHIP_ID_YUKON_2(pAC)) { ++ UsedMem = 0; ++ } ++ else if (((pPrt->PRxQSize & QZ_UNITS) != 0 || ++ (pPrt->PXSQSize & QZ_UNITS) != 0 || ++ (pPrt->PXAQSize & QZ_UNITS) != 0)) { + + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG); + return(1); + } + +- if (i == Port && pPrt->PRxQSize < SK_MIN_RXQ_SIZE) { ++#ifndef SK_DIAG ++ if (i == Port && pAC->GIni.GIRamSize > SK_MIN_RXQ_SIZE && ++ pPrt->PRxQSize < SK_MIN_RXQ_SIZE) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E011, SKERR_HWI_E011MSG); + return(1); + } +- ++ + /* + * the size of at least one Tx queue (synch. or asynch.) has to be > 0. + * if Jumbo Frames are used, this size has to be >= 16 kB. + */ + if ((i == Port && pPrt->PXSQSize == 0 && pPrt->PXAQSize == 0) || +- (pAC->GIni.GIPortUsage == SK_JUMBO_LINK && +- ((pPrt->PXSQSize > 0 && pPrt->PXSQSize < SK_MIN_TXQ_SIZE) || ++ (pPrt->PPortUsage == SK_JUMBO_LINK && ++ ((pPrt->PXSQSize > 0 && pPrt->PXSQSize < SK_MIN_TXQ_SIZE) || + (pPrt->PXAQSize > 0 && pPrt->PXAQSize < SK_MIN_TXQ_SIZE)))) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E023, SKERR_HWI_E023MSG); + return(1); + } +- ++#endif /* !SK_DIAG */ ++ + UsedMem += pPrt->PRxQSize + pPrt->PXSQSize + pPrt->PXAQSize; ++ ++ if (UsedMem > pAC->GIni.GIRamSize) { ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG); ++ return(1); ++ } + } +- +- if (UsedMem > pAC->GIni.GIRamSize) { +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E012, SKERR_HWI_E012MSG); +- return(1); +- } ++ + #endif /* !SK_SLIM */ + + /* Now start address calculation */ + StartAddr = pAC->GIni.GIRamOffs; + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ + pPrt = &pAC->GIni.GP[i]; + ++ if (CHIP_ID_YUKON_2(pAC)) { ++ StartAddr = 0; ++ } ++ + /* Calculate/Check values for the receive queue */ + Rtv2 = DoCalcAddr(pAC, pPrt, pPrt->PRxQSize, &StartAddr, + &pPrt->PRxQRamStart, &pPrt->PRxQRamEnd); +@@ -502,8 +681,8 @@ + * nothing + */ + static void SkGeInitMacArb( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + /* release local reset */ + SK_OUT16(IoC, B3_MA_TO_CTRL, MA_RST_CLR); +@@ -542,8 +721,8 @@ + * nothing + */ + static void SkGeInitPktArb( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + /* release local reset */ + SK_OUT16(IoC, B3_PA_CTRL, PA_RST_CLR); +@@ -559,7 +738,8 @@ + * NOTE: the packet arbiter timeout interrupt is needed for + * half duplex hangup workaround + */ +- if (pAC->GIni.GIPortUsage != SK_JUMBO_LINK) { ++ if (pAC->GIni.GP[MAC_1].PPortUsage != SK_JUMBO_LINK && ++ pAC->GIni.GP[MAC_2].PPortUsage != SK_JUMBO_LINK) { + if (pAC->GIni.GIMacsFound == 1) { + SK_OUT16(IoC, B3_PA_CTRL, PA_ENA_TO_TX1); + } +@@ -582,14 +762,11 @@ + * nothing + */ + static void SkGeInitMacFifo( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U16 Word; +-#ifdef VCPU +- SK_U32 DWord; +-#endif /* VCPU */ + /* + * For each FIFO: + * - release local reset +@@ -597,31 +774,29 @@ + * - setup defaults for the control register + * - enable the FIFO + */ +- ++ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- /* Configure Rx MAC FIFO */ ++ /* configure Rx MAC FIFO */ + SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_RST_CLR); + SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_RX_CTRL_DEF); + SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_ENA_OP_MD); +- ++ + /* Configure Tx MAC FIFO */ + SK_OUT8(IoC, MR_ADDR(Port, TX_MFF_CTRL2), MFF_RST_CLR); + SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_TX_CTRL_DEF); + SK_OUT8(IoC, MR_ADDR(Port, TX_MFF_CTRL2), MFF_ENA_OP_MD); +- +- /* Enable frame flushing if jumbo frames used */ +- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) { ++ ++ /* enable frame flushing if jumbo frames used */ ++ if (pAC->GIni.GP[Port].PPortUsage == SK_JUMBO_LINK) { + SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_ENA_FLUSH); + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- /* set Rx GMAC FIFO Flush Mask */ +- SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), (SK_U16)RX_FF_FL_DEF_MSK); +- ++ + Word = (SK_U16)GMF_RX_CTRL_DEF; + + /* disable Rx GMAC FIFO Flush for YUKON-Lite Rev. A0 only */ +@@ -629,23 +804,52 @@ + + Word &= ~GMF_RX_F_FL_ON; + } +- +- /* Configure Rx MAC FIFO */ ++ ++ /* Configure Rx GMAC FIFO */ + SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_RST_CLR); + SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), Word); +- +- /* set Rx GMAC FIFO Flush Threshold (default: 0x0a -> 56 bytes) */ +- SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), RX_GMF_FL_THR_DEF); +- +- /* Configure Tx MAC FIFO */ ++ ++ Word = RX_FF_FL_DEF_MSK; ++ ++#ifndef SK_DIAG ++ if (HW_FEATURE(pAC, HWF_WA_DEV_4115)) { ++ /* ++ * Flushing must be enabled (needed for ASF see dev. #4.29), ++ * but the flushing mask should be disabled (see dev. #4.115) ++ */ ++ Word = 0; ++ } ++#endif /* !SK_DIAG */ ++ ++ /* set Rx GMAC FIFO Flush Mask (after clearing reset) */ ++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_MSK), Word); ++ ++ /* default: 0x0a -> 56 bytes on Yukon-1 and 64 bytes on Yukon-2 */ ++ Word = (SK_U16)RX_GMF_FL_THR_DEF; ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC && ++ pAC->GIni.GIAsfEnabled) { ++ /* WA for dev. #4.30 (reduce to 0x08 -> 48 bytes) */ ++ Word -= 2; ++ } ++ } ++ else { ++ /* ++ * because Pause Packet Truncation in GMAC is not working ++ * we have to increase the Flush Threshold to 64 bytes ++ * in order to flush pause packets in Rx FIFO on Yukon-1 ++ */ ++ Word++; ++ } ++ ++ /* set Rx GMAC FIFO Flush Threshold (after clearing reset) */ ++ SK_OUT16(IoC, MR_ADDR(Port, RX_GMF_FL_THR), Word); ++ ++ /* Configure Tx GMAC FIFO */ + SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_CLR); + SK_OUT16(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U16)GMF_TX_CTRL_DEF); +- +-#ifdef VCPU +- SK_IN32(IoC, MR_ADDR(Port, RX_GMF_AF_THR), &DWord); +- SK_IN32(IoC, MR_ADDR(Port, TX_GMF_AE_THR), &DWord); +-#endif /* VCPU */ +- ++ + /* set Tx GMAC FIFO Almost Empty Threshold */ + /* SK_OUT32(IoC, MR_ADDR(Port, TX_GMF_AE_THR), 0); */ + } +@@ -653,7 +857,7 @@ + + } /* SkGeInitMacFifo */ + +-#ifdef SK_LNK_SYNC_CNT ++#ifdef SK_LNK_SYNC_CNT + /****************************************************************************** + * + * SkGeLoadLnkSyncCnt() - Load the Link Sync Counter and starts counting +@@ -674,8 +878,8 @@ + * nothing + */ + void SkGeLoadLnkSyncCnt( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_U32 CntVal) /* Counter value */ + { +@@ -685,7 +889,7 @@ + SK_BOOL IrqPend; + + /* stop counter */ +- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_STOP); ++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_STOP); + + /* + * ASIC problem: +@@ -698,6 +902,7 @@ + IrqPend = SK_FALSE; + SK_IN32(IoC, B0_ISRC, &ISrc); + SK_IN32(IoC, B0_IMSK, &OrgIMsk); ++ + if (Port == MAC_1) { + NewIMsk = OrgIMsk & ~IS_LNK_SYNC_M1; + if ((ISrc & IS_LNK_SYNC_M1) != 0) { +@@ -710,6 +915,7 @@ + IrqPend = SK_TRUE; + } + } ++ + if (!IrqPend) { + SK_OUT32(IoC, B0_IMSK, NewIMsk); + } +@@ -718,15 +924,17 @@ + SK_OUT32(IoC, MR_ADDR(Port, LNK_SYNC_INI), CntVal); + + /* start counter */ +- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_START); ++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_START); + + if (!IrqPend) { +- /* clear the unexpected IRQ, and restore the interrupt mask */ +- SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LED_CLR_IRQ); ++ /* clear the unexpected IRQ */ ++ SK_OUT8(IoC, MR_ADDR(Port, LNK_SYNC_CTRL), LNK_CLR_IRQ); ++ ++ /* restore the interrupt mask */ + SK_OUT32(IoC, B0_IMSK, OrgIMsk); + } + } /* SkGeLoadLnkSyncCnt*/ +-#endif /* SK_LNK_SYNC_CNT */ ++#endif /* SK_LNK_SYNC_CNT */ + + #if defined(SK_DIAG) || defined(SK_CFG_SYNC) + /****************************************************************************** +@@ -758,8 +966,8 @@ + * synchronous queue is configured + */ + int SkGeCfgSync( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_U32 IntTime, /* Interval Timer Value in units of 8ns */ + SK_U32 LimCount, /* Number of bytes to transfer during IntTime */ +@@ -777,16 +985,16 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E010, SKERR_HWI_E010MSG); + return(1); + } +- ++ + if (pAC->GIni.GP[Port].PXSQSize == 0) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E009, SKERR_HWI_E009MSG); + return(2); + } +- ++ + /* calculate register values */ + IntTime = (IntTime / 2) * pAC->GIni.GIHstClkFact / 100; + LimCount = LimCount / 8; +- ++ + if (IntTime > TXA_MAX_VAL || LimCount > TXA_MAX_VAL) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E010, SKERR_HWI_E010MSG); + return(1); +@@ -804,13 +1012,13 @@ + */ + SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), + TXA_ENA_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC); +- ++ + SK_OUT32(IoC, MR_ADDR(Port, TXA_ITI_INI), IntTime); + SK_OUT32(IoC, MR_ADDR(Port, TXA_LIM_INI), LimCount); +- ++ + SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), + (SK_U8)(SyncMode & (TXA_ENA_ALLOC | TXA_DIS_ALLOC))); +- ++ + if (IntTime != 0 || LimCount != 0) { + SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), TXA_DIS_FSYNC | TXA_START_RC); + } +@@ -831,10 +1039,10 @@ + * Returns: + * nothing + */ +-static void DoInitRamQueue( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ +-int QuIoOffs, /* Queue IO Address Offset */ ++void DoInitRamQueue( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int QuIoOffs, /* Queue I/O Address Offset */ + SK_U32 QuStartAddr, /* Queue Start Address */ + SK_U32 QuEndAddr, /* Queue End Address */ + int QuType) /* Queue Type (SK_RX_SRAM_Q|SK_RX_BRAM_Q|SK_TX_RAM_Q) */ +@@ -867,8 +1075,7 @@ + + /* continue with SK_RX_BRAM_Q */ + case SK_RX_BRAM_Q: +- /* write threshold for Rx Queue */ +- ++ /* write threshold for Rx Queue (Pause packets) */ + SK_OUT32(IoC, RB_ADDR(QuIoOffs, RB_RX_UTPP), RxUpThresVal); + SK_OUT32(IoC, RB_ADDR(QuIoOffs, RB_RX_LTPP), RxLoThresVal); + +@@ -882,7 +1089,8 @@ + * or YUKON is used ((GMAC Tx FIFO is only 1 kB) + * we NEED Store & Forward of the RAM buffer. + */ +- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK || ++ if (pAC->GIni.GP[MAC_1].PPortUsage == SK_JUMBO_LINK || ++ pAC->GIni.GP[MAC_2].PPortUsage == SK_JUMBO_LINK || + pAC->GIni.GIYukon) { + /* enable Store & Forward Mode for the Tx Side */ + SK_OUT8(IoC, RB_ADDR(QuIoOffs, RB_CTRL), RB_ENA_STFWD); +@@ -911,8 +1119,8 @@ + * nothing + */ + static void SkGeInitRamBufs( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -920,8 +1128,8 @@ + + pPrt = &pAC->GIni.GP[Port]; + +- if (pPrt->PRxQSize == SK_MIN_RXQ_SIZE) { +- RxQType = SK_RX_SRAM_Q; /* small Rx Queue */ ++ if (pPrt->PRxQSize <= SK_MIN_RXQ_SIZE) { ++ RxQType = SK_RX_SRAM_Q; /* small Rx Queue */ + } + else { + RxQType = SK_RX_BRAM_Q; /* big Rx Queue */ +@@ -929,10 +1137,10 @@ + + DoInitRamQueue(pAC, IoC, pPrt->PRxQOff, pPrt->PRxQRamStart, + pPrt->PRxQRamEnd, RxQType); +- ++ + DoInitRamQueue(pAC, IoC, pPrt->PXsQOff, pPrt->PXsQRamStart, + pPrt->PXsQRamEnd, SK_TX_RAM_Q); +- ++ + DoInitRamQueue(pAC, IoC, pPrt->PXaQOff, pPrt->PXaQRamStart, + pPrt->PXaQRamEnd, SK_TX_RAM_Q); + +@@ -953,26 +1161,37 @@ + * nothing + */ + void SkGeInitRamIface( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { +- /* release local reset */ +- SK_OUT16(IoC, B3_RI_CTRL, RI_RST_CLR); ++ int i; ++ int RamBuffers; + +- /* configure timeout values */ +- SK_OUT8(IoC, B3_RI_WTO_R1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_WTO_XA1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_WTO_XS1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_R1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_XA1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_XS1, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_WTO_R2, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_WTO_XA2, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_WTO_XS2, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_R2, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_XA2, SK_RI_TO_53); +- SK_OUT8(IoC, B3_RI_RTO_XS2, SK_RI_TO_53); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ RamBuffers = pAC->GIni.GIMacsFound; ++ } ++ else { ++ RamBuffers = 1; ++ } ++ ++ for (i = 0; i < RamBuffers; i++) { ++ /* release local reset */ ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_CTRL), (SK_U8)RI_RST_CLR); + ++ /* configure timeout values */ ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_R1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_R1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS1), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_R2), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA2), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS2), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_R2), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA2), SK_RI_TO_53); ++ SK_OUT8(IoC, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS2), SK_RI_TO_53); ++ } + } /* SkGeInitRamIface */ + + +@@ -987,8 +1206,8 @@ + * nothing + */ + static void SkGeInitBmu( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -999,29 +1218,63 @@ + + RxWm = SK_BMU_RX_WM; + TxWm = SK_BMU_TX_WM; +- +- if (!pAC->GIni.GIPciSlot64 && !pAC->GIni.GIPciClock66) { +- /* for better performance */ +- RxWm /= 2; +- TxWm /= 2; +- } + +- /* Rx Queue: Release all local resets and set the watermark */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_CLR_RESET); +- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_F), RxWm); ++ if (CHIP_ID_YUKON_2(pAC)) { + +- /* +- * Tx Queue: Release all local resets if the queue is used ! +- * set watermark +- */ +- if (pPrt->PXSQSize != 0) { +- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_CLR_RESET); +- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_F), TxWm); ++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) { ++ /* for better performance set it to 128 */ ++ RxWm = SK_BMU_RX_WM_PEX; ++ } ++ ++ /* Rx Queue: Release all local resets and set the watermark */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_OPER_INIT); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), BMU_FIFO_OP_ON); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_WM), RxWm); ++ ++ /* ++ * Tx Queue: Release all local resets if the queue is used ! ++ * set watermark ++ */ ++ if (pPrt->PXSQSize != 0 && HW_SYNC_TX_SUPPORTED(pAC)) { ++ /* Yukon-EC doesn't have a synchronous Tx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_OPER_INIT); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), BMU_FIFO_OP_ON); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_WM), TxWm); ++ } ++ ++ if (pPrt->PXAQSize != 0) { ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_OPER_INIT); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), BMU_FIFO_OP_ON); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_WM), TxWm); ++ } + } +- +- if (pPrt->PXAQSize != 0) { +- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_CLR_RESET); +- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_F), TxWm); ++ else { ++ if (!pAC->GIni.GIPciSlot64 && !pAC->GIni.GIPciClock66) { ++ /* for better performance */ ++ RxWm /= 2; ++ TxWm /= 2; ++ } ++ ++ /* Rx Queue: Release all local resets and set the watermark */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_F), RxWm); ++ ++ /* ++ * Tx Queue: Release all local resets if the queue is used ! ++ * set watermark ++ */ ++ if (pPrt->PXSQSize != 0) { ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_F), TxWm); ++ } ++ ++ if (pPrt->PXAQSize != 0) { ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_CLR_RESET); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_F), TxWm); ++ } + } + /* + * Do NOT enable the descriptor poll timers here, because +@@ -1045,20 +1298,29 @@ + */ + static SK_U32 TestStopBit( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int QuIoOffs) /* Queue IO Address Offset */ ++SK_IOC IoC, /* I/O Context */ ++int QuIoOffs) /* Queue I/O Address Offset */ + { + SK_U32 QuCsr; /* CSR contents */ + + SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr); + +- if ((QuCsr & (CSR_STOP | CSR_SV_IDLE)) == 0) { +- /* Stop Descriptor overridden by start command */ +- SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), CSR_STOP); ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if ((QuCsr & (BMU_STOP | BMU_IDLE)) == 0) { ++ /* Stop Descriptor overridden by start command */ ++ SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), BMU_STOP); + +- SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr); ++ SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr); ++ } ++ } ++ else { ++ if ((QuCsr & (CSR_STOP | CSR_SV_IDLE)) == 0) { ++ /* Stop Descriptor overridden by start command */ ++ SK_OUT32(IoC, Q_ADDR(QuIoOffs, Q_CSR), CSR_STOP); ++ ++ SK_IN32(IoC, Q_ADDR(QuIoOffs, Q_CSR), &QuCsr); ++ } + } +- + return(QuCsr); + } /* TestStopBit */ + +@@ -1142,56 +1404,82 @@ + * SWITCH_PORT. + */ + void SkGeStopPort( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* I/O context */ +-int Port, /* port to stop (MAC_1 + n) */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port to stop (MAC_1 + n) */ + int Dir, /* Direction to Stop (SK_STOP_RX, SK_STOP_TX, SK_STOP_ALL) */ + int RstMode)/* Reset Mode (SK_SOFT_RST, SK_HARD_RST) */ + { +-#ifndef SK_DIAG +- SK_EVPARA Para; +-#endif /* !SK_DIAG */ + SK_GEPORT *pPrt; +- SK_U32 DWord; ++ SK_U32 RxCsr; + SK_U32 XsCsr; + SK_U32 XaCsr; + SK_U64 ToutStart; ++ SK_U32 CsrStart; ++ SK_U32 CsrStop; ++ SK_U32 CsrIdle; ++ SK_U32 CsrTest; ++ SK_U8 rsl; /* FIFO read shadow level */ ++ SK_U8 rl; /* FIFO read level */ + int i; + int ToutCnt; + + pPrt = &pAC->GIni.GP[Port]; + ++ /* set the proper values of Q_CSR register layout depending on the chip */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ CsrStart = BMU_START; ++ CsrStop = BMU_STOP; ++ CsrIdle = BMU_IDLE; ++ CsrTest = BMU_IDLE; ++ } ++ else { ++ CsrStart = CSR_START; ++ CsrStop = CSR_STOP; ++ CsrIdle = CSR_SV_IDLE; ++ CsrTest = CSR_SV_IDLE | CSR_STOP; ++ } ++ + if ((Dir & SK_STOP_TX) != 0) { +- /* disable receiver and transmitter */ +- SkMacRxTxDisable(pAC, IoC, Port); +- ++ ++ if (!pAC->GIni.GIAsfEnabled) { ++ /* disable receiver and transmitter */ ++ SkMacRxTxDisable(pAC, IoC, Port); ++ } ++ + /* stop both transmit queues */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CsrStop); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CsrStop); + /* + * If the BMU is in the reset state CSR_STOP will terminate + * immediately. + */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_STOP); +- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_STOP); + + ToutStart = SkOsGetTime(pAC); + ToutCnt = 0; + do { +- /* +- * Clear packet arbiter timeout to make sure +- * this loop will terminate. +- */ +- SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ? +- PA_CLR_TO_TX1 : PA_CLR_TO_TX2)); +- +- /* +- * If the transfer stucks at the MAC the STOP command will not +- * terminate if we don't flush the XMAC's transmit FIFO ! +- */ +- SkMacFlushTxFifo(pAC, IoC, Port); ++#ifdef GENESIS ++ if (pAC->GIni.GIGenesis) { ++ /* clear Tx packet arbiter timeout IRQ */ ++ SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ? ++ PA_CLR_TO_TX1 : PA_CLR_TO_TX2)); ++ /* ++ * If the transfer stucks at the XMAC the STOP command will not ++ * terminate if we don't flush the XMAC's transmit FIFO ! ++ */ ++ SkMacFlushTxFifo(pAC, IoC, Port); ++ } ++#endif /* GENESIS */ + +- XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff); + XaCsr = TestStopBit(pAC, IoC, pPrt->PXaQOff); + ++ if (HW_SYNC_TX_SUPPORTED(pAC)) { ++ XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff); ++ } ++ else { ++ XsCsr = XaCsr; ++ } ++ + if (SkOsGetTime(pAC) - ToutStart > (SK_TICKS_PER_SEC / 18)) { + /* + * Timeout of 1/18 second reached. +@@ -1199,67 +1487,111 @@ + */ + ToutCnt++; + if (ToutCnt > 1) { +- /* Might be a problem when the driver event handler +- * calls StopPort again. XXX. ++ /* ++ * If BMU stop doesn't terminate, we assume that ++ * we have a stable state and can reset the BMU, ++ * the Prefetch Unit, and RAM buffer now. + */ +- +- /* Fatal Error, Loop aborted */ +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E018, +- SKERR_HWI_E018MSG); +-#ifndef SK_DIAG +- Para.Para64 = Port; +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); +-#endif /* !SK_DIAG */ +- return; ++ break; /* ===> leave do/while loop here */ + } + /* +- * Cache incoherency workaround: Assume a start command ++ * Cache incoherency workaround: assume a start command + * has been lost while sending the frame. + */ + ToutStart = SkOsGetTime(pAC); + +- if ((XsCsr & CSR_STOP) != 0) { +- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_START); ++ if ((XsCsr & CsrStop) != 0) { ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CsrStart); + } +- if ((XaCsr & CSR_STOP) != 0) { +- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_START); ++ ++ if ((XaCsr & CsrStop) != 0) { ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CsrStart); + } +- } + ++ /* ++ * After the previous operations the X(s|a)Csr does no ++ * longer contain the proper values ++ */ ++ XaCsr = TestStopBit(pAC, IoC, pPrt->PXaQOff); ++ ++ if (HW_SYNC_TX_SUPPORTED(pAC)) { ++ XsCsr = TestStopBit(pAC, IoC, pPrt->PXsQOff); ++ } ++ else { ++ XsCsr = XaCsr; ++ } ++ } + /* + * Because of the ASIC problem report entry from 21.08.1998 it is + * required to wait until CSR_STOP is reset and CSR_SV_IDLE is set. ++ * (valid for GENESIS only) + */ +- } while ((XsCsr & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE || +- (XaCsr & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE); ++ } while (((XsCsr & CsrTest) != CsrIdle || ++ (XaCsr & CsrTest) != CsrIdle)); ++ ++ if (pAC->GIni.GIAsfEnabled) { + +- /* Reset the MAC depending on the RstMode */ +- if (RstMode == SK_SOFT_RST) { +- SkMacSoftRst(pAC, IoC, Port); ++ pPrt->PState = (RstMode == SK_SOFT_RST) ? SK_PRT_STOP : ++ SK_PRT_RESET; + } + else { +- SkMacHardRst(pAC, IoC, Port); ++ /* Reset the MAC depending on the RstMode */ ++ if (RstMode == SK_SOFT_RST) { ++ ++ SkMacSoftRst(pAC, IoC, Port); ++ } ++ else { ++ if (HW_FEATURE(pAC, HWF_WA_DEV_472) && Port == MAC_1 && ++ pAC->GIni.GP[MAC_2].PState == SK_PRT_RUN) { ++ ++ pAC->GIni.GP[MAC_1].PState = SK_PRT_RESET; ++ ++ /* set GPHY Control reset */ ++ SK_OUT8(IoC, MR_ADDR(MAC_1, GPHY_CTRL), (SK_U8)GPC_RST_SET); ++ } ++ else { ++ ++ SkMacHardRst(pAC, IoC, Port); ++ } ++ } + } +- +- /* Disable Force Sync bit and Enable Alloc bit */ ++ ++ /* disable Force Sync bit and Enable Alloc bit */ + SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), + TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC); +- ++ + /* Stop Interval Timer and Limit Counter of Tx Arbiter */ + SK_OUT32(IoC, MR_ADDR(Port, TXA_ITI_INI), 0L); + SK_OUT32(IoC, MR_ADDR(Port, TXA_LIM_INI), 0L); + + /* Perform a local reset of the port's Tx path */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* Reset the PCI FIFO of the async Tx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), ++ BMU_RST_SET | BMU_FIFO_RST); ++ ++ /* Reset the PCI FIFO of the sync Tx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), ++ BMU_RST_SET | BMU_FIFO_RST); ++ ++ /* Reset the Tx prefetch units */ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PXaQOff, PREF_UNIT_CTRL_REG), ++ PREF_UNIT_RST_SET); ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PXsQOff, PREF_UNIT_CTRL_REG), ++ PREF_UNIT_RST_SET); ++ } ++ else { ++ /* Reset the PCI FIFO of the async Tx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_SET_RESET); ++ /* Reset the PCI FIFO of the sync Tx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_SET_RESET); ++ } + +- /* Reset the PCI FIFO of the async Tx queue */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PXaQOff, Q_CSR), CSR_SET_RESET); +- /* Reset the PCI FIFO of the sync Tx queue */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PXsQOff, Q_CSR), CSR_SET_RESET); + /* Reset the RAM Buffer async Tx queue */ + SK_OUT8(IoC, RB_ADDR(pPrt->PXaQOff, RB_CTRL), RB_RST_SET); + /* Reset the RAM Buffer sync Tx queue */ + SK_OUT8(IoC, RB_ADDR(pPrt->PXsQOff, RB_CTRL), RB_RST_SET); +- ++ + /* Reset Tx MAC FIFO */ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +@@ -1271,71 +1603,116 @@ + SkGeXmitLED(pAC, IoC, MR_ADDR(Port, TX_LED_INI), SK_LED_DIS); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- /* Reset TX MAC FIFO */ +- SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_SET); ++ /* do the reset only if ASF is not enabled */ ++ if (!pAC->GIni.GIAsfEnabled) { ++ /* Reset Tx MAC FIFO */ ++ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), (SK_U8)GMF_RST_SET); ++ } ++ ++ /* set Pause Off */ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_PAUSE_OFF); + } + #endif /* YUKON */ + } + + if ((Dir & SK_STOP_RX) != 0) { +- /* +- * The RX Stop Command will not terminate if no buffers +- * are queued in the RxD ring. But it will always reach +- * the Idle state. Therefore we can use this feature to +- * stop the transfer of received packets. +- */ +- /* stop the port's receive queue */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_STOP); +- +- i = 100; +- do { ++ ++ if (CHIP_ID_YUKON_2(pAC)) { + /* +- * Clear packet arbiter timeout to make sure +- * this loop will terminate ++ * The RX Stop command will not work for Yukon-2 if the BMU does not ++ * reach the end of packet and since we can't make sure that we have ++ * incoming data, we must reset the BMU while it is not during a DMA ++ * transfer. Since it is possible that the RX path is still active, ++ * the RX RAM buffer will be stopped first, so any possible incoming ++ * data will not trigger a DMA. After the RAM buffer is stopped, the ++ * BMU is polled until any DMA in progress is ended and only then it ++ * will be reset. + */ +- SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ? +- PA_CLR_TO_RX1 : PA_CLR_TO_RX2)); + +- DWord = TestStopBit(pAC, IoC, pPrt->PRxQOff); ++ /* disable the RAM Buffer receive queue */ ++ SK_OUT8(IoC, RB_ADDR(pPrt->PRxQOff, RB_CTRL), RB_DIS_OP_MD); + +- /* timeout if i==0 (bug fix for #10748) */ +- if (--i == 0) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E024, +- SKERR_HWI_E024MSG); +- break; ++ i = 0xffff; ++ while (--i) { ++ SK_IN8(IoC, RB_ADDR(pPrt->PRxQOff, Q_RSL), &rsl); ++ SK_IN8(IoC, RB_ADDR(pPrt->PRxQOff, Q_RL), &rl); ++ ++ if (rsl == rl) { ++ break; ++ } + } ++ + /* +- * because of the ASIC problem report entry from 21.08.98 +- * it is required to wait until CSR_STOP is reset and +- * CSR_SV_IDLE is set. ++ * If the Rx side is blocked, the above loop cannot terminate. ++ * But, if there was any traffic it should be terminated, now. ++ * However, stop the Rx BMU and the Prefetch Unit ! + */ +- } while ((DWord & (CSR_STOP | CSR_SV_IDLE)) != CSR_SV_IDLE); ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), ++ BMU_RST_SET | BMU_FIFO_RST); ++ /* reset the Rx prefetch unit */ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(pPrt->PRxQOff, PREF_UNIT_CTRL_REG), ++ PREF_UNIT_RST_SET); ++ } ++ else { ++ /* ++ * The RX Stop Command will not terminate if no buffers ++ * are queued in the RxD ring. But it will always reach ++ * the Idle state. Therefore we can use this feature to ++ * stop the transfer of received packets. ++ */ ++ /* stop the port's receive queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CsrStop); + +- /* The path data transfer activity is fully stopped now */ ++ i = 100; ++ do { ++#ifdef GENESIS ++ if (pAC->GIni.GIGenesis) { ++ /* clear Rx packet arbiter timeout IRQ */ ++ SK_OUT16(IoC, B3_PA_CTRL, (SK_U16)((Port == MAC_1) ? ++ PA_CLR_TO_RX1 : PA_CLR_TO_RX2)); ++ } ++#endif /* GENESIS */ ++ ++ RxCsr = TestStopBit(pAC, IoC, pPrt->PRxQOff); ++ ++ /* timeout if i==0 (bug fix for #10748) */ ++ if (--i == 0) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E024, ++ SKERR_HWI_E024MSG); ++ break; ++ } ++ /* ++ * Because of the ASIC problem report entry from 21.08.1998 it is ++ * required to wait until CSR_STOP is reset and CSR_SV_IDLE is set. ++ * (valid for GENESIS only) ++ */ ++ } while ((RxCsr & CsrTest) != CsrIdle); ++ /* The path data transfer activity is fully stopped now */ + +- /* Perform a local reset of the port's Rx path */ ++ /* Perform a local reset of the port's Rx path */ ++ /* Reset the PCI FIFO of the Rx queue */ ++ SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_SET_RESET); ++ } + +- /* Reset the PCI FIFO of the Rx queue */ +- SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), CSR_SET_RESET); + /* Reset the RAM Buffer receive queue */ + SK_OUT8(IoC, RB_ADDR(pPrt->PRxQOff, RB_CTRL), RB_RST_SET); + + /* Reset Rx MAC FIFO */ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + SK_OUT8(IoC, MR_ADDR(Port, RX_MFF_CTRL2), MFF_RST_SET); + + /* switch Rx LED off, stop the LED counter */ + SkGeXmitLED(pAC, IoC, MR_ADDR(Port, RX_LED_INI), SK_LED_DIS); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON +- if (pAC->GIni.GIYukon) { ++ if (pAC->GIni.GIYukon && !pAC->GIni.GIAsfEnabled) { + /* Reset Rx MAC FIFO */ + SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8)GMF_RST_SET); + } +@@ -1355,8 +1732,8 @@ + * nothing + */ + static void SkGeInit0( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + int i; + SK_GEPORT *pPrt; +@@ -1365,6 +1742,7 @@ + pPrt = &pAC->GIni.GP[i]; + + pPrt->PState = SK_PRT_RESET; ++ pPrt->PPortUsage = SK_RED_LINK; + pPrt->PRxQOff = QOffTab[i].RxQOff; + pPrt->PXsQOff = QOffTab[i].XsQOff; + pPrt->PXaQOff = QOffTab[i].XaQOff; +@@ -1393,24 +1771,30 @@ + pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN; + pPrt->PAutoNegFail = SK_FALSE; + pPrt->PHWLinkUp = SK_FALSE; +- pPrt->PLinkBroken = SK_TRUE; /* See WA code */ ++ pPrt->PLinkBroken = SK_TRUE; /* See WA code */ + pPrt->PPhyPowerState = PHY_PM_OPERATIONAL_MODE; + pPrt->PMacColThres = TX_COL_DEF; + pPrt->PMacJamLen = TX_JAM_LEN_DEF; + pPrt->PMacJamIpgVal = TX_JAM_IPG_DEF; + pPrt->PMacJamIpgData = TX_IPG_JAM_DEF; ++ pPrt->PMacBackOffLim = TX_BOF_LIM_DEF; ++ pPrt->PMacDataBlind = DATA_BLIND_DEF; + pPrt->PMacIpgData = IPG_DATA_DEF; + pPrt->PMacLimit4 = SK_FALSE; + } + +- pAC->GIni.GIPortUsage = SK_RED_LINK; + pAC->GIni.GILedBlinkCtrl = (SK_U16)OemConfig.Value; +- pAC->GIni.GIValIrqMask = IS_ALL_MSK; ++ pAC->GIni.GIChipCap = 0; ++ ++ for (i = 0; i < 4; i++) { ++ pAC->GIni.HwF.Features[i]= 0x00000000; ++ pAC->GIni.HwF.OnMask[i] = 0x00000000; ++ pAC->GIni.HwF.OffMask[i] = 0x00000000; ++ } + + } /* SkGeInit0*/ + + #ifdef SK_PCI_RESET +- + /****************************************************************************** + * + * SkGePciReset() - Reset PCI interface +@@ -1426,8 +1810,8 @@ + * 1: Power state could not be changed to 3. + */ + static int SkGePciReset( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + int i; + SK_U16 PmCtlSts; +@@ -1450,7 +1834,7 @@ + /* We know the RAM Interface Arbiter is enabled. */ + SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PCI_PM_STATE_D3); + SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts); +- ++ + if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D3) { + return(1); + } +@@ -1460,7 +1844,7 @@ + + /* Check for D0 state. */ + SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts); +- ++ + if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D0) { + return(1); + } +@@ -1469,11 +1853,24 @@ + SkPciReadCfgWord(pAC, PCI_COMMAND, &PciCmd); + SkPciReadCfgByte(pAC, PCI_CACHE_LSZ, &Cls); + SkPciReadCfgDWord(pAC, PCI_BASE_1ST, &Bp1); +- SkPciReadCfgDWord(pAC, PCI_BASE_2ND, &Bp2); ++ ++ /* ++ * Compute the location in PCI config space of BAR2 ++ * relativ to the location of BAR1 ++ */ ++ if ((Bp1 & PCI_MEM_TYP_MSK) == PCI_MEM64BIT) { ++ /* BAR1 is 64 bits wide */ ++ i = 8; ++ } ++ else { ++ i = 4; ++ } ++ ++ SkPciReadCfgDWord(pAC, PCI_BASE_1ST + i, &Bp2); + SkPciReadCfgByte(pAC, PCI_LAT_TIM, &Lat); +- +- if (PciCmd != 0 || Cls != (SK_U8)0 || Lat != (SK_U8)0 || +- (Bp1 & 0xfffffff0L) != 0 || Bp2 != 1) { ++ ++ if (PciCmd != 0 || Cls != 0 || (Bp1 & 0xfffffff0L) != 0 || Bp2 != 1 || ++ Lat != 0) { + return(1); + } + +@@ -1484,9 +1881,80 @@ + + return(0); + } /* SkGePciReset */ +- + #endif /* SK_PCI_RESET */ + ++ ++/****************************************************************************** ++ * ++ * SkGeSetUpSupFeatures() - Collect Feature List for HW_FEATURE Macro ++ * ++ * Description: ++ * This function collects the available features and required ++ * deviation services of the Adapter and provides these ++ * information in the GIHwF struct. This information is used as ++ * default value and may be overritten by the driver using the ++ * SET_HW_FEATURE_MASK() macro in its Init0 phase. ++ * ++ * Notice: ++ * Using the On and Off mask: Never switch on the same bit in both ++ * masks simultaneously. However, if doing the Off mask will win. ++ * ++ * Returns: ++ * nothing ++ */ ++static void SkGeSetUpSupFeatures( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ ++{ ++ int i; ++ ++ switch (pAC->GIni.GIChipId) { ++ case CHIP_ID_YUKON_EC: ++ if (pAC->GIni.GIChipRev == CHIP_REV_YU_EC_A1) { ++ /* A0/A1 */ ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = ++ HWF_WA_DEV_42 | HWF_WA_DEV_46 | HWF_WA_DEV_43_418 | ++ HWF_WA_DEV_420 | HWF_WA_DEV_423 | ++ HWF_WA_DEV_424 | HWF_WA_DEV_425 | HWF_WA_DEV_427 | ++ HWF_WA_DEV_428 | HWF_WA_DEV_483 | HWF_WA_DEV_4109; ++ } ++ else { ++ /* A2/A3 */ ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = ++ HWF_WA_DEV_424 | HWF_WA_DEV_425 | HWF_WA_DEV_427 | ++ HWF_WA_DEV_428 | HWF_WA_DEV_483 | HWF_WA_DEV_4109; ++ } ++ break; ++ case CHIP_ID_YUKON_FE: ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = HWF_WA_DEV_427 | HWF_WA_DEV_4109; ++ break; ++ case CHIP_ID_YUKON_XL: ++ /* still needed for Diag */ ++ if (pAC->GIni.GIChipRev == 0) { ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = ++ HWF_WA_DEV_427 | HWF_WA_DEV_463 | HWF_WA_DEV_472 | ++ HWF_WA_DEV_479 | HWF_WA_DEV_483 | HWF_WA_DEV_4115; ++ } ++ else if (pAC->GIni.GIChipRev == 1) { ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = ++ HWF_WA_DEV_427 | HWF_WA_DEV_483 | HWF_WA_DEV_4109 | ++ HWF_WA_DEV_4115; ++ } ++ else { ++ pAC->GIni.HwF.Features[HW_DEV_LIST] = ++ HWF_WA_DEV_427 | HWF_WA_DEV_483 | HWF_WA_DEV_4109; ++ } ++ break; ++ } ++ ++ for (i = 0; i < 4; i++) { ++ pAC->GIni.HwF.Features[i] = ++ (pAC->GIni.HwF.Features[i] | pAC->GIni.HwF.OnMask[i]) & ++ ~pAC->GIni.HwF.OffMask[i]; ++ } ++} /* SkGeSetUpSupFeatures */ ++ ++ + /****************************************************************************** + * + * SkGeInit1() - Level 1 Initialization +@@ -1509,73 +1977,216 @@ + * 6: HW self test failed + */ + static int SkGeInit1( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + SK_U8 Byte; + SK_U16 Word; +- SK_U16 CtrlStat; ++ SK_U32 CtrlStat; ++ SK_U32 VauxAvail; + SK_U32 DWord; ++ SK_U32 PowerDownBit; ++ SK_GEPORT *pPrt; + int RetVal; + int i; + + RetVal = 0; + +- /* save CLK_RUN bits (YUKON-Lite) */ +- SK_IN16(IoC, B0_CTST, &CtrlStat); ++ /* save CLK_RUN & ASF_ENABLE bits (YUKON-Lite, YUKON-EC) */ ++ SK_IN32(IoC, B0_CTST, &CtrlStat); + + #ifdef SK_PCI_RESET + (void)SkGePciReset(pAC, IoC); + #endif /* SK_PCI_RESET */ + +- /* do the SW-reset */ +- SK_OUT8(IoC, B0_CTST, CS_RST_SET); +- + /* release the SW-reset */ ++ /* Important: SW-reset has to be cleared here, to ensure ++ * the CHIP_ID can be read IO-mapped based, too - ++ * remember the RAP register can only be written if ++ * SW-reset is cleared. ++ */ + SK_OUT8(IoC, B0_CTST, CS_RST_CLR); + ++ /* read Chip Identification Number */ ++ SK_IN8(IoC, B2_CHIP_ID, &Byte); ++ pAC->GIni.GIChipId = Byte; ++ ++ pAC->GIni.GIAsfEnabled = SK_FALSE; ++ ++ /* ASF support only for Yukon-2 */ ++ if ((pAC->GIni.GIChipId >= CHIP_ID_YUKON_XL) && ++ (pAC->GIni.GIChipId <= CHIP_ID_YUKON_EC)) { ++#ifdef SK_ASF ++ if ((CtrlStat & Y2_ASF_ENABLE) != 0) { ++ /* do the SW-reset only if ASF is not enabled */ ++ pAC->GIni.GIAsfEnabled = SK_TRUE; ++ } ++#else /* !SK_ASF */ ++ ++ SK_IN8(IoC, B28_Y2_ASF_STAT_CMD, &Byte); ++ ++ pAC->GIni.GIAsfRunning = Byte & Y2_ASF_RUNNING; ++ ++ /* put ASF system in reset state */ ++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); ++ ++ /* disable ASF Unit */ ++ SK_OUT16(IoC, B0_CTST, Y2_ASF_DISABLE); ++#endif /* !SK_ASF */ ++ } ++ ++ if (!pAC->GIni.GIAsfEnabled) { ++ /* Yukon-2: required for Diag and Power Management */ ++ /* set the SW-reset */ ++ SK_OUT8(IoC, B0_CTST, CS_RST_SET); ++ ++ /* release the SW-reset */ ++ SK_OUT8(IoC, B0_CTST, CS_RST_CLR); ++ } ++ + /* reset all error bits in the PCI STATUS register */ + /* + * Note: PCI Cfg cycles cannot be used, because they are not + * available on some platforms after 'boot time'. + */ +- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word); +- ++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word); ++ + SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); +- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS)); +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ ++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), Word | (SK_U16)PCI_ERRBITS); + + /* release Master Reset */ + SK_OUT8(IoC, B0_CTST, CS_MRST_CLR); + + #ifdef CLK_RUN + CtrlStat |= CS_CLK_RUN_ENA; +-#endif /* CLK_RUN */ + + /* restore CLK_RUN bits */ + SK_OUT16(IoC, B0_CTST, (SK_U16)(CtrlStat & + (CS_CLK_RUN_HOT | CS_CLK_RUN_RST | CS_CLK_RUN_ENA))); ++#endif /* CLK_RUN */ ++ ++ if ((pAC->GIni.GIChipId >= CHIP_ID_YUKON_XL) && ++ (pAC->GIni.GIChipId <= CHIP_ID_YUKON_FE)) { ++ ++ pAC->GIni.GIYukon2 = SK_TRUE; ++ pAC->GIni.GIValIrqMask = Y2_IS_ALL_MSK; ++ pAC->GIni.GIValHwIrqMask = Y2_HWE_ALL_MSK; ++ ++ VauxAvail = Y2_VAUX_AVAIL; ++ ++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_STATUS), &DWord); ++ ++ if ((DWord & PCI_OS_PCI_X) != 0) { ++ /* this is a PCI / PCI-X bus */ ++ if ((DWord & PCI_OS_PCIX) != 0) { ++ /* this is a PCI-X bus */ ++ pAC->GIni.GIPciBus = SK_PCIX_BUS; ++ ++ /* PCI-X is always 64-bit wide */ ++ pAC->GIni.GIPciSlot64 = SK_TRUE; ++ ++ pAC->GIni.GIPciMode = (SK_U8)(PCI_OS_SPEED(DWord)); ++ } ++ else { ++ /* this is a conventional PCI bus */ ++ pAC->GIni.GIPciBus = SK_PCI_BUS; ++ ++ SK_IN16(IoC, PCI_C(pAC, PCI_OUR_REG_2), &Word); ++ ++ /* check if 64-bit width is used */ ++ pAC->GIni.GIPciSlot64 = (SK_BOOL) ++ (((DWord & PCI_OS_PCI64B) != 0) && ++ ((Word & PCI_USEDATA64) != 0)); ++ ++ /* check if 66 MHz PCI Clock is active */ ++ pAC->GIni.GIPciClock66 = (SK_BOOL)((DWord & PCI_OS_PCI66M) != 0); ++ } ++ } ++ else { ++ /* this is a PEX bus */ ++ pAC->GIni.GIPciBus = SK_PEX_BUS; ++ ++ /* clear any PEX errors */ ++ SK_OUT32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), 0xffffffffUL); ++ ++ SK_IN16(IoC, PCI_C(pAC, PEX_LNK_STAT), &Word); ++ ++ pAC->GIni.GIPexWidth = (SK_U8)((Word & PEX_LS_LINK_WI_MSK) >> 4); ++ } ++ /* ++ * Yukon-2 chips family has a different way of providing ++ * the number of MACs available ++ */ ++ pAC->GIni.GIMacsFound = 1; ++ ++ SK_IN8(IoC, B2_Y2_HW_RES, &Byte); ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* ++ * OEM config value is overwritten and should not ++ * be used for Yukon-2 ++ */ ++ pAC->GIni.GILedBlinkCtrl |= SK_ACT_LED_BLINK; ++ ++ if (CFG_LED_MODE(Byte) == CFG_LED_DUAL_ACT_LNK) { ++ ++ pAC->GIni.GILedBlinkCtrl |= SK_DUAL_LED_ACT_LNK; ++ } ++ } ++ ++ if ((Byte & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) { ++ ++ SK_IN8(IoC, B2_Y2_CLK_GATE, &Byte); ++ ++ if (!(Byte & Y2_STATUS_LNK2_INAC)) { ++ /* Link 2 activ */ ++ pAC->GIni.GIMacsFound++; ++ } ++ } ++ ++#ifdef VCPU ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* temporary WA for reported number of links */ ++ pAC->GIni.GIMacsFound = 2; ++ } ++#endif /* VCPU */ ++ ++ /* read Chip Revision */ ++ SK_IN8(IoC, B2_MAC_CFG, &Byte); ++ ++ pAC->GIni.GIChipCap = Byte & 0x0f; ++ } ++ else { ++ pAC->GIni.GIYukon2 = SK_FALSE; ++ pAC->GIni.GIValIrqMask = IS_ALL_MSK; ++ pAC->GIni.GIValHwIrqMask = 0; /* not activated */ ++ ++ VauxAvail = CS_VAUX_AVAIL; ++ ++ /* read number of MACs and Chip Revision */ ++ SK_IN8(IoC, B2_MAC_CFG, &Byte); ++ ++ pAC->GIni.GIMacsFound = (Byte & CFG_SNG_MAC) ? 1 : 2; ++ } + +- /* read Chip Identification Number */ +- SK_IN8(IoC, B2_CHIP_ID, &Byte); +- pAC->GIni.GIChipId = Byte; +- +- /* read number of MACs */ +- SK_IN8(IoC, B2_MAC_CFG, &Byte); +- pAC->GIni.GIMacsFound = (Byte & CFG_SNG_MAC) ? 1 : 2; +- + /* get Chip Revision Number */ + pAC->GIni.GIChipRev = (SK_U8)((Byte & CFG_CHIP_R_MSK) >> 4); + +- /* get diff. PCI parameters */ +- SK_IN16(IoC, B0_CTST, &CtrlStat); +- ++#ifndef SK_DIAG ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && pAC->GIni.GIChipRev == 0) { ++ /* Yukon-2 Chip Rev. A0 */ ++ return(6); ++ } ++#endif /* !SK_DIAG */ ++ + /* read the adapters RAM size */ + SK_IN8(IoC, B2_E_0, &Byte); +- ++ + pAC->GIni.GIGenesis = SK_FALSE; + pAC->GIni.GIYukon = SK_FALSE; + pAC->GIni.GIYukonLite = SK_FALSE; ++ pAC->GIni.GIVauxAvail = SK_FALSE; + + #ifdef GENESIS + if (pAC->GIni.GIChipId == CHIP_ID_GENESIS) { +@@ -1591,57 +2202,77 @@ + pAC->GIni.GIRamSize = (int)Byte * 512; + pAC->GIni.GIRamOffs = 0; + } +- /* all GE adapters work with 53.125 MHz host clock */ ++ /* all GENESIS adapters work with 53.125 MHz host clock */ + pAC->GIni.GIHstClkFact = SK_FACT_53; +- ++ + /* set Descr. Poll Timer Init Value to 250 ms */ + pAC->GIni.GIPollTimerVal = + SK_DPOLL_DEF * (SK_U32)pAC->GIni.GIHstClkFact / 100; + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIChipId != CHIP_ID_GENESIS) { +- ++ + pAC->GIni.GIYukon = SK_TRUE; +- ++ + pAC->GIni.GIRamSize = (Byte == (SK_U8)0) ? 128 : (int)Byte * 4; +- ++ + pAC->GIni.GIRamOffs = 0; +- +- /* WA for chip Rev. A */ ++ ++ /* WA for Yukon chip Rev. A */ + pAC->GIni.GIWolOffs = (pAC->GIni.GIChipId == CHIP_ID_YUKON && + pAC->GIni.GIChipRev == 0) ? WOL_REG_OFFS : 0; +- ++ + /* get PM Capabilities of PCI config space */ +- SK_IN16(IoC, PCI_C(PCI_PM_CAP_REG), &Word); ++ SK_IN16(IoC, PCI_C(pAC, PCI_PM_CAP_REG), &Word); + + /* check if VAUX is available */ +- if (((CtrlStat & CS_VAUX_AVAIL) != 0) && ++ if (((CtrlStat & VauxAvail) != 0) && + /* check also if PME from D3cold is set */ + ((Word & PCI_PME_D3C_SUP) != 0)) { + /* set entry in GE init struct */ + pAC->GIni.GIVauxAvail = SK_TRUE; + } +- +- if (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) { +- /* this is Rev. A1 */ +- pAC->GIni.GIYukonLite = SK_TRUE; +- } +- else { +- /* save Flash-Address Register */ +- SK_IN32(IoC, B2_FAR, &DWord); + +- /* test Flash-Address Register */ +- SK_OUT8(IoC, B2_FAR + 3, 0xff); +- SK_IN8(IoC, B2_FAR + 3, &Byte); ++ if (!CHIP_ID_YUKON_2(pAC)) { + +- if (Byte != 0) { +- /* this is Rev. A0 */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_LITE) { ++ /* this is Rev. A1 */ + pAC->GIni.GIYukonLite = SK_TRUE; ++ } ++ else { ++ /* save Flash-Address Register */ ++ SK_IN32(IoC, B2_FAR, &DWord); + +- /* restore Flash-Address Register */ +- SK_OUT32(IoC, B2_FAR, DWord); ++ /* test Flash-Address Register */ ++ SK_OUT8(IoC, B2_FAR + 3, 0xff); ++ SK_IN8(IoC, B2_FAR + 3, &Byte); ++ ++ if (Byte != 0) { ++ /* this is Rev. A0 */ ++ pAC->GIni.GIYukonLite = SK_TRUE; ++ ++ /* restore Flash-Address Register */ ++ SK_OUT32(IoC, B2_FAR, DWord); ++ } ++ } ++ } ++ else { ++ /* Check for CLS = 0 (dev. #4.55) */ ++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) { ++ /* PCI and PCI-X */ ++ SK_IN8(IoC, PCI_C(pAC, PCI_CACHE_LSZ), &Byte); ++ if (Byte == 0) { ++ /* set CLS to 2 if configured to 0 */ ++ SK_OUT8(IoC, PCI_C(pAC, PCI_CACHE_LSZ), 2); ++ } ++ if (pAC->GIni.GIPciBus == SK_PCIX_BUS) { ++ /* set Cache Line Size opt. */ ++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord); ++ DWord |= PCI_CLS_OPT; ++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord); ++ } + } + } + +@@ -1649,70 +2280,147 @@ + SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA | + PC_VAUX_OFF | PC_VCC_ON)); + +- /* read the Interrupt source */ +- SK_IN32(IoC, B0_ISRC, &DWord); +- +- if ((DWord & IS_HW_ERR) != 0) { +- /* read the HW Error Interrupt source */ +- SK_IN32(IoC, B0_HWE_ISRC, &DWord); +- +- if ((DWord & IS_IRQ_SENSOR) != 0) { +- /* disable HW Error IRQ */ +- pAC->GIni.GIValIrqMask &= ~IS_HW_ERR; ++ Byte = 0; ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* PEX adapters work with different host clock */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) { ++ /* Yukon-EC works with 125 MHz host clock */ ++ pAC->GIni.GIHstClkFact = SK_FACT_125; ++ } ++ else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* Yukon-FE works with 100 MHz host clock */ ++ pAC->GIni.GIHstClkFact = SK_FACT_100; ++ } ++ else { /* CHIP_ID_YUKON_XL */ ++ /* all Yukon-2 adapters work with 156 MHz host clock */ ++ pAC->GIni.GIHstClkFact = 2 * SK_FACT_78; ++ ++ if (pAC->GIni.GIChipRev > 1) { ++ /* enable bits are inverted */ ++ Byte = (SK_U8)(Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | ++ Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | ++ Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS); ++ } + } ++ ++ pAC->GIni.GIPollTimerVal = ++ SK_DPOLL_DEF_Y2 * (SK_U32)pAC->GIni.GIHstClkFact / 100; ++ ++ /* set power down bit */ ++ PowerDownBit = PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD; ++ ++ /* disable Core Clock Division, set Clock Select to 0 (Yukon-2) */ ++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS); ++ ++ /* enable PCI & Core Clock, enable clock gating for both Links */ ++ SK_OUT8(IoC, B2_Y2_CLK_GATE, Byte); + } +- +- for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +- /* set GMAC Link Control reset */ +- SK_OUT16(IoC, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_SET); ++ else { ++ /* YUKON adapters work with 78 MHz host clock */ ++ pAC->GIni.GIHstClkFact = SK_FACT_78; ++ ++ pAC->GIni.GIPollTimerVal = SK_DPOLL_MAX; /* 215 ms */ ++ ++ /* read the Interrupt source */ ++ SK_IN32(IoC, B0_ISRC, &DWord); ++ ++ if ((DWord & IS_HW_ERR) != 0) { ++ /* read the HW Error Interrupt source */ ++ SK_IN32(IoC, B0_HWE_ISRC, &DWord); ++ ++ if ((DWord & IS_IRQ_SENSOR) != 0) { ++ /* disable HW Error IRQ */ ++ pAC->GIni.GIValIrqMask &= ~IS_HW_ERR; ++ } ++ } ++ /* set power down bit */ ++ PowerDownBit = PCI_PHY_COMA; ++ } ++ ++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord); ++ ++ DWord &= ~PowerDownBit; ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && pAC->GIni.GIChipRev > 1) { ++ /* deassert Low Power for 1st PHY */ ++ DWord |= PCI_Y2_PHY1_COMA; + +- /* clear GMAC Link Control reset */ +- SK_OUT16(IoC, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_CLR); ++ if (pAC->GIni.GIMacsFound > 1) { ++ /* deassert Low Power for 2nd PHY */ ++ DWord |= PCI_Y2_PHY2_COMA; ++ } ++ } ++ ++ /* Release PHY from PowerDown/COMA Mode */ ++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord); ++ ++ if (!pAC->GIni.GIAsfEnabled) { ++ ++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ /* set GMAC Link Control reset */ ++ SK_OUT8(IoC, MR_ADDR(i, GMAC_LINK_CTRL), (SK_U8)GMLC_RST_SET); ++ ++ /* clear GMAC Link Control reset */ ++ SK_OUT8(IoC, MR_ADDR(i, GMAC_LINK_CTRL), (SK_U8)GMLC_RST_CLR); ++ } + } +- /* all YU chips work with 78.125 MHz host clock */ +- pAC->GIni.GIHstClkFact = SK_FACT_78; +- +- pAC->GIni.GIPollTimerVal = SK_DPOLL_MAX; /* 215 ms */ + } + #endif /* YUKON */ + +- /* check if 64-bit PCI Slot is present */ +- pAC->GIni.GIPciSlot64 = (SK_BOOL)((CtrlStat & CS_BUS_SLOT_SZ) != 0); +- +- /* check if 66 MHz PCI Clock is active */ +- pAC->GIni.GIPciClock66 = (SK_BOOL)((CtrlStat & CS_BUS_CLOCK) != 0); ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* this is a conventional PCI bus */ ++ pAC->GIni.GIPciBus = SK_PCI_BUS; ++ ++ /* check if 64-bit PCI Slot is present */ ++ pAC->GIni.GIPciSlot64 = (SK_BOOL)((CtrlStat & CS_BUS_SLOT_SZ) != 0); ++ ++ /* check if 66 MHz PCI Clock is active */ ++ pAC->GIni.GIPciClock66 = (SK_BOOL)((CtrlStat & CS_BUS_CLOCK) != 0); ++ } + + /* read PCI HW Revision Id. */ +- SK_IN8(IoC, PCI_C(PCI_REV_ID), &Byte); ++ SK_IN8(IoC, PCI_C(pAC, PCI_REV_ID), &Byte); + pAC->GIni.GIPciHwRev = Byte; + ++ /* read connector type */ ++ SK_IN8(IoC, B2_CONN_TYP, &pAC->GIni.GIConTyp); ++ + /* read the PMD type */ + SK_IN8(IoC, B2_PMD_TYP, &Byte); +- pAC->GIni.GICopperType = (SK_U8)(Byte == 'T'); + +- /* read the PHY type */ ++ pAC->GIni.GIPmdTyp = Byte; ++ ++ pAC->GIni.GICopperType = (SK_BOOL)(Byte == 'T' || Byte == '1' || ++ (pAC->GIni.GIYukon2 && !(Byte == 'L' || Byte == 'S'))); ++ ++ /* read the PHY type (Yukon and Genesis) */ + SK_IN8(IoC, B2_E_1, &Byte); + + Byte &= 0x0f; /* the PHY type is stored in the lower nibble */ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +- ++ ++ pPrt = &pAC->GIni.GP[i]; ++ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { + switch (Byte) { + case SK_PHY_XMAC: +- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_XMAC; ++ pPrt->PhyAddr = PHY_ADDR_XMAC; + break; + case SK_PHY_BCOM: +- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_BCOM; +- pAC->GIni.GP[i].PMSCap = (SK_U8)(SK_MS_CAP_AUTO | ++ pPrt->PhyAddr = PHY_ADDR_BCOM; ++ pPrt->PMSCap = (SK_U8)(SK_MS_CAP_AUTO | + SK_MS_CAP_MASTER | SK_MS_CAP_SLAVE); + break; + #ifdef OTHER_PHY + case SK_PHY_LONE: +- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_LONE; ++ pPrt->PhyAddr = PHY_ADDR_LONE; + break; + case SK_PHY_NAT: +- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_NAT; ++ pPrt->PhyAddr = PHY_ADDR_NAT; + break; + #endif /* OTHER_PHY */ + default: +@@ -1722,65 +2430,98 @@ + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- +- if (Byte < (SK_U8)SK_PHY_MARV_COPPER) { ++ ++ if ((Byte < (SK_U8)SK_PHY_MARV_COPPER) && ++ pAC->GIni.GIPmdTyp != 'L' && pAC->GIni.GIPmdTyp != 'S') { + /* if this field is not initialized */ + Byte = (SK_U8)SK_PHY_MARV_COPPER; +- ++ + pAC->GIni.GICopperType = SK_TRUE; + } +- +- pAC->GIni.GP[i].PhyAddr = PHY_ADDR_MARV; +- ++ ++ pPrt->PhyAddr = PHY_ADDR_MARV; ++ + if (pAC->GIni.GICopperType) { + +- pAC->GIni.GP[i].PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_AUTO | +- SK_LSPEED_CAP_10MBPS | SK_LSPEED_CAP_100MBPS | +- SK_LSPEED_CAP_1000MBPS); +- +- pAC->GIni.GP[i].PLinkSpeed = (SK_U8)SK_LSPEED_AUTO; +- +- pAC->GIni.GP[i].PMSCap = (SK_U8)(SK_MS_CAP_AUTO | ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE || ++ (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC && ++ pAC->GIni.GIChipCap == 2)) { ++ ++ pPrt->PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_100MBPS | ++ SK_LSPEED_CAP_10MBPS); ++ ++ pAC->GIni.GIRamSize = 4; ++ } ++ else { ++ pPrt->PLinkSpeedCap = (SK_U8)(SK_LSPEED_CAP_1000MBPS | ++ SK_LSPEED_CAP_100MBPS | SK_LSPEED_CAP_10MBPS | ++ SK_LSPEED_CAP_AUTO); ++ } ++ ++ pPrt->PLinkSpeed = (SK_U8)SK_LSPEED_AUTO; ++ ++ pPrt->PMSCap = (SK_U8)(SK_MS_CAP_AUTO | + SK_MS_CAP_MASTER | SK_MS_CAP_SLAVE); + } + else { + Byte = (SK_U8)SK_PHY_MARV_FIBER; + } + } ++ ++ /* clear TWSI IRQ */ ++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); ++ + #endif /* YUKON */ +- +- pAC->GIni.GP[i].PhyType = (int)Byte; +- ++ ++ pPrt->PhyType = (int)Byte; ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, +- ("PHY type: %d PHY addr: %04x\n", Byte, +- pAC->GIni.GP[i].PhyAddr)); ++ ("PHY type: %d PHY addr: %04x\n", ++ Byte, pPrt->PhyAddr)); + } +- ++ + /* get MAC Type & set function pointers dependent on */ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + pAC->GIni.GIMacType = SK_MAC_XMAC; + + pAC->GIni.GIFunc.pFnMacUpdateStats = SkXmUpdateStats; + pAC->GIni.GIFunc.pFnMacStatistic = SkXmMacStatistic; + pAC->GIni.GIFunc.pFnMacResetCounter = SkXmResetCounter; + pAC->GIni.GIFunc.pFnMacOverflow = SkXmOverflowStatus; ++#ifdef SK_DIAG ++ pAC->GIni.GIFunc.pFnMacPhyRead = SkXmPhyRead; ++ pAC->GIni.GIFunc.pFnMacPhyWrite = SkXmPhyWrite; ++#else /* SK_DIAG */ ++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkGeYuSirqIsr; ++#endif /* !SK_DIAG */ + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + pAC->GIni.GIMacType = SK_MAC_GMAC; + + pAC->GIni.GIFunc.pFnMacUpdateStats = SkGmUpdateStats; + pAC->GIni.GIFunc.pFnMacStatistic = SkGmMacStatistic; + pAC->GIni.GIFunc.pFnMacResetCounter = SkGmResetCounter; + pAC->GIni.GIFunc.pFnMacOverflow = SkGmOverflowStatus; ++#ifdef SK_DIAG ++ pAC->GIni.GIFunc.pFnMacPhyRead = SkGmPhyRead; ++ pAC->GIni.GIFunc.pFnMacPhyWrite = SkGmPhyWrite; ++#else /* SK_DIAG */ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkYuk2SirqIsr; ++ } ++ else { ++ pAC->GIni.GIFunc.pSkGeSirqIsr = SkGeYuSirqIsr; ++ } ++#endif /* !SK_DIAG */ + + #ifdef SPECIAL_HANDLING + if (pAC->GIni.GIChipId == CHIP_ID_YUKON) { +@@ -1793,7 +2534,9 @@ + #endif + } + #endif /* YUKON */ +- ++ ++ SkGeSetUpSupFeatures(pAC, IoC); ++ + return(RetVal); + } /* SkGeInit1 */ + +@@ -1814,9 +2557,12 @@ + * nothing + */ + static void SkGeInit2( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { ++#ifdef YUKON ++ SK_U16 Word; ++#endif /* YUKON */ + #ifdef GENESIS + SK_U32 DWord; + #endif /* GENESIS */ +@@ -1850,13 +2596,13 @@ + SkGeInitPktArb(pAC, IoC); + } + #endif /* GENESIS */ +- +-#ifdef YUKON ++ ++#ifdef xSK_DIAG + if (pAC->GIni.GIYukon) { + /* start Time Stamp Timer */ + SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8)GMT_ST_START); + } +-#endif /* YUKON */ ++#endif /* SK_DIAG */ + + /* enable the Tx Arbiters */ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +@@ -1866,8 +2612,34 @@ + /* enable the RAM Interface Arbiter */ + SkGeInitRamIface(pAC, IoC); + ++#ifdef YUKON ++ if (CHIP_ID_YUKON_2(pAC)) { ++ ++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) { ++ ++ SK_IN16(IoC, PCI_C(pAC, PEX_DEV_CTRL), &Word); ++ ++ /* change Max. Read Request Size to 2048 bytes */ ++ Word &= ~PEX_DC_MAX_RRS_MSK; ++ Word |= PEX_DC_MAX_RD_RQ_SIZE(4); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ ++ SK_OUT16(IoC, PCI_C(pAC, PEX_DEV_CTRL), Word); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ } ++ ++ /* ++ * Writing the HW Error Mask Reg. will not generate an IRQ ++ * as long as the B0_IMSK is not set by the driver. ++ */ ++ SK_OUT32(IoC, B0_HWE_IMSK, pAC->GIni.GIValHwIrqMask); ++ } ++#endif /* YUKON */ + } /* SkGeInit2 */ + ++ + /****************************************************************************** + * + * SkGeInit() - Initialize the GE Adapter with the specified level. +@@ -1889,7 +2661,7 @@ + * if Number of MACs > SK_MAX_MACS + * + * After returning from Level 0 the adapter +- * may be accessed with IO operations. ++ * may be accessed with I/O operations. + * + * Level 2: start the Blink Source Counter + * +@@ -1898,14 +2670,14 @@ + * 1: Number of MACs exceeds SK_MAX_MACS (after level 1) + * 2: Adapter not present or not accessible + * 3: Illegal initialization level +- * 4: Initialization Level 1 Call missing ++ * 4: Initialization level 1 call missing + * 5: Unexpected PHY type detected + * 6: HW self test failed + */ + int SkGeInit( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ +-int Level) /* initialization level */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Level) /* Initialization Level */ + { + int RetVal; /* return value */ + SK_U32 DWord; +@@ -1920,7 +2692,7 @@ + SkGeInit0(pAC, IoC); + pAC->GIni.GILevel = SK_INIT_DATA; + break; +- ++ + case SK_INIT_IO: + /* Initialization Level 1 */ + RetVal = SkGeInit1(pAC, IoC); +@@ -1932,22 +2704,24 @@ + SK_OUT32(IoC, B2_IRQM_INI, SK_TEST_VAL); + SK_IN32(IoC, B2_IRQM_INI, &DWord); + SK_OUT32(IoC, B2_IRQM_INI, 0L); +- ++ + if (DWord != SK_TEST_VAL) { + RetVal = 2; + break; + } + ++#ifdef DEBUG + /* check if the number of GIMacsFound matches SK_MAX_MACS */ + if (pAC->GIni.GIMacsFound > SK_MAX_MACS) { + RetVal = 1; + break; + } ++#endif /* DEBUG */ + + /* Level 1 successfully passed */ + pAC->GIni.GILevel = SK_INIT_IO; + break; +- ++ + case SK_INIT_RUN: + /* Initialization Level 2 */ + if (pAC->GIni.GILevel != SK_INIT_IO) { +@@ -1957,12 +2731,13 @@ + RetVal = 4; + break; + } ++ + SkGeInit2(pAC, IoC); + + /* Level 2 successfully passed */ + pAC->GIni.GILevel = SK_INIT_RUN; + break; +- ++ + default: + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E003, SKERR_HWI_E003MSG); + RetVal = 3; +@@ -1985,77 +2760,79 @@ + * nothing + */ + void SkGeDeInit( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC) /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ + { + int i; + SK_U16 Word; + +-#ifdef SK_PHY_LP_MODE +- SK_U8 Byte; ++#ifdef SK_PHY_LP_MODE_DEEP_SLEEP + SK_U16 PmCtlSts; +-#endif /* SK_PHY_LP_MODE */ ++#endif + + #if (!defined(SK_SLIM) && !defined(VCPU)) + /* ensure I2C is ready */ + SkI2cWaitIrq(pAC, IoC); +-#endif +- +- /* stop all current transfer activity */ +- for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +- if (pAC->GIni.GP[i].PState != SK_PRT_STOP && +- pAC->GIni.GP[i].PState != SK_PRT_RESET) { +- +- SkGeStopPort(pAC, IoC, i, SK_STOP_ALL, SK_HARD_RST); +- } +- } ++#endif + +-#ifdef SK_PHY_LP_MODE +- /* ++#ifdef SK_PHY_LP_MODE_DEEP_SLEEP ++ /* + * for power saving purposes within mobile environments +- * we set the PHY to coma mode and switch to D3 power state. ++ * we set the PHY to coma mode. + */ +- if (pAC->GIni.GIYukonLite && +- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { ++#ifdef XXX ++ if (pAC->GIni.GIVauxAvail) { ++ /* switch power to VAUX */ ++ SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA | ++ PC_VAUX_ON | PC_VCC_OFF)); ++ } ++#endif /* XXX */ ++ ++ if (CHIP_ID_YUKON_2(pAC) && /* pAC->GIni.GIMacsFound == 1 && */ ++ !pAC->GIni.GIAsfEnabled ++#ifdef XXX ++ || (pAC->GIni.GIYukonLite && pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) ++#endif /* XXX */ ++ ) { + + /* for all ports switch PHY to coma mode */ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +- +- SkGmEnterLowPowerMode(pAC, IoC, i, PHY_PM_DEEP_SLEEP); +- } + +- if (pAC->GIni.GIVauxAvail) { +- /* switch power to VAUX */ +- Byte = PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF; +- +- SK_OUT8(IoC, B0_POWER_CTRL, Byte); ++ (void)SkGmEnterLowPowerMode(pAC, IoC, i, PHY_PM_DEEP_SLEEP); + } +- +- /* switch to D3 state */ +- SK_IN16(IoC, PCI_C(PCI_PM_CTL_STS), &PmCtlSts); +- +- PmCtlSts |= PCI_PM_STATE_D3; ++ } ++#else /* !SK_PHY_LP_MODE_DEEP_SLEEP */ + +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ if (!pAC->GIni.GIAsfEnabled) { ++ /* stop all current transfer activity */ ++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ if (pAC->GIni.GP[i].PState != SK_PRT_STOP && ++ pAC->GIni.GP[i].PState != SK_PRT_RESET) { + +- SK_OUT16(IoC, PCI_C(PCI_PM_CTL_STS), PmCtlSts); ++ SkGeStopPort(pAC, IoC, i, SK_STOP_ALL, SK_HARD_RST); ++ } ++ } + } +-#endif /* SK_PHY_LP_MODE */ + +- /* Reset all bits in the PCI STATUS register */ ++ /* reset all bits in the PCI STATUS register */ + /* + * Note: PCI Cfg cycles cannot be used, because they are not + * available on some platforms after 'boot time'. + */ +- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word); +- ++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word); ++ + SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); +- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS)); ++ ++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), Word | (SK_U16)PCI_ERRBITS); ++ + SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); + +- /* do the reset, all LEDs are switched off now */ +- SK_OUT8(IoC, B0_CTST, CS_RST_SET); +- ++ if (!pAC->GIni.GIAsfEnabled) { ++ /* set the SW-reset */ ++ SK_OUT8(IoC, B0_CTST, CS_RST_SET); ++ } ++#endif /* !SK_PHY_LP_MODE_DEEP_SLEEP */ ++ + pAC->GIni.GILevel = SK_INIT_DATA; + } /* SkGeDeInit */ + +@@ -2089,8 +2866,8 @@ + * 2: The port has to be stopped before it can be initialized again. + */ + int SkGeInitPort( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port to configure */ + { + SK_GEPORT *pPrt; +@@ -2101,8 +2878,8 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E004, SKERR_HWI_E004MSG); + return(1); + } +- +- if (pPrt->PState == SK_PRT_INIT || pPrt->PState == SK_PRT_RUN) { ++ ++ if (pPrt->PState >= SK_PRT_INIT) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E005, SKERR_HWI_E005MSG); + return(2); + } +@@ -2119,29 +2896,29 @@ + SkGeXmitLED(pAC, IoC, MR_ADDR(Port, TX_LED_INI), SK_LED_ENA); + SkGeXmitLED(pAC, IoC, MR_ADDR(Port, RX_LED_INI), SK_LED_ENA); + /* The Link LED is initialized by RLMT or Diagnostics itself */ +- ++ + SkXmInitMac(pAC, IoC, Port); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + + SkGmInitMac(pAC, IoC, Port); + } + #endif /* YUKON */ +- ++ + /* do NOT initialize the Link Sync Counter */ + + SkGeInitMacFifo(pAC, IoC, Port); +- ++ + SkGeInitRamBufs(pAC, IoC, Port); +- ++ + if (pPrt->PXSQSize != 0) { + /* enable Force Sync bit if synchronous queue available */ + SK_OUT8(IoC, MR_ADDR(Port, TXA_CTRL), TXA_ENA_FSYNC); + } +- ++ + SkGeInitBmu(pAC, IoC, Port); + + /* mark port as initialized */ +@@ -2149,3 +2926,4 @@ + + return(0); + } /* SkGeInitPort */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skgemib.c linux-2.6.9.new/drivers/net/sk98lin/skgemib.c +--- linux-2.6.9.old/drivers/net/sk98lin/skgemib.c 2004-10-19 05:53:06.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skgemib.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgemib.c + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.11 $ +- * Date: $Date: 2003/09/15 13:38:12 $ ++ * Version: $Revision: 2.7 $ ++ * Date: $Date: 2004/10/26 12:42:18 $ + * Purpose: Private Network Management Interface Management Database + * + ****************************************************************************/ +@@ -251,6 +251,183 @@ + 0, + SK_PNMI_RW, DiagActions, 0}, + #endif /* SK_DIAG_SUPPORT */ ++#ifdef SK_ASF ++ {OID_SKGE_ASF, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_STORE_CONFIG, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_ENA, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS_INT, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_HB_ENA, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_HB_INT, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_WD_ENA, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_WD_TIME, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_IP_SOURCE, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_MAC_SOURCE, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_IP_DEST, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_MAC_DEST, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_COMMUNITY_NAME, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RSP_ENA, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS_COUNT_MIN, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS_COUNT_MAX, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS_INT_MIN, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_RETRANS_INT_MAX, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_HB_INT_MIN, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_HB_INT_MAX, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_WD_TIME_MIN, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_WD_TIME_MAX, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_HB_CAP, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_WD_TIMER_RES, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_GUID, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_KEY_OP, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_KEY_ADM, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_KEY_GEN, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_CAP, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_PAR_1, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_OVERALL_OID, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RW, Asf, 0}, ++ {OID_SKGE_ASF_FWVER_OID, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RO, Asf, 0}, ++ {OID_SKGE_ASF_ACPI_OID, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RO, Asf, 0}, ++ {OID_SKGE_ASF_SMBUS_OID, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RO, Asf, 0}, ++#endif /* SK_ASF */ + {OID_SKGE_MDB_VERSION, + 1, + 0, +@@ -1073,6 +1250,11 @@ + 0, + 0, + SK_PNMI_RO, Vct, 0}, ++ {OID_SKGE_VCT_CAPABILITIES, ++ 0, ++ 0, ++ 0, ++ SK_PNMI_RO, Vct, 0}, + {OID_SKGE_BOARDLEVEL, + 0, + 0, +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skgepnmi.c linux-2.6.9.new/drivers/net/sk98lin/skgepnmi.c +--- linux-2.6.9.old/drivers/net/sk98lin/skgepnmi.c 2004-10-19 05:54:40.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skgepnmi.c 2006-12-07 14:35:03.000000000 +0800 +@@ -1,9 +1,9 @@ + /***************************************************************************** + * + * Name: skgepnmi.c +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.111 $ +- * Date: $Date: 2003/09/15 13:35:35 $ ++ * Project: Gigabit Ethernet Adapters, PNMI-Module ++ * Version: $Revision: 2.21 $ ++ * Date: $Date: 2005/05/11 11:50:12 $ + * Purpose: Private Network Management Interface + * + ****************************************************************************/ +@@ -22,11 +22,10 @@ + * + ******************************************************************************/ + +- +-#ifndef _lint ++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skgepnmi.c,v 1.111 2003/09/15 13:35:35 tschilli Exp $ (C) Marvell."; +-#endif /* !_lint */ ++ "@(#) $Id: skgepnmi.c,v 2.21 2005/05/11 11:50:12 tschilli Exp $ (C) Marvell."; ++#endif + + #include "h/skdrv1st.h" + #include "h/sktypes.h" +@@ -38,12 +37,14 @@ + #include "h/skcsum.h" + #include "h/skvpd.h" + #include "h/skgehw.h" ++#include "h/sky2le.h" + #include "h/skgeinit.h" + #include "h/skdrv2nd.h" + #include "h/skgepnm2.h" + #ifdef SK_POWER_MGMT + #include "h/skgepmgt.h" +-#endif ++#endif /* SK_POWER_MGMT */ ++ + /* defines *******************************************************************/ + + #ifndef DEBUG +@@ -72,7 +73,6 @@ + int SkPnmiGenIoctl(SK_AC *pAC, SK_IOC IoC, void * pBuf, + unsigned int * pLen, SK_U32 NetIndex); + +- + /* + * Private Function prototypes + */ +@@ -112,6 +112,12 @@ + PNMI_STATIC int Vct(SK_AC *pAC, SK_IOC IoC, int Action, SK_U32 Id, char *pBuf, + unsigned int *pLen, SK_U32 Instance, unsigned int TableIndex, SK_U32 NetIndex); + PNMI_STATIC void CheckVctStatus(SK_AC *, SK_IOC, char *, SK_U32, SK_U32); ++PNMI_STATIC void VctGetResults(SK_AC *, SK_IOC, SK_U32); ++#ifdef SK_ASF ++PNMI_STATIC int Asf(SK_AC *pAC, SK_IOC IoC, int action, SK_U32 Id, ++ char *pBuf, unsigned int *pLen, SK_U32 Instance, ++ unsigned int TableIndex, SK_U32 NetIndex); ++#endif /* SK_ASF */ + + /* + * Table to correlate OID with handler function and index to +@@ -353,17 +359,13 @@ + * Always 0 + */ + int SkPnmiInit( +-SK_AC *pAC, /* Pointer to adapter context */ +-SK_IOC IoC, /* IO context handle */ +-int Level) /* Initialization level */ ++SK_AC *pAC, /* Pointer to adapter context */ ++SK_IOC IoC, /* IO context handle */ ++int Level) /* Initialization level */ + { + unsigned int PortMax; /* Number of ports */ + unsigned int PortIndex; /* Current port index in loop */ +- SK_U16 Val16; /* Multiple purpose 16 bit variable */ +- SK_U8 Val8; /* Mulitple purpose 8 bit variable */ +- SK_EVPARA EventParam; /* Event struct for timer event */ +- SK_PNMI_VCT *pVctBackupData; +- ++ SK_EVPARA EventParam; /* Event struct for timer event */ + + SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL, + ("PNMI: SkPnmiInit: Called, level=%d\n", Level)); +@@ -372,9 +374,11 @@ + + case SK_INIT_DATA: + SK_MEMSET((char *)&pAC->Pnmi, 0, sizeof(pAC->Pnmi)); ++ + pAC->Pnmi.TrapBufFree = SK_PNMI_TRAP_QUEUE_LEN; + pAC->Pnmi.StartUpTime = SK_PNMI_HUNDREDS_SEC(SkOsGetTime(pAC)); + pAC->Pnmi.RlmtChangeThreshold = SK_PNMI_DEF_RLMT_CHG_THRES; ++ + for (PortIndex = 0; PortIndex < SK_MAX_MACS; PortIndex ++) { + + pAC->Pnmi.Port[PortIndex].ActiveFlag = SK_FALSE; +@@ -408,51 +412,42 @@ + break; + + case SK_INIT_IO: +- /* +- * Reset MAC counters +- */ ++ ++ /* Reset MAC counters. */ + PortMax = pAC->GIni.GIMacsFound; + + for (PortIndex = 0; PortIndex < PortMax; PortIndex ++) { + + pAC->GIni.GIFunc.pFnMacResetCounter(pAC, IoC, PortIndex); + } +- ++ + /* Initialize DSP variables for Vct() to 0xff => Never written! */ + for (PortIndex = 0; PortIndex < PortMax; PortIndex ++) { + pAC->GIni.GP[PortIndex].PCableLen = 0xff; +- pVctBackupData = &pAC->Pnmi.VctBackup[PortIndex]; +- pVctBackupData->PCableLen = 0xff; ++ pAC->Pnmi.VctBackup[PortIndex].CableLen = 0xff; + } +- +- /* +- * Get pci bus speed +- */ +- SK_IN16(IoC, B0_CTST, &Val16); +- if ((Val16 & CS_BUS_CLOCK) == 0) { + +- pAC->Pnmi.PciBusSpeed = 33; ++ /* Get PCI bus speed. */ ++ if (pAC->GIni.GIPciClock66) { ++ ++ pAC->Pnmi.PciBusSpeed = 66; + } + else { +- pAC->Pnmi.PciBusSpeed = 66; ++ pAC->Pnmi.PciBusSpeed = 33; + } + +- /* +- * Get pci bus width +- */ +- SK_IN16(IoC, B0_CTST, &Val16); +- if ((Val16 & CS_BUS_SLOT_SZ) == 0) { ++ /* Get PCI bus width. */ ++ if (pAC->GIni.GIPciSlot64) { + +- pAC->Pnmi.PciBusWidth = 32; ++ pAC->Pnmi.PciBusWidth = 64; + } + else { +- pAC->Pnmi.PciBusWidth = 64; ++ pAC->Pnmi.PciBusWidth = 32; + } + +- /* +- * Get chipset +- */ ++ /* Get chipset. */ + switch (pAC->GIni.GIChipId) { ++ + case CHIP_ID_GENESIS: + pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_XMAC; + break; +@@ -460,58 +455,52 @@ + case CHIP_ID_YUKON: + pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON; + break; ++ ++ case CHIP_ID_YUKON_LITE: ++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_LITE; ++ break; ++ ++ case CHIP_ID_YUKON_LP: ++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_LP; ++ break; ++ ++ case CHIP_ID_YUKON_XL: ++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_XL; ++ break; ++ ++ case CHIP_ID_YUKON_EC: ++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_EC; ++ break; ++ ++ case CHIP_ID_YUKON_FE: ++ pAC->Pnmi.Chipset = SK_PNMI_CHIPSET_YUKON_FE; ++ break; + + default: + break; + } + +- /* +- * Get PMD and DeviceType +- */ +- SK_IN8(IoC, B2_PMD_TYP, &Val8); +- switch (Val8) { ++ /* Get PMD and Device Type. */ ++ switch (pAC->GIni.GIPmdTyp) { ++ + case 'S': + pAC->Pnmi.PMD = 3; +- if (pAC->GIni.GIMacsFound > 1) { +- +- pAC->Pnmi.DeviceType = 0x00020002; +- } +- else { +- pAC->Pnmi.DeviceType = 0x00020001; +- } ++ pAC->Pnmi.DeviceType = 0x00020001; + break; + + case 'L': + pAC->Pnmi.PMD = 2; +- if (pAC->GIni.GIMacsFound > 1) { +- +- pAC->Pnmi.DeviceType = 0x00020004; +- } +- else { +- pAC->Pnmi.DeviceType = 0x00020003; +- } ++ pAC->Pnmi.DeviceType = 0x00020003; + break; + + case 'C': + pAC->Pnmi.PMD = 4; +- if (pAC->GIni.GIMacsFound > 1) { +- +- pAC->Pnmi.DeviceType = 0x00020006; +- } +- else { +- pAC->Pnmi.DeviceType = 0x00020005; +- } ++ pAC->Pnmi.DeviceType = 0x00020005; + break; + + case 'T': + pAC->Pnmi.PMD = 5; +- if (pAC->GIni.GIMacsFound > 1) { +- +- pAC->Pnmi.DeviceType = 0x00020008; +- } +- else { +- pAC->Pnmi.DeviceType = 0x00020007; +- } ++ pAC->Pnmi.DeviceType = 0x00020007; + break; + + default : +@@ -520,11 +509,14 @@ + break; + } + +- /* +- * Get connector +- */ +- SK_IN8(IoC, B2_CONN_TYP, &Val8); +- switch (Val8) { ++ if (pAC->GIni.GIMacsFound > 1) { ++ ++ pAC->Pnmi.DeviceType++; ++ } ++ ++ /* Get connector type. */ ++ switch (pAC->GIni.GIConTyp) { ++ + case 'C': + pAC->Pnmi.Connector = 2; + break; +@@ -552,17 +544,17 @@ + break; + + case SK_INIT_RUN: +- /* +- * Start timer for RLMT change counter +- */ ++ ++ /* Start timer for RLMT change counter. */ + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); ++ + SkTimerStart(pAC, IoC, &pAC->Pnmi.RlmtChangeEstimate.EstTimer, +- 28125000, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER, ++ SK_PNMI_EVT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER, + EventParam); + break; + + default: +- break; /* Nothing todo */ ++ break; /* Nothing to do. */ + } + + return (0); +@@ -642,7 +634,6 @@ + ("PNMI: SkPnmiPreSetVar: Called, Id=0x%x, BufLen=%d, Instance=%d, NetIndex=%d\n", + Id, *pLen, Instance, NetIndex)); + +- + return (PnmiVar(pAC, IoC, SK_PNMI_PRESET, Id, (char *)pBuf, pLen, + Instance, NetIndex)); + } +@@ -724,7 +715,6 @@ + unsigned int TmpLen; + char KeyArr[SK_PNMI_VPD_ENTRIES][SK_PNMI_VPD_KEY_SIZE]; + +- + SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL, + ("PNMI: SkPnmiGetStruct: Called, BufLen=%d, NetIndex=%d\n", + *pLen, NetIndex)); +@@ -733,22 +723,19 @@ + + if (*pLen >= SK_PNMI_MIN_STRUCT_SIZE) { + +- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, +- (SK_U32)(-1)); ++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, (SK_U32)(-1)); + } + + *pLen = SK_PNMI_STRUCT_SIZE; + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* +- * Check NetIndex +- */ ++ /* Check NetIndex. */ + if (NetIndex >= pAC->Rlmt.NumNets) { + return (SK_PNMI_ERR_UNKNOWN_NET); + } + +- /* Update statistic */ ++ /* Update statistics. */ + SK_PNMI_CHECKFLAGS("SkPnmiGetStruct: On call"); + + if ((Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1)) != +@@ -773,15 +760,12 @@ + return (Ret); + } + +- /* +- * Increment semaphores to indicate that an update was +- * already done +- */ ++ /* Increment semaphores to indicate that an update was already done. */ + pAC->Pnmi.MacUpdatedFlag ++; + pAC->Pnmi.RlmtUpdatedFlag ++; + pAC->Pnmi.SirqUpdatedFlag ++; + +- /* Get vpd keys for instance calculation */ ++ /* Get VPD keys for instance calculation. */ + Ret = GetVpdKeyArr(pAC, IoC, &KeyArr[0][0], sizeof(KeyArr), &TmpLen); + if (Ret != SK_PNMI_ERR_OK) { + +@@ -795,13 +779,13 @@ + return (SK_PNMI_ERR_GENERAL); + } + +- /* Retrieve values */ ++ /* Retrieve values. */ + SK_MEMSET((char *)pBuf, 0, SK_PNMI_STRUCT_SIZE); ++ + for (TableIndex = 0; TableIndex < ID_TABLE_SIZE; TableIndex ++) { + + InstanceNo = IdTable[TableIndex].InstanceNo; +- for (InstanceCnt = 1; InstanceCnt <= InstanceNo; +- InstanceCnt ++) { ++ for (InstanceCnt = 1; InstanceCnt <= InstanceNo; InstanceCnt ++) { + + DstOffset = IdTable[TableIndex].Offset + + (InstanceCnt - 1) * +@@ -998,7 +982,6 @@ + unsigned int PhysPortIndex; + unsigned int MaxNetNumber; + int CounterIndex; +- int Ret; + SK_U16 MacStatus; + SK_U64 OverflowStatus; + SK_U64 Mask; +@@ -1012,12 +995,7 @@ + SK_U64 Delta; + SK_PNMI_ESTIMATE *pEst; + SK_U32 NetIndex; +- SK_GEPORT *pPrt; +- SK_PNMI_VCT *pVctBackupData; + SK_U32 RetCode; +- int i; +- SK_U32 CableLength; +- + + #ifdef DEBUG + if (Event != SK_PNMI_EVT_XMAC_RESET) { +@@ -1048,9 +1026,7 @@ + #endif /* DEBUG */ + OverflowStatus = 0; + +- /* +- * Check which source caused an overflow interrupt. +- */ ++ /* Check which source caused an overflow interrupt. */ + if ((pAC->GIni.GIFunc.pFnMacOverflow(pAC, IoC, PhysPortIndex, + MacStatus, &OverflowStatus) != 0) || + (OverflowStatus == 0)) { +@@ -1068,7 +1044,6 @@ + + Mask = (SK_U64)1 << CounterIndex; + if ((OverflowStatus & Mask) == 0) { +- + continue; + } + +@@ -1100,9 +1075,7 @@ + case SK_PNMI_HRX_IRLENGTH: + case SK_PNMI_HRX_RESERVED: + +- /* +- * the following counters aren't be handled (id > 63) +- */ ++ /* The following counters aren't be handled (id > 63). */ + case SK_PNMI_HTX_SYNC: + case SK_PNMI_HTX_SYNC_OCTET: + break; +@@ -1189,7 +1162,7 @@ + if ((unsigned int)Param.Para64 >= (unsigned int)pAC->I2c.MaxSens) { + + SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_CTRL, +- ("PNMI: ERR: SkPnmiEvent: SK_PNMI_EVT_SEN_ERR_UPP parameter wrong, SensorIndex=%d\n", ++ ("PNMI: ERR: SK_PNMI_EVT_SEN_ERR_UPP parameter wrong, SensorIndex=%d\n", + (unsigned int)Param.Para64)); + return (0); + } +@@ -1208,16 +1181,14 @@ + case SK_PNMI_EVT_CHG_EST_TIMER: + /* + * Calculate port switch average on a per hour basis +- * Time interval for check : 28125 ms ++ * Time interval for check : 28125 ms (SK_PNMI_EVT_TIMER_CHECK) + * Number of values for average : 8 + * + * Be careful in changing these values, on change check + * - typedef of SK_PNMI_ESTIMATE (Size of EstValue + * array one less than value number) + * - Timer initialization SkTimerStart() in SkPnmiInit +- * - Delta value below must be multiplicated with +- * power of 2 +- * ++ * - Delta value below must be multiplicated with power of 2 + */ + pEst = &pAC->Pnmi.RlmtChangeEstimate; + CounterIndex = pEst->EstValueIndex + 1; +@@ -1240,7 +1211,7 @@ + Delta = NewestValue - OldestValue; + } + else { +- /* Overflow situation */ ++ /* Overflow situation. */ + Delta = (SK_U64)(0 - OldestValue) + NewestValue; + } + +@@ -1266,8 +1237,9 @@ + } + + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); ++ + SkTimerStart(pAC, IoC, &pAC->Pnmi.RlmtChangeEstimate.EstTimer, +- 28125000, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER, ++ SK_PNMI_EVT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_CHG_EST_TIMER, + EventParam); + break; + +@@ -1311,29 +1283,25 @@ + (unsigned int)Param.Para64)); + return (0); + } +-#endif ++#endif /* DEBUG */ ++ + PhysPortIndex = (unsigned int)Param.Para64; + +- /* +- * Update XMAC statistic to get fresh values +- */ +- Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1); +- if (Ret != SK_PNMI_ERR_OK) { ++ /* Update XMAC statistic to get fresh values. */ ++ if (MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1) != ++ SK_PNMI_ERR_OK) { + + SK_PNMI_CHECKFLAGS("SkPnmiEvent: On return"); + return (0); + } +- /* +- * Increment semaphore to indicate that an update was +- * already done +- */ ++ ++ /* Increment semaphore to indicate that an update was already done. */ + pAC->Pnmi.MacUpdatedFlag ++; + + for (CounterIndex = 0; CounterIndex < SK_PNMI_MAX_IDX; + CounterIndex ++) { + + if (!StatAddr[CounterIndex][MacType].GetOffset) { +- + continue; + } + +@@ -1366,14 +1334,15 @@ + QueueRlmtPortTrap(pAC, OID_SKGE_TRAP_RLMT_PORT_UP, PhysPortIndex); + (void)SK_DRIVER_SENDEVENT(pAC, IoC); + +- /* Bugfix for XMAC errata (#10620)*/ ++ /* Bugfix for XMAC errata (#10620). */ + if (MacType == SK_MAC_XMAC) { +- /* Add incremental difference to offset (#10620)*/ ++ /* Add incremental difference to offset (#10620). */ + (void)pAC->GIni.GIFunc.pFnMacStatistic(pAC, IoC, PhysPortIndex, + XM_RXE_SHT_ERR, &Val32); + + Value = (((SK_U64)pAC->Pnmi.Port[PhysPortIndex]. + CounterHigh[SK_PNMI_HRX_SHORTS] << 32) | (SK_U64)Val32); ++ + pAC->Pnmi.Port[PhysPortIndex].CounterOffset[SK_PNMI_HRX_SHORTS] += + Value - pAC->Pnmi.Port[PhysPortIndex].RxShortZeroMark; + } +@@ -1403,7 +1372,7 @@ + QueueRlmtPortTrap(pAC, OID_SKGE_TRAP_RLMT_PORT_DOWN, PhysPortIndex); + (void)SK_DRIVER_SENDEVENT(pAC, IoC); + +- /* Bugfix #10620 - get zero level for incremental difference */ ++ /* Bugfix #10620 - get zero level for incremental difference. */ + if (MacType == SK_MAC_XMAC) { + + (void)pAC->GIni.GIFunc.pFnMacStatistic(pAC, IoC, PhysPortIndex, +@@ -1435,17 +1404,13 @@ + } + #endif /* DEBUG */ + +- /* +- * For now, ignore event if NetIndex != 0. +- */ ++ /* For now, ignore event if NetIndex != 0. */ + if (Param.Para32[1] != 0) { + + return (0); + } + +- /* +- * Nothing to do if port is already inactive +- */ ++ /* Nothing to do if port is already inactive. */ + if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { + + return (0); +@@ -1476,7 +1441,6 @@ + CounterIndex ++) { + + if (!StatAddr[CounterIndex][MacType].GetOffset) { +- + continue; + } + +@@ -1485,9 +1449,7 @@ + pAC->Pnmi.VirtualCounterOffset[CounterIndex] += Value; + } + +- /* +- * Set port to inactive +- */ ++ /* Set port to inactive. */ + pAC->Pnmi.Port[PhysPortIndex].ActiveFlag = SK_FALSE; + + pAC->Pnmi.MacUpdatedFlag --; +@@ -1513,25 +1475,19 @@ + } + #endif /* DEBUG */ + +- /* +- * For now, ignore event if NetIndex != 0. +- */ ++ /* For now, ignore event if NetIndex != 0. */ + if (Param.Para32[1] != 0) { + + return (0); + } + +- /* +- * Nothing to do if port is already active +- */ ++ /* Nothing to do if port is already inactive. */ + if (pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { + + return (0); + } + +- /* +- * Statistic maintenance +- */ ++ /* Statistic maintenance. */ + pAC->Pnmi.RlmtChangeCts ++; + pAC->Pnmi.RlmtChangeTime = SK_PNMI_HUNDREDS_SEC(SkOsGetTime(pAC)); + +@@ -1565,7 +1521,6 @@ + CounterIndex ++) { + + if (!StatAddr[CounterIndex][MacType].GetOffset) { +- + continue; + } + +@@ -1574,16 +1529,14 @@ + pAC->Pnmi.VirtualCounterOffset[CounterIndex] -= Value; + } + +- /* Set port to active */ ++ /* Set port to active. */ + pAC->Pnmi.Port[PhysPortIndex].ActiveFlag = SK_TRUE; + + pAC->Pnmi.MacUpdatedFlag --; + break; + + case SK_PNMI_EVT_RLMT_SEGMENTATION: +- /* +- * Para.Para32[0] contains the NetIndex. +- */ ++ /* Para.Para32[0] contains the NetIndex. */ + + /* + * Store a trap message in the trap buffer and generate an event for +@@ -1598,71 +1551,53 @@ + * Param.Para32[0] contains the number of Nets. + * Param.Para32[1] is reserved, contains -1. + */ +- /* +- * Check number of nets +- */ ++ /* Check number of nets. */ + MaxNetNumber = pAC->GIni.GIMacsFound; +- if (((unsigned int)Param.Para32[0] < 1) +- || ((unsigned int)Param.Para32[0] > MaxNetNumber)) { ++ ++ if (((unsigned int)Param.Para32[0] < 1) || ++ ((unsigned int)Param.Para32[0] > MaxNetNumber)) { ++ + return (SK_PNMI_ERR_UNKNOWN_NET); + } + +- if ((unsigned int)Param.Para32[0] == 1) { /* single net mode */ ++ if ((unsigned int)Param.Para32[0] == 1) { /* SingleNet mode. */ + pAC->Pnmi.DualNetActiveFlag = SK_FALSE; + } +- else { /* dual net mode */ ++ else { /* DualNet mode. */ + pAC->Pnmi.DualNetActiveFlag = SK_TRUE; + } + break; + + case SK_PNMI_EVT_VCT_RESET: + PhysPortIndex = Param.Para32[0]; +- pPrt = &pAC->GIni.GP[PhysPortIndex]; +- pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex]; + + if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING) { ++ + RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_FALSE); ++ + if (RetCode == 2) { + /* + * VCT test is still running. + * Start VCT timer counter again. + */ +- SK_MEMSET((char *) &Param, 0, sizeof(Param)); ++ SK_MEMSET((char *)&Param, 0, sizeof(Param)); ++ + Param.Para32[0] = PhysPortIndex; + Param.Para32[1] = -1; +- SkTimerStart(pAC, IoC, +- &pAC->Pnmi.VctTimeout[PhysPortIndex].VctTimer, +- 4000000, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Param); ++ ++ SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex], ++ SK_PNMI_VCT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Param); ++ + break; + } +- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_PENDING; +- pAC->Pnmi.VctStatus[PhysPortIndex] |= +- (SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_TEST_DONE); + +- /* Copy results for later use to PNMI struct. */ +- for (i = 0; i < 4; i++) { +- if (pPrt->PMdiPairSts[i] == SK_PNMI_VCT_NORMAL_CABLE) { +- if ((pPrt->PMdiPairLen[i] > 35) && +- (pPrt->PMdiPairLen[i] < 0xff)) { +- pPrt->PMdiPairSts[i] = SK_PNMI_VCT_IMPEDANCE_MISMATCH; +- } +- } +- if ((pPrt->PMdiPairLen[i] > 35) && +- (pPrt->PMdiPairLen[i] != 0xff)) { +- CableLength = 1000 * +- (((175 * pPrt->PMdiPairLen[i]) / 210) - 28); +- } +- else { +- CableLength = 0; +- } +- pVctBackupData->PMdiPairLen[i] = CableLength; +- pVctBackupData->PMdiPairSts[i] = pPrt->PMdiPairSts[i]; +- } ++ VctGetResults(pAC, IoC, PhysPortIndex); + +- Param.Para32[0] = PhysPortIndex; +- Param.Para32[1] = -1; +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Param); +- SkEventDispatcher(pAC, IoC); ++ EventParam.Para32[0] = PhysPortIndex; ++ EventParam.Para32[1] = -1; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, EventParam); ++ ++ /* SkEventDispatcher(pAC, IoC); */ + } + + break; +@@ -1710,14 +1645,13 @@ + unsigned int TableIndex; + int Ret; + +- + if ((TableIndex = LookupId(Id)) == (unsigned int)(-1)) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_OID); + } + +- /* Check NetIndex */ ++ /* Check NetIndex. */ + if (NetIndex >= pAC->Rlmt.NumNets) { + return (SK_PNMI_ERR_UNKNOWN_NET); + } +@@ -1767,22 +1701,20 @@ + SK_U32 Instance; + SK_U32 Id; + +- +- /* Check if the passed buffer has the right size */ ++ /* Check if the passed buffer has the right size. */ + if (*pLen < SK_PNMI_STRUCT_SIZE) { + +- /* Check if we can return the error within the buffer */ ++ /* Check if we can return the error within the buffer. */ + if (*pLen >= SK_PNMI_MIN_STRUCT_SIZE) { + +- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, +- (SK_U32)(-1)); ++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_TOO_SHORT, (SK_U32)(-1)); + } + + *pLen = SK_PNMI_STRUCT_SIZE; + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* Check NetIndex */ ++ /* Check NetIndex. */ + if (NetIndex >= pAC->Rlmt.NumNets) { + return (SK_PNMI_ERR_UNKNOWN_NET); + } +@@ -1810,12 +1742,11 @@ + pAC->Pnmi.RlmtUpdatedFlag ++; + pAC->Pnmi.SirqUpdatedFlag ++; + +- /* Preset/Set values */ ++ /* PRESET/SET values. */ + for (TableIndex = 0; TableIndex < ID_TABLE_SIZE; TableIndex ++) { + + if ((IdTable[TableIndex].Access != SK_PNMI_RW) && + (IdTable[TableIndex].Access != SK_PNMI_WO)) { +- + continue; + } + +@@ -1826,8 +1757,7 @@ + InstanceCnt ++) { + + DstOffset = IdTable[TableIndex].Offset + +- (InstanceCnt - 1) * +- IdTable[TableIndex].StructSize; ++ (InstanceCnt - 1) * IdTable[TableIndex].StructSize; + + /* + * Because VPD multiple instance variables are +@@ -1837,9 +1767,7 @@ + */ + Instance = (SK_U32)InstanceCnt; + +- /* +- * Evaluate needed buffer length +- */ ++ /* Evaluate needed buffer length. */ + Len = 0; + Ret = IdTable[TableIndex].Func(pAC, IoC, + SK_PNMI_GET, IdTable[TableIndex].Id, +@@ -1855,8 +1783,7 @@ + pAC->Pnmi.SirqUpdatedFlag --; + + SK_PNMI_CHECKFLAGS("PnmiStruct: On return"); +- SK_PNMI_SET_STAT(pBuf, +- SK_PNMI_ERR_GENERAL, DstOffset); ++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_GENERAL, DstOffset); + *pLen = SK_PNMI_MIN_STRUCT_SIZE; + return (SK_PNMI_ERR_GENERAL); + } +@@ -1878,7 +1805,7 @@ + } + } + +- /* Call the OID handler function */ ++ /* Call the OID handler function. */ + Ret = IdTable[TableIndex].Func(pAC, IoC, Action, + IdTable[TableIndex].Id, pBuf + DstOffset, + &Len, Instance, TableIndex, NetIndex); +@@ -1889,8 +1816,7 @@ + pAC->Pnmi.SirqUpdatedFlag --; + + SK_PNMI_CHECKFLAGS("PnmiStruct: On return"); +- SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_BAD_VALUE, +- DstOffset); ++ SK_PNMI_SET_STAT(pBuf, SK_PNMI_ERR_BAD_VALUE, DstOffset); + *pLen = SK_PNMI_MIN_STRUCT_SIZE; + return (SK_PNMI_ERR_BAD_VALUE); + } +@@ -1924,7 +1850,7 @@ + + if (IdTable[i].Id == Id) { + +- return i; ++ return (i); + } + } + +@@ -1965,16 +1891,13 @@ + { + if (Id != OID_SKGE_ALL_DATA) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR003, +- SK_PNMI_ERR003MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR003, SK_PNMI_ERR003MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } + +- /* +- * Check instance. We only handle single instance variables +- */ ++ /* Check instance. We only handle single instance variables. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; +@@ -2033,10 +1956,7 @@ + int Ret; + SK_U32 ActionOp; + +- +- /* +- * Check instance. We only handle single instance variables +- */ ++ /* Check instance. We only handle single instance variables. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; +@@ -2049,10 +1969,10 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* Check if a get should be performed */ ++ /* Check if a GET should be performed. */ + if (Action == SK_PNMI_GET) { + +- /* A get is easy. We always return the same value */ ++ /* A GET is easy. We always return the same value. */ + ActionOp = (SK_U32)SK_PNMI_ACT_IDLE; + SK_PNMI_STORE_U32(pBuf, ActionOp); + *pLen = sizeof(SK_U32); +@@ -2060,13 +1980,13 @@ + return (SK_PNMI_ERR_OK); + } + +- /* Continue with PRESET/SET action */ ++ /* Continue with PRESET/SET action. */ + if (*pLen > sizeof(SK_U32)) { + + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* Check if the command is a known one */ ++ /* Check if the command is a known one. */ + SK_PNMI_READ_U32(pBuf, ActionOp); + if (*pLen > sizeof(SK_U32) || + (ActionOp != SK_PNMI_ACT_IDLE && +@@ -2078,7 +1998,7 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* A preset ends here */ ++ /* A PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); +@@ -2087,19 +2007,15 @@ + switch (ActionOp) { + + case SK_PNMI_ACT_IDLE: +- /* Nothing to do */ ++ /* Nothing to do. */ + break; + + case SK_PNMI_ACT_RESET: +- /* +- * Perform a driver reset or something that comes near +- * to this. +- */ ++ /* Perform a driver reset or something that comes near to this. */ + Ret = SK_DRIVER_RESET(pAC, IoC); + if (Ret != 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR005, +- SK_PNMI_ERR005MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR005, SK_PNMI_ERR005MSG); + + return (SK_PNMI_ERR_GENERAL); + } +@@ -2116,13 +2032,12 @@ + break; + + case SK_PNMI_ACT_RESETCNT: +- /* Set all counters and timestamps to zero */ ++ /* Set all counters and timestamps to zero. */ + ResetCounter(pAC, IoC, NetIndex); + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR006, +- SK_PNMI_ERR006MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR006, SK_PNMI_ERR006MSG); + + return (SK_PNMI_ERR_GENERAL); + } +@@ -2166,25 +2081,21 @@ + SK_U32 StatVal32; + SK_BOOL Is64BitReq = SK_FALSE; + +- /* +- * Only the active Mac is returned +- */ ++ /* Only the active MAC is returned. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- /* +- * Check action type +- */ ++ /* Check action type. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* Check length */ ++ /* Check length. */ + switch (Id) { + + case OID_802_3_PERMANENT_ADDRESS: +@@ -2205,12 +2116,12 @@ + + #else /* SK_NDIS_64BIT_CTR */ + +- /* for compatibility, at least 32bit are required for OID */ ++ /* For compatibility, at least 32 bits are required for OID. */ + if (*pLen < sizeof(SK_U32)) { + /* +- * but indicate handling for 64bit values, +- * if insufficient space is provided +- */ ++ * Indicate handling for 64 bit values, ++ * if insufficient space is provided. ++ */ + *pLen = sizeof(SK_U64); + return (SK_PNMI_ERR_TOO_SHORT); + } +@@ -2226,16 +2137,14 @@ + * to indicate that an update was already done. + */ + Ret = MacUpdate(pAC, IoC, 0, pAC->GIni.GIMacsFound - 1); +- if ( Ret != SK_PNMI_ERR_OK) { ++ if (Ret != SK_PNMI_ERR_OK) { + + *pLen = 0; + return (Ret); + } + pAC->Pnmi.MacUpdatedFlag ++; + +- /* +- * Get value (MAC Index 0 identifies the virtual MAC) +- */ ++ /* Get value (MAC index 0 identifies the virtual MAC). */ + switch (Id) { + + case OID_802_3_PERMANENT_ADDRESS: +@@ -2251,7 +2160,7 @@ + default: + StatVal = GetStatVal(pAC, IoC, 0, IdTable[TableIndex].Param, NetIndex); + +- /* by default 32bit values are evaluated */ ++ /* By default 32 bit values are evaluated. */ + if (!Is64BitReq) { + StatVal32 = (SK_U32)StatVal; + SK_PNMI_STORE_U32(pBuf, StatVal32); +@@ -2305,21 +2214,19 @@ + int MacType; + int Ret; + SK_U64 StatVal; +- +- + +- /* Calculate instance if wished. MAC index 0 is the virtual MAC */ ++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */ + PhysPortMax = pAC->GIni.GIMacsFound; + LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax); + + MacType = pAC->GIni.GIMacType; + +- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */ ++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */ + LogPortMax--; + } + +- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */ +- /* Check instance range */ ++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */ ++ /* Check instance range. */ + if ((Instance < 1) || (Instance > LogPortMax)) { + + *pLen = 0; +@@ -2329,20 +2236,20 @@ + Limit = LogPortIndex + 1; + } + +- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */ ++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */ + + LogPortIndex = 0; + Limit = LogPortMax; + } + +- /* Check action */ ++ /* Check action. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* Check length */ ++ /* Check length. */ + if (*pLen < (Limit - LogPortIndex) * sizeof(SK_U64)) { + + *pLen = (Limit - LogPortIndex) * sizeof(SK_U64); +@@ -2361,7 +2268,7 @@ + } + pAC->Pnmi.MacUpdatedFlag ++; + +- /* Get value */ ++ /* Get value. */ + Offset = 0; + for (; LogPortIndex < Limit; LogPortIndex ++) { + +@@ -2467,19 +2374,16 @@ + unsigned int Limit; + unsigned int Offset = 0; + +- /* +- * Calculate instance if wished. MAC index 0 is the virtual +- * MAC. +- */ ++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */ + PhysPortMax = pAC->GIni.GIMacsFound; + LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax); + +- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */ ++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */ + LogPortMax--; + } + +- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */ +- /* Check instance range */ ++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */ ++ /* Check instance range. */ + if ((Instance < 1) || (Instance > LogPortMax)) { + + *pLen = 0; +@@ -2488,27 +2392,23 @@ + LogPortIndex = SK_PNMI_PORT_INST2LOG(Instance); + Limit = LogPortIndex + 1; + } +- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */ ++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */ + + LogPortIndex = 0; + Limit = LogPortMax; + } + +- /* +- * Perform Action +- */ ++ /* Perform action. */ + if (Action == SK_PNMI_GET) { + +- /* Check length */ ++ /* Check length. */ + if (*pLen < (Limit - LogPortIndex) * 6) { + + *pLen = (Limit - LogPortIndex) * 6; + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* +- * Get value +- */ ++ /* Get value. */ + for (; LogPortIndex < Limit; LogPortIndex ++) { + + switch (Id) { +@@ -2532,8 +2432,7 @@ + &pAC->Addr.Net[NetIndex].PermanentMacAddress); + } + else { +- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); ++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); + + CopyMac(pBuf + Offset, + &pAC->Addr.Port[PhysPortIndex].PermanentMacAddress); +@@ -2542,8 +2441,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR008, +- SK_PNMI_ERR008MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR008, SK_PNMI_ERR008MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -2554,8 +2452,8 @@ + } + else { + /* +- * The logical MAC address may not be changed only +- * the physical ones ++ * The logical MAC address may not be changed, ++ * only the physical ones. + */ + if (Id == OID_SKGE_PHYS_FAC_ADDR) { + +@@ -2563,19 +2461,16 @@ + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* +- * Only the current address may be changed +- */ ++ /* Only the current address may be changed. */ + if (Id != OID_SKGE_PHYS_CUR_ADDR) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR009, +- SK_PNMI_ERR009MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR009, SK_PNMI_ERR009MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } + +- /* Check length */ ++ /* Check length. */ + if (*pLen < (Limit - LogPortIndex) * 6) { + + *pLen = (Limit - LogPortIndex) * 6; +@@ -2587,32 +2482,26 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* +- * Check Action +- */ ++ /* Check action. */ + if (Action == SK_PNMI_PRESET) { + + *pLen = 0; + return (SK_PNMI_ERR_OK); + } + +- /* +- * Set OID_SKGE_MAC_CUR_ADDR +- */ ++ /* Set OID_SKGE_MAC_CUR_ADDR. */ + for (; LogPortIndex < Limit; LogPortIndex ++, Offset += 6) { + + /* + * A set to virtual port and set of broadcast +- * address will be ignored ++ * address will be ignored. + */ + if (LogPortIndex == 0 || SK_MEMCMP(pBuf + Offset, + "\xff\xff\xff\xff\xff\xff", 6) == 0) { +- + continue; + } + +- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, +- LogPortIndex); ++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); + + Ret = SkAddrOverride(pAC, IoC, PhysPortIndex, + (SK_MAC_ADDR *)(pBuf + Offset), +@@ -2665,10 +2554,7 @@ + unsigned int Offset = 0; + SK_U64 StatVal; + +- +- /* +- * Calculate instance if wished +- */ ++ /* Calculate instance if wished. */ + if (Instance != (SK_U32)(-1)) { + + if ((Instance < 1) || (Instance > SKCS_NUM_PROTOCOLS)) { +@@ -2684,25 +2570,21 @@ + Limit = SKCS_NUM_PROTOCOLS; + } + +- /* +- * Check action +- */ ++ /* Check action. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* Check length */ ++ /* Check length. */ + if (*pLen < (Limit - Index) * sizeof(SK_U64)) { + + *pLen = (Limit - Index) * sizeof(SK_U64); + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* +- * Get value +- */ ++ /* Get value. */ + for (; Index < Limit; Index ++) { + + switch (Id) { +@@ -2728,8 +2610,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR010, +- SK_PNMI_ERR010MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR010, SK_PNMI_ERR010MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -2739,9 +2620,7 @@ + Offset += sizeof(SK_U64); + } + +- /* +- * Store used buffer space +- */ ++ /* Store used buffer space. */ + *pLen = Offset; + + return (SK_PNMI_ERR_OK); +@@ -2784,10 +2663,7 @@ + SK_U32 Val32; + SK_U64 Val64; + +- +- /* +- * Calculate instance if wished +- */ ++ /* Calculate instance if wished. */ + if ((Instance != (SK_U32)(-1))) { + + if ((Instance < 1) || (Instance > (SK_U32)pAC->I2c.MaxSens)) { +@@ -2804,16 +2680,14 @@ + Limit = (unsigned int) pAC->I2c.MaxSens; + } + +- /* +- * Check action +- */ ++ /* Check action. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* Check length */ ++ /* Check length. */ + switch (Id) { + + case OID_SKGE_SENSOR_VALUE: +@@ -2872,38 +2746,33 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR012, +- SK_PNMI_ERR012MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR012, SK_PNMI_ERR012MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + + } + +- /* +- * Get value +- */ ++ /* Get value. */ + for (Offset = 0; Index < Limit; Index ++) { + + switch (Id) { + + case OID_SKGE_SENSOR_INDEX: + *(pBuf + Offset) = (char)Index; +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_SENSOR_DESCR: + Len = SK_STRLEN(pAC->I2c.SenTable[Index].SenDesc); +- SK_MEMCPY(pBuf + Offset + 1, +- pAC->I2c.SenTable[Index].SenDesc, Len); ++ SK_MEMCPY(pBuf + Offset + 1, pAC->I2c.SenTable[Index].SenDesc, Len); + *(pBuf + Offset) = (char)Len; + Offset += Len + 1; + break; + + case OID_SKGE_SENSOR_TYPE: +- *(pBuf + Offset) = +- (char)pAC->I2c.SenTable[Index].SenType; +- Offset += sizeof(char); ++ *(pBuf + Offset) = (char)pAC->I2c.SenTable[Index].SenType; ++ Offset ++; + break; + + case OID_SKGE_SENSOR_VALUE: +@@ -2940,9 +2809,8 @@ + break; + + case OID_SKGE_SENSOR_STATUS: +- *(pBuf + Offset) = +- (char)pAC->I2c.SenTable[Index].SenErrFlag; +- Offset += sizeof(char); ++ *(pBuf + Offset) = (char)pAC->I2c.SenTable[Index].SenErrFlag; ++ Offset ++; + break; + + case OID_SKGE_SENSOR_WAR_CTS: +@@ -2979,9 +2847,7 @@ + } + } + +- /* +- * Store used buffer space +- */ ++ /* Store used buffer space. */ + *pLen = Offset; + + return (SK_PNMI_ERR_OK); +@@ -3035,9 +2901,7 @@ + int Ret; + SK_U32 Val32; + +- /* +- * Get array of all currently stored VPD keys +- */ ++ /* Get array of all currently stored VPD keys. */ + Ret = GetVpdKeyArr(pAC, IoC, &KeyArr[0][0], sizeof(KeyArr), &KeyNo); + if (Ret != SK_PNMI_ERR_OK) { + *pLen = 0; +@@ -3082,21 +2946,19 @@ + } + } + +- /* +- * Get value, if a query should be performed +- */ ++ /* Get value, if a query should be performed. */ + if (Action == SK_PNMI_GET) { + + switch (Id) { + + case OID_SKGE_VPD_FREE_BYTES: +- /* Check length of buffer */ ++ /* Check length of buffer. */ + if (*pLen < sizeof(SK_U32)) { + + *pLen = sizeof(SK_U32); + return (SK_PNMI_ERR_TOO_SHORT); + } +- /* Get number of free bytes */ ++ /* Get number of free bytes. */ + pVpdStatus = VpdStat(pAC, IoC); + if (pVpdStatus == NULL) { + +@@ -3121,7 +2983,7 @@ + break; + + case OID_SKGE_VPD_ENTRIES_LIST: +- /* Check length */ ++ /* Check length. */ + for (Len = 0, Index = 0; Index < KeyNo; Index ++) { + + Len += SK_STRLEN(KeyArr[Index]) + 1; +@@ -3132,7 +2994,7 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* Get value */ ++ /* Get value. */ + *(pBuf) = (char)Len - 1; + for (Offset = 1, Index = 0; Index < KeyNo; Index ++) { + +@@ -3151,7 +3013,7 @@ + break; + + case OID_SKGE_VPD_ENTRIES_NUMBER: +- /* Check length */ ++ /* Check length. */ + if (*pLen < sizeof(SK_U32)) { + + *pLen = sizeof(SK_U32); +@@ -3164,7 +3026,7 @@ + break; + + case OID_SKGE_VPD_KEY: +- /* Check buffer length, if it is large enough */ ++ /* Check buffer length, if it is large enough. */ + for (Len = 0, Index = FirstIndex; + Index < LastIndex; Index ++) { + +@@ -3180,31 +3042,27 @@ + * Get the key to an intermediate buffer, because + * we have to prepend a length byte. + */ +- for (Offset = 0, Index = FirstIndex; +- Index < LastIndex; Index ++) { ++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) { + + Len = SK_STRLEN(KeyArr[Index]); + + *(pBuf + Offset) = (char)Len; +- SK_MEMCPY(pBuf + Offset + 1, KeyArr[Index], +- Len); ++ SK_MEMCPY(pBuf + Offset + 1, KeyArr[Index], Len); + Offset += Len + 1; + } + *pLen = Offset; + break; + + case OID_SKGE_VPD_VALUE: +- /* Check the buffer length if it is large enough */ +- for (Offset = 0, Index = FirstIndex; +- Index < LastIndex; Index ++) { ++ /* Check the buffer length if it is large enough. */ ++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) { + + BufLen = 256; + if (VpdRead(pAC, IoC, KeyArr[Index], Buf, + (int *)&BufLen) > 0 || + BufLen >= SK_PNMI_VPD_DATALEN) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, +- SK_PNMI_ERR021, ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR021, + SK_PNMI_ERR021MSG); + + return (SK_PNMI_ERR_GENERAL); +@@ -3221,16 +3079,14 @@ + * Get the value to an intermediate buffer, because + * we have to prepend a length byte. + */ +- for (Offset = 0, Index = FirstIndex; +- Index < LastIndex; Index ++) { ++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) { + + BufLen = 256; + if (VpdRead(pAC, IoC, KeyArr[Index], Buf, + (int *)&BufLen) > 0 || + BufLen >= SK_PNMI_VPD_DATALEN) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, +- SK_PNMI_ERR022, ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR022, + SK_PNMI_ERR022MSG); + + *pLen = 0; +@@ -3251,8 +3107,7 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + +- for (Offset = 0, Index = FirstIndex; +- Index < LastIndex; Index ++) { ++ for (Offset = 0, Index = FirstIndex; Index < LastIndex; Index ++) { + + if (VpdMayWrite(KeyArr[Index])) { + +@@ -3278,15 +3133,14 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR023, +- SK_PNMI_ERR023MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR023, SK_PNMI_ERR023MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } + } + else { +- /* The only OID which can be set is VPD_ACTION */ ++ /* The only OID which can be set is VPD_ACTION. */ + if (Id != OID_SKGE_VPD_ACTION) { + + if (Id == OID_SKGE_VPD_FREE_BYTES || +@@ -3300,8 +3154,7 @@ + return (SK_PNMI_ERR_READ_ONLY); + } + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR024, +- SK_PNMI_ERR024MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR024, SK_PNMI_ERR024MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3317,14 +3170,11 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + +- /* +- * The first byte contains the VPD action type we should +- * perform. +- */ ++ /* The first byte contains the VPD action type we should perform. */ + switch (*pBuf) { + + case SK_PNMI_VPD_IGNORE: +- /* Nothing to do */ ++ /* Nothing to do. */ + break; + + case SK_PNMI_VPD_CREATE: +@@ -3356,13 +3206,13 @@ + SK_MEMCPY(Buf, pBuf + 4, Offset); + Buf[Offset] = 0; + +- /* A preset ends here */ ++ /* A PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- /* Write the new entry or modify an existing one */ ++ /* Write the new entry or modify an existing one .*/ + Ret = VpdWrite(pAC, IoC, KeyStr, Buf); + if (Ret == SK_PNMI_VPD_NOWRITE ) { + +@@ -3394,7 +3244,7 @@ + break; + + case SK_PNMI_VPD_DELETE: +- /* Check if the buffer size is plausible */ ++ /* Check if the buffer size is plausible. */ + if (*pLen < 3) { + + *pLen = 3; +@@ -3409,7 +3259,7 @@ + KeyStr[1] = pBuf[2]; + KeyStr[2] = 0; + +- /* Find the passed key in the array */ ++ /* Find the passed key in the array. */ + for (Index = 0; Index < KeyNo; Index ++) { + + if (SK_STRCMP(KeyStr, KeyArr[Index]) == 0) { +@@ -3417,6 +3267,7 @@ + break; + } + } ++ + /* + * If we cannot find the key it is wrong, so we + * return an appropriate error value. +@@ -3432,7 +3283,7 @@ + return (SK_PNMI_ERR_OK); + } + +- /* Ok, you wanted it and you will get it */ ++ /* Ok, you wanted it and you will get it. */ + Ret = VpdDelete(pAC, IoC, KeyStr); + if (Ret != SK_PNMI_VPD_OK) { + +@@ -3505,23 +3356,21 @@ + SK_U32 Val32; + SK_U64 Val64; + SK_U64 Val64RxHwErrs = 0; ++ SK_U64 Val64RxRunt = 0; ++ SK_U64 Val64RxFcs = 0; + SK_U64 Val64TxHwErrs = 0; + SK_BOOL Is64BitReq = SK_FALSE; + char Buf[256]; + int MacType; + +- /* +- * Check instance. We only handle single instance variables. +- */ ++ /* Check instance. We only handle single instance variables. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- /* +- * Check action. We only allow get requests. +- */ ++ /* Check action. We only allow get requests. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; +@@ -3530,9 +3379,7 @@ + + MacType = pAC->GIni.GIMacType; + +- /* +- * Check length for the various supported OIDs +- */ ++ /* Check length for the various supported OIDs. */ + switch (Id) { + + case OID_GEN_XMIT_ERROR: +@@ -3546,14 +3393,12 @@ + + #else /* SK_NDIS_64BIT_CTR */ + +- /* +- * for compatibility, at least 32bit are required for oid +- */ ++ /* For compatibility, at least 32bit are required for OID. */ + if (*pLen < sizeof(SK_U32)) { + /* +- * but indicate handling for 64bit values, +- * if insufficient space is provided +- */ ++ * Indicate handling for 64bit values, ++ * if insufficient space is provided. ++ */ + *pLen = sizeof(SK_U64); + return (SK_PNMI_ERR_TOO_SHORT); + } +@@ -3624,11 +3469,11 @@ + break; + + default: +- /* Checked later */ ++ /* Checked later. */ + break; + } + +- /* Update statistic */ ++ /* Update statistics. */ + if (Id == OID_SKGE_RX_HW_ERROR_CTS || + Id == OID_SKGE_TX_HW_ERROR_CTS || + Id == OID_SKGE_IN_ERRORS_CTS || +@@ -3636,7 +3481,8 @@ + Id == OID_GEN_XMIT_ERROR || + Id == OID_GEN_RCV_ERROR) { + +- /* Force the XMAC to update its statistic counters and ++ /* ++ * Force the XMAC to update its statistic counters and + * Increment semaphore to indicate that an update was + * already done. + */ +@@ -3667,11 +3513,26 @@ + GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_IRLENGTH, NetIndex) + + GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_SYMBOL, NetIndex) + + GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_SHORTS, NetIndex) + +- GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_RUNT, NetIndex) + + GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_TOO_LONG, NetIndex) + +- GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_FCS, NetIndex) + + GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_CEXT, NetIndex); +- break; ++ ++ ++ /* ++ * In some cases the runt and fcs counters are incremented when collisions ++ * occur. We have to correct those counters here. ++ */ ++ Val64RxRunt = GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_RUNT, NetIndex); ++ Val64RxFcs = GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_FCS, NetIndex); ++ ++ if (Val64RxRunt > Val64RxFcs) { ++ Val64RxRunt -= Val64RxFcs; ++ Val64RxHwErrs += Val64RxRunt; ++ } ++ else { ++ Val64RxFcs -= Val64RxRunt; ++ Val64RxHwErrs += Val64RxFcs; ++ } ++ break; + + case OID_SKGE_TX_HW_ERROR_CTS: + case OID_SKGE_OUT_ERROR_CTS: +@@ -3685,9 +3546,7 @@ + } + } + +- /* +- * Retrieve value +- */ ++ /* Retrieve value. */ + switch (Id) { + + case OID_SKGE_SUPPORTED_LIST: +@@ -3697,11 +3556,11 @@ + *pLen = Len; + return (SK_PNMI_ERR_TOO_SHORT); + } +- for (Offset = 0, Index = 0; Offset < Len; +- Offset += sizeof(SK_U32), Index ++) { ++ for (Offset = 0, Index = 0; Offset < Len; Index ++) { + + Val32 = (SK_U32)IdTable[Index].Id; + SK_PNMI_STORE_U32(pBuf + Offset, Val32); ++ Offset += sizeof(SK_U32); + } + *pLen = Len; + break; +@@ -3727,8 +3586,7 @@ + case OID_SKGE_DRIVER_DESCR: + if (pAC->Pnmi.pDriverDescription == NULL) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR007, +- SK_PNMI_ERR007MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR007, SK_PNMI_ERR007MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3737,8 +3595,7 @@ + Len = SK_STRLEN(pAC->Pnmi.pDriverDescription) + 1; + if (Len > SK_PNMI_STRINGLEN1) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR029, +- SK_PNMI_ERR029MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR029, SK_PNMI_ERR029MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3757,8 +3614,7 @@ + case OID_SKGE_DRIVER_VERSION: + if (pAC->Pnmi.pDriverVersion == NULL) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030, +- SK_PNMI_ERR030MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030, SK_PNMI_ERR030MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3767,8 +3623,7 @@ + Len = SK_STRLEN(pAC->Pnmi.pDriverVersion) + 1; + if (Len > SK_PNMI_STRINGLEN1) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031, +- SK_PNMI_ERR031MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031, SK_PNMI_ERR031MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3787,8 +3642,7 @@ + case OID_SKGE_DRIVER_RELDATE: + if (pAC->Pnmi.pDriverReleaseDate == NULL) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030, +- SK_PNMI_ERR053MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR053, SK_PNMI_ERR053MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3797,8 +3651,7 @@ + Len = SK_STRLEN(pAC->Pnmi.pDriverReleaseDate) + 1; + if (Len > SK_PNMI_STRINGLEN1) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031, +- SK_PNMI_ERR054MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR054, SK_PNMI_ERR054MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3817,8 +3670,7 @@ + case OID_SKGE_DRIVER_FILENAME: + if (pAC->Pnmi.pDriverFileName == NULL) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR030, +- SK_PNMI_ERR055MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR055, SK_PNMI_ERR055MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3827,8 +3679,7 @@ + Len = SK_STRLEN(pAC->Pnmi.pDriverFileName) + 1; + if (Len > SK_PNMI_STRINGLEN1) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR031, +- SK_PNMI_ERR056MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR056, SK_PNMI_ERR056MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3854,8 +3705,7 @@ + Len = 256; + if (VpdRead(pAC, IoC, VPD_NAME, Buf, (int *)&Len) > 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR032, +- SK_PNMI_ERR032MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR032, SK_PNMI_ERR032MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3863,8 +3713,7 @@ + Len ++; + if (Len > SK_PNMI_STRINGLEN1) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR033, +- SK_PNMI_ERR033MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR033, SK_PNMI_ERR033MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -3880,7 +3729,6 @@ + break; + + case OID_SKGE_HW_VERSION: +- /* Oh, I love to do some string manipulation */ + if (*pLen < 5) { + + *pLen = 5; +@@ -3889,9 +3737,9 @@ + Val8 = (SK_U8)pAC->GIni.GIPciHwRev; + pBuf[0] = 4; + pBuf[1] = 'v'; +- pBuf[2] = (char)(0x30 | ((Val8 >> 4) & 0x0F)); ++ pBuf[2] = (char)('0' | ((Val8 >> 4) & 0x0f)); + pBuf[3] = '.'; +- pBuf[4] = (char)(0x30 | (Val8 & 0x0F)); ++ pBuf[4] = (char)('0' | (Val8 & 0x0f)); + *pLen = 5; + break; + +@@ -3914,12 +3762,12 @@ + break; + + case OID_SKGE_VAUXAVAIL: +- *pBuf = (char) pAC->GIni.GIVauxAvail; ++ *pBuf = (char)pAC->GIni.GIVauxAvail; + *pLen = sizeof(char); + break; + + case OID_SKGE_BUS_TYPE: +- *pBuf = (char) SK_PNMI_BUS_PCI; ++ *pBuf = (char)SK_PNMI_BUS_PCI; + *pLen = sizeof(char); + break; + +@@ -3968,31 +3816,31 @@ + break; + + case OID_SKGE_RLMT_MONITOR_NUMBER: +-/* XXX Not yet implemented by RLMT therefore we return zero elements */ ++ /* Not yet implemented by RLMT, therefore we return zero elements. */ + Val32 = 0; + SK_PNMI_STORE_U32(pBuf, Val32); + *pLen = sizeof(SK_U32); + break; + + case OID_SKGE_TX_SW_QUEUE_LEN: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxSwQueueLen; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxSwQueueLen + + pAC->Pnmi.BufPort[1].TxSwQueueLen; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxSwQueueLen; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxSwQueueLen + + pAC->Pnmi.Port[1].TxSwQueueLen; +@@ -4004,24 +3852,24 @@ + + + case OID_SKGE_TX_SW_QUEUE_MAX: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxSwQueueMax; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxSwQueueMax + + pAC->Pnmi.BufPort[1].TxSwQueueMax; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxSwQueueMax; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxSwQueueMax + + pAC->Pnmi.Port[1].TxSwQueueMax; +@@ -4032,24 +3880,24 @@ + break; + + case OID_SKGE_TX_RETRY: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxRetryCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxRetryCts + + pAC->Pnmi.BufPort[1].TxRetryCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxRetryCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxRetryCts + + pAC->Pnmi.Port[1].TxRetryCts; +@@ -4060,24 +3908,24 @@ + break; + + case OID_SKGE_RX_INTR_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].RxIntrCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].RxIntrCts + + pAC->Pnmi.BufPort[1].RxIntrCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].RxIntrCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].RxIntrCts + + pAC->Pnmi.Port[1].RxIntrCts; +@@ -4088,24 +3936,24 @@ + break; + + case OID_SKGE_TX_INTR_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxIntrCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxIntrCts + + pAC->Pnmi.BufPort[1].TxIntrCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxIntrCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxIntrCts + + pAC->Pnmi.Port[1].TxIntrCts; +@@ -4116,24 +3964,24 @@ + break; + + case OID_SKGE_RX_NO_BUF_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].RxNoBufCts + + pAC->Pnmi.BufPort[1].RxNoBufCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].RxNoBufCts + + pAC->Pnmi.Port[1].RxNoBufCts; +@@ -4144,24 +3992,24 @@ + break; + + case OID_SKGE_TX_NO_BUF_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxNoBufCts + + pAC->Pnmi.BufPort[1].TxNoBufCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxNoBufCts + + pAC->Pnmi.Port[1].TxNoBufCts; +@@ -4172,24 +4020,24 @@ + break; + + case OID_SKGE_TX_USED_DESCR_NO: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].TxUsedDescrNo; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].TxUsedDescrNo + + pAC->Pnmi.BufPort[1].TxUsedDescrNo; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].TxUsedDescrNo; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].TxUsedDescrNo + + pAC->Pnmi.Port[1].TxUsedDescrNo; +@@ -4200,24 +4048,24 @@ + break; + + case OID_SKGE_RX_DELIVERED_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].RxDeliveredCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].RxDeliveredCts + + pAC->Pnmi.BufPort[1].RxDeliveredCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].RxDeliveredCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].RxDeliveredCts + + pAC->Pnmi.Port[1].RxDeliveredCts; +@@ -4228,24 +4076,24 @@ + break; + + case OID_SKGE_RX_OCTETS_DELIV_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].RxOctetsDeliveredCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].RxOctetsDeliveredCts + + pAC->Pnmi.BufPort[1].RxOctetsDeliveredCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].RxOctetsDeliveredCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].RxOctetsDeliveredCts + + pAC->Pnmi.Port[1].RxOctetsDeliveredCts; +@@ -4266,13 +4114,13 @@ + break; + + case OID_SKGE_IN_ERRORS_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = Val64RxHwErrs + pAC->Pnmi.BufPort[NetIndex].RxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = Val64RxHwErrs + + pAC->Pnmi.BufPort[0].RxNoBufCts + +@@ -4280,11 +4128,11 @@ + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = Val64RxHwErrs + pAC->Pnmi.Port[NetIndex].RxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = Val64RxHwErrs + + pAC->Pnmi.Port[0].RxNoBufCts + +@@ -4296,13 +4144,13 @@ + break; + + case OID_SKGE_OUT_ERROR_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = Val64TxHwErrs + pAC->Pnmi.BufPort[NetIndex].TxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = Val64TxHwErrs + + pAC->Pnmi.BufPort[0].TxNoBufCts + +@@ -4310,11 +4158,11 @@ + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = Val64TxHwErrs + pAC->Pnmi.Port[NetIndex].TxNoBufCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = Val64TxHwErrs + + pAC->Pnmi.Port[0].TxNoBufCts + +@@ -4326,24 +4174,24 @@ + break; + + case OID_SKGE_ERR_RECOVERY_CTS: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.BufPort[NetIndex].ErrRecoveryCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.BufPort[0].ErrRecoveryCts + + pAC->Pnmi.BufPort[1].ErrRecoveryCts; + } + } + else { +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + Val64 = pAC->Pnmi.Port[NetIndex].ErrRecoveryCts; + } +- /* Single net mode */ ++ /* SingleNet mode. */ + else { + Val64 = pAC->Pnmi.Port[0].ErrRecoveryCts + + pAC->Pnmi.Port[1].ErrRecoveryCts; +@@ -4367,7 +4215,7 @@ + break; + + case OID_GEN_RCV_ERROR: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { + Val64 = Val64RxHwErrs + pAC->Pnmi.BufPort[NetIndex].RxNoBufCts; + } +@@ -4376,7 +4224,7 @@ + } + + /* +- * by default 32bit values are evaluated ++ * By default 32bit values are evaluated. + */ + if (!Is64BitReq) { + Val32 = (SK_U32)Val64; +@@ -4390,7 +4238,7 @@ + break; + + case OID_GEN_XMIT_ERROR: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { + Val64 = Val64TxHwErrs + pAC->Pnmi.BufPort[NetIndex].TxNoBufCts; + } +@@ -4399,7 +4247,7 @@ + } + + /* +- * by default 32bit values are evaluated ++ * By default 32bit values are evaluated. + */ + if (!Is64BitReq) { + Val32 = (SK_U32)Val64; +@@ -4413,16 +4261,19 @@ + break; + + case OID_GEN_RCV_NO_BUFFER: +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { +- Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts; ++ Val64 = pAC->Pnmi.BufPort[NetIndex].RxNoBufCts + ++ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_OVERFLOW, NetIndex); ++ + } + else { +- Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts; ++ Val64 = pAC->Pnmi.Port[NetIndex].RxNoBufCts + ++ GetStatVal(pAC, IoC, 0, SK_PNMI_HRX_OVERFLOW, NetIndex); + } + + /* +- * by default 32bit values are evaluated ++ * By default 32bit values are evaluated. + */ + if (!Is64BitReq) { + Val32 = (SK_U32)Val64; +@@ -4442,8 +4293,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR034, +- SK_PNMI_ERR034MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR034, SK_PNMI_ERR034MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -4500,25 +4350,17 @@ + SK_U32 Val32; + SK_U64 Val64; + +- +- /* +- * Check instance. Only single instance OIDs are allowed here. +- */ ++ /* Check instance. Only single instance OIDs are allowed here. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- /* +- * Perform the requested action. +- */ ++ /* Perform the requested action. */ + if (Action == SK_PNMI_GET) { + +- /* +- * Check if the buffer length is large enough. +- */ +- ++ /* Check if the buffer length is large enough. */ + switch (Id) { + + case OID_SKGE_RLMT_MODE: +@@ -4551,8 +4393,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR035, +- SK_PNMI_ERR035MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR035, SK_PNMI_ERR035MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -4571,9 +4412,7 @@ + } + pAC->Pnmi.RlmtUpdatedFlag ++; + +- /* +- * Retrieve Value +- */ ++ /* Retrieve value. */ + switch (Id) { + + case OID_SKGE_RLMT_MODE: +@@ -4651,17 +4490,17 @@ + pAC->Pnmi.RlmtUpdatedFlag --; + } + else { +- /* Perform a preset or set */ ++ /* Perform a PRESET or SET. */ + switch (Id) { + + case OID_SKGE_RLMT_MODE: +- /* Check if the buffer length is plausible */ ++ /* Check if the buffer length is plausible. */ + if (*pLen < sizeof(char)) { + + *pLen = sizeof(char); + return (SK_PNMI_ERR_TOO_SHORT); + } +- /* Check if the value range is correct */ ++ /* Check if the value range is correct. */ + if (*pLen != sizeof(char) || + (*pBuf & SK_PNMI_RLMT_MODE_CHK_LINK) == 0 || + *(SK_U8 *)pBuf > 15) { +@@ -4669,21 +4508,21 @@ + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + *pLen = 0; + return (SK_PNMI_ERR_OK); + } +- /* Send an event to RLMT to change the mode */ ++ /* Send an event to RLMT to change the mode. */ + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); ++ + EventParam.Para32[0] |= (SK_U32)(*pBuf); + EventParam.Para32[1] = 0; + if (SkRlmtEvent(pAC, IoC, SK_RLMT_MODE_CHANGE, + EventParam) > 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR037, +- SK_PNMI_ERR037MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR037, SK_PNMI_ERR037MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -4691,20 +4530,25 @@ + break; + + case OID_SKGE_RLMT_PORT_PREFERRED: +- /* Check if the buffer length is plausible */ ++ /* PRESET/SET action makes no sense in Dual Net mode. */ ++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { ++ break; ++ } ++ ++ /* Check if the buffer length is plausible. */ + if (*pLen < sizeof(char)) { + + *pLen = sizeof(char); + return (SK_PNMI_ERR_TOO_SHORT); + } +- /* Check if the value range is correct */ ++ /* Check if the value range is correct. */ + if (*pLen != sizeof(char) || *(SK_U8 *)pBuf > + (SK_U8)pAC->GIni.GIMacsFound) { + + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + *pLen = 0; +@@ -4717,13 +4561,13 @@ + * make the decision which is the preferred port. + */ + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); ++ + EventParam.Para32[0] = (SK_U32)(*pBuf) - 1; + EventParam.Para32[1] = NetIndex; + if (SkRlmtEvent(pAC, IoC, SK_RLMT_PREFPORT_CHANGE, + EventParam) > 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR038, +- SK_PNMI_ERR038MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR038, SK_PNMI_ERR038MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -4731,22 +4575,20 @@ + break; + + case OID_SKGE_RLMT_CHANGE_THRES: +- /* Check if the buffer length is plausible */ ++ /* Check if the buffer length is plausible. */ + if (*pLen < sizeof(SK_U64)) { + + *pLen = sizeof(SK_U64); + return (SK_PNMI_ERR_TOO_SHORT); + } +- /* +- * There are not many restrictions to the +- * value range. +- */ ++ ++ /* There are not many restrictions to the value range. */ + if (*pLen != sizeof(SK_U64)) { + + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } +- /* A preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + *pLen = 0; +@@ -4761,7 +4603,7 @@ + break; + + default: +- /* The other OIDs are not be able for set */ ++ /* The other OIDs are not be able for set. */ + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } +@@ -4806,54 +4648,49 @@ + SK_U32 Val32; + SK_U64 Val64; + +- /* +- * Calculate the port indexes from the instance. +- */ ++ ++ /* Calculate the port indexes from the instance. */ + PhysPortMax = pAC->GIni.GIMacsFound; + + if ((Instance != (SK_U32)(-1))) { +- /* Check instance range */ ++ /* Check instance range. */ + if ((Instance < 1) || (Instance > PhysPortMax)) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- /* Single net mode */ ++ /* SingleNet mode. */ + PhysPortIndex = Instance - 1; + +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + PhysPortIndex = NetIndex; + } + +- /* Both net modes */ ++ /* Both net modes. */ + Limit = PhysPortIndex + 1; + } + else { +- /* Single net mode */ ++ /* SingleNet mode. */ + PhysPortIndex = 0; + Limit = PhysPortMax; + +- /* Dual net mode */ ++ /* DualNet mode. */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + PhysPortIndex = NetIndex; + Limit = PhysPortIndex + 1; + } + } + +- /* +- * Currently only get requests are allowed. +- */ ++ /* Currently only GET requests are allowed. */ + if (Action != SK_PNMI_GET) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* +- * Check if the buffer length is large enough. +- */ ++ /* Check if the buffer length is large enough. */ + switch (Id) { + + case OID_SKGE_RLMT_PORT_INDEX: +@@ -4877,8 +4714,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR039, +- SK_PNMI_ERR039MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR039, SK_PNMI_ERR039MSG); + + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -4896,9 +4732,7 @@ + } + pAC->Pnmi.RlmtUpdatedFlag ++; + +- /* +- * Get value +- */ ++ /* Get value. */ + Offset = 0; + for (; PhysPortIndex < Limit; PhysPortIndex ++) { + +@@ -5011,19 +4845,21 @@ + int Ret; + SK_EVPARA EventParam; + SK_U32 Val32; ++#ifdef SK_PHY_LP_MODE ++ SK_U8 CurrentPhyPowerState; ++#endif /* SK_PHY_LP_MODE */ + +- /* +- * Calculate instance if wished. MAC index 0 is the virtual MAC. +- */ ++ ++ /* Calculate instance if wished. MAC index 0 is the virtual MAC. */ + PhysPortMax = pAC->GIni.GIMacsFound; + LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax); + +- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */ ++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */ + LogPortMax--; + } + +- if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried */ +- /* Check instance range */ ++ if ((Instance != (SK_U32)(-1))) { /* Only one specific instance is queried. */ ++ /* Check instance range. */ + if ((Instance < 1) || (Instance > LogPortMax)) { + + *pLen = 0; +@@ -5033,18 +4869,16 @@ + Limit = LogPortIndex + 1; + } + +- else { /* Instance == (SK_U32)(-1), get all Instances of that OID */ ++ else { /* Instance == (SK_U32)(-1), get all Instances of that OID. */ + + LogPortIndex = 0; + Limit = LogPortMax; + } + +- /* +- * Perform action +- */ ++ /* Perform action. */ + if (Action == SK_PNMI_GET) { + +- /* Check length */ ++ /* Check length. */ + switch (Id) { + + case OID_SKGE_PMD: +@@ -5082,8 +4916,7 @@ + break; + + default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR041, +- SK_PNMI_ERR041MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR041, SK_PNMI_ERR041MSG); + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } +@@ -5099,9 +4932,7 @@ + } + pAC->Pnmi.SirqUpdatedFlag ++; + +- /* +- * Get value +- */ ++ /* Get value. */ + Offset = 0; + for (; LogPortIndex < Limit; LogPortIndex ++) { + +@@ -5111,107 +4942,99 @@ + + case OID_SKGE_PMD: + *pBufPtr = pAC->Pnmi.PMD; +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_CONNECTOR: + *pBufPtr = pAC->Pnmi.Connector; +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_PHY_TYPE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { + continue; + } +- else { +- /* Get value for physical ports */ +- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); +- Val32 = pAC->GIni.GP[PhysPortIndex].PhyType; +- SK_PNMI_STORE_U32(pBufPtr, Val32); +- } ++ /* Get value for physical port. */ ++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); ++ Val32 = pAC->GIni.GP[PhysPortIndex].PhyType; + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + Val32 = pAC->GIni.GP[NetIndex].PhyType; +- SK_PNMI_STORE_U32(pBufPtr, Val32); + } ++ SK_PNMI_STORE_U32(pBufPtr, Val32); + Offset += sizeof(SK_U32); + break; + + #ifdef SK_PHY_LP_MODE + case OID_SKGE_PHY_LP_MODE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { + continue; + } +- else { +- /* Get value for physical ports */ +- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); +- Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState; +- *pBufPtr = Val8; +- } ++ /* Get value for physical port. */ ++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); ++ *pBufPtr = (SK_U8)pAC->GIni.GP[PhysPortIndex].PPhyPowerState; + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + +- Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState; +- *pBufPtr = Val8; ++ *pBufPtr = (SK_U8)pAC->GIni.GP[NetIndex].PPhyPowerState; + } + Offset += sizeof(SK_U8); + break; + #endif + + case OID_SKGE_LINK_CAP: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkCap; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PLinkCap; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_LINK_MODE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkModeConf; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PLinkModeConf; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_LINK_MODE_STATUS: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + +@@ -5219,147 +5042,147 @@ + CalculateLinkModeStatus(pAC, IoC, PhysPortIndex); + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = CalculateLinkModeStatus(pAC, IoC, NetIndex); + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_LINK_STATUS: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = CalculateLinkStatus(pAC, IoC, PhysPortIndex); + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = CalculateLinkStatus(pAC, IoC, NetIndex); + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_FLOWCTRL_CAP: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlCap; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlCap; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_FLOWCTRL_MODE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlMode; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlMode; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_FLOWCTRL_STATUS: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PFlowCtrlStatus; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PFlowCtrlStatus; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_PHY_OPERATION_CAP: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet Mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PMSCap; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PMSCap; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_PHY_OPERATION_MODE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PMSMode; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PMSMode; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_PHY_OPERATION_STATUS: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + +@@ -5370,70 +5193,70 @@ + + *pBufPtr = pAC->GIni.GP[NetIndex].PMSStatus; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_SPEED_CAP: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical ports */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeedCap; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeedCap; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_SPEED_MODE: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeed; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeed; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_SPEED_STATUS: +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { +- /* Get value for virtual port */ ++ /* Get value for virtual port. */ + VirtualConf(pAC, IoC, Id, pBufPtr); + } + else { +- /* Get value for physical port */ ++ /* Get value for physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS( + pAC, LogPortIndex); + + *pBufPtr = pAC->GIni.GP[PhysPortIndex].PLinkSpeedUsed; + } + } +- else { /* DualNetMode */ ++ else { /* DualNet mode. */ + + *pBufPtr = pAC->GIni.GP[NetIndex].PLinkSpeedUsed; + } +- Offset += sizeof(char); ++ Offset ++; + break; + + case OID_SKGE_MTU: +@@ -5486,40 +5309,33 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + break; +-#endif ++#endif /* SK_PHY_LP_MODE */ + + case OID_SKGE_MTU: +- if (*pLen < sizeof(SK_U32)) { ++ if (*pLen < (Limit - LogPortIndex) * sizeof(SK_U32)) { + +- *pLen = sizeof(SK_U32); ++ *pLen = (Limit - LogPortIndex) * sizeof(SK_U32); + return (SK_PNMI_ERR_TOO_SHORT); + } +- if (*pLen != sizeof(SK_U32)) { +- +- *pLen = 0; +- return (SK_PNMI_ERR_BAD_VALUE); +- } + break; +- ++ + default: + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* +- * Perform preset or set +- */ ++ /* Perform PRESET or SET. */ + Offset = 0; + for (; LogPortIndex < Limit; LogPortIndex ++) { + ++ Val8 = *(pBuf + Offset); ++ + switch (Id) { + + case OID_SKGE_LINK_MODE: +- /* Check the value range */ +- Val8 = *(pBuf + Offset); ++ /* Check the value range. */ + if (Val8 == 0) { +- +- Offset += sizeof(char); ++ Offset++; + break; + } + if (Val8 < SK_LMODE_HALF || +@@ -5530,51 +5346,68 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- if (LogPortIndex == 0) { +- +- /* +- * The virtual port consists of all currently +- * active ports. Find them and send an event +- * with the new link mode to SIRQ. +- */ +- for (PhysPortIndex = 0; +- PhysPortIndex < PhysPortMax; +- PhysPortIndex ++) { +- +- if (!pAC->Pnmi.Port[PhysPortIndex]. +- ActiveFlag) { +- +- continue; +- } ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ ++ if (LogPortIndex == 0) { ++ /* ++ * The virtual port consists of all currently ++ * active ports. Find them and send an event ++ * with the new link mode to SIRQ. ++ */ ++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; ++ PhysPortIndex ++) { + +- EventParam.Para32[0] = PhysPortIndex; ++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { ++ continue; ++ } ++ ++ EventParam.Para32[0] = PhysPortIndex; ++ EventParam.Para32[1] = (SK_U32)Val8; ++ if (SkGeSirqEvent(pAC, IoC, ++ SK_HWEV_SET_LMODE, ++ EventParam) > 0) { ++ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, ++ SK_PNMI_ERR043, ++ SK_PNMI_ERR043MSG); ++ ++ *pLen = 0; ++ return (SK_PNMI_ERR_GENERAL); ++ } ++ } /* for */ ++ } ++ else { ++ /* ++ * Send an event with the new link mode to ++ * the SIRQ module. ++ */ ++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( ++ pAC, LogPortIndex); + EventParam.Para32[1] = (SK_U32)Val8; +- if (SkGeSirqEvent(pAC, IoC, +- SK_HWEV_SET_LMODE, ++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_LMODE, + EventParam) > 0) { +- ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW, + SK_PNMI_ERR043, + SK_PNMI_ERR043MSG); +- ++ + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } + } + } +- else { ++ else { /* DualNet mode. */ ++ + /* + * Send an event with the new link mode to + * the SIRQ module. + */ +- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); ++ EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)Val8; + if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_LMODE, + EventParam) > 0) { +@@ -5587,15 +5420,13 @@ + return (SK_PNMI_ERR_GENERAL); + } + } +- Offset += sizeof(char); ++ Offset++; + break; + + case OID_SKGE_FLOWCTRL_MODE: +- /* Check the value range */ +- Val8 = *(pBuf + Offset); ++ /* Check the value range. */ + if (Val8 == 0) { +- +- Offset += sizeof(char); ++ Offset++; + break; + } + if (Val8 < SK_FLOW_MODE_NONE || +@@ -5606,30 +5437,48 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- if (LogPortIndex == 0) { ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ ++ if (LogPortIndex == 0) { ++ /* ++ * The virtual port consists of all currently ++ * active ports. Find them and send an event ++ * with the new flow control mode to SIRQ. ++ */ ++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; ++ PhysPortIndex ++) { + +- /* +- * The virtual port consists of all currently +- * active ports. Find them and send an event +- * with the new flow control mode to SIRQ. +- */ +- for (PhysPortIndex = 0; +- PhysPortIndex < PhysPortMax; +- PhysPortIndex ++) { ++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { ++ continue; ++ } + +- if (!pAC->Pnmi.Port[PhysPortIndex]. +- ActiveFlag) { ++ EventParam.Para32[0] = PhysPortIndex; ++ EventParam.Para32[1] = (SK_U32)Val8; ++ if (SkGeSirqEvent(pAC, IoC, ++ SK_HWEV_SET_FLOWMODE, ++ EventParam) > 0) { ++ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, ++ SK_PNMI_ERR044, ++ SK_PNMI_ERR044MSG); + +- continue; ++ *pLen = 0; ++ return (SK_PNMI_ERR_GENERAL); ++ } + } +- +- EventParam.Para32[0] = PhysPortIndex; ++ } ++ else { ++ /* ++ * Send an event with the new flow control ++ * mode to the SIRQ module. ++ */ ++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( ++ pAC, LogPortIndex); + EventParam.Para32[1] = (SK_U32)Val8; + if (SkGeSirqEvent(pAC, IoC, + SK_HWEV_SET_FLOWMODE, +@@ -5644,17 +5493,16 @@ + } + } + } +- else { ++ else { /* DualNet mode. */ ++ + /* +- * Send an event with the new flow control +- * mode to the SIRQ module. ++ * Send an event with the new link mode to ++ * the SIRQ module. + */ +- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); ++ EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)Val8; +- if (SkGeSirqEvent(pAC, IoC, +- SK_HWEV_SET_FLOWMODE, EventParam) +- > 0) { ++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_FLOWMODE, ++ EventParam) > 0) { + + SK_ERR_LOG(pAC, SK_ERRCL_SW, + SK_PNMI_ERR044, +@@ -5664,15 +5512,14 @@ + return (SK_PNMI_ERR_GENERAL); + } + } +- Offset += sizeof(char); ++ Offset++; + break; + + case OID_SKGE_PHY_OPERATION_MODE : +- /* Check the value range */ +- Val8 = *(pBuf + Offset); ++ /* Check the value range. */ + if (Val8 == 0) { +- /* mode of this port remains unchanged */ +- Offset += sizeof(char); ++ /* Mode of this port remains unchanged. */ ++ Offset++; + break; + } + if (Val8 < SK_MS_MODE_AUTO || +@@ -5683,34 +5530,51 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- if (LogPortIndex == 0) { ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ ++ if (LogPortIndex == 0) { ++ /* ++ * The virtual port consists of all currently ++ * active ports. Find them and send an event ++ * with new master/slave (role) mode to SIRQ. ++ */ ++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; ++ PhysPortIndex ++) { + +- /* +- * The virtual port consists of all currently +- * active ports. Find them and send an event +- * with new master/slave (role) mode to SIRQ. +- */ +- for (PhysPortIndex = 0; +- PhysPortIndex < PhysPortMax; +- PhysPortIndex ++) { ++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { ++ continue; ++ } + +- if (!pAC->Pnmi.Port[PhysPortIndex]. +- ActiveFlag) { ++ EventParam.Para32[0] = PhysPortIndex; ++ EventParam.Para32[1] = (SK_U32)Val8; ++ if (SkGeSirqEvent(pAC, IoC, ++ SK_HWEV_SET_ROLE, ++ EventParam) > 0) { ++ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, ++ SK_PNMI_ERR042, ++ SK_PNMI_ERR042MSG); + +- continue; ++ *pLen = 0; ++ return (SK_PNMI_ERR_GENERAL); ++ } + } +- +- EventParam.Para32[0] = PhysPortIndex; ++ } ++ else { ++ /* ++ * Send an event with the new master/slave ++ * (role) mode to the SIRQ module. ++ */ ++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( ++ pAC, LogPortIndex); + EventParam.Para32[1] = (SK_U32)Val8; + if (SkGeSirqEvent(pAC, IoC, +- SK_HWEV_SET_ROLE, +- EventParam) > 0) { ++ SK_HWEV_SET_ROLE, EventParam) > 0) { + + SK_ERR_LOG(pAC, SK_ERRCL_SW, + SK_PNMI_ERR042, +@@ -5721,16 +5585,16 @@ + } + } + } +- else { ++ else { /* DualNet mode. */ ++ + /* +- * Send an event with the new master/slave +- * (role) mode to the SIRQ module. ++ * Send an event with the new link mode to ++ * the SIRQ module. + */ +- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); ++ EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)Val8; +- if (SkGeSirqEvent(pAC, IoC, +- SK_HWEV_SET_ROLE, EventParam) > 0) { ++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_ROLE, ++ EventParam) > 0) { + + SK_ERR_LOG(pAC, SK_ERRCL_SW, + SK_PNMI_ERR042, +@@ -5740,16 +5604,13 @@ + return (SK_PNMI_ERR_GENERAL); + } + } +- +- Offset += sizeof(char); ++ Offset++; + break; + + case OID_SKGE_SPEED_MODE: +- /* Check the value range */ +- Val8 = *(pBuf + Offset); ++ /* Check the value range. */ + if (Val8 == 0) { +- +- Offset += sizeof(char); ++ Offset++; + break; + } + if (Val8 < (SK_LSPEED_AUTO) || +@@ -5760,29 +5621,49 @@ + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- if (LogPortIndex == 0) { ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ ++ if (LogPortIndex == 0) { + +- /* +- * The virtual port consists of all currently +- * active ports. Find them and send an event +- * with the new flow control mode to SIRQ. +- */ +- for (PhysPortIndex = 0; +- PhysPortIndex < PhysPortMax; +- PhysPortIndex ++) { ++ /* ++ * The virtual port consists of all currently ++ * active ports. Find them and send an event ++ * with the new flow control mode to SIRQ. ++ */ ++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; ++ PhysPortIndex ++) { + +- if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { ++ if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { ++ continue; ++ } + +- continue; +- } ++ EventParam.Para32[0] = PhysPortIndex; ++ EventParam.Para32[1] = (SK_U32)Val8; ++ if (SkGeSirqEvent(pAC, IoC, ++ SK_HWEV_SET_SPEED, ++ EventParam) > 0) { ++ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, ++ SK_PNMI_ERR045, ++ SK_PNMI_ERR045MSG); + +- EventParam.Para32[0] = PhysPortIndex; ++ *pLen = 0; ++ return (SK_PNMI_ERR_GENERAL); ++ } ++ } ++ } ++ else { ++ /* ++ * Send an event with the new flow control ++ * mode to the SIRQ module. ++ */ ++ EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( ++ pAC, LogPortIndex); + EventParam.Para32[1] = (SK_U32)Val8; + if (SkGeSirqEvent(pAC, IoC, + SK_HWEV_SET_SPEED, +@@ -5797,16 +5678,15 @@ + } + } + } +- else { ++ else { /* DualNet mode. */ ++ + /* +- * Send an event with the new flow control +- * mode to the SIRQ module. ++ * Send an event with the new link mode to ++ * the SIRQ module. + */ +- EventParam.Para32[0] = SK_PNMI_PORT_LOG2PHYS( +- pAC, LogPortIndex); ++ EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)Val8; +- if (SkGeSirqEvent(pAC, IoC, +- SK_HWEV_SET_SPEED, ++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_SET_SPEED, + EventParam) > 0) { + + SK_ERR_LOG(pAC, SK_ERRCL_SW, +@@ -5817,23 +5697,25 @@ + return (SK_PNMI_ERR_GENERAL); + } + } +- Offset += sizeof(char); ++ Offset++; + break; + +- case OID_SKGE_MTU : +- /* Check the value range */ +- Val32 = *(SK_U32*)(pBuf + Offset); ++ case OID_SKGE_MTU: ++ /* Check the value range. */ ++ SK_PNMI_READ_U32((pBuf + Offset), Val32); ++ + if (Val32 == 0) { +- /* mtu of this port remains unchanged */ ++ /* MTU of this port remains unchanged. */ + Offset += sizeof(SK_U32); + break; + } ++ + if (SK_DRIVER_PRESET_MTU(pAC, IoC, NetIndex, Val32) != 0) { + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } + +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + return (SK_PNMI_ERR_OK); + } +@@ -5844,116 +5726,69 @@ + + Offset += sizeof(SK_U32); + break; +- ++ + #ifdef SK_PHY_LP_MODE + case OID_SKGE_PHY_LP_MODE: +- /* The preset ends here */ ++ /* The PRESET ends here. */ + if (Action == SK_PNMI_PRESET) { + + return (SK_PNMI_ERR_OK); + } + +- if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ ++ if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNet mode. */ + if (LogPortIndex == 0) { + Offset = 0; + continue; + } +- else { +- /* Set value for physical ports */ +- PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); +- +- switch (*(pBuf + Offset)) { +- case 0: +- /* If LowPowerMode is active, we can leave it. */ +- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { +- +- Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex); +- +- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) { +- +- SkDrvInitAdapter(pAC); +- } +- break; +- } +- else { +- *pLen = 0; +- return (SK_PNMI_ERR_GENERAL); +- } +- case 1: +- case 2: +- case 3: +- case 4: +- /* If no LowPowerMode is active, we can enter it. */ +- if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { +- +- if ((*(pBuf + Offset)) < 3) { +- +- SkDrvDeInitAdapter(pAC); +- } +- +- Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf); +- break; +- } +- else { +- *pLen = 0; +- return (SK_PNMI_ERR_GENERAL); +- } +- default: +- *pLen = 0; +- return (SK_PNMI_ERR_BAD_VALUE); +- } +- } + } +- else { /* DualNetMode */ +- +- switch (*(pBuf + Offset)) { +- case 0: +- /* If we are in a LowPowerMode, we can leave it. */ +- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { ++ /* Set value for physical port. */ ++ PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); ++ CurrentPhyPowerState = pAC->GIni.GP[PhysPortIndex].PPhyPowerState; + +- Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex); +- +- if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) { ++ switch (Val8) { ++ case PHY_PM_OPERATIONAL_MODE: ++ /* If LowPowerMode is active, we can leave it. */ ++ if (CurrentPhyPowerState) { + +- SkDrvInitAdapter(pAC); +- } +- break; +- } +- else { +- *pLen = 0; +- return (SK_PNMI_ERR_GENERAL); +- } +- +- case 1: +- case 2: +- case 3: +- case 4: +- /* If we are not already in LowPowerMode, we can enter it. */ +- if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { +- +- if ((*(pBuf + Offset)) < 3) { +- +- SkDrvDeInitAdapter(pAC); +- } +- else { +- +- Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf); +- } +- break; +- } +- else { +- *pLen = 0; +- return (SK_PNMI_ERR_GENERAL); ++ Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex); ++ ++ if ((CurrentPhyPowerState == PHY_PM_DEEP_SLEEP) || ++ (CurrentPhyPowerState == PHY_PM_IEEE_POWER_DOWN)) { ++ ++ SkDrvInitAdapter(pAC); + } +- +- default: ++ break; ++ } ++ else { + *pLen = 0; +- return (SK_PNMI_ERR_BAD_VALUE); +- } ++ return (SK_PNMI_ERR_GENERAL); ++ } ++ case PHY_PM_DEEP_SLEEP: ++ case PHY_PM_IEEE_POWER_DOWN: ++ /* If no LowPowerMode is active, we can enter it. */ ++ if (!CurrentPhyPowerState) { ++ SkDrvDeInitAdapter(pAC); ++ } ++ ++ case PHY_PM_ENERGY_DETECT: ++ case PHY_PM_ENERGY_DETECT_PLUS: ++ /* If no LowPowerMode is active, we can enter it. */ ++ if (!CurrentPhyPowerState) { ++ ++ Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf); ++ break; ++ } ++ else { ++ *pLen = 0; ++ return (SK_PNMI_ERR_GENERAL); ++ } ++ default: ++ *pLen = 0; ++ return (SK_PNMI_ERR_BAD_VALUE); + } +- Offset += sizeof(SK_U8); ++ Offset++; + break; +-#endif ++#endif /* SK_PHY_LP_MODE */ + + default: + SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_ERR, +@@ -6003,14 +5838,11 @@ + unsigned int Limit; + unsigned int Offset; + unsigned int Entries; +- + +- /* +- * Calculate instance if wished. +- */ +- /* XXX Not yet implemented. Return always an empty table. */ ++ /* Not implemented yet. Return always an empty table. */ + Entries = 0; + ++ /* Calculate instance if wished. */ + if ((Instance != (SK_U32)(-1))) { + + if ((Instance < 1) || (Instance > Entries)) { +@@ -6027,12 +5859,10 @@ + Limit = Entries; + } + +- /* +- * Get/Set value +- */ ++ /* GET/SET value. */ + if (Action == SK_PNMI_GET) { + +- for (Offset=0; Index < Limit; Index ++) { ++ for (Offset = 0; Index < Limit; Index ++) { + + switch (Id) { + +@@ -6054,32 +5884,29 @@ + *pLen = Offset; + } + else { +- /* Only MONITOR_ADMIN can be set */ ++ /* Only MONITOR_ADMIN can be set. */ + if (Id != OID_SKGE_RLMT_MONITOR_ADMIN) { + + *pLen = 0; + return (SK_PNMI_ERR_READ_ONLY); + } + +- /* Check if the length is plausible */ ++ /* Check if the length is plausible. */ + if (*pLen < (Limit - Index)) { + + return (SK_PNMI_ERR_TOO_SHORT); + } +- /* Okay, we have a wide value range */ ++ /* Okay, we have a wide value range. */ + if (*pLen != (Limit - Index)) { + + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } +-/* +- for (Offset=0; Index < Limit; Index ++) { +- } +-*/ +-/* +- * XXX Not yet implemented. Return always BAD_VALUE, because the table +- * is empty. +- */ ++ ++ /* ++ * Not yet implemented. Return always BAD_VALUE, ++ * because the table is empty. ++ */ + *pLen = 0; + return (SK_PNMI_ERR_BAD_VALUE); + } +@@ -6120,14 +5947,12 @@ + PortActiveFlag = SK_FALSE; + PhysPortMax = pAC->GIni.GIMacsFound; + +- for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; +- PhysPortIndex ++) { ++ for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; PhysPortIndex ++) { + + pPrt = &pAC->GIni.GP[PhysPortIndex]; + +- /* Check if the physical port is active */ ++ /* Check if the physical port is active. */ + if (!pAC->Pnmi.Port[PhysPortIndex].ActiveFlag) { +- + continue; + } + +@@ -6136,12 +5961,13 @@ + switch (Id) { + + case OID_SKGE_PHY_TYPE: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + Val32 = pPrt->PhyType; + SK_PNMI_STORE_U32(pBuf, Val32); + continue; + } ++ break; + + case OID_SKGE_LINK_CAP: + +@@ -6155,7 +5981,7 @@ + break; + + case OID_SKGE_LINK_MODE: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PLinkModeConf; +@@ -6163,9 +5989,8 @@ + } + + /* +- * If we find an active port with a different link +- * mode than the first one we return a value that +- * indicates that the link mode is indeterminated. ++ * If we find an active port with a different link mode ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PLinkModeConf) { + +@@ -6174,10 +5999,10 @@ + break; + + case OID_SKGE_LINK_MODE_STATUS: +- /* Get the link mode of the physical port */ ++ /* Get the link mode of the physical port. */ + Val8 = CalculateLinkModeStatus(pAC, IoC, PhysPortIndex); + +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = Val8; +@@ -6185,10 +6010,8 @@ + } + + /* +- * If we find an active port with a different link +- * mode status than the first one we return a value +- * that indicates that the link mode status is +- * indeterminated. ++ * If we find an active port with a different link mode status ++ * than the first one we return indeterminated. + */ + if (*pBuf != Val8) { + +@@ -6197,10 +6020,10 @@ + break; + + case OID_SKGE_LINK_STATUS: +- /* Get the link status of the physical port */ ++ /* Get the link status of the physical port. */ + Val8 = CalculateLinkStatus(pAC, IoC, PhysPortIndex); + +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = Val8; +@@ -6208,10 +6031,8 @@ + } + + /* +- * If we find an active port with a different link +- * status than the first one, we return a value +- * that indicates that the link status is +- * indeterminated. ++ * If we find an active port with a different link status ++ * than the first one we return indeterminated. + */ + if (*pBuf != Val8) { + +@@ -6220,7 +6041,7 @@ + break; + + case OID_SKGE_FLOWCTRL_CAP: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PFlowCtrlCap; +@@ -6235,7 +6056,7 @@ + break; + + case OID_SKGE_FLOWCTRL_MODE: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PFlowCtrlMode; +@@ -6243,9 +6064,8 @@ + } + + /* +- * If we find an active port with a different flow +- * control mode than the first one, we return a value +- * that indicates that the mode is indeterminated. ++ * If we find an active port with a different flow-control mode ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PFlowCtrlMode) { + +@@ -6254,7 +6074,7 @@ + break; + + case OID_SKGE_FLOWCTRL_STATUS: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PFlowCtrlStatus; +@@ -6262,10 +6082,8 @@ + } + + /* +- * If we find an active port with a different flow +- * control status than the first one, we return a +- * value that indicates that the status is +- * indeterminated. ++ * If we find an active port with a different flow-control status ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PFlowCtrlStatus) { + +@@ -6274,7 +6092,7 @@ + break; + + case OID_SKGE_PHY_OPERATION_CAP: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PMSCap; +@@ -6289,7 +6107,7 @@ + break; + + case OID_SKGE_PHY_OPERATION_MODE: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PMSMode; +@@ -6297,9 +6115,8 @@ + } + + /* +- * If we find an active port with a different master/ +- * slave mode than the first one, we return a value +- * that indicates that the mode is indeterminated. ++ * If we find an active port with a different master/slave mode ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PMSMode) { + +@@ -6308,7 +6125,7 @@ + break; + + case OID_SKGE_PHY_OPERATION_STATUS: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PMSStatus; +@@ -6316,10 +6133,8 @@ + } + + /* +- * If we find an active port with a different master/ +- * slave status than the first one, we return a +- * value that indicates that the status is +- * indeterminated. ++ * If we find an active port with a different master/slave status ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PMSStatus) { + +@@ -6328,7 +6143,7 @@ + break; + + case OID_SKGE_SPEED_MODE: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PLinkSpeed; +@@ -6336,9 +6151,8 @@ + } + + /* +- * If we find an active port with a different flow +- * control mode than the first one, we return a value +- * that indicates that the mode is indeterminated. ++ * If we find an active port with a different link speed ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PLinkSpeed) { + +@@ -6347,7 +6161,7 @@ + break; + + case OID_SKGE_SPEED_STATUS: +- /* Check if it is the first active port */ ++ /* Check if it is the first active port. */ + if (*pBuf == 0) { + + *pBuf = pPrt->PLinkSpeedUsed; +@@ -6355,10 +6169,8 @@ + } + + /* +- * If we find an active port with a different flow +- * control status than the first one, we return a +- * value that indicates that the status is +- * indeterminated. ++ * If we find an active port with a different link speed used ++ * than the first one we return indeterminated. + */ + if (*pBuf != pPrt->PLinkSpeedUsed) { + +@@ -6368,9 +6180,7 @@ + } + } + +- /* +- * If no port is active return an indeterminated answer +- */ ++ /* If no port is active return an indeterminated answer. */ + if (!PortActiveFlag) { + + switch (Id) { +@@ -6487,16 +6297,15 @@ + { + SK_U8 Result; + +- /* Get the current mode, which can be full or half duplex */ ++ /* Get the current mode, which can be full or half duplex. */ + Result = pAC->GIni.GP[PhysPortIndex].PLinkModeStatus; + +- /* Check if no valid mode could be found (link is down) */ ++ /* Check if no valid mode could be found (link is down). */ + if (Result < SK_LMODE_STAT_HALF) { + + Result = SK_LMODE_STAT_UNKNOWN; + } + else if (pAC->GIni.GP[PhysPortIndex].PLinkMode >= SK_LMODE_AUTOHALF) { +- + /* + * Auto-negotiation was used to bring up the link. Change + * the already found duplex status that it indicates +@@ -6541,22 +6350,19 @@ + int Index; + int Ret; + +- + SK_MEMSET(pKeyArr, 0, KeyArrLen); + +- /* +- * Get VPD key list +- */ +- Ret = VpdKeys(pAC, IoC, (char *)&BufKeys, (int *)&BufKeysLen, ++ /* Get VPD key list. */ ++ Ret = VpdKeys(pAC, IoC, BufKeys, (int *)&BufKeysLen, + (int *)pKeyNo); ++ + if (Ret > 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR014, +- SK_PNMI_ERR014MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR014, SK_PNMI_ERR014MSG); + + return (SK_PNMI_ERR_GENERAL); + } +- /* If no keys are available return now */ ++ /* If no keys are available return now. */ + if (*pKeyNo == 0 || BufKeysLen == 0) { + + return (SK_PNMI_ERR_OK); +@@ -6564,12 +6370,11 @@ + /* + * If the key list is too long for us trunc it and give a + * errorlog notification. This case should not happen because +- * the maximum number of keys is limited due to RAM limitations ++ * the maximum number of keys is limited due to RAM limitations. + */ + if (*pKeyNo > SK_PNMI_VPD_ENTRIES) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR015, +- SK_PNMI_ERR015MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR015, SK_PNMI_ERR015MSG); + + *pKeyNo = SK_PNMI_VPD_ENTRIES; + } +@@ -6582,14 +6387,12 @@ + Offset ++) { + + if (BufKeys[Offset] != 0) { +- + continue; + } + + if (Offset - StartOffset > SK_PNMI_VPD_KEY_SIZE) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR016, +- SK_PNMI_ERR016MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR016, SK_PNMI_ERR016MSG); + return (SK_PNMI_ERR_GENERAL); + } + +@@ -6600,7 +6403,7 @@ + StartOffset = Offset + 1; + } + +- /* Last key not zero terminated? Get it anyway */ ++ /* Last key not zero terminated? Get it anyway. */ + if (StartOffset < Offset) { + + SK_STRNCPY(pKeyArr + Index * SK_PNMI_VPD_KEY_SIZE, +@@ -6629,19 +6432,18 @@ + { + SK_EVPARA EventParam; + +- + /* Was the module already updated during the current PNMI call? */ + if (pAC->Pnmi.SirqUpdatedFlag > 0) { + + return (SK_PNMI_ERR_OK); + } + +- /* Send an synchronuous update event to the module */ ++ /* Send an synchronuous update event to the module. */ + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); +- if (SkGeSirqEvent(pAC, IoC, SK_HWEV_UPDATE_STAT, EventParam) > 0) { ++ ++ if (SkGeSirqEvent(pAC, IoC, SK_HWEV_UPDATE_STAT, EventParam)) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR047, +- SK_PNMI_ERR047MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR047, SK_PNMI_ERR047MSG); + + return (SK_PNMI_ERR_GENERAL); + } +@@ -6669,21 +6471,19 @@ + { + SK_EVPARA EventParam; + +- + /* Was the module already updated during the current PNMI call? */ + if (pAC->Pnmi.RlmtUpdatedFlag > 0) { + + return (SK_PNMI_ERR_OK); + } + +- /* Send an synchronuous update event to the module */ ++ /* Send an synchronuous update event to the module. */ + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); + EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)-1; + if (SkRlmtEvent(pAC, IoC, SK_RLMT_STATS_UPDATE, EventParam) > 0) { + +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR048, +- SK_PNMI_ERR048MSG); ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SK_PNMI_ERR048, SK_PNMI_ERR048MSG); + + return (SK_PNMI_ERR_GENERAL); + } +@@ -6721,20 +6521,20 @@ + return (SK_PNMI_ERR_OK); + } + +- /* Send an update command to all MACs specified */ ++ /* Send an update command to all MACs specified. */ + for (MacIndex = FirstMac; MacIndex <= LastMac; MacIndex ++) { + + /* + * 2002-09-13 pweber: Freeze the current SW counters. + * (That should be done as close as + * possible to the update of the +- * HW counters) ++ * HW counters). + */ + if (pAC->GIni.GIMacType == SK_MAC_XMAC) { + pAC->Pnmi.BufPort[MacIndex] = pAC->Pnmi.Port[MacIndex]; + } + +- /* 2002-09-13 pweber: Update the HW counter */ ++ /* 2002-09-13 pweber: Update the HW counter. */ + if (pAC->GIni.GIFunc.pFnMacUpdateStats(pAC, IoC, MacIndex) != 0) { + + return (SK_PNMI_ERR_GENERAL); +@@ -6772,19 +6572,19 @@ + SK_U64 Val = 0; + + +- if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* Dual net mode */ ++ if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { /* DualNet mode. */ + + PhysPortIndex = NetIndex; + + Val = GetPhysStatVal(pAC, IoC, PhysPortIndex, StatIndex); + } +- else { /* Single Net mode */ ++ else { /* SingleNet mode. */ + + if (LogPortIndex == 0) { + + PhysPortMax = pAC->GIni.GIMacsFound; + +- /* Add counter of all active ports */ ++ /* Add counter of all active ports. */ + for (PhysPortIndex = 0; PhysPortIndex < PhysPortMax; + PhysPortIndex ++) { + +@@ -6794,11 +6594,11 @@ + } + } + +- /* Correct value because of port switches */ ++ /* Correct value because of port switches. */ + Val += pAC->Pnmi.VirtualCounterOffset[StatIndex]; + } + else { +- /* Get counter value of physical port */ ++ /* Get counter value of physical port. */ + PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); + + Val = GetPhysStatVal(pAC, IoC, PhysPortIndex, StatIndex); +@@ -6844,7 +6644,7 @@ + + MacType = pAC->GIni.GIMacType; + +- /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort) */ ++ /* 2002-09-17 pweber: For XMAC, use the frozen SW counters (BufPort). */ + if (MacType == SK_MAC_XMAC) { + pPnmiPrt = &pAC->Pnmi.BufPort[PhysPortIndex]; + } +@@ -6912,7 +6712,7 @@ + case SK_PNMI_HTX_BURST: + case SK_PNMI_HTX_EXCESS_DEF: + case SK_PNMI_HTX_CARRIER: +- /* Not supported by GMAC */ ++ /* Not supported by GMAC. */ + if (MacType == SK_MAC_GMAC) { + return (Val); + } +@@ -6924,7 +6724,7 @@ + break; + + case SK_PNMI_HTX_MACC: +- /* GMAC only supports PAUSE MAC control frames */ ++ /* GMAC only supports PAUSE MAC control frames. */ + if (MacType == SK_MAC_GMAC) { + HelpIndex = SK_PNMI_HTX_PMACC; + } +@@ -6941,7 +6741,7 @@ + + case SK_PNMI_HTX_COL: + case SK_PNMI_HRX_UNDERSIZE: +- /* Not supported by XMAC */ ++ /* Not supported by XMAC. */ + if (MacType == SK_MAC_XMAC) { + return (Val); + } +@@ -6953,7 +6753,7 @@ + break; + + case SK_PNMI_HTX_DEFFERAL: +- /* Not supported by GMAC */ ++ /* Not supported by GMAC. */ + if (MacType == SK_MAC_GMAC) { + return (Val); + } +@@ -6971,7 +6771,7 @@ + HighVal = 0; + } + else { +- /* Otherwise get contents of hardware register */ ++ /* Otherwise get contents of hardware register. */ + (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex, + StatAddr[StatIndex][MacType].Reg, + &LowVal); +@@ -6980,7 +6780,7 @@ + break; + + case SK_PNMI_HRX_BADOCTET: +- /* Not supported by XMAC */ ++ /* Not supported by XMAC. */ + if (MacType == SK_MAC_XMAC) { + return (Val); + } +@@ -6999,7 +6799,7 @@ + return (Val); + + case SK_PNMI_HRX_LONGFRAMES: +- /* For XMAC the SW counter is managed by PNMI */ ++ /* For XMAC the SW counter is managed by PNMI. */ + if (MacType == SK_MAC_XMAC) { + return (pPnmiPrt->StatRxLongFrameCts); + } +@@ -7019,7 +6819,7 @@ + Val = (((SK_U64)HighVal << 32) | (SK_U64)LowVal); + + if (MacType == SK_MAC_GMAC) { +- /* For GMAC the SW counter is additionally managed by PNMI */ ++ /* For GMAC the SW counter is additionally managed by PNMI. */ + Val += pPnmiPrt->StatRxFrameTooLongCts; + } + else { +@@ -7037,20 +6837,19 @@ + break; + + case SK_PNMI_HRX_SHORTS: +- /* Not supported by GMAC */ ++ /* Not supported by GMAC. */ + if (MacType == SK_MAC_GMAC) { + /* GM_RXE_FRAG?? */ + return (Val); + } + + /* +- * XMAC counts short frame errors even if link down (#10620) +- * +- * If link-down the counter remains constant ++ * XMAC counts short frame errors even if link down (#10620). ++ * If the link is down, the counter remains constant. + */ + if (pPrt->PLinkModeStatus != SK_LMODE_STAT_UNKNOWN) { + +- /* Otherwise get incremental difference */ ++ /* Otherwise get incremental difference. */ + (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex, + StatAddr[StatIndex][MacType].Reg, + &LowVal); +@@ -7073,7 +6872,7 @@ + case SK_PNMI_HRX_IRLENGTH: + case SK_PNMI_HRX_SYMBOL: + case SK_PNMI_HRX_CEXT: +- /* Not supported by GMAC */ ++ /* Not supported by GMAC. */ + if (MacType == SK_MAC_GMAC) { + return (Val); + } +@@ -7085,7 +6884,7 @@ + break; + + case SK_PNMI_HRX_PMACC_ERR: +- /* For GMAC the SW counter is managed by PNMI */ ++ /* For GMAC the SW counter is managed by PNMI. */ + if (MacType == SK_MAC_GMAC) { + return (pPnmiPrt->StatRxPMaccErr); + } +@@ -7096,13 +6895,13 @@ + HighVal = pPnmiPrt->CounterHigh[StatIndex]; + break; + +- /* SW counter managed by PNMI */ ++ /* SW counter managed by PNMI. */ + case SK_PNMI_HTX_SYNC: + LowVal = (SK_U32)pPnmiPrt->StatSyncCts; + HighVal = (SK_U32)(pPnmiPrt->StatSyncCts >> 32); + break; + +- /* SW counter managed by PNMI */ ++ /* SW counter managed by PNMI. */ + case SK_PNMI_HTX_SYNC_OCTET: + LowVal = (SK_U32)pPnmiPrt->StatSyncOctetsCts; + HighVal = (SK_U32)(pPnmiPrt->StatSyncOctetsCts >> 32); +@@ -7110,17 +6909,19 @@ + + case SK_PNMI_HRX_FCS: + /* +- * Broadcom filters FCS errors and counts it in +- * Receive Error Counter register ++ * Broadcom filters FCS errors and counts them in ++ * Receive Error Counter register. + */ + if (pPrt->PhyType == SK_PHY_BCOM) { +- /* do not read while not initialized (PHY_READ hangs!)*/ ++#ifdef GENESIS ++ /* Do not read while not initialized (PHY_READ hangs!). */ + if (pPrt->PState != SK_PRT_RESET) { + SkXmPhyRead(pAC, IoC, PhysPortIndex, PHY_BCOM_RE_CTR, &Word); + + LowVal = Word; + } + HighVal = pPnmiPrt->CounterHigh[StatIndex]; ++#endif /* GENESIS */ + } + else { + (void)pFnMac->pFnMacStatistic(pAC, IoC, PhysPortIndex, +@@ -7140,7 +6941,7 @@ + + Val = (((SK_U64)HighVal << 32) | (SK_U64)LowVal); + +- /* Correct value because of possible XMAC reset. XMAC Errata #2 */ ++ /* Correct value because of possible XMAC reset (XMAC Errata #2). */ + Val += pPnmiPrt->CounterOffset[StatIndex]; + + return (Val); +@@ -7165,22 +6966,21 @@ + unsigned int PhysPortIndex; + SK_EVPARA EventParam; + +- + SK_MEMSET((char *)&EventParam, 0, sizeof(EventParam)); + +- /* Notify sensor module */ ++ /* Notify sensor module. */ + SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_CLEAR, EventParam); + +- /* Notify RLMT module */ ++ /* Notify RLMT module. */ + EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)-1; + SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_STATS_CLEAR, EventParam); + EventParam.Para32[1] = 0; + +- /* Notify SIRQ module */ ++ /* Notify SIRQ module. */ + SkEventQueue(pAC, SKGE_HWAC, SK_HWEV_CLEAR_STAT, EventParam); + +- /* Notify CSUM module */ ++ /* Notify CSUM module. */ + #ifdef SK_USE_CSUM + EventParam.Para32[0] = NetIndex; + EventParam.Para32[1] = (SK_U32)-1; +@@ -7188,7 +6988,7 @@ + EventParam); + #endif /* SK_USE_CSUM */ + +- /* Clear XMAC statistic */ ++ /* Clear XMAC statistics. */ + for (PhysPortIndex = 0; PhysPortIndex < + (unsigned int)pAC->GIni.GIMacsFound; PhysPortIndex ++) { + +@@ -7215,13 +7015,13 @@ + PhysPortIndex].StatRxPMaccErr)); + } + +- /* +- * Clear local statistics +- */ ++ /* Clear local statistics. */ + SK_MEMSET((char *)&pAC->Pnmi.VirtualCounterOffset, 0, + sizeof(pAC->Pnmi.VirtualCounterOffset)); ++ + pAC->Pnmi.RlmtChangeCts = 0; + pAC->Pnmi.RlmtChangeTime = 0; ++ + SK_MEMSET((char *)&pAC->Pnmi.RlmtChangeEstimate.EstValue[0], 0, + sizeof(pAC->Pnmi.RlmtChangeEstimate.EstValue)); + pAC->Pnmi.RlmtChangeEstimate.EstValueIndex = 0; +@@ -7258,23 +7058,21 @@ + SK_U32 TrapId, /* SNMP ID of the trap */ + unsigned int Size) /* Space needed for trap entry */ + { +- unsigned int BufPad = pAC->Pnmi.TrapBufPad; +- unsigned int BufFree = pAC->Pnmi.TrapBufFree; +- unsigned int Beg = pAC->Pnmi.TrapQueueBeg; +- unsigned int End = pAC->Pnmi.TrapQueueEnd; ++ unsigned int BufPad = pAC->Pnmi.TrapBufPad; ++ unsigned int BufFree = pAC->Pnmi.TrapBufFree; ++ unsigned int Beg = pAC->Pnmi.TrapQueueBeg; ++ unsigned int End = pAC->Pnmi.TrapQueueEnd; + char *pBuf = &pAC->Pnmi.TrapBuf[0]; + int Wrap; +- unsigned int NeededSpace; +- unsigned int EntrySize; ++ unsigned int NeededSpace; ++ unsigned int EntrySize; + SK_U32 Val32; + SK_U64 Val64; + +- +- /* Last byte of entry will get a copy of the entry length */ ++ /* Last byte of entry will get a copy of the entry length. */ + Size ++; + +- /* +- * Calculate needed buffer space */ ++ /* Calculate needed buffer space. */ + if (Beg >= Size) { + + NeededSpace = Size; +@@ -7289,7 +7087,7 @@ + * Check if enough buffer space is provided. Otherwise + * free some entries. Leave one byte space between begin + * and end of buffer to make it possible to detect whether +- * the buffer is full or empty ++ * the buffer is full or empty. + */ + while (BufFree < NeededSpace + 1) { + +@@ -7328,13 +7126,13 @@ + } + BufFree -= NeededSpace; + +- /* Save the current offsets */ ++ /* Save the current offsets. */ + pAC->Pnmi.TrapQueueBeg = Beg; + pAC->Pnmi.TrapQueueEnd = End; + pAC->Pnmi.TrapBufPad = BufPad; + pAC->Pnmi.TrapBufFree = BufFree; + +- /* Initialize the trap entry */ ++ /* Initialize the trap entry. */ + *(pBuf + Beg + Size - 1) = (char)Size; + *(pBuf + Beg) = (char)Size; + Val32 = (pAC->Pnmi.TrapUnique) ++; +@@ -7369,7 +7167,6 @@ + unsigned int Len; + unsigned int DstOff = 0; + +- + while (Trap != End) { + + Len = (unsigned int)*(pBuf + Trap); +@@ -7414,7 +7211,6 @@ + unsigned int Entries = 0; + unsigned int TotalLen = 0; + +- + while (Trap != End) { + + Len = (unsigned int)*(pBuf + Trap); +@@ -7471,14 +7267,14 @@ + unsigned int DescrLen; + SK_U32 Val32; + +- +- /* Get trap buffer entry */ ++ /* Get trap buffer entry. */ + DescrLen = SK_STRLEN(pAC->I2c.SenTable[SensorIndex].SenDesc); ++ + pBuf = GetTrapEntry(pAC, TrapId, + SK_PNMI_TRAP_SENSOR_LEN_BASE + DescrLen); + Offset = SK_PNMI_TRAP_SIMPLE_LEN; + +- /* Store additionally sensor trap related data */ ++ /* Store additionally sensor trap related data. */ + Val32 = OID_SKGE_SENSOR_INDEX; + SK_PNMI_STORE_U32(pBuf + Offset, Val32); + *(pBuf + Offset + 4) = 4; +@@ -7523,7 +7319,6 @@ + char *pBuf; + SK_U32 Val32; + +- + pBuf = GetTrapEntry(pAC, OID_SKGE_TRAP_RLMT_CHANGE_PORT, + SK_PNMI_TRAP_RLMT_CHANGE_LEN); + +@@ -7551,7 +7346,6 @@ + char *pBuf; + SK_U32 Val32; + +- + pBuf = GetTrapEntry(pAC, TrapId, SK_PNMI_TRAP_RLMT_PORT_LEN); + + Val32 = OID_SKGE_RLMT_PORT_INDEX; +@@ -7571,12 +7365,11 @@ + * Nothing + */ + PNMI_STATIC void CopyMac( +-char *pDst, /* Pointer to destination buffer */ ++char *pDst, /* Pointer to destination buffer */ + SK_MAC_ADDR *pMac) /* Pointer of Source */ + { + int i; + +- + for (i = 0; i < sizeof(SK_MAC_ADDR); i ++) { + + *(pDst + i) = pMac->a[i]; +@@ -7616,17 +7409,14 @@ + + SK_U32 RetCode = SK_PNMI_ERR_GENERAL; + +- /* +- * Check instance. We only handle single instance variables +- */ ++ /* Check instance. We only handle single instance variables. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- +- /* Check length */ ++ /* Check length. */ + switch (Id) { + + case OID_PNP_CAPABILITIES: +@@ -7664,14 +7454,10 @@ + break; + } + +- /* +- * Perform action +- */ ++ /* Perform action. */ + if (Action == SK_PNMI_GET) { + +- /* +- * Get value +- */ ++ /* Get value. */ + switch (Id) { + + case OID_PNP_CAPABILITIES: +@@ -7679,18 +7465,21 @@ + break; + + case OID_PNP_QUERY_POWER: +- /* The Windows DDK describes: An OID_PNP_QUERY_POWER requests +- the miniport to indicate whether it can transition its NIC +- to the low-power state. +- A miniport driver must always return NDIS_STATUS_SUCCESS +- to a query of OID_PNP_QUERY_POWER. */ ++ /* ++ * The Windows DDK describes: An OID_PNP_QUERY_POWER requests ++ * the miniport to indicate whether it can transition its NIC ++ * to the low-power state. ++ * A miniport driver must always return NDIS_STATUS_SUCCESS ++ * to a query of OID_PNP_QUERY_POWER. ++ */ + *pLen = sizeof(SK_DEVICE_POWER_STATE); + RetCode = SK_PNMI_ERR_OK; + break; + +- /* NDIS handles these OIDs as write-only. ++ /* ++ * NDIS handles these OIDs as write-only. + * So in case of get action the buffer with written length = 0 +- * is returned ++ * is returned. + */ + case OID_PNP_SET_POWER: + case OID_PNP_ADD_WAKE_UP_PATTERN: +@@ -7711,13 +7500,11 @@ + return (RetCode); + } + +- +- /* +- * Perform preset or set +- */ ++ /* Perform PRESET or SET. */ + +- /* POWER module does not support PRESET action */ ++ /* The POWER module does not support PRESET action. */ + if (Action == SK_PNMI_PRESET) { ++ + return (SK_PNMI_ERR_OK); + } + +@@ -7749,7 +7536,7 @@ + #ifdef SK_DIAG_SUPPORT + /***************************************************************************** + * +- * DiagActions - OID handler function of Diagnostic driver ++ * DiagActions - OID handler function of Diagnostic driver + * + * Description: + * The code is simple. No description necessary. +@@ -7776,22 +7563,17 @@ + unsigned int TableIndex, /* Index to the Id table */ + SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ + { +- + SK_U32 DiagStatus; + SK_U32 RetCode = SK_PNMI_ERR_GENERAL; + +- /* +- * Check instance. We only handle single instance variables. +- */ ++ /* Check instance. We only handle single instance variables. */ + if (Instance != (SK_U32)(-1) && Instance != 1) { + + *pLen = 0; + return (SK_PNMI_ERR_UNKNOWN_INST); + } + +- /* +- * Check length. +- */ ++ /* Check length. */ + switch (Id) { + + case OID_SKGE_DIAG_MODE: +@@ -7809,10 +7591,9 @@ + } + + /* Perform action. */ +- +- /* GET value. */ + if (Action == SK_PNMI_GET) { + ++ /* Get value. */ + switch (Id) { + + case OID_SKGE_DIAG_MODE: +@@ -7827,14 +7608,15 @@ + RetCode = SK_PNMI_ERR_GENERAL; + break; + } +- return (RetCode); ++ return (RetCode); + } + + /* From here SET or PRESET value. */ + + /* PRESET value is not supported. */ + if (Action == SK_PNMI_PRESET) { +- return (SK_PNMI_ERR_OK); ++ ++ return (SK_PNMI_ERR_OK); + } + + /* SET value. */ +@@ -7846,7 +7628,7 @@ + + /* Attach the DIAG to this adapter. */ + case SK_DIAG_ATTACHED: +- /* Check if we come from running */ ++ /* Check if we come from running. */ + if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) { + + RetCode = SkDrvLeaveDiagMode(pAC); +@@ -7881,7 +7663,7 @@ + /* If DiagMode is not active, we can enter it. */ + if (!pAC->DiagModeActive) { + +- RetCode = SkDrvEnterDiagMode(pAC); ++ RetCode = SkDrvEnterDiagMode(pAC); + } + else { + +@@ -7900,7 +7682,7 @@ + break; + + case SK_DIAG_IDLE: +- /* Check if we come from running */ ++ /* Check if we come from running. */ + if (pAC->Pnmi.DiagAttached == SK_DIAG_RUNNING) { + + RetCode = SkDrvLeaveDiagMode(pAC); +@@ -7946,7 +7728,7 @@ + + /***************************************************************************** + * +- * Vct - OID handler function of OIDs ++ * Vct - OID handler function of OIDs for Virtual Cable Tester (VCT) + * + * Description: + * The code is simple. No description necessary. +@@ -7982,153 +7764,150 @@ + SK_U32 PhysPortIndex; + SK_U32 Limit; + SK_U32 Offset; +- SK_BOOL Link; +- SK_U32 RetCode = SK_PNMI_ERR_GENERAL; +- int i; ++ SK_U32 RetCode; ++ int i; + SK_EVPARA Para; +- SK_U32 CableLength; +- +- /* +- * Calculate the port indexes from the instance. +- */ ++ ++ RetCode = SK_PNMI_ERR_GENERAL; ++ ++ /* Calculate the port indexes from the instance. */ + PhysPortMax = pAC->GIni.GIMacsFound; + LogPortMax = SK_PNMI_PORT_PHYS2LOG(PhysPortMax); +- ++ + /* Dual net mode? */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + LogPortMax--; + } +- ++ + if ((Instance != (SK_U32) (-1))) { +- /* Check instance range. */ +- if ((Instance < 2) || (Instance > LogPortMax)) { +- *pLen = 0; +- return (SK_PNMI_ERR_UNKNOWN_INST); +- } +- ++ /* ++ * Get one instance of that OID, so check the instance range: ++ * There is no virtual port with an Instance == 1, so we get ++ * the values from one physical port only. ++ */ + if (pAC->Pnmi.DualNetActiveFlag == SK_TRUE) { + PhysPortIndex = NetIndex; + } +- else { ++ else { ++ if ((Instance < 2) || (Instance > LogPortMax)) { ++ *pLen = 0; ++ return (SK_PNMI_ERR_UNKNOWN_INST); ++ } + PhysPortIndex = Instance - 2; + } + Limit = PhysPortIndex + 1; + } + else { + /* +- * Instance == (SK_U32) (-1), get all Instances of that OID. +- * +- * Not implemented yet. May be used in future releases. ++ * Instance == (SK_U32) (-1), so get all instances of that OID. ++ * There is no virtual port with an Instance == 1, so we get ++ * the values from all physical ports. + */ + PhysPortIndex = 0; + Limit = PhysPortMax; + } +- +- pPrt = &pAC->GIni.GP[PhysPortIndex]; +- if (pPrt->PHWLinkUp) { +- Link = SK_TRUE; +- } +- else { +- Link = SK_FALSE; +- } +- +- /* Check MAC type */ +- if (pPrt->PhyType != SK_PHY_MARV_COPPER) { ++ ++ /* Check MAC type. */ ++ if ((Id != OID_SKGE_VCT_CAPABILITIES) && ++ (pAC->GIni.GP[PhysPortIndex].PhyType != SK_PHY_MARV_COPPER)) { + *pLen = 0; +- return (SK_PNMI_ERR_GENERAL); ++ return (SK_PNMI_ERR_NOT_SUPPORTED); + } +- +- /* Initialize backup data pointer. */ +- pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex]; +- +- /* Check action type */ ++ ++ /* Check action type. */ + if (Action == SK_PNMI_GET) { +- /* Check length */ ++ /* Check length. */ + switch (Id) { +- ++ + case OID_SKGE_VCT_GET: + if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_PNMI_VCT)) { + *pLen = (Limit - PhysPortIndex) * sizeof(SK_PNMI_VCT); + return (SK_PNMI_ERR_TOO_SHORT); + } + break; +- ++ + case OID_SKGE_VCT_STATUS: ++ case OID_SKGE_VCT_CAPABILITIES: + if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_U8)) { + *pLen = (Limit - PhysPortIndex) * sizeof(SK_U8); + return (SK_PNMI_ERR_TOO_SHORT); + } + break; +- ++ + default: + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } +- +- /* Get value */ ++ ++ /* Get value. */ + Offset = 0; + for (; PhysPortIndex < Limit; PhysPortIndex++) { ++ ++ pPrt = &pAC->GIni.GP[PhysPortIndex]; ++ + switch (Id) { +- ++ + case OID_SKGE_VCT_GET: +- if ((Link == SK_FALSE) && ++ if (!pPrt->PHWLinkUp && + (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING)) { ++ + RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_FALSE); ++ + if (RetCode == 0) { +- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_PENDING; +- pAC->Pnmi.VctStatus[PhysPortIndex] |= +- (SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_TEST_DONE); +- +- /* Copy results for later use to PNMI struct. */ +- for (i = 0; i < 4; i++) { +- if (pPrt->PMdiPairSts[i] == SK_PNMI_VCT_NORMAL_CABLE) { +- if ((pPrt->PMdiPairLen[i] > 35) && (pPrt->PMdiPairLen[i] < 0xff)) { +- pPrt->PMdiPairSts[i] = SK_PNMI_VCT_IMPEDANCE_MISMATCH; +- } +- } +- if ((pPrt->PMdiPairLen[i] > 35) && (pPrt->PMdiPairLen[i] != 0xff)) { +- CableLength = 1000 * (((175 * pPrt->PMdiPairLen[i]) / 210) - 28); +- } +- else { +- CableLength = 0; +- } +- pVctBackupData->PMdiPairLen[i] = CableLength; +- pVctBackupData->PMdiPairSts[i] = pPrt->PMdiPairSts[i]; +- } ++ ++ /* VCT test is finished, so save the data. */ ++ VctGetResults(pAC, IoC, PhysPortIndex); + + Para.Para32[0] = PhysPortIndex; + Para.Para32[1] = -1; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Para); +- SkEventDispatcher(pAC, IoC); +- } +- else { +- ; /* VCT test is running. */ ++ ++ /* SkEventDispatcher(pAC, IoC); */ + } + } +- ++ ++ /* Initialize backup data pointer. */ ++ pVctBackupData = &pAC->Pnmi.VctBackup[PhysPortIndex]; ++ + /* Get all results. */ + CheckVctStatus(pAC, IoC, pBuf, Offset, PhysPortIndex); +- Offset += sizeof(SK_U8); ++ ++ Offset++; + *(pBuf + Offset) = pPrt->PCableLen; +- Offset += sizeof(SK_U8); ++ Offset++; + for (i = 0; i < 4; i++) { +- SK_PNMI_STORE_U32((pBuf + Offset), pVctBackupData->PMdiPairLen[i]); ++ ++ SK_PNMI_STORE_U32((pBuf + Offset), pVctBackupData->MdiPairLen[i]); + Offset += sizeof(SK_U32); + } + for (i = 0; i < 4; i++) { +- *(pBuf + Offset) = pVctBackupData->PMdiPairSts[i]; +- Offset += sizeof(SK_U8); ++ ++ *(pBuf + Offset) = pVctBackupData->MdiPairSts[i]; ++ Offset++; + } +- ++ + RetCode = SK_PNMI_ERR_OK; + break; +- ++ + case OID_SKGE_VCT_STATUS: + CheckVctStatus(pAC, IoC, pBuf, Offset, PhysPortIndex); +- Offset += sizeof(SK_U8); ++ ++ Offset++; + RetCode = SK_PNMI_ERR_OK; + break; +- ++ ++ case OID_SKGE_VCT_CAPABILITIES: ++ if (pPrt->PhyType != SK_PHY_MARV_COPPER) { ++ *(pBuf + Offset) = SK_PNMI_VCT_NOT_SUPPORTED; ++ } ++ else { ++ *(pBuf + Offset) = SK_PNMI_VCT_SUPPORTED; ++ } ++ Offset++; ++ ++ RetCode = SK_PNMI_ERR_OK; ++ break; ++ + default: + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -8136,15 +7915,15 @@ + } /* for */ + *pLen = Offset; + return (RetCode); +- ++ + } /* if SK_PNMI_GET */ +- ++ + /* + * From here SET or PRESET action. Check if the passed + * buffer length is plausible. + */ +- +- /* Check length */ ++ ++ /* Check length. */ + switch (Id) { + case OID_SKGE_VCT_SET: + if (*pLen < (Limit - PhysPortIndex) * sizeof(SK_U32)) { +@@ -8152,42 +7931,45 @@ + return (SK_PNMI_ERR_TOO_SHORT); + } + break; +- ++ + default: + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); + } +- +- /* +- * Perform preset or set. +- */ +- ++ ++ /* Perform PRESET or SET. */ ++ + /* VCT does not support PRESET action. */ + if (Action == SK_PNMI_PRESET) { ++ + return (SK_PNMI_ERR_OK); + } +- ++ + Offset = 0; + for (; PhysPortIndex < Limit; PhysPortIndex++) { ++ ++ pPrt = &pAC->GIni.GP[PhysPortIndex]; ++ + switch (Id) { + case OID_SKGE_VCT_SET: /* Start VCT test. */ +- if (Link == SK_FALSE) { ++ if (!pPrt->PHWLinkUp) { + SkGeStopPort(pAC, IoC, PhysPortIndex, SK_STOP_ALL, SK_SOFT_RST); +- ++ + RetCode = SkGmCableDiagStatus(pAC, IoC, PhysPortIndex, SK_TRUE); ++ + if (RetCode == 0) { /* RetCode: 0 => Start! */ + pAC->Pnmi.VctStatus[PhysPortIndex] |= SK_PNMI_VCT_PENDING; +- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_NEW_VCT_DATA; +- pAC->Pnmi.VctStatus[PhysPortIndex] &= ~SK_PNMI_VCT_LINK; +- +- /* +- * Start VCT timer counter. +- */ +- SK_MEMSET((char *) &Para, 0, sizeof(Para)); ++ pAC->Pnmi.VctStatus[PhysPortIndex] &= ++ ~(SK_PNMI_VCT_NEW_VCT_DATA | SK_PNMI_VCT_LINK); ++ ++ /* Start VCT timer counter. */ ++ SK_MEMSET((char *)&Para, 0, sizeof(Para)); + Para.Para32[0] = PhysPortIndex; + Para.Para32[1] = -1; +- SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex].VctTimer, +- 4000000, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Para); ++ ++ SkTimerStart(pAC, IoC, &pAC->Pnmi.VctTimeout[PhysPortIndex], ++ SK_PNMI_VCT_TIMER_CHECK, SKGE_PNMI, SK_PNMI_EVT_VCT_RESET, Para); ++ + SK_PNMI_STORE_U32((pBuf + Offset), RetCode); + RetCode = SK_PNMI_ERR_OK; + } +@@ -8203,7 +7985,7 @@ + } + Offset += sizeof(SK_U32); + break; +- ++ + default: + *pLen = 0; + return (SK_PNMI_ERR_GENERAL); +@@ -8215,6 +7997,65 @@ + } /* Vct */ + + ++PNMI_STATIC void VctGetResults( ++SK_AC *pAC, ++SK_IOC IoC, ++SK_U32 Port) ++{ ++ SK_GEPORT *pPrt; ++ int i; ++ SK_U8 PairLen; ++ SK_U8 PairSts; ++ SK_U32 MinLength; ++ SK_U32 CableLength; ++ ++ pPrt = &pAC->GIni.GP[Port]; ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ MinLength = 25; ++ } ++ else { ++ MinLength = 35; ++ } ++ ++ /* Copy results for later use to PNMI struct. */ ++ for (i = 0; i < 4; i++) { ++ ++ PairLen = pPrt->PMdiPairLen[i]; ++ ++ if (((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) == 0) && (i > 1)) { ++ PairSts = SK_PNMI_VCT_NOT_PRESENT; ++ } ++ else { ++ PairSts = pPrt->PMdiPairSts[i]; ++ } ++ ++ if ((PairSts == SK_PNMI_VCT_NORMAL_CABLE) && ++ (PairLen > 28) && (PairLen < 0xff)) { ++ ++ PairSts = SK_PNMI_VCT_IMPEDANCE_MISMATCH; ++ } ++ ++ /* Ignore values <= MinLength, the linear factor is 4/5. */ ++ if ((PairLen > MinLength) && (PairLen < 0xff)) { ++ ++ CableLength = 1000UL * (PairLen - MinLength) * 4 / 5; ++ } ++ else { ++ /* No cable or short cable. */ ++ CableLength = 0; ++ } ++ ++ pAC->Pnmi.VctBackup[Port].MdiPairLen[i] = CableLength; ++ pAC->Pnmi.VctBackup[Port].MdiPairSts[i] = PairSts; ++ } ++ ++ pAC->Pnmi.VctStatus[Port] &= ~SK_PNMI_VCT_PENDING; ++ pAC->Pnmi.VctStatus[Port] |= (SK_PNMI_VCT_NEW_VCT_DATA | ++ SK_PNMI_VCT_TEST_DONE); ++ ++} /* GetVctResults */ ++ + PNMI_STATIC void CheckVctStatus( + SK_AC *pAC, + SK_IOC IoC, +@@ -8224,54 +8065,57 @@ + { + SK_GEPORT *pPrt; + SK_PNMI_VCT *pVctData; ++ SK_U8 VctStatus; + SK_U32 RetCode; +- ++ + pPrt = &pAC->GIni.GP[PhysPortIndex]; +- ++ + pVctData = (SK_PNMI_VCT *) (pBuf + Offset); + pVctData->VctStatus = SK_PNMI_VCT_NONE; +- ++ ++ VctStatus = pAC->Pnmi.VctStatus[PhysPortIndex]; ++ + if (!pPrt->PHWLinkUp) { +- ++ + /* Was a VCT test ever made before? */ +- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_TEST_DONE) { +- if ((pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_LINK)) { ++ if (VctStatus & SK_PNMI_VCT_TEST_DONE) { ++ if (VctStatus & SK_PNMI_VCT_LINK) { + pVctData->VctStatus |= SK_PNMI_VCT_OLD_VCT_DATA; + } + else { + pVctData->VctStatus |= SK_PNMI_VCT_NEW_VCT_DATA; + } + } +- ++ + /* Check VCT test status. */ + RetCode = SkGmCableDiagStatus(pAC,IoC, PhysPortIndex, SK_FALSE); ++ + if (RetCode == 2) { /* VCT test is running. */ + pVctData->VctStatus |= SK_PNMI_VCT_RUNNING; + } + else { /* VCT data was copied to pAC here. Check PENDING state. */ +- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_PENDING) { ++ if (VctStatus & SK_PNMI_VCT_PENDING) { + pVctData->VctStatus |= SK_PNMI_VCT_NEW_VCT_DATA; + } + } +- ++ + if (pPrt->PCableLen != 0xff) { /* Old DSP value. */ + pVctData->VctStatus |= SK_PNMI_VCT_OLD_DSP_DATA; + } + } + else { +- + /* Was a VCT test ever made before? */ +- if (pAC->Pnmi.VctStatus[PhysPortIndex] & SK_PNMI_VCT_TEST_DONE) { ++ if (VctStatus & SK_PNMI_VCT_TEST_DONE) { + pVctData->VctStatus &= ~SK_PNMI_VCT_NEW_VCT_DATA; + pVctData->VctStatus |= SK_PNMI_VCT_OLD_VCT_DATA; + } +- ++ + /* DSP only valid in 100/1000 modes. */ +- if (pAC->GIni.GP[PhysPortIndex].PLinkSpeedUsed != +- SK_LSPEED_STAT_10MBPS) { ++ if (pPrt->PLinkSpeedUsed != SK_LSPEED_STAT_10MBPS) { + pVctData->VctStatus |= SK_PNMI_VCT_NEW_DSP_DATA; + } +- } ++ } ++ + } /* CheckVctStatus */ + + +@@ -8314,29 +8158,29 @@ + ReturnCode = SK_PNMI_ERR_GENERAL; + + SK_MEMCPY(&Mode, pBuf, sizeof(SK_I32)); +- SK_MEMCPY(&Oid, (char *) pBuf + sizeof(SK_I32), sizeof(SK_U32)); ++ SK_MEMCPY(&Oid, (char *)pBuf + sizeof(SK_I32), sizeof(SK_U32)); + HeaderLength = sizeof(SK_I32) + sizeof(SK_U32); + *pLen = *pLen - HeaderLength; +- SK_MEMCPY((char *) pBuf + sizeof(SK_I32), (char *) pBuf + HeaderLength, *pLen); ++ SK_MEMCPY((char *)pBuf + sizeof(SK_I32), (char *)pBuf + HeaderLength, *pLen); + + switch(Mode) { + case SK_GET_SINGLE_VAR: +- ReturnCode = SkPnmiGetVar(pAC, IoC, Oid, +- (char *) pBuf + sizeof(SK_I32), pLen, ++ ReturnCode = SkPnmiGetVar(pAC, IoC, Oid, ++ (char *)pBuf + sizeof(SK_I32), pLen, + ((SK_U32) (-1)), NetIndex); + SK_PNMI_STORE_U32(pBuf, ReturnCode); + *pLen = *pLen + sizeof(SK_I32); + break; + case SK_PRESET_SINGLE_VAR: +- ReturnCode = SkPnmiPreSetVar(pAC, IoC, Oid, +- (char *) pBuf + sizeof(SK_I32), pLen, ++ ReturnCode = SkPnmiPreSetVar(pAC, IoC, Oid, ++ (char *)pBuf + sizeof(SK_I32), pLen, + ((SK_U32) (-1)), NetIndex); + SK_PNMI_STORE_U32(pBuf, ReturnCode); + *pLen = *pLen + sizeof(SK_I32); + break; + case SK_SET_SINGLE_VAR: +- ReturnCode = SkPnmiSetVar(pAC, IoC, Oid, +- (char *) pBuf + sizeof(SK_I32), pLen, ++ ReturnCode = SkPnmiSetVar(pAC, IoC, Oid, ++ (char *)pBuf + sizeof(SK_I32), pLen, + ((SK_U32) (-1)), NetIndex); + SK_PNMI_STORE_U32(pBuf, ReturnCode); + *pLen = *pLen + sizeof(SK_I32); +@@ -8357,3 +8201,86 @@ + return (ReturnCode); + + } /* SkGeIocGen */ ++ ++#ifdef SK_ASF ++/***************************************************************************** ++ * ++ * Asf ++ * ++ * Description: ++ * The code is simple. No description necessary. ++ * ++ * Returns: ++ * SK_PNMI_ERR_OK The request was successfully performed. ++ * SK_PNMI_ERR_GENERAL A general severe internal error occured. ++ * SK_PNMI_ERR_TOO_SHORT The passed buffer is too short to contain ++ * the correct data (e.g. a 32bit value is ++ * needed, but a 16 bit value was passed). ++ * SK_PNMI_ERR_UNKNOWN_INST The requested instance of the OID doesn't ++ * exist (e.g. port instance 3 on a two port ++ * adapter. ++ */ ++ ++PNMI_STATIC int Asf( ++SK_AC *pAC, /* Pointer to adapter context */ ++SK_IOC IoC, /* IO context handle */ ++int Action, /* GET/PRESET/SET action */ ++SK_U32 Id, /* Object ID that is to be processed */ ++char *pBuf, /* Buffer used for the management data transfer */ ++unsigned int *pLen, /* On call: pBuf buffer length. On return: used buffer */ ++SK_U32 Instance, /* Instance (1..n) that is to be queried or -1 */ ++unsigned int TableIndex, /* Index to the Id table */ ++SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ ++{ ++ SK_U32 RetCode = SK_PNMI_ERR_GENERAL; ++ ++ /* ++ * Check instance. We only handle single instance variables. ++ */ ++ if (Instance != (SK_U32)(-1) && Instance != 1) { ++ ++ *pLen = 0; ++ return (SK_PNMI_ERR_UNKNOWN_INST); ++ } ++ ++ /* Perform action. */ ++ /* GET value. */ ++ if (Action == SK_PNMI_GET) { ++ switch (Id) { ++ case OID_SKGE_ASF: ++ RetCode = SkAsfGet(pAC, IoC, (SK_U8 *) pBuf, pLen); ++ break; ++ default: ++ RetCode = SkAsfGetOid( pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen ); ++ break; ++ } ++ ++ return (RetCode); ++ } ++ ++ /* PRESET value. */ ++ if (Action == SK_PNMI_PRESET) { ++ switch (Id) { ++ case OID_SKGE_ASF: ++ RetCode = SkAsfPreSet(pAC, IoC, (SK_U8 *) pBuf, pLen); ++ break; ++ default: ++ RetCode = SkAsfPreSetOid( pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen ); ++ break; ++ } ++ } ++ ++ /* SET value. */ ++ if (Action == SK_PNMI_SET) { ++ switch (Id) { ++ case OID_SKGE_ASF: ++ RetCode = SkAsfSet(pAC, IoC, (SK_U8 *) pBuf, pLen); ++ break; ++ default: ++ RetCode = SkAsfSetOid( pAC, IoC, Id, Instance, (SK_U8 *) pBuf, pLen ); ++ break; ++ } ++ } ++ return (RetCode); ++} ++#endif /* SK_ASF */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skgesirq.c linux-2.6.9.new/drivers/net/sk98lin/skgesirq.c +--- linux-2.6.9.old/drivers/net/sk98lin/skgesirq.c 2004-10-19 05:53:06.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skgesirq.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skgesirq.c + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.92 $ +- * Date: $Date: 2003/09/16 14:37:07 $ ++ * Version: $Revision: 2.21 $ ++ * Date: $Date: 2005/03/03 15:49:58 $ + * Purpose: Special IRQ module + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -38,7 +37,7 @@ + * right after this ISR. + * + * The Interrupt source register of the adapter is NOT read by this module. +- * SO if the drivers implementor needs a while loop around the ++ * SO if the drivers implementor needs a while loop around the + * slow data paths interrupt bits, he needs to call the SkGeSirqIsr() for + * each loop entered. + * +@@ -46,11 +45,6 @@ + * + */ + +-#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) +-static const char SysKonnectFileId[] = +- "@(#) $Id: skgesirq.c,v 1.92 2003/09/16 14:37:07 rschmidt Exp $ (C) Marvell."; +-#endif +- + #include "h/skdrv1st.h" /* Driver Specific Definitions */ + #ifndef SK_SLIM + #include "h/skgepnmi.h" /* PNMI Definitions */ +@@ -58,6 +52,13 @@ + #endif + #include "h/skdrv2nd.h" /* Adapter Control and Driver specific Def. */ + ++/* local variables ************************************************************/ ++ ++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) ++static const char SysKonnectFileId[] = ++ "@(#) $Id: skgesirq.c,v 2.21 2005/03/03 15:49:58 rschmidt Exp $ (C) Marvell."; ++#endif ++ + /* local function prototypes */ + #ifdef GENESIS + static int SkGePortCheckUpXmac(SK_AC*, SK_IOC, int, SK_BOOL); +@@ -86,7 +87,7 @@ + XM_RXF_511B, + XM_RXF_1023B, + XM_RXF_MAX_SZ +-} ; ++}; + #endif /* GENESIS */ + + #ifdef __C2MAN__ +@@ -109,8 +110,8 @@ + * Returns: N/A + */ + static void SkHWInitDefSense( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -119,7 +120,7 @@ + + pPrt->PAutoNegTimeOut = 0; + +- if (pPrt->PLinkModeConf != SK_LMODE_AUTOSENSE) { ++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) { + pPrt->PLinkMode = pPrt->PLinkModeConf; + return; + } +@@ -145,8 +146,8 @@ + * + */ + static SK_U8 SkHWSenseGetNext( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -155,18 +156,18 @@ + + pPrt->PAutoNegTimeOut = 0; + +- if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) { ++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) { + /* Leave all as configured */ + return(pPrt->PLinkModeConf); + } + +- if (pPrt->PLinkMode == (SK_U8)SK_LMODE_AUTOFULL) { ++ if (pPrt->PLinkMode == (SK_U8)SK_LMODE_AUTOFULL) { + /* Return next mode AUTOBOTH */ +- return ((SK_U8)SK_LMODE_AUTOBOTH); ++ return((SK_U8)SK_LMODE_AUTOBOTH); + } + + /* Return default autofull */ +- return ((SK_U8)SK_LMODE_AUTOFULL); ++ return((SK_U8)SK_LMODE_AUTOFULL); + } /* SkHWSenseGetNext */ + + +@@ -179,8 +180,8 @@ + * Returns: N/A + */ + static void SkHWSenseSetNext( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_U8 NewMode) /* New Mode to be written in sense mode */ + { +@@ -190,7 +191,7 @@ + + pPrt->PAutoNegTimeOut = 0; + +- if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) { ++ if (pPrt->PLinkModeConf != (SK_U8)SK_LMODE_AUTOSENSE) { + return; + } + +@@ -214,8 +215,8 @@ + * Returns: N/A + */ + void SkHWLinkDown( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -227,11 +228,11 @@ + + /* Disable Receiver and Transmitter */ + SkMacRxTxDisable(pAC, IoC, Port); +- ++ + /* Init default sense mode */ + SkHWInitDefSense(pAC, IoC, Port); + +- if (pPrt->PHWLinkUp == SK_FALSE) { ++ if (!pPrt->PHWLinkUp) { + return; + } + +@@ -242,8 +243,8 @@ + pPrt->PHWLinkUp = SK_FALSE; + + /* Reset Port stati */ +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; +- pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE; ++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; ++ pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE; + pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_INDETERMINATED; + + /* Re-init Phy especially when the AutoSense default is set now */ +@@ -266,8 +267,8 @@ + * Returns: N/A + */ + void SkHWLinkUp( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -281,11 +282,11 @@ + + pPrt->PHWLinkUp = SK_TRUE; + pPrt->PAutoNegFail = SK_FALSE; +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; ++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; + +- if (pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOHALF && +- pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOFULL && +- pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOBOTH) { ++ if (pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOHALF && ++ pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOFULL && ++ pPrt->PLinkMode != (SK_U8)SK_LMODE_AUTOBOTH) { + /* Link is up and no Auto-negotiation should be done */ + + /* Link speed should be the configured one */ +@@ -304,18 +305,18 @@ + } + + /* Set Link Mode Status */ +- if (pPrt->PLinkMode == SK_LMODE_FULL) { ++ if (pPrt->PLinkMode == (SK_U8)SK_LMODE_FULL) { + pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_FULL; + } + else { +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_HALF; ++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_HALF; + } + + /* No flow control without auto-negotiation */ +- pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE; ++ pPrt->PFlowCtrlStatus = (SK_U8)SK_FLOW_STAT_NONE; + + /* enable Rx/Tx */ +- (void)SkMacRxTxEnable(pAC, IoC, Port); ++ (void)SkMacRxTxEnable(pAC, IoC, Port); + } + } /* SkHWLinkUp */ + +@@ -329,14 +330,16 @@ + * Returns: N/A + */ + static void SkMacParity( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ +-int Port) /* Port Index of the port failed */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ ++int Port) /* Port Index (MAC_1 + n) */ + { + SK_EVPARA Para; + SK_GEPORT *pPrt; /* GIni Port struct pointer */ + SK_U32 TxMax; /* Tx Max Size Counter */ + ++ TxMax = 0; ++ + pPrt = &pAC->GIni.GP[Port]; + + /* Clear IRQ Tx Parity Error */ +@@ -355,7 +358,7 @@ + pAC->GIni.GIChipRev == 0) ? GMF_CLI_TX_FC : GMF_CLI_TX_PE)); + } + #endif /* YUKON */ +- ++ + if (pPrt->PCheckPar) { + + if (Port == MAC_1) { +@@ -366,7 +369,7 @@ + } + Para.Para64 = Port; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); +- ++ + Para.Para32[0] = Port; + SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); + +@@ -378,7 +381,7 @@ + if (pAC->GIni.GIGenesis) { + /* Snap statistic counters */ + (void)SkXmUpdateStats(pAC, IoC, Port); +- ++ + (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXF_MAX_SZ, &TxMax); + } + #endif /* GENESIS */ +@@ -399,15 +402,15 @@ + + /****************************************************************************** + * +- * SkGeHwErr() - Hardware Error service routine ++ * SkGeYuHwErr() - Hardware Error service routine (Genesis and Yukon) + * + * Description: handles all HW Error interrupts + * + * Returns: N/A + */ +-static void SkGeHwErr( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++static void SkGeYuHwErr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ + SK_U32 HwStatus) /* Interrupt status word */ + { + SK_EVPARA Para; +@@ -423,10 +426,10 @@ + } + + /* Reset all bits in the PCI STATUS register */ +- SK_IN16(IoC, PCI_C(PCI_STATUS), &Word); +- ++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word); ++ + SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); +- SK_OUT16(IoC, PCI_C(PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS)); ++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS)); + SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); + + Para.Para64 = 0; +@@ -484,14 +487,18 @@ + #endif /* YUKON */ + + if ((HwStatus & IS_RAM_RD_PAR) != 0) { ++ + SK_OUT16(IoC, B3_RI_CTRL, RI_CLR_RD_PERR); ++ + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E014, SKERR_SIRQ_E014MSG); + Para.Para64 = 0; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para); + } + + if ((HwStatus & IS_RAM_WR_PAR) != 0) { ++ + SK_OUT16(IoC, B3_RI_CTRL, RI_CLR_WR_PERR); ++ + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E015, SKERR_SIRQ_E015MSG); + Para.Para64 = 0; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para); +@@ -512,7 +519,7 @@ + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E018, SKERR_SIRQ_E018MSG); + Para.Para64 = MAC_1; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); +- ++ + Para.Para32[0] = MAC_1; + SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); + } +@@ -524,37 +531,286 @@ + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E019, SKERR_SIRQ_E019MSG); + Para.Para64 = MAC_2; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); +- ++ + Para.Para32[0] = MAC_2; + SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); + } +-} /* SkGeHwErr */ ++} /* SkGeYuHwErr */ ++ ++#ifdef YUK2 ++/****************************************************************************** ++ * ++ * SkYuk2HwPortErr() - Service HW Errors for specified port (Yukon-2 only) ++ * ++ * Description: handles the HW Error interrupts for a specific port. ++ * ++ * Returns: N/A ++ */ ++static void SkYuk2HwPortErr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 HwStatus, /* Interrupt status word */ ++int Port) /* Port Index (MAC_1 + n) */ ++{ ++ SK_EVPARA Para; ++ int Queue; ++ ++ if (Port == MAC_2) { ++ HwStatus >>= 8; ++ } ++ ++ if ((HwStatus & Y2_HWE_L1_MASK) == 0) { ++ return; ++ } ++ ++ if ((HwStatus & Y2_IS_PAR_RD1) != 0) { ++ /* Clear IRQ */ ++ SK_OUT16(IoC, SELECT_RAM_BUFFER(Port, B3_RI_CTRL), RI_CLR_RD_PERR); ++ ++ if (Port == MAC_1) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E028, SKERR_SIRQ_E028MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E030, SKERR_SIRQ_E030MSG); ++ } ++ } ++ ++ if ((HwStatus & Y2_IS_PAR_WR1) != 0) { ++ /* Clear IRQ */ ++ SK_OUT16(IoC, SELECT_RAM_BUFFER(Port, B3_RI_CTRL), RI_CLR_WR_PERR); + ++ if (Port == MAC_1) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E029, SKERR_SIRQ_E029MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E031, SKERR_SIRQ_E031MSG); ++ } ++ } ++ ++ if ((HwStatus & Y2_IS_PAR_MAC1) != 0) { ++ /* Clear IRQ */ ++ SK_OUT8(IoC, MR_ADDR(Port, TX_GMF_CTRL_T), GMF_CLI_TX_PE); ++ ++ if (Port == MAC_1) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E016, SKERR_SIRQ_E016MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E017, SKERR_SIRQ_E017MSG); ++ } ++ } ++ ++ if ((HwStatus & Y2_IS_PAR_RX1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_R1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E018, SKERR_SIRQ_E018MSG); ++ } ++ else { ++ Queue = Q_R2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E019, SKERR_SIRQ_E019MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_PAR); ++ } ++ ++ if ((HwStatus & Y2_IS_TCP_TXS1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_XS1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E033, SKERR_SIRQ_E033MSG); ++ } ++ else { ++ Queue = Q_XS2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E035, SKERR_SIRQ_E035MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_TCP); ++ } ++ ++ if ((HwStatus & Y2_IS_TCP_TXA1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_XA1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E032, SKERR_SIRQ_E032MSG); ++ } ++ else { ++ Queue = Q_XA2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E034, SKERR_SIRQ_E034MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_TCP); ++ } ++ ++ Para.Para64 = Port; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); ++ ++ Para.Para32[0] = Port; ++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); ++ ++} /* SkYuk2HwPortErr */ + + /****************************************************************************** + * +- * SkGeSirqIsr() - Special Interrupt Service Routine ++ * SkYuk2HwErr() - Hardware Error service routine (Yukon-2 only) + * +- * Description: handles all non data transfer specific interrupts (slow path) ++ * Description: handles all HW Error interrupts ++ * ++ * Returns: N/A ++ */ ++static void SkYuk2HwErr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 HwStatus) /* Interrupt status word */ ++{ ++ SK_EVPARA Para; ++ SK_U16 Word; ++ SK_U32 DWord; ++ SK_U32 TlpHead[4]; ++ int i; ++ ++ /* This is necessary only for Rx timing measurements */ ++ if ((HwStatus & Y2_IS_TIST_OV) != 0) { ++ /* increment Time Stamp Timer counter (high) */ ++ pAC->GIni.GITimeStampCnt++; ++ ++ /* Clear Time Stamp Timer IRQ */ ++ SK_OUT8(IoC, GMAC_TI_ST_CTRL, (SK_U8)GMT_ST_CLR_IRQ); ++ } ++ ++ /* Evaluate Y2_IS_PCI_NEXP before Y2_IS_MST_ERR or Y2_IS_IRQ_STAT */ ++ if ((HwStatus & Y2_IS_PCI_NEXP) != 0) { ++ /* PCI-Express Error occured which is not described in PEX spec. */ ++ /* ++ * This error is also mapped either to Master Abort (Y2_IS_MST_ERR) ++ * or Target Abort (Y2_IS_IRQ_STAT) bit and can only be cleared there. ++ * Therefore handle this event just by printing an error log entry. ++ */ ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E027, SKERR_SIRQ_E027MSG); ++ } ++ ++ if ((HwStatus & (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) != 0) { ++ /* PCI Errors occured */ ++ if ((HwStatus & Y2_IS_IRQ_STAT) != 0) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E013, SKERR_SIRQ_E013MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E012, SKERR_SIRQ_E012MSG); ++ } ++ ++ /* Reset all bits in the PCI STATUS register */ ++ SK_IN16(IoC, PCI_C(pAC, PCI_STATUS), &Word); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ SK_OUT16(IoC, PCI_C(pAC, PCI_STATUS), (SK_U16)(Word | PCI_ERRBITS)); ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ ++ Para.Para64 = 0; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para); ++ } ++ ++ /* check for PCI-Express Uncorrectable Error*/ ++ if ((HwStatus & Y2_IS_PCI_EXP) != 0) { ++ /* ++ * On PCI-Express bus bridges are called root complexes (RC). ++ * PCI-Express errors are recognized by the root complex too, ++ * which requests the system to handle the problem. After error ++ * occurence it may be that no access to the adapter may be performed ++ * any longer. ++ */ ++ ++ /* Get uncorrectable error status */ ++ SK_IN32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), &DWord); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ++ ("PEX Uncorr.Error Status: 0x%08lX\n", DWord)); ++ ++ if (DWord != PEX_UNSUP_REQ) { ++ /* ignore Unsupported Request Errors */ ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E026, SKERR_SIRQ_E026MSG); ++ } ++ ++ if ((DWord & (PEX_FATAL_ERRORS | PEX_POIS_TLP)) != 0) { ++ /* ++ * Stop only, if the uncorrectable error is fatal or ++ * Poisoned TLP occured ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ("Header Log:")); ++ ++ for (i = 0; i < 4; i++) { ++ /* get TLP Header from Log Registers */ ++ SK_IN32(IoC, PCI_C(pAC, PEX_HEADER_LOG + i*4), TlpHead + i); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ++ (" 0x%08lX", TlpHead[i])); ++ } ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ("\n")); ++ ++ /* check for vendor defined broadcast message */ ++ if (TlpHead[0] == 0x73004001 && (SK_U8)TlpHead[1] == 0x7f) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ++ ("Vendor defined broadcast message\n")); ++ } ++ else { ++ Para.Para64 = 0; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para); ++ ++ pAC->GIni.GIValHwIrqMask &= ~Y2_IS_PCI_EXP; ++ /* Rewrite HW IRQ mask */ ++ SK_OUT32(IoC, B0_HWE_IMSK, pAC->GIni.GIValHwIrqMask); ++ } ++ } ++ /* clear the interrupt */ ++ SK_OUT32(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ SK_OUT32(IoC, PCI_C(pAC, PEX_UNC_ERR_STAT), 0xffffffffUL); ++ SK_OUT32(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ } ++ ++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ ++ SkYuk2HwPortErr(pAC, IoC, HwStatus, i); ++ } ++ ++} /* SkYuk2HwErr */ ++#endif /* YUK2 */ ++ ++/****************************************************************************** ++ * ++ * SkGeSirqIsr() - Wrapper for Special Interrupt Service Routine ++ * ++ * Description: calls the preselected special ISR (slow path) + * + * Returns: N/A + */ + void SkGeSirqIsr( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O context */ ++SK_U32 Istatus) /* Interrupt status word */ ++{ ++ pAC->GIni.GIFunc.pSkGeSirqIsr(pAC, IoC, Istatus); ++} ++ ++/****************************************************************************** ++ * ++ * SkGeYuSirqIsr() - Special Interrupt Service Routine ++ * ++ * Description: handles all non data transfer specific interrupts (slow path) ++ * ++ * Returns: N/A ++ */ ++void SkGeYuSirqIsr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + SK_U32 Istatus) /* Interrupt status word */ + { + SK_EVPARA Para; + SK_U32 RegVal32; /* Read register value */ + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +- SK_U16 PhyInt; ++ SK_U16 PhyInt; + int i; + + if (((Istatus & IS_HW_ERR) & pAC->GIni.GIValIrqMask) != 0) { + /* read the HW Error Interrupt source */ + SK_IN32(IoC, B0_HWE_ISRC, &RegVal32); +- +- SkGeHwErr(pAC, IoC, RegVal32); ++ ++ SkGeYuHwErr(pAC, IoC, RegVal32); + } + + /* +@@ -569,7 +825,7 @@ + } + + if (((Istatus & (IS_PA_TO_RX2 | IS_PA_TO_TX2)) != 0) && +- pAC->GIni.GP[MAC_2].PState == SK_PRT_RESET) { ++ pAC->GIni.GP[MAC_2].PState == SK_PRT_RESET) { + /* MAC 2 was not initialized but Packet timeout occured */ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E005, + SKERR_SIRQ_E005MSG); +@@ -590,8 +846,8 @@ + } + + if ((Istatus & IS_PA_TO_TX1) != 0) { +- +- pPrt = &pAC->GIni.GP[0]; ++ ++ pPrt = &pAC->GIni.GP[MAC_1]; + + /* May be a normal situation in a server with a slow network */ + SK_OUT16(IoC, B3_PA_CTRL, PA_CLR_TO_TX1); +@@ -612,25 +868,18 @@ + * we ignore those + */ + pPrt->HalfDupTimerActive = SK_TRUE; +-#ifdef XXX +- Len = sizeof(SK_U64); +- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, +- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 0), +- pAC->Rlmt.Port[0].Net->NetNumber); +- +- pPrt->LastOctets = Octets; +-#endif /* XXX */ ++ + /* Snap statistic counters */ + (void)SkXmUpdateStats(pAC, IoC, 0); + + (void)SkXmMacStatistic(pAC, IoC, 0, XM_TXO_OK_HI, &RegVal32); + + pPrt->LastOctets = (SK_U64)RegVal32 << 32; +- ++ + (void)SkXmMacStatistic(pAC, IoC, 0, XM_TXO_OK_LO, &RegVal32); + + pPrt->LastOctets += RegVal32; +- ++ + Para.Para32[0] = 0; + SkTimerStart(pAC, IoC, &pPrt->HalfDupChkTimer, SK_HALFDUP_CHK_TIME, + SKGE_HWAC, SK_HWEV_HALFDUP_CHK, Para); +@@ -640,8 +889,8 @@ + } + + if ((Istatus & IS_PA_TO_TX2) != 0) { +- +- pPrt = &pAC->GIni.GP[1]; ++ ++ pPrt = &pAC->GIni.GP[MAC_2]; + + /* May be a normal situation in a server with a slow network */ + SK_OUT16(IoC, B3_PA_CTRL, PA_CLR_TO_TX2); +@@ -653,25 +902,18 @@ + pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) && + !pPrt->HalfDupTimerActive) { + pPrt->HalfDupTimerActive = SK_TRUE; +-#ifdef XXX +- Len = sizeof(SK_U64); +- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, +- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 1), +- pAC->Rlmt.Port[1].Net->NetNumber); +- +- pPrt->LastOctets = Octets; +-#endif /* XXX */ ++ + /* Snap statistic counters */ + (void)SkXmUpdateStats(pAC, IoC, 1); + + (void)SkXmMacStatistic(pAC, IoC, 1, XM_TXO_OK_HI, &RegVal32); + + pPrt->LastOctets = (SK_U64)RegVal32 << 32; +- ++ + (void)SkXmMacStatistic(pAC, IoC, 1, XM_TXO_OK_LO, &RegVal32); + + pPrt->LastOctets += RegVal32; +- ++ + Para.Para32[0] = 1; + SkTimerStart(pAC, IoC, &pPrt->HalfDupChkTimer, SK_HALFDUP_CHK_TIME, + SKGE_HWAC, SK_HWEV_HALFDUP_CHK, Para); +@@ -684,6 +926,7 @@ + if ((Istatus & IS_R1_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_R1_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E006, + SKERR_SIRQ_E006MSG); + Para.Para64 = MAC_1; +@@ -695,6 +938,7 @@ + if ((Istatus & IS_R2_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_R2_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E007, + SKERR_SIRQ_E007MSG); + Para.Para64 = MAC_2; +@@ -706,6 +950,7 @@ + if ((Istatus & IS_XS1_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_XS1_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E008, + SKERR_SIRQ_E008MSG); + Para.Para64 = MAC_1; +@@ -717,6 +962,7 @@ + if ((Istatus & IS_XA1_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_XA1_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E009, + SKERR_SIRQ_E009MSG); + Para.Para64 = MAC_1; +@@ -728,6 +974,7 @@ + if ((Istatus & IS_XS2_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_XS2_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E010, + SKERR_SIRQ_E010MSG); + Para.Para64 = MAC_2; +@@ -739,6 +986,7 @@ + if ((Istatus & IS_XA2_C) != 0) { + /* Clear IRQ */ + SK_OUT32(IoC, B0_XA2_CSR, CSR_IRQ_CL_C); ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E011, + SKERR_SIRQ_E011MSG); + Para.Para64 = MAC_2; +@@ -751,39 +999,37 @@ + if ((Istatus & IS_EXT_REG) != 0) { + /* Test IRQs from PHY */ + for (i = 0; i < pAC->GIni.GIMacsFound; i++) { +- ++ + pPrt = &pAC->GIni.GP[i]; +- ++ + if (pPrt->PState == SK_PRT_RESET) { + continue; + } +- ++ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + switch (pPrt->PhyType) { +- ++ + case SK_PHY_XMAC: + break; +- ++ + case SK_PHY_BCOM: + SkXmPhyRead(pAC, IoC, i, PHY_BCOM_INT_STAT, &PhyInt); +- ++ + if ((PhyInt & ~PHY_B_DEF_MSK) != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, +- ("Port %d Bcom Int: 0x%04X\n", +- i, PhyInt)); ++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt)); + SkPhyIsrBcom(pAC, IoC, i, PhyInt); + } + break; + #ifdef OTHER_PHY + case SK_PHY_LONE: + SkXmPhyRead(pAC, IoC, i, PHY_LONE_INT_STAT, &PhyInt); +- ++ + if ((PhyInt & PHY_L_DEF_MSK) != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, +- ("Port %d Lone Int: %x\n", +- i, PhyInt)); ++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt)); + SkPhyIsrLone(pAC, IoC, i, PhyInt); + } + break; +@@ -791,7 +1037,7 @@ + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* Read PHY Interrupt Status */ +@@ -799,8 +1045,7 @@ + + if ((PhyInt & PHY_M_DEF_MSK) != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, +- ("Port %d Marv Int: 0x%04X\n", +- i, PhyInt)); ++ ("Port %d PHY Int: 0x%04X\n", i, PhyInt)); + SkPhyIsrGmac(pAC, IoC, i, PhyInt); + } + } +@@ -808,13 +1053,13 @@ + } + } + +- /* I2C Ready interrupt */ ++ /* TWSI Ready interrupt */ + if ((Istatus & IS_I2C_READY) != 0) { + #ifdef SK_SLIM +- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); +-#else ++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); ++#else + SkI2cIsr(pAC, IoC); +-#endif ++#endif + } + + /* SW forced interrupt */ +@@ -829,7 +1074,7 @@ + * us only a link going down. + */ + /* clear interrupt */ +- SK_OUT8(IoC, MR_ADDR(MAC_1, LNK_SYNC_CTRL), LED_CLR_IRQ); ++ SK_OUT8(IoC, MR_ADDR(MAC_1, LNK_SYNC_CTRL), LNK_CLR_IRQ); + } + + /* Check MAC after link sync counter */ +@@ -844,7 +1089,7 @@ + * us only a link going down. + */ + /* clear interrupt */ +- SK_OUT8(IoC, MR_ADDR(MAC_2, LNK_SYNC_CTRL), LED_CLR_IRQ); ++ SK_OUT8(IoC, MR_ADDR(MAC_2, LNK_SYNC_CTRL), LNK_CLR_IRQ); + } + + /* Check MAC after link sync counter */ +@@ -860,13 +1105,189 @@ + /* read the HW Error Interrupt source */ + SK_IN32(IoC, B0_HWE_ISRC, &RegVal32); + +- SkGeHwErr(pAC, IoC, RegVal32); ++ SkGeYuHwErr(pAC, IoC, RegVal32); + } + + SkHwtIsr(pAC, IoC); + } + +-} /* SkGeSirqIsr */ ++} /* SkGeYuSirqIsr */ ++ ++#ifdef YUK2 ++/****************************************************************************** ++ * ++ * SkYuk2PortSirq() - Service HW Errors for specified port (Yukon-2 only) ++ * ++ * Description: handles the HW Error interrupts for a specific port. ++ * ++ * Returns: N/A ++ */ ++static void SkYuk2PortSirq( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 IStatus, /* Interrupt status word */ ++int Port) /* Port Index (MAC_1 + n) */ ++{ ++ SK_EVPARA Para; ++ int Queue; ++ SK_U16 PhyInt; ++ ++ if (Port == MAC_2) { ++ IStatus >>= 8; ++ } ++ ++ /* Interrupt from PHY */ ++ if ((IStatus & Y2_IS_IRQ_PHY1) != 0) { ++ /* Read PHY Interrupt Status */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyInt); ++ ++ if ((PhyInt & PHY_M_DEF_MSK) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ++ ("Port %d PHY Int: 0x%04X\n", Port, PhyInt)); ++ SkPhyIsrGmac(pAC, IoC, Port, PhyInt); ++ } ++ } ++ ++ /* Interrupt from MAC */ ++ if ((IStatus & Y2_IS_IRQ_MAC1) != 0) { ++ SkMacIrq(pAC, IoC, Port); ++ } ++ ++ if ((IStatus & (Y2_IS_CHK_RX1 | Y2_IS_CHK_TXS1 | Y2_IS_CHK_TXA1)) != 0) { ++ if ((IStatus & Y2_IS_CHK_RX1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_R1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E006, ++ SKERR_SIRQ_E006MSG); ++ } ++ else { ++ Queue = Q_R2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E007, ++ SKERR_SIRQ_E007MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK); ++ } ++ ++ if ((IStatus & Y2_IS_CHK_TXS1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_XS1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E008, ++ SKERR_SIRQ_E008MSG); ++ } ++ else { ++ Queue = Q_XS2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E010, ++ SKERR_SIRQ_E010MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK); ++ } ++ ++ if ((IStatus & Y2_IS_CHK_TXA1) != 0) { ++ if (Port == MAC_1) { ++ Queue = Q_XA1; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E009, ++ SKERR_SIRQ_E009MSG); ++ } ++ else { ++ Queue = Q_XA2; ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E011, ++ SKERR_SIRQ_E011MSG); ++ } ++ /* Clear IRQ */ ++ SK_OUT32(IoC, Q_ADDR(Queue, Q_CSR), BMU_CLR_IRQ_CHK); ++ } ++ ++ Para.Para64 = Port; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_FAIL, Para); ++ ++ Para.Para32[0] = Port; ++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); ++ } ++} /* SkYuk2PortSirq */ ++#endif /* YUK2 */ ++ ++/****************************************************************************** ++ * ++ * SkYuk2SirqIsr() - Special Interrupt Service Routine (Yukon-2 only) ++ * ++ * Description: handles all non data transfer specific interrupts (slow path) ++ * ++ * Returns: N/A ++ */ ++void SkYuk2SirqIsr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 Istatus) /* Interrupt status word */ ++{ ++#ifdef YUK2 ++ SK_EVPARA Para; ++ SK_U32 RegVal32; /* Read register value */ ++ SK_U8 Value; ++ ++ /* HW Error indicated ? */ ++ if (((Istatus & Y2_IS_HW_ERR) & pAC->GIni.GIValIrqMask) != 0) { ++ /* read the HW Error Interrupt source */ ++ SK_IN32(IoC, B0_HWE_ISRC, &RegVal32); ++ ++ SkYuk2HwErr(pAC, IoC, RegVal32); ++ } ++ ++ /* Interrupt from ASF Subsystem */ ++ if ((Istatus & Y2_IS_ASF) != 0) { ++ /* clear IRQ */ ++ /* later on clearing should be done in ASF ISR handler */ ++ SK_IN8(IoC, B28_Y2_ASF_STAT_CMD, &Value); ++ Value |= Y2_ASF_CLR_HSTI; ++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Value); ++ /* Call IRQ handler in ASF Module */ ++ /* TBD */ ++ } ++ ++ /* Check IRQ from polling unit */ ++ if ((Istatus & Y2_IS_POLL_CHK) != 0) { ++ /* Clear IRQ */ ++ SK_OUT32(IoC, POLL_CTRL, PC_CLR_IRQ_CHK); ++ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_SIRQ_E036, ++ SKERR_SIRQ_E036MSG); ++ Para.Para64 = 0; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_ADAP_FAIL, Para); ++ } ++ ++ /* TWSI Ready interrupt */ ++ if ((Istatus & Y2_IS_TWSI_RDY) != 0) { ++#ifdef SK_SLIM ++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); ++#else ++ SkI2cIsr(pAC, IoC); ++#endif ++ } ++ ++ /* SW forced interrupt */ ++ if ((Istatus & Y2_IS_IRQ_SW) != 0) { ++ /* clear the software IRQ */ ++ SK_OUT8(IoC, B0_CTST, CS_CL_SW_IRQ); ++ } ++ ++ if ((Istatus & Y2_IS_L1_MASK) != 0) { ++ SkYuk2PortSirq(pAC, IoC, Istatus, MAC_1); ++ } ++ ++ if ((Istatus & Y2_IS_L2_MASK) != 0) { ++ SkYuk2PortSirq(pAC, IoC, Istatus, MAC_2); ++ } ++ ++ /* Timer interrupt (served last) */ ++ if ((Istatus & Y2_IS_TIMINT) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, ++ ("Timer Int: 0x%08lX\n", Istatus)); ++ SkHwtIsr(pAC, IoC); ++ } ++#endif /* YUK2 */ ++ ++} /* SkYuk2SirqIsr */ + + + #ifdef GENESIS +@@ -880,8 +1301,8 @@ + */ + static int SkGePortCheckShorts( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port) /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port) /* Port Index (MAC_1 + n) */ + { + SK_U32 Shorts; /* Short Event Counter */ + SK_U32 CheckShorts; /* Check value for Short Event Counter */ +@@ -909,9 +1330,9 @@ + RxCts = 0; + + for (i = 0; i < sizeof(SkGeRxRegs)/sizeof(SkGeRxRegs[0]); i++) { +- ++ + (void)SkXmMacStatistic(pAC, IoC, Port, SkGeRxRegs[i], &RxTmp); +- ++ + RxCts += (SK_U64)RxTmp; + } + +@@ -928,11 +1349,11 @@ + CheckShorts = 2; + + (void)SkXmMacStatistic(pAC, IoC, Port, XM_RXF_FCS_ERR, &FcsErrCts); +- +- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE && +- pPrt->PLipaAutoNeg == SK_LIPA_UNKNOWN && +- (pPrt->PLinkMode == SK_LMODE_HALF || +- pPrt->PLinkMode == SK_LMODE_FULL)) { ++ ++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE && ++ pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_UNKNOWN && ++ (pPrt->PLinkMode == (SK_U8)SK_LMODE_HALF || ++ pPrt->PLinkMode == (SK_U8)SK_LMODE_FULL)) { + /* + * This is autosensing and we are in the fallback + * manual full/half duplex mode. +@@ -941,16 +1362,16 @@ + /* Nothing received, restart link */ + pPrt->PPrevFcs = FcsErrCts; + pPrt->PPrevShorts = Shorts; +- ++ + return(SK_HW_PS_RESTART); + } + else { +- pPrt->PLipaAutoNeg = SK_LIPA_MANUAL; ++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_MANUAL; + } + } + + if (((RxCts - pPrt->PPrevRx) > pPrt->PRxLim) || +- (!(FcsErrCts - pPrt->PPrevFcs))) { ++ (!(FcsErrCts - pPrt->PPrevFcs))) { + /* + * Note: The compare with zero above has to be done the way shown, + * otherwise the Linux driver will have a problem. +@@ -995,29 +1416,25 @@ + */ + static int SkGePortCheckUp( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port) /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ + SK_BOOL AutoNeg; /* Is Auto-negotiation used ? */ + int Rtv; /* Return value */ + + Rtv = SK_HW_PS_NONE; +- ++ + pPrt = &pAC->GIni.GP[Port]; + +- if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) { +- AutoNeg = SK_FALSE; +- } +- else { +- AutoNeg = SK_TRUE; +- } ++ AutoNeg = pPrt->PLinkMode != SK_LMODE_HALF && ++ pPrt->PLinkMode != SK_LMODE_FULL; + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { + + switch (pPrt->PhyType) { +- ++ + case SK_PHY_XMAC: + Rtv = SkGePortCheckUpXmac(pAC, IoC, Port, AutoNeg); + break; +@@ -1038,7 +1455,7 @@ + + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + Rtv = SkGePortCheckUpGmac(pAC, IoC, Port, AutoNeg); + } + #endif /* YUKON */ +@@ -1059,8 +1476,8 @@ + */ + static int SkGePortCheckUpXmac( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port, /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ + { + SK_U32 Shorts; /* Short Event Counter */ +@@ -1098,7 +1515,7 @@ + XM_IN16(IoC, Port, XM_ISRC, &Isrc); + IsrcSum |= Isrc; + SkXmAutoNegLipaXmac(pAC, IoC, Port, IsrcSum); +- ++ + if ((Isrc & XM_IS_INP_ASS) == 0) { + /* It has been in sync since last time */ + /* Restart the PORT */ +@@ -1117,14 +1534,14 @@ + * Link Restart Workaround: + * it may be possible that the other Link side + * restarts its link as well an we detect +- * another LinkBroken. To prevent this ++ * another PLinkBroken. To prevent this + * happening we check for a maximum number + * of consecutive restart. If those happens, + * we do NOT restart the active link and + * check whether the link is now o.k. + */ + pPrt->PLinkResCt++; +- ++ + pPrt->PAutoNegTimeOut = 0; + + if (pPrt->PLinkResCt < SK_MAX_LRESTART) { +@@ -1132,13 +1549,13 @@ + } + + pPrt->PLinkResCt = 0; +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Do NOT restart on Port %d %x %x\n", Port, Isrc, IsrcSum)); + } + else { + pPrt->PIsave = (SK_U16)(IsrcSum & XM_IS_AND); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Save Sync/nosync Port %d %x %x\n", Port, Isrc, IsrcSum)); + +@@ -1165,7 +1582,7 @@ + if ((Isrc & XM_IS_INP_ASS) != 0) { + pPrt->PLinkBroken = SK_TRUE; + /* Re-Init Link partner Autoneg flag */ +- pPrt->PLipaAutoNeg = SK_LIPA_UNKNOWN; ++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN; + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Link broken Port %d\n", Port)); + +@@ -1178,7 +1595,7 @@ + } + else { + SkXmAutoNegLipaXmac(pAC, IoC, Port, Isrc); +- ++ + if (SkGePortCheckShorts(pAC, IoC, Port) == SK_HW_PS_RESTART) { + return(SK_HW_PS_RESTART); + } +@@ -1210,17 +1627,21 @@ + } + + if (AutoNeg) { ++ /* Auto-Negotiation Done ? */ + if ((IsrcSum & XM_IS_AND) != 0) { ++ + SkHWLinkUp(pAC, IoC, Port); ++ + Done = SkMacAutoNegDone(pAC, IoC, Port); ++ + if (Done != SK_AND_OK) { + /* Get PHY parameters, for debugging only */ + SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_AUNE_LP, &LpAb); + SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_RES_ABI, &ResAb); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNeg FAIL Port %d (LpAb %x, ResAb %x)\n", +- Port, LpAb, ResAb)); +- ++ Port, LpAb, ResAb)); ++ + /* Try next possible mode */ + NextMode = SkHWSenseGetNext(pAC, IoC, Port); + SkHWLinkDown(pAC, IoC, Port); +@@ -1236,42 +1657,41 @@ + * (clear Page Received bit if set) + */ + SkXmPhyRead(pAC, IoC, Port, PHY_XMAC_AUNE_EXP, &ExtStat); +- ++ + return(SK_HW_PS_LINK); + } +- ++ + /* AutoNeg not done, but HW link is up. Check for timeouts */ +- pPrt->PAutoNegTimeOut++; +- if (pPrt->PAutoNegTimeOut >= SK_AND_MAX_TO) { ++ if (pPrt->PAutoNegTimeOut++ >= SK_AND_MAX_TO) { + /* Increase the Timeout counter */ + pPrt->PAutoNegTOCt++; + + /* Timeout occured */ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("AutoNeg timeout Port %d\n", Port)); +- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE && +- pPrt->PLipaAutoNeg != SK_LIPA_AUTO) { ++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE && ++ pPrt->PLipaAutoNeg != (SK_U8)SK_LIPA_AUTO) { + /* Set Link manually up */ + SkHWSenseSetNext(pAC, IoC, Port, SK_LMODE_FULL); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Set manual full duplex Port %d\n", Port)); + } + +- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE && +- pPrt->PLipaAutoNeg == SK_LIPA_AUTO && ++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE && ++ pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO && + pPrt->PAutoNegTOCt >= SK_MAX_ANEG_TO) { + /* + * This is rather complicated. + * we need to check here whether the LIPA_AUTO + * we saw before is false alert. We saw at one +- * switch ( SR8800) that on boot time it sends ++ * switch (SR8800) that on boot time it sends + * just one auto-neg packet and does no further + * auto-negotiation. + * Solution: we restart the autosensing after + * a few timeouts. + */ + pPrt->PAutoNegTOCt = 0; +- pPrt->PLipaAutoNeg = SK_LIPA_UNKNOWN; ++ pPrt->PLipaAutoNeg = (SK_U8)SK_LIPA_UNKNOWN; + SkHWInitDefSense(pAC, IoC, Port); + } + +@@ -1282,18 +1702,18 @@ + else { + /* Link is up and we don't need more */ + #ifdef DEBUG +- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("ERROR: Lipa auto detected on port %d\n", Port)); + } + #endif /* DEBUG */ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Link sync(GP), Port %d\n", Port)); + SkHWLinkUp(pAC, IoC, Port); +- ++ + /* +- * Link sync (GP) and so assume a good connection. But if not received +- * a bunch of frames received in a time slot (maybe broken tx cable) ++ * Link sync (GP) and so assume a good connection. But if no ++ * bunch of frames received in a time slot (maybe broken Tx cable) + * the port is restart. + */ + return(SK_HW_PS_LINK); +@@ -1314,8 +1734,8 @@ + */ + static int SkGePortCheckUpBcom( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port, /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -1334,74 +1754,6 @@ + /* Check for No HCD Link events (#10523) */ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &Isrc); + +-#ifdef xDEBUG +- if ((Isrc & ~(PHY_B_IS_HCT | PHY_B_IS_LCT) == +- (PHY_B_IS_SCR_S_ER | PHY_B_IS_RRS_CHANGE | PHY_B_IS_LRS_CHANGE)) { +- +- SK_U32 Stat1, Stat2, Stat3; +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_MASK, &Stat1); +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "CheckUp1 - Stat: %x, Mask: %x", +- (void *)Isrc, +- (void *)Stat1); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_CTRL, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &Stat2); +- Stat1 = Stat1 << 16 | Stat2; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &Stat3); +- Stat2 = Stat2 << 16 | Stat3; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "Ctrl/Stat: %x, AN Adv/LP: %x", +- (void *)Stat1, +- (void *)Stat2); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_EXP, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_EXT_STAT, &Stat2); +- Stat1 = Stat1 << 16 | Stat2; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &Stat3); +- Stat2 = Stat2 << 16 | Stat3; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "AN Exp/IEEE Ext: %x, 1000T Ctrl/Stat: %x", +- (void *)Stat1, +- (void *)Stat2); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_STAT, &Stat2); +- Stat1 = Stat1 << 16 | Stat2; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &Stat3); +- Stat2 = Stat2 << 16 | Stat3; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "PHY Ext Ctrl/Stat: %x, Aux Ctrl/Stat: %x", +- (void *)Stat1, +- (void *)Stat2); +- } +-#endif /* DEBUG */ +- + if ((Isrc & (PHY_B_IS_NO_HDCL /* | PHY_B_IS_NO_HDC */)) != 0) { + /* + * Workaround BCom Errata: +@@ -1414,14 +1766,6 @@ + (SK_U16)(Ctrl & ~PHY_CT_LOOP)); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("No HCD Link event, Port %d\n", Port)); +-#ifdef xDEBUG +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "No HCD link event, port %d.", +- (void *)Port, +- (void *)NULL); +-#endif /* DEBUG */ + } + + /* Not obsolete: link status bit is latched to 0 and autoclearing! */ +@@ -1431,72 +1775,6 @@ + return(SK_HW_PS_NONE); + } + +-#ifdef xDEBUG +- { +- SK_U32 Stat1, Stat2, Stat3; +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_MASK, &Stat1); +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "CheckUp1a - Stat: %x, Mask: %x", +- (void *)Isrc, +- (void *)Stat1); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_CTRL, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &PhyStat); +- Stat1 = Stat1 << 16 | PhyStat; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &Stat3); +- Stat2 = Stat2 << 16 | Stat3; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "Ctrl/Stat: %x, AN Adv/LP: %x", +- (void *)Stat1, +- (void *)Stat2); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_EXP, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_EXT_STAT, &Stat2); +- Stat1 = Stat1 << 16 | Stat2; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ResAb); +- Stat2 = Stat2 << 16 | ResAb; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "AN Exp/IEEE Ext: %x, 1000T Ctrl/Stat: %x", +- (void *)Stat1, +- (void *)Stat2); +- +- Stat1 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, &Stat1); +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_P_EXT_STAT, &Stat2); +- Stat1 = Stat1 << 16 | Stat2; +- Stat2 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Stat2); +- Stat3 = 0; +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &Stat3); +- Stat2 = Stat2 << 16 | Stat3; +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "PHY Ext Ctrl/Stat: %x, Aux Ctrl/Stat: %x", +- (void *)Stat1, +- (void *)Stat2); +- } +-#endif /* DEBUG */ +- + /* + * Here we usually can check whether the link is in sync and + * auto-negotiation is done. +@@ -1505,7 +1783,7 @@ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_STAT, &PhyStat); + + SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("CheckUp Port %d, PhyStat: 0x%04X\n", Port, PhyStat)); + +@@ -1513,88 +1791,62 @@ + + if ((ResAb & PHY_B_1000S_MSF) != 0) { + /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Master/Slave Fault port %d\n", Port)); +- ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("Master/Slave Fault, ResAb: 0x%04X\n", ResAb)); ++ + pPrt->PAutoNegFail = SK_TRUE; + pPrt->PMSStatus = SK_MS_STAT_FAULT; +- ++ + return(SK_HW_PS_RESTART); + } + + if ((PhyStat & PHY_ST_LSYNC) == 0) { + return(SK_HW_PS_NONE); + } +- ++ + pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? + SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE; +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Port %d, ResAb: 0x%04X\n", Port, ResAb)); + + if (AutoNeg) { ++ /* Auto-Negotiation Over ? */ + if ((PhyStat & PHY_ST_AN_OVER) != 0) { +- ++ + SkHWLinkUp(pAC, IoC, Port); +- ++ + Done = SkMacAutoNegDone(pAC, IoC, Port); +- ++ + if (Done != SK_AND_OK) { + #ifdef DEBUG + /* Get PHY parameters, for debugging only */ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUNE_LP, &LpAb); + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ExtStat); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNeg FAIL Port %d (LpAb %x, 1000TStat %x)\n", + Port, LpAb, ExtStat)); + #endif /* DEBUG */ + return(SK_HW_PS_RESTART); + } + else { +-#ifdef xDEBUG +- /* Dummy read ISR to prevent extra link downs/ups */ +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &ExtStat); +- +- if ((ExtStat & ~(PHY_B_IS_HCT | PHY_B_IS_LCT)) != 0) { +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "CheckUp2 - Stat: %x", +- (void *)ExtStat, +- (void *)NULL); +- } +-#endif /* DEBUG */ + return(SK_HW_PS_LINK); + } + } + } + else { /* !AutoNeg */ +- /* Link is up and we don't need more. */ ++ /* Link is up and we don't need more */ + #ifdef DEBUG +- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("ERROR: Lipa auto detected on port %d\n", Port)); + } + #endif /* DEBUG */ + +-#ifdef xDEBUG +- /* Dummy read ISR to prevent extra link downs/ups */ +- SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_INT_STAT, &ExtStat); +- +- if ((ExtStat & ~(PHY_B_IS_HCT | PHY_B_IS_LCT)) != 0) { +- CMSMPrintString( +- pAC->pConfigTable, +- MSG_TYPE_RUNTIME_INFO, +- "CheckUp3 - Stat: %x", +- (void *)ExtStat, +- (void *)NULL); +- } +-#endif /* DEBUG */ +- + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Link sync(GP), Port %d\n", Port)); + SkHWLinkUp(pAC, IoC, Port); +- ++ + return(SK_HW_PS_LINK); + } + +@@ -1615,20 +1867,17 @@ + */ + static int SkGePortCheckUpGmac( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port, /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ + int Done; +- SK_U16 PhyIsrc; /* PHY Interrupt source */ +- SK_U16 PhyStat; /* PPY Status */ ++ SK_U16 PhyStat; /* PHY Status */ + SK_U16 PhySpecStat;/* PHY Specific Status */ + SK_U16 ResAb; /* Master/Slave resolution */ + SK_EVPARA Para; +-#ifdef DEBUG + SK_U16 Word; /* I/O helper */ +-#endif /* DEBUG */ + + pPrt = &pAC->GIni.GP[Port]; + +@@ -1642,94 +1891,125 @@ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("CheckUp Port %d, PhyStat: 0x%04X\n", Port, PhyStat)); + +- /* Read PHY Interrupt Status */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyIsrc); ++ SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat); + +- if ((PhyIsrc & PHY_M_IS_AN_COMPL) != 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Auto-Negotiation Completed, PhyIsrc: 0x%04X\n", PhyIsrc)); +- } ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { + +- if ((PhyIsrc & PHY_M_IS_LSP_CHANGE) != 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Link Speed Changed, PhyIsrc: 0x%04X\n", PhyIsrc)); +- } ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb); + +- SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat); +- +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb); ++ if ((ResAb & PHY_B_1000S_MSF) != 0) { ++ /* Error */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("Master/Slave Fault, ResAb: 0x%04X\n", ResAb)); + +- if ((ResAb & PHY_B_1000S_MSF) != 0) { +- /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Master/Slave Fault port %d\n", Port)); +- +- pPrt->PAutoNegFail = SK_TRUE; +- pPrt->PMSStatus = SK_MS_STAT_FAULT; +- +- return(SK_HW_PS_RESTART); ++ pPrt->PAutoNegFail = SK_TRUE; ++ pPrt->PMSStatus = SK_MS_STAT_FAULT; ++ ++ return(SK_HW_PS_RESTART); ++ } + } + + /* Read PHY Specific Status */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &PhySpecStat); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Phy1000BT: 0x%04X, PhySpecStat: 0x%04X\n", ResAb, PhySpecStat)); + + #ifdef DEBUG + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_EXP, &Word); + +- if ((PhyIsrc & PHY_M_IS_AN_PR) != 0 || (Word & PHY_ANE_RX_PG) != 0 || ++ if ((Word & PHY_ANE_RX_PG) != 0 || + (PhySpecStat & PHY_M_PS_PAGE_REC) != 0) { + /* Read PHY Next Page Link Partner */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_NEPG_LP, &Word); + + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Page Received, NextPage: 0x%04X\n", Word)); ++ ("Page received, NextPage: 0x%04X\n", Word)); + } + #endif /* DEBUG */ + + if ((PhySpecStat & PHY_M_PS_LINK_UP) == 0) { ++ /* Link down */ + return(SK_HW_PS_NONE); + } +- +- if ((PhySpecStat & PHY_M_PS_DOWNS_STAT) != 0 || +- (PhyIsrc & PHY_M_IS_DOWNSH_DET) != 0) { +- /* Downshift detected */ +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E025, SKERR_SIRQ_E025MSG); +- +- Para.Para64 = Port; +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_DOWNSHIFT_DET, Para); +- +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Downshift detected, PhyIsrc: 0x%04X\n", PhyIsrc)); ++ ++#ifdef XXX ++ SK_U16 PhyInt; ++ /* Read PHY Interrupt Status */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_INT_STAT, &PhyInt); ++ ++ /* cross check that the link is really up */ ++ if ((PhyInt & PHY_M_IS_LST_CHANGE) == 0) { ++ /* Link Status unchanged */ ++ return(SK_HW_PS_NONE); + } ++#endif /* XXX */ + +- pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? +- SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE; ++ if (pAC->GIni.GICopperType) { ++ ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ ++ if ((PhySpecStat & PHY_M_PS_DOWNS_STAT) != 0) { ++ /* Downshift detected */ ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E025, ++ SKERR_SIRQ_E025MSG); ++ ++ Para.Para64 = Port; ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_DOWNSHIFT_DET, Para); + +- pPrt->PCableLen = (SK_U8)((PhySpecStat & PHY_M_PS_CABLE_MSK) >> 7); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Downshift detected, PhySpecStat: 0x%04X\n", PhySpecStat)); ++ } ++ ++ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? ++ SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE; ++ } + ++ if ((PhySpecStat & PHY_M_PS_MDI_X_STAT) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("MDI Xover detected, PhyStat: 0x%04X\n", PhySpecStat)); ++ } ++ ++ /* on PHY 88E1112 cable length is in Reg. 26, Page 5 */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* save page register */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &Word); ++ ++ /* select page 5 to access VCT DSP distance register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 5); ++ ++ /* get VCT DSP distance */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL_2, &PhySpecStat); ++ ++ /* restore page register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, Word); ++ ++ pPrt->PCableLen = (SK_U8)(PhySpecStat & PHY_M_EC2_FO_AM_MSK); ++ } ++ else { ++ pPrt->PCableLen = (SK_U8)((PhySpecStat & PHY_M_PS_CABLE_MSK) >> 7); ++ } ++ } ++ + if (AutoNeg) { +- /* Auto-Negotiation Over ? */ ++ /* Auto-Negotiation Complete ? */ + if ((PhyStat & PHY_ST_AN_OVER) != 0) { +- ++ + SkHWLinkUp(pAC, IoC, Port); +- ++ + Done = SkMacAutoNegDone(pAC, IoC, Port); +- ++ + if (Done != SK_AND_OK) { + return(SK_HW_PS_RESTART); + } +- ++ + return(SK_HW_PS_LINK); + } + } + else { /* !AutoNeg */ +- /* Link is up and we don't need more */ + #ifdef DEBUG +- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("ERROR: Lipa auto detected on port %d\n", Port)); + } + #endif /* DEBUG */ +@@ -1737,7 +2017,7 @@ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Link sync, Port %d\n", Port)); + SkHWLinkUp(pAC, IoC, Port); +- ++ + return(SK_HW_PS_LINK); + } + +@@ -1758,8 +2038,8 @@ + */ + static int SkGePortCheckUpLone( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port, /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -1788,7 +2068,7 @@ + StatSum |= PhyStat; + + SkMacAutoNegLipaPhy(pAC, IoC, Port, PhyStat); +- ++ + if ((PhyStat & PHY_ST_LSYNC) == 0) { + /* Save Auto-negotiation Done bit */ + pPrt->PIsave = (SK_U16)(StatSum & PHY_ST_AN_OVER); +@@ -1802,17 +2082,21 @@ + } + + if (AutoNeg) { ++ /* Auto-Negotiation Over ? */ + if ((StatSum & PHY_ST_AN_OVER) != 0) { ++ + SkHWLinkUp(pAC, IoC, Port); ++ + Done = SkMacAutoNegDone(pAC, IoC, Port); ++ + if (Done != SK_AND_OK) { + /* Get PHY parameters, for debugging only */ + SkXmPhyRead(pAC, IoC, Port, PHY_LONE_AUNE_LP, &LpAb); + SkXmPhyRead(pAC, IoC, Port, PHY_LONE_1000T_STAT, &ExtStat); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNeg FAIL Port %d (LpAb %x, 1000TStat %x)\n", + Port, LpAb, ExtStat)); +- ++ + /* Try next possible mode */ + NextMode = SkHWSenseGetNext(pAC, IoC, Port); + SkHWLinkDown(pAC, IoC, Port); +@@ -1833,15 +2117,14 @@ + return(SK_HW_PS_LINK); + } + } +- ++ + /* AutoNeg not done, but HW link is up. Check for timeouts */ +- pPrt->PAutoNegTimeOut++; +- if (pPrt->PAutoNegTimeOut >= SK_AND_MAX_TO) { ++ if (pPrt->PAutoNegTimeOut++ >= SK_AND_MAX_TO) { + /* Timeout occured */ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("AutoNeg timeout Port %d\n", Port)); +- if (pPrt->PLinkModeConf == SK_LMODE_AUTOSENSE && +- pPrt->PLipaAutoNeg != SK_LIPA_AUTO) { ++ if (pPrt->PLinkModeConf == (SK_U8)SK_LMODE_AUTOSENSE && ++ pPrt->PLipaAutoNeg != (SK_U8)SK_LIPA_AUTO) { + /* Set Link manually up */ + SkHWSenseSetNext(pAC, IoC, Port, SK_LMODE_FULL); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, +@@ -1855,8 +2138,8 @@ + else { + /* Link is up and we don't need more */ + #ifdef DEBUG +- if (pPrt->PLipaAutoNeg == SK_LIPA_AUTO) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ if (pPrt->PLipaAutoNeg == (SK_U8)SK_LIPA_AUTO) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("ERROR: Lipa auto detected on port %d\n", Port)); + } + #endif /* DEBUG */ +@@ -1866,11 +2149,12 @@ + * extra link down/ups + */ + SkXmPhyRead(pAC, IoC, Port, PHY_LONE_INT_STAT, &ExtStat); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("Link sync(GP), Port %d\n", Port)); ++ + SkHWLinkUp(pAC, IoC, Port); +- ++ + return(SK_HW_PS_LINK); + } + +@@ -1889,8 +2173,8 @@ + */ + static int SkGePortCheckUpNat( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO Context */ +-int Port, /* Which port should be checked */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL AutoNeg) /* Is Auto-negotiation used ? */ + { + /* todo: National */ +@@ -1909,12 +2193,12 @@ + */ + int SkGeSirqEvent( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* Io Context */ ++SK_IOC IoC, /* I/O Context */ + SK_U32 Event, /* Module specific Event */ + SK_EVPARA Para) /* Event specific Parameter */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +- SK_U32 Port; ++ int Port; + SK_U32 Val32; + int PortStat; + SK_U8 Val8; +@@ -1922,25 +2206,25 @@ + SK_U64 Octets; + #endif /* GENESIS */ + +- Port = Para.Para32[0]; ++ Port = (int)Para.Para32[0]; + pPrt = &pAC->GIni.GP[Port]; + + switch (Event) { + case SK_HWEV_WATIM: + if (pPrt->PState == SK_PRT_RESET) { +- ++ + PortStat = SK_HW_PS_NONE; + } + else { + /* Check whether port came up */ +- PortStat = SkGePortCheckUp(pAC, IoC, (int)Port); ++ PortStat = SkGePortCheckUp(pAC, IoC, Port); + } + + switch (PortStat) { + case SK_HW_PS_RESTART: + if (pPrt->PHWLinkUp) { + /* Set Link to down */ +- SkHWLinkDown(pAC, IoC, (int)Port); ++ SkHWLinkDown(pAC, IoC, Port); + + /* + * Signal directly to RLMT to ensure correct +@@ -1958,19 +2242,23 @@ + SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_UP, Para); + break; + } +- ++ + /* Start again the check Timer */ + if (pPrt->PHWLinkUp) { ++ + Val32 = SK_WA_ACT_TIME; + } + else { + Val32 = SK_WA_INA_TIME; +- } + +- /* Todo: still needed for non-XMAC PHYs??? */ ++ if (pAC->GIni.GIYukon) { ++ Val32 *= 5; ++ } ++ } + /* Start workaround Errata #2 timer */ + SkTimerStart(pAC, IoC, &pPrt->PWaTimer, Val32, + SKGE_HWAC, SK_HWEV_WATIM, Para); ++ + break; + + case SK_HWEV_PORT_START: +@@ -1982,7 +2270,7 @@ + SkRlmtEvent(pAC, IoC, SK_RLMT_LINK_DOWN, Para); + } + +- SkHWLinkDown(pAC, IoC, (int)Port); ++ SkHWLinkDown(pAC, IoC, Port); + + /* Schedule Port RESET */ + SkEventQueue(pAC, SKGE_DRV, SK_DRV_PORT_RESET, Para); +@@ -1990,6 +2278,7 @@ + /* Start workaround Errata #2 timer */ + SkTimerStart(pAC, IoC, &pPrt->PWaTimer, SK_WA_INA_TIME, + SKGE_HWAC, SK_HWEV_WATIM, Para); ++ + break; + + case SK_HWEV_PORT_STOP: +@@ -2004,7 +2293,7 @@ + /* Stop Workaround Timer */ + SkTimerStop(pAC, IoC, &pPrt->PWaTimer); + +- SkHWLinkDown(pAC, IoC, (int)Port); ++ SkHWLinkDown(pAC, IoC, Port); + break; + + case SK_HWEV_UPDATE_STAT: +@@ -2013,7 +2302,7 @@ + + case SK_HWEV_CLEAR_STAT: + /* We do NOT need to clear any statistics */ +- for (Port = 0; Port < (SK_U32)pAC->GIni.GIMacsFound; Port++) { ++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) { + pPrt->PPrevRx = 0; + pPrt->PPrevFcs = 0; + pPrt->PPrevShorts = 0; +@@ -2085,23 +2374,18 @@ + pPrt->HalfDupTimerActive = SK_FALSE; + if (pPrt->PLinkModeStatus == SK_LMODE_STAT_HALF || + pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) { +-#ifdef XXX +- Len = sizeof(SK_U64); +- SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, +- &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, Port), +- pAC->Rlmt.Port[Port].Net->NetNumber); +-#endif /* XXX */ ++ + /* Snap statistic counters */ + (void)SkXmUpdateStats(pAC, IoC, Port); + + (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXO_OK_HI, &Val32); + + Octets = (SK_U64)Val32 << 32; +- ++ + (void)SkXmMacStatistic(pAC, IoC, Port, XM_TXO_OK_LO, &Val32); + + Octets += Val32; +- ++ + if (pPrt->LastOctets == Octets) { + /* Tx hanging, a FIFO flush restarts it */ + SkMacFlushTxFifo(pAC, IoC, Port); +@@ -2110,7 +2394,7 @@ + } + break; + #endif /* GENESIS */ +- ++ + default: + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_SIRQ_E001, SKERR_SIRQ_E001MSG); + break; +@@ -2131,8 +2415,8 @@ + */ + static void SkPhyIsrBcom( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* Io Context */ +-int Port, /* Port Num = PHY Num */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_U16 IStatus) /* Interrupt Status */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -2145,7 +2429,7 @@ + SK_ERR_LOG(pAC, SK_ERRCL_HW | SK_ERRCL_INIT, SKERR_SIRQ_E022, + SKERR_SIRQ_E022MSG); + } +- ++ + if ((IStatus & (PHY_B_IS_AN_PR | PHY_B_IS_LST_CHANGE)) != 0) { + + SkHWLinkDown(pAC, IoC, Port); +@@ -2174,8 +2458,8 @@ + */ + static void SkPhyIsrGmac( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* Io Context */ +-int Port, /* Port Num = PHY Num */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_U16 IStatus) /* Interrupt Status */ + { + SK_GEPORT *pPrt; /* GIni Port struct pointer */ +@@ -2184,37 +2468,69 @@ + + pPrt = &pAC->GIni.GP[Port]; + +- if ((IStatus & (PHY_M_IS_AN_PR | PHY_M_IS_LST_CHANGE)) != 0) { +- +- SkHWLinkDown(pAC, IoC, Port); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Port %d PHY IRQ, PhyIsrc: 0x%04X\n", Port, IStatus)); + +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &Word); ++ if ((IStatus & PHY_M_IS_LST_CHANGE) != 0) { + + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("AutoNeg.Adv: 0x%04X\n", Word)); +- +- /* Set Auto-negotiation advertisement */ +- if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) { +- /* restore Asymmetric Pause bit */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV, +- (SK_U16)(Word | PHY_M_AN_ASP)); +- } +- ++ ("Link Status changed\n")); ++ + Para.Para32[0] = (SK_U32)Port; +- /* Signal to RLMT */ +- SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); ++ ++ if (pPrt->PHWLinkUp) { ++ ++ SkHWLinkDown(pAC, IoC, Port); ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &Word); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("AutoNeg.Adv: 0x%04X\n", Word)); ++ ++ /* Set Auto-negotiation advertisement */ ++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE && ++ pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) { ++ /* restore Asymmetric Pause bit */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV, ++ (SK_U16)(Word | PHY_M_AN_ASP)); ++ } ++ ++ /* Signal to RLMT */ ++ SkEventQueue(pAC, SKGE_RLMT, SK_RLMT_LINK_DOWN, Para); ++ } ++ else { ++ if ((IStatus & PHY_M_IS_AN_COMPL) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Auto-Negotiation completed\n")); ++ } ++ ++ if ((IStatus & PHY_M_IS_LSP_CHANGE) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Link Speed changed\n")); ++ } ++ ++ SkEventQueue(pAC, SKGE_HWAC, SK_HWEV_WATIM, Para); ++ } + } +- ++ + if ((IStatus & PHY_M_IS_AN_ERROR) != 0) { +- /* Auto-Negotiation Error */ +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E023, SKERR_SIRQ_E023MSG); ++ /* the copper PHY makes 1 retry */ ++ if (pAC->GIni.GICopperType) { ++ /* not logged as error, it might be the first attempt */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Auto-Negotiation Error\n")); ++ } ++ else { ++ /* Auto-Negotiation Error */ ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E023, SKERR_SIRQ_E023MSG); ++ } + } +- ++ + if ((IStatus & PHY_M_IS_FIFO_ERROR) != 0) { + /* FIFO Overflow/Underrun Error */ + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_SIRQ_E024, SKERR_SIRQ_E024MSG); + } +- ++ + } /* SkPhyIsrGmac */ + #endif /* YUKON */ + +@@ -2230,8 +2546,8 @@ + */ + static void SkPhyIsrLone( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* Io Context */ +-int Port, /* Port Num = PHY Num */ ++SK_IOC IoC, /* I/O Context */ ++int Port, /* Port Index (MAC_1 + n) */ + SK_U16 IStatus) /* Interrupt Status */ + { + SK_EVPARA Para; +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/ski2c.c linux-2.6.9.new/drivers/net/sk98lin/ski2c.c +--- linux-2.6.9.old/drivers/net/sk98lin/ski2c.c 2004-10-19 05:53:05.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/ski2c.c 1970-01-01 08:00:00.000000000 +0800 +@@ -1,1296 +0,0 @@ +-/****************************************************************************** +- * +- * Name: ski2c.c +- * Project: Gigabit Ethernet Adapters, TWSI-Module +- * Version: $Revision: 1.59 $ +- * Date: $Date: 2003/10/20 09:07:25 $ +- * Purpose: Functions to access Voltage and Temperature Sensor +- * +- ******************************************************************************/ +- +-/****************************************************************************** +- * +- * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * The information in this file is provided "AS IS" without warranty. +- * +- ******************************************************************************/ +- +-/* +- * I2C Protocol +- */ +-#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) +-static const char SysKonnectFileId[] = +- "@(#) $Id: ski2c.c,v 1.59 2003/10/20 09:07:25 rschmidt Exp $ (C) Marvell. "; +-#endif +- +-#include "h/skdrv1st.h" /* Driver Specific Definitions */ +-#include "h/lm80.h" +-#include "h/skdrv2nd.h" /* Adapter Control- and Driver specific Def. */ +- +-#ifdef __C2MAN__ +-/* +- I2C protocol implementation. +- +- General Description: +- +- The I2C protocol is used for the temperature sensors and for +- the serial EEPROM which hold the configuration. +- +- This file covers functions that allow to read write and do +- some bulk requests a specified I2C address. +- +- The Genesis has 2 I2C buses. One for the EEPROM which holds +- the VPD Data and one for temperature and voltage sensor. +- The following picture shows the I2C buses, I2C devices and +- their control registers. +- +- Note: The VPD functions are in skvpd.c +-. +-. PCI Config I2C Bus for VPD Data: +-. +-. +------------+ +-. | VPD EEPROM | +-. +------------+ +-. | +-. | <-- I2C +-. | +-. +-----------+-----------+ +-. | | +-. +-----------------+ +-----------------+ +-. | PCI_VPD_ADR_REG | | PCI_VPD_DAT_REG | +-. +-----------------+ +-----------------+ +-. +-. +-. I2C Bus for LM80 sensor: +-. +-. +-----------------+ +-. | Temperature and | +-. | Voltage Sensor | +-. | LM80 | +-. +-----------------+ +-. | +-. | +-. I2C --> | +-. | +-. +----+ +-. +-------------->| OR |<--+ +-. | +----+ | +-. +------+------+ | +-. | | | +-. +--------+ +--------+ +----------+ +-. | B2_I2C | | B2_I2C | | B2_I2C | +-. | _CTRL | | _DATA | | _SW | +-. +--------+ +--------+ +----------+ +-. +- The I2C bus may be driven by the B2_I2C_SW or by the B2_I2C_CTRL +- and B2_I2C_DATA registers. +- For driver software it is recommended to use the I2C control and +- data register, because I2C bus timing is done by the ASIC and +- an interrupt may be received when the I2C request is completed. +- +- Clock Rate Timing: MIN MAX generated by +- VPD EEPROM: 50 kHz 100 kHz HW +- LM80 over I2C Ctrl/Data reg. 50 kHz 100 kHz HW +- LM80 over B2_I2C_SW register 0 400 kHz SW +- +- Note: The clock generated by the hardware is dependend on the +- PCI clock. If the PCI bus clock is 33 MHz, the I2C/VPD +- clock is 50 kHz. +- */ +-intro() +-{} +-#endif +- +-#ifdef SK_DIAG +-/* +- * I2C Fast Mode timing values used by the LM80. +- * If new devices are added to the I2C bus the timing values have to be checked. +- */ +-#ifndef I2C_SLOW_TIMING +-#define T_CLK_LOW 1300L /* clock low time in ns */ +-#define T_CLK_HIGH 600L /* clock high time in ns */ +-#define T_DATA_IN_SETUP 100L /* data in Set-up Time */ +-#define T_START_HOLD 600L /* start condition hold time */ +-#define T_START_SETUP 600L /* start condition Set-up time */ +-#define T_STOP_SETUP 600L /* stop condition Set-up time */ +-#define T_BUS_IDLE 1300L /* time the bus must free after Tx */ +-#define T_CLK_2_DATA_OUT 900L /* max. clock low to data output valid */ +-#else /* I2C_SLOW_TIMING */ +-/* I2C Standard Mode Timing */ +-#define T_CLK_LOW 4700L /* clock low time in ns */ +-#define T_CLK_HIGH 4000L /* clock high time in ns */ +-#define T_DATA_IN_SETUP 250L /* data in Set-up Time */ +-#define T_START_HOLD 4000L /* start condition hold time */ +-#define T_START_SETUP 4700L /* start condition Set-up time */ +-#define T_STOP_SETUP 4000L /* stop condition Set-up time */ +-#define T_BUS_IDLE 4700L /* time the bus must free after Tx */ +-#endif /* !I2C_SLOW_TIMING */ +- +-#define NS2BCLK(x) (((x)*125)/10000) +- +-/* +- * I2C Wire Operations +- * +- * About I2C_CLK_LOW(): +- * +- * The Data Direction bit (I2C_DATA_DIR) has to be set to input when setting +- * clock to low, to prevent the ASIC and the I2C data client from driving the +- * serial data line simultaneously (ASIC: last bit of a byte = '1', I2C client +- * send an 'ACK'). See also Concentrator Bugreport No. 10192. +- */ +-#define I2C_DATA_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA) +-#define I2C_DATA_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA) +-#define I2C_DATA_OUT(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA_DIR) +-#define I2C_DATA_IN(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA) +-#define I2C_CLK_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_CLK) +-#define I2C_CLK_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK | I2C_DATA_DIR) +-#define I2C_START_COND(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK) +- +-#define NS2CLKT(x) ((x*125L)/10000) +- +-/*--------------- I2C Interface Register Functions --------------- */ +- +-/* +- * sending one bit +- */ +-void SkI2cSndBit( +-SK_IOC IoC, /* I/O Context */ +-SK_U8 Bit) /* Bit to send */ +-{ +- I2C_DATA_OUT(IoC); +- if (Bit) { +- I2C_DATA_HIGH(IoC); +- } +- else { +- I2C_DATA_LOW(IoC); +- } +- SkDgWaitTime(IoC, NS2BCLK(T_DATA_IN_SETUP)); +- I2C_CLK_HIGH(IoC); +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH)); +- I2C_CLK_LOW(IoC); +-} /* SkI2cSndBit*/ +- +- +-/* +- * Signal a start to the I2C Bus. +- * +- * A start is signaled when data goes to low in a high clock cycle. +- * +- * Ends with Clock Low. +- * +- * Status: not tested +- */ +-void SkI2cStart( +-SK_IOC IoC) /* I/O Context */ +-{ +- /* Init data and Clock to output lines */ +- /* Set Data high */ +- I2C_DATA_OUT(IoC); +- I2C_DATA_HIGH(IoC); +- /* Set Clock high */ +- I2C_CLK_HIGH(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_START_SETUP)); +- +- /* Set Data Low */ +- I2C_DATA_LOW(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_START_HOLD)); +- +- /* Clock low without Data to Input */ +- I2C_START_COND(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW)); +-} /* SkI2cStart */ +- +- +-void SkI2cStop( +-SK_IOC IoC) /* I/O Context */ +-{ +- /* Init data and Clock to output lines */ +- /* Set Data low */ +- I2C_DATA_OUT(IoC); +- I2C_DATA_LOW(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT)); +- +- /* Set Clock high */ +- I2C_CLK_HIGH(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_STOP_SETUP)); +- +- /* +- * Set Data High: Do it by setting the Data Line to Input. +- * Because of a pull up resistor the Data Line +- * floods to high. +- */ +- I2C_DATA_IN(IoC); +- +- /* +- * When I2C activity is stopped +- * o DATA should be set to input and +- * o CLOCK should be set to high! +- */ +- SkDgWaitTime(IoC, NS2BCLK(T_BUS_IDLE)); +-} /* SkI2cStop */ +- +- +-/* +- * Receive just one bit via the I2C bus. +- * +- * Note: Clock must be set to LOW before calling this function. +- * +- * Returns The received bit. +- */ +-int SkI2cRcvBit( +-SK_IOC IoC) /* I/O Context */ +-{ +- int Bit; +- SK_U8 I2cSwCtrl; +- +- /* Init data as input line */ +- I2C_DATA_IN(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT)); +- +- I2C_CLK_HIGH(IoC); +- +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH)); +- +- SK_I2C_GET_SW(IoC, &I2cSwCtrl); +- +- Bit = (I2cSwCtrl & I2C_DATA) ? 1 : 0; +- +- I2C_CLK_LOW(IoC); +- SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW-T_CLK_2_DATA_OUT)); +- +- return(Bit); +-} /* SkI2cRcvBit */ +- +- +-/* +- * Receive an ACK. +- * +- * returns 0 If acknowledged +- * 1 in case of an error +- */ +-int SkI2cRcvAck( +-SK_IOC IoC) /* I/O Context */ +-{ +- /* +- * Received bit must be zero. +- */ +- return(SkI2cRcvBit(IoC) != 0); +-} /* SkI2cRcvAck */ +- +- +-/* +- * Send an NACK. +- */ +-void SkI2cSndNAck( +-SK_IOC IoC) /* I/O Context */ +-{ +- /* +- * Received bit must be zero. +- */ +- SkI2cSndBit(IoC, 1); +-} /* SkI2cSndNAck */ +- +- +-/* +- * Send an ACK. +- */ +-void SkI2cSndAck( +-SK_IOC IoC) /* I/O Context */ +-{ +- /* +- * Received bit must be zero. +- */ +- SkI2cSndBit(IoC, 0); +-} /* SkI2cSndAck */ +- +- +-/* +- * Send one byte to the I2C device and wait for ACK. +- * +- * Return acknowleged status. +- */ +-int SkI2cSndByte( +-SK_IOC IoC, /* I/O Context */ +-int Byte) /* byte to send */ +-{ +- int i; +- +- for (i = 0; i < 8; i++) { +- if (Byte & (1<<(7-i))) { +- SkI2cSndBit(IoC, 1); +- } +- else { +- SkI2cSndBit(IoC, 0); +- } +- } +- +- return(SkI2cRcvAck(IoC)); +-} /* SkI2cSndByte */ +- +- +-/* +- * Receive one byte and ack it. +- * +- * Return byte. +- */ +-int SkI2cRcvByte( +-SK_IOC IoC, /* I/O Context */ +-int Last) /* Last Byte Flag */ +-{ +- int i; +- int Byte = 0; +- +- for (i = 0; i < 8; i++) { +- Byte <<= 1; +- Byte |= SkI2cRcvBit(IoC); +- } +- +- if (Last) { +- SkI2cSndNAck(IoC); +- } +- else { +- SkI2cSndAck(IoC); +- } +- +- return(Byte); +-} /* SkI2cRcvByte */ +- +- +-/* +- * Start dialog and send device address +- * +- * Return 0 if acknowleged, 1 in case of an error +- */ +-int SkI2cSndDev( +-SK_IOC IoC, /* I/O Context */ +-int Addr, /* Device Address */ +-int Rw) /* Read / Write Flag */ +-{ +- SkI2cStart(IoC); +- Rw = ~Rw; +- Rw &= I2C_WRITE; +- return(SkI2cSndByte(IoC, (Addr<<1) | Rw)); +-} /* SkI2cSndDev */ +- +-#endif /* SK_DIAG */ +- +-/*----------------- I2C CTRL Register Functions ----------*/ +- +-/* +- * waits for a completion of an I2C transfer +- * +- * returns 0: success, transfer completes +- * 1: error, transfer does not complete, I2C transfer +- * killed, wait loop terminated. +- */ +-int SkI2cWait( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context */ +-int Event) /* complete event to wait for (I2C_READ or I2C_WRITE) */ +-{ +- SK_U64 StartTime; +- SK_U64 CurrentTime; +- SK_U32 I2cCtrl; +- +- StartTime = SkOsGetTime(pAC); +- +- do { +- CurrentTime = SkOsGetTime(pAC); +- +- if (CurrentTime - StartTime > SK_TICKS_PER_SEC / 8) { +- +- SK_I2C_STOP(IoC); +-#ifndef SK_DIAG +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E002, SKERR_I2C_E002MSG); +-#endif /* !SK_DIAG */ +- return(1); +- } +- +- SK_I2C_GET_CTL(IoC, &I2cCtrl); +- +-#ifdef xYUKON_DBG +- printf("StartTime=%lu, CurrentTime=%lu\n", +- StartTime, CurrentTime); +- if (kbhit()) { +- return(1); +- } +-#endif /* YUKON_DBG */ +- +- } while ((I2cCtrl & I2C_FLAG) == (SK_U32)Event << 31); +- +- return(0); +-} /* SkI2cWait */ +- +- +-/* +- * waits for a completion of an I2C transfer +- * +- * Returns +- * Nothing +- */ +-void SkI2cWaitIrq( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC) /* I/O Context */ +-{ +- SK_SENSOR *pSen; +- SK_U64 StartTime; +- SK_U32 IrqSrc; +- +- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; +- +- if (pSen->SenState == SK_SEN_IDLE) { +- return; +- } +- +- StartTime = SkOsGetTime(pAC); +- +- do { +- if (SkOsGetTime(pAC) - StartTime > SK_TICKS_PER_SEC / 8) { +- +- SK_I2C_STOP(IoC); +-#ifndef SK_DIAG +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E016, SKERR_I2C_E016MSG); +-#endif /* !SK_DIAG */ +- return; +- } +- +- SK_IN32(IoC, B0_ISRC, &IrqSrc); +- +- } while ((IrqSrc & IS_I2C_READY) == 0); +- +- pSen->SenState = SK_SEN_IDLE; +- return; +-} /* SkI2cWaitIrq */ +- +-/* +- * writes a single byte or 4 bytes into the I2C device +- * +- * returns 0: success +- * 1: error +- */ +-int SkI2cWrite( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context */ +-SK_U32 I2cData, /* I2C Data to write */ +-int I2cDev, /* I2C Device Address */ +-int I2cDevSize, /* I2C Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */ +-int I2cReg, /* I2C Device Register Address */ +-int I2cBurst) /* I2C Burst Flag */ +-{ +- SK_OUT32(IoC, B2_I2C_DATA, I2cData); +- +- SK_I2C_CTL(IoC, I2C_WRITE, I2cDev, I2cDevSize, I2cReg, I2cBurst); +- +- return(SkI2cWait(pAC, IoC, I2C_WRITE)); +-} /* SkI2cWrite*/ +- +- +-#ifdef SK_DIAG +-/* +- * reads a single byte or 4 bytes from the I2C device +- * +- * returns the word read +- */ +-SK_U32 SkI2cRead( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context */ +-int I2cDev, /* I2C Device Address */ +-int I2cDevSize, /* I2C Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */ +-int I2cReg, /* I2C Device Register Address */ +-int I2cBurst) /* I2C Burst Flag */ +-{ +- SK_U32 Data; +- +- SK_OUT32(IoC, B2_I2C_DATA, 0); +- SK_I2C_CTL(IoC, I2C_READ, I2cDev, I2cDevSize, I2cReg, I2cBurst); +- +- if (SkI2cWait(pAC, IoC, I2C_READ) != 0) { +- w_print("%s\n", SKERR_I2C_E002MSG); +- } +- +- SK_IN32(IoC, B2_I2C_DATA, &Data); +- +- return(Data); +-} /* SkI2cRead */ +-#endif /* SK_DIAG */ +- +- +-/* +- * read a sensor's value +- * +- * This function reads a sensor's value from the I2C sensor chip. The sensor +- * is defined by its index into the sensors database in the struct pAC points +- * to. +- * Returns +- * 1 if the read is completed +- * 0 if the read must be continued (I2C Bus still allocated) +- */ +-int SkI2cReadSensor( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context */ +-SK_SENSOR *pSen) /* Sensor to be read */ +-{ +- if (pSen->SenRead != NULL) { +- return((*pSen->SenRead)(pAC, IoC, pSen)); +- } +- else { +- return(0); /* no success */ +- } +-} /* SkI2cReadSensor */ +- +-/* +- * Do the Init state 0 initialization +- */ +-static int SkI2cInit0( +-SK_AC *pAC) /* Adapter Context */ +-{ +- int i; +- +- /* Begin with first sensor */ +- pAC->I2c.CurrSens = 0; +- +- /* Begin with timeout control for state machine */ +- pAC->I2c.TimerMode = SK_TIMER_WATCH_SM; +- +- /* Set sensor number to zero */ +- pAC->I2c.MaxSens = 0; +- +-#ifndef SK_DIAG +- /* Initialize Number of Dummy Reads */ +- pAC->I2c.DummyReads = SK_MAX_SENSORS; +-#endif +- +- for (i = 0; i < SK_MAX_SENSORS; i++) { +- pAC->I2c.SenTable[i].SenDesc = "unknown"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_UNKNOWN; +- pAC->I2c.SenTable[i].SenThreErrHigh = 0; +- pAC->I2c.SenTable[i].SenThreErrLow = 0; +- pAC->I2c.SenTable[i].SenThreWarnHigh = 0; +- pAC->I2c.SenTable[i].SenThreWarnLow = 0; +- pAC->I2c.SenTable[i].SenReg = LM80_FAN2_IN; +- pAC->I2c.SenTable[i].SenInit = SK_SEN_DYN_INIT_NONE; +- pAC->I2c.SenTable[i].SenValue = 0; +- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_NOT_PRESENT; +- pAC->I2c.SenTable[i].SenErrCts = 0; +- pAC->I2c.SenTable[i].SenBegErrTS = 0; +- pAC->I2c.SenTable[i].SenState = SK_SEN_IDLE; +- pAC->I2c.SenTable[i].SenRead = NULL; +- pAC->I2c.SenTable[i].SenDev = 0; +- } +- +- /* Now we are "INIT data"ed */ +- pAC->I2c.InitLevel = SK_INIT_DATA; +- return(0); +-} /* SkI2cInit0*/ +- +- +-/* +- * Do the init state 1 initialization +- * +- * initialize the following register of the LM80: +- * Configuration register: +- * - START, noINT, activeLOW, noINT#Clear, noRESET, noCI, noGPO#, noINIT +- * +- * Interrupt Mask Register 1: +- * - all interrupts are Disabled (0xff) +- * +- * Interrupt Mask Register 2: +- * - all interrupts are Disabled (0xff) Interrupt modi doesn't matter. +- * +- * Fan Divisor/RST_OUT register: +- * - Divisors set to 1 (bits 00), all others 0s. +- * +- * OS# Configuration/Temperature resolution Register: +- * - all 0s +- * +- */ +-static int SkI2cInit1( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC) /* I/O Context */ +-{ +- int i; +- SK_U8 I2cSwCtrl; +- SK_GEPORT *pPrt; /* GIni Port struct pointer */ +- +- if (pAC->I2c.InitLevel != SK_INIT_DATA) { +- /* ReInit not needed in I2C module */ +- return(0); +- } +- +- /* Set the Direction of I2C-Data Pin to IN */ +- SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA); +- /* Check for 32-Bit Yukon with Low at I2C-Data Pin */ +- SK_I2C_GET_SW(IoC, &I2cSwCtrl); +- +- if ((I2cSwCtrl & I2C_DATA) == 0) { +- /* this is a 32-Bit board */ +- pAC->GIni.GIYukon32Bit = SK_TRUE; +- return(0); +- } +- +- /* Check for 64 Bit Yukon without sensors */ +- if (SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_CFG, 0) != 0) { +- return(0); +- } +- +- (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_1, 0); +- +- (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_2, 0); +- +- (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_FAN_CTRL, 0); +- +- (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_TEMP_CTRL, 0); +- +- (void)SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR, I2C_025K_DEV, +- LM80_CFG, 0); +- +- /* +- * MaxSens has to be updated here, because PhyType is not +- * set when performing Init Level 0 +- */ +- pAC->I2c.MaxSens = 5; +- +- pPrt = &pAC->GIni.GP[0]; +- +- if (pAC->GIni.GIGenesis) { +- if (pPrt->PhyType == SK_PHY_BCOM) { +- if (pAC->GIni.GIMacsFound == 1) { +- pAC->I2c.MaxSens += 1; +- } +- else { +- pAC->I2c.MaxSens += 3; +- } +- } +- } +- else { +- pAC->I2c.MaxSens += 3; +- } +- +- for (i = 0; i < pAC->I2c.MaxSens; i++) { +- switch (i) { +- case 0: +- pAC->I2c.SenTable[i].SenDesc = "Temperature"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_TEMP; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_TEMP_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_TEMP_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_TEMP_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_TEMP_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_TEMP_IN; +- break; +- case 1: +- pAC->I2c.SenTable[i].SenDesc = "Voltage PCI"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PCI_5V_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PCI_5V_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PCI_5V_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PCI_5V_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_VT0_IN; +- break; +- case 2: +- pAC->I2c.SenTable[i].SenDesc = "Voltage PCI-IO"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PCI_IO_5V_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PCI_IO_5V_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PCI_IO_3V3_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PCI_IO_3V3_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_VT1_IN; +- pAC->I2c.SenTable[i].SenInit = SK_SEN_DYN_INIT_PCI_IO; +- break; +- case 3: +- pAC->I2c.SenTable[i].SenDesc = "Voltage ASIC"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_VDD_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_VDD_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VDD_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VDD_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_VT2_IN; +- break; +- case 4: +- if (pAC->GIni.GIGenesis) { +- if (pPrt->PhyType == SK_PHY_BCOM) { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY A PLL"; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; +- } +- else { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PMA"; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; +- } +- } +- else { +- pAC->I2c.SenTable[i].SenDesc = "Voltage VAUX"; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_VAUX_3V3_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_VAUX_3V3_HIGH_WARN; +- if (pAC->GIni.GIVauxAvail) { +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR; +- } +- else { +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_VAUX_0V_WARN_ERR; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_VAUX_0V_WARN_ERR; +- } +- } +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenReg = LM80_VT3_IN; +- break; +- case 5: +- if (pAC->GIni.GIGenesis) { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 2V5"; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR; +- } +- else { +- pAC->I2c.SenTable[i].SenDesc = "Voltage Core 1V5"; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR; +- } +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenReg = LM80_VT4_IN; +- break; +- case 6: +- if (pAC->GIni.GIGenesis) { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY B PLL"; +- } +- else { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 3V3"; +- } +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_VT5_IN; +- break; +- case 7: +- if (pAC->GIni.GIGenesis) { +- pAC->I2c.SenTable[i].SenDesc = "Speed Fan"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_FAN; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_FAN_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_FAN_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_FAN_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_FAN_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_FAN2_IN; +- } +- else { +- pAC->I2c.SenTable[i].SenDesc = "Voltage PHY 2V5"; +- pAC->I2c.SenTable[i].SenType = SK_SEN_VOLT; +- pAC->I2c.SenTable[i].SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR; +- pAC->I2c.SenTable[i].SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN; +- pAC->I2c.SenTable[i].SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN; +- pAC->I2c.SenTable[i].SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR; +- pAC->I2c.SenTable[i].SenReg = LM80_VT6_IN; +- } +- break; +- default: +- SK_ERR_LOG(pAC, SK_ERRCL_INIT | SK_ERRCL_SW, +- SKERR_I2C_E001, SKERR_I2C_E001MSG); +- break; +- } +- +- pAC->I2c.SenTable[i].SenValue = 0; +- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_OK; +- pAC->I2c.SenTable[i].SenErrCts = 0; +- pAC->I2c.SenTable[i].SenBegErrTS = 0; +- pAC->I2c.SenTable[i].SenState = SK_SEN_IDLE; +- pAC->I2c.SenTable[i].SenRead = SkLm80ReadSensor; +- pAC->I2c.SenTable[i].SenDev = LM80_ADDR; +- } +- +-#ifndef SK_DIAG +- pAC->I2c.DummyReads = pAC->I2c.MaxSens; +-#endif /* !SK_DIAG */ +- +- /* Clear I2C IRQ */ +- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); +- +- /* Now we are I/O initialized */ +- pAC->I2c.InitLevel = SK_INIT_IO; +- return(0); +-} /* SkI2cInit1 */ +- +- +-/* +- * Init level 2: Start first sensor read. +- */ +-static int SkI2cInit2( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC) /* I/O Context */ +-{ +- int ReadComplete; +- SK_SENSOR *pSen; +- +- if (pAC->I2c.InitLevel != SK_INIT_IO) { +- /* ReInit not needed in I2C module */ +- /* Init0 and Init2 not permitted */ +- return(0); +- } +- +- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; +- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); +- +- if (ReadComplete) { +- SK_ERR_LOG(pAC, SK_ERRCL_INIT, SKERR_I2C_E008, SKERR_I2C_E008MSG); +- } +- +- /* Now we are correctly initialized */ +- pAC->I2c.InitLevel = SK_INIT_RUN; +- +- return(0); +-} /* SkI2cInit2*/ +- +- +-/* +- * Initialize I2C devices +- * +- * Get the first voltage value and discard it. +- * Go into temperature read mode. A default pointer is not set. +- * +- * The things to be done depend on the init level in the parameter list: +- * Level 0: +- * Initialize only the data structures. Do NOT access hardware. +- * Level 1: +- * Initialize hardware through SK_IN / SK_OUT commands. Do NOT use interrupts. +- * Level 2: +- * Everything is possible. Interrupts may be used from now on. +- * +- * return: +- * 0 = success +- * other = error. +- */ +-int SkI2cInit( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context needed in levels 1 and 2 */ +-int Level) /* Init Level */ +-{ +- +- switch (Level) { +- case SK_INIT_DATA: +- return(SkI2cInit0(pAC)); +- case SK_INIT_IO: +- return(SkI2cInit1(pAC, IoC)); +- case SK_INIT_RUN: +- return(SkI2cInit2(pAC, IoC)); +- default: +- break; +- } +- +- return(0); +-} /* SkI2cInit */ +- +- +-#ifndef SK_DIAG +- +-/* +- * Interrupt service function for the I2C Interface +- * +- * Clears the Interrupt source +- * +- * Reads the register and check it for sending a trap. +- * +- * Starts the timer if necessary. +- */ +-void SkI2cIsr( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC) /* I/O Context */ +-{ +- SK_EVPARA Para; +- +- /* Clear I2C IRQ */ +- SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); +- +- Para.Para64 = 0; +- SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_IRQ, Para); +-} /* SkI2cIsr */ +- +- +-/* +- * Check this sensors Value against the threshold and send events. +- */ +-static void SkI2cCheckSensor( +-SK_AC *pAC, /* Adapter Context */ +-SK_SENSOR *pSen) +-{ +- SK_EVPARA ParaLocal; +- SK_BOOL TooHigh; /* Is sensor too high? */ +- SK_BOOL TooLow; /* Is sensor too low? */ +- SK_U64 CurrTime; /* Current Time */ +- SK_BOOL DoTrapSend; /* We need to send a trap */ +- SK_BOOL DoErrLog; /* We need to log the error */ +- SK_BOOL IsError; /* We need to log the error */ +- +- /* Check Dummy Reads first */ +- if (pAC->I2c.DummyReads > 0) { +- pAC->I2c.DummyReads--; +- return; +- } +- +- /* Get the current time */ +- CurrTime = SkOsGetTime(pAC); +- +- /* Set para to the most useful setting: The current sensor. */ +- ParaLocal.Para64 = (SK_U64)pAC->I2c.CurrSens; +- +- /* Check the Value against the thresholds. First: Error Thresholds */ +- TooHigh = (pSen->SenValue > pSen->SenThreErrHigh); +- TooLow = (pSen->SenValue < pSen->SenThreErrLow); +- +- IsError = SK_FALSE; +- if (TooHigh || TooLow) { +- /* Error condition is satisfied */ +- DoTrapSend = SK_TRUE; +- DoErrLog = SK_TRUE; +- +- /* Now error condition is satisfied */ +- IsError = SK_TRUE; +- +- if (pSen->SenErrFlag == SK_SEN_ERR_ERR) { +- /* This state is the former one */ +- +- /* So check first whether we have to send a trap */ +- if (pSen->SenLastErrTrapTS + SK_SEN_ERR_TR_HOLD > +- CurrTime) { +- /* +- * Do NOT send the Trap. The hold back time +- * has to run out first. +- */ +- DoTrapSend = SK_FALSE; +- } +- +- /* Check now whether we have to log an Error */ +- if (pSen->SenLastErrLogTS + SK_SEN_ERR_LOG_HOLD > +- CurrTime) { +- /* +- * Do NOT log the error. The hold back time +- * has to run out first. +- */ +- DoErrLog = SK_FALSE; +- } +- } +- else { +- /* We came from a different state -> Set Begin Time Stamp */ +- pSen->SenBegErrTS = CurrTime; +- pSen->SenErrFlag = SK_SEN_ERR_ERR; +- } +- +- if (DoTrapSend) { +- /* Set current Time */ +- pSen->SenLastErrTrapTS = CurrTime; +- pSen->SenErrCts++; +- +- /* Queue PNMI Event */ +- SkEventQueue(pAC, SKGE_PNMI, (TooHigh ? +- SK_PNMI_EVT_SEN_ERR_UPP : +- SK_PNMI_EVT_SEN_ERR_LOW), +- ParaLocal); +- } +- +- if (DoErrLog) { +- /* Set current Time */ +- pSen->SenLastErrLogTS = CurrTime; +- +- if (pSen->SenType == SK_SEN_TEMP) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E011, SKERR_I2C_E011MSG); +- } +- else if (pSen->SenType == SK_SEN_VOLT) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E012, SKERR_I2C_E012MSG); +- } +- else { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E015, SKERR_I2C_E015MSG); +- } +- } +- } +- +- /* Check the Value against the thresholds */ +- /* 2nd: Warning thresholds */ +- TooHigh = (pSen->SenValue > pSen->SenThreWarnHigh); +- TooLow = (pSen->SenValue < pSen->SenThreWarnLow); +- +- if (!IsError && (TooHigh || TooLow)) { +- /* Error condition is satisfied */ +- DoTrapSend = SK_TRUE; +- DoErrLog = SK_TRUE; +- +- if (pSen->SenErrFlag == SK_SEN_ERR_WARN) { +- /* This state is the former one */ +- +- /* So check first whether we have to send a trap */ +- if (pSen->SenLastWarnTrapTS + SK_SEN_WARN_TR_HOLD > CurrTime) { +- /* +- * Do NOT send the Trap. The hold back time +- * has to run out first. +- */ +- DoTrapSend = SK_FALSE; +- } +- +- /* Check now whether we have to log an Error */ +- if (pSen->SenLastWarnLogTS + SK_SEN_WARN_LOG_HOLD > CurrTime) { +- /* +- * Do NOT log the error. The hold back time +- * has to run out first. +- */ +- DoErrLog = SK_FALSE; +- } +- } +- else { +- /* We came from a different state -> Set Begin Time Stamp */ +- pSen->SenBegWarnTS = CurrTime; +- pSen->SenErrFlag = SK_SEN_ERR_WARN; +- } +- +- if (DoTrapSend) { +- /* Set current Time */ +- pSen->SenLastWarnTrapTS = CurrTime; +- pSen->SenWarnCts++; +- +- /* Queue PNMI Event */ +- SkEventQueue(pAC, SKGE_PNMI, (TooHigh ? +- SK_PNMI_EVT_SEN_WAR_UPP : +- SK_PNMI_EVT_SEN_WAR_LOW), +- ParaLocal); +- } +- +- if (DoErrLog) { +- /* Set current Time */ +- pSen->SenLastWarnLogTS = CurrTime; +- +- if (pSen->SenType == SK_SEN_TEMP) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E009, SKERR_I2C_E009MSG); +- } +- else if (pSen->SenType == SK_SEN_VOLT) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E010, SKERR_I2C_E010MSG); +- } +- else { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E014, SKERR_I2C_E014MSG); +- } +- } +- } +- +- /* Check for NO error at all */ +- if (!IsError && !TooHigh && !TooLow) { +- /* Set o.k. Status if no error and no warning condition */ +- pSen->SenErrFlag = SK_SEN_ERR_OK; +- } +- +- /* End of check against the thresholds */ +- +- /* Bug fix AF: 16.Aug.2001: Correct the init base +- * of LM80 sensor. +- */ +- if (pSen->SenInit == SK_SEN_DYN_INIT_PCI_IO) { +- +- pSen->SenInit = SK_SEN_DYN_INIT_NONE; +- +- if (pSen->SenValue > SK_SEN_PCI_IO_RANGE_LIMITER) { +- /* 5V PCI-IO Voltage */ +- pSen->SenThreWarnLow = SK_SEN_PCI_IO_5V_LOW_WARN; +- pSen->SenThreErrLow = SK_SEN_PCI_IO_5V_LOW_ERR; +- } +- else { +- /* 3.3V PCI-IO Voltage */ +- pSen->SenThreWarnHigh = SK_SEN_PCI_IO_3V3_HIGH_WARN; +- pSen->SenThreErrHigh = SK_SEN_PCI_IO_3V3_HIGH_ERR; +- } +- } +- +-#ifdef TEST_ONLY +- /* Dynamic thresholds also for VAUX of LM80 sensor */ +- if (pSen->SenInit == SK_SEN_DYN_INIT_VAUX) { +- +- pSen->SenInit = SK_SEN_DYN_INIT_NONE; +- +- /* 3.3V VAUX Voltage */ +- if (pSen->SenValue > SK_SEN_VAUX_RANGE_LIMITER) { +- pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN; +- pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR; +- } +- /* 0V VAUX Voltage */ +- else { +- pSen->SenThreWarnHigh = SK_SEN_VAUX_0V_WARN_ERR; +- pSen->SenThreErrHigh = SK_SEN_VAUX_0V_WARN_ERR; +- } +- } +- +- /* +- * Check initialization state: +- * The VIO Thresholds need adaption +- */ +- if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN && +- pSen->SenValue > SK_SEN_WARNLOW2C && +- pSen->SenValue < SK_SEN_WARNHIGH2) { +- pSen->SenThreErrLow = SK_SEN_ERRLOW2C; +- pSen->SenThreWarnLow = SK_SEN_WARNLOW2C; +- pSen->SenInit = SK_TRUE; +- } +- +- if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN && +- pSen->SenValue > SK_SEN_WARNLOW2 && +- pSen->SenValue < SK_SEN_WARNHIGH2C) { +- pSen->SenThreErrHigh = SK_SEN_ERRHIGH2C; +- pSen->SenThreWarnHigh = SK_SEN_WARNHIGH2C; +- pSen->SenInit = SK_TRUE; +- } +-#endif +- +- if (pSen->SenInit != SK_SEN_DYN_INIT_NONE) { +- SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E013, SKERR_I2C_E013MSG); +- } +-} /* SkI2cCheckSensor */ +- +- +-/* +- * The only Event to be served is the timeout event +- * +- */ +-int SkI2cEvent( +-SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* I/O Context */ +-SK_U32 Event, /* Module specific Event */ +-SK_EVPARA Para) /* Event specific Parameter */ +-{ +- int ReadComplete; +- SK_SENSOR *pSen; +- SK_U32 Time; +- SK_EVPARA ParaLocal; +- int i; +- +- /* New case: no sensors */ +- if (pAC->I2c.MaxSens == 0) { +- return(0); +- } +- +- switch (Event) { +- case SK_I2CEV_IRQ: +- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; +- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); +- +- if (ReadComplete) { +- /* Check sensor against defined thresholds */ +- SkI2cCheckSensor(pAC, pSen); +- +- /* Increment Current sensor and set appropriate Timeout */ +- pAC->I2c.CurrSens++; +- if (pAC->I2c.CurrSens >= pAC->I2c.MaxSens) { +- pAC->I2c.CurrSens = 0; +- Time = SK_I2C_TIM_LONG; +- } +- else { +- Time = SK_I2C_TIM_SHORT; +- } +- +- /* Start Timer */ +- ParaLocal.Para64 = (SK_U64)0; +- +- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; +- +- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, +- SKGE_I2C, SK_I2CEV_TIM, ParaLocal); +- } +- else { +- /* Start Timer */ +- ParaLocal.Para64 = (SK_U64)0; +- +- pAC->I2c.TimerMode = SK_TIMER_WATCH_SM; +- +- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, SK_I2C_TIM_WATCH, +- SKGE_I2C, SK_I2CEV_TIM, ParaLocal); +- } +- break; +- case SK_I2CEV_TIM: +- if (pAC->I2c.TimerMode == SK_TIMER_NEW_GAUGING) { +- +- ParaLocal.Para64 = (SK_U64)0; +- SkTimerStop(pAC, IoC, &pAC->I2c.SenTimer); +- +- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; +- ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); +- +- if (ReadComplete) { +- /* Check sensor against defined thresholds */ +- SkI2cCheckSensor(pAC, pSen); +- +- /* Increment Current sensor and set appropriate Timeout */ +- pAC->I2c.CurrSens++; +- if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) { +- pAC->I2c.CurrSens = 0; +- Time = SK_I2C_TIM_LONG; +- } +- else { +- Time = SK_I2C_TIM_SHORT; +- } +- +- /* Start Timer */ +- ParaLocal.Para64 = (SK_U64)0; +- +- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; +- +- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, +- SKGE_I2C, SK_I2CEV_TIM, ParaLocal); +- } +- } +- else { +- pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; +- pSen->SenErrFlag = SK_SEN_ERR_FAULTY; +- SK_I2C_STOP(IoC); +- +- /* Increment Current sensor and set appropriate Timeout */ +- pAC->I2c.CurrSens++; +- if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) { +- pAC->I2c.CurrSens = 0; +- Time = SK_I2C_TIM_LONG; +- } +- else { +- Time = SK_I2C_TIM_SHORT; +- } +- +- /* Start Timer */ +- ParaLocal.Para64 = (SK_U64)0; +- +- pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; +- +- SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, +- SKGE_I2C, SK_I2CEV_TIM, ParaLocal); +- } +- break; +- case SK_I2CEV_CLEAR: +- for (i = 0; i < SK_MAX_SENSORS; i++) { +- pAC->I2c.SenTable[i].SenErrFlag = SK_SEN_ERR_OK; +- pAC->I2c.SenTable[i].SenErrCts = 0; +- pAC->I2c.SenTable[i].SenWarnCts = 0; +- pAC->I2c.SenTable[i].SenBegErrTS = 0; +- pAC->I2c.SenTable[i].SenBegWarnTS = 0; +- pAC->I2c.SenTable[i].SenLastErrTrapTS = (SK_U64)0; +- pAC->I2c.SenTable[i].SenLastErrLogTS = (SK_U64)0; +- pAC->I2c.SenTable[i].SenLastWarnTrapTS = (SK_U64)0; +- pAC->I2c.SenTable[i].SenLastWarnLogTS = (SK_U64)0; +- } +- break; +- default: +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E006, SKERR_I2C_E006MSG); +- } +- +- return(0); +-} /* SkI2cEvent*/ +- +-#endif /* !SK_DIAG */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/sklm80.c linux-2.6.9.new/drivers/net/sk98lin/sklm80.c +--- linux-2.6.9.old/drivers/net/sk98lin/sklm80.c 2004-10-19 05:55:06.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/sklm80.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: sklm80.c + * Project: Gigabit Ethernet Adapters, TWSI-Module +- * Version: $Revision: 1.22 $ +- * Date: $Date: 2003/10/20 09:08:21 $ ++ * Version: $Revision: 1.1 $ ++ * Date: $Date: 2003/12/19 14:02:31 $ + * Purpose: Functions to access Voltage and Temperature Sensor (LM80) + * + ******************************************************************************/ +@@ -27,7 +27,7 @@ + */ + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: sklm80.c,v 1.22 2003/10/20 09:08:21 rschmidt Exp $ (C) Marvell. "; ++ "@(#) $Id: sklm80.c,v 1.1 2003/12/19 14:02:31 mschmid Exp $ (C) Marvell. "; + #endif + + #include "h/skdrv1st.h" /* Driver Specific Definitions */ +@@ -111,12 +111,12 @@ + /* + * read a sensors value (LM80 specific) + * +- * This function reads a sensors value from the I2C sensor chip LM80. ++ * This function reads a sensors value from the TWSI sensor chip LM80. + * The sensor is defined by its index into the sensors database in the struct + * pAC points to. + * + * Returns 1 if the read is completed +- * 0 if the read must be continued (I2C Bus still allocated) ++ * 0 if the read must be continued (TWSI Bus still allocated) + */ + int SkLm80ReadSensor( + SK_AC *pAC, /* Adapter Context */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skproc.c linux-2.6.9.new/drivers/net/sk98lin/skproc.c +--- linux-2.6.9.old/drivers/net/sk98lin/skproc.c 2004-10-19 05:53:11.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skproc.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,28 +2,34 @@ + * + * Name: skproc.c + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.11 $ +- * Date: $Date: 2003/12/11 16:03:57 $ +- * Purpose: Funktions to display statictic data ++ * Version: $Revision: 1.14.2.4 $ ++ * Date: $Date: 2005/05/23 13:47:33 $ ++ * Purpose: Functions to display statictic data + * + ******************************************************************************/ + + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. ++ * ++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet ++ * Server Adapters. ++ * ++ * Author: Ralph Roesler (rroesler@syskonnect.de) ++ * Mirko Lindner (mlindner@syskonnect.de) ++ * ++ * Address all question to: linux@syskonnect.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * +- * Created 22-Nov-2000 +- * Author: Mirko Lindner (mlindner@syskonnect.de) +- * + * The information in this file is provided "AS IS" without warranty. + * +- ******************************************************************************/ ++ *****************************************************************************/ ++ + #include + #include + +@@ -32,9 +38,16 @@ + #include "h/skversion.h" + + extern struct SK_NET_DEVICE *SkGeRootDev; ++ ++/****************************************************************************** ++ * ++ * Local Function Prototypes and Local Variables ++ * ++ *****************************************************************************/ ++ + static int sk_proc_print(void *writePtr, char *format, ...); + static void sk_gen_browse(void *buffer); +-int len; ++static int len; + + static int sk_seq_show(struct seq_file *seq, void *v); + static int sk_proc_open(struct inode *inode, struct file *file); +@@ -52,16 +65,18 @@ + * sk_gen_browse -generic print "summaries" entry + * + * Description: +- * This function fills the proc entry with statistic data about +- * the ethernet device. ++ * This function fills the proc entry with statistic data about ++ * the ethernet device. + * +- * Returns: - ++ * Returns: N/A + * + */ +-static void sk_gen_browse(void *buffer) ++static void sk_gen_browse( ++void *buffer) /* buffer where the statistics will be stored in */ + { + struct SK_NET_DEVICE *SkgeProcDev = SkGeRootDev; + struct SK_NET_DEVICE *next; ++ SK_BOOL DisableStatistic = 0; + SK_PNMI_STRUCT_DATA *pPnmiStruct; + SK_PNMI_STAT *pPnmiStat; + unsigned long Flags; +@@ -69,6 +84,7 @@ + DEV_NET *pNet; + SK_AC *pAC; + char sens_msg[50]; ++ int card_type; + int MaxSecurityCount = 0; + int t; + int i; +@@ -91,7 +107,7 @@ + + spin_lock_irqsave(&pAC->SlowPathLock, Flags); + Size = SK_PNMI_STRUCT_SIZE; +-#ifdef SK_DIAG_SUPPORT ++ DisableStatistic = 0; + if (pAC->BoardLevel == SK_INIT_DATA) { + SK_MEMCPY(&(pAC->PnmiStruct), &(pAC->PnmiBackup), sizeof(SK_PNMI_STRUCT_DATA)); + if (pAC->DiagModeActive == DIAG_NOTACTIVE) { +@@ -100,13 +116,13 @@ + } else { + SkPnmiGetStruct(pAC, pAC->IoBase, pPnmiStruct, &Size, t-1); + } +-#else +- SkPnmiGetStruct(pAC, pAC->IoBase, +- pPnmiStruct, &Size, t-1); +-#endif + spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); +- + if (strcmp(pAC->dev[t-1]->name, currDev->name) == 0) { ++ if (!pAC->GIni.GIYukon32Bit) ++ card_type = 64; ++ else ++ card_type = 32; ++ + pPnmiStat = &pPnmiStruct->Stat[0]; + len = sk_proc_print(buffer, + "\nDetailed statistic for device %s\n", +@@ -118,6 +134,17 @@ + len += sk_proc_print(buffer, + "\nBoard statistics\n\n"); + len += sk_proc_print(buffer, ++ "Card name %s\n", ++ pAC->DeviceStr); ++ len += sk_proc_print(buffer, ++ "Vendor/Device ID %x/%x\n", ++ pAC->PciDev->vendor, ++ pAC->PciDev->device); ++ len += sk_proc_print(buffer, ++ "Card type (Bit) %d\n", ++ card_type); ++ ++ len += sk_proc_print(buffer, + "Active Port %c\n", + 'A' + pAC->Rlmt.Net[t-1].Port[pAC->Rlmt. + Net[t-1].PrefPort]->PortNumber); +@@ -126,177 +153,239 @@ + 'A' + pAC->Rlmt.Net[t-1].Port[pAC->Rlmt. + Net[t-1].PrefPort]->PortNumber); + +- len += sk_proc_print(buffer, +- "Bus speed (MHz) %d\n", +- pPnmiStruct->BusSpeed); +- +- len += sk_proc_print(buffer, +- "Bus width (Bit) %d\n", +- pPnmiStruct->BusWidth); +- len += sk_proc_print(buffer, +- "Driver version %s\n", +- VER_STRING); +- len += sk_proc_print(buffer, +- "Hardware revision v%d.%d\n", +- (pAC->GIni.GIPciHwRev >> 4) & 0x0F, +- pAC->GIni.GIPciHwRev & 0x0F); +- +- /* Print sensor informations */ +- for (i=0; i < pAC->I2c.MaxSens; i ++) { +- /* Check type */ +- switch (pAC->I2c.SenTable[i].SenType) { +- case 1: +- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); +- strcat(sens_msg, " (C)"); +- len += sk_proc_print(buffer, +- "%-25s %d.%02d\n", +- sens_msg, +- pAC->I2c.SenTable[i].SenValue / 10, +- pAC->I2c.SenTable[i].SenValue % 10); ++ if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_STATIC) { ++ len += sk_proc_print(buffer, ++ "Interrupt Moderation static (%d ints/sec)\n", ++ pAC->DynIrqModInfo.MaxModIntsPerSec); ++ } else if (pAC->DynIrqModInfo.IntModTypeSelect == C_INT_MOD_DYNAMIC) { ++ len += sk_proc_print(buffer, ++ "Interrupt Moderation dynamic (%d ints/sec)\n", ++ pAC->DynIrqModInfo.MaxModIntsPerSec); ++ } else { ++ len += sk_proc_print(buffer, ++ "Interrupt Moderation disabled\n"); ++ } + +- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); +- strcat(sens_msg, " (F)"); ++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) { ++ len += sk_proc_print(buffer, ++ "Bus type PCI-Express\n"); ++ len += sk_proc_print(buffer, ++ "Bus width (Lanes) %d\n", ++ pAC->GIni.GIPexWidth); ++ } else { ++ if (pAC->GIni.GIPciBus == SK_PCIX_BUS) { + len += sk_proc_print(buffer, +- "%-25s %d.%02d\n", +- sens_msg, +- ((((pAC->I2c.SenTable[i].SenValue) +- *10)*9)/5 + 3200)/100, +- ((((pAC->I2c.SenTable[i].SenValue) +- *10)*9)/5 + 3200) % 10); +- break; +- case 2: +- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); +- strcat(sens_msg, " (V)"); ++ "Bus type PCI-X\n"); ++ if (pAC->GIni.GIPciMode == PCI_OS_SPD_X133) { ++ len += sk_proc_print(buffer, ++ "Bus speed (MHz) 133\n"); ++ } else if (pAC->GIni.GIPciMode == PCI_OS_SPD_X100) { ++ len += sk_proc_print(buffer, ++ "Bus speed (MHz) 100\n"); ++ } else if (pAC->GIni.GIPciMode == PCI_OS_SPD_X66) { ++ len += sk_proc_print(buffer, ++ "Bus speed (MHz) 66\n"); ++ } else { ++ len += sk_proc_print(buffer, ++ "Bus speed (MHz) 33\n"); ++ } ++ } else { + len += sk_proc_print(buffer, +- "%-25s %d.%03d\n", +- sens_msg, +- pAC->I2c.SenTable[i].SenValue / 1000, +- pAC->I2c.SenTable[i].SenValue % 1000); +- break; +- case 3: +- strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); +- strcat(sens_msg, " (rpm)"); ++ "Bus type PCI\n"); + len += sk_proc_print(buffer, +- "%-25s %d\n", +- sens_msg, +- pAC->I2c.SenTable[i].SenValue); +- break; +- default: +- break; ++ "Bus speed (MHz) %d\n", ++ pPnmiStruct->BusSpeed); + } ++ len += sk_proc_print(buffer, ++ "Bus width (Bit) %d\n", ++ pPnmiStruct->BusWidth); + } +- +- /*Receive statistics */ +- len += sk_proc_print(buffer, +- "\nReceive statistics\n\n"); + + len += sk_proc_print(buffer, +- "Received bytes %Lu\n", +- (unsigned long long) pPnmiStat->StatRxOctetsOkCts); +- len += sk_proc_print(buffer, +- "Received packets %Lu\n", +- (unsigned long long) pPnmiStat->StatRxOkCts); +-#if 0 +- if (pAC->GIni.GP[0].PhyType == SK_PHY_XMAC && +- pAC->HWRevision < 12) { +- pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts - +- pPnmiStat->StatRxShortsCts; +- pPnmiStat->StatRxShortsCts = 0; +- } +-#endif +- if (pNet->Mtu > 1500) +- pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts - +- pPnmiStat->StatRxTooLongCts; +- +- len += sk_proc_print(buffer, +- "Receive errors %Lu\n", +- (unsigned long long) pPnmiStruct->InErrorsCts); +- len += sk_proc_print(buffer, +- "Receive dropped %Lu\n", +- (unsigned long long) pPnmiStruct->RxNoBufCts); ++ "Driver version %s (%s)\n", ++ VER_STRING, PATCHLEVEL); + len += sk_proc_print(buffer, +- "Received multicast %Lu\n", +- (unsigned long long) pPnmiStat->StatRxMulticastOkCts); ++ "Driver release date %s\n", ++ pAC->Pnmi.pDriverReleaseDate); + len += sk_proc_print(buffer, +- "Receive error types\n"); +- len += sk_proc_print(buffer, +- " length %Lu\n", +- (unsigned long long) pPnmiStat->StatRxRuntCts); +- len += sk_proc_print(buffer, +- " buffer overflow %Lu\n", +- (unsigned long long) pPnmiStat->StatRxFifoOverflowCts); +- len += sk_proc_print(buffer, +- " bad crc %Lu\n", +- (unsigned long long) pPnmiStat->StatRxFcsCts); +- len += sk_proc_print(buffer, +- " framing %Lu\n", +- (unsigned long long) pPnmiStat->StatRxFramingCts); +- len += sk_proc_print(buffer, +- " missed frames %Lu\n", +- (unsigned long long) pPnmiStat->StatRxMissedCts); +- +- if (pNet->Mtu > 1500) +- pPnmiStat->StatRxTooLongCts = 0; ++ "Hardware revision v%d.%d\n", ++ (pAC->GIni.GIPciHwRev >> 4) & 0x0F, ++ pAC->GIni.GIPciHwRev & 0x0F); + +- len += sk_proc_print(buffer, +- " too long %Lu\n", +- (unsigned long long) pPnmiStat->StatRxTooLongCts); +- len += sk_proc_print(buffer, +- " carrier extension %Lu\n", +- (unsigned long long) pPnmiStat->StatRxCextCts); +- len += sk_proc_print(buffer, +- " too short %Lu\n", +- (unsigned long long) pPnmiStat->StatRxShortsCts); +- len += sk_proc_print(buffer, +- " symbol %Lu\n", +- (unsigned long long) pPnmiStat->StatRxSymbolCts); +- len += sk_proc_print(buffer, +- " LLC MAC size %Lu\n", +- (unsigned long long) pPnmiStat->StatRxIRLengthCts); +- len += sk_proc_print(buffer, +- " carrier event %Lu\n", +- (unsigned long long) pPnmiStat->StatRxCarrierCts); +- len += sk_proc_print(buffer, +- " jabber %Lu\n", +- (unsigned long long) pPnmiStat->StatRxJabberCts); ++ if (!netif_running(pAC->dev[t-1])) { ++ len += sk_proc_print(buffer, ++ "\n Device %s is down.\n" ++ " Therefore no statistics are available.\n" ++ " After bringing the device up (ifconfig)" ++ " statistics will\n" ++ " be displayed.\n", ++ pAC->dev[t-1]->name); ++ DisableStatistic = 1; ++ } + ++ /* Display only if statistic info available */ ++ /* Print sensor informations */ ++ if (!DisableStatistic) { ++ for (i=0; i < pAC->I2c.MaxSens; i ++) { ++ /* Check type */ ++ switch (pAC->I2c.SenTable[i].SenType) { ++ case 1: ++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); ++ strcat(sens_msg, " (C)"); ++ len += sk_proc_print(buffer, ++ "%-25s %d.%02d\n", ++ sens_msg, ++ pAC->I2c.SenTable[i].SenValue / 10, ++ pAC->I2c.SenTable[i].SenValue % ++ 10); ++ ++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); ++ strcat(sens_msg, " (F)"); ++ len += sk_proc_print(buffer, ++ "%-25s %d.%02d\n", ++ sens_msg, ++ ((((pAC->I2c.SenTable[i].SenValue) ++ *10)*9)/5 + 3200)/100, ++ ((((pAC->I2c.SenTable[i].SenValue) ++ *10)*9)/5 + 3200) % 10); ++ break; ++ case 2: ++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); ++ strcat(sens_msg, " (V)"); ++ len += sk_proc_print(buffer, ++ "%-25s %d.%03d\n", ++ sens_msg, ++ pAC->I2c.SenTable[i].SenValue / 1000, ++ pAC->I2c.SenTable[i].SenValue % 1000); ++ break; ++ case 3: ++ strcpy(sens_msg, pAC->I2c.SenTable[i].SenDesc); ++ strcat(sens_msg, " (rpm)"); ++ len += sk_proc_print(buffer, ++ "%-25s %d\n", ++ sens_msg, ++ pAC->I2c.SenTable[i].SenValue); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ /*Receive statistics */ ++ len += sk_proc_print(buffer, ++ "\nReceive statistics\n\n"); ++ ++ len += sk_proc_print(buffer, ++ "Received bytes %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxOctetsOkCts); ++ len += sk_proc_print(buffer, ++ "Received packets %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxOkCts); ++#if 0 ++ if (pAC->GIni.GP[0].PhyType == SK_PHY_XMAC && ++ pAC->HWRevision < 12) { ++ pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts - ++ pPnmiStat->StatRxShortsCts; ++ pPnmiStat->StatRxShortsCts = 0; ++ } ++#endif ++ if (pAC->dev[t-1]->mtu > 1500) ++ pPnmiStruct->InErrorsCts = pPnmiStruct->InErrorsCts - ++ pPnmiStat->StatRxTooLongCts; ++ ++ len += sk_proc_print(buffer, ++ "Receive errors %Lu\n", ++ (unsigned long long) pPnmiStruct->InErrorsCts); ++ len += sk_proc_print(buffer, ++ "Receive dropped %Lu\n", ++ (unsigned long long) pPnmiStruct->RxNoBufCts); ++ len += sk_proc_print(buffer, ++ "Received multicast %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxMulticastOkCts); ++#ifdef ADVANCED_STATISTIC_OUTPUT ++ len += sk_proc_print(buffer, ++ "Receive error types\n"); ++ len += sk_proc_print(buffer, ++ " length %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxRuntCts); ++ len += sk_proc_print(buffer, ++ " buffer overflow %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxFifoOverflowCts); ++ len += sk_proc_print(buffer, ++ " bad crc %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxFcsCts); ++ len += sk_proc_print(buffer, ++ " framing %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxFramingCts); ++ len += sk_proc_print(buffer, ++ " missed frames %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxMissedCts); ++ ++ if (pAC->dev[t-1]->mtu > 1500) ++ pPnmiStat->StatRxTooLongCts = 0; ++ ++ len += sk_proc_print(buffer, ++ " too long %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxTooLongCts); ++ len += sk_proc_print(buffer, ++ " carrier extension %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxCextCts); ++ len += sk_proc_print(buffer, ++ " too short %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxShortsCts); ++ len += sk_proc_print(buffer, ++ " symbol %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxSymbolCts); ++ len += sk_proc_print(buffer, ++ " LLC MAC size %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxIRLengthCts); ++ len += sk_proc_print(buffer, ++ " carrier event %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxCarrierCts); ++ len += sk_proc_print(buffer, ++ " jabber %Lu\n", ++ (unsigned long long) pPnmiStat->StatRxJabberCts); ++#endif + +- /*Transmit statistics */ +- len += sk_proc_print(buffer, +- "\nTransmit statistics\n\n"); ++ /*Transmit statistics */ ++ len += sk_proc_print(buffer, ++ "\nTransmit statistics\n\n"); + +- len += sk_proc_print(buffer, +- "Transmited bytes %Lu\n", +- (unsigned long long) pPnmiStat->StatTxOctetsOkCts); +- len += sk_proc_print(buffer, +- "Transmited packets %Lu\n", +- (unsigned long long) pPnmiStat->StatTxOkCts); +- len += sk_proc_print(buffer, +- "Transmit errors %Lu\n", +- (unsigned long long) pPnmiStat->StatTxSingleCollisionCts); +- len += sk_proc_print(buffer, +- "Transmit dropped %Lu\n", +- (unsigned long long) pPnmiStruct->TxNoBufCts); +- len += sk_proc_print(buffer, +- "Transmit collisions %Lu\n", +- (unsigned long long) pPnmiStat->StatTxSingleCollisionCts); +- len += sk_proc_print(buffer, +- "Transmit error types\n"); +- len += sk_proc_print(buffer, +- " excessive collision %ld\n", +- pAC->stats.tx_aborted_errors); +- len += sk_proc_print(buffer, +- " carrier %Lu\n", +- (unsigned long long) pPnmiStat->StatTxCarrierCts); +- len += sk_proc_print(buffer, +- " fifo underrun %Lu\n", +- (unsigned long long) pPnmiStat->StatTxFifoUnderrunCts); +- len += sk_proc_print(buffer, +- " heartbeat %Lu\n", +- (unsigned long long) pPnmiStat->StatTxCarrierCts); +- len += sk_proc_print(buffer, +- " window %ld\n", +- pAC->stats.tx_window_errors); ++ len += sk_proc_print(buffer, ++ "Transmitted bytes %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxOctetsOkCts); ++ len += sk_proc_print(buffer, ++ "Transmitted packets %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxOkCts); ++ len += sk_proc_print(buffer, ++ "Transmit errors %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxSingleCollisionCts); ++ len += sk_proc_print(buffer, ++ "Transmit dropped %Lu\n", ++ (unsigned long long) pPnmiStruct->TxNoBufCts); ++ len += sk_proc_print(buffer, ++ "Transmit collisions %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxSingleCollisionCts); ++#ifdef ADVANCED_STATISTIC_OUTPUT ++ len += sk_proc_print(buffer, ++ "Transmit error types\n"); ++ len += sk_proc_print(buffer, ++ " excessive collision %ld\n", ++ pAC->stats.tx_aborted_errors); ++ len += sk_proc_print(buffer, ++ " carrier %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxCarrierCts); ++ len += sk_proc_print(buffer, ++ " fifo underrun %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxFifoUnderrunCts); ++ len += sk_proc_print(buffer, ++ " heartbeat %Lu\n", ++ (unsigned long long) pPnmiStat->StatTxCarrierCts); ++ len += sk_proc_print(buffer, ++ " window %ld\n", ++ pAC->stats.tx_window_errors); ++#endif ++ } /* if (!DisableStatistic) */ + + } /* if (strcmp(pACname, currDeviceName) == 0) */ + } +@@ -306,16 +395,20 @@ + + /***************************************************************************** + * +- * sk_proc_print -generic line print ++ * sk_proc_print - generic line print + * + * Description: +- * This function fills the proc entry with statistic data about +- * the ethernet device. ++ * This function fills the proc entry with statistic data about the ++ * ethernet device. + * +- * Returns: number of bytes written ++ * Returns: ++ * the number of bytes written + * + */ +-static int sk_proc_print(void *writePtr, char *format, ...) ++static int sk_proc_print( ++void *writePtr, /* the buffer pointer */ ++char *format, /* the format of the string */ ++...) /* variable list of arguments */ + { + #define MAX_LEN_SINGLE_LINE 256 + char str[MAX_LEN_SINGLE_LINE]; +@@ -341,19 +434,22 @@ + * sk_seq_show - show proc information of a particular adapter + * + * Description: +- * This function fills the proc entry with statistic data about +- * the ethernet device. It invokes the generic sk_gen_browse() to +- * print out all items one per one. ++ * This function fills the proc entry with statistic data about the ++ * ethernet device. It invokes the generic sk_gen_browse() to print ++ * out all items one per one. + * +- * Returns: number of bytes written ++ * Returns: ++ * the number of bytes written + * + */ +-static int sk_seq_show(struct seq_file *seq, void *v) ++static int sk_seq_show( ++struct seq_file *seq, /* the sequence pointer */ ++void *v) /* additional pointer */ + { +- void *castedBuffer = (void *) seq; +- currDev = seq->private; +- sk_gen_browse(castedBuffer); +- return 0; ++ void *castedBuffer = (void *) seq; ++ currDev = seq->private; ++ sk_gen_browse(castedBuffer); ++ return 0; + } + + /***************************************************************************** +@@ -361,14 +457,17 @@ + * sk_proc_open - register the show function when proc is open'ed + * + * Description: +- * This function is called whenever a sk98lin proc file is queried. ++ * This function is called whenever a sk98lin proc file is queried. + * +- * Returns: the return value of single_open() ++ * Returns: ++ * the return value of single_open() + * + */ +-static int sk_proc_open(struct inode *inode, struct file *file) ++static int sk_proc_open( ++struct inode *inode, /* the inode of the file */ ++struct file *file) /* the file pointer itself */ + { +- return single_open(file, sk_seq_show, PDE(inode)->data); ++ return single_open(file, sk_seq_show, PDE(inode)->data); + } + + /******************************************************************************* +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skqueue.c linux-2.6.9.new/drivers/net/sk98lin/skqueue.c +--- linux-2.6.9.old/drivers/net/sk98lin/skqueue.c 2004-10-19 05:53:43.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skqueue.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skqueue.c + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.20 $ +- * Date: $Date: 2003/09/16 13:44:00 $ ++ * Version: $Revision: 2.3 $ ++ * Date: $Date: 2004/05/14 13:28:18 $ + * Purpose: Management of an event queue. + * + ******************************************************************************/ +@@ -28,7 +28,7 @@ + */ + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skqueue.c,v 1.20 2003/09/16 13:44:00 rschmidt Exp $ (C) Marvell."; ++ "@(#) $Id: skqueue.c,v 2.3 2004/05/14 13:28:18 malthoff Exp $ (C) Marvell."; + #endif + + #include "h/skdrv1st.h" /* Driver Specific Definitions */ +@@ -48,10 +48,16 @@ + + #define PRINTF(a,b,c) + +-/* +- * init event queue management ++/****************************************************************************** ++ * ++ * SkEventInit() - init event queue management + * +- * Must be called during init level 0. ++ * Description: ++ * This function initializes event queue management. ++ * It must be called during init level 0. ++ * ++ * Returns: ++ * nothing + */ + void SkEventInit( + SK_AC *pAC, /* Adapter context */ +@@ -67,8 +73,17 @@ + } + } + +-/* +- * add event to queue ++/****************************************************************************** ++ * ++ * SkEventQueue() - add event to queue ++ * ++ * Description: ++ * This function adds an event to the event queue. ++ * At least Init Level 1 is required to queue events, ++ * but will be scheduled add Init Level 2. ++ * ++ * returns: ++ * nothing + */ + void SkEventQueue( + SK_AC *pAC, /* Adapters context */ +@@ -76,26 +91,45 @@ + SK_U32 Event, /* Event to be queued */ + SK_EVPARA Para) /* Event parameter */ + { +- pAC->Event.EvPut->Class = Class; +- pAC->Event.EvPut->Event = Event; +- pAC->Event.EvPut->Para = Para; ++ ++ if (pAC->GIni.GILevel == SK_INIT_DATA) { ++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E003, SKERR_Q_E003MSG); ++ } ++ else { ++ pAC->Event.EvPut->Class = Class; ++ pAC->Event.EvPut->Event = Event; ++ pAC->Event.EvPut->Para = Para; + +- if (++pAC->Event.EvPut == &pAC->Event.EvQueue[SK_MAX_EVENT]) +- pAC->Event.EvPut = pAC->Event.EvQueue; ++ if (++pAC->Event.EvPut == &pAC->Event.EvQueue[SK_MAX_EVENT]) ++ pAC->Event.EvPut = pAC->Event.EvQueue; + +- if (pAC->Event.EvPut == pAC->Event.EvGet) { +- SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E001, SKERR_Q_E001MSG); ++ if (pAC->Event.EvPut == pAC->Event.EvGet) { ++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E001, SKERR_Q_E001MSG); ++ } + } + } + +-/* +- * event dispatcher +- * while event queue is not empty +- * get event from queue +- * send command to state machine +- * end +- * return error reported by individual Event function +- * 0 if no error occured. ++/****************************************************************************** ++ * ++ * SkEventDispatcher() - Event Dispatcher ++ * ++ * Description: ++ * The event dispatcher performs the following operations: ++ * o while event queue is not empty ++ * - get event from queue ++ * - send event to state machine ++ * end ++ * ++ * CAUTION: ++ * The event functions MUST report an error if performing a reinitialization ++ * of the event queue, e.g. performing level Init 0..2 while in dispatcher ++ * call! ++ * ANY OTHER return value delays scheduling the other events in the ++ * queue. In this case the event blocks the queue until ++ * the error condition is cleared! ++ * ++ * Returns: ++ * The return value error reported by individual event function + */ + int SkEventDispatcher( + SK_AC *pAC, /* Adapters Context */ +@@ -105,6 +139,10 @@ + SK_U32 Class; + int Rtv; + ++ if (pAC->GIni.GILevel != SK_INIT_RUN) { ++ SK_ERR_LOG(pAC, SK_ERRCL_NORES, SKERR_Q_E005, SKERR_Q_E005MSG); ++ } ++ + pEv = pAC->Event.EvGet; + + PRINTF("dispatch get %x put %x\n", pEv, pAC->Event.ev_put); +@@ -152,6 +190,11 @@ + Rtv = SkFdEvent(pAC, Ioc, pEv->Event, pEv->Para); + break; + #endif /* SK_USE_LAC_EV */ ++#ifdef SK_ASF ++ case SKGE_ASF : ++ Rtv = SkAsfEvent(pAC,Ioc,pEv->Event,pEv->Para); ++ break ; ++#endif + #ifdef SK_USE_CSUM + case SKGE_CSUM : + Rtv = SkCsEvent(pAC, Ioc, pEv->Event, pEv->Para); +@@ -163,6 +206,20 @@ + } + + if (Rtv != 0) { ++ /* ++ * Special Case: See CAUTION statement above. ++ * We assume the event queue is reset. ++ */ ++ if (pAC->Event.EvGet != pAC->Event.EvQueue && ++ pAC->Event.EvGet != pEv) { ++ /* ++ * Create an error log entry if the ++ * event queue isn't reset. ++ * In this case it may be blocked. ++ */ ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_Q_E004, SKERR_Q_E004MSG); ++ } ++ + return(Rtv); + } + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skrlmt.c linux-2.6.9.new/drivers/net/sk98lin/skrlmt.c +--- linux-2.6.9.old/drivers/net/sk98lin/skrlmt.c 2004-10-19 05:54:37.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skrlmt.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skrlmt.c + * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.69 $ +- * Date: $Date: 2003/04/15 09:39:22 $ ++ * Version: $Revision: 2.3 $ ++ * Date: $Date: 2005/05/04 09:47:53 $ + * Purpose: Manage links on SK-NET Adapters, esp. redundant ones. + * + ******************************************************************************/ +@@ -39,7 +39,7 @@ + + #ifndef lint + static const char SysKonnectFileId[] = +- "@(#) $Id: skrlmt.c,v 1.69 2003/04/15 09:39:22 tschilli Exp $ (C) Marvell."; ++ "@(#) $Id: skrlmt.c,v 2.3 2005/05/04 09:47:53 tschilli Exp $ (C) Marvell."; + #endif /* !defined(lint) */ + + #define __SKRLMT_C +@@ -350,7 +350,7 @@ + SK_BOOL PhysicalAMacAddressSet; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_INIT, +- ("RLMT Init level %d.\n", Level)) ++ ("RLMT Init level %d.\n", Level)); + + switch (Level) { + case SK_INIT_DATA: /* Initialize data structures. */ +@@ -390,7 +390,7 @@ + + case SK_INIT_IO: /* GIMacsFound first available here. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_INIT, +- ("RLMT: %d MACs were detected.\n", pAC->GIni.GIMacsFound)) ++ ("RLMT: %d MACs were detected.\n", pAC->GIni.GIMacsFound)); + + pAC->Rlmt.Net[0].NumPorts = pAC->GIni.GIMacsFound; + +@@ -512,7 +512,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SkRlmtBuildCheckChain.\n")) ++ ("SkRlmtBuildCheckChain.\n")); + + NumMacsUp = 0; + +@@ -558,7 +558,7 @@ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, + ("Port %d checks %d other ports: %2X.\n", i, + pAC->Rlmt.Net[NetIdx].Port[i]->PortsChecked, +- pAC->Rlmt.Net[NetIdx].Port[i]->PortCheck[0].CheckAddr.a[5])) ++ pAC->Rlmt.Net[NetIdx].Port[i]->PortCheck[0].CheckAddr.a[5])); + } + #endif /* DEBUG */ + +@@ -604,7 +604,7 @@ + if ((CheckSrc == 0) || (CheckDest == 0)) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_ERR, + ("SkRlmtBuildPacket: Invalid %s%saddr.\n", +- (CheckSrc == 0 ? "Src" : ""), (CheckDest == 0 ? "Dest" : ""))) ++ (CheckSrc == 0 ? "Src" : ""), (CheckDest == 0 ? "Dest" : ""))); + } + #endif + +@@ -796,7 +796,7 @@ + + SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_TX, +- ("SkRlmtSend: BPDU Packet on Port %u.\n", PortNumber)) ++ ("SkRlmtSend: BPDU Packet on Port %u.\n", PortNumber)); + } + } + return; +@@ -835,7 +835,7 @@ + * Bring it up. + */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Received on PortDown.\n")) ++ ("SkRlmtPacketReceive: Received on PortDown.\n")); + + pRPort->PortState = SK_RLMT_PS_GOING_UP; + pRPort->GuTimeStamp = SkOsGetTime(pAC); +@@ -849,7 +849,7 @@ + } /* PortDown && !SuspectTx */ + else if (pRPort->CheckingState & SK_RLMT_PCS_RX) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Stop bringing port down.\n")) ++ ("SkRlmtPacketReceive: Stop bringing port down.\n")); + SkTimerStop(pAC, IoC, &pRPort->DownRxTimer); + pRPort->CheckingState &= ~SK_RLMT_PCS_RX; + /* pAC->Rlmt.CheckSwitch = SK_TRUE; */ +@@ -896,7 +896,7 @@ + pRPort = &pAC->Rlmt.Port[PortNumber]; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: PortNumber == %d.\n", PortNumber)) ++ ("SkRlmtPacketReceive: PortNumber == %d.\n", PortNumber)); + + pRPacket = (SK_RLMT_PACKET*)pMb->pData; + pSPacket = (SK_SPTREE_PACKET*)pRPacket; +@@ -917,7 +917,7 @@ + + /* Not sent to current MAC or registered MC address => Trash it. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Not for me.\n")) ++ ("SkRlmtPacketReceive: Not for me.\n")); + + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); + return; +@@ -955,7 +955,7 @@ + pRPacket->Indicator[5] == SK_RLMT_INDICATOR5 && + pRPacket->Indicator[6] == SK_RLMT_INDICATOR6) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Duplicate MAC Address.\n")) ++ ("SkRlmtPacketReceive: Duplicate MAC Address.\n")); + + /* Error Log entry. */ + SK_ERR_LOG(pAC, SK_ERRCL_COMM, SKERR_RLMT_E006, SKERR_RLMT_E006_MSG); +@@ -963,7 +963,7 @@ + else { + /* Simply trash it. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Sent by me.\n")) ++ ("SkRlmtPacketReceive: Sent by me.\n")); + } + + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); +@@ -1007,7 +1007,7 @@ + #endif /* 0 */ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Announce.\n")) ++ ("SkRlmtPacketReceive: Announce.\n")); + + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); + break; +@@ -1015,7 +1015,7 @@ + case SK_PACKET_ALIVE: + if (pRPacket->SSap & LLC_COMMAND_RESPONSE_BIT) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Alive Reply.\n")) ++ ("SkRlmtPacketReceive: Alive Reply.\n")); + + if (!(pAC->Addr.Port[PortNumber].PromMode & SK_PROM_MODE_LLC) || + SK_ADDR_EQUAL( +@@ -1046,7 +1046,7 @@ + } + else { /* Alive Request Packet. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Alive Request.\n")) ++ ("SkRlmtPacketReceive: Alive Request.\n")); + + pRPort->RxHelloCts++; + +@@ -1065,7 +1065,7 @@ + + case SK_PACKET_CHECK_TX: + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Check your tx line.\n")) ++ ("SkRlmtPacketReceive: Check your tx line.\n")); + + /* A port checking us requests us to check our tx line. */ + pRPort->CheckingState |= SK_RLMT_PCS_TX; +@@ -1088,7 +1088,7 @@ + + case SK_PACKET_ADDR_CHANGED: + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Address Change.\n")) ++ ("SkRlmtPacketReceive: Address Change.\n")); + + /* Build the check chain. */ + SkRlmtBuildCheckChain(pAC, pRPort->Net->NetNumber); +@@ -1097,7 +1097,7 @@ + + default: + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Unknown RLMT packet.\n")) ++ ("SkRlmtPacketReceive: Unknown RLMT packet.\n")); + + /* RA;:;: ??? */ + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); +@@ -1107,7 +1107,7 @@ + pSPacket->Ctrl == SK_RLMT_SPT_CTRL && + (pSPacket->SSap & ~LLC_COMMAND_RESPONSE_BIT) == SK_RLMT_SPT_SSAP) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: BPDU Packet.\n")) ++ ("SkRlmtPacketReceive: BPDU Packet.\n")); + + /* Spanning Tree packet. */ + pRPort->RxSpHelloCts++; +@@ -1139,7 +1139,7 @@ + pRPort->Root.Id[0], pRPort->Root.Id[1], + pRPort->Root.Id[2], pRPort->Root.Id[3], + pRPort->Root.Id[4], pRPort->Root.Id[5], +- pRPort->Root.Id[6], pRPort->Root.Id[7])) ++ pRPort->Root.Id[6], pRPort->Root.Id[7])); + } + + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); +@@ -1150,7 +1150,7 @@ + } + else { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_RX, +- ("SkRlmtPacketReceive: Unknown Packet Type.\n")) ++ ("SkRlmtPacketReceive: Unknown Packet Type.\n")); + + /* Unknown packet. */ + SkDrvFreeRlmtMbuf(pAC, IoC, pMb); +@@ -1232,7 +1232,7 @@ + if ((pRPort->PacketsPerTimeSlot - pRPort->BpduPacketsPerTimeSlot) == 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, + ("SkRlmtCheckPort %d: No (%d) receives in last time slot.\n", +- PortNumber, pRPort->PacketsPerTimeSlot)) ++ PortNumber, pRPort->PacketsPerTimeSlot)); + + /* + * Check segmentation if there was no receive at least twice +@@ -1249,7 +1249,7 @@ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, + ("SkRlmtCheckPort: PortsSuspect %d, PcsRx %d.\n", +- pRPort->PortsSuspect, pRPort->CheckingState & SK_RLMT_PCS_RX)) ++ pRPort->PortsSuspect, pRPort->CheckingState & SK_RLMT_PCS_RX)); + + if (pRPort->PortState != SK_RLMT_PS_DOWN) { + NewTimeout = TO_SHORTEN(pAC->Rlmt.Port[PortNumber].Net->TimeoutValue); +@@ -1295,7 +1295,7 @@ + ("SkRlmtCheckPort %d: %d (%d) receives in last time slot.\n", + PortNumber, + pRPort->PacketsPerTimeSlot - pRPort->BpduPacketsPerTimeSlot, +- pRPort->PacketsPerTimeSlot)) ++ pRPort->PacketsPerTimeSlot)); + + SkRlmtPortReceives(pAC, IoC, PortNumber); + if (pAC->Rlmt.CheckSwitch) { +@@ -1345,7 +1345,7 @@ + i, + pAC->Rlmt.Port[i].PortDown, pAC->Rlmt.Port[i].PortNoRx, + *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_HI32), +- *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_LO32))) ++ *((SK_U32*)(&pAC->Rlmt.Port[i].BcTimeStamp) + OFFS_LO32))); + + if (!pAC->Rlmt.Port[i].PortDown && !pAC->Rlmt.Port[i].PortNoRx) { + if (!PortFound || pAC->Rlmt.Port[i].BcTimeStamp > BcTimeStamp) { +@@ -1358,7 +1358,7 @@ + + if (PortFound) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Port %d received the last broadcast.\n", *pSelect)) ++ ("Port %d received the last broadcast.\n", *pSelect)); + + /* Look if another port's time stamp is similar. */ + for (i = 0; i < (SK_U32)pAC->GIni.GIMacsFound; i++) { +@@ -1373,7 +1373,7 @@ + PortFound = SK_FALSE; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Port %d received a broadcast at a similar time.\n", i)) ++ ("Port %d received a broadcast at a similar time.\n", i)); + break; + } + } +@@ -1385,7 +1385,7 @@ + ("SK_RLMT_SELECT_BCRX found Port %d receiving the substantially " + "latest broadcast (%u).\n", + *pSelect, +- BcTimeStamp - pAC->Rlmt.Port[1 - *pSelect].BcTimeStamp)) ++ BcTimeStamp - pAC->Rlmt.Port[1 - *pSelect].BcTimeStamp)); + } + #endif /* DEBUG */ + +@@ -1434,7 +1434,7 @@ + PortFound = SK_TRUE; + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, + ("SK_RLMT_SELECT_NOTSUSPECT found Port %d up and not check RX.\n", +- *pSelect)) ++ *pSelect)); + break; + } + } +@@ -1483,7 +1483,7 @@ + } + PortFound = SK_TRUE; + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SELECT_UP found Port %d up.\n", *pSelect)) ++ ("SK_RLMT_SELECT_UP found Port %d up.\n", *pSelect)); + break; + } + } +@@ -1544,7 +1544,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SELECT_GOINGUP found Port %d going up.\n", *pSelect)) ++ ("SK_RLMT_SELECT_GOINGUP found Port %d going up.\n", *pSelect)); + return (SK_TRUE); + } /* SkRlmtSelectGoingUp */ + +@@ -1590,7 +1590,7 @@ + } + PortFound = SK_TRUE; + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SELECT_DOWN found Port %d down.\n", *pSelect)) ++ ("SK_RLMT_SELECT_DOWN found Port %d down.\n", *pSelect)); + break; + } + } +@@ -1680,16 +1680,19 @@ + Para.Para32[1] = NetIdx; + SkEventQueue(pAC, SKGE_DRV, SK_DRV_NET_UP, Para); + +- if ((pAC->Rlmt.Net[NetIdx].RlmtMode & SK_RLMT_TRANSPARENT) == 0 && +- (Para.pParaPtr = SkRlmtBuildPacket(pAC, IoC, +- pAC->Rlmt.Net[NetIdx].Port[i]->PortNumber, +- SK_PACKET_ANNOUNCE, &pAC->Addr.Net[NetIdx]. +- CurrentMacAddress, &SkRlmtMcAddr)) != NULL) { +- /* +- * Send announce packet to RLMT multicast address to force +- * switches to learn the new location of the logical MAC address. +- */ +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para); ++ if (pAC->Rlmt.NumNets == 1) { ++ if ((pAC->Rlmt.Net[NetIdx].RlmtMode & SK_RLMT_TRANSPARENT) == 0 && ++ (Para.pParaPtr = SkRlmtBuildPacket(pAC, IoC, ++ pAC->Rlmt.Net[NetIdx].Port[i]->PortNumber, ++ SK_PACKET_ANNOUNCE, &pAC->Addr.Net[NetIdx]. ++ CurrentMacAddress, &SkRlmtMcAddr)) != NULL) { ++ ++ /* ++ * Send announce packet to RLMT multicast address to force ++ * switches to learn the new location of the logical MAC address. ++ */ ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para); ++ } + } + } + else { +@@ -1788,7 +1791,7 @@ + + if (Para.Para32[1] != Active) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Active: %d, Para1: %d.\n", Active, Para.Para32[1])) ++ ("Active: %d, Para1: %d.\n", Active, Para.Para32[1])); + pAC->Rlmt.Net[NetIdx].ActivePort = Para.Para32[1]; + Para.Para32[0] = pAC->Rlmt.Net[NetIdx]. + Port[Para.Para32[0]]->PortNumber; +@@ -1868,7 +1871,7 @@ + pNet->Port[i]->Root.Id[0], pNet->Port[i]->Root.Id[1], + pNet->Port[i]->Root.Id[2], pNet->Port[i]->Root.Id[3], + pNet->Port[i]->Root.Id[4], pNet->Port[i]->Root.Id[5], +- pNet->Port[i]->Root.Id[6], pNet->Port[i]->Root.Id[7])) ++ pNet->Port[i]->Root.Id[6], pNet->Port[i]->Root.Id[7])); + + if (!pNet->RootIdSet) { + pNet->Root = pNet->Port[i]->Root; +@@ -1963,13 +1966,13 @@ + SK_U32 i; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTSTART_TIMEOUT Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_PORTSTART_TIMEOUT Port %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTSTART_TIMEOUT Event EMPTY.\n")) ++ ("SK_RLMT_PORTSTART_TIMEOUT Event EMPTY.\n")); + return; + } + +@@ -1990,7 +1993,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTSTART_TIMEOUT Event END.\n")) ++ ("SK_RLMT_PORTSTART_TIMEOUT Event END.\n")); + } /* SkRlmtEvtPortStartTim */ + + +@@ -2018,21 +2021,21 @@ + SK_EVPARA Para2; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_UP Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_LINK_UP Port %d Event BEGIN.\n", Para.Para32[0])); + + pRPort = &pAC->Rlmt.Port[Para.Para32[0]]; + if (!pRPort->PortStarted) { + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E008, SKERR_RLMT_E008_MSG); + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_UP Event EMPTY.\n")) ++ ("SK_RLMT_LINK_UP Event EMPTY.\n")); + return; + } + + if (!pRPort->LinkDown) { + /* RA;:;: Any better solution? */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_UP Event EMPTY.\n")) ++ ("SK_RLMT_LINK_UP Event EMPTY.\n")); + return; + } + +@@ -2082,16 +2085,19 @@ + Para2.Para32[1] = (SK_U32)-1; + SkTimerStart(pAC, IoC, &pRPort->UpTimer, SK_RLMT_PORTUP_TIM_VAL, + SKGE_RLMT, SK_RLMT_PORTUP_TIM, Para2); +- ++ + /* Later: if (pAC->Rlmt.RlmtMode & SK_RLMT_CHECK_LOC_LINK) && */ +- if ((pRPort->Net->RlmtMode & SK_RLMT_TRANSPARENT) == 0 && +- (pRPort->Net->RlmtMode & SK_RLMT_CHECK_LINK) != 0 && +- (Para2.pParaPtr = +- SkRlmtBuildPacket(pAC, IoC, Para.Para32[0], SK_PACKET_ANNOUNCE, +- &pAC->Addr.Port[Para.Para32[0]].CurrentMacAddress, &SkRlmtMcAddr) +- ) != NULL) { +- /* Send "new" packet to RLMT multicast address. */ +- SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para2); ++ if (pAC->Rlmt.NumNets == 1) { ++ if ((pRPort->Net->RlmtMode & SK_RLMT_TRANSPARENT) == 0 && ++ (pRPort->Net->RlmtMode & SK_RLMT_CHECK_LINK) != 0 && ++ (Para2.pParaPtr = ++ SkRlmtBuildPacket(pAC, IoC, Para.Para32[0], SK_PACKET_ANNOUNCE, ++ &pAC->Addr.Port[Para.Para32[0]].CurrentMacAddress, &SkRlmtMcAddr) ++ ) != NULL) { ++ ++ /* Send "new" packet to RLMT multicast address. */ ++ SkEventQueue(pAC, SKGE_DRV, SK_DRV_RLMT_SEND, Para2); ++ } + } + + if (pRPort->Net->RlmtMode & SK_RLMT_CHECK_SEG) { +@@ -2110,7 +2116,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_UP Event END.\n")) ++ ("SK_RLMT_LINK_UP Event END.\n")); + } /* SkRlmtEvtLinkUp */ + + +@@ -2136,20 +2142,20 @@ + SK_RLMT_PORT *pRPort; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTUP_TIM Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_PORTUP_TIM Port %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTUP_TIM Event EMPTY.\n")) ++ ("SK_RLMT_PORTUP_TIM Event EMPTY.\n")); + return; + } + + pRPort = &pAC->Rlmt.Port[Para.Para32[0]]; + if (pRPort->LinkDown || (pRPort->PortState == SK_RLMT_PS_UP)) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTUP_TIM Port %d Event EMPTY.\n", Para.Para32[0])) ++ ("SK_RLMT_PORTUP_TIM Port %d Event EMPTY.\n", Para.Para32[0])); + return; + } + +@@ -2164,7 +2170,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTUP_TIM Event END.\n")) ++ ("SK_RLMT_PORTUP_TIM Event END.\n")); + } /* SkRlmtEvtPortUpTim */ + + +@@ -2192,13 +2198,13 @@ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, + ("SK_RLMT_PORTDOWN* Port %d Event (%d) BEGIN.\n", +- Para.Para32[0], Event)) ++ Para.Para32[0], Event)); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTDOWN* Event EMPTY.\n")) ++ ("SK_RLMT_PORTDOWN* Event EMPTY.\n")); + return; + } + +@@ -2206,7 +2212,7 @@ + if (!pRPort->PortStarted || (Event == SK_RLMT_PORTDOWN_TX_TIM && + !(pRPort->CheckingState & SK_RLMT_PCS_TX))) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTDOWN* Event (%d) EMPTY.\n", Event)) ++ ("SK_RLMT_PORTDOWN* Event (%d) EMPTY.\n", Event)); + return; + } + +@@ -2243,7 +2249,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORTDOWN* Event (%d) END.\n", Event)) ++ ("SK_RLMT_PORTDOWN* Event (%d) END.\n", Event)); + } /* SkRlmtEvtPortDownX */ + + +@@ -2270,7 +2276,7 @@ + + pRPort = &pAC->Rlmt.Port[Para.Para32[0]]; + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_DOWN Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_LINK_DOWN Port %d Event BEGIN.\n", Para.Para32[0])); + + if (!pAC->Rlmt.Port[Para.Para32[0]].LinkDown) { + pRPort->Net->LinksUp--; +@@ -2289,7 +2295,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_LINK_DOWN Event END.\n")) ++ ("SK_RLMT_LINK_DOWN Event END.\n")); + } /* SkRlmtEvtLinkDown */ + + +@@ -2318,13 +2324,13 @@ + SK_MAC_ADDR *pNewMacAddr; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORT_ADDR Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_PORT_ADDR Port %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORT_ADDR Event EMPTY.\n")) ++ ("SK_RLMT_PORT_ADDR Event EMPTY.\n")); + return; + } + +@@ -2348,7 +2354,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PORT_ADDR Event END.\n")) ++ ("SK_RLMT_PORT_ADDR Event END.\n")); + } /* SkRlmtEvtPortAddr */ + + +@@ -2376,35 +2382,35 @@ + SK_U32 PortNumber; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Net %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_START Net %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Event EMPTY.\n")) ++ ("SK_RLMT_START Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[0])) ++ ("Bad NetNumber %d.\n", Para.Para32[0])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Event EMPTY.\n")) ++ ("SK_RLMT_START Event EMPTY.\n")); + return; + } + + if (pAC->Rlmt.Net[Para.Para32[0]].RlmtState != SK_RLMT_RS_INIT) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Event EMPTY.\n")) ++ ("SK_RLMT_START Event EMPTY.\n")); + return; + } + + if (pAC->Rlmt.NetsStarted >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("All nets should have been started.\n")) ++ ("All nets should have been started.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Event EMPTY.\n")) ++ ("SK_RLMT_START Event EMPTY.\n")); + return; + } + +@@ -2438,7 +2444,7 @@ + pAC->Rlmt.NetsStarted++; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_START Event END.\n")) ++ ("SK_RLMT_START Event END.\n")); + } /* SkRlmtEvtStart */ + + +@@ -2466,35 +2472,35 @@ + SK_U32 i; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Net %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_STOP Net %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Event EMPTY.\n")) ++ ("SK_RLMT_STOP Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[0])) ++ ("Bad NetNumber %d.\n", Para.Para32[0])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Event EMPTY.\n")) ++ ("SK_RLMT_STOP Event EMPTY.\n")); + return; + } + + if (pAC->Rlmt.Net[Para.Para32[0]].RlmtState == SK_RLMT_RS_INIT) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Event EMPTY.\n")) ++ ("SK_RLMT_STOP Event EMPTY.\n")); + return; + } + + if (pAC->Rlmt.NetsStarted == 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("All nets are stopped.\n")) ++ ("All nets are stopped.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Event EMPTY.\n")) ++ ("SK_RLMT_STOP Event EMPTY.\n")); + return; + } + +@@ -2529,7 +2535,7 @@ + pAC->Rlmt.NetsStarted--; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STOP Event END.\n")) ++ ("SK_RLMT_STOP Event END.\n")); + } /* SkRlmtEvtStop */ + + +@@ -2559,13 +2565,13 @@ + SK_U32 i; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_TIM Event BEGIN.\n")) ++ ("SK_RLMT_TIM Event BEGIN.\n")); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_TIM Event EMPTY.\n")) ++ ("SK_RLMT_TIM Event EMPTY.\n")); + return; + } + +@@ -2637,7 +2643,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_TIM Event END.\n")) ++ ("SK_RLMT_TIM Event END.\n")); + } /* SkRlmtEvtTim */ + + +@@ -2665,13 +2671,13 @@ + #endif /* DEBUG */ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SEG_TIM Event BEGIN.\n")) ++ ("SK_RLMT_SEG_TIM Event BEGIN.\n")); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SEG_TIM Event EMPTY.\n")) ++ ("SK_RLMT_SEG_TIM Event EMPTY.\n")); + return; + } + +@@ -2695,7 +2701,7 @@ + InAddr8[3], InAddr8[4], InAddr8[5], + pAPort->Exact[k].a[0], pAPort->Exact[k].a[1], + pAPort->Exact[k].a[2], pAPort->Exact[k].a[3], +- pAPort->Exact[k].a[4], pAPort->Exact[k].a[5])) ++ pAPort->Exact[k].a[4], pAPort->Exact[k].a[5])); + } + } + #endif /* xDEBUG */ +@@ -2703,7 +2709,7 @@ + SkRlmtCheckSeg(pAC, IoC, Para.Para32[0]); + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SEG_TIM Event END.\n")) ++ ("SK_RLMT_SEG_TIM Event END.\n")); + } /* SkRlmtEvtSegTim */ + + +@@ -2732,18 +2738,18 @@ + + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PACKET_RECEIVED Event BEGIN.\n")) ++ ("SK_RLMT_PACKET_RECEIVED Event BEGIN.\n")); + + /* Should we ignore frames during port switching? */ + + #ifdef DEBUG + pMb = Para.pParaPtr; + if (pMb == NULL) { +- SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, ("No mbuf.\n")) ++ SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, ("No mbuf.\n")); + } + else if (pMb->pNext != NULL) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("More than one mbuf or pMb->pNext not set.\n")) ++ ("More than one mbuf or pMb->pNext not set.\n")); + } + #endif /* DEBUG */ + +@@ -2761,7 +2767,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PACKET_RECEIVED Event END.\n")) ++ ("SK_RLMT_PACKET_RECEIVED Event END.\n")); + } /* SkRlmtEvtPacketRx */ + + +@@ -2788,21 +2794,21 @@ + SK_RLMT_PORT *pRPort; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_CLEAR Event BEGIN.\n")) ++ ("SK_RLMT_STATS_CLEAR Event BEGIN.\n")); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_CLEAR Event EMPTY.\n")) ++ ("SK_RLMT_STATS_CLEAR Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[0])) ++ ("Bad NetNumber %d.\n", Para.Para32[0])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_CLEAR Event EMPTY.\n")) ++ ("SK_RLMT_STATS_CLEAR Event EMPTY.\n")); + return; + } + +@@ -2817,7 +2823,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_CLEAR Event END.\n")) ++ ("SK_RLMT_STATS_CLEAR Event END.\n")); + } /* SkRlmtEvtStatsClear */ + + +@@ -2841,28 +2847,28 @@ + SK_EVPARA Para) /* SK_U32 NetNumber; SK_U32 -1 */ + { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_UPDATE Event BEGIN.\n")) ++ ("SK_RLMT_STATS_UPDATE Event BEGIN.\n")); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_UPDATE Event EMPTY.\n")) ++ ("SK_RLMT_STATS_UPDATE Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[0])) ++ ("Bad NetNumber %d.\n", Para.Para32[0])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_UPDATE Event EMPTY.\n")) ++ ("SK_RLMT_STATS_UPDATE Event EMPTY.\n")); + return; + } + + /* Update statistics - currently always up-to-date. */ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_STATS_UPDATE Event END.\n")) ++ ("SK_RLMT_STATS_UPDATE Event END.\n")); + } /* SkRlmtEvtStatsUpdate */ + + +@@ -2886,13 +2892,13 @@ + SK_EVPARA Para) /* SK_U32 PortIndex; SK_U32 NetNumber */ + { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PREFPORT_CHANGE to Port %d Event BEGIN.\n", Para.Para32[0])) ++ ("SK_RLMT_PREFPORT_CHANGE to Port %d Event BEGIN.\n", Para.Para32[0])); + + if (Para.Para32[1] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[1])) ++ ("Bad NetNumber %d.\n", Para.Para32[1])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n")) ++ ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n")); + return; + } + +@@ -2905,7 +2911,7 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E010, SKERR_RLMT_E010_MSG); + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n")) ++ ("SK_RLMT_PREFPORT_CHANGE Event EMPTY.\n")); + return; + } + +@@ -2919,7 +2925,7 @@ + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_PREFPORT_CHANGE Event END.\n")) ++ ("SK_RLMT_PREFPORT_CHANGE Event END.\n")); + } /* SkRlmtEvtPrefportChange */ + + +@@ -2945,37 +2951,37 @@ + int i; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event BEGIN.\n")) ++ ("SK_RLMT_SET_NETS Event BEGIN.\n")); + + if (Para.Para32[1] != (SK_U32)-1) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad Parameter.\n")) ++ ("Bad Parameter.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event EMPTY.\n")) ++ ("SK_RLMT_SET_NETS Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] == 0 || Para.Para32[0] > SK_MAX_NETS || + Para.Para32[0] > (SK_U32)pAC->GIni.GIMacsFound) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad number of nets: %d.\n", Para.Para32[0])) ++ ("Bad number of nets: %d.\n", Para.Para32[0])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event EMPTY.\n")) ++ ("SK_RLMT_SET_NETS Event EMPTY.\n")); + return; + } + + if (Para.Para32[0] == pAC->Rlmt.NumNets) { /* No change. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event EMPTY.\n")) ++ ("SK_RLMT_SET_NETS Event EMPTY.\n")); + return; + } + + /* Entering and leaving dual mode only allowed while nets are stopped. */ + if (pAC->Rlmt.NetsStarted > 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Changing dual mode only allowed while all nets are stopped.\n")) ++ ("Changing dual mode only allowed while all nets are stopped.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event EMPTY.\n")) ++ ("SK_RLMT_SET_NETS Event EMPTY.\n")); + return; + } + +@@ -3006,9 +3012,10 @@ + SkEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_RLMT_SET_NETS, Para); + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("RLMT: Changed to one net with two ports.\n")) ++ ("RLMT: Changed to one net with two ports.\n")); + } + else if (Para.Para32[0] == 2) { ++ pAC->Rlmt.RlmtOff = SK_TRUE; + pAC->Rlmt.Port[1].Net= &pAC->Rlmt.Net[1]; + pAC->Rlmt.Net[1].NumPorts = pAC->GIni.GIMacsFound - 1; + pAC->Rlmt.Net[0].NumPorts = +@@ -3035,19 +3042,19 @@ + SkEventQueue(pAC, SKGE_PNMI, SK_PNMI_EVT_RLMT_SET_NETS, Para); + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("RLMT: Changed to two nets with one port each.\n")) ++ ("RLMT: Changed to two nets with one port each.\n")); + } + else { + /* Not implemented for more than two nets. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SetNets not implemented for more than two nets.\n")) ++ ("SetNets not implemented for more than two nets.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event EMPTY.\n")) ++ ("SK_RLMT_SET_NETS Event EMPTY.\n")); + return; + } + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_SET_NETS Event END.\n")) ++ ("SK_RLMT_SET_NETS Event END.\n")); + } /* SkRlmtSetNets */ + + +@@ -3075,13 +3082,13 @@ + SK_U32 PrevRlmtMode; + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_MODE_CHANGE Event BEGIN.\n")) ++ ("SK_RLMT_MODE_CHANGE Event BEGIN.\n")); + + if (Para.Para32[1] >= pAC->Rlmt.NumNets) { + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Bad NetNumber %d.\n", Para.Para32[1])) ++ ("Bad NetNumber %d.\n", Para.Para32[1])); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_MODE_CHANGE Event EMPTY.\n")) ++ ("SK_RLMT_MODE_CHANGE Event EMPTY.\n")); + return; + } + +@@ -3091,9 +3098,9 @@ + Para.Para32[0] != SK_RLMT_MODE_CLS) { + pAC->Rlmt.Net[Para.Para32[1]].RlmtMode = SK_RLMT_MODE_CLS; + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Forced RLMT mode to CLS on single port net.\n")) ++ ("Forced RLMT mode to CLS on single port net.\n")); + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_MODE_CHANGE Event EMPTY.\n")) ++ ("SK_RLMT_MODE_CHANGE Event EMPTY.\n")); + return; + } + +@@ -3159,7 +3166,7 @@ + } /* SK_RLMT_CHECK_SEG bit changed. */ + + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("SK_RLMT_MODE_CHANGE Event END.\n")) ++ ("SK_RLMT_MODE_CHANGE Event END.\n")); + } /* SkRlmtEvtModeChange */ + + +@@ -3245,7 +3252,7 @@ + + default: /* Create error log entry. */ + SK_DBG_MSG(pAC, SK_DBGMOD_RLMT, SK_DBGCAT_CTRL, +- ("Unknown RLMT Event %d.\n", Event)) ++ ("Unknown RLMT Event %d.\n", Event)); + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_RLMT_E003, SKERR_RLMT_E003_MSG); + break; + } /* switch() */ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/sktimer.c linux-2.6.9.new/drivers/net/sk98lin/sktimer.c +--- linux-2.6.9.old/drivers/net/sk98lin/sktimer.c 2004-10-19 05:53:13.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/sktimer.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: sktimer.c + * Project: Gigabit Ethernet Adapters, Event Scheduler Module +- * Version: $Revision: 1.14 $ +- * Date: $Date: 2003/09/16 13:46:51 $ ++ * Version: $Revision: 2.2 $ ++ * Date: $Date: 2004/05/28 13:44:39 $ + * Purpose: High level timer functions. + * + ******************************************************************************/ +@@ -11,7 +11,7 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect GmbH. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -22,13 +22,12 @@ + * + ******************************************************************************/ + +- + /* + * Event queue and dispatcher + */ + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: sktimer.c,v 1.14 2003/09/16 13:46:51 rschmidt Exp $ (C) Marvell."; ++ "@(#) $Id: sktimer.c,v 2.2 2004/05/28 13:44:39 rschmidt Exp $ (C) Marvell."; + #endif + + #include "h/skdrv1st.h" /* Driver Specific Definitions */ +@@ -62,7 +61,7 @@ + { + switch (Level) { + case SK_INIT_DATA: +- pAC->Tim.StQueue = NULL; ++ pAC->Tim.StQueue = 0; + break; + case SK_INIT_IO: + SkHwtInit(pAC, Ioc); +@@ -85,22 +84,20 @@ + SK_TIMER **ppTimPrev; + SK_TIMER *pTm; + +- /* +- * remove timer from queue +- */ ++ /* remove timer from queue */ + pTimer->TmActive = SK_FALSE; +- ++ + if (pAC->Tim.StQueue == pTimer && !pTimer->TmNext) { + SkHwtStop(pAC, Ioc); + } +- ++ + for (ppTimPrev = &pAC->Tim.StQueue; (pTm = *ppTimPrev); + ppTimPrev = &pTm->TmNext ) { +- ++ + if (pTm == pTimer) { + /* + * Timer found in queue +- * - dequeue it and ++ * - dequeue it + * - correct delta of the next timer + */ + *ppTimPrev = pTm->TmNext; +@@ -121,7 +118,7 @@ + SK_AC *pAC, /* Adapters context */ + SK_IOC Ioc, /* IoContext */ + SK_TIMER *pTimer, /* Timer Pointer to be started */ +-SK_U32 Time, /* Time value */ ++SK_U32 Time, /* Time Value (in microsec.) */ + SK_U32 Class, /* Event Class for this timer */ + SK_U32 Event, /* Event Value for this timer */ + SK_EVPARA Para) /* Event Parameter for this timer */ +@@ -130,11 +127,6 @@ + SK_TIMER *pTm; + SK_U32 Delta; + +- Time /= 16; /* input is uS, clock ticks are 16uS */ +- +- if (!Time) +- Time = 1; +- + SkTimerStop(pAC, Ioc, pTimer); + + pTimer->TmClass = Class; +@@ -143,31 +135,26 @@ + pTimer->TmActive = SK_TRUE; + + if (!pAC->Tim.StQueue) { +- /* First Timer to be started */ ++ /* first Timer to be started */ + pAC->Tim.StQueue = pTimer; +- pTimer->TmNext = NULL; ++ pTimer->TmNext = 0; + pTimer->TmDelta = Time; +- ++ + SkHwtStart(pAC, Ioc, Time); +- ++ + return; + } + +- /* +- * timer correction +- */ ++ /* timer correction */ + timer_done(pAC, Ioc, 0); + +- /* +- * find position in queue +- */ ++ /* find position in queue */ + Delta = 0; + for (ppTimPrev = &pAC->Tim.StQueue; (pTm = *ppTimPrev); + ppTimPrev = &pTm->TmNext ) { +- ++ + if (Delta + pTm->TmDelta > Time) { +- /* Position found */ +- /* Here the timer needs to be inserted. */ ++ /* the timer needs to be inserted here */ + break; + } + Delta += pTm->TmDelta; +@@ -179,9 +166,7 @@ + pTimer->TmDelta = Time - Delta; + + if (pTm) { +- /* There is a next timer +- * -> correct its Delta value. +- */ ++ /* there is a next timer: correct its Delta value */ + pTm->TmDelta -= pTimer->TmDelta; + } + +@@ -210,7 +195,7 @@ + int Done = 0; + + Delta = SkHwtRead(pAC, Ioc); +- ++ + ppLast = &pAC->Tim.StQueue; + pTm = pAC->Tim.StQueue; + while (pTm && !Done) { +@@ -228,13 +213,13 @@ + Done = 1; + } + } +- *ppLast = NULL; ++ *ppLast = 0; + /* + * pTm points to the first Timer that did not run out. + * StQueue points to the first Timer that run out. + */ + +- for ( pTComp = pAC->Tim.StQueue; pTComp; pTComp = pTComp->TmNext) { ++ for (pTComp = pAC->Tim.StQueue; pTComp; pTComp = pTComp->TmNext) { + SkEventQueue(pAC,pTComp->TmClass, pTComp->TmEvent, pTComp->TmPara); + } + +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/sktwsi.c linux-2.6.9.new/drivers/net/sk98lin/sktwsi.c +--- linux-2.6.9.old/drivers/net/sk98lin/sktwsi.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/sktwsi.c 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,1355 @@ ++/****************************************************************************** ++ * ++ * Name: sktwsi.c ++ * Project: Gigabit Ethernet Adapters, TWSI-Module ++ * Version: $Revision: 1.9 $ ++ * Date: $Date: 2004/12/20 15:10:30 $ ++ * Purpose: Functions to access Voltage and Temperature Sensor ++ * ++ ******************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 1998-2002 SysKonnect. ++ * (C)Copyright 2002-2004 Marvell. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ ******************************************************************************/ ++ ++/* ++ * TWSI Protocol ++ */ ++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) ++static const char SysKonnectFileId[] = ++ "@(#) $Id: sktwsi.c,v 1.9 2004/12/20 15:10:30 rschmidt Exp $ (C) Marvell."; ++#endif ++ ++#include "h/skdrv1st.h" /* Driver Specific Definitions */ ++#include "h/lm80.h" ++#include "h/skdrv2nd.h" /* Adapter Control- and Driver specific Def. */ ++ ++#ifdef __C2MAN__ ++/* ++ TWSI protocol implementation. ++ ++ General Description: ++ ++ The TWSI protocol is used for the temperature sensors and for ++ the serial EEPROM which hold the configuration. ++ ++ This file covers functions that allow to read write and do ++ some bulk requests a specified TWSI address. ++ ++ The Genesis has 2 TWSI buses. One for the EEPROM which holds ++ the VPD Data and one for temperature and voltage sensor. ++ The following picture shows the TWSI buses, TWSI devices and ++ their control registers. ++ ++ Note: The VPD functions are in skvpd.c ++. ++. PCI Config TWSI Bus for VPD Data: ++. ++. +------------+ ++. | VPD EEPROM | ++. +------------+ ++. | ++. | <-- TWSI ++. | ++. +-----------+-----------+ ++. | | ++. +-----------------+ +-----------------+ ++. | PCI_VPD_ADR_REG | | PCI_VPD_DAT_REG | ++. +-----------------+ +-----------------+ ++. ++. ++. TWSI Bus for LM80 sensor: ++. ++. +-----------------+ ++. | Temperature and | ++. | Voltage Sensor | ++. | LM80 | ++. +-----------------+ ++. | ++. | ++. TWSI --> | ++. | ++. +----+ ++. +-------------->| OR |<--+ ++. | +----+ | ++. +------+------+ | ++. | | | ++. +--------+ +--------+ +----------+ ++. | B2_I2C | | B2_I2C | | B2_I2C | ++. | _CTRL | | _DATA | | _SW | ++. +--------+ +--------+ +----------+ ++. ++ The TWSI bus may be driven by the B2_I2C_SW or by the B2_I2C_CTRL ++ and B2_I2C_DATA registers. ++ For driver software it is recommended to use the TWSI control and ++ data register, because TWSI bus timing is done by the ASIC and ++ an interrupt may be received when the TWSI request is completed. ++ ++ Clock Rate Timing: MIN MAX generated by ++ VPD EEPROM: 50 kHz 100 kHz HW ++ LM80 over TWSI Ctrl/Data reg. 50 kHz 100 kHz HW ++ LM80 over B2_I2C_SW register 0 400 kHz SW ++ ++ Note: The clock generated by the hardware is dependend on the ++ PCI clock. If the PCI bus clock is 33 MHz, the I2C/VPD ++ clock is 50 kHz. ++ */ ++intro() ++{} ++#endif ++ ++#ifdef SK_DIAG ++/* ++ * TWSI Fast Mode timing values used by the LM80. ++ * If new devices are added to the TWSI bus the timing values have to be checked. ++ */ ++#ifndef I2C_SLOW_TIMING ++#define T_CLK_LOW 1300L /* clock low time in ns */ ++#define T_CLK_HIGH 600L /* clock high time in ns */ ++#define T_DATA_IN_SETUP 100L /* data in Set-up Time */ ++#define T_START_HOLD 600L /* start condition hold time */ ++#define T_START_SETUP 600L /* start condition Set-up time */ ++#define T_STOP_SETUP 600L /* stop condition Set-up time */ ++#define T_BUS_IDLE 1300L /* time the bus must free after Tx */ ++#define T_CLK_2_DATA_OUT 900L /* max. clock low to data output valid */ ++#else /* I2C_SLOW_TIMING */ ++/* TWSI Standard Mode Timing */ ++#define T_CLK_LOW 4700L /* clock low time in ns */ ++#define T_CLK_HIGH 4000L /* clock high time in ns */ ++#define T_DATA_IN_SETUP 250L /* data in Set-up Time */ ++#define T_START_HOLD 4000L /* start condition hold time */ ++#define T_START_SETUP 4700L /* start condition Set-up time */ ++#define T_STOP_SETUP 4000L /* stop condition Set-up time */ ++#define T_BUS_IDLE 4700L /* time the bus must free after Tx */ ++#endif /* !I2C_SLOW_TIMING */ ++ ++#define NS2BCLK(x) (((x)*125)/10000) ++ ++/* ++ * TWSI Wire Operations ++ * ++ * About I2C_CLK_LOW(): ++ * ++ * The Data Direction bit (I2C_DATA_DIR) has to be set to input when setting ++ * clock to low, to prevent the ASIC and the TWSI data client from driving the ++ * serial data line simultaneously (ASIC: last bit of a byte = '1', TWSI client ++ * send an 'ACK'). See also Concentrator Bugreport No. 10192. ++ */ ++#define I2C_DATA_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA) ++#define I2C_DATA_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA) ++#define I2C_DATA_OUT(IoC) SK_I2C_SET_BIT(IoC, I2C_DATA_DIR) ++#define I2C_DATA_IN(IoC) SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA) ++#define I2C_CLK_HIGH(IoC) SK_I2C_SET_BIT(IoC, I2C_CLK) ++#define I2C_CLK_LOW(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK | I2C_DATA_DIR) ++#define I2C_START_COND(IoC) SK_I2C_CLR_BIT(IoC, I2C_CLK) ++ ++#define NS2CLKT(x) ((x*125L)/10000) ++ ++/*--------------- TWSI Interface Register Functions --------------- */ ++ ++/* ++ * sending one bit ++ */ ++void SkI2cSndBit( ++SK_IOC IoC, /* I/O Context */ ++SK_U8 Bit) /* Bit to send */ ++{ ++ I2C_DATA_OUT(IoC); ++ if (Bit) { ++ I2C_DATA_HIGH(IoC); ++ } ++ else { ++ I2C_DATA_LOW(IoC); ++ } ++ SkDgWaitTime(IoC, NS2BCLK(T_DATA_IN_SETUP)); ++ I2C_CLK_HIGH(IoC); ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH)); ++ I2C_CLK_LOW(IoC); ++} /* SkI2cSndBit*/ ++ ++ ++/* ++ * Signal a start to the TWSI Bus. ++ * ++ * A start is signaled when data goes to low in a high clock cycle. ++ * ++ * Ends with Clock Low. ++ * ++ * Status: not tested ++ */ ++void SkI2cStart( ++SK_IOC IoC) /* I/O Context */ ++{ ++ /* Init data and Clock to output lines */ ++ /* Set Data high */ ++ I2C_DATA_OUT(IoC); ++ I2C_DATA_HIGH(IoC); ++ /* Set Clock high */ ++ I2C_CLK_HIGH(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_START_SETUP)); ++ ++ /* Set Data Low */ ++ I2C_DATA_LOW(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_START_HOLD)); ++ ++ /* Clock low without Data to Input */ ++ I2C_START_COND(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW)); ++} /* SkI2cStart */ ++ ++ ++void SkI2cStop( ++SK_IOC IoC) /* I/O Context */ ++{ ++ /* Init data and Clock to output lines */ ++ /* Set Data low */ ++ I2C_DATA_OUT(IoC); ++ I2C_DATA_LOW(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT)); ++ ++ /* Set Clock high */ ++ I2C_CLK_HIGH(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_STOP_SETUP)); ++ ++ /* ++ * Set Data High: Do it by setting the Data Line to Input. ++ * Because of a pull up resistor the Data Line ++ * floods to high. ++ */ ++ I2C_DATA_IN(IoC); ++ ++ /* ++ * When TWSI activity is stopped ++ * o DATA should be set to input and ++ * o CLOCK should be set to high! ++ */ ++ SkDgWaitTime(IoC, NS2BCLK(T_BUS_IDLE)); ++} /* SkI2cStop */ ++ ++ ++/* ++ * Receive just one bit via the TWSI bus. ++ * ++ * Note: Clock must be set to LOW before calling this function. ++ * ++ * Returns The received bit. ++ */ ++int SkI2cRcvBit( ++SK_IOC IoC) /* I/O Context */ ++{ ++ int Bit; ++ SK_U8 I2cSwCtrl; ++ ++ /* Init data as input line */ ++ I2C_DATA_IN(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_2_DATA_OUT)); ++ ++ I2C_CLK_HIGH(IoC); ++ ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_HIGH)); ++ ++ SK_I2C_GET_SW(IoC, &I2cSwCtrl); ++ ++ Bit = (I2cSwCtrl & I2C_DATA) ? 1 : 0; ++ ++ I2C_CLK_LOW(IoC); ++ SkDgWaitTime(IoC, NS2BCLK(T_CLK_LOW-T_CLK_2_DATA_OUT)); ++ ++ return(Bit); ++} /* SkI2cRcvBit */ ++ ++ ++/* ++ * Receive an ACK. ++ * ++ * returns 0 If acknowledged ++ * 1 in case of an error ++ */ ++int SkI2cRcvAck( ++SK_IOC IoC) /* I/O Context */ ++{ ++ /* ++ * Received bit must be zero. ++ */ ++ return(SkI2cRcvBit(IoC) != 0); ++} /* SkI2cRcvAck */ ++ ++ ++/* ++ * Send an NACK. ++ */ ++void SkI2cSndNAck( ++SK_IOC IoC) /* I/O Context */ ++{ ++ /* ++ * Received bit must be zero. ++ */ ++ SkI2cSndBit(IoC, 1); ++} /* SkI2cSndNAck */ ++ ++ ++/* ++ * Send an ACK. ++ */ ++void SkI2cSndAck( ++SK_IOC IoC) /* I/O Context */ ++{ ++ /* ++ * Received bit must be zero. ++ */ ++ SkI2cSndBit(IoC, 0); ++} /* SkI2cSndAck */ ++ ++ ++/* ++ * Send one byte to the TWSI device and wait for ACK. ++ * ++ * Return acknowleged status. ++ */ ++int SkI2cSndByte( ++SK_IOC IoC, /* I/O Context */ ++int Byte) /* byte to send */ ++{ ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ if (Byte & (1<<(7-i))) { ++ SkI2cSndBit(IoC, 1); ++ } ++ else { ++ SkI2cSndBit(IoC, 0); ++ } ++ } ++ ++ return(SkI2cRcvAck(IoC)); ++} /* SkI2cSndByte */ ++ ++ ++/* ++ * Receive one byte and ack it. ++ * ++ * Return byte. ++ */ ++int SkI2cRcvByte( ++SK_IOC IoC, /* I/O Context */ ++int Last) /* Last Byte Flag */ ++{ ++ int i; ++ int Byte = 0; ++ ++ for (i = 0; i < 8; i++) { ++ Byte <<= 1; ++ Byte |= SkI2cRcvBit(IoC); ++ } ++ ++ if (Last) { ++ SkI2cSndNAck(IoC); ++ } ++ else { ++ SkI2cSndAck(IoC); ++ } ++ ++ return(Byte); ++} /* SkI2cRcvByte */ ++ ++ ++/* ++ * Start dialog and send device address ++ * ++ * Return 0 if acknowleged, 1 in case of an error ++ */ ++int SkI2cSndDev( ++SK_IOC IoC, /* I/O Context */ ++int Addr, /* Device Address */ ++int Rw) /* Read / Write Flag */ ++{ ++ SkI2cStart(IoC); ++ Rw = ~Rw; ++ Rw &= I2C_WRITE; ++ return(SkI2cSndByte(IoC, (Addr << 1) | Rw)); ++} /* SkI2cSndDev */ ++ ++#endif /* SK_DIAG */ ++ ++/*----------------- TWSI CTRL Register Functions ----------*/ ++ ++/* ++ * waits for a completion of an TWSI transfer ++ * ++ * returns 0: success, transfer completes ++ * 1: error, transfer does not complete, TWSI transfer ++ * killed, wait loop terminated. ++ */ ++int SkI2cWait( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int Event) /* complete event to wait for (I2C_READ or I2C_WRITE) */ ++{ ++ SK_U64 StartTime; ++ SK_U64 CurrentTime; ++ SK_U32 I2cCtrl; ++ ++ StartTime = SkOsGetTime(pAC); ++ ++ do { ++ CurrentTime = SkOsGetTime(pAC); ++ ++ if (CurrentTime - StartTime > SK_TICKS_PER_SEC / 8) { ++ ++ SK_I2C_STOP(IoC); ++#ifndef SK_DIAG ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E002, SKERR_I2C_E002MSG); ++#endif /* !SK_DIAG */ ++ return(1); ++ } ++ ++ SK_I2C_GET_CTL(IoC, &I2cCtrl); ++ ++#ifdef xYUKON_DBG ++ printf("StartTime=%lu, CurrentTime=%lu\n", ++ StartTime, CurrentTime); ++ if (kbhit()) { ++ return(1); ++ } ++#endif /* YUKON_DBG */ ++ ++ } while ((I2cCtrl & I2C_FLAG) == (SK_U32)Event << 31); ++ ++ return(0); ++} /* SkI2cWait */ ++ ++ ++/* ++ * waits for a completion of an TWSI transfer ++ * ++ * Returns ++ * Nothing ++ */ ++void SkI2cWaitIrq( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ ++{ ++ SK_SENSOR *pSen; ++ SK_U64 StartTime; ++ SK_U32 IrqSrc; ++ SK_U32 IsTwsiReadyBit; ++ ++ IsTwsiReadyBit = CHIP_ID_YUKON_2(pAC) ? Y2_IS_TWSI_RDY : IS_I2C_READY; ++ ++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; ++ ++ if (pSen->SenState == SK_SEN_IDLE) { ++ return; ++ } ++ ++ StartTime = SkOsGetTime(pAC); ++ ++ do { ++ if (SkOsGetTime(pAC) - StartTime > SK_TICKS_PER_SEC / 8) { ++ ++ SK_I2C_STOP(IoC); ++#ifndef SK_DIAG ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E016, SKERR_I2C_E016MSG); ++#endif /* !SK_DIAG */ ++ return; ++ } ++ ++ SK_IN32(IoC, B0_ISRC, &IrqSrc); ++ ++ } while ((IrqSrc & IsTwsiReadyBit) == 0); ++ ++ pSen->SenState = SK_SEN_IDLE; ++ return; ++} /* SkI2cWaitIrq */ ++ ++/* ++ * writes a single byte or 4 bytes into the TWSI device ++ * ++ * returns 0: success ++ * 1: error ++ */ ++int SkI2cWrite( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 I2cData, /* TWSI Data to write */ ++int I2cDev, /* TWSI Device Address */ ++int I2cDevSize, /* TWSI Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */ ++int I2cReg, /* TWSI Device Register Address */ ++int I2cBurst) /* TWSI Burst Flag */ ++{ ++ SK_OUT32(IoC, B2_I2C_DATA, I2cData); ++ ++ SK_I2C_CTL(IoC, I2C_WRITE, I2cDev, I2cDevSize, I2cReg, I2cBurst); ++ ++ return(SkI2cWait(pAC, IoC, I2C_WRITE)); ++} /* SkI2cWrite*/ ++ ++ ++#ifdef SK_DIAG ++/* ++ * reads a single byte or 4 bytes from the TWSI device ++ * ++ * returns the word read ++ */ ++SK_U32 SkI2cRead( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++int I2cDev, /* TWSI Device Address */ ++int I2cDevSize, /* TWSI Device Size (e.g. I2C_025K_DEV or I2C_2K_DEV) */ ++int I2cReg, /* TWSI Device Register Address */ ++int I2cBurst) /* TWSI Burst Flag */ ++{ ++ SK_U32 Data; ++ ++ SK_OUT32(IoC, B2_I2C_DATA, 0); ++ SK_I2C_CTL(IoC, I2C_READ, I2cDev, I2cDevSize, I2cReg, I2cBurst); ++ ++ if (SkI2cWait(pAC, IoC, I2C_READ) != 0) { ++ w_print("%s\n", SKERR_I2C_E002MSG); ++ } ++ ++ SK_IN32(IoC, B2_I2C_DATA, &Data); ++ ++ return(Data); ++} /* SkI2cRead */ ++#endif /* SK_DIAG */ ++ ++ ++/* ++ * read a sensor's value ++ * ++ * This function reads a sensor's value from the TWSI sensor chip. The sensor ++ * is defined by its index into the sensors database in the struct pAC points ++ * to. ++ * Returns ++ * 1 if the read is completed ++ * 0 if the read must be continued (TWSI Bus still allocated) ++ */ ++int SkI2cReadSensor( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_SENSOR *pSen) /* Sensor to be read */ ++{ ++ if (pSen->SenRead != NULL) { ++ return((*pSen->SenRead)(pAC, IoC, pSen)); ++ } ++ ++ return(0); /* no success */ ++} /* SkI2cReadSensor */ ++ ++/* ++ * Do the Init state 0 initialization ++ */ ++static int SkI2cInit0( ++SK_AC *pAC) /* Adapter Context */ ++{ ++ int i; ++ SK_SENSOR *pSen; ++ ++ /* Begin with first sensor */ ++ pAC->I2c.CurrSens = 0; ++ ++ /* Begin with timeout control for state machine */ ++ pAC->I2c.TimerMode = SK_TIMER_WATCH_SM; ++ ++ /* Set sensor number to zero */ ++ pAC->I2c.MaxSens = 0; ++ ++#ifndef SK_DIAG ++ /* Initialize Number of Dummy Reads */ ++ pAC->I2c.DummyReads = SK_MAX_SENSORS; ++#endif /* !SK_DIAG */ ++ ++ for (i = 0; i < SK_MAX_SENSORS; i++) { ++ pSen = &pAC->I2c.SenTable[i]; ++ ++ pSen->SenDesc = "unknown"; ++ pSen->SenType = SK_SEN_UNKNOWN; ++ pSen->SenThreErrHigh = 0; ++ pSen->SenThreErrLow = 0; ++ pSen->SenThreWarnHigh = 0; ++ pSen->SenThreWarnLow = 0; ++ pSen->SenReg = LM80_FAN2_IN; ++ pSen->SenInit = SK_SEN_DYN_INIT_NONE; ++ pSen->SenValue = 0; ++ pSen->SenErrFlag = SK_SEN_ERR_NOT_PRESENT; ++ pSen->SenErrCts = 0; ++ pSen->SenBegErrTS = 0; ++ pSen->SenState = SK_SEN_IDLE; ++ pSen->SenRead = NULL; ++ pSen->SenDev = 0; ++ } ++ ++ /* Now we are "INIT data"ed */ ++ pAC->I2c.InitLevel = SK_INIT_DATA; ++ return(0); ++} /* SkI2cInit0*/ ++ ++ ++/* ++ * Do the init state 1 initialization ++ * ++ * initialize the following register of the LM80: ++ * Configuration register: ++ * - START, noINT, activeLOW, noINT#Clear, noRESET, noCI, noGPO#, noINIT ++ * ++ * Interrupt Mask Register 1: ++ * - all interrupts are Disabled (0xff) ++ * ++ * Interrupt Mask Register 2: ++ * - all interrupts are Disabled (0xff) Interrupt modi doesn't matter. ++ * ++ * Fan Divisor/RST_OUT register: ++ * - Divisors set to 1 (bits 00), all others 0s. ++ * ++ * OS# Configuration/Temperature resolution Register: ++ * - all 0s ++ * ++ */ ++static int SkI2cInit1( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ ++{ ++ int i; ++ SK_U8 I2cSwCtrl; ++ SK_GEPORT *pPrt; /* GIni Port struct pointer */ ++ SK_SENSOR *pSen; ++ ++ if (pAC->I2c.InitLevel != SK_INIT_DATA) { ++ /* Re-init not needed in TWSI module */ ++ return(0); ++ } ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC || ++ pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* No sensors on Yukon-EC and Yukon-FE */ ++ return(0); ++ } ++ ++ /* Set the Direction of TWSI-Data Pin to IN */ ++ SK_I2C_CLR_BIT(IoC, I2C_DATA_DIR | I2C_DATA); ++ /* Check for 32-Bit Yukon with Low at TWSI-Data Pin */ ++ SK_I2C_GET_SW(IoC, &I2cSwCtrl); ++ ++ if ((I2cSwCtrl & I2C_DATA) == 0) { ++ /* this is a 32-Bit board */ ++ pAC->GIni.GIYukon32Bit = SK_TRUE; ++ return(0); ++ } ++ ++ /* Check for 64 Bit Yukon without sensors */ ++ if (SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_CFG, 0) != 0) { ++ return(0); ++ } ++ ++ (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_1, 0); ++ ++ (void)SkI2cWrite(pAC, IoC, 0xffUL, LM80_ADDR, I2C_025K_DEV, LM80_IMSK_2, 0); ++ ++ (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_FAN_CTRL, 0); ++ ++ (void)SkI2cWrite(pAC, IoC, 0, LM80_ADDR, I2C_025K_DEV, LM80_TEMP_CTRL, 0); ++ ++ (void)SkI2cWrite(pAC, IoC, (SK_U32)LM80_CFG_START, LM80_ADDR, I2C_025K_DEV, ++ LM80_CFG, 0); ++ ++ /* ++ * MaxSens has to be updated here, because PhyType is not ++ * set when performing Init Level 0 ++ */ ++ pAC->I2c.MaxSens = 5; ++ ++ pPrt = &pAC->GIni.GP[0]; ++ ++ if (pAC->GIni.GIGenesis) { ++ if (pPrt->PhyType == SK_PHY_BCOM) { ++ if (pAC->GIni.GIMacsFound == 1) { ++ pAC->I2c.MaxSens += 1; ++ } ++ else { ++ pAC->I2c.MaxSens += 3; ++ } ++ } ++ } ++ else { ++ pAC->I2c.MaxSens += 3; ++ } ++ ++ for (i = 0; i < pAC->I2c.MaxSens; i++) { ++ pSen = &pAC->I2c.SenTable[i]; ++ switch (i) { ++ case 0: ++ pSen->SenDesc = "Temperature"; ++ pSen->SenType = SK_SEN_TEMP; ++ pSen->SenThreErrHigh = SK_SEN_TEMP_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_TEMP_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_TEMP_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_TEMP_LOW_ERR; ++ pSen->SenReg = LM80_TEMP_IN; ++ break; ++ case 1: ++ pSen->SenDesc = "Voltage PCI"; ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenThreErrHigh = SK_SEN_PCI_5V_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PCI_5V_HIGH_WARN; ++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) { ++ pSen->SenThreWarnLow = SK_SEN_PCI_5V_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PCI_5V_LOW_ERR; ++ } ++ else { ++ pSen->SenThreWarnLow = 0; ++ pSen->SenThreErrLow = 0; ++ } ++ pSen->SenReg = LM80_VT0_IN; ++ break; ++ case 2: ++ pSen->SenDesc = "Voltage PCI-IO"; ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenThreErrHigh = SK_SEN_PCI_IO_5V_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PCI_IO_5V_HIGH_WARN; ++ if (pAC->GIni.GIPciBus != SK_PEX_BUS) { ++ pSen->SenThreWarnLow = SK_SEN_PCI_IO_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PCI_IO_3V3_LOW_ERR; ++ } ++ else { ++ pSen->SenThreWarnLow = 0; ++ pSen->SenThreErrLow = 0; ++ } ++ pSen->SenReg = LM80_VT1_IN; ++ pSen->SenInit = SK_SEN_DYN_INIT_PCI_IO; ++ break; ++ case 3: ++ if (pAC->GIni.GIGenesis) { ++ pSen->SenDesc = "Voltage ASIC"; ++ } ++ else { ++ pSen->SenDesc = "Voltage VMAIN"; ++ } ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenThreErrHigh = SK_SEN_VDD_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_VDD_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_VDD_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_VDD_LOW_ERR; ++ pSen->SenReg = LM80_VT2_IN; ++ break; ++ case 4: ++ if (pAC->GIni.GIGenesis) { ++ if (pPrt->PhyType == SK_PHY_BCOM) { ++ pSen->SenDesc = "Voltage PHY A PLL"; ++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; ++ } ++ else { ++ pSen->SenDesc = "Voltage PMA"; ++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; ++ } ++ } ++ else { ++ pSen->SenDesc = "Voltage VAUX"; ++ pSen->SenThreErrHigh = SK_SEN_VAUX_3V3_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_VAUX_3V3_HIGH_WARN; ++ if (pAC->GIni.GIVauxAvail) { ++ pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR; ++ } ++ else { ++ pSen->SenThreErrLow = 0; ++ pSen->SenThreWarnLow = 0; ++ } ++ } ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenReg = LM80_VT3_IN; ++ break; ++ case 5: ++ if (CHIP_ID_YUKON_2(pAC)) { ++ if (pAC->GIni.GIChipRev == 0) { ++ pSen->SenDesc = "Voltage Core 1V3"; ++ pSen->SenThreErrHigh = SK_SEN_CORE_1V3_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V3_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_CORE_1V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_CORE_1V3_LOW_ERR; ++ } ++ else { ++ pSen->SenDesc = "Voltage Core 1V2"; ++ pSen->SenThreErrHigh = SK_SEN_CORE_1V2_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V2_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_CORE_1V2_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_CORE_1V2_LOW_ERR; ++ } ++ } ++ else { ++ if (pAC->GIni.GIGenesis) { ++ pSen->SenDesc = "Voltage PHY 2V5"; ++ pSen->SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR; ++ } ++ else { ++ pSen->SenDesc = "Voltage Core 1V5"; ++ pSen->SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR; ++ } ++ } ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenReg = LM80_VT4_IN; ++ break; ++ case 6: ++ if (CHIP_ID_YUKON_2(pAC)) { ++ pSen->SenDesc = "Voltage PHY 1V5"; ++ pSen->SenThreErrHigh = SK_SEN_CORE_1V5_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_CORE_1V5_HIGH_WARN; ++ if (pAC->GIni.GIPciBus == SK_PEX_BUS) { ++ pSen->SenThreWarnLow = SK_SEN_CORE_1V5_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_CORE_1V5_LOW_ERR; ++ } ++ else { ++ pSen->SenThreWarnLow = 0; ++ pSen->SenThreErrLow = 0; ++ } ++ } ++ else { ++ if (pAC->GIni.GIGenesis) { ++ pSen->SenDesc = "Voltage PHY B PLL"; ++ } ++ else { ++ pSen->SenDesc = "Voltage PHY 3V3"; ++ } ++ pSen->SenThreErrHigh = SK_SEN_PLL_3V3_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PLL_3V3_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_PLL_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PLL_3V3_LOW_ERR; ++ } ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenReg = LM80_VT5_IN; ++ break; ++ case 7: ++ if (pAC->GIni.GIGenesis) { ++ pSen->SenDesc = "Speed Fan"; ++ pSen->SenType = SK_SEN_FAN; ++ pSen->SenThreErrHigh = SK_SEN_FAN_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_FAN_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_FAN_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_FAN_LOW_ERR; ++ pSen->SenReg = LM80_FAN2_IN; ++ } ++ else { ++ pSen->SenDesc = "Voltage PHY 2V5"; ++ pSen->SenType = SK_SEN_VOLT; ++ pSen->SenThreErrHigh = SK_SEN_PHY_2V5_HIGH_ERR; ++ pSen->SenThreWarnHigh = SK_SEN_PHY_2V5_HIGH_WARN; ++ pSen->SenThreWarnLow = SK_SEN_PHY_2V5_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PHY_2V5_LOW_ERR; ++ pSen->SenReg = LM80_VT6_IN; ++ } ++ break; ++ default: ++ SK_ERR_LOG(pAC, SK_ERRCL_INIT | SK_ERRCL_SW, ++ SKERR_I2C_E001, SKERR_I2C_E001MSG); ++ break; ++ } ++ ++ pSen->SenValue = 0; ++ pSen->SenErrFlag = SK_SEN_ERR_OK; ++ pSen->SenErrCts = 0; ++ pSen->SenBegErrTS = 0; ++ pSen->SenState = SK_SEN_IDLE; ++ if (pSen->SenThreWarnLow != 0) { ++ pSen->SenRead = SkLm80ReadSensor; ++ } ++ pSen->SenDev = LM80_ADDR; ++ } ++ ++#ifndef SK_DIAG ++ pAC->I2c.DummyReads = pAC->I2c.MaxSens; ++#endif /* !SK_DIAG */ ++ ++ /* Clear TWSI IRQ */ ++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); ++ ++ /* Now we are I/O initialized */ ++ pAC->I2c.InitLevel = SK_INIT_IO; ++ return(0); ++} /* SkI2cInit1 */ ++ ++ ++/* ++ * Init level 2: Start first sensor read. ++ */ ++static int SkI2cInit2( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ ++{ ++ int ReadComplete; ++ SK_SENSOR *pSen; ++ ++ if (pAC->I2c.InitLevel != SK_INIT_IO) { ++ /* ReInit not needed in TWSI module */ ++ /* Init0 and Init2 not permitted */ ++ return(0); ++ } ++ ++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; ++ ++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); ++ ++ if (ReadComplete) { ++ SK_ERR_LOG(pAC, SK_ERRCL_INIT, SKERR_I2C_E008, SKERR_I2C_E008MSG); ++ } ++ ++ /* Now we are correctly initialized */ ++ pAC->I2c.InitLevel = SK_INIT_RUN; ++ ++ return(0); ++} /* SkI2cInit2*/ ++ ++ ++/* ++ * Initialize TWSI devices ++ * ++ * Get the first voltage value and discard it. ++ * Go into temperature read mode. A default pointer is not set. ++ * ++ * The things to be done depend on the init level in the parameter list: ++ * Level 0: ++ * Initialize only the data structures. Do NOT access hardware. ++ * Level 1: ++ * Initialize hardware through SK_IN / SK_OUT commands. Do NOT use interrupts. ++ * Level 2: ++ * Everything is possible. Interrupts may be used from now on. ++ * ++ * return: ++ * 0 = success ++ * other = error. ++ */ ++int SkI2cInit( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context needed in levels 1 and 2 */ ++int Level) /* Init Level */ ++{ ++ ++ switch (Level) { ++ case SK_INIT_DATA: ++ return(SkI2cInit0(pAC)); ++ case SK_INIT_IO: ++ return(SkI2cInit1(pAC, IoC)); ++ case SK_INIT_RUN: ++ return(SkI2cInit2(pAC, IoC)); ++ default: ++ break; ++ } ++ ++ return(0); ++} /* SkI2cInit */ ++ ++ ++#ifndef SK_DIAG ++/* ++ * Interrupt service function for the TWSI Interface ++ * ++ * Clears the Interrupt source ++ * ++ * Reads the register and check it for sending a trap. ++ * ++ * Starts the timer if necessary. ++ */ ++void SkI2cIsr( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC) /* I/O Context */ ++{ ++ SK_EVPARA Para; ++ ++ /* Clear TWSI IRQ */ ++ SK_OUT32(IoC, B2_I2C_IRQ, I2C_CLR_IRQ); ++ ++ Para.Para64 = 0; ++ SkEventQueue(pAC, SKGE_I2C, SK_I2CEV_IRQ, Para); ++} /* SkI2cIsr */ ++ ++ ++/* ++ * Check this sensors Value against the threshold and send events. ++ */ ++static void SkI2cCheckSensor( ++SK_AC *pAC, /* Adapter Context */ ++SK_SENSOR *pSen) ++{ ++ SK_EVPARA ParaLocal; ++ SK_BOOL TooHigh; /* Is sensor too high? */ ++ SK_BOOL TooLow; /* Is sensor too low? */ ++ SK_U64 CurrTime; /* Current Time */ ++ SK_BOOL DoTrapSend; /* We need to send a trap */ ++ SK_BOOL DoErrLog; /* We need to log the error */ ++ SK_BOOL IsError; /* Error occured */ ++ ++ /* Check Dummy Reads first */ ++ if (pAC->I2c.DummyReads > 0) { ++ pAC->I2c.DummyReads--; ++ return; ++ } ++ ++ /* Get the current time */ ++ CurrTime = SkOsGetTime(pAC); ++ ++ /* Set para to the most useful setting: The current sensor. */ ++ ParaLocal.Para64 = (SK_U64)pAC->I2c.CurrSens; ++ ++ /* Check the Value against the thresholds. First: Error Thresholds */ ++ TooHigh = pSen->SenValue > pSen->SenThreErrHigh; ++ TooLow = pSen->SenValue < pSen->SenThreErrLow; ++ ++ IsError = SK_FALSE; ++ ++ if (TooHigh || TooLow) { ++ /* Error condition is satisfied */ ++ DoTrapSend = SK_TRUE; ++ DoErrLog = SK_TRUE; ++ ++ /* Now error condition is satisfied */ ++ IsError = SK_TRUE; ++ ++ if (pSen->SenErrFlag == SK_SEN_ERR_ERR) { ++ /* This state is the former one */ ++ ++ /* So check first whether we have to send a trap */ ++ if (pSen->SenLastErrTrapTS + SK_SEN_ERR_TR_HOLD > CurrTime) { ++ /* ++ * Do NOT send the Trap. The hold back time ++ * has to run out first. ++ */ ++ DoTrapSend = SK_FALSE; ++ } ++ ++ /* Check now whether we have to log an Error */ ++ if (pSen->SenLastErrLogTS + SK_SEN_ERR_LOG_HOLD > CurrTime) { ++ /* ++ * Do NOT log the error. The hold back time ++ * has to run out first. ++ */ ++ DoErrLog = SK_FALSE; ++ } ++ } ++ else { ++ /* We came from a different state -> Set Begin Time Stamp */ ++ pSen->SenBegErrTS = CurrTime; ++ pSen->SenErrFlag = SK_SEN_ERR_ERR; ++ } ++ ++ if (DoTrapSend) { ++ /* Set current Time */ ++ pSen->SenLastErrTrapTS = CurrTime; ++ pSen->SenErrCts++; ++ ++ /* Queue PNMI Event */ ++ SkEventQueue(pAC, SKGE_PNMI, TooHigh ? ++ SK_PNMI_EVT_SEN_ERR_UPP : SK_PNMI_EVT_SEN_ERR_LOW, ++ ParaLocal); ++ } ++ ++ if (DoErrLog) { ++ /* Set current Time */ ++ pSen->SenLastErrLogTS = CurrTime; ++ ++ if (pSen->SenType == SK_SEN_TEMP) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E011, SKERR_I2C_E011MSG); ++ } ++ else if (pSen->SenType == SK_SEN_VOLT) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E012, SKERR_I2C_E012MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E015, SKERR_I2C_E015MSG); ++ } ++ } ++ } ++ ++ /* Check the Value against the thresholds */ ++ /* 2nd: Warning thresholds */ ++ TooHigh = pSen->SenValue > pSen->SenThreWarnHigh; ++ TooLow = pSen->SenValue < pSen->SenThreWarnLow; ++ ++ if (!IsError && (TooHigh || TooLow)) { ++ /* Error condition is satisfied */ ++ DoTrapSend = SK_TRUE; ++ DoErrLog = SK_TRUE; ++ ++ if (pSen->SenErrFlag == SK_SEN_ERR_WARN) { ++ /* This state is the former one */ ++ ++ /* So check first whether we have to send a trap */ ++ if (pSen->SenLastWarnTrapTS + SK_SEN_WARN_TR_HOLD > CurrTime) { ++ /* ++ * Do NOT send the Trap. The hold back time ++ * has to run out first. ++ */ ++ DoTrapSend = SK_FALSE; ++ } ++ ++ /* Check now whether we have to log an Error */ ++ if (pSen->SenLastWarnLogTS + SK_SEN_WARN_LOG_HOLD > CurrTime) { ++ /* ++ * Do NOT log the error. The hold back time ++ * has to run out first. ++ */ ++ DoErrLog = SK_FALSE; ++ } ++ } ++ else { ++ /* We came from a different state -> Set Begin Time Stamp */ ++ pSen->SenBegWarnTS = CurrTime; ++ pSen->SenErrFlag = SK_SEN_ERR_WARN; ++ } ++ ++ if (DoTrapSend) { ++ /* Set current Time */ ++ pSen->SenLastWarnTrapTS = CurrTime; ++ pSen->SenWarnCts++; ++ ++ /* Queue PNMI Event */ ++ SkEventQueue(pAC, SKGE_PNMI, TooHigh ? ++ SK_PNMI_EVT_SEN_WAR_UPP : SK_PNMI_EVT_SEN_WAR_LOW, ParaLocal); ++ } ++ ++ if (DoErrLog) { ++ /* Set current Time */ ++ pSen->SenLastWarnLogTS = CurrTime; ++ ++ if (pSen->SenType == SK_SEN_TEMP) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E009, SKERR_I2C_E009MSG); ++ } ++ else if (pSen->SenType == SK_SEN_VOLT) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E010, SKERR_I2C_E010MSG); ++ } ++ else { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E014, SKERR_I2C_E014MSG); ++ } ++ } ++ } ++ ++ /* Check for NO error at all */ ++ if (!IsError && !TooHigh && !TooLow) { ++ /* Set o.k. Status if no error and no warning condition */ ++ pSen->SenErrFlag = SK_SEN_ERR_OK; ++ } ++ ++ /* End of check against the thresholds */ ++ ++ if (pSen->SenInit == SK_SEN_DYN_INIT_PCI_IO) { ++ /* Bug fix AF: 16.Aug.2001: Correct the init base of LM80 sensor */ ++ pSen->SenInit = SK_SEN_DYN_INIT_NONE; ++ ++ if (pSen->SenValue > SK_SEN_PCI_IO_RANGE_LIMITER) { ++ /* 5V PCI-IO Voltage */ ++ pSen->SenThreWarnLow = SK_SEN_PCI_IO_5V_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_PCI_IO_5V_LOW_ERR; ++ } ++ else { ++ /* 3.3V PCI-IO Voltage */ ++ pSen->SenThreWarnHigh = SK_SEN_PCI_IO_3V3_HIGH_WARN; ++ pSen->SenThreErrHigh = SK_SEN_PCI_IO_3V3_HIGH_ERR; ++ } ++ } ++ ++#ifdef TEST_ONLY ++ /* Dynamic thresholds also for VAUX of LM80 sensor */ ++ if (pSen->SenInit == SK_SEN_DYN_INIT_VAUX) { ++ ++ pSen->SenInit = SK_SEN_DYN_INIT_NONE; ++ ++ /* 3.3V VAUX Voltage */ ++ if (pSen->SenValue > SK_SEN_VAUX_RANGE_LIMITER) { ++ pSen->SenThreWarnLow = SK_SEN_VAUX_3V3_LOW_WARN; ++ pSen->SenThreErrLow = SK_SEN_VAUX_3V3_LOW_ERR; ++ } ++ /* 0V VAUX Voltage */ ++ else { ++ pSen->SenThreWarnHigh = SK_SEN_VAUX_0V_WARN_ERR; ++ pSen->SenThreErrHigh = SK_SEN_VAUX_0V_WARN_ERR; ++ } ++ } ++ ++ /* Check initialization state: the VIO Thresholds need adaption */ ++ if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN && ++ pSen->SenValue > SK_SEN_WARNLOW2C && ++ pSen->SenValue < SK_SEN_WARNHIGH2) { ++ ++ pSen->SenThreErrLow = SK_SEN_ERRLOW2C; ++ pSen->SenThreWarnLow = SK_SEN_WARNLOW2C; ++ pSen->SenInit = SK_TRUE; ++ } ++ ++ if (!pSen->SenInit && pSen->SenReg == LM80_VT1_IN && ++ pSen->SenValue > SK_SEN_WARNLOW2 && ++ pSen->SenValue < SK_SEN_WARNHIGH2C) { ++ ++ pSen->SenThreErrHigh = SK_SEN_ERRHIGH2C; ++ pSen->SenThreWarnHigh = SK_SEN_WARNHIGH2C; ++ pSen->SenInit = SK_TRUE; ++ } ++#endif ++ ++ if (pSen->SenInit != SK_SEN_DYN_INIT_NONE) { ++ SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_I2C_E013, SKERR_I2C_E013MSG); ++ } ++} /* SkI2cCheckSensor */ ++ ++ ++/* ++ * The only Event to be served is the timeout event ++ * ++ */ ++int SkI2cEvent( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ ++SK_U32 Event, /* Module specific Event */ ++SK_EVPARA Para) /* Event specific Parameter */ ++{ ++ int ReadComplete; ++ SK_SENSOR *pSen; ++ SK_U32 Time; ++ SK_EVPARA ParaLocal; ++ int i; ++ ++ /* New case: no sensors */ ++ if (pAC->I2c.MaxSens == 0) { ++ return(0); ++ } ++ ++ switch (Event) { ++ case SK_I2CEV_IRQ: ++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; ++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); ++ ++ if (ReadComplete) { ++ /* Check sensor against defined thresholds */ ++ SkI2cCheckSensor(pAC, pSen); ++ ++ /* Increment Current sensor and set appropriate Timeout */ ++ pAC->I2c.CurrSens++; ++ if (pAC->I2c.CurrSens >= pAC->I2c.MaxSens) { ++ pAC->I2c.CurrSens = 0; ++ Time = SK_I2C_TIM_LONG; ++ } ++ else { ++ Time = SK_I2C_TIM_SHORT; ++ } ++ ++ /* Start Timer */ ++ ParaLocal.Para64 = (SK_U64)0; ++ ++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; ++ ++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, ++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal); ++ } ++ else { ++ /* Start Timer */ ++ ParaLocal.Para64 = (SK_U64)0; ++ ++ pAC->I2c.TimerMode = SK_TIMER_WATCH_SM; ++ ++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, SK_I2C_TIM_WATCH, ++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal); ++ } ++ break; ++ case SK_I2CEV_TIM: ++ if (pAC->I2c.TimerMode == SK_TIMER_NEW_GAUGING) { ++ ++ ParaLocal.Para64 = (SK_U64)0; ++ SkTimerStop(pAC, IoC, &pAC->I2c.SenTimer); ++ ++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; ++ ReadComplete = SkI2cReadSensor(pAC, IoC, pSen); ++ ++ if (ReadComplete) { ++ /* Check sensor against defined thresholds */ ++ SkI2cCheckSensor(pAC, pSen); ++ ++ /* Increment Current sensor and set appropriate Timeout */ ++ pAC->I2c.CurrSens++; ++ if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) { ++ pAC->I2c.CurrSens = 0; ++ Time = SK_I2C_TIM_LONG; ++ } ++ else { ++ Time = SK_I2C_TIM_SHORT; ++ } ++ ++ /* Start Timer */ ++ ParaLocal.Para64 = (SK_U64)0; ++ ++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; ++ ++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, ++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal); ++ } ++ } ++ else { ++ pSen = &pAC->I2c.SenTable[pAC->I2c.CurrSens]; ++ pSen->SenErrFlag = SK_SEN_ERR_FAULTY; ++ SK_I2C_STOP(IoC); ++ ++ /* Increment Current sensor and set appropriate Timeout */ ++ pAC->I2c.CurrSens++; ++ if (pAC->I2c.CurrSens == pAC->I2c.MaxSens) { ++ pAC->I2c.CurrSens = 0; ++ Time = SK_I2C_TIM_LONG; ++ } ++ else { ++ Time = SK_I2C_TIM_SHORT; ++ } ++ ++ /* Start Timer */ ++ ParaLocal.Para64 = (SK_U64)0; ++ ++ pAC->I2c.TimerMode = SK_TIMER_NEW_GAUGING; ++ ++ SkTimerStart(pAC, IoC, &pAC->I2c.SenTimer, Time, ++ SKGE_I2C, SK_I2CEV_TIM, ParaLocal); ++ } ++ break; ++ case SK_I2CEV_CLEAR: ++ for (i = 0; i < SK_MAX_SENSORS; i++) { ++ pSen = &pAC->I2c.SenTable[i]; ++ ++ pSen->SenErrFlag = SK_SEN_ERR_OK; ++ pSen->SenErrCts = 0; ++ pSen->SenWarnCts = 0; ++ pSen->SenBegErrTS = 0; ++ pSen->SenBegWarnTS = 0; ++ pSen->SenLastErrTrapTS = (SK_U64)0; ++ pSen->SenLastErrLogTS = (SK_U64)0; ++ pSen->SenLastWarnTrapTS = (SK_U64)0; ++ pSen->SenLastWarnLogTS = (SK_U64)0; ++ } ++ break; ++ default: ++ SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_I2C_E006, SKERR_I2C_E006MSG); ++ } ++ ++ return(0); ++} /* SkI2cEvent*/ ++ ++#endif /* !SK_DIAG */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skvpd.c linux-2.6.9.new/drivers/net/sk98lin/skvpd.c +--- linux-2.6.9.old/drivers/net/sk98lin/skvpd.c 2004-10-19 05:54:32.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skvpd.c 2006-12-07 14:35:03.000000000 +0800 +@@ -1,22 +1,22 @@ + /****************************************************************************** + * + * Name: skvpd.c +- * Project: GEnesis, PCI Gigabit Ethernet Adapter +- * Version: $Revision: 1.37 $ +- * Date: $Date: 2003/01/13 10:42:45 $ +- * Purpose: Shared software to read and write VPD data ++ * Project: Gigabit Ethernet Adapters, VPD-Module ++ * Version: $Revision: 2.6 $ ++ * Date: $Date: 2004/11/02 10:47:39 $ ++ * Purpose: Shared software to read and write VPD + * + ******************************************************************************/ + + /****************************************************************************** + * +- * (C)Copyright 1998-2003 SysKonnect GmbH. ++ * (C)Copyright 1998-2002 SysKonnect. ++ * (C)Copyright 2002-2004 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -25,7 +25,7 @@ + Please refer skvpd.txt for infomation how to include this module + */ + static const char SysKonnectFileId[] = +- "@(#)$Id: skvpd.c,v 1.37 2003/01/13 10:42:45 rschmidt Exp $ (C) SK"; ++ "@(#) $Id: skvpd.c,v 2.6 2004/11/02 10:47:39 rschmidt Exp $ (C) Marvell."; + + #include "h/skdrv1st.h" + #include "h/sktypes.h" +@@ -59,9 +59,10 @@ + SK_U64 start_time; + SK_U16 state; + +- SK_DBG_MSG(pAC,SK_DBGMOD_VPD, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, + ("VPD wait for %s\n", event?"Write":"Read")); + start_time = SkOsGetTime(pAC); ++ + do { + if (SkOsGetTime(pAC) - start_time > SK_TICKS_PER_SEC) { + +@@ -81,17 +82,18 @@ + ("ERROR:VPD wait timeout\n")); + return(1); + } +- ++ + VPD_IN16(pAC, IoC, PCI_VPD_ADR_REG, &state); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, + ("state = %x, event %x\n",state,event)); +- } while((int)(state & PCI_VPD_FLAG) == event); ++ } while ((int)(state & PCI_VPD_FLAG) == event); + + return(0); + } + +-#ifdef SKDIAG ++ ++#ifdef SK_DIAG + + /* + * Read the dword at address 'addr' from the VPD EEPROM. +@@ -124,16 +126,15 @@ + Rtv = 0; + + VPD_IN32(pAC, IoC, PCI_VPD_DAT_REG, &Rtv); +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, + ("VPD read dword data = 0x%x\n",Rtv)); + return(Rtv); + } ++#endif /* SK_DIAG */ + +-#endif /* SKDIAG */ +- +-#if 0 + ++#ifdef XXX + /* + Write the dword 'data' at address 'addr' into the VPD EEPROM, and + verify that the data is written. +@@ -151,7 +152,6 @@ + . over all 3.8 ms 13.2 ms + . + +- + Returns 0: success + 1: error, I2C transfer does not terminate + 2: error, data verify error +@@ -189,7 +189,8 @@ + return(0); + } /* VpdWriteDWord */ + +-#endif /* 0 */ ++#endif /* XXX */ ++ + + /* + * Read one Stream of 'len' bytes of VPD data, starting at 'addr' from +@@ -215,7 +216,7 @@ + pComp = (SK_U8 *) buf; + + for (i = 0; i < Len; i++, buf++) { +- if ((i%sizeof(SK_U32)) == 0) { ++ if ((i % SZ_LONG) == 0) { + /* + * At the begin of each cycle read the Data Reg + * So it is initialized even if only a few bytes +@@ -233,14 +234,13 @@ + } + } + +- /* Write current Byte */ +- VPD_OUT8(pAC, IoC, PCI_VPD_DAT_REG + (i%sizeof(SK_U32)), +- *(SK_U8*)buf); ++ /* Write current byte */ ++ VPD_OUT8(pAC, IoC, PCI_VPD_DAT_REG + (i % SZ_LONG), *(SK_U8*)buf); + +- if (((i%sizeof(SK_U32)) == 3) || (i == (Len - 1))) { ++ if (((i % SZ_LONG) == 3) || (i == (Len - 1))) { + /* New Address needs to be written to VPD_ADDR reg */ + AdrReg = (SK_U16) Addr; +- Addr += sizeof(SK_U32); ++ Addr += SZ_LONG; + AdrReg |= VPD_WRITE; /* WRITE operation */ + + VPD_OUT16(pAC, IoC, PCI_VPD_ADR_REG, AdrReg); +@@ -250,7 +250,7 @@ + if (Rtv != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, + ("Write Timed Out\n")); +- return(i - (i%sizeof(SK_U32))); ++ return(i - (i % SZ_LONG)); + } + + /* +@@ -265,18 +265,18 @@ + if (Rtv != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, + ("Verify Timed Out\n")); +- return(i - (i%sizeof(SK_U32))); ++ return(i - (i % SZ_LONG)); + } + +- for (j = 0; j <= (int)(i%sizeof(SK_U32)); j++, pComp++) { +- ++ for (j = 0; j <= (int)(i % SZ_LONG); j++, pComp++) { ++ + VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + j, &Data); +- ++ + if (Data != *pComp) { + /* Verify Error */ + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, + ("WriteStream Verify Error\n")); +- return(i - (i%sizeof(SK_U32)) + j); ++ return(i - (i % SZ_LONG) + j); + } + } + } +@@ -284,7 +284,7 @@ + + return(Len); + } +- ++ + + /* + * Read one Stream of 'len' bytes of VPD data, starting at 'addr' from +@@ -304,10 +304,10 @@ + int Rtv; + + for (i = 0; i < Len; i++, buf++) { +- if ((i%sizeof(SK_U32)) == 0) { ++ if ((i % SZ_LONG) == 0) { + /* New Address needs to be written to VPD_ADDR reg */ + AdrReg = (SK_U16) Addr; +- Addr += sizeof(SK_U32); ++ Addr += SZ_LONG; + AdrReg &= ~VPD_WRITE; /* READ operation */ + + VPD_OUT16(pAC, IoC, PCI_VPD_ADR_REG, AdrReg); +@@ -318,13 +318,13 @@ + return(i); + } + } +- VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + (i%sizeof(SK_U32)), +- (SK_U8 *)buf); ++ VPD_IN8(pAC, IoC, PCI_VPD_DAT_REG + (i % SZ_LONG), (SK_U8 *)buf); + } + + return(Len); + } + ++ + /* + * Read ore writes 'len' bytes of VPD data, starting at 'addr' from + * or to the I2C EEPROM. +@@ -350,14 +350,14 @@ + return(0); + + vpd_rom_size = pAC->vpd.rom_size; +- ++ + if (addr > vpd_rom_size - 4) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, + ("Address error: 0x%x, exp. < 0x%x\n", + addr, vpd_rom_size - 4)); + return(0); + } +- ++ + if (addr + len > vpd_rom_size) { + len = vpd_rom_size - addr; + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, +@@ -374,8 +374,8 @@ + return(Rtv); + } + +-#ifdef SKDIAG + ++#if defined (SK_DIAG) || defined (SK_ASF) + /* + * Read 'len' bytes of VPD data, starting at 'addr'. + * +@@ -391,6 +391,7 @@ + return(VpdTransferBlock(pAC, IoC, buf, addr, len, VPD_READ)); + } + ++ + /* + * Write 'len' bytes of *but to the VPD EEPROM, starting at 'addr'. + * +@@ -405,18 +406,27 @@ + { + return(VpdTransferBlock(pAC, IoC, buf, addr, len, VPD_WRITE)); + } +-#endif /* SKDIAG */ ++#endif /* SK_DIAG */ + +-/* +- * (re)initialize the VPD buffer ++ ++/****************************************************************************** + * +- * Reads the VPD data from the EEPROM into the VPD buffer. +- * Get the remaining read only and read / write space. ++ * VpdInit() - (re)initialize the VPD buffer + * +- * return 0: success +- * 1: fatal VPD error ++ * Description: ++ * Reads the VPD data from the EEPROM into the VPD buffer. ++ * Get the remaining read only and read / write space. ++ * ++ * Note: ++ * This is a local function and should be used locally only. ++ * However, the ASF module needs to use this function also. ++ * Therfore it has been published. ++ * ++ * Returns: ++ * 0: success ++ * 1: fatal VPD error + */ +-static int VpdInit( ++int VpdInit( + SK_AC *pAC, /* Adapters context */ + SK_IOC IoC) /* IO Context */ + { +@@ -427,14 +437,14 @@ + SK_U16 dev_id; + SK_U32 our_reg2; + +- SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_INIT, ("VpdInit .. ")); +- ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_INIT, ("VpdInit ... ")); ++ + VPD_IN16(pAC, IoC, PCI_DEVICE_ID, &dev_id); +- ++ + VPD_IN32(pAC, IoC, PCI_OUR_REG_2, &our_reg2); +- ++ + pAC->vpd.rom_size = 256 << ((our_reg2 & PCI_VPD_ROM_SZ) >> 14); +- ++ + /* + * this function might get used before the hardware is initialized + * therefore we cannot always trust in GIChipId +@@ -465,19 +475,15 @@ + ("Block Read Error\n")); + return(1); + } +- ++ + pAC->vpd.vpd_size = vpd_size; + + /* Asus K8V Se Deluxe bugfix. Correct VPD content */ +- /* MBo April 2004 */ +- if (((unsigned char)pAC->vpd.vpd_buf[0x3f] == 0x38) && +- ((unsigned char)pAC->vpd.vpd_buf[0x40] == 0x3c) && +- ((unsigned char)pAC->vpd.vpd_buf[0x41] == 0x45)) { +- printk("sk98lin: Asus mainboard with buggy VPD? " +- "Correcting data.\n"); +- pAC->vpd.vpd_buf[0x40] = 0x38; +- } ++ i = 62; ++ if (!SK_STRNCMP(pAC->vpd.vpd_buf + i, " 8vpd.vpd_buf[i + 2] = '8'; ++ } + + /* find the end tag of the RO area */ + if (!(r = vpd_find_para(pAC, VPD_RV, &rp))) { +@@ -485,9 +491,9 @@ + ("Encoding Error: RV Tag not found\n")); + return(1); + } +- ++ + if (r->p_val + r->p_len > pAC->vpd.vpd_buf + vpd_size/2) { +- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL, + ("Encoding Error: Invalid VPD struct size\n")); + return(1); + } +@@ -497,7 +503,7 @@ + for (i = 0, x = 0; (unsigned)i <= (unsigned)vpd_size/2 - r->p_len; i++) { + x += pAC->vpd.vpd_buf[i]; + } +- ++ + if (x != 0) { + /* checksum error */ + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, +@@ -511,7 +517,7 @@ + ("Encoding Error: RV Tag not found\n")); + return(1); + } +- ++ + if (r->p_val < pAC->vpd.vpd_buf + vpd_size/2) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, + ("Encoding Error: Invalid VPD struct size\n")); +@@ -531,6 +537,7 @@ + return(0); + } + ++ + /* + * find the Keyword 'key' in the VPD buffer and fills the + * parameter struct 'p' with it's values +@@ -541,7 +548,7 @@ + static SK_VPD_PARA *vpd_find_para( + SK_AC *pAC, /* common data base */ + const char *key, /* keyword to find (e.g. "MN") */ +-SK_VPD_PARA *p) /* parameter description struct */ ++SK_VPD_PARA *p) /* parameter description struct */ + { + char *v ; /* points to VPD buffer */ + int max; /* Maximum Number of Iterations */ +@@ -556,10 +563,10 @@ + if (*v != (char)RES_ID) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, + ("Error: 0x%x missing\n", RES_ID)); +- return NULL; ++ return(0); + } + +- if (strcmp(key, VPD_NAME) == 0) { ++ if (SK_STRCMP(key, VPD_NAME) == 0) { + p->p_len = VPD_GET_RES_LEN(v); + p->p_val = VPD_GET_VAL(v); + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, +@@ -569,7 +576,7 @@ + + v += 3 + VPD_GET_RES_LEN(v) + 3; + for (;; ) { +- if (SK_MEMCMP(key,v,2) == 0) { ++ if (SK_MEMCMP(key, v, 2) == 0) { + p->p_len = VPD_GET_VPD_LEN(v); + p->p_val = VPD_GET_VAL(v); + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, +@@ -579,11 +586,11 @@ + + /* exit when reaching the "RW" Tag or the maximum of itera. */ + max--; +- if (SK_MEMCMP(VPD_RW,v,2) == 0 || max == 0) { ++ if (SK_MEMCMP(VPD_RW, v, 2) == 0 || max == 0) { + break; + } + +- if (SK_MEMCMP(VPD_RV,v,2) == 0) { ++ if (SK_MEMCMP(VPD_RV, v, 2) == 0) { + v += 3 + VPD_GET_VPD_LEN(v) + 3; /* skip VPD-W */ + } + else { +@@ -600,9 +607,10 @@ + ("Key/Len Encoding error\n")); + } + #endif /* DEBUG */ +- return NULL; ++ return(0); + } + ++ + /* + * Move 'n' bytes. Begin with the last byte if 'n' is > 0, + * Start with the last byte if n is < 0. +@@ -637,6 +645,7 @@ + } + } + ++ + /* + * setup the VPD keyword 'key' at 'ip'. + * +@@ -653,10 +662,11 @@ + p = (SK_VPD_KEY *) ip; + p->p_key[0] = key[0]; + p->p_key[1] = key[1]; +- p->p_len = (unsigned char) len; +- SK_MEMCPY(&p->p_val,buf,len); ++ p->p_len = (unsigned char)len; ++ SK_MEMCPY(&p->p_val, buf, len); + } + ++ + /* + * Setup the VPD end tag "RV" / "RW". + * Also correct the remaining space variables vpd_free_ro / vpd_free_rw. +@@ -682,7 +692,7 @@ + + if (p->p_key[0] != 'R' || (p->p_key[1] != 'V' && p->p_key[1] != 'W')) { + /* something wrong here, encoding error */ +- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR | SK_DBGCAT_FATAL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, + ("Encoding Error: invalid end tag\n")); + return(1); + } +@@ -714,6 +724,7 @@ + return(0); + } + ++ + /* + * Insert a VPD keyword into the VPD buffer. + * +@@ -747,11 +758,11 @@ + + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, + ("VPD setup para key = %s, val = %s\n",key,buf)); +- ++ + vpd_size = pAC->vpd.vpd_size; + + rtv = 0; +- ip = NULL; ++ ip = 0; + if (type == VPD_RW_KEY) { + /* end tag is "RW" */ + free = pAC->vpd.v.vpd_free_rw; +@@ -875,18 +886,18 @@ + } + } + +- if ((signed)strlen(VPD_NAME) + 1 <= *len) { ++ if ((signed)SK_STRLEN(VPD_NAME) + 1 <= *len) { + v = pAC->vpd.vpd_buf; +- strcpy(buf,VPD_NAME); +- n = strlen(VPD_NAME) + 1; ++ SK_STRCPY(buf, VPD_NAME); ++ n = SK_STRLEN(VPD_NAME) + 1; + buf += n; + *elements = 1; + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_RX, +- ("'%c%c' ",v[0],v[1])); ++ ("'%c%c' ", v[0], v[1])); + } + else { + *len = 0; +- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_ERR, ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, + ("buffer overflow\n")); + return(2); + } +@@ -894,17 +905,17 @@ + v += 3 + VPD_GET_RES_LEN(v) + 3; + for (;; ) { + /* exit when reaching the "RW" Tag */ +- if (SK_MEMCMP(VPD_RW,v,2) == 0) { ++ if (SK_MEMCMP(VPD_RW, v, 2) == 0) { + break; + } + +- if (SK_MEMCMP(VPD_RV,v,2) == 0) { ++ if (SK_MEMCMP(VPD_RV, v, 2) == 0) { + v += 3 + VPD_GET_VPD_LEN(v) + 3; /* skip VPD-W */ + continue; + } + + if (n+3 <= *len) { +- SK_MEMCPY(buf,v,2); ++ SK_MEMCPY(buf, v, 2); + buf += 2; + *buf++ = '\0'; + n += 3; +@@ -991,13 +1002,14 @@ + { + if ((*key != 'Y' && *key != 'V') || + key[1] < '0' || key[1] > 'Z' || +- (key[1] > '9' && key[1] < 'A') || strlen(key) != 2) { ++ (key[1] > '9' && key[1] < 'A') || SK_STRLEN(key) != 2) { + + return(SK_FALSE); + } + return(SK_TRUE); + } + ++ + /* + * Read the contents of the VPD EEPROM and copy it to the VPD + * buffer if not already done. Insert/overwrite the keyword 'key' +@@ -1026,7 +1038,7 @@ + + if ((*key != 'Y' && *key != 'V') || + key[1] < '0' || key[1] > 'Z' || +- (key[1] > '9' && key[1] < 'A') || strlen(key) != 2) { ++ (key[1] > '9' && key[1] < 'A') || SK_STRLEN(key) != 2) { + + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, + ("illegal key tag, keyword not written\n")); +@@ -1042,13 +1054,13 @@ + } + + rtv = 0; +- len = strlen(buf); ++ len = SK_STRLEN(buf); + if (len > VPD_MAX_LEN) { + /* cut it */ + len = VPD_MAX_LEN; + rtv = 2; + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, +- ("keyword too long, cut after %d bytes\n",VPD_MAX_LEN)); ++ ("keyword too long, cut after %d bytes\n", VPD_MAX_LEN)); + } + if ((rtv2 = VpdSetupPara(pAC, key, buf, len, VPD_RW_KEY, OWR_KEY)) != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, +@@ -1059,6 +1071,7 @@ + return(rtv); + } + ++ + /* + * Read the contents of the VPD EEPROM and copy it to the + * VPD buffer if not already done. Remove the VPD keyword +@@ -1082,7 +1095,7 @@ + + vpd_size = pAC->vpd.vpd_size; + +- SK_DBG_MSG(pAC,SK_DBGMOD_VPD,SK_DBGCAT_TX,("VPD delete key %s\n",key)); ++ SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_TX, ("VPD delete key %s\n", key)); + if ((pAC->vpd.v.vpd_status & VPD_VALID) == 0) { + if (VpdInit(pAC, IoC) != 0) { + SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, +@@ -1119,6 +1132,7 @@ + return(0); + } + ++ + /* + * If the VPD buffer contains valid data write the VPD + * read/write area back to the VPD EEPROM. +@@ -1149,7 +1163,6 @@ + } + + +- + /* + * Read the contents of the VPD EEPROM and copy it to the VPD buffer + * if not already done. If the keyword "VF" is not present it will be +@@ -1178,7 +1191,7 @@ + } + } + +- len = strlen(msg); ++ len = SK_STRLEN(msg); + if (len > VPD_MAX_LEN) { + /* cut it */ + len = VPD_MAX_LEN; +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/skxmac2.c linux-2.6.9.new/drivers/net/sk98lin/skxmac2.c +--- linux-2.6.9.old/drivers/net/sk98lin/skxmac2.c 2004-10-19 05:55:28.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/skxmac2.c 2006-12-07 14:35:03.000000000 +0800 +@@ -2,8 +2,8 @@ + * + * Name: skxmac2.c + * Project: Gigabit Ethernet Adapters, Common Modules +- * Version: $Revision: 1.102 $ +- * Date: $Date: 2003/10/02 16:53:58 $ ++ * Version: $Revision: 2.35 $ ++ * Date: $Date: 2005/05/24 08:35:04 $ + * Purpose: Contains functions to initialize the MACs and PHYs + * + ******************************************************************************/ +@@ -11,13 +11,12 @@ + /****************************************************************************** + * + * (C)Copyright 1998-2002 SysKonnect. +- * (C)Copyright 2002-2003 Marvell. ++ * (C)Copyright 2002-2005 Marvell. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +- * + * The information in this file is provided "AS IS" without warranty. + * + ******************************************************************************/ +@@ -37,7 +36,7 @@ + + #if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) + static const char SysKonnectFileId[] = +- "@(#) $Id: skxmac2.c,v 1.102 2003/10/02 16:53:58 rschmidt Exp $ (C) Marvell."; ++ "@(#) $Id: skxmac2.c,v 2.35 2005/05/24 08:35:04 rschmidt Exp $ (C) Marvell."; + #endif + + #ifdef GENESIS +@@ -83,7 +82,7 @@ + * Returns: + * nothing + */ +-void SkXmPhyRead( ++int SkXmPhyRead( + SK_AC *pAC, /* Adapter Context */ + SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ +@@ -94,13 +93,13 @@ + SK_GEPORT *pPrt; + + pPrt = &pAC->GIni.GP[Port]; +- ++ + /* write the PHY register's address */ + XM_OUT16(IoC, Port, XM_PHY_ADDR, PhyReg | pPrt->PhyAddr); +- ++ + /* get the PHY register's value */ + XM_IN16(IoC, Port, XM_PHY_DATA, pVal); +- ++ + if (pPrt->PhyType != SK_PHY_XMAC) { + do { + XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu); +@@ -110,6 +109,8 @@ + /* get the PHY register's value */ + XM_IN16(IoC, Port, XM_PHY_DATA, pVal); + } ++ ++ return(0); + } /* SkXmPhyRead */ + + +@@ -122,7 +123,7 @@ + * Returns: + * nothing + */ +-void SkXmPhyWrite( ++int SkXmPhyWrite( + SK_AC *pAC, /* Adapter Context */ + SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ +@@ -133,26 +134,28 @@ + SK_GEPORT *pPrt; + + pPrt = &pAC->GIni.GP[Port]; +- ++ + if (pPrt->PhyType != SK_PHY_XMAC) { + do { + XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu); + /* wait until 'Busy' is cleared */ + } while ((Mmu & XM_MMU_PHY_BUSY) != 0); + } +- ++ + /* write the PHY register's address */ + XM_OUT16(IoC, Port, XM_PHY_ADDR, PhyReg | pPrt->PhyAddr); +- ++ + /* write the PHY register's value */ + XM_OUT16(IoC, Port, XM_PHY_DATA, Val); +- ++ + if (pPrt->PhyType != SK_PHY_XMAC) { + do { + XM_IN16(IoC, Port, XM_MMU_CMD, &Mmu); + /* wait until 'Busy' is cleared */ + } while ((Mmu & XM_MMU_PHY_BUSY) != 0); + } ++ ++ return(0); + } /* SkXmPhyWrite */ + #endif /* GENESIS */ + +@@ -167,7 +170,7 @@ + * Returns: + * nothing + */ +-void SkGmPhyRead( ++int SkGmPhyRead( + SK_AC *pAC, /* Adapter Context */ + SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ +@@ -176,52 +179,72 @@ + { + SK_U16 Ctrl; + SK_GEPORT *pPrt; +-#ifdef VCPU +- u_long SimCyle; +- u_long SimLowTime; +- +- VCPUgetTime(&SimCyle, &SimLowTime); +- VCPUprintf(0, "SkGmPhyRead(%u), SimCyle=%u, SimLowTime=%u\n", +- PhyReg, SimCyle, SimLowTime); +-#endif /* VCPU */ +- ++ SK_U32 StartTime; ++ SK_U32 CurrTime; ++ SK_U32 Delta; ++ + pPrt = &pAC->GIni.GP[Port]; +- ++ + /* set PHY-Register offset and 'Read' OpCode (= 1) */ + *pVal = (SK_U16)(GM_SMI_CT_PHY_AD(pPrt->PhyAddr) | + GM_SMI_CT_REG_AD(PhyReg) | GM_SMI_CT_OP_RD); + + GM_OUT16(IoC, Port, GM_SMI_CTRL, *pVal); + +- GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl); +- ++#ifdef DEBUG + /* additional check for MDC/MDIO activity */ +- if ((Ctrl & GM_SMI_CT_BUSY) == 0) { ++ GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl); ++ ++ if ((Ctrl & GM_SMI_CT_OP_RD) == 0) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("PHY read impossible on Port %d (Ctrl=0x%04x)\n", Port, Ctrl)); ++ + *pVal = 0; +- return; ++ return(1); + } ++#endif /* DEBUG */ + + *pVal |= GM_SMI_CT_BUSY; +- +- do { ++ ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &StartTime); ++ ++ do { /* wait until 'Busy' is cleared and 'ReadValid' is set */ + #ifdef VCPU + VCPUwaitTime(1000); + #endif /* VCPU */ + ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime); ++ ++ if (CurrTime >= StartTime) { ++ Delta = CurrTime - StartTime; ++ } ++ else { ++ Delta = CurrTime + ~StartTime + 1; ++ } ++ ++ if (Delta > SK_PHY_ACC_TO) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("PHY read timeout on Port %d (Ctrl=0x%04x)\n", Port, Ctrl)); ++ return(1); ++ } ++ + GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl); + +- /* wait until 'ReadValid' is set */ +- } while (Ctrl == *pVal); +- +- /* get the PHY register's value */ ++ /* Error on reading SMI Control Register */ ++ if (Ctrl == 0xffff) { ++ return(1); ++ } ++ ++ } while ((Ctrl ^ *pVal) != (GM_SMI_CT_RD_VAL | GM_SMI_CT_BUSY)); ++ + GM_IN16(IoC, Port, GM_SMI_DATA, pVal); + +-#ifdef VCPU +- VCPUgetTime(&SimCyle, &SimLowTime); +- VCPUprintf(0, "VCPUgetTime(), SimCyle=%u, SimLowTime=%u\n", +- SimCyle, SimLowTime); +-#endif /* VCPU */ ++ /* dummy read after GM_IN16() */ ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime); + ++ return(0); + } /* SkGmPhyRead */ + + +@@ -234,7 +257,7 @@ + * Returns: + * nothing + */ +-void SkGmPhyWrite( ++int SkGmPhyWrite( + SK_AC *pAC, /* Adapter Context */ + SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ +@@ -243,54 +266,74 @@ + { + SK_U16 Ctrl; + SK_GEPORT *pPrt; +-#ifdef VCPU +- SK_U32 DWord; +- u_long SimCyle; +- u_long SimLowTime; +- +- VCPUgetTime(&SimCyle, &SimLowTime); +- VCPUprintf(0, "SkGmPhyWrite(Reg=%u, Val=0x%04x), SimCyle=%u, SimLowTime=%u\n", +- PhyReg, Val, SimCyle, SimLowTime); +-#endif /* VCPU */ +- ++ SK_U32 StartTime; ++ SK_U32 CurrTime; ++ SK_U32 Delta; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("SkGmPhyWrite Port:%d, Reg=%d, Val=0x%04X\n", ++ Port, PhyReg, Val)); ++ + pPrt = &pAC->GIni.GP[Port]; +- ++ + /* write the PHY register's value */ + GM_OUT16(IoC, Port, GM_SMI_DATA, Val); +- +- /* set PHY-Register offset and 'Write' OpCode (= 0) */ +- Val = GM_SMI_CT_PHY_AD(pPrt->PhyAddr) | GM_SMI_CT_REG_AD(PhyReg); + +- GM_OUT16(IoC, Port, GM_SMI_CTRL, Val); +- +- GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl); +- ++#ifdef DEBUG + /* additional check for MDC/MDIO activity */ +- if ((Ctrl & GM_SMI_CT_BUSY) == 0) { +- return; ++ GM_IN16(IoC, Port, GM_SMI_DATA, &Ctrl); ++ ++ if (Ctrl != Val) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("PHY write impossible on Port %d (Val=0x%04x)\n", Port, Ctrl)); ++ ++ return(1); + } +- +- Val |= GM_SMI_CT_BUSY; ++#endif /* DEBUG */ + +- do { +-#ifdef VCPU +- /* read Timer value */ +- SK_IN32(IoC, B2_TI_VAL, &DWord); ++ /* set PHY-Register offset and 'Write' OpCode (= 0) */ ++ Ctrl = (SK_U16)(GM_SMI_CT_PHY_AD(pPrt->PhyAddr) | ++ GM_SMI_CT_REG_AD(PhyReg)); + ++ GM_OUT16(IoC, Port, GM_SMI_CTRL, Ctrl); ++ ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &StartTime); ++ ++ do { /* wait until 'Busy' is cleared */ ++#ifdef VCPU + VCPUwaitTime(1000); + #endif /* VCPU */ + ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime); ++ ++ if (CurrTime >= StartTime) { ++ Delta = CurrTime - StartTime; ++ } ++ else { ++ Delta = CurrTime + ~StartTime + 1; ++ } ++ ++ if (Delta > SK_PHY_ACC_TO) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("PHY write timeout on Port %d (Ctrl=0x%04x)\n", Port, Ctrl)); ++ return(1); ++ } ++ + GM_IN16(IoC, Port, GM_SMI_CTRL, &Ctrl); + +- /* wait until 'Busy' is cleared */ +- } while (Ctrl == Val); ++ /* Error on reading SMI Control Register */ ++ if (Ctrl == 0xffff) { ++ return(1); ++ } + +-#ifdef VCPU +- VCPUgetTime(&SimCyle, &SimLowTime); +- VCPUprintf(0, "VCPUgetTime(), SimCyle=%u, SimLowTime=%u\n", +- SimCyle, SimLowTime); +-#endif /* VCPU */ ++ } while ((Ctrl & GM_SMI_CT_BUSY) != 0); + ++ /* dummy read after GM_IN16() */ ++ SK_IN32(IoC, GMAC_TI_ST_VAL, &CurrTime); ++ ++ return(0); + } /* SkGmPhyWrite */ + #endif /* YUKON */ + +@@ -312,16 +355,8 @@ + int PhyReg, /* Register Address (Offset) */ + SK_U16 *pVal) /* Pointer to Value */ + { +- void (*r_func)(SK_AC *pAC, SK_IOC IoC, int Port, int Reg, SK_U16 *pVal); + +- if (pAC->GIni.GIGenesis) { +- r_func = SkXmPhyRead; +- } +- else { +- r_func = SkGmPhyRead; +- } +- +- r_func(pAC, IoC, Port, PhyReg, pVal); ++ pAC->GIni.GIFunc.pFnMacPhyRead(pAC, IoC, Port, PhyReg, pVal); + } /* SkGePhyRead */ + + +@@ -341,16 +376,8 @@ + int PhyReg, /* Register Address (Offset) */ + SK_U16 Val) /* Value */ + { +- void (*w_func)(SK_AC *pAC, SK_IOC IoC, int Port, int Reg, SK_U16 Val); + +- if (pAC->GIni.GIGenesis) { +- w_func = SkXmPhyWrite; +- } +- else { +- w_func = SkGmPhyWrite; +- } +- +- w_func(pAC, IoC, Port, PhyReg, Val); ++ pAC->GIni.GIFunc.pFnMacPhyWrite(pAC, IoC, Port, PhyReg, Val); + } /* SkGePhyWrite */ + #endif /* SK_DIAG */ + +@@ -360,15 +387,15 @@ + * SkMacPromiscMode() - Enable / Disable Promiscuous Mode + * + * Description: +- * enables / disables promiscuous mode by setting Mode Register (XMAC) or +- * Receive Control Register (GMAC) dep. on board type ++ * enables / disables promiscuous mode by setting Mode Register (XMAC) or ++ * Receive Control Register (GMAC) dep. on board type + * + * Returns: + * nothing + */ + void SkMacPromiscMode( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL Enable) /* Enable / Disable */ + { +@@ -377,11 +404,11 @@ + #endif + #ifdef GENESIS + SK_U32 MdReg; +-#endif ++#endif + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + XM_IN32(IoC, Port, XM_MODE, &MdReg); + /* enable or disable promiscuous mode */ + if (Enable) { +@@ -394,12 +421,12 @@ + XM_OUT32(IoC, Port, XM_MODE, MdReg); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + GM_IN16(IoC, Port, GM_RX_CTRL, &RcReg); +- ++ + /* enable or disable unicast and multicast filtering */ + if (Enable) { + RcReg &= ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); +@@ -420,28 +447,28 @@ + * SkMacHashing() - Enable / Disable Hashing + * + * Description: +- * enables / disables hashing by setting Mode Register (XMAC) or +- * Receive Control Register (GMAC) dep. on board type ++ * enables / disables hashing by setting Mode Register (XMAC) or ++ * Receive Control Register (GMAC) dep. on board type + * + * Returns: + * nothing + */ + void SkMacHashing( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL Enable) /* Enable / Disable */ + { + #ifdef YUKON + SK_U16 RcReg; +-#endif ++#endif + #ifdef GENESIS + SK_U32 MdReg; + #endif + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + XM_IN32(IoC, Port, XM_MODE, &MdReg); + /* enable or disable hashing */ + if (Enable) { +@@ -454,12 +481,12 @@ + XM_OUT32(IoC, Port, XM_MODE, MdReg); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + GM_IN16(IoC, Port, GM_RX_CTRL, &RcReg); +- ++ + /* enable or disable multicast filtering */ + if (Enable) { + RcReg |= GM_RXCR_MCF_ENA; +@@ -487,8 +514,8 @@ + * - don't set XMR_FS_ERR in status SK_LENERR_OK_ON/OFF + * for inrange length error frames + * - don't set XMR_FS_ERR in status SK_BIG_PK_OK_ON/OFF +- * for frames > 1514 bytes +- * - enable Rx of own packets SK_SELF_RX_ON/OFF ++ * for frames > 1514 bytes ++ * - enable Rx of own packets SK_SELF_RX_ON/OFF + * + * for incoming packets may be enabled/disabled by this function. + * Additional modes may be added later. +@@ -499,11 +526,11 @@ + * nothing + */ + static void SkXmSetRxCmd( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Mode) /* Mode is SK_STRIP_FCS_ON/OFF, SK_STRIP_PAD_ON/OFF, +- SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */ ++ SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */ + { + SK_U16 OldRxCmd; + SK_U16 RxCmd; +@@ -511,7 +538,7 @@ + XM_IN16(IoC, Port, XM_RX_CMD, &OldRxCmd); + + RxCmd = OldRxCmd; +- ++ + switch (Mode & (SK_STRIP_FCS_ON | SK_STRIP_FCS_OFF)) { + case SK_STRIP_FCS_ON: + RxCmd |= XM_RX_STRIP_FCS; +@@ -572,8 +599,8 @@ + * The features + * - FCS (CRC) stripping, SK_STRIP_FCS_ON/OFF + * - don't set GMR_FS_LONG_ERR SK_BIG_PK_OK_ON/OFF +- * for frames > 1514 bytes +- * - enable Rx of own packets SK_SELF_RX_ON/OFF ++ * for frames > 1514 bytes ++ * - enable Rx of own packets SK_SELF_RX_ON/OFF + * + * for incoming packets may be enabled/disabled by this function. + * Additional modes may be added later. +@@ -584,20 +611,17 @@ + * nothing + */ + static void SkGmSetRxCmd( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Mode) /* Mode is SK_STRIP_FCS_ON/OFF, SK_STRIP_PAD_ON/OFF, +- SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */ ++ SK_LENERR_OK_ON/OFF, or SK_BIG_PK_OK_ON/OFF */ + { +- SK_U16 OldRxCmd; + SK_U16 RxCmd; + + if ((Mode & (SK_STRIP_FCS_ON | SK_STRIP_FCS_OFF)) != 0) { +- +- GM_IN16(IoC, Port, GM_RX_CTRL, &OldRxCmd); + +- RxCmd = OldRxCmd; ++ GM_IN16(IoC, Port, GM_RX_CTRL, &RxCmd); + + if ((Mode & SK_STRIP_FCS_ON) != 0) { + RxCmd |= GM_RXCR_CRC_DIS; +@@ -605,17 +629,13 @@ + else { + RxCmd &= ~GM_RXCR_CRC_DIS; + } +- /* Write the new mode to the Rx control register if required */ +- if (OldRxCmd != RxCmd) { +- GM_OUT16(IoC, Port, GM_RX_CTRL, RxCmd); +- } ++ /* Write the new mode to the Rx Control register */ ++ GM_OUT16(IoC, Port, GM_RX_CTRL, RxCmd); + } + + if ((Mode & (SK_BIG_PK_OK_ON | SK_BIG_PK_OK_OFF)) != 0) { +- +- GM_IN16(IoC, Port, GM_SERIAL_MODE, &OldRxCmd); + +- RxCmd = OldRxCmd; ++ GM_IN16(IoC, Port, GM_SERIAL_MODE, &RxCmd); + + if ((Mode & SK_BIG_PK_OK_ON) != 0) { + RxCmd |= GM_SMOD_JUMBO_ENA; +@@ -623,10 +643,8 @@ + else { + RxCmd &= ~GM_SMOD_JUMBO_ENA; + } +- /* Write the new mode to the Rx control register if required */ +- if (OldRxCmd != RxCmd) { +- GM_OUT16(IoC, Port, GM_SERIAL_MODE, RxCmd); +- } ++ /* Write the new mode to the Serial Mode register */ ++ GM_OUT16(IoC, Port, GM_SERIAL_MODE, RxCmd); + } + } /* SkGmSetRxCmd */ + +@@ -641,17 +659,17 @@ + * nothing + */ + void SkMacSetRxCmd( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Mode) /* Rx Mode */ + { + if (pAC->GIni.GIGenesis) { +- ++ + SkXmSetRxCmd(pAC, IoC, Port, Mode); + } + else { +- ++ + SkGmSetRxCmd(pAC, IoC, Port, Mode); + } + +@@ -668,15 +686,15 @@ + * nothing + */ + void SkMacCrcGener( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL Enable) /* Enable / Disable */ + { + SK_U16 Word; + + if (pAC->GIni.GIGenesis) { +- ++ + XM_IN16(IoC, Port, XM_TX_CMD, &Word); + + if (Enable) { +@@ -689,9 +707,9 @@ + XM_OUT16(IoC, Port, XM_TX_CMD, Word); + } + else { +- ++ + GM_IN16(IoC, Port, GM_TX_CTRL, &Word); +- ++ + if (Enable) { + Word &= ~GM_TXCR_CRC_DIS; + } +@@ -721,14 +739,14 @@ + * nothing + */ + void SkXmClrExactAddr( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int StartNum, /* Begin with this Address Register Index (0..15) */ + int StopNum) /* Stop after finished with this Register Idx (0..15) */ + { + int i; +- SK_U16 ZeroAddr[3] = {0x0000, 0x0000, 0x0000}; ++ SK_U16 ZeroAddr[3] = {0, 0, 0}; + + if ((unsigned)StartNum > 15 || (unsigned)StopNum > 15 || + StartNum > StopNum) { +@@ -738,7 +756,7 @@ + } + + for (i = StartNum; i <= StopNum; i++) { +- XM_OUTADDR(IoC, Port, XM_EXM(i), &ZeroAddr[0]); ++ XM_OUTADDR(IoC, Port, XM_EXM(i), ZeroAddr); + } + } /* SkXmClrExactAddr */ + #endif /* GENESIS */ +@@ -755,21 +773,21 @@ + * nothing + */ + void SkMacFlushTxFifo( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + #ifdef GENESIS + SK_U32 MdReg; + + if (pAC->GIni.GIGenesis) { +- ++ + XM_IN32(IoC, Port, XM_MODE, &MdReg); + + XM_OUT32(IoC, Port, XM_MODE, MdReg | XM_MD_FTF); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* no way to flush the FIFO we have to issue a reset */ +@@ -791,8 +809,8 @@ + * nothing + */ + void SkMacFlushRxFifo( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + #ifdef GENESIS +@@ -805,7 +823,7 @@ + XM_OUT32(IoC, Port, XM_MODE, MdReg | XM_MD_FRF); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* no way to flush the FIFO we have to issue a reset */ +@@ -853,23 +871,23 @@ + * nothing + */ + static void SkXmSoftRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { +- SK_U16 ZeroAddr[4] = {0x0000, 0x0000, 0x0000, 0x0000}; +- ++ SK_U16 ZeroAddr[4] = {0, 0, 0, 0}; ++ + /* reset the statistics module */ + XM_OUT32(IoC, Port, XM_GP_PORT, XM_GP_RES_STAT); + + /* disable all XMAC IRQs */ + XM_OUT16(IoC, Port, XM_IMSK, 0xffff); +- ++ + XM_OUT32(IoC, Port, XM_MODE, 0); /* clear Mode Reg */ +- ++ + XM_OUT16(IoC, Port, XM_TX_CMD, 0); /* reset TX CMD Reg */ + XM_OUT16(IoC, Port, XM_RX_CMD, 0); /* reset RX CMD Reg */ +- ++ + /* disable all PHY IRQs */ + switch (pAC->GIni.GP[Port].PhyType) { + case SK_PHY_BCOM: +@@ -887,13 +905,13 @@ + } + + /* clear the Hash Register */ +- XM_OUTHASH(IoC, Port, XM_HSM, &ZeroAddr); ++ XM_OUTHASH(IoC, Port, XM_HSM, ZeroAddr); + + /* clear the Exact Match Address registers */ + SkXmClrExactAddr(pAC, IoC, Port, 0, 15); +- ++ + /* clear the Source Check Address registers */ +- XM_OUTHASH(IoC, Port, XM_SRC_CHK, &ZeroAddr); ++ XM_OUTHASH(IoC, Port, XM_SRC_CHK, ZeroAddr); + + } /* SkXmSoftRst */ + +@@ -916,8 +934,8 @@ + * nothing + */ + static void SkXmHardRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U32 Reg; +@@ -940,19 +958,19 @@ + } + + SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_SET_MAC_RST); +- ++ + SK_IN16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), &Word); +- ++ + } while ((Word & MFF_SET_MAC_RST) == 0); + } + + /* For external PHYs there must be special handling */ + if (pAC->GIni.GP[Port].PhyType != SK_PHY_XMAC) { +- ++ + SK_IN32(IoC, B2_GP_IO, &Reg); +- ++ + if (Port == 0) { +- Reg |= GP_DIR_0; /* set to output */ ++ Reg |= GP_DIR_0; /* set to output */ + Reg &= ~GP_IO_0; /* set PHY reset (active low) */ + } + else { +@@ -978,12 +996,12 @@ + * nothing + */ + static void SkXmClearRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U32 DWord; +- ++ + /* clear HW reset */ + SK_OUT16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), MFF_CLR_MAC_RST); + +@@ -1000,7 +1018,7 @@ + /* Clear PHY reset */ + SK_OUT32(IoC, B2_GP_IO, DWord); + +- /* Enable GMII interface */ ++ /* enable GMII interface */ + XM_OUT16(IoC, Port, XM_HW_CFG, XM_HW_GMII_MD); + } + } /* SkXmClearRst */ +@@ -1020,8 +1038,8 @@ + * nothing + */ + static void SkGmSoftRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U16 EmptyHash[4] = {0x0000, 0x0000, 0x0000, 0x0000}; +@@ -1030,19 +1048,18 @@ + /* reset the statistics module */ + + /* disable all GMAC IRQs */ +- SK_OUT8(IoC, GMAC_IRQ_MSK, 0); +- ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), 0); ++ + /* disable all PHY IRQs */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, 0); +- ++ + /* clear the Hash Register */ + GM_OUTHASH(IoC, Port, GM_MC_ADDR_H1, EmptyHash); + +- /* Enable Unicast and Multicast filtering */ ++ /* enable Unicast and Multicast filtering */ + GM_IN16(IoC, Port, GM_RX_CTRL, &RxCtrl); +- +- GM_OUT16(IoC, Port, GM_RX_CTRL, +- (SK_U16)(RxCtrl | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA)); ++ ++ GM_OUT16(IoC, Port, GM_RX_CTRL, RxCtrl | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); + + } /* SkGmSoftRst */ + +@@ -1057,16 +1074,16 @@ + * nothing + */ + static void SkGmHardRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U32 DWord; +- ++ + /* WA code for COMA mode */ + if (pAC->GIni.GIYukonLite && +- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { +- ++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) { ++ + SK_IN32(IoC, B2_GP_IO, &DWord); + + DWord |= (GP_DIR_9 | GP_IO_9); +@@ -1076,10 +1093,10 @@ + } + + /* set GPHY Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), GPC_RST_SET); ++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_SET); + + /* set GMAC Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_SET); ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET); + + } /* SkGmHardRst */ + +@@ -1094,24 +1111,27 @@ + * nothing + */ + static void SkGmClearRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U32 DWord; +- ++ SK_U16 PhyId0; ++ SK_U16 PhyId1; ++ SK_U16 Word; ++ + #ifdef XXX +- /* clear GMAC Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_CLR); ++ /* clear GMAC Control reset */ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR); + +- /* set GMAC Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_RST_SET); ++ /* set GMAC Control reset */ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET); + #endif /* XXX */ + + /* WA code for COMA mode */ + if (pAC->GIni.GIYukonLite && +- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { +- ++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) { ++ + SK_IN32(IoC, B2_GP_IO, &DWord); + + DWord |= GP_DIR_9; /* set to output */ +@@ -1121,30 +1141,74 @@ + SK_OUT32(IoC, B2_GP_IO, DWord); + } + +- /* set HWCFG_MODE */ +- DWord = GPC_INT_POL_HI | GPC_DIS_FC | GPC_DIS_SLEEP | +- GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE | +- (pAC->GIni.GICopperType ? GPC_HWCFG_GMII_COP : +- GPC_HWCFG_GMII_FIB); ++#ifdef VCPU ++ /* set MAC Reset before PHY reset is set */ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_SET); ++#endif /* VCPU */ ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* set GPHY Control reset */ ++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_SET); + +- /* set GPHY Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_SET); ++ /* release GPHY Control reset */ ++ SK_OUT8(IoC, MR_ADDR(Port, GPHY_CTRL), (SK_U8)GPC_RST_CLR); ++ } ++ else { ++ /* set HWCFG_MODE */ ++ DWord = GPC_INT_POL | GPC_DIS_FC | GPC_DIS_SLEEP | ++ GPC_ENA_XC | GPC_ANEG_ADV_ALL_M | GPC_ENA_PAUSE | ++ (pAC->GIni.GICopperType ? GPC_HWCFG_GMII_COP : ++ GPC_HWCFG_GMII_FIB); + +- /* release GPHY Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_CLR); ++ /* set GPHY Control reset */ ++ SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_SET); ++ ++ /* release GPHY Control reset */ ++ SK_OUT32(IoC, MR_ADDR(Port, GPHY_CTRL), DWord | GPC_RST_CLR); ++ } + + #ifdef VCPU ++ /* wait for internal initialization of GPHY */ ++ VCPUprintf(0, "Waiting until PHY %d is ready to initialize\n", Port); ++ VCpuWait(10000); ++ ++ /* release GMAC reset */ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR); ++ ++ /* wait for stable GMAC clock */ + VCpuWait(9000); + #endif /* VCPU */ + + /* clear GMAC Control reset */ +- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_PAUSE_ON | GMC_RST_CLR); ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), (SK_U8)GMC_RST_CLR); ++ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_472) && Port == MAC_2) { ++ ++ /* clear GMAC 1 Control reset */ ++ SK_OUT8(IoC, MR_ADDR(MAC_1, GMAC_CTRL), (SK_U8)GMC_RST_CLR); ++ ++ do { ++ /* set GMAC 2 Control reset */ ++ SK_OUT8(IoC, MR_ADDR(MAC_2, GMAC_CTRL), (SK_U8)GMC_RST_SET); ++ ++ /* clear GMAC 2 Control reset */ ++ SK_OUT8(IoC, MR_ADDR(MAC_2, GMAC_CTRL), (SK_U8)GMC_RST_CLR); ++ ++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_ID0, &PhyId0); ++ ++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_ID1, &PhyId1); ++ ++ SkGmPhyRead(pAC, IoC, MAC_2, PHY_MARV_INT_MASK, &Word); ++ ++ } while (Word != 0 || PhyId0 != PHY_MARV_ID0_VAL || ++ PhyId1 != PHY_MARV_ID1_Y2); ++ } + + #ifdef VCPU + VCpuWait(2000); +- ++ + SK_IN32(IoC, MR_ADDR(Port, GPHY_CTRL), &DWord); +- ++ + SK_IN32(IoC, B0_ISRC, &DWord); + #endif /* VCPU */ + +@@ -1162,37 +1226,33 @@ + * nothing + */ + void SkMacSoftRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { +- SK_GEPORT *pPrt; +- +- pPrt = &pAC->GIni.GP[Port]; +- + /* disable receiver and transmitter */ + SkMacRxTxDisable(pAC, IoC, Port); + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + SkXmSoftRst(pAC, IoC, Port); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + SkGmSoftRst(pAC, IoC, Port); + } + #endif /* YUKON */ + + /* flush the MAC's Rx and Tx FIFOs */ + SkMacFlushTxFifo(pAC, IoC, Port); +- ++ + SkMacFlushRxFifo(pAC, IoC, Port); + +- pPrt->PState = SK_PRT_STOP; ++ pAC->GIni.GP[Port].PState = SK_PRT_STOP; + + } /* SkMacSoftRst */ + +@@ -1207,25 +1267,27 @@ + * nothing + */ + void SkMacHardRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { +- ++ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + SkXmHardRst(pAC, IoC, Port); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + SkGmHardRst(pAC, IoC, Port); + } + #endif /* YUKON */ + ++ pAC->GIni.GP[Port].PHWLinkUp = SK_FALSE; ++ + pAC->GIni.GP[Port].PState = SK_PRT_RESET; + + } /* SkMacHardRst */ +@@ -1241,21 +1303,21 @@ + * nothing + */ + void SkMacClearRst( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { +- ++ + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + SkXmClearRst(pAC, IoC, Port); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + SkGmClearRst(pAC, IoC, Port); + } + #endif /* YUKON */ +@@ -1279,8 +1341,8 @@ + * nothing + */ + void SkXmInitMac( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -1290,13 +1352,13 @@ + pPrt = &pAC->GIni.GP[Port]; + + if (pPrt->PState == SK_PRT_STOP) { +- /* Port State: SK_PRT_STOP */ + /* Verify that the reset bit is cleared */ + SK_IN16(IoC, MR_ADDR(Port, TX_MFF_CTRL1), &SWord); + + if ((SWord & MFF_SET_MAC_RST) != 0) { + /* PState does not match HW state */ +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E006, SKERR_HWI_E006MSG); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("SkXmInitMac: PState does not match HW state")); + /* Correct it */ + pPrt->PState = SK_PRT_RESET; + } +@@ -1315,7 +1377,7 @@ + * Must be done AFTER first access to BCOM chip. + */ + XM_IN16(IoC, Port, XM_MMU_CMD, &SWord); +- ++ + XM_OUT16(IoC, Port, XM_MMU_CMD, SWord | XM_MMU_NO_PRE); + + if (pPrt->PhyId1 == PHY_BCOM_ID1_C0) { +@@ -1348,7 +1410,7 @@ + * Disable Power Management after reset. + */ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &SWord); +- ++ + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, + (SK_U16)(SWord | PHY_B_AC_DIS_PM)); + +@@ -1357,7 +1419,7 @@ + + /* Dummy read the Interrupt source register */ + XM_IN16(IoC, Port, XM_ISRC, &SWord); +- ++ + /* + * The auto-negotiation process starts immediately after + * clearing the reset. The auto-negotiation process should be +@@ -1383,7 +1445,7 @@ + * independent. Remember this when changing. + */ + SK_IN16(IoC, (B2_MAC_2 + Port * 8 + i * 2), &SWord); +- ++ + XM_OUT16(IoC, Port, (XM_SA + i * 2), SWord); + } + +@@ -1401,7 +1463,7 @@ + SWord = SK_XM_THR_SL; /* for single port */ + + if (pAC->GIni.GIMacsFound > 1) { +- switch (pAC->GIni.GIPortUsage) { ++ switch (pPrt->PPortUsage) { + case SK_RED_LINK: + SWord = SK_XM_THR_REDL; /* redundant link */ + break; +@@ -1424,7 +1486,7 @@ + /* setup register defaults for the Rx Command Register */ + SWord = XM_RX_STRIP_FCS | XM_RX_LENERR_OK; + +- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) { ++ if (pPrt->PPortUsage == SK_JUMBO_LINK) { + SWord |= XM_RX_BIG_PK_OK; + } + +@@ -1436,7 +1498,7 @@ + */ + SWord |= XM_RX_DIS_CEXT; + } +- ++ + XM_OUT16(IoC, Port, XM_RX_CMD, SWord); + + /* +@@ -1493,8 +1555,8 @@ + * nothing + */ + void SkGmInitMac( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -1505,24 +1567,29 @@ + pPrt = &pAC->GIni.GP[Port]; + + if (pPrt->PState == SK_PRT_STOP) { +- /* Port State: SK_PRT_STOP */ + /* Verify that the reset bit is cleared */ + SK_IN32(IoC, MR_ADDR(Port, GMAC_CTRL), &DWord); +- ++ + if ((DWord & GMC_RST_SET) != 0) { + /* PState does not match HW state */ +- SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E006, SKERR_HWI_E006MSG); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("SkGmInitMac: PState does not match HW state")); + /* Correct it */ + pPrt->PState = SK_PRT_RESET; + } ++ else { ++ /* enable all PHY interrupts */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, ++ (SK_U16)PHY_M_DEF_MSK); ++ } + } + + if (pPrt->PState == SK_PRT_RESET) { +- ++ + SkGmHardRst(pAC, IoC, Port); + + SkGmClearRst(pAC, IoC, Port); +- ++ + /* Auto-negotiation ? */ + if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) { + /* Auto-negotiation disabled */ +@@ -1532,10 +1599,10 @@ + + /* disable auto-update for speed, duplex and flow-control */ + SWord |= GM_GPCR_AU_ALL_DIS; +- ++ + /* setup General Purpose Control Register */ + GM_OUT16(IoC, Port, GM_GP_CTRL, SWord); +- ++ + SWord = GM_GPCR_AU_ALL_DIS; + } + else { +@@ -1546,7 +1613,10 @@ + switch (pPrt->PLinkSpeed) { + case SK_LSPEED_AUTO: + case SK_LSPEED_1000MBPS: +- SWord |= GM_GPCR_SPEED_1000 | GM_GPCR_SPEED_100; ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ ++ SWord |= GM_GPCR_SPEED_1000 | GM_GPCR_SPEED_100; ++ } + break; + case SK_LSPEED_100MBPS: + SWord |= GM_GPCR_SPEED_100; +@@ -1564,8 +1634,6 @@ + /* flow-control settings */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: +- /* set Pause Off */ +- SK_OUT32(IoC, MR_ADDR(Port, GMAC_CTRL), GMC_PAUSE_OFF); + /* disable Tx & Rx flow-control */ + SWord |= GM_GPCR_FC_TX_DIS | GM_GPCR_FC_RX_DIS | GM_GPCR_AU_FCT_DIS; + break; +@@ -1583,24 +1651,25 @@ + GM_OUT16(IoC, Port, GM_GP_CTRL, SWord); + + /* dummy read the Interrupt Source Register */ +- SK_IN16(IoC, GMAC_IRQ_SRC, &SWord); +- ++ SK_IN16(IoC, MR_ADDR(Port, GMAC_IRQ_SRC), &SWord); ++ + #ifndef VCPU + /* read Id from PHY */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_ID1, &pPrt->PhyId1); +- ++ + SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE); +-#endif /* VCPU */ ++#endif /* !VCPU */ + } + + (void)SkGmResetCounter(pAC, IoC, Port); + + /* setup Transmit Control Register */ +- GM_OUT16(IoC, Port, GM_TX_CTRL, TX_COL_THR(pPrt->PMacColThres)); ++ GM_OUT16(IoC, Port, GM_TX_CTRL, (SK_U16)TX_COL_THR(pPrt->PMacColThres)); + + /* setup Receive Control Register */ +- GM_OUT16(IoC, Port, GM_RX_CTRL, GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA | +- GM_RXCR_CRC_DIS); ++ SWord = GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA | GM_RXCR_CRC_DIS; ++ ++ GM_OUT16(IoC, Port, GM_RX_CTRL, SWord); + + /* setup Transmit Flow Control Register */ + GM_OUT16(IoC, Port, GM_TX_FLOW_CTRL, 0xffff); +@@ -1610,31 +1679,29 @@ + GM_IN16(IoC, Port, GM_TX_PARAM, &SWord); + #endif /* VCPU */ + +- SWord = TX_JAM_LEN_VAL(pPrt->PMacJamLen) | +- TX_JAM_IPG_VAL(pPrt->PMacJamIpgVal) | +- TX_IPG_JAM_DATA(pPrt->PMacJamIpgData); +- ++ SWord = (SK_U16)(TX_JAM_LEN_VAL(pPrt->PMacJamLen) | ++ TX_JAM_IPG_VAL(pPrt->PMacJamIpgVal) | ++ TX_IPG_JAM_DATA(pPrt->PMacJamIpgData) | ++ TX_BACK_OFF_LIM(pPrt->PMacBackOffLim)); ++ + GM_OUT16(IoC, Port, GM_TX_PARAM, SWord); + + /* configure the Serial Mode Register */ +-#ifdef VCPU +- GM_IN16(IoC, Port, GM_SERIAL_MODE, &SWord); +-#endif /* VCPU */ +- +- SWord = GM_SMOD_VLAN_ENA | IPG_DATA_VAL(pPrt->PMacIpgData); ++ SWord = (SK_U16)(DATA_BLIND_VAL(pPrt->PMacDataBlind) | ++ GM_SMOD_VLAN_ENA | IPG_DATA_VAL(pPrt->PMacIpgData)); + + if (pPrt->PMacLimit4) { + /* reset of collision counter after 4 consecutive collisions */ + SWord |= GM_SMOD_LIMIT_4; + } + +- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) { ++ if (pPrt->PPortUsage == SK_JUMBO_LINK) { + /* enable jumbo mode (Max. Frame Length = 9018) */ + SWord |= GM_SMOD_JUMBO_ENA; + } +- ++ + GM_OUT16(IoC, Port, GM_SERIAL_MODE, SWord); +- ++ + /* + * configure the GMACs Station Addresses + * in PROM you can find our addresses at: +@@ -1663,15 +1730,15 @@ + else { + GM_OUT16(IoC, Port, (GM_SRC_ADDR_1L + i * 4), SWord); + } +-#else ++#else + GM_OUT16(IoC, Port, (GM_SRC_ADDR_1L + i * 4), SWord); + #endif /* WA_DEV_16 */ +- ++ + /* virtual address: will be used for data */ + SK_IN16(IoC, (B2_MAC_1 + Port * 8 + i * 2), &SWord); + + GM_OUT16(IoC, Port, (GM_SRC_ADDR_2L + i * 4), SWord); +- ++ + /* reset Multicast filtering Hash registers 1-3 */ + GM_OUT16(IoC, Port, GM_MC_ADDR_H1 + 4*i, 0); + } +@@ -1684,18 +1751,6 @@ + GM_OUT16(IoC, Port, GM_RX_IRQ_MSK, 0); + GM_OUT16(IoC, Port, GM_TR_IRQ_MSK, 0); + +-#if defined(SK_DIAG) || defined(DEBUG) +- /* read General Purpose Status */ +- GM_IN16(IoC, Port, GM_GP_STAT, &SWord); +- +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("MAC Stat Reg.=0x%04X\n", SWord)); +-#endif /* SK_DIAG || DEBUG */ +- +-#ifdef SK_DIAG +- c_print("MAC Stat Reg=0x%04X\n", SWord); +-#endif /* SK_DIAG */ +- + } /* SkGmInitMac */ + #endif /* YUKON */ + +@@ -1714,8 +1769,8 @@ + * nothing + */ + void SkXmInitDupMd( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + switch (pAC->GIni.GP[Port].PLinkModeStatus) { +@@ -1762,8 +1817,8 @@ + * nothing + */ + void SkXmInitPauseMd( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -1773,11 +1828,11 @@ + pPrt = &pAC->GIni.GP[Port]; + + XM_IN16(IoC, Port, XM_MMU_CMD, &Word); +- ++ + if (pPrt->PFlowCtrlStatus == SK_FLOW_STAT_NONE || + pPrt->PFlowCtrlStatus == SK_FLOW_STAT_LOC_SEND) { + +- /* Disable Pause Frame Reception */ ++ /* disable Pause Frame Reception */ + Word |= XM_MMU_IGN_PF; + } + else { +@@ -1785,10 +1840,10 @@ + * enabling pause frame reception is required for 1000BT + * because the XMAC is not reset if the link is going down + */ +- /* Enable Pause Frame Reception */ ++ /* enable Pause Frame Reception */ + Word &= ~XM_MMU_IGN_PF; +- } +- ++ } ++ + XM_OUT16(IoC, Port, XM_MMU_CMD, Word); + + XM_IN32(IoC, Port, XM_MODE, &DWord); +@@ -1811,10 +1866,10 @@ + /* remember this value is defined in big endian (!) */ + XM_OUT16(IoC, Port, XM_MAC_PTIME, 0xffff); + +- /* Set Pause Mode in Mode Register */ ++ /* set Pause Mode in Mode Register */ + DWord |= XM_PAUSE_MODE; + +- /* Set Pause Mode in MAC Rx FIFO */ ++ /* set Pause Mode in MAC Rx FIFO */ + SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_ENA_PAUSE); + } + else { +@@ -1822,13 +1877,13 @@ + * disable pause frame generation is required for 1000BT + * because the XMAC is not reset if the link is going down + */ +- /* Disable Pause Mode in Mode Register */ ++ /* disable Pause Mode in Mode Register */ + DWord &= ~XM_PAUSE_MODE; + +- /* Disable Pause Mode in MAC Rx FIFO */ ++ /* disable Pause Mode in MAC Rx FIFO */ + SK_OUT16(IoC, MR_ADDR(Port, RX_MFF_CTRL1), MFF_DIS_PAUSE); + } +- ++ + XM_OUT32(IoC, Port, XM_MODE, DWord); + } /* SkXmInitPauseMd*/ + +@@ -1845,8 +1900,8 @@ + * nothing + */ + static void SkXmInitPhyXmac( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { +@@ -1855,12 +1910,12 @@ + + pPrt = &pAC->GIni.GP[Port]; + Ctrl = 0; +- ++ + /* Auto-negotiation ? */ + if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyXmac: no auto-negotiation Port %d\n", Port)); +- /* Set DuplexMode in Config register */ ++ /* set DuplexMode in Config register */ + if (pPrt->PLinkMode == SK_LMODE_FULL) { + Ctrl |= PHY_CT_DUP_MD; + } +@@ -1873,9 +1928,9 @@ + else { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyXmac: with auto-negotiation Port %d\n", Port)); +- /* Set Auto-negotiation advertisement */ ++ /* set Auto-negotiation advertisement */ + +- /* Set Full/half duplex capabilities */ ++ /* set Full/half duplex capabilities */ + switch (pPrt->PLinkMode) { + case SK_LMODE_AUTOHALF: + Ctrl |= PHY_X_AN_HD; +@@ -1891,7 +1946,7 @@ + SKERR_HWI_E015MSG); + } + +- /* Set Flow-control capabilities */ ++ /* set Flow-control capabilities */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + Ctrl |= PHY_X_P_NO_PAUSE; +@@ -1918,7 +1973,7 @@ + } + + if (DoLoop) { +- /* Set the Phy Loopback bit, too */ ++ /* set the Phy Loopback bit, too */ + Ctrl |= PHY_CT_LOOP; + } + +@@ -1939,8 +1994,8 @@ + * nothing + */ + static void SkXmInitPhyBcom( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { +@@ -1962,7 +2017,7 @@ + /* manually Master/Slave ? */ + if (pPrt->PMSMode != SK_MS_MODE_AUTO) { + Ctrl2 |= PHY_B_1000C_MSE; +- ++ + if (pPrt->PMSMode == SK_MS_MODE_MASTER) { + Ctrl2 |= PHY_B_1000C_MSC; + } +@@ -1971,7 +2026,7 @@ + if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyBcom: no auto-negotiation Port %d\n", Port)); +- /* Set DuplexMode in Config register */ ++ /* set DuplexMode in Config register */ + if (pPrt->PLinkMode == SK_LMODE_FULL) { + Ctrl1 |= PHY_CT_DUP_MD; + } +@@ -1989,7 +2044,7 @@ + else { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyBcom: with auto-negotiation Port %d\n", Port)); +- /* Set Auto-negotiation advertisement */ ++ /* set Auto-negotiation advertisement */ + + /* + * Workaround BCOM Errata #1 for the C5 type. +@@ -1997,8 +2052,8 @@ + * Set Repeater/DTE bit 10 of the 1000Base-T Control Register + */ + Ctrl2 |= PHY_B_1000C_RD; +- +- /* Set Full/half duplex capabilities */ ++ ++ /* set Full/half duplex capabilities */ + switch (pPrt->PLinkMode) { + case SK_LMODE_AUTOHALF: + Ctrl2 |= PHY_B_1000C_AHD; +@@ -2014,7 +2069,7 @@ + SKERR_HWI_E015MSG); + } + +- /* Set Flow-control capabilities */ ++ /* set Flow-control capabilities */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + Ctrl3 |= PHY_B_P_NO_PAUSE; +@@ -2036,27 +2091,27 @@ + /* Restart Auto-negotiation */ + Ctrl1 |= PHY_CT_ANE | PHY_CT_RE_CFG; + } +- ++ + /* Initialize LED register here? */ + /* No. Please do it in SkDgXmitLed() (if required) and swap +- init order of LEDs and XMAC. (MAl) */ +- ++ init order of LEDs and XMAC. (MAl) */ ++ + /* Write 1000Base-T Control Register */ + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_1000T_CTRL, Ctrl2); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Set 1000B-T Ctrl Reg=0x%04X\n", Ctrl2)); +- ++ + /* Write AutoNeg Advertisement Register */ + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUNE_ADV, Ctrl3); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Set Auto-Neg.Adv.Reg=0x%04X\n", Ctrl3)); +- ++ + if (DoLoop) { +- /* Set the Phy Loopback bit, too */ ++ /* set the Phy Loopback bit, too */ + Ctrl1 |= PHY_CT_LOOP; + } + +- if (pAC->GIni.GIPortUsage == SK_JUMBO_LINK) { ++ if (pPrt->PPortUsage == SK_JUMBO_LINK) { + /* configure FIFO to high latency for transmission of ext. packets */ + Ctrl4 |= PHY_B_PEC_HIGH_LA; + +@@ -2068,7 +2123,7 @@ + + /* Configure LED Traffic Mode and Jumbo Frame usage if specified */ + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_P_EXT_CTRL, Ctrl4); +- ++ + /* Write to the Phy control register */ + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_CTRL, Ctrl1); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +@@ -2078,17 +2133,17 @@ + + + #ifdef YUKON +-#ifndef SK_SLIM ++#ifdef SK_PHY_LP_MODE + /****************************************************************************** + * + * SkGmEnterLowPowerMode() + * +- * Description: ++ * Description: + * This function sets the Marvell Alaska PHY to the low power mode + * given by parameter mode. + * The following low power modes are available: +- * +- * - Coma Mode (Deep Sleep): ++ * ++ * - COMA Mode (Deep Sleep): + * Power consumption: ~15 - 30 mW + * The PHY cannot wake up on its own. + * +@@ -2115,114 +2170,203 @@ + * 1: error + */ + int SkGmEnterLowPowerMode( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (e.g. MAC_1) */ + SK_U8 Mode) /* low power mode */ + { ++ SK_U8 LastMode; ++ SK_U8 Byte; + SK_U16 Word; ++ SK_U16 ClkDiv; + SK_U32 DWord; +- SK_U8 LastMode; ++ SK_U32 PowerDownBit; + int Ret = 0; + +- if (pAC->GIni.GIYukonLite && +- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { ++ if (!(CHIP_ID_YUKON_2(pAC) || (pAC->GIni.GIYukonLite && ++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3))) { + +- /* save current power mode */ +- LastMode = pAC->GIni.GP[Port].PPhyPowerState; +- pAC->GIni.GP[Port].PPhyPowerState = Mode; +- +- switch (Mode) { +- /* coma mode (deep sleep) */ +- case PHY_PM_DEEP_SLEEP: +- /* setup General Purpose Control Register */ +- GM_OUT16(IoC, 0, GM_GP_CTRL, GM_GPCR_FL_PASS | +- GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS); +- +- /* apply COMA mode workaround */ +- SkGmPhyWrite(pAC, IoC, Port, 29, 0x001f); +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xfff3); +- +- SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord); +- +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); +- +- /* Set PHY to Coma Mode */ +- SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord | PCI_PHY_COMA); +- +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); +- +- break; +- +- /* IEEE 22.2.4.1.5 compatible power down mode */ +- case PHY_PM_IEEE_POWER_DOWN: +- /* +- * - disable MAC 125 MHz clock +- * - allow MAC power down +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- Word |= PHY_M_PC_DIS_125CLK; +- Word &= ~PHY_M_PC_MAC_POW_UP; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ return(1); ++ } + +- /* +- * register changes must be followed by a software +- * reset to take effect +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); +- Word |= PHY_CT_RESET; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); +- +- /* switch IEEE compatible power down mode on */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); +- Word |= PHY_CT_PDOWN; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); +- break; ++ /* save current power mode */ ++ LastMode = pAC->GIni.GP[Port].PPhyPowerState; ++ pAC->GIni.GP[Port].PPhyPowerState = Mode; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_POWM, SK_DBGCAT_CTRL, ++ ("SkGmEnterLowPowerMode: %u\n", Mode)); ++ ++ switch (Mode) { ++ /* COMA mode (deep sleep) */ ++ case PHY_PM_DEEP_SLEEP: ++ /* clear PHY & MAC reset first */ ++ SkGmClearRst(pAC, IoC, Port); + +- /* energy detect and energy detect plus mode */ +- case PHY_PM_ENERGY_DETECT: +- case PHY_PM_ENERGY_DETECT_PLUS: +- /* +- * - disable MAC 125 MHz clock +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- Word |= PHY_M_PC_DIS_125CLK; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); +- +- /* activate energy detect mode 1 */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- +- /* energy detect mode */ +- if (Mode == PHY_PM_ENERGY_DETECT) { +- Word |= PHY_M_PC_EN_DET; +- } +- /* energy detect plus mode */ +- else { +- Word |= PHY_M_PC_EN_DET_PLUS; ++ /* setup General Purpose Control Register */ ++ GM_OUT16(IoC, Port, GM_GP_CTRL, GM_GPCR_FL_PASS | ++ GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* set power down bit */ ++ PowerDownBit = (Port == MAC_1) ? PCI_Y2_PHY1_POWD : ++ PCI_Y2_PHY2_POWD; ++ ++ /* no COMA mode on Yukon-FE and Yukon-2 PHY */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE || ++ pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ ++ /* set IEEE compatible Power Down Mode */ ++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PHY_CT_PDOWN); ++ ++ ClkDiv = 0; /* divide clock by 2 */ ++ } ++ else { ++ ClkDiv = 1; /* divide clock by 4 */ ++ } ++ } ++ else { ++ /* apply COMA mode workaround */ ++ (void)SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 0x001f); ++ ++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xfff3); ++ ++ PowerDownBit = PCI_PHY_COMA; ++ } ++ ++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord); ++ ++ /* set PHY to PowerDown/COMA Mode */ ++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord | PowerDownBit); ++ ++ /* check if this routine was called from a for() loop */ ++ if (pAC->GIni.GIMacsFound == 1 || Port == MAC_2) { ++ ++ /* ASF system clock stopped */ ++ SK_OUT8(IoC, B28_Y2_ASF_STAT_CMD, Y2_ASF_CLK_HALT); ++ ++ if (HW_FEATURE(pAC, HWF_RED_CORE_CLK_SUP)) { ++ /* on Yukon-2 clock select value is 31 */ ++ DWord = (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) ? ++ (Y2_CLK_DIV_VAL_2(0) | Y2_CLK_SEL_VAL_2(31)) : ++ Y2_CLK_DIV_VAL(ClkDiv); ++ ++ /* check for Yukon-2 dual port PCI-Express adapter */ ++ if (!(pAC->GIni.GIMacsFound == 2 && ++ pAC->GIni.GIPciBus == SK_PEX_BUS)) { ++ /* enable Core Clock Division */ ++ DWord |= Y2_CLK_DIV_ENA; + } + +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Set Core Clock: 0x%08X\n", DWord)); + +- /* +- * reinitialize the PHY to force a software reset +- * which is necessary after the register settings +- * for the energy detect modes. +- * Furthermore reinitialisation prevents that the +- * PHY is running out of a stable state. +- */ +- SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE); +- break; ++ /* reduce Core Clock Frequency */ ++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, DWord); ++ } + +- /* don't change current power mode */ +- default: +- pAC->GIni.GP[Port].PPhyPowerState = LastMode; +- Ret = 1; +- break; ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && ++ pAC->GIni.GIChipRev > 1) { ++ /* enable bits are inverted */ ++ Byte = 0; ++ } ++ else { ++ Byte = (SK_U8)(Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | ++ Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | ++ Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS); ++ } ++ ++ /* disable PCI & Core Clock, disable clock gating for both Links */ ++ SK_OUT8(IoC, B2_Y2_CLK_GATE, Byte); ++ ++ if (pAC->GIni.GIVauxAvail) { ++ /* switch power to VAUX */ ++ SK_OUT8(IoC, B0_POWER_CTRL, (SK_U8)(PC_VAUX_ENA | PC_VCC_ENA | ++ PC_VAUX_ON | PC_VCC_OFF)); ++ } ++#ifdef DEBUG ++ SK_IN32(IoC, B0_CTST, &DWord); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Ctrl/Stat & Switch: 0x%08x\n", DWord)); ++#endif /* DEBUG */ ++ ++ if (pAC->GIni.GIMacsFound == 1 && ++ pAC->GIni.GIPciBus == SK_PEX_BUS) { ++ ++ /* switch to D1 state */ ++ SK_OUT8(IoC, PCI_C(pAC, PCI_PM_CTL_STS), PCI_PM_STATE_D1); ++ } + } +- } +- /* low power modes are not supported by this chip */ +- else { ++ ++ break; ++ ++ /* IEEE 22.2.4.1.5 compatible power down mode */ ++ case PHY_PM_IEEE_POWER_DOWN: ++ ++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); ++ ++ Word |= PHY_M_PC_POL_R_DIS; ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* disable MAC 125 MHz clock */ ++ Word |= PHY_M_PC_DIS_125CLK; ++ Word &= ~PHY_M_PC_MAC_POW_UP; ++ } ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ ++ /* these register changes must be followed by a software reset */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word |= PHY_CT_RESET; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ ++ /* switch IEEE compatible power down mode on */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word |= PHY_CT_PDOWN; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ ++ break; ++ ++ /* energy detect and energy detect plus mode */ ++ case PHY_PM_ENERGY_DETECT: ++ case PHY_PM_ENERGY_DETECT_PLUS: ++ ++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); ++ ++ Word |= PHY_M_PC_POL_R_DIS; ++ ++ if (!CHIP_ID_YUKON_2(pAC)) { ++ /* disable MAC 125 MHz clock */ ++ Word |= PHY_M_PC_DIS_125CLK; ++ } ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* enable Energy Detect (sense & pulse) */ ++ Word |= PHY_M_PC_ENA_ENE_DT; ++ } ++ else { ++ /* clear energy detect mode bits */ ++ Word &= ~PHY_M_PC_EN_DET_MSK; ++ ++ Word |= (Mode == PHY_PM_ENERGY_DETECT) ? PHY_M_PC_EN_DET : ++ PHY_M_PC_EN_DET_PLUS; ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ ++ /* these register changes must be followed by a software reset */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word |= PHY_CT_RESET; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ ++ break; ++ ++ /* don't change current power mode */ ++ default: ++ pAC->GIni.GP[Port].PPhyPowerState = LastMode; + Ret = 1; ++ break; + } + + return(Ret); +@@ -2233,7 +2377,7 @@ + * + * SkGmLeaveLowPowerMode() + * +- * Description: ++ * Description: + * Leave the current low power mode and switch to normal mode + * + * Note: +@@ -2243,115 +2387,146 @@ + * 1: error + */ + int SkGmLeaveLowPowerMode( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (e.g. MAC_1) */ + { + SK_U32 DWord; ++ SK_U32 PowerDownBit; + SK_U16 Word; + SK_U8 LastMode; + int Ret = 0; + +- if (pAC->GIni.GIYukonLite && +- pAC->GIni.GIChipRev == CHIP_REV_YU_LITE_A3) { ++ if (!(CHIP_ID_YUKON_2(pAC) || (pAC->GIni.GIYukonLite && ++ pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3))) { + +- /* save current power mode */ +- LastMode = pAC->GIni.GP[Port].PPhyPowerState; +- pAC->GIni.GP[Port].PPhyPowerState = PHY_PM_OPERATIONAL_MODE; +- +- switch (LastMode) { +- /* coma mode (deep sleep) */ +- case PHY_PM_DEEP_SLEEP: +- SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord); +- +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); +- +- /* Release PHY from Coma Mode */ +- SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord & ~PCI_PHY_COMA); +- +- SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); +- +- SK_IN32(IoC, B2_GP_IO, &DWord); +- +- /* set to output */ +- DWord |= (GP_DIR_9 | GP_IO_9); +- +- /* set PHY reset */ +- SK_OUT32(IoC, B2_GP_IO, DWord); +- +- DWord &= ~GP_IO_9; /* clear PHY reset (active high) */ +- +- /* clear PHY reset */ +- SK_OUT32(IoC, B2_GP_IO, DWord); +- break; +- +- /* IEEE 22.2.4.1.5 compatible power down mode */ +- case PHY_PM_IEEE_POWER_DOWN: +- /* +- * - enable MAC 125 MHz clock +- * - set MAC power up +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- Word &= ~PHY_M_PC_DIS_125CLK; +- Word |= PHY_M_PC_MAC_POW_UP; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ return(1); ++ } + +- /* +- * register changes must be followed by a software +- * reset to take effect +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); +- Word |= PHY_CT_RESET; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); +- +- /* switch IEEE compatible power down mode off */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); +- Word &= ~PHY_CT_PDOWN; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); +- break; ++ /* save current power mode */ ++ LastMode = pAC->GIni.GP[Port].PPhyPowerState; ++ pAC->GIni.GP[Port].PPhyPowerState = PHY_PM_OPERATIONAL_MODE; + +- /* energy detect and energy detect plus mode */ +- case PHY_PM_ENERGY_DETECT: +- case PHY_PM_ENERGY_DETECT_PLUS: +- /* +- * - enable MAC 125 MHz clock +- */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- Word &= ~PHY_M_PC_DIS_125CLK; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); +- +- /* disable energy detect mode */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); +- Word &= ~PHY_M_PC_EN_DET_MSK; +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ SK_DBG_MSG(pAC, SK_DBGMOD_POWM, SK_DBGCAT_CTRL, ++ ("SkGmLeaveLowPowerMode: %u\n", LastMode)); + +- /* +- * reinitialize the PHY to force a software reset +- * which is necessary after the register settings +- * for the energy detect modes. +- * Furthermore reinitialisation prevents that the +- * PHY is running out of a stable state. +- */ +- SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE); +- break; ++ switch (LastMode) { ++ /* COMA mode (deep sleep) */ ++ case PHY_PM_DEEP_SLEEP: + +- /* don't change current power mode */ +- default: +- pAC->GIni.GP[Port].PPhyPowerState = LastMode; +- Ret = 1; +- break; ++ SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &Word); ++ ++ /* reset all DState bits */ ++ Word &= ~(PCI_PM_STATE_MSK); ++ ++ /* switch to D0 state */ ++ SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, Word); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* disable Core Clock Division */ ++ SK_OUT32(IoC, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS); ++ ++ /* set power down bit */ ++ PowerDownBit = (Port == MAC_1) ? PCI_Y2_PHY1_POWD : ++ PCI_Y2_PHY2_POWD; + } +- } +- /* low power modes are not supported by this chip */ +- else { ++ else { ++ PowerDownBit = PCI_PHY_COMA; ++ } ++ ++ SK_IN32(IoC, PCI_C(pAC, PCI_OUR_REG_1), &DWord); ++ ++ /* Release PHY from PowerDown/COMA Mode */ ++ SK_OUT32(IoC, PCI_C(pAC, PCI_OUR_REG_1), DWord & ~PowerDownBit); ++ ++ SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* no COMA mode on Yukon-FE */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* release IEEE compatible Power Down Mode */ ++ Ret = SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PHY_CT_ANE); ++ } ++ } ++ else { ++ SK_IN32(IoC, B2_GP_IO, &DWord); ++ ++ /* set to output */ ++ DWord |= (GP_DIR_9 | GP_IO_9); ++ ++ /* set PHY reset */ ++ SK_OUT32(IoC, B2_GP_IO, DWord); ++ ++ DWord &= ~GP_IO_9; /* clear PHY reset (active high) */ ++ ++ /* clear PHY reset */ ++ SK_OUT32(IoC, B2_GP_IO, DWord); ++ } ++ ++ break; ++ ++ /* IEEE 22.2.4.1.5 compatible power down mode */ ++ case PHY_PM_IEEE_POWER_DOWN: ++ ++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) { ++ ++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); ++ Word &= ~PHY_M_PC_DIS_125CLK; /* enable MAC 125 MHz clock */ ++ Word |= PHY_M_PC_MAC_POW_UP; /* set MAC power up */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ ++ /* these register changes must be followed by a software reset */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word |= PHY_CT_RESET; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ } ++ ++ /* switch IEEE compatible power down mode off */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word &= ~PHY_CT_PDOWN; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ ++ break; ++ ++ /* energy detect and energy detect plus mode */ ++ case PHY_PM_ENERGY_DETECT: ++ case PHY_PM_ENERGY_DETECT_PLUS: ++ ++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) { ++ ++ Ret = SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* disable Energy Detect */ ++ Word &= ~PHY_M_PC_ENA_ENE_DT; ++ } ++ else { ++ /* disable energy detect mode & enable MAC 125 MHz clock */ ++ Word &= ~(PHY_M_PC_EN_DET_MSK | PHY_M_PC_DIS_125CLK); ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); ++ ++ /* these register changes must be followed by a software reset */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); ++ Word |= PHY_CT_RESET; ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); ++ } ++ break; ++ ++ /* don't change current power mode */ ++ default: ++ pAC->GIni.GP[Port].PPhyPowerState = LastMode; + Ret = 1; ++ break; + } + + return(Ret); + + } /* SkGmLeaveLowPowerMode */ +-#endif /* !SK_SLIM */ +- ++#endif /* SK_PHY_LP_MODE */ + + /****************************************************************************** + * +@@ -2365,74 +2540,168 @@ + * nothing + */ + static void SkGmInitPhyMarv( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { + SK_GEPORT *pPrt; ++ SK_BOOL AutoNeg; + SK_U16 PhyCtrl; + SK_U16 C1000BaseT; + SK_U16 AutoNegAdv; ++ SK_U8 PauseMode; ++#ifndef VCPU ++ SK_U16 SWord; ++ SK_U16 PageReg; ++ SK_U16 LoopSpeed; + SK_U16 ExtPhyCtrl; + SK_U16 LedCtrl; +- SK_BOOL AutoNeg; ++ SK_U16 LedOver; + #if defined(SK_DIAG) || defined(DEBUG) + SK_U16 PhyStat; + SK_U16 PhyStat1; + SK_U16 PhySpecStat; + #endif /* SK_DIAG || DEBUG */ ++#endif /* !VCPU */ ++ ++ /* set Pause On */ ++ PauseMode = (SK_U8)GMC_PAUSE_ON; + + pPrt = &pAC->GIni.GP[Port]; + + /* Auto-negotiation ? */ +- if (pPrt->PLinkMode == SK_LMODE_HALF || pPrt->PLinkMode == SK_LMODE_FULL) { +- AutoNeg = SK_FALSE; +- } +- else { +- AutoNeg = SK_TRUE; +- } +- ++ AutoNeg = pPrt->PLinkMode != SK_LMODE_HALF && ++ pPrt->PLinkMode != SK_LMODE_FULL; ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("InitPhyMarv: Port %d, auto-negotiation %s\n", +- Port, AutoNeg ? "ON" : "OFF")); ++ ("InitPhyMarv: Port %d, Auto-neg. %s, LMode %d, LSpeed %d, FlowC %d\n", ++ Port, AutoNeg ? "ON" : "OFF", ++ pPrt->PLinkMode, pPrt->PLinkSpeed, pPrt->PFlowCtrlMode)); + +-#ifdef VCPU +- VCPUprintf(0, "SkGmInitPhyMarv(), Port=%u, DoLoop=%u\n", +- Port, DoLoop); +-#else /* VCPU */ +- if (DoLoop) { +- /* Set 'MAC Power up'-bit, set Manual MDI configuration */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, +- PHY_M_PC_MAC_POW_UP); ++#ifndef VCPU ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ ++ if (DoLoop) { ++ /* special setup for PHY 88E1112 */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ ++ LoopSpeed = pPrt->PLinkSpeed; ++ ++ if (LoopSpeed == SK_LSPEED_AUTO) { ++ /* force 1000 Mbps */ ++ LoopSpeed = SK_LSPEED_1000MBPS; ++ } ++ LoopSpeed += 2; ++ ++ /* save page register */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &PageReg); ++ ++ /* select page 2 to access MAC control register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 2); ++ ++ /* set MAC interface speed */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, LoopSpeed << 4); ++ ++ /* restore page register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, PageReg); ++ ++ /* disable link pulses */ ++ SWord = PHY_M_PC_DIS_LINK_P; ++ } ++ else { ++ /* set 'MAC Power up'-bit, set Manual MDI configuration */ ++ SWord = PHY_M_PC_MAC_POW_UP; ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, SWord); ++ } ++ else if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO && ++ pAC->GIni.GIChipId != CHIP_ID_YUKON_XL) { ++ /* Read Ext. PHY Specific Control */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl); ++ ++ ExtPhyCtrl &= ~(PHY_M_EC_M_DSC_MSK | PHY_M_EC_S_DSC_MSK | ++ PHY_M_EC_MAC_S_MSK); ++ ++ ExtPhyCtrl |= PHY_M_EC_MAC_S(MAC_TX_CLK_25_MHZ); ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) { ++ /* on PHY 88E1111 there is a change for downshift control */ ++ ExtPhyCtrl |= PHY_M_EC_DSC_2(2) | PHY_M_EC_DOWN_S_ENA; ++ } ++ else { ++ ExtPhyCtrl |= PHY_M_EC_M_DSC(2) | PHY_M_EC_S_DSC(3); ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL, ExtPhyCtrl); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Set Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl)); ++ } + } +- else if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO) { +- /* Read Ext. PHY Specific Control */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl); +- +- ExtPhyCtrl &= ~(PHY_M_EC_M_DSC_MSK | PHY_M_EC_S_DSC_MSK | +- PHY_M_EC_MAC_S_MSK); +- +- ExtPhyCtrl |= PHY_M_EC_MAC_S(MAC_TX_CLK_25_MHZ) | +- PHY_M_EC_M_DSC(0) | PHY_M_EC_S_DSC(1); +- +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL, ExtPhyCtrl); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Set Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl)); ++ ++ if (CHIP_ID_YUKON_2(pAC)) { ++ /* Read PHY Specific Control */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &PhyCtrl); ++ ++ if (!DoLoop && pAC->GIni.GICopperType) { ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* enable Automatic Crossover (!!! Bits 5..4) */ ++ PhyCtrl |= (SK_U16)(PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO) >> 1); ++ } ++ else { ++ /* disable Energy Detect Mode */ ++ PhyCtrl &= ~PHY_M_PC_EN_DET_MSK; ++ ++ /* enable Automatic Crossover */ ++ PhyCtrl |= (SK_U16)PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO); ++ ++ if (AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_AUTO && ++ pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* on PHY 88E1112 there is a change for downshift control */ ++ PhyCtrl &= ~PHY_M_PC_DSC_MSK; ++ PhyCtrl |= PHY_M_PC_DSC(2) | PHY_M_PC_DOWN_S_ENA; ++ } ++ } ++ } ++ /* workaround for deviation #4.88 (CRC errors) */ ++ else { ++ /* disable Automatic Crossover */ ++ PhyCtrl &= ~PHY_M_PC_MDIX_MSK; ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PhyCtrl); ++ } ++ ++ /* special setup for PHY 88E1112 Fiber */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL && !pAC->GIni.GICopperType) { ++ /* Fiber: select 1000BASE-X only mode MAC Specific Ctrl Reg. */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 2); ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &SWord); ++ ++ SWord &= ~PHY_M_MAC_MD_MSK; ++ SWord |= PHY_M_MAC_MODE_SEL(PHY_M_MAC_MD_1000BX); ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, SWord); ++ ++ /* select page 1 to access Fiber registers */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 1); + } + + /* Read PHY Control */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &PhyCtrl); + + if (!AutoNeg) { +- /* Disable Auto-negotiation */ ++ /* disable Auto-negotiation */ + PhyCtrl &= ~PHY_CT_ANE; + } + + PhyCtrl |= PHY_CT_RESET; +- /* Assert software reset */ ++ /* assert software reset */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, PhyCtrl); +-#endif /* VCPU */ ++#endif /* !VCPU */ + + PhyCtrl = 0 /* PHY_CT_COL_TST */; + C1000BaseT = 0; +@@ -2442,30 +2711,31 @@ + if (pPrt->PMSMode != SK_MS_MODE_AUTO) { + /* enable Manual Master/Slave */ + C1000BaseT |= PHY_M_1000C_MSE; +- ++ + if (pPrt->PMSMode == SK_MS_MODE_MASTER) { + C1000BaseT |= PHY_M_1000C_MSC; /* set it to Master */ + } + } +- ++ + /* Auto-negotiation ? */ + if (!AutoNeg) { +- ++ + if (pPrt->PLinkMode == SK_LMODE_FULL) { +- /* Set Full Duplex Mode */ ++ /* set Full Duplex Mode */ + PhyCtrl |= PHY_CT_DUP_MD; + } + +- /* Set Master/Slave manually if not already done */ ++ /* set Master/Slave manually if not already done */ + if (pPrt->PMSMode == SK_MS_MODE_AUTO) { + C1000BaseT |= PHY_M_1000C_MSE; /* set it to Slave */ + } + +- /* Set Speed */ ++ /* set Speed */ + switch (pPrt->PLinkSpeed) { + case SK_LSPEED_AUTO: + case SK_LSPEED_1000MBPS: +- PhyCtrl |= PHY_CT_SP1000; ++ PhyCtrl |= (((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) ? ++ PHY_CT_SP1000 : PHY_CT_SP100); + break; + case SK_LSPEED_100MBPS: + PhyCtrl |= PHY_CT_SP100; +@@ -2477,38 +2747,65 @@ + SKERR_HWI_E019MSG); + } + ++ if ((pPrt->PFlowCtrlMode == SK_FLOW_STAT_NONE) || ++ /* disable Pause also for 10/100 Mbps in half duplex mode */ ++ ((pPrt->PLinkMode == SK_LMODE_HALF) && ++ ((pPrt->PLinkSpeed == SK_LSPEED_STAT_100MBPS) || ++ (pPrt->PLinkSpeed == SK_LSPEED_STAT_10MBPS)))) { ++ ++ /* set Pause Off */ ++ PauseMode = (SK_U8)GMC_PAUSE_OFF; ++ } ++ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), PauseMode); ++ + if (!DoLoop) { ++ /* assert software reset */ + PhyCtrl |= PHY_CT_RESET; + } + } + else { +- /* Set Auto-negotiation advertisement */ +- ++ /* set Auto-negotiation advertisement */ ++ + if (pAC->GIni.GICopperType) { +- /* Set Speed capabilities */ ++ /* set Speed capabilities */ + switch (pPrt->PLinkSpeed) { + case SK_LSPEED_AUTO: +- C1000BaseT |= PHY_M_1000C_AHD | PHY_M_1000C_AFD; ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ C1000BaseT |= PHY_M_1000C_AFD; ++#ifdef xSK_DIAG ++ C1000BaseT |= PHY_M_1000C_AHD; ++#endif /* SK_DIAG */ ++ } + AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD | + PHY_M_AN_10_FD | PHY_M_AN_10_HD; + break; + case SK_LSPEED_1000MBPS: +- C1000BaseT |= PHY_M_1000C_AHD | PHY_M_1000C_AFD; ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ C1000BaseT |= PHY_M_1000C_AFD; ++#ifdef xSK_DIAG ++ C1000BaseT |= PHY_M_1000C_AHD; ++#endif /* SK_DIAG */ ++ } + break; + case SK_LSPEED_100MBPS: +- AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD | +- /* advertise 10Base-T also */ +- PHY_M_AN_10_FD | PHY_M_AN_10_HD; ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_100MBPS) != 0) { ++ AutoNegAdv |= PHY_M_AN_100_FD | PHY_M_AN_100_HD | ++ /* advertise 10Base-T also */ ++ PHY_M_AN_10_FD | PHY_M_AN_10_HD; ++ } + break; + case SK_LSPEED_10MBPS: +- AutoNegAdv |= PHY_M_AN_10_FD | PHY_M_AN_10_HD; ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_10MBPS) != 0) { ++ AutoNegAdv |= PHY_M_AN_10_FD | PHY_M_AN_10_HD; ++ } + break; + default: + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E019, + SKERR_HWI_E019MSG); + } + +- /* Set Full/half duplex capabilities */ ++ /* set Full/half duplex capabilities */ + switch (pPrt->PLinkMode) { + case SK_LMODE_AUTOHALF: + C1000BaseT &= ~PHY_M_1000C_AFD; +@@ -2524,8 +2821,8 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E015, + SKERR_HWI_E015MSG); + } +- +- /* Set Flow-control capabilities */ ++ ++ /* set Flow-control capabilities */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + AutoNegAdv |= PHY_B_P_NO_PAUSE; +@@ -2545,8 +2842,8 @@ + } + } + else { /* special defines for FIBER (88E1011S only) */ +- +- /* Set Full/half duplex capabilities */ ++ ++ /* set Full/half duplex capabilities */ + switch (pPrt->PLinkMode) { + case SK_LMODE_AUTOHALF: + AutoNegAdv |= PHY_M_AN_1000X_AHD; +@@ -2561,8 +2858,8 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E015, + SKERR_HWI_E015MSG); + } +- +- /* Set Flow-control capabilities */ ++ ++ /* set Flow-control capabilities */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + AutoNegAdv |= PHY_M_P_NO_PAUSE_X; +@@ -2587,52 +2884,51 @@ + PhyCtrl |= PHY_CT_ANE | PHY_CT_RE_CFG; + } + } +- ++ + #ifdef VCPU + /* + * E-mail from Gu Lin (08-03-2002): + */ +- ++ + /* Program PHY register 30 as 16'h0708 for simulation speed up */ + SkGmPhyWrite(pAC, IoC, Port, 30, 0x0700 /* 0x0708 */); +- ++ + VCpuWait(2000); + + #else /* VCPU */ +- +- /* Write 1000Base-T Control Register */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_1000T_CTRL, C1000BaseT); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Set 1000B-T Ctrl =0x%04X\n", C1000BaseT)); +- ++ ++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE) { ++ /* Write 1000Base-T Control Register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_1000T_CTRL, C1000BaseT); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Set 1000B-T Ctrl =0x%04X\n", C1000BaseT)); ++ } ++ + /* Write AutoNeg Advertisement Register */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_AUNE_ADV, AutoNegAdv); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Set Auto-Neg.Adv.=0x%04X\n", AutoNegAdv)); + #endif /* VCPU */ +- ++ + if (DoLoop) { +- /* Set the PHY Loopback bit */ ++ /* set the PHY Loopback bit */ + PhyCtrl |= PHY_CT_LOOP; + + #ifdef XXX + /* Program PHY register 16 as 16'h0400 to force link good */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PHY_M_PC_FL_GOOD); +-#endif /* XXX */ + +-#ifndef VCPU + if (pPrt->PLinkSpeed != SK_LSPEED_AUTO) { + /* Write Ext. PHY Specific Control */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_CTRL, + (SK_U16)((pPrt->PLinkSpeed + 2) << 4)); + } +-#endif /* VCPU */ ++#endif /* XXX */ + } + #ifdef TEST_ONLY + else if (pPrt->PLinkSpeed == SK_LSPEED_10MBPS) { +- /* Write PHY Specific Control */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, +- PHY_M_PC_EN_DET_MSK); ++ /* Write PHY Specific Control */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, PHY_M_PC_EN_DET_MSK); + } + #endif + +@@ -2645,27 +2941,83 @@ + VCpuWait(2000); + #else + +- LedCtrl = PHY_M_LED_PULS_DUR(PULS_170MS) | PHY_M_LED_BLINK_RT(BLINK_84MS); ++ LedCtrl = PHY_M_LED_PULS_DUR(PULS_170MS); ++ ++ LedOver = 0; ++ ++ if ((pAC->GIni.GILedBlinkCtrl & SK_ACT_LED_BLINK) != 0) { ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* on 88E3082 these bits are at 11..9 (shifted left) */ ++ LedCtrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) << 1; ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_FE_LED_PAR, &SWord); + +- if ((pAC->GIni.GILedBlinkCtrl & SK_ACT_LED_BLINK) != 0) { +- LedCtrl |= PHY_M_LEDC_RX_CTRL | PHY_M_LEDC_TX_CTRL; ++ /* delete ACT LED control bits */ ++ SWord &= ~PHY_M_FELP_LED1_MSK; ++ /* change ACT LED control to blink mode */ ++ SWord |= PHY_M_FELP_LED1_CTRL(LED_PAR_CTRL_ACT_BL); ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_FE_LED_PAR, SWord); ++ } ++ else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* save page register */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &PageReg); ++ ++ /* select page 3 to access LED control register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 3); ++ ++ /* set LED Function Control register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, (SK_U16) ++ (PHY_M_LEDC_LOS_CTRL(1) | /* LINK/ACT */ ++ PHY_M_LEDC_INIT_CTRL(7) | /* 10 Mbps */ ++ PHY_M_LEDC_STA1_CTRL(7) | /* 100 Mbps */ ++ PHY_M_LEDC_STA0_CTRL(7))); /* 1000 Mbps */ ++ ++ /* set Polarity Control register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_STAT, (SK_U16) ++ (PHY_M_POLC_LS1_P_MIX(4) | PHY_M_POLC_IS0_P_MIX(4) | ++ PHY_M_POLC_LOS_CTRL(2) | PHY_M_POLC_INIT_CTRL(2) | ++ PHY_M_POLC_STA1_CTRL(2) | PHY_M_POLC_STA0_CTRL(2))); ++ ++ /* restore page register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, PageReg); ++ } ++ else { ++ /* set Tx LED (LED_TX) to blink mode on Rx OR Tx activity */ ++ LedCtrl |= PHY_M_LED_BLINK_RT(BLINK_84MS) | PHY_M_LEDC_TX_CTRL; ++ ++ /* on PHY 88E1111 there is a change for LED control */ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC && ++ (pAC->GIni.GILedBlinkCtrl & SK_DUAL_LED_ACT_LNK) != 0) { ++ /* Yukon-EC needs setting of 2 bits: 0,6=11) */ ++ LedCtrl |= PHY_M_LEDC_TX_C_LSB; ++ } ++ /* turn off the Rx LED (LED_RX) */ ++ LedOver |= PHY_M_LED_MO_RX(MO_LED_OFF); ++ } + } + + if ((pAC->GIni.GILedBlinkCtrl & SK_DUP_LED_NORMAL) != 0) { ++ /* disable blink mode (LED_DUPLEX) on collisions */ + LedCtrl |= PHY_M_LEDC_DP_CTRL; + } +- ++ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_CTRL, LedCtrl); + + if ((pAC->GIni.GILedBlinkCtrl & SK_LED_LINK100_ON) != 0) { + /* only in forced 100 Mbps mode */ + if (!AutoNeg && pPrt->PLinkSpeed == SK_LSPEED_100MBPS) { +- +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_OVER, +- PHY_M_LED_MO_100(MO_LED_ON)); ++ /* turn on 100 Mbps LED (LED_LINK100) */ ++ LedOver |= PHY_M_LED_MO_100(MO_LED_ON); + } + } + ++ if (LedOver != 0) { ++ /* set Manual LED Override */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_LED_OVER, LedOver); ++ } ++ + #ifdef SK_DIAG + c_print("Set PHY Ctrl=0x%04X\n", PhyCtrl); + c_print("Set 1000 B-T=0x%04X\n", C1000BaseT); +@@ -2678,30 +3030,33 @@ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &PhyCtrl); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("PHY Ctrl Reg.=0x%04X\n", PhyCtrl)); +- +- /* Read 1000Base-T Control Register */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_CTRL, &C1000BaseT); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("1000B-T Ctrl =0x%04X\n", C1000BaseT)); +- ++ + /* Read AutoNeg Advertisement Register */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_ADV, &AutoNegAdv); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Auto-Neg.Adv.=0x%04X\n", AutoNegAdv)); +- +- /* Read Ext. PHY Specific Control */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl); +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl)); +- ++ ++ if (pAC->GIni.GIChipId != CHIP_ID_YUKON_FE) { ++ /* Read 1000Base-T Control Register */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_CTRL, &C1000BaseT); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("1000B-T Ctrl =0x%04X\n", C1000BaseT)); ++ ++ /* Read Ext. PHY Specific Control */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_CTRL, &ExtPhyCtrl); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ("Ext. PHY Ctrl=0x%04X\n", ExtPhyCtrl)); ++ } ++ + /* Read PHY Status */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_STAT, &PhyStat); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("PHY Stat Reg.=0x%04X\n", PhyStat)); ++ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_STAT, &PhyStat1); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("PHY Stat Reg.=0x%04X\n", PhyStat1)); +- ++ + /* Read PHY Specific Status */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &PhySpecStat); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +@@ -2718,6 +3073,8 @@ + c_print("PHY Spec Reg=0x%04X\n", PhySpecStat); + #endif /* SK_DIAG */ + ++ /* enable all PHY interrupts */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, (SK_U16)PHY_M_DEF_MSK); + #endif /* VCPU */ + + } /* SkGmInitPhyMarv */ +@@ -2737,8 +3094,8 @@ + * nothing + */ + static void SkXmInitPhyLone( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { +@@ -2756,7 +3113,7 @@ + /* manually Master/Slave ? */ + if (pPrt->PMSMode != SK_MS_MODE_AUTO) { + Ctrl2 |= PHY_L_1000C_MSE; +- ++ + if (pPrt->PMSMode == SK_MS_MODE_MASTER) { + Ctrl2 |= PHY_L_1000C_MSC; + } +@@ -2769,7 +3126,7 @@ + */ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyLone: no auto-negotiation Port %d\n", Port)); +- /* Set DuplexMode in Config register */ ++ /* set DuplexMode in Config register */ + if (pPrt->PLinkMode == SK_LMODE_FULL) { + Ctrl1 |= PHY_CT_DUP_MD; + } +@@ -2778,7 +3135,6 @@ + if (pPrt->PMSMode == SK_MS_MODE_AUTO) { + Ctrl2 |= PHY_L_1000C_MSE; /* set it to Slave */ + } +- + /* + * Do NOT enable Auto-negotiation here. This would hold + * the link down because no IDLES are transmitted +@@ -2787,9 +3143,9 @@ + else { + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("InitPhyLone: with auto-negotiation Port %d\n", Port)); +- /* Set Auto-negotiation advertisement */ ++ /* set Auto-negotiation advertisement */ + +- /* Set Full/half duplex capabilities */ ++ /* set Full/half duplex capabilities */ + switch (pPrt->PLinkMode) { + case SK_LMODE_AUTOHALF: + Ctrl2 |= PHY_L_1000C_AHD; +@@ -2805,7 +3161,7 @@ + SKERR_HWI_E015MSG); + } + +- /* Set Flow-control capabilities */ ++ /* set Flow-control capabilities */ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + Ctrl3 |= PHY_L_P_NO_PAUSE; +@@ -2827,19 +3183,19 @@ + /* Restart Auto-negotiation */ + Ctrl1 = PHY_CT_ANE | PHY_CT_RE_CFG; + } +- ++ + /* Write 1000Base-T Control Register */ + SkXmPhyWrite(pAC, IoC, Port, PHY_LONE_1000T_CTRL, Ctrl2); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("1000B-T Ctrl Reg=0x%04X\n", Ctrl2)); +- ++ + /* Write AutoNeg Advertisement Register */ + SkXmPhyWrite(pAC, IoC, Port, PHY_LONE_AUNE_ADV, Ctrl3); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Auto-Neg.Adv.Reg=0x%04X\n", Ctrl3)); + + if (DoLoop) { +- /* Set the Phy Loopback bit, too */ ++ /* set the Phy Loopback bit, too */ + Ctrl1 |= PHY_CT_LOOP; + } + +@@ -2862,8 +3218,8 @@ + * nothing + */ + static void SkXmInitPhyNat( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { +@@ -2884,8 +3240,8 @@ + * nothing + */ + void SkMacInitPhy( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ + { +@@ -2895,7 +3251,7 @@ + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + switch (pPrt->PhyType) { + case SK_PHY_XMAC: + SkXmInitPhyXmac(pAC, IoC, Port, DoLoop); +@@ -2914,10 +3270,10 @@ + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + SkGmInitPhyMarv(pAC, IoC, Port, DoLoop); + } + #endif /* YUKON */ +@@ -2939,8 +3295,8 @@ + * SK_AND_OTHER Other error happened + */ + static int SkXmAutoNegDoneXmac( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -2958,10 +3314,10 @@ + + if ((LPAb & PHY_X_AN_RFB) != 0) { + /* At least one of the remote fault bit is set */ +- /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Remote fault bit set Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; ++ + return(SK_AND_OTHER); + } + +@@ -2974,7 +3330,7 @@ + } + else { + /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Duplex mode mismatch Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + return(SK_AND_DUP_CAP); +@@ -2984,19 +3340,19 @@ + /* We are NOT using chapter 4.23 of the Xaqti manual */ + /* We are using IEEE 802.3z/D5.0 Table 37-4 */ + if ((pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYMMETRIC || +- pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) && +- (LPAb & PHY_X_P_SYM_MD) != 0) { ++ pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM) && ++ (LPAb & PHY_X_P_SYM_MD) != 0) { + /* Symmetric PAUSE */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC; + } + else if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_SYM_OR_REM && +- (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) { +- /* Enable PAUSE receive, disable PAUSE transmit */ ++ (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_ASYM_MD) { ++ /* enable PAUSE receive, disable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND; + } + else if (pPrt->PFlowCtrlMode == SK_FLOW_MODE_LOC_SEND && +- (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) { +- /* Disable PAUSE receive, enable PAUSE transmit */ ++ (LPAb & PHY_X_RS_PAUSE) == PHY_X_P_BOTH_MD) { ++ /* disable PAUSE receive, enable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND; + } + else { +@@ -3022,8 +3378,8 @@ + * SK_AND_OTHER Other error happened + */ + static int SkXmAutoNegDoneBcom( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3045,12 +3401,12 @@ + 01-Sep-2000 RA;:;: + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_1000T_STAT, &ResAb); + #endif /* 0 */ +- ++ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_STAT, &AuxStat); + + if ((LPAb & PHY_B_AN_RF) != 0) { + /* Remote fault bit is set: Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Remote fault bit set Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + return(SK_AND_OTHER); +@@ -3065,23 +3421,23 @@ + } + else { + /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Duplex mode mismatch Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + return(SK_AND_DUP_CAP); + } +- ++ + #ifdef TEST_ONLY + 01-Sep-2000 RA;:;: + /* Check Master/Slave resolution */ + if ((ResAb & PHY_B_1000S_MSF) != 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("Master/Slave Fault Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + pPrt->PMSStatus = SK_MS_STAT_FAULT; + return(SK_AND_OTHER); + } +- ++ + pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? + SK_MS_STAT_MASTER : SK_MS_STAT_SLAVE; + #endif /* 0 */ +@@ -3093,11 +3449,11 @@ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC; + } + else if ((AuxStat & PHY_B_AS_PAUSE_MSK) == PHY_B_AS_PRR) { +- /* Enable PAUSE receive, disable PAUSE transmit */ ++ /* enable PAUSE receive, disable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND; + } + else if ((AuxStat & PHY_B_AS_PAUSE_MSK) == PHY_B_AS_PRT) { +- /* Disable PAUSE receive, enable PAUSE transmit */ ++ /* disable PAUSE receive, enable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND; + } + else { +@@ -3125,14 +3481,18 @@ + * SK_AND_OTHER Other error happened + */ + static int SkGmAutoNegDoneMarv( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; + SK_U16 LPAb; /* Link Partner Ability */ + SK_U16 ResAb; /* Resolved Ability */ + SK_U16 AuxStat; /* Auxiliary Status */ ++ SK_U8 PauseMode; /* Pause Mode */ ++ ++ /* set Pause On */ ++ PauseMode = (SK_U8)GMC_PAUSE_ON; + + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("AutoNegDoneMarv, Port %d\n", Port)); +@@ -3142,78 +3502,105 @@ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_AUNE_LP, &LPAb); + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("Link P.Abil.=0x%04X\n", LPAb)); +- ++ + if ((LPAb & PHY_M_AN_RF) != 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Remote fault bit set Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + return(SK_AND_OTHER); + } + +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb); +- +- /* Check Master/Slave resolution */ +- if ((ResAb & PHY_B_1000S_MSF) != 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("Master/Slave Fault Port %d\n", Port)); +- pPrt->PAutoNegFail = SK_TRUE; +- pPrt->PMSStatus = SK_MS_STAT_FAULT; +- return(SK_AND_OTHER); ++ if ((pPrt->PLinkSpeedCap & SK_LSPEED_CAP_1000MBPS) != 0) { ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_1000T_STAT, &ResAb); ++ ++ /* Check Master/Slave resolution */ ++ if ((ResAb & PHY_B_1000S_MSF) != 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("Master/Slave Fault Port %d\n", Port)); ++ pPrt->PAutoNegFail = SK_TRUE; ++ pPrt->PMSStatus = SK_MS_STAT_FAULT; ++ return(SK_AND_OTHER); ++ } ++ ++ pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? ++ (SK_U8)SK_MS_STAT_MASTER : (SK_U8)SK_MS_STAT_SLAVE; + } +- +- pPrt->PMSStatus = ((ResAb & PHY_B_1000S_MSR) != 0) ? +- (SK_U8)SK_MS_STAT_MASTER : (SK_U8)SK_MS_STAT_SLAVE; +- ++ + /* Read PHY Specific Status */ + SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_STAT, &AuxStat); +- ++ + /* Check Speed & Duplex resolved */ +- if ((AuxStat & PHY_M_PS_SPDUP_RES) == 0) { +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, +- ("AutoNegFail: Speed & Duplex not resolved, Port %d\n", Port)); +- pPrt->PAutoNegFail = SK_TRUE; +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; +- return(SK_AND_DUP_CAP); +- } +- +- if ((AuxStat & PHY_M_PS_FULL_DUP) != 0) { +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOFULL; +- } +- else { +- pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOHALF; +- } +- +- /* Check PAUSE mismatch ??? */ +- /* We are using IEEE 802.3z/D5.0 Table 37-4 */ +- if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_PAUSE_MSK) { +- /* Symmetric PAUSE */ +- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC; +- } +- else if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_RX_P_EN) { +- /* Enable PAUSE receive, disable PAUSE transmit */ +- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND; ++ if ((AuxStat & PHY_M_PS_SPDUP_RES) == 0) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("AutoNegFail: Speed & Duplex not resolved, Port %d\n", Port)); ++ pPrt->PAutoNegFail = SK_TRUE; ++ pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_UNKNOWN; ++ return(SK_AND_DUP_CAP); + } +- else if ((AuxStat & PHY_M_PS_PAUSE_MSK) == PHY_M_PS_TX_P_EN) { +- /* Disable PAUSE receive, enable PAUSE transmit */ +- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND; ++ ++ pPrt->PLinkModeStatus = (SK_U8)(((AuxStat & PHY_M_PS_FULL_DUP) != 0) ? ++ SK_LMODE_STAT_AUTOFULL : SK_LMODE_STAT_AUTOHALF); ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ /* set used link speed */ ++ pPrt->PLinkSpeedUsed = (SK_U8)(((AuxStat & PHY_M_PS_SPEED_100) != 0) ? ++ SK_LSPEED_STAT_100MBPS : SK_LSPEED_STAT_10MBPS); + } + else { +- /* PAUSE mismatch -> no PAUSE */ +- pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE; ++ /* set used link speed */ ++ switch ((unsigned)(AuxStat & PHY_M_PS_SPEED_MSK)) { ++ case (unsigned)PHY_M_PS_SPEED_1000: ++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS; ++ break; ++ case PHY_M_PS_SPEED_100: ++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_100MBPS; ++ break; ++ default: ++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_10MBPS; ++ } ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* Tx & Rx Pause Enabled bits are at 9..8 */ ++ AuxStat >>= 6; ++ ++ if (!pAC->GIni.GICopperType) { ++ /* always 1000 Mbps on fiber */ ++ pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS; ++ } ++ } ++ ++ AuxStat &= PHY_M_PS_PAUSE_MSK; ++ /* We are using IEEE 802.3z/D5.0 Table 37-4 */ ++ if (AuxStat == PHY_M_PS_PAUSE_MSK) { ++ /* Symmetric PAUSE */ ++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_SYMMETRIC; ++ } ++ else if (AuxStat == PHY_M_PS_RX_P_EN) { ++ /* enable PAUSE receive, disable PAUSE transmit */ ++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND; ++ } ++ else if (AuxStat == PHY_M_PS_TX_P_EN) { ++ /* disable PAUSE receive, enable PAUSE transmit */ ++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND; ++ } ++ else { ++ /* PAUSE mismatch -> no PAUSE */ ++ pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE; ++ } + } +- +- /* set used link speed */ +- switch ((unsigned)(AuxStat & PHY_M_PS_SPEED_MSK)) { +- case (unsigned)PHY_M_PS_SPEED_1000: +- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_1000MBPS; +- break; +- case PHY_M_PS_SPEED_100: +- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_100MBPS; +- break; +- default: +- pPrt->PLinkSpeedUsed = (SK_U8)SK_LSPEED_STAT_10MBPS; ++ ++ if ((pPrt->PFlowCtrlStatus == SK_FLOW_STAT_NONE) || ++ /* disable Pause also for 10/100 Mbps in half duplex mode */ ++ ((pPrt->PLinkSpeedUsed < (SK_U8)SK_LSPEED_STAT_1000MBPS) && ++ pPrt->PLinkModeStatus == (SK_U8)SK_LMODE_STAT_AUTOHALF)) { ++ ++ /* set Pause Off */ ++ PauseMode = (SK_U8)GMC_PAUSE_OFF; + } + ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_CTRL), PauseMode); ++ + return(SK_AND_OK); + } /* SkGmAutoNegDoneMarv */ + #endif /* YUKON */ +@@ -3233,8 +3620,8 @@ + * SK_AND_OTHER Other error happened + */ + static int SkXmAutoNegDoneLone( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3253,8 +3640,7 @@ + + if ((LPAb & PHY_L_AN_RF) != 0) { + /* Remote fault bit is set */ +- /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("AutoNegFail: Remote fault bit set Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + return(SK_AND_OTHER); +@@ -3267,11 +3653,11 @@ + else { + pPrt->PLinkModeStatus = (SK_U8)SK_LMODE_STAT_AUTOHALF; + } +- ++ + /* Check Master/Slave resolution */ + if ((ResAb & PHY_L_1000S_MSF) != 0) { + /* Error */ +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("Master/Slave Fault Port %d\n", Port)); + pPrt->PAutoNegFail = SK_TRUE; + pPrt->PMSStatus = SK_MS_STAT_FAULT; +@@ -3288,7 +3674,7 @@ + /* We are using IEEE 802.3z/D5.0 Table 37-4 */ + /* we must manually resolve the abilities here */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_NONE; +- ++ + switch (pPrt->PFlowCtrlMode) { + case SK_FLOW_MODE_NONE: + /* default */ +@@ -3296,7 +3682,7 @@ + case SK_FLOW_MODE_LOC_SEND: + if ((QuickStat & (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) == + (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) { +- /* Disable PAUSE receive, enable PAUSE transmit */ ++ /* disable PAUSE receive, enable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_LOC_SEND; + } + break; +@@ -3309,7 +3695,7 @@ + case SK_FLOW_MODE_SYM_OR_REM: + if ((QuickStat & (PHY_L_QS_PAUSE | PHY_L_QS_AS_PAUSE)) == + PHY_L_QS_AS_PAUSE) { +- /* Enable PAUSE receive, disable PAUSE transmit */ ++ /* enable PAUSE receive, disable PAUSE transmit */ + pPrt->PFlowCtrlStatus = SK_FLOW_STAT_REM_SEND; + } + else if ((QuickStat & PHY_L_QS_PAUSE) != 0) { +@@ -3321,7 +3707,7 @@ + SK_ERR_LOG(pAC, SK_ERRCL_SW | SK_ERRCL_INIT, SKERR_HWI_E016, + SKERR_HWI_E016MSG); + } +- ++ + return(SK_AND_OK); + } /* SkXmAutoNegDoneLone */ + +@@ -3339,8 +3725,8 @@ + * SK_AND_OTHER Other error happened + */ + static int SkXmAutoNegDoneNat( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + /* todo: National */ +@@ -3360,9 +3746,9 @@ + * SK_AND_DUP_CAP Duplex capability error happened + * SK_AND_OTHER Other error happened + */ +-int SkMacAutoNegDone( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++int SkMacAutoNegDone( ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3374,9 +3760,9 @@ + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + switch (pPrt->PhyType) { +- ++ + case SK_PHY_XMAC: + Rtv = SkXmAutoNegDoneXmac(pAC, IoC, Port); + break; +@@ -3396,26 +3782,26 @@ + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + Rtv = SkGmAutoNegDoneMarv(pAC, IoC, Port); + } + #endif /* YUKON */ +- ++ + if (Rtv != SK_AND_OK) { + return(Rtv); + } + + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("AutoNeg done Port %d\n", Port)); +- ++ + /* We checked everything and may now enable the link */ + pPrt->PAutoNegFail = SK_FALSE; + + SkMacRxTxEnable(pAC, IoC, Port); +- ++ + return(SK_AND_OK); + } /* SkMacAutoNegDone */ + +@@ -3433,7 +3819,7 @@ + */ + static void SkXmSetRxTxEn( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Para) /* Parameter to set: MAC or PHY LoopBack, Duplex Mode */ + { +@@ -3458,7 +3844,7 @@ + Word &= ~XM_MMU_GMII_LOOP; + break; + } +- ++ + switch (Para & (SK_PHY_FULLD_ON | SK_PHY_FULLD_OFF)) { + case SK_PHY_FULLD_ON: + Word |= XM_MMU_GMII_FD; +@@ -3467,7 +3853,7 @@ + Word &= ~XM_MMU_GMII_FD; + break; + } +- ++ + XM_OUT16(IoC, Port, XM_MMU_CMD, Word | XM_MMU_ENA_RX | XM_MMU_ENA_TX); + + /* dummy read to ensure writing */ +@@ -3490,12 +3876,12 @@ + */ + static void SkGmSetRxTxEn( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Para) /* Parameter to set: MAC LoopBack, Duplex Mode */ + { + SK_U16 Ctrl; +- ++ + GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl); + + switch (Para & (SK_MAC_LOOPB_ON | SK_MAC_LOOPB_OFF)) { +@@ -3515,12 +3901,13 @@ + Ctrl &= ~GM_GPCR_DUP_FULL; + break; + } +- +- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Ctrl | GM_GPCR_RX_ENA | +- GM_GPCR_TX_ENA)); + ++ GM_OUT16(IoC, Port, GM_GP_CTRL, Ctrl | GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); ++ ++#ifdef XXX + /* dummy read to ensure writing */ + GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl); ++#endif /* XXX */ + + } /* SkGmSetRxTxEn */ + #endif /* YUKON */ +@@ -3537,20 +3924,20 @@ + */ + void SkMacSetRxTxEn( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + int Para) + { + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + SkXmSetRxTxEn(pAC, IoC, Port, Para); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + SkGmSetRxTxEn(pAC, IoC, Port, Para); + } + #endif /* YUKON */ +@@ -3570,8 +3957,8 @@ + * != 0 Error happened + */ + int SkMacRxTxEnable( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3589,9 +3976,9 @@ + } + + if ((pPrt->PLinkMode == SK_LMODE_AUTOHALF || +- pPrt->PLinkMode == SK_LMODE_AUTOFULL || +- pPrt->PLinkMode == SK_LMODE_AUTOBOTH) && +- pPrt->PAutoNegFail) { ++ pPrt->PLinkMode == SK_LMODE_AUTOFULL || ++ pPrt->PLinkMode == SK_LMODE_AUTOBOTH) && ++ pPrt->PAutoNegFail) { + /* Auto-negotiation is not done or failed */ + return(0); + } +@@ -3600,9 +3987,9 @@ + if (pAC->GIni.GIGenesis) { + /* set Duplex Mode and Pause Mode */ + SkXmInitDupMd(pAC, IoC, Port); +- ++ + SkXmInitPauseMd(pAC, IoC, Port); +- ++ + /* + * Initialize the Interrupt Mask Register. Default IRQs are... + * - Link Asynchronous Event +@@ -3618,23 +4005,23 @@ + /* add IRQ for Receive FIFO Overflow */ + IntMask &= ~XM_IS_RXF_OV; + #endif /* DEBUG */ +- ++ + if (pPrt->PhyType != SK_PHY_XMAC) { + /* disable GP0 interrupt bit */ + IntMask |= XM_IS_INP_ASS; + } + XM_OUT16(IoC, Port, XM_IMSK, IntMask); +- ++ + /* get MMU Command Reg. */ + XM_IN16(IoC, Port, XM_MMU_CMD, &Reg); +- ++ + if (pPrt->PhyType != SK_PHY_XMAC && + (pPrt->PLinkModeStatus == SK_LMODE_STAT_FULL || + pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOFULL)) { + /* set to Full Duplex */ + Reg |= XM_MMU_GMII_FD; + } +- ++ + switch (pPrt->PhyType) { + case SK_PHY_BCOM: + /* +@@ -3644,7 +4031,7 @@ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &SWord); + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, + (SK_U16)(SWord & ~PHY_B_AC_DIS_PM)); +- SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_INT_MASK, ++ SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_INT_MASK, + (SK_U16)PHY_B_DEF_MSK); + break; + #ifdef OTHER_PHY +@@ -3658,12 +4045,12 @@ + break; + #endif /* OTHER_PHY */ + } +- ++ + /* enable Rx/Tx */ + XM_OUT16(IoC, Port, XM_MMU_CMD, Reg | XM_MMU_ENA_RX | XM_MMU_ENA_TX); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* +@@ -3678,30 +4065,30 @@ + /* add IRQ for Receive FIFO Overrun */ + IntMask |= GM_IS_RX_FF_OR; + #endif /* DEBUG */ +- +- SK_OUT8(IoC, GMAC_IRQ_MSK, (SK_U8)IntMask); +- ++ ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), (SK_U8)IntMask); ++ + /* get General Purpose Control */ + GM_IN16(IoC, Port, GM_GP_CTRL, &Reg); +- ++ + if (pPrt->PLinkModeStatus == SK_LMODE_STAT_FULL || + pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOFULL) { + /* set to Full Duplex */ + Reg |= GM_GPCR_DUP_FULL; + } +- ++ + /* enable Rx/Tx */ +- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Reg | GM_GPCR_RX_ENA | +- GM_GPCR_TX_ENA)); ++ GM_OUT16(IoC, Port, GM_GP_CTRL, Reg | GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); + +-#ifndef VCPU +- /* Enable all PHY interrupts */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, +- (SK_U16)PHY_M_DEF_MSK); +-#endif /* VCPU */ ++#ifdef XXX ++ /* dummy read to ensure writing */ ++ GM_IN16(IoC, Port, GM_GP_CTRL, &Reg); ++#endif /* XXX */ + } + #endif /* YUKON */ +- ++ ++ pAC->GIni.GP[Port].PState = SK_PRT_RUN; ++ + return(0); + + } /* SkMacRxTxEnable */ +@@ -3717,33 +4104,38 @@ + */ + void SkMacRxTxDisable( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_U16 Word; + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + XM_IN16(IoC, Port, XM_MMU_CMD, &Word); +- +- XM_OUT16(IoC, Port, XM_MMU_CMD, Word & ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX)); +- ++ ++ Word &= ~(XM_MMU_ENA_RX | XM_MMU_ENA_TX); ++ ++ XM_OUT16(IoC, Port, XM_MMU_CMD, Word); ++ + /* dummy read to ensure writing */ + XM_IN16(IoC, Port, XM_MMU_CMD, &Word); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { +- ++ + GM_IN16(IoC, Port, GM_GP_CTRL, &Word); + +- GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Word & ~(GM_GPCR_RX_ENA | +- GM_GPCR_TX_ENA))); ++ Word &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); + ++ GM_OUT16(IoC, Port, GM_GP_CTRL, Word); ++ ++#ifdef XXX + /* dummy read to ensure writing */ + GM_IN16(IoC, Port, GM_GP_CTRL, &Word); ++#endif /* XXX */ + } + #endif /* YUKON */ + +@@ -3760,7 +4152,7 @@ + */ + void SkMacIrqDisable( + SK_AC *pAC, /* Adapter Context */ +-SK_IOC IoC, /* IO context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3772,18 +4164,18 @@ + + #ifdef GENESIS + if (pAC->GIni.GIGenesis) { +- ++ + /* disable all XMAC IRQs */ +- XM_OUT16(IoC, Port, XM_IMSK, 0xffff); +- +- /* Disable all PHY interrupts */ ++ XM_OUT16(IoC, Port, XM_IMSK, 0xffff); ++ ++ /* disable all PHY interrupts */ + switch (pPrt->PhyType) { + case SK_PHY_BCOM: + /* Make sure that PHY is initialized */ + if (pPrt->PState != SK_PRT_RESET) { + /* NOT allowed if BCOM is in RESET state */ + /* Workaround BCOM Errata (#10523) all BCom */ +- /* Disable Power Management if link is down */ ++ /* disable Power Management if link is down */ + SkXmPhyRead(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, &Word); + SkXmPhyWrite(pAC, IoC, Port, PHY_BCOM_AUX_CTRL, + (SK_U16)(Word | PHY_B_AC_DIS_PM)); +@@ -3802,16 +4194,16 @@ + } + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* disable all GMAC IRQs */ +- SK_OUT8(IoC, GMAC_IRQ_MSK, 0); +- ++ SK_OUT8(IoC, MR_ADDR(Port, GMAC_IRQ_MSK), 0); ++ + #ifndef VCPU +- /* Disable all PHY interrupts */ ++ /* disable all PHY interrupts */ + SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_INT_MASK, 0); +-#endif /* VCPU */ ++#endif /* !VCPU */ + } + #endif /* YUKON */ + +@@ -3823,29 +4215,72 @@ + * + * SkXmSendCont() - Enable / Disable Send Continuous Mode + * +- * Description: enable / disable Send Continuous Mode on XMAC ++ * Description: enable / disable Send Continuous Mode on XMAC resp. ++ * Packet Generation on GPHY + * + * Returns: + * nothing + */ + void SkXmSendCont( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL Enable) /* Enable / Disable */ + { ++ SK_U16 Reg; ++ SK_U16 Save; + SK_U32 MdReg; + +- XM_IN32(IoC, Port, XM_MODE, &MdReg); ++ if (pAC->GIni.GIGenesis) { ++ XM_IN32(IoC, Port, XM_MODE, &MdReg); + +- if (Enable) { +- MdReg |= XM_MD_TX_CONT; ++ if (Enable) { ++ MdReg |= XM_MD_TX_CONT; ++ } ++ else { ++ MdReg &= ~XM_MD_TX_CONT; ++ } ++ /* setup Mode Register */ ++ XM_OUT32(IoC, Port, XM_MODE, MdReg); + } + else { +- MdReg &= ~XM_MD_TX_CONT; ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) { ++ /* select page 18 */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 18); ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PAGE_DATA, &Reg); ++ ++ Reg &= ~0x003c; /* clear bits 5..2 */ ++ ++ if (Enable) { ++ /* enable packet generation, 1518 byte length */ ++ Reg |= (BIT_5S | BIT_3S); ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, Reg); ++ } ++ else if (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL) { ++ /* save page register */ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_EXT_ADR, &Save); ++ ++ /* select page 6 to access Packet Generation register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 6); ++ ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Reg); ++ ++ Reg &= ~0x003f; /* clear bits 5..0 */ ++ ++ if (Enable) { ++ /* enable packet generation, 1518 byte length */ ++ Reg |= (BIT_3S | BIT_1S); ++ } ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Reg); ++ ++ /* restore page register */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, Save); ++ } + } +- /* setup Mode Register */ +- XM_OUT32(IoC, Port, XM_MODE, MdReg); + + } /* SkXmSendCont */ + +@@ -3860,8 +4295,8 @@ + * nothing + */ + void SkMacTimeStamp( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL Enable) /* Enable / Disable */ + { +@@ -3906,8 +4341,8 @@ + * is set true. + */ + void SkXmAutoNegLipaXmac( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_U16 IStatus) /* Interrupt Status word to analyse */ + { +@@ -3921,6 +4356,7 @@ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("AutoNegLipa: AutoNeg detected on Port %d, IStatus=0x%04X\n", + Port, IStatus)); ++ + pPrt->PLipaAutoNeg = SK_LIPA_AUTO; + } + } /* SkXmAutoNegLipaXmac */ +@@ -3936,8 +4372,8 @@ + * is set true. + */ + void SkMacAutoNegLipaPhy( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_U16 PhyStat) /* PHY Status word to analyse */ + { +@@ -3951,6 +4387,7 @@ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("AutoNegLipa: AutoNeg detected on Port %d, PhyStat=0x%04X\n", + Port, PhyStat)); ++ + pPrt->PLipaAutoNeg = SK_LIPA_AUTO; + } + } /* SkMacAutoNegLipaPhy */ +@@ -3965,7 +4402,7 @@ + * + * Note: + * With an external PHY, some interrupt bits are not meaningfull any more: +- * - LinkAsyncEvent (bit #14) XM_IS_LNK_AE ++ * - LinkAsyncEvent (bit #14) XM_IS_LNK_AE + * - LinkPartnerReqConfig (bit #10) XM_IS_LIPA_RC + * - Page Received (bit #9) XM_IS_RX_PAGE + * - NextPageLoadedForXmt (bit #8) XM_IS_TX_PAGE +@@ -3977,8 +4414,8 @@ + * nothing + */ + void SkXmIrq( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -3986,13 +4423,13 @@ + SK_U16 IStatus; /* Interrupt status read from the XMAC */ + SK_U16 IStatus2; + #ifdef SK_SLIM +- SK_U64 OverflowStatus; +-#endif ++ SK_U64 OverflowStatus; ++#endif + + pPrt = &pAC->GIni.GP[Port]; +- ++ + XM_IN16(IoC, Port, XM_ISRC, &IStatus); +- ++ + /* LinkPartner Auto-negable? */ + if (pPrt->PhyType == SK_PHY_XMAC) { + SkXmAutoNegLipaXmac(pAC, IoC, Port, IStatus); +@@ -4003,7 +4440,7 @@ + XM_IS_RX_PAGE | XM_IS_TX_PAGE | + XM_IS_AND | XM_IS_INP_ASS); + } +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, + ("XmacIrq Port %d Isr 0x%04X\n", Port, IStatus)); + +@@ -4113,40 +4550,40 @@ + * nothing + */ + void SkGmIrq( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; + SK_U8 IStatus; /* Interrupt status */ + #ifdef SK_SLIM +- SK_U64 OverflowStatus; ++ SK_U64 OverflowStatus; + #else + SK_EVPARA Para; +-#endif ++#endif + + pPrt = &pAC->GIni.GP[Port]; +- +- SK_IN8(IoC, GMAC_IRQ_SRC, &IStatus); +- ++ ++ SK_IN8(IoC, MR_ADDR(Port, GMAC_IRQ_SRC), &IStatus); ++ + #ifdef XXX + /* LinkPartner Auto-negable? */ + SkMacAutoNegLipaPhy(pAC, IoC, Port, IStatus); + #endif /* XXX */ +- ++ + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_IRQ, +- ("GmacIrq Port %d Isr 0x%04X\n", Port, IStatus)); ++ ("GmacIrq Port %d Isr 0x%02X\n", Port, IStatus)); + + /* Combined Tx & Rx Counter Overflow SIRQ Event */ + if (IStatus & (GM_IS_RX_CO_OV | GM_IS_TX_CO_OV)) { + /* these IRQs will be cleared by reading GMACs register */ + #ifdef SK_SLIM +- SkGmOverflowStatus(pAC, IoC, Port, IStatus, &OverflowStatus); ++ SkGmOverflowStatus(pAC, IoC, Port, (SK_U16)IStatus, &OverflowStatus); + #else + Para.Para32[0] = (SK_U32)Port; + Para.Para32[1] = (SK_U32)IStatus; + SkPnmiEvent(pAC, IoC, SK_PNMI_EVT_SIRQ_OVERFLOW, Para); +-#endif ++#endif + } + + if (IStatus & GM_IS_RX_FF_OR) { +@@ -4185,8 +4622,8 @@ + * nothing + */ + void SkMacIrq( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port) /* Port Index (MAC_1 + n) */ + { + #ifdef GENESIS +@@ -4195,7 +4632,7 @@ + SkXmIrq(pAC, IoC, Port); + } + #endif /* GENESIS */ +- ++ + #ifdef YUKON + if (pAC->GIni.GIYukon) { + /* IRQ from GMAC */ +@@ -4222,8 +4659,8 @@ + * 1: something went wrong + */ + int SkXmUpdateStats( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port) /* Port Index (MAC_1 + n) */ + { + SK_GEPORT *pPrt; +@@ -4245,7 +4682,7 @@ + do { + + XM_IN16(IoC, Port, XM_STAT_CMD, &StatReg); +- ++ + if (++WaitIndex > 10) { + + SK_ERR_LOG(pAC, SK_ERRCL_HW, SKERR_HWI_E021, SKERR_HWI_E021MSG); +@@ -4253,7 +4690,7 @@ + return(1); + } + } while ((StatReg & (XM_SC_SNP_TXC | XM_SC_SNP_RXC)) != 0); +- ++ + return(0); + } /* SkXmUpdateStats */ + +@@ -4272,19 +4709,19 @@ + * 1: something went wrong + */ + int SkXmMacStatistic( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port, /* Port Index (MAC_1 + n) */ + SK_U16 StatAddr, /* MIB counter base address */ +-SK_U32 SK_FAR *pVal) /* ptr to return statistic value */ ++SK_U32 SK_FAR *pVal) /* Pointer to return statistic value */ + { + if ((StatAddr < XM_TXF_OK) || (StatAddr > XM_RXF_MAX_SZ)) { +- ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E022, SKERR_HWI_E022MSG); +- ++ + return(1); + } +- ++ + XM_IN32(IoC, Port, StatAddr, pVal); + + return(0); +@@ -4303,12 +4740,12 @@ + * 1: something went wrong + */ + int SkXmResetCounter( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port) /* Port Index (MAC_1 + n) */ + { + XM_OUT16(IoC, Port, XM_STAT_CMD, XM_SC_CLR_RXC | XM_SC_CLR_TXC); +- /* Clear two times according to Errata #3 */ ++ /* Clear two times according to XMAC Errata #3 */ + XM_OUT16(IoC, Port, XM_STAT_CMD, XM_SC_CLR_RXC | XM_SC_CLR_TXC); + + return(0); +@@ -4335,11 +4772,11 @@ + * 1: something went wrong + */ + int SkXmOverflowStatus( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port, /* Port Index (MAC_1 + n) */ +-SK_U16 IStatus, /* Interupt Status from MAC */ +-SK_U64 SK_FAR *pStatus) /* ptr for return overflow status value */ ++SK_U16 IStatus, /* Interrupt Status from MAC */ ++SK_U64 SK_FAR *pStatus) /* Pointer for return overflow status value */ + { + SK_U64 Status; /* Overflow status */ + SK_U32 RegVal; +@@ -4351,7 +4788,7 @@ + XM_IN32(IoC, Port, XM_RX_CNT_EV, &RegVal); + Status |= (SK_U64)RegVal << 32; + } +- ++ + if ((IStatus & XM_IS_TXC_OV) != 0) { + + XM_IN32(IoC, Port, XM_TX_CNT_EV, &RegVal); +@@ -4378,8 +4815,8 @@ + * 1: something went wrong + */ + int SkGmUpdateStats( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port) /* Port Index (MAC_1 + n) */ + { + return(0); +@@ -4400,24 +4837,27 @@ + * 1: something went wrong + */ + int SkGmMacStatistic( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port, /* Port Index (MAC_1 + n) */ + SK_U16 StatAddr, /* MIB counter base address */ +-SK_U32 SK_FAR *pVal) /* ptr to return statistic value */ ++SK_U32 SK_FAR *pVal) /* Pointer to return statistic value */ + { + + if ((StatAddr < GM_RXF_UC_OK) || (StatAddr > GM_TXE_FIFO_UR)) { +- ++ + SK_ERR_LOG(pAC, SK_ERRCL_SW, SKERR_HWI_E022, SKERR_HWI_E022MSG); +- +- SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, + ("SkGmMacStat: wrong MIB counter 0x%04X\n", StatAddr)); + return(1); + } +- ++ + GM_IN32(IoC, Port, StatAddr, pVal); + ++ /* dummy read */ ++ SK_IN16(IoC, B0_RAP, &StatAddr); ++ + return(0); + } /* SkGmMacStatistic */ + +@@ -4434,8 +4874,8 @@ + * 1: something went wrong + */ + int SkGmResetCounter( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port) /* Port Index (MAC_1 + n) */ + { + SK_U16 Reg; /* Phy Address Register */ +@@ -4446,16 +4886,16 @@ + + /* set MIB Clear Counter Mode */ + GM_OUT16(IoC, Port, GM_PHY_ADDR, Reg | GM_PAR_MIB_CLR); +- ++ + /* read all MIB Counters with Clear Mode set */ + for (i = 0; i < GM_MIB_CNT_SIZE; i++) { + /* the reset is performed only when the lower 16 bits are read */ + GM_IN16(IoC, Port, GM_MIB_CNT_BASE + 8*i, &Word); + } +- ++ + /* clear MIB Clear Counter Mode */ + GM_OUT16(IoC, Port, GM_PHY_ADDR, Reg); +- ++ + return(0); + } /* SkGmResetCounter */ + +@@ -4469,48 +4909,62 @@ + * resulting counter overflow status is written to , whereas the + * the following bit coding is used: + * 63:56 - unused +- * 55:48 - TxRx interrupt register bit7:0 +- * 32:47 - Rx interrupt register ++ * 55:48 - TxRx interrupt register bit 7:0 ++ * 47:32 - Rx interrupt register + * 31:24 - unused +- * 23:16 - TxRx interrupt register bit15:8 +- * 15:0 - Tx interrupt register ++ * 23:16 - TxRx interrupt register bit 15:8 ++ * 15: 0 - Tx interrupt register + * + * Returns: + * 0: success + * 1: something went wrong + */ + int SkGmOverflowStatus( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + unsigned int Port, /* Port Index (MAC_1 + n) */ +-SK_U16 IStatus, /* Interupt Status from MAC */ +-SK_U64 SK_FAR *pStatus) /* ptr for return overflow status value */ ++SK_U16 IStatus, /* Interrupt Status from MAC */ ++SK_U64 SK_FAR *pStatus) /* Pointer for return overflow status value */ + { +- SK_U64 Status; /* Overflow status */ + SK_U16 RegVal; ++#ifndef SK_SLIM ++ SK_U64 Status; /* Overflow status */ + + Status = 0; ++#endif /* !SK_SLIM */ + + if ((IStatus & GM_IS_RX_CO_OV) != 0) { + /* this register is self-clearing after read */ + GM_IN16(IoC, Port, GM_RX_IRQ_SRC, &RegVal); ++ ++#ifndef SK_SLIM + Status |= (SK_U64)RegVal << 32; ++#endif /* !SK_SLIM */ + } +- ++ + if ((IStatus & GM_IS_TX_CO_OV) != 0) { + /* this register is self-clearing after read */ + GM_IN16(IoC, Port, GM_TX_IRQ_SRC, &RegVal); ++ ++#ifndef SK_SLIM + Status |= (SK_U64)RegVal; ++#endif /* !SK_SLIM */ + } +- ++ + /* this register is self-clearing after read */ + GM_IN16(IoC, Port, GM_TR_IRQ_SRC, &RegVal); ++ ++#ifndef SK_SLIM + /* Rx overflow interrupt register bits (LoByte)*/ + Status |= (SK_U64)((SK_U8)RegVal) << 48; + /* Tx overflow interrupt register bits (HiByte)*/ + Status |= (SK_U64)(RegVal >> 8) << 16; + + *pStatus = Status; ++#endif /* !SK_SLIM */ ++ ++ /* dummy read */ ++ SK_IN16(IoC, B0_RAP, &RegVal); + + return(0); + } /* SkGmOverflowStatus */ +@@ -4526,57 +4980,114 @@ + * gets the results if 'StartTest' is true + * + * NOTE: this test is meaningful only when link is down +- * ++ * + * Returns: + * 0: success + * 1: no YUKON copper + * 2: test in progress + */ + int SkGmCableDiagStatus( +-SK_AC *pAC, /* adapter context */ +-SK_IOC IoC, /* IO context */ ++SK_AC *pAC, /* Adapter Context */ ++SK_IOC IoC, /* I/O Context */ + int Port, /* Port Index (MAC_1 + n) */ + SK_BOOL StartTest) /* flag for start / get result */ + { + int i; ++ int CableDiagOffs; ++ int MdiPairs; ++ SK_BOOL FastEthernet; ++ SK_BOOL Yukon2; + SK_U16 RegVal; + SK_GEPORT *pPrt; + + pPrt = &pAC->GIni.GP[Port]; + + if (pPrt->PhyType != SK_PHY_MARV_COPPER) { +- ++ + return(1); + } + ++ Yukon2 = (pAC->GIni.GIChipId == CHIP_ID_YUKON_XL); ++ ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_FE) { ++ ++ CableDiagOffs = PHY_MARV_FE_VCT_TX; ++ FastEthernet = SK_TRUE; ++ MdiPairs = 2; ++ } ++ else { ++ CableDiagOffs = Yukon2 ? PHY_MARV_PHY_CTRL : PHY_MARV_CABLE_DIAG; ++ FastEthernet = SK_FALSE; ++ MdiPairs = 4; ++ } ++ + if (StartTest) { ++ ++ /* set to RESET to avoid PortCheckUp */ ++ pPrt->PState = SK_PRT_RESET; ++ + /* only start the cable test */ +- if ((pPrt->PhyId1 & PHY_I1_REV_MSK) < 4) { +- /* apply TDR workaround from Marvell */ +- SkGmPhyWrite(pAC, IoC, Port, 29, 0x001e); +- +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xcc00); +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc800); +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc400); +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc000); +- SkGmPhyWrite(pAC, IoC, Port, 30, 0xc100); ++ if (!FastEthernet) { ++ ++ if ((((pPrt->PhyId1 & PHY_I1_MOD_NUM) >> 4) == 2) && ++ ((pPrt->PhyId1 & PHY_I1_REV_MSK) < 4)) { ++ /* apply TDR workaround for model 2, rev. < 4 */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_ADDR, 0x001e); ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xcc00); ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc800); ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc400); ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc000); ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PAGE_DATA, 0xc100); ++ } ++ ++#ifdef YUKON_DBG ++ if (pAC->GIni.GIChipId == CHIP_ID_YUKON_EC) { ++ /* set address to 1 for page 1 */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 1); ++ ++ /* disable waiting period */ ++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs, ++ PHY_M_CABD_DIS_WAIT); ++ } ++#endif ++ if (Yukon2) { ++ /* set address to 5 for page 5 */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 5); ++ ++#ifdef YUKON_DBG ++ /* disable waiting period */ ++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs + 1, ++ PHY_M_CABD_DIS_WAIT); ++#endif ++ } ++ else { ++ /* set address to 0 for MDI[0] (Page 0) */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0); ++ } + } ++ else { ++ RegVal = PHY_CT_RESET | PHY_CT_SP100; + +- /* set address to 0 for MDI[0] */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, 0); ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, RegVal); + +- /* Read Cable Diagnostic Reg */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal); ++#ifdef xYUKON_DBG ++ SkGmPhyRead(pAC, IoC, Port, PHY_MARV_FE_SPEC_2, &RegVal); ++ /* disable waiting period */ ++ RegVal |= PHY_M_FESC_DIS_WAIT; ++ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_FE_SPEC_2, RegVal); ++#endif ++ } + + /* start Cable Diagnostic Test */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, +- (SK_U16)(RegVal | PHY_M_CABD_ENA_TEST)); +- ++ SkGmPhyWrite(pAC, IoC, Port, CableDiagOffs, PHY_M_CABD_ENA_TEST); ++ + return(0); + } +- ++ + /* Read Cable Diagnostic Reg */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal); ++ SkGmPhyRead(pAC, IoC, Port, CableDiagOffs, &RegVal); + + SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_CTRL, + ("PHY Cable Diag.=0x%04X\n", RegVal)); +@@ -4587,16 +5098,24 @@ + } + + /* get the test results */ +- for (i = 0; i < 4; i++) { +- /* set address to i for MDI[i] */ +- SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, (SK_U16)i); ++ for (i = 0; i < MdiPairs; i++) { ++ ++ if (!FastEthernet && !Yukon2) { ++ /* set address to i for MDI[i] */ ++ SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_EXT_ADR, (SK_U16)i); ++ } + + /* get Cable Diagnostic values */ +- SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CABLE_DIAG, &RegVal); ++ SkGmPhyRead(pAC, IoC, Port, CableDiagOffs, &RegVal); + + pPrt->PMdiPairLen[i] = (SK_U8)(RegVal & PHY_M_CABD_DIST_MSK); + + pPrt->PMdiPairSts[i] = (SK_U8)((RegVal & PHY_M_CABD_STAT_MSK) >> 13); ++ ++ if (FastEthernet || Yukon2) { ++ /* get next register */ ++ CableDiagOffs++; ++ } + } + + return(0); +@@ -4605,3 +5124,4 @@ + #endif /* YUKON */ + + /* End of file */ ++ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/sky2.c linux-2.6.9.new/drivers/net/sk98lin/sky2.c +--- linux-2.6.9.old/drivers/net/sk98lin/sky2.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/sky2.c 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,2737 @@ ++/****************************************************************************** ++ * ++ * Name: sky2.c ++ * Project: Yukon2 specific functions and implementations ++ * Version: $Revision: 1.35.2.33 $ ++ * Date: $Date: 2005/06/17 14:09:32 $ ++ * Purpose: The main driver source module ++ * ++ *****************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 1998-2002 SysKonnect GmbH. ++ * (C)Copyright 2002-2005 Marvell. ++ * ++ * Driver for Marvell Yukon/2 chipset and SysKonnect Gigabit Ethernet ++ * Server Adapters. ++ * ++ * Author: Ralph Roesler (rroesler@syskonnect.de) ++ * Mirko Lindner (mlindner@syskonnect.de) ++ * ++ * Address all question to: linux@syskonnect.de ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ *****************************************************************************/ ++ ++#include "h/skdrv1st.h" ++#include "h/skdrv2nd.h" ++#include ++ ++/****************************************************************************** ++ * ++ * Local Function Prototypes ++ * ++ *****************************************************************************/ ++ ++static void InitPacketQueues(SK_AC *pAC,int Port); ++static void GiveTxBufferToHw(SK_AC *pAC,SK_IOC IoC,int Port); ++static void GiveRxBufferToHw(SK_AC *pAC,SK_IOC IoC,int Port,SK_PACKET *pPacket); ++static void FillReceiveTableYukon2(SK_AC *pAC,SK_IOC IoC,int Port); ++static SK_BOOL HandleReceives(SK_AC *pAC,int Port,SK_U16 Len,SK_U32 FrameStatus,SK_U16 Tcp1,SK_U16 Tcp2,SK_U32 Tist,SK_U16 Vlan); ++static void CheckForSendComplete(SK_AC *pAC,SK_IOC IoC,int Port,SK_PKT_QUEUE *pPQ,SK_LE_TABLE *pLETab,unsigned int Done); ++static void UnmapAndFreeTxPktBuffer(SK_AC *pAC,SK_PACKET *pSkPacket,int TxPort); ++static SK_BOOL AllocateAndInitLETables(SK_AC *pAC); ++static SK_BOOL AllocatePacketBuffersYukon2(SK_AC *pAC); ++static void FreeLETables(SK_AC *pAC); ++static void FreePacketBuffers(SK_AC *pAC); ++static SK_BOOL AllocAndMapRxBuffer(SK_AC *pAC,SK_PACKET *pSkPacket,int Port); ++#ifdef CONFIG_SK98LIN_NAPI ++static SK_BOOL HandleStatusLEs(SK_AC *pAC,int *WorkDone,int WorkToDo); ++#else ++static SK_BOOL HandleStatusLEs(SK_AC *pAC); ++#endif ++ ++extern void SkGeCheckTimer (DEV_NET *pNet); ++extern void SkLocalEventQueue( SK_AC *pAC, ++ SK_U32 Class, ++ SK_U32 Event, ++ SK_U32 Param1, ++ SK_U32 Param2, ++ SK_BOOL Flag); ++extern void SkLocalEventQueue64( SK_AC *pAC, ++ SK_U32 Class, ++ SK_U32 Event, ++ SK_U64 Param, ++ SK_BOOL Flag); ++ ++/****************************************************************************** ++ * ++ * Local Variables ++ * ++ *****************************************************************************/ ++ ++#define MAX_NBR_RX_BUFFERS_IN_HW 0x15 ++static SK_U8 NbrRxBuffersInHW; ++ ++#if defined(__i386__) || defined(__x86_64__) ++#if defined(__x86_64__) ++#define FLUSH_OPC(le) ++/* #define FLUSH_OPC(le) \ */ ++/* cache0 = ((long *)(le))[0]; \ */ ++/* cache1 = ((long *)(le))[1]; \ */ ++/* ((volatile long *)(le))[0] = cache0; \ */ ++/* ((volatile long *)(le))[1] = cache1; */ ++#else ++#define FLUSH_OPC(le) ++#endif ++#else ++#define FLUSH_OPC(le) ++#endif ++ ++/****************************************************************************** ++ * ++ * Global Functions ++ * ++ *****************************************************************************/ ++ ++int SkY2Xmit( struct sk_buff *skb, struct SK_NET_DEVICE *dev); ++ ++/***************************************************************************** ++ * ++ * SkY2RestartStatusUnit - restarts teh status unit ++ * ++ * Description: ++ * Reenables the status unit after any De-Init (e.g. when altering ++ * the sie of the MTU via 'ifconfig a.b.c.d mtu xxx') ++ * ++ * Returns: N/A ++ */ ++void SkY2RestartStatusUnit( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2RestartStatusUnit\n")); ++ ++ /* ++ ** It might be that the TX timer is not started. Therefore ++ ** it is initialized here -> to be more investigated! ++ */ ++ SK_OUT32(pAC->IoBase, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC,10)); ++ ++ pAC->StatusLETable.Done = 0; ++ pAC->StatusLETable.Put = 0; ++ pAC->StatusLETable.HwPut = 0; ++ SkGeY2InitStatBmu(pAC, pAC->IoBase, &pAC->StatusLETable); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2RestartStatusUnit\n")); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2RlmtSend - sends out a single RLMT notification ++ * ++ * Description: ++ * This function sends out an RLMT frame ++ * ++ * Returns: ++ * > 0 - on succes: the number of bytes in the message ++ * = 0 - on resource shortage: this frame sent or dropped, now ++ * the ring is full ( -> set tbusy) ++ * < 0 - on failure: other problems ( -> return failure to upper layers) ++ */ ++int SkY2RlmtSend ( ++SK_AC *pAC, /* pointer to adapter control context */ ++int PortNr, /* index of port the packet(s) shall be send to */ ++struct sk_buff *pMessage) /* pointer to send-message */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("=== SkY2RlmtSend\n")); ++#if 0 ++ return -1; // temporarily do not send out RLMT frames ++#endif ++ skb_shinfo(pMessage)->nr_frags = (2*MAX_SKB_FRAGS) + PortNr; ++ return(SkY2Xmit(pMessage, pAC->dev[PortNr])); // SkY2Xmit needs device ++} ++ ++/***************************************************************************** ++ * ++ * SkY2AllocateResources - Allocates all required resources for Yukon2 ++ * ++ * Description: ++ * This function allocates all memory needed for the Yukon2. ++ * It maps also RX buffers to the LETables and initializes the ++ * status list element table. ++ * ++ * Returns: ++ * SK_TRUE, if all resources could be allocated and setup succeeded ++ * SK_FALSE, if an error ++ */ ++SK_BOOL SkY2AllocateResources ( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ int CurrMac; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("==> SkY2AllocateResources\n")); ++ ++ /* ++ ** Initialize the packet queue variables first ++ */ ++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) { ++ InitPacketQueues(pAC, CurrMac); ++ } ++ ++ /* ++ ** Get sufficient memory for the LETables ++ */ ++ if (!AllocateAndInitLETables(pAC)) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR, ++ ("No memory for LETable.\n")); ++ return(SK_FALSE); ++ } ++ ++ /* ++ ** Allocate and intialize memory for both RX and TX ++ ** packet and fragment buffers. On an error, free ++ ** previously allocated LETable memory and quit. ++ */ ++ if (!AllocatePacketBuffersYukon2(pAC)) { ++ FreeLETables(pAC); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR, ++ ("No memory for Packetbuffers.\n")); ++ return(SK_FALSE); ++ } ++ ++ /* ++ ** Rx and Tx LE tables will be initialized in SkGeOpen() ++ ** ++ ** It might be that the TX timer is not started. Therefore ++ ** it is initialized here -> to be more investigated! ++ */ ++ SK_OUT32(pAC->IoBase, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC,10)); ++ SkGeY2InitStatBmu(pAC, pAC->IoBase, &pAC->StatusLETable); ++ ++ pAC->MaxUnusedRxLeWorking = MAX_UNUSED_RX_LE_WORKING; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("<== SkY2AllocateResources\n")); ++ ++ return (SK_TRUE); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2FreeResources - Frees previously allocated resources of Yukon2 ++ * ++ * Description: ++ * This function frees all previously allocated memory of the Yukon2. ++ * ++ * Returns: N/A ++ */ ++void SkY2FreeResources ( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2FreeResources\n")); ++ ++ FreeLETables(pAC); ++ FreePacketBuffers(pAC); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2FreeResources\n")); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2AllocateRxBuffers - Allocates the receive buffers for a port ++ * ++ * Description: ++ * This function allocated all the RX buffers of the Yukon2. ++ * ++ * Returns: N/A ++ */ ++void SkY2AllocateRxBuffers ( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context */ ++int Port) /* port index of RX */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("==> SkY2AllocateRxBuffers (Port %c)\n", Port)); ++ ++ FillReceiveTableYukon2(pAC, IoC, Port); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("<== SkY2AllocateRxBuffers\n")); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2FreeRxBuffers - Free's all allocates RX buffers of ++ * ++ * Description: ++ * This function frees all RX buffers of the Yukon2 for a single port ++ * ++ * Returns: N/A ++ */ ++void SkY2FreeRxBuffers ( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context */ ++int Port) /* port index of RX */ ++{ ++ SK_PACKET *pSkPacket; ++ unsigned long Flags; /* for POP/PUSH macros */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2FreeRxBuffers (Port %c)\n", Port)); ++ ++ if (pAC->RxPort[Port].ReceivePacketTable != NULL) { ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket); ++ while (pSkPacket != NULL) { ++ if ((pSkPacket->pFrag) != NULL) { ++ pci_unmap_page(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen - 2, ++ PCI_DMA_FROMDEVICE); ++ ++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf); ++ pSkPacket->pMBuf = NULL; ++ pSkPacket->pFrag->pPhys = (SK_U64) 0; ++ pSkPacket->pFrag->pVirt = NULL; ++ } ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket); ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket); ++ } ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2FreeRxBuffers\n")); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2FreeTxBuffers - Free's any currently maintained Tx buffer ++ * ++ * Description: ++ * This function frees the TX buffers of the Yukon2 for a single port ++ * which might be in use by a transmit action ++ * ++ * Returns: N/A ++ */ ++void SkY2FreeTxBuffers ( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context */ ++int Port) /* port index of TX */ ++{ ++ SK_PACKET *pSkPacket; ++ SK_FRAG *pSkFrag; ++ unsigned long Flags; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2FreeTxBuffers (Port %c)\n", Port)); ++ ++ if (pAC->TxPort[Port][0].TransmitPacketTable != NULL) { ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxAQ_working, pSkPacket); ++ while (pSkPacket != NULL) { ++ if ((pSkFrag = pSkPacket->pFrag) != NULL) { ++ UnmapAndFreeTxPktBuffer(pAC, pSkPacket, Port); ++ } ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->TxPort[Port][0].TxQ_free, pSkPacket); ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxAQ_working, pSkPacket); ++ } ++#if USE_SYNC_TX_QUEUE ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxSQ_working, pSkPacket); ++ while (pSkPacket != NULL) { ++ if ((pSkFrag = pSkPacket->pFrag) != NULL) { ++ UnmapAndFreeTxPktBuffer(pAC, pSkPacket, Port); ++ } ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->TxPort[Port][0].TxQ_free, pSkPacket); ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->TxPort[Port][0].TxSQ_working, pSkPacket); ++ } ++#endif ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2FreeTxBuffers\n")); ++} ++ ++/***************************************************************************** ++ * ++ * SkY2Isr - handle a receive IRQ for all yukon2 cards ++ * ++ * Description: ++ * This function is called when a receive IRQ is set. (only for yukon2) ++ * HandleReceives does the deferred processing of all outstanding ++ * interrupt operations. ++ * ++ * Returns: N/A ++ */ ++SkIsrRetVar SkY2Isr ( ++int irq, /* the irq we have received (might be shared!) */ ++void *dev_id, /* current device id */ ++struct pt_regs *ptregs) /* not used by our driver */ ++{ ++ struct SK_NET_DEVICE *dev = (struct SK_NET_DEVICE *)dev_id; ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ SK_U32 IntSrc; ++ unsigned long Flags; ++#ifndef CONFIG_SK98LIN_NAPI ++ SK_BOOL handledStatLE = SK_FALSE; ++#else ++ SK_BOOL SetIntMask = SK_FALSE; ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("==> SkY2Isr\n")); ++ ++ SK_IN32(pAC->IoBase, B0_Y2_SP_ISRC2, &IntSrc); ++ ++ if ((IntSrc == 0) && (!pNet->NetConsoleMode)){ ++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("No Interrupt\n ==> SkY2Isr\n")); ++ return SkIsrRetNone; ++ ++ } ++ ++#ifdef Y2_RECOVERY ++ if (pNet->InRecover) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Already in recover\n ==> SkY2Isr\n")); ++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2); ++ return SkIsrRetNone; ++ } ++#endif ++ ++#ifdef CONFIG_SK98LIN_NAPI ++ if (netif_rx_schedule_prep(pAC->dev[0])) { ++ pAC->GIni.GIValIrqMask &= ~(Y2_IS_STAT_BMU); ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ SetIntMask = SK_TRUE; ++ __netif_rx_schedule(pAC->dev[0]); ++ } ++ ++ if (netif_rx_schedule_prep(pAC->dev[1])) { ++ if (!SetIntMask) { ++ pAC->GIni.GIValIrqMask &= ~(Y2_IS_STAT_BMU); ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ } ++ __netif_rx_schedule(pAC->dev[1]); ++ } ++#else ++ handledStatLE = HandleStatusLEs(pAC); ++#endif ++ ++ /* ++ ** Check for Special Interrupts ++ */ ++ if ((IntSrc & ~Y2_IS_STAT_BMU) || pAC->CheckQueue || pNet->TimerExpired) { ++ pAC->CheckQueue = SK_FALSE; ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); ++#ifdef Y2_RECOVERY ++ if (pNet->TimerExpired) { ++ SkGeCheckTimer(pNet); ++ } ++#endif ++ SkGeSirqIsr(pAC, pAC->IoBase, IntSrc); ++ SkEventDispatcher(pAC, pAC->IoBase); ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); ++ } ++ ++ /* Speed enhancement for a2 chipsets */ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) { ++ spin_lock_irqsave(&pAC->SetPutIndexLock, Flags); ++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_XA1,0), &pAC->TxPort[0][0].TxALET); ++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_R1,0), &pAC->RxPort[0].RxLET); ++ spin_unlock_irqrestore(&pAC->SetPutIndexLock, Flags); ++ } ++ ++ /* ++ ** Reenable interrupts and signal end of ISR ++ */ ++ SK_OUT32(pAC->IoBase, B0_Y2_SP_ICR, 2); ++ ++ /* ++ ** Stop and restart TX timer in case a Status LE was handled ++ */ ++#ifndef CONFIG_SK98LIN_NAPI ++ if ((HW_FEATURE(pAC, HWF_WA_DEV_43_418)) && (handledStatLE)) { ++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_STOP); ++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_START); ++ } ++#endif ++ ++ if (!(IS_Q_EMPTY(&(pAC->TxPort[0][TX_PRIO_LOW].TxAQ_waiting)))) { ++ GiveTxBufferToHw(pAC, pAC->IoBase, 0); ++ } ++ if (!(IS_Q_EMPTY(&(pAC->TxPort[1][TX_PRIO_LOW].TxAQ_waiting)))) { ++ GiveTxBufferToHw(pAC, pAC->IoBase, 1); ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("<== SkY2Isr\n")); ++ ++ return SkIsrRetHandled; ++} /* SkY2Isr */ ++ ++/***************************************************************************** ++ * ++ * SkY2Xmit - Linux frame transmit function for Yukon2 ++ * ++ * Description: ++ * The system calls this function to send frames onto the wire. ++ * It puts the frame in the tx descriptor ring. If the ring is ++ * full then, the 'tbusy' flag is set. ++ * ++ * Returns: ++ * 0, if everything is ok ++ * !=0, on error ++ * ++ * WARNING: ++ * returning 1 in 'tbusy' case caused system crashes (double ++ * allocated skb's) !!! ++ */ ++int SkY2Xmit( ++struct sk_buff *skb, /* socket buffer to be sent */ ++struct SK_NET_DEVICE *dev) /* via which device? */ ++{ ++ DEV_NET *pNet = (DEV_NET*) dev->priv; ++ SK_AC *pAC = pNet->pAC; ++ SK_U8 FragIdx = 0; ++ SK_PACKET *pSkPacket; ++ SK_FRAG *PrevFrag; ++ SK_FRAG *CurrFrag; ++ SK_PKT_QUEUE *pWorkQueue; /* corresponding TX queue */ ++ SK_PKT_QUEUE *pWaitQueue; ++ SK_PKT_QUEUE *pFreeQueue; ++ SK_LE_TABLE *pLETab; /* corresponding LETable */ ++ skb_frag_t *sk_frag; ++ SK_U64 PhysAddr; ++ unsigned long Flags; ++ unsigned int Port; ++ int CurrFragCtr; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("==> SkY2Xmit\n")); ++ ++ /* ++ ** Get port and return if no free packet is available ++ */ ++ if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) { ++ Port = skb_shinfo(skb)->nr_frags - (2*MAX_SKB_FRAGS); ++ skb_shinfo(skb)->nr_frags = 0; ++ } else { ++ Port = (pAC->RlmtNets == 2) ? pNet->PortNr : pAC->ActivePort; ++ } ++ ++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free))) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("Not free packets available for send\n")); ++ return 1; /* zero bytes sent! */ ++ } ++ ++ /* ++ ** Put any new packet to be sent in the waiting queue and ++ ** handle also any possible fragment of that packet. ++ */ ++ pWorkQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working); ++ pWaitQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting); ++ pFreeQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free); ++ pLETab = &(pAC->TxPort[Port][TX_PRIO_LOW].TxALET); ++ ++ /* ++ ** Normal send operations require only one fragment, because ++ ** only one sk_buff data area is passed. ++ ** In contradiction to this, scatter-gather (zerocopy) send ++ ** operations might pass one or more additional fragments ++ ** where each fragment needs a separate fragment info packet. ++ */ ++ if (((skb_shinfo(skb)->nr_frags + 1) * MAX_FRAG_OVERHEAD) > ++ NUM_FREE_LE_IN_TABLE(pLETab)) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("Not enough LE available for send\n")); ++ return 1; /* zero bytes sent! */ ++ } ++ ++ if ((skb_shinfo(skb)->nr_frags + 1) > MAX_NUM_FRAGS) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("Not even one fragment available for send\n")); ++ return 1; /* zero bytes sent! */ ++ } ++ ++ /* ++ ** Get first packet from free packet queue ++ */ ++ POP_FIRST_PKT_FROM_QUEUE(pFreeQueue, pSkPacket); ++ if(pSkPacket == NULL) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_TX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("Could not obtain free packet used for xmit\n")); ++ return 1; /* zero bytes sent! */ ++ } ++ ++ pSkPacket->pFrag = &(pSkPacket->FragArray[FragIdx]); ++ ++ /* ++ ** map the sk_buff to be available for the adapter ++ */ ++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev, ++ virt_to_page(skb->data), ++ ((unsigned long) skb->data & ~PAGE_MASK), ++ skb_headlen(skb), ++ PCI_DMA_TODEVICE); ++ pSkPacket->pMBuf = skb; ++ pSkPacket->pFrag->pPhys = PhysAddr; ++ pSkPacket->pFrag->FragLen = skb_headlen(skb); ++ pSkPacket->pFrag->pNext = NULL; /* initial has no next default */ ++ pSkPacket->NumFrags = skb_shinfo(skb)->nr_frags + 1; ++ ++ PrevFrag = pSkPacket->pFrag; ++ ++ /* ++ ** Each scatter-gather fragment need to be mapped... ++ */ ++ for ( CurrFragCtr = 0; ++ CurrFragCtr < skb_shinfo(skb)->nr_frags; ++ CurrFragCtr++) { ++ FragIdx++; ++ sk_frag = &skb_shinfo(skb)->frags[CurrFragCtr]; ++ CurrFrag = &(pSkPacket->FragArray[FragIdx]); ++ ++ /* ++ ** map the sk_buff to be available for the adapter ++ */ ++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev, ++ sk_frag->page, ++ sk_frag->page_offset, ++ sk_frag->size, ++ PCI_DMA_TODEVICE); ++ ++ CurrFrag->pPhys = PhysAddr; ++ CurrFrag->FragLen = sk_frag->size; ++ CurrFrag->pNext = NULL; ++ ++ /* ++ ** Add the new fragment to the list of fragments ++ */ ++ PrevFrag->pNext = CurrFrag; ++ PrevFrag = CurrFrag; ++ } ++ ++ /* ++ ** Add packet to waiting packets queue ++ */ ++ PUSH_PKT_AS_LAST_IN_QUEUE(pWaitQueue, pSkPacket); ++ GiveTxBufferToHw(pAC, pAC->IoBase, Port); ++ dev->trans_start = jiffies; ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("<== SkY2Xmit(return 0)\n")); ++ return (0); ++} /* SkY2Xmit */ ++ ++#ifdef CONFIG_SK98LIN_NAPI ++/***************************************************************************** ++ * ++ * SkY2Poll - NAPI Rx polling callback for Yukon2 chipsets ++ * ++ * Description: ++ * Called by the Linux system in case NAPI polling is activated ++ * ++ * Returns ++ * The number of work data still to be handled ++ * ++ * Notes ++ * The slowpath lock needs to be set because HW accesses may ++ * interfere with slowpath events (e.g. TWSI) ++ */ ++int SkY2Poll( ++struct net_device *dev, /* device that needs to be polled */ ++int *budget) /* how many budget do we have? */ ++{ ++ SK_AC *pAC = ((DEV_NET*)(dev->priv))->pAC; ++ int WorkToDo = min(*budget, dev->quota); ++ int WorkDone = 0; ++ SK_BOOL handledStatLE = SK_FALSE; ++ unsigned long Flags; ++ ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); ++ handledStatLE = HandleStatusLEs(pAC, &WorkDone, WorkToDo); ++ ++ *budget -= WorkDone; ++ dev->quota -= WorkDone; ++ ++ if(WorkDone < WorkToDo) { ++ netif_rx_complete(dev); ++ pAC->GIni.GIValIrqMask |= (Y2_IS_STAT_BMU); ++ SK_OUT32(pAC->IoBase, B0_IMSK, pAC->GIni.GIValIrqMask); ++ if ((HW_FEATURE(pAC, HWF_WA_DEV_43_418)) && (handledStatLE)) { ++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_STOP); ++ SK_OUT8(pAC->IoBase, STAT_TX_TIMER_CTRL, TIM_START); ++ } ++ } ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); ++ return (WorkDone >= WorkToDo); ++} /* SkY2Poll */ ++#endif ++ ++/****************************************************************************** ++ * ++ * SkY2PortStop - stop a port on Yukon2 ++ * ++ * Description: ++ * This function stops a port of the Yukon2 chip. This stop ++ * stop needs to be performed in a specific order: ++ * ++ * a) Stop the Prefetch unit ++ * b) Stop the Port (MAC, PHY etc.) ++ * ++ * Returns: N/A ++ */ ++void SkY2PortStop( ++SK_AC *pAC, /* adapter control context */ ++SK_IOC IoC, /* I/O control context (address of adapter registers) */ ++int Port, /* port to stop (MAC_1 + n) */ ++int Dir, /* StopDirection (SK_STOP_RX, SK_STOP_TX, SK_STOP_ALL) */ ++int RstMode) /* Reset Mode (SK_SOFT_RST, SK_HARD_RST) */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2PortStop (Port %c)\n", 'A' + Port)); ++ ++ /* ++ ** Stop the HW ++ */ ++ SkGeStopPort(pAC, IoC, Port, Dir, RstMode); ++ ++ /* ++ ** Move any TX packet from work queues into the free queue again ++ ** and initialize the TX LETable variables ++ */ ++ SkY2FreeTxBuffers(pAC, pAC->IoBase, Port); ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Bmu.RxTx.TcpWp = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Bmu.RxTx.MssValue = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.BufHighAddr = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Done = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Put = 0; ++ // pAC->GIni.GP[Port].PState = SK_PRT_STOP; ++ ++ /* ++ ** Move any RX packet from work queue into the waiting queue ++ ** and initialize the RX LETable variables ++ */ ++ SkY2FreeRxBuffers(pAC, pAC->IoBase, Port); ++ pAC->RxPort[Port].RxLET.BufHighAddr = 0; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2PortStop()\n")); ++} ++ ++/****************************************************************************** ++ * ++ * SkY2PortStart - start a port on Yukon2 ++ * ++ * Description: ++ * This function starts a port of the Yukon2 chip. This start ++ * action needs to be performed in a specific order: ++ * ++ * a) Initialize the LET indices (PUT/GET to 0) ++ * b) Initialize the LET in HW (enables also prefetch unit) ++ * c) Move all RX buffers from waiting queue to working queue ++ * which involves also setting up of RX list elements ++ * d) Initialize the FIFO settings of Yukon2 (Watermark etc.) ++ * e) Initialize the Port (MAC, PHY etc.) ++ * f) Initialize the MC addresses ++ * ++ * Returns: N/A ++ */ ++void SkY2PortStart( ++SK_AC *pAC, /* adapter control context */ ++SK_IOC IoC, /* I/O control context (address of adapter registers) */ ++int Port) /* port to start */ ++{ ++ // SK_GEPORT *pPrt = &pAC->GIni.GP[Port]; ++ SK_HWLE *pLE; ++ SK_U32 DWord; ++ SK_U32 PrefetchReg; /* register for Put index */ ++#if defined(__x86_64__) ++ long cache0, cache1; ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> SkY2PortStart (Port %c)\n", 'A' + Port)); ++ ++ /* ++ ** Initialize the LET indices ++ */ ++ pAC->RxPort[Port].RxLET.Done = 0; ++ pAC->RxPort[Port].RxLET.Put = 0; ++ pAC->RxPort[Port].RxLET.HwPut = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Done = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.Put = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxALET.HwPut = 0; ++ if (HW_SYNC_TX_SUPPORTED(pAC)) { ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.Done = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.Put = 0; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSLET.HwPut = 0; ++ } ++ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) { ++ /* ++ ** It might be that we have to limit the RX buffers ++ ** effectively passed to HW. Initialize the start ++ ** value in that case... ++ */ ++ NbrRxBuffersInHW = 0; ++ } ++ ++ /* ++ ** TODO on dual net adapters we need to check if ++ ** StatusLETable need to be set... ++ ** ++ ** pAC->StatusLETable.Done = 0; ++ ** pAC->StatusLETable.Put = 0; ++ ** pAC->StatusLETable.HwPut = 0; ++ ** SkGeY2InitPrefetchUnit(pAC, pAC->IoBase, Q_ST, &pAC->StatusLETable); ++ */ ++ ++ /* ++ ** Initialize the LET in HW (enables also prefetch unit) ++ */ ++ SkGeY2InitPrefetchUnit(pAC, IoC,(Port == 0) ? Q_R1 : Q_R2, ++ &pAC->RxPort[Port].RxLET); ++ SkGeY2InitPrefetchUnit( pAC, IoC,(Port == 0) ? Q_XA1 : Q_XA2, ++ &pAC->TxPort[Port][TX_PRIO_LOW].TxALET); ++ if (HW_SYNC_TX_SUPPORTED(pAC)) { ++ SkGeY2InitPrefetchUnit( pAC, IoC, (Port == 0) ? Q_XS1 : Q_XS2, ++ &pAC->TxPort[Port][TX_PRIO_HIGH].TxSLET); ++ } ++ ++ ++ /* ++ ** Using new values for the watermarks and the timer for ++ ** low latency optimization ++ */ ++ if (pAC->LowLatency) { ++ SK_OUT8(IoC, STAT_FIFO_WM, 1); ++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 1); ++ SK_OUT32(IoC, STAT_LEV_TIMER_INI, 50); ++ SK_OUT32(IoC, STAT_ISR_TIMER_INI, 10); ++ } ++ ++ ++ /* ++ ** Initialize the Port (MAC, PHY etc.) ++ */ ++ if (SkGeInitPort(pAC, IoC, Port)) { ++ if (Port == 0) { ++ printk("%s: SkGeInitPort A failed.\n",pAC->dev[0]->name); ++ } else { ++ printk("%s: SkGeInitPort B failed.\n",pAC->dev[1]->name); ++ } ++ } ++ ++ if (IS_GMAC(pAC)) { ++ /* disable Rx GMAC FIFO Flush Mode */ ++ SK_OUT8(IoC, MR_ADDR(Port, RX_GMF_CTRL_T), (SK_U8) GMF_RX_F_FL_OFF); ++ } ++ ++ /* ++ ** Initialize the MC addresses ++ */ ++ SkAddrMcUpdate(pAC,IoC, Port); ++ ++ SkMacRxTxEnable(pAC, IoC,Port); ++ ++ if (pAC->RxPort[Port].UseRxCsum) { ++ SkGeRxCsum(pAC, IoC, Port, SK_TRUE); ++ ++ GET_RX_LE(pLE, &pAC->RxPort[Port].RxLET); ++ RXLE_SET_STACS1(pLE, pAC->CsOfs1); ++ RXLE_SET_STACS2(pLE, pAC->CsOfs2); ++ RXLE_SET_CTRL(pLE, 0); ++ ++ RXLE_SET_OPC(pLE, OP_TCPSTART | HW_OWNER); ++ FLUSH_OPC(pLE); ++ if (Port == 0) { ++ PrefetchReg=Y2_PREF_Q_ADDR(Q_R1,PREF_UNIT_PUT_IDX_REG); ++ } else { ++ PrefetchReg=Y2_PREF_Q_ADDR(Q_R2,PREF_UNIT_PUT_IDX_REG); ++ } ++ DWord = GET_PUT_IDX(&pAC->RxPort[Port].RxLET); ++ SK_OUT32(IoC, PrefetchReg, DWord); ++ UPDATE_HWPUT_IDX(&pAC->RxPort[Port].RxLET); ++ } ++ ++ pAC->GIni.GP[Port].PState = SK_PRT_RUN; ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== SkY2PortStart()\n")); ++} ++ ++/****************************************************************************** ++ * ++ * Local Functions ++ * ++ *****************************************************************************/ ++ ++/***************************************************************************** ++ * ++ * InitPacketQueues - initialize SW settings of packet queues ++ * ++ * Description: ++ * This function will initialize the packet queues for a port. ++ * ++ * Returns: N/A ++ */ ++static void InitPacketQueues( ++SK_AC *pAC, /* pointer to adapter control context */ ++int Port) /* index of port to be initialized */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("==> InitPacketQueues(Port %c)\n", 'A' + Port)); ++ ++ pAC->RxPort[Port].RxQ_working.pHead = NULL; ++ pAC->RxPort[Port].RxQ_working.pTail = NULL; ++ spin_lock_init(&pAC->RxPort[Port].RxQ_working.QueueLock); ++ ++ pAC->RxPort[Port].RxQ_waiting.pHead = NULL; ++ pAC->RxPort[Port].RxQ_waiting.pTail = NULL; ++ spin_lock_init(&pAC->RxPort[Port].RxQ_waiting.QueueLock); ++ ++ pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.pHead = NULL; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.pTail = NULL; ++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxQ_free.QueueLock); ++ ++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.pHead = NULL; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.pTail = NULL; ++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working.QueueLock); ++ ++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.pHead = NULL; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.pTail = NULL; ++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting.QueueLock); ++ ++#if USE_SYNC_TX_QUEUE ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.pHead = NULL; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.pTail = NULL; ++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_working.QueueLock); ++ ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.pHead = NULL; ++ pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.pTail = NULL; ++ spin_lock_init(&pAC->TxPort[Port][TX_PRIO_LOW].TxSQ_waiting.QueueLock); ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("<== InitPacketQueues(Port %c)\n", 'A' + Port)); ++} /* InitPacketQueues */ ++ ++/***************************************************************************** ++ * ++ * GiveTxBufferToHw - commits a previously allocated DMA area to HW ++ * ++ * Description: ++ * This functions gives transmit buffers to HW. If no list elements ++ * are available the buffers will be queued. ++ * ++ * Notes: ++ * This function can run only once in a system at one time. ++ * ++ * Returns: N/A ++ */ ++static void GiveTxBufferToHw( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context (address of registers) */ ++int Port) /* port index for which the buffer is used */ ++{ ++ SK_HWLE *pLE; ++ SK_PACKET *pSkPacket; ++ SK_FRAG *pFrag; ++ SK_PKT_QUEUE *pWorkQueue; /* corresponding TX queue */ ++ SK_PKT_QUEUE *pWaitQueue; ++ SK_LE_TABLE *pLETab; /* corresponding LETable */ ++ SK_BOOL SetOpcodePacketFlag; ++ SK_U32 HighAddress; ++ SK_U32 LowAddress; ++ SK_U16 TcpSumStart; ++ SK_U16 TcpSumWrite; ++ SK_U8 OpCode; ++ SK_U8 Ctrl; ++ unsigned long Flags; ++ unsigned long LockFlag; ++ int Protocol; ++#ifdef NETIF_F_TSO ++ SK_U16 Mss; ++ int TcpOptLen; ++ int IpTcpLen; ++#endif ++#if defined(__x86_64__) ++ long cache0, cache1; ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("==> GiveTxBufferToHw\n")); ++ ++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting))) { ++ return; ++ } ++ ++ spin_lock_irqsave(&pAC->TxQueueLock, LockFlag); ++ ++ /* ++ ** Initialize queue settings ++ */ ++ pWorkQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_working); ++ pWaitQueue = &(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting); ++ pLETab = &(pAC->TxPort[Port][TX_PRIO_LOW].TxALET); ++ ++ POP_FIRST_PKT_FROM_QUEUE(pWaitQueue, pSkPacket); ++ while (pSkPacket != NULL) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("\tWe have a packet to send %p\n", pSkPacket)); ++ ++ /* ++ ** the first frag of a packet gets opcode OP_PACKET ++ */ ++ SetOpcodePacketFlag = SK_TRUE; ++ pFrag = pSkPacket->pFrag; ++ ++ /* ++ ** fill list elements with data from fragments ++ */ ++ while (pFrag != NULL) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("\tGet LE\n")); ++#ifdef NETIF_F_TSO ++ Mss = skb_shinfo(pSkPacket->pMBuf)->tso_size; ++ if (Mss) { ++ TcpOptLen = ((pSkPacket->pMBuf->h.th->doff - 5) * 4); ++ IpTcpLen = ((pSkPacket->pMBuf->nh.iph->ihl * 4) + ++ sizeof(struct tcphdr)); ++ Mss += (TcpOptLen + IpTcpLen + C_LEN_ETHERMAC_HEADER); ++ } ++ if (pLETab->Bmu.RxTx.MssValue != Mss) { ++ pLETab->Bmu.RxTx.MssValue = Mss; ++ /* Take a new LE for TSO from the table */ ++ GET_TX_LE(pLE, pLETab); ++ ++#if 0 ++ if(pSkPacket->VlanId) { ++ TXLE_SET_OPC(pLE, OP_LRGLENVLAN | HW_OWNER); ++ TXLE_SET_VLAN(pLE, pSkPacket->VlanId); ++ pSkPacket->VlanId = 0; ++ Ctrl |= INS_VLAN; ++ } else { ++#endif ++ TXLE_SET_OPC(pLE, OP_LRGLEN | HW_OWNER); ++#if 0 ++ } ++#endif ++ /* set maximum segment size for new packet */ ++ TXLE_SET_LSLEN(pLE, pLETab->Bmu.RxTx.MssValue); ++ FLUSH_OPC(pLE) ; ++ } ++#endif ++ GET_TX_LE(pLE, pLETab); ++ Ctrl = 0; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("\tGot empty LE %p idx %d\n", pLE, GET_PUT_IDX(pLETab))); ++ ++ SK_DBG_DUMP_TX_LE(pLE); ++ ++ LowAddress = (SK_U32) (pFrag->pPhys & 0xffffffff); ++ HighAddress = (SK_U32) (pFrag->pPhys >> 32); ++ if (HighAddress != pLETab->BufHighAddr) { ++ /* set opcode high part of the address in one LE */ ++ OpCode = OP_ADDR64 | HW_OWNER; ++ ++ /* Set now the 32 high bits of the address */ ++ TXLE_SET_ADDR( pLE, HighAddress); ++ ++ /* Set the opcode into the LE */ ++ TXLE_SET_OPC(pLE, OpCode); ++ ++ /* Flush the LE to memory */ ++ FLUSH_OPC(pLE); ++ ++ /* remember the HighAddress we gave to the Hardware */ ++ pLETab->BufHighAddr = HighAddress; ++ ++ /* get a new LE because we filled one with high address */ ++ GET_TX_LE(pLE, pLETab); ++ } ++ ++ /* ++ ** TCP checksum offload ++ */ ++ if ((pSkPacket->pMBuf->ip_summed == CHECKSUM_HW) && ++ (SetOpcodePacketFlag == SK_TRUE)) { ++ Protocol = ((SK_U8)pSkPacket->pMBuf->data[C_OFFSET_IPPROTO] & 0xff); ++ /* if (Protocol & C_PROTO_ID_IP) { Ctrl = 0; } */ ++ if (Protocol & C_PROTO_ID_TCP) { ++ Ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; ++ /* TCP Checksum Calculation Start Position */ ++ TcpSumStart = C_LEN_ETHERMAC_HEADER + IP_HDR_LEN; ++ /* TCP Checksum Write Position */ ++ TcpSumWrite = TcpSumStart + TCP_CSUM_OFFS; ++ } else { ++ Ctrl = UDPTCP | CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; ++ /* TCP Checksum Calculation Start Position */ ++ TcpSumStart = ETHER_MAC_HDR_LEN + IP_HDR_LEN; ++ /* UDP Checksum Write Position */ ++ TcpSumWrite = TcpSumStart + UDP_CSUM_OFFS; ++ } ++ ++ if ((Ctrl) && (pLETab->Bmu.RxTx.TcpWp != TcpSumWrite)) { ++ /* Update the last value of the write position */ ++ pLETab->Bmu.RxTx.TcpWp = TcpSumWrite; ++ ++ /* Set the Lock field for this LE: */ ++ /* Checksum calculation for one packet only */ ++ TXLE_SET_LCKCS(pLE, 1); ++ ++ /* Set the start position for checksum. */ ++ TXLE_SET_STACS(pLE, TcpSumStart); ++ ++ /* Set the position where the checksum will be writen */ ++ TXLE_SET_WRICS(pLE, TcpSumWrite); ++ ++ /* Set the initial value for checksum */ ++ /* PseudoHeader CS passed from Linux -> 0! */ ++ TXLE_SET_INICS(pLE, 0); ++ ++ /* Set the opcode for tcp checksum */ ++ TXLE_SET_OPC(pLE, OP_TCPLISW | HW_OWNER); ++ ++ /* Flush the LE to memory */ ++ FLUSH_OPC(pLE); ++ ++ /* get a new LE because we filled one with data for checksum */ ++ GET_TX_LE(pLE, pLETab); ++ } ++ } /* end TCP offload handling */ ++ ++ TXLE_SET_ADDR(pLE, LowAddress); ++ TXLE_SET_LEN(pLE, pFrag->FragLen); ++ ++ if (SetOpcodePacketFlag){ ++#ifdef NETIF_F_TSO ++ if (Mss) { ++ OpCode = OP_LARGESEND | HW_OWNER; ++ } else { ++#endif ++ OpCode = OP_PACKET| HW_OWNER; ++#ifdef NETIF_F_TSO ++ } ++#endif ++ SetOpcodePacketFlag = SK_FALSE; ++ } else { ++ /* Follow packet in a sequence has always OP_BUFFER */ ++ OpCode = OP_BUFFER | HW_OWNER; ++ } ++ ++ pFrag = pFrag->pNext; ++ if (pFrag == NULL) { ++ /* mark last fragment */ ++ Ctrl |= EOP; ++ } ++ TXLE_SET_CTRL(pLE, Ctrl); ++ TXLE_SET_OPC(pLE, OpCode); ++ FLUSH_OPC(pLE); ++ SK_DBG_DUMP_TX_LE(pLE); ++ } ++ ++ /* ++ ** Remember next LE for tx complete ++ */ ++ pSkPacket->NextLE = GET_PUT_IDX(pLETab); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("\tNext LE for pkt %p is %d\n", pSkPacket, pSkPacket->NextLE)); ++ ++ /* ++ ** Add packet to working packets queue ++ */ ++ PUSH_PKT_AS_LAST_IN_QUEUE(pWorkQueue, pSkPacket); ++ ++ /* ++ ** give transmit start command ++ */ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) { ++ spin_lock(&pAC->SetPutIndexLock); ++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_XA1,0), &pAC->TxPort[0][0].TxALET); ++ spin_unlock(&pAC->SetPutIndexLock); ++ } else { ++ /* write put index */ ++ if (Port == 0) { ++ SK_OUT32(pAC->IoBase, ++ Y2_PREF_Q_ADDR(Q_XA1,PREF_UNIT_PUT_IDX_REG), ++ GET_PUT_IDX(&pAC->TxPort[0][0].TxALET)); ++ UPDATE_HWPUT_IDX(&pAC->TxPort[0][0].TxALET); ++ } else { ++ SK_OUT32(pAC->IoBase, ++ Y2_PREF_Q_ADDR(Q_XA2, PREF_UNIT_PUT_IDX_REG), ++ GET_PUT_IDX(&pAC->TxPort[1][0].TxALET)); ++ UPDATE_HWPUT_IDX(&pAC->TxPort[1][0].TxALET); ++ } ++ } ++ ++ if (IS_Q_EMPTY(&(pAC->TxPort[Port][TX_PRIO_LOW].TxAQ_waiting))) { ++ break; /* get out of while */ ++ } ++ POP_FIRST_PKT_FROM_QUEUE(pWaitQueue, pSkPacket); ++ } /* while (pSkPacket != NULL) */ ++ ++ spin_unlock_irqrestore(&pAC->TxQueueLock, LockFlag); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("<== GiveTxBufferToHw\n")); ++ return; ++} /* GiveTxBufferToHw */ ++ ++/*********************************************************************** ++ * ++ * GiveRxBufferToHw - commits a previously allocated DMA area to HW ++ * ++ * Description: ++ * This functions gives receive buffers to HW. If no list elements ++ * are available the buffers will be queued. ++ * ++ * Notes: ++ * This function can run only once in a system at one time. ++ * ++ * Returns: N/A ++ */ ++static void GiveRxBufferToHw( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context (address of registers) */ ++int Port, /* port index for which the buffer is used */ ++SK_PACKET *pPacket) /* receive buffer(s) */ ++{ ++ SK_HWLE *pLE; ++ SK_LE_TABLE *pLETab; ++ SK_BOOL Done = SK_FALSE; /* at least on LE changed? */ ++ SK_U32 LowAddress; ++ SK_U32 HighAddress; ++ SK_U32 PrefetchReg; /* register for Put index */ ++ unsigned NumFree; ++ unsigned Required; ++ unsigned long Flags; ++#if defined(__x86_64__) ++ long cache0, cache1; ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("==> GiveRxBufferToHw(Port %c, Packet %p)\n", 'A' + Port, pPacket)); ++ ++ pLETab = &pAC->RxPort[Port].RxLET; ++ ++ if (Port == 0) { ++ PrefetchReg = Y2_PREF_Q_ADDR(Q_R1, PREF_UNIT_PUT_IDX_REG); ++ } else { ++ PrefetchReg = Y2_PREF_Q_ADDR(Q_R2, PREF_UNIT_PUT_IDX_REG); ++ } ++ ++ if (pPacket != NULL) { ++ /* ++ ** For the time being, we have only one packet passed ++ ** to this function which might be changed in future! ++ */ ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ } ++ ++ /* ++ ** now pPacket contains the very first waiting packet ++ */ ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ while (pPacket != NULL) { ++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) { ++ if (NbrRxBuffersInHW >= MAX_NBR_RX_BUFFERS_IN_HW) { ++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== GiveRxBufferToHw()\n")); ++ return; ++ } ++ NbrRxBuffersInHW++; ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("Try to add packet %p\n", pPacket)); ++ ++ /* ++ ** Check whether we have enough listelements: ++ ** ++ ** we have to take into account that each fragment ++ ** may need an additional list element for the high ++ ** part of the address here I simplified it by ++ ** using MAX_FRAG_OVERHEAD maybe it's worth to split ++ ** this constant for Rx and Tx or to calculate the ++ ** real number of needed LE's ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tNum %d Put %d Done %d Free %d %d\n", ++ pLETab->Num, pLETab->Put, pLETab->Done, ++ NUM_FREE_LE_IN_TABLE(pLETab), ++ (NUM_FREE_LE_IN_TABLE(pLETab)))); ++ ++ Required = pPacket->NumFrags + MAX_FRAG_OVERHEAD; ++ NumFree = NUM_FREE_LE_IN_TABLE(pLETab); ++ if (NumFree) { ++ NumFree--; ++ } ++ ++ if (Required > NumFree ) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("\tOut of LEs have %d need %d\n", ++ NumFree, Required)); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tWaitQueue starts with packet %p\n", pPacket)); ++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ if (Done) { ++ /* ++ ** write Put index to BMU or Polling Unit and make the LE's ++ ** available for the hardware ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tWrite new Put Idx\n")); ++ ++ SK_OUT32(IoC, PrefetchReg, GET_PUT_IDX(pLETab)); ++ UPDATE_HWPUT_IDX(pLETab); ++ } ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== GiveRxBufferToHw()\n")); ++ return; ++ } else { ++ if (!AllocAndMapRxBuffer(pAC, pPacket, Port)) { ++ /* ++ ** Failure while allocating sk_buff might ++ ** be due to temporary short of resources ++ ** Maybe next time buffers are available. ++ ** Until this, the packet remains in the ++ ** RX waiting queue... ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("Failed to allocate Rx buffer\n")); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("WaitQueue starts with packet %p\n", pPacket)); ++ PUSH_PKT_AS_FIRST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ if (Done) { ++ /* ++ ** write Put index to BMU or Polling ++ ** Unit and make the LE's ++ ** available for the hardware ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tWrite new Put Idx\n")); ++ ++ SK_OUT32(IoC, PrefetchReg, GET_PUT_IDX(pLETab)); ++ UPDATE_HWPUT_IDX(pLETab); ++ } ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== GiveRxBufferToHw()\n")); ++ return; ++ } ++ } ++ Done = SK_TRUE; ++ ++ LowAddress = (SK_U32) (pPacket->pFrag->pPhys & 0xffffffff); ++ HighAddress = (SK_U32) (pPacket->pFrag->pPhys >> 32); ++ if (HighAddress != pLETab->BufHighAddr) { ++ /* get a new LE for high address */ ++ GET_RX_LE(pLE, pLETab); ++ ++ /* Set now the 32 high bits of the address */ ++ RXLE_SET_ADDR(pLE, HighAddress); ++ ++ /* Set the control bits of the address */ ++ RXLE_SET_CTRL(pLE, 0); ++ ++ /* Set the opcode into the LE */ ++ RXLE_SET_OPC(pLE, (OP_ADDR64 | HW_OWNER)); ++ ++ /* Flush the LE to memory */ ++ FLUSH_OPC(pLE); ++ ++ /* remember the HighAddress we gave to the Hardware */ ++ pLETab->BufHighAddr = HighAddress; ++ } ++ ++ /* ++ ** Fill data into listelement ++ */ ++ GET_RX_LE(pLE, pLETab); ++ RXLE_SET_ADDR(pLE, LowAddress); ++ RXLE_SET_LEN(pLE, pPacket->pFrag->FragLen); ++ RXLE_SET_CTRL(pLE, 0); ++ RXLE_SET_OPC(pLE, (OP_PACKET | HW_OWNER)); ++ FLUSH_OPC(pLE); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("=== LE filled\n")); ++ ++ SK_DBG_DUMP_RX_LE(pLE); ++ ++ /* ++ ** Remember next LE for rx complete ++ */ ++ pPacket->NextLE = GET_PUT_IDX(pLETab); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tPackets Next LE is %d\n", pPacket->NextLE)); ++ ++ /* ++ ** Add packet to working receive buffer queue and get ++ ** any next packet out of the waiting queue ++ */ ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_working, pPacket); ++ if (IS_Q_EMPTY(&(pAC->RxPort[Port].RxQ_waiting))) { ++ break; /* get out of while processing */ ++ } ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pPacket); ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tWaitQueue is empty\n")); ++ ++ if (Done) { ++ /* ++ ** write Put index to BMU or Polling Unit and make the LE's ++ ** available for the hardware ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("\tWrite new Put Idx\n")); ++ ++ /* Speed enhancement for a2 chipsets */ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_42)) { ++ spin_lock_irqsave(&pAC->SetPutIndexLock, Flags); ++ SkGeY2SetPutIndex(pAC, pAC->IoBase, Y2_PREF_Q_ADDR(Q_R1,0), pLETab); ++ spin_unlock_irqrestore(&pAC->SetPutIndexLock, Flags); ++ } else { ++ /* write put index */ ++ if (Port == 0) { ++ SK_OUT32(IoC, ++ Y2_PREF_Q_ADDR(Q_R1, PREF_UNIT_PUT_IDX_REG), ++ GET_PUT_IDX(pLETab)); ++ } else { ++ SK_OUT32(IoC, ++ Y2_PREF_Q_ADDR(Q_R2, PREF_UNIT_PUT_IDX_REG), ++ GET_PUT_IDX(pLETab)); ++ } ++ ++ /* Update put index */ ++ UPDATE_HWPUT_IDX(pLETab); ++ } ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== GiveRxBufferToHw()\n")); ++} /* GiveRxBufferToHw */ ++ ++/*********************************************************************** ++ * ++ * FillReceiveTableYukon2 - map any waiting RX buffers to HW ++ * ++ * Description: ++ * If the list element table contains more empty elements than ++ * specified this function tries to refill them. ++ * ++ * Notes: ++ * This function can run only once per port in a system at one time. ++ * ++ * Returns: N/A ++ */ ++static void FillReceiveTableYukon2( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context */ ++int Port) /* port index of RX */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("==> FillReceiveTableYukon2 (Port %c)\n", 'A' + Port)); ++ ++ if (NUM_FREE_LE_IN_TABLE(&pAC->RxPort[Port].RxLET) > ++ pAC->MaxUnusedRxLeWorking) { ++ ++ /* ++ ** Give alle waiting receive buffers down ++ ** The queue holds all RX packets that ++ ** need a fresh allocation of the sk_buff. ++ */ ++ if (pAC->RxPort[Port].RxQ_waiting.pHead != NULL) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("Waiting queue is not empty -> give it to HW")); ++ GiveRxBufferToHw(pAC, IoC, Port, NULL); ++ } ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== FillReceiveTableYukon2 ()\n")); ++} /* FillReceiveTableYukon2 */ ++ ++/****************************************************************************** ++ * ++ * ++ * HandleReceives - will pass any ready RX packet to kernel ++ * ++ * Description: ++ * This functions handles a received packet. It checks wether it is ++ * valid, updates the receive list element table and gives the receive ++ * buffer to Linux ++ * ++ * Notes: ++ * This function can run only once per port at one time in the system. ++ * ++ * Returns: N/A ++ */ ++static SK_BOOL HandleReceives( ++SK_AC *pAC, /* adapter control context */ ++int Port, /* port on which a packet has been received */ ++SK_U16 Len, /* number of bytes which was actually received */ ++SK_U32 FrameStatus, /* MAC frame status word */ ++SK_U16 Tcp1, /* first hw checksum */ ++SK_U16 Tcp2, /* second hw checksum */ ++SK_U32 Tist, /* timestamp */ ++SK_U16 Vlan) /* Vlan Id */ ++{ ++ ++ SK_PACKET *pSkPacket; ++ SK_LE_TABLE *pLETab; ++ SK_MBUF *pRlmtMbuf; /* buffer for giving RLMT frame */ ++ struct sk_buff *pMsg; /* ptr to message holding frame */ ++#ifdef __ia64__ ++ struct sk_buff *pNewMsg; /* used when IP aligning */ ++#endif ++ ++#ifdef CONFIG_SK98LIN_NAPI ++ SK_BOOL SlowPathLock = SK_FALSE; ++#else ++ SK_BOOL SlowPathLock = SK_TRUE; ++#endif ++ SK_BOOL IsGoodPkt; ++ SK_BOOL IsBc; ++ SK_BOOL IsMc; ++ SK_EVPARA EvPara; /* an event parameter union */ ++ SK_I16 LenToFree; /* must be signed integer */ ++ ++ unsigned long Flags; /* for spin lock */ ++ unsigned int RlmtNotifier; ++ unsigned short Type; ++ int IpFrameLength; ++ int FrameLength; /* total length of recvd frame */ ++ int HeaderLength; ++ int NumBytes; ++ int Result; ++ int Offset = 0; ++ ++#ifdef Y2_SYNC_CHECK ++ SK_U16 MyTcp; ++#endif ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("==> HandleReceives (Port %c)\n", 'A' + Port)); ++ ++ /* ++ ** initialize vars for selected port ++ */ ++ pLETab = &pAC->RxPort[Port].RxLET; ++ ++ /* ++ ** check whether we want to receive this packet ++ */ ++ SK_Y2_RXSTAT_CHECK_PKT(Len, FrameStatus, IsGoodPkt); ++ ++ /* ++ ** Remember length to free (in case of RxBuffer overruns; ++ ** unlikely, but might happen once in a while) ++ */ ++ LenToFree = (SK_I16) Len; ++ ++ /* ++ ** maybe we put these two checks into the SK_RXDESC_CHECK_PKT macro too ++ */ ++ if (Len > pAC->RxPort[Port].RxBufSize) { ++ IsGoodPkt = SK_FALSE; ++ } ++ ++ /* ++ ** take first receive buffer out of working queue ++ */ ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket); ++ if (pSkPacket == NULL) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_ERROR, ++ ("Packet not available. NULL pointer.\n")); ++ return(SK_TRUE); ++ } ++ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) { ++ NbrRxBuffersInHW--; ++ } ++ ++ /* ++ ** Verify the received length of the frame! Note that having ++ ** multiple RxBuffers being aware of one single receive packet ++ ** (one packet spread over multiple RxBuffers) is not supported ++ ** by this driver! ++ */ ++ if ((Len > pAC->RxPort[Port].RxBufSize) || ++ (Len > (SK_U16) pSkPacket->PacketLen)) { ++ IsGoodPkt = SK_FALSE; ++ } ++ ++ /* ++ ** Reset own bit in LE's between old and new Done index ++ ** This is not really necessary but makes debugging easier ++ */ ++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, pSkPacket->NextLE); ++ ++ /* ++ ** Free the list elements for new Rx buffers ++ */ ++ SET_DONE_INDEX(pLETab, pSkPacket->NextLE); ++ pMsg = pSkPacket->pMBuf; ++ FrameLength = Len; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("Received frame of length %d on port %d\n",FrameLength, Port)); ++ ++ if (!IsGoodPkt) { ++ /* ++ ** release the DMA mapping ++ */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ++ pci_dma_sync_single(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++ ++#else ++ pci_dma_sync_single_for_cpu(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++#endif ++ ++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf); ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== HandleReceives (Port %c)\n", 'A' + Port)); ++ ++ /* ++ ** Sanity check for RxBuffer overruns... ++ */ ++ LenToFree = LenToFree - (pSkPacket->pFrag->FragLen); ++ while (LenToFree > 0) { ++ POP_FIRST_PKT_FROM_QUEUE(&pAC->RxPort[Port].RxQ_working, pSkPacket); ++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) { ++ NbrRxBuffersInHW--; ++ } ++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, pSkPacket->NextLE); ++ SET_DONE_INDEX(pLETab, pSkPacket->NextLE); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ++ pci_dma_sync_single(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++#else ++ pci_dma_sync_single_for_device(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++#endif ++ ++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf); ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket); ++ LenToFree = LenToFree - ((SK_I16)(pSkPacket->pFrag->FragLen)); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("<==HandleReceives (Port %c) drop faulty len pkt(2)\n",'A'+Port)); ++ } ++ return(SK_TRUE); ++ } else { ++ /* ++ ** Release the DMA mapping ++ */ ++ pci_unmap_single(pAC->PciDev, ++ pSkPacket->pFrag->pPhys, ++ pAC->RxPort[Port].RxBufSize, ++ PCI_DMA_FROMDEVICE); ++ ++ skb_put(pMsg, FrameLength); /* set message len */ ++ pMsg->ip_summed = CHECKSUM_NONE; /* initial default */ ++ ++#ifdef Y2_SYNC_CHECK ++ pAC->FramesWithoutSyncCheck++; ++ if (pAC->FramesWithoutSyncCheck > Y2_RESYNC_WATERMARK) { ++ if ((Tcp1 != 1) && (Tcp2 != 0)) { ++ pAC->FramesWithoutSyncCheck = 0; ++ MyTcp = (SK_U16) SkCsCalculateChecksum( ++ &pMsg->data[14], ++ FrameLength - 14); ++ if (MyTcp != Tcp1) { ++ /* Queue port reset event */ ++ SkLocalEventQueue(pAC, SKGE_DRV, ++ SK_DRV_RECOVER,Port,-1,SK_FALSE); ++ } ++ } ++ } ++#endif ++ ++ if (pAC->RxPort[Port].UseRxCsum) { ++ Type = ntohs(*((short*)&pMsg->data[12])); ++ if (Type == 0x800) { ++ *((char *)&(IpFrameLength)) = pMsg->data[16]; ++ *(((char *)&(IpFrameLength))+1) = pMsg->data[17]; ++ IpFrameLength = ntohs(IpFrameLength); ++ HeaderLength = FrameLength - IpFrameLength; ++ if (HeaderLength == 0xe) { ++ Result = ++ SkCsGetReceiveInfo(pAC,&pMsg->data[14],Tcp1,Tcp2, Port); ++ if ((Result == SKCS_STATUS_IP_FRAGMENT) || ++ (Result == SKCS_STATUS_IP_CSUM_OK) || ++ (Result == SKCS_STATUS_TCP_CSUM_OK) || ++ (Result == SKCS_STATUS_UDP_CSUM_OK)) { ++ pMsg->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if ((Result == SKCS_STATUS_TCP_CSUM_ERROR) || ++ (Result == SKCS_STATUS_UDP_CSUM_ERROR) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR_UDP) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR_TCP) || ++ (Result == SKCS_STATUS_IP_CSUM_ERROR)) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("skge: CRC error. Frame dropped!\n")); ++ DEV_KFREE_SKB_ANY(pMsg); ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket); ++ SK_DBG_MSG(pAC,SK_DBGMOD_DRV,SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<==HandleReceives(Port %c)\n",'A'+Port)); ++ return(SK_TRUE); ++ } else { ++ pMsg->ip_summed = CHECKSUM_NONE; ++ } ++ } /* end if (HeaderLength == valid) */ ++ } /* end if (Type == 0x800) -> IP frame */ ++ } /* end if (pRxPort->UseRxCsum) */ ++ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("V")); ++ RlmtNotifier = SK_RLMT_RX_PROTOCOL; ++ ++ IsBc = (FrameStatus & GMR_FS_BC) ? SK_TRUE : SK_FALSE; ++ SK_RLMT_PRE_LOOKAHEAD(pAC,Port,FrameLength, ++ IsBc,&Offset,&NumBytes); ++ if (NumBytes != 0) { ++ IsMc = (FrameStatus & GMR_FS_MC) ? SK_TRUE : SK_FALSE; ++ SK_RLMT_LOOKAHEAD(pAC,Port,&pMsg->data[Offset], ++ IsBc,IsMc,&RlmtNotifier); ++ } ++ ++ if (RlmtNotifier == SK_RLMT_RX_PROTOCOL) { ++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("W")); ++ if ((Port == pAC->ActivePort)||(pAC->RlmtNets == 2)) { ++ /* send up only frames from active port */ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("U")); ++#ifdef xDEBUG ++ DumpMsg(pMsg, "Rx"); ++#endif ++ SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC, ++ FrameLength, Port); ++#ifdef __ia64__ ++ pNewMsg = alloc_skb(pMsg->len, GFP_ATOMIC); ++ skb_reserve(pNewMsg, 2); /* to align IP */ ++ SK_MEMCPY(pNewMsg->data,pMsg->data,pMsg->len); ++ pNewMsg->ip_summed = pMsg->ip_summed; ++ skb_put(pNewMsg, pMsg->len); ++ DEV_KFREE_SKB_ANY(pMsg); ++ pMsg = pNewMsg; ++#endif ++ pMsg->dev = pAC->dev[Port]; ++ pMsg->protocol = eth_type_trans(pMsg, ++ pAC->dev[Port]); ++ netif_rx(pMsg); ++ pAC->dev[Port]->last_rx = jiffies; ++ } else { /* drop frame */ ++ SK_DBG_MSG(NULL,SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("D")); ++ DEV_KFREE_SKB_ANY(pMsg); ++ } ++ } else { /* This is an RLMT-packet! */ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("R")); ++ pRlmtMbuf = SkDrvAllocRlmtMbuf(pAC, ++ pAC->IoBase, FrameLength); ++ if (pRlmtMbuf != NULL) { ++ pRlmtMbuf->pNext = NULL; ++ pRlmtMbuf->Length = FrameLength; ++ pRlmtMbuf->PortIdx = Port; ++ EvPara.pParaPtr = pRlmtMbuf; ++ SK_MEMCPY((char*)(pRlmtMbuf->pData), ++ (char*)(pMsg->data),FrameLength); ++ ++ if (SlowPathLock == SK_TRUE) { ++ spin_lock_irqsave(&pAC->SlowPathLock, Flags); ++ SkEventQueue(pAC, SKGE_RLMT, ++ SK_RLMT_PACKET_RECEIVED, ++ EvPara); ++ pAC->CheckQueue = SK_TRUE; ++ spin_unlock_irqrestore(&pAC->SlowPathLock, Flags); ++ } else { ++ SkEventQueue(pAC, SKGE_RLMT, ++ SK_RLMT_PACKET_RECEIVED, ++ EvPara); ++ pAC->CheckQueue = SK_TRUE; ++ } ++ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS,("Q")); ++ } ++ if (pAC->dev[Port]->flags & (IFF_PROMISC | IFF_ALLMULTI)) { ++#ifdef __ia64__ ++ pNewMsg = alloc_skb(pMsg->len, GFP_ATOMIC); ++ skb_reserve(pNewMsg, 2); /* to align IP */ ++ SK_MEMCPY(pNewMsg->data,pMsg->data,pMsg->len); ++ pNewMsg->ip_summed = pMsg->ip_summed; ++ pNewMsg->len = pMsg->len; ++ DEV_KFREE_SKB_ANY(pMsg); ++ pMsg = pNewMsg; ++#endif ++ pMsg->dev = pAC->dev[Port]; ++ pMsg->protocol = eth_type_trans(pMsg,pAC->dev[Port]); ++ netif_rx(pMsg); ++ pAC->dev[Port]->last_rx = jiffies; ++ } else { ++ DEV_KFREE_SKB_ANY(pMsg); ++ } ++ } /* if packet for rlmt */ ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[Port].RxQ_waiting, pSkPacket); ++ } /* end if-else (IsGoodPkt) */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<== HandleReceives (Port %c)\n", 'A' + Port)); ++ return(SK_TRUE); ++ ++} /* HandleReceives */ ++ ++/*********************************************************************** ++ * ++ * CheckForSendComplete - Frees any freeable Tx bufffer ++ * ++ * Description: ++ * This function checks the queues of a port for completed send ++ * packets and returns these packets back to the OS. ++ * ++ * Notes: ++ * This function can run simultaneously for both ports if ++ * the OS function OSReturnPacket() can handle this, ++ * ++ * Such a send complete does not mean, that the packet is really ++ * out on the wire. We just know that the adapter has copied it ++ * into its internal memory and the buffer in the systems memory ++ * is no longer needed. ++ * ++ * Returns: N/A ++ */ ++static void CheckForSendComplete( ++SK_AC *pAC, /* pointer to adapter control context */ ++SK_IOC IoC, /* I/O control context */ ++int Port, /* port index */ ++SK_PKT_QUEUE *pPQ, /* tx working packet queue to check */ ++SK_LE_TABLE *pLETab, /* corresponding list element table */ ++unsigned int Done) /* done index reported for this LET */ ++{ ++ SK_PACKET *pSkPacket; ++ SK_PKT_QUEUE SendCmplPktQ = { NULL, NULL, SPIN_LOCK_UNLOCKED }; ++ SK_BOOL DoWakeQueue = SK_FALSE; ++ unsigned long Flags; ++ unsigned Put; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("==> CheckForSendComplete(Port %c)\n", 'A' + Port)); ++ ++ /* ++ ** Reset own bit in LE's between old and new Done index ++ ** This is not really necessairy but makes debugging easier ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Clear Own Bits in TxTable from %d to %d\n", ++ pLETab->Done, (Done == 0) ? ++ NUM_LE_IN_TABLE(pLETab) : ++ (Done - 1))); ++ ++ spin_lock_irqsave(&(pPQ->QueueLock), Flags); ++ ++ CLEAR_LE_OWN_FROM_DONE_TO(pLETab, Done); ++ ++ Put = GET_PUT_IDX(pLETab); ++ ++ /* ++ ** Check whether some packets have been completed ++ */ ++ PLAIN_POP_FIRST_PKT_FROM_QUEUE(pPQ, pSkPacket); ++ while (pSkPacket != NULL) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Check Completion of Tx packet %p\n", pSkPacket)); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Put %d NewDone %d NextLe of Packet %d\n", Put, Done, ++ pSkPacket->NextLE)); ++ ++ if ((Put > Done) && ++ ((pSkPacket->NextLE > Put) || (pSkPacket->NextLE <= Done))) { ++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Packet finished (a)\n")); ++ } else if ((Done > Put) && ++ (pSkPacket->NextLE > Put) && (pSkPacket->NextLE <= Done)) { ++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Packet finished (b)\n")); ++ } else if ((Done == TXA_MAX_LE-1) && (Put == 0) && (pSkPacket->NextLE == 0)) { ++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Packet finished (b)\n")); ++ DoWakeQueue = SK_TRUE; ++ } else if (Done == Put) { ++ /* all packets have been sent */ ++ PLAIN_PUSH_PKT_AS_LAST_IN_QUEUE(&SendCmplPktQ, pSkPacket); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Packet finished (c)\n")); ++ } else { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("Packet not yet finished\n")); ++ PLAIN_PUSH_PKT_AS_FIRST_IN_QUEUE(pPQ, pSkPacket); ++ break; ++ } ++ PLAIN_POP_FIRST_PKT_FROM_QUEUE(pPQ, pSkPacket); ++ } ++ spin_unlock_irqrestore(&(pPQ->QueueLock), Flags); ++ ++ /* ++ ** Set new done index in list element table ++ */ ++ SET_DONE_INDEX(pLETab, Done); ++ ++ /* ++ ** All TX packets that are send complete should be added to ++ ** the free queue again for new sents to come ++ */ ++ pSkPacket = SendCmplPktQ.pHead; ++ while (pSkPacket != NULL) { ++ while (pSkPacket->pFrag != NULL) { ++ pci_unmap_page(pAC->PciDev, ++ (dma_addr_t) pSkPacket->pFrag->pPhys, ++ pSkPacket->pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++ pSkPacket->pFrag = pSkPacket->pFrag->pNext; ++ } ++ ++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf); ++ pSkPacket->pMBuf = NULL; ++ pSkPacket = pSkPacket->pNext; /* get next packet */ ++ } ++ ++ /* ++ ** Append the available TX packets back to free queue ++ */ ++ if (SendCmplPktQ.pHead != NULL) { ++ spin_lock_irqsave(&(pAC->TxPort[Port][0].TxQ_free.QueueLock), Flags); ++ if (pAC->TxPort[Port][0].TxQ_free.pTail != NULL) { ++ pAC->TxPort[Port][0].TxQ_free.pTail->pNext = SendCmplPktQ.pHead; ++ pAC->TxPort[Port][0].TxQ_free.pTail = SendCmplPktQ.pTail; ++ if (pAC->TxPort[Port][0].TxQ_free.pHead->pNext == NULL) { ++ netif_wake_queue(pAC->dev[Port]); ++ } ++ } else { ++ pAC->TxPort[Port][0].TxQ_free.pHead = SendCmplPktQ.pHead; ++ pAC->TxPort[Port][0].TxQ_free.pTail = SendCmplPktQ.pTail; ++ netif_wake_queue(pAC->dev[Port]); ++ } ++ if (Done == Put) { ++ netif_wake_queue(pAC->dev[Port]); ++ } ++ if (DoWakeQueue) { ++ netif_wake_queue(pAC->dev[Port]); ++ DoWakeQueue = SK_FALSE; ++ } ++ spin_unlock_irqrestore(&pAC->TxPort[Port][0].TxQ_free.QueueLock, Flags); ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("<== CheckForSendComplete()\n")); ++ ++ return; ++} /* CheckForSendComplete */ ++ ++/***************************************************************************** ++ * ++ * UnmapAndFreeTxPktBuffer ++ * ++ * Description: ++ * This function free any allocated space of receive buffers ++ * ++ * Arguments: ++ * pAC - A pointer to the adapter context struct. ++ * ++ */ ++static void UnmapAndFreeTxPktBuffer( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_PACKET *pSkPacket, /* pointer to port struct of ring to fill */ ++int TxPort) /* TX port index */ ++{ ++ SK_FRAG *pFrag = pSkPacket->pFrag; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("--> UnmapAndFreeTxPktBuffer\n")); ++ ++ while (pFrag != NULL) { ++ pci_unmap_page(pAC->PciDev, ++ (dma_addr_t) pFrag->pPhys, ++ pFrag->FragLen, ++ PCI_DMA_FROMDEVICE); ++ pFrag = pFrag->pNext; ++ } ++ ++ DEV_KFREE_SKB_ANY(pSkPacket->pMBuf); ++ pSkPacket->pMBuf = NULL; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_TX_PROGRESS, ++ ("<-- UnmapAndFreeTxPktBuffer\n")); ++} ++ ++/***************************************************************************** ++ * ++ * HandleStatusLEs ++ * ++ * Description: ++ * This function checks for any new status LEs that may have been ++ * received. Those status LEs may either be Rx or Tx ones. ++ * ++ * Returns: N/A ++ */ ++static SK_BOOL HandleStatusLEs( ++#ifdef CONFIG_SK98LIN_NAPI ++SK_AC *pAC, /* pointer to adapter context */ ++int *WorkDone, /* Done counter needed for NAPI */ ++int WorkToDo) /* ToDo counter for NAPI */ ++#else ++SK_AC *pAC) /* pointer to adapter context */ ++#endif ++{ ++ int DoneTxA[SK_MAX_MACS]; ++ int DoneTxS[SK_MAX_MACS]; ++ int Port; ++ SK_BOOL handledStatLE = SK_FALSE; ++ SK_BOOL NewDone = SK_FALSE; ++ SK_HWLE *pLE; ++ SK_U16 HighVal; ++ SK_U32 LowVal; ++ SK_U8 OpCode; ++ int i; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("==> HandleStatusLEs\n")); ++ ++ do { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Check next Own Bit of ST-LE[%d]: 0x%li \n", ++ (pAC->StatusLETable.Done + 1) % NUM_LE_IN_TABLE(&pAC->StatusLETable), ++ OWN_OF_FIRST_LE(&pAC->StatusLETable))); ++ ++ while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER) { ++ GET_ST_LE(pLE, &pAC->StatusLETable); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Working on finished status LE[%d]:\n", ++ GET_DONE_INDEX(&pAC->StatusLETable))); ++ SK_DBG_DUMP_ST_LE(pLE); ++ handledStatLE = SK_TRUE; ++ OpCode = STLE_GET_OPC(pLE) & ~HW_OWNER; ++ Port = STLE_GET_LINK(pLE); ++ ++#ifdef USE_TIST_FOR_RESET ++ if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) { ++ /* do we just have a tist LE ? */ ++ if ((OpCode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) { ++ for (i = 0; i < pAC->GIni.GIMacsFound; i++) { ++ if (SK_PORT_WAITING_FOR_ANY_TIST(pAC, i)) { ++ /* if a port is waiting for any tist it is done */ ++ SK_CLR_STATE_FOR_PORT(pAC, i); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Got any Tist on port %c (now 0x%X!!!)\n", ++ 'A' + i, pAC->AdapterResetState)); ++ } ++ if (SK_PORT_WAITING_FOR_SPECIFIC_TIST(pAC, i)) { ++ Y2_GET_TIST_LOW_VAL(pAC->IoBase, &LowVal); ++ if ((pAC->MinTistHi != pAC->GIni.GITimeStampCnt) || ++ (pAC->MinTistLo < LowVal)) { ++ /* time is up now */ ++ SK_CLR_STATE_FOR_PORT(pAC, i); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Got expected Tist on Port %c (now 0x%X)!!!\n", ++ 'A' + i, pAC->AdapterResetState)); ++#ifdef Y2_SYNC_CHECK ++ pAC->FramesWithoutSyncCheck = ++ Y2_RESYNC_WATERMARK; ++#endif ++ } else { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Got Tist %l:%l on Port %c but still waiting\n", ++ pAC->GIni.GITimeStampCnt, pAC->MinTistLo, ++ 'A' + i)); ++ } ++ } ++ } ++#ifndef Y2_RECOVERY ++ if (!SK_ADAPTER_WAITING_FOR_TIST(pAC)) { ++ /* nobody needs tist anymore - turn it off */ ++ Y2_DISABLE_TIST(pAC->IoBase); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Turn off Tist !!!\n")); ++ } ++#endif ++ } else if (OpCode == OP_TXINDEXLE) { ++ /* ++ * change OpCode to notify the folowing code ++ * to ignore the done index from this LE ++ * unfortunately tist LEs will be generated only ++ * for RxStat LEs ++ * so in order to get a safe Done index for a ++ * port currently waiting for a tist we have to ++ * get the done index directly from the BMU ++ */ ++ OpCode = OP_MOD_TXINDEX; ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Mark unusable TX_INDEX LE!!!\n")); ++ } else { ++ if (SK_PORT_WAITING_FOR_TIST(pAC, Port)) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Ignore LE 0x%X on Port %c!!!\n", ++ OpCode, 'A' + Port)); ++ OpCode = OP_MOD_LE; ++#ifdef Y2_LE_CHECK ++ /* mark entries invalid */ ++ pAC->LastOpc = 0xFF; ++ pAC->LastPort = 3; ++#endif ++ } ++ } ++ } /* if (SK_ADAPTER_WAITING_FOR_TIST(pAC)) */ ++#endif ++ ++ ++ ++ ++ ++#ifdef Y2_LE_CHECK ++ if (pAC->LastOpc != 0xFF) { ++ /* last opc is valid ++ * check if current opcode follows last opcode ++ */ ++ if ((((OpCode & OP_RXTIMESTAMP) == OP_RXTIMESTAMP) && (pAC->LastOpc != OP_RXSTAT)) || ++ (((OpCode & OP_RXCHKS) == OP_RXCHKS) && (pAC->LastOpc != OP_RXTIMESTAMP)) || ++ ((OpCode == OP_RXSTAT) && (pAC->LastOpc != OP_RXCHKS))) { ++ ++ /* opcode sequence broken ++ * current LE is invalid ++ */ ++ ++ if (pAC->LastOpc == OP_RXTIMESTAMP) { ++ /* force invalid checksum */ ++ pLE->St.StUn.StRxTCPCSum.RxTCPSum1 = 1; ++ pLE->St.StUn.StRxTCPCSum.RxTCPSum2 = 0; ++ OpCode = pAC->LastOpc = OP_RXCHKS; ++ Port = pAC->LastPort; ++ } else if (pAC->LastOpc == OP_RXCHKS) { ++ /* force invalid frame */ ++ Port = pAC->LastPort; ++ pLE->St.Stat.BufLen = 64; ++ pLE->St.StUn.StRxStatWord = GMR_FS_CRC_ERR; ++ OpCode = pAC->LastOpc = OP_RXSTAT; ++#ifdef Y2_SYNC_CHECK ++ /* force rx sync check */ ++ pAC->FramesWithoutSyncCheck = Y2_RESYNC_WATERMARK; ++#endif ++ } else if (pAC->LastOpc == OP_RXSTAT) { ++ /* create dont care tist */ ++ pLE->St.StUn.StRxTimeStamp = 0; ++ OpCode = pAC->LastOpc = OP_RXTIMESTAMP; ++ /* dont know the port yet */ ++ } else { ++#ifdef DEBUG ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Unknown LastOpc %X for Timestamp on port %c.\n", ++ pAC->LastOpc, Port)); ++#endif ++ } ++ } ++ } ++#endif ++ ++ switch (OpCode) { ++ case OP_RXSTAT: ++#ifdef Y2_RECOVERY ++ pAC->LastOpc = OP_RXSTAT; ++#endif ++ /* ++ ** This is always the last Status LE belonging ++ ** to a received packet -> handle it... ++ */ ++ if ((Port != 0) && (Port != 1)) { ++ /* Unknown port */ ++ panic("sk98lin: Unknown port %d\n", ++ Port); ++ } ++ ++ HandleReceives( ++ pAC, ++ Port, ++ STLE_GET_LEN(pLE), ++ STLE_GET_FRSTATUS(pLE), ++ pAC->StatusLETable.Bmu.Stat.TcpSum1, ++ pAC->StatusLETable.Bmu.Stat.TcpSum2, ++ pAC->StatusLETable.Bmu.Stat.RxTimeStamp, ++ pAC->StatusLETable.Bmu.Stat.VlanId); ++#ifdef CONFIG_SK98LIN_NAPI ++ if (*WorkDone >= WorkToDo) { ++ break; ++ } ++ (*WorkDone)++; ++#endif ++ break; ++ case OP_RXVLAN: ++ /* this value will be used for next RXSTAT */ ++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE); ++ break; ++ case OP_RXTIMEVLAN: ++ /* this value will be used for next RXSTAT */ ++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE); ++ /* fall through */ ++ case OP_RXTIMESTAMP: ++ /* this value will be used for next RXSTAT */ ++ pAC->StatusLETable.Bmu.Stat.RxTimeStamp = STLE_GET_TIST(pLE); ++#ifdef Y2_RECOVERY ++ pAC->LastOpc = OP_RXTIMESTAMP; ++ pAC->LastPort = Port; ++#endif ++ break; ++ case OP_RXCHKSVLAN: ++ /* this value will be used for next RXSTAT */ ++ pAC->StatusLETable.Bmu.Stat.VlanId = STLE_GET_VLAN(pLE); ++ /* fall through */ ++ case OP_RXCHKS: ++ /* this value will be used for next RXSTAT */ ++ pAC->StatusLETable.Bmu.Stat.TcpSum1 = STLE_GET_TCP1(pLE); ++ pAC->StatusLETable.Bmu.Stat.TcpSum2 = STLE_GET_TCP2(pLE); ++#ifdef Y2_RECOVERY ++ pAC->LastPort = Port; ++ pAC->LastOpc = OP_RXCHKS; ++#endif ++ break; ++ case OP_RSS_HASH: ++ /* this value will be used for next RXSTAT */ ++#if 0 ++ pAC->StatusLETable.Bmu.Stat.RssHashValue = STLE_GET_RSS(pLE); ++#endif ++ break; ++ case OP_TXINDEXLE: ++ /* ++ ** :;:; TODO ++ ** it would be possible to check for which queues ++ ** the index has been changed and call ++ ** CheckForSendComplete() only for such queues ++ */ ++ STLE_GET_DONE_IDX(pLE,LowVal,HighVal); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("LowVal: 0x%x HighVal: 0x%x\n", LowVal, HighVal)); ++ ++ /* ++ ** It would be possible to check whether we really ++ ** need the values for second port or sync queue, ++ ** but I think checking whether we need them is ++ ** more expensive than the calculation ++ */ ++ DoneTxA[0] = STLE_GET_DONE_IDX_TXA1(LowVal,HighVal); ++ DoneTxS[0] = STLE_GET_DONE_IDX_TXS1(LowVal,HighVal); ++ DoneTxA[1] = STLE_GET_DONE_IDX_TXA2(LowVal,HighVal); ++ DoneTxS[1] = STLE_GET_DONE_IDX_TXS2(LowVal,HighVal); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("DoneTxa1 0x%x DoneTxS1: 0x%x DoneTxa2 0x%x DoneTxS2: 0x%x\n", ++ DoneTxA[0], DoneTxS[0], DoneTxA[1], DoneTxS[1])); ++ ++ NewDone = SK_TRUE; ++ break; ++#ifdef USE_TIST_FOR_RESET ++ case OP_MOD_TXINDEX: ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("OP_MOD_TXINDEX\n")); ++ SK_IN16(pAC->IoBase, Q_ADDR(Q_XA1, Q_DONE), &DoneTxA[0]); ++ if (pAC->GIni.GIMacsFound > 1) { ++ SK_IN16(pAC->IoBase, Q_ADDR(Q_XA2, Q_DONE), &DoneTxA[1]); ++ } ++ NewDone = SK_TRUE; ++ break; ++ case OP_MOD_LE: ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DUMP, ++ ("Ignore marked LE on port in Reset\n")); ++ break; ++#endif ++ ++ default: ++ /* ++ ** Have to handle the illegal Opcode in Status LE ++ */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Unexpected OpCode\n")); ++ break; ++ } ++ ++#ifdef Y2_RECOVERY ++ OpCode = STLE_GET_OPC(pLE) & ~HW_OWNER; ++ STLE_SET_OPC(pLE, OpCode); ++#else ++ /* ++ ** Reset own bit we have to do this in order to detect a overflow ++ */ ++ STLE_SET_OPC(pLE, SW_OWNER); ++#endif ++ } /* while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER) */ ++ ++ /* ++ ** Now handle any new transmit complete ++ */ ++ if (NewDone) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Done Index for Tx BMU has been changed\n")); ++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) { ++ /* ++ ** Do we have a new Done idx ? ++ */ ++ if (DoneTxA[Port] != GET_DONE_INDEX(&pAC->TxPort[Port][0].TxALET)) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Check TxA%d\n", Port + 1)); ++ CheckForSendComplete(pAC, pAC->IoBase, Port, ++ &(pAC->TxPort[Port][0].TxAQ_working), ++ &pAC->TxPort[Port][0].TxALET, ++ DoneTxA[Port]); ++ } else { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("No changes for TxA%d\n", Port + 1)); ++ } ++#if USE_SYNC_TX_QUEUE ++ if (HW_SYNC_TX_SUPPORTED(pAC)) { ++ /* ++ ** Do we have a new Done idx ? ++ */ ++ if (DoneTxS[Port] != ++ GET_DONE_INDEX(&pAC->TxPort[Port][0].TxSLET)) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_INT_SRC, ++ ("Check TxS%d\n", Port)); ++ CheckForSendComplete(pAC, pAC->IoBase, Port, ++ &(pAC->TxPort[Port][0].TxSQ_working), ++ &pAC->TxPort[Port][0].TxSLET, ++ DoneTxS[Port]); ++ } else { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_INT_SRC, ++ ("No changes for TxS%d\n", Port)); ++ } ++ } ++#endif ++ } ++ } ++ NewDone = SK_FALSE; ++ ++ /* ++ ** Check whether we have to refill our RX table ++ */ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_420)) { ++ if (NbrRxBuffersInHW < MAX_NBR_RX_BUFFERS_IN_HW) { ++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Check for refill of RxBuffers on Port %c\n", 'A' + Port)); ++ FillReceiveTableYukon2(pAC, pAC->IoBase, Port); ++ } ++ } ++ } else { ++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_INT_SRC, ++ ("Check for refill of RxBuffers on Port %c\n", 'A' + Port)); ++ if (NUM_FREE_LE_IN_TABLE(&pAC->RxPort[Port].RxLET) >= 64) { ++ FillReceiveTableYukon2(pAC, pAC->IoBase, Port); ++ } ++ } ++ } ++#ifdef CONFIG_SK98LIN_NAPI ++ if (*WorkDone >= WorkToDo) { ++ break; ++ } ++#endif ++ } while (OWN_OF_FIRST_LE(&pAC->StatusLETable) == HW_OWNER); ++ ++ /* ++ ** Clear status BMU ++ */ ++ SK_OUT32(pAC->IoBase, STAT_CTRL, SC_STAT_CLR_IRQ); ++ ++ return(handledStatLE); ++} /* HandleStatusLEs */ ++ ++/***************************************************************************** ++ * ++ * AllocateAndInitLETables - allocate memory for the LETable and init ++ * ++ * Description: ++ * This function will allocate space for the LETable and will also ++ * initialize them. The size of the tables must have been specified ++ * before. ++ * ++ * Arguments: ++ * pAC - A pointer to the adapter context struct. ++ * ++ * Returns: ++ * SK_TRUE - all LETables initialized ++ * SK_FALSE - failed ++ */ ++static SK_BOOL AllocateAndInitLETables( ++SK_AC *pAC) /* pointer to adapter context */ ++{ ++ char *pVirtMemAddr; ++ dma_addr_t pPhysMemAddr = 0; ++ SK_U32 CurrMac; ++ unsigned Size; ++ unsigned Aligned; ++ unsigned Alignment; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("==> AllocateAndInitLETables()\n")); ++ ++ /* ++ ** Determine how much memory we need with respect to alignment ++ */ ++ Alignment = MAX_LEN_OF_LE_TAB; ++ Size = 0; ++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) { ++ SK_ALIGN_SIZE(LE_TAB_SIZE(RX_MAX_LE), Alignment, Aligned); ++ Size += Aligned; ++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXA_MAX_LE), Alignment, Aligned); ++ Size += Aligned; ++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXS_MAX_LE), Alignment, Aligned); ++ Size += Aligned; ++ } ++ SK_ALIGN_SIZE(LE_TAB_SIZE(ST_MAX_LE), Alignment, Aligned); ++ Size += Aligned; ++ Size += Alignment; ++ pAC->SizeOfAlignedLETables = Size; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("Need %08x bytes in total\n", Size)); ++ ++ /* ++ ** Allocate the memory ++ */ ++ pVirtMemAddr = pci_alloc_consistent(pAC->PciDev, Size, &pPhysMemAddr); ++ if (pVirtMemAddr == NULL) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, ++ SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR, ++ ("AllocateAndInitLETables: kernel malloc failed!\n")); ++ return (SK_FALSE); ++ } ++ ++ /* ++ ** Initialize the memory ++ */ ++ SK_MEMSET(pVirtMemAddr, 0, Size); ++ ALIGN_ADDR(pVirtMemAddr, Alignment); /* Macro defined in skgew.h */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("Virtual address of LETab is %8p!\n", pVirtMemAddr)); ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("Phys address of LETab is %8p!\n", (void *) pPhysMemAddr)); ++ ++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("RxLeTable for Port %c", 'A' + CurrMac)); ++ SkGeY2InitSingleLETable( ++ pAC, ++ &pAC->RxPort[CurrMac].RxLET, ++ RX_MAX_LE, ++ pVirtMemAddr, ++ (SK_U32) (pPhysMemAddr & 0xffffffff), ++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32)); ++ ++ SK_ALIGN_SIZE(LE_TAB_SIZE(RX_MAX_LE), Alignment, Aligned); ++ pVirtMemAddr += Aligned; ++ pPhysMemAddr += Aligned; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("TxALeTable for Port %c", 'A' + CurrMac)); ++ SkGeY2InitSingleLETable( ++ pAC, ++ &pAC->TxPort[CurrMac][0].TxALET, ++ TXA_MAX_LE, ++ pVirtMemAddr, ++ (SK_U32) (pPhysMemAddr & 0xffffffff), ++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32)); ++ ++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXA_MAX_LE), Alignment, Aligned); ++ pVirtMemAddr += Aligned; ++ pPhysMemAddr += Aligned; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("TxSLeTable for Port %c", 'A' + CurrMac)); ++ SkGeY2InitSingleLETable( ++ pAC, ++ &pAC->TxPort[CurrMac][0].TxSLET, ++ TXS_MAX_LE, ++ pVirtMemAddr, ++ (SK_U32) (pPhysMemAddr & 0xffffffff), ++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32)); ++ ++ SK_ALIGN_SIZE(LE_TAB_SIZE(TXS_MAX_LE), Alignment, Aligned); ++ pVirtMemAddr += Aligned; ++ pPhysMemAddr += Aligned; ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG,("StLeTable")); ++ ++ SkGeY2InitSingleLETable( ++ pAC, ++ &pAC->StatusLETable, ++ ST_MAX_LE, ++ pVirtMemAddr, ++ (SK_U32) (pPhysMemAddr & 0xffffffff), ++ (SK_U32) (((SK_U64) pPhysMemAddr) >> 32)); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("<== AllocateAndInitLETables(OK)\n")); ++ return(SK_TRUE); ++} /* AllocateAndInitLETables */ ++ ++/***************************************************************************** ++ * ++ * AllocatePacketBuffersYukon2 - allocate packet and fragment buffers ++ * ++ * Description: ++ * This function will allocate space for the packets and fragments ++ * ++ * Arguments: ++ * pAC - A pointer to the adapter context struct. ++ * ++ * Returns: ++ * SK_TRUE - Memory was allocated correctly ++ * SK_FALSE - An error occured ++ */ ++static SK_BOOL AllocatePacketBuffersYukon2( ++SK_AC *pAC) /* pointer to adapter context */ ++{ ++ SK_PACKET *pRxPacket; ++ SK_PACKET *pTxPacket; ++ SK_U32 CurrBuff; ++ SK_U32 CurrMac; ++ unsigned long Flags; /* needed for POP/PUSH functions */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("==> AllocatePacketBuffersYukon2()")); ++ ++ for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) { ++ /* ++ ** Allocate RX packet space, initialize the packets and ++ ** add them to the RX waiting queue. Waiting queue means ++ ** that packet and fragment are initialized, but no sk_buff ++ ** has been assigned to it yet. ++ */ ++ pAC->RxPort[CurrMac].ReceivePacketTable = ++ kmalloc((RX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)), GFP_KERNEL); ++ ++ if (pAC->RxPort[CurrMac].ReceivePacketTable == NULL) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR, ++ ("AllocatePacketBuffersYukon2: no mem RxPkts (port %i)",CurrMac)); ++ break; ++ } else { ++ SK_MEMSET(pAC->RxPort[CurrMac].ReceivePacketTable, 0, ++ (RX_MAX_NBR_BUFFERS * sizeof(SK_PACKET))); ++ ++ pRxPacket = pAC->RxPort[CurrMac].ReceivePacketTable; ++ ++ for (CurrBuff=0;CurrBuffpFrag = &(pRxPacket->FragArray[0]); ++ pRxPacket->NumFrags = 1; ++ PUSH_PKT_AS_LAST_IN_QUEUE(&pAC->RxPort[CurrMac].RxQ_waiting, pRxPacket); ++ pRxPacket++; ++ } ++ } ++ ++ /* ++ ** Allocate TX packet space, initialize the packets and ++ ** add them to the TX free queue. Free queue means that ++ ** packet is available and initialized, but no fragment ++ ** has been assigned to it. (Must be done at TX side) ++ */ ++ pAC->TxPort[CurrMac][0].TransmitPacketTable = ++ kmalloc((TX_MAX_NBR_BUFFERS * sizeof(SK_PACKET)), GFP_KERNEL); ++ ++ if (pAC->TxPort[CurrMac][0].TransmitPacketTable == NULL) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_INIT | SK_DBGCAT_DRV_ERROR, ++ ("AllocatePacketBuffersYukon2: no mem TxPkts (port %i)",CurrMac)); ++ kfree(pAC->RxPort[CurrMac].ReceivePacketTable); ++ return(SK_FALSE); ++ } else { ++ SK_MEMSET(pAC->TxPort[CurrMac][0].TransmitPacketTable, 0, ++ (TX_MAX_NBR_BUFFERS * sizeof(SK_PACKET))); ++ ++ pTxPacket = pAC->TxPort[CurrMac][0].TransmitPacketTable; ++ ++ for (CurrBuff=0;CurrBuffTxPort[CurrMac][0].TxQ_free, pTxPacket); ++ pTxPacket++; ++ } ++ } ++ } /* end for (CurrMac = 0; CurrMac < pAC->GIni.GIMacsFound; CurrMac++) */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_INIT, ++ ("<== AllocatePacketBuffersYukon2 (OK)\n")); ++ return(SK_TRUE); ++ ++} /* AllocatePacketBuffersYukon2 */ ++ ++/***************************************************************************** ++ * ++ * FreeLETables - release allocated memory of LETables ++ * ++ * Description: ++ * This function will free all resources of the LETables ++ * ++ * Arguments: ++ * pAC - A pointer to the adapter context struct. ++ * ++ * Returns: N/A ++ */ ++static void FreeLETables( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ dma_addr_t pPhysMemAddr; ++ char *pVirtMemAddr; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> FreeLETables()\n")); ++ ++ /* ++ ** The RxLETable is the first of all LET. ++ ** Therefore we can use its address for the input ++ ** of the free function. ++ */ ++ pVirtMemAddr = (char *) pAC->RxPort[0].RxLET.pLETab; ++ pPhysMemAddr = (((SK_U64) pAC->RxPort[0].RxLET.pPhyLETABHigh << (SK_U64) 32) | ++ ((SK_U64) pAC->RxPort[0].RxLET.pPhyLETABLow)); ++ ++ /* free continuous memory */ ++ pci_free_consistent(pAC->PciDev, pAC->SizeOfAlignedLETables, ++ pVirtMemAddr, pPhysMemAddr); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== FreeLETables()\n")); ++} /* FreeLETables */ ++ ++/***************************************************************************** ++ * ++ * FreePacketBuffers - free's all packet buffers of an adapter ++ * ++ * Description: ++ * This function will free all previously allocated memory of the ++ * packet buffers. ++ * ++ * Arguments: ++ * pAC - A pointer to the adapter context struct. ++ * ++ * Returns: N/A ++ */ ++static void FreePacketBuffers( ++SK_AC *pAC) /* pointer to adapter control context */ ++{ ++ int Port; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("==> FreePacketBuffers()\n")); ++ ++ for (Port = 0; Port < pAC->GIni.GIMacsFound; Port++) { ++ kfree(pAC->RxPort[Port].ReceivePacketTable); ++ kfree(pAC->TxPort[Port][0].TransmitPacketTable); ++ } ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_DRV, SK_DBGCAT_DRV_MSG, ++ ("<== FreePacketBuffers()\n")); ++} /* FreePacketBuffers */ ++ ++/***************************************************************************** ++ * ++ * AllocAndMapRxBuffer - fill one buffer into the receive packet/fragment ++ * ++ * Description: ++ * The function allocates a new receive buffer and assigns it to the ++ * the passsed receive packet/fragment ++ * ++ * Returns: ++ * SK_TRUE - a buffer was allocated and assigned ++ * SK_FALSE - a buffer could not be added ++ */ ++static SK_BOOL AllocAndMapRxBuffer( ++SK_AC *pAC, /* pointer to the adapter control context */ ++SK_PACKET *pSkPacket, /* pointer to packet that is to fill */ ++int Port) /* port the packet belongs to */ ++{ ++ struct sk_buff *pMsgBlock; /* pointer to a new message block */ ++ SK_U64 PhysAddr; /* physical address of a rx buffer */ ++ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("--> AllocAndMapRxBuffer (Port: %i)\n", Port)); ++ ++ pMsgBlock = alloc_skb(pAC->RxPort[Port].RxBufSize, GFP_ATOMIC); ++ if (pMsgBlock == NULL) { ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, ++ SK_DBGCAT_DRV_RX_PROGRESS | SK_DBGCAT_DRV_ERROR, ++ ("%s: Allocation of rx buffer failed !\n", ++ pAC->dev[Port]->name)); ++ SK_PNMI_CNT_NO_RX_BUF(pAC, pAC->RxPort[Port].PortIndex); ++ return(SK_FALSE); ++ } ++ skb_reserve(pMsgBlock, 8); ++ ++ PhysAddr = (SK_U64) pci_map_page(pAC->PciDev, ++ virt_to_page(pMsgBlock->data), ++ ((unsigned long) pMsgBlock->data & ++ ~PAGE_MASK), ++ pAC->RxPort[Port].RxBufSize, ++ PCI_DMA_FROMDEVICE); ++ ++ pSkPacket->pFrag->pVirt = pMsgBlock->data; ++ pSkPacket->pFrag->pPhys = PhysAddr; ++ pSkPacket->pFrag->FragLen = pAC->RxPort[Port].RxBufSize; /* for correct unmap */ ++ pSkPacket->pMBuf = pMsgBlock; ++ pSkPacket->PacketLen = pAC->RxPort[Port].RxBufSize; ++ ++ SK_DBG_MSG(NULL, SK_DBGMOD_DRV, SK_DBGCAT_DRV_RX_PROGRESS, ++ ("<-- AllocAndMapRxBuffer\n")); ++ ++ return (SK_TRUE); ++} /* AllocAndMapRxBuffer */ ++ ++/******************************************************************************* ++ * ++ * End of file ++ * ++ ******************************************************************************/ +diff -ruN linux-2.6.9.old/drivers/net/sk98lin/sky2le.c linux-2.6.9.new/drivers/net/sk98lin/sky2le.c +--- linux-2.6.9.old/drivers/net/sk98lin/sky2le.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-2.6.9.new/drivers/net/sk98lin/sky2le.c 2006-12-07 14:35:03.000000000 +0800 +@@ -0,0 +1,510 @@ ++/***************************************************************************** ++ * ++ * Name: sky2le.c ++ * Project: Gigabit Ethernet Adapters, Common Modules ++ * Version: $Revision: 1.11 $ ++ * Date: $Date: 2004/11/22 14:21:58 $ ++ * Purpose: Functions for handling List Element Tables ++ * ++ *****************************************************************************/ ++ ++/****************************************************************************** ++ * ++ * (C)Copyright 2002-2004 Marvell. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * The information in this file is provided "AS IS" without warranty. ++ * ++ ******************************************************************************/ ++ ++/***************************************************************************** ++ * ++ * Description: ++ * ++ * This module contains the code necessary for handling List Elements. ++ * ++ * Supported Gigabit Ethernet Chipsets: ++ * Yukon-2 (PCI, PCI-X, PCI-Express) ++ * ++ * Include File Hierarchy: ++ * ++ * ++ *****************************************************************************/ ++#include "h/skdrv1st.h" ++#include "h/skdrv2nd.h" ++ ++/* defines *******************************************************************/ ++/* typedefs ******************************************************************/ ++/* global variables **********************************************************/ ++/* local variables ***********************************************************/ ++ ++#if (defined(DEBUG) || ((!defined(LINT)) && (!defined(SK_SLIM)))) ++static const char SysKonnectFileId[] = ++ "@(#) $Id: sky2le.c,v 1.11 2004/11/22 14:21:58 malthoff Exp $ (C) Marvell."; ++#endif /* DEBUG || (!LINT && !SK_SLIM) */ ++ ++/* function prototypes *******************************************************/ ++ ++/***************************************************************************** ++ * ++ * SkGeY2InitSingleLETable() - initializes a list element table ++ * ++ * Description: ++ * This function will initialize the selected list element table. ++ * Should be called once during DriverInit. No InitLevel required. ++ * ++ * Arguments: ++ * pAC - pointer to the adapter context struct. ++ * pLETab - pointer to list element table structure ++ * NumLE - number of list elements in this table ++ * pVMem - virtual address of memory allocated for this LE table ++ * PMemLowAddr - physical address of memory to be used for the LE table ++ * PMemHighAddr ++ * ++ * Returns: ++ * nothing ++ */ ++void SkGeY2InitSingleLETable( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_LE_TABLE *pLETab, /* pointer to list element table to be initialized */ ++unsigned int NumLE, /* number of list elements to be filled in tab */ ++void *pVMem, /* virtual address of memory used for list elements */ ++SK_U32 PMemLowAddr, /* physical addr of mem used for LE */ ++SK_U32 PMemHighAddr) ++{ ++ unsigned int i; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("==> SkGeY2InitSingleLETable()\n")); ++ ++#ifdef DEBUG ++ if (NumLE != 2) { /* not table for polling unit */ ++ if ((NumLE % MIN_LEN_OF_LE_TAB) != 0 || NumLE > MAX_LEN_OF_LE_TAB) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("ERROR: Illegal number of list elements %d\n", NumLE)); ++ } ++ } ++#endif /* DEBUG */ ++ ++ /* special case: unused list element table */ ++ if (NumLE == 0) { ++ PMemLowAddr = 0; ++ PMemHighAddr = 0; ++ pVMem = 0; ++ } ++ ++ /* ++ * in order to get the best possible performance the macros to access ++ * list elements use & instead of % ++ * this requires the length of LE tables to be a power of 2 ++ */ ++ ++ /* ++ * this code guarantees that we use the next power of 2 below the ++ * value specified for NumLe - this way some LEs in the table may ++ * not be used but the macros work correctly ++ * this code does not check for bad values below 128 because in such a ++ * case we cannot do anything here ++ */ ++ ++ if ((NumLE != 2) && (NumLE != 0)) { ++ /* no check for polling unit and unused sync Tx */ ++ i = MIN_LEN_OF_LE_TAB; ++ while (NumLE > i) { ++ i *= 2; ++ if (i > MAX_LEN_OF_LE_TAB) { ++ break; ++ } ++ } ++ if (NumLE != i) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("ERROR: Illegal number of list elements %d adjusted to %d\n", ++ NumLE, (i / 2))); ++ NumLE = i / 2; ++ } ++ } ++ ++ /* set addresses */ ++ pLETab->pPhyLETABLow = PMemLowAddr; ++ pLETab->pPhyLETABHigh = PMemHighAddr; ++ pLETab->pLETab = pVMem; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("contains %d LEs", NumLE)); ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ (" and starts at virt %08lx and phys %08lx:%08lx\n", ++ pVMem, PMemHighAddr, PMemLowAddr)); ++ ++ /* initialize indexes */ ++ pLETab->Done = 0; ++ pLETab->Put = 0; ++ pLETab->HwPut = 0; ++ /* initialize size */ ++ pLETab->Num = NumLE; ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("<== SkGeY2InitSingleLETable()\n")); ++} /* SkGeY2InitSingleLETable */ ++ ++/***************************************************************************** ++ * ++ * SkGeY2InitPrefetchUnit() - Initialize a Prefetch Unit ++ * ++ * Description: ++ * Calling this function requires an already configured list element ++ * table. The prefetch unit to be configured is specified in the parameter ++ * 'Queue'. The function is able to initialze the prefetch units of ++ * the following queues: Q_R1, Q_R2, Q_XS1, Q_XS2, Q_XA1, Q_XA2. ++ * The funcution should be called before SkGeInitPort(). ++ * ++ * Arguments: ++ * pAC - pointer to the adapter context struct. ++ * IoC - I/O context. ++ * Queue - I/O offset of queue e.g. Q_XA1. ++ * pLETab - pointer to list element table to be initialized ++ * ++ * Returns: N/A ++ */ ++void SkGeY2InitPrefetchUnit( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_IOC IoC, /* I/O context */ ++unsigned int Queue, /* Queue offset for finding the right registers */ ++SK_LE_TABLE *pLETab) /* pointer to list element table to be initialized */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("==> SkGeY2InitPrefetchUnit()\n")); ++ ++#ifdef DEBUG ++ if (Queue != Q_R1 && Queue != Q_R2 && Queue != Q_XS1 && ++ Queue != Q_XS2 && Queue != Q_XA1 && Queue != Q_XA2) { ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_ERR, ++ ("ERROR: Illegal queue identifier %x\n", Queue)); ++ } ++#endif /* DEBUG */ ++ ++ /* disable the prefetch unit */ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Base address: %08lx:%08lx\n", pLETab->pPhyLETABHigh, ++ pLETab->pPhyLETABLow)); ++ ++ /* Set the list base address high part*/ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_ADDR_HI_REG), ++ pLETab->pPhyLETABHigh); ++ ++ /* Set the list base address low part */ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_ADDR_LOW_REG), ++ pLETab->pPhyLETABLow); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Last index: %d\n", pLETab->Num-1)); ++ ++ /* Set the list last index */ ++ SK_OUT16(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_LAST_IDX_REG), ++ (SK_U16)(pLETab->Num - 1)); ++ ++ /* turn on prefetch unit */ ++ SK_OUT32(IoC, Y2_PREF_Q_ADDR(Queue, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("<== SkGeY2InitPrefetchUnit()\n")); ++} /* SkGeY2InitPrefetchUnit */ ++ ++ ++/***************************************************************************** ++ * ++ * SkGeY2InitStatBmu() - Initialize the Status BMU ++ * ++ * Description: ++ * Calling this function requires an already configured list element ++ * table. Ensure the status BMU is only initialized once during ++ * DriverInit - InitLevel2 required. ++ * ++ * Arguments: ++ * pAC - pointer to the adapter context struct. ++ * IoC - I/O context. ++ * pLETab - pointer to status LE table to be initialized ++ * ++ * Returns: N/A ++ */ ++void SkGeY2InitStatBmu( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_IOC IoC, /* I/O context */ ++SK_LE_TABLE *pLETab) /* pointer to status LE table */ ++{ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("==> SkGeY2InitStatBmu()\n")); ++ ++ /* disable the prefetch unit */ ++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_RST_SET); ++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_RST_CLR); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Base address Low: %08lX\n", pLETab->pPhyLETABLow)); ++ ++ /* Set the list base address */ ++ SK_OUT32(IoC, STAT_LIST_ADDR_LO, pLETab->pPhyLETABLow); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Base address High: %08lX\n", pLETab->pPhyLETABHigh)); ++ ++ SK_OUT32(IoC, STAT_LIST_ADDR_HI, pLETab->pPhyLETABHigh); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Last index: %d\n", pLETab->Num - 1)); ++ ++ /* Set the list last index */ ++ SK_OUT16(IoC, STAT_LAST_IDX, (SK_U16)(pLETab->Num - 1)); ++ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_43_418)) { ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Set Tx index threshold\n")); ++ /* WA for dev. #4.3 */ ++ SK_OUT16(IoC, STAT_TX_IDX_TH, ST_TXTH_IDX_MASK); ++ ++ /* set Status-FIFO watermark */ ++ SK_OUT8(IoC, STAT_FIFO_WM, 0x21); /* WA for dev. #4.18 */ ++ ++ /* set Status-FIFO ISR watermark */ ++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x07); /* WA for dev. #4.18 */ ++ ++ /* WA for dev. #4.3 and #4.18 */ ++ /* set Status-FIFO Tx timer init value */ ++ SK_OUT32(IoC, STAT_TX_TIMER_INI, HW_MS_TO_TICKS(pAC, 10)); ++ } ++ else { ++ /* ++ * Further settings may be added if required... ++ * 1) Status-FIFO watermark (STAT_FIFO_WM, STAT_FIFO_ISR_WM) ++ * 2) Status-FIFO timer values (STAT_TX_TIMER_INI, ++ * STAT_LEV_TIMER_INI and STAT_ISR_TIMER_INI) ++ * but tests shows that the default values give the best results, ++ * therefore the defaults are used. ++ */ ++ ++ /* ++ * Theses settings should avoid the ++ * temporary hanging of the status BMU. ++ * May be not all required... still under investigation... ++ */ ++ SK_OUT16(IoC, STAT_TX_IDX_TH, 0x000a); ++ ++ /* set Status-FIFO watermark */ ++ SK_OUT8(IoC, STAT_FIFO_WM, 0x10); ++ ++ ++ /* set Status-FIFO ISR watermark */ ++ if (HW_FEATURE(pAC, HWF_WA_DEV_4109)) { ++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x10); ++ } ++ else { ++ SK_OUT8(IoC, STAT_FIFO_ISR_WM, 0x04); ++ } ++ ++ SK_OUT32(IoC, STAT_ISR_TIMER_INI, 0x0190); ++ } ++ ++ /* start Status-FIFO timer */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Start Status FiFo timer\n")); ++ ++ /* enable the prefetch unit */ ++ /* operational bit not functional for Yukon-EC, but fixed in Yukon-2 */ ++ SK_OUT32(IoC, STAT_CTRL, SC_STAT_OP_ON); ++ ++ /* start Status-FIFO timer */ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Start Status FiFo timer\n")); ++ ++ SK_OUT8(IoC, STAT_TX_TIMER_CTRL, TIM_START); ++ SK_OUT8(IoC, STAT_LEV_TIMER_CTRL, TIM_START); ++ SK_OUT8(IoC, STAT_ISR_TIMER_CTRL, TIM_START); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("<== SkGeY2InitStatBmu()\n")); ++} /* SkGeY2InitStatBmu */ ++ ++#ifdef USE_POLLING_UNIT ++/***************************************************************************** ++ * ++ * SkGeY2InitPollUnit() - Initialize the Polling Unit ++ * ++ * Description: ++ * This function will write the data of one polling LE table into the ++ * adapter. ++ * ++ * Arguments: ++ * pAC - pointer to the adapter context struct. ++ * IoC - I/O context. ++ * pLETab - pointer to polling LE table to be initialized ++ * ++ * Returns: N/A ++ */ ++void SkGeY2InitPollUnit( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_IOC IoC, /* I/O context */ ++SK_LE_TABLE *pLETab) /* pointer to polling LE table */ ++{ ++ SK_HWLE *pLE; ++ int i; ++#ifdef VCPU ++ VCPU_VARS(); ++#endif /* VCPU */ ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("==> SkGeY2InitPollUnit()\n")); ++ ++#ifdef VCPU ++ for (i = 0; i < SK_MAX_MACS; i++) { ++ GET_PO_LE(pLE, pLETab, i); ++ VCPU_START_AND_COPY_LE(); ++ /* initialize polling LE but leave indexes invalid */ ++ POLE_SET_OPC(pLE, OP_PUTIDX | HW_OWNER); ++ POLE_SET_LINK(pLE, i); ++ POLE_SET_RXIDX(pLE, 0); ++ POLE_SET_TXAIDX(pLE, 0); ++ POLE_SET_TXSIDX(pLE, 0); ++ VCPU_WRITE_LE(); ++ SK_DBG_DUMP_PO_LE(pLE); ++ } ++#endif /* VCPU */ ++ ++ /* disable the polling unit */ ++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_RST_SET); ++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_RST_CLR); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Base address Low: %08lX\n", pLETab->pPhyLETABLow)); ++ ++ /* Set the list base address */ ++ SK_OUT32(IoC, POLL_LIST_ADDR_LO, pLETab->pPhyLETABLow); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("Base address High: %08lX\n", pLETab->pPhyLETABHigh)); ++ ++ SK_OUT32(IoC, POLL_LIST_ADDR_HI, pLETab->pPhyLETABHigh); ++ ++ /* we don't need to write the last index - it is hardwired to 1 */ ++ ++ /* enable the prefetch unit */ ++ SK_OUT32(IoC, POLL_CTRL, PC_POLL_OP_ON); ++ ++ /* ++ * now we have to start the descriptor poll timer because it triggers ++ * the polling unit ++ */ ++ ++ /* ++ * still playing with the value (timer runs at 125 MHz) ++ * descriptor poll timer is enabled by GeInit ++ */ ++ SK_OUT32(IoC, B28_DPT_INI, ++ (SK_DPOLL_DEF_Y2 * (SK_U32)pAC->GIni.GIHstClkFact / 100)); ++ ++ SK_OUT8(IoC, B28_DPT_CTRL, TIM_START); ++ ++ SK_DBG_MSG(pAC, SK_DBGMOD_HWM, SK_DBGCAT_INIT, ++ ("<== SkGeY2InitPollUnit()\n")); ++} /* SkGeY2InitPollUnit */ ++#endif /* USE_POLLING_UNIT */ ++ ++ ++/****************************************************************************** ++ * ++ * SkGeY2SetPutIndex ++ * ++ * Description: ++ * This function is writing the Done index of a transmit ++ * list element table. ++ * ++ * Notes: ++ * Dev. Issue 4.2 ++ * ++ * Returns: N/A ++ */ ++void SkGeY2SetPutIndex( ++SK_AC *pAC, /* pointer to adapter context */ ++SK_IOC IoC, /* pointer to the IO context */ ++SK_U32 StartAddrPrefetchUnit, /* start address of the prefetch unit */ ++SK_LE_TABLE *pLETab) /* list element table to work with */ ++{ ++ unsigned int Put; ++ SK_U16 EndOfListIndex; ++ SK_U16 HwGetIndex; ++ SK_U16 HwPutIndex; ++ ++ /* set put index we would like to write */ ++ Put = GET_PUT_IDX(pLETab); ++ ++ /* ++ * in this case we wrap around ++ * new put is lower than last put given to hw ++ */ ++ if (Put < pLETab->HwPut) { ++ ++ /* set put index = last index of list */ ++ EndOfListIndex = (NUM_LE_IN_TABLE(pLETab)-1); ++ ++ /* read get index of hw prefetch unit */ ++ SK_IN16(IoC, (StartAddrPrefetchUnit + PREF_UNIT_GET_IDX_REG), ++ &HwGetIndex); ++ ++ /* read put index of hw prefetch unit */ ++ SK_IN16(IoC, (StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG), ++ &HwPutIndex); ++ ++ /* prefetch unit reached end of list */ ++ /* prefetch unit reached first list element */ ++ if (HwGetIndex == 0) { ++ /* restore watermark */ ++ SK_OUT8(IoC, StartAddrPrefetchUnit + PREF_UNIT_FIFO_WM_REG, 0xe0U); ++ /* write put index */ ++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, ++ (SK_U16)Put); ++ ++ /* remember put index we wrote to hw */ ++ pLETab->HwPut = Put; ++ } ++ else if (HwGetIndex == EndOfListIndex) { ++ /* set watermark to one list element */ ++ SK_OUT8(IoC, StartAddrPrefetchUnit + PREF_UNIT_FIFO_WM_REG, 8); ++ /* set put index to first list element */ ++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, 0); ++ } ++ /* prefetch unit did not reach end of list yet */ ++ /* and we did not write put index to end of list yet */ ++ else if ((HwPutIndex != EndOfListIndex) && ++ (HwGetIndex != EndOfListIndex)) { ++ /* write put index */ ++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, ++ EndOfListIndex); ++ } ++ else { ++ /* do nothing */ ++ } ++ } ++ else { ++#ifdef XXX /* leads in to problems in the Windows Driver */ ++ if (Put != pLETab->HwPut) { ++ /* write put index */ ++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, ++ (SK_U16)Put); ++ /* update put index */ ++ UPDATE_HWPUT_IDX(pLETab); ++ } ++#else ++ /* write put index */ ++ SK_OUT16(IoC, StartAddrPrefetchUnit + PREF_UNIT_PUT_IDX_REG, ++ (SK_U16)Put); ++ /* update put index */ ++ UPDATE_HWPUT_IDX(pLETab); ++#endif ++ } ++} /* SkGeY2SetPutIndex */ ++ diff --git a/lustre/kernel_patches/patches/listman-2.4.20.patch b/lustre/kernel_patches/patches/listman-2.4.20.patch deleted file mode 100644 index 1e4508b..0000000 --- a/lustre/kernel_patches/patches/listman-2.4.20.patch +++ /dev/null @@ -1,22 +0,0 @@ -Index: linux-2.4.20/include/linux/list.h -=================================================================== ---- linux-2.4.20.orig/include/linux/list.h 2003-10-17 23:56:26.000000000 +0400 -+++ linux-2.4.20/include/linux/list.h 2003-10-22 14:02:22.000000000 +0400 -@@ -227,6 +227,17 @@ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) - -+/* 2.5 uses hlists for some things, like the d_hash. we'll treat them -+ * as 2.5 and let macros drop back.. */ -+#define hlist_entry list_entry -+#define hlist_head list_head -+#define hlist_node list_head -+#define HLIST_HEAD LIST_HEAD -+#define INIT_HLIST_HEAD INIT_LIST_HEAD -+#define hlist_del_init list_del_init -+#define hlist_add_head list_add -+#define hlist_for_each_safe list_for_each_safe -+ - #endif /* __KERNEL__ || _LVM_H_INCLUDE */ - - #endif diff --git a/lustre/kernel_patches/patches/listman-2.4.21-chaos.patch b/lustre/kernel_patches/patches/listman-2.4.21-chaos.patch deleted file mode 100644 index 6c85e492..0000000 --- a/lustre/kernel_patches/patches/listman-2.4.21-chaos.patch +++ /dev/null @@ -1,26 +0,0 @@ -Index: linux-2.4.21-chaos/include/linux/list.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/list.h 2003-12-05 16:54:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/list.h 2003-12-12 16:08:20.000000000 +0300 -@@ -241,6 +241,21 @@ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) - -+#ifndef list_for_each_entry_safe -+/** -+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry -+ * @pos: the type * to use as a loop counter. -+ * @n: another type * to use as temporary storage -+ * @head: the head for your list. -+ * @member: the name of the list_struct within the struct. -+ */ -+#define list_for_each_entry_safe(pos, n, head, member) \ -+ for (pos = list_entry((head)->next, typeof(*pos), member), \ -+ n = list_entry(pos->member.next, typeof(*pos), member); \ -+ &pos->member != (head); \ -+ pos = n, n = list_entry(n->member.next, typeof(*n), member)) -+#endif -+ - #define list_first(head) (((head)->next != (head)) ? (head)->next: (struct list_head *) 0) - - #endif /* __KERNEL__ || _LVM_H_INCLUDE */ diff --git a/lustre/kernel_patches/patches/llnl-frame-pointer-walk-2.4.21-rhel.patch b/lustre/kernel_patches/patches/llnl-frame-pointer-walk-2.4.21-rhel.patch deleted file mode 100644 index 4d234af..0000000 --- a/lustre/kernel_patches/patches/llnl-frame-pointer-walk-2.4.21-rhel.patch +++ /dev/null @@ -1,120 +0,0 @@ -diff -X dontdiff -urp kern_oldest/arch/i386/kernel/traps.c kern_fix/arch/i386/kernel/traps.c ---- kern_oldest/arch/i386/kernel/traps.c 2006-05-01 11:56:31.000000000 -0700 -+++ kern_fix/arch/i386/kernel/traps.c 2006-05-01 14:56:40.000000000 -0700 -@@ -133,6 +133,80 @@ static inline int kernel_text_address(un - - #endif - -+#if CONFIG_FRAME_POINTER -+void show_stack_frame_params (int param_count, unsigned long params[]) -+{ -+ int i; -+ unsigned long *p, task_addr, stack_base; -+ -+ if (param_count <= 0) -+ return; -+ -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ -+ printk(" ("); -+ -+ for (i = 0, p = params; -+ ((param_count - i) > 1) && (p >= task_addr) && (p <= stack_base); -+ i++, p++) { -+ printk("0x%x, ", *p); -+ -+ if ((i % 4) == 3) -+ printk("\n "); -+ } -+ -+ if ((p >= task_addr) && (p <= stack_base)) -+ printk("0x%x)\n", *p); -+} -+ -+void frame_pointer_walk(unsigned long *stack) -+{ -+ int i; -+ unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, -+ eip, stack_base; -+ /* static to not take up stackspace; if we race here too bad */ -+ static char buffer[512]; -+ -+ addr = (unsigned long) stack; -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ -+ /* Simply return if we are doing a stack trace for any task other -+ * than the currently executing task. To trace another task, we -+ * would need an %ebp register value for that task. -+ */ -+ if (((addr < task_addr) || (addr > stack_base))) { -+ return; -+ } -+ -+ frame_ptr = (unsigned long *) (&stack - 2); -+ -+ for (; ; ) { -+ next_frame_ptr = (unsigned long *) (*frame_ptr); -+ addr = (unsigned long) next_frame_ptr; -+ -+ /* Stop when we reach a frame pointer that points to a -+ * location clearly outside our own kernel stack. -+ */ -+ if ((addr < task_addr) || (addr > stack_base)) -+ break; -+ -+ eip_ptr = frame_ptr + 1; -+ eip = *eip_ptr; -+ -+ if (kernel_text_address(eip)) { -+ lookup_symbol(eip, buffer, 512); -+ show_stack_frame_params(4, frame_ptr + 2); -+ printk("[<%08lx>] %s (0x%x)\n", eip, buffer, -+ eip_ptr); -+ } -+ -+ frame_ptr = next_frame_ptr; -+ } -+} -+#endif -+ - void show_trace(unsigned long * stack) - { - #if !CONFIG_FRAME_POINTER -@@ -151,16 +225,7 @@ void show_trace(unsigned long * stack) - * a 100% exact backtrace, up until the entry frame: - */ - #if CONFIG_FRAME_POINTER --#define DO(n) \ -- addr = (int)__builtin_return_address(n); \ -- if (!kernel_text_address(addr)) \ -- goto out; \ -- lookup_symbol(addr, buffer, 512); \ -- printk("[<%08lx>] %s\n", addr, buffer); -- -- DO(0); DO(1); DO(2); DO(3); DO(4); DO(5); DO(7); DO(8); DO(9); -- DO(10); DO(11); DO(12); DO(13); DO(14); DO(15); DO(17); DO(18); DO(19); --out: -+ frame_pointer_walk(stack); - #else - i = 1; - limit = ((unsigned long)stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE - 3; -@@ -168,7 +233,7 @@ out: - addr = *stack++; - if (kernel_text_address(addr)) { - lookup_symbol(addr, buffer, 512); -- printk("[<%08lx>] %s (0x%p)\n", addr,buffer,stack-1); -+ printk("[<%08lx>] %s (0x%x)\n", addr,buffer,stack-1); - i++; - } - } -@@ -244,7 +309,7 @@ void show_registers(struct pt_regs *regs - lookup_symbol(regs->eip, buffer, 512); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); -- printk("\nEIP is at %s (" UTS_RELEASE ARCHIT ")\n",buffer); -+ printk("\nEIP is at %s (" UTS_RELEASE ")\n",buffer); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", diff --git a/lustre/kernel_patches/patches/llnl-frame-pointer-walk-fix-2.4.21-rhel.patch b/lustre/kernel_patches/patches/llnl-frame-pointer-walk-fix-2.4.21-rhel.patch deleted file mode 100644 index 64d652a..0000000 --- a/lustre/kernel_patches/patches/llnl-frame-pointer-walk-fix-2.4.21-rhel.patch +++ /dev/null @@ -1,249 +0,0 @@ -Index: kernel/arch/i386/kernel/traps.c -=================================================================== ---- kernel.orig/arch/i386/kernel/traps.c 2006-05-26 16:12:28.000000000 -0700 -+++ kernel/arch/i386/kernel/traps.c 2006-05-26 16:15:54.000000000 -0700 -@@ -133,6 +133,30 @@ - - #endif - -+void scan_stack (unsigned long *stack) -+{ -+ int i; -+ unsigned long addr; -+ /* static to not take up stackspace */ -+ static char buffer[NR_CPUS][512], *bufp; -+ -+ bufp = buffer[smp_processor_id()]; -+ -+ /* -+ * If we have frame pointers then use them to get -+ * a 100% exact backtrace, up until the entry frame: -+ */ -+ i = 1; -+ while (((long) stack & (THREAD_SIZE-1)) != 0) { -+ addr = *stack++; -+ if (kernel_text_address(addr)) { -+ lookup_symbol(addr, bufp, 512); -+ printk("[<%08lx>] %s (0x%p)\n", addr,bufp,stack-1); -+ i++; -+ } -+ } -+} -+ - #if CONFIG_FRAME_POINTER - void show_stack_frame_params (int param_count, unsigned long params[]) - { -@@ -160,27 +184,23 @@ - printk("0x%x)\n", *p); - } - --void frame_pointer_walk(unsigned long *stack) -+/* Display a stack trace for the currently executing task. The 'dummy' -+ * parameter serves a purpose although its value is unused. We use the -+ * address of 'dummy' as a reference point for finding the saved %ebp register -+ * value on the stack. -+ */ -+void frame_pointer_walk (void *dummy) - { - int i; - unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, - eip, stack_base; -- /* static to not take up stackspace; if we race here too bad */ -- static char buffer[512]; -+ /* static to not take up stackspace */ -+ static char buffer[NR_CPUS][512], *bufp; - -- addr = (unsigned long) stack; -+ bufp = buffer[smp_processor_id()]; - task_addr = (unsigned long) current; - stack_base = task_addr + THREAD_SIZE - 1; -- -- /* Simply return if we are doing a stack trace for any task other -- * than the currently executing task. To trace another task, we -- * would need an %ebp register value for that task. -- */ -- if (((addr < task_addr) || (addr > stack_base))) { -- return; -- } -- -- frame_ptr = (unsigned long *) (&stack - 2); -+ frame_ptr = (unsigned long *) (&dummy - 2); - - for (; ; ) { - next_frame_ptr = (unsigned long *) (*frame_ptr); -@@ -196,9 +216,9 @@ - eip = *eip_ptr; - - if (kernel_text_address(eip)) { -- lookup_symbol(eip, buffer, 512); -+ lookup_symbol(eip, bufp, 512); - show_stack_frame_params(4, frame_ptr + 2); -- printk("[<%08lx>] %s (0x%x)\n", eip, buffer, -+ printk("[<%08lx>] %s (0x%x)\n", eip, bufp, - eip_ptr); - } - -@@ -207,40 +227,49 @@ - } - #endif - -+typedef void (*stack_trace_fn_t) (unsigned long *stack); -+ -+#if CONFIG_FRAME_POINTER - void show_trace(unsigned long * stack) - { --#if !CONFIG_FRAME_POINTER -- int i; --#endif -- unsigned long addr, limit; -- /* static to not take up stackspace; if we race here too bad */ -- static char buffer[512]; -+ static const stack_trace_fn_t trace_fn_vector[] = -+ { scan_stack, frame_pointer_walk }; -+ unsigned long addr, task_addr, stack_base; -+ int task_is_current; - - if (!stack) - stack = (unsigned long*)&stack; - - printk("Call Trace: "); -- /* -- * If we have frame pointers then use them to get -- * a 100% exact backtrace, up until the entry frame: -- */ --#if CONFIG_FRAME_POINTER -- frame_pointer_walk(stack); --#else -- i = 1; -- limit = ((unsigned long)stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE - 3; -- while ((unsigned long)stack < limit) { -- addr = *stack++; -- if (kernel_text_address(addr)) { -- lookup_symbol(addr, buffer, 512); -- printk("[<%08lx>] %s (0x%x)\n", addr,buffer,stack-1); -- i++; -- } -- } --#endif -+ addr = (unsigned long) stack; -+ task_addr = (unsigned long) current; -+ stack_base = task_addr + THREAD_SIZE - 1; -+ task_is_current = (addr >= task_addr) && (addr <= stack_base); -+ -+ /* We may use frame pointers to do a stack trace only if the current -+ * task is being traced. Tracing some other task in this manner -+ * would require a saved %ebp register value. Perhaps in the future -+ * I'll consider providing a means of obtaining this. -+ */ -+ trace_fn_vector[task_is_current](stack); -+ -+ printk("\n"); -+} -+ -+#else /* CONFIG_FRAME_POINTER */ -+ -+void show_trace(unsigned long * stack) -+{ -+ if (!stack) -+ stack = (unsigned long*)&stack; -+ -+ printk("Call Trace:\n"); -+ scan_stack(stack); - printk("\n"); - } - -+#endif /* CONFIG_FRAME_POINTER */ -+ - void show_trace_task(struct task_struct *tsk) - { - unsigned long esp = tsk->thread.esp; -Index: kernel/include/asm-i386/hw_irq.h -=================================================================== ---- kernel.orig/include/asm-i386/hw_irq.h 2006-05-26 16:11:33.000000000 -0700 -+++ kernel/include/asm-i386/hw_irq.h 2006-05-26 16:13:51.000000000 -0700 -@@ -153,6 +153,9 @@ - /* there is a second layer of macro just to get the symbolic - name for the vector evaluated. This change is for RTLinux */ - #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) -+ -+#if CONFIG_X86_HIGH_ENTRY -+ - #define XBUILD_SMP_INTERRUPT(x,v)\ - asmlinkage void x(void); \ - asmlinkage void call_##x(void); \ -@@ -165,7 +168,26 @@ - "movl $"SYMBOL_NAME_STR(smp_##x)", %ebp; call *%ebp\n\t" \ - "jmp ret_from_intr; .previous\n"); - -+#else -+ -+#define XBUILD_SMP_INTERRUPT(x,v)\ -+asmlinkage void x(void); \ -+asmlinkage void call_##x(void); \ -+__asm__( \ -+".section .entry.text,\"ax\"\n"__ALIGN_STR"\n" \ -+SYMBOL_NAME_STR(x) ":\n\t" \ -+ "pushl $"#v"-256\n\t" \ -+ SAVE_ALL_SWITCH \ -+ SYMBOL_NAME_STR(call_##x)":\n\t" \ -+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ -+ "jmp ret_from_intr; .previous\n"); -+ -+#endif -+ - #define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) -+ -+#if CONFIG_X86_HIGH_ENTRY -+ - #define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ - asmlinkage void x(struct pt_regs * regs); \ - asmlinkage void call_##x(void); \ -@@ -181,6 +203,27 @@ - "addl $4,%esp\n\t" \ - "jmp ret_from_intr; .previous\n"); - -+#else -+ -+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ -+asmlinkage void x(struct pt_regs * regs); \ -+asmlinkage void call_##x(void); \ -+__asm__( \ -+".section .entry.text,\"ax\"\n"__ALIGN_STR"\n" \ -+SYMBOL_NAME_STR(x) ":\n\t" \ -+ "pushl $"#v"-256\n\t" \ -+ SAVE_ALL_SWITCH \ -+ "movl %esp,%eax\n\t" \ -+ "pushl %eax\n\t" \ -+ SYMBOL_NAME_STR(call_##x)":\n\t" \ -+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ -+ "addl $4,%esp\n\t" \ -+ "jmp ret_from_intr; .previous\n"); -+ -+#endif -+ -+#if CONFIG_X86_HIGH_ENTRY -+ - #define BUILD_COMMON_IRQ() \ - asmlinkage void call_do_IRQ(void); \ - __asm__( \ -@@ -191,6 +234,20 @@ - "movl $"SYMBOL_NAME_STR(do_IRQ)", %ebp; call *%ebp\n\t" \ - "jmp ret_from_intr; .previous\n"); - -+#else -+ -+#define BUILD_COMMON_IRQ() \ -+asmlinkage void call_do_IRQ(void); \ -+__asm__( \ -+ ".section .entry.text,\"ax\"\n" __ALIGN_STR"\n" \ -+ "common_interrupt:\n\t" \ -+ SAVE_ALL_SWITCH \ -+ SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ -+ "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ -+ "jmp ret_from_intr; .previous\n"); -+ -+#endif -+ - /* - * subtle. orig_eax is used by the signal code to distinct between - * system calls and interrupted 'random user-space'. Thus we have diff --git a/lustre/kernel_patches/patches/lockmeter.patch b/lustre/kernel_patches/patches/lockmeter.patch new file mode 100644 index 0000000..01e8b77 --- /dev/null +++ b/lustre/kernel_patches/patches/lockmeter.patch @@ -0,0 +1,3096 @@ + +From: Ray Bryant + +This version of the lockmeter patch has been updated to work with +the out-of-line spinlocks that were recently merged into the mainline. +Basically what is done here is that if CONFIG_LOCKMETER is set, then the +lock routines in kernel/spinlock.c are not used and the corresponding +versions in kernel/lockmeter.c are used instead. The former set of +lock routines call _raw_ spin lock code defined by the architecture; +the latter call _metered_ versions of the spinlock routines, which in +turn call the _raw_ routines. The versions in the two files will have +to kept in sync manually, but given that the lock APIs don't change +very often, that should be ok. The lockmeter.c versions are written +so that the address of the original caller is passed to the _metered_ +lock routines; elsewise all lock requests would look like they were +coming from the lockmeter.c versions of the lock routine. + +I've had trouble testing this on an Altix so have really only tested this +on i386. But the changes are almost exclusively in machine independent +code so that should be ok. I did do some fixup in the Sparc64 arch files, +so someone with such a box should test this stuff. + +The existing lockstat routine will work with this patch, although we have +had to add some "#include " lines to that program to get it to +compile properly. An updated version of lockstat will be posted to +oss.sgi.com in the near future. + +Signed-off-by: Ray Bryant + +========================================================================= + +DESC +ia64 CONFIG_LOCKMETER fix +EDESC +From: John Hawkes + +The 2.6.3-mm4 patch seems to have the CONFIG_LOCKMETER in the wrong spot +for ia64. When I make this change, I can build and run an ia64 +Lockmeter'ed kernel. +DESC +lockmeter-build-fix +EDESC +DESC +lockmeter for x86_64 +EDESC +From: Alexander Nyberg + +This is basically a cut and paste from i386 code. At some places however +some unresolved addresses at places like [0x1000211eb38] shows up, which is +a bit weird. I'm hoping for a comment from any of the SGI guys, as the +code is so similar to i386 I don't know if problem lies below or in the +generic code. +Signed-off-by: Andrew Morton +Index: linux/arch/i386/Kconfig.debug +=================================================================== +--- linux.orig/arch/i386/Kconfig.debug ++++ linux/arch/i386/Kconfig.debug +@@ -67,6 +67,13 @@ config SCHEDSTATS + application, you can say N to avoid the very slight overhead + this adds. + ++config LOCKMETER ++ bool "Kernel lock metering" ++ depends on SMP ++ help ++ Say Y to enable kernel lock metering, which adds overhead to SMP locks, ++ but allows you to see various statistics using the lockstat command. ++ + config X86_FIND_SMP_CONFIG + bool + depends on X86_LOCAL_APIC || X86_VOYAGER +Index: linux/arch/i386/lib/dec_and_lock.c +=================================================================== +--- linux.orig/arch/i386/lib/dec_and_lock.c ++++ linux/arch/i386/lib/dec_and_lock.c +@@ -10,6 +10,7 @@ + #include + #include + ++#ifndef ATOMIC_DEC_AND_LOCK + int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) + { + int counter; +@@ -38,3 +39,5 @@ slow_path: + spin_unlock(lock); + return 0; + } ++#endif ++ +Index: linux/arch/ia64/Kconfig.debug +=================================================================== +--- linux.orig/arch/ia64/Kconfig.debug ++++ linux/arch/ia64/Kconfig.debug +@@ -72,4 +72,11 @@ config SYSVIPC_COMPAT + depends on COMPAT && SYSVIPC + default y + ++config LOCKMETER ++ bool "Kernel lock metering" ++ depends on SMP ++ help ++ Say Y to enable kernel lock metering, which adds overhead to SMP locks, ++ but allows you to see various statistics using the lockstat command. ++ + endmenu +Index: linux/arch/ia64/lib/dec_and_lock.c +=================================================================== +--- linux.orig/arch/ia64/lib/dec_and_lock.c ++++ linux/arch/ia64/lib/dec_and_lock.c +@@ -13,6 +13,7 @@ + #include + #include + ++#ifndef CONFIG_LOCKMETER + /* + * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these + * operations have to be done atomically, so that the count doesn't drop to zero without +@@ -40,3 +41,4 @@ atomic_dec_and_lock (atomic_t *refcount, + } + + EXPORT_SYMBOL(atomic_dec_and_lock); ++#endif +Index: linux/arch/sparc64/Kconfig.debug +=================================================================== +--- linux.orig/arch/sparc64/Kconfig.debug ++++ linux/arch/sparc64/Kconfig.debug +@@ -33,12 +33,19 @@ config DEBUG_BOOTMEM + depends on DEBUG_KERNEL + bool "Debug BOOTMEM initialization" + ++config LOCKMETER ++ bool "Kernel lock metering" ++ depends on SMP && !PREEMPT ++ help ++ Say Y to enable kernel lock metering, which adds overhead to SMP locks, ++ but allows you to see various statistics using the lockstat command. ++ + # We have a custom atomic_dec_and_lock() implementation but it's not + # compatible with spinlock debugging so we need to fall back on + # the generic version in that case. + config HAVE_DEC_LOCK + bool +- depends on SMP && !DEBUG_SPINLOCK ++ depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER + default y + + config MCOUNT +Index: linux/arch/x86_64/Kconfig.debug +=================================================================== +--- linux.orig/arch/x86_64/Kconfig.debug ++++ linux/arch/x86_64/Kconfig.debug +@@ -66,4 +66,11 @@ config IOMMU_LEAK + #config X86_REMOTE_DEBUG + # bool "kgdb debugging stub" + ++config LOCKMETER ++ bool "Kernel lock metering" ++ depends on SMP ++ help ++ Say Y to enable kernel lock metering, which adds overhead to SMP locks, ++ but allows you to see various statistics using the lockstat command. ++ + endmenu +Index: linux/arch/x86_64/lib/dec_and_lock.c +=================================================================== +--- linux.orig/arch/x86_64/lib/dec_and_lock.c ++++ linux/arch/x86_64/lib/dec_and_lock.c +@@ -10,6 +10,7 @@ + #include + #include + ++#ifndef ATOMIC_DEC_AND_LOCK + int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) + { + int counter; +@@ -38,3 +39,4 @@ slow_path: + spin_unlock(lock); + return 0; + } ++#endif +Index: linux/fs/proc/proc_misc.c +=================================================================== +--- linux.orig/fs/proc/proc_misc.c ++++ linux/fs/proc/proc_misc.c +@@ -578,6 +578,36 @@ static void create_seq_entry(char *name, + entry->proc_fops = f; + } + ++#ifdef CONFIG_LOCKMETER ++extern ssize_t get_lockmeter_info(char *, size_t, loff_t *); ++extern ssize_t put_lockmeter_info(const char *, size_t); ++extern int get_lockmeter_info_size(void); ++ ++/* ++ * This function accesses lock metering information. ++ */ ++static ssize_t read_lockmeter(struct file *file, char *buf, ++ size_t count, loff_t *ppos) ++{ ++ return get_lockmeter_info(buf, count, ppos); ++} ++ ++/* ++ * Writing to /proc/lockmeter resets the counters ++ */ ++static ssize_t write_lockmeter(struct file * file, const char * buf, ++ size_t count, loff_t *ppos) ++{ ++ return put_lockmeter_info(buf, count); ++} ++ ++static struct file_operations proc_lockmeter_operations = { ++ NULL, /* lseek */ ++ read: read_lockmeter, ++ write: write_lockmeter, ++}; ++#endif /* CONFIG_LOCKMETER */ ++ + void __init proc_misc_init(void) + { + struct proc_dir_entry *entry; +@@ -638,6 +668,13 @@ void __init proc_misc_init(void) + if (entry) + entry->proc_fops = &proc_sysrq_trigger_operations; + #endif ++#ifdef CONFIG_LOCKMETER ++ entry = create_proc_entry("lockmeter", S_IWUSR | S_IRUGO, NULL); ++ if (entry) { ++ entry->proc_fops = &proc_lockmeter_operations; ++ entry->size = get_lockmeter_info_size(); ++ } ++#endif + #ifdef CONFIG_PPC32 + { + extern struct file_operations ppc_htab_operations; +Index: linux/include/asm-alpha/lockmeter.h +=================================================================== +--- linux.orig/include/asm-alpha/lockmeter.h ++++ linux/include/asm-alpha/lockmeter.h +@@ -0,0 +1,84 @@ ++/* ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ * ++ * Modified by Peter Rival (frival@zk3.dec.com) ++ */ ++ ++#ifndef _ALPHA_LOCKMETER_H ++#define _ALPHA_LOCKMETER_H ++ ++#include ++#define CPU_CYCLE_FREQUENCY hwrpb->cycle_freq ++ ++#define get_cycles64() get_cycles() ++ ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++#include ++ ++#define SPINLOCK_MAGIC_INIT /**/ ++ ++/* ++ * Macros to cache and retrieve an index value inside of a lock ++ * these macros assume that there are less than 65536 simultaneous ++ * (read mode) holders of a rwlock. ++ * We also assume that the hash table has less than 32767 entries. ++ * the high order bit is used for write locking a rw_lock ++ * Note: although these defines and macros are the same as what is being used ++ * in include/asm-i386/lockmeter.h, they are present here to easily ++ * allow an alternate Alpha implementation. ++ */ ++/* ++ * instrumented spinlock structure -- never used to allocate storage ++ * only used in macros below to overlay a spinlock_t ++ */ ++typedef struct inst_spinlock_s { ++ /* remember, Alpha is little endian */ ++ unsigned short lock; ++ unsigned short index; ++} inst_spinlock_t; ++#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv ++#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index ++ ++/* ++ * macros to cache and retrieve an index value in a read/write lock ++ * as well as the cpu where a reader busy period started ++ * we use the 2nd word (the debug word) for this, so require the ++ * debug word to be present ++ */ ++/* ++ * instrumented rwlock structure -- never used to allocate storage ++ * only used in macros below to overlay a rwlock_t ++ */ ++typedef struct inst_rwlock_s { ++ volatile int lock; ++ unsigned short index; ++ unsigned short cpu; ++} inst_rwlock_t; ++#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv ++#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv ++#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu ++ ++/* ++ * return true if rwlock is write locked ++ * (note that other lock attempts can cause the lock value to be negative) ++ */ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) (((inst_rwlock_t *)rwlock_ptr)->lock & 1) ++#define IABS(x) ((x) > 0 ? (x) : -(x)) ++ ++#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) ++extern inline int rwlock_readers(rwlock_t *rwlock_ptr) ++{ ++ int tmp = (int) ((inst_rwlock_t *)rwlock_ptr)->lock; ++ /* readers subtract 2, so we have to: */ ++ /* - andnot off a possible writer (bit 0) */ ++ /* - get the absolute value */ ++ /* - divide by 2 (right shift by one) */ ++ /* to find the number of readers */ ++ if (tmp == 0) return(0); ++ else return(IABS(tmp & ~1)>>1); ++} ++ ++#endif /* _ALPHA_LOCKMETER_H */ +Index: linux/include/asm-alpha/spinlock.h +=================================================================== +--- linux.orig/include/asm-alpha/spinlock.h ++++ linux/include/asm-alpha/spinlock.h +@@ -6,6 +6,10 @@ + #include + #include + ++#ifdef CONFIG_LOCKMETER ++#undef DEBUG_SPINLOCK ++#undef DEBUG_RWLOCK ++#endif + + /* + * Simple spin lock operations. There are two variants, one clears IRQ's +@@ -96,9 +100,18 @@ static inline int _raw_spin_trylock(spin + + typedef struct { + volatile unsigned int write_lock:1, read_counter:31; ++#ifdef CONFIG_LOCKMETER ++ /* required for LOCKMETER since all bits in lock are used */ ++ /* need this storage for CPU and lock INDEX ............. */ ++ unsigned magic; ++#endif + } /*__attribute__((aligned(32)))*/ rwlock_t; + ++#ifdef CONFIG_LOCKMETER ++#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 } ++#else + #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } ++#endif + + #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + #define rwlock_is_locked(x) (*(volatile int *)(x) != 0) +@@ -193,4 +206,41 @@ static inline void _raw_read_unlock(rwlo + : "m" (*lock) : "memory"); + } + ++#ifdef CONFIG_LOCKMETER ++static inline int _raw_write_trylock(rwlock_t *lock) ++{ ++ long temp,result; ++ ++ __asm__ __volatile__( ++ " ldl_l %1,%0\n" ++ " mov $31,%2\n" ++ " bne %1,1f\n" ++ " or $31,1,%2\n" ++ " stl_c %2,%0\n" ++ "1: mb\n" ++ : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) ++ : "m" (*(volatile int *)lock) ++ ); ++ ++ return (result); ++} ++ ++static inline int _raw_read_trylock(rwlock_t *lock) ++{ ++ unsigned long temp,result; ++ ++ __asm__ __volatile__( ++ " ldl_l %1,%0\n" ++ " mov $31,%2\n" ++ " blbs %1,1f\n" ++ " subl %1,2,%2\n" ++ " stl_c %2,%0\n" ++ "1: mb\n" ++ : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) ++ : "m" (*(volatile int *)lock) ++ ); ++ return (result); ++} ++#endif /* CONFIG_LOCKMETER */ ++ + #endif /* _ALPHA_SPINLOCK_H */ +Index: linux/include/asm-i386/lockmeter.h +=================================================================== +--- linux.orig/include/asm-i386/lockmeter.h ++++ linux/include/asm-i386/lockmeter.h +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (C) 1999,2000 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ * ++ * Modified by Ray Bryant (raybry@us.ibm.com) ++ * Changes Copyright (C) 2000 IBM, Inc. ++ * Added save of index in spinlock_t to improve efficiency ++ * of "hold" time reporting for spinlocks. ++ * Added support for hold time statistics for read and write ++ * locks. ++ * Moved machine dependent code here from include/lockmeter.h. ++ * ++ */ ++ ++#ifndef _I386_LOCKMETER_H ++#define _I386_LOCKMETER_H ++ ++#include ++#include ++ ++#include ++ ++#ifdef __KERNEL__ ++extern unsigned long cpu_khz; ++#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000) ++#else ++#define CPU_CYCLE_FREQUENCY 450000000 ++#endif ++ ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++/* ++ * macros to cache and retrieve an index value inside of a spin lock ++ * these macros assume that there are less than 65536 simultaneous ++ * (read mode) holders of a rwlock. Not normally a problem!! ++ * we also assume that the hash table has less than 65535 entries. ++ */ ++/* ++ * instrumented spinlock structure -- never used to allocate storage ++ * only used in macros below to overlay a spinlock_t ++ */ ++typedef struct inst_spinlock_s { ++ /* remember, Intel is little endian */ ++ unsigned short lock; ++ unsigned short index; ++} inst_spinlock_t; ++#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv ++#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index ++ ++/* ++ * macros to cache and retrieve an index value in a read/write lock ++ * as well as the cpu where a reader busy period started ++ * we use the 2nd word (the debug word) for this, so require the ++ * debug word to be present ++ */ ++/* ++ * instrumented rwlock structure -- never used to allocate storage ++ * only used in macros below to overlay a rwlock_t ++ */ ++typedef struct inst_rwlock_s { ++ volatile int lock; ++ unsigned short index; ++ unsigned short cpu; ++} inst_rwlock_t; ++#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv ++#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv ++#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu ++ ++/* ++ * return the number of readers for a rwlock_t ++ */ ++#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) ++ ++extern inline int rwlock_readers(rwlock_t *rwlock_ptr) ++{ ++ int tmp = (int) rwlock_ptr->lock; ++ /* read and write lock attempts may cause the lock value to temporarily */ ++ /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */ ++ /* is -1 because it was write locked and somebody tried to read lock it */ ++ /* or if it is -1 because it was read locked and somebody tried to write*/ ++ /* lock it. ........................................................... */ ++ do { ++ tmp = (int) rwlock_ptr->lock; ++ } while (tmp < 0); ++ if (tmp == 0) return(0); ++ else return(RW_LOCK_BIAS-tmp); ++} ++ ++/* ++ * return true if rwlock is write locked ++ * (note that other lock attempts can cause the lock value to be negative) ++ */ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0) ++#define IABS(x) ((x) > 0 ? (x) : -(x)) ++#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0) ++ ++/* this is a lot of typing just to get gcc to emit "rdtsc" */ ++static inline long long get_cycles64 (void) ++{ ++ union longlong_u { ++ long long intlong; ++ struct intint_s { ++ uint32_t eax; ++ uint32_t edx; ++ } intint; ++ } longlong; ++ ++ rdtsc(longlong.intint.eax,longlong.intint.edx); ++ return longlong.intlong; ++} ++ ++#endif /* _I386_LOCKMETER_H */ +Index: linux/include/asm-i386/spinlock.h +=================================================================== +--- linux.orig/include/asm-i386/spinlock.h ++++ linux/include/asm-i386/spinlock.h +@@ -163,6 +163,11 @@ static inline void _raw_spin_lock_flags + */ + typedef struct { + volatile unsigned int lock; ++#ifdef CONFIG_LOCKMETER ++ /* required for LOCKMETER since all bits in lock are used */ ++ /* and we need this storage for CPU and lock INDEX */ ++ unsigned lockmeter_magic; ++#endif + #ifdef CONFIG_DEBUG_SPINLOCK + unsigned magic; + #endif +@@ -170,11 +175,19 @@ typedef struct { + + #define RWLOCK_MAGIC 0xdeaf1eed + ++#ifdef CONFIG_LOCKMETER ++#ifdef CONFIG_DEBUG_SPINLOCK ++#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC ++#else ++#define RWLOCK_MAGIC_INIT , 0 ++#endif ++#else /* !CONFIG_LOCKMETER */ + #ifdef CONFIG_DEBUG_SPINLOCK + #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC + #else + #define RWLOCK_MAGIC_INIT /* */ + #endif ++#endif /* !CONFIG_LOCKMETER */ + + #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +@@ -212,6 +225,16 @@ static inline void _raw_write_lock(rwloc + #define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") + #define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") + ++static inline int _raw_read_trylock(rwlock_t *lock) ++{ ++ atomic_t *count = (atomic_t *)lock; ++ atomic_dec(count); ++ if (atomic_read(count) >= 0) ++ return 1; ++ atomic_inc(count); ++ return 0; ++} ++ + static inline int _raw_write_trylock(rwlock_t *lock) + { + atomic_t *count = (atomic_t *)lock; +@@ -221,4 +244,47 @@ static inline int _raw_write_trylock(rwl + return 0; + } + ++#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK) ++extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc); ++extern void _metered_spin_unlock(spinlock_t *lock); ++ ++/* ++ * Matches what is in arch/i386/lib/dec_and_lock.c, except this one is ++ * "static inline" so that the spin_lock(), if actually invoked, is charged ++ * against the real caller, not against the catch-all atomic_dec_and_lock ++ */ ++static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) ++{ ++ int counter; ++ int newcount; ++ ++repeat: ++ counter = atomic_read(atomic); ++ newcount = counter-1; ++ ++ if (!newcount) ++ goto slow_path; ++ ++ asm volatile("lock; cmpxchgl %1,%2" ++ :"=a" (newcount) ++ :"r" (newcount), "m" (atomic->counter), "0" (counter)); ++ ++ /* If the above failed, "eax" will have changed */ ++ if (newcount != counter) ++ goto repeat; ++ return 0; ++ ++slow_path: ++ preempt_disable(); ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ _metered_spin_unlock(lock); ++ preempt_enable(); ++ return 0; ++} ++ ++#define ATOMIC_DEC_AND_LOCK ++#endif ++ + #endif /* __ASM_SPINLOCK_H */ +Index: linux/include/asm-ia64/lockmeter.h +=================================================================== +--- linux.orig/include/asm-ia64/lockmeter.h ++++ linux/include/asm-ia64/lockmeter.h +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (C) 1999,2000 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ */ ++ ++#ifndef _IA64_LOCKMETER_H ++#define _IA64_LOCKMETER_H ++ ++#ifdef local_cpu_data ++#define CPU_CYCLE_FREQUENCY local_cpu_data->itc_freq ++#else ++#define CPU_CYCLE_FREQUENCY my_cpu_data.itc_freq ++#endif ++#define get_cycles64() get_cycles() ++ ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++/* ++ * macros to cache and retrieve an index value inside of a lock ++ * these macros assume that there are less than 65536 simultaneous ++ * (read mode) holders of a rwlock. ++ * we also assume that the hash table has less than 32767 entries. ++ */ ++/* ++ * instrumented spinlock structure -- never used to allocate storage ++ * only used in macros below to overlay a spinlock_t ++ */ ++typedef struct inst_spinlock_s { ++ /* remember, Intel is little endian */ ++ volatile unsigned short lock; ++ volatile unsigned short index; ++} inst_spinlock_t; ++#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv ++#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index ++ ++/* ++ * macros to cache and retrieve an index value in a read/write lock ++ * as well as the cpu where a reader busy period started ++ * we use the 2nd word (the debug word) for this, so require the ++ * debug word to be present ++ */ ++/* ++ * instrumented rwlock structure -- never used to allocate storage ++ * only used in macros below to overlay a rwlock_t ++ */ ++typedef struct inst_rwlock_s { ++ volatile int read_counter:31; ++ volatile int write_lock:1; ++ volatile unsigned short index; ++ volatile unsigned short cpu; ++} inst_rwlock_t; ++#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv ++#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv ++#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu ++ ++/* ++ * return the number of readers for a rwlock_t ++ */ ++#define RWLOCK_READERS(rwlock_ptr) ((rwlock_ptr)->read_counter) ++ ++/* ++ * return true if rwlock is write locked ++ * (note that other lock attempts can cause the lock value to be negative) ++ */ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->write_lock) ++#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->read_counter) ++ ++#endif /* _IA64_LOCKMETER_H */ ++ +Index: linux/include/asm-ia64/spinlock.h +=================================================================== +--- linux.orig/include/asm-ia64/spinlock.h ++++ linux/include/asm-ia64/spinlock.h +@@ -116,8 +116,18 @@ do { \ + typedef struct { + volatile unsigned int read_counter : 31; + volatile unsigned int write_lock : 1; ++#ifdef CONFIG_LOCKMETER ++ /* required for LOCKMETER since all bits in lock are used */ ++ /* and we need this storage for CPU and lock INDEX */ ++ unsigned lockmeter_magic; ++#endif + } rwlock_t; ++ ++#ifdef CONFIG_LOCKMETER ++#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 } ++#else + #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } ++#endif + + #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + #define rwlock_is_locked(x) (*(volatile int *) (x) != 0) +@@ -133,6 +143,48 @@ do { \ + } \ + } while (0) + ++#ifdef CONFIG_LOCKMETER ++/* ++ * HACK: This works, but still have a timing window that affects performance: ++ * we see that no one owns the Write lock, then someone * else grabs for Write ++ * lock before we do a read_lock(). ++ * This means that on rare occasions our read_lock() will stall and spin-wait ++ * until we acquire for Read, instead of simply returning a trylock failure. ++ */ ++static inline int _raw_read_trylock(rwlock_t *rw) ++{ ++ if (rw->write_lock) { ++ return 0; ++ } else { ++ _raw_read_lock(rw); ++ return 1; ++ } ++} ++ ++static inline int _raw_write_trylock(rwlock_t *rw) ++{ ++ if (!(rw->write_lock)) { ++ /* isn't currently write-locked... that looks promising... */ ++ if (test_and_set_bit(31, rw) == 0) { ++ /* now it is write-locked by me... */ ++ if (rw->read_counter) { ++ /* really read-locked, so release write-lock and fail */ ++ clear_bit(31, rw); ++ } else { ++ /* we've the the write-lock, no read-lockers... success! */ ++ barrier(); ++ return 1; ++ } ++ ++ } ++ } ++ ++ /* falls through ... fails to write-lock */ ++ barrier(); ++ return 0; ++} ++#endif ++ + #define _raw_read_unlock(rw) \ + do { \ + rwlock_t *__read_lock_ptr = (rw); \ +@@ -196,4 +248,25 @@ do { \ + clear_bit(31, (x)); \ + }) + ++#ifdef CONFIG_LOCKMETER ++extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc); ++extern void _metered_spin_unlock(spinlock_t *lock); ++ ++/* ++ * Use a less efficient, and inline, atomic_dec_and_lock() if lockmetering ++ * so we can see the callerPC of who is actually doing the spin_lock(). ++ * Otherwise, all we see is the generic rollup of all locks done by ++ * atomic_dec_and_lock(). ++ */ ++static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) ++{ ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ _metered_spin_unlock(lock); ++ return 0; ++} ++#define ATOMIC_DEC_AND_LOCK ++#endif ++ + #endif /* _ASM_IA64_SPINLOCK_H */ +Index: linux/include/asm-mips/lockmeter.h +=================================================================== +--- linux.orig/include/asm-mips/lockmeter.h ++++ linux/include/asm-mips/lockmeter.h +@@ -0,0 +1,126 @@ ++/* ++ * Copyright (C) 1999,2000 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ * Ported to mips32 for Asita Technologies ++ * by D.J. Barrow ( dj.barrow@asitatechnologies.com ) ++ */ ++#ifndef _ASM_LOCKMETER_H ++#define _ASM_LOCKMETER_H ++ ++/* do_gettimeoffset is a function pointer on mips */ ++/* & it is not included by */ ++#include ++#include ++#include ++ ++#define SPINLOCK_MAGIC_INIT /* */ ++ ++#define CPU_CYCLE_FREQUENCY get_cpu_cycle_frequency() ++ ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++static uint32_t cpu_cycle_frequency = 0; ++ ++static uint32_t get_cpu_cycle_frequency(void) ++{ ++ /* a total hack, slow and invasive, but ... it works */ ++ int sec; ++ uint32_t start_cycles; ++ struct timeval tv; ++ ++ if (cpu_cycle_frequency == 0) { /* uninitialized */ ++ do_gettimeofday(&tv); ++ sec = tv.tv_sec; /* set up to catch the tv_sec rollover */ ++ while (sec == tv.tv_sec) { do_gettimeofday(&tv); } ++ sec = tv.tv_sec; /* rolled over to a new sec value */ ++ start_cycles = get_cycles(); ++ while (sec == tv.tv_sec) { do_gettimeofday(&tv); } ++ cpu_cycle_frequency = get_cycles() - start_cycles; ++ } ++ ++ return cpu_cycle_frequency; ++} ++ ++extern struct timeval xtime; ++ ++static uint64_t get_cycles64(void) ++{ ++ static uint64_t last_get_cycles64 = 0; ++ uint64_t ret; ++ unsigned long sec; ++ unsigned long usec, usec_offset; ++ ++again: ++ sec = xtime.tv_sec; ++ usec = xtime.tv_usec; ++ usec_offset = do_gettimeoffset(); ++ if ((xtime.tv_sec != sec) || ++ (xtime.tv_usec != usec)|| ++ (usec_offset >= 20000)) ++ goto again; ++ ++ ret = ((uint64_t)(usec + usec_offset) * cpu_cycle_frequency); ++ /* We can't do a normal 64 bit division on mips without libgcc.a */ ++ do_div(ret,1000000); ++ ret += ((uint64_t)sec * cpu_cycle_frequency); ++ ++ /* XXX why does time go backwards? do_gettimeoffset? general time adj? */ ++ if (ret <= last_get_cycles64) ++ ret = last_get_cycles64+1; ++ last_get_cycles64 = ret; ++ ++ return ret; ++} ++ ++/* ++ * macros to cache and retrieve an index value inside of a lock ++ * these macros assume that there are less than 65536 simultaneous ++ * (read mode) holders of a rwlock. ++ * we also assume that the hash table has less than 32767 entries. ++ * the high order bit is used for write locking a rw_lock ++ */ ++#define INDEX_MASK 0x7FFF0000 ++#define READERS_MASK 0x0000FFFF ++#define INDEX_SHIFT 16 ++#define PUT_INDEX(lockp,index) \ ++ lockp->lock = (((lockp->lock) & ~INDEX_MASK) | (index) << INDEX_SHIFT) ++#define GET_INDEX(lockp) \ ++ (((lockp->lock) & INDEX_MASK) >> INDEX_SHIFT) ++ ++/* ++ * macros to cache and retrieve an index value in a read/write lock ++ * as well as the cpu where a reader busy period started ++ * we use the 2nd word (the debug word) for this, so require the ++ * debug word to be present ++ */ ++/* ++ * instrumented rwlock structure -- never used to allocate storage ++ * only used in macros below to overlay a rwlock_t ++ */ ++typedef struct inst_rwlock_s { ++ volatile int lock; ++ unsigned short index; ++ unsigned short cpu; ++} inst_rwlock_t; ++#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv ++#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv ++#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu ++ ++/* ++ * return the number of readers for a rwlock_t ++ */ ++#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) ++ ++extern inline int rwlock_readers(rwlock_t *rwlock_ptr) ++{ ++ int tmp = (int) rwlock_ptr->lock; ++ return (tmp >= 0) ? tmp : 0; ++} ++ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock < 0) ++#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock > 0) ++ ++#endif /* _ASM_LOCKMETER_H */ +Index: linux/include/asm-mips/spinlock.h +=================================================================== +--- linux.orig/include/asm-mips/spinlock.h ++++ linux/include/asm-mips/spinlock.h +@@ -92,9 +92,18 @@ static inline unsigned int _raw_spin_try + + typedef struct { + volatile unsigned int lock; ++#ifdef CONFIG_LOCKMETER ++ /* required for LOCKMETER since all bits in lock are used */ ++ /* and we need this storage for CPU and lock INDEX */ ++ unsigned lockmeter_magic; ++#endif + } rwlock_t; + ++#ifdef CONFIG_LOCKMETER ++#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } ++#else + #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } ++#endif + + #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +Index: linux/include/asm-sparc64/lockmeter.h +=================================================================== +--- linux.orig/include/asm-sparc64/lockmeter.h ++++ linux/include/asm-sparc64/lockmeter.h +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com) ++ * Copyright (C) 2003 David S. Miller (davem@redhat.com) ++ */ ++ ++#ifndef _SPARC64_LOCKMETER_H ++#define _SPARC64_LOCKMETER_H ++ ++#include ++#include ++#include ++#include ++ ++/* Actually, this is not the CPU frequency by the system tick ++ * frequency which is good enough for lock metering. ++ */ ++#define CPU_CYCLE_FREQUENCY (timer_tick_offset * HZ) ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++#define PUT_INDEX(lock_ptr,indexv) (lock_ptr)->index = (indexv) ++#define GET_INDEX(lock_ptr) (lock_ptr)->index ++ ++#define PUT_RWINDEX(rwlock_ptr,indexv) (rwlock_ptr)->index = (indexv) ++#define GET_RWINDEX(rwlock_ptr) (rwlock_ptr)->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) (rwlock_ptr)->cpu = (cpuv) ++#define GET_RW_CPU(rwlock_ptr) (rwlock_ptr)->cpu ++ ++#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) ++ ++extern inline int rwlock_readers(rwlock_t *rwlock_ptr) ++{ ++ signed int tmp = rwlock_ptr->lock; ++ ++ if (tmp > 0) ++ return tmp; ++ else ++ return 0; ++} ++ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) < 0) ++#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) > 0) ++ ++#define get_cycles64() get_cycles() ++ ++#endif /* _SPARC64_LOCKMETER_H */ +Index: linux/include/asm-x86_64/lockmeter.h +=================================================================== +--- linux.orig/include/asm-x86_64/lockmeter.h ++++ linux/include/asm-x86_64/lockmeter.h +@@ -0,0 +1,102 @@ ++/* ++ * Copyright (C) 1999,2000 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ * ++ * Modified by Ray Bryant (raybry@us.ibm.com) ++ * Changes Copyright (C) 2000 IBM, Inc. ++ * Added save of index in spinlock_t to improve efficiency ++ * of "hold" time reporting for spinlocks. ++ * Added support for hold time statistics for read and write ++ * locks. ++ * Moved machine dependent code here from include/lockmeter.h. ++ * ++ */ ++ ++#ifndef _X8664_LOCKMETER_H ++#define _X8664_LOCKMETER_H ++ ++#include ++#include ++ ++#include ++ ++#ifdef __KERNEL__ ++extern unsigned int cpu_khz; ++#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000) ++#else ++#define CPU_CYCLE_FREQUENCY 450000000 ++#endif ++ ++#define THIS_CPU_NUMBER smp_processor_id() ++ ++/* ++ * macros to cache and retrieve an index value inside of a spin lock ++ * these macros assume that there are less than 65536 simultaneous ++ * (read mode) holders of a rwlock. Not normally a problem!! ++ * we also assume that the hash table has less than 65535 entries. ++ */ ++/* ++ * instrumented spinlock structure -- never used to allocate storage ++ * only used in macros below to overlay a spinlock_t ++ */ ++typedef struct inst_spinlock_s { ++ /* remember, Intel is little endian */ ++ unsigned short lock; ++ unsigned short index; ++} inst_spinlock_t; ++#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv ++#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index ++ ++/* ++ * macros to cache and retrieve an index value in a read/write lock ++ * as well as the cpu where a reader busy period started ++ * we use the 2nd word (the debug word) for this, so require the ++ * debug word to be present ++ */ ++/* ++ * instrumented rwlock structure -- never used to allocate storage ++ * only used in macros below to overlay a rwlock_t ++ */ ++typedef struct inst_rwlock_s { ++ volatile int lock; ++ unsigned short index; ++ unsigned short cpu; ++} inst_rwlock_t; ++#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv ++#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index ++#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv ++#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu ++ ++/* ++ * return the number of readers for a rwlock_t ++ */ ++#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) ++ ++extern inline int rwlock_readers(rwlock_t *rwlock_ptr) ++{ ++ int tmp = (int) rwlock_ptr->lock; ++ /* read and write lock attempts may cause the lock value to temporarily */ ++ /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */ ++ /* is -1 because it was write locked and somebody tried to read lock it */ ++ /* or if it is -1 because it was read locked and somebody tried to write*/ ++ /* lock it. ........................................................... */ ++ do { ++ tmp = (int) rwlock_ptr->lock; ++ } while (tmp < 0); ++ if (tmp == 0) return(0); ++ else return(RW_LOCK_BIAS-tmp); ++} ++ ++/* ++ * return true if rwlock is write locked ++ * (note that other lock attempts can cause the lock value to be negative) ++ */ ++#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0) ++#define IABS(x) ((x) > 0 ? (x) : -(x)) ++#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0) ++ ++#define get_cycles64() get_cycles() ++ ++#endif /* _X8664_LOCKMETER_H */ +Index: linux/include/asm-x86_64/spinlock.h +=================================================================== +--- linux.orig/include/asm-x86_64/spinlock.h ++++ linux/include/asm-x86_64/spinlock.h +@@ -136,6 +136,11 @@ static inline void _raw_spin_lock(spinlo + */ + typedef struct { + volatile unsigned int lock; ++#ifdef CONFIG_LOCKMETER ++ /* required for LOCKMETER since all bits in lock are used */ ++ /* and we need this storage for CPU and lock INDEX */ ++ unsigned lockmeter_magic; ++#endif + #ifdef CONFIG_DEBUG_SPINLOCK + unsigned magic; + #endif +@@ -143,11 +148,19 @@ typedef struct { + + #define RWLOCK_MAGIC 0xdeaf1eed + ++#ifdef CONFIG_LOCKMETER ++#ifdef CONFIG_DEBUG_SPINLOCK ++#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC ++#else ++#define RWLOCK_MAGIC_INIT , 0 ++#endif ++#else /* !CONFIG_LOCKMETER */ + #ifdef CONFIG_DEBUG_SPINLOCK + #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC + #else + #define RWLOCK_MAGIC_INIT /* */ + #endif ++#endif /* !CONFIG_LOCKMETER */ + + #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +@@ -194,4 +207,47 @@ static inline int _raw_write_trylock(rwl + return 0; + } + ++#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK) ++extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc); ++extern void _metered_spin_unlock(spinlock_t *lock); ++ ++/* ++ * Matches what is in arch/x86_64/lib/dec_and_lock.c, except this one is ++ * "static inline" so that the spin_lock(), if actually invoked, is charged ++ * against the real caller, not against the catch-all atomic_dec_and_lock ++ */ ++static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) ++{ ++ int counter; ++ int newcount; ++ ++repeat: ++ counter = atomic_read(atomic); ++ newcount = counter-1; ++ ++ if (!newcount) ++ goto slow_path; ++ ++ asm volatile("lock; cmpxchgl %1,%2" ++ :"=a" (newcount) ++ :"r" (newcount), "m" (atomic->counter), "0" (counter)); ++ ++ /* If the above failed, "eax" will have changed */ ++ if (newcount != counter) ++ goto repeat; ++ return 0; ++ ++slow_path: ++ preempt_disable(); ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ _metered_spin_unlock(lock); ++ preempt_enable(); ++ return 0; ++} ++ ++#define ATOMIC_DEC_AND_LOCK ++#endif ++ + #endif /* __ASM_SPINLOCK_H */ +Index: linux/include/linux/lockmeter.h +=================================================================== +--- linux.orig/include/linux/lockmeter.h ++++ linux/include/linux/lockmeter.h +@@ -0,0 +1,320 @@ ++/* ++ * Copyright (C) 1999-2002 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.h by Jack Steiner (steiner@sgi.com) ++ * ++ * Modified by Ray Bryant (raybry@us.ibm.com) Feb-Apr 2000 ++ * Changes Copyright (C) 2000 IBM, Inc. ++ * Added save of index in spinlock_t to improve efficiency ++ * of "hold" time reporting for spinlocks ++ * Added support for hold time statistics for read and write ++ * locks. ++ * Moved machine dependent code to include/asm/lockmeter.h. ++ * ++ */ ++ ++#ifndef _LINUX_LOCKMETER_H ++#define _LINUX_LOCKMETER_H ++ ++ ++/*--------------------------------------------------- ++ * architecture-independent lockmeter.h ++ *-------------------------------------------------*/ ++ ++/* ++ * raybry -- version 2: added efficient hold time statistics ++ * requires lstat recompile, so flagged as new version ++ * raybry -- version 3: added global reader lock data ++ * hawkes -- version 4: removed some unnecessary fields to simplify mips64 port ++ */ ++#define LSTAT_VERSION 5 ++ ++int lstat_update(void*, void*, int); ++int lstat_update_time(void*, void*, int, uint32_t); ++ ++/* ++ * Currently, the mips64 and sparc64 kernels talk to a 32-bit lockstat, so we ++ * need to force compatibility in the inter-communication data structure. ++ */ ++ ++#if defined(CONFIG_MIPS32_COMPAT) ++#define TIME_T uint32_t ++#elif defined(CONFIG_SPARC) || defined(CONFIG_SPARC64) ++#define TIME_T uint64_t ++#else ++#define TIME_T time_t ++#endif ++ ++#if defined(__KERNEL__) || (!defined(CONFIG_MIPS32_COMPAT) && !defined(CONFIG_SPARC) && !defined(CONFIG_SPARC64)) || (_MIPS_SZLONG==32) ++#define POINTER void * ++#else ++#define POINTER int64_t ++#endif ++ ++/* ++ * Values for the "action" parameter passed to lstat_update. ++ * ZZZ - do we want a try-success status here??? ++ */ ++#define LSTAT_ACT_NO_WAIT 0 ++#define LSTAT_ACT_SPIN 1 ++#define LSTAT_ACT_REJECT 2 ++#define LSTAT_ACT_WW_SPIN 3 ++#define LSTAT_ACT_SLEPT 4 /* UNUSED */ ++ ++#define LSTAT_ACT_MAX_VALUES 4 /* NOTE: Increase to 5 if use ACT_SLEPT */ ++ ++/* ++ * Special values for the low 2 bits of an RA passed to ++ * lstat_update. ++ */ ++/* we use these values to figure out what kind of lock data */ ++/* is stored in the statistics table entry at index ....... */ ++#define LSTAT_RA_SPIN 0 /* spin lock data */ ++#define LSTAT_RA_READ 1 /* read lock statistics */ ++#define LSTAT_RA_SEMA 2 /* RESERVED */ ++#define LSTAT_RA_WRITE 3 /* write lock statistics*/ ++ ++#define LSTAT_RA(n) \ ++ ((void*)( ((unsigned long) caller_pc & ~3) | n) ) ++ ++/* ++ * Constants used for lock addresses in the lstat_directory ++ * to indicate special values of the lock address. ++ */ ++#define LSTAT_MULTI_LOCK_ADDRESS NULL ++ ++/* ++ * Maximum size of the lockstats tables. Increase this value ++ * if its not big enough. (Nothing bad happens if its not ++ * big enough although some locks will not be monitored.) ++ * We record overflows of this quantity in lstat_control.dir_overflows ++ * ++ * Note: The max value here must fit into the field set ++ * and obtained by the macro's PUT_INDEX() and GET_INDEX(). ++ * This value depends on how many bits are available in the ++ * lock word in the particular machine implementation we are on. ++ */ ++#define LSTAT_MAX_STAT_INDEX 2000 ++ ++/* ++ * Size and mask for the hash table into the directory. ++ */ ++#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ ++#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) ++ ++#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) ++ ++/* ++ * This defines an entry in the lockstat directory. It contains ++ * information about a lock being monitored. ++ * A directory entry only contains the lock identification - ++ * counts on usage of the lock are kept elsewhere in a per-cpu ++ * data structure to minimize cache line pinging. ++ */ ++typedef struct { ++ POINTER caller_ra; /* RA of code that set lock */ ++ POINTER lock_ptr; /* lock address */ ++ ushort next_stat_index; /* Used to link multiple locks that have the same hash table value */ ++} lstat_directory_entry_t; ++ ++/* ++ * A multi-dimensioned array used to contain counts for lock accesses. ++ * The array is 3-dimensional: ++ * - CPU number. Keep from thrashing cache lines between CPUs ++ * - Directory entry index. Identifies the lock ++ * - Action. Indicates what kind of contention occurred on an ++ * access to the lock. ++ * ++ * The index of an entry in the directory is the same as the 2nd index ++ * of the entry in the counts array. ++ */ ++/* ++ * This table contains data for spin_locks, write locks, and read locks ++ * Not all data is used for all cases. In particular, the hold time ++ * information is not stored here for read locks since that is a global ++ * (e. g. cannot be separated out by return address) quantity. ++ * See the lstat_read_lock_counts_t structure for the global read lock ++ * hold time. ++ */ ++typedef struct { ++ uint64_t cum_wait_ticks; /* sum of wait times */ ++ /* for write locks, sum of time a */ ++ /* writer is waiting for a reader */ ++ int64_t cum_hold_ticks; /* cumulative sum of holds */ ++ /* not used for read mode locks */ ++ /* must be signed. ............... */ ++ uint32_t max_wait_ticks; /* max waiting time */ ++ uint32_t max_hold_ticks; /* max holding time */ ++ uint64_t cum_wait_ww_ticks; /* sum times writer waits on writer*/ ++ uint32_t max_wait_ww_ticks; /* max wait time writer vs writer */ ++ /* prev 2 only used for write locks*/ ++ uint32_t acquire_time; /* time lock acquired this CPU */ ++ uint32_t count[LSTAT_ACT_MAX_VALUES]; ++} lstat_lock_counts_t; ++ ++typedef lstat_lock_counts_t lstat_cpu_counts_t[LSTAT_MAX_STAT_INDEX]; ++ ++/* ++ * User request to: ++ * - turn statistic collection on/off, or to reset ++ */ ++#define LSTAT_OFF 0 ++#define LSTAT_ON 1 ++#define LSTAT_RESET 2 ++#define LSTAT_RELEASE 3 ++ ++#define LSTAT_MAX_READ_LOCK_INDEX 1000 ++typedef struct { ++ POINTER lock_ptr; /* address of lock for output stats */ ++ uint32_t read_lock_count; ++ int64_t cum_hold_ticks; /* sum of read lock hold times over */ ++ /* all callers. ....................*/ ++ uint32_t write_index; /* last write lock hash table index */ ++ uint32_t busy_periods; /* count of busy periods ended this */ ++ uint64_t start_busy; /* time this busy period started. ..*/ ++ uint64_t busy_ticks; /* sum of busy periods this lock. ..*/ ++ uint64_t max_busy; /* longest busy period for this lock*/ ++ uint32_t max_readers; /* maximum number of readers ...... */ ++#ifdef USER_MODE_TESTING ++ rwlock_t entry_lock; /* lock for this read lock entry... */ ++ /* avoid having more than one rdr at*/ ++ /* needed for user space testing... */ ++ /* not needed for kernel 'cause it */ ++ /* is non-preemptive. ............. */ ++#endif ++} lstat_read_lock_counts_t; ++typedef lstat_read_lock_counts_t lstat_read_lock_cpu_counts_t[LSTAT_MAX_READ_LOCK_INDEX]; ++ ++#if defined(__KERNEL__) || defined(USER_MODE_TESTING) ++ ++#ifndef USER_MODE_TESTING ++#include ++#else ++#include "asm_newlockmeter.h" ++#endif ++ ++/* ++ * Size and mask for the hash table into the directory. ++ */ ++#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ ++#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) ++ ++#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) ++ ++/* ++ * This version eliminates the per processor lock stack. What we do is to ++ * store the index of the lock hash structure in unused bits in the lock ++ * itself. Then on unlock we can find the statistics record without doing ++ * any additional hash or lock stack lookup. This works for spin_locks. ++ * Hold time reporting is now basically as cheap as wait time reporting ++ * so we ignore the difference between LSTAT_ON_HOLD and LSTAT_ON_WAIT ++ * as in version 1.1.* of lockmeter. ++ * ++ * For rw_locks, we store the index of a global reader stats structure in ++ * the lock and the writer index is stored in the latter structure. ++ * For read mode locks we hash at the time of the lock to find an entry ++ * in the directory for reader wait time and the like. ++ * At unlock time for read mode locks, we update just the global structure ++ * so we don't need to know the reader directory index value at unlock time. ++ * ++ */ ++ ++/* ++ * Protocol to change lstat_control.state ++ * This is complicated because we don't want the cum_hold_time for ++ * a rw_lock to be decremented in _read_lock_ without making sure it ++ * is incremented in _read_lock_ and vice versa. So here is the ++ * way we change the state of lstat_control.state: ++ * I. To Turn Statistics On ++ * After allocating storage, set lstat_control.state non-zero. ++ * This works because we don't start updating statistics for in use ++ * locks until the reader lock count goes to zero. ++ * II. To Turn Statistics Off: ++ * (0) Disable interrupts on this CPU ++ * (1) Seize the lstat_control.directory_lock ++ * (2) Obtain the current value of lstat_control.next_free_read_lock_index ++ * (3) Store a zero in lstat_control.state. ++ * (4) Release the lstat_control.directory_lock ++ * (5) For each lock in the read lock list up to the saved value ++ * (well, -1) of the next_free_read_lock_index, do the following: ++ * (a) Check validity of the stored lock address ++ * by making sure that the word at the saved addr ++ * has an index that matches this entry. If not ++ * valid, then skip this entry. ++ * (b) If there is a write lock already set on this lock, ++ * skip to (d) below. ++ * (c) Set a non-metered write lock on the lock ++ * (d) set the cached INDEX in the lock to zero ++ * (e) Release the non-metered write lock. ++ * (6) Re-enable interrupts ++ * ++ * These rules ensure that a read lock will not have its statistics ++ * partially updated even though the global lock recording state has ++ * changed. See put_lockmeter_info() for implementation. ++ * ++ * The reason for (b) is that there may be write locks set on the ++ * syscall path to put_lockmeter_info() from user space. If we do ++ * not do this check, then we can deadlock. A similar problem would ++ * occur if the lock was read locked by the current CPU. At the ++ * moment this does not appear to happen. ++ */ ++ ++/* ++ * Main control structure for lockstat. Used to turn statistics on/off ++ * and to maintain directory info. ++ */ ++typedef struct { ++ int state; ++ spinlock_t control_lock; /* used to serialize turning statistics on/off */ ++ spinlock_t directory_lock; /* for serialize adding entries to directory */ ++ volatile int next_free_dir_index;/* next free entry in the directory */ ++ /* FIXME not all of these fields are used / needed .............. */ ++ /* the following fields represent data since */ ++ /* first "lstat on" or most recent "lstat reset" */ ++ TIME_T first_started_time; /* time when measurement first enabled */ ++ TIME_T started_time; /* time when measurement last started */ ++ TIME_T ending_time; /* time when measurement last disabled */ ++ uint64_t started_cycles64; /* cycles when measurement last started */ ++ uint64_t ending_cycles64; /* cycles when measurement last disabled */ ++ uint64_t enabled_cycles64; /* total cycles with measurement enabled */ ++ int intervals; /* number of measurement intervals recorded */ ++ /* i. e. number of times did lstat on;lstat off */ ++ lstat_directory_entry_t *dir; /* directory */ ++ int dir_overflow; /* count of times ran out of space in directory */ ++ int rwlock_overflow; /* count of times we couldn't allocate a rw block*/ ++ ushort *hashtab; /* hash table for quick dir scans */ ++ lstat_cpu_counts_t *counts[NR_CPUS]; /* Array of pointers to per-cpu stats */ ++ int next_free_read_lock_index; /* next rwlock reader (global) stats block */ ++ lstat_read_lock_cpu_counts_t *read_lock_counts[NR_CPUS]; /* per cpu read lock stats */ ++} lstat_control_t; ++ ++#endif /* defined(__KERNEL__) || defined(USER_MODE_TESTING) */ ++ ++typedef struct { ++ short lstat_version; /* version of the data */ ++ short state; /* the current state is returned */ ++ int maxcpus; /* Number of cpus present */ ++ int next_free_dir_index; /* index of the next free directory entry */ ++ TIME_T first_started_time; /* when measurement enabled for first time */ ++ TIME_T started_time; /* time in secs since 1969 when stats last turned on */ ++ TIME_T ending_time; /* time in secs since 1969 when stats last turned off */ ++ uint32_t cycleval; /* cycles per second */ ++#ifdef notyet ++ void *kernel_magic_addr; /* address of kernel_magic */ ++ void *kernel_end_addr; /* contents of kernel magic (points to "end") */ ++#endif ++ int next_free_read_lock_index; /* index of next (global) read lock stats struct */ ++ uint64_t started_cycles64; /* cycles when measurement last started */ ++ uint64_t ending_cycles64; /* cycles when stats last turned off */ ++ uint64_t enabled_cycles64; /* total cycles with measurement enabled */ ++ int intervals; /* number of measurement intervals recorded */ ++ /* i.e. number of times we did lstat on;lstat off*/ ++ int dir_overflow; /* number of times we wanted more space in directory */ ++ int rwlock_overflow; /* # of times we wanted more space in read_locks_count */ ++ struct new_utsname uts; /* info about machine where stats are measured */ ++ /* -T option of lockstat allows data to be */ ++ /* moved to another machine. ................. */ ++} lstat_user_request_t; ++ ++#endif /* _LINUX_LOCKMETER_H */ +Index: linux/include/linux/spinlock.h +=================================================================== +--- linux.orig/include/linux/spinlock.h ++++ linux/include/linux/spinlock.h +@@ -74,7 +74,16 @@ void __lockfunc _write_unlock_irqrestore + void __lockfunc _write_unlock_irq(rwlock_t *lock); + void __lockfunc _write_unlock_bh(rwlock_t *lock); + int __lockfunc _spin_trylock_bh(spinlock_t *lock); +-int in_lock_functions(unsigned long addr); ++ ++static inline int in_lock_functions(unsigned long addr) ++{ ++ /* Linker adds these: start and end of __lockfunc functions */ ++ extern char __lock_text_start[], __lock_text_end[]; ++ ++ return addr >= (unsigned long)__lock_text_start ++ && addr < (unsigned long)__lock_text_end; ++} ++ + #else + + #define in_lock_functions(ADDR) 0 +@@ -472,17 +481,6 @@ do { \ + 1 : ({local_irq_restore(flags); 0;}); \ + }) + +-#ifdef CONFIG_LOCKMETER +-extern void _metered_spin_lock (spinlock_t *lock); +-extern void _metered_spin_unlock (spinlock_t *lock); +-extern int _metered_spin_trylock(spinlock_t *lock); +-extern void _metered_read_lock (rwlock_t *lock); +-extern void _metered_read_unlock (rwlock_t *lock); +-extern void _metered_write_lock (rwlock_t *lock); +-extern void _metered_write_unlock (rwlock_t *lock); +-extern int _metered_write_trylock(rwlock_t *lock); +-#endif +- + /* "lock on reference count zero" */ + #ifndef ATOMIC_DEC_AND_LOCK + #include +@@ -558,5 +556,4 @@ static inline int bit_spin_is_locked(int + return 1; + #endif + } +- + #endif /* __LINUX_SPINLOCK_H */ +Index: linux/kernel/Makefile +=================================================================== +--- linux.orig/kernel/Makefile ++++ linux/kernel/Makefile +@@ -11,7 +11,12 @@ obj-y = sched.o fork.o exec_domain.o + + obj-$(CONFIG_FUTEX) += futex.o + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o ++ifneq ($(CONFIG_LOCKMETER),y) + obj-$(CONFIG_SMP) += cpu.o spinlock.o ++else ++obj-$(CONFIG_SMP) += cpu.o ++obj-$(CONFIG_LOCKMETER) += lockmeter.o ++endif + obj-$(CONFIG_UID16) += uid16.o + obj-$(CONFIG_MODULES) += module.o module-verify.o + obj-$(CONFIG_MODULE_SIG) += module-verify-sig.o +Index: linux/kernel/lockmeter.c +=================================================================== +--- linux.orig/kernel/lockmeter.c ++++ linux/kernel/lockmeter.c +@@ -0,0 +1,1512 @@ ++/* ++ * Copyright (C) 1999,2000 Silicon Graphics, Inc. ++ * ++ * Written by John Hawkes (hawkes@sgi.com) ++ * Based on klstat.c by Jack Steiner (steiner@sgi.com) ++ * ++ * Modified by Ray Bryant (raybry@us.ibm.com) ++ * Changes Copyright (C) 2000 IBM, Inc. ++ * Added save of index in spinlock_t to improve efficiency ++ * of "hold" time reporting for spinlocks ++ * Added support for hold time statistics for read and write ++ * locks. ++ * ++ * Modified by Ray Bryant (raybry@sgi.com) ++ * Changes Copyright (C) 2004, Silicon Graphics, Inc. ++ * Fix to work with out-of-line spinlocks. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define ASSERT(cond) ++#define bzero(loc,size) memset(loc,0,size) ++ ++/*<---------------------------------------------------*/ ++/* lockmeter.c */ ++/*>---------------------------------------------------*/ ++ ++static lstat_control_t lstat_control __cacheline_aligned = ++ { LSTAT_OFF, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED, ++ 19 * 0, NR_CPUS * 0, 0, NR_CPUS * 0 }; ++ ++static ushort lstat_make_dir_entry(void *, void *); ++ ++/* ++ * lstat_lookup ++ * ++ * Given a RA, locate the directory entry for the lock. ++ */ ++static ushort ++lstat_lookup(void *lock_ptr, void *caller_ra) ++{ ++ ushort index; ++ lstat_directory_entry_t *dirp; ++ ++ dirp = lstat_control.dir; ++ ++ index = lstat_control.hashtab[DIRHASH(caller_ra)]; ++ while (dirp[index].caller_ra != caller_ra) { ++ if (index == 0) { ++ return lstat_make_dir_entry(lock_ptr, caller_ra); ++ } ++ index = dirp[index].next_stat_index; ++ } ++ ++ if (dirp[index].lock_ptr != NULL && dirp[index].lock_ptr != lock_ptr) { ++ dirp[index].lock_ptr = NULL; ++ } ++ ++ return index; ++} ++ ++/* ++ * lstat_make_dir_entry ++ * Called to add a new lock to the lock directory. ++ */ ++static ushort ++lstat_make_dir_entry(void *lock_ptr, void *caller_ra) ++{ ++ lstat_directory_entry_t *dirp; ++ ushort index, hindex; ++ unsigned long flags; ++ ++ /* lock the table without recursively reentering this metering code */ ++ local_irq_save(flags); ++ _raw_spin_lock(&lstat_control.directory_lock); ++ ++ hindex = DIRHASH(caller_ra); ++ index = lstat_control.hashtab[hindex]; ++ dirp = lstat_control.dir; ++ while (index && dirp[index].caller_ra != caller_ra) ++ index = dirp[index].next_stat_index; ++ ++ if (index == 0) { ++ if (lstat_control.next_free_dir_index < LSTAT_MAX_STAT_INDEX) { ++ index = lstat_control.next_free_dir_index++; ++ lstat_control.dir[index].caller_ra = caller_ra; ++ lstat_control.dir[index].lock_ptr = lock_ptr; ++ lstat_control.dir[index].next_stat_index = ++ lstat_control.hashtab[hindex]; ++ lstat_control.hashtab[hindex] = index; ++ } else { ++ lstat_control.dir_overflow++; ++ } ++ } ++ _raw_spin_unlock(&lstat_control.directory_lock); ++ local_irq_restore(flags); ++ return index; ++} ++ ++int ++lstat_update(void *lock_ptr, void *caller_ra, int action) ++{ ++ int index; ++ int cpu; ++ ++ ASSERT(action < LSTAT_ACT_MAX_VALUES); ++ ++ if (lstat_control.state == LSTAT_OFF) ++ return 0; ++ ++ index = lstat_lookup(lock_ptr, caller_ra); ++ cpu = THIS_CPU_NUMBER; ++ (*lstat_control.counts[cpu])[index].count[action]++; ++ (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); ++ ++ return index; ++} ++ ++int ++lstat_update_time(void *lock_ptr, void *caller_ra, int action, uint32_t ticks) ++{ ++ ushort index; ++ int cpu; ++ ++ ASSERT(action < LSTAT_ACT_MAX_VALUES); ++ ++ if (lstat_control.state == LSTAT_OFF) ++ return 0; ++ ++ index = lstat_lookup(lock_ptr, caller_ra); ++ cpu = THIS_CPU_NUMBER; ++ (*lstat_control.counts[cpu])[index].count[action]++; ++ (*lstat_control.counts[cpu])[index].cum_wait_ticks += (uint64_t) ticks; ++ if ((*lstat_control.counts[cpu])[index].max_wait_ticks < ticks) ++ (*lstat_control.counts[cpu])[index].max_wait_ticks = ticks; ++ ++ (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); ++ ++ return index; ++} ++ ++void ++_metered_spin_lock(spinlock_t * lock_ptr, void *caller_pc) ++{ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_spin_lock(lock_ptr); /* do the real lock */ ++ PUT_INDEX(lock_ptr, 0); /* clean index in case lockmetering */ ++ /* gets turned on before unlock */ ++ } else { ++ void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); ++ int index; ++ ++ if (_raw_spin_trylock(lock_ptr)) { ++ index = lstat_update(lock_ptr, this_pc, ++ LSTAT_ACT_NO_WAIT); ++ } else { ++ uint32_t start_cycles = get_cycles(); ++ _raw_spin_lock(lock_ptr); /* do the real lock */ ++ index = lstat_update_time(lock_ptr, this_pc, ++ LSTAT_ACT_SPIN, get_cycles() - start_cycles); ++ } ++ /* save the index in the lock itself for use in spin unlock */ ++ PUT_INDEX(lock_ptr, index); ++ } ++} ++/* some archs require this for atomic_dec_and_lock in modules */ ++EXPORT_SYMBOL(_metered_spin_lock); ++ ++void ++_metered_spin_lock_flags(spinlock_t * lock_ptr, unsigned long flags, ++ void *caller_pc) ++{ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_spin_lock(lock_ptr); /* do the real lock */ ++ PUT_INDEX(lock_ptr, 0); /* clean index in case lockmetering */ ++ /* gets turned on before unlock */ ++ } else { ++ void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); ++ int index; ++ ++ if (_raw_spin_trylock(lock_ptr)) { ++ index = lstat_update(lock_ptr, this_pc, ++ LSTAT_ACT_NO_WAIT); ++ } else { ++ uint32_t start_cycles = get_cycles(); ++ /* do the real lock */ ++ _raw_spin_lock_flags(lock_ptr, flags); ++ index = lstat_update_time(lock_ptr, this_pc, ++ LSTAT_ACT_SPIN, get_cycles() - start_cycles); ++ } ++ /* save the index in the lock itself for use in spin unlock */ ++ PUT_INDEX(lock_ptr, index); ++ } ++} ++ ++int ++_metered_spin_trylock(spinlock_t * lock_ptr, void *caller_pc) ++{ ++ if (lstat_control.state == LSTAT_OFF) { ++ return _raw_spin_trylock(lock_ptr); ++ } else { ++ int retval; ++ void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); ++ ++ if ((retval = _raw_spin_trylock(lock_ptr))) { ++ int index = lstat_update(lock_ptr, this_pc, ++ LSTAT_ACT_NO_WAIT); ++ /* ++ * save the index in the lock itself for use in spin ++ * unlock ++ */ ++ PUT_INDEX(lock_ptr, index); ++ } else { ++ lstat_update(lock_ptr, this_pc, LSTAT_ACT_REJECT); ++ } ++ ++ return retval; ++ } ++} ++ ++void ++_metered_spin_unlock(spinlock_t * lock_ptr) ++{ ++ int index = -1; ++ ++ if (lstat_control.state != LSTAT_OFF) { ++ index = GET_INDEX(lock_ptr); ++ /* ++ * If statistics were turned off when we set the lock, ++ * then the index can be zero. If that is the case, ++ * then collect no stats on this call. ++ */ ++ if (index > 0) { ++ uint32_t hold_time; ++ int cpu = THIS_CPU_NUMBER; ++ hold_time = get_cycles() - ++ (*lstat_control.counts[cpu])[index].acquire_time; ++ (*lstat_control.counts[cpu])[index].cum_hold_ticks += ++ (uint64_t) hold_time; ++ if ((*lstat_control.counts[cpu])[index].max_hold_ticks < ++ hold_time) ++ (*lstat_control.counts[cpu])[index]. ++ max_hold_ticks = hold_time; ++ } ++ } ++ ++ /* make sure we don't have a stale index value saved */ ++ PUT_INDEX(lock_ptr, 0); ++ _raw_spin_unlock(lock_ptr); /* do the real unlock */ ++} ++/* some archs require this for atomic_dec_and_lock in modules*/ ++EXPORT_SYMBOL(_metered_spin_unlock); ++ ++/* ++ * allocate the next global read lock structure and store its index ++ * in the rwlock at "lock_ptr". ++ */ ++uint32_t ++alloc_rwlock_struct(rwlock_t * rwlock_ptr) ++{ ++ int index; ++ unsigned long flags; ++ int cpu = THIS_CPU_NUMBER; ++ ++ /* If we've already overflowed, then do a quick exit */ ++ if (lstat_control.next_free_read_lock_index > ++ LSTAT_MAX_READ_LOCK_INDEX) { ++ lstat_control.rwlock_overflow++; ++ return 0; ++ } ++ ++ local_irq_save(flags); ++ _raw_spin_lock(&lstat_control.directory_lock); ++ ++ /* It is possible this changed while we were waiting for the directory_lock */ ++ if (lstat_control.state == LSTAT_OFF) { ++ index = 0; ++ goto unlock; ++ } ++ ++ /* It is possible someone else got here first and set the index */ ++ if ((index = GET_RWINDEX(rwlock_ptr)) == 0) { ++ /* ++ * we can't turn on read stats for this lock while there are ++ * readers (this would mess up the running hold time sum at ++ * unlock time) ++ */ ++ if (RWLOCK_READERS(rwlock_ptr) != 0) { ++ index = 0; ++ goto unlock; ++ } ++ ++ /* ++ * if stats are turned on after being off, we may need to ++ * return an old index from when the statistics were on last ++ * time. ++ */ ++ for (index = 1; index < lstat_control.next_free_read_lock_index; ++ index++) ++ if ((*lstat_control.read_lock_counts[cpu])[index]. ++ lock_ptr == rwlock_ptr) ++ goto put_index_and_unlock; ++ ++ /* allocate the next global read lock structure */ ++ if (lstat_control.next_free_read_lock_index >= ++ LSTAT_MAX_READ_LOCK_INDEX) { ++ lstat_control.rwlock_overflow++; ++ index = 0; ++ goto unlock; ++ } ++ index = lstat_control.next_free_read_lock_index++; ++ ++ /* ++ * initialize the global read stats data structure for each ++ * cpu ++ */ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ (*lstat_control.read_lock_counts[cpu])[index].lock_ptr = ++ rwlock_ptr; ++ } ++put_index_and_unlock: ++ /* store the index for the read lock structure into the lock */ ++ PUT_RWINDEX(rwlock_ptr, index); ++ } ++ ++unlock: ++ _raw_spin_unlock(&lstat_control.directory_lock); ++ local_irq_restore(flags); ++ return index; ++} ++ ++void ++_metered_read_lock(rwlock_t * rwlock_ptr, void *caller_pc) ++{ ++ void *this_pc; ++ uint32_t start_cycles; ++ int index; ++ int cpu; ++ unsigned long flags; ++ int readers_before, readers_after; ++ uint64_t cycles64; ++ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_read_lock(rwlock_ptr); ++ /* clean index in case lockmetering turns on before an unlock */ ++ PUT_RWINDEX(rwlock_ptr, 0); ++ return; ++ } ++ ++ this_pc = LSTAT_RA(LSTAT_RA_READ); ++ cpu = THIS_CPU_NUMBER; ++ index = GET_RWINDEX(rwlock_ptr); ++ ++ /* allocate the global stats entry for this lock, if needed */ ++ if (index == 0) ++ index = alloc_rwlock_struct(rwlock_ptr); ++ ++ readers_before = RWLOCK_READERS(rwlock_ptr); ++ if (_raw_read_trylock(rwlock_ptr)) { ++ /* ++ * We have decremented the lock to count a new reader, ++ * and have confirmed that no writer has it locked. ++ */ ++ /* update statistics if enabled */ ++ if (index > 0) { ++ local_irq_save(flags); ++ lstat_update((void *) rwlock_ptr, this_pc, ++ LSTAT_ACT_NO_WAIT); ++ /* preserve value of TSC so cum_hold_ticks and start_busy use same value */ ++ cycles64 = get_cycles64(); ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ cum_hold_ticks -= cycles64; ++ ++ /* record time and cpu of start of busy period */ ++ /* this is not perfect (some race conditions are possible) */ ++ if (readers_before == 0) { ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ start_busy = cycles64; ++ PUT_RW_CPU(rwlock_ptr, cpu); ++ } ++ readers_after = RWLOCK_READERS(rwlock_ptr); ++ if (readers_after > ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ max_readers) ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ max_readers = readers_after; ++ local_irq_restore(flags); ++ } ++ ++ return; ++ } ++ /* If we get here, then we could not quickly grab the read lock */ ++ ++ start_cycles = get_cycles(); /* start counting the wait time */ ++ ++ /* Now spin until read_lock is successful */ ++ _raw_read_lock(rwlock_ptr); ++ ++ lstat_update_time((void *) rwlock_ptr, this_pc, LSTAT_ACT_SPIN, ++ get_cycles() - start_cycles); ++ ++ /* update statistics if they are enabled for this lock */ ++ if (index > 0) { ++ local_irq_save(flags); ++ cycles64 = get_cycles64(); ++ (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks -= ++ cycles64; ++ ++ /* this is not perfect (some race conditions are possible) */ ++ if (readers_before == 0) { ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ start_busy = cycles64; ++ PUT_RW_CPU(rwlock_ptr, cpu); ++ } ++ readers_after = RWLOCK_READERS(rwlock_ptr); ++ if (readers_after > ++ (*lstat_control.read_lock_counts[cpu])[index].max_readers) ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ max_readers = readers_after; ++ local_irq_restore(flags); ++ } ++} ++ ++void ++_metered_read_unlock(rwlock_t * rwlock_ptr) ++{ ++ int index; ++ int cpu; ++ unsigned long flags; ++ uint64_t busy_length; ++ uint64_t cycles64; ++ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_read_unlock(rwlock_ptr); ++ return; ++ } ++ ++ index = GET_RWINDEX(rwlock_ptr); ++ cpu = THIS_CPU_NUMBER; ++ ++ if (index > 0) { ++ local_irq_save(flags); ++ /* ++ * preserve value of TSC so cum_hold_ticks and busy_ticks are ++ * consistent. ++ */ ++ cycles64 = get_cycles64(); ++ (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks += ++ cycles64; ++ (*lstat_control.read_lock_counts[cpu])[index].read_lock_count++; ++ ++ /* ++ * once again, this is not perfect (some race conditions are ++ * possible) ++ */ ++ if (RWLOCK_READERS(rwlock_ptr) == 1) { ++ int cpu1 = GET_RW_CPU(rwlock_ptr); ++ uint64_t last_start_busy = ++ (*lstat_control.read_lock_counts[cpu1])[index]. ++ start_busy; ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ busy_periods++; ++ if (cycles64 > last_start_busy) { ++ busy_length = cycles64 - last_start_busy; ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ busy_ticks += busy_length; ++ if (busy_length > ++ (*lstat_control. ++ read_lock_counts[cpu])[index]. ++ max_busy) ++ (*lstat_control. ++ read_lock_counts[cpu])[index]. ++ max_busy = busy_length; ++ } ++ } ++ local_irq_restore(flags); ++ } ++ _raw_read_unlock(rwlock_ptr); ++} ++ ++void ++_metered_write_lock(rwlock_t * rwlock_ptr, void *caller_pc) ++{ ++ uint32_t start_cycles; ++ void *this_pc; ++ uint32_t spin_ticks = 0; /* in anticipation of a potential wait */ ++ int index; ++ int write_index = 0; ++ int cpu; ++ enum { ++ writer_writer_conflict, ++ writer_reader_conflict ++ } why_wait = writer_writer_conflict; ++ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_write_lock(rwlock_ptr); ++ /* clean index in case lockmetering turns on before an unlock */ ++ PUT_RWINDEX(rwlock_ptr, 0); ++ return; ++ } ++ ++ this_pc = LSTAT_RA(LSTAT_RA_WRITE); ++ cpu = THIS_CPU_NUMBER; ++ index = GET_RWINDEX(rwlock_ptr); ++ ++ /* allocate the global stats entry for this lock, if needed */ ++ if (index == 0) { ++ index = alloc_rwlock_struct(rwlock_ptr); ++ } ++ ++ if (_raw_write_trylock(rwlock_ptr)) { ++ /* We acquired the lock on the first try */ ++ write_index = lstat_update((void *) rwlock_ptr, this_pc, ++ LSTAT_ACT_NO_WAIT); ++ /* save the write_index for use in unlock if stats enabled */ ++ if (index > 0) ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ write_index = write_index; ++ return; ++ } ++ ++ /* If we get here, then we could not quickly grab the write lock */ ++ start_cycles = get_cycles(); /* start counting the wait time */ ++ ++ why_wait = RWLOCK_READERS(rwlock_ptr) ? ++ writer_reader_conflict : writer_writer_conflict; ++ ++ /* Now set the lock and wait for conflicts to disappear */ ++ _raw_write_lock(rwlock_ptr); ++ ++ spin_ticks = get_cycles() - start_cycles; ++ ++ /* update stats -- if enabled */ ++ if (index > 0 && spin_ticks) { ++ if (why_wait == writer_reader_conflict) { ++ /* waited due to a reader holding the lock */ ++ write_index = lstat_update_time((void *)rwlock_ptr, ++ this_pc, LSTAT_ACT_SPIN, spin_ticks); ++ } else { ++ /* ++ * waited due to another writer holding the lock ++ */ ++ write_index = lstat_update_time((void *)rwlock_ptr, ++ this_pc, LSTAT_ACT_WW_SPIN, spin_ticks); ++ (*lstat_control.counts[cpu])[write_index]. ++ cum_wait_ww_ticks += spin_ticks; ++ if (spin_ticks > ++ (*lstat_control.counts[cpu])[write_index]. ++ max_wait_ww_ticks) { ++ (*lstat_control.counts[cpu])[write_index]. ++ max_wait_ww_ticks = spin_ticks; ++ } ++ } ++ ++ /* save the directory index for use on write_unlock */ ++ (*lstat_control.read_lock_counts[cpu])[index]. ++ write_index = write_index; ++ } ++} ++ ++void ++_metered_write_unlock(rwlock_t * rwlock_ptr) ++{ ++ int index; ++ int cpu; ++ int write_index; ++ uint32_t hold_time; ++ ++ if (lstat_control.state == LSTAT_OFF) { ++ _raw_write_unlock(rwlock_ptr); ++ return; ++ } ++ ++ cpu = THIS_CPU_NUMBER; ++ index = GET_RWINDEX(rwlock_ptr); ++ ++ /* update statistics if stats enabled for this lock */ ++ if (index > 0) { ++ write_index = ++ (*lstat_control.read_lock_counts[cpu])[index].write_index; ++ ++ hold_time = get_cycles() - ++ (*lstat_control.counts[cpu])[write_index].acquire_time; ++ (*lstat_control.counts[cpu])[write_index].cum_hold_ticks += ++ (uint64_t) hold_time; ++ if ((*lstat_control.counts[cpu])[write_index].max_hold_ticks < ++ hold_time) ++ (*lstat_control.counts[cpu])[write_index]. ++ max_hold_ticks = hold_time; ++ } ++ _raw_write_unlock(rwlock_ptr); ++} ++ ++int ++_metered_write_trylock(rwlock_t * rwlock_ptr, void *caller_pc) ++{ ++ int retval; ++ void *this_pc = LSTAT_RA(LSTAT_RA_WRITE); ++ ++ if ((retval = _raw_write_trylock(rwlock_ptr))) { ++ lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT); ++ } else { ++ lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_REJECT); ++ } ++ ++ return retval; ++} ++ ++static void ++init_control_space(void) ++{ ++ /* Set all control space pointers to null and indices to "empty" */ ++ int cpu; ++ ++ /* ++ * Access CPU_CYCLE_FREQUENCY at the outset, which in some ++ * architectures may trigger a runtime calculation that uses a ++ * spinlock. Let's do this before lockmetering is turned on. ++ */ ++ if (CPU_CYCLE_FREQUENCY == 0) ++ BUG(); ++ ++ lstat_control.hashtab = NULL; ++ lstat_control.dir = NULL; ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ lstat_control.counts[cpu] = NULL; ++ lstat_control.read_lock_counts[cpu] = NULL; ++ } ++} ++ ++static int ++reset_lstat_data(void) ++{ ++ int cpu, flags; ++ ++ flags = 0; ++ lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ ++ lstat_control.next_free_read_lock_index = 1; ++ lstat_control.dir_overflow = 0; ++ lstat_control.rwlock_overflow = 0; ++ ++ lstat_control.started_cycles64 = 0; ++ lstat_control.ending_cycles64 = 0; ++ lstat_control.enabled_cycles64 = 0; ++ lstat_control.first_started_time = 0; ++ lstat_control.started_time = 0; ++ lstat_control.ending_time = 0; ++ lstat_control.intervals = 0; ++ ++ /* ++ * paranoia -- in case someone does a "lockstat reset" before ++ * "lockstat on" ++ */ ++ if (lstat_control.hashtab) { ++ bzero(lstat_control.hashtab, ++ LSTAT_HASH_TABLE_SIZE * sizeof (short)); ++ bzero(lstat_control.dir, LSTAT_MAX_STAT_INDEX * ++ sizeof (lstat_directory_entry_t)); ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ bzero(lstat_control.counts[cpu], ++ sizeof (lstat_cpu_counts_t)); ++ bzero(lstat_control.read_lock_counts[cpu], ++ sizeof (lstat_read_lock_cpu_counts_t)); ++ } ++ } ++#ifdef NOTDEF ++ _raw_spin_unlock(&lstat_control.directory_lock); ++ local_irq_restore(flags); ++#endif ++ return 1; ++} ++ ++static void ++release_control_space(void) ++{ ++ /* ++ * Called when either (1) allocation of kmem ++ * or (2) when user writes LSTAT_RELEASE to /pro/lockmeter. ++ * Assume that all pointers have been initialized to zero, ++ * i.e., nonzero pointers are valid addresses. ++ */ ++ int cpu; ++ ++ if (lstat_control.hashtab) { ++ kfree(lstat_control.hashtab); ++ lstat_control.hashtab = NULL; ++ } ++ ++ if (lstat_control.dir) { ++ vfree(lstat_control.dir); ++ lstat_control.dir = NULL; ++ } ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ if (lstat_control.counts[cpu]) { ++ vfree(lstat_control.counts[cpu]); ++ lstat_control.counts[cpu] = NULL; ++ } ++ if (lstat_control.read_lock_counts[cpu]) { ++ kfree(lstat_control.read_lock_counts[cpu]); ++ lstat_control.read_lock_counts[cpu] = NULL; ++ } ++ } ++} ++ ++int ++get_lockmeter_info_size(void) ++{ ++ return sizeof (lstat_user_request_t) ++ + num_online_cpus() * sizeof (lstat_cpu_counts_t) ++ + num_online_cpus() * sizeof (lstat_read_lock_cpu_counts_t) ++ + (LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t)); ++} ++ ++ssize_t ++get_lockmeter_info(char *buffer, size_t max_len, loff_t * last_index) ++{ ++ lstat_user_request_t req; ++ struct timeval tv; ++ ssize_t next_ret_bcount; ++ ssize_t actual_ret_bcount = 0; ++ int cpu; ++ ++ *last_index = 0; /* a one-shot read */ ++ ++ req.lstat_version = LSTAT_VERSION; ++ req.state = lstat_control.state; ++ req.maxcpus = num_online_cpus(); ++ req.cycleval = CPU_CYCLE_FREQUENCY; ++#ifdef notyet ++ req.kernel_magic_addr = (void *) &_etext; ++ req.kernel_end_addr = (void *) &_etext; ++#endif ++ req.uts = system_utsname; ++ req.intervals = lstat_control.intervals; ++ ++ req.first_started_time = lstat_control.first_started_time; ++ req.started_time = lstat_control.started_time; ++ req.started_cycles64 = lstat_control.started_cycles64; ++ ++ req.next_free_dir_index = lstat_control.next_free_dir_index; ++ req.next_free_read_lock_index = lstat_control.next_free_read_lock_index; ++ req.dir_overflow = lstat_control.dir_overflow; ++ req.rwlock_overflow = lstat_control.rwlock_overflow; ++ ++ if (lstat_control.state == LSTAT_OFF) { ++ if (req.intervals == 0) { ++ /* mesasurement is off and no valid data present */ ++ next_ret_bcount = sizeof (lstat_user_request_t); ++ req.enabled_cycles64 = 0; ++ ++ if ((actual_ret_bcount + next_ret_bcount) > max_len) ++ return actual_ret_bcount; ++ ++ copy_to_user(buffer, (void *) &req, next_ret_bcount); ++ actual_ret_bcount += next_ret_bcount; ++ return actual_ret_bcount; ++ } else { ++ /* ++ * measurement is off but valid data present ++ * fetch time info from lstat_control ++ */ ++ req.ending_time = lstat_control.ending_time; ++ req.ending_cycles64 = lstat_control.ending_cycles64; ++ req.enabled_cycles64 = lstat_control.enabled_cycles64; ++ } ++ } else { ++ /* ++ * this must be a read while data active--use current time, ++ * etc ++ */ ++ do_gettimeofday(&tv); ++ req.ending_time = tv.tv_sec; ++ req.ending_cycles64 = get_cycles64(); ++ req.enabled_cycles64 = req.ending_cycles64 - ++ req.started_cycles64 + lstat_control.enabled_cycles64; ++ } ++ ++ next_ret_bcount = sizeof (lstat_user_request_t); ++ if ((actual_ret_bcount + next_ret_bcount) > max_len) ++ return actual_ret_bcount; ++ ++ copy_to_user(buffer, (void *) &req, next_ret_bcount); ++ actual_ret_bcount += next_ret_bcount; ++ ++ if (!lstat_control.counts[0]) /* not initialized? */ ++ return actual_ret_bcount; ++ ++ next_ret_bcount = sizeof (lstat_cpu_counts_t); ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ if ((actual_ret_bcount + next_ret_bcount) > max_len) ++ return actual_ret_bcount; /* leave early */ ++ copy_to_user(buffer + actual_ret_bcount, ++ lstat_control.counts[cpu], next_ret_bcount); ++ actual_ret_bcount += next_ret_bcount; ++ } ++ ++ next_ret_bcount = LSTAT_MAX_STAT_INDEX * ++ sizeof (lstat_directory_entry_t); ++ if (((actual_ret_bcount + next_ret_bcount) > max_len) ++ || !lstat_control.dir) ++ return actual_ret_bcount; /* leave early */ ++ ++ copy_to_user(buffer + actual_ret_bcount, lstat_control.dir, ++ next_ret_bcount); ++ actual_ret_bcount += next_ret_bcount; ++ ++ next_ret_bcount = sizeof (lstat_read_lock_cpu_counts_t); ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ if (actual_ret_bcount + next_ret_bcount > max_len) ++ return actual_ret_bcount; ++ copy_to_user(buffer + actual_ret_bcount, ++ lstat_control.read_lock_counts[cpu], ++ next_ret_bcount); ++ actual_ret_bcount += next_ret_bcount; ++ } ++ ++ return actual_ret_bcount; ++} ++ ++/* ++ * Writing to the /proc lockmeter node enables or disables metering. ++ * based upon the first byte of the "written" data. ++ * The following values are defined: ++ * LSTAT_ON: 1st call: allocates storage, intializes and turns on measurement ++ * subsequent calls just turn on measurement ++ * LSTAT_OFF: turns off measurement ++ * LSTAT_RESET: resets statistics ++ * LSTAT_RELEASE: releases statistics storage ++ * ++ * This allows one to accumulate statistics over several lockstat runs: ++ * ++ * lockstat on ++ * lockstat off ++ * ...repeat above as desired... ++ * lockstat get ++ * ...now start a new set of measurements... ++ * lockstat reset ++ * lockstat on ++ * ... ++ * ++ */ ++ssize_t ++put_lockmeter_info(const char *buffer, size_t len) ++{ ++ int error = 0; ++ int dirsize, countsize, read_lock_countsize, hashsize; ++ int cpu; ++ char put_char; ++ int i, read_lock_blocks; ++ unsigned long flags; ++ rwlock_t *lock_ptr; ++ struct timeval tv; ++ ++ if (len <= 0) ++ return -EINVAL; ++ ++ _raw_spin_lock(&lstat_control.control_lock); ++ ++ get_user(put_char, buffer); ++ switch (put_char) { ++ ++ case LSTAT_OFF: ++ if (lstat_control.state != LSTAT_OFF) { ++ /* ++ * To avoid seeing read lock hold times in an ++ * inconsisent state, we have to follow this protocol ++ * to turn off statistics ++ */ ++ local_irq_save(flags); ++ /* ++ * getting this lock will stop any read lock block ++ * allocations ++ */ ++ _raw_spin_lock(&lstat_control.directory_lock); ++ /* ++ * keep any more read lock blocks from being ++ * allocated ++ */ ++ lstat_control.state = LSTAT_OFF; ++ /* record how may read lock blocks there are */ ++ read_lock_blocks = ++ lstat_control.next_free_read_lock_index; ++ _raw_spin_unlock(&lstat_control.directory_lock); ++ /* now go through the list of read locks */ ++ cpu = THIS_CPU_NUMBER; ++ for (i = 1; i < read_lock_blocks; i++) { ++ lock_ptr = ++ (*lstat_control.read_lock_counts[cpu])[i]. ++ lock_ptr; ++ /* is this saved lock address still valid? */ ++ if (GET_RWINDEX(lock_ptr) == i) { ++ /* ++ * lock address appears to still be ++ * valid because we only hold one lock ++ * at a time, this can't cause a ++ * deadlock unless this is a lock held ++ * as part of the current system call ++ * path. At the moment there ++ * are no READ mode locks held to get ++ * here from user space, so we solve ++ * this by skipping locks held in ++ * write mode. ++ */ ++ if (RWLOCK_IS_WRITE_LOCKED(lock_ptr)) { ++ PUT_RWINDEX(lock_ptr, 0); ++ continue; ++ } ++ /* ++ * now we know there are no read ++ * holders of this lock! stop ++ * statistics collection for this ++ * lock ++ */ ++ _raw_write_lock(lock_ptr); ++ PUT_RWINDEX(lock_ptr, 0); ++ _raw_write_unlock(lock_ptr); ++ } ++ /* ++ * it may still be possible for the hold time ++ * sum to be negative e.g. if a lock is ++ * reallocated while "busy" we will have to fix ++ * this up in the data reduction program. ++ */ ++ } ++ local_irq_restore(flags); ++ lstat_control.intervals++; ++ lstat_control.ending_cycles64 = get_cycles64(); ++ lstat_control.enabled_cycles64 += ++ lstat_control.ending_cycles64 - ++ lstat_control.started_cycles64; ++ do_gettimeofday(&tv); ++ lstat_control.ending_time = tv.tv_sec; ++ /* ++ * don't deallocate the structures -- we may do a ++ * lockstat on to add to the data that is already ++ * there. Use LSTAT_RELEASE to release storage ++ */ ++ } else { ++ error = -EBUSY; /* already OFF */ ++ } ++ break; ++ ++ case LSTAT_ON: ++ if (lstat_control.state == LSTAT_OFF) { ++#ifdef DEBUG_LOCKMETER ++ printk("put_lockmeter_info(cpu=%d): LSTAT_ON\n", ++ THIS_CPU_NUMBER); ++#endif ++ lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ ++ ++ dirsize = LSTAT_MAX_STAT_INDEX * ++ sizeof (lstat_directory_entry_t); ++ hashsize = ++ (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort); ++ countsize = sizeof (lstat_cpu_counts_t); ++ read_lock_countsize = ++ sizeof (lstat_read_lock_cpu_counts_t); ++#ifdef DEBUG_LOCKMETER ++ printk(" dirsize:%d", dirsize); ++ printk(" hashsize:%d", hashsize); ++ printk(" countsize:%d", countsize); ++ printk(" read_lock_countsize:%d\n", ++ read_lock_countsize); ++#endif ++#ifdef DEBUG_LOCKMETER ++ { ++ int secs; ++ unsigned long cycles; ++ uint64_t cycles64; ++ ++ do_gettimeofday(&tv); ++ secs = tv.tv_sec; ++ do { ++ do_gettimeofday(&tv); ++ } while (secs == tv.tv_sec); ++ cycles = get_cycles(); ++ cycles64 = get_cycles64(); ++ secs = tv.tv_sec; ++ do { ++ do_gettimeofday(&tv); ++ } while (secs == tv.tv_sec); ++ cycles = get_cycles() - cycles; ++ cycles64 = get_cycles64() - cycles; ++ printk("lockmeter: cycleFrequency:%d " ++ "cycles:%d cycles64:%d\n", ++ CPU_CYCLE_FREQUENCY, cycles, cycles64); ++ } ++#endif ++ ++ /* ++ * if this is the first call, allocate storage and ++ * initialize ++ */ ++ if (!lstat_control.hashtab) { ++ ++ spin_lock_init(&lstat_control.directory_lock); ++ ++ /* guarantee all pointers at zero */ ++ init_control_space(); ++ ++ lstat_control.hashtab = ++ kmalloc(hashsize, GFP_KERNEL); ++ if (!lstat_control.hashtab) { ++ error = -ENOSPC; ++#ifdef DEBUG_LOCKMETER ++ printk("!!error kmalloc of hashtab\n"); ++#endif ++ } ++ lstat_control.dir = vmalloc(dirsize); ++ if (!lstat_control.dir) { ++ error = -ENOSPC; ++#ifdef DEBUG_LOCKMETER ++ printk("!!error kmalloc of dir\n"); ++#endif ++ } ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ lstat_control.counts[cpu] = ++ vmalloc(countsize); ++ if (!lstat_control.counts[cpu]) { ++ error = -ENOSPC; ++#ifdef DEBUG_LOCKMETER ++ printk("!!error vmalloc of " ++ "counts[%d]\n", cpu); ++#endif ++ } ++ lstat_control.read_lock_counts[cpu] = ++ (lstat_read_lock_cpu_counts_t *) ++ kmalloc(read_lock_countsize, ++ GFP_KERNEL); ++ if (!lstat_control. ++ read_lock_counts[cpu]) { ++ error = -ENOSPC; ++#ifdef DEBUG_LOCKMETER ++ printk("!!error kmalloc of " ++ "read_lock_counts[%d]\n", ++ cpu); ++#endif ++ } ++ } ++ } ++ ++ if (error) { ++ /* ++ * One or more kmalloc failures -- free ++ * everything ++ */ ++ release_control_space(); ++ } else { ++ ++ if (!reset_lstat_data()) { ++ error = -EINVAL; ++ break; ++ }; ++ ++ /* ++ * record starting and ending times and the ++ * like ++ */ ++ if (lstat_control.intervals == 0) { ++ do_gettimeofday(&tv); ++ lstat_control.first_started_time = ++ tv.tv_sec; ++ } ++ lstat_control.started_cycles64 = get_cycles64(); ++ do_gettimeofday(&tv); ++ lstat_control.started_time = tv.tv_sec; ++ ++ lstat_control.state = LSTAT_ON; ++ } ++ } else { ++ error = -EBUSY; /* already ON */ ++ } ++ break; ++ ++ case LSTAT_RESET: ++ if (lstat_control.state == LSTAT_OFF) { ++ if (!reset_lstat_data()) ++ error = -EINVAL; ++ } else { ++ error = -EBUSY; /* still on; can't reset */ ++ } ++ break; ++ ++ case LSTAT_RELEASE: ++ if (lstat_control.state == LSTAT_OFF) { ++ release_control_space(); ++ lstat_control.intervals = 0; ++ lstat_control.enabled_cycles64 = 0; ++ } else { ++ error = -EBUSY; ++ } ++ break; ++ ++ default: ++ error = -EINVAL; ++ } /* switch */ ++ ++ _raw_spin_unlock(&lstat_control.control_lock); ++ return error ? error : len; ++} ++ ++#ifdef USER_MODE_TESTING ++/* following used for user mode testing */ ++void ++lockmeter_init() ++{ ++ int dirsize, hashsize, countsize, read_lock_countsize, cpu; ++ ++ printf("lstat_control is at %x size=%d\n", &lstat_control, ++ sizeof (lstat_control)); ++ printf("sizeof(spinlock_t)=%d\n", sizeof (spinlock_t)); ++ lstat_control.state = LSTAT_ON; ++ ++ lstat_control.directory_lock = SPIN_LOCK_UNLOCKED; ++ lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ ++ lstat_control.next_free_read_lock_index = 1; ++ ++ dirsize = LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t); ++ hashsize = (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort); ++ countsize = sizeof (lstat_cpu_counts_t); ++ read_lock_countsize = sizeof (lstat_read_lock_cpu_counts_t); ++ ++ lstat_control.hashtab = (ushort *) malloc(hashsize); ++ ++ if (lstat_control.hashtab == 0) { ++ printf("malloc failure for at line %d in lockmeter.c\n", ++ __LINE__); ++ exit(0); ++ } ++ ++ lstat_control.dir = (lstat_directory_entry_t *) malloc(dirsize); ++ ++ if (lstat_control.dir == 0) { ++ printf("malloc failure for at line %d in lockmeter.c\n", cpu, ++ __LINE__); ++ exit(0); ++ } ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ int j, k; ++ j = (int) (lstat_control.counts[cpu] = ++ (lstat_cpu_counts_t *) malloc(countsize)); ++ k = (int) (lstat_control.read_lock_counts[cpu] = ++ (lstat_read_lock_cpu_counts_t *) ++ malloc(read_lock_countsize)); ++ if (j * k == 0) { ++ printf("malloc failure for cpu=%d at line %d in " ++ "lockmeter.c\n", cpu, __LINE__); ++ exit(0); ++ } ++ } ++ ++ memset(lstat_control.hashtab, 0, hashsize); ++ memset(lstat_control.dir, 0, dirsize); ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ memset(lstat_control.counts[cpu], 0, countsize); ++ memset(lstat_control.read_lock_counts[cpu], 0, ++ read_lock_countsize); ++ } ++} ++ ++asm(" \ ++.align 4 \ ++.globl __write_lock_failed \ ++__write_lock_failed: \ ++ " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax) \ ++1: cmpl $" RW_LOCK_BIAS_STR ",(%eax) \ ++ jne 1b \ ++\ ++ " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax) \ ++ jnz __write_lock_failed \ ++ ret \ ++\ ++\ ++.align 4 \ ++.globl __read_lock_failed \ ++__read_lock_failed: \ ++ lock ; incl (%eax) \ ++1: cmpl $1,(%eax) \ ++ js 1b \ ++\ ++ lock ; decl (%eax) \ ++ js __read_lock_failed \ ++ ret \ ++"); ++#endif ++ ++/* ++ * these definitions need to match what is in kernel/spinlock.c ++ * except for the fact tht calls to _raw_ routines are replaced by ++ * corresponding calls to the _metered_ routines ++ */ ++ ++/* ++ * Generic declaration of the raw read_trylock() function, ++ * architectures are supposed to optimize this: ++ */ ++int __lockfunc generic_raw_read_trylock(rwlock_t *lock) ++{ ++ _metered_read_lock(lock, __builtin_return_address(0)); ++ return 1; ++} ++EXPORT_SYMBOL(generic_raw_read_trylock); ++ ++int __lockfunc _spin_trylock(spinlock_t *lock) ++{ ++ preempt_disable(); ++ if (_metered_spin_trylock(lock, __builtin_return_address(0))) ++ return 1; ++ ++ preempt_enable(); ++ return 0; ++} ++EXPORT_SYMBOL(_spin_trylock); ++ ++int __lockfunc _write_trylock(rwlock_t *lock) ++{ ++ preempt_disable(); ++ if (_metered_write_trylock(lock, __builtin_return_address(0))) ++ return 1; ++ ++ preempt_enable(); ++ return 0; ++} ++EXPORT_SYMBOL(_write_trylock); ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) ++/* ++ * This could be a long-held lock. If another CPU holds it for a long time, ++ * and that CPU is not asked to reschedule then *this* CPU will spin on the ++ * lock for a long time, even if *this* CPU is asked to reschedule. ++ * ++ * So what we do here, in the slow (contended) path is to spin on the lock by ++ * hand while permitting preemption. ++ * ++ * Called inside preempt_disable(). ++ */ ++static inline void __preempt_spin_lock(spinlock_t *lock, void *caller_pc) ++{ ++ if (preempt_count() > 1) { ++ _metered_spin_lock(lock, caller_pc); ++ return; ++ } ++ ++ do { ++ preempt_enable(); ++ while (spin_is_locked(lock)) ++ cpu_relax(); ++ preempt_disable(); ++ } while (!_metered_spin_trylock(lock, caller_pc)); ++} ++ ++void __lockfunc _spin_lock(spinlock_t *lock) ++{ ++ preempt_disable(); ++ if (unlikely(!_metered_spin_trylock(lock, __builtin_return_address(0)))) ++ __preempt_spin_lock(lock, __builtin_return_address(0)); ++} ++ ++static inline void __preempt_write_lock(rwlock_t *lock, void *caller_pc) ++{ ++ if (preempt_count() > 1) { ++ _metered_write_lock(lock, caller_pc); ++ return; ++ } ++ ++ do { ++ preempt_enable(); ++ while (rwlock_is_locked(lock)) ++ cpu_relax(); ++ preempt_disable(); ++ } while (!_metered_write_trylock(lock,caller_pc)); ++} ++ ++void __lockfunc _write_lock(rwlock_t *lock) ++{ ++ preempt_disable(); ++ if (unlikely(!_metered_write_trylock(lock, __builtin_return_address(0)))) ++ __preempt_write_lock(lock, __builtin_return_address(0)); ++} ++#else ++void __lockfunc _spin_lock(spinlock_t *lock) ++{ ++ preempt_disable(); ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++} ++ ++void __lockfunc _write_lock(rwlock_t *lock) ++{ ++ preempt_disable(); ++ _metered_write_lock(lock, __builtin_return_address(0)); ++} ++#endif ++EXPORT_SYMBOL(_spin_lock); ++EXPORT_SYMBOL(_write_lock); ++ ++void __lockfunc _read_lock(rwlock_t *lock) ++{ ++ preempt_disable(); ++ _metered_read_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_read_lock); ++ ++void __lockfunc _spin_unlock(spinlock_t *lock) ++{ ++ _metered_spin_unlock(lock); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_spin_unlock); ++ ++void __lockfunc _write_unlock(rwlock_t *lock) ++{ ++ _metered_write_unlock(lock); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_write_unlock); ++ ++void __lockfunc _read_unlock(rwlock_t *lock) ++{ ++ _metered_read_unlock(lock); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_read_unlock); ++ ++unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ preempt_disable(); ++ _metered_spin_lock_flags(lock, flags, __builtin_return_address(0)); ++ return flags; ++} ++EXPORT_SYMBOL(_spin_lock_irqsave); ++ ++void __lockfunc _spin_lock_irq(spinlock_t *lock) ++{ ++ local_irq_disable(); ++ preempt_disable(); ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_spin_lock_irq); ++ ++void __lockfunc _spin_lock_bh(spinlock_t *lock) ++{ ++ local_bh_disable(); ++ preempt_disable(); ++ _metered_spin_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_spin_lock_bh); ++ ++unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ preempt_disable(); ++ _metered_read_lock(lock, __builtin_return_address(0)); ++ return flags; ++} ++EXPORT_SYMBOL(_read_lock_irqsave); ++ ++void __lockfunc _read_lock_irq(rwlock_t *lock) ++{ ++ local_irq_disable(); ++ preempt_disable(); ++ _metered_read_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_read_lock_irq); ++ ++void __lockfunc _read_lock_bh(rwlock_t *lock) ++{ ++ local_bh_disable(); ++ preempt_disable(); ++ _metered_read_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_read_lock_bh); ++ ++unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ preempt_disable(); ++ _metered_write_lock(lock, __builtin_return_address(0)); ++ return flags; ++} ++EXPORT_SYMBOL(_write_lock_irqsave); ++ ++void __lockfunc _write_lock_irq(rwlock_t *lock) ++{ ++ local_irq_disable(); ++ preempt_disable(); ++ _metered_write_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_write_lock_irq); ++ ++void __lockfunc _write_lock_bh(rwlock_t *lock) ++{ ++ local_bh_disable(); ++ preempt_disable(); ++ _metered_write_lock(lock, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(_write_lock_bh); ++ ++void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) ++{ ++ _metered_spin_unlock(lock); ++ local_irq_restore(flags); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_spin_unlock_irqrestore); ++ ++void __lockfunc _spin_unlock_irq(spinlock_t *lock) ++{ ++ _metered_spin_unlock(lock); ++ local_irq_enable(); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_spin_unlock_irq); ++ ++void __lockfunc _spin_unlock_bh(spinlock_t *lock) ++{ ++ _metered_spin_unlock(lock); ++ preempt_enable(); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(_spin_unlock_bh); ++ ++void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) ++{ ++ _metered_read_unlock(lock); ++ local_irq_restore(flags); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_read_unlock_irqrestore); ++ ++void __lockfunc _read_unlock_irq(rwlock_t *lock) ++{ ++ _metered_read_unlock(lock); ++ local_irq_enable(); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_read_unlock_irq); ++ ++void __lockfunc _read_unlock_bh(rwlock_t *lock) ++{ ++ _metered_read_unlock(lock); ++ preempt_enable(); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(_read_unlock_bh); ++ ++void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) ++{ ++ _metered_write_unlock(lock); ++ local_irq_restore(flags); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_write_unlock_irqrestore); ++ ++void __lockfunc _write_unlock_irq(rwlock_t *lock) ++{ ++ _metered_write_unlock(lock); ++ local_irq_enable(); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(_write_unlock_irq); ++ ++void __lockfunc _write_unlock_bh(rwlock_t *lock) ++{ ++ _metered_write_unlock(lock); ++ preempt_enable(); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(_write_unlock_bh); ++ ++int __lockfunc _spin_trylock_bh(spinlock_t *lock) ++{ ++ local_bh_disable(); ++ preempt_disable(); ++ if (_metered_spin_trylock(lock, __builtin_return_address(0))) ++ return 1; ++ ++ preempt_enable(); ++ local_bh_enable(); ++ return 0; ++} ++EXPORT_SYMBOL(_spin_trylock_bh); diff --git a/lustre/kernel_patches/patches/lookup-stack-symbols-2.4.21-suse-171.patch b/lustre/kernel_patches/patches/lookup-stack-symbols-2.4.21-suse-171.patch deleted file mode 100644 index fd3d054..0000000 --- a/lustre/kernel_patches/patches/lookup-stack-symbols-2.4.21-suse-171.patch +++ /dev/null @@ -1,234 +0,0 @@ -Index: linux-2.4.21-273/arch/i386/kernel/process.c -=================================================================== ---- linux-2.4.21-273.orig/arch/i386/kernel/process.c 2005-01-17 07:08:16.000000000 -0500 -+++ linux-2.4.21-273/arch/i386/kernel/process.c 2005-04-05 20:11:38.391253492 -0400 -@@ -36,6 +36,7 @@ - #ifdef CONFIG_KDB - #include - #endif /* CONFIG_KDB */ -+#include - - #include - #include -@@ -772,10 +773,14 @@ - void show_regs(struct pt_regs * regs) - { - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; -+ static char buffer[512]; -+ -+ lookup_symbol(regs->eip, buffer, 512); - - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); -+ printk("\nEIP is at %s (" UTS_RELEASE ")\n", buffer); - if (regs->xcs & 3) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); -Index: linux-2.4.21-273/arch/i386/kernel/traps.c -=================================================================== ---- linux-2.4.21-273.orig/arch/i386/kernel/traps.c 2005-04-05 20:11:27.381848523 -0400 -+++ linux-2.4.21-273/arch/i386/kernel/traps.c 2005-04-05 20:11:38.392253347 -0400 -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - - #include - -@@ -275,7 +276,8 @@ - { - int i; - unsigned long addr; -- unsigned long *prev = NULL; -+ /* static to not take up stackspace; if we race here too bad */ -+ static char buffer[512]; - - if (!stack) - stack = (unsigned long*)&stack; -@@ -285,17 +287,12 @@ - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { -- if (prev) -- printk(" (%02d)", (stack-prev)*sizeof(*stack)); -- prev = stack; -- if ((i % 4) == 0) -- printk("\n "); -- printk(" [<%08lx>]", addr); -+ lookup_symbol(addr, buffer, 512); -+ printk(" [<%08lx>] %s (0x%p)\n", -+ addr, buffer, stack - 1); - i++; - } - } -- if (prev) -- printk(" (%02d)", (stack-prev)*sizeof(*stack)); - printk("\n"); - } - -@@ -341,12 +338,19 @@ - printk("Code: <0>\n"); /* tell ksymoops trace ends here */ - } - -+#ifdef CONFIG_MK7 -+#define ARCHIT "/athlon" -+#else -+#define ARCHIT "/i686" -+#endif -+ - void show_registers(struct pt_regs *regs) - { - int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss; -+ static char buffer[512]; - - esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; -@@ -355,10 +359,13 @@ - esp = regs->esp; - ss = regs->xss & 0xffff; - } -+ print_modules(); -+ lookup_symbol(regs->eip, buffer, 512); - module_oops_tracking_init(); - kernel_text_address(regs->eip); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); -+ printk("\nEIP is at %s (" UTS_RELEASE ARCHIT ")\n", buffer); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", -@@ -420,6 +427,7 @@ - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) - file = ""; - -+ printk("-----------[ cut here ]------------\n"); - printk("kernel BUG at %s:%d!\n", file, line); - - no_bug: -Index: linux-2.4.21-273/include/linux/kernel.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/kernel.h 2005-01-17 07:09:09.000000000 -0500 -+++ linux-2.4.21-273/include/linux/kernel.h 2005-04-05 20:11:38.393253203 -0400 -@@ -132,6 +132,9 @@ - - extern void dump_stack(void); - -+extern int lookup_symbol(unsigned long address, char *buffer, int buflen); -+extern void print_modules(void); -+ - extern char *oops_id; - - #if DEBUG -Index: linux-2.4.21-273/kernel/Makefile -=================================================================== ---- linux-2.4.21-273.orig/kernel/Makefile 2005-01-17 07:08:59.000000000 -0500 -+++ linux-2.4.21-273/kernel/Makefile 2005-04-05 20:12:04.410483855 -0400 -@@ -11,12 +11,13 @@ - - export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \ - printk.o cpufreq.o rcupdate.o syscall_ksyms.o fork.o hook.o \ -- rashooks.o module.o -+ rashooks.o module.o kksymoops.o - - obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ - module.o exit.o itimer.o info.o time.o softirq.o resource.o \ - sysctl.o acct.o capability.o ptrace.o timer.o user.o \ -- signal.o sys.o kmod.o context.o rcupdate.o futex.o syscall_ksyms.o -+ signal.o sys.o kmod.o context.o rcupdate.o futex.o \ -+ syscall_ksyms.o kksymoops.o - - obj-$(CONFIG_UID16) += uid16.o - obj-$(CONFIG_MODULES) += ksyms.o -Index: linux-2.4.21-273/kernel/kksymoops.c -=================================================================== ---- linux-2.4.21-273.orig/kernel/kksymoops.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.4.21-273/kernel/kksymoops.c 2005-04-05 20:11:38.394253058 -0400 -@@ -0,0 +1,83 @@ -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_KALLSYMS -+#include -+#endif -+ -+ -+ -+int lookup_symbol(unsigned long address, char *buffer, int buflen) -+{ -+ struct module *this_mod; -+ unsigned long bestsofar; -+ -+ const char *mod_name = NULL, *sec_name = NULL, *sym_name = NULL; -+ unsigned long mod_start,mod_end,sec_start,sec_end,sym_start,sym_end; -+ -+ if (!buffer) -+ return -EFAULT; -+ -+ if (buflen<256) -+ return -ENOMEM; -+ -+ memset(buffer,0,buflen); -+ -+#ifdef CONFIG_KALLSYMS -+ if (!kallsyms_address_to_symbol(address,&mod_name,&mod_start,&mod_end,&sec_name, -+ &sec_start, &sec_end, &sym_name, &sym_start, &sym_end)) { -+ /* kallsyms doesn't have a clue; lets try harder */ -+ bestsofar = 0; -+ snprintf(buffer,buflen-1,"[unresolved]"); -+ -+ this_mod = module_list; -+ -+ while (this_mod != NULL) { -+ int i; -+ /* walk the symbol list of this module. Only symbols -+ who's address is smaller than the searched for address -+ are relevant; and only if it's better than the best so far */ -+ for (i=0; i< this_mod->nsyms; i++) -+ if ((this_mod->syms[i].value<=address) && -+ (bestsofarsyms[i].value)) { -+ snprintf(buffer,buflen-1,"%s [%s] 0x%x", -+ this_mod->syms[i].name, -+ this_mod->name, -+ (unsigned int)(address - this_mod->syms[i].value)); -+ bestsofar = this_mod->syms[i].value; -+ } -+ this_mod = this_mod->next; -+ } -+ -+ } else { /* kallsyms success */ -+ snprintf(buffer,buflen-1,"%s [%s] 0x%x",sym_name,mod_name,(unsigned int)(address-sym_start)); -+ } -+#endif -+ return strlen(buffer); -+} -+EXPORT_SYMBOL(lookup_symbol); -+ -+static char modlist[4096]; -+/* this function isn't smp safe but that's not really a problem; it's called from -+ * oops context only and any locking could actually prevent the oops from going out; -+ * the line that is generated is informational only and should NEVER prevent the real oops -+ * from going out. -+ */ -+void print_modules(void) -+{ -+ struct module *this_mod; -+ int pos = 0, i; -+ memset(modlist,0,4096); -+ -+#ifdef CONFIG_KALLSYMS -+ this_mod = module_list; -+ while (this_mod != NULL) { -+ if (this_mod->name != NULL) -+ pos +=snprintf(modlist+pos,160-pos-1,"%s ",this_mod->name); -+ this_mod = this_mod->next; -+ } -+ printk("%s\n",modlist); -+#endif -+} diff --git a/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch b/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch deleted file mode 100644 index 5d18ad3..0000000 --- a/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- linux/drivers/block/loop.c.bu 2004-05-11 16:27:23.000000000 -0700 -+++ linux/drivers/block/loop.c 2004-05-11 16:28:50.000000000 -0700 -@@ -978,7 +978,7 @@ static int lo_release(struct inode *inod - - lo = &loop_dev[dev]; - -- if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) { -+ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && lo->lo_device != 0) { - fsync_dev(lo->lo_device); - invalidate_buffers(lo->lo_device); - } diff --git a/lustre/kernel_patches/patches/lustre_build.patch b/lustre/kernel_patches/patches/lustre_build.patch deleted file mode 100644 index 70f6a37..0000000 --- a/lustre/kernel_patches/patches/lustre_build.patch +++ /dev/null @@ -1,33 +0,0 @@ - fs/Kconfig | 8 ++++++++ - fs/Makefile | 1 + - 2 files changed, 9 insertions(+) - ---- linux-2.5.72/fs/Makefile~lustre_build 2003-06-16 22:20:05.000000000 -0600 -+++ linux-2.5.72-braam/fs/Makefile 2003-06-22 10:11:57.000000000 -0600 -@@ -57,6 +57,7 @@ obj-$(CONFIG_RAMFS) += ramfs/ - obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ - obj-$(CONFIG_CODA_FS) += coda/ - obj-$(CONFIG_INTERMEZZO_FS) += intermezzo/ -+obj-$(CONFIG_LUSTRE_FS) += lustre/ - obj-$(CONFIG_MINIX_FS) += minix/ - obj-$(CONFIG_FAT_FS) += fat/ - obj-$(CONFIG_UMSDOS_FS) += umsdos/ ---- linux-2.5.72/fs/Kconfig~lustre_build 2003-06-16 22:20:05.000000000 -0600 -+++ linux-2.5.72-braam/fs/Kconfig 2003-06-22 10:47:15.000000000 -0600 -@@ -1561,6 +1561,14 @@ config CODA_FS - whenever you want), say M here and read - . The module will be called coda. - -+config LUSTRE_FS -+ bool "Lustre: next generation clustering file system (EXPERIMENTAL)" -+ depends on INET && EXPERIMENTAL -+ help -+ Lustre is a next generation storage architecture which includes a -+ POSIX compliant cluster file system. For details see -+ . -+ - config INTERMEZZO_FS - tristate "InterMezzo file system support (replicating fs) (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - -_ diff --git a/lustre/kernel_patches/patches/netconsole-2.4.24-ppc.patch b/lustre/kernel_patches/patches/netconsole-2.4.24-ppc.patch deleted file mode 100644 index 701c56a..0000000 --- a/lustre/kernel_patches/patches/netconsole-2.4.24-ppc.patch +++ /dev/null @@ -1,489 +0,0 @@ -Index: linux-2.4.24/drivers/net/netconsole.c -=================================================================== -Index: bglio/drivers/net/netconsole.c -=================================================================== ---- bglio.orig/drivers/net/netconsole.c 2004-05-07 15:50:22.000000000 -0700 -+++ bglio/drivers/net/netconsole.c 2004-05-07 17:15:28.000000000 -0700 -@@ -12,6 +12,8 @@ - * - * 2001-09-17 started by Ingo Molnar. - * 2002-03-14 simultaneous syslog packet option by Michael K. Johnson -+ * 2003-10-30 Add sysrq command processing by Wangdi -+ * - */ - - /**************************************************************** -@@ -51,6 +53,7 @@ - #include - #include - #include -+#include "netconsole.h" - - static struct net_device *netconsole_dev; - static u16 source_port, netdump_target_port, netlog_target_port, syslog_target_port; -@@ -62,12 +65,11 @@ - static unsigned int mhz = 500, idle_timeout; - static unsigned long long mhz_cycles, jiffy_cycles; - --#include "netconsole.h" - - #define MAX_UDP_CHUNK 1460 - #define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN) - --#define DEBUG 0 -+#define DEBUG 0 - #if DEBUG - # define Dprintk(x...) printk(KERN_INFO x) - #else -@@ -187,6 +189,22 @@ - } - } - } -+void (*irqfunc)(int, void *, struct pt_regs *); -+ -+static void netdump_poll(struct net_device *dev) -+{ -+ int budget = 1; -+ -+ disable_irq(dev->irq); -+ -+ irqfunc(dev->irq, dev, 0); -+ -+ if(dev->poll && test_bit(__LINK_STATE_RX_SCHED, &dev->state)) -+ dev->poll(dev, &budget); -+ -+ enable_irq(dev->irq); -+ -+} - - static struct sk_buff * alloc_netconsole_skb(struct net_device *dev, int len, int reserve) - { -@@ -209,7 +227,7 @@ - once = 0; - } - Dprintk("alloc skb: polling controller ...\n"); -- dev->poll_controller(dev); -+ netdump_poll(dev); - goto repeat; - } - } -@@ -231,7 +249,7 @@ - spin_unlock(&dev->xmit_lock); - - Dprintk("xmit skb: polling controller ...\n"); -- dev->poll_controller(dev); -+ netdump_poll(dev); - zap_completion_queue(); - goto repeat_poll; - } -@@ -426,18 +444,79 @@ - static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED; - static unsigned int log_offset; - -+static int thread_stopped = 0; -+/*Interrupt function for netdump */ -+static int sysrq_mode = 0; -+static int stop_sysrq_thread = 0; -+#define Set_Sysrq_mode() (sysrq_mode = 1) -+#define Clear_Sysrq_mode() (sysrq_mode = 0) -+static char send_cache[MAX_PRINT_CHUNK]; -+static unsigned int send_cache_pos = 0; -+wait_queue_head_t sysrq_thread_queue; -+wait_queue_head_t sysrq_thread_waiter_queue; -+ -+#define SEND_MSG_BUFFER(buf, len) \ -+do \ -+{ \ -+ reply_t reply; \ -+ unsigned int flags; \ -+ __save_flags(flags); \ -+ __cli(); \ -+ reply.code = REPLY_LOG; \ -+ reply.nr = 0; \ -+ reply.info = 0; \ -+ spin_lock(&sequence_lock); \ -+ send_netlog_skb(dev, buf, len, &reply); \ -+ spin_unlock(&sequence_lock); \ -+ __restore_flags(flags); \ -+}while(0); -+ -+void netconsole_do_sysrq(req_t *req) -+{ -+ struct pt_regs regs; -+ struct net_device *dev = netconsole_dev; -+ -+ if (!dev) -+ return; -+ Set_Sysrq_mode(); -+ get_current_regs(®s); -+ handle_sysrq((int)req->from, ®s, NULL); -+ -+ if (send_cache_pos != 0){ -+ SEND_MSG_BUFFER(send_cache, send_cache_pos); -+ memset(send_cache, 0, MAX_PRINT_CHUNK); -+ send_cache_pos = 0; -+ } -+ -+ Clear_Sysrq_mode(); -+} - static void write_netconsole_msg(struct console *con, const char *msg0, unsigned int msg_len) - { - int len, left, i; - struct net_device *dev; - const char *msg = msg0; - reply_t reply; -- -+ - dev = netconsole_dev; - if (!dev || netdump_mode) - return; -- -- if (dev->poll_controller && netif_running(dev)) { -+ if (sysrq_mode){ -+ unsigned long total_len = send_cache_pos + msg_len; -+ unsigned long left_len = msg_len; -+ while (total_len >= MAX_PRINT_CHUNK){ -+ unsigned long send_len = MAX_PRINT_CHUNK - send_cache_pos; -+ memcpy(send_cache + send_cache_pos, msg, send_len); -+ SEND_MSG_BUFFER(send_cache, MAX_PRINT_CHUNK); -+ send_cache_pos = 0; -+ total_len -= MAX_PRINT_CHUNK; -+ left_len -= send_len; -+ } -+ if (left_len > 0){ -+ memcpy(send_cache + send_cache_pos, msg + (msg_len -left_len), left_len); -+ send_cache_pos += left_len; -+ } -+ return; -+ }else if (netif_running(dev)) { - unsigned long flags; - - __save_flags(flags); -@@ -567,8 +646,6 @@ - req_t *req; - struct net_device *dev; - -- if (!netdump_mode) -- return NET_RX_SUCCESS; - #if DEBUG - { - static int packet_count; -@@ -722,8 +799,16 @@ - Dprintk("... netdump from: %08x.\n", req->from); - Dprintk("... netdump to: %08x.\n", req->to); - -- add_new_req(req); -+ if (netdump_mode) -+ add_new_req(req); -+ else if (req->command == COMM_SYSRQ){ -+ add_new_req(req); -+ wake_up(&sysrq_thread_queue); -+ return NET_RX_DROP; -+ } - out: -+ if (!netdump_mode) -+ return NET_RX_SUCCESS; - return NET_RX_DROP; - } - -@@ -763,6 +848,7 @@ - kunmap_atomic(kaddr, KM_NETDUMP); - } - -+ - /* - * This function waits for the client to acknowledge the receipt - * of the netdump startup reply, with the possibility of packets -@@ -792,7 +878,7 @@ - // wait 1 sec. - udelay(100); - Dprintk("handshake: polling controller ...\n"); -- dev->poll_controller(dev); -+ netdump_poll(dev); - zap_completion_queue(); - req = get_new_req(); - if (req) -@@ -884,6 +970,7 @@ - */ - spin_lock_init(&dev->xmit_lock); - -+#ifdef __i386__ - esp = (unsigned long) ((char *)regs + sizeof (struct pt_regs)); - ss = __KERNEL_DS; - if (regs->xcs & 3) { -@@ -893,6 +980,7 @@ - myregs = *regs; - myregs.esp = esp; - myregs.xss = (myregs.xss & 0xffff0000) | ss; -+#endif - - rdtscll(t0); - -@@ -904,7 +992,7 @@ - while (netdump_mode) { - __cli(); - Dprintk("main netdump loop: polling controller ...\n"); -- dev->poll_controller(dev); -+ netdump_poll(dev); - zap_completion_queue(); - #if !CLI - __sti(); -@@ -1009,6 +1097,32 @@ - printk("NETDUMP END!\n"); - __restore_flags(flags); - } -+static int netconsole_sysrq_schedule(void *arg) -+{ -+ struct task_struct *tsk = current; -+ -+ sprintf(tsk->comm, "sysrq_schedule"); -+ sigfillset(&tsk->blocked); -+ -+ /* main loop */ -+ thread_stopped = 0; -+ for (;;) { -+ wait_event_interruptible(sysrq_thread_queue, -+ !list_empty(&request_list) || stop_sysrq_thread); -+ while (!list_empty(&request_list)) { -+ req_t *req = get_new_req(); -+ if (req->command == COMM_SYSRQ) -+ netconsole_do_sysrq(req); -+ } -+ if (stop_sysrq_thread) -+ break; -+ wake_up(&sysrq_thread_waiter_queue); -+ } -+ thread_stopped = 1; -+ wake_up(&sysrq_thread_waiter_queue); -+ return 0; -+} -+ - - static char *dev; - static int netdump_target_eth_byte0 = 255; -@@ -1087,11 +1201,12 @@ - - static struct console netconsole = - { flags: CON_ENABLED, write: write_netconsole_msg }; -- - static int init_netconsole(void) - { - struct net_device *ndev = NULL; - struct in_device *in_dev; -+ struct irqaction *action; -+ int rc = 0; - - printk(KERN_INFO "netlog: using network device <%s>\n", dev); - // this will be valid once the device goes up. -@@ -1101,10 +1216,6 @@ - printk(KERN_ERR "netlog: network device %s does not exist, aborting.\n", dev); - return -1; - } -- if (!ndev->poll_controller) { -- printk(KERN_ERR "netlog: %s's network driver does not implement netlogging yet, aborting.\n", dev); -- return -1; -- } - in_dev = in_dev_get(ndev); - if (!in_dev) { - printk(KERN_ERR "netlog: network device %s is not an IP protocol device, aborting.\n", dev); -@@ -1137,8 +1248,6 @@ - if (!netdump_target_ip && !netlog_target_ip && !syslog_target_ip) { - printk(KERN_ERR "netlog: target_ip parameter not specified, aborting.\n"); - return -1; -- } -- if (netdump_target_ip) { - #define IP(x) ((unsigned char *)&netdump_target_ip)[x] - printk(KERN_INFO "netlog: using netdump target IP %u.%u.%u.%u\n", - IP(3), IP(2), IP(1), IP(0)); -@@ -1214,12 +1323,27 @@ - - mhz_cycles = (unsigned long long)mhz * 1000000ULL; - jiffy_cycles = (unsigned long long)mhz * (1000000/HZ); -- -- INIT_LIST_HEAD(&request_list); -- -+ - ndev->rx_hook = netconsole_rx_hook; - netdump_func = netconsole_netdump; - netconsole_dev = ndev; -+ /* find irq function of the ndev*/ -+ action=find_irq_action(ndev->irq, ndev); -+ if (!action) { -+ printk(KERN_ERR "couldn't find irq handler for <%s>", dev); -+ return -1; -+ } -+ irqfunc = action->handler; -+ -+ stop_sysrq_thread = 0; -+ INIT_LIST_HEAD(&request_list); -+ init_waitqueue_head(&sysrq_thread_queue); -+ init_waitqueue_head(&sysrq_thread_waiter_queue); -+ if ((rc = kernel_thread(netconsole_sysrq_schedule, NULL, 0)) < 0 ){ -+ printk(KERN_ERR "Can not start netconsole sysrq thread: rc %d\n", rc); -+ return -1; -+ } -+ - #define STARTUP_MSG "[...network console startup...]\n" - write_netconsole_msg(NULL, STARTUP_MSG, strlen(STARTUP_MSG)); - -@@ -1230,7 +1354,11 @@ - - static void cleanup_netconsole(void) - { -- printk(KERN_INFO "netlog: network logging shut down.\n"); -+ stop_sysrq_thread = 1; -+ -+ wake_up(&sysrq_thread_queue); -+ wait_event(sysrq_thread_waiter_queue, thread_stopped); -+ printk(KERN_INFO"netlog: network logging shut down.\n"); - unregister_console(&netconsole); - - #define SHUTDOWN_MSG "[...network console shutdown...]\n" -Index: bglio/drivers/net/netconsole.h -=================================================================== ---- bglio.orig/drivers/net/netconsole.h 2004-05-07 15:50:22.000000000 -0700 -+++ bglio/drivers/net/netconsole.h 2004-05-07 17:11:01.000000000 -0700 -@@ -29,7 +29,7 @@ - * - ****************************************************************/ - --#define NETCONSOLE_VERSION 0x04 -+#define NETCONSOLE_VERSION 0x03 - - enum netdump_commands { - COMM_NONE = 0, -@@ -42,6 +42,8 @@ - COMM_START_NETDUMP_ACK = 7, - COMM_GET_REGS = 8, - COMM_SHOW_STATE = 9, -+ COMM_START_WRITE_NETDUMP_ACK = 10, -+ COMM_SYSRQ = 11, - }; - - #define NETDUMP_REQ_SIZE (8+4*4) -@@ -69,6 +71,7 @@ - REPLY_REGS = 10, - REPLY_MAGIC = 11, - REPLY_SHOW_STATE = 12, -+ REPLY_SYSRQ = 13, - }; - - typedef struct netdump_reply_s { -@@ -78,4 +81,24 @@ - } reply_t; - - #define HEADER_LEN (1 + sizeof(reply_t)) -- -+/* for netconsole */ -+static inline void get_current_regs(struct pt_regs *regs) -+{ -+#ifdef __i386__ -+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); -+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); -+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); -+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); -+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); -+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); -+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); -+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); -+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); -+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); -+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); -+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); -+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); -+ regs->eip = (unsigned long)current_text_addr(); -+#endif -+} -+ -Index: bglio/arch/i386/kernel/irq.c -=================================================================== ---- bglio.orig/arch/i386/kernel/irq.c 2004-05-07 15:50:17.000000000 -0700 -+++ bglio/arch/i386/kernel/irq.c 2004-05-07 17:11:01.000000000 -0700 -@@ -182,7 +182,20 @@ - - return 0; - } -+struct irqaction *find_irq_action(unsigned int irq, void *dev_id) -+{ -+ struct irqaction *a, *r=0; - -+ spin_lock_irq(&irq_desc[irq].lock); -+ for(a=irq_desc[irq].action; a; a=a->next) { -+ if(a->dev_id == dev_id) { -+ r=a; -+ break; -+ } -+ } -+ spin_unlock_irq(&irq_desc[irq].lock); -+ return r; -+} - - /* - * Global interrupt locks for SMP. Allow interrupts to come in on any -Index: bglio/arch/i386/kernel/i386_ksyms.c -=================================================================== ---- bglio.orig/arch/i386/kernel/i386_ksyms.c 2004-05-07 15:50:22.000000000 -0700 -+++ bglio/arch/i386/kernel/i386_ksyms.c 2004-05-07 17:11:01.000000000 -0700 -@@ -66,6 +66,7 @@ - EXPORT_SYMBOL(iounmap); - EXPORT_SYMBOL(enable_irq); - EXPORT_SYMBOL(disable_irq); -+EXPORT_SYMBOL(find_irq_action); - EXPORT_SYMBOL(disable_irq_nosync); - EXPORT_SYMBOL(probe_irq_mask); - EXPORT_SYMBOL(kernel_thread); -@@ -186,7 +187,6 @@ - EXPORT_SYMBOL(edd); - EXPORT_SYMBOL(eddnr); - #endif -- - EXPORT_SYMBOL_GPL(show_mem); - EXPORT_SYMBOL_GPL(show_state); - EXPORT_SYMBOL_GPL(show_regs); -Index: bglio/net/core/dev.c -=================================================================== ---- bglio.orig/net/core/dev.c 2004-05-07 15:50:22.000000000 -0700 -+++ bglio/net/core/dev.c 2004-05-07 17:11:01.000000000 -0700 -@@ -1476,6 +1476,16 @@ - - skb_bond(skb); - -+ if (unlikely(skb->dev->rx_hook != NULL)) { -+ int ret; -+ -+ ret = skb->dev->rx_hook(skb); -+ if (ret == NET_RX_DROP){ -+ kfree_skb(skb); -+ return ret; -+ } -+ } -+ - netdev_rx_stat[smp_processor_id()].total++; - - #ifdef CONFIG_NET_FASTROUTE -Index: bglio/include/asm-i386/irq.h -=================================================================== ---- bglio.orig/include/asm-i386/irq.h 2004-05-07 15:25:28.000000000 -0700 -+++ bglio/include/asm-i386/irq.h 2004-05-07 17:11:01.000000000 -0700 -@@ -38,7 +38,7 @@ - extern void disable_irq_nosync(unsigned int); - extern void enable_irq(unsigned int); - extern void release_x86_irqs(struct task_struct *); -- -+extern struct irqaction *find_irq_action(unsigned int irq, void *dev_id); - #ifdef CONFIG_X86_LOCAL_APIC - #define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ - #endif -Index: bglio/kernel/panic.c -=================================================================== ---- bglio.orig/kernel/panic.c 2004-05-07 15:50:22.000000000 -0700 -+++ bglio/kernel/panic.c 2004-05-07 17:11:01.000000000 -0700 -@@ -66,8 +66,6 @@ - vsprintf(buf, fmt, args); - va_end(args); - printk(KERN_EMERG "Kernel panic: %s\n",buf); -- if (netdump_func) -- BUG(); - if (in_interrupt()) - printk(KERN_EMERG "In interrupt handler - not syncing\n"); - else if (!current->pid) diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20-hp.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20-hp.patch deleted file mode 100644 index be3c31a..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20-hp.patch +++ /dev/null @@ -1,740 +0,0 @@ - fs/Makefile | 3 - fs/file_table.c | 11 ++ - fs/inode.c | 23 ++++- - fs/namei.c | 12 ++ - fs/nfsd/export.c | 5 + - fs/nfsd/nfsfh.c | 65 +++++++++++++- - fs/nfsd/vfs.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++----- - include/linux/fs.h | 10 ++ - kernel/ksyms.c | 2 - 9 files changed, 337 insertions(+), 34 deletions(-) - ---- linux-2.4.20-hp4-pnnl13/fs/file_table.c~nfs_export_kernel-2.4.20-hp 2002-11-29 02:53:15.000000000 +0300 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/file_table.c 2003-10-08 10:54:08.000000000 +0400 -@@ -82,7 +82,8 @@ struct file * get_empty_filp(void) - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ int init_private_file(struct file *filp, - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fput(struct file * file) - { ---- linux-2.4.20-hp4-pnnl13/fs/inode.c~nfs_export_kernel-2.4.20-hp 2003-09-14 17:34:20.000000000 +0400 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/inode.c 2003-10-08 11:38:11.000000000 +0400 -@@ -964,9 +964,10 @@ struct inode *igrab(struct inode *inode) - return inode; - } - --struct inode *iget4_locked(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -979,6 +980,24 @@ struct inode *iget4_locked(struct super_ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4_locked(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. ---- linux-2.4.20-hp4-pnnl13/fs/Makefile~nfs_export_kernel-2.4.20-hp 2003-09-14 17:34:22.000000000 +0400 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/Makefile 2003-10-08 10:54:37.000000000 +0400 -@@ -7,7 +7,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls xfs - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux-2.4.20-hp4-pnnl13/fs/namei.c~nfs_export_kernel-2.4.20-hp 2003-09-14 17:34:20.000000000 +0400 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/namei.c 2003-10-08 10:54:08.000000000 +0400 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ void intent_release(struct lookup_intent - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -917,7 +919,8 @@ struct dentry * lookup_hash(struct qstr - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -937,11 +940,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * ---- linux-2.4.20-hp4-pnnl13/fs/nfsd/export.c~nfs_export_kernel-2.4.20-hp 2002-11-29 02:53:15.000000000 +0300 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/nfsd/export.c 2003-10-08 10:54:08.000000000 +0400 -@@ -222,6 +222,11 @@ exp_export(struct nfsctl_export *nxp) - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); ---- linux-2.4.20-hp4-pnnl13/fs/nfsd/nfsfh.c~nfs_export_kernel-2.4.20-hp 2002-11-29 02:53:15.000000000 +0300 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/nfsd/nfsfh.c 2003-10-08 10:54:08.000000000 +0400 -@@ -36,6 +36,13 @@ struct nfsd_getdents_callback { - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +84,8 @@ static int nfsd_get_name(struct dentry * - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +96,37 @@ static int nfsd_get_name(struct dentry * - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -- if (error) -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } -+ if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +152,12 @@ static int nfsd_get_name(struct dentry * - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +317,7 @@ struct dentry *nfsd_findparent(struct de - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -306,6 +349,8 @@ struct dentry *nfsd_findparent(struct de - igrab(tdentry->d_inode); - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -463,6 +508,8 @@ find_fh_dentry(struct super_block *sb, _ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -662,6 +709,10 @@ fh_verify(struct svc_rqst *rqstp, struct - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -900,8 +952,9 @@ out_negative: - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - ---- linux-2.4.20-hp4-pnnl13/fs/nfsd/vfs.c~nfs_export_kernel-2.4.20-hp 2002-11-29 02:53:15.000000000 +0300 -+++ linux-2.4.20-hp4-pnnl13-alexey/fs/nfsd/vfs.c 2003-10-08 10:54:08.000000000 +0400 -@@ -77,6 +77,130 @@ struct raparms { - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ igrab(dold->d_inode); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -300,7 +422,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op &&dentry->d_inode->i_op->setattr_raw){ -+ if (accmode & MAY_OWNER_OVERRIDE) -+ iap->ia_valid & O_OWNER_OVERRIDE; -+ err = setattr_raw(dentry->d_inode, iap); -+ } else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -427,6 +552,7 @@ nfsd_open(struct svc_rqst *rqstp, struct - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -473,6 +599,15 @@ nfsd_open(struct svc_rqst *rqstp, struct - filp->f_mode = FMODE_READ; - } - -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -487,6 +621,9 @@ nfsd_open(struct svc_rqst *rqstp, struct - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -818,7 +956,7 @@ nfsd_create(struct svc_rqst *rqstp, stru - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -834,20 +972,47 @@ nfsd_create(struct svc_rqst *rqstp, stru - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -868,10 +1030,12 @@ nfsd_create(struct svc_rqst *rqstp, stru - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -884,16 +1048,19 @@ nfsd_create(struct svc_rqst *rqstp, stru - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -962,7 +1129,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1013,6 +1186,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1119,7 +1294,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1133,12 +1308,18 @@ nfsd_symlink(struct svc_rqst *rqstp, str - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1148,7 +1329,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (!err && EX_ISSYNC(fhp->fh_export)) - write_inode_now(dentry->d_inode, 1); - } -@@ -1206,7 +1390,10 @@ nfsd_link(struct svc_rqst *rqstp, struct - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1291,7 +1478,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1312,7 +1502,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1358,9 +1548,15 @@ nfsd_unlink(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); ---- linux-2.4.20-hp4-pnnl13/include/linux/fs.h~nfs_export_kernel-2.4.20-hp 2003-09-14 17:34:24.000000000 +0400 -+++ linux-2.4.20-hp4-pnnl13-alexey/include/linux/fs.h 2003-10-08 11:39:07.000000000 +0400 -@@ -93,6 +93,8 @@ extern int leases_enable, dir_notify_ena - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1116,6 +1119,9 @@ extern int open_namei_it(const char *fil - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1386,6 +1392,8 @@ extern void path_release(struct nameidat - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1403,6 +1411,8 @@ typedef int (*find_inode_t)(struct inode - - extern struct inode * iget4_locked(struct super_block *, unsigned long, - find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - - static inline struct inode *iget4(struct super_block *sb, unsigned long ino, - find_inode_t find_actor, void *opaque) ---- linux-2.4.20-hp4-pnnl13/kernel/ksyms.c~nfs_export_kernel-2.4.20-hp 2003-09-14 17:34:22.000000000 +0400 -+++ linux-2.4.20-hp4-pnnl13-alexey/kernel/ksyms.c 2003-10-08 11:39:42.000000000 +0400 -@@ -169,6 +169,7 @@ EXPORT_SYMBOL(fget); - EXPORT_SYMBOL(igrab); - EXPORT_SYMBOL(iunique); - EXPORT_SYMBOL(iget4_locked); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(unlock_new_inode); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); -@@ -181,6 +182,7 @@ EXPORT_SYMBOL(path_walk); - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - EXPORT_SYMBOL(sys_close); - EXPORT_SYMBOL(dcache_lock); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-chaos.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-chaos.patch deleted file mode 100644 index db293ca..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-chaos.patch +++ /dev/null @@ -1,756 +0,0 @@ - fs/Makefile | 3 - fs/file_table.c | 11 ++ - fs/inode.c | 23 ++++- - fs/namei.c | 12 ++ - fs/nfsd/export.c | 5 + - fs/nfsd/nfsfh.c | 65 +++++++++++++- - fs/nfsd/vfs.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++----- - include/linux/fs.h | 11 ++ - kernel/ksyms.c | 2 - 9 files changed, 333 insertions(+), 34 deletions(-) - -Index: linux-2.4.21-chaos/fs/file_table.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/file_table.c 2003-07-15 04:41:00.000000000 +0400 -+++ linux-2.4.21-chaos/fs/file_table.c 2003-12-12 16:19:25.000000000 +0300 -@@ -82,7 +82,8 @@ struct file * get_empty_filp(void) - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ int init_private_file(struct file *filp, - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fput(struct file * file) - { -Index: linux-2.4.21-chaos/fs/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/inode.c 2003-12-12 16:18:15.000000000 +0300 -+++ linux-2.4.21-chaos/fs/inode.c 2003-12-12 16:19:25.000000000 +0300 -@@ -1054,9 +1054,10 @@ struct inode *igrab(struct inode *inode) - return inode; - } - --struct inode *iget4_locked(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -1069,6 +1070,24 @@ struct inode *iget4_locked(struct super_ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4_locked(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. -Index: linux-2.4.21-chaos/fs/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/Makefile 2003-12-12 16:18:36.000000000 +0300 -+++ linux-2.4.21-chaos/fs/Makefile 2003-12-12 16:19:25.000000000 +0300 -@@ -9,7 +9,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.21-chaos/fs/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/namei.c 2003-12-12 16:18:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/namei.c 2003-12-12 16:19:25.000000000 +0300 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ void intent_release(struct lookup_intent - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -910,7 +912,8 @@ struct dentry * lookup_hash(struct qstr - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -930,11 +933,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -Index: linux-2.4.21-chaos/fs/nfsd/export.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/export.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/export.c 2003-12-12 16:19:25.000000000 +0300 -@@ -223,6 +223,11 @@ exp_export(struct nfsctl_export *nxp) - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); -Index: linux-2.4.21-chaos/fs/nfsd/nfsfh.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/nfsfh.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/nfsfh.c 2003-12-12 16:19:25.000000000 +0300 -@@ -36,6 +36,13 @@ struct nfsd_getdents_callback { - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +84,8 @@ static int nfsd_get_name(struct dentry * - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +96,37 @@ static int nfsd_get_name(struct dentry * - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -- if (error) -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } -+ if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +152,12 @@ static int nfsd_get_name(struct dentry * - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +317,7 @@ struct dentry *nfsd_findparent(struct de - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -306,6 +350,8 @@ struct dentry *nfsd_findparent(struct de - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - pdentry->d_op = child->d_op; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -464,6 +509,8 @@ find_fh_dentry(struct super_block *sb, _ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -672,6 +719,10 @@ fh_verify(struct svc_rqst *rqstp, struct - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -905,8 +957,9 @@ out_negative: - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - -Index: linux-2.4.21-chaos/fs/nfsd/vfs.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/vfs.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/vfs.c 2003-12-12 16:19:25.000000000 +0300 -@@ -78,6 +78,127 @@ - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ igrab(dold->d_inode); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -303,7 +425,10 @@ nfsd_setattr(struct svc_rqst *rqstp, str - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -430,6 +555,7 @@ nfsd_open(struct svc_rqst *rqstp, struct - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -476,6 +602,18 @@ nfsd_open(struct svc_rqst *rqstp, struct - filp->f_mode = FMODE_READ; - } - -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -490,6 +624,9 @@ nfsd_open(struct svc_rqst *rqstp, struct - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -821,7 +959,7 @@ nfsd_create(struct svc_rqst *rqstp, stru - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -837,20 +975,47 @@ nfsd_create(struct svc_rqst *rqstp, stru - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -871,10 +1033,12 @@ nfsd_create(struct svc_rqst *rqstp, stru - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -887,16 +1051,19 @@ nfsd_create(struct svc_rqst *rqstp, stru - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -965,7 +1132,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1016,6 +1189,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1122,7 +1297,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1136,12 +1311,18 @@ nfsd_symlink(struct svc_rqst *rqstp, str - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1148,7 +1329,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (err) - err = nfserrno(err); - else if (EX_ISSYNC(fhp->fh_export)) -@@ -1211,7 +1392,10 @@ nfsd_link(struct svc_rqst *rqstp, struct - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1296,7 +1480,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1317,7 +1504,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1363,9 +1550,15 @@ nfsd_unlink(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); -Index: linux-2.4.21-chaos/include/linux/fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/fs.h 2003-12-12 16:19:23.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/fs.h 2003-12-12 16:19:25.000000000 +0300 -@@ -93,6 +93,8 @@ - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1159,6 +1162,9 @@ extern int open_namei_it(const char *fil - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1458,6 +1464,8 @@ extern void path_release(struct nameidat - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1477,6 +1485,8 @@ typedef int (*find_inode_t)(struct inode - - extern struct inode * iget4_locked(struct super_block *, unsigned long, - find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - - static inline struct inode *iget4(struct super_block *sb, unsigned long ino, - find_inode_t find_actor, void *opaque) -Index: linux-2.4.21-chaos/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-chaos.orig/kernel/ksyms.c 2003-12-12 16:18:36.000000000 +0300 -+++ linux-2.4.21-chaos/kernel/ksyms.c 2003-12-12 16:19:25.000000000 +0300 -@@ -178,6 +178,7 @@ EXPORT_SYMBOL(fget); - EXPORT_SYMBOL(igrab); - EXPORT_SYMBOL(iunique); - EXPORT_SYMBOL(iget4_locked); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(unlock_new_inode); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); -@@ -191,6 +192,7 @@ EXPORT_SYMBOL(path_walk); - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - - EXPORT_SYMBOL(sys_close); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-suse2.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-suse2.patch deleted file mode 100644 index 74744ec..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.21-suse2.patch +++ /dev/null @@ -1,756 +0,0 @@ - fs/Makefile | 3 - fs/file_table.c | 11 ++ - fs/inode.c | 23 ++++- - fs/namei.c | 12 ++ - fs/nfsd/export.c | 5 + - fs/nfsd/nfsfh.c | 65 +++++++++++++- - fs/nfsd/vfs.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++----- - include/linux/fs.h | 11 ++ - kernel/ksyms.c | 2 - 9 files changed, 333 insertions(+), 34 deletions(-) - -Index: linux-2.4.21-chaos/fs/file_table.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/file_table.c 2003-07-15 04:41:00.000000000 +0400 -+++ linux-2.4.21-chaos/fs/file_table.c 2003-12-12 16:19:25.000000000 +0300 -@@ -82,7 +82,8 @@ - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fput(struct file * file) - { -Index: linux-2.4.21-chaos/fs/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/inode.c 2003-12-12 16:18:15.000000000 +0300 -+++ linux-2.4.21-chaos/fs/inode.c 2003-12-12 16:19:25.000000000 +0300 -@@ -1054,9 +1054,10 @@ - return inode; - } - --struct inode *iget4_locked(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -1069,6 +1070,24 @@ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4_locked(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. -Index: linux-2.4.21-chaos/fs/Makefile -=================================================================== ---- linux-2.4.21-chaos.orig/fs/Makefile 2003-12-12 16:18:36.000000000 +0300 -+++ linux-2.4.21-chaos/fs/Makefile 2003-12-12 16:19:25.000000000 +0300 -@@ -9,7 +9,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls xfs - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.21-chaos/fs/namei.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/namei.c 2003-12-12 16:18:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/namei.c 2003-12-12 16:19:25.000000000 +0300 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -910,7 +912,8 @@ - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -930,11 +933,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -Index: linux-2.4.21-chaos/fs/nfsd/export.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/export.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/export.c 2003-12-12 16:19:25.000000000 +0300 -@@ -223,6 +223,11 @@ - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); -Index: linux-2.4.21-chaos/fs/nfsd/nfsfh.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/nfsfh.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/nfsfh.c 2003-12-12 16:19:25.000000000 +0300 -@@ -36,6 +36,13 @@ - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +84,8 @@ - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +96,37 @@ - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -- if (error) -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } -+ if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +152,12 @@ - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +317,7 @@ - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -307,6 +350,8 @@ - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - pdentry->d_op = child->d_op; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -464,6 +509,8 @@ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -672,6 +719,10 @@ - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -905,8 +957,9 @@ - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - -Index: linux-2.4.21-chaos/fs/nfsd/vfs.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/nfsd/vfs.c 2003-09-19 03:49:54.000000000 +0400 -+++ linux-2.4.21-chaos/fs/nfsd/vfs.c 2003-12-12 16:19:25.000000000 +0300 -@@ -78,6 +78,127 @@ - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ igrab(dold->d_inode); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -303,7 +425,10 @@ - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -430,6 +555,7 @@ - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -476,6 +602,18 @@ - filp->f_mode = FMODE_READ; - } - -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -490,6 +624,9 @@ - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -821,7 +959,7 @@ - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -837,20 +975,47 @@ - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -871,10 +1033,12 @@ - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -887,16 +1051,19 @@ - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -965,7 +1132,13 @@ - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1016,6 +1189,8 @@ - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1122,7 +1297,7 @@ - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1136,12 +1311,18 @@ - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1148,7 +1329,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (err) - err = nfserrno(err); - else if (EX_ISSYNC(fhp->fh_export)) -@@ -1211,7 +1392,10 @@ - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1296,7 +1480,10 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1317,7 +1504,7 @@ - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1363,9 +1550,15 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); -Index: linux-2.4.21-chaos/include/linux/fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/fs.h 2003-12-12 16:19:23.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/fs.h 2003-12-12 16:19:25.000000000 +0300 -@@ -93,6 +93,8 @@ - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1159,6 +1162,9 @@ - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1458,6 +1464,8 @@ - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * __lookup_hash(struct qstr *, struct dentry *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) -@@ -1477,6 +1485,8 @@ - - extern struct inode * iget4_locked(struct super_block *, unsigned long, - find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - - static inline struct inode *iget4(struct super_block *sb, unsigned long ino, - find_inode_t find_actor, void *opaque) -Index: linux-2.4.21-chaos/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-chaos.orig/kernel/ksyms.c 2003-12-12 16:18:36.000000000 +0300 -+++ linux-2.4.21-chaos/kernel/ksyms.c 2003-12-12 16:19:25.000000000 +0300 -@@ -178,6 +178,7 @@ - EXPORT_SYMBOL(igrab); - EXPORT_SYMBOL(iunique); - EXPORT_SYMBOL(iget4_locked); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(unlock_new_inode); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); -@@ -191,6 +192,7 @@ - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - - EXPORT_SYMBOL(sys_close); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.22.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.22.patch deleted file mode 100644 index 0747db2..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.22.patch +++ /dev/null @@ -1,745 +0,0 @@ -Index: linux-2.4.22-vanilla/fs/Makefile -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/Makefile 2003-11-03 23:41:40.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Makefile 2003-11-03 23:45:07.000000000 +0300 -@@ -7,7 +7,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.22-vanilla/fs/file_table.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/file_table.c 2003-05-16 05:29:12.000000000 +0400 -+++ linux-2.4.22-vanilla/fs/file_table.c 2003-11-03 23:44:38.000000000 +0300 -@@ -82,7 +82,8 @@ - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fput(struct file * file) - { -Index: linux-2.4.22-vanilla/fs/inode.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/inode.c 2003-11-03 23:25:33.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/inode.c 2003-11-03 23:44:38.000000000 +0300 -@@ -970,9 +970,10 @@ - } - - --struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+static inline struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -985,6 +986,24 @@ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. -Index: linux-2.4.22-vanilla/fs/namei.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/namei.c 2003-11-03 23:22:22.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/namei.c 2003-11-03 23:44:38.000000000 +0300 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -902,7 +904,8 @@ - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -922,11 +925,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -Index: linux-2.4.22-vanilla/fs/nfsd/export.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/nfsd/export.c 2003-11-03 23:22:11.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/nfsd/export.c 2003-11-03 23:44:38.000000000 +0300 -@@ -223,6 +223,11 @@ - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); -Index: linux-2.4.22-vanilla/fs/nfsd/nfsfh.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/nfsd/nfsfh.c 2003-11-03 23:22:11.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/nfsd/nfsfh.c 2003-11-03 23:44:38.000000000 +0300 -@@ -36,6 +36,13 @@ - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +84,8 @@ - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +96,37 @@ - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -- if (error) -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } -+ if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +152,12 @@ - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +317,7 @@ - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -306,6 +349,8 @@ - igrab(tdentry->d_inode); - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -463,6 +508,8 @@ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -669,6 +716,10 @@ - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -902,8 +954,9 @@ - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - -Index: linux-2.4.22-vanilla/fs/nfsd/vfs.c -=================================================================== ---- linux-2.4.22-vanilla.orig/fs/nfsd/vfs.c 2003-11-03 23:22:11.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/nfsd/vfs.c 2003-11-03 23:47:41.000000000 +0300 -@@ -77,6 +77,127 @@ - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ igrab(dold->d_inode); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -302,7 +424,10 @@ - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -429,6 +554,7 @@ - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -475,6 +601,18 @@ - filp->f_mode = FMODE_READ; - } - -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -489,6 +623,9 @@ - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -820,7 +958,7 @@ - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -836,20 +974,47 @@ - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -870,10 +1032,12 @@ - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -886,16 +1050,19 @@ - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -964,7 +1131,13 @@ - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1015,6 +1188,8 @@ - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1121,7 +1296,7 @@ - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1135,12 +1310,18 @@ - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1150,7 +1331,10 @@ - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (err) - err = nfserrno(err); - else if (EX_ISSYNC(fhp->fh_export)) -@@ -1210,7 +1394,10 @@ - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1295,7 +1482,10 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1316,7 +1506,7 @@ - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1362,9 +1552,15 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); -Index: linux-2.4.22-vanilla/include/linux/fs.h -=================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-11-03 23:41:40.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/fs.h 2003-11-03 23:44:38.000000000 +0300 -@@ -93,6 +93,8 @@ - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1115,6 +1118,9 @@ - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1411,6 +1417,8 @@ - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1425,6 +1433,8 @@ - - typedef int (*find_inode_t)(struct inode *, unsigned long, void *); - extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - static inline struct inode *iget(struct super_block *sb, unsigned long ino) - { - return iget4(sb, ino, NULL, NULL); -Index: linux-2.4.22-vanilla/kernel/ksyms.c -=================================================================== ---- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-11-03 23:41:40.000000000 +0300 -+++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-11-03 23:44:38.000000000 +0300 -@@ -149,6 +149,7 @@ - EXPORT_SYMBOL(igrab); - EXPORT_SYMBOL(iunique); - EXPORT_SYMBOL(iget4); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); - EXPORT_SYMBOL(force_delete); -@@ -160,6 +161,7 @@ - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - EXPORT_SYMBOL(sys_close); - EXPORT_SYMBOL(dcache_lock); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch deleted file mode 100644 index f2766d3..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch +++ /dev/null @@ -1,744 +0,0 @@ -Index: linux-2.4.29/fs/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/Makefile 2005-05-03 18:16:44.000000000 +0300 -+++ linux-2.4.29/fs/Makefile 2005-05-03 18:46:09.301144016 +0300 -@@ -7,7 +7,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.29/fs/file_table.c -=================================================================== ---- linux-2.4.29.orig/fs/file_table.c 2005-05-03 16:28:21.000000000 +0300 -+++ linux-2.4.29/fs/file_table.c 2005-05-03 18:46:09.303143712 +0300 -@@ -82,7 +82,8 @@ - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fastcall fput(struct file * file) - { -Index: linux-2.4.29/fs/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/inode.c 2005-05-03 18:16:44.000000000 +0300 -+++ linux-2.4.29/fs/inode.c 2005-05-03 18:51:36.389419040 +0300 -@@ -1139,9 +1139,10 @@ - return inode; - } - --struct inode *iget4_locked(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) -+static inline struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) - { -- struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; - - spin_lock(&inode_lock); -@@ -1154,6 +1155,24 @@ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+struct inode *iget4_locked(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. -Index: linux-2.4.29/fs/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/namei.c 2005-05-03 18:16:43.000000000 +0300 -+++ linux-2.4.29/fs/namei.c 2005-05-03 18:46:09.310142648 +0300 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -910,7 +912,8 @@ - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -930,11 +933,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -Index: linux-2.4.29/fs/nfsd/export.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/export.c 2005-05-03 16:28:21.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/export.c 2005-05-03 18:46:09.312142344 +0300 -@@ -223,6 +223,11 @@ - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); -Index: linux-2.4.29/fs/nfsd/nfsfh.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/nfsfh.c 2005-05-03 16:28:21.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/nfsfh.c 2005-05-03 18:46:09.315141888 +0300 -@@ -36,6 +36,13 @@ - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +82,8 @@ - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +94,37 @@ - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } - if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +150,12 @@ - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +314,7 @@ - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -307,6 +347,8 @@ - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - pdentry->d_op = child->d_op; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -464,6 +506,8 @@ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -670,6 +714,10 @@ - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -903,8 +951,9 @@ - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - -Index: linux-2.4.29/fs/nfsd/vfs.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/vfs.c 2005-05-03 16:28:21.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/vfs.c 2005-05-03 18:46:09.372133224 +0300 -@@ -77,6 +77,127 @@ - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ igrab(dold->d_inode); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -302,7 +422,10 @@ - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -429,6 +552,7 @@ - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -475,6 +599,18 @@ - filp->f_mode = FMODE_READ; - } - -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -490,6 +626,9 @@ - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -837,7 +976,7 @@ - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -853,20 +992,47 @@ - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -887,10 +1053,12 @@ - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -903,16 +1071,19 @@ - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -981,7 +1152,13 @@ - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1032,6 +1209,8 @@ - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1138,7 +1317,7 @@ - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1152,12 +1331,18 @@ - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1167,7 +1352,10 @@ - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (err) - err = nfserrno(err); - else if (EX_ISSYNC(fhp->fh_export)) -@@ -1227,7 +1415,10 @@ - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1312,7 +1503,10 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1333,7 +1527,7 @@ - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1379,9 +1573,15 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-05-03 18:16:44.000000000 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-05-03 18:52:56.016313912 +0300 -@@ -93,6 +93,8 @@ - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1124,6 +1126,9 @@ - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1423,6 +1428,8 @@ - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1443,6 +1450,8 @@ - - extern struct inode * iget4_locked(struct super_block *, unsigned long, - find_inode_t, void *); -+ extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - - static inline struct inode *iget4(struct super_block *sb, unsigned long ino, - find_inode_t find_actor, void *opaque) -Index: linux-2.4.29/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/ksyms.c 2005-05-03 18:16:44.000000000 +0300 -+++ linux-2.4.29/kernel/ksyms.c 2005-05-03 18:52:05.377012256 +0300 -@@ -150,6 +150,7 @@ - EXPORT_SYMBOL(iunique); - EXPORT_SYMBOL(ilookup); - EXPORT_SYMBOL(iget4_locked); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(unlock_new_inode); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); -@@ -164,6 +165,7 @@ - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - EXPORT_SYMBOL(sys_close); - EXPORT_SYMBOL(dcache_lock); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch-1 b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch-1 deleted file mode 100644 index 6b254ec..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.29.patch-1 +++ /dev/null @@ -1,730 +0,0 @@ -Index: linux-2.4.29/fs/Makefile -=================================================================== ---- linux-2.4.29.orig/fs/Makefile 2005-04-07 19:31:00.000000000 +0300 -+++ linux-2.4.29/fs/Makefile 2005-05-03 15:59:07.943621928 +0300 -@@ -7,7 +7,8 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o \ -+ namei.o file_table.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ -Index: linux-2.4.29/fs/file_table.c -=================================================================== ---- linux-2.4.29.orig/fs/file_table.c 2005-04-07 18:52:26.000000000 +0300 -+++ linux-2.4.29/fs/file_table.c 2005-05-03 15:59:07.945621624 +0300 -@@ -82,7 +82,8 @@ - * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. - */ --int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - memset(filp, 0, sizeof(*filp)); - filp->f_mode = mode; -@@ -90,12 +91,20 @@ - filp->f_dentry = dentry; - filp->f_uid = current->fsuid; - filp->f_gid = current->fsgid; -+ if (it) -+ filp->f_it = it; - filp->f_op = dentry->d_inode->i_fop; - if (filp->f_op->open) - return filp->f_op->open(dentry->d_inode, filp); - else - return 0; - } -+EXPORT_SYMBOL(init_private_file_it); -+ -+int init_private_file(struct file *filp, struct dentry *dentry, int mode) -+{ -+ return init_private_file_it(filp, dentry, mode, NULL); -+} - - void fastcall fput(struct file * file) - { -Index: linux-2.4.29/fs/inode.c -=================================================================== ---- linux-2.4.29.orig/fs/inode.c 2005-04-07 19:18:51.000000000 +0300 -+++ linux-2.4.29/fs/inode.c 2005-05-03 16:02:40.198354304 +0300 -@@ -1154,6 +1154,24 @@ - } - spin_unlock(&inode_lock); - -+ return NULL; -+} -+ -+struct inode *ilookup4(struct super_block *sb, unsigned long ino, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct list_head * head = inode_hashtable + hash(sb,ino); -+ return ifind(sb, ino, head, find_actor, opaque); -+} -+ -+static inline struct inode *ifind(struct super_block *sb, unsigned long ino, -+ struct list_head *head, -+ find_inode_t find_actor, void *opaque) -+{ -+ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); -+ if (inode) -+ return inode; -+ - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. -Index: linux-2.4.29/fs/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/namei.c 2005-04-07 19:14:06.000000000 +0300 -+++ linux-2.4.29/fs/namei.c 2005-05-03 15:59:07.953620408 +0300 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -100,6 +101,7 @@ - it->it_op_release(it); - - } -+EXPORT_SYMBOL(intent_release); - - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the -@@ -910,7 +912,8 @@ - - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, -+ int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -930,11 +933,16 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash_it(&this, base, NULL); -+ return lookup_hash_it(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -Index: linux-2.4.29/fs/nfsd/export.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/export.c 2005-04-07 18:53:59.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/export.c 2005-05-03 15:59:07.955620104 +0300 -@@ -223,6 +223,11 @@ - inode = nd.dentry->d_inode; - dev = inode->i_dev; - ino = inode->i_ino; -+ if ((inode->i_sb->s_type->fs_flags & FS_NFSEXP_FSID) && -+ !(nxp->ex_flags & NFSEXP_FSID)) { -+ nxp->ex_dev = inode->i_sb->s_dev; -+ nxp->ex_flags |= NFSEXP_FSID; -+ } - err = -EINVAL; - - exp = exp_get(clp, dev, ino); -Index: linux-2.4.29/fs/nfsd/nfsfh.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/nfsfh.c 2005-04-07 18:53:14.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/nfsfh.c 2005-05-03 15:59:07.958619648 +0300 -@@ -36,6 +36,13 @@ - int sequence; /* sequence counter */ - }; - -+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) -+{ -+ if (inode->i_op->lookup_it) -+ return inode->i_op->lookup_it(inode, dentry, NULL, 0); -+ return inode->i_op->lookup(inode, dentry); -+} -+ - /* - * A rather strange filldir function to capture - * the name matching the specified inode number. -@@ -75,6 +82,8 @@ - int error; - struct file file; - struct nfsd_getdents_callback buffer; -+ struct lookup_intent it; -+ struct file *filp = NULL; - - error = -ENOTDIR; - if (!dir || !S_ISDIR(dir->i_mode)) -@@ -85,9 +94,37 @@ - /* - * Open the directory ... - */ -- error = init_private_file(&file, dentry, FMODE_READ); -+ if (dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && -+ (dentry->d_parent == dentry) ) { -+ it.it_op_release = NULL; -+ /* -+ * XXX Temporary Hack: Simulate init_private_file without -+ * f_op->open for disconnected dentry as we don't have -+ * actual dentry->d_name to revalidate in revalidate_it() -+ */ -+ filp = &file; -+ memset(filp, 0, sizeof(*filp)); -+ filp->f_mode = FMODE_READ; -+ atomic_set(&filp->f_count, 1); -+ filp->f_dentry = dentry; -+ filp->f_uid = current->fsuid; -+ filp->f_gid = current->fsgid; -+ filp->f_op = dentry->d_inode->i_fop; -+ error = 0; -+ } else { -+ intent_init(&it, IT_OPEN, FMODE_READ); -+ error = revalidate_it(dentry, &it); -+ if (error) -+ goto out; -+ error = init_private_file_it(&file, dentry, FMODE_READ, &it); -+ } -+ } else { -+ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); -+ } - if (error) - goto out; -+ - error = -EINVAL; - if (!file.f_op->readdir) - goto out_close; -@@ -113,9 +150,12 @@ - } - - out_close: -- if (file.f_op->release) -+ if (file.f_op->release && !filp) - file.f_op->release(dir, &file); - out: -+ if (dentry->d_op && dentry->d_op->d_revalidate_it && -+ it.it_op_release && !filp) -+ intent_release(&it); - return error; - } - -@@ -274,7 +314,7 @@ - * it is well connected. But nobody returns different dentrys do they? - */ - down(&child->d_inode->i_sem); -- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); -+ pdentry = lookup_it(child->d_inode, tdentry); - up(&child->d_inode->i_sem); - d_drop(tdentry); /* we never want ".." hashed */ - if (!pdentry && tdentry->d_inode == NULL) { -@@ -307,6 +347,8 @@ - pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; - pdentry->d_op = child->d_op; - } -+ if (child->d_op && child->d_op->d_revalidate_it) -+ pdentry->d_op = child->d_op; - } - if (pdentry == NULL) - pdentry = ERR_PTR(-ENOMEM); -@@ -464,6 +506,8 @@ - struct dentry *pdentry; - struct inode *parent; - -+ if (result->d_op && result->d_op->d_revalidate_it) -+ dentry->d_op = result->d_op; - pdentry = nfsd_findparent(dentry); - err = PTR_ERR(pdentry); - if (IS_ERR(pdentry)) -@@ -670,6 +714,10 @@ - - inode = dentry->d_inode; - -+ /* cache coherency for non-device filesystems */ -+ if (inode->i_op && inode->i_op->revalidate_it) -+ inode->i_op->revalidate_it(dentry, NULL); -+ - /* Type check. The correct error return for type mismatches - * does not seem to be generally agreed upon. SunOS seems to - * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 -@@ -903,8 +951,9 @@ - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - out_uptodate: -- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -- dentry->d_parent->d_name.name, dentry->d_name.name); -+ if (!dentry->d_parent->d_inode->i_op->mkdir_raw) -+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", -+ dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; - } - -Index: linux-2.4.29/fs/nfsd/vfs.c -=================================================================== ---- linux-2.4.29.orig/fs/nfsd/vfs.c 2005-04-07 18:53:19.000000000 +0300 -+++ linux-2.4.29/fs/nfsd/vfs.c 2005-05-03 15:59:07.965618584 +0300 -@@ -77,6 +77,126 @@ - static struct raparms * raparml; - static struct raparms * raparm_cache; - -+static int link_raw(struct dentry *dold, struct dentry *ddir, -+ struct dentry *dnew) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = dold }; -+ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->link_raw(&old_nd, &nd); -+ d_instantiate(dnew, dold->d_inode); -+ if (dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) -+ dold->d_inode->i_op->revalidate_it(dnew, NULL); -+ -+ return err; -+} -+ -+static int unlink_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->unlink_raw(&nd); -+ if (!err) -+ d_delete(rdentry); -+ -+ return err; -+} -+ -+static int rmdir_raw(struct dentry *dentry, char *fname, int flen, -+ struct dentry *rdentry) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->rmdir_raw(&nd); -+ if (!err) { -+ rdentry->d_inode->i_flags |= S_DEAD; -+ d_delete(rdentry); -+ } -+ -+ return err; -+} -+ -+static int symlink_raw(struct dentry *dentry, char *fname, int flen, -+ char *path) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->symlink_raw(&nd, path); -+ -+ return err; -+} -+ -+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mkdir_raw(&nd, mode); -+ -+ return err; -+} -+ -+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, -+ dev_t dev) -+{ -+ int err; -+ struct qstr last = { .name = fname, .len = flen }; -+ struct nameidata nd = { .dentry = dentry, .last = last }; -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ err = op->mknod_raw(&nd, mode, dev); -+ -+ return err; -+} -+ -+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, -+ struct dentry *odentry, struct dentry *ndentry) -+{ -+ int err; -+ -+ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; -+ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; -+ struct inode_operations *op = old_nd.dentry->d_inode->i_op; -+ err = op->rename_raw(&old_nd, &new_nd); -+ d_move(odentry, ndentry); -+ -+ return err; -+} -+ -+static int setattr_raw(struct inode *inode, struct iattr *iap) -+{ -+ int err; -+ -+ iap->ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, iap); -+ -+ return err; -+} -+ -+int revalidate_it(struct dentry *dentry, struct lookup_intent *it) -+{ -+ int err = 0; -+ -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && -+ !d_invalidate(dentry)) { -+ err = -EINVAL; -+ return err; -+ } -+ } -+ -+ return err; -+} -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -302,7 +422,10 @@ - } - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime) { -- err = notify_change(dentry, iap); -+ if (dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dentry->d_inode, iap); -+ else -+ err = notify_change(dentry, iap); - err = nfserrno(err); - } - if (size_change) { -@@ -429,6 +552,7 @@ - { - struct dentry *dentry; - struct inode *inode; -+ struct lookup_intent it; - int err; - - /* If we get here, then the client has already done an "open", and (hopefully) -@@ -475,6 +599,18 @@ - filp->f_mode = FMODE_READ; - } - -+#ifndef O_OWNER_OVERRIDE -+#define O_OWNER_OVERRIDE 0200000000 -+#endif -+ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode | -+ O_OWNER_OVERRIDE); -+ -+ err = revalidate_it(dentry, &it); -+ if (err) -+ goto out_nfserr; -+ -+ filp->f_it = ⁢ -+ - err = 0; - if (filp->f_op && filp->f_op->open) { - err = filp->f_op->open(inode, filp); -@@ -490,6 +626,9 @@ - } - } - out_nfserr: -+ if (it.it_op_release) -+ intent_release(&it); -+ - if (err) - err = nfserrno(err); - out: -@@ -837,7 +976,7 @@ - { - struct dentry *dentry, *dchild; - struct inode *dirp; -- int err; -+ int err, error = -EOPNOTSUPP; - - err = nfserr_perm; - if (!flen) -@@ -853,20 +992,47 @@ - dentry = fhp->fh_dentry; - dirp = dentry->d_inode; - -+ switch (type) { -+ case S_IFDIR: -+ if (dirp->i_op->mkdir_raw) -+ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ case S_IFREG: -+ if (dirp->i_op->mknod_raw) { -+ if (type == S_IFREG) -+ rdev = 0; -+ error = mknod_raw(dentry, fname,flen,iap->ia_mode,rdev); -+ } -+ break; -+ default: -+ printk("nfsd: bad file type %o in nfsd_create\n", type); -+ } -+ if (error && error != -EOPNOTSUPP) { -+ err = error; -+ goto out_nfserr; -+ } -+ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - /* - * Check whether the response file handle has been verified yet. - * If it has, the parent directory should already be locked. - */ -- if (!resfhp->fh_dentry) { -- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ -- fh_lock(fhp); -+ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { -+ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create -+ * and nfsd_proc_create in case of lustre */ -+ if (!resfhp->fh_dentry) -+ fh_lock(fhp); - dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; -+ resfhp->fh_dentry = NULL; - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; -@@ -887,10 +1053,12 @@ - * Make sure the child dentry is still negative ... - */ - err = nfserr_exist; -- if (dchild->d_inode) { -- dprintk("nfsd_create: dentry %s/%s not negative!\n", -- dentry->d_name.name, dchild->d_name.name); -- goto out; -+ if (error == -EOPNOTSUPP) { -+ if (dchild->d_inode) { -+ dprintk("nfsd_create: dentry %s/%s not negative!\n", -+ dentry->d_name.name, dchild->d_name.name); -+ goto out; -+ } - } - - if (!(iap->ia_valid & ATTR_MODE)) -@@ -903,16 +1071,19 @@ - err = nfserr_perm; - switch (type) { - case S_IFREG: -- err = vfs_create(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_create(dirp, dchild, iap->ia_mode); - break; - case S_IFDIR: -- err = vfs_mkdir(dirp, dchild, iap->ia_mode); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mkdir(dirp, dchild, iap->ia_mode); - break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: -- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); -+ if (error == -EOPNOTSUPP) -+ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); - break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); -@@ -981,7 +1152,13 @@ - /* Get all the sanity checks out of the way before - * we lock the parent. */ - err = nfserr_notdir; -- if(!dirp->i_op || !dirp->i_op->lookup) -+ if (dirp->i_op->mknod_raw) { -+ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); -+ if (err && err != -EOPNOTSUPP) -+ goto out_nfserr; -+ } -+ -+ if (!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) - goto out; - fh_lock(fhp); - -@@ -1032,6 +1209,8 @@ - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } -+ if (dirp->i_op->mknod_raw) -+ err = 0; - goto out; - } - -@@ -1138,7 +1317,7 @@ - struct iattr *iap) - { - struct dentry *dentry, *dnew; -- int err, cerr; -+ int err, cerr, error = -EOPNOTSUPP; - - err = nfserr_noent; - if (!flen || !plen) -@@ -1152,12 +1331,18 @@ - goto out; - fh_lock(fhp); - dentry = fhp->fh_dentry; -+ -+ if (dentry->d_inode->i_op->symlink_raw) -+ error = symlink_raw(dentry, fname, flen, path); -+ - dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - -- err = vfs_symlink(dentry->d_inode, dnew, path); -+ err = error; -+ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) -+ err = vfs_symlink(dentry->d_inode, dnew, path); - if (!err) { - if (EX_ISSYNC(fhp->fh_export)) - nfsd_sync_dir(dentry); -@@ -1167,7 +1352,10 @@ - iap->ia_valid |= ATTR_CTIME; - iap->ia_mode = (iap->ia_mode&S_IALLUGO) - | S_IFLNK; -- err = notify_change(dnew, iap); -+ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) -+ err = setattr_raw(dnew->d_inode, iap); -+ else -+ err = notify_change(dnew, iap); - if (err) - err = nfserrno(err); - else if (EX_ISSYNC(fhp->fh_export)) -@@ -1227,7 +1415,10 @@ - dold = tfhp->fh_dentry; - dest = dold->d_inode; - -- err = vfs_link(dold, dirp, dnew); -+ if (dirp->i_op->link_raw) -+ err = link_raw(dold, ddir, dnew); -+ else -+ err = vfs_link(dold, dirp, dnew); - if (!err) { - if (EX_ISSYNC(ffhp->fh_export)) { - nfsd_sync_dir(ddir); -@@ -1312,7 +1503,10 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ if (fdir->i_op->rename_raw) -+ err = rename_raw(fdentry, tdentry, odentry, ndentry); -+ else -+ err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); -@@ -1333,7 +1527,7 @@ - fill_post_wcc(tfhp); - double_up(&tdir->i_sem, &fdir->i_sem); - ffhp->fh_locked = tfhp->fh_locked = 0; -- -+ - out: - return err; - } -@@ -1379,9 +1573,15 @@ - err = nfserr_perm; - } else - #endif -- err = vfs_unlink(dirp, rdentry); -+ if (dirp->i_op->unlink_raw) -+ err = unlink_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ -- err = vfs_rmdir(dirp, rdentry); -+ if (dirp->i_op->rmdir_raw) -+ err = rmdir_raw(dentry, fname, flen, rdentry); -+ else -+ err = vfs_rmdir(dirp, rdentry); - } - - dput(rdentry); -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-04-07 19:31:00.000000000 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-05-03 16:05:36.094614008 +0300 -@@ -93,6 +93,8 @@ - #define FS_SINGLE 8 /* Filesystem that can have only one superblock */ - #define FS_NOMOUNT 16 /* Never mount from userland */ - #define FS_LITTER 32 /* Keeps the tree in dcache */ -+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for -+ * exporting non device filesystems. */ - #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ -@@ -1118,6 +1120,9 @@ - struct nameidata *nd, struct lookup_intent *it); - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); -+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, -+ struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1417,6 +1422,8 @@ - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, -+ struct lookup_intent *); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -@@ -1437,6 +1444,8 @@ - - extern struct inode * iget4_locked(struct super_block *, unsigned long, - find_inode_t, void *); -+extern struct inode * ilookup4(struct super_block *, unsigned long, -+ find_inode_t, void *); - - static inline struct inode *iget4(struct super_block *sb, unsigned long ino, - find_inode_t find_actor, void *opaque) -Index: linux-2.4.29/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/ksyms.c 2005-04-07 19:31:00.000000000 +0300 -+++ linux-2.4.29/kernel/ksyms.c 2005-05-03 16:04:09.445786632 +0300 -@@ -151,6 +151,7 @@ - EXPORT_SYMBOL(ilookup); - EXPORT_SYMBOL(iget4_locked); - EXPORT_SYMBOL(unlock_new_inode); -+EXPORT_SYMBOL(ilookup4); - EXPORT_SYMBOL(iput); - EXPORT_SYMBOL(inode_init_once); - EXPORT_SYMBOL(__inode_init_once); -@@ -164,6 +165,7 @@ - EXPORT_SYMBOL(path_release); - EXPORT_SYMBOL(__user_walk); - EXPORT_SYMBOL(lookup_one_len); -+EXPORT_SYMBOL(lookup_one_len_it); - EXPORT_SYMBOL(lookup_hash); - EXPORT_SYMBOL(sys_close); - EXPORT_SYMBOL(dcache_lock); diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-getattr_on_lookup-2.4.patch b/lustre/kernel_patches/patches/nfs_export_kernel-getattr_on_lookup-2.4.patch deleted file mode 100644 index 2becbc4..0000000 --- a/lustre/kernel_patches/patches/nfs_export_kernel-getattr_on_lookup-2.4.patch +++ /dev/null @@ -1,64 +0,0 @@ ---- linux-2.4.24orig/include/linux/fs.h.orig 2006-07-21 13:03:15.000000000 +0300 -+++ linux-2.4.24/include/linux/fs.h 2006-07-21 13:03:46.000000000 +0300 -@@ -1128,6 +1128,8 @@ extern int open_namei_it(const char *fil - extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct lookup_intent *it); - extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); -+struct dentry * lookup_one_len_getattr(const char * name, struct dentry * base, -+ int len); - extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, - struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); ---- linux-2.4.24orig/fs/nfsd/vfs.c.orig 2006-07-21 12:52:07.000000000 +0300 -+++ linux-2.4.24/fs/nfsd/vfs.c 2006-07-21 13:28:15.000000000 +0300 -@@ -198,6 +198,30 @@ int revalidate_it(struct dentry *dentry, - return err; - } - -+struct dentry * lookup_one_len_getattr(const char * name, struct dentry * base, -+ int len) -+{ -+ struct lookup_intent it; -+ struct dentry *de; -+ -+ intent_init(&it, IT_GETATTR, 0); -+ -+ de = lookup_one_len_it(name, base, len, NULL); -+ -+ if (!IS_ERR(de) && de->d_inode) { -+ if (de->d_inode->i_op && de->d_inode->i_op->revalidate_it) -+ de->d_inode->i_op->revalidate_it(de, &it); -+ else if (de->d_inode->i_op && de->d_inode->i_op->revalidate) -+ de->d_inode->i_op->revalidate(de); -+ } -+ -+ if (it.it_op_release) -+ intent_release(&it); -+ -+ return de; -+} -+ -+ - /* - * Look up one component of a pathname. - * N.B. After this call _both_ fhp and resfh need an fh_put -@@ -263,7 +287,7 @@ nfsd_lookup(struct svc_rqst *rqstp, stru - } - } else { - fh_lock(fhp); -- dentry = lookup_one_len(name, dparent, len); -+ dentry = lookup_one_len_getattr(name, dparent, len); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_nfserr; ---- linux-2.4.24orig/fs/nfsd/nfs3xdr.c.orig 2006-07-21 13:02:18.000000000 +0300 -+++ linux-2.4.24/fs/nfsd/nfs3xdr.c 2006-07-21 13:02:29.000000000 +0300 -@@ -715,7 +715,7 @@ encode_entry(struct readdir_cd *cd, cons - dchild = dchild->d_parent; - dchild = dget(dchild); - } else -- dchild = lookup_one_len(name, dparent,namlen); -+ dchild = lookup_one_len_getattr(name, dparent,namlen); - if (IS_ERR(dchild)) - goto noexec; - if (fh_compose(&fh, exp, dchild, cd->dirfh) != 0 || !dchild->d_inode) diff --git a/lustre/kernel_patches/patches/nfs_statfs-toomanyfiles-rhel-2.4.patch b/lustre/kernel_patches/patches/nfs_statfs-toomanyfiles-rhel-2.4.patch deleted file mode 100644 index 4bb8892..0000000 --- a/lustre/kernel_patches/patches/nfs_statfs-toomanyfiles-rhel-2.4.patch +++ /dev/null @@ -1,30 +0,0 @@ -Index: linux-2.4.21/fs/nfs/inode.c -=================================================================== ---- linux-2.4.21.orig/fs/nfs/inode.c 2005-06-01 22:51:55.000000000 -0400 -+++ linux-2.4.21/fs/nfs/inode.c 2005-06-01 23:38:54.883239912 -0400 -@@ -679,14 +679,21 @@ - goto too_big; - if (TOOBIG(((res.abytes + blockres) >> blockbits))) - goto too_big; -- if (TOOBIG(res.tfiles) || TOOBIG(res.afiles)) -- goto too_big; - - buf->f_blocks = (res.tbytes + blockres) >> blockbits; - buf->f_bfree = (res.fbytes + blockres) >> blockbits; - buf->f_bavail = (res.abytes + blockres) >> blockbits; -- buf->f_files = res.tfiles; -- buf->f_ffree = res.afiles; -+ -+ if (TOOBIG(res.tfiles)) -+ buf->f_files = -1; -+ else -+ buf->f_files = res.tfiles; -+ -+ if (TOOBIG(res.afiles)) -+ buf->f_ffree = -1; -+ else -+ buf->f_ffree = res.afiles; -+ - return 0; - - too_big: diff --git a/lustre/kernel_patches/patches/nfsd_iallocsem.patch b/lustre/kernel_patches/patches/nfsd_iallocsem.patch deleted file mode 100644 index 96b9c71..0000000 --- a/lustre/kernel_patches/patches/nfsd_iallocsem.patch +++ /dev/null @@ -1,19 +0,0 @@ -===== fs/nfsd/vfs.c 1.20 vs edited ===== ---- 1.20/fs/nfsd/vfs.c 2005-02-08 16:35:28 +02:00 -+++ edited/fs/nfsd/vfs.c 2005-05-29 00:46:44 +03:00 -@@ -297,6 +297,7 @@ - iap->ia_valid |= ATTR_CTIME; - - if (iap->ia_valid & ATTR_SIZE) { -+ down_write(&inode->i_alloc_sem); - fh_lock(fhp); - size_change = 1; - } -@@ -307,6 +308,7 @@ - } - if (size_change) { - fh_unlock(fhp); -+ up_write(&inode->i_alloc_sem); - put_write_access(inode); - } - if (!err) diff --git a/lustre/kernel_patches/patches/pagecache-lock-2.4.21-chaos.patch b/lustre/kernel_patches/patches/pagecache-lock-2.4.21-chaos.patch deleted file mode 100644 index 88f951b9..0000000 --- a/lustre/kernel_patches/patches/pagecache-lock-2.4.21-chaos.patch +++ /dev/null @@ -1,21 +0,0 @@ -Index: linux-2.4.21-chaos/include/linux/swap.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/swap.h 2003-12-12 16:24:33.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/swap.h 2003-12-12 16:41:15.000000000 +0300 -@@ -95,7 +95,7 @@ - extern atomic_t page_cache_size; - extern atomic_t buffermem_pages; - --#if 1 -+#if 0 - - static inline void - lock_pagecache(void) { br_write_lock(BR_PAGECACHE_LOCK); } -@@ -110,6 +110,7 @@ - - extern spinlock_cacheline_t pagecache_lock_cacheline; - #define __pagecache_lock (pagecache_lock_cacheline.lock) -+#define pagecache_lock (pagecache_lock_cacheline.lock) - - static inline void - lock_pagecache(void) { spin_lock(&__pagecache_lock); } diff --git a/lustre/kernel_patches/patches/proc-sleep-2.6.9.patch b/lustre/kernel_patches/patches/proc-sleep-2.6.9.patch new file mode 100644 index 0000000..3b399f6 --- /dev/null +++ b/lustre/kernel_patches/patches/proc-sleep-2.6.9.patch @@ -0,0 +1,758 @@ + +export blocking statistics in /proc//task/sleep. Statistics +collection for given file is activated on the first read of +/proc/pid/sleep. When statistics collection is on on each context switch +current back-trace is built (through __builtin_return_address()). For each +monitored process there is a LRU list of such back-traces. Useful when trying +to understand where elapsed time is spent. + +Signed-off-by: Nikita Danilov + + + arch/i386/Kconfig.debug | 9 + + arch/i386/kernel/entry.S | 4 + arch/i386/kernel/process.c | 52 +++++ + arch/i386/mm/fault.c | 7 + fs/proc/base.c | 15 + + fs/proc/proc_misc.c | 7 + include/linux/sched.h | 9 - + include/linux/sleep_info.h | 48 +++++ + kernel/Makefile | 1 + kernel/exit.c | 10 - + kernel/fork.c | 17 + + kernel/sched.c | 4 + kernel/sleep_info.c | 392 +++++++++++++++++++++++++++++++++++++++++++++ + 13 files changed, 560 insertions(+), 15 deletions(-) + +Index: linux/Makefile +=================================================================== +--- linux.orig/Makefile ++++ linux/Makefile +@@ -490,6 +490,10 @@ ifndef CONFIG_FRAME_POINTER + CFLAGS += -fomit-frame-pointer + endif + ++ifdef CONFIG_FRAME_POINTER_FORCE ++CFLAGS += -fno-omit-frame-pointer ++endif ++ + ifdef CONFIG_DEBUG_INFO + CFLAGS += -g + endif +Index: linux/arch/i386/Kconfig.debug +=================================================================== +--- linux.orig/arch/i386/Kconfig.debug ++++ linux/arch/i386/Kconfig.debug +@@ -79,4 +79,13 @@ config X86_MPPARSE + + source "arch/i386/Kconfig.kgdb" + ++config PROC_SLEEP ++ bool "Export sleep statistics" ++ depends on DEBUG_KERNEL && FRAME_POINTER ++ default n ++ help ++ If you say Y here, new file /proc/pid/stack will appear that contains ++ call-traces where given process blocked most of the time. If unsure say ++ N. ++ + endmenu +Index: linux/arch/x86_64/Kconfig.debug +=================================================================== +--- linux.orig/arch/x86_64/Kconfig.debug ++++ linux/arch/x86_64/Kconfig.debug +@@ -30,6 +30,21 @@ config SCHEDSTATS + application, you can say N to avoid the very slight overhead + this adds. + ++config FRAME_POINTER ++ bool "Compile the kernel with frame pointers" ++ help ++ Compile the kernel with frame pointers. This may help for some ++ debugging with external debuggers. Note the standard oops backtracer ++ doesn't make use of this and the x86-64 kernel doesn't ensure a ++ consistent frame pointer through inline assembly (semaphores etc.) ++ Normally you should say N. ++ ++config FRAME_POINTER_FORCE ++ bool "Compile the kernel with frame pointers" ++ depends on FRAME_POINTER ++ help ++ Enforce passing -fno-omit-frame-pointer to the compiler. ++ + config IOMMU_DEBUG + depends on GART_IOMMU && DEBUG_KERNEL + bool "Enable IOMMU debugging" +@@ -66,4 +81,13 @@ config IOMMU_LEAK + #config X86_REMOTE_DEBUG + # bool "kgdb debugging stub" + ++config PROC_SLEEP ++ bool "Export sleep statistics" ++ depends on DEBUG_KERNEL && FRAME_POINTER && FRAME_POINTER_FORCE ++ default n ++ help ++ If you say Y here, new file /proc/pid/stack will appear that contains ++ call-traces where given process blocked most of the time. If unsure say ++ N. ++ + endmenu +Index: linux/fs/proc/base.c +=================================================================== +--- linux.orig/fs/proc/base.c ++++ linux/fs/proc/base.c +@@ -103,6 +103,9 @@ enum pid_directory_inos { + #ifdef CONFIG_AUDITSYSCALL + PROC_TID_LOGINUID, + #endif ++#ifdef CONFIG_PROC_SLEEP ++ PROC_TID_SLEEP, ++#endif + PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ + }; + +@@ -170,6 +173,9 @@ static struct pid_entry tid_base_stuff[] + #ifdef CONFIG_AUDITSYSCALL + E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), + #endif ++#ifdef CONFIG_PROC_SLEEP ++ E(PROC_TID_SLEEP, "sleep", S_IFREG|S_IRUGO), ++#endif + {0,0,NULL,0} + }; + +@@ -1173,6 +1179,10 @@ out: + return ~0U; + } + ++#ifdef CONFIG_PROC_SLEEP ++extern struct file_operations proc_sleep_operations; ++#endif ++ + /* SMP-safe */ + static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) + { +@@ -1463,6 +1473,11 @@ static struct dentry *proc_pident_lookup + inode->i_fop = &proc_loginuid_operations; + break; + #endif ++#ifdef CONFIG_PROC_SLEEP ++ case PROC_TID_SLEEP: ++ inode->i_fop = &proc_sleep_operations; ++ break; ++#endif + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); +Index: linux/fs/proc/proc_misc.c +=================================================================== +--- linux.orig/fs/proc/proc_misc.c ++++ linux/fs/proc/proc_misc.c +@@ -270,6 +270,11 @@ static struct file_operations proc_cpuin + .release = seq_release, + }; + ++#ifdef CONFIG_PROC_SLEEP ++extern struct file_operations proc_global_sleep_operations; ++extern struct file_operations proc_global_stack_operations; ++#endif ++ + extern struct seq_operations vmstat_op; + static int vmstat_open(struct inode *inode, struct file *file) + { +@@ -641,4 +646,8 @@ void __init proc_misc_init(void) + entry->proc_fops = &ppc_htab_operations; + } + #endif ++#ifdef CONFIG_PROC_SLEEP ++ create_seq_entry("sleep", 0, &proc_global_sleep_operations); ++ create_seq_entry("stacktrace", 0, &proc_global_stack_operations); ++#endif + } +Index: linux/include/linux/sched.h +=================================================================== +--- linux.orig/include/linux/sched.h ++++ linux/include/linux/sched.h +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + struct exec_domain; + extern int exec_shield; +@@ -646,6 +647,8 @@ struct task_struct { + /* process tracking callback */ + struct list_head ptrack_list; + #endif ++ /* where this task blocked */ ++ struct sleep_info sinfo; + }; + + static inline pid_t process_group(struct task_struct *tsk) +Index: linux/include/linux/sleep_info.h +=================================================================== +--- linux.orig/include/linux/sleep_info.h ++++ linux/include/linux/sleep_info.h +@@ -0,0 +1,50 @@ ++#ifndef _LINUX_SLEEP_INFO_H ++#define _LINUX_SLEEP_INFO_H ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_PROC_SLEEP ++ ++struct __sleep_info; ++struct task_struct; ++ ++struct sleep_info { ++ struct __sleep_info *p; ++ unsigned long long last_in; ++}; ++ ++void init_sleep_info(struct task_struct *tsk); ++void free_sleep_info(struct sleep_info *sinfo); ++ ++void sleep_in_hook(struct task_struct *tsk); ++void sleep_ex_hook(struct task_struct *tsk); ++ ++void stacktrace_record(void); ++ ++extern struct file_operations proc_sleep_operations; ++ ++/* CONFIG_PROC_SLEEP */ ++#else ++ ++struct sleep_info {}; ++ ++#define init_sleep_info(tsk) ++#define free_sleep_info(sinfo) ++ ++#define sleep_in_hook(tsk) ++#define sleep_ex_hook(tsk) ++#define stacktrace_record() ++ ++/* CONFIG_PROC_SLEEP */ ++#endif ++ ++/* __KERNEL__ */ ++#endif ++ ++/* _LINUX_SLEEP_INFO_H */ ++#endif +Index: linux/kernel/Makefile +=================================================================== +--- linux.orig/kernel/Makefile ++++ linux/kernel/Makefile +@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o + obj-$(CONFIG_PTRACK) += ptrack.o ++obj-$(CONFIG_PROC_SLEEP) += sleep_info.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +Index: linux/kernel/exit.c +=================================================================== +--- linux.orig/kernel/exit.c ++++ linux/kernel/exit.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -96,6 +97,7 @@ repeat: + write_unlock_irq(&tasklist_lock); + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); ++ free_sleep_info(&p->sinfo); + release_thread(p); + put_task_struct(p); + +Index: linux/kernel/fork.c +=================================================================== +--- linux.orig/kernel/fork.c ++++ linux/kernel/fork.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1172,6 +1173,8 @@ static task_t *copy_process(unsigned lon + if (!current->signal->tty && p->signal->tty) + p->signal->tty = NULL; + ++ init_sleep_info(p); ++ + nr_threads++; + write_unlock_irq(&tasklist_lock); + retval = 0; +Index: linux/kernel/sched.c +=================================================================== +--- linux.orig/kernel/sched.c ++++ linux/kernel/sched.c +@@ -2656,6 +2656,8 @@ asmlinkage void __sched schedule(void) + } + } + ++ sleep_in_hook(current); ++ + need_resched: + preempt_disable(); + prev = current; +@@ -2797,6 +2799,8 @@ switch_tasks: + preempt_enable_no_resched(); + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) + goto need_resched; ++ ++ sleep_ex_hook(current); + } + + EXPORT_SYMBOL(schedule); +Index: linux/kernel/sleep_info.c +=================================================================== +--- linux.orig/kernel/sleep_info.c ++++ linux/kernel/sleep_info.c +@@ -0,0 +1,431 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#ifdef CONFIG_PROC_SLEEP ++ ++#define SLEEP_TRACES_DEF (32) ++#define GLOBAL_SLEEP_TRACES_DEF (512) ++#define SLEEP_TRACE_DEPTH (20) ++#define GLOBAL_STACK_TRACES_DEF (512) ++ ++struct stack_trace { ++ unsigned nr; ++ unsigned long long total; ++ unsigned long long max; ++ struct list_head lru; ++ unsigned long hash; ++ int depth; ++ void *frame[SLEEP_TRACE_DEPTH]; ++}; ++ ++struct __sleep_info { ++ spinlock_t lock; ++ int nr_traces; ++ struct list_head lru; ++ struct stack_trace traces[0]; ++}; ++ ++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) ++{ ++ return p > (void *)tinfo && ++ p < (void *)tinfo + THREAD_SIZE - 3; ++} ++ ++static void fill_trace(struct stack_trace *trace) ++{ ++ int i; ++ struct thread_info *tinfo = current_thread_info(); ++ unsigned long fp; /* frame pointer */ ++ ++ /* Grab fp right from our regs */ ++#if defined(CONFIG_X86_64) && defined(CONFIG_FRAME_POINTER_FORCE) ++ asm ("movq %%rbp, %0" : "=r" (fp) : ); ++#define FP_RETADDR_OFFSET (8) ++#elif defined(CONFIG_X86) ++ asm ("movl %%ebp, %0" : "=r" (fp) : ); ++#define FP_RETADDR_OFFSET (4) ++#else ++#error Unsupported platform. ++#endif ++ ++ for (i = -3; /* skip three innermost frames */ ++ i < SLEEP_TRACE_DEPTH && valid_stack_ptr(tinfo, (void *)fp); ++ ++i, fp = *(unsigned long *)fp) { ++ if (i >= 0) ++ trace->frame[i] = *(void **)(fp + FP_RETADDR_OFFSET); ++ } ++ trace->depth = max(i, 0); ++} ++ ++static int stack_trace_eq(struct stack_trace *t1, struct stack_trace *t2) ++{ ++ return ++ t1->hash == t2->hash && t1->depth == t2->depth && ++ !memcmp(t1->frame, t2->frame, t1->depth * sizeof t1->frame[0]); ++} ++ ++static unsigned long stack_trace_hash(struct stack_trace *trace) ++{ ++ int i; ++ unsigned hash; ++ ++ for (i = 0, hash = 0; i < trace->depth; ++ i) ++ hash += ((unsigned long)trace->frame[i]) >> 3; ++ return hash; ++} ++ ++/* from sleepometer by Andrew Morton */ ++static unsigned long long grab_time(void) ++{ ++#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) ++ /* ++ * do_gettimeofday() goes backwards sometimes :(. Usethe TSC ++ */ ++ unsigned long long ret; ++ extern unsigned long cpu_khz; ++ ++ rdtscll(ret); ++ do_div(ret, cpu_khz / 1000); ++ return ret; ++#else ++ struct timeval now; ++ unsigned long long ret; ++ ++ do_gettimeofday(&now); ++ ret = now.tv_sec; ++ ret *= 1000000; ++ ret += now.tv_usec; ++ return ret; ++#endif ++} ++ ++static void zero_sleep_info(struct __sleep_info *info) ++{ ++ int i; ++ ++ INIT_LIST_HEAD(&info->lru); ++ for (i = 0; i < info->nr_traces; ++ i) { ++ struct stack_trace *trace; ++ ++ trace = &info->traces[i]; ++ trace->nr = 0; ++ trace->total = 0; ++ trace->max = 0; ++ trace->hash = 0; ++ list_add(&trace->lru, &info->lru); ++ memset(trace->frame, 0, sizeof trace->frame); ++ } ++} ++ ++static int alloc_sleep_info(struct sleep_info *sinfo, int nr_traces) ++{ ++ struct __sleep_info *result; ++ ++ result = kmalloc(sizeof *result + nr_traces * sizeof result->traces[0], ++ GFP_ATOMIC); ++ if (result == NULL) ++ return -ENOMEM; ++ ++ sinfo->p = result; ++ sinfo->last_in = 0; ++ result->nr_traces = nr_traces; ++ spin_lock_init(&result->lock); ++ zero_sleep_info(result); ++ return 0; ++} ++ ++void init_sleep_info(struct task_struct *tsk) ++{ ++ tsk->sinfo.p = NULL; ++} ++ ++void free_sleep_info(struct sleep_info *sinfo) ++{ ++ kfree(sinfo->p); ++ sinfo->p = NULL; ++} ++ ++void sleep_in_hook(struct task_struct *tsk) ++{ ++ tsk->sinfo.last_in = grab_time(); ++} ++ ++void update_sinfo(struct __sleep_info *sinfo, unsigned long long last_in) ++{ ++ if (sinfo != NULL && last_in != 0) { ++ unsigned long long delta; ++ struct stack_trace trace; ++ struct stack_trace *target; ++ int i; ++ ++ delta = grab_time() - last_in; ++ fill_trace(&trace); ++ target = NULL; /* to shut gcc up */ ++ trace.hash = stack_trace_hash(&trace); ++ spin_lock(&sinfo->lock); ++ for (i = 0; i < sinfo->nr_traces; ++ i) { ++ target = &sinfo->traces[i]; ++ if (stack_trace_eq(&trace, target)) { ++ ++ target->nr; ++ target->total += delta; ++ target->max = max(target->max, delta); ++ break; ++ } ++ } ++ if (i == sinfo->nr_traces) { ++ target = container_of(sinfo->lru.prev, ++ struct stack_trace, lru); ++ target->nr = 1; ++ target->total = target->max = delta; ++ target->hash = trace.hash; ++ target->depth = trace.depth; ++ memcpy(target->frame, trace.frame, sizeof target->frame); ++ } ++ list_move(&target->lru, &sinfo->lru); ++ spin_unlock(&sinfo->lock); ++ } ++} ++ ++static struct sleep_info global_sinfo = { ++ .p = NULL ++}; ++ ++void sleep_ex_hook(struct task_struct *tsk) ++{ ++ struct sleep_info *cur; ++ ++ cur = &tsk->sinfo; ++ update_sinfo(cur->p, cur->last_in); ++ update_sinfo(global_sinfo.p, cur->last_in); ++ cur->last_in = 0; ++} ++ ++static spinlock_t sleep_serializer = SPIN_LOCK_UNLOCKED; ++ ++static void *sinfo_start(struct sleep_info *sinfo, int nr_traces, loff_t l) ++{ ++ spin_lock(&sleep_serializer); ++ if (sinfo->p == NULL) ++ alloc_sleep_info(sinfo, nr_traces); ++ spin_unlock(&sleep_serializer); ++ if (sinfo->p == NULL) ++ return NULL; ++ ++ if (l >= sinfo->p->nr_traces) ++ return NULL; ++ else ++ return &sinfo->p->traces[l]; ++} ++ ++static void *sinfo_next(struct sleep_info *sinfo, void *v, loff_t *pos) ++{ ++ (*pos)++; ++ ++ if (*pos >= sinfo->p->nr_traces) ++ return NULL; ++ else ++ return ((struct stack_trace *)v) + 1; ++} ++ ++/* ++ * seq_file methods for /proc/pid/sleep. No locking is needed here, because we ++ * are iterating over sinfo->traces[] array rather than over sinfo->lru ++ * list. Actually spin locking is not allowed, because we can schedule between ++ * sleep_start() and sleep_stop(). ++ */ ++ ++static void *sleep_start(struct seq_file *m, loff_t *pos) ++{ ++ struct task_struct *task; ++ ++ task = m->private; ++ return sinfo_start(&task->sinfo, SLEEP_TRACES_DEF, *pos); ++} ++ ++static void sleep_stop(struct seq_file *m, void *v) ++{ ++} ++ ++static void *sleep_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct task_struct *task; ++ ++ task = m->private; ++ return sinfo_next(&task->sinfo, v, pos); ++} ++ ++static int show_sleep(struct seq_file *m, void *v) ++{ ++ struct stack_trace *trace; ++ int i; ++ ++ trace = v; ++ if (trace->nr == 0) ++ return 0; ++ ++ seq_printf(m, "\n%u %llu %llu", ++ trace->nr, trace->total, trace->max); ++ for (i = 0; i < trace->depth; ++ i) { ++ char *module; ++ const char *name; ++ char namebuf[128]; ++ unsigned long address; ++ unsigned long offset; ++ unsigned long size; ++ ++ address = (unsigned long) trace->frame[i]; ++ name = kallsyms_lookup(address, &size, ++ &offset, &module, namebuf); ++ seq_printf(m, "\n\t%i %#lx ", i, address); ++ if (name != NULL) ++ seq_printf(m, "%s+%#lx/%#lx", name, offset, size); ++ } ++ seq_printf(m, "\n"); ++ return 0; ++} ++ ++struct seq_operations proc_pid_sleep_op = { ++ .start = sleep_start, ++ .next = sleep_next, ++ .stop = sleep_stop, ++ .show = show_sleep ++}; ++ ++static int sleep_open(struct inode *inode, struct file *file) ++{ ++ struct task_struct *task = PROC_I(inode)->task; ++ int ret = seq_open(file, &proc_pid_sleep_op); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = task; ++ } ++ return ret; ++} ++ ++static void reset_sleep_info(struct sleep_info *sinfo, int nr_traces) ++{ ++ spin_lock(&sleep_serializer); ++ if (sinfo->p == NULL) ++ alloc_sleep_info(sinfo, nr_traces); ++ if (sinfo->p != NULL) ++ zero_sleep_info(sinfo->p); ++ spin_unlock(&sleep_serializer); ++} ++ ++static ssize_t sleep_write(struct file *file, const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct task_struct *tsk = PROC_I(file->f_dentry->d_inode)->task; ++ ++ reset_sleep_info(&tsk->sinfo, SLEEP_TRACES_DEF); ++ return count; ++} ++ ++struct file_operations proc_sleep_operations = { ++ .open = sleep_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++ .write = sleep_write ++}; ++ ++static void *global_sleep_start(struct seq_file *m, loff_t *pos) ++{ ++ return sinfo_start(&global_sinfo, GLOBAL_SLEEP_TRACES_DEF, *pos); ++} ++ ++static void *global_sleep_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ return sinfo_next(&global_sinfo, v, pos); ++} ++ ++ ++struct seq_operations global_sleep_op = { ++ .start = global_sleep_start, ++ .next = global_sleep_next, ++ .stop = sleep_stop, ++ .show = show_sleep ++}; ++ ++static int global_sleep_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &global_sleep_op); ++} ++ ++static ssize_t global_sleep_write(struct file *file, const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ reset_sleep_info(&global_sinfo, GLOBAL_SLEEP_TRACES_DEF); ++ return count; ++} ++ ++struct file_operations proc_global_sleep_operations = { ++ .open = global_sleep_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++ .write = global_sleep_write ++}; ++ ++static struct sleep_info stack_sinfo = { ++ .p = NULL ++}; ++ ++static void *global_stack_start(struct seq_file *m, loff_t *pos) ++{ ++ return sinfo_start(&stack_sinfo, GLOBAL_STACK_TRACES_DEF, *pos); ++} ++ ++static void *global_stack_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ return sinfo_next(&stack_sinfo, v, pos); ++} ++ ++ ++struct seq_operations global_stack_op = { ++ .start = global_stack_start, ++ .next = global_stack_next, ++ .stop = sleep_stop, ++ .show = show_sleep ++}; ++ ++static int global_stack_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &global_stack_op); ++} ++ ++static ssize_t global_stack_write(struct file *file, const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ reset_sleep_info(&stack_sinfo, GLOBAL_STACK_TRACES_DEF); ++ return count; ++} ++ ++struct file_operations proc_global_stack_operations = { ++ .open = global_stack_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++ .write = global_stack_write ++}; ++ ++void stacktrace_record(void) ++{ ++ if (!in_interrupt()) ++ update_sinfo(stack_sinfo.p, 1); ++} ++EXPORT_SYMBOL(stacktrace_record); ++ ++/* CONFIG_PROC_SLEEP */ ++#endif diff --git a/lustre/kernel_patches/patches/procfs-ndynamic-2.4.21-suse2.patch b/lustre/kernel_patches/patches/procfs-ndynamic-2.4.21-suse2.patch deleted file mode 100644 index 65f4926..0000000 --- a/lustre/kernel_patches/patches/procfs-ndynamic-2.4.21-suse2.patch +++ /dev/null @@ -1,16 +0,0 @@ -Index: linux-2.4.21-suse2/include/linux/proc_fs.h -=================================================================== ---- linux-2.4.21-suse2.orig/include/linux/proc_fs.h 2004-01-12 09:56:51.000000000 +0300 -+++ linux-2.4.21-suse2/include/linux/proc_fs.h 2004-01-12 20:25:55.000000000 +0300 -@@ -25,11 +25,7 @@ - /* Finally, the dynamically allocatable proc entries are reserved: */ - - #define PROC_DYNAMIC_FIRST 4096 --#ifdef CONFIG_PPC64 - #define PROC_NDYNAMIC 16384 --#else --#define PROC_NDYNAMIC 4096 --#endif - - #define PROC_SUPER_MAGIC 0x9fa0 - diff --git a/lustre/kernel_patches/patches/procfs-ndynamic-2.4.patch b/lustre/kernel_patches/patches/procfs-ndynamic-2.4.patch deleted file mode 100644 index bff6c7e..0000000 --- a/lustre/kernel_patches/patches/procfs-ndynamic-2.4.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-2.4.18-chaos/include/linux/proc_fs.h -=================================================================== ---- linux-2.4.18-chaos.orig/include/linux/proc_fs.h 2003-07-28 17:52:18.000000000 +0400 -+++ linux-2.4.18-chaos/include/linux/proc_fs.h 2004-01-12 20:12:38.000000000 +0300 -@@ -25,7 +25,7 @@ - /* Finally, the dynamically allocatable proc entries are reserved: */ - - #define PROC_DYNAMIC_FIRST 4096 --#define PROC_NDYNAMIC 4096 -+#define PROC_NDYNAMIC 16384 - - #define PROC_SUPER_MAGIC 0x9fa0 - diff --git a/lustre/kernel_patches/patches/qsnet-rhel-2.4.patch b/lustre/kernel_patches/patches/qsnet-rhel-2.4.patch deleted file mode 100644 index 6f53041..0000000 --- a/lustre/kernel_patches/patches/qsnet-rhel-2.4.patch +++ /dev/null @@ -1,93733 +0,0 @@ -Index: linux-2.4.21/arch/i386/kernel/i386_ksyms.c -=================================================================== ---- linux-2.4.21.orig/arch/i386/kernel/i386_ksyms.c 2005-06-01 22:51:51.000000000 -0400 -+++ linux-2.4.21/arch/i386/kernel/i386_ksyms.c 2005-06-01 23:12:54.521450960 -0400 -@@ -220,3 +220,12 @@ - EXPORT_SYMBOL_GPL(__PAGE_KERNEL); - extern unsigned long long __supported_pte_mask; - EXPORT_SYMBOL_GPL(__supported_pte_mask); -+ -+extern asmlinkage long sys_open(const char *, int, int); -+EXPORT_SYMBOL(sys_open); -+extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int); -+EXPORT_SYMBOL(sys_lseek); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+EXPORT_SYMBOL(sys_poll); -+extern asmlinkage long sys_kill(int, int); -+EXPORT_SYMBOL(sys_kill); -Index: linux-2.4.21/arch/ia64/kernel/ia64_ksyms.c -=================================================================== ---- linux-2.4.21.orig/arch/ia64/kernel/ia64_ksyms.c 2005-06-01 22:51:59.000000000 -0400 -+++ linux-2.4.21/arch/ia64/kernel/ia64_ksyms.c 2005-06-01 23:14:43.773842072 -0400 -@@ -207,3 +207,13 @@ - EXPORT_SYMBOL_GPL(show_state); - EXPORT_SYMBOL_GPL(show_regs); - EXPORT_SYMBOL(pm_power_off); -+ -+#define __KERNEL_SYSCALLS__ 1 -+#include -+EXPORT_SYMBOL(sys_open); -+extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int); -+EXPORT_SYMBOL(sys_lseek); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+EXPORT_SYMBOL(sys_poll); -+extern asmlinkage long sys_kill(int, int); -+EXPORT_SYMBOL(sys_kill); -Index: linux-2.4.21/arch/x86_64/kernel/x8664_ksyms.c -=================================================================== ---- linux-2.4.21.orig/arch/x86_64/kernel/x8664_ksyms.c 2005-06-01 22:51:51.000000000 -0400 -+++ linux-2.4.21/arch/x86_64/kernel/x8664_ksyms.c 2005-06-01 23:12:54.522450808 -0400 -@@ -215,6 +215,10 @@ - EXPORT_SYMBOL(sys_exit); - EXPORT_SYMBOL(sys_open); - EXPORT_SYMBOL(sys_lseek); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+EXPORT_SYMBOL(sys_poll); -+extern asmlinkage long sys_kill(int, int); -+EXPORT_SYMBOL(sys_kill); - EXPORT_SYMBOL(sys_delete_module); - EXPORT_SYMBOL(sys_sync); - EXPORT_SYMBOL(sys_pause); -Index: linux-2.4.21/Documentation/Configure.help -=================================================================== ---- linux-2.4.21.orig/Documentation/Configure.help 2005-06-01 23:12:39.856680344 -0400 -+++ linux-2.4.21/Documentation/Configure.help 2005-06-01 23:12:54.547447008 -0400 -@@ -28030,6 +28030,54 @@ - kernel tree does. Such modules that use library CRC32 functions - require M here. - -+ -+Enable support for Quadrics QsNet (QSNET) -+CONFIG_QSNET -+ Quadrics QsNet is a high bandwidth, ultra low latency cluster -+ interconnect which provides both user and kernel programmers with -+ secure, direct access to the Quadrics network. -+ -+Elan 3 device driver (ELAN3) -+CONFIG_ELAN3 -+ This is the main device driver for the Quadrics QsNet (Elan3) PCI -+ device. This is a high bandwidth, ultra low latency interconnect -+ which provides both user and kernel programmers with secure, direct -+ access to the Quadrics network. -+ -+Elan 3 Kernel Comms (EP3) -+CONFIG_EP3 -+ This modules implements the QsNet kernel communications layer. This -+ is used to layer kernel level facilities on top of the basic Elan3 -+ device driver. These can be used to implement subsystems such as -+ TCP/IP and remote filing systems over the QsNet interconnect. -+ -+Elan IP device (EIP) -+CONFIG_EIP -+ This is a network IP device driver for the Quadrics QsNet device. -+ It allows the TCP/IP protocol to be run over the Quadrics interconnect. -+ -+Elan 4 device driver (ELAN4) -+CONFIG_ELAN4 -+ This is the main device driver for the Quadrics QsNetII (Elan4) PCI-X -+ device. This is a high bandwidth, ultra low latency interconnect which -+ provides both user and kernel programmers with secure, direct access to -+ the Quadrics network. -+Resource Management System support (RMS) -+CONFIG_RMS -+ This is a support module for the Quadrics RMS resource manager. It -+ provides kernel services for monitoring and controlling user job -+ execution, termination and cleanup. -+ -+Switch monitoring (JTAG) -+CONFIG_JTAG -+ The jtag interface is used to allow processes to send and retrieve jtag -+ information to a Quadrics QsNet Elite switch via the parallel port. -+ The module requires a /dev/jtag[0-3] entry (usually there is only a -+ /dev/jtag0) device and a particular device only allows one process at a -+ time to access this resource. -+ For more information about JTag interface, please refer to the IEEE -+ document on http://www.ieee.org -+ - # - # A couple of things I keep forgetting: - # capitalize: AppleTalk, Ethernet, DOS, DMA, FAT, FTP, Internet, -Index: linux-2.4.21/drivers/net/Config.in -=================================================================== ---- linux-2.4.21.orig/drivers/net/Config.in 2005-06-01 22:52:03.000000000 -0400 -+++ linux-2.4.21/drivers/net/Config.in 2005-06-01 23:12:54.549446704 -0400 -@@ -272,6 +272,9 @@ - - endmenu - -+# Quadrics QsNet -+source drivers/net/qsnet/Config.in -+ - if [ "$CONFIG_PPC_ISERIES" = "y" ]; then - dep_tristate 'iSeries Virtual Ethernet driver support' CONFIG_VETH $CONFIG_PPC_ISERIES - fi -Index: linux-2.4.21/drivers/net/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/Makefile 2005-06-01 22:52:03.000000000 -0400 -+++ linux-2.4.21/drivers/net/Makefile 2005-06-01 23:12:54.550446552 -0400 -@@ -8,7 +8,7 @@ - obj-n := - obj- := - --mod-subdirs := appletalk arcnet fc irda tokenring pcmcia wireless wireless_old wan -+mod-subdirs := appletalk arcnet fc irda tokenring pcmcia wireless wireless_old wan qsnet - - O_TARGET := net.o - -@@ -48,6 +48,7 @@ - subdir-$(CONFIG_DEV_APPLETALK) += appletalk - subdir-$(CONFIG_SK98LIN) += sk98lin - subdir-$(CONFIG_SKFP) += skfp -+subdir-$(CONFIG_QSNET) += qsnet - subdir-$(CONFIG_E100) += e100 - subdir-$(CONFIG_E1000) += e1000 - subdir-$(CONFIG_BONDING) += bonding -Index: linux-2.4.21/drivers/net/qsnet/Config.in -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/Config.in 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/Config.in 2005-06-01 23:12:54.550446552 -0400 -@@ -0,0 +1,25 @@ -+# -+# Config.in for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/Config.in -+# -+ -+mainmenu_option next_comment -+comment "Quadrics QsNet device support" -+ -+dep_tristate "Enable support for Quadrics QsNet" CONFIG_QSNET $CONFIG_PCI -+ -+dep_tristate "Elan 3 device driver" CONFIG_ELAN3 $CONFIG_QSNET -+dep_tristate "Elan 4 device driver" CONFIG_ELAN4 $CONFIG_QSNET -+ -+if [ "$CONFIG_ELAN3" = "$CONFIG_QSNET" ] || [ "$CONFIG_ELAN4" = "$CONFIG_QSNET" ]; then -+ dep_tristate "Elan Kernel Comms" CONFIG_EP $CONFIG_QSNET -+fi -+dep_tristate "Elan IP device" CONFIG_EIP $CONFIG_NET $CONFIG_EP -+ -+dep_tristate "Resource Management System support" CONFIG_RMS $CONFIG_QSNET -+dep_tristate "Switch monitoring" CONFIG_JTAG $CONFIG_QSNET -+ -+endmenu -Index: linux-2.4.21/drivers/net/qsnet/eip/eip_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/eip_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/eip_linux.c 2005-06-01 23:12:54.553446096 -0400 -@@ -0,0 +1,1565 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: eip_linux.c,v 1.89.2.3 2004/12/20 16:54:05 mike Exp $" -+ -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#undef ASSERT -+#include -+#include -+ -+ -+ -+#include -+#include -+ -+#include "eip_linux.h" -+#include "eip_stats.h" -+ -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *); -+#endif -+static void eip_iph_display(struct iphdr *); -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER *); -+static void eip_packet_display(unsigned char *); -+#endif -+static void eip_tmd_display(EIP_TMD *); -+static void eip_tmd_head_display(EIP_TMD_HEAD *); -+static void eip_rmd_display(EIP_RMD *); -+static void eip_rmd_head_display(EIP_RMD_HEAD *); -+ -+static void eip_rmd_reclaim(EIP_RMD *); -+ -+static inline EP_NMH *eip_dma_reserve(int, int); -+static inline void __eip_tmd_load(EIP_TMD *, EP_RAILMASK *); -+static inline void __eip_tmd_unload(EIP_TMD *); -+static inline unsigned long eip_buff_alloc(int, int); -+static inline void eip_buff_free(unsigned long, int); -+static struct iphdr *eip_ipfrag_get(char *); -+static inline void eip_rmd_free(EIP_RMD *); -+static inline void eip_skb_load(EIP_RMD *); -+static inline void eip_skb_unload(EIP_RMD *); -+static inline void eip_rmd_requeue(EIP_RMD *); -+static EIP_RMD *eip_rmd_alloc(int, int); -+static int eip_rmd_alloc_replace(EIP_RMD *, int, int); -+static int eip_rmd_alloc_queue(int, int, int, int); -+static int eip_rmds_alloc(void); -+static void eip_rxhandler(EP_RXD *); -+static void eip_rx_tasklet(unsigned long); -+static inline void eip_tmd_init(EIP_TMD *, unsigned long, EIP_TMD_HEAD *, unsigned long, int); -+static inline EIP_TMD *eip_tmd_get(int); -+static inline void eip_tmd_put(EIP_TMD *); -+static inline void eip_tmd_load(EIP_TMD *); -+static inline void eip_tmd_unload(EIP_TMD *); -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD *, EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD *, int); -+static int eip_tmds_alloc(void); -+int eip_hard_start_xmit(struct sk_buff *, struct net_device *); -+static inline int eip_do_xmit(EIP_TMD *, EP_NMD *i, EP_PAYLOAD *); -+static void eip_txhandler(EP_TXD *, void *, EP_STATUS); -+static void eip_tx_tasklet(unsigned long); -+void eip_stop_queue(void); -+void eip_start_queue(void); -+static int eip_open(struct net_device *); -+static int eip_close(struct net_device *); -+static struct net_device_stats *eip_get_stats(struct net_device *); -+static int eip_change_mtu(struct net_device *, int); -+ -+static int eip_rx_dropping = 0; -+static int eip_rx_tasklet_locked = 1; -+ -+/* Global */ -+struct timer_list eip_rx_tasklet_timer; -+ -+EIP_RX *eip_rx = NULL; -+EIP_TX *eip_tx = NULL; -+int eip_checksum_state=CHECKSUM_NONE; -+ -+int tmd_max = EIP_TMD_MAX_NR; -+int rmd_max = EIP_RMD_MAX_NR; -+int rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+int rx_granularity = EIP_RX_GRANULARITY; -+int tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+EP_RAILMASK tx_railmask = EP_RAILMASK_ALL; -+int eipdebug = 0; -+ -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *skb) -+{ -+ if (skb) { -+ __EIP_DBG_PRINTF("SKB [%p] : len %d truesize %d proto %x pkt type %x cloned %d users %d summed %d\n", -+ skb, skb->len, skb->truesize, skb->protocol, skb->pkt_type, skb->cloned, atomic_read(&skb->users), skb->ip_summed); -+ __EIP_DBG_PRINTF("SKB [%p] : skb_shinfo dataref %d nr_frags %d frag_list[%p] (device %p)\n", skb, -+ atomic_read(&skb_shinfo(skb)->dataref), skb_shinfo(skb)->nr_frags, skb_shinfo(skb)->frag_list, skb->dev); -+ __EIP_DBG_PRINTF("SKB [%p] : head[%p] data[%p] tail [%p] end [%p] data_len [%d]\n", skb, skb->head, skb->data, -+ skb->tail, skb->end, skb->data_len); -+ __EIP_DBG_PRINTF("SKB [%p] : Transport Layer h.(th, uh, icmph, raw)[%p]\n", skb, skb->h.th); -+ __EIP_DBG_PRINTF("SKB [%p] : Network Layer nh.(iph, arph, raw)[%p]\n", skb, skb->nh.iph); -+ __EIP_DBG_PRINTF("SKB [%p] : Link Layer mac.(ethernet, raw)[%p]\n", skb, skb->mac.ethernet); -+ return; -+ } -+ EIP_ERR_PRINTF("SKB IS NULL - NO SKB TO DISPLAY\n"); -+} -+#endif -+static void eip_iph_display(struct iphdr *iph) -+{ -+ if (iph) { -+ __EIP_DBG_PRINTF("IPH [%p] : version %d header len %d TOS 0x%x Total len %d\n", -+ iph, iph->version, iph->ihl, htons(iph->tos), htons(iph->tot_len)); -+ __EIP_DBG_PRINTF("IPH [%p] : id %d frag flags 0x%x offset %d\n", -+ iph, htons(iph->id), (iph->frag_off & htons(IP_CE | IP_DF | IP_MF)) >> 4, -+ (htons(iph->frag_off) << 3) & IP_OFFSET); -+ __EIP_DBG_PRINTF("IPH [%p] : TTL %d proto %d header checksum 0x%x\n", iph, iph->ttl, iph->protocol, iph->check); -+ __EIP_DBG_PRINTF("IPH [%p] : IP src %u.%u.%u.%u dest %u.%u.%u.%u\n", iph, -+ ((unsigned char *)&(iph->saddr))[0],((unsigned char *)&(iph->saddr))[1], ((unsigned char *)&(iph->saddr))[2],((unsigned char *)&(iph->saddr))[3], -+ ((unsigned char *)&(iph->daddr))[0],((unsigned char *)&(iph->daddr))[1], ((unsigned char *)&(iph->daddr))[2],((unsigned char *)&(iph->daddr))[3]); -+ return; -+ } -+ EIP_ERR_PRINTF("IPH IS NULL - NO IPH TO DISPLAY\n"); -+} -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER * eiph) -+{ -+ if (eiph) { -+ __EIP_DBG_PRINTF("EIPH [%p] : dhost %04x.%04x.%04x sap %x\n", eiph, eiph->h_dhost.ip_bcast, eiph->h_dhost.ip_inst, -+ eiph->h_dhost.ip_addr, eiph->h_sap); -+ __EIP_DBG_PRINTF("EIPH [%p] : shost %04x.%04x.%04x \n", eiph, eiph->h_shost.ip_bcast, eiph->h_shost.ip_inst, -+ eiph->h_shost.ip_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("EIPH IS NULL - NO EIPH TO DISPLAY\n"); -+} -+static void eip_packet_display(unsigned char *data) -+{ -+ eip_eiph_display((EIP_HEADER *) data); -+ eip_iph_display((struct iphdr *) (data + EIP_HEADER_PAD + ETH_HLEN)); -+} -+#endif -+static void eip_tmd_display(EIP_TMD * tmd) -+{ -+ if (tmd) { -+ __EIP_DBG_PRINTF("\t\tTMD [%p] : next[%p] skb[%p] DVMA[%d]\n", tmd, tmd->chain.next, tmd->skb, tmd->dvma_idx); -+ if (tmd->dma_base) -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] *data 0x%lx\n", tmd, tmd->head, *((unsigned long *) tmd->dma_base)); -+ else -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] NO DATA !!!\n", tmd, tmd->head); -+ __EIP_DBG_PRINTF("TMD [%p] : DMA(%lx,%d,%d) ebase[%x]\n",tmd, tmd->dma_base, tmd->dma_len, tmd->nmd.nmd_len, -+ tmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD IS NULL - NO TMD TO DISPLAY\n"); -+ -+} -+static void eip_ipf_display(EIP_IPFRAG * ipf) -+{ -+ if (ipf) { -+ __EIP_DBG_PRINTF("IPF[%p] : datagram len %d dma correction %d uts %lx frag_nr %d\n", ipf, ipf->datagram_len, -+ ipf->dma_correction, ipf->timestamp.tv_usec, ipf->frag_nr); -+ eip_tmd_display((EIP_TMD *) ipf); -+ return; -+ } -+ EIP_ERR_PRINTF("IPF IS NULL - NO IPF TO DISPLAY\n"); -+} -+ -+static void eip_tmd_head_display(EIP_TMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("TMD HEAD [%p] : handle[%p] tmds[%p] %3.3d/%3.3d/%3.3d\n", head, head->handle, head->tmd, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), -+ eip_tx->tmd_max_nr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD HEAD IS NULL - NO TMD HEAD TO DISPLAY\n"); -+} -+static void eip_rmd_display(EIP_RMD * rmd) -+{ -+ if (rmd) { -+ __EIP_DBG_PRINTF("RMD [%p] : next[%p] rxd[%p] DVMA[%d]\n", rmd, rmd->chain.next, rmd->rxd, rmd->dvma_idx); -+ __EIP_DBG_PRINTF("RMD [%p] : head[%p]\n", rmd, rmd->head); -+ __EIP_DBG_PRINTF("RMD [%p] : ebase[%x]\n", rmd, rmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("RMD IS NULL - NO RMD TO DISPLAY\n"); -+} -+static void eip_rmd_head_display(EIP_RMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : rcvr[%p] handle[%p] busy list[%p]\n", head, head->rcvr, head->handle, head->busy_list); -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : %3.3d/%3.3d/%3.3d\n", head, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), eip_rx->rmd_max_nr); -+ return; -+ } -+ EIP_ERR_PRINTF("RMD HEAD IS NULL - NO RMD HEAD TO DISPLAY\n"); -+} -+ -+/* END - DISPLAY FUNCTIONS */ -+static inline EP_NMH *eip_dma_reserve(int pages_nr, int perm) -+{ -+ EP_NMH *handle = ep_dvma_reserve(eip_tx->ep_system, pages_nr, perm); -+ -+ if (handle) -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HANDLE [%p] %d pages of elan address space reserved\n", -+ handle, pages_nr); -+ else -+ EIP_ERR_PRINTF("cannot reserve %d page(s) of elan address space\n", pages_nr); -+ -+ return handle; -+} -+ -+static inline void __eip_tmd_load(EIP_TMD * tmd, EP_RAILMASK *rmask) -+{ -+ EIP_ASSERT(tmd->nmd.nmd_len > 0); -+ -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) tmd->dma_base, tmd->nmd.nmd_len, tmd->head->handle, -+ tmd->dvma_idx, rmask, &tmd->nmd); -+} -+ -+static inline void __eip_tmd_unload(EIP_TMD * tmd) -+{ -+ EIP_ASSERT(tmd->nmd.nmd_addr && tmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, tmd->head->handle, &tmd->nmd); -+ tmd->nmd.nmd_addr = 0; -+} -+static inline unsigned long eip_buff_alloc(int buff_len, int gfp) -+{ -+ unsigned long buff_base = (buff_len < PAGE_SIZE) ? -+ (unsigned long) kmalloc(buff_len, gfp) : -+ __get_dma_pages(gfp, get_order(buff_len)); -+ -+ if (likely(buff_base)) -+ return buff_base; -+ -+ EIP_ERR_PRINTF("cannot allocate %db of memory\n", buff_len); -+ return 0; -+} -+static inline void eip_buff_free(unsigned long buff_base, int buff_len) -+{ -+ (buff_len < PAGE_SIZE) ? kfree((void *) buff_base) : -+ free_pages(buff_base, get_order(buff_len)); -+} -+static struct iphdr *eip_ipfrag_get(char *data) -+{ -+ struct ethhdr *eh = (struct ethhdr *) (data); -+ struct iphdr *iph; -+ -+ if (eh->h_proto == htons(ETH_P_IP)) { -+ iph = (struct iphdr *) ((char *) eh + ETH_HLEN); -+ -+ /* EIP_DBG(eip_iph_display(iph)); */ -+ -+ if ((iph->frag_off & htons(IP_MF | IP_OFFSET))) -+ return iph; -+ } -+ return NULL; -+} -+ -+static inline void eip_rmd_free(EIP_RMD * rmd) -+{ -+ EIP_ASSERT2(rmd->nmd.nmd_addr == 0, eip_rmd_display, rmd); -+ -+ if ( rmd->skb != NULL) -+ kfree_skb (rmd->skb); -+ -+ kfree(rmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "RMD [%p] : FREED\n", rmd); -+} -+static inline void eip_skb_load(EIP_RMD * rmd) -+{ -+ EP_RAILMASK rmask = rmd->rxd ? ep_rxd_railmask (rmd->rxd) : 0; -+ -+ EIP_ASSERT(skb_tailroom(rmd->skb) > 0); -+ -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) rmd->skb->data, skb_tailroom(rmd->skb), rmd->head->handle, -+ rmd->dvma_idx, &rmask, &rmd->nmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : LOADED\n", rmd); -+} -+static inline void eip_skb_unload(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->nmd.nmd_addr && rmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, rmd->head->handle, &rmd->nmd); -+ rmd->nmd.nmd_addr = 0; -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : UNLOADED\n", rmd); -+} -+static inline void eip_rmd_requeue(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->rxd); -+ -+ rmd->chain.next = NULL; -+ -+ ep_requeue_receive(rmd->rxd, eip_rxhandler, rmd, &rmd->nmd, EP_NO_ALLOC|EP_NO_SLEEP ); -+ -+ atomic_inc(&rmd->head->stats); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : REQUEUED\n", rmd); -+} -+static EIP_RMD * eip_rmd_alloc(int svc, int gfp) -+{ -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ EIP_RMD *rmd; -+ struct sk_buff *skb; -+ -+ if (!(skb = alloc_skb((buff_len - EIP_EXTRA), gfp))) -+ return NULL; -+ -+ skb_reserve(skb, 2); -+ -+ if (!(rmd = (EIP_RMD *) kmalloc(buff_len, gfp))) { -+ kfree_skb(skb); -+ return NULL; -+ } -+ -+ rmd->skb = skb; -+ -+ rmd->chain.next = NULL; -+ rmd->rxd = NULL; -+ rmd->head = &eip_rx->head[svc]; -+ -+ return rmd; -+} -+ -+static int eip_rmd_alloc_replace(EIP_RMD *rmd, int svc, int gfp) -+{ -+ struct sk_buff *skb,*old; -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ -+ if (!(skb = alloc_skb(buff_len, gfp))) -+ return 1; -+ -+ skb_reserve(skb, 2); -+ -+ eip_skb_unload(rmd); -+ -+ old = rmd->skb; -+ rmd->skb = skb; -+ -+ eip_skb_load(rmd); -+ -+ eip_rmd_requeue(rmd); -+ -+ kfree_skb(old); -+ -+ return 0; -+} -+ -+static int eip_rmd_alloc_queue(int svc, int dvma_idx, int gfp, int attr) -+{ -+ EIP_RMD * rmd = eip_rmd_alloc(svc, gfp); -+ -+ if (!rmd) -+ return 1; -+ -+ EIP_STAT_ALLOC_ADD(&rmd->head->stats, 1); -+ -+ rmd->dvma_idx = dvma_idx; -+ eip_skb_load(rmd); -+ -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "RMD [%p] : ALLOCATED for SVC 0x%x\n", rmd, svc); -+ -+ if (ep_queue_receive(rmd->head->rcvr, eip_rxhandler, (void *) rmd, &rmd->nmd, attr) == ESUCCESS) { -+ atomic_inc(&rmd->head->stats); -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : QUEUED on SVC 0x%x\n", rmd, svc); -+ return 0; -+ } -+ -+ EIP_ERR_PRINTF("RMD [%p] : couldn't be QUEUED on SVC 0x%x\n", rmd, svc); -+ -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ -+ eip_skb_unload(rmd); -+ eip_rmd_free(rmd); -+ -+ return 1; -+} -+ -+static int eip_rmds_alloc(void) -+{ -+ int idx, svc; -+ -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ eip_rx->head[svc].rcvr = ep_alloc_rcvr(eip_tx->ep_system, EIP_SVC_EP(svc), rx_envelope_nr); -+ if (!eip_rx->head[svc].rcvr) { -+ EIP_ERR_PRINTF("Cannot install receiver for SVC 0x%x - maybe cable is disconnected\n", svc); -+ return -EAGAIN; -+ } -+ -+ eip_rx->head[svc].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)) * eip_rx->rmd_max_nr, -+ EP_PERM_WRITE); -+ if (!eip_rx->head[svc].handle) -+ return -ENOMEM; -+ -+ EIP_DBG(EIP_DBG_RMD_HEAD, eip_rmd_head_display, &eip_rx->head[svc]); -+ -+ for (idx = 0; idx < EIP_RMD_NR; idx++) { -+ if (eip_rmd_alloc_queue(svc, idx * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_KERNEL, EP_NO_SLEEP)) -+ return -ENOMEM; -+ } -+ } -+ return 0; -+} -+static void eip_rmds_free(void) -+{ -+ unsigned long flags; -+ EIP_RMD *rmd; -+ int svc; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ for (svc = 0; svc < EIP_SVC_NR ; svc++) { -+ -+ while ((rmd = eip_rx->head[svc].busy_list)) { -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ if (eip_rx->head[svc].busy_list) { -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "Still RMD [%p] on BUSY list SVC 0x%d - Scheduling\n", rmd, svc); -+ schedule(); -+ } -+ } -+ -+ EIP_ASSERT(EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) == EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats)); -+ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "HEAD[%p] : FREEING RCVR [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].rcvr); -+ -+ ep_free_rcvr(eip_rx->head[svc].rcvr); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HEAD[%p] : RELEASING DVMA [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_rx->head[svc].handle); -+ } -+ -+} -+static int eip_rx_queues_low (void) { -+ int svc; -+ for (svc = 0; svc < EIP_SVC_NR; svc++) -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) < EIP_RMD_ALLOC_THRESH) -+ return (1); -+ return (0); -+} -+static void eip_rxhandler(EP_RXD * rxd) -+{ -+ EIP_RMD *rmd = (EIP_RMD *) ep_rxd_arg(rxd); -+ EP_STATUS ret = ep_rxd_status(rxd); -+ EP_PAYLOAD * payload = ep_rxd_payload(rxd); -+ unsigned long data = (unsigned long) rmd->skb->data; -+ int frag_nr = 0; -+ int len; -+ -+ struct sk_buff *skb; -+ static char count = 0; -+ -+ atomic_dec(&rmd->head->stats); -+ rmd->rxd = rxd; -+ -+ if (likely(ret == EP_SUCCESS)) { -+ -+ rmd->head->dma++; -+ -+ if ( eip_rx_dropping) { -+ eip_rmd_requeue(rmd); -+ return; -+ } -+ -+ len = (payload) ? payload->Data[frag_nr++] : ep_rxd_len(rxd); -+ -+ EIP_DBG(EIP_DBG_RMD, eip_rmd_display, rmd); -+ -+again: -+ if ( (skb = skb_clone(rmd->skb, GFP_ATOMIC)) ) { -+ unsigned int off = (data - (unsigned long) rmd->skb->data); -+ -+ /* have to set the length before calling -+ * skb pull as it will not allow you to -+ * pull past the end */ -+ -+ skb_put (skb, off + len); -+ skb_pull (skb, off); -+ -+ skb->protocol = eth_type_trans(skb, eip_rx->net_device); -+ skb->ip_summed = eip_checksum_state; -+ skb->dev = eip_rx->net_device; -+ -+ /* Fabien/David/Mike this is a hack/fix to allow aggrigation of packets to work. -+ * The problem is ip_frag looks at the truesize to see if it is caching too much space. -+ * As we are reusing a large skb (cloned) for a number of small fragments, they appear to take up alot of space. -+ * so ip_frag dropped them after 4 frags (not good). So we lie and set the truesize to just bigger than the data. -+ */ -+ if (payload) -+ skb->truesize = SKB_DATA_ALIGN(skb->len + EIP_HEADER_PAD) +sizeof(struct sk_buff); -+ -+ } -+ if ( (skb) && -+ (netif_rx(skb) != NET_RX_DROP)){ -+ -+ eip_rx->bytes += len; -+ -+ if (payload && payload->Data[frag_nr] ) { -+ data += EIP_IP_ALIGN(len); -+ len = payload->Data[frag_nr++]; -+ goto again; -+ } -+ eip_rx->packets += ++frag_nr; -+ } else if ( (eip_rx->dropped++ % 20) == 0) -+ __EIP_DBG_PRINTK("Packet dropped by the TCP/IP stack - increase /proc/sys/net/core/netdev_max_backlog\n"); -+ } else if (ret == EP_SHUTDOWN ) { -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "ABORTING\n"); -+ ep_complete_receive(rxd); -+ eip_skb_unload(rmd); -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ eip_rmd_free(rmd); -+ return; -+ } else { -+ EP_ENVELOPE *env = ep_rxd_envelope(rxd); -+ EP_NMD *nmd ; -+ -+ EIP_ERR_PRINTF("RMD[%p] : RECEIVE ret = %d\n", rmd, ret); -+ -+ for (len = 0 ; len < env->nFrags ; len++) { -+ nmd = &env->Frags[len]; -+ EIP_ERR_PRINTF("RMD[%p] : ep_frag #%d nmd_addr [%x] nmd_len %d\n", rmd, len, -+ (unsigned int) nmd->nmd_addr, nmd->nmd_len); -+ } -+ eip_rx->errors++; -+ EIP_ASSERT2(atomic_read(&skb_shinfo(rmd->skb)->dataref) == 1, eip_rmd_display, rmd); -+ } -+ -+ /* data is used to store the irq flags */ -+ spin_lock_irqsave(&eip_rx->lock, data); -+ rmd->chain.next = eip_rx->irq_list; -+ eip_rx->irq_list = rmd; -+ eip_rx->irq_list_nr++; -+ spin_unlock_irqrestore(&eip_rx->lock, data); -+ -+ if ( !timer_pending (&eip_rx_tasklet_timer) /* the timer not already set */ -+ && ( (count++ % eip_rx->sysctl_granularity) /* and either we have passed up a number of them */ -+ || eip_rx_queues_low() )) /* or we are low */ -+ mod_timer (&eip_rx_tasklet_timer, lbolt + 1); -+} -+ -+/* dest ; if the buffer still reference on it mocve the rmd to the dest list */ -+static void eip_rmd_reclaim(EIP_RMD *rmd) -+{ -+ EIP_RMD *rmd_next = rmd; -+ int dataref; -+ -+ while (rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ dataref = atomic_read(&skb_shinfo(rmd->skb)->dataref); -+ EIP_ASSERT(dataref > 0); -+ -+ if (dataref == 1) { -+ eip_rmd_requeue(rmd); -+ } else { -+ rmd->chain.next = rmd->head->busy_list; -+ rmd->head->busy_list = rmd; -+ } -+ } -+} -+static void eip_rx_tasklet(unsigned long arg) -+{ -+ EIP_RMD *rmd, *rmd_next; -+ unsigned long flags; -+ short svc, queued; -+ int needs_reschedule; -+ -+ if (eip_rx_tasklet_locked) /* we dont want the tasklet to do anything when we are finishing */ -+ return; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ rmd = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ } -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ needs_reschedule = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ /* the plan is : allocate some more if possible or steall some dvma space from those on the EIP_BUSY_LIST */ -+ queued = EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats); -+ -+ EIP_ASSERT(queued >= 0 && queued <= EIP_RMD_MAX_NR); -+ -+ if (queued < EIP_RMD_ALLOC_THRESH) { -+ short allocated = EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats); -+ short how_many; -+ -+ EIP_ASSERT(allocated >= 0 && allocated <= EIP_RMD_MAX_NR); -+ -+ if (likely(allocated < eip_rx->rmd_max_nr)) { -+ -+ how_many = (((allocated / EIP_RMD_ALLOC_STEP) + 1) * EIP_RMD_ALLOC_STEP); -+ if (how_many > eip_rx->rmd_max_nr) -+ how_many = eip_rx->rmd_max_nr; -+ -+ for (; allocated < how_many && -+ (eip_rmd_alloc_queue(svc, allocated * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_ATOMIC, EP_NO_ALLOC|EP_NO_SLEEP) == 0) ; allocated++); -+ if ( allocated != how_many ) { -+ eip_rx->reschedule++; -+ needs_reschedule = 1; -+ } -+ } else { -+ /* steal how_many rmds and put them on the aside list */ -+ how_many = EIP_RMD_ALLOC_THRESH - queued; -+ -+ EIP_ASSERT(how_many >= 0 && how_many <= EIP_RMD_ALLOC_THRESH); -+ -+ rmd_next = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ -+ while (how_many-- && rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ if (eip_rmd_alloc_replace(rmd, svc, GFP_ATOMIC)) { -+ rmd_next = rmd; -+ break; -+ } -+ } -+ eip_rx->head[svc].busy_list = rmd_next; -+ if ( how_many ) -+ needs_reschedule = 1; -+ } -+ } -+ } -+ -+ if ( needs_reschedule && ( !timer_pending (&eip_rx_tasklet_timer))) -+ mod_timer (&eip_rx_tasklet_timer, lbolt + 2); -+} -+ -+static inline void eip_tmd_init(EIP_TMD * tmd, unsigned long buff_base, EIP_TMD_HEAD * head, unsigned long buff_len, -+ int dvma_idx) -+{ -+ tmd->dvma_idx = dvma_idx; -+ tmd->dma_base = buff_base; -+ tmd->dma_len = -1; -+ tmd->skb = NULL; -+ tmd->head = head; -+ tmd->chain.next = NULL; -+ -+ if (tmd->head != &eip_tx->head[EIP_TMD_STD]) { -+ tmd->nmd.nmd_len = buff_len; -+ eip_tmd_load(tmd); -+ } else { -+ tmd->nmd.nmd_len = -1; -+ tmd->nmd.nmd_addr = 0; -+ } -+} -+ -+static inline EIP_TMD *eip_tmd_get(int id) -+{ -+ unsigned long flags; -+ EIP_TMD *tmd = NULL; -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ while ((tmd = eip_tx->head[id].tmd) == NULL) { -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ if (ep_enable_txcallbacks(eip_tx->xmtr) == 0) { -+ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ if (eip_tx->head[id].tmd == NULL) { -+ __EIP_DBG_PRINTF("Cannot get a TMD on head %d ... stopping queue\n", id); -+ -+ eip_stop_queue (); -+ -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ return NULL; -+ } -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ } -+ -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ } -+ eip_tx->head[id].tmd = tmd->chain.next; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_dec(&tmd->head->stats); -+ return tmd; -+} -+ -+static inline void eip_tmd_put(EIP_TMD * tmd) -+{ -+ unsigned long flags; -+ -+ tmd->skb = NULL; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ tmd->chain.next = tmd->head->tmd; -+ tmd->head->tmd = tmd; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_inc(&tmd->head->stats); -+ -+ eip_start_queue(); -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_QUEUE, "TMD [%p] : REQUEUED\n", tmd); -+} -+static inline void eip_tmd_load(EIP_TMD * tmd) -+{ -+ EP_RAILMASK rmask = tx_railmask; -+ -+ __eip_tmd_load(tmd, &rmask); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : LOADED\n", tmd); -+} -+static inline void eip_tmd_unload(EIP_TMD * tmd) -+{ -+ __eip_tmd_unload(tmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : UNLOADED\n", tmd); -+} -+static inline void eip_tmd_free(EIP_TMD * tmd) -+{ -+ eip_buff_free(tmd->dma_base, tmd->nmd.nmd_len); -+ -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "TMD [%p] : FREED\n", tmd); -+ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, 1); -+} -+ -+/* tmd on a separate block */ -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD * tmd, EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ eip_tmd_init(tmd, 0, head, -1, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+/* tmd on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(tx_copybreak_max + sizeof(EIP_TMD), GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + tx_copybreak_max); -+ eip_tmd_init(tmd, buff_base, head, tx_copybreak_max, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+/* ipf are on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(EIP_SVC_BIGGEST_LEN, GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG)); -+ eip_tmd_init(tmd, buff_base, head, EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG), dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+static int eip_tmds_alloc() -+{ -+ int i; -+ int page_nr; -+ EIP_TMD *tmd; -+ -+ page_nr = EIP_DVMA_PAGES(tx_copybreak_max); -+ -+ eip_tx->head[EIP_TMD_COPYBREAK].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_COPYBREAK]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_copybreak(&eip_tx->head[EIP_TMD_COPYBREAK], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ eip_tx->head[EIP_TMD_STD].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN) * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_STD]); -+ -+ tmd = kmalloc(sizeof(EIP_TMD) * EIP_TMD_NR, GFP_KERNEL); -+ if (!tmd) { -+ EIP_ERR_PRINTF("Cannot ALLOCATE %d of tmds\n", (int) sizeof(EIP_TMD) * EIP_TMD_NR); -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ for (i = 0; i < EIP_TMD_NR; i++, tmd++) { -+ if (!eip_tmd_alloc_queue(tmd, &eip_tx->head[EIP_TMD_STD], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ eip_tx->head[EIP_TMD_AGGREG].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_AGGREG]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_aggreg(&eip_tx->head[EIP_TMD_AGGREG], i * page_nr)) -+ return -ENOMEM; -+ } -+ return 0; -+} -+ -+static void eip_tmds_free(void) -+{ -+ EIP_TMD *tmd; -+ EIP_TMD *tmd_next; -+ int i; -+ -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ for (i = 0 ; i < 3 ; i++) { -+again: -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats) < EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats)) { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "Polling XMTR [%p]\n", eip_tx->xmtr); -+ ep_poll_transmits(eip_tx->xmtr); -+ goto again; -+ } -+ } -+ /* everything should be queued */ -+ if ((tmd = eip_tx->head[EIP_TMD_COPYBREAK].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ eip_tmd_unload(tmd); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_COPYBREAK] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ /* these ones have been allocated as a block */ -+ if ((tmd = eip_tx->head[EIP_TMD_STD].tmd)) { -+ do { -+ if (tmd->dvma_idx == 0 ) { -+ kfree(tmd); -+ /* eip_tmd_free(tmd); */ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, EIP_TMD_NR); -+ tmd_next = NULL; -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_STD] BLOCK FREED\n", tmd); -+ } else -+ tmd_next = tmd->chain.next; -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_STD] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_STD].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_STD].handle); -+ -+ if ((tmd = eip_tx->head[EIP_TMD_AGGREG].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_unload(tmd); -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_AGGREG] release DVMA\n", -+ eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_free_xmtr(eip_tx->xmtr); -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "XMTR[%p] : FREED\n", eip_tx->xmtr); -+} -+ -+static inline void eip_ipf_skb_add(EIP_IPFRAG * ipf, struct sk_buff *skb) -+{ -+ int align = EIP_IP_ALIGN(skb->len); -+ -+ -+ if (ipf->dma_len == -1) { /* like a virgin; touched for the very first time */ -+ do_gettimeofday(&ipf->timestamp); -+ /* FIXE ME put that in release tmd code */ -+ ipf->frag_nr = 0; -+ ipf->dma_len = 0; -+ ipf->datagram_len = -1; -+ ipf->dma_correction = 0; -+ } -+ -+ memcpy((void *) (ipf->dma_base + ipf->dma_len), skb->data, skb->len); -+ -+ if (ipf->datagram_len == -1) { -+ struct iphdr * iph = skb->nh.iph; -+ int offset = ntohs(iph->frag_off); -+ -+ /* last one ? ; offset & ~IP_OFFSET = IP fragment flags */ -+ if (((offset & ~IP_OFFSET) & IP_MF) == 0) { -+ offset &= IP_OFFSET; -+ offset <<= 3; -+ ipf->datagram_len = offset + htons(iph->tot_len) - sizeof(struct iphdr); -+ } -+ } -+ -+ skb->next = ipf->skb; -+ ipf->skb = skb; -+ ipf->payload.Data[ipf->frag_nr] = skb->len; -+ ipf->dma_len += align; -+ ipf->dma_correction += align - skb->len + ETH_HLEN + sizeof(struct iphdr); -+ /* FIXME ; Count got wrong if ip header has options */ -+ -+ ipf->frag_nr++; -+ -+ EIP_DBG2(EIP_DBG_TMD, eip_ipf_display, ipf, "ADDED skb[%p] len %db ALIGNED(%db)\n", skb, skb->len, EIP_IP_ALIGN(skb->len)); -+} -+ -+#define eip_ipf_hasroom(ipf, skb) ((ipf->dma_len + EIP_IP_ALIGN(skb->len) < eip_tx->sysctl_ipfrag_copybreak)) -+int eip_hard_start_xmit(struct sk_buff *skb, struct net_device *devnet) -+{ -+ -+ EIP_TMD *tmd; -+ EP_NMD nmd; -+ struct iphdr *iph; -+ int j; -+ -+ if (skb->destructor){ -+ atomic_inc(&eip_tx->destructor); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ -+ if (!(iph = eip_ipfrag_get(skb->data)) || (eip_tx->sysctl_aggregation == 0)) { /* not ip fragment */ -+no_aggreg: -+ j = (skb->len < eip_tx->sysctl_copybreak) ? EIP_TMD_COPYBREAK : EIP_TMD_STD; /* j = head id */ -+ -+ if (!(tmd = eip_tmd_get(j))) { -+ if (skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ return 1; -+ } -+ -+ tmd->dma_len = skb->len; -+ tmd->skb = skb; -+ tmd->skb->next = NULL; -+ tmd->chain.next = NULL; -+ -+ if (j == EIP_TMD_COPYBREAK) { -+ memcpy((void *) tmd->dma_base, skb->data, skb->len); -+ -+ ep_nmd_subset(&nmd, &tmd->nmd, 0, skb->len); -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak++; -+#endif -+ return eip_do_xmit(tmd, &nmd, NULL); -+ } -+ tmd->dma_base = (unsigned long) skb->data; -+ tmd->nmd.nmd_len = skb->len; -+ eip_tmd_load(tmd); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_std++; -+#endif -+ return eip_do_xmit(tmd, &tmd->nmd, NULL); -+ } else if ( skb->len > EIP_SVC_BIGGEST_LEN/2 ) { -+ /* don't aggregate when we have a full mtu of data */ -+ /* or more than 32k ; in this case it is cheaper */ -+ /* to just map the buffer and send it */ -+ goto no_aggreg; -+ } else { -+ EIP_IPFRAG *ipf = NULL; -+ unsigned long flags; -+ struct list_head *l; -+ struct iphdr *iph2; -+ int i; -+ __u16 id = iph->id; -+ __u32 saddr = iph->saddr; -+ __u32 daddr = iph->daddr; -+ __u8 protocol = iph->protocol; -+ -+ EIP_DBG(EIP_DBG_IPH, eip_iph_display, iph); -+ -+ j = 0; -+ -+ /* here we can't have full mtu size aggregated packet */ -+ EIP_ASSERT_RET(skb->len < eip_tx->sysctl_ipfrag_copybreak, 0); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_for_each(l, &eip_tx->ipfrag) { -+ ipf = list_entry(l, EIP_IPFRAG, list); -+ iph2 = eip_ipfrag_get((char *) ipf->dma_base); -+ -+ EIP_ASSERT(iph2); -+ -+ if ((iph2->id == id) && -+ (get_unaligned(&iph2->saddr) == saddr) && -+ (get_unaligned(&iph2->daddr) == daddr) && -+ (iph2->protocol == protocol)) { -+ /* || timeout */ -+ if (eip_ipf_hasroom(ipf, skb)) { -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ if ((ipf->datagram_len != -1) && -+ (ipf->dma_len == (ipf->datagram_len + ipf->dma_correction) || -+ ipf->frag_nr == (128 / sizeof(uint32_t)))) { -+send_aggreg: -+ ipf->payload.Data[ipf->frag_nr] = 0; -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ ep_nmd_subset(&nmd, &ipf->nmd, 0, ipf->dma_len); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ if ((i = eip_do_xmit((EIP_TMD *) ipf, &nmd, &ipf->payload)) != EP_SUCCESS) -+ return i; -+ if (j) -+ goto new; -+ return 0; -+ } -+ -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ return 0; -+ } else { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF[%p] : FULL %db full - sending it\n", ipf, ipf->dma_len); -+ j = 1; -+ goto send_aggreg; -+ } -+ } -+ } -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+new: -+ if (!(ipf = (EIP_IPFRAG *) eip_tmd_get(EIP_TMD_AGGREG))) -+ goto no_aggreg; -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_add_tail(&ipf->list, &eip_tx->ipfrag); -+ eip_tx->ipfrag_count++; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ return 0; -+} -+static int eip_do_xmit(EIP_TMD * tmd, EP_NMD *nmd, EP_PAYLOAD *payload) -+{ -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ int attr = EP_SET_DATA((EP_NO_SLEEP | EP_NO_INTERRUPT | EP_NO_FAILOVER), EP_TYPE_SVC_INDICATOR, EP_SVC_EIP); -+ unsigned long flags; -+ int svc, rnum; -+ -+ SIZE_TO_SVC(nmd->nmd_len, svc); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ /* EIP_DBG(eip_eiph_display(eiph)); */ -+ -+ if (unlikely (eiph->h_dhost.ip_bcast)) -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_availrails(eip_tx->xmtr)); -+ else -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_noderails(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr))); -+ -+ if (rnum >= 0) -+ attr = EP_SET_PREFRAIL(attr, rnum); -+ -+ /* add to inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_add_tail (&tmd->chain.link, &eip_tx->inuse); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ /* ENOMEM EINVAL ECONNREFUSED ESUCCESS */ -+ svc = (unlikely(eiph->h_dhost.ip_bcast)) ? -+ ep_multicast_message(eip_tx->xmtr, -1, -1, NULL, EIP_SVC_EP(svc), attr | EP_NOT_MYSELF, eip_txhandler, tmd, payload, nmd, 1) : -+ -+ ep_transmit_message(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr), EIP_SVC_EP(svc), attr, eip_txhandler, tmd, payload, nmd, 1); -+ -+ if (likely(svc == EP_SUCCESS)) -+ return 0; -+ else if (svc == ENOMEM) { -+ EIP_ERR_PRINTF("%s", "Memory allocation error ...\n"); -+ eip_tx->errors++; -+ } -+ else -+ { -+ /* EP_EINVAL occurs when the svc has a bad value or the iovec has too many frag; */ -+ /* we don't use the latter option here */ -+ __EIP_DBG_PRINTF("TMD [%p] : DROPPED skb[%p] status = %d from ep_?_message\n", tmd, tmd->skb, svc); -+ -+ eip_tx->dropped++; -+ } -+ -+ eip_txhandler(NULL, tmd, -99); -+ -+ /* Quadrics GNAT sw-elan/4397 - since we will "never" be able to send this packet to the */ -+ /* destination node, we drop it and feign success - this has the same behaviour as an */ -+ /* ethernet where it sticks the packet on the wire, but no-one receives it. */ -+ return 0; -+} -+ -+static void eip_txhandler(EP_TXD * txd, void *arg, EP_STATUS status) -+{ -+ EIP_TMD *tmd = (EIP_TMD *) arg; -+ struct sk_buff *skb_next; -+ unsigned long flags; -+ int svc = 0; -+ -+ if (likely(status == EP_SUCCESS)) { -+ SIZE_TO_SVC(tmd->dma_len, svc); -+ eip_tx->dma[svc]++; -+ eip_tx->bytes += tmd->dma_len; -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->packets += ipf->frag_nr; -+ } else -+ eip_tx->packets++; -+ } else { -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->dropped += ipf->frag_nr; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler aggreg packet dropped status = %d\n", status); -+ } else { -+ eip_tx->dropped++; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler packet dropped status = %d\n", status); -+ } -+ } -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_STD]) { -+ eip_tmd_unload(tmd); -+ tmd->dma_base = 0; -+ tmd->nmd.nmd_len = -1; -+ } -+ -+ tmd->dma_len = -1; -+ -+ svc = 0; -+ while (tmd->skb) { -+ svc++; -+ -+ if (tmd->skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ -+ skb_next = tmd->skb->next; -+ dev_kfree_skb_any(tmd->skb); -+ tmd->skb = skb_next; -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF/TMD [%p] : %d skb RELEASE/FREED\n", tmd, svc); -+ -+ /* remove from inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_del (&tmd->chain.link); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ eip_tmd_put(tmd); -+} -+ -+static void eip_tx_tasklet(unsigned long arg) -+{ -+ struct timeval now; -+ unsigned long flags; -+ EIP_IPFRAG *ipf, *ipfq = NULL; -+ EP_NMD nmd; -+ struct list_head *list; -+ struct list_head *tmp; -+ char resched = 0; -+ char poll = 1; -+ -+ do_gettimeofday(&now); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ if (eip_tx->ipfrag_count) { -+ list_for_each_safe(list, tmp, &eip_tx->ipfrag) { -+ ipf = list_entry(list, EIP_IPFRAG, list); -+ /* delta = (((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - ipf->timestamp.tv_usec; */ -+ if (((((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - -+ ipf->timestamp.tv_usec) >= (1000UL * eip_tx->sysctl_ipfrag_to)) { -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ ipf->chain.next = (EIP_TMD *) ipfq; -+ ipfq = ipf; -+ } -+ } -+ } -+ if (eip_tx->ipfrag_count) -+ resched = 1; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ while (ipfq) { -+ poll = 0; -+ -+ ep_nmd_subset(&nmd, &ipfq->nmd, 0, ipfq->dma_len); -+ -+ ipfq->payload.Data[ipfq->frag_nr] = 0; -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ ipf = (EIP_IPFRAG *) ipfq->chain.next; -+ eip_do_xmit((EIP_TMD *) ipfq, &nmd, &ipfq->payload); -+ ipfq = ipf; -+ } -+ -+ if (poll) -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ if (atomic_read(&eip_tx->destructor) || resched ) -+ tasklet_schedule(&eip_tx->tasklet); -+} -+void eip_start_queue() -+{ -+ if (netif_queue_stopped(eip_tx->net_device)) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Waking up %s queue\n", eip_tx->net_device->name); -+ netif_wake_queue(eip_tx->net_device); -+ } -+} -+void eip_stop_queue() -+{ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Stopping %s queue\n", eip_tx->net_device->name); -+ netif_stop_queue(eip_tx->net_device); -+} -+ -+static int eip_open(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s entering in promiscuous mode\n", devnet->name); -+ -+ netif_start_queue(devnet); -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x up\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static int eip_close(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s leaving promiscuous mode\n", devnet->name); -+ -+ netif_stop_queue(devnet); -+ -+ eip_rx_tasklet(0); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x down\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static struct net_device_stats *eip_get_stats(struct net_device *devnet) -+{ -+ static struct net_device_stats stats; -+ -+ stats.rx_packets = eip_rx->packets; -+ stats.rx_bytes = eip_rx->bytes; -+ stats.rx_errors = eip_rx->errors; -+ stats.rx_dropped = eip_rx->dropped; -+ -+ stats.tx_packets = eip_tx->packets; -+ stats.tx_bytes = eip_tx->bytes; -+ stats.tx_errors = eip_tx->errors; -+ stats.tx_dropped = eip_tx->dropped; -+ return &stats; -+} -+ -+static int eip_change_mtu(struct net_device *devnet, int mtu) -+{ -+ if (mtu <= EIP_MTU_MAX) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "MTU size changed from %d to %d\n", devnet->mtu, mtu); -+ devnet->mtu = mtu; -+ } -+ return 0; -+} -+ -+#ifdef MODULE -+int eip_init(void) -+{ -+ struct net_device *devnet; -+ int errno = 0; -+ -+ eip_rx_dropping = 0; -+ eip_rx_tasklet_locked = 1; -+ -+ /* timer up but not started */ -+ init_timer (&eip_rx_tasklet_timer); -+ eip_rx_tasklet_timer.function = eip_rx_tasklet; -+ eip_rx_tasklet_timer.data = (unsigned long) 0; -+ eip_rx_tasklet_timer.expires = lbolt + hz; -+ -+ devnet = alloc_etherdev(sizeof(EIP_RX) + sizeof(EIP_TX)); -+ if (!devnet) { -+ EIP_ERR_PRINTF("Unable to ALLOCATE etherdev structure\n"); -+ return -ENOMEM; -+ } -+ strcpy (devnet->name, "eip0"); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Enabling aggregation code\n"); -+ devnet->change_mtu = eip_change_mtu; -+ devnet->mtu = EIP_MTU_MAX; -+ devnet->open = eip_open; -+ devnet->stop = eip_close; -+ devnet->hard_start_xmit = eip_hard_start_xmit; -+ devnet->get_stats = eip_get_stats; -+ -+ /* devnet->features |= (NETIF_F_DYNALLOC); */ -+ /* devnet->features = (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA); */ -+ /* devnet->features |= (NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_HIGHDMA|NETIF_F_HW_CSUM); */ -+ -+ eip_rx = (EIP_RX *) devnet->priv; -+ eip_tx = (EIP_TX *) (eip_rx + 1); -+ -+ /* instance 0 */ -+ eip_tx->ep_system = ep_system(); -+ if (eip_tx->ep_system == NULL) { -+ EIP_ERR_PRINTF("kernel comms for iface %s does not exist\n", devnet->name); -+ errno = -ENXIO; -+ goto out; -+ } -+ if (ep_waitfor_nodeid(eip_tx->ep_system) == ELAN_INVALID_NODE) { -+ EIP_ERR_PRINTF("network position not found\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ eip_tx->xmtr = ep_alloc_xmtr(eip_tx->ep_system); -+ if (!eip_tx->xmtr) { -+ EIP_ERR_PRINTF("Cannot create allocated transmitter - maybe cable is disconnected\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ /* assign MAC address */ -+ *((int *) &devnet->dev_addr[4]) = htons(ep_nodeid(eip_tx->ep_system)); -+ eip_rx->net_device = devnet; -+ eip_tx->net_device = devnet; -+ -+ atomic_set(&eip_tx->destructor, 0); -+ -+ if ((tmd_max >= EIP_TMD_MIN_NR) && (tmd_max <= EIP_TMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tmd_max_nr to %d\n", tmd_max); -+ eip_tx->tmd_max_nr = tmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= tmd_max(%d) <= %d using default %d\n", -+ EIP_TMD_MIN_NR, tmd_max, EIP_TMD_MAX_NR, EIP_TMD_MAX_NR); -+ eip_tx->tmd_max_nr = EIP_TMD_MAX_NR; -+ } -+ -+ if ((rmd_max >= EIP_RMD_MIN_NR) && (rmd_max <= EIP_RMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting rmd_max_nr to %d\n", rmd_max); -+ eip_rx->rmd_max_nr = rmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= rmd_max(%d) <= %d using default %d\n", EIP_RMD_MIN_NR, -+ rmd_max, EIP_RMD_MAX_NR, EIP_RMD_MAX_NR); -+ eip_rx->rmd_max_nr = EIP_RMD_MAX_NR; -+ } -+ -+ if ((rx_envelope_nr > 0) && (rx_envelope_nr <= 1024)) { /* > 1024 don't be silly */ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting rx_envelope_nr to %d\n", rx_envelope_nr); -+ } else { -+ EIP_ERR_PRINTF("parameter error : 0 < rx_envelope_nr(%d) <= 1024 using default %d\n", -+ rx_envelope_nr, EIP_RX_ENVELOPE_NR); -+ rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+ } -+ -+ if (tx_copybreak_max <= EIP_TX_COPYBREAK_MAX) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tx_copybreak_max to %d\n", tx_copybreak_max); -+ } else { -+ EIP_ERR_PRINTF("parameter error : tx_copybreak_max > %d using default %d\n", -+ EIP_TX_COPYBREAK_MAX, EIP_TX_COPYBREAK_MAX); -+ tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+ } -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ -+ eip_tx->ipfrag_count = 0; -+ eip_aggregation_set(1); -+ eip_rx_granularity_set(rx_granularity); -+ eip_tx_copybreak_set(EIP_TX_COPYBREAK); -+ eip_ipfrag_to_set(EIP_IPFRAG_TO); -+ eip_ipfrag_copybreak_set(EIP_IPFRAG_COPYBREAK); -+ -+ spin_lock_init(&eip_tx->lock); -+ spin_lock_init(&eip_tx->ipfraglock); -+ spin_lock_init(&eip_rx->lock); -+ tasklet_init(&eip_rx->tasklet, eip_rx_tasklet, 0); -+ tasklet_init(&eip_tx->tasklet, eip_tx_tasklet, 0); -+ INIT_LIST_HEAD(&eip_tx->ipfrag); -+ INIT_LIST_HEAD(&eip_tx->inuse); -+ -+ /* if we fail here cannot do much yet; waiting for rcvr remove code in ep. */ -+ errno = eip_tmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_rmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_stats_init(); -+ if (errno) -+ goto out; -+ -+ if (ep_svc_indicator_set(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot set the service indicator\n"); -+ errno = -EINVAL; -+ goto out; -+ } -+ -+ eip_rx_tasklet_locked = 0; -+ tasklet_schedule(&eip_rx->tasklet); -+ -+ SET_MODULE_OWNER(eip_tx->net_device); -+ -+ if (register_netdev(devnet)) { -+ printk("eip: failed to register netdev\n"); -+ goto out; -+ } -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x ready\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ -+ return 0; -+ out: -+ unregister_netdev(devnet); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 25) -+ kfree(devnet); -+#else -+ free_netdev(devnet); -+#endif -+ -+ return errno; -+} -+void eip_exit(void) -+{ -+ int i; -+ -+ eip_rx_dropping = 1; /* means that new messages wont be sent to tcp stack */ -+ eip_rx_tasklet_locked = 1; -+ -+ netif_stop_queue(eip_tx->net_device); -+ -+ if (ep_svc_indicator_clear(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot unset the service indicator\n"); -+ } -+ -+ schedule_timeout(10); -+ -+ del_timer_sync (&eip_rx_tasklet_timer); -+ -+ tasklet_disable(&eip_rx->tasklet); -+ tasklet_disable(&eip_tx->tasklet); -+ -+ tasklet_kill(&eip_tx->tasklet); -+ tasklet_kill(&eip_rx->tasklet); -+ -+ eip_rmds_free(); -+ eip_tmds_free(); -+ -+ /* that things freed */ -+ for (i = 0 ; i < EIP_SVC_NR ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d RMDs not FREED on SVC[%d]\n", EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), i); -+ } -+ for (i = 0 ; i < 3 ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d TMDs not freed on TX HEAD[%d]\n", EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), i); -+ -+ } -+ unregister_netdev(eip_tx->net_device); -+ kfree(eip_tx->net_device); -+ -+ eip_stats_cleanup(); -+} -+ -+module_init(eip_init); -+module_exit(eip_exit); -+ -+MODULE_PARM(eipdebug, "i"); -+MODULE_PARM_DESC(eipdebug, "Set debug flags"); -+ -+MODULE_PARM(rx_envelope_nr, "i"); -+MODULE_PARM_DESC(rx_enveloppe_nr, "Number of allocated enveloppe on the rx side"); -+ -+MODULE_PARM(tx_copybreak_max, "i"); -+MODULE_PARM_DESC(tx_copybreak_max, "Maximum size of the tx copybreak limit (default 512)"); -+ -+MODULE_PARM(tmd_max, "i"); -+MODULE_PARM(rmd_max, "i"); -+MODULE_PARM_DESC(tmd_max, "Maximun number of transmit buffers (default 64)"); -+MODULE_PARM_DESC(rmd_max, "Maximun number of receive buffers (default 64)"); -+ -+MODULE_PARM(tx_railmask, "i"); -+MODULE_PARM_DESC(tx_railmask, "Mask of which rails transmits can be queued on"); -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan IP driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/eip/eip_linux.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/eip_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/eip_linux.h 2005-06-01 23:12:54.554445944 -0400 -@@ -0,0 +1,399 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_linux.h,v 1.46.2.1 2004/10/01 10:49:38 mike Exp $" -+ -+#ifndef __EIP_LINUX_H -+#define __EIP_LINUX_H -+ -+#define EIP_WATERMARK (0xfab1e) -+ -+#define EIP_PAGES(s) (((s - 1) >> PAGE_SHIFT) + 1) -+#define EIP_DVMA_PAGES(s) ((s < PAGE_SIZE) ? EIP_PAGES(s) + 1 : EIP_PAGES(s)) -+ -+#define EIP_SVC_SMALLEST_LEN (1 << 9) /* 512 */ -+#define EIP_SVC_BIGGEST_LEN (1 << 16) /* 64k */ -+ -+#define EIP_SVC_SMALLEST (0) -+#define EIP_SVC_BIGGEST (7) -+ -+#define EIP_SVC_NR (8) -+#define EIP_SVC_EP(s) (s + EP_MSG_SVC_EIP512) -+ -+#define EIP_STAT_ALLOC_SHIFT (8) -+#define EIP_STAT_ALLOC_GET(atomicp) ((int) atomic_read(atomicp) >> EIP_STAT_ALLOC_SHIFT) -+#define EIP_STAT_ALLOC_ADD(atomicp, v) (atomic_add((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+#define EIP_STAT_ALLOC_SUB(atomicp, v) (atomic_sub((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+ -+#define EIP_STAT_QUEUED_MASK (0xff) -+#define EIP_STAT_QUEUED_GET(atomicp) ((int) atomic_read(atomicp) & EIP_STAT_QUEUED_MASK) -+ -+#define EIP_RMD_NR (8) -+#define EIP_RMD_MIN_NR (8) -+#define EIP_RMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_RMD_ALLOC_STEP (8) -+#define EIP_RMD_ALLOC_THRESH (16) -+ -+#define EIP_RMD_ALLOC (1) -+#define EIP_RMD_REPLACE (0) -+ -+#define EIP_TMD_NR (64) -+#define EIP_TMD_MIN_NR (16) -+#define EIP_TMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_TMD_TYPE_NR (3) -+#define EIP_TMD_COPYBREAK (0x0) -+#define EIP_TMD_STD (0x1) -+#define EIP_TMD_AGGREG (0x2) -+ -+#define EIP_TX_COPYBREAK (512) -+#define EIP_TX_COPYBREAK_MAX (1024) -+ -+#define EIP_IPFRAG_TO (50) /* time out before a frag is sent in msec */ -+#define EIP_IPFRAG_COPYBREAK (EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG) - EIP_HEADER_PAD) -+ -+#define EIP_RX_ENVELOPE_NR ((EIP_RMD_MAX_NR*EIP_SVC_NR)/2) -+#define EIP_RX_GRANULARITY (1) -+ -+#define EIP_IP_ALIGN(X) (((X) + (15)) & ~(15)) -+#define EIP_EXTRA roundup (sizeof(EIP_RMD), 256) -+#define EIP_RCV_DMA_LEN(s) (s - EIP_EXTRA - EIP_HEADER_PAD) -+#define EIP_MTU_MAX (EIP_RCV_DMA_LEN(EIP_SVC_BIGGEST_LEN) - (ETH_HLEN)) -+ -+#define SIZE_TO_SVC(s, svc) \ -+ do { \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 9))) {svc = 0;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 10))) {svc = 1;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 11))) {svc = 2;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 12))) {svc = 3;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 13))) {svc = 4;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 14))) {svc = 5;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 15))) {svc = 6;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 16))) {svc = 7;break;} \ -+ svc = -666; \ -+ EIP_ASSERT(1 == 0); \ -+ } while (0) -+ -+extern int eipdebug; -+#define EIP_ASSERT_ON -+/* #define NO_DEBUG */ -+ -+ -+/* ######################## */ -+#ifdef NO_DEBUG -+#define __EIP_DBG_PRINTF(fmt, args...) -+#define EIP_DBG_PRINTF(flag, fmt, args...) -+#else -+ -+#define EIP_DBG_RMD 0x1 -+#define EIP_DBG_TMD 0x2 -+#define EIP_DBG_RMD_HEAD 0x4 -+#define EIP_DBG_TMD_HEAD 0x8 -+#define EIP_DBG_EIPH 0x10 -+#define EIP_DBG_IPH 0x20 -+#define EIP_DBG_RMD_EP_DVMA 0x40 -+#define EIP_DBG_TMD_EP_DVMA 0x80 -+#define EIP_DBG_EP_DVMA (EIP_DBG_RMD_EP_DVMA|EIP_DBG_TMD_EP_DVMA) -+#define EIP_DBG_MEMALLOC 0x100 -+#define EIP_DBG_MEMFREE 0x200 -+#define EIP_DBG_RMD_QUEUE 0x400 -+#define EIP_DBG_TMD_QUEUE 0x800 -+#define EIP_DBG_GEN 0x1000 -+#define EIP_DBG_DEBUG 0x2000 -+ -+#define __EIP_DBG_PRINTF(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUFFER, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTF(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define __EIP_DBG_PRINTK(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUF_CON, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTK(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define EIP_ERR_PRINTF(fmt, args...) __EIP_DBG_PRINTK("!!! ERROR !!! - " fmt, ## args) -+ -+ -+#define EIP_DBG2(flag, fn, fn_arg, fmt, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ (void)(fn)(fn_arg); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ } -+ -+ -+#define EIP_DBG(flag, fn, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s\n", smp_processor_id(), __func__); \ -+ (void)(fn)(args); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s :\n", smp_processor_id(), __func__); \ -+ } -+#endif /* NO_DEBUG */ -+ -+ -+#ifdef EIP_ASSERT_ON -+ -+#define __EIP_ASSERT_PRINT(exp) \ -+ eipdebug = 0xffff; \ -+ EIP_ERR_PRINTF("ASSERT : %s, %s::%d\n", \ -+ #exp, __BASE_FILE__, __LINE__); -+ -+#define EIP_ASSERT(exp) \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ netif_stop_queue(eip_tx->net_device); \ -+ } -+ -+#define EIP_ASSERT2(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_BUG(exp) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ BUG(); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_GOTO(exp, label, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ goto label; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RET(exp, ret) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ return ret; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETURN(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETNULL(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return NULL; \ -+ } \ -+ } while (0) -+ -+#else -+ -+#define EIP_ASSERT(exp) do {} while(0) -+#define EIP_ASSERT_OUT(exp) do {} while(0) -+#define EIP_ASSERT_RETURN(exp) do {} while(0) -+#define EIP_ASSERT_RETNULL(exp) do {} while(0) -+#define EIP_ASSERT_BUG(exp) do {} while(0) -+ -+#endif /* EIP_ASSERT */ -+ -+ -+ -+typedef struct { -+ u_short ip_bcast; -+ u_short ip_inst; -+ u_short ip_addr; -+} EIP_ADDRESS; -+ -+typedef struct { -+ EIP_ADDRESS h_dhost; -+ EIP_ADDRESS h_shost; -+ u_short h_sap; -+} EIP_HEADER; -+#define EIP_HEADER_PAD (2) -+ -+typedef struct eip_proc_fs { -+ const char *name; -+ struct proc_dir_entry **parent; -+ read_proc_t *read; -+ write_proc_t *write; -+ unsigned char allocated; -+ struct proc_dir_entry *entry; -+} EIP_PROC_FS; -+ -+#define EIP_PROC_ROOT_DIR "eip" -+ -+#define EIP_PROC_DEBUG_DIR "debug" -+#define EIP_PROC_DEBUG_RX_FLUSH "rx_flush" -+#define EIP_PROC_DEBUG_TX_FLUSH "tx_flush" -+ -+#define EIP_PROC_AGGREG_DIR "aggregation" -+#define EIP_PROC_AGGREG_ONOFF "enable" -+#define EIP_PROC_AGGREG_TO "timeout" -+#define EIP_PROC_AGGREG_COPYBREAK "copybreak" -+ -+#define EIP_PROC_TX_COPYBREAK "tx_copybreak" -+#define EIP_PROC_STATS "stats" -+#define EIP_PROC_RX_GRAN "rx_granularity" -+#define EIP_PROC_TX_RAILMASK "tx_railmask" -+#define EIP_PROC_TMD_INUSE "tmd_inuse" -+#define EIP_PROC_EIPDEBUG "eipdebug" -+#define EIP_PROC_CHECKSUM "checksum" -+ -+/* RX */ -+/* dma_len is used to keep the len of a received packet */ -+/* nmd.nmd_len is the max dma that can be received */ -+/* */ -+struct eip_rmd { -+ struct sk_buff *skb; -+ -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ EP_RXD *rxd; -+ struct eip_rmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_rmd *next; /* all other lists */ -+ } chain; -+}; -+typedef struct eip_rmd EIP_RMD; -+struct eip_rmd_head { -+ EP_NMH *handle; -+ -+ EP_RCVR *rcvr; -+ EIP_RMD *busy_list; -+ -+ /* stats */ -+ atomic_t stats; -+ unsigned long dma; -+}; -+ -+typedef struct eip_rmd_head EIP_RMD_HEAD; -+typedef struct eip_rx { -+ struct eip_rmd_head head[EIP_SVC_NR]; -+ -+ EIP_RMD *irq_list; -+ short irq_list_nr; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long reschedule; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+ unsigned char rmd_max_nr; -+ unsigned char sysctl_granularity; -+ struct net_device *net_device; -+} EIP_RX; -+ -+/* TX */ -+/* dma_len_max is the maximum len for a given DMA */ -+/* where mnd.nmd_len is the len of the packet to send ~> than skb->len */ -+typedef struct eip_ipfrag_handle { -+ /* common with tmd */ -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+ -+ /* private */ -+ struct list_head list; -+ struct timeval timestamp; -+ unsigned int frag_nr; -+ int datagram_len; /* Ip data */ -+ int dma_correction; -+ EP_PAYLOAD payload; -+} EIP_IPFRAG; -+ -+struct eip_tmd { -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+}; -+ -+struct eip_tmd_head { -+ EP_NMH *handle; -+ -+ struct eip_tmd *tmd; -+ atomic_t stats; -+}; -+ -+typedef struct eip_tmd EIP_TMD; -+typedef struct eip_tmd_head EIP_TMD_HEAD; -+ -+/* #define EIP_MORE_STATS */ -+ -+typedef struct eip_tx { -+ struct net_device *net_device; -+ EP_XMTR *xmtr; -+ EP_SYS *ep_system; -+ -+ struct eip_tmd_head head[EIP_TMD_TYPE_NR]; -+ struct list_head inuse; -+ atomic_t destructor; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long dma[EIP_SVC_NR]; -+ -+#ifdef EIP_MORE_STATS -+ unsigned long sent_copybreak; -+ unsigned long sent_std; -+ unsigned long sent_aggreg; -+#endif -+ -+ unsigned char tmd_max_nr; -+ -+ unsigned short sysctl_copybreak; -+ unsigned short sysctl_ipfrag_to; -+ unsigned short sysctl_ipfrag_copybreak; -+ unsigned short sysctl_aggregation; -+ -+ unsigned short ipfrag_count; -+ struct list_head ipfrag; -+ spinlock_t ipfraglock; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+} EIP_TX; -+ -+/* =============================================== */ -+ /* unsigned long multicast; */ -+#endif /* __EIP_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/eip/eip_stats.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/eip_stats.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/eip_stats.c 2005-06-01 23:12:54.555445792 -0400 -@@ -0,0 +1,374 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: eip_stats.c,v 1.34.2.1 2005/01/26 14:31:56 mike Exp $ -+ * $Source: /cvs/master/quadrics/eipmod/eip_stats.c,v $ -+ */ -+ -+#include -+#include -+ -+#include -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "eip_linux.h" -+#include "eip_stats.h" -+ -+extern EIP_RX *eip_rx; -+extern EIP_TX *eip_tx; -+extern int tx_copybreak_max; -+extern EP_RAILMASK tx_railmask; -+extern int eip_checksum_state; -+extern void eip_stop_queue(void); -+extern void eip_start_queue(void); -+ -+static int eip_stats_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int i, outlen = 0; -+ -+ *buf = '\0'; -+ strcat(buf, "\n"); -+ strcat(buf, "--------------------------------------------+------------+-----------------+\n"); -+ strcat(buf, " SKB/DMA | | Rx | Tx | TMD TYPE |\n"); -+ strcat(buf, "--------------------------------------------+------------|-----------------+\n"); -+ -+ i = 0; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #1[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #2[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #3[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld +-----------------+\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ sprintf(buf + strlen(buf), " RMD IRQ %4.4d %10lu | %10lu |\n", -+ eip_rx->irq_list_nr, -+ eip_rx->packets, eip_tx->packets); -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ -+#ifdef EIP_MORE_STATS -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), " Copybreak %10ld Std %10ld Aggreg %10ld\n", -+ eip_tx->sent_copybreak, eip_tx->sent_std, eip_tx->sent_aggreg); -+#endif -+ -+ -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), "Rx bytes: %lu (%lu Mb) errors: %lu dropped: %lu reschedule: %lu\n", -+ eip_rx->bytes, eip_rx->bytes / (1024 * 1024), eip_rx->errors, eip_rx->dropped, eip_rx->reschedule); -+ sprintf(buf + strlen(buf), "Tx bytes: %lu (%lu Mb) errors: %lu dropped: %lu\n", -+ eip_tx->bytes, eip_tx->bytes / (1024 * 1024), eip_tx->errors, eip_tx->dropped); -+ strcat(buf, "\n"); -+ -+ outlen = strlen(buf); -+ ASSERT(outlen < PAGE_SIZE); -+ *eof = 1; -+ return outlen; -+} -+ -+void eip_stats_dump(void) -+{ -+ int eof; -+ -+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ -+ if (buf == NULL) -+ { -+ printk("no memory to produce eip_stats\n"); -+ return; -+ } -+ -+ eip_stats_read(buf, NULL, 0, 0, &eof, NULL); -+ -+ printk(buf); -+ -+ kfree(buf); -+} -+ -+static int eip_stats_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ eip_rx->packets = 0; -+ eip_rx->bytes = 0; -+ eip_rx->errors = 0; -+ eip_rx->dropped = 0; -+ eip_rx->reschedule = 0; -+ for (i = 0; i < EIP_SVC_NR; eip_rx->head[i].dma = 0, i++); -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ eip_tx->packets = 0; -+ eip_tx->bytes = 0; -+ eip_tx->errors = 0; -+ eip_tx->dropped = 0; -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ for (i = 0; i < EIP_SVC_NR; eip_tx->dma[i] = 0, i++); -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return count; -+} -+ -+#define eip_stats_var_write(name) \ -+static int eip_stats_##name##_write(struct file *file, const char *buf, unsigned long count, void *data) \ -+{ \ -+ char * b = (char *) buf; \ -+ *(b + count) = '\0'; \ -+ eip_##name##_set((int) simple_strtoul(b, NULL, 10)); \ -+ return count; \ -+} -+ -+#define eip_stats_var_read(name, var) \ -+static int eip_stats_##name##_read(char *buf, char **start, off_t off, int count, int *eof, void *data) \ -+{ \ -+ sprintf(buf, "%d\n", var); \ -+ *eof = 1; \ -+ return strlen(buf); \ -+} -+ -+ -+#define eip_stats_var_set(name, min, max, default, var) \ -+void eip_##name##_set(int i) \ -+{ \ -+ if ( (i >= min) && (i <= max)) { \ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting " #name " to %d\n", i); \ -+ var =(unsigned short) i; \ -+ } \ -+ else { \ -+ EIP_ERR_PRINTF("parameter error : %d <= " #name "(%d) <= %d using default %d\n", min, i, (int) max, (int) default); \ -+ } \ -+} -+ -+eip_stats_var_set(tx_copybreak, 0, tx_copybreak_max, EIP_TX_COPYBREAK, eip_tx->sysctl_copybreak); -+eip_stats_var_set(rx_granularity, 1, EIP_RMD_MIN_NR, EIP_RX_GRANULARITY, eip_rx->sysctl_granularity); -+eip_stats_var_set(tx_railmask, 0, EP_RAILMASK_ALL, EP_RAILMASK_ALL, tx_railmask); -+eip_stats_var_set(ipfrag_to, 0, (1 << 16), EIP_IPFRAG_TO, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_set(aggregation, 0, 1, 1, eip_tx->sysctl_aggregation); -+eip_stats_var_set(ipfrag_copybreak, 0, EIP_IPFRAG_COPYBREAK, EIP_IPFRAG_COPYBREAK, eip_tx->sysctl_ipfrag_copybreak); -+/* eip_stats_var_set(eipdebug, 0, , 0, eipdebug); */ -+ -+eip_stats_var_read(aggregation, eip_tx->sysctl_aggregation); -+eip_stats_var_read(ipfrag_count, eip_tx->ipfrag_count); -+eip_stats_var_read(ipfrag_to, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_read(ipfrag_copybreak, eip_tx->sysctl_ipfrag_copybreak); -+eip_stats_var_read(tx_copybreak, eip_tx->sysctl_copybreak); -+eip_stats_var_read(rx_granularity, eip_rx->sysctl_granularity); -+eip_stats_var_read(tx_railmask, tx_railmask); -+ -+eip_stats_var_write(aggregation); -+eip_stats_var_write(ipfrag_to); -+eip_stats_var_write(ipfrag_copybreak); -+eip_stats_var_write(tx_copybreak); -+eip_stats_var_write(rx_granularity); -+eip_stats_var_write(tx_railmask); -+ -+ -+static int eip_checksum_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * b = (char *) buf; -+ int value; -+ -+ *(b + count) = '\0'; -+ -+ value = (int) simple_strtoul(b, NULL, 10); -+ if ((value >= CHECKSUM_NONE) && (value <= CHECKSUM_UNNECESSARY)) -+ eip_checksum_state = value; -+ else -+ EIP_ERR_PRINTF("%d <= checksum(%d) <= %d using old value %d\n", CHECKSUM_NONE, value, CHECKSUM_UNNECESSARY, eip_checksum_state); -+ -+ return count; -+} -+ -+static int eip_checksum_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ switch ( eip_checksum_state ) -+ { -+ case 0 : sprintf(buf, "0 CHECKSUM_NONE\n"); break; -+ case 1 : sprintf(buf, "1 CHECKSUM_HW\n"); break; -+ case 2 : sprintf(buf, "2 CHECKSUM_UNNECESSARY\n"); break; -+ default : sprintf(buf, "%d INVALID VALUE\n", eip_checksum_state); break; -+ } -+ *eof = 1; -+ return strlen(buf); -+} -+ -+static int eip_stats_eipdebug_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ *buf = '\0'; -+ sprintf(buf + strlen(buf), "0x%x\n", eipdebug); -+ *eof = 1; -+ return strlen(buf); -+} -+static int eip_stats_eipdebug_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * p = (char *) buf; -+ *(p + count - 1) = '\0'; -+ eipdebug = simple_strtoul(p, NULL, 0); -+ __EIP_DBG_PRINTK("Setting eipdebug to 0x%x\n", eipdebug); -+ return count; -+} -+ -+static int eip_stats_tmd_inuse_read(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ struct list_head *lp; -+ unsigned long flags; -+ unsigned int len = 0; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ list_for_each (lp, &eip_tx->inuse) { -+ EIP_TMD *tmd = list_entry (lp, EIP_TMD, chain.link); -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ -+ len += sprintf(page+len, "tmd=%p id=%d len=%d\n", -+ tmd, eiph ? ntohs(eiph->h_dhost.ip_addr) : -1, -+ tmd->dma_len); -+ -+ if (len + 40 >= count) -+ break; -+ } -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, len); -+} -+ -+static int eip_stats_debug_rx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing rx ...\n"); -+ tasklet_schedule(&eip_rx->tasklet); -+ return count; -+} -+static int eip_stats_debug_tx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing tx ... %d tmds reclaimed\n", ep_enable_txcallbacks(eip_tx->xmtr)); -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ tasklet_schedule(&eip_tx->tasklet); -+ return count; -+} -+ -+#define EIP_PROC_PARENT_NR (3) -+/* NOTE : the parents should be declared b4 the children */ -+static EIP_PROC_FS eip_procs[] = { -+ /* {name, parent, read fn, write fn, allocated, entry}, */ -+ {EIP_PROC_ROOT_DIR, &qsnet_procfs_root, NULL, NULL, 0, NULL}, -+ {EIP_PROC_DEBUG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, /* end of parents */ -+ {EIP_PROC_STATS, &eip_procs[0].entry, eip_stats_read, eip_stats_write, 0, NULL}, -+ {EIP_PROC_TX_COPYBREAK, &eip_procs[0].entry, eip_stats_tx_copybreak_read, eip_stats_tx_copybreak_write, 0, NULL}, -+ {EIP_PROC_RX_GRAN, &eip_procs[0].entry, eip_stats_rx_granularity_read, eip_stats_rx_granularity_write, 0, NULL}, -+ {EIP_PROC_TX_RAILMASK, &eip_procs[0].entry, eip_stats_tx_railmask_read, eip_stats_tx_railmask_write, 0, NULL}, -+ {EIP_PROC_TMD_INUSE, &eip_procs[0].entry, eip_stats_tmd_inuse_read, NULL, 0, NULL}, -+ {EIP_PROC_EIPDEBUG, &eip_procs[0].entry, eip_stats_eipdebug_read, eip_stats_eipdebug_write, 0, NULL}, -+ {EIP_PROC_CHECKSUM, &eip_procs[0].entry, eip_checksum_read, eip_checksum_write, 0, NULL}, -+ {EIP_PROC_DEBUG_RX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_rx_flush, 0, NULL}, -+ {EIP_PROC_DEBUG_TX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_tx_flush, 0, NULL}, -+ {"ipfrag_count", &eip_procs[2].entry, eip_stats_ipfrag_count_read, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_TO, &eip_procs[2].entry, eip_stats_ipfrag_to_read, eip_stats_ipfrag_to_write, 0, NULL}, -+ {EIP_PROC_AGGREG_ONOFF, &eip_procs[2].entry, eip_stats_aggregation_read, eip_stats_aggregation_write, 0, NULL}, -+ {EIP_PROC_AGGREG_COPYBREAK, &eip_procs[2].entry, eip_stats_ipfrag_copybreak_read, eip_stats_ipfrag_copybreak_write, 0, NULL}, -+ {NULL, NULL, NULL, NULL, 1, NULL}, -+}; -+ -+int eip_stats_init(void) -+{ -+ int p; -+ -+ for (p = 0; !eip_procs[p].allocated; p++) { -+ if (p < EIP_PROC_PARENT_NR) -+ eip_procs[p].entry = proc_mkdir(eip_procs[p].name, *eip_procs[p].parent); -+ else -+ eip_procs[p].entry = create_proc_entry(eip_procs[p].name, 0, *eip_procs[p].parent); -+ -+ if (!eip_procs[p].entry) { -+ EIP_ERR_PRINTF("%s\n", "Cannot allocate proc entry"); -+ eip_stats_cleanup(); -+ return -ENOMEM; -+ } -+ -+ eip_procs[p].entry->owner = THIS_MODULE; -+ eip_procs[p].entry->write_proc = eip_procs[p].write; -+ eip_procs[p].entry->read_proc = eip_procs[p].read; -+ eip_procs[p].allocated = 1; -+ } -+ eip_procs[p].allocated = 0; -+ return 0; -+} -+ -+void eip_stats_cleanup(void) -+{ -+ int p; -+ for (p = 0; eip_procs[p].allocated; p++) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Removing %s from proc\n", eip_procs[p].name); -+ remove_proc_entry(eip_procs[p].name, *eip_procs[p].parent); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/eip/eip_stats.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/eip_stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/eip_stats.h 2005-06-01 23:12:54.555445792 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_stats.h,v 1.14 2004/05/10 14:47:47 daniel Exp $" -+ -+#ifndef __EIP_STATS_H -+#define __EIP_STATS_H -+ -+int eip_stats_init(void); -+void eip_stats_cleanup(void); -+void eip_rx_granularity_set(int); -+void eip_tx_copybreak_set(int); -+void eip_ipfrag_to_set(int); -+void eip_aggregation_set(int); -+void eip_ipfrag_copybreak_set(int); -+void eip_stats_dump(void); -+ -+#endif /* __EIP_STATS_H */ -Index: linux-2.4.21/drivers/net/qsnet/eip/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/Makefile 2005-06-01 23:12:54.555445792 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/eip/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/eip/Makefile -+# -+ -+list-multi := eip.o -+eip-objs := eip_linux.o eip_stats.o -+export-objs := -+obj-$(CONFIG_EIP) := eip.o -+ -+eip.o : $(eip-objs) -+ $(LD) -r -o $@ $(eip-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/eip/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/Makefile.conf 2005-06-01 23:12:54.555445792 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = eip.o -+MODULENAME = eip -+KOBJFILES = eip_linux.o eip_stats.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_EIP -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/eip/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/eip/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/eip/quadrics_version.h 2005-06-01 23:12:54.556445640 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/elan/bitmap.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/bitmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/bitmap.c 2005-06-01 23:12:54.556445640 -0400 -@@ -0,0 +1,287 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: bitmap.c,v 1.5 2004/01/20 17:32:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/bitmap.c,v $*/ -+ -+#if defined(__KERNEL__) -+#include -+#endif -+#include -+#include -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_freebit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit off */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != ~((bitmap_t) 0)) -+ break; -+ -+ if (i <= last) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == 0) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+ -+} -+ -+/* -+ * bt_lowbit: -+ * Return the index of the lowest set bit in the -+ * bitmap, or -1 for failure. -+ */ -+int -+bt_lowbit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit on */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != 0) -+ break; -+ if (i <= last) -+ { -+ /* found a word bit a bit on, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if (bitmap[i] & (1 << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ -+ return (-1); -+} -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset) -+{ -+ int first = ((last+1) + BT_NBIPUL-1) >> BT_ULSHIFT; -+ int end = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for bits before the first whole word */ -+ if (((last+1) & BT_ULMASK) != 0) -+ { -+ maxbit = ((first-1) == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = ((last+1) & BT_ULMASK); j <= maxbit; j++) -+ if ((bitmap[first-1] & (1 << j)) == (isset << j)) -+ return (((first-1) << BT_ULSHIFT) | j); -+ } -+ -+ /* look for a word with a bit off */ -+ for (i = first; i <= end; i++) -+ if (bitmap[i] != (isset ? 0 : ~((bitmap_t) 0))) -+ break; -+ -+ if (i <= end) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == end) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == (isset << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+} -+ -+void -+bt_copy (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ b[i] = a[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(a, i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR(b,i); -+} -+ -+void -+bt_zero (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = 0; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_CLEAR(bitmap,i); -+} -+ -+void -+bt_fill (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = ~((bitmap_t) 0); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_SET(bitmap,i); -+} -+ -+int -+bt_cmp (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ if (a[i] != b[i]) -+ return (1); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) != BT_TEST(b, i)) -+ return (1); -+ return (0); -+} -+ -+void -+bt_intersect (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) && BT_TEST (b, i)) -+ BT_SET (a, i); -+ else -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_remove (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= ~b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (b, i)) -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_add (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] |= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(b, i)) -+ BT_SET (a, i); -+} -+ -+/* -+ * bt_spans : partition a spans partition b -+ * == all bits set in 'b' are set in 'a' -+ */ -+int -+bt_spans (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ if (BT_TEST (b, i) && !BT_TEST (a, i)) -+ return (0); -+ return (1); -+} -+ -+/* -+ * bt_subset: copy [base,base+nbits-1] from 'a' to 'b' -+ */ -+void -+bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, base+i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR (b,i); -+ } -+} -+ -+void -+bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (!BT_TEST (a, i) && BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+void -+bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, i) && !BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+int -+bt_nbits (bitmap_t *a, int nbits) -+{ -+ int i, c; -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (a, i)) -+ c++; -+ return (c); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/capability.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/capability.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/capability.c 2005-06-01 23:12:54.557445488 -0400 -@@ -0,0 +1,628 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.c,v 1.13 2004/07/20 10:15:33 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.c,v $ */ -+ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_cap_list); -+ -+typedef struct elan_vp_struct -+{ -+ struct list_head list; -+ ELAN_CAPABILITY vp; -+} ELAN_VP_NODE_STRUCT; -+ -+ -+typedef struct elan_attached_struct -+{ -+ void *cb_args; -+ ELAN_DESTROY_CB cb_func; -+} ELAN_ATTACHED_STRUCT; -+ -+typedef struct elan_cap_node_struct -+{ -+ struct list_head list; -+ ELAN_CAP_STRUCT node; -+ ELAN_ATTACHED_STRUCT *attached[ELAN_MAX_RAILS]; -+ struct list_head vp_list; -+} ELAN_CAP_NODE_STRUCT; -+ -+ -+ELAN_CAP_NODE_STRUCT * -+find_cap_node(ELAN_CAPABILITY *cap) -+{ -+ struct list_head *tmp; -+ ELAN_CAP_NODE_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)) { -+ return ptr; -+ } -+ } -+ return ptr; -+}; -+ -+ELAN_VP_NODE_STRUCT * -+find_vp_node( ELAN_CAP_NODE_STRUCT *cap_node,ELAN_CAPABILITY *map) -+{ -+ struct list_head * tmp; -+ ELAN_VP_NODE_STRUCT * ptr = NULL; -+ -+ list_for_each(tmp, &cap_node->vp_list) { -+ ptr = list_entry(tmp, ELAN_VP_NODE_STRUCT , list); -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->vp,map) -+ && ELAN_CAP_GEOM_MATCH(&ptr->vp,map)){ -+ return ptr; -+ } -+ } -+ return ptr; -+} -+ -+int -+elan_validate_cap(ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* check versions */ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (EINVAL); -+ } -+ -+ /* check its not HWTEST */ -+ if ( cap->cap_type & ELAN_CAP_TYPE_HWTEST ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_HWTEST \n"); -+ return (EINVAL); -+ } -+ -+ /* check its type */ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_KERNEL : -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_KERNEL \n"); -+ return (EINVAL); -+ -+ /* check it has a valid type */ -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ break; -+ -+ /* all others are failed as well */ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed unknown type = %x \n", (cap->cap_type & ELAN_CAP_TYPE_MASK)); -+ return (EINVAL); -+ } -+ -+ if ((cap->cap_lowcontext == ELAN_CAP_UNINITIALISED) || (cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ || (cap->cap_lownode == ELAN_CAP_UNINITIALISED) || (cap->cap_highnode == ELAN_CAP_UNINITIALISED)) -+ { -+ -+ ELAN_DEBUG4 (ELAN_DBG_VP,"elan_validate_cap: ELAN_CAP_UNINITIALISED LowNode %d HighNode %d LowContext %d highContext %d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lownode > cap->cap_highnode) %d %d\n",cap->cap_lownode, cap->cap_highnode); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_mycontext != ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed cap->cap_mycontext is set %d \n", cap->cap_mycontext); -+ return (EINVAL); -+ } -+ -+ -+ if ((ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)) > ELAN_MAX_VPS) -+ { -+ ELAN_DEBUG6 (ELAN_DBG_VP,"elan_validate_cap: too many vps LowNode %d HighNode %d LowContext %d highContext %d, %d >% d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext, -+ (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)), -+ ELAN_MAX_VPS); -+ -+ return (EINVAL); -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+elan_validate_map(ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vptr = NULL; -+ char space[256]; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map \n"); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map cap = %s \n",elan_capability_string(cap,space)); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map map = %s \n",elan_capability_string(map,space)); -+ -+ /* does cap exist */ -+ ptr = find_cap_node(cap); -+ if ( ptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not found \n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ /* is it active */ -+ if ( ! ptr->node.active ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not active \n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* are they the same */ -+ if ( ELAN_CAP_TYPE_MATCH(cap,map) -+ && ELAN_CAP_GEOM_MATCH(cap,map)) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap == map passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ -+ /* is map in map list */ -+ vptr = find_vp_node(ptr, map); -+ if ( vptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map not found\n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_create_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_create_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* need to check that the cap does not over lap another one -+ or is an exact match with only the userkey changing */ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) -+ && (&ptr->node.owner == owner)) { -+ if ( ptr->node.active ) { -+ /* dont inc attached count as its like a create */ -+ ptr->node.cap.cap_userkey = cap->cap_userkey; -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ else -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ } -+ -+ /* does it overlap, even with ones being destroyed */ -+ if (elan_cap_overlap(&ptr->node.cap,cap)) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EACCES; -+ } -+ } -+ -+ /* create it */ -+ KMEM_ALLOC(ptr, ELAN_CAP_NODE_STRUCT *, sizeof(ELAN_CAP_NODE_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ /* create space for the attached array */ -+ for(rail=0;railattached[rail]=NULL; -+ if ( ELAN_CAP_IS_RAIL_SET(cap,rail) ) -+ { -+ KMEM_ALLOC(ptr->attached[rail], ELAN_ATTACHED_STRUCT *, sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap), 1); -+ if (ptr->attached[rail] == NULL) -+ { -+ for(;rail>=0;rail--) -+ if ( ptr->attached[rail] ) -+ KMEM_FREE(ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap)); -+ -+ KMEM_FREE(ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ /* blank the attached array */ -+ for(i=0;iattached[rail][i].cb_func = NULL; -+ } -+ } -+ -+ ptr->node.owner = owner; -+ ptr->node.cap = *cap; -+ ptr->node.attached = 1; /* creator counts as attached */ -+ ptr->node.active = 1; -+ ptr->vp_list.next = &(ptr->vp_list); -+ ptr->vp_list.prev = &(ptr->vp_list); -+ -+ list_add_tail(&ptr->list, &elan_cap_list); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+void -+elan_destroy_cap_test(ELAN_CAP_NODE_STRUCT *cap_ptr) -+{ -+ /* called by someone holding the mutex */ -+ struct list_head * vp_tmp; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int rail; -+ -+ /* check to see if it can be deleted now */ -+ if ( cap_ptr->node.attached == 0 ) { -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_cap_test: attached == 0\n"); -+ -+ /* delete the vp list */ -+ list_for_each(vp_tmp, &(cap_ptr->vp_list)) { -+ vp_ptr = list_entry(vp_tmp, ELAN_VP_NODE_STRUCT , list); -+ list_del(&vp_ptr->list); -+ KMEM_FREE( vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ } -+ -+ list_del(&cap_ptr->list); -+ -+ /* delete space for the attached array */ -+ for(rail=0;railattached[rail]) -+ KMEM_FREE(cap_ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(&(cap_ptr->node.cap))); -+ -+ KMEM_FREE(cap_ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ } -+} -+ -+int -+elan_destroy_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * el; -+ struct list_head * nel; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ int found = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_destroy_cap %s\n",elan_capability_string(cap,space)); -+ -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if ( (ptr->node.owner == owner ) -+ && ( (cap == NULL) -+ || (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)))) { -+ -+ if ( ptr->node.active ) { -+ -+ /* mark as in active and dec attached count */ -+ ptr->node.active = 0; -+ ptr->node.attached--; -+ ptr->node.owner = 0; /* no one own's it now */ -+ -+ /* need to tell any one who was attached that this has been destroy'd */ -+ for(rail=0;railnode.cap), rail)) { -+ for(i=0;i< ELAN_CAP_NUM_CONTEXTS(&(ptr->node.cap));i++) -+ if ( ptr->attached[rail][i].cb_func != NULL) -+ ptr->attached[rail][i].cb_func(ptr->attached[rail][i].cb_args, cap, NULL); -+ } -+ -+ /* now try to destroy it */ -+ elan_destroy_cap_test(ptr); -+ -+ /* found it */ -+ found = 1; -+ } -+ } -+ } -+ -+ if ( found ) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ -+ /* failed */ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_cap: didnt find it \n"); -+ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_get_caps(uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps) -+{ -+ uint results = 0; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_get_caps\n"); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ copyout(&ptr->node, &caps[results], sizeof (ELAN_CAP_STRUCT)); -+ -+ results++; -+ -+ if ( results >= array_size ) -+ { -+ copyout(&results, number_of_results, sizeof(uint)); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ copyout(&results, number_of_results, sizeof(uint)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_create_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_create_vp\n"); -+ -+ /* the railmasks must match */ -+ if ( cap->cap_railmask != map->cap_railmask) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* does the cap exist */ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr == NULL) || ( cap_ptr->node.owner != owner ) || (! cap_ptr->node.active) ) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* is there already a mapping */ -+ vp_ptr = find_vp_node(cap_ptr,map); -+ if ( vp_ptr != NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* create space for mapping */ -+ KMEM_ALLOC(vp_ptr, ELAN_VP_NODE_STRUCT *, sizeof(ELAN_VP_NODE_STRUCT), 1); -+ if (vp_ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ /* copy map */ -+ vp_ptr->vp = *map; -+ list_add_tail(&vp_ptr->list, &(cap_ptr->vp_list)); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_destroy_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int i, rail; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_vp\n"); -+ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr!=NULL) && (cap_ptr->node.owner == owner) && ( cap_ptr->node.active)) -+ { -+ vp_ptr = find_vp_node( cap_ptr, map ); -+ if ( vp_ptr != NULL ) -+ { -+ list_del(&vp_ptr->list); -+ KMEM_FREE(vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ -+ /* need to tell those who are attached that map is nolonger in use */ -+ for(rail=0;railnode.cap));i++) -+ if ( cap_ptr->attached[rail][i].cb_func != NULL) -+ cap_ptr->attached[rail][i].cb_func( cap_ptr->attached[rail][i].cb_args, cap, map); -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ /* didnt find it */ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_attach_cap(ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB func) -+{ -+ char space[127]; -+ struct list_head *el; -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_attach_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* currently must provide a call back, as null mean something */ -+ if ( func == NULL) -+ return (EINVAL); -+ -+ /* mycontext must be set and correct */ -+ if ( ! ELAN_CAP_VALID_MYCONTEXT(cap)) -+ return (EINVAL); -+ -+ /* rail must be one of the rails in railmask */ -+ if (((1 << rail) & cap->cap_railmask) == 0) -+ return (EINVAL); -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(el, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *cap_ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_MATCH(&cap_ptr->node.cap,cap) && cap_ptr->node.active) { -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( cap_ptr->attached[rail][attached_index].cb_func != NULL ) /* only one per ctx per rail */ -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* keep track of who attached as we might need to tell them when */ -+ /* cap or maps get destroyed */ -+ cap_ptr->attached[rail][ attached_index ].cb_func = func; -+ cap_ptr->attached[rail][ attached_index ].cb_args = args; -+ cap_ptr->node.attached++; -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: failed to find \n"); -+ -+ /* didnt find one */ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_detach_cap(ELAN_CAPABILITY *cap, unsigned int rail) -+{ -+ struct list_head *el, *nel; -+ char space[256]; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elan_detach_cap %s\n",elan_capability_string(cap,space)); -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *ptr = list_entry (el, ELAN_CAP_NODE_STRUCT, list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && -+ ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) && -+ (ptr->node.cap.cap_railmask & cap->cap_railmask) == cap->cap_railmask) { -+ -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( ptr->attached[rail][ attached_index ].cb_func == NULL ) -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elanmod_detach_cap already removed \n"); -+ -+ ptr->attached[rail][ attached_index ].cb_func = NULL; -+ ptr->attached[rail][ attached_index ].cb_args = (void *)0; -+ -+ ptr->node.attached--; -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elanmod_detach_cap new attach count%d \n", ptr->node.attached); -+ -+ elan_destroy_cap_test(ptr); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: success\n"); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: failed to find\n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_cap_dump() -+{ -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ ELAN_DEBUG2 (ELAN_DBG_ALL, "cap dump: owner %p type %x\n", ptr->node.owner, ptr->node.cap.cap_type); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL, "cap dump: LowNode %d HighNode %d LowContext %d mycontext %d highContext %d\n", -+ ptr->node.cap.cap_lownode , ptr->node.cap.cap_highnode, -+ ptr->node.cap.cap_lowcontext , ptr->node.cap.cap_mycontext, ptr->node.cap.cap_highcontext); -+ -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/capability_general.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/capability_general.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/capability_general.c 2005-06-01 23:12:54.558445336 -0400 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability_general.c,v 1.10 2004/02/25 13:47:59 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/capability_general.c,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+#else -+ -+#include -+#include -+#include -+ -+#endif -+ -+#include -+ -+ -+void -+elan_nullcap (ELAN_CAPABILITY *cap) -+{ -+ register int i; -+ -+ for (i = 0; i < sizeof (cap->cap_userkey)/sizeof(cap->cap_userkey.key_values[0]); i++) -+ cap->cap_userkey.key_values[i] = ELAN_CAP_UNINITIALISED; -+ -+ cap->cap_lowcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_highcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_mycontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_lownode = ELAN_CAP_UNINITIALISED; -+ cap->cap_highnode = ELAN_CAP_UNINITIALISED; -+ cap->cap_railmask = ELAN_CAP_UNINITIALISED; -+ cap->cap_type = ELAN_CAP_UNINITIALISED; -+ cap->cap_spare = 0; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ -+ for (i = 0; i < sizeof (cap->cap_bitmap)/sizeof (cap->cap_bitmap[0]); i++) -+ cap->cap_bitmap[i] = 0; -+} -+ -+char * -+elan_capability_string (ELAN_CAPABILITY *cap, char *str) -+{ -+ if (cap == NULL) -+ sprintf (str, "[-.-.-.-] cap = NULL\n"); -+ else -+ sprintf (str, "[%x.%x.%x.%x] Version %x Type %x \n" -+ "Context %x.%x.%x Node %x.%x\n", -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3], -+ cap->cap_version, cap->cap_type, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, -+ cap->cap_lownode, cap->cap_highnode); -+ -+ return (str); -+} -+ -+ELAN_LOCATION -+elan_vp2location (u_int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int i, vp, node, context, nnodes, nctxs; -+ -+ vp = 0; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0, i = 0; node < nnodes; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if (vp == process) -+ { -+ /* Return relative indices within the capability box */ -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0, i = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if (vp == process) -+ { -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ return( location ); -+} -+ -+int -+elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap) -+{ -+ int vp, node, context, nnodes, nctxs; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ vp = 0; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0 ; node < nnodes ; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ /* Failed to find it */ -+ return( -1 ); -+} -+ -+/* Return the number of processes as described by a capability */ -+int -+elan_nvps (ELAN_CAPABILITY *cap) -+{ -+ int i, c, nbits = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) -+ return (nbits); -+ -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (cap->cap_bitmap, i)) -+ c++; -+ -+ return (c); -+} -+ -+/* Return the number of local processes on a given node as described by a capability */ -+int -+elan_nlocal (int node, ELAN_CAPABILITY *cap) -+{ -+ int vp; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ for (vp = 0; vp < elan_nvps(cap); vp++) -+ { -+ loc = elan_vp2location(vp, cap); -+ if (loc.loc_node == node) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the maximum number of local processes on any node as described by a capability */ -+int -+elan_maxlocal (ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext - cap->cap_lowcontext + 1); -+} -+ -+/* Return the vps of the local processes on a given node as described by a capability */ -+int -+elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size) -+{ -+ int context; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ loc.loc_node = node; -+ -+ for (context = 0; context < MIN(size, elan_maxlocal(cap)); context++) -+ { -+ loc.loc_context = context; -+ -+ /* Should return -1 if none found */ -+ if ( (vps[context] = elan_location2vp( loc, cap )) != -1) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the number of rails that this capability utilises */ -+int -+elan_nrails (ELAN_CAPABILITY *cap) -+{ -+ int nrails = 0; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ while (railmask) -+ { -+ if (railmask & 1) -+ nrails++; -+ -+ railmask >>= 1; -+ } -+ } -+ else -+ /* Default to just one rail */ -+ nrails = 1; -+ -+ return (nrails); -+} -+ -+/* Fill out an array giving the physical rail numbers utilised by a capability */ -+int -+elan_rails (ELAN_CAPABILITY *cap, int *rails) -+{ -+ int nrails, rail; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ nrails = rail = 0; -+ while (railmask) -+ { -+ if (railmask & 1) -+ rails[nrails++] = rail; -+ -+ rail++; -+ railmask >>= 1; -+ } -+ } -+ else -+ { -+ /* Default to just one rail */ -+ rails[0] = 0; -+ nrails = 1; -+ } -+ -+ return( nrails ); -+} -+ -+int -+elan_cap_overlap(ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2) -+{ -+ /* by context */ -+ if ( cap1->cap_highcontext < cap2->cap_lowcontext ) return (0); -+ if ( cap1->cap_lowcontext > cap2->cap_highcontext) return (0); -+ -+ /* by node */ -+ if ( cap1->cap_highnode < cap2->cap_lownode ) return (0); -+ if ( cap1->cap_lownode > cap2->cap_highnode) return (0); -+ -+ /* by rail */ -+ /* they overlap if they have a rail in common */ -+ return (cap1->cap_railmask & cap2->cap_railmask); -+} -+ -+#if !defined(__KERNEL__) -+ -+/* Fill out an array that hints at the best use of the rails on a -+ * per process basis. The library user can then decide whether or not -+ * to take this into account (e.g. TPORTs) -+ * All processes calling this fn will be returned the same information. -+ */ -+int -+elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp) -+{ -+ int i; -+ int nrails = elan_nrails(cap); -+ int maxlocal = elan_maxlocal(cap); -+ -+ /* Test for a multi-rail capability */ -+ if (! (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL)) -+ { -+ /* Default to just one rail */ -+ for (i = 0; i < nvp; i++) -+ pref[i] = 0; -+ -+ return( 0 ); -+ } -+ -+ /* -+ * We allocate rails on a per node basis sharing our the rails -+ * equally amongst the local processes. However, if there is only -+ * one process per node and multiple rails, then we use a different -+ * algorithm where rails are allocated across all the processes in -+ * a round-robin fashion -+ */ -+ -+ if (maxlocal == 1) -+ { -+ /* Allocate rails in a round-robin manner */ -+ for (i = 0; i < nvp; i++) -+ *pref++ = i % nrails; -+ } -+ else -+ { -+ int node; -+ int *vps; -+ int nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ -+ vps = (int *) malloc(sizeof(int)*maxlocal); -+ -+ /* Grab the local process info for each node and allocate -+ * rails to those vps on an equal basis -+ */ -+ for (node = 0; node < nnodes; node++) -+ { -+ int nlocal; -+ int pprail; -+ -+ /* Grab an array of local vps */ -+ nlocal = elan_localvps(node, cap, vps, maxlocal); -+ -+ /* Calculate the number processes per rail */ -+ if ((pprail = nlocal/nrails) == 0) -+ pprail = 1; -+ -+ /* Allocate processes to rails */ -+ for (i = 0; i < nlocal; i++) -+ { -+ pref[vps[i]] = (i / pprail) % nrails; -+ } -+ } -+ -+ free(vps); -+ } -+ -+ return( 0 ); -+} -+ -+void -+elan_get_random_key(ELAN_USERKEY *key) -+{ -+ int i; -+ for (i = 0; i < sizeof(key->key_values) / sizeof(key->key_values[0]); i++) -+ key->key_values[i] = lrand48(); -+} -+ -+int elan_lowcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lowcontext); -+} -+ -+int elan_mycontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_mycontext); -+} -+ -+int elan_highcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext); -+} -+ -+int elan_lownode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lownode); -+} -+ -+int elan_highnode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highnode); -+} -+ -+int elan_captype(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_type); -+} -+ -+int elan_railmask(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_railmask); -+} -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/device.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/device.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/device.c 2005-06-01 23:12:54.559445184 -0400 -@@ -0,0 +1,147 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_dev_list); -+ -+ELAN_DEV_STRUCT * -+elan_dev_find (ELAN_DEV_IDX devidx) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ if (ptr->devidx == devidx) -+ return ptr; -+ if (ptr->devidx > devidx) -+ return ERR_PTR(-ENXIO); -+ } -+ -+ return ERR_PTR(-EINVAL); -+} -+ -+ELAN_DEV_STRUCT * -+elan_dev_find_byrail (unsigned short deviceid, unsigned rail) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"elan_dev_find_byrail devidx %d - %04x %04x, %d %d \n", ptr->devidx, -+ ptr->devinfo->dev_device_id, deviceid, ptr->devinfo->dev_rail, rail); -+ -+ if (ptr->devinfo->dev_device_id == deviceid && ptr->devinfo->dev_rail == rail) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+ELAN_DEV_IDX -+elan_dev_register (ELAN_DEVINFO *devinfo, ELAN_DEV_OPS *ops, void * user_data) -+{ -+ ELAN_DEV_STRUCT *ptr; -+ ELAN_DEV_IDX devidx = 0; -+ struct list_head *tmp; -+ -+ kmutex_lock(&elan_mutex); -+ -+ /* is it already registered */ -+ if ((ptr = elan_dev_find_byrail(devinfo->dev_device_id, devinfo->dev_rail)) != NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* find a free device idx */ -+ list_for_each (tmp, &elan_dev_list) { -+ if (list_entry (tmp, ELAN_DEV_STRUCT, node)->devidx != devidx) -+ break; -+ devidx++; -+ } -+ -+ /* create it and add */ -+ KMEM_ALLOC(ptr, ELAN_DEV_STRUCT *, sizeof(ELAN_DEV_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ ptr->devidx = devidx; -+ ptr->ops = ops; -+ ptr->devinfo = devinfo; -+ ptr->user_data = user_data; -+ -+ /* insert this entry *before* the last entry we've found */ -+ list_add_tail(&ptr->node, tmp); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_deregister (ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_dev_find_byrail (devinfo->dev_device_id, devinfo->dev_rail)) == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_DEV_STRUCT)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_dump () -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG3 (ELAN_DBG_ALL,"dev dump: index %u rail %u elan%c\n", -+ ptr->devidx, ptr->devinfo->dev_rail, '3' + ptr->devinfo->dev_device_id); -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"dev dump: Vid %x Did %x Rid %x DR %d DVal %x\n", -+ ptr->devinfo->dev_vendor_id, -+ ptr->devinfo->dev_device_id, -+ ptr->devinfo->dev_revision_id, -+ ptr->devinfo->dev_driver_version, -+ ptr->devinfo->dev_num_down_links_value); -+ -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/devinfo.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/devinfo.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/devinfo.c 2005-06-01 23:12:54.559445184 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.c,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.c,v $*/ -+ -+#include -+#include -+ -+int -+elan_get_devinfo(ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find (devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ { -+ copyout(target->devinfo, devinfo, sizeof(ELAN_DEVINFO)); -+ res = ESUCCESS; -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_get_position(ELAN_DEV_IDX devidx, ELAN_POSITION *position) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ res = target->ops->get_position(target->user_data, position); -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_set_position(ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR (target); -+ else -+ res = target->ops->set_position(target->user_data, nodeId, numNodes); -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/elanmod.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/elanmod.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/elanmod.c 2005-06-01 23:12:54.559445184 -0400 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: elanmod.c,v 1.11 2004/06/18 09:28:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.c,v $*/ -+ -+#include -+#include -+ -+kmutex_t elan_mutex; -+ -+int -+elan_init() -+{ -+ kmutex_init(&elan_mutex); -+ return (ESUCCESS); -+} -+ -+int -+elan_fini() -+{ -+ kmutex_destroy(&elan_mutex); -+ return (ESUCCESS); -+} -+ -+int -+elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext == ELAN_CAP_UNINITIALISED || cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: LowContext %d HighContext %d MyContext %d\n", -+ cap->cap_lowcontext , cap->cap_highcontext, cap->cap_mycontext); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if (position->pos_mode == ELAN_POS_UNKNOWN) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: Position Unknown \n"); -+ return (-EAGAIN); -+ } -+ -+ if ( ! ( ELAN_USER_CONTEXT(cap->cap_lowcontext) && ELAN_USER_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG4 (ELAN_DBG_VP, "elanmod_classify_cap: USER_BASE_CONTEXT %d %d %d %d \n" , ELAN_USER_BASE_CONTEXT_NUM,cap->cap_lowcontext, cap->cap_highcontext ,ELAN_USER_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ if (cap->cap_lownode == ELAN_CAP_UNINITIALISED) -+ cap->cap_lownode = position->pos_nodeid; -+ if (cap->cap_highnode == ELAN_CAP_UNINITIALISED) -+ cap->cap_highnode = position->pos_nodeid; -+ -+ if (cap->cap_lownode < 0 || cap->cap_highnode >= position->pos_nodes || cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG3 ( ELAN_DBG_VP,"elanmod_classify_cap: low %d high %d pos %d \n" , cap->cap_lownode ,cap->cap_highnode, position->pos_nodes); -+ -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_highnode < position->pos_nodeid) || (cap->cap_lownode > position->pos_nodeid)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: node not i range low %d high %d this %d\n", -+ cap->cap_lownode, cap->cap_highnode, position->pos_nodeid); -+ return (-EINVAL); -+ } -+ -+ break; -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: cant decode type %x \n", cap->cap_type & ELAN_CAP_TYPE_MASK); -+ return (-EINVAL); -+ -+ } -+ -+ switch (use) -+ { -+ case ELAN_USER_ATTACH: -+ case ELAN_USER_DETACH: -+ if (cap->cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext == ELAN_CAP_UNINITIALISED"); -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_mycontext != ELAN_CAP_UNINITIALISED) && -+ (cap->cap_mycontext < cap->cap_lowcontext || cap->cap_mycontext > cap->cap_highcontext)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext out of range %d %d %d \n", cap->cap_lowcontext,cap->cap_mycontext,cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ break; -+ -+ case ELAN_USER_P2P: -+ break; -+ -+ case ELAN_USER_BROADCAST: -+ if (! (cap->cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: use ELAN_USER_BROADCAST but cap not ELAN_CAP_TYPE_BROADCASTABLE\n"); -+ return (-EINVAL); -+ } -+ break; -+ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: unknown use (%d)\n",use); -+ return (-EINVAL); -+ } -+ -+ -+ -+ /* is any ctxt an rms one ?? */ -+ if (ELAN_RMS_CONTEXT(cap->cap_lowcontext) || ELAN_RMS_CONTEXT(cap->cap_highcontext)) -+ { -+ /* so both low and high must be */ -+ if (!(ELAN_RMS_CONTEXT(cap->cap_lowcontext) && ELAN_RMS_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: not rms ctxt %x %x\n",cap->cap_lowcontext,cap->cap_highcontext ); -+ return (-EINVAL); -+ } -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_RMS\n"); -+ return (ELAN_CAP_RMS); -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_OK\n"); -+ return (ELAN_CAP_OK); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/elanmod_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/elanmod_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/elanmod_linux.c 2005-06-01 23:12:54.560445032 -0400 -@@ -0,0 +1,410 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.c,v 1.16 2004/06/14 15:45:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan support module"); -+ -+MODULE_LICENSE("GPL"); -+ -+/* elanmod.c */ -+EXPORT_SYMBOL(elanmod_classify_cap); -+ -+/* bitmap.c */ -+#include -+ -+EXPORT_SYMBOL(bt_freebit); -+EXPORT_SYMBOL(bt_lowbit); -+EXPORT_SYMBOL(bt_nextbit); -+EXPORT_SYMBOL(bt_copy); -+EXPORT_SYMBOL(bt_zero); -+EXPORT_SYMBOL(bt_fill); -+EXPORT_SYMBOL(bt_cmp); -+EXPORT_SYMBOL(bt_intersect); -+EXPORT_SYMBOL(bt_remove); -+EXPORT_SYMBOL(bt_add); -+EXPORT_SYMBOL(bt_spans); -+EXPORT_SYMBOL(bt_subset); -+EXPORT_SYMBOL(bt_up); -+EXPORT_SYMBOL(bt_down); -+EXPORT_SYMBOL(bt_nbits); -+ -+/* capability.c */ -+EXPORT_SYMBOL(elan_nullcap); -+EXPORT_SYMBOL(elan_detach_cap); -+EXPORT_SYMBOL(elan_attach_cap); -+EXPORT_SYMBOL(elan_validate_map); -+ -+/* stats.c */ -+EXPORT_SYMBOL(elan_stats_register); -+EXPORT_SYMBOL(elan_stats_deregister); -+ -+/* device.c */ -+EXPORT_SYMBOL(elan_dev_deregister); -+EXPORT_SYMBOL(elan_dev_register); -+ -+/* debug */ -+int elan_debug_mode = QSNET_DEBUG_BUFFER; -+int elan_debug_mask; -+ -+static struct proc_dir_entry *elan_procfs_root; -+ -+extern void elan_procfs_init(void); -+extern void elan_procfs_fini(void); -+ -+static int elan_open (struct inode *ino, struct file *fp); -+static int elan_release (struct inode *ino, struct file *fp); -+static int elan_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations elan_fops = -+{ -+ ioctl: elan_ioctl, -+ open: elan_open, -+ release: elan_release, -+}; -+ -+static int __init elan_start(void) -+{ -+ int res; -+ -+ elan_procfs_init(); -+ -+ if ((res = elan_init()) != ESUCCESS) -+ { -+ elan_procfs_fini(); -+ return (-res); -+ } -+ -+ return (0); -+} -+ -+static void __exit elan_exit(void) -+{ -+ elan_fini(); -+ elan_procfs_fini(); -+} -+ -+ -+/* Declare the module init and exit functions */ -+void -+elan_procfs_init() -+{ -+ struct proc_dir_entry *p; -+ -+ elan_procfs_root = proc_mkdir("elan", qsnet_procfs_root); -+ -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mask", &elan_debug_mask, 0); -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mode", &elan_debug_mode, 0); -+ -+ if ((p = create_proc_entry ("ioctl", 0, elan_procfs_root)) != NULL) -+ { -+ p->proc_fops = &elan_fops; -+ p->data = 0; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+elan_procfs_fini() -+{ -+ remove_proc_entry ("debug_mask", elan_procfs_root); -+ remove_proc_entry ("debug_mode", elan_procfs_root); -+ -+ remove_proc_entry ("ioctl", elan_procfs_root); -+ remove_proc_entry ("version", elan_procfs_root); -+ -+ remove_proc_entry ("elan", qsnet_procfs_root); -+} -+ -+module_init(elan_start); -+module_exit(elan_exit); -+ -+static int -+elan_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+elan_release (struct inode *inode, struct file *fp) -+{ -+ /* mark all caps owned by fp to be destroyed */ -+ elan_destroy_cap(fp,NULL); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+elan_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int rep = 0; -+ -+ switch (cmd) -+ { -+ case ELANCTRL_STATS_GET_NEXT : -+ { -+ ELANCTRL_STATS_GET_NEXT_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_NEXT_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_next_index(args.statidx, args.next_statidx) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_FIND_INDEX : -+ { -+ ELANCTRL_STATS_FIND_INDEX_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_FIND_INDEX_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_find_index(args.block_name, args.statidx, args.num_entries) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK_INFO : -+ { -+ ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block_info(args.statidx, args.block_name, args.num_entries) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_INDEX_NAME : -+ { -+ ELANCTRL_STATS_GET_INDEX_NAME_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_INDEX_NAME_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_index_name(args.statidx, args.index, args.name) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_CLEAR_BLOCK : -+ { -+ ELANCTRL_STATS_CLEAR_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_CLEAR_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* statidx is not a pointer */ -+ if (elan_stats_clear_block(args.statidx) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK : -+ { -+ ELANCTRL_STATS_GET_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block(args.statidx, args.entries, args.values) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_DEVINFO : -+ { -+ ELANCTRL_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_devinfo(args.devidx, args.devinfo) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_POSITION : -+ { -+ ELANCTRL_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_position(args.devidx, args.position) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_SET_POSITION : -+ { -+ ELANCTRL_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_set_position(args.devidx, args.nodeId, args.numNodes) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_CREATE_CAP : -+ { -+ ELANCTRL_CREATE_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_CAP_STRUCT *, sizeof(ELANCTRL_CREATE_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_CAP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if ((elan_validate_cap(&args->cap) != 0) || (elan_create_cap(fp,&args->cap) != 0 )) -+ rep = (-EINVAL); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_CAP : -+ { -+ ELANCTRL_DESTROY_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_CAP_STRUCT *, sizeof(ELANCTRL_DESTROY_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_CAP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_cap(fp, &args->cap) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_CREATE_VP : -+ { -+ ELANCTRL_CREATE_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_VP_STRUCT *, sizeof(ELANCTRL_CREATE_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_VP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if ((elan_validate_cap( &args->map) != 0) || (elan_create_vp(fp, &args->cap, &args->map) != 0 )) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_VP_STRUCT )); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_VP : -+ { -+ ELANCTRL_DESTROY_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_VP_STRUCT *, sizeof(ELANCTRL_DESTROY_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_VP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_vp(fp, &args->cap, &args->map) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_VP_STRUCT )); -+ -+ break; -+ } -+ -+ case ELANCTRL_GET_CAPS : -+ { -+ ELANCTRL_GET_CAPS_STRUCT args; -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_CAPS_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_caps(args.number_of_results, args.array_size, args.caps) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_DEBUG_DUMP : -+ { -+ elan_cap_dump(); -+ elan_dev_dump(); -+ -+ break; -+ } -+ case ELANCTRL_DEBUG_BUFFER : -+ { -+ ELANCTRL_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if ((args.size = qsnet_debug_buffer (args.buffer, args.size)) != -1 && -+ copy_to_user ((void *) arg, &args, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ break; -+ } -+ default: -+ return (-EINVAL); -+ break; -+ } -+ -+ return (rep); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/Makefile 2005-06-01 23:12:54.560445032 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/elan/Makefile -+# -+ -+list-multi := elan.o -+elan-objs := elanmod.o device.o stats.o devinfo.o capability.o elanmod_linux.o capability_general.o bitmap.o -+export-objs := elanmod_linux.o -+obj-$(CONFIG_QSNET) := elan.o -+ -+elan.o : $(elan-objs) -+ $(LD) -r -o $@ $(elan-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/elan/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/Makefile.conf 2005-06-01 23:12:54.561444880 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan.o -+MODULENAME = elan -+KOBJFILES = elanmod.o device.o stats.o devinfo.o capability.o elanmod_linux.o capability_general.o bitmap.o -+EXPORT_KOBJS = elanmod_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/elan/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/quadrics_version.h 2005-06-01 23:12:54.561444880 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/elan/stats.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan/stats.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan/stats.c 2005-06-01 23:12:54.562444728 -0400 -@@ -0,0 +1,277 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.c,v 1.6 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_stats_list); -+static ELAN_STATS_IDX elan_next_statidx=0; -+ -+ELAN_STATS_STRUCT * -+elan_stats_find(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if ( ptr->statidx == statidx ) -+ return ptr; -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find failed %d\n", statidx); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_by_name(caddr_t block_name) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if (!strcmp(ptr->block_name, block_name)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_CTRL, "elan_stats_find_by_name found %s (%d,%d)\n", block_name, ptr->statidx, ptr->num_entries); -+ return ptr; -+ } -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find_by_name failed %s\n", block_name); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_next(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ -+ if ( ptr->statidx > statidx ) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+int -+elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_block) -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX next = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_stats_find_next(statidx)) != NULL) -+ next = target->statidx; -+ -+ copyout(&next, next_block, sizeof(ELAN_STATS_IDX) ); -+ -+ kmutex_unlock(&elan_mutex); -+ return 0; -+} -+ -+int -+elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries) -+ -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX index = 0; -+ uint entries = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_find_index %s \n", block_name); -+ -+ if ((target = elan_stats_find_by_name(block_name)) != NULL) -+ { -+ index = target->statidx; -+ entries = target->num_entries; -+ } -+ -+ ELAN_DEBUG3(ELAN_DBG_CTRL, "elan_stats_find_index found %d %d (target=%p)\n", index, entries, target); -+ -+ copyout(&index, statidx, sizeof(ELAN_STATS_IDX)); -+ copyout(&entries, num_entries, sizeof(uint)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_block_info statidx %d\n",statidx); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_block_info name %s entries %d\n",block_name, *num_entries); -+ -+ copyout( target->block_name, block_name, ELAN_STATS_NAME_MAX_LEN); -+ copyout(&target->num_entries, num_entries, sizeof(uint)); -+ -+ res = ESUCCESS; -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_index_name statidx %d index %d\n",statidx, index); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_name== NULL) -+ { -+ ELAN_DEBUG0(ELAN_DBG_CTRL, "elan_stats_get_index_name no callback\n"); -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ if ((res = target->ops->elan_stats_get_name(target->arg, index, name)) == 0) -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_index_name name %s\n",name); -+ -+ } -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_block == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_get_block(target->arg, entries, values); -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_clear_block (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_clear_block == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_clear_block(target->arg); -+ } -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+void -+elan_stats_next_statidx(void) -+{ -+ /* XXXXX need to put not in use check here incase we loop MRH */ -+ /* tho its a bigish loop :) */ -+ elan_next_statidx++; -+ if (!elan_next_statidx) -+ elan_next_statidx++; -+} -+ -+int -+elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ -+ /* create it and add */ -+ KMEM_ALLOC(target, ELAN_STATS_STRUCT *, sizeof(ELAN_STATS_STRUCT), 1); -+ if (target == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ elan_stats_next_statidx(); -+ -+ *statidx = elan_next_statidx; -+ -+ target->statidx = elan_next_statidx; -+ target->num_entries = num_entries; -+ target->ops = ops; -+ target->arg = arg; -+ strcpy(target->block_name, block_name); -+ -+ list_add_tail(&target->node, &elan_stats_list); -+ -+ kmutex_unlock(&elan_mutex); -+ return 0; -+} -+ -+int -+elan_stats_deregister (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_STATS_STRUCT)); -+ } -+ kmutex_unlock(&elan_mutex); -+ -+ return target == NULL ? EINVAL : 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/context.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/context.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/context.c 2005-06-01 23:12:54.565444272 -0400 -@@ -0,0 +1,2101 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context.c,v 1.116.2.1 2004/11/12 14:24:18 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context.c,v $ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+/* -+ * Global variables configurable from /etc/system file -+ * (OR /etc/sysconfigtab on Digital UNIX) -+ */ -+int ntrapped_threads = 64; -+int ntrapped_dmas = 64; -+int ntrapped_events = E3_NonSysCntxQueueSize + 128; -+int ntrapped_commands = 64; -+int noverflow_commands = 1024; -+int nswapped_threads = 64; -+int nswapped_dmas = 64; -+ -+#define NUM_HALTOPS 8 -+ -+void *SwapListsLockInfo; -+void *CmdLockInfo; -+ -+static void HaltSwapContext (ELAN3_DEV *dev, void *arg); -+ -+static char *OthersStateStrings[] = {"others_running", "others_halting", "others_swapping", -+ "others_halting_more", "others_swapping_more", "others_swapped"}; -+ -+ELAN3_CTXT * -+elan3_alloc (ELAN3_DEV *dev, int kernel) -+{ -+ ELAN3_CTXT *ctxt; -+ int i; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_FN, "elan3_alloc: %s\n", kernel ? "kernel" : "user"); -+ -+ KMEM_ZALLOC (ctxt, ELAN3_CTXT *, sizeof (ELAN3_CTXT), TRUE); -+ -+ if (ctxt == NULL) -+ return (NULL); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+ ctxt->Device = dev; -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ ctxt->RefCnt = 1; -+ ctxt->Position = dev->Position; -+ -+ if (kernel) -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_KERNEL; -+ else -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_NO_LWPS; -+ -+ ctxt->Elan3mmu = elan3mmu_alloc (ctxt); -+ -+ kcondvar_init (&ctxt->Wait); -+ kcondvar_init (&ctxt->CommandPortWait); -+ kcondvar_init (&ctxt->LwpWait); -+ kcondvar_init (&ctxt->HaltWait); -+ -+ spin_lock_init (&ctxt->InputFaultLock); -+ -+ kmutex_init (&ctxt->SwapListsLock); -+ kmutex_init (&ctxt->CmdPortLock); -+ kmutex_init (&ctxt->NetworkErrorLock); -+ kmutex_init (&ctxt->CmdLock); -+ -+ krwlock_init (&ctxt->VpLock); -+ -+ KMEM_GETPAGES (ctxt->FlagPage, ELAN3_FLAGSTATS *, 1, TRUE); -+ if (!ctxt->FlagPage) -+ goto error; -+ bzero ((char *) ctxt->FlagPage, PAGESIZE); -+ -+ KMEM_ZALLOC (ctxt->CommandTraps, COMMAND_TRAP *, sizeof (COMMAND_TRAP) * ntrapped_commands, TRUE); -+ if (!ctxt->CommandTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->ThreadTraps, THREAD_TRAP *, sizeof (THREAD_TRAP) * ntrapped_threads, TRUE); -+ if (!ctxt->ThreadTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->DmaTraps, DMA_TRAP *, sizeof (DMA_TRAP) * ntrapped_dmas, TRUE); -+ if (!ctxt->DmaTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->EventCookies, EVENT_COOKIE *, sizeof (EVENT_COOKIE) * ntrapped_events, TRUE); -+ if (!ctxt->EventCookies) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->Commands, CProcTrapBuf_BE *, sizeof (CProcTrapBuf_BE) * noverflow_commands,TRUE); -+ if (!ctxt->Commands) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapThreads, E3_Addr *, sizeof (E3_Addr) * nswapped_threads, TRUE); -+ if (!ctxt->SwapThreads) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapDmas, E3_DMA_BE *, sizeof (E3_DMA_BE) * nswapped_dmas, TRUE); -+ if (!ctxt->SwapDmas) -+ goto error; -+ -+ /* -+ * "slop" is defined as follows : -+ * number of entries REQUIRED to be left spare to consume all other traps -+ * up until the time that the context can be swapped out. -+ * -+ * CommandTrapQ : 1 command issued by main + 1 issued by the thread processor per elan -+ * ThreadTrapQ : 2 from command + 2 input -+ * DmaTrapQ : 2 from command + 2 input -+ * EventTrapQ : 2 from command + 1 thread + 1 dma + 2 input + E3_NonSysCntxQueueSize -+ */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ELAN3_QUEUE_INIT (ctxt->CommandTrapQ, ntrapped_commands, 2); -+ ELAN3_QUEUE_INIT (ctxt->ThreadTrapQ, ntrapped_threads, 4); -+ ELAN3_QUEUE_INIT (ctxt->DmaTrapQ, ntrapped_dmas, 4); -+ ELAN3_QUEUE_INIT (ctxt->EventCookieQ, ntrapped_events, MIN(E3_NonSysCntxQueueSize + 6, ntrapped_events - 6)); -+ ELAN3_QUEUE_INIT (ctxt->CommandQ, noverflow_commands, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapThreadQ, nswapped_threads, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapDmaQ, nswapped_dmas, 0); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+#if defined(DIGITAL_UNIX) -+ /* Allocate the segelan for the command port */ -+ if (! kernel && elan3_segelan3_create (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+#endif -+ -+ /* -+ * Initialise the Input Fault list -+ */ -+ spin_lock (&ctxt->InputFaultLock); -+ for (i = 0; i < NUM_INPUT_FAULT_SAVE; i++) -+ ctxt->InputFaults[i].Next = (i == (NUM_INPUT_FAULT_SAVE-1)) ? NULL : &ctxt->InputFaults[i+1]; -+ ctxt->InputFaultList = &ctxt->InputFaults[0]; -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ ReserveHaltOperations (dev, NUM_HALTOPS, TRUE); -+ -+ if ((ctxt->RouteTable = AllocateRouteTable (ctxt->Device, ELAN3_MAX_VPS)) == NULL) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_FN, "elan3_alloc: cannot map route table\n"); -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+ -+ return (ctxt); -+ -+ -+ error: -+ -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ if (ctxt->FlagPage) -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ if (ctxt->CommandTraps) -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ if (ctxt->ThreadTraps) -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ if (ctxt->DmaTraps) -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ if (ctxt->EventCookies) -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ if (ctxt->Commands) -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ if (ctxt->SwapThreads) -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ if (ctxt->SwapDmas) -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+ -+ return (NULL); -+} -+ -+void -+elan3_free (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_free: %p \n", ctxt); -+ -+ elan3_removevp (ctxt, ELAN3_INVALID_PROCESS); /* Remove any virtual process mappings */ -+ -+#if defined(DIGITAL_UNIX) -+ WaitForContext (ctxt); /* wait for all references to this context to go away */ -+#endif -+ -+ if (ctxt->RouteTable) -+ FreeRouteTable (dev, ctxt->RouteTable); -+ ctxt->RouteTable = NULL; -+ -+ elan3mmu_free (ctxt->Elan3mmu); /* free of our Elan3mmu */ -+ -+ if (ctxt->Private) /* Call back to "user" to free off */ -+ ELAN3_OP_FREE_PRIVATE (ctxt); /* private data */ -+ -+#if defined(DIGITAL_UNIX) -+ if (! CTXT_IS_KERNEL(ctxt)) -+ elan3_segelan3_destroy (ctxt); /* Unmap the command port from the users address space. */ -+#endif -+ -+ ReleaseHaltOperations (dev, NUM_HALTOPS); -+ -+ if (ctxt->Input0Resolver) -+ CancelNetworkErrorResolver (ctxt->Input0Resolver); -+ -+ if (ctxt->Input1Resolver) -+ CancelNetworkErrorResolver (ctxt->Input1Resolver); -+ -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ -+ CompleteNetworkErrorFixup (ctxt, nef, ESRCH); -+ } -+ -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+} -+ -+int -+elan3_doattach(ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ unsigned long pgnum = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ /* Map in the command port for this context */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_COMMAND_PORT, &ctxt->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ctxt->CommandPageHandle) != ESUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_doattach: MapDeviceRegister failed"); -+ return (EINVAL); -+ } -+ -+ ctxt->CommandPort = ctxt->CommandPage + pgoff; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = 0; -+ if (ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) != NULL) -+ res = EBUSY; -+ else -+ { -+ if ((res = elan3mmu_attach (ctxt->Device, cap->cap_mycontext, ctxt->Elan3mmu, -+ ctxt->RouteTable->Table, ctxt->RouteTable->Size-1)) == 0) -+ { -+ ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) = ctxt; -+ ctxt->Capability = *cap; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (res == ESUCCESS) -+ elan3_swapin (ctxt, CTXT_DETACHED); -+ else -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ ctxt->CommandPort = (ioaddr_t) 0; -+ } -+ -+ return (res); -+} -+ -+void -+elan3_destroy_callback( void * args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ if (map == NULL) -+ { -+ /* the cap is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the cap is being destroyed \n"); -+ } -+ else -+ { -+ /* the map is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the map is being destroyed \n"); -+ } -+} -+ -+int -+elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int type; -+ int res; -+ -+ switch (type = elan3_validate_cap (dev, cap, ELAN_USER_ATTACH)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ((res = elan_attach_cap(cap, dev->Devinfo.dev_rail, ctxt, elan3_destroy_callback)) != 0) -+ return res; -+ break; -+ -+ default: -+ return (EINVAL); -+ } -+ -+ if (((res = elan3_doattach(ctxt,cap)) != ESUCCESS) && (type == ELAN_CAP_RMS)) -+ elan_detach_cap(cap, dev->Devinfo.dev_rail); -+ -+ return res; -+} -+ -+void -+elan3_detach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int need_to_call_elanmod_detach = 0; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_detach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_detach: context not attached \n"); -+ return ; -+ } -+ -+ /* must you be in the ctx_table ?? */ -+ -+ switch (ctxt->Capability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ if (ELAN3_SYSTEM_CONTEXT (ctxt->Capability.cap_mycontext)) -+ return ; -+ -+ if (! (ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ need_to_call_elanmod_detach = 1; -+ -+ break; -+ } -+ default: -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ if (need_to_call_elanmod_detach) -+ elan_detach_cap(&ctxt->Capability, dev->Devinfo.dev_rail); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+} -+ -+void -+elan3_dodetach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_dodetach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_dodetach: context not attached \n"); -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ elan_nullcap (&ctxt->Capability); -+} -+ -+void -+elan3_swapin (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->Status & CTXT_SWAPPED_REASONS); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapin: status %x State %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* In transition */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); -+ -+ if (reason == CTXT_NO_LWPS && ctxt->LwpCount++ != 0) /* Added another LWP */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ if ((ctxt->Status & ~reason) & CTXT_SWAPPED_REASONS) -+ ctxt->Status &= ~reason; -+ else -+ { -+ ASSERT (ctxt->Status & CTXT_SWAPPED_OUT); -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_SWAPPED); -+ -+ /* -+ * Will not be swapped out anymore, so ask the "user" to perform -+ * any swapping in he needs before letting the context run again. -+ */ -+ -+ ctxt->Status &= ~(CTXT_SWAPPED_OUT | CTXT_QUEUES_EMPTY | reason); -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_OK && ctxt->Input1Trap.State == CTXT_STATE_OK) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapin: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+void -+elan3_swapout (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int cansleep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapout: status %x state %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ if (reason == CTXT_NO_LWPS) -+ { -+ if (--ctxt->LwpCount != 0) /* Still other LWPs running */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); /* Wakeup anyone waiting on LwpCount */ -+ } -+ -+ ctxt->Status |= reason; -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* wait for someone else to finish swapping */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* out */ -+ -+ if (ctxt->Status & CTXT_SWAPPED_OUT) -+ { -+ if (reason == CTXT_NO_LWPS) /* Wakeup other thread waiting on LWP exit */ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * mark the context as swapping out. -+ */ -+ ctxt->Status |= CTXT_SWAPPING_OUT; -+ -+ if (reason != CTXT_FIXUP_NETERR) -+ { -+ /* -+ * Stop all of the lwps. -+ */ -+ while (ctxt->LwpCount) -+ { -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); /* Wake up any lwps */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* then wait for them to enter elan3_swapout */ -+ } -+ } -+ -+ StartSwapoutContext (ctxt, 0, NULL); -+ for (;;) -+ { -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: HandleExceptions\n"); -+ -+ cansleep = (HandleExceptions(ctxt, &flags) == ESUCCESS); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: OthersState=%d cansleep=%d\n", ctxt->OthersState, cansleep); -+ -+ if (ctxt->OthersState == CTXT_OTHERS_SWAPPED) -+ break; -+ -+ if (cansleep) -+ kcondvar_wait (&ctxt->Wait, &dev->IntrLock, &flags); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: swapped out\n"); -+ -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)); -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)); -+ -+ ctxt->Status |= CTXT_SWAPPED_OUT; -+ ctxt->Status &= ~CTXT_SWAPPING_OUT; -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages) -+{ -+ E3_Addr elanAddr = FaultSave->s.FaultAddress; -+ int writeable; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_FAULT, "elan3_pagefault: elanAddr %08x FSR %08x : %s\n", elanAddr, FaultSave->s.FSR.Status, -+ FaultSave->s.FSR.s.ProtFault ? "protection fault" : "pte invalid"); -+ -+ /* Look at the FSR to determine the fault type etc */ -+ -+ if (FaultSave->s.FSR.Status == 0) /* this is a target abort/parity error, so look */ -+ { /* at the PCI config space registers to determine */ -+ ElanBusError (ctxt->Device); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.AlignmentErr) /* Alignment errors are always fatal. */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Alignment error\n"); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.WalkBadData) /* Memory ECC error during a walk */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Memory ECC error during walk\n"); -+ return (EFAULT); -+ } -+ -+ if (!FaultSave->s.FSR.s.ProtFault && /* DMA memory type changed */ -+ !FaultSave->s.FSR.s.Walking) -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: DMA memory type changed\n"); -+ return (EFAULT); -+ } -+ -+ ASSERT (FaultSave->s.FSR.s.ProtFault ? /* protection errors, should always have a valid pte */ -+ (!FaultSave->s.FSR.s.Walking || !(FaultSave->s.FSR.s.Level==3) || FaultSave->s.FSR.s.FaultPte == ELAN3_ET_PTE) : -+ FaultSave->s.FSR.s.FaultPte == ELAN3_ET_INVALID); /* otherwise it must be an invalid pte */ -+ -+ /* -+ * Determine whether to fault for a 'write' from the access permissions we need, and not -+ * from the access type (WrAcc). -+ */ -+ writeable = (FaultSave->s.FSR.s.AccTypePerm & (1 << FSR_WritePermBit)); -+ -+ /* Check that we have the right permissions for this access type. */ -+ if ((res = elan3mmu_checkperm (ctxt->Elan3mmu, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.AccTypePerm)) != 0) -+ { -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: %s\n", (res == ENOMEM) ? "no protection mapping" : "protection error"); -+ -+ return (res); -+ } -+ -+ res = LoadElanTranslation (ctxt, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.ProtFault, writeable); -+ -+ if (res == ESUCCESS) -+ { -+ BumpStat (ctxt->Device, PageFaults); -+ BumpUserStat (ctxt, PageFaults); -+ } -+ -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: -> %d\n", res); -+ -+ return (res); -+} -+ -+void -+elan3_block_inputter (ELAN3_CTXT *ctxt, int block) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (block) -+ ctxt->Status |= CTXT_USER_FILTERING; -+ else -+ ctxt->Status &= ~CTXT_USER_FILTERING; -+ -+ if (ctxt->Capability.cap_mycontext != ELAN_CAP_UNINITIALISED) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+FixupNetworkErrors (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ctxt->NetworkErrorFixups == NULL) -+ return (ESUCCESS); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ kmutex_lock (&ctxt->NetworkErrorLock); /* single thread while fixing up errors */ -+ elan3_swapout (ctxt, CTXT_FIXUP_NETERR); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ if (ELAN3_OP_FIXUP_NETWORK_ERROR (ctxt, nef) == OP_FAILED) -+ CompleteNetworkErrorFixup (ctxt, nef, EINVAL); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ elan3_swapin (ctxt, CTXT_FIXUP_NETERR); -+ -+ kmutex_unlock (&ctxt->NetworkErrorLock); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+} -+ -+int -+CompleteNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER *rvp) -+{ -+ int state; -+ -+ switch (rvp->Status) -+ { -+ case ESUCCESS: -+ /* -+ * the item still existed at the source - if it's a wait for EOP transaction -+ * then the source will retry - otherwise the remote event will have been -+ * cleared and we should execute it -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESUCCESS zero WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ -+ state = trap->WaitForEopTransaction ? CTXT_STATE_OK : CTXT_STATE_NEEDS_RESTART; -+ -+ break; -+ -+ case ESRCH: -+ /* -+ * the item was not found at the source - we should always execute the transaction -+ * since it will never be resent -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESRCH execute WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ state = CTXT_STATE_NEEDS_RESTART; -+ break; -+ -+ default: /* other errors */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: %d\n", rvp->Status); -+ if (ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, &rvp) == OP_HANDLED) -+ state = CTXT_STATE_NEEDS_RESTART; -+ else -+ state = CTXT_STATE_OK; -+ break; -+ } -+ -+ FreeNetworkErrorResolver (rvp); -+ -+ return (state); -+} -+ -+int -+HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ THREAD_TRAP tproc; -+ DMA_TRAP dproc; -+ NETERR_RESOLVER *rvp; -+ int state; -+ -+ if (ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) -+ { -+ ctxt->Status &= ~CTXT_COMMAND_OVERFLOW_ERROR; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ElanException (ctxt, EXCEPTION_COMMAND_OVERFLOW, COMMAND_PROC, NULL); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ /* XXXX: unmap translations to the command port */ -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input0Trap, &ctxt->Input0Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input1Trap, &ctxt->Input1Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input0Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input0Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input0Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt, &ctxt->Input0Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input0Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input1Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input1Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input1Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt,&ctxt->Input1Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input1Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if (NextTProcTrap (ctxt, &tproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveTProcTrap (ctxt, &tproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_THREAD_QUEUE_FULL; -+ -+ if (NextDProcTrap (ctxt, &dproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveDProcTrap (ctxt, &dproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_DMA_QUEUE_FULL; -+ -+ /* Handle all event interrupts. */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ E3_uint32 cookie = *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->EventCookieQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_LWP) != OP_DEFER) -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ else -+ { -+ spin_lock_irqsave (&dev->IntrLock, *flags); /* place the cookie back on the queue. */ -+ /* note we place it on the front to ensure */ -+ ELAN3_QUEUE_ADD_FRONT (ctxt->EventCookieQ); /* event ordering. */ -+ *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies) = cookie; -+ } -+ } -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_EVENT_QUEUE_FULL; -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ E3_DMA_BE DmaDesc = *ELAN3_QUEUE_FRONT (ctxt->SwapDmaQ, ctxt->SwapDmas); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapDmaQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDmaDesc (ctxt, &DmaDesc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ E3_Addr StackPointer = *ELAN3_QUEUE_FRONT (ctxt->SwapThreadQ, ctxt->SwapThreads); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapThreadQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ReissueStackPointer (ctxt, StackPointer); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_SWAPPING: -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ else -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ -+ break; -+ -+ case CTXT_OTHERS_SWAPPING_MORE: -+ ctxt->OthersState = CTXT_OTHERS_HALTING_MORE; -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping_more -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ break; -+ } -+ return (ESUCCESS); -+} -+ -+int -+RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: status %x\n", ctxt->Status); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ) || ! ELAN3_QUEUE_EMPTY(ctxt->CommandQ)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input0Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input0Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input0Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input1Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input1Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input1Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (SetEventsNeedRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartSetEvents (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ if (TProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ LoadCommandPortTranslation (ctxt); -+ RestartTProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (DProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: setting Command Flag at %p to 0\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 0; -+ -+ if (ctxt->Status & CTXT_WAITING_COMMAND) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "RestartContext: waking up threads waiting for commandport\n"); -+ -+ ctxt->Status &= ~CTXT_WAITING_COMMAND; -+ -+ kcondvar_wakeupall (&ctxt->CommandPortWait, &dev->IntrLock); -+ } -+ } -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+HaltSwapContext (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ E3_ThreadQueue_BE thread; -+ E3_DMA_BE dma; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ u_int *runCount; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_HALTING || ctxt->OthersState == CTXT_OTHERS_HALTING_MORE); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "HaltSwapContext: status %x state %s\n", ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: no more reason to swap -> others_running\n"); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * Capture all other processors since we're not being responsive to -+ * the command processor interrupt. -+ */ -+ CAPTURE_CPUS(); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, TProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, TProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, (void *) &thread, sizeof (E3_ThreadQueue_BE)); -+ -+ if (thread.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK(ctxt->SwapThreadQ, ctxt->SwapThreads) = thread.s.Thread; -+ ELAN3_QUEUE_ADD (ctxt->SwapThreadQ); -+ -+ /* -+ * Remove this entry from the queue by replacing it with -+ * the "magic" thread value. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ thread.s.Context = SysCntx ? SYS_CONTEXT_BIT : 0; -+ thread.s.Thread = VanishingStackPointer; -+ -+ elan3_sdram_copyq_to_sdram (dev, (void *) &thread, FPtr, sizeof (E3_ThreadQueue_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_ThreadQueue); -+ } -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (dma.s.dma_u.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapDmaQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK (ctxt->SwapDmaQ, ctxt->SwapDmas) = dma; -+ ELAN3_QUEUE_ADD (ctxt->SwapDmaQ); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = ((SysCntx ? SYS_CONTEXT_BIT : 0) << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = (E3_Addr) 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* -+ * Release the other processors now before signalling the LWP. -+ */ -+ RELEASE_CPUS(); -+ -+ if (! ELAN3_QUEUE_FULL (ctxt->SwapDmaQ) && !ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ { -+ /* -+ * We've compleletly emptied the elan queues of items in this -+ * context, so we now mark it as fully swapped out. -+ */ -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues emptied -> others_swapping\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING; -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ else -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues not emptied -> others_swapping_more\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+UnloadCommandPageMapping (ELAN3_CTXT *ctxt) -+{ -+ /* -+ * Unload the Elan translations, and flag the main processor to stall after -+ * issueing its next command. -+ */ -+ if (ctxt->CommandPageMapping != NULL && (ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_rgnat_main (ctxt->Elan3mmu, ctxt->CommandPageMapping); -+ -+ if (rgn != NULL) -+ { -+ E3_Addr eaddr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ -+ PRINTF1 (ctxt, DBG_INTR, "UnloadCommandPageMapping: unmapping command port at addr %08x\n", eaddr); -+ -+ elan3mmu_unload (ctxt->Elan3mmu, eaddr, PAGESIZE, PTE_UNLOAD); -+ } -+ -+ ctxt->Status &= ~CTXT_COMMAND_MAPPED_ELAN; -+ } -+} -+ -+void -+StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ u_int *runCount; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "StartSwapoutContext: Status %x OthersState %s\n", -+ ctxt->Status, OthersStateStrings [ctxt->OthersState]); -+ /* -+ * Disable the inputters, we should already have a reason for it. -+ */ -+ SetInputterStateForContext (ctxt, Pend, Maskp); -+ -+ UnloadCommandPageMapping (ctxt); -+ -+ /* -+ * Flag main processor to stall after issueing next command -+ */ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ -+ /* -+ * And queue a haltop to stop the queues and clear it out. -+ */ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_RUNNING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_halting\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_HALTING; -+ -+ QueueHaltOperation (dev, Pend, Maskp, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ break; -+ -+ case CTXT_OTHERS_SWAPPING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_swapping_more\n"); -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ break; -+ default: -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ break; -+ } -+} -+ -+#if defined(DIGITAL_UNIX) -+/* temporary tweaks to priority bump */ -+int lwp_do_prio = 1; -+int lwp_do_nxm = 1; -+int lwp_prio = BASEPRI_USER-1; -+#elif defined(LINUX) -+/* This is the default nice level for the helper LWP */ -+int LwpNice = -1; -+#endif -+ -+int -+elan3_lwp (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: started, context 0x%x\n", ctxt->Capability.cap_mycontext); -+ -+#if defined(DIGITAL_UNIX) -+ { -+ thread_t mythread = current_thread(); -+ if (lwp_do_prio && (lwp_do_nxm || !IS_NXM_TASK(mythread->task))) -+ { -+ mythread->priority = mythread->sched_pri = lwp_prio; -+ mythread->max_priority = BASEPRI_HIGHEST; -+ (void) thread_priority(mythread, lwp_prio, 0, 1); -+ } -+ } -+#elif defined(LINUX) -+ { -+ /* Do the priority trick for the helper LWP so that it -+ * runs in preferance to the user threads which may be -+ * burning CPU waiting for a trap to be fixed up -+ */ -+#ifdef NO_O1_SCHED -+ if (LwpNice >= -20 && LwpNice < 20) -+ current->nice = LwpNice; -+#else -+ set_user_nice(current, LwpNice); -+#endif -+ } -+#endif -+ -+ elan3_swapin (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ /* If we're swapped out, and not detached (or exiting) then wait until we're swapped back in */ -+ /* since otherwise we could "spin" forever continually calling elan3_lwp() */ -+ if ((ctxt->Status & CTXT_SWAPPED_REASONS) && ! (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING))) -+ kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags); -+ -+ for (;;) -+ { -+#if defined(DIGITAL_UNIX) -+ if (thread_should_halt(current_thread()) || -+ CURSIG_CHECK(task_to_proc(current_thread()->task), u.np_uthread)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: exiting on %s\n", -+ thread_should_halt(current_thread()) ? "halt" : "signal"); -+ break; -+ } -+#endif -+ -+ if (ctxt->Status & CTXT_SWAPPED_REASONS) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting on swapped reasons\n"); -+ break; -+ } -+ -+ if (! (ctxt->inhibit)) -+ { -+ if (FixupNetworkErrors (ctxt, &flags) == ESUCCESS && -+ HandleExceptions (ctxt, &flags) == ESUCCESS && -+ RestartContext (ctxt, &flags) == ESUCCESS) -+ { -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ } -+ else -+ { -+ printk("elan3_lwp :: skipping as inhibited\n"); -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ -+ } -+ -+ /* Return EINVAL to elan3_syscall_lwp() when we want it to exit */ -+ res = (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING)) ? EINVAL : 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_swapout (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ FixupNetworkErrors (ctxt, &flags); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = NULL; -+ int new_disabled = 0; -+ int ctxnum; -+ -+ ASSERT (ctxt != NULL); -+ dev = ctxt->Device; -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ new_disabled = (ctxt->Input0Trap.State != CTXT_STATE_OK || -+ ctxt->Input1Trap.State != CTXT_STATE_OK || -+ (ctxt->Status & CTXT_INPUTTER_REASONS) != 0); -+ -+ -+ ctxnum = ctxt->Capability.cap_mycontext; -+ -+#ifndef __lock_lint -+ PRINTF2 (ctxt , DBG_IPROC, "SetInputterState: ctxnum %x %s attached\n", ctxnum, ctxt->Disabled ? "disabled " : ""); -+#endif /* __lock_lint */ -+ -+ if (ctxt->Disabled != new_disabled) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "SetInputterState: ctxnum %x change %s\n", ctxnum, new_disabled ? "enabled to disabled" : "disabled to enabled"); -+ -+ ctxt->Disabled = new_disabled; -+ -+ /* synchronize the context filter for this context */ -+ elan3mmu_set_context_filter (dev, ctxnum, new_disabled, Pend, Maskp); -+ } -+} -+ -+int -+CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int delay = 1; -+ int i, SeenComQueueEmpty; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT (cflags != DmaComQueueNotEmpty || dev->HaltDmaDequeueCount != 0); -+ -+ /* -+ * Flush the command processor queues and poll the queue to see it it empties. -+ */ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * Ensure previous writes have been flushed through the write buffers -+ */ -+ wmb(); mmiob(); -+ -+ /* -+ * If the command processor traps, or it's taking too long to observe -+ * the queue as emtpy, then we need to force the interrupt handler to -+ * run for us. So queue a halt operation for the dma processor. -+ */ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ for (i = 20; i > 0 || (how & ISSUE_COMMAND_CANT_WAIT); i--) -+ { -+ if (SeenComQueueEmpty || (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ break; -+ -+ mb(); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * If we've seen the command queue that we're interested in with nothing in it -+ * and the command processor has not trapped then the commands we've -+ * issued have been successfully processed. -+ */ -+ if (SeenComQueueEmpty && ! (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ { -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: observed dma queue empty and command proc not trapped\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ISSUE_COMMAND_OK); -+ } -+ -+ if ((how & ISSUE_COMMAND_CANT_WAIT) != 0) -+ return (ISSUE_COMMAND_WAIT); -+ -+ /* -+ * Halt the dma processor and wait for it to halt, if the command we've issued has -+ * trapped then the interrupt handler will have moved it to the context structure. -+ */ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for dproc to halt\n"); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, WakeupLwp, ctxt); -+ while (! ctxt->Halted) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for Halted - %d\n", ctxt->Halted); -+ -+ kcondvar_wait (&ctxt->HaltWait, &dev->IntrLock, flags); -+ -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: woken for Halted - %d\n", ctxt->Halted); -+ } -+ ctxt->Halted = 0; -+ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: dproc halted, checking for trap\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ) ? ISSUE_COMMAND_OK : ISSUE_COMMAND_TRAPPED); -+} -+ -+int -+WaitForCommandPort (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (ctxt->Status & CTXT_DETACHED) -+ res = EINVAL; -+ else -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ ctxt->Status |= CTXT_WAITING_COMMAND; -+ if (CTXT_IS_KERNEL(ctxt)) -+ kcondvar_wait (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ else -+ kcondvar_waitsig (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ } -+ -+ res = (!ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) ? EAGAIN : 0; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static char * -+CommandName (int offset) -+{ -+ switch (offset) -+ { -+ case offsetof (E3_CommandPort, PutDma): return ("PutDma"); -+ case offsetof (E3_CommandPort, GetDma): return ("GetDma"); -+ case offsetof (E3_CommandPort, RunThread): return ("RunThread"); -+ case offsetof (E3_CommandPort, WaitEvent0): return ("WaitEvent0"); -+ case offsetof (E3_CommandPort, WaitEvent1): return ("WaitEvent1"); -+ case offsetof (E3_CommandPort, SetEvent): return ("SetEvent"); -+ default: return ("Bad Command"); -+ } -+} -+ -+int -+IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int cflags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (cflags & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_RETRY\n", -+ CommandName (cmdoff), value); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_OK\n", -+ CommandName (cmdoff), value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, ctxt->CommandPort + cmdoff); /* issue command */ -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ /* -+ * Since we may be issuing a command that could trap, and we're interested in -+ * the outcome, the command port trap resolving code must be locked out. -+ */ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (how & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_RETRY\n", value, item); -+ -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ -+ ASSERT (item == NULL || ctxt->CommandPortItem == NULL); -+ -+ /* -+ * Stop the DMA processor from removing entries from the -+ * command port, and force the command processor to do this. -+ * This means that if a trap occurs then it will be the command -+ * processor that traps. -+ */ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p]\n", value, item); -+ -+ /* -+ * Always issue the DMA to the 'write' command, since we've asserted HaltDmaDequeue -+ * the command processor will read the descriptor and transfer it to the run queue. -+ * The command processor looks at the dma_direction field to determine whether it is -+ * a read or a write and whether to alter the dma_souce of the descriptr on the run -+ * queue -+ */ -+ mb(); /* ensure writes to main memory ccompleted */ -+ writel (value, ctxt->CommandPort + offsetof (E3_CommandPort, PutDma)); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_TRAPPED\n", value, item); -+ /* -+ * Remember the item we're issueing so that if the command port traps the item will not -+ * get freed off until the descriptor has been read after the command trap has been fixed -+ * up. -+ */ -+ if (item != NULL) -+ ctxt->CommandPortItem = item; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ -+ return (res); -+} -+ -+int -+WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED && item != NULL) -+ ctxt->CommandPortItem = item; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, E3_FaultSave_BE *FaultSaveArea, int flags) -+{ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ /* -+ * This code re-issues the part of the set event that trapped. -+ */ -+ switch (TrapType) -+ { -+ case MI_ChainedEventError: -+ ElanException (ctxt, EXCEPTION_CHAINED_EVENT, proc, trap, FaultSaveArea->s.EventAddress); -+ break; -+ -+ -+ case MI_SetEventReadWait: -+ /* -+ * Fault occured on the read for the event location. Just re-issue -+ * setevent using EventAddress in E3_FaultSave -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_SetEventReadWait: re-issuing setevent %08x\n", -+ FaultSaveArea->s.EventAddress); -+ -+ ReissueEvent (ctxt, (E3_Addr) FaultSaveArea->s.EventAddress, flags); -+ break; -+ -+ case MI_DoSetEvent: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * perform the set. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check that the event has the block copy bit -+ * set in it, since we couldn't trap here if it -+ * didn't -+ */ -+ if ((EventType & EV_TYPE_BCOPY) != EV_TYPE_BCOPY) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: RunEventType %x\n", EventType); -+ -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ -+ break; -+ } -+ -+ case MI_ThreadUpdateNonSysCntxBack: -+ case MI_ThreadUpdateSysCntxBack: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the thread. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_THREAD)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: Unexpected type=%x for setevent trap. Should be thread\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: RunEventType %x\n", EventType); -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventIntUpdateBPtr: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_EVIRQ)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_RunDmaDesc: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_DMA)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", FaultSaveArea->s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, FaultSaveArea->s.FaultAddress); -+ break; -+ -+ case MI_FinishedSetEvent: -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Simulate the block copy. -+ */ -+ if (SimulateBlockCopy (ctxt, FaultSaveArea->s.EventAddress)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ case MI_BlockCopyEvent: -+ case MI_BlockCopyWaitForReadData: -+ { -+ /* -+ * Fault occured on the read or write of the data for a block copy -+ * event. Simulate the block copy using EventAddress in E3_FaultSave. Must also sample -+ * the event type and then perform a run. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: MI_BlockCopyWaitForReadData: BCopy read fault in BCopy event. Simulating BCopy.\n"); -+ -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ /* XXXX: should handle queue overflow */ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: Queue overflow\n"); -+ -+ ElanException (ctxt, EXCEPTION_QUEUE_OVERFLOW, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BUS_ERROR, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+} -+ -+int -+SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress) -+{ -+ E3_Addr SourcePtrElan; -+ E3_Addr DestPtrElan; -+ unsigned DataType; -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_FAULTED, EVENT_PROC, NULL, EventAddress); -+ return (TRUE); -+ } -+ -+ SourcePtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Source)); -+ DestPtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ DataType = DestPtrElan & EV_BCOPY_DTYPE_MASK; -+ DestPtrElan &= ~EV_BCOPY_DTYPE_MASK; -+ -+ -+ PRINTF3 (ctxt, DBG_EVENT, "SimulateBlockCopy: Event %08x SourcePtr %08x DestPtr %08x\n", -+ EventAddress, SourcePtrElan, DestPtrElan); -+ -+ if (SourcePtrElan & EV_WCOPY) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan, SourcePtrElan); -+ else -+ { -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+ /* -+ * For little endian cpu's we don't need to worry about the data type. -+ */ -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+#else -+ switch (DataType) -+ { -+ case EV_TYPE_BCOPY_BYTE: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_HWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_WORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_DWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ break; -+ } -+#endif -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (FALSE); -+} -+ -+void -+ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr, int flags) -+{ -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent : Event=%08x\n", addr); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), addr, flags) == ISSUE_COMMAND_RETRY) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent: queue event %08x\n", addr); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_SETEVENT]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_SETEVENT, addr); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+} -+ -+int -+SetEventsNeedRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_SETEVENT] != 0); -+} -+ -+void -+RestartSetEvents (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 EventPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_SETEVENT]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_SETEVENT, &item, &EventPointer)) -+ ctxt->ItemCount[LIST_SETEVENT] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), EventPointer, FALSE) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_SETEVENT, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_SETEVENT]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+int -+RunEventType(ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType) -+{ -+ int failed = FALSE; -+ -+ if ((EventType & EV_TYPE_BCOPY) != 0) -+ failed = SimulateBlockCopy(ctxt, FaultSaveArea->s.EventAddress); -+ -+ if ((EventType & EV_TYPE_MASK) == EV_TYPE_THREAD) -+ ReissueStackPointer (ctxt, EventType & ~(EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_MASK) == EV_TYPE_DMA) -+ RestartDmaPtr (ctxt, EventType & ~(EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_EVIRQ) != 0) -+ QueueEventInterrupt (ctxt, EventType & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)); -+ else /* Chained event */ -+ { -+ if ((EventType & ~EV_TYPE_BCOPY) != 0) /* not null setevent */ -+ ReissueEvent (ctxt, EventType & ~(EV_TYPE_MASK_CHAIN|EV_TYPE_MASK_BCOPY), FALSE); -+ } -+ -+ return (failed); -+} -+ -+void -+WakeupLwp (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_INTR, "WakeupLwp: %d\n", SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ctxt->Halted = 1; -+ kcondvar_wakeupone (&ctxt->HaltWait, &dev->IntrLock); -+ -+ PRINTF0 (ctxt, DBG_INTR, "WakeupLwp: woken up context\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_EVENT, "QueueEventInterrupt: cookie %08x\n", cookie); -+ -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_INTR) == OP_DEFER) -+ { -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ else -+ { -+ *(ELAN3_QUEUE_BACK (ctxt->EventCookieQ, ctxt->EventCookies)) = cookie; -+ -+ ELAN3_QUEUE_ADD (ctxt->EventCookieQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ if (ELAN3_QUEUE_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_EVENT_QUEUE_FULL; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+} -+ -+int -+ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...) -+{ -+ int res; -+ va_list ap; -+ -+ va_start (ap, trap); -+ -+ PRINTF2 (ctxt, DBG_FN, "ElanException: proc %d type %d\n", proc, type); -+ -+ res = ELAN3_OP_EXCEPTION (ctxt, type, proc, trap, ap); -+ -+ va_end (ap); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/context_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/context_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/context_linux.c 2005-06-01 23:12:54.566444120 -0400 -@@ -0,0 +1,228 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context_linux.c,v 1.28.2.2 2004/10/28 11:54:56 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr addr, int len, int protFault, int writeable) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ caddr_t mainAddr; -+ int perm; -+ unsigned int off; -+ unsigned long flags; -+ -+ ASSERT (PAGE_ALIGNED (addr) && PAGE_ALIGNED (len)); -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr %08x len %08x%s%s\n", -+ addr, len, protFault ? " prot fault" : "", writeable ? " writeable" : ""); -+ -+ /* Ensure there's enough elan mmu tables for us to use */ -+ elan3mmu_expand (elan3mmu, addr, len, PTBL_LEVEL_3, 0); -+ -+ while (len > 0) -+ { -+ /* -+ * Retrieve permission region and calculate main address -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_elan (elan3mmu, addr); -+ if (rgn == NULL) { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: no permission region at %lx %p\n", -+ (u_long) addr, rgn); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EFAULT); -+ } -+ mainAddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ ASSERT (PAGE_ALIGNED ((unsigned long)mainAddr)); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ /* -+ * If we're tying to load a translation to the elan command port, -+ * then don't do it now, but mark the context to have it reloaded -+ * just before we restart any threads. We do this because we don't -+ * want to call into the segment driver since we could then block -+ * waiting for the command port to become available. -+ */ -+ if (mainAddr == ctxt->CommandPageMapping) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr=%08x maps command port\n", addr); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ UnloadCommandPageMapping (ctxt); -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ struct vm_area_struct *area; -+ struct mm_struct *mm = current->mm; -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((area = find_vma_intersection(mm, (unsigned long)mainAddr, (unsigned long)mainAddr + PAGESIZE)) == NULL) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p no vma\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable && !(area->vm_flags & VM_WRITE)) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p not writeable\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p %s %s\n", -+ mainAddr, writeable ? "writeable" : "readonly", -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ writeable && !pte_write(ptep_value) ? "COW" : "OK"); -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ -+ make_pages_present((unsigned long)mainAddr, (unsigned long)mainAddr + PAGE_SIZE); -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ } -+ -+ /* don't allow user write access to kernel pages if not kernel */ -+ if (!pte_read(ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable) -+ pte_mkdirty(ptep_value); -+ pte_mkyoung (ptep_value); -+ -+ /* now load the elan pte */ -+ if (writeable) -+ perm = rgn->rgn_perm; -+ else -+ perm = ELAN3_PERM_READONLY(rgn->rgn_perm & ELAN3_PTE_PERM_MASK) | (rgn->rgn_perm & ~ELAN3_PTE_PERM_MASK); -+ -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, pte_phys(ptep_value) + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ } -+ -+ len -= PAGESIZE; -+ addr += PAGESIZE; -+ } -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * LoadCommandPortTranslation: -+ * explicitly load an elan translation to the command port. -+ * but only do it if the command port is accessible. -+ * -+ * we call this function just after we have restarted -+ * and trapped commands, since when a command traps -+ * the elan translation to the command port is unloaded. -+ */ -+void -+LoadCommandPortTranslation (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ E3_Addr addr; -+ int perm; -+ physaddr_t phys; -+ unsigned int off; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadCommandPortTranslation: SegAddr=%p Status=%x\n", ctxt->CommandPageMapping, ctxt->Status); -+ -+ if (ctxt->CommandPageMapping != NULL && !(ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_main (elan3mmu, ctxt->CommandPageMapping); -+ if (rgn == (ELAN3MMU_RGN *) NULL) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: no permission for command port\n"); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return; -+ } -+ -+ addr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ perm = rgn->rgn_perm; -+ phys = kmem_to_phys((caddr_t) ctxt->CommandPage); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ if (ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) && !(ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: load xlation addr=%08x phys=%llx perm=%d\n", -+ addr, (unsigned long long)phys, perm); -+ -+ ctxt->Status |= CTXT_COMMAND_MAPPED_ELAN; -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, phys + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/cproc.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/cproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/cproc.c 2005-06-01 23:12:54.567443968 -0400 -@@ -0,0 +1,539 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cproc.c,v 1.46 2004/02/10 15:05:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/cproc.c,v $ */ -+ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+void -+HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ E3_FaultSave_BE FaultSave; -+ CProcTrapBuf_BE TrapBuf; -+ COMMAND_TRAP *trap; -+ ELAN3_CTXT *ctxt; -+ sdramaddr_t CurrTrap; -+ sdramaddr_t LastTrapAddr; -+ int NTrapEntries; -+ int NewPend; -+ unsigned long flags; -+ -+ /* -+ * Temporarily mask out the command processor interrupt, since -+ * we may cause it be re-asserted when we re-issue the commands -+ * from the overflow queue area. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ -+ do { -+ if (NewPend & INT_ComQueue) -+ { -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueError) != 0) -+ { -+ printk ("elan%d: InterruptReg=%x ComQueueStatus=%x\n", dev->Instance, -+ read_reg32 (dev, Exts.InterruptReg), read_reg32 (dev, ComQueueStatus)); -+ panic ("elan: command queue has overflowed !!"); -+ /* NOTREACHED */ -+ } -+ -+ BumpStat (dev, ComQueueHalfFull); -+ -+ /* -+ * Capture the other cpus and stop the threads processor then -+ * allow the command processor to eagerly flush the command queue. -+ */ -+ dev->FlushCommandCount++; dev->HaltThreadCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ /* -+ * Let the threads processor run again, and release the cross call. -+ */ -+ RELEASE_CPUS(); -+ -+ dev->FlushCommandCount--; dev->HaltThreadCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * Re-sample the interrupt register to see if the command processor -+ * has trapped while flushing the queue. Preserve the INT_ComQueue -+ * bit, so we can clear the ComQueueStatus register later. -+ */ -+ NewPend = (read_reg32 (dev, Exts.InterruptReg) | INT_ComQueue); -+ } -+ -+ CurrTrap = dev->CommandPortTraps[dev->CurrentCommandPortTrap]; -+ -+ if (NewPend & INT_CProc) -+ { -+ BumpStat (dev, CProcTraps); -+ -+ /* -+ * Copy the MMU Fault Save area and zero it out for future traps. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), &FaultSave, sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), sizeof (E3_FaultSave)); -+ -+ /* -+ * First entry in the cproc trap save area is the value of Areg and Breg for the -+ * uWord before the address fault. -+ */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf.Align64); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.r.Breg >> 16)); -+ if (ctxt == NULL) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context invalid [%08x.%08x]\n", TrapBuf.r.Areg, TrapBuf.r.Breg); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ { -+ if ((ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) == 0) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->CommandTrapQ, ctxt->CommandTraps); -+ -+ trap->FaultSave = FaultSave; -+ trap->Status.Status = read_reg32 (dev, Exts.CProcStatus.Status); -+ trap->TrapBuf = TrapBuf; -+ -+ /* -+ * The command processor does not stop after it has trapped. It will continue -+ * to save commands for other contexts into the commands port save area. -+ * The valid context for the trap is held in FaultSave. As some of this -+ * trap code uses the context in the status register the local copy must be -+ * updated with the trap context. -+ */ -+ trap->Status.s.Context = (TrapBuf.r.Breg >> 16); -+ -+ PRINTF4 (ctxt, DBG_INTR, "HandleCProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName(trap->Status.s.TrapType)); -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: Areg=%08x Breg=%08x\n", -+ trap->TrapBuf.r.Areg, trap->TrapBuf.r.Breg); -+ -+ if (ELAN3_OP_CPROC_TRAP (ctxt, trap) == OP_DEFER) -+ { -+ ELAN3_QUEUE_ADD (ctxt->CommandTrapQ); -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleCProcTrap: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ } -+ -+ UnloadCommandPageMapping (ctxt); -+ } -+ } -+ -+ /* -+ * Now change the CommandPortTrap queue. -+ * Must stop the command processor, wait for it to stop, find the final -+ * entry in the current cproc trap save area, reset the comm port -+ * trap save address to the other queue, clear the command port interrupt and -+ * set it running normally again, and then let it go again. This is not very -+ * time critical but it would be a good idea to prevent a higher priority -+ * interrupt from slowing down the process to prevent to fifos filling. -+ */ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ SET_SCHED_STATUS (dev, CProcStop); -+ -+ while ((read_reg32 (dev, Exts.SchCntReg) & CProcStopped) == 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for command processor to stop\n"); -+ mb(); -+ } -+ -+ /* -+ * Remember how many entries are in the saved command queue, and -+ * re-initialise it, before restarting the command processor. -+ */ -+ NTrapEntries = (read_reg32 (dev, CProc_TrapSave_Addr) - dev->CommandPortTraps[dev->CurrentCommandPortTrap])/sizeof (E3_uint64); -+ LastTrapAddr = dev->CommandPortTraps[dev->CurrentCommandPortTrap] + NTrapEntries*sizeof (TrapBuf); -+ -+ dev->CurrentCommandPortTrap ^= 1; -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: command trap queue has %d entries\n", NTrapEntries); -+ -+ if (NTrapEntries > ELAN3_COMMAND_TRAP_SIZE/sizeof (E3_uint64)) -+ panic ("HandleCProcTrap: command trap queue has overflowed\n"); -+ -+ if (NewPend & INT_CProc) -+ { -+ /* -+ * Clear the CProc interrupt and set it running normally again. Nothing should -+ * be running now that could issue commands apart from this trap handler. -+ */ -+ PULSE_SCHED_STATUS (dev, RestartCProc); -+ } -+ -+ if (NewPend & INT_ComQueue) -+ { -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. This will also remove the overflow interrupt. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ } -+ -+ /* -+ * And let the command processor start again -+ */ -+ CLEAR_SCHED_STATUS (dev, CProcStop); -+ -+ /* -+ * Now re-issue all the commands that were issued after the command port trapped. -+ * Should halt the dma processor and force command sto be put onto the run queues -+ * to ensure that a remote re-issued command is handled correctly. NOTE it is -+ * not necessary to wait for the dma processor to stop and this will reduce the -+ * performance impact. As CProcHalt is asserted all commands will be flushed -+ * to the queues. -+ */ -+ dev->HaltDmaDequeueCount++; dev->FlushCommandCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * XXXX: should we do a capture/release if the trap overflow -+ * area has a "large" number of commands in it, since -+ * we will just stuff them all back in, together with -+ * all those issued by the other cpus/thread processors. -+ */ -+ while (CurrTrap != LastTrapAddr) -+ { -+ /* Read the next saved (but not trapped) command */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf); -+ -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.s.ContextType >> 16)); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context %x invalid\n", TrapBuf.s.ContextType >> 16); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (!ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF3 (ctxt, DBG_INTR, "HandleCProcTrap: save command %x context %x - %08x\n", -+ (TrapBuf.s.ContextType>>3) & 0x3ff, TrapBuf.s.ContextType >> 17, TrapBuf.s.Addr); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ else -+ { -+ *ELAN3_QUEUE_BACK(ctxt->CommandQ, ctxt->Commands) = TrapBuf; -+ -+ ELAN3_QUEUE_ADD (ctxt->CommandQ); -+ } -+ continue; -+ } -+ -+ /* Reissue the command to the command port for this context */ -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf.s.ContextType>>5) & 0xff, TrapBuf.s.Addr); -+ -+ mb(); -+ if (ELAN3_OP_CPROC_REISSUE(ctxt, &TrapBuf) != OP_HANDLED) -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf.s.ContextType>>5) & 0xff] = TrapBuf.s.Addr; -+ mmiob(); -+ } -+ } -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for queues to empty after reissueing commands\n"); -+ mb(); -+ } -+ -+ dev->HaltDmaDequeueCount--; dev->FlushCommandCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ /* -+ * Re-read the interrupt register and see if we've got another command -+ * port interrupt -+ */ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ } while ((NewPend & (INT_CProc | INT_ComQueue)) != 0); -+ -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+} -+ -+void -+ResolveCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP *trap; -+ int res; -+ unsigned long flags; -+ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = ELAN3_QUEUE_MIDDLE(ctxt->CommandTrapQ, ctxt->CommandTraps); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_EventIntUpdateBPtr: -+ case MI_ChainedEventError: -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: %s\n", MiToName (trap->Status.s.TrapType)); -+ break; -+ -+ default: -+ /* All other traps are MMU related, we should have a fault address and FSR */ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: elan3_pagefault failed for address %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, COMMAND_PROC, trap, &trap->FaultSave, res); -+ -+ /* Set the trap type to 0 so the command does not get re-issued */ -+ trap->Status.s.TrapType = 0; -+ } -+ break; -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ELAN3_QUEUE_CONSUME (ctxt->CommandTrapQ); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+} -+ -+int -+RestartCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP trap; -+ void *item; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)); -+ ELAN3_QUEUE_REMOVE (ctxt->CommandTrapQ); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ BumpUserStat (ctxt, CProcTraps); -+ -+ switch (trap.Status.s.TrapType) -+ { -+ case 0: -+ res = ISSUE_COMMAND_OK; -+ break; -+ -+ case MI_WaitForWaitEventDesc: -+ /* -+ * Fault occured on the read of wait event descriptor for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 desc read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy0: -+ /* -+ * Fault occured on the read of event location for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 event loc fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy1: -+ /* -+ * Fault occured on the read of the event location for wait event type 1. -+ * Areg has the original ptr and count. -+ * Fault already fixed. Just re-issue the wait command using Areg and context. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type1 event location read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent1), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", -+ trap.FaultSave.s.FaultAddress); -+ -+ res = IssueDmaCommand (ctxt, trap.FaultSave.s.FaultAddress, NULL, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ default: -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ FixupEventTrap (ctxt, COMMAND_PROC, &trap, trap.Status.s.TrapType, &trap.FaultSave, ISSUE_COMMAND_FOR_CPROC); -+ -+ res = ISSUE_COMMAND_OK; -+ break; -+ } -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: /* command re-issued ok*/ -+ break; -+ -+ case ISSUE_COMMAND_TRAPPED: /* command trapped, it will have been copied */ -+ return (EAGAIN); /* to the back of the trap queue */ -+ -+ case ISSUE_COMMAND_RETRY: /* didn't issue command, so place back at front for */ -+ spin_lock_irqsave (&dev->IntrLock, flags); /* later (after resolving other traps */ -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ else -+ { -+ ELAN3_QUEUE_ADD_FRONT(ctxt->CommandTrapQ); -+ (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)) = trap; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ -+ default: -+ return (EINVAL); -+ } -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ } -+ -+ /* -+ * GNAT 5409 - if CommandPortItem was not NULL, but other reasons were set, -+ * then we'd not free the CommandPortItem even though we'd re- -+ * issued all trapped and overflowed commands. Hence only return -+ * without clearing CommandPortItem if we will be called again as -+ * either CommandTrapQ or CommandQ is not empty. -+ */ -+ -+ /* Now run the overflowed commands for this context */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: cannot issue overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ -+ /* -+ * Just re-issue the commands, if one traps then the remainder will -+ * just get placed in the overflow queue again and the interrupt handler -+ * will copy them back in here. -+ * -+ * Stop the dma processor from taking commands, since one of the commands -+ * could be a re-issued remote dma, which must be processed by the command -+ * processor. -+ */ -+ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ while (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ CProcTrapBuf_BE *TrapBuf = ELAN3_QUEUE_FRONT (ctxt->CommandQ, ctxt->Commands); -+ -+ PRINTF2 (ctxt, DBG_CPROC, "RestartCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf->s.ContextType>>5) & 0xff, TrapBuf->s.Addr); -+ mb(); /* ensure writes to main memory completed */ -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf->s.ContextType>>5) & 0xff] = TrapBuf->s.Addr; -+ mmiob(); /* and flush through IO writes */ -+ -+ ELAN3_QUEUE_REMOVE (ctxt->CommandQ); -+ } -+ -+ /* observe the command processor having halted */ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, 0, &flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: trapped after issueing overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ } -+ -+ /* remove the command port item, while holding the lock */ -+ item = ctxt->CommandPortItem; -+ ctxt->CommandPortItem = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (item != NULL) /* Free of any item that may have been stored */ -+ { /* because of the commandport trap */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: commandPortItem %p\n", item); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+ -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/dproc.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/dproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/dproc.c 2005-06-01 23:12:54.568443816 -0400 -@@ -0,0 +1,553 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: dproc.c,v 1.52 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/dproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DMA_RETRY_FAIL_COUNT 8 -+ -+static void PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr); -+ -+int -+HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ /* Scoop out the trap information, before restarting the Elan */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ ASSERT(trap->Status.s.WakeupFunction == WakeupNever); -+ -+ /* copy the normal dma access fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), &trap->FaultSave, sizeof (E3_FaultSave_BE)); -+ -+ /* copy all 4 of the dma data fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), &trap->Data0, 4*sizeof (E3_FaultSave_BE)); -+ -+ /* Copy the DMA descriptor */ -+ copy_dma_regs (dev, &trap->Desc); -+ -+ /* Copy the packet info */ -+ trap->PacketInfo.Value = read_reg32 (dev, Exts.Dmas.DmaRds.DMA_PacketInfo.Value); -+ -+ /* update device statistics */ -+ BumpStat (dev, DProcTraps); -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->PacketInfo.s.PacketTimeout) -+ BumpStat (dev, DmaOutputTimeouts); -+ else if (trap->PacketInfo.s.PacketAckValue == C_ACK_ERROR) -+ BumpStat (dev, DmaPacketAckErrors); -+ break; -+ -+ case MI_DmaFailCountError: -+ BumpStat (dev, DmaRetries); -+ break; -+ } -+ -+ /* Must now zero all the FSRs so that a subsequent fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 4*sizeof (E3_FaultSave)); -+ -+ *RestartBits |= RestartDProc; -+ return (TRUE); -+} -+ -+void -+DeliverDProcTrap (ELAN3_DEV *dev, DMA_TRAP *dmaTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ E3_FaultSave_BE *FaultArea; -+ DMA_TRAP *trap; -+ register int i; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, dmaTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverDProcTrap: context %x invalid\n", dmaTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_DPROC_TRAP (ctxt, dmaTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->DmaTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ -+ bcopy (dmaTrap, trap, sizeof (DMA_TRAP)); -+ -+ PRINTF5 (ctxt, DBG_INTR, "DeliverDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x PacketInfo=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, trap->PacketInfo.Value, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_INTR, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_INTR, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_INTR, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_INTR, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ ELAN3_QUEUE_ADD (ctxt->DmaTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->DmaTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverDProcTrap: dma queue full, must swap out\n"); -+ ctxt->Status |= CTXT_DMA_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->DmaTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ E3_FaultSave_BE *FaultArea; -+ int FaultHandled = 0; -+ int res; -+ register int i; -+ -+ PRINTF4 (ctxt, DBG_DPROC, "ResolveDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_DPROC, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_DPROC, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_DPROC, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_DPROC, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_DPROC, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ BumpUserStat (ctxt, DProcTraps); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ /* -+ * Faulted due to packet timeout or a PAckError. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: got a PAckError or the output timed out. Rescheduling dma.\n"); -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_DmaFailCountError: -+ /* -+ * Faulted due to dma fail count. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: Reset dma fail count to %d\n", DMA_RETRY_FAIL_COUNT); -+ -+ if (ElanException (ctxt, EXCEPTION_DMA_RETRY_FAIL, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: dma timeslice queue overflow\n"); -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_UnimplementedError: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: unimplemented dma trap\n"); -+ if (ElanException (ctxt, EXCEPTION_UNIMPLEMENTED, DMA_PROC, trap) == OP_IGNORE) -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ return; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return; -+ -+ case MI_WaitForRemoteDescRead2: -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * The DMA processor has trapped while fetching the dma descriptor, so -+ * zero it out to not confuse the user on an error -+ */ -+ bzero (&trap->Desc, sizeof (trap->Desc)); -+ break; -+ } -+ -+ /* -+ * All other uWords will have updated one of the fault areas, so fix -+ * any faults found in them. If there were no faults found then it -+ * must have been a bus error -+ */ -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT ((FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block64 || -+ (FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block32); -+ -+ ASSERT (FaultArea->s.FaultContext == trap->Status.s.Context); -+ -+ if (((trap->Desc.s.dma_source & PAGEOFFSET) >= (PAGESIZE-E3_BLK_SIZE)) && -+ ((trap->Desc.s.dma_source & PAGEMASK) != ((trap->Desc.s.dma_source + trap->Desc.s.dma_size-1) & PAGEMASK))) -+ { -+ /* XXXX: dma started within last 64 bytes of the page -+ * terminate the process if it has pagefaulted */ -+ if (FaultArea->s.FaultAddress == (trap->Desc.s.dma_source & ~(E3_BLK_SIZE-1))) -+ { -+ printk ("elan%d: invalid dma - context=%x source=%x\n", ctxt->Device->Instance, -+ ctxt->Capability.cap_mycontext, trap->Desc.s.dma_source); -+ -+ if (ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0) != OP_IGNORE) -+ return; -+ } -+ } -+ -+ if (trap->Desc.s.dma_size != 0 && (res = elan3_pagefault (ctxt, FaultArea, 1)) != ESUCCESS) -+ { -+ /* XXXX: Rev B Elans can prefetch data passed the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress < (trap->Desc.s.dma_source+trap->Desc.s.dma_size) || -+ FaultArea->s.FaultAddress > (trap->Desc.s.dma_source+trap->Desc.s.dma_size+E3_BLK_SIZE*2)) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ FaultArea->s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, FaultArea, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ } -+ -+ if (trap->FaultSave.s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT (trap->FaultSave.s.FaultContext == trap->Status.s.Context); -+ -+ if ((trap->FaultSave.s.FSR.Status & FSR_SizeMask) == FSR_RouteFetch) -+ { -+ res = ResolveVirtualProcess (ctxt, trap->FaultSave.s.FaultAddress & 0xffff); /* mask out cookie */ -+ -+ switch (res) -+ { -+ default: -+ if (ElanException (ctxt, EXCEPTION_INVALID_PROCESS, DMA_PROC, trap, trap->FaultSave.s.FaultAddress, res) != OP_IGNORE) -+ return; -+ -+ case EAGAIN: -+ /* XXXX; wait on trail blazing code */ -+ -+ case 0: -+ break; -+ } -+ } -+ else -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ -+ if (! FaultHandled) -+ { -+ ElanBusError (ctxt->Device); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, EFAULT) != OP_IGNORE) -+ return; -+ } -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_WaitForRemoteDescRead2: -+ /* -+ * Faulted while trying to read the dma descriptor for a read dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a remote dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * Faulted while trying to read the dma descriptor for a write dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a write dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_WaitForRemoteRoutes1: -+ case MI_WaitForRemoteRoutes2: -+ case MI_SendRemoteDmaDesc: -+ case MI_SendDmaIdentify: -+ case MI_SendRemoteDmaRoutes2: -+ case MI_WaitForDmaRoutes1: -+ case MI_DmaLoop: -+ case MI_ExitDmaLoop: -+ case MI_GetDestEventValue: -+ case MI_SendFinalUnlockTrans: -+ case MI_SendNullSetEvent: -+ case MI_SendFinalSetEvent: -+ case MI_SendDmaEOP: -+ /* -+ * Faulted either fetching routes or fetching dma data. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ -+ case MI_SendEOPforRemoteDma: -+ case MI_LookAtRemoteAck: -+ case MI_FailedAckIfCCis0: -+ /* -+ * Possible fault when reading the remote desc into the dma data buffers -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a dma data or fetching a route\n"); -+ RestartDmaTrap (ctxt, trap); -+ break; -+ -+ case MI_DequeueSysCntxDma: -+ case MI_DequeueNonSysCntxDma: -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ /* -+ * It is possible that a dma can get back onto the queue while outstanding dma -+ * have not finished trapping. In this case the trap can be ignored as the dma -+ * state has been saved. It might trap again the next time it comes to the front -+ * of the queue and be fixed then. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trap after dma has finished. ignored\n"); -+ break; -+ -+ default: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+DProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_DMA_PTR] != 0 || -+ ctxt->ItemCount[LIST_DMA_DESC] != 0); -+} -+ -+void -+RestartDProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_Addr value; -+ int res; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ while (ctxt->ItemCount[LIST_DMA_PTR]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_DMA_PTR, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_PTR] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue write dma at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, NULL, 0); -+ -+ if (res == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_PTR, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_DMA_PTR]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ -+ while (ctxt->ItemCount[LIST_DMA_DESC]) -+ { -+ if (! ELAN3_OP_GET_BLOCK_ITEM (ctxt, LIST_DMA_DESC, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_DESC] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue dma desc at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, item, 0); -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ break; -+ -+ case ISSUE_COMMAND_RETRY: -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_DESC, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ -+ case ISSUE_COMMAND_TRAPPED: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ /* The item will be freed off when the command port trap */ -+ /* fixed up and the command successfully re-issued */ -+ break; -+ } -+ } -+ } -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaDesc(ELAN3_CTXT *ctxt, E3_DMA_BE *desc) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ if (desc->s.dma_direction != DMA_WRITE) -+ desc->s.dma_direction = (desc->s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ ELAN3_OP_PUT_BLOCK_ITEM (ctxt, LIST_DMA_DESC, (E3_uint32 *) desc); -+ ctxt->ItemCount[LIST_DMA_DESC]++; -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaTrap(ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ /* Negative length DMAs are illegal, since they hangup the dma processor, -+ * if they got generated then they will have been spotted by PollForDmahungup, -+ * and delivered to us with a Dequeue suspend address, -+ * -+ * GNAT sw-elan3/3908: Moved this check into this new function to avoid -+ * it sampling old or invalid register state -+ */ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0); -+ else -+ RestartDmaDesc (ctxt, &trap->Desc); -+} -+ -+void -+RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_DMA_PTR, ptr); -+ ctxt->ItemCount[LIST_DMA_PTR]++; -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+static void -+PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_DMA *dma; -+ -+ /* Dont call a function which takes locks unless we need to */ -+ if (!(elan3_debug & DBG_DPROC)) -+ return; -+ -+ dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: type %08x size %08x source %08x dest %08x\n", -+ fuword ((int *) &dma->dma_type), fuword ((int *) &dma->dma_size), -+ fuword ((int *) &dma->dma_source), fuword ((int *) &dma->dma_dest)); -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: Dest %08x %08x Local %08x %08x\n", -+ fuword ((int *) &dma->dma_destEvent), fuword ((int *) &dma->dma_destCookieProc), -+ fuword ((int *) &dma->dma_srcEvent), fuword ((int *) &dma->dma_srcCookieProc)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elan3mmu_generic.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elan3mmu_generic.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elan3mmu_generic.c 2005-06-01 23:12:54.573443056 -0400 -@@ -0,0 +1,3255 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_generic.c,v 1.75.2.1 2004/12/14 10:19:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_generic.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+# define zero_all_ptbls -+#endif -+ -+/* -+ * Debugging -+ */ -+int elan3mmu_debug = 0; -+ -+#define N_L3PTBL_MTX (0x20) -+#define N_L2PTBL_MTX (0x40) -+#define N_L1PTBL_MTX (0x20) -+ -+#define L3PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L3PTBL_MTX - 1)) -+static spinlock_t l3ptbl_lock[N_L3PTBL_MTX]; -+ -+#define L2PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L2PTBL_MTX - 1)) -+static spinlock_t l2ptbl_lock[N_L2PTBL_MTX]; -+ -+#define L1PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L1PTBL_MTX - 1)) -+static spinlock_t l1ptbl_lock[N_L1PTBL_MTX]; -+ -+ -+#define BASE2VA(p) ((E3_Addr)((p)->ptbl_base << 16)) -+#define VA2BASE(v) ((u_short)(((uintptr_t)(v)) >> 16)) -+ -+ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+static void elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *); -+static void elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags); -+ -+static ELAN3_PTBL *elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep); -+static ELAN3_PTBL *elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp); -+ -+static ELAN3_PTBL *elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx); -+void elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl); -+ -+void elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx); -+ -+static ELAN3_PTBL *elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu); -+static ELAN3_PTBL *elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+static ELAN3_PTBL *elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+ -+static int elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl); -+static ELAN3_PTBL *elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr); -+ -+static spinlock_t *elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl); -+ -+/* -+ * Encoding of MMU permissions against access type, -+ * to allow quick permission checking against access -+ * type. -+ */ -+u_char elan3mmu_permissionTable[] = -+{ -+ 0xcc, /* 11001100 ELAN3_PERM_NULL */ -+ 0x01, /* 00000001 ELAN3_PERM_LOCALREAD */ -+ 0x05, /* 00000101 ELAN3_PERM_READ */ -+ 0x33, /* 00110011 ELAN3_PERM_NOREMOTE */ -+ 0x37, /* 00110111 ELAN3_PERM_REMOTEREAD */ -+ 0x3f, /* 00111111 ELAN3_PERM_REMOTEWRITE */ -+ 0xf7, /* 11110111 ELAN3_PERM_REMOTEEVENT */ -+ 0xff, /* 11111111 ELAN3_PERM_REMOTEALL */ -+} ; -+ -+void -+elan3mmu_init() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_init: initialising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_init (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_init (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_init (&l3ptbl_lock[i]); -+ -+ elan3mmu_global_stats.version = ELAN3MMU_STATS_VERSION; -+ -+ elan3mmu_init_osdep(); -+} -+ -+void -+elan3mmu_fini() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_fini: finalising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_destroy (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_destroy (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_destroy (&l3ptbl_lock[i]); -+ -+ elan3mmu_fini_osdep(); -+} -+ -+ELAN3MMU * -+elan3mmu_alloc (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu; -+ ELAN3_PTBL *l1ptbl; -+ -+ ALLOC_ELAN3MMU (elan3mmu, TRUE); -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lock); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ elan3mmu->elan3mmu_ergns = NULL; -+ elan3mmu->elan3mmu_etail = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ elan3mmu->elan3mmu_mrgns = NULL; -+ elan3mmu->elan3mmu_mtail = NULL; -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ctxt = ctxt; -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lXptbl_lock); -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ l1ptbl = elan3mmu_alloc_l1ptbl(ctxt->Device, 0, elan3mmu); -+ -+ elan3mmu->elan3mmu_ctp = (sdramaddr_t) 0; -+ elan3mmu->elan3mmu_dev = ctxt->Device; -+ elan3mmu->elan3mmu_l1ptbl = l1ptbl; -+ -+ /* Ensure that there are at least some level 3 page tables, since if a level 2 and */ -+ /* a level 3 table are allocated together, then the level 3 is allocated with the NO_ALLOC */ -+ /* flag, thus there MUST be at least one that can be stolen or on the free list */ -+ if (elan3mmu->elan3mmu_dev->Level[PTBL_LEVEL_3].PtblFreeList == NULL) -+ elan3mmu_create_ptbls (elan3mmu->elan3mmu_dev, PTBL_LEVEL_3, 0, 0); -+ -+ HAT_PRINTF1 (1, "elan3mmu_alloc: elan3mmu %p\n", elan3mmu); -+ -+ elan3mmu_alloc_osdep (elan3mmu); -+ -+ return (elan3mmu); -+} -+ -+void -+elan3mmu_free (ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ ELAN3_PTBL *l1ptbl; -+ spinlock_t *l1lock; -+ unsigned long l1flags; -+ unsigned long flags; -+ -+ HAT_PRINTF1 (1, "elan3mmu_free : elan3mmu %p\n", elan3mmu); -+ -+ /* -+ * Invalidate the level1 page table, since it's already removed -+ * from the context table, there is no need to flush the tlb. -+ */ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ elan3mmu->elan3mmu_l1ptbl = NULL; -+ -+ if (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, PTBL_LEVEL_1, &l1lock, &l1flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval (elan3mmu, l1ptbl, PTE_UNLOAD_NOFLUSH); -+ elan3mmu_free_l1ptbl (elan3mmu->elan3mmu_dev, l1ptbl, l1lock, l1flags); -+ } -+ -+ /* -+ * Free of any permission regions. -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ while ((rgn = elan3mmu->elan3mmu_mrgns) != NULL) -+ { -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ -+ /* -+ * Free the lXptbl list -+ */ -+ ASSERT (elan3mmu->elan3mmu_lXptbl == NULL); /* XXXX MRH need to add list removal */ -+ -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ spin_lock_destroy (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ spin_lock_destroy (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU (elan3mmu); -+} -+ -+/*================================================================================*/ -+/* Interface routines to device driver */ -+static void -+elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ASSERT ((read_reg32 (dev, Exts.InterruptReg) & (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)) == -+ (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)); -+ -+ dev->FilterHaltQueued = 0; -+ -+ write_reg32 (dev, Input_Context_Fil_Flush, 0); -+ -+ HAT_PRINTF0 (1, "elan3mmu_flush_context_filter completed\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ int mctx = ctx & MAX_ROOT_CONTEXT_MASK; -+ sdramaddr_t ctp = dev->ContextTable + mctx * sizeof (E3_ContextControlBlock); -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ASSERT ((mctx < 32 || mctx >= ELAN3_KCOMM_BASE_CONTEXT_NUM) ? (ctx & SYS_CONTEXT_BIT) : ! (ctx & SYS_CONTEXT_BIT)); -+ -+ elan3_sdram_writel (dev, ctp + offsetof (E3_ContextControlBlock, filter), -+ ((ctx & SYS_CONTEXT_BIT) ? E3_CCB_CNTX0 : 0) | (disabled ? E3_CCB_DISCARD_ALL : 0)); -+ -+ HAT_PRINTF4 (1, "elan3mmu_set_context_filter: ctx %x [%lx] -> %s (%x)\n", ctx, ctp, -+ disabled ? "up" : "down", elan3_sdram_readl (dev, ctp + offsetof (E3_ContextControlBlock, filter))); -+ -+ /* queue a halt operation to flush the context filter while the inputter is halted */ -+ if (dev->FilterHaltQueued == 0) -+ { -+ dev->FilterHaltQueued = 1; -+ QueueHaltOperation (dev, Pend, Maskp, INT_DiscardingSysCntx | INT_DiscardingNonSysCntx, -+ elan3mmu_flush_context_filter, NULL); -+ } -+} -+ -+int -+elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask) -+{ -+ sdramaddr_t ctp; -+ ELAN3_PTP trootptp; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3mmu->elan3mmu_ctp = ctp; -+ -+ trootptp = PTBL_TO_PTADDR (elan3mmu->elan3mmu_l1ptbl) | ELAN3_ET_PTP; -+ -+ HAT_PRINTF4 (1, "elan3mmu_attach: ctp at %08lx : trootptp=%08x VPT_ptr=%08lx VPT_mask=%08x\n", -+ ctp, trootptp, routeTable, routeMask); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), trootptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), routeTable); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), routeMask); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_detach (ELAN3_DEV *dev, int ctx) -+{ -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t ctp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return; -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ HAT_PRINTF1 (1, "elan3mmu_detach: clearing ptp at %lx\n", ctp); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), invalidptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), 0); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), 0); -+ -+ ElanFlushTlb (dev); -+} -+ -+int -+elan3mmu_reference (ELAN3MMU *elan3mmu, int ctx) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t ctp; -+ E3_ContextControlBlock ccb; -+ ELAN3_PTP trootptp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3_sdram_copyl_from_sdram (dev, elan3mmu->elan3mmu_ctp, &ccb, sizeof (E3_ContextControlBlock)); -+ elan3_sdram_copyl_to_sdram (dev, &ccb, ctp, sizeof (E3_ContextControlBlock)); -+ -+ return (ESUCCESS); -+ -+} -+/*================================================================================*/ -+/* Elan permission regions. */ -+ -+/* elan address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, -+ E3_Addr addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ E3_Addr base; -+ E3_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_ergns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_ergnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_ergns; -+ -+ forward = 0; -+ if ((u_long) (base = rgn->rgn_ebase) < (u_long)addr) -+ { -+ if ((u_long)addr <= ((u_long) base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_etail; -+ -+ if ((u_long) (lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < (u_long) addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((u_long) (addr - base) > (u_long) (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_ergns; -+ -+ if ((u_long)lorgn->rgn_ebase > (u_long) addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((u_long)(addr - lorgn->rgn_ebase) < (u_long) (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((u_long)(rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ rgn = rgn->rgn_enext; -+ -+ if ((u_long)rgn->rgn_ebase <= (u_long)addr) -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while ((u_long)rgn->rgn_ebase > (u_long)addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_eprev; -+ } -+ -+ if ((u_long) (rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, nrgn->rgn_ebase, 1); -+ E3_Addr nbase = nrgn->rgn_ebase; -+ E3_Addr ntop = nbase + nrgn->rgn_len - 1; /* avoid wrap */ -+ E3_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_ergns = elan3mmu->elan3mmu_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((u_long)(base + rgn->rgn_len - 1) < (u_long)nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = elan3mmu->elan3mmu_etail = nrgn; -+ } -+ else -+ { -+ if ((u_long)nbase >= (u_long)base || (u_long)ntop >= (u_long)base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (elan3mmu->elan3mmu_ergns == rgn) -+ elan3mmu->elan3mmu_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_ebase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_ergnlast = rgn->rgn_enext; -+ if (rgn == elan3mmu->elan3mmu_etail) -+ elan3mmu->elan3mmu_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == elan3mmu->elan3mmu_ergns) -+ elan3mmu->elan3mmu_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ E3_Addr base; -+ -+ if (rgn != NULL && (u_long)(base = rgn->rgn_ebase) <= (u_long)addr && (u_long)addr <= (u_long)(base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+/* main address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, -+ caddr_t addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ caddr_t lastaddr; -+ caddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_mrgns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_mrgnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_mprev; -+ } -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, nrgn->rgn_mbase, 1); -+ caddr_t nbase = nrgn->rgn_mbase; -+ caddr_t ntop = nbase + nrgn->rgn_len - 1; -+ caddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_mrgns = elan3mmu->elan3mmu_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = elan3mmu->elan3mmu_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (elan3mmu->elan3mmu_mrgns == rgn) -+ elan3mmu->elan3mmu_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_mbase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_mrgnlast = rgn->rgn_mnext; -+ if (rgn == elan3mmu->elan3mmu_mtail) -+ elan3mmu->elan3mmu_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == elan3mmu->elan3mmu_mrgns) -+ elan3mmu->elan3mmu_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ caddr_t base; -+ -+ if (rgn != NULL && (base = rgn->rgn_mbase) <= addr && addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+elan3mmu_setperm (ELAN3MMU *elan3mmu, -+ caddr_t maddr, -+ E3_Addr eaddr, -+ u_int len, -+ u_int perm) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3MMU_RGN *nrgn; -+ unsigned long flags; -+ -+ HAT_PRINTF4 (1, "elan3mmu_setperm: user %p elan %08x len %x perm %x\n", maddr, eaddr, len, perm); -+ -+ if ((((uintptr_t) maddr) & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: alignment failure\n"); -+ return (EINVAL); -+ } -+ -+ if (((uintptr_t) maddr + len - 1) < (uintptr_t) maddr || ((u_long)eaddr + len - 1) < (u_long)eaddr) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: range failure\n"); -+ return (EINVAL); -+ } -+ -+ ALLOC_ELAN3MMU_RGN(nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if (elan3mmu_addrgn_elan (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: elan address exists\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ -+ if (elan3mmu_addrgn_main (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: main address exists\n"); -+ elan3mmu_removergn_elan (elan3mmu, eaddr); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_clrperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len) -+{ -+ E3_Addr raddr; -+ E3_Addr rtop; -+ ELAN3MMU_RGN *nrgn; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *rgn_next; -+ u_int ssize; -+ unsigned long flags; -+ int res; -+ -+ HAT_PRINTF2 (1, "elan3mmu_clrperm: elan %08x len %x\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ -+ ALLOC_ELAN3MMU_RGN (nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, rgn->rgn_len, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ -+ elan3mmu_unload (elan3mmu, raddr, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ elan3mmu_unload (elan3mmu, raddr, rtop - raddr + 1, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ ASSERT (nrgn != NULL); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1);; -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = ((rgn->rgn_ebase + rgn->rgn_len - 1) - rtop); -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ res = elan3mmu_addrgn_elan (elan3mmu, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = elan3mmu_addrgn_main (elan3mmu, nrgn); -+ ASSERT (res == 0); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ nrgn = NULL; -+ } -+ } -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ if (nrgn != NULL) -+ FREE_ELAN3MMU_RGN (nrgn); -+} -+ -+int -+elan3mmu_checkperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len, -+ u_int access) -+{ -+ E3_Addr raddr = (((E3_Addr) addr) & PAGEMASK); -+ u_int rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ u_int rsize = rtop - raddr + 1; -+ ELAN3MMU_RGN *rgn; -+ -+ HAT_PRINTF3 (1, "elan3mmu_checkperm: user %08x len %x access %x\n", addr, len, access); -+ -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (ENOMEM); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, raddr)) == (ELAN3MMU_RGN *) NULL) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ HAT_PRINTF4 (1, "elan3mmu_checkperm : rgn %x -> %x perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN3_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EACCES); -+ } -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+caddr_t -+elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ caddr_t raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = NULL; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+E3_Addr -+elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ E3_Addr raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_main (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = (E3_Addr) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+void -+elan3mmu_displayrgns(ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: main regions\n"); -+ for (rgn = elan3mmu->elan3mmu_mrgns; rgn; rgn = (rgn->rgn_mnext == elan3mmu->elan3mmu_mrgns) ? NULL : rgn->rgn_mnext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: elan regions\n"); -+ for (rgn = elan3mmu->elan3mmu_ergns; rgn; rgn = (rgn->rgn_enext == elan3mmu->elan3mmu_ergns) ? NULL : rgn->rgn_enext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/*============================================================================*/ -+/* Private functions */ -+#define ELAN3_PTE_IS_VALID(ptbl, pte) \ -+ ((ptbl->ptbl_flags & PTBL_KERNEL) ? \ -+ (pte&(~ELAN3_PTE_REF)) != elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu) : \ -+ ELAN3_PTE_VALID(pte)) -+ -+void -+elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_expand: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + len); -+ -+ for ( ; len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ break; -+ case PTBL_LEVEL_2: -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ break; -+ default: -+ span = len; -+ break; -+ } -+ -+ if (pte != (sdramaddr_t) 0) -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_reserve: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, 3, &ptbl, &lock, 0, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptes != NULL) -+ { -+ for (i = 0; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ *ptes++ = pte; -+ ptbl->ptbl_valid += (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF4 (2, "elan3mmu_reserve: inc valid for level %d ptbl %p to %d (%d)\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid, (span >> ELAN3_PAGE_SHIFT)); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ int level; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_release: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ if (ptes == NULL) -+ return; -+ -+ tpte = elan3mmu_kernel_invalid_pte (elan3mmu); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ ASSERT (level == PTBL_LEVEL_3); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ -+ for (i = 0 ; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ elan3_writepte (dev, pte, tpte); -+ ptbl->ptbl_valid -= (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF3 (2, "elan3mmu_release: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+} -+ -+void -+elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr) -+ -+{ -+ ELAN3_DEV *dev; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long flags; -+ ELAN3_PTE newpte; -+ ELAN3_PTE oldpte; -+ sdramaddr_t pte; -+ -+ ASSERT((level == PTBL_LEVEL_2) || (level == PTBL_LEVEL_3)); -+ -+ /* Generate the new pte which we're going to load */ -+ dev = elan3mmu->elan3mmu_dev; -+ -+ newpte = elan3mmu_phys_to_pte (dev, paddr, perm); -+ -+ if (attr & PTE_LOAD_BIG_ENDIAN) -+ newpte |= ELAN3_PTE_BIG_ENDIAN; -+ -+ HAT_PRINTF4 (1, "elan3mmu_pteload: elan3mmu %p level %d addr %x pte %llx\n", elan3mmu, level, addr, (long long) newpte); -+ HAT_PRINTF5 (1, "elan3mmu_pteload:%s%s%s perm=%d phys=%llx\n", -+ (newpte & ELAN3_PTE_LOCAL) ? " local" : "", -+ (newpte & ELAN3_PTE_64_BIT) ? " 64 bit" : "", -+ (newpte & ELAN3_PTE_BIG_ENDIAN) ? " big-endian" : " little-endian", -+ (u_int) (newpte & ELAN3_PTE_PERM_MASK) >> ELAN3_PTE_PERM_SHIFT, -+ (unsigned long long) (newpte & ELAN3_PTE_PFN_MASK)); -+ -+ if (level == PTBL_LEVEL_3) -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ else -+ { -+ sdramaddr_t ptp = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ pte = elan3mmu_ptp2pte (elan3mmu, ptp, level); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: level %d ptp at %lx => pte at %lx\n", level, ptp, pte); -+ } -+ -+ if (pte == (sdramaddr_t) 0) -+ { -+ ASSERT (level == PTBL_LEVEL_3 && (attr & (PTE_NO_SLEEP | PTE_NO_STEAL)) == (PTE_NO_SLEEP | PTE_NO_STEAL)); -+ return; -+ } -+ -+ ASSERT (ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == level); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ oldpte = elan3_readpte (dev, pte); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: modify pte at %lx from %llx to %llx\n", pte, (long long) oldpte, (long long) newpte); -+ -+ if (ELAN3_PTE_IS_VALID(ptbl, oldpte)) -+ { -+ ELAN3MMU_STAT(ptereload); -+ -+ ASSERT ((newpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK)) == (oldpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK))); -+ -+ if ((newpte & ~ELAN3_RM_MASK) != (oldpte & ~ELAN3_RM_MASK)) -+ { -+ /* We're modifying a valid translation, it must be mapping the same page */ -+ /* so we use elan3_modifypte to not affect the referenced and modified bits */ -+ elan3_modifypte (dev, pte, newpte); -+ -+ -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ } -+ } -+ else -+ { -+ ELAN3MMU_STAT(pteload); -+ -+ ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_pteload: write pte %lx to %llx\n", pte, (long long) newpte); -+ -+ elan3_writepte (dev, pte, newpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+} -+ -+void -+elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t ptp; -+ spinlock_t *lock; -+ int level; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3(1, "elan3mmu_unload (elan3mmu %p addr %x -> %x)\n", elan3mmu, addr, addr+len-1); -+ -+ for (; len != 0; addr += span, len -= span) -+ { -+ ptp = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptp != (sdramaddr_t) 0) -+ { -+ HAT_PRINTF2 (2, "elan3mmu_unload: unload [%x,%x]\n", addr, addr + span); -+ -+ if ( level == PTBL_LEVEL_3 ) -+ elan3mmu_unload_loop (elan3mmu, ptbl, ptp - PTBL_TO_PTADDR(ptbl), span >> ELAN3_PAGE_SHIFT, attr); -+ else -+ { -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *lXptbl; -+ ELAN3_PTP tptp; -+ int idx; -+ -+ tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tptp); -+ idx = (PTP_TO_PT_PADDR(tptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ if ( level == PTBL_LEVEL_1) -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ else -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ /* invalidate the ptp. */ -+ elan3_writeptp (dev, ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_unload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ } -+} -+ -+static void -+elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ int last_valid = first_valid + nptes; -+ int i; -+ -+ HAT_PRINTF3 (1, "elan3mmu_unloadloop: ptbl %p entries [%d->%d]\n", ptbl, first_valid, last_valid); -+ -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ pte = PTBL_TO_PTADDR(ptbl) + first_valid; -+ -+ for (i = first_valid; i < last_valid; i++, pte += ELAN3_PTE_SIZE) -+ { -+ if (ptbl->ptbl_valid == 0) -+ break; -+ -+ tpte = elan3_readpte (dev, pte); -+ if (! ELAN3_PTE_IS_VALID(ptbl, tpte)) -+ continue; -+ -+ elan3mmu_pteunload (ptbl, pte, flags, NO_MLIST_LOCK); -+ } -+} -+ -+void -+elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ ELAN3_PTE tpte; -+ -+ ASSERT (PTBL_LEVEL (ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ HAT_PRINTF2 (1, "elan3mmu_pteunload: ptbl %p pte %lx\n", ptbl, pte); -+ -+ ELAN3MMU_STAT (pteunload); -+ -+ elan3_invalidatepte (dev, pte); -+ -+ if (! (flags & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ tpte = ELAN3_INVALID_PTE; -+ elan3_writepte (dev, pte, tpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ { -+ tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ -+ elan3_writepte (dev, pte, tpte); -+ } -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteunload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+} -+ -+void -+elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ -+} -+ -+/* -+ * Create more page tables at a given level for this Elan. -+ */ -+static ELAN3_PTBL * -+elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep) -+{ -+ sdramaddr_t pts; -+ ELAN3_PTBL *ptbl; -+ ELAN3_PTBL *first; -+ ELAN3_PTBL *last; -+ ELAN3_PTBL_GR *ptg; -+ register int i; -+ register int inc; -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: create level %d ptbls\n", level); -+ -+ pts = elan3_sdram_alloc (dev, PTBL_GROUP_SIZE); -+ if (pts == (sdramaddr_t) 0) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot map elan pages\n"); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: pts at %lx\n", pts); -+ -+ ALLOC_PTBL_GR (ptg, !(attr & PTE_NO_SLEEP)); /* Allocate the group of page tables */ -+ if (ptg == NULL) /* for this page */ -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot allocate page table group\n"); -+ -+ elan3_sdram_free (dev, pts, PTBL_GROUP_SIZE); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: ptg is %p\n", ptg); -+ -+ ElanSetPtblGr (dev, pts, ptg); -+ -+ HAT_PRINTF4 (2, "elan3mmu_create_ptbls: zeroing %d bytes at %lx, %d bytes at %p\n", -+ PTBL_GROUP_SIZE, pts, (int) sizeof (ELAN3_PTBL_GR), ptg); -+ -+#ifndef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, pts, PTBL_GROUP_SIZE); /* Ensure that all PTEs/PTPs are invalid */ -+#endif -+ bzero ((caddr_t) ptg, sizeof (ELAN3_PTBL_GR)); -+ -+ ptg->pg_addr = pts; -+ ptg->pg_level = level; -+ -+ ptbl = ptg->pg_ptbls; /* Initialise the index in all page tables */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i++) -+ { -+ ptbl->ptbl_index = (u_char) i; -+ ptbl->ptbl_next = (ELAN3_PTBL *) 0xdeaddead; -+ ptbl++; -+ } -+ -+ switch (level) /* Determine the number of ptbls we can */ -+ { /* allocate from this page, by jumping */ -+ case PTBL_LEVEL_X: inc = PTBLS_PER_PTBL_LX; break; /* multiples of the smallest. */ -+ case PTBL_LEVEL_1: inc = PTBLS_PER_PTBL_L1; break; -+ case PTBL_LEVEL_2: inc = PTBLS_PER_PTBL_L2; break; -+ case PTBL_LEVEL_3: inc = PTBLS_PER_PTBL_L3; break; -+ default: inc = PTBLS_PER_PTBL_L3; break; -+ } -+ -+ ptbl = ptg->pg_ptbls; /* Chain them together */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i += inc, ptbl += inc) -+ ptbl->ptbl_next = ptbl + inc; -+ -+ first = ptg->pg_ptbls; /* Determine list of */ -+ last = first + PTBLS_PER_GROUP_MAX - inc; /* ptbls to add to free list */ -+ if (! keep) -+ ptbl = NULL; -+ else -+ { -+ ptbl = first; -+ first = first->ptbl_next; -+ } -+ -+ spin_lock (&dev->Level[level].PtblLock); -+ dev->Level[level].PtblTotal += PTBLS_PER_GROUP_MAX/inc; /* Increment the counts */ -+ dev->Level[level].PtblFreeCount += PTBLS_PER_GROUP_MAX/inc; -+ -+ ELAN3MMU_SET_STAT (num_ptbl_level[level], dev->Level[level].PtblTotal); -+ -+ if (keep) -+ dev->Level[level].PtblFreeCount--; -+ -+ last->ptbl_next = dev->Level[level].PtblFreeList; /* And add to free list */ -+ dev->Level[level].PtblFreeList = first; -+ spin_unlock (&dev->Level[level].PtblLock); -+ -+ spin_lock (&dev->PtblGroupLock); -+ ptg->pg_next = dev->Level[level].PtblGroupList; -+ dev->Level[level].PtblGroupList = ptg; -+ spin_unlock (&dev->PtblGroupLock); -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: returning ptbl %p\n", ptbl); -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp) -+{ -+ E3_Addr ptpa = PTP_TO_PT_PADDR(*ptp); -+ ELAN3_PTBL_GR *pg = ElanGetPtblGr (elan3mmu->elan3mmu_dev, (sdramaddr_t)ptpa & ~(PTBL_GROUP_SIZE-1)); -+ -+ return (pg->pg_ptbls + ((ptpa - pg->pg_addr) >> ELAN3_PT_SHIFT)); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_lXptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ if (dev->Level[PTBL_LEVEL_X].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_X, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: created level X ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_lXptbl: not allowed to steal ptbl for use at level 2\n"); -+ return NULL; -+ } -+ -+ ELAN3MMU_STAT(lX_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ ptbl->ptbl_elan3mmu = elan3mmu; -+ ptbl->ptbl_base = 0; -+ ptbl->ptbl_parent = 0; -+ ptbl->ptbl_flags = PTBL_LEVEL_X | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_lXptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_LX_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx) -+{ -+ ELAN3_PTBL * ptbl_ptr; -+ int index; -+ -+ /* lock whilst looking for space */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* walk the lXptbl list */ -+ ptbl_ptr = elan3mmu->elan3mmu_lXptbl; -+ while ( ptbl_ptr != NULL ) -+ { -+ /* does this ptlb have any free ones */ -+ if ( (index = ptbl_ptr->ptbl_valid) < ELAN3_LX_ENTRIES) -+ { -+ /* better to search from valid count as its likly to be free */ -+ index = ptbl_ptr->ptbl_valid; -+ do { -+ if ((ptbl_ptr->ptbl_base & (1 << index)) == 0) -+ goto found; -+ -+ /* move index on and wrap back to start if needed */ -+ if ((++index) == ELAN3_LX_ENTRIES) -+ index = 0; -+ } while (index != ptbl_ptr->ptbl_valid); -+ -+ panic ("elan3mmu_alloc_pte: has ptbl valid < 32 when but no free pte's"); -+ } -+ ptbl_ptr = ptbl_ptr->ptbl_parent; -+ } -+ -+ /* unlock so we can create space */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* if create some more */ -+ ptbl_ptr = elan3mmu_alloc_lXptbl(dev, 0, elan3mmu); -+ -+ /* get the lock again */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* add to front of list as its obviously got free ones on it */ -+ ptbl_ptr->ptbl_parent = elan3mmu->elan3mmu_lXptbl; -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr; -+ -+ /* grap the first one */ -+ index = 0; -+ -+ found: -+ ptbl_ptr->ptbl_base |= (1 << index); -+ ptbl_ptr->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_pte: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* release the loc and return it */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ *idx = index; -+ return (ptbl_ptr); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i,j; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ if (dev->Level[PTBL_LEVEL_1].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ if (ptbl == NULL) -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_1, attr, 1); -+ -+ if (ptbl == NULL) -+ panic ("elan3mmu_alloc_l1ptbl: cannot alloc ptbl"); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L1; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (j); -+ p->ptbl_flags = PTBL_LEVEL_1 | PTBL_GROUPED; -+ p->ptbl_parent = NULL; -+ -+ j += L1_VA_PER_PTBL; -+ } -+ -+ /* Now mark the real page table as allocated */ -+ /* level 1 ptbls are returned unlocked */ -+ ptbl->ptbl_flags = PTBL_LEVEL_1 | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_l1ptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L1_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_2].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_2, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: created level 2 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l2ptbl: not allowted to steal ptbl for use at level 2\n"); -+ return (NULL); -+ } -+ -+ ELAN3MMU_STAT(l2_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_2, ptbl); -+ spin_lock_irqsave (*plock, *flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L2; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_2 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L2_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_2 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l2ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L2_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: found ptbl %p on free list\n", ptbl); -+ -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: created level 3 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l3ptbl: not allowed to steal ptbl for use at level 3\n"); -+ return (NULL); -+ } -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: stolen level3 ptbl %p\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_3, ptbl); -+ spin_lock_irqsave (*plock,*flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L3; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_3 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L3_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_3 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l3ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+void -+elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx) -+{ -+ sdramaddr_t pte = PTBL_TO_PTADDR (ptbl_ptr) | (idx * sizeof (ELAN3_PTE)); -+ ELAN3_PTE tpte = ELAN3_INVALID_PTE; -+ ELAN3_PTBL *prev; -+ -+ /* ensure that the pte is invalid when free */ -+ elan3_writepte (dev, pte, tpte); -+ -+ /* lock whilst removing */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ HAT_PRINTF4 (2, "elan3mmu_free_pte idx %d ptbl_ptr %p ptbl_base %x ptbl_ptr->ptbl_valid %d \n", -+ idx, ptbl_ptr, ptbl_ptr->ptbl_base, ptbl_ptr->ptbl_valid); -+ /* make sure it was set */ -+ ASSERT ( ptbl_ptr->ptbl_base & (1 << idx) ); -+ ASSERT ( ptbl_ptr->ptbl_valid > 0 ); -+ -+ ptbl_ptr->ptbl_base &= ~(1 << idx); -+ ptbl_ptr->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_free_pte: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* was that the last one on this page */ -+ if ( ! ptbl_ptr->ptbl_valid ) -+ { -+ /* so no bits should be set then */ -+ ASSERT ( ptbl_ptr->ptbl_base == 0 ); -+ -+ /* is this the first page ?? */ -+ if ( elan3mmu->elan3mmu_lXptbl == ptbl_ptr ) -+ { -+ /* make the list start at the second element */ -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ -+ /* scan thro list looking for this page */ -+ prev = elan3mmu->elan3mmu_lXptbl; -+ while ( prev->ptbl_parent != NULL ) -+ { -+ if ( prev->ptbl_parent == ptbl_ptr ) /* its the next one */ -+ { -+ /* remove element from chain */ -+ prev->ptbl_parent = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ prev = prev->ptbl_parent; -+ } -+ -+ panic ("elan3mmu_free_pte: failed to find ptbl in chain"); -+ /* NOTREACHED */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+} -+ -+void -+elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_lXptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_X); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(lX_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_lXptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ } -+} -+ -+void -+elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ HAT_PRINTF3 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p ptbl->ptbl_valid %x \n", dev, ptbl, ptbl->ptbl_valid); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_1); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l2ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(l2_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_l2ptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ } -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l3ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) /* if the ptbl has been used by the kernel */ -+ { /* then zero all the pte's, since they will */ -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+ } -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_kernel_l3ptbl (ELAN3_PTBL *ptbl) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte = PTBL_TO_PTADDR(ptbl); -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ int i; -+ -+ ptbl->ptbl_flags |= PTBL_KERNEL; -+ for (i = 0; i < ELAN3_L3_ENTRIES; i++, pte += ELAN3_PTE_SIZE) -+ { -+ elan3_writepte (dev, pte, tpte); -+ } -+} -+ -+#define PTBL_CAN_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+#define PTBL_MAY_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP|PTBL_LOCKED)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+ -+static int -+elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl) -+{ -+ ELAN3_PTBL *l2ptbl = l3ptbl->ptbl_parent; -+ E3_Addr l2addr = BASE2VA(l2ptbl); -+ E3_Addr l3addr = BASE2VA(l3ptbl); -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t l2ptp; -+ spinlock_t *l2lock; -+ unsigned long l2flags; -+ -+ HAT_PRINTF5 (1, "elan3mmu_steal_this_ptbl: l3ptbl %p (%x) l2ptbl %p (%x) l2addr %x\n", -+ l3ptbl, l3ptbl->ptbl_flags, l2ptbl, l2ptbl->ptbl_flags, l2addr); -+ -+ if (PTBL_CAN_STEAL (l3ptbl->ptbl_flags) && -+ elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_NOWAIT, l3ptbl->ptbl_elan3mmu, l2addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_OK) -+ { -+ ELAN3MMU_STAT(stolen_ptbls); -+ -+ /* Locked both L3 and L2 page tables. */ -+ l2ptp = PTBL_TO_PTADDR (l2ptbl) + ELAN3_L2_INDEX(l3addr)*ELAN3_PTP_SIZE; -+ -+ /* detach the level 3 page table */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_steal_this_ptbl: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_unload_loop (l3ptbl->ptbl_elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, PTE_UNLOAD_NOFLUSH); -+ -+ ASSERT (l3ptbl->ptbl_valid == 0); -+ -+ l3ptbl->ptbl_flags = 0; -+ return (1); -+ } -+ return (0); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long group_flags; -+ unsigned long ptbl_flags; -+ register int i; -+ -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: attr %x\n", attr); -+ -+ spin_lock_irqsave (&dev->PtblGroupLock, group_flags); -+ -+ ptg = dev->Level3PtblGroupHand; -+ -+ if (ptg == NULL) -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ -+ for (;;) -+ { -+ while (ptg) -+ { -+ for (i = 0, ptbl = ptg->pg_ptbls; i < PTBLS_PER_GROUP_MAX; i++, ptbl++) -+ { -+ if (PTBL_MAY_STEAL (ptbl->ptbl_flags) && -+ elan3mmu_lock_this_ptbl (ptbl, LK_PTBL_NOWAIT, &lock, &ptbl_flags) == LK_PTBL_OK) -+ { -+ if (elan3mmu_steal_this_ptbl (dev, ptbl )) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: stolen ptbl %p\n", ptbl); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock,ptbl_flags); -+ -+ dev->Level3PtblGroupHand = ptg->pg_next; -+ -+ spin_unlock_irqrestore (&dev->PtblGroupLock, group_flags); -+ -+ return (ptbl); -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, ptbl_flags); -+ } -+ } -+ ptg = ptg->pg_next; -+ } -+ -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ if (ptbl != NULL) -+ { -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: found ptbl %p on free list\n", ptbl); -+ break; -+ } -+ } -+ -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: created new ptbl %p\n", ptbl); -+ break; -+ } -+ -+ HAT_PRINTF0 (1, "elan3mmu_steal_l3ptbl: cannot find a ptbl, retrying\n"); -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ return (ptbl); -+} -+ -+sdramaddr_t -+elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr addr, int *level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ spinlock_t *l1lock; -+ spinlock_t *l2lock; -+ spinlock_t *l3lock; -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ *level = 0; -+ -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) NULL); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *level = 1; -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return with l1lock */ -+ return (l1ptp); -+ -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *level = 2; -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return with l2lock */ -+ return (l2ptp); -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: l3ptbl %p l3pte %lx\n", l3ptbl, l3pte); -+ -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags)) -+ { -+ case LK_PTBL_OK: -+ *level = 3; -+ *plock = l3lock; -+ *pptbl = l3ptbl; -+ *flags = l3flags; -+ -+ return (l3pte); -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptefind: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ /* NOTREACHED */ -+ return ((sdramaddr_t) 0); -+} -+ -+sdramaddr_t -+elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level) -+{ -+ ELAN3_PTP tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (level != 3 && ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ return PTP_TO_PT_PADDR(tptp); -+} -+ -+sdramaddr_t -+elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr addr, int level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ spinlock_t *l1lock; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ E3_Addr l3base; -+ spinlock_t *l3lock; -+ -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) 0); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l1ptbl %p 1ptp %lx l1base %x (%x) : tl1ptp %x\n", -+ l1ptbl, l1ptp, l1base, l1ptbl->ptbl_base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_1) -+ { -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ panic ("elan3mmu_ptealloc: found pte in level 1 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ if (level == PTBL_LEVEL_1) -+ panic ("elan3mmu_ptealloc: found PTP when loading a level 1 PTE\n"); -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_1) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ /* raced with someone else, whose got there first */ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ /* drop the l1lock and retry */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ tl1ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ { -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ return ((sdramaddr_t) 0); -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* Now need to lock the l1 ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2 ptbl/lx pte\n"); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ goto retryl1; -+ } -+ -+ /* Now have L1 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l2ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l2flags); /* need to unlock with the l2flags to keep irq order correct */ -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l1flags; /* return the l1flags here as we have released the l2flags already to keep order */ -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocating level 2 and level 3 page tables\n"); -+ -+ /* Allocate a level 2 and level 3 page table and link them together */ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr | PTE_NO_SLEEP, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: l2ptbl %p (%x,%x) l3ptbl %p (%x,%x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_base, -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_base); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ { -+ l2ptbl->ptbl_flags |= PTBL_KERNEL; -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ } -+ -+ /* -+ * Connect L3 ptbl to the new L2 ptbl. -+ */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ -+ l2ptbl->ptbl_valid = 1; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* -+ * Now need to lock the l1 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have l1 and l3 ptbls locked, so install the new l2 ptbl into the l1. */ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l1ptp %lx is %x\n", l1ptp, tl1ptp); -+ -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2/l3 ptbls\n"); -+ -+ /* free off the level 3 page table */ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l3 ptbl %p (%x)\n", l3ptbl, l3ptbl->ptbl_flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* and unlock the level 1 ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ /* lock the level 2 page table, and clear out the PTP, then free it */ -+ (void) elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: locked l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ l2ptbl->ptbl_valid = 0; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set to 0 valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l1ptbl is %p (%x), l3ptbl is %p (%x)\n", -+ l1ptbl, l1ptbl->ptbl_flags, l3ptbl, l3ptbl->ptbl_flags); -+ -+ /* Now have L1 and L3 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l3ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l3flags); /* free using l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR (l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l1flags; /* return l1flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l2ptbl %p l2ptp %lx l2base %x (%x) : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, l2ptbl->ptbl_base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_2) { -+ /* this is a pointer to a pte, we should just return it */ -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holdind l2lock */ -+ return (l2ptp); -+ } -+ panic ("elan3mmu: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free lx pte\n"); -+ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l2ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocate level 3 page table\n"); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ -+ /* -+ * Now need to lock the l2 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ -+ if (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_MISMATCH) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: l2ptbl freed, free l3 ptbl and try again\n"); -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ goto retryl1; -+ } -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have L2 and L3 ptbls locked, see if someone has beaten us to it. */ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l2ptp at %lx is %x\n", l2ptp, tl2ptp); -+ -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l3 ptbl and try again\n"); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* Someone has allocated the ptbl before us */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ -+ /* Install the L3 ptbl into the L2 one */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ l2ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ /* unordered unlock - lock l2ptbl, lock l3ptbl, unlock l2ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l3flags); /* free with the l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l2flags; /* return l2flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ l3base = ELAN3_L3_BASE(addr); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l3ptbl %p 3pte %lx l3base %x (%x)\n", -+ l3ptbl, l3pte, l3base, l3ptbl->ptbl_base); -+ -+ if (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags) == LK_PTBL_OK) -+ { -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l3flags; -+ -+ return (l3pte); -+ } -+ -+ /* got all the way down here, but its been nicked before we could lock it */ -+ /* so try all over again */ -+ goto retryl1; -+} -+ -+void -+elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int attr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl1ptp; -+ sdramaddr_t l1ptp; -+ E3_Addr addr; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l2ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ unsigned long flags; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl); -+ -+ HAT_PRINTF2 (1, "elan3mmu_l1inval: l1ptbl %p l1ptp %lx\n", l1ptbl, l1ptp); -+ -+ for (i = 0, addr = 0; i < ELAN3_L1_ENTRIES; i++, l1ptp += ELAN3_PTP_SIZE) -+ { -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: l1ptbl %p : lXptbl %p idx %d\n", -+ l1ptbl, lXptbl, idx); -+ -+ /* invalidate the L1 pte. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l1inval: l1ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l1ptbl, l1ptp, tl1ptp, addr, i); -+ -+ /* invalidate the L1 ptp. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* invalidate the level 2 page table */ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ ret = elan3mmu_l2inval (elan3mmu, l2ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l2lock, &flags); -+ -+ ASSERT ((l2ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if (((l2ptbl->ptbl_flags & PTBL_KEEP) == 0) && l2ptbl->ptbl_valid == 0) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: free l2ptbl %p\n", l2ptbl); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_l2ptbl (elan3mmu->elan3mmu_dev, l2ptbl, l2lock, flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: keep l2ptbl %p\n", l2ptbl); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ } -+ } -+ else -+ { -+ l1ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l1inval: found invalid entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l1ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L1_SIZE; -+ } -+} -+ -+int -+elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int attr, E3_Addr addr, spinlock_t **pl2lock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl2ptp; -+ sdramaddr_t l2ptp; -+ spinlock_t *l3lock; -+ unsigned long l3flags; -+ ELAN3_PTBL *l3ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ -+ HAT_PRINTF2 (1, "elan3mmu_l2inval: l2ptbl %p addr %x\n", l2ptbl, addr); -+ -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_1); -+ -+ ret = elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, pl2lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (l2ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l2ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl); -+ -+ for (i = 0; i < ELAN3_L2_ENTRIES; i++, l2ptp += ELAN3_PTP_SIZE) -+ { -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: l2ptbl %p : lXptbl %p idx %d\n", -+ l2ptbl, lXptbl, idx); -+ -+ /* invalidate the L2 pte. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l2inval: l2ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l2ptbl, l2ptp, tl2ptp, addr, i); -+ -+ /* invalidate the L2 ptp. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* unload the level 3 page table */ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ ret = elan3mmu_l3inval (elan3mmu, l3ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l3lock, &l3flags); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if ((l3ptbl->ptbl_flags & PTBL_KEEP) == 0 && l3ptbl->ptbl_valid == 0) -+ { -+ /* decrement the valid count of the level 2 page table, and */ -+ /* free off the level 3 page table */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: free l3ptbl %p\n", l3ptbl); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_l3ptbl (elan3mmu->elan3mmu_dev, l3ptbl, l3lock, l3flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: keep l3ptbl %p\n", l3ptbl); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ } -+ } -+ else -+ { -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l2inval: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l2ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L2_SIZE; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED(l2ptbl->ptbl_flags)); -+ -+ return (ret); -+} -+ -+int -+elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int attr, E3_Addr addr, spinlock_t **pl3lock, unsigned long *flags) -+{ -+ int ret; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l3inval: l3ptbl %p parent %p addr %x\n", l3ptbl, l3ptbl->ptbl_parent, addr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_parent->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_base == VA2BASE (ELAN3_L2_BASE(addr))); -+ -+ ret = elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, pl3lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ elan3mmu_unload_loop (elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, attr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ -+ return (ret); -+ } -+ -+int -+elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags) -+{ -+ int level = PTBL_LEVEL (ptbl->ptbl_flags); -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (! spin_trylock (lock)) { -+ local_irq_restore (*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (level != PTBL_LEVEL (ptbl->ptbl_flags)) -+ { -+ spin_unlock (lock); -+ local_irq_restore (*flags); -+ return (LK_PTBL_MISMATCH); -+ } -+ -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ *plock = lock; -+ return (LK_PTBL_OK); -+} -+ -+int -+elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags) -+{ -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ int res = LK_PTBL_MISMATCH; -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (spin_trylock (lock) == 0) { -+ local_irq_restore(*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (PTBL_LEVEL (ptbl->ptbl_flags) != level) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ /* We have the right mutex, so check that its the ptbl we want. */ -+ switch (level) -+ { -+ case PTBL_LEVEL_1: va = ELAN3_L1_BASE(va); break; -+ case PTBL_LEVEL_2: va = ELAN3_L2_BASE(va); break; -+ case PTBL_LEVEL_3: va = ELAN3_L3_BASE(va); break; -+ } -+ -+ if (ptbl->ptbl_elan3mmu != elan3mmu || ptbl->ptbl_base != VA2BASE(va)) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ ASSERT ((ptbl->ptbl_flags & PTBL_LOCKED) == 0); -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ -+ *plock = lock; -+ return (LK_PTBL_OK); -+ -+mismatch: -+ if (! (flag & LK_PTBL_FAILOK)) -+ panic ("elan3mmu: failed to lock ptbl\n"); -+ -+ spin_unlock (lock); -+ local_irq_restore(*flags); -+ return (res); -+} -+ -+void -+elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ptbl->ptbl_flags &= ~PTBL_LOCKED; -+ spin_unlock_irqrestore (lock,flags); -+} -+ -+static spinlock_t * -+elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl) -+{ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: return (&l3ptbl_lock[L3PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_2: return (&l2ptbl_lock[L2PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_1: return (&l1ptbl_lock[L1PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_X: -+ panic ("elan3mmu: ptbl_to_lock, bad level X"); -+ default: -+ panic ("elan3mmu: ptbl_to_lock, bad level"); -+ /* NOTREACHED */ -+ } -+ return (NULL); -+} -+ -+void -+elan3mmu_display (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ spinlock_t *l1lock; -+ ELAN3_PTE tl1pte; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTE tl2pte; -+ spinlock_t *l2lock; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ ELAN3_PTE tl3pte; -+ spinlock_t *l3lock; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ unsigned long flags; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ -+ if (l1ptbl == NULL) -+ return; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 1 page table for pte %x\n", tl1ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl1pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 1 l1pte matches value %llx\n", (long long) tl1pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l1 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 1 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l1ptbl, l1ptbl->ptbl_flags, l1ptbl->ptbl_elan3mmu, l1ptbl->ptbl_base, elan3mmu, addr, (long long)tl1pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 1 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 2 page table for pte %x\n", tl2ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl2pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 2 l1pte matches value %llx\n", (long long)tl2pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 2 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr, (long long) tl2pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 2 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3ptbl %p l3pte %lx\n",l3ptbl, l3pte); -+ -+ tl3pte = elan3_readpte (dev, l3pte); -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3pte matches value %llx\n", (long long) tl3pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr, (long long) tl3pte); -+ -+ break; -+ -+ default: -+ panic ("elan3mmu_display: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elan3mmu_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elan3mmu_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elan3mmu_linux.c 2005-06-01 23:12:54.574442904 -0400 -@@ -0,0 +1,284 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_linux.c,v 1.50.2.3 2004/12/14 10:19:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Strategy for syncing main <-> elan pte's: -+ * -+ * Install callbacks for linux flush_tlb_page(), flush_tlb_range(), -+ * flush_tlb_all(), and flush_tlb_mm() so when a main PTE changes, -+ * the elan translations, if any, are invalidated. They can then be -+ * faulted in again with the correct physical page, perms, etc., on demand. -+ * -+ * Callbacks are stacked on the mm_struct, one per context. We also stack -+ * a ctxt pointer so we don't have to do lookups on every call. -+ * -+ * Sanity check -- we clearly want to flush the elan PTEs in these -+ * situations, all of which are covered by tlb_flush_{page,range}() -+ * -+ * 1) kernel/vmscan.c::try_to_swap_out() swaps out a page -+ * -+ * 2) kernel/mremap.c::copy_one_pte() moves a page as a result of the -+ * mremap system call -+ * -+ * 3) kernel/mprotect.c::change_pte_range() changes the permissions of a -+ * page as the result of the mprotect system call -+ * -+ * Other Notes: -+ * -+ * Dirty a page in the mains page tables when it is faulted into the elan. -+ * This way it will not be thrown away by the swapper. -+ * -+ * Pages write protected for COW are copied by elan3mmu_main_pagefault() -+ * when a writeable translation is loaded into the elan. -+ */ -+ -+caddr_t elan3mmu_kernel_invalid_space; -+ELAN3_PTE elan3mmu_kernel_invalid_pte_val; -+ -+void -+elan3mmu_init_osdep (void) -+{ -+ pte_t *pte; -+ -+ KMEM_GETPAGES (elan3mmu_kernel_invalid_space, caddr_t, 1, TRUE); -+ -+ ASSERT(elan3mmu_kernel_invalid_space != NULL); -+ -+ pte = find_pte_kernel ((unsigned long) elan3mmu_kernel_invalid_space); -+ -+ elan3mmu_kernel_invalid_pte_val = ELAN3_PTE_64_BIT | (pte_phys(*pte) & ELAN3_PTE_PFN_MASK) | ELAN3_PERM_REMOTEREAD | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ elan3mmu_kernel_invalid_pte_val |= (1ull << 48); -+ else -+ elan3mmu_kernel_invalid_pte_val |= alpha_mv.pci_dac_offset; -+#endif -+ -+ HAT_PRINTF(0x10, "elan3mmu_invalid_space at %p phys=%llx pte=%llx\n", elan3mmu_kernel_invalid_space, -+ (unsigned long long) pte_phys(*pte), (unsigned long long) elan3mmu_kernel_invalid_pte_val); -+} -+ -+void -+elan3mmu_fini_osdep() -+{ -+ KMEM_FREEPAGES (elan3mmu_kernel_invalid_space, 1); -+} -+ -+void -+elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu) -+{ -+ elan3mmu->elan3mmu_coproc_mm = current->mm; -+} -+ -+/* -+ * Convert physical page frame number to elan pte. -+ */ -+ELAN3_PTE -+elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm) -+{ -+ ELAN3_PTE newpte; -+ -+ ASSERT (paddr != 0); -+ -+ if ((paddr & dev->SdramPhysMask) == dev->SdramPhysBase) /* SDRAM, turn on PTE_LOCAL bit */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx SDRAM\n", (unsigned long long) paddr); -+ -+ newpte = ELAN3_PTE_LOCAL | (paddr & ELAN3_PTE_PFN_MASK & ~dev->SdramPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#if defined(LINUX_ALPHA) -+ else if ((paddr & dev->PciPhysMask) == dev->PciPhysBase) -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx PCI\n", (unsigned long long) paddr); -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK & ~dev->PciPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#endif -+ else /* main memory, must convert to PCI view */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx is main memory\n", (unsigned long long) paddr); -+ -+ /* main memory, just set the architecture specific PTE_BYPASS bit */ -+ /* This requires the Tsunami chipset being programmed to support -+ * the monster window option. This is in linux-2.4.5 and later kernels -+ * and is also patched into the RH 7.1/2.4.3-12 Alpha kernel -+ */ -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK) | perm | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ newpte |= (1ull << 48); -+ else -+ newpte |= alpha_mv.pci_dac_offset; -+#endif -+ } -+ -+ if ( ELAN3_PERM_WRITEABLE( perm )) -+ newpte |= ( ELAN3_PTE_MOD | ELAN3_PTE_REF ); -+ else -+ newpte |= ( ELAN3_PTE_REF ) ; -+ -+ return (newpte); -+} -+ -+ELAN3_PTE -+elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu) -+{ -+ if (elan3mmu->elan3mmu_dev->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVB) -+ return (elan3mmu_kernel_invalid_pte_val); -+ return (ELAN3_INVALID_PTE); -+} -+ -+/* -+ * Invalidate a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, addr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " unloading eaddr %x main %p (%ld pages)\n", -+ eaddr, addr, btopr(span)); -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } /* takes care of elan tlb flush also */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * -+ */ -+void -+elan3mmu_update_range (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t vaddr, E3_Addr eaddr, u_int len, u_int perm) -+{ -+ u_int roperm = ELAN3_PERM_READONLY(perm & ELAN3_PTE_PERM_MASK) | (perm & ~ELAN3_PTE_PERM_MASK); -+ u_int off; -+ -+ HAT_PRINTF3(1, "elan3mmu_update_range (elan3mmu %p addr %p -> %p)\n", elan3mmu, vaddr, vaddr+len-1); -+ -+ while (len > 0) -+ { -+ pte_t *pte_ptr; -+ pte_t pte_value; -+ -+ pte_ptr = find_pte_map(mm, (unsigned long)vaddr); -+ if (pte_ptr) { -+ pte_value = *pte_ptr; -+ pte_unmap(pte_ptr); -+ } -+ -+ HAT_PRINTF(0x10, " elan3mmu_update_range %x (%p) %s\n", eaddr, vaddr, -+ !pte_ptr ? "invalid" : pte_none(pte_value) ? "none " : !pte_present(pte_value) ? "swapped " : -+ !pte_write(pte_value) ? "RO/COW" : "OK"); -+ -+ if (pte_ptr && !pte_none(pte_value) && pte_present(pte_value)) -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, eaddr + off, pte_phys(pte_value) + off, pte_write(pte_value) ? perm : roperm, PTE_LOAD|PTE_NO_SLEEP|PTE_NO_STEAL); -+ vaddr += PAGESIZE; -+ eaddr += PAGESIZE; -+ len -= PAGESIZE; -+ } -+} -+ -+/* -+ * Update a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm,caddr_t vaddr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, vaddr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, vaddr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < vaddr) -+ span = len; -+ else if (rgn->rgn_mbase > vaddr) -+ span = MIN(len, rgn->rgn_mbase - vaddr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - vaddr); -+ eaddr = rgn->rgn_ebase + (vaddr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " updating eaddr %u main %p (%ld pages)\n", -+ eaddr, vaddr, btopr(span)); -+ -+ elan3mmu_update_range(elan3mmu, mm, vaddr, eaddr, span, rgn->rgn_perm); -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * Invalidate all ptes for the given context. -+ */ -+void -+elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *l1ptbl = (elan3mmu ? elan3mmu->elan3mmu_l1ptbl : NULL); -+ spinlock_t *l1mtx; -+ unsigned long flags; -+ -+ if (l1ptbl && elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, 1, &l1mtx, &flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval(elan3mmu, elan3mmu->elan3mmu_l1ptbl, 0); -+ elan3mmu_unlock_ptbl (l1ptbl, l1mtx, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elan3ops.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elan3ops.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elan3ops.c 2005-06-01 23:12:54.575442752 -0400 -@@ -0,0 +1,170 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3ops.c,v 1.4 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elan3ops.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+extern ELAN_STATS_OPS elan3_device_stats_ops; -+ -+ELAN_DEV_OPS elan3_dev_ops = { -+ -+ get_position, -+ set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+ELAN_STATS_OPS elan3_device_stats_ops = { -+ ELAN_STATS_OPS_VERSION, -+ -+ stats_get_index_name, -+ stats_get_block, -+ stats_clear_block -+}; -+ -+static char *elan3_device_stats_names[ELAN3_NUM_STATS] = -+{ -+ "version field", /* not cleared */ -+ "elan interrupts", -+ "tlb flushes", -+ "traps with invalid context", -+ "interrupts com queue half full", -+ "cproc traps", -+ "dproc traps", -+ "tproc traps", -+ "iproc traps", -+ "event interrupts", -+ "elan page faults", -+ "EopBadAcks", -+ "EopResets", -+ "InputterBadLength", -+ "InputterCRCDiscards", -+ "InputterCRCErrors", -+ "InputterCRCBad", -+ "errors in dma data", -+ "errors after dma identify", -+ "errors after thread identify", -+ "dma retries", -+ "dma output timeouts", -+ "dma packet ack errors", -+ "forced tproc traps", -+ "too many instruction traps", -+ "output timeouts", -+ "packet ack errors", -+ "LockError", -+ "DeskewError", -+ "PhaseError", -+ "DataError", -+ "FifoOvFlow0", -+ "FifoOvFlow1", -+ "link error value on data error", -+ "correctable ecc errors", -+ "uncorrectable ecc errors", -+ "multiple ecc errors", -+ "sdram bytes free", /* not cleared */ -+ "longest interrupt in ticks", -+ "punts of event int's to thread", -+ "reschedules of event int's thread" -+}; -+ -+int -+stats_get_index_name (void *arg, uint index, caddr_t name) -+{ -+ copyout (elan3_device_stats_names[index], name, strlen (elan3_device_stats_names[index]) + 1 /* with \0 */); -+ -+ return (0); -+} -+ -+int -+stats_get_block (void *arg, uint entries, ulong *value) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ -+ if ( entries > ELAN3_NUM_STATS ) /* if space too big only send valid portion */ -+ entries = ELAN3_NUM_STATS; -+ -+ copyout(&dev->Stats, value, sizeof(ulong) * entries); -+ -+ return (0); -+} -+ -+int -+stats_clear_block (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ u_long *ptr = (u_long *) &dev->Stats; -+ int n; -+ -+ for (n = 0; n < ELAN3_NUM_STATS; n++) -+ { -+ switch (n) -+ { -+ case offsetof (ELAN3_STATS, Version)/sizeof(u_long): -+ case offsetof (ELAN3_STATS, SdramBytesFree)/sizeof(u_long): -+ break; -+ default: -+ ptr[n] = (ulong)0; -+ } -+ } -+ return (0); -+} -+ -+int -+get_position (void *user_data, ELAN_POSITION *position) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ copyout(&dev->Position, position, sizeof(ELAN_POSITION)); -+ -+ return (0); -+} -+ -+int -+set_position (void *user_data, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ if (ComputePosition (&dev->Position, nodeId, numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (EINVAL); -+ -+ return (0); -+} -+ -+int -+elan3_register_dev_stats(ELAN3_DEV * dev) -+{ -+ char name[ELAN_STATS_NAME_MAX_LEN+1]; -+ -+ sprintf (name, ELAN3_STATS_DEV_FMT, dev->Instance); -+ -+ elan_stats_register(&dev->StatsIndex, -+ name, -+ sizeof (elan3_device_stats_names)/sizeof (elan3_device_stats_names[0]), -+ &elan3_device_stats_ops, -+ (void *)dev); -+ -+ return (0); -+} -+ -+void -+elan3_deregister_dev_stats(ELAN3_DEV * dev) -+{ -+ elan_stats_deregister(dev->StatsIndex); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elandebug.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elandebug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elandebug.c 2005-06-01 23:12:54.575442752 -0400 -@@ -0,0 +1,151 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandebug.c,v 1.25 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandebug.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+void -+elan3_debugf (void *p, unsigned int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ -+#if defined (DIGITAL_UNIX) -+#define PREFIX_FMT "[%lx.%08x]" -+#define PREFIX_VAL (int)CURTHREAD() -+#else -+#define PREFIX_FMT "[%lx.%04d]" -+#define PREFIX_VAL (current->pid) -+#endif -+ -+ if ((unsigned long) p > DBG_NTYPES) -+ { -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) p; -+ -+ if (elan3_debug_display_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) != elan3_debug_display_ctxt) -+ return; -+ if (elan3_debug_ignore_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) == elan3_debug_ignore_ctxt) -+ return; -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ sprintf (prefix, PREFIX_FMT " (XXX) ", lbolt, PREFIX_VAL); -+ else -+ sprintf (prefix, PREFIX_FMT " (%03x) ", lbolt, PREFIX_VAL, -+ ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK); -+ } -+ else -+ { -+ char *what; -+ -+ if (elan3_debug_ignore_dev & (1 << ((unsigned long) p))) -+ return; -+ -+ switch ((unsigned long) p) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_KCOMM: what = "kcm"; break; -+ case (int) DBG_ICS: what = "ics"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, PREFIX_FMT " [%s] ", lbolt, PREFIX_VAL, what); -+ else -+ sprintf (prefix, PREFIX_FMT " [%3d] ", lbolt, PREFIX_VAL, (int)(long)what); -+ } -+ -+ { -+ va_list ap; -+ -+ va_start (ap, fmt); -+ qsnet_vdebugf ((((mode & elan3_debug_buffer)?QSNET_DEBUG_BUFFER:0)|((mode & elan3_debug_console)?QSNET_DEBUG_CONSOLE:0)) , prefix, fmt, ap); -+ va_end (ap); -+ } -+} -+ -+ -+void -+elan3_alloc_panicstate (ELAN3_DEV *dev, int allocsdram) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr == NULL) -+ KMEM_ZALLOC (dev->PanicState.RegPtr, E3_Regs *, sizeof (E3_Regs), 1); -+ -+ if (allocsdram) -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] == NULL && dev->SdramBanks[bank].Size) -+ KMEM_ZALLOC (dev->PanicState.Sdram[bank], char *, dev->SdramBanks[bank].Size, 1); -+} -+ -+void -+elan3_free_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr != NULL) -+ KMEM_FREE (dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] != NULL && dev->SdramBanks[bank].Size) -+ KMEM_FREE (dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ bzero (&dev->PanicState, sizeof (dev->PanicState)); -+} -+ -+void -+elan3_save_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr) -+ { -+ printk ("elan%d: saving state on panic .....\n", dev->Devinfo.dev_instance); -+ -+ bcopy ((void *) dev->RegPtr, (void *) dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->SdramBanks[bank].Size && dev->PanicState.Sdram[bank]) -+ elan3_sdram_copyq_from_sdram (dev, (bank << ELAN3_SDRAM_BANK_SHIFT), dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ } -+} -+ -+int -+elan3_assfail (ELAN3_DEV *dev, char *string, char *file, int line) -+{ -+ if (panicstr) -+ return (0); -+ -+ printk ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+ -+#if defined(LINUX) -+ elan3_save_panicstate (dev); -+ -+ panic ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#else -+ cmn_err (CE_PANIC, "elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#endif -+ /*NOTREACHED*/ -+ return (0); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elandev_generic.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elandev_generic.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elandev_generic.c 2005-06-01 23:12:54.578442296 -0400 -@@ -0,0 +1,1862 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandev_generic.c,v 1.111.2.3 2004/11/15 11:12:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_generic.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Module globals, configurable from system file. -+ */ -+u_int elan3_debug = 0; -+u_int elan3_debug_console = 0; -+u_int elan3_debug_buffer = -1; -+u_int elan3_debug_ignore_dev = 0; -+u_int elan3_debug_ignore_kcomm = 0; -+u_int elan3_debug_ignore_ctxt = 0; -+u_int elan3_debug_display_ctxt = 0; -+ -+int eventint_punt_loops; -+int eventint_punt_ticks; -+int eventint_resched_ticks; -+ -+static void InitialiseDmaBuffers (ELAN3_DEV *dev, ioaddr_t CmdPort); -+static int ProbeSdram (ELAN3_DEV *dev); -+static void InitialiseSdram (ELAN3_DEV *dev); -+static void ReEnableErrorInterrupts (void *arg); -+void PollForDmaHungup (void *arg); -+static void elan3_event_interrupt (ELAN3_DEV *dev); -+ -+/* -+ * BaseAddr is ptr to the start of a table aligned on a power of two byte address. -+ * SizePower must be in the range of 6 to 12. It defines the number of valid contexts as -+ * shown below. -+ * -+ * SizePower Valid Contexts Table size in bytes. -+ * 6 64 1k -+ * 7 128 2k -+ * 8 256 4K -+ * 9 512 8k -+ * 10 1024 16k -+ * 11 2048 32k -+ * 12 4096 64k -+ */ -+#define GEN_CONTEXT_PTR(BaseAddr, SizePower) (((E3_uint32) BaseAddr) | \ -+ (~((1 << ((SizePower) - 6)) - 1) & 0x3f)) -+ -+int -+InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ E3_IprocTrapHeader_BE TrapCleanup[4]; -+ E3_ContextControlBlock ContextControlBlock; -+ sdramaddr_t ptr; -+ int res; -+ int i; -+ -+ eventint_punt_loops = 100; -+ eventint_punt_ticks = (hz/100); -+ eventint_resched_ticks = (hz/4); -+ -+ dev->Stats.Version = ELAN3_STATS_VERSION; -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* -+ * The elan should have already been reset, so the interrupt mask -+ * should be 0 and the schedule status register should be set to -+ * its initial state -+ */ -+ ASSERT (dev->InterruptMask == 0); -+ ASSERT ((read_reg32 (dev, Exts.SchCntReg) & HaltStopAndExtTestMask) == Sched_Initial_Value); -+ -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ -+ /* Initialise the cache tags before touching the SDRAM */ -+ /* we initialise them to "map" the bottom of SDRAM */ -+ for (i = 0; i < E3_NumCacheLines; i++) -+ { -+ write_cache_tag (dev, Tags[i][0].Value, 0x0000000000000000ULL); -+ write_cache_tag (dev, Tags[i][1].Value, 0x0000080000000000ULL); -+ write_cache_tag (dev, Tags[i][2].Value, 0x0000100000000000ULL); -+ write_cache_tag (dev, Tags[i][3].Value, 0x0000180000000000ULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ for (i = 0; i < E3_NumCacheLines*(E3_CACHELINE_SIZE/sizeof(E3_uint64)); i++) -+ { -+ write_cache_set (dev, Set0[i], 0xcac1ecac1ecac1e0ULL); -+ write_cache_set (dev, Set1[i], 0xcac1ecac1ecac1e1ULL); -+ write_cache_set (dev, Set2[i], 0xcac1ecac1ecac1e2ULL); -+ write_cache_set (dev, Set3[i], 0xcac1ecac1ecac1e3ULL); -+ } -+#endif -+ -+ if ((res = ProbeSdram(dev)) != ESUCCESS) -+ return (res); -+ -+ /* Enable all cache sets before initialising the sdram allocators */ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg |= CONT_EN_ALL_SETS)); -+ -+ InitialiseSdram (dev); -+ -+ dev->TAndQBase = elan3_sdram_alloc (dev, ELAN3_TANDQ_SIZE); -+ dev->ContextTable = elan3_sdram_alloc (dev, ELAN3_CONTEXT_SIZE); -+ dev->ContextTableSize = ELAN3_NUM_CONTEXTS; -+ dev->CommandPortTraps[0] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CommandPortTraps[1] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CurrentCommandPortTrap = 0; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "InitialiseElan: ContextTable %08lx TAndQ %08lx CommandPortTrap %08lx\n", -+ dev->ContextTable, dev->TAndQBase, dev->CommandPortTraps[0]); -+ -+ /* Allocate the thread amd dma trap areas */ -+ KMEM_ZALLOC (dev->ThreadTrap, THREAD_TRAP *, sizeof (THREAD_TRAP), TRUE); -+ KMEM_ZALLOC (dev->DmaTrap, DMA_TRAP *, sizeof (DMA_TRAP), TRUE); -+ -+ /* Allocate the ctxt table */ -+ KMEM_ZALLOC (dev->CtxtTable, ELAN3_CTXT **, dev->ContextTableSize * sizeof ( ELAN3_CTXT *), TRUE); -+ -+ /* Initialise halt queue list */ -+ dev->HaltOperationsTailpp = &dev->HaltOperations; -+ -+ /* From elan3/code/harness/elanstuff.c */ -+ /* Init the clock. */ -+ write_ureg64 (dev, Clock.NanoSecClock, 0); -+ -+ /* Init the instruction count reg. */ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Init the stats control reg. Must be done before the count regs.*/ -+ write_ureg32 (dev, StatCont.StatsControl, 0); -+ -+ /* Init the stats count regs. */ -+ write_ureg32 (dev, StatCounts[0].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[1].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[2].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[3].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[4].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[5].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[6].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[7].s.StatsCount, 0); -+ -+ /* -+ * Initialise the Context_Ptr and Fault_Base_Ptr -+ */ -+ write_reg32 (dev, Fault_Base_Ptr, dev->TAndQBase + offsetof(E3_TrapAndQueue, IProcSysCntx)); -+ write_reg32 (dev, Context_Ptr, GEN_CONTEXT_PTR (dev->ContextTable, ELAN3_LN2_NUM_CONTEXTS)); -+ -+ /* scrub the TProc Registers */ -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Globals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Outs[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Locals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Ins[i], 0xdeadbabe); -+ -+ /* -+ * Initialise the Queue pointers. Arrange them so that the starting positions are -+ * farthest apart in one set of the cache. Thus 512 bytes apart, but with cntx0 -+ * thread the same as the interrupt queue. -+ */ -+ write_reg32 (dev, TProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ write_reg32 (dev, TProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ -+ write_reg32 (dev, DProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ write_reg32 (dev, DProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ -+ dev->Event_Int_Queue_FPtr = dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80]); -+ write_reg32 (dev, Event_Int_Queue_FPtr, dev->Event_Int_Queue_FPtr); -+ write_reg32 (dev, Event_Int_Queue_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80])); -+ -+ -+ /* Initialise Input_Trap_Base to last 8 Kbytes of trap area, uCode adds the right offset */ -+ write_reg32 (dev, Input_Trap_Base, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0])); -+ -+ /* Ptr to word used to save the SP to when a thread deschedules */ -+ write_reg32 (dev, Thread_SP_Save_Ptr, dev->TAndQBase + offsetof (E3_TrapAndQueue, Thread_SP_Save)); -+ -+ /* Initialise the command trap base */ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[0]); -+ -+ /* Initialise the set event tracing registers */ -+ write_reg32 (dev, Event_Trace_Ptr, 0); -+ write_reg32 (dev, Event_Trace_Mask, 0); -+ -+ /* Initialise Tlb_Line_Value to zero. The TLB cannot be read while either the */ -+ /* uCode or thread proc might be running. Must be set to 0. */ -+ write_reg64 (dev, Tlb_Line_Value, 0); -+ -+ /* Control register. Cache everything, Enable MMU, RefreshRate=3, CasLatency=1, StartSDR */ -+ dev->Cache_Control_Reg |= CONT_MMU_ENABLE | CONT_EN_ALL_SETS | CONT_CACHE_ALL | CONT_ENABLE_ECC; -+ -+#if ELAN3_PAGE_SHIFT == 13 -+ dev->Cache_Control_Reg |= CONT_ENABLE_8K_PAGES; -+#endif -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg); -+ -+ /* -+ * Initialise the context table to be discard for all contexts -+ */ -+ ContextControlBlock.rootPTP = 0; -+ ContextControlBlock.filter = E3_CCB_DISCARD_ALL; -+ ContextControlBlock.VPT_mask = 0; -+ ContextControlBlock.VPT_ptr = 0; -+ -+ for (i = 0, ptr = dev->ContextTable; i < ELAN3_NUM_CONTEXTS; i++, ptr += sizeof (E3_ContextControlBlock)) -+ elan3_sdram_copyl_to_sdram (dev, &ContextControlBlock, ptr, sizeof (E3_ContextControlBlock)); -+ -+ /* From elan3/code/trap_handler/init.c */ -+ /* -+ * Initialise the Trap And Queue area in Elan SDRAM. -+ */ -+ TrapCleanup[0].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[0].s.TrAddr = 0; -+ TrapCleanup[0].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[0].s.TrData0 = 0; -+ TrapCleanup[1].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[1].s.TrAddr = 0; -+ TrapCleanup[1].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[1].s.TrData0 = 0; -+ TrapCleanup[2].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[2].s.TrAddr = 0; -+ TrapCleanup[2].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[2].s.TrData0 = 0; -+ TrapCleanup[3].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[3].s.TrAddr = 0; -+ TrapCleanup[3].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[3].s.TrData0 = 0; -+ -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FSR.Status), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FSR.Status), 0); -+ -+ /* Must now zero all the FSRs so that a subsequent Fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), 16); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 64); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), 64); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), 64); -+ -+ InitialiseDmaBuffers(dev, CmdPort); -+ -+ /* reserve a halt operation for flushing the context filter */ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ /* Allow the Thread/Dma to run */ -+ CLEAR_SCHED_STATUS (dev, HaltThread | HaltDmas); -+ -+ /* Enable All Interrrupts */ -+ SET_INT_MASK (dev, (INT_PciMemErr | INT_SDRamInt | INT_EventInterrupt | INT_LinkError | INT_ComQueue | -+ INT_TProc | INT_CProc | INT_DProc | INT_IProcCh1NonSysCntx | -+ INT_IProcCh1SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh0SysCntx)); -+ -+ /* Take the link out of boundary scan */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* And clear any link errors */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* XXXX: clear discard context 0, AFTER setting up the kernel comms */ -+ CLEAR_SCHED_STATUS (dev, DiscardSysCntxIn | DiscardNonSysCntxIn); -+ -+ /* Start a thread to handle excessive Event Interrrupts */ -+ if (kernel_thread_create (elan3_event_interrupt, (caddr_t) dev) == NULL) -+ { -+ panic ("InitialiseElan: cannot start elan3_event_interrupt\n"); -+ return (EFAIL); -+ } -+ dev->EventInterruptThreadStarted = 1; -+ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ PollForDmaHungup (dev); -+ -+ /* register the device and stats with elanmod for RMS */ -+ dev->DeviceIdx = elan_dev_register(&dev->Devinfo, &elan3_dev_ops, (void *) dev); -+ -+ elan3_register_dev_stats(dev); -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseDmaBuffers(ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ register int i; -+ -+ /* GNAT sw-elan3/3908: -+ * Clear down the power on state of the Dma_Desc registers to make sure we don't -+ * try and interpret them when a trap happens. -+ */ -+ write_reg32 (dev, Dma_Desc.dma_type, 0); -+ write_reg32 (dev, Dma_Desc.dma_size, 0); -+ write_reg32 (dev, Dma_Desc.dma_source, 0); -+ write_reg32 (dev, Dma_Desc.dma_dest, 0); -+ write_reg32 (dev, Dma_Desc.dma_destEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_destCookieVProc, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcCookieVProc, 0); -+ -+ /* -+ * The following is a sequence of writes to remove X's from the dma buffers and -+ * registers. It is only safe to write these registers after reset and before any -+ * dma's have been issued. The chip will NOT function corectly if they are written at -+ * any other time or in a different order. -+ */ -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.ResetAckNLdBytesToWr, ((u_longlong_t)0x1000) << 32); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdBytesToRd, ((u_longlong_t)0x100) << 32); -+ -+ for (i=0;i<(4*8);i++) -+ write_reg64 (dev, Dma_Alignment_Port[0], 0); -+ -+ /* -+ * This is used to clear out X's from some of the trap registers. This is required to -+ * prevent the first traps from possibly writting X's into the SDram and upsetting the -+ * ECC value. It requires that the trap save area registers have been set up but does -+ * not require any translations to be ready. -+ */ -+ writel (-1, CmdPort + offsetof (E3_CommandPort, SetEvent)); -+ while ((read_reg32 (dev, Exts.InterruptReg) & INT_CProc) == 0) -+ { -+ mb(); -+ DELAY (1); -+ } -+ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PULSE_SCHED_STATUS(dev, RestartCProc); -+} -+ -+void -+FinaliseElan (ELAN3_DEV *dev) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP *chain = NULL; -+ int bank; -+ int indx; -+ int size; -+ unsigned long flags; -+ int level; -+ -+ elan_stats_deregister (dev->StatsIndex); -+ elan_dev_deregister(&dev->Devinfo); -+ -+ /* Cancel the dma poller */ -+ cancel_timer_fn (&dev->DmaPollTimeoutId); -+ -+ /* release it's halt operation */ -+ ReleaseHaltOperations (dev, 1); -+ -+ /* stop all kernel threads */ -+ dev->ThreadsShouldStop = 1; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ while (dev->EventInterruptThreadStarted && !dev->EventInterruptThreadStopped) -+ { -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Set the interrupt mask to 0 and the schedule control register to run nothing */ -+ SET_INT_MASK (dev, 0); -+ SET_SCHED_STATUS (dev, DiscardNonSysCntxIn | DiscardSysCntxIn | HaltThread | HaltDmas); -+ -+ /* Cancel any link error timeout */ -+ if (timer_fn_queued(&dev->ErrorTimeoutId)) -+ cancel_timer_fn (&dev->ErrorTimeoutId); -+ -+ /* Free of and page tables that have been allocated */ -+ spin_lock (&dev->PtblGroupLock); -+ for(level=0; level<4; level++) -+ { -+ while ((ptg = dev->Level[level].PtblGroupList) != NULL) -+ { -+ dev->Level[level].PtblGroupList = ptg->pg_next; -+ -+ elan3_sdram_free (dev, ptg->pg_addr, PTBL_GROUP_SIZE); -+ FREE_PTBL_GR(ptg); -+ } -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ -+ /* Free of all halt operations */ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((op = dev->FreeHaltOperations) != NULL) -+ { -+ dev->FreeHaltOperations = op->Next; -+ -+ /* Keep a list of 'freed' ops for later KMEM_FREE call */ -+ op->Next = chain; -+ chain = op; -+ } -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ /* Have now dropped the spinlock - can call KMEM_FREE */ -+ while ((op = chain) != NULL) -+ { -+ chain = op->Next; -+ -+ KMEM_FREE (op, sizeof (ELAN3_HALTOP)); -+ } -+ -+ /* Free of the ctxt table */ -+ KMEM_FREE (dev->CtxtTable, dev->ContextTableSize * sizeof (ELAN3_CTXT *)); -+ -+ /* Free of the thread and dma atrap areas */ -+ KMEM_FREE (dev->ThreadTrap, sizeof (THREAD_TRAP)); -+ KMEM_FREE (dev->DmaTrap, sizeof (DMA_TRAP)); -+ -+ /* Free of the memsegs and pages */ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ { -+ if (dev->SdramBanks[bank].Size) -+ { -+ UnmapDeviceRegister (dev, &dev->SdramBanks[bank].Handle); -+ -+ KMEM_FREE (dev->SdramBanks[bank].PtblGroups, sizeof (ELAN3_PTBL_GR *) * (dev->SdramBanks[bank].Size / PTBL_GROUP_SIZE)); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= dev->SdramBanks[bank].Size; indx++, size <<= 1) -+ KMEM_FREE (dev->SdramBanks[bank].Bitmaps[indx], sizeof (bitmap_t)*BT_BITOUL(dev->SdramBanks[bank].Size/size)); -+ } -+ } -+ elan3_sdram_fini (dev); -+} -+ -+#define INIT_PATTERN(offset) (0xBEEC000000000011ull | ((u_longlong_t)(offset)) << 16) -+#define FREE_PATTERN(offset) (0xBEEC000000000022ull | ((u_longlong_t)(offset)) << 16) -+ -+static int -+ProbeSdram (ELAN3_DEV *dev) -+{ -+ int Instance; -+ u_int Bank; -+ int MemSpaceSize; -+ int BankMaxSize; -+ int BankOffset; -+ int BankSize; -+ ioaddr_t BankBase; -+ ioaddr_t PageBase; -+ ioaddr_t PageBase1; -+ ioaddr_t PageBase2; -+ DeviceMappingHandle BankHandle; -+ DeviceMappingHandle PageHandle; -+ DeviceMappingHandle PageHandle1; -+ DeviceMappingHandle PageHandle2; -+ register int i; -+ u_longlong_t value; -+ extern int sdram_bank_limit; -+ -+ /* NOTE: The Cache control register is set to only enable cache set 0 */ -+ /* and has ECC disabled */ -+ Instance = dev->Instance; -+ -+ /* Determine the size of the SDRAM from the BAR register */ -+ if (DeviceRegisterSize (dev, ELAN3_BAR_SDRAM, &MemSpaceSize) != ESUCCESS) -+ { -+ printk ("elan%d: cannot determine SDRAM size\n", Instance); -+ return (EFAIL); -+ } -+ -+ elan3_sdram_init (dev); -+ -+ BankMaxSize = MemSpaceSize / ELAN3_SDRAM_NUM_BANKS; -+ -+ for (Bank = 0; Bank < ELAN3_SDRAM_NUM_BANKS; Bank++) -+ { -+ BankOffset = Bank * BankMaxSize; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: Probing RAM Bank %d (max size %08x)\n", Instance, Bank, BankMaxSize); -+ -+ /* Probe the memory bank by mapping two pages that are the size of the cache apart */ -+ /* this guarantees that when we store the second pattern we displace the first pattern */ -+ /* from the cache, also store the second pattern again the size of the cache up again */ -+ /* to ensure that the SDRAM wires don't stay floating at pattern1 */ -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, PAGESIZE, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase1, BankOffset + ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle1) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase2, BankOffset + 2*ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle2) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ continue; -+ } -+ -+#define PATTERN0 (0x5555555555555555L) -+#define PATTERN1 (0xAAAAAAAAAAAAAAAAL) -+ writeq (PATTERN0, (u_longlong_t *) BankBase); -+ writeq (PATTERN1, (u_longlong_t *) PageBase1); -+ writeq (PATTERN1, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ -+ if (value != PATTERN0) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ -+ writeq (PATTERN1, (u_longlong_t *) BankBase); -+ writeq (PATTERN0, (u_longlong_t *) PageBase1); -+ writeq (PATTERN0, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ if (value != PATTERN1) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ -+ /* Bank is present, so work out its size, we store tha maximum size at the base */ -+ /* and then store the address at each address on every power of two address until */ -+ /* we reach the minimum mappable size (PAGESIZE), we then read back the value at the */ -+ /* base to determine the bank size */ -+ writeq ((u_longlong_t) BankMaxSize, (u_longlong_t *) BankBase); -+ -+ for (BankSize = (BankMaxSize>>1); BankSize > PAGESIZE; BankSize >>= 1) -+ { -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase, BankOffset + BankSize, PAGESIZE, &PageHandle) == ESUCCESS) -+ { -+ writeq (BankSize, (u_longlong_t *) PageBase); -+ UnmapDeviceRegister (dev, &PageHandle); -+ } -+ } -+ mmiob(); -+ -+ BankSize = (u_long) readq ((u_longlong_t *) BankBase); -+ -+ if (sdram_bank_limit == 0 || BankSize <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %dK\n", Instance, Bank, BankSize / 1024); -+ else -+ { -+ BankSize = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit memory bank %d to %dK\n", Instance, Bank, BankSize / 1024); -+ } -+ -+ UnmapDeviceRegister (dev, &BankHandle); -+ -+ /* Now map all of this bank into the kernel */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, BankSize, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot initialise memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ dev->SdramBanks[Bank].Size = BankSize; -+ dev->SdramBanks[Bank].Mapping = BankBase; -+ dev->SdramBanks[Bank].Handle = BankHandle; -+ -+#ifndef CONFIG_MPSAS -+ /* Initialise it for ECC */ -+ preemptable_start { -+ for (i = 0; i < BankSize; i += 8) -+ { -+ elan3_sdram_writeq (dev, (Bank << ELAN3_SDRAM_BANK_SHIFT) | i, INIT_PATTERN(BankOffset+i)); -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+#endif -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseSdram (ELAN3_DEV *dev) -+{ -+ int indx, size, b; -+ -+ for (b = 0; b < ELAN3_SDRAM_NUM_BANKS; b++) -+ { -+ ELAN3_SDRAM_BANK *bank = &dev->SdramBanks[b]; -+ -+ if (bank->Size == 0) -+ continue; -+ -+ /* allocate a ptbl group pointer for each possible ptbl group in this bank */ -+ KMEM_ZALLOC (bank->PtblGroups, ELAN3_PTBL_GR **, sizeof (ELAN3_PTBL_GR *) * bank->Size/PTBL_GROUP_SIZE, TRUE); -+ -+ /* allocate the buddy allocator bitmaps */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->Size; indx++, size <<= 1) -+ KMEM_ZALLOC (bank->Bitmaps[indx], bitmap_t *, sizeof (bitmap_t)*BT_BITOUL(bank->Size/size), TRUE); -+ -+ /* and add it to the sdram buddy allocator */ -+ elan3_sdram_add (dev, (b << ELAN3_SDRAM_BANK_SHIFT), (b << ELAN3_SDRAM_BANK_SHIFT) + bank->Size); -+ } -+} -+ -+#include -+ -+int -+ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency) -+{ -+ DeviceMappingHandle RomHandle; -+ unsigned char *RomBase; -+ unsigned char *PCIDataPtr; -+ unsigned char *VPDPtr; -+ unsigned char *lim; -+ int type; -+ int i, len, len2; -+ char name[3] = "XX"; -+ char value[256]; -+ int finished = 0; -+ -+ -+ /* default valud for CAS latency is 3 */ -+ (*CasLatency) = CAS_LATENCY_3; -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_EBUS, (ioaddr_t *) &RomBase, ELAN3_EBUS_ROM_OFFSET, ELAN3_EBUS_ROM_SIZE, &RomHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot map ROM\n", dev->Instance); -+ return (EFAIL); -+ } -+ -+ /* Check the ROM signature */ -+ if (RomBase[0] != 0x55 || RomBase[1] != 0xAA) -+ { -+ printk ("elan%d: Invalid ROM signature %02x %02x\n", dev->Instance, RomBase[0], RomBase[1]); -+ return (ESUCCESS); -+ } -+ -+ PCIDataPtr = RomBase + ((RomBase[0x19] << 8) | RomBase[0x18]); -+ -+ /* check the pci data structure */ -+ if (PCIDataPtr[0] != 'P' || PCIDataPtr[1] != 'C' || PCIDataPtr[2] != 'I' || PCIDataPtr[3] != 'R') -+ { -+ printk ("elan%d: Invalid PCI Data structure\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ /* Extract the VPD pointer */ -+ VPDPtr = RomBase + ((PCIDataPtr[9] << 8) | PCIDataPtr[8]); -+ -+ if (VPDPtr == RomBase) -+ { -+ printk ("elan%d: No Vital Product Data\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ while (! finished) -+ { -+ type = *VPDPtr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(VPDPtr++); -+ len += *(VPDPtr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ printk ("elan%d: ", dev->Instance); -+ for (i = 0; i < len; i++) -+ printk ("%c", *VPDPtr++); -+ printk ("\n"); -+ break; -+ -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = VPDPtr + len; VPDPtr < lim; ) -+ { -+ name[0] = *VPDPtr++; -+ name[1] = *VPDPtr++; -+ len2 = *VPDPtr++; -+ -+ for (i = 0; i < len2 && VPDPtr < lim; i++) -+ value[i] = *VPDPtr++; -+ value[i] = '\0'; -+ -+ if (! strcmp (name, "SN")) -+ printk ("elan%d: Serial Number - %s\n", dev->Instance, value); -+ -+ if (! strcmp (name, "Z0")) -+ (*CasLatency) = (strcmp (value, "CAS_LATENCY_2") ? CAS_LATENCY_3 : CAS_LATENCY_2); -+ } -+ break; -+ -+ default: -+ printk ("elan%d: unknown large resource %x\n", dev->Instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ printk ("elan%d: unknown small resource %x\n", dev->Instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ UnmapDeviceRegister (dev, &RomHandle); -+ return (ESUCCESS); -+} -+ -+void -+ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, ELAN3_PTBL_GR *ptg) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE] = ptg; -+} -+ -+ELAN3_PTBL_GR * -+ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ return (dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE]); -+} -+ -+void -+ElanFlushTlb (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ BumpStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+KillNegativeDma (ELAN3_DEV *dev, void *arg) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ E3_Status_Reg status; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (read_reg32 (dev, Exts.InterruptReg) & INT_DProcHalted); -+ -+ /* Initialise the trap to deliver to the offending user process */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ trap->PacketInfo.Value = 0; -+ -+ bzero (&trap->FaultSave, sizeof (trap->FaultSave)); -+ bzero (&trap->Data0, sizeof (trap->Data0)); -+ bzero (&trap->Data1, sizeof (trap->Data1)); -+ bzero (&trap->Data2, sizeof (trap->Data2)); -+ bzero (&trap->Data3, sizeof (trap->Data3)); -+ -+ /* run down the kernel dma run queue and panic on a -ve length dma */ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ panic ("KillNegativeDma: -ve sized kernel dma\n"); -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* run down the user dma run queue and "remove" and -ve length dma's */ -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ { -+ PRINTF3 (NULL, DBG_INTR, "KillNegativeDma: remove dma - context %d size %d SuspendAddr %x\n", -+ trap->Desc.s.dma_u.s.Context, trap->Desc.s.dma_size, trap->Status.s.SuspendAddr); -+ -+ trap->Status.s.TrapType = trap->Status.s.SuspendAddr; -+ trap->Status.s.Context = trap->Desc.s.dma_u.s.Context; -+ -+ DeliverDProcTrap (dev, trap, 0); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ trap->Desc.s.dma_type = 0; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = (E3_Addr) 0; -+ trap->Desc.s.dma_srcEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &trap->Desc, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: unlock dma processor\n"); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ -+ DELAY (10); -+ -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ } -+ -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: dma processor restarted\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+ForceTProcTrap (ELAN3_DEV *dev, void *arg) -+{ -+ printk ("elan%d: forced tproc trap .....\n", dev->Instance); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+PollForDmaHungup (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ E3_Status_Reg status; -+ E3_uint32 insn1, insn3; -+ register int i; -+ -+ if (read_reg32 (dev, Dma_Desc.dma_size) > E3_MAX_DMA_SIZE) -+ { -+ status.Status = read_reg32 (dev, Exts.DProcStatus); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: size %x SuspendAddr %x\n", read_reg32 (dev, Dma_Desc.dma_size), status.s.SuspendAddr); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ printk ("elan%d: PollForDmaHungup: size %x context %d SuspendAddr %x\n", -+ dev->Instance, read_reg32 (dev, Dma_Desc.dma_size), -+ status.s.Context, status.s.SuspendAddr); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: dma_size %x status %x\n", -+ read_reg32 (dev, Dma_Desc.dma_size), status.Status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, KillNegativeDma, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return; -+ } -+ } -+ -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ if (status.s.WakeupFunction == WakeupStopped) -+ { -+ E3_uint32 PC = read_reg32 (dev, ExecutePC); -+ -+ /* See if it's likely that the thread is really "stuck" on a waitevent/break -+ * instruction ......... */ -+ for (i = 0; i < 10; i++) -+ { -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ insn1 = read_reg32 (dev, IBufferReg[1]); -+ insn3 = read_reg32 (dev, IBufferReg[3]); -+ -+ if (! (status.s.WakeupFunction == WakeupStopped && read_reg32 (dev, ExecutePC) == PC && /* stopping and it could be a break/waitevent */ -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000 || /* break instruction */ -+ insn1 == 0x81b00000 || insn3 == 0x81b00000))) /* waitevent instruction */ -+ break; -+ } -+ -+ if (i == 10) -+ { -+ printk ("elan%d: forcing tproc trap from %s instruction at pc %x\n", dev->Instance, -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000) ? "break" : "waitevent", PC); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_TProcHalted, ForceTProcTrap, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ } -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 10); -+} -+ -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+ReEnableErrorInterrupts (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ReEnableErrorInterrupts: IntMask=%x\n", read_reg32 (dev, Exts.InterruptMask)); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+CheckForExcessiveErrorRate (ELAN3_DEV *dev) -+{ -+ if (dev->ErrorTime == (lbolt/hz)) -+ { -+ if (dev->ErrorsPerTick++ > 100) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "CheckForExcessiveErrorRate: too many links errors, disabling interrupt\n"); -+ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->ErrorTimeoutId, ReEnableErrorInterrupts, (void *) dev, hz); -+ } -+ } -+ else -+ { -+ dev->ErrorTime = (lbolt/hz); -+ dev->ErrorsPerTick = 0; -+ } -+} -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+HandlePciMemErr (ELAN3_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandlePciMemErr : masking out interrupt\n"); -+ -+ ElanBusError (dev); -+ panic ("elan pci memory error\n"); -+} -+ -+static void -+HandleSDRamInterrupt (ELAN3_DEV *dev) -+{ -+ E3_uint32 EccStatus0 = read_reg32 (dev, ECC_STATUS0); -+ E3_uint32 EccStatus1 = read_reg32 (dev, ECC_STATUS1); -+ unsigned long flags; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "elan: ECC error - Addr=%x UE=%x CE=%x ME=%x Syn=%x\n", -+ EccStatus0 & ECC_ADDR_MASK, EccStatus0 & ECC_UE_MASK, -+ EccStatus0 & ECC_CE_MASK, EccStatus0 & ECC_ME_MASK, -+ EccStatus1 & ECC_SYN_MASK); -+ -+ if (EccStatus0 & (ECC_UE_MASK|ECC_CE_MASK)) -+ { -+ printk ("elan%d: ECC memory error (Address=%08x Syndrome=%02x %s%s%s)\n", -+ dev->Instance, -+ (EccStatus0 & ECC_ADDR_MASK), (EccStatus1 & ECC_SYN_MASK), -+ (EccStatus0 & ECC_UE_MASK) ? "Uncorrectable " : "", -+ (EccStatus0 & ECC_CE_MASK) ? "Correctable " : "", -+ (EccStatus0 & ECC_ME_MASK) ? "Multiple Errors " : ""); -+ } -+ -+ if (EccStatus0 & ECC_UE_MASK) -+ panic ("elan: Uncorrectable ECC memory error"); -+ if (EccStatus0 & ECC_CE_MASK) -+ BumpStat (dev, CorrectableErrors); -+ if (EccStatus0 & ECC_ME_MASK) -+ BumpStat (dev, MultipleErrors); -+ -+ /* -+ * Clear the interrupt and reset the error flags. -+ * Note. Might loose an UE or CE if it occurs between reading the status and -+ * clearing the interrupt. I don't think this matters very much as the -+ * status reg will only be used to identify a bad simm. -+ */ -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | CLEAR_SDRAM_ERROR); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static int -+HandleEventInterrupt (ELAN3_DEV *dev, int nticks, unsigned long *flags) -+{ -+ E3_uint32 Fptr = dev->Event_Int_Queue_FPtr; -+ E3_uint32 Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ long tlim = lbolt + nticks; -+ long count = 0; -+ ELAN3_CTXT *ctxt; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT ((dev->InterruptMask & INT_EventInterrupt) == 0); -+ -+ while (Fptr != Bptr) -+ { -+ while (Fptr != Bptr) -+ { -+ E3_EventInt_BE EvInt; -+ E3_uint32 Context; -+ -+ /* If we're running in the interrupt handler and have seen a high -+ * rate of event interrupts then punt to the thread - however on -+ * Linux the elan interrupt handler can block the timer interrupt, -+ * and so lbolt (jiffies) is not incremented, hence we punt after -+ a number of loops instead */ -+#if defined(LINUX) -+ if (in_interrupt() && ++count > eventint_punt_loops) -+ return (EAGAIN); -+#endif -+ -+ if (nticks && ((int) (lbolt - tlim)) > 0) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x punting to thread\n", Fptr, Bptr); -+ return (EAGAIN); -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, Fptr, (void *) &EvInt, 8); /* PCI read */ -+ -+ /* The context number is held in the top 16 bits of the EventContext */ -+ Context = (EvInt.s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Context %d : Cookie %x\n", Context, EvInt.s.IntCookie); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Context); -+ -+ /* Work out new fptr, and store it in the device, since we'll be dropping the IntrLock */ -+ Fptr = E3_EVENT_INTQ_NEXT(Fptr); -+ dev->Event_Int_Queue_FPtr = Fptr; -+ -+ if (ctxt == NULL) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x context %d invalid\n", -+ Fptr, Bptr, Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ BumpStat (dev, EventInterrupts); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ QueueEventInterrupt (ctxt, EvInt.s.IntCookie); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ -+ /* Re-read the FPtr, since we've dropped the IntrLock */ -+ Fptr = dev->Event_Int_Queue_FPtr; -+ -+ /* Store the new FPtr to the elan, this also clears the interrupt. */ -+ write_reg32 (dev, Event_Int_Queue_FPtr, Fptr); /* PCI write */ -+ -+ mmiob(); -+ } -+ -+ mb(); -+ Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+SetLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ res = EAGAIN; -+ else -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "SetLinkBoundaryScan: setting link into boundary scan mode\n"); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+ClearLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "ClearLinkBoundaryScan: taking link out of boundary scan mode\n"); -+ -+ /* -+ * Take the link out of boundary scan -+ */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* -+ * Clear any link errors. -+ */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* -+ * Re-enable the error interrupts. -+ */ -+ if (! timer_fn_queued(&dev->ErrorTimeoutId)) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And stop the inputter from discarding all packets. -+ */ -+ if (--dev->DiscardAllCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+WriteBoundaryScanValue (ELAN3_DEV *dev, int value) -+{ -+ int res = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: driving value 0x%x onto link\n", value); -+ SET_SCHED_LINK_VALUE (dev, 1, value); -+ -+ res = read_reg32 (dev, Exts.LinkState); -+ -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: return 0x%x\n", res); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+ReadBoundaryScanValue(ELAN3_DEV *dev, int link) -+{ -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: set linkval 0x%x\n", link); -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ } -+ res = read_reg32 (dev, Exts.LinkState); -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: return 0x%x\n", res); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static int -+ReadLinkVal (ELAN3_DEV *dev, int link) -+{ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ -+ return (read_reg32 (dev, Exts.LinkState)); -+} -+ -+static void -+HandleLinkError (ELAN3_DEV *dev) -+{ -+ E3_uint32 value = read_reg32 (dev, Exts.LinkErrorTypes); -+ -+ PRINTF1 (DBG_DEVICE, DBG_LINKERR, "HandleLinkError: LinkErrorTypes %08x - clearing\n", value); -+ -+ if (value & LS_LockError) BumpStat (dev, LockError); -+ if (value & LS_DeskewError) BumpStat (dev, DeskewError); -+ if (value & LS_PhaseError) BumpStat (dev, PhaseError); -+ if (value & LS_DataError) BumpStat (dev, DataError); -+ if (value & LS_FifoOvFlow0) BumpStat (dev, FifoOvFlow0); -+ if (value & LS_FifoOvFlow1) BumpStat (dev, FifoOvFlow1); -+ -+ if (value & LS_DataError) -+ dev->Stats.LinkErrorValue = ReadLinkVal (dev, 12) | (ReadLinkVal (dev, 13) << 9); -+ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static void -+HandleErrorInterrupt (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ if (Pend & INT_PciMemErr) -+ HandlePciMemErr (dev); -+ -+ if (Pend & INT_SDRamInt) -+ HandleSDRamInterrupt (dev); -+ -+ if (Pend & INT_LinkError) -+ HandleLinkError (dev); -+} -+ -+static void -+HandleAnyIProcTraps (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 RestartBits = 0; -+ -+ if (Pend & INT_IProcCh0SysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrData[0])); -+ -+ RestartBits |= RestartCh0SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh1SysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrData[0])); -+ -+ RestartBits |= RestartCh1SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh0NonSysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrData[0])); -+ -+ RestartBits |= RestartCh0NonSysCntx; -+ } -+ -+ -+ if (Pend & INT_IProcCh1NonSysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrData[0])); -+ RestartBits |= RestartCh1NonSysCntx; -+ } -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+} -+ -+static void -+elan3_event_interrupt (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_event_int"); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ for (;;) -+ { -+ /* Make sure we never sleep with the EventInterrupt disabled */ -+ if (! (dev->InterruptMask & INT_EventInterrupt)) -+ { -+ if (HandleEventInterrupt (dev, eventint_resched_ticks, &flags) != ESUCCESS) -+ BumpStat (dev, EventRescheds); -+ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ } -+ -+ if (dev->ThreadsShouldStop) -+ break; -+ -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ -+ dev->EventInterruptThreadStopped = 1; -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ kernel_thread_exit (); -+} -+ -+int -+InterruptHandler (ELAN3_DEV *dev) -+{ -+ E3_uint32 Mask; -+ E3_uint32 Pend; -+ E3_uint32 RestartBits; -+ int deliverDProcTrap; -+ int deliverTProcTrap; -+ static long lboltsave; -+ int loop_count = 0; -+ unsigned long flags; -+ int tproc_delivered; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ BumpStat (dev, Interrupts); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ /* Save the lbolt so we know how long in do loop or in event handling */ -+ lboltsave = lbolt; -+ -+ if ((Pend & Mask) == INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (ESUCCESS); -+ } -+ -+ if ((Pend & Mask) == 0) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Spurious Pend %x Mask %x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EFAIL); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ do { -+ loop_count++; -+ RestartBits = 0; -+ -+ if (Pend & Mask & (INT_CProc | INT_ComQueue)) -+ HandleCProcTrap (dev, Pend, &Mask); -+ -+ tproc_delivered = 0; -+ -+ if (Pend & Mask & INT_TProc) { -+ ELAN_REG_REC(Pend); -+ tproc_delivered = 1; -+ deliverTProcTrap = HandleTProcTrap (dev, &RestartBits); -+ } -+ else -+ deliverTProcTrap = 0; -+ -+ if (Pend & Mask & INT_DProc) -+ deliverDProcTrap = HandleDProcTrap (dev, &RestartBits); -+ else -+ deliverDProcTrap = 0; -+ -+ ASSERT ((RestartBits & RestartDProc) == 0 || (read_reg32 (dev, Exts.DProcStatus.Status) >> 29) == 4); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); /* Restart any processors which had trapped. */ -+ SET_INT_MASK (dev, Mask); /* And install the new interrupt mask */ -+ -+ if ((Pend & Mask & INT_TProc) && deliverTProcTrap) -+ DeliverTProcTrap (dev, dev->ThreadTrap, Pend); -+ -+ if ((Pend & Mask & INT_DProc) && deliverDProcTrap) -+ DeliverDProcTrap (dev, dev->DmaTrap, Pend); -+ -+ if (Pend & Mask & INT_Inputters) -+ HandleAnyIProcTraps (dev, Pend); -+ -+ if (Pend & Mask & INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (loop_count == 1 && HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) /* always punt to the thread if we've */ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); /* been round the loop once */ -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ } -+ -+ if (Pend & (INT_Halted | INT_Discarding)) -+ ProcessHaltOperations (dev, Pend); -+ -+ if (Pend & Mask & INT_ErrorInterrupts) -+ HandleErrorInterrupt (dev, Pend); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ if (tproc_delivered) -+ ELAN_REG_REC(Pend); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ } while ((Pend & Mask) != 0); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "InterruptHandler: lbolt is %lx; start lbolt is %lx\n", -+ lbolt, lboltsave); -+ -+ return (ESUCCESS); -+} -+ -+void -+SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp) -+{ -+ E3_uint32 HaltMask = dev->HaltOperationsMask; -+ E3_uint32 Mask = Maskp ? *Maskp : dev->InterruptMask; -+ E3_uint32 ClearBits = 0; -+ E3_uint32 SetBits = 0; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: HaltOperationsMask=%x HaltAll=%d HaltDmaDequeue=%d HaltThread=%d DiscardAll=%d\n", -+ HaltMask, dev->HaltAllCount, dev->HaltDmaDequeueCount, dev->HaltThreadCount, dev->DiscardAllCount); -+ -+ if (dev->FlushCommandCount) -+ SetBits |= FlushCommandQueues; -+ -+ if ((HaltMask & INT_DProcHalted) || dev->HaltAllCount) -+ { -+ SetBits |= HaltDmas | HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if (dev->HaltDmaDequeueCount) -+ { -+ SetBits |= HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if ((HaltMask & INT_TProcHalted) || dev->HaltAllCount || dev->HaltThreadCount) -+ { -+ SetBits |= HaltThread; -+ if (Pend & INT_TProcHalted) -+ Mask &= ~INT_TProcHalted; -+ else -+ Mask |= INT_TProcHalted; -+ } -+ -+ if ((HaltMask & INT_DiscardingSysCntx) || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardSysCntxIn; -+ if (Pend & INT_DiscardingSysCntx) -+ Mask &= ~INT_DiscardingSysCntx; -+ else -+ Mask |= INT_DiscardingSysCntx; -+ } -+ -+ if ((HaltMask & INT_DiscardingNonSysCntx) || dev->DiscardNonContext0Count || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardNonSysCntxIn; -+ if (Pend & INT_DiscardingNonSysCntx) -+ Mask &= ~INT_DiscardingNonSysCntx; -+ else -+ Mask |= INT_DiscardingNonSysCntx; -+ } -+ -+ if (dev->HaltNonContext0Count) -+ SetBits |= StopNonSysCntxs; -+ -+ ClearBits = SetBits ^ (FlushCommandQueues | HaltDmas | HaltDmaDequeue | HaltThread | -+ DiscardSysCntxIn | DiscardNonSysCntxIn | StopNonSysCntxs); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: SetBits=%x InterruptMask=%x InterruptReg=%x Mask=%x\n", -+ SetBits, dev->InterruptMask, read_reg32 (dev, Exts.InterruptReg), Mask); -+ -+ MODIFY_SCHED_STATUS (dev, SetBits, ClearBits); -+ -+ if (Maskp) -+ *Maskp = Mask; /* copyback new interrupt mask */ -+ else -+ SET_INT_MASK(dev, Mask); -+} -+ -+void -+FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+int -+ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep) -+{ -+ ELAN3_HALTOP *op; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((dev->NumHaltOperations - dev->ReservedHaltOperations) < count) -+ { -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ KMEM_ZALLOC (op, ELAN3_HALTOP *, sizeof (ELAN3_HALTOP), cansleep); -+ -+ if (op == NULL) -+ return (FALSE); -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ -+ dev->NumHaltOperations++; -+ -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ } -+ -+ dev->ReservedHaltOperations += count; -+ -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ return (TRUE); -+} -+ -+void -+ReleaseHaltOperations (ELAN3_DEV *dev, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ dev->ReservedHaltOperations -= count; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+void -+QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement) -+{ -+ ELAN3_HALTOP *op; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock (&dev->FreeHaltLock); -+ op = dev->FreeHaltOperations; -+ -+ ASSERT (op != NULL); -+ -+ dev->FreeHaltOperations = op->Next; -+ spin_unlock (&dev->FreeHaltLock); -+ -+ op->Mask = ReqMask; -+ op->Function = (void (*)(void *, void *))Function; -+ op->Arguement = Arguement; -+ -+ dev->HaltOperationsMask |= ReqMask; /* Add our bits to the global bits needed. */ -+ SetSchedStatusRegister (dev, Pend, Maskp); /* Set the control register and the interrupt mask */ -+ -+ /* -+ * If the condition is already satisfied, then SetSchedStatusRegister will -+ * have masked out the interrupt, so re-enable it now to take it straight -+ * away -+ */ -+ if (Maskp == NULL) -+ { -+ if ((read_reg32 (dev, Exts.InterruptReg) & ReqMask) == ReqMask) -+ ENABLE_INT_MASK (dev, ReqMask); -+ } -+ else -+ { -+ if ((Pend & ReqMask) == ReqMask) -+ *Maskp |= ReqMask; -+ } -+ -+ *dev->HaltOperationsTailpp = op; /* Queue at end of list, since ProcessHaltOperations */ -+ dev->HaltOperationsTailpp = &op->Next; /* drops the IntrLock while running down the list */ -+ op->Next = NULL; -+} -+ -+void -+ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 Mask; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP **prevp; -+ E3_uint32 haltMask; -+ ELAN3_HALTOP *next; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: Pend %x\n", Pend); -+ -+ for (;;) -+ { -+ ELAN3_HALTOP *head = NULL; -+ ELAN3_HALTOP **tailp = &head; -+ -+ /* -+ * Generate a list of halt operations which can be called now. -+ */ -+ for (haltMask = 0, prevp = &dev->HaltOperations; (op = *prevp) != NULL; ) -+ { -+ if ((Pend & op->Mask) != op->Mask) -+ { -+ haltMask |= op->Mask; -+ prevp = &op->Next; -+ } -+ else -+ { -+ *prevp = op->Next; /* remove from list */ -+ if (op->Next == NULL) -+ dev->HaltOperationsTailpp = prevp; -+ -+ *tailp = op; /* add to local list */ -+ op->Next = NULL; -+ tailp = &op->Next; -+ } -+ } -+ -+ if (head == NULL) /* nothing to do, so update */ -+ { /* the schedule status register */ -+ dev->HaltOperationsMask = haltMask; /* and the interrupt mask */ -+ SetSchedStatusRegister (dev, Pend, NULL); -+ return; -+ } -+ -+ /* -+ * flush the command queues, before calling any operations -+ */ -+ Mask = dev->InterruptMask; -+ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ if (dev->HaltThreadCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ RELEASE_CPUS(); -+ -+ if (--dev->HaltThreadCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ } -+ -+ if (read_reg32 (dev, Exts.InterruptReg) & INT_CProc) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: command processor has trapped\n"); -+ HandleCProcTrap (dev, Pend, &Mask); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: interrupt mask %08x -> %08x\n", -+ dev->InterruptMask, Mask); -+ -+ SET_INT_MASK (dev, Mask); -+ spin_unlock (&dev->IntrLock); -+ -+ /* -+ * now process the list of operations -+ * we have -+ */ -+ for (op = head; op != NULL; op = next) -+ { -+ next = op->Next; -+ -+ op->Function (dev, op->Arguement); -+ -+ FreeHaltOperation (dev, op); -+ } -+ -+ spin_lock (&dev->IntrLock); -+ } -+} -+ -+int -+ComputePosition (ELAN_POSITION *pos, unsigned nodeId, unsigned numNodes, unsigned numDownLinksVal) -+{ -+ int i, lvl, n; -+ char numDownLinks[ELAN_MAX_LEVELS]; -+ -+ if (nodeId >= numNodes) -+ return (EINVAL); -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, numDownLinksVal >>= 4) -+ numDownLinks[i] = numDownLinksVal & 7; -+ -+ for (lvl = 0, n = numNodes; n > ((lvl % 3) == 2 ? 8 : 4) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = 4; -+ -+ if ((n % numDownLinks[lvl]) != 0) -+ return (EINVAL); -+ -+ n /= numDownLinks[lvl]; -+ } -+ -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = n; -+ -+ if (numDownLinks[lvl] != n) -+ return (EINVAL); -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = numDownLinks[lvl - i]; -+ -+ pos->pos_nodes = numNodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeId; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elandev_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elandev_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elandev_linux.c 2005-06-01 23:12:54.582441688 -0400 -@@ -0,0 +1,2302 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: elandev_linux.c,v 1.102.2.4 2004/12/20 16:55:17 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,2,0) -+#error please use a 2.2 series kernel or newer -+#endif -+ -+/* Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN3_DEVICE_MASK 0x3F -+ -+#define ELAN3_MINOR_CONTROL 0 -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+#define ELAN3_MINOR_SHIFT 6 -+ -+#define ELAN3_DEVICE(inode) (MINOR(inode->i_rdev) & ELAN3_DEVICE_MASK) -+#define ELAN3_MINOR(inode) (MINOR(inode->i_rdev) >> ELAN3_MINOR_SHIFT) -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -+# define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+# define IRQ_RETVAL(x) -+#endif -+ -+ -+/* -+ * Function prototypes. -+ */ -+static int elanattach(int instance, struct pci_dev *pcidev); -+static int elandetach(int instance); -+ -+static int elan3_open (struct inode *inode, struct file *file); -+static int elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan3_mmap (struct file *file, struct vm_area_struct *vm_area); -+static int elan3_release (struct inode *inode, struct file *file); -+ -+static int elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer); -+static int elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static irqreturn_t InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs); -+ -+static int ConfigurePci(ELAN3_DEV *dev); -+static int ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr); -+ -+static void elan3_shutdown_devices(int panicing); -+ -+/* -+ * Globals. -+ */ -+static ELAN3_DEV *elan3_devices[ELAN3_MAX_CONTROLLER]; -+static int NodeId = ELAN3_INVALID_NODE; -+static int NumNodes; -+static int DownLinks; -+static int RandomRoutingDisabled; -+int BackToBackMaster; -+int BackToBackSlave; -+int enable_sdram_writecombining; -+int sdram_bank_limit; -+extern int LwpNice; -+ -+char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+int elan_reg_rec_index; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan3 Device Driver"); -+ -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(NodeId,"i"); -+MODULE_PARM(NumNodes,"i"); -+MODULE_PARM(RandomRoutingDisabled,"i"); -+MODULE_PARM(DownLinks,"i"); -+MODULE_PARM(BackToBackMaster,"i"); -+MODULE_PARM(BackToBackSlave,"i"); -+MODULE_PARM(LwpNice, "i"); -+MODULE_PARM(elan3_debug, "i"); -+MODULE_PARM(elan3_debug_console, "i"); -+MODULE_PARM(elan3_debug_buffer, "i"); -+MODULE_PARM(elan3mmu_debug, "i"); -+MODULE_PARM(sdram_bank_limit, "i"); -+ -+/* elan3/os/context.c */ -+EXPORT_SYMBOL(elan3_alloc); -+EXPORT_SYMBOL(elan3_attach); -+EXPORT_SYMBOL(elan3_doattach); -+EXPORT_SYMBOL(elan3_free); -+EXPORT_SYMBOL(elan3_detach); -+EXPORT_SYMBOL(elan3_dodetach); -+EXPORT_SYMBOL(elan3_block_inputter); -+EXPORT_SYMBOL(CheckCommandQueueFlushed); -+ -+/* elan3/os/sdram.c */ -+EXPORT_SYMBOL(elan3_sdram_alloc); -+EXPORT_SYMBOL(elan3_sdram_free); -+EXPORT_SYMBOL(elan3_sdram_to_phys); -+EXPORT_SYMBOL(elan3_sdram_writeb); -+EXPORT_SYMBOL(elan3_sdram_writew); -+EXPORT_SYMBOL(elan3_sdram_writel); -+EXPORT_SYMBOL(elan3_sdram_writeq); -+EXPORT_SYMBOL(elan3_sdram_readb); -+EXPORT_SYMBOL(elan3_sdram_readw); -+EXPORT_SYMBOL(elan3_sdram_readl); -+EXPORT_SYMBOL(elan3_sdram_readq); -+EXPORT_SYMBOL(elan3_sdram_zerob_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerow_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerol_sdram); -+EXPORT_SYMBOL(elan3_sdram_zeroq_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_from_sdram); -+ -+/* elan3/os/tproc.c */ -+EXPORT_SYMBOL(DeliverTProcTrap); -+EXPORT_SYMBOL(HandleTProcTrap); -+EXPORT_SYMBOL(SaveThreadToStack); -+ -+/* elan3/os/tprocinsts.c */ -+EXPORT_SYMBOL(RollThreadToClose); -+ -+/* elan3/os/iproc.c */ -+EXPORT_SYMBOL(InspectIProcTrap); -+EXPORT_SYMBOL(IProcTrapString); -+EXPORT_SYMBOL(SimulateUnlockQueue); -+ -+/* elan3/os/cproc.c */ -+EXPORT_SYMBOL(HandleCProcTrap); -+ -+/* elan3/os/route_table.c */ -+EXPORT_SYMBOL(GenerateRoute); -+EXPORT_SYMBOL(LoadRoute); -+EXPORT_SYMBOL(InvalidateRoute); -+EXPORT_SYMBOL(ValidateRoute); -+EXPORT_SYMBOL(ClearRoute); -+EXPORT_SYMBOL(GenerateProbeRoute); -+EXPORT_SYMBOL(GenerateCheckRoute); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(elan3_debug); -+EXPORT_SYMBOL(QueueHaltOperation); -+EXPORT_SYMBOL(ReleaseHaltOperations); -+EXPORT_SYMBOL(ReserveHaltOperations); -+ -+/* elan3/vm/elan3mmu_generic.c */ -+EXPORT_SYMBOL(elan3mmu_pteload); -+EXPORT_SYMBOL(elan3mmu_unload); -+EXPORT_SYMBOL(elan3mmu_set_context_filter); -+EXPORT_SYMBOL(elan3mmu_reserve); -+EXPORT_SYMBOL(elan3mmu_attach); -+EXPORT_SYMBOL(elan3mmu_detach); -+EXPORT_SYMBOL(elan3mmu_release); -+/* elan3/vm/elan3mmu_linux.c */ -+EXPORT_SYMBOL(elan3mmu_phys_to_pte); -+EXPORT_SYMBOL(elan3mmu_kernel_invalid_pte); -+ -+/* elan3/os/elan3_debug.c */ -+EXPORT_SYMBOL(elan3_debugf); -+ -+/* elan3/os/minames.c */ -+EXPORT_SYMBOL(MiToName); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(MapDeviceRegister); -+EXPORT_SYMBOL(UnmapDeviceRegister); -+ -+EXPORT_SYMBOL(elan_reg_rec_lbolt); -+EXPORT_SYMBOL(elan_reg_rec_file); -+EXPORT_SYMBOL(elan_reg_rec_index); -+EXPORT_SYMBOL(elan_reg_rec_cpu); -+EXPORT_SYMBOL(elan_reg_rec_reg); -+EXPORT_SYMBOL(elan_reg_rec_line); -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static struct notifier_block elan3_dump_notifier = -+{ -+ notifier_call: elan3_dump_event, -+ priority: 0, -+}; -+ -+static int -+elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ( event == DUMP_BEGIN ) -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#endif -+ -+static struct file_operations elan3_fops = { -+ ioctl: elan3_ioctl, /* ioctl */ -+ mmap: elan3_mmap, /* mmap */ -+ open: elan3_open, /* open */ -+ release: elan3_release, /* release */ -+}; -+ -+static struct notifier_block elan3_reboot_notifier = -+{ -+ notifier_call: elan3_reboot_event, -+ priority: 0, -+}; -+ -+static struct notifier_block elan3_panic_notifier = -+{ -+ notifier_call: elan3_panic_event, -+ priority: 0, -+}; -+ -+ELAN3_DEV * -+elan3_device (int instance) -+{ -+ if (instance < 0 || instance >= ELAN3_MAX_CONTROLLER) -+ return ((ELAN3_DEV *) NULL); -+ return elan3_devices[instance]; -+} -+EXPORT_SYMBOL(elan3_device); -+ -+/* -+ * Called at rmmod time. elandetach() for each card + general cleanup. -+ */ -+#ifdef MODULE -+static void __exit elan3_exit(void) -+{ -+ int i; -+ -+ printk("elan: preparing to remove module\n"); -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan3_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan3_reboot_notifier); -+ notifier_chain_unregister (&panic_notifier_list, &elan3_panic_notifier); -+ -+ /* call elandetach() for each device configured. */ -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ if (elan3_devices[i] != NULL) -+ elandetach(i); -+ -+ FinaliseNetworkErrorResolver(); -+ elan3mmu_fini(); -+ -+ cookie_fini(); -+ unregister_chrdev(ELAN3_MAJOR, ELAN3_NAME); -+ -+ elan3_procfs_fini(); -+ -+ printk("elan: module removed\n"); -+} -+ -+/* -+ * Called at insmod time. First we perform general driver initialization, -+ * then call elanattach() for each card. -+ */ -+#ifdef MODULE -+static int __init elan3_init(void) -+#else -+__initfunc(int elan3_init(void)) -+#endif -+{ -+ int e; -+ int boards; -+ struct pci_dev *dev; -+ char revid; -+ -+ elan_reg_rec_index=0; -+ { -+ int i; -+ for(i=0;islot_name); -+ else -+ { -+ if (boards < ELAN3_MAX_CONTROLLER) -+ /* Count successfully attached devices */ -+ boards += ((elanattach(boards, dev) == 0) ? 1 : 0); -+ else -+ { -+ printk ("elan: max controllers = %d\n", ELAN3_MAX_CONTROLLER); -+ break; -+ } -+ } -+ } -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan3_dump_notifier); -+#endif -+ register_reboot_notifier (&elan3_reboot_notifier); -+ notifier_chain_register (&panic_notifier_list, &elan3_panic_notifier); -+ -+ return 0; -+} -+ -+/* Declare the module init and exit functions */ -+module_init(elan3_init); -+module_exit(elan3_exit); -+ -+#endif -+ -+static void -+elan3_shutdown_devices(int panicing) -+{ -+ ELAN3_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan3_devices[i]) != NULL) -+ { -+ if (! panicing) spin_lock (&dev->IntrLock); -+ -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->Instance); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ dev->LinkShutdown = 1; -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ -+ if (! panicing) spin_unlock (&dev->IntrLock); -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+static int -+elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (! (event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ return (NOTIFY_DONE); -+ -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+static int -+elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan3_shutdown_devices (TRUE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#include -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elanattach(int instance, struct pci_dev *pcidev) -+{ -+ ELAN3_DEV *dev; -+ int ramSize; -+ int level; -+ ioaddr_t sdramAddr, cmdPortAddr, intPalAddr; -+ DeviceMappingHandle handle; -+ -+ printk("elan%d: attach, irq=%d\n", instance, pcidev->irq); -+ -+ /* -+ * Allocate the ELAN3_DEV structure. -+ */ -+ KMEM_ZALLOC(dev, ELAN3_DEV *, sizeof(ELAN3_DEV), TRUE); -+ if (dev == NULL) { -+ printk ("elan%d: KMEM_ALLOC failed\n", instance); -+ return (-ENOMEM); -+ } -+ elan3_devices[instance] = dev; -+ dev->Osdep.pci = pcidev; -+ -+ dev->Instance = instance; -+ -+ /* Initialise the device information */ -+ pci_read_config_word (pcidev, PCI_VENDOR_ID, &dev->Devinfo.dev_vendor_id); -+ pci_read_config_word (pcidev, PCI_DEVICE_ID, &dev->Devinfo.dev_device_id); -+ pci_read_config_byte (pcidev, PCI_REVISION_ID, &dev->Devinfo.dev_revision_id); -+ -+ dev->Devinfo.dev_instance = instance; -+ dev->Devinfo.dev_rail = instance; -+ dev->Devinfo.dev_driver_version = 0; -+ dev->Devinfo.dev_num_down_links_value = DownLinks; -+ -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_random_disabled = RandomRoutingDisabled; -+ -+ /* -+ * Set up PCI config regs. -+ */ -+ if (ConfigurePci(dev) != ESUCCESS) -+ goto fail0; -+ -+ /* -+ * Determine the PFnums of the SDRAM and command port -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_SDRAM, &sdramAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail1; -+ -+ DeviceRegisterSize(dev, ELAN3_BAR_SDRAM, &ramSize); -+ -+ dev->SdramPhysMask = ~((physaddr_t) ramSize - 1); -+ dev->SdramPhysBase = kmem_to_phys((void *) sdramAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+#if defined(LINUX_ALPHA) -+ /* -+ * consider a physical address to be on the same pci bus -+ * as us if it's physical address is "close" to our sdram -+ * physical address. -+ * this is almost certainly incorrect for large memory (> 2Gb) -+ * i386 machines - and is only correct for alpha for 32 bit -+ * base address registers. -+ * -+ * Modified this to match the Tru64 driver value; -+ * i.e. PciPhysMask = 0xfffffffffffc0000 -+ */ -+# define PCI_ADDR_MASK (0x7FFFFFFFl) -+ -+ dev->PciPhysMask = ~PCI_ADDR_MASK; -+ dev->PciPhysBase = dev->SdramPhysBase & dev->PciPhysMask; -+#endif -+ /* -+ * Now reset the elan chip. -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_REGISTERS, &dev->RegPtr, 0, 0, &dev->RegHandle) != ESUCCESS) -+ goto fail1; -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_EBUS, &intPalAddr, ELAN3_EBUS_INTPAL_OFFSET, PAGESIZE, -+ &handle) != ESUCCESS) -+ goto fail2; -+ -+ ResetElan(dev, intPalAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* -+ * Initialise the device mutex's which must be accessible from the -+ * interrupt handler. -+ */ -+ kcondvar_init (&dev->IntrWait); -+ spin_lock_init (&dev->IntrLock); -+ spin_lock_init (&dev->TlbLock); -+ spin_lock_init (&dev->CProcLock); -+ spin_lock_init (&dev->FreeHaltLock); -+ for(level=0; level<4; level++) -+ spin_lock_init (&dev->Level[level].PtblLock); -+ spin_lock_init (&dev->PtblGroupLock); -+ -+ /* -+ * Add the interrupt handler, -+ */ -+ if (request_irq(dev->Osdep.pci->irq, InterruptHandlerWrapper, -+ SA_SHIRQ, "elan3", dev) != 0) { -+ printk ("elan%d: request_irq failed\n", instance); -+ goto fail3; -+ } -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_COMMAND_PORT, &cmdPortAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail4; -+ -+ if (InitialiseElan(dev, cmdPortAddr) == EFAIL) { -+ printk ("elan%d: InitialiseElan failed\n", instance); -+ UnmapDeviceRegister (dev, &handle); -+ goto fail4; -+ } -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* If our nodeid is defined, then set it now */ -+ if (NodeId != ELAN3_INVALID_NODE && ComputePosition (&dev->Position, NodeId, NumNodes, DownLinks) == 0) -+ { -+ if (RandomRoutingDisabled & ((1 << (dev->Position.pos_levels-1))-1)) -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing disabled 0x%x)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes, RandomRoutingDisabled); -+ else -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing ok)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ } -+ -+ if (BackToBackMaster || BackToBackSlave) -+ { -+ dev->Position.pos_mode = ELAN_POS_MODE_BACKTOBACK; -+ dev->Position.pos_nodeid = (BackToBackMaster == 0); -+ dev->Position.pos_nodes = 2; -+ dev->Position.pos_levels = 1; -+ dev->Position.pos_arity[0] = 2; -+ -+ printk ("elan%d: back-to-back %s - elan node %d\n", dev->Instance, -+ BackToBackMaster ? "master" : "slave", dev->Position.pos_nodeid); -+ } -+ -+ elan3_procfs_device_init (dev); -+ -+ /* Success */ -+ return (0); -+ -+fail4: -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+fail3: -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+fail2: -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+fail1: -+ pci_disable_device (dev->Osdep.pci); -+fail0: -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ -+ elan3_devices[instance] = NULL; -+ -+ /* Failure */ -+ return (-ENODEV); -+} -+ -+/* -+ * Called by elan3_exit() for each board found on PCI. -+ */ -+static int -+elandetach(int instance) -+{ -+ ELAN3_DEV *dev = elan3_devices[instance]; -+ -+ printk("elan%d: detach\n", instance); -+ -+ elan3_procfs_device_fini (dev); -+ -+ FinaliseElan (dev); -+ -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+ pci_disable_device(dev->Osdep.pci); -+ -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ elan3_devices[instance] = NULL; -+ -+ return 0; -+} -+ -+/* -+ * generic ioctls - available on control and user devices. -+ */ -+ -+static int -+device_stats_ioctl (ELAN3_DEV *dev, unsigned long arg) -+{ -+ ELAN3IO_STATS_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_STATS_STRUCT *, sizeof(ELAN3IO_STATS_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_STATS_STRUCT))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ switch (args->which) -+ { -+ case ELAN3_SYS_STATS_DEVICE: -+ if (copy_to_user (args->ptr, &dev->Stats, sizeof (ELAN3_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ case ELAN3_SYS_STATS_MMU: -+ if (copy_to_user (args->ptr, &elan3mmu_global_stats, sizeof (ELAN3MMU_GLOBAL_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ default: -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EINVAL); -+ } -+} -+ -+/* -+ * /dev/elan3/controlX - control device -+ * -+ */ -+ -+typedef struct control_private -+{ -+ u_int pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr; -+ -+ KMEM_ALLOC(pr, CONTROL_PRIVATE *, sizeof (CONTROL_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ if (pr->pr_boundary_scan) -+ ClearLinkBoundaryScan(dev); -+ -+ KMEM_FREE (pr, sizeof(CONTROL_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ int res; -+ -+ switch (cmd) -+ { -+ case ELAN3IO_SET_BOUNDARY_SCAN: -+ if (SetLinkBoundaryScan (dev) == 0) -+ pr->pr_boundary_scan = 1; -+ return (0); -+ -+ case ELAN3IO_CLEAR_BOUNDARY_SCAN: -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ pr->pr_boundary_scan = 0; -+ -+ ClearLinkBoundaryScan (dev); -+ return (0); -+ -+ case ELAN3IO_READ_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = ReadBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_WRITE_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = WriteBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_POSITION: -+ { -+ ELAN3IO_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (ComputePosition (&dev->Position, args.nodeId, args.numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (-EINVAL); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_DEBUG: -+ { -+ ELAN3IO_SET_DEBUG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_DEBUG_STRUCT))) -+ return (-EFAULT); -+ -+ if (! strcmp (args.what, "elan3_debug")) -+ elan3_debug = args.value; -+ else if (! strcmp (args.what, "elan3_debug_console")) -+ elan3_debug_console = args.value; -+ else if (! strcmp (args.what, "elan3_debug_buffer")) -+ elan3_debug_buffer = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_dev")) -+ elan3_debug_ignore_dev = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_ctxt")) -+ elan3_debug_ignore_ctxt = args.value; -+ else if (! strcmp (args.what, "elan3mmu_debug")) -+ elan3mmu_debug = args.value; -+ -+ return (0); -+ } -+ -+ case ELAN3IO_NETERR_SERVER: -+ { -+ ELAN3IO_NETERR_SERVER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_NETERR_SERVER_STRUCT))) -+ return (-EFAULT); -+ -+ res = AddNeterrServerSyscall (args.elanid, args.addr, args.name, NULL); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_NETERR_FIXUP: -+ { -+ NETERR_MSG *msg; -+ -+ KMEM_ALLOC(msg, NETERR_MSG *, sizeof (NETERR_MSG), TRUE); -+ -+ if (msg == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (msg, (void *) arg, sizeof (NETERR_MSG))) -+ res = EFAULT; -+ else -+ res = ExecuteNetworkErrorFixup (msg); -+ -+ KMEM_FREE (msg, sizeof (NETERR_MSG)); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_STATS: -+ return (device_stats_ioctl (dev, arg)); -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ if (copy_to_user ((void *) arg, &dev->Devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ if (copy_to_user ((void *) arg, &dev->Position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ default: -+ return (-EINVAL); -+ } -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(file->f_dentry->d_inode)]; -+ int space = OFF_TO_SPACE(vma->vm_pgoff << PAGE_SHIFT); -+ int off = OFF_TO_OFFSET(vma->vm_pgoff << PAGE_SHIFT); -+ int size; -+ ioaddr_t addr; -+ DeviceMappingHandle handle; -+ physaddr_t phys; -+ -+ if (space < ELAN3_BAR_SDRAM || space > ELAN3_BAR_EBUS) -+ return (-EINVAL); -+ -+ if (off < 0 || DeviceRegisterSize (dev, space, &size) != ESUCCESS || off > size) -+ return (-EINVAL); -+ -+ if (MapDeviceRegister(dev, space, &addr, off, PAGESIZE, &handle) != ESUCCESS) -+ return (-EINVAL); -+ -+ phys = kmem_to_phys((caddr_t) addr); -+ UnmapDeviceRegister(dev, &handle); -+ -+#ifdef NO_RMAP -+ if (remap_page_range(vma->vm_start, phys, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#else -+ if (remap_page_range(vma, vma->vm_start, phys, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#endif -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan3/sdramX - sdram access device -+ */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ u_long pg_pgoff; -+ u_int pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ ELAN3_DEV *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan3_sdram_free (pr->pr_dev, pg->pg_addr, PAGE_SIZE); -+ KMEM_FREE (pg, sizeof(MEM_PAGE)); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, u_long pgoff, virtaddr_t addr) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx\n", pr, pgoff, addr); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx -> found %p addr=%lx\n", pr, pgoff, addr, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ KMEM_ALLOC(npg, MEM_PAGE *, sizeof (MEM_PAGE), TRUE); -+ -+ if (npg == NULL) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan3_sdram_alloc (pr->pr_dev, PAGE_SIZE)) == 0) -+ { -+ KMEM_FREE (npg, sizeof (MEM_PAGE)); -+ return (NULL); -+ } -+ -+ /* zero the page before returning it to the user */ -+ elan3_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, PAGE_SIZE); -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, u_long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ KMEM_ALLOC(pr, MEM_PRIVATE *, sizeof (MEM_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ KMEM_FREE (pr, sizeof (MEM_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void mem_vma_open(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) { -+ mem_getpage (pr, pgoff, addr); -+ preemptable_check(); -+ } -+ } preemptable_end; -+} -+ -+static void mem_vma_close(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occured -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff, 0); -+} -+ -+static struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx prot=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_page_prot.pgprot , file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff, addr)) == NULL) -+ goto failed; -+ -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+#if defined(__ia64__) -+ if (enable_sdram_writecombining) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+#endif -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: addr %lx -> pg=%p addr=%lx phys=%lx flags=%lx prot=%lx\n", -+ addr, pg, pg->pg_addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), vma->vm_flags, vma->vm_page_prot.pgprot); -+ -+#ifdef NO_RMAP -+ if (remap_page_range (addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), PAGE_SIZE, vma->vm_page_prot)) -+#else -+ if (remap_page_range (vma, addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), PAGE_SIZE, vma->vm_page_prot)) -+#endif -+ { -+ mem_droppage (pr, pgoff, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+ -+ /* Don't try to swap out Elan SDRAM pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump SDRAM pages to a core file -+ * (Pity I would really like to do this but it crashes in elf_core_dump() as -+ * it can only handle pages that are in the mem_map area (addy 11/01/2002)) -+ */ -+ vma->vm_flags |= VM_IO; -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+ -+ failed: -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: failed\n"); -+ -+ /* free of any pages we've already allocated/referenced */ -+ while ((--pgoff) >= vma->vm_pgoff) -+ mem_droppage (pr, pgoff, 0); -+ -+ return (-ENOMEM); -+} -+ -+/* -+ * /dev/elan3/userX - control device -+ * -+ * "user_private" can be referenced from a number of places -+ * 1) the "file" structure. -+ * 2) the "mm" coproc ops -+ * 3) the "mmap" of the command port. -+ * -+ */ -+typedef struct user_private -+{ -+ spinlock_t pr_lock; -+ atomic_t pr_mappings; -+ atomic_t pr_ref; -+ ELAN3_CTXT *pr_ctxt; -+ struct mm_struct *pr_mm; -+ coproc_ops_t pr_coproc; -+} USER_PRIVATE; -+ -+static void -+user_free (USER_PRIVATE *pr) -+{ -+ /* Have to unreserve the FlagPage or else we leak memory like a sieve! */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) pr->pr_ctxt->FlagPage))); -+ -+ elan3_detach(pr->pr_ctxt); -+ elan3_free (pr->pr_ctxt); -+ -+ KMEM_FREE (pr, sizeof(USER_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+static void -+user_coproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_SEG, "user_coproc_release: ctxt=%p pr=%p ref=%d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref)); -+ -+ elan3mmu_pte_ctxt_unload (pr->pr_ctxt->Elan3mmu); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+} -+ -+static void -+user_coproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_coproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_coproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_coproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_coproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ ASSERT(start <= end && ((start & PAGEOFFSET) == 0) && ((end & PAGEOFFSET) == 0)); -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_coproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu, mm,(caddr_t) start, end-start); -+} -+ -+static void -+user_coproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_coproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_coproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_coproc_sync_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_coproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_coproc_invalidate_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_coproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_coproc_update_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu,vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+int -+user_ptrack_handler (void *arg, int phase, struct task_struct *child) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ -+ PRINTF5 (pr->pr_ctxt, DBG_FN, "user_ptrack_handler: ctxt=%p pr=%p ref=%d phase %d mm->ref %d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref), phase, atomic_read (¤t->mm->mm_count)); -+ -+ if (phase == PTRACK_PHASE_EXIT) -+ { -+ /* this will force the helper thread to exit */ -+ elan3_swapout (ctxt, CTXT_EXITING); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ } -+ return PTRACK_FINISHED; -+} -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ USER_PRIVATE *pr; -+ ELAN3_CTXT *ctxt; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC(pr, USER_PRIVATE *, sizeof (USER_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ if ((ctxt = elan3_alloc (dev, 0)) == NULL) -+ { -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ if (sys_init (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* initialise refcnt to 3 - one for "file", one for XA handler, one for the coproc ops */ -+ atomic_set (&pr->pr_ref, 3); -+ -+ atomic_set (&pr->pr_mappings, 0); -+ spin_lock_init (&pr->pr_lock); -+ -+ pr->pr_ctxt = ctxt; -+ pr->pr_mm = current->mm; -+ -+ /* register an ptrack handler to force the helper thread to exit when we do */ -+ if (ptrack_register (user_ptrack_handler, pr) < 0) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* register a coproc callback to notify us of translation changes */ -+ -+ pr->pr_coproc.arg = (void *) pr; -+ pr->pr_coproc.release = user_coproc_release; -+ pr->pr_coproc.sync_range = user_coproc_sync_range; -+ pr->pr_coproc.invalidate_range = user_coproc_invalidate_range; -+ pr->pr_coproc.update_range = user_coproc_update_range; -+ pr->pr_coproc.change_protection = user_coproc_change_protection; -+ pr->pr_coproc.sync_page = user_coproc_sync_page; -+ pr->pr_coproc.invalidate_page = user_coproc_invalidate_page; -+ pr->pr_coproc.update_page = user_coproc_update_page; -+ -+ spin_lock (¤t->mm->page_table_lock); -+ register_coproc_ops (current->mm, &pr->pr_coproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ file->private_data = (void *) pr; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_FN, "user_open: done ctxt=%p pr=%p\n", ctxt, pr); -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_FN, "user_release: ctxt=%p pr=%p ref=%d\n", pr->pr_ctxt, pr, -+ atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res = 0; -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ PRINTF4 (ctxt, DBG_FN, "user_ioctl: ctxt=%p cmd=%x(%d) arg=%lx\n", ctxt, cmd, _IOC_NR(cmd), arg); -+ -+ switch (cmd) -+ { -+ case ELAN3IO_FREE: -+ if (atomic_read (&pr->pr_mappings) > 0) -+ return (-EINVAL); -+ -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ unregister_coproc_ops (current->mm, &pr->pr_coproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_coproc_release (pr, current->mm); -+ } -+ -+ if (ptrack_registered (user_ptrack_handler, pr)) -+ { -+ ptrack_deregister (user_ptrack_handler, pr); -+ user_ptrack_handler (pr, PTRACK_PHASE_EXIT, NULL); -+ } -+ break; -+ -+ case ELAN3IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC(cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ if (cap == NULL) -+ return (set_errno (EFAULT)); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = EFAULT; -+ else -+ { -+ if ((res = elan3_attach (ctxt, cap)) == 0) -+ { -+ if (copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ elan3_detach (ctxt); -+ res = EFAULT; -+ } -+ } -+ } -+ KMEM_FREE (cap, sizeof(ELAN_CAPABILITY)); -+ break; -+ } -+ -+ case ELAN3IO_DETACH: -+ spin_lock (&pr->pr_lock); -+ if (atomic_read (&pr->pr_mappings) > 0) -+ res = EINVAL; -+ else -+ elan3_detach (ctxt); -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_ADDVP: -+ { -+ ELAN3IO_ADDVP_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_ADDVP_STRUCT *, sizeof (ELAN3IO_ADDVP_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_ADDVP_STRUCT))) -+ res = EFAULT; -+ else -+ { -+ if ( (res=elan3_addvp (ctxt, args->process, &args->capability)) != 0) -+ PRINTF0 (ctxt, DBG_FN, "ELAN3IO_ADDVP elan3_addvp failed \n"); -+ } -+ -+ KMEM_FREE (args, sizeof (ELAN3IO_ADDVP_STRUCT)); -+ break; -+ } -+ -+ case ELAN3IO_REMOVEVP: -+ res = elan3_removevp (ctxt, arg); -+ break; -+ -+ case ELAN3IO_BCASTVP: -+ { -+ ELAN3IO_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_addbcastvp (ctxt, args.process, args.lowvp, args.highvp); -+ break; -+ } -+ -+ case ELAN3IO_LOAD_ROUTE: -+ { -+ ELAN3IO_LOAD_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_load_route (ctxt, args.process, args.flits); -+ break; -+ } -+ -+ case ELAN3IO_CHECK_ROUTE: -+ { -+ ELAN3IO_CHECK_ROUTE_STRUCT args; -+ -+ args.routeError = 0; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_check_route (ctxt, args.process, args.flits, & args.routeError)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_PROCESS_2_LOCATION: -+ { -+ ELAN3IO_PROCESS_2_LOCATION_STRUCT args; -+ ELAN_LOCATION loc; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ krwlock_write (&ctxt->VpLock); -+ loc = ProcessToLocation (ctxt, NULL, args.process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ args.loc = loc; -+ -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ break; -+ } -+ -+ case ELAN3IO_GET_ROUTE: -+ { -+ ELAN3IO_GET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_get_route (ctxt, args.process, args.flits)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_RESET_ROUTE: -+ { -+ ELAN3IO_RESET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_RESET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_reset_route (ctxt, args.process); -+ break; -+ } -+ -+ case ELAN3IO_VP2NODEID: -+ { -+ ELAN3IO_VP2NODEID_STRUCT *vp2nodeId; -+ ELAN_LOCATION location; -+ -+ KMEM_ALLOC (vp2nodeId, ELAN3IO_VP2NODEID_STRUCT *, sizeof(ELAN3IO_VP2NODEID_STRUCT), TRUE); -+ if (vp2nodeId == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (vp2nodeId, (void *) arg, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ krwlock_write (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, vp2nodeId->process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ vp2nodeId->nodeId = location.loc_node; -+ if (copy_to_user ( (void *) arg, vp2nodeId, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ -+ break; -+ } -+ -+ case ELAN3IO_PROCESS: -+ return (elan3_process (ctxt)); -+ -+ case ELAN3IO_SETPERM: -+ { -+ ELAN3IO_SETPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SETPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3mmu_setperm (ctxt->Elan3mmu, args.maddr, args.eaddr, args.len, args.perm); -+ break; -+ } -+ -+ case ELAN3IO_CLEARPERM: -+ { -+ ELAN3IO_CLEARPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CLEARPERM_STRUCT))) -+ return (-EFAULT); -+ -+ elan3mmu_clrperm (ctxt->Elan3mmu, args.eaddr, args.len); -+ break; -+ } -+ -+ case ELAN3IO_CHANGEPERM: -+ { -+ ELAN3IO_CHANGEPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CHANGEPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = EINVAL; -+ break; -+ } -+ -+ case ELAN3IO_HELPER_THREAD: -+ res = elan3_lwp (ctxt); -+ break; -+ -+ case ELAN3IO_WAITCOMMAND: -+ res = WaitForCommandPort (ctxt); -+ break; -+ -+ case ELAN3IO_BLOCK_INPUTTER: -+ elan3_block_inputter (ctxt, arg); -+ break; -+ -+ case ELAN3IO_SET_FLAGS: -+ sctx->Flags = arg; -+ break; -+ -+ case ELAN3IO_SET_SIGNAL: -+ sctx->signal = arg; -+ break; -+ -+ case ELAN3IO_WAITEVENT: -+ res = sys_waitevent (ctxt, (E3_Event *) arg); -+ break; -+ -+ case ELAN3IO_ALLOC_EVENTCOOKIE: -+ res = cookie_alloc_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_FREE_EVENTCOOKIE: -+ res = cookie_free_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_ARM_EVENTCOOKIE: -+ res = cookie_arm_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_WAIT_EVENTCOOKIE: -+ res = cookie_wait_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_SWAPSPACE: -+ if (fuword (&((SYS_SWAP_SPACE *) arg)->Magic) != SYS_SWAP_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Swap = (SYS_SWAP_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_EXCEPTION_SPACE: -+ if (fuword (&((SYS_EXCEPTION_SPACE *) arg)->Magic) != SYS_EXCEPTION_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Exceptions = (SYS_EXCEPTION_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_GET_EXCEPTION: -+ { -+ SYS_EXCEPTION *exception; -+ -+ if (((SYS_CTXT *) ctxt->Private)->Exceptions == NULL) -+ return (set_errno (EINVAL)); -+ -+ KMEM_ALLOC(exception, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (exception == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if ((res = sys_getException (((SYS_CTXT *) ctxt->Private), exception)) == 0 && -+ copy_to_user ((void *) arg, exception, sizeof (SYS_EXCEPTION))) -+ res = EFAULT; -+ -+ KMEM_FREE (exception, sizeof (SYS_EXCEPTION)); -+ break; -+ } -+ -+ case ELAN3IO_UNLOAD: -+ { -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3IO_UNLOAD_STRUCT args; -+ int span; -+ unsigned long flags; -+ E3_Addr eaddr; -+ caddr_t addr; -+ size_t len; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_UNLOAD_STRUCT))) -+ return (-EFAULT); -+ -+ addr = (caddr_t) args.addr; -+ len = args.len; -+ -+ if (((unsigned long) addr & PAGEMASK) || (len & PAGEMASK) || (len < 0)) -+ return -EINVAL; -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_lock, flags); -+ for (; len; len -= span, addr += span) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } -+ } -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_lock, flags); -+ -+ return 0; -+ } -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ ELAN3IO_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.devinfo, &ctxt->Device->Devinfo, sizeof (ELAN_DEVINFO))) -+ res = EFAULT; -+ break; -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ ELAN3IO_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.position, &ctxt->Device->Position, sizeof (ELAN_POSITION))) -+ res = EFAULT; -+ break; -+ } -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ return (res ? set_errno (res) : 0); -+} -+ -+static void user_vma_open(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ if (atomic_dec_and_test (&pr->pr_mappings)) -+ pr->pr_ctxt->CommandPageMapping = NULL; -+} -+ -+static void user_vma_close(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ atomic_inc (&pr->pr_mappings); -+} -+ -+static struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ ioaddr_t ioaddr; -+ -+ /* -+ * NOTE - since we need to maintain the reference count on -+ * the user_private we only permit single page -+ * mmaps - this means that we will certainly see -+ * the correct number of closes to maintain the -+ * the reference count correctly. -+ */ -+ -+ if ((vma->vm_end - vma->vm_start) != PAGE_SIZE) -+ return (-EINVAL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx flags=%lx prot=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, vma->vm_page_prot.pgprot, vma->vm_file); -+ -+ switch (vma->vm_pgoff) -+ { -+ default: -+ return (-EINVAL); -+ -+ case ELAN3IO_OFF_COMMAND_PAGE: -+ spin_lock (&pr->pr_lock); -+ if (ctxt->CommandPage == (ioaddr_t) 0 || atomic_read (&pr->pr_mappings) != 0) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: command port - %s\n", ctxt->CommandPort ? "already mapped" : "not attached"); -+ spin_unlock (&pr->pr_lock); -+ return (-EINVAL); -+ } -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: commandport at %lx phys %llx prot %lx\n", -+ vma->vm_start, (unsigned long long) kmem_to_phys ((void *) ctxt->CommandPort), vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ctxt->CommandPage), vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ctxt->CommandPage), vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#endif -+ { -+ spin_unlock (&pr->pr_lock); -+ return (-EAGAIN); -+ } -+ ctxt->CommandPageMapping = (void *) vma->vm_start; -+ -+ atomic_inc (&pr->pr_mappings); -+ -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_OFF_UREG_PAGE: -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ ioaddr = ctxt->Device->RegPtr + (offsetof (E3_Regs, URegs) & PAGEMASK); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: user_regs at %lx phys %llx prot %lx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ioaddr), vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ioaddr), -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ioaddr), -+#endif -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ -+ case ELAN3IO_OFF_FLAG_PAGE: -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: flag page at %lx phys %llx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ctxt->FlagPage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) ctxt->FlagPage))); -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ctxt->FlagPage), -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ctxt->FlagPage), -+#endif -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ } -+ -+ ASSERT (vma->vm_ops == NULL); -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+} -+ -+/* driver entry points */ -+static int -+elan3_open (struct inode *inode, struct file *file) -+{ -+ if (elan3_devices[ELAN3_DEVICE(inode)] == NULL) -+ return (-ENXIO); -+ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_open: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_release: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+ -+static int -+elan3_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "elan3_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx flags=%lx prot=%lx\n", -+ ELAN3_DEVICE (file->f_dentry->d_inode), ELAN3_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, vma->vm_page_prot.pgprot); -+ -+ switch (ELAN3_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN3_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN3_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static irqreturn_t -+InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ if (InterruptHandler ((ELAN3_DEV *)dev_id) == 0) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ -+/* -+ * Elan specific PCI configuration registers. -+ */ -+ -+#define PCI_CONF_PARITY_PHYS_LO 0x40 -+#define PCI_CONF_PARITY_PHYS_HI 0x44 -+#define PCI_CONF_PARITY_PHASE_ADDR 0x46 -+#define PCI_CONF_PARITY_MASTER_TYPE 0x47 -+#define PCI_CONF_ELAN3_CTRL 0x48 -+ -+#define ECTRL_EXTEND_LATENCY (1 << 0) -+#define ECTRL_ENABLE_PREFETCH (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_REDUCED_RETRY_RATE (1 << 3) -+#define ECTRL_CLOCK_DIVIDE_RATE_SHIFT 4 -+#define ECTRL_COMMS_DIVIDE_RATE_SHIFT 10 -+#define ECTRL_FORCE_COMMSCLK_LOCAL (1 << 14) -+ -+/* -+ * Configure PCI. -+ */ -+static int -+ConfigurePci(ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u32 rom_address; -+ -+ if (pci_enable_device(pci)) -+ return (ENXIO); -+ -+ /* disable ROM */ -+ pci_read_config_dword(pci, PCI_ROM_ADDRESS, &rom_address); -+ rom_address &= ~PCI_ROM_ADDRESS_ENABLE; -+ pci_write_config_dword(pci, PCI_ROM_ADDRESS, rom_address); -+ mb(); -+ -+ /* this is in 32-bit WORDS */ -+ pci_write_config_byte(pci, PCI_CACHE_LINE_SIZE, (64 >> 2)); -+ mb(); -+ -+ /* allow 40 ticks to respond, 16 data phases */ -+ pci_write_config_byte(pci, PCI_LATENCY_TIMER, 255); -+ mb(); -+ -+ /* don't enable PCI_COMMAND_SERR--see note in elandev_dunix.c */ -+ pci_write_config_word(pci, PCI_COMMAND, PCI_COMMAND_MEMORY -+ | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY); -+ mb(); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Reset chip to a known state. -+ */ -+static int -+ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ int instance = dev->Instance; -+ u32 val; -+ u8 revid; -+ int CasLatency; -+ int res; -+ -+ /* determine rev of board */ -+ pci_read_config_byte(pci, PCI_REVISION_ID, &revid); -+ -+ /* GNAT 2328 - don't set ECTRL_ENABLE_PREFETCH on Elan rev A */ -+ val = ECTRL_EXTEND_LATENCY | (39 << ECTRL_CLOCK_DIVIDE_RATE_SHIFT) -+ | (6 << ECTRL_COMMS_DIVIDE_RATE_SHIFT); -+ switch (revid) -+ { -+ case PCI_REVISION_ID_ELAN3_REVA: -+ printk("elan%d: is an elan3 (revision a) - not supported\n", instance); -+ return (EFAIL); -+ -+ case PCI_REVISION_ID_ELAN3_REVB: -+ val |= ECTRL_ENABLE_PREFETCH; -+ if (BackToBackMaster) -+ val |= ECTRL_FORCE_COMMSCLK_LOCAL; -+ printk("elan%d: is an elan3 (revision b)\n", instance); -+ break; -+ default: -+ printk("elan%d: unsupported elan3 revision %d\n", -+ instance, revid); -+ return EFAIL; -+ } -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * GNAT: 2474 -+ * Hit reset on the Elan, then we MUST initialise the schedule status -+ * register to drive reset on the link before the link can come out -+ * of reset (15 uS). We need to keep it like this until we've -+ * initialised SDRAM -+ */ -+ pci_read_config_dword(pci, PCI_CONF_ELAN3_CTRL, &val); -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, -+ val | ECTRL_SOFTWARE_INTERNAL_RESET); -+ mb(); -+ -+ /* Read the Vital Product Data to determine the cas latency */ -+ if ((res = ReadVitalProductData (dev, &CasLatency)) != ESUCCESS) -+ return (res); -+ -+ /* -+ * Now clear the Software internal reset bit, and start the sdram -+ */ -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * Enable SDRAM before sizing and initalising it for ECC. -+ * NOTE: don't enable all sets of the cache (yet), nor ECC -+ */ -+ dev->Cache_Control_Reg = (CasLatency | REFRESH_RATE_16US); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg | SETUP_SDRAM)); -+ mb(); -+ -+ INIT_SCHED_STATUS(dev, Sched_Initial_Value); -+ -+ /* -+ * Set the interrupt mask to 0 and enable the interrupt PAL -+ * by writing any value to it. -+ */ -+ SET_INT_MASK (dev, 0); -+ writeb (0, intPalAddr); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Determine the size of elan PCI address spaces. EFAIL is returned if -+ * unused or invalid BAR is specified, or if board reports I/O mapped space. -+ */ -+int -+DeviceRegisterSize(ELAN3_DEV *dev, int rnumber, int *sizep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ *sizep = pci_resource_size(pdev, rnumber); -+#else -+ *sizep = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+#endif -+ return ESUCCESS; -+} -+ -+/* -+ * Map PCI memory into kernel virtual address space. On the alpha, -+ * we just return appropriate kseg address, and Unmap is a no-op. -+ */ -+int -+MapDeviceRegister(ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, -+ int off, int len, DeviceMappingHandle *handlep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ u64 base = pci_get_base_address(pdev, rnumber); -+ *addrp = (ioaddr_t) pci_base_to_kseg(base + off, pdev->bus->number); -+ -+#else -+ if (len == 0) -+ len = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+ -+ if (len == 0) -+ return (EINVAL); -+ -+ *addrp = (ioaddr_t) ioremap_nocache (pci_resource_start(pdev, rnumber) + off, len); -+#endif -+ -+ *handlep = (void *) *addrp; -+ -+ return (*addrp ? ESUCCESS : ENOMEM); -+} -+void -+UnmapDeviceRegister(ELAN3_DEV *dev, DeviceMappingHandle *handlep) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+ iounmap (*handlep); -+#endif -+} -+ -+void -+ElanBusError (ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u8 phaseaddr, type; -+ u16 status, cmd, physhi; -+ u32 physlo; -+ -+ printk("elan%d: bus error occured\n", dev->Instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword(pci, PCI_CONF_PARITY_PHYS_LO, &physlo); -+ pci_read_config_word (pci, PCI_CONF_PARITY_PHYS_HI, &physhi); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_PHASE_ADDR, &phaseaddr); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_MASTER_TYPE, &type); -+ -+#define PCI_CONF_STAT_FORMAT "\20" \ -+ "\6SIXTY_SIX_MHZ\7UDF\10FAST_BACK\11PARITY" \ -+ "\14SIG_TARGET_ABORT\15REC_TARGET_ABORT\16REC_MASTER_ABORT" \ -+ "\17SIG_SYSTEM_ERROR\20DETECTED_PARITY" -+ -+ printk ("elan%d: status %x cmd %4x physaddr %04x%08x phase %x type %x\n", -+ dev->Instance, status, cmd, physhi, physlo, phaseaddr, type); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/elansyscall.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/elansyscall.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/elansyscall.c 2005-06-01 23:12:54.584441384 -0400 -@@ -0,0 +1,1230 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elansyscall.c,v 1.99.2.1 2004/10/28 17:08:56 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elansyscall.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static int sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+static int sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+static int sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+static void sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_uint32 value); -+static void sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+static void sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *item); -+static void sys_freeWordItem (ELAN3_CTXT *ctxt, void *item); -+static void sys_freeBlockItem (ELAN3_CTXT *ctxt, void *item); -+static int sys_countItems (ELAN3_CTXT *ctxt, int list); -+static int sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static void sys_swapin (ELAN3_CTXT *ctxt); -+static void sys_swapout (ELAN3_CTXT *ctxt); -+static void sys_freePrivate (ELAN3_CTXT *ctxt); -+static int sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+static int sys_startFaultCheck (ELAN3_CTXT *ctxt); -+static void sys_endFaultCheck (ELAN3_CTXT *ctxt); -+static E3_uint8 sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+static ELAN3_OPS elan3_sys_ops = { -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ sys_exception, /* Exception */ -+ sys_getWordItem, /* GetWordItem */ -+ sys_getBlockItem, /* GetBlockItem */ -+ sys_putWordItem, /* PutWordItem */ -+ sys_putBlockItem, /* PutBlockItem */ -+ sys_putbackItem, /* PutbackItem */ -+ sys_freeWordItem, /* FreeWordItem */ -+ sys_freeBlockItem, /* FreeBlockItem */ -+ sys_countItems, /* CountItems */ -+ sys_event, /* Event */ -+ sys_swapin, /* Swapin */ -+ sys_swapout, /* Swapout */ -+ sys_freePrivate, /* FreePrivate */ -+ sys_fixupNetworkError, /* FixupNetworkError */ -+ NULL, /* DProcTrap */ -+ NULL, /* TProcTrap */ -+ NULL, /* IProcTrap */ -+ NULL, /* CProcTrap */ -+ NULL, /* CProcReissue */ -+ sys_startFaultCheck, /* StartFaultCheck */ -+ sys_endFaultCheck, /* EndFaultCheck */ -+ sys_load8, /* Load8 */ -+ sys_store8, /* Store8 */ -+ sys_load16, /* Load16 */ -+ sys_store16, /* Store16 */ -+ sys_load32, /* Load32 */ -+ sys_store32, /* Store32 */ -+ sys_load64, /* Load64 */ -+ sys_store64 /* Store64 */ -+}; -+ -+va_list null_valist; -+ -+SYS_CTXT * -+sys_init (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (sctx, SYS_CTXT *, sizeof (SYS_CTXT), TRUE); -+ -+ if (sctx == NULL) -+ return ((SYS_CTXT *) NULL); -+ -+ sctx->Swap = NULL; -+ sctx->Armed = 0; -+ sctx->Backoff = 1; -+ sctx->Table = cookie_alloc_table ((unsigned long) ELAN3_MY_TASK_HANDLE(), 0); -+ sctx->signal = SIGSEGV; -+ -+ if (sctx->Table == NULL) -+ { -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ return ((SYS_CTXT *) NULL); -+ } -+ -+ kmutex_init (&sctx->Lock); -+ spin_lock_init (&sctx->WaitLock); -+ kcondvar_init (&sctx->NetworkErrorWait); -+ -+ /* Install my context operations and private data */ -+ ctxt->Operations = &elan3_sys_ops; -+ ctxt->Private = (void *) sctx; -+ -+ return (sctx); -+} -+ -+/* returns -ve on error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+int -+elan3_validate_cap(ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN3_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN3_SYSTEM_CONTEXT (cap->cap_highcontext) -+ || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: lctx %x hctx %x \n",cap->cap_lowcontext, cap->cap_highcontext); -+ PRINTF3 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: bit %x low %x high %x\n", ((cap->cap_lowcontext) & SYS_CONTEXT_BIT), -+ E3_NUM_CONTEXT_0, ELAN3_KCOMM_BASE_CONTEXT_NUM); -+ -+ -+ PRINTF0 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: user process cant attach to system cap\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_HWTEST) -+ { -+ if (!(cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP)) /* cant have a bit map */ -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST must have ELAN_CAP_TYPE_NO_BITMAP\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext != cap->cap_highcontext) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST (cap->cap_lowcontext != cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext) ; -+ return (-EINVAL); -+ } -+ -+ if ( ! (ELAN3_HWTEST_CONTEXT(cap->cap_lowcontext) && ELAN3_HWTEST_CONTEXT(cap->cap_highcontext))) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST HWTEST_BASE_CONTEXT %d %d %d \n" , ELAN3_HWTEST_BASE_CONTEXT_NUM,cap->cap_lowcontext ,ELAN3_HWTEST_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lownode != ELAN_CAP_UNINITIALISED || cap->cap_highnode != ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST nodes != ELAN_CAP_UNINITIALISED\n"); -+ return (-EINVAL); -+ } -+ -+ return ELAN_CAP_OK; -+ } -+ -+ return elanmod_classify_cap(&dev->Position, cap, use); -+} -+ -+int -+sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ EVENT_COOKIE cookie; -+ -+ if (ctxt->Device->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVA) -+ return (EINVAL); -+ -+ cookie = fuword ((int *) &event->ev_Type) & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY); -+ -+ if (cookie_alloc_cookie (sctx->Table, cookie) != ESUCCESS) -+ return (EINVAL); -+ -+ cookie_arm_cookie (sctx->Table, cookie); -+ -+ if (fuword ((int *) &event->ev_Count) > 0) -+ cookie_wait_cookie (sctx->Table, cookie); -+ -+ cookie_free_cookie (sctx->Table, cookie); -+ -+ return (ESUCCESS); -+} -+ -+static void * -+sys_getItem (SYS_SWAP_SPACE *sp, int list) -+{ -+ void *itemp = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ void *next; -+ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_getItem: sp=%p list=%d head=%p itemp=%p\n", -+ sp, list, &sp->ItemListsHead[list], itemp); -+ -+ if (itemp == NULL) -+ return (NULL); -+ -+ next = (void *) fuptr_noerr ((void *) itemp); -+ -+ suptr_noerr ((void *) &sp->ItemListsHead[list], (void *) next); -+ if (next == NULL) -+ suptr_noerr ((void *) &sp->ItemListsTailp[list], (void *)&sp->ItemListsHead[list]); -+ return (itemp); -+} -+ -+static void -+sys_putItemBack (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemBack: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, NULL); /* item->Next = NULL */ -+ suptr_noerr ((void **) fuptr_noerr ((void **) &sp->ItemListsTailp[list]), (void *)itemp); /* *Tailp = item */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &item->Next */ -+} -+ -+static void -+sys_putItemFront (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemFront: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, fuptr_noerr ((void **) &sp->ItemListsHead[list])); /* item->Next = Head */ -+ suptr_noerr ((void **) &sp->ItemListsHead[list], (void *) itemp); /* Head = item */ -+ -+ if (fuptr_noerr ((void **) &sp->ItemListsTailp[list]) == (void *) &sp->ItemListsHead[list]) /* if (Tailp == &Head) */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &Item->Next */ -+} -+ -+ -+static int -+sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = (SYS_WORD_ITEM *) sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ if (list == LIST_DMA_PTR) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = (E3_Addr) fuword_noerr ((E3_int32 *) &item->Value); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getWordItem: list=%d -> item=%p value=%08x\n", list, *itemp, *valuep); -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static int -+sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ E3_uint32 *dest = fuptr_noerr ((void **) &item->Pointer); -+ -+ if (list == LIST_DMA_DESC) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t) dest); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getBlockItem: list=%d -> item=%p addr=%08x\n", list, *itemp, *valuep); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+ -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static void -+sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_Addr value) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ PRINTF2 (ctxt,DBG_SYSCALL, "sys_putWordItem: list=%x value=%x\n", list, value); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_WORD); -+ -+ PRINTF1 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p\n", item); -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: storing value=%08x at %p\n", value, &item->Value); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p value=%08x\n", item, value); -+ -+ suword_noerr ((E3_int32 *) &item->Value, value); /* write "value" into item */ -+ -+ sys_putItemBack (sp, list, item); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ label_t ljp; -+ E3_uint32 *source; -+ E3_uint32 *dest; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: list=%x ptr=%p\n", list, ptr); -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_BLOCK); /* get an item from the freelist. */ -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ /* -+ * The block will have been read using 64 bit reads, since we have -+ * to write it to user memory using 32 bit writes, we need to perform -+ * an endian swap on the Ultrasparc. -+ */ -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ source = (E3_uint32 *) ptr; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: item=%p dest=%p\n",item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[0^WordEndianFlip], source[1^WordEndianFlip], source[2^WordEndianFlip], source[3^WordEndianFlip]); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[4^WordEndianFlip], source[5^WordEndianFlip], source[6^WordEndianFlip], source[7^WordEndianFlip]); -+ -+ suword_noerr ((E3_int32 *) &dest[7], (E3_int32) source[7^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[6], (E3_int32) source[6^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[5], (E3_int32) source[5^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[4], (E3_int32) source[4^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[3], (E3_int32) source[3^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[2], (E3_int32) source[2^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[1], (E3_int32) source[1^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[0], (E3_int32) source[0^WordEndianFlip]); -+ -+ sys_putItemBack (sp, list, item); /* chain onto list of items. */ -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeWordItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_WORD, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemBack (sp, LIST_FREE_WORD, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeBlockItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item = (SYS_BLOCK_ITEM *)itemp; -+ E3_uint32 *dest; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_BLOCK, (void *) NULL, null_valist); -+ return; -+ } -+#ifdef DEBUG_PRINTF -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_freeBlockItem: item=%p dest=%p\n", item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+#endif -+ -+ sys_putItemBack (sp, LIST_FREE_BLOCK, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemFront (sp, list, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static int -+sys_countItems (ELAN3_CTXT *ctxt, int list) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int count = 0; -+ void *item; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ for (item = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ item != NULL; -+ item = (void *) fuptr_noerr ((void **) item)) -+ { -+ count++; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (count); -+} -+ -+ -+long sys_longTime; -+long sys_shortTime; -+int sys_waitTicks; -+int sys_maxBackoff; -+ -+#define SYS_LONG_TIME MAX((hz * 5) / 1000, 1) /* 5 ms */ -+#define SYS_SHORT_TIME MAX((hz * 2) / 1000, 1) /* 2 ms */ -+#define SYS_WAIT_TICKS MAX((hz * 1) / 1000, 1) /* 1 ms - backoff granularity */ -+#define SYS_MAX_BACKOFF MAX((hz * 5) / 1000, 1) /* 5 ms - max backoff for "nacked" packets*/ -+#define SYS_TIMEOUT_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - backoff for output timeout (point to point) */ -+#define SYS_BCAST_BACKOFF MAX((hz * 50) / 1000, 1) /* 50 ms - backoff for output timeout (broadcast) */ -+#define SYS_NETERR_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - delay for network error in dma data */ -+ -+static void -+sys_backoffWait (ELAN3_CTXT *ctxt, int ticks) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ long t; -+ -+ spin_lock (&sctx->WaitLock); -+ -+ t = lbolt - sctx->Time; -+ -+ if (sys_longTime == 0) sys_longTime = SYS_LONG_TIME; -+ if (sys_shortTime == 0) sys_shortTime = SYS_SHORT_TIME; -+ if (sys_waitTicks == 0) sys_waitTicks = SYS_WAIT_TICKS; -+ if (sys_maxBackoff == 0) sys_maxBackoff = SYS_MAX_BACKOFF; -+ -+ if (t > sys_longTime) /* It's a long time since the last trap */ -+ sctx->Backoff = 0; /* so set the backoff back down to 0 */ -+ -+ if (ticks) -+ { -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d ticks [%lx]\n", ticks, t); -+ kcondvar_timedwait (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + ticks); -+ } -+ else if (sctx->Armed) -+ { -+ if (t < sys_shortTime) /* It's been a short time since the last */ -+ { /* trap, so increase the backoff */ -+ sctx->Backoff++; -+ -+ if (sctx->Backoff > sys_maxBackoff) -+ sctx->Backoff = sys_maxBackoff; -+ } -+ -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d [%lx]\n", sctx->Backoff, t); -+ -+ if (sctx->Backoff) -+ kcondvar_timedwaitsig (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + sctx->Backoff * sys_waitTicks); -+ -+ sctx->Armed = 0; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "sys_backoffWait : Not Waiting - %d\n", sctx->Backoff); -+ -+ } -+ sctx->Time = lbolt; -+ -+ spin_unlock (&sctx->WaitLock); -+} -+ -+static int -+trapSize (int proc) -+{ -+ switch (proc) -+ { -+ case DMA_PROC: return (sizeof (DMA_TRAP)); -+ case THREAD_PROC: return (sizeof (THREAD_TRAP)); -+ case COMMAND_PROC: return (sizeof (COMMAND_TRAP)); -+ case INPUT_PROC: return (sizeof (INPUT_TRAP)); -+ default: return (0); -+ } -+} -+ -+static int -+sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trapp, va_list ap) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_exception: type %d proc %d\n", type, proc); -+ -+ switch (type) -+ { -+ case EXCEPTION_INVALID_ADDR: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int res = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, res, 0); -+ break; -+ } -+ -+ case EXCEPTION_UNIMP_INSTR: -+ { -+ E3_uint32 instr = va_arg (ap, E3_uint32); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, instr); -+ break; -+ } -+ -+ case EXCEPTION_INVALID_PROCESS: -+ { -+ E3_uint32 vproc = va_arg (ap, E3_uint32); -+ int res = va_arg (ap, int); -+ -+ switch (proc) -+ { -+ case DMA_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMA_BADVP) -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ -+ case THREAD_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_THREAD_BADVP) -+ { -+ THREAD_TRAP *trap = (THREAD_TRAP *) trapp; -+ -+ trap->TrapBits.s.PacketAckValue = E3_PAckError; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ } -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, vproc); -+ break; -+ } -+ -+ case EXCEPTION_FAULTED: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_QUEUE_OVERFLOW: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int trapType = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, 0, trapType); -+ break; -+ } -+ -+ case EXCEPTION_COMMAND_OVERFLOW: -+ { -+ int count = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, count); -+ break; -+ } -+ -+ case EXCEPTION_CHAINED_EVENT: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_DMA_RETRY_FAIL: -+ case EXCEPTION_PACKET_TIMEOUT: -+ if (proc != DMA_PROC) -+ sys_backoffWait (ctxt, SYS_TIMEOUT_BACKOFF); -+ else -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMAFAIL) -+ { -+ E3_BlockCopyEvent *event; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ /* change the source word to be E3_EVENT_FAILED */ -+ if ((event = (E3_BlockCopyEvent *) elan3mmu_mainaddr (ctxt->Elan3mmu, trap->Desc.s.dma_srcEvent)) == NULL) -+ { -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ suword (&event->ev_Source, E3_EVENT_FAILED); -+ wmb(); mmiob(); -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ -+ if (type == EXCEPTION_DMA_RETRY_FAIL) -+ sys_backoffWait (ctxt, 0); -+ else -+ { -+ ELAN_LOCATION location; -+ -+ krwlock_read (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, trap->Desc.s.dma_direction == DMA_WRITE ? -+ trap->Desc.s.dma_destVProc : trap->Desc.s.dma_srcVProc, NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ sys_backoffWait (ctxt, location.loc_node == ELAN3_INVALID_NODE ? SYS_BCAST_BACKOFF : SYS_TIMEOUT_BACKOFF); -+ } -+ } -+ return (OP_IGNORE); -+ -+ case EXCEPTION_NETWORK_ERROR: -+ { -+ INPUT_TRAP *trap = (INPUT_TRAP *) trapp; -+ NETERR_RESOLVER **rvpp = va_arg (ap, NETERR_RESOLVER **); -+ -+ ASSERT (trap->State == CTXT_STATE_NETWORK_ERROR); -+ -+ if (! (sctx->Flags & ELAN3_SYS_FLAG_NETERR) && (trap->DmaIdentifyTransaction || trap->ThreadIdentifyTransaction)) -+ { -+ if ((*rvpp) != (NETERR_RESOLVER *) NULL) -+ res = (*rvpp)->Status; -+ else if ((res = QueueNetworkErrorResolver (ctxt, trap, rvpp)) == ESUCCESS) -+ { -+ /* Successfully queued the network error resolver */ -+ return (OP_HANDLED); -+ } -+ -+ /* network error resolution has failed - either a bad cookie or */ -+ /* an rpc error has occured */ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, 0); -+ } -+ else -+ { -+ /* Must be an overlaped dma packet. Must wait long enough to -+ * ensure that the sending dma'er has tried to send the next -+ * packet and had it discarded. In the real world this should -+ * be greater than an output timeout. (About 8mSec) */ -+ -+ sys_backoffWait (ctxt, SYS_NETERR_BACKOFF); -+ -+ /* set this inputter state to be ok, since we've been called -+ * by the lwp it will lower the context filter for us, so -+ * re-enabling the inputter, note we don't need to execute -+ * any of the packet since the dma process will re-transmit -+ * it after receiving a nack for the next packet */ -+ trap->State = CTXT_STATE_OK; -+ -+ return (OP_HANDLED); -+ } -+ break; -+ } -+ -+ default: -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ if (type != EXCEPTION_DEBUG) -+#ifdef LINUX -+#ifdef NO_NPTL -+ psignal (CURPROC()->p_opptr, sctx->signal); -+#else -+ psignal (CURPROC()->parent, sctx->signal); -+#endif -+#else -+ psignal (CURPROC(), sctx->signal); -+#endif -+ return (OP_HANDLED); -+} -+ -+static int -+sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_fire_cookie (sctx->Table, cookie); -+ -+ return (OP_HANDLED); -+} -+ -+static void -+sys_swapin (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapin\n"); -+} -+ -+static void -+sys_swapout (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapout\n"); -+} -+ -+static void -+sys_freePrivate (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_free_table (sctx->Table); -+ -+ kmutex_destroy (&sctx->Lock); -+ spin_lock_destroy (&sctx->WaitLock); -+ kcondvar_destroy (&sctx->NetworkErrorWait); -+ -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ ctxt->Private = NULL; -+} -+ -+static int -+sys_checkThisDma (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, E3_DMA *dma) -+{ -+ E3_DmaType type; -+ E3_uint32 cookie; -+ E3_uint32 cvproc; -+ int ignore; -+ int match; -+ -+ type.type = fuword_noerr ((int *) &dma->dma_type); -+ -+ if (type.s.direction == DMA_WRITE) -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ } -+ else -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ } -+ -+ PRINTF5 (ctxt, DBG_NETERR, "sys_checkThisDma: dir = %d cookie = %08x cvproc = %08x CookieVProc %08x DstProcess %04x\n", -+ type.s.direction, cookie, cvproc, nef->Message.CookieVProc, nef->Message.DstProcess); -+ -+ /* A DMA matches a network errror fixup if it's going to the right place (or is a broadcast) -+ * and the approriate cookie matches, except that we ignore DMA's which don't have a destEvent -+ * since they don't have any atomic behaviour (though they still send the identify) */ -+ -+ ignore = (type.s.direction == DMA_WRITE && cookie == 0 && -+ fuword_noerr ((int *) &dma->dma_destEvent) == 0); -+ match = (nef->Message.CookieVProc == cookie && -+ (nef->Message.DstProcess == (cvproc & DMA_PROCESS_MASK) || nef->Message.WaitForEop)); -+ -+ PRINTF2 (ctxt, DBG_NETERR, " -> %s %s\n", ignore ? "ignore" : match ? "matched" : "not-matched", nef->Message.WaitForEop ? "wait for eop" : ""); -+ -+ if (match && !ignore && !nef->Message.WaitForEop) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "sys_checkThisDma: nuking the dma\n"); -+ -+ /* NOTE - we access the dma descriptor backwards since it could exist in sdram */ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_srcEvent, 0); -+ -+ suword_noerr ((int *) &dma->dma_destEvent, 0); -+ suword_noerr ((int *) &dma->dma_dest, 0); -+ suword_noerr ((int *) &dma->dma_source, 0); -+ suword_noerr ((int *) &dma->dma_size, 0); -+ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_type, fuword_noerr ((int *) &dma->dma_type) & E3_DMA_CONTEXT_MASK); -+ -+ wmb(); mmiob(); -+ } -+ -+ return (match && !ignore); -+} -+ -+static int -+sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int matched = 0; -+ SYS_WORD_ITEM *wordp; -+ SYS_BLOCK_ITEM *blockp; -+ label_t ljb; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError %08x %08x %08x\n", -+ nef->Message.CookieAddr, nef->Message.CookieVProc, nef->Message.NextCookie); -+ -+ if (nef->Message.CookieAddr == (E3_Addr) 0) /* It's a DMA which requires fixing up */ -+ { -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ res = EFAULT; -+ else -+ { -+ /* scan the dma ptr list */ -+ for (wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_PTR]); -+ wordp != NULL; -+ wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &wordp->Next)) -+ { -+ E3_uint32 value = fuword_noerr ((int *) &wordp->Value); -+ E3_DMA *dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, value); -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Value %08x dma %p\n", wordp, value, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* scan the dma desc list */ -+ for (blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_DESC]); -+ blockp != NULL; -+ blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &blockp->Next)) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &blockp->Pointer); -+ -+ PRINTF2 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Pointer %p\n", blockp, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* If we've still not found it, then check the command port item */ -+ /* it MUST be present as a command waiting to be executed, as */ -+ /* otherwise it could have already happened and we will claim to */ -+ /* have found it, but not realy */ -+ if (ctxt->CommandPortItem != NULL) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &((SYS_BLOCK_ITEM *) ctxt->CommandPortItem)->Pointer); -+ -+ if (sys_checkThisDma (ctxt, nef, dma)) -+ { -+ printk ("!!! it's the command port item - need to ensure that the command exists\n"); -+ matched++; -+ } -+ } -+ -+ res = matched ? ESUCCESS : ESRCH; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (matched > 1) -+ ElanException (ctxt, EXCEPTION_COOKIE_ERROR, DMA_PROC, NULL, NULL, nef->Message.CookieVProc); -+ } -+ else /* It's a thread which requires fixing up */ -+ { -+ E3_int32 *cookiePtr = (E3_int32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, nef->Message.CookieAddr); -+ E3_uint32 curval = fuword_noerr (cookiePtr); -+ -+ if (curval == nef->Message.CookieVProc) /* thread doesn't think it's been done */ -+ { -+ if (! nef->Message.WaitForEop) -+ { -+ suword_noerr (cookiePtr, nef->Message.NextCookie); -+ mb(); mmiob(); -+ } -+ -+ res = ESUCCESS; -+ } -+ else /* thread thinks that it's been executed */ -+ { -+ res = ESRCH; -+ } -+ } -+ -+ CompleteNetworkErrorFixup (ctxt, nef, res); -+ -+ return (OP_HANDLED); -+} -+ -+ -+static int -+sys_startFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ return (0); -+} -+ -+static void -+sys_endFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ wmb(); -+} -+ -+static E3_uint8 -+sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fubyte_noerr (maddr)); -+} -+ -+static void -+sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ subyte_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint16 -+sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fusword_noerr (maddr)); -+} -+ -+static void -+sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ susword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint32 -+sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fuword_noerr (maddr)); -+} -+ -+static void -+sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ suword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint64 -+sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fulonglong_noerr ((long long *) maddr)); -+} -+ -+static void -+sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ sulonglong_noerr ((long long *) maddr, val); -+ wmb(); mmiob(); -+} -+ -+ -+void -+sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t trapp, int size, -+ E3_FaultSave_BE *faultSave, u_long res, u_long value) -+{ -+ SYS_EXCEPTION *ex_ptr; -+ int front; -+ int back; -+ int count; -+ label_t ljp; -+ -+ PRINTF4 (DBG_DEVICE, DBG_FN, "sys_addException: type %d proc %d res %ld value %ld\n", -+ type, proc, res, value); -+ -+ KMEM_ZALLOC (ex_ptr, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (ex_ptr != NULL) -+ { -+ bzero ((caddr_t) ex_ptr, sizeof (SYS_EXCEPTION)); -+ -+ ex_ptr->Type = type; -+ ex_ptr->Proc = proc; -+ ex_ptr->Res = res; -+ ex_ptr->Value = value; -+ -+ if (trapp && size) -+ bcopy (trapp, (caddr_t) &ex_ptr->Union, size); -+ if (faultSave) -+ bcopy ((caddr_t) faultSave, (caddr_t) &ex_ptr->FaultArea, sizeof (E3_FaultSave_BE)); -+ } -+ -+ kmutex_lock (&sctx->Lock); -+ if (! on_fault (&ljp)) -+ { -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else if (((front+1) % count ) == back) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else -+ { -+ if (ex_ptr != NULL) -+ copyout_noerr ((caddr_t) ex_ptr, (caddr_t) &sctx->Exceptions->Exceptions[front], sizeof (SYS_EXCEPTION)); -+ else -+ { -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Type, EXCEPTION_ENOMEM); -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Proc, 0); -+ } -+ suword_noerr (&sctx->Exceptions->Front, (front + 1) % count); -+ } -+ -+ /* always reset the magic number in case it's been overwritten */ -+ /* so that 'edb' can find the exception page in the core file */ -+ suword_noerr (&sctx->Exceptions->Magic, SYS_EXCEPTION_MAGIC); -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (ex_ptr != NULL) -+ KMEM_FREE (ex_ptr, sizeof (SYS_EXCEPTION)); -+} -+ -+int -+sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex) -+{ -+ int front; -+ int back; -+ int count; -+ int res; -+ label_t ljp; -+ -+ if (sctx->Exceptions == NULL) -+ return (EINVAL); -+ -+ kmutex_lock (&sctx->Lock); -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ return (EFAULT); -+ } -+ -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count || back == front) -+ res = EINVAL; -+ else -+ { -+ copyin_noerr ((caddr_t) &sctx->Exceptions->Exceptions[back], (caddr_t) ex, sizeof (SYS_EXCEPTION)); -+ suword_noerr (&sctx->Exceptions->Back, (back+1) % count); -+ -+ res = ESUCCESS; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/eventcookie.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/eventcookie.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/eventcookie.c 2005-06-01 23:12:54.585441232 -0400 -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: eventcookie.c,v 1.7 2003/08/13 10:03:03 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/eventcookie.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static EVENT_COOKIE_TABLE *cookie_tables; -+static spinlock_t cookie_table_lock; -+ -+/* -+ * cookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+cookie_drop_entry (EVENT_COOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+} -+ -+void -+cookie_init() -+{ -+ spin_lock_init (&cookie_table_lock); -+} -+ -+void -+cookie_fini() -+{ -+ spin_lock_destroy (&cookie_table_lock); -+} -+ -+EVENT_COOKIE_TABLE * -+cookie_alloc_table (unsigned long task, unsigned long handle) -+{ -+ EVENT_COOKIE_TABLE *tbl, *ntbl; -+ -+ KMEM_ZALLOC (ntbl, EVENT_COOKIE_TABLE *, sizeof (EVENT_COOKIE_TABLE), TRUE); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ spin_lock (&cookie_table_lock); -+ -+ for (tbl = cookie_tables; tbl; tbl = tbl->tbl_next) -+ if (tbl->tbl_task == task && tbl->tbl_handle == handle) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_task = task; -+ ntbl->tbl_handle = handle; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ if ((ntbl->tbl_next = cookie_tables) != NULL) -+ cookie_tables->tbl_prev = ntbl; -+ cookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&cookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (EVENT_COOKIE_TABLE)); -+ return (tbl); -+ } -+} -+ -+void -+cookie_free_table (EVENT_COOKIE_TABLE *tbl) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ -+ spin_lock (&cookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&cookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ cookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&cookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ cookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (EVENT_COOKIE_TABLE)); -+} -+ -+int -+cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, EVENT_COOKIE_ENTRY *, sizeof (EVENT_COOKIE_ENTRY), TRUE); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (ESUCCESS); -+ else -+ { -+ KMEM_FREE (nent, sizeof (EVENT_COOKIE_ENTRY)); -+ return (EINVAL); -+ } -+} -+ -+int -+cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ cookie_drop_entry (ent); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (ESUCCESS); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+ return (ESUCCESS); -+} -+ -+int -+cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/iproc.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/iproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/iproc.c 2005-06-01 23:12:54.586441080 -0400 -@@ -0,0 +1,925 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: iproc.c,v 1.47 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/iproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static int TrSizeTable[] = {0, 8, 16, 32, 64}; -+ -+static void ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr); -+static void SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp); -+ -+void -+HandleIProcTrap (ELAN3_DEV *dev, -+ int Channel, -+ E3_uint32 Pend, -+ sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, -+ sdramaddr_t DataOff) -+{ -+ E3_IprocTrapHeader_BE Transaction0; -+ ELAN3_CTXT *ctxt; -+ INPUT_TRAP *trap; -+ register int i; -+ -+ /* -+ * Read the 1st set of transactions, so we can determine the -+ * context for the trap -+ */ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff, (void *) &Transaction0, 16); -+ -+ BumpStat (dev, IProcTraps); -+ BumpInputterStats (dev, &Transaction0); -+ -+ if (Transaction0.s.TrTypeCntx.s.TypeCntxInvalid) -+ { -+ /* -+ * The context is not valid. This will occur if the packet -+ * trapped for an EopError with no IdentTrans or an error corrupted the context -+ * giving a CRC error on the first transaction and the Ack had not been returned. -+ */ -+ if (Transaction0.s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: Error on EOP without a good context, ignoring trap\n"); -+ } -+ else -+ { -+ /* Check that only crap has been received. If not then die. */ -+ if (! Transaction0.s.IProcTrapStatus.s.BadLength && -+ (Transaction0.s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_GOOD) -+ { -+ printk ("iproc: Did not have a valid context for the trap area.\n"); -+ printk ("iproc: TrTypeCntx=%x TrAddr=%x TrData0=%x IProcTrapStatus=%x\n", -+ Transaction0.s.TrTypeCntx.TypeContext, Transaction0.s.TrAddr, -+ Transaction0.s.TrData0, Transaction0.s.IProcTrapStatus.Status); -+ panic ("elan3: iproc did not have a valid context"); -+ /* NOTREACHED */ -+ } -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: First transaction is bad, ignoring trap\n"); -+ } -+ } -+ else -+ { -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Transaction0.s.TrTypeCntx.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleIProcTrap: context %x invalid\n", -+ Transaction0.s.TrTypeCntx.s.Context); -+ -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ trap = (Channel == 0) ? &ctxt->Input0Trap : &ctxt->Input1Trap; -+ -+ ASSERT (trap->State == CTXT_STATE_OK); -+ -+ trap->Transactions[0] = Transaction0; -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleIProcTrap: %s\n", IProcTrapString (&trap->Transactions[0], NULL)); -+ /* -+ * Copy the rest of the transactions into the trap area. -+ */ -+ for (i = 0; !(trap->Transactions[i].s.TrTypeCntx.s.LastTrappedTrans);) -+ { -+ if (++i >= MAX_TRAPPED_TRANS) -+ { -+ trap->Overflow = 1; -+ break; -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff + i*sizeof (E3_IprocTrapHeader), (void *) &trap->Transactions[i], 16); -+ -+ PRINTF1 (ctxt, DBG_INTR, " %s\n", IProcTrapString (&trap->Transactions[i], NULL)); -+ -+ BumpInputterStats (dev, &trap->Transactions[i]); -+ } -+ -+ /* -+ * Remember the number of transactions we've copied. -+ */ -+ trap->NumTransactions = i+1; -+ -+ PRINTF1 (ctxt, DBG_INTR, " NumTransactions = %d\n", trap->NumTransactions); -+ -+ /* -+ * Copy all the data blocks in one go to let the Elan prefetcher work -+ */ -+ elan3_sdram_copyq_from_sdram (dev, DataOff, trap->DataBuffers, trap->NumTransactions*sizeof (E3_IprocTrapData)); -+ -+ /* -+ * Copy fault save area and clear out for next time round. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, FaultSaveOff, (void *) &trap->FaultSave, 16); -+ elan3_sdram_zeroq_sdram (dev, FaultSaveOff, 16); -+ -+ if (ELAN3_OP_IPROC_TRAP (ctxt, trap, Channel) == OP_DEFER) -+ { -+ /* -+ * Mark the trap as valid and set the inputter state to -+ * raise the context filter. -+ */ -+ trap->State = CTXT_STATE_TRAPPED; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ SetInputterStateForContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+} -+ -+void -+InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ int i; -+ int StatusValid; -+ -+ trap->AckSent = 0; -+ trap->BadTransaction = 0; -+ -+ trap->TrappedTransaction = NULL; -+ trap->TrappedDataBuffer = NULL; -+ trap->WaitForEopTransaction = NULL; -+ trap->WaitForEopDataBuffer = NULL; -+ trap->DmaIdentifyTransaction = NULL; -+ trap->ThreadIdentifyTransaction = NULL; -+ trap->LockQueuePointer = (E3_Addr) 0; -+ trap->UnlockQueuePointer = (E3_Addr) 0; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < trap->NumTransactions ; i++) -+ { -+ E3_IprocTrapHeader_BE *hdrp = &trap->Transactions[i]; -+ E3_IprocTrapData_BE *datap = &trap->DataBuffers[i]; -+ -+ StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid != 0; -+ -+ if (StatusValid && hdrp->s.IProcTrapStatus.s.AckSent) /* Remember if we've sent the ack back */ -+ trap->AckSent = 1; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* Check for EOP */ -+ { -+ ASSERT (i == trap->NumTransactions - 1); -+ -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ BumpUserStat (ctxt, EopBadAcks); -+ -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. We can clear tinfo.AckSent. */ -+ if (trap->AckSent == 1) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: Network error destroyed PAckOk\n"); -+ trap->AckSent = 0; -+ } -+ break; -+ -+ case EOP_ERROR_RESET: -+ BumpUserStat (ctxt, EopResets); -+ -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->BadTransaction = 1; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid EOP type in status register\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ PRINTF2 (ctxt, DBG_IPROC, "InspectIProcTrap: %2d: %s\n", i, IProcTrapString (hdrp, datap)); -+ -+ if (! StatusValid) /* We're looking at transactions stored before the trap */ -+ { /* these should only be identifies and lock transactions */ -+ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ panic ("InspectIProcTrap: writeblock transaction found in input trap header before trap occured\n"); -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->LockQueuePointer) /* Already seen a LOCKQUEUE transaction in this packet, */ -+ { /* the user program should not have done this !! */ -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ -+ trap->LockQueuePointer = (E3_Addr) hdrp->s.TrAddr; /* Remember the queue pointer in case we need to unlock it */ -+ break; -+ -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->DmaIdentifyTransaction = hdrp; -+ break; -+ -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->ThreadIdentifyTransaction = hdrp; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid transaction found in input trap header before trap occured\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ if (StatusValid && trap->TrappedTransaction == NULL) /* Remember the transaction which caused the */ -+ { /* trap */ -+ trap->TrappedTransaction = hdrp; -+ trap->TrappedDataBuffer = datap; -+ } -+ -+ if(hdrp->s.IProcTrapStatus.s.BadLength || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD)) -+ { -+ int j; -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: transaction has a bad crc\n"); -+ for (j=0; jTrData[j], datap->TrData[j+1], datap->TrData[j+2], datap->TrData[j+3]); -+ trap->BadTransaction = 1; -+ continue; -+ } -+ -+ /* No more to do if it's a writeblock transaction */ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ continue; -+ -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap && -+ (hdrp->s.TrTypeCntx.s.Type & TR_WAIT_FOR_EOP) != 0) -+ { -+ /* -+ * This is a wait for eop transaction that has trapped because the inputer -+ * then received a EopError. The next transaction saved should always be an -+ * EopError. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: got a trapped WaitForEop transaction due to EopError\n"); -+ -+ trap->WaitForEopTransaction = hdrp; -+ trap->WaitForEopDataBuffer = datap; -+ continue; -+ } -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->UnlockQueuePointer) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->UnlockQueuePointer = (E3_Addr) hdrp->s.TrAddr; -+ break; -+ } -+ } -+} -+ -+void -+ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ BumpUserStat (ctxt, IProcTraps); -+ -+ InspectIProcTrap (ctxt, trap); -+ -+ /* -+ * fixup page fault if we've trapped because of one. -+ */ -+ if (trap->FaultSave.s.FaultContext != 0) -+ { -+ /* -+ * If it's a WRITEBLOCK transaction, then see if we remember faulting -+ * before it, and try and prefault in a sensible amount past it. -+ */ -+ int fixedFault = FALSE; -+ INPUT_FAULT_SAVE *entry; -+ INPUT_FAULT_SAVE **predp; -+ int npages; -+ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0 && /* a DMA packet */ -+ trap->LockQueuePointer == (E3_Addr) 0 && /* but not a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr != 0) /* and not a DMA to 0 */ -+ { -+ spin_lock (&ctxt->InputFaultLock); -+ -+ for (predp = &ctxt->InputFaultList; (entry = *predp)->Next != NULL ; predp = &entry->Next) -+ { -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ break; -+ } -+ -+ *predp = entry->Next; -+ entry->Next = ctxt->InputFaultList; -+ ctxt->InputFaultList = entry; -+ -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ { -+ if ((entry->Count <<= 1) > MAX_INPUT_FAULT_PAGES) -+ entry->Count = MAX_INPUT_FAULT_PAGES; -+ } -+ else -+ { -+ entry->Count = MIN_INPUT_FAULT_PAGES; -+ } -+ -+ entry->Addr = trap->TrappedTransaction->s.TrAddr + (entry->Count * PAGESIZE); -+ npages = entry->Count; -+ -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ if (elan3_pagefault (ctxt, &trap->FaultSave, npages) != ESUCCESS) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - failed\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - succeeded\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ -+ fixedFault = TRUE; -+ } -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ /* the packet will have been nacked */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ fixedFault = TRUE; -+ } -+ -+ if (! fixedFault) -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "ResolveIProcTrap: elan3_pagefault failed at %x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, INPUT_PROC, trap, &trap->FaultSave, res); -+ return; -+ } -+ } -+ } -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Queued DMA */ -+ { /* The ack was not sent, so the queue will be locked. */ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); /* We must unlock it. */ -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ if (trap->DmaIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Dma identify needs network resultion\n"); -+ -+ BumpStat (dev, DmaIdentifyNetworkErrors); -+ BumpUserStat (ctxt, DmaIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else if (trap->ThreadIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Thread identify needs network resolution\n"); -+ -+ BumpStat (dev, ThreadIdentifyNetworkErrors); -+ BumpUserStat (ctxt, ThreadIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else -+ { -+ BumpStat (dev, DmaNetworkErrors); -+ BumpUserStat (ctxt, DmaNetworkErrors); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (! trap->AckSent) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack not sent, lowering context filter\n"); -+ -+ trap->State = CTXT_STATE_OK; -+ } -+ else -+ { -+ if (trap->BadTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on bad transaction\n"); -+ trap->State = CTXT_STATE_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on packet to be re-executed\n"); -+ trap->State = CTXT_STATE_NEEDS_RESTART; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (trap->AckSent && trap->BadTransaction) -+ ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, rvpp); -+} -+ -+int -+RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: %d transactions\n", trap->NumTransactions); -+ -+ if (trap->TrappedTransaction == NULL) /* No transaction trapped - probably a network */ -+ return (ESUCCESS); /* error */ -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ PRINTF2 (ctxt, DBG_IPROC, "RestartIProc: TrType=0x%x Status=0x%x\n", -+ hdrp->s.TrTypeCntx.TypeContext, hdrp->s.IProcTrapStatus.Status); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+ SimulateBlockWrite (ctxt, hdrp, datap); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: SETEVENT : %x\n", hdrp->s.TrAddr); -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_InputDoTrap) -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus), &trap->FaultSave, FALSE); -+ else if (hdrp->s.TrAddr) -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), hdrp->s.TrAddr, FALSE) != ISSUE_COMMAND_OK) -+ return (EAGAIN); -+ } -+ break; -+ -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteDWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap) -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ else -+ { -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_WaitForUnLockDescRead: -+ /* -+ * Fault occured on the read of the queue descriptor - since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TR_UNLOCKQUEUE : desc read fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EAGAIN); -+ } -+ break; -+ -+ case MI_DoSetEvent: -+ /* -+ * Fault occured on either the write to unlock the queue or during -+ * processing of the event. Test the fault address against the -+ * queue address to find out which - in this case, since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ if (trap->FaultSave.s.FaultAddress == trap->LockQueuePointer) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: fixed unlock queue write to unlock fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EFAIL); -+ } -+ break; -+ } -+ /*DROPTHROUGH*/ -+ -+ default: -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus), -+ &trap->FaultSave, FALSE); -+ break; -+ } -+ trap->LockQueuePointer = trap->UnlockQueuePointer = 0; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: ignore SENDDISCARD\n"); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: REMOTEDMA\n"); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ ((E3_DMA_BE *) datap)->s.dma_type &= ~(DMA_TYPE_READ | E3_DMA_CONTEXT_MASK); -+ ((E3_DMA_BE *) datap)->s.dma_type |= DMA_TYPE_ISREMOTE; -+ -+ RestartDmaDesc (ctxt, (E3_DMA_BE *) datap); -+ break; -+ -+ case TR_TRACEROUTE & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TRACEROUTE\n"); -+ SimulateTraceRoute (ctxt, hdrp, datap); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr) -+{ -+ hdrp->s.TrTypeCntx.s.Type = TR_SETEVENT; -+ hdrp->s.TrTypeCntx.s.StatusRegValid = 0; -+ hdrp->s.TrAddr = Addr; -+} -+ -+void -+SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ void *saddr = (void *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nbytes = (hdrp->s.TrTypeCntx.s.Type) & TR_PARTSIZE_MASK; -+ int i; -+ -+ if (nbytes == 0) -+ nbytes = sizeof (E3_IprocTrapData_BE); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateBlockWrite: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+ switch (((hdrp->s.TrTypeCntx.s.Type) >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: /* 8 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_SHORT: /* 16 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_WORD: /* 32 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_DWORD: /* 64 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ break; -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr, ((E3_uint32 *) datap)[WordEndianFlip]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteDWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr, ((E3_uint64 *) datap)[0]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ E3_uint32 *saddr = (E3_uint32 *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nwords = TrSizeTable[(hdrp->s.TrTypeCntx.s.Type >> TR_SIZE_SHIFT) & TR_SIZE_MASK] / sizeof (E3_uint32); -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateTraceRoute: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ for (i = nwords-2; i >= 0; i--) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ i = nwords-1; -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck) -+{ -+ E3_uint32 QueueLock; -+ E3_Addr QueueBPTR; -+ E3_Addr QueueFPTR; -+ E3_uint64 QueueStateAndBPTR; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "UnlockQueue: faulted with QueuePointer %x\n", QueuePointer); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, QueuePointer); -+ return; -+ } -+ -+ if (SentAck) -+ { -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_bptr)); -+ QueueFPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_fptr)); -+ -+ if (QueueBPTR == ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_top))) /* move on back pointer */ -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_base)); -+ else -+ QueueBPTR += ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_size)); -+ -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ if (QueueBPTR == QueueFPTR) /* and set full bit if fptr == bptr */ -+ QueueLock |= E3_QUEUE_FULL; -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ QueueStateAndBPTR = (E3_uint64)QueueLock << 32 | QueueBPTR; -+ -+ ELAN3_OP_STORE64 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueStateAndBPTR); -+ } -+ else -+ { -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ ELAN3_OP_STORE32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueLock); -+ } -+ -+ no_fault(); -+} -+ -+static void -+BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp) -+{ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* EOP */ -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_BADACK: -+ BumpStat (dev, EopBadAcks); -+ break; -+ case EOP_ERROR_RESET: -+ BumpStat (dev, EopResets); -+ break; -+ } -+ } -+ else if (hdrp->s.TrTypeCntx.s.StatusRegValid) -+ { -+ /* -+ * Errors are tested in order of badness. i.e. badlength will prevent a BadCrc and so on... -+ */ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ BumpStat (dev, InputterBadLength); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD) -+ BumpStat (dev, InputterCRCBad); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) -+ BumpStat (dev, InputterCRCErrors); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_DISCARD) -+ BumpStat (dev, InputterCRCDiscards); -+ } -+} -+ -+char * -+IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ static char buffer[256]; -+ static char typeString[256]; -+ static char statusString[256]; -+ char *ptr; -+ E3_Addr Addr = hdrp->s.TrAddr; -+ E3_uint32 Type = hdrp->s.TrTypeCntx.s.Type; -+ E3_uint32 Context = hdrp->s.TrTypeCntx.s.Context; -+ E3_uint32 StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: sprintf (typeString, "EOP GOOD"); break; -+ case EOP_BADACK: sprintf (typeString, "EOP BADACK"); break; -+ case EOP_ERROR_RESET: sprintf (typeString, "EOP ERROR RESET"); break; -+ default: sprintf (typeString, "EOP - bad status"); break; -+ } -+ sprintf (buffer, "%15s Cntx=%08x", typeString, Context); -+ } -+ else -+ { -+ if (Type & TR_WRITEBLOCK_BIT) -+ { -+ switch ((Type >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: ptr = "Byte"; break; -+ case TR_TYPE_SHORT: ptr = "Short"; break; -+ case TR_TYPE_WORD: ptr = "Word"; break; -+ case TR_TYPE_DWORD: ptr = "Double"; break; -+ default: ptr = "Unknown"; break; -+ } -+ -+ sprintf (typeString, "WriteBlock Type=%s Size=%2d", ptr, Type & TR_PARTSIZE_MASK); -+ } -+ else -+ { -+ switch (Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Setevent"); break; -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Remote DMA"); break; -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Lock Queue"); break; -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Unlock Queue"); break; -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Send Discard"); break; -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "DMA Identify"); break; -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Thread Identify"); break; -+ case TR_GTE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "GTE"); break; -+ case TR_LT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "LT"); break; -+ case TR_EQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "EQ"); break; -+ case TR_NEQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "NEQ"); break; -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Word"); break; -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Double"); break; -+ case TR_ATOMICADDWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Atomic Add"); break; -+ case TR_TESTANDWRITE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Test and Write"); break; -+ default: sprintf (typeString, "Type=%d", Type & TR_OPCODE_TYPE_MASK); break; -+ } -+ } -+ sprintf (buffer, "%15s Addr=%08x Cntx=%08x", typeString, Addr, Context); -+ /*(Type & TR_SENDACK) ? " Sendack" : "", */ -+ /*(Type & TR_LAST_TRANS) ? " LastTrans" : "", */ -+ /*(Type & TR_WAIT_FOR_EOP) ? " WaitForEop" : ""); */ -+ } -+ -+ if (StatusValid) -+ { -+ sprintf (statusString, " Type=%s %x", MiToName (hdrp->s.IProcTrapStatus.s.TrapType), hdrp->s.IProcTrapStatus.Status); -+ strcat (buffer, statusString); -+ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ strcat (buffer, " BadLength"); -+ switch (hdrp->s.IProcTrapStatus.Status & CRC_MASK) -+ { -+ case CRC_STATUS_DISCARD: -+ strcat (buffer, " CRC Discard"); -+ break; -+ case CRC_STATUS_ERROR: -+ strcat (buffer, " CRC Error"); -+ break; -+ -+ case CRC_STATUS_BAD: -+ strcat (buffer, " CRC Bad"); -+ break; -+ } -+ } -+ -+ return (buffer); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/Makefile 2005-06-01 23:12:54.587440928 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan3/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/elan3/Makefile -+# -+ -+list-multi := elan3.o -+elan3-objs := context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+export-objs := elandev_linux.o procfs_linux.o -+obj-$(CONFIG_ELAN3) := elan3.o -+ -+elan3.o : $(elan3-objs) -+ $(LD) -r -o $@ $(elan3-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/elan3/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/Makefile.conf 2005-06-01 23:12:54.587440928 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan3.o -+MODULENAME = elan3 -+KOBJFILES = context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+EXPORT_KOBJS = elandev_linux.o procfs_linux.o -+CONFIG_NAME = CONFIG_ELAN3 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/elan3/minames.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/minames.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/minames.c 2005-06-01 23:12:54.587440928 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: minames.c,v 1.12 2003/06/07 15:57:49 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/minames.c,v $*/ -+ -+#include -+#include -+ -+caddr_t -+MiToName (int mi) -+{ -+ static char space[32]; -+ static struct { -+ int mi; -+ char *name; -+ } info[] = { -+#include -+ }; -+ register int i; -+ -+ -+ for (i = 0; i < sizeof(info)/sizeof(info[0]); i++) -+ if (info[i].mi == mi) -+ return (info[i].name); -+ sprintf (space, "MI %x", mi); -+ return (space); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/network_error.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/network_error.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/network_error.c 2005-06-01 23:12:54.589440624 -0400 -@@ -0,0 +1,777 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: network_error.c,v 1.32.2.1 2004/10/28 11:54:57 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/network_error.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DIGITAL_UNIX -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+typedef xdrproc_t kxdrproc_t; -+#endif -+ -+#ifdef LINUX -+#include -+#include -+#include -+#include -+ -+#include -+#define SYS_NMLN __NEW_UTS_LEN -+#endif -+ -+#include -+ -+spinlock_t ResolveRequestLock; -+kcondvar_t ResolveRequestWait; -+ -+NETERR_RESOLVER *ResolveRequestHead; -+NETERR_RESOLVER **ResolveRequestTailp = &ResolveRequestHead; -+int ResolveRequestCount; -+int ResolveRequestThreads; -+int ResolveRequestMaxThreads = 4; -+int ResolveRequestTimeout = 60; -+ -+typedef struct neterr_server -+{ -+ struct neterr_server *Next; -+ struct neterr_server *Prev; -+ unsigned ElanId; -+ -+ char *Name; -+ int RefCount; -+ struct sockaddr_in Addr; -+} NETERR_SERVER; -+ -+#define NETERR_HASH_ENTRIES 64 -+#define NETERR_HASH(elanid) (((unsigned) elanid) % NETERR_HASH_ENTRIES) -+NETERR_SERVER *NeterrServerHash[NETERR_HASH_ENTRIES]; -+kmutex_t NeterrServerLock; -+ -+static NETERR_SERVER *FindNeterrServer (int elanId); -+static void DereferenceNeterrServer (NETERR_SERVER *server); -+static int CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg); -+ -+void -+InitialiseNetworkErrorResolver () -+{ -+ spin_lock_init (&ResolveRequestLock); -+ kcondvar_init (&ResolveRequestWait); -+ -+ ResolveRequestHead = NULL; -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ kmutex_init (&NeterrServerLock); -+} -+ -+void -+FinaliseNetworkErrorResolver () -+{ -+ spin_lock_destroy (&ResolveRequestLock); -+ kcondvar_destroy (&ResolveRequestWait); -+ -+ kmutex_destroy (&NeterrServerLock); -+} -+ -+static NETERR_RESOLVER * -+AllocateNetworkErrorResolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ -+ KMEM_ZALLOC (rvp, NETERR_RESOLVER *, sizeof (NETERR_RESOLVER), TRUE); -+ spin_lock_init (&rvp->Lock); -+ -+ return (rvp); -+} -+ -+void -+FreeNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock_destroy (&rvp->Lock); -+ KMEM_FREE (rvp, sizeof (NETERR_RESOLVER)); -+} -+ -+static void -+elan3_neterr_resolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ NETERR_SERVER *server; -+ int status; -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_neterr_resolver"); -+ spin_lock (&ResolveRequestLock); -+ -+ while ((rvp = ResolveRequestHead) != NULL) -+ { -+ if ((ResolveRequestHead = rvp->Next) == NULL) -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ spin_unlock (&ResolveRequestLock); -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: rvp = %p\n", rvp); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", rvp->Message.Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&rvp->Message.SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&rvp->Message.DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", rvp->Message.CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", rvp->Message.CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", rvp->Message.NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", rvp->Message.WaitForEop); -+ -+ if ((server = FindNeterrServer (rvp->Location.loc_node)) == NULL) -+ status = ECONNREFUSED; -+ else if (ResolveRequestTimeout && ((int)(lbolt - rvp->Timestamp)) > (ResolveRequestTimeout*HZ)) -+ { -+ printk ("elan_neterr: rpc to '%s' timedout - context %d killed\n", server->Name, rvp->Message.SrcCapability.cap_mycontext); -+ status = ECONNABORTED; -+ } -+ else -+ { -+ status = CallNeterrServer (server, &rvp->Message); -+ -+ DereferenceNeterrServer (server); -+ } -+ -+ if ((status == EINTR || status == ETIMEDOUT) && rvp->Ctxt != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: retry rvp=%p\n", rvp); -+ spin_lock (&ResolveRequestLock); -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ } -+ else -+ { -+ rvp->Status = status; -+ -+ spin_lock (&rvp->Lock); -+ -+ if (rvp->Ctxt != NULL) -+ { -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for ctxt %p\n", rvp, rvp->Ctxt); -+ spin_lock_irqsave (&rvp->Ctxt->Device->IntrLock, flags); -+ -+ rvp->Completed = TRUE; -+ -+ kcondvar_wakeupall (&rvp->Ctxt->Wait, &rvp->Ctxt->Device->IntrLock); -+ -+ /* -+ * drop the locks out of order since the rvp can get freeed -+ * as soon as we drop the IntrLock - so cannot reference the -+ * rvp after this. -+ */ -+ -+ spin_unlock (&rvp->Lock); -+ spin_unlock_irqrestore (&rvp->Ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for deceased ctxt %p\n", rvp, rvp->Ctxt); -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestCount--; -+ } -+ } -+ -+ ResolveRequestThreads--; -+ -+ spin_unlock (&ResolveRequestLock); -+ kernel_thread_exit(); -+} -+ -+int -+QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ int isdma = trap->DmaIdentifyTransaction != NULL; -+ E3_IprocTrapHeader_BE *hdrp = isdma ? trap->DmaIdentifyTransaction : trap->ThreadIdentifyTransaction; -+ E3_uint32 process = isdma ? (hdrp->s.TrAddr & 0xFFFF) : (hdrp->s.TrData0 & 0xFFFF); -+ NETERR_RESOLVER *rvp; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: process = %d %s\n", process, isdma ? "(dma)" : "(thread)"); -+ -+ if ((rvp = AllocateNetworkErrorResolver()) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot allocate resolver\n"); -+ return (ENOMEM); -+ } -+ -+ rvp->Message.Rail = ctxt->Device->Devinfo.dev_rail; -+ -+ krwlock_read (&ctxt->VpLock); -+ rvp->Location = ProcessToLocation (ctxt, NULL, process, &rvp->Message.SrcCapability); -+ krwlock_done (&ctxt->VpLock); -+ -+ if (rvp->Location.loc_node == ELAN3_INVALID_NODE) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: invalid elan id\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (EINVAL); -+ } -+ -+ rvp->Message.DstCapability = ctxt->Capability; -+ rvp->Message.DstProcess = elan3_process (ctxt); -+ rvp->Message.WaitForEop = (trap->WaitForEopTransaction != NULL); -+ -+ if (isdma) -+ { -+ rvp->Message.CookieAddr = 0; -+ rvp->Message.CookieVProc = hdrp->s.TrAddr; -+ rvp->Message.NextCookie = 0; -+ } -+ else -+ { -+ rvp->Message.CookieAddr = hdrp->s.TrAddr; -+ rvp->Message.CookieVProc = hdrp->s.TrData0; -+ rvp->Message.NextCookie = hdrp->s.TrData1; -+ } -+ -+ rvp->Completed = FALSE; -+ rvp->Ctxt = ctxt; -+ rvp->Timestamp = lbolt; -+ -+ spin_lock (&ResolveRequestLock); -+ -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ ResolveRequestCount++; -+ -+ kcondvar_wakeupone (&ResolveRequestWait, &ResolveRequestLock); -+ -+ if (ResolveRequestCount < ResolveRequestThreads || ResolveRequestThreads >= ResolveRequestMaxThreads) -+ spin_unlock (&ResolveRequestLock); -+ else -+ { -+ ResolveRequestThreads++; -+ -+ spin_unlock (&ResolveRequestLock); -+ if (kernel_thread_create (elan3_neterr_resolver, NULL) == NULL) -+ { -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestThreads--; -+ spin_unlock (&ResolveRequestLock); -+ -+ if (ResolveRequestThreads == 0) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot thread pool\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (ENOMEM); -+ } -+ } -+ } -+ -+ *rvpp = rvp; -+ return (ESUCCESS); -+} -+ -+void -+CancelNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock (&rvp->Lock); -+ -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "CancelNetworkErrorResolver: rvp=%p %s\n", rvp, rvp->Completed ? "Completed" : "Pending"); -+ -+ if (rvp->Completed) -+ { -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ else -+ { -+ rvp->Ctxt = NULL; -+ spin_unlock (&rvp->Lock); -+ } -+} -+ -+static NETERR_FIXUP * -+AllocateNetworkErrorFixup (void) -+{ -+ NETERR_FIXUP *nef; -+ -+ KMEM_ZALLOC (nef, NETERR_FIXUP *, sizeof (NETERR_FIXUP), TRUE); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (NULL); -+ -+ kcondvar_init (&nef->Wait); -+ -+ return (nef); -+} -+ -+static void -+FreeNetworkErrorFixup (NETERR_FIXUP *nef) -+{ -+ kcondvar_destroy (&nef->Wait); -+ KMEM_FREE (nef, sizeof (NETERR_FIXUP)); -+} -+ -+int -+ExecuteNetworkErrorFixup (NETERR_MSG *msg) -+{ -+ ELAN3_DEV *dev; -+ ELAN3_CTXT *ctxt; -+ NETERR_FIXUP *nef; -+ NETERR_FIXUP **predp; -+ int rc; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "ExecuteNetworkErrorFixup: msg = %p\n", msg); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", msg->Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&msg->SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&msg->DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", msg->CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", msg->CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", msg->NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", msg->WaitForEop); -+ -+ if ((dev = elan3_device (msg->Rail)) == NULL) -+ return (ESRCH); -+ -+ if ((nef = AllocateNetworkErrorFixup()) == NULL) -+ return (ENOMEM); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (ENOMEM); -+ -+ bcopy (msg, &nef->Message, sizeof (NETERR_MSG)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, msg->SrcCapability.cap_mycontext); -+ -+ if (ctxt == NULL) -+ rc = ESRCH; -+ else if (!ELAN_CAP_MATCH (&msg->SrcCapability, &ctxt->Capability)) -+ rc = EPERM; -+ else -+ { -+ if (ctxt->Status & CTXT_NO_LWPS) -+ rc = EAGAIN; -+ else -+ { -+ for (predp = &ctxt->NetworkErrorFixups; *predp != NULL; predp = &(*predp)->Next) -+ ; -+ nef->Next = NULL; -+ *predp = nef; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ while (! nef->Completed) -+ kcondvar_wait (&nef->Wait, &dev->IntrLock, &flags); -+ -+ rc = nef->Status; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ FreeNetworkErrorFixup (nef); -+ -+ return (rc); -+} -+ -+void -+CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "CompleteNetworkErrorFixup: %p %d\n", nef, status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ nef->Status = status; -+ nef->Completed = TRUE; -+ kcondvar_wakeupone (&nef->Wait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+static NETERR_SERVER * -+NewNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ -+ KMEM_ZALLOC (server, NETERR_SERVER *, sizeof (NETERR_SERVER), TRUE); -+ KMEM_ALLOC (server->Name, char *, strlen (name)+1, TRUE); -+ -+ bcopy (addr, &server->Addr, sizeof (struct sockaddr_in)); -+ bcopy (name, server->Name, strlen (name)+1); -+ -+ server->ElanId = elanId; -+ server->RefCount = 1; -+ -+ return (server); -+} -+ -+static void -+DeleteNeterrServer (NETERR_SERVER *server) -+{ -+ KMEM_FREE (server->Name, strlen(server->Name)+1); -+ KMEM_FREE (server, sizeof (NETERR_SERVER)); -+} -+ -+static NETERR_SERVER * -+FindNeterrServer (int elanId) -+{ -+ NETERR_SERVER *server; -+ -+ kmutex_lock (&NeterrServerLock); -+ -+ for (server = NeterrServerHash[NETERR_HASH(elanId)]; server != NULL; server = server->Next) -+ if (server->ElanId == elanId) -+ break; -+ -+ if (server != NULL) -+ server->RefCount++; -+ kmutex_unlock (&NeterrServerLock); -+ -+ return (server); -+} -+ -+static void -+DereferenceNeterrServer (NETERR_SERVER *server) -+{ -+ kmutex_lock (&NeterrServerLock); -+ if ((--server->RefCount) == 0) -+ DeleteNeterrServer (server); -+ kmutex_unlock (&NeterrServerLock); -+} -+ -+int -+AddNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ NETERR_SERVER *old; -+ int hashval = NETERR_HASH(elanId); -+ -+ server = NewNeterrServer (elanId, addr, name); -+ -+ if (server == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&NeterrServerLock); -+ for (old = NeterrServerHash[hashval]; old != NULL; old = old->Next) -+ if (old->ElanId == elanId) -+ break; -+ -+ /* remove "old" server from hash table */ -+ if (old != NULL) -+ { -+ if (old->Prev) -+ old->Prev->Next = old->Next; -+ else -+ NeterrServerHash[hashval] = old->Next; -+ if (old->Next) -+ old->Next->Prev = old->Prev; -+ } -+ -+ /* insert "new" server into hash table */ -+ if ((server->Next = NeterrServerHash[hashval]) != NULL) -+ server->Next->Prev = server; -+ server->Prev = NULL; -+ NeterrServerHash[hashval] = server; -+ -+ kmutex_unlock (&NeterrServerLock); -+ -+ if (old != NULL) -+ DereferenceNeterrServer (old); -+ -+ return (ESUCCESS); -+} -+ -+int -+AddNeterrServerSyscall (int elanId, void *addrp, void *namep, char *unused) -+{ -+ struct sockaddr_in addr; -+ char *name; -+ int error; -+ int nob; -+ -+ /* Sanity check the supplied elanId argument */ -+ if (elanId < 0) -+ return ( set_errno(EINVAL) ); -+ -+ KMEM_ALLOC (name, caddr_t, SYS_NMLN, TRUE); -+ -+ if (copyin ((caddr_t) addrp, (caddr_t) &addr, sizeof (addr)) || -+ copyinstr ((caddr_t) namep, name, SYS_NMLN, &nob)) -+ { -+ error = EFAULT; -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "AddNeterrServer: '%s' at elanid %d\n", name, elanId); -+ -+ error = AddNeterrServer (elanId, &addr, name); -+ } -+ KMEM_FREE (name, SYS_NMLN); -+ -+ return (error ? set_errno(error) : ESUCCESS); -+} -+ -+ -+#if defined(DIGITAL_UNIX) -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ cred_t *cr = crget(); -+ struct rpc_err rpcerr; -+ extern cred_t *kcred; -+ struct timeval wait; -+ enum clnt_stat rc; -+ int status; -+ CLIENT *clnt; -+ int error; -+ -+ PRINTF4 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) - family=%d port=%d addr=%08x\n", server->Name, -+ server->Addr.sin_family, server->Addr.sin_port, server->Addr.sin_addr.s_addr); -+ -+ if ((clnt = clntkudp_create (&server->Addr, (struct sockaddr_in *)0, NETERR_PROGRAM, NETERR_VERSION, 1, cr)) == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): clntkudp_create error\n", server->Name); -+ -+ return (ENOMEM); -+ } -+ -+ wait.tv_sec = NETERR_RPC_TIMEOUT; -+ wait.tv_usec = 0; -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL timeout = %d\n", server->Name, NETERR_RPC_TIMEOUT); -+ -+ rc = CLNT_CALL(clnt, NETERR_FIXUP_RPC, xdr_neterr_msg, (void *)msg, xdr_int, (void *) &status, wait); -+ -+ PRINTF3 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL -> %d (%s)\n", server->Name, rc, clnt_sperrno(rc));; -+ -+ switch (rc) -+ { -+ case RPC_SUCCESS: -+ break; -+ -+ case RPC_INTR: -+ status = EINTR; -+ break; -+ -+ case RPC_TIMEDOUT: -+ status = ETIMEDOUT; -+ break; -+ -+ default: -+ printf ("CallNeterrServer(%s): %s\n", server->Name, clnt_sperrno(status)); -+ status = ENOENT; -+ break; -+ } -+ -+ CLNT_DESTROY(clnt); -+ -+ crfree(cr); -+ -+ ASSERT(rc == RPC_SUCCESS || status != 0); -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): status=%d\n", server->Name, status); -+ -+ return (status); -+} -+#endif -+ -+#if defined(LINUX) -+ -+#define xdrsize(type) ((sizeof(type) + 3) >> 2) -+ -+static int -+xdr_error(struct rpc_rqst *req, u32 *p, void *dummy) -+{ -+ return -EIO; -+} -+ -+static int -+xdr_decode_int(struct rpc_rqst *req, u32 *p, int *res) -+{ -+ *res = ntohl(*p++); -+ return 0; -+} -+ -+#define XDR_capability_sz ((12 + BT_BITOUL(ELAN3_MAX_VPS)) * sizeof (u32)) -+ -+static int -+xdr_encode_capability(u32 *p, ELAN_CAPABILITY *cap) -+{ -+ u32 *pp = p; -+ -+ /* basic xdr unit is u32 - for opaque types we must round up to that */ -+ memcpy(p, &cap->cap_userkey, sizeof(cap->cap_userkey)); -+ p += xdrsize(cap->cap_userkey); -+ -+ *p++ = htonl(cap->cap_version); -+ ((u16 *) (p++))[1] = htons(cap->cap_type); -+ *p++ = htonl(cap->cap_lowcontext); -+ *p++ = htonl(cap->cap_highcontext); -+ *p++ = htonl(cap->cap_mycontext); -+ *p++ = htonl(cap->cap_lownode); -+ *p++ = htonl(cap->cap_highnode); -+ *p++ = htonl(cap->cap_railmask); -+ -+ memcpy(p, &cap->cap_bitmap[0], sizeof(cap->cap_bitmap)); -+ p += xdrsize(cap->cap_bitmap); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_capability_sz); -+ -+ return (p - pp); -+} -+ -+ -+#define XDR_neterr_sz (((1 + 5) * sizeof (u32)) + (2*XDR_capability_sz)) -+ -+static int -+xdr_encode_neterr_msg(struct rpc_rqst *req, u32 *p, NETERR_MSG *msg) -+{ -+ u32 *pp = p; -+ -+ *p++ = htonl(msg->Rail); -+ -+ p += xdr_encode_capability(p, &msg->SrcCapability); -+ p += xdr_encode_capability(p, &msg->DstCapability); -+ -+ *p++ = htonl(msg->DstProcess); -+ *p++ = htonl(msg->CookieAddr); -+ *p++ = htonl(msg->CookieVProc); -+ *p++ = htonl(msg->NextCookie); -+ *p++ = htonl(msg->WaitForEop); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_neterr_sz); -+ -+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); -+ -+ return 0; -+} -+ -+static struct rpc_procinfo neterr_procedures[2] = -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define RPC_ID_NULL "neterr_null" -+# define RPC_ID_FIXUP_RPC "neterr_fixup_rpc" -+#else -+# define RPC_ID_NULL NETERR_NULL_RPC -+# define RPC_ID_FIXUP_RPC NETERR_FIXUP_RPC -+#endif -+ { -+ RPC_ID_NULL, /* procedure name or number*/ -+ (kxdrproc_t) xdr_error, /* xdr encode fun */ -+ (kxdrproc_t) xdr_error, /* xdr decode fun */ -+ 0, /* req buffer size */ -+ 0, /* call count */ -+ }, -+ { -+ RPC_ID_FIXUP_RPC, -+ (kxdrproc_t) xdr_encode_neterr_msg, -+ (kxdrproc_t) xdr_decode_int, -+ XDR_neterr_sz, -+ 0, -+ }, -+}; -+ -+static struct rpc_version neterr_version1 = -+{ -+ 1, /* version */ -+ 2, /* number of procedures */ -+ neterr_procedures /* procedures */ -+}; -+ -+static struct rpc_version *neterr_version[] = -+{ -+ NULL, -+ &neterr_version1, -+}; -+ -+static struct rpc_stat neterr_stats; -+ -+static struct rpc_program neterr_program = -+{ -+ NETERR_SERVICE, -+ NETERR_PROGRAM, -+ sizeof(neterr_version)/sizeof(neterr_version[0]), -+ neterr_version, -+ &neterr_stats, -+}; -+ -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ struct rpc_xprt *xprt; -+ struct rpc_clnt *clnt; -+ struct rpc_timeout to; -+ int rc, status; -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s)\n", server->Name); -+ -+ xprt_set_timeout(&to, 1, NETERR_RPC_TIMEOUT * HZ); -+ -+ if ((xprt = xprt_create_proto(IPPROTO_UDP, &server->Addr, &to)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) xprt_create_proto failed\n", server->Name); -+ return EFAIL; -+ } -+ -+ if ((clnt = rpc_create_client(xprt, server->Name, &neterr_program, NETERR_VERSION, RPC_AUTH_NULL)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) rpc_create_client failed\n", server->Name); -+ xprt_destroy (xprt); -+ -+ return EFAIL; -+ } -+ -+ clnt->cl_softrtry = 1; -+ clnt->cl_chatty = 0; -+ clnt->cl_oneshot = 1; -+ clnt->cl_intr = 0; -+ -+ if ((rc = rpc_call(clnt, NETERR_FIXUP_RPC, msg, &status, 0)) < 0) -+ { -+ /* RPC error has occured - determine whether we should retry */ -+ -+ status = ETIMEDOUT; -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): -> %d\n", server->Name, status); -+ -+ return (status); -+} -+ -+#endif /* defined(LINUX) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/procfs_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/procfs_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/procfs_linux.c 2005-06-01 23:12:54.589440624 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.21 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/procfs_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *elan3_procfs_root; -+struct proc_dir_entry *elan3_config_root; -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ int len; -+ -+ if (dev->Position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ unsigned nodeid = ELAN3_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_nodeid = ELAN3_INVALID_NODE; -+ dev->Position.pos_nodes = 0; -+ dev->Position.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (ComputePosition (&dev->Position, nodeid, numnodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->Instance, nodeid, numnodes); -+ else -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->Instance, dev->Position.pos_nodeid, -+ dev->Position.pos_nodes, dev->Position.pos_levels); -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+ -+void -+elan3_procfs_device_init (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir, *p; -+ char name[NAME_MAX]; -+ -+ sprintf (name, "device%d", dev->Instance); -+ dir = dev->Osdep.procdir = proc_mkdir (name, elan3_procfs_root); -+ -+ if ((p = create_proc_entry ("position", 0, dir)) != NULL) -+ { -+ p->read_proc = proc_read_position; -+ p->write_proc = proc_write_position; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ -+} -+ -+void -+elan3_procfs_device_fini (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir = dev->Osdep.procdir; -+ char name[NAME_MAX]; -+ -+ remove_proc_entry ("position", dir); -+ -+ sprintf (name, "device%d", dev->Instance); -+ remove_proc_entry (name, elan3_procfs_root); -+} -+ -+void -+elan3_procfs_init() -+{ -+ extern int eventint_punt_loops; -+ extern int ResolveRequestTimeout; -+ -+ elan3_procfs_root = proc_mkdir("elan3", qsnet_procfs_root); -+ -+ elan3_config_root = proc_mkdir("config", elan3_procfs_root); -+ -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug", &elan3_debug, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_console", &elan3_debug_console, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_buffer", &elan3_debug_buffer, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3mmu_debug", &elan3mmu_debug, 0); -+ qsnet_proc_register_int (elan3_config_root, "eventint_punt_loops", &eventint_punt_loops, 0); -+ qsnet_proc_register_int (elan3_config_root, "neterr_timeout", &ResolveRequestTimeout, 0); -+ -+#if defined(__ia64__) -+ { -+ extern int enable_sdram_writecombining; -+ qsnet_proc_register_int (elan3_config_root, "enable_sdram_writecombining", &enable_sdram_writecombining, 0); -+ } -+#endif -+} -+ -+void -+elan3_procfs_fini() -+{ -+#if defined(__ia64__) -+ remove_proc_entry ("enable_sdram_writecombining", elan3_config_root); -+#endif -+ remove_proc_entry ("neterr_timeout", elan3_config_root); -+ remove_proc_entry ("eventint_punt_loops", elan3_config_root); -+ remove_proc_entry ("elan3mmu_debug", elan3_config_root); -+ remove_proc_entry ("elan3_debug_buffer", elan3_config_root); -+ remove_proc_entry ("elan3_debug_console", elan3_config_root); -+ remove_proc_entry ("elan3_debug", elan3_config_root); -+ -+ remove_proc_entry ("config", elan3_procfs_root); -+ remove_proc_entry ("version", elan3_procfs_root); -+ -+ remove_proc_entry ("elan3", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan3_procfs_root); -+EXPORT_SYMBOL(elan3_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/quadrics_version.h 2005-06-01 23:12:54.589440624 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/elan3/routecheck.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/routecheck.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/routecheck.c 2005-06-01 23:12:54.590440472 -0400 -@@ -0,0 +1,313 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* ------------------------------------------------------------- */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* ---------------------------------------------------------------------- */ -+typedef struct elan3_net_location { -+ int netid; -+ int plane; -+ int level; -+} ELAN3_NET_LOCATION; -+/* ---------------------------------------------------------------------- */ -+#define FLIT_LINK_ARRAY_MAX (ELAN3_MAX_LEVELS*2) -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_follow_link( ELAN3_CTXT *ctxt, ELAN3_NET_LOCATION *loc, int link) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ if ((link<0) || (link>7)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_follow_link: link (%d) out of range \n",link); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* going up or down ? */ -+ if ( link >= pos->pos_arity[loc->level] ) -+ { -+ /* Up */ -+ if (loc->level >= pos->pos_levels) -+ loc->plane = 0; -+ else -+ { -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (16 * ( loc->plane / 8 )) + (4 * ( loc->plane % 4)) -+ +(link - pos->pos_arity[loc->level]); -+ else -+ loc->plane = (loc->plane * (8 - pos->pos_arity[loc->level])) -+ +(link - pos->pos_arity[loc->level]); -+ } -+ loc->level--; -+ if ( loc->level < 0 ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the top\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ loc->netid = loc->netid / pos->pos_arity[loc->level]; -+ } -+ else -+ { -+ /* going down */ -+ if ((loc->level == 0) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->netid = link % 2; -+ else -+ loc->netid =(loc->netid * pos->pos_arity[loc->level])+link; -+ -+ loc->level++; -+ if (loc->level > pos->pos_levels) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the bottom\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( loc->level >= (pos->pos_levels-1)) -+ loc->plane = 0; -+ else -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (((loc->plane)>>2)*2) - ( ((loc->plane)>>2) & 3 ) + ((link<2)?0:4); /* ((p/4) % 4) */ -+ else -+ loc->plane = loc->plane/(8-pos->pos_arity[loc->level]); -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int /* assumes they are connected, really only used for finding the MyLink */ -+elan3_route_get_mylink (ELAN_POSITION *pos, ELAN3_NET_LOCATION *locA, ELAN3_NET_LOCATION *locB) -+{ -+ /* whats the My Link for locA to LocB */ -+ if ( locA->level > locB->level ) -+ return locB->plane - (locA->plane * (8 - pos->pos_arity[locA->level])) + pos->pos_arity[locA->level]; -+ -+ return locB->netid - (locA->netid * pos->pos_arity[locA->level]); -+} -+/* ---------------------------------------------------------------------- */ -+#define FIRST_GET_HIGH_PRI(FLIT) (FLIT & FIRST_HIGH_PRI) -+#define FIRST_GET_AGE(FLIT) ((FLIT & FIRST_AGE(15))>>11) -+#define FIRST_GET_TIMEOUT(FLIT) ((FLIT & FIRST_TIMEOUT(3))>>9) -+#define FIRST_GET_NEXT(FLIT) ((FLIT & FIRST_PACKED(3))>>7) -+#define FIRST_GET_ROUTE(FLIT) (FLIT & 0x7f) -+#define FIRST_GET_BCAST(FLIT) (FLIT & 0x40) -+#define FIRST_GET_IS_INVALID(FLIT) ((FLIT & 0x78) == 0x08) -+#define FIRST_GET_TYPE(FLIT) ((FLIT & 0x30)>>4) -+#define PRF_GET_ROUTE(FLIT,N) ((FLIT >> (N*4)) & 0x0F) -+#define PRF_GET_IS_MYLINK(ROUTE) (ROUTE == PACKED_MYLINK) -+#define PRF_GET_IS_NORMAL(ROUTE) (ROUTE & 0x8) -+#define PRF_GET_NORMAL_LINK(ROUTE) (ROUTE & 0x7) -+#define PRF_MOVE_ON(INDEX,NEXT) do { if (NEXT==3) {NEXT=0;INDEX++;} else {NEXT++; }} while (0); -+/* ---------------------------------------------------------------------- */ -+int /* turn level needed or -1 if not possible */ -+elan3_route_get_min_turn_level( ELAN_POSITION *pos, int nodeId) -+{ -+ int l,range = 1; -+ -+ for(l=pos->pos_levels-1;l>=0;l--) -+ { -+ range = range * pos->pos_arity[l]; -+ -+ if ( ((pos->pos_nodeid - (pos->pos_nodeid % range)) <= nodeId ) -+ && (nodeId <= (pos->pos_nodeid - (pos->pos_nodeid % range)+range -1))) -+ return l; -+ } -+ return -1; -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_check(ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNodeId) -+{ -+ ELAN3_NET_LOCATION lastLoc,currLoc; -+ int err; -+ int turnLevel; -+ int goingDown; -+ int lnk,index,next,val; -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ /* is the dest possible */ -+ if ( (destNodeId <0 ) || (destNodeId >= pos->pos_nodes)) -+ return (ELAN3_ROUTE_PROC_RANGE); -+ -+ /* -+ * walk the route, -+ * - to see if we get there -+ * - checking we dont turn around -+ */ -+ currLoc.netid = pos->pos_nodeid; /* the elan */ -+ currLoc.plane = 0; -+ currLoc.level = pos->pos_levels; -+ -+ turnLevel = currLoc.level; /* track the how far the route goes in */ -+ goingDown = 0; /* once set we cant go up again ie only one change of direction */ -+ -+ /* move onto the network from the elan */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,4)) != ELAN3_ROUTE_SUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: initial elan3_route_follow_link failed\n"); -+ return err; -+ } -+ /* do the first part of flit */ -+ switch ( FIRST_GET_TYPE(flits[0]) ) -+ { -+ case 0 /* sent */ : { lnk = (flits[0] & 0x7); break; } -+ case PACKED_MYLINK : { lnk = pos->pos_nodeid % pos->pos_arity[pos->pos_levels-1]; break; } -+ case PACKED_ADAPTIVE : { lnk = 7; /* all routes are the same just check one */ break; } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected first flit (%d)\n",flits[0]); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: elan3_route_follow_link failed\n"); -+ return err; -+ } -+ if ((currLoc.level > pos->pos_levels) || (currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ if ( lastLoc.level < currLoc.level ) -+ { -+ turnLevel = lastLoc.level; -+ goingDown = 1; -+ } -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* loop on doing the remaining flits */ -+ index = 1; -+ next = FIRST_GET_NEXT(flits[0]); -+ val = PRF_GET_ROUTE(flits[index],next); -+ while(val) -+ { -+ if (PRF_GET_IS_NORMAL(val) ) -+ lnk = PRF_GET_NORMAL_LINK(val); -+ else -+ { -+ switch ( val ) -+ { -+ case PACKED_MYLINK : -+ { -+ lnk = elan3_route_get_mylink(pos, &currLoc,&lastLoc); -+ break; -+ } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected packed flit (%d)\n",val); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS) -+ return err; -+ -+ if ((currLoc.level > pos->pos_levels ) || ( currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( lastLoc.level < currLoc.level ) -+ goingDown = 1; -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* move to next part of flit */ -+ PRF_MOVE_ON(index,next); -+ if ( index >= MAX_FLITS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route too long\n"); -+ return (ELAN3_ROUTE_TOO_LONG); -+ } -+ /* extract the new value */ -+ val = PRF_GET_ROUTE(flits[index],next); -+ } -+ -+ /* have we got to where we want ? */ -+ if ((currLoc.level != pos->pos_levels) || (currLoc.netid != destNodeId)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: goes to %d instead of %d\n",currLoc.netid , destNodeId ); -+ return (ELAN3_ROUTE_WRONG_DEST); -+ } -+ -+ /* -+ * there is the case of src == dest -+ * getTurnLevel returns pos->pos_levels, and turnLevel is (pos->pos_levels -1) -+ * then we assume they really want to go onto the network. -+ * otherwise we check that the turn at the appriate level -+ */ -+ if ( (pos->pos_nodeid != destNodeId) || ( turnLevel != (pos->pos_levels -1)) ) -+ { -+ int lev; -+ if ((lev = elan3_route_get_min_turn_level(pos,destNodeId)) == -1) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: cant calculate turn level\n"); -+ return (ELAN3_ROUTE_INVALID); /* not sure this can happen here as checks above should protect me */ -+ } -+ if (turnLevel != lev) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: turn level should be %d but is %d \n", lev, turnLevel); -+ return (ELAN3_ROUTE_TURN_LEVEL); -+ } -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_broadcast_check(ELAN3_CTXT *ctxt , E3_uint16 *flits, int lowNode, int highNode ) -+{ -+ E3_uint16 flitsTmp[MAX_FLITS]; -+ int nflits,i; -+ -+ nflits = GenerateRoute (&ctxt->Position, flitsTmp, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ for(i=0;i -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static sdramaddr_t -+AllocateLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int ctxnum, E3_uint64 *smallRoute) -+{ -+ int bit = -1; -+ ELAN3_ROUTES *rent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if ((bit = bt_freebit (rent->Bitmap, NROUTES_PER_BLOCK)) != -1) -+ break; -+ } -+ -+ if (bit == -1) /* No spare entries in large routes */ -+ { /* so allocate a new page */ -+ PRINTF0 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: allocate route entries\n"); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ KMEM_ZALLOC(rent, ELAN3_ROUTES *, sizeof (ELAN3_ROUTES), TRUE); -+ -+ if (rent == (ELAN3_ROUTES *) NULL) -+ return ((sdramaddr_t) 0); -+ -+ rent->Routes = elan3_sdram_alloc (dev, PAGESIZE); -+ if (rent->Routes == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (rent, sizeof (ELAN3_ROUTES)); -+ return ((sdramaddr_t) 0); -+ } -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* Add to list of large routes */ -+ rent->Next = tbl->LargeRoutes; -+ tbl->LargeRoutes = rent; -+ -+ /* and use entry 0 */ -+ bit = 0; -+ } -+ -+ /* Set the bit in the bitmap to mark this route as allocated */ -+ BT_SET (rent->Bitmap, bit); -+ -+ /* And generate the small route pointer and the pointer to the large routes */ -+ (*smallRoute) = BIG_ROUTE_PTR(rent->Routes + (bit*NBYTES_PER_LARGE_ROUTE), ctxnum); -+ -+ PRINTF4 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: rent %p using entry %d at %lx with route pointer %llx\n", -+ rent, bit, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), (long long) (*smallRoute)); -+ -+ /* Invalidate the large route */ -+ elan3_sdram_zeroq_sdram (dev, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), NBYTES_PER_LARGE_ROUTE); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ return (rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE)); -+} -+ -+static void -+FreeLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, E3_uint64 smallRoute) -+{ -+ E3_Addr addr = (E3_Addr) (smallRoute & ((1ULL << ROUTE_CTXT_SHIFT)-1)); -+ ELAN3_ROUTES *rent; -+ -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: free route %llx\n", (long long) smallRoute); -+ -+ ASSERT (SPINLOCK_HELD (&tbl->Lock)); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if (rent->Routes <= addr && (rent->Routes + ROUTE_BLOCK_SIZE) > addr) -+ { -+ int indx = (addr - rent->Routes)/NBYTES_PER_LARGE_ROUTE; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: rent=%p indx=%d\n", rent, indx); -+ -+ BT_CLEAR(rent->Bitmap, indx); -+ return; -+ } -+ } -+ -+ panic ("elan: FreeLargeRoute - route not found in large route tables"); -+} -+ -+static void -+FreeLargeRoutes (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ ELAN3_ROUTES *rent; -+ -+ while ((rent = tbl->LargeRoutes) != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoutes: free rent %p\n", rent); -+ -+ tbl->LargeRoutes = rent->Next; -+ -+ elan3_sdram_free (dev, rent->Routes, PAGESIZE); -+ -+ KMEM_FREE (rent, sizeof(ELAN3_ROUTES)); -+ } -+} -+ -+int -+GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ sdramaddr_t largeRouteOff; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if (routeValue & ROUTE_PTR) -+ { -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 0); -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ flits[3] = (routeValue >> 48) & 0xffff; -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 8); -+ flits[4] = routeValue & 0xffff; -+ flits[5] = (routeValue >> 16) & 0xffff; -+ flits[6] = (routeValue >> 32) & 0xffff; -+ flits[6] = (routeValue >> 48) & 0xffff; -+ } -+ else -+ { -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ } -+ -+ return (ESUCCESS); -+} -+ -+ELAN3_ROUTE_TABLE * -+AllocateRouteTable (ELAN3_DEV *dev, int size) -+{ -+ ELAN3_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN3_ROUTE_TABLE *, sizeof (ELAN3_ROUTE_TABLE), TRUE); -+ -+ if (tbl == (ELAN3_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->Size = size; -+ tbl->Table = elan3_sdram_alloc (dev, size*NBYTES_PER_SMALL_ROUTE); -+ -+ if (tbl->Table == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+ return (NULL); -+ } -+ spin_lock_init (&tbl->Lock); -+ -+ /* zero the route table */ -+ elan3_sdram_zeroq_sdram (dev, tbl->Table, size*NBYTES_PER_SMALL_ROUTE); -+ -+ return (tbl); -+} -+ -+void -+FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ elan3_sdram_free (dev, tbl->Table, tbl->Size*NBYTES_PER_SMALL_ROUTE); -+ -+ FreeLargeRoutes (dev, tbl); -+ -+ spin_lock_destroy (&tbl->Lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+} -+ -+int -+LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, int ctxnum, int nflits, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ E3_uint64 largeRouteValue; -+ sdramaddr_t largeRouteOff; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: table %lx process %d ctxnum %x\n", tbl->Table ,process, ctxnum); -+ -+ if (nflits < 4) -+ { -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* See if we're replacing a "large" route */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ routeValue = SMALL_ROUTE(flits, ctxnum); -+ -+ if ( routeValue & ROUTE_PTR) -+ PRINTF0 (DBG_DEVICE, DBG_VP, "SHOULD BE A SMALL ROUTE !!!!!!!\n"); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "LoadRoute: loading small route %d %llx\n", process, (long long) routeValue); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, routeValue); -+ } -+ else -+ { -+ E3_uint64 value0 = BIG_ROUTE0(flits); -+ E3_uint64 value1 = BIG_ROUTE1(flits); -+ -+ if ((largeRouteOff = AllocateLargeRoute (dev, tbl, ctxnum, &largeRouteValue)) == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if ((routeValue & ROUTE_PTR) == 0) -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, largeRouteValue); -+ else -+ { -+ FreeLargeRoute (dev, tbl, largeRouteValue); -+ -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: loading large route %d - %llx %llx\n", process, -+ (long long) value0, (long long) value1); -+ -+ elan3_sdram_writeq (dev, largeRouteOff + 0, value0); -+ elan3_sdram_writeq (dev, largeRouteOff + 8, value1); -+ } -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ return (ESUCCESS); -+} -+void -+InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* unset ROUTE_VALID -+ * does not matter if its short or long, will check when we re-use it -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue & (~ROUTE_VALID))); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ValidateRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* set ROUTE_VALID -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue | ROUTE_VALID)); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ClearRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, 0); -+ -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+ -+static int -+ElanIdEqual (ELAN_POSITION *pos, int level, int ida, int idb) -+{ -+ int l; -+ -+ for (l = pos->pos_levels-1; l >= level; l--) -+ { -+ ida /= pos->pos_arity[l]; -+ idb /= pos->pos_arity[l]; -+ } -+ -+ return (ida == idb); -+} -+ -+static int -+RouteDown (ELAN_POSITION *pos, int level, int elanid) -+{ -+ int l; -+ -+ for (l = (pos->pos_levels - 1); level < pos->pos_levels - 1; level++, l--) -+ { -+ if ( pos->pos_arity[l] ) -+ elanid /= pos->pos_arity[l]; -+ } -+ elanid %= pos->pos_arity[l]; -+ -+ return elanid; -+} -+ -+static int -+InitPackedAndFlits (u_char *packed, E3_uint16 *flits) -+{ -+ int rb = 0; -+ -+ bzero ((caddr_t) packed, MAX_PACKED+4); -+ bzero ((caddr_t) flits, MAX_FLITS * sizeof (E3_uint16)); -+ -+ /* Initialise 4 bytes of packed, so that the "padding" */ -+ /* NEVER terminates with 00, as this is recognised as */ -+ /* as CRC flit */ -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ -+ return (rb); -+} -+ -+static int -+PackThemRoutesUp (E3_uint16 *flits, u_char *packed, int rb, int timeout, int highPri) -+{ -+ int i, nflits; -+ -+ flits[0] |= FIRST_TIMEOUT(timeout); -+ if (highPri) -+ flits[0] |= FIRST_HIGH_PRI; -+ -+ /* round up the number of route bytes to flits */ -+ /* and subtract the 4 extra we've padded out with */ -+ nflits = (rb-1)/4; -+ -+ for (i = nflits; i > 0; i--) -+ { -+ flits[i] = (packed[rb-1] << 12 | -+ packed[rb-2] << 8 | -+ packed[rb-3] << 4 | -+ packed[rb-4] << 0); -+ rb -= 4; -+ } -+ -+ /* Now set the position of the first packed route */ -+ /* byte in the 2nd 16 bit flit, taking account of the */ -+ /* 4 byte padding */ -+ flits[0] |= FIRST_PACKED (4-rb); -+ -+ return (nflits+1); -+} -+ -+int -+GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri) -+{ -+ int broadcast = (lowid != highid); -+ int rb = 0; -+ int first = 1; -+ int noRandom = 0; -+ int level; -+ u_char packed[MAX_PACKED+4]; -+ int numDownLinks; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ noRandom |= pos->pos_random_disabled & (1 << (pos->pos_levels-1-level)); -+ } -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ numDownLinks = pos->pos_arity [level]; -+ if (first) -+ { -+ if (broadcast || noRandom) -+ flits[0] = FIRST_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ flits[0] = FIRST_ADAPTIVE; -+ else -+ flits[0] = FIRST_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ first = 0; -+ } -+ else -+ { -+ if (broadcast || noRandom) -+ packed[rb++] = PACKED_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ packed[rb++] = PACKED_ADAPTIVE; -+ else -+ packed[rb++] = PACKED_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ } -+ } -+ -+ while (level < pos->pos_levels) -+ { -+ int lowRoute = RouteDown (pos, level, lowid); -+ int highRoute = RouteDown (pos, level, highid); -+ -+ if (first) -+ { -+ if (broadcast) -+ flits[0] = FIRST_BCAST(highRoute, lowRoute); -+ else -+ flits[0] = FIRST_ROUTE(lowRoute); -+ -+ first = 0; -+ } -+ else -+ { -+ if (broadcast) -+ { -+ packed[rb++] = PACKED_BCAST0(highRoute, lowRoute); -+ packed[rb++] = PACKED_BCAST1(highRoute, lowRoute); -+ } -+ else -+ packed[rb++] = PACKED_ROUTE(lowRoute); -+ } -+ -+ level++; -+ } -+ -+#ifdef ELITE_REVA_SUPPORTED -+ if (broadcast && (pos->pos_levels == 3)) -+ { -+ packed[rb++] = PACKED_BCAST0(0, 0); -+ packed[rb++] = PACKED_BCAST1(0, 0); -+ } -+#endif -+ -+ return (PackThemRoutesUp (flits, packed, rb, timeout, highPri)); -+} -+ -+int -+GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive) -+{ -+ int notfirst = 0; -+ int l, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (l = pos->pos_levels-1; l > level; l--) -+ if (! notfirst++) -+ flits[0] = adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ -+ if (! notfirst++ ) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (l++ /* consume mylink */; l < pos->pos_levels; l++) -+ if (! notfirst++) -+ flits[0] = FIRST_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ else -+ packed[rb++] = PACKED_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * In this case "level" is the number of levels counted from the bottom. -+ */ -+int -+GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive ) -+{ -+ int first = 1; -+ int i, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ { -+ if (first) -+ flits[0] = linkup ? FIRST_ROUTE(linkup[i]) : adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ first = 0; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = PACKED_ROUTE(linkdown[i]); -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/sdram.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/sdram.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/sdram.c 2005-06-01 23:12:54.593440016 -0400 -@@ -0,0 +1,807 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.17 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/sdram.c,v $*/ -+ -+ -+#include -+ -+#include -+#include -+#include -+ -+/* sdram access functions */ -+#define sdram_off_to_bank(dev,off) (&dev->SdramBanks[(off) >> ELAN3_SDRAM_BANK_SHIFT]) -+#define sdram_off_to_offset(dev,off) ((off) & (ELAN3_SDRAM_BANK_SIZE-1)) -+#define sdram_off_to_bit(dev,indx,off) (sdram_off_to_offset(dev,off) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))) -+ -+#define sdram_off_to_mapping(dev,off) (sdram_off_to_bank(dev,off)->Mapping + sdram_off_to_offset(dev,off)) -+ -+unsigned char -+elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readb ((unsigned char *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned short -+elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readw ((unsigned short *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned int -+elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readl ((unsigned int *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned long long -+elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readq ((unsigned long long *) sdram_off_to_mapping(dev, off))); -+} -+ -+void -+elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, (unsigned char *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, (unsigned short *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, (unsigned int *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, (unsigned long long *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+} -+ -+void -+elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+#define E3_WRITEBUFFER_SIZE 16 -+#define E3_WRITEBUFFER_OFFSET(x) (((unsigned long) x) & (E3_WRITEBUFFER_SIZE-1)) -+#define E3_WRITEBUFFER_BASE(x) (((unsigned long) x) & ~((unsigned long) (E3_WRITEBUFFER_SIZE-1))) -+ -+void -+elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ for (i = 0; i < E3_WRITEBUFFER_SIZE/sizeof (uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (((uint16_t *) slim)[i], &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writew (((uint16_t *) slim)[0], &((uint16_t *) dlim)[0]); -+ writew (((uint16_t *) slim)[1], &((uint16_t *) dlim)[1]); -+ writew (((uint16_t *) slim)[2], &((uint16_t *) dlim)[2]); -+ writew (((uint16_t *) slim)[3], &((uint16_t *) dlim)[3]); -+ writew (((uint16_t *) slim)[4], &((uint16_t *) dlim)[4]); -+ writew (((uint16_t *) slim)[5], &((uint16_t *) dlim)[5]); -+ writew (((uint16_t *) slim)[6], &((uint16_t *) dlim)[6]); -+ writew (((uint16_t *) slim)[7], &((uint16_t *) dlim)[7]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (((uint32_t *) slim)[i], &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writel (((uint32_t *) slim)[0], &((uint32_t *) dlim)[0]); -+ writel (((uint32_t *) slim)[1], &((uint32_t *) dlim)[1]); -+ writel (((uint32_t *) slim)[2], &((uint32_t *) dlim)[2]); -+ writel (((uint32_t *) slim)[3], &((uint32_t *) dlim)[3]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ writeq (((uint64_t *) slim)[1], &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+physaddr_t -+elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+#if defined(DIGITAL_UNIX) -+ return (KSEG_TO_PHYS (sdram_off_to_mapping (dev, off))); -+#elif defined(LINUX) -+ return (kmem_to_phys ((void *) sdram_off_to_mapping (dev, off))); -+#endif -+} -+ -+/* sdram buddy allocator */ -+#define read_next(dev, block) elan3_sdram_readl(dev, block + 0) -+#define read_prev(dev, block) elan3_sdram_readl(dev, block + 4) -+#define write_next(dev, block, val) (elan3_sdram_writel(dev, block + 0, val), val) -+#define write_prev(dev, block, val) (elan3_sdram_writel(dev, block + 4, val), val) -+ -+#define freelist_insert(dev,idx,block)\ -+do {\ -+ sdramaddr_t next = dev->SdramFreeLists[(idx)];\ -+\ -+ /*\ -+ * block->prev = NULL;\ -+ * block->next = next;\ -+ * if (next != NULL)\ -+ * next->prev = block;\ -+ * freelist = block;\ -+ */\ -+ write_prev (dev, block, (sdramaddr_t) 0);\ -+ write_next (dev, block, next);\ -+ if (next != (sdramaddr_t) 0)\ -+ write_prev (dev, next, block);\ -+ dev->SdramFreeLists[idx] = block;\ -+\ -+ dev->SdramFreeCounts[idx]++;\ -+ dev->Stats.SdramBytesFree += (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_remove(dev,idx,block)\ -+do {\ -+ /*\ -+ * if (block->prev)\ -+ * block->prev->next = block->next;\ -+ * else\ -+ * dev->SdramFreeLists[idx] = block->next;\ -+ * if (block->next)\ -+ * block->next->prev = block->prev;\ -+ */\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+ sdramaddr_t blockprev = read_prev (dev, block);\ -+\ -+ if (blockprev)\ -+ write_next (dev, blockprev, blocknext);\ -+ else\ -+ dev->SdramFreeLists[idx] = blocknext;\ -+ if (blocknext)\ -+ write_prev (dev, blocknext, blockprev);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_removehead(dev,idx,block)\ -+do {\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+\ -+ if ((dev->SdramFreeLists[idx] = blocknext) != 0)\ -+ write_prev (dev, blocknext, 0);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#if defined(DEBUG) -+static int -+display_blocks (ELAN3_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ printk ("%s - indx %d\n", string, indx); -+ for (block = dev->SdramFreeLists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ printk (" %lx", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ printk ("\n"); -+ -+ return (nbytes); -+} -+ -+ -+void -+elan3_sdram_display (ELAN3_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ printk ("elan3_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->SdramFreeLists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ printk ("\n%d bytes free\n", nbytes); -+} -+ -+void -+elan3_sdram_verify (ELAN3_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->SdramFreeLists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned off = sdram_off_to_offset (dev, block); -+ int bit = sdram_off_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->Size) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->Bitmaps[indx], bit) == 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdram_off_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->Bitmaps[i], bit + b)) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->SdramFreeCounts[indx] != count) -+ printk ("elan3_sdram_verify: indx=%x expected %d got %d\n", indx, dev->SdramFreeCounts[indx], count); -+ } -+} -+ -+#endif /* defined(DEBUG) */ -+ -+static void -+free_block (ELAN3_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned bit = sdram_off_to_bit(dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->Bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->Bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%lx buddy=%lx indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->Bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->Bitmaps[indx], bit); -+} -+ -+void -+elan3_sdram_init (ELAN3_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->SdramLock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->SdramFreeLists[indx] = (sdramaddr_t) 0; -+ dev->SdramFreeCounts[indx] = 0; -+ } -+} -+ -+void -+elan3_sdram_fini (ELAN3_DEV *dev) -+{ -+ spin_lock_destroy (&dev->SdramLock); -+} -+ -+void -+elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top) -+{ -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = E3_CACHE_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* find the smallest block which is big enough for this allocation */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->SdramFreeLists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: use block=%lx indx=%d\n", dev->SdramFreeLists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->SdramFreeLists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdram_off_to_bank (dev, block)->Bitmaps[i], sdram_off_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ PRINTF1 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: return block=%lx\n", block); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_free: indx=%d block=%lx\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+} -+ -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/tproc.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/tproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/tproc.c 2005-06-01 23:12:54.594439864 -0400 -@@ -0,0 +1,778 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tproc.c,v 1.51.2.1 2004/11/15 11:12:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ THREAD_TRAP *trap = dev->ThreadTrap; -+ int delay = 1; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ trap->Status.Status = read_reg32 (dev, Exts.TProcStatus); -+ trap->sp = read_reg32 (dev, Thread_Desc_SP); -+ trap->pc = read_reg32 (dev, ExecutePC); -+ trap->npc = read_reg32 (dev, ExecuteNPC); -+ trap->StartPC = read_reg32 (dev, StartPC); -+ trap->mi = GET_STATUS_TRAPTYPE(trap->Status); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ trap->DirtyBits.Bits = read_reg32 (dev, DirtyBits.Bits); -+ -+ if ( ! (trap->Status.s.WakeupFunction == SleepOneTick) ) { -+ int p,i; -+ E3_uint32 reg = read_reg32 (dev, Exts.InterruptReg); -+ -+ ELAN_REG_REC(reg); -+ p = elan_reg_rec_index; -+ for(i=0;iStatus.s.WakeupFunction == SleepOneTick); -+ -+ /* copy the four access fault areas */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), (void *) &trap->FaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), (void *) &trap->DataFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), (void *) &trap->InstFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), (void *) &trap->OpenFaultSave, 16); -+ -+ /* copy the registers, note the endian swap flips the odd registers into the even registers -+ and visa versa. */ -+ copy_thread_regs (dev, trap->Registers); -+ -+ /* -+ * If the output was open then the ack may not have returned yet. Must wait for the -+ * ack to become valid and update trap_dirty with the new value. Will simulate the -+ * instructions later. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen) -+ { -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ while (! trap->TrapBits.s.AckBufferValid) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "tproc: waiting for ack to become valid\n"); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ } -+ } -+ -+ /* update device statistics */ -+ BumpStat (dev, TProcTraps); -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ BumpStat (dev, ForcedTProcTraps); -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (trap->TrapBits.s.PacketTimeout) -+ BumpStat (dev, ThreadOutputTimeouts); -+ else if (trap->TrapBits.s.PacketAckValue == E3_PAckError) -+ BumpStat (dev, ThreadPacketAckErrors); -+ } -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ BumpStat (dev, TrapForTooManyInsts); -+ break; -+ } -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ *RestartBits |= RestartTProc; -+ -+ return (TRUE); -+} -+ -+void -+DeliverTProcTrap (ELAN3_DEV *dev, THREAD_TRAP *threadTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ THREAD_TRAP *trap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, threadTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverTProcTrap: context %x invalid\n", threadTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_TPROC_TRAP (ctxt, threadTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->ThreadTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ -+ bcopy (threadTrap, trap, sizeof (THREAD_TRAP)); -+ -+ PRINTF4 (ctxt, DBG_INTR, "DeliverTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_INTR, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_INTR, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ELAN3_QUEUE_ADD (ctxt->ThreadTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->ThreadTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverTProcTrap: thread queue full, must swap out\n"); -+ ctxt->Status |= CTXT_THREAD_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->ThreadTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ int i; -+ int res; -+ E3_Addr StackPointer; -+ -+ PRINTF4 (ctxt, DBG_TPROC, "ResolveTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_TPROC, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_TPROC, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ -+ BumpUserStat (ctxt, TProcTraps); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ { -+ /* -+ * This occurs if the threads processor trapped. All other cases will be for the ucode -+ * thread trapping. -+ */ -+ int restart = 1; -+ int skip = 0; -+ -+ PRINTF1 (ctxt, DBG_TPROC, "TProc: Mi=Unimp. Using trap->TrapBits=%x\n", trap->TrapBits.Bits); -+ -+ /* -+ * Data Access Exception. -+ */ -+ if (trap->TrapBits.s.DataAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL(ctxt) || trap->DataFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->DataFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: DataAccessException %08x\n", trap->DataFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->DataFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for data %08x\n", -+ trap->DataFaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Instruction Access Exception. -+ */ -+ if (trap->TrapBits.s.InstAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL (ctxt) || trap->InstFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->InstFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: InstAccessException %08x\n", trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->InstFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for inst %08x\n", -+ trap->InstFaultSave.s.FaultAddress); -+ -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->InstFaultSave, res); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Forced TProc trap/Unimplemented instruction -+ * -+ * If there is a force tproc trap then don't look at -+ * the unimplemented instruction bit - since it can -+ * be set in obscure circumstances. -+ */ -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: forced tproc trap, restarting\n"); -+ else if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case ELAN3_ELANCALL_TRAPNUM: -+ /* -+ * Since the thread cannot easily access the global variable which holds -+ * the elan system call number, we provide a different trap for the elan -+ * system call, and copy the system call number into %g1 before calling -+ * ThreadSyscall(). -+ */ -+ BumpUserStat (ctxt, ThreadElanCalls); -+ -+ if (ThreadElancall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_SYSCALL_TRAPNUM: -+ BumpUserStat (ctxt, ThreadSystemCalls); -+ -+ if (ThreadSyscall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_DEBUG_TRAPNUM: -+ ElanException (ctxt, EXCEPTION_DEBUG, THREAD_PROC, trap); -+ skip = 1; -+ break; -+ -+ case ELAN3_ABORT_TRAPNUM: -+ default: -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ break; -+ } -+ -+ } -+ else -+ { -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Faulted fetching routes. -+ */ -+ if (trap->TrapBits.s.OpenRouteFetch) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: OpenRouteFetch %08x\n", trap->OpenFaultSave.s.FaultAddress); -+ -+ if ((res = ResolveVirtualProcess (ctxt, trap->OpenFaultSave.s.FaultAddress)) != ESUCCESS && -+ ElanException (ctxt, EXCEPTION_INVALID_PROCESS, THREAD_PROC, trap, trap->DataFaultSave.s.FaultAddress, res) != OP_IGNORE) -+ { -+ restart = 0; -+ } -+ else if (RollThreadToClose (ctxt, trap, E3_PAckDiscard) != ESUCCESS) /* Force a discard */ -+ { -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Thread Timeout -+ */ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ else -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: timeout or PAckError!\n"); -+ -+ /* Might deschedule the thread for a while or mark the link error here. */ -+ if (! trap->TrapBits.s.OutputWasOpen && RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ restart = 0; -+ } -+ } -+ } -+ -+ /* -+ * Open exception -+ */ -+ if (trap->TrapBits.s.OpenException) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: open exception\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ /* -+ * Too many instructions. -+ */ -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: too many instructions\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ if (restart) -+ { -+ /* -+ * If the output was open when the trap was taken then the trap code must move -+ * the PC on past the close instruction and simulate the effect of all the instructions -+ * that do not output onto the link. The value of the ack received is then used to -+ * simulate the close instruction. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen && RollThreadToClose(ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ /* -+ * Don't restart if we couldn't roll it forweards -+ * to a close instruction. -+ */ -+ break; -+ } -+ -+ /* -+ * We must check back 3 instructions from the PC, and if we see the -+ * c_close_cookie() sequence then we must execute the instructions to -+ * the end of it. -+ */ -+ /* XXXX: code to be written */ -+ -+ StackPointer = SaveThreadToStack (ctxt, trap, skip); -+ -+ ReissueStackPointer (ctxt, StackPointer); -+ } -+ -+ break; -+ } -+ -+ /* -+ * This case is different from the others as %o6 has been overwritten with -+ * the SP. The real PC can be read from StartPC and written back -+ * into %o6 on the stack. -+ */ -+ case MI_TProcNext: /* Reading the outs block */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing StartPc to o6\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->StartPC & PC_MASK); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ /* DROPTHROUGH */ -+ } -+ /* -+ * all of these will be generated when starting up a thread. -+ * Just re-issue the command after fixing the trap. The ucode keeps the startup -+ * from trap information in Thread_Desc_SP while it is still loading the regs. -+ */ -+ case MI_WaitForGlobalsRead: /* Reading the globals block (trap restart) */ -+ case MI_WaitForNPCRead: /* Reading the nPC, V and C (trap restart) */ -+ case MI_WaitForPCload: /* Reading the PC, N and Z (trap restart) */ -+ case MI_WaitForInsRead: /* Reading the ins block (trap restart) */ -+ case MI_WaitForLocals: /* Reading the ins block (trap restart) */ -+ case MI_WaitForPCload2: /* Reading the PC (normal thread start) */ -+ case MI_WaitForSpStore: /* Writing the SP to the outs block */ -+ PRINTF2 (ctxt, DBG_TPROC, "ResolveTProcTrap: %s %08x\n", MiToName (trap->mi), trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, &trap->FaultSave, trap, res) != OP_IGNORE) -+ break; -+ } -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ -+ /* -+ * These traps could occur after the threads proc has stopped (either for a wait, -+ * break, or suspend, but not a trap). Must simulate the uCode's job. -+ */ -+ case MI_WaitForOutsWrite: /* Writing the outs block */ -+ case MI_WaitForNPCWrite: /* Writing the nPC block */ -+ { -+ E3_uint32 DeschedBits = (trap->TrapBits.Bits & E3_TProcDescheduleMask); -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: trapped on %s while stopping a thread\n", MiToName(trap->mi)); -+ -+ /* -+ * Copy npc into o6. -+ */ -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing outs to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ /* -+ * Now write the outs back to the stack. NOTE then endian flip is undone. -+ */ -+ for (i = 0; i < 8; i++) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ /* -+ * thread has been saved. Now find out why the thread proc stopped. -+ */ -+ if (DeschedBits == E3_TProcDescheduleSuspend) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: suspend instruction executed\n"); -+ break; -+ } -+ -+ /* -+ * Break. Just reissue the command. -+ */ -+ if (DeschedBits == E3_TProcDescheduleBreak) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: break instruction, reissue sp %08x\n", trap->sp); -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ ASSERT (DeschedBits == E3_TProcDescheduleWait); -+ -+ /* DROPTHROUGH to fix up a wait event */ -+ } -+ -+ /* -+ * Trapped here trying to execute a wait instruction. All the thread state has already -+ * been saved and the trap has been fixed so simplest thing to do is to start the -+ * thread up at the wait instruction again. -+ */ -+ case MI_WaitForEventWaitAddr: /* Reading back the %o0,%o1 pair for a -+ wait event instr. */ -+ case MI_WaitForWaitEventAccess: /* Locked dword read of the event location. -+ Note that this read is done with write -+ permissions so we never get a trap on the write */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ break; -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing pc to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->pc); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ default: -+ FixupEventTrap (ctxt, THREAD_PROC, trap, trap->mi, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+TProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_THREAD] != 0); -+} -+ -+void -+RestartTProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 StackPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_THREAD]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_THREAD, &item, &StackPointer)) -+ ctxt->ItemCount[LIST_THREAD] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, RunThread), StackPointer, 0) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_THREAD, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_THREAD]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+E3_Addr -+SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction) -+{ -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ E3_Addr orflag; -+ register int i; -+ -+ /* -+ * When the thread deschedules normally, the N & Z flags are written -+ * to the stack in o6, and the V & C flags are lost. -+ * Since the Elan will store the NPC into o6 (to skip the instruction), -+ * the CC flags are visible to the trap handler in the trapped PC and NPC. -+ * If the instruction needs to be re-executed then the CC flags need to be -+ * kept in the right place to be read in when the thread re-starts. -+ * -+ * PC has N & Z from trapped NPC. -+ * NPC has V & C from trapped PC. -+ */ -+ if (SkipInstruction) -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = ((trap->npc & PC_MASK) + 4) | (trap->pc & CC_MASK); -+ } -+ else -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = (trap->pc & PC_MASK) | (trap->npc & CC_MASK); -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = (trap->npc & PC_MASK) | (trap->pc & CC_MASK); -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK(ctxt)) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "RestartThread: faulted writing out thread\n"); -+ ELAN3_OP_END_FAULT_CHECK(ctxt); -+ -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ return ((E3_Addr) 0); -+ } -+ -+ -+#ifdef DEBUG_PRINTF -+ PRINTF4 (ctxt, DBG_TPROC, "SaveThreadToStack: SP=%08x PC=%08x NPC=%08x DIRTY=%08x\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits); -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+#endif -+ -+ PRINTF1 (ctxt, DBG_TPROC, "flushing registers to stack %08x\n", stack); -+ -+ /* -+ * NOTE - store the register to the stack in reverse order, since the stack -+ * will be allocated in sdram, and we cannot use the sdram accessing functions -+ * here, as it is "mapped" in user-space. -+ */ -+ for (i = 0; i < 8; i++) -+ { -+ if (trap->DirtyBits.s.GlobalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Globals[i]), trap->Registers[REG_GLOBALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.OutsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.LocalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Locals[i]), trap->Registers[REG_LOCALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.InsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Ins[i]), trap->Registers[REG_INS+(i^WordEndianFlip)]); -+ } -+ -+ /* always restore all registers */ -+ orflag = ThreadRestartFromTrapBit | ThreadReloadAllRegs; -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (trap->sp | orflag); -+} -+ -+void -+ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer) -+{ -+ PRINTF1 (ctxt, DBG_TPROC, "ReissueStackPointer : Queue SP %08x\n", StackPointer); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_THREAD]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_THREAD, StackPointer); -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/tprocinsts.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/tprocinsts.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/tprocinsts.c 2005-06-01 23:12:54.595439712 -0400 -@@ -0,0 +1,401 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tprocinsts.c,v 1.20 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tprocinsts.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MAXINSTR 256 /* # Instructions to look at while looking for close */ -+ -+static E3_uint32 ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V); -+ -+char *OpcodeNames[] = -+{ -+ "ADD ", -+ "AND ", -+ "OR ", -+ "XOR ", -+ "SUB ", -+ "ANDN ", -+ "ORN ", -+ "XNOR ", -+ "ADDX ", -+ "UNIP ", -+ "UMUL ", -+ "SMUL ", -+ "SUBX ", -+ "UNIP ", -+ "UDIV ", -+ "SDIV ", -+ "ADDcc ", -+ "ANDcc ", -+ "ORcc ", -+ "XORcc ", -+ "SUBcc ", -+ "ANDNcc", -+ "ORNcc ", -+ "XNORcc", -+ "ADDXcc", -+ "UNIPcc", -+ "UMULcc", -+ "SMULcc", -+ "SUBXcc", -+ "UNIPcc", -+ "UDIVcc", -+ "SDIVcc" -+}; -+ -+#define REGISTER_VALUE(trap, rN) (((rN) == 0) ? 0 : (trap)->Registers[(rN)^WordEndianFlip]) -+#define ASSIGN_REGISTER(trap, rN, value) ((rN) != 0 ? trap->Registers[(rN)^WordEndianFlip] = (value) : 0) -+ -+int -+RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal) -+{ -+ E3_Addr pc = (trap->pc & PC_MASK); -+ E3_Addr npc = (trap->npc & PC_MASK); -+ E3_uint32 Z = (trap->npc & PSR_Z_BIT) ? 1 : 0; -+ E3_uint32 N = (trap->npc & PSR_N_BIT) ? 1 : 0; -+ E3_uint32 C = (trap->pc & PSR_C_BIT) ? 1 : 0; -+ E3_uint32 V = (trap->pc & PSR_V_BIT) ? 1 : 0; -+ E3_uint32 instr; -+ E3_Addr addr; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ failed: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_SIMULATION_FAILED, THREAD_PROC, trap); -+ return (EFAULT); -+ } -+ -+ /* -+ * Thread trapped with output open, or while closing, -+ * so roll the PC forwards to the instruction after the -+ * next c_close, and execute that with the register -+ * specified in c_close set to the trap which occured. -+ * (This is not 1 which means an ACK) -+ */ -+ PRINTF1 (ctxt, DBG_TPROC, "RollThreadToClose: roll pc %x to c_close\n", pc); -+ -+ for (;;) -+ { -+ instr = ELAN3_OP_LOAD32 (ctxt, pc); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "RollThreadToClose: PC=%x INSTR=%x\n", pc, instr); -+ -+ switch (OPCODE_CLASS(instr)) -+ { -+ case OPCODE_CLASS_0: -+ switch ((instr) & OPCODE_CLASS0_MASK) -+ { -+ case OPCODE_SETHI: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : sethi r%d = %x\n", pc, INSTR_RD(instr), instr << 10); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), instr << 10); -+ break; -+ -+ case OPCODE_SENDREG: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendreg\n", pc); -+ break; -+ -+ case OPCODE_SENDMEM: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendmem\n", pc); -+ break; -+ -+ case OPCODE_BICC: -+ { -+ int DoBranch = (instr >> 28) & 1; -+ int CondBranch = 1; -+ E3_Addr OldnPC = npc; -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : Bicc Z=%x N=%x C=%x V=%x ", pc, Z, N, C, V); -+ switch (instr & OPCODE_BICC_MASK) -+ { -+ case OPCODE_BICC_BN: CondBranch = 0; break; -+ case OPCODE_BICC_BE: DoBranch ^= Z; break; -+ case OPCODE_BICC_BLE: DoBranch ^= Z | (N ^ V); break; -+ case OPCODE_BICC_BL: DoBranch ^= N ^ V; break; -+ case OPCODE_BICC_BLEU: DoBranch ^= C | Z; break; -+ case OPCODE_BICC_BCS: DoBranch ^= C; break; -+ case OPCODE_BICC_BNEG: DoBranch ^= N; break; -+ case OPCODE_BICC_BVS: DoBranch ^= V; break; -+ } -+ -+ /* Do the branch */ -+ if (DoBranch != 0) -+ { -+ npc = pc + (((instr & 0x3fffff) << 2) | -+ (((instr & 0x200000) != 0) ? 0xff000000 : 0)); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : branch taken to %x\n", pc, npc); -+ } -+ else -+ { -+ npc = npc + 4; -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch not taken\n", pc); -+ } -+ pc = OldnPC; -+ -+ /* Test if the next is annuled */ -+ if (((instr & OPCODE_BICC_ANNUL) != 0) & -+ ((DoBranch == 0) | (CondBranch == 0))) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch annulled\n", pc); -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+ /* -+ * we've already consumed the instruction - so continue rather -+ * than break; -+ */ -+ continue; -+ } -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 0 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ -+ case OPCODE_CLASS_1: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 1 instr %x\n", pc, instr); -+ goto failed; -+ -+ case OPCODE_CLASS_2: -+ { -+ E3_uint32 X = REGISTER_VALUE (trap, INSTR_RS1(instr)); -+ E3_uint32 Y = (instr & OPCODE_IMM) ? INSTR_IMM(instr) : REGISTER_VALUE (trap, INSTR_RS2(instr)); -+ -+ if ((instr & OPCODE_NOT_ALUOP) == 0) -+ { -+ E3_uint32 fcode = (instr >> OPCODE_FCODE_SHIFT) & OPCODE_FCODE_MASK; -+ E3_uint32 result = ALU (ctxt, fcode, X, Y, &Z, &N, &C, &V); -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : %s %x %x -> %x", pc, OpcodeNames[fcode], X, Y, result); -+ PRINTF4 (ctxt, DBG_TPROC, " Z=%x N=%x C=%x V=%x\n", Z, N, C, V); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), result); -+ } -+ else -+ { -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_OPEN: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_open\n", pc); -+ break; -+ -+ case OPCODE_CLOSE: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ goto found_close; -+ -+ case OPCODE_SLL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SLL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X << Y); -+ break; -+ -+ case OPCODE_SRL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_SRA: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRA\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_BREAKTEST: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAKTEST not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_BREAK: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAK not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_SUSPEND: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SUSPEND not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_WAIT: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : WAIT not allowed while open\n", pc); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 2 instr %x\n", pc, instr); -+ goto failed; -+ } -+ } -+ break; -+ } -+ -+ case OPCODE_CLASS_3: -+ { -+ if ((instr & OPCODE_IMM) != 0) -+ addr = REGISTER_VALUE (trap, INSTR_RS1(instr)) + INSTR_IMM(instr); -+ else -+ addr = (REGISTER_VALUE (trap, INSTR_RS1(instr)) + -+ REGISTER_VALUE (trap, INSTR_RS2(instr))); -+ -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_LD: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : LD [%x], r%d\n", pc, addr, INSTR_RD(instr)); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), ELAN3_OP_LOAD32 (ctxt, addr)); -+ break; -+ -+ case OPCODE_LDD: -+ case OPCODE_LDBLOCK16: -+ case OPCODE_LDBLOCK32: -+ case OPCODE_LDBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : LDBLOCKx @ %x is not possible while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_ST: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : ST @ %x\n", pc, addr); -+ -+ ELAN3_OP_STORE32 (ctxt, addr, REGISTER_VALUE (trap, INSTR_RD(instr))); -+ break; -+ -+ case OPCODE_STD: -+ case OPCODE_STBLOCK16: -+ case OPCODE_STBLOCK32: -+ case OPCODE_STBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : STD @ %x is not posisble while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_SWAP: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : SWAP @ %x is not posible while output open\n", pc, addr); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 3 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ }} -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+found_close: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ -+ /* -+ * Found the new pc, and have the close instruction in *instr -+ */ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), PAckVal); -+ -+ /* -+ * Move to instruction after close. -+ */ -+ trap->pc = npc; -+ -+ /* Insert the value of Z and N from the close inst */ -+ trap->npc = (npc + 4) | ((PAckVal == E3_PAckOk) ? 1 : -+ (PAckVal == E3_PAckTestFail) ? 2 : 0); -+ -+ return (ESUCCESS); -+} -+ -+E3_uint32 -+ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V) -+{ -+ E3_uint32 XMSB, YMSB, ZMSB, Cprime; -+ E3_uint32 Yprime; -+ E3_uint32 Result=0; -+ -+ Yprime = ((fcode >> 2) & 1) ? ~Y : Y; -+ Cprime = ((fcode >> 2) & 1) ^ (*C & ((fcode >> 3) & 1)); -+ XMSB = (X >> 31) & 1; -+ YMSB = (Yprime >> 31) & 1; -+ /* mul or div */ -+ if ((fcode & 0xa) == 0xa) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ALU: tried a multiply or a divide\n"); -+ return (0); -+ } -+ -+ switch (fcode & 3) -+ { -+ /*ADD */ -+ case 0: -+ Result = X + Yprime + Cprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ ZMSB = Result >> 31; -+ *V = ((XMSB & YMSB & ~ZMSB) | (~XMSB &~YMSB & ZMSB)); -+ *C = ((fcode >> 2) & 1) ^ ( (XMSB & YMSB) | (~ZMSB & (XMSB | YMSB))); -+ break; -+ -+ /*AND */ -+ case 1: -+ Result = X & Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*OR */ -+ case 2: -+ Result = X | Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*XOR */ -+ case 3: -+ Result = X ^ Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ } -+ -+ *Z = (Result == 0) ? 1 : 0; -+ *N = (Result >> 31) & 1; -+ -+ return (Result); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/tproc_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/tproc_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/tproc_linux.c 2005-06-01 23:12:54.596439560 -0400 -@@ -0,0 +1,215 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: tproc_linux.c,v 1.19.2.1 2004/10/28 17:08:56 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+#ifdef NO_ABI -+#include -+extern asmlinkage long sys_open(const char *, int, int); -+extern asmlinkage ssize_t sys_write(unsigned int, const char *, size_t); -+extern asmlinkage ssize_t sys_read(unsigned int, char *, size_t); -+extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+extern asmlinkage long sys_kill(int, int); -+#else -+# include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * NOTE: system calls from kernel on Linux are different on alpha and i386 -+ * on alpha they return -errno on failure -+ * on i386 they return -1 on failure and set errno -+ */ -+ -+static void -+ReturnSyscall (THREAD_TRAP *trap, unsigned long rc, int *skip) -+{ -+ if (rc >= (unsigned long) (-130)) -+ { -+ trap->pc |= PSR_C_BIT; /* clear carry to indicate failure */ -+ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = -rc; -+ } -+ else -+ { -+ trap->pc &= ~PSR_C_BIT; /* set carry to indicate success */ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = rc; -+ } -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)] = 0; -+ *skip = 1; -+} -+ -+static void -+dump_regs(ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ PRINTF (ctxt, DBG_TPROC, " OUTS %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF (ctxt, DBG_TPROC, " %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+} -+ -+int -+ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int code; -+ caddr_t maddr; -+ struct file *file; -+ unsigned long rc; -+ int i; -+ uintptr_t av[6]; -+ uintptr_t ptr; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadSyscall: PC %08x G1 %08x\n", -+ trap->pc, trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]); -+ dump_regs(ctxt, trap); -+ -+ code = trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]; -+ -+ /* Copy the system call arguments from %o0-%o5 */ -+ for (i = 0; i < 6; i++) -+ av[i] = trap->Registers[REG_OUTS+(i^WordEndianFlip)]; -+ -+ rc = (unsigned long) -EINVAL; -+ -+ switch (code) { -+ case ELAN3_SYS_open: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_open((const char *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_close: -+ rc = sys_close(av[0]); -+ break; -+ -+ case ELAN3_SYS_write: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_write(av[0], (const char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_read: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_read(av[0], (char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_poll: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_poll((struct pollfd *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_lseek: -+ rc = sys_lseek(av[0], av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_mmap: -+ if ((E3_Addr) av[0] == (E3_Addr) 0) -+ maddr = NULL; -+ else if ((maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0])) == NULL) -+ break; -+ -+ file = NULL; -+ /* GNAT 5515: If *not* anonymous memory need to do fget */ -+ if ((av[3] & MAP_ANONYMOUS) == 0 && (file = fget (av[4])) == NULL) -+ { -+ rc = -EBADF; -+ break; -+ } -+ -+ down_write (¤t->mm->mmap_sem); -+ ptr = do_mmap_pgoff (file, (unsigned long) maddr, av[1], av[2], av[3], av[5] >>PAGE_SHIFT); -+ up_write (¤t->mm->mmap_sem); -+ -+ if (file) -+ fput (file); -+ -+ if (IS_ERR((void *) ptr)) -+ rc = PTR_ERR((void *) ptr); -+ else -+ rc = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t)ptr); -+ -+ break; -+ -+ case ELAN3_SYS_munmap: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ -+#ifdef AC -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1], 1); -+#else -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1]); -+#endif -+ break; -+ -+ case ELAN3_SYS_kill: -+ rc = sys_kill(av[0], av[1]); -+ break; -+ -+ case ELAN3_SYS_getpid: -+ rc = current->pid; -+ break; -+ -+ default: -+ return EINVAL; -+ } -+ ReturnSyscall(trap, rc, skip); -+ return ESUCCESS; -+} -+ -+ -+int -+ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int ret = ESUCCESS; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadElancall: PC %08x\n", trap->pc); -+ dump_regs(ctxt, trap); -+ -+ /* -+ * Elan system call 'type' is passed in o0 -+ */ -+ switch (trap->Registers[REG_OUTS+(0^WordEndianFlip)]) -+ { -+ default: -+ ret = EINVAL; -+ break; -+ } -+ return ret; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan3/virtual_process.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan3/virtual_process.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan3/virtual_process.c 2005-06-01 23:12:54.597439408 -0400 -@@ -0,0 +1,884 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: virtual_process.c,v 1.68 2004/06/07 13:50:10 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/virtual_process.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static ELAN3_VPSEG * -+InstallSegment (ELAN3_CTXT *ctxt, int process, int entries) -+{ -+ ELAN3_VPSEG **prevSeg, *seg; -+ int lastTop = -1; -+ int top = process + entries-1; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (seg = (*prevSeg)) != NULL; prevSeg = &seg->Next) -+ { -+ int thisTop = seg->Process + seg->Entries - 1; -+ -+ if (process < seg->Process && (process <= lastTop || top >= seg->Process)) -+ { -+ /* -+ * Overlaps with last segment, or this one -+ */ -+ return (NULL); -+ } -+ if (seg->Process > process) -+ break; -+ -+ lastTop = thisTop; -+ } -+ -+ KMEM_ZALLOC (seg, ELAN3_VPSEG *, sizeof (ELAN3_VPSEG), TRUE); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ return (NULL); -+ -+ seg->Process = process; -+ seg->Entries = entries; -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "InstallSegment: add seg %p before %p\n", seg, *prevSeg); -+ -+ seg->Next = *prevSeg; -+ *prevSeg = seg; -+ -+ return (seg); -+} -+ -+static int -+RemoveSegment (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg) -+{ -+ ELAN3_VPSEG **prevSeg, *thisSeg; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (thisSeg = (*prevSeg)) != NULL; prevSeg = &thisSeg->Next) -+ { -+ if (thisSeg == seg) -+ break; -+ } -+ -+ if (thisSeg == (ELAN3_VPSEG *) NULL) -+ return (EINVAL); -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "RemoveSegment: remove seg %p next %p\n", thisSeg, thisSeg->Next); -+ -+ *prevSeg = thisSeg->Next; -+ -+ KMEM_FREE ((caddr_t) seg, sizeof (ELAN3_VPSEG)); -+ -+ return (ESUCCESS); -+} -+ -+static ELAN3_VPSEG * -+FindSegment (ELAN3_CTXT *ctxt, int low, int high) -+{ -+ ELAN3_VPSEG *seg; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ if (seg->Process <= low && (seg->Process + seg->Entries) > high) -+ return (seg); -+ } -+ -+ return ((ELAN3_VPSEG *) NULL); -+} -+ -+ELAN_LOCATION -+ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ location.loc_node = ELAN3_INVALID_NODE; -+ location.loc_context = -1; -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d seg %p cap %p\n", process, seg, cap); -+ -+ if (seg == NULL) -+ seg = FindSegment (ctxt, process, process); -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return (location); -+ -+ cap = &seg->SegCapability; -+ nnodes = ELAN_CAP_NUM_NODES (cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if (( seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, ctx + (node * nctxs))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, node + (ctx * nnodes))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ found: -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d -> Node %d Context %d\n", process, location.loc_node, location.loc_context); -+ -+ if (cap != NULL) -+ { -+ bcopy ((caddr_t) &seg->SegCapability, (caddr_t) cap, sizeof (ELAN_CAPABILITY)); -+ cap->cap_mycontext = location.loc_context; -+ } -+ -+ return (location); -+} -+ -+int -+LocationToProcess (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, ELAN_LOCATION loc, ELAN_CAPABILITY *cap) -+{ -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ if (seg == NULL) -+ return ELAN3_INVALID_PROCESS; -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return ELAN3_INVALID_PROCESS; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctx + (node * nctxs))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (ctx * nnodes))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ return ELAN3_INVALID_PROCESS; -+} -+ -+int -+elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ int i; -+ int nodeOff; -+ int ctxOff; -+ int nnodes; -+ int nctxs; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ int entries; -+ -+ PRINTF2 (ctxt, DBG_VP, "elan3_addvp: %d -> %s\n", process, CapabilityString (cap)); -+ -+ entries = ELAN_CAP_ENTRIES(cap); -+ if (entries <= 0 || (process + entries) > ELAN3_MAX_VPS) -+ return (EINVAL); -+ -+ /* -+ * Scan the virtual process segment list, to add this entry, and ensure that -+ * the ranges don't overlap. -+ */ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, cap, ELAN_USER_P2P)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(cap, cap) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, entries)) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_addvp: failed to find a seg\n"); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_P2P; -+ seg->SegCapability = *cap; -+ seg->SegCapability.cap_mycontext = ELAN_CAP_UNINITIALISED; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: segment type %x %d %d\n", -+ seg->SegCapability.cap_type, seg->Process, entries); -+ -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ /* position not determined, so cannot load any routes, the hwtest -+ * process must explicitly set it's own routes */ -+ -+ if (!(cap->cap_type & ELAN_CAP_TYPE_HWTEST) && (pos->pos_mode != ELAN_POS_UNKNOWN)) -+ { -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, i = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode +nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext + ctxOff, nflits, flits); -+ } -+ -+ i++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, i = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode + nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext +ctxOff, nflits, flits); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_removevp (ELAN3_CTXT *ctxt, int process) -+{ -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *next; -+ int i; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_removevp: remove process %d\n", process); -+ -+ if (process == ELAN3_INVALID_PROCESS) -+ seg = ctxt->VpSegs; -+ else -+ seg = FindSegment (ctxt, process, process); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ do { -+ PRINTF3 (ctxt, DBG_VP, "elan3_removevp: segment is %p [%x,%x]\n", -+ seg, seg->Process, seg->Process+seg->Entries); -+ -+ for (i = 0; i < seg->Entries; i++) -+ ClearRoute (ctxt->Device, ctxt->RouteTable, seg->Process+i); -+ -+ /* get Next pointer value before structure is free'd */ -+ next = seg->Next; -+ RemoveSegment (ctxt, seg); -+ -+ } while (process == ELAN3_INVALID_PROCESS && (seg = next) != NULL); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_addbcastvp (ELAN3_CTXT *ctxt, int process, int lowProc, int highProc) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ int virtualProcess; -+ E3_uint64 routeValue; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addbcastvp: process %d [%d,%d]\n", process, lowProc, highProc); -+ -+ if (lowProc > highProc || pos->pos_mode != ELAN_POS_MODE_SWITCHED) -+ return (EINVAL); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if ((aseg = FindSegment (ctxt, lowProc, highProc)) == NULL || (aseg->Type != ELAN3_VPSEG_P2P)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to p2p segment\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check aseg->SegCapability */ -+ switch (elan3_validate_cap (ctxt->Device, &aseg->SegCapability, ELAN_USER_BROADCAST)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &aseg->SegCapability) != ESUCCESS ) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ( ProcessToLocation (ctxt, aseg, lowProc, NULL).loc_context != -+ ProcessToLocation (ctxt, aseg, highProc, NULL).loc_context) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to single context\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, 1)) == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_BROADCAST; -+ seg->SegLowProc = lowProc; -+ seg->SegHighProc = highProc; -+ -+ PRINTF4 (ctxt, DBG_VP, "elan3_addbcastvp: installed seg %p Type %d LowProc %d HighProc %d\n", -+ seg, seg->Type, seg->SegLowProc, seg->SegHighProc); -+ -+ for (virtualProcess = lowProc; virtualProcess <= highProc; virtualProcess++) -+ { -+ if (virtualProcess < 0 || virtualProcess >= ctxt->RouteTable->Size) -+ routeValue = 0; -+ else -+ routeValue = elan3_sdram_readq ( ctxt->Device, ctxt->RouteTable->Table + virtualProcess * NBYTES_PER_SMALL_ROUTE); -+ -+ if (! (routeValue & ROUTE_VALID)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "loadvp[%x]: broadcast %x not valid\n", -+ ctxt->Capability.cap_mycontext, virtualProcess); -+ break; -+ } -+ } -+ -+ if (virtualProcess > highProc) /* All vps now present */ -+ { /* so load up broadcast route */ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN_LOCATION low = ProcessToLocation (ctxt, aseg, lowProc, NULL); -+ ELAN_LOCATION high = ProcessToLocation (ctxt, aseg, highProc, NULL); -+ int nflits = GenerateRoute (pos, flits, low.loc_node, high.loc_node, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ PRINTF6 (ctxt, DBG_VP, "loadvp[%x]: broadcast %d -> %x.%x [%x.%x]\n", ctxt->Capability.cap_mycontext, -+ seg->Process, low.loc_node, high.loc_node, -+ low.loc_context, high.loc_context); -+ -+ LoadRoute ( ctxt->Device, ctxt->RouteTable, seg->Process, low.loc_context, nflits, flits); -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_process (ELAN3_CTXT *ctxt) -+{ -+ int res = ELAN3_INVALID_PROCESS; -+ ELAN3_VPSEG *seg; -+ ELAN_LOCATION loc; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ loc.loc_node = ctxt->Position.pos_nodeid; -+ loc.loc_context = ctxt->Capability.cap_mycontext; -+ -+ for (seg = ctxt->VpSegs ; seg; seg = seg->Next) -+ { -+ if (seg->Type == ELAN3_VPSEG_P2P && -+ seg->SegCapability.cap_lowcontext <= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_highcontext >= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_lownode <= ctxt->Position.pos_nodeid && -+ seg->SegCapability.cap_highnode >= ctxt->Position.pos_nodeid) -+ { -+ if ((res=LocationToProcess (ctxt,seg,loc,&ctxt->Capability)) != ELAN3_INVALID_PROCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return res; -+ } -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_check_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError) -+{ -+ PRINTF5 (ctxt, DBG_VP, "elan3_check_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_read (&ctxt->VpLock); -+ *routeError=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node); -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); /* the call is a success tho the errorcode may be set */ -+} -+ -+int -+elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ int nflits; -+ int err; -+ -+ PRINTF5 (ctxt, DBG_VP, "elan3_load_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check the route is valid */ -+ if (!(ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ { -+ /* must have already attached to define my context number */ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((err=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node)) != ELAN3_ROUTE_SUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) == NULL || seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* Calculate number of flits in this route */ -+ for (nflits = 0; nflits < MAX_FLITS && flits[nflits]; nflits++) -+ ; -+ -+ res = LoadRoute (ctxt->Device, ctxt->RouteTable, process, ProcessToLocation (ctxt, seg, process, NULL).loc_context, nflits, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_get_route: vp=%d \n", process); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if (ctxt->RouteTable == NULL) /* is there a route table */ -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) != NULL && seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (seg == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ res = GetRoute (ctxt->Device, ctxt->RouteTable, process, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_reset_route (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_reset_route: vp=%d \n", process); -+ -+ GenerateRoute (&ctxt->Position, flits, process, process, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ return elan3_load_route(ctxt,process,flits); -+} -+ -+int -+ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ E3_uint64 routeValue; -+ -+ krwlock_read (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: vp=%d \n", process); -+ -+ if (ctxt->RouteTable == NULL || process < 0 || process >= ctxt->RouteTable->Size) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (! (seg = FindSegment (ctxt, process, process))) -+ { -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: cannot find segment for virtual process %d\n", process); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, &seg->SegCapability, ((seg->Type == ELAN3_VPSEG_P2P) ? ELAN_USER_P2P : ELAN_USER_BROADCAST))) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &seg->SegCapability) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ BumpUserStat (ctxt, LoadVirtualProcess); -+ -+ routeValue = elan3_sdram_readq (dev, ctxt->RouteTable->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_VALID) /* Virtual process already */ -+ { /* loaded */ -+ krwlock_done (&ctxt->VpLock); -+ return (ESUCCESS); -+ } -+ -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if ((res = elan_validate_map (&ctxt->Capability,&seg->SegCapability)) == ESUCCESS && -+ (res = GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_check(ctxt, flits, ProcessToLocation (ctxt, seg, process, NULL).loc_node)) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ default: -+ res = EINVAL; -+ break; -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ /* Find the segment that this broadcast range spans. */ -+ aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg == NULL || (aseg->Type != ELAN3_VPSEG_P2P) || !(aseg->SegCapability.cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "resolveVirtualProcess: %d -> EINVAL (%s)\n", process, -+ (aseg == NULL ? "no segment" : ((seg->Type != ELAN3_VPSEG_P2P) ? "not point to point" : -+ "not broadcastable"))); -+ res = EINVAL; -+ break; -+ } -+ -+ switch (aseg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ ELAN_LOCATION lowNode = ProcessToLocation (ctxt,aseg,seg->SegLowProc , NULL); -+ ELAN_LOCATION highNode = ProcessToLocation (ctxt,aseg,seg->SegHighProc , NULL); -+ -+ -+ if ((res = elan_validate_map (&ctxt->Capability,&aseg->SegCapability)) == ESUCCESS && -+ (res=GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_broadcast_check(ctxt,flits, lowNode.loc_node , highNode.loc_node ) != ELAN3_ROUTE_SUCCESS ) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ return (res); -+} -+ -+void -+UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN3_VPSEG *seg; -+ ELAN_CAPABILITY *scap; -+ int i; -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ scap = &seg->SegCapability; -+ -+ if (cap == NULL || ELAN_CAP_MATCH (scap, cap)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "unloadvp: segment [%x.%x]\n", -+ seg->Process, seg->Process + seg->Entries-1); -+ -+ for (i = 0; i < seg->Entries; i++) -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ for (i = 0; i < seg->Entries; i++) -+ { -+ ELAN3_VPSEG *aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg != NULL && ELAN_CAP_MATCH(&aseg->SegCapability, cap)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "unloadvp: broadcast vp %d\n", seg->Process); -+ -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ } -+ } -+ } -+} -+ -+caddr_t -+CapabilityString (ELAN_CAPABILITY *cap) -+{ -+#define CAPSTR_LEN 200 -+#define NCAPSTRS 4 -+ static char space[CAPSTR_LEN*NCAPSTRS]; -+ static int bufnum; -+ static spinlock_t lock; -+ static int lockinitialised; -+ int num; -+ unsigned long flags; -+ -+ if (! lockinitialised) -+ { -+ spin_lock_init (&lock); -+ lockinitialised = 1; -+ } -+ -+ spin_lock_irqsave (&lock, flags); -+ -+ if ((num = ++bufnum) == NCAPSTRS) -+ num = bufnum = 0; -+ spin_unlock_irqrestore (&lock, flags); -+ -+ sprintf (space + (num * CAPSTR_LEN), "%4x %4x %4x %4x %4x %4x %4x [%x.%x.%x.%x]", cap->cap_type, -+ cap->cap_lownode, cap->cap_highnode, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, ELAN_CAP_ENTRIES(cap), -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3]); -+ -+ return (space + (num * CAPSTR_LEN)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/debug.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/debug.c 2005-06-01 23:12:54.597439408 -0400 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.16 2004/07/07 11:22:33 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+unsigned elan4_debug = 0; -+unsigned elan4_debug_toconsole = 0; -+unsigned elan4_debug_tobuffer = DBG_ALL; -+ -+unsigned elan4_debug_display_ctxt; -+unsigned elan4_debug_ignore_ctxt; -+unsigned elan4_debug_ignore_type; -+ -+void -+elan4_debug_init() -+{ -+ if ((elan4_debug & elan4_debug_tobuffer) != 0) -+ qsnet_debug_alloc(); -+} -+ -+void -+elan4_debug_fini() -+{ -+} -+ -+void -+elan4_debugf (void *type, int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ int where = 0; -+ va_list ap; -+ -+ if ((mode & elan4_debug_tobuffer) != 0 || type == DBG_BUFFER) -+ where |= QSNET_DEBUG_BUFFER; -+ if ((mode & elan4_debug_toconsole) != 0 || type == DBG_CONSOLE) -+ where |= QSNET_DEBUG_CONSOLE; -+ -+ if (where == 0) -+ return; -+ -+ if ((unsigned long) type > DBG_NTYPES) -+ { -+ ELAN4_CTXT *ctxt = (ELAN4_CTXT *) type; -+ -+ if (elan4_debug_display_ctxt && ctxt->ctxt_num != elan4_debug_display_ctxt) -+ return; -+ if (elan4_debug_ignore_ctxt && ctxt->ctxt_num == elan4_debug_ignore_ctxt) -+ return; -+ -+ sprintf (prefix, "[%08ld.%04d] elan4 (%03x) ", lbolt, current->pid, ctxt->ctxt_num); -+ } -+ else if ((unsigned long) type == (int) DBG_CONSOLE) -+ prefix[0] = '\0'; -+ else -+ { -+ char *what; -+ -+ if (elan4_debug_ignore_type & (1 << ((unsigned long) type))) -+ return; -+ -+ switch ((unsigned long) type) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, "[%08ld.%04d] elan4 [%s] ", lbolt, current->pid, what); -+ else -+ sprintf (prefix, "[%08ld.%04d] elan4 [%3d] ", lbolt, current->pid, (int)(long)type); -+ } -+ -+ va_start(ap,fmt); -+ qsnet_vdebugf (where, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/device.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/device.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/device.c 2005-06-01 23:12:54.602438648 -0400 -@@ -0,0 +1,2805 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.87.6.3 2005/01/18 14:25:35 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define ELAN_DEV_OPS ELANMOD_DEV_OPS -+#define ELAN_DEV_OPS_VERSION ELANMOD_DEV_OPS_VERSION -+#define elan_dev_register elanmod_dev_register -+#define elan_dev_deregister elanmod_dev_deregister -+#endif -+ -+/* XXXX configurational defines */ -+ -+#if defined (CONFIG_MPSAS) -+#define HASH_0_SIZE_VAL (12 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 8 -+#define LN2_MAX_CQS 8 /* 256 */ -+#else -+#define HASH_0_SIZE_VAL (13 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 12 -+#define LN2_MAX_CQS 10 /* 1024 */ -+#endif -+ -+unsigned int elan4_hash_0_size_val = HASH_0_SIZE_VAL; -+unsigned int elan4_hash_1_size_val = HASH_1_SIZE_VAL; -+unsigned int elan4_ctxt_table_shift = CTXT_TABLE_SHIFT; -+unsigned int elan4_ln2_max_cqs = LN2_MAX_CQS; -+unsigned int elan4_dmaq_highpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_highpri_size = 1; /* 1024 entries */ -+unsigned int elan4_dmaq_lowpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_lowpri_size = 1; /* 1024 entries */ -+unsigned int elan4_interruptq_size = 0; /* 1024 entries */ -+unsigned int elan4_mainint_punt_loops = 1; -+unsigned int elan4_mainint_resched_ticks = 0; -+ -+static int -+elan4_op_get_position (void *arg, ELAN_POSITION *ptr) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *)arg; -+ ELAN_POSITION pos; -+ -+ elan4_get_position (dev, &pos); -+ -+ return copyout (&pos, ptr, sizeof (ELAN_POSITION)); -+} -+ -+static int -+elan4_op_set_position (void *arg, unsigned short nodeid, unsigned short numnodes) -+{ -+ /* XXXXX -+ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ compute_position (&pos, nodeid, numnode, num_down_links_value); -+ -+ return elan4_set_position (dev, pos); -+ */ -+ return EINVAL; -+} -+ -+ELAN_DEV_OPS elan4_dev_ops = -+{ -+ elan4_op_get_position, -+ elan4_op_set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+static E4_uint32 -+elan4_read_filter (ELAN4_DEV *dev, unsigned networkctx) -+{ -+ return (elan4_sdram_readl (dev, dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter))); -+} -+ -+static void -+elan4_write_filter (ELAN4_DEV *dev, unsigned networkctx, E4_uint32 value) -+{ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter)), value); -+ pioflush_sdram(dev); -+} -+ -+void -+elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ E4_uint32 setbits = 0; -+ E4_uint32 intmask = 0; -+ E4_uint32 haltmask; -+ E4_uint32 next_sched; -+ E4_uint32 next_intmask; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intmask_lock, flags); -+ -+ haltmask = (dev->dev_haltop_mask | dev->dev_haltop_active); -+ -+ if ((haltmask & INT_DProcHalted) || dev->dev_halt_all_count || dev->dev_halt_dproc_count) -+ setbits |= SCH_DProcHalt; -+ -+ if ((haltmask & INT_TProcHalted) || dev->dev_halt_all_count || dev->dev_halt_tproc_count) -+ setbits |= SCH_TProcHalt; -+ -+ if ((haltmask & INT_CProcHalted) || dev->dev_halt_all_count || dev->dev_halt_cproc_count) -+ setbits |= SCH_CProcHalt; -+ -+ if ((haltmask & INT_DiscardingLowPri) || dev->dev_discard_all_count || dev->dev_discard_lowpri_count) -+ setbits |= SCH_DiscardLowPriInput; -+ -+ if ((haltmask & INT_DiscardingHighPri) || dev->dev_discard_all_count || dev->dev_discard_highpri_count) -+ setbits |= SCH_DiscardHighPriInput; -+ -+ if (dev->dev_halt_lowpri_count) -+ setbits |= SCH_StopLowPriQueues; -+ -+ if (haltmask & INT_DProcHalted) intmask |= INT_DProcHalted; -+ if (haltmask & INT_TProcHalted) intmask |= INT_TProcHalted; -+ if (haltmask & INT_CProcHalted) intmask |= INT_CProcHalted; -+ if (haltmask & INT_DiscardingLowPri) intmask |= INT_DiscardingLowPri; -+ if (haltmask & INT_DiscardingHighPri) intmask |= INT_DiscardingHighPri; -+ -+ next_intmask = (dev->dev_intmask & ~(INT_Halted | INT_Discarding)) | (intmask & ~intreg); -+ next_sched = (dev->dev_schedstatus & ~(SCH_Halt | SCH_Discard)) | setbits; -+ -+ PRINTF5 (DBG_DEVICE, DBG_REGISTER, "elan4_set_schedstatus: haltmask=%x setbits=%x intmask=%x next_sched=%x next_intmask=%x\n", -+ haltmask, setbits, intmask, next_sched, next_intmask); -+ -+ CHANGE_INT_MASK (dev, next_intmask); -+ CHANGE_SCHED_STATUS (dev, next_sched); -+ -+ spin_unlock_irqrestore (&dev->dev_intmask_lock, flags); -+} -+ -+void -+elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ /* add to the end of the halt operations list */ -+ list_add_tail (&op->op_link, &dev->dev_haltop_list); -+ -+ if ((dev->dev_haltop_mask & op->op_mask) != op->op_mask) -+ { -+ dev->dev_haltop_mask |= op->op_mask; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+void -+elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_ONESHOT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ writeq ((op->op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, cq->cq_mapping); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_PERSISTENT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_del (&op->op_link); -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+static __inline__ void -+__issue_dma_flushop_cmd (ELAN4_CQ *cq) -+{ -+ writeq (DMA_ShMemWrite | RUN_DMA_CMD, cq->cq_mapping); -+ writeq (0 /* cookie */, cq->cq_mapping); -+ writeq (0 /* vproc */, cq->cq_mapping); -+ writeq (0 /* srcAddr */, cq->cq_mapping); -+ writeq (0 /* dstAddr */, cq->cq_mapping); -+ writeq (0 /* srcEvent */, cq->cq_mapping); -+ writeq (0 /* dstEvent */, cq->cq_mapping); -+ writeq (SET_EVENT_CMD, cq->cq_mapping); -+} -+ -+static void -+handle_dma_flushops_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned int hipri = ((unsigned long) arg & 1); -+ E4_uint64 status = dev->dev_dma_flushop[hipri].status; -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ E4_uint32 completedPtr = CQ_CompletedPtr(queuePtrs); -+ E4_uint32 size = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ unsigned long flags; -+ -+ /* -+ * Since we're called from a main interrupt which was issued through the approriate -+ * flushcq the command queue descriptor for dma flushing can no longer be in the -+ * insert cache, nor can it be in the extractor (as it's trapped), hence it is -+ * safe to modify the completed pointer -+ */ -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (status != 0); -+ -+ /* skip over either the DMA/SETEVENT or just the SETEVENT depending on the trap type */ -+ if (CPROC_TrapType (status) == CommandProcDmaQueueOverflow) -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 64) & (size - 1)); -+ else -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 8) & (size - 1)); -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs) + 4, -+ ((queuePtrs >> 32) & ~CQ_PtrOffsetMask) | (completedPtr & CQ_PtrOffsetMask)); -+ -+ elan4_restartcq (dev, dev->dev_dma_flushop[hipri].cq); -+ -+ if (! list_empty (&dev->dev_dma_flushop[hipri].list)) -+ __issue_dma_flushop_cmd (dev->dev_dma_flushop[hipri].cq); -+ -+ dev->dev_dma_flushop[hipri].status = 0; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+} -+ -+static void -+handle_dma_flushops (ELAN4_DEV *dev, E4_uint64 status, int cqnum) -+{ -+ unsigned int hipri = (cqnum == elan4_cq2num(dev->dev_dma_flushop[1].cq) ? 1 : 0); -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ ELAN4_CQ *flushq = dev->dev_flush_cq[elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1)]; -+ struct list_head *ops; -+ unsigned long flags; -+ int qfull,count; -+ E4_uint64 queuePtrs; -+ LIST_HEAD(list); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (cqnum == elan4_cq2num (dev->dev_dma_flushop[hipri].cq)); -+ ASSERT (! list_empty (&dev->dev_dma_flushop[hipri].list)); -+ ASSERT (dev->dev_dma_flushop[hipri].status == 0); -+ -+ /* remove the whole list */ -+ ops = dev->dev_dma_flushop[hipri].list.next; -+ -+ list_del_init (&dev->dev_dma_flushop[hipri].list); -+ -+ /* and add it to our local list */ -+ list_add_tail (&list, ops); -+ -+ /* now determine whether the queue was full - since it cannot be empty -+ * then if the front and back pointers are the same then it is full */ -+ queuePtrs = hipri ? read_reg64 (dev, DProcHighPriPtrs) : read_reg64 (dev, DProcLowPriPtrs); -+ qfull = (E4_QueueFrontPointer (queuePtrs) == E4_QueueBackPointer (queuePtrs)); -+ -+ if (CPROC_TrapType(status) == CommandProcDmaQueueOverflow && !qfull) -+ printk (" ******* queue overflow trap - but queue not full\n"); -+ -+ if (qfull && CPROC_TrapType(status) != CommandProcDmaQueueOverflow) -+ printk (" ****** queue full - but not overflow trap : %llx %llx %x\n", -+ read_reg64 (dev, DProcLowPriPtrs), read_reg64 (dev, DProcHighPriPtrs), CPROC_TrapType(status)); -+ -+ /* Store the status register, this also indicates that the intop is pending */ -+ dev->dev_dma_flushop[hipri].status = status; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ /* Issue a main interrupt command to the approriate flush command queue, -+ * which will then safely update the completed pointer to skip over the -+ * command which has trapped, also prevent any new commands to be issued -+ * to the command queue. -+ */ -+ dev->dev_dma_flushop[hipri].intop.op_function = handle_dma_flushops_intop; -+ dev->dev_dma_flushop[hipri].intop.op_arg = (void *) (unsigned long) hipri; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_dma_flushop[hipri].intop); -+ -+ /* now execute all operations */ -+ for (count = 0; ! list_empty (&list); count++) -+ { -+ ELAN4_DMA_FLUSHOP *op = list_entry (list.next, ELAN4_DMA_FLUSHOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg, qfull); -+ } -+ -+ /* finally release the "reasons" for halting */ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ if ((dev->dev_halt_dproc_count -= count) == 0) -+ elan4_set_schedstatus (dev, 0); -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ return; -+} -+ -+void -+elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ if (dev->dev_halt_dproc_count++ == 0) /* ensure that the DMA processor cannot */ -+ elan4_set_schedstatus (dev, 0); /* execute the DMA we issue. */ -+ -+ if (list_empty (&dev->dev_dma_flushop[hipri].list) && dev->dev_dma_flushop[hipri].status == 0) -+ __issue_dma_flushop_cmd (dev->dev_dma_flushop[hipri].cq); -+ -+ list_add_tail (&op->op_link, &dev->dev_dma_flushop[hipri].list); -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static void -+enable_elan_errors (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+} -+ -+#define ERROR_DISABLE_PERIOD (hz/2) -+#define ERROR_SAMPLE_PERIOD (hz/10) -+#define ERROR_LIMIT (100) -+ -+static __inline__ void -+check_error_rate (ELAN4_DEV *dev) -+{ -+ if (dev->dev_error_time == (lbolt/ERROR_SAMPLE_PERIOD)) -+ { -+ if (++dev->dev_errors_per_period >= ERROR_LIMIT && (dev->dev_intmask & INT_ErrorInterrupts)) -+ { -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->dev_error_timeoutid, enable_elan_errors, (void *) dev, ERROR_DISABLE_PERIOD); -+ } -+ } -+ else -+ { -+ dev->dev_error_time = (lbolt/ERROR_SAMPLE_PERIOD); -+ dev->dev_errors_per_period = 0; -+ } -+} -+ -+static __inline__ int -+handle_mainints (ELAN4_DEV *dev, int nticks, int nintr) -+{ -+ E4_uint32 nfptr = dev->dev_interruptq_nfptr; -+ E4_uint32 bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ E4_uint32 qsize = E4_QueueSize(elan4_interruptq_size); -+ E4_uint32 qmask = qsize - 1; -+ long tlim = lbolt + nticks; -+ int done = 0; -+ unsigned long flags; -+ -+ do { -+ int todo = ((bptr - nfptr) & qmask) / E4_MainIntEntrySize; -+ -+ ASSERT (todo > 0); -+ -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: fptr %x nfptr %x bptr %x : %d todo\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr, todo); -+ -+ if (nintr >= 0 && (done + todo) > nintr) /* punt because too may to do in interrupt */ -+ { -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: punting (done %d todo %d) (bptr %x fptr %x)\n", -+ done, todo, bptr, read_reg32 (dev, MainIntQueuePtrs.s.Front)); -+ -+ return 1; -+ } -+ -+ BucketDevStat (dev, s_mainints, todo, MainIntBuckets); -+ -+ /* consume all the entries in the queue which we think are there */ -+ do { -+ E4_uint64 value = elan4_sdram_readq (dev, nfptr); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, E4_MAIN_INT_CTX (value)); -+ E4_uint32 fptr = nfptr; -+ -+ PRINTF2 (DBG_DEVICE, DBG_MAININT, "handle_mainints: process cookie %llx - write fptr=%x\n", value, nfptr); -+ -+ if (ctxt == NULL) -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "handle_mainints: context %d invalid\n", E4_MAIN_INT_CTX (value)); -+ else -+ ctxt->ctxt_ops->op_interrupt (ctxt, E4_MAIN_INT_COOKIE(value)); -+ -+ /* compute the next queue front pointer, before updating the front pointer -+ * since we need to ensure that elan4_queue_mainintop doesn't see the queue -+ * as being empty if an extra interrupt is queued in between */ -+ dev->dev_interruptq_nfptr = nfptr = (nfptr & ~qmask) | ((nfptr + sizeof (E4_uint64)) & qmask); -+ -+ /* update the queue front pointer, doing this will clear the -+ * interrupt for *all* interrupt cookies which have previously -+ * been added to the queue */ -+ write_reg32 (dev, MainIntQueuePtrs.s.Front, E4_QueueFrontValue (fptr, elan4_interruptq_size)); -+ pioflush_reg (dev); -+ } while (bptr != nfptr); -+ -+ /* re-sample the back pointer and if it's different from the previous -+ * queue front pointer, then the queue has something on it again */ -+ done += todo; -+ -+ if ((nticks > 0 && ((int) (lbolt - tlim)) > 0)) /* been executing for too long in thread */ -+ return 1; -+ -+ bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ -+ PRINTF3 (DBG_DEVICE, DBG_MAININT, "handle_mainints: resample : fptr %x nfptr %x bptr %x\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr); -+ -+ /* at this point we've made some space in the interrupt queue, -+ * so check to see if we've got anything to restart */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ while (! list_empty (&dev->dev_interruptq_list)) -+ { -+ ELAN4_INTOP *op = list_entry (dev->dev_interruptq_list.next, ELAN4_INTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ op->op_function (dev, op->op_arg); -+ } -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ } while (bptr != nfptr); -+ -+ return 0; -+} -+ -+static void -+elan4_mainint_thread (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init ("elan4_mainint"); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ for (;;) -+ { -+ if (dev->dev_stop_threads) -+ break; -+ -+ if (! (dev->dev_intmask & INT_MainInterrupt)) -+ { -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ if (handle_mainints (dev, elan4_mainint_resched_ticks, -1)) -+ BumpDevStat (dev, s_mainint_rescheds); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ } -+ -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ -+ dev->dev_mainint_stopped = 1; -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ if (dev->dev_interruptq_nfptr == read_reg32 (dev, MainIntQueuePtrs.s.Back)) -+ op->op_function (dev, op->op_arg); -+ else -+ list_add_tail (&op->op_link, &dev->dev_interruptq_list); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_cproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint32 cqptr = read_reg32 (dev, CommandControl.CommandQueueDescsBase) & E4_QueueDescPtrMask; -+ unsigned cqnum = ((cqptr - dev->dev_cqaddr) / sizeof (E4_CommandQueueDesc)); -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint64 status = read_reg64 (dev, CProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, CQ_Context (control)); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "handle_cproc_trap: cqnum=%d status=%016llx control=%016llx TrapType\n", -+ cqnum, status, control, CPROC_TrapType (status)); -+ PRINTF4 (DBG_DEVICE, DBG_INTR, " %016llx %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control))); -+ -+ BumpDevStat (dev, s_cproc_traps); -+ -+ ctxt->ctxt_ops->op_cproc_trap (ctxt, status, cqnum); -+ -+ return (CPROC_TrapType (status) == CommandProcWaitTrap ? SCH_RestartCProc | SCH_RestartEProc : SCH_RestartCProc); -+} -+ -+static __inline__ E4_uint32 -+handle_dproc_trap (ELAN4_DEV *dev, int unit) -+{ -+ E4_uint64 status = (unit == 0) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ E4_uint32 restart = (unit == 0) ? SCH_RestartDma0Proc : SCH_RestartDma1Proc; -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, DPROC_Context (status)); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "handle_dproc_trap: unit %d context %d%s\n", unit, DPROC_Context(status), -+ DPROC_PrefetcherFault(status) ? " (prefetcher)" : ""); -+ -+ if (DPROC_PrefetcherFault (status)) -+ restart |= SCH_RestartDmaPrefetchProc; -+ -+ BumpDevStat (dev, s_dproc_traps); -+ -+ ctxt->ctxt_ops->op_dproc_trap (ctxt, status, unit); -+ -+ return (restart); -+} -+ -+static __inline__ E4_uint32 -+handle_eproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, EProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, EPROC_Context (status)); -+ -+ BumpDevStat (dev, s_eproc_traps); -+ -+ ctxt->ctxt_ops->op_eproc_trap (ctxt, status); -+ -+ return (SCH_RestartEProc); -+} -+ -+static __inline__ E4_uint32 -+handle_tproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, TProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, TPROC_Context (status)); -+ -+ BumpDevStat (dev, s_tproc_traps); -+ -+ ctxt->ctxt_ops->op_tproc_trap (ctxt, status); -+ -+ return (SCH_RestartTProc); -+} -+ -+static __inline__ void -+handle_haltints (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ struct list_head list = LIST_HEAD_INIT(list); -+ E4_uint32 mask = 0; -+ E4_uint32 active = 0; -+ struct list_head *entry; -+ struct list_head *next; -+ unsigned long flags; -+ -+ BumpDevStat (dev, s_haltints); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ list_for_each_safe (entry, next, &dev->dev_haltop_list) { -+ ELAN4_HALTOP *op = list_entry (entry, ELAN4_HALTOP, op_link); -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "handle_haltints: op=%p op_mask=%x intreg=%x\n", op, op->op_mask, intreg); -+ -+ if ((op->op_mask & intreg) != op->op_mask) -+ mask |= op->op_mask; -+ else -+ { -+ list_del (&op->op_link); /* remove from list */ -+ list_add_tail (&op->op_link, &list); /* add to local list */ -+ -+ active |= op->op_mask; -+ } -+ } -+ -+ ASSERT (dev->dev_haltop_mask == (mask | active)); -+ -+ dev->dev_haltop_mask = mask; -+ -+ if (list_empty (&list)) -+ elan4_set_schedstatus (dev, intreg); -+ else -+ { -+ dev->dev_haltop_active = active; -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ ELAN4_HALTOP *op = list_entry (list.next, ELAN4_HALTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg); -+ } -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ dev->dev_haltop_active = 0; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_iproc_trap (ELAN4_DEV *dev, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ E4_uint64 status = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, IProcStatusCntxAndTrType)); -+ E4_uint32 filter = elan4_read_filter (dev, IPROC_NetworkContext (status)); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+ -+ /* -+ * The context is not valid in the following case : -+ * ack not been sent AND bad CRC/bad length. -+ * -+ * NOTE TransCRCStatus and BadLength only valid if NOT an EopTrap. -+ */ -+ ASSERT ((IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) || IPROC_EOPTrap (status) || -+ (IPROC_TransCRCStatus (status) == CRC_STATUS_GOOD && !IPROC_BadLength (status))); -+ -+ BumpDevStat (dev, s_iproc_traps); -+ -+ ctxt->ctxt_ops->op_iproc_trap (ctxt, status, unit); -+ -+ return (SCH_RestartCh0LowPriInput << unit); -+} -+ -+void -+handle_pcimemerr (ELAN4_DEV *dev) -+{ -+ elan4_pcierror (dev); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_sdramint (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, SDRamECCStatus); -+ char errstr[200]; -+ int i; -+ int Found = 0; -+ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_sdramint\n"); -+ -+ /* search for this error already being logged */ -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i >= 0; i--) -+ if ((dev->dev_sdramerrs[i] & 0x000fffffffffffffULL) == status) -+ { -+ Found = 1; -+ dev->dev_sdramerrs[i] += 10000000000000ULL; // Keep a count. -+ break; -+ } -+ -+ /* stash the status for /proc */ -+ if (!Found) -+ { -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i > 0; i--) -+ dev->dev_sdramerrs[i] = dev->dev_sdramerrs[i-1]; -+ dev->dev_sdramerrs[0] = status; -+ } -+ -+ printk ("elan%d: ECC Error %s\n", dev->dev_instance, elan4_sdramerr2str (dev, status, errstr)); -+ -+ if (!ECC_UncorrectableErr(status) && !ECC_MultUncorrectErrs(status)) -+ printk ("elan%d: ECC error data=%016llx\n", dev->dev_instance, elan4_sdram_readq (dev, ECC_Addr(status))); -+ -+ if (ECC_CorrectableErr (status)) -+ BumpDevStat (dev, s_correctable_errors); -+ if (ECC_MultCorrectErrs (status)) -+ BumpDevStat (dev, s_multiple_errors); -+ -+ if (ECC_UncorrectableErr(status)) -+ panic ("elan%d: uncorrectable ECC error\n", dev->dev_instance); -+ if (ECC_MultUncorrectErrs(status)) -+ panic ("elan%d: muliple uncorrectable ECC error\n", dev->dev_instance); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_SDRAM_ERROR); -+ -+ check_error_rate (dev); -+} -+ -+static void -+clear_linkerr_led (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+} -+ -+void -+handle_linkerror (ELAN4_DEV *dev) -+{ -+ E4_uint32 LinkState; -+ E4_uint32 CurrState = read_reg32 (dev, LinkControlReg); -+ -+ /* Set for reading errors. */ -+ write_reg32 (dev, LinkControlReg, -+ (CurrState = CurrState & ~((LCONT_TEST_CONTROL_MASK << LCONT_TEST_CONTROL_SHIFT) | -+ (LCONT_TEST_VALUE_MASK << LCONT_TEST_VALUE_SHIFT)))); -+ LinkState = LCONT_LINK_STATE(CurrState = read_reg32 (dev, LinkControlReg)); -+ -+#ifdef DEBUG -+ { -+ E4_uint8 ErrorMsg[256], DataErrorVal[64]; -+ -+ strcpy (ErrorMsg, "handle_linkerror:"); -+ if (LinkState & LS_LockError) strcat (ErrorMsg, " LockError"); -+ if (LinkState & LS_DeskewError) strcat (ErrorMsg, " DeskewError"); -+ if (LinkState & LS_PhaseError) strcat (ErrorMsg, " PhaseError"); -+ if (LinkState & LS_DataError) -+ { -+ E4_uint32 error[4]; -+ E4_uint32 i; -+ strcat (ErrorMsg, " DataError"); -+ /* Errors */ -+ for(i = LRS_ErrorVal8to0; i <= LRS_ErrorVal35to27; i++) -+ { -+ write_reg32 (dev, LinkControlReg, -+ CurrState | LCONT_TEST_VALUE(i) | (LCONT_READ_STATE << LCONT_TEST_CONTROL_SHIFT)); -+ error[i - LRS_ErrorVal8to0] = LCONT_LINK_STATE(read_reg32 (dev, LinkControlReg)); -+ } -+ sprintf (DataErrorVal, " Link State Error Val: %09llx %03x %03x %03x %03x", -+ (unsigned long long) ((error[0] & 0x1ffUL) | ((error[1] & 0x1ffUL) << 9) | -+ ((error[2] & 0x1ffUL) << 18) | ((error[3] & 0x1ffUL) << 27)), -+ error[3], error[2], error[1], error[0]); -+ strcat (ErrorMsg, DataErrorVal); -+ } -+ if (LinkState & LS_FifoOvFlow0) strcat (ErrorMsg, " FifoOvFlow0"); -+ if (LinkState & LS_FifoOvFlow1) strcat (ErrorMsg, " FifoOvFlow1"); -+ if (LinkState & LS_Mod45Changed) strcat (ErrorMsg, " Mod45Changed"); -+ if (LinkState & LS_PAckNotSeenError) strcat (ErrorMsg, " PAckNotSeenError"); -+ strcat (ErrorMsg, "\n"); -+ PRINTF0 (DBG_DEVICE, DBG_INTR, ErrorMsg); -+ } -+#endif -+ -+ BumpDevStat (dev, s_link_errors); -+ -+ if (LinkState & LS_LockError) BumpDevStat (dev, s_lock_errors); -+ if (LinkState & LS_DeskewError) BumpDevStat (dev, s_deskew_errors); -+ if (LinkState & LS_PhaseError) BumpDevStat (dev, s_phase_errors); -+ if (LinkState & LS_DataError) BumpDevStat (dev, s_data_errors); -+ if (LinkState & LS_FifoOvFlow0) BumpDevStat (dev, s_fifo_overflow0); -+ if (LinkState & LS_FifoOvFlow1) BumpDevStat (dev, s_fifo_overflow1); -+ if (LinkState & LS_Mod45Changed) BumpDevStat (dev, s_mod45changed); -+ if (LinkState & LS_PAckNotSeenError) BumpDevStat (dev, s_pack_not_seen); -+ -+ PULSE_SCHED_RESTART (dev, SCH_ClearLinkErrorInt); -+ -+ /* schedule a timer to clear the link error LED, so that it stays on -+ * for a second for every link error that occurs */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && !timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ schedule_timer_fn (&dev->dev_linkerr_timeoutid, clear_linkerr_led, (void *) dev, HZ); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_linkportkeyfail (ELAN4_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_linkportkeyfail\n"); -+ -+ BumpDevStat (dev, s_linkport_keyfail); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_LINKPORT_INT); -+ -+ check_error_rate (dev); -+} -+ -+ -+static __inline__ void -+__elan4_4msi0 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ unsigned long flags; -+ -+ if (intreg & intmask & INT_MainInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_MainInterrupt); -+ -+ if (handle_mainints (dev, -1, elan4_mainint_punt_loops) == 0) -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ else -+ { -+ BumpDevStat (dev, s_mainint_punts); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ kcondvar_wakeupone (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ } -+ } -+} -+ -+static __inline__ void -+__elan4_4msi1 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi1: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ -+ if (intreg & intmask & INT_CProc) -+ restart |= handle_cproc_trap (dev); -+ if (intreg & intmask & INT_EProc) -+ restart |= handle_eproc_trap (dev); -+ if (intreg & intmask & INT_Dma0Proc) -+ restart |= handle_dproc_trap (dev, 0); -+ if (intreg & intmask & INT_Dma1Proc) -+ restart |= handle_dproc_trap (dev, 1); -+ if (intreg & intmask & INT_TProc) -+ restart |= handle_tproc_trap (dev); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+ -+ if (intreg & (INT_Halted|INT_Discarding)) -+ handle_haltints (dev, intreg); -+} -+ -+static __inline__ void -+__elan4_4msi2 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi2: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ if (intreg & intmask & INT_IProcCh0LowPri) -+ restart |= handle_iproc_trap (dev, 0); -+ -+ if (intreg & intmask & INT_IProcCh1LowPri) -+ restart |= handle_iproc_trap (dev, 1); -+ -+ if (intreg & intmask & INT_IProcCh0HighPri) -+ restart |= handle_iproc_trap (dev, 2); -+ -+ if (intreg & intmask & INT_IProcCh1HighPri) -+ restart |= handle_iproc_trap (dev, 3); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+} -+ -+static __inline__ void -+__elan4_4msi3 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi3: %x\n", intreg); -+ -+ if (intreg & intmask & INT_PciMemErr) -+ handle_pcimemerr (dev); -+ -+ if (intreg & intmask & INT_SDRamInt) -+ handle_sdramint (dev); -+ -+ if (intreg & intmask & INT_LinkError) -+ handle_linkerror (dev); -+ -+ if (intreg & intmask & INT_LinkPortKeyFail) -+ handle_linkportkeyfail (dev); -+} -+ -+int -+elan4_1msi0 (ELAN4_DEV *dev) -+{ -+ E4_uint32 intmask = dev->dev_intmask; -+ E4_uint32 intreg; -+ -+ if (intmask == 0 || ((intreg = read_reg32 (dev, InterruptReg)) & intmask) == 0) -+ return (0); -+ -+ BumpDevStat (dev, s_interrupts); -+ -+ do { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "elan4_1msi0: %x\n", intreg); -+ -+ if (intreg & intmask & INT_MSI0) -+ __elan4_4msi0(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI1) -+ __elan4_4msi1(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI2) -+ __elan4_4msi2(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI3) -+ __elan4_4msi3(dev, intreg, intmask); -+ -+ /* must ensure that the read of the interrupt mask -+ * completes before the read of the interrupt register -+ * since the main interrupt thread clears it's interrupt -+ * and then re-enables it in the interrupt mask. */ -+ intmask = dev->dev_intmask; -+ mb(); -+ intreg = read_reg32 (dev, InterruptReg); -+ -+ } while ((intreg & intmask) != 0); -+ -+ return (1); -+} -+ -+/* local context management */ -+int -+elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops) -+{ -+ unsigned long flags; -+ int tbl; -+ -+ ctxt->ctxt_dev = dev; -+ ctxt->ctxt_ops = ops; -+ -+ INIT_LIST_HEAD (&ctxt->ctxt_cqalist); -+ spin_lock_init (&ctxt->ctxt_mmulock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ KMEM_ZALLOC (ctxt->ctxt_mmuhash[tbl], ELAN4_HASH_ENTRY **, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *), 1); -+ -+ if (ctxt->ctxt_mmuhash[tbl] == NULL) -+ { -+ if (tbl != 0) -+ KMEM_FREE (ctxt->ctxt_mmuhash[0], dev->dev_hashsize[0] * sizeof (ELAN4_HASH_ENTRY *)); -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+ return (-ENOMEM); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ if ((ctxt->ctxt_num = bt_freebit (dev->dev_ctxmap, (1 << dev->dev_ctxtableshift))) >= 0) -+ { -+ /* chain onto the lists of all contexts */ -+ list_add (&ctxt->ctxt_link, &dev->dev_ctxt_list); -+ -+ BT_SET (dev->dev_ctxmap, ctxt->ctxt_num); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return (ctxt->ctxt_num < 0 ? -ENOMEM : 0); -+} -+ -+void -+elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt) -+{ -+ unsigned long flags; -+ int tbl; -+ -+ /* remove from list of contexts */ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_del (&ctxt->ctxt_link); -+ -+ BT_CLEAR (dev->dev_ctxmap, ctxt->ctxt_num); -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ spin_lock_destroy (&ctxt->ctxt_info_lock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ KMEM_FREE (ctxt->ctxt_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *)); -+ -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_CTXT * -+elan4_localctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_for_each (entry, &dev->dev_ctxt_list) { -+ ELAN4_CTXT *ctxt = list_entry (entry, ELAN4_CTXT, ctxt_link); -+ -+ if (ctxt->ctxt_num == num) -+ { -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ return (ctxt); -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return ((ELAN4_CTXT *) NULL); -+} -+ -+ELAN4_CTXT * -+elan4_networkctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ E4_uint32 filter = elan4_read_filter (dev, num); -+ -+ if ((filter & E4_FILTER_CONTEXT_MASK) == INVALID_CONTEXT) -+ return NULL; -+ else -+ return elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+} -+ -+/* network context management */ -+int -+elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int res = 0; -+ E4_uint32 filter; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ filter = elan4_read_filter (dev, ctxnum); -+ if ((filter & E4_FILTER_CONTEXT_MASK) != INVALID_CONTEXT) -+ { -+ PRINTF2 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d filter=%x -> EBUSY\n", ctxnum, filter); -+ res = -EBUSY; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d - SUCCESS\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return (res); -+} -+ -+void -+elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_detach_filter: detach from network context %d\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, INVALID_CONTEXT | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF6 (ctxt, DBG_NETWORK_CTX, "elan4_set_filter: set filter state %x for network context %d <%s%s%s%s>\n", state, ctxnum, -+ (state & E4_FILTER_DISCARD_ALL) ? "discard," : "", -+ (state & E4_FILTER_ACKOK_ALL) ? "ack-ok," : "", -+ (state & E4_FILTER_HIGH_PRI) ? "high-pri," : "", -+ (state & E4_FILTER_STATS) ? "stats," : ""); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | state); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_uint32 value = tbl ? (E4_VPT_VALID | E4_VPT_VALUE(tbl->tbl_entries, tbl->tbl_size)) : 0; -+ -+ /* and insert into the vp table */ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (ctxt->ctxt_num * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, VirtualProcessTable)), value); -+ pioflush_sdram(dev); -+ -+ PULSE_SYSCONTROL (dev, CONT_ROUTE_FLUSH); -+} -+ -+/* command queue management */ -+ELAN4_CQA * -+elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ cqa->cqa_ref++; -+ -+ spin_unlock (&dev->dev_cqlock); -+ return cqa; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ return NULL; -+} -+ -+void -+elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el, *nel; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each_safe (el, nel, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ if (--cqa->cqa_ref || bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+ return; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ printk ("elan4_putcqa: idx %d not found\n", idx); -+ BUG(); -+} -+ -+static ELAN4_CQ * -+elan4_getcq (ELAN4_CTXT *ctxt, unsigned int type) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa; -+ struct list_head *el; -+ int cidx, didx; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_type == type && (cidx = bt_freebit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA)) >=0) -+ { -+ BT_SET (cqa->cqa_bitmap, cidx); -+ -+ spin_unlock (&dev->dev_cqlock); -+ return &cqa->cqa_cq[cidx]; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ /* allocate a new cqa and it's chunk of command queue descriptors */ -+ KMEM_ZALLOC (cqa, ELAN4_CQA *, sizeof (ELAN4_CQA), 1); -+ if (cqa == NULL) -+ return NULL; -+ -+ spin_lock (&dev->dev_cqlock); -+ cidx = bt_freebit (ctxt->ctxt_cqamap, ELAN4_MAX_CQA); -+ -+ /* On architectures which have MTRR registers for write-combinig -+ * the top command queues from dev->dev_cqreorder upwards are -+ * used for reordered queues. Without MTRR registers any page -+ * sized group can use write combinig through the ptes. */ -+#ifdef CONFIG_MTRR -+ if ((type & CQ_Reorder) != 0) -+ didx = bt_nextbit (dev->dev_cqamap, dev->dev_cqcount, dev->dev_cqreorder - 1, 0); -+ else -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqreorder); -+#else -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqcount); -+#endif -+ -+ if (cidx < 0 || didx < 0) -+ { -+ spin_unlock (&dev->dev_cqlock); -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ return NULL; -+ } -+ -+ BT_SET (ctxt->ctxt_cqamap, cidx); -+ BT_SET (dev->dev_cqamap, didx); -+ -+ cqa->cqa_idx = cidx; -+ cqa->cqa_type = type; -+ cqa->cqa_cqnum = (didx * ELAN4_CQ_PER_CQA); -+ -+ list_add_tail (&cqa->cqa_link, &ctxt->ctxt_cqalist); -+ -+ /* initialise the cqa struct */ -+ for (cidx = 0; cidx < ELAN4_CQ_PER_CQA; cidx++) -+ { -+ cqa->cqa_cq[cidx].cq_idx = cidx; -+ cqa->cqa_cq[cidx].cq_cqa = cqa; -+ } -+ -+ /* no mappings yet */ -+ cqa->cqa_ref = 0; -+ -+ /* we're going to return entry zero */ -+ BT_SET (cqa->cqa_bitmap, 0); -+ spin_unlock (&dev->dev_cqlock); -+ -+ return &cqa->cqa_cq[0]; -+} -+ -+static void -+elan4_putcq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa = cq->cq_cqa; -+ -+ spin_lock (&dev->dev_cqlock); -+ -+ BT_CLEAR (cqa->cqa_bitmap, cq->cq_idx); -+ -+ if (bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1 || cqa->cqa_ref) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+} -+ -+ELAN4_CQ * -+elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned perm, unsigned cqtype) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQ *cq; -+ int cqnum; -+ sdramaddr_t cqdesc; -+ unsigned offset; -+ E4_uint64 value; -+ -+ if ((cq = elan4_getcq (ctxt, cqtype)) == NULL) -+ return NULL; -+ -+ cqnum = elan4_cq2num(cq); -+ -+ cq->cq_space = elan4_sdram_alloc (dev, CQ_Size(cqsize)); -+ if (cq->cq_space == (virtaddr_t) 0) -+ { -+ elan4_putcq (ctxt, cq); -+ return (NULL); -+ } -+ -+ cq->cq_size = cqsize; -+ cq->cq_perm = perm; -+ -+ /* and finally initialise the command queue descriptor */ -+ cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ -+ value = CQ_QueuePtrsValue (cqsize, cq->cq_space, cq->cq_space); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ value |= ((cqtype & CQ_Priority) ? CQ_RevA_Priority : 0); -+ else -+ value |= (((cqtype & CQ_Priority) ? CQ_RevB_Priority : 0) | -+ ((cqtype & CQ_Reorder) ? CQ_RevB_ReorderingQueue : CQ_RevB_32bitWriteQueue)); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs), value); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), CQ_ControlValue (ctxt->ctxt_num, 2, perm)); -+ pioflush_sdram (dev); -+ -+ offset = (cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ cq->cq_mapping = elan4_map_device (dev, ELAN4_BAR_REGISTERS, (offset & ~(PAGE_SIZE-1)), -+ PAGE_SIZE, &cq->cq_handle) + (offset & (PAGE_SIZE-1)); -+#ifdef CONFIG_MPSAS -+ if (ctxt == &dev->dev_ctxt) -+ return (cq); -+#endif -+ -+ elan4_sdram_flushcache (dev, cq->cq_space, CQ_Size(cqsize)); -+ -+ return (cq); -+} -+ -+void -+elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned offset = (elan4_cq2num(cq) + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ elan4_flushcq (dev, cq); -+ -+ elan4_unmap_device (dev, cq->cq_mapping - (offset & (PAGE_SIZE-1)), PAGE_SIZE, &cq->cq_handle); -+ elan4_sdram_free (dev, cq->cq_space, CQ_Size (cq->cq_size)); -+ -+ elan4_putcq (ctxt, cq); -+} -+ -+void -+elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ int hipri; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restarting cq %p\n", cq); -+ -+ spin_lock_irqsave (&dev->dev_requeue_lock, flags); -+ -+ while (read_reg32 (dev, CommandControl.CommandRequeuePtr) & E4_CommandRequeueBusy) -+ ; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevA_Priority) != 0; -+ else -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevB_Priority) != 0; -+ -+ if (hipri) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as high pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc | E4_CommandRequeueHighPri); -+ } -+ else -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as low pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc); -+ } -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_requeue_lock, flags); -+} -+ -+static void -+flushcq_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ dev->dev_flush_finished |= (1 << (unsigned long) arg); -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+void -+elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ int flushqnum = elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1); -+ ELAN4_CQ *flushq = dev->dev_flush_cq[flushqnum]; -+ unsigned long flags; -+ -+ PRINTF (DBG_DEVICE, DBG_FLUSH, "elan4_flushcq: cqnum=%d\n", elan4_cq2num(cq)); -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ dev->dev_flush_finished &= ~(1 << flushqnum); -+ -+ dev->dev_flush_op[flushqnum].op_function = flushcq_intop; -+ dev->dev_flush_op[flushqnum].op_arg = (void *) (unsigned long) flushqnum; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_flush_op[flushqnum]); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint32 control = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ /* Write the command queues control word, but ensure that the ChannelNotCompleted fields -+ * are not modified. We use this to just alter the RestartCount/Permissions fields */ -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), -+ CQ_ControlValue (CQ_Context (control), restart ? restart : CQ_RestartCount (control), perm)); -+} -+ -+/* instruction cache flush */ -+static __inline__ void -+elan4_flush_icache_locked (ELAN4_DEV *dev) -+{ -+ int i, j; -+ -+ PRINTF0 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache_locked: flushing icache\n"); -+ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ -+ /* Errata 24: must ensure that the DCache is flushed after loading -+ * code for the thread processor. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ pioflush_reg (dev); -+} -+ -+static void -+device_iflush_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ dev->dev_iflush_queued = 0; -+ -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache_halted (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ PRINTF1 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache: queued=%d\n", dev->dev_iflush_queued); -+ -+ if (! dev->dev_iflush_queued) -+ { -+ dev->dev_iflush_queued = 1; -+ -+ elan4_queue_haltop (dev, &dev->dev_iflush_haltop); -+ } -+ -+ while (dev->dev_iflush_queued) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+/* device context operations */ -+static void -+device_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &dev->dev_cproc_trap; -+ -+ elan4_extract_cproc_trap (dev, status, trap, cqnum); -+ -+ DBGCMD (DBG_DEVICE, DBG_FLUSH, elan4_display_cproc_trap (DBG_DEVICE, DBG_FLUSH, "device_cproc_trap", trap)); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (ctxt, DBG_FLUSH, "device_cproc_trap: cqnum=%d\n", cqnum); -+ -+ /* XXXX: we could either just hit restart (and hope) - or we could extract -+ * the event interrupt cookie out and "complete" the command before -+ * restarting it */ -+ elan4_restartcq (dev, dev->dev_flush_cq[cqnum]); -+ return; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcPermissionTrap: -+ handle_dma_flushops (dev, status, cqnum); -+ return; -+ -+ default: -+ printk ("device_cproc_trap: status=%llx control=%llx TrapType=%x cqnum=%d\n", (long long) trap->tr_status, -+ elan4_sdram_readq (dev, dev->dev_cqaddr + cqnum * sizeof (E4_CommandQueueDesc) + -+ offsetof (E4_CommandQueueDesc, CQ_Control)), -+ (int) CPROC_TrapType(trap->tr_status), cqnum); -+ panic ("device_cproc_trap"); -+ } -+} -+ -+static void -+device_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ ELAN4_TPROC_TRAP trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, &trap); -+ -+ elan4_display_tproc_trap (DBG_CONSOLE, DBG_TRAP, "device_tproc_trap", &trap); -+ panic ("device_tproc_trap"); -+} -+ -+static void -+device_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ elan4_display_dproc_trap (DBG_CONSOLE, DBG_TRAP, "device_dproc_trap", &trap); -+ panic ("device_dproc_trap"); -+} -+ -+static void -+device_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) ctxt; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FLUSH, "device_interrupt: cookie=%llx\n", cookie); -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_for_each_safe (el, nel, &dev->dev_intop_list) { -+ ELAN4_INTOP *op = list_entry (el, ELAN4_INTOP, op_link); -+ -+ if (op->op_cookie == cookie) -+ { -+ if ((op->op_cookie & INTOP_TYPE_MASK) == INTOP_ONESHOT) -+ list_del (&op->op_link); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ (*op->op_function)(dev, op->op_arg); -+ return; -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ panic ("device_interrupt: interrupt cookie %llx not found\n", cookie); -+} -+ -+static void -+device_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_IPROC_TRAP *trap = &dev->dev_iproc_trap; -+ -+ elan4_extract_iproc_trap (dev, status, trap, unit); -+ elan4_inspect_iproc_trap (trap); -+ -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "device_iproc_trap", trap)); -+ -+ if (elan4_neterr_iproc_trap (dev, trap)) -+ return; -+ -+ elan4_display_iproc_trap (DBG_CONSOLE, DBG_TRAP, "device_iproc_trap", trap); -+ panic ("device_iproc_trap: unexpected trap\n"); -+} -+ -+ELAN4_TRAP_OPS device_trap_ops = -+{ -+ NULL, -+ device_cproc_trap, -+ device_dproc_trap, -+ device_tproc_trap, -+ device_iproc_trap, -+ device_interrupt, -+}; -+ -+/* -+ * elan4_initialise_device -+ * initialise the ELAN4_DEV struct - spinlocks,cvs etc. -+ * map the registers, sdram etc -+ */ -+int -+elan4_initialise_device (ELAN4_DEV *dev) -+{ -+ int i, bit; -+ -+ if (elan4_mainint_resched_ticks == 0) -+ elan4_mainint_resched_ticks = (hz/4); -+ -+ /* map the registers */ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ -+ dev->dev_rom = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_EBUS_OFFSET + ELAN4_REVA_EBUS_ROM_OFFSET, -+ ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ dev->dev_rom = (ioaddr_t) 0; -+ dev->dev_i2c = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_I2C_OFFSET, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ /* XXXX: parse the ebus rom to determine the sdram configuration */ -+ { -+ extern long long sdram_cfg; -+ -+ if (sdram_cfg == 0) -+ dev->dev_sdram_cfg = SDRAM_STARTUP_VALUE; -+ else -+ dev->dev_sdram_cfg = sdram_cfg; -+ } -+ -+ for (bit = 0; ((1 << bit) & elan4_resource_len (dev, ELAN4_BAR_SDRAM)) == 0; bit++) -+ ; -+ -+ switch ((dev->dev_sdram_cfg >> SDRAM_RamSize_SH) & 3) -+ { -+ case 0: /* 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 1: /* 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = ((i & 2) << (bit)) | ((i & 1) << (bit-1)); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 2: /* 2Gbit (16-bit output) or 1Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 2; bit--; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 3: /* 4Gbit (16-bit output) or 2Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 1; -+ dev->dev_sdram_banks[0].b_base = 0; -+ dev->dev_sdram_banks[0].b_size = (1 << bit); -+ break; -+ } -+ -+ elan4_sdram_init (dev); -+ -+ /* initialise locks for classes of interrupts */ -+ spin_lock_init (&dev->dev_trap_lock); -+ spin_lock_init (&dev->dev_intop_lock); -+ spin_lock_init (&dev->dev_haltop_lock); -+ spin_lock_init (&dev->dev_mainint_lock); -+ -+ /* initialise other locks */ -+ spin_lock_init (&dev->dev_i2c_lock); -+ -+ spin_lock_init (&dev->dev_mmulock); -+ spin_lock_init (&dev->dev_cqlock); -+ spin_lock_init (&dev->dev_ctxlock); -+ -+ spin_lock_init (&dev->dev_intmask_lock); -+ spin_lock_init (&dev->dev_syscontrol_lock); -+ -+ spin_lock_init (&dev->dev_ctxt_lock); -+ spin_lock_init (&dev->dev_flush_lock); -+ spin_lock_init (&dev->dev_requeue_lock); -+ -+ kmutex_init (&dev->dev_lock); -+ -+ kcondvar_init (&dev->dev_mainint_wait); -+ kcondvar_init (&dev->dev_flush_wait); -+ -+ /* initialsie lists */ -+ INIT_LIST_HEAD (&dev->dev_ctxt_list); -+ INIT_LIST_HEAD (&dev->dev_intop_list); -+ INIT_LIST_HEAD (&dev->dev_interruptq_list); -+ INIT_LIST_HEAD (&dev->dev_hc_list); -+ INIT_LIST_HEAD (&dev->dev_haltop_list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[0].list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[1].list); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ return (0); -+} -+ -+void -+elan4_finalise_device (ELAN4_DEV *dev) -+{ -+ kcondvar_destroy (&dev->dev_flush_wait); -+ kcondvar_destroy (&dev->dev_mainint_wait); -+ -+ kmutex_destroy (&dev->dev_lock); -+ -+ spin_lock_destroy (&dev->dev_requeue_lock); -+ spin_lock_destroy (&dev->dev_flush_lock); -+ spin_lock_destroy (&dev->dev_ctxt_lock); -+ -+ spin_lock_destroy (&dev->dev_syscontrol_lock); -+ spin_lock_destroy (&dev->dev_intmask_lock); -+ -+ spin_lock_destroy (&dev->dev_ctxlock); -+ spin_lock_destroy (&dev->dev_cqlock); -+ spin_lock_destroy (&dev->dev_mmulock); -+ -+ spin_lock_destroy (&dev->dev_i2c_lock); -+ -+ spin_lock_destroy (&dev->dev_mainint_lock); -+ spin_lock_destroy (&dev->dev_haltop_lock); -+ spin_lock_destroy (&dev->dev_intop_lock); -+ spin_lock_destroy (&dev->dev_trap_lock); -+ -+ while (! list_empty (&dev->dev_hc_list)) -+ { -+ ELAN4_HASH_CHUNK *hc = list_entry (dev->dev_hc_list.next, ELAN4_HASH_CHUNK, hc_link); -+ -+ list_del (&hc->hc_link); -+ -+ KMEM_FREE(hc, sizeof (ELAN4_HASH_CHUNK)); -+ } -+ -+ elan4_sdram_fini (dev); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ elan4_unmap_device (dev, dev->dev_rom, ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ case PCI_REVISION_ID_ELAN4_REVB: -+ elan4_unmap_device (dev, dev->dev_i2c, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ } -+} -+ -+static void -+initialise_cache (ELAN4_DEV *dev) -+{ -+ register int set, line; -+ -+ /* Initialise the cache to "map" the bottom of sdram - we will use -+ * this space for cache flushing, so require the cache to be set -+ * up so that cachelines for this are in the correct set. -+ * -+ * XXXX: for MPSAS we set bit 28, to ensure that any access to -+ * sdram causes the line to be filled first to expunge any -+ * Xs. */ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], (((E4_uint64) set) << 29) | (1 << 28) | (line << 16)); -+} -+ -+#ifndef CONFIG_MPSAS -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+} -+ -+static void -+initialise_ecc (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ register int i, addr; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ initialise_cache_tags (dev, E4_CacheSize); -+ for (addr = 0; addr < bank->b_size; addr += E4_CacheSize) -+ { -+ for (i = 0; i < E4_CacheSize; i += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr | i, bank->b_ioaddr + addr + i); -+ initialise_cache_tags (dev, addr); -+ } -+ } -+ else -+ { -+ /* Write the whole of this bank of sdram. */ -+ for (addr = 0; addr < bank->b_size; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, bank->b_ioaddr + addr); -+ -+ /* Now flush out the top out of the cache */ -+ for (addr = 0; addr < E4_CacheSize; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, bank->b_ioaddr + addr); -+ -+ /* Now read the top value of sdram to guarantee the write has occured before the ecc is enabled */ -+ readq (bank->b_ioaddr + bank->b_size - sizeof (E4_uint64)); -+ } -+} -+#endif -+ -+#ifdef CONFIG_MPSAS -+static void -+do_initdma (ELAN4_DEV *dev) -+{ -+#define VIRTUAL_ADDRESS 0x10000000ull -+ ELAN4_CQ *cq = dev->dev_flush_cq[0]; -+ E4_uint64 value; -+ E4_uint32 intreg; -+ E4_uint64 status; -+ -+ PRINTF (DBG_DEVICE, DBG_CONFIG, "elan: performing initialising dma\n"); -+ -+ DISABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+ -+ /* initialise the context filter */ -+ elan4_attach_filter (&dev->dev_ctxt, 0); -+ -+ /* now issue a DMA - we expect this to trap */ -+ writeq (E4_DMA_TYPE_SIZE (128*4, DMA_DataTypeByte, 0, 0) | RUN_DMA_CMD, cq->cq_mapping + (0 << 3)); -+ writeq (0, cq->cq_mapping + (1 << 3)); -+ writeq (0, cq->cq_mapping + (2 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (3 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (4 << 3)); -+ writeq (0, cq->cq_mapping + (5 << 3)); -+ writeq (0, cq->cq_mapping + (6 << 3)); -+ -+ /* spin waiting for it to trap - then restart the dma processor */ -+ do { -+ value = read_reg64 (dev, IntAndMaskReg); -+ intreg = (value >> E4_INTERRUPT_REG_SHIFT); -+ } while ((intreg & (INT_Dma0Proc | INT_Dma1Proc)) == 0); -+ -+ /* check it trapped for the right reason */ -+ status = (intreg & INT_Dma0Proc) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ -+ if (DPROC_PrefetcherFault (status) || (DPROC_TrapType(status) != DmaProcFailCountError && DPROC_TrapType(status) != DmaProcPacketAckError)) -+ { -+ printk ("elan: bad dma trap, status = %lx\n", (long)status); -+ panic ("elan: bad dma trap\n"); -+ } -+ -+ PULSE_SCHED_RESTART (dev, SCH_RestartDma0Proc | SCH_RestartDma1Proc | SCH_RestartDmaPrefetchProc); -+ -+ elan4_detach _filter (&dev->dev_ctxt, 0); -+ -+ ENABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+#undef VIRTUAL_ADDRESS -+} -+#endif -+ -+static int -+ebus_read_vpd (ELAN4_DEV *dev, unsigned char *data, unsigned int nob) -+{ -+ unsigned int pci_data_ptr; -+ unsigned int vpd_ptr; -+ register int i; -+ -+ if (read_ebus_rom (dev, 0) != 0x55 || read_ebus_rom (dev, 1) != 0xaa) -+ { -+ printk ("elan%d: invalid rom signature in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ pci_data_ptr = (read_ebus_rom (dev, 0x19) << 8) | read_ebus_rom (dev, 0x18); -+ -+ /* check the pci data structure */ -+ if (read_ebus_rom (dev, pci_data_ptr + 0) != 'P' || -+ read_ebus_rom (dev, pci_data_ptr + 1) != 'C' || -+ read_ebus_rom (dev, pci_data_ptr + 2) != 'I' || -+ read_ebus_rom (dev, pci_data_ptr + 3) != 'R') -+ { -+ printk ("elan%d: invalid pci data structure in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* extract the VPD pointer */ -+ vpd_ptr = (read_ebus_rom (dev, pci_data_ptr + 9) << 8) | read_ebus_rom (dev, pci_data_ptr + 8); -+ -+ if (vpd_ptr == 0) -+ { -+ printk ("elan%d: no vital product data in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* read the vpd data */ -+ for (i = 0; i < nob; i++) -+ data[i] = read_ebus_rom (dev, vpd_ptr + i); -+ -+ return 0; -+} -+ -+int -+elan4_read_vpd (ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) -+{ -+ unsigned char vpd[I2C_ELAN_EEPROM_VPD_SIZE]; -+ unsigned char *ptr = vpd; -+ unsigned int finished = 0; -+ unsigned char *lim; -+ unsigned char name[3]; -+ unsigned char value[256]; -+ unsigned char type; -+ unsigned int len, len2; -+ register int i; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ if (ebus_read_vpd (dev, vpd, I2C_ELAN_EEPROM_VPD_SIZE) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from EBUS rom\n", dev->dev_instance); -+ return -EINVAL ; -+ } -+ } -+ else -+ { -+ if (i2c_read_rom (dev, I2C_ELAN_EEPROM_VPD_BASEADDR, I2C_ELAN_EEPROM_VPD_SIZE, vpd) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from I2C rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ } -+ -+ result[0] = 0; -+ while (! finished) -+ { -+ type = *ptr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(ptr++); -+ len += *(ptr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = ptr + len; ptr < lim; ) -+ { -+ name[0] = *ptr++; -+ name[1] = *ptr++; -+ name[2] = '\0'; -+ len2 = *ptr++; -+ -+ for (i = 0; i < len2 && ptr < lim; i++) -+ value[i] = *ptr++; -+ value[i] = '\0'; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, %s: $s\n", dev->dev_instance, name, value); -+ -+ if (tag != NULL) -+ { /* looking for just one tag */ -+ if (!strcmp (name, tag)) -+ strcpy(result, value); -+ } -+ else -+ { /* get all tags */ -+ strcat(result,name); -+ strcat(result,": "); -+ strcat(result,value); -+ strcat(result,"\n"); -+ } -+ } -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown large resource %x\n", dev->dev_instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown small resource %x\n", dev->dev_instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ if ( result[0] == 0 ) { -+ if ( tag != 0 ) -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find tag %s\n", dev->dev_instance, tag); -+ else -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find any tags\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ return (0); -+} -+ -+int -+elan4_start_device (ELAN4_DEV *dev) -+{ -+ E4_VirtualProcessEntry entry; -+ unsigned pagesizeval[2]; -+ unsigned hashsizeval[2]; -+ register int i, j, tbl, res; -+ unsigned attempts = 0; -+ E4_PAGE_SIZE_TABLE; -+ unsigned char serial[256]; -+ -+ PRINTF (DBG_DEVICE, DBG_ALL, "elan4_start_device: entered\n"); -+ -+ dev->dev_state = ELAN4_STATE_STARTING; -+ -+ tryagain: -+ /* Initialise the pci config space */ -+ if ((res = elan4_pciinit (dev)) < 0) -+ return (res); -+ -+ /* Display the serial number */ -+ if (elan4_read_vpd (dev, "SN", serial)) -+ printk("elan%d: SN: failed to read\n", dev->dev_instance); -+ else -+ printk("elan%d: SN: %s\n", dev->dev_instance, serial); -+ -+ /* initialise the interrupt mask to zero */ -+ SET_INT_MASK (dev, 0); -+ -+ /* Initialise the device registers */ -+ write_reg64 (dev, TlbLineValue, 0); -+ write_reg64 (dev, SysControlReg, 0); -+ -+ /* Initialise the SDRAM using the configuration value from the ROM */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_SETUP); -+ -+ /* Setup the linkport registers */ -+ write_reg64 (dev, LinkPortLock, 0); -+ write_reg64 (dev, LinkPortKey, LINK_PORT_LOCK_VALUE); -+ -+ /* Setup the tick rates, start the clock, and init the stats registers */ -+ write_ureg32 (dev, ClockTickRate.s.TickRates, ELAN4_CLOCK_TICK_RATE); -+ write_ureg64 (dev, Clock, 0); -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ for (i = 0; i < 8; i++) -+ write_ureg32 (dev, StatCounts[i].s.StatsCount, 0); -+ -+ /* Initialise the Link Control register - disable the TLB prefetcher on RevB -+ * as it can cause very occasional data corruption. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ write_reg32 (dev, LinkControlReg, LCONT_REVB_DISABLE_TLB_PREFETCH); -+ else -+ write_reg32 (dev, LinkControlReg, 0); -+ -+ /* Initialise the Link Control Settings to set the PLL Reference Value */ -+ write_reg32 (dev, LinkContSettings, -+ (elan4_mod45disable ? LCONT_MOD45_DISABLE : 0) | -+ (3 << LCONT_CONFIG_PHASE_SHIFT) | -+ ((elan4_pll_div & LCONT_PLL_REF_VAL_BITS_MASK) << LCONT_PLL_REF_VAL_BITS_SHIFT) | -+ (LCONT_VOD_360 << LCONT_LVDS_VOLTAGE_BITS_SHIFT) | -+ (LCONT_TERM_AUTO_OHM << LCONT_LVDS_TERMINATION_SHIFT)); -+ -+ /* Clear the link error LED on RevB and above */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+ -+ initialise_cache (dev); -+ -+ /* Initialise the MMU hash table parameters */ -+ /* Select the largest elan pagesize which is spanned by the -+ * system pagesize for mmu table 0*/ -+ for (i = 0; i < E4_PAGE_SIZE_TABLE_SIZE; i++) -+ if (PageSizeTable[i] > PAGE_SHIFT) -+ break; -+ -+ pagesizeval[0] = i - 1; -+ hashsizeval[0] = elan4_hash_0_size_val; -+ -+ /* Select a suitable elan pagesize to match any "large" page -+ * support that the OS provides. */ -+ pagesizeval[1] = PAGE_SIZE_4M; -+ hashsizeval[1] = elan4_hash_1_size_val; -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_pagesizeval[tbl] = pagesizeval[tbl]; -+ dev->dev_pageshift[tbl] = PageSizeTable[pagesizeval[tbl]]; -+ dev->dev_hashsize[tbl] = (1 << hashsizeval[tbl])/sizeof (E4_HashTableEntry); -+ dev->dev_rsvd_hashmask[tbl] = ((1 << (27 - dev->dev_pageshift[tbl]))-1) & ~((1 << hashsizeval[tbl])-1); -+ dev->dev_rsvd_hashval[tbl] = 0xFFFFFFFF; -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: pageshifts %d,%d\n", dev->dev_pageshift[0], -+ NUM_HASH_TABLES == 2 ? dev->dev_pageshift[1] : 0); -+ -+ /* Initialise the control register to the desired value */ -+ dev->dev_syscontrol = (CONT_EN_ALL_SETS | CONT_MMU_ENABLE | CONT_CACHE_ALL | CONT_2K_NOT_1K_DMA_PACKETS | -+ (pagesizeval[0] << CONT_TABLE0_PAGE_SIZE_SHIFT) | (hashsizeval[0] << CONT_TABLE0_MASK_SIZE_SHIFT)); -+ -+ if (NUM_HASH_TABLES == 2) -+ dev->dev_syscontrol |= CONT_TWO_HASH_TABLES | (pagesizeval[1] << CONT_TABLE1_PAGE_SIZE_SHIFT) | (hashsizeval[1] << CONT_TABLE1_MASK_SIZE_SHIFT); -+ -+ write_reg64 (dev, SysControlReg, dev->dev_syscontrol); -+ -+ /* use direct mapped pci writes during sdram initialisation, since for -+ * cache flushing to work, we need to ensure that the cacheflush page -+ * never gets lines into the incorrect cache set. */ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ elan4_sdram_setup_delay_lines(dev); -+ -+ for (i = res = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_size) -+ res |= elan4_sdram_init_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ if (! res) -+ { -+ if (dev->dev_devinfo.dev_device_id == PCI_REVISION_ID_ELAN4_REVB && ++attempts < 5) -+ { -+ printk ("elan%d: sdram not working, resetting\n", dev->dev_instance); -+ goto tryagain; -+ } -+ -+ printk ("elan%d: could not find any sdram banks\n", dev->dev_instance); -+ goto failed; -+ } -+ -+#ifndef CONFIG_MPSAS -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialising for ECC\n"); -+ -+ for (i = 0 ; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ initialise_ecc (dev, &dev->dev_sdram_banks[i]); -+#endif -+ -+ dev->dev_sdram_initial_ecc_val = read_reg64 (dev, SDRamECCStatus); -+ -+ /* Now enable ECC after we've scrubbed the memory */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_ENABLE_ECC); -+ -+ /* clear any errors, and flush the tlb/route cache */ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH | CONT_ROUTE_FLUSH | CONT_CLEAR_LINKPORT_INT | CONT_CLEAR_SDRAM_ERROR); -+ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Initialise the thread processor's register file */ -+ for (i = 0; i < 64; i++) -+ write_reg64 (dev, TProcRegs[i], 0); -+ -+ /* Initialise the thread processor's ICache tags */ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the ICache with a sethi %hi(addr << 7), %r0 -+ * writing 8 64 bit values per loop of sethi %g0 values ending in 77 for something different?? -+ */ -+ for (i = 0; i < E4_ICacheSizeInBytes; i += (E4_ICachePortSize << 3)) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_AccessICacheRams | (i >> 3)); -+ -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], -+ (E4_uint64) (((E4_uint64)i << (4+7)) + ((E4_uint64)j << (1+7)) + (0x077)) | -+ (E4_uint64) (((E4_uint64)i << (4+7+32)) + ((E4_uint64)j << (1+7+32)) + (0x0e7)) << 32); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ for (i = 0; i < E4_ICachePortSize; i++) -+ write_reg64 (dev, ICachePort[i], E4_ICacheFixupInsn | (E4_ICacheFixupInsn << 32)); -+ -+ /* create the buddy allocator for SDRAM */ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_add_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ dev->dev_ctxtableshift = elan4_ctxt_table_shift; -+ dev->dev_cqcount = (1 << elan4_ln2_max_cqs); -+#ifdef CONFIG_MTRR -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ dev->dev_cqreorder = dev->dev_cqcount >> 1; -+ else -+ dev->dev_cqreorder = dev->dev_cqcount; -+#endif -+ -+ /* allocate the sdram for cache flushing whilst still in direct mapped mode */ -+ dev->dev_cacheflush_space = elan4_sdram_alloc (dev, E4_CacheSize); -+ -+ /* and longer need direct mapped pci writes */ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* allocate the hash tables, command queues, context tables etc */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: allocating hash tables, command queueus, context tables\n"); -+ -+ dev->dev_comqlowpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_comqhighpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_cqaddr = elan4_sdram_alloc (dev, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ dev->dev_dmaqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_highpri_size)); -+ dev->dev_dmaqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ dev->dev_threadqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_highpri_size)); -+ dev->dev_threadqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_lowpri_size)); -+ dev->dev_interruptq = elan4_sdram_alloc (dev, E4_QueueSize(elan4_interruptq_size)); -+ -+ dev->dev_ctxtable = elan4_sdram_alloc (dev, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ dev->dev_faultarea = elan4_sdram_alloc (dev, CUN_Entries * sizeof (E4_FaultSave)); -+ dev->dev_inputtraparea = elan4_sdram_alloc (dev, sizeof (E4_IprocTrapState)); -+ -+ dev->dev_sdrampages[0] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ dev->dev_sdrampages[1] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_hashtable[tbl] = elan4_sdram_alloc (dev, dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#ifndef CONFIG_MPSAS -+ /* Initialise hash tables to invalid (zero) */ -+ elan4_sdram_zeroq_sdram (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#endif -+ } -+ -+ /* Initialise all context filters to discard */ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, dev->dev_ctxtable, -+ E4_FILTER_DISCARD_ALL, (1 << (dev->dev_ctxtableshift-1))) < 0) -+ { -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+ } -+#else -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+#endif -+ -+ PRINTF4 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: hashtables %x,%x, %x,%x\n", dev->dev_hashtable[0], -+ dev->dev_hashsize[0], dev->dev_hashtable[1], dev->dev_hashsize[1]); -+ -+ /* install the hash table pointers */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialise registers with table addresses\n"); -+ write_reg64 (dev, MmuTableBasePtrs, (((E4_uint64) dev->dev_hashtable[0]) | ((E4_uint64) dev->dev_hashtable[1]) << 32)); -+ write_reg64 (dev, MmuFaultAndRootCntxPtr, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_faultarea) << 32)); -+ write_reg64 (dev, InputTrapAndFilter, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_inputtraparea) << 32)); -+ /* -+ * The run ptrs have this format: (Front << 32) | Back -+ * The base for both the front and back is uses the high bits of the back pointer. -+ * So writting just the base value is good enough. -+ */ -+ write_reg64 (dev, CommandLowPriRunPtrs, dev->dev_comqlowpri); -+ write_reg64 (dev, CommandHighPriRunPtrs, dev->dev_comqhighpri); -+ -+ /* Initialise the run queues */ -+ write_reg64 (dev, DProcHighPriPtrs, E4_QueueValue (dev->dev_dmaqhighpri, elan4_dmaq_highpri_size)); -+ write_reg64 (dev, DProcLowPriPtrs, E4_QueueValue (dev->dev_dmaqlowpri, elan4_dmaq_lowpri_size)); -+ write_reg64 (dev, TProcHighPriPtrs, E4_QueueValue (dev->dev_threadqhighpri, elan4_threadq_highpri_size)); -+ write_reg64 (dev, TProcLowPriPtrs, E4_QueueValue (dev->dev_threadqlowpri, elan4_threadq_lowpri_size)); -+ -+ /* Initialise the interrupt queue as "empty" - this is actually with one entry on it */ -+ write_reg64 (dev, MainIntQueuePtrs.Value, (((E4_uint64) E4_QueueFrontValue (dev->dev_interruptq, elan4_interruptq_size) << 32) | -+ ((E4_uint64) E4_QueueBackPointer(dev->dev_interruptq + E4_MainIntEntrySize)))); -+ -+ dev->dev_interruptq_nfptr = dev->dev_interruptq + E4_MainIntEntrySize; -+ -+ /* -+ * Flush the context filter before dropping the Discard all bits in the schedule status register. -+ * Also hit the SCH_RestartTProc to clear out X's from the trap state and -+ * hit the SCH_RestartDmaPrefetchProc to clear out X's from the prev register. -+ */ -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush | SCH_RestartTProc | SCH_RestartDmaPrefetchProc); -+ -+ /* setup the schedule status register. */ -+ SET_SCHED_STATUS (dev, SCH_CProcTimeout6p2us | SCH_DProcTimeslice512us); -+ -+ /* -+ * Now initialise the inserter cache.s -+ * Bit 31 of the first word of the descriptor is a valid bit. This must be cleared. -+ * Bit 31 becomes a used bit in the descriptors in memory. -+ */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, i); /* select a cache line */ -+ write_reg64 (dev, CommandCacheTestPort, 0); /* Mark it invalid */ -+ } -+ -+ /* Setup the pointer to the command descriptors */ -+ /* the table must be aligned on a CQ_CommandDescsAlignement boundary */ -+ /* since we've allocated a small table - we work out the offset of the */ -+ /* first entry in our table for mapping in the command ports later */ -+ dev->dev_cqoffset = (dev->dev_cqaddr & (CQ_CommandDescsAlignment-1)) / sizeof (E4_CommandQueueDesc); -+ -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, (dev->dev_cqaddr & ~(CQ_CommandDescsAlignment-1)) | COM_ENABLE_DEQUEUE); -+ -+ /* allocate the bitmaps for cq,ctxt allocation */ -+ KMEM_ZALLOC (dev->dev_cqamap, bitmap_t *, BT_BITOUL(dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t), 1); -+ KMEM_ZALLOC (dev->dev_ctxmap, bitmap_t *, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t), 1); -+ -+ if (dev->dev_cqamap == NULL || dev->dev_ctxmap == NULL) -+ goto failed; -+ -+ /* Make every fourth context be invalid for ICache fixup. -+ * context 0 is also invalid - since it is used to indicate -+ * an invalid tag. */ -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i += 4) -+ BT_SET (dev->dev_ctxmap, i); -+ -+ /* initialise the halt operations */ -+ dev->dev_haltop_mask = 0; -+ dev->dev_haltop_active = 0; -+ -+ /* allocate the hash table shadow structures - and place all blocks on the free lists */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ KMEM_ZALLOC (dev->dev_mmuhash[tbl], ELAN4_HASH_ENTRY *, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY), 1); -+ KMEM_ZALLOC (dev->dev_mmufree[tbl], ELAN4_HASH_ENTRY **, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *), 1); -+ -+ if (dev->dev_mmuhash[tbl] == NULL || dev->dev_mmufree[tbl] == NULL) -+ goto failed; -+ -+ for (i = 0; i < dev->dev_hashsize[tbl]; i++) -+ { -+ dev->dev_mmuhash[tbl][i].he_entry = dev->dev_hashtable[tbl] + (i * sizeof (E4_HashTableEntry)); -+ dev->dev_mmufree[tbl][i] = &dev->dev_mmuhash[tbl][i]; -+ } -+ } -+ -+ /* setup the interrupt mask register */ -+ SET_INT_MASK (dev, (INT_MSI0 | INT_MSI1 | INT_MSI2 | INT_MSI3) & ~(INT_Discarding | INT_Halted)); -+ -+ /* start a thread to handle excessive main interrupts */ -+ if (kernel_thread_create (elan4_mainint_thread, (caddr_t) dev) == NULL) -+ goto failed; -+ dev->dev_mainint_started = 1; -+ -+ /* install the device context - and allocate the first 16 command queues */ -+ if (elan4_insertctxt (dev, &dev->dev_ctxt, &device_trap_ops) != 0) -+ goto failed; -+ -+ /* Allocate command queues, one for each entry in the inserter cache, -+ * we'll use these queues to flush the insert cache */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ if ((dev->dev_flush_cq[i] = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit | CQ_InterruptEnableBit, -+ CQ_Priority)) == NULL) -+ goto failed; -+ -+ ASSERT (elan4_cq2num(dev->dev_flush_cq[i]) == i); -+ -+ dev->dev_flush_finished |= (1 << i); -+ } -+ -+ /* Allocate command queues for dma halt operations */ -+ if ((dev->dev_dma_flushop[0].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit, 0)) == NULL || -+ (dev->dev_dma_flushop[1].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit, CQ_Priority)) == NULL) -+ goto failed; -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+#endif -+ -+ /* initialise halt operation for flushing the icache */ -+ dev->dev_iflush_haltop.op_function = device_iflush_haltop; -+ dev->dev_iflush_haltop.op_arg = dev; -+ dev->dev_iflush_haltop.op_mask = INT_TProcHalted; -+ -+ /* Allocate a route table, and create a valid route for vp==0, this is used -+ * when a DMA is removed from the dma run queue */ -+ if ((dev->dev_routetable = elan4_alloc_routetable (dev, 0)) == NULL) -+ goto failed; -+ -+ elan4_set_routetable (&dev->dev_ctxt, dev->dev_routetable); -+ -+ entry.Values[0] = FIRST_MYLINK; -+ entry.Values[1] = 0; -+ -+ elan4_write_route (dev, dev->dev_routetable, 0, &entry); -+ -+ /* map the sdram pages into the elan */ -+ dev->dev_tproc_suspend = DEVICE_TPROC_SUSPEND_ADDR; -+ dev->dev_tproc_space = DEVICE_TPROC_SPACE_ADDR; -+ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, (dev->dev_sdrampages[0] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocExecute)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_space, (dev->dev_sdrampages[1] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocDataWrite)); -+ -+ /* and store the thread suspend sequence in it for use when a thread is removed from the run queue */ -+ elan4_sdram_writel (dev, dev->dev_sdrampages[0], DEVICE_TPROC_SUSPEND_INSTR); -+ -+#ifdef CONFIG_MPSAS -+ do_initdma (dev); -+#endif -+ -+ if (!elan4_neterr_init (dev)) -+ goto failed; -+ -+ elan4_configure_mtrr (dev); -+ -+ /* finally register the device with elanmod for rms */ -+ dev->dev_idx = elan_dev_register (&dev->dev_devinfo, &elan4_dev_ops, (void *) dev); -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ -+ return (0); -+ -+ failed: -+ printk ("elan%d: failed to start elan4 device - stopping\n", dev->dev_instance); -+ -+ elan4_stop_device (dev); -+ return (-ENOMEM); -+} -+ -+void -+elan4_stop_device (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ int i, tbl; -+ -+ dev->dev_state = ELAN4_STATE_STOPPING; -+ -+ elan_dev_deregister (&dev->dev_devinfo); -+ -+ elan4_unconfigure_mtrr (dev); -+ -+ elan4_neterr_destroy (dev); -+ -+ if (dev->dev_tproc_suspend) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_tproc_space) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_space, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_routetable) -+ { -+ elan4_set_routetable (&dev->dev_ctxt, NULL); -+ elan4_free_routetable (dev, dev->dev_routetable); -+ } -+ -+ for (i = 0; i < 2; i++) -+ if (dev->dev_dma_flushop[i].cq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_dma_flushop[i].cq); -+ -+ /* free of the device context - and insert cache flushing command queues */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ if (dev->dev_flush_cq[i]) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_flush_cq[i]); -+ -+ if (dev->dev_ctxt.ctxt_dev) -+ elan4_removectxt (dev, &dev->dev_ctxt); -+ -+ /* stop the mainint thread */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ dev->dev_stop_threads = 1; -+ -+ while (dev->dev_mainint_started && !dev->dev_mainint_stopped) -+ { -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ dev->dev_mainint_started = dev->dev_mainint_stopped = 0; -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ /* cancel any error interrupt timeouts */ -+ if (timer_fn_queued (&dev->dev_error_timeoutid)) -+ cancel_timer_fn (&dev->dev_error_timeoutid); -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ cancel_timer_fn (&dev->dev_linkerr_timeoutid); -+ -+ /* reset the interrupt mask register to zero */ -+ if (dev->dev_regs) -+ SET_INT_MASK (dev, 0); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ if (dev->dev_mmuhash[tbl]) -+ KMEM_FREE (dev->dev_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY)); -+ if (dev->dev_mmufree[tbl]) -+ KMEM_FREE (dev->dev_mmufree[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *)); -+ if (dev->dev_hashtable[tbl]) -+ elan4_sdram_free (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+ } -+ -+ if (dev->dev_cqamap) -+ KMEM_FREE (dev->dev_cqamap, BT_BITOUL (dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t)); -+ if (dev->dev_ctxmap) -+ KMEM_FREE (dev->dev_ctxmap, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t)); -+ -+ if (dev->dev_comqlowpri) -+ elan4_sdram_free (dev, dev->dev_comqlowpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_comqhighpri) -+ elan4_sdram_free (dev, dev->dev_comqhighpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_cqaddr) -+ elan4_sdram_free (dev, dev->dev_cqaddr, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ if (dev->dev_dmaqhighpri) -+ elan4_sdram_free (dev, dev->dev_dmaqhighpri, E4_QueueSize(elan4_dmaq_highpri_size)); -+ if (dev->dev_dmaqlowpri) -+ elan4_sdram_free (dev, dev->dev_dmaqlowpri, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ if (dev->dev_threadqhighpri) -+ elan4_sdram_free (dev, dev->dev_threadqhighpri, E4_QueueSize(elan4_threadq_highpri_size)); -+ if (dev->dev_threadqlowpri) -+ elan4_sdram_free (dev, dev->dev_threadqlowpri, E4_QueueSize(elan4_threadq_lowpri_size)); -+ if (dev->dev_interruptq) -+ elan4_sdram_free (dev, dev->dev_interruptq, E4_QueueSize(elan4_interruptq_size)); -+ -+ if (dev->dev_ctxtable) -+ elan4_sdram_free (dev, dev->dev_ctxtable, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ if (dev->dev_faultarea) -+ elan4_sdram_free (dev, dev->dev_faultarea, CUN_Entries * sizeof (E4_FaultSave)); -+ if (dev->dev_inputtraparea) -+ elan4_sdram_free (dev, dev->dev_inputtraparea, sizeof (E4_IprocTrapState)); -+ -+ if (dev->dev_sdrampages[0]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[0], SDRAM_PAGE_SIZE); -+ if (dev->dev_sdrampages[1]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[1], SDRAM_PAGE_SIZE); -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_fini_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ elan4_pcifini (dev); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ if (dev->dev_ack_errors) -+ kfree(dev->dev_ack_errors); -+ if (dev->dev_dproc_timeout) -+ kfree(dev->dev_dproc_timeout); -+ if (dev->dev_cproc_timeout) -+ kfree(dev->dev_cproc_timeout); -+} -+ -+static __inline__ int -+compute_arity (int lvl, unsigned n, char *arity) -+{ -+ if (arity[lvl] == 0) -+ { -+ if (n <= 8) -+ arity[lvl] = n; -+ else -+ arity[lvl] = 4; -+ } -+ -+ return (arity[lvl]); -+} -+ -+int -+elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned arityval) -+{ -+ int i, lvl, n; -+ char arity[ELAN_MAX_LEVELS]; -+ -+ if (nodeid >= numnodes) -+ return -EINVAL; -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, arityval >>= 4) -+ arity[i] = arityval & 7; -+ -+ for (lvl = 0, n = numnodes; n > compute_arity(lvl, n, arity) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if ((n % arity[lvl]) != 0) -+ return -EINVAL; -+ -+ n /= arity[lvl]; -+ } -+ -+ if (arity[lvl] != n) -+ return -EINVAL; -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = arity[lvl - i]; -+ -+ pos->pos_nodes = numnodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeid; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return 0; -+} -+ -+int -+elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ kmutex_lock (&dev->dev_lock); -+ *pos = dev->dev_position; -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (pos->pos_mode); -+} -+ -+int -+elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ int forceLocal = 0; -+ int nnodes, i; -+ unsigned int *ack_errors; -+ unsigned int *dproc_timeout; -+ unsigned int *cproc_timeout; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_UNKNOWN: -+ break; -+ -+ case ELAN_POS_MODE_SWITCHED: -+ if (pos->pos_levels > ELAN_MAX_LEVELS) -+ return (-EINVAL); -+ -+ for (i = 0, nnodes = 1; i < pos->pos_levels; i++) -+ { -+ -+ if (pos->pos_arity[i] <= 0 || (i == 0 ? pos->pos_arity[i] > 8 : pos->pos_arity[i] >= 8)) /* allow an 8 way top-switch */ -+ return (-EINVAL); -+ -+ nnodes *= pos->pos_arity[i]; -+ } -+ -+ if (pos->pos_nodes > nnodes || pos->pos_nodeid >= pos->pos_nodes) -+ return (-EINVAL); -+ break; -+ -+ case ELAN_POS_MODE_LOOPBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 1 || pos->pos_nodeid != 0 || pos->pos_arity[0] != 1) -+ return (-EINVAL); -+ -+ forceLocal = 1; -+ break; -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 2 || pos->pos_nodeid >= 2 || pos->pos_arity[0] != 2) -+ return (-EINVAL); -+ -+ forceLocal = (pos->pos_nodeid == 0); -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ ack_errors = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!ack_errors) -+ return (-EINVAL); -+ memset(ack_errors, 0, pos->pos_nodes * sizeof(unsigned int)); -+ dproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!dproc_timeout) -+ { -+ kfree(ack_errors); -+ return (-EINVAL); -+ } -+ memset(dproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ cproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!cproc_timeout) -+ { -+ kfree(ack_errors); -+ kfree(dproc_timeout); -+ return (-EINVAL); -+ } -+ memset(cproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_position = *pos; -+ dev->dev_ack_errors = ack_errors; -+ dev->dev_dproc_timeout = dproc_timeout; -+ dev->dev_cproc_timeout = cproc_timeout; -+ spin_lock_init(&dev->dev_error_routes_lock); -+ -+ if (forceLocal) -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) | LCONT_FORCE_COMMSCLK_LOCAL); -+ else -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) & ~LCONT_FORCE_COMMSCLK_LOCAL); -+ -+ pioflush_reg (dev); -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (0); -+} -+ -+void -+elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask) -+{ -+ kmutex_lock (&dev->dev_lock); -+ -+ *mask = dev->dev_devinfo.dev_params_mask; -+ memcpy (params, &dev->dev_devinfo.dev_params, sizeof (ELAN_PARAMS)); -+ -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+void -+elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask) -+{ -+ int i; -+ -+ kmutex_lock (&dev->dev_lock); -+ for (i = 0; i < ELAN4_PARAM_COUNT; i++) -+ if (mask & (1 << i)) -+ dev->dev_devinfo.dev_params.values[i] = params->values[i]; -+ -+ dev->dev_devinfo.dev_params_mask |= mask; -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+ -+EXPORT_SYMBOL(elan4_get_position); -+EXPORT_SYMBOL(elan4_set_position); -+ -+EXPORT_SYMBOL(elan4_queue_haltop); -+EXPORT_SYMBOL(elan4_queue_dma_flushop); -+EXPORT_SYMBOL(elan4_queue_mainintop); -+ -+EXPORT_SYMBOL(elan4_insertctxt); -+EXPORT_SYMBOL(elan4_removectxt); -+ -+EXPORT_SYMBOL(elan4_attach_filter); -+EXPORT_SYMBOL(elan4_detach_filter); -+EXPORT_SYMBOL(elan4_set_filter); -+EXPORT_SYMBOL(elan4_set_routetable); -+ -+EXPORT_SYMBOL(elan4_alloccq); -+EXPORT_SYMBOL(elan4_freecq); -+EXPORT_SYMBOL(elan4_restartcq); -+ -+EXPORT_SYMBOL(elan4_flush_icache); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/device_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/device_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/device_Linux.c 2005-06-01 23:12:54.606438040 -0400 -@@ -0,0 +1,2625 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device_Linux.c,v 1.74.6.9 2005/01/18 14:44:11 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#ifdef CONFIG_MTRR -+#include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+#error please use a 2.4.0 series kernel or newer -+#endif -+ -+ -+#if defined(LINUX_SPARC) || defined(LINUX_PPC64) -+#define __io_remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#elif defined(NO_RMAP) -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#else -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(vma,from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(vma,from,offset,size,prot) -+#endif -+ -+#ifndef pgprot_noncached -+static inline pgprot_t pgprot_noncached(pgprot_t _prot) -+{ -+ unsigned long prot = pgprot_val(_prot); -+#if defined(__powerpc__) -+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; -+#elif defined(__sparc__) -+ prot &= ~(_PAGE_CACHE); -+ prot |= _PAGE_IE; -+#endif -+ -+ return __pgprot(prot); -+} -+#endif -+ -+#ifndef pgprot_writecombine -+static inline pgprot_t pgprot_writecombine (pgprot_t _prot) -+{ -+ return _prot; -+} -+#endif -+ -+#define ELAN4_DRIVER_VERSION 0x103 /* 16 bit value */ -+ -+/* -+ * Function prototypes. -+ */ -+static int elan4_attach_device (int instance, struct pci_dev *pdev); -+static void elan4_detach_device (ELAN4_DEV *dev); -+ -+static int elan4_open (struct inode *inode, struct file *file); -+static int elan4_release(struct inode *inode, struct file *file); -+static int elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan4_mmap (struct file *file, struct vm_area_struct *vm_area); -+ -+static irqreturn_t elan4_irq (int irq, void *arg, struct pt_regs *regs); -+ -+static void elan4_shutdown_devices(int panicing); -+ -+static int disabled; /* bitmask of which devices not to start */ -+unsigned int elan4_pll_cfg = 0; -+int elan4_pll_div = 31; /* RevC PCB */ -+int elan4_mod45disable = 0; -+static int optimise_pci_bus = 1; /* 0 => don't, 1 => if ok, 2 => always */ -+static int default_features = 0; /* default values for dev_features */ -+ -+long long sdram_cfg = SDRAM_STARTUP_VALUE; -+static int sdram_cfg_lo; -+static int sdram_cfg_hi; -+int sdram_bank_limit; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan 4 Device Driver"); -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(elan4_debug, "i"); -+MODULE_PARM(elan4_debug_toconsole, "i"); -+MODULE_PARM(elan4_debug_tobuffer, "i"); -+MODULE_PARM(elan4_debug_mmu, "i"); -+MODULE_PARM(elan4_pll_cfg, "i"); -+MODULE_PARM(elan4_pll_div, "i"); -+MODULE_PARM(elan4_mod45disable, "i"); -+MODULE_PARM(optimise_pci_bus, "i"); -+MODULE_PARM(default_features, "i"); -+ -+MODULE_PARM(disabled, "i"); -+MODULE_PARM(sdram_cfg_lo, "i"); -+MODULE_PARM(sdram_cfg_hi, "i"); -+MODULE_PARM(sdram_bank_limit, "i"); -+ -+MODULE_PARM(elan4_hash_0_size_val, "i"); -+MODULE_PARM(elan4_hash_1_size_val, "i"); -+MODULE_PARM(elan4_ctxt_table_shift, "i"); -+MODULE_PARM(elan4_ln2_max_cqs, "i"); -+MODULE_PARM(elan4_dmaq_highpri_size, "i"); -+MODULE_PARM(elan4_threadq_highpri_size, "i"); -+MODULE_PARM(elan4_dmaq_lowpri_size, "i"); -+MODULE_PARM(elan4_threadq_lowpri_size, "i"); -+MODULE_PARM(elan4_interruptq_size, "i"); -+ -+MODULE_PARM(elan4_mainint_punt_loops, "i"); -+MODULE_PARM(elan4_mainint_resched_ticks, "i"); -+ -+MODULE_PARM(user_p2p_route_options, "i"); -+MODULE_PARM(user_bcast_route_options, "i"); -+MODULE_PARM(user_dproc_retry_count, "i"); -+MODULE_PARM(user_cproc_retry_count, "i"); -+ -+/* -+ * Standard device entry points. -+ */ -+static struct file_operations elan4_fops = { -+ ioctl: elan4_ioctl, -+ mmap: elan4_mmap, -+ open: elan4_open, -+ release: elan4_release, -+}; -+ -+ELAN4_DEV *elan4_devices[ELAN4_MAX_CONTROLLER]; -+ -+#if defined(CONFIG_DEVFS_FS) -+static devfs_handle_t devfs_handle; -+#endif -+ -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32_cmds[] = -+{ /* /dev/elan/control */ -+ ELAN4IO_DEVINFO, -+ ELAN4IO_GET_POSITION, -+ ELAN4IO_SET_POSITION, -+ ELAN4IO_GET_PARAMS, -+ ELAN4IO_SET_PARAMS, -+ -+ /* /dev/elan4/user */ -+ ELAN4IO_POSITION, -+ ELAN4IO_FREE, -+ ELAN4IO_ATTACH, -+ ELAN4IO_DETACH, -+ ELAN4IO_BLOCK_INPUTTER, -+ -+ ELAN4IO_ADD_P2PVP, -+ ELAN4IO_ADD_BCASTVP, -+ ELAN4IO_REMOVEVP, -+ ELAN4IO_SET_ROUTE, -+ ELAN4IO_RESET_ROUTE, -+ ELAN4IO_GET_ROUTE, -+ ELAN4IO_CHECK_ROUTE, -+ -+ ELAN4IO_ALLOCCQ, -+ ELAN4IO_FREECQ, -+ ELAN4IO_SETPERM32, -+ ELAN4IO_CLRPERM32, -+ ELAN4IO_TRAPSIG, -+ ELAN4IO_TRAPHANDLER32, -+ ELAN4IO_REQUIRED_MAPPINGS, -+ -+ ELAN4IO_RESUME_EPROC_TRAP, -+ ELAN4IO_RESUME_CPROC_TRAP, -+ ELAN4IO_RESUME_DPROC_TRAP, -+ ELAN4IO_RESUME_TPROC_TRAP, -+ ELAN4IO_RESUME_IPROC_TRAP, -+ -+ ELAN4IO_FLUSH_ICACHE, -+ -+ ELAN4IO_STOP_CTXT, -+ -+ ELAN4IO_ALLOC_INTCOOKIE, -+ ELAN4IO_FREE_INTCOOKIE, -+ ELAN4IO_ARM_INTCOOKIE, -+ ELAN4IO_WAIT_INTCOOKIE, -+ -+ ELAN4IO_ALLOC_TRAP_QUEUES, -+ ELAN4IO_NETERR_MSG, -+ ELAN4IO_NETERR_TIMER, -+ ELAN4IO_NETERR_FIXUP, -+ -+ ELAN4IO_DUMPCQ32, -+}; -+ -+static int elan4_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+elan4_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ elan4_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block elan4_dump_notifier = -+{ -+ notifier_call: elan4_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+elan4_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ elan4_shutdown_devices (0); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_reboot_notifier = -+{ -+ notifier_call: elan4_reboot_event, -+ priority: 0, -+}; -+ -+static int -+elan4_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan4_shutdown_devices (1); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_panic_notifier = -+{ -+ notifier_call: elan4_panic_event, -+ priority: 0, -+}; -+ -+static int __init -+elan4_init (void) -+{ -+ int err; -+ struct pci_dev *pdev; -+ int count; -+#if defined(__ia64) -+ int seenRevA = 0; -+#endif -+ -+ if ((err = register_chrdev (ELAN4_MAJOR, ELAN4_NAME, &elan4_fops)) < 0) -+ return (err); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle = devfs_mk_dir (NULL, "elan4", NULL); -+#endif -+ -+ intcookie_init(); -+ elan4_debug_init(); -+ elan4_procfs_init(); -+ -+#ifdef CONFIG_MPSAS -+ sas_init(); -+#endif -+ -+ if (sdram_cfg_lo != 0 && sdram_cfg_hi != 0) -+ sdram_cfg = (((unsigned long long) sdram_cfg_hi) << 32) | ((unsigned long long) sdram_cfg_lo); -+ -+ for (count = 0, pdev = NULL; (pdev = pci_find_device(PCI_VENDOR_ID_QUADRICS, PCI_DEVICE_ID_ELAN4, pdev)) != NULL ; count++) -+ { -+#if defined(__ia64) -+ unsigned char revid; -+ -+ pci_read_config_byte (pdev, PCI_REVISION_ID, &revid); -+ -+ if (revid == PCI_REVISION_ID_ELAN4_REVA && seenRevA++ != 0 && pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL)) -+ { -+ printk ("elan: only a single elan4a supported on rx2600\n"); -+ continue; -+ } -+#endif -+ -+ if (count < ELAN4_MAX_CONTROLLER) -+ elan4_attach_device (count, pdev); -+ } -+ -+ if (count >= ELAN4_MAX_CONTROLLER) -+ printk ("elan: found %d elan4 devices - only support %d\n", count, ELAN4_MAX_CONTROLLER); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (elan4_ioctl32_cmds[i], elan4_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan4_dump_notifier); -+#endif -+ register_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+#ifdef MODULE -+static void __exit -+elan4_exit (void) -+{ -+ int i; -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (elan4_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan4_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ if (elan4_devices[i] != NULL) -+ elan4_detach_device (elan4_devices[i]); -+ -+ elan4_procfs_fini(); -+ elan4_debug_fini(); -+ intcookie_fini(); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (devfs_handle); -+#endif -+ -+ unregister_chrdev(ELAN4_MAJOR, ELAN4_NAME); -+} -+ -+module_init (elan4_init); -+module_exit (elan4_exit); -+ -+#else -+__initcall (elan4_init); -+#endif -+ -+/* -+ * Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN4_DEVICE_MASK 0x3F -+#define ELAN4_DEVICE(inode) (MINOR((inode)->i_rdev) & ELAN4_DEVICE_MASK) -+ -+#define ELAN4_MINOR_CONTROL 0 -+#define ELAN4_MINOR_MEM 1 -+#define ELAN4_MINOR_USER 2 -+ -+#define ELAN4_MINOR_SHIFT 6 -+#define ELAN4_MINOR(inode) (MINOR((inode)->i_rdev) >> ELAN4_MINOR_SHIFT) -+ -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elan4_attach_device (int instance, struct pci_dev *pdev) -+{ -+ ELAN4_DEV *dev; -+ int res; -+ -+ if ((dev = (ELAN4_DEV *) kmalloc (sizeof (ELAN4_DEV), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ memset (dev, 0, sizeof (ELAN4_DEV)); -+ -+ /* setup os dependent section of ELAN4_DEV */ -+ dev->dev_instance = instance; -+ dev->dev_osdep.pdev = pdev; -+ dev->dev_features = default_features; -+ -+ /* initialise the devinfo */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_VENDOR_ID, &dev->dev_devinfo.dev_vendor_id); -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_DEVICE_ID, &dev->dev_devinfo.dev_device_id); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_REVISION_ID, &dev->dev_devinfo.dev_revision_id); -+ -+ dev->dev_devinfo.dev_rail = instance; -+ dev->dev_devinfo.dev_driver_version = ELAN4_DRIVER_VERSION; -+ dev->dev_devinfo.dev_num_down_links_value = 0; -+ -+ dev->dev_position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* initialise the data structures and map the device */ -+ if ((res = elan4_initialise_device (dev)) != 0) -+ { -+ kfree (dev); -+ return res; -+ } -+ -+ /* add the interrupt handler */ -+ if (request_irq (pdev->irq, elan4_irq, SA_SHIRQ, "elan4", dev) != 0) -+ { -+ elan4_finalise_device (dev); -+ kfree (dev); -+ return -ENXIO; -+ } -+ -+ if (pci_request_regions(dev->dev_osdep.pdev, "elan4")) -+ { -+ free_irq (dev->dev_osdep.pdev->irq, dev); -+ kfree (dev); -+ return -ENODEV; -+ } -+ -+#if defined(CONFIG_DEVFS_FS) -+ { -+ char name[16]; -+ -+ sprintf (name, "control%d", dev->dev_instance); -+ dev->dev_osdep.devfs_control = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_CONTROL << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR | S_IWUSR, -+ &elan4_fops, NULL); -+ sprintf (name, "sdram%d", dev->dev_instance); -+ dev->dev_osdep.devfs_sdram = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_MEM << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ sprintf (name, "user%d", dev->dev_instance); -+ dev->dev_osdep.devfs_user = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_USER << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ } -+#endif -+ -+ /* add the procfs entry */ -+ elan4_procfs_device_init (dev); -+ -+ /* allow the device to be referenced now */ -+ elan4_devices[instance] = dev; -+ -+ if ((disabled & (1 << instance)) == 0) -+ { -+ if (elan4_start_device (dev) != 0) -+ { -+ printk ("elan%d: auto-start of device failed\n", dev->dev_instance); -+ -+ elan4_detach_device (dev); -+ return (-ENXIO); -+ } -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ } -+ -+#if defined (__sparc) -+ printk ("elan%d: at pci %s (irq = %s)\n", instance, pdev->slot_name, __irq_itoa(pdev->irq)); -+#else -+ printk ("elan%d: at pci %s (irq = %d)\n", instance, pdev->slot_name, pdev->irq); -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Called by cleanup_module() for each board found on PCI. -+ */ -+static void -+elan4_detach_device (ELAN4_DEV *dev) -+{ -+ /* stop the chip and free of resources */ -+ if (dev->dev_state == ELAN4_STATE_STARTED) -+ elan4_stop_device (dev); -+ -+ elan4_devices[dev->dev_instance] = NULL; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (dev->dev_osdep.devfs_control); -+ devfs_unregister (dev->dev_osdep.devfs_sdram); -+ devfs_unregister (dev->dev_osdep.devfs_user); -+#endif -+ -+ /* release the address space */ -+ pci_release_regions (dev->dev_osdep.pdev); -+ -+ /* release the interrupt */ -+ free_irq (dev->dev_osdep.pdev->irq, dev); -+ -+ /* remove the procfs entry */ -+ elan4_procfs_device_fini (dev); -+ -+ /* unmap the device and finalise the data structures */ -+ elan4_finalise_device (dev); -+ -+ kfree (dev); -+} -+ -+/* -+ * Maintain reference counts on the device -+ */ -+ELAN4_DEV * -+elan4_reference_device (int instance, int state) -+{ -+ ELAN4_DEV *dev = elan4_devices[instance]; -+ -+ if (dev == NULL) -+ return (NULL); -+ -+ kmutex_lock (&dev->dev_lock); -+ -+ if ((dev->dev_state & state) == 0) -+ { -+ kmutex_unlock (&dev->dev_lock); -+ return (NULL); -+ } -+ -+ dev->dev_references++; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MPSAS -+ sas_set_position(dev); -+#endif -+ -+ return (dev); -+} -+ -+void -+elan4_dereference_device (ELAN4_DEV *dev) -+{ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_references--; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+static void -+elan4_shutdown_devices(int panicing) -+{ -+ ELAN4_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan4_devices[i]) != NULL) -+ { -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->dev_instance); -+ -+ /* set the inputters to discard everything */ -+ if (! panicing) spin_lock (&dev->dev_haltop_lock); -+ -+ if (dev->dev_discard_lowpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ if (dev->dev_discard_highpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ -+ if (! panicing) spin_unlock (&dev->dev_haltop_lock); -+ -+ /* ideally we'd like to halt all the outputters too, -+ * however this will prevent the kernel comms flushing -+ * to work correctly ..... -+ */ -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+/* -+ * /dev/elan4/controlX - control device -+ * -+ */ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STOPPED | ELAN4_STATE_STARTED); -+ CONTROL_PRIVATE *pr; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ if ((pr = (CONTROL_PRIVATE *) kmalloc (sizeof (CONTROL_PRIVATE), GFP_KERNEL)) == NULL) -+ { -+ elan4_dereference_device (dev); -+ -+ return (-ENOMEM); -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_open: dev=%p pr=%p\n", dev, pr); -+ -+ pr->pr_dev = dev; -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ ELAN4_DEV *dev = pr->pr_dev; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_release: pr=%p\n", pr); -+ -+ //if (pr->pr_boundary_scan) -+ // elan4_clear_boundary_scan (dev, pr); -+ -+ elan4_dereference_device (dev); -+ -+ kfree (pr); -+ -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ if (copy_to_user ((void *) arg, &pr->pr_dev->dev_devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ -+ case ELAN4IO_GET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ elan4_get_position (pr->pr_dev, &pos); -+ -+ if (copy_to_user ((void *) arg, &pos, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ if (copy_from_user (&pos, (void *) arg, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (elan4_set_position (pr->pr_dev, &pos)); -+ } -+ -+ case ELAN4IO_OLD_GET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ unsigned short mask; -+ -+ elan4_get_params (pr->pr_dev, ¶ms, &mask); -+ -+ if (copy_to_user ((void *) arg, ¶ms, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_OLD_SET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ -+ if (copy_from_user (¶ms, (void *) arg, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, ¶ms, 3); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PARAMS_STRUCT))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, &args.p_params, args.p_mask); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_GET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ elan4_get_params (pr->pr_dev, &args.p_params, &args.p_mask); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ unsigned bar = OFF_TO_BAR (vma->vm_pgoff << PAGE_SHIFT); -+ unsigned long off = OFF_TO_OFFSET (vma->vm_pgoff << PAGE_SHIFT); -+ long len = vma->vm_end - vma->vm_start; -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_mmap: pr=%p bar=%x off=%x\n", pr, bar, off); -+ -+ /* check bar number and translate the standard psuedo bars */ -+ switch (bar) -+ { -+ case ELAN4_BAR_SDRAM: -+ case ELAN4_BAR_REGISTERS: -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ if (off < 0 || (off + len) > pci_resource_len (pr->pr_dev->dev_osdep.pdev, bar)) -+ return (-EINVAL); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (vma->vm_start, pci_resource_start (pr->pr_dev->dev_osdep.pdev, bar) + off, len, vma->vm_page_prot)) -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan4/sdramX - sdram access device -+ */ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_USER, DBG_MEM, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan4_sdram_free (pr->pr_dev, pg->pg_addr, SDRAM_PAGE_SIZE); -+ kfree (pg); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, unsigned long pgoff) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ ASSERT ((pgoff & SDRAM_PGOFF_OFFSET) == 0); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx\n", pr, pgoff); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx -> found %p addr=%x\n", pr, pgoff, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ if ((npg = (MEM_PAGE *) kmalloc (sizeof (MEM_PAGE), GFP_KERNEL)) == NULL) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan4_sdram_alloc (pr->pr_dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ kfree (npg); -+ return (NULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ /* zero the page before returning it to the user */ -+ elan4_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, SDRAM_PAGE_SIZE); -+#endif -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, unsigned long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED); -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ if ((pr = (MEM_PRIVATE *) kmalloc (sizeof (MEM_PRIVATE), GFP_KERNEL)) == NULL) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ elan4_dereference_device (pr->pr_dev); -+ kfree (pr); -+ -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void -+mem_vma_open (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK); -+} -+ -+static void -+mem_vma_close (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occurred -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+} -+ -+struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK)) == NULL) -+ goto failed; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: addr %lx -> pg=%p sdram=%x+%x bar=%lx\n", -+ addr, pg, pg->pg_addr, (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, -+ pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (pr->pr_dev->dev_features & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ pg->pg_addr + (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, PAGE_SIZE, vma->vm_page_prot)) -+ { -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+#if defined(conditional_schedule) -+ conditional_schedule(); -+#endif -+ } -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+ -+ failed: -+ /* free of any pages we've already allocated/referenced */ -+ while (pgoff-- > vma->vm_pgoff) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+ -+ return (-ENOMEM); -+} -+ -+/* -+ * /dev/elan4/userX - control device -+ * -+ */ -+static inline void -+user_private_free (USER_PRIVATE *pr) -+{ -+ ELAN4_DEV *dev = pr->pr_uctx->uctx_ctxt.ctxt_dev; -+ -+ ASSERT (atomic_read (&pr->pr_ref) == 0); -+ -+ user_free (pr->pr_uctx); -+ kfree (pr); -+ -+ elan4_dereference_device (dev); -+} -+ -+static void -+user_coproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ elan4mmu_invalidate_ctxt (&pr->pr_uctx->uctx_ctxt); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+} -+ -+static void -+user_coproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ /* XXXX: this is intended to sync the modified bit from our page tables, -+ * into the main cpu's modified bits - however since we do not -+ * syncronize our modified bit on a coproc_invalidate_page() call, -+ * then it could get lost if we modify the page after the last -+ * modification and writepage has occurred. Hence we invalidate -+ * all translations and allow it to refault. -+ */ -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_coproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_coproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_update_range: start=%lx end=%lx\n", start, end); -+ -+#if defined(CONFIG_MPSAS) -+ if (((end - start) >> PAGE_SHIFT) > 16) -+ return; -+#endif -+ -+ user_update_main (pr->pr_uctx, mm, start, end - start); -+} -+ -+static void -+user_coproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_coproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_sync_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_coproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_invalidate_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_coproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_coproc_update_page: addr=%lx\n", addr); -+ -+ user_update_main (pr->pr_uctx, vma->vm_mm, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev; -+ USER_PRIVATE *pr; -+ USER_CTXT *uctx; -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_open: mm=%p users=%d count=%d\n", current->mm, -+ atomic_read (¤t->mm->mm_users), atomic_read (¤t->mm->mm_count)); -+ -+ if ((dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED)) == NULL) -+ return (-ENXIO); -+ -+ if ((pr = (USER_PRIVATE *) kmalloc (sizeof (USER_PRIVATE), GFP_KERNEL)) == NULL) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ uctx = user_alloc (dev); -+ -+ if (IS_ERR(uctx)) -+ { -+ elan4_dereference_device (dev); -+ kfree (pr); -+ -+ return PTR_ERR(uctx); -+ } -+ -+ /* initialise refcnt to 2 - one for "file", one for the coproc ops */ -+ atomic_set (&pr->pr_ref, 2); -+ -+ pr->pr_uctx = uctx; -+ pr->pr_mm = current->mm; -+ -+ /* register a coproc callback to notify us of translation changes */ -+ pr->pr_coproc.arg = (void *) pr; -+ pr->pr_coproc.release = user_coproc_release; -+ pr->pr_coproc.sync_range = user_coproc_sync_range; -+ pr->pr_coproc.invalidate_range = user_coproc_invalidate_range; -+ pr->pr_coproc.update_range = user_coproc_update_range; -+ pr->pr_coproc.change_protection = user_coproc_change_protection; -+ pr->pr_coproc.sync_page = user_coproc_sync_page; -+ pr->pr_coproc.invalidate_page = user_coproc_invalidate_page; -+ pr->pr_coproc.update_page = user_coproc_update_page; -+ -+ spin_lock (¤t->mm->page_table_lock); -+ register_coproc_ops (current->mm, &pr->pr_coproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF (pr->pr_uctx, DBG_FILE, "user_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ int res = 0; -+ -+ PRINTF (uctx, DBG_FILE, "user_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ if (copy_to_user ((void *) arg, &uctx->uctx_ctxt.ctxt_dev->dev_devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ -+ case ELAN4IO_POSITION: -+ { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ if (copy_to_user ((void *) arg, &dev->dev_position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN4IO_FREE: -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ unregister_coproc_ops (current->mm, &pr->pr_coproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_coproc_release (pr, current->mm); -+ } -+ return (0); -+ -+ case ELAN4IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ if ((cap = kmalloc (sizeof (ELAN_CAPABILITY), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else if ((res = user_attach (uctx, cap)) == 0 && -+ copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ user_detach (uctx, cap); -+ res = -EFAULT; -+ } -+ -+ kfree (cap); -+ return (res); -+ } -+ -+ case ELAN4IO_DETACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ if ((cap = kmalloc (sizeof (ELAN_CAPABILITY), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ user_detach (uctx, cap); -+ -+ kfree (cap); -+ return (res); -+ } -+ -+ case ELAN4IO_BLOCK_INPUTTER: -+ user_block_inputter (uctx, arg); -+ return (0); -+ -+ case ELAN4IO_ADD_P2PVP: -+ { -+ ELAN4IO_ADD_P2PVP_STRUCT *args; -+ -+ if ((args = kmalloc (sizeof (ELAN4IO_ADD_P2PVP_STRUCT), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_ADD_P2PVP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = user_add_p2pvp (uctx, args->vp_process, &args->vp_capability); -+ -+ kfree (args); -+ return (res); -+ } -+ -+ case ELAN4IO_ADD_BCASTVP: -+ { -+ ELAN4IO_ADD_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ADD_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_add_bcastvp (uctx, args.vp_process, args.vp_lowvp, args.vp_highvp)); -+ } -+ -+ case ELAN4IO_REMOVEVP: -+ return (user_removevp (uctx, arg)); -+ -+ case ELAN4IO_SET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_set_route (uctx, args.rt_process, &args.rt_route)); -+ } -+ -+ case ELAN4IO_RESET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_reset_route (uctx, args.rt_process)); -+ } -+ -+ case ELAN4IO_GET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_get_route (uctx, args.rt_process, &args.rt_route)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_CHECK_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_check_route (uctx, args.rt_process, &args.rt_route, &args.rt_error)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_ALLOCCQ: -+ { -+ ELAN4IO_ALLOCCQ_STRUCT args; -+ USER_CQ *ucq; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ return (-EFAULT); -+ -+ ucq = user_alloccq (uctx, args.cq_size & CQ_SizeMask, args.cq_perm & CQ_PermissionMask, -+ (args.cq_type & ELAN4IO_CQ_TYPE_REORDER) ? UCQ_REORDER : 0); -+ if (IS_ERR (ucq)) -+ return PTR_ERR (ucq); -+ -+ args.cq_indx = elan4_cq2idx (ucq->ucq_cq); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ { -+ user_dropcq (uctx, ucq); -+ return (-EFAULT); -+ } -+ -+ /* don't drop the reference on the cq until the context is freed, -+ * or the caller explicitly frees the cq */ -+ return (0); -+ } -+ -+ case ELAN4IO_FREECQ: -+ { -+ USER_CQ *ucq; -+ unsigned indx; -+ -+ if (copy_from_user (&indx, (void *) arg, sizeof (unsigned))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, indx)) == NULL) /* can't free unallocated cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ if ((ucq->ucq_flags & UCQ_SYSTEM)) /* can't free device driver cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* and the one held from the alloccq call */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_DUMPCQ: -+ { -+ ELAN4IO_DUMPCQ_STRUCT args; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq; -+ void *buf; -+ int i; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, args.cq_indx)) == NULL) -+ return (-EINVAL); -+ -+ if (args.bufsize) -+ { -+ E4_uint32 usedBufSize = min(args.cq_size, args.bufsize); -+ -+ KMEM_ALLOC (buf, void *, usedBufSize, 0); -+ -+ if (buf == NULL) -+ return (-ENOMEM); -+ -+ for (i=0; iucq_cq->cq_space + i); -+ -+ if (copy_to_user((void *)args.buffer, buf, usedBufSize)) -+ { -+ KMEM_FREE(buf, args.bufsize); -+ return (-EFAULT); -+ } -+ KMEM_FREE(buf, usedBufSize); -+ args.bufsize = usedBufSize; -+ } -+ -+ args.cq_size = CQ_Size(ucq->ucq_cq->cq_size); -+ args.cq_space = ucq->ucq_cq->cq_space; -+ -+ -+ if (copy_to_user((void *)arg, &args, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ { -+ return (-EFAULT); -+ } -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SETPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPSIG: -+ { -+ ELAN4IO_TRAPSIG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPSIG_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_trap_pid = current->pid; -+ pr->pr_uctx->uctx_trap_signo = args.ts_signo; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)args.th_trapp, args.th_nticks)); -+ } -+ -+ case ELAN4IO_REQUIRED_MAPPINGS: -+ { -+ ELAN4IO_REQUIRED_MAPPINGS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_REQUIRED_MAPPINGS_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_upage_addr = args.rm_upage_addr; -+ pr->pr_uctx->uctx_trestart_addr = args.rm_trestart_addr; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_ALLOC_TRAP_QUEUES: -+ { -+ ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_alloc_trap_queues (uctx, args.tq_ndproc_traps, args.tq_neproc_traps, -+ args.tq_ntproc_traps, args.tq_nthreads, args.tq_ndmas)); -+ } -+ -+ case ELAN4IO_RESUME_EPROC_TRAP: -+ { -+ ELAN4IO_RESUME_EPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_EPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_eproc_trap (pr->pr_uctx, args.rs_addr)); -+ } -+ -+ case ELAN4IO_RESUME_CPROC_TRAP: -+ { -+ ELAN4IO_RESUME_CPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_CPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_cproc_trap (pr->pr_uctx, args.rs_indx)); -+ } -+ -+ case ELAN4IO_RESUME_DPROC_TRAP: -+ { -+ ELAN4IO_RESUME_DPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_DPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_dproc_trap (pr->pr_uctx, &args.rs_desc)); -+ } -+ -+ case ELAN4IO_RESUME_TPROC_TRAP: -+ { -+ ELAN4IO_RESUME_TPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_TPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_tproc_trap (pr->pr_uctx, &args.rs_regs)); -+ } -+ -+ case ELAN4IO_RESUME_IPROC_TRAP: -+ { -+ ELAN4IO_RESUME_IPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_IPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_iproc_trap (pr->pr_uctx, args.rs_channel, args.rs_trans, -+ &args.rs_header, &args.rs_data)); -+ } -+ -+ case ELAN4IO_FLUSH_ICACHE: -+ elan4_flush_icache (&uctx->uctx_ctxt); -+ return (0); -+ -+ case ELAN4IO_STOP_CTXT: -+ if (arg) -+ user_swapout (uctx, UCTX_USER_STOPPED); -+ else -+ user_swapin (uctx, UCTX_USER_STOPPED); -+ return (0); -+ -+ case ELAN4IO_ALLOC_INTCOOKIE_TABLE: -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ if ((cap = kmalloc (sizeof (ELAN_CAPABILITY), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ { -+ tbl = intcookie_alloc_table(cap); -+ -+ if (tbl == NULL) -+ res = -ENOMEM; -+ else -+ { -+ /* Install the intcookie table we've just created */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table != NULL) -+ res = -EBUSY; -+ else -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ /* drop the table we created if there already was one */ -+ if (res != 0) -+ intcookie_free_table (tbl); -+ } -+ } -+ -+ kfree (cap); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE_TABLE: -+ { -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&uctx->uctx_spinlock); -+ tbl = uctx->uctx_intcookie_table; -+ uctx->uctx_intcookie_table = NULL; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ if (tbl != NULL) -+ intcookie_free_table (tbl); -+ -+ return (tbl == NULL ? -EINVAL : 0); -+ } -+ -+ case ELAN4IO_ALLOC_INTCOOKIE: -+ { -+ /* For backwards compatibility with the old libs (pre 1.8.0) -+ * we allocate an intcookie table on the first cookie -+ * alloc if one hasn't be created already -+ */ -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ if ((cap = kmalloc (sizeof (ELAN_CAPABILITY), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ /* Create a dummy capability */ -+ elan_nullcap(cap); -+ -+ /* Must be unique for each process on a node */ -+ cap->cap_mycontext = (int) ELAN4_TASK_HANDLE(); -+ -+ /* Create a new intcookie table */ -+ tbl = intcookie_alloc_table(cap); -+ -+ /* Hang intcookie table off uctx */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ } -+ else -+ { -+ spin_unlock (&uctx->uctx_spinlock); -+ intcookie_free_table(tbl); -+ } -+ -+ kfree(cap); -+ } -+ -+ return (intcookie_alloc (uctx->uctx_intcookie_table, arg)); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_free (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_ARM_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_arm (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_WAIT_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_wait (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_FIRE_INTCOOKIE: -+ { -+ ELAN4IO_FIRECAP_STRUCT *args; -+ -+ if ((args = kmalloc (sizeof (ELAN4IO_FIRECAP_STRUCT), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_FIRECAP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = intcookie_fire_cap (&args->fc_capability, args->fc_cookie); -+ -+ kfree (args); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_NETERR_MSG: -+ { -+ ELAN4IO_NETERR_MSG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_MSG_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_send_neterr_msg (uctx, args.nm_vp, args.nm_nctx, args.nm_retries, &args.nm_msg)); -+ } -+ -+ case ELAN4IO_NETERR_TIMER: -+ { -+ unsigned long ticks = ((unsigned long) arg * HZ) / 1000; -+ -+ PRINTF (uctx, DBG_NETERR, "elan4_neterr_timer: arg %ld inc %ld\n", arg, ticks); -+ -+ mod_timer (&uctx->uctx_neterr_timer, (jiffies + (ticks > 0 ? ticks : 1))); -+ return 0; -+ } -+ -+ case ELAN4IO_NETERR_FIXUP: -+ { -+ ELAN4IO_NETERR_FIXUP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_FIXUP_STRUCT))) -+ return (-EFAULT); -+ -+ if (args.nf_sten) -+ return (user_neterr_sten (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ else -+ return (user_neterr_dma (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ } -+ default: -+ PRINTF (uctx, DBG_FILE, "user_ioctl: invalid ioctl %x\n", cmd); -+ return (-EINVAL); -+ } -+} -+ -+static void -+user_vma_open (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ elan4_getcqa (&uctx->uctx_ctxt, pgoff); -+} -+ -+static void -+user_vma_close (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the same comments apply as mem_vma_close */ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ if (elan4_getcqa (&uctx->uctx_ctxt, pgoff) != NULL) -+ { -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* and the one held by the mmap */ -+ } -+} -+ -+struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CQA *cqa; -+ unsigned long addr; -+ unsigned long pgoff; -+ int res; -+ ioaddr_t ioaddr; -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ switch (pgoff) -+ { -+ default: -+ PRINTF (uctx, DBG_FILE, "user_mmap: command queue %ld mapping at %lx\n", pgoff, addr); -+ -+ if ((cqa = elan4_getcqa (&uctx->uctx_ctxt, pgoff)) == NULL) -+ { -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: cqa=%p idx=%d num=%d ref=%d\n", cqa, cqa->cqa_idx, cqa->cqa_cqnum, cqa->cqa_ref); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (dev->dev_features & ELAN4_FEATURE_NO_WRITE_COMBINE) && (cqa->cqa_type & CQ_Reorder) != 0) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (%lx, %lx, %lx, %lx)\n", -+ addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, PAGE_SIZE, -+ vma->vm_page_prot); -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ if (__io_remap_page_range (addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range failed\n"); -+ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_USER_REGS: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVA_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVB_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ default: -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: user_regs at %lx ioaddr %lx prot %lx\n", -+ addr, ioaddr, vma->vm_page_prot.pgprot); -+ -+ if (__io_remap_page_range (addr, (ioaddr & PAGEMASK), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ res = -EAGAIN; -+ goto failed; -+ } -+ -+ break; -+ -+ case ELAN4_OFF_USER_PAGE: -+ PRINTF (uctx, DBG_FILE, "user_mmap: shared user page - kaddr=%lx uaddr=%lx phys=%lx\n", -+ uctx->uctx_upage, addr, kmem_to_phys (uctx->uctx_upage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ -+ if (__remap_page_range (addr, kmem_to_phys (uctx->uctx_upage), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (user_page) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_TPROC_TRAMPOLINE: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: tproc trampoline - kaddr=%lx uaddr=%lx phys=%lx\n", uctx->uctx_trampoline, addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))); -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)), -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (tproc_trampoline) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_DEVICE_STATS: -+ printk ("user_mmap: device_stats\n"); -+ break; -+ } -+ -+ } -+ -+ ASSERT (vma->vm_ops == NULL); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+ -+ failed: -+ for (addr -= PAGE_SIZE, pgoff--; addr >= vma->vm_start; addr -= PAGE_SIZE, pgoff--) -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ return (res); -+} -+ -+/* driver entry points */ -+static int -+elan4_open (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_open: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_release: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ struct inode *inode = file->f_dentry->d_inode; -+ extern int sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg); -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ if (ELAN4_MINOR (inode) == ELAN4_MINOR_USER) -+ { -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ -+ if (current->mm != pr->pr_mm) -+ return -EINVAL; -+ -+ switch (cmd) -+ { -+ case ELAN4IO_SETPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: setperm maddr=%x eaddr=%llx len=%llxx perm=%d\n", -+ args.ps_maddr, args.ps_eaddr,args.ps_len, args.ps_perm); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: clrperm eaddr=%llx len=%ll\n", -+ args.ps_eaddr, args.ps_len); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER32: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: traphandler trapp=%x nticks=%d\n", -+ args.th_trapp, args.th_nticks); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)(unsigned long)args.th_trapp, args.th_nticks)); -+ } -+ } -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: fd=%d cmd=%x arg=%lx file=%p\n", fd, cmd, arg, file); -+ return (sys_ioctl (fd, cmd, arg)); -+} -+#endif -+ -+ -+ -+static int -+elan4_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx\n", -+ ELAN4_DEVICE (file->f_dentry->d_inode), ELAN4_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff); -+ -+ switch (ELAN4_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN4_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN4_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+void -+elan4_update_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *bridge) -+{ -+ u16 cnf; -+ -+ pci_read_config_word (bridge, 0x40 /* CNF */, &cnf); -+ -+ /* We expect the CNF register to be configured as follows -+ * -+ * [8] == 1 PMODE PCI Mode -+ * [7:6] == 2/3 PFREQ PCI Frequency (100/133) -+ * [5] == 0 RSDIS Restreaming Disable -+ * [4:3] == 0x PP Prefetch Policy -+ * [2] == 0 DTD Delayed Transaction Depth -+ * [1:0] == 10 MDT MaximumDelaedTransactions -+ */ -+ -+ if ((cnf & (1 << 8)) == 0) -+ printk ("elan%d: strangeness - elan reports PCI-X but P64H2 reports PCI mode !\n", dev->dev_instance); -+ else if ((cnf & 0xb7) != 0x82 && (cnf & 0xb7) != 0x84 && optimise_pci_bus < 2) -+ printk ("elan%d: P64H2 CNF is not configured as expected : RSDIS=%d PP=%d DTD=%d MDT=%d\n", -+ dev->dev_instance, (cnf >> 5) & 1, (cnf >> 3) & 3, (cnf >> 2) & 1, cnf & 3); -+ else -+ { -+ switch ((cnf >> 6) & 3) -+ { -+ case 2: /* PCI-X 100 */ -+ pci_write_config_word (bridge, 0xfc /* PC100 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 100\n", dev->dev_instance); -+ -+ break; -+ -+ case 3: /* PCI-X 133 */ -+ pci_write_config_word (bridge, 0xfe /* PC133 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 133\n", dev->dev_instance); -+ break; -+ } -+ -+ pci_write_config_word (bridge, 0x40 /* CNF */, (cnf & 0xfff8) | 0x4); /* DTD=1 MDT=0 */ -+ } -+} -+ -+int -+elan4_optimise_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+ struct pci_bus *bus = pdev->bus; -+ struct pci_dev *bridge = bus->self; -+ unsigned int devcount = 0; -+ u8 revision; -+ u32 ectrl; -+ struct list_head *el; -+ -+ pci_read_config_dword (pdev, PCI_ELAN_CONTROL, &ectrl); -+ -+ /* We can only run in PCI-Xmode with a B1 stepping P64H2 because of P64H2 Errata 3 */ -+ pci_read_config_byte (bridge, PCI_REVISION_ID, &revision); -+ if (revision < 0x04) -+ { -+ if ((ectrl & ECTRL_INITIALISATION_MODE) != Pci2_2) -+ { -+ static const char *p64h2_stepping[4] = {"UNKNOWN", "UNKNOWN", "UNKNOWN", "B0"}; -+ -+ printk ("elan%d: unable to use device because of P64H2 Errata 3 on\n" -+ " %s stepping part and running in a PCI-X slot\n", -+ dev->dev_instance, p64h2_stepping[revision]); -+ return -EINVAL; -+ } -+ } -+ -+ /* We can only alter the bus configuration registers if the Elan is the only device -+ * on the bus ... */ -+ list_for_each (el, &bus->devices) { -+ struct pci_dev *pcip = list_entry (el, struct pci_dev, bus_list); -+ -+ if (pcip == pdev || (pcip->vendor == PCI_VENDOR_ID_INTEL && pcip->device == 0x1462 /* P64H2 HOTPLUG */)) -+ continue; -+ -+ devcount++; -+ } -+ -+ if (devcount > 0 || !list_empty (&bus->children)) -+ { -+ printk ("elan%d: unable to optimise P64H2 settings as %s%s\n", dev->dev_instance, -+ (devcount > 0) ? "more than one device on bus" : "", -+ ! list_empty (&bus->children) ? "has child buses" : ""); -+ return 0; -+ } -+ -+#ifdef __ia64 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ { -+ struct pci_dev *pcip; -+ unsigned int sioh_good = 0; -+ unsigned int sioh_downgrade = 0; -+ unsigned int snc_good = 0; -+ unsigned int snc_downgrade = 0; -+ -+ /* Search for the associated SIOH and SNC on ia64, -+ * if we have a C2 SIOH and a C0/C1 SNC, then we can -+ * reconfigure the P64H2 as follows: -+ * CNF:MDT = 0 -+ * CNF:DTD = 1 -+ * CNF:PC133 = 7777 -+ * -+ * if not, then issue a warning that down rev parts -+ * affect bandwidth. -+ */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x500, pcip)); ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ if (revision >= 0x21) -+ snc_good++; -+ else -+ { -+ printk ("elan%d: SNC revision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x00 ? "A0" : revision == 0x01 ? "A1" : -+ revision == 0x02 ? "A2" : revision == 0x03 ? "A3" : -+ revision == 0x10 ? "B0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : "UNKNOWN"); -+ -+ snc_downgrade++; -+ } -+ } -+ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x510, pcip)) != NULL; ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ -+ if (revision >= 0x22) -+ sioh_good++; -+ else -+ { -+ printk ("elan%d: SIOH revsision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x10 ? "C0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : revision == 0x22 ? "C2" : "UNKNOWN"); -+ -+ sioh_downgrade++; -+ } -+ } -+ -+ if (optimise_pci_bus < 2 && (sioh_downgrade || snc_downgrade)) -+ printk ("elan%d: unable to optimise as SNC/SIOH below required C1/C2 steppings\n", dev->dev_instance); -+ else if (optimise_pci_bus < 2 && (sioh_good == 0 || snc_good == 0)) -+ printk ("elan%d: unable to optimise as cannot determine SNC/SIOH revision\n", dev->dev_instance); -+ else -+ elan4_update_intel_p64h2 (dev, bridge); -+ } -+#endif -+ -+#ifdef __i386 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ elan4_update_intel_p64h2 (dev, bridge); -+#endif -+ return 0; -+} -+ -+int -+elan4_optimise_intel_pxh (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+#ifdef __i386 -+ printk ("elan%d: unable to use device on this platform in 32 bit mode\n", dev->dev_instance); -+ -+ return -EINVAL; -+#endif -+ -+ dev->dev_features |= ELAN4_FEATURE_NO_DWORD_READ; -+ -+ return 0; -+} -+ -+void -+elan4_optimise_serverworks_ciobx2 (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ struct pci_dev *pcip; -+ unsigned char bus; -+ unsigned int dor; -+ -+ /* Find the CIOBX2 for our bus number */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, pcip)) != NULL;) -+ { -+ pci_read_config_byte (pcip, 0x44 /* BUSNUM */, &bus); -+ -+ if (pdev->bus->number == bus) -+ { -+ printk ("elan%d: optimise CIOBX2 : setting DOR to disable read pipe lining\n", dev->dev_instance); -+ -+ pci_read_config_dword (pcip, 0x78 /* DOR */, &dor); -+ pci_write_config_dword (pcip, 0x78 /* DOR */, dor | (1 << 16)); -+ } -+ } -+} -+ -+int -+elan4_optimise_bus (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ -+ if (pdev->bus && pdev->bus->self) -+ { -+ struct pci_dev *bridge = pdev->bus->self; -+ -+ if (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x1460 /* Intel P64H2 */) -+ return elan4_optimise_intel_p64h2 (dev, pdev); -+ -+ if ((bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0329) /* Intel 6700PXH Fn 0 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032a) /* Intel 6700PXH Fn 2 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032c) /* Intel 6702PXH */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0320) /* Intel PXH-D */) -+ return elan4_optimise_intel_pxh (dev, pdev); -+ } -+ -+ if (pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL) != NULL) /* on HP ZX1 set the relaxed ordering */ -+ dev->dev_pteval = PTE_RelaxedOrder; /* bit to get better DMA bandwidth. */ -+ -+ if (pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, NULL) != NULL) /* ServerWorks CIOBX2 */ -+ elan4_optimise_serverworks_ciobx2 (dev); -+ -+ return 0; -+} -+ -+int -+elan4_pciinit (ELAN4_DEV *dev) -+{ -+ int res; -+ u32 value; -+ u16 command; -+ u8 cacheline; -+ unsigned long flags; -+ -+ if (optimise_pci_bus && (res = elan4_optimise_bus (dev)) <0) -+ return (res); -+ -+ if ((res = pci_enable_device (dev->dev_osdep.pdev)) < 0) -+ return (res); -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ if ((value & ECTRL_INITIALISATION_MODE) == Pci2_2) -+ printk ("elan%d: is an elan4%c (PCI-2.2)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ else -+ { -+ switch (value & ECTRL_INITIALISATION_MODE) -+ { -+ case PciX50To66MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 50-66)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX66to100MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 66-100)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX100to133MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 100-133)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ default: -+ printk ("elan%d: Invalid PCI-X mode\n", dev->dev_instance); -+ return (-EINVAL); -+ } -+ } -+ -+ /* initialise the elan pll control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, &value); -+ -+ if (elan4_pll_cfg) -+ { -+ printk ("elan%d: setting pll control to %08x\n", dev->dev_instance, elan4_pll_cfg); -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, elan4_pll_cfg); -+ } -+ else -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_4_3); -+ else -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_6_5 | SysPll_FeedForwardISel0 | SysPll_FeedForwardISel1); -+ } -+ -+ /* initialise the elan control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ -+ value = ((15 << ECTRL_IPROC_HIGH_PRI_TIME_SHIFT) | -+ (15 << ECTRL_OTHER_HIGH_PRI_TIME_SHIFT) | -+ (value & ECTRL_28_NOT_30_BIT_LOCAL_BAR) | -+ (dev->dev_topaddrmode ? ECTRL_ExtraMasterAddrBits : 0) | -+ ECTRL_ENABLE_LATENCY_RESET | -+ ECTRL_ENABLE_WRITEBURSTS | -+ ECTRL_ENABLE_2_2READBURSTS); -+ -+#ifdef LINUX_SPARC -+ value &= ~(ECTRL_ENABLE_LATENCY_RESET | ECTRL_ENABLE_WRITEBURSTS); -+#endif -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ /* Delay 10ms here if we've changed the sysclock ratio */ -+ /* to allow the PLL to stabalise before proceeding */ -+ udelay (10000); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ { -+ unsigned char val = read_i2c (dev, I2cLedsValue); -+ -+ /* On RevB we have to explicitly reset the PLLs */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ -+ write_i2c (dev, I2cLedsValue, val | 0x80); -+ udelay (1000); -+ -+ /* Issue the PLL counter reset and immediately inhibit all pci interaction -+ * while the PLL is recovering. The write to the PCI_COMMAND register has -+ * to occur within 50uS of the write to the i2c registers */ -+ local_irq_save (flags); -+ write_i2c (dev, I2cLedsValue, val & ~0x80); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, (1 << 10) /* PCI_COMMAND_DISABLE_INT */); -+ local_irq_restore (flags); -+ -+ /* Wait for the write to occur and for the PLL to regain lock */ -+ udelay (20000); udelay (20000); -+ -+ /* Re-enable pci interaction and clear any spurious errors deteced */ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_STATUS, PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command); -+ break; -+ } -+ } -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ /* Enable master accesses */ -+ pci_set_master (dev->dev_osdep.pdev); -+ -+ /* Verify that the memWrInvalidate bit is set */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_CACHE_LINE_SIZE, &cacheline); -+ -+ if ((command & PCI_COMMAND_INVALIDATE) == 0) -+ { -+ printk ("elan%d: enable MemWrInvalidate (cacheline %d)\n", -+ dev->dev_instance, cacheline * 4); -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command | PCI_COMMAND_INVALIDATE); -+ } -+ -+ return (0); -+} -+ -+void -+elan4_pcifini (ELAN4_DEV *dev) -+{ -+ u32 value; -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ pci_disable_device (dev->dev_osdep.pdev); -+} -+ -+void -+elan4_pcierror (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pci = dev->dev_osdep.pdev; -+ u8 type; -+ u16 status, cmd; -+ u32 physlo, physhi, control; -+ -+ printk("elan%d: pci error has occurred\n", dev->dev_instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword (pci, PCI_ELAN_CONTROL, &control); -+ -+ if (control & ECTRL_REC_SPLIT_COMP_MESSAGE) -+ { -+ u32 message, attr; -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control & ~ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &message); -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &attr); -+ -+ printk ("elan%d: pcierror - received split completion message - attr=%08x, message=%08x\n", -+ dev->dev_instance, attr, message); -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_REC_SPLIT_COMP_MESSAGE); /* clear the error */ -+ } -+ else -+ { -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_LO, &physlo); -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_HI, &physhi); -+ pci_read_config_byte (pci, PCI_ELAN_PARITY_TYPE, &type); -+ -+ printk ("elan%d: pcierror - status %x cmd %4x physaddr %08x%08x type %x\n", -+ dev->dev_instance, status, cmd, physhi, physlo, type); -+ -+ if (status & PCI_STATUS_PARITY) -+ printk ("elan%d: parity error signalled (PERR)\n", dev->dev_instance); -+ if (status & PCI_STATUS_DETECTED_PARITY) -+ printk ("elan%d: detected parity error\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_MASTER_ABORT) -+ printk ("elan%d: received master abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_TARGET_ABORT) -+ printk ("elan%d: received target abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_SYSTEM_ERROR) -+ printk ("elan%d: signalled SERR\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_TARGET_ABORT) -+ printk ("elan%d: signalled target abort\n", dev->dev_instance); -+ -+ pci_write_config_word (pci, PCI_STATUS, status); /* clear the errors */ -+ } -+ -+ DISABLE_INT_MASK (dev, INT_PciMemErr); -+ -+#ifdef notdef -+ panic ("elan%d: pcierror\n", dev->dev_instance); /* better panic ! */ -+#endif -+} -+ -+static irqreturn_t -+elan4_irq (int irq, void *arg, struct pt_regs *regs) -+{ -+ if (elan4_1msi0 ((ELAN4_DEV *) arg)) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ioaddr_t -+elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ return (ioaddr_t) ioremap_nocache (pci_resource_start (dev->dev_osdep.pdev, bar) + off, size); -+} -+ -+void -+elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ iounmap ((void *) ptr); -+} -+ -+unsigned long -+elan4_resource_len (ELAN4_DEV *dev, unsigned bar) -+{ -+ return (pci_resource_len (dev->dev_osdep.pdev, bar)); -+} -+ -+void -+elan4_configure_mtrr (ELAN4_DEV *dev) -+{ -+#ifdef CONFIG_MTRR -+ if (! (dev->dev_features & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ { -+ /* try and initialise the MTRR registers to enable write-combining */ -+ dev->dev_osdep.sdram_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ MTRR_TYPE_WRCOMB, 1); -+ if (dev->dev_osdep.sdram_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for sdram\n", dev->dev_instance); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ { -+ dev->dev_osdep.regs_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + dev->dev_cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * (dev->dev_cqcount >> 1), -+ MTRR_TYPE_WRCOMB, 1); -+ -+ if (dev->dev_osdep.regs_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for command ports\n", dev->dev_instance); -+ } -+ } -+#endif -+} -+ -+void -+elan4_unconfigure_mtrr (ELAN4_DEV *dev) -+{ -+#ifdef CONFIG_MTRR -+ if (! (dev->dev_features & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ { -+ if (dev->dev_osdep.sdram_mtrr >=0 ) -+ mtrr_del (dev->dev_osdep.sdram_mtrr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB && dev->dev_osdep.regs_mtrr >= 0) -+ mtrr_del (dev->dev_osdep.regs_mtrr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + dev->dev_cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * (dev->dev_cqcount >> 1)); -+ } -+#endif -+} -+ -+EXPORT_SYMBOL(elan4_reference_device); -+EXPORT_SYMBOL(elan4_dereference_device); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/i2c.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/i2c.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/i2c.c 2005-06-01 23:12:54.607437888 -0400 -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i2c.c,v 1.4 2004/01/07 13:37:45 jon Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/i2c.c,v $*/ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define I2C_POLL_LIMIT 8 -+ -+static int -+i2c_poll_busy (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ volatile unsigned char val; -+ -+ /* wait for any led I2C operation to finish */ -+ while (((val = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cPortBusy) && loop++ < I2C_POLL_LIMIT) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ } -+ if (loop >= I2C_POLL_LIMIT) -+ { -+ printk ("elan%d: I2c has timed out waiting for I2cPortBusy to clear!\n", dev->dev_instance); -+ printk ("elan%d: I2cPortControl=%x I2cLedBase=%x I2cStatus=%x\n", -+ dev->dev_instance, val, read_i2c (dev, I2cLedBase), read_i2c (dev, I2cStatus)); -+ } -+ -+ return val; -+} -+ -+static int -+i2c_poll_stopped (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ unsigned char val=0, newval; -+ -+ /* wait for any led I2C operation to finish. Must see it stopped at least twice */ -+ while (!(((newval = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cStopped) && -+ (val & I2cCntl_I2cStopped)) && -+ (loop++ < I2C_POLL_LIMIT)) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ val = newval; -+ } -+ -+ return val; -+} -+ -+int -+i2c_disable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ -+ if (dev->dev_i2c_led_disabled++ == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) & ~I2cCntl_I2cUpdatingLedReg); -+ -+ if (! (i2c_poll_stopped (dev) & I2cCntl_I2cStopped)) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return -EAGAIN; -+ } -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) & ~I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return 0; -+} -+ -+void -+i2c_enable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ if (--dev->dev_i2c_led_disabled == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+} -+ -+int -+i2c_write (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | (i == (count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_read (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_READ_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, 0xff); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortRead | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ i2c_poll_busy (dev); -+ -+ data[i] = read_i2c (dev, I2cRdData); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_writereg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ printk (" i2c_writereg: off %d failed\n", i); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_readreg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | I2cCntl_I2cPortGenStopBit); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ return i2c_read (dev, address, count, data); -+} -+ -+int -+i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int len, unsigned char *data) -+{ -+ unsigned int top = addr + len; -+ int res; -+ -+ if ((res = i2c_disable_auto_led_update (dev)) == 0) -+ { -+ /* read the rom in chunks that don't span the block boundary */ -+ while (addr < top) -+ { -+ unsigned int thisnob = top - addr; -+ unsigned int blocknob = I2C_24LC16B_BLOCKSIZE - I2C_24LC16B_BLOCKOFFSET(addr); -+ -+ if (thisnob > blocknob) -+ thisnob = blocknob; -+ -+ if ((res = i2c_readreg (dev, I2C_EEPROM_ADDR + I2C_24LC16B_BLOCKADDR(addr), -+ I2C_24LC16B_BLOCKOFFSET(addr), thisnob, data)) < 0) -+ break; -+ -+ addr += thisnob; -+ data += thisnob; -+ } -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/intcookie.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/intcookie.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/intcookie.c 2005-06-01 23:12:54.608437736 -0400 -@@ -0,0 +1,371 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.c,v 1.14 2004/08/09 14:02:37 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+static INTCOOKIE_TABLE *intcookie_tables; -+static spinlock_t intcookie_table_lock; -+ -+/* -+ * intcookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+intcookie_drop_entry (INTCOOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+} -+ -+void -+intcookie_init() -+{ -+ spin_lock_init (&intcookie_table_lock); -+} -+ -+void -+intcookie_fini() -+{ -+ spin_lock_destroy (&intcookie_table_lock); -+} -+ -+INTCOOKIE_TABLE * -+intcookie_alloc_table (ELAN_CAPABILITY *cap) -+{ -+ INTCOOKIE_TABLE *tbl, *ntbl; -+ ELAN_CAPABILITY *ncap; -+ -+ KMEM_ZALLOC (ntbl, INTCOOKIE_TABLE *, sizeof (INTCOOKIE_TABLE), 1); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ -+ if (ncap == NULL) -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ return (NULL); -+ } -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_cap = ncap; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ /* Save supplied cap */ -+ bcopy (cap, ncap, ELAN_CAP_SIZE(cap)); -+ -+ if ((ntbl->tbl_next = intcookie_tables) != NULL) -+ intcookie_tables->tbl_prev = ntbl; -+ intcookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&intcookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ KMEM_FREE (ncap, ELAN_CAP_SIZE(cap)); -+ return (tbl); -+ } -+} -+ -+void -+intcookie_free_table (INTCOOKIE_TABLE *tbl) -+{ -+ INTCOOKIE_ENTRY *ent; -+ -+ spin_lock (&intcookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&intcookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ intcookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ intcookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl->tbl_cap, ELAN_CAP_SIZE(tbl->tbl_cap)); -+ KMEM_FREE (tbl, sizeof (INTCOOKIE_TABLE)); -+} -+ -+int -+intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, INTCOOKIE_ENTRY *, sizeof (INTCOOKIE_ENTRY), 1); -+ -+ if (nent == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (0); -+ else -+ { -+ KMEM_FREE (nent, sizeof (INTCOOKIE_ENTRY)); -+ return (-EINVAL); -+ } -+} -+ -+int -+intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ intcookie_drop_entry (ent); -+ -+ return (0); -+} -+ -+/* -+ * intcookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+int -+intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie) -+{ -+ int res; -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* No matching table found */ -+ if (tbl == NULL) -+ return (-EINVAL); -+ -+ /* Fire the correct cookie */ -+ res = intcookie_fire (tbl, cookie); -+ -+ /* Decrement reference count (and free if necessary) */ -+ intcookie_free_table (tbl); -+ -+ return (res); -+} -+ -+/* -+ * intcookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (0); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ res = ent->ent_fired ? 0 : -EINTR; -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+ -+ return (res); -+} -+ -+int -+intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/Makefile 2005-06-01 23:12:54.608437736 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan4/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/elan4/Makefile -+# -+ -+list-multi := elan4.o -+elan4-objs := device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+export-objs := device.o device_Linux.o mmu.o mmu_Linux.o procfs_Linux.o routetable.o sdram.o trap.o -+obj-$(CONFIG_ELAN4) := elan4.o -+ -+elan4.o : $(elan4-objs) -+ $(LD) -r -o $@ $(elan4-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/elan4/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/Makefile.conf 2005-06-01 23:12:54.608437736 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan4.o -+MODULENAME = elan4 -+KOBJFILES = device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+EXPORT_KOBJS = device.o device_Linux.o mmu.o mmu_Linux.o procfs_Linux.o routetable.o sdram.o trap.o -+CONFIG_NAME = CONFIG_ELAN4 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/elan4/mmu.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/mmu.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/mmu.c 2005-06-01 23:12:54.610437432 -0400 -@@ -0,0 +1,854 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.c,v 1.29.6.2 2005/01/18 16:58:12 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+int elan4_debug_mmu; -+ -+/* Permission table - see ELAN4 MMU documentation */ -+u_char elan4_permtable[] = -+{ -+ 0x00, /* 0x000000 - Disable */ -+ 0x00, /* 0x000000 - Unused */ -+ 0x01, /* 0x000001 - Local Data Read */ -+ 0x03, /* 0x000011 - Local Data Write */ -+ 0x11, /* 0x010001 - Local Read */ -+ 0x10, /* 0x010000 - Local Execute */ -+ 0x05, /* 0x000101 - Read Only */ -+ 0x13, /* 0x010011 - Local Write */ -+ 0x20, /* 0x100000 - Local Event Access */ -+ 0x23, /* 0x100011 - Local Event Write Ac */ -+ 0xa3, /* 1x100011 - Remote Ev Loc Write */ -+ 0xaf, /* 1x101111 - Remote All */ -+ 0x07, /* 0x000111 - Remote Read Only */ -+ 0x0d, /* 0x001101 - Remote Write Only */ -+ 0x0f, /* 0x001111 - Remote Read/Write */ -+ 0xbf, /* 1x111111 - No Fault */ -+}; -+ -+u_char elan4_permreadonly[] = -+{ -+ PERM_Disabled, /* PERM_Disabled */ -+ PERM_Disabled, /* PERM_Unused */ -+ PERM_LocDataRead, /* PERM_LocDataRead */ -+ PERM_LocDataRead, /* PERM_LocDataWrite */ -+ PERM_LocRead, /* PERM_LocRead */ -+ PERM_LocExecute, /* PERM_LocExecute */ -+ PERM_ReadOnly, /* PERM_ReadOnly */ -+ PERM_LocRead, /* PERM_LocWrite */ -+ PERM_LocEventOnly, /* PERM_LocEventOnly */ -+ PERM_LocDataRead, /* PERM_LocEventWrite */ -+ PERM_LocDataRead, /* PERM_RemoteEvent */ -+ PERM_ReadOnly, /* PERM_RemoteAll */ -+ PERM_RemoteReadOnly, /* PERM_RemoteReadOnly */ -+ PERM_ReadOnly, /* PERM_RemoteWriteLocRead */ -+ PERM_ReadOnly, /* PERM_DataReadWrite */ -+ PERM_ReadOnly, /* PERM_NoFault */ -+}; -+ -+static void -+elan4mmu_synctag (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ E4_uint64 value = (he->he_tag[tagidx] & HE_TAG_VALID) ? he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK) : INVALID_CONTEXT; -+ -+ if (he->he_next) -+ value |= ((tagidx == 0) ? -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) : -+ ((he->he_next->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK)); -+ else if (tagidx == 0) -+ value |= TAG_CHAINPTR_30TO19_MASK; -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_synctag: he=%p tagidx=%d he->he_tag=%llx -> value=%llx\n", he, tagidx, he->he_tag[tagidx], value); -+ -+ elan4_sdram_writeq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx), value); -+} -+ -+static void -+elan4mmu_chain_hents (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *phe, ELAN4_HASH_ENTRY *he) -+{ -+ ASSERT ((elan4_sdram_readq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0)) & TAG_CHAINPTR_30TO19_MASK) == TAG_CHAINPTR_30TO19_MASK); -+ -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(1), -+ ((phe->he_tag[1] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK))); -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0), -+ ((phe->he_tag[0] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))); -+} -+ -+static void -+elan4mmu_writepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx, E4_uint64 value) -+{ -+ /* -+ * NOTE - we can only change a valid PTE if we're upgrading it's permissions, -+ * any other changes should have invalidated it first. */ -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_writepte: he=%p tagidx=%d pteidx=%x value=%llx\n", he, tagidx, pteidx, (unsigned long long) value); -+ -+ if (pteidx == 3) -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx), (value >> 16) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), (value >> 0) & 0xFFFF); -+ } -+ else -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writel (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), value & 0xFFFFFFFF); -+ } -+} -+ -+static void -+elan4mmu_invalidatepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), PTE_SetPerm (PERM_Disabled)); -+ else -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), PTE_SetPerm (PERM_Disabled)); -+} -+ -+static E4_uint64 -+elan4mmu_readpte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ return (((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx)) << 0) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx)) << 16) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx)) << 32)); -+ else -+ return ((E4_uint64) elan4_sdram_readl (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx)) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx)) << 32)); -+} -+ -+ -+void -+elan4mmu_flush_tlb (ELAN4_DEV *dev) -+{ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while (read_reg64 (dev, SysControlReg) & CONT_TLB_FLUSH) -+ DELAY (1); -+} -+ -+/* -+ * elanmmu_flush_tlb_hash - this flushes the hash copy entries and the elan -+ * tlb. However after the write to the hash copy entry if the elan was -+ * in the process of walking, then it could write the hash copy with a valid -+ * entry which we had just invalidated. However once we've seen the tlb flushed -+ * then if the walk engine had done a write - then we need to invaldate the -+ * hash copy entries again and reflush the tlb. -+ * -+ * If we're invalidating a lot of hash blocks, then the chances are that the -+ * walk engine will perform a write - so we flush the tlb first, then invalidate -+ * the hash copy entries, then flush the tlb again. -+ */ -+static void -+elan4mmu_flush_tlb_hash (ELAN4_DEV *dev, int tbl, unsigned baseidx, unsigned topidx) -+{ -+ int notmany = (abs(topidx - baseidx) < 5) ? 1 : 0; -+ int hashidx; -+ E4_uint32 reg; -+ -+ if (notmany) -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_WALK_WROTE_TABLES); -+ else -+ elan4mmu_flush_tlb(dev); -+ -+ do { -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ if (dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_COPY) -+ { -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_VALID) == 0); -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[1] & HE_TAG_VALID) == 0); -+ -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 0); -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 1); -+ } -+ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while ((reg = read_reg64 (dev, SysControlReg)) & CONT_TLB_FLUSH) -+ DELAY (1); -+ -+ } while (notmany-- && (reg & CONT_CLEAR_WALK_WROTE_TABLES) != 0); -+} -+ -+void -+elan4mmu_display_hent (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int hashidx) -+{ -+ int tagidx; -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "elan4mmu_display_hent: hashidx=%d he=%p entry at %lx\n", hashidx, he, he->he_entry); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " next=%p prev=%p chain=%p,%p\n", he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1]); -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ E4_uint64 tag = elan4_sdram_readq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx)); -+ E4_uint64 pte0 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 0)); -+ E4_uint64 pte1 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 1)); -+ E4_uint64 pte2 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 2)); -+ E4_uint64 pte3 = ((pte0 >> 48) | (pte1 >> 32) | (pte2 >> 16)); -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Tag %d (%llx,%08x) context=%04x vaddr=%llx\n", tagidx, he->he_tag[tagidx], he->he_pte[tagidx], (int) (tag & TAG_CONTEXT_MASK), (tag & TAG_ADDRESS_MASK)); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 0 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte0 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte0 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte0 & PTE_TYPE_MASK), (pte0 & PTE_MOD_MASK) ? " mod" : "", (pte0 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 1 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte1 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte1 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte1 & PTE_TYPE_MASK), (pte1 & PTE_MOD_MASK) ? " mod" : "", (pte1 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 2 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte2 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte2 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte2 & PTE_TYPE_MASK), (pte2 & PTE_MOD_MASK) ? " mod" : "", (pte2 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 3 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte3 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte3 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte3 & PTE_TYPE_MASK), (pte3 & PTE_MOD_MASK) ? " mod" : "", (pte3 & PTE_REF_MASK) ? " ref" : ""); -+ } -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_next (ELAN4_HASH_ENTRY *he, int ctxnum) -+{ -+ return ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum) ? he->he_chain[0] : he->he_chain[1]; -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_unlink (ELAN4_CTXT *ctxt, int tbl, int hashidx, ELAN4_HASH_ENTRY *prevhe, ELAN4_HASH_ENTRY *he, ELAN4_HASH_ENTRY *next) -+{ -+ /* Check whether either tag is in use by this context */ -+ if ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ return he; -+ -+ if (prevhe == NULL) -+ ctxt->ctxt_mmuhash[tbl][hashidx] = next; -+ else -+ { -+ /* previous he, ensure that both chain pointers are changed is this ctxt is using both tags */ -+ ASSERT ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num); -+ -+ if ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[0] = next; -+ if ((prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[1] = next; -+ } -+ -+ return prevhe; -+} -+ -+void -+elan4mmu_display (ELAN4_CTXT *ctxt, int tbl, const char *tag) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_HASH_ENTRY *he; -+ int hashidx; -+ -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxt->ctxt_num)) -+ { -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "%s: hashidx=%d he=%p tags <%llx,%llx>\n", tag, hashidx, he, -+ (he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0, -+ (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[1], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0); -+ elan4mmu_display_hent (dev, he, hashidx); -+ } -+} -+ -+static ELAN4_HASH_ENTRY * -+elan4mmu_alloc_hent (ELAN4_DEV *dev, int tbl, int hashidx, E4_uint64 newtag, int *tagidx) -+{ -+ ELAN4_HASH_ENTRY *he, *phe; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ /* 2nd see if there are any partial free blocks */ -+ if ((he = dev->dev_mmufree[tbl][hashidx]) != NULL) -+ { -+ *tagidx = ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) ? 0 : 1; -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_alloc_hent: allocate he=%p idx=%d%s\n", he, *tagidx, (he == &dev->dev_mmuhash[tbl][hashidx]) ? " hash-block" : ""); -+ -+ he->he_tag[*tagidx] = newtag | HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, *tagidx); -+ -+ if ((he->he_tag[(*tagidx) ^ 1] & TAG_CONTEXT_MASK) != INVALID_CONTEXT) -+ { -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_alloc_hent: block full - remove from freelist\n"); -+ dev->dev_mmufree[tbl][hashidx] = he->he_chain[*tagidx]; -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+ } -+ -+ if ((he = dev->dev_mmufreelist) != NULL) -+ dev->dev_mmufreelist = he->he_next; -+ else -+ { -+ ELAN4_HASH_CHUNK *hc; -+ sdramaddr_t entry; -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CHUNK *, sizeof (ELAN4_HASH_CHUNK), 0); -+ -+ if (hc == NULL) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ if ((entry = elan4_sdram_alloc (dev, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS)) == (sdramaddr_t) 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ -+ KMEM_FREE (hc, sizeof (ELAN4_HASH_CHUNK)); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ list_add_tail (&hc->hc_link, &dev->dev_hc_list); -+ -+ elan4_sdram_zeroq_sdram (dev, entry, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS); -+ -+ /* no initialise all chunks and chain all but the first onto the freelist */ -+ for (i = 0; i < ELAN4_HENT_CHUNKS; i++, entry += sizeof (E4_HashTableEntry)) -+ { -+ hc->hc_hents[i].he_entry = entry; -+ -+ if (i == 0) -+ he = &hc->hc_hents[0]; -+ else -+ { -+ hc->hc_hents[i].he_next = dev->dev_mmufreelist; -+ dev->dev_mmufreelist = &hc->hc_hents[i]; -+ } -+ } -+ } -+ -+ /* Initialise hash entry, using slot 0 */ -+ *tagidx = 0; -+ -+ he->he_next = NULL; -+ he->he_prev = NULL; -+ he->he_chain[0] = NULL; -+ he->he_chain[1] = NULL; -+ he->he_tag[0] = newtag | HE_TAG_VALID; -+ he->he_tag[1] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[0] = 0; -+ he->he_pte[1] = 0; -+ -+ elan4mmu_synctag (dev, he, 0); -+ -+ /* add slot 1 to freelist */ -+ he->he_chain[1] = dev->dev_mmufree[tbl][hashidx]; -+ dev->dev_mmufree[tbl][hashidx] = he; -+ -+ /* add to mmuhash lists */ -+ for (phe = &dev->dev_mmuhash[tbl][hashidx]; phe->he_next; phe = phe->he_next) -+ ; -+ phe->he_next = he; -+ he->he_prev = phe; -+ he->he_next = NULL; -+ -+ /* finally chain the hash block into the hash tables */ -+ elan4mmu_chain_hents (dev, phe, he); -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+} -+ -+static void -+elan4mmu_free_hent (ELAN4_DEV *dev, int tbl, int hashidx, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ unsigned long flags; -+ int pteidx; -+ -+ /* Invalidate the tag, and zero all ptes */ -+ for (pteidx = 0; pteidx < 4; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ he->he_tag[tagidx] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[tagidx] = 0; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ -+ if ((he->he_tag[tagidx^1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) /* Both tags are now free */ -+ { -+ if (he == &dev->dev_mmuhash[tbl][hashidx]) /* it's the hash block entry */ -+ { /* so as it's already on the freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; /* just copy it's chain pointers */ -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free but hashblk\n", tbl, hashidx, tagidx, he); -+ } -+ else -+ { -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free\n", tbl, hashidx, tagidx, he); -+ -+ /* XXXX - should remove it from the hash table, and -+ * place back on the anonymous freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ } -+ } -+ else -+ { -+ /* Other tag still in use */ -+ he->he_chain[tagidx] = dev->dev_mmufree[tbl][hashidx]; -+ dev->dev_mmufree[tbl][hashidx] = he; -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => other tag in use\n", tbl, hashidx, tagidx, he); -+ } -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+} -+ -+ELAN4_HASH_ENTRY * -+elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxnum, vaddr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_uint64 newtag = E4MMU_TAG(vaddr, ctxnum); -+ ELAN4_HASH_ENTRY *he = &dev->dev_mmuhash[tbl][hashidx]; -+ unsigned tagidx; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: tbl=%d ctxnum=%d vaddr=%llx -> hashidx %d\n", tbl, ctxnum, vaddr, hashidx); -+ -+ /* 1st) check whether we're reloading an existing entry */ -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ { -+ ASSERT ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ if ((he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK | HE_TAG_VALID)) == (newtag | HE_TAG_VALID)) -+ { -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return old he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ return he; -+ } -+ } -+ } -+ -+ if ((he = elan4mmu_alloc_hent (dev, tbl, hashidx, newtag, &tagidx)) == NULL) -+ return NULL; -+ -+ /* chain onto context hash */ -+ if ((he->he_tag[tagidx ^ 1] & TAG_CONTEXT_MASK) == ctxnum) /* already chained using other link */ -+ { /* so ensure both slots are chained the same */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ } -+ else -+ { -+ he->he_chain[tagidx] = ctxt->ctxt_mmuhash[tbl][hashidx]; -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he; -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return new he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ -+ return he; -+} -+ -+int -+elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(vaddr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx; -+ ELAN4_HASH_ENTRY *he; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_pteload: ctx=%d tbl=%d pteidx=%d vaddr=%llx pte=%llx\n", -+ ctxt->ctxt_num, tbl, pteidx, (unsigned long long)vaddr, newpte); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ if ((he = elan4mmu_ptealloc (ctxt, tbl, vaddr, &tagidx)) == NULL) -+ { -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -ENOMEM; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload: %s he=%p tagidx=%d pteidx=%d\n", HE_GET_PTE(he,0,pteidx) ? "reloading" : "loading", he, tagidx, pteidx); -+ -+ ASSERT (HE_GET_PTE(he,tagidx,pteidx) == 0 || /* invalid -> valid */ -+ (elan4mmu_readpte (dev, he, tagidx, pteidx) & PTE_PPN_MASK) == (newpte & PTE_PPN_MASK)); /* or same phys address */ -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+ -+ HE_SET_PTE(he, tagidx, pteidx, (newpte & PTE_PERM_TYPE_MASK)); -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return 0; -+} -+ -+void -+elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned long tagspan = (1 << (dev->dev_pageshift[tbl] + 2)); -+ E4_Addr end = start + len - 1; -+ int needflush = 0; -+ unsigned baseidx, topidx; -+ unsigned hashidx, tagidx, pteidx; -+ ELAN4_HASH_ENTRY *he, *prevhe, *next; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_unload_range: tbl=%d start=%llx end=%llx len=%lx\n", tbl, start, end, len); -+ -+ /* determine how much of the hash table we've got to scan */ -+ -+ /* GNAT 6760: When we have a Main page size which maps onto multiple Elan pages -+ * we need to do something a bit more clever here or else it takes ms per page invalidate -+ * This change helps in the meantime -+ */ -+ /* if (len <= (1 << dev->dev_pageshift[tbl])) */ -+ if (len <= PAGE_SIZE) -+ { -+ baseidx = E4MMU_HASH_INDEX (ctxnum, start, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ topidx = E4MMU_HASH_INDEX (ctxnum, end, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ -+ if (baseidx != topidx) -+ { -+ /* GNAT 6760: Need to search whole of the hash table (slow!) */ -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ } -+ else -+ { -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: baseidx=%d topidx=%d\n", baseidx, topidx); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st - invalidate the tag for all hash blocks which are completely invalidated, -+ * and remember the first/last hash blocks */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx hashidx=%d bidx=%d tidx=%d\n", he, base, top, hashidx, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ { -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+ needflush = 1; -+ } -+ } -+ else if (base >= start && top <= end) /* hash entry completely spanned */ -+ { /* so invalidate the tag */ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned\n", he, base, top); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ needflush = 1; -+ } -+ } -+ -+ if (needflush) -+ { -+ /* 2nd invalidate the first/last hash blocks if they are partially invalidated -+ * and flush the tlb/hash copy blocks */ -+ elan4mmu_flush_tlb_hash (dev, tbl, baseidx, topidx); -+ -+ /* 3rd free off the hash entries which are completely invalidated */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (prevhe = NULL, he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = next) -+ { -+ next = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx bidx=%d tidx=%d\n", he, base, top, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ { -+ HE_SET_PTE(he, tagidx, pteidx, 0); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+ } -+ } -+ -+ if ((base >= start && top <= end) || he->he_pte[tagidx] == 0) /* hash entry completely spanned or all pte's cleared */ -+ { /* so invalidate the pte's and free it */ -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned or empty\n", he, base, top); -+ -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ } -+ -+ prevhe = he_ctxt_unlink (ctxt, tbl, hashidx, prevhe, he, next); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int ctxnum = ctxt->ctxt_num; -+ ELAN4_HASH_ENTRY *he; -+ int tbl, hashidx, tagidx; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_invalidate_ctxt: invalidating ctxnum=%d\n", ctxnum); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st invalidate all tags belonging to me */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) /* own tag block */ -+ { -+ MPRINTF (ctxt, 1, "elan4mmu_invalidate_ctxt: he=%p addr=%llx hashidx=%d tagidx=%d\n", -+ he, he->he_tag[tagidx] & TAG_ADDRESS_MASK, hashidx, tagidx); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ } -+ -+ /* 2nd flush the tlb & cached hash block */ -+ elan4mmu_flush_tlb (dev); -+ -+ /* 3rd invalidate all pte's and free off the hash entries */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ while ((he = ctxt->ctxt_mmuhash[tbl][hashidx]) != NULL) -+ { -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_HASH_CACHE * -+elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr end = start + (npages << dev->dev_pageshift[tbl]) - 1; -+ unsigned long tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_CACHE *hc; -+ unsigned int tagidx, pteidx; -+ E4_Addr addr; -+ int i; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_reserve: start=%llx npages=%d\n", start, npages); -+ MPRINTF (ctxt, 0, " pageshift=%d tagspan=%lx base=%llx top=%llx end=%llx nhes=%d\n", -+ dev->dev_pageshift[tbl], tagspan, base, top, end, nhes); -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CACHE *, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes]), cansleep); -+ -+ if (hc == NULL) -+ return NULL; -+ -+ hc->hc_start = start; -+ hc->hc_end = end; -+ hc->hc_tbl = tbl; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ -+ -+ if ((hc->hc_hes[i] = elan4mmu_ptealloc (ctxt, tbl, addr & ~(tagspan-1), &tagidx)) == NULL) -+ goto failed; -+ -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: tbl=%d addr=%llx -> hashidx=%d tagidx=%d\n", tbl, addr & ~(tagspan-1), -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, (addr & ~(tagspan-1)), dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1), tagidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ ASSERT (HE_GET_PTE (hc->hc_hes[i], tagidx, pteidx) == 0); -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: i=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", -+ i, addr, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE (hc->hc_hes[i], tagidx, pteidx, PTE_PERM_TYPE_MASK); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ return hc; -+ -+ failed: -+ for (i--, addr -= tagspan; i >= 0; i--, addr -= tagspan) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, 0); -+ -+ if (hc->hc_hes[i]->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, tbl, hashidx, hc->hc_hes[i], tagidx); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ KMEM_FREE (hc, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes])); -+ -+ return NULL; -+} -+ -+void -+elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr start = hc->hc_start; -+ E4_Addr end = hc->hc_end; -+ unsigned long tagshift = dev->dev_pageshift[hc->hc_tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_ENTRY *prevhe, *he, *next; -+ E4_Addr addr; -+ unsigned int pteidx; -+ int i; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ MPRINTF (ctxt, 0, "elan4mmu_release: base=%llx top=%llx\n", base, top); -+ -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ elan4mmu_invalidatepte (dev, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, 0); -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_release: i=%d addr=%llx he=%p (hashidx=%d tagidx=%d pteidx=%d) pte=%x\n", -+ i, addr, hc->hc_hes[i], hashidx, tagidx, pteidx, hc->hc_hes[i]->he_pte[tagidx]); -+ -+ /* remove from context hash */ -+ for (prevhe = NULL, he = ctxt->ctxt_mmuhash[hc->hc_tbl][hashidx], next = he_ctxt_next (he, ctxt->ctxt_num); he != hc->hc_hes[i]; he = next) -+ next = he_ctxt_next (he, ctxt->ctxt_num); -+ -+ if (hc->hc_hes[i]->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, hc->hc_tbl, hashidx, hc->hc_hes[i], tagidx); -+ -+ prevhe = he_ctxt_unlink (ctxt, hc->hc_tbl, hashidx, prevhe, he, next); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_set_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d) newpte=%llx\n", idx, addr, he, tagidx, pteidx, newpte); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+} -+ -+E4_uint64 -+elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ return elan4mmu_readpte (dev, he, tagidx, pteidx); -+} -+ -+void -+elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_clear_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", idx, addr, he, tagidx, pteidx); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+} -+ -+EXPORT_SYMBOL(elan4mmu_flush_tlb); -+EXPORT_SYMBOL(elan4mmu_pteload); -+EXPORT_SYMBOL(elan4mmu_unload_range); -+EXPORT_SYMBOL(elan4mmu_reserve); -+EXPORT_SYMBOL(elan4mmu_release); -+EXPORT_SYMBOL(elan4mmu_set_pte); -+EXPORT_SYMBOL(elan4mmu_get_pte); -+EXPORT_SYMBOL(elan4mmu_clear_pte); -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/mmu_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/mmu_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/mmu_Linux.c 2005-06-01 23:12:54.611437280 -0400 -@@ -0,0 +1,265 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu_Linux.c,v 1.8 2004/05/10 14:10:46 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+/* -+ * Convert a physical address into an pte. This should generate a "local" pte for -+ * physical addresses which are elan4 sdram or elan4 command queues. For elan4 -+ * registers and other addresses on the same bus, this should be the local pci -+ * bus address. All other addresses should access the physical address via the -+ * PCI bridge. -+ */ -+ -+#ifdef __alpha -+#define ioaddr2paddr(ioaddr) virt_to_phys((void *) __ioremap(ioaddr, PAGE_SIZE)) -+#elif defined(__ia64) -+#define ioaddr2paddr(ioaddr) ((ioaddr) & ~__IA64_UNCACHED_OFFSET) -+#else -+#define ioaddr2paddr(ioaddr) (ioaddr) -+#endif -+ -+int -+elan4mmu_categorise_paddr (ELAN4_DEV *dev, physaddr_t *physp) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t sdram_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t regs_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t phys = *physp; -+ int iscommand; -+ -+ if (phys >= sdram_base && phys <= sdram_top) -+ { -+ (*physp) = (phys ^ sdram_base); -+ return ELAN4MMU_PADDR_SDRAM; -+ } -+ -+ if (phys >= regs_base && phys < regs_top) -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ iscommand = (phys < (regs_base + ELAN4_REVA_REG_OFFSET)); -+ else -+ iscommand = (phys < (regs_base + ELAN4_REVB_I2C_OFFSET)); -+ -+ if (iscommand) -+ { -+ (*physp) = phys ^ regs_base; -+ -+ return ELAN4MMU_PADDR_COMMAND; -+ } -+ else -+ { -+ // XXXX (*physp) = phys2bus (phys); -+ -+ return ELAN4MMU_PADDR_LOCALPCI; -+ } -+ } -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ if (VALID_PAGE (virt_to_page (phys_to_virt (phys)))) -+#else -+ if (virt_addr_valid (phys_to_virt (phys))) -+#endif -+ return ELAN4MMU_PADDR_PAGE; -+ -+ return ELAN4MMU_PADDR_OTHER; -+} -+ -+int -+elan4mmu_sdram_aliascheck (ELAN4_CTXT *ctxt, E4_Addr addr, physaddr_t phys) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ /* -+ * On MPSAS we don't allocate a large enough context table, so -+ * if we see an address/context pair which would "alias" because -+ * they differ in unchecked hash bits to a previous pteload, -+ * then we kill the application. -+ */ -+ unsigned hashval = (E4MMU_SHIFT_ADDR(addr, (dev->dev_pageshift[0]) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(ctxt->ctxt_num)); -+ -+ if (dev->dev_rsvd_hashval[0] == 0xFFFFFFFF) -+ dev->dev_rsvd_hashval[0] = hashval & dev->dev_rsvd_hashmask[0]; -+ -+ if ((hashval & dev->dev_rsvd_hashmask[0]) != dev->dev_rsvd_hashval[0]) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx ctxnum=%x -> [%x] overlaps %x - %x [hashidx=%x]\n", (unsigned long long) addr, -+ ctxt->ctxt_num, hashval, hashval & dev->dev_rsvd_hashmask[0], dev->dev_rsvd_hashval[0], -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[0], dev->dev_hashsize[0]-1)); -+ -+ return 0; -+ } -+ -+ if (((addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (phys & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)))) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx incorrectly alias sdram at %lx\n", (unsigned long long) addr, -+ phys ^ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+int -+elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type) -+{ -+#if defined(__i386) && !defined(CONFIG_X86_PAE) -+ if (dev->dev_topaddrvalid == 0) -+ { -+ dev->dev_topaddrvalid = 1; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(0), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(1), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(2), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(3), 0); -+ } -+ return (0); -+#else -+ register int i; -+ E4_uint16 match; -+ -+ if (dev->dev_topaddrmode) /* ExtraMasterAddrBits=1 => match {paddr[63:50],type[3:2]} */ -+ match = ((paddr >> 48) & ~3) | ((type >> 2) & 3); -+ else /* ExtraMasterAddrBits=0 => match {paddr[63:48]} */ -+ match = (paddr >> 48); -+ -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: mode=%d paddr=%lx type=%x match=%x [%x %x.%x.%x.%x]\n", -+ dev->dev_topaddrmode, paddr, type, match, dev->dev_topaddrvalid, -+ dev->dev_topaddr[0], dev->dev_topaddr[1], dev->dev_topaddr[2], dev->dev_topaddr[3]); -+ -+ for (i = 0; i < 4; i++) -+ if ((dev->dev_topaddrvalid & (1 << i)) && dev->dev_topaddr[i] == match) -+ return (i); -+ -+ for (i = 0; i < 4; i++) -+ { -+ if ((dev->dev_topaddrvalid & (1 << i)) == 0) -+ { -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: allocate slot %d for %x\n", i, match); -+ -+ dev->dev_topaddrvalid |= (1 << i); -+ dev->dev_topaddr[i] = match; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(i), match); -+ return (i); -+ } -+ } -+ -+ panic ("elan4mmu_alloc_topaddr: all topaddrs in use\n"); -+ return (0); -+#endif -+} -+ -+E4_uint64 -+elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t phys, unsigned perm) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t sdram_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t regs_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ int iscommand; -+ E4_uint64 pte; -+ unsigned type; -+ -+ if (phys >= sdram_base && phys <= sdram_top) -+ { -+ phys ^= sdram_base; -+ type = PTE_SetPerm (perm); -+ } -+ else if (phys >= regs_base && phys < regs_top) -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ iscommand = (phys < (regs_base + ELAN4_REVA_REG_OFFSET)); -+ else -+ iscommand = (phys < (regs_base + ELAN4_REVB_I2C_OFFSET)); -+ -+ if (iscommand) -+ { -+ phys ^= regs_base; -+ type = PTE_SetPerm (perm) | PTE_CommandQueue; -+ } -+ else -+ { -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal; -+ // phys = phys2bus (phys); -+ } -+ } -+ else -+ { -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal | dev->dev_pteval; -+ -+#ifdef LINUX_SPARC -+ /* XXXX if not local pci bus, then or in the bypass bit */ -+ phys |= 0xfffe000000000000; -+ type |= PTE_BigEndian; -+#endif -+ -+ -+#if defined(__alpha) -+ phys |= alpha_mv.pci_dac_offset; -+#endif -+ } -+ -+ if ((type & PTE_PciNotLocal) == 0) -+ pte = (phys >> PTE_PADDR_SHIFT) | type; -+ else -+ { -+ unsigned topaddr = elan4mmu_alloc_topaddr (dev, phys, type); -+ -+ if (dev->dev_topaddrmode) -+ pte = (phys >> PTE_PADDR_SHIFT) | (type & ~0xc) | (topaddr << 2); -+ else -+ pte = ((phys >> PTE_PADDR_SHIFT) & ~PTE_TOPADDR_MASK) | (((E4_uint64) topaddr) << 45) | type; -+ } -+ -+ return pte; -+} -+ -+physaddr_t -+elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t phys; -+ -+ if (pte & PTE_PciNotLocal) -+ { -+ if (dev->dev_topaddrmode) -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 2) & 3] & 0xfffc) << 48) | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT); -+ else -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 45) & 3] & 0xffff) << 48)| ((pte & PTE_PPN_MASK & ~PTE_TOPADDR_MASK) << PTE_PADDR_SHIFT); -+ -+#ifdef LINUX_SPARC /* XXXX if not local pci bus, then or in the bypass bit */ -+ phys ^= 0xfffe000000000000; -+#endif -+ -+#if defined(__alpha) -+ phys ^= alpha_mv.pci_dac_offset; -+#endif -+ return phys; -+ } -+ -+ if (pte & PTE_CommandQueue) -+ return (regs_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+ -+ /* sdram */ -+ return (sdram_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+} -+ -+EXPORT_SYMBOL(elan4mmu_phys2pte); -+EXPORT_SYMBOL(elan4mmu_pte2phys); -Index: linux-2.4.21/drivers/net/qsnet/elan4/neterr.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/neterr.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/neterr.c 2005-06-01 23:12:54.612437128 -0400 -@@ -0,0 +1,270 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.4.6.3 2004/11/05 13:11:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct neterr_inputq -+{ -+ E4_InputQueue inputq; /* input queue */ -+ E4_Event32 qevent; /* input queue event */ -+ E4_uint64 sent; /* # messages sent (cq flow control)*/ -+} NETERR_INPUTQ; -+ -+#define NETERR_NSLOTS 64 /* single page of queue space (4Kb) */ -+ -+#define NETERR_RETRIES 16 -+#define NETERR_CQ_SIZE CQ_Size8K -+#define NETERR_CQ_MSGS (CQ_Size(NETERR_CQ_SIZE) / (21*8)) -+#define NETERR_VP_COUNT 64 /* this *must* be > NETERR_CQ_MSGS */ -+#define NETERR_VP_BASE 1 /* use vp 1 upwards */ -+ -+void -+elan4_neterr_interrupt (ELAN4_DEV *dev, void *arg) -+{ -+ E4_Addr qfptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr)); -+ E4_Addr qbptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr)); -+ E4_Addr qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ E4_Addr qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ ELAN4_CQ *cq = dev->dev_neterr_intcq; -+ int count = 0; -+ ELAN4_CTXT *ctxt; -+ ELAN4_NETERR_MSG msg; -+ -+ while (qfptr != qbptr) -+ { -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_neterr_slots + (qfptr - qfirst), &msg, ELAN4_NETERR_MSG_SIZE); -+ -+ ctxt = elan4_networkctxt (dev, msg.msg_context); -+ -+ if (ctxt != NULL && ctxt->ctxt_ops->op_neterrmsg) -+ ctxt->ctxt_ops->op_neterrmsg (ctxt, &msg); -+ else -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_interrupt: no process - sender %d.%d\n", msg.msg_sender.loc_node, msg.msg_sender.loc_context); -+ -+ count++; -+ -+ /* move on the from pointer */ -+ qfptr = (qfptr == qlast) ? qfirst : qfptr + ELAN4_NETERR_MSG_SIZE; -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfptr); -+ } -+ -+ if (count == 0) -+ { -+ printk ("elan4_neterr_interrupt: spurious\n"); -+ return; -+ } -+ -+ /* Issue the waitevent to the interrupt queue */ -+ writeq (WAIT_EVENT_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)), cq->cq_mapping); -+ writeq ( E4_EVENT_INIT_VALUE (-32 * count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INTCQ_ADDR, cq->cq_mapping); -+ writeq (INTERRUPT_CMD | (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT), cq->cq_mapping); -+ -+ pioflush_reg (dev); -+} -+ -+int -+elan4_neterr_init (ELAN4_DEV *dev) -+{ -+ unsigned int intqaddr; -+ E4_Addr qfirst, qlast; -+ -+ if ((dev->dev_neterr_inputq = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_slots = elan4_sdram_alloc (dev, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE))) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_msgcq = elan4_alloccq (&dev->dev_ctxt, NETERR_CQ_SIZE, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ if ((dev->dev_neterr_intcq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_WaitEventEnableBit | CQ_InterruptEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ intqaddr = (dev->dev_cqoffset + elan4_cq2num (dev->dev_neterr_intcq)) * CQ_CommandMappingSize; -+ qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ -+ spin_lock_init (&dev->dev_neterr_lock); -+ -+ /* Register an interrupt operation */ -+ dev->dev_neterr_intop.op_function = elan4_neterr_interrupt; -+ dev->dev_neterr_intop.op_arg = NULL; -+ -+ elan4_register_intop (dev, &dev->dev_neterr_intop); -+ -+ /* Initialise the inputq descriptor and event */ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_control), E4_InputQueueControl (qfirst, qlast, ELAN4_NETERR_MSG_SIZE)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_event), DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_CountAndType), E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WritePtr), DEVICE_NETERR_INTCQ_ADDR); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WriteValue), (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent), 0); -+ -+ /* Map them all into the device context */ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, (dev->dev_neterr_inputq >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_RemoteAll)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, (intqaddr >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocDataWrite) | PTE_CommandQueue); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, (dev->dev_neterr_slots >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_DataReadWrite)); -+ -+ /* finally attach to the neterr context */ -+ if (elan4_attach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM) != 0) -+ panic ("elan4_neterr_init: failed to attach to neterr context\n"); -+ -+ /* and drop the context filter */ -+ elan4_set_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 1; -+} -+ -+void -+elan4_neterr_destroy (ELAN4_DEV *dev) -+{ -+ if (dev->dev_neterr_intcq) -+ { -+ elan4_detach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM); -+ -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, 1 << dev->dev_pageshift[0]); -+ -+ spin_lock_destroy (&dev->dev_neterr_lock); -+ } -+ -+ if (dev->dev_neterr_intcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_intcq); -+ dev->dev_neterr_intcq = NULL; -+ -+ if (dev->dev_neterr_msgcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_msgcq); -+ dev->dev_neterr_msgcq = NULL; -+ -+ if (dev->dev_neterr_slots) -+ elan4_sdram_free (dev, dev->dev_neterr_slots, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE)); -+ dev->dev_neterr_slots = 0; -+ -+ if (dev->dev_neterr_inputq) -+ elan4_sdram_free (dev, dev->dev_neterr_inputq, SDRAM_PAGE_SIZE); -+ dev->dev_neterr_inputq = 0; -+} -+ -+int -+elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ ELAN4_CQ *cq = dev->dev_neterr_msgcq; -+ E4_uint64 sent; -+ E4_VirtualProcessEntry route; -+ unsigned int vp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ -+ sent = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent)); -+ -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_sendmsg: nodeid=%d retries=%d cookie=%llx sender=%d,%d%s\n", -+ nodeid, retries, msg->msg_cookies[0], msg->msg_sender.loc_node, msg->msg_sender.loc_context, -+ (dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS ? " - no cq space" : ""); -+ -+ if ((dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ vp = NETERR_VP_BASE + (dev->dev_neterr_queued % NETERR_VP_COUNT); -+ -+ if (elan4_generate_route (&dev->dev_position, &route, ELAN4_NETERR_CONTEXT_NUM, nodeid, nodeid, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI) < 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ elan4_write_route (dev, dev->dev_routetable, vp, &route); -+ -+ writeq ((GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(retries)), cq->cq_mapping); -+ writeq (NOP_CMD, cq->cq_mapping); -+ -+ writeq (OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp), cq->cq_mapping); -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_GETINDEX << 16), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD) << 16), cq->cq_mapping); -+ writeq ( 0 /* address */, cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[0], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[1], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[2], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[3], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[4], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[5], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[6], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[7], cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_COMMIT << 16), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), cq->cq_mapping); -+ writeq ( 0 /* cookie */, cq->cq_mapping); -+ -+ writeq (GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(NETERR_RETRIES), cq->cq_mapping); -+ writeq (WRITE_DWORD_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, sent)), cq->cq_mapping); -+ writeq ( ++dev->dev_neterr_queued, cq->cq_mapping); -+ -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ -+ return 1; -+} -+ -+int -+elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap) -+{ -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ unsigned long flags; -+ -+ switch (IPROC_TrapValue (hdrp->IProcStatusCntxAndTrType)) -+ { -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ return 1; -+ -+ case InputEventEngineTrapped: -+ printk ("elan%d: device_iproc_trap: InputEventEngineTrapped - Trans=%x TrAddr=%llx\n", -+ dev->dev_instance, (int)IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType), (long long) hdrp->TrAddr); -+ -+ if ((IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ hdrp->TrAddr == DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq)) -+ { -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ writeq ((DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)) | SET_EVENT_CMD, dev->dev_neterr_msgcq->cq_mapping); -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 1; -+ } -+ -+ default: -+ return 0; -+ } -+} -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/procfs_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/procfs_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/procfs_Linux.c 2005-06-01 23:12:54.613436976 -0400 -@@ -0,0 +1,1041 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_Linux.c,v 1.27.2.5 2005/01/18 14:36:17 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elan4mod/procfs_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * -+ * procfs format for elan4: -+ * -+ * /proc/qsnet/elan4/config -+ * elan4_debug -+ * elan4_debug_toconsole -+ * elan4_debug_tobuffer -+ * elan4_debug_display_ctxt -+ * elan4_debug_ignore_ctxt -+ * elan4_debug_ignore_type -+ * elan4_debug_mmu -+ * elan4_mainint_punt_loops -+ * user_p2p_route_options -+ * user_bcast_route_options -+ * -+ * /proc/qsnet/elan4/deviceN -+ * stats -+ * position -+ * vpd -+ */ -+ -+struct proc_dir_entry *elan4_procfs_root; -+struct proc_dir_entry *elan4_config_root; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static int -+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static int -+proc_read_devinfo (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len = 0; -+ -+ if (! dev) -+ len = sprintf (page, "\n"); -+ else -+ { -+ len += sprintf (page + len, "dev_vendor_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_device_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_revision_id 0x%x\n", dev->dev_devinfo.dev_revision_id); -+ len += sprintf (page + len, "dev_instance 0x%x\n", dev->dev_devinfo.dev_instance); -+ len += sprintf (page + len, "dev_rail 0x%x\n", dev->dev_devinfo.dev_rail); -+ len += sprintf (page + len, "dev_driver_version 0x%x\n", dev->dev_devinfo.dev_driver_version); -+ len += sprintf (page + len, "dev_params_mask 0x%x\n", dev->dev_devinfo.dev_params_mask); -+ len += sprintf (page + len, "dev_params: \n"); -+ len += sprintf (page + len, " 0 - PciCmdQPadFlag 0x%x\n", dev->dev_devinfo.dev_params.values[0]); -+ len += sprintf (page + len, " 1 - EventCopyWinPt 0x%x\n", dev->dev_devinfo.dev_params.values[1]); -+ len += sprintf (page + len, " 2 - PciWriteCombining 0x%x\n", dev->dev_devinfo.dev_params.values[2]); -+ len += sprintf (page + len, " 3 - 0x%x\n", dev->dev_devinfo.dev_params.values[3]); -+ len += sprintf (page + len, " 4 - 0x%x\n", dev->dev_devinfo.dev_params.values[4]); -+ len += sprintf (page + len, " 5 - 0x%x\n", dev->dev_devinfo.dev_params.values[5]); -+ len += sprintf (page + len, " 6 - 0x%x\n", dev->dev_devinfo.dev_params.values[6]); -+ len += sprintf (page + len, " 7 - 0x%x\n", dev->dev_devinfo.dev_params.values[7]); -+ len += sprintf (page + len, " 8 - 0x%x\n", dev->dev_devinfo.dev_params.values[8]); -+ len += sprintf (page + len, " 9 - 0x%x\n", dev->dev_devinfo.dev_params.values[9]); -+ len += sprintf (page + len, " 10 - 0x%x\n", dev->dev_devinfo.dev_params.values[10]); -+ len += sprintf (page + len, " 11 - 0x%x\n", dev->dev_devinfo.dev_params.values[11]); -+ len += sprintf (page + len, "dev_num_down_links_value 0x%x\n", dev->dev_devinfo.dev_num_down_links_value); -+ -+ len += sprintf (page + len, "features 0x%x\n", dev->dev_features); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if (dev->dev_position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->dev_position.pos_nodeid, -+ dev->dev_position.pos_levels, -+ dev->dev_position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned nodeid = ELAN_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ ELAN_POSITION pos; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ pos.pos_mode = ELAN_POS_UNKNOWN; -+ pos.pos_nodeid = ELAN_INVALID_NODE; -+ pos.pos_nodes = 0; -+ pos.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (elan4_compute_position (&pos, nodeid, numnodes, dev->dev_devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->dev_instance, nodeid, numnodes); -+ else -+ { -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->dev_instance, pos.pos_nodeid, -+ pos.pos_nodes, pos.pos_levels); -+ -+ if (elan4_set_position (dev, &pos) < 0) -+ printk ("elan%d: failed to set device position\n", dev->dev_instance); -+ } -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+static int -+proc_read_temp (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned char values[2]; -+ int len; -+ -+ if (i2c_disable_auto_led_update (dev) < 0) -+ len = sprintf (page, ""); -+ else -+ { -+ if (i2c_read (dev, I2C_TEMP_ADDR, 2, values) < 0) -+ len = sprintf (page, ""); -+ else -+ len = sprintf (page, "%s%d%s\n", (values[0] & 0x80) ? "-" : "", -+ (values[0] & 0x80) ? -((signed char)values[0]) - 1 : values[0], -+ (values[1] & 0x80) ? ".5" : ".0"); -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_eccerr (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char errstr[200]; -+ register int i, len = 0; -+ -+ *page = '\0'; -+ -+ for (i = 0; i < sizeof (dev->dev_sdramerrs)/sizeof(dev->dev_sdramerrs[0]); i++) -+ if (dev->dev_sdramerrs[i]) -+ len += sprintf (page + len, "%s occured %0d times\n", -+ elan4_sdramerr2str (dev, dev->dev_sdramerrs[i] & 0x000fffffffffffffULL, errstr), -+ (int) (dev->dev_sdramerrs[i] >> 52) + 1); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_vpd (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if ( elan4_read_vpd (dev, NULL, page) ) -+ len = sprintf (page, "no vpd tags found\n"); -+ else -+ len = strlen(page)+1; -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static struct device_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+ unsigned minrev; -+} device_info[] = { -+ {"devinfo", proc_read_devinfo, NULL, 0}, -+ {"position", proc_read_position, proc_write_position, 0}, -+ {"temp", proc_read_temp, NULL, 1}, -+ {"eccerr", proc_read_eccerr, NULL, 0}, -+ {"vpd", proc_read_vpd, NULL, 0}, -+}; -+ -+static int -+proc_read_link_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "link_errors", dev->dev_stats.s_link_errors); -+ p += sprintf (p, "%20s %ld\n", "lock_errors", dev->dev_stats.s_lock_errors); -+ p += sprintf (p, "%20s %ld\n", "deskew_errors", dev->dev_stats.s_deskew_errors); -+ p += sprintf (p, "%20s %ld\n", "phase_errors", dev->dev_stats.s_phase_errors); -+ -+ p += sprintf (p, "%20s %ld\n", "data_errors", dev->dev_stats.s_data_errors); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow0", dev->dev_stats.s_fifo_overflow0); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow1", dev->dev_stats.s_fifo_overflow1); -+ p += sprintf (p, "%20s %ld\n", "mod45changed", dev->dev_stats.s_mod45changed); -+ p += sprintf (p, "%20s %ld\n", "pack_not_seen", dev->dev_stats.s_pack_not_seen); -+ -+ p += sprintf (p, "%20s %ld\n", "linkport_keyfail", dev->dev_stats.s_linkport_keyfail); -+ p += sprintf (p, "%20s %ld\n", "eop_reset", dev->dev_stats.s_eop_reset); -+ p += sprintf (p, "%20s %ld\n", "bad_length", dev->dev_stats.s_bad_length); -+ p += sprintf (p, "%20s %ld\n", "crc_error", dev->dev_stats.s_crc_error); -+ p += sprintf (p, "%20s %ld\n", "crc_bad", dev->dev_stats.s_crc_bad); -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_timeout", dev->dev_stats.s_cproc_timeout); -+ p += sprintf (p, "%20s %ld\n", "dproc_timeout", dev->dev_stats.s_dproc_timeout); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static char * -+proc_sprintf_bucket_stat (char *p, char *name, unsigned long *stats, int *buckets) -+{ -+ int i; -+ -+ p += sprintf (p, "%20s ", name); -+ -+ for (i = 0; i < ELAN4_DEV_STATS_BUCKETS-1; i++) -+ p += sprintf (p, "%ld(<=%d) ", stats[i], buckets[i]); -+ p += sprintf (p, "%ld(>%d)\n", stats[i], buckets[i-1]); -+ -+ return p; -+} -+ -+static int -+proc_read_intr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "interrupts", dev->dev_stats.s_interrupts); -+ p += sprintf (p, "%20s %ld\n", "haltints", dev->dev_stats.s_haltints); -+ -+ p += sprintf (p, "%20s %ld\n", "mainint_punts", dev->dev_stats.s_mainint_punts); -+ p += sprintf (p, "%20s %ld\n", "mainint_rescheds", dev->dev_stats.s_mainint_rescheds); -+ -+ p = proc_sprintf_bucket_stat (p, "mainints", dev->dev_stats.s_mainints, MainIntBuckets); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_traps", dev->dev_stats.s_cproc_traps); -+ p += sprintf (p, "%20s %ld\n", "dproc_traps", dev->dev_stats.s_dproc_traps); -+ p += sprintf (p, "%20s %ld\n", "eproc_traps", dev->dev_stats.s_eproc_traps); -+ p += sprintf (p, "%20s %ld\n", "iproc_traps", dev->dev_stats.s_iproc_traps); -+ p += sprintf (p, "%20s %ld\n", "tproc_traps", dev->dev_stats.s_tproc_traps); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const CProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_cproc_trap_types)/sizeof(dev->dev_stats.s_cproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", CProcTrapNames[i], dev->dev_stats.s_cproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_dproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const DProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_dproc_trap_types)/sizeof(dev->dev_stats.s_dproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", DProcTrapNames[i], dev->dev_stats.s_dproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_eproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const EProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_eproc_trap_types)/sizeof(dev->dev_stats.s_eproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", EProcTrapNames[i], dev->dev_stats.s_eproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_iproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const IProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_iproc_trap_types)/sizeof(dev->dev_stats.s_iproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", IProcTrapNames[i], dev->dev_stats.s_iproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_tproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const TProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_tproc_trap_types)/sizeof(dev->dev_stats.s_tproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", TProcTrapNames[i], dev->dev_stats.s_tproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_sdram_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "correctable_errors", dev->dev_stats.s_correctable_errors); -+ p += sprintf (p, "%20s %ld\n", "multiple_errors", dev->dev_stats.s_multiple_errors); -+ p += sprintf (p, "%20s %ldK\n", "sdram_bytes_free", dev->dev_stats.s_sdram_bytes_free/1024); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+void -+elan4_ringbuf_store(ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev) -+{ -+ int newend; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dev->dev_error_routes_lock, flags); -+ bcopy(route, &ringbuf->routes[ringbuf->end], sizeof(E4_VirtualProcessEntry)); -+ newend = ringbuf->end + 1; -+ if (newend >= DEV_STASH_ROUTE_COUNT) -+ newend -= DEV_STASH_ROUTE_COUNT; -+ if (newend == ringbuf->start) -+ ringbuf->start += 1; -+ if (ringbuf->start >= DEV_STASH_ROUTE_COUNT) -+ ringbuf->start -= DEV_STASH_ROUTE_COUNT; -+ ringbuf->end = newend; -+ spin_unlock_irqrestore(&dev->dev_error_routes_lock, flags); -+} -+ -+static int -+proc_read_dproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *dproc_timeout; -+ -+ dproc_timeout = dev->dev_dproc_timeout; -+ -+ if (!dproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (dproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, dproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+elan4_route2str (E4_VirtualProcessEntry *route, char *routeStr) -+{ -+ int part = 0; -+ int shift; -+ int broadcast; -+ E4_uint64 value; -+ char *ptr = routeStr; -+ int b; -+ -+ /* unpack first */ -+ value = route->Values[part] & 0x7f; -+ if ( (value & 0x78) == 0) { -+ /* empty route */ -+ strcpy(routeStr,"Invalid lead route"); -+ return (-EINVAL); -+ } -+ -+ if ( value & 0x40 ) { -+ /* broad cast */ -+ strcpy(routeStr,"Broadcast"); -+ return (-EINVAL); -+ } else { -+ switch ((value & 0x30) >> 4) { -+ case 0: { *ptr++ = '0' + (value & 0x7); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ -+ shift = 16; -+ broadcast = 0; -+ while ( 1 ) { -+ b = (route->Values[part] >> shift) & 0xf; -+ -+ if ( broadcast ) { -+ /* about to pick up the second byte of a broadcast pair */ -+ broadcast = 0; -+ } else { -+ if ( b & 0x8) { -+ /* output link */ -+ *ptr++ = '0' + (b & 0x7); -+ } else { -+ if ( b & 0x4) { -+ /* broad cast */ -+ broadcast = 1; -+ } else { -+ switch ( b & 0x3 ) { -+ case 0: { *ptr++ = 0 ; return (0); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ } -+ } -+ -+ shift += 4; -+ if ( part != 0 ) { -+ if ( shift > 36) { -+ /* too far, now in the crc value */ -+ strcpy(routeStr,"Invalid route length"); -+ return (-EINVAL); -+ } -+ } else { -+ if ( shift >= 64) { -+ /* move to the next 64 bits */ -+ part = 1; -+ shift = 2; -+ } -+ } -+ } -+ -+ /* never reached */ -+ return (-EINVAL); -+} -+ -+ -+static int -+proc_read_dproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_dproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ unsigned long flags; -+ -+ memset(&routestr, 0, 33); -+ -+ spin_lock_irqsave(&dev->dev_error_routes_lock, flags); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ spin_unlock_irqrestore(&dev->dev_error_routes_lock, flags); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+ -+static int -+proc_read_cproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *cproc_timeout; -+ -+ cproc_timeout = dev->dev_cproc_timeout; -+ -+ if (!cproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (cproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, cproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_cproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ unsigned long flags; -+ -+ memset(&routestr, 0, 33); -+ -+ spin_lock_irqsave(&dev->dev_error_routes_lock, flags); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ spin_unlock_irqrestore(&dev->dev_error_routes_lock, flags); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_traperr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *ack_errors; -+ -+ ack_errors = dev->dev_ack_errors; -+ -+ if (!ack_errors) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (ack_errors[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, ack_errors[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_ackerror_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_ack_error_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ unsigned long flags; -+ -+ memset(&routestr, 0, 33); -+ -+ spin_lock_irqsave(&dev->dev_error_routes_lock, flags); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ spin_unlock_irqrestore(&dev->dev_error_routes_lock, flags); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct stats_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} stats_info[] = { -+ {"link", proc_read_link_stats, NULL}, -+ {"intr", proc_read_intr_stats, NULL}, -+ {"trap", proc_read_trap_stats, NULL}, -+ {"cproc", proc_read_cproc_trap_stats, NULL}, -+ {"dproc", proc_read_dproc_trap_stats, NULL}, -+ {"eproc", proc_read_eproc_trap_stats, NULL}, -+ {"iproc", proc_read_iproc_trap_stats, NULL}, -+ {"tproc", proc_read_tproc_trap_stats, NULL}, -+ {"sdram", proc_read_sdram_stats, NULL}, -+ {"trapdmaerr", proc_read_traperr_stats, NULL}, -+ {"dproctimeout", proc_read_dproc_timeout_stats, NULL}, -+ {"cproctimeout", proc_read_cproc_timeout_stats, NULL}, -+ {"dproctimeoutroutes", proc_read_dproc_timeout_routes, NULL}, -+ {"cproctimeoutroutes", proc_read_cproc_timeout_routes, NULL}, -+ {"ackerrroutes", proc_read_ackerror_routes, NULL}, -+}; -+ -+static int -+proc_read_sysconfig (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ E4_uint32 syscontrol = dev->dev_syscontrol; -+ int len = 0; -+ -+ *eof = 1; -+ if (off != 0) -+ return (0); -+ -+ if (syscontrol & CONT_EN_ALL_SETS) -+ len += sprintf (page + len, "%sEN_ALL_SETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_MMU_ENABLE) -+ len += sprintf (page + len, "%sMMU_ENABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_HASH_TABLE) -+ len += sprintf (page + len, "%sCACHE_HASH_TABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_CHAINS) -+ len += sprintf (page + len, "%sCACHE_CHAINS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_ROOT_CNTX) -+ len += sprintf (page + len, "%sCACHE_ROOT_CNTX", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_STEN_ROUTES) -+ len += sprintf (page + len, "%sCACHE_STEN_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_DMA_ROUTES) -+ len += sprintf (page + len, "%sCACHE_DMA_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_INHIBIT_MAX_CHAIN_ITEMS) -+ len += sprintf (page + len, "%sINHIBIT_MAX_CHAIN_ITEMS", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "%sTABLE0_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE0_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ len += sprintf (page + len, "%sTABLE1_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE1_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ -+ if (syscontrol & CONT_2K_NOT_1K_DMA_PACKETS) -+ len += sprintf (page + len, "%s2K_NOT_1K_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_ALIGN_ALL_DMA_PACKETS) -+ len += sprintf (page + len, "%sALIGN_ALL_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_DIRECT_MAP_PCI_WRITES) -+ len += sprintf (page + len, "%sDIRECT_MAP_PCI_WRITES", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "\n"); -+ -+ *start = page; -+ return (len); -+} -+ -+static int -+proc_write_sysconfig (struct file *file, const char *ubuffer, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned long page = __get_free_page (GFP_KERNEL); -+ char *buffer = (char *)page; -+ int add = 0; -+ int sub = 0; -+ -+ count = MIN (count, PAGE_SIZE - 1); -+ if (copy_from_user (buffer, ubuffer, count)) -+ { -+ free_page (page); -+ return (-EFAULT); -+ } -+ -+ buffer[count] = 0; /* terminate string */ -+ -+ while (*buffer != 0) -+ { -+ char *ptr; -+ char *end; -+ int ch; -+ int val; -+ int op; -+ -+ ch = *buffer; -+ if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') -+ { -+ buffer++; -+ continue; -+ } -+ -+ op = *buffer; -+ if (op == '+' || op == '-') -+ buffer++; -+ -+ for (end = buffer; *end != 0; end++) -+ if (*end == ' ' || *end == '\t' || -+ *end == '\r' || *end == '\n') -+ break; -+ -+ if (end == buffer) -+ break; -+ -+ ch = *end; -+ *end = 0; -+ -+ for (ptr = buffer; *ptr != 0; ptr++) -+ if ('a' <= *ptr && *ptr <= 'z') -+ *ptr = *ptr + 'A' - 'a'; -+ -+ if (!strcmp (buffer, "EN_ALL_SETS")) -+ val = CONT_EN_ALL_SETS; -+ if (!strcmp (buffer, "CACHE_HASH_TABLE")) -+ val = CONT_CACHE_HASH_TABLE; -+ else if (!strcmp (buffer, "CACHE_CHAINS")) -+ val = CONT_CACHE_CHAINS; -+ else if (!strcmp (buffer, "CACHE_ROOT_CNTX")) -+ val = CONT_CACHE_ROOT_CNTX; -+ else if (!strcmp (buffer, "CACHE_STEN_ROUTES")) -+ val = CONT_CACHE_STEN_ROUTES; -+ else if (!strcmp (buffer, "CACHE_DMA_ROUTES")) -+ val = CONT_CACHE_DMA_ROUTES; -+ else if (!strcmp (buffer, "2K_NOT_1K_DMA_PACKETS")) -+ val = CONT_2K_NOT_1K_DMA_PACKETS; -+ else if (!strcmp (buffer, "ALIGN_ALL_DMA_PACKETS")) -+ val = CONT_ALIGN_ALL_DMA_PACKETS; -+ else -+ val = 0; -+ -+ if (op == '+') -+ add |= val; -+ else if (op == '-') -+ sub |= val; -+ -+ *end = ch; -+ buffer = end; -+ } -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ CHANGE_SYSCONTROL (dev, add, sub); -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ free_page (page); -+ return (count); -+} -+ -+static struct config_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} config_info[] = { -+ {"sysconfig", proc_read_sysconfig, proc_write_sysconfig}, -+}; -+ -+void -+elan4_procfs_device_init (ELAN4_DEV *dev) -+{ -+ struct proc_dir_entry *p; -+ char name[NAME_MAX]; -+ int i; -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ dev->dev_osdep.procdir = proc_mkdir (name, elan4_procfs_root); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ if ((p = create_proc_entry (device_info[i].name, 0, dev->dev_osdep.procdir)) != NULL) -+ { -+ p->read_proc = device_info[i].read_func; -+ p->write_proc = device_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.configdir = proc_mkdir ("config", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ { -+ if ((p = create_proc_entry (config_info[i].name, 0, dev->dev_osdep.configdir)) != NULL) -+ { -+ p->read_proc = config_info[i].read_func; -+ p->write_proc = config_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.statsdir = proc_mkdir ("stats", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ { -+ if ((p = create_proc_entry (stats_info[i].name, 0, dev->dev_osdep.statsdir)) != NULL) -+ { -+ p->read_proc = stats_info[i].read_func; -+ p->write_proc = stats_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+} -+ -+void -+elan4_procfs_device_fini (ELAN4_DEV *dev) -+{ -+ char name[NAME_MAX]; -+ int i; -+ -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ remove_proc_entry (stats_info[i].name, dev->dev_osdep.statsdir); -+ remove_proc_entry ("stats", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ remove_proc_entry (config_info[i].name, dev->dev_osdep.configdir); -+ remove_proc_entry ("config", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ remove_proc_entry (device_info[i].name, dev->dev_osdep.procdir); -+ } -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ remove_proc_entry (name, elan4_procfs_root); -+} -+ -+void -+elan4_procfs_init(void) -+{ -+ elan4_procfs_root = proc_mkdir("elan4", qsnet_procfs_root); -+ elan4_config_root = proc_mkdir("config", elan4_procfs_root); -+ -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug", &elan4_debug, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_toconsole", &elan4_debug_toconsole, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_tobuffer", &elan4_debug_tobuffer, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_debug_mmu", &elan4_debug_mmu, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_mainint_punt_loops", &elan4_mainint_punt_loops, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_p2p_route_options", &user_p2p_route_options, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_bcast_route_options", &user_bcast_route_options, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_dproc_retry_count", &user_dproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_cproc_retry_count", &user_cproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "num_fault_save", &num_fault_save, 0); -+ qsnet_proc_register_int (elan4_config_root, "min_fault_pages", &min_fault_pages, 0); -+ qsnet_proc_register_int (elan4_config_root, "max_fault_pages", &max_fault_pages, 0); -+} -+ -+void -+elan4_procfs_fini(void) -+{ -+ remove_proc_entry ("max_fault_pages", elan4_config_root); -+ remove_proc_entry ("min_fault_pages", elan4_config_root); -+ remove_proc_entry ("num_fault_save", elan4_config_root); -+ remove_proc_entry ("user_cproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_dproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_bcast_route_options", elan4_config_root); -+ remove_proc_entry ("user_p2p_route_options", elan4_config_root); -+ remove_proc_entry ("elan4_mainint_punt_loops", elan4_config_root); -+ remove_proc_entry ("elan4_debug_mmu", elan4_config_root); -+ remove_proc_entry ("elan4_debug_tobuffer", elan4_config_root); -+ remove_proc_entry ("elan4_debug_toconsole", elan4_config_root); -+ remove_proc_entry ("elan4_debug", elan4_config_root); -+ -+ remove_proc_entry ("config", elan4_procfs_root); -+ remove_proc_entry ("elan4", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan4_procfs_root); -+EXPORT_SYMBOL(elan4_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/quadrics_version.h 2005-06-01 23:12:54.614436824 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/elan4/regions.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/regions.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/regions.c 2005-06-01 23:12:54.615436672 -0400 -@@ -0,0 +1,609 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: regions.c,v 1.18.2.1 2004/11/18 11:31:08 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/regions.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+/*================================================================================*/ -+/* elan address region management */ -+USER_RGN * -+user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ E4_Addr base; -+ E4_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_ergns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_ergnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_ergns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_ebase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_etail; -+ -+ if ((lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_ergns; -+ -+ if (lorgn->rgn_ebase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_ebase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_enext; -+ -+ if (rgn->rgn_ebase <= addr) -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_ebase > addr) -+ rgn = rgn->rgn_eprev; -+ -+ if ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_enext); -+ else -+ { -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_elan (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, nrgn->rgn_ebase, 1); -+ E4_Addr nbase = nrgn->rgn_ebase; -+ E4_Addr ntop = nbase + nrgn->rgn_len - 1; -+ E4_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_ergns = uctx->uctx_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = uctx->uctx_etail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (uctx->uctx_ergns == rgn) -+ uctx->uctx_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ uctx->uctx_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_elan (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_ergnlast = rgn->rgn_enext; -+ if (rgn == uctx->uctx_etail) -+ uctx->uctx_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == uctx->uctx_ergns) -+ uctx->uctx_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+USER_RGN * -+user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_ebase <= addr && addr <= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ return (rgn); -+ -+ return (NULL); -+} -+ -+/* main address region management */ -+USER_RGN * -+user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ virtaddr_t lastaddr; -+ virtaddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_mrgns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_mrgnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ rgn = rgn->rgn_mprev; -+ -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_mnext); -+ else -+ { -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_main (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, nrgn->rgn_mbase, 1); -+ virtaddr_t nbase = nrgn->rgn_mbase; -+ virtaddr_t ntop = nbase + nrgn->rgn_len - 1; -+ virtaddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_mrgns = uctx->uctx_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = uctx->uctx_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (uctx->uctx_mrgns == rgn) -+ uctx->uctx_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ uctx->uctx_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_main (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_mrgnlast = rgn->rgn_mnext; -+ if (rgn == uctx->uctx_mtail) -+ uctx->uctx_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == uctx->uctx_mrgns) -+ uctx->uctx_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+/* Remove whole region from both lists */ -+static void -+user_removergn (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ -+ user_removergn_elan (uctx, rgn); -+ user_removergn_main (uctx, rgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ KMEM_FREE (rgn, sizeof (USER_RGN)); -+} -+ -+/* Remove all allocated regions */ -+void -+user_freergns (USER_CTXT *uctx) -+{ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (uctx->uctx_mrgns) -+ user_removergn(uctx, uctx->uctx_mrgns); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ ASSERT (uctx->uctx_ergns == NULL); -+} -+ -+USER_RGN * -+user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_mbase <= addr && addr <= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm) -+{ -+ USER_RGN *nrgn; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_setperm: user %lx elan %llx len %lx perm %x\n", maddr, (long long) eaddr, len, perm); -+ -+ if ((maddr & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: alignment failure\n"); -+ return (-EINVAL); -+ } -+ -+ if ((maddr + len - 1) <= maddr || (eaddr + len - 1) <= eaddr) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: range failure\n"); -+ return (-EINVAL); -+ } -+ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ if (nrgn == NULL) -+ return (-ENOMEM); -+ -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if (user_addrgn_elan (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: elan address exists\n"); -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ -+ if (user_addrgn_main (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: main address exists\n"); -+ user_removergn_elan (uctx, nrgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ if ((perm & PERM_Preload)) -+ user_preload_main (uctx, maddr, len); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+void -+user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len) -+{ -+ E4_Addr raddr; -+ E4_Addr rtop; -+ USER_RGN *nrgn; -+ USER_RGN *rgn; -+ USER_RGN *rgn_next; -+ unsigned long ssize; -+ int res; -+ -+ PRINTF2 (uctx, DBG_PERM, "user_clrperm: elan %llx len %lx\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + (PAGESIZE-1); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ for (rgn = user_findrgn_elan (uctx, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ PRINTF (uctx, DBG_PERM, " elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ -+ PRINTF (uctx, DBG_PERM, " whole region\n"); -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1); -+ user_removergn (uctx, rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at beginning %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, ssize); -+ -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = (rgn->rgn_ebase + rgn->rgn_len - 1) - raddr + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at end %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, raddr+ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, ssize); -+ -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, rtop); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, rtop - raddr + 1); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1); -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = (rgn->rgn_ebase + rgn->rgn_len - 1) - rtop; -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ PRINTF (uctx, DBG_PERM, " new elan %llx->%llx main %p->%p\n", -+ nrgn->rgn_ebase, nrgn->rgn_ebase + nrgn->rgn_len-1, -+ nrgn->rgn_mbase, nrgn->rgn_mbase + nrgn->rgn_len-1); -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ PRINTF (uctx, DBG_PERM, " old elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ res = user_addrgn_elan (uctx, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = user_addrgn_main (uctx, nrgn); -+ ASSERT (res == 0); -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+} -+ -+int -+user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access) -+{ -+ USER_RGN *rgn; -+ -+ PRINTF3 (uctx, DBG_PERM, "user_checkperm: elan %lx len %lx access %x\n", raddr, rsize, access); -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (-ENOMEM); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ if ((rgn = user_rgnat_elan (uctx, raddr)) == (USER_RGN *) NULL) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_checkperm : rgn %lx -> %lx perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + (E4_Addr)rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EACCES); -+ } -+ } -+ } -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+virtaddr_t -+user_elan2main (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn; -+ virtaddr_t raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_elan (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+E4_Addr -+user_main2elan (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn; -+ E4_Addr raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_main (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/routetable.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/routetable.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/routetable.c 2005-06-01 23:12:54.615436672 -0400 -@@ -0,0 +1,249 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: routetable.c,v 1.15 2004/07/20 09:29:40 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/routetable.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+ELAN4_ROUTE_TABLE * -+elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size) -+{ -+ ELAN4_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN4_ROUTE_TABLE *, sizeof (ELAN4_ROUTE_TABLE), 1); -+ -+ if (tbl == (ELAN4_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->tbl_size = (size & E4_VPT_SIZE_MASK); -+ tbl->tbl_entries = elan4_sdram_alloc (dev, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ if (tbl->tbl_entries == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+ return ((ELAN4_ROUTE_TABLE *) NULL); -+ } -+ -+ spin_lock_init (&tbl->tbl_lock); -+ -+ /* zero the route table */ -+ elan4_sdram_zeroq_sdram (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ return (tbl); -+} -+ -+void -+elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl) -+{ -+ elan4_sdram_free (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+} -+ -+void -+elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), entry->Values[1]); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), entry->Values[0]); -+ pioflush_sdram (dev); -+} -+ -+void -+elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ entry->Values[0] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0])); -+ entry->Values[1] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1])); -+} -+ -+void -+elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), 0); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), 0); -+ pioflush_sdram (dev); -+} -+ -+static void -+pack_them_routes (E4_VirtualProcessEntry *entry, E4_uint16 first, E4_uint8 *packed, unsigned ctx) -+{ -+ E4_uint64 value0 = first; -+ E4_uint64 value1 = ROUTE_CTXT_VALUE(ctx); -+ E4_uint32 ThirdRouteBCastVal; -+ register int i; -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ value0 |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ value1 |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ /* DMA fix for large broadcast route values that fall into the double issue of route value 3 bug. */ -+ /* NOTE - this is only required when the link is running in Mod45 mode, it could be automatically -+ * disabled when Mod44 is detected */ -+ -+ /* First seach for the alignment type. The bug is only sensitive to an odd bcast aligment on the 3rd word. */ -+ for (i=4;i<16;i++) -+ if (((value0 >> (i*4)) & 0xc) == 4) -+ i++; -+ -+ if (i == 17) -+ { -+ ThirdRouteBCastVal = value1 & 0xcccccccc; -+ if (((value1 & 0xfffff0000000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x04444444)) -+ value1 |= 0x140000000ULL; -+ else if (((value1 & 0xfffffff00000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00044444)) -+ value1 |= 0x1400000ULL; -+ else if (((value1 & 0xfffffffff000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000444)) -+ value1 |= 0x14000ULL; -+ else if (((value1 & 0xfffffffffff0ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000004)) -+ value1 |= 0x140ULL; -+ } -+ -+ entry->Values[0] = value0; -+ entry->Values[1] = value1; -+} -+ -+int -+elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctx, unsigned lowid, unsigned highid, unsigned options) -+{ -+ unsigned int broadcast = (lowid != highid); -+ unsigned int noadaptive = 0; -+ int padbcast = 0; -+ E4_uint16 first; -+ int rb; -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ int level, llink, hlink; -+ -+ regenerate_routes: -+ first = 0; -+ rb = 0; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_MODE_LOOPBACK: -+ if (lowid != highid || lowid != pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (lowid != highid || lowid == pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_SWITCHED: -+ { -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ unsigned int broadcasting = 0; -+ -+ bzero (packed, sizeof (packed)); -+ -+ /* XXXX compute noadaptive ? */ -+ -+ for (level = 0; -+ level < pos->pos_levels && ! ((pos->pos_nodeid / spanned) == (lowid / spanned) && -+ (pos->pos_nodeid / spanned) == (highid / spanned)); -+ level++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = (broadcast || noadaptive) ? FIRST_BCAST_TREE : FIRST_ADAPTIVE; -+ else if (broadcast && padbcast) -+ { -+ padbcast = 0; -+ packed[rb++] = PACKED_BCAST0(4, 4); -+ packed[rb++] = PACKED_BCAST1(4, 4); -+ } -+ else -+ packed[rb++] = (broadcast || noadaptive) ? PACKED_BCAST_TREE : PACKED_ADAPTIVE; -+ } -+ -+ while (level >= 0) -+ { -+ spanned /= *arityp; -+ -+ llink = (lowid / spanned) % *arityp; -+ hlink = (highid / spanned) % *arityp; -+ -+ if (llink != hlink || broadcasting) -+ { -+ broadcasting = 1; -+ -+ if (first == 0) -+ first = FIRST_BCAST (hlink, llink); -+ else -+ { -+ packed[rb++] = PACKED_BCAST0(hlink, llink); -+ -+ if ((rb % 4) == 0 && PACKED_BCAST1(hlink, llink) == 0) -+ { -+ padbcast = 1; -+ goto regenerate_routes; -+ } -+ -+ packed[rb++] = PACKED_BCAST1(hlink, llink); -+ } -+ } -+ else -+ { -+ if (first == 0) -+ first = FIRST_ROUTE(llink); -+ else -+ packed[rb++] = PACKED_ROUTE(llink); -+ } -+ -+ level--; -+ arityp++; -+ } -+ -+ pack_them_routes (route, first | (options & FIRST_OPTIONS_MASK), packed, ctx); -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+int -+elan4_check_route (ELAN_POSITION *postiion, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags) -+{ -+ /* XXXX - TBD */ -+ return (0); -+} -+ -+EXPORT_SYMBOL(elan4_alloc_routetable); -+EXPORT_SYMBOL(elan4_free_routetable); -+EXPORT_SYMBOL(elan4_write_route); -+EXPORT_SYMBOL(elan4_read_route); -+EXPORT_SYMBOL(elan4_invalidate_route); -+EXPORT_SYMBOL(elan4_generate_route); -+EXPORT_SYMBOL(elan4_check_route); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/sdram.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/sdram.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/sdram.c 2005-06-01 23:12:54.617436368 -0400 -@@ -0,0 +1,1034 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.29.6.1 2004/11/29 11:39:13 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/sdram.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+EXPORT_SYMBOL_GPL(elan4_sdram_readb); -+EXPORT_SYMBOL_GPL(elan4_sdram_readw); -+EXPORT_SYMBOL_GPL(elan4_sdram_readl); -+EXPORT_SYMBOL_GPL(elan4_sdram_readq); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeb); -+EXPORT_SYMBOL_GPL(elan4_sdram_writew); -+EXPORT_SYMBOL_GPL(elan4_sdram_writel); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeq); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerob_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerow_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerol_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zeroq_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_alloc); -+EXPORT_SYMBOL_GPL(elan4_sdram_free); -+EXPORT_SYMBOL_GPL(elan4_sdram_flushcache); -+ -+#define SDRAM_MIN_BANK_SIZE ((1 << 15) * 8) /* 256 Kbytes */ -+ -+static inline ELAN4_SDRAM_BANK * -+sdramaddr_to_bank (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ register int i; -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ ELAN4_SDRAM_BANK *bank = &dev->dev_sdram_banks[i]; -+ -+ if (saddr >= bank->b_base && saddr < (bank->b_base + bank->b_size)) -+ return (bank); -+ } -+ printk ("sdramaddr_to_bank: sdram address %lx not in a sdram bank\n", saddr); -+ BUG(); -+ -+ return (NULL); /* NOTREACHED */ -+} -+ -+static inline int -+sdramaddr_to_bankoffset (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ return (saddr & (sdramaddr_to_bank (dev, saddr)->b_size-1)); -+} -+ -+static inline int -+sdramaddr_to_bit(ELAN4_DEV *dev, int indx, sdramaddr_t saddr) -+{ -+ return (sdramaddr_to_bankoffset(dev, saddr) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))); -+} -+ -+static inline ioaddr_t -+sdramaddr_to_ioaddr (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, saddr); -+ -+ return (bank->b_ioaddr + (saddr - bank->b_base)); -+} -+ -+unsigned char -+elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readb (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned short -+elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readw (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned int -+elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readl (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned long long -+elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (readq (sdramaddr_to_ioaddr(dev, off))); -+} -+ -+void -+elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, dest); -+} -+ -+void -+elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, dest); -+} -+ -+void -+elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u32)) -+ writel (0, dest); -+} -+ -+void -+elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, 0, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (0, dest); -+} -+ -+void -+elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u8 *dest = (u8 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u8)) -+ *dest++ = __elan4_readb (dev, src); -+} -+ -+void -+elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u16 *dest = (u16 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u16)) -+ *dest++ = __elan4_readw (dev, src); -+} -+ -+void -+elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u32 *dest = (u32 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u32)) -+ *dest++ = __elan4_readl (dev, src); -+} -+ -+void -+elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u64 *dest = (u64 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, from, (unsigned long) to, nbytes) == 0) -+ return; -+#endif -+ -+ for (; src < lim; src += sizeof (u64)) -+ *dest++ = readq (src); -+} -+ -+void -+elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u8 *src = (u8 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (*src++, dest); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u16 *src = (u16 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, dest); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u32 *src = (u32 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, dest); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u64 *src = (u64 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyto_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, (unsigned long) from, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (*src++, dest); -+ -+ mb(); -+} -+ -+/* sdram buddy allocator */ -+typedef struct sdramblock -+{ -+ sdramaddr_t next; -+ sdramaddr_t prev; -+} sdramblock_t; -+ -+static inline sdramaddr_t -+read_next (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next))); -+} -+ -+static inline sdramaddr_t -+read_prev (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev))); -+} -+ -+static inline void -+write_next (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next))); -+} -+ -+static inline void -+write_prev (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev))); -+} -+ -+static inline void -+freelist_insert (ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t next = dev->dev_sdram_freelists[(idx)]; -+ -+ /* -+ * block->prev = NULL; -+ * block->next = next; -+ * if (next != NULL) -+ * next->prev = block; -+ * freelist = block; -+ */ -+ write_prev (dev, block, (sdramaddr_t) 0); -+ write_next (dev, block, next); -+ if (next != (sdramaddr_t) 0) -+ write_prev (dev, next, block); -+ dev->dev_sdram_freelists[idx] = block; -+ -+ dev->dev_sdram_freecounts[idx]++; -+ dev->dev_stats.s_sdram_bytes_free += (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_remove (ELAN4_DEV *dev,int idx, sdramaddr_t block) -+{ -+ /* -+ * if (block->prev) -+ * block->prev->next = block->next; -+ * else -+ * dev->dev_sdram_freelists[idx] = block->next; -+ * if (block->next) -+ * block->next->prev = block->prev; -+ */ -+ sdramaddr_t blocknext = read_next (dev, block); -+ sdramaddr_t blockprev = read_prev (dev, block); -+ -+ if (blockprev) -+ write_next (dev, blockprev, blocknext); -+ else -+ dev->dev_sdram_freelists[idx] = blocknext; -+ if (blocknext) -+ write_prev (dev, blocknext, blockprev); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_removehead(ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t blocknext = read_next (dev, block); -+ -+ if ((dev->dev_sdram_freelists[idx] = blocknext) != 0) -+ write_prev (dev, blocknext, 0); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+#ifdef DEBUG -+static int -+display_blocks (ELAN4_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "%s - indx %d\n", string, indx); -+ for (block = dev->dev_sdram_freelists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ PRINTF (DBG_DEVICE, DBG_SDRAM, " %x\n", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ -+ return (nbytes); -+} -+ -+void -+elan4_sdram_display (ELAN4_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->dev_sdram_freelists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "\n%d bytes free - %d pages free\n", nbytes, nbytes/SDRAM_PAGE_SIZE); -+} -+ -+void -+elan4_sdram_verify (ELAN4_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->dev_sdram_freelists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned off = sdramaddr_to_bankoffset (dev, block); -+ int bit = sdramaddr_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->b_size) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->b_bitmaps[indx], bit) == 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdramaddr_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->b_bitmaps[i], bit + b)) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->dev_sdram_freecounts[indx] != count) -+ printk ("elan4_sdram_verify: indx=%x expected %d got %d\n", indx, dev->dev_sdram_freecounts[indx], count); -+ } -+} -+ -+#endif -+ -+static void -+free_block (ELAN4_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned bit = sdramaddr_to_bit (dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->b_bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->b_bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%x buddy=%x indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->b_bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->b_bitmaps[indx], bit); -+} -+ -+void -+elan4_sdram_init (ELAN4_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->dev_sdram_lock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->dev_sdram_freelists[indx] = (sdramaddr_t) 0; -+ dev->dev_sdram_freecounts[indx] = 0; -+ } -+} -+ -+void -+elan4_sdram_fini (ELAN4_DEV *dev) -+{ -+ spin_lock_destroy (&dev->dev_sdram_lock); -+} -+ -+#ifdef CONFIG_MPSAS -+/* size of Elan SDRAM in simulation */ -+#define SDRAM_used_addr_bits (16) -+#define SDRAM_SIMULATION_BANK_SIZE ((1 << SDRAM_used_addr_bits) * 8) /* 128 kbytes */ -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ printk ("elan%d: memory bank %d is %d Kb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (SDRAM_SIMULATION_BANK_SIZE / 1024)); -+ -+ bank->b_size = SDRAM_SIMULATION_BANK_SIZE; -+ -+ return 1; -+} -+ -+#else -+ -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ mb(); -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+ -+ read_tag (dev, Tags[set][line]); /* read it back to guarantee the memory system is quite again */ -+ mb(); -+} -+ -+static __inline__ int -+sdram_GreyToBinary(int GreyVal, int NoOfBits) -+{ -+ int Bit; -+ int BinaryVal=0; -+ for (Bit=(1 << (NoOfBits-1)); Bit != 0; Bit >>= 1) -+ BinaryVal ^= (GreyVal & Bit) ^ ((BinaryVal >> 1) & Bit); -+ return (BinaryVal); -+} -+ -+static __inline__ int -+sdram_BinaryToGrey(int BinaryVal) -+{ -+ return (BinaryVal ^ (BinaryVal >> 1)); -+} -+ -+void -+elan4_sdram_setup_delay_lines (ELAN4_DEV *dev) -+{ -+ /* This is used to fix the SDRAM delay line values */ -+ int i, AutoGenDelayValue=0; -+ int NewDelayValue; -+ -+ if (dev->dev_sdram_cfg & SDRAM_FIXED_DELAY_ENABLE) /* already setup. */ -+ return; -+ -+ /* now get an average of 10 dll values */ -+ for (i=0;i<10;i++) -+ AutoGenDelayValue += sdram_GreyToBinary(SDRAM_GET_DLL_DELAY(read_reg64 (dev, SDRamConfigReg)), -+ SDRAM_FIXED_DLL_DELAY_BITS); -+ -+ NewDelayValue = SDRAM_DLL_CORRECTION_FACTOR + (AutoGenDelayValue / 10); /* Mean of 10 values */ -+ -+ dev->dev_sdram_cfg = (dev->dev_sdram_cfg & ~(SDRAM_FIXED_DLL_DELAY_MASK << SDRAM_FIXED_DLL_DELAY_SHIFT)) | -+ SDRAM_FIXED_DELAY_ENABLE | SDRAM_FIXED_DLL_DELAY(sdram_BinaryToGrey(NewDelayValue)); -+ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg); /* Put back the new value */ -+ -+ pioflush_reg (dev); -+} -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ unsigned long mappedsize = bank->b_size; -+ ioaddr_t ioaddr; -+ unsigned long long value, size; -+ register int i; -+ extern int sdram_bank_limit; -+ -+ if (mappedsize > SDRAM_MAX_BLOCK_SIZE) -+ mappedsize = SDRAM_MAX_BLOCK_SIZE; -+ -+ while ((ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, mappedsize, &bank->b_handle)) == 0) -+ { -+ if (mappedsize <= (64*1024*1024)) /* boards normally populated with 64mb, so winge if we can't see this much */ -+ printk ("elan%d: could not map bank %d size %dMb\n", dev->dev_instance, (int)(bank - dev->dev_sdram_banks), (int)mappedsize/(1024*1024)); -+ -+ if ((mappedsize >>= 1) < (1024*1024)) -+ return 0; -+ } -+ -+ /* first probe to see if the memory bank is present */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ for (i = 0; i < 64; i++) -+ { -+ unsigned long long pattern = (1ull << i); -+ -+ writeq (pattern, ioaddr); /* write pattern at base */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ writeq (~pattern, ioaddr + E4_CacheSize); /* write ~pattern at cachesize */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ writeq (~pattern, ioaddr + 2*E4_CacheSize); /* write ~pattern at 2*cachesize */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 2*E4_CacheSize); -+ -+ value = readq (ioaddr); /* read pattern back at 0 */ -+ -+ if (value != pattern) -+ { -+ printk ("elan%d: sdram bank %d not present\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ } -+ -+ /* sdram bank is present, so work out it's size. We store the maximum size at the base -+ * and then store the address at each address on every power of two address until -+ * we reach the minimum mappable size (PAGESIZE), we then read back the value at the -+ * base to determine the bank size */ -+ writeq (mappedsize, ioaddr); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ for (size = mappedsize >> 1; size > PAGE_SIZE; size >>= 1) -+ { -+ writeq (size, ioaddr + size); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, size); -+ } -+ -+ if ((size = readq (ioaddr)) < SDRAM_MIN_BANK_SIZE) -+ { -+ printk ("elan%d: memory bank %d dubious\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ -+ if (sdram_bank_limit == 0 || size <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ else -+ { -+ size = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit bank %d to %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ } -+ -+ bank->b_size = size; -+ -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 1; -+} -+#endif -+ -+int -+elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ bank->b_ioaddr = 0; -+ -+ if (! elan4_sdram_probe_bank (dev, bank)) -+ return 0; -+ -+ if ((bank->b_ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, bank->b_size, &bank->b_handle)) == (ioaddr_t) 0) -+ { -+ printk ("elan%d: could not map sdrambank %d\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ return 0; -+ } -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) /* allocate the buddy allocator bitmaps */ -+ KMEM_ZALLOC (bank->b_bitmaps[indx], bitmap_t *, sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size), 1); -+ -+ return 1; -+} -+ -+void -+elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) -+ KMEM_FREE (bank->b_bitmaps[indx], sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size)); -+ -+ elan4_unmap_device (dev, bank->b_ioaddr, bank->b_size, &bank->b_handle); -+} -+ -+void -+elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ sdramaddr_t base = bank->b_base; -+ sdramaddr_t top = bank->b_base + bank->b_size; -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = SDRAM_MIN_BLOCK_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* need to split a bigger block up */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->dev_sdram_freelists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ printk ("elan4_sdram_alloc: %d bytes failed\n", nbytes); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: use block=%x indx=%d\n", dev->dev_sdram_freelists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->dev_sdram_freelists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdramaddr_to_bank (dev, block)->b_bitmaps[i], sdramaddr_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, block, sizeof (sdramblock_t)); -+#endif -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_free: indx=%d block=%x\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+} -+ -+void -+elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t addr, int len) -+{ -+ int set, off; -+ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* -+ * if flushing more than a single set (8K), then you have to flush the whole cache. -+ * NOTE - in the real world we will probably want to generate a burst across -+ * the pci bus. -+ */ -+ if (len >= E4_CacheSetSize) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => whole cache\n", addr, len, addr + len); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space, E4_CacheSize); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = 0; off < E4_CacheSetSize; off += E4_CacheLineSize) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ else -+ { -+ unsigned base = addr & ~(E4_CACHELINE_SIZE-1); -+ unsigned top = (addr + len + (E4_CACHELINE_SIZE-1)) & ~(E4_CACHELINE_SIZE-1); -+ unsigned baseoff = base & (E4_CacheSetSize-1); -+ unsigned topoff = top & (E4_CacheSetSize-1); -+ -+ if ((base ^ top) & E4_CacheSetSize) /* wraps */ -+ { -+ PRINTF7 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => split cache (%x,%x %x,%x)\n", -+ addr, len, addr + len, 0, topoff, baseoff, E4_CacheSetSize); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize), topoff); -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, E4_CacheSetSize - baseoff); -+ } -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ for (off = 0; off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ -+ for (off = (base & (E4_CacheSetSize-1)); off < E4_CacheSetSize; off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ } -+#endif -+ } -+ else -+ { -+ PRINTF5 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => part cache (%x,%x)\n", -+ addr, len, addr + len, baseoff, topoff); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, topoff - baseoff); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = (base & (E4_CacheSetSize-1)); off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ } -+ pioflush_sdram (dev); -+ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+} -+ -+static char * -+get_correctableErr_bitpos(uint SyndromeBits) -+{ -+ switch (SyndromeBits) -+ { -+ case 0x00: return ("NoErr"); -+ case 0x31: return ("00"); -+ case 0x32: return ("01"); -+ case 0xc4: return ("02"); -+ case 0xc8: return ("03"); -+ case 0x26: return ("04"); -+ case 0x91: return ("05"); -+ case 0x89: return ("06"); -+ case 0x64: return ("07"); -+ case 0xc1: return ("08"); -+ case 0xf2: return ("09"); -+ case 0x34: return ("10"); -+ case 0xf8: return ("11"); -+ case 0xf1: return ("12"); -+ case 0xc2: return ("13"); -+ case 0xf4: return ("14"); -+ case 0x38: return ("15"); -+ case 0xd6: return ("16"); -+ case 0xa1: return ("17"); -+ case 0x79: return ("18"); -+ case 0xa4: return ("19"); -+ case 0xd9: return ("20"); -+ case 0xa2: return ("21"); -+ case 0x76: return ("22"); -+ case 0xa8: return ("23"); -+ case 0xe6: return ("24"); -+ case 0x51: return ("25"); -+ case 0xb9: return ("26"); -+ case 0x54: return ("27"); -+ case 0xe9: return ("28"); -+ case 0x52: return ("29"); -+ case 0xb6: return ("30"); -+ case 0x58: return ("31"); -+ case 0x13: return ("32"); -+ case 0x23: return ("33"); -+ case 0x4c: return ("34"); -+ case 0x8c: return ("35"); -+ case 0x62: return ("36"); -+ case 0x19: return ("37"); -+ case 0x98: return ("38"); -+ case 0x46: return ("39"); -+ case 0x1c: return ("40"); -+ case 0x2f: return ("41"); -+ case 0x43: return ("42"); -+ case 0x8f: return ("43"); -+ case 0x1f: return ("44"); -+ case 0x2c: return ("45"); -+ case 0x4f: return ("46"); -+ case 0x83: return ("47"); -+ case 0x6d: return ("48"); -+ case 0x1a: return ("49"); -+ case 0x97: return ("50"); -+ case 0x4a: return ("51"); -+ case 0x9d: return ("52"); -+ case 0x2a: return ("53"); -+ case 0x67: return ("54"); -+ case 0x8a: return ("55"); -+ case 0x6e: return ("56"); -+ case 0x15: return ("57"); -+ case 0x9b: return ("58"); -+ case 0x45: return ("59"); -+ case 0x9e: return ("60"); -+ case 0x25: return ("61"); -+ case 0x6b: return ("62"); -+ case 0x85: return ("63"); -+ case 0x01: return ("C0"); -+ case 0x02: return ("C1"); -+ case 0x04: return ("C2"); -+ case 0x08: return ("C3"); -+ case 0x10: return ("C4"); -+ case 0x20: return ("C5"); -+ case 0x40: return ("C6"); -+ case 0x80: return ("C7"); -+ -+ case 0x07: case 0x0b: case 0x0d: case 0x0e: case 0x3d: case 0x3e: case 0x70: case 0x7c: // T -+ case 0xb0: case 0xbc: case 0xc7: case 0xcb: case 0xd0: case 0xd3: case 0xe0: case 0xe3: // T -+ return ("triple"); -+ -+ case 0x0f: case 0x55: case 0x5a: case 0xa5: case 0xaa: case 0xf0: case 0xff: // Q -+ return ("quadruple"); -+ -+ case 0x16: case 0x29: case 0x37: case 0x3b: case 0x49: case 0x57: case 0x5b: case 0x5d: case 0x5e: case 0x61: // M -+ case 0x68: case 0x73: case 0x75: case 0x7a: case 0x7f: case 0x86: case 0x92: case 0x94: case 0xa7: case 0xab: // M -+ case 0xad: case 0xae: case 0xb3: case 0xb5: case 0xba: case 0xbf: case 0xcd: case 0xce: case 0xd5: case 0xda: // M -+ case 0xdc: case 0xdf: case 0xe5: case 0xea: case 0xec: case 0xef: case 0xf7: case 0xfb: case 0xfd: case 0xfe: // M -+ return ("multiple"); -+ -+ default: // all other cases -+ return ("double"); -+ } -+} -+ -+char * -+elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, char *str) -+{ -+ E4_uint64 StartupSyndrome = dev->dev_sdram_initial_ecc_val; -+ int RisingDQSsyndrome = ((ECC_RisingDQSSyndrome(status) == ECC_RisingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_RisingDQSSyndrome(status)); -+ int FallingDQSsyndrome = ((ECC_FallingDQSSyndrome(status) == ECC_FallingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_FallingDQSSyndrome(status)); -+ E4_uint64 Addr = ECC_Addr(status); -+ int Bank = (Addr >> 6) & 3; -+ int Cas = ((Addr >> 3) & 7) | ((Addr >> (8 - 3)) & 0xf8) | ((Addr >> (25 - 8)) & 0x100) | -+ ((Addr >> (27 - 9)) & 0x200) | ((Addr >> (29 - 10)) & 0xc00); -+ int Ras = ((Addr >> 13) & 0xfff) | ((Addr >> (26 - 12)) & 0x1000) | ((Addr >> (28 - 13)) & 0x2000) | -+ ((Addr >> (30 - 14)) & 0x4000); -+ -+ sprintf (str, "Addr=%07llx Bank=%x Ras=%x Cas=%x Falling DQS=%s Rising DQS=%s Syndrome=%x%s%s%s%s", /* 41 + 16 + 8 + 15 + 24 + 13 + 22 + 10 + 10 == 151 */ -+ (long long)Addr, Bank, Ras, Cas, -+ get_correctableErr_bitpos(FallingDQSsyndrome), -+ get_correctableErr_bitpos(RisingDQSsyndrome), -+ (int)ECC_Syndrome(status), -+ ECC_UncorrectableErr(status) ? " Uncorrectable" : "", -+ ECC_MultUncorrectErrs(status) ? " Multiple-Uncorrectable" : "", -+ ECC_CorrectableErr(status) ? " Correctable" : "", -+ ECC_MultCorrectErrs(status) ? " Multiple-Correctable" : ""); -+ -+ return str; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/trap.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/trap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/trap.c 2005-06-01 23:12:54.619436064 -0400 -@@ -0,0 +1,778 @@ -+/* -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.c,v 1.19.10.2 2004/11/03 14:24:32 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+char * const PermTypes[16] = -+{ -+ "Disabled", "Unused", "LocalDataRead", "LocalDataWrite", -+ "LocalRead", "LocalExecute", "ReadOnly", "LocalWrite", -+ "LocalEventOnly", "LocalEventWrite", "RemoteEvent", "RemoteAll", -+ "RemoteReadOnly", "RemoteWriteOnly", "DataReadWrite", "NoFault", -+}; -+ -+char * const AccTypes[] = -+{ -+ "LocalDataRead ", "LocalDataWrite", "RemoteRead ", "RemoteWrite ", -+ "Execute ", "LocalEvent ", "Unused ", "RemoteEvent " -+}; -+char * const DataTypes[] = {"Byte ", "HWord", "Word ", "DWord"}; -+char * const PhysTypes[] = {"Special Read", "Special Write", "Physical Read", "Physical Write"}; -+ -+char * const EProcTrapNames[] = { -+ "EventProcNoFault", -+ "EventProcAddressAlignment", -+ "EventProcMemoryFault", -+ "EventProcCountWrapError", -+}; -+ -+char * const CProcTrapNames[] = { -+ "CommandProcNoFault", -+ "CommandProcInserterError", -+ "CommandProcPermissionTrap", -+ "CommandProcSendTransInvalid", -+ "CommandProcSendTransExpected", -+ "CommandProcDmaQueueOverflow", -+ "CommandProcInterruptQueueOverflow", -+ "CommandProcMemoryFault", -+ "CommandProcRouteFetchFault", -+ "CommandProcFailCountZero", -+ "CommandProcAddressAlignment", -+ "CommandProcWaitTrap", -+ "CommandProcMultipleGuards", -+ "CommandProcOpenOnGuardedChan", -+ "CommandProcThreadQueueOverflow", -+ "CommandProcBadData", -+}; -+ -+char *const CProcInsertError[] = { -+ "No Error", -+ "Overflowed", -+ "Invalid Write Size", -+ "Invalid Write Order", -+}; -+ -+char * const DProcTrapNames[] = { -+ "DmaProcNoFault", -+ "DmaProcRouteFetchFault", -+ "DmaProcFailCountError", -+ "DmaProcPacketAckError", -+ "DmaProcRunQueueReadFault", -+ "DmaProcQueueOverFlow", -+}; -+ -+char *const IProcTrapNames[] = { -+ "InputNoFault", -+ "InputAddressAlignment", -+ "InputMemoryFault", -+ "InputInvalidTransType", -+ "InputDmaQueueOverflow", -+ "InputEventEngineTrapped", -+ "InputCrcErrorAfterPAckOk", -+ "InputEopErrorOnWaitForEop", -+ "InputEopErrorTrap", -+ "InputDiscardAfterAckOk", -+}; -+ -+char *const TProcTrapNames[] = { -+ "HaltThread", -+ "TrapForTooManyInstructions", -+ "InstAccessException", -+ "Unimplemented", -+ "DataAccessException", -+ "DataAlignmentError", -+ "TrapForUsingBadData", -+}; -+ -+#define declare_spaces(space, str) char space[64]; do { int i; for (i = 0; i < strlen(str); i++) spaces[i] = ' '; space[i] = '\0'; } while (0) -+#define declare_prefix(space, spaces, str) char space[64]; do { strcpy (space, spaces); strcat (space, str); } while (0) -+ -+void -+elan4_display_farea (void *type, int mode, char *str, E4_FaultSave *farea) -+{ -+ E4_uint32 FSR = FaultSaveFSR(farea->FSRAndFaultContext); -+ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Fault occurred at %016llx for context %4x\n", str, -+ farea->FaultAddress, FaultSaveContext(farea->FSRAndFaultContext)); -+ -+ if (FSR & AT_VirtualWriteAccBit) /* Virtual write access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Write. DWSize=0x%x EndP=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualWriteSizeMask, -+ (FSR >> AT_VirtualWriteEndPtrShift) & AT_VirtualWriteEndPtrMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else if (FSR & AT_VirtualReadAccBit) /* Virtual read access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Read. DWSize=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualReadSizeMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else -+ elan4_debugf (type, mode, "%s FSR=%x: %s. Size=0x%x\n", spaces, -+ FSR, PhysTypes[(FSR >> AT_SelBitsShift) & AT_SelBitsMask], -+ FSR & AT_OtherSizeMask); -+ elan4_debugf (type, mode, "%s FSR: %s %s%s %sWalking\n", spaces, -+ (FSR & AT_NonAlloc) ? "NonAlloc" : "Alloc", -+ (FSR & AT_DmaData) ? "Dma " : "", -+ (FSR & FSR_WalkForThread) ? "ThreadAcc" : "UnitsAcc", -+ (FSR & FSR_Walking) ? "" : "Not"); -+ PRINTF (type, mode, "%s FSR: %s%sHashTable=%s\n", spaces, -+ (FSR & FSR_NoTranslationsFound) ? "NoTranslationsFound " : "", -+ (FSR & FSR_WalkingProtectionFault) ? "WalkingProtectionFault " : "", -+ (FSR & FSR_HashTable1) ? "1" : "0"); -+ if (FSR & (FSR_RouteVProcErr | FSR_FaultForBadData)) -+ elan4_debugf (type, mode, "%s FSR: %s%s\n", spaces, -+ (FSR & FSR_RouteVProcErr) ? "RouteVProcErr " : "", -+ (FSR & FSR_FaultForBadData) ? "FaultForBadData " : ""); -+} -+ -+void -+elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%016llx %s EventAddr=%016llx CountAndType=%016llx\n", str, -+ trap->tr_status, EProcTrapNames[EPROC_TrapType(trap->tr_status)], -+ trap->tr_eventaddr, trap->tr_event.ev_CountAndType); -+ elan4_debugf (type, mode, "%s Param=%016llx.%016llx\n", spaces, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ -+ elan4_display_farea (type, mode, strcat (spaces, EPROC_Port0Fault(trap->tr_status) ? " EPROC0" : " EPROC1"), &trap->tr_faultarea); -+} -+ -+void -+elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap) -+{ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%llx %s Command=%llx\n", str, trap->tr_status, -+ CProcTrapNames[CPROC_TrapType(trap->tr_status)], trap->tr_command); -+ elan4_debugf (type, mode, "%s Desc=%016llx %016llx %016llx %016llx\n", str, -+ trap->tr_qdesc.CQ_QueuePtrs, trap->tr_qdesc.CQ_HoldingValue, -+ trap->tr_qdesc.CQ_AckBuffers, trap->tr_qdesc.CQ_Control); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInserterError: -+ elan4_debugf (type, mode, "%s %s\n", str, CProcInsertError[CQ_RevB_ErrorType(trap->tr_qdesc.CQ_QueuePtrs)]); -+ break; -+ -+ case CommandProcWaitTrap: -+ elan4_display_eproc_trap (type, mode, spaces, &trap->tr_eventtrap); -+ break; -+ -+ default: -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+ break; -+ } -+} -+ -+void -+elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s status %llx - %s\n", str, -+ trap->tr_status, DProcTrapNames[DPROC_TrapType(trap->tr_status)]); -+ -+ elan4_debugf (type, mode, "%s DESC %016llx %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_typeSize, -+ trap->tr_desc.dma_cookie, trap->tr_desc.dma_vproc, trap->tr_desc.dma_srcAddr); -+ elan4_debugf (type, mode, "%s %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_dstAddr, -+ trap->tr_desc.dma_srcEvent, trap->tr_desc.dma_dstEvent); -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ elan4_display_farea (type, mode, spaces, &trap->tr_prefetchFault); -+} -+ -+void -+elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s PC=%016llx nPC=%016llx State=%016llx Status=%016llx -%s%s%s%s\n", str, -+ trap->tr_pc, trap->tr_npc, trap->tr_state, trap->tr_status, -+ (trap->tr_state & TS_TrapForTooManyInstructions) ? " TrapForTooManyInstructions" : "", -+ (trap->tr_state & TS_Unimplemented) ? " Unimplemented" : "", -+ (trap->tr_state & TS_DataAlignmentError) ? " DataAlignmentError" : "", -+ (trap->tr_state & TS_InstAccessException) ? " InstAccessException" : "", -+ (trap->tr_state & TS_DataAccessException) ? " DataAlignmentError" : ""); -+ -+ for (i = 0; i < 64; i += 4) -+ elan4_debugf (type, mode, "%s r%d - %016llx %016llx %016llx %016llx\n", spaces, i, -+ trap->tr_regs[i], trap->tr_regs[i+1], trap->tr_regs[i+2], trap->tr_regs[i+3]); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ { -+ declare_prefix (prefix, spaces, "Inst"); -+ -+ elan4_display_farea (type, mode, prefix, &trap->tr_instFault); -+ } -+ -+ if (trap->tr_state & TS_DataAccessException) -+ { -+ declare_prefix (prefix, spaces, "Data"); -+ elan4_display_farea (type, mode, prefix, &trap->tr_dataFault); -+ } -+} -+ -+void -+elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ for (i = 0; i < trap->tr_numTransactions; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ E4_Addr addr = hdrp->TrAddr; -+ char *typeString; -+ char buffer[256]; -+ char *ptr = buffer; -+ -+ if (IPROC_EOPTrap(status)) -+ { -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: typeString = "EopGood"; break; -+ case EOP_BADACK: typeString = "EopBadAck"; break; -+ case EOP_ERROR_RESET: typeString = "EopReset"; break; -+ default: typeString = "EopBad"; break; -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d", typeString, IPROC_NetworkContext(status)); -+ } -+ else -+ { -+ if (IPROC_BadLength(status)) -+ typeString = "BadLength"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_DISCARD) -+ typeString = "DiscardCrc"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_ERROR) -+ typeString = "ErrorCrc Remote Network error"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_BAD) -+ typeString = "BadCrc Cable error into this node."; -+ else -+ { -+ if ((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) -+ typeString = "WriteBlock"; -+ else -+ { -+ switch (IPROC_TransactionType(status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: typeString = "SetEvent"; break; -+ case TR_REMOTEDMA & TR_OPCODE_MASK: typeString = "RemoteDma"; break; -+ case TR_SENDDISCARD & TR_OPCODE_MASK: typeString = "SendDiscard"; break; -+ case TR_GTE & TR_OPCODE_MASK: typeString = "GTE"; break; -+ case TR_LT & TR_OPCODE_MASK: typeString = "LT"; break; -+ case TR_EQ & TR_OPCODE_MASK: typeString = "EQ"; break; -+ case TR_NEQ & TR_OPCODE_MASK: typeString = "NEQ"; break; -+ case TR_IDENTIFY & TR_OPCODE_MASK: typeString = "Idenfity"; break; -+ case TR_ADDWORD & TR_OPCODE_MASK: typeString = "AddWord"; break; -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: typeString = "InputQCommit"; break; -+ case TR_TESTANDWRITE & TR_OPCODE_MASK: typeString = "TestAndWrite"; break; -+ case TR_INPUT_Q_GETINDEX & TR_OPCODE_MASK: typeString = "InputQGetIndex"; break; -+ case TR_TRACEROUTE_TRANS & TR_OPCODE_MASK: typeString = "TraceRoute"; break; -+ default: typeString = "Unknown"; break; -+ } -+ } -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d Addr=%016llx", typeString, IPROC_NetworkContext(status), (unsigned long long) addr); -+ } -+ -+ -+ if (IPROC_TrapValue(status) != InputNoFault) -+ { -+ ptr += sprintf (ptr, " TrType=%2d ChanTrapped=%x GoodAck=%x BadAck=%x InputterChan=%d", IPROC_TrapValue(status), -+ IPROC_ChannelTrapped(status), IPROC_GoodAckSent(status), IPROC_BadAckSent(status), -+ IPROC_InputterChan(status)); -+ if (IPROC_EOPTrap(status)) -+ ptr += sprintf (ptr, " EOPType=%d", IPROC_EOPType(status)); -+ else -+ ptr += sprintf (ptr, " %s%s%s%s", -+ IPROC_FirstTrans(status) ? " FirstTrans" : "", -+ IPROC_LastTrans(status) ? " LastTrans" : "", -+ (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP) ? " WaitForEop" : "", -+ (IPROC_GoodAckSent(status) & (1 << IPROC_Channel(status))) ? " AckSent" : ""); -+ } -+ -+ elan4_debugf (type, mode, "%s %s\n", str, buffer); -+ -+ str = spaces; -+ } -+ -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+} -+ -+#define elan4_sdram_copy_faultarea(dev, unit, farea) \ -+ elan4_sdram_copyq_from_sdram ((dev), (dev)->dev_faultarea + (unit) * sizeof (E4_FaultSave), (E4_uint64 *) farea, sizeof (E4_FaultSave)); -+ -+void -+elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent) -+{ -+ /* only one of the memory ports can fault at a time */ -+ ASSERT (EPROC_TrapType(status) != EventProcMemoryFault || (EPROC_Port0Fault(status) ^ EPROC_Port1Fault(status)) == 1); -+ -+ trap->tr_status = status; -+ -+ if (EPROC_Port0Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc0, &trap->tr_faultarea); -+ if (EPROC_Port1Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc1, &trap->tr_faultarea); -+ -+ if (iswaitevent) -+ { -+ /* -+ * for waitevents the Event address is always taken from the command processor -+ * -+ * if we trapped during the copy then we take the "Event" from the event processor -+ * since we need to complete the copy. Otherwise we'll be reissuing the original -+ * command again -+ */ -+ E4_uint32 fsr = FaultSaveFSR(trap->tr_faultarea.FSRAndFaultContext); -+ -+ trap->tr_eventaddr = read_reg64 (dev, CommandHold) ^ WAIT_EVENT_CMD; -+ -+ if (EPROC_TrapType(trap->tr_status) == EventProcMemoryFault && -+ (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite)) -+ { -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ else -+ { -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, CommandCopy[5]); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, CommandCopy[4]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, CommandCopy[6]); -+ -+ } -+ } -+ else -+ { -+ trap->tr_eventaddr = read_reg64 (dev, EventAddress); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ -+ BumpDevStat (dev, s_eproc_trap_types[EPROC_TrapType(status)]); -+} -+ -+int -+cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ /* cq = ucq->ucq_cq */ -+ if ((cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ E4_uint64 openCommand = 0; -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: /* open */ -+ return (command >> 32); -+ -+ break; /* Not reached */ -+ -+ case 2: -+ if (openCommand == 0) -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ /* Else we should have stopped by now */ -+ else ASSERT(1==2); -+ case 3: -+ printk ("cproc_open_extract_vp: invalid command %llx\n", command); -+ return -1; -+ } -+ } /* while */ -+ } -+ -+ return -1; -+} -+ -+void -+elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum) -+{ -+ /* extract the state from the device */ -+ elan4_sdram_copy_faultarea (dev, CUN_CommandProc, &trap->tr_faultarea); -+ -+ trap->tr_status = status; -+ trap->tr_command = read_reg64 (dev, CommandHold); -+ -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)), &trap->tr_qdesc, sizeof (E4_CommandQueueDesc)); -+ -+ if (CPROC_TrapType (status) == CommandProcWaitTrap) -+ elan4_extract_eproc_trap (dev, read_reg64 (dev, EProcStatus), &trap->tr_eventtrap, 1); -+ -+ BumpDevStat (dev, s_cproc_trap_types[CPROC_TrapType(status)]); -+ -+ if (PackValue(trap->tr_qdesc.CQ_AckBuffers, 0) == PackTimeout || PackValue(trap->tr_qdesc.CQ_AckBuffers, 1) == PackTimeout) -+ BumpDevStat (dev, s_cproc_timeout); -+} -+ -+void -+elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit) -+{ -+ trap->tr_status = status; -+ -+ if (unit == 0) -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma0Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma0Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma0Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma0Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma0Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma0Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma0Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA0, &trap->tr_packAssemFault); -+ } -+ else -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma1Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma1Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma1Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma1Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma1Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma1Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma1Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA1, &trap->tr_packAssemFault); -+ } -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ elan4_sdram_copy_faultarea (dev, (CUN_DProcData0 | DPROC_FaultUnitNo(trap->tr_status)), &trap->tr_prefetchFault); -+ -+ if (DPROC_PacketTimeout (trap->tr_status)) -+ BumpDevStat (dev, s_dproc_timeout); -+ -+ BumpDevStat (dev, s_dproc_trap_types[DPROC_TrapType(status)]); -+} -+ -+void -+elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_status = status; -+ trap->tr_state = read_reg64 (dev, Thread_Trap_State); -+ trap->tr_pc = read_reg64 (dev, PC_W); -+ trap->tr_npc = read_reg64 (dev, nPC_W); -+ trap->tr_dirty = read_reg64 (dev, DirtyBits); -+ trap->tr_bad = read_reg64 (dev, BadBits); -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS, -+ ((dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) ? ELAN4_REVA_REG_OFFSET : ELAN4_REVB_REG_OFFSET) + -+ offsetof (E4_Registers, Regs.TProcRegs), (unsigned long) &trap->tr_regs, 64*sizeof (E4_uint64)) < 0) -+ { -+ for (i = 0; i < 64; i++) -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ } -+ -+ for (i = 0; i < 64; i++) -+ if (! (trap->tr_dirty & ((E4_uint64) 1 << i))) -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+#else -+ for (i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ else -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+ } -+#endif -+ -+ if (trap->tr_state & TS_DataAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcData0 | TS_DataPortNo (trap->tr_state), &trap->tr_dataFault); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcInst, &trap->tr_instFault); -+ -+ for (i = 0; i < 7; i++) -+ if (trap->tr_state & (1 << i)) -+ BumpDevStat (dev, s_tproc_trap_types[i]); -+} -+ -+void -+elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ sdramaddr_t dataoff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrData[0][unit]); -+ register int i, j; -+ int CurrUnitNo = (unit >= 2) ? CUN_IProcHighPri : CUN_IProcLowPri; -+ sdramaddr_t CurrFaultArea = dev->dev_faultarea + (CurrUnitNo * sizeof (E4_FaultSave)); -+ -+ /* Finally copy the fault area */ -+ elan4_sdram_copy_faultarea (dev, CurrUnitNo, &trap->tr_faultarea); -+ -+ /* -+ * Clear out the fault save area after reading to allow a fault on the write of the back pointer of -+ * an InputQCommit to be obsurved if a simultaneous event proc trap occurs. -+ */ -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FSRAndFaultContext), 0x0ULL); -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FaultAddress), 0x0ULL); -+ -+ /* copy the transaction headers */ -+ trap->tr_transactions[0].IProcStatusCntxAndTrType = status; -+ trap->tr_transactions[0].TrAddr = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, TrAddr)); -+ -+ for (i = 0; !IPROC_EOPTrap(trap->tr_transactions[i].IProcStatusCntxAndTrType);) -+ { -+ if (IPROC_BadLength (trap->tr_transactions[i].IProcStatusCntxAndTrType)) -+ BumpDevStat (dev, s_bad_length); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_BAD) -+ BumpDevStat (dev, s_crc_bad); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_ERROR) -+ BumpDevStat (dev, s_crc_error); -+ -+ BumpDevStat (dev, s_iproc_trap_types[IPROC_TrapValue (trap->tr_transactions[i].IProcStatusCntxAndTrType)]); -+ -+ hdroff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapHeader); -+ -+ if (++i == MAX_TRAPPED_TRANS) -+ break; -+ -+ elan4_sdram_copyq_from_sdram (dev, hdroff, &trap->tr_transactions[i], sizeof (E4_IprocTrapHeader)); -+ } -+ -+ if (IPROC_EOPType (trap->tr_transactions[i].IProcStatusCntxAndTrType) == EOP_ERROR_RESET) -+ BumpDevStat (dev, s_eop_reset); -+ -+ /* Remember the number of transactions we've copied */ -+ trap->tr_numTransactions = i + 1; -+ -+ /* Copy all the data blocks in one go */ -+ for (i = 0; i < MIN (trap->tr_numTransactions, MAX_TRAPPED_TRANS); i++, dataoff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapData)) -+ { -+ if (IPROC_BadLength(status) || IPROC_TransCRCStatus (status) != CRC_STATUS_GOOD) -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, TRANS_DATA_DWORDS*sizeof(E4_uint64)); -+ else -+ { -+ int trtype = IPROC_TransactionType(trap->tr_transactions[i].IProcStatusCntxAndTrType); -+ int ndwords = (trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT; -+ -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, ndwords*sizeof(E4_uint64)); -+ -+ for (j = ndwords; j < TRANS_DATA_DWORDS; j++) -+ trap->tr_dataBuffers[i].Data[j] = 0xbeec0f212345678ull; -+ } -+ } -+ -+} -+ -+void -+elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_flags = 0; -+ trap->tr_trappedTrans = TR_TRANS_INVALID; -+ trap->tr_waitForEopTrans = TR_TRANS_INVALID; -+ trap->tr_identifyTrans = TR_TRANS_INVALID; -+ -+ if (trap->tr_numTransactions > MAX_TRAPPED_TRANS) -+ trap->tr_flags = TR_FLAG_TOOMANY_TRANS; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < MIN(trap->tr_numTransactions, MAX_TRAPPED_TRANS) ; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ -+ if (trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ switch (IPROC_TransactionType (status) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ trap->tr_identifyTrans = i; -+ break; -+ } -+ } -+ -+ if (IPROC_TrapValue(status) == InputNoFault) /* We're looking at transactions stored before the trap */ -+ continue; /* these should only be identifies */ -+ -+ if (trap->tr_trappedTrans == TR_TRANS_INVALID) /* Remember the transaction which caused the */ -+ trap->tr_trappedTrans = i; /* trap */ -+ -+ if (IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) -+ trap->tr_flags |= TR_FLAG_ACK_SENT; -+ -+ if (IPROC_EOPTrap(status)) /* Check for EOP */ -+ { -+ ASSERT (i == trap->tr_numTransactions - 1); -+ -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. WFlag this to ignore the AckSent. */ -+ trap->tr_flags |= TR_FLAG_EOP_BAD; -+ break; -+ -+ case EOP_ERROR_RESET: -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->tr_flags |= TR_FLAG_EOP_ERROR; -+ break; -+ -+ default: -+ printk ("elan4_inspect_iproc_trap: unknown eop type %d", IPROC_EOPType(status)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ else -+ { -+ if (IPROC_BadLength(status) || (IPROC_TransCRCStatus (status) == CRC_STATUS_ERROR || -+ IPROC_TransCRCStatus (status) == CRC_STATUS_BAD)) -+ { -+ { -+ register int j; -+ if (IPROC_BadLength(status)) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped on bad length data. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ else -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped with bad CRC. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ for (j = 0; j < TRANS_DATA_DWORDS; j++) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: DataBuffers[%d] : %016llx\n", j, trap->tr_dataBuffers[i].Data[j]); -+ } -+ -+ trap->tr_flags |= TR_FLAG_BAD_TRANS; -+ continue; -+ } -+ -+ if (IPROC_TransCRCStatus (status) == CRC_STATUS_DISCARD) -+ continue; -+ -+ if ((((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) || -+ (IPROC_TransactionType(status) == TR_TRACEROUTE_TRANS)) && -+ (trap->tr_flags & TR_FLAG_ACK_SENT) && trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ /* -+ * Writeblock after the ack is sent without an identify transaction - this is -+ * considered to be a DMA packet and requires the next packet to be nacked - since -+ * the DMA processor will send this in a deterministic time and there's an upper -+ * limit on the network latency (the output timeout) we just need to hold the context -+ * filter up for a while. -+ */ -+ trap->tr_flags |= TR_FLAG_DMA_PACKET; -+ } -+ -+ if (IPROC_LastTrans(status) && (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP)) -+ { -+ /* -+ * WaitForEop transactions - if we have to do network error fixup -+ * then we may need to execute/ignore this transaction dependant -+ * on whether the source will be resending it. -+ */ -+ trap->tr_waitForEopTrans = i; -+ } -+ -+ /* -+ * This is a special case caused by a minor input processor bug. -+ * If simultaneous InputMemoryFault and InputEventEngineTrapped occur then the chip will probably return -+ * InputEventEngineTrapped even though the write of the back pointer has not occured and must be done by -+ * the trap handler. -+ * In this case the fault address will equal q->q_bptr. If there has been only EventEngineTrap then the -+ * the fault address should be zero as the trap handler now always zeros this after every input trap. -+ */ -+ if ((IPROC_TransactionType (status) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ trap->tr_faultarea.FaultAddress == hdrp->TrAddr + offsetof(E4_InputQueue, q_bptr) && -+ IPROC_TrapValue(status) == InputEventEngineTrapped) -+ { -+ hdrp->IProcStatusCntxAndTrType = (status & 0xFFFFFFF0FFFFFFFFull) | ((E4_uint64) InputMemoryFault << 32); -+ } -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "inspect[%d] status=%llx TrapValue=%d -> flags %x\n", i, status, IPROC_TrapValue(status), trap->tr_flags); -+ } -+} -+ -+E4_uint64 -+elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc); -+ E4_uint64 cqcontrol = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint32 extractOff = CQ_ExtractPtr (cqcontrol) & (CQ_Size(cq->cq_size)-1); -+ -+ if (extractOff == 0) -+ extractOff = CQ_Size(cq->cq_size) - sizeof (E4_uint64); -+ else -+ extractOff -= sizeof (E4_uint64); -+ -+ return (elan4_sdram_readq (dev, cq->cq_space + extractOff)); -+} -+ -+EXPORT_SYMBOL(elan4_extract_eproc_trap); -+EXPORT_SYMBOL(elan4_display_eproc_trap); -+EXPORT_SYMBOL(elan4_extract_cproc_trap); -+EXPORT_SYMBOL(elan4_display_cproc_trap); -+EXPORT_SYMBOL(elan4_extract_dproc_trap); -+EXPORT_SYMBOL(elan4_display_dproc_trap); -+EXPORT_SYMBOL(elan4_extract_tproc_trap); -+EXPORT_SYMBOL(elan4_display_tproc_trap); -+EXPORT_SYMBOL(elan4_extract_iproc_trap); -+EXPORT_SYMBOL(elan4_inspect_iproc_trap); -+EXPORT_SYMBOL(elan4_display_iproc_trap); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/user.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/user.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/user.c 2005-06-01 23:12:54.624435304 -0400 -@@ -0,0 +1,3352 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.c,v 1.68.2.9 2004/12/20 16:56:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define elan_attach_cap(cap,rnum,args,func) elanmod_attach_cap(cap,args,func) -+#define elan_detach_cap(cap,rnum) elanmod_detach_cap(cap) -+#endif -+ -+#define NETERR_MSGS 16 -+ -+int user_p2p_route_options = FIRST_TIMEOUT(3); -+int user_bcast_route_options = FIRST_TIMEOUT(3); -+int user_dproc_retry_count = 15; -+int user_cproc_retry_count = 2; -+ -+int num_fault_save = 30; -+int min_fault_pages = 1; -+int max_fault_pages = 128; -+ -+static int -+user_validate_cap (USER_CTXT *uctx, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN4_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN4_SYSTEM_CONTEXT (cap->cap_highcontext)) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP,"user_validate_cap: lctx %x hctx %x high %x\n", cap->cap_lowcontext, cap->cap_highcontext, ELAN4_KCOMM_BASE_CONTEXT_NUM); -+ PRINTF0 (DBG_DEVICE, DBG_VP,"user_validate_cap: user process cant attach to system cap\n"); -+ return (EINVAL); -+ } -+ -+ return elanmod_classify_cap(&uctx->uctx_position, cap, use); -+} -+ -+static __inline__ void -+__user_signal_trap (USER_CTXT *uctx) -+{ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_IDLE: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deliver signal %d to pid %d\n", uctx->uctx_trap_signo, uctx->uctx_trap_pid); -+ -+ if (uctx->uctx_trap_signo) -+ kill_proc (uctx->uctx_trap_pid, uctx->uctx_trap_signo, 1); -+ break; -+ -+ case UCTX_TRAP_SLEEPING: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: wakeup sleeping trap handler\n"); -+ -+ kcondvar_wakeupone (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ break; -+ } -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+} -+ -+static void -+user_signal_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_timer: state=%d pid=%d signal=%d (now %d start %d)\n", -+ uctx->uctx_trap_state, uctx->uctx_trap_pid, uctx->uctx_trap_signo, jiffies, -+ uctx->uctx_int_start); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ __user_signal_trap (uctx); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+#define MAX_INTS_PER_TICK 50 -+#define MIN_INTS_PER_TICK 20 -+ -+static void -+user_signal_trap (USER_CTXT *uctx) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: state=%d pid=%d signal=%d%s\n", uctx->uctx_trap_state, -+ uctx->uctx_trap_pid, uctx->uctx_trap_signo, timer_pending(&uctx->uctx_int_timer) ? " (timer-pending)" : ""); -+ -+ uctx->uctx_int_count++; -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ return; -+ -+ if (uctx->uctx_int_count > ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deferring signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)); -+ -+ /* We're interrupting too fast, so defer this signal */ -+ uctx->uctx_int_timer.expires = jiffies + (++uctx->uctx_int_delay); -+ -+ add_timer (&uctx->uctx_int_timer); -+ } -+ else -+ { -+ __user_signal_trap (uctx); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: check signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ (int)(jiffies - uctx->uctx_int_start) * MIN_INTS_PER_TICK); -+ -+ if (uctx->uctx_int_count < ((int) (jiffies - uctx->uctx_int_start)) * MIN_INTS_PER_TICK) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: reset interrupt throttle (count %d ticks %d)\n", -+ uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start)); -+ -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ } -+ } -+} -+ -+static void -+user_neterr_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_status |= UCTX_NETERR_TIMER; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush_dma_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, DProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (typeSize) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ PRINTF4 (uctx, DBG_SWAP, "user_flush_dma_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Desc.dma_typeSize, -+ qentry.Desc.dma_cookie, qentry.Desc.dma_vproc, qentry.Desc.dma_srcAddr); -+ PRINTF3 (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", qentry.Desc.dma_dstAddr, -+ qentry.Desc.dma_srcEvent, qentry.Desc.dma_dstEvent); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dmaQ)) -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = qentry.Desc; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_thread_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, TProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_TProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 context = elan4_sdram_readq (dev, qfptr + offsetof (E4_TProcQueueEntry, Context)); -+ -+ if (TPROC_Context (context) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_TProcQueueEntry)); -+ -+ PRINTF (uctx, DBG_SWAP, "user_flush_thread_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Regs.Registers[0], -+ qentry.Regs.Registers[1], qentry.Regs.Registers[2], qentry.Regs.Registers[3]); -+ PRINTF (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", -+ qentry.Regs.Registers[4], qentry.Regs.Registers[5], qentry.Regs.Registers[6]); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_threadQ)) -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = qentry.Regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ } -+ -+ /* change the thread to execute the suspend sequence */ -+ qentry.Regs.Registers[0] = dev->dev_tproc_suspend; -+ qentry.Regs.Registers[1] = dev->dev_tproc_space; -+ qentry.Context = dev->dev_ctxt.ctxt_num; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_TProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_TProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_dmas (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & INT_DProcHalted) != 0); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ user_flush_dma_runqueue (dev, uctx, qfull); -+ -+ uctx->uctx_status = (uctx->uctx_status | UCTX_STOPPED) & ~UCTX_STOPPING; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: statux %x - stopped\n", uctx->uctx_status); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush (ELAN4_DEV *dev, void *arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & (INT_Halted|INT_Discarding)) == (INT_Halted|INT_Discarding)); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - flushing context\n", uctx->uctx_status); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_RUNNING) -+ { -+ /* NOTE: since the inserter can still be running we modify the permissions -+ * to zero then when the extractor starts up again it will trap */ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: stopping cq indx=%d\n", elan4_cq2idx(ucq->ucq_cq)); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, 0, 0); -+ } -+ } -+ -+ user_flush_thread_runqueue (dev, uctx, TPROC_LowRunQueueFull(read_reg64 (dev, TProcStatus))); -+ -+ /* since we can't determine whether the dma run queue is full or empty, we use a dma -+ * halt operation to do the flushing - as the reason for halting the dma processor -+ * will be released when we return, we keep it halted until the flush has completed */ -+ elan4_queue_dma_flushop (dev, &uctx->uctx_dma_flushop, 0); -+ -+ if (uctx->uctx_status & UCTX_EXITING) -+ elan4_flush_icache_halted (&uctx->uctx_ctxt); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_set_filter (USER_CTXT *uctx, E4_uint32 state) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ elan4_set_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext, state); -+ } -+} -+ -+static void -+user_start_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ if (UCTX_NACKING(uctx)) -+ uctx->uctx_status |= reason; -+ else -+ { -+ uctx->uctx_status |= reason; -+ -+ user_set_filter (uctx, E4_FILTER_STATS | E4_FILTER_DISCARD_ALL); -+ } -+} -+ -+static void -+user_stop_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ uctx->uctx_status &= ~reason; -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+} -+ -+static void -+user_start_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev =uctx->uctx_ctxt.ctxt_dev; -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (! (uctx->uctx_status & UCTX_STOPPED)); -+ -+ user_start_nacking (uctx, reason); -+ -+ if ((uctx->uctx_status & UCTX_STOPPING) != 0) -+ return; -+ -+ uctx->uctx_status |= UCTX_STOPPING; -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+} -+ -+static void -+user_stop_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_stop_stopping: no more reasons => %x\n", uctx->uctx_status); -+ -+ user_signal_trap (uctx); -+ } -+} -+ -+void -+user_swapout (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapout: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_start_nacking (uctx, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING) && /* wait for someone else to finish */ -+ uctx->uctx_trap_count > 0) /* and for trap handlers to notice */ -+ { /* and exit */ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: waiting for %d trap handlers to exit/previous swapout\n", uctx->uctx_trap_count); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_SWAPPED) /* already swapped out */ -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return; -+ } -+ -+ uctx->uctx_status |= (UCTX_SWAPPING|UCTX_STOPPING); /* mark the context as swapping & stopping */ -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+ -+ while (! (uctx->uctx_status & UCTX_STOPPED)) -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ -+ /* all state has been removed from the elan - we can now "tidy" it up */ -+ -+ PRINTF0 (uctx, DBG_SWAP, "user_swapout: swapped out\n"); -+ -+ uctx->uctx_status = (uctx->uctx_status & ~UCTX_SWAPPING) | UCTX_SWAPPED; -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_swapin (USER_CTXT *uctx, unsigned reason) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (uctx->uctx_status & UCTX_SWAPPED_REASONS); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapin: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING)) /* wait until other threads have */ -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); /* completed their swap operation */ -+ -+ ASSERT (uctx->uctx_status & (UCTX_SWAPPED | UCTX_STOPPED)); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (! (uctx->uctx_status & UCTX_SWAPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_SWAPPED; -+ -+ /* no longer swapped out - wakeup anyone sleeping waiting for swapin */ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ if (! (uctx->uctx_status & UCTX_STOPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ user_signal_trap (uctx); -+ } -+ } -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapin: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_destroy_callback (void *arg, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ -+ PRINTF (uctx, DBG_VP, "user_destroy_callback: %s\n", map == NULL ? "cap destoyed" : "map destroyed"); -+} -+ -+int -+user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CTXT_ENTRY *cent; -+ unsigned long flags; -+ int ctype, res; -+ -+ if ((ctype = user_validate_cap (uctx, cap, ELAN_USER_ATTACH)) < 0) -+ return ctype; -+ -+ if ((ctype == ELAN_CAP_RMS) && (res = elan_attach_cap (cap, dev->dev_devinfo.dev_rail, uctx, user_destroy_callback)) != 0) -+ { -+ /* NOTE: elan_attach_cap returns +ve errnos */ -+ return -res; -+ } -+ -+ KMEM_ALLOC (cent, USER_CTXT_ENTRY *, sizeof (USER_CTXT_ENTRY), 1); -+ if (cent == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ return -ENOMEM; -+ } -+ -+ KMEM_ALLOC (cent->cent_cap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ if (cent->cent_cap == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ return -ENOMEM; -+ } -+ -+ bcopy (cap, cent->cent_cap, ELAN_CAP_SIZE(cap)); -+ -+ if ((res = elan4_attach_filter (&uctx->uctx_ctxt, cap->cap_mycontext)) != 0) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ -+ return res; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_add_tail (¢->cent_link, &uctx->uctx_cent_list); -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (0); -+ -+} -+ -+void -+user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct list_head *entry; -+ struct list_head *next; -+ struct list_head list; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF (uctx, DBG_NETWORK_CTX, cap ? "user_detach: network context %d\n" : "user_detach: all network contexts\n", cap ? cap->cap_mycontext : 0); -+ -+ list_for_each_safe (entry, next, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ if (cap == NULL || ELAN_CAP_MATCH (cap, cent->cent_cap)) -+ { -+ PRINTF1 (uctx, DBG_NETWORK_CTX, "user_detach: detach from network context %d\n", cent->cent_cap->cap_mycontext); -+ -+ elan4_detach_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext); -+ -+ list_del (¢->cent_link); -+ list_add_tail (¢->cent_link, &list); -+ } -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ USER_CTXT_ENTRY *cent = list_entry (list.next, USER_CTXT_ENTRY, cent_link); -+ -+ list_del (¢->cent_link); -+ -+ if (user_validate_cap (uctx, cent->cent_cap, ELAN_USER_DETACH) == ELAN_CAP_RMS) -+ elan_detach_cap (cent->cent_cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cent->cent_cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ } -+} -+ -+void -+user_block_inputter (USER_CTXT *uctx, unsigned blocked) -+{ -+ unsigned long flags; -+ int isblocked; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ isblocked = (uctx->uctx_status & UCTX_USER_FILTERING); -+ -+ if (blocked && !isblocked) -+ user_start_nacking (uctx, UCTX_USER_FILTERING); -+ -+ if (!blocked && isblocked) -+ user_stop_nacking (uctx, UCTX_USER_FILTERING); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static USER_VPSEG * -+user_install_vpseg (USER_CTXT *uctx, unsigned process, unsigned entries) -+{ -+ struct list_head *entry; -+ USER_VPSEG *seg; -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (process <= (seg->vps_process + seg->vps_entries-1) && -+ (process + entries - 1) >= seg->vps_process) -+ return ((USER_VPSEG *) NULL); -+ } -+ -+ KMEM_ZALLOC (seg, USER_VPSEG *, sizeof (USER_VPSEG), 1); -+ -+ if (seg == (USER_VPSEG *) NULL) -+ return ((USER_VPSEG *) NULL); -+ -+ seg->vps_process = process; -+ seg->vps_entries = entries; -+ -+ list_add_tail (&seg->vps_link, &uctx->uctx_vpseg_list); -+ -+ return (seg); -+} -+ -+static void -+user_remove_vpseg (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_del (&seg->vps_link); -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ /* These pointers (union) are only valid for P2P segs */ -+ if (seg->vps_p2p_routes) -+ KMEM_FREE (seg->vps_p2p_routes, sizeof (E4_VirtualProcessEntry) * seg->vps_entries); -+ -+ if (seg->vps_p2p_cap) -+ KMEM_FREE (seg->vps_p2p_cap, ELAN_CAP_SIZE(seg->vps_p2p_cap)); -+ -+ break; -+ -+ case USER_VPSEG_BCAST: -+ ; -+ } -+ -+ KMEM_FREE (seg, sizeof (USER_VPSEG)); -+} -+ -+static USER_VPSEG * -+user_find_vpseg (USER_CTXT *uctx, unsigned low, unsigned high) -+{ -+ struct list_head *entry; -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_process <= low && (seg->vps_process + seg->vps_entries) > high) -+ return (seg); -+ } -+ -+ return ((USER_VPSEG *) NULL); -+} -+ -+static ELAN_LOCATION -+user_process2location (USER_CTXT *uctx, USER_VPSEG *seg, unsigned process) -+{ -+ ELAN_LOCATION location; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ if (seg == NULL) -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL || (seg->vps_type != USER_VPSEG_P2P)) -+ return (location); -+ -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ -+ found: -+ return (location); -+} -+ -+static unsigned -+user_location2process (USER_CTXT *uctx, ELAN_LOCATION location) -+{ -+ unsigned int process = ELAN_INVALID_PROCESS; -+ struct list_head *entry; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_type != USER_VPSEG_P2P) -+ continue; -+ -+ if (location.loc_node >= seg->vps_p2p_cap->cap_lownode && location.loc_node <= seg->vps_p2p_cap->cap_highnode && -+ location.loc_context >= seg->vps_p2p_cap->cap_lowcontext && location.loc_context <= seg->vps_p2p_cap->cap_highcontext) -+ { -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+ } -+ } -+ found: -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (process); -+} -+ -+static void -+user_loadroute_vpseg (USER_CTXT *uctx, USER_VPSEG *seg, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_CAPABILITY *cap = seg->vps_p2p_cap; -+ unsigned nnodes = ELAN_CAP_NUM_NODES (cap); -+ unsigned nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ E4_VirtualProcessEntry route; -+ unsigned nodeOff; -+ unsigned ctxOff; -+ unsigned vpOff; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+} -+ -+static int -+user_loadroute_bcast (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_POSITION *pos = &uctx->uctx_position; -+ E4_VirtualProcessEntry route; -+ USER_VPSEG *aseg; -+ int res; -+ ELAN_LOCATION low; -+ ELAN_LOCATION high; -+ -+ if ((aseg = user_find_vpseg (uctx, seg->vps_bcast_lowvp, seg->vps_bcast_highvp)) == NULL || aseg->vps_type != USER_VPSEG_P2P) -+ return (-EINVAL); -+ -+#ifdef use_elanmod -+ if ((res = user_validate_cap (dev, aseg->vps_p2p_cap, ELAN_USER_BROADCAST)) < 0) -+ return (res); -+#endif -+ -+ low = user_process2location (uctx, aseg, seg->vps_bcast_lowvp); -+ high = user_process2location (uctx, aseg, seg->vps_bcast_highvp); -+ -+ if (low.loc_context != high.loc_context) -+ return (-EINVAL); -+ -+ /* NOTE: if loopback can only broadcast to ourself - -+ * if back-to-back can only broadcast to other node */ -+ if ((pos->pos_mode == ELAN_POS_MODE_LOOPBACK && low.loc_node != high.loc_node && low.loc_node != pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && low.loc_node != high.loc_node && low.loc_node == pos->pos_nodeid)) -+ { -+ return (-EINVAL); -+ } -+ -+ if ((res = elan4_generate_route (pos, &route, low.loc_context, low.loc_node, high.loc_node, user_bcast_route_options)) < 0) -+ return (res); -+ -+ PRINTF (uctx, DBG_VP, "user_loadroute_bcast: virtual process %d -> nodes %d.%d context %d [%016llx.%016llx]\n", -+ seg->vps_process, low.loc_node, high.loc_node, low.loc_context, route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process, &route); -+ return (0); -+} -+ -+int -+user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap) -+{ -+ USER_VPSEG *seg; -+ ELAN_CAPABILITY *ncap; -+ unsigned entries; -+ -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) == 0) -+ entries = bt_nbits (cap->cap_bitmap , ELAN_CAP_BITMAPSIZE(cap)); -+ else -+ entries = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if ((process + entries) > (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE (cap), 1); -+ -+ if (ncap == NULL) -+ return (-ENOMEM); -+ -+ bcopy (cap, ncap, ELAN_CAP_SIZE (cap)); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, entries)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_P2P; -+ seg->vps_p2p_cap = ncap; -+ seg->vps_p2p_routes = NULL; -+ -+ user_loadroute_vpseg (uctx, seg, &uctx->uctx_position); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp) -+{ -+ USER_VPSEG *seg; -+ int res; -+ -+ if (lowvp > highvp || process >= (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, 1)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_BCAST; -+ seg->vps_bcast_lowvp = lowvp; -+ seg->vps_bcast_highvp = highvp; -+ -+ if ((res = user_loadroute_bcast (uctx, seg)) < 0) -+ user_remove_vpseg (uctx, seg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+int -+user_removevp (USER_CTXT *uctx, unsigned process) -+{ -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if (process == ELAN_INVALID_PROCESS) -+ seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link); -+ else -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ do { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int i; -+ -+ for (i = 0; i < seg->vps_entries; i++) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, seg->vps_process + i); -+ -+ user_remove_vpseg (uctx, seg); -+ -+ } while (process == ELAN_INVALID_PROCESS && (seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)) != NULL); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ /* check that the route supplied is valid and goes to the correct place */ -+ location = user_process2location (uctx, seg, process); -+ -+ if (elan4_check_route (&uctx->uctx_position, location, route, 0) != 0) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes == NULL) -+ KMEM_ZALLOC (seg->vps_p2p_routes, E4_VirtualProcessEntry *, sizeof (E4_VirtualProcessEntry) * seg->vps_entries, 1); -+ -+ if (seg->vps_p2p_routes == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-ENOMEM); -+ } -+ -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = route->Values[0]; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = ROUTE_CTXT_VALUE(location.loc_context) | (route->Values[1] & ~ROUTE_CTXT_MASK); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_set_route: vp=%d -> %016llx%016llx\n", process, -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1], seg->vps_p2p_routes[process - seg->vps_process].Values[0]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, process, &seg->vps_p2p_routes[process - seg->vps_process]); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_reset_route (USER_CTXT *uctx, unsigned process) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ ELAN_LOCATION location; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes != NULL) -+ { -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = 0; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = 0; -+ } -+ -+ /* generate the default route to this location */ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_reset_route: vp=%d\n", process); -+ -+ if (elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, 0) < 0) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, process); -+ else -+ elan4_write_route (dev, uctx->uctx_routetable, process, &route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ *error = elan4_check_route (&uctx->uctx_position, user_process2location (uctx, seg, process), route, 0); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ unsigned long flags; -+ int res, found = 0; -+ struct list_head *el; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ /* determine the location of the virtual process */ -+ if ((seg = user_find_vpseg (uctx, vp, vp)) == NULL) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d has no vpseg\n", vp); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ location = user_process2location (uctx, seg, vp); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d is a bcast vp\n", vp); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ /* check that we're attached to the network context */ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (el , &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (el, USER_CTXT_ENTRY, cent_link); -+ -+ if (cent->cent_cap->cap_mycontext == nctx) -+ found++; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if (! found) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: nctx=%d not attached\n", nctx); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return -EINVAL; -+ } -+ -+ /* Update the fields which the user might have "faked" */ -+ msg->msg_context = location.loc_context; -+ msg->msg_sender.loc_node = uctx->uctx_position.pos_nodeid; -+ msg->msg_sender.loc_context = nctx; -+ -+ res = elan4_neterr_sendmsg (uctx->uctx_ctxt.ctxt_dev, location.loc_node, retries, msg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (res); -+} -+ -+ -+static int -+user_resolvevp (USER_CTXT *uctx, unsigned process) -+{ -+ int res = 0; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ E4_VirtualProcessEntry route; -+ -+ PRINTF1 (uctx, DBG_VP, "user_resolvevp: process=%d\n", process); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+#ifdef use_elanmod -+ if ((res = user_validate_cap (uctx, seg->vps_p2p_cap, ELAN_USER_P2P)) != 0) -+ break; -+#endif -+ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_VP, "user_resolvevp: vp=%d -> node=%d ctx=%d\n", process, location.loc_node, location.loc_context); -+ -+ if (seg->vps_p2p_routes != NULL && seg->vps_p2p_routes[process - seg->vps_process].Values[0] != 0) -+ route = seg->vps_p2p_routes[process - seg->vps_process]; -+ else if ((res = elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, user_p2p_route_options)) < 0) -+ break;; -+ -+ elan4_write_route (uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, process, &route); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ res = user_loadroute_bcast (uctx, seg); -+ break; -+ -+ default: -+ res = -EINVAL; -+ break; -+ } -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+static void -+user_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_eprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_EPROC, "user_eproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps), 0); -+ -+ DBGCMD (ctxt, DBG_EPROC, elan4_display_eproc_trap (ctxt, DBG_EPROC, "user_eproc_trap", RING_QUEUE_BACK(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_eprocTrapQ)) -+ user_start_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_CQ *ucq = NULL; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2num(ucq->ucq_cq) == cqnum) -+ break; -+ } -+ -+ ASSERT (ucq != NULL); -+ -+ if (ucq->ucq_state != UCQ_RUNNING && CPROC_TrapType (status) == CommandProcInserterError) -+ { -+ PRINTF (ctxt, DBG_TRAP, "user_cproc_trap CommandProcInserterError\n"); -+ ucq->ucq_errored++; -+ } -+ else -+ { -+ int vp; -+ -+ ASSERT (ucq->ucq_state == UCQ_RUNNING); -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &ucq->ucq_trap, cqnum); -+ -+ DBGCMD (ctxt, DBG_CPROC, elan4_display_cproc_trap (ctxt, DBG_CPROC, "user_cproc_trap", &ucq->ucq_trap)); -+ -+ ucq->ucq_state = UCQ_TRAPPED; -+ -+ if ((vp = cproc_open_extract_vp(uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq)) != -1) -+ { -+ E4_VirtualProcessEntry route; -+ -+ elan4_read_route(uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, vp, &route); -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_cproc_timeout_routes, &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_DPROC, "user_dproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ ELAN4_DPROC_TRAP *trap = RING_QUEUE_BACK (uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ if ((DPROC_PacketTimeout (trap->tr_status)) || (DPROC_TrapType (trap->tr_status) == DmaProcFailCountError)) -+ { -+ E4_VirtualProcessEntry route; -+ -+ elan4_read_route(ctxt->ctxt_dev, uctx->uctx_routetable, trap->tr_desc.dma_vproc, &route); -+ -+ if ((route.Values[0] != 0) || (route.Values[1] != 0)) -+ { -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout_routes, &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ } -+ else if (DPROC_TrapType (trap->tr_status) == DmaProcPacketAckError) -+ { -+ E4_VirtualProcessEntry route; -+ -+ elan4_read_route(ctxt->ctxt_dev, uctx->uctx_routetable, trap->tr_desc.dma_vproc, &route); -+ -+ if ((route.Values[0] != 0) || (route.Values[1] != 0)) -+ { -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_ack_error_routes, &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ } -+ else -+ -+ DBGCMD (ctxt, DBG_DPROC, elan4_display_dproc_trap (ctxt, DBG_DPROC, "user_dproc_trap", trap)); -+ -+ if (!DPROC_PrefetcherFault (status) && DPROC_TrapType(status) == DmaProcFailCountError && !RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = trap->tr_desc; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ else -+ { -+ if (RING_QUEUE_ADD (uctx->uctx_dprocTrapQ)) -+ user_start_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ } -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_tprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_TPROC, "user_tproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps)); -+ -+ DBGCMD (ctxt, DBG_TPROC, elan4_display_tproc_trap (ctxt, DBG_TPROC, "user_tproc_trap", RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_tprocTrapQ)) -+ user_start_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ } -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[unit & 1]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (utrap->ut_state == UTS_IPROC_RUNNING); -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, &utrap->ut_trap, unit); -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "user_iproc_trap", &utrap->ut_trap)); -+ -+ utrap->ut_state = UTS_IPROC_TRAPPED; -+ -+ user_start_nacking (uctx, unit ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx\n", cookie); -+ -+ switch (cookie) -+ { -+ case ELAN4_INT_COOKIE_DDCQ: -+ uctx->uctx_ddcq_intr--; -+ -+ user_signal_trap (uctx); -+ break; -+ -+ default: -+ if (uctx->uctx_intcookie_table == NULL || intcookie_fire (uctx->uctx_intcookie_table, cookie) != 0) -+ { -+ PRINTF2 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx %s\n", cookie, uctx->uctx_intcookie_table ? "not found" : "no table"); -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ user_signal_trap (uctx); -+ } -+ break; -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_neterrmsg (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (! RING_QUEUE_FULL (uctx->uctx_msgQ)) -+ { -+ memcpy (RING_QUEUE_BACK (uctx->uctx_msgQ, uctx->uctx_msgs), msg, sizeof (ELAN4_NETERR_MSG)); -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_msgQ); -+ -+ user_signal_trap (uctx); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+ELAN4_TRAP_OPS user_trap_ops = -+{ -+ user_eproc_trap, -+ user_cproc_trap, -+ user_dproc_trap, -+ user_tproc_trap, -+ user_iproc_trap, -+ user_interrupt, -+ user_neterrmsg, -+}; -+ -+static int -+deliver_trap (ELAN4_USER_TRAP *utrapp, int type, unsigned proc, void *trap, ...) -+{ -+ register int i, len; -+ va_list ap; -+ -+ PRINTF (NULL, DBG_TRAP, "deliver_trap: type=%d proc=%d\n", type, proc); -+ -+ switch (proc) -+ { -+ case UTS_CPROC: len = sizeof (ELAN4_CPROC_TRAP); break; -+ case UTS_DPROC: len = sizeof (ELAN4_DPROC_TRAP); break; -+ case UTS_EPROC: len = sizeof (ELAN4_EPROC_TRAP); break; -+ case UTS_IPROC: len = sizeof (ELAN4_IPROC_TRAP); break; -+ case UTS_TPROC: len = sizeof (ELAN4_TPROC_TRAP); break; -+ case UTS_NETERR_MSG: len = sizeof (ELAN4_NETERR_MSG); break; -+ default: len = 0; break; -+ } -+ -+ if (put_user (type, &utrapp->ut_type) || put_user (proc, &utrapp->ut_proc) || copy_to_user (&utrapp->ut_trap, trap, len)) -+ return (UTS_EFAULT); -+ -+ va_start (ap, trap); -+ for (i = 0; i < sizeof (utrapp->ut_args)/sizeof (utrapp->ut_args[0]); i++) -+ if (put_user (va_arg (ap, unsigned long), &utrapp->ut_args[i])) -+ return (UTS_EFAULT); -+ va_end (ap); -+ -+ return (type); -+} -+ -+static int -+user_pagefault (USER_CTXT *uctx, E4_FaultSave *farea) -+{ -+ E4_Addr addr = farea->FaultAddress; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ FAULT_SAVE *entry; -+ FAULT_SAVE **predp; -+ int count; -+ -+ PRINTF2 (uctx, DBG_FAULT, "user_pagefault: addr=%llx fsr %x\n", (unsigned long long) addr, fsr); -+ -+ if ((fsr & FSR_FaultForBadData) != 0) /* Memory ECC error during walk */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: ECC error during walk\n"); -+ return (-EFAULT); -+ } -+ -+ if ((fsr & FSR_FaultForMaxChainCount) != 0) /* Have walked a chain of 1024 items */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: pte chain too long\n"); -+ return (-EFAULT); -+ } -+ -+ if (uctx->uctx_num_fault_save) -+ { -+ spin_lock (&uctx->uctx_fault_lock); -+ for( predp = &uctx->uctx_fault_list; (entry = *predp)->next != NULL; predp = &entry->next) -+ { -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ break; -+ } -+ -+ *predp = entry->next; -+ entry->next = uctx->uctx_fault_list; -+ uctx->uctx_fault_list = entry; -+ -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ { -+ if ((entry->count <<= 1) > max_fault_pages) -+ entry->count = max_fault_pages; -+ } -+ else -+ entry->count = min_fault_pages; -+ -+ entry->addr = (addr & ~((E4_Addr) PAGE_SIZE-1))+(entry->count * PAGE_SIZE); -+ count = entry->count; -+ spin_unlock (&uctx->uctx_fault_lock); -+ -+ if (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), count * PAGESIZE, fsr) == 0) -+ return 0; -+ -+ /* else pre-faulting has failed, try just this page */ -+ } -+ -+ return (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), PAGE_SIZE, fsr)); -+ -+} -+ -+static int -+queue_dma_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_DMA *dma) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_DPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+queue_thread_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_TPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+fixup_eproc_trap (USER_CTXT *uctx, ELAN4_EPROC_TRAP *trap, int waitevent) -+{ -+ E4_FaultSave *farea = &trap->tr_faultarea; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ E4_uint64 CountAndType; -+ E4_uint64 CopySource; -+ E4_uint64 CopyDest; -+ -+ /* -+ * Event processor can trap as follows : -+ * 1) Event location read (faddr == event location & Event Permission) -+ * 2) Event location write (faddr == event location & Event Permission) -+ * 3) Copy Source read Read Access -+ * 4) Copy/Write dest write other -+ * -+ * NOTE - it is possible to see both 3) and 4) together - but only with physical errors. -+ */ -+ if (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite) -+ { -+ /* -+ * We complete the copy/write by issuing a waitevent 0 of the approriate type. -+ * - NB mask off bottom bits of EventAddr in case of partial setevent -+ */ -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ -+ if (! user_ddcq_check (uctx, 4)) -+ return (0); -+ -+ if ((trap->tr_event.ev_CountAndType & E4_EVENT_COPY_TYPE_MASK) == E4_EVENT_WRITE) -+ { -+ /* case 4) faulted on write word to destination */ -+ -+ CountAndType = trap->tr_event.ev_CountAndType & E4_EVENT_TYPE_MASK; -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: write Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " WritePtr=%llx WriteValue=%llx\n", -+ trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ } -+ else -+ { -+ /* case 3) or case 4) faulted on read/write of copy */ -+ if (AT_Perm (fsr) == AT_PermLocalDataRead) -+ { -+ CountAndType = (trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | EPROC_CopySize(trap->tr_status); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest; -+ } -+ else -+ { -+ CountAndType = ((trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | -+ ((EPROC_CopySize(trap->tr_status) + EVENT_COPY_NDWORDS) & E4_EVENT_COPY_SIZE_MASK)); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest - EVENT_COPY_BLOCK_SIZE; -+ } -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: copy Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " CopySource=%llx CopyDest=%llx\n", CopySource, CopyDest); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, CopySource, CopyDest); -+ } -+ } -+ else -+ { -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ E4_uint32 EventCount = trap->tr_eventaddr & (E4_EVENT_ALIGN-1); -+ -+ /* case 1) or 2) - just reissue the event */ -+ if (! waitevent) -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: setevent EventAddr=%llx EventCount=%x\n", EventAddr, EventCount); -+ else -+ { -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: waitevent Event=%llx CountAndType=%llx\n", EventAddr, trap->tr_event.ev_CountAndType); -+ PRINTF (uctx, DBG_TRAP, " Param[0]=%llx Param[1]=%llx\n", -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ } -+ -+ if (! user_ddcq_check (uctx, waitevent ? 4 : 2)) -+ return (0); -+ -+ if (waitevent) -+ user_ddcq_waitevent (uctx, EventAddr, trap->tr_event.ev_CountAndType, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ else -+ user_ddcq_seteventn (uctx, EventAddr, EventCount); -+ } -+ -+ return (1); -+} -+ -+ -+static int -+resolve_eproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_EPROC_TRAP *trap) -+{ -+ switch (EPROC_TrapType (trap->tr_status)) -+ { -+ case EventProcNoFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcNoFault\n"); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_EPROC, trap)); -+ -+ case EventProcMemoryFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcMemoryFault @ %llx\n", trap->tr_faultarea.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_EPROC, trap)); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcCountWrapError: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_EPROC, trap)); -+ -+ default: -+ printk ("resolve_eproc_trap: bad trap type %d\n", EPROC_TrapType (trap->tr_status)); -+ BUG(); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_cproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, USER_CQ *ucq) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &ucq->ucq_trap; -+ E4_uint64 command; -+ int res; -+ -+ PRINTF2 (uctx, DBG_CPROC, "resolve_cproc_trap: cq %p is trapped - Status %lx\n", ucq, trap->tr_status); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcDmaQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcDmaQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcInterruptQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcWaitTrap: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcWaitTrap\n"); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap->tr_eventtrap)) != UTS_FINISHED) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (res); -+ } -+ -+ if (fixup_eproc_trap (uctx, &trap->tr_eventtrap, 1) == 0) -+ return UTS_RESCHEDULE; -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcMemoryFault: -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcMemoryFault at %llx\n", trap->tr_faultarea.FaultAddress); -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcRouteFetchFault: -+ command = elan4_trapped_open_command (dev, ucq->ucq_cq); -+ -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcRouteFetchFault to vp %d\n", (int) (command >> 32)); -+ -+ if (user_resolvevp (uctx, (unsigned) (command >> 32)) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq), (long) (command >> 32))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcFailCountZero: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcFailCountZero - reset failcount\n"); -+ -+ /* NOTE - we must not modify the ChannelNotCompleted bits - so modify */ -+ /* the restart count with a part-word store */ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, user_cproc_retry_count); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcAddressAlignment: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ case CommandProcPermissionTrap: -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcPermissionTrap - %s\n", -+ (control & CQ_PermissionMask) != ucq->ucq_cq->cq_perm ? "resume from stop" : "permission denied"); -+ -+ if ((control & CQ_PermissionMask) == ucq->ucq_cq->cq_perm) -+ return (deliver_trap (utrapp, UTS_PERMISSION_DENIED, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, 0); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ } -+ -+ case CommandProcBadData: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_COMMAND, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ default: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_DPROC_TRAP *trap) -+{ -+ ELAN_LOCATION location; -+ int node; -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ { -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: PrefetcherFault at %llx\n", trap->tr_prefetchFault.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_prefetchFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_DPROC, trap)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc)); -+ } -+ -+ switch (DPROC_TrapType (trap->tr_status)) -+ { -+ case DmaProcRouteFetchFault: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcRouteFetchFault vp %d\n", trap->tr_desc.dma_vproc); -+ -+ if (user_resolvevp (uctx, trap->tr_desc.dma_vproc) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_DPROC, trap, trap->tr_desc.dma_vproc)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* immediate */)); -+ -+ case DmaProcFailCountError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcFailCountError - vp %d cookie %llx\n", -+ trap->tr_desc.dma_vproc, trap->tr_desc.dma_cookie); -+ -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, trap->tr_desc.dma_vproc); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ if ((node >= 0) && (node <= uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout[node]++; -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcPacketAckError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcPacketAckError - %d%s\n", DPROC_PacketAckValue (trap->tr_status), -+ DPROC_PacketTimeout (trap->tr_status) ? " timeout" : ""); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, trap->tr_desc.dma_vproc); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ if ((node >= 0) && (node <= uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ uctx->uctx_ctxt.ctxt_dev->dev_ack_errors[node]++; -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcQueueOverflow: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcQueueOverflow\n"); -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcRunQueueReadFault: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_DPROC, trap)); -+ -+ default: -+ printk ("resolve_dproc_trap: unknown trap type : %d\n", DPROC_TrapType(trap->tr_status)); -+ BUG(); -+ } -+ return UTS_FINISHED; -+} -+ -+int -+resolve_tproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_TPROC_TRAP *trap) -+{ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trap state = %lx\n", trap->tr_state); -+ -+ if (trap->tr_state & TS_TrapForTooManyInstructions) -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_Unimplemented) -+ return (deliver_trap (utrapp, UTS_UNIMP_INSTR, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_DataAlignmentError) -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_InstAccessException) && user_pagefault (uctx, &trap->tr_instFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_DataAccessException) && user_pagefault (uctx, &trap->tr_dataFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ /* If we're restarting from trap - then just need to re-issue it */ -+ if (trap->tr_pc == uctx->uctx_trestart_addr || (trap->tr_state & TS_TrappedFlag)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trapped in trap code PC=%llx SP=%llx\n", trap->tr_pc, trap->tr_regs[1]); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ } -+ else -+ { -+ E4_uint64 *sp = (E4_uint64 *) user_elan2main (uctx, trap->tr_regs[1]); -+ int i, reload; -+ -+ /* need to store the register on the stack see */ -+ /* lib_tproc_trampoline_elan4_thread.S for stack layout */ -+#define TS_STACK_OFF(REG) ((((REG)&7)) - (((REG)>>3)*8) - 8) -+ for (reload = 0, i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: %%r%d [%016llx] -> %p\n", i, trap->tr_regs[i], &sp[TS_STACK_OFF(i)]); -+ -+ sulonglong ((u64 *) &sp[TS_STACK_OFF(i)], trap->tr_regs[i]); -+ -+ reload |= (1 << (i >> 3)); -+ } -+ } -+#undef TS_STACK_OFF -+ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: pc %llx npc %llx\n", trap->tr_pc, trap->tr_npc); -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: CC %x reload %x\n", (int) (trap->tr_state >> TS_XCCshift), reload); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ trap->tr_regs[2] = trap->tr_pc; -+ trap->tr_regs[3] = trap->tr_npc; -+ trap->tr_regs[4] = (trap->tr_state >> TS_XCCshift) & TS_XCCmask; -+ trap->tr_regs[5] = reload; -+ } -+ -+ return (queue_thread_for_retry (uctx, utrapp, (E4_ThreadRegs *) trap->tr_regs)); -+} -+ -+static int -+resolve_iproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int channel) -+{ -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[channel]; -+ ELAN4_IPROC_TRAP *trap = &utrap->ut_trap; -+ unsigned long flags; -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ if (trap->tr_flags & TR_FLAG_TOOMANY_TRANS) -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ ASSERT (trap->tr_trappedTrans >= 0 && trap->tr_trappedTrans < trap->tr_numTransactions); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputMemoryFault: -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ utrap->ut_state = UTS_IPROC_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_IPROC, trap, channel)); -+ } -+ break; -+ -+ case InputDmaQueueOverflow: -+ case InputEventEngineTrapped: -+ /* nothing to do for these 2 - restarting will simulate the transactions */ -+ break; -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ break; -+ -+ case InputCrcErrorAfterPAckOk: -+ PRINTF (DBG_DEVICE, DBG_IPROC, "InputCrcErrorAfterPAckOk: flags %x\n", trap->tr_flags); -+ -+ ASSERT ((trap->tr_flags & TR_FLAG_ACK_SENT) && ((trap->tr_flags & (TR_FLAG_DMA_PACKET|TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && trap->tr_identifyTrans == TR_TRANS_INVALID))); -+ break; -+ -+ case InputDiscardAfterAckOk: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ case InputAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_IPROC, trap, channel)); -+ -+ case InputInvalidTransType: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ default: -+ printk ("resolve_iproc_trap: unknown trap type %d\n", IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ /* -+ * TR_FLAG_DMA_PACKET means a DMA packet has faulted. -+ * -+ * TR_FLAG_BAD_TRANS means we have a transaction with a bad CRC after the transaction -+ * which sent the ack - this implies it's an overlapped ack DMA packet -+ * -+ * TR_FLAG_EOP_ERROR means we've received an EOP reset - if we hadn't seen an identify -+ * transaction then it's a DMA packet. -+ * -+ * To ensure that the DMA processor works correctly the next packet must be NACKed to -+ * cause it to resend this one. -+ */ -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: %s during DMA packet\n", -+ (trap->tr_flags & TR_FLAG_BAD_TRANS) ? "BadTransaction" : (trap->tr_flags & TR_FLAG_EOP_ERROR) ? "EopError" : "trap"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (trap->tr_flags & TR_FLAG_DMA_PACKET) -+ { -+ if (! (trap->tr_flags & TR_FLAG_BAD_TRANS)) -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ else -+ { -+ kcondvar_t waithere; -+ -+ /* We must ensure that the next packet is always nacked, so -+ * we wait here for an output timeout before dropping the -+ * context filter - we just pause here for 4 mS */ -+ kcondvar_init (&waithere); -+ kcondvar_timedwait (&waithere, &uctx->uctx_spinlock, &flags, lbolt + (HZ/250) + 1);; -+ kcondvar_destroy (&waithere); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ else -+ { -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: EopError with identify\n"); -+ -+ utrap->ut_state = UTS_IPROC_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: execute packet\n"); -+ -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ } -+ -+ return UTS_FINISHED; -+} -+ -+ -+static int -+resolve_cproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ struct list_head *entry; -+ int res = UTS_FINISHED; -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_TRAPPED) -+ { -+ res = resolve_cproc_trap (uctx, utrapp, ucq); -+ -+ if (res != UTS_FINISHED) -+ break; -+ } -+ -+ if (ucq->ucq_errored) -+ { -+ ucq->ucq_errored = 0; -+ res = deliver_trap (utrapp, UTS_CPROC_ERROR, UTS_CPROC, &ucq->ucq_trap, elan4_cq2idx(ucq->ucq_cq)); -+ break; -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (res); -+} -+ -+static int -+resolve_eproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_eprocTrapQ)) -+ { -+ ELAN4_EPROC_TRAP trap = *RING_QUEUE_FRONT (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_eprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ if (fixup_eproc_trap (uctx, &trap, 0) == 0) -+ { -+ PRINTF (uctx, DBG_EPROC, "resolve_eproc_trap: could not fixup eproc trap - requeue it\n"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_REALLY_FULL(uctx->uctx_eprocTrapQ)) -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_FRONT(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps) = trap; -+ -+ (void) RING_QUEUE_ADD_FRONT(uctx->uctx_eprocTrapQ); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return UTS_RESCHEDULE; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_EPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dprocTrapQ)) -+ { -+ ELAN4_DPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_dproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_DPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_tproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_tprocTrapQ)) -+ { -+ ELAN4_TPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_tprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_tproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_TPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ if (uctx->uctx_iprocTrap[i].ut_state == UTS_IPROC_TRAPPED) -+ { -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_RESOLVING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_iproc_trap (uctx, utrapp, i)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_all_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ int res; -+ -+ if ((res = resolve_iproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_cproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_eproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_dproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_tproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ return (res); -+ -+ if (uctx->uctx_status & UCTX_OVERFLOW_REASONS) -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, uctx->uctx_status)); -+ -+ if (uctx->uctx_status & UCTX_ERROR_REASONS) -+ return (deliver_trap (utrapp, UTS_QUEUE_ERROR, UTS_NOPROC, NULL, uctx->uctx_status)); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+execute_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ switch (uctx->uctx_iprocTrap[i].ut_state) -+ { -+ case UTS_IPROC_EXECUTE_PACKET: -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_EXECUTING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_EXECUTE_PACKET, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ -+ case UTS_IPROC_NETWORK_ERROR: -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_NETWORK_ERROR_TRAP, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+progress_neterr (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! RING_QUEUE_EMPTY (uctx->uctx_msgQ)) -+ { -+ ELAN4_NETERR_MSG msg = *RING_QUEUE_FRONT (uctx->uctx_msgQ, uctx->uctx_msgs); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_msgQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_MSG, UTS_NETERR_MSG, &msg, user_location2process (uctx, msg.msg_sender)); -+ } -+ -+ if (uctx->uctx_status & UCTX_NETERR_TIMER) -+ { -+ uctx->uctx_status &= ~UCTX_NETERR_TIMER; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_TIMER, UTS_NOPROC, NULL); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static void -+restart_command_queues (USER_CTXT *uctx) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_NEEDS_RESTART) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ -+ elan4_restartcq (uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq); -+ } -+ } -+} -+ -+static int -+restart_dmas (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_dmas: back=%d front=%d\n", uctx->uctx_dmaQ.q_back, uctx->uctx_dmaQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dmaQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_dma (uctx, RING_QUEUE_FRONT(uctx->uctx_dmaQ, uctx->uctx_dmas)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dmaQ); -+ } -+ -+ return (1); -+} -+ -+static int -+restart_threads (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_threads: back=%d front=%d\n", uctx->uctx_threadQ.q_back, uctx->uctx_threadQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_threadQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_thread (uctx, RING_QUEUE_FRONT(uctx->uctx_threadQ, uctx->uctx_threads)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_threadQ); -+ } -+ -+ return (1); -+} -+ -+int -+user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr) -+{ -+ PRINTF2 (uctx, DBG_RESUME, "user_resume_eproc_trap: addr=%llx -> %s\n", addr, user_ddcq_check(uctx, 2) ? "success" : "EAGAIN"); -+ -+ if (! user_ddcq_check (uctx, 2)) -+ return (-EAGAIN); -+ -+ user_ddcq_setevent (uctx, addr); -+ -+ return (0); -+} -+ -+int -+user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_cproc_trap: indx=%d\n", indx); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == indx && ucq->ucq_state == UCQ_STOPPED && !(ucq->ucq_flags & UCQ_SYSTEM)) -+ { -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (0); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (-EINVAL); -+} -+ -+int -+user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ if (channel >= 2) -+ return (-EINVAL); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_STOPPED && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_EXECUTING && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_NETWORK_ERROR) -+ res = -EINVAL; -+ else -+ { -+ ELAN4_IPROC_TRAP *trap = &uctx->uctx_iprocTrap[channel].ut_trap; -+ -+ if (trans < trap->tr_numTransactions) -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> execute\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_EXECUTE_PACKET; -+ trap->tr_trappedTrans = trans; -+ trap->tr_transactions[trans] = *hdrp; -+ trap->tr_dataBuffers[trans] = *datap; -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> running\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (res); -+} -+ -+int -+__categorise_command (E4_uint64 command, int *cmdSize) -+{ -+ switch (command & 0x3) -+ { -+ case RUN_THREAD_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x7) -+ { -+ case WRITE_DWORD_CMD: *cmdSize = 2; break; -+ case ADD_DWORD_CMD: *cmdSize = 2; break; -+ -+ default: -+ switch (command & 0xF) -+ { -+ case OPEN_STEN_PKT_CMD: -+ *cmdSize = 1; -+ return 1; -+ -+ case COPY64_CMD: *cmdSize = 2; break; -+ case GUARD_CMD: *cmdSize = 1; break; -+ case INTERRUPT_CMD: *cmdSize = 1; break; -+ case RUN_DMA_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x1f) -+ { -+ case SEND_TRANS_CMD: -+ *cmdSize = 2 + (((command >> 16) & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ return 2; -+ -+ case SET_EVENT_CMD: *cmdSize = 1; break; -+ case SET_EVENTN_CMD: *cmdSize = 2; break; -+ case WAIT_EVENT_CMD: *cmdSize = 4; break; -+ -+ default: -+ switch (command & 0x3f) -+ { -+ case NOP_CMD: *cmdSize = 1; break; -+ case MAKE_EXT_CLEAN_CMD: *cmdSize = 1; break; -+ default: -+ return 3; -+ } -+ break; -+ } -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+int -+__whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize) -+{ -+ /* Move onto next command */ -+ while (cmdSize-- && (*commandPtr) != insertPtr) -+ *commandPtr = ((*commandPtr) & ~(cqSize-1)) | (((*commandPtr) + sizeof (E4_uint64)) & (cqSize-1)); -+ -+ return cmdSize == -1; -+} -+ -+int -+user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int found = 0; -+ struct list_head *el; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (el, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (el, USER_CQ, ucq_link); -+ -+ if ((ucq->ucq_cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ E4_uint64 openCommand = 0; -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ sdramaddr_t identifyPtr; -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d OPEN %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if ((command >> 32) == vp) -+ openCommand = command; -+ -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 2: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d SENDTRANS %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if (openCommand == 0) -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ else -+ { -+ switch ((command >> 16) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_IDENTIFY/TR_REMOTEDMA\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_SETEVENT_IDENTIFY/TR_INPUT_Q_COMMIT\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 2*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_ADDWORD\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 3*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_TESTANDWRITE\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 4*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ default: -+ identifyPtr = 0; -+ } -+ -+ if (! __whole_command (&commandPtr, insertPtr, cqSize, cmdSize)) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: not whole command\n"); -+ openCommand = 0; -+ } -+ -+ else if (identifyPtr) -+ { -+ E4_uint64 tcookie = elan4_sdram_readq (dev, identifyPtr); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie=%llx [%llx]\n", tcookie, cookie); -+ -+ if (tcookie == cookie) -+ { -+ unsigned int vchan = (openCommand >> 4) & 0x1f; -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie matches - vchan=%d\n", vchan); -+ -+ if (! waitforeop) -+ { -+ /* Alter the CQ_AckBuffer for this channel to indicate an -+ * ack was received */ -+ E4_uint64 value = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)); -+ E4_uint64 nvalue = ((value & ~((E4_uint64)0xf << ((vchan & 0xf) << 2))) | -+ ((E4_uint64) PackOk << ((vchan & 0xf) << 2))); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: CQ_AckBuffers %llx -> %llx\n", value, nvalue); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), nvalue); -+ pioflush_sdram (dev); -+ } -+ -+ found++; -+ } -+ openCommand = 0; -+ } -+ -+ if ((command >> 16) & TR_LAST_AND_SEND_ACK) -+ openCommand = 0; -+ } -+ break; -+ -+ case 3: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: invalid command %llx\n", command); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return -EINVAL; -+ } -+ -+ } -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ unsigned long flags; -+ int found = 0; -+ int idx; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ RING_QUEUE_ITERATE (uctx->uctx_dmaQ, idx) { -+ E4_DMA *dma = &uctx->uctx_dmas[idx]; -+ -+ if (dma->dma_vproc == vp && dma->dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ dma->dma_dstEvent = 0; -+ dma->dma_typeSize = DMA_ShMemWrite | DMA_Context (dma->dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ RING_QUEUE_ITERATE (uctx->uctx_dprocTrapQ, idx) { -+ ELAN4_DPROC_TRAP *trap = &uctx->uctx_dprocTraps[idx]; -+ -+ if (trap->tr_desc.dma_vproc == vp && trap->tr_desc.dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaTrapQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ trap->tr_desc.dma_dstEvent = 0; -+ trap->tr_desc.dma_typeSize = DMA_ShMemWrite | DMA_Context (trap->tr_desc.dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks) -+{ -+ unsigned long entered = jiffies; -+ unsigned int need_reenter = 0; -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_trap_handler: entered state=%d\n", uctx->uctx_trap_state); -+ -+ uctx->uctx_trap_count++; -+ -+ for (;;) -+ { -+ if (uctx->uctx_status & UCTX_SWAPPED_REASONS) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting on swapped reasons\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ if ((long) (jiffies - entered) > HZ) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting for reschedule\n"); -+ res = UTS_RESCHEDULE; -+ goto no_more_to_do; -+ } -+ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_ACTIVE: -+ uctx->uctx_trap_state = UCTX_TRAP_SLEEPING; -+ -+ if (nticks == 0 || need_reenter || kcondvar_timedwaitsig (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags, lbolt + nticks) != CV_RET_NORMAL) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting by kcondvar_timedwaitsig\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ /* Have slept above, so resample entered */ -+ entered = jiffies; -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ continue; -+ -+ case UCTX_TRAP_IDLE: -+ case UCTX_TRAP_SIGNALLED: -+ uctx->uctx_trap_state = UCTX_TRAP_ACTIVE; -+ break; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: resolve traps - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ switch ((res = resolve_all_traps (uctx, utrapp))) -+ { -+ case UTS_FINISHED: -+ break; -+ -+ case UTS_RESCHEDULE: -+ need_reenter++; -+ break; -+ -+ default: -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! user_ddcq_flush (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq not flushed - re-enter\n"); -+ need_reenter++; -+ continue; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = progress_neterr (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ if ((res = execute_iproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: restart items - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ restart_command_queues (uctx); -+ -+ if (! restart_threads (uctx) || ! restart_dmas (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq full - re-enter\n"); -+ need_reenter++; -+ } -+ } -+ } -+ no_more_to_do: -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ -+ /* -+ * Always ensure that the command queue is flushed with a flow control -+ * write, so that on the next trap we (hopefully) find it empty and so -+ * can immediately restart the context. Also if we need to be re-enter -+ * the trap handler and don't have an interrupt outstanding, then issue -+ * one now. -+ */ -+ user_ddcq_flush (uctx); -+ if (need_reenter && uctx->uctx_ddcq_intr == 0) -+ { -+ uctx->uctx_ddcq_intr++; -+ user_ddcq_intr (uctx); -+ } -+ -+ if (--uctx->uctx_trap_count == 0 && (uctx->uctx_status & UCTX_SWAPPING)) -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: finished state=%d res=%d\n", uctx->uctx_trap_state, res); -+ -+ return (res == UTS_EFAULT ? -EFAULT : 0); -+} -+ -+USER_CQ * -+user_alloccq (USER_CTXT *uctx, unsigned cqsize, unsigned perm, unsigned uflags) -+{ -+ USER_CQ *ucq; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (ucq, USER_CQ *, sizeof (USER_CQ), 1); -+ -+ if (ucq == (USER_CQ *) NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ /* NOTE - do not allow the user to create high-priority queues as we only flush through the low-priority run queues */ -+ if ((ucq->ucq_cq = elan4_alloccq (&uctx->uctx_ctxt, cqsize, perm, (uflags & UCQ_REORDER) ? CQ_Reorder : 0)) == NULL) -+ { -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+ -+ PRINTF2 (uctx, DBG_CQ, "user_alloccq: failed elan4_allocq cqsize %d uflags %x\n", cqsize, uflags); -+ -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ atomic_set (&ucq->ucq_ref, 1); -+ -+ ucq->ucq_state = UCQ_RUNNING; -+ ucq->ucq_flags = uflags; -+ -+ PRINTF3 (uctx, DBG_CQ, "user_alloccq: ucq=%p idx=%d cqnum=%d\n", ucq, elan4_cq2idx (ucq->ucq_cq), elan4_cq2num(ucq->ucq_cq)); -+ -+ /* chain it onto the context */ -+ kmutex_lock (&uctx->uctx_cqlock); -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_add (&ucq->ucq_link, &uctx->uctx_cqlist); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (ucq); -+} -+ -+USER_CQ * -+user_findcq (USER_CTXT *uctx, unsigned idx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == idx) -+ { -+ atomic_inc (&ucq->ucq_ref); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (ucq); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (NULL); -+} -+ -+void -+user_dropcq (USER_CTXT *uctx, USER_CQ *ucq) -+{ -+ unsigned long flags; -+ -+ PRINTF2 (uctx, DBG_CQ, "user_dropcq: ucq=%p ref=%d\n", ucq, atomic_read (&ucq->ucq_ref)); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ if (! atomic_dec_and_test (&ucq->ucq_ref)) -+ { -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_del (&ucq->ucq_link); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ elan4_freecq (&uctx->uctx_ctxt, ucq->ucq_cq); -+ -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+} -+ -+int -+user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas) -+{ -+ ELAN4_DPROC_TRAP *dprocs; -+ ELAN4_EPROC_TRAP *eprocs; -+ ELAN4_TPROC_TRAP *tprocs; -+ E4_DMA *dmas; -+ E4_ThreadRegs *threads; -+ ELAN4_NETERR_MSG *msgs; -+ unsigned long flags; -+ -+ int nmsgs = NETERR_MSGS; -+ -+ /* bounds check the values that have been passed in */ -+ if (ndproc_traps < 2 || ndproc_traps > 10000 || -+ ntproc_traps < 1 || ntproc_traps > 100 || -+ neproc_traps < 6 || neproc_traps > 10000 || -+ nthreads < 2 || nthreads > 10000 || -+ ndmas < 2 || ndmas > 10000) -+ return -EINVAL; -+ -+ if (uctx->uctx_dmas != NULL) -+ return -EBUSY; -+ -+ KMEM_ZALLOC (dprocs, ELAN4_DPROC_TRAP *, ndproc_traps * sizeof (ELAN4_DPROC_TRAP), 1); -+ KMEM_ZALLOC (eprocs, ELAN4_EPROC_TRAP *, neproc_traps * sizeof (ELAN4_EPROC_TRAP), 1); -+ KMEM_ZALLOC (tprocs, ELAN4_TPROC_TRAP *, ntproc_traps * sizeof (ELAN4_TPROC_TRAP), 1); -+ KMEM_ZALLOC (threads, E4_ThreadRegs *, nthreads * sizeof (E4_ThreadRegs), 1); -+ KMEM_ZALLOC (dmas, E4_DMA *, ndmas * sizeof (E4_DMA), 1); -+ KMEM_ZALLOC (msgs, ELAN4_NETERR_MSG *, nmsgs * sizeof (ELAN4_NETERR_MSG), 1); -+ -+ if (dprocs == NULL || eprocs == NULL || tprocs == NULL || dmas == NULL || threads == NULL || msgs == NULL) -+ { -+ if (dprocs != NULL) KMEM_FREE (dprocs, ndproc_traps * sizeof (ELAN4_DPROC_TRAP)); -+ if (eprocs != NULL) KMEM_FREE (eprocs, neproc_traps * sizeof (ELAN4_EPROC_TRAP)); -+ if (tprocs != NULL) KMEM_FREE (tprocs, ntproc_traps * sizeof (ELAN4_TPROC_TRAP)); -+ if (threads != NULL) KMEM_FREE (threads, nthreads * sizeof (E4_ThreadRegs)); -+ if (dmas != NULL) KMEM_FREE (dmas, ndmas * sizeof (E4_DMA)); -+ if (msgs != NULL) KMEM_FREE (msgs, nmsgs * sizeof (ELAN4_NETERR_MSG)); -+ -+ return -ENOMEM; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_dprocTraps = dprocs; -+ uctx->uctx_eprocTraps = eprocs; -+ uctx->uctx_tprocTraps = tprocs; -+ uctx->uctx_threads = threads; -+ uctx->uctx_dmas = dmas; -+ uctx->uctx_msgs = msgs; -+ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, ndproc_traps, 1 /* 1 for 2nd dma */); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, ntproc_traps, 0); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, neproc_traps, 5 /* 1 for command, 2 for dma, 2 for inputter */); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, nthreads, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, ndmas, 1); -+ RING_QUEUE_INIT (uctx->uctx_msgQ, nmsgs, 0); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return 0; -+} -+ -+USER_CTXT * -+user_alloc (ELAN4_DEV *dev) -+{ -+ USER_CTXT *uctx; -+ int res; -+ int i; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (uctx, USER_CTXT *, sizeof (USER_CTXT), 1); -+ -+ if (uctx == NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ if (elan4_get_position (dev, &uctx->uctx_position) == ELAN_POS_UNKNOWN) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-EAGAIN); -+ } -+ -+ if ((res = elan4_insertctxt (dev, &uctx->uctx_ctxt, &user_trap_ops)) != 0) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(res); -+ } -+ -+ KMEM_GETPAGES (uctx->uctx_upage, ELAN4_USER_PAGE *, btopr (sizeof (ELAN4_USER_PAGE)), 1); -+ if (uctx->uctx_upage == NULL) -+ { -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_trampoline = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_routetable = elan4_alloc_routetable (dev, 4 /* 512 << 4 == 8192 entries */)) == NULL) -+ { -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, uctx->uctx_routetable); -+ -+ /* initialise the trap and swap queues to be really full */ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, 0, 1); -+ -+ INIT_LIST_HEAD (&uctx->uctx_cent_list); -+ INIT_LIST_HEAD (&uctx->uctx_vpseg_list); -+ INIT_LIST_HEAD (&uctx->uctx_cqlist); -+ -+ uctx->uctx_haltop.op_function = user_flush; -+ uctx->uctx_haltop.op_arg = uctx; -+ uctx->uctx_haltop.op_mask = INT_Halted|INT_Discarding; -+ -+ uctx->uctx_dma_flushop.op_function = user_flush_dmas; -+ uctx->uctx_dma_flushop.op_arg = uctx; -+ -+ kmutex_init (&uctx->uctx_vpseg_lock); -+ kmutex_init (&uctx->uctx_cqlock); -+ kmutex_init (&uctx->uctx_rgnmutex); -+ -+ spin_lock_init (&uctx->uctx_spinlock); -+ spin_lock_init (&uctx->uctx_rgnlock); -+ spin_lock_init (&uctx->uctx_fault_lock); -+ -+ kcondvar_init (&uctx->uctx_wait); -+ -+ if ((uctx->uctx_ddcq = user_alloccq (uctx, CQ_Size1K, CQ_EnableAllBits, UCQ_SYSTEM)) == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ uctx->uctx_trap_count = 0; -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ uctx->uctx_status = 0 /* UCTX_DETACHED | UCTX_SWAPPED | UCTX_STOPPED */; -+ -+ init_timer (&uctx->uctx_int_timer); -+ -+ uctx->uctx_int_timer.function = user_signal_timer; -+ uctx->uctx_int_timer.data = (unsigned long) uctx; -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ -+ init_timer (&uctx->uctx_neterr_timer); -+ uctx->uctx_neterr_timer.function = user_neterr_timer; -+ uctx->uctx_neterr_timer.data = (unsigned long) uctx; -+ -+ uctx->uctx_upage->upage_ddcq_completed = 0; -+ uctx->uctx_ddcq_completed = 0; -+ uctx->uctx_ddcq_insertcnt = 0; -+ -+ uctx->uctx_num_fault_save = num_fault_save; -+ if (uctx->uctx_num_fault_save) -+ { -+ KMEM_ZALLOC (uctx->uctx_faults, FAULT_SAVE *, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save), 1); -+ if ( uctx->uctx_faults == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ for (i = 0; i < uctx->uctx_num_fault_save; i++) -+ uctx->uctx_faults[i].next = (i == (uctx->uctx_num_fault_save-1) ? NULL : &uctx->uctx_faults[i+1]); -+ -+ } -+ uctx->uctx_fault_list = uctx->uctx_faults; -+ -+ return (uctx); -+} -+ -+void -+user_free (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ user_swapout (uctx, UCTX_EXITING); -+ -+ /* Detach from all input contexts */ -+ user_detach (uctx, NULL); -+ -+ /* since we're single threaded here - (called from close()) */ -+ /* we don't need to hold the lock to drop the command queues */ -+ /* since they cannot be mapped into user space */ -+ while (! list_empty (&uctx->uctx_cqlist)) -+ user_dropcq (uctx, list_entry (uctx->uctx_cqlist.next, USER_CQ, ucq_link)); -+ -+ /* Free off all of vpseg_list */ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ while (! list_empty (&uctx->uctx_vpseg_list)) -+ user_remove_vpseg(uctx, list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ del_timer_sync (&uctx->uctx_int_timer); -+ -+ if (timer_pending (&uctx->uctx_neterr_timer)) -+ del_timer_sync (&uctx->uctx_neterr_timer); -+ -+ if (uctx->uctx_dprocTraps) -+ KMEM_FREE (uctx->uctx_dprocTraps, uctx->uctx_dprocTrapQ.q_size * sizeof (ELAN4_DPROC_TRAP)); -+ if (uctx->uctx_tprocTraps) -+ KMEM_FREE (uctx->uctx_tprocTraps, uctx->uctx_tprocTrapQ.q_size * sizeof (ELAN4_TPROC_TRAP)); -+ if (uctx->uctx_eprocTraps) -+ KMEM_FREE (uctx->uctx_eprocTraps, uctx->uctx_eprocTrapQ.q_size * sizeof (ELAN4_EPROC_TRAP)); -+ if (uctx->uctx_dmas) -+ KMEM_FREE (uctx->uctx_dmas, uctx->uctx_dmaQ.q_size * sizeof (E4_DMA)); -+ if (uctx->uctx_msgs) -+ KMEM_FREE (uctx->uctx_msgs, NETERR_MSGS * sizeof (ELAN4_NETERR_MSG)); -+ if (uctx->uctx_threads) -+ KMEM_FREE (uctx->uctx_threads, uctx->uctx_threadQ.q_size * sizeof (E4_ThreadRegs)); -+ if (uctx->uctx_faults) -+ KMEM_FREE (uctx->uctx_faults, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save)); -+ -+ if (uctx->uctx_intcookie_table) -+ intcookie_free_table (uctx->uctx_intcookie_table); -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, NULL); -+ elan4_free_routetable (dev, uctx->uctx_routetable); -+ -+ /* Free off all USER_RGNs */ -+ user_freergns(uctx); -+ -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ -+ /* Clear the PG_Reserved bit before free to avoid a memory leak */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ kcondvar_destroy (&uctx->uctx_wait); -+ -+ spin_lock_destroy (&uctx->uctx_rgnlock); -+ spin_lock_destroy (&uctx->uctx_spinlock); -+ -+ kmutex_destroy (&uctx->uctx_rgnmutex); -+ kmutex_destroy (&uctx->uctx_cqlock); -+ kmutex_destroy (&uctx->uctx_vpseg_lock); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/user_ddcq.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/user_ddcq.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/user_ddcq.c 2005-06-01 23:12:54.625435152 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_ddcq.c,v 1.15 2004/06/23 11:06:05 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_ddcq.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#if PAGE_SIZE < CQ_CommandMappingSize -+# define ELAN4_COMMAND_QUEUE_MAPPING PAGE_SIZE -+#else -+# define ELAN4_COMMAND_QUEUE_MAPPING CQ_CommandMappingSize -+#endif -+ -+/* The user device driver command queue is used for re-issuing -+ * trapped items. It is allocated as a 1K command queue, and -+ * we insert command flow writes event 256 words. -+ */ -+#define USER_CTRLFLOW_COUNT 256 -+ -+/* Flow control of the device driver command queue is handled by periodically -+ * inserting dword writes into the command stream. When you need to know -+ * that the queue has been flushed, then you insert an extra contorl flow -+ * write into the command queue. Should the queue not be flushed, but the -+ * trap handler be returning to user space, then it will also insert and -+ * extra interrupt command to ensure that it is re-entered after the queue -+ * has been flushed. -+ * -+ * Note - we account the space for the interrupt command on each control -+ * flow write so that we do not overflow the queue even if we end up -+ * inserting an interrupt for every command flow write. In general only -+ * a single interrupt should get inserted.... -+ */ -+ -+#define user_ddcq_command_write(value,off) do { \ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_write: cmdptr=%x off=%d value=%llx\n", cmdptr, off, value);\ -+ writeq(value, cmdptr + (off << 3)); \ -+} while (0) -+ -+#define user_ddcq_command_space(uctx) \ -+ ((CQ_Size (uctx->uctx_ddcq->ucq_cq->cq_size)>>3) - ((uctx)->uctx_ddcq_insertcnt - (uctx)->uctx_upage->upage_ddcq_completed)) -+ -+#define user_ddcq_command_flow_write(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ (uctx)->uctx_ddcq_completed = ((uctx)->uctx_ddcq_insertcnt += 3);\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_write: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (GUARD_CMD | GUARD_ALL_CHANNELS, 0);\ -+ user_ddcq_command_write (WRITE_DWORD_CMD | (uctx)->uctx_upage_addr, 1);\ -+ user_ddcq_command_write ((uctx)->uctx_ddcq_completed, 2);\ -+} while (0) -+ -+#define user_ddcq_command_flow_intr(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_intr: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (INTERRUPT_CMD | ELAN4_INT_COOKIE_DDCQ, 3);\ -+} while (0) -+ -+#define user_ddcq_command_prologue(uctx, count) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_prologue: iptr=%llx cmdptr=%x\n", iptr, cmdptr); -+ -+#define user_ddcq_command_epilogue(uctx, count, extra) \ -+ (uctx)->uctx_ddcq_insertcnt = iptr + (count);\ -+\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_epilogue: iptr=%llx + %x + %x - completed %llx\n", iptr, count, extra, (uctx)->uctx_ddcq_completed);\ -+ if (((iptr) + (count) + (extra)) > ((uctx)->uctx_ddcq_completed + USER_CTRLFLOW_COUNT))\ -+ user_ddcq_command_flow_write(uctx); \ -+} while (0) -+ -+int -+user_ddcq_check (USER_CTXT *uctx, unsigned num) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_check_ddcq: insert=%llx completed=%llx num=%d\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_upage->upage_ddcq_completed, num); -+ -+ /* Ensure that there is enough space for the command we want to issue, -+ * PLUS the guard/writeword for the control flow flush. -+ * PLUS the interrupt command for rescheduling */ -+ if (user_ddcq_command_space (uctx) > (num + 4)) -+ { -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: loads of space\n"); -+ -+ return (1); -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: not enough space - reschedule\n"); -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ return (0); -+} -+ -+int -+user_ddcq_flush (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq = uctx->uctx_ddcq; -+ -+ switch (ucq->ucq_state) -+ { -+ case UCQ_TRAPPED: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: command queue is trapped\n"); -+ return (0); -+ -+ case UCQ_NEEDS_RESTART: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: restarting command queue\n"); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ elan4_restartcq (dev, ucq->ucq_cq); -+ } -+ break; -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: insertcnt=%llx completed=%llx [%llx]\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_ddcq_completed, uctx->uctx_upage->upage_ddcq_completed); -+ -+ if (uctx->uctx_ddcq_completed != uctx->uctx_ddcq_insertcnt) -+ user_ddcq_command_flow_write (uctx); -+ -+ return (uctx->uctx_ddcq_completed == uctx->uctx_upage->upage_ddcq_completed); -+} -+ -+void -+user_ddcq_intr (USER_CTXT *uctx) -+{ -+ user_ddcq_command_flow_intr (uctx); -+} -+ -+void -+user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_dma: cookie=%llx vproc=%llx\n", dma->dma_cookie, dma->dma_vproc); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write ((dma->dma_typeSize & ~DMA_ContextMask) | RUN_DMA_CMD, 0); -+ user_ddcq_command_write (dma->dma_cookie, 1); -+ user_ddcq_command_write (dma->dma_vproc, 2); -+ user_ddcq_command_write (dma->dma_srcAddr, 3); -+ user_ddcq_command_write (dma->dma_dstAddr, 4); -+ user_ddcq_command_write (dma->dma_srcEvent, 5); -+ user_ddcq_command_write (dma->dma_dstEvent, 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_thread: PC=%llx SP=%llx\n", regs->Registers[0], regs->Registers[1]); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write (regs->Registers[0] | RUN_THREAD_CMD, 0); -+ user_ddcq_command_write (regs->Registers[1], 1); -+ user_ddcq_command_write (regs->Registers[2], 2); -+ user_ddcq_command_write (regs->Registers[3], 3); -+ user_ddcq_command_write (regs->Registers[4], 4); -+ user_ddcq_command_write (regs->Registers[5], 5); -+ user_ddcq_command_write (regs->Registers[6], 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr) -+{ -+ user_ddcq_command_prologue (uctx, 1) { -+ -+ user_ddcq_command_write (SET_EVENT_CMD | addr, 0); -+ -+ } user_ddcq_command_epilogue (uctx, 1, 0); -+} -+ -+void -+user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_seteventn: addr=%llx count=%lx\n", addr, count); -+ -+ user_ddcq_command_prologue (uctx, 2) { -+ -+ user_ddcq_command_write (SET_EVENTN_CMD, 0); -+ user_ddcq_command_write (addr | count, 1); -+ -+ } user_ddcq_command_epilogue (uctx, 2, 0); -+} -+ -+void -+user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_waitevent: addr=%llx CountAndType=%llx Param=%llx,%llx\n", addr, CountAndType, Param0, Param1); -+ -+ user_ddcq_command_prologue (uctx, 4) { -+ -+ user_ddcq_command_write (WAIT_EVENT_CMD | addr, 0); -+ user_ddcq_command_write (CountAndType, 1); -+ user_ddcq_command_write (Param0, 2); -+ user_ddcq_command_write (Param1, 3); -+ -+ } user_ddcq_command_epilogue (uctx, 4, 0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/elan4/user_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/elan4/user_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/elan4/user_Linux.c 2005-06-01 23:12:54.626435000 -0400 -@@ -0,0 +1,377 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_Linux.c,v 1.25.2.4 2005/01/18 14:36:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_Linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+static int -+user_pteload (USER_CTXT *uctx, E4_Addr addr, physaddr_t phys, int perm) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, phys, perm); -+ -+ /* -+ * On MPSAS we don't allocate a large enough context table, so -+ * if we see an address/context pair which would "alias" because -+ * they differ in unchecked hash bits to a previous pteload, -+ * then we kill the application. -+ */ -+ { -+ unsigned hashval = (E4MMU_SHIFT_ADDR(addr, (dev->dev_pageshift[0]) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(uctx->uctx_ctxt.ctxt_num)); -+ -+ if (dev->dev_rsvd_hashval[0] == 0xFFFFFFFF) -+ dev->dev_rsvd_hashval[0] = hashval & dev->dev_rsvd_hashmask[0]; -+ -+ if ((hashval & dev->dev_rsvd_hashmask[0]) != dev->dev_rsvd_hashval[0]) -+ { -+ printk ("user_pteload: vaddr=%016llx ctxnum=%x -> [%x] overlaps %x - %x [hashidx=%x]\n", (unsigned long long) addr, -+ uctx->uctx_ctxt.ctxt_num, hashval, hashval & dev->dev_rsvd_hashmask[0], dev->dev_rsvd_hashval[0], -+ E4MMU_HASH_INDEX (uctx->uctx_ctxt.ctxt_num, addr, dev->dev_pageshift[0], dev->dev_hashsize[0]-1)); -+ -+ return -EFAULT; -+ } -+ } -+ -+ if ((newpte & (PTE_PciNotLocal | PTE_CommandQueue)) == 0 && -+ ((addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (phys & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)))) -+ { -+ printk ("user_pteload: vaddr=%016llx incorrectly alias sdram at %lx\n", (unsigned long long) addr, -+ phys ^ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ return -EFAULT; -+ } -+ -+ if (newpte & PTE_PciNotLocal) -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (pci)\n", addr, newpte); -+ else if (newpte & PTE_CommandQueue) -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (command)\n", addr, newpte); -+ else -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (sdram)\n", addr, newpte); -+ -+ elan4mmu_pteload (&uctx->uctx_ctxt, 0, addr, newpte); -+ -+ return (0); -+} -+ -+int -+user_load_range (USER_CTXT *uctx, E4_Addr eaddr, unsigned long nbytes, E4_uint32 fsr) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct mm_struct *mm = current->mm; -+ int writeable = (AT_Perm(fsr) == AT_PermLocalDataWrite || -+ AT_Perm(fsr) == AT_PermRemoteWrite || -+ AT_Perm(fsr) == AT_PermLocalEvent || -+ AT_Perm(fsr) == AT_PermRemoteEvent); -+ struct vm_area_struct *vma; -+ int i, perm; -+ unsigned long len; -+ unsigned long maddr; -+ physaddr_t phys; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (nbytes > 0) -+ { -+ USER_RGN *rgn = user_rgnat_elan (uctx, eaddr); -+ -+ if (rgn == NULL || ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, AT_Perm (fsr))) -+ { -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx -> %s\n", eaddr, rgn == NULL ? "no mapping" : "no permission"); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (rgn == NULL ? -EFAULT : -EPERM); -+ } -+ -+ if (writeable) -+ perm = rgn->rgn_perm; -+/* This is the correct code but it breaks the Eagle libraries (1.6.X) - backed out (addy 24.08.04) -+ else if (AT_Perm(fsr) == AT_PermExecute && (rgn->rgn_perm & PERM_Mask) != PERM_LocExecute) -+*/ -+ else if (AT_Perm(fsr) == AT_PermExecute) -+ perm = PERM_LocRead | (rgn->rgn_perm & ~PERM_Mask); -+ else -+ perm = ELAN4_PERM_READONLY (rgn->rgn_perm & PERM_Mask) | (rgn->rgn_perm & ~PERM_Mask); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: rgn=%p [%llx.%lx.%x]\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len); -+ -+ len = ((rgn->rgn_ebase + rgn->rgn_len) - eaddr); -+ if (len > nbytes) -+ len = nbytes; -+ nbytes -= len; -+ -+ maddr = rgn->rgn_mbase + (eaddr - rgn->rgn_ebase); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx->%llx -> %lx->%lx len=%x perm=%x\n", eaddr, -+ eaddr + len, maddr, maddr + len, len, perm); -+ -+ down_read (&mm->mmap_sem); -+ while (len > 0) -+ { -+ if ((vma = find_vma_intersection (mm, maddr, maddr + PAGE_SIZE)) == NULL || -+ (writeable && !(vma->vm_flags & VM_WRITE))) -+ { -+ PRINTF (DBG_USER, DBG_FAULT, "ctxt_pagefault: %s %lx\n", vma ? "no writeble at" : "no vma for", maddr); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ -+ spin_lock (&mm->page_table_lock); -+ { -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: %lx %s %s\n", maddr, writeable ? "writeable" : "readonly", -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ writeable && !pte_write(ptep_value) ? "COW" : "OK"); -+ -+ if (ptep_ptr == NULL || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value)) || !pte_read (ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ -+ make_pages_present(maddr, maddr + PAGE_SIZE); -+ -+ spin_lock (&mm->page_table_lock); -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ if (ptep_ptr == NULL || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value)) || !pte_read (ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ } -+ -+ if (writeable) -+ pte_mkdirty(ptep_value); -+ pte_mkyoung (ptep_value); -+ -+ phys = pte_phys (ptep_value); -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ if (user_pteload (uctx, eaddr, phys, perm) < 0) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ -+ eaddr += (1 << dev->dev_pageshift[0]); -+ phys += (1 << dev->dev_pageshift[0]); -+ } -+ } -+ spin_unlock (&mm->page_table_lock); -+ -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+ up_read (&mm->mmap_sem); -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: alldone\n"); -+ -+ return (0); -+} -+ -+void -+user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len) -+{ -+ virtaddr_t lim = addr + len - 1; -+ struct vm_area_struct *vma; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((vma = find_vma (current->mm, addr)) != NULL) -+ { -+ do { -+ unsigned long start = vma->vm_start; -+ unsigned long end = vma->vm_end; -+ -+ if ((start-1) >= lim) -+ break; -+ -+ if (start < addr) start = addr; -+ if ((end-1) > lim) end = lim+1; -+ -+ if (vma->vm_flags & VM_IO) -+ continue; -+ -+ user_unload_main (uctx, start, end - start); -+ -+ make_pages_present (start, end); -+ -+ user_update_main (uctx, current->mm, start, end - start); -+ -+ } while ((vma = find_vma (current->mm, vma->vm_end)) != NULL); -+ } -+ up_read (¤t->mm->mmap_sem); -+} -+ -+static void -+user_update_range (USER_CTXT *uctx, int tbl, struct mm_struct *mm, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, int perm) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int roperm = ELAN4_PERM_READONLY(perm & PERM_Mask) | (perm & ~PERM_Mask); -+ int nbytes; -+ -+ while (len > 0) -+ { -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_range: %llx (%lx) %s\n", eaddr, maddr, -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ !pte_write(ptep_value) ? "RO/COW" : "OK"); -+ -+ if (ptep_ptr && !pte_none(ptep_value) && pte_present(ptep_value) && pte_read (ptep_value)) { -+ physaddr_t phys_value = pte_phys(ptep_value); -+ for (nbytes = 0; nbytes < PAGE_SIZE; nbytes += (1 << dev->dev_pageshift[0])) -+ { -+ user_pteload (uctx, eaddr, phys_value, pte_write (ptep_value) ? perm : roperm); -+ -+ eaddr += (1 << dev->dev_pageshift[0]); -+ phys_value += (1 << dev->dev_pageshift[0]); -+ } -+ } -+ -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+} -+ -+void -+user_update_main (USER_CTXT *uctx, struct mm_struct *mm, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, rgn->rgn_mbase, rgn->rgn_ebase, rgn->rgn_len, rgn->rgn_perm); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, rgn->rgn_mbase, rgn->rgn_ebase, ssize, rgn->rgn_perm); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: end %lx -> %lx\n", start, start + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize, rgn->rgn_perm); -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: middle %lx -> %lx\n", start, end); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), len, rgn->rgn_perm); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+ -+void -+user_unload_main (USER_CTXT *uctx, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, ssize); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: end %lx -> %lx\n", start, start + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize); -+ } -+ else -+ { -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: middle %lx -> %lx\n", start, end); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), len); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/asm_elan4_thread.S -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/asm_elan4_thread.S 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/asm_elan4_thread.S 2005-06-01 23:12:54.626435000 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: asm_elan4_thread.S,v 1.1 2003/09/23 13:55:11 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+/* -+ * c_reschedule (E4_uint64 *commandport) -+ */ -+ .global c_reschedule -+c_reschedule: -+ add %sp, -128, %sp -+ st64 %r16, [%sp] // preserve call preserved registers -+ st64 %r24, [%sp + 64] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ mov NOP_CMD, %r23 // "nop" command -+ st64suspend %r16, [%r8] -+3: ld64 [%sp], %r16 -+ ld64 [%sp + 64], %r24 // restore call preserved register -+ jmpl %r2+8, %r0 // and return -+ add %sp, 128, %sp -+ -+ -+/* -+ * c_waitevent (E4_uint64 *commandport, E4_Event *event, E4_uint64 count) -+ */ -+ .global c_waitevent -+c_waitevent: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 // "flush command queue desc" command -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ -+ or %r9, WAIT_EVENT_CMD, %r16 -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 -+ mov %sp, %r18 -+ mov %r8, %r19 -+ -+ st32suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/assym_elan4.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/assym_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/assym_elan4.h 2005-06-01 23:12:54.627434848 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: genassym_elan4.c,v 1.3 2004/04/25 11:26:07 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/genassym_elan4.c,v $*/ -+ -+/* Generated by genassym_elan4 - do not modify */ -+ -+#define EP4_RCVR_THREAD_STALL 0 -+#define EP4_RCVR_PENDING_TAILP 128 -+#define EP4_RCVR_PENDING_HEAD 136 -+#define EP4_RCVR_DEBUG 176 -+#define EP4_RXD_NEXT 664 -+#define EP4_RXD_QUEUED 728 -+#define EP4_RXD_DEBUG 944 -Index: linux-2.4.21/drivers/net/qsnet/ep/cm.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/cm.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/cm.c 2005-06-01 23:12:54.632434088 -0400 -@@ -0,0 +1,3000 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm.c,v 1.83.2.6 2005/01/13 12:37:57 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+#if defined(LINUX) -+#include "conf_linux.h" -+#endif -+ -+int BranchingRatios[CM_MAX_LEVELS]; -+ -+int MachineId = -1; -+int BrokenLevel = -1; /* Simulates Broken Network */ -+int RejoinCheck = 1; -+int RejoinPanic = 0; -+ -+static int -+SegmentNo (CM_RAIL *cmRail, u_int nodeid, u_int lvl) -+{ -+ int i; -+ -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < lvl; i++) -+ nodeid /= cmRail->Levels[i].NumSegs; -+ -+ return (nodeid % cmRail->Levels[lvl].NumSegs); -+} -+ -+static int -+ClusterIds (CM_RAIL *cmRail, int clvl, int *clmin, int *clmax) -+{ -+ int clid = cmRail->Rail->Position.pos_nodeid - cmRail->Levels[clvl].MinNodeId; -+ -+ if (clvl == 0) -+ *clmin = *clmax = clid; -+ else -+ { -+ *clmin = cmRail->Levels[clvl - 1].MinNodeId - cmRail->Levels[clvl].MinNodeId; -+ *clmax = *clmin + cmRail->Levels[clvl - 1].NumNodes - 1; -+ } -+ return (clid); -+} -+ -+#if defined(PER_CPU_TIMEOUT) -+static void -+__Schedule_Discovery (CM_RAIL *cmRail) /* we urgently need to schedule discovery */ -+{ -+ cmRail->NextDiscoverTime = lbolt; -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, cmRail->NextDiscoverTime)) -+ cmRail->NextRunTime = cmRail->NextDiscoverTime; -+} -+ -+static void -+__Schedule_Heartbeat (CM_RAIL *cmRail) -+{ -+ cmRail->NextHeartbeatTime = lbolt; -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, cmRail->NextHeartbeatTime)) -+ cmRail->NextRunTime = cmRail->NextHeartbeatTime; -+} -+#else -+ -+static void -+__Schedule_Timer (CM_RAIL *cmRail, long tick) -+{ -+ if (! timer_pending (&cmRail->HeartbeatTimer) || AFTER (cmRail->NextRunTime, tick)) -+ { -+ cmRail->NextRunTime = tick; -+ -+ mod_timer (&cmRail->HeartbeatTimer, tick); -+ } -+} -+ -+static void -+__Schedule_Discovery (CM_RAIL *cmRail) /* we urgently need to schedule discovery */ -+{ -+ __Schedule_Timer (cmRail, cmRail->NextDiscoverTime = lbolt); -+} -+ -+static void -+__Schedule_Heartbeat (CM_RAIL *cmRail) -+{ -+ __Schedule_Timer (cmRail, cmRail->NextHeartbeatTime = lbolt); -+} -+#endif -+ -+static int -+MsgBusy (CM_RAIL *cmRail, int msgNumber) -+{ -+ switch (ep_outputq_state (cmRail->Rail, cmRail->MsgQueue, msgNumber)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ uint8_t type = msg->Hdr.Type; -+ uint16_t nmaps = msg->Hdr.NumMaps; -+ int16_t off = msg->Payload.Statemaps[CM_MSG_MAP(0)].offset; -+ -+ CPRINTF4 (((type == CM_MSG_TYPE_DISCOVER_LEADER) || (type == CM_MSG_TYPE_DISCOVER_SUBORDINATE)) ? 6 : 3, /* we expect broadcasts to be NACKed */ -+ "%s: msg %d type %d failed%s\n", cmRail->Rail->Name, msgNumber, type, -+ (type != CM_MSG_TYPE_HEARTBEAT) ? "" : nmaps == 0 ? ": null heartbeat" : -+ off == STATEMAP_RESET ? ": heartbeat with R statemaps" : ": heartbeat with statemaps"); -+#endif -+ return 0; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: -+ return 0; -+ -+ default: -+ panic ("MsgBusy - bad return code from ep_outputq_state\n"); -+ /* NOTREACHED */ -+ } -+ return 0; -+} -+ -+static void -+LaunchMessage (CM_RAIL *cmRail, int msgNumber, int vp, int qnum, int retries, int type, int lvl, int nmaps) -+{ -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (nmaps >= 0 && nmaps <= CM_MSG_MAXMAPS); -+ ASSERT (SPINLOCK_HELD (&cmRail->Lock)); -+ -+ hdr->Version = CM_MSG_VERSION; -+ hdr->ParamHash = cmRail->ParamHash; -+ hdr->Timestamp = cmRail->Timestamp; -+ hdr->Checksum = 0; -+ hdr->NodeId = cmRail->Rail->Position.pos_nodeid; -+ hdr->MachineId = MachineId; -+ hdr->NumMaps = nmaps; -+ hdr->Level = lvl; -+ hdr->Type = type; -+ hdr->Checksum = CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)); -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ return; -+ -+ if (ep_outputq_send (cmRail->Rail, cmRail->MsgQueue, msgNumber, -+ CM_MSG_SIZE(nmaps), vp, qnum, retries)); -+ IncrStat (cmRail, LaunchMessageFail); -+} -+ -+static int -+SendMessage (CM_RAIL *cmRail, int nodeId, int lvl, int type) -+{ -+ int msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ int n = CM_NUM_SPARE_MSG_BUFFERS; -+ int retries; -+ -+ ASSERT (type == CM_MSG_TYPE_IMCOMING || /* other types must use SendToSgmt */ -+ type == CM_MSG_TYPE_REJOIN); -+ -+ while (n-- > 0 && MsgBusy (cmRail, msgNumber)) /* search for idle "spare" buffer */ -+ { -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) -+ cmRail->NextSpareMsg = 0; -+ -+ msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ } -+ -+ if (n == 0) /* all "spare" message buffers busy */ -+ { -+ CPRINTF3 (3, "%s: all spare message buffers busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, nodeId); -+ return (0); -+ } -+ -+ /* NB IMCOMING may be echoed by MANY nodes, so we don't (and musn't) have any retries */ -+ retries = (type == CM_MSG_TYPE_IMCOMING) ? 0 : CM_P2P_DMA_RETRIES; -+ -+ LaunchMessage (cmRail, msgNumber, EP_VP_NODE (nodeId), EP_SYSTEMQ_INTR, /* eager receive */ -+ retries, type, lvl, 0); -+ -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) /* check this one last next time */ -+ cmRail->NextSpareMsg = 0; -+ -+ return (1); -+} -+ -+static int -+SendToSgmt (CM_RAIL *cmRail, CM_SGMT *sgmt, int type) -+{ -+ bitmap_t seg; -+ int offset; -+ int nmaps; -+ int sidx; -+ int clvl; -+ -+ ASSERT (sgmt->Level <= cmRail->TopLevel); -+ -+ if (MsgBusy (cmRail, sgmt->MsgNumber)) /* previous message still busy */ -+ { -+ CPRINTF3 (3, "%s: node message buffer busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, sgmt->NodeId); -+ -+ return (0); -+ } -+ -+ switch (type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ ASSERT (sgmt->State == CM_SGMT_ABSENT); -+ ASSERT (sgmt->Level == ((cmRail->Role == CM_ROLE_LEADER_CANDIDATE) ? cmRail->TopLevel : cmRail->TopLevel - 1)); -+ ASSERT (sgmt->Level < cmRail->NumLevels); -+ ASSERT (sgmt->Sgmt == cmRail->Levels[sgmt->Level].MySgmt); -+ -+ /* broadcast to me and all my peers at this level (== my segment in the level above) */ -+ sidx = (sgmt->Level == cmRail->NumLevels - 1) ? 0 : cmRail->Levels[sgmt->Level + 1].MySgmt; -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level + 1, sidx), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ type, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ ASSERT (sgmt->State == CM_SGMT_WAITING); -+ ASSERT (sgmt->Level > 0); /* broadcasting just to subtree */ -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level, sgmt->Sgmt), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ CM_MSG_TYPE_DISCOVER_SUBORDINATE, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_NOTIFY: -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_INTR, CM_P2P_DMA_RETRIES, /* eager rx; lots of retries */ -+ CM_MSG_TYPE_NOTIFY, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ { -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, sgmt->MsgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ hdr->AckSeq = sgmt->AckSeq; -+ -+ if (!sgmt->MsgAcked) /* Current message not acknowledged */ -+ { -+ /* must have been something significant to require an ack */ -+ ASSERT (sgmt->SendMaps); -+ ASSERT (sgmt->NumMaps > 0); -+ -+ CPRINTF3 (3, "%s: retrying heartbeat to %d (%d entries)\n", cmRail->Rail->Name, sgmt->NodeId, sgmt->NumMaps); -+ -+ IncrStat (cmRail, RetryHeartbeat); -+ -+ nmaps = sgmt->NumMaps; -+ } -+ else -+ { -+ nmaps = 0; -+ -+ if (sgmt->SendMaps) /* can send maps */ -+ { -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!sgmt->Maps[clvl].OutputMapValid) -+ continue; -+ -+ while ((offset = statemap_findchange (sgmt->Maps[clvl].OutputMap, &seg, 1)) >= 0) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 1; -+ -+ map->level = clvl; -+ map->offset = offset; -+ map->seg[0] = seg & 0xffff; -+ map->seg[1] = (seg >> 16) & 0xffff; -+#if (BT_ULSHIFT == 6) -+ map->seg[2] = (seg >> 32) & 0xffff; -+ map->seg[3] = (seg >> 48) & 0xffff; -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ -+ if (sgmt->Maps[clvl].SentChanges) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ map->level = clvl; -+ map->offset = STATEMAP_NOMORECHANGES; -+ -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ } -+ } -+ -+ ASSERT (nmaps < CM_MSG_MAXMAPS); -+ -+ msg_full: -+ sgmt->NumMaps = nmaps; /* remember how many incase we retry */ -+ -+ if (nmaps == 0) /* no changes to send */ -+ hdr->Seq = sgmt->MsgSeq; /* this one can be dropped */ -+ else -+ { -+ hdr->Seq = ++(sgmt->MsgSeq); /* on to next message number */ -+ sgmt->MsgAcked = 0; /* need this one to be acked before I can send another */ -+ -+ IncrStat (cmRail, MapChangesSent); -+ } -+ } -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_POLLED, CM_P2P_DMA_RETRIES, /* polled receive, lots of retries */ -+ CM_MSG_TYPE_HEARTBEAT, sgmt->Level, nmaps); -+ -+ IncrStat (cmRail, HeartbeatsSent); -+ -+ return (1); -+ } -+ -+ default: /* other types must use SendMessage */ -+ printk ("SendToSgmt: invalid type %d\n", type); -+ ASSERT (0); -+ -+ return (1); -+ } -+} -+ -+static char * -+GlobalStatusString (statemap_t *map, int idx) -+{ -+ char *strings[] = {"....", "S...", "C...", "R...", -+ ".s..", "Ss..", "Cs..", "Rs..", -+ "..r.", "S.r.", "C.r.", "R.r.", -+ ".sr.", "Ssr.", "Csr.", "Rsr.", -+ "...R", "S..R", "C..R", "R..R", -+ ".s.R", "Ss.R", "Cs.R", "Rs.R", -+ "..rR", "S.rR", "C.rR", "R.rR", -+ ".srR", "SsrR", "CsrR", "RsrR"}; -+ -+ return (strings[statemap_getbits (map, idx * CM_GSTATUS_BITS, CM_GSTATUS_BITS)]); -+} -+ -+static char * -+MapString (char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ static char *space; -+ int i; -+ -+ if (space == NULL) -+ KMEM_ALLOC (space, char *, EP_MAX_NODES*(CM_GSTATUS_BITS+1), 0); -+ -+ if (space == NULL) -+ return (""); -+ else -+ { -+ char *ptr = space; -+ -+ sprintf (space, "%s ", name); ptr += strlen (ptr); -+ for (i = 0; i < nnodes; i++, ptr += strlen (ptr)) -+ sprintf (ptr, "%s%s", i == 0 ? "" : ",", GlobalStatusString (map, i)); -+ sprintf (ptr, " %s", trailer); -+ return (space); -+ } -+} -+ -+void -+DisplayMap (DisplayInfo *di, CM_RAIL *cmRail, char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ char linebuf[256]; -+ char *ptr = linebuf; -+ int i; -+ -+#define NODES_PER_LINE 32 -+ for (i = 0; i < nnodes; i++) -+ { -+ if (ptr == linebuf) -+ { -+ sprintf (ptr, "%4d", i); -+ ptr += strlen (ptr); -+ } -+ -+ sprintf (ptr, ",%s", GlobalStatusString (map, i)); -+ ptr += strlen (ptr); -+ -+ if ((i % NODES_PER_LINE) == (NODES_PER_LINE-1) || (i == (nnodes-1))) -+ { -+ (di->func)(di->arg, "%s: %s %s %s\n", cmRail->Rail->Name, name, linebuf, trailer); -+ ptr = linebuf; -+ } -+ } -+#undef NODES_PER_LINE -+} -+ -+void -+DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int clvl; -+ char mapname[128]; -+ -+ (di->func)(di->arg, "%s: Node %d maps...\n", cmRail->Rail->Name, cmRail->Rail->Position.pos_nodeid); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nnodes = cmRail->Levels[clvl].NumNodes; -+ -+ (di->func)(di->arg, "%s: Cluster level %d: Connected %ld - %s%s\n", -+ cmRail->Rail->Name, clvl, cmRail->Levels[clvl].Connected, -+ cmRail->Levels[clvl].Online ? "Online" : "Offline", -+ cmRail->Levels[clvl].Restarting ? ", Restarting" : ""); -+ -+ for (lvl = 0; lvl < cmRail->TopLevel && lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ sprintf (mapname, "%10s%2d", "Level", lvl); -+ DisplayMap (di, cmRail, mapname, level->SubordinateMap[clvl], nnodes, -+ level->SubordinateMapValid[clvl] ? "" : "(invalid)"); -+ } -+ -+ sprintf (mapname, "%12s", "Local"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LocalMap, nnodes, ""); -+ -+ sprintf (mapname, "%12s", "Subtree"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].SubTreeMap, nnodes, -+ cmRail->Levels[clvl].SubTreeMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "Global"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].GlobalMap, nnodes, -+ cmRail->Levels[clvl].GlobalMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "LastGlobal"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LastGlobalMap, nnodes, ""); -+ } -+} -+ -+void -+DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ (di->func)(di->arg, "%s: Node %d segments...\n", cmRail->Rail->Name, cmRail->NodeId); -+ -+ for (lvl = 0; lvl <= cmRail->TopLevel && lvl < cmRail->NumLevels; lvl++) -+ { -+ (di->func)(di->arg, " level %d: ", lvl); -+ -+ for (sidx = 0; sidx < ((lvl == cmRail->TopLevel) ? 1 : cmRail->Levels[lvl].NumSegs); sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ (di->func)(di->arg, "[%d, in: %d out: %d %s%s]", -+ sgmt->NodeId, -+ sgmt->AckSeq, -+ sgmt->MsgSeq, -+ sgmt->MsgAcked ? "A" : "-", -+ sgmt->SendMaps ? "!" : "-"); -+ else -+ (di->func)(di->arg, "[%s]", (sgmt->State == CM_SGMT_ABSENT ? "absent" : -+ sgmt->State == CM_SGMT_WAITING ? "waiting" : -+ sgmt->State == CM_SGMT_COMING ? "coming" : "UNKNOWN")); -+ } -+ (di->func)(di->arg, "\n"); -+ } -+} -+ -+ -+static void -+StartConnecting (CM_RAIL *cmRail, CM_SGMT *sgmt, int NodeId, int Timestamp) -+{ -+ int clvl; -+ -+ CPRINTF4 (2, "%s: lvl %d subtree %d node %d -> connecting\n", cmRail->Rail->Name, sgmt->Level, sgmt->Sgmt, NodeId); -+ -+ /* Only reconnect the same guy if he was reborn */ -+ ASSERT (sgmt->State != CM_SGMT_PRESENT || -+ (sgmt->NodeId == NodeId && sgmt->Timestamp != Timestamp)); -+ -+ /* After we've connected to a new peer, we wait to receive -+ * STATEMAP_RESET before we accumulate changes and we wait for a -+ * complete map to be received before we propagate changes to other -+ * nodes. -+ * -+ * If I'm the subordinate, I can start sending maps right away, since -+ * the leader is ready for them already. If I'm the leader, I hold off -+ * sending maps until I've seen the subordinate's first heartbeat, -+ * because the subordinate might miss my NOTIFY message, still think -+ * she's a leader candidate and ignore my heartbeats. -+ */ -+ sgmt->SendMaps = (sgmt->Level == cmRail->TopLevel); /* I can send maps to my leader (she NOTIFIED me) */ -+ -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_reset (sgmt->Maps[clvl].CurrentInputMap); -+ statemap_reset (sgmt->Maps[clvl].InputMap); -+ statemap_reset (sgmt->Maps[clvl].OutputMap); -+ -+ sgmt->Maps[clvl].InputMapValid = 0; -+ sgmt->Maps[clvl].OutputMapValid = 0; -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* connection to leader */ -+ { -+ ASSERT (sgmt->Sgmt == 0); -+ ASSERT (cmRail->Role == CM_ROLE_SUBORDINATE); -+ -+ if (cmRail->Levels[clvl].SubTreeMapValid) /* already got a subtree map to send up */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ } -+ } -+ else /* connection to subordinate */ -+ { -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ -+ if (cmRail->Levels[clvl].GlobalMapValid) /* already got a global map to broadcast */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ } -+ } -+ } -+ -+ /* Initialise sequence counters */ -+ sgmt->MsgSeq = sgmt->AckSeq = 0; -+ sgmt->MsgAcked = 1; /* ready to send a new sequenced message */ -+ -+ sgmt->State = CM_SGMT_PRESENT; -+ sgmt->NodeId = NodeId; -+ sgmt->UpdateTick = lbolt; -+ sgmt->Timestamp = Timestamp; -+} -+ -+static void -+StartSubTreeDiscovery (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ sgmt->WaitingTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+} -+ -+void -+StartSubordinateDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int lvl = cmRail->TopLevel - 1; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ ASSERT (lvl >= 0 && lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ if (i != level->MySgmt) /* No-one should connect here */ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ } -+} -+ -+void -+StartLeaderDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int clvl; -+ CM_LEVEL *level = &cmRail->Levels[cmRail->TopLevel]; -+ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ for (clvl = cmRail->TopLevel; clvl < cmRail->NumLevels; clvl++) -+ { -+ cmRail->Levels[clvl].GlobalMapValid = 0; -+ cmRail->Levels[clvl].SubTreeMapValid = 0; -+ level->SubordinateMapValid[clvl] = 0; -+ } -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ } -+ -+ cmRail->DiscoverStartTick = lbolt; -+ cmRail->Role = CM_ROLE_LEADER_CANDIDATE; -+ -+ __Schedule_Discovery (cmRail); -+} -+ -+static void -+RaiseTopLevel (CM_RAIL *cmRail) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: RaiseTopLevel %d\n", cmRail->Rail->Name, cmRail->TopLevel + 1); -+ -+ if (++cmRail->TopLevel == cmRail->NumLevels) /* whole machine leader? */ -+ cmRail->Role = CM_ROLE_LEADER; -+ else -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+ -+ StartSubordinateDiscovery (cmRail); /* and any direct subordinates */ -+} -+ -+static void -+LowerTopLevel (CM_RAIL *cmRail, int lvl) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: LowerTopLevel %d\n", cmRail->Rail->Name, lvl); -+ -+ if (lvl == 0) -+ cmRail->Timestamp = lbolt; -+ -+ cmRail->TopLevel = lvl; -+ -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+} -+ -+static int -+IShouldLead (CM_RAIL *cmRail, CM_MSG *msg) -+{ -+ /* NB, this function MUST be consistently calculated on any nodes, just -+ * from the info supplied in the message. Otherwise leadership -+ * arbitration during concurrent discovery will fail. -+ */ -+ return (cmRail->NodeId < msg->Hdr.NodeId); -+} -+ -+static int -+SumCheck (CM_MSG *msg) -+{ -+ CM_HDR *hdr = &msg->Hdr; -+ uint16_t sum = hdr->Checksum; -+ uint16_t nmaps = hdr->NumMaps; -+ -+ if (nmaps > CM_MSG_MAXMAPS) { -+ printk ("SumCheck: nmaps %d > CM_MSG_MAXMAPS\n", nmaps); -+ return 0; -+ } -+ -+ if ((hdr->Type != CM_MSG_TYPE_HEARTBEAT) && nmaps != 0) { -+ printk ("SumCheck: type(%d) not HEARTBEAT and nmaps(%d) != 0\n", hdr->Type, nmaps); -+ return 0; -+ } -+ -+ hdr->Checksum = 0; -+ -+ if (CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)) != sum) { -+ printk ("SumCheck: checksum failed %x %x\n", CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)), sum); -+ -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static void -+ProcessMessage (EP_RAIL *rail, void *arg, void *msgbuf) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ CM_MSG *msg = (CM_MSG *) msgbuf; -+ CM_HDR *hdr = &msg->Hdr; -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *sgmt; -+ bitmap_t seg; -+ int i; -+ int delay; -+ static long tlast; -+ static int count; -+ -+ /* Poll the message Version field until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; hdr->Version == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Display a message every 60 seconds if we see an "old" format message */ -+ if (hdr->Version == EP_SYSTEMQ_UNRECEIVED && (((lbolt - tlast) > 60*HZ) ? (count = 0) : ++count) < 1) -+ { -+ printk ("%s: received old protocol message (type %d from node %d)\n", cmRail->Rail->Name, -+ ((uint8_t *) msg)[20], ((uint16_t *) msg)[4]); -+ -+ tlast = lbolt; -+ goto finished; -+ } -+ -+ if (hdr->Version != CM_MSG_VERSION || hdr->ParamHash != cmRail->ParamHash || hdr->MachineId != MachineId) -+ { -+ CPRINTF8 (1, "%s: invalid message : Version %08x (%08x) ParamHash %08x (%08x) MachineId %04x (%04x) Nodeid %d\n", cmRail->Rail->Name, -+ hdr->Version, CM_MSG_VERSION, hdr->ParamHash, cmRail->ParamHash, hdr->MachineId, MachineId, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (!SumCheck (msg)) -+ { -+ printk ("%s: checksum failed on msg from %d?\n", cmRail->Rail->Name, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (hdr->NodeId == cmRail->NodeId) /* ignore my own broadcast */ -+ { -+ CPRINTF3 (6, "%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ -+ if (hdr->Type != CM_MSG_TYPE_DISCOVER_LEADER && hdr->Type != CM_MSG_TYPE_RESOLVE_LEADER) -+ printk ("%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ goto finished; -+ } -+ -+ lvl = hdr->Level; -+ level = &cmRail->Levels[lvl]; -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ goto finished; -+ -+ if (lvl >= cmRail->NumLevels || /* from outer space */ -+ hdr->NodeId < level->MinNodeId || /* from outside this level's subtree */ -+ hdr->NodeId >= level->MinNodeId + level->NumNodes) -+ { -+ printk ("%s: lvl %d node %d type %d: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, hdr->Type, -+ lvl >= cmRail->NumLevels ? "level too big for machine" : "outside subtree"); -+ goto finished; -+ } -+ -+ sidx = SegmentNo (cmRail, hdr->NodeId, lvl); -+ sgmt = &level->Sgmts[sidx]; -+ -+ switch (hdr->Type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ if (lvl >= cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ /* someone else thinks they lead at the same level as me */ -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ -+ SendMessage (cmRail, hdr->NodeId, lvl, CM_MSG_TYPE_REJOIN); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ if (sidx == level->MySgmt) /* someone I led thinks they lead some of my subtrees */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (lvl < cmRail->TopLevel) /* I'm the leader of this level */ -+ { -+ if (sgmt->State == CM_SGMT_PRESENT && /* someone thinks someone I lead is dead */ -+ sgmt->NodeId != hdr->NodeId) -+ { -+ /* My subordinate's death could be noticed by one of her peers -+ * before I do. If she _is_ dead, I'll notice before long and -+ * NOTIFY this discover. If this discover completes before I -+ * detect my subordinate's death, the discovering node will -+ * try to take over from me, and then I'll RESET her. -+ */ -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (got established subordinate)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ return; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* New connection */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation */ -+ StartConnecting (cmRail, sgmt, hdr->NodeId, hdr->Timestamp); -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !NOTIFY)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_NOTIFY); -+ break; -+ } -+ -+ ASSERT (lvl == cmRail->TopLevel); -+ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ /* I think my leader is alive, in which case she'll NOTIFY this -+ * DISCOVER. If she's dead, I'll start to become a leader -+ * candidate and handle this appropriately. -+ */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: ignored (I'm a subordinate)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ ASSERT (cmRail->Role == CM_ROLE_LEADER_CANDIDATE); -+ -+ /* A peer at this level is bidding for leadership along with me */ -+ if (IShouldLead (cmRail, msg)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: but I should lead\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ /* So there _is_ someone there; She'll be seeing my DISCOVER -+ * messages and extending her discovery period, so that when I -+ * become leader, I'll NOTIFY her. In the meantime I'll flag her -+ * activity, so she remains WAITING. -+ */ -+ sgmt->UpdateTick = lbolt; -+ break; -+ } -+ -+ /* Defer to sender... */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: delaying me becoming leader\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ if (lvl <= cmRail->TopLevel) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (from my subtree)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (I'm not looking for a leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (hdr->Level > cmRail->BroadcastLevel && AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (broadcast level too low)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d DISCOVER_SUBORDINATE: !IMCOMING\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_IMCOMING); -+ break; -+ -+ case CM_MSG_TYPE_IMCOMING: -+ if (lvl > cmRail->TopLevel || /* from peer or node above me */ -+ sgmt->State == CM_SGMT_PRESENT || /* already got a subtree */ -+ sgmt->State == CM_SGMT_ABSENT) /* already written off this subtree */ -+ { -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: ignored\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: waiting...\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ sgmt->State = CM_SGMT_COMING; -+ sgmt->UpdateTick = lbolt; -+ break; -+ -+ case CM_MSG_TYPE_NOTIFY: -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE || /* I'm not looking for a leader */ -+ lvl != cmRail->TopLevel) /* at this level */ -+ { -+ /* If this person really should be my leader, my existing leader -+ * will time out, and I'll discover this one. */ -+ CPRINTF4 (2, "%s: lvl %d node %d NOTIFY: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, -+ lvl < cmRail->TopLevel ? "already leader" : -+ lvl > cmRail->TopLevel ? "lvl too high" : "already subordinate"); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d NOTIFY: becoming subordinate\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ cmRail->Role = CM_ROLE_SUBORDINATE; /* Now I've found my level */ -+ StartConnecting (cmRail, &level->Sgmts[0], hdr->NodeId, hdr->Timestamp); -+ break; -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT: ignored (lvl too high)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (lvl == cmRail->TopLevel) /* heartbeat from my leader */ -+ { -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE) /* but I've not got one */ -+ { -+ /* I'm probably a new incarnation of myself; I'll keep doing -+ * discovery until my previous existence's leader NOTIFY's me. -+ * If I was this node's leader, she'll time me out (I'm not -+ * sending heartbeats to her) and we'll fight it out for -+ * leadership. */ -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT ignored (no leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ sidx = 0; -+ sgmt = &level->Sgmts[0]; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* not fully connected with this guy */ -+ sgmt->NodeId != hdr->NodeId || /* someone else impersonating my peer */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation of my peer */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d H/BEAT: !REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d H/BEAT: !REJOIN %s\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, -+ sgmt->State != CM_SGMT_PRESENT ? "not present" : -+ sgmt->NodeId != hdr->NodeId ? "someone else" : "new incarnation"); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (!((hdr->Seq == sgmt->AckSeq) || /* NOT duplicate message or */ -+ (hdr->Seq == (CM_SEQ)(sgmt->AckSeq + 1))) || /* expected message */ -+ !((hdr->AckSeq == sgmt->MsgSeq) || /* NOT expected ack or */ -+ (hdr->AckSeq == (CM_SEQ)(sgmt->MsgSeq - 1)))) /* duplicate ack */ -+ { -+ CPRINTF9 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ printk ("%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ IncrStat (cmRail, HeartbeatsRcvd); -+ -+ sgmt->UpdateTick = lbolt; -+ sgmt->SendMaps = 1; -+ -+ if (sgmt->MsgSeq == hdr->AckSeq) /* acking current message */ -+ sgmt->MsgAcked = 1; /* can send the next one */ -+ -+ if (hdr->Seq == sgmt->AckSeq) /* discard duplicate (or NULL heartbeat) */ -+ { -+ CPRINTF6 (6, "%s: lvl %d sidx %d node %d type %d: %s H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ hdr->NumMaps == 0 ? "null" : "duplicate"); -+ break; -+ } -+ -+ CPRINTF7 (6, "%s: lvl %d sidx %d node %d type %d: seq %d maps %d H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, hdr->Seq, hdr->NumMaps); -+ -+ sgmt->AckSeq = hdr->Seq; /* ready to receive next one */ -+ -+ for (i = 0; i < hdr->NumMaps; i++) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(i)]; -+ int clvl = map->level; -+ -+ if (clvl < 0) /* end of message */ -+ break; -+ -+ if (clvl < sgmt->Level) /* bad level */ -+ { -+ CPRINTF6 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (bad clevel %d)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, clvl); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ goto finished; -+ } -+ -+ if (map->offset == STATEMAP_NOMORECHANGES) /* end of atomic changes */ -+ { -+ if (!sgmt->Maps[clvl].InputMapValid || /* not set InputMap yet */ -+ statemap_changed (sgmt->Maps[clvl].CurrentInputMap)) /* previously applied changes */ -+ { -+ CPRINTF3 (4, "%s: received new clvl %d map from %d\n", cmRail->Rail->Name, clvl, sgmt->NodeId); -+ -+ statemap_setmap (sgmt->Maps[clvl].InputMap, sgmt->Maps[clvl].CurrentInputMap); -+ sgmt->Maps[clvl].InputMapValid = 1; -+ -+ statemap_clearchanges (sgmt->Maps[clvl].CurrentInputMap); -+ } -+ continue; -+ } -+ -+ seg = ((bitmap_t)map->seg[0]) -+ | (((bitmap_t)map->seg[1]) << 16) -+#if (BT_ULSHIFT == 6) -+ | (((bitmap_t)map->seg[2]) << 32) -+ | (((bitmap_t)map->seg[3]) << 48) -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ ; -+ statemap_setseg (sgmt->Maps[clvl].CurrentInputMap, map->offset, seg); -+ } -+ break; -+ -+ case CM_MSG_TYPE_REJOIN: -+ CPRINTF5 (1, "%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ printk ("%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinRequest); -+ break; -+ -+ default: -+ printk ("%s: lvl=%d unknown message type %d\n", cmRail->Rail->Name, lvl, hdr->Type); -+ break; -+ } -+ finished: -+ hdr->Version = EP_SYSTEMQ_UNRECEIVED; -+} -+ -+static void -+PollInputQueues (CM_RAIL *cmRail) -+{ -+ ep_poll_inputq (cmRail->Rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ ep_poll_inputq (cmRail->Rail, cmRail->PolledQueue, 0, ProcessMessage, cmRail); -+} -+ -+static void -+IntrQueueCallback (EP_RAIL *rail, void *arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ /* If the lock is held, then don't bother spinning for it, -+ * since the messages will be received at this, or the -+ * next heartbeat */ -+ local_irq_save (flags); -+ if (spin_trylock (&cmRail->Lock)) -+ { -+ if (AFTER (lbolt, cmRail->NextRunTime + MSEC2TICKS(CM_TIMER_SCHEDULE_TIMEOUT))) -+ printk ("%s: heartbeat timer stuck - scheduled\n", cmRail->Rail->Name); -+ else -+ ep_poll_inputq (rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ spin_unlock (&cmRail->Lock); -+ } -+ local_irq_restore (flags); -+} -+ -+char * -+sprintClPeers (char *str, CM_RAIL *cmRail, int clvl) -+{ -+ int clLo = cmRail->Levels[clvl].MinNodeId; -+ int clHi = clLo + cmRail->Levels[clvl].NumNodes - 1; -+ int subClLo = (clvl == 0) ? cmRail->NodeId : cmRail->Levels[clvl - 1].MinNodeId; -+ int subClHi = subClLo + ((clvl == 0) ? 0 : cmRail->Levels[clvl - 1].NumNodes - 1); -+ -+ if (subClHi == clHi) -+ sprintf (str, "[%d-%d]", clLo, subClLo - 1); -+ else if (subClLo == clLo) -+ sprintf (str, "[%d-%d]", subClHi + 1, clHi); -+ else -+ sprintf (str, "[%d-%d][%d-%d]", clLo, subClLo - 1, subClHi + 1, clHi); -+ -+ return (str); -+} -+ -+static void -+RestartComms (CM_RAIL *cmRail, int clvl) -+{ -+ int base; -+ int nodeId; -+ int lstat; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ base = myClId * CM_GSTATUS_BITS; -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ cmRail->Levels[clvl].Restarting = 1; -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ cmRail->Levels[clvl].Online = 0; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster; it's just someone in this cluster */ -+ { /* that wants me to restart */ -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ /* gstat must == RUNNING */ -+ cmRail->Levels[clvl].Connected--; -+ break; -+ case EP_NODE_DISCONNECTED: -+ /* CLOSING || STARTING || (lstat & RESTART) */ -+ break; -+ } -+ } -+ } -+ } -+} -+ -+static void -+UpdateGlobalStatus (CM_RAIL *cmRail) -+{ -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ int nodeId; -+ int offset; -+ int base; -+ bitmap_t gstat; -+ bitmap_t lgstat; -+ bitmap_t lstat; -+ int clvl; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ int lastClId; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!cmRail->Levels[clvl].GlobalMapValid || /* not got the global map yet */ -+ !statemap_changed (cmRail->Levels[clvl].GlobalMap)) /* no changes to respond to */ -+ { -+ CPRINTF2 (6, "%s: Got invalid or unchanged clvl %d global map\n", cmRail->Rail->Name, clvl); -+ continue; -+ } -+ -+ CPRINTF2 (5, "%s: Got valid changed clvl %d global map\n", cmRail->Rail->Name, clvl); -+ -+ lastClId = -1; -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ while ((offset = statemap_findchange (cmRail->Levels[clvl].GlobalMap, &gstat, 1)) >= 0) -+ { -+ /* -+ * Check every node that this segment covers - however -+ * if the last node we checked in the previous segmemt -+ * is also the first node in this segment, then skip -+ * it. -+ */ -+ if ((thisClId = (offset/CM_GSTATUS_BITS)) == lastClId) -+ thisClId++; -+ lastClId = (offset + BT_NBIPUL - 1)/CM_GSTATUS_BITS; -+ -+ /* check each node that might have changed */ -+ for ( ; thisClId <= lastClId && thisClId < numClNodes; thisClId++) -+ { -+ base = thisClId * CM_GSTATUS_BITS; -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ if (thisClId >= subClMin && thisClId <= subClMax) /* skip sub-cluster */ -+ continue; -+ -+ /* This isn't me; I need to sense what this node is driving -+ * (just the starting and running bits) and respond -+ * appropriately... -+ */ -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ if (lgstat == gstat) /* no change in peer state */ -+ continue; -+ -+ CPRINTF5 (3, "%s: Node %d: lgstat %s, gstat %s, lstat %s\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ /* What I'm currently driving as my acknowledgement */ -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ switch (gstat) -+ { -+ case CM_GSTATUS_STARTING: -+ if ((lgstat == CM_GSTATUS_ABSENT || lgstat == CM_GSTATUS_CLOSING) && lstat == CM_GSTATUS_MAY_START) -+ { -+ CPRINTF2 (1, "%s: ===================node %d STARTING\n", cmRail->Rail->Name, nodeId); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_RUNNING: -+ if ((lgstat == CM_GSTATUS_ABSENT && lstat == CM_GSTATUS_MAY_START) || -+ (lgstat == CM_GSTATUS_STARTING && lstat == CM_GSTATUS_MAY_RUN)) -+ { -+ CPRINTF3 (1, "%s: ===================node %d%s RUNNING\n", cmRail->Rail->Name, nodeId, -+ lgstat == CM_GSTATUS_ABSENT ? " Already" : ""); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_CLOSING: -+ CPRINTF4 (1, "%s: ===================node %d CLOSING%s%s\n", cmRail->Rail->Name, nodeId, -+ (lstat & CM_GSTATUS_RESTART) ? " for Restart" : "", -+ cmRail->Levels[clvl].Online ? "" : " (offline)"); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_START) /* clear restart if we've disconnected */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ continue; -+ -+ default: -+ break; -+ } -+ -+ /* "unexpected" state change forces me to ask her to restart */ -+ if (! (lstat & CM_GSTATUS_RESTART)) /* not requesting restart already */ -+ { -+ CPRINTF5 (1, "%s: ===================node %d %s, old %s new %s\n", cmRail->Rail->Name, nodeId, -+ (gstat == CM_GSTATUS_ABSENT) ? "ABSENT" : "REQUEST RESTART", -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId)); -+ -+ /* request restart */ -+ if (cmRail->Levels[clvl].Online && lstat == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ continue; -+ } -+ -+ continue; -+ } -+ } -+ -+ /* Now check myself - see what everyone else thinks I'm doing */ -+ base = myClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS); -+ -+ if (lgstat == gstat) /* my state in this cluster hasn't changed */ -+ { -+ CPRINTF3 (6, "%s: my clvl %d global status unchanged from %s\n", cmRail->Rail->Name, -+ clvl, GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId)); -+ goto all_done; -+ } -+ -+ if ((gstat & CM_GSTATUS_RESTART) != 0) /* someone wants me to restart */ -+ { -+ if ((lstat & CM_GSTATUS_STATUS_MASK) == CM_GSTATUS_CLOSING) /* I'm already restarting */ -+ goto all_done; -+ -+ CPRINTF2 (1, "%s: ===================RESTART REQUEST from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ printk ("%s: Restart Request from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ goto all_done; -+ } -+ -+ CPRINTF6 (5, "%s: clvl %d: lgstat %s gstat %s, lstat %s%s\n", cmRail->Rail->Name, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, myClId), -+ (gstat != lstat) ? " (IGNORED)" : ""); -+ -+ if (gstat != lstat) /* not everyone agrees with me */ -+ goto all_done; -+ -+ switch (lstat) -+ { -+ default: -+ ASSERT (0); /* I never drive this */ -+ -+ case CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START: /* I can restart now (have seen restart go away) */ -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1,"%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN: -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN: -+ if (! cmRail->Levels[clvl].Online) -+ { -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ cmRail->Levels[clvl].Online = 1; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster */ -+ { -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ /* Only connect to her if I see her as running and I'm not requesting her -+ * to restart - this means that I was offline when I saw her transition -+ * to running and haven't seen her in a "bad" state since. */ -+ if (gstat == CM_GSTATUS_RUNNING && ! (lstat & CM_GSTATUS_RESTART)) -+ { -+ CPRINTF5 (1, "%s: node %d lgstat %s gstat %s, lstat %s -> CONNECT\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lstat == CM_GSTATUS_MAY_START) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ } -+ } -+ goto all_done; -+ } -+ -+ all_done: -+ statemap_setmap (cmRail->Levels[clvl].LastGlobalMap, cmRail->Levels[clvl].GlobalMap); -+ } -+} -+ -+static void -+ReduceGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ int recompute; -+ CM_LEVEL *level; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ /* Update cmRail->Levels[*].SubordinateMap[clvl] for all subordinate levels */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ /* We need to recompute this level's statemap if... -+ * . Previous level's statemap has changes to propagate OR -+ * . This level's statemap has not been computed yet OR -+ * . A subordinate at this level has sent me a change. -+ * Note that we can only do this if all subordinates from this -+ * level down are present with valid statemaps, or absent (i.e. not -+ * timing out). -+ */ -+ -+ ASSERT (lvl == 0 || cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ recompute = !level->SubordinateMapValid[clvl] || -+ (lvl > 0 && statemap_changed (cmRail->Levels[lvl - 1].SubordinateMap[clvl])); -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (!(sgmt->State == CM_SGMT_ABSENT || /* absent nodes contribute zeros */ -+ (sgmt->State == CM_SGMT_PRESENT && /* present nodes MUST have received a map to contribute */ -+ sgmt->Maps[clvl].InputMapValid))) -+ { -+ CPRINTF5 (5, "%s: waiting for clvl %d lvl %d seg %d node %d\n", cmRail->Rail->Name, -+ clvl, lvl, sidx, sgmt->NodeId); -+ -+ /* Gotta wait for this guy, so we can't compute this level, -+ * or any higher levels. */ -+ return; -+ } -+ -+ if (statemap_changed (sgmt->Maps[clvl].InputMap)) -+ { -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ -+ recompute = 1; -+ -+ CPRINTF7 (5, "%s: %s clvl %d map from @ %d %d (%d) - %s\n", -+ cmRail->Rail->Name, sgmt->State == CM_SGMT_ABSENT ? "newly absent" : "got new", -+ clvl, lvl, sidx, sgmt->NodeId, -+ MapString ("Input", sgmt->Maps[clvl].InputMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (recompute) -+ { -+ if (lvl == 0) -+ statemap_reset (cmRail->Levels[clvl].TmpMap); -+ else -+ { -+ ASSERT (cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ } -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State != CM_SGMT_ABSENT) /* absent nodes contribute zeroes */ -+ { -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, sgmt->Maps[clvl].InputMap); -+ } -+ statemap_clearchanges (sgmt->Maps[clvl].InputMap); -+ } -+ -+ statemap_setmap (level->SubordinateMap[clvl], cmRail->Levels[clvl].TmpMap); -+ level->SubordinateMapValid[clvl] = 1; -+ -+ CPRINTF4 (5, "%s: recompute clvl %d level %d statemap - %s\n", cmRail->Rail->Name, clvl, lvl, -+ MapString ("level", level->SubordinateMap[clvl], cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (cRole == CM_ROLE_LEADER_CANDIDATE) /* don't know this cluster's leader yet */ -+ return; -+ -+ ASSERT (cTopLevel == 0 || cmRail->Levels[cTopLevel - 1].SubordinateMapValid[clvl]); -+ -+ /* Update SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid || -+ statemap_changed (cmRail->Levels[clvl].LocalMap) || -+ (cTopLevel > 0 && statemap_changed (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]))) -+ { -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[clvl].LocalMap); -+ statemap_clearchanges (cmRail->Levels[clvl].LocalMap); -+ -+ if (cTopLevel > 0) -+ { -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ } -+ -+ statemap_setmap (cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].TmpMap); -+ cmRail->Levels[clvl].SubTreeMapValid = 1; -+ -+ CPRINTF3 (5, "%s: recompute clvl %d subtree map - %s\n", cmRail->Rail->Name, clvl, -+ MapString ("subtree", cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ -+ if (cRole == CM_ROLE_SUBORDINATE) /* got a leader (Not me) */ -+ { /* => send SubTreeMap to her */ -+ CM_SGMT *leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ ASSERT (cmRail->Levels[clvl].SubTreeMapValid); -+ -+ if (!leader->Maps[clvl].OutputMapValid || -+ statemap_changed (cmRail->Levels[clvl].SubTreeMap)) -+ { -+ statemap_setmap (leader->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ leader->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF3 (5, "%s: sending clvl %d subtree map to leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ } -+ } -+} -+ -+void -+BroadcastGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *leader; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ switch (cRole) -+ { -+ default: -+ ASSERT (0); -+ -+ case CM_ROLE_LEADER_CANDIDATE: /* don't know this cluster's leader yet */ -+ return; -+ -+ case CM_ROLE_LEADER: /* cluster leader: */ -+ ASSERT (clvl < cmRail->TopLevel); /* set GlobalMap from SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (cmRail->Levels[clvl].SubTreeMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].SubTreeMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF2 (5, "%s: whole cluster %d leader setting global map\n", cmRail->Rail->Name, clvl); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: /* cluster subordinate: */ -+ ASSERT (clvl >= cmRail->TopLevel); /* receive GlobalMap from leader */ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ -+ if (!leader->Maps[clvl].InputMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (leader->Maps[clvl].InputMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, leader->Maps[clvl].InputMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (leader->Maps[clvl].InputMap); -+ -+ CPRINTF3 (5, "%s: getting clvl %d global map from leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ } -+ -+ CPRINTF3 (5, "%s: clvl %d %s\n", cmRail->Rail->Name, clvl, -+ MapString ("global", cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].NumNodes, "")); -+ -+ /* Broadcast global map to all subordinates */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ CPRINTF5 (5, "%s: sending clvl %d global map to subordinate %d %d (%d)\n", -+ cmRail->Rail->Name, clvl, lvl, sidx, sgmt->NodeId); -+ } -+ } -+ } -+} -+ -+static void -+CheckPeerPulse (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ int clvl, sendRejoin; -+ -+ switch (sgmt->State) -+ { -+ case CM_SGMT_ABSENT: -+ break; -+ -+ case CM_SGMT_WAITING: /* waiting for a subtree */ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d contains no live nodes\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ -+ case CM_SGMT_COMING: /* lost/waiting subtree sent me IMCOMING */ -+ ASSERT (sgmt->Level > 0); /* we only do subtree discovery below our own level */ -+ -+ if (AFTER (lbolt, sgmt->WaitingTick + MSEC2TICKS(CM_WAITING_TIMEOUT))) -+ { -+ CPRINTF3 (1, "%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ printk ("%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ } -+ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d hasn't connected yet\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+ break; -+ -+ case CM_SGMT_PRESENT: -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_HEARTBEAT_TIMEOUT))) -+ break; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* leader died */ -+ { -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF4 (1, "%s: leader (%d) node %d JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ printk ("%s: lvl %d leader (%d) JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ } -+ -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF5 (2, "%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ printk ("%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ break; -+ -+ default: -+ ASSERT (0); -+ } -+} -+ -+static void -+CheckPeerPulses (CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ /* check children are alive */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ for (sidx = 0; sidx < cmRail->Levels[lvl].NumSegs; sidx++) -+ CheckPeerPulse (cmRail, &cmRail->Levels[lvl].Sgmts[sidx]); -+ -+ /* check leader is alive */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ ASSERT (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT); -+ -+ CheckPeerPulse (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0]); -+ } -+} -+ -+static void -+SendHeartbeats (CM_RAIL *cmRail) -+{ -+ int lvl; -+ -+ /* Send heartbeats to my children */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ int sidx; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_HEARTBEAT); -+ } -+ } -+ -+ /* Send heartbeat to my leader */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ SendToSgmt (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0], CM_MSG_TYPE_HEARTBEAT); -+ } -+} -+ -+static int -+BroadcastDiscover (CM_RAIL *cmRail) -+{ -+ int sidx; -+ int lvl; -+ int msgType; -+ CM_LEVEL *level; -+ int urgent; -+ -+ ASSERT (cmRail->TopLevel <= cmRail->NumLevels); -+ ASSERT ((cmRail->Role == CM_ROLE_LEADER) ? (cmRail->TopLevel == cmRail->NumLevels) : -+ (cmRail->Role == CM_ROLE_SUBORDINATE) ? (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT) : -+ (cmRail->Role == CM_ROLE_LEADER_CANDIDATE)); -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) /* got a leader/lead whole machine */ -+ { -+ urgent = 0; /* non-urgent leader discovery */ -+ lvl = cmRail->TopLevel - 1; /* on nodes I lead (resolves leader conflicts) */ -+ msgType = CM_MSG_TYPE_RESOLVE_LEADER; -+ } -+ else -+ { -+ urgent = 1; /* urgent leader discovery */ -+ lvl = cmRail->TopLevel; /* on nodes I'd like to lead */ -+ msgType = CM_MSG_TYPE_DISCOVER_LEADER; -+ } -+ -+ if (lvl >= 0) -+ { -+ if (lvl > cmRail->BroadcastLevel) -+ { -+ /* Unable to broadcast at this level in the spanning tree, so we -+ * just continue doing discovery until we are able to broadcast */ -+ CPRINTF4 (6, "%s: broadcast level %d too low to discover %d at level %d\n", -+ cmRail->Rail->Name, cmRail->BroadcastLevel, msgType, lvl); -+ -+ cmRail->DiscoverStartTick = lbolt; -+ } -+ else -+ { -+ level = &cmRail->Levels[lvl]; -+ SendToSgmt (cmRail, &level->Sgmts[level->MySgmt], msgType); -+ } -+ } -+ -+ while (lvl > 0) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_WAITING) -+ { -+ ASSERT (sidx != level->MySgmt); -+ /* Do subordinate discovery. Existing subordinates will -+ * ignore it, but leader candidates will send IMCOMING. -+ * This is always urgent since we'll assume a subtree is -+ * absent if I don't get IMCOMING within the timeout. -+ */ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_DISCOVER_SUBORDINATE); -+ urgent = 1; -+ } -+ } -+ lvl--; -+ } -+ -+ return (urgent); -+} -+ -+static void -+CheckBroadcast (CM_RAIL *cmRail) -+{ -+ int clvl; -+ -+ for (clvl = cmRail->NumLevels-1; clvl >= 0 && cmRail->Rail->SwitchBroadcastLevel < cmRail->Levels[clvl].SwitchLevel; clvl--) -+ ; -+ -+ if (cmRail->OfflineReasons || cmRail->Rail->System->Shutdown) -+ clvl = -1; -+ -+ /* if the level at which we can broadcast drops, then we must rejoin the -+ * spanning tree at the highest level for which broadcast is good. */ -+ if (cmRail->BroadcastLevel > clvl && clvl < (int)(cmRail->Role == CM_ROLE_LEADER ? cmRail->TopLevel - 1 : cmRail->TopLevel)) -+ { -+ printk ("%s: REJOINING at level %d because %s\n", cmRail->Rail->Name, clvl+1, -+ (cmRail->OfflineReasons & CM_OFFLINE_MANAGER) ? "of manager thread" : -+ (cmRail->OfflineReasons & CM_OFFLINE_PROCFS) ? "force offline" : -+ cmRail->Rail->System->Shutdown ? "system shutdown" : "broadcast level changed"); -+ LowerTopLevel (cmRail, clvl+1); -+ } -+ -+ if (cmRail->BroadcastLevel != clvl) -+ { -+ cmRail->BroadcastLevel = clvl; -+ cmRail->BroadcastLevelTick = lbolt; -+ } -+ -+ /* schedule the update thread, to withdraw from comms with -+ * nodes "outside" of the valid broadcastable range. */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (cmRail->BroadcastLevel < clvl) -+ { -+ if (AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT) && -+ !(cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST)) -+ { -+ printk ("%s: Withdraw at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons |= CM_OFFLINE_BROADCAST; -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST) -+ { -+ printk ("%s: Rejoin at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons &= ~CM_OFFLINE_BROADCAST; -+ } -+ } -+ } -+ -+} -+ -+static void -+CheckManager (CM_RAIL *cmRail) -+{ -+ long time, state = ep_kthread_state (&cmRail->Rail->System->ManagerThread, &time); -+ -+ if (state == KT_STATE_RUNNING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_RUNNING_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ if (state != KT_STATE_SLEEPING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ -+ if ((cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state == KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread unstuck\n", cmRail->Rail->Name); -+ -+ cmRail->OfflineReasons &= ~CM_OFFLINE_MANAGER; -+ } -+ -+ if (!(cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state != KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread stuck - %s\n", cmRail->Rail->Name, -+ state == KT_STATE_SCHEDULED ? "scheduled" : -+ state == KT_STATE_RUNNING ? "running" : -+ state == KT_STATE_STALLED ? "stalled" : "unknown"); -+ -+ cmRail->OfflineReasons |= CM_OFFLINE_MANAGER; -+ } -+} -+ -+static void -+CheckOfflineReasons (CM_RAIL *cmRail, int clvl) -+{ -+ int subClMin, subClMax, myClId; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ -+ if (cmRail->Levels[clvl].OfflineReasons) -+ { -+ if (cmRail->Levels[clvl].Online) -+ { -+ printk ("%s: Withdraw from %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].Restarting && cmRail->Levels[clvl].Connected == 0) -+ { -+ printk ("%s: Rejoin with %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ -+ ASSERT (statemap_getbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, CM_GSTATUS_BITS) == -+ (CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ cmRail->Levels[clvl].Restarting = 0; -+ } -+ } -+} -+ -+void -+DoHeartbeatWork (CM_RAIL *cmRail) -+{ -+ long now = lbolt; -+ int clvl; -+ -+ if ((RejoinCheck || RejoinPanic) && -+ AFTER (now, cmRail->NextRunTime + MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT))) /* If I've been unresponsive for too long */ -+ { -+ /* I'd better reconnect to the network because I've not been playing the game */ -+ CPRINTF4 (1, "%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ printk ("%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinTooSlow); -+ -+ if (RejoinPanic) -+ panic ("ep: REJOINING because I was too slow (heartbeat)\n"); -+ } -+ -+ PollInputQueues (cmRail); -+ -+ if (cmRail->NextDiscoverTime && ! BEFORE (now, cmRail->NextDiscoverTime)) -+ { -+ if (BroadcastDiscover (cmRail)) /* urgent discovery required? */ -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_URGENT_DISCOVER_INTERVAL); -+ else -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_PERIODIC_DISCOVER_INTERVAL); -+ -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE && AFTER (now, cmRail->DiscoverStartTick + MSEC2TICKS (CM_DISCOVER_TIMEOUT))) -+ RaiseTopLevel (cmRail); -+ } -+ -+ if (cmRail->NextHeartbeatTime && ! BEFORE (now, cmRail->NextHeartbeatTime)) -+ { -+ CheckPosition (cmRail->Rail); -+ CheckPeerPulses (cmRail); -+ CheckBroadcast (cmRail); -+ CheckManager (cmRail); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ CheckOfflineReasons (cmRail, clvl); -+ ReduceGlobalMap (cmRail, clvl); -+ BroadcastGlobalMap (cmRail, clvl); -+ } -+ -+ SendHeartbeats (cmRail); -+ -+ /* Compute the next heartbeat time, but "drift" it towards the last -+ * periodic discovery time we saw from the whole machine leader */ -+ cmRail->NextHeartbeatTime = now + MSEC2TICKS (CM_HEARTBEAT_INTERVAL); -+ } -+ -+ if (cmRail->NextDiscoverTime && AFTER (cmRail->NextHeartbeatTime, cmRail->NextDiscoverTime)) -+ cmRail->NextRunTime = cmRail->NextDiscoverTime; -+ else -+ cmRail->NextRunTime = cmRail->NextHeartbeatTime; -+} -+ -+#define CM_SVC_INDICATOR_OFFSET(CMRAIL,CLVL,IND,NODEID) ( ( CMRAIL->Levels[CLVL].NumNodes * CM_GSTATUS_BITS ) \ -+ + ( CMRAIL->Levels[CLVL].NumNodes * IND ) \ -+ + ( NODEID - CMRAIL->Levels[CLVL].MinNodeId ) ) -+int -+cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC,"cm_svc_indicator_set: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC,"cm_svc_indicator_set: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 1, 1); -+ EPRINTF3 (DBG_SVC,"cm_svc_indicator_set: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_clear: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_clear: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 0, 1); -+ EPRINTF3 (DBG_SVC, "cm_svc_indicator_clear: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ bitmap_t bits; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: rail %p ind %d nodeId %d (me=%d)\n", rail, svc_indicator, nodeId, cmRail->NodeId); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: service indicator %d not registered\n", svc_indicator); -+ return (0); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (0); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ if ( clvl == cmRail->NumLevels) { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: node out of range %d \n", nodeId); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ return (0); -+ } -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: clvl %d nodeId %d offset %d %x\n", clvl, nodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), bits); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return ( (bits == 0) ? (0) : (1) ); -+} -+ -+int -+cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ /* or in the bit map */ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int nodeId, clvl; -+ bitmap_t bits; -+ unsigned long flags; -+ int clip_out_low, clip_out_high; -+ int curr_low, curr_high; -+ int check_low, check_high; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_bitmap: rail %p ind %d low %d high %d\n", rail, svc_indicator, low, (low + nnodes)); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_bitmap: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ clip_out_low = clip_out_high = -1; /* all in */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ -+ /* curr_high/low is the range of the current lvl */ -+ curr_low = cmRail->Levels[clvl].MinNodeId; -+ curr_high = cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes; -+ -+ /* find out how much of low high is in this range and only check that part */ -+ check_low = ( low < curr_low) ? curr_low : low; -+ check_high = ( (low + nnodes) > curr_high) ? curr_high : (low + nnodes); -+ -+ EPRINTF6 (DBG_SVC, "cm_svc_indicator_bitmap: curr(%d,%d) check(%d,%d) clip(%d,%d)\n", curr_low, curr_high, check_low, check_high, clip_out_low, clip_out_high); -+ -+ for(nodeId = check_low; nodeId < check_high; nodeId++) { -+ -+ if ( (clip_out_low <= nodeId) && (nodeId <= clip_out_high)) -+ nodeId = clip_out_high; /* step over the cliped out section */ -+ else { -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ if ( bits ) { -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_bitmap: its set nodeId %d (clvl %d)\n", nodeId, clvl); -+ BT_SET ( bitmap , nodeId - low ); -+ } -+ } -+ } -+ -+ /* widen the clip out range */ -+ clip_out_low = curr_low; -+ clip_out_high = curr_high -1; -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+#if defined(PER_CPU_TIMEOUT) -+static void -+cm_percpu_timeout (void *arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ CM_TIMEOUT_DATA *hbd = &cmRail->HeartbeatTimeoutsData[current_cpu()]; -+ long now = lbolt; -+ unsigned delay = now - hbd->ScheduledAt; -+ unsigned long flags; -+ -+ if (delay > hbd->WorstDelay) -+ hbd->WorstDelay = delay; -+ if (hbd->BestDelay == 0 || delay < hbd->BestDelay) -+ hbd->BestDelay = delay; -+ -+ if (cmRail->HeartbeatTimeoutsShouldStop) -+ { -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ cmRail->HeartbeatTimeoutsStopped |= (1 << current_cpu()); -+ kcondvar_wakeupall (&cmRail->HeartbeatTimeoutsWait, &cmRail->Lock); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ return; -+ } -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, lbolt)) -+ hbd->EarlyCount++; -+ else if (cmRail->HeartbeatTimeoutRunning) -+ hbd->MissedCount++; -+ else -+ { -+ local_irq_save (flags); -+ -+ if (! spin_trylock (&cmRail->HeartbeatTimeoutsLock)) -+ hbd->WastedCount++; -+ else -+ { -+ cmRail->HeartbeatTimeoutRunning = 1; -+ hbd->WorkCount++; -+ -+ spin_lock (&cmRail->Lock); -+ -+ if ((delay = (lbolt - cmRail->NextRunTime)) > hbd->WorstHearbeatDelay) -+ hbd->WorstHearbeatDelay = delay; -+ if ((delay = (lbolt - now) > hbd->WorstLockDelay)) -+ hbd->WorstLockDelay = delay; -+ -+ DoHeartbeatWork (cmRail); -+ -+ spin_unlock (&cmRail->Lock); -+ spin_unlock (&cmRail->HeartbeatTimeoutsLock); -+ -+ cmRail->HeartbeatTimeoutRunning = 0; -+ } -+ local_irq_restore (flags); -+ } -+ -+ hbd->ScheduledAt = lbolt + MSEC2TICKS (CM_PERCPU_TIMEOUT_INTERVAL); -+ timeout_cpu (cm_percpu_timeout, cmRail, MSECS2TICKS (CM_PERCPU_TIMEOUT_INTERVAL), CALLOUT_TYPE|CALLOUT_NOMALLOC); -+} -+ -+static void -+StartPerCpuTimeouts (CM_RAIL *cmRail) -+{ -+ register int c; -+ -+ spin_lock_init (&cmRail->HeartbeatTimeoutsLock); -+ -+ KMEM_ZALLOC (cmRail->HeartbeatTimeoutsData, CM_TIMEOUT_DATA *, ncpus * sizeof (CM_TIMEOUT_DATA), 1); -+ -+ for (c = 0; c < cpus_in_box; c++) -+ { -+ if (cpu_to_processor (c)) -+ { -+ if (current_cpu() != c) -+ { -+ thread_bind (current_thread(), cpu_to_processor(c)); -+ mpsleep (current_thread(), 0, "StartPerCpuTimeouts", 1, NULL, 0); -+ -+ if (current_cpu() != c) -+ panic ("ep: StartPerCpuTimeouts - failed to switch cpu\n"); -+ } -+ -+ cmRail->HeartbeatTimeoutsStarted |= (1 << c); -+ cmRail->HeartbeatTimeoutsData[c].ScheduledAt = lbolt + c; -+ -+ timeout_cpu (cm_percpu_timeout, cmRail, c, CALLOUT_TYPE|CALLOUT_NOMALLOC); -+ } -+ } -+ -+ thread_bind(current_thread(), NULL); -+} -+ -+static void -+StopPerCpuTimeouts (CM_RAIL *cmRail) -+{ -+ register int c; -+ unsigned long flags; -+ -+ cmRail->HeartbeatTimeoutsShouldStop = 1; -+ -+ for (c = 0; c < cpus_in_box; c++) -+ { -+ if (cmRail->HeartbeatTimeoutsStarted & (1 << c)) -+ { -+ printk ("%s: stopping cpu_timeout on cpu %d\n", cmRail->Rail->Name, c); -+ -+ if (untimeout_cpu (cm_percpu_timeout, cmRail, c, CALLOUT_TYPE|CALLOUT_NOMALLOC, NULL)) -+ cmRail->HeartbeatTimeoutsStopped |= (1 << c); -+ } -+ } -+ thread_bind(current_thread(), NULL); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ while (cmRail->HeartbeatTimeoutsStopped != cmRail->HeartbeatTimeoutsStarted) -+ kcondvar_wait (&cmRail->HeartbeatTimeoutsWait, &cmRail->Lock, &flags); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ cmRail->HeartbeatTimeoutsStarted = 0; -+ cmRail->HeartbeatTimeoutsStopped = 0; -+ cmRail->HeartbeatTimeoutsShouldStop = 0; -+ -+ KMEM_FREE (cmRail->HeartbeatTimeoutsData, ncpus * sizeof (CM_TIMEOUT_DATA)); -+ -+ spin_lock_destroy (&cmRail->HeartbeatTimeoutsLock); -+} -+ -+#else -+ -+static void -+cm_heartbeat_timer (unsigned long arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ ASSERT (cmRail->Rail->State == EP_RAIL_STATE_RUNNING); -+ -+ DoHeartbeatWork (cmRail); -+ -+ __Schedule_Timer (cmRail, cmRail->NextRunTime); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+#endif /* defined(PER_CPU_TIMEOUT) */ -+ -+ -+ -+void -+DisplayRailDo (DisplayInfo *di, EP_RAIL *rail) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int i, j; -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ (di->func)(di->arg, "NodeId=%d NodeLevel=%d NumLevels=%d NumNodes=%d\n", -+ cmRail->NodeId, cmRail->TopLevel, cmRail->NumLevels, cmRail->Rail->Position.pos_nodes); -+ -+ (di->func)(di->arg, "["); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ if (i > 0) -+ (di->func)(di->arg, ","); -+ -+ if (i < cmRail->TopLevel) -+ { -+ (di->func)(di->arg, "L "); -+ -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ switch (cmRail->Levels[i].Sgmts[j].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[j].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ } -+ else -+ switch (cmRail->Role) -+ { -+ case CM_ROLE_LEADER_CANDIDATE: -+ (di->func)(di->arg,"l "); -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg," "); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: -+ switch (cmRail->Levels[i].Sgmts[0].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[0].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ for (j = 1; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg, " "); -+ break; -+ -+ default: -+ (di->func)(di->arg, "####"); -+ break; -+ } -+ } -+ (di->func)(di->arg, "]\n"); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+DisplayRail (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ DisplayRailDo (&di_ep_debug, rail); -+} -+ -+void -+DisplayStatus (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeMaps (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+void -+DisplaySegs (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeSgmts (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+static void -+LoadBroadcastRoute (CM_RAIL *cmRail, int lvl, int sidx) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int nsegs = cmRail->Levels[0].NumSegs; -+ int vp = EP_VP_BCAST(lvl, sidx); -+ int nodes = 1; -+ int baseNode; -+ int i; -+ -+ ASSERT (lvl > 0 && lvl <= cmRail->NumLevels); -+ ASSERT (sidx == 0 || lvl < cmRail->NumLevels); -+ -+ ASSERT (vp >= EP_VP_BCAST_BASE && vp < EP_VP_BCAST_BASE + EP_VP_BCAST_COUNT); -+ -+ for (i = 1; i <= lvl; i++) -+ { -+ nodes *= nsegs; -+ nsegs = (i == cmRail->NumLevels) ? 1 : cmRail->Levels[i].NumSegs; -+ } -+ -+ baseNode = ((cmRail->NodeId / (nodes * nsegs)) * nsegs + sidx) * nodes; -+ -+ CPRINTF5 (2, "%s: broadcast vp lvl %d sidx %d [%d,%d]\n", -+ cmRail->Rail->Name, lvl, sidx, baseNode, baseNode + nodes - 1); -+ -+ rail->Operations.LoadSystemRoute (rail, vp, baseNode, baseNode + nodes - 1); -+} -+ -+static void -+LoadRouteTable (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i, j; -+ -+ if (cmRail->NumNodes > EP_MAX_NODES) -+ { -+ printk ("More nodes (%d) than point-to-point virtual process table entries (%d)\n", cmRail->NumNodes, EP_MAX_NODES); -+ panic ("LoadRouteTable\n"); -+ } -+ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.LoadSystemRoute (rail, EP_VP_NODE(i), i, i); -+ -+ /* Generate broadcast routes for subtrees */ -+ for (i = 1; i < cmRail->NumLevels; i++) -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ LoadBroadcastRoute (cmRail, i, j); -+ -+ /* Generate broadcast route for whole machine */ -+ LoadBroadcastRoute (cmRail, cmRail->NumLevels, 0); -+ -+ /* Finally invalidate all the data routes */ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.UnloadNodeRoute (cmRail->Rail, i); -+} -+ -+void -+cm_node_disconnected (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ ASSERT (nodeId != cmRail->NodeId); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ ASSERT ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN); -+ -+ CPRINTF7 (2, "%s: cm_node_disconnected: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s -> %sMAY_START\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId), -+ ((lgstat != CM_GSTATUS_CLOSING) && (lstat & CM_GSTATUS_RESTART)) ? "RESTART|" : ""); -+ -+ switch (lgstat) -+ { -+ case CM_GSTATUS_CLOSING: -+ /* delayed ack of closing - set MAY_START and clear RESTART */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ break; -+ case CM_GSTATUS_STARTING: -+ case CM_GSTATUS_RUNNING: -+ IASSERT (! cmRail->Levels[clvl].Online || lstat & CM_GSTATUS_RESTART); -+ break; -+ case CM_GSTATUS_ABSENT: -+ IASSERT (lstat & CM_GSTATUS_RESTART); -+ } -+ -+ cmRail->Levels[clvl].Connected--; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_restart_node (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (nodeId == rail->Position.pos_nodeid) -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ RestartComms (cmRail, clvl); -+ } -+ else -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ CPRINTF6 (2, "%s: cm_restart_node: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lgstat != CM_GSTATUS_CLOSING) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (offline) -+ cmRail->OfflineReasons |= reason; -+ else -+ cmRail->OfflineReasons &= ~reason; -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+static void -+cm_remove_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ CM_RAIL *cmRail = sys->Rails[rail->Number]; -+ int i, lvl, clvl; -+ -+ cm_procfs_rail_fini (cmRail); -+ -+ sys->Rails[rail->Number] = NULL; -+ rail->ClusterRail = NULL; -+ -+#if defined(PER_CPU_TIMEOUT) -+ StopPerCpuTimeouts (cmRail); -+#else -+ del_timer_sync (&cmRail->HeartbeatTimer); -+#endif -+ cmRail->NextRunTime = 0; -+ cmRail->NextDiscoverTime = 0; -+ cmRail->NextHeartbeatTime = 0; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ statemap_destroy (level->SubordinateMap[clvl]); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ statemap_destroy (level->Sgmts[i].Maps[clvl].CurrentInputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].InputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].OutputMap); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ statemap_destroy (cmRail->Levels[clvl].TmpMap); -+ statemap_destroy (cmRail->Levels[clvl].GlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].LastGlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].SubTreeMap); -+ statemap_destroy (cmRail->Levels[clvl].LocalMap); -+ } -+ -+ spin_lock_destroy (&cmRail->Lock); -+ -+ ep_free_inputq (cmRail->Rail, cmRail->PolledQueue); -+ ep_free_inputq (cmRail->Rail, cmRail->IntrQueue); -+ ep_free_outputq (cmRail->Rail, cmRail->MsgQueue); -+ -+ KMEM_FREE (cmRail, sizeof (CM_RAIL)); -+} -+ -+static int -+cm_add_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ ELAN_POSITION *pos = &rail->Position; -+ CM_RAIL *cmRail; -+ int lvl, n, nn, clvl, span, i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (cmRail, CM_RAIL *, sizeof (CM_RAIL), 1); -+ -+ if (cmRail == NULL) -+ return (ENOMEM); -+ -+ cmRail->Rail = rail; -+ cmRail->NodeId = pos->pos_nodeid; -+ cmRail->NumNodes = pos->pos_nodes; -+ -+ spin_lock_init (&cmRail->Lock); -+ -+ if ((cmRail->IntrQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_INTR, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, IntrQueueCallback, cmRail)) == NULL || -+ (cmRail->PolledQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_POLLED, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, NULL, 0)) == NULL || -+ (cmRail->MsgQueue = ep_alloc_outputq (rail, sizeof (CM_MSG), CM_NUM_MSG_BUFFERS)) == NULL) -+ { -+ goto failed; -+ } -+ -+ /* point to first "spare" message buffer */ -+ cmRail->NextSpareMsg = 0; -+ -+ /* Compute the branching ratios from the switcy arity */ -+ for (lvl = 0; lvl < CM_MAX_LEVELS; lvl++) -+ BranchingRatios[lvl] = (lvl < pos->pos_levels) ? pos->pos_arity[pos->pos_levels - lvl - 1] : 4; -+ -+ /* now determine the number of levels of hierachy we have */ -+ /* and how many nodes per level there are */ -+ for (lvl = 0, nn = 1, n = pos->pos_nodes; -+ n > 1; -+ nn *= BranchingRatios[lvl], n = n / BranchingRatios[lvl], lvl++) -+ { -+ int nSegs = (n > BranchingRatios[lvl]) ? BranchingRatios[lvl] : n; -+ int nNodes = nn * nSegs; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ for (clvl = 0, span = pos->pos_arity[pos->pos_levels - clvl - 1]; -+ span < nNodes && clvl < pos->pos_levels - 1; -+ clvl++, span *= pos->pos_arity[pos->pos_levels - clvl - 1]) -+ ; -+ -+ level->SwitchLevel = clvl; -+ level->MinNodeId = (pos->pos_nodeid / nNodes) * nNodes; -+ level->NumNodes = nNodes; -+ level->NumSegs = nSegs; -+ } -+ -+ cmRail->NumLevels = lvl; -+ cmRail->BroadcastLevel = lvl-1; -+ -+ CPRINTF4 (2, "%s: NodeId=%d NumNodes=%d NumLevels=%d\n", -+ rail->Name, pos->pos_nodeid, pos->pos_nodes, cmRail->NumLevels); -+ -+ LoadRouteTable (cmRail); -+ -+ /* Init SGMT constants */ -+ for (lvl = 0; lvl < cmRail->NumLevels; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->MySgmt = SegmentNo (cmRail, cmRail->NodeId, lvl); -+ -+ for (i = 0; i < CM_SGMTS_PER_LEVEL; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->MsgNumber = lvl * CM_SGMTS_PER_LEVEL + i; -+ sgmt->Level = lvl; -+ sgmt->Sgmt = i; -+ } -+ } -+ -+ /* Init maps for each cluster level */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nNodes = cmRail->Levels[clvl].NumNodes; -+ int mapBits = (nNodes * CM_GSTATUS_BITS) + (nNodes * EP_SVC_NUM_INDICATORS); -+ int clmin; -+ int clmax; -+ int clid = ClusterIds (cmRail, clvl, &clmin, &clmax); -+ -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->SubordinateMap[clvl] = statemap_create (mapBits); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ level->Sgmts[i].Maps[clvl].CurrentInputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].InputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].OutputMap = statemap_create (mapBits); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ cmRail->Levels[clvl].TmpMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].GlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LastGlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].SubTreeMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LocalMap = statemap_create (mapBits); -+ -+ /* Flag everyone outside my next lower cluster as sensed offline... */ -+ for (i = 0; i < clmin; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ for (i = clmax + 1; i < nNodes; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ /* ...and set my own state */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, clid * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ } -+ -+ /* compute parameter hash to add to messages */ -+ cmRail->ParamHash = EP_PROTOCOL_VERSION; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_PERIODIC_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_URGENT_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_DMA_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_BCAST_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_TIMER_SCHEDULE_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_DISCOVER_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BT_NBIPUL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_GSTATUS_BITS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + EP_SVC_NUM_INDICATORS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumLevels; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumNodes; -+ for (i = 0; i < cmRail->NumLevels; i++) -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BranchingRatios[i]; -+ -+#if defined(PER_CPU_TIMEOUT) -+ StartPerCpuTimeouts (cmRail); -+#endif -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+#if !defined(PER_CPU_TIMEOUT) -+ /* Initialise the timer, but don't add it yet, since -+ * __Schedule_Heartbeat() will do this. */ -+ -+ init_timer (&cmRail->HeartbeatTimer); -+ -+ cmRail->HeartbeatTimer.function = cm_heartbeat_timer; -+ cmRail->HeartbeatTimer.data = (unsigned long) cmRail; -+ cmRail->HeartbeatTimer.expires = lbolt + hz; -+#endif -+ -+ /* start sending heartbeats */ -+ __Schedule_Heartbeat (cmRail); -+ -+ /* start discovering who else is out there */ -+ LowerTopLevel (cmRail, 0); -+ -+ /* connect to myself straight away - I know I'm here */ -+ ep_connect_node (rail, cmRail->NodeId); -+ -+ /* add to all rails */ -+ sys->Rails[rail->Number] = cmRail; -+ rail->ClusterRail = (void *) cmRail; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ /* Enable the input queues */ -+ ep_enable_inputq (rail, cmRail->PolledQueue); -+ ep_enable_inputq (rail, cmRail->IntrQueue); -+ -+ /* Create the procfs entries */ -+ cm_procfs_rail_init (cmRail); -+ -+ return 0; -+ -+ failed: -+ cm_remove_rail (subsys, epsys, rail); -+ return -ENOMEM; -+} -+ -+static void -+cm_fini (EP_SUBSYS *subsys, EP_SYS *epsys) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ -+ cm_procfs_fini(sys); -+ -+ KMEM_FREE (sys, sizeof (CM_SUBSYS)); -+} -+ -+int -+cm_init (EP_SYS *sys) -+{ -+ CM_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, CM_SUBSYS *, sizeof (CM_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "cm"; -+ subsys->Subsys.Destroy = cm_fini; -+ subsys->Subsys.AddRail = cm_add_rail; -+ subsys->Subsys.RemoveRail = cm_remove_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ -+ cm_procfs_init (subsys); -+ -+ /* -+ * Initialise the machineid if it wasn't specified by -+ * the modules.conf file - otherwise truncate it to -+ * 16 bits. -+ */ -+ if (MachineId != -1) -+ MachineId = (uint16_t) MachineId; -+ else -+ { -+#if defined(LINUX_ALPHA) -+ MachineId = (uint16_t)((5 << 12) | HZ); -+#elif defined(LINUX_SPARC) -+ MachineId = (uint16_t)((4 << 12) | HZ); -+#elif defined(LINUX_I386) -+ MachineId = (uint16_t)((3 << 12) | HZ); -+#elif defined( LINUX_IA64) -+ MachineId = (uint16_t)((2 << 12) | HZ); -+#elif defined(LINUX_X86_64) -+ MachineId = (uint16_t)((1 << 12) | HZ); -+#else -+ MachineId = (uint16_t)((0 << 12) | HZ); -+#endif -+ } -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/cm.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/cm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/cm.h 2005-06-01 23:12:54.633433936 -0400 -@@ -0,0 +1,412 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.14.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+typedef struct proc_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ char *pr_data; -+ int pr_data_len; -+ unsigned pr_off; -+ unsigned pr_len; -+ DisplayInfo pr_di; -+} PROC_PRIVATE; -+ -+extern void proc_character_fill (long mode, char *fmt, ...); -+extern int proc_release (struct inode *inode, struct file *file); -+extern ssize_t proc_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/cm_procfs.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/cm_procfs.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/cm_procfs.c 2005-06-01 23:12:54.633433936 -0400 -@@ -0,0 +1,254 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2005 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm_procfs.c,v 1.5 2004/05/14 09:23:13 daniel Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm_procfs.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+extern char *sprintClPeers (char *str, CM_RAIL *cmRail, int clvl); -+ -+static int -+proc_read_cluster(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) data; -+ char *p = page; -+ -+ page[0] = 0; -+ -+ if (cmRail->Rail->State != EP_RAIL_STATE_RUNNING) -+ p += sprintf(p, "\n"); -+ else -+ { -+ CM_LEVEL *cmLevel; -+ unsigned long flags; -+ int i, j; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ char seperate_with; -+ -+ struct { int val; char *name; } bitvals[] = { -+ {CM_OFFLINE_BROADCAST, "Broadcast"}, -+ {CM_OFFLINE_PROCFS, "Offline"}, -+ {CM_OFFLINE_MANAGER, "Manager"}}; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ cmLevel = &cmRail->Levels[i]; -+ -+ p += sprintf(p, "%23s %7s ", sprintClPeers (clNodeStr, cmRail, i), cmLevel->Online?"Online":"Offline"); -+ -+ if ((cmLevel->Online ) | ( cmLevel->Connected > 0)) -+ p += sprintf(p, "Connected=%lu ", cmLevel->Connected); -+ -+ seperate_with = '<'; -+ -+ if ( cmLevel->Restarting ) { -+ p += sprintf(p, "%cRestarting", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( ! (cmLevel->GlobalMapValid & cmLevel->SubTreeMapValid )) { -+ p += sprintf(p, "%cMap Not Valid", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( cmLevel->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmLevel->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ if ( cmRail->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmRail->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ -+ if ( seperate_with != '<' ) -+ p += sprintf(p,">\n"); -+ else -+ p += sprintf(p,"\n"); -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, p - page); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"cluster", proc_read_cluster, NULL}, -+}; -+ -+struct proc_dir_entry *svc_indicators_root; -+ -+typedef struct svc_indicator_data -+{ -+ int svc_indicator; -+ EP_RAIL *rail; -+} SVC_INDICATOR_DATA; -+ -+static SVC_INDICATOR_DATA svc_indicator_data[EP_SVC_NUM_INDICATORS][EP_MAX_RAILS]; -+static char *svc_indicator_names[EP_SVC_NUM_INDICATORS] = EP_SVC_NAMES; -+ -+static int -+proc_read_svc_indicator_rail_bitmap (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ SVC_INDICATOR_DATA *svc_data = (SVC_INDICATOR_DATA *)data; -+ unsigned int nnodes = ep_numnodes (ep_system()); -+ bitmap_t *bitmap; -+ -+ KMEM_ZALLOC (bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ cm_svc_indicator_bitmap (svc_data->rail, svc_data->svc_indicator, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_svc_indicator_bitmap(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned int num = (unsigned long) data; -+ EP_SYS *sys = ep_system(); -+ unsigned int nnodes = ep_numnodes (sys); -+ bitmap_t *bitmap; -+ -+ KMEM_ALLOC(bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ ep_svc_indicator_bitmap (sys, num, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+cm_procfs_rail_init (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ struct proc_dir_entry *p; -+ int i; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, cmRail->Rail->ProcDir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = cmRail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((rail->SvcIndicatorDir = proc_mkdir ("svc_indicators", cmRail->Rail->ProcDir)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, rail->SvcIndicatorDir)) != NULL) -+ { -+ svc_indicator_data[i][rail->Number].svc_indicator = i; -+ svc_indicator_data[i][rail->Number].rail = rail; -+ -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_rail_bitmap; -+ p->data = (void *)&svc_indicator_data[i][rail->Number]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ } -+} -+ -+void -+cm_procfs_rail_fini (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i; -+ -+ if (rail->SvcIndicatorDir) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], rail->SvcIndicatorDir); -+ -+ remove_proc_entry ("svc_indicators", cmRail->Rail->ProcDir); -+ } -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, cmRail->Rail->ProcDir); -+} -+ -+void -+cm_procfs_init (CM_SUBSYS *subsys) -+{ -+ struct proc_dir_entry *p; -+ int i; -+ -+ qsnet_proc_register_hex (ep_config_root, "machine_id", &MachineId, 0); -+ -+ if ((svc_indicators_root = proc_mkdir("svc_indicators", ep_procfs_root)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, svc_indicators_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_bitmap; -+ p->data = (void *)(long) i; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ } -+} -+ -+void -+cm_procfs_fini (CM_SUBSYS *subsys) -+{ -+ int i; -+ -+ if (svc_indicators_root) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], svc_indicators_root); -+ -+ remove_proc_entry ("svc_indicators", ep_procfs_root); -+ } -+ -+ remove_proc_entry ("machine_id", ep_config_root); -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/commands_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/commands_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/commands_elan4.c 2005-06-01 23:12:54.634433784 -0400 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: commands_elan4.c,v 1.2 2003/10/23 15:07:53 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/commands_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+ -+static __inline__ void -+elan4_command_write (ELAN4_CQ *cq, E4_uint64 val, unsigned off) -+{ -+ writeq (val, cq->cq_mapping + offsetof (E4_CommandPort, Command[off])); -+} -+ -+void -+elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag) -+{ -+ elan4_command_write (cq, tag | NOP_CMD, 0); -+} -+ -+void -+elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | WRITE_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | ADD_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype) -+{ -+ elan4_command_write (cq, from | (datatype << COPY64_DATA_TYPE_SHIFT) | COPY64_CMD, 0); -+ elan4_command_write (cq, to | (datatype << COPY64_DATA_TYPE_SHIFT), 1); -+} -+ -+void -+elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie) -+{ -+ elan4_command_write (cq, (cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, 0); -+} -+ -+ -+void -+elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs) -+{ -+ elan4_command_write (cq, regs->Registers[0] | RUN_THREAD_CMD, 0); -+ elan4_command_write (cq, regs->Registers[1], 1); -+ elan4_command_write (cq, regs->Registers[2], 2); -+ elan4_command_write (cq, regs->Registers[3], 3); -+ elan4_command_write (cq, regs->Registers[4], 4); -+ elan4_command_write (cq, regs->Registers[5], 5); -+ elan4_command_write (cq, regs->Registers[6], 6); -+} -+ -+void -+elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma) -+{ -+ E4_uint64 *dmaptr = (E4_uint64 *) dma; -+ -+ elan4_command_write (cq, dmaptr[0] | RUN_DMA_CMD, 0); -+ elan4_command_write (cq, dmaptr[1], 1); -+ elan4_command_write (cq, dmaptr[2], 2); -+ elan4_command_write (cq, dmaptr[3], 3); -+ elan4_command_write (cq, dmaptr[4], 4); -+ elan4_command_write (cq, dmaptr[5], 5); -+ elan4_command_write (cq, dmaptr[6], 6); -+} -+ -+void -+elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event) -+{ -+ elan4_command_write (cq, event | SET_EVENT_CMD, 0); -+} -+ -+void -+elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count) -+{ -+ elan4_command_write (cq, SET_EVENTN_CMD,0); -+ elan4_command_write (cq, event | count, 1); -+} -+ -+void -+elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ elan4_command_write (cq, event | WAIT_EVENT_CMD, 0); -+ elan4_command_write (cq, candt, 1); -+ elan4_command_write (cq, param0, 2); -+ elan4_command_write (cq, param1, 3); -+} -+ -+void -+elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | OPEN_STEN_PKT_CMD, 0); -+} -+ -+void -+elan4_guard (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | GUARD_CMD, 0); -+} -+ -+void -+elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+} -+ -+void -+elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+} -+ -+void -+elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+ elan4_command_write (cq, p1, 3); -+} -+ -+void -+elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...) -+{ -+ E4_uint32 ndword = ((trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ va_list ap; -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ -+ va_start (ap, addr); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, va_arg (ap, E4_uint64), i); -+ va_end (ap); -+} -+ -+void -+elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr) -+{ -+ E4_uint32 ndword = ((trtype &TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, *ptr++, i); -+} -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/conf_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/conf_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/conf_linux.c 2005-06-01 23:12:54.635433632 -0400 -@@ -0,0 +1,309 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.c,v 1.37.2.3 2005/01/18 14:47:35 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+ -+#include "conf_linux.h" -+ -+#include -+#include -+#include -+#include -+ -+/* Module parameters */ -+unsigned int epdebug = 0; -+unsigned int epdebug_console = 0; -+unsigned int epdebug_cmlevel = 0; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+unsigned int epdebug_check_sum = 0; -+#endif -+int disabled = 0; -+int sdram_assert = 0; -+int assfail_mode = 0; -+int txd_stabilise = 7; -+int portals_envelopes = 0; -+ -+/* External module parameters */ -+extern int MaxSwitchLevels; -+extern int RejoinCheck; -+extern int RejoinPanic; -+extern int PositionCheck; -+extern int MachineId; -+ -+/* Module globals */ -+EP_SYS epsys; -+ -+#ifdef MODULE -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("Elan Kernel Comms"); -+ -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(epdebug, "i"); -+MODULE_PARM(epdebug_console, "i"); -+MODULE_PARM(epdebug_cmlevel, "i"); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+MODULE_PARM(epdebug_check_sum, "i"); -+#endif -+MODULE_PARM(disabled, "i"); -+ -+MODULE_PARM(MachineId, "i"); -+MODULE_PARM(RejoinPanic, "i"); -+MODULE_PARM(RejoinCheck, "i"); -+MODULE_PARM(PositionCheck, "i"); -+MODULE_PARM(MaxSwitchLevels, "i"); -+ -+MODULE_PARM(sdram_assert, "i"); -+MODULE_PARM(assfail_mode, "i"); -+MODULE_PARM(txd_stabilise, "i"); -+MODULE_PARM(portals_envelopes,"i"); -+ -+/* epcomms.c large message service functions */ -+EXPORT_SYMBOL(ep_alloc_xmtr); -+EXPORT_SYMBOL(ep_free_xmtr); -+EXPORT_SYMBOL(ep_transmit_message); -+EXPORT_SYMBOL(ep_multicast_message); -+EXPORT_SYMBOL(ep_transmit_rpc); -+ -+EXPORT_SYMBOL(ep_alloc_rcvr); -+EXPORT_SYMBOL(ep_free_rcvr); -+EXPORT_SYMBOL(ep_queue_receive); -+EXPORT_SYMBOL(ep_requeue_receive); -+EXPORT_SYMBOL(ep_rpc_put); -+EXPORT_SYMBOL(ep_rpc_get); -+EXPORT_SYMBOL(ep_complete_rpc); -+EXPORT_SYMBOL(ep_complete_receive); -+ -+EXPORT_SYMBOL(ep_poll_transmits); -+EXPORT_SYMBOL(ep_enable_txcallbacks); -+EXPORT_SYMBOL(ep_disable_txcallbacks); -+ -+/* epcomms.c functions for accessing fields of rxds/txds */ -+EXPORT_SYMBOL(ep_rxd_arg); -+EXPORT_SYMBOL(ep_rxd_len); -+EXPORT_SYMBOL(ep_rxd_isrpc); -+EXPORT_SYMBOL(ep_rxd_envelope); -+EXPORT_SYMBOL(ep_rxd_payload); -+EXPORT_SYMBOL(ep_rxd_node); -+EXPORT_SYMBOL(ep_rxd_status); -+EXPORT_SYMBOL(ep_rxd_statusblk); -+EXPORT_SYMBOL(ep_txd_node); -+EXPORT_SYMBOL(ep_txd_statusblk); -+ -+/* kmap.c, nmh.c - handling mapping of pages into network memory */ -+EXPORT_SYMBOL(ep_dvma_reserve); -+EXPORT_SYMBOL(ep_dvma_release); -+EXPORT_SYMBOL(ep_dvma_load); -+EXPORT_SYMBOL(ep_dvma_unload); -+EXPORT_SYMBOL(ep_nmd_subset); -+EXPORT_SYMBOL(ep_nmd_merge); -+ -+EXPORT_SYMBOL(ep_system); -+ -+/* kcomm.c */ -+EXPORT_SYMBOL(ep_nodeid); -+EXPORT_SYMBOL(ep_numnodes); -+EXPORT_SYMBOL(ep_waitfor_nodeid); -+ -+/* railhints.c */ -+EXPORT_SYMBOL(ep_pickRail); -+EXPORT_SYMBOL(ep_xmtr_bcastrail); -+EXPORT_SYMBOL(ep_xmtr_prefrail); -+EXPORT_SYMBOL(ep_xmtr_availrails); -+EXPORT_SYMBOL(ep_xmtr_noderails); -+EXPORT_SYMBOL(ep_rcvr_prefrail); -+EXPORT_SYMBOL(ep_rcvr_availrails); -+EXPORT_SYMBOL(ep_rxd_railmask); -+ -+EXPORT_SYMBOL(ep_svc_indicator_bitmap); -+EXPORT_SYMBOL(ep_svc_indicator_is_set); -+EXPORT_SYMBOL(ep_svc_indicator_clear); -+EXPORT_SYMBOL(ep_svc_indicator_set); -+ -+/* cm.c */ -+EXPORT_SYMBOL(cm_svc_indicator_clear); -+EXPORT_SYMBOL(cm_svc_indicator_set); -+EXPORT_SYMBOL(cm_svc_indicator_is_set); -+EXPORT_SYMBOL(cm_svc_indicator_bitmap); -+ -+#endif -+ -+EP_SYS * -+ep_system() -+{ -+ return (&epsys); -+} -+ -+void -+ep_mod_inc_usecount() -+{ -+ MOD_INC_USE_COUNT; -+} -+ -+void -+ep_mod_dec_usecount() -+{ -+ MOD_DEC_USE_COUNT; -+} -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+ep_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block ep_dump_notifier = -+{ -+ notifier_call: ep_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+ep_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_reboot_notifier = -+{ -+ notifier_call: ep_reboot_event, -+ priority: 0, -+}; -+ -+static int -+ep_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_panic_notifier = -+{ -+ notifier_call: ep_panic_event, -+ priority: 0, -+}; -+ -+/* -+ * Module configuration. -+ */ -+#ifdef MODULE -+static int __init ep_init(void) -+#else -+__initfunc(int ep_init(void)) -+#endif -+{ -+ register int rmask = 0; -+ -+ ep_procfs_init (); -+ -+ ep_sys_init (&epsys); -+ -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ rmask = ep4_create_rails (&epsys, disabled); -+#endif -+ -+ /* If we've brought up an elan4 rail, then disable all elan3 rails. */ -+ if ((rmask & ~disabled) != 0) -+ disabled = ~rmask; -+ -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ rmask = ep3_create_rails (&epsys, disabled); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&ep_dump_notifier); -+#endif -+ register_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Module removal. -+ */ -+#ifdef MODULE -+static void -+__exit ep_exit(void) -+{ -+ register int i; -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&ep_dump_notifier); -+#endif -+ unregister_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (epsys.Rails[i]) -+ { -+ switch (epsys.Rails[i]->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ break; -+ -+ case EP_RAIL_STATE_STARTED: -+ case EP_RAIL_STATE_RUNNING: -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ /* remove per-rail CM proc entries */ -+ ep_stop_rail (epsys.Rails[i]); -+ break; -+ } -+ -+ /* remove EP proc rail entries after per-rail CM entries */ -+ ep_procfs_rail_fini (epsys.Rails[i]); -+ ep_destroy_rail (epsys.Rails[i]); -+ } -+ } -+ -+ ep_sys_fini (&epsys); -+ -+ ep_procfs_fini (); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(ep_init); -+module_exit(ep_exit); -+ -+#endif -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/conf_linux.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/conf_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/conf_linux.h 2005-06-01 23:12:54.635433632 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.h,v 1.6 2003/10/02 14:16:07 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.h,v $*/ -+ -+#ifndef __ELAN_CONF_LINUX_H -+#define __ELAN_CONF_LINUX_H -+ -+extern void ep_procfs_init(void); -+extern void ep_procfs_fini(void); -+extern void ep_procfs_rail_init(EP_RAIL *rail); -+extern void ep_procfs_rail_fini(EP_RAIL *rail); -+ -+extern void ep_procfs_svc_indicator_create(int svc_indicator, char *name); -+extern void ep_procfs_svc_indicator_remove(int svc_indicator, char *name); -+ -+#endif /* __ELAN_CONF_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/debug.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/debug.c 2005-06-01 23:12:54.635433632 -0400 -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.28.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+DisplayInfo di_ep_debug = {ep_debugf, DBG_DEBUG}; -+ -+/* -+ * Generate a partial bitmap string, for the bitmap from offset "off" for "count" bits, -+ * to allow for displaying of subsets, treat entry 0 of the bitmap as having value "base". -+ */ -+int -+ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int off, int nbits) -+{ -+ char entry[12]; /* space for N-N */ -+ register int i, j, len; -+ register int notstart = off; -+ register int notfirst = 0; -+ char *p = str; -+ -+ for (i = off; i < nbits; i++) -+ { -+ if (BT_TEST (bitmap, i)) -+ { -+ for (j = i+1; j < nbits; j++) -+ if (! BT_TEST (bitmap, j)) -+ break; -+ -+ if (j == (i+1)) -+ len = (int)sprintf (entry, "%d", base + i); -+ else -+ len = (int)sprintf (entry, "%d-%d", base + i, base + j-1); -+ -+ /* NOTE the 2 is for: one for comma, one for (possible) closing bracket */ -+ if ((p - str) <= (nbytes - (len+3))) -+ p += (int)sprintf (p, "%c%s", notfirst++ ? ',' : notstart ? ' ' : '[', entry); -+ else -+ { -+ /* no more space on this line, so move onto next */ -+ sprintf (p, "%c", notfirst++ ? ',' : '['); -+ -+ return (i); -+ } -+ -+ i = j; -+ } -+ } -+ -+ if (!notfirst) -+ sprintf (str, ""); -+ else -+ strcpy (p, "]"); -+ -+ return (-1); -+} -+ -+void -+ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits) -+{ -+ /* Tru64 kernel printf() truncates lines at 128 bytes - the man pages for printf (9) -+ * do not mention this restriction, nor that it does not terminate the line with a -+ * carriage return, this is pretty naff. -+ * Linux has a similar limit though is much more generous at 1024 - and you can just -+ * look at the code to see why this has been done. -+ * -+ * Our nodeset information could well be longer than 128 characters, so we're going to -+ * have to split it into a number of lines. */ -+ -+#define LINEBUF_SIZE 128 -+ char *p, linebuf[LINEBUF_SIZE+1]; /* +1 for null termination */ -+ int i, noff, off = 0; -+ -+ do { -+ if (off == 0) -+ p = linebuf + (int)sprintf (linebuf, "%s: %s ", prefix, tag); -+ else -+ { -+ p = linebuf + (int)sprintf (linebuf, "%s: ", prefix); -+ for (i = 0; tag[i] != '\0'; i++) -+ *p++ = ' '; -+ } -+ -+ noff = ep_sprintf_bitmap (p, &linebuf[LINEBUF_SIZE-1]-p, bitmap, base, off, nbits); -+ -+ printk ("%s\n", linebuf); -+ -+ } while ((off = noff) != -1); -+ -+#undef LINEBUF_SIZE -+} -+ -+void -+ep_debugf (long mode, char *fmt, ...) -+{ -+ va_list ap; -+ char prefix[32]; -+ -+ va_start (ap, fmt); -+#if defined(LINUX) -+ sprintf (prefix, "[%08d.%04d] ", (int) lbolt, current->pid); -+#else -+ sprintf (prefix, "[%08d.----] ", (int) lbolt); -+#endif -+ qsnet_vdebugf ((mode & epdebug_console ? QSNET_DEBUG_CONSOLE: 0) | QSNET_DEBUG_BUFFER, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+int -+ep_assfail (EP_RAIL *rail, const char *ex, const char *func, const char *file, const int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (assfail_mode & 2) -+ panic ("ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (assfail_mode & 4) -+ epdebug = 0; -+ -+ return 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/debug_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/debug_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/debug_elan4.c 2005-06-01 23:12:54.636433480 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug_elan4.c,v 1.1 2004/05/19 10:21:04 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/debug_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+#include "debug.h" -+ -+static void -+ep4_display_ecqs (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep_debugf (DBG_DEBUG, "ECQ: type %d: avail %d cqnum %d\n", i, ecq->ecq_avail, elan4_cq2num (ecq->ecq_cq)); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_debug_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_debugf (DBG_DEBUG, "ep%d: is elan4 %d rev %c\n", rail->r_generic.Number, -+ rail->r_generic.Devinfo.dev_instance, 'a' + rail->r_generic.Devinfo.dev_revision_id); -+ -+ ep4_display_ecqs (rail); -+ -+ ep_display_alloc (&sys->Allocator); -+ ep_display_rmap (sys->Allocator.ResourceMap); -+ -+ ep_display_alloc (&rail->r_generic.ElanAllocator); -+ ep_display_alloc (&rail->r_generic.MainAllocator); -+ -+ ep_display_rmap (rail->r_generic.ElanAllocator.ResourceMap); -+} -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/debug.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/debug.h 2005-06-01 23:12:54.636433480 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EPDEBUG_H -+#define _ELAN3_EPDEBUG_H -+ -+#ident "$Id: debug.h,v 1.18.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.h,v $ */ -+ -+extern unsigned int epdebug; -+extern unsigned int epdebug_console; -+extern unsigned int epdebug_cmlevel; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern unsigned int epdebug_check_sum; -+#endif -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_PROBE 0x00000002 -+#define DBG_ROUTETABLE 0x00000004 -+#define DBG_STATEMAP 0x00000008 -+ -+#define DBG_CM 0x00000020 -+#define DBG_XMTR 0x00000040 -+#define DBG_RCVR 0x00000080 -+#define DBG_FORWARD 0x00000100 -+#define DBG_DISCON 0x00000200 -+#define DBG_EPTRAP 0x00000400 -+#define DBG_COMMAND 0x00000800 -+#define DBG_RETRY 0x00001000 -+#define DBG_DEBUG 0x00002000 -+#define DBG_NETWORK_ERROR 0x00004000 -+#define DBG_MSGSYS 0x00008000 -+#define DBG_MANAGER 0x00010000 -+#define DBG_KMAP 0x00020000 -+#define DBG_FAILOVER 0x00040000 -+#define DBG_MAPNMD 0x00080000 -+#define DBG_KMSG 0x00100000 -+#define DBG_SVC 0x00200000 -+#define DBG_STABILISE 0x00400000 -+ -+#if defined(DEBUG_PRINTF) -+ -+# define EPRINTF0(m,fmt) ((epdebug&(m)) ? ep_debugf(m,fmt) : (void)0) -+# define EPRINTF1(m,fmt,a) ((epdebug&(m)) ? ep_debugf(m,fmt,a) : (void)0) -+# define EPRINTF2(m,fmt,a,b) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b) : (void)0) -+# define EPRINTF3(m,fmt,a,b,c) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c) : (void)0) -+# define EPRINTF4(m,fmt,a,b,c,d) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d) : (void)0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e) : (void)0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f) : (void)0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+# define EPRINTF10(m,fmt,a,b,c,d,e,f,g,h,i,j) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i,j) : (void)0) -+ -+# define CPRINTF0(lvl,fmt) (((lvl) <= epdebug_cmlevel) ? EPRINTF0(DBG_CM,fmt) : (void)0) -+# define CPRINTF1(lvl,fmt,a) (((lvl) <= epdebug_cmlevel) ? EPRINTF1(DBG_CM,fmt,a) : (void)0) -+# define CPRINTF2(lvl,fmt,a,b) (((lvl) <= epdebug_cmlevel) ? EPRINTF2(DBG_CM,fmt,a,b) : (void)0) -+# define CPRINTF3(lvl,fmt,a,b,c) (((lvl) <= epdebug_cmlevel) ? EPRINTF3(DBG_CM,fmt,a,b,c) : (void)0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (((lvl) <= epdebug_cmlevel) ? EPRINTF4(DBG_CM,fmt,a,b,c,d) : (void)0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (((lvl) <= epdebug_cmlevel) ? EPRINTF5(DBG_CM,fmt,a,b,c,d,e) : (void)0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (((lvl) <= epdebug_cmlevel) ? EPRINTF6(DBG_CM,fmt,a,b,c,d,e,f) : (void)0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (((lvl) <= epdebug_cmlevel) ? EPRINTF7(DBG_CM,fmt,a,b,c,d,e,f,g) : (void)0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (((lvl) <= epdebug_cmlevel) ? EPRINTF8(DBG_CM,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (((lvl) <= epdebug_cmlevel) ? EPRINTF9(DBG_CM,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+ -+#if defined __GNUC__ -+extern void ep_debugf (long mode, char *fmt, ...) __attribute__ ((format (printf,2,3))); -+#else -+extern void ep_debugf (long mode, char *fmt, ...); -+#endif -+ -+#else -+ -+# define EPRINTF0(m,fmt) (0) -+# define EPRINTF1(m,fmt,a) (0) -+# define EPRINTF2(m,fmt,a,b) (0) -+# define EPRINTF3(m,fmt,a,b,c) (0) -+# define EPRINTF4(m,fmt,a,b,c,d) (0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) (0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) (0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) (0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i,j) (0) -+ -+# define CPRINTF0(lvl,fmt) (0) -+# define CPRINTF1(lvl,fmt,a) (0) -+# define CPRINTF2(lvl,fmt,a,b) (0) -+# define CPRINTF3(lvl,fmt,a,b,c) (0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (0) -+ -+#endif /* DEBUG */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN3_EPDEBUG_H */ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 2005-06-01 23:12:54.637433328 -0400 -@@ -0,0 +1,133 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_asm_elan4_thread.S,v 1.5 2004/04/25 11:25:43 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+#include "assym_elan4.h" -+ -+/* XXXXX - registers.h */ -+#define E4_MAIN_INT_SHIFT 14 -+ -+/* -+ * c_waitevent_interrupt (E4_uint64 *commandport, E4_Event *event, E4_uint64 count, E4_uint64 intcookie) -+ */ -+ .global c_waitevent_interrupt -+c_waitevent_interrupt: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ or %r9, WAIT_EVENT_CMD, %r16 ! WAIT_EVENT_CMD | event -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 ! ev_CountAndType -+ mov %sp, %r18 ! ev_Source -+ mov %r8, %r19 ! ev_Dest -+ sll8 %r11, E4_MAIN_INT_SHIFT, %r20 -+ or %r20, INTERRUPT_CMD, %r20 ! INTERRUPT_CMD | (cookie << E4_MAIN_INT_SHIFT) -+ mov NOP_CMD, %r21 -+ mov NOP_CMD, %r22 -+ mov NOP_CMD, %r23 -+ -+ st64suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -+ -+#define EP4_RCVR_PENDING_STALLED 1 /* indicates thread has stalled for no descriptor (rcvr_pending_head) */ -+ -+#define RXD_DEBUG(VAL,RXD,TMP) \ -+ mov VAL, TMP; \ -+ st8 TMP, [RXD + EP4_RXD_DEBUG] -+ -+ -+ /* -+ * %r2 - rcvr elan -+ * %r3 - rxd elan -+ */ -+ .global c_queue_rxd -+c_queue_rxd: -+ RXD_DEBUG(1, %r3, %r23) -+ -+ ld16 [%r2 + EP4_RCVR_PENDING_TAILP], %r18 /* r18 == tailp, r19 = head */ -+ add %r3, EP4_RXD_NEXT, %r4 -+ -+ st8 %r0, [%r3 + EP4_RXD_NEXT] /* rxd->rxd_next = NULL */ -+ st8 %r4, [%r2 + EP4_RCVR_PENDING_TAILP] /* tailp = &rxd->rxd_next */ -+ st8 %r3, [%r18] /* *tailp = rxd */ -+ -+ cmp %r19, EP4_RCVR_PENDING_STALLED /* thread stalled ? */ -+ beq 1f -+ mov %r18, %r16 /* must have used %r16, %r19, %r23 */ -+ mov %r3, %r23 -+ -+ RXD_DEBUG(2, %r3, %r23) -+ -+ st8suspend %r16, [%r3 + EP4_RXD_QUEUED] /* no - mark as queued - all done */ -+ -+1: st8 %r16, [%r3 + EP4_RXD_QUEUED] /* mark as queued */ -+ -+ RXD_DEBUG(3, %r3, %r23) -+ -+ mov %r3, %r8 /* return rxd from c_stall_thread */ -+ ba .epcomms_resume_thread /* resume the thread */ -+ ld64 [%r2 + EP4_RCVR_THREAD_STALL], %r0 -+ -+ /* -+ * c_stall_thread (EP4_RCVR_ELAN *rcvrElan) -+ */ -+ .global c_stall_thread -+c_stall_thread: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov EP4_RCVR_PENDING_STALLED, %r9 // Mark rcvr as stalled -+ st8 %r9, [%r8 + EP4_RCVR_PENDING_HEAD] -+ -+ // XXXX _ TBD should generate interrupt -+ -+ mov %r1, %r17 // SP -+ mov %r7, %r23 // return pc -+ -+ st64suspend %r16, [%r8 + EP4_RCVR_THREAD_STALL] -+ -+.epcomms_resume_thread: -+ /* %r8 == rxdElan */ -+ -+ ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r7+8, %r0 // and return -+ add %sp, 192, %sp -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms.c 2005-06-01 23:12:54.637433328 -0400 -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms.c,v 1.71.2.6 2004/11/30 12:02:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.c,v $ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include "cm.h" -+#include "debug.h" -+ -+static void -+ep_comms_thread (void *arg) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) arg; -+ struct list_head *el; -+ -+ kernel_thread_init ("ep_comms"); -+ -+ /* since ep_alloc_xmtr() has incremented the module use count, -+ * we would be preventing the module from being unloaded, so -+ * we decrement the use count since this thread must terminate -+ * during unload of the module. -+ */ -+ ep_mod_dec_usecount(); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ /* NOTE - subsys->Lock serializes us against flush/relocations -+ * caused by rail nodeset transitions. -+ */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ nextRunTime = ep_check_xmtr (list_entry (el, EP_XMTR, Link), nextRunTime); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ nextRunTime = ep_check_rcvr (list_entry (el, EP_RCVR, Link), nextRunTime); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_csum_rxds (subsys); -+#endif -+ nextRunTime = ep_forward_rxds (subsys, nextRunTime); -+ -+ if (ep_kthread_sleep (&subsys->Thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_mod_inc_usecount(); -+ -+ ep_kthread_stopped (&subsys->Thread); -+ kernel_thread_exit(); -+} -+ -+int -+ep_comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail; -+ struct list_head *el; -+ -+ printk ("%s: vendorid=%x deviceid=%x\n", rail->Name, rail->Devinfo.dev_vendor_id, rail->Devinfo.dev_device_id); -+ -+ switch (rail->Devinfo.dev_device_id) -+ { -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ case PCI_DEVICE_ID_ELAN3: -+ commsRail = ep3comms_add_rail (s, sys, rail); -+ break; -+#endif -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ case PCI_DEVICE_ID_ELAN4: -+ commsRail = ep4comms_add_rail (s, sys, rail); -+ break; -+#endif -+ default: -+ return 0; -+ } -+ -+ if (commsRail == NULL) -+ return 1; -+ -+ commsRail->Rail = rail; -+ commsRail->Subsys = subsys; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&commsRail->Link, &subsys->Rails); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP (commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ return 0; -+} -+ -+void -+ep_comms_del_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail = NULL; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ /* find out rail entry and remove from system list */ -+ list_for_each (el, &subsys->Rails) { -+ if ((commsRail = list_entry (el, EP_COMMS_RAIL, Link))->Rail == rail) -+ break; -+ } -+ -+ list_del (&commsRail->Link); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP(commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP(commsRail,Xmtr.DelRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ EP_RAIL_OP (commsRail, DelRail) (commsRail); -+} -+ -+void -+ep_comms_fini (EP_SUBSYS *s, EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ -+ ep_kthread_stop (&subsys->Thread); -+ ep_kthread_destroy (&subsys->Thread); -+ -+ if (subsys->ForwardXmtr) -+ ep_free_xmtr (subsys->ForwardXmtr); -+ -+ spin_lock_destroy (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spin_lock_destroy (&subsys->CheckSumDescLock); -+#endif -+ -+ kmutex_destroy (&subsys->Lock); -+ -+ KMEM_FREE (subsys, sizeof (EP_COMMS_SUBSYS)); -+} -+ -+int -+ep_comms_init (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, EP_COMMS_SUBSYS *, sizeof (EP_COMMS_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ INIT_LIST_HEAD (&subsys->Rails); -+ INIT_LIST_HEAD (&subsys->Receivers); -+ INIT_LIST_HEAD (&subsys->Transmitters); -+ INIT_LIST_HEAD (&subsys->ForwardDescList); -+ -+ kmutex_init (&subsys->Lock); -+ spin_lock_init (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&subsys->CheckSumDescList); -+ spin_lock_init (&subsys->CheckSumDescLock); -+#endif -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "epcomms"; -+ subsys->Subsys.Destroy = ep_comms_fini; -+ subsys->Subsys.AddRail = ep_comms_add_rail; -+ subsys->Subsys.RemoveRail = ep_comms_del_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ ep_kthread_init (&subsys->Thread); -+ -+ if ((subsys->ForwardXmtr = ep_alloc_xmtr (subsys->Subsys.Sys)) == NULL) -+ goto failed; -+ -+ if (kernel_thread_create (ep_comms_thread, subsys) == NULL) -+ goto failed; -+ ep_kthread_started (&subsys->Thread); -+ -+ return (0); -+ -+ failed: -+ ep_subsys_del (sys, &subsys->Subsys); -+ ep_comms_fini (&subsys->Subsys, sys); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_comms_display (EP_SYS *sys, char *how) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME); -+ struct list_head *el; -+ -+ if (how == NULL || !strncmp (how, "rail", 4)) -+ { -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, DisplayRail) (commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ } -+ -+ if (how == NULL || !strncmp (how, "xmtr", 4)) -+ list_for_each (el, &subsys->Transmitters) -+ ep_display_xmtr (&di_ep_debug, list_entry (el, EP_XMTR, Link)); -+ -+ if (how == NULL || !strncmp (how, "rcvr", 4)) -+ list_for_each (el, &subsys->Receivers) -+ ep_display_rcvr (&di_ep_debug, list_entry (el, EP_RCVR, Link), (how && how[4] == ',') ? 1 : 0); -+} -+ -+int -+ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_set: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_set(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_clear: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_clear(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ int set = 0; -+ -+ EPRINTF2 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d \n", svc_indicator, nodeId); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_is_set: ep_subsys_find failed\n"); -+ return (0); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ set |= cm_svc_indicator_is_set(commsRail->Rail, svc_indicator, nodeId); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF3 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d returning %d\n", svc_indicator, nodeId, set); -+ return set; -+} -+ -+int -+ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_bitmap: ep_subsys_find failed\n"); -+ return (-2); -+ } -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (commsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (0); -+} -+ -+int -+ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ int i; -+ -+ EPRINTF1 (DBG_SVC,"ep_xmtr_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (xmtr->Rails[i]->CommsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ } -+ -+ return (0); -+} -+ -+EP_RAILMASK -+ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ EP_RAILMASK rmask=0; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and reading info from Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId) -+{ -+ EP_RAILMASK rmask=0; -+ EP_COMMS_RAIL *commsRail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ commsRail = xmtr->Rails[i]->CommsRail; -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ } -+ -+ EPRINTF3 (DBG_SVC, "ep_xmtr_svc_indicator_railmask: svc %d node %d mask 0x%x\n", svc_indicator, nodeId, rmask); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RAILMASK rmask=0; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == service) -+ rmask |= rcvr->RailMask; -+ } -+ kmutex_unlock(&subsys->Lock); -+ -+ return (rmask); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+uint32_t -+ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags) -+{ -+ EP_NMH *nmh; -+ int i; -+ uint16_t check_data = 0; -+ uint16_t check_env = 0; -+ -+ for (i = 0; i < nFrags; i++) { -+ /* find the nmh for this frag */ -+ nmh = ep_nmh_find (&sys->MappingTable, &nmd[i]); -+ -+ ASSERT( nmh != NULL); -+ -+ /* add the next frag to the check sum */ -+ check_data = nmh->nmh_ops->op_calc_check_sum (sys, nmh, &nmd[i], check_data); -+ } -+ -+ check_env = rolling_check_sum ((char *) env, offsetof(EP_ENVELOPE, CheckSum), 0); -+ -+ return (EP_ENVELOPE_CHECK_SUM | ( (check_env & 0x7FFF) << 16) | (check_data & 0xFFFF)); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3.c 2005-06-01 23:12:54.638433176 -0400 -@@ -0,0 +1,191 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3.c,v 1.60 2004/08/03 11:34:34 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_flush_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_flush_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_failover_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_failover_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_disconnect_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_disconnect_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+EP_COMMS_RAIL * -+ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COMMS_RAIL *commsRail; -+ EP3_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP3_COMMS_RAIL *, sizeof (EP3_COMMS_RAIL), TRUE); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->Generic.Ops.DelRail = ep3comms_del_rail; -+ commsRail->Generic.Ops.DisplayRail = ep3comms_display_rail; -+ commsRail->Generic.Ops.Rcvr.AddRail = ep3rcvr_add_rail; -+ commsRail->Generic.Ops.Rcvr.DelRail = ep3rcvr_del_rail; -+ commsRail->Generic.Ops.Rcvr.Check = ep3rcvr_check; -+ commsRail->Generic.Ops.Rcvr.QueueRxd = ep3rcvr_queue_rxd; -+ commsRail->Generic.Ops.Rcvr.RpcPut = ep3rcvr_rpc_put; -+ commsRail->Generic.Ops.Rcvr.RpcGet = ep3rcvr_rpc_get; -+ commsRail->Generic.Ops.Rcvr.RpcComplete = ep3rcvr_rpc_complete; -+ -+ commsRail->Generic.Ops.Rcvr.StealRxd = ep3rcvr_steal_rxd; -+ -+ commsRail->Generic.Ops.Rcvr.FillOutRailStats = ep3rcvr_fillout_rail_stats; -+ -+ commsRail->Generic.Ops.Rcvr.DisplayRcvr = ep3rcvr_display_rcvr; -+ commsRail->Generic.Ops.Rcvr.DisplayRxd = ep3rcvr_display_rxd; -+ -+ commsRail->Generic.Ops.Xmtr.AddRail = ep3xmtr_add_rail; -+ commsRail->Generic.Ops.Xmtr.DelRail = ep3xmtr_del_rail; -+ commsRail->Generic.Ops.Xmtr.Check = ep3xmtr_check; -+ commsRail->Generic.Ops.Xmtr.BindTxd = ep3xmtr_bind_txd; -+ commsRail->Generic.Ops.Xmtr.UnbindTxd = ep3xmtr_unbind_txd; -+ commsRail->Generic.Ops.Xmtr.PollTxd = ep3xmtr_poll_txd; -+ commsRail->Generic.Ops.Xmtr.CheckTxdState = ep3xmtr_check_txd_state; -+ -+ commsRail->Generic.Ops.Xmtr.DisplayXmtr = ep3xmtr_display_xmtr; -+ commsRail->Generic.Ops.Xmtr.DisplayTxd = ep3xmtr_display_txd; -+ -+ commsRail->Generic.Ops.Xmtr.FillOutRailStats = ep3xmtr_fillout_rail_stats; -+ -+ /* Allocate the input queues at their fixed elan address */ -+ if (! (commsRail->QueueDescs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * sizeof (EP3_InputQueue), PAGESIZE), EP_PERM_ALL, 0))) -+ { -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = 0; -+ qdesc.q_top = 0; -+ qdesc.q_fptr = 0; -+ qdesc.q_bptr = 0; -+ qdesc.q_size = 0; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan3_sdram_copyl_to_sdram (dev, &qdesc, commsRail->QueueDescs + (i * sizeof (EP3_InputQueue)), sizeof (EP3_InputQueue)); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep3comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) r; -+ EP_RAIL *rail = commsRail->Generic.Rail; -+ -+ ep_remove_callback (rail, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ ep_free_memory_elan (rail, EP_EPCOMMS_QUEUE_BASE); -+ -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+} -+ -+void -+ep3comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan3.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3.h 2005-06-01 23:12:54.639433024 -0400 -@@ -0,0 +1,330 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN3_H -+#define __EPCOMMS_ELAN3_H -+ -+#ident "@(#)$Id: epcomms_elan3.h,v 1.27.2.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.h,v $ */ -+ -+#define EP3_DMAFAILCOUNT 3 -+ -+ -+/* Main/Elan spinlock */ -+typedef struct ep3_spinlock_elan -+{ -+ volatile E3_uint32 sl_lock; /* main wants a lock */ -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[14]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_ELAN; -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef struct ep3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_pad[15]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_MAIN; -+ -+#if defined (__ELAN3__) -+ -+extern void ep3_spinblock (EP3_SPINLOCK_ELAN *, EP3_SPINLOCK_MAIN *); -+ -+#define EP3_SPINENTER(SLE,SL) \ -+do {\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_lock) \ -+ ep3_spinblock(SLE, SL);\ -+} while (0) -+ -+#define EP3_SPINEXIT(SLE,SL) \ -+do {\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+} while (0) -+ -+#else -+ -+#define EP3_SPINENTER(DEV,SLE,SL) do { \ -+ E3_uint32 seq; \ -+\ -+ mb();\ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1);\ -+ mb();\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ while (seq != (SL)->sl_seq)\ -+ {\ -+ while ((SL)->sl_seq == (seq - 1))\ -+ {\ -+ mb();\ -+\ -+ DELAY (1); \ -+ }\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ }\ -+} while (0) -+ -+#define EP3_SPINEXIT(DEV,SLE,SL) do { \ -+ wmb(); \ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0);\ -+ mmiob(); \ -+} while (0) -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail elan memory portion receive descriptor */ -+typedef struct ep3_rxd_rail_elan -+{ -+ E3_DMA Dmas[EP_MAXFRAG+1]; /* Dma's for fetching data/putting data & status blk */ -+ E3_Event ChainEvent[EP_MAXFRAG]; /* Events to chain dmas */ -+ E3_BlockCopyEvent DataEvent; /* message received block event */ -+ E3_BlockCopyEvent DoneEvent; /* RPC status block event */ -+ -+ EP_NMD Data; /* Network mapping handle for receive data */ -+ -+ E3_Addr RxdMain; /* pointer to main memory portion */ -+ -+ E3_Addr Next; /* linked list when on pending list (elan address) */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rxd_main */ -+} EP3_RXD_RAIL_ELAN; -+ -+#define EP3_RXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_RXD_RAIL_ELAN), E3_DMA_ALIGN) -+ -+/* per-rail main memory portion of receive descriptor */ -+typedef struct ep3_rxd_rail_main -+{ -+ E3_uint32 DataEvent; /* dest for done event */ -+ E3_uint32 DoneEvent; /* dest for done event */ -+} EP3_RXD_RAIL_MAIN; -+ -+#define EP3_RXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_RXD_RAIL_MAIN), sizeof (E3_uint32)) -+ -+#if !defined(__ELAN3__) -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep3_rxd_rail -+{ -+ EP_RXD_RAIL Generic; /* generic rxd rail */ -+ -+ EP3_COOKIE DataCookie; /* Event cookie */ -+ EP3_COOKIE DoneCookie; /* Event cookie */ -+ EP3_COOKIE ChainCookie[EP_MAXFRAG]; /* Event cookie */ -+ -+ sdramaddr_t RxdElan; /* per-rail elan receive descriptor */ -+ E3_Addr RxdElanAddr; /* and elan address */ -+ -+ EP3_RXD_RAIL_MAIN *RxdMain; /* per-rail main receive descriptor */ -+ E3_Addr RxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_RXD_RAIL; -+ -+#define EP3_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep3_rxd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_RXD_RAIL Rxd[EP3_NUM_RXD_PER_BLOCK]; -+} EP3_RXD_RAIL_BLOCK; -+ -+#endif /* ! __ELAN3__ */ -+ -+typedef struct ep3_rcvr_rail_elan /* Elan memory service structure */ -+{ -+ EP3_SPINLOCK_ELAN ThreadLock; /* elan memory portion of spin lock */ -+ EP3_SPINLOCK_ELAN PendingLock; /* spin lock for pending rx list */ -+ -+ E3_Addr PendingDescs; /* list of pending receive descriptors */ -+ E3_uint32 ThreadShouldHalt; /* marks that the thread should halt */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rcvr (for StallThreadForNoDescs)*/ -+} EP3_RCVR_RAIL_ELAN; -+ -+typedef struct ep3_rcvr_rail_main /* Main memory service strucure */ -+{ -+ EP3_SPINLOCK_MAIN ThreadLock; /* main memory portion of spin lock */ -+ EP3_SPINLOCK_MAIN PendingLock; /* spinlock for pending rx list */ -+ -+ volatile unsigned PendingDescsTailp; /* next pointer of last receive descriptor on pending list */ -+} EP3_RCVR_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_RCVR_RAIL_STATS; -+ -+typedef struct ep3_rcvr_rail -+{ -+ EP_RCVR_RAIL Generic; /* generic portion */ -+ -+ EP3_RCVR_RAIL_MAIN *RcvrMain; -+ E3_Addr RcvrMainAddr; -+ sdramaddr_t RcvrElan; -+ E3_Addr RcvrElanAddr; -+ -+ sdramaddr_t InputQueueBase; /* base of receive queue */ -+ E3_Addr InputQueueAddr; /* elan address of receive queue */ -+ -+ E3_Addr ThreadStack; /* Thread processor stack */ -+ E3_Addr ThreadWaiting; /* Elan thread is waiting as no receive descriptors pending (sp stored here ) */ -+ E3_Addr ThreadHalted; /* Elan thread is waiting as it was requested to halt */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and sleep here */ -+ -+ EP3_RCVR_RAIL_STATS stats; /* elan3 specific rcvr_rail stats */ -+} EP3_RCVR_RAIL; -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail portion of transmit descriptor */ -+typedef struct ep3_txd_rail_elan -+{ -+ EP_ENVELOPE Envelope; /* message envelope */ -+ EP_PAYLOAD Payload; /* message payload */ -+ -+ E3_BlockCopyEvent EnveEvent; /* envelope event */ -+ E3_BlockCopyEvent DataEvent; /* data transfer event */ -+ E3_BlockCopyEvent DoneEvent; /* rpc done event */ -+} EP3_TXD_RAIL_ELAN; -+ -+#define EP3_TXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_TXD_RAIL_ELAN), E3_BLK_ALIGN) -+ -+typedef struct ep3_txd_rail_main -+{ -+ E3_uint32 EnveEvent; /* dest for envelope event */ -+ E3_uint32 DataEvent; /* dest for data transfer event */ -+ E3_uint32 DoneEvent; /* dest for rpc done event */ -+} EP3_TXD_RAIL_MAIN; -+ -+#define EP3_TXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_TXD_RAIL_MAIN), E3_BLK_ALIGN) -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_txd_rail -+{ -+ EP_TXD_RAIL Generic; /* generic txd rail */ -+ -+ EP3_COOKIE EnveCookie; /* Event cookies */ -+ EP3_COOKIE DataCookie; -+ EP3_COOKIE DoneCookie; -+ -+ sdramaddr_t TxdElan; /* Elan TX descriptor */ -+ E3_Addr TxdElanAddr; /* and elan address */ -+ -+ EP3_TXD_RAIL_MAIN *TxdMain; /* Elan Main memory tx descriptor */ -+ E3_Addr TxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_TXD_RAIL; -+ -+ -+#define EP3_NUM_TXD_PER_BLOCK 16 -+ -+typedef struct ep3_txd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_TXD_RAIL Txd[EP3_NUM_TXD_PER_BLOCK]; -+} EP3_TXD_RAIL_BLOCK; -+ -+typedef struct ep3_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_XMTR_RAIL_STATS; -+ -+typedef struct ep3_xmtr_rail -+{ -+ EP_XMTR_RAIL Generic; /* generic portion */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ EP3_XMTR_RAIL_STATS stats; /* elan3 specific xmtr rail stats */ -+} EP3_XMTR_RAIL; -+ -+typedef struct ep3_comms_rail -+{ -+ EP_COMMS_RAIL Generic; /* generic comms rail */ -+ sdramaddr_t QueueDescs; /* input queue descriptors */ -+} EP3_COMMS_RAIL; -+ -+/* epcommxTx_elan3.c */ -+extern void ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+ -+/* epcommsRx_elan3.c */ -+extern void CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdMainAddr, E3_uint32 PAckVal); -+extern void StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+extern void StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+ -+extern void ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep3comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep3comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan3.c */ -+extern int ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep3xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep3xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep3xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep3xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan3.c */ -+extern int ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep3rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep3rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep3rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN3_H */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3_thread.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan3_thread.c 2005-06-01 23:12:54.640432872 -0400 -@@ -0,0 +1,296 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3_thread.c,v 1.4 2004/01/20 11:03:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3_thread.c,v $ */ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long long int64_t; -+typedef unsigned long long uint64_t; -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+#ifndef offsetof -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+ -+EP3_RAIL_ELAN *rail; -+EP3_RCVR_RAIL_ELAN *r; -+EP3_RCVR_RAIL_MAIN *rm; -+ -+void -+ep3comms_rcvr (EP3_RAIL_ELAN *rail, EP3_RCVR_RAIL_ELAN *rcvrElan, EP3_RCVR_RAIL_MAIN *rcvrMain, -+ EP3_InputQueue *q, unsigned int *cookies) -+{ -+ int count = 1; -+ E3_Addr nfptr = q->q_fptr + q->q_size; -+ E3_uint32 tmp; -+ int i; -+ E3_Addr buffer; -+ int len; -+ E3_DMA *dma; -+ E3_Event *event; -+ -+ /* clear the queue state to allow envelopes to arrive */ -+ q->q_state = 0; -+ -+ for (;;) -+ { -+ if (! rcvrElan->ThreadShouldHalt) -+ c_waitevent ((E3_Event *) &q->q_event, count); /* HALT POINT */ -+ -+ if (rcvrElan->ThreadShouldHalt && nfptr == q->q_bptr) -+ { -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_THREAD_HALTED)); /* HALT POINT */ -+ continue; -+ } -+ -+ count = 0; -+ do { -+ /* Process the message at nfptr */ -+ EP_ENVELOPE *env = (EP_ENVELOPE *) nfptr; -+ EP3_RXD_RAIL_ELAN *rxd; -+ int ack; -+ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ -+ while ((rxd = (EP3_RXD_RAIL_ELAN *)rcvrElan->PendingDescs) == 0) -+ { -+ /* no receive descriptors, so trap to the kernel to wait -+ * for receive descriptor to be queued, we pass the rcvr -+ * in %g1, so that the trap handler can restart us. */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_NO_DESCS)); /* HALT POINT */ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ } -+ -+ if (env->Version != EP_ENVELOPE_VERSION) -+ { -+ /* This envelope has been cancelled - so just consume it */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ goto consume_envelope; -+ } -+ -+ dma = rxd->Dmas; -+ event = rxd->ChainEvent; -+ -+ if (EP_IS_MULTICAST(env->Attr)) -+ { -+ dma->dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t); -+ dma->dma_source = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, Bitmap); -+ dma->dma_dest = (E3_Addr) &((EP_RXD_MAIN *) rxd->RxdMain)->Bitmap; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ dma++; event++; -+ } -+ -+ if (env->nFrags == 0) -+ { -+ /* Generate a "get" DMA to accept the envelope and fire the rx handler */ -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = 0; -+ dma->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ len = 0; -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = rxd->Data.nmd_addr, len = 0; i < env->nFrags; i++, dma++, event++) -+ { -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = env->Frags[i].nmd_len; -+ dma->dma_source = env->Frags[i].nmd_addr; -+ dma->dma_dest = buffer; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ buffer += dma->dma_size; -+ len += dma->dma_size; -+ } -+ -+ /* Point the last dma at the done event */ -+ (--dma)->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ -+ if (rxd->Data.nmd_len < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = 0, dma = rxd->Dmas; i < env->nFrags; i++, dma++) -+ dma->dma_size = 0; -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Store the received message length in the rxdElan for CompleteEnvelope */ -+ rxd->Data.nmd_len = len; -+ -+ /* Initialise %g1 with the "rxd" so the trap handler can -+ * complete the envelope processing if we trap while sending the -+ * packet */ -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rxd)); -+ -+ /* Generate a packet to start the data transfer */ -+ c_open (EP_VP_DATA (env->NodeId)); -+ c_sendtrans2 (TR_THREADIDENTIFY, rxd->Dmas->dma_destCookieVProc, 0, 0); -+ c_sendmem (TR_SENDACK | TR_REMOTEDMA, 0, rxd->Dmas); -+ ack = c_close(); -+ -+ /* -+ * If we trapped for an output timeout, then the trap handler will have -+ * completed processing this envelope and cleared the spinlock, so we just -+ * need to update the queue descriptor. -+ */ -+ if (ack == EP3_PAckStolen) -+ goto consume_envelope; -+ -+ if (ack != E3_PAckOk) -+ { -+ /* our packet got nacked, so trap into the kernel so that -+ * it can complete processing of this envelope. -+ */ -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_PACKET_NACKED)); /* HALT POINT */ -+ goto consume_envelope; -+ } -+ -+ /* remove the RXD from the pending list */ -+ EP3_SPINENTER (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ if ((rcvrElan->PendingDescs = rxd->Next) == 0) -+ rcvrMain->PendingDescsTailp = 0; -+ EP3_SPINEXIT (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ -+ /* Copy the envelope information - as 5 64 byte chunks. -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ if (EP_HAS_PAYLOAD(env->Attr)) -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" /* copy envelope */ -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%0 + 128],%%l0 ! load 64-byte block into locals/ins\n" /* copy payload */ -+ "stblock64 %%l0,[%1 + 128] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 192],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 192] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ else -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ -+ /* Store the message length to indicate that I've finished */ -+ ((EP_RXD_MAIN *) rxd->RxdMain)->Len = rxd->Data.nmd_len; /* PCI write */ -+ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ -+ consume_envelope: -+ /* Sample the queue full bit *BEFORE* moving the fptr. -+ * Then only clear it if it was full before, otherwise, -+ * as soon as the fptr is moved on the queue could fill -+ * up, and so clearing it could mark a full queue as -+ * empty. -+ * -+ * While the full bit is set, the queue is in a 'steady -+ * state', so it is safe to set the q_state -+ * -+ */ -+ if (((tmp = q->q_state) & E3_QUEUE_FULL) == 0) -+ q->q_fptr = nfptr; /* update queue */ -+ else -+ { -+ q->q_fptr = nfptr; /* update queue */ -+ q->q_state = tmp &~E3_QUEUE_FULL; /* and clear full flag */ -+ } -+ -+ count++; /* bump message count */ -+ if (nfptr == q->q_top) /* queue wrap */ -+ nfptr = q->q_base; -+ else -+ nfptr += q->q_size; -+ -+ c_break_busywait(); /* be nice HALT POINT */ -+ -+ } while (nfptr != q->q_bptr); /* loop until Fptr == Bptr */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4.c 2005-06-01 23:12:54.640432872 -0400 -@@ -0,0 +1,392 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4.c,v 1.11.2.1 2004/10/28 11:53:28 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+static void -+ep4comms_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ commsRail->r_flush_count = 0; -+ kcondvar_wakeupall (&commsRail->r_flush_sleep, &commsRail->r_flush_lock); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_start (EP4_COMMS_RAIL *commsRail) -+{ -+ kmutex_lock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail) -+{ -+ unsigned long flags; -+ -+ ep4_wait_event_cmd (commsRail->r_flush_mcq, -+ commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * commsRail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ commsRail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (commsRail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ while (commsRail->r_flush_count != 0) -+ kcondvar_wait (&commsRail->r_flush_sleep, &commsRail->r_flush_lock, &flags); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+ -+ kmutex_unlock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ -+ elan4_set_event_cmd (cq, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event)); -+ -+ commsRail->r_flush_count++; -+ -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->r_generic.Subsys; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ unsigned int rnum = rail->r_generic.Number; -+ struct list_head *el; -+ -+ /* -+ * We stall the retry thread from CB_FLUSH_FILTERING until -+ * we've finished CB_FLUSH_FLUSHING to ensure that sten -+ * packets can not be being retried while we flush them -+ * through. -+ */ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ break; -+ } -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_flush_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_flush_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4comms_flush_wait (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep_kthread_resume (&rail->r_retry_thread); -+ break; -+ } -+} -+ -+void -+ep4comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_failover_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_failover_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_disconnect_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_disconnect_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_neterr_callback (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any sten packets from the retry lists */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Second - flush through all command queues for xmtrs and rcvrs */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_flush (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_flush (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ /* Third - wait for flush to complete */ -+ ep4comms_flush_wait ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Fourth - flush through all command queues */ -+ ep4_flush_ecqs (rail); -+ -+ /* Fifth - search all the retry lists for the network error cookies */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_check (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_check (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+ -+EP_COMMS_RAIL * -+ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *)r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_COMMS_RAIL *commsRail; -+ E4_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP4_COMMS_RAIL *,sizeof (EP4_COMMS_RAIL), 1); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->r_generic.Ops.DelRail = ep4comms_del_rail; -+ commsRail->r_generic.Ops.DisplayRail = ep4comms_display_rail; -+ commsRail->r_generic.Ops.Rcvr.AddRail = ep4rcvr_add_rail; -+ commsRail->r_generic.Ops.Rcvr.DelRail = ep4rcvr_del_rail; -+ commsRail->r_generic.Ops.Rcvr.Check = ep4rcvr_check; -+ commsRail->r_generic.Ops.Rcvr.QueueRxd = ep4rcvr_queue_rxd; -+ commsRail->r_generic.Ops.Rcvr.RpcPut = ep4rcvr_rpc_put; -+ commsRail->r_generic.Ops.Rcvr.RpcGet = ep4rcvr_rpc_get; -+ commsRail->r_generic.Ops.Rcvr.RpcComplete = ep4rcvr_rpc_complete; -+ -+ commsRail->r_generic.Ops.Rcvr.StealRxd = ep4rcvr_steal_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.DisplayRcvr = ep4rcvr_display_rcvr; -+ commsRail->r_generic.Ops.Rcvr.DisplayRxd = ep4rcvr_display_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.FillOutRailStats = ep4rcvr_fillout_rail_stats; -+ -+ commsRail->r_generic.Ops.Xmtr.AddRail = ep4xmtr_add_rail; -+ commsRail->r_generic.Ops.Xmtr.DelRail = ep4xmtr_del_rail; -+ commsRail->r_generic.Ops.Xmtr.Check = ep4xmtr_check; -+ commsRail->r_generic.Ops.Xmtr.BindTxd = ep4xmtr_bind_txd; -+ commsRail->r_generic.Ops.Xmtr.UnbindTxd = ep4xmtr_unbind_txd; -+ commsRail->r_generic.Ops.Xmtr.PollTxd = ep4xmtr_poll_txd; -+ commsRail->r_generic.Ops.Xmtr.CheckTxdState = ep4xmtr_check_txd_state; -+ -+ commsRail->r_generic.Ops.Xmtr.DisplayXmtr = ep4xmtr_display_xmtr; -+ commsRail->r_generic.Ops.Xmtr.DisplayTxd = ep4xmtr_display_txd; -+ -+ commsRail->r_generic.Ops.Xmtr.FillOutRailStats = ep4xmtr_fillout_rail_stats; -+ -+ /* Allocate command queue space for flushing (1 dword for interrupt + 4 dwords for waitevent) */ -+ if ((commsRail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == NULL) -+ { -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ if ((commsRail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == NULL) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ /* Allocate and initialise the elan memory part */ -+ if ((commsRail->r_elan = ep_alloc_elan (r, EP4_COMMS_RAIL_ELAN_SIZE, 0, &commsRail->r_elan_addr)) == (sdramaddr_t) 0) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ ep4_register_intcookie (rail, &commsRail->r_flush_intcookie, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ ep4comms_flush_interrupt, commsRail); -+ -+ elan4_sdram_writeq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ -+ /* Allocate and initialise all the queue desriptors as "full" with no event */ -+ if ((commsRail->r_descs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * EP_QUEUE_DESC_SIZE, SDRAM_PAGE_SIZE), EP_PERM_ALL, 0)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (r, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl (qdesc.q_bptr,qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qdesc, commsRail->r_descs + (i * EP_QUEUE_DESC_SIZE), -+ sizeof (E4_InputQueue)); -+ -+ kmutex_init (&commsRail->r_flush_mutex); -+ spin_lock_init (&commsRail->r_flush_lock); -+ kcondvar_init (&commsRail->r_flush_sleep); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ commsRail->r_neterr_ops.op_func = ep4comms_neterr_callback; -+ commsRail->r_neterr_ops.op_arg = commsRail; -+ -+ ep4_add_neterr_ops (rail, &commsRail->r_neterr_ops); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep4comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ kcondvar_destroy (&commsRail->r_flush_sleep); -+ spin_lock_destroy (&commsRail->r_flush_lock); -+ kmutex_destroy (&commsRail->r_flush_mutex); -+ -+ ep_free_memory_elan (&rail->r_generic, EP_EPCOMMS_QUEUE_BASE); -+ ep_free_elan (&rail->r_generic, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ -+ ep4_deregister_intcookie (rail, &commsRail->r_flush_intcookie); -+ -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+} -+ -+void -+ep4comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ ep4_display_rail (rail); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%d mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ commsRail->r_flush_count, commsRail->r_flush_mcq, commsRail->r_flush_ecq, -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4.h 2005-06-01 23:12:54.641432720 -0400 -@@ -0,0 +1,470 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN4_H -+#define __EPCOMMS_ELAN4_H -+ -+#ident "@(#)$Id: epcomms_elan4.h,v 1.13.2.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.h,v $ */ -+ -+ -+#include -+ -+/* -+ * Elan4 spinlocks are a pair of 64 bit words, one in elan sdram and one in main memory -+ * the sdram word holds the thread sequence number in the bottom 32 bits and the main -+ * lock in the top 32 bits. The main memory word holds the sequence number only in -+ * it's bottom 32 bits */ -+ -+typedef volatile E4_uint64 EP4_SPINLOCK_MAIN; -+typedef volatile E4_uint64 EP4_SPINLOCK_ELAN; -+ -+#define EP4_SPINLOCK_SEQ 0 -+#define EP4_SPINLOCK_MLOCK 4 -+ -+#if defined(__elan4__) -+ -+#define EP4_SPINENTER(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "inc %0\n" \ -+ "st4 %0, [%1]\n" \ -+ "ld4 [%1 + 4], %0\n" \ -+ "srl8,byte %0, 4, %0\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+\ -+ if (tmp) \ -+ ep4_spinblock (CPORT,SLE, SLM); \ -+} while (0) -+ -+extern void ep4_spinblock(E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm); -+ -+#define EP4_SPINEXIT(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "st4 %0, [%2]\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+} while (0) -+ -+#else -+ -+#define EP4_SPINENTER(DEV,SLE,SLM) \ -+do { \ -+ uint32_t seq; \ -+\ -+ mb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 1); \ -+ mb(); \ -+ while ((seq = elan4_sdram_readl (DEV, (SLE) + EP4_SPINLOCK_SEQ)) != *((uint32_t *) (SLM))) \ -+ { \ -+ while (*((uint32_t *) (SLM)) == (seq - 1)) \ -+ { \ -+ mb(); \ -+ DELAY(1); \ -+ } \ -+ } \ -+} while (0) -+ -+#define EP4_SPINEXIT(DEV,SLE,SLM) \ -+do { \ -+ wmb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 0); \ -+} while (0) -+ -+#endif /* !defined(__elan4__) */ -+ -+#define EP4_STEN_RETRYCOUNT 16 -+#define EP4_DMA_RETRYCOUNT 16 -+ -+typedef struct ep4_intr_cmd -+{ -+ E4_uint64 c_write_cmd; -+ E4_uint64 c_write_value; -+ E4_uint64 c_intr_cmd; -+} EP4_INTR_CMD; -+ -+#define EP4_INTR_CMD_NDWORDS (sizeof (EP4_INTR_CMD) / 8) -+ -+typedef struct ep4_rxd_sten_cmd -+{ -+ E4_uint64 c_open; -+ -+ E4_uint64 c_trans; -+ E4_uint64 c_cookie; -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ -+ E4_uint64 c_ok_guard; -+ E4_uint64 c_ok_write_cmd; -+ E4_uint64 c_ok_write_value; -+ -+ E4_uint64 c_fail_guard; -+ E4_uint64 c_fail_setevent; -+ -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_STEN_CMD; -+ -+#define EP4_RXD_STEN_CMD_NDWORDS (sizeof (EP4_RXD_STEN_CMD) / 8) -+ -+typedef struct ep4_rxd_dma_cmd -+{ -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_DMA_CMD; -+ -+#define EP4_RXD_DMA_CMD_NDWORDS (sizeof (EP4_RXD_DMA_CMD) / 8) -+#define EP4_RXD_START_CMD_NDWORDS (sizeof (E4_ThreadRegs) / 8) -+ -+typedef struct ep4_rxd_rail_elan -+{ -+ EP4_RXD_STEN_CMD rxd_sten[EP_MAXFRAG+1]; -+ -+ EP4_INTR_CMD rxd_done_cmd; /* command stream issued by done event (aligned to 64 bytes) */ -+ E4_Addr rxd_next; /* linked list when on pending list (pad to 32 bytes)*/ -+ E4_Event32 rxd_failed; /* event set when sten packet fails */ -+ -+ EP4_INTR_CMD rxd_failed_cmd; /* command stream issued by fail event (aligned to 64 bytes) */ -+ E4_uint64 rxd_queued; /* rxd queuing thread has executed (pad to 32 bytes)*/ -+ -+ E4_Event32 rxd_start; /* event to set to fire off and event chain (used as chain[0]) */ -+ E4_Event32 rxd_chain[EP_MAXFRAG]; /* chained events (aligned to 32 bytes) */ -+ E4_Event32 rxd_done; /* event to fire done command stream causing interrupt (used as chain[EP_MAXFRAG]) */ -+ -+ E4_Addr rxd_rxd; /* elan address of EP4_RXD_MAIN */ -+ E4_Addr rxd_main; /* elan address of EP4_RXD_RAIL_MAIN */ -+ E4_uint64 rxd_debug; /* thread debug value */ -+ -+ EP_NMD rxd_buffer; /* Network mapping descriptor for receive data */ -+} EP4_RXD_RAIL_ELAN; -+ -+#define EP4_RXD_RAIL_ELAN_SIZE roundup(sizeof (EP4_RXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_rxd_rail_main -+{ -+ E4_uint64 rxd_sent[EP_MAXFRAG+1]; /* sten packet sent */ -+ E4_uint64 rxd_failed; /* sten packet failed */ -+ E4_uint64 rxd_done; /* operation complete */ -+ -+ E4_Addr rxd_scq; /* command port for scq */ -+} EP4_RXD_RAIL_MAIN; -+ -+#define EP4_RXD_RAIL_MAIN_SIZE roundup(sizeof (EP4_RXD_RAIL_MAIN), 8) -+ -+#if !defined(__elan4__) -+typedef struct ep4_rxd_rail -+{ -+ EP_RXD_RAIL rxd_generic; -+ -+ struct list_head rxd_retry_link; -+ unsigned long rxd_retry_time; -+ -+ EP4_INTCOOKIE rxd_intcookie; -+ -+ sdramaddr_t rxd_elan; -+ EP_ADDR rxd_elan_addr; -+ -+ EP4_RXD_RAIL_MAIN *rxd_main; -+ EP_ADDR rxd_main_addr; -+ -+ EP4_ECQ *rxd_ecq; /* cq with 128 bytes targetted by event */ -+ EP4_ECQ *rxd_scq; /* cq with 8 bytes targetted by main/thread store */ -+} EP4_RXD_RAIL; -+ -+#define EP4_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep4_rxd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_RXD_RAIL blk_rxds[EP4_NUM_RXD_PER_BLOCK]; -+} EP4_RXD_RAIL_BLOCK; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_rcvr_rail_elan -+{ -+ E4_uint64 rcvr_thread_stall[8]; /* place for thread to stall */ -+ E4_Event32 rcvr_qevent; /* Input queue event */ -+ E4_Event32 rcvr_thread_halt; /* place for thread to halt */ -+ -+ volatile E4_Addr rcvr_pending_tailp; /* list of pending rxd's (elan addr) */ -+ volatile E4_Addr rcvr_pending_head; /* -- this pair aligned to 16 bytes */ -+ -+ EP4_SPINLOCK_ELAN rcvr_thread_lock; /* spinlock for thread processing loop */ -+ -+ E4_uint64 rcvr_stall_intcookie; /* interrupt cookie to use when requseted to halt */ -+ -+ E4_uint64 rcvr_qbase; /* base of input queue */ -+ E4_uint64 rcvr_qlast; /* last item in input queue */ -+ -+ E4_uint64 rcvr_debug; /* thread debug value */ -+} EP4_RCVR_RAIL_ELAN; -+ -+typedef struct ep4_rcvr_rail_main -+{ -+ EP4_SPINLOCK_MAIN rcvr_thread_lock; /* spinlock for thread processing loop */ -+} EP4_RCVR_RAIL_MAIN; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_RCVR_RAIL_STATS; -+ -+typedef struct ep4_rcvr_rail -+{ -+ EP_RCVR_RAIL rcvr_generic; /* generic portion */ -+ -+ sdramaddr_t rcvr_elan; -+ EP_ADDR rcvr_elan_addr; -+ -+ EP4_RCVR_RAIL_MAIN *rcvr_main; -+ EP_ADDR rcvr_main_addr; -+ -+ sdramaddr_t rcvr_slots; /* input queue slots */ -+ EP_ADDR rcvr_slots_addr; /* and elan address */ -+ -+ EP_ADDR rcvr_stack; /* stack for thread */ -+ -+ EP4_ECQ *rcvr_ecq; /* command queue space for thread STEN packets */ -+ EP4_ECQ *rcvr_resched; /* command queue space to reschedule the thread */ -+ -+ struct list_head rcvr_freelist; /* freelist of per-rail receive descriptors */ -+ unsigned int rcvr_freecount; /* and number on free list */ -+ unsigned int rcvr_totalcount; /* total number created */ -+ spinlock_t rcvr_freelock; /* and lock for free list */ -+ struct list_head rcvr_blocklist; /* list of receive descriptor blocks */ -+ -+ unsigned int rcvr_freewaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t rcvr_freesleep; /* and sleep here */ -+ -+ EP4_INTCOOKIE rcvr_stall_intcookie; /* interrupt cookie for thread halt */ -+ unsigned char rcvr_thread_halted; /* thread has been halted */ -+ unsigned char rcvr_cleanup_waiting; /* waiting for cleanup */ -+ kcondvar_t rcvr_cleanup_sleep; /* and sleep here */ -+ -+ EP4_RETRY_OPS rcvr_retryops; -+ -+ struct list_head rcvr_retrylist; /* list of txd's to retry envelopes for */ -+ struct list_head rcvr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t rcvr_retrylock; -+ -+ EP4_RCVR_RAIL_STATS rcvr_stats; /* elan4 specific rcvr_rail stats */ -+ -+} EP4_RCVR_RAIL; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_txd_rail_elan -+{ -+ EP4_INTR_CMD txd_env_cmd; /* command stream for envelope event (64 byte aligned) */ -+ E4_uint64 txd_pad0; /* pad to 32 bytes */ -+ E4_Event32 txd_env; /* event set when STEN packet fails */ -+ -+ EP4_INTR_CMD txd_done_cmd; /* command stream for done event (64 byte aligned) */ -+ E4_uint64 txd_pad1; /* pad to 32 bytes */ -+ E4_Event32 txd_done; /* event set when transmit complete */ -+ -+ E4_Event32 txd_data; /* event set when xmit completes (=> phase becomes passive) */ -+} EP4_TXD_RAIL_ELAN; -+ -+#define EP4_TXD_RAIL_ELAN_SIZE roundup(sizeof(EP4_TXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_txd_rail_main -+{ -+ E4_uint64 txd_env; -+ E4_uint64 txd_data; -+ E4_uint64 txd_done; -+} EP4_TXD_RAIL_MAIN; -+ -+#define EP4_TXD_RAIL_MAIN_SIZE roundup(sizeof(EP4_TXD_RAIL_MAIN), 8) -+ -+#if !defined (__elan4__) -+typedef struct ep4_txd_rail -+{ -+ EP_TXD_RAIL txd_generic; -+ -+ struct list_head txd_retry_link; -+ unsigned long txd_retry_time; -+ -+ EP4_INTCOOKIE txd_intcookie; -+ -+ sdramaddr_t txd_elan; -+ EP_ADDR txd_elan_addr; -+ -+ EP4_TXD_RAIL_MAIN *txd_main; -+ EP_ADDR txd_main_addr; -+ -+ EP4_ECQ *txd_ecq; -+ -+ E4_uint64 txd_cookie; -+} EP4_TXD_RAIL; -+ -+#define EP4_NUM_TXD_PER_BLOCK 21 -+ -+typedef struct ep4_txd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_TXD_RAIL blk_txds[EP4_NUM_TXD_PER_BLOCK]; -+} EP4_TXD_RAIL_BLOCK; -+ -+typedef struct ep4_xmtr_rail_main -+{ -+ E4_int64 xmtr_flowcnt; -+} EP4_XMTR_RAIL_MAIN; -+ -+typedef struct ep4_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_XMTR_RAIL_STATS; -+ -+#define EP4_TXD_LIST_POLL 0 -+#define EP4_TXD_LIST_STALLED 1 -+#define EP4_TXD_LIST_RETRY 2 -+#define EP4_TXD_NUM_LISTS 3 -+typedef struct ep4_xmtr_rail -+{ -+ EP_XMTR_RAIL xmtr_generic; -+ -+ EP4_XMTR_RAIL_MAIN *xmtr_main; -+ EP_ADDR xmtr_main_addr; -+ -+ struct list_head xmtr_freelist; -+ unsigned int xmtr_freecount; -+ unsigned int xmtr_totalcount; -+ spinlock_t xmtr_freelock; -+ struct list_head xmtr_blocklist; -+ unsigned int xmtr_freewaiting; -+ kcondvar_t xmtr_freesleep; -+ -+ EP4_INTCOOKIE xmtr_intcookie; /* interrupt cookie for "polled" descriptors */ -+ -+ ELAN4_CQ *xmtr_cq; -+ E4_int64 xmtr_flowcnt; -+ -+ EP4_RETRY_OPS xmtr_retryops; -+ -+ struct list_head xmtr_retrylist[EP4_TXD_NUM_LISTS]; /* list of txd's to retry envelopes for */ -+ struct list_head xmtr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t xmtr_retrylock; -+ -+ EP4_XMTR_RAIL_STATS stats; /* elan4 specific xmtr rail stats */ -+} EP4_XMTR_RAIL; -+ -+#define EP4_XMTR_CQSIZE CQ_Size64K /* size of command queue for xmtr */ -+#define EP4_XMTR_FLOWCNT (CQ_Size(EP4_XMTR_CQSIZE) / 512) /* # of STEN packets which can fit in */ -+ -+typedef struct ep4_comms_rail_elan -+{ -+ E4_Event32 r_flush_event; -+} EP4_COMMS_RAIL_ELAN; -+ -+#define EP4_COMMS_RAIL_ELAN_SIZE roundup(sizeof (EP4_COMMS_RAIL_ELAN), 32) -+ -+typedef struct ep4_comms_rail -+{ -+ EP_COMMS_RAIL r_generic; /* generic comms rail */ -+ sdramaddr_t r_descs; /* input queue descriptors */ -+ -+ sdramaddr_t r_elan; /* elan portion */ -+ EP_ADDR r_elan_addr; -+ -+ kmutex_t r_flush_mutex; /* sequentialise flush usage */ -+ EP4_INTCOOKIE r_flush_intcookie; /* interrupt cookie to generate */ -+ -+ kcondvar_t r_flush_sleep; /* place to sleep waiting */ -+ spinlock_t r_flush_lock; /* and spinlock to use */ -+ -+ unsigned int r_flush_count; /* # setevents issued */ -+ EP4_ECQ *r_flush_ecq; /* command queue for interrupt */ -+ EP4_ECQ *r_flush_mcq; /* command queeu to issue waitevent */ -+ -+ EP4_NETERR_OPS r_neterr_ops; /* network error fixup ops */ -+} EP4_COMMS_RAIL; -+ -+/* epcommsTx_elan4.c */ -+extern void ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+ -+extern void ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcommsRx_elan4.c */ -+extern void ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+ -+extern void ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcomms_elan4.c */ -+extern void ep4comms_flush_start (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq); -+ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep4comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep4comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan4.c */ -+extern int ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep4xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep4xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep4xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep4xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan4.c */ -+extern int ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep4rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep4rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep4rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__elan4__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN4_H */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4_thread.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcomms_elan4_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcomms_elan4_thread.c 2005-06-01 23:12:54.642432568 -0400 -@@ -0,0 +1,346 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4_thread.c,v 1.10.8.2 2004/09/28 10:36:51 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4_thread.c,v $*/ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long int64_t; -+typedef unsigned long uint64_t; -+ -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+/* assembler in epcomms_asm_elan4_thread.S */ -+extern void c_waitevent_interrupt (E4_uint64 *cport, E4_Event32 *event, E4_uint64 count, E4_uint64 intcookie); -+extern EP4_RXD_RAIL_ELAN *c_stall_thread (EP4_RCVR_RAIL_ELAN *rcvrRail); -+ -+#define R32_to_R47 "%r32", "%r33", "%r34", "%r35", "%r36", "%r37", "%r38", "%r39", \ -+ "%r40", "%r41", "%r42", "%r43", "%r44", "%r45", "%r46", "%r47" -+#define R48_to_R63 "%r48", "%r49", "%r50", "%r51", "%r52", "%r53", "%r54", "%r55", \ -+ "%r56", "%r57", "%r58", "%r59", "%r60", "%r61", "%r62", "%r63" -+ -+/* proto types for code in asm_elan4_thread.S */ -+extern void c_waitevent (E4_uint64 *commandport, E4_Addr event, E4_uint64 count); -+extern void c_reschedule(E4_uint64 *commandport); -+ -+static inline unsigned long -+c_load_u16(unsigned short *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld2 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 48, %0\n" -+ "srl8 %0, 48, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline unsigned long -+c_load_u32(unsigned int *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld4 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 32, %0\n" -+ "srl8 %0, 32, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline void -+c_store_u32(unsigned int *ptr, unsigned long value) -+{ -+ asm volatile ("sll8,byte %0, %1, %%r2\n" -+ "st4 %%r2, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (value), "r" (ptr) -+ : /* clobbered */ "%r2"); -+} -+ -+/* Reschedule the current Elan thread to the back of the run queue -+ * if there is another one ready to run */ -+static inline void -+c_yield (E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_SIGNED_BIT) -+ c_reschedule(commandport); -+} -+ -+/* Reschedule the current thread if we're in danger of exceeding the -+ * thread instruction count */ -+static inline void -+c_insn_check(E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_ZERO_BIT) -+ c_reschedule(commandport); -+} -+ -+void -+ep4_spinblock (E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm) -+{ -+ do { -+ unsigned long val = *sle & 0xfffffffff; -+ -+ *slm = val; /* Release my lock */ -+ -+ while (*sle >> 32) /* Wait until the main */ -+ c_yield(cport); /* releases the lock */ -+ -+ c_store_u32 ((unsigned int *) sle, val + 1); /* and try and relock */ -+ } while (*sle >> 32); -+} -+ -+#define RESCHED_AFTER_PKTS ((CQ_Size(CQ_Size64K) / 128) - 1) -+ -+void -+ep4comms_rcvr (EP4_RAIL_ELAN *rail, EP4_RCVR_RAIL_ELAN *rcvrElan, EP4_RCVR_RAIL_MAIN *rcvrMain, -+ E4_InputQueue *inputq, E4_uint64 *cport, E4_uint64 *resched) -+{ -+ long count = 1; -+ long fptr = inputq->q_fptr; -+ -+ for (;;) -+ { -+ c_waitevent (cport, inputq->q_event, -count << 5); -+ -+ count = 0; -+ -+ while (fptr != inputq->q_bptr) -+ { -+ EP_ENVELOPE *env = (EP_ENVELOPE *) fptr; -+ unsigned long nodeid = c_load_u32 (&env->NodeId); -+ unsigned long opencmd = OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(nodeid)); -+ unsigned long vproc = EP_VP_DATA(rail->r_nodeid); -+ EP_ATTRIBUTE attr = c_load_u32 (&env->Attr); -+ unsigned long txdRail = c_load_u32 (&env->TxdRail); -+ unsigned long nFrags = c_load_u32 (&env->nFrags); -+ E4_uint64 cookie = rail->r_cookies[nodeid]; -+ unsigned long srcevent = (EP_IS_RPC(attr) ? txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_data) : -+ txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done)); -+ EP4_RXD_RAIL_ELAN *rxdElan; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_RXD_MAIN *rxd; -+ EP4_RXD_STEN_CMD *sten; -+ E4_Event32 *event; -+ unsigned long first; -+ unsigned long buffer; -+ unsigned long len; -+ unsigned long i; -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ if ((rxdElan = (EP4_RXD_RAIL_ELAN *) rcvrElan->rcvr_pending_head) == 0) -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ rxdElan = c_stall_thread (rcvrElan); -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ } -+ -+ if (c_load_u32 (&env->Version) != EP_ENVELOPE_VERSION) /* envelope has been cancelled */ -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ goto consume_envelope; -+ } -+ -+ rxd = (EP_RXD_MAIN *) rxdElan->rxd_rxd; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) rxdElan->rxd_main; -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(attr) ? 1 : 0) + (nFrags == 0 ? 1 : nFrags)); -+ sten = &rxdElan->rxd_sten[first]; -+ event = &rxdElan->rxd_chain[first]; -+ -+ if (EP_IS_MULTICAST(attr)) /* need to fetch broadcast bitmap */ -+ { -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t), DMA_DataTypeWord, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->TxdMain.nmd_addr) + offsetof(EP_TXD_MAIN, Bitmap); -+ sten->c_dma_dstAddr = (E4_Addr) &rxd->Bitmap; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ if (nFrags == 0) -+ { -+ /* Generate an empty "get" DMA to accept the envelope and fire the rx handler */ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ len = 0; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = c_load_u32 (&rxdElan->rxd_buffer.nmd_addr), len = 0; i < nFrags; i++) -+ { -+ unsigned long fragLen = c_load_u32 (&env->Frags[i].nmd_len); -+ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(fragLen, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->Frags[i].nmd_addr); -+ sten->c_dma_dstAddr = buffer; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ buffer += fragLen; -+ len += fragLen; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ (--event)->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ if (c_load_u32 (&rxdElan->rxd_buffer.nmd_len) < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = first, sten = &rxdElan->rxd_sten[first]; i <= EP_MAXFRAG; i++, sten++) -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Stuff the first STEN packet into the command queue, there's always enough space, -+ * since we will insert a waitevent at least once for the queue size */ -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0 + 64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (&rxdElan->rxd_sten[first]), "r" (cport) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* remove the RXD from the pending list */ -+ if ((rcvrElan->rcvr_pending_head = rxdElan->rxd_next) == 0) -+ rcvrElan->rcvr_pending_tailp = (E4_Addr)&rcvrElan->rcvr_pending_head; -+ -+ /* mark as not queued */ -+ rxdElan->rxd_queued = 0; -+ -+ /* copy down the envelope */ -+ if (EP_HAS_PAYLOAD(attr)) -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "ld64 [%0+128], %%r32\n" -+ "st64 %%r48, [%1+64]\n" -+ "ld64 [%0+192], %%r48\n" -+ "st64 %%r32, [%1 + 128]\n" -+ "st64 %%r48, [%1 + 192]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ else -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1+64]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* Store the message length to indicate that I've finished */ -+ c_store_u32 (&rxd->Len, len); -+ -+ /* Finally update the network error cookie */ -+ rail->r_cookies[nodeid] = cookie; -+ -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ consume_envelope: -+ if (fptr != rcvrElan->rcvr_qlast) -+ fptr += EP_INPUTQ_SIZE; -+ else -+ fptr = rcvrElan->rcvr_qbase; -+ -+ if (! rcvrElan->rcvr_stall_intcookie) -+ inputq->q_fptr = fptr; -+ -+ if (++count >= RESCHED_AFTER_PKTS) -+ break; -+ -+ c_insn_check (cport); -+ } -+ -+ if (rcvrElan->rcvr_stall_intcookie) -+ { -+ c_waitevent_interrupt (cport, &rcvrElan->rcvr_thread_halt, -(1 << 5), rcvrElan->rcvr_stall_intcookie); -+ inputq->q_fptr = fptr; -+ -+ count++; /* one extra as we were given an extra set to wake us up */ -+ } -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsFwd.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsFwd.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsFwd.c 2005-06-01 23:12:54.643432416 -0400 -@@ -0,0 +1,310 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsFwd.c,v 1.12 2004/08/16 12:21:15 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsFwd.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int epcomms_forward_limit = 8; -+ -+static void -+GenerateTree (unsigned nodeId, unsigned lowId, unsigned highId, bitmap_t *bitmap, -+ unsigned *parentp, unsigned *childrenp, int *nchildrenp) -+{ -+ int i; -+ int count; -+ int branch; -+ int nSub; -+ int branchIndex; -+ int parent; -+ int nBranch; -+ int rem; -+ int self; -+ int branchRatio; -+ int node; -+ int x, y, z; -+ -+ -+#ifdef DEBUG_PRINTF -+ { -+#define OVERFLOW "...]" -+#define LINESZ 128 -+ char space[LINESZ+1]; -+ -+ if (ep_sprintf_bitmap (space, LINESZ-strlen(OVERFLOW), bitmap, 0, 0, (highId - lowId)+1) != -1) -+ strcat (space, OVERFLOW); -+ -+ EPRINTF3 (DBG_FORWARD, "GenerateTree; elan node low=%d node high=%d bitmap=%s\n", lowId, highId, space); -+#undef OVERFLOW -+#undef LINESZ -+ } -+#endif -+ -+ /* Count the number of nodes in the partition */ -+ /* and work out which one I am */ -+ for (count = 0, self = ELAN_INVALID_NODE, i = lowId; i <= highId; i++) -+ { -+ if (BT_TEST (bitmap, i-lowId)) -+ { -+ if (i == nodeId) -+ self = count; -+ count++; -+ } -+ } -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: count=%d self=%d\n", count, self); -+ -+ if (count == 0 || self == ELAN_INVALID_NODE) -+ { -+ *parentp = ELAN_INVALID_NODE; -+ *nchildrenp = 0; -+ return; -+ } -+ -+ /* search for position in tree */ -+ branchRatio = EP_TREE_ARITY; /* branching ratio */ -+ branch = 0; /* start with process 0 */ -+ nSub = count; /* and whole tree */ -+ branchIndex = -1; /* my branch # in parent */ -+ parent = -1; /* my parent's group index # */ -+ -+ while (branch != self) /* descend process tree */ -+ { /* until I find myself */ -+ parent = branch; -+ branch++; /* parent + 1 = first born */ -+ nSub--; /* set # descendents */ -+ -+ rem = nSub % branchRatio; -+ nSub = nSub / branchRatio + 1; -+ x = rem * nSub; -+ y = self - branch; -+ -+ if (y < x) /* my first 'rem' branches have */ -+ { /* 1 more descendent... */ -+ branchIndex = y / nSub; -+ branch += branchIndex * nSub; -+ } -+ else /* than the rest of my branches */ -+ { -+ nSub--; -+ z = (y - x) / nSub; -+ branchIndex = rem + z; -+ branch += x + z * nSub; -+ } -+ } -+ -+ branch++; /* my first born */ -+ nSub--; /* total # of my descendents */ -+ /* leaves + their parents may have # children < branchRatio */ -+ nBranch = (nSub < branchRatio) ? nSub : branchRatio; -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: parent=%d nBranch=%d\n", parent, nBranch); -+ -+ /* Now calculate the real elan id's of the parent and my children */ -+ if (parent == -1) -+ *parentp = ELAN_INVALID_NODE; -+ else -+ { -+ for (i = lowId, node = 0; i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ if (node++ == parent) -+ break; -+ } -+ *parentp = i; -+ } -+ -+ for (i = lowId, branchIndex = 0, node = 0; branchIndex < nBranch && i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ { -+ if (node == branch) -+ { -+ branch = branch + nSub / branchRatio + ((branchIndex < (nSub % branchRatio)) ? 1 : 0); -+ -+ childrenp[branchIndex++] = i; -+ } -+ node++; -+ } -+ } -+ -+ *nchildrenp = branchIndex; -+} -+ -+static void -+ForwardTxDone (EP_TXD *txd, void *arg, EP_STATUS status) -+{ -+ EP_FWD_DESC *desc = (EP_FWD_DESC *) arg; -+ EP_RXD *rxd = desc->Rxd; -+ EP_COMMS_SUBSYS *subsys = rxd->Rcvr->Subsys; -+ unsigned long flags; -+ -+ /* XXXX: if transmit fails, could step to next node in this subtree ? */ -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ -+ if (--desc->NumChildren > 0) -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ else -+ { -+ rxd->Rcvr->ForwardRxdCount--; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+long -+ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ while (! list_empty (&subsys->ForwardDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->ForwardDescList.next, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_FWD_DESC *desc; -+ -+ EPRINTF2 (DBG_FORWARD, "ep: forwarding rxd %p to range %x\n", rxd, env->Range); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Rcvr->ForwardRxdCount++; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_ALLOC (desc, EP_FWD_DESC *, sizeof (EP_FWD_DESC), 1); -+ -+ if (desc == NULL) -+ { -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ /* compute the spanning tree for this message */ -+ unsigned int destLo = EP_RANGE_LOW (env->Range); -+ unsigned int destHi = EP_RANGE_HIGH (env->Range); -+ unsigned int parent; -+ -+ GenerateTree (subsys->Subsys.Sys->Position.pos_nodeid, destLo, destHi, rxdMain->Bitmap, &parent, desc->Children, &desc->NumChildren); -+ -+ if (desc->NumChildren == 0 || (epcomms_forward_limit && (rxd->Rcvr->ForwardRxdCount >= epcomms_forward_limit))) -+ { -+ EPRINTF5 (DBG_FORWARD, "ep; don't forward rxd %p to /%d (%d children/ %d forwarding (%d))\n", -+ rxd, rxd->Rcvr->Service, desc->NumChildren, rxd->Rcvr->ForwardRxdCount, epcomms_forward_limit); -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ ep_nmd_subset (&desc->Data, &rxd->Data, 0, ep_rxd_len (rxd)); -+ desc->Rxd = rxd; -+ -+ /* NOTE - cannot access 'desc' after last call to multicast, since it could complete -+ * and free the desc before we access it again. Hence the reverse loop. */ -+ for (i = desc->NumChildren-1; i >= 0; i--) -+ { -+ ASSERT (desc->Children[i] < subsys->Subsys.Sys->Position.pos_nodes); -+ -+ EPRINTF3 (DBG_FORWARD, "ep: forwarding rxd %p to node %d/%d\n", rxd, desc->Children[i], rxd->Rcvr->Service); -+ -+ if ((res = ep_multicast_forward (subsys->ForwardXmtr, desc->Children[i], rxd->Rcvr->Service, 0, -+ ForwardTxDone, desc, env, EP_HAS_PAYLOAD(env->Attr) ? &rxdMain->Payload : NULL, -+ rxdMain->Bitmap, &desc->Data, 1)) != EP_SUCCESS) -+ { -+ ep_debugf (DBG_FORWARD, "ep: ep_multicast_forward failed\n"); -+ ForwardTxDone (NULL, desc, res); -+ } -+ } -+ -+ } -+ } -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ return (nextRunTime); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_csum_rxds (EP_COMMS_SUBSYS *subsys) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ while (! list_empty (&subsys->CheckSumDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->CheckSumDescList.next, EP_RXD, CheckSumLink); -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ list_del_init (&rxd->CheckSumLink); -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+ -+ if (env->CheckSum) { -+ EP_NMD nmd; -+ uint32_t csum; -+ -+ ep_nmd_subset ( &nmd, &rxd->Data, 0, ep_rxd_len (rxd)); -+ -+ csum = ep_calc_check_sum(subsys->Subsys.Sys, env, &nmd, 1); -+ if ( env->CheckSum != csum ) { -+ int f; -+ -+ -+ printk("Check Sum Error: env(0x%x,0x%x) data(0x%x,0x%x)\n", ((csum >> 16) & 0x7FFF), ((env->CheckSum >> 16) & 0x7FFF), -+ (csum & 0xFFFF), (env->CheckSum & 0xFFFF)); -+ printk("Check Sum Error: Sent : NodeId %u Range 0x%x Service %u Version 0x%x Attr 0x%x\n", env->NodeId, env->Range, rxd->Rcvr->Service, env->Version, env->Attr); -+ printk("Check Sum Error: Sent : Xid Generation 0x%x Handle 0x%x Unique 0x%llx\n", env->Xid.Generation, env->Xid.Handle, env->Xid.Unique); -+ printk("Check Sum Error: Sent : TxdRail 0x%x TxdMain nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", env->TxdRail, env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr ); -+ printk("Check Sum Error: Sent : nFrags %d \n", env->nFrags); -+ for(f=0;fnFrags;f++) -+ printk("Check Sum Error: Sent (%d): nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", f, -+ env->Frags[f].nmd_addr, env->Frags[f].nmd_len, env->Frags[f].nmd_attr); -+ printk("Check Sum Error: Recv : nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", -+ nmd.nmd_addr, nmd.nmd_len, nmd.nmd_attr); -+ -+ } -+ } -+ ep_rxd_received_now(rxd); -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsRx.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsRx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsRx.c 2005-06-01 23:12:54.645432112 -0400 -@@ -0,0 +1,1205 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx.c,v 1.27.2.5 2004/11/30 12:02:16 mike Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int ep_rxd_lowat = 5; -+ -+static int -+AllocateRxdBlock (EP_RCVR *rcvr, EP_ATTRIBUTE attr, EP_RXD **rxdp) -+{ -+ EP_RXD_BLOCK *blk; -+ EP_RXD *rxd; -+ EP_RXD_MAIN *pRxdMain; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP_RXD_BLOCK *, sizeof (EP_RXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return (ENOMEM); -+ -+ if ((pRxdMain = ep_shared_alloc_main (rcvr->Subsys->Subsys.Sys, EP_RXD_MAIN_SIZE * EP_NUM_RXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+ return (ENOMEM); -+ } -+ -+ for (rxd = &blk->Rxd[0], i = 0; i < EP_NUM_RXD_PER_BLOCK; i++, rxd++) -+ { -+ rxd->Rcvr = rcvr; -+ rxd->RxdMain = pRxdMain; -+ -+ ep_nmd_subset (&rxd->NmdMain, &blk->NmdMain, (i * EP_RXD_MAIN_SIZE), EP_RXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pRxdMain = (EP_RXD_MAIN *) ((unsigned long) pRxdMain + EP_RXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvr->DescBlockList); -+ -+ rcvr->TotalDescCount += EP_NUM_RXD_PER_BLOCK; -+ -+ for (i = rxdp ? 1 : 0; i < EP_NUM_RXD_PER_BLOCK; i++) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[i].CheckSumLink); -+#endif -+ -+ list_add (&blk->Rxd[i].Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) -+ { -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (rxdp) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[0].CheckSumLink); -+#endif -+ -+ *rxdp = &blk->Rxd[0]; -+ } -+ return (ESUCCESS); -+} -+ -+static void -+FreeRxdBlock (EP_RCVR *rcvr, EP_RXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ rcvr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_shared_free_main (rcvr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+} -+ -+static EP_RXD * -+GetRxd (EP_RCVR *rcvr, EP_ATTRIBUTE attr) -+{ -+ EP_RXD *rxd; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ while (list_empty (&rcvr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (AllocateRxdBlock (rcvr, attr, &rxd) == ESUCCESS) -+ return (rxd); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ IncrStat (rcvr->Subsys, NoFreeRxds); -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ return (NULL); -+ } -+ -+ rcvr->FreeDescWanted++; -+ kcondvar_wait (&rcvr->FreeDescSleep, &rcvr->FreeDescLock, &flags); -+ } -+ -+ rxd = list_entry (rcvr->FreeDescList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (--rcvr->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ -+ return (rxd); -+} -+ -+static void -+FreeRxd (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ unsigned long flags; -+ -+ ASSERT (EP_XID_INVALID(rxd->MsgXid)); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ASSERT(list_empty(&rxd->CheckSumLink)); -+#endif -+ -+ list_add (&rxd->Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+} -+ -+int -+ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR_RAIL *rcvrRail; -+ EP_RXD *rxd; -+ int rnum; -+ unsigned long flags; -+ -+ if ((rxd = GetRxd (rcvr, attr)) == NULL) -+ return (ENOMEM); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ else -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0 || !(EP_NMD_RAILMASK(nmd) & EP_RAIL2RAILMASK(rnum) & rcvr->RailMask)) -+ rcvrRail = NULL; -+ else -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ EPRINTF7 (DBG_RCVR,"ep_queue_receive: rxd=%p svc %d nmd=%08x,%d,%x rnum=%d rcvrRail=%p\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ int rnum = ep_pickRail(EP_NMD_RAILMASK(&rxd->Data)); -+ EP_RCVR_RAIL *rcvrRail; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF5 (DBG_RCVR,"ep_requeue_receive: rxd=%p svc %d nmd=%08x,%d,%x\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ /* -+ * Rail selection: if they've asked for a particular rail, then use it, otherwise if -+ * the rail it was last received on is mapped for the nmd and is available -+ * then use that one, otherwise pick one that is mapped by the nmd. -+ */ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ -+ if (rnum < 0 || ! (EP_RAIL2RAILMASK (rnum) & EP_NMD_RAILMASK(nmd) & ep_rcvr_availrails (rcvr))) -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0) -+ rcvrRail = NULL; -+ else -+ { -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ if (! (EP_NMD_RAILMASK(&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) < 0) -+ rcvrRail = NULL; -+ } -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP(rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR, "ep_requeue_receive: rcvrRail=%p - setting unbound\n", rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ -+ep_complete_receive (EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL && rxd->State == EP_RXD_COMPLETED); -+ -+ FreeRxd (rcvr, rxd); -+ -+ /* if we're waiting for cleanup, then wake them up */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_put: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ /* rxd no longer on active list - just free it */ -+ /* off and return an error */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * rail or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_put: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_PUT_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *remote, EP_NMD *local, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_get: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_get: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_GET_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_complete_rpc: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ if (blk == NULL) -+ bzero (&rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ else -+ bcopy (blk, &rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_complete_rpc: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_COMPLETE_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* functions for accessing fields of rxds */ -+void *ep_rxd_arg(EP_RXD *rxd) { return (rxd->Arg); } -+int ep_rxd_len(EP_RXD *rxd) { return (rxd->RxdMain->Len); } -+EP_STATUS ep_rxd_status(EP_RXD *rxd) { return (rxd->RxdMain->Len < 0 ? rxd->RxdMain->Len : EP_SUCCESS); } -+int ep_rxd_isrpc(EP_RXD *rxd) { return (EP_IS_RPC(rxd->RxdMain->Envelope.Attr) != 0); } -+EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd) { return (&rxd->RxdMain->Envelope); } -+EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd) { return (EP_HAS_PAYLOAD(rxd->RxdMain->Envelope.Attr) ? &rxd->RxdMain->Payload : NULL); } -+int ep_rxd_node(EP_RXD *rxd) { return (rxd->RxdMain->Envelope.NodeId); } -+EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd) { return (&rxd->RxdMain->StatusBlk); } -+EP_RAILMASK ep_rxd_railmask(EP_RXD *rxd) { return (rxd->Data.nmd_attr); } -+ -+static void -+ProcessNmdMapResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[rxd->RxdMain->Envelope.NodeId]; -+ int i; -+ -+ ASSERT (msg->Body.MapNmd.nFrags == rxd->nFrags); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ rxd->Remote[i] = msg->Body.MapNmd.Nmd[i]; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* node is still connected on this rail */ -+ (ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* NMDs are now valid for this rail */ -+ { -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcComplete) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ default: -+ panic ("ProcessNmdMapResponse: XID match but rxd in invalid state\n"); -+ break; -+ } -+ -+ rxd->NextRunTime = 0; -+ } -+ else -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr, rxd); -+} -+ -+static void -+ProcessFailoverResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ /* XXXX - TBD */ -+#ifdef NOTYET -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_RCVR_RAIL *nRcvrRail; -+ EP_RXD_RAIL *nRxdRail; -+ -+ ASSERT (rxd->RxdMain->Envelope.Attr & EP_RPC); -+ -+ EPRINTF6 (DBG_RCVR, "ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p Xid=%016llx state %x.%x - txd on rail %d\n", rcvr, rxd, -+ rxd->MsgXid.Unique, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, msg->Body.FailoverTxd.Rail); -+ -+ if ((nRcvrRail = rcvr->Rails[msg->Body.FailoverTxd.Rail]) == NULL || -+ (nRcvrRail->Rcvr->RailMask & EP_RAIL2RAILMASK (rail->Number)) == NULL) -+ { -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr,rxd); -+ return; -+ } -+ -+ -+ nRxdRail = EP_RCVR_OP (nrcvrRail, GetRxd) (rcvr, nRcvrRail); -+ -+ -+ /* If the RPC was in progress, then rollback and mark it as flagged, -+ * this will then get treated as though the NMDs were not mapped -+ * for the rail when the user initiated the operation. -+ */ -+ switch (rxdRail->RxdMain->DataEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_PUT: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_GET: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_GET: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_GET; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_PRIVATE: -+ switch (rxdRail->RxdMain->DoneEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_COMPLETE: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE: -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE; -+ break; -+ -+ case EP_EVENT_PENDING: -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid state\n"); -+ } -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid staten"); -+ } -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP_EVENT_PRIVATE; -+ -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ BindRxdToRail (rxd, nRxdRail); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+#endif -+} -+ -+void -+ep_rcvr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *) arg; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ -+ if (EP_XIDS_MATCH (msg->Hdr.Xid, rxd->MsgXid)) -+ { -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessNmdMapResponse (rcvr, rxd, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessFailoverResponse (rcvr, rxd, msg); -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+ -+EP_RCVR * -+ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvs) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RCVR *rcvr; -+ struct list_head *el; -+ extern int portals_envelopes; -+ -+ if (portals_envelopes && (svc == EP_MSG_SVC_PORTALS_SMALL || svc == EP_MSG_SVC_PORTALS_LARGE)) -+ { -+ printk ("ep: use %d envelopes rather than %d for portals %s message service\n", sys->Position.pos_nodes * 16, nenvs, -+ svc == EP_MSG_SVC_PORTALS_SMALL ? "small" : "large"); -+ -+ nenvs = portals_envelopes; -+ } -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (rcvr, EP_RCVR *, sizeof (EP_RCVR), 1); -+ -+ if (rcvr == NULL) -+ return (NULL); -+ -+ rcvr->Subsys = subsys; -+ rcvr->Service = svc; -+ rcvr->InputQueueEntries = nenvs; -+ rcvr->FreeDescCount = 0; -+ rcvr->TotalDescCount = 0; -+ rcvr->ForwardRxdCount = 0; -+ -+ spin_lock_init (&rcvr->Lock); -+ INIT_LIST_HEAD (&rcvr->ActiveDescList); -+ -+ kcondvar_init (&rcvr->CleanupSleep); -+ kcondvar_init (&rcvr->FreeDescSleep); -+ spin_lock_init (&rcvr->FreeDescLock); -+ INIT_LIST_HEAD (&rcvr->FreeDescList); -+ INIT_LIST_HEAD (&rcvr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &rcvr->XidCache); -+ -+ rcvr->XidCache.MessageHandler = ep_rcvr_xid_msg_handler; -+ rcvr->XidCache.Arg = rcvr; -+ -+ kmutex_lock (&subsys->Lock); -+ /* See if this service is already in use */ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == svc) -+ { -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ kmutex_unlock (&subsys->Lock); -+ return NULL; -+ } -+ } -+ -+ -+ list_add_tail (&rcvr->Link, &subsys->Receivers); -+ -+ ep_procfs_rcvr_add(rcvr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (rcvr); -+} -+ -+void -+ep_free_rcvr (EP_RCVR *rcvr) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head list; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ ep_procfs_rcvr_del(rcvr); -+ -+ list_del (&rcvr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ INIT_LIST_HEAD (&list); -+ -+ /* abort all rxds - should not be bound to a rail */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (! list_empty (&rcvr->ActiveDescList)) -+ { -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ ASSERT (rxd->RxdRail == NULL); -+ ASSERT (rxd->RxdMain->Len == EP_RXD_PENDING); -+ -+ rxd->State = EP_RXD_COMPLETED; -+ rxd->RxdMain->Len = EP_SHUTDOWN; -+ -+ list_del (&rxd->Link); -+ list_add_tail (&rxd->Link, &list); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ EP_RXD *rxd = list_entry (list.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ if (rxd->Handler) -+ rxd->Handler (rxd); -+ } -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ continue; -+ } -+ -+ if (rcvr->FreeDescCount == rcvr->TotalDescCount) -+ break; -+ -+ rcvr->CleanupWaiting++; -+ kcondvar_wait (&rcvr->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* must all be in free list */ -+ ASSERT( rcvr->FreeDescCount == rcvr->TotalDescCount); -+ -+ while (! list_empty(& rcvr->DescBlockList) ) -+ FreeRxdBlock (rcvr, list_entry (rcvr->DescBlockList.next, EP_RXD_BLOCK, Link)); -+ -+ /* had better be all gone now */ -+ ASSERT((rcvr->FreeDescCount == 0) && (rcvr->TotalDescCount == 0)); -+ -+ ep_xid_cache_destroy (sys, &rcvr->XidCache); -+ -+ spin_lock_destroy (&rcvr->Lock); -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+EP_RXD * -+StealRxdFromOtherRail (EP_RCVR *rcvr) -+{ -+ EP_RXD *rxd; -+ int i; -+ -+ /* looking at the the rcvr railmask to find a rail to try to steal rxd from */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ if ((rxd = EP_RCVR_OP (rcvr->Rails[i], StealRxd) (rcvr->Rails[i])) != NULL) -+ return rxd; -+ -+ return NULL; -+} -+ -+long -+CheckUnboundRxd (EP_RCVR *rcvr, EP_RXD *rxd, long nextRunTime) -+{ -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RCVR_RAIL *rcvrRail; -+ int rnum; -+ -+ if ((rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(&rxd->Data))) < 0) -+ rnum = ep_rcvr_prefrail (rcvr, ep_rcvr_availrails (rcvr)); -+ -+ if ( rnum < 0 ) { -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ -+ return (nextRunTime); -+ } -+ -+ ASSERT ( rnum >= 0 ); -+ -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ ASSERT ( rcvrRail != NULL); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && /* not mapped already and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) == 0) || /* failed mapping, or */ -+ !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) /* failed to queue */ -+ { -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF4 (DBG_RCVR,"CheckUnboundRxd: rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", rcvr, rxd, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return (nextRunTime); -+} -+ -+int -+CheckRxdNmdsMapped (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RAIL *rail = rxdRail->RcvrRail->CommsRail->Rail; -+ int i; -+ -+ /* Try and map the local NMDs before checking to see if we can proceed */ -+ if (! (ep_nmd2railmask (rxd->Local, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Local NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&rxd->Local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ if (ep_nmd_map_rails (sys, &rxd->Local[i], EP_RAIL2RAILMASK(rail->Number))) -+ rxd->NextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ /* Try and map remote NMDs if they are not valid for this rail */ -+ if (! (ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EP_MANAGER_MSG_BODY msgBody; -+ -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Remote NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ msgBody.MapNmd.nFrags = rxd->nFrags; -+ msgBody.MapNmd.Railmask = EP_RAIL2RAILMASK (rail->Number); -+ for (i = 0; i < rxd->nFrags; i++) -+ msgBody.MapNmd.Nmd[i] = rxd->Remote[i]; -+ -+ if (ep_send_message (rail, env->NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST, rxd->MsgXid, &msgBody) == 0) -+ rxd->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ rxd->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ -+ return 0; -+ } -+ -+ if ((ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number)) != 0) -+ { -+ rxd->NextRunTime = 0; -+ return 1; -+ } -+ -+ return 0; -+} -+ -+long -+ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime) -+{ -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* Check to see if we're low on rxds */ -+ if (rcvr->FreeDescCount < ep_rxd_lowat) -+ AllocateRxdBlock (rcvr, 0, NULL); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ nextRunTime = EP_RCVR_OP (rcvr->Rails[i], Check) (rcvr->Rails[i], nextRunTime); -+ -+ /* See if we have any rxd's which need to be handled */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxdRail == NULL) -+ nextRunTime = CheckUnboundRxd (rcvr, rxd, nextRunTime); -+ else -+ { -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || /* envelope not received yet */ -+ rail->Nodes[env->NodeId].State != EP_NODE_CONNECTED) /* will be failing over */ -+ continue; -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete)(rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ } -+ -+ if (rxd->NextRunTime && (nextRunTime == 0 || AFTER (nextRunTime, rxd->NextRunTime))) -+ nextRunTime = rxd->NextRunTime; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (nextRunTime); -+} -+ -+void -+ep_display_rxd (DisplayInfo *di, EP_RXD *rxd) -+{ -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ (di->func)(di->arg, " RXD: %p State=%x RxdMain=%p(%x.%x.%x) Data=%x.%x.%x %s\n", rxd, -+ rxd->State, rxd->RxdMain, rxd->NmdMain.nmd_addr, rxd->NmdMain.nmd_len, -+ rxd->NmdMain.nmd_attr, rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, -+ rxd->RxdMain->Len == EP_RXD_PENDING ? "Pending" : "Active"); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d XID=%08x.%08x.%016llx\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, env->TxdMain.nmd_addr, -+ env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags, env->Xid.Generation, env->Xid.Handle, env->Xid.Unique);; -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (rxdRail) EP_RCVR_OP (rxdRail->RcvrRail, DisplayRxd) (di, rxdRail); -+} -+ -+void -+ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ int pendingCount = 0; -+ int railCounts[EP_MAX_RAILS]; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ for (i = 0; i FreeDescLock, flags); -+ list_for_each (el, &rcvr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ pendingCount++; -+ else -+ activeCount++; -+ -+ if (rxdRail) -+ railCounts[rxdRail->RcvrRail->CommsRail->Rail->Number]++; -+ } -+ -+ (di->func)(di->arg, "RCVR: rcvr=%p number=%d\n", rcvr, rcvr->Service); -+ (di->func)(di->arg, " RXDS Free=%d (%d) Pending=%d Active=%d Rails=%d.%d.%d.%d\n", -+ freeCount, rcvr->FreeDescCount, pendingCount, activeCount, railCounts[0], railCounts[1], -+ railCounts[2], railCounts[3]); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->Rails[i] != NULL) -+ EP_RCVR_OP (rcvr->Rails[i], DisplayRcvr) (di, rcvr->Rails[i]); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ if (rxd->RxdMain->Len != EP_RXD_PENDING || full) -+ ep_display_rxd (di, rxd); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep_rxd_received_now(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ INC_STAT(rcvr->stats,rx); -+ ADD_STAT(rcvr->stats,rx_len, rxd->RxdMain->Len); -+ -+ if (rxd->RxdMain->Len < 0 || !EP_IS_MULTICAST(env->Attr)) -+ { -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ EPRINTF5 (DBG_RCVR, "ep_rxd_received: forward rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->ForwardDescLock, flags); -+ list_add_tail (&rxd->Link, &rcvr->Subsys->ForwardDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->ForwardDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+#if defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ ep_rxd_received_now(rxd); -+} -+ -+#else -+ -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (env->CheckSum) -+ ep_rxd_queue_csum(rxd); -+ else -+ ep_rxd_received_now(rxd); -+} -+ -+void -+ep_rxd_queue_csum(EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_RCVR, "ep_rxd_queue_csum: rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->CheckSumDescLock, flags); -+ list_add_tail (&rxd->CheckSumLink, &rcvr->Subsys->CheckSumDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->CheckSumDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+} -+#endif -+ -+void -+ep_rcvr_fillout_stats(EP_RCVR *rcvr, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr->stats,rx), GET_STAT_PER_SEC(rcvr->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr->stats,rx_len) / (1024*1024)); -+} -+ -+void -+ep_rcvr_rail_fillout_stats(EP_RCVR_RAIL *rcvr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx), GET_STAT_PER_SEC(rcvr_rail->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr_rail->stats,rx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsRx_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsRx_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsRx_elan3.c 2005-06-01 23:12:54.649431504 -0400 -@@ -0,0 +1,1776 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan3.c,v 1.19.2.3 2004/11/15 11:05:49 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define RCVR_TO_RAIL(rcvrRail) ((EP3_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->Device) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+static void RxDataEvent (EP3_RAIL *rail, void *arg); -+static void RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDataCookieOps = -+{ -+ RxDataEvent, -+ RxDataRetry, -+ NULL, /* DmaCancelled */ -+ RxDataVerify, -+}; -+ -+static void RxDoneEvent (EP3_RAIL *rail, void *arg); -+static void RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDoneCookieOps = -+{ -+ RxDoneEvent, -+ RxDoneRetry, -+ NULL, /* DmaCancelled */ -+ RxDoneVerify, -+}; -+ -+static int -+AllocateRxdRailBlock (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL_BLOCK *blk; -+ EP3_RXD_RAIL *rxdRail; -+ sdramaddr_t pRxdElan; -+ EP3_RXD_RAIL_MAIN *pRxdMain; -+ E3_Addr pRxdElanAddr; -+ E3_Addr pRxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i, j; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_RXD_RAIL_BLOCK *, sizeof (EP3_RXD_RAIL_BLOCK), 1); -+ if (blk == NULL) -+ return 0; -+ -+ if ((pRxdElan = ep_alloc_elan (&rail->Generic, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pRxdMain = ep_alloc_main (&rail->Generic, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdMainAddr)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pRxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->Generic.RcvrRail = (EP_RCVR_RAIL *) rcvrRail; -+ rxdRail->RxdElan = pRxdElan; -+ rxdRail->RxdElanAddr = pRxdElanAddr; -+ rxdRail->RxdMain = pRxdMain; -+ rxdRail->RxdMainAddr = pRxdMainAddr; -+ -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), 0); -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); -+ elan3_sdram_writeq (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr), (long) rxdRail); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ RegisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j], pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), &RxDataCookieOps, (void *) rxdRail); -+ -+ event.ev_Type = EV_TYPE_DMA | (pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[j+1])); -+ event.ev_Count = 0; -+ -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ RegisterCookie (&rail->CookieTable, &rxdRail->DataCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), &RxDataCookieOps, (void *) rxdRail); -+ RegisterCookie (&rail->CookieTable, &rxdRail->DoneCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), &RxDoneCookieOps, (void *) rxdRail); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DataCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DataEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DoneCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DoneEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pRxdMain->DataEvent = EP3_EVENT_FREE; -+ pRxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pRxdElan += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdElanAddr += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdMain = (EP3_RXD_RAIL_MAIN *) ((unsigned long) pRxdMain + EP3_RXD_RAIL_MAIN_SIZE); -+ pRxdMainAddr += EP3_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvrRail->DescBlockList); -+ rcvrRail->TotalDescCount += EP3_NUM_RXD_PER_BLOCK; -+ rcvrRail->FreeDescCount += EP3_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->Rxd[i].Generic.Link, &rcvrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeRxdRailBlock (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvrRail->TotalDescCount -= EP3_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ -+ rcvrRail->FreeDescCount--; -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ DeregisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j]); -+ -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Rxd[0].RxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Rxd[0].RxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+} -+ -+static EP3_RXD_RAIL * -+GetRxdRail (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ if (list_empty (&rcvrRail->FreeDescList)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->FreeDescList.next, EP3_RXD_RAIL, Generic.Link); -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ rcvrRail->FreeDescCount--; -+ } -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&RCVR_TO_SUBSYS(rcvrRail)->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+FreeRxdRail (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = (EP_RAIL *) RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ -+ EP_ASSERT (rail, rxdRail->Generic.RcvrRail == &rcvrRail->Generic); -+ -+ EP_ASSERT (rail, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_FREE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&rxdRail->Generic.Link, &rcvrRail->FreeDescList); -+ -+ rcvrRail->FreeDescCount++; -+ -+ if (rcvrRail->FreeDescWaiting) -+ { -+ rcvrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+} -+ -+static void -+BindRxdToRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: BindRxdToRail: rxd=%p rxdRail=%p\n", rail->Generic.Name, rxd, rxdRail); -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->Generic; -+ rxdRail->Generic.Rxd = rxd; -+} -+ -+static void -+UnbindRxdFromRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->Generic && rxdRail->Generic.Rxd == rxd); -+ -+ EPRINTF3 (DBG_RCVR, "%s: UnbindRxdFromRail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rxdRail->Generic.RcvrRail)->Generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->Generic.Rxd = NULL; -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupall (&rcvrRail->CleanupSleep, &rxd->Rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+} -+ -+static void -+LockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ E3_uint32 RestartBits = 0; -+ int delay = 1; -+ E3_uint32 seq; -+ E3_uint32 reg; -+ -+ ASSERT (SPINLOCK_HELD (&rcvrRail->Generic.Rcvr->Lock)); -+ -+ mb(); -+ elan3_sdram_writel (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1); -+ mb(); -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ while (seq != sl->sl_seq) -+ { -+ while (sl->sl_seq == (seq - 1)) -+ { -+ mb(); -+ -+ if ((read_reg32 (dev, Exts.InterruptReg) & (INT_TProc | INT_TProcHalted)) != 0 && spin_trylock (&dev->IntrLock)) -+ { -+ reg=read_reg32 (dev, Exts.InterruptReg); -+ ELAN_REG_REC(reg); -+ -+ if ((reg & (INT_TProc | INT_TProcHalted)) != 0&& -+ elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)) != sl->sl_seq) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: LockRcvrThread - thread trapped\n", rail->Generic.Name); -+ -+ /* The thread processor has *really* trapped, and the spinlock is still held. -+ * thus is must have trapped due to a network error - we need to complete the -+ * actions required for this envelope, since we may be spin-locking the receiver -+ * to search the dma retry lists for a particular dma. So must ensure that -+ * if the thread had trapped then the dma has been queued onto the retry list -+ * *before* we inspect them. -+ */ -+ IncrStat (commsRail, LockRcvrTrapped); -+ -+ /* We're going to generate a spurious interrupt here - since we will -+ * handle the thread processor trap directly */ -+ ELAN_REG_REC(reg); -+ if (HandleTProcTrap (dev, &RestartBits)) -+ { -+ /* NOTE - this is not an assert, since the "store" to unlock the lock could -+ * be held up on the PCI interface, whilst the thread processor has -+ * gone on and switched to a new thread, which has then trapped, and -+ * our read of the InterruptReg can overtake the unlock write. -+ * -+ * ASSERT (dev->ThreadTrap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == -+ * elan3_sdram_readl (dev, rcvr->RcvrElan + offsetof (EP_RCVR_ELAN, PendingRxDescsElan))); -+ */ -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+ -+ DeliverTProcTrap (dev, dev->ThreadTrap, INT_TProc); -+ } -+ } -+ spin_unlock (&dev->IntrLock); -+ } -+ -+ DELAY (delay); delay++; -+ } -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ } -+} -+ -+static void -+UnlockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ -+ mb(); -+ elan3_sdram_writel (rail->Device, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0); -+ mmiob(); -+} -+ -+void -+CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdElanAddr, E3_uint32 PAckVal) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD_MAIN *rxdMain = rxdRail->Generic.Rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ sdramaddr_t queue = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ int nodeId; -+ EP_NODE_RAIL *nodeRail; -+ E3_DMA_BE dma; -+ E3_Addr nfptr; -+ E3_Addr next; -+ -+ ASSERT (commsRail->Rail == &rail->Generic); -+ ASSERT (rxdElanAddr == elan3_sdram_readl (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))); -+ -+ IncrStat (commsRail, CompleteEnvelope); -+ -+ /* We don't need to aquire the NodeLock here (however we might be holding it), -+ * since this can only get called while the node is connected, or disconnecting. -+ * If the node is disconnecting, then we can get called from FlushDisconnecting() -+ * while holding the NodeLock - after we cannot get called again until the node -+ * has reconnected from scratch. -+ */ -+ /* Copy the envelope information */ -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ -+ if (nfptr == elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top))) -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ else -+ nfptr += elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ -+ /* Copy the envelope and payload (unconditionally) */ -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), env, EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION); -+ -+ /* Copy the received message length */ -+ rxdMain->Len = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len)); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ -+ /* Copy the DMA descriptor to queue on the approriate retry list */ -+ elan3_sdram_copyq_from_sdram (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]), &dma, sizeof (E3_DMA)); /* PCI read block */ -+ -+ EP_ASSERT (&rail->Generic, dma.s.dma_direction == DMA_READ);; -+ -+#if defined(DEBUG_ASSERT) && defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the thread packet could have successfully -+ * transferred the "put" dma to the far side - which could then have -+ * completed - but the far side will see a network error which will -+ * cause the virtual circuit to be dropped by the far side and this -+ * DMA will be removed */ -+ if (rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ printk ("CompleteEnvelope: suspicious dma : Node=%d DataBlock=%d Event=%d\n", -+ env->NodeId, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif -+ -+ EPRINTF6 (DBG_RCVR, "%s: CompleteEnvelope: rxd=%p NodeId=%d Xid=%llx Cookies=%08x,%08x\n", commsRail->Rail->Name, -+ rxdRail, env->NodeId, (long long) env->Xid.Unique, dma.s.dma_srcCookieVProc, dma.s.dma_destCookieVProc); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the original DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dma.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ nodeId = EP_VP_TO_NODE(dma.s.dma_srcVProc); -+ nodeRail = &rail->Generic.Nodes[nodeId]; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (PAckVal != E3_PAckOk) -+ { -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ QueueDmaForRetry (rail, &dma, EP_RETRY_LOW_PRI_RETRY); -+ else -+ QueueDmaOnStalledList (rail, &dma); -+ } -+ -+ /* Finaly forcefully drop the spinlock for the thread */ -+ sl->sl_seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ -+ wmb(); -+} -+ -+void -+StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ -+ EPRINTF3 (DBG_RCVR, "%s: StallThreadForNoDescs - rcvrRail=%p sp=%x\n", commsRail->Rail->Name, rcvrRail, sp); -+ -+ IncrStat (commsRail, StallThread); -+ -+ /* NOTE: spin lock not required as thread is trapped */ -+ -+ if (rcvrRail->RcvrMain->PendingDescsTailp != 0) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - pending descriptors, wakeup thread\n", commsRail->Rail->Name); -+ -+ /* -+ * A receive buffer was queued after the thread had decided to go to -+ * sleep, but before the event interrupt occured. Just restart the -+ * thread to consume the envelope. -+ */ -+ IssueRunThread (rail, sp); -+ } -+ else -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - set ThreadWaiting\n", commsRail->Rail->Name); -+ -+ IncrStat (commsRail, ThrdWaiting); -+ -+ /* Mark the rcvr as waiting for a rxd, and schedule a call of ep_check_rcvr -+ * to attempt to "steal" a descriptor from a different rail */ -+ rcvrRail->ThreadWaiting = sp; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+void -+StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ unsigned long flags = 0; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ rcvrRail->ThreadHalted = sp; -+ -+ EPRINTF2 (DBG_EPTRAP, "%s: StallThreadForHalted: sp=%08x\n", rail->Generic.Name, sp); -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupone (&rcvrRail->CleanupSleep, &rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+/* -+ * RxDataEvent: arg == EP3_RXD_RAIL -+ * Called on completion of receiving data. -+ */ -+static void -+RxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int delay = 1; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("%s: RxDataEvent: rxd %p not complete [%x,%x,%x]\n", rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ /* -+ * Note, since the thread will have sent the "get" dma before copying the -+ * envelope, we must check that it has completed doing this, if not then -+ * it might be that the thread trapped due to a network error, so we must -+ * spinlock against the thread -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ LockRcvrThread (rcvrRail); -+ UnlockRcvrThread (rcvrRail); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF7 (DBG_RCVR, "%s: RxDataEvent: rxd=%p rxdRail=%p completed from elan node %d [XID=%llx] Length %d State %x\n", -+ rail->Generic.Name, rxd, rxdRail, env->NodeId, (long long) env->Xid.Unique, rxd->RxdMain->Len, rxd->State); -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_PUT_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->Generic.Number); -+ -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+} -+ -+/* -+ * RxDataRetry: arg == EP3_RXD_RAIL -+ * Called on retry of "get" dma of large transmit data -+ * and rpc_get/rpc_put and "put" of datavec of rpc completion. -+ */ -+static void -+RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDataVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDataRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDataRetry: rcvr %p rxd %p [XID=%llx]\n", rail->Generic.Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ EP_ASSERT (&rail->Generic, -+ (rxd->State == EP_RXD_RECEIVE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_PUT_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (rxd->State == EP_RXD_COMPLETE_ACTIVE ? -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1: /* PCI read */ -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 1)); /* PCI read */ -+ } -+ else -+ { -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_READ_REQUEUE); -+ -+#if defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the "get" DMA can still be running if -+ * it's packet got a network error - and then the "put" from the -+ * far side has completed - however the virtual circuit should -+ * then be dropped by the far side and this DMA will be removed */ -+ if (EP_VP_TO_NODE(dma->s.dma_srcVProc) != ep_rxd_node(rxd) || -+ (rxd->State != EP_RXD_RECEIVE_ACTIVE && rxd->State != EP_RXD_GET_ACTIVE) || -+ rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ EPRINTF6 (DBG_RCVR, "%s: RxDataRetry: suspicious dma : VProc=%d NodeId=%d State=%d DataBlock=%x Event=%d\n", -+ rail->Generic.Name, EP_VP_TO_NODE(dma->s.dma_srcVProc), ep_rxd_node(rxd), rxd->State, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif /* defined(DEBUG_SDRAM_ASSERT) */ -+ } -+#endif /* DEBUG_ASSERT */ -+} -+ -+/* -+ * RxDoneEvent: arg == EP_RXD -+ * Called on completion of large receive. -+ */ -+static void -+RxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ int delay = 1; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("RxDoneEvent: rxd %p not complete [%x,%x.%x]\n", rxd, rxdRail->RxdMain->DoneEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneEvent: rxd %p completed from elan node %d [XID=%llx]\n", -+ commsRail->Rail->Name, rxd, rxd->RxdMain->Envelope.NodeId, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ IncrStat (commsRail, RxDoneEvent); -+ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* mark rxd as private */ -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+} -+ -+/* -+ * RxDoneRetry: arg == EP_RXD -+ * Called on retry of "put" of RPC completion status block -+ */ -+static void -+RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDoneVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDoneRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneRetry: rcvr %p rxd %p [XID=%llx]\n", commsRail->Rail->Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DONE)); -+} -+ -+static void -+RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == ep_rxd_node(rxd)); -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1)); /* PCI read */ -+#endif /* defined(DEBUG_ASSERT) */ -+} -+ -+int -+ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL *rxdRail; -+ -+ ASSERT ( SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = GetRxdRail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Bind the rxdRail and rxd together */ -+ BindRxdToRail (rxd, rxdRail); -+ -+ /* Mark as active */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* Interlock with StallThreadForNoDescs */ -+ spin_lock (&dev->IntrLock); -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p\n", rail->Generic.Name, rxd->Rcvr, rxd, rxdRail); -+ -+ EP3_SPINENTER (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); /* PCI write */ -+ if (rcvrRail->RcvrMain->PendingDescsTailp == 0) -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), rxdRail->RxdElanAddr); /* PCI write */ -+ else -+ elan3_sdram_writel (dev, rcvrRail->RcvrMain->PendingDescsTailp, rxdRail->RxdElanAddr); /* PCI write */ -+ rcvrRail->RcvrMain->PendingDescsTailp = rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next); -+ -+ EP3_SPINEXIT (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ /* If the thread has paused because it was woken up with no receive buffer */ -+ /* ready, then wake it up to process the one we've just added */ -+ if (rcvrRail->ThreadWaiting) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: DoReceive: ThreadWaiting - restart thread\n", rail->Generic.Name); -+ -+ IssueRunThread (rail, rcvrRail->ThreadWaiting); -+ -+ rcvrRail->ThreadWaiting = (E3_Addr) 0; -+ } -+ -+ spin_unlock (&dev->IntrLock); -+ -+ return 1; -+} -+ -+void -+ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to put the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), local += (nFrags-1), remote += (nFrags-1); i >= 0; len += local->nmd_len, i--, local--, remote--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ if (i == (nFrags-1)) -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, dmabe.s.dma_srcCookieVProc); -+ -+ if (i != 0) -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_put: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to get the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), remote += (nFrags-1), local += (nFrags-1); i >= 0; len += remote->nmd_len, i--, remote--, local--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = remote->nmd_len; -+ dmabe.s.dma_source = remote->nmd_addr; -+ dmabe.s.dma_dest = local->nmd_addr; -+ if (i == (nFrags-1)) -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_destCookieVProc = LocalCookie (rail, env->NodeId); -+ dmabe.s.dma_srcEvent = (E3_Addr) 0; -+ dmabe.s.dma_srcCookieVProc = RemoteCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ /* -+ * Always copy down the dma descriptor, since we issue it as a READ_REQUEUE -+ * dma, and the elan will fetch the descriptor to send out of the link from -+ * the rxdElan->Dmas[i] location, before issueing the DMA chain we modify -+ * the dma_source. -+ */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the orignal DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dmabe.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_get: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCGet, len); -+} -+ -+void -+ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the status block dma */ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = sizeof (EP_STATUSBLK); -+ dmabe.s.dma_source = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ dmabe.s.dma_dest = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ dmabe.s.dma_destEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA(env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF8 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, dmabe.s.dma_source, dmabe.s.dma_dest, dmabe.s.dma_size, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ for (len = 0, i = EP_MAXFRAG, remote += (nFrags-1), local += (nFrags-1); i > EP_MAXFRAG-nFrags; len += local->nmd_len, i--, local--, remote--) -+ { -+ /* copy down previous dma */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i-1]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ } -+ -+ for (i = EP_MAXFRAG-nFrags; i < EP_MAXFRAG; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the done event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+void -+ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ sdramaddr_t qdescs = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs; -+ EP3_RCVR_RAIL *rcvrRail; -+ EP3_InputQueue qdesc; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP3_RCVR_RAIL *, sizeof (EP3_RCVR_RAIL), TRUE); -+ -+ kcondvar_init (&rcvrRail->CleanupSleep); -+ spin_lock_init (&rcvrRail->FreeDescLock); -+ INIT_LIST_HEAD (&rcvrRail->FreeDescList); -+ INIT_LIST_HEAD (&rcvrRail->DescBlockList); -+ -+ rcvrRail->Generic.CommsRail = commsRail; -+ rcvrRail->Generic.Rcvr = rcvr; -+ -+ rcvrRail->RcvrMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RCVR_RAIL_MAIN), 0, &rcvrRail->RcvrMainAddr); -+ rcvrRail->RcvrElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RCVR_RAIL_ELAN), 0, &rcvrRail->RcvrElanAddr); -+ rcvrRail->InputQueueBase = ep_alloc_elan (&rail->Generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->InputQueueAddr); -+ stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rcvrRail->ThreadStack); -+ -+ rcvrRail->TotalDescCount = 0; -+ rcvrRail->FreeDescCount = 0; -+ -+ /* Initialise the main/elan spin lock */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_seq), 0); -+ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_seq), 0); -+ -+ /* Initialise the receive lists */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), 0); -+ -+ /* Initialise the ThreadShould Halt */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), 0); -+ -+ /* Initialise pointer to the ep_rcvr_rail */ -+ elan3_sdram_writeq (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr), (unsigned long) rcvrRail); -+ -+ /* Initialise elan visible main memory */ -+ rcvrRail->RcvrMain->ThreadLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ -+ /* initialise and copy down the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = rcvrRail->InputQueueAddr; -+ qdesc.q_top = rcvrRail->InputQueueAddr + (rcvr->InputQueueEntries-1) * EP_INPUTQ_SIZE; -+ qdesc.q_fptr = rcvrRail->InputQueueAddr; -+ qdesc.q_bptr = rcvrRail->InputQueueAddr + EP_INPUTQ_SIZE; -+ qdesc.q_size = EP_INPUTQ_SIZE; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, qdescs + rcvr->Service * sizeof (EP3_InputQueue), sizeof (EP3_InputQueue)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = &rcvrRail->Generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* initialise and run the Elan thread to process the queue */ -+ IssueRunThread (rail, ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "ep3comms_rcvr"), -+ rcvrRail->ThreadStack, stack, EP3_STACK_SIZE, 5, -+ rail->RailElanAddr, rcvrRail->RcvrElanAddr, rcvrRail->RcvrMainAddr, -+ EP_MSGQ_ADDR(rcvr->Service), -+ rail->ElanCookies)); -+} -+ -+void -+ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rcvr->Rails[rail->Generic.Number]; -+ unsigned long flags; -+ struct list_head *el, *nel; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: removing rail\n", rail->Generic.Name); -+ -+ /* flag the rail as no longer available */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* mark the input queue descriptor as full */ -+ SetQueueLocked(rail, ((EP3_COMMS_RAIL *)commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue)); -+ -+ /* need to halt the thread first */ -+ /* set ThreadShouldHalt in elan memory */ -+ /* then trigger the event */ -+ /* and wait on haltWait */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), TRUE); -+ -+ IssueSetevent (rail, EP_MSGQ_ADDR(rcvr->Service) + offsetof(EP3_InputQueue, q_event)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ while (rcvrRail->ThreadHalted == 0) -+ { -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point the thread is halted and it has no envelopes */ -+ -+ /* we need to wait until all the rxd's in the list that are -+ * bound to the rail we are removing are not pending -+ */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: waiting for active rxd's to be returned\n", rail->Generic.Name); -+ -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are not pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ /* here we need to unbind the remaining rxd's */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail(rcvrRail, rxdRail ); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ while (rcvrRail->FreeDescCount != rcvrRail->TotalDescCount) -+ { -+ rcvrRail->FreeDescWaiting++; -+ kcondvar_wait (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->DescBlockList)) -+ FreeRxdRailBlock (rcvrRail, list_entry(rcvrRail->DescBlockList.next, EP3_RXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->TotalDescCount == 0) && (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount)); -+ -+ ep_free_elan (&rail->Generic, rcvrRail->ThreadStack, EP3_STACK_SIZE); -+ ep_free_elan (&rail->Generic, rcvrRail->InputQueueAddr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->Generic, rcvrRail->RcvrElanAddr, sizeof (EP3_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->Generic, rcvrRail->RcvrMainAddr, sizeof (EP3_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP3_RCVR_RAIL)); -+} -+ -+EP_RXD * -+ep3rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ E3_Addr rxdElanAddr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ LockRcvrThread (rcvrRail); -+ if ((rxdElanAddr = elan3_sdram_readl (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))) != 0) -+ { -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ sdramaddr_t next; -+ -+ EPRINTF2 (DBG_RCVR, "%s: StealRxdFromOtherRail stealing rxd %p\n", rail->Generic.Name, rail); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ UnlockRcvrThread (rcvrRail); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ return rxd; -+ } -+ -+ UnlockRcvrThread (rcvrRail); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return NULL; -+} -+ -+long -+ep3rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ EP_RXD *rxd; -+ unsigned long flags; -+ -+ if (rcvrRail->FreeDescCount < ep_rxd_lowat && !AllocateRxdRailBlock (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ if (rcvrRail->ThreadWaiting && (rxd = StealRxdFromOtherRail (rcvr)) != NULL) -+ { -+ /* Map the receive buffer into this rail as well */ -+ EPRINTF4 (DBG_RCVR, "%s: mapping rxd->Data (%08x.%08x.%08x) into this rails\n", -+ rail->Generic.Name, rxd->Data.nmd_addr,rxd->Data.nmd_len, rxd->Data.nmd_attr); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rail->Generic.Number)) && /* not already mapped and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rail->Generic.Number)) == 0) || /* failed to map it */ -+ ep3rcvr_queue_rxd (rxd, &rcvrRail->Generic)) /* or failed to queue it */ -+ { -+ EPRINTF5 (DBG_RCVR,"%s: stolen rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", -+ rail->Generic.Name, rcvr, rxd, rail->Generic.Number, rcvrRail); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ } -+ -+ return nextRunTime; -+} -+ -+static void -+ep3rcvr_flush_filtering (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Generic.Rail; -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t qdesc = commsRail->QueueDescs + rcvr->Service*sizeof (EP3_InputQueue); -+ E3_Addr qTop = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_top)); -+ E3_Addr qBase = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qSize = elan3_sdram_readl (dev,qdesc + offsetof (EP3_InputQueue, q_size)); -+ E3_uint32 nfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ nfptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_fptr)); -+ qbptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_bptr)); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ -+ while (nfptr != qbptr) -+ { -+ unsigned nodeId = elan3_sdram_readl (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep3rcvr_flush_filtering: nodeId=%d State=%d\n", rail->Generic.Name, nodeId, rail->Generic.Nodes[nodeId].State); -+ -+ if (rail->Generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan3_sdram_writel (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep3rcvr_flush_flushing (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF6 (DBG_DISCON, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p state %x.%x elan node %d\n", rail->Generic.Name, -+ rcvr, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep3rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep3rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep3rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_MANAGER_MSG_BODY msgBody; -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF6 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p elan node %d state %x.%x\n", rail->Generic.Name, rcvr, rxd, env->NodeId, -+ rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent) && !(EP_IS_NO_FAILOVER(env->Attr))) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF7 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p State %x.%x Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, -+ (long long) env->Xid.Unique, (long long) rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->Generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF3 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->Generic.Name, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF4 (DBG_DISCON, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p elan node %d\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_disconnect_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - not able to failover\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ /* Mark as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Type))); -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Type))); -+ (di->func)(di->arg, " DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ (di->func)(di->arg, " Data=%x Len=%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len))); -+} -+ -+void -+ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t queue = commsRail->QueueDescs + rcvrRail->Generic.Rcvr->Service * sizeof (EP3_InputQueue); -+ E3_Addr qbase = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qtop = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)); -+ E3_uint32 qsize = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ int freeCount = 0; -+ int blockCount = 0; -+ unsigned long flags; -+ struct list_head *el; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ list_for_each (el, &rcvrRail->FreeDescList) -+ freeCount++; -+ list_for_each (el, &rcvrRail->DescBlockList) -+ blockCount++; -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail %d FreeDesc %d (%d) Total %d Blocks %d %s\n", -+ rail->Generic.Number, rcvrRail->FreeDescCount, freeCount, rcvrRail->TotalDescCount, blockCount, -+ rcvrRail->ThreadWaiting ? "ThreadWaiting" : ""); -+ -+ (di->func)(di->arg, " InputQueue state=%x bptr=%x size=%x top=%x base=%x fptr=%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_state)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_bptr)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr))); -+ (di->func)(di->arg, " event=%x.%x [%x.%x] wevent=%x.%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Type)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Count)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Source)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Dest)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wevent)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wcount))); -+ -+ LockRcvrThread (rcvrRail); -+ { -+ E3_Addr nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ EP_ENVELOPE env; -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ -+ while (nfptr != elan3_sdram_readl (dev, queue + offsetof (E3_Queue, q_bptr))) -+ { -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), -+ &env, sizeof (EP_ENVELOPE)); -+ -+ (di->func)(di->arg, " ENVELOPE Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", -+ env.Version, env.Attr, env.Xid.Generation, env.Xid.Handle, (long long) env.Xid.Unique); -+ (di->func)(di->arg, " NodeId=%x Range=%x TxdRail=%x TxdMain=%x.%x.%x\n", -+ env.NodeId, env.Range, env.TxdRail, env.TxdMain.nmd_addr, -+ env.TxdMain.nmd_len, env.TxdMain.nmd_attr); -+ -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ } -+ } -+ UnlockRcvrThread (rcvrRail); -+} -+ -+void -+ep3rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_RCVR_RAIL * ep4rcvr_rail = (EP3_RCVR_RAIL *) rcvr_rail; */ -+} -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsRx_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsRx_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsRx_elan4.c 2005-06-01 23:12:54.653430896 -0400 -@@ -0,0 +1,1758 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan4.c,v 1.30.2.2 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define RCVR_TO_COMMS(rcvrRail) ((EP4_COMMS_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail) -+#define RCVR_TO_RAIL(rcvrRail) ((EP4_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->r_ctxt.ctxt_dev) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+#define RXD_TO_RCVR(txdRail) ((EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail) -+#define RXD_TO_RAIL(txdRail) RCVR_TO_RAIL(RXD_TO_RCVR(rxdRail)) -+ -+static void rxd_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ void -+__ep4_rxd_assert_free (EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int i, failed = 0; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ if (((rxdRail)->rxd_main->rxd_sent[i] != EP4_STATE_FREE)) -+ failed |= (1 << i); -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_FREE)) -+ failed |= (1 << 5); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_FREE)) -+ failed |= (1 << 6); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 7); -+ for (i = 0; i < EP_MAXFRAG; i++) -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << (8 + i)); -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 12); -+ if (((int)(elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) -+ failed |= (1 << 13); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ (rxdRail)->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_FREE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, -+ (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType) + 4, 0); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_pending(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rcvrRail); -+ register int failed = 0; -+ -+ failed |= ((rxdRail)->rxd_main->rxd_done != EP4_STATE_ACTIVE); -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_pending"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_private(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int failed = 0; -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_ACTIVE)) failed |= (1 << 0); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_PRIVATE)) failed |= (1 << 1); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) failed |= (1 << 2); -+ if (((int) (elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) failed |= (1 << 3); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_private: %s - %d\n", file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_private: %s - %d\n", file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_private"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_private_to_free (EP4_RXD_RAIL *rxdRail) -+{ -+ register int i; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FREE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_FREE; -+} -+ -+static __inline__ void -+__ep4_rxd_force_private (EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RXD_TO_RAIL(rxdRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ elan4_sdram_writeq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+} -+ -+#define EP4_RXD_ASSERT_FREE(rxdRail) __ep4_rxd_assert_free(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PENDING(rxdRail) __ep4_rxd_assert_pending(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PRIVATE(rxdRail) __ep4_rxd_assert_private(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_PRIVATE_TO_FREE(rxdRail) __ep4_rxd_private_to_free(rxdRail) -+#define EP4_RXD_FORCE_PRIVATE(rxdRail) __ep4_rxd_force_private(rxdRail) -+ -+static int -+alloc_rxd_block (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL_BLOCK *blk; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_ADDR rxdMainAddr; -+ sdramaddr_t rxdElan; -+ EP_ADDR rxdElanAddr; -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ KMEM_ZALLOC (blk, EP4_RXD_RAIL_BLOCK *, sizeof (EP4_RXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((rxdElan = ep_alloc_elan (&rail->r_generic, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((rxdMain = ep_alloc_main (&rail->r_generic, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdMainAddr)) == (EP4_RXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+ } -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->rxd_generic.RcvrRail = &rcvrRail->rcvr_generic; -+ rxdRail->rxd_elan = rxdElan; -+ rxdRail->rxd_elan_addr = rxdElanAddr; -+ rxdRail->rxd_main = rxdMain; -+ rxdRail->rxd_main_addr = rxdMainAddr; -+ -+ /* reserve 128 bytes of "event" cq space for the chained STEN packets */ -+ if ((rxdRail->rxd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_RXD_STEN_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* allocate a single word of "setevent" command space */ -+ if ((rxdRail->rxd_scq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ goto failed; -+ } -+ -+ /* initialise the completion events */ -+ for (j = 0; j <= EP_MAXFRAG; j++) -+ rxdMain->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdMain->rxd_done = EP4_STATE_FREE; -+ rxdMain->rxd_failed = EP4_STATE_FREE; -+ -+ /* initialise the scq for the thread */ -+ rxdMain->rxd_scq = rxdRail->rxd_scq->ecq_addr; -+ -+ /* initialise the "start" event to copy the first STEN packet into the command queue */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the "chain" events to copy the next STEN packet into the command queue */ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j+1])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ } -+ -+ /* initialise the portions of the sten packets which don't change */ -+ for (j = 0; j < EP_MAXFRAG+1; j++) -+ { -+ if (j < EP_MAXFRAG) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j])); -+ else -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_sent[j]))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_setevent), -+ SET_EVENT_CMD | (rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_nop_cmd), -+ NOP_CMD); -+ } -+ -+ /* register a main interrupt cookie */ -+ ep4_register_intcookie (rail, &rxdRail->rxd_intcookie, rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ rxd_interrupt, rxdRail); -+ -+ /* initialise the command stream for the done event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_done))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the command stream for the fail event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the done and fail events */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the pointer to the main memory portion */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main), -+ rxdMainAddr); -+ -+ /* move onto next descriptor */ -+ rxdElan += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdElanAddr += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) ((unsigned long) rxdMain + EP4_RXD_RAIL_MAIN_SIZE); -+ rxdMainAddr += EP4_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&blk->blk_link, &rcvrRail->rcvr_blocklist); -+ -+ rcvrRail->rcvr_totalcount += EP4_NUM_RXD_PER_BLOCK; -+ rcvrRail->rcvr_freecount += EP4_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->blk_rxds[i].rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ rxdRail--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ } -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+ -+static void -+free_rxd_block (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ rcvrRail->rcvr_totalcount -= EP4_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rcvrRail->rcvr_freecount--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_rxds[0].rxd_elan_addr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+} -+ -+static EP4_RXD_RAIL * -+get_rxd_rail (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = RCVR_TO_SUBSYS(rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ if (list_empty (&rcvrRail->rcvr_freelist)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->rcvr_freelist.next, EP4_RXD_RAIL, rxd_generic.Link); -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ -+ rcvrRail->rcvr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->rcvr_freecount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+free_rxd_rail (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&rxdRail->rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ rcvrRail->rcvr_freecount++; -+ -+ if (rcvrRail->rcvr_freewaiting) -+ { -+ rcvrRail->rcvr_freewaiting--; -+ kcondvar_wakeupall (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+} -+ -+static void -+bind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: bind_rxd_rail: rxd=%p rxdRail=%p\n", rail->r_generic.Name, rxd, rxdRail); -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_rxd), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->rxd_generic; -+ rxdRail->rxd_generic.Rxd = rxd; -+} -+ -+static void -+unbind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->rxd_generic && rxdRail->rxd_generic.Rxd == rxd); -+ -+ EP4_RXD_ASSERT_PRIVATE (rxdRail); -+ -+ EPRINTF3 (DBG_RCVR, "%s: unbind_rxd_rail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rcvrRail)->r_generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->rxd_generic.Rxd = NULL; -+ -+ if (rcvrRail->rcvr_cleanup_waiting) -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rxd->Rcvr->Lock); -+ rcvrRail->rcvr_cleanup_waiting = 0; -+ -+ EP4_RXD_PRIVATE_TO_FREE (rxdRail); -+} -+ -+ -+static void -+rcvr_stall_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ EPRINTF1 (DBG_RCVR, "rcvr_stall_interrupt: rcvrRail %p thread halted\n", rcvrRail); -+ -+ rcvrRail->rcvr_thread_halted = 1; -+ -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+rcvr_stall_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ sdramaddr_t qdesc = ((EP4_COMMS_RAIL *) commsRail)->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_uint64 qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ /* Mark the queue as full by writing the fptr */ -+ if (qbptr == (rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1))) -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), rcvrRail->rcvr_slots_addr); -+ else -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), qbptr + EP_INPUTQ_SIZE); -+ -+ /* Notify the thread that it should stall after processing any outstanding envelopes */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvrRail->rcvr_stall_intcookie.int_val); -+ -+ /* Issue a swtevent to the queue event to wake the thread up */ -+ ep4_set_event_cmd (rcvrRail->rcvr_resched, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent)); -+} -+ -+static void -+rxd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) arg; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ EP4_RXD_RAIL_MAIN *rxdMain = rxdRail->rxd_main; -+ unsigned long delay = 1; -+ EP_RXD *rxd; -+ EP_ENVELOPE *env; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ for (;;) -+ { -+ if (rxdMain->rxd_done == EP4_STATE_FINISHED || rxdMain->rxd_failed == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to rxd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "rxd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY(delay); -+ delay <<= 1; -+ } -+ -+ if (rxdMain->rxd_done != EP4_STATE_FINISHED) -+ { -+ EPRINTF8 (DBG_RETRY, "%s: rxd_interrupt: rxdRail %p retry: done=%d failed=%d NodeId=%d XID=%08x.%08x.%016llx\n", -+ rail->r_generic.Name, rxdRail, (int)rxdMain->rxd_done, (int)rxdMain->rxd_failed, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.NodeId, -+ rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Generation, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Handle, -+ rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Unique); -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX backoff ? */ -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ -+ rxd = rxdRail->rxd_generic.Rxd; -+ env = &rxd->RxdMain->Envelope; -+ -+ /* -+ * Note, since the thread will have sent the remote dma packet before copying -+ * the envelope, we must check that it has completed doing this, we do this -+ * by acquiring the spinlock against the thread which it only drops once it's -+ * completed. -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ EP4_SPINENTER (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ EP4_SPINEXIT (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF8 (DBG_RCVR, "%s: rxd_interrupt: rxd %p finished from %d XID %08x.%08x.%016llx len %d attr %x\n", rail->r_generic.Name, -+ rxd, rxd->RxdMain->Envelope.NodeId, rxd->RxdMain->Envelope.Xid.Generation, rxd->RxdMain->Envelope.Xid.Handle, -+ rxd->RxdMain->Envelope.Xid.Unique, rxd->RxdMain->Len, rxd->RxdMain->Envelope.Attr); -+ -+ rxdMain->rxd_done = EP4_STATE_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ -+ if (rxd->RxdMain->Len >= 0) { -+ INC_STAT(rcvrRail->rcvr_generic.stats,rx); -+ ADD_STAT(rcvrRail->rcvr_generic.stats,rx_len,rxd->RxdMain->Len); -+ INC_STAT(rail->r_generic.Stats,rx); -+ ADD_STAT(rail->r_generic.Stats,rx_len,rxd->RxdMain->Len); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+ break; -+ -+ case EP_RXD_PUT_ACTIVE: -+ case EP_RXD_GET_ACTIVE: -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+ break; -+ -+ case EP_RXD_COMPLETE_ACTIVE: -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler(rxd); -+ break; -+ -+ default: -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ printk ("%s: rxd_interrupt: rxd %p in invalid state %d\n", rail->r_generic.Name, rxd, rxd->State); -+ /* NOTREACHED */ -+ } -+} -+ -+static void -+ep4rcvr_flush_filtering (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_Addr qbase = rcvrRail->rcvr_slots_addr; -+ E4_Addr qlast = qbase + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1); -+ E4_uint64 qfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* zip down the input queue and invalidate any envelope we find to a node which is locally passivated */ -+ qfptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)); -+ qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ while (qfptr != qbptr) -+ { -+ unsigned int nodeId = elan4_sdram_readl (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep4rcvr_flush_filtering: nodeId=%d State=%d\n", rail->r_generic.Name, nodeId, rail->r_generic.Nodes[nodeId].State); -+ -+ if (rail->r_generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan4_sdram_writel (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (qfptr != qlast) -+ qfptr += EP_INPUTQ_SIZE; -+ else -+ qfptr = qbase; -+ } -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep4rcvr_flush_flushing (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ /* remove any sten packates which are retrying to nodes which are being passivated */ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ list_for_each_safe (el, nel, &rcvrRail->rcvr_retrylist) { -+ EP4_RXD_RAIL *rxdRail = list_entry (el, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4rcvr_flush_flushing: removing rxdRail %p from retry list\n", rail->r_generic.Name, rxdRail); -+ -+ list_del (&rxdRail->rxd_retry_link); -+ } -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL (rxdRail, rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF5 (DBG_DISCON, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p state %d elan node %d\n", -+ rail->r_generic.Name, rcvr, rxd, (int)rxdRail->rxd_main->rxd_done, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep4rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#if SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+#if SUPPORT_RAIL_FAILOVER -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+ EP_MANAGER_MSG_BODY msgBody; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF5 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p elan node %d state %d\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, (int)rxdRail->rxd_main->rxd_done); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE(rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+#if SUPPORT_RAIL_FAILOVER -+ /* XXXX - no rail failover for now .... */ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE && !EP_IS_NO_FAILOVER(env->Attr)) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF6 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p State %d Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->r_generic.Name, rxd, rxd->State, env->Xid.Unique, rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->r_generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ EPRINTF3 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->r_generic.Name, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF5 (DBG_DISCON, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p elan node %d state %x\n", rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_disconnect_callback: rxd state is free but bound to a rail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE || rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE) /* incomplete RPC */ -+ { -+ EPRINTF5 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d state %x - not able to failover\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_disconnect_callback: rxd state is aborted but bound to a rail\n"); -+ break; -+ } -+ -+ printk ("%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || env->NodeId != nodeId) -+ continue; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE) -+ { -+ EP_NETERR_COOKIE cookie; -+ unsigned int first, this; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE) -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(env->Attr) ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ else -+ first = (EP_MAXFRAG+1) - rxd->nFrags; -+ -+ for (this = first; this < (EP_MAXFRAG+1); this++) -+ if (rxdRail->rxd_main->rxd_sent[this] == EP4_STATE_ACTIVE) -+ break; -+ -+ if (this > first) -+ { -+ /* Look at the last completed STEN packet and if it's neterr cookie matches, then change -+ * the rxd to look the same as if the sten packet had failed and then schedule it for retry */ -+ cookie = elan4_sdram_readq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[--this].c_cookie)); -+ -+ if (cookie == cookies[0] || cookie == cookies[1]) -+ { -+ EPRINTF5 (DBG_NETWORK_ERROR, "%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this); -+ -+ printk ("%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d : time %ld\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this, rxdRail->rxd_retry_time); -+ -+ rxdRail->rxd_main->rxd_sent[this] = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FAILED; -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ ASSERT (rxdRail->rxd_retry_time == 0); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ } -+ } -+ } -+ } -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL *rxdRail; -+ register int i; -+ -+ ASSERT (SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = get_rxd_rail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p buffer %x len %x\n", -+ rail->r_generic.Name, rxd->Rcvr, rxd, rxdRail, rxd->Data.nmd_addr, rxd->Data.nmd_len); -+ -+ /* bind the rxdRail and rxd together */ -+ bind_rxd_rail (rxd, rxdRail); -+ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Mark as active */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x00, /* %r0 */ -+ ep_symbol (&rail->r_threadcode, "c_queue_rxd")); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x10, /* %r2 */ -+ rcvrRail->rcvr_elan_addr); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x18, /* %r3 */ -+ rxdRail->rxd_elan_addr); -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_start)); -+ -+ return 1; -+} -+ -+void -+ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ if (i == (nFrags-1)) -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ else -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_GET_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(env->NodeId))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_trans), -+ SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_STEN); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_typeSize), -+ E4_DMA_TYPE_SIZE (local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_vproc), -+ EP_VP_DATA (rail->r_generic.Position.pos_nodeid)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcAddr), -+ remote->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstAddr), -+ local->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcEvent), -+ 0); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstEvent), -+ i == (nFrags-1) ? rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done) : -+ rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i])); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags - 1; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the status block dma */ -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(EP_STATUSBLK_SIZE, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ cmd.c_dma_dstAddr = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ cmd.c_dma_dstEvent = env->TxdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done);; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, (int) cmd.c_dma_srcAddr, (int) cmd.c_dma_dstAddr, EP_STATUSBLK_SIZE); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[EP_MAXFRAG]), sizeof (EP4_RXD_DMA_CMD)); -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+EP_RXD * -+ep4rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ /* XXXX - TBD */ -+ return NULL; -+} -+ -+long -+ep4rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ -+ if (rcvrRail->rcvr_freecount < ep_rxd_lowat && !alloc_rxd_block (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4rcvr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ while (! list_empty (&rcvrRail->rcvr_retrylist)) -+ { -+ EP4_RXD_RAIL *rxdRail = list_entry (rcvrRail->rcvr_retrylist.next, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ unsigned int first = (EP_MAXFRAG+1) - ((env->Attr & EP_MULTICAST ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ -+ if (BEFORE (lbolt, rxdRail->rxd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, rxdRail->rxd_retry_time)) -+ nextRunTime = rxdRail->rxd_retry_time; -+ -+ break; -+ } -+ -+ list_del (&rxdRail->rxd_retry_link); -+ rxdRail->rxd_retry_time = 0; -+ -+ /* determine which sten packet to resubmit */ -+ for (; first < (EP_MAXFRAG+1); first++) -+ if (rxdRail->rxd_main->rxd_sent[first] == EP4_STATE_ACTIVE) -+ break; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4rcvr_retry: rxdRail %p, reissuing sten[%d]\n", rail->r_generic.Name, rxdRail, first); -+ -+ /* re-initialise the fail event */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ -+ /* re-initialise the chain event to resubmit this sten packet */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first-1].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ /* finally issue the setevent to start the chain again */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdescs = ((EP4_COMMS_RAIL *) commsRail)->r_descs; -+ EP4_RCVR_RAIL *rcvrRail; -+ E4_InputQueue qdesc; -+ E4_ThreadRegs tregs; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP4_RCVR_RAIL *, sizeof (EP4_RCVR_RAIL), 1); -+ -+ spin_lock_init (&rcvrRail->rcvr_freelock); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_freelist); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_blocklist); -+ -+ kcondvar_init (&rcvrRail->rcvr_cleanup_sleep); -+ kcondvar_init (&rcvrRail->rcvr_freesleep); -+ -+ INIT_LIST_HEAD (&rcvrRail->rcvr_retrylist); -+ spin_lock_init (&rcvrRail->rcvr_retrylock); -+ -+ rcvrRail->rcvr_generic.CommsRail = commsRail; -+ rcvrRail->rcvr_generic.Rcvr = rcvr; -+ -+ rcvrRail->rcvr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RCVR_RAIL_MAIN), 0, &rcvrRail->rcvr_main_addr); -+ rcvrRail->rcvr_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RCVR_RAIL_ELAN), 0, &rcvrRail->rcvr_elan_addr); -+ rcvrRail->rcvr_slots = ep_alloc_elan (&rail->r_generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->rcvr_slots_addr); -+ stack = ep_alloc_elan (&rail->r_generic, EP4_STACK_SIZE, 0, &rcvrRail->rcvr_stack); -+ -+ /* allocate a command queue for the thread to use, plus space for it to wait/reschedule */ -+ rcvrRail->rcvr_ecq = ep4_alloc_ecq (rail, CQ_Size64K); -+ rcvrRail->rcvr_resched = ep4_get_ecq (rail, EP4_ECQ_ATOMIC, 8); -+ -+ ep4_register_intcookie (rail, &rcvrRail->rcvr_stall_intcookie, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvr_stall_interrupt, rcvrRail); -+ -+ /* Initialise the elan portion */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_halt.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp), -+ rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head)); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qbase), rcvrRail->rcvr_slots_addr); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qlast), -+ rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)); -+ -+ /* Initialise the main memory portion */ -+ rcvrRail->rcvr_main->rcvr_thread_lock = 0; -+ -+ /* Install our retry handler */ -+ rcvrRail->rcvr_retryops.op_func = ep4rcvr_retry; -+ rcvrRail->rcvr_retryops.op_arg = rcvrRail; -+ -+ ep4_add_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ /* Update the queue desriptor */ -+ qdesc.q_bptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_fptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_control = E4_InputQueueControl (rcvrRail->rcvr_slots_addr, rcvrRail->rcvr_slots_addr + (EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)), EP_INPUTQ_SIZE); -+ qdesc.q_event = rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ -+ ep4_write_qdesc (rail, qdescs + (rcvr->Service * EP_QUEUE_DESC_SIZE), &qdesc); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = &rcvrRail->rcvr_generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ { -+ sdramaddr_t stackTop = stack + EP4_STACK_SIZE; -+ E4_Addr stackTopAddr = rcvrRail->rcvr_stack + EP4_STACK_SIZE; -+ -+ ep4_init_thread (rail, &tregs, stackTop, stackTopAddr, ep_symbol (&rail->r_threadcode, "ep4comms_rcvr"), 6, -+ (E4_uint64) rail->r_elan_addr, (E4_uint64) rcvrRail->rcvr_elan_addr, (E4_uint64) rcvrRail->rcvr_main_addr, -+ (E4_uint64) EP_MSGQ_ADDR(rcvr->Service), (E4_uint64) rcvrRail->rcvr_ecq->ecq_addr, (E4_uint64) rcvrRail->rcvr_resched->ecq_addr); -+ } -+ -+ /* Issue the command to the threads private command queue */ -+ elan4_run_thread_cmd (rcvrRail->rcvr_ecq->ecq_cq, &tregs); -+ -+ ep_procfs_rcvr_add_rail(&(rcvrRail->rcvr_generic)); -+} -+ -+void -+ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rcvr->Rails[rail->r_generic.Number]; -+ ELAN4_HALTOP haltop; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ ep_procfs_rcvr_del_rail(&(rcvrRail->rcvr_generic)); -+ -+ /* Run a halt operation to mark the input queue as full and -+ * request the thread to halt */ -+ haltop.op_mask = INT_DiscardingHighPri | INT_TProcHalted; -+ haltop.op_function = rcvr_stall_haltop; -+ haltop.op_arg = rcvrRail; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &haltop); -+ -+ /* Wait for the thread to tell us it's processed the input queue */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ while (! rcvrRail->rcvr_thread_halted) -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ rcvrRail->rcvr_thread_halted = 0; -+ -+ /* flag the rail as no longer available */ -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ /* wait for all active communications to terminate */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ rcvrRail->rcvr_cleanup_waiting++; -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ EP4_RXD_ASSERT_PENDING (rxdRail); -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ while (rcvrRail->rcvr_freecount != rcvrRail->rcvr_totalcount) -+ { -+ rcvrRail->rcvr_freewaiting++; -+ kcondvar_wait (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->rcvr_blocklist)) -+ free_rxd_block (rcvrRail, list_entry(rcvrRail->rcvr_blocklist.next, EP4_RXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->rcvr_totalcount == 0) && (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount)); -+ -+ ep4_remove_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ ep4_deregister_intcookie (rail, &rcvrRail->rcvr_stall_intcookie); -+ -+ ep4_put_ecq (rail, rcvrRail->rcvr_resched, 8); -+ ep4_free_ecq (rail, rcvrRail->rcvr_ecq); -+ -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_stack, EP4_STACK_SIZE); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_slots_addr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_elan_addr, sizeof (EP4_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->r_generic, rcvrRail->rcvr_main_addr, sizeof (EP4_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP4_RCVR_RAIL)); -+} -+ -+void -+ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int i; -+ -+ (di->func)(di->arg, " Rail %d rxd %p elan %lx(%x) main %p(%x) ecq %d scq %d debug %llx\n", rail->r_generic.Number, -+ rxdRail, rxdRail->rxd_elan, rxdRail->rxd_elan_addr, rxdRail->rxd_main, rxdRail->rxd_main_addr, -+ elan4_cq2num(rxdRail->rxd_ecq->ecq_cq), elan4_cq2num(rxdRail->rxd_scq->ecq_cq), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_debug))); -+ (di->func)(di->arg, " start %016llx %016llx %016llx [%016llx %016llx]\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_dma_cookie))); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ (di->func)(di->arg, " chain[%d] %016llx %016llx %016llx [%016llx %016llx]\n", i, -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_dma_cookie))); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[1])), -+ rxdRail->rxd_main->rxd_done); -+ (di->func)(di->arg, " fail %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[1])), -+ rxdRail->rxd_main->rxd_failed); -+ (di->func)(di->arg, " next %016llx queued %016llx main %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_next)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_queued)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main))); -+ (di->func)(di->arg, " sent %016llx %016llx %016llx %016llx %016llx\n", -+ rxdRail->rxd_main->rxd_sent[0], rxdRail->rxd_main->rxd_sent[1], rxdRail->rxd_main->rxd_sent[2], -+ rxdRail->rxd_main->rxd_sent[3], rxdRail->rxd_main->rxd_sent[4]); -+} -+ -+void -+ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP_RCVR *rcvr = r->Rcvr; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t rcvrElan = rcvrRail->rcvr_elan; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ sdramaddr_t event = rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ unsigned int freeCount = 0; -+ unsigned int blockCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ list_for_each (el, &rcvrRail->rcvr_freelist) -+ freeCount++; -+ list_for_each (el, &rcvrRail->rcvr_blocklist) -+ blockCount++; -+ spin_unlock_irqrestore(&rcvrRail->rcvr_freelock, flags); -+ -+ (di->func)(di->arg, " Rail %d elan %lx(%x) main %p(%x) ecq %d resched %d debug %llx\n", -+ rail->r_generic.Number, rcvrRail->rcvr_elan, rcvrRail->rcvr_elan_addr, -+ rcvrRail->rcvr_main, rcvrRail->rcvr_main_addr, elan4_cq2num(rcvrRail->rcvr_ecq->ecq_cq), -+ elan4_cq2num (rcvrRail->rcvr_resched->ecq_cq), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_debug))); -+ (di->func)(di->arg, " free %d (%d) total %d blocks %d\n", -+ rcvrRail->rcvr_freecount, freeCount, rcvrRail->rcvr_totalcount, blockCount); -+ (di->func)(di->arg, " spinlock %016llx %016llx\n", rcvrRail->rcvr_main->rcvr_thread_lock, -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock))); -+ (di->func)(di->arg, " queue: bptr %016llx fptr %016llx control %016llx (base %lx %x)\n", -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_control)), -+ rcvrRail->rcvr_slots, rcvrRail->rcvr_slots_addr); -+ (di->func)(di->arg, " event %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[0])), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[1]))); -+ (di->func)(di->arg, " pending_tailp %016llx pending_head %016llx\n", -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp)), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head))); -+} -+ -+void -+ep4rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_RCVR_RAIL * ep4rcvr_rail = (EP4_RCVR_RAIL *) rcvr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsTx.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsTx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsTx.c 2005-06-01 23:12:54.654430744 -0400 -@@ -0,0 +1,919 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx.c,v 1.25.2.5 2004/12/09 10:02:42 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+unsigned int ep_txd_lowat = 5; -+ -+static int -+AllocateTxdBlock (EP_XMTR *xmtr, EP_ATTRIBUTE attr, EP_TXD **txdp) -+{ -+ EP_TXD_BLOCK *blk; -+ EP_TXD *txd; -+ EP_TXD_MAIN *pTxdMain; -+ int i; -+ unsigned long flags; -+ -+ EPRINTF1 (DBG_XMTR, "AllocateTxdBlock: xmtr=%p\n", xmtr); -+ -+ KMEM_ZALLOC (blk, EP_TXD_BLOCK *, sizeof (EP_TXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return -ENOMEM; -+ -+ if ((pTxdMain = ep_shared_alloc_main (xmtr->Subsys->Subsys.Sys, EP_TXD_MAIN_SIZE * EP_NUM_TXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+ return -ENOMEM; -+ } -+ -+ for (txd = &blk->Txd[0], i = 0; i < EP_NUM_TXD_PER_BLOCK; i++, txd++) -+ { -+ txd->Xmtr = xmtr; -+ txd->TxdMain = pTxdMain; -+ -+ ep_nmd_subset (&txd->NmdMain, &blk->NmdMain, (i * EP_TXD_MAIN_SIZE), EP_TXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pTxdMain = (EP_TXD_MAIN *) ((unsigned long) pTxdMain + EP_TXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtr->DescBlockList); -+ xmtr->TotalDescCount += EP_NUM_TXD_PER_BLOCK; -+ -+ for (i = txdp ? 1 : 0; i < EP_NUM_TXD_PER_BLOCK; i++) -+ { -+ list_add (&blk->Txd[i].Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) -+ { -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (txdp) -+ *txdp = &blk->Txd[0]; -+ -+ return 0; -+} -+ -+static void -+FreeTxdBlock (EP_XMTR *xmtr, EP_TXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_del (&blk->Link); -+ -+ xmtr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ xmtr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ ep_shared_free_main (xmtr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+} -+ -+static EP_TXD * -+GetTxd (EP_XMTR *xmtr, EP_ATTRIBUTE attr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_TXD *txd; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ while (list_empty (&xmtr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (AllocateTxdBlock (xmtr, attr, &txd) == ESUCCESS) -+ return (txd); -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ return (NULL); -+ } -+ -+ xmtr->FreeDescWanted++; -+ kcondvar_wait (&xmtr->FreeDescSleep, &xmtr->FreeDescLock, &flags); -+ } -+ -+ txd = list_entry (xmtr->FreeDescList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (--xmtr->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txd); -+} -+ -+void -+FreeTxd (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&txd->Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+} -+ -+int -+TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail) -+{ -+ EP_TXD *txd = txdRail->Txd; -+ EP_XMTR *xmtr = txd->Xmtr; -+ EP_ATTRIBUTE attr = txd->Envelope.Attr; -+ int stabilise; -+ extern int txd_stabilise; -+ -+ switch (EP_ATTR2TYPE (attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: /* is the rail in the current service indicator rail mask */ -+ if ((txd_stabilise & 4) == 0) -+ return 0; -+ -+ stabilise = (ep_xmtr_svc_indicator_railmask (xmtr, EP_ATTR2DATA (attr), txd->NodeId) & EP_RAIL2RAILMASK (rail->Number)) == 0; -+ break; -+ -+ case EP_TYPE_TIMEOUT: -+ if ((txd_stabilise & 2) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_ATTR2DATA(attr)); -+ break; -+ -+ default: -+ if ((txd_stabilise & 1) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_DEFAULT_TIMEOUT); -+ break; -+ } -+ -+ if (stabilise) -+ { -+ txd->Envelope.Attr = EP_SET_TXD_STABALISING(txd->Envelope.Attr); -+ txd->RetryTime = lbolt; -+ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ -+ return stabilise; -+} -+ -+void ep_xmtr_txd_stat(EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ int f; -+ unsigned long size; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ size = 0; -+ for (f=0; f < txd->Envelope.nFrags; f++) -+ size += txd->Envelope.Frags[f].nmd_len; -+ -+ INC_STAT(xmtr->stats,tx); -+ ADD_STAT(xmtr->stats,tx_len, size); -+ -+ if ((txdRail != NULL) && (txdRail->XmtrRail != NULL)){ -+ INC_STAT(txdRail->XmtrRail->stats,tx); -+ ADD_STAT(txdRail->XmtrRail->stats,tx_len, size); -+ -+ if ((txdRail->XmtrRail->CommsRail != NULL) && ( txdRail->XmtrRail->CommsRail->Rail != NULL)) { -+ INC_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx); -+ ADD_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx_len, size); -+ } -+ } -+} -+ -+static int -+PollActiveTransmitList (EP_XMTR *xmtr, int flag) -+{ -+ struct list_head *el, *nel; -+ struct list_head list; -+ unsigned long flags; -+ int count; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail == NULL) -+ continue; -+ -+ ASSERT (txdRail->Txd == txd); -+ -+ if (EP_XMTR_OP (txdRail->XmtrRail,PollTxd) (txdRail->XmtrRail, txdRail, flags)) -+ { -+ list_del (&txd->Link); /* remove from active transmit list */ -+ list_add_tail (&txd->Link, &list); /* and add to list to call handlers */ -+ } -+ } -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ for (count = 0; !list_empty (&list); count++) -+ { -+ EP_TXD *txd = list_entry (list.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ return (count); -+} -+ -+static inline void -+DoTransmit (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ EP_RAILMASK nmdRailMask = ep_nmd2railmask (txd->Envelope.Frags, txd->Envelope.nFrags); -+ EP_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int rnum; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ if (EP_IS_SVC_INDICATOR(txd->Envelope.Attr)) -+ nmdRailMask = nmdRailMask & ep_xmtr_svc_indicator_railmask(xmtr, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId); -+ -+ if (EP_IS_PREFRAIL_SET(txd->Envelope.Attr)) -+ rnum = EP_ATTR2PREFRAIL(txd->Envelope.Attr); -+ else -+ rnum = ep_xmtr_prefrail (xmtr, nmdRailMask, txd->NodeId); -+ -+ if (rnum < 0 || !(nmdRailMask & EP_RAIL2RAILMASK(rnum))) -+ xmtrRail = NULL; -+ else -+ xmtrRail = xmtr->Rails[rnum]; -+ -+ /* Allocate the XID while holding the xmtr->Lock from our XID cache */ -+ txd->Envelope.Xid = ep_xid_cache_alloc (xmtr->Subsys->Subsys.Sys, &xmtr->XidCache); -+ -+ EPRINTF7 (DBG_XMTR, "ep: transmit txd %p to %d/%d: Xid %llx nFrags %d [%08x.%d]\n", -+ txd, txd->NodeId, txd->Service, (long long) txd->Envelope.Xid.Unique, -+ txd->Envelope.nFrags, txd->Envelope.Frags[0].nmd_addr, txd->Envelope.Frags[0].nmd_len); -+ -+ /* Store time transmit started to timeout if not received */ -+ txd->TimeStamp = lbolt; -+ -+ /* Initialise the retry backoff */ -+ txd->Backoff.type = EP_BACKOFF_FREE; -+ -+ list_add_tail (&txd->Link, &xmtr->ActiveDescList); -+ -+ if (xmtrRail == NULL || !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ PollActiveTransmitList (xmtr, POLL_TX_LIST); -+} -+ -+EP_STATUS -+ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_CLEAR_LOCAL_ATTR(attr); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, DataXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, EP_SERVICE service, -+ EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ EP_TXD *txd; -+ int nnodes; -+ int i, len; -+ unsigned long flags; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if (destLo == -1) -+ destLo = sys->Position.pos_nodeid & ~(EP_MAX_NODES-1); -+ -+ if (destHi == -1 && (destHi = ((sys->Position.pos_nodeid + EP_MAX_NODES) & ~(EP_MAX_NODES-1)) - 1) >= sys->Position.pos_nodes) -+ destHi = sys->Position.pos_nodes-1; -+ -+ nnodes = (destHi-destLo+1); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (destLo, destHi); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_IS_SVC_INDICATOR(attr)) -+ ep_xmtr_svc_indicator_bitmap(xmtr, EP_ATTR2DATA(attr), txd->TxdMain->Bitmap, destLo, nnodes); -+ else -+ bt_subset (statemap_tobitmap(sys->NodeSet), txd->TxdMain->Bitmap, destLo, nnodes); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (bitmap != NULL) /* bitmap supplied, so intersect it with */ -+ bt_intersect (txd->TxdMain->Bitmap, bitmap, nnodes); /* the current node set map */ -+ -+ if ((attr & EP_NOT_MYSELF) && destLo <= sys->Position.pos_nodeid && sys->Position.pos_nodeid <= destHi) -+ BT_CLEAR (txd->TxdMain->Bitmap, (sys->Position.pos_nodeid-destLo)); /* clear myself if not wanted */ -+ -+ if ((i = bt_lowbit (txd->TxdMain->Bitmap, nnodes)) < 0) -+ { -+ FreeTxd (xmtr, txd); -+ return (EP_NODE_DOWN); -+ } -+ -+ txd->NodeId = (unsigned short) i; -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_RPC(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, RPCXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_ENVELOPE *env, EP_PAYLOAD *payload, bitmap_t *bitmap, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = env->Range; -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ bt_copy (bitmap, txd->TxdMain->Bitmap, EP_RANGE_HIGH(env->Range) - EP_RANGE_LOW(env->Range) + 1); -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+int -+ep_poll_transmits (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, POLL_TX_LIST)); -+} -+ -+int -+ep_enable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, ENABLE_TX_CALLBACK)); -+} -+ -+int -+ep_disable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, DISABLE_TX_CALLBACK)); -+} -+ -+/* functions for accessing fields of txds */ -+int ep_txd_node(EP_TXD *txd) { return (txd->NodeId); } -+EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd) { return (&txd->TxdMain->StatusBlk); } -+ -+void -+ep_xmtr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *) arg; -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Body.Failover.Xid, txd->Envelope.Xid)) -+ { -+ EP_XMTR_RAIL *xmtrRail = txdRail->XmtrRail; -+ EP_RAIL *rail = xmtrRail->CommsRail->Rail; -+ EP_MANAGER_MSG_BODY msgBody; -+ int rnum; -+ -+ if (! (msg->Body.Failover.Railmask & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ /* Need to failover this txd to a different rail, select a rail from -+ * the set that she has asked us to use and which is connected to her -+ * on this transmitter. If there are no such rails, then in all probability -+ * we're offline on all common rails and eventually she will see we have no -+ * rails in common and abort the receive. */ -+ if ((rnum = ep_xmtr_prefrail (xmtr, msg->Body.Failover.Railmask, txd->NodeId)) < 0) -+ ep_debugf (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST but can't determine rail (%04x,%04x,%d,%04x)\n", -+ rail->Name, msg->Body.Failover.Railmask, xmtr->RailMask, txd->NodeId, sys->Nodes[txd->NodeId].ConnectedRails); -+ else -+ { -+ EP_XMTR_RAIL *nXmtrRail = xmtr->Rails[rnum]; -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST txd=%p XID=%llx-> rail %d\n", rail->Name, txd, (long long) txd->Envelope.Xid.Unique, rnum); -+ -+ /* Bind the txd rail onto the new rail - it doesn't matter if we fail -+ * as it will remain bound to the original rail */ -+ (void) EP_XMTR_OP (nXmtrRail, BindTxd) (txd, nXmtrRail, EP_TXD_PHASE_PASSIVE); -+ } -+ } -+ -+ /* Send a failover response including an envelope update */ -+ msgBody.FailoverTxd.Rail = rail->Number; -+ msgBody.FailoverTxd.Xid = txd->Envelope.Xid; -+ msgBody.FailoverTxd.TxdRail = txd->Envelope.TxdRail; -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE, msg->Hdr.Xid, &msgBody); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: { -+ int txd_has_not_sent_envelope = 0; -+ EP_TXD *txd = NULL; -+ EP_TXD_RAIL *txdRail = NULL; -+ -+ if (msg->Body.NodeState.NetworkErrorState != 0) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt + MESSAGE_RETRY_TIME); -+ else -+ { -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ -+ txd = list_entry (el, EP_TXD, Link); -+ txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Hdr.Xid, txd->Envelope.Xid)) { -+ txd_has_not_sent_envelope = EP_XMTR_OP(txdRail->XmtrRail,CheckTxdState)(txd); -+ break; -+ } -+ } -+ -+ if (txd_has_not_sent_envelope) { -+ EPRINTF2 (DBG_STABILISE, "ep_xmtr_xid_msg_handler: GET_NODE_STATE_RESPONSE txd=%p XID=%llx not sent envelope\n", -+ txd, (long long) txd->Envelope.Xid.Unique); -+ -+ /* at this point it has finished stabalising */ -+ txd->Envelope.Attr = EP_CLEAR_TXD_STABALISING(txd->Envelope.Attr); -+ -+ /* store railmask into txd if not a service indicator or timeout */ -+ if (EP_IS_NO_TYPE(txd->Envelope.Attr)) -+ txd->Envelope.Attr = EP_SET_DATA(txd->Envelope.Attr, EP_TYPE_RAILMASK, msg->Body.NodeState.Railmask); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* TXD is now no longer bound to a rail , so let ep_check_xmtr() handle it */ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ else -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ break; -+ } -+ default: -+ panic ("ep_xmtr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+} -+ -+EP_XMTR * -+ep_alloc_xmtr (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_XMTR *xmtr; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (xmtr, EP_XMTR *, sizeof (EP_XMTR), 1); -+ -+ if (xmtr == NULL) -+ return (NULL); -+ -+ xmtr->Subsys = subsys; -+ -+ spin_lock_init (&xmtr->Lock); -+ INIT_LIST_HEAD (&xmtr->ActiveDescList); -+ -+ kcondvar_init (&xmtr->FreeDescSleep); -+ spin_lock_init (&xmtr->FreeDescLock); -+ INIT_LIST_HEAD (&xmtr->FreeDescList); -+ INIT_LIST_HEAD (&xmtr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &xmtr->XidCache); -+ -+ xmtr->XidCache.MessageHandler = ep_xmtr_xid_msg_handler; -+ xmtr->XidCache.Arg = xmtr; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&xmtr->Link, &subsys->Transmitters); -+ -+ ep_procfs_xmtr_add(xmtr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (xmtr); -+} -+ -+void -+ep_free_xmtr (EP_XMTR *xmtr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *rail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(rail,Xmtr.DelRail) (xmtr, rail); -+ } -+ -+ list_del (&xmtr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ /* all the desc's must be free */ -+ ASSERT(xmtr->FreeDescCount == xmtr->TotalDescCount); -+ -+ /* delete the descs */ -+ while (!list_empty (&xmtr->DescBlockList)) -+ FreeTxdBlock( xmtr, list_entry(xmtr->DescBlockList.next, EP_TXD_BLOCK , Link)); -+ -+ /* they had better all be gone now */ -+ ASSERT((xmtr->FreeDescCount == 0) && (xmtr->TotalDescCount == 0)); -+ -+ ep_procfs_xmtr_del(xmtr); -+ -+ ep_xid_cache_destroy (sys, &xmtr->XidCache); -+ -+ spin_lock_destroy (&xmtr->Lock); -+ KMEM_FREE (xmtr, sizeof (EP_XMTR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+long -+ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ int timed_out=0; -+ int i; -+ EP_MANAGER_MSG_BODY body; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ /* See if we have any txd's which need to be bound to a rail */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_NODE *node = &sys->Nodes[txd->NodeId]; -+ EP_RAILMASK nodeRails = node->ConnectedRails & xmtr->RailMask; -+ EP_ENVELOPE *env = &txd->Envelope; -+ -+ if (EP_IS_TXD_STABALISING(txd->Envelope.Attr)) -+ { -+ ASSERT(txd->TxdRail != NULL); -+ -+ if (AFTER (lbolt, txd->RetryTime)) -+ { -+ EPRINTF6 (DBG_STABILISE, "ep_check_xmtr txd=%p txdRail=%p send get node state to %d Xid=%08x.%08x.%016llx\n", -+ txd, txd->TxdRail, txd->NodeId, env->Xid.Generation, env->Xid.Handle, env->Xid.Unique); -+ -+ body.Service = txd->Service; -+ if (ep_send_message ( txd->TxdRail->XmtrRail->CommsRail->Rail, txd->NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE, env->Xid, &body) == 0) -+ txd->RetryTime = lbolt + (MESSAGE_RETRY_TIME << ep_backoff (&txd->Backoff, EP_BACKOFF_STABILISE)); -+ else -+ txd->RetryTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ ep_kthread_schedule (&subsys->Thread, txd->RetryTime); -+ continue; -+ } -+ -+ if (txd->TxdRail != NULL) -+ continue; -+ -+ switch (EP_ATTR2TYPE(txd->Envelope.Attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: -+ { -+ EP_RAILMASK rmask=0; -+ struct list_head *tmp; -+ -+ list_for_each (tmp, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (tmp, EP_COMMS_RAIL, Link); -+ if ( cm_svc_indicator_is_set(commsRail->Rail, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ nodeRails &= rmask; -+ break; -+ } -+ case EP_TYPE_TIMEOUT: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_ATTR2DATA(txd->Envelope.Attr)) ? (1) : (0); -+ break; -+ case EP_TYPE_RAILMASK: -+ nodeRails &= EP_ATTR2DATA(txd->Envelope.Attr); -+ break; -+ default: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_DEFAULT_TIMEOUT) ? (1) : (0); -+ break; -+ } -+ -+ if (nodeRails == 0 || timed_out || (EP_IS_NO_FAILOVER(env->Attr) && EP_IS_PREFRAIL_SET(env->Attr) && -+ (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr))) == 0)) -+ { -+ EPRINTF5 (timed_out ? DBG_STABILISE : DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx to %d no rails connected or cannot failover (nodeRails=0x%x,timed_out=%d\n", -+ txd, (long long) env->Xid.Unique, txd->NodeId, nodeRails, timed_out); -+ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ } -+ else -+ { -+ EP_XMTR_RAIL *xmtrRail; -+ int i, len, rnum; -+ -+ if (EP_IS_PREFRAIL_SET(env->Attr) && (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr)))) -+ rnum = EP_ATTR2PREFRAIL(env->Attr); -+ else -+ rnum = ep_pickRail (nodeRails); -+ -+ EPRINTF3 (DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx mapping NMDs onto rail %d \n", txd, (long long) env->Xid.Unique, rnum); -+ -+ for (i = len = 0; i < env->nFrags; i++, len += env->Frags[i].nmd_len) -+ ep_nmd_map_rails (sys, &env->Frags[i], nodeRails); -+ -+ if ((xmtrRail = xmtr->Rails[rnum]) == NULL || -+ !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&subsys->Thread, lbolt + RESOURCE_RETRY_TIME); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_NODE_DOWN); -+ FreeTxd (xmtr, txd); -+ } -+ -+ /* Check to see if we're low on txds */ -+ if (xmtr->FreeDescCount < ep_txd_lowat) -+ AllocateTxdBlock (xmtr, 0, NULL); -+ -+ /* Then check each rail */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->RailMask & (1 << i) ) -+ nextRunTime = EP_XMTR_OP (xmtr->Rails[i],Check) (xmtr->Rails[i], nextRunTime); -+ return (nextRunTime); -+} -+ -+void -+ep_display_txd (DisplayInfo *di, EP_TXD *txd) -+{ -+ EP_ENVELOPE *env = &txd->Envelope; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ (di->func)(di->arg, "TXD: %p Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", txd, -+ env->Version, env->Attr, env->Xid.Generation, env->Xid.Handle, (long long) env->Xid.Unique); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, -+ env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags); -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (txdRail != NULL) EP_XMTR_OP (txdRail->XmtrRail, DisplayTxd) (di, txdRail); -+} -+ -+void -+ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_for_each (el, &xmtr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) -+ activeCount++; -+ -+ (di->func)(di->arg, "ep_display_xmtr: xmtr=%p Free=%d Active=%d\n", xmtr, freeCount, activeCount); -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->Rails[i]) EP_XMTR_OP (xmtr->Rails[i], DisplayXmtr) (di, xmtr->Rails[i]); -+ -+ list_for_each (el,&xmtr->ActiveDescList) -+ ep_display_txd (di, list_entry (el, EP_TXD, Link)); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep_xmtr_fillout_stats(EP_XMTR *xmtr, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr->stats,tx), GET_STAT_PER_SEC(xmtr->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr->stats,tx_len) / (1024*1024)); -+} -+ -+void -+ep_xmtr_rail_fillout_stats(EP_XMTR_RAIL *xmtr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx), GET_STAT_PER_SEC(xmtr_rail->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr_rail->stats,tx_len) / (1024*1024)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsTx_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsTx_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsTx_elan3.c 2005-06-01 23:12:54.657430288 -0400 -@@ -0,0 +1,1173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan3.c,v 1.17.2.2 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define XMTR_TO_RAIL(xmtrRail) ((EP3_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->Device) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+static void TxEnveEvent (EP3_RAIL *rail, void *arg); -+static void TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS EnveCookieOps = -+{ -+ TxEnveEvent, -+ TxEnveRetry, -+ NULL, /* DmaCancelled */ -+ TxEnveVerify -+}; -+ -+static void TxDataEvent (EP3_RAIL *rail, void *arg); -+static void TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DataCookieOps = -+{ -+ TxDataEvent, -+ TxDataRetry, -+ NULL, /* DmaCancelled */ -+ TxDataVerify -+}; -+ -+static void TxDoneEvent (EP3_RAIL *dev, void *arg); -+static void TxDoneRetry (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma, int status); -+static void TxDoneVerify (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DoneCookieOps = -+{ -+ TxDoneEvent, -+ TxDoneRetry, -+ NULL, /* DmaCancelled */ -+ TxDoneVerify, -+} ; -+ -+static int -+AllocateTxdRailBlock (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_TXD_RAIL_BLOCK *blk; -+ EP3_TXD_RAIL *txdRail; -+ sdramaddr_t pTxdElan; -+ EP3_TXD_RAIL_MAIN *pTxdMain; -+ E3_Addr pTxdElanAddr; -+ E3_Addr pTxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_TXD_RAIL_BLOCK *, sizeof (EP3_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((pTxdElan = ep_alloc_elan (&rail->Generic, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pTxdMain = ep_alloc_main (&rail->Generic, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdMainAddr)) == (EP3_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pTxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->Generic.XmtrRail = &xmtrRail->Generic; -+ txdRail->TxdElan = pTxdElan; -+ txdRail->TxdElanAddr = pTxdElanAddr; -+ txdRail->TxdMain = pTxdMain; -+ txdRail->TxdMainAddr = pTxdMainAddr; -+ -+ RegisterCookie (&rail->CookieTable, &txdRail->EnveCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), &EnveCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DataCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), &DataCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DoneCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), &DoneCookieOps, (void *) txdRail); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->EnveCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, EnveEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DataCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DataEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DoneCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DoneEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pTxdMain->EnveEvent = EP3_EVENT_FREE; -+ pTxdMain->DataEvent = EP3_EVENT_FREE; -+ pTxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pTxdElan += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdElanAddr += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdMain = (EP3_TXD_RAIL_MAIN *) ((unsigned long) pTxdMain + EP3_TXD_RAIL_MAIN_SIZE); -+ pTxdMainAddr += EP3_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtrRail->DescBlockList); -+ xmtrRail->TotalDescCount += EP3_NUM_TXD_PER_BLOCK; -+ xmtrRail->FreeDescCount += EP3_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->Txd[i].Generic.Link, &xmtrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeTxdRailBlock (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ xmtrRail->TotalDescCount -= EP3_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->FreeDescCount--; -+ -+ list_del (&txdRail->Generic.Link); -+ -+ DeregisterCookie (&rail->CookieTable, &txdRail->EnveCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Txd[0].TxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Txd[0].TxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+} -+ -+static EP3_TXD_RAIL * -+GetTxdRail (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtrRail->Generic.Xmtr->Subsys; -+ EP3_TXD_RAIL *txdRail; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ if (list_empty (&xmtrRail->FreeDescList)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->FreeDescList.next, EP3_TXD_RAIL, Generic.Link); -+ -+#if defined(DEBUG) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ } -+#endif -+ -+ list_del (&txdRail->Generic.Link); -+ -+ xmtrRail->FreeDescCount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txdRail); -+} -+ -+static void -+FreeTxdRail (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->Generic.XmtrRail == &xmtrRail->Generic); -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&txdRail->Generic.Link, &xmtrRail->FreeDescList); -+ -+ xmtrRail->FreeDescCount++; -+ -+ if (xmtrRail->FreeDescWaiting) -+ { -+ xmtrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+} -+ -+static void -+BindTxdToRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ -+ EPRINTF6 (DBG_XMTR, "%s: BindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->Generic; -+ txdRail->Generic.Txd = txd; -+} -+ -+static void -+UnbindTxdFromRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ ASSERT (txd->TxdRail == &txdRail->Generic && txdRail->Generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: UnbindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ txd->TxdRail = NULL; -+ txdRail->Generic.Txd = NULL; -+} -+ -+/* -+ * TxEnveEvent: arg == EP_TXD -+ * Called when envelope delivered -+ */ -+static void -+TxEnveEvent (EP3_RAIL *rail, void *arg) -+{ -+ panic ("TxEnveEvent"); -+} -+ -+/* -+ * TxEnveRetry: arg == EP3_TXD_RAIL -+ * Called on retry of dma of large message envelope. -+ */ -+static void -+TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ -+ EPRINTF3 (DBG_XMTR, "%s: TxEnveRetry: xmtr %p txd %p\n", rail->Generic.Name, xmtrRail, txdRail); -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+ -+ if (! TxdShouldStabalise (&txdRail->Generic, &rail->Generic)) -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_ENVELOPE)); -+ else -+ QueueDmaForRetry (rail, dma, EP_RETRY_STABALISING); /* place dma on stabilising list for neterr fixup */ -+} -+ -+static void -+TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+} -+ -+/* -+ * TxDataEvent: arg == EP3_TXD -+ * Called on completion of a large transmit. -+ */ -+static void -+TxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DataCookie, txdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdMain->DataEvent)) /* PCI read */ -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p previously collecting by polling\n", -+ rail->Generic.Name, xmtrRail, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) || /* as no interrupt, or been reused as an RPC, */ -+ (EP_IS_RPC(txd->Envelope.Attr))) /* then we were either called as a result of a previous */ -+ { /* tx which was completed by polling or as a result */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ ASSERT (EP3_EVENT_FIRED (txdRail->EnveCookie, txdMain->EnveEvent)); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataEvent : xmtrRail=%p txdRail=%p tx=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit lists */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDataRetry: arg == EP3_TXD -+ * Called on retry of remote "put" dma of large transmit data. -+ */ -+static void -+TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataRetry: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+} -+ -+/* -+ * TxDoneEvent: arg == EP3_TXD -+ * Called on completion of a RPC. -+ */ -+static void -+TxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txdRail %p previously collecting by polling\n", -+ rail->Generic.Name, xmtr, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr) || EP_IS_RPC(txd->Envelope.Attr))) /* marked as no interrupt, or been reused as an transmit, */ -+ { /* then we were either called as a result of a previous */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* tx which was completed by polling or as a result */ -+ /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDoneEvent: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (txd->Handler) -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDoneRetry: arg == EP3_TXD -+ */ -+static void -+TxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ panic ("TxDoneRetry"); -+} -+ -+static void -+TxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ panic ("TxDoneVerify"); -+} -+ -+static void -+EnableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: EnableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_SET_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DoneCookie.Cookie); -+ } -+ else -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DataCookie.Cookie); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+ } -+} -+ -+static void -+DisableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: DisableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_CLEAR_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+} -+ -+static void -+InitialiseTxdRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail, int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->TxdElanAddr; -+ txd->Envelope.NodeId = rail->Generic.Position.pos_nodeid; -+ -+ /* Initialise the dma backoff */ -+ txdRail->Backoff.type = EP_BACKOFF_FREE; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 1); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), -+ (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + (EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_ACTIVE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ ASSERT (EP_IS_RPC(txd->Envelope.Attr)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ -+ txdRail->TxdMain->EnveEvent = txdRail->EnveCookie.Cookie; -+ txdRail->TxdMain->DataEvent = txdRail->DataCookie.Cookie; -+ break; -+ } -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ txdRail->TxdMain->DoneEvent = txdRail->DoneCookie.Cookie; -+ else -+ { -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 1); -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ else -+ EnableTransmitCallback (txd, txdRail); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* copy the envelope and payload if present down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Envelope, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Envelope), EP_ENVELOPE_SIZE); -+ -+ if (EP_HAS_PAYLOAD(txd->Envelope.Attr)) -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Payload, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Payload), EP_PAYLOAD_SIZE); -+} -+ -+void -+ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* only need to acquire/release the Lock to ensure that -+ * the node state transition has been noticed. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (! EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep3xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+#endif -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ /* Transmit data not been sent, so just restart on different rail */ -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d unbind an retry\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ /* epcomms thread will restart on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ -+ if (EP_IS_RPC(txd->Envelope.Attr) && !EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ if (EP_IS_NO_FAILOVER(txd->Envelope.Attr)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d - not able to failover\n", -+ rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ list_del (&txd->Link); -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* envelope and data events must have been set, so only clear the done event */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ list_add_tail (&txd->Link, &txdList); -+ continue; -+ } -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d passive\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+#endif -+ -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p completed to node %d\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ continue; -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+int -+ep3xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ EnableTransmitCallback (txd, txdRail); -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ break; -+ } -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep3xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->Generic.Name, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtrRail->Generic.Xmtr,txd); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ E3_DMA_BE dmabe; -+ -+ if ((txdRail = GetTxdRail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->Generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: TransmitTxdOnRail: node %u not connected on this rail\n", rail->Generic.Name, txd->NodeId); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ InitialiseTxdRail (txd, txdRail, phase); -+ -+ /* Initialise the dma descriptor */ -+ dmabe.s.dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_WRITE, DMA_QUEUED, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = (EP_HAS_PAYLOAD(txd->Envelope.Attr) ? EP_INPUTQ_SIZE : EP_ENVELOPE_SIZE); -+ dmabe.s.dma_source = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, Envelope); -+ dmabe.s.dma_dest = (E3_Addr) 0; -+ dmabe.s.dma_destEvent = EP_MSGQ_ADDR(txd->Service); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (txd->NodeId); -+ dmabe.s.dma_srcEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, txd->NodeId); -+ -+ EPRINTF8 (DBG_XMTR, "%s: TransmitTxdOnRail: txd=%p txdRail=%p @ %x XID=%llx dest=%u srcEvent=%x srcCookie=%x\n", rail->Generic.Name, -+ txd, txdRail, txdRail->TxdElanAddr, (long long) txd->Envelope.Xid.Unique, txd->NodeId, dmabe.s.dma_srcEvent, dmabe.s.dma_srcCookieVProc); -+ -+ BindTxdToRail (txd, txdRail); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ InitialiseTxdRail (txd, txdRail, EP_TXD_PHASE_PASSIVE); /* initialise as passive (updated envelope) */ -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ BindTxdToRail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* XXXX - TBD assertions on phase */ -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+} -+ -+long -+ep3xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ -+ if (xmtrRail->FreeDescCount < ep_txd_lowat && !AllocateTxdRailBlock(xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (xmtrRail, EP3_XMTR_RAIL *, sizeof (EP3_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->FreeDescLock); -+ kcondvar_init (&xmtrRail->FreeDescSleep); -+ INIT_LIST_HEAD (&xmtrRail->FreeDescList); -+ INIT_LIST_HEAD (&xmtrRail->DescBlockList); -+ -+ xmtrRail->Generic.CommsRail = commsRail; -+ xmtrRail->Generic.Xmtr = xmtr; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->Generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ while (xmtrRail->FreeDescCount != xmtrRail->TotalDescCount) -+ { -+ xmtrRail->FreeDescWaiting++; -+ kcondvar_wait (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* need to free up the txd's and blocks */ -+ /* all the txd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (xmtrRail->TotalDescCount == xmtrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->DescBlockList)) -+ FreeTxdRailBlock (xmtrRail, list_entry(xmtrRail->DescBlockList.next, EP3_TXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->FreeDescCount == 0) && (xmtrRail->TotalDescCount == 0)); -+ -+ spin_lock_destroy (&xmtrRail->FreeDescLock); -+ kcondvar_destroy (&xmtrRail->FreeDescSleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP3_XMTR_RAIL)); -+} -+ -+void -+ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ int freeCount = 0; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ list_for_each (el, &xmtrRail->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail=%d Free=%d Total=%d (%d)\n", -+ rail->Generic.Number, xmtrRail->FreeDescCount, xmtrRail->TotalDescCount, freeCount); -+} -+ -+void -+ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ EP3_RAIL *rail = (EP3_RAIL *) xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " EnveEvent=%x DataEvent=%x DoneEvent=%x Rail=%s\n", -+ txdMain->EnveEvent, txdMain->DataEvent, txdMain->DoneEvent, rail->Generic.Name); -+ (di->func)(di->arg, " EnveEvent=%x.%x DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type))); -+} -+ -+int -+ep3xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ E3_Addr enveEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ EP3_RETRY_DMA *retry = NULL; -+ -+ struct list_head *el; -+ struct list_head *nel; -+ unsigned long flags; -+ -+ /* is enevelope event is really not set */ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent )) -+ return (0); -+ -+ /* remove matching dma from stalled list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_for_each_safe(el, nel, &rail->DmaRetries[EP_RETRY_STABALISING]) { -+ retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if ( retry->Dma.s.dma_srcEvent == enveEvent ) { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ break; /* there can only be one */ -+ } -+ } -+ ASSERT ( retry != NULL); /* must find one in list */ -+ ASSERT ( retry->Dma.s.dma_srcEvent == enveEvent ); /* better still be the right type then */ -+ -+ /* add to free list */ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return (1); -+} -+ -+void -+ep3xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_XMTR_RAIL * ep3xmtr_rail = (EP3_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/epcommsTx_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/epcommsTx_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/epcommsTx_elan4.c 2005-06-01 23:12:54.659429984 -0400 -@@ -0,0 +1,1389 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan4.c,v 1.26.2.4 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define XMTR_TO_COMMS(xmtrRail) ((EP4_COMMS_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail) -+#define XMTR_TO_RAIL(xmtrRail) ((EP4_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->r_ctxt.ctxt_dev) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+#define TXD_TO_XMTR(txdRail) ((EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail) -+#define TXD_TO_RAIL(txdRail) XMTR_TO_RAIL(TXD_TO_XMTR(txdRail)) -+ -+static void txd_interrupt (EP4_RAIL *rail, void *arg); -+static void poll_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ int -+on_list (struct list_head *ent, struct list_head *list) -+{ -+ struct list_head *el; -+ unsigned int count = 0; -+ list_for_each (el, list) { -+ if (el == ent) -+ count++; -+ } -+ return count; -+} -+ -+static __inline__ void -+__ep4_txd_assert_free (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FREE) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FREE) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FREE) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_txd_assert_finished (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FINISHED) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FINISHED) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FINISHED) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FINISHED; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_finished"); -+ } -+} -+ -+static __inline__ int -+__ep4_txd_assfail (EP4_TXD_RAIL *txdRail, const char *expr, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ -+ printk ("__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL (xmtrRail), "__ep4_txd_assfail"); -+ -+ return 0; -+} -+ -+#define EP4_TXD_ASSERT(txdRail, EX) ((void) ((EX) || (__ep4_txd_assfail(txdRail, #EX, __FILE__, __LINE__)))) -+#define EP4_TXD_ASSERT_FREE(txdRail) __ep4_txd_assert_free(txdRail, __FILE__, __LINE__) -+#define EP4_TXD_ASSERT_FINISHED(txdRail) __ep4_txd_assert_finished(txdRail, __FILE__, __LINE__) -+ -+static int -+alloc_txd_block (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ EP4_TXD_RAIL_BLOCK *blk; -+ EP4_TXD_RAIL_MAIN *txdMain; -+ EP_ADDR txdMainAddr; -+ sdramaddr_t txdElan; -+ EP_ADDR txdElanAddr; -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (blk, EP4_TXD_RAIL_BLOCK *, sizeof (EP4_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((txdElan = ep_alloc_elan (&rail->r_generic, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((txdMain = ep_alloc_main (&rail->r_generic, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdMainAddr)) == (EP4_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->txd_generic.XmtrRail = &xmtrRail->xmtr_generic; -+ txdRail->txd_elan = txdElan; -+ txdRail->txd_elan_addr = txdElanAddr; -+ txdRail->txd_main = txdMain; -+ txdRail->txd_main_addr = txdMainAddr; -+ -+ /* We only need to reserve space for one command stream, since the sten packet -+ * can only be retrying *before* the dma source event is set. -+ * reserve bytes of "event" cq space for the completion write + interrupt */ -+ if ((txdRail->txd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_INTR_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* register the main interrupt cookies */ -+ ep4_register_intcookie (rail, &txdRail->txd_intcookie, txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done), txd_interrupt, txdRail); -+ -+ /* initialise the events */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WritePtr), -+ txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_data)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WriteValue), -+ EP4_STATE_FINISHED); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ /* Initialise the command streams */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_env))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_done))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txdMain->txd_env = EP4_STATE_FREE; -+ txdMain->txd_data = EP4_STATE_FREE; -+ txdMain->txd_done = EP4_STATE_FREE; -+ -+ /* move onto next descriptor */ -+ txdElan += EP4_TXD_RAIL_ELAN_SIZE; -+ txdElanAddr += EP4_TXD_RAIL_ELAN_SIZE; -+ txdMain = (EP4_TXD_RAIL_MAIN *) ((unsigned long) txdMain + EP4_TXD_RAIL_MAIN_SIZE); -+ txdMainAddr += EP4_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&blk->blk_link, &xmtrRail->xmtr_blocklist); -+ -+ xmtrRail->xmtr_totalcount += EP4_NUM_TXD_PER_BLOCK; -+ xmtrRail->xmtr_freecount += EP4_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->blk_txds[i].txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ } -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+static void -+free_txd_block (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ xmtrRail->xmtr_totalcount -= EP4_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->xmtr_freecount--; -+ -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ -+ list_del (&txdRail->txd_generic.Link); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+} -+ -+static EP4_TXD_RAIL * -+get_txd_rail (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int low_on_txds; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ if (list_empty (&xmtrRail->xmtr_freelist)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->xmtr_freelist.next, EP4_TXD_RAIL, txd_generic.Link); -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ list_del (&txdRail->txd_generic.Link); -+ -+ xmtrRail->xmtr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->xmtr_freecount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ -+ return (txdRail); -+} -+ -+static void -+free_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&txdRail->txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ xmtrRail->xmtr_freecount++; -+ -+ if (xmtrRail->xmtr_freewaiting) -+ { -+ xmtrRail->xmtr_freewaiting--; -+ kcondvar_wakeupall (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+} -+ -+static void -+bind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EPRINTF6 (DBG_XMTR, "%s: bind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->txd_generic; -+ txdRail->txd_generic.Txd = txd; -+} -+ -+static void -+unbind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_TXD_ASSERT (txdRail, txd->TxdRail == &txdRail->txd_generic && txdRail->txd_generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: unbind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, txd->Envelope.Xid.Unique); -+ -+ -+ txdRail->txd_generic.Txd = NULL; -+ txd->TxdRail = NULL; -+} -+ -+static void -+initialise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->txd_elan_addr; -+ txd->Envelope.NodeId = rail->r_generic.Position.pos_nodeid; -+ -+ /* Allocate a network error fixup cookie */ -+ txdRail->txd_cookie = ep4_neterr_cookie (rail, txd->NodeId) | EP4_COOKIE_STEN; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ { -+ unsigned int nsets = (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + ( EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0); -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32 * nsets, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32 * nsets , E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_ACTIVE; -+ } -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ break; -+ } -+ -+ case EP_TXD_PHASE_PASSIVE: -+ EP4_TXD_ASSERT (txdRail, EP_IS_RPC(txd->Envelope.Attr)); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ break; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+} -+ -+static void -+terminate_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->txd_main->txd_env = EP4_STATE_FREE; -+ txdRail->txd_main->txd_data = EP4_STATE_FREE; -+ txdRail->txd_main->txd_done = EP4_STATE_FREE; -+ -+#if defined(DEBUG_ASSERT) -+ if (sdram_assert) -+ { -+ ELAN4_DEV *dev = XMTR_TO_RAIL (xmtrRail)->r_ctxt.ctxt_dev; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+#endif -+} -+ -+static void -+defer_txd_rail (EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ -+ EPRINTF5 (DBG_XMTR, "%s: defer_txd_rail: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ rail->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ /* transmit has completed, but the data dma has not completed -+ * (because of network error fixup), we queue the txdRail onto a list -+ * to be polled for completion later. -+ */ -+ if (txdRail->txd_retry_time) -+ { -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+ -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+} -+ -+static void -+finalise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ -+ EP4_TXD_ASSERT_FINISHED (txdRail); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+} -+ -+static void -+txd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) arg; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (txdRail->txd_main->txd_done == EP4_STATE_FINISHED || txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to txd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "txd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY (delay); -+ delay <<= 1; -+ } -+ -+ txd = txdRail->txd_generic.Txd; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ { -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time == 0); /* cannot be on retry/poll list */ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_done != EP4_STATE_FINISHED); /* data xfer cannot have finished */ -+ -+ if (TxdShouldStabalise (&txdRail->txd_generic, &rail->r_generic)) -+ { -+ EPRINTF6 (DBG_STABILISE, "%s: txd_interrupt: stablise xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt; /* indicate on retry list */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ else -+ { -+ EPRINTF6 (DBG_RETRY, "%s: txd_interrupt: retry xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX: backoff ? */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ -+ EP4_TXD_ASSERT (txdRail, txd != NULL && !(EP_IS_NO_INTERRUPT(txd->Envelope.Attr))); -+ -+ EPRINTF6 (DBG_XMTR, "%s: txd_interrupt: xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ defer_txd_rail (txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ else -+ { -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+static void -+poll_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ -+ ep_poll_transmits (xmtrRail->xmtr_generic.Xmtr); -+} -+ -+void -+issue_envelope_packet (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ ELAN4_CQ *cq = xmtrRail->xmtr_cq; -+ E4_uint64 *blk0 = (E4_uint64 *) &txd->Envelope; -+ E4_uint64 *blk1 = EP_HAS_PAYLOAD(txd->Envelope.Attr) ? (E4_uint64 *) &txd->Payload : NULL; -+ E4_Addr qaddr = EP_MSGQ_ADDR(txd->Service); -+ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ elan4_open_packet (cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(txd->NodeId))); -+ elan4_sendtrans0 (cq, TR_INPUT_Q_GETINDEX, EP_MSGQ_ADDR(txd->Service)); -+ -+ /* send the payload if present */ -+ if (blk0) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 0, blk0); -+ if (blk1) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 128, blk1); -+ -+ elan4_sendtrans1 (cq, TR_INPUT_Q_COMMIT, qaddr, txdRail->txd_cookie); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_write_dword_cmd (cq, txdRail->txd_main_addr + offsetof (EP4_TXD_RAIL_MAIN, txd_env), EP4_STATE_FINISHED); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_set_event_cmd (cq, txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_env)); -+ -+ elan4_write_dword_cmd (cq, xmtrRail->xmtr_main_addr + offsetof (EP4_XMTR_RAIL_MAIN, xmtr_flowcnt), ++xmtrRail->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* need to acquire/release the Lock to ensure that the node state -+ * transition has been noticed and no new envelopes are queued to -+ * nodes which are passivating. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* Then we insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ /* remove any envelopes which are retrying to nodes which are going down */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_env == EP4_STATE_FAILED); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4xmtr_flush_callback: removing txdRail %p from retry list\n", rail->r_generic.Name, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ list_del (&txdRail->txd_retry_link); -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* Determine whether we have active or passive messages to -+ * any node which is passivating */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if (txdRail == NULL || txdRail->txd_generic.XmtrRail != &xmtrRail->xmtr_generic || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF5 (DBG_XMTR, "%s: flush txd=%p txdRail=%p data=%llx done=%llx\n", rail->r_generic.Name, -+ txd, txdRail, txdRail->txd_main->txd_data, txdRail->txd_main->txd_done); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep4xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ /* XXXX - no rail failover for now ....*/ -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (txdRail->txd_main->txd_done == EP4_STATE_ACTIVE) -+ { -+ -+ EPRINTF8 (DBG_DISCON, "ep4xmtr_disconnect_callback: txdRail=%p : events %llx,%llx,%llx done %llx,%llx,%llx retry %lx\n",txdRail, -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ txdRail->txd_main->txd_env, txdRail->txd_main->txd_data, txdRail->txd_main->txd_done, -+ txdRail->txd_retry_time); -+ -+ if (txdRail->txd_retry_time) -+ { -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+void -+ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || txd->NodeId != nodeId) -+ continue; -+ -+ /* The only non-dma associated with a txd is the initial sten packet, if it has been acked -+ * and the neterr cookie matches, then change it to look like it's been acked since the -+ * INPUT_Q_COMMIT transaction has already been executed */ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED && (txdRail->txd_cookie == cookies[0] || txdRail->txd_cookie == cookies[1])) -+ { -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4xmtr_neterr_callback: cookie <%lld%s%s%s%s> matches txd %p txdRail %p\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(txdRail->txd_cookie), txd, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+int -+ep4xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ if (! EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ return 0; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (!EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (xmtrRail->xmtr_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txd->Envelope.Attr |= EP_INTERRUPT_ENABLED; -+ } -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr & EP_INTERRUPT_ENABLED)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+ -+ txd->Envelope.Attr &= ~EP_INTERRUPT_ENABLED; -+ } -+ } -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED && txdRail->txd_main->txd_data == EP4_STATE_FINISHED && txdRail->txd_main->txd_done == EP4_STATE_FINISHED) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep4xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->r_generic.Name, txd, txd->Envelope.Xid.Unique); -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ -+ ep_xmtr_txd_stat(xmtrRail->xmtr_generic.Xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ -+ if ((txdRail = get_txd_rail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->r_generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: ep4xmtr_bind_txd: node %u not connected on this rail\n", rail->r_generic.Name, txd->NodeId); -+ -+ free_txd_rail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_ACTIVE); -+ -+ bind_txd_rail (txd, txdRail); -+ -+ /* generate the STEN packet to transfer the envelope */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ issue_envelope_packet (xmtrRail, txdRail); -+ else -+ { -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_PASSIVE); -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ bind_txd_rail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ /* XXXX - TBD */ -+} -+ -+long -+ep4xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ if (xmtrRail->xmtr_freecount < ep_txd_lowat && !alloc_txd_block (xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ nextRunTime = lbolt + HZ; -+ } -+ else -+ { -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ EPRINTF5 (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ EPRINTF3 (DBG_XMTR, "%s: done %x data %x\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name, -+ txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_done), -+ txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_data)); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ /* remove txd from active list and add to list to call handlers */ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ -+ /* remove and free of txdRail */ -+ txdRail->txd_retry_time = 0; -+ list_del (&txdRail->txd_retry_link); -+ -+ finalise_txd (txd, txdRail); -+ -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat (xmtr,txd); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4xmtr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ while (! list_empty (&xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ { -+ EP4_TXD_RAIL *txdRail = list_entry (xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY].next, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (BEFORE (lbolt, txdRail->txd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ { -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4xmtr_retry: re-issue envelope packet to %d for txdRail=%p\n", -+ rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId, txdRail); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ -+ issue_envelope_packet (xmtrRail, txdRail); -+ } -+ else -+ { -+ EPRINTF2 (DBG_RETRY, "%s: ep4xmtr_retry: cannot re-issue envelope packet to %d\n", rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP4_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (xmtrRail, EP4_XMTR_RAIL *, sizeof (EP4_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->xmtr_freelock); -+ kcondvar_init (&xmtrRail->xmtr_freesleep); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_freelist); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_blocklist); -+ -+ for (i = 0; i < EP4_TXD_NUM_LISTS; i++) -+ INIT_LIST_HEAD (&xmtrRail->xmtr_retrylist[i]); -+ spin_lock_init (&xmtrRail->xmtr_retrylock); -+ -+ xmtrRail->xmtr_generic.CommsRail = commsRail; -+ xmtrRail->xmtr_generic.Xmtr = xmtr; -+ -+ xmtrRail->xmtr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_XMTR_RAIL_MAIN), 0, &xmtrRail->xmtr_main_addr); -+ xmtrRail->xmtr_cq = elan4_alloccq (&rail->r_ctxt, EP4_XMTR_CQSIZE, CQ_EnableAllBits, CQ_Priority); -+ -+ xmtrRail->xmtr_retryops.op_func = ep4xmtr_retry; -+ xmtrRail->xmtr_retryops.op_arg = xmtrRail; -+ -+ ep4_add_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ ep4_register_intcookie (rail, &xmtrRail->xmtr_intcookie, xmtrRail->xmtr_main_addr, -+ poll_interrupt, xmtrRail); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->xmtr_generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ ep_procfs_xmtr_add_rail(&(xmtrRail->xmtr_generic)); -+} -+ -+void -+ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_procfs_xmtr_del_rail(&(xmtrRail->xmtr_generic)); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ while (xmtrRail->xmtr_freecount != xmtrRail->xmtr_totalcount) -+ { -+ xmtrRail->xmtr_freewaiting++; -+ kcondvar_wait (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* all the txd's accociated with DescBlocks must be in the freelist */ -+ ASSERT (xmtrRail->xmtr_totalcount == xmtrRail->xmtr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->xmtr_blocklist)) -+ free_txd_block (xmtrRail, list_entry(xmtrRail->xmtr_blocklist.next, EP4_TXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->xmtr_freecount == 0) && (xmtrRail->xmtr_totalcount == 0)); -+ -+ ep4_deregister_intcookie (rail, &xmtrRail->xmtr_intcookie); -+ -+ ep4_remove_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ elan4_freecq (&rail->r_ctxt, xmtrRail->xmtr_cq); -+ ep_free_main (&rail->r_generic, xmtrRail->xmtr_main_addr, sizeof (EP4_XMTR_RAIL_MAIN)); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_retrylock); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_freelock); -+ kcondvar_destroy (&xmtrRail->xmtr_freesleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP4_XMTR_RAIL)); -+} -+ -+void -+ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ unsigned int freeCount = 0; -+ unsigned int pollCount = 0; -+ unsigned int stalledCount = 0; -+ unsigned int retryCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ list_for_each (el, &xmtrRail->xmtr_freelist) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) -+ pollCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) -+ stalledCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) -+ retryCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " rail=%d free=%d total=%d (%d) (retry %d,%d,%d)\n", -+ rail->r_generic.Number, xmtrRail->xmtr_freecount, xmtrRail->xmtr_totalcount, -+ freeCount, pollCount, stalledCount, retryCount); -+ (di->func)(di->arg, " cq %d flowcnt %lld,%lld\n", elan4_cq2num (xmtrRail->xmtr_cq), xmtrRail->xmtr_flowcnt, xmtrRail->xmtr_main->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_TXD_RAIL_MAIN *txdMain = txdRail->txd_main; -+ sdramaddr_t txdElan = txdRail->txd_elan; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ char *list = ""; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (txdRail->txd_retry_time) -+ { -+ if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL])) -+ list = " poll"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED])) -+ list = " stalled"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ list = " retry"; -+ else -+ list = " ERROR"; -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " Rail %d txd %p elan %lx (%x) main %p (%x) cookie <%lld%s%s%s%s> ecq %d %s\n", rail->r_generic.Number, -+ txdRail, txdRail->txd_elan, txdRail->txd_elan_addr, txdRail->txd_main, txdRail->txd_main_addr, -+ EP4_COOKIE_STRING(txdRail->txd_cookie), elan4_cq2num (txdRail->txd_ecq->ecq_cq), list); -+ -+ (di->func)(di->arg, " env %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[1])), -+ txdMain->txd_env); -+ (di->func)(di->arg, " data %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[1])), -+ txdMain->txd_data); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[1])), -+ txdMain->txd_done); -+} -+ -+int -+ep4xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ unsigned long flags; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED) -+ return 0; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ list_del (&txdRail->txd_retry_link); -+ txdRail->txd_retry_time = 0; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ return 1; -+} -+ -+void -+ep4xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_XMTR_RAIL * ep4xmtr_rail = (EP4_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/ep_procfs.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/ep_procfs.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/ep_procfs.c 2005-06-01 23:12:54.660429832 -0400 -@@ -0,0 +1,331 @@ -+ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: ep_procfs.c,v 1.5.6.3 2004/11/30 10:10:57 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/ep_procfs.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_xmtr_root; -+struct proc_dir_entry *ep_procfs_rcvr_root; -+ -+static int -+ep_proc_open (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr; -+ int pages = 4; -+ -+ if ((pr = kmalloc (sizeof (PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ pr->pr_di.func = proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("debug_xmtr", file->f_dentry->d_iname)) -+ { -+ EP_XMTR *xmtr = (EP_XMTR *)(PDE(inode)->data); -+ ep_display_xmtr (&pr->pr_di, xmtr); -+ } -+ -+ if (!strcmp("debug_rcvr", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 0); -+ } -+ -+ if (!strcmp("debug_full", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 1); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations ep_proc_operations = -+{ -+ read: proc_read, -+ open: ep_proc_open, -+ release: proc_release, -+}; -+ -+static int -+proc_read_rcvr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *)data; -+ -+ if (rcvr == NULL) -+ sprintf(page,"proc_read_rcvr_stats rcvr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_rcvr_fillout_stats(rcvr,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_rcvr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR_RAIL *rcvr_rail = (EP_RCVR_RAIL *)data; -+ -+ if (rcvr_rail == NULL) { -+ strcpy(page,"proc_read_rcvr_rail_stats rcvr_rail=NULL"); -+ } else { -+ page[0] = 0; -+ ep_rcvr_rail_fillout_stats(rcvr_rail, page); -+ EP_RCVR_OP(rcvr_rail,FillOutRailStats)(rcvr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_rcvr_add(EP_RCVR *rcvr) -+{ -+ /* ep/rcvr/service_number/stats */ -+ /* ep/rcvr/service_number/debug_rcvr */ -+ /* ep/rcvr/service_number/debug_full */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%d", rcvr->Service); -+ -+ rcvr->procfs_root = proc_mkdir (str, ep_procfs_rcvr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_stats; -+ p->data = rcvr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_rcrv", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+ -+ if ((p = create_proc_entry ("debug_full", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+} -+ -+void -+ep_procfs_rcvr_del(EP_RCVR *rcvr) -+{ -+ char str[32]; -+ sprintf(str,"%d", rcvr->Service); -+ -+ remove_proc_entry ("stats", rcvr->procfs_root); -+ remove_proc_entry ("debug_rcvr", rcvr->procfs_root); -+ remove_proc_entry ("debug_full", rcvr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_rcvr_root); -+} -+ -+void -+ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ /* ep/rcvr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ rcvrRail->procfs_root = proc_mkdir (str, rcvrRail->Rcvr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_rail_stats; -+ p->data = rcvrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", rcvrRail->procfs_root); -+ -+ remove_proc_entry (str, rcvrRail->Rcvr->procfs_root); -+} -+ -+ -+ -+ -+static int -+proc_read_xmtr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *)data; -+ -+ if (xmtr == NULL) -+ strcpy(page,"proc_read_xmtr_stats xmtr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_fillout_stats(xmtr, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_xmtr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR_RAIL *xmtr_rail = (EP_XMTR_RAIL *)data; -+ -+ if (xmtr_rail == NULL) -+ strcpy(page,"proc_read_xmtr_rail_stats xmtr_rail=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_rail_fillout_stats(xmtr_rail, page); -+ EP_XMTR_OP(xmtr_rail,FillOutRailStats)(xmtr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_xmtr_add(EP_XMTR *xmtr) -+{ -+ /* ep/xmtr/service_number/stats */ -+ /* ep/xmtr/service_number/debug_xmtr */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ xmtr->procfs_root = proc_mkdir (str, ep_procfs_xmtr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_stats; -+ p->data = xmtr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_xmtr", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = xmtr; -+ } -+} -+ -+void -+ep_procfs_xmtr_del(EP_XMTR *xmtr) -+{ -+ char str[32]; -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ remove_proc_entry ("stats", xmtr->procfs_root); -+ remove_proc_entry ("debug_xmtr", xmtr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_xmtr_root); -+} -+ -+void -+ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ /* ep/xmtr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ xmtrRail->procfs_root = proc_mkdir (str, xmtrRail->Xmtr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_rail_stats; -+ p->data = xmtrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", xmtrRail->procfs_root); -+ -+ remove_proc_entry (str, xmtrRail->Xmtr->procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_init(void) -+{ -+ ep_procfs_rcvr_root = proc_mkdir ("rcvr", ep_procfs_root); -+ ep_procfs_xmtr_root = proc_mkdir ("xmtr", ep_procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_fini(void) -+{ -+ remove_proc_entry ("rcvr", ep_procfs_root); -+ remove_proc_entry ("xmtr", ep_procfs_root); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kalloc.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kalloc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kalloc.c 2005-06-01 23:12:54.661429680 -0400 -@@ -0,0 +1,677 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kalloc.c,v 1.17.8.2 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+static void -+HashInPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ int idx0 = HASH (pool->Handle.nmh_nmd.nmd_addr); -+ int idx1 = HASH (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ list_add (&pool->HashBase, &alloc->HashBase[idx0]); -+ list_add (&pool->HashTop, &alloc->HashTop[idx1]); -+} -+ -+static void -+HashOutPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+} -+ -+static EP_POOL * -+LookupPool (EP_ALLOC *alloc, EP_ADDR addr) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &alloc->HashBase[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ list_for_each (el, &alloc->HashTop[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashTop); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ return (NULL); -+} -+ -+static EP_POOL * -+AllocatePool (EP_ALLOC *alloc, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_ADDR base = 0; -+ EP_POOL *pool; -+ EP_RAIL *rail; -+ int i, railmask = 0; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (pool, EP_POOL *, sizeof (EP_POOL), !(attr & EP_NO_SLEEP)); -+ -+ if (pool == NULL) -+ return (NULL); -+ -+ if (addr != 0) -+ base = addr; -+ else -+ { -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ { -+ KMEM_ZALLOC (pool->Bitmaps[i - LN2_MIN_SIZE], bitmap_t *, BT_BITOUL(1 << (LN2_MAX_SIZE-i)) * sizeof (bitmap_t), !(attr & EP_NO_SLEEP)); -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] == NULL) -+ goto failed; -+ } -+ -+ if ((base = ep_rmalloc (alloc->ResourceMap, size, !(attr & EP_NO_SLEEP))) == 0) -+ goto failed; -+ } -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ rail = alloc->Data.Private.Rail; -+ -+ if ((pool->Buffer.Sdram = rail->Operations.SdramAlloc (rail, base, size)) == 0) -+ goto failed; -+ -+ ep_perrail_sdram_map (rail, base, pool->Buffer.Sdram, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ ep_perrail_kaddr_map (alloc->Data.Private.Rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_kaddr_map (rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ railmask |= (1 << rail->Number); -+ } -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ pool->Handle.nmh_nmd.nmd_attr = EP_NMD_ATTR (alloc->Data.Shared.System->Position.pos_nodeid, railmask); -+ -+ ep_nmh_insert (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ break; -+ -+ default: -+ goto failed; -+ } -+ -+ return (pool); -+ -+ failed: -+ if (addr == 0 && base) -+ ep_rmfree (alloc->ResourceMap, size, base); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] != NULL) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+ return (NULL); -+} -+ -+static void -+FreePool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ struct list_head *el; -+ int i; -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ alloc->Data.Private.Rail->Operations.SdramFree (alloc->Data.Private.Rail, pool->Buffer.Sdram, pool->Handle.nmh_nmd.nmd_len); -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ ep_nmh_remove (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ } -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ } -+ -+ if (pool->Bitmaps[0]) -+ { -+ ep_rmfree (alloc->ResourceMap, pool->Handle.nmh_nmd.nmd_len, pool->Handle.nmh_nmd.nmd_addr); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ } -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+} -+ -+static int -+AddRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ EP_RAIL_ENTRY *l; -+ unsigned long flags; -+ int i; -+ -+ ASSERT (alloc->Type == EP_ALLOC_TYPE_SHARED_MAIN); -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ l->Rail = rail; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_kaddr_map (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Buffer.Ptr, -+ pool->Handle.nmh_nmd.nmd_len, EP_PERM_WRITE, EP_NO_SLEEP); -+ -+ pool->Handle.nmh_nmd.nmd_attr |= EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_add (&l->Link, &alloc->Data.Shared.Rails); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ return (0); -+} -+ -+static void -+RemoveRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ pool->Handle.nmh_nmd.nmd_attr &= ~EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ KMEM_FREE(tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+static EP_POOL * -+AllocateBlock (EP_ALLOC *alloc, unsigned size, EP_ATTRIBUTE attr, int *offset) -+{ -+ int block, j, k; -+ unsigned long flags; -+ EP_POOL *pool; -+ -+ -+ if (size > MAX_SIZE) -+ { -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, size, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ HashInPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = 0; -+ -+ return pool; -+ } -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* k now has ln2 of the size to allocate. */ -+ /* find the free list with the smallest block we can use*/ -+ for (j = k; j <= LN2_MAX_SIZE && list_empty (&alloc->Freelists[j - LN2_MIN_SIZE]); j++) -+ ; -+ -+ /* j has ln2 of the smallest size block we can use */ -+ if (j < LN2_MAX_SIZE) -+ { -+ int nbits = 1 << (LN2_MAX_SIZE-j); -+ -+ pool = list_entry (alloc->Freelists[j - LN2_MIN_SIZE].next, EP_POOL, Link[j - LN2_MIN_SIZE]); -+ block = (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) << j); -+ -+ BT_CLEAR (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ if (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) == -1) -+ list_del (&pool->Link[j - LN2_MIN_SIZE]); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, MAX_SIZE, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ block = 0; -+ j = LN2_MAX_SIZE; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ HashInPool (alloc, pool); -+ } -+ -+ /* Split it until the buddies are the correct size, putting one -+ * buddy back on the free list and continuing to split the other */ -+ while (--j >= k) -+ { -+ list_add (&pool->Link[j - LN2_MIN_SIZE], &alloc->Freelists[j - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ block += (1 << j); -+ } -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = block; -+ -+ return (pool); -+} -+ -+static void -+FreeBlock (EP_ALLOC *alloc, EP_ADDR addr, unsigned size) -+{ -+ EP_POOL *pool; -+ int k, block = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* Find the pool containing this block */ -+ pool = LookupPool (alloc, addr); -+ -+ /* It must exist */ -+ ASSERT (pool != NULL); -+ -+ /* If we're freeing a subset of it, then update the bitmaps */ -+ if (size <= MAX_SIZE) -+ { -+ ASSERT (BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (addr - pool->Handle.nmh_nmd.nmd_addr) >> k) == 0); -+ -+ block = addr - pool->Handle.nmh_nmd.nmd_addr; -+ -+ while (k < LN2_MAX_SIZE && BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1)) -+ { -+ BT_CLEAR (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1); -+ -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_del (&pool->Link[k - LN2_MIN_SIZE]); -+ -+ k++; -+ } -+ } -+ -+ if (k >= LN2_MAX_SIZE) -+ { -+ HashOutPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ FreePool (alloc, pool); -+ } -+ else -+ { -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_add (&pool->Link[k - LN2_MIN_SIZE], &alloc->Freelists[k - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[k - LN2_MIN_SIZE], block >> k); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ } -+} -+ -+static void -+InitialiseAllocator (EP_ALLOC *alloc, EP_ALLOC_TYPE type, unsigned int perm, EP_RMAP *rmap) -+{ -+ int i; -+ -+ spin_lock_init (&alloc->Lock); -+ -+ alloc->Type = type; -+ alloc->ResourceMap = rmap; -+ alloc->Perm = perm; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ (&alloc->HashBase[i])->next = &alloc->HashBase[i]; -+ -+ INIT_LIST_HEAD (&alloc->HashBase[i]); -+ INIT_LIST_HEAD (&alloc->HashTop[i]); -+ } -+ -+ for (i = 0; i < NUM_FREELISTS; i++) -+ INIT_LIST_HEAD (&alloc->Freelists[i]); -+} -+ -+static void -+DestroyAllocator (EP_ALLOC *alloc) -+{ -+ struct list_head *el, *next; -+ int i; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each_safe (el, next, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ printk ("!!DestroyAllocator: pool=%p type=%d addr=%x len=%x\n", pool, alloc->Type, -+ pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+ -+ // XXXX: FreePool (alloc, pool); -+ } -+ } -+ -+ spin_lock_destroy (&alloc->Lock); -+} -+ -+void -+ep_display_alloc (EP_ALLOC *alloc) -+{ -+ struct list_head *el; -+ int i; -+ int npools = 0; -+ int nbytes = 0; -+ int nfree = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ ep_debugf (DBG_DEBUG, "Kernel comms memory allocator %p type %d\n", alloc, alloc->Type); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_debugf (DBG_DEBUG, " POOL %4x: %p -> %x.%x\n", i, pool, pool->Handle.nmh_nmd.nmd_addr, -+ pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ npools++; -+ nbytes += pool->Handle.nmh_nmd.nmd_len; -+ } -+ } -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i++) -+ { -+ int n = 0; -+ -+ list_for_each (el, &alloc->Freelists[i - LN2_MIN_SIZE]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, Link[i - LN2_MIN_SIZE]); -+ int nbits = bt_nbits (pool->Bitmaps[i - LN2_MIN_SIZE], 1 << (LN2_MAX_SIZE - i)); -+ -+ n += nbits; -+ nfree += (nbits << i); -+ } -+ -+ if (n != 0) -+ ep_debugf (DBG_DEBUG, " SIZE %5d : num %d\n", (1 << i), n); -+ } -+ ep_debugf (DBG_DEBUG, "%d pools with %d bytes and %d bytes free\n", npools, nbytes, nfree); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+/* per-rail allocators */ -+void -+ep_alloc_init (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_PRIVATE_RMAP_SIZE, "PrivateMap", 1); -+ -+ ep_rmfree (rmap, EP_PRIVATE_TOP-EP_PRIVATE_BASE, EP_PRIVATE_BASE); -+ -+ InitialiseAllocator (&rail->ElanAllocator, EP_ALLOC_TYPE_PRIVATE_SDRAM, EP_PERM_ALL, rmap); -+ InitialiseAllocator (&rail->MainAllocator, EP_ALLOC_TYPE_PRIVATE_MAIN, EP_PERM_WRITE, rmap); -+ -+ rail->ElanAllocator.Data.Private.Rail = rail; -+ rail->MainAllocator.Data.Private.Rail = rail; -+} -+ -+void -+ep_alloc_fini (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = rail->ElanAllocator.ResourceMap; -+ -+ DestroyAllocator (&rail->ElanAllocator); -+ DestroyAllocator (&rail->MainAllocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+sdramaddr_t -+ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_POOL *pool = AllocatePool (&rail->ElanAllocator, addr, size, perm, attr); -+ unsigned long flags; -+ -+ if (pool == NULL) -+ return (0); -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ HashInPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (pool->Buffer.Sdram); -+} -+ -+void -+ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ pool = LookupPool (&rail->ElanAllocator, addr); -+ -+ HashOutPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ FreePool (&rail->ElanAllocator, pool); -+} -+ -+sdramaddr_t -+ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->ElanAllocator, size, attr, &offset)) == NULL) -+ return (0); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return (pool->Buffer.Sdram + offset); -+} -+ -+void -+ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->ElanAllocator, addr, size); -+} -+ -+void * -+ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->MainAllocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->MainAllocator, addr, size); -+} -+ -+sdramaddr_t -+ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ sdramaddr_t res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->ElanAllocator, addr)) == NULL) -+ res = 0; -+ else -+ res = pool->Buffer.Sdram + (addr - pool->Handle.nmh_nmd.nmd_addr); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+void * -+ep_elan2main (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ void *res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->MainAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->MainAllocator, addr)) == NULL) -+ res = NULL; -+ else -+ res = (void *) ((unsigned long) pool->Buffer.Ptr + (addr - pool->Handle.nmh_nmd.nmd_addr)); -+ spin_unlock_irqrestore (&rail->MainAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+/* shared allocators */ -+int -+ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ return (AddRail (&sys->Allocator, rail)); -+} -+ -+void -+ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ RemoveRail (&sys->Allocator, rail); -+} -+ -+void -+ep_shared_alloc_init (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_SHARED_RMAP_SIZE, "shared_alloc_map", 1); -+ -+ ep_rmfree (rmap, EP_SHARED_TOP - EP_SHARED_BASE, EP_SHARED_BASE); -+ -+ InitialiseAllocator (&sys->Allocator, EP_ALLOC_TYPE_SHARED_MAIN, EP_PERM_WRITE, rmap); -+ -+ INIT_LIST_HEAD (&sys->Allocator.Data.Shared.Rails); -+ -+ sys->Allocator.Data.Shared.System = sys; -+} -+ -+void -+ep_shared_alloc_fini (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = sys->Allocator.ResourceMap; -+ -+ DestroyAllocator (&sys->Allocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+void * -+ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&sys->Allocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ ep_nmd_subset (nmd, &pool->Handle.nmh_nmd, offset, size); -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd) -+{ -+ FreeBlock (&sys->Allocator, nmd->nmd_addr, nmd->nmd_len); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm.c 2005-06-01 23:12:54.664429224 -0400 -@@ -0,0 +1,1448 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm.c,v 1.50.2.9 2004/12/09 10:02:42 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+int MaxSwitchLevels = 5; /* Max 1024 sized machine */ -+ -+static char *NodeStateNames[EP_NODE_NUM_STATES] = -+{ -+ "Disconnected", -+ "Connecting", -+ "Connnected", -+ "LeavingConnected", -+ "LocalPassivate", -+ "RemotePassivate", -+ "Passivated", -+ "Disconnecting", -+}; -+ -+static void -+ep_xid_cache_fill (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->XidLock, flags); -+ -+ cache->Current = sys->XidNext; -+ cache->Last = cache->Current + EP_XID_CACHE_CHUNKS-1; -+ -+ sys->XidNext += EP_XID_CACHE_CHUNKS; -+ -+ spin_unlock_irqrestore (&sys->XidLock, flags); -+} -+ -+EP_XID -+ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ EP_XID xid; -+ -+ if (cache->Current == cache->Last) -+ ep_xid_cache_fill (sys, cache); -+ -+ xid.Generation = sys->XidGeneration; -+ xid.Handle = cache->Handle; -+ xid.Unique = cache->Current++; -+ -+ return (xid); -+} -+ -+void -+ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ cache->Handle = ++sys->XidHandle; -+ -+ list_add_tail (&cache->Link, &sys->XidCacheList); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ list_del (&cache->Link); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+EP_XID_CACHE * -+ep_xid_cache_find (EP_SYS *sys, EP_XID xid) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &sys->XidCacheList) { -+ EP_XID_CACHE *cache = list_entry (el, EP_XID_CACHE, Link); -+ -+ if (sys->XidGeneration == xid.Generation && cache->Handle == xid.Handle) -+ return (cache); -+ } -+ -+ return (NULL); -+} -+ -+static int -+MsgBusy (EP_RAIL *rail, EP_OUTPUTQ *outputq, int slotNum) -+{ -+ switch (rail->Operations.OutputQState (rail, outputq, slotNum)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ EP_MANAGER_MSG *msg = rail->Operations.OutputQMsg (rail, outputq, slotNum); -+ -+ EPRINTF4 (DBG_MANAGER, "%s: kcomm msg %d type %d to %d failed\n", rail->Name, slotNum, msg->Hdr.Type, msg->Hdr.DestId); -+#endif -+ break; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: /* anything else is finished */ -+ break; -+ } -+ -+ return 0; -+} -+ -+int -+ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ int n = EP_MANAGER_OUTPUTQ_SLOTS; -+ int slotNum; -+ int rnum; -+ EP_RAIL *msgRail; -+ EP_MANAGER_MSG *msg; -+ unsigned long flags; -+ -+ ASSERT (! EP_XID_INVALID (xid)); -+ -+ if ((rnum = ep_pickRail (node->ConnectedRails)) >= 0) -+ msgRail = sys->Rails[rnum]; -+ else -+ { -+ if (EP_MANAGER_MSG_TYPE_CONNECTED(type)) -+ { -+ ep_debugf (DBG_MANAGER, "%s: no rails available, trying to send type %d to %d\n", rail->Name, type, nodeId); -+ return -EHOSTDOWN; -+ } -+ -+ ep_debugf (DBG_MANAGER, "%s: no rails connected to %d - using receiving rail\n", rail->Name, nodeId); -+ -+ msgRail = rail; -+ } -+ -+ -+ spin_lock_irqsave (&msgRail->ManagerOutputQLock, flags); -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ -+ while (n-- > 0 && MsgBusy (msgRail, msgRail->ManagerOutputQ, slotNum)) /* search for idle message buffer */ -+ { -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ } -+ -+ if (n == 0) /* all message buffers busy */ -+ { -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ ep_debugf (DBG_MANAGER, "%s: all message buffers busy: trying to send type %d to %d\n", msgRail->Name, type, nodeId); -+ return -EBUSY; -+ } -+ -+ msg = msgRail->Operations.OutputQMsg (msgRail, msgRail->ManagerOutputQ, slotNum); -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ep_send_message: type=%d nodeId=%d rail=%d xid=%08x.%08x.%016llx\n", -+ msgRail->Name, type, nodeId, rail->Number, xid.Generation, xid.Handle, (long long) xid.Unique); -+ -+ msg->Hdr.Version = EP_MANAGER_MSG_VERSION; -+ msg->Hdr.Type = type; -+ msg->Hdr.Rail = rail->Number; -+ msg->Hdr.NodeId = msgRail->Position.pos_nodeid; -+ msg->Hdr.DestId = nodeId; -+ msg->Hdr.Xid = xid; -+ msg->Hdr.Checksum = 0; -+ -+ if (body) bcopy (body, &msg->Body, sizeof (EP_MANAGER_MSG_BODY)); -+ -+ msg->Hdr.Checksum = CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE); -+ -+ if (rail->Operations.OutputQSend (msgRail, msgRail->ManagerOutputQ, slotNum, EP_MANAGER_MSG_SIZE, -+ nodeId, EP_SYSTEMQ_MANAGER, EP_MANAGER_OUTPUTQ_RETRIES) < 0) -+ IncrStat (msgRail, SendMessageFailed); -+ -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) /* check this one last next time */ -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ return 0; -+} -+ -+void -+ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason) -+{ -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_MANAGER_MSG_BODY body; -+ EP_XID xid; -+ kcondvar_t sleep; -+ int rnum; -+ unsigned long flags; -+ -+ if (nodeId > sys->Position.pos_nodes) -+ return; -+ -+ strncpy (body.PanicReason, reason, sizeof (body.PanicReason)); -+ -+ kcondvar_init (&sleep); -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ for (;;) -+ { -+ if (node->ConnectedRails == 0) -+ break; -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (node->ConnectedRails & (1 << rnum)) -+ break; -+ -+ xid = ep_xid_cache_alloc(sys, &sys->Rails[rnum]->XidCache); -+ -+ if (ep_send_message (sys->Rails[rnum], nodeId, EP_MANAGER_MSG_TYPE_REMOTE_PANIC, xid, &body) == 0) -+ break; -+ -+ if (kcondvar_timedwaitsig (&sleep, &sys->NodeLock, &flags, lbolt + hz) == CV_RET_SIGPENDING) -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ kcondvar_destroy (&sleep); -+} -+ -+static void -+ProcessNeterrRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr request - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, msg->Body.Cookies[0], msg->Body.Cookies[1]); -+ -+ rail->Operations.NeterrFixup (rail, msg->Hdr.NodeId, msg->Body.Cookies); -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_NETERR_RESPONSE, msg->Hdr.Xid, &msg->Body); -+} -+ -+ -+static void -+ProcessNeterrResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned long flags; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr response - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, msg->Body.Cookies[0], msg->Body.Cookies[1]); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ if (nodeRail->NetworkErrorCookies[0] != 0 && nodeRail->NetworkErrorCookies[0] == msg->Body.Cookies[0]) -+ nodeRail->NetworkErrorCookies[0] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[1] != 0 && nodeRail->NetworkErrorCookies[1] == msg->Body.Cookies[1]) -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[0] == 0 && nodeRail->NetworkErrorCookies[1] == 0) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_ATOMIC_PACKET; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+ -+static void -+ProcessGetNodeState (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned int service = msg->Body.Service; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessGetNodeState: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ msg->Body.NodeState.State = nodeRail->State; -+ msg->Body.NodeState.NetworkErrorState = nodeRail->NetworkErrorState; -+ msg->Body.NodeState.Railmask = ep_rcvr_railmask (rail->System, service); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: get node state for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessFlushRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushRequest: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_REMOTE_PASSIVATE: -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; /* retransmit our flush request quickly */ -+ EPRINTF3 (DBG_MANAGER, "%s: ProcessFlushRequest: NextRunTime -> %lx (%lx)\n", rail->Name, nodeRail->NextRunTime, lbolt); -+ /* DROPTHROUGH */ -+ -+ case EP_NODE_PASSIVATED: -+ case EP_NODE_DISCONNECTED: -+ if (nodeRail->NetworkErrorState != 0) -+ break; -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE, msg->Hdr.Xid, NULL) < 0) -+ printk ("%s: flush request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+ break; -+ -+ default: -+ EPRINTF4 (DBG_MANAGER, "%s: flush request for %s[%d] - node not in approriate state - %s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, NodeStateNames[nodeRail->State]); -+ break; -+ } -+} -+ -+static void -+ProcessFlushResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail= &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushResponse: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid) ? " (XIDS match)" : ""); -+ -+ if (nodeRail->State == EP_NODE_REMOTE_PASSIVATE && EP_XIDS_MATCH(nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ printk ("%s: flush response from %d - move to passivated list\n", rail->Name, msg->Hdr.NodeId); -+ list_del (&nodeRail->Link); -+ -+ /* Node is now passivated - attempt to failover messages */ -+ list_add_tail (&nodeRail->Link, &rail->PassivatedList); -+ nodeRail->State = EP_NODE_PASSIVATED; -+ } -+ else -+ { -+ printk ("%s: flush response from %d - not passivating (%s) or XIDs mismatch (%llx %llx)\n", rail->Name, -+ msg->Hdr.NodeId, NodeStateNames[nodeRail->State], (long long) nodeRail->MsgXid.Unique, (long long) msg->Hdr.Xid.Unique); -+ } -+} -+ -+static void -+ProcessMapNmdRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_MAP_NMD_BODY *msgBody = &msg->Body.MapNmd; -+ int i; -+ -+ EPRINTF4 (DBG_MANAGER, "%s: Map NMD request from %d for %d NMDs to railmask %x\n", rail->Name, msg->Hdr.NodeId, msgBody->nFrags, msgBody->Railmask); -+ -+ for (i = 0; i < msgBody->nFrags; i++) -+ ep_nmd_map_rails (sys, &msgBody->Nmd[i], msgBody->Railmask); -+ -+ /* Must flush TLBs before responding */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (sys->Rails[i] && sys->Rails[i]->TlbFlushRequired) -+ ep_perrail_dvma_sync (sys->Rails[i]); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: map nmd request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessXidMessage (EP_RAIL *msgRail, EP_MANAGER_MSG *msg, EP_XID xid) -+{ -+ EP_XID_CACHE *xidCache = ep_xid_cache_find (msgRail->System, xid); -+ -+ EPRINTF6 (DBG_MANAGER, "%s: ProcessXidMessage: XID=%08x.%0x8.%016llx -> %p(%p)\n", -+ msgRail->Name, xid.Generation, xid.Handle, (long long) xid.Unique, -+ xidCache ? xidCache->MessageHandler : 0, xidCache ? xidCache->Arg : 0); -+ -+ if (xidCache != NULL) -+ xidCache->MessageHandler (xidCache->Arg, msg); -+} -+ -+static void -+ProcessMessage (EP_RAIL *msgRail, void *arg, void *msgbuf) -+{ -+ EP_SYS *sys = msgRail->System; -+ EP_MANAGER_MSG *msg = (EP_MANAGER_MSG *) msgbuf; -+ uint16_t csum = msg->Hdr.Checksum; -+ EP_RAIL *rail; -+ -+ if (msg->Hdr.Version != EP_MANAGER_MSG_VERSION) -+ return; -+ -+ msg->Hdr.Checksum= 0; -+ if (CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE) != csum) -+ { -+ printk ("%s: checksum failed on msg from %d (%d) (%x != %x) ?\n", msgRail->Name, msg->Hdr.NodeId, msg->Hdr.Type, csum, CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE)); -+ return; -+ } -+ -+ if ((rail = sys->Rails[msg->Hdr.Rail]) == NULL) -+ { -+ printk ("%s: rail no longer exists for msg from %d?\n", msgRail->Name, msg->Hdr.NodeId); -+ return; -+ } -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ProcessMessage (%s) type=%d node=%d XID=%08x.%0x8.%016llx\n", -+ msgRail->Name, rail->Name, msg->Hdr.Type, msg->Hdr.NodeId, -+ msg->Hdr.Xid.Generation, msg->Hdr.Xid.Handle, msg->Hdr.Xid.Unique); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_REMOTE_PANIC: -+ msg->Body.PanicReason[EP_PANIC_STRLEN] = '\0'; /* ensure string terminated */ -+ -+ printk ("%s: remote panic call from elan node %d - %s\n", msgRail->Name, msg->Hdr.NodeId, msg->Body.PanicReason); -+ panic ("ep: remote panic request\n"); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_REQUEST: -+ ProcessNeterrRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_RESPONSE: -+ ProcessNeterrResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_REQUEST: -+ ProcessFlushRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE: -+ ProcessFlushResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST: -+ ProcessMapNmdRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ ProcessXidMessage (msgRail, msg, msg->Body.Failover.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE: -+ ProcessGetNodeState (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ default: -+ printk ("%s: Unknown message type %d from %d\n", msgRail->Name, msg->Hdr.Type, msg->Hdr.NodeId); -+ break; -+ } -+} -+ -+ -+static void -+ManagerQueueEvent (EP_RAIL *rail, void *arg) -+{ -+ ep_kthread_schedule ((EP_KTHREAD *) arg, lbolt); -+} -+ -+void -+UpdateConnectionState (EP_RAIL *rail, statemap_t *map) -+{ -+ EP_SYS *sys = rail->System; -+ bitmap_t seg; -+ int offset, nodeId; -+ unsigned long flags; -+ -+ while ((offset = statemap_findchange (map, &seg, 1)) >= 0) -+ { -+ for (nodeId = offset; nodeId < (offset + BT_NBIPUL) && nodeId < rail->Position.pos_nodes; nodeId++) -+ { -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ -+ if (statemap_getbits (map, nodeId, 1)) -+ { -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_DISCONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnected \n", rail->Name, nodeId); -+ break; -+ -+ case EP_NODE_CONNECTING: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Connect\n", rail->Name, nodeId); -+ -+ /* load the route table entry *before* setting the state -+ * to connected, since DMA's can be initiated as soon as -+ * the node is marked as connected */ -+ rail->Operations.LoadNodeRoute (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_CONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 1, 1); -+ if (statemap_getbits (sys->NodeSet, nodeId, 1) == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 1, 1); -+ -+ /* Add to rails connected to this node */ -+ node->ConnectedRails |= (1 << rail->Number); -+ -+ /* Finally lower the per-node context filter */ -+ rail->Operations.LowerFilter (rail, nodeId); -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Local Passivate\n", rail->Name, nodeId); -+ -+ /* Raise the per-node context filter */ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ /* If it's resolving network errors it will be on the NodeNeterrList, -+ * remove if from this list before placing it on the LocalPassivateList -+ * as we'll resolve the network error later in RemotePassivate */ -+ if (nodeRail->NetworkErrorState) -+ list_del (&nodeRail->Link); -+ -+ list_add_tail (&nodeRail->Link, &rail->LocalPassivateList); -+ nodeRail->State = EP_NODE_LOCAL_PASSIVATE; -+ -+ /* Remove from rails connected to this node */ -+ node->ConnectedRails &= ~(1 << rail->Number); -+ break; -+ -+ default: -+ printk ("%s: Node %d - in NodeChangeMap with state %d\n", rail->Name, nodeId, nodeRail->State); -+ panic ("Node in NodeChangeMap with invalid state\n"); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ } -+ } -+ } -+} -+ -+void -+ProgressNetworkError (EP_RAIL *rail, EP_NODE_RAIL *nodeRail) -+{ -+ EP_SYS *sys = rail->System; -+ int nodeId = nodeRail - rail->Nodes; -+ EP_MANAGER_MSG_BODY msg; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_REMOTE_PASSIVATE); -+ -+ if (BEFORE (lbolt, nodeRail->NextRunTime)) -+ return; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_DMA_PACKET) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_DMA_PACKET; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_ATOMIC_PACKET) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ msg.Cookies[0] = nodeRail->NetworkErrorCookies[0]; -+ msg.Cookies[1] = nodeRail->NetworkErrorCookies[1]; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: progress neterr - node %d cookies %llx %llx\n", rail->Name, nodeId, msg.Cookies[0], msg.Cookies[1]); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_NETERR_REQUEST, nodeRail->MsgXid, &msg) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+} -+ -+long -+ProgressNodeLists (EP_RAIL *rail, long nextRunTime) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ list_for_each_safe (el, nel, &rail->NetworkErrorList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ ProgressNetworkError (rail, nodeRail); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: lower context filter for node %d due to network error\n", rail->Name, nodeId); -+ printk ("%s: lower context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.LowerFilter (rail, nodeId); -+ -+ list_del (&nodeRail->Link); -+ continue; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (! list_empty (&rail->LocalPassivateList)) -+ { -+ EPRINTF1 (DBG_MANAGER, "%s: Locally Passivating Nodes\n", rail->Name); -+ -+ /* We have disconnected from some nodes or have left ourselves -+ * flush through all communications and determine whether we -+ * need to perform rail failover */ -+ rail->Operations.FlushFilters (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FILTERING, rail->NodeSet); -+ -+ rail->Operations.FlushQueues (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FLUSHING, rail->NodeSet); -+ -+ while (! list_empty (&rail->LocalPassivateList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->LocalPassivateList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.UnloadNodeRoute (rail, nodeId); -+ -+ if (nodeRail->NetworkErrorState == 0 && nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnecting\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ } -+ else -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Remote Passivate\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->RemotePassivateList); -+ nodeRail->State = EP_NODE_REMOTE_PASSIVATE; -+ -+ if (nodeRail->NetworkErrorState == 0) -+ nodeRail->NextRunTime = lbolt; -+ } -+ } -+ -+ ep_call_callbacks (rail, EP_CB_PASSIVATED, rail->NodeSet); -+ } -+ -+ list_for_each_safe (el, nel, &rail->RemotePassivateList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ if (node->ConnectedRails == 0) /* no rails connected to this node (anymore) */ -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Remote Passivate -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->NetworkErrorState) -+ { -+ ProgressNetworkError (rail, nodeRail); -+ } -+ else if (! BEFORE (lbolt, nodeRail->NextRunTime)) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_FLUSH_REQUEST, nodeRail->MsgXid, NULL) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ -+ if (! list_empty (&rail->PassivatedList)) -+ { -+ ep_call_callbacks (rail, EP_CB_FAILOVER, rail->NodeSet); -+ -+ list_for_each_safe (el, nel, &rail->PassivatedList) { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->PassivatedList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ ASSERT (nodeRail->NetworkErrorState == 0); -+ -+ if (node->ConnectedRails == 0) -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Passivated -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no messages, Passivated -> Disconnecting\n", rail->Name,nodeId); -+ -+ list_del (&nodeRail->Link); -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ nodeRail->MessageState = 0; -+ nodeRail->NextRunTime = lbolt + FAILOVER_RETRY_TIME; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ } -+ -+ if (! list_empty (&rail->DisconnectingList)) -+ { -+ ep_call_callbacks (rail, EP_CB_DISCONNECTING, rail->NodeSet); -+ -+ while (! list_empty (&rail->DisconnectingList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->DisconnectingList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, Disconnecting -> Disconnected\n", rail->Name, nodeId); -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.NodeDisconnected (rail, nodeId); -+ -+ /* Clear the network error state */ -+ nodeRail->NextRunTime = 0; -+ nodeRail->NetworkErrorState = 0; -+ nodeRail->NetworkErrorCookies[0] = 0; -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ /* Clear the message state */ -+ nodeRail->MessageState = 0; -+ -+ cm_node_disconnected (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_DISCONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 0, 1); -+ -+ if (node->ConnectedRails == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 0, 1); -+ } -+ -+ ep_call_callbacks (rail, EP_CB_DISCONNECTED, rail->NodeSet); -+ } -+ -+ return (nextRunTime); -+} -+ -+void -+DisplayNodes (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ int i, state, count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ for (state = 0; state < EP_NODE_NUM_STATES; state++) -+ { -+ for (count = i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ ASSERT (rail->Nodes[i].State < EP_NODE_NUM_STATES); -+ -+ if (rail->Nodes[i].State == state) -+ if (state != EP_NODE_DISCONNECTED) -+ printk ("%s %d", !count++ ? NodeStateNames[state] : "", i); -+ } -+ if (count) -+ printk ("%s (%d total)\n", state == EP_NODE_DISCONNECTED ? NodeStateNames[state] : "", count); -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+static void -+PositionFound (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ int i; -+ -+ /* only called from the ep_managage whilst rail->State == EP_RAIL_STATE_STARTED */ -+ ASSERT ( rail->State == EP_RAIL_STATE_STARTED ); -+ -+#if defined(PER_CPU_TIMEOUT) -+ /* -+ * On Tru64 - if we're running in a "funnelled" thread, then we will be -+ * unable to start the per-cpu timeouts, so if we return then eventually -+ * the ep_manager() thread will find the network position and we're -+ * in control of our own destiny. -+ */ -+ if (THREAD_IS_FUNNELED(current_thread())) -+ { -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ return; -+ } -+#endif -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ -+ if (pos->pos_levels > MaxSwitchLevels) -+ { -+ for (i = 0; i < (pos->pos_levels - MaxSwitchLevels); i++) -+ pos->pos_nodes /= pos->pos_arity[i]; -+ -+ for (i = 0; i < MaxSwitchLevels; i++) -+ pos->pos_arity[i] = pos->pos_arity[i + (pos->pos_levels - MaxSwitchLevels)]; -+ -+ pos->pos_levels = MaxSwitchLevels; -+ pos->pos_nodeid = pos->pos_nodeid % pos->pos_nodes; -+ -+ printk ("%s: limiting switch levels to %d\n", rail->Name, MaxSwitchLevels); -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", rail->Name, pos->pos_nodeid, pos->pos_levels, pos->pos_nodes); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ } -+ -+ if (rail->Position.pos_mode != ELAN_POS_UNKNOWN && rail->Position.pos_nodeid != pos->pos_nodeid) -+ { -+ printk ("%s: NodeId has changed from %d to %d\n", rail->Name, rail->Position.pos_nodeid, pos->pos_nodeid); -+ panic ("ep: PositionFound: NodeId has changed\n"); -+ } -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN && (sys->Position.pos_nodeid != pos->pos_nodeid || sys->Position.pos_nodes != pos->pos_nodes)) -+ { -+ printk ("%s: position incompatible - disabling rail\n", rail->Name); -+ rail->State = EP_RAIL_STATE_INCOMPATIBLE; -+ return; -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ { -+ sys->Position = *pos; -+ sys->NodeSet = statemap_create (pos->pos_nodes); -+ KMEM_ZALLOC (sys->Nodes, EP_NODE *, pos->pos_nodes * sizeof (EP_NODE), 1); -+ } -+ -+ rail->Position = *pos; -+ rail->SwitchBroadcastLevel = pos->pos_levels - 1; -+ rail->State = EP_RAIL_STATE_RUNNING; -+ -+ for (i = 0; i < pos->pos_levels; i++) -+ { -+ rail->SwitchProbeTick[i] = lbolt; -+ rail->SwitchLast[i].uplink = 4; -+ } -+ -+ rail->Operations.PositionFound (rail, pos); -+ -+ INIT_LIST_HEAD (&rail->NetworkErrorList); -+ INIT_LIST_HEAD (&rail->LocalPassivateList); -+ INIT_LIST_HEAD (&rail->RemotePassivateList); -+ INIT_LIST_HEAD (&rail->PassivatedList); -+ INIT_LIST_HEAD (&rail->DisconnectingList); -+ -+ rail->NodeSet = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeMap = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeTmp = statemap_create (rail->Position.pos_nodes); -+ -+ KMEM_ZALLOC (rail->Nodes, EP_NODE_RAIL *, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL), 1); -+ -+ for (i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ spin_lock_init (&rail->Nodes[i].CookieLock); -+ -+ INIT_LIST_HEAD (&rail->Nodes[i].StalledDmas); -+ -+ rail->Nodes[i].State = EP_NODE_DISCONNECTED; -+ } -+ -+ /* Notify all subsystems that a new rail has been enabled */ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->AddRail) -+ subsys->AddRail (subsys, sys, rail); -+ -+ /* XXXX: what to do if the subsystem refused to add the rail ? */ -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ /* Now enable the manager input queue */ -+ ep_enable_inputq (rail, rail->ManagerInputQ); -+} -+ -+static void -+ep_manager (void *arg) -+{ -+ EP_SYS *sys = (EP_SYS *) arg; -+ struct list_head *el; -+ ELAN_POSITION pos; -+ unsigned long flags; -+ -+ kernel_thread_init ("ep_manager"); -+ kernel_thread_become_highpri(); -+ -+ for (;;) -+ { -+ long nextRunTime = lbolt + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT); -+ -+ list_for_each (el, &sys->ManagedRails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL, ManagerLink); -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_STARTED: -+ if (ProbeNetwork (rail, &pos) == 0) -+ { -+ PositionFound (rail, &pos); -+ break; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + HZ)) -+ nextRunTime = lbolt + HZ; -+ break; -+ -+ case EP_RAIL_STATE_RUNNING: -+ if (ep_poll_inputq (rail, rail->ManagerInputQ, 100, ProcessMessage, rail) >= 100) -+ nextRunTime = lbolt; -+ -+ /* Handle any nodes which the cluster membership subsystem -+ * has indicated are to begin connecting or disconnecting */ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (! statemap_changed (rail->NodeChangeMap)) -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ else -+ { -+ /* -+ * Take a copy of the statemap, and zero all entries so -+ * we only see new requests next time -+ */ -+ statemap_copy (rail->NodeChangeTmp, rail->NodeChangeMap); -+ statemap_zero (rail->NodeChangeMap); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ UpdateConnectionState (rail, rail->NodeChangeTmp); -+ } -+ -+ nextRunTime = ProgressNodeLists (rail, nextRunTime); -+ -+ if (statemap_changed (rail->NodeSet)) -+ { -+ ep_call_callbacks (rail, EP_CB_NODESET, rail->NodeSet); -+ -+ statemap_clearchanges (rail->NodeSet); -+ } -+ break; -+ -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ break; -+ } -+ } -+ -+ -+ EPRINTF5 (DBG_MANAGER, "ep_manager: sleep now=%lx nextRunTime=%lx (%ld) [%lx (%ld)]\n", -+ lbolt, nextRunTime, nextRunTime ? nextRunTime - lbolt : 0, sys->ManagerThread.next_run, -+ sys->ManagerThread.next_run ? sys->ManagerThread.next_run - lbolt : 0); -+ -+ if (ep_kthread_sleep (&sys->ManagerThread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&sys->ManagerThread); -+ kernel_thread_exit(); -+} -+ -+void -+ep_connect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: ep_connect_node: nodeId %d\n", rail->Name, nodeId); -+ -+ ASSERT (node->State == EP_NODE_DISCONNECTED && statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ -+ node->State = EP_NODE_CONNECTING; -+ -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+} -+ -+int -+ep_disconnect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ int state; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF3 (DBG_MANAGER, "%s: ep_disconnect_node: nodeId %d - %s\n", rail->Name, nodeId, NodeStateNames[node->State]); -+ -+ switch (state = node->State) -+ { -+ case EP_NODE_CONNECTING: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 0, 1); -+ -+ node->State = EP_NODE_DISCONNECTED; -+ break; -+ -+ case EP_NODE_CONNECTED: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ node->State = EP_NODE_LEAVING_CONNECTED; -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ /* no assert on NodeChangeMap as the map could have been taken but not acted on */ -+ break; -+ -+ default: -+ ASSERT (statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (state == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ -+ return state; -+} -+ -+int -+ep_manager_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if ((rail->ManagerOutputQ = ep_alloc_outputq (rail, EP_MANAGER_MSG_SIZE, EP_MANAGER_OUTPUTQ_SLOTS)) == NULL) -+ return -ENOMEM; -+ -+ if ((rail->ManagerInputQ = ep_alloc_inputq (rail, EP_SYSTEMQ_MANAGER, EP_MANAGER_MSG_SIZE, EP_MANAGER_INPUTQ_SLOTS, -+ ManagerQueueEvent, &sys->ManagerThread)) == NULL) -+ { -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init (&rail->ManagerOutputQLock); -+ -+ ep_xid_cache_init (sys, &rail->XidCache); -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_add_tail (&rail->ManagerLink, &sys->ManagedRails); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if (rail->ManagerInputQ != NULL) -+ { -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&rail->ManagerLink); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ ep_xid_cache_destroy (sys, &rail->XidCache); -+ -+ spin_lock_destroy (&rail->ManagerOutputQLock); -+ -+ ep_disable_inputq (rail, rail->ManagerInputQ); -+ ep_free_inputq (rail, rail->ManagerInputQ); -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ } -+} -+ -+int -+ep_manager_init (EP_SYS *sys) -+{ -+ INIT_LIST_HEAD (&sys->ManagedRails); -+ -+ ep_kthread_init (&sys->ManagerThread); -+ -+ if (kernel_thread_create (ep_manager, (void *) sys) == 0) -+ return (ENOMEM); -+ -+ ep_kthread_started (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_fini (EP_SYS *sys) -+{ -+ ep_kthread_stop (&sys->ManagerThread); -+ ep_kthread_destroy (&sys->ManagerThread); -+} -+ -+int -+ep_sys_init (EP_SYS *sys) -+{ -+ kmutex_init (&sys->SubsysLock); -+ kmutex_init (&sys->StartStopLock); -+ spin_lock_init (&sys->NodeLock); -+ -+ INIT_LIST_HEAD (&sys->Subsystems); -+ -+ /* initialise the xid allocators */ -+ spin_lock_init (&sys->XidLock); -+ INIT_LIST_HEAD (&sys->XidCacheList); -+ -+ /* initially don't know where we are in the network */ -+ sys->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* initialise the network mapping descriptor hash tables */ -+ ep_nmh_init (&sys->MappingTable); -+ -+ /* intialise the shared allocators */ -+ ep_shared_alloc_init (sys); -+ -+ /* initialise the dvma space */ -+ ep_dvma_init (sys); -+ -+ /* intiialise the rail manager */ -+ ep_manager_init (sys); -+ -+ /* initialise all subsystems */ -+ cm_init (sys); -+ ep_comms_init (sys); -+ //ep_msgsys_init (sys); -+ -+ return (0); -+} -+ -+void -+ep_sys_fini (EP_SYS *sys) -+{ -+ /* Destroy the subsystems in the reverse order to their creation */ -+ while (! list_empty (&sys->Subsystems)) -+ { -+ EP_SUBSYS *subsys = list_entry (sys->Subsystems.prev, EP_SUBSYS, Link); -+ -+ list_del (&subsys->Link); -+ -+ subsys->Destroy (subsys, sys); -+ } -+ -+ ep_manager_fini(sys); -+ ep_dvma_fini (sys); -+ ep_shared_alloc_fini (sys); -+ -+ ep_nmh_fini (&sys->MappingTable); -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN) { -+ statemap_destroy (sys->NodeSet); -+ KMEM_FREE(sys->Nodes, sys->Position.pos_nodes * sizeof (EP_NODE)); -+ } -+ -+ spin_lock_destroy (&sys->XidLock); -+ -+ spin_lock_destroy (&sys->NodeLock); -+ kmutex_destroy (&sys->SubsysLock); -+ kmutex_destroy (&sys->StartStopLock); -+} -+ -+void -+ep_shutdown (EP_SYS *sys) -+{ -+ sys->Shutdown = 1; -+} -+ -+int -+ep_init_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ static int rnum; -+ -+ rail->System = sys; -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+ rail->Number = rnum++; -+ rail->Position.pos_mode = ELAN_POS_UNKNOWN; -+ rail->Position.pos_nodeid = ELAN_INVALID_NODE; -+ -+ rail->CallbackRegistered = 0; -+ -+ sprintf (rail->Name, "ep%d", rail->Number); -+ -+ /* Initialise externally visible locks */ -+ kmutex_init (&rail->CallbackLock); -+ -+ ep_alloc_init (rail); -+ -+ sys->Rails[rail->Number] = rail; -+ -+ return 0; -+} -+ -+void -+ep_destroy_rail (EP_RAIL *rail) -+{ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ ep_alloc_fini (rail); -+ -+ kmutex_destroy (&rail->CallbackLock); -+ -+ rail->System->Rails[rail->Number] = NULL; -+ -+ rail->Operations.DestroyRail (rail); -+} -+ -+/* We need to traverse the Subsystems lists backwards -+ * but it's not defined in */ -+#define list_for_each_backwards(pos,list) \ -+ for (pos = (list)->prev; pos != (list); \ -+ pos = (pos)->prev) -+ -+void -+__ep_stop_rail (EP_RAIL *rail) -+{ -+ /* called holding the sys->Lock */ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ -+ rail->Operations.StallRail (rail); -+ -+ /* Notify all subsystems that this rail is being stopped */ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each_backwards (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->RemoveRail) -+ subsys->RemoveRail (subsys, sys, rail); -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ ep_manager_remove_rail (sys, rail); -+ -+ KMEM_FREE (rail->Nodes, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL)); -+ -+ statemap_destroy (rail->NodeChangeTmp); -+ statemap_destroy (rail->NodeChangeMap); -+ statemap_destroy (rail->NodeSet); -+ } -+ -+ ep_dvma_remove_rail (sys, rail); -+ ep_shared_alloc_remove_rail (sys, rail); -+ -+ rail->Operations.StopRail (rail); -+ -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+} -+ -+void -+ep_stop_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ /* stall ep_manager */ -+ /* and remove the rail from the manaager */ -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ if ( rail->State == EP_RAIL_STATE_STARTED ) -+ ep_manager_remove_rail (sys, rail); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ __ep_stop_rail (rail); -+} -+ -+int -+ep_start_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ if (rail->Operations.StartRail (rail) < 0) -+ return -ENXIO; -+ -+ kmutex_lock (&sys->StartStopLock); -+ /* Add this rail to the shared allocator */ -+ if (ep_shared_alloc_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* Add this rail to dvma kmap */ -+ if (ep_dvma_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* rail is now started */ -+ rail->State = EP_RAIL_STATE_STARTED; -+ -+ /* notify the rail manager of the new rail */ -+ if (ep_manager_add_rail (rail->System, rail)) -+ goto failed; -+ -+ kmutex_unlock (&sys->StartStopLock); -+ return (ESUCCESS); -+ -+ failed: -+ printk ("%s: start failed\n", rail->Name); -+ kmutex_unlock (&sys->StartStopLock); -+ __ep_stop_rail (rail); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_add_tail (&subsys->Link, &sys->Subsystems); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+void -+ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_del (&subsys->Link); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+EP_SUBSYS * -+ep_subsys_find (EP_SYS *sys, char *name) -+{ -+ struct list_head *el; -+ -+ ASSERT ( !in_interrupt()); -+ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (! strcmp (subsys->Name, name)) -+ { -+ kmutex_unlock (&sys->SubsysLock); -+ return (subsys); -+ } -+ } -+ -+ kmutex_unlock (&sys->SubsysLock); -+ return (NULL); -+} -+ -+int -+ep_waitfor_nodeid (EP_SYS *sys) -+{ -+ int i, printed = 0; -+ kcondvar_t Sleep; -+ spinlock_t Lock; -+ -+ kcondvar_init (&Sleep); -+ spin_lock_init (&Lock); -+ -+#define TICKS_TO_WAIT (10*hz) -+#define TICKS_PER_LOOP (hz/10) -+ for (i = 0; sys->Position.pos_mode == ELAN_POS_UNKNOWN && i < TICKS_TO_WAIT; i += TICKS_PER_LOOP) -+ { -+ if (! printed++) -+ printk ("ep: waiting for network position to be found\n"); -+ -+ spin_lock (&Lock); -+ kcondvar_timedwait (&Sleep, &Lock, NULL, lbolt + TICKS_PER_LOOP); -+ spin_unlock (&Lock); -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ printk ("ep: network position not found after waiting\n"); -+ else if (printed) -+ printk ("ep: network position found at nodeid %d\n", sys->Position.pos_nodeid); -+ -+ spin_lock_destroy (&Lock); -+ kcondvar_destroy (&Sleep); -+ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_nodeid (EP_SYS *sys) -+{ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_numnodes (EP_SYS *sys) -+{ -+ return (sys->Position.pos_nodes); -+} -+ -+void -+ep_fillout_stats(EP_RAIL *r, char *str) -+{ -+ sprintf(str+strlen(str),"SendMessageFailed %lu NeterrAtomicPacket %lu NeterrDmaPacket %lu \n", r->Stats.SendMessageFailed, r->Stats.NeterrAtomicPacket, r->Stats.NeterrDmaPacket); -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,rx), GET_STAT_PER_SEC(r->Stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,rx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,rx_len) / (1024*1024)); -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,tx), GET_STAT_PER_SEC(r->Stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,tx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,tx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan3.c 2005-06-01 23:12:54.665429072 -0400 -@@ -0,0 +1,504 @@ -+ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan3.c,v 1.31.8.3 2004/11/30 12:02:17 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan3; -+ -+unsigned int -+ep3_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN3_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan3_device (i)) != NULL) -+ { -+ if ((rail = ep3_create_rail (sys, dev)) != NULL) -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev) -+{ -+ EP3_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP3_RAIL *, sizeof (EP3_RAIL), TRUE); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->Generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->Device = dev; -+ -+ /* Install our rail operations */ -+ rail->Generic.Operations.DestroyRail = ep3_destroy_rail; -+ rail->Generic.Operations.StartRail = ep3_start_rail; -+ rail->Generic.Operations.StallRail = ep3_stall_rail; -+ rail->Generic.Operations.StopRail = ep3_stop_rail; -+ -+ rail->Generic.Operations.SdramAlloc = ep3_sdram_alloc; -+ rail->Generic.Operations.SdramFree = ep3_sdram_free; -+ rail->Generic.Operations.SdramWriteb = ep3_sdram_writeb; -+ -+ rail->Generic.Operations.KaddrMap = ep3_kaddr_map; -+ rail->Generic.Operations.SdramMap = ep3_sdram_map; -+ rail->Generic.Operations.Unmap = ep3_unmap; -+ -+ rail->Generic.Operations.DvmaReserve = ep3_dvma_reserve; -+ rail->Generic.Operations.DvmaRelease = ep3_dvma_release; -+ rail->Generic.Operations.DvmaSetPte = ep3_dvma_set_pte; -+ rail->Generic.Operations.DvmaReadPte = ep3_dvma_read_pte; -+ rail->Generic.Operations.DvmaUnload = ep3_dvma_unload; -+ rail->Generic.Operations.FlushTlb = ep3_flush_tlb; -+ -+ rail->Generic.Operations.ProbeRoute = ep3_probe_route; -+ rail->Generic.Operations.PositionFound = ep3_position_found; -+ rail->Generic.Operations.CheckPosition = ep3_check_position; -+ rail->Generic.Operations.NeterrFixup = ep3_neterr_fixup; -+ -+ rail->Generic.Operations.LoadSystemRoute = ep3_load_system_route; -+ -+ rail->Generic.Operations.LoadNodeRoute = ep3_load_node_route; -+ rail->Generic.Operations.UnloadNodeRoute = ep3_unload_node_route; -+ rail->Generic.Operations.LowerFilter = ep3_lower_filter; -+ rail->Generic.Operations.RaiseFilter = ep3_raise_filter; -+ rail->Generic.Operations.NodeDisconnected = ep3_node_disconnected; -+ -+ rail->Generic.Operations.FlushFilters = ep3_flush_filters; -+ rail->Generic.Operations.FlushQueues = ep3_flush_queues; -+ -+ rail->Generic.Operations.AllocInputQ = ep3_alloc_inputq; -+ rail->Generic.Operations.FreeInputQ = ep3_free_inputq; -+ rail->Generic.Operations.EnableInputQ = ep3_enable_inputq; -+ rail->Generic.Operations.DisableInputQ = ep3_disable_inputq; -+ rail->Generic.Operations.PollInputQ = ep3_poll_inputq; -+ -+ rail->Generic.Operations.AllocOutputQ = ep3_alloc_outputq; -+ rail->Generic.Operations.FreeOutputQ = ep3_free_outputq; -+ rail->Generic.Operations.OutputQMsg = ep3_outputq_msg; -+ rail->Generic.Operations.OutputQState = ep3_outputq_state; -+ rail->Generic.Operations.OutputQSend = ep3_outputq_send; -+ -+ rail->Generic.Operations.FillOutStats = ep3_fillout_stats; -+ -+ rail->Generic.Devinfo = dev->Devinfo; -+ -+ printk ("%s: connected via elan3 rev%c device %d\n", rail->Generic.Name, -+ 'a' + dev->Devinfo.dev_revision_id, dev->Instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep3_destroy_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+} -+ -+static int -+ep3_attach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ ELAN3_CTXT *ctxt; -+ ELAN_CAPABILITY *cap; -+ int ctx; -+ unsigned long flags; -+ -+ if ((ctxt = elan3_alloc (dev, TRUE)) == (ELAN3_CTXT *) NULL) -+ { -+ printk ("%s: cannot allocate elan context\n", rail->Generic.Name); -+ return -ENXIO; -+ } -+ -+ ctxt->Operations = &ep3_elan3_ops; -+ ctxt->Private = (void *) rail; -+ -+ /* Initialise a capability and attach to the elan*/ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ elan_nullcap (cap); -+ -+ cap->cap_type = ELAN_CAP_TYPE_KERNEL; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ cap->cap_mycontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_lowcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_highcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_railmask = 1 << dev->Devinfo.dev_rail; -+ -+ /* Ensure the context filter is raised while we initialise */ -+ elan3_block_inputter (ctxt, TRUE); -+ -+ if (elan3_doattach (ctxt, cap) != 0) -+ { -+ printk ("%s: cannot attach to kernel context\n", rail->Generic.Name); -+ -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ elan3_free (ctxt); -+ return -ENXIO; -+ } -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ -+ /* now attach to all the kernel comms input/dmaring/data contexts */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Stash the ctxt,commandport, mmu and route table */ -+ rail->Ctxt = ctxt; -+ rail->CommandPort = ctxt->CommandPort; -+ rail->Elan3mmu = ctxt->Elan3mmu; -+ rail->RouteTable = ctxt->RouteTable; -+ -+ return 0; -+} -+ -+static void -+ep3_detach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int ctx; -+ -+ /* detach from the elan */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_dodetach(rail->Ctxt); -+ elan3_free (rail->Ctxt); -+ -+ rail->Ctxt = NULL; -+ rail->CommandPort = 0; -+ rail->Elan3mmu = NULL; -+ rail->RouteTable = NULL; -+} -+ -+int -+ep3_start_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int i, res; -+ unsigned long flags; -+ -+ if ((res = ep3_attach_rail (rail)) != 0) -+ return res; -+ -+ spin_lock_init (&rail->CookieLock); -+ kmutex_init (&rail->HaltOpMutex); -+ kcondvar_init (&rail->HaltOpSleep); -+ -+ /* Initialise event interrupt cookie table */ -+ InitialiseCookieTable (&rail->CookieTable); -+ -+ /* Load and map the thread code */ -+ rail->ThreadCode = threadcode_elan3; -+ if (ep_loadcode (&rail->Generic, &rail->ThreadCode) != ESUCCESS) -+ goto failed; -+ -+ /* Map the command port to be visible to the Elan */ -+ ep3_ioaddr_map (&rail->Generic, EP3_COMMANDPORT_ADDR, rail->Ctxt->CommandPage, PAGESIZE, EP_PERM_WRITE); -+ rail->CommandPortAddr = EP3_COMMANDPORT_ADDR + (rail->Ctxt->CommandPort - rail->Ctxt->CommandPage); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->RailElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RAIL_ELAN), 0, &rail->RailElanAddr)) == 0 || -+ (rail->RailMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RAIL_MAIN), 0, &rail->RailMainAddr)) == 0) -+ { -+ goto failed; -+ } -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ if (! (rail->QueueDescs = ep_alloc_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE, PAGESIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan3_sdram_writel (rail->Device, EP_SYSTEMQ_DESC(rail->QueueDescs, i) + offsetof (EP3_InputQueue, q_state), E3_QUEUE_FULL); -+ -+ /* initialise the dma rings */ -+ if (DmaRingsCreate (rail)) -+ goto failed; -+ -+ if (InitialiseDmaRetries (rail)) -+ goto failed; -+ -+ if (ep3_init_probenetwork (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, FALSE, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ return 0; -+ -+ failed: -+ printk ("ep3_start_rail: failed for rail %d\n", rail->Generic.Number); -+ ep3_stop_rail (&rail->Generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep3_stall_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int ctx; -+ unsigned long flags; -+ -+ /* raise all the context filters */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_stop_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ep3_destroy_probenetwork (rail); -+ -+ if (rail->DmaRetryInitialised) -+ DestroyDmaRetries (rail); -+ -+ DmaRingsRelease(rail); -+ -+ if (rail->Generic.State == EP_RAIL_STATE_RUNNING) -+ { -+ KMEM_FREE (rail->MainCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ -+ ep_free_elan (&rail->Generic, rail->ElanCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ } -+ -+ if (rail->QueueDescs) -+ ep_free_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE); -+ rail->QueueDescs = 0; -+ -+ if (rail->RailMain) -+ ep_free_main (&rail->Generic, rail->RailMainAddr, sizeof (EP3_RAIL_MAIN)); -+ rail->RailMain = 0; -+ -+ if (rail->RailElan) -+ ep_free_elan (&rail->Generic, rail->RailElanAddr, sizeof (EP3_RAIL_ELAN)); -+ rail->RailElan = 0; -+ -+ ep_unloadcode (&rail->Generic, &rail->ThreadCode); -+ -+ DestroyCookieTable (&rail->CookieTable); -+ -+ ep_perrail_unmap (&rail->Generic, rail->Ctxt->CommandPage, PAGESIZE); -+ -+ kcondvar_destroy (&rail->HaltOpSleep); -+ kmutex_destroy (&rail->HaltOpMutex); -+ spin_lock_destroy (&rail->CookieLock); -+ -+ ep3_detach_rail (rail); -+} -+ -+void -+ep3_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t addr; -+ -+ rail->SwitchBroadcastLevelTick = lbolt; -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, NodeId), pos->pos_nodeid); -+ -+ /* Allocate Network Identify cookie state */ -+ KMEM_ZALLOC (rail->MainCookies, E3_uint32 *, pos->pos_nodes * sizeof (E3_uint32), 1); -+ -+ if (! (addr = ep_alloc_elan (&rail->Generic, pos->pos_nodes * sizeof (E3_uint32), 0, &rail->ElanCookies))) -+ panic ("ep: PositionFound: cannot allocate elan cookies array\n"); -+ -+ elan3_sdram_zeroq_sdram (rail->Device, addr, pos->pos_nodes * sizeof (E3_uint32)); -+ -+ ep3_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep3_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ return elan3_sdram_alloc (((EP3_RAIL *) r)->Device, size); -+} -+ -+void -+ep3_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan3_sdram_free (((EP3_RAIL *) r)->Device, addr, size); -+} -+ -+void -+ep3_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan3_sdram_writeb (((EP3_RAIL *) r)->Device, addr, val); -+} -+ -+void -+ep3_flush_tlb (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ -+ IncrStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob (); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+ep3_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, vp, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ /* XXXX: whilst LoadRoute() can fail - it is not likely. */ -+ panic ("ep3_load_system_route: cannot load p2p route entry\n"); -+ } -+} -+ -+void -+ep3_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, nodeId, nodeId, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId), EP3_CONTEXT_NUM(rail->Generic.Position.pos_nodeid), nflits, flits) != 0) -+ panic ("ep3_load_node_route: cannot load p2p data route entry\n"); -+} -+ -+void -+ep3_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ClearRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep3_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 0, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 1, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ FreeStalledDmas ((EP3_RAIL *) r, nodeId); -+} -+ -+void -+ep3_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP3_RAIL *ep3rail = (EP3_RAIL *)r; */ -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan3.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm_elan3.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan3.h 2005-06-01 23:12:54.666428920 -0400 -@@ -0,0 +1,431 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN3_H -+#define __EP_KCOMM_ELAN3_H -+ -+#ident "@(#)$Id: kcomm_elan3.h,v 1.50.8.3 2004/12/14 10:19:14 mike Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.h,v $*/ -+ -+#if !defined(__ELAN3__) -+#include -+#include -+#include -+#include -+#include -+#include -+#endif /* !defined(__ELAN3__) */ -+ -+#include -+ -+/* private address allocation */ -+#define EP3_TEXT_BASE 0xFF000000 /* base address for thread code (defined in makerules.elan3) */ -+#define EP3_COMMANDPORT_ADDR 0xFFF00000 /* mapping address for elan command port */ -+ -+#define EP3_STACK_SIZE 1024 /* default thread code stack size */ -+ -+#define EP3_PACEMAKER_EVENTADDR 0xfeedbeef /* mis-aligned address used by heartbeat pacemaker */ -+ -+/* context number allocation */ -+#define EP3_CONTEXT_NUM(nodeId) ((ELAN3_KCOMM_BASE_CONTEXT_NUM + (nodeId)) | SYS_CONTEXT_BIT) -+#define EP3_CONTEXT_ISDATA(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) >= ELAN3_KCOMM_BASE_CONTEXT_NUM && \ -+ ((ctx) & MAX_ROOT_CONTEXT_MASK) <= ELAN3_KCOMM_TOP_CONTEXT_NUM) -+#define EP3_CONTEXT_TO_NODE(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) - ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* DMA issueing rings */ -+#define EP3_RING_CRITICAL 0 -+#define EP3_RING_CRITICAL_LEN 128 -+#define EP3_RING_HIGH_PRI 1 -+#define EP3_RING_HIGH_PRI_LEN 64 -+#define EP3_RING_LOW_PRI 2 -+#define EP3_RING_LOW_PRI_LEN 32 -+#define EP3_NUM_RINGS 3 -+ -+/* Value to "return" from c_close() when envelope handled by the trap handler */ -+#define EP3_PAckStolen 4 -+ -+/* unimplemented instruction trap types for thread code */ -+#define EP3_UNIMP_TRAP_NO_DESCS 0 -+#define EP3_UNIMP_TRAP_PACKET_NACKED 1 -+#define EP3_UNIMP_THREAD_HALTED 2 -+#define EP3_NUM_UNIMP_TRAPS 3 -+ -+/* forward declarations */ -+typedef struct ep3_rail EP3_RAIL; -+ -+/* block copy elan3 inputter queue - with waitvent0 */ -+typedef struct ep3_inputqueue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue block copy event */ -+ E3_uint32 q_pad[4]; /* pad to 64 bytes */ -+ E3_Addr q_wevent; /* WaitEvent0 struct */ -+ E3_int32 q_wcount; -+} EP3_InputQueue; -+ -+ -+#if !defined(__ELAN3__) -+ -+/* dma retries types and retry times */ -+typedef struct ep3_retry_dma -+{ -+ struct list_head Link; /* chained on free/retry list */ -+ long RetryTime; /* "lbolt" to retry at */ -+ E3_DMA_BE Dma; /* DMA (in main memory) */ -+} EP3_RETRY_DMA; -+ -+typedef struct ep3_dma_ring -+{ -+ sdramaddr_t pEvent; -+ E3_Addr epEvent; -+ -+ sdramaddr_t pDma; -+ E3_Addr epDma; -+ -+ E3_uint32 *pDoneBlk; -+ E3_Addr epDoneBlk; -+ -+ int Entries; /* number of slots in array */ -+ int Position; /* current position in array */ -+ -+ ioaddr_t CommandPort; -+ ioaddr_t CommandPage; -+ DeviceMappingHandle CommandPageHandle; -+} EP3_DMA_RING; -+ -+#define DMA_RING_EVENT(ring,n) ((ring)->pEvent + (n)*sizeof (E3_BlockCopyEvent)) -+#define DMA_RING_EVENT_ELAN(ring,n) ((ring)->epEvent + (n)*sizeof (E3_BlockCopyEvent)) -+ -+#define DMA_RING_DMA(ring,n) ((ring)->pDma + (n)*sizeof (E3_DMA)) -+#define DMA_RING_DMA_ELAN(ring,n) ((ring)->epDma + (n)*sizeof (E3_DMA)) -+ -+#define DMA_RING_DONE_ELAN(ring,n) ((ring)->epDoneBlk + (n)*sizeof (E3_uint32)) -+ -+/* Event interrupt cookie operations and lookup table */ -+typedef struct ep3_cookie_ops -+{ -+ void (*Event) (EP3_RAIL *rail, void *arg); /* called from the interrupt handler when an event is "set" */ -+ void (*DmaRetry) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error); /* called from the interrupt handler when a DMA is "nacked" */ -+ void (*DmaCancelled)(EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from the interrupt handler/flush disconnecting when cancelled. */ -+ void (*DmaVerify) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from multiple places, to check dma is consistent with state. */ -+} EP3_COOKIE_OPS; -+ -+typedef struct ep3_cookie -+{ -+ struct ep3_cookie *Next; /* Cookies are chained in hash table. */ -+ E3_uint32 Cookie; /* Cooke store in ev_Type */ -+ EP3_COOKIE_OPS *Operations; /* Cookie operations */ -+ void *Arg; /* Users arguement. */ -+} EP3_COOKIE; -+ -+#define EP3_COOKIE_HASH_SIZE (256) -+#define EP3_HASH_COOKIE(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP3_COOKIE_HASH_SIZE-1)) -+ -+typedef struct ep3_cookie_table -+{ -+ spinlock_t Lock; -+ EP3_COOKIE *Entries[EP3_COOKIE_HASH_SIZE]; -+} EP3_COOKIE_TABLE; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#define EP3_EVENT_FREE ((1 << 4) | EV_WCOPY) -+#define EP3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+/* DONE == Cookie */ -+#define EP3_EVENT_FAILED ((3 << 4) | EV_WCOPY) -+#define EP3_EVENT_PRIVATE ((4 << 4) | EV_WCOPY) -+ -+/* The event cookie can get posted (and seen) before the write has */ -+/* hit main memory - in this case the event count is <= 0 and the block */ -+/* will be marked as ACTIVE - but could transition to DONE at any time */ -+/* Also for a word copy event, the value written into the "done" word */ -+/* can be the event interrupt cookie rather than the "source" value */ -+/* this happens since the uCode does not wait for the write to have */ -+/* occured before overwriting TMP_0 with the cookie */ -+#define EP3_EVENT_FIRING(edev, event, cookie, done) \ -+ (((((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) || (done) == EP3_EVENT_ACTIVE) && \ -+ (int) elan3_sdram_readl (edev, (event) + offsetof (E3_BlockCopyEvent, ev_Count)) <= 0) -+#define EP3_EVENT_FIRED(cookie, done) \ -+ (((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) -+ -+ -+/* Time limit to wait while event is firing and block write has not occured */ -+#define EP3_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+#define EP3_INIT_COPY_EVENT(event, cookie, dest, intr) \ -+{ \ -+ (event).ev_Count = 0; \ -+ (event).ev_Type = (intr) ? EV_TYPE_BCOPY | EV_TYPE_EVIRQ | (cookie).Cookie : EV_TYPE_BCOPY; \ -+ (event).ev_Source = (cookie).Cookie | EV_WCOPY; \ -+ (event).ev_Dest = (dest) | EV_TYPE_BCOPY_WORD; \ -+} -+ -+#if !defined(__ELAN3__) -+ -+/* Generic input queues which can be polled */ -+typedef struct ep3_inputq -+{ -+ EP3_COOKIE q_cookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ E3_Addr q_descAddr; -+ -+ E3_Addr q_base; -+ E3_Addr q_top; -+ E3_Addr q_fptr; -+ -+ E3_uint32 q_waitCount; -+} EP3_INPUTQ; -+ -+typedef struct ep3_outputq -+{ -+ EP3_COOKIE q_cookie; -+ -+ unsigned int q_slotCount; /* # slots allocated */ -+ unsigned int q_slotSize; /* size of each slot (rounded up) */ -+ -+ sdramaddr_t q_elan; -+ E3_Addr q_elanAddr; -+ -+ void *q_main; -+ E3_Addr q_mainAddr; -+} EP3_OUTPUTQ; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* per-rail elan memory portion of device */ -+typedef struct ep3_rail_elan -+{ -+ E3_uint16 ProbeSource0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeSource1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_BlockCopyEvent ProbeDone; /* 16 byte aligned */ -+ E3_Event ProbeStart; /* 8 byte aligned */ -+ -+ E3_uint32 ProbeType; /* 4 byte aligned */ -+ E3_uint32 ProbeLevel; -+ -+ E3_uint32 NodeId; -+} EP3_RAIL_ELAN; -+ -+/* values for ProbeType */ -+#define PROBE_SINGLE 0 -+#define PROBE_MULTIPLE 1 -+/* number of attempts for each type */ -+#define PROBE_SINGLE_ATTEMPTS 10 -+#define PROBE_SINGLE_TIMEOUTS 5 -+#define PROBE_MULTIPLE_ATTEMPTS 20 -+#define PROBE_MULTIPLE_TIMEOUTS 10 -+ -+/* per-rail elan memory portsion of device */ -+typedef struct ep3_rail_main -+{ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_uint32 ProbeDone; /* 4 byte aligned */ -+ E3_uint32 ProbeResult; -+ E3_uint32 ProbeLevel; -+} EP3_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+struct ep3_rail -+{ -+ EP_RAIL Generic; /* Generic rail */ -+ -+ ELAN3_DEV *Device; /* Elan device we're using */ -+ ELAN3_CTXT *Ctxt; /* Elan context struct */ -+ ioaddr_t CommandPort; /* commandport from context */ -+ E3_Addr CommandPortAddr; /* and address mapped into elan */ -+ -+ ELAN3_ROUTE_TABLE *RouteTable; /* routetable from context */ -+ ELAN3MMU *Elan3mmu; /* elanmmu from context */ -+ -+ EP3_COOKIE_TABLE CookieTable; /* Event cookie table */ -+ -+ EP_CODE ThreadCode; /* copy of thread code */ -+ unsigned int CommandPortEventTrap; /* flag to indicate command port eventint queue overflow trap */ -+ -+ sdramaddr_t RailElan; /* Elan visible main/sdram portions of */ -+ E3_Addr RailElanAddr; /* device structure */ -+ EP3_RAIL_MAIN *RailMain; -+ E3_Addr RailMainAddr; -+ -+ /* small system message queues */ -+ sdramaddr_t QueueDescs; /* Input Queue descriptors */ -+ -+ /* Network position prober */ -+ E3_Addr ProbeStack; /* Network position thread command structure */ -+ EP3_COOKIE ProbeCookie; /* event cookie for Done event */ -+ kcondvar_t ProbeWait; /* place to wait on probe thread */ -+ spinlock_t ProbeLock; /* and lock */ -+ volatile int ProbeDone; /* and flag to indicate it's done */ -+ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* last result of CheckNetworkPosition */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ E3_uint32 ProbeResult; -+ -+ long ProbeLevelTick[ELAN_MAX_LEVELS]; -+ long SwitchBroadcastLevelTick; -+ -+ /* rings for issueing dmas */ -+ EP3_DMA_RING DmaRings[EP3_NUM_RINGS]; -+ -+ /* retry lists for dmas */ -+ struct list_head DmaRetries[EP_NUM_RETRIES]; /* Dma retry lists */ -+ struct list_head DmaRetryFreeList; /* and free list */ -+ u_int DmaRetryCount; /* and total retry count */ -+ u_int DmaRetryReserved; /* and number reserved */ -+ u_int DmaRetryThreadShouldStall; /* count of reasons to stall retries */ -+ u_int DmaRetryThreadStarted:1; /* dma retry thread running */ -+ u_int DmaRetryThreadShouldStop:1; /* but should stop */ -+ u_int DmaRetryThreadStopped:1; /* and now it's stopped */ -+ u_int DmaRetryInitialised:1; /* have initialise dma retries */ -+ -+ spinlock_t DmaRetryLock; /* spinlock protecting lists */ -+ kcondvar_t DmaRetryWait; /* place retry thread sleeps */ -+ long DmaRetryTime; /* and when it will next wakeup */ -+ unsigned int DmaRetrySleeping; /* and it's sleeping there */ -+ -+ /* Network Identify Cookies */ -+ E3_uint32 *MainCookies; /* One cookie allocator per-node for main*/ -+ E3_Addr ElanCookies; /* and one for elan */ -+ spinlock_t CookieLock; /* spinlock to protect main cookies */ -+ -+ /* Halt operation flags for flushing. */ -+ kmutex_t HaltOpMutex; /* serialize access to halt operations */ -+ unsigned int HaltOpCompleted; /* flag to indicate halt operation completed */ -+ kcondvar_t HaltOpSleep; /* place to wait for it to complete */ -+ -+ /* Network error state */ -+ kcondvar_t NetworkErrorSleep; /* place to sleep for network error halt operation */ -+ u_int NetworkErrorFlushed; /* and flag to indicate flushed */ -+ -+ -+ EP3_RAIL_STATS Stats; /* statistics */ -+}; -+ -+/* support.c */ -+ -+extern ELAN3_OPS ep3_elan3_ops; -+ -+extern E3_uint32 LocalCookie (EP3_RAIL *rail, unsigned int remoteNode); -+extern E3_uint32 RemoteCookie (EP3_RAIL *rail, unsigned int remoteNode); -+ -+extern void InitialiseCookieTable (EP3_COOKIE_TABLE *table); -+extern void DestroyCookieTable (EP3_COOKIE_TABLE *table); -+extern void RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie, -+ E3_Addr event, EP3_COOKIE_OPS *ops, void *arg); -+extern void DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie); -+extern EP3_COOKIE *LookupCookie (EP3_COOKIE_TABLE *table, uint32_t cookie); -+extern EP3_COOKIE *LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr); -+ -+extern int DmaRingsCreate (EP3_RAIL *rail); -+extern void DmaRingsRelease (EP3_RAIL *rail); -+extern int IssueDma (EP3_RAIL *rail, E3_DMA_BE *dma, int type, int retryThread); -+ -+extern int IssueWaitevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueSetevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueRunThread (EP3_RAIL *rail, E3_Addr value); -+extern long DmaRetryTime (int type); -+extern int InitialiseDmaRetries (EP3_RAIL *rail); -+extern void DestroyDmaRetries (EP3_RAIL *rail); -+extern int ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr); -+extern void ReleaseDmaRetries (EP3_RAIL *rail, int count); -+extern void StallDmaRetryThread (EP3_RAIL *rail); -+extern void ResumeDmaRetryThread (EP3_RAIL *rail); -+extern void QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval); -+extern void QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma); -+extern void FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId); -+ -+extern void SetQueueLocked(EP3_RAIL *rail, sdramaddr_t qaddr); -+ -+/* threadcode_elan3.c */ -+extern E3_Addr ep3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, -+ int stackSize, int nargs, ...); -+ -+/* probenetwork.c */ -+extern int ep3_init_probenetwork (EP3_RAIL *rail); -+extern void ep3_destroy_probenetwork (EP3_RAIL *rail); -+extern void ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos); -+extern int ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep3_check_position (EP_RAIL *rail); -+ -+/* neterr_elan3.c */ -+extern void ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* kcomm_elan3.c */ -+extern EP_RAIL *ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev); -+extern void ep3_destroy_rail (EP_RAIL *rail); -+ -+extern int ep3_start_rail (EP_RAIL *rail); -+extern void ep3_stall_rail (EP_RAIL *rail); -+extern void ep3_stop_rail (EP_RAIL *rail); -+ -+extern void ep3_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep3_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep3_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep3_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep3_flush_tlb (EP_RAIL *r); -+extern void ep3_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep3_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep3_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+extern void ep3_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* kmap_elan3.c */ -+extern void ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm); -+extern void ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep3_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan3.c */ -+extern EP_INPUTQ *ep3_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep3_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* support_elan3.c */ -+extern void ep3_flush_filters (EP_RAIL *r); -+extern void ep3_flush_queues (EP_RAIL *r); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#endif /* __EP_KCOMM_ELAN3_H */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan4.c 2005-06-01 23:12:54.667428768 -0400 -@@ -0,0 +1,526 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan4.c,v 1.16.2.3 2004/11/30 12:02:17 mike Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan4; -+ -+unsigned int -+ep4_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN4_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan4_reference_device (i, ELAN4_STATE_STARTED)) != NULL) -+ { -+ if ((rail = ep4_create_rail (sys, dev)) == NULL) -+ elan4_dereference_device (dev); -+ else -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ if (rmask) -+ qsnet_debug_alloc(); -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev) -+{ -+ EP4_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP4_RAIL *, sizeof (EP4_RAIL), 1); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->r_generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->r_ctxt.ctxt_dev = dev; -+ -+ /* install our rail operations */ -+ rail->r_generic.Operations.DestroyRail = ep4_destroy_rail; -+ rail->r_generic.Operations.StartRail = ep4_start_rail; -+ rail->r_generic.Operations.StallRail = ep4_stall_rail; -+ rail->r_generic.Operations.StopRail = ep4_stop_rail; -+ -+ rail->r_generic.Operations.SdramAlloc = ep4_sdram_alloc; -+ rail->r_generic.Operations.SdramFree = ep4_sdram_free; -+ rail->r_generic.Operations.SdramWriteb = ep4_sdram_writeb; -+ -+ rail->r_generic.Operations.KaddrMap = ep4_kaddr_map; -+ rail->r_generic.Operations.SdramMap = ep4_sdram_map; -+ rail->r_generic.Operations.Unmap = ep4_unmap; -+ -+ rail->r_generic.Operations.DvmaReserve = ep4_dvma_reserve; -+ rail->r_generic.Operations.DvmaRelease = ep4_dvma_release; -+ rail->r_generic.Operations.DvmaSetPte = ep4_dvma_set_pte; -+ rail->r_generic.Operations.DvmaReadPte = ep4_dvma_read_pte; -+ rail->r_generic.Operations.DvmaUnload = ep4_dvma_unload; -+ rail->r_generic.Operations.FlushTlb = ep4_flush_tlb; -+ -+ rail->r_generic.Operations.ProbeRoute = ep4_probe_route; -+ -+ rail->r_generic.Operations.PositionFound = ep4_position_found; -+ rail->r_generic.Operations.CheckPosition = ep4_check_position; -+ rail->r_generic.Operations.NeterrFixup = ep4_neterr_fixup; -+ -+ rail->r_generic.Operations.LoadSystemRoute = ep4_load_system_route; -+ -+ rail->r_generic.Operations.LoadNodeRoute = ep4_load_node_route; -+ rail->r_generic.Operations.UnloadNodeRoute = ep4_unload_node_route; -+ rail->r_generic.Operations.LowerFilter = ep4_lower_filter; -+ rail->r_generic.Operations.RaiseFilter = ep4_raise_filter; -+ rail->r_generic.Operations.NodeDisconnected = ep4_node_disconnected; -+ -+ rail->r_generic.Operations.FlushFilters = ep4_flush_filters; -+ rail->r_generic.Operations.FlushQueues = ep4_flush_queues; -+ -+ rail->r_generic.Operations.AllocInputQ = ep4_alloc_inputq; -+ rail->r_generic.Operations.FreeInputQ = ep4_free_inputq; -+ rail->r_generic.Operations.EnableInputQ = ep4_enable_inputq; -+ rail->r_generic.Operations.DisableInputQ = ep4_disable_inputq; -+ rail->r_generic.Operations.PollInputQ = ep4_poll_inputq; -+ -+ rail->r_generic.Operations.AllocOutputQ = ep4_alloc_outputq; -+ rail->r_generic.Operations.FreeOutputQ = ep4_free_outputq; -+ rail->r_generic.Operations.OutputQMsg = ep4_outputq_msg; -+ rail->r_generic.Operations.OutputQState = ep4_outputq_state; -+ rail->r_generic.Operations.OutputQSend = ep4_outputq_send; -+ -+ rail->r_generic.Operations.FillOutStats = ep4_fillout_stats; -+ rail->r_generic.Operations.Debug = ep4_debug_rail; -+ -+ rail->r_generic.Devinfo = dev->dev_devinfo; -+ -+ printk ("%s: connected via elan4 rev%c device %d\n", rail->r_generic.Name, -+ 'a' + dev->dev_devinfo.dev_revision_id, dev->dev_instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep4_destroy_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_dereference_device (rail->r_ctxt.ctxt_dev); -+ -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+} -+ -+static int -+ep4_attach_rail (EP4_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ if (elan4_insertctxt (dev, &rail->r_ctxt, &ep4_trap_ops) != 0) -+ return -ENOMEM; -+ -+ if ((rail->r_routetable = elan4_alloc_routetable (dev, 4)) == NULL) /* 512 << 4 == 8192 entries */ -+ { -+ elan4_removectxt (dev, &rail->r_ctxt); -+ return -ENOMEM; -+ } -+ elan4_set_routetable (&rail->r_ctxt, rail->r_routetable); -+ -+ /* Attach to the kernel comms nextwork context */ -+ if (elan4_attach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM) < 0) -+ { -+ elan4_free_routetable (dev, rail->r_routetable); -+ elan4_removectxt (dev, &rail->r_ctxt); -+ -+ return -EBUSY; -+ } -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_attach_filter (&rail->r_ctxt, ctx); -+ -+ return 0; -+} -+ -+static void -+ep4_detach_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ elan4_detach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_detach_filter (&rail->r_ctxt, ctx); -+ -+ if (rail->r_routetable) -+ { -+ elan4_set_routetable (&rail->r_ctxt, NULL); -+ elan4_free_routetable (dev, rail->r_routetable); -+ } -+ -+ elan4_removectxt (dev, &rail->r_ctxt); -+} -+ -+int -+ep4_start_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_InputQueue qdesc; -+ int i, res; -+ -+ if ((res = ep4_attach_rail (rail)) < 0) -+ return res; -+ -+ /* Initialise main interrupt cookie table */ -+ spin_lock_init (&rail->r_intcookie_lock); -+ for (i = 0; i < EP4_INTCOOKIE_HASH_SIZE; i++) -+ INIT_LIST_HEAD (&rail->r_intcookie_hash[i]); -+ -+ kmutex_init (&rail->r_haltop_mutex); -+ kcondvar_init (&rail->r_haltop_sleep); -+ spin_lock_init (&rail->r_haltop_lock); -+ -+ spin_lock_init (&rail->r_cookie_lock); -+ -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_EVENT]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_ATOMIC]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_SINGLE]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_MAIN]); -+ spin_lock_init (&rail->r_ecq_lock); -+ -+ ep_kthread_init (&rail->r_retry_thread); -+ INIT_LIST_HEAD (&rail->r_retry_ops); -+ -+ INIT_LIST_HEAD (&rail->r_neterr_ops); -+ -+ kmutex_init (&rail->r_flush_mutex); -+ kcondvar_init (&rail->r_flush_sleep); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->r_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RAIL_ELAN), 0, &rail->r_elan_addr)) == 0 || -+ (rail->r_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RAIL_MAIN), 0, &rail->r_main_addr)) == 0) -+ { -+ goto failed; -+ } -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[i].ev_CountAndType), 0); -+ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ /* avoid sdram address aliasing by allocating the min sdram pagesize */ -+ if (! (rail->r_queuedescs= ep_alloc_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE, SDRAM_PAGE_SIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_copyq_to_sdram (dev, &qdesc, EP_SYSTEMQ_DESC (rail->r_queuedescs, i), sizeof (E4_InputQueue)); -+ -+ /* Allocate the resource map for command queue mappings */ -+ if ((rail->r_ecq_rmap = ep_rmallocmap (EP4_ECQ_RMAPSIZE, "r_ecq_rmap", 1)) == NULL) -+ goto failed; -+ -+ ep_rmfree (rail->r_ecq_rmap, EP4_ECQ_TOP - EP4_ECQ_BASE, EP4_ECQ_BASE); -+ -+ /* register an interrupt cookie & allocate command queues for command queue flushing */ -+ rail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4); -+ rail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1); -+ -+ if (rail->r_flush_mcq == NULL || rail->r_flush_ecq == NULL) -+ goto failed; -+ -+ ep4_register_intcookie (rail, &rail->r_flush_intcookie, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), ep4_flush_interrupt, rail); -+ -+ /* startup the retry thread */ -+ if (kernel_thread_create (ep4_retry_thread, (void *) rail) == 0) -+ goto failed; -+ ep_kthread_started (&rail->r_retry_thread); -+ -+ ep4_initialise_dma_retries (rail); -+ -+ if ((rail->r_event_ecq = ep4_alloc_ecq (rail, CQ_Size1K)) == NULL) -+ goto failed; -+ -+ rail->r_threadcode = threadcode_elan4; -+ if (ep_loadcode (&rail->r_generic, &rail->r_threadcode)) -+ goto failed; -+ -+ elan4_flush_icache (&rail->r_ctxt); -+ -+ if (ep4_probe_init (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 0; -+ -+ failed: -+ printk ("ep4_start_rail: failed for rail '%s'\n", rail->r_generic.Name); -+ ep4_stop_rail (&rail->r_generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep4_stall_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ unsigned ctx; -+ -+ /* Raise all the context filters */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_DISCARD_ALL); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_set_filter (&rail->r_ctxt, ctx, E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_stop_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ if (rail->r_generic.State == EP_RAIL_STATE_RUNNING) /* undo ep4_position_found() */ -+ { -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ EP_ADDR addr = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies)); -+ -+ ep_free_elan (&rail->r_generic, addr, pos->pos_nodes * sizeof (E4_uint64)); -+ -+ KMEM_FREE (rail->r_cookies, pos->pos_nodes * sizeof (E4_uint64)); -+ } -+ -+ ep4_probe_destroy (rail); -+ -+ ep_unloadcode (&rail->r_generic, &rail->r_threadcode); -+ -+ if (rail->r_event_ecq) -+ ep4_free_ecq (rail, rail->r_event_ecq); -+ rail->r_event_ecq = NULL; -+ -+ ep4_finalise_dma_retries (rail); -+ -+ ep_kthread_stop (&rail->r_retry_thread); -+ ep_kthread_destroy (&rail->r_retry_thread); -+ -+ if (rail->r_flush_intcookie.int_arg) -+ ep4_deregister_intcookie (rail, &rail->r_flush_intcookie); -+ rail->r_flush_intcookie.int_arg = NULL; -+ -+ if (rail->r_flush_mcq) -+ ep4_put_ecq (rail, rail->r_flush_mcq, 4); -+ rail->r_flush_mcq = NULL; -+ -+ if (rail->r_flush_ecq) -+ ep4_put_ecq (rail, rail->r_flush_ecq, 1); -+ rail->r_flush_ecq = NULL; -+ -+ if (rail->r_ecq_rmap) -+ ep_rmfreemap (rail->r_ecq_rmap); -+ -+ if (rail->r_queuedescs) -+ ep_free_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE); -+ rail->r_queuedescs = 0; -+ -+ if (rail->r_elan) -+ ep_free_elan (&rail->r_generic, rail->r_elan_addr, sizeof (EP4_RAIL_ELAN)); -+ rail->r_elan = 0; -+ -+ if (rail->r_main) -+ ep_free_main (&rail->r_generic, rail->r_main_addr, sizeof (EP4_RAIL_MAIN)); -+ rail->r_main = NULL; -+ -+ kcondvar_destroy (&rail->r_flush_sleep); -+ kmutex_destroy (&rail->r_flush_mutex); -+ -+ spin_lock_destroy (&rail->r_ecq_lock); -+ spin_lock_destroy (&rail->r_cookie_lock); -+ -+ spin_lock_destroy (&rail->r_haltop_lock); -+ kcondvar_destroy(&rail->r_haltop_sleep); -+ kmutex_destroy (&rail->r_haltop_mutex); -+ spin_lock_destroy (&rail->r_intcookie_lock); -+ -+ ep4_detach_rail (rail); -+} -+ -+void -+ep4_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ sdramaddr_t cookies; -+ EP_ADDR addr; -+ int i; -+ -+ KMEM_ZALLOC (rail->r_cookies, E4_uint64 *, pos->pos_nodes * sizeof (E4_uint64), 1); -+ -+ if (! (cookies = ep_alloc_elan (&rail->r_generic, pos->pos_nodes * sizeof (E4_uint64), 0, &addr))) -+ panic ("ep4_position_found: cannot allocate elan cookies array\n"); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, cookies + (i * sizeof (E4_uint64)), 0); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ rail->r_cookies[i] = 0; -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_nodeid), pos->pos_nodeid); -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies), addr); -+ -+ ep4_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep4_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ ELAN4_DEV *dev = ((EP4_RAIL *) r)->r_ctxt.ctxt_dev; -+ -+ if (size >= SDRAM_PAGE_SIZE) -+ return elan4_sdram_alloc (dev, size); -+ else -+ { -+ sdramaddr_t block = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ sdramaddr_t sdram = block + (addr & (SDRAM_PAGE_SIZE-1)); -+ -+ /* free of the portion before sdram */ -+ if (sdram > block) -+ elan4_sdram_free (dev, block, sdram - block); -+ -+ /* free of the portion after sdram */ -+ if ((block + SDRAM_PAGE_SIZE) > (sdram + size)) -+ elan4_sdram_free (dev, sdram + size, block + SDRAM_PAGE_SIZE - (sdram + size)); -+ -+ return sdram; -+ } -+} -+ -+void -+ep4_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan4_sdram_free (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, size); -+} -+ -+void -+ep4_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan4_sdram_writeb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, val); -+} -+ -+void -+ep4_flush_tlb (EP_RAIL *r) -+{ -+ elan4mmu_flush_tlb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev); -+} -+ -+void -+ep4_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, ELAN4_KCOMM_CONTEXT_NUM, -+ lowNode, highNode, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_system_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, vp, &route); -+} -+ -+void -+ep4_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, EP4_CONTEXT_NUM(rail->r_generic.Position.pos_nodeid), -+ nodeId, nodeId, FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_node_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, EP_VP_DATA(nodeId), &route); -+} -+ -+void -+ep4_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ elan4_invalidate_route (dev, rail->r_routetable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep4_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_HIGH_PRI); -+} -+ -+void -+ep4_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ ep4_free_stalled_dmas ((EP4_RAIL *) r, nodeId); -+} -+ -+void -+ep4_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP4_RAIL *ep4rail = (EP4_RAIL *)r; */ -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan4.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm_elan4.h 2005-06-01 23:12:54.668428616 -0400 -@@ -0,0 +1,443 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN4_H -+#define __EP_KCOMM_ELAN4_H -+ -+#ident "@(#)$Id: kcomm_elan4.h,v 1.16.2.2 2004/12/14 10:19:14 mike Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.h,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#if !defined(__elan4__) -+#include -+#endif /* ! defined(__elan4__) */ -+ -+/* private address allocation */ -+#define EP4_TEXT_BASE 0xF8000000 /* base address for thread code (defined in makerules.elan4) */ -+#define EP4_ECQ_BASE 0xFF000000 /* address space for mapping command queues */ -+#define EP4_ECQ_TOP 0xFF800000 -+ -+#define EP4_ECQ_RMAPSIZE 128 -+#define EP4_STACK_SIZE 1024 /* default thread code stack size */ -+#define EP4_MAX_LEVELS 8 /* same as ELAN_MAX_LEVELS */ -+ -+/* context number allocation */ -+#define EP4_CONTEXT_NUM(nodeId) (ELAN4_KCOMM_BASE_CONTEXT_NUM + (nodeId)) -+#define EP4_CONTEXT_ISDATA(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN4_KCOMM_TOP_CONTEXT_NUM) -+#define EP4_CONTEXT_TO_NODE(ctx) ((ctx) - ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+/* -+ * network error cookie format: -+ * ------------------------------------------------- -+ * | unique cookie value | Remote | DMA | Location | -+ * ------------------------------------------------- -+ * [63:4] Cookie - unique cookie number -+ * [3] Thread - cookie generated by thread code -+ * [2] Remote - cookie generated by remote end -+ * [1] STEN - cookie is for a STEN packet -+ * [0] DMA - cookie is for a DMA -+ */ -+#define EP4_COOKIE_DMA (1 << 0) -+#define EP4_COOKIE_STEN (1 << 1) -+#define EP4_COOKIE_REMOTE (1 << 2) -+#define EP4_COOKIE_THREAD (1 << 3) -+#define EP4_COOKIE_INC (1ull << 4) -+ -+#define EP4_COOKIE_STRING(val) ((val) & ~(EP4_COOKIE_INC-1)) >> 4, \ -+ ((val) & EP4_COOKIE_DMA) ? ",dma" : "", \ -+ ((val) & EP4_COOKIE_REMOTE) ? ",remote" : "", \ -+ ((val) & EP4_COOKIE_THREAD) ? ",thread" : "", \ -+ ((val) & EP4_COOKIE_STEN) ? ",sten" : "" -+/* -+ * Done "word" values -+ */ -+#define EP4_STATE_FREE 0 -+#define EP4_STATE_ACTIVE 1 -+#define EP4_STATE_FINISHED 2 -+#define EP4_STATE_FAILED 3 -+#define EP4_STATE_PRIVATE 4 -+ -+#define EP4_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+/* forward declarations */ -+typedef struct ep4_rail EP4_RAIL; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_intcookie -+{ -+ struct list_head int_link; -+ E4_uint64 int_val; -+ void (*int_callback)(EP4_RAIL *rail, void *arg); -+ void *int_arg; -+} EP4_INTCOOKIE; -+ -+#define EP4_INTCOOKIE_HASH_SIZE 256 -+#define EP4_INTCOOKIE_HASH(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP4_INTCOOKIE_HASH_SIZE-1)) -+ -+typedef struct ep4_ecq -+{ -+ struct list_head ecq_link; /* linked on r_ecq_list */ -+ ELAN4_INTOP ecq_intop; /* main interrupt op space */ -+ ELAN4_CQ *ecq_cq; /* command queue */ -+ E4_Addr ecq_addr; /* address mapped into elan */ -+ unsigned int ecq_avail; /* # dwords still available */ -+ -+ spinlock_t ecq_lock; /* spinlock for main accesses */ -+ sdramaddr_t ecq_event; /* event for flushing "event" queues */ -+ EP_ADDR ecq_event_addr; -+ struct ep4_ecq *ecq_flushcq; /* and command port to issue setevent to */ -+} EP4_ECQ; -+ -+#define EP4_ECQ_EVENT 0 /* command queues targetted by multi-blocks events */ -+#define EP4_ECQ_ATOMIC 1 /* command queues targetted by atomic store operations */ -+#define EP4_ECQ_SINGLE 2 /* command queues targetted by single word commands from main */ -+#define EP4_ECQ_MAIN 3 /* command queues targetted by multi word commands from main */ -+#define EP4_NUM_ECQ 4 -+ -+#define EP4_ECQ_Size(which) ((which) == EP4_ECQ_EVENT ? CQ_Size64K : \ -+ (which) == EP4_ECQ_ATOMIC ? CQ_Size8K : \ -+ (which) == EP4_ECQ_SINGLE ? CQ_Size1K : \ -+ (which) == EP4_ECQ_MAIN ? CQ_Size8K : \ -+ CQ_Size1K) -+ -+typedef struct ep4_dma_retry -+{ -+ struct list_head retry_link; /* chained on free/retry list */ -+ unsigned long retry_time; /* "lbolt" to retry at */ -+ E4_DMA retry_dma; /* DMA (in main memory) */ -+} EP4_DMA_RETRY; -+ -+#define EP4_DMA_RETRY_CQSIZE CQ_Size8K /* size of command queue for dma retry */ -+#define EP4_DMA_RETRY_FLOWCNT (CQ_Size(EP4_DMA_RETRY_CQSIZE)/72) /* # of reissued DMA's which can fit in */ -+ -+typedef struct ep4_inputq -+{ -+ EP4_INTCOOKIE q_intcookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ EP_ADDR q_descAddr; -+ EP_ADDR q_eventAddr; -+ EP4_ECQ *q_wcq; /* command queue to issue waitevent to */ -+ EP4_ECQ *q_ecq; /* command queue targetted by event to generate interrupt */ -+ -+ EP_ADDR q_fptr; /* cached current front pointer */ -+ EP_ADDR q_last; /* elan addr for last queue slot */ -+ -+ atomic_t q_fired; /* atomic flag that interrupt received */ -+ unsigned int q_count; /* count of slots consumed */ -+} EP4_INPUTQ; -+ -+typedef struct ep4_outputq -+{ -+ spinlock_t q_lock; -+ unsigned int q_slotCount; -+ unsigned int q_slotSize; -+ unsigned int q_dwords; -+ ELAN4_CQ *q_cq; -+ void *q_main; -+ EP_ADDR q_mainAddr; -+ unsigned int q_retries; -+} EP4_OUTPUTQ; -+ -+#endif /* ! defined(__elan4__) */ -+ -+typedef struct ep4_check_sten -+{ -+ E4_uint64 c_reset_event_cmd; /* WRITEDWORD to reset start event */ -+ E4_uint64 c_reset_event_value; -+ -+ E4_uint64 c_open; /* OPEN VP_PROBE(lvl) */ -+ E4_uint64 c_trans_traceroute0; /* SENDTRANS TR_TRACEROUTE 0s */ -+ E4_uint64 c_addr_traceroute0; -+ E4_uint64 c_data_traceroute0[8]; -+ E4_uint64 c_trans_traceroute1; /* SENDTRANS TR_TRACEROUTE 1s */ -+ E4_uint64 c_addr_traceroute1; -+ E4_uint64 c_data_traceroute1[8]; -+ E4_uint64 c_trans_sendack; /* SENDTRANS SENDACK */ -+ E4_uint64 c_addr_sendack; -+ -+ E4_uint64 c_guard_ok; /* GUARD OK - write level */ -+ E4_uint64 c_writedword_ok; -+ E4_uint64 c_value_ok; -+ -+ E4_uint64 c_guard_fail; /* GUARD FAIL - chain setevent/write fail */ -+ E4_uint64 c_setevent_fail; -+ E4_uint64 c_setevent_nop; -+ E4_uint64 c_nop_pad; -+} EP4_CHECK_STEN; -+ -+#define EP4_CHECK_STEN_NDWORDS (sizeof (EP4_CHECK_STEN) >> 3) -+ -+typedef struct ep4_rail_elan -+{ -+ EP4_CHECK_STEN r_check_sten[EP4_MAX_LEVELS]; -+ E4_Event32 r_check_fail; /* Check failed (== r_check_start[-1]) */ -+ E4_Event32 r_check_start[EP4_MAX_LEVELS]; -+ -+ E4_Event32 r_qevents[EP_NUM_SYSTEMQ]; -+ E4_Event32 r_flush_event; -+ -+ E4_uint64 r_nodeid; -+#ifdef __elan4__ -+ E4_uint64 *r_cookies; -+#else -+ E4_Addr r_cookies; -+#endif -+} EP4_RAIL_ELAN; -+ -+#define TRACEROUTE_ENTRIES 16 /* 2 * ELAN_MAX_LEVELS */ -+#define TRACEROUTE_NDWORDS (TRACEROUTE_ENTRIES/2) -+ -+typedef struct ep4_rail_main -+{ -+ E4_uint32 r_probe_dest0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_dest1[TRACEROUTE_ENTRIES]; -+ E4_uint64 r_probe_result; -+ E4_uint64 r_probe_level; -+ -+ E4_uint64 r_dma_flowcnt; /* count of dma's queued */ -+} EP4_RAIL_MAIN; -+ -+#define EP4_PROBE_ACTIVE (0xffff) -+#define EP4_PROBE_FAILED (0xfffe) -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_retry_ops -+{ -+ struct list_head op_link; -+ unsigned long (*op_func)(EP4_RAIL *rail, void *arg, unsigned long nextRunTime); -+ void *op_arg; -+} EP4_RETRY_OPS; -+ -+typedef struct ep4_neterr_ops -+{ -+ struct list_head op_link; -+ void (*op_func) (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ void *op_arg; -+} EP4_NETERR_OPS; -+ -+struct ep4_rail -+{ -+ EP_RAIL r_generic; -+ ELAN4_CTXT r_ctxt; -+ ELAN4_ROUTE_TABLE *r_routetable; -+ -+ spinlock_t r_intcookie_lock; -+ struct list_head r_intcookie_hash[EP4_INTCOOKIE_HASH_SIZE]; -+ -+ sdramaddr_t r_elan; -+ EP_ADDR r_elan_addr; -+ EP4_RAIL_MAIN *r_main; -+ EP_ADDR r_main_addr; -+ -+ EP_CODE r_threadcode; /* copy of thread code */ -+ -+ sdramaddr_t r_queuedescs; /* systemq queue descriptors */ -+ -+ E4_uint64 *r_cookies; /* network error cookies */ -+ spinlock_t r_cookie_lock; /* and spin lock */ -+ -+ kcondvar_t r_probe_wait; /* network position probing */ -+ spinlock_t r_probe_lock; -+ volatile int r_probe_done; -+ EP4_INTCOOKIE r_probe_intcookie; -+ EP4_ECQ *r_probe_cq; -+ E4_uint32 r_probe_source0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_source1[TRACEROUTE_ENTRIES]; -+ -+ kmutex_t r_haltop_mutex; /* halt/flush operations */ -+ ELAN4_HALTOP r_haltop; -+ ELAN4_DMA_FLUSHOP r_flushop; -+ kcondvar_t r_haltop_sleep; -+ spinlock_t r_haltop_lock; -+ -+ struct list_head r_ecq_list[EP4_NUM_ECQ]; /* list of statically allocated command queues */ -+ EP_RMAP *r_ecq_rmap; /* resource map for command queue mappings */ -+ spinlock_t r_ecq_lock; /* spinlock for list/space management */ -+ -+ kmutex_t r_flush_mutex; /* serialize command queue flushing */ -+ unsigned long r_flush_count; /* # setevents issued for flushing */ -+ EP4_ECQ *r_flush_mcq; /* and command queue for waitevent */ -+ EP4_ECQ *r_flush_ecq; /* and command queue for interrupt */ -+ EP4_INTCOOKIE r_flush_intcookie; /* and interrupt cookie */ -+ kcondvar_t r_flush_sleep; /* and place to sleep ... */ -+ -+ EP_KTHREAD r_retry_thread; /* retry thread */ -+ struct list_head r_retry_ops; /* list of retry operations */ -+ -+ EP4_RETRY_OPS r_dma_ops; /* dma retry operations */ -+ EP4_ECQ *r_dma_ecq; /* command queue to reissue DMAs */ -+ E4_uint64 r_dma_flowcnt; /* count of dma's reissued */ -+ struct list_head r_dma_retrylist[EP_NUM_RETRIES]; /* retry lists */ -+ struct list_head r_dma_freelist; /* and free list */ -+ spinlock_t r_dma_lock; /* and spinlock to protect lists */ -+ unsigned long r_dma_allocated; /* # retries allocated*/ -+ unsigned long r_dma_reserved; /* # retries reserved */ -+ -+ EP4_ECQ *r_event_ecq; /* command queue for occasional setevents */ -+ -+ struct list_head r_neterr_ops; /* list of neterr fixup operations */ -+ -+ ELAN4_IPROC_TRAP r_iproc_trap; -+ ELAN4_TPROC_TRAP r_tproc_trap; -+} ; -+ -+#define EP4_CTXT_TO_RAIL(ctxt) ((EP4_RAIL *) (((unsigned long) (ctxt)) - offsetof (EP4_RAIL, r_ctxt))) -+ -+#if defined(DEBUG_ASSERT) -+#define EP4_ASSERT(rail,EXPR) EP_ASSERT(&((rail)->r_generic), EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) EP4_ASSERT(rail, (sdram_assert ? elan4_sdram_readq ((rail)->r_ctxt.ctxt_dev, (off)) == (value) : 1)) -+#else -+#define EP4_ASSERT(rail,EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) -+#endif -+ -+/* kcomm_elan4.c */ -+extern EP_RAIL *ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev); -+extern void ep4_destroy_rail (EP_RAIL *rail); -+ -+extern int ep4_start_rail (EP_RAIL *rail); -+extern void ep4_stall_rail (EP_RAIL *rail); -+extern void ep4_stop_rail (EP_RAIL *rail); -+ -+extern void ep4_debug_rail (EP_RAIL *rail); -+ -+extern void ep4_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep4_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep4_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep4_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep4_flush_tlb (EP_RAIL *r); -+extern void ep4_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep4_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep4_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+/* kmap_elan4.c */ -+extern void ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm); -+extern void ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan4.c */ -+extern EP_INPUTQ *ep4_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep4_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* probenetwork_elan4.c */ -+extern int ep4_probe_init (EP4_RAIL *r); -+extern void ep4_probe_destroy (EP4_RAIL *r); -+extern void ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos); -+extern int ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep4_check_position (EP_RAIL *rail); -+ -+/* support_elan4.c */ -+extern ELAN4_TRAP_OPS ep4_trap_ops; -+extern void ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg); -+extern void ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp); -+extern EP4_INTCOOKIE *ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie); -+extern E4_uint64 ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node); -+ -+extern void ep4_flush_filters (EP_RAIL *r); -+extern void ep4_flush_queues (EP_RAIL *r); -+extern void ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc); -+ -+extern EP4_ECQ *ep4_alloc_ecq (EP4_RAIL *rail, unsigned int cqsize); -+extern void ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq); -+extern EP4_ECQ *ep4_get_ecq (EP4_RAIL *rail, unsigned int which, unsigned int ndwords); -+extern void ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned int ndwords); -+ -+extern void ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag); -+extern void ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event); -+extern void ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+ -+extern void ep4_flush_interrupt (EP4_RAIL *rail, void *arg); -+extern void ep4_flush_ecqs (EP4_RAIL *rail); -+ -+extern void ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...); -+ -+extern void ep4_initialise_dma_retries (EP4_RAIL *rail); -+extern void ep4_finalise_dma_retries (EP4_RAIL *rail); -+extern int ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, unsigned int attr); -+extern void ep4_release_dma_retries(EP4_RAIL *rail, unsigned int count); -+extern void ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval); -+extern void ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma); -+extern void ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId); -+extern void ep4_display_rail (EP4_RAIL *rail); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+/* neterr_elan4.c */ -+extern void ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* commands_elan4.c */ -+extern void elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag); -+extern void elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype); -+extern void elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie); -+extern void elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs); -+extern void elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma); -+extern void elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event); -+extern void elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count); -+extern void elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+extern void elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_guard (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr); -+extern void elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0); -+extern void elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1); -+extern void elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...); -+extern void elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+extern void ep4_fillout_stats(EP_RAIL *rail, char *str); -+ -+#endif /* ! defined(__elan4__) */ -+ -+#endif /* __EP_KCOMM_ELAN4_H */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kcomm_vp.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kcomm_vp.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kcomm_vp.h 2005-06-01 23:12:54.668428616 -0400 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_VP_H -+#define __EP_KCOMM_VP_H -+ -+#ident "@(#)$Id: kcomm_vp.h,v 1.2 2004/03/24 11:32:56 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_vp.h,v $*/ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+ -+/* virtual process allocation */ -+#define EP_VP_NODE_BASE (0) -+#define EP_VP_DATA_BASE (EP_VP_NODE_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_BASE (EP_VP_DATA_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_COUNT ELAN_MAX_LEVELS -+ -+#define EP_VP_BCAST_BASE (EP_VP_PROBE_BASE + EP_VP_PROBE_COUNT) -+#define EP_VP_BCAST_COUNT (CM_SGMTS_PER_LEVEL * (CM_MAX_LEVELS - 1) + 1) -+ -+#define EP_VP_NODE(nodeId) (EP_VP_NODE_BASE + (nodeId)) -+#define EP_VP_DATA(nodeId) (EP_VP_DATA_BASE + (nodeId)) -+#define EP_VP_PROBE(lvl) (EP_VP_PROBE_BASE + (lvl)) -+#define EP_VP_BCAST(lvl,sgmt) (EP_VP_BCAST_BASE + ((lvl) - 1)*CM_SGMTS_PER_LEVEL + (sgmt)) -+ -+#define EP_VP_TO_NODE(vp) ((vp) & (EP_MAX_NODES-1)) -+#define EP_VP_ISDATA(vp) ((vp) >= EP_VP_DATA_BASE && (vp) < (EP_VP_DATA_BASE + EP_MAX_NODES)) -+ -+#endif /* __EP_KCOMM_VP_H */ -+ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/kmap.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kmap.c 2005-06-01 23:12:54.669428464 -0400 -@@ -0,0 +1,561 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap.c,v 1.10.6.2 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "debug.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+void -+ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.KaddrMap (rail, eaddr, kaddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.SdramMap (rail, eaddr, saddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len) -+{ -+ rail->Operations.Unmap (rail, eaddr, len); -+} -+ -+void -+ep_perrail_dvma_sync (EP_RAIL *rail) -+{ -+ if (rail->TlbFlushRequired) -+ { -+ rail->TlbFlushRequired = 0; -+ -+ rail->Operations.FlushTlb (rail); -+ } -+} -+ -+ -+static int ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+static uint16_t ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); -+#endif -+ -+EP_NMH_OPS ep_dvma_nmh_ops = -+{ -+ ep_dvma_map_rails, -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_dvma_calc_check_sum -+#endif -+}; -+ -+extern void -+ep_dvma_init (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ kmutex_init (&d->dvma_lock); -+ -+ INIT_LIST_HEAD (&d->dvma_handles); -+ INIT_LIST_HEAD (&d->dvma_rails); -+ -+ d->dvma_rmap = ep_rmallocmap (EP_DVMA_RMAP_SIZE, "dvma_rmap", 1); -+ -+ ep_rmfree (d->dvma_rmap, EP_DVMA_TOP - EP_DVMA_BASE, EP_DVMA_BASE); -+} -+ -+extern void -+ep_dvma_fini (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ ep_rmfreemap (d->dvma_rmap); -+ -+ kmutex_destroy (&d->dvma_lock); -+} -+ -+extern int -+ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_RAIL_ENTRY *l; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ l->Rail = rail; -+ -+ list_add_tail (&l->Link, &d->dvma_rails); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= ( 1 << rail->Number); -+ -+ desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages); -+ } -+ -+ kmutex_unlock (&d->dvma_lock); -+ return (0); -+} -+ -+extern void -+ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ struct list_head *el; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = NULL; -+ desc->dvma_railmask &= ~(1 << rail->Number); -+ -+ rail->Operations.DvmaRelease (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages, desc->dvma_private[rail->Number]); -+ } -+ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ -+ KMEM_FREE (tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+EP_NMH * -+ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc; -+ EP_ADDR addr; -+ struct list_head *el; -+ int i; -+ -+ KMEM_ZALLOC (desc, EP_DVMA_NMH *, offsetof (EP_DVMA_NMH, dvma_attrs[npages]), 1); -+ -+ if (desc == NULL) -+ return (NULL); -+ -+ if ((addr = ep_rmalloc (d->dvma_rmap, npages << PAGESHIFT, 0)) == 0) -+ { -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+ } -+ -+ spin_lock_init (&desc->dvma_lock); -+ -+ desc->dvma_perm = perm; -+ -+ kmutex_lock (&d->dvma_lock); -+ /* reserve the mapping resource */ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ EPRINTF4 (DBG_KMAP, "%s: ep_dvma_reserve desc=%p npages=%d rail=%p\n", rail->Name, desc, npages, rail); -+ -+ if ((desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, addr, npages)) == NULL) -+ { -+ printk ("%s: !!ep_dvma_reserve - rail->DvmaReserve failed\n", rail->Name); -+ goto failed; -+ } -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= (1 << rail->Number); -+ } -+ -+ /* insert into the network mapping handle table */ -+ desc->dvma_nmh.nmh_nmd.nmd_addr = addr; -+ desc->dvma_nmh.nmh_nmd.nmd_len = npages << PAGESHIFT; -+ desc->dvma_nmh.nmh_nmd.nmd_attr = EP_NMD_ATTR (sys->Position.pos_nodeid, 0); -+ desc->dvma_nmh.nmh_ops = &ep_dvma_nmh_ops; -+ -+ ep_nmh_insert (&sys->MappingTable, &desc->dvma_nmh); -+ -+ list_add (&desc->dvma_link, &d->dvma_handles); -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ return (&desc->dvma_nmh); -+ -+ failed: -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (desc->dvma_rails[i] != NULL) -+ desc->dvma_rails[i]->Operations.DvmaRelease (desc->dvma_rails[i], addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+} -+ -+void -+ep_dvma_release (EP_SYS *sys, EP_NMH *nmh) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr; -+ int npages = nmh->nmh_nmd.nmd_len >> PAGESHIFT; -+ EP_RAIL *rail; -+ int i; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_del (&desc->dvma_link); -+ -+ ep_nmh_remove (&sys->MappingTable, nmh); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if ((rail = desc->dvma_rails[i]) != NULL) -+ rail->Operations.DvmaRelease (rail, addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, offsetof (EP_DVMA_NMH, dvma_attrs[npages])); -+ -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+void -+ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = (unsigned long) vaddr & PAGEOFFSET; -+ unsigned npages = btopr (len + offset); -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr + (index << PAGESHIFT); -+ int rmask = *hints; -+ EP_RAIL *rail; -+ register int i, rnum; -+ unsigned long flags; -+ -+ EPRINTF7 (DBG_KMAP, "ep_dvma_load: map=%p vaddr=%p len=%x nmh=%p(%x,%x) index=%d\n", -+ map, vaddr, len, nmh, nmh->nmh_nmd.nmd_addr, nmh->nmh_nmd.nmd_len, index); -+ -+ /* If no rail specified, then map into all rails */ -+ if (rmask == 0) -+ rmask = desc->dvma_railmask; -+ -+ ASSERT ((index + npages) <= (nmh->nmh_nmd.nmd_len >> PAGESHIFT)); -+ -+ /* If not map specified then use the kernel map */ -+ if (map == NULL) -+ map = kernel_map; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ /* Now map each of the specified pages (backwards) */ -+ -+ vaddr = (vaddr - offset) + (npages-1)*PAGESIZE; -+ for (i = npages-1; i >= 0; i--, vaddr -= PAGESIZE) -+ { -+ physaddr_t paddr = vaddr_to_phys (map, vaddr); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ { -+ if (! (rmask & (1 << rnum)) || (rail = desc->dvma_rails[rnum]) == NULL) -+ rmask &= ~(1 << rnum); -+ else -+ { -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], index + i, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[index + i] |= (1 << rnum); -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->TlbFlushRequired = 1; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ /* Construct the network mapping handle to be returned. */ -+ subset->nmd_addr = addr + offset; -+ subset->nmd_len = len; -+ subset->nmd_attr = EP_NMD_ATTR(sys->Position.pos_nodeid, rmask); -+} -+ -+void -+ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ EP_RAIL *rail; -+ int rnum; -+ int rmask; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ /* compute which rails we need to unload on */ -+ for (rmask = 0, i = 0; i < npages; i++) -+ { -+ rmask |= desc->dvma_attrs[index + i]; -+ -+ desc->dvma_attrs[index + i] = 0; -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->Operations.DvmaUnload (rail, desc->dvma_private[rnum], index, npages); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+} -+ -+int -+ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ int r, rnum; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x mask=%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ if ((mask &= desc->dvma_railmask) == 0) -+ { -+ printk ("ep_dvma_map_rails: no intersecting rails %04x.%04x\n", mask, desc->dvma_railmask); -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ return (-1); -+ } -+ -+ for (i = npages-1; i >= 0; i--) -+ { -+ int pgidx = (index + i); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[pgidx] & (1 << rnum)) -+ break; -+ -+ if (rnum == EP_MAX_RAILS) -+ { -+ EPRINTF3 (DBG_KMAP, "ep_dvma_map_rails: nmh=%p idx=%x [%08x] not ptes valid\n", nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + ((pgidx) << PAGESHIFT)); -+ mask = 0; -+ } -+ else -+ { -+ EP_RAIL *rail = desc->dvma_rails[rnum]; -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ -+ for (r = 0; r < EP_MAX_RAILS; r++) -+ { -+ if ((mask & (1 << r)) == 0) -+ continue; -+ -+ if ((desc->dvma_attrs[pgidx] & (1 << r)) == 0) -+ { -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr=%llx\n", -+ desc->dvma_rails[rnum]->Name, nmh, pgidx, nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), -+ (long long) paddr); -+ -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], pgidx, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[pgidx] |= (1 << r); -+ } -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((mask & (1 << rnum)) != 0) -+ desc->dvma_rails[rnum]->TlbFlushRequired = 1; -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x|%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ /* Finally update the network memory descriptor */ -+ nmd->nmd_attr |= mask; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (0); -+} -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#include -+ -+/* Generic rolling checksum algorithm */ -+uint16_t -+rolling_check_sum (char *msg, int nob, uint16_t sum) -+{ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+#if ! defined(NO_RMAP) -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ kunmap(pfn_to_page(pfn)); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ return kmap(pfn_to_page(pfn)); -+ -+ return NULL; -+} -+#else -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ kunmap(p); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ return kmap(p); -+ -+ return NULL; -+} -+#endif -+ -+uint16_t -+ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum) -+{ -+ /* cant be called from an interupt */ -+ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ unsigned start, len; -+ int rnum; -+ register int i; -+ unsigned long flags; -+ EP_RAIL *rail; -+ -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF3 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x \n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ /* find a rail */ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[index] & (1 << rnum)) -+ break; -+ -+ ASSERT (rnum != EP_MAX_RAILS); -+ -+ rail = desc->dvma_rails[rnum]; -+ -+ for (i = 0; i <= (npages-1); i++) -+ { -+ int pgidx = (index + i); -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ void * virt; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); /* unlock for check sum calc */ -+ -+ virt = map_phys_address(paddr); -+ -+ if (!virt) -+ printk("ep_dvma_calc_check_sum: virt = NULL ! \n"); -+ else { -+ if ( i == 0 ) { -+ /* last bit of the first page */ -+ start = (nmd->nmd_addr & (PAGESIZE - 1)) ; -+ len = PAGESIZE - start; -+ if ( len > nmd->nmd_len) /* less than the remaining page */ -+ len = nmd->nmd_len; -+ } else { -+ if ( i != (npages-1)) { -+ /* all of the middle pages */ -+ start = 0; -+ len = PAGESIZE; -+ } else { -+ /* first bit of the last page */ -+ start = 0; -+ len = ((nmd->nmd_addr + nmd->nmd_len -1) & (PAGESIZE -1)) +1; -+ } -+ } -+ -+ check_sum = rolling_check_sum (((char *)virt)+start, len, check_sum); -+ unmap_phys_address(paddr); -+ -+ /* re aquire the lock */ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ } -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_calc_check_sum: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ } -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x = %d\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, check_sum); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (check_sum); -+} -+#endif -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kmap_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kmap_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kmap_elan3.c 2005-06-01 23:12:54.670428312 -0400 -@@ -0,0 +1,209 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan3.c,v 1.3.8.1 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+#define ELAN3_PTES_PER_PAGE (PAGESIZE/ELAN3_PAGE_SIZE) -+ -+#if defined(__LITTLE_ENDIAN__) -+#define PERM_ENDIAN 0 -+#else -+#define PERM_ENDIAN ELAN3_PTE_BIG_ENDIAN -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int io_permtable[] = -+{ -+ ELAN3_PERM_LOCAL_READ, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+void -+ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ main_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0)); -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = elan3_sdram_to_phys (rail->Device, saddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr+off, paddr+off, -+ sdram_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0) ); -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned len, unsigned int perm) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (ioaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) ioaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ io_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC); -+ -+ eaddr += PAGESIZE; -+ ioaddr += PAGESIZE; -+ } -+} -+void -+ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned len) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan3mmu_unload (rail->Elan3mmu, eaddr, len, PTE_UNLOAD_UNLOCK | PTE_UNLOAD_NOSYNC); -+} -+ -+void * -+ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ void *private; -+ -+ KMEM_ALLOC (private, void *, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t), 1); -+ -+ if (private == NULL) -+ return NULL; -+ -+ elan3mmu_reserve (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ return private; -+} -+ -+void -+ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned npages, void *private) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ elan3mmu_release (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ KMEM_FREE (private, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t)); -+} -+ -+void -+ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned index, physaddr_t paddr, unsigned int perm) -+{ -+ ELAN3_DEV *dev = ((EP3_RAIL *) r)->Device; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ int off; -+ -+ for (off =0 ; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ { -+ ELAN3_PTE newpte = elan3mmu_phys_to_pte (dev, paddr + off, main_permtable[perm]) | ELAN3_PTE_REF | ELAN3_PTE_MOD; -+ -+ elan3_writepte (dev, *ptep, newpte); -+ -+ ptep++; -+ } -+} -+ -+physaddr_t -+ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned index) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE pte = elan3_readpte (rail->Device, *ptep); -+ -+ return pte & ELAN3_PTE_PFN_MASK; -+} -+ -+void -+ep3_dvma_unload (EP_RAIL *r, void *private, unsigned index, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte (rail->Elan3mmu); -+ int i; -+ -+ for (i = (npages * ELAN3_PTES_PER_PAGE) - 1; i >= 0; i--) -+ elan3_writepte (rail->Device, ptep[i], tpte); -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/kmap_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kmap_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kmap_elan4.c 2005-06-01 23:12:54.670428312 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan4.c,v 1.7.8.2 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_DataReadWrite, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ PERM_LocExecute, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_RemoteAll, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int io_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_Unused, /* EP_PERM_ALL */ -+}; -+ -+void -+ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ if ((eaddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (saddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))) -+ printk ("ep4_sdram_map: eaddr=%x saddr=%lx - incorrectly alised\n", eaddr, saddr); -+ -+ for (i = 0; i < npages; i++) -+ { -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((saddr + off) >> PTE_PADDR_SHIFT) | PTE_SetPerm (sdram_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (ioaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) ioaddr); -+ -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, io_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ ioaddr += PAGESIZE; -+ } -+} -+void -+ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan4mmu_unload_range (&rail->r_ctxt, 0, eaddr, len); -+} -+ -+void * -+ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_reserve: eaddr=%x npages=%d (=> %d)\n", eaddr, npages, (npages << (PAGE_SHIFT - dev->dev_pageshift[0]))); -+ -+ return elan4mmu_reserve (&rail->r_ctxt, 0, (E4_Addr) eaddr, (npages << (PAGE_SHIFT - dev->dev_pageshift[0])), 0); -+} -+ -+void -+ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_release: eaddr=%x npages=%d private=%p\n", eaddr, npages, private); -+ -+ elan4mmu_release (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private); -+} -+ -+void -+ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int off; -+ unsigned long flags; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_set_pte: index %x -> eaddr %llx paddr %llx\n", -+ index, ((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE), (long long) paddr); -+ -+ local_irq_save (flags); -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_set_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, (index << (PAGE_SHIFT - dev->dev_pageshift[0])) + -+ (off >> dev->dev_pageshift[0]), newpte); -+ } -+ local_irq_restore (flags); -+} -+ -+physaddr_t -+ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_uint64 pte; -+ unsigned long flags; -+ -+ local_irq_save (flags); -+ pte = elan4mmu_get_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ local_irq_restore (flags); -+ -+ return elan4mmu_pte2phys (dev, pte); -+} -+ -+void -+ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_ADDR eaddr = ((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE); -+ unsigned long idx = (index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long lim = idx + (npages << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_KMAP, "ep4_dvma_unload: eaddr %x -> %lx : index=%d idx=%ld lim=%ld\n", -+ eaddr, (unsigned long)(eaddr + (npages * PAGE_SIZE)), index, idx, lim); -+ -+ local_irq_save (flags); -+ for (; idx < lim; idx++) -+ elan4mmu_clear_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, idx); -+ local_irq_restore (flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kmsg_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kmsg_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kmsg_elan3.c 2005-06-01 23:12:54.671428160 -0400 -@@ -0,0 +1,345 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan3.c,v 1.3.8.1 2004/09/30 09:52:37 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+static void -+ep3_inputq_event (EP3_RAIL *rail, void *arg) -+{ -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) arg; -+ -+ (*inputq->q_callback)((EP_RAIL *)rail, inputq->q_arg); -+} -+ -+static EP3_COOKIE_OPS ep3_inputq_cookie_ops = -+{ -+ ep3_inputq_event, -+}; -+ -+EP_INPUTQ * -+ep3_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq; -+ EP3_InputQueue qdesc; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP3_INPUTQ *, sizeof (EP3_INPUTQ), TRUE); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->Generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_base = inputq->q_slotsAddr; -+ inputq->q_top = inputq->q_base + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_base; -+ inputq->q_desc = EP_SYSTEMQ_DESC(rail->QueueDescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ -+ if (callback) -+ RegisterCookie (&rail->CookieTable, &inputq->q_cookie, inputq->q_descAddr, &ep3_inputq_cookie_ops, inputq); -+ -+ /* Initialise the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = inputq->q_base + slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ ep_free_main (&rail->Generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ DeregisterCookie (&rail->CookieTable, &inputq->q_cookie); -+ -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+} -+ -+void -+ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ elan3_sdram_writel (rail->Device, inputq->q_desc + offsetof (EP3_InputQueue, q_state), 0); -+} -+ -+void -+ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ EP3_InputQueue qdesc; -+ -+ /* mark the queue as locked */ -+ SetQueueLocked (rail, inputq->q_desc); -+ -+ /* re-initialise the queue as empty */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = (E3_Addr) inputq->q_base + inputq->q_slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = inputq->q_slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = inputq->q_callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+} -+ -+int -+ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E3_Addr nfptr; -+ int count = 0; -+ E3_uint32 state; -+ int delay; -+ -+ run_again_because_of_eventqueue_overflow: -+ nfptr = inputq->q_fptr + inputq->q_slotSize; -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ while (nfptr != elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_bptr))) /* PCI read */ -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (nfptr - inputq->q_base); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ state = elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state)); /* PCI read */ -+ if ((state & E3_QUEUE_FULL) == 0) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ else -+ { -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state), (state & ~E3_QUEUE_FULL)); /* PCI write */ -+ } -+ inputq->q_fptr = nfptr; -+ -+ nfptr += roundup (inputq->q_slotSize, E3_BLK_ALIGN); -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ if (++count >= maxCount && maxCount) -+ break; -+ } -+ -+ if (inputq->q_callback && count != 0) -+ { -+ if (count != inputq->q_waitCount) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_wcount), inputq->q_waitCount = count); -+ -+ if (IssueWaitevent (rail, inputq->q_descAddr + offsetof (EP3_InputQueue, q_wevent)) == ISSUE_COMMAND_TRAPPED) -+ goto run_again_because_of_eventqueue_overflow; -+ } -+ -+ return count; -+} -+ -+#define Q_EVENT(q,slotNum) ((q)->q_elan + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_EVENT_ADDR(q,slotNum) ((q)->q_elanAddr + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_MSG(q,slotNum) (void *)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) (*((int *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)) -+ -+#define Q_ELAN_SIZE(q) ((q)->q_slotCount * sizeof (E3_BlockCopyEvent)) -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E3_uint32))) -+ -+static void -+ep3_outputq_retry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error) -+{ -+ E3_DMA_BE *dmabe = (E3_DMA_BE *) dma; -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, dmabe->s.dma_srcEvent); -+ E3_Addr done = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ E3_uint32 *donep = ep_elan2main (&rail->Generic, done & ~EV_BCOPY_DTYPE_MASK); -+ -+ EPRINTF1 (DBG_KMSG, "ep3_ouputq_retry: donep at %p -> FAILED\n", donep); -+ -+ *donep = EP3_EVENT_FAILED; -+} -+ -+static EP3_COOKIE_OPS ep3_outputq_cookie_ops = -+{ -+ NULL, /* Event */ -+ ep3_outputq_retry, -+ NULL, /* DmaCancelled */ -+ NULL, /* DmaVerify */ -+}; -+ -+EP_OUTPUTQ * -+ep3_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq; -+ int i; -+ E3_BlockCopyEvent event; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP3_OUTPUTQ *, sizeof (EP3_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ -+ outputq->q_elan = ep_alloc_elan (r, Q_ELAN_SIZE(outputq), 0, &outputq->q_elanAddr); -+ -+ if (outputq->q_elan == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (void *) NULL) -+ { -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ RegisterCookie (&rail->CookieTable, &outputq->q_cookie, outputq->q_elanAddr, &ep3_outputq_cookie_ops, outputq); -+ -+ for (i = 0; i < slotCount; i++) -+ { -+ EP3_INIT_COPY_EVENT (event, outputq->q_cookie, Q_DONE_ADDR(outputq, i), 0); -+ -+ Q_DONE(outputq, i) = outputq->q_cookie.Cookie; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, Q_EVENT(outputq, i), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ -+ DeregisterCookie (&rail->CookieTable, &outputq->q_cookie); -+ -+ ep_free_main (r, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+} -+ -+void * -+ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return Q_MSG ((EP3_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ switch (Q_DONE((EP3_OUTPUTQ *) q, slotNum)) -+ { -+ case EP3_EVENT_ACTIVE: -+ return EP_OUTPUTQ_BUSY; -+ -+ case EP3_EVENT_FAILED: -+ return EP_OUTPUTQ_FAILED; -+ -+ default: -+ return EP_OUTPUTQ_FINISHED; -+ } -+} -+ -+int -+ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ unsigned base = outputq->q_slotSize - roundup (size, E3_BLK_ALIGN); -+ E3_DMA_BE dmabe; -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_QUEUED, retries); -+ dmabe.s.dma_size = roundup (size, E3_BLK_ALIGN); -+ dmabe.s.dma_source = Q_MSG_ADDR(outputq, slotNum) + base; -+ dmabe.s.dma_dest = base; -+ dmabe.s.dma_destEvent = EP_SYSTEMQ_ADDR(qnum); -+ dmabe.s.dma_destCookieVProc = vp; -+ dmabe.s.dma_srcEvent = Q_EVENT_ADDR(outputq, slotNum); -+ dmabe.s.dma_srcCookieVProc = 0; -+ -+ Q_DONE(outputq, slotNum) = EP3_EVENT_ACTIVE; -+ -+ elan3_sdram_writel (rail->Device, Q_EVENT(outputq, slotNum), 1); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_CRITICAL, FALSE) != ISSUE_COMMAND_OK) -+ { -+ Q_DONE(outputq, slotNum) = EP3_EVENT_FAILED; -+ return FALSE; -+ } -+ -+ return TRUE; -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/kmsg_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kmsg_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kmsg_elan4.c 2005-06-01 23:12:54.672428008 -0400 -@@ -0,0 +1,416 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan4.c,v 1.8.6.1 2004/09/30 09:52:37 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+ -+#include -+ -+static void -+ep4_inputq_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) arg; -+ -+ /* mark the queue as "fired" to cause a single waitevent -+ * to be issued next time the queue is polled */ -+ atomic_inc (&inputq->q_fired); -+ -+ (*inputq->q_callback)(&rail->r_generic, inputq->q_arg); -+} -+ -+EP_INPUTQ * -+ep4_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq; -+ E4_Event32 qevent; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP4_INPUTQ *, sizeof (EP4_INPUTQ), 1); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->r_generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_last = inputq->q_slotsAddr + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_slotsAddr; -+ inputq->q_desc = EP_SYSTEMQ_DESC (rail->r_queuedescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ inputq->q_eventAddr = rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]); -+ -+ if (callback) -+ { -+ if ((inputq->q_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == 0) -+ { -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ if ((inputq->q_wcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == 0) -+ { -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ ep4_register_intcookie (rail, &inputq->q_intcookie, inputq->q_descAddr, ep4_inputq_interrupt, inputq); -+ -+ inputq->q_count = 0; -+ -+ atomic_set (&inputq->q_fired, 0); -+ -+ /* Initialise the queue event */ -+ qevent.ev_CountAndType = E4_EVENT_INIT_VALUE (callback ? -32 : 0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0); -+ qevent.ev_WritePtr = inputq->q_ecq->ecq_addr; -+ qevent.ev_WriteValue = (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD; -+ } -+ -+ /* copy the event down to sdram */ -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qevent, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]), sizeof (E4_Event32)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ { -+ ep4_deregister_intcookie (rail, &inputq->q_intcookie); -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep4_put_ecq (rail, inputq->q_wcq, 4); -+ } -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+} -+ -+void -+ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ EP_ADDR lastSlot = inputq->q_slotsAddr + (inputq->q_slotCount-1) * inputq->q_slotSize; -+ E4_InputQueue qdesc; -+ -+ qdesc.q_bptr = inputq->q_slotsAddr; -+ qdesc.q_fptr = inputq->q_slotsAddr; -+ qdesc.q_control = E4_InputQueueControl (inputq->q_slotsAddr, lastSlot, inputq->q_slotSize); -+ qdesc.q_event = inputq->q_callback ? inputq->q_eventAddr : 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+ -+ EPRINTF5 (DBG_KMSG, "ep_enable_inputq: %x - %016llx %016llx %016llx %016llx\n", (int) inputq->q_descAddr, -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 0), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 8), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 16), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 24)); -+} -+ -+void -+ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ E4_InputQueue qdesc; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+} -+ -+int -+ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E4_Addr fptr = inputq->q_fptr; -+ E4_Addr bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ int count = 0; -+ int delay; -+ -+ while (bptr != 0 && fptr != bptr) -+ { -+ while (fptr != bptr) -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (fptr - inputq->q_slotsAddr); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ EPRINTF4(DBG_KMSG, "ep4_poll_inputq: %x slot %d of %d [%08x]\n", (int)inputq->q_descAddr, -+ ((int)(fptr - inputq->q_slotsAddr))/inputq->q_slotSize, -+ inputq->q_slotCount, ((uint32_t *) (slot + inputq->q_slotSize))[-1]); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ /* reset the last word of the slot to "unreceived" */ -+ ((uint32_t *) (slot + inputq->q_slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ /* move on the front pointer */ -+ fptr = (fptr == inputq->q_last) ? inputq->q_slotsAddr : fptr + inputq->q_slotSize; -+ -+ elan4_sdram_writel (dev, qdesc + offsetof (E4_InputQueue, q_fptr), fptr); -+ -+ inputq->q_count++; -+ -+ if (++count >= maxCount && maxCount) -+ { -+ inputq->q_fptr = fptr; -+ -+ return count; -+ } -+ } -+ -+ bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ } -+ -+ inputq->q_fptr = fptr; -+ -+ /* Only insert a single wait event command if the callback has -+ * occured, otherwise just acrue the count as we've just periodically -+ * polled it. -+ */ -+ if (inputq->q_callback && atomic_read (&inputq->q_fired)) -+ { -+ atomic_dec (&inputq->q_fired); -+ -+ ep4_wait_event_cmd (inputq->q_wcq, inputq->q_eventAddr, -+ E4_EVENT_INIT_VALUE (-inputq->q_count << 5, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ inputq->q_ecq->ecq_addr, -+ (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ inputq->q_count = 0; -+ } -+ -+ return count; -+} -+ -+#define Q_MSG(q,slotNum) (unsigned long)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) *((E4_uint64 *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64)) -+ -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E4_uint64))) -+ -+#define Q_DONE_VAL(val,cnt) ((cnt) << 16 | (val)) -+#define Q_DONE_RET(done) ((int) ((done) & 0xffff)) -+#define Q_DONE_CNT(done) ((int) ((done) >> 16)) -+ -+EP_OUTPUTQ * -+ep4_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP4_OUTPUTQ *, sizeof (EP4_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ spin_lock_init (&outputq->q_lock); -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (E4_uint64 *) NULL) -+ { -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_cq = elan4_alloccq (&rail->r_ctxt, CQ_Size64K, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority); -+ -+ if (outputq->q_cq == (ELAN4_CQ *) NULL) -+ { -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ } -+ -+ outputq->q_dwords = CQ_Size (outputq->q_cq->cq_size) >> 3; -+ -+ /* mark all the queue slots as finished */ -+ for (i = 0; i < slotCount; i++) -+ Q_DONE(outputq, i) = Q_DONE_VAL (EP_OUTPUTQ_FINISHED, 0); -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ -+ elan4_freecq (&rail->r_ctxt, outputq->q_cq); -+ -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ spin_lock_destroy (&outputq->q_lock); -+ -+ KMEM_FREE (outputq, sizeof (EP4_OUTPUTQ)); -+} -+ -+void * -+ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return (void *) Q_MSG ((EP4_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ EPRINTF2 (DBG_KMSG, "ep4_outputq_state: slotNum %d state %x\n", slotNum, (int)Q_DONE((EP4_OUTPUTQ *) q, slotNum)); -+ -+ return Q_DONE_RET(Q_DONE((EP4_OUTPUTQ *)q, slotNum)); -+} -+ -+int -+ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ unsigned int nbytes = roundup (size, 32); -+ unsigned int base = outputq->q_slotSize - nbytes; -+ unsigned int i, dwords; -+ unsigned long flags; -+ E4_uint64 val; -+ -+ spin_lock_irqsave (&outputq->q_lock, flags); -+ -+ EPRINTF4 (DBG_KMSG, "ep4_outputq_send: slotNum=%d size=%d vp=%d qnum=%d\n", slotNum, size, vp, qnum); -+ -+ /* compute command queue size as follows - each slot uses -+ * overhead: 14 dwords + -+ * data > 128 ? 36 dwords -+ * data > 64 ? 18 dwords -+ * data > 32 ? 10 dwords -+ * else 6 dwords -+ */ -+ dwords = 14 + (size > 128 ? 36 : -+ size > 64 ? 18 : -+ size ? 10 : 6); -+ -+ outputq->q_dwords += Q_DONE_CNT (Q_DONE(outputq, slotNum)); -+ -+ if (dwords > outputq->q_dwords) -+ { -+ /* attempt to reclaim command queue space from other slots */ -+ i = slotNum; -+ do { -+ if (++i == outputq->q_slotCount) -+ i = 0; -+ -+ val = Q_DONE(outputq, i); -+ -+ if ((Q_DONE_RET (val) == EP_OUTPUTQ_FINISHED || Q_DONE_RET (val) == EP_OUTPUTQ_FAILED) && Q_DONE_CNT(val) > 0) -+ { -+ outputq->q_dwords += Q_DONE_CNT (val); -+ -+ Q_DONE(outputq, i) = Q_DONE_VAL(Q_DONE_RET(val), 0); -+ } -+ } while (i != slotNum && dwords > outputq->q_dwords); -+ } -+ -+ if (dwords > outputq->q_dwords) -+ { -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ EPRINTF0 (DBG_KMSG, "ep4_outputq_state: no command queue space\n"); -+ return 0; -+ } -+ -+ outputq->q_dwords -= dwords; -+ -+ Q_DONE(outputq, slotNum) = Q_DONE_VAL (EP_OUTPUTQ_BUSY, dwords); -+ -+ if (outputq->q_retries != retries) -+ { -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL(1) | GUARD_RESET(outputq->q_retries = retries)); -+ elan4_nop_cmd (outputq->q_cq, 0); -+ } -+ -+ /* transfer the top "size" bytes from message buffer to top of input queue */ -+ elan4_open_packet (outputq->q_cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp)); -+ elan4_sendtrans0 (outputq->q_cq, TR_INPUT_Q_GETINDEX, EP_SYSTEMQ_ADDR(qnum)); -+ -+ /* send upto EP_SYSTEMQ_MSG_MAX (256) bytes of message to the top of the slot */ -+ if (size > 128) -+ { -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 0, (void *) (Q_MSG(outputq, slotNum) + base + 0)); -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 128, (void *) (Q_MSG(outputq, slotNum) + base + 128)); -+ } -+ else if (size > 64) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else if (size > 32) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (32 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ elan4_sendtrans1 (outputq->q_cq, TR_INPUT_Q_COMMIT, EP_SYSTEMQ_ADDR(qnum), 0 /* no cookie */); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FINISHED, dwords)); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FAILED, dwords)); -+ -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ return 1; -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/kthread.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kthread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kthread.c 2005-06-01 23:12:54.672428008 -0400 -@@ -0,0 +1,186 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kthread.c,v 1.5 2004/05/19 08:54:57 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.c,v $*/ -+ -+#include -+ -+#include -+ -+void -+ep_kthread_init (EP_KTHREAD *kt) -+{ -+ spin_lock_init (&kt->lock); -+ kcondvar_init (&kt->wait); -+ -+ kt->next_run = 0; -+ kt->should_stall = 0; -+ kt->started = 0; -+ kt->should_stop = 0; -+ kt->stopped = 0; -+ kt->state = KT_STATE_RUNNING; -+} -+ -+void -+ep_kthread_destroy (EP_KTHREAD *kt) -+{ -+ spin_lock_destroy (&kt->lock); -+ kcondvar_destroy (&kt->wait); -+} -+ -+void -+ep_kthread_started (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->started = 1; -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+void -+ep_kthread_stopped (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->stopped = 1; -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+int -+ep_kthread_should_stall (EP_KTHREAD *kth) -+{ -+ return (kth->should_stall); -+} -+ -+int -+ep_kthread_sleep (EP_KTHREAD *kt, long next_run) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (next_run && (kt->next_run == 0 || BEFORE (next_run, kt->next_run))) -+ kt->next_run = next_run; -+ -+ if (kt->should_stop) -+ { -+ spin_unlock_irqrestore (&kt->lock, flags); -+ return (-1); -+ } -+ -+ do { -+ if (kt->should_stall) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ kt->state = KT_STATE_SLEEPING; -+ kt->running = 0; -+ if (kt->should_stall || kt->next_run == 0) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ else -+ kcondvar_timedwait (&kt->wait,&kt->lock, &flags, kt->next_run); -+ kt->state = KT_STATE_RUNNING; -+ kt->running = lbolt; -+ } while (kt->should_stall); -+ kt->next_run = 0; -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return (0); -+} -+ -+void -+ep_kthread_schedule (EP_KTHREAD *kt, long tick) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->next_run == 0 || BEFORE (tick, kt->next_run)) -+ { -+ kt->next_run = tick; -+ if (!kt->should_stall && kt->state == KT_STATE_SLEEPING) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stall (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->should_stall++ == 0) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ while (kt->state != KT_STATE_SLEEPING) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_resume (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (--kt->should_stall == 0) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stop (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->should_stop = 1; -+ while (kt->started && !kt->stopped) -+ { -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+int -+ep_kthread_state (EP_KTHREAD *kt, long *time) -+{ -+ unsigned long flags; -+ int res = KT_STATE_SLEEPING; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ -+ if (kt->next_run) { -+ *time = kt->next_run; -+ res = kt->should_stall ? KT_STATE_STALLED : KT_STATE_SCHEDULED; -+ } -+ -+ if (kt->running) { -+ *time = kt->running; -+ res = KT_STATE_RUNNING; -+ } -+ -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/kthread.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/kthread.h 2005-06-01 23:12:54.673427856 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/Makefile 2005-06-01 23:12:54.673427856 -0400 -@@ -0,0 +1,33 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/ep/Makefile -+# -+ -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/ep/Makefile -+# -+ -+list-multi := ep.o -+ep-objs := cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o $(ep3-$(CONFIG_EP)) $(ep4-$(CONFIG_EP)) -+export-objs := conf_linux.o -+obj-$(CONFIG_EP) := ep.o -+ -+ep.o : $(ep-objs) -+ $(LD) -r -o $@ $(ep-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/Makefile.conf 2005-06-01 23:12:54.673427856 -0400 -@@ -0,0 +1,12 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = ep.o -+MODULENAME = ep -+KOBJFILES = cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o \$\(ep3-\$\(CONFIG_EP\)\) \$\(ep4-\$\(CONFIG_EP\)\) -+EXPORT_KOBJS = conf_linux.o -+CONFIG_NAME = CONFIG_EP -+SGALFC = -+# EXTRALINES START -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/ep/neterr.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/neterr.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/neterr.c 2005-06-01 23:12:54.674427704 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.25.8.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: raise context filter for node %d due to network error\n", rail->Name, nodeId); -+ printk ("%s: raise context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ printk ("%s: node %d is flushing - deferring network error fixup\n", rail->Name, nodeId); -+ else -+ list_add_tail (&nodeRail->Link, &rail->NetworkErrorList); -+ } -+ -+ switch (what) -+ { -+ case EP_NODE_NETERR_ATOMIC_PACKET: -+ ASSERT (nodeRail->NetworkErrorCookies[channel] == 0); -+ -+ /* Need to raise the approriate context filter for this node, -+ * and periodically send a neterr fixup message to it until -+ * we receive an ack from it -+ */ -+ IncrStat (rail, NeterrAtomicPacket); -+ -+ nodeRail->NetworkErrorCookies[channel] = cookie; -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_ATOMIC_PACKET; -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: atomic packet destroyed - node %d cookie %llx\n", rail->Name, nodeId, cookie); -+ -+ printk ("%s: atomic packet destroyed - node %d cookie %llx\n", rail->Name, nodeId, cookie); -+ break; -+ -+ case EP_NODE_NETERR_DMA_PACKET: -+ /* Must be an overlapped dma packet, raise the context filter, -+ * and hold it up for a NETWORK_ERROR_TIMEOUT */ -+ IncrStat (rail, NeterrDmaPacket); -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_DMA_PACKET; -+ break; -+ } -+ -+ nodeRail->NextRunTime = lbolt + NETWORK_ERROR_TIMEOUT; -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, nodeRail->NextRunTime); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/neterr_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/neterr_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/neterr_elan3.c 2005-06-01 23:12:54.674427704 -0400 -@@ -0,0 +1,326 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan3.c,v 1.24 2003/11/17 13:26:45 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+typedef struct neterr_halt_args -+{ -+ EP3_RAIL *Rail; -+ unsigned int NodeId; -+ EP_NETERR_COOKIE *Cookies; -+} NETERR_HALT_ARGS; -+ -+static int -+DmaMatchesCookie (EP3_RAIL *rail, E3_DMA_BE *dma, int nodeId, EP_NETERR_COOKIE *cookies, char *where) -+{ -+ E3_uint32 cvproc; -+ E3_uint32 cookie; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ cvproc = dma->s.dma_destCookieVProc; -+ cookie = dma->s.dma_srcCookieVProc; -+ } -+ else -+ { -+ cvproc = dma->s.dma_srcCookieVProc; -+ cookie = dma->s.dma_destCookieVProc; -+ } -+ -+ EPRINTF6 (DBG_NETWORK_ERROR, "%s: Neterr - %s: DMA %08x %08x %08x %08x\n", rail->Generic.Name, where, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_NETWORK_ERROR, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ if (EP_VP_ISDATA((cvproc & DMA_PROCESS_MASK)) && EP_VP_TO_NODE(cvproc & DMA_PROCESS_MASK) == nodeId) -+ { -+ /* -+ * This is a DMA going to the node which has a network fixup -+ * request pending, so check if the cookie matches. -+ */ -+ if ((cookie == cookies[0] || cookie == cookies[1]) /* && !WaitForEop */) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %08x on %s\n", rail->Generic.Name, cookie, where); -+ -+ return (TRUE); -+ } -+ } -+ -+ return (FALSE); -+} -+ -+ -+static void -+NetworkErrorHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ NETERR_HALT_ARGS *args = (NETERR_HALT_ARGS *) arg; -+ EP3_RAIL *rail = args->Rail; -+ EP_SYS *sys = rail->Generic.System; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (DmaMatchesCookie (rail, &dma, args->NodeId, args->Cookies, "runq ")) -+ { -+ /* -+ * Transfer the DMA to the node, it's source event will -+ * get executed later. -+ */ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ rail->NetworkErrorFlushed = TRUE; -+ kcondvar_wakeupall (&rail->NetworkErrorSleep, &sys->NodeLock); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+void -+ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP_SYS *sys = rail->Generic.System; -+ ELAN3_DEV *dev = rail->Device; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ E3_DMA_BE dmabe; -+ EP3_COOKIE *cp; -+ E3_uint32 vp; -+ NETERR_HALT_ARGS args; -+ struct list_head *el, *nel, matchedList; -+ int i; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ StallDmaRetryThread (rail); -+ -+ args.Rail = rail; -+ args.NodeId = nodeId; -+ args.Cookies = cookies; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ QueueHaltOperation (rail->Device, 0, NULL, INT_TProcHalted | INT_DProcHalted, NetworkErrorHaltOperation, &args); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ while (! rail->NetworkErrorFlushed) -+ kcondvar_wait (&rail->NetworkErrorSleep, &sys->NodeLock, &flags); -+ rail->NetworkErrorFlushed = FALSE; -+ -+ spin_lock (&rail->DmaRetryLock); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ -+ spin_unlock (&rail->DmaRetryLock); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ResumeDmaRetryThread (rail); -+ -+ /* Now "set" the source event of any write DMA's */ -+ while (! list_empty (&matchedList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (matchedList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_srcEvent) -+ { -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, retry->Dma.s.dma_srcEvent); -+ -+ /* Block local interrupts, since we need to atomically -+ * decrement the event count and perform the word write -+ */ -+ local_irq_save (flags); -+ { -+ E3_uint32 type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type)); -+ E3_uint32 count = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Count)); -+ -+ elan3_sdram_writel (dev, event + offsetof (E3_Event, ev_Count), count - 1); -+ -+ if (count == 1) -+ { -+ if (type & EV_TYPE_MASK_BCOPY) -+ { -+ E3_Addr srcVal = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Source)); -+ E3_Addr dstAddr = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Dest)) & ~EV_BCOPY_DTYPE_MASK; -+ -+ ASSERT ((srcVal & EV_WCOPY) != 0); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: neterr perform event word write at %08x with %08x\n", rail->Generic.Name, dstAddr, srcVal); -+ -+ ELAN3_OP_STORE32 (rail->Ctxt, dstAddr, srcVal); -+ } -+ -+ if ((type & ~EV_TYPE_MASK_BCOPY) != 0) -+ { -+ if ((type & EV_TYPE_MASK_CHAIN) == EV_TYPE_CHAIN) -+ { -+ printk ("%s: event at %08x - chained event %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ else if ((type & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr event interrupt - cookie %08x\n", rail->Generic.Name, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ cp = LookupCookie (&rail->CookieTable, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ } -+ else if ((type & EV_TYPE_MASK_DMA) == EV_TYPE_DMA) -+ { -+ sdramaddr_t dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2)); -+ -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr chained dma - %08x\n", rail->Generic.Name, (type & ~EV_TYPE_MASK2)); -+ -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ { -+ vp = dmabe.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ } -+ else -+ { -+ vp = dmabe.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ ASSERT (EP_VP_ISDATA(vp)); -+ -+ nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_CONNECTED: -+ case EP_NODE_LEAVING_CONNECTED: -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ -+ case EP_NODE_LOCAL_PASSIVATE: -+ QueueDmaOnStalledList (rail, &dmabe); -+ break; -+ -+ default: -+ panic ("ep: neterr incorrect state for node\n"); -+ } -+ } -+ else if ((type & EV_TYPE_MASK_THREAD) == EV_TYPE_THREAD) -+ { -+ printk ("%s: event at %08x - thread waiting %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ } -+ } -+ } -+ local_irq_restore(flags); -+ } -+ -+ /* add to free list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/neterr_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/neterr_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/neterr_elan4.c 2005-06-01 23:12:54.675427552 -0400 -@@ -0,0 +1,251 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan4.c,v 1.2 2003/11/24 17:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+struct neterr_desc -+{ -+ EP4_RAIL *rail; -+ unsigned int nodeid; -+ EP_NETERR_COOKIE *cookies; -+ int done; -+} ; -+ -+static int -+dma_matches_cookie (EP4_RAIL *rail, E4_uint64 vproc, E4_uint64 cookie, unsigned int nodeId, EP_NETERR_COOKIE *cookies, const char *where) -+{ -+ if ((EP_VP_ISDATA (vproc) && EP_VP_TO_NODE (vproc) == nodeId) && (cookie == cookies[0] || cookie == cookies[1])) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %016llx on %s\n", rail->r_generic.Name, cookie, where); -+ -+ return 1; -+ } -+ return 0; -+} -+ -+static void -+ep4_neterr_dma_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ E4_uint64 vproc = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ -+ if (dma_matches_cookie (rail, vproc, cookie, desc->nodeid, desc->cookies, "runq ")) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ /* Replace the dma with one which will "disappear" */ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_neterr_dma_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_neterr_fixup_dmas (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct neterr_desc desc; -+ struct list_head matchedList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ register int i; -+ -+ desc.rail = rail; -+ desc.nodeid = nodeId; -+ desc.cookies = cookies; -+ desc.done = 0; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_neterr_dma_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_neterr_dma_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Now "set" the source event of any put DMA#'s we can use the dma -+ * retry command queue as the retry thread is stalled */ -+ while (! list_empty (&matchedList)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (matchedList.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ elan4_set_event_cmd (rail->r_dma_ecq->ecq_cq, retry->retry_dma.dma_srcEvent); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ /* Flush through the command queues to ensure that all the setevents have executed */ -+ ep4_flush_ecqs (rail); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ /* we're called from the ManagerThread, so no need to stall it */ -+ list_add_tail (&ops->op_link, &rail->r_neterr_ops); -+} -+void -+ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep4_neterr_fixup_sten (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &rail->r_neterr_ops) { -+ EP4_NETERR_OPS *op = list_entry (el, EP4_NETERR_OPS, op_link); -+ -+ (op->op_func) (rail, op->op_arg, nodeId, cookies); -+ } -+} -+ -+void -+ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ /* network error cookies can come from the following : -+ * -+ * DMA engine -+ * if a DMA matches a network error cookie, then we just need to -+ * execute the local setevent *before* returning. -+ * -+ * STEN packet -+ * if the STEN packet was generated with as a WAIT_FOR_EOP -+ * and it's not present on the retry lists, then re-create -+ * it. -+ * -+ */ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4_neterr_fixup: node %d cookies <%lld%s%s%s%s> <%lld%s%s%s%s>\n", -+ rail->r_generic.Name, nodeId, EP4_COOKIE_STRING(cookies[0]), EP4_COOKIE_STRING(cookies[1])); -+ -+ if ((cookies[0] & EP4_COOKIE_DMA) || (cookies[1] & EP4_COOKIE_DMA)) -+ ep4_neterr_fixup_dmas (rail, nodeId, cookies); -+ -+ if ((cookies[0] & EP4_COOKIE_STEN) || (cookies[1] & EP4_COOKIE_STEN)) -+ ep4_neterr_fixup_sten (rail, nodeId, cookies); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/nmh.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/nmh.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/nmh.c 2005-06-01 23:12:54.676427400 -0400 -@@ -0,0 +1,181 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: nmh.c,v 1.6 2004/01/05 13:48:08 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.c,v $*/ -+ -+#include -+ -+#include -+ -+#define EP_NMD_SPANS(nmd, base, top) ((nmd)->nmd_addr <= (base) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (top)) -+ -+#define EP_NMD_OVERLAPS(nmd, addr, len) ((nmd)->nmd_addr <= ((addr) + (len)) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (addr)) -+ -+#define EP_NMH_HASH(tbl,idx,addr) ((addr) % (tbl)->tbl_size[idx]) -+ -+int -+ep_nmh_init (EP_NMH_TABLE *tbl) -+{ -+ int i, idx, hsize = 1; -+ -+ for (idx = EP_NMH_NUMHASH-1; idx >= 0; idx--, hsize <<= 1) -+ { -+ tbl->tbl_size[idx] = (hsize < EP_NMH_HASHSIZE) ? hsize : EP_NMH_HASHSIZE; -+ -+ KMEM_ZALLOC (tbl->tbl_hash[idx], struct list_head *, sizeof (struct list_head) * tbl->tbl_size[idx], 1); -+ -+ if (tbl->tbl_hash == NULL) -+ { -+ while (++idx < EP_NMH_NUMHASH) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ return (ENOMEM); -+ } -+ -+ for (i = 0; i < tbl->tbl_size[idx]; i++) -+ INIT_LIST_HEAD (&tbl->tbl_hash[idx][i]); -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmh_fini (EP_NMH_TABLE *tbl) -+{ -+ int idx; -+ -+ for (idx = 0; idx < EP_NMH_NUMHASH; idx++) -+ if (tbl->tbl_hash[idx]) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ -+ bzero (tbl, sizeof (EP_NMH_TABLE)); -+} -+ -+void -+ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ EP_ADDR base = nmh->nmh_nmd.nmd_addr; -+ EP_ADDR top = base + nmh->nmh_nmd.nmd_len - 1; -+ int idx; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ list_add_tail (&nmh->nmh_link, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]); -+} -+ -+void -+ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ list_del (&nmh->nmh_link); -+} -+ -+EP_NMH * -+ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmd) -+{ -+ EP_ADDR base = nmd->nmd_addr; -+ EP_ADDR top = base + nmd->nmd_len - 1; -+ int idx; -+ struct list_head *le; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ for (; idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) { -+ -+ list_for_each (le, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]) { -+ EP_NMH *nmh = list_entry (le, EP_NMH, nmh_link); -+ -+ if (EP_NMD_SPANS (&nmh->nmh_nmd, nmd->nmd_addr, nmd->nmd_addr + nmd->nmd_len - 1)) -+ return (nmh); -+ } -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len) -+{ -+ ASSERT ((off + len - 1) <= nmd->nmd_len); -+ -+ subset->nmd_addr = nmd->nmd_addr + off; -+ subset->nmd_len = len; -+ subset->nmd_attr = nmd->nmd_attr; -+} -+ -+int -+ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b) -+{ -+ if (EP_NMD_NODEID (a) != EP_NMD_NODEID (b)) /* not generated on the same node */ -+ return 0; -+ -+ if ((EP_NMD_RAILMASK (a) & EP_NMD_RAILMASK (b)) == 0) /* no common rails */ -+ return 0; -+ -+ if (b->nmd_addr == (a->nmd_addr + a->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = a->nmd_addr; -+ merged->nmd_len = a->nmd_len + b->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(a), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ return 1; -+ } -+ -+ if (a->nmd_addr == (b->nmd_addr + b->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = b->nmd_addr; -+ merged->nmd_len = b->nmd_len + a->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(b), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask) -+{ -+ EP_NMH *nmh = ep_nmh_find (&sys->MappingTable, nmd); -+ -+ if (nmh == NULL) -+ { -+ printk ("ep_nmd_map_rails: nmd=%08x.%08x.%08x cannot be found\n", -+ nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ return (-1); -+ } -+ -+ return (nmh->nmh_ops->op_map_rails (sys, nmh, nmd, railmask)); -+} -+ -+EP_RAILMASK -+ep_nmd2railmask (EP_NMD *frags, int nFrags) -+{ -+ EP_RAILMASK mask; -+ -+ if (nFrags == 0) -+ return ((EP_RAILMASK)-1); -+ -+ for (mask = EP_NMD_RAILMASK(frags); --nFrags; ) -+ mask &= EP_NMD_RAILMASK(++frags); -+ -+ return (mask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/probenetwork.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/probenetwork.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/probenetwork.c 2005-06-01 23:12:54.677427248 -0400 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork.c,v 1.43 2004/04/19 15:43:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork.c,v $ */ -+ -+#include -+ -+#include -+#include "debug.h" -+ -+int PositionCheck = 1; -+ -+#define NUM_DOWN_FROM_VAL(NumDownLinksVal, level) (((NumDownLinksVal) >> ((level) << 2)) & 0xF) -+ -+int -+ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ int lvl, i; -+ int level; -+ int nodeid; -+ int numnodes; -+ int randomRoutingDisabled; -+ int sw; -+ int nacks; -+ int nowayup; -+ int nalias; -+ int upmask; -+ int partial; -+ int link; -+ int invalid; -+ int linkdown[ELAN_MAX_LEVELS]; -+ int linkup[ELAN_MAX_LEVELS]; -+ EP_SWITCH *switches[ELAN_MAX_LEVELS]; -+ int switchCount[ELAN_MAX_LEVELS+1]; -+ int lowestBcast; -+ int numUpLinks[ELAN_MAX_LEVELS]; -+ int routedown [ELAN_MAX_LEVELS]; -+ -+ EPRINTF1 (DBG_PROBE, "%s: ProbeNetwork started\n", rail->Name); -+ -+ switchCount[0] = 1; -+ numUpLinks [0] = 4; -+ -+ for (level = 0; level < ELAN_MAX_LEVELS; level++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, level); -+ -+ KMEM_ZALLOC (switches[level], EP_SWITCH *, sizeof (EP_SWITCH) * switchCount[level], 1); -+ -+ for (sw = 0, nacks = 0, nowayup = 0, lowestBcast=7; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ int good = 1; -+ int tsw; -+ -+ for (nodeid = 0,tsw = sw, lvl = level-1 ; lvl >= 0 ; lvl--) -+ { -+ EP_SWITCH *lsw; -+ int link = (8-numUpLinks[lvl]) + (tsw % numUpLinks[lvl]); -+ -+ tsw = tsw / numUpLinks[lvl]; -+ lsw = &switches[lvl][tsw]; -+ -+ if (lsw->present == 0 || (lsw->lnr & (1 << link))) -+ { -+ EPRINTF4 (DBG_PROBE, "lvl %d sw %d present=%d lnr=%x\n", lvl, sw, lsw->present, lsw->lnr); -+ good = 0; -+ } -+ -+ linkup[lvl] = link; -+ linkdown[lvl] = lsw->link; -+ -+ if ( lvl ) nodeid = ((nodeid + linkdown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += linkdown[0]; -+ -+ } -+ -+ /* -+ * don't bother probing routes which we we've already seen are unreachable -+ * because a link upwards was in reset or the switch previously nacked us. -+ */ -+ if (! good) -+ { -+ lsw->present = 0; -+ -+ nacks++; -+ nowayup++; -+ -+ continue; -+ } -+ -+ lsw->present = rail->Operations.ProbeRoute (rail, level, sw, nodeid, linkup, linkdown, 5, lsw); -+ -+ if (! lsw->present) -+ { -+ EPRINTF3 (DBG_PROBE, "%s: level %d switch %d - unexpected nack\n", rail->Name, level, sw); -+ -+ nacks++; -+ nowayup++; -+ } -+ else -+ { -+ EPRINTF5 (DBG_PROBE, "%s: level %d switch %d - link %d bcast %d\n", rail->Name, level, sw, lsw->link, lsw->bcast); -+ -+ if (level == 2 && rail->Devinfo.dev_device_id == PCI_DEVICE_ID_ELAN3) -+ { -+ /* If we see broadcast top as 7, and we came in on a low link, then we can't -+ * determine whether we're in a 128 way or a un-configured 64u64d switch, so -+ * we treat it as a 64u64d and detect the 128 way case by "going over the top" -+ * below. Unless we've been told what it really is by NumDownLinksVal. -+ */ -+ if (lsw->bcast == 7 && lsw->link < 4) -+ lsw->bcast = ndown ? (ndown - 1) : 3; -+ } -+ -+ if ( lowestBcast > lsw->bcast ) -+ lowestBcast = lsw->bcast; -+ -+ if (lsw->link > (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast))) -+ { -+ /* We've arrived on a "up-link" - this could be either -+ * we're in the top half of a x8 top-switch - or we're -+ * in the bottom half and have gone "over the top". We -+ * differentiate these cases since the switches below -+ * a x8 top-switch will have broadcast top set to 3, -+ * and the x8 topswitch have broadcast top set to 7. -+ */ -+ if (lsw->bcast == 7) -+ nowayup++; -+ else -+ { -+ EPRINTF2 (DBG_PROBE, "%s: level %d - gone over the top\n", -+ rail->Name, level); -+ -+ if (level > 0) -+ { -+ KMEM_FREE (switches[level], sizeof (EP_SWITCH) * switchCount[level] ); -+ level--; -+ } -+ -+ numUpLinks[level] = 0; -+ goto finished; -+ } -+ } -+ -+ } -+ } -+ -+ numUpLinks[level] = ndown ? (8 - ndown) : (7 - lowestBcast); -+ switchCount[level+1] = switchCount[level] * numUpLinks[level]; -+ -+ /* Now we know which links are uplinks, we can see whether there is -+ * any possible ways up */ -+ upmask = (ndown ? (0xFF << ndown) & 0xFF : (0xFF << (8 - numUpLinks[level])) & 0xFF); -+ -+ for (sw = 0; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ -+ if (lsw->present && lsw->link <= (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast)) && (switches[level][sw].lnr & upmask) == upmask) -+ nowayup++; -+ } -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d - sw=%d nacks=%d nowayup=%d bcast=%d numup=%d\n", -+ rail->Name, level, sw, nacks, nowayup, lowestBcast, numUpLinks[level]); -+ -+ if (nacks == sw) -+ { -+ static bitmap_t printed[BT_BITOUL(EP_MAX_RAILS)]; -+ -+ if (! BT_TEST (printed, rail->Number)) -+ printk ("%s: cannot determine network position\n", rail->Name); -+ BT_SET (printed, rail->Number); -+ goto failed; -+ } -+ -+ if (nowayup == sw) -+ goto finished; -+ } -+ -+ printk ("%s: exceeded number of levels\n", rail->Name); -+ level = ELAN_MAX_LEVELS - 1; -+ -+ failed: -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ return -EAGAIN; -+ -+ finished: -+ /* we've successfully probed the network - now calculate our node -+ * positon and what level of random routing is possible */ -+ nalias = 1; -+ for (lvl = 0, invalid = 0, partial = 0, randomRoutingDisabled = 0; lvl <= level; lvl++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, lvl); -+ int upmask = ndown ? (0xFF << ndown) & 0xFF : 0xF0; -+ -+ for (sw = 0, nalias = 0; sw < switchCount[lvl]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[lvl][sw]; -+ -+ /* You can only use adaptive routing if links 4-7 are uplinks, and at least one of them is -+ * not in reset. Otherwise you can randomly select an "uplink" if all the uplinks are not -+ * in reset. */ -+ if (lsw->present && ((upmask == 0xF0) ? (lsw->lnr & upmask) == upmask : (lsw->lnr & upmask) != 0)) -+ randomRoutingDisabled |= (1 << lvl); -+ -+ if (!lsw->present) -+ partial++; -+ else -+ { -+ if (lsw->invalid) -+ { -+ printk ("%s: invalid switch detected (level %d switch %d)\n", rail->Name, lvl, sw); -+ invalid++; -+ } -+ -+ for (i = 0; i < nalias; i++) -+ if (linkdown[i] == lsw->link) -+ break; -+ if (i == nalias) -+ linkdown[nalias++] = lsw->link; -+ } -+ } -+ -+ link = linkdown[0]; -+ for (i = 1; i < nalias; i++) -+ if (linkdown[i] < link) -+ link = linkdown[i]; -+ -+ if (nalias > 1 && lvl != level) -+ { -+ printk ("%s: switch aliased below top level (level %d)\n", rail->Name, lvl); -+ invalid++; -+ } -+ -+ routedown[lvl] = link; -+ } -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ if (invalid) -+ { -+ printk ("%s: invalid switch configuration\n", rail->Name); -+ return (EINVAL); -+ } -+ -+ /* Handle the aliasing case where a 16 way is used as multiple smaller switches */ -+ if (nalias == 1) -+ level++; -+ else if (nalias == 2) /* a 16 way as 2x8 ways */ -+ numUpLinks[level++] = 6; /* only 2 down links */ -+ else if (nalias > 4) /* a 16 way as 8x2 ways */ -+ numUpLinks[level-1] = 6; -+ -+ /* -+ * Compute my nodeid and number of nodes in the machine -+ * from the routedown and the number of downlinks at each level. -+ */ -+ for(nodeid=0, lvl = level - 1; lvl >= 0; lvl--) -+ { -+ if (lvl) nodeid = ((nodeid + routedown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += routedown[0]; -+ } -+ -+ for (numnodes = 1, lvl = 0; lvl < level; lvl++) -+ numnodes *= (8 - numUpLinks[lvl]); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, nodeid); -+ -+ if (randomRoutingDisabled & ((1 << (level-1))-1)) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing disabled 0x%x)\n", -+ rail->Name, nodeid, level, numnodes, randomRoutingDisabled); -+ else if (partial) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing ok)\n", -+ rail->Name, nodeid, level, numnodes); -+ else -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", -+ rail->Name, nodeid, level, numnodes); -+ -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ pos->pos_nodeid = nodeid; -+ pos->pos_levels = level; -+ pos->pos_nodes = numnodes; -+ pos->pos_random_disabled = randomRoutingDisabled; -+ -+ for(lvl = 0; lvl < level; lvl++) -+ pos->pos_arity[level -lvl - 1] = (8-numUpLinks[lvl]); -+ pos->pos_arity[level] = 1; /* XXXX why does this need to be 1 ? */ -+ -+ return 0; -+} -+ -+/* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+#define BCAST_TOP_INVALID(lvl, bcast, ndown) ((lvl) == 0 ? (bcast) < ((ndown)-1) : (bcast) != ((ndown) - 1)) -+ -+void -+CheckPosition (EP_RAIL *rail) -+{ -+ ELAN_POSITION *pos = &rail->Position; -+ unsigned int nodeid = pos->pos_nodeid; -+ unsigned int invalid = 0; -+ unsigned int changed = 0; -+ int lvl, slvl; -+ -+ if (! PositionCheck) -+ return; -+ -+ if (rail->Operations.CheckPosition(rail)) /* is update ready for this rail */ -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: check position: SwitchProbeLevel=%d\n", rail->Name, rail->SwitchProbeLevel); -+ -+ for (lvl = 0, slvl = pos->pos_levels-1; lvl <= rail->SwitchProbeLevel; lvl++, slvl--) -+ { -+ EP_SWITCHSTATE *state = &rail->SwitchState[lvl]; -+ EP_SWITCHSTATE *lstate = &rail->SwitchLast[lvl]; -+ unsigned int ndown = pos->pos_arity[slvl]; -+ unsigned int upmask = (0xFF << ndown) & 0xFF; -+ unsigned int mylink = nodeid % ndown; -+ unsigned int error = 0; -+ unsigned int binval = 0; -+ -+ nodeid /= ndown; -+ -+ /* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+ if (BCAST_TOP_INVALID(lvl, state->bcast, ndown) || (state->LNR & upmask) == upmask) -+ { -+ /* no way up from here - we'd better be at the top */ -+ if (lvl != (pos->pos_levels-1)) -+ { -+ if (state->bcast != (ndown-1)) -+ printk ("%s: invalid broadcast top %d at level %d\n", rail->Name, state->bcast, lvl); -+ else if ((state->LNR & upmask) == upmask && (lstate->LNR & upmask) == upmask) -+ printk ("%s: no way up to switch at level %d (turned off ?)\n", rail->Name, lvl+1); -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ printk ("%s: moved at top level was connected to link %d now connected to %d\n", rail->Name, mylink, state->linkid); -+ } -+ -+ if (state->linkid != mylink) -+ error++; -+ -+ if (BCAST_TOP_INVALID (lvl, state->bcast, ndown)) -+ binval++; -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ { -+ if (state->linkid != rail->SwitchLast[lvl].linkid) -+ printk ("%s: moved at lvl %d was connected to link %d now connected to %d\n", rail->Name, lvl, mylink, state->linkid); -+ -+ error++; -+ } -+ } -+ -+ if (error == 0 && invalid == 0) -+ rail->SwitchProbeTick[lvl] = lbolt; -+ -+ EPRINTF10 (DBG_ROUTETABLE, "%s: lvl=%d (slvl=%d) linkid=%d bcast=%d lnr=%02x uplink=%d : error=%d binval=%d invalid=%d\n", -+ rail->Name, lvl, slvl, state->linkid, state->bcast, state->LNR, state->uplink, error, binval, invalid); -+ -+ invalid |= (error | binval); -+ } -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ if (rail->SwitchState[lvl].uplink != rail->SwitchLast[lvl].uplink) -+ changed++; -+ -+ if (changed) -+ { -+ printk ("%s: broadcast tree has changed from", rail->Name); -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%c%d", lvl == 0 ? ' ' : ',', rail->SwitchLast[lvl].uplink); -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%s%d", lvl == 0 ? " to " : ",", rail->SwitchState[lvl].uplink); -+ printk ("\n"); -+ } -+ -+ if (rail->SwitchProbeLevel > 0) -+ bcopy (rail->SwitchState, rail->SwitchLast, rail->SwitchProbeLevel * sizeof (EP_SWITCHSTATE)); -+ } -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ EPRINTF4 (DBG_ROUTETABLE, "%s: level %d lbolt=%lx ProbeLevelTick=%lx\n", -+ rail->Name, lvl, lbolt, rail->SwitchProbeTick[lvl]); -+ -+ if (AFTER (lbolt, rail->SwitchProbeTick[lvl] + EP_POSITION_TIMEOUT)) -+ { -+ if (lvl < rail->SwitchBroadcastLevel+1) -+ { -+ if (lvl == 0) -+ printk ("%s: cable disconnected\n", rail->Name); -+ else -+ printk ("%s: broadcast level has dropped to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ break; -+ } -+ } -+ -+ if (lvl > rail->SwitchBroadcastLevel+1) -+ { -+ if (rail->SwitchBroadcastLevel < 0) -+ printk ("%s: cable reconnected\n", rail->Name); -+ if (lvl == rail->Position.pos_levels) -+ printk ("%s: broadcast level has recovered\n", rail->Name); -+ else -+ printk ("%s: broadcast level has recovered to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ -+ if (rail->SwitchBroadcastLevel != (lvl - 1)) -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: setting SwitchBroadcastLevel to %d\n", rail->Name, lvl-1); -+ -+ rail->SwitchBroadcastLevel = lvl - 1; -+ rail->SwitchBroadcastLevelTick = lbolt; -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/probenetwork_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan3.c 2005-06-01 23:12:54.677427248 -0400 -@@ -0,0 +1,298 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3.c,v 1.40 2004/04/15 12:30:08 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+static void ep3_probe_event (EP3_RAIL *rail, void *arg); -+static EP3_COOKIE_OPS ep3_probe_ops = -+{ -+ ep3_probe_event -+} ; -+ -+int -+ep3_init_probenetwork (EP3_RAIL *rail) -+{ -+ sdramaddr_t stack; -+ E3_Addr sp; -+ E3_BlockCopyEvent event; -+ int i; -+ -+ if (! (stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rail->ProbeStack))) -+ return -ENOMEM; -+ -+ spin_lock_init (&rail->ProbeLock); -+ kcondvar_init (&rail->ProbeWait); -+ -+ /* Initialise the probe command structure */ -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[i]), 0); -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[i]), 1); -+ -+ RegisterCookie (&rail->CookieTable, &rail->ProbeCookie, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeDone), &ep3_probe_ops, rail); -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Type), 0); -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Count), 0); -+ -+ EP3_INIT_COPY_EVENT (event, rail->ProbeCookie, rail->RailMainAddr + offsetof (EP3_RAIL_MAIN, ProbeDone), 1); -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeDone), sizeof (E3_BlockCopyEvent)); -+ -+ rail->RailMain->ProbeDone = EP3_EVENT_FREE; -+ -+ sp = ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "kcomm_probe"), -+ rail->ProbeStack, stack, EP3_STACK_SIZE, -+ 3, rail->CommandPortAddr, rail->RailElanAddr, rail->RailMainAddr); -+ -+ IssueRunThread (rail, sp); -+ -+ return 0; -+} -+ -+void -+ep3_destroy_probenetwork (EP3_RAIL *rail) -+{ -+ if (rail->ProbeStack == (sdramaddr_t) 0) -+ return; -+ -+ /* XXXX: ensure that the network probe thread is stopped */ -+ -+ DeregisterCookie (&rail->CookieTable, &rail->ProbeCookie); -+ -+ kcondvar_destroy (&rail->ProbeWait); -+ spin_lock_destroy (&rail->ProbeLock); -+ -+ ep_free_elan (&rail->Generic, rail->ProbeStack, EP3_STACK_SIZE); -+} -+ -+static void -+ep3_probe_event (EP3_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ rail->ProbeDone = 1; -+ kcondvar_wakeupone (&rail->ProbeWait, &rail->ProbeLock); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+} -+ -+int -+ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ E3_uint16 flits[MAX_FLITS]; -+ E3_uint32 result; -+ int nflits; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ -+ nflits = GenerateProbeRoute ( flits, nodeid, level, linkup, linkdown, 0); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_PROBE(level), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ EPRINTF0 (DBG_ROUTETABLE, "ProbeRoute: cannot load route entry\n"); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ return (EINVAL); -+ } -+ -+ do { -+ /* Initialise the probe source to include our partially computed nodeid */ -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_SINGLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ rail->ProbeDone = 0; -+ -+ /* And wakeup the thread to do the probe */ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ -+ /* Now wait for it to complete */ -+ while (! rail->ProbeDone) -+ kcondvar_wait (&rail->ProbeWait, &rail->ProbeLock, &flags); -+ -+ /* wait for block copy event to flush write buffers */ -+ while (! EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone)) -+ if (! EP3_EVENT_FIRING(rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone), rail->ProbeCookie, railMain->ProbeDone)) -+ panic ("ProbeRoute: network probe event failure\n"); -+ -+ result = railMain->ProbeResult; -+ -+ if (result == C_ACK_ERROR) -+ kcondvar_timedwait (&rail->ProbeWait, &rail->ProbeLock, &flags, lbolt + (hz/8)); -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ -+ } while (result != C_ACK_OK && --attempts); -+ -+ if (result == C_ACK_OK) -+ { -+ if (railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid || -+ railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %d\n", rail->Generic.Name, level, sw, -+ railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1], nodeid); -+ -+ result = C_ACK_ERROR; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - level - 1]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - level - 1]; -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d switch %d - linkid=%d bcast=%d LNR=%02x%s\n", -+ rail->Generic.Name, level, sw, TR_TRACEROUTE0_LINKID(val0), -+ TR_TRACEROUTE1_BCAST_TOP(val1), TR_TRACEROUTE0_LNR(val0), -+ TR_TRACEROUTE0_REVID(val0) ? "" : " RevA Part"); -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = (TR_TRACEROUTE0_REVID(val0) == 0); -+ } -+ } -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ -+ return (result == C_ACK_OK); -+} -+ -+void -+ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ int lvl, nflits; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ nflits = GenerateCheckRoute (pos, flits, pos->pos_levels - lvl - 1, 0); -+ -+ if (LoadRoute (rail->Device, rail->Ctxt->RouteTable, EP_VP_PROBE(lvl), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ panic ("ep3_probe_position_found: cannot load probe route entry\n"); -+ } -+ -+ /* Initialise the traceroute source data with our nodeid */ -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+} -+ -+int -+ep3_check_position (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ ELAN_POSITION *pos = &rail->Generic.Position; -+ unsigned int level = rail->RailMain->ProbeLevel; -+ unsigned int updated = EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone); -+ unsigned int lvl; -+ -+ if (updated) -+ { -+ if (railMain->ProbeResult != C_ACK_OK) -+ { -+ EPRINTF2 (DBG_PROBE, "%s: CheckNetworkPosition: packet nacked result=%d\n", rail->Generic.Name, railMain->ProbeResult); -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != pos->pos_nodeid || val1 != pos->pos_nodeid) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep3_check_position - level %d lost nodeid\n", rail->Generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0; lvl <= level; lvl++) -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->Generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID(val0); -+ rail->Generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(val0); -+ rail->Generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ rail->Generic.SwitchState[lvl].uplink = 4; -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->Generic.SwitchState[lvl].linkid, -+ rail->Generic.SwitchState[lvl].LNR, rail->Generic.SwitchState[lvl].bcast ,rail->Generic.SwitchState[lvl].uplink); -+ } -+ rail->Generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ } -+ -+ if (railMain->ProbeDone == EP3_EVENT_FREE) -+ { -+ if (rail->Generic.SwitchBroadcastLevel == rail->Generic.Position.pos_levels-1) -+ level = rail->Generic.Position.pos_levels - 1; -+ else -+ level = rail->Generic.SwitchBroadcastLevel + 1; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep3_check_postiion: level %d\n", rail->Generic.Name, level); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_MULTIPLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ railMain->ProbeLevel = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ } -+ -+ return updated; -+} -+ -Index: linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan3_thread.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 2005-06-01 23:12:54.678427096 -0400 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3_thread.c,v 1.19 2004/03/24 11:32:56 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3_thread.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+ -+static int -+kcomm_probe_vp (EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain, int vp, int attempts, int timeouts) -+{ -+ int rc; -+ -+ /* Since we use %g1 to hold the "rxd" so the trap handler can -+ * complete the envelope processing - we pass zero to indicate we're -+ * not a receiver thread */ -+ asm volatile ("mov %g0, %g1"); -+ -+ while (attempts && timeouts) -+ { -+ c_open (vp); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest0, &railElan->ProbeSource0); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest1, &railElan->ProbeSource1); -+ c_sendtrans0 (TR_SENDACK | TR_SETEVENT, (E3_Addr) 0); -+ -+ switch (rc = c_close()) -+ { -+ case C_ACK_OK: -+ return (C_ACK_OK); -+ -+ case C_ACK_DISCARD: -+ attempts--; -+ break; -+ -+ default: /* output timeout */ -+ timeouts--; -+ } -+ -+ c_break_busywait(); -+ } -+ -+ return (timeouts == 0 ? C_ACK_ERROR : C_ACK_DISCARD); -+} -+ -+void -+kcomm_probe (E3_CommandPort *cport, EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain) -+{ -+ int level; -+ -+ for (;;) -+ { -+ c_waitevent (&railElan->ProbeStart, 1); -+ -+ switch (railElan->ProbeType) -+ { -+ case PROBE_SINGLE: -+ railMain->ProbeResult = kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(railElan->ProbeLevel), -+ PROBE_SINGLE_ATTEMPTS, PROBE_SINGLE_TIMEOUTS); -+ -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ -+ case PROBE_MULTIPLE: -+ for (level = railElan->ProbeLevel; level >= 0; level--) -+ { -+ if (kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(level), -+ PROBE_MULTIPLE_ATTEMPTS, PROBE_MULTIPLE_TIMEOUTS) == C_ACK_OK) -+ { -+ railMain->ProbeLevel = level; -+ railMain->ProbeResult = C_ACK_OK; -+ break; -+ } -+ -+ c_break_busywait(); -+ } -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ } -+ -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/probenetwork_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/probenetwork_elan4.c 2005-06-01 23:12:54.679426944 -0400 -@@ -0,0 +1,396 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan4.c,v 1.9 2004/08/19 11:05:03 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+static void -+probe_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ rail->r_probe_done = 1; -+ kcondvar_wakeupone (&rail->r_probe_wait, &rail->r_probe_lock); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+} -+ -+int -+ep4_probe_init (EP4_RAIL *rail) -+{ -+ spin_lock_init (&rail->r_probe_lock); -+ kcondvar_init (&rail->r_probe_wait); -+ -+ rail->r_probe_cq = ep4_alloc_ecq (rail, CQ_Size1K); -+ -+ if (rail->r_probe_cq == NULL) -+ return -ENOMEM; -+ -+ ep4_register_intcookie (rail, &rail->r_probe_intcookie, rail->r_elan_addr, probe_interrupt, rail); -+ -+ return 0; -+} -+ -+void -+ep4_probe_destroy (EP4_RAIL *rail) -+{ -+ if (rail->r_probe_cq) -+ ep4_free_ecq (rail, rail->r_probe_cq); -+ -+ if (rail->r_probe_intcookie.int_arg == NULL) -+ return; -+ ep4_deregister_intcookie (rail, &rail->r_probe_intcookie); -+ -+ kcondvar_destroy (&rail->r_probe_wait); -+ spin_lock_destroy (&rail->r_probe_lock); -+} -+ -+#define LINKDOWN(nodeid, level) ((nodeid >> (level << 1)) & 3) -+#define PROBE_PATTERN0(nodeid) (0xaddebabe ^ nodeid) -+#define PROBE_PATTERN1(nodeid) (0xfeedbeef ^ nodeid) -+ -+#define EP4_PROBE_RETRIES 4 -+ -+int -+ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_RAIL_MAIN *rmain = rail->r_main; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ unsigned long flags; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ if (first == 0) -+ first = linkup ? FIRST_ROUTE(linkup[i]) : FIRST_ADAPTIVE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : PACKED_ADAPTIVE; -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ /* Generate the "down" routes */ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = linkdown ? PACKED_ROUTE(linkdown[i]) : PACKED_ROUTE(LINKDOWN(nodeid, i)); -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(level), &route); -+ -+ while (attempts--) -+ { -+ rail->r_probe_done = 0; -+ -+ /* generate the STEN packet - note we use a datatype of dword as we're copying to elan in dwords -+ * NB - no flow control is required, since the max packet size is less than the command queue -+ * size and it's dedicated for network probing. -+ */ -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_nop_cmd (rail->r_probe_cq->ecq_cq, 0); -+ -+ elan4_open_packet (rail->r_probe_cq->ecq_cq, OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(level))); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0), -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull | ((E4_uint64)PROBE_PATTERN0(nodeid) << 32)); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1), -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000000000001ull | ((E4_uint64)PROBE_PATTERN1(nodeid) << 32)); -+ elan4_sendtrans0 (rail->r_probe_cq->ecq_cq, TR_NOP_TRANS | TR_LAST_AND_SEND_ACK, 0); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FINISHED); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FAILED); -+ -+ elan4_interrupt_cmd (rail->r_probe_cq->ecq_cq, rail->r_probe_intcookie.int_val); -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ while (! rail->r_probe_done) -+ kcondvar_wait (&rail->r_probe_wait, &rail->r_probe_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+ -+ if (rmain->r_probe_result == EP4_STATE_FINISHED) -+ { -+ if (rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN0(nodeid) || -+ rmain->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN1(nodeid)) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %d\n", rail->r_generic.Name, level, sw, -+ rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1], PROBE_PATTERN0(nodeid)); -+ } -+ else -+ { -+ E4_uint32 val0 = rmain->r_probe_dest0[TRACEROUTE_ENTRIES - level - 1]; -+ E4_uint32 val1 = rmain->r_probe_dest1[TRACEROUTE_ENTRIES - level - 1]; -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = 0; -+ -+ return 1; -+ } -+ } -+ -+ rmain->r_probe_result = EP4_STATE_FREE; -+ } -+ -+ return 0; -+} -+ -+ -+void -+ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int lvl; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ /* Initialise the "probe" route to use the broadcast tree */ -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < lvl; i++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = FIRST_BCAST_TREE; -+ else -+ packed[rb++] = PACKED_BCAST_TREE; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ spanned /= *arityp++; -+ -+ /* Generate the "down" routes */ -+ for (i = lvl-1; i >= 0; i--) -+ { -+ spanned /= *arityp; -+ packed[rb++] = PACKED_ROUTE((pos->pos_nodeid / spanned) % *arityp); -+ arityp++; -+ } -+ -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(lvl), &route); -+ -+ /* Initialise "start" event for this level */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopySource), -+ rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl])); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopyDest), -+ rail->r_probe_cq->ecq_addr); -+ -+ /* Initiailise command stream - reset the start event */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_cmd), -+ WRITE_DWORD_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_value), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ -+ /* Initiailise command stream - sten traceroute packet */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(lvl))); -+ -+ /* Initiailise command stream - traceroute 0 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute0), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute0), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull | ((E4_uint64) PROBE_PATTERN0(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - traceroute 1 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute1), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute1), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000100000001ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000000000001ull | ((E4_uint64) PROBE_PATTERN1(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - null sendack */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_sendack), -+ SEND_TRANS_CMD | ((TR_NOP_TRANS | TR_LAST_AND_SEND_ACK) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_sendack), -+ 0); -+ -+ /* Initiailise command stream - guard ok, write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_ok), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_writedword_ok), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_value_ok), -+ lvl); -+ -+ /* Initiailise command stream - guard fail, chain to next or write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_fail), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ -+ if (lvl > 0) -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl-1]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ NOP_CMD); -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ EP4_PROBE_FAILED); -+ } -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_nop_pad), -+ NOP_CMD); -+ } -+ -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ -+ mb(); -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[pos->pos_levels-1])); -+} -+ -+int -+ep4_check_position (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned int level = rail->r_main->r_probe_level; -+ unsigned int lvl; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep4_check_position: level=%lld\n", rail->r_generic.Name, rail->r_main->r_probe_level); -+ -+ if (rail->r_main->r_probe_level != EP4_PROBE_ACTIVE) -+ { -+ if (rail->r_main->r_probe_level == EP4_PROBE_FAILED) -+ { -+ EPRINTF1 (DBG_PROBE, "%s: ep4_check_position: packets all nacked\n", rail->r_generic.Name); -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E4_uint32 val0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E4_uint32 val1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != PROBE_PATTERN0 (pos->pos_nodeid) || val1 != PROBE_PATTERN1 (pos->pos_nodeid)) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep4_check_position - level %d lost nodeid\n", rail->r_generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0 ; lvl <= level; lvl++) -+ { -+ E4_uint32 uval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - lvl - 1]; -+ E4_uint32 dval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E4_uint32 dval1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->r_generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID (dval0); -+ rail->r_generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(dval0); -+ rail->r_generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP (dval1); -+ rail->r_generic.SwitchState[lvl].uplink = TR_TRACEROUTE0_LINKID (uval0); -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->r_generic.SwitchState[lvl].linkid, -+ rail->r_generic.SwitchState[lvl].LNR, rail->r_generic.SwitchState[lvl].bcast ,rail->r_generic.SwitchState[lvl].uplink); -+ -+ } -+ -+ rail->r_generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ mb(); -+ -+ if (rail->r_generic.SwitchBroadcastLevel == rail->r_generic.Position.pos_levels-1) -+ level = rail->r_generic.Position.pos_levels - 1; -+ else -+ level = rail->r_generic.SwitchBroadcastLevel + 1; -+ -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[level])); -+ -+ return 1; -+ } -+ -+ return 0; -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/procfs_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/procfs_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/procfs_linux.c 2005-06-01 23:12:54.680426792 -0400 -@@ -0,0 +1,693 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.53.2.4 2005/01/18 14:18:42 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/procfs_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+#include -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_root; -+struct proc_dir_entry *ep_config_root; -+ -+/* -+ * We provide a slightly "special" interface for /proc/elan/device%d/nodeset, -+ * so that it can be included in a "poll" system call. On each "read" on the -+ * file, we generate a new nodeset if a) the previous one has been completely -+ * read and b) if it has changed since it was generated. -+ * -+ * Unfortunately ... this doesn't allow "tail -f" to work, since this uses -+ * fstat() on the fd, as we only hold the last nodeset string, we could not -+ * handle the case where two processes were reading a different rates. -+ * We could maybe have implemented this as a "sliding window", so that we -+ * add a new nodeset string, when it has changed and someone reads past -+ * end of the last one. Then if someone read from before out "window" -+ * we would produce "padding" data. The problem with this, is that a -+ * simple "cat" on /proc/elan/device%d/nodeset will read the whole "file" -+ * which will be mostly padding ! -+ * -+ * Just to not that the purpose of this interface is: -+ * 1) to allow cat /proc/elan/device%d/nodeset to show the current -+ * nodeset. -+ * 2) to allow rms (or similar) to poll() on the file, and when the -+ * nodeset changes read a new one. -+ * -+ * so ... we don't bother solving the troublesome "tail -f" problem. -+ */ -+ -+typedef struct nodeset_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ unsigned pr_changed; -+ char *pr_page; -+ unsigned pr_off; -+ unsigned pr_len; -+} NODESET_PRIVATE; -+ -+NODESET_PRIVATE *ep_nodeset_list; -+wait_queue_head_t ep_nodeset_wait; -+spinlock_t ep_nodeset_lock; -+ -+static int -+proc_write_state(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "start") && rail->State == EP_RAIL_STATE_UNINITIALISED) -+ ep_start_rail (rail); -+ -+ if (! strcmp (tmpbuf, "stop") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ ep_stop_rail (rail); -+ -+ if (! strcmp (tmpbuf, "offline") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 1, CM_OFFLINE_PROCFS); -+ -+ if (! strcmp (tmpbuf, "online") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 0, CM_OFFLINE_PROCFS); -+ -+ if (! strncmp (tmpbuf, "restart=", 8) && rail->State == EP_RAIL_STATE_RUNNING) -+ cm_restart_node (rail, simple_strtol (tmpbuf + 8, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "panic=", 6)) -+ ep_panic_node (rail->System, simple_strtol(tmpbuf + 6, NULL, 0), -+ strchr (tmpbuf, ',') ? strchr(tmpbuf, ',') + 1 : "remote panic request"); -+ -+ if (! strncmp (tmpbuf, "raise=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.RaiseFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "lower=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.LowerFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_state(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ int len; -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ len = sprintf (page, "uninitialised\n"); -+ break; -+ case EP_RAIL_STATE_STARTED: -+ len = sprintf (page, "started\n"); -+ break; -+ case EP_RAIL_STATE_RUNNING: -+ len = sprintf (page, "running NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ len = sprintf (page, "incompatible NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ default: -+ len = sprintf (page, "\n"); -+ break; -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_display(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "rail")) -+ DisplayRail (rail); -+ if (! strcmp (tmpbuf, "segs")) -+ DisplaySegs (rail); -+ if (! strcmp (tmpbuf, "nodes")) -+ DisplayNodes (rail); -+ if (! strcmp (tmpbuf, "status")) -+ DisplayStatus (rail); -+ if (! strcmp (tmpbuf, "debug") && rail->Operations.Debug) -+ rail->Operations.Debug (rail); -+ if (! strncmp (tmpbuf, "epcomms", 7)) -+ ep_comms_display (rail->System, tmpbuf[7] == '=' ? tmpbuf + 8 : NULL); -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_display(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+ -+static int -+proc_read_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ -+ if ( rail == NULL ) { -+ strcpy(page,"proc_read_stats rail=NULL\n"); -+ } else { -+ page[0] = 0; -+ ep_fillout_stats(rail, page); -+ rail->Operations.FillOutStats (rail, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_devinfo(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ ELAN_DEVINFO *devinfo = &rail->Devinfo; -+ ELAN_POSITION *pos = &rail->Position; -+ char *p = page; -+ -+ switch (devinfo->dev_device_id) -+ { -+ case PCI_DEVICE_ID_ELAN3: -+ p += sprintf (p, "ep%d is elan3 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ -+ case PCI_DEVICE_ID_ELAN4: -+ p += sprintf (p, "ep%d is elan4 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ default: -+ p += sprintf (p, "ep%d is unkown %x/%x\n", rail->Number, devinfo->dev_vendor_id, devinfo->dev_device_id); -+ break; -+ } -+ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ p += sprintf (p, "ep%d nodeid %d numnodes %d\n", rail->Number, pos->pos_nodeid, pos->pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"state", proc_read_state, proc_write_state}, -+ {"display", proc_read_display, proc_write_display}, -+ {"stats", proc_read_stats, NULL}, -+ {"devinfo", proc_read_devinfo, NULL}, -+}; -+ -+static int -+nodeset_open (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr; -+ -+ if ((pr = kmalloc (sizeof (NODESET_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_changed = 1; -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_page = NULL; -+ pr->pr_rail = (EP_RAIL *)( PDE(inode)->data ); -+ -+ spin_lock (&ep_nodeset_lock); -+ pr->pr_next = ep_nodeset_list; -+ ep_nodeset_list = pr; -+ spin_unlock (&ep_nodeset_lock); -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+nodeset_release (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ NODESET_PRIVATE **ppr; -+ -+ spin_lock (&ep_nodeset_lock); -+ for (ppr = &ep_nodeset_list; (*ppr) != pr; ppr = &(*ppr)->pr_next) -+ ; -+ (*ppr) = pr->pr_next; -+ spin_unlock (&ep_nodeset_lock); -+ -+ if (pr->pr_page) -+ free_page ((unsigned long) pr->pr_page); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+nodeset_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ EP_RAIL *rail = pr->pr_rail; -+ int error; -+ unsigned long flags; -+ -+ if (!pr->pr_changed && pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (pr->pr_page == NULL && (pr->pr_page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ kmutex_lock (&rail->CallbackLock); -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ spin_lock_irqsave (&rail->System->NodeLock, flags); -+ ep_sprintf_bitmap (pr->pr_page, PAGESIZE, statemap_tobitmap(rail->NodeSet), 0, 0, rail->Position.pos_nodes); -+ spin_unlock_irqrestore (&rail->System->NodeLock, flags); -+ -+ if (rail->SwitchBroadcastLevel == -1) -+ strcat (pr->pr_page, ""); -+ else if (rail->SwitchBroadcastLevel < (rail->Position.pos_levels-1)) -+ sprintf (pr->pr_page + strlen (pr->pr_page), "<%d>", rail->SwitchBroadcastLevel); -+ strcat (pr->pr_page, "\n"); -+ } -+ else -+ strcpy (pr->pr_page, "\n"); -+ kmutex_unlock (&rail->CallbackLock); -+ -+ pr->pr_len = strlen (pr->pr_page); -+ pr->pr_off = 0; -+ pr->pr_changed = 0; -+ } -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_page + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ free_page ((unsigned long) pr->pr_page); -+ pr->pr_page = NULL; -+ } -+ -+ return (count); -+} -+ -+static unsigned int -+nodeset_poll (struct file *file, poll_table *wait) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ -+ poll_wait (file, &ep_nodeset_wait, wait); -+ if (pr->pr_changed || pr->pr_off < pr->pr_len) -+ return (POLLIN | POLLRDNORM); -+ return (0); -+} -+ -+static void -+nodeset_callback (void *arg, statemap_t *map) -+{ -+ EP_RAIL *rail = (EP_RAIL *) arg; -+ NODESET_PRIVATE *pr; -+ -+ ep_display_bitmap (rail->Name, "Nodeset", statemap_tobitmap(map), 0, ep_numnodes(rail->System)); -+ -+ spin_lock (&ep_nodeset_lock); -+ for (pr = ep_nodeset_list; pr; pr = pr->pr_next) -+ if (pr->pr_rail == rail) -+ pr->pr_changed = 1; -+ spin_unlock (&ep_nodeset_lock); -+ -+ wake_up_interruptible (&ep_nodeset_wait); -+} -+ -+void -+proc_character_fill (long mode, char *fmt, ...) -+{ -+ int len; -+ va_list ap; -+ PROC_PRIVATE *private = (PROC_PRIVATE *)mode; -+ -+ /* is the buffer already full */ -+ if (private->pr_len >= private->pr_data_len) -+ return; -+ -+ /* attempt to fill up to the remaining space */ -+ va_start (ap, fmt); -+ len = vsnprintf ( & private->pr_data[private->pr_len], (private->pr_data_len - private->pr_len), fmt, ap); -+ va_end (ap); -+ -+ if (len < 0 ) -+ { -+ /* we have reached the end of buffer and need to fail all future writes -+ * the caller can check (pr_len >= pr_data_len) and recall with more space -+ */ -+ private->pr_len = private->pr_data_len; -+ return; -+ } -+ -+ /* move the length along */ -+ private->pr_len += len; -+} -+ -+int -+proc_release (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr = (PROC_PRIVATE *) file->private_data; -+ -+ if (pr->pr_data) -+ KMEM_FREE (pr->pr_data, pr->pr_data_len); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+ssize_t -+proc_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ PROC_PRIVATE *pr = (PROC_PRIVATE *) file->private_data; -+ int error; -+ -+ if (pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_data + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ return (count); -+} -+ -+static int -+proc_open (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr; -+ CM_RAIL *cmRail; -+ int pages = 4; -+ unsigned long flags; -+ -+ if ((pr = kmalloc (sizeof (PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_rail = (EP_RAIL *)(PDE(inode)->data); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ if (pr->pr_rail->State != EP_RAIL_STATE_RUNNING) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Rail not Running\n"); -+ break; -+ } -+ else -+ { -+ pr->pr_di.func = proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("maps", file->f_dentry->d_iname)) -+ { -+ cmRail = pr->pr_rail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeMaps (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("segs", file->f_dentry->d_iname)) -+ { -+ cmRail = pr->pr_rail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeSgmts (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("tree", file->f_dentry->d_iname)) -+ DisplayRailDo (&pr->pr_di, pr->pr_rail); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations proc_nodeset_operations = -+{ -+ read: nodeset_read, -+ poll: nodeset_poll, -+ open: nodeset_open, -+ release: nodeset_release, -+}; -+ -+struct file_operations proc_operations = -+{ -+ read: proc_read, -+ open: proc_open, -+ release: proc_release, -+}; -+ -+void -+ep_procfs_rail_init (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir; -+ struct proc_dir_entry *p; -+ char name[10]; -+ int i; -+ -+ sprintf (name, "rail%d", rail->Number); -+ -+ if ((dir = rail->ProcDir = proc_mkdir (name, ep_procfs_root)) == NULL) -+ return; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, dir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = rail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((p = create_proc_entry ("nodeset", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_nodeset_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ -+ rail->CallbackRegistered = 1; -+ ep_register_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ } -+ -+ if ((p = create_proc_entry ("maps", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("segs", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("tree", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+} -+ -+void -+ep_procfs_rail_fini (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir = rail->ProcDir; -+ char name[10]; -+ int i; -+ -+ if (dir == NULL) -+ return; -+ -+ if (rail->CallbackRegistered) -+ { -+ ep_remove_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ -+ remove_proc_entry ("nodeset", dir); -+ } -+ -+ remove_proc_entry ("maps", dir); -+ remove_proc_entry ("segs", dir); -+ remove_proc_entry ("tree", dir); -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, dir); -+ -+ sprintf (name, "rail%d", rail->Number); -+ remove_proc_entry (name, ep_procfs_root); -+} -+ -+#include "quadrics_version.h" -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+void -+ep_procfs_init() -+{ -+ extern int txd_stabilise; -+ extern int MaxSwitchLevels; -+ -+ spin_lock_init (&ep_nodeset_lock); -+ init_waitqueue_head (&ep_nodeset_wait); -+ -+ ep_procfs_root = proc_mkdir ("ep", qsnet_procfs_root); -+ ep_config_root = proc_mkdir ("config", ep_procfs_root); -+ -+ qsnet_proc_register_str (ep_procfs_root, "version", quadrics_version, 1); -+ -+ qsnet_proc_register_hex (ep_config_root, "epdebug", &epdebug, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_console", &epdebug_console, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_cmlevel", &epdebug_cmlevel, 0); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ qsnet_proc_register_hex (ep_config_root, "epdebug_check_sum", &epdebug_check_sum, 0); -+#endif -+ qsnet_proc_register_hex (ep_config_root, "epcomms_forward_limit", &epcomms_forward_limit, 0); -+ qsnet_proc_register_int (ep_config_root, "txd_stabilise", &txd_stabilise, 0); -+ qsnet_proc_register_int (ep_config_root, "assfail_mode", &assfail_mode, 0); -+ qsnet_proc_register_int (ep_config_root, "max_switch_levels", &MaxSwitchLevels, 1); -+ -+ ep_procfs_rcvr_xmtr_init(); -+} -+ -+void -+ep_procfs_fini(void) -+{ -+ ep_procfs_rcvr_xmtr_fini(); -+ -+ remove_proc_entry ("max_switch_levels", ep_config_root); -+ remove_proc_entry ("assfail_mode", ep_config_root); -+ remove_proc_entry ("txd_stabilise", ep_config_root); -+ remove_proc_entry ("epcomms_forward_limit", ep_config_root); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ remove_proc_entry ("epdebug_check_sum", ep_config_root); -+#endif -+ remove_proc_entry ("epdebug_cmlevel", ep_config_root); -+ remove_proc_entry ("epdebug_console", ep_config_root); -+ remove_proc_entry ("epdebug", ep_config_root); -+ -+ remove_proc_entry ("version", ep_procfs_root); -+ -+ remove_proc_entry ("config", ep_procfs_root); -+ remove_proc_entry ("ep", qsnet_procfs_root); -+ -+ spin_lock_destroy (&ep_nodeset_lock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/quadrics_version.h 2005-06-01 23:12:54.680426792 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/ep/railhints.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/railhints.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/railhints.c 2005-06-01 23:12:54.680426792 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: railhints.c,v 1.5 2004/02/06 22:37:06 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/railhints.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+int -+ep_pickRail(EP_RAILMASK railmask) -+{ -+ static volatile int lastGlobal; -+ int i, rnum, last = lastGlobal; -+ -+ /* Pick a single rail out of the railmask */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (railmask & (1 << ((last + i) % EP_MAX_RAILS))) -+ break; -+ -+ if (i == EP_MAX_RAILS) -+ return (-1); -+ -+ rnum = (last + i) % EP_MAX_RAILS; -+ -+ lastGlobal = (rnum + 1) % EP_MAX_RAILS; -+ -+ ASSERT (railmask & (1 << rnum)); -+ -+ return (rnum); -+} -+ -+int -+ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails) -+{ -+ /* Retrun a single rail out of allowed mask with the best connectivity for broadcast. */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask)); -+} -+ -+int -+ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ EPRINTF5 (DBG_XMTR, "ep_xmtr_prefrail: xmtr=%p allowedRails=%x nodeId=%d xmtr->RailMaks=%x Connected=%x\n", -+ xmtr, allowedRails, nodeId, xmtr->RailMask, node->ConnectedRails); -+ -+ /* Return a single rail which is currently connected to nodeId (limited to rails -+ * in allowedmask) - if more than one rail is possible, then round-robin between -+ * them */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask & node->ConnectedRails)); -+} -+ -+EP_RAILMASK -+ep_xmtr_availrails (EP_XMTR *xmtr) -+{ -+ /* Return which rails can be used to transmit one. */ -+ -+ return (xmtr->RailMask); -+} -+ -+EP_RAILMASK -+ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ /* Return which rails can be used to transmit to this node. */ -+ -+ return (xmtr->RailMask & node->ConnectedRails); -+} -+ -+int -+ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails) -+{ -+ /* Return the "best" rail for queueing a receive buffer out on - this will be a -+ * rail with ThreadWaiting set or the rail with the least descriptors queued -+ * on it. */ -+ -+ return (ep_pickRail (allowedRails & rcvr->RailMask)); -+} -+ -+EP_RAILMASK -+ep_rcvr_availrails (EP_RCVR *rcvr) -+{ -+ /* Return which rails can be used to queue receive buffers. */ -+ return (rcvr->RailMask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/rmap.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/rmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/rmap.c 2005-06-01 23:12:54.681426640 -0400 -@@ -0,0 +1,365 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmap.c,v 1.15 2004/05/19 10:24:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_display_rmap (EP_RMAP *mp) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ ep_debugf (DBG_DEBUG, "map: %s size %d free %d\n", mp->m_name, mp->m_size, mp->m_free); -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ ep_debugf (DBG_DEBUG, " [%lx - %lx]\n", bp->m_addr, bp->m_addr+bp->m_size-1); -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+void -+ep_mapinit (EP_RMAP *mp, char *name, u_int mapsize) -+{ -+ spin_lock_init (&mp->m_lock); -+ kcondvar_init (&mp->m_wait); -+ -+ /* The final segment in the array has size 0 and acts as a delimiter -+ * we insure that we never use segments past the end of the array by -+ * maintaining a free segment count in m_free. When excess segments -+ * occur we discard some resources */ -+ -+ mp->m_size = mapsize; -+ mp->m_free = mapsize; -+ mp->m_name = name; -+ -+ bzero (mp->m_map, sizeof (EP_RMAP_ENTRY) * (mapsize+1)); -+} -+ -+EP_RMAP * -+ep_rmallocmap (size_t mapsize, char *name, int cansleep) -+{ -+ EP_RMAP *mp; -+ -+ KMEM_ZALLOC (mp, EP_RMAP *, sizeof (EP_RMAP) + mapsize*sizeof (EP_RMAP_ENTRY), cansleep); -+ -+ if (mp != NULL) -+ ep_mapinit (mp, name, mapsize); -+ -+ return (mp); -+} -+ -+void -+ep_rmfreemap (EP_RMAP *mp) -+{ -+ spin_lock_destroy (&mp->m_lock); -+ kcondvar_destroy (&mp->m_wait); -+ -+ KMEM_FREE (mp, sizeof (EP_RMAP) + mp->m_size * sizeof (EP_RMAP_ENTRY)); -+} -+ -+static u_long -+ep_rmalloc_locked (EP_RMAP *mp, size_t size) -+{ -+ EP_RMAP_ENTRY *bp; -+ u_long addr; -+ -+ ASSERT (size > 0); -+ ASSERT (SPINLOCK_HELD (&mp->m_lock)); -+ -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ if (bp->m_size >= size) -+ { -+ addr = bp->m_addr; -+ bp->m_addr += size; -+ -+ if ((bp->m_size -= size) == 0) -+ { -+ /* taken all of this slot - so shift the map down */ -+ do { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ } while (((bp-1)->m_size = bp->m_size) != 0); -+ -+ mp->m_free++; -+ } -+ return (addr); -+ } -+ } -+ -+ return (0); -+} -+ -+u_long -+ep_rmalloc (EP_RMAP *mp, size_t size, int cansleep) -+{ -+ unsigned long addr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ while ((addr = ep_rmalloc_locked (mp, size)) == 0 && cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ -+ return (addr); -+} -+ -+ -+ -+u_long -+ep_rmalloc_constrained (EP_RMAP *mp, size_t size, u_long alo, u_long ahi, u_long align, int cansleep) -+{ -+ EP_RMAP_ENTRY *bp, *bp2, *lbp; -+ unsigned long addr=0; -+ size_t delta; -+ int ok; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ again: -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ delta = 0; -+ -+ if (alo < bp->m_addr) -+ { -+ addr = bp->m_addr; -+ -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = (bp->m_size >= (size + delta)); -+ else -+ ok = ((bp->m_addr + size + delta) <= ahi); -+ } -+ else -+ { -+ addr = alo; -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = ((alo + size + delta) <= (bp->m_addr + bp->m_size)); -+ else -+ ok = ((alo + size + delta) <= ahi); -+ } -+ -+ if (ok) -+ break; -+ } -+ -+ if (bp->m_size == 0) -+ { -+ if (cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ goto again; -+ } -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (0); -+ } -+ -+ /* found an approriate map entry - so take the bit out which we want */ -+ if (bp->m_addr == addr) -+ { -+ if (bp->m_size == size) -+ { -+ /* allocate entire segment and compress map */ -+ bp2 = bp; -+ while (bp2->m_size) -+ { -+ bp2++; -+ (bp2-1)->m_addr = bp2->m_addr; -+ (bp2-1)->m_size = bp2->m_size; -+ } -+ mp->m_free++; -+ } -+ else -+ { -+ /* take from start of segment */ -+ bp->m_addr += size; -+ bp->m_size -= size; -+ } -+ } -+ else -+ { -+ if (bp->m_addr + bp->m_size == addr + size) -+ { -+ /* take from end of segment */ -+ bp->m_size -= size; -+ } -+ else -+ { -+ /* split the segment loosing the last entry if there's no space */ -+ if (mp->m_free == 0) -+ { -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ } -+ -+ for (bp2 = bp; bp2->m_size != 0; bp2++) -+ continue; -+ -+ for (bp2--; bp2 > bp; bp2--) -+ { -+ (bp2+1)->m_addr = bp2->m_addr; -+ (bp2+1)->m_size = bp2->m_size; -+ } -+ -+ mp->m_free--; -+ -+ (bp+1)->m_addr = addr + size; -+ (bp+1)->m_size = bp->m_addr + bp->m_size - (addr + size); -+ bp->m_size = addr - bp->m_addr; -+ } -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (addr); -+} -+ -+void -+ep_rmfree (EP_RMAP *mp, size_t size, u_long addr) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long t; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ -+ ASSERT (addr != 0 && size > 0); -+ -+again: -+ /* find the piece of the map which starts after the returned space -+ * or the end of the map */ -+ for (bp = &mp->m_map[0]; bp->m_addr <= addr && bp->m_size != 0; bp++) -+ ; -+ -+ /* bp points to the piece to the right of where we want to go */ -+ -+ if (bp > &mp->m_map[0] && (bp-1)->m_addr + (bp-1)->m_size >= addr) -+ { -+ /* merge with piece on the left */ -+ -+ ASSERT ((bp-1)->m_addr + (bp-1)->m_size <= addr); -+ -+ (bp-1)->m_size += size; -+ -+ ASSERT (bp->m_size == 0 || addr+size <= bp->m_addr); -+ -+ if (bp->m_size && (addr + size) == bp->m_addr) -+ { -+ /* merge witht he piece on the right by -+ * growing the piece on the left and shifting -+ * the map down */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ (bp-1)->m_size += bp->m_size; -+ while (bp->m_size) -+ { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ (bp-1)->m_size = bp->m_size; -+ } -+ -+ mp->m_free++; -+ } -+ } -+ else if (addr + size >= bp->m_addr && bp->m_size) -+ { -+ /* merge with piece to the right */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ bp->m_addr -= size; -+ bp->m_size += size; -+ } -+ else -+ { -+ /* doesn't join with left or right - check for map -+ overflow and discard the smallest of the last or -+ next to last entries */ -+ -+ if (mp->m_free == 0) -+ { -+ EP_RMAP_ENTRY *lbp; -+ -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ goto again; -+ } -+ -+ /* make a new entry and push the remaining ones up */ -+ do { -+ t = bp->m_addr; -+ bp->m_addr = addr; -+ addr = t; -+ t = bp->m_size; -+ bp->m_size = size; -+ bp++; -+ } while ((size = t) != 0); -+ -+ mp->m_free--; -+ } -+ -+ /* if anyone blocked on rmalloc failure, wake 'em up */ -+ if (mp->m_want) -+ { -+ mp->m_want = 0; -+ kcondvar_wakeupall (&mp->m_wait, &mp->m_lock); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/spinlock_elan3_thread.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/spinlock_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/spinlock_elan3_thread.c 2005-06-01 23:12:54.681426640 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: spinlock_elan3_thread.c,v 1.9 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/spinlock_elan3_thread.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3_spinblock (EP3_SPINLOCK_ELAN *sle, EP3_SPINLOCK_MAIN *sl) -+{ -+ do { -+ sl->sl_seq = sle->sl_seq; /* Release my lock */ -+ -+ while (sle->sl_lock) /* Wait until the main */ -+ c_break(); /* releases the lock */ -+ -+ sle->sl_seq++; /* and try and relock */ -+ } while (sle->sl_lock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/statemap.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/statemap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/statemap.c 2005-06-01 23:12:54.682426488 -0400 -@@ -0,0 +1,385 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statemap.c,v 1.11.8.1 2004/11/18 12:05:00 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.c,v $ */ -+ -+#include -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+static int -+statemap_setmapbit (bitmap_t *map, int offset, int bit) -+{ -+ bitmap_t *e = &map[offset >> BT_ULSHIFT]; -+ bitmap_t mask = ((bitmap_t)1) << (offset & BT_ULMASK); -+ int rc = ((*e) & mask) != 0; -+ -+ if (bit) -+ { -+ *e |= mask; -+ return (!rc); -+ } -+ -+ *e &= ~mask; -+ return (rc); -+} -+ -+static int -+statemap_firstsegbit (bitmap_t seg) -+{ -+ int bit = 0; -+ -+ if (seg == 0) -+ return (-1); -+ -+#if (BT_ULSHIFT == 6) -+ if ((seg & 0xffffffffL) == 0) -+ { -+ seg >>= 32; -+ bit += 32; -+ } -+#elif (BT_ULSHIFT != 5) -+# error "Unexpected value of BT_ULSHIFT" -+#endif -+ -+ if ((seg & 0xffff) == 0) -+ { -+ seg >>= 16; -+ bit += 16; -+ } -+ -+ if ((seg & 0xff) == 0) -+ { -+ seg >>= 8; -+ bit += 8; -+ } -+ -+ if ((seg & 0xf) == 0) -+ { -+ seg >>= 4; -+ bit += 4; -+ } -+ -+ if ((seg & 0x3) == 0) -+ { -+ seg >>= 2; -+ bit += 2; -+ } -+ -+ return (((seg & 0x1) == 0) ? bit + 1 : bit); -+} -+ -+bitmap_t -+statemap_getseg (statemap_t *map, unsigned int offset) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ return (map->bitmap[offset >> BT_ULSHIFT]); -+} -+ -+void -+statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ offset >>= BT_ULSHIFT; -+ if (map->bitmap[offset] == seg) -+ return; -+ -+ map->bitmap[offset] = seg; -+ -+ if (statemap_setmapbit (map->changemap2, offset, 1) && -+ statemap_setmapbit (map->changemap1, offset >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, offset >>= BT_ULSHIFT, 1); -+} -+ -+bitmap_t -+statemap_getbits (statemap_t *map, unsigned int offset, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask = (nbits == BT_NBIPUL) ? (bitmap_t) -1 : (((bitmap_t)1) << nbits) - 1; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ return ((map->bitmap[index] >> offset) & mask); -+ -+ return (((map->bitmap[index] >> offset) | -+ (map->bitmap[index + 1] << (BT_NBIPUL - offset))) & mask); -+} -+ -+void -+statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask; -+ bitmap_t seg; -+ bitmap_t newseg; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ { -+ mask = ((nbits == BT_NBIPUL) ? -1 : ((((bitmap_t)1) << nbits) - 1)) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+ return; -+ } -+ -+ mask = ((bitmap_t)-1) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg != newseg) -+ { -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >> BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >> (2 * BT_ULSHIFT), 1); -+ } -+ -+ index++; -+ offset = BT_NBIPUL - offset; -+ mask = (((bitmap_t)1) << (nbits - offset)) - 1; -+ seg = map->bitmap[index]; -+ newseg = ((bits >> offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+} -+ -+void -+statemap_zero (statemap_t *dst) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, offset += BT_NBIPUL) -+ { -+ *dstmap = 0; -+ *changemap2 |= bit2; -+ } -+ *changemap1 |= bit1; -+ } -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_setmap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ if (*dstmap != *srcmap) -+ { -+ *dstmap = *srcmap; -+ *changemap2 |= bit2; -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_ormap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ bitmap_t seg; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ { -+ seg = *dstmap | *srcmap; -+ if (*dstmap != seg) -+ { -+ *dstmap = seg; -+ *changemap2 |= bit2; -+ } -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+int -+statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange) -+{ -+ int bit0; -+ bitmap_t *cm1; -+ int bit1; -+ bitmap_t *cm2; -+ int bit2; -+ unsigned int offset; -+ -+ bit0 = statemap_firstsegbit (*(map->changemap0)); -+ if (bit0 < 0) -+ return (-1); -+ -+ offset = bit0; -+ cm1 = map->changemap1 + offset; -+ bit1 = statemap_firstsegbit (*cm1); -+ ASSERT (bit1 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit1; -+ cm2 = map->changemap2 + offset; -+ bit2 = statemap_firstsegbit (*cm2); -+ ASSERT (bit2 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit2; -+ *newseg = map->bitmap[offset]; -+ -+ if (clearchange && -+ (*cm2 &= ~(((bitmap_t)1) << bit2)) == 0 && -+ (*cm1 &= ~(((bitmap_t)1) << bit1)) == 0) -+ map->changemap0[0] &= ~(((bitmap_t)1) << bit0); -+ -+ return (offset << BT_ULSHIFT); -+} -+ -+int -+statemap_changed (statemap_t *map) -+{ -+ return ((*(map->changemap0) != 0)); -+} -+ -+void -+statemap_reset (statemap_t *map) -+{ -+ bzero (map->changemap0, map->changemap_nob + map->bitmap_nob); -+} -+ -+void -+statemap_copy (statemap_t *dst, statemap_t *src) -+{ -+ ASSERT (dst->size == src->size); -+ bcopy (src->changemap0, dst->changemap0, src->changemap_nob + src->bitmap_nob); -+} -+ -+void -+statemap_clearchanges (statemap_t *map) -+{ -+ if (statemap_changed (map)) -+ bzero (map->changemap0, map->changemap_nob); -+} -+ -+bitmap_t * -+statemap_tobitmap (statemap_t *map) -+{ -+ return (map->bitmap); -+} -+ -+statemap_t * -+statemap_create (int size) -+{ -+ int struct_entries = (sizeof (statemap_t) * 8 + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int bitmap_entries = (size + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap2_entries = (bitmap_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap1_entries = (changemap2_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap0_entries = (changemap1_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap_entries = changemap0_entries + changemap1_entries + changemap2_entries; -+ int nob = (struct_entries + bitmap_entries + changemap_entries) * sizeof (bitmap_t); -+ statemap_t *map; -+ -+ ASSERT ((1 << BT_ULSHIFT) == BT_NBIPUL); -+ ASSERT (changemap0_entries == 1); -+ -+ KMEM_ZALLOC (map, statemap_t *, nob, 1); -+ -+ map->size = size; -+ map->nob = nob; -+ map->changemap_nob = changemap_entries * sizeof (bitmap_t); -+ map->bitmap_nob = bitmap_entries * sizeof (bitmap_t); -+ map->changemap0 = ((bitmap_t *)map) + struct_entries; -+ map->changemap1 = map->changemap0 + changemap0_entries; -+ map->changemap2 = map->changemap1 + changemap1_entries; -+ map->bitmap = map->changemap2 + changemap2_entries; -+ -+ return (map); -+} -+ -+void -+statemap_destroy (statemap_t *map) -+{ -+ KMEM_FREE (map, map->nob); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/statusmon.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/statusmon.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/statusmon.h 2005-06-01 23:12:54.682426488 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statusmon.h,v 1.6 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statusmon.h,v $*/ -+ -+#ifndef __ELAN3_STATUSMON_H -+#define __ELAN3_STATUSMON_H -+ -+typedef struct statusmon_node -+{ -+ u_int NodeId; -+ u_int State; -+} STATUSMON_SGMT; -+ -+typedef struct statusmon_level -+{ -+ unsigned Width; -+ STATUSMON_SGMT Nodes[CM_SGMTS_PER_LEVEL]; -+} STATUSMON_LEVEL; -+ -+typedef struct statusmon_msg -+{ -+ unsigned Type; -+ unsigned NodeId; -+ unsigned NumLevels; -+ unsigned TopLevel; -+ unsigned Role; -+ STATUSMON_LEVEL Levels[CM_MAX_LEVELS]; -+} STATUSMON_MSG; -+ -+ -+#endif /* __ELAN3_STATUSMON_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/support.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/support.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/support.c 2005-06-01 23:12:54.683426336 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support.c,v 1.37.8.1 2004/09/30 15:01:53 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support.c,v $ */ -+ -+#include -+#include -+ -+/****************************************************************************************/ -+/* -+ * Nodeset/flush callbacks. -+ */ -+int -+ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ -+ KMEM_ALLOC (cb, EP_CALLBACK *, sizeof (EP_CALLBACK), 1); -+ -+ cb->Routine = routine; -+ cb->Arg = arg; -+ -+ kmutex_lock (&rail->CallbackLock); -+ cb->Next = rail->CallbackList[idx]; -+ rail->CallbackList[idx] = cb; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ EP_CALLBACK **predp; -+ -+ kmutex_lock (&rail->CallbackLock); -+ for (predp = &rail->CallbackList[idx]; (cb = *predp); predp = &cb->Next) -+ if (cb->Routine == routine && cb->Arg == arg) -+ break; -+ -+ if (cb == NULL) -+ panic ("ep_remove_member_callback"); -+ -+ *predp = cb->Next; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ KMEM_FREE (cb, sizeof (EP_CALLBACK)); -+} -+ -+void -+ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *map) -+{ -+ EP_CALLBACK *cb; -+ -+ kmutex_lock (&rail->CallbackLock); -+ -+ rail->CallbackStep = idx; -+ -+ for (cb = rail->CallbackList[idx]; cb; cb = cb->Next) { -+ (cb->Routine) (cb->Arg, map); -+ } -+ kmutex_unlock (&rail->CallbackLock); -+} -+ -+unsigned int -+ep_backoff (EP_BACKOFF *backoff, int type) -+{ -+ static int bcount[EP_NUM_BACKOFF] = {1, 16, 32, 64, 128, 256, 512, 1024}; -+ -+ if (backoff->type != type) -+ { -+ backoff->type = type; -+ backoff->indx = 0; -+ backoff->count = 0; -+ } -+ -+ if (++backoff->count > bcount[backoff->indx] && backoff->indx < (EP_NUM_BACKOFF-1)) -+ { -+ backoff->indx++; -+ backoff->count = 0; -+ } -+ -+ return (backoff->indx); -+} -+ -+/* Generic checksum algorithm */ -+uint16_t -+CheckSum (char *msg, int nob) -+{ -+ uint16_t sum = 0; -+ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/support_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/support_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/support_elan3.c 2005-06-01 23:12:54.687425728 -0400 -@@ -0,0 +1,2111 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan3.c,v 1.42.8.3 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan3.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#include -+#include -+ -+/****************************************************************************************/ -+#define DMA_RING_NEXT_POS(ring) ((ring)->Position+1 == ring->Entries ? 0 : ((ring)->Position+1)) -+#define DMA_RING_PREV_POS(ring,pos) ((pos) == 0 ? (ring)->Entries-1 : (pos) - 1) -+ -+static int -+DmaRingCreate (EP3_RAIL *rail, EP3_DMA_RING *ring, int ctxnum, int entries) -+{ -+ unsigned long pgnum = (ctxnum * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = (ctxnum * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ int s; -+ -+ /* set up the initial position */ -+ ring->Entries = entries; -+ ring->Position = 0; -+ -+ if (! (ring->pEvent = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_BlockCopyEvent), 0, &ring->epEvent))) -+ { -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDma = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_DMA), 0, &ring->epDma))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDoneBlk = ep_alloc_main (&rail->Generic, entries * sizeof (E3_uint32), 0, &ring->epDoneBlk))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (MapDeviceRegister (rail->Device, ELAN3_BAR_COMMAND_PORT, &ring->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ring->CommandPageHandle) != ESUCCESS) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, entries * sizeof (E3_uint32)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ ring->CommandPort = ring->CommandPage + pgoff; -+ -+ for (s = 0; s < entries; s++) -+ { -+ /* setup the event */ -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Type), -+ EV_TYPE_BCOPY | EV_TYPE_DMA | DMA_RING_DMA_ELAN(ring, s)); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Source), DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Dest), DMA_RING_DONE_ELAN(ring,s) | EV_TYPE_BCOPY_WORD ); -+ -+ /* need to set all the doneBlks to appear that they have completed */ -+ ring->pDoneBlk[s] = DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY; -+ } -+ -+ return 0; /* success */ -+} -+ -+static void -+DmaRingRelease(EP3_RAIL *rail, EP3_DMA_RING *ring) -+{ -+ if (ring->CommandPage != (ioaddr_t) 0) -+ { -+ UnmapDeviceRegister(rail->Device, &ring->CommandPageHandle); -+ -+ ep_free_elan (&rail->Generic, ring->epEvent, ring->Entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, ring->Entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, ring->Entries * sizeof (E3_uint32)); -+ } -+ ring->CommandPage = (ioaddr_t) 0; -+} -+ -+void -+DmaRingsRelease (EP3_RAIL *rail) -+{ -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_CRITICAL]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_HIGH_PRI]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_LOW_PRI]); -+} -+ -+int -+DmaRingsCreate (EP3_RAIL *rail) -+{ -+ if (DmaRingCreate (rail, &rail->DmaRings[EP3_RING_CRITICAL], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_CRITICAL, EP3_RING_CRITICAL_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_HIGH_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_HIGH_PRI, EP3_RING_HIGH_PRI_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_LOW_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_LOW_PRI, EP3_RING_LOW_PRI_LEN)) -+ { -+ DmaRingsRelease (rail); -+ return (ENOMEM); -+ } -+ -+ return 0; -+} -+ -+static int -+DmaRingNextSlot (EP3_DMA_RING *ring) -+{ -+ int pos = ring->Position; -+ int npos = DMA_RING_NEXT_POS(ring); -+ -+ if (ring->pDoneBlk[npos] == EP3_EVENT_ACTIVE) -+ return (-1); -+ -+ ring->pDoneBlk[pos] = EP3_EVENT_ACTIVE; -+ -+ ring->Position = npos; /* move on one */ -+ -+ return (pos); -+} -+ -+ -+/****************************************************************************************/ -+/* -+ * Dma/event command issueing - these handle cproc queue overflow traps. -+ */ -+static int -+DmaRunQueueSizeCheck (EP3_RAIL *rail, E3_uint32 len) -+{ -+ E3_uint64 FandBPtr = read_reg64 (rail->Device, DProc_SysCntx_FPtr); -+ E3_uint32 FPtr, BPtr; -+ E3_uint32 qlen; -+ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ FPtr = (FandBPtr & 0xFFFFFFFFull); -+ BPtr = (FandBPtr >> 32); -+#else -+ FPtr = (FandBPtr >> 32); -+ BPtr = (FandBPtr & 0xFFFFFFFFull); -+#endif -+ -+ qlen = (((BPtr - FPtr)/sizeof (E3_DMA)) & (E3_SysCntxQueueSize-1)); -+ -+ if (qlen < 4) IncrStat (rail, DmaQueueLength[0]); -+ else if (qlen < 8) IncrStat (rail, DmaQueueLength[1]); -+ else if (qlen < 16) IncrStat (rail, DmaQueueLength[2]); -+ else if (qlen < 32) IncrStat (rail, DmaQueueLength[3]); -+ else if (qlen < 64) IncrStat (rail, DmaQueueLength[4]); -+ else if (qlen < 128) IncrStat (rail, DmaQueueLength[5]); -+ else if (qlen < 240) IncrStat (rail, DmaQueueLength[6]); -+ else IncrStat (rail, DmaQueueLength[7]); -+ -+ return (qlen < len); -+} -+ -+int -+IssueDma (EP3_RAIL *rail, E3_DMA_BE * dmabe, int type, int retryThread) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RETRY_DMA *retry; -+ EP3_DMA_RING *ring; -+ int slot; -+ int i, res; -+ unsigned long flags; -+ -+ ASSERT (dmabe->s.dma_direction == DMA_WRITE || dmabe->s.dma_direction == DMA_READ_REQUEUE); -+ -+ ASSERT (! EP_VP_ISDATA(dmabe->s.dma_destVProc) || -+ (dmabe->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dmabe->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dmabe->s.dma_destVProc) == rail->Generic.Position.pos_nodeid)); -+ -+ /* -+ * If we're not the retry thread - then don't issue this DMA -+ * if there are any already queued on the retry lists with -+ * higher or equal priority than this one that are ready to -+ * retry. -+ */ -+ if (! retryThread) -+ { -+ for (i = EP_RETRY_BASE; i < type; i++) -+ { -+ if (list_empty (&rail->DmaRetries[i])) -+ continue; -+ -+ retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (AFTER (lbolt, retry->RetryTime)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ } -+ } -+ -+ /* -+ * Depending on the type of DMA we're issuing - throttle back -+ * issueing of it if the DMA run queue is too full. This then -+ * prioritises the "special" messages and completing data -+ * transfers which have matched a receive buffer. -+ */ -+ -+ if (type >= EP_RETRY_LOW_PRI_RETRY) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 2)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type == EP_RETRY_LOW_PRI) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 3)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type >= EP_RETRY_HIGH_PRI) -+ ring = &rail->DmaRings[EP3_RING_HIGH_PRI]; -+ else -+ ring = &rail->DmaRings[EP3_RING_CRITICAL]; -+ -+ local_irq_save (flags); -+ if (! spin_trylock (&dev->CProcLock)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ if ((slot = DmaRingNextSlot (ring)) == -1) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ EPRINTF4 (DBG_COMMAND, "IssueDma: type %08x size %08x Elan source %08x Elan dest %08x\n", -+ dmabe->s.dma_type, dmabe->s.dma_size, dmabe->s.dma_source, dmabe->s.dma_dest); -+ EPRINTF2 (DBG_COMMAND, " dst event %08x cookie/proc %08x\n", -+ dmabe->s.dma_destEvent, dmabe->s.dma_destCookieVProc); -+ EPRINTF2 (DBG_COMMAND, " src event %08x cookie/proc %08x\n", -+ dmabe->s.dma_srcEvent, dmabe->s.dma_srcCookieVProc); -+ -+ elan3_sdram_copyq_to_sdram (dev, dmabe, DMA_RING_DMA(ring, slot), sizeof (E3_DMA)); /* PCI write block */ -+ elan3_sdram_writel (dev, DMA_RING_EVENT(ring, slot) + offsetof (E3_BlockCopyEvent, ev_Count), 1); /* PCI write */ -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (DMA_RING_EVENT_ELAN(ring,slot), ring->CommandPort + offsetof (E3_CommandPort, SetEvent)); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock (&dev->CProcLock); -+ } -+ local_irq_restore (flags); -+ -+ return (res); -+} -+ -+int -+IssueWaitevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (rail->CommandPortEventTrap == FALSE); -+ -+ /* -+ * Disable the command processor interrupts, so that we don't see -+ * spurious interrupts appearing. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, WaitEvent0)); -+ mmiob(); /* and flush through IO writes */ -+ -+ do { -+ res = CheckCommandQueueFlushed (rail->Ctxt, EventComQueueNotEmpty, ISSUE_COMMAND_CANT_WAIT, &flags); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: CheckCommandQueueFlushed -> %d\n", res); -+ -+ if (res == ISSUE_COMMAND_WAIT) -+ HandleCProcTrap (dev, 0, NULL); -+ } while (res != ISSUE_COMMAND_OK); -+ -+ if (! rail->CommandPortEventTrap) -+ res = ISSUE_COMMAND_OK; -+ else -+ { -+ rail->CommandPortEventTrap = FALSE; -+ res = ISSUE_COMMAND_TRAPPED; -+ } -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: -> %d\n", res); -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+IssueSetevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueSetevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, SetEvent)); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+void -+IssueRunThread (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueRunThread: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, RunThread)); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+/****************************************************************************************/ -+/* -+ * DMA retry list management -+ */ -+static unsigned DmaRetryTimes[EP_NUM_RETRIES]; -+ -+static void -+ep3_dma_retry (EP3_RAIL *rail) -+{ -+ EP3_COOKIE *cp; -+ int res; -+ int vp; -+ unsigned long flags; -+ int i; -+ -+ kernel_thread_init("ep3_dma_retry"); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ for (;;) -+ { -+ long yieldAt = lbolt + (hz/10); -+ long retryTime = 0; -+ -+ if (rail->DmaRetryThreadShouldStop) -+ break; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (! AFTER (lbolt, retry->RetryTime)) -+ break; -+ -+ if (rail->DmaRetryThreadShouldStall || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF2 (DBG_RETRY, "%s: DmaRetryThread: retry %p\n", rail->Generic.Name, retry); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+#if defined(DEBUG) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_srcEvent); -+ else -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_destEvent); -+ -+ ASSERT (cp != NULL || (retry->Dma.s.dma_srcEvent == 0 && retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_isRemote)); -+ -+ if (cp && cp->Operations->DmaVerify) -+ cp->Operations->DmaVerify (rail, cp->Arg, &retry->Dma); -+#endif -+ -+#if defined(DEBUG_ASSERT) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || -+ (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ res = IssueDma (rail, &(retry->Dma), i, TRUE); -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ goto cant_do_more; -+ -+ /* Command issued, so remove from list, and add to free list */ -+ list_del (&retry->Link); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ } -+ } -+ cant_do_more: -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (!list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ retryTime = retryTime ? MIN(retryTime, retry->RetryTime) : retry->RetryTime; -+ } -+ } -+ -+ if (retryTime && !AFTER (retryTime, lbolt)) -+ retryTime = lbolt + 1; -+ -+ do { -+ EPRINTF3 (DBG_RETRY, "%s: ep_cm_retry: %s %lx\n", rail->Generic.Name, rail->DmaRetryThreadShouldStall ? "stalled" : "sleeping", retryTime); -+ -+ if (rail->DmaRetryTime == 0 || (retryTime != 0 && retryTime < rail->DmaRetryTime)) -+ rail->DmaRetryTime = retryTime; -+ -+ rail->DmaRetrySleeping = TRUE; -+ -+ if (rail->DmaRetryThreadShouldStall) /* wakeup threads waiting in StallDmaRetryThread */ -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); /* for us to really go to sleep for good. */ -+ -+ if (rail->DmaRetryTime == 0 || rail->DmaRetryThreadShouldStall) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ else -+ kcondvar_timedwait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags, rail->DmaRetryTime); -+ -+ rail->DmaRetrySleeping = FALSE; -+ -+ } while (rail->DmaRetryThreadShouldStall); -+ -+ rail->DmaRetryTime = 0; -+ } -+ -+ rail->DmaRetryThreadStopped = 1; -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+StallDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStall++; -+ -+ while (! rail->DmaRetrySleeping) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+ResumeDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ ASSERT (rail->DmaRetrySleeping); -+ -+ if (--rail->DmaRetryThreadShouldStall == 0) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+int -+InitialiseDmaRetries (EP3_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->DmaRetryLock); -+ kcondvar_init (&rail->DmaRetryWait); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->DmaRetries[i]); -+ -+ INIT_LIST_HEAD (&rail->DmaRetryFreeList); -+ -+ DmaRetryTimes[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ DmaRetryTimes[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->DmaRetryInitialised = 1; -+ -+ if (kernel_thread_create (ep3_dma_retry, (void *) rail) == 0) -+ { -+ spin_lock_destroy (&rail->DmaRetryLock); -+ return (ENOMEM); -+ } -+ -+ rail->DmaRetryThreadStarted = 1; -+ -+ return (ESUCCESS); -+} -+ -+void -+DestroyDmaRetries (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStop = 1; -+ while (rail->DmaRetryThreadStarted && !rail->DmaRetryThreadStopped) -+ { -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ } -+ rail->DmaRetryThreadStarted = 0; -+ rail->DmaRetryThreadStopped = 0; -+ rail->DmaRetryThreadShouldStop = 0; -+ rail->DmaRetryInitialised = 0; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ ASSERT (rail->DmaRetryReserved == 0); -+ -+ while (! list_empty (&rail->DmaRetryFreeList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ KMEM_FREE (retry, sizeof (EP3_RETRY_DMA)); -+ } -+ -+ kcondvar_destroy (&rail->DmaRetryWait); -+ spin_lock_destroy (&rail->DmaRetryLock); -+} -+ -+int -+ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr) -+{ -+ EP3_RETRY_DMA *retry; -+ int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (remaining <= (rail->DmaRetryCount - rail->DmaRetryReserved)) -+ { -+ rail->DmaRetryReserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ESUCCESS); -+ } -+ -+ remaining -= (rail->DmaRetryCount - rail->DmaRetryReserved); -+ -+ rail->DmaRetryReserved = rail->DmaRetryCount; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ while (remaining) -+ { -+ KMEM_ALLOC (retry, EP3_RETRY_DMA *, sizeof (EP3_RETRY_DMA), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ /* clear E3_DMA */ -+ bzero((char *)(&(retry->Dma.s)), sizeof(E3_DMA)); -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ rail->DmaRetryCount++; -+ rail->DmaRetryReserved++; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+ return (ESUCCESS); -+ -+ failed: -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ENOMEM); -+} -+ -+void -+ReleaseDmaRetries (EP3_RAIL *rail, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= count; -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval) -+{ -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaForRetry: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n",rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ retry->RetryTime = lbolt + DmaRetryTimes[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->Link, &rail->DmaRetries[interval]); -+ -+ /* now wakeup the retry thread */ -+ if (rail->DmaRetryTime == 0 || retry->RetryTime < rail->DmaRetryTime) -+ rail->DmaRetryTime = retry->RetryTime; -+ -+ if (rail->DmaRetrySleeping && !rail->DmaRetryThreadShouldStall) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) : -+ EP_VP_TO_NODE(dma->s.dma_destVProc)]; -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaOnStalledList: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->Link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->DmaRetryFreeList); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+/****************************************************************************************/ -+/* -+ * Connection management. -+ */ -+static void -+DiscardingHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+typedef struct { -+ EP3_RAIL *rail; -+ sdramaddr_t qaddr; -+} SetQueueFullData; -+ -+static void -+SetQueueLockedOperation (ELAN3_DEV *dev, void *arg) -+{ -+ SetQueueFullData *data = (SetQueueFullData *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3_sdram_writel (dev, data->qaddr, E3_QUEUE_LOCKED | elan3_sdram_readl(dev, data->qaddr)); -+ -+ data->rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&data->rail->HaltOpSleep, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+static void -+FlushDmaQueuesHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ EP_NODE_RAIL *node; -+ int vp; -+ unsigned long flags; -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_type, dma.s.dma_size, dma.s.dma_source, dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_destEvent, dma.s.dma_destCookieVProc, -+ dma.s.dma_srcEvent, dma.s.dma_srcCookieVProc); -+ -+ ASSERT ((dma.s.dma_u.s.Context & SYS_CONTEXT_BIT) != 0); -+ -+ if (dma.s.dma_direction == DMA_WRITE) -+ vp = dma.s.dma_destVProc; -+ else -+ vp = dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ EPRINTF1 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: move dma to cancelled list\n", rail->Generic.Name); -+ -+ if (dma.s.dma_direction != DMA_WRITE) -+ { -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+SetQueueLocked (EP3_RAIL *rail, sdramaddr_t qaddr) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ SetQueueFullData data; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ data.rail = rail; -+ data.qaddr = qaddr; -+ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx | INT_TProcHalted, SetQueueLockedOperation, &data); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_filters (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx, DiscardingHaltOperation, rail); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_queues (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el; -+ struct list_head *nel; -+ EP_NODE_RAIL *node; -+ unsigned long flags; -+ int vp, i; -+ -+ ASSERT (NO_LOCKS_HELD); -+ -+ /* First - stall the dma retry thread, so that it will no longer -+ * restart any dma's from the rety lists. */ -+ StallDmaRetryThread (rail); -+ -+ /* Second - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queue. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, FlushDmaQueuesHaltOperation, rail); -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+ -+ /* Third - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueues: %08x %08x %08x %08x\n",rail->Generic.Name, -+ retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+ -+ list_del (&retry->Link); -+ -+ list_add_tail (&retry->Link, &node->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Finally - allow the dma retry thread to run again */ -+ ResumeDmaRetryThread (rail); -+} -+ -+/****************************************************************************************/ -+/* NOTE - we require that all cookies are non-zero, which is -+ * achieved because EP_VP_DATA() is non-zero for all -+ * nodes */ -+E3_uint32 -+LocalCookie (EP3_RAIL *rail, unsigned remoteNode) -+{ -+ E3_uint32 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(rail->Generic.Position.pos_nodeid)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for srcCookie - this is what is sent -+ * to the remote node along with the setevent from the put -+ * or the dma descriptor for a get */ -+ return (cookie); -+} -+ -+E3_uint32 -+RemoteCookie (EP3_RAIL *rail, u_int remoteNode) -+{ -+ uint32_t cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_REMOTE_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(remoteNode)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for dstCookie - this is the cookie -+ * that the "remote put" dma uses for it's setevent packets for -+ * a get dma */ -+ -+ return (cookie); -+} -+ -+/****************************************************************************************/ -+/* -+ * Event Cookie management. -+ * -+ * We find the ep_cookie in one of two ways: -+ * 1) for block copy events -+ * the cookie value is stored in the ev_Source - for EVIRQ events -+ * it is also stored in the ev_Type -+ * 2) for normal events -+ * we just use the event address. -+ */ -+void -+InitialiseCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ spin_lock_init (&table->Lock); -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ table->Entries[i] = NULL; -+} -+ -+void -+DestroyCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ if (table->Entries[i]) -+ printk ("DestroyCookieTable: entry %d not empty\n", i); -+ -+ spin_lock_destroy (&table->Lock); -+} -+ -+void -+RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp, E3_uint32 cookie, EP3_COOKIE_OPS *ops, void *arg) -+{ -+ EP3_COOKIE *tcp; -+ int hashval = EP3_HASH_COOKIE(cookie); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ cp->Operations = ops; -+ cp->Arg = arg; -+ cp->Cookie = cookie; -+ -+#if defined(DEBUG) -+ /* Check that the cookie is unique */ -+ for (tcp = table->Entries[hashval]; tcp; tcp = tcp->Next) -+ if (tcp->Cookie == cookie) -+ panic ("RegisterEventCookie: non unique cookie\n"); -+#endif -+ cp->Next = table->Entries[hashval]; -+ -+ table->Entries[hashval] = cp; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+} -+ -+void -+DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp) -+{ -+ EP3_COOKIE **predCookiep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (predCookiep = &table->Entries[EP3_HASH_COOKIE (cp->Cookie)]; *predCookiep; predCookiep = &(*predCookiep)->Next) -+ { -+ if (*predCookiep == cp) -+ { -+ *predCookiep = cp->Next; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ -+ cp->Operations = NULL; -+ cp->Arg = NULL; -+ cp->Cookie = 0; -+ cp->Next = NULL; -+} -+ -+EP3_COOKIE * -+LookupCookie (EP3_COOKIE_TABLE *table, E3_Addr cookie) -+{ -+ EP3_COOKIE *cp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (cp = table->Entries[EP3_HASH_COOKIE(cookie)]; cp; cp = cp->Next) -+ if (cp->Cookie == cookie) -+ break; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ return (cp); -+} -+ -+EP3_COOKIE * -+LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr eaddr) -+{ -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ if ((event = ep_elan2sdram (&rail->Generic, eaddr)) != (sdramaddr_t) 0) -+ { -+ type = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Type)); -+ -+ if (type & EV_TYPE_BCOPY) -+ return (LookupCookie (table, elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Source)) & ~EV_WCOPY)); -+ else -+ return (LookupCookie (table, eaddr)); -+ } -+ -+ return (NULL); -+} -+ -+/****************************************************************************************/ -+/* -+ * Elan context operations - note only support interrupt ops. -+ */ -+static int ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static int ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+static int ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+static int ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+static int ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+static int ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf); -+ -+static E3_uint8 ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+ELAN3_OPS ep3_elan3_ops = -+{ -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ NULL, /* Exception */ -+ NULL, /* GetWordItem */ -+ NULL, /* GetBlockItem */ -+ NULL, /* PutWordItem */ -+ NULL, /* PutBlockItem */ -+ NULL, /* PutbackItem */ -+ NULL, /* FreeWordItem */ -+ NULL, /* FreeBlockItem */ -+ NULL, /* CountItems */ -+ ep3_event, /* Event */ -+ NULL, /* SwapIn */ -+ NULL, /* SwapOut */ -+ NULL, /* FreePrivate */ -+ NULL, /* FixupNetworkError */ -+ ep3_dprocTrap, /* DProcTrap */ -+ ep3_tprocTrap, /* TProcTrap */ -+ ep3_iprocTrap, /* IProcTrap */ -+ ep3_cprocTrap, /* CProcTrap */ -+ ep3_cprocReissue, /* CProcReissue */ -+ NULL, /* StartFaultCheck */ -+ NULL, /* EndFaulCheck */ -+ ep3_load8, /* Load8 */ -+ ep3_store8, /* Store8 */ -+ ep3_load16, /* Load16 */ -+ ep3_store16, /* Store16 */ -+ ep3_load32, /* Load32 */ -+ ep3_store32, /* Store32 */ -+ ep3_load64, /* Load64 */ -+ ep3_store64, /* Store64 */ -+}; -+ -+static int -+ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ EP3_COOKIE *cp = LookupCookie (&rail->CookieTable, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep3_event: cannot find event cookie for %x\n", cookie); -+ return (OP_HANDLED); -+ } -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ -+ return (OP_HANDLED); -+} -+ -+/* Trap interface */ -+int -+ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COOKIE *cp; -+ E3_FaultSave_BE *FaultArea; -+ E3_uint16 vp; -+ int validTrap; -+ int numFaults; -+ int i; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ int status = EAGAIN; -+ -+ EPRINTF4 (DBG_EPTRAP, "ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ EPRINTF4 (DBG_EPTRAP, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ EPRINTF2 (DBG_EPTRAP, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ EPRINTF2 (DBG_EPTRAP, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ vp = trap->Desc.s.dma_destVProc; -+ else -+ vp = trap->Desc.s.dma_srcVProc; -+ -+ if (! trap->PacketInfo.s.PacketTimeout) -+ status = ETIMEDOUT; -+ else -+ { -+ status = EHOSTDOWN; -+ -+ /* XXXX: dma timedout - might want to "restart" tree ? */ -+ } -+ goto retry_dma; -+ -+ case MI_DmaFailCountError: -+ goto retry_dma; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ goto retry_dma; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ IncrStat (rail, DprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof(E3_Event,ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&ctxt->Device->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&ctxt->Device->IntrLock); -+ } -+ return (OP_HANDLED); -+ -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ /* We only chain together DMA's of the same direction, so since -+ * we took a DmaQueueOverflow trap - this means that DMA which -+ * trapped was a WRITE dma - hence the one we chain to must also -+ * be a WRITE dma. -+ */ -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmabe.s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (cp != NULL && (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE))); -+ } -+#endif -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ -+ return (OP_HANDLED); -+ } -+ -+ panic ("ep3_dprocTrap\n"); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ } -+ -+ /* If it's a dma which traps past the end of the source, then */ -+ /* just re-issue it */ -+ numFaults = validTrap = (trap->FaultSave.s.FSR.Status != 0); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ numFaults++; -+ -+ /* XXXX: Rev B Elans can prefetch data past the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress >= (trap->Desc.s.dma_source+trap->Desc.s.dma_size)) -+ { -+ static int i; -+ if (i < 10 && i++ < 10) -+ printk ("ep3_dprocTrap: Rev B prefetch trap error %08x %08x\n", -+ FaultArea->s.FaultAddress, (trap->Desc.s.dma_source+trap->Desc.s.dma_size)); -+ continue; -+ } -+ -+ validTrap++; -+ } -+ } -+ -+ /* -+ * NOTE: for physical errors (uncorrectable ECC/PCI parity errors) the FSR will -+ * be zero - hence we will not see any faults - and none will be valid, -+ * so only ignore a Rev B prefetch trap if we've seen some faults. Otherwise -+ * we can reissue a DMA which has already sent it's remote event ! -+ */ -+ if (numFaults != 0 && validTrap == 0) -+ { -+ retry_dma: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ { -+ vp = trap->Desc.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_srcEvent); -+ } -+ else -+ { -+ ASSERT (EP3_CONTEXT_ISDATA(trap->Desc.s.dma_queueContext) || trap->Desc.s.dma_direction == DMA_READ_REQUEUE); -+ -+ vp = trap->Desc.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_destEvent); -+ -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ trap->Desc.s.dma_direction = (trap->Desc.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &trap->Desc, status); -+ else -+ { -+ ASSERT (trap->Desc.s.dma_direction == DMA_WRITE && trap->Desc.s.dma_srcEvent == 0 && trap->Desc.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &trap->Desc, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ printk ("ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ printk (" %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ printk (" type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ printk (" Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ printk (" Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+// panic ("ep3_dprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ -+ EPRINTF6 (DBG_EPTRAP, "ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, trap->TrapBits.Bits, MiToName (trap->mi)); -+ EPRINTF4 (DBG_EPTRAP, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ { -+ ASSERT (trap->TrapBits.s.OutputWasOpen == 0); -+ -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ForcedTProcTrap\n"); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ThreadTimeout\n"); -+ -+ if (trap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == 0) -+ RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue); -+ else -+ { -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], trap->TrapBits.s.PacketAckValue); -+ -+ RollThreadToClose (ctxt, trap, EP3_PAckStolen); -+ } -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_EPTRAP, "ep3_tprocTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case EP3_UNIMP_TRAP_NO_DESCS: -+ StallThreadForNoDescs (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_TRAP_PACKET_NACKED: -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], E3_PAckDiscard); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_THREAD_HALTED: -+ StallThreadForHalted (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ -+ } -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+ -+ /* All other traps should not happen for kernel comms */ -+ printk ("ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, -+ trap->TrapBits.Bits, MiToName (trap->mi)); -+ printk (" FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ printk (" DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ printk (" InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ printk (" OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ printk (" g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ printk (" g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ printk (" o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ printk (" o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ printk (" l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ printk (" l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ printk (" i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ printk (" i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+ -+// panic ("ep3_tprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int channel) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ EP3_COOKIE *cp; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ -+ ASSERT (trap->Transactions[0].s.TrTypeCntx.s.Context & SYS_CONTEXT_BIT); -+ -+ /* -+ * first process the trap to determine the cause -+ */ -+ InspectIProcTrap (ctxt, trap); -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Must be a network error in a queueing DMA */ -+ { /* packet - unlock the queue */ -+ IncrStat (rail, QueueingPacketTrap); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ spin_unlock (&dev->IntrLock); -+ -+ /* NOTE - no network error fixup is necessary for system context -+ * messages since they are idempotent and are single packet -+ * dmas -+ */ -+ if (EP3_CONTEXT_ISDATA (trap->Transactions[0].s.TrTypeCntx.s.Context)) -+ { -+ int nodeId = EP3_CONTEXT_TO_NODE(trap->Transactions[0].s.TrTypeCntx.s.Context); -+ -+ if (trap->DmaIdentifyTransaction) -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->DmaIdentifyTransaction->s.TrAddr); -+ else if (trap->ThreadIdentifyTransaction) -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->ThreadIdentifyTransaction->s.TrAddr); -+ else -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_DMA_PACKET, channel, 0); -+ } -+ -+ spin_lock (&dev->IntrLock); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent) -+ { -+ if (trap->TrappedTransaction == NULL) -+ return (OP_HANDLED); -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ printk ("ep3_iprocTrap: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP3_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = (dmabe.s.dma_direction == DMA_WRITE ? dmabe.s.dma_destVProc : dmabe.s.dma_srcVProc); -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find dma to restart\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ -+ case MI_EventQueueOverflow: -+ { -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ IncrStat (rail, IprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&dev->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&dev->IntrLock); -+ -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find event\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ -+ default: -+ printk ("ep3_iprocTrap: SETEVENT : %x MI=%x\n", hdrp->s.TrAddr, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ { -+ E3_DMA_BE *dmap = (E3_DMA_BE *) datap; -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_DmaQueueOverflow) -+ { -+ printk ("ep3_iprocTrap: MI=%x\n", GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+ break; -+ } -+ -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmap->s.dma_srcEvent); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ dmap->s.dma_type = (dmap->s.dma_type & ~DMA_TYPE_READ) | DMA_TYPE_ISREMOTE; -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmap->s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, dmap, EAGAIN); -+ else -+ { -+ ASSERT (dmap->s.dma_direction == DMA_WRITE && dmap->s.dma_srcEvent == 0 && dmap->s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, dmap, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ default: -+ printk ("ep3_iprocTrap: %s\n", IProcTrapString (hdrp, datap)); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ printk ("ep3_iprocTrap: missed lockqueue transaction for queue %x\n", trap->UnlockQueuePointer); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->FaultSave.s.FaultContext != 0) -+ printk ("ep3_iprocTrap: pagefault at %08x in context %x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.FaultContext); -+ -+// panic ("ep3_iprocTrap: unexpected inputter trap\n"); -+ -+ return (OP_HANDLED); -+} -+ -+/* -+ * Command processor trap -+ * kernel comms should only be able to generate -+ * queue overflow traps -+ */ -+int -+ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ int ctxnum = (trap->TrapBuf.r.Breg >> 16) & MAX_ROOT_CONTEXT_MASK; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_DMA_RING *ring; -+ EP3_COOKIE *cp; -+ E3_DMA_BE dmabe; -+ int vp, slot; -+ unsigned long flags; -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, CprocDmaQueueOverflow); -+ -+ /* Use the context number that the setevent was issued in, -+ * to find the appropriate dma ring, then since they are guaranteed -+ * to be issued in order, we just search backwards till we find the -+ * last one which has completed its word copy - this must be the -+ * one which had caused the DmaQueueOverflow trap ! */ -+ -+ ASSERT (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)); -+ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ slot = DMA_RING_PREV_POS(ring, ring->Position); -+ -+ while (ring->pDoneBlk[slot] == EP3_EVENT_ACTIVE) -+ slot = DMA_RING_PREV_POS(ring, slot); -+ -+ elan3_sdram_copyq_from_sdram (rail->Device , DMA_RING_DMA(ring,slot), &dmabe, sizeof (E3_DMA)); -+ -+#if defined(DEBUG_ASSERT) -+ while (slot != DMA_RING_PREV_POS(ring, ring->Position)) -+ { -+ ASSERT (ring->pDoneBlk[slot] != EP3_EVENT_ACTIVE); -+ -+ slot = DMA_RING_PREV_POS(ring, slot); -+ } -+#endif -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction = DMA_READ_REQUEUE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ } -+ -+#if defined(DEBUG_ASSERT) -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ vp = dmabe.s.dma_destVProc; -+ else -+ vp = dmabe.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ IncrStat (rail, CprocEventQueueOverflow); -+ -+ rail->CommandPortEventTrap = TRUE; -+ return (OP_HANDLED); -+ -+#if defined(PER_CPU_TIMEOUT) -+ case MI_SetEventReadWait: -+ if (ctxnum == ELAN3_MRF_CONTEXT_NUM && trap->FaultSave.s.EventAddress == EP_PACEMAKER_EVENTADDR) -+ { -+ HeartbeatPacemaker (rail); -+ return (OP_HANDLED); -+ } -+#endif -+ -+ default: -+ printk ("ep3_cprocTrap : Context=%x Status=%x TrapType=%x\n", ctxnum, trap->Status.Status, trap->Status.s.TrapType); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ break; -+ } -+ -+// panic ("ep3_cprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+static int -+ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ unsigned cmdoff = (tbuf->s.ContextType >> 5) & 0xFF; -+ int ctxnum = (tbuf->s.ContextType >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ if (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)) -+ { -+ EP3_DMA_RING *ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ -+ ASSERT ((cmdoff << 2) == offsetof (E3_CommandPort, SetEvent)); /* can only be setevent commands! */ -+ ASSERT (tbuf->s.Addr >= DMA_RING_EVENT_ELAN(ring,0) && tbuf->s.Addr < DMA_RING_EVENT_ELAN(ring, ring->Entries)); -+ -+ writel (tbuf->s.Addr, ring->CommandPort + (cmdoff << 2)); -+ } -+ else -+ { -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ writel (tbuf->s.Addr, ctxt->CommandPort + (cmdoff << 2)); -+ } -+ -+ return (OP_HANDLED); -+} -+ -+static E3_uint8 -+ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readb (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != NULL) -+ return (*ptr); -+ -+ printk ("ep3_load8: %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeb (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store8 %08x\n", addr); -+} -+ -+static E3_uint16 -+ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readw (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load16 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writew (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store16 %08x\n", addr); -+} -+ -+static E3_uint32 -+ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readl(dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load32 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writel (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store32 %08x\n", addr); -+} -+ -+static E3_uint64 -+ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readq (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load64 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeq (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store64 %08x\n", addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/support_elan4.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/support_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/support_elan4.c 2005-06-01 23:12:54.689425424 -0400 -@@ -0,0 +1,1184 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan4.c,v 1.18.2.3 2004/11/18 12:05:00 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+void -+ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg) -+{ -+ unsigned long flags; -+ -+ cp->int_val = cookie; -+ cp->int_callback = callback; -+ cp->int_arg = arg; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_add_tail (&cp->int_link, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+void -+ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_del (&cp->int_link); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+ -+EP4_INTCOOKIE * -+ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_for_each (el, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]) { -+ EP4_INTCOOKIE *cp = list_entry (el, EP4_INTCOOKIE, int_link); -+ -+ if (cp->int_val == cookie) -+ { -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return cp; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return NULL; -+} -+ -+E4_uint64 -+ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node) -+{ -+ E4_uint64 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_cookie_lock, flags); -+ cookie = rail->r_cookies[node]; -+ -+ rail->r_cookies[node] += EP4_COOKIE_INC; -+ -+ spin_unlock_irqrestore (&rail->r_cookie_lock, flags); -+ -+ return cookie; -+} -+ -+void -+ep4_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_EPROC_TRAP trap; -+ -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, &trap, 0); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_eproc_trap (DBG_BUFFER, 0, "ep4_eproc_trap", &trap); -+ -+ switch (EPROC_TrapType (status)) -+ { -+ case EventProcNoFault: -+ EPRINTF1 (DBG_EPTRAP, "%s: EventProcNoFault\n", rail->r_generic.Name); -+ return; -+ -+ default: -+ printk ("%s: unhandled eproc trap %d\n", rail->r_generic.Name, EPROC_TrapType (status)); -+ elan4_display_eproc_trap (DBG_CONSOLE, 0, "ep4_eproc_trap", &trap); -+ } -+} -+ -+void -+ep4_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_CPROC_TRAP trap; -+ struct list_head *el; -+ register int i; -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &trap, cqnum); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_cproc_trap (DBG_BUFFER, 0, "ep4_cproc_trap", &trap); -+ -+ switch (CPROC_TrapType (status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ /* -+ * Try and handle a bunch of elan main interrupts -+ */ -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: defer command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_queue_mainintop (ctxt->ctxt_dev, &ecq->ecq_intop); -+ return; -+ } -+ } -+ } -+ break; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcThreadQueueOverflow: -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: restart command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_restartcq (ctxt->ctxt_dev, ecq->ecq_cq); -+ return; -+ } -+ } -+ } -+ break; -+ } -+ -+ printk ("%s: unhandled cproc trap %d for cqnum %d\n", rail->r_generic.Name, CPROC_TrapType (status), cqnum); -+ elan4_display_cproc_trap (DBG_CONSOLE, 0, "ep4_cproc_trap", &trap); -+} -+ -+void -+ep4_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_dproc_trap (DBG_BUFFER, 0, "ep4_dproc_trap", &trap); -+ -+ if (! DPROC_PrefetcherFault (trap.tr_status)) -+ { -+ switch (DPROC_TrapType (trap.tr_status)) -+ { -+ case DmaProcFailCountError: -+ goto retry_this_dma; -+ -+ case DmaProcPacketAckError: -+ goto retry_this_dma; -+ -+ case DmaProcQueueOverflow: -+ goto retry_this_dma; -+ } -+ } -+ -+ printk ("%s: unhandled dproc trap\n", rail->r_generic.Name); -+ elan4_display_dproc_trap (DBG_CONSOLE, 0, "ep4_dproc_trap", &trap); -+ return; -+ -+ retry_this_dma: -+ /*XXXX implement backoff .... */ -+ -+ ep4_queue_dma_retry (rail, &trap.tr_desc, EP_RETRY_LOW_PRI); -+} -+ -+void -+ep4_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_TPROC_TRAP *trap = &rail->r_tproc_trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, trap); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_tproc_trap (DBG_BUFFER, 0, "ep4_tproc_trap", trap); -+ -+ printk ("%s: unhandled tproc trap\n", rail->r_generic.Name); -+ elan4_display_tproc_trap (DBG_CONSOLE, 0, "ep4_tproc_trap", trap); -+} -+ -+void -+ep4_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_IPROC_TRAP *trap = &rail->r_iproc_trap; -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_iproc_trap (DBG_BUFFER, 0, "ep4_iproc_trap", trap); -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputDmaQueueOverflow: -+ ep4_queue_dma_retry (rail, (E4_DMA *) &trap->tr_dataBuffers[trap->tr_trappedTrans], EP_RETRY_LOW_PRI); -+ return; -+ -+ case InputEventEngineTrapped: -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ sdramaddr_t inputq; -+ E4_Addr event; -+ -+ /* XXXX: flow control on the command queue which we issue to is -+ * rather difficult, we don't want to have space for an event -+ * for each possible context, nor the mechanism to hold the -+ * context filter up until the event has been executed. Given -+ * that the event engine will be restarted by this same interrupt -+ * and we're using high priority command queues, then we just use -+ * a single small command queue for this. -+ */ -+ switch (IPROC_TransactionType(hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_MASK: -+ if (hdrp->TrAddr != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, hdrp->TrAddr); -+ return; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if ((inputq = ep_elan2sdram (&rail->r_generic, hdrp->TrAddr)) == 0) -+ printk ("%s: TR_INPUT_Q_COMMIT at %llx is not sdram\n", rail->r_generic.Name, hdrp->TrAddr); -+ else -+ { -+ if ((event = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq + offsetof (E4_InputQueue, q_event))) != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, event); -+ return; -+ } -+ } -+ break; -+ } -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ return; -+ -+ if (EP4_CONTEXT_ISDATA (IPROC_NetworkContext (status))) -+ { -+ unsigned int nodeId = EP4_CONTEXT_TO_NODE (IPROC_NetworkContext (status)); -+ -+ if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ printk ("%s: network error on dma packet from node %d\n", rail->r_generic.Name, nodeId); -+ -+ ep_queue_network_error (&rail->r_generic, EP4_CONTEXT_TO_NODE(IPROC_NetworkContext (status)), EP_NODE_NETERR_DMA_PACKET, unit & 1, 0); -+ return; -+ } -+ -+ if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ E4_uint64 status = trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType; -+ EP_NETERR_COOKIE cookie = 0; -+ -+ switch (IPROC_TransactionType (status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ printk ("%s: network error on setevent <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ printk ("%s: network error on queue commit <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ printk ("%s: network error on remote dma <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_IDENTIFY & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ printk ("%s: network error on identify <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ default: -+ panic ("%s: unknown identify transaction type %x for eop error from node %d\n", rail->r_generic.Name, -+ IPROC_TransactionType (trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType), nodeId); -+ break; -+ } -+ -+ ep_queue_network_error (&rail->r_generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, unit & 1, cookie); -+ } -+ } -+ return; -+ } -+ -+ printk ("%s: unhandled iproc trap\n", rail->r_generic.Name); -+ elan4_display_iproc_trap (DBG_CONSOLE, 0, "ep4_iproc_trap", trap); -+} -+ -+void -+ep4_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ EP4_INTCOOKIE *cp = ep4_lookup_intcookie (rail, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep4_interrupt: cannot find event cookie for %016llx\n", (long long) cookie); -+ return; -+ } -+ -+ cp->int_callback (rail, cp->int_arg); -+} -+ -+ELAN4_TRAP_OPS ep4_trap_ops = -+{ -+ ep4_eproc_trap, -+ ep4_cproc_trap, -+ ep4_dproc_trap, -+ ep4_tproc_trap, -+ ep4_iproc_trap, -+ ep4_interrupt, -+}; -+ -+void -+ep4_flush_filters (EP_RAIL *r) -+{ -+ /* nothing to do here as elan4_set_filter() flushes the context filter */ -+} -+ -+struct flush_queues_desc -+{ -+ EP4_RAIL *rail; -+ volatile int done; -+} ; -+ -+static void -+ep4_flush_queues_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (qentry.Desc.dma_typeSize) == rail->r_ctxt.ctxt_num) -+ { -+ E4_uint64 vp = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ EP4_ASSERT (rail, !EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ qentry.Desc.dma_typeSize = typeSize; -+ qentry.Desc.dma_cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ qentry.Desc.dma_vproc = vp; -+ qentry.Desc.dma_srcAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcAddr)); -+ qentry.Desc.dma_dstAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstAddr)); -+ qentry.Desc.dma_srcEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcEvent)); -+ qentry.Desc.dma_dstEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstEvent)); -+ -+ EPRINTF4 (DBG_RETRY, "ep4_flush_dmas: %016llx %016llx %016llx %016llx\n", qentry.Desc.dma_typeSize, -+ qentry.Desc.dma_cookie, qentry.Desc.dma_vproc, qentry.Desc.dma_srcAddr); -+ EPRINTF3 (DBG_RETRY, " %016llx %016llx %016llx\n", qentry.Desc.dma_dstAddr, -+ qentry.Desc.dma_srcEvent, qentry.Desc.dma_dstEvent); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_flush_queues_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_flush_queues (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ struct flush_queues_desc desc; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.done = 0; -+ -+ /* First - stall the dma retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_flush_queues_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_flush_queues_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(retry->retry_dma.dma_vproc)]; -+ -+ EP4_ASSERT (rail, nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ list_del (&retry->retry_link); -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+struct write_qdesc_desc -+{ -+ EP4_RAIL *rail; -+ sdramaddr_t qaddr; -+ E4_InputQueue *qdesc; -+ volatile int done; -+} ; -+ -+static void -+ep4_write_qdesc_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct write_qdesc_desc *desc = (struct write_qdesc_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ unsigned long flags; -+ -+ elan4_sdram_copyq_to_sdram (dev, desc->qdesc, desc->qaddr, sizeof (E4_InputQueue)); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+void -+ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc) -+{ -+ struct write_qdesc_desc desc; -+ unsigned long flags; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.qaddr = qaddr; -+ desc.qdesc = qdesc; -+ desc.done = 0; -+ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DiscardingHighPri; -+ rail->r_haltop.op_function = ep4_write_qdesc_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ -+ kmutex_unlock (&rail->r_haltop_mutex); -+} -+#define CQ_SIZE_NWORDS ((CQ_Size (ecq->ecq_cq->cq_size) >> 3) - 8) /* available number of dwords (less enough to flush) */ -+EP4_ECQ * -+ep4_alloc_ecq (EP4_RAIL *rail, unsigned cqsize) -+{ -+ EP4_ECQ *ecq; -+ unsigned long pgoff; -+ -+ /* no space available, so allocate a new entry */ -+ KMEM_ZALLOC (ecq, EP4_ECQ *, sizeof (EP4_ECQ), 1); -+ -+ if (ecq == NULL) -+ return 0; -+ -+ if ((ecq->ecq_cq = elan4_alloccq (&rail->r_ctxt, cqsize, CQ_EnableAllBits, CQ_Priority)) == NULL) -+ { -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+ return 0; -+ } -+ -+ pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ -+ ecq->ecq_addr = ep_rmalloc (rail->r_ecq_rmap, PAGESIZE, 0) + pgoff; -+ ecq->ecq_avail = CQ_SIZE_NWORDS; /* available number of dwords (less enough to flush) */ -+ -+ ecq->ecq_intop.op_function = (ELAN4_HALTFN *) elan4_restartcq; -+ ecq->ecq_intop.op_arg = ecq->ecq_cq; -+ -+ ep4_ioaddr_map (&rail->r_generic, ecq->ecq_addr - pgoff, ecq->ecq_cq->cq_mapping - pgoff, PAGESIZE, EP_PERM_WRITE); -+ -+ spin_lock_init (&ecq->ecq_lock); -+ -+ return ecq; -+} -+ -+void -+ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq) -+{ -+ unsigned long pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ -+ spin_lock_destroy (&ecq->ecq_lock); -+ -+ ep4_unmap (&rail->r_generic, ecq->ecq_addr - pgoff, PAGESIZE); -+ ep_rmfree (rail->r_ecq_rmap, PAGESIZE, ecq->ecq_addr - pgoff); -+ -+ elan4_freecq (&rail->r_ctxt, ecq->ecq_cq); -+ -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+} -+ -+EP4_ECQ * -+ep4_get_ecq (EP4_RAIL *rail, unsigned which, unsigned ndwords) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ EP4_ECQ *ecq; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_for_each (el, &rail->r_ecq_list[which]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (ecq->ecq_avail >= ndwords) -+ { -+ ecq->ecq_avail -= ndwords; -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if ((ecq = ep4_alloc_ecq (rail, EP4_ECQ_Size (which))) == NULL) -+ return NULL; -+ -+ if (which == EP4_ECQ_EVENT) -+ { -+ if ((ecq->ecq_event = ep_alloc_elan (&rail->r_generic, sizeof (E4_Event32), 0, &ecq->ecq_event_addr)) == 0) -+ { -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr), -+ ecq->ecq_addr); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event))); -+ -+ if ((ecq->ecq_flushcq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ } -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_add (&ecq->ecq_link, &rail->r_ecq_list[which]); -+ -+ ecq->ecq_avail -= ndwords; -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+} -+ -+void -+ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned ndwords) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ -+ ecq->ecq_avail += ndwords; -+ -+ if (ecq->ecq_avail != CQ_SIZE_NWORDS) -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ else -+ { -+ list_del (&ecq->ecq_link); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if (ecq->ecq_flushcq) -+ ep4_put_ecq (rail, ecq->ecq_flushcq, 1); -+ if (ecq->ecq_event_addr) -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ -+ ep4_free_ecq (rail, ecq); -+ } -+} -+ -+void -+ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_nop_cmd (ecq->ecq_cq, tag); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+ -+} -+ -+void -+ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_set_event_cmd (ecq->ecq_cq, event); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_wait_event_cmd (ecq->ecq_cq, event, candt, param0, param1); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ rail->r_flush_count = 0; -+ kcondvar_wakeupone (&rail->r_flush_sleep, &rail->r_ecq_lock); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_flush_ecqs (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ kmutex_lock (&rail->r_flush_mutex); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ /* first flush all the "event" queues */ -+ list_for_each (el, &rail->r_ecq_list[EP4_ECQ_EVENT]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ ep4_set_event_cmd (ecq->ecq_flushcq, ecq->ecq_event_addr); -+ -+ rail->r_flush_count++; -+ } -+ -+ /* next issue the setevents to all the other queues */ -+ for (i = EP4_ECQ_ATOMIC; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep4_set_event_cmd (ecq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event)); -+ -+ rail->r_flush_count++; -+ } -+ } -+ -+ /* issue the waitevent command */ -+ ep4_wait_event_cmd (rail->r_flush_mcq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * rail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0), -+ rail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (rail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ while (rail->r_flush_count) -+ kcondvar_wait (&rail->r_flush_sleep, &rail->r_ecq_lock, &flags); -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ kmutex_unlock (&rail->r_flush_mutex); -+} -+ -+void -+ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...) -+{ -+ sdramaddr_t sp = stackTop - roundup (nargs * sizeof (E4_uint64), E4_STACK_ALIGN); -+ int i; -+ va_list ap; -+ -+ /* -+ * the thread start code expects the following : -+ * %r1 = stack pointer -+ * %r6 = frame pointer -+ * %r2 = function to call -+ * -+ * function args are store on stack above %sp -+ */ -+ -+ va_start(ap, nargs); -+ for (i = 0; i < nargs; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, sp + (i * sizeof (E4_uint64)), va_arg (ap, E4_uint64)); -+ va_end (ap); -+ -+ regs->Registers[0] = ep_symbol (&rail->r_threadcode, ".thread_start"); /* %r0 - PC */ -+ regs->Registers[1] = stackAddr - (stackTop - sp); /* %r1 - stack pointer */ -+ regs->Registers[2] = startpc; /* %r2 - start pc */ -+ regs->Registers[3] = 0; -+ regs->Registers[4] = 0; -+ regs->Registers[5] = 0; -+ regs->Registers[6] = stackTop; /* %r6 - frame pointer */ -+} -+ -+/* retransmission thread */ -+ -+void -+ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_add_tail (&ops->op_link, &rail->r_retry_ops); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_retry_thread (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ -+ kernel_thread_init ("ep4_retry"); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ list_for_each (el, &rail->r_retry_ops) { -+ EP4_RETRY_OPS *ops = list_entry (el, EP4_RETRY_OPS, op_link); -+ -+ nextRunTime = ops->op_func (rail, ops->op_arg, nextRunTime); -+ } -+ -+ if (ep_kthread_sleep (&rail->r_retry_thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&rail->r_retry_thread); -+ -+ kernel_thread_exit(); -+} -+ -+/* DMA retransmission */ -+static unsigned ep4_dma_retry_times[EP_NUM_RETRIES]; -+ -+static unsigned long -+ep4_retry_dmas (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ unsigned long yieldAt = lbolt + (hz/10); -+ unsigned long flags; -+ int i; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ if (! AFTER(lbolt, retry->retry_time)) -+ break; -+ -+ if (ep_kthread_should_stall (&rail->r_retry_thread) || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4_retry_dmas: flowcnt %llx %llx\n", rail->r_generic.Name, rail->r_dma_flowcnt, rail->r_main->r_dma_flowcnt); -+ -+ if ((rail->r_dma_flowcnt - rail->r_main->r_dma_flowcnt) > EP4_DMA_RETRY_FLOWCNT) -+ { -+ printk ("ep4_retry_dmas: flowcnt %llx %llx\n", rail->r_dma_flowcnt, rail->r_main->r_dma_flowcnt); -+ -+ goto cant_do_more; -+ } -+ -+ EPRINTF4 (DBG_RETRY, "%s: ep4_retry_dmas: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ retry->retry_dma.dma_typeSize, retry->retry_dma.dma_cookie, retry->retry_dma.dma_vproc); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ retry->retry_dma.dma_srcAddr, retry->retry_dma.dma_dstAddr, retry->retry_dma.dma_srcEvent, -+ retry->retry_dma.dma_dstEvent); -+ -+ elan4_run_dma_cmd (rail->r_dma_ecq->ecq_cq, &retry->retry_dma); -+ elan4_write_dword_cmd (rail->r_dma_ecq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_dma_flowcnt), ++rail->r_dma_flowcnt); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_del (&retry->retry_link); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ } -+ cant_do_more: -+ -+ /* re-compute the next retry time */ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ SET_NEXT_RUN_TIME (nextRunTime, retry->retry_time); -+ } -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep4_initialise_dma_retries (EP4_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->r_dma_lock); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->r_dma_retrylist[i]); -+ -+ INIT_LIST_HEAD (&rail->r_dma_freelist); -+ -+ rail->r_dma_ecq = ep4_alloc_ecq (rail, EP4_DMA_RETRY_CQSIZE); -+ -+ rail->r_dma_allocated = 0; -+ rail->r_dma_reserved = 0; -+ -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ ep4_dma_retry_times[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->r_dma_ops.op_func = ep4_retry_dmas; -+ rail->r_dma_ops.op_arg = NULL; -+ -+ ep4_add_retry_ops (rail, &rail->r_dma_ops); -+} -+ -+void -+ep4_finalise_dma_retries (EP4_RAIL *rail) -+{ -+ ep4_remove_retry_ops (rail, &rail->r_dma_ops); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ EP4_ASSERT (rail, rail->r_dma_reserved == 0); -+ -+ while (! list_empty (&rail->r_dma_freelist)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ KMEM_FREE (retry, sizeof (EP4_DMA_RETRY)); -+ } -+ -+ ep4_free_ecq (rail, rail->r_dma_ecq); -+ -+ spin_lock_destroy (&rail->r_dma_lock); -+} -+ -+int -+ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, EP_ATTRIBUTE attr) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ if (remaining <= (rail->r_dma_allocated - rail->r_dma_reserved)) -+ { -+ rail->r_dma_reserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 0; -+ } -+ -+ remaining -= (rail->r_dma_allocated - rail->r_dma_reserved); -+ -+ rail->r_dma_reserved = rail->r_dma_allocated; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ while (remaining > 0) -+ { -+ KMEM_ALLOC (retry, EP4_DMA_RETRY *, sizeof (EP4_DMA_RETRY), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ -+ rail->r_dma_allocated++; -+ rail->r_dma_reserved++; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ return 0; -+ -+ failed: -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 1; -+} -+ -+void -+ep4_release_dma_retries (EP4_RAIL *rail, unsigned int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= count; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_retry: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_typeSize, dma->dma_cookie, dma->dma_vproc, dma->dma_srcAddr); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx (%d)\n", rail->r_generic.Name, -+ dma->dma_dstAddr, dma->dma_srcEvent, dma->dma_dstEvent, interval); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ retry->retry_time = lbolt + ep4_dma_retry_times[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->retry_link, &rail->r_dma_retrylist[interval]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, retry->retry_time); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(dma->dma_vproc)]; -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_stalled: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_typeSize, dma->dma_cookie, dma->dma_vproc, dma->dma_srcAddr); -+ EPRINTF4 (DBG_RETRY, "%s: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_dstAddr, dma->dma_srcEvent, dma->dma_dstEvent); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->r_dma_freelist); -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_display_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ register int i; -+ unsigned long flags; -+ -+ ep_debugf (DBG_DEBUG, "%s: vendorid=%x deviceid=%x\n", rail->r_generic.Name, -+ rail->r_generic.Devinfo.dev_vendor_id, rail->r_generic.Devinfo.dev_device_id); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i < EP4_NUM_ECQ; i++) -+ { -+ list_for_each (el, &rail->r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (i == EP4_ECQ_EVENT) -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d event=%llx,%llx,%llx\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), ecq->ecq_addr, ecq->ecq_avail, -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr))); -+ -+ else -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), ecq->ecq_addr, ecq->ecq_avail); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%ld mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ rail->r_flush_count, rail->r_flush_mcq, rail->r_flush_ecq, -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each (el, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ ep_debugf (DBG_DEBUG, " RETRY[%d] typeSize %llx cookie %llx vproc %llx events %llx %llx\n", -+ i, retry->retry_dma.dma_typeSize, retry->retry_dma.dma_cookie, -+ retry->retry_dma.dma_vproc, retry->retry_dma.dma_srcEvent, retry->retry_dma.dma_dstEvent); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -Index: linux-2.4.21/drivers/net/qsnet/ep/threadcode.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/threadcode.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/threadcode.c 2005-06-01 23:12:54.689425424 -0400 -@@ -0,0 +1,146 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode.c,v $ */ -+ -+#include -+ -+#include -+ -+EP_ADDR -+ep_symbol (EP_CODE *code, char *name) -+{ -+ EP_SYMBOL *s = code->symbols; -+ -+ while (s->name && strcmp (s->name, name)) -+ s++; -+ -+ return (s->name ? s->value : (EP_ADDR) 0); -+} -+ -+int -+ep_loadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ register int i; -+ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _etext = ep_symbol (code, "_etext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ EP_ADDR _edata = ep_symbol (code, "_edata"); -+ EP_ADDR _end = ep_symbol (code, "_end"); -+ EP_ADDR _rodata = roundup (_etext, sizeof (uint64_t)); -+ -+ if (_stext == (EP_ADDR) 0 || _etext == (EP_ADDR) 0 || -+ _sdata == (EP_ADDR) 0 || _edata == (EP_ADDR) 0 || -+ _end == (EP_ADDR) 0) -+ { -+ printk ("ep_loadcode: symbols not defined correctly for code at %p\n", code); -+ return (EINVAL); -+ } -+ -+ /* -+ * Include the rodata in the text segment -+ */ -+ _etext = _rodata + code->rodata_size; -+ -+ /* -+ * If _etext is in the same page as _sdata, then allocate a contiguous -+ * chunk of memory and map it as read/write. otherwise allocate two chunks -+ * and map the code in as read-only. -+ */ -+ if ((_etext & PAGEMASK) == (_sdata & PAGEMASK)) -+ { -+ code->ntext = btopr (_end - (_stext & PAGEMASK)); -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ code->_sdata = code->_stext + (_sdata - _stext); -+ } -+ else -+ { -+ code->ntext = btopr (_etext - (_stext & PAGEMASK)); -+ code->ndata = btopr (_end - (_sdata & PAGEMASK)); -+ -+ if (code->ntext) -+ { -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ } -+ -+ if (code->ndata) -+ { -+ code->ppdata = ep_alloc_memory_elan (rail, _sdata & PAGEMASK, ptob (code->ndata), EP_PERM_WRITE, 0); -+ -+ if (code->ppdata == (sdramaddr_t) 0) -+ { -+ if (code->ntext) ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->ntext = 0; -+ -+ return (ENOMEM); -+ } -+ -+ code->_sdata = code->ppdata + (_sdata & PAGEOFFSET); -+ } -+ } -+ -+#ifdef __LITTLE_ENDIAN__ -+# define Flip 3 -+#else -+# define Flip 0 -+#endif -+ -+ /* -+ * Now copy the text and rodata into the SDRAM -+ * this is linked into the module to be byte -+ * copied to the SDRAM, since we want to copy -+ * with word accesses we have to do the byte -+ * assembly correctly. -+ */ -+ for (i = 0; i < code->text_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_stext + i, code->text[i^Flip]); -+ -+ for (i = 0; i < code->rodata_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_rodata + i, code->rodata[i^Flip]); -+ -+ /* -+ * And the initialised data segment. -+ */ -+ for (i = 0; i < code->data_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_sdata + i, code->data[i^Flip]); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_unloadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ -+ if (code->pptext) -+ ep_free_memory_elan (rail, _stext & PAGEMASK); -+ if (code->ppdata) -+ ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->pptext = code->ppdata = 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan3.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/threadcode_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan3.c 2005-06-01 23:12:54.690425272 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode_elan3.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+E3_Addr -+ep3_init_thread (ELAN3_DEV *dev, -+ E3_Addr fn, /* Elan address of function */ -+ E3_Addr addr, /* Elan address of stack */ -+ sdramaddr_t stack, /* sdram address of stack */ -+ int stackSize, /* stack size (in bytes) */ -+ int nargs, -+ ...) -+{ -+ sdramaddr_t frame; -+ sdramaddr_t regs; -+ sdramaddr_t argsp; -+ int i; -+ va_list ap; -+ -+ /* -+ * Align the stack pointer at the top of the stack and leave space for a stack frame -+ */ -+ stack = ((stack + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ addr = ((addr + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ -+ va_start (ap, nargs); -+ -+ if (nargs > 6) -+ { -+ stack -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ addr -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ } -+ -+ frame = stack; -+ regs = stack - sizeof (E3_OutsRegs); -+ -+ /* -+ * Initialise the registers, and stack frame. -+ */ -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[6]), fn); -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[7]), 0); -+ -+ if (nargs <= 6) -+ { -+ for (i = 0; i < nargs; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ } -+ else -+ { -+ for (i = 0; i < 6; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ -+ for (argsp = frame + offsetof (E3_Frame, fr_argx[0]); i < nargs; i++, argsp += sizeof (E3_uint32)) -+ elan3_sdram_writel (dev, argsp, va_arg (ap, int)); -+ } -+ -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savefp), 0); -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savepc), 0); -+ -+ va_end (ap); -+ -+ return (addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan3_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 2005-06-01 23:12:54.690425272 -0400 -@@ -0,0 +1,112 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan3.h" -+static uint32_t threadcode_elan3_text[] = { -+0x80a0239c, 0x00001082, 0x00e0a280, 0x47008002, 0x0020a380, 0x20600288, 0x20200286, 0x43008002, -+0x00000001, 0x0a006081, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0xa800c613, 0xa300c609, 0x0020108a, 0x0080900b, 0x00006885, 0x0580a080, -+0x06008002, 0x02a0a080, 0x06008022, 0xffff0296, 0x04008010, 0xff3f0398, 0x1f008010, 0x00201090, -+0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x00c03f3f, -+0xf8e017be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, -+0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x00e0a280, -+0xbfffbf12, 0x0020a380, 0x03008012, 0x02201090, 0x03201090, 0x08e0c381, 0x80a0039c, 0xe0a0239c, -+0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x080010b8, 0x090010b0, 0x0a0010b2, 0x04000037, 0x402006b4, -+0x50200690, 0x01201092, 0x20a0239c, 0x00a0a3f0, 0x00c03f3f, 0x8ce117be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ff8, 0x0000b081, 0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, -+0x00a083f0, 0x20a0039c, 0x582006d0, 0x0020a280, 0x05008002, 0x0900a280, 0x10008002, 0x50200690, -+0xeaffbf30, 0x5c2006d4, 0x18001090, 0x19001092, 0x1b800294, 0x0a201096, 0x8affff7f, 0x05201098, -+0x446026d0, 0x302027f4, 0xdfffbf10, 0x50200690, 0xfdffbf10, 0x446026c0, 0x5c2006e0, 0x0020a480, -+0xf9ffbf06, 0x18001090, 0x19001092, 0x1b000494, 0x14201096, 0x7bffff7f, 0x0a201098, 0x0020a280, -+0xf4ffbf22, 0x486026e0, 0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, -+0x40a0a3e0, 0x00c03f3f, 0x60e217be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, -+0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, -+0x60a0039c, 0xff3f84a0, 0xe0ffbf1c, 0x18001090, 0xd5ffbf30, 0x60a003de, 0x80a083e0, 0xa0a083f0, -+0x08e0c381, 0xe0a0039c, 0x00a1239c, 0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x44a123d0, 0x090010b0, -+0x0a0010b6, 0x0b0010b8, 0x0c0010b4, 0x012010ba, 0xdca023fa, 0x142007d2, 0x082007d0, 0x084002b2, -+0x000027c0, 0xf42006d0, 0x0020a280, 0x15008032, 0xf42006d0, 0x18200790, 0xdca003d2, 0x20a0239c, -+0x00a0a3f0, 0x00c03f3f, 0x20e317be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ff8, 0x0000b081, -+0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, 0x00a083f0, 0x20a0039c, 0xf42006d0, -+0x0020a280, 0x0a008022, 0xdca023c0, 0x042007d0, 0x0840a680, 0x06008032, 0xdca023c0, 0x18001082, -+0x0220d091, 0xe1ffbf10, 0xf42006d0, 0x06008010, 0x190010a2, 0x042006d0, 0x00c026d0, 0x18001082, -+0x0020d091, 0x042006d0, 0x01200290, 0x042026d0, 0x000006d0, 0x0020a280, 0x04008002, 0x18001090, -+0x4f010040, 0x1b001092, 0xf02006e0, 0x0020a480, 0xf1ffbf02, 0x40b03611, 0x004004d2, 0x01201290, -+0x0840a280, 0x0e018012, 0x10001096, 0x046004d0, 0x01208a80, 0x33008002, 0xa0200484, 0x0c2610ba, -+0x000024fa, 0x00211090, 0x042024d0, 0x246004d0, 0x80200290, 0x082024d0, 0xec2004d0, 0x00210290, -+0x0c2024d0, 0x102024c4, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, -+0xc0ff3f3b, 0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, -+0x142024d0, 0x206004d0, 0x10210290, 0x182024d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, -+0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, -+0x08401292, 0x4000003b, 0x1d401292, 0x1c2024d2, 0x01201090, 0xa02024d0, 0x20200496, 0xa8200484, -+0x306004d0, 0x0020a280, 0x2b008012, 0x00201098, 0x0c2610ba, 0x00c022fa, 0x04e022c0, 0xc0200490, -+0x10e022d0, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, 0xc0ff3f3b, -+0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, 0x14e022d0, -+0x206004d0, 0x10210290, 0x18e022d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, 0x1d400292, -+0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, 0x08401292, -+0x4000003b, 0x1d401292, 0x1ce022d2, 0x4f008010, 0x0020109a, 0x0c00109a, 0x306004d0, 0x0840a380, -+0x3b00801a, 0xe02004c6, 0x0c2610ba, 0x00c022fa, 0x01202b91, 0x0c000290, 0x02202a91, 0x08400490, -+0x382002d2, 0x04e022d2, 0x342002d0, 0x08e022d0, 0x0ce022c6, 0x10e022c4, 0x186004d0, 0x02202a91, -+0x088006d2, 0x0001003b, 0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x44a103fa, 0x606007d0, -+0x00280290, 0x08401292, 0x4000003b, 0x1d401292, 0x14e022d2, 0x206004d0, 0x10210290, 0x18e022d0, -+0x186004d0, 0x02202a91, 0x088006d4, 0x0001003b, 0x1d800294, 0x088026d4, 0xc0ff3f3b, 0x1d800a94, -+0x186004d0, 0x00280290, 0x80000013, 0x09001290, 0x08801294, 0x4000003b, 0x1d801294, 0x1ce022d4, -+0x01201090, 0x008020d0, 0x04e002d0, 0x08c00086, 0x0840039a, 0x01200398, 0x20e00296, 0x306004d0, -+0x0800a380, 0xc9ffbf0a, 0x08a00084, 0xc0200490, 0xf0ff22d0, 0xe42004d0, 0x0d00a280, 0x0b00801a, -+0x00201098, 0x04008010, 0x10001096, 0x01200398, 0x20e00296, 0x306004d0, 0x0800a380, 0xfcffbf2a, -+0x04e022c0, 0xfc3f109a, 0xe42024da, 0x10001082, 0x186004d0, 0x00280290, 0x08006081, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00201098, -+0x0c00109a, 0x142004fa, 0xec00823b, 0x3080d61b, 0x00006891, 0x0420a280, 0x3b008002, 0x0c00a280, -+0x04008002, 0x00000001, 0x0120d091, 0x36008030, 0x7c2006d0, 0x01200290, 0x7c2026d0, 0x782006d0, -+0x0020a280, 0x04008002, 0x78200690, 0x64000040, 0x40e00692, 0xf02004d0, 0x0020a280, 0x03008012, -+0xf02026d0, 0x80e026c0, 0x7c2006d0, 0x40e026d0, 0x046004d0, 0x04208a80, 0x13008002, 0x1100108a, -+0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, 0x406099e0, -+0x40a0b9e0, 0x806099e0, 0x80a0b9e0, 0xc06099e0, 0xc0a0b9e0, 0x00809be0, 0x0780039c, 0x0e008010, -+0xec2004d2, 0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, -+0x406099e0, 0x40a0b9e0, 0x00809be0, 0x0780039c, 0xec2004d2, 0xe42004d0, 0x886222d0, 0x042006d0, -+0x00c026d0, 0x000007d0, 0x01208a80, 0x05008012, 0x00000001, 0x142027f2, 0x06008010, 0xdca003fa, -+0x142027f2, 0xfe3f0a90, 0x000027d0, 0xdca003fa, 0x016007ba, 0xdca023fa, 0x0c2007d0, 0x0840a680, -+0x04008032, 0x082007d0, 0x03008010, 0x102007f2, 0x084006b2, 0x00007081, 0x1600801c, 0x00000001, -+0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x8ce017be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, -+0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x042007d0, 0x0840a680, 0xb3febf12, 0x190010a2, -+0x8afebf10, 0xf42006d0, 0x60a003de, 0x80a083e0, 0xa0a083f0, 0x08e0c381, 0x00a1039c, 0x80a0239c, -+0x042002c4, 0x004022c4, 0x18008030, 0x00007081, 0x16008012, 0x00000001, 0x60a0239c, 0x00a0a3c0, -+0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x24e117be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, -+0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, -+0x00a083c0, 0x60a0039c, 0x000002c4, 0x00a0a080, 0xe7ffbf12, 0x00000001, 0x042002c4, 0x01a00084, -+0x042022c4, 0x000002c4, 0x00a0a080, 0xddffbf12, 0x00000001, 0x08e0c381, 0x80a0039c, }; -+#define threadcode_elan3_text_size 0x97c -+static uint32_t threadcode_elan3_data[] = { -+0}; -+#define threadcode_elan3_data_size 0x0 -+static uint32_t threadcode_elan3_rodata[] = { -+0}; -+#define threadcode_elan3_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan3_symbols[] = { -+ {"__bss_start", 0xff00297c}, -+ {"_edata", 0xff00297c}, -+ {"_end", 0xff002988}, -+ {"_etext", 0xff00097c}, -+ {"_sdata", 0xff00297c}, -+ {"_stext", 0xff000000}, -+ {"ep3_spinblock", 0xff0008dc}, -+ {"ep3comms_rcvr", 0xff0002a8}, -+ {"kcomm_probe", 0xff00013c}, -+ {"r", 0xff00297c}, -+ {"rail", 0xff002984}, -+ {"rm", 0xff002980}, -+ {0, 0}}; -+EP_CODE threadcode_elan3 = { -+ (unsigned char *) threadcode_elan3_text, -+ threadcode_elan3_text_size, -+ (unsigned char *) threadcode_elan3_data, -+ threadcode_elan3_data_size, -+ (unsigned char *) threadcode_elan3_rodata, -+ threadcode_elan3_rodata_size, -+ threadcode_elan3_symbols, -+}; -Index: linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan4_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 2005-06-01 23:12:54.691425120 -0400 -@@ -0,0 +1,112 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan4.h" -+static uint32_t threadcode_elan4_text[] = { -+0x00a00087, 0xc04060cb, 0x00003080, 0x80001080, 0x02606180, 0x02004032, 0x807f60cb, 0x04606180, -+0x02004032, 0x407f60d3, 0x08606180, 0x02004032, 0x007f60db, 0x10606180, 0x02004032, 0xc07e60e3, -+0x20606180, 0x02004032, 0x807e60eb, 0x40606180, 0x02004032, 0x407e60f3, 0x80606180, 0x02004032, -+0x007e60fb, 0x40001180, 0xc3801080, 0xc07f60c3, 0x20002000, 0x20002000, 0x20002000, 0x20002000, -+0x407f8001, 0x4060c0c7, 0x4860c0d0, 0x5060c0d1, 0x5860c0d2, 0x6060c0d3, 0x6860c0d4, 0x00208292, -+0x00608291, 0x00a08294, 0xff3f8088, 0x1c381293, 0xc04044c8, 0x13004290, 0xc000c5d0, 0x08004030, -+0x00001088, 0x04204288, 0x0020b200, 0x04004003, 0x00208080, 0x9c010040, 0x00a08488, 0xc04044c8, -+0x20381288, 0x0020b200, 0xf6ff7f13, 0x01208408, 0x11161282, 0x804094c2, 0xc04044c8, 0x20381288, -+0x0020b200, 0xebff7f13, 0x00208080, 0x406040c7, 0x486040d0, 0x506040d1, 0x586040d2, 0x606040d3, -+0x686040d4, 0x08e00180, 0xc0608001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x807e8001, 0x4060c0c7, 0x4860c0d0, 0x5060c0d1, 0x5860c0d2, 0x6060c0d3, 0x6860c0d4, 0x7060c0d5, -+0x7860c0d6, 0x8060c0d7, 0x8860c0d8, 0x9060c0d9, 0x9860c0da, 0xa060c0db, 0xa860c0dc, 0xb060c0dd, -+0xb860c0de, 0xc060c0df, 0x8061c0c8, 0x00608296, 0x00a0829a, 0x9861c0cb, 0xa061c0cc, 0xa861c0cd, -+0x01208088, 0x3861c0c8, 0x08e042d2, 0x386140c9, 0x0900900a, 0xa06140c8, 0x986140cb, 0x18e042c9, -+0x72010040, 0x05b4128a, 0x0020808c, 0x3861c0cc, 0x986140c9, 0xc04042c8, 0x0880b400, 0x39014003, -+0xffff3f08, 0x90a0851c, 0xe023829f, 0x20f4179f, 0x10e3879f, 0xffff3f08, 0xe023829e, 0x20b4179e, -+0x03a3879e, 0xffff3f08, 0xe023829d, 0x2074179d, 0x0363879d, 0x00a08495, 0x18a08408, 0x800012c2, -+0x089a109b, 0x20f4169b, 0x20f8169b, 0x00e88609, 0x20741289, 0x01120008, 0x0a381288, 0x08408297, -+0x45208088, 0x06341288, 0x806140ca, 0xc88042c8, 0x00288218, 0x04a08408, 0x800012c2, 0x089a1088, -+0x20341288, 0x20381288, 0x00281299, 0x20a08408, 0x800012c2, 0x089a108a, 0x20b4128a, 0x20b8128a, -+0x30a08408, 0x800012c2, 0x089a1093, 0x20f41493, 0x20f81493, 0x03f41689, 0x806140cb, 0x2922808c, -+0x0334138c, 0xccc042c8, 0xc90042d1, 0x02604688, 0x0020b200, 0x03004002, 0x60a08214, 0x80a08214, -+0x90a08509, 0x804012c8, 0x01208208, 0x804092c8, 0x046012c8, 0x043a1288, 0x0020b200, 0x04004003, -+0xa86140c8, 0x67ffff7f, 0x00a0868a, 0x88a045d0, 0x0020b400, 0x12004013, 0x00208080, 0x800017c8, -+0x808096c8, 0x72010040, 0x00a08588, 0x00208290, 0x90a08509, 0x804012c8, 0x01208208, 0x804092c8, -+0x046012c8, 0x043a1288, 0x0020b200, 0x04004003, 0xa86140c8, 0x53ffff7f, 0x00a0868a, 0x804015c2, -+0x159a1089, 0x20741289, 0x20781289, 0x40b03608, 0x01208288, 0x0840b200, 0x06004023, 0xa02344c4, -+0x800017c8, 0x808096c8, 0xbb004010, 0xa8a045c8, 0x01604688, 0x00281288, 0x08009008, 0x00e0b400, -+0x05004003, 0x3f381289, 0x13408209, 0x03004010, 0x05208088, 0x04208088, 0x09009220, 0x07341889, -+0x0900840b, 0x05341888, 0x0023820a, 0x01604688, 0x0020b200, 0x1d004002, 0x0a00840c, 0xc900c4d7, -+0x40c40f08, 0x09208288, 0x08e0c2c8, 0x0a608488, 0x10e0c2c8, 0x81001008, 0x0a341288, 0x18e0c2c8, -+0x1d608488, 0x20e0c2c8, 0x28e0c2d8, 0x24608508, 0x800012c2, 0x089a1088, 0x20341288, 0x20381288, -+0x80208208, 0x30e0c2c8, 0x00218108, 0x38e0c2c8, 0x40e0c2d4, 0x48e0c2cc, 0xca00c4df, 0x20608411, -+0x80e0820b, 0x2020830c, 0x00e0b400, 0x13004013, 0x0020808e, 0xc0c0c2d7, 0x40c40f09, 0x09608289, -+0x08e0c2c9, 0x0a608488, 0x10e0c2c8, 0x00040008, 0x18e0c2c8, 0x1d608488, 0x20e0c2c8, 0x28e0c2d8, -+0x40e0c2d4, 0x48e0c2cc, 0xc000c3de, 0x00208083, 0x4c004010, 0x20608411, 0xb8238408, 0x800012c2, -+0x089a108f, 0x20f4138f, 0x20f8138f, 0x00208083, 0x13c0b000, 0x2e00401b, 0x40c40f08, 0x092082a2, -+0x00040021, 0xffff3f08, 0xe023828d, 0x2074138d, 0x1063838d, 0x0e808309, 0x0e408209, 0x02741289, -+0x1540820a, 0x38a0820a, 0x808012c2, 0x0a9a108a, 0x20b4128a, 0x20b8128a, 0xc0c0c2d7, 0x08e0c2e2, -+0x0a608488, 0x10e0c2c8, 0x20b41288, 0x21008288, 0x18e0c2c8, 0x1d608488, 0x20e0c2c8, 0x28e0c2d8, -+0x15408209, 0x34608209, 0x804012c2, 0x099a1089, 0x20741289, 0x20781289, 0x30e0c2c9, 0x38e0c2cf, -+0x40e0c2d4, 0x48e0c2cc, 0xc000c3cd, 0x0ac0830f, 0x0ac08003, 0x20608411, 0x80e0820b, 0x01a0830e, -+0x1380b300, 0xdcff7f0b, 0x2020830c, 0xe03f830c, 0xc000c3dd, 0xbc238408, 0x800012c2, 0x089a1088, -+0x20341288, 0x20381288, 0x0300b200, 0x0d00401b, 0x07341888, 0x0020888e, 0x0420b800, 0x08004019, -+0x0800840b, 0x00040008, 0x18e0c2c8, 0x01a0830e, 0x04a0b300, 0xfdff7f09, 0x80e0820b, 0xfc3f8083, -+0x07341888, 0x08008408, 0xa06140ca, 0xc00062e3, 0x402062f3, 0xc080e2e3, 0xc080e2f3, 0x982244c8, -+0x88a0c5c8, 0x88a045c8, 0x0020b200, 0x05004013, 0x04604688, 0x88a08508, 0x80a0c5c8, 0x04604688, -+0x0020b200, 0x0c004002, 0xd822c4c0, 0xc04065e3, 0x406065f3, 0xc000e1e3, 0x806065e3, 0x4020e1f3, -+0xc06065f3, 0x8020e1e3, 0xc020e1f3, 0x07004010, 0x88228108, 0xc04065e3, 0x406065f3, 0xc000e1e3, -+0x4020e1f3, 0x88228108, 0x08d61082, 0x800092c2, 0x03f41689, 0x806140cb, 0x2922808c, 0x0334138c, -+0xccc042c8, 0xc900c2d1, 0x800017c8, 0x808096c8, 0xa8a045c8, 0x0880b400, 0x03004013, 0x00a18412, -+0xa0a045d2, 0x98a045c8, 0x0020b200, 0x05004013, 0x386140c9, 0x986140c8, 0x0820c2d2, 0x386140c9, -+0x01608209, 0xfe61b200, 0x0e004015, 0x3861c0c9, 0x00001088, 0x02204288, 0x0020b200, 0x05004003, -+0x986140ca, 0x28000040, 0xa06140c8, 0x986140ca, 0xc08042c8, 0x0880b400, 0xd8fe7f13, 0x00a08495, -+0x98a045cb, 0x00e0b200, 0xbafe7f03, 0x386140c9, 0xa06140c8, 0x60a08509, 0x48000040, 0xe03f808a, -+0x986140cb, 0x08e0c2d2, 0x386140cc, 0x0120830c, 0xaffe7f10, 0x3861c0cc, 0x406040c7, 0x486040d0, -+0x506040d1, 0x586040d2, 0x606040d3, 0x686040d4, 0x706040d5, 0x786040d6, 0x806040d7, 0x886040d8, -+0x906040d9, 0x986040da, 0xa06040db, 0xa86040dc, 0xb06040dd, 0xb86040de, 0xc06040df, 0x08e00180, -+0x80618001, 0x807f8001, 0xc040e0d3, 0x4060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, -+0x00e08192, 0x02000040, 0x00608091, 0x14e08110, 0x17208097, 0xc000f2d3, 0xc04060d3, 0x406060db, -+0x08a00080, 0x80608001, 0x407f8001, 0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, 0x00208080, -+0x00208080, 0x00e08192, 0x02000040, 0x00608091, 0x40e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, -+0x00208490, 0x00e08597, 0x00208080, 0x00208080, 0x1f608290, 0x20b41291, 0x08638491, 0x00608092, -+0x00208293, 0xc000f2d1, 0x406060d3, 0x806060db, 0x08a00080, 0xc0608001, 0x407f8001, 0x4060e0d3, -+0x8060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, 0x00e08192, 0x02000040, 0x00608091, -+0x54e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, 0x00208490, 0x00e08597, 0x00208080, 0x00208080, -+0x1f608290, 0x20b41291, 0x08638491, 0x00608092, 0x00208293, 0x0ef41294, 0x0d208594, 0x17208095, -+0x17208096, 0x17208097, 0xc000f2d3, 0x406060d3, 0x806060db, 0x08a00080, 0xc0608001, 0x01208097, -+0xb0e3c0d7, 0x80a060d2, 0x98e28004, 0x98e2c0c0, 0x80a0c0c4, 0xc080c4c3, 0x01e0b400, 0x06004002, -+0x00a08490, 0x00e08097, 0x02208097, 0xb0e3c0d7, 0xd8e2d0d0, 0xd8e2c0d0, 0x03208097, 0xb0e3c0d7, -+0x00e08088, 0x0e004010, 0x00a060c3, 0x407f8001, 0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, -+0x00208080, 0x00208080, 0x01208089, 0x8820c2c9, 0x00608091, 0x00e08197, 0x0020f2d3, 0x406060d3, -+0x806060db, 0x08e00180, 0xc0608001, }; -+#define threadcode_elan4_text_size 0x90c -+static uint32_t threadcode_elan4_data[] = { -+0}; -+#define threadcode_elan4_data_size 0x0 -+static uint32_t threadcode_elan4_rodata[] = { -+0}; -+#define threadcode_elan4_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan4_symbols[] = { -+ {".thread_restart", 0x00000000f800000c}, -+ {".thread_start", 0x00000000f8000000}, -+ {"__bss_start", 0x00000000f810090c}, -+ {"_edata", 0x00000000f810090c}, -+ {"_end", 0x00000000f8100910}, -+ {"_etext", 0x00000000f800090c}, -+ {"_sdata", 0x00000000f810090c}, -+ {"_stext", 0x00000000f8000000}, -+ {"c_queue_rxd", 0x00000000f800087c}, -+ {"c_reschedule", 0x00000000f8000744}, -+ {"c_stall_thread", 0x00000000f80008cc}, -+ {"c_waitevent", 0x00000000f8000788}, -+ {"c_waitevent_interrupt", 0x00000000f80007f8}, -+ {"ep4_spinblock", 0x00000000f8000080}, -+ {"ep4comms_rcvr", 0x00000000f8000140}, -+ {0, 0}}; -+EP_CODE threadcode_elan4 = { -+ (unsigned char *) threadcode_elan4_text, -+ threadcode_elan4_text_size, -+ (unsigned char *) threadcode_elan4_data, -+ threadcode_elan4_data_size, -+ (unsigned char *) threadcode_elan4_rodata, -+ threadcode_elan4_rodata_size, -+ threadcode_elan4_symbols, -+}; -Index: linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/jtagdrv.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv.c 2005-06-01 23:12:54.692424968 -0400 -@@ -0,0 +1,451 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv.c,v 1.12 2003/06/07 16:02:35 david Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.c,v $*/ -+ -+#include -+ -+#include "jtagdrv.h" -+#include -+ -+int -+jtagdrv_strobe_data (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_strobe_data: %s %s %s -> ", (data & LPT_DATA_TRST) ? "TRST" : "trst", -+ (data & LPT_DATA_TDI) ? "TDI" : "tdi", (data & LPT_DATA_TMS) ? "TMS" : "tms")); -+ -+ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* Drive NEW values on data wires */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(5); /* Drive strobe low */ -+ LPT_READ_STAT (dev, dsr); DELAY(5); /* Sample TDI from ring */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+ -+ PRINTF (DBG_ECPP, ("%s\n", (dsr & LPT_STAT_PE) ? "TDO" : "tdo")); -+ -+ return ((dsr & LPT_STAT_PE) ? 1 : 0); -+} -+ -+void -+jtagdrv_select_ring (JTAG_DEV *dev, u_int ring) -+{ -+ PRINTF (DBG_ECPP, ("jtagdrv_select_ring: ring=0x%x\n", ring)); -+ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe and TCLK high */ -+ LPT_WRITE_DATA (dev, ring); DELAY(5); /* Drive ring address */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_RCLK); DELAY(5); /* Drive strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+} -+ -+void -+jtagdrv_reset (JTAG_DEV *dev) -+{ -+ register int i; -+ -+ for (i = 0; i < 5; i++) -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* 5 clocks to Reset from any state */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+void -+jtagdrv_shift_ir (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select IR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-IR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the instruction bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+ -+void -+jtagdrv_shift_dr (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-DR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the data bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+static int -+jtagdrv_i2c_start (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start\n")); -+ -+ /* Issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* sample the line to see if we're idle */ -+ LPT_READ_STAT (dev, dsr); /* sample SDA */ -+ if ((dsr & LPT_STAT_SDA) == 0) /* Cannot start if SDA already driven */ -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start: cannot start - sda driven low\n")); -+ -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - stopped after %d clocks\n", i)); -+ break; -+ } -+ } -+ -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - cannot start - not idle\n")); -+ return (0); -+ } -+ -+ /* seen SDA float high, so issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(4); /* drive SDA low */ -+ return (1); -+} -+ -+static void -+jtagdrv_i2c_stop (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop\n")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* -+ * bug fix for temperature sensor chip -+ * if it's still driving SDA, then clock -+ * it until it stops driving it -+ */ -+ LPT_READ_STAT (dev, dsr); -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - slave not stodeved\n")); -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ break; -+ } -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - stodeved after %d clocks\n", i)); -+ } -+} -+ -+static int -+jtagdrv_i2c_strobe (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_strobe : %s", (data & LPT_DATA_SDA) ? "SDA" : "sda")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* write data */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, (" -> %s\n", (dsr & LPT_STAT_SDA) ? "SDA" : "sda")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static int -+jtagdrv_i2c_get_ack (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_get_ack -> %s\n", (dsr & LPT_STAT_SDA) ? "no ack" : "ack")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 0 : 1); -+} -+ -+static int -+jtagdrv_i2c_drive_ack (JTAG_DEV *dev, int nack) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, nack ? LPT_DATA_SDA : 0); DELAY(5); /* SDA low for ack, high for nack */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA for ack */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_drive_ack %d -> %s\n", nack, (dsr & LPT_STAT_SDA) ? "done" : "more")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static void -+jtagdrv_i2c_shift_addr (JTAG_DEV *dev, u_int address, int readNotWrite) -+{ -+ register int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_addr: %x\n", address)); -+ -+ for (i = I2C_ADDR_LEN-1; i >= 0; i--) -+ jtagdrv_i2c_strobe (dev, (address & (1 << i)) ? LPT_DATA_SDA : 0); -+ -+ jtagdrv_i2c_strobe (dev, readNotWrite ? LPT_DATA_SDA : 0); -+} -+ -+static u_char -+jtagdrv_i2c_shift_data (JTAG_DEV *dev, u_char data) -+{ -+ register int i; -+ u_char val = 0; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : %02x\n", data)); -+ -+ for (i = I2C_DATA_LEN-1; i >= 0; i--) -+ if (jtagdrv_i2c_strobe (dev, data & (1 << i) ? LPT_DATA_SDA : 0)) -+ val |= (1 << i); -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : -> %02x\n", val)); -+ -+ return (val); -+} -+ -+int -+jtagdrv_i2c_write (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: address=%x count=%d data=%02x\n", address, count, data[0])); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on data phase %d\n", i)); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_read (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 1); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ data[i] = jtagdrv_i2c_shift_data (dev, 0xff); -+ -+ jtagdrv_i2c_drive_ack (dev, (i == (count-1) ? 1 : 0)); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_writereg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writedate: no ack on byte %d\n", i)); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_readreg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (jtagdrv_i2c_read (dev, address, count, data)); -+} -+ -+void -+jtagdrv_i2c_clock_shift (JTAG_DEV *dev, u_int t, u_int n, u_int m) -+{ -+ int i; -+ -+ for (i = 2; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 1; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)| LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 6; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(1); /* clock low | 0 */ -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(1); /* strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(1); /* strobe low */ -+} -+ -Index: linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/jtagdrv.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv.h 2005-06-01 23:12:54.692424968 -0400 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __JTAGDRV_COMMON_H -+#define __JTAGDRV_COMMON_H -+ -+#ident "@(#)$Id: jtagdrv.h,v 1.5 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.h,v $*/ -+ -+#include -+ -+/* include OS specific header file */ -+#if defined(LINUX) -+# include "jtagdrv_Linux.h" -+#elif defined(DIGITAL_UNIX) -+# include "jtagdrv_OSF1.h" -+#elif defined(QNX) -+# include "jtagdrv_QNX.h" -+#else -+# error cannot determint os type -+#endif -+ -+extern int jtagdebug; -+ -+#define DBG_CFG (1 << 0) -+#define DBG_OPEN (1 << 1) -+#define DBG_IOCTL (1 << 2) -+#define DBG_ECPP (1 << 3) -+#define DBG_FN (1 << 4) -+ -+#define DRIVER_NAME "jtag" -+ -+#if defined(LINUX) -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printk X : (void) 0) -+#define PRINTMSG(fmt, arg...) printk(KERN_INFO DRIVER_NAME ": " fmt, ##arg) -+#else -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printf X : (void) 0) -+#define PRINTMSG(M, A) printf ("jtag: " M, A) -+#endif -+ -+extern void jtagdrv_select_ring (JTAG_DEV *pp, u_int ring); -+extern void jtagdrv_reset (JTAG_DEV *pp); -+extern void jtagdrv_shift_ir (JTAG_DEV *pp, u_char *value, int nbits); -+extern void jtagdrv_shift_dr (JTAG_DEV *pp, u_char *value, int nbits); -+ -+extern int jtagdrv_i2c_write (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_read (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_writereg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern int jtagdrv_i2c_readreg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern void jtagdrv_i2c_clock_shift (JTAG_DEV *pp, u_int t, u_int n, u_int m); -+ -+ -+#endif /* __JTAGDRV_COMMON_H */ -Index: linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/jtagdrv_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv_Linux.c 2005-06-01 23:12:54.693424816 -0400 -@@ -0,0 +1,319 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: jtagdrv_Linux.c,v 1.18 2004/01/06 11:15:46 fabien Exp $ -+ * $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.c,v $ -+ */ -+ -+#include "jtagdrv.h" -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("JTAG Parallel port QsNet switch interface"); -+ -+MODULE_LICENSE("GPL"); -+ -+#define MAJOR_INSTANCE 0 /* 0 is dynamic assign of device major */ -+#define MAX_JTAG_DEV 4 -+ -+int jtag_major = MAJOR_INSTANCE; -+int jtagdebug = 0; -+MODULE_PARM(jtag_major, "i"); -+MODULE_PARM(jtagdebug, "i"); -+ -+JTAG_DEV jtag_devs[MAX_JTAG_DEV]; -+ -+int io[MAX_JTAG_DEV]= { 0, }; -+MODULE_PARM(io, "1-4i"); -+ -+ -+/* The fops functions */ -+int jtag_open(struct inode *, struct file *); -+int jtag_close(struct inode *, struct file *); -+int jtag_ioctl(struct inode *, struct file *, unsigned int, unsigned long ); -+ -+struct file_operations jtag_fops = { -+ ioctl: jtag_ioctl, -+ open: jtag_open, -+ release: jtag_close, -+}; -+ -+int -+jtag_probe(void) -+{ -+ int i=0; -+ int default_io = 1; -+ JTAG_DEV *dev; -+ unsigned char value=0xff; -+ -+ /* see if there are any user supplied io addr */ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ if ( io[i] != 0x00) -+ default_io = 0; -+ jtag_devs[i].base = io[i]; -+ } -+ -+ if ( default_io ) { -+ jtag_devs[0].base = 0x3bc; -+ jtag_devs[1].base = 0x378; -+ jtag_devs[2].base = 0x278; -+ jtag_devs[3].base = 0x268; -+ } -+ -+ for ( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ if ( jtag_devs[i].base == 0x3bc ) -+ jtag_devs[i].region = 3; -+ else -+ jtag_devs[i].region = 8; -+ jtag_devs[i].present = 0; -+ } -+ -+ -+ if( default_io ) -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag")) { -+ LPT_WRITE(dev, 0,0); -+ LPT_READ(dev, 0,value); -+ if ( value != 0xff) { -+ PRINTMSG("(%d , %d) present, io=0x%04lx\n",jtag_major,i,dev->base); -+ -+ dev->present=1; -+ } -+ else -+ release_region(dev->base, dev->region); -+ } -+ } -+ return 0; -+ } -+ else /* Force the region to be present, this makes the PCI parallel cards work */ -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) -+ { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag") && (dev->base != 0)) -+ { -+ PRINTMSG("(%d , %d) forced by user, io=0x%04lx\n",jtag_major,i,dev->base); -+ dev->present=1; -+ } -+ else -+ { -+ if( dev->base != 0) -+ release_region(dev->base, dev->region); -+ } -+ } -+ return 0; -+ } -+} -+ -+int init_module(void) -+{ -+ int result,i; -+ result = register_chrdev(jtag_major, DRIVER_NAME, &jtag_fops); -+ if (result < 0) { -+ PRINTMSG("Couldn't register char device err == %d\n",jtag_major); -+ return -1; -+ } -+ -+ if ( jtag_major == 0 ) -+ jtag_major = result; -+ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ jtag_devs[i].base=io[i]; -+ } -+ -+ jtag_probe(); -+ -+ PRINTMSG("Registered character device, major == %d\n",jtag_major); -+ return 0; -+} -+ -+void cleanup_module(void) -+{ -+ int i=0; -+ -+ for( i = 0; i < MAX_JTAG_DEV; i++) { -+ if( jtag_devs[i].present) -+ release_region(jtag_devs[i].base, jtag_devs[i].region); -+ } -+ -+ unregister_chrdev(jtag_major, DRIVER_NAME); -+ PRINTMSG("Unloaded char device\n"); -+} -+ -+ -+int -+jtag_open (struct inode *inode, struct file *filp) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ /* -+ * Only allow a single open at a time -+ */ -+ if (dev->open) -+ return (-EBUSY); -+ dev->open = 1; -+ -+ /* -+ * Initialise the hardware registers -+ */ -+ -+ LPT_WRITE (dev, LPT_CTRL, 0); -+ DELAY(50); -+ LPT_WRITE (dev, LPT_CTRL, LPT_CTRL_INIT); -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_close(struct inode *inode, struct file *filp) -+{ -+ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ dev->open = 0; -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_ioctl (struct inode *inode, struct file *filp, unsigned int io_cmd, unsigned long io_data) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ JTAG_RESET_ARGS *resetargs; -+ JTAG_SHIFT_ARGS *shiftargs; -+ I2C_ARGS *i2cargs; -+ I2C_CLOCK_SHIFT_ARGS *clockargs; -+ u_char *buf; -+ int freq; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ PRINTF (DBG_IOCTL, ("jtag_ioctl: device %d cmd=%x\n", unit, io_cmd)); -+ -+ switch (io_cmd) -+ { -+ case JTAG_RESET: -+ resetargs = (JTAG_RESET_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (resetargs->ring)) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, resetargs->ring); -+ jtagdrv_reset (dev); -+ return (0); -+ -+ case JTAG_SHIFT_IR: -+ case JTAG_SHIFT_DR: -+ shiftargs = (JTAG_SHIFT_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (shiftargs->ring) || shiftargs->nbits > (JTAG_MAX_DATA_LEN*JTAG_MAX_CHIPS)) { -+ return (-EFAULT); -+ } -+ -+ buf = (u_char *) kmalloc (JTAG_NBYTES(shiftargs->nbits), GFP_KERNEL); -+ -+ if (buf == (u_char *) NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (buf, shiftargs->value, JTAG_NBYTES(shiftargs->nbits))) -+ { -+ kfree(buf); -+ return (-EFAULT); -+ } -+ -+ -+ jtagdrv_select_ring (dev, shiftargs->ring); -+ -+ if (io_cmd == JTAG_SHIFT_IR) -+ jtagdrv_shift_ir (dev, buf, shiftargs->nbits); -+ else -+ jtagdrv_shift_dr (dev, buf, shiftargs->nbits); -+ -+ if (copy_to_user (shiftargs->value, buf, JTAG_NBYTES (shiftargs->nbits))) -+ { -+ kfree (buf); -+ return (-EFAULT); -+ } -+ -+ kfree (buf); -+ return (0); -+ -+ case I2C_WRITE: -+ case I2C_READ: -+ case I2C_WRITEREG: -+ case I2C_READREG: -+ i2cargs = (I2C_ARGS *) io_data; -+ -+ if (! VALID_I2C_RING(i2cargs->ring) || i2cargs->count > I2C_MAX_DATA_LEN) -+ return (-EFAULT); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | i2cargs->ring); -+ switch (io_cmd) -+ { -+ case I2C_WRITE: -+ i2cargs->ok = jtagdrv_i2c_write (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READ: -+ i2cargs->ok = jtagdrv_i2c_read (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_WRITEREG: -+ i2cargs->ok = jtagdrv_i2c_writereg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READREG: -+ i2cargs->ok = jtagdrv_i2c_readreg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ } -+ return (0); -+ -+ case I2C_CLOCK_SHIFT: -+ clockargs = (I2C_CLOCK_SHIFT_ARGS *) io_data; -+ -+ freq = (10 * clockargs->m / (1 << (((clockargs->n + 1) & 3)))); -+ -+ /* validate the value, and initialise the ring */ -+ if (clockargs->t != 0 || clockargs->n > 3 || clockargs->m > 127) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | RING_CLOCK_SHIFT); -+ jtagdrv_i2c_clock_shift (dev, clockargs->t, clockargs->n, clockargs->m); -+ jtagdrv_select_ring (dev, 0); -+ return (0); -+ -+ default: -+ return (-EINVAL); -+ } -+ return (-EINVAL); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv_Linux.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/jtagdrv_Linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/jtagdrv_Linux.h 2005-06-01 23:12:54.693424816 -0400 -@@ -0,0 +1,174 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv_Linux.h,v 1.3 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.h,v $*/ -+ -+#ifndef __JTAGDRV_LINUX_H -+#define __JTAGDRV_LINUX_H -+ -+#include -+#include -+ -+typedef struct jtag_dev -+{ -+ unsigned long base; -+ int region; -+ -+ u_int present:1; -+ u_int open:1; -+} JTAG_DEV; -+ -+/* -+** -+** Hardware Defines -+** -+*/ -+ -+/* -+ * Assume that bit 4 of the Control Register is set to 1 (by default) -+ * to enable the printer port (CS3). -+ * -+ * The default base address is 3BC-3BF. -+ */ -+ -+#define LPT0 0x3BC /* CSR Base Address - note this can -+ * change depending on the setting -+ * in the Control Register 0. -+ * -+ * LPT1 0x378 -+ * LPT2 0x278 -+ * LPT3 0x268 -+ */ -+ -+/* -+ * Register offsets from the port base address -+ */ -+ -+#define LPT_REGISTER_0 0 -+#define LPT_REGISTER_1 1 -+#define LPT_REGISTER_2 2 -+#define LPT_REGISTER_3 0x400 -+#define LPT_REGISTER_4 0x401 -+#define LPT_REGISTER_5 0x402 -+ -+/* -+ * Chip control registers -+ */ -+ /* Base address for Super I/O National*/ -+ -+#define SIO_BASE_ADDR 0x26e /* Semiconductor PC87332VLJ combo-chip*/ -+#define CR4_REG 0x04 /* index 4, printer control reg 4 */ -+ -+#define LPT_EPP 0x01 /* Enable bit for epp */ -+#define LPT_ECP 0x04 /* Enable bit for ecp */ -+ -+/* -+ * Registers for use with centronics, nibble and byte modes. -+ */ -+ -+#define LPT_DATA LPT_REGISTER_0 /* line printer port data */ -+#define LPT_STAT LPT_REGISTER_1 /* LPT port status */ -+#define LPT_CTRL LPT_REGISTER_2 /* LPT port control */ -+ -+/* -+ * Registers for use with ECP mode. -+ */ -+ -+#define LPT_DFIFO LPT_REGISTER_3 /* r/w fifo register */ -+#define LPT_CFGB LPT_REGISTER_4 /* Configuration B */ -+#define LPT_ECR LPT_REGISTER_5 /* Exteded control */ -+ -+/* -+ * Bit assignments for ECR register. -+ */ -+ -+ /* Bits 0-4 */ -+ -+#define LPT_ECR_EMPTY 0x01 /* FIFO is empty */ -+#define LPT_ECR_FULL 0x02 /* FIFO is full */ -+#define LPT_ECR_SERV 0x04 /* Service bit */ -+#define LPT_ECR_DMA 0x08 /* DMA enable */ -+#define LPT_ECR_nINTR 0x10 /* Interrupt disable */ -+ -+ /* -+ * Bits 5-7 are ECR modes. -+ */ -+ -+#define LPT_ECR_PAR 0x20 /* Parallel port FIFO mode */ -+#define LPT_ECR_ECP 0x60 /* ECP mode */ -+#define LPT_ECR_CFG 0xE0 /* Configuration mode */ -+#define LPT_ECR_CLEAR ~0xE0 /* Cear mode bits */ -+ -+/* -+ * Bit assignments for the parallel port STATUS register: -+ */ -+ -+#define LPT_STAT_BIT0 0X1 /* Reserved. Bit always set. */ -+#define LPT_STAT_BIT1 0X2 /* Reserved. Bit always set. */ -+#define LPT_STAT_IRQ 0x4 /* interrupt status bit */ -+#define LPT_STAT_ERROR 0x8 /* set to 0 to indicate error */ -+#define LPT_STAT_SLCT 0x10 /* status of SLCT lead from printer */ -+#define LPT_STAT_PE 0x20 /* set to 1 when out of paper */ -+#define LPT_STAT_ACK 0x40 /* acknowledge - set to 0 when ready */ -+#define LPT_STAT_nBUSY 0x80 /* busy status bit, 0=busy, 1=ready */ -+ -+/* -+ * Bit assignments for the parallel port CONTROL register: -+ */ -+ -+#define LPT_CTRL_nSTROBE 0x1 /* Printer Strobe Control */ -+#define LPT_CTRL_nAUTOFD 0x2 /* Auto Feed Control */ -+#define LPT_CTRL_INIT 0x4 /* Initialize Printer Control */ -+#define LPT_CTRL_nSLCTIN 0x8 /* 0=select printer, 1=not selected */ -+#define LPT_CTRL_IRQ 0x10 /* Interrupt Request Enable Control */ -+#define LPT_CTRL_DIR 0x20 /* Direction control */ -+#define LPT_CTRL_BIT6 0X40 /* Reserved. Bit always set. */ -+#define LPT_CTRL_BIT7 0X80 /* Reserved. Bit always set. */ -+ -+ -+#define LPT_WRITE(dev, regname, value) do { outb(value, (dev)->base + regname); } while (0) -+#define LPT_READ(dev, regname,value) do { value = inb((dev)->base + regname); } while (0) -+ -+ -+ -+/* Standard register access macros */ -+#define LPT_WRITE_CTRL(dev, value) LPT_WRITE(dev, LPT_CTRL, LPT_CTRL_INIT | value) -+#define LPT_WRITE_DATA(dev, value) LPT_WRITE(dev, LPT_DATA, value) -+#define LPT_READ_STAT(dev, value) LPT_READ(dev, LPT_STAT, value) -+ -+/* -+ * The jtag signals are connected to the parallel port as follows : -+ * -+ * TRST bit 0 -+ * TDI bit 1 -+ * TMS bit 2 -+ * TCLK AFX -+ * TDO PE -+ */ -+#define LPT_DATA_TRST 1 -+#define LPT_DATA_TDI 2 -+#define LPT_DATA_TMS 4 -+#define LPT_CTRL_TCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_TDO LPT_STAT_PE -+ -+/* -+ * The I2C signals are connected as follows : -+ */ -+#define LPT_DATA_SDA 2 -+#define LPT_CTRL_SCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_SDA LPT_STAT_PE -+ -+/* -+ * The ring selection signals are as follows : -+ * addr bit 0-7 -+ * clock nSLCTIN -+ */ -+#define LPT_CTRL_RCLK LPT_CTRL_nSLCTIN -+ -+ -+#endif /* __JTAGDRV_LINUX_H */ -Index: linux-2.4.21/drivers/net/qsnet/jtag/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/Makefile 2005-06-01 23:12:54.694424664 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/jtag/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/jtag/Makefile -+# -+ -+list-multi := jtag.o -+jtag-objs := jtagdrv_Linux.o jtagdrv.o -+export-objs := -+obj-$(CONFIG_JTAG) := jtag.o -+ -+jtag.o : $(jtag-objs) -+ $(LD) -r -o $@ $(jtag-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/jtag/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/Makefile.conf 2005-06-01 23:12:54.694424664 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = jtag.o -+MODULENAME = jtag -+KOBJFILES = jtagdrv_Linux.o jtagdrv.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_JTAG -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/jtag/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/jtag/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/jtag/quadrics_version.h 2005-06-01 23:12:54.694424664 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/Makefile 2005-06-01 23:12:54.695424512 -0400 -@@ -0,0 +1,17 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2003 Quadrics Ltd. -+# -+# File: driver/net/qsnet/Makefile -+# -+ -+subdir-$(CONFIG_QSNET) += qsnet elan -+subdir-$(CONFIG_ELAN3) += elan3 -+subdir-$(CONFIG_ELAN4) += elan4 -+subdir-$(CONFIG_EP) += ep -+subdir-$(CONFIG_EIP) += eip -+subdir-$(CONFIG_RMS) += rms -+subdir-$(CONFIG_JTAG) += jtag -+ -+include $(TOPDIR)/Rules.make -Index: linux-2.4.21/drivers/net/qsnet/qsnet/debug.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/debug.c 2005-06-01 23:12:54.696424360 -0400 -@@ -0,0 +1,583 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.21 2004/08/19 08:09:57 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/debug.c,v $ */ -+ -+#include -+#include -+#include -+ -+caddr_t qsnet_debug_buffer_ptr = NULL; -+int qsnet_debug_front = 0; -+int qsnet_debug_back = 0; -+int qsnet_debug_lost_lines = 0; -+int qsnet_debug_disabled = 0; -+ -+int qsnet_debug_line_size = 256; -+int qsnet_debug_num_lines = 8192; -+ -+int qsnet_assfail_mode = 1; /* default to BUG() */ -+ -+int qsnet_debug_running = 0; -+int kqsnet_debug_running = 0; -+ -+static spinlock_t qsnet_debug_lock; -+static kcondvar_t qsnet_debug_wait; -+static char qsnet_debug_buffer_space[8192]; -+ -+#define QSNET_DEBUG_PREFIX_MAX_SIZE 32 -+#define QSNET_DEBUG_MAX_WORDWRAP 15 -+ -+/* must be larger than QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2 */ -+#if defined(DIGITAL_UNIX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 80 -+#elif defined(LINUX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 128 -+#endif -+ -+#define isspace(CH) ((CH==' ') | (CH=='\t') | (CH=='\n')) -+ -+#ifdef LINUX -+#define ALLOC_DEBUG_BUFFER(ptr) do { (ptr) = (void *)__get_free_pages (GFP_KERNEL, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)); } while (0) -+#define FREE_DEBUG_BUFFER(ptr) free_pages ((unsigned long) ptr, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)) -+#else -+#define ALLOC_DEBUG_BUFFER(ptr) KMEM_ALLOC (ptr, caddr_t, qsnet_debug_num_lines * qsnet_debug_line_size, 1) -+#define FREE_DEBUG_BUFFER(ptr) KMEM_FREE (ptr, qsnet_debug_num_lines * qsnet_debug_line_size) -+#endif -+ -+void -+qsnet_debug_init () -+{ -+ spin_lock_init (&qsnet_debug_lock); -+ kcondvar_init (&qsnet_debug_wait); -+ -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_debug_lost_lines = 0; -+ -+ if (qsnet_debug_line_size < (QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2)) -+ qsnet_debug_line_size = 256; -+ -+ qsnet_debug_running = 1; -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "assfail_mode", &qsnet_assfail_mode, 0); -+} -+ -+void -+qsnet_debug_fini() -+{ -+ if (!qsnet_debug_running) return; -+ -+ remove_proc_entry ("assfail_mode", qsnet_procfs_config); -+ -+ spin_lock_destroy (&qsnet_debug_lock); -+ kcondvar_destroy (&qsnet_debug_wait); -+ -+ if (qsnet_debug_buffer_ptr) -+ FREE_DEBUG_BUFFER (qsnet_debug_buffer_ptr); -+ -+ qsnet_debug_buffer_ptr = NULL; -+ qsnet_debug_lost_lines = 0; -+ qsnet_debug_running = 0; -+} -+ -+void -+qsnet_debug_disable(int val) -+{ -+ qsnet_debug_disabled = val; -+} -+ -+void -+qsnet_debug_alloc() -+{ -+ caddr_t ptr; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ ALLOC_DEBUG_BUFFER (ptr); -+ -+ if (ptr != NULL) -+ { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ qsnet_debug_buffer_ptr = ptr; -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ FREE_DEBUG_BUFFER (ptr); -+ } -+ } -+ } -+ -+} -+ -+static void -+qsnet_prefix_debug(unsigned int mode, char *prefix, char *buffer) -+{ -+ /* assumes caller has lock */ -+ -+ int prefixlen = strlen(prefix); -+ char pref[QSNET_DEBUG_PREFIX_MAX_SIZE]; -+ int prefix_done = 0; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_disabled) -+ return; -+ -+ if (prefixlen >= QSNET_DEBUG_PREFIX_MAX_SIZE) -+ { -+ strncpy(pref,prefix,QSNET_DEBUG_PREFIX_MAX_SIZE -2); -+ strcpy (&pref[QSNET_DEBUG_PREFIX_MAX_SIZE-5],"... "); -+ -+ prefix = pref; -+ prefixlen = strlen(prefix); -+ } -+ -+#ifdef CONFIG_MPSAS -+ { -+ char *p; -+#define TRAP_PUTCHAR_B (0x17a - 256) -+#define SAS_PUTCHAR(c) do {\ -+ register int o0 asm ("o0") = (c);\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+\ -+ if (o0 == '\n') {\ -+ o0 = '\r';\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+ }\ -+ } while(0) -+ -+ for (p = prefix; *p; p++) -+ SAS_PUTCHAR (*p); -+ -+ for (p = buffer; *p; p++) -+ SAS_PUTCHAR (*p); -+ } -+#else -+ if (mode & QSNET_DEBUG_BUFFER) -+ { -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_lost_lines++; -+ else -+ { -+ caddr_t base = &qsnet_debug_buffer_ptr[qsnet_debug_line_size * qsnet_debug_back]; -+ caddr_t lim = base + qsnet_debug_line_size - 2; -+ caddr_t p; -+ -+ p = buffer; -+ prefix_done = 0; -+ while (*p) -+ { -+ /* sort out prefix */ -+ if ( prefix_done++ ) -+ { -+ int i; -+ for(i=0;i 0 ) -+ { -+ int i; -+ for(i=0;i remaining) len = remaining; -+ -+ strncpy(line, p, len); -+ line[len] = 0; -+ p += len; -+ -+ /* word wrap */ -+ if ((len == remaining) && *p && !isspace(*p)) -+ { -+ /* lets see if we can back track and find a white space to break on */ -+ char * ptr = &line[len-1]; -+ int count = 1; -+ -+ while ( ( !isspace(*ptr) ) && ( count < QSNET_DEBUG_MAX_WORDWRAP )) -+ { -+ count++; -+ ptr--; -+ } -+ -+ if ( isspace(*ptr) ) -+ { -+ /* found somewhere to wrap to */ -+ p -= (count-1); /* need to loose the white space */ -+ len -= count; -+ } -+ } -+ -+ if (line[len-1] != '\n' ) -+ { -+ line[len] = '\n'; -+ line[len+1] = 0; -+ } -+ -+ /* we put a \n in so dont need another one next */ -+ if ( *p == '\n') -+ p++; -+ -+#if defined(DIGITAL_UNIX) -+ { -+ char *pr; -+ -+ for (pr = pref; *pr; pr++) -+ cnputc (*pr); -+ -+ for (pr = line; *pr; pr++) -+ cnputc (*pr); -+ } -+#elif defined(LINUX) -+ printk("%s%s",pref,line); -+#endif -+ } -+ } -+#endif /* CONFIG_MPSAS */ -+} -+ -+void -+qsnet_vdebugf (unsigned int mode, char *prefix, char *fmt, va_list ap) -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ -+ if (prefix == NULL) -+ printk ("qsnet_vdebugf: prefix==NULL\n"); -+ else -+ qsnet_prefix_debug(mode, prefix, qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+void kqsnet_debugf(char *fmt,...) -+{ -+ if ( kqsnet_debug_running ) { -+ va_list ap; -+ char string[20]; -+ -+ sprintf (string, "mm=%p:", current->mm); -+ va_start(ap, fmt); -+ qsnet_vdebugf(QSNET_DEBUG_BUFFER, string, fmt, ap); -+ va_end(ap); -+ } -+} -+void -+qsnet_debugf(unsigned int mode, char *fmt,...) -+{ -+ va_list ap; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+ va_start (ap, fmt); -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ va_end (ap); -+ -+ qsnet_prefix_debug(mode, "", qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+int -+qsnet_debug_buffer (caddr_t ubuffer, int len) -+{ -+ caddr_t buffer, ptr, base; -+ int remain, len1; -+ unsigned long flags; -+ static char qsnet_space[65536]; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (len < qsnet_debug_line_size) -+ return (-1); -+ -+ if (len > (qsnet_debug_line_size * qsnet_debug_num_lines)) -+ len = qsnet_debug_line_size * qsnet_debug_num_lines; -+ -+ if ( len > 65536 ) { -+ KMEM_ZALLOC (buffer, caddr_t, len, 1); -+ } else -+ buffer = qsnet_space; -+ -+ if (buffer == NULL) -+ return (-1); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ return (-1); -+ } -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (!qsnet_debug_lost_lines && (qsnet_debug_back == qsnet_debug_front)) -+ if (kcondvar_waitsig (&qsnet_debug_wait, &qsnet_debug_lock, &flags) == 0) -+ break; -+ -+ ptr = buffer; -+ remain = len; -+ -+ if (qsnet_debug_lost_lines) -+ { -+ qsnet_debug_lost_lines = 0; -+ strcpy (ptr, "Debug Buffer has overflowed!!\n"); -+ len1 = strlen (ptr); -+ -+ remain -= len1; -+ ptr += len1; -+ } -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ /* copy the line from DebugFront */ -+ base = &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]; -+ -+ len1 = strlen (base); -+ -+ if (len1 > remain) -+ break; -+ -+ bcopy (base, ptr, len1); -+ -+ ptr += len1; -+ remain -= len1; -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ len1 = ptr - buffer; -+ -+ if (len1 != 0 && copyout (buffer, ubuffer, len1)) -+ len1 = -1; -+ -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ -+ return (len1); -+} -+ -+void -+qsnet_debug_buffer_on() -+{ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+} -+ -+void -+qsnet_debug_buffer_clear() -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL){ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Clear",""); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+ -+void -+qsnet_debug_buffer_mark(char *str) -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL) { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Mark",str); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+int -+qsnet_debug_dump () -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ return (-1); -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ printk ("%s", &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]); -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ if (qsnet_debug_lost_lines) -+ printk ("\n**** Debug buffer has lost %d lines\n****\n",qsnet_debug_lost_lines); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ return (0); -+} -+ -+int -+qsnet_debug_kmem (void *handle) -+{ -+ if (!qsnet_debug_running) return (0); -+ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(handle); -+#endif -+ return (0); -+} -+ -+int -+qsnet_assfail (char *ex, const char *func, char *file, int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (qsnet_assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (qsnet_assfail_mode & 2) -+ panic ("qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (qsnet_assfail_mode & 4) -+ qsnet_debug_disable (1); -+ -+ return 0; -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/qsnet/i686_mmx.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/i686_mmx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/i686_mmx.c 2005-06-01 23:12:54.696424360 -0400 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i686_mmx.c,v 1.11 2004/01/05 12:08:25 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/i686_mmx.c,v $*/ -+ -+#include -+ -+#if defined(LINUX_I386) -+ -+#include -+#include -+#include -+#include -+ -+int mmx_disabled = 0; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+/* These functions are lifted from arch/i386/kernel/i387.c -+ * and MUST be kept in step with the kernel (currently 2.4.17) -+ * alternatively we should export the kernel_fpu_begin() function -+ */ -+static inline void __save_init_fpu( struct task_struct *tsk ) -+{ -+ if ( cpu_has_fxsr ) { -+ asm volatile( "fxsave %0 ; fnclex" -+ : "=m" (tsk->thread.i387.fxsave) ); -+ } else { -+ asm volatile( "fnsave %0 ; fwait" -+ : "=m" (tsk->thread.i387.fsave) ); -+ } -+ tsk->flags &= ~PF_USEDFPU; -+} -+#if defined(MODULE) -+void kernel_fpu_begin(void) -+{ -+ struct task_struct *tsk = current; -+ -+ if (tsk->flags & PF_USEDFPU) { -+ __save_init_fpu(tsk); -+ return; -+ } -+ clts(); -+} -+#endif -+#endif -+ -+extern inline int -+mmx_preamble(void) -+{ -+ if (mmx_disabled || in_interrupt()) -+ return (0); -+ -+ kernel_fpu_begin(); -+ -+ return (1); -+} -+ -+extern inline void -+mmx_postamble(void) -+{ -+ kernel_fpu_end(); -+} -+ -+extern u64 -+qsnet_readq (volatile u64 *ptr) -+{ -+ u64 value; -+ -+ if (! mmx_preamble()) -+ value = *ptr; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (ptr), "r" (&value) : "memory"); -+ mmx_postamble(); -+ } -+ return (value); -+} -+ -+void -+qsnet_writeq(u64 value, volatile u64 *ptr) -+{ -+ if (! mmx_preamble()) -+ *ptr = value; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (&value), "r" (ptr) : "memory"); -+ mmx_postamble(); -+ } -+} -+#endif -Index: linux-2.4.21/drivers/net/qsnet/qsnet/kernel_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/kernel_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/kernel_linux.c 2005-06-01 23:12:54.697424208 -0400 -@@ -0,0 +1,856 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kernel_linux.c,v 1.71.2.3 2004/11/04 11:03:47 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include /* for smp_call_function() prototype */ -+#include -+#include -+ -+#include -+ -+extern int mmx_disabled; -+extern int qsnet_debug_line_size; -+extern int qsnet_debug_num_lines; -+ -+gid_t qsnet_procfs_gid; -+struct proc_dir_entry *qsnet_procfs_root; -+struct proc_dir_entry *qsnet_procfs_config; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("QsNet Kernel support code"); -+ -+MODULE_LICENSE("GPL"); -+ -+#if defined(LINUX_I386) -+MODULE_PARM(mmx_disabled, "i"); -+#endif -+ -+MODULE_PARM(qsnet_debug_line_size, "i"); -+MODULE_PARM(qsnet_debug_num_lines, "i"); -+ -+MODULE_PARM(qsnet_procfs_gid, "i"); -+ -+#ifdef KMEM_DEBUG -+EXPORT_SYMBOL(qsnet_kmem_alloc_debug); -+EXPORT_SYMBOL(qsnet_kmem_free_debug); -+#else -+EXPORT_SYMBOL(qsnet_kmem_alloc); -+EXPORT_SYMBOL(qsnet_kmem_free); -+#endif -+ -+EXPORT_SYMBOL(qsnet_kmem_display); -+EXPORT_SYMBOL(kmem_to_phys); -+ -+EXPORT_SYMBOL(cpu_hold_all); -+EXPORT_SYMBOL(cpu_release_all); -+ -+#if defined(LINUX_I386) -+EXPORT_SYMBOL(qsnet_readq); -+EXPORT_SYMBOL(qsnet_writeq); -+#endif -+ -+/* debug.c */ -+EXPORT_SYMBOL(qsnet_debugf); -+EXPORT_SYMBOL(kqsnet_debugf); -+EXPORT_SYMBOL(qsnet_vdebugf); -+EXPORT_SYMBOL(qsnet_debug_buffer); -+EXPORT_SYMBOL(qsnet_debug_alloc); -+EXPORT_SYMBOL(qsnet_debug_dump); -+EXPORT_SYMBOL(qsnet_debug_kmem); -+EXPORT_SYMBOL(qsnet_debug_disable); -+ -+EXPORT_SYMBOL(qsnet_assfail); -+ -+EXPORT_SYMBOL(qsnet_procfs_gid); -+EXPORT_SYMBOL(qsnet_procfs_root); -+ -+static int qsnet_open (struct inode *ino, struct file *fp); -+static int qsnet_release (struct inode *ino, struct file *fp); -+static int qsnet_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations qsnet_ioctl_fops = -+{ -+ ioctl: qsnet_ioctl, -+ open: qsnet_open, -+ release: qsnet_release, -+}; -+ -+static int -+qsnet_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+qsnet_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res=0; -+ -+ switch (cmd) -+ { -+ case QSNETIO_DEBUG_KMEM: -+ { -+ QSNETIO_DEBUG_KMEM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_KMEM_STRUCT))) -+ return (-EFAULT); -+ -+ /* doesnt use handle as a pointer */ -+ qsnet_kmem_display(args.handle); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_DUMP : -+ { -+ res = qsnet_debug_dump(); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_BUFFER : -+ { -+ QSNETIO_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* qsnet_debug_buffer uses copyout */ -+ if ((res = qsnet_debug_buffer (args.addr, args.len)) != -1) -+ { -+ args.len = res; -+ if (copy_to_user ((void *) arg, &args, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ res = 0; -+ } -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#ifdef KMEM_DEBUG -+static int qsnet_kmem_open (struct inode *ino, struct file *fp); -+static int qsnet_kmem_release (struct inode *ino, struct file *fp); -+static ssize_t qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+static struct file_operations qsnet_kmem_fops = -+{ -+ open: qsnet_kmem_open, -+ release: qsnet_kmem_release, -+ read: qsnet_kmem_read, -+}; -+ -+typedef struct qsnet_private_space -+{ -+ char * space; -+ int size; -+ struct qsnet_private_space *next; -+} QSNET_PRIVATE_SPACE; -+ -+typedef struct qsnet_private -+{ -+ QSNET_PRIVATE_SPACE *space_chain; -+ QSNET_PRIVATE_SPACE *current_space; -+ int current_pos; -+ -+} QSNET_PRIVATE; -+ -+#define QSNET_KMEM_DEBUG_LINE_SIZE ((int)512) -+#define QSNET_PRIVATE_PAGE_SIZE ((int)(4*1024)) -+ -+static int qsnet_kmem_fill(QSNET_PRIVATE *pd); -+ -+void -+destroy_chain(QSNET_PRIVATE * pd) -+{ -+ QSNET_PRIVATE_SPACE *mem, *next; -+ -+ if (pd == NULL) return; -+ -+ for(mem = pd->space_chain ; mem != NULL; ) -+ { -+ next = mem->next; -+ if ( mem->space ) -+ kfree ( mem->space); -+ kfree(mem); -+ mem = next; -+ } -+ kfree (pd); -+} -+ -+QSNET_PRIVATE * -+make_chain(int len) -+{ -+ QSNET_PRIVATE * pd; -+ QSNET_PRIVATE_SPACE * mem; -+ int i; -+ -+ /* make the private data block */ -+ if ((pd = kmalloc (sizeof (QSNET_PRIVATE), GFP_KERNEL)) == NULL) -+ return NULL; -+ pd->space_chain = NULL; -+ -+ /* first make the holders */ -+ for(i=0;inext = pd->space_chain; -+ mem->size = 0; -+ mem->space = 0; -+ pd->space_chain = mem; -+ -+ /* now add the space */ -+ if ((mem->space = kmalloc (QSNET_PRIVATE_PAGE_SIZE, GFP_KERNEL)) == NULL) -+ { -+ destroy_chain(pd); -+ return (NULL); -+ } -+ -+ mem->space[0] = 0; -+ -+ } -+ -+ pd->current_space = pd->space_chain; -+ pd->current_pos = 0; -+ -+ return pd; -+} -+ -+static int -+qsnet_kmem_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_kmem_release (struct inode *inode, struct file *fp) -+{ -+ if ( fp->private_data ) -+ { -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) fp->private_data; -+ -+ /* free the space */ -+ if (pd->space_chain) -+ kfree (pd->space_chain); -+ -+ /* free struct */ -+ kfree (pd); -+ } -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) file->private_data; -+ int error; -+ int output_count; -+ int num_of_links=10; -+ -+ /* make a buffer to output count bytes in */ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if ( pd == NULL) -+ { -+ /* first time */ -+ -+ /* ok we have to guess at how much space we are going to need */ -+ /* if it fails we up the space and carry try again */ -+ /* we have to do it this way as we cant get more memory whilst */ -+ /* holding the lock */ -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ -+ while ( qsnet_kmem_fill(pd) ) -+ { -+ destroy_chain(pd); -+ num_of_links += 10; -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ } -+ -+ /* we have the space and filled it */ -+ file->private_data = (void *)pd; -+ } -+ -+ /* output buffer */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ return (0); /* finished */ -+ -+ output_count = pd->current_space->size - pd->current_pos; -+ if ( output_count > count ) -+ output_count = count; -+ -+ copy_to_user(buf, (pd->current_space->space + pd->current_pos), output_count); -+ -+ pd->current_pos += output_count; -+ ppos += output_count; -+ -+ /* just check to see if we have finished the current space */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ { -+ if ( pd->current_space->next ) -+ { -+ pd->current_space = pd->current_space->next; -+ pd->current_pos = 0; -+ } -+ } -+ -+ return (output_count); -+} -+#endif /* KMEM_DEBUG */ -+ -+static int -+proc_write_qsnetdebug(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "on")) -+ qsnet_debug_buffer_on(); -+ -+ if (! strcmp (tmpbuf, "clear")) -+ qsnet_debug_buffer_clear(); -+ -+ if (! strncmp (tmpbuf, "mark",4)) -+ qsnet_debug_buffer_mark( &tmpbuf[4] ); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_qsnetdebug(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "echo command > /proc/qsnet/config/qsnetdebug\ncommand = on | off | clear | mark text\n"); -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+#include "quadrics_version.h" -+extern int kqsnet_debug_running; -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+static int __init qsnet_init(void) -+{ -+ struct proc_dir_entry *p; -+ -+ if ((qsnet_procfs_root = proc_mkdir ("qsnet", 0)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet \n"); -+ return (-ENXIO); -+ } -+ -+ if ((p = create_proc_entry ("ioctl", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/ioctl\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_ioctl_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ qsnet_proc_register_str (qsnet_procfs_root, "version", quadrics_version, S_IRUGO); -+ -+ if ((qsnet_procfs_config = proc_mkdir ("config", qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet/config \n"); -+ return (-ENXIO); -+ } -+ -+#ifdef KMEM_DEBUG -+ if ((p = create_proc_entry ("kmem_debug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/kmem_debug\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_kmem_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+#endif -+ -+ qsnet_debug_init(); -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "kqsnet_debug_running", &kqsnet_debug_running, 0); -+ -+ if ((p = create_proc_entry ("qsnetdebug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/qsnetdebug\n"); -+ return (-ENXIO); -+ } -+ p->read_proc = proc_read_qsnetdebug; -+ p->write_proc = proc_write_qsnetdebug; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ return (0); -+} -+ -+static void __exit qsnet_exit(void) -+{ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(0); -+#endif -+ qsnet_debug_fini(); -+ -+ remove_proc_entry ("qsnetdebug", qsnet_procfs_config); -+ remove_proc_entry ("kqsnet_debug_running", qsnet_procfs_config); -+#ifdef KMEM_DEBUG -+ remove_proc_entry ("kmem_debug", qsnet_procfs_config); -+#endif -+ remove_proc_entry ("config", qsnet_procfs_root); -+ -+ remove_proc_entry ("version", qsnet_procfs_root); -+ remove_proc_entry ("ioctl", qsnet_procfs_root); -+ -+ remove_proc_entry ("qsnet", 0); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(qsnet_init); -+module_exit(qsnet_exit); -+ -+#ifdef KMEM_DEBUG -+/* -+ * Kernel memory allocation. We maintain our own list of allocated mem -+ * segments so we can free them on module cleanup. -+ * -+ * We use kmalloc for allocations less than one page in size; vmalloc for -+ * larger sizes. -+ */ -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int line; -+ char filename[20]; -+} kmalloc_t; -+ -+static LIST_HEAD(kmalloc_head); -+ -+static spinlock_t kmalloc_lock = SPIN_LOCK_UNLOCKED; -+ -+/* -+ * Kernel memory allocation. We use kmalloc for allocations less -+ * than one page in size; vmalloc for larger sizes. -+ */ -+ -+static int -+qsnet_kmem_fill(QSNET_PRIVATE *pd) -+{ -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ char str[QSNET_KMEM_DEBUG_LINE_SIZE]; -+ QSNET_PRIVATE_SPACE * current_space; -+ int current_pos; -+ int len; -+ current_space = pd->space_chain; -+ current_pos = 0; -+ -+ -+ current_space->space[0] = 0; -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ /* make the next line */ -+ sprintf(str,"%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->filename); -+ len = strlen(str); -+ -+ /* does it fit on the current page */ -+ if ( (current_pos + len + 1) >= QSNET_PRIVATE_PAGE_SIZE) -+ { -+ /* move onto next page */ -+ if ((current_space = current_space->next) == NULL) -+ { -+ /* run out of space !!!! */ -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ return (1); -+ } -+ current_space->space[0] = 0; -+ current_pos = 0; -+ } -+ strcat( current_space->space + current_pos, str); -+ current_pos += len; -+ -+ /* remember how much we wrote to this page */ -+ current_space->size = current_pos; -+ -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return (0); -+} -+ -+void * -+qsnet_kmem_alloc_debug(int len, int cansleep, int zerofill, char *file, int line) -+{ -+ void *new; -+ unsigned long flags; -+ kmalloc_t *kp; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((uintptr_t) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ /* record allocation */ -+ kp = kmalloc(sizeof(kmalloc_t), cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ ASSERT(kp != NULL); -+ kp->len = len; -+ kp->ptr = new; -+ kp->used_vmalloc = (len >= PAGE_SIZE || cansleep); -+ kp->owner = current; -+ kp->caller = __builtin_return_address(0); -+ kp->time = lbolt; -+ kp->line = line; -+ len = strlen(file); -+ -+ if (len > 18) -+ strcpy(kp->filename,&file[len-18]); -+ else -+ strcpy(kp->filename,file); -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ list_add(&kp->list, &kmalloc_head); -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free_debug(void *ptr, int len, char *file, int line) -+{ -+ unsigned long flags; -+ kmalloc_t *kp; -+ struct list_head *lp; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ if (kp->ptr == ptr) { -+ if (kp->len != len) -+ printk("qsnet_kmem_free_debug(%p) ptr %p len %d mismatch: expected %d caller %p owner %p (%s:%d)\n", -+ current, ptr, len, kp->len, __builtin_return_address(0), kp->caller, file, line); -+ list_del(lp); -+ kfree(kp); /* free off descriptor */ -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ if (lp == &kmalloc_head) /* segment must be found */ -+ { -+ printk( "qsnet_kmem_free_debug(%p) ptr %p len %d not found: caller %p (%s:%d)\n", -+ current, ptr, len, __builtin_return_address(0), file, line); -+ } -+ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+ -+#else /* !KMEM_DEBUG */ -+ -+void * -+qsnet_kmem_alloc(int len, int cansleep, int zerofill) -+{ -+ void *new; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((unsigned long) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free(void *ptr, int len) -+{ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+#endif /* !KMEM_DEBUG */ -+ -+void -+qsnet_kmem_display(void *handle) -+{ -+#ifdef KMEM_DEBUG -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ int count = 0, totsize = 0; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ if (!handle || handle == kp->owner) -+ { -+ printk("qsnet_kmem_display(%p): mem %p len %d unfreed caller %p (%p) \n", -+ handle, kp->ptr, kp->len, kp->caller, kp->owner); -+ -+ count++; -+ totsize += kp->len; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ printk("qsnet_kmem_display(%p): %d bytes left in %d objects\n", handle, totsize, count); -+#endif -+} -+ -+physaddr_t -+kmem_to_phys(void *ptr) -+{ -+ virtaddr_t virt = (virtaddr_t) ptr; -+ physaddr_t phys; -+ pte_t *pte; -+ -+ if ((virt >= VMALLOC_START && virt < VMALLOC_END)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#if defined(PKMAP_BASE) -+ else if (virt >= PKMAP_BASE && virt < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#endif -+#if defined(__ia64) -+ else if (virt >= __IA64_UNCACHED_OFFSET && virt < PAGE_OFFSET) -+ { -+ /* ia64 non-cached KSEG */ -+ phys = ((physaddr_t) ptr - __IA64_UNCACHED_OFFSET); -+ } -+#endif -+ else /* otherwise it's KSEG */ -+ { -+ phys = __pa(virt); -+ } -+ -+#if defined(CONFIG_ALPHA_GENERIC) || (defined(CONFIG_ALPHA_EV6) && !defined(USE_48_BIT_KSEG)) -+ /* -+ * with TS_BIAS as bit 40 - the tsunami pci space is mapped into -+ * the kernel at 0xfffff500.00000000 however we need to convert -+ * this to the true physical address 0x00000800.00000000. -+ * -+ * there is no need for PHYS_TWIDDLE since we knew we'd get a kernel -+ * virtual address already and handled this with __pa(). -+ */ -+ if (phys & (1ul << 40)) { -+ phys &= ~(1ul << 40); /* clear bit 40 (kseg I/O select) */ -+ phys |= (1ul << 43); /* set bit 43 (phys I/O select) */ -+ } -+#endif -+ return phys; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ -+EXPORT_SYMBOL(pci_resource_size); -+EXPORT_SYMBOL(pci_get_base_address); -+EXPORT_SYMBOL(pci_base_to_kseg); -+ -+ -+/* -+ * PCI stuff. -+ * -+ * XXX pci_base_to_kseg() and pci_kseg_to_phys() are problematic -+ * in that they may not work on non-Tsunami (DS20, ES40, etc) -+ * architectures, and may not work in non-zero PCI bus numbers. -+ */ -+ -+unsigned long -+pci_get_base_address(struct pci_dev *pdev, int index) -+{ -+ unsigned long base; -+ -+ ASSERT(index >= 0 && index <= 5); -+ /* borrowed in part from drivers/scsi/sym53c8xx.c */ -+ base = pdev->base_address[index++]; -+ -+#if BITS_PER_LONG > 32 -+ if ((base & 0x7) == 0x4) -+ base |= (((unsigned long)pdev->base_address[index]) << 32); -+#endif -+ return base; -+} -+ -+unsigned long -+pci_resource_size(struct pci_dev *pdev, int index) -+{ -+ u32 addr, mask, size; -+ -+ static u32 bar_addr[] = { -+ PCI_BASE_ADDRESS_0, -+ PCI_BASE_ADDRESS_1, -+ PCI_BASE_ADDRESS_2, -+ PCI_BASE_ADDRESS_3, -+ PCI_BASE_ADDRESS_4, -+ PCI_BASE_ADDRESS_5, -+ }; -+ ASSERT(index >= 0 && index <= 5); -+ -+ /* algorithm from Rubini book */ -+ pci_read_config_dword (pdev, bar_addr[index], &addr); -+ pci_write_config_dword(pdev, bar_addr[index], ~0); -+ pci_read_config_dword (pdev, bar_addr[index], &mask); -+ pci_write_config_dword(pdev, bar_addr[index], addr); -+ -+ mask &= PCI_BASE_ADDRESS_MEM_MASK; -+ size = ~mask + 1; -+ return size; -+} -+ -+/* -+ * Convert BAR register value to KSEG address. -+ */ -+void * -+pci_base_to_kseg(u64 baddr, int bus) -+{ -+ u64 kseg; -+ -+ /* XXX tsunami specific */ -+ baddr &= ~(u64)0x100000000; /* mask out hose bit */ -+ kseg = TSUNAMI_MEM(bus) + baddr; -+ return (void *)kseg; -+} -+ -+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,0) */ -+ -+/* -+ * Spin the other CPU's in an SMP system. -+ * smp_call_function() needed to be exported to modules. It will be -+ * papered over in if running on a non-SMP box. -+ */ -+static spinlock_t hold_lock = SPIN_LOCK_UNLOCKED; -+ -+#if 0 -+static void cpu_hold(void *unused) -+{ -+ spin_lock(&hold_lock); -+ spin_unlock(&hold_lock); -+} -+#endif -+ -+void cpu_hold_all(void) -+{ -+ spin_lock(&hold_lock); -+ -+#if 0 -+ { -+ int res; -+ int retries = 10; -+ -+ /* XXXXX: cannot call smp_call_function() from interrupt context */ -+ -+ do { -+ /* only request blocking retry if not in interrupt context */ -+ res = smp_call_function(cpu_hold, NULL, !in_interrupt(), 0); -+ if (res) -+ mdelay(5); -+ } while (res && retries--); -+ -+ if (res) -+ printk("cpu_hold_all: IPI timeout\n"); -+ } -+#endif -+} -+ -+void cpu_release_all(void) -+{ -+ spin_unlock(&hold_lock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/qsnet/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/Makefile 2005-06-01 23:12:54.697424208 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/qsnet/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/qsnet/Makefile -+# -+ -+list-multi := qsnet.o -+qsnet-objs := debug.o kernel_linux.o i686_mmx.o -+export-objs := kernel_linux.o -+obj-$(CONFIG_QSNET) := qsnet.o -+ -+qsnet.o : $(qsnet-objs) -+ $(LD) -r -o $@ $(qsnet-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/qsnet/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/Makefile.conf 2005-06-01 23:12:54.698424056 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = qsnet.o -+MODULENAME = qsnet -+KOBJFILES = debug.o kernel_linux.o i686_mmx.o -+EXPORT_KOBJS = kernel_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/qsnet/qsnetkmem_linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 2005-06-01 23:12:54.698424056 -0400 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: qsnetkmem_linux.c,v 1.3 2003/08/13 10:03:27 fabien Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/qsnetkmem_linux.c,v $*/ -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int mark; -+ int line; -+ char file[256]; -+ -+} kmalloc_t; -+ -+ -+static LIST_HEAD(current_kmem); -+static LIST_HEAD(stored_kmem); -+ -+void -+count_kmem(struct list_head * list, long * count, long * size ) -+{ -+ long c,s; -+ struct list_head *tmp; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ -+ c = s = 0L; -+ -+ list_for_each(tmp, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ c++; -+ s += kmem_ptr->len; -+ } -+ -+ *count = c; -+ *size = s; -+} -+ -+void -+clear_kmem(struct list_head * list) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ list_for_each_safe(tmp, tmp2, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kmem_ptr->list); -+ free( kmem_ptr ); -+ } -+} -+ -+void -+move_kmem(struct list_head * dest, struct list_head *src) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kp= NULL; -+ -+ list_for_each_safe(tmp, tmp2, src) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kp->list); -+ -+/* -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+*/ -+ -+ list_add_tail(&kp->list, dest); -+ } -+} -+ -+void -+read_kmem(struct list_head * list) -+{ -+ FILE * fd; -+ char line[1024]; -+ int line_size = 100; -+ char * rep; -+ kmalloc_t * kp; -+ -+ clear_kmem(list); -+ -+ fd = fopen(QSNET_PROCFS_KMEM_DEBUG,"r"); -+ if ( fd == NULL) -+ { -+ printf("No Kmem Debug\n"); -+ return; -+ } -+ -+ rep = fgets(line,line_size, fd); -+ -+ while ( rep != NULL ) -+ { -+ kp = malloc(sizeof(kmalloc_t)); -+ -+ sscanf(line,"%p %d %d %p %p %u %d %s\n", -+ &kp->ptr, &kp->len, &kp->used_vmalloc, &kp->caller, &kp->owner, &kp->time, &kp->line, &kp->file[0]); -+ -+/* -+ printf(">>%s<<\n",line); -+ printf("%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->file); -+*/ -+ -+ list_add_tail(&kp->list, list); -+ -+ rep = fgets(line,line_size, fd); -+ } -+ fclose(fd); -+} -+ -+void -+mark_kmem(struct list_head * list, int mark) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ kp->mark = mark; -+ } -+} -+ -+kmalloc_t * -+find_kmem(kmalloc_t * value, struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if ( (kp->ptr == value->ptr) -+ && (kp->len == value->len) -+ && (kp->used_vmalloc == value->used_vmalloc ) -+ && (kp->owner == value->owner ) -+ && (kp->caller == value->caller ) -+ && (kp->time == value->time ) -+ && (kp->line == value->line ) -+ && !(strcmp(kp->file,value->file) )) -+ return kp; -+ } -+ return NULL; -+} -+ -+void -+diff_kmem(struct list_head *curr, struct list_head *stored) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ long c,s; -+ -+ mark_kmem(stored, 0); -+ mark_kmem(curr, 0); -+ -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, curr) != NULL) -+ kp->mark = 1; -+ } -+ -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, stored) != NULL) -+ kp->mark = 1; -+ } -+ -+ c=s=0L; -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("-- mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("-- %4ld %10ld \n",c,s); -+ -+ c=s=0L; -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("++ mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("++ %4ld %10ld \n",c,s); -+} -+ -+ -+void -+print_kmem(struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ -+ } -+} -+ -+void -+print_cmds() -+{ -+ long c,s; -+ -+ printf("q : quits \n"); -+ printf("r : read\n"); -+ printf("c : print current\n"); -+ printf("o : print stored\n"); -+ printf("s : store\n"); -+ -+ count_kmem(¤t_kmem, &c, &s ); -+ printf("\ncurrent : %4ld %10ld\n", c , s); -+ -+ count_kmem(&stored_kmem, &c, &s ); -+ printf("store : %4ld %10ld\n", c , s); -+ -+} -+ -+int -+main() -+{ -+ char line[128]; -+ int line_size=127; -+ int len; -+ -+ -+ while (1) -+ { -+ -+ printf(">> "); -+ fgets(line,line_size, stdin); -+ -+ -+ len = strlen( line ) -1; -+ if ( len ) -+ { -+ switch ( tolower(line[0]) ) -+ { -+ case 'q': -+ exit(0); -+ -+ case 'r' : -+ read_kmem(¤t_kmem); -+ break; -+ -+ case 'c' : -+ print_kmem(¤t_kmem); -+ break; -+ -+ case 'o' : -+ print_kmem(&stored_kmem); -+ break; -+ -+ case 's' : -+ clear_kmem(&stored_kmem); -+ move_kmem(&stored_kmem, ¤t_kmem); -+ break; -+ -+ case 'd' : -+ diff_kmem(¤t_kmem, &stored_kmem); -+ break; -+ -+ default: -+ print_cmds(); -+ } -+ -+ -+ -+ } -+ else -+ print_cmds(); -+ } -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/drivers/net/qsnet/qsnet/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/qsnet/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/qsnet/quadrics_version.h 2005-06-01 23:12:54.699423904 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/rms/Makefile -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/rms/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/rms/Makefile 2005-06-01 23:12:54.699423904 -0400 -@@ -0,0 +1,31 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/rms/Makefile -+# -+ -+ -+# -+ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/rms/Makefile -+# -+ -+list-multi := rms.o -+rms-objs := rms_kern.o rms_kern_Linux.o -+export-objs := -+obj-$(CONFIG_RMS) := rms.o -+ -+rms.o : $(rms-objs) -+ $(LD) -r -o $@ $(rms-objs) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -+ -+include $(TOPDIR)/Rules.make -+ -Index: linux-2.4.21/drivers/net/qsnet/rms/Makefile.conf -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/rms/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/rms/Makefile.conf 2005-06-01 23:12:54.699423904 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = rms.o -+MODULENAME = rms -+KOBJFILES = rms_kern.o rms_kern_Linux.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_RMS -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.4.21/drivers/net/qsnet/rms/quadrics_version.h -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/rms/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/rms/quadrics_version.h 2005-06-01 23:12:54.700423752 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.30qsnet" -Index: linux-2.4.21/drivers/net/qsnet/rms/rms_kern.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/rms/rms_kern.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/rms/rms_kern.c 2005-06-01 23:12:54.702423448 -0400 -@@ -0,0 +1,1757 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * rms_kern.c: RMS kernel module -+ * -+ * $Source: /cvs/master/quadrics/rmsmod/rms_kern.c,v $ -+ */ -+ -+#ident "@(#)$Id: rms_kern.c,v 1.62.2.4 2005/01/18 11:05:45 duncan Exp $" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * extend stats added in version 5 -+ */ -+#define RMS_MODVERSION 5 -+ -+#if defined(SOLARIS) -+ -+#define CURUID() CURPROC()->p_cred->cr_uid -+#define RMS_NCPUS() 4 -+#define PROC_STRUCT proc -+ -+#include -+ -+#elif defined(LINUX) -+ -+#ifdef PROCESS_ACCT -+#define TIMEVAL_TO_MSEC(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000) -+#define TIMEVAL_TO_CT(tv) ((tv)->tv_sec * HZ + (tv)->tv_usec / (1000000L / HZ)) -+#endif -+ -+#ifdef RSS_ATOMIC -+#define PROC_RSS(proc) ((proc)->mm ? atomic_read(&(proc)->mm->rss) : 0) -+#else -+#define PROC_RSS(proc) ((proc)->mm ? (proc)->mm->rss : 0) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+# define RMS_NCPUS() smp_num_cpus -+#else -+# define RMS_NCPUS() num_online_cpus() -+#endif -+ -+#define CURUID() CURPROC()->uid -+#define p_pid pid -+#define PROC_STRUCT task_struct -+ -+/* care needed with conversion to millisecs on 32-bit Linux */ -+#ifdef LINUX -+#ifdef LINUX_I386 -+#define CT_TO_MSEC(x) ct_to_msec(x) -+ -+uint64_t ct_to_msec(clock_t t) -+{ -+ uint64_t msecs; -+ if (t < 2000000) -+ { -+ t = (1000 * t)/HZ; -+ msecs = t; -+ } -+ else -+ { -+ t = t / HZ; -+ msecs = t * 1000; -+ } -+ return(msecs); -+} -+ -+#else -+#define CT_TO_MSEC(x) (((x) * 1000)/HZ) -+#endif -+#endif -+ -+#ifndef FALSE -+#define FALSE (0) -+#define TRUE (!FALSE) -+#endif -+ -+#include -+#include -+#include -+ -+#include -+ -+#elif defined(DIGITAL_UNIX) -+ -+#define CURUID() CURPROC()->p_ruid -+extern int ncpus; -+#define RMS_NCPUS() ncpus -+#define PROC_STRUCT proc -+#define TIMEVAL_TO_MSEC(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000) -+ -+#include -+ -+#else -+#error cannot determine operating system -+#endif -+ -+int shm_cleanup(void); -+ -+struct cap_desc { -+ -+ struct cap_desc *next; -+ int index; /* index of capability in program */ -+ ELAN_CAPABILITY cap; /* elan capability */ -+ -+}; -+ -+struct proc_desc { -+ -+ struct proc_desc *next; -+ struct PROC_STRUCT *proc; -+ struct prg_desc *program; /* controlling program */ -+ int mycap; /* index of my capability */ -+ int myctx; /* context number for process */ -+ int flags; -+ int vp; /* elan virtual process number */ -+}; -+ -+struct prg_desc { -+ -+ struct prg_desc *next; -+ int id; /* program id */ -+ int flags; /* program status flags */ -+ uid_t uid; /* user id */ -+ int ncpus; /* number of cpus allocated to program */ -+ int nprocs; /* number of processes in program */ -+ struct proc_desc *pdescs; /* processes in this program */ -+ int ncaps; /* number of capabilities */ -+ struct cap_desc *caps; /* elan capabilities */ -+ char *corepath; /* core path for parallel program */ -+ int psid; /* processor set id */ -+ -+ uint64_t cutime; /* user time accumulated by children */ -+ uint64_t cstime; /* system time accumulated by children */ -+ uint64_t start_time; /* time program created */ -+ uint64_t end_time; /* time last process exited */ -+ uint64_t sched_time; /* last time job was scheduled */ -+ uint64_t accum_atime; /* allocated time last deschedule */ -+ uint64_t memint; /* accumulated memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ long maxrss; /* maximum size to date */ -+ long majflt; -+ -+#ifdef LINUX -+ struct proc_dir_entry *proc_entry; -+#endif -+ -+}; -+ -+#if defined(LINUX) -+static int rms_ptrack_callback (void *arg, int phase, struct task_struct *child); -+#else -+static void rms_xd_callback(void *arg, int phase, void *ctask); -+static void rms_xa_callback (void *arg, int phase, void *ctask); -+#endif -+ -+static void prgsignal(struct prg_desc *program, int signo); -+static uint64_t gettime(void); -+static void freeProgram(struct prg_desc *program); -+ -+static struct prg_desc *programs = 0; -+ -+kmutex_t rms_lock; -+ -+int rms_init(void) -+{ -+ kmutex_init (&rms_lock); -+ -+ DBG(printk("rms: initialising\n")); -+ -+ return(ESUCCESS); -+} -+ -+int rms_reconfigure(void) -+{ -+ return(ESUCCESS); -+} -+ -+int rms_programs_registered(void) -+{ -+ /* -+ ** Called when trying to unload rms.mod will not succeed -+ ** if programs registered -+ */ -+ -+ struct prg_desc *program, **pp; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ { -+ if (program->nprocs != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ return(EBUSY); -+ } -+ } -+ -+ /* -+ ** We have traversed the programs list and no processes registered -+ ** Now free the memory -+ */ -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ *pp = program->next; -+ freeProgram(program); -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(ESUCCESS); -+ -+} -+ -+int rms_fini(void) -+{ -+ /* -+ * don't allow an unload if there are programs registered -+ */ -+ if (rms_programs_registered()) -+ return(EBUSY); -+ -+ kmutex_destroy (&rms_lock); -+ -+ DBG(printk("rms: removed\n")); -+ -+ return(ESUCCESS); -+} -+ -+#ifdef LINUX -+ -+extern struct proc_dir_entry *rms_procfs_programs; -+ -+/* -+ * display one pid per line if there isn't enough space -+ * for another pid then add "...\n" and stop -+ */ -+int pids_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ struct proc_desc *pdesc; -+ char *ptr = page; -+ int bytes = 0, nb; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ if (bytes > count - 15) -+ { -+ bytes += sprintf(ptr,"...\n"); -+ break; -+ } -+ nb = sprintf(ptr, "%d %d\n", pdesc->proc->p_pid, pdesc->vp); -+ bytes += nb; -+ ptr += nb; -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(bytes); -+} -+ -+int status_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ int bytes; -+ if (program->flags & PRG_KILLED) -+ bytes = sprintf(page, "killed\n"); -+ else -+ bytes = sprintf(page, "running\n"); -+ return(bytes); -+} -+ -+void rms_create_proc_entry(struct prg_desc *program) -+{ -+ struct proc_dir_entry *p; -+ char name[32]; -+ -+ if (rms_procfs_programs) -+ { -+ sprintf(name,"%d", program->id); -+ if ((program->proc_entry = proc_mkdir(name, rms_procfs_programs)) != NULL) -+ { -+ if ((p = create_proc_entry ("pids", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = pids_callback; -+ } -+ if ((p = create_proc_entry ("status", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = status_callback; -+ } -+ } -+ } -+} -+ -+void rms_remove_proc_entry(struct prg_desc *program) -+{ -+ char name[32]; -+ if (rms_procfs_programs) -+ { -+ if (program->proc_entry) -+ { -+ remove_proc_entry ("pids", program->proc_entry); -+ remove_proc_entry ("status", program->proc_entry); -+ } -+ sprintf(name,"%d", program->id); -+ remove_proc_entry (name, rms_procfs_programs); -+ } -+} -+ -+#endif -+ -+/* -+ * find a program from its index/pid -+ * -+ * Duncan: make the lookup more efficient for large numbers of programs/processes -+ */ -+static struct prg_desc *findProgram(const int id) -+{ -+ struct prg_desc *program; -+ for (program = programs; program; program = program->next) -+ if (program->id == id) -+ return(program); -+ return(0); -+} -+ -+static struct proc_desc *findProcess(const int pid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ for (program = programs; program; program = program->next) -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->proc->p_pid == pid) -+ return(pdesc); -+ return(0); -+} -+ -+static void freeProgram(struct prg_desc *program) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ -+#ifdef LINUX -+ rms_remove_proc_entry(program); -+#endif -+ -+ while ((pdesc = program->pdescs) != NULL) -+ { -+ program->pdescs = pdesc->next; -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ } -+ -+ while ((cdesc = program->caps) != NULL) -+ { -+ program->caps = cdesc->next; -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ } -+ -+ if (program->corepath) -+ KMEM_FREE(program->corepath, MAXCOREPATHLEN + 1); -+ -+ KMEM_FREE(program, sizeof(struct prg_desc)); -+ -+#ifdef LINUX -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+/* -+ * rms_prgcreate -+ * -+ * create a new program description -+ */ -+int rms_prgcreate(int id, uid_t uid, int cpus) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_prgcreate :: program %d pid %d uid %d cpus %d\n", id, CURPROC()->p_pid, uid, cpus)); -+ -+ /* -+ * parallel programs are created as root by the rmsd as it forks the loader -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ /* -+ * program ids must be unique -+ */ -+ kmutex_lock(&rms_lock); -+ program = findProgram(id); -+ kmutex_unlock(&rms_lock); -+ if (program) -+ return(EINVAL); -+ -+ /* -+ * create a new program description -+ */ -+ KMEM_ALLOC(program, struct prg_desc *, sizeof(struct prg_desc), TRUE); -+ if (!program) -+ return(ENOMEM); -+ -+ program->id = id; -+ program->flags = PRG_RUNNING; -+ program->ncpus = cpus; -+ program->nprocs = 1; -+ program->uid = uid; -+ program->ncaps = 0; -+ program->caps = 0; -+ program->corepath = 0; -+ program->psid = 0; -+ program->start_time = program->sched_time = gettime(); -+ program->end_time = 0; -+ program->accum_atime = 0; -+ program->cutime = 0; -+ program->cstime = 0; -+ program->maxrss = 0; -+ program->memint = 0; -+ program->majflt = 0; -+ program->ebytes = 0; -+ program->exfers = 0; -+ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (!pdesc) -+ return(ENOMEM); -+ -+ pdesc->proc = CURPROC(); -+ pdesc->next = 0; -+ pdesc->mycap = ELAN_CAP_UNINITIALISED; -+ pdesc->myctx = ELAN_CAP_UNINITIALISED; -+ pdesc->vp = -1; /* rmsloader */ -+ pdesc->program = program; -+ program->pdescs = pdesc; -+ -+#ifdef LINUX -+ rms_create_proc_entry(program); -+#endif -+ -+ kmutex_lock(&rms_lock); -+ -+#if defined(LINUX) -+ if (ptrack_register (rms_ptrack_callback, NULL) != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ KMEM_FREE(pdesc,sizeof(struct proc_desc)); -+ KMEM_FREE(program,sizeof(struct prg_desc)); -+ return(ENOMEM); -+ } -+#else -+ /* -+ * install a fork handler -+ */ -+ if (HANDLER_REGISTER((void *)(unsigned long)rms_xa_callback, NULL, XA_FORK | XA_EXIT | XA_IOF | XA_KOF | XA_KOE) == NULL) -+ { -+ kmutex_unlock(&rms_lock); -+ KMEM_FREE(pdesc,sizeof(struct proc_desc)); -+ KMEM_FREE(program,sizeof(struct prg_desc)); -+ return(ENOMEM); -+ } -+#endif -+ -+ program->next = programs; -+ programs = program; -+ -+#ifdef LINUX -+ MOD_INC_USE_COUNT; -+#endif -+ -+ kmutex_unlock(&rms_lock); -+ return(ESUCCESS); -+} -+ -+ -+/* -+ * rms_prgdestroy -+ * -+ * destroy a program description -+ */ -+int rms_prgdestroy(int id) -+{ -+ struct prg_desc *program, **pp; -+ int status = ESRCH; -+ -+ /* -+ * parallel programs are created and destroyed by the rmsd -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ if (program->id == id) -+ { -+ if (program->nprocs == 0) -+ { -+ DBG(printk("rms_prgdestro :: removing program %d\n", program->id)); -+ *pp = program->next; -+ freeProgram(program); -+ status = ESUCCESS; -+ } -+ else -+ { -+ DBG(printk("rms_prgdestro :: failed to remove program %d: %d\n", program->id, program->nprocs)); -+ status = ECHILD; -+ pp = &program->next; -+ } -+ } -+ else -+ pp = &program->next; -+ } -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+/* -+ * rms_prgids -+ */ -+int rms_prgids(int maxids, int *prgids, int *nprgs) -+{ -+ struct prg_desc *program; -+ int count = 0, *buf, *bufp; -+ int status = ESUCCESS; -+ -+ if (maxids < 1) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ count++; -+ count = MIN(count, maxids); -+ -+ if (count > 0) -+ { -+ KMEM_ALLOC(buf, int *, count * sizeof(int), TRUE); -+ if (buf) -+ { -+ for (program = programs, bufp=buf; bufp < buf + count; -+ program = program->next) -+ *bufp++ = program->id; -+ -+ if (copyout(buf, prgids, sizeof(int) * count)) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, count * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, nprgs, sizeof(int))) -+ status = EFAULT; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * rms_prginfo -+ */ -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidp, *buf; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(buf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ if (buf) -+ { -+ for (pidp = buf, pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ *pidp++ = pdesc->proc->p_pid; -+ -+ if (copyout(buf, pids, sizeof(pid_t) * MIN(program->nprocs, maxpids))) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, program->nprocs * sizeof(pid_t)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&program->nprocs, nprocs, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * rmsmod always used to use psignal but this doesn't work -+ * on Linux 2.6.7 so we have changed to kill_proc -+ */ -+static void prgsignal(struct prg_desc *program, int signo) -+{ -+ struct proc_desc *pdesc; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ kill_proc(pdesc->proc->p_pid, signo, 1); -+} -+ -+ -+int rms_prgsignal(int id, int signo) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ prgsignal(program, signo); -+ if (signo == SIGKILL) -+ program->flags |= PRG_KILLED; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap) -+{ -+ struct prg_desc *program; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ if (cap == NULL) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ KMEM_ALLOC(cdesc, struct cap_desc *, sizeof(struct cap_desc), TRUE); -+ if (cdesc) -+ { -+ cdesc->index = index; -+ if (copyin(cap, &cdesc->cap, sizeof(ELAN_CAPABILITY))) -+ { -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ status = EFAULT; -+ } -+ else -+ { -+ DBG(printk("rms_prgaddcap :: program %d index %d context %d<-->%d\n", -+ program->id, index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ cdesc->next = program->caps; -+ program->caps = cdesc; -+ program->ncaps++; -+ } -+ } -+ else -+ status = ENOMEM; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+static uint64_t gettime(void) -+{ -+ uint64_t now; -+ -+#if defined(SOLARIS) -+ timespec_t tv; -+ gethrestime(&tv); -+ now = tv.tv_sec * 1000 + tv.tv_nsec / 1000000; -+#elif defined(LINUX) -+ struct timeval tv; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) -+ get_fast_time(&tv); -+#else -+ do_gettimeofday(&tv); -+#endif -+ now = tv.tv_sec * 1000 + tv.tv_usec / 1000; -+#elif defined(DIGITAL_UNIX) -+ struct timeval tv; -+ microtime(&tv); -+ now = tv.tv_sec * 1000 + tv.tv_usec / 1000; -+#endif -+ -+ return(now); -+} -+ -+#ifdef DIGITAL_UNIX -+ -+int rms_getrusage(struct proc_desc *pdesc, struct rusage *ru) -+{ -+ task_t task; -+ thread_t thread; -+ -+ if (!pdesc->proc) -+ return(-1); -+ -+ /* -+ * locking required unless called from the current proc -+ */ -+ if (pdesc->proc != CURPROC()) -+ { -+ if (!P_REF(pdesc->proc)) -+ return(-1); -+ -+ task = proc_to_task(pdesc->proc); -+ if (!task) -+ { -+ P_UNREF(pdesc->proc); -+ DBG(printk("rms_getrusage :: process (%d) has no task\n", pdesc->proc->p_pid)); -+ return(-1); -+ } -+ -+ task_reference(task); -+ task_lock(task); -+ -+ if (!queue_empty(&task->thread_list)) -+ thread = (thread_t) queue_first(&task->thread_list); -+ else -+ { -+ task_unlock(task); -+ task_deallocate(task); -+ P_UNREF(pdesc->proc); -+ return(-1); -+ } -+ -+ thread_reference(thread); -+ task_unlock(task); -+ } -+ -+ *ru = proc_to_utask(pdesc->proc)->uu_ru; -+ task_get_rusage(ru, proc_to_task(pdesc->proc)); -+ -+ if (pdesc->proc != CURPROC()) -+ { -+ task_deallocate(task); -+ thread_deallocate(thread); -+ P_UNREF(pdesc->proc); -+ } -+ return(0); -+} -+ -+#endif -+ -+/* -+ * new stats collection interface, 64-bit with addition of Elan stats -+ */ -+int rms_prggetstats(int id, prgstats_t *stats) -+{ -+#ifdef DIGITAL_UNIX -+ long ruixrss, ruidrss, ruisrss, rumaxrss, rumajflt; -+#endif -+ struct prg_desc *program = 0; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ prgstats_t totals; -+ uint64_t now = gettime(); -+#if defined(SOLARIS) -+ clock_t utime, stime; -+#elif defined(LINUX) -+ uint64_t utime, stime; -+#endif -+ -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ if (id < 0) -+ { -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ program = pdesc->program; -+ } -+ else -+ program = findProgram(id); -+ -+ if (program) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ totals.flags = program->flags; -+ totals.ncpus = program->ncpus; -+ maxrss = 0; -+ -+ if (program->nprocs > 0) -+ totals.etime = now - program->start_time; -+ else -+ totals.etime = program->end_time - program->start_time; -+ -+ totals.atime = program->accum_atime; -+ if (program->flags & PRG_RUNNING) -+ totals.atime += program->ncpus * (now - program->sched_time); -+ -+#if defined(SOLARIS) -+ utime = stime = 0; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ utime += pdesc->proc->p_utime; -+ stime += pdesc->proc->p_stime; -+ } -+ totals.utime = TICK_TO_MSEC(utime); -+ totals.stime = TICK_TO_MSEC(stime); -+ -+#elif defined(LINUX) -+ utime = stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ utime += TIMEVAL_TO_CT(&pdesc->proc->utime); -+ stime += TIMEVAL_TO_CT(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ utime += pdesc->proc->times.tms_utime; -+ stime += pdesc->proc->times.tms_stime; -+#else -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ utime += pdesc->proc->utime; -+ stime += pdesc->proc->stime; -+#endif -+ -+ totals.pageflts += pdesc->proc->maj_flt; -+ -+ maxrss += PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ } -+ -+ /* convert user and system times to millisecs */ -+ totals.utime = CT_TO_MSEC(utime); -+ totals.stime = CT_TO_MSEC(stime); -+ -+#elif defined(DIGITAL_UNIX) -+ totals.utime = totals.stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ struct rusage ru; -+ if (rms_getrusage(pdesc, &ru) < 0) -+ continue; -+ -+ totals.utime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ totals.stime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ -+ /* convert maxrss to megabytes */ -+ rumaxrss = ru.ru_maxrss >> 10; -+ rumajflt = ru.ru_majflt; -+ totals.pageflts += rumajflt; -+ -+ /* -+ * memory intergals are still broken in 5.1 -+ */ -+ -+#ifdef FIXED_MEMINIT -+ -+ /* convert from pages * clock ticks to Mbytes * secs */ -+ ruixrss = (ru.ru_ixrss >> (20 - PAGE_SHIFT)) / hz; -+ ruidrss = (ru.ru_idrss >> (20 - PAGE_SHIFT)) / hz; -+ ruisrss = (ru.ru_isrss >> (20 - PAGE_SHIFT)) / hz; -+ -+ DBG(printk("rms_prggetsta :: process %d mem %d int %d %d %d flt %d\n", pdesc->proc->p_pid, -+ rumaxrss, ruixrss, ruidrss, ruisrss, rumajflt)); -+ -+ totals.memint += ruixrss + ruidrss + ruisrss; -+#else -+ DBG(printk("rms_prggetsta :: process %d mem %d flt %d\n", pdesc->proc->p_pid, rumaxrss, rumajflt)); -+ totals.memint = 0; -+#endif -+ maxrss += rumaxrss; -+ } -+#endif /* DIGITAL_UNIX */ -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ totals.utime += program->cutime; -+ totals.stime += program->cstime; -+ totals.mem = program->maxrss; -+ totals.ebytes = program->ebytes; -+ totals.exfers = program->exfers; -+ -+ DBG(printk("rms_prggetsta :: program %d mem %d flt %d\n", program->id, totals.mem, totals.pageflts)); -+ -+ if (copyout(&totals, stats, sizeof(prgstats_t))) -+ status = EFAULT; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+/* -+ * preserve the old stats stats collection interface -+ */ -+ -+int rms_prggetoldstats(int id, prgstats_old_t *stats) -+{ -+#ifdef DIGITAL_UNIX -+ long ruixrss, ruidrss, ruisrss, rumaxrss, rumajflt; -+#endif -+ struct prg_desc *program = 0; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ prgstats_old_t totals; -+ uint64_t now = gettime(); -+#if defined(SOLARIS) || defined(LINUX) -+ clock_t utime, stime; -+#endif -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ if (id < 0) -+ { -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ program = pdesc->program; -+ } -+ else -+ program = findProgram(id); -+ -+ if (program) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ totals.flags = program->flags; -+ totals.ncpus = program->ncpus; -+ maxrss = 0; -+ -+ if (program->nprocs > 0) -+ totals.etime = now - program->start_time; -+ else -+ totals.etime = program->end_time - program->start_time; -+ -+ totals.atime = program->accum_atime; -+ if (program->flags & PRG_RUNNING) -+ totals.atime += program->ncpus * (now - program->sched_time); -+ -+#if defined(SOLARIS) -+ utime = stime = 0; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ utime += pdesc->proc->p_utime; -+ stime += pdesc->proc->p_stime; -+ } -+ totals.utime = TICK_TO_MSEC(utime); -+ totals.stime = TICK_TO_MSEC(stime); -+ -+#elif defined(LINUX) -+ utime = stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ utime += TIMEVAL_TO_CT(&pdesc->proc->utime); -+ stime += TIMEVAL_TO_CT(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ utime += pdesc->proc->times.tms_utime; -+ stime += pdesc->proc->times.tms_stime; -+#else -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ utime += pdesc->proc->utime; -+ stime += pdesc->proc->stime; -+#endif -+ -+ totals.pageflts += pdesc->proc->maj_flt; -+ maxrss += PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ } -+ -+ /* convert user and system times to millisecs */ -+ totals.utime = CT_TO_MSEC(utime); -+ totals.stime = CT_TO_MSEC(stime); -+ -+#elif defined(DIGITAL_UNIX) -+ totals.utime = totals.stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ struct rusage ru; -+ if (rms_getrusage(pdesc, &ru) < 0) -+ continue; -+ -+ totals.utime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ totals.stime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ -+ /* convert maxrss to megabytes */ -+ rumaxrss = ru.ru_maxrss >> 10; -+ rumajflt = ru.ru_majflt; -+ totals.pageflts += rumajflt; -+ -+ /* -+ * memory intergals are still broken in 5.1 -+ */ -+ -+#ifdef FIXED_MEMINIT -+ -+ /* convert from pages * clock ticks to Mbytes * secs */ -+ ruixrss = (ru.ru_ixrss >> (20 - PAGE_SHIFT)) / hz; -+ ruidrss = (ru.ru_idrss >> (20 - PAGE_SHIFT)) / hz; -+ ruisrss = (ru.ru_isrss >> (20 - PAGE_SHIFT)) / hz; -+ -+ DBG(printk("rms_getoldsta :: process %d mem %d int %d %d %d flt %d\n", pdesc->proc->p_pid, -+ rumaxrss, ruixrss, ruidrss, ruisrss, rumajflt)); -+ -+ totals.memint += ruixrss + ruidrss + ruisrss; -+#else -+ DBG(printk("rms_getoldsta :: process %d mem %d flt %d\n", pdesc->proc->p_pid, rumaxrss, rumajflt)); -+ totals.memint = 0; -+#endif -+ maxrss += rumaxrss; -+ } -+#endif /* DIGITAL_UNIX */ -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ totals.utime += program->cutime; -+ totals.stime += program->cstime; -+ totals.mem = program->maxrss; -+ -+ DBG(printk("rms_getoldsta :: program %d mem %d flt %d\n", program->id, totals.mem, totals.pageflts)); -+ -+ if (copyout(&totals, stats, sizeof(prgstats_old_t))) -+ status = EFAULT; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_prgsuspend(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_RUNNING; -+ program->flags |= PRG_SUSPEND; -+ program->accum_atime += program->ncpus * (gettime() - program->sched_time); -+ -+ /* suspend/resume just use signals for now */ -+ prgsignal(program, SIGSTOP); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_prgresume(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_SUSPEND; -+ program->flags |= PRG_RUNNING; -+ program->sched_time = gettime(); -+ prgsignal(program, SIGCONT); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_ncaps(int *ncaps) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->ncaps, ncaps, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getprgid(pid_t pid, int *id) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (pid == 0) -+ pid = CURPROC()->p_pid; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->id, id, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_setcap(int index, int ctx) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = EINVAL; -+ -+ DBG(printk("rms_setcap :: process %d cap %d ctx %d\n",CURPROC()->p_pid,index,ctx)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index && 0 <= ctx && ctx <= (cdesc->cap.cap_highcontext - cdesc->cap.cap_lowcontext + 1)) -+ { -+ pdesc->mycap = index; -+ pdesc->myctx = cdesc->cap.cap_lowcontext + ctx; -+ status = ESUCCESS; -+ } -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_mycap(int *index) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_mycap :: process %d\n", CURPROC()->p_pid)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ DBG(printk("rms_mycap :: found process %d mycap = %d\n", CURPROC()->p_pid, pdesc->mycap)); -+ if (copyout(&pdesc->mycap, index, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getcap(int index, ELAN_CAPABILITY *cap) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index) -+ break; -+ -+ if (cdesc) -+ { -+ /* tell each process about its own context */ -+ cdesc->cap.cap_mycontext = pdesc->myctx; -+ -+ if (copyout(&cdesc->cap, cap, ELAN_CAP_SIZE(&cdesc->cap))) -+ status = EFAULT; -+ -+ DBG(printk("rms_getcap :: program %d index %d context %d<-->%d\n", pdesc->program->id, -+ cdesc->index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ } -+ else -+ status = EINVAL; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+static int -+rms_fork_callback (struct PROC_STRUCT *curproc, struct PROC_STRUCT *child) -+{ -+ struct prg_desc *program; -+ struct proc_desc *parent; -+ struct proc_desc *pdesc = NULL; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_fork_func :: phase is fork pid %d child %d\n", curproc->p_pid, child->p_pid)); -+ -+ /* -+ * find the process that forked -+ */ -+ if ((parent = findProcess(curproc->p_pid)) != NULL) -+ { -+ program = parent->program; -+ -+ DBG(printk("rms_fork_func :: program is %d flags %d\n", program->id, program->flags)); -+ -+ /* -+ * processes can be blocked in fork while prgsignal is in progress -+ * so check to see if the PRG_KILLED flag is set -+ */ -+ if (program->flags & PRG_KILLED) -+ DBG(printk("rms_fork_func :: fork handler called after program killed\n")); -+ else -+ { -+ /* -+ * create a new process description and add to program -+ */ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (pdesc) -+ { -+ pdesc->next = program->pdescs; -+ program->pdescs = pdesc; -+ pdesc->proc = child; -+ pdesc->mycap = parent->mycap; -+ pdesc->myctx = parent->myctx; -+ pdesc->program = program; -+ pdesc->vp = -1; /* assigned by elaninitdone */ -+ program->nprocs++; -+ } -+ else -+ printk("rms_fork_func :: memory allocation failed\n"); -+ } -+ } -+ else -+ DBG(printk("rms_fork_func :: no program\n")); -+ -+ kmutex_unlock (&rms_lock); -+ -+ return pdesc == NULL; -+} -+ -+static void -+rms_exit_callback (struct PROC_STRUCT *curproc) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc, **pdescp, *p; -+#ifdef DIGITAL_UNIX -+ struct rusage ru; -+#endif -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_exit_func :: process %d exiting\n", curproc->p_pid)); -+ -+ /* -+ * find the process that exited and accumulate -+ * resource usage in its parent program -+ */ -+ for (program = programs, pdesc = 0; program && !pdesc; program = program->next) -+ { -+ pdescp = &program->pdescs; -+ while ((pdesc = *pdescp) != NULL) -+ { -+ if (pdesc->proc->p_pid == curproc->p_pid) -+ { -+ /* -+ * keep track of the resources used -+ */ -+#if defined(SOLARIS) -+ program->cutime += TICK_TO_MSEC(pdesc->proc->p_utime); -+ program->cstime += TICK_TO_MSEC(pdesc->proc->p_stime); -+ -+#elif defined(LINUX) -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, -+ TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ program->cutime += TIMEVAL_TO_MSEC(&pdesc->proc->utime); -+ program->cstime += TIMEVAL_TO_MSEC(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ -+ program->cutime += CT_TO_MSEC(pdesc->proc->times.tms_utime); -+ program->cstime += CT_TO_MSEC(pdesc->proc->times.tms_stime); -+#else -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ -+ program->cutime += CT_TO_MSEC(pdesc->proc->utime); -+ program->cstime += CT_TO_MSEC(pdesc->proc->stime); -+#endif -+ program->majflt += pdesc->proc->maj_flt; -+ maxrss = PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ -+#elif defined(DIGITAL_UNIX) -+ if (rms_getrusage(pdesc, &ru) == 0) -+ { -+ program->cutime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ program->cstime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ program->majflt += ru.ru_majflt; -+ -+ /* convert maxrss to megabytes */ -+ maxrss = ru.ru_maxrss >> 10; -+ } -+#endif -+ -+ /* -+ * shared memory segment cleanup -+ */ -+#if defined(DIGITAL_UNIX) -+ rms_shmcleanup(-1); -+#elif defined(LINUX) -+ shm_cleanup(); -+#endif -+ -+ /* -+ * remove process from program -+ */ -+ *pdescp = pdesc->next; -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ program->nprocs--; -+ -+ /* -+ * update the memory high water mark for the program -+ */ -+ for (p = program->pdescs; p; p = p->next) -+ { -+#if defined(DIGITAL_UNIX) -+ if (rms_getrusage(p, &ru) < 0) -+ continue; -+ -+ /* convert maxrss to megabytes */ -+ maxrss += ru.ru_maxrss >> 10; -+ -+#elif defined(LINUX) -+ maxrss += PROC_RSS(p->proc) >> (20 - PAGE_SHIFT); -+#endif -+ } -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ DBG(printk("rms_exit_func :: program %d procs %d mem %ld\n", program->id, program->nprocs, program->maxrss)); -+ -+ /* -+ * final update to the program if this is the last process -+ */ -+ if (program->nprocs == 0) -+ { -+ program->end_time = gettime(); -+ program->flags &= ~PRG_RUNNING; -+ program->accum_atime += program->ncpus * (program->end_time - program->sched_time); -+ DBG(printk("rms_exit_func :: last process has gone\n")); -+ } -+ break; -+ } -+ else -+ pdescp = &pdesc->next; -+ } -+ } -+ kmutex_unlock (&rms_lock); -+} -+ -+#if defined(LINUX) -+static int -+rms_ptrack_callback (void *arg, int phase, struct task_struct *child) -+{ -+ switch (phase) -+ { -+ case PTRACK_PHASE_CLONE: -+ if (rms_fork_callback (current, child)) -+ return PTRACK_DENIED; -+ else -+ return PTRACK_INNHERIT; -+ -+ case PTRACK_PHASE_CLONE_FAIL: -+ DBG(printk("rms_fork_func :: fork failed pid %d child %d\n", current->p_pid, child->p_pid)); -+ rms_exit_callback(child); -+ break; -+ -+ case PTRACK_PHASE_EXIT: -+ rms_exit_callback(current); -+ break; -+ } -+ return PTRACK_FINISHED; -+} -+ -+#else -+ -+static void -+rms_xa_callback (void *arg, int phase, void *ctask) -+{ -+ switch (phase) -+ { -+ case XA_FORK: -+ if (rms_fork_callback (CURPROC(), (struct PROC_STRUCT *)task_to_proc(ctask))) -+ psignal(task_to_proc(ctask), SIGKILL); -+ break; -+ case XA_EXIT: -+ rms_exit_callback (CURPROC()); -+ break; -+ } -+} -+ -+#endif -+ -+#ifdef DIGITAL_UNIX -+ -+/* -+ * NB: These functions will only work on steelos. -+ */ -+ -+/* -+ * rms_setcorepath -+ * -+ * set a path at which to dump core if the task aborts -+ * -+ * enhanced core file names must be enabled for this to work -+ */ -+int rms_setcorepath(char *corepath) -+{ -+ int length; -+ char *path; -+ int status; -+ struct proc_desc *pdesc; -+ -+ /* -+ * access restricted - we don't want users moving -+ * their corepath and generating a huge I/O load -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ if (!(pdesc = findProcess(CURPROC()->p_pid))) -+ return(ESRCH); -+ -+ if (pdesc->program->corepath) -+ return(EEXIST); -+ -+ KMEM_ALLOC(path, char *, MAXCOREPATHLEN + 1, TRUE); -+ if (path == 0) -+ return(ENOMEM); -+ -+ if (copyinstr(corepath, path, MAXCOREPATHLEN, &length)) -+ return(EFAULT); -+ -+ path[length] = 0; -+ status = add_corepath(path); -+ -+ DBG(printk("rms_setcorepa :: id %d corepath %s status %d\n", pdesc->program->id, path, status)); -+ -+ if (status == ESUCCESS) -+ pdesc->program->corepath = path; -+ else -+ KMEM_FREE(path, MAXCOREPATHLEN + 1); -+ -+ return(status); -+} -+ -+static int find_corepath(pid_t pid, char *path, int len) -+{ -+ struct proc *procp; -+ struct utask *utask; -+ int status = ESUCCESS; -+ -+ procp = pfind(pid); -+ if (procp == NULL) -+ return(ENOENT); -+ -+ utask = proc_to_utask(procp); -+ -+ if (utask->uu_coredir) -+ bcopy(utask->uu_coredir,path,len); -+ else -+ status = ENOENT; -+ -+ /* pfind takes out a reference */ -+ P_UNREF(procp); -+ -+ return(status); -+} -+ -+int rms_getcorepath(pid_t pid, char *corepath, int maxlen) -+{ -+ char src[MAXCOREPATHLEN]; -+ int len; -+ int status; -+ -+ if (maxlen < 2) -+ return(EINVAL); -+ -+ len = MIN(maxlen, MAXCOREPATHLEN); -+ -+ status = find_corepath(pid, src, len); -+ -+ if (status == ESUCCESS) -+ len = strlen(src)+1; -+ else if (status == ENOENT) -+ { -+ len = 2; -+ src[0] = '.'; -+ src[1] = '\0'; -+ status = ESUCCESS; -+ } -+ -+ if (copyout(src, corepath, len)) -+ return(EFAULT); -+ -+ return(status); -+} -+ -+#endif -+ -+/* -+ * rms_elaninitdone - mark a process as having successfully completed elan initialisation -+ */ -+int rms_elaninitdone(int vp) -+{ -+ int status = ESUCCESS; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_elaninit :: process %d vp %d\n", CURPROC()->p_pid, vp)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ pdesc->vp = vp; -+ else -+ status = ESRCH; -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+/* -+ * rms_prgelanpids - return the ids of processes that have completed elan initialisation -+ */ -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidbuf; -+ int status = ESUCCESS, count = 0, *vpbuf; -+ -+ DBG(printk("rms_elanpids :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(pidbuf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ KMEM_ALLOC(vpbuf, int *, program->nprocs * sizeof(int), TRUE); -+ if (pidbuf && vpbuf) -+ { -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->vp >= 0) -+ { -+ pidbuf[count] = pdesc->proc->p_pid; -+ vpbuf[count] = pdesc->vp; -+ count++; -+ } -+ -+ if (count > 0 && (copyout(pidbuf, pids, sizeof(pid_t) * MIN(count, maxpids)) || -+ copyout(vpbuf, vps, sizeof(int) * MIN(count, maxpids)))) -+ status = EFAULT; -+ -+ KMEM_FREE(pidbuf, program->nprocs * sizeof(pid_t)); -+ KMEM_FREE(vpbuf, program->nprocs * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, npids, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+ -+} -+ -+int rms_setpset(int psid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ program = pdesc->program; -+ program->psid = psid; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_getpset(int id, int *psid) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (copyout(&program->psid, psid, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int -+rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_setelanst :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->ebytes = ebytes; -+ program->exfers = exfers; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+rms_modversion() -+{ -+ return(RMS_MODVERSION); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.4.21/drivers/net/qsnet/rms/rms_kern_Linux.c -=================================================================== ---- linux-2.4.21.orig/drivers/net/qsnet/rms/rms_kern_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/drivers/net/qsnet/rms/rms_kern_Linux.c 2005-06-01 23:12:54.703423296 -0400 -@@ -0,0 +1,430 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: rms_kern_Linux.c,v 1.20 2004/05/14 08:55:57 duncan Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rms_kern_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("RMS support module"); -+MODULE_LICENSE("GPL"); -+ -+int rms_debug = 0; -+ -+ctl_table rms_table[] = { -+ { -+ .ctl_name = 1, -+ .procname = "rms_debug", -+ .data = &rms_debug, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .child = NULL, -+ .proc_handler = &proc_dointvec, -+ }, -+ {0} -+}; -+ -+ctl_table rms_root_table[] = { -+ { -+ .ctl_name = CTL_DEBUG, -+ .procname = "rms", -+ .data = NULL, -+ .maxlen = 0, -+ .mode = 0555, -+ .child = rms_table, -+ }, -+ {0} -+}; -+ -+static struct ctl_table_header *rms_sysctl_header; -+ -+static int rms_open (struct inode *ino, struct file *fp); -+static int rms_release (struct inode *ino, struct file *fp); -+static int rms_ioctl (struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32_cmds[] = -+{ -+ RMSIO_GETPRGID32, -+ RMSIO_GETCAP32 -+}; -+ -+static int rms_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+static struct file_operations rms_fops = -+{ -+ .owner = THIS_MODULE, -+ .ioctl = rms_ioctl, -+ .open = rms_open, -+ .release = rms_release, -+}; -+ -+struct proc_dir_entry *rms_procfs_programs; -+static struct proc_dir_entry *rms_procfs_root; -+ -+int version_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ return(sprintf(page, "$Id: rms_kern_Linux.c,v 1.20 2004/05/14 08:55:57 duncan Exp $\n")); -+} -+ -+static int __init rms_start(void) -+{ -+ struct proc_dir_entry *p; -+ int res; -+ -+ if ((rms_sysctl_header = register_sysctl_table(rms_root_table, 1)) == 0) -+ { -+ printk ("rmsmod: failed to register sysctl table\n"); -+ return (-ENXIO); -+ } -+ -+ if ((rms_procfs_root = proc_mkdir("rms", NULL)) == NULL || -+ (rms_procfs_programs = proc_mkdir("programs", rms_procfs_root)) == NULL || -+ (p = create_proc_entry ("control", S_IRUGO, rms_procfs_root)) == NULL) -+ { -+ unregister_sysctl_table (rms_sysctl_header); -+ printk ("rmsmod: failed to register /proc/rms\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &rms_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ -+ if ((p = create_proc_entry ("version", S_IRUGO, rms_procfs_root)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->read_proc = version_callback; -+ } -+ -+ if ((res = rms_init()) != ESUCCESS) -+ { -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table (rms_sysctl_header); -+ return (-res); -+ } -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (rms_ioctl32_cmds[i], rms_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ return (0); -+} -+ -+static void __exit rms_exit(void) -+{ -+ rms_fini(); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ register int i; -+ -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (rms_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+ remove_proc_entry ("version", rms_procfs_root); -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table(rms_sysctl_header); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(rms_start); -+module_exit(rms_exit); -+ -+static int -+rms_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ -+ return (0); -+} -+ -+static int -+rms_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+rms_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res; -+ -+ switch (cmd) -+ { -+/* no corepath support in Linux yet */ -+#if 0 -+ case RMSIO_SETCOREPATH: -+ res = rms_setcorepath((caddr_t)arg); -+ break; -+ -+ case RMSIO_GETCOREPATH: -+ { -+ RMSIO_GETCOREPATH_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcorepath(args.pid, args.corepath, args.maxlen); -+ break; -+ } -+#endif -+ -+ case RMSIO_PRGCREATE: -+ { -+ RMSIO_PRGCREATE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgcreate(args.id, args.uid, args.cpus); -+ break; -+ } -+ -+ case RMSIO_PRGDESTROY: -+ res = rms_prgdestroy(arg); -+ break; -+ -+ case RMSIO_PRGIDS: -+ { -+ RMSIO_PRGIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgids(args.maxids, args.prgids, args.nprgs); -+ break; -+ } -+ -+ case RMSIO_PRGINFO: -+ { -+ RMSIO_PRGINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prginfo(args.id, args.maxpids, args.pids, args.nprocs); -+ break; -+ } -+ -+ case RMSIO_PRGSIGNAL: -+ { -+ RMSIO_PRGSIGNAL_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgsignal(args.id, args.signo); -+ break; -+ } -+ -+ case RMSIO_PRGADDCAP: -+ { -+ RMSIO_PRGADDCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgaddcap(args.id, args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_SETCAP: -+ { -+ RMSIO_SETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_setcap(args.index, args.ctx); -+ break; -+ } -+ -+ case RMSIO_NCAPS: -+ res = rms_ncaps((int *)arg); -+ break; -+ -+ case RMSIO_GETPRGID: -+ { -+ RMSIO_GETPRGID_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, args.id); -+ break; -+ } -+ -+ case RMSIO_GETMYCAP: -+ res = rms_mycap((int *)arg); -+ break; -+ -+ case RMSIO_GETCAP: -+ { -+ RMSIO_GETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS: -+ { -+ RMSIO_PRGGETSTATS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prggetoldstats(args.id, args.stats); -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS2: -+ { -+ RMSIO_PRGGETSTATS2_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prggetstats(args.id, args.stats); -+ break; -+ } -+ -+ case RMSIO_PRGSUSPEND: -+ res = rms_prgsuspend(arg); -+ break; -+ -+ case RMSIO_PRGRESUME: -+ res = rms_prgresume(arg); -+ break; -+ -+ case RMSIO_ELANINITDONE: -+ res = rms_elaninitdone(arg); -+ break; -+ -+ case RMSIO_PRGELANPIDS: -+ { -+ RMSIO_PRGELANPIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgelanpids(args.id, args.maxpids, args.vps, args.pids, args.npids); -+ break; -+ } -+ -+ case RMSIO_SETELANSTATS: -+ { -+ RMSIO_SETELANSTATS_STRUCT args; -+ elanstats_t estats; -+ -+ if (copy_from_user(&args, (void *)arg, sizeof(args)) || -+ copy_from_user(&estats, (void *)args.estats, sizeof(estats))) -+ return(-EFAULT); -+ -+ res = rms_setelanstats(args.id, estats.ebytes, estats.exfers); -+ break; -+ } -+ -+ case RMSIO_MODVERSION: -+ { -+ RMSIO_MODVERSION_STRUCT args; -+ int version = rms_modversion(); -+ -+ if (copy_from_user (&args, (void *)arg, sizeof (args))) -+ return (-EFAULT); -+ -+ if (copyout(&version, args.version, sizeof(int))) -+ res = EFAULT; -+ else -+ res = ESUCCESS; -+ -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ int res; -+ -+ switch (cmd) -+ { -+ case RMSIO_GETPRGID32: -+ { -+ RMSIO_GETPRGID_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, (int *)(unsigned long) args.idptr); -+ break; -+ } -+ -+ case RMSIO_GETCAP32: -+ { -+ RMSIO_GETCAP_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, (ELAN_CAPABILITY *)(unsigned long) args.capptr); -+ break; -+ } -+ -+ default: -+ return (sys_ioctl (fd, cmd, arg)); -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/fs/exec.c -=================================================================== ---- linux-2.4.21.orig/fs/exec.c 2005-06-01 22:58:09.044063984 -0400 -+++ linux-2.4.21/fs/exec.c 2005-06-01 23:12:54.704423144 -0400 -@@ -51,6 +51,7 @@ - #ifdef CONFIG_KMOD - #include - #endif -+#include - - int core_uses_pid; - char core_pattern[65] = "core"; -@@ -1125,6 +1126,10 @@ - if (retval < 0) - goto out; - -+ -+ /* Notify any ptrack callbacks of the process exec */ -+ ptrack_call_callbacks (PTRACK_PHASE_EXEC, NULL); -+ - retval = search_binary_handler(&bprm,regs); - if (retval >= 0) - /* execve success */ -Index: linux-2.4.21/include/elan/bitmap.h -=================================================================== ---- linux-2.4.21.orig/include/elan/bitmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/bitmap.h 2005-06-01 23:12:54.704423144 -0400 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_BITMAP_H -+#define __QSNET_BITMAP_H -+ -+#ident "$Id: bitmap.h,v 1.5 2004/01/20 17:32:15 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/bitmap.h,v $ */ -+ -+typedef unsigned int bitmap_t; -+ -+#define BT_NBIPUL 32 /* n bits per bitmap_t */ -+#define BT_ULSHIFT 5 /* log 2 BT_NBIPUL to extract word index */ -+#define BT_ULMASK 0x1f /* to extract bit index */ -+ -+#define BT_WIM(bitmap,bitindex) ((bitmap)[(bitindex) >> BT_ULSHIFT]) /* word in map */ -+#define BT_BIW(bitindex) (1 << ((bitindex) & BT_ULMASK)) /* bit in word */ -+ -+/* BT_BITOUL -- n bits to n words */ -+#define BT_BITOUL(nbits) (((nbits) + BT_NBIPUL -1) / BT_NBIPUL) -+ -+#define BT_TEST(bitmap,bitindex) ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0) -+#define BT_SET(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } while (0) -+#define BT_CLEAR(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } while (0) -+ -+/* return first free bit in the bitmap, or -1 for failure */ -+extern int bt_freebit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the lowest set bit in the bitmap or -1 for failure */ -+extern int bt_lowbit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the next set/clear bit in the bitmap or -1 for failure */ -+extern int bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset); -+ -+/* copy/zero/fill/compare a bit map */ -+extern void bt_copy (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_zero (bitmap_t *a, int nbits); -+extern void bt_fill (bitmap_t *a, int nbits); -+extern int bt_cmp (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* intersect bitmap 'a' with bitmap 'b' and return in 'a' */ -+extern void bt_intersect (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* remove/add bitmap 'b' from bitmap 'a' */ -+extern void bt_remove (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_add (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* check whether bitmap 'a' spans bitmap 'b' */ -+extern int bt_spans (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* copy [base,base+nbits-1] from 'a' to 'b' */ -+extern void bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits); -+ -+/* find bits clear in 'a' and set in 'b', put result in 'c' */ -+extern void bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* find bits set in 'a' and clear in 'b', put result in 'c' */ -+extern void bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* return number of bits set in bitmap */ -+extern int bt_nbits (bitmap_t *a, int nbits); -+ -+ -+#endif /* __QSNET_BITMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/capability.h -=================================================================== ---- linux-2.4.21.orig/include/elan/capability.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/capability.h 2005-06-01 23:12:54.705422992 -0400 -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.h,v 1.16 2004/07/20 10:15:33 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.h,v $*/ -+ -+#ifndef __ELAN_CAPABILITY_H -+#define __ELAN_CAPABILITY_H -+ -+#include -+ -+/* Maximum number of rails */ -+#define ELAN_MAX_RAILS (31) -+/* Maximum number of virtual processes we support */ -+#define ELAN_MAX_VPS (16384) -+ -+/* Number of words in a bitmap capability */ -+#define ELAN_BITMAPSIZE BT_BITOUL(ELAN_MAX_VPS) -+ -+/* Guaranteed invalid values */ -+#define ELAN_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN_INVALID_NODE (0xFFFF) -+#define ELAN_INVALID_CONTEXT (0xFFFF) -+ -+/* Number of values in a user key */ -+#define ELAN_USERKEY_ENTRIES 4 -+ -+typedef void * ELAN_CAP_OWNER; -+ -+/* -+ * When used in userspace this is relative to the base of -+ * the capabality but is an absolute location for kernel space. -+ */ -+typedef struct elan_location -+{ -+ unsigned short loc_node; -+ unsigned short loc_context; -+} ELAN_LOCATION; -+ -+typedef struct elan_userkey -+{ -+ unsigned key_values[ELAN_USERKEY_ENTRIES]; -+} ELAN_USERKEY; -+ -+typedef struct elan_capability -+{ -+ ELAN_USERKEY cap_userkey; /* User defined protection */ -+ -+ int cap_version; /* Version number */ -+ unsigned short cap_type; /* Capability Type */ -+ unsigned short cap_spare; /* spare was cap_elan_type */ -+ -+ int cap_lowcontext; /* low context number in block */ -+ int cap_highcontext; /* high context number in block */ -+ int cap_mycontext; /* my context number */ -+ -+ int cap_lownode; /* low elan id of group */ -+ int cap_highnode; /* high elan id of group */ -+ -+ unsigned int cap_railmask; /* which rails this capability is valid for */ -+ -+ bitmap_t cap_bitmap[ELAN_BITMAPSIZE]; /* Bitmap of process to processor translation */ -+} ELAN_CAPABILITY; -+ -+#define ELAN_CAP_UNINITIALISED (-1) -+ -+#define ELAN_CAP_VERSION_NUMBER (0x00010002) -+ -+#define ELAN_CAP_NUM_NODES(cap) ((cap)->cap_highnode - (cap)->cap_lownode + 1) -+#define ELAN_CAP_NUM_CONTEXTS(cap) ((cap)->cap_highcontext - (cap)->cap_lowcontext + 1) -+ -+/* using or defining our own MIN/MAX had confilicts with dunix so we define ELAN_ ones */ -+#define ELAN_MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define ELAN_MAX(a,b) ((a) > (b) ? (a) : (b)) -+#define ELAN_CAP_BITMAPSIZE(cap) (ELAN_MAX (ELAN_MIN (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap), ELAN_MAX_VPS), 0)) -+ -+#define ELAN_CAP_SIZE(cap) (offsetof (ELAN_CAPABILITY, cap_bitmap[BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap))])) -+#define ELAN_CAP_ENTRIES(cap) (((cap)->cap_type & ELAN_CAP_TYPE_NO_BITMAP) ? ELAN_CAP_BITMAPSIZE((cap)) : bt_nbits((cap)->cap_bitmap, ELAN_CAP_BITMAPSIZE((cap)))) -+ -+#define ELAN_CAP_IS_RAIL_SET(cap,rail) ((cap)->cap_railmask & (1<cap_userkey.key_values[0] == (cap2)->cap_userkey.key_values[0] && \ -+ (cap1)->cap_userkey.key_values[1] == (cap2)->cap_userkey.key_values[1] && \ -+ (cap1)->cap_userkey.key_values[2] == (cap2)->cap_userkey.key_values[2] && \ -+ (cap1)->cap_userkey.key_values[3] == (cap2)->cap_userkey.key_values[3]) -+ -+#define ELAN_CAP_TYPE_MATCH(cap1,cap2) ((cap1)->cap_version == (cap2)->cap_version && \ -+ (cap1)->cap_type == (cap2)->cap_type) -+ -+#define ELAN_CAP_GEOM_MATCH(cap1,cap2) ((cap1)->cap_lowcontext == (cap2)->cap_lowcontext && \ -+ (cap1)->cap_highcontext == (cap2)->cap_highcontext && \ -+ (cap1)->cap_lownode == (cap2)->cap_lownode && \ -+ (cap1)->cap_highnode == (cap2)->cap_highnode && \ -+ (cap1)->cap_railmask == (cap2)->cap_railmask && \ -+ !bcmp (&(cap1)->cap_bitmap[0], &(cap2)->cap_bitmap[0], \ -+ BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap1)*sizeof(bitmap_t)))) -+ -+#define ELAN_CAP_MATCH(cap1,cap2) (ELAN_CAP_KEY_MATCH (cap1, cap2) && \ -+ ELAN_CAP_TYPE_MATCH (cap1, cap2) && \ -+ ELAN_CAP_GEOM_MATCH (cap1, cap2)) -+ -+#define ELAN_CAP_VALID_MYCONTEXT(cap) ( ((cap)->cap_lowcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_mycontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_highcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_lowcontext <= (cap)->cap_mycontext) \ -+ && ((cap)->cap_mycontext <= (cap)->cap_highcontext)) -+ -+/* -+ * Definitions for type -+ */ -+#define ELAN_CAP_TYPE_BLOCK 1 /* Block distribution */ -+#define ELAN_CAP_TYPE_CYCLIC 2 /* Cyclic distribution */ -+#define ELAN_CAP_TYPE_KERNEL 3 /* Kernel capability */ -+ -+#define ELAN_CAP_TYPE_MASK (0xFFF) /* Mask for type */ -+ -+/* OR these bits in for extra features */ -+#define ELAN_CAP_TYPE_HWTEST (1 << 12) /* Hardware test capability type */ -+#define ELAN_CAP_TYPE_MULTI_RAIL (1 << 13) /* "new" multi rail capability */ -+#define ELAN_CAP_TYPE_NO_BITMAP (1 << 14) /* don't use bit map */ -+#define ELAN_CAP_TYPE_BROADCASTABLE (1 << 15) /* broadcastable */ -+ -+ -+extern void elan_nullcap (ELAN_CAPABILITY *cap); -+extern char *elan_capability_string (ELAN_CAPABILITY *cap, char *str); -+extern ELAN_LOCATION elan_vp2location (unsigned process, ELAN_CAPABILITY *cap); -+extern int elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap); -+extern int elan_nvps (ELAN_CAPABILITY *cap); -+extern int elan_nlocal (int node, ELAN_CAPABILITY *cap); -+extern int elan_maxlocal (ELAN_CAPABILITY *cap); -+extern int elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size); -+extern int elan_nrails (ELAN_CAPABILITY *cap); -+extern int elan_rails (ELAN_CAPABILITY *cap, int *rails); -+extern int elan_cap_overlap (ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2); -+ -+/* -+ * capability creation/access fns provide for running -+ * new libelan code on old OS releases -+ */ -+extern int elan_lowcontext(ELAN_CAPABILITY *cap); -+extern int elan_mycontext(ELAN_CAPABILITY *cap); -+extern int elan_highcontext(ELAN_CAPABILITY *cap); -+extern int elan_lownode(ELAN_CAPABILITY *cap); -+extern int elan_highnode(ELAN_CAPABILITY *cap); -+extern int elan_captype(ELAN_CAPABILITY *cap); -+extern int elan_railmask(ELAN_CAPABILITY *cap); -+ -+extern int elan_getenvCap (ELAN_CAPABILITY *cap, int index); -+extern ELAN_CAPABILITY *elan_createCapability(void); -+extern ELAN_CAPABILITY *elan_copyCapability(ELAN_CAPABILITY *from, int ctxShift); -+extern int elan_generateCapability(char *string); -+ -+typedef struct elan_cap_struct -+{ -+ ELAN_CAP_OWNER owner; -+ ELAN_CAPABILITY cap; -+ -+ unsigned int attached; /* count of people attached */ -+ unsigned int active; /* ie not being destroyed */ -+} ELAN_CAP_STRUCT; -+ -+#if ! defined(__KERNEL__) -+extern void elan_get_random_key(ELAN_USERKEY *key); -+extern int elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp); -+#endif -+ -+#if defined(__KERNEL__) -+/* capability.c */ -+extern int elan_validate_cap (ELAN_CAPABILITY *cap); -+extern int elan_validate_map (ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_create_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_destroy_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_create_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+extern int elan_destroy_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+typedef void (*ELAN_DESTROY_CB)(void *args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_attach_cap (ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB callback); -+extern int elan_detach_cap (ELAN_CAPABILITY *cap, unsigned int rail); -+ -+extern int elan_get_caps (uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps); -+extern int elan_cap_dump (void); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_CAPABILITY_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/cm.h -=================================================================== ---- linux-2.4.21.orig/include/elan/cm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/cm.h 2005-06-01 23:12:54.706422840 -0400 -@@ -0,0 +1,412 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.14.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+typedef struct proc_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ char *pr_data; -+ int pr_data_len; -+ unsigned pr_off; -+ unsigned pr_len; -+ DisplayInfo pr_di; -+} PROC_PRIVATE; -+ -+extern void proc_character_fill (long mode, char *fmt, ...); -+extern int proc_release (struct inode *inode, struct file *file); -+extern ssize_t proc_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -Index: linux-2.4.21/include/elan/compat.h -=================================================================== ---- linux-2.4.21.orig/include/elan/compat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/compat.h 2005-06-01 23:12:54.706422840 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.1 2003/12/03 13:18:48 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/compat.h,v $*/ -+ -+#ifndef __ELAN_COMPAT_H -+#define __ELAN_COMPAT_H -+ -+#define ELANMOD_STATS_MAP ELAN_STATS_MAP -+ -+#endif /* __ELAN_COMPAT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/device.h -=================================================================== ---- linux-2.4.21.orig/include/elan/device.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/device.h 2005-06-01 23:12:54.707422688 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.h,v $*/ -+ -+#ifndef __ELAN_DEVICE_H -+#define __ELAN_DEVICE_H -+ -+/* non-kernel headings */ -+typedef unsigned int ELAN_DEV_IDX; -+ -+#if defined(__KERNEL__) -+ -+/* device callbacks */ -+#define ELAN_DEV_OPS_VERSION ((u_int)1) -+ -+typedef struct elan_dev_ops -+{ -+ /* dev info */ -+ int (*get_position) (void *user_data, ELAN_POSITION *position); -+ int (*set_position) (void *user_data, unsigned short nodeId, unsigned short numNodes); -+ -+ /* cap */ -+ -+ u_int ops_version; -+} ELAN_DEV_OPS; -+ -+typedef struct elan_dev_struct -+{ -+ struct list_head node; -+ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; -+ void *user_data; -+ ELAN_DEV_OPS *ops; -+} ELAN_DEV_STRUCT; -+ -+/* device.c */ -+extern ELAN_DEV_IDX elan_dev_register (ELAN_DEVINFO *devinfo, -+ ELAN_DEV_OPS *ops, -+ void *userdata); -+extern int elan_dev_deregister (ELAN_DEVINFO *devinfo); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find (ELAN_DEV_IDX devidx); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find_byrail(unsigned short deviceid, unsigned rail); -+extern int elan_dev_dump (void); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_DEVICE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/devinfo.h -=================================================================== ---- linux-2.4.21.orig/include/elan/devinfo.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/devinfo.h 2005-06-01 23:12:54.707422688 -0400 -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.h,v 1.11 2004/03/12 14:27:39 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.h,v $*/ -+ -+#ifndef __ELAN_DEVINFO_H -+#define __ELAN_DEVINFO_H -+ -+#define ELAN_MAX_LEVELS 8 /* maximum number of levels in switch network */ -+ -+typedef struct elan_position -+{ -+ unsigned pos_mode; /* mode we're operating in */ -+ unsigned pos_nodeid; /* port this device connected to */ -+ unsigned pos_levels; /* number of levels to top switch */ -+ unsigned pos_nodes; /* number of nodes in the machine */ -+ unsigned pos_random_disabled; /* levels at which "random" routing is not possible */ -+ unsigned char pos_arity[ELAN_MAX_LEVELS]; /* number of downlinks per switch level */ -+} ELAN_POSITION; -+ -+#define ELAN4_PARAM_PCI_PADDING_FLAGS 0 /* A bit field, representing good places to burst across the pci */ -+#define ELAN4_PARAM_EVENT_COPY_WIN 1 /* The num of cmds when it becomes quicker to send via event copy than write directly */ -+#define ELAN4_PARAM_WRITE_COMBINING 2 /* If set the device supports bursts accesses across the pci bus */ -+#define ELAN4_PARAM_COUNT 12 -+ -+typedef struct elan_params -+{ -+ unsigned values[ELAN4_PARAM_COUNT]; -+} ELAN_PARAMS; -+ -+/* values for pos_mode */ -+#define ELAN_POS_UNKNOWN 0 /* network position unknown */ -+#define ELAN_POS_MODE_SWITCHED 1 /* connected to a switch */ -+#define ELAN_POS_MODE_LOOPBACK 2 /* loopback connector */ -+#define ELAN_POS_MODE_BACKTOBACK 3 /* cabled back-to-back to another node */ -+ -+typedef struct elan_devinfo -+{ -+ unsigned short dev_vendor_id; /* pci vendor id */ -+ unsigned short dev_device_id; /* pci device id */ -+ unsigned char dev_revision_id; /* pci revision id */ -+ unsigned char dev_instance; /* device instance number */ -+ unsigned char dev_rail; /* device rail number */ -+ -+ unsigned short dev_driver_version; /* device driver version */ -+ unsigned short dev_params_mask; /* mask for valid entries in dev_params array */ -+ ELAN_PARAMS dev_params; /* device parametization */ -+ -+ unsigned dev_num_down_links_value; /* MRH hint as to machine size NEEDS coding XXXXX */ -+} ELAN_DEVINFO; -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+#if defined(__KERNEL__) -+/* devinfo.c */ -+#include -+#include -+extern int elan_get_devinfo (ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo); -+extern int elan_get_position (ELAN_DEV_IDX devidx, ELAN_POSITION *position); -+extern int elan_set_position (ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_DEVINFO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/elanmoddebug.h -=================================================================== ---- linux-2.4.21.orig/include/elan/elanmoddebug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/elanmoddebug.h 2005-06-01 23:12:54.707422688 -0400 -@@ -0,0 +1,63 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN_DEBUG_H -+#define _ELAN_DEBUG_H -+ -+ -+#ident "$Id: elanmoddebug.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmoddebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+/* 0 | QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE */ -+extern int elan_debug_mode; -+extern int elan_debug_mask; -+ -+#define ELAN_DBG_VP 0x00000001 -+#define ELAN_DBG_CAP 0x00000002 -+#define ELAN_DBG_CTRL 0x00000004 -+#define ELAN_DBG_SYS_FN 0x00000008 -+#define ELAN_DBG_ALL 0xffffffff -+ -+ -+#if defined(DEBUG_PRINTF) -+# define ELAN_DEBUG0(m,fmt) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt) : (void)0) -+# define ELAN_DEBUG1(m,fmt,a) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a) : (void)0) -+# define ELAN_DEBUG2(m,fmt,a,b) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b) : (void)0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c) : (void)0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d) : (void)0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e) : (void)0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define ELAN_DEBUG0(m,fmt) (0) -+# define ELAN_DEBUG1(m,fmt,a) (0) -+# define ELAN_DEBUG2(m,fmt,a,b) (0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) (0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) (0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) (0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) (0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/elanmod.h -=================================================================== ---- linux-2.4.21.orig/include/elan/elanmod.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/elanmod.h 2005-06-01 23:12:54.708422536 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod.h,v 1.10 2004/06/18 09:28:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.h,v $*/ -+ -+#ifndef __ELAN_MOD_H -+#define __ELAN_MOD_H -+ -+#include -+#include -+#include -+#include -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+extern kmutex_t elan_mutex; -+ -+/* elan_general.c */ -+extern int elan_init(void); -+extern int elan_fini(void); -+ -+/* return codes, -ve => errno, +ve => success */ -+#define ELAN_CAP_OK (0) -+#define ELAN_CAP_RMS (1) -+ -+#define ELAN_USER_ATTACH (1) -+#define ELAN_USER_DETACH (2) -+#define ELAN_USER_P2P (3) -+#define ELAN_USER_BROADCAST (4) -+ -+extern int elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use); -+ -+#define ELAN_USER_BASE_CONTEXT_NUM 0x000 /* first user allowable context */ -+#define ELAN_USER_TOP_CONTEXT_NUM 0x7FF /* last user allowable context */ -+ -+#define ELAN_RMS_BASE_CONTEXT_NUM 0x400 /* reserved for RMS allocation */ -+#define ELAN_RMS_TOP_CONTEXT_NUM 0x7FF -+ -+#define ELAN_USER_CONTEXT(ctx) ((ctx) >= ELAN_USER_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_USER_TOP_CONTEXT_NUM) -+ -+#define ELAN_RMS_CONTEXT(ctx) ((ctx) >= ELAN_RMS_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_RMS_TOP_CONTEXT_NUM) -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_MOD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/elanmod_linux.h -=================================================================== ---- linux-2.4.21.orig/include/elan/elanmod_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/elanmod_linux.h 2005-06-01 23:12:54.708422536 -0400 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.h,v 1.6 2003/09/29 15:36:20 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.h,v $*/ -+ -+#ifndef __ELAN_MOD_LINUX_H -+#define __ELAN_MOD_LINUX_H -+ -+#define ELANCRTL_USER_BASE 0x40 -+ -+/* stats */ -+typedef struct elanctrl_stats_get_next_struct -+{ -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; /* return value */ -+} ELANCTRL_STATS_GET_NEXT_STRUCT; -+#define ELANCTRL_STATS_GET_NEXT _IOR ('e', ELANCRTL_USER_BASE + 0, ELANCTRL_STATS_GET_NEXT_STRUCT) -+ -+typedef struct elanctrl_stats_find_index_struct -+{ -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_FIND_INDEX_STRUCT; -+#define ELANCTRL_STATS_FIND_INDEX _IOR ('e', ELANCRTL_USER_BASE + 1, ELANCTRL_STATS_FIND_INDEX_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_info_struct -+{ -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK_INFO _IOR ('e', ELANCRTL_USER_BASE + 2, ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT) -+ -+typedef struct elanctrl_stats_get_index_name_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} ELANCTRL_STATS_GET_INDEX_NAME_STRUCT; -+#define ELANCTRL_STATS_GET_INDEX_NAME _IOR ('e', ELANCRTL_USER_BASE + 3, ELANCTRL_STATS_GET_INDEX_NAME_STRUCT) -+ -+typedef struct elanctrl_stats_clear_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+} ELANCTRL_STATS_CLEAR_BLOCK_STRUCT; -+#define ELANCTRL_STATS_CLEAR_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 4, ELANCTRL_STATS_CLEAR_BLOCK_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} ELANCTRL_STATS_GET_BLOCK_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 5, ELANCTRL_STATS_GET_BLOCK_STRUCT) -+ -+ -+typedef struct elanctrl_get_devinfo_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} ELANCTRL_GET_DEVINFO_STRUCT; -+#define ELANCTRL_GET_DEVINFO _IOR ('e', ELANCRTL_USER_BASE + 6, ELANCTRL_GET_DEVINFO_STRUCT) -+ -+typedef struct elanctrl_get_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} ELANCTRL_GET_POSITION_STRUCT; -+#define ELANCTRL_GET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 7, ELANCTRL_GET_POSITION_STRUCT) -+ -+typedef struct elanctrl_set_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELANCTRL_SET_POSITION_STRUCT; -+#define ELANCTRL_SET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 8, ELANCTRL_SET_POSITION_STRUCT) -+ -+typedef struct elanctrl_create_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_CREATE_CAP_STRUCT; -+#define ELANCTRL_CREATE_CAP _IOW ('e', ELANCRTL_USER_BASE + 9, ELANCTRL_CREATE_CAP_STRUCT) -+ -+typedef struct elanctrl_destroy_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_DESTROY_CAP_STRUCT; -+#define ELANCTRL_DESTROY_CAP _IOW ('e', ELANCRTL_USER_BASE + 10, ELANCTRL_DESTROY_CAP_STRUCT) -+ -+typedef struct elanctrl_create_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_CREATE_VP_STRUCT; -+#define ELANCTRL_CREATE_VP _IOW ('e', ELANCRTL_USER_BASE + 11, ELANCTRL_CREATE_VP_STRUCT) -+ -+typedef struct elanctrl_destroy_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_DESTROY_VP_STRUCT; -+#define ELANCTRL_DESTROY_VP _IOW ('e', ELANCRTL_USER_BASE + 12, ELANCTRL_DESTROY_VP_STRUCT) -+ -+#define ELANCTRL_DEBUG_DUMP _IO ('e', ELANCRTL_USER_BASE + 13) -+ -+typedef struct elanctrl_get_caps_struct -+{ -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} ELANCTRL_GET_CAPS_STRUCT; -+#define ELANCTRL_GET_CAPS _IOW ('e', ELANCRTL_USER_BASE + 14, ELANCTRL_GET_CAPS_STRUCT) -+ -+ -+typedef struct elanctrl_debug_buffer_struct -+{ -+ caddr_t buffer; -+ int size; -+} ELANCTRL_DEBUG_BUFFER_STRUCT; -+#define ELANCTRL_DEBUG_BUFFER _IOW ('e', ELANCRTL_USER_BASE + 15, ELANCTRL_DEBUG_BUFFER_STRUCT) -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_MOD_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/elanmod_subsystem.h -=================================================================== ---- linux-2.4.21.orig/include/elan/elanmod_subsystem.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/elanmod_subsystem.h 2005-06-01 23:12:54.708422536 -0400 -@@ -0,0 +1,138 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_SUBSYSTEM_H -+#define __ELAN_SUBSYSTEM_H -+ -+#include -+#include -+ -+#if defined( __KERNEL__) -+int elan_configure( -+ cfg_op_t op, -+ caddr_t indata, -+ ulong indata_size, -+ caddr_t outdata, -+ ulong outdata_size); -+#endif -+ -+#define ELAN_KMOD_CODE(x) ((x)+CFG_OP_SUBSYS_MIN) -+#define ELAN_MAX_KMOD_CODES 100 -+ -+#define ELAN_SUBSYS "elan" -+ -+#define ELAN_STATS_GET_NEXT 0x01 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; -+} elan_stats_get_next_struct; -+ -+ -+#define ELAN_STATS_FIND_INDEX 0x02 -+typedef struct { -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_find_index_struct; -+ -+#define ELAN_STATS_GET_BLOCK_INFO 0x03 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_get_block_info_struct; -+ -+#define ELAN_STATS_GET_INDEX_NAME 0x04 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} elan_stats_get_index_name_struct; -+ -+#define ELAN_STATS_CLEAR_BLOCK 0x05 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+} elan_stats_clear_block_struct; -+ -+#define ELAN_STATS_GET_BLOCK 0x06 -+typedef struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} elan_stats_get_block_struct; -+ -+#define ELAN_GET_DEVINFO 0x07 -+typedef struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} elan_get_devinfo_struct; -+ -+#define ELAN_GET_POSITION 0x08 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} elan_get_position_struct; -+ -+#define ELAN_SET_POSITION 0x09 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} elan_set_position_struct; -+ -+#define ELAN_CREATE_CAP 0x0a -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_create_cap_struct; -+ -+#define ELAN_DESTROY_CAP 0x0b -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_destroy_cap_struct; -+ -+#define ELAN_CREATE_VP 0x0c -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_create_vp_struct; -+ -+#define ELAN_DESTROY_VP 0x0d -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_destroy_vp_struct; -+ -+ -+#define ELAN_DEBUG_DUMP 0x0e -+ -+#define ELAN_GET_CAPS 0x0f -+typedef struct { -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} elan_get_caps_struct; -+ -+#define ELAN_DEBUG_BUFFER 0x10 -+typedef struct { -+ caddr_t addr; -+ int len; -+} elan_debug_buffer_struct; -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_SUBSYSTEM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/epcomms.h -=================================================================== ---- linux-2.4.21.orig/include/elan/epcomms.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/epcomms.h 2005-06-01 23:12:54.710422232 -0400 -@@ -0,0 +1,635 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPCOMMS_H -+#define __ELAN_EPCOMMS_H -+ -+#ident "$Id: epcomms.h,v 1.44.2.2 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.h,v $ */ -+ -+#include -+#include -+ -+#define EPCOMMS_SUBSYS_NAME "epcomms" -+ -+/* message service numbers */ -+#define EP_MSG_SVC_EIP512 0x00 /* Quadrics EIP services */ -+#define EP_MSG_SVC_EIP1K 0x01 -+#define EP_MSG_SVC_EIP2K 0x02 -+#define EP_MSG_SVC_EIP4K 0x03 -+#define EP_MSG_SVC_EIP8K 0x04 -+#define EP_MSG_SVC_EIP16K 0x05 -+#define EP_MSG_SVC_EIP32K 0x06 -+#define EP_MSG_SVC_EIP64K 0x07 -+#define EP_MSG_SVC_EIP128K 0x08 -+ -+#define EP_MSG_SVC_PFS 0x09 /* Quadrics PFS rpc service */ -+ -+#define EP_MSG_SVC_PORTALS_SMALL 0x10 /* Lustre Portals */ -+#define EP_MSG_SVC_PORTALS_LARGE 0x11 -+ -+#define EP_MSG_NSVC 0x40 /* Max number of services */ -+ -+#define EP_MSGQ_ADDR(qnum) (EP_EPCOMMS_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+/* -+ * EP_ENVELOPE -+ * Messages are sent by sending an envelope to the destination -+ * describing the source buffers to transfer. The receiving thread -+ * then allocates a receive buffer and fetches the data by issuing -+ * "get" dmas. -+ * -+ * NOTE: envelopes are not explicitly converted to network byte order -+ * since they are always transferred little endian as they are -+ * copied to/from elan memory using word operations. -+ */ -+typedef struct ep_envelope -+{ -+ uint32_t Version; /* Protocol version field */ -+ -+ EP_ATTRIBUTE Attr; /* Attributes */ -+ -+ EP_XID Xid; /* transaction id */ -+ -+ uint32_t NodeId; /* Source processor */ -+ uint32_t Range; /* range we're sending to (high << 16 | low) */ -+ -+ EP_ADDR TxdRail; /* address of per-rail txd */ -+ EP_NMD TxdMain; /* address of main memory portion of txd */ -+ -+ uint32_t nFrags; /* # fragments */ -+ EP_NMD Frags[EP_MAXFRAG]; /* network mapping handles of source data */ -+ -+ uint32_t CheckSum; /* holds the check sum value when active -+ * must be after all members to be checksum'd -+ */ -+ -+ uint32_t Pad[6]; /* Pad to 128 bytes */ -+} EP_ENVELOPE; -+ -+#define EP_ENVELOPE_VERSION 0xdac10001 -+#define EP_ENVELOPE_SIZE roundup (sizeof (EP_ENVELOPE), EP_BLK_SIZE) -+ -+/* -+ * RPC payload - this small amount of data is transfered in -+ * the envelope for RPCs -+ */ -+typedef struct ep_payload -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_PAYLOAD; -+ -+#define EP_PAYLOAD_SIZE roundup (sizeof (EP_PAYLOAD), EP_BLK_SIZE) -+ -+#define EP_INPUTQ_SIZE (EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE) -+ -+/* -+ * EP_STATUSBLK -+ * RPC completion transfers a status block to the client. -+ */ -+typedef struct ep_statusblk -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_STATUSBLK; -+ -+#define EP_STATUSBLK_SIZE roundup (sizeof(EP_STATUSBLK), EP_BLK_SIZE) -+ -+#define EP_RANGE(low,high) ((high) << 16 | (low)) -+#define EP_RANGE_LOW(range) ((range) & 0xFFFF) -+#define EP_RANGE_HIGH(range) (((range) >> 16) & 0xFFFF) -+ -+/* return codes from functions, + 'res' parameter to txd callback, ep_rxd_status() */ -+typedef enum -+{ -+ EP_SUCCESS = 0, /* message sent/received successfully */ -+ EP_RXD_PENDING = -1, /* rxd not completed by thread */ -+ EP_CONN_RESET = -2, /* virtual circuit reset */ -+ EP_NODE_DOWN = -3, /* node down - transmit not attempted */ -+ EP_MSG_TOO_BIG = -4, /* received message larger than buffer */ -+ EP_ENOMEM = -5, /* memory alloc failed */ -+ EP_EINVAL = -6, /* invalid parameters */ -+ EP_SHUTDOWN = -7, /* receiver is being shut down */ -+} EP_STATUS; -+ -+/* forward declarations */ -+typedef struct ep_rxd EP_RXD; -+typedef struct ep_txd EP_TXD; -+typedef struct ep_rcvr_rail EP_RCVR_RAIL; -+typedef struct ep_rcvr EP_RCVR; -+typedef struct ep_xmtr_rail EP_XMTR_RAIL; -+typedef struct ep_xmtr EP_XMTR; -+typedef struct ep_comms_rail EP_COMMS_RAIL; -+typedef struct ep_comms_subsys EP_COMMS_SUBSYS; -+ -+typedef struct ep_rcvr_stats EP_RCVR_STATS; -+typedef struct ep_xmtr_stats EP_XMTR_STATS; -+typedef struct ep_rcvr_rail_stats EP_RCVR_RAIL_STATS; -+typedef struct ep_xmtr_rail_stats EP_XMTR_RAIL_STATS; -+ -+typedef void (EP_RXH)(EP_RXD *rxd); /* callback function from receive completion */ -+typedef void (EP_TXH)(EP_TXD *txd, void *arg, EP_STATUS res); /* callback function from transmit completion */ -+ -+/* Main memory portion shared descriptor */ -+typedef struct ep_rxd_main -+{ -+ EP_ENVELOPE Envelope; /* 128 byte aligned envelope */ -+ EP_PAYLOAD Payload; /* 128 byte aligned payload */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+ EP_STATUSBLK StatusBlk; /* RPC status block to return */ -+ uint64_t Next; /* linked list when on active list (main address) */ -+ int32_t Len; /* Length of message received */ -+} EP_RXD_MAIN; -+ -+#define EP_RXD_MAIN_SIZE roundup (sizeof (EP_RXD_MAIN), EP_BLK_SIZE) -+ -+/* Phases for message/rpc */ -+#ifndef __ELAN__ -+ -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep_rxd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_RCVR_RAIL *RcvrRail; /* rvcr we're associated with */ -+ -+ EP_RXD *Rxd; /* receive descriptor we're bound to */ -+} EP_RXD_RAIL; -+ -+#define RXD_BOUND2RAIL(rxdRail,rcvrRail) ((rxdRail) != NULL && ((EP_RXD_RAIL *) (rxdRail))->RcvrRail == (EP_RCVR_RAIL *) rcvrRail) -+ -+struct ep_rxd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_RCVR *Rcvr; /* owning receiver */ -+ -+ EP_RXD_MAIN *RxdMain; /* shared main memory portion. */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_RXD_RAIL *RxdRail; /* per-rail rxd we're bound to */ -+ -+ EP_RXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned int State; /* RXD status (active,stalled,failed) */ -+ -+ EP_NMD Data; /* network mapping descriptor for user buffer */ -+ -+ int nFrags; /* network mapping descriptor for put/get/complete */ -+ EP_NMD Local[EP_MAXFRAG]; -+ EP_NMD Remote[EP_MAXFRAG]; -+ -+ long NextRunTime; /* time to resend failover/map requests */ -+ EP_XID MsgXid; /* and transaction id */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ struct list_head CheckSumLink; /* linked on check sum list */ -+#endif -+}; -+ -+#define EP_NUM_RXD_PER_BLOCK 16 -+ -+/* rxd->State */ -+#define EP_RXD_FREE 0 -+ -+#define EP_RXD_RECEIVE_UNBOUND 1 -+#define EP_RXD_RECEIVE_ACTIVE 2 -+ -+#define EP_RXD_PUT_ACTIVE 3 -+#define EP_RXD_PUT_STALLED 4 -+#define EP_RXD_GET_ACTIVE 5 -+#define EP_RXD_GET_STALLED 6 -+ -+#define EP_RXD_COMPLETE_ACTIVE 7 -+#define EP_RXD_COMPLETE_STALLED 8 -+ -+#define EP_RXD_RPC_IN_PROGRESS 9 -+#define EP_RXD_COMPLETED 10 -+ -+#define EP_RXD_BEEN_ABORTED 11 /* rxd was aborted while in a private state */ -+ -+typedef struct ep_rxd_block -+{ -+ struct list_head Link; -+ -+ EP_NMD NmdMain; -+ -+ EP_RXD Rxd[EP_NUM_RXD_PER_BLOCK]; -+} EP_RXD_BLOCK; -+ -+struct ep_rcvr_rail_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr_rail -+{ -+ EP_RCVR *Rcvr; /* associated receiver */ -+ EP_COMMS_RAIL *CommsRail; /* comms rail */ -+ -+ struct proc_dir_entry *procfs_root; /* root of this rcvr_rail's procfs entry */ -+ EP_RCVR_RAIL_STATS stats; /* generic rcvr_rail stats */ -+}; -+ -+struct ep_rcvr_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr -+{ -+ struct list_head Link; /* queued on subsystem */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ EP_SERVICE Service; /* service number */ -+ -+ unsigned int InputQueueEntries; /* # entries on receive queue */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_RCVR_RAIL *Rails[EP_MAX_RAILS]; -+ -+ spinlock_t Lock; /* spinlock for rails/receive lists */ -+ -+ struct list_head ActiveDescList; /* List of pending/active receive descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ unsigned int ForwardRxdCount; /* count of rxd's being forwarded */ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and place to sleep */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_RCVR_STATS stats; -+}; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#define EP_ENVELOPE_CHECK_SUM (1<<31) -+extern uint32_t ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags); -+#endif -+ -+#endif /* ! __ELAN__ */ -+ -+typedef struct ep_txd_main -+{ -+ EP_STATUSBLK StatusBlk; /* RPC status block */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+} EP_TXD_MAIN; -+ -+#define EP_TXD_MAIN_SIZE roundup (sizeof (EP_TXD_MAIN), EP_BLK_SIZE) -+ -+#ifndef __ELAN__ -+typedef struct ep_txd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_XMTR_RAIL *XmtrRail; /* xmtr we're associated with */ -+ -+ EP_TXD *Txd; /* txd we're bound to */ -+} EP_TXD_RAIL; -+ -+#define TXD_BOUND2RAIL(rxdRail,xmtrRail) ((txdRail) != NULL && ((EP_TXD_RAIL *) (txdRail))->XmtrRail == (EP_XMTR_RAIL *) xmtrRail) -+ -+struct ep_txd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_XMTR *Xmtr; /* service we're associated with */ -+ -+ EP_TXD_MAIN *TxdMain; /* shared main memory portion */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_TXD_RAIL *TxdRail; /* per-rail txd for this phase */ -+ -+ EP_TXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned short NodeId; /* node transmit is to. */ -+ EP_SERVICE Service; /* and seervice */ -+ -+ long TimeStamp; /* time we where created at, to find sends taking too long */ -+ long RetryTime; -+ EP_BACKOFF Backoff; -+ -+ EP_ENVELOPE Envelope; /* envelope for transmit */ -+ EP_PAYLOAD Payload; /* payload for transmit */ -+}; -+ -+#define EP_NUM_TXD_PER_BLOCK 16 -+ -+/* "phase" parameter to BindTxd */ -+#define EP_TXD_PHASE_ACTIVE 1 -+#define EP_TXD_PHASE_PASSIVE 2 -+ -+typedef struct ep_txd_block -+{ -+ struct list_head Link; -+ EP_NMD NmdMain; -+ EP_TXD Txd[EP_NUM_TXD_PER_BLOCK]; /* transmit descriptors */ -+} EP_TXD_BLOCK; -+ -+struct ep_xmtr_rail_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr_rail -+{ -+ EP_COMMS_RAIL *CommsRail; /* associated comms rail */ -+ EP_XMTR *Xmtr; /* associated transmitter */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this xmtr's proc entry is */ -+ -+ EP_XMTR_RAIL_STATS stats; -+}; -+ -+struct ep_xmtr_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_XMTR_RAIL *Rails[EP_MAX_RAILS]; /* per-rail state */ -+ -+ spinlock_t Lock; /* lock for active descriptor list */ -+ -+ struct list_head ActiveDescList; /* list of active transmit descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_XMTR_STATS stats; -+}; -+ -+/* forward descriptor */ -+#define EP_TREE_ARITY 3 -+ -+typedef struct ep_fwd_desc -+{ -+ struct list_head Link; /* linked on forward/free lists */ -+ EP_RXD *Rxd; /* rxd to forward */ -+ EP_NMD Data; /* nmd of subset of receive buffer */ -+ unsigned NumChildren; /* number of places we're forwarding */ -+ unsigned Children[EP_TREE_ARITY]; -+} EP_FWD_DESC; -+ -+typedef struct ep_comms_ops -+{ -+ void (*DelRail) (EP_COMMS_RAIL *rail); -+ void (*DisplayRail) (EP_COMMS_RAIL *rail); -+ -+ struct { -+ void (*AddRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+ -+ int (*QueueRxd) (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+ void (*RpcPut)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcGet)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcComplete)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+ EP_RXD *(*StealRxd)(EP_RCVR_RAIL *rcvrRail); -+ -+ void (*DisplayRcvr) (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+ void (*DisplayRxd) (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+ void (*FillOutRailStats) (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+ } Rcvr; -+ -+ struct { -+ void (*AddRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+ -+ int (*BindTxd) (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+ void (*UnbindTxd) (EP_TXD *txd, unsigned int phase); -+ int (*PollTxd) (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+ -+ void (*DisplayXmtr) (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+ void (*DisplayTxd) (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+ int (*CheckTxdState) (EP_TXD *txd); -+ -+ void (*FillOutRailStats) (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+ } Xmtr; -+} EP_COMMS_OPS; -+ -+#define EP_RAIL_OP(commsRail, Which) (commsRail)->Ops.Which -+#define EP_RCVR_OP(rcvrRail, Which) (rcvrRail)->CommsRail->Ops.Rcvr.Which -+#define EP_XMTR_OP(xmtrRail, Which) (xmtrRail)->CommsRail->Ops.Xmtr.Which -+ -+/* "how" parameter to PollTxd */ -+#define POLL_TX_LIST 0 -+#define ENABLE_TX_CALLBACK 1 -+#define DISABLE_TX_CALLBACK 2 -+ -+struct ep_comms_rail -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_RAIL *Rail; /* kernel comms rail */ -+ EP_COMMS_SUBSYS *Subsys; -+ EP_COMMS_OPS Ops; -+ -+ EP_COMMS_RAIL_STATS Stats; /* statistics */ -+}; -+ -+struct ep_comms_subsys -+{ -+ EP_SUBSYS Subsys; /* is a kernel comms subsystem */ -+ -+ kmutex_t Lock; /* global lock */ -+ -+ EP_COMMS_STATS Stats; /* statistics */ -+ -+ struct list_head Rails; /* list of all rails */ -+ -+ struct list_head Receivers; /* list of receivers */ -+ struct list_head Transmitters; /* and transmitters */ -+ -+ /* forward/allocator thread */ -+ EP_KTHREAD Thread; /* place thread sleeps */ -+ -+ /* message passing "broadcast" forward lists */ -+ spinlock_t ForwardDescLock; /* Lock for broadcast forwarding */ -+ struct list_head ForwardDescList; /* List of rxd's to forward */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spinlock_t CheckSumDescLock; /* Lock for CheckSums */ -+ struct list_head CheckSumDescList; /* List of rxd's to be CheckSumed */ -+#endif -+ -+ EP_XMTR *ForwardXmtr; /* and transmitter to forward with */ -+}; -+ -+/* epcomms.c subsystem initialisation */ -+extern unsigned int epcomms_forward_limit; -+ -+extern int ep_comms_init (EP_SYS *sys); -+extern void ep_comms_display (EP_SYS *sys, char *how); -+extern EP_RAILMASK ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcomms_elan4.c */ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcommsTx.c */ -+extern int TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail); -+extern void FreeTxd (EP_XMTR *xmtr, EP_TXD *txd); -+ -+extern unsigned int ep_txd_lowat; -+extern long ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime); -+extern void ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr); -+extern void ep_xmtr_flush_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+extern void ep_xmtr_reloc_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+ -+extern void ep_xmtr_fillout_stats (EP_XMTR *xmtr, char *str); -+extern void ep_xmtr_rail_fillout_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+extern void ep_xmtr_txd_stat (EP_XMTR *xmtr, EP_TXD *txd); -+ -+/* epcommsRx.c */ -+extern EP_RXD *StealRxdFromOtherRail (EP_RCVR *rcvr); -+ -+extern unsigned int ep_rxd_lowat; -+extern long ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime); -+extern void ep_rcvr_flush_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_rcvr_reloc_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full); -+ -+extern long ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime); -+ -+extern void ep_rcvr_fillout_stats (EP_RCVR *rcvr, char *str); -+extern void ep_rcvr_rail_fillout_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern void ep_csum_rxds (EP_COMMS_SUBSYS *subsys); -+extern void ep_rxd_queue_csum (EP_RXD *rxd); -+#endif -+ -+extern void ep_rxd_received (EP_RXD *rxd); -+extern void ep_rxd_received_now (EP_RXD *rxd); -+ -+/* ep_procfs.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+ -+extern void ep_procfs_rcvr_xmtr_init(void); -+extern void ep_procfs_rcvr_xmtr_fini(void); -+ -+extern void ep_procfs_rcvr_add(EP_RCVR *rcvr); -+extern void ep_procfs_rcvr_del(EP_RCVR *rcvr); -+ -+extern void ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail); -+extern void ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail); -+ -+extern void ep_procfs_xmtr_add(EP_XMTR *xmtr); -+extern void ep_procfs_xmtr_del(EP_XMTR *xmtr); -+ -+extern void ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail); -+extern void ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail); -+ -+ -+/* Public Interface */ -+ -+ -+/* epcomms.c message xmtr functions */ -+extern EP_XMTR *ep_alloc_xmtr (EP_SYS *sys); -+extern void ep_free_xmtr (EP_XMTR *xmtr); -+ -+extern EP_STATUS ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, -+ EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_PAYLOAD *payload, EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_ENVELOPE *env, EP_PAYLOAD *payload, -+ bitmap_t *bitmap, EP_NMD *nmd, int nFrags); -+ -+/* epcomms.c functions for use with polled transmits */ -+extern int ep_poll_transmits (EP_XMTR *xmtr); -+extern int ep_enable_txcallbacks (EP_XMTR *xmtr); -+extern int ep_disable_txcallbacks (EP_XMTR *xmtr); -+ -+/* epcomms.c message rcvr functions */ -+extern EP_RCVR *ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvelopes); -+extern void ep_free_rcvr (EP_RCVR *rcvr); -+ -+extern EP_STATUS ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern void ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern EP_STATUS ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, -+ EP_NMD *from, EP_NMD *to, int nFrags); -+extern void ep_complete_receive (EP_RXD *rxd); -+ -+/* railhints.c */ -+extern int ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails); -+extern int ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId); -+extern EP_RAILMASK ep_xmtr_availrails (EP_XMTR *xmtr); -+extern EP_RAILMASK ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId); -+extern int ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails); -+extern EP_RAILMASK ep_rcvr_availrails (EP_RCVR *rcvr); -+extern EP_RAILMASK ep_rxd_railmask (EP_RXD *rxd); -+ -+/* epcomms.c functions for accessing fields of rxds */ -+extern void *ep_rxd_arg(EP_RXD *rxd); -+extern int ep_rxd_len(EP_RXD *rxd); -+extern EP_STATUS ep_rxd_status(EP_RXD *rxd); -+extern int ep_rxd_isrpc(EP_RXD *rxd); -+extern EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd); -+extern EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd); -+extern int ep_rxd_node(EP_RXD *rxd); -+extern EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd); -+ -+/* functions for accessing fields of txds */ -+extern int ep_txd_node(EP_TXD *txd); -+extern EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd); -+ -+/* functions for controlling how many processes are using module */ -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+extern EP_RAILMASK ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId); -+extern int ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+#endif /* ! __ELAN__ */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_EPCOMMS_H */ -+ -Index: linux-2.4.21/include/elan/epsvc.h -=================================================================== ---- linux-2.4.21.orig/include/elan/epsvc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/epsvc.h 2005-06-01 23:12:54.710422232 -0400 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPSVC_H -+#define __ELAN_EPSVC_H -+ -+#ident "@(#)$Id: epsvc.h,v 1.9 2004/02/13 10:03:27 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epsvc.h,v $ */ -+ -+ -+#define EP_SVC_NUM_INDICATORS 8 -+#define EP_SVC_INDICATOR_MAX_NAME 32 -+ -+#define EP_SVC_EIP 0 -+#define EP_SVC_NAMES {"eip", "1", "2", "3", "4", "5", "6", "7"}; -+ -+#if defined(__KERNEL__) -+extern int ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId); -+extern int ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+extern EP_RAILMASK ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId); -+#endif -+ -+#endif /* __ELAN_EPSVC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/kalloc.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kalloc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kalloc.h 2005-06-01 23:12:54.710422232 -0400 -@@ -0,0 +1,108 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KALLOC_H -+#define __ELAN3_KALLOC_H -+ -+#ident "$Id: kalloc.h,v 1.11 2004/05/19 10:23:59 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.h,v $ */ -+ -+#include -+ -+/* -+ * Memory allocator -+ */ -+#define LN2_MIN_SIZE 6 /* 64 bytes */ -+#define LN2_MAX_SIZE 16 /* 64k bytes */ -+#define NUM_FREELISTS (LN2_MAX_SIZE-LN2_MIN_SIZE + 1) -+#define MIN_SIZE (1 << LN2_MIN_SIZE) -+#define MAX_SIZE (1 << LN2_MAX_SIZE) -+ -+#define HASHSHIFT LN2_MAX_SIZE -+#define NHASH 32 -+#define HASH(addr) (((addr) >> HASHSHIFT) & (NHASH-1)) -+ -+typedef enum -+{ -+ EP_ALLOC_TYPE_PRIVATE_SDRAM, -+ EP_ALLOC_TYPE_PRIVATE_MAIN, -+ EP_ALLOC_TYPE_SHARED_MAIN, -+} EP_ALLOC_TYPE; -+ -+typedef struct ep_pool -+{ -+ EP_NMH Handle; /* network mapping handle */ -+ -+ struct list_head HashBase; /* linked on hash lists */ -+ struct list_head HashTop; /* linked on hash lists */ -+ -+ struct list_head Link[NUM_FREELISTS]; /* linked on free lists */ -+ bitmap_t *Bitmaps[NUM_FREELISTS]; /* bitmaps for each size */ -+ -+ union { -+ sdramaddr_t Sdram; -+ unsigned long Ptr; -+ } Buffer; -+} EP_POOL; -+ -+typedef struct ep_alloc -+{ -+ spinlock_t Lock; -+ -+ EP_ALLOC_TYPE Type; -+ unsigned int Perm; -+ -+ EP_RMAP *ResourceMap; -+ -+ struct list_head HashBase[NHASH]; -+ struct list_head HashTop[NHASH]; -+ struct list_head Freelists[NUM_FREELISTS]; -+ -+ union { -+ struct { -+ EP_SYS *System; -+ struct list_head Rails; -+ } Shared; -+ -+ struct { -+ EP_RAIL *Rail; -+ } Private; -+ } Data; -+} EP_ALLOC; -+ -+extern void ep_display_alloc (EP_ALLOC *alloc); -+ -+extern void ep_alloc_init (EP_RAIL *rail); -+extern void ep_alloc_fini (EP_RAIL *rail); -+ -+extern sdramaddr_t ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr); -+extern void ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr); -+ -+extern sdramaddr_t ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp); -+extern void ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+extern void *ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addr); -+extern void ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ -+extern sdramaddr_t ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr); -+extern void *ep_elan2main (EP_RAIL *rail, EP_ADDR addr); -+ -+extern void ep_shared_alloc_init (EP_SYS *sys); -+extern void ep_shared_alloc_fini (EP_SYS *sys); -+extern int ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern void *ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd); -+extern void ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd); -+ -+#endif /* __ELAN_KALLOC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/kcomm.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kcomm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kcomm.h 2005-06-01 23:12:54.712421928 -0400 -@@ -0,0 +1,839 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KCOMM_H -+#define __ELAN_KCOMM_H -+ -+#ident "$Id: kcomm.h,v 1.71.2.8 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.h,v $*/ -+#define EP_KCOMM_MAJOR_VERSION 3 -+#define EP_KCOMM_MINOR_VERSION 1 -+ -+#define EP_PROTOCOL_VERSION 1 /* CM/KCOMM protocol revision */ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+#define EP_MAX_RAILS 16 /* max number of rails (we use an unsigned short for bitmaps !) */ -+#define EP_MAXFRAG 4 /* max number of fragments */ -+ -+#define EP_BLK_SIZE 64 /* align objects for elan access */ -+ -+/* Elan virtual address address space */ -+#define EP_SYSTEM_QUEUE_BASE 0x00010000 /* Base address for system queues */ -+#define EP_MSGSYS_QUEUE_BASE 0x00020000 /* Base address for msgsys queues */ -+#define EP_EPCOMMS_QUEUE_BASE 0x00030000 /* Base address for message queues */ -+#define EP_DVMA_BASE 0x10000000 /* elan address range for dvma mapping. */ -+#define EP_DVMA_TOP 0xE0000000 -+ -+#define EP_SHARED_BASE 0xE0000000 /* shared main/elan allocators */ -+#define EP_SHARED_TOP 0xF0000000 -+ -+#define EP_PRIVATE_BASE 0xF0000000 /* private main/elan allocators */ -+#define EP_PRIVATE_TOP 0xF8000000 -+ -+#define EP_DVMA_RMAP_SIZE 1024 /* size of resource map for dvma address space */ -+#define EP_SHARED_RMAP_SIZE 1024 /* size of resource map for shared address space */ -+#define EP_PRIVATE_RMAP_SIZE 1024 /* size of resource map for private address space */ -+ -+/* Input queue descriptors fit into 64 bytes */ -+#define EP_QUEUE_DESC_SIZE 64 -+ -+/* Timeouts for checking network position */ -+#define EP_POSITION_TIMEOUT (4*HZ) /* 1s time to notice CheckNetworkPosition changes */ -+#define EP_WITHDRAW_TIMEOUT (2*HZ) /* 2s time before withdrawing from unreachable nodes */ -+ -+/* Time to try again due to resource failue (eg malloc etc) */ -+#define RESOURCE_RETRY_TIME (HZ/20) -+ -+/* Time to retransmit message when send failed */ -+#define MSGBUSY_RETRY_TIME (HZ/20) -+ -+/* Time between retransmits of messages network flush requests */ -+#define MESSAGE_RETRY_TIME (HZ/5) -+ -+/* time to hold the context filter up to ensure that the -+ * next packet of a dma is guaranteed to get nacked (8mS) */ -+#define NETWORK_ERROR_TIMEOUT (1 + roundup (HZ * 8 / 1000, 1)) -+ -+/* Time between retransmits of message failover requests */ -+#define FAILOVER_RETRY_TIME (HZ/5) -+ -+/* compute earliest time */ -+#define SET_NEXT_RUN_TIME(nextRunTime, time) \ -+do { \ -+ if ((nextRunTime) == 0 || AFTER(nextRunTime, (time)))\ -+ (nextRunTime) = (time);\ -+} while (0) -+ -+/* DMA retry backoff/priorities/issue rings */ -+#define EP_NUM_BACKOFF 8 -+#define EP_RETRY_STABALISING 0 -+#define EP_RETRY_BASE 1 -+ -+#define EP_RETRY_CRITICAL EP_RETRY_BASE -+#define EP_RETRY_HIGH_PRI (EP_RETRY_CRITICAL + 1) -+#define EP_RETRY_HIGH_PRI_TIME (1) -+#define EP_RETRY_HIGH_PRI_RETRY (EP_RETRY_HIGH_PRI + 1) -+#define EP_RETRY_HIGH_PRI_RETRY_TIME (2) -+#define EP_RETRY_LOW_PRI (EP_RETRY_HIGH_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_LOW_PRI_TIME (2) -+#define EP_RETRY_LOW_PRI_RETRY (EP_RETRY_LOW_PRI + 1) -+#define EP_RETRY_LOW_PRI_RETRY_TIME (4) -+#define EP_RETRY_ANONYMOUS (EP_RETRY_LOW_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_ANONYMOUS_TIME (10) -+#define EP_RETRY_NETERR (EP_RETRY_ANONYMOUS + EP_NUM_BACKOFF) -+#define EP_RETRY_NETERR_TIME (10) -+#define EP_NUM_RETRIES (EP_RETRY_NETERR + 1) -+ -+typedef unsigned short EP_SERVICE; -+ -+/* EP_ATTRIBUTE 32 bits -+ * -+ * 0-2 -+ * for initial call :- -+ * 0 (0x1) EP_NO_ALLOC used once -+ * 1 (0x2) EP_NO_SLEEP used once -+ * 2 (0x4) EP_NOT_MYSELF used once -+ * -+ * when stored and transmited :- -+ * 0 (0x0) EP_MULTICAST envelope -+ * 1 (0x2) EP_RPC envelope -+ * 2 (0x4) EP_HAS_PAYLOAD envelope -+ * -+ * 3-11 -+ * 3 (0x08) EP_PREFRAIL_SET preserved -+ * 4-7 (0xf0) Pref Rail -+ * 8 (0x100) EP_NO_INTERUPT -+ * 9 (0x200) EP_NO_FAILOVER -+ * -+ * 10 (0x400) EP_INTERRUPT_ENABLED internal -+ * 11 (0x800) EP_TXD_STABALISING internal -+ * -+ * 12-13 Not Used. -+ * -+ * 14-15 (0xC000) Data Type. passed in -+ * 00 none. -+ * 01 Service Indicator. -+ * 10 TimeOut. -+ * 11 RailMask -+ * -+ * 16-31 (0x10000) Data. Service Indicator, TimeOut, RailMask, Pref Rail. -+ * -+*/ -+ -+typedef uint32_t EP_ATTRIBUTE; -+ -+#define EP_LOCAL_ATTR_MASK 0x07 -+#define EP_CLEAR_LOCAL_ATTR(ATTR) ( (ATTR) & ~EP_LOCAL_ATTR_MASK ) -+ -+#define EP_NO_ALLOC 0x01 /* Don't call allocators if no free descriptors */ -+#define EP_NO_SLEEP 0x02 /* Don't sleep if no free descriptors */ -+#define EP_NOT_MYSELF 0x04 /* Don't send multicast to me */ -+ -+#define EP_MULTICAST 0x01 /* Message is a multicast */ -+#define EP_RPC 0x02 /* Wait for RPC reply */ -+#define EP_HAS_PAYLOAD_BIT 0x04 /* transfer payload */ -+ -+ -+#define EP_PREFRAIL_SET 0x08 /* preferred rail is set (otherwise pick one from the NMDs) */ -+ -+#define EP_PREFRAIL_SHIFT (4) -+#define EP_PREFRAIL_MASK 0xf0 -+#define EP_IS_PREFRAIL_SET(ATTR) (((ATTR) & EP_PREFRAIL_SET) != 0) -+#define EP_CLEAR_PREFRAIL(ATTR) (((ATTR) & ~EP_PREFRAIL_SET) & ~EP_PREFRAIL_MASK) -+#define EP_SET_PREFRAIL(ATTR,RAIL) (EP_CLEAR_PREFRAIL(ATTR) | (((RAIL) << EP_PREFRAIL_SHIFT ) & EP_PREFRAIL_MASK ) | EP_PREFRAIL_SET) -+ -+ -+#define EP_ATTR2PREFRAIL(ATTR) (((ATTR) & EP_PREFRAIL_MASK) >> EP_PREFRAIL_SHIFT) -+ -+ -+#define EP_INTERRUPT_ENABLED 0x400 /* event interrupt enabled on EP_NO_INTERRUPT */ -+#define EP_TXD_STABALISING 0x800 /* flag to indicate this is attempting to stabalise */ -+ -+#define EP_IS_MULTICAST(ATTR) (((ATTR) & EP_MULTICAST) != 0) -+#define EP_SET_MULTICAST(ATTR) ( (ATTR) | EP_MULTICAST) -+#define EP_CLEAR_MULTICAST(ATTR) ( (ATTR) & ~EP_MULTICAST) -+ -+#define EP_IS_RPC(ATTR) (((ATTR) & EP_RPC) != 0) -+#define EP_SET_RPC(ATTR) ( (ATTR) | EP_RPC) -+#define EP_CLEAR_RPC(ATTR) ( (ATTR) & ~EP_RPC) -+ -+#define EP_HAS_PAYLOAD(ATTR) (((ATTR) & EP_HAS_PAYLOAD_BIT) != 0) -+#define EP_SET_HAS_PAYLOAD(ATTR) ( (ATTR) | EP_HAS_PAYLOAD_BIT) -+#define EP_CLEAR_HAS_PAYLOAD(ATTR) ( (ATTR) & ~EP_HAS_PAYLOAD_BIT) -+ -+#define EP_IS_INTERRUPT_ENABLED(ATTR) (((ATTR) & EP_INTERRUPT_ENABLED) != 0) -+#define EP_SET_INTERRUPT_ENABLED(ATTR) ( (ATTR) | EP_INTERRUPT_ENABLED) -+#define EP_CLEAR_INTERRUPT_ENABLED(ATTR) ( (ATTR) & ~EP_INTERRUPT_ENABLED) -+ -+#define EP_IS_TXD_STABALISING(ATTR) (((ATTR) & EP_TXD_STABALISING) != 0) -+#define EP_SET_TXD_STABALISING(ATTR) ( (ATTR) | EP_TXD_STABALISING) -+#define EP_CLEAR_TXD_STABALISING(ATTR) ( (ATTR) & ~EP_TXD_STABALISING) -+ -+#define EP_NO_INTERRUPT 0x100 /* Don't generate completion interrupt (tx) */ -+#define EP_NO_FAILOVER 0x200 /* don't attempt rail failover, just abort */ -+ -+#define EP_IS_NO_INTERRUPT(ATTR) (((ATTR) & EP_NO_INTERRUPT) != 0) -+#define EP_SET_NO_INTERRUPT(ATTR) ( (ATTR) | EP_NO_INTERRUPT) -+#define EP_CLEAR_NO_INTERRUPT(ATTR) ( (ATTR) & ~EP_NO_INTERRUPT) -+ -+#define EP_IS_NO_FAILOVER(ATTR) (((ATTR) & EP_NO_FAILOVER) != 0) -+#define EP_SET_NO_FAILOVER(ATTR) ( (ATTR) | EP_NO_FAILOVER) -+#define EP_CLEAR_NO_FAILOVER(ATTR) ( (ATTR) & ~EP_NO_FAILOVER) -+ -+#define EP_TYPE_MASK 0xC000 -+#define EP_TYPE_SVC_INDICATOR 0x4000 -+#define EP_TYPE_TIMEOUT 0x8000 -+#define EP_TYPE_RAILMASK 0xC000 -+ -+#define EP_ATTR2TYPE(ATTR) ( (ATTR) & EP_TYPE_MASK ) -+ -+#define EP_IS_SVC_INDICATOR(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_SVC_INDICATOR) -+#define EP_IS_TIMEOUT(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_TIMEOUT) -+#define EP_IS_RAILMASK(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_RAILMASK) -+#define EP_IS_NO_TYPE(ATTR) (EP_ATTR2TYPE(ATTR) == 0) -+ -+#define EP_DATA_SHIFT (16) -+#define EP_DATA_MASK 0xffff0000 -+ -+#define EP_ATTR2DATA(ATTR) (((ATTR) & EP_DATA_MASK) >> EP_DATA_SHIFT) -+#define EP_DATA2ATTR(DATA) (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK) -+ -+#define EP_CLEAR_DATA(ATTR) (((ATTR) & ~EP_TYPE_MASK) & ~EP_DATA_MASK) -+#define EP_SET_DATA(ATTR,TYPE,DATA) (EP_CLEAR_DATA(ATTR) | ((TYPE) & EP_TYPE_MASK) | (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK)) -+ -+#define EP_DEFAULT_TIMEOUT (HZ*30) -+ -+#if !defined(offsetof) -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+#if !defined(roundup) -+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) -+#endif -+ -+/* -+ * Message transaction ID's - these are unique 64 bts -+ * numbers which include the initial rail number. -+ */ -+typedef struct ep_xid -+{ -+ uint32_t Generation; -+ uint32_t Handle; -+ uint64_t Unique; -+} EP_XID; -+ -+#define EP_INVALIDATE_XID(xid) ((xid).Generation = (xid).Handle = (xid).Unique = 0) -+ -+#define EP_XID_INVALID(xid) ((xid).Generation == 0 && (xid).Handle == 0 && (xid).Unique == 0) -+#define EP_XIDS_MATCH(a,b) ((a).Generation == (b).Generation && (a).Handle == (b).Handle && (a).Unique == (b).Unique) -+ -+typedef struct ep_backoff -+{ -+ unsigned char type; -+ unsigned char indx; -+ unsigned short count; -+} EP_BACKOFF; -+ -+/* values for "type" */ -+#define EP_BACKOFF_FREE 0 -+#define EP_BACKOFF_ENVELOPE 1 -+#define EP_BACKOFF_FETCH 2 -+#define EP_BACKOFF_DATA 3 -+#define EP_BACKOFF_DONE 4 -+#define EP_BACKOFF_STABILISE 5 -+ -+#ifndef __ELAN__ -+ -+/* forward declaration of types */ -+typedef struct ep_rail EP_RAIL; -+typedef struct ep_sys EP_SYS; -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct ep_callback -+{ -+ struct ep_callback *Next; -+ void (*Routine)(void *, statemap_t *); -+ void *Arg; -+} EP_CALLBACK; -+ -+#define EP_CB_FLUSH_FILTERING 0 -+#define EP_CB_FLUSH_FLUSHING 1 -+#define EP_CB_PASSIVATED 2 -+#define EP_CB_FAILOVER 3 -+#define EP_CB_DISCONNECTING 4 -+#define EP_CB_DISCONNECTED 5 -+#define EP_CB_NODESET 6 -+#define EP_CB_COUNT 7 -+ -+#endif /* !defined(__ELAN__) */ -+ -+/* Small unreliable system message queues */ -+#define EP_SYSTEMQ_INTR 0 /* input queue for cluster membership generating an interrupt */ -+#define EP_SYSTEMQ_POLLED 1 /* input queue for cluster membership polled on clock tick */ -+#define EP_SYSTEMQ_MANAGER 2 /* input queue for manager messages */ -+#define EP_NUM_SYSTEMQ 64 -+ -+#define EP_SYSTEMQ_ADDR(qnum) (EP_SYSTEM_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+#define EP_SYSTEMQ_DESC(base,qnum) ((base) + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+#define EP_SYSTEMQ_MSG_ALIGN 64 /* message sizes aligned to 64 byte boundaries */ -+#define EP_SYSTEMQ_MSG_MAX (4*64) /* max message size */ -+ -+/* Special flag for Version field to indicate message not -+ * seen in main memory yet and time limit to poll for it */ -+#define EP_SYSTEMQ_UNRECEIVED 0xdeadbabe -+#define EP_SYSTEMQ_UNRECEIVED_TLIMIT 16384 /* 1023 uS */ -+ -+#ifndef __ELAN__ -+ -+typedef void (EP_INPUTQ_HANDLER) (EP_RAIL *rail, void *arg, void *msg); -+typedef void (EP_INPUTQ_CALLBACK) (EP_RAIL *rail, void *arg); -+ -+typedef struct ep_inputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_INPUTQ; -+ -+typedef struct ep_outputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_OUTPUTQ; -+ -+/* returned values for ep_outputq_state */ -+#define EP_OUTPUTQ_BUSY 0 -+#define EP_OUTPUTQ_FAILED 1 -+#define EP_OUTPUTQ_FINISHED 2 -+ -+typedef struct ep_switch -+{ -+ unsigned present:1; -+ unsigned invalid:1; -+ unsigned link:3; -+ unsigned bcast:3; -+ unsigned lnr; -+} EP_SWITCH; -+ -+/* -+ * Network error fixup, flush, relocation messges -+ */ -+typedef struct ep_map_nmd_body -+{ -+ uint32_t nFrags; -+ EP_RAILMASK Railmask; -+ EP_NMD Nmd[EP_MAXFRAG]; -+} EP_MAP_NMD_BODY; -+ -+typedef struct ep_failover_body -+{ -+ EP_XID Xid; -+ EP_RAILMASK Railmask; -+} EP_FAILOVER_BODY; -+ -+typedef struct ep_failover_txd -+{ -+ EP_XID Xid; -+ uint32_t Rail; -+ EP_ADDR TxdRail; -+} EP_FAILOVER_TXD; -+ -+typedef uint64_t EP_NETERR_COOKIE; -+ -+#define EP_PANIC_STRLEN 31 -+ -+typedef struct ep_node_state -+{ -+ unsigned char State; -+ unsigned char NetworkErrorState; -+ EP_RAILMASK Railmask; -+} EP_NODE_STATE; -+ -+#define EP_MANAGER_MSG_SIZE (2 * EP_SYSTEMQ_MSG_ALIGN) -+ -+typedef struct ep_manager_msg_hdr -+{ -+ EP_XID Xid; /* Message transaction id */ -+ -+ uint16_t NodeId; /* Originating node number */ -+ uint16_t DestId; /* destination node id */ -+ -+ uint16_t Checksum; /* Message checksum */ -+ uint8_t Rail; /* Rail message associated with */ -+ uint8_t Type; /* Message type */ -+ -+ uint32_t Pad; /* pad to 32 bytes */ -+ -+ uint32_t Version; /* Message Version */ -+} EP_MANAGER_MSG_HDR; -+ -+typedef union ep_manager_msg_body -+{ -+ unsigned char Space[EP_MANAGER_MSG_SIZE - sizeof (EP_MANAGER_MSG_HDR)]; -+ -+ EP_NETERR_COOKIE Cookies[2]; /* EP_MSG_TYPE_NETERR */ -+ EP_MAP_NMD_BODY MapNmd; /* EP_MSG_TYPE_MAP_NMD */ -+ EP_FAILOVER_BODY Failover; /* EP_MSG_TYPE_FAILOVER_REQUEST */ -+ EP_FAILOVER_TXD FailoverTxd; /* EP_MSG_TYPE_FAILOVER_RESPONSE */ -+ unsigned char PanicReason[EP_PANIC_STRLEN+1]; /* EP_MSG_TYPE_REMOTE_PANIC */ -+ EP_NODE_STATE NodeState; /* EP_MSG_TYPE_GET_NODE_STATE_RESPONSE */ -+ EP_SERVICE Service; /* EP_MSG_TYPE_GET_NODE_STATE */ -+} EP_MANAGER_MSG_BODY; -+ -+typedef struct ep_manager_msg -+{ -+ EP_MANAGER_MSG_BODY Body; -+ EP_MANAGER_MSG_HDR Hdr; -+} EP_MANAGER_MSG; -+ -+#define EP_MANAGER_MSG_VERSION 0xcad01000 -+#define EP_MANAGER_MSG_TYPE_REMOTE_PANIC 0x00 -+#define EP_MANAGER_MSG_TYPE_NETERR_REQUEST 0x01 -+#define EP_MANAGER_MSG_TYPE_NETERR_RESPONSE 0x02 -+#define EP_MANAGER_MSG_TYPE_FLUSH_REQUEST 0x03 -+#define EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE 0x04 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST 0x05 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE 0x06 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST 0x07 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE 0x08 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE 0x09 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE 0x0a -+ -+/* Message types which should only be sent when a rail is connected */ -+#define EP_MANAGER_MSG_TYPE_CONNECTED(type) (((type) & 1) == 1) -+ -+#define EP_MANAGER_OUTPUTQ_SLOTS 128 /* # entries in outputq */ -+#define EP_MANAGER_INPUTQ_SLOTS 128 /* # entries in inputq */ -+#define EP_MANAGER_OUTPUTQ_RETRIES 31 /* # retries for manager messages */ -+ -+/* XID's are allocated from a cache, which doesn't -+ * require locking since it relies on the caller to -+ * manage the locking for us. -+ */ -+typedef struct ep_xid_cache -+{ -+ struct list_head Link; -+ -+ uint32_t Handle; /* my XID cache handle */ -+ uint64_t Current; /* range of XID.Unique we can allocate from */ -+ uint64_t Last; -+ -+ void (*MessageHandler)(void *arg, EP_MANAGER_MSG *); -+ void *Arg; -+} EP_XID_CACHE; -+ -+#define EP_XID_CACHE_CHUNKS (10000) -+ -+typedef struct ep_node_rail -+{ -+ struct list_head Link; /* can be linked on work lists */ -+ -+ unsigned char State; /* node connection state */ -+ unsigned char NetworkErrorState; /* reasons for keeping the context filter up */ -+ unsigned char MessageState; /* state of messages during passivate/relocate */ -+ -+ EP_XID MsgXid; /* neterr/flush transaction id */ -+ long NextRunTime; /* time to drop context filter for destroyed dma packet, or to send next request */ -+ EP_NETERR_COOKIE NetworkErrorCookies[2]; /* identify cookie for destroyed atomic packet */ -+ -+ uint32_t Cookie; /* per-node network error cookie */ -+ spinlock_t CookieLock; /* and spinlock for it. */ -+ -+ struct list_head StalledDmas; /* list of stalled DMAs */ -+} EP_NODE_RAIL; -+ -+#define EP_NODE_DISCONNECTED 0 /* node is disconnected */ -+#define EP_NODE_CONNECTING 1 /* awaiting connection */ -+#define EP_NODE_CONNECTED 2 /* node is connected */ -+#define EP_NODE_LEAVING_CONNECTED 3 /* node is starting to disconnect */ -+#define EP_NODE_LOCAL_PASSIVATE 4 /* flushing context filter/run queues */ -+#define EP_NODE_REMOTE_PASSIVATE 5 /* stalling for neterr flush */ -+#define EP_NODE_PASSIVATED 6 /* relocating active/passive messages */ -+#define EP_NODE_DISCONNECTING 7 /* entering disconncted - abort remaining comms */ -+#define EP_NODE_NUM_STATES 8 -+ -+#define EP_NODE_NETERR_ATOMIC_PACKET (1 << 0) -+#define EP_NODE_NETERR_DMA_PACKET (1 << 1) -+ -+#define EP_NODE_PASSIVE_MESSAGES (1 << 0) -+#define EP_NODE_ACTIVE_MESSAGES (1 << 1) -+ -+/* -+ * Kernel thread code is loaded as a table. -+ */ -+typedef struct ep_symbol -+{ -+ char *name; -+ EP_ADDR value; -+} EP_SYMBOL; -+ -+typedef struct ep_code -+{ -+ u_char *text; -+ u_int text_size; -+ u_char *data; -+ u_int data_size; -+ u_char *rodata; -+ u_int rodata_size; -+ EP_SYMBOL *symbols; -+ -+ int ntext; -+ sdramaddr_t pptext; -+ EP_ADDR etext; -+ sdramaddr_t _stext; -+ sdramaddr_t _rodata; -+ -+ int ndata; -+ sdramaddr_t ppdata; -+ EP_ADDR edata; -+ sdramaddr_t _sdata; -+} EP_CODE; -+ -+typedef struct ep_switchstate -+{ -+ unsigned char linkid; -+ unsigned char LNR; -+ unsigned char bcast; -+ unsigned char uplink; -+} EP_SWITCHSTATE; -+ -+typedef struct ep_rail_ops -+{ -+ void (*DestroyRail) (EP_RAIL *rail); -+ -+ int (*StartRail) (EP_RAIL *rail); -+ void (*StallRail) (EP_RAIL *rail); -+ void (*StopRail) (EP_RAIL *rail); -+ -+ sdramaddr_t (*SdramAlloc) (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ void (*SdramFree) (EP_RAIL *rail, sdramaddr_t addr, unsigned size); -+ void (*SdramWriteb) (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+ void (*KaddrMap) (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*SdramMap) (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*Unmap) (EP_RAIL *rail, EP_ADDR eaddr, unsigned len); -+ -+ void *(*DvmaReserve) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages); -+ void (*DvmaRelease) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages, void *private); -+ void (*DvmaSetPte) (EP_RAIL *rail, void *private, unsigned index, physaddr_t phys, unsigned int perm); -+ physaddr_t (*DvmaReadPte) (EP_RAIL *rail, void *private, unsigned index); -+ void (*DvmaUnload)(EP_RAIL *rail, void *private, unsigned index, unsigned npages); -+ void (*FlushTlb) (EP_RAIL *rail); -+ -+ int (*ProbeRoute) (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, -+ int *linkdown, int attempts, EP_SWITCH *lsw); -+ void (*PositionFound) (EP_RAIL *rail, ELAN_POSITION *pos); -+ int (*CheckPosition) (EP_RAIL *rail); -+ void (*NeterrFixup) (EP_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+ void (*LoadSystemRoute) (EP_RAIL *rail, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+ -+ void (*LoadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*UnloadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*LowerFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*RaiseFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*NodeDisconnected) (EP_RAIL *rail, unsigned nodeId); -+ -+ void (*FlushFilters) (EP_RAIL *rail); -+ void (*FlushQueues) (EP_RAIL *rail); -+ -+ -+ EP_INPUTQ *(*AllocInputQ) (EP_RAIL *rail, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ void (*callback)(EP_RAIL *rail, void *arg), void *arg); -+ void (*FreeInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*EnableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*DisableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ int (*PollInputQ) (EP_RAIL *rail, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+ -+ EP_OUTPUTQ *(*AllocOutputQ) (EP_RAIL *rail, unsigned slotSize, unsigned slotCount); -+ void (*FreeOutputQ) (EP_RAIL *rail, EP_OUTPUTQ *outputq); -+ void *(*OutputQMsg) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQState) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQSend) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries); -+ -+ void (*FillOutStats) (EP_RAIL *rail, char *str); -+ void (*Debug) (EP_RAIL *rail); -+ -+} EP_RAIL_OPS; -+ -+#define ep_alloc_inputq(rail,qnum,slotSize,slotCount,callback,arg) \ -+ (rail)->Operations.AllocInputQ(rail,qnum,slotSize,slotCount,callback,arg) -+#define ep_free_inputq(rail,inputq) \ -+ (rail)->Operations.FreeInputQ(rail,inputq) -+#define ep_enable_inputq(rail,inputq) \ -+ (rail)->Operations.EnableInputQ(rail,inputq) -+#define ep_disable_inputq(rail,inputq) \ -+ (rail)->Operations.DisableInputQ(rail,inputq) -+#define ep_poll_inputq(rail,inputq,maxCount,handler,arg) \ -+ (rail)->Operations.PollInputQ(rail,inputq,maxCount,handler,arg) -+#define ep_alloc_outputq(rail,slotSize,slotCount)\ -+ (rail)->Operations.AllocOutputQ(rail,slotSize,slotCount) -+#define ep_free_outputq(rail,outputq)\ -+ (rail)->Operations.FreeOutputQ(rail,outputq) -+#define ep_outputq_msg(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQMsg(rail,outputq,slotNum) -+#define ep_outputq_state(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQState(rail,outputq,slotNum) -+#define ep_outputq_send(rail,outputq,slotNum,size,vp,qnum,retries)\ -+ (rail)->Operations.OutputQSend(rail,outputq,slotNum,size,vp,qnum,retries) -+ -+struct ep_rail -+{ -+ EP_SYS *System; /* "system" we've attached to */ -+ -+ unsigned char Number; /* Rail number */ -+ unsigned char State; /* Rail state */ -+ char Name[32]; /* Rail name */ -+ -+ struct list_head ManagerLink; /* linked on ManagedRails list */ -+ -+ ELAN_DEVINFO Devinfo; /* Device information for this rail */ -+ ELAN_POSITION Position; /* Position on switch device is connected to */ -+ -+ EP_RAIL_OPS Operations; /* device specific operations */ -+ EP_RAIL_STATS Stats; /* statistics */ -+ -+ EP_ALLOC ElanAllocator; /* per-rail elan memory allocator */ -+ EP_ALLOC MainAllocator; /* per-rail main memory allocator */ -+ -+ unsigned TlbFlushRequired; /* lazy TLB flushing */ -+ -+ int SwitchBroadcastLevel; /* current switch level ok for broadcast */ -+ unsigned long SwitchBroadcastLevelTick; -+ -+ int SwitchProbeLevel; /* result of last switch probe */ -+ EP_SWITCHSTATE SwitchState[ELAN_MAX_LEVELS]; -+ EP_SWITCHSTATE SwitchLast[ELAN_MAX_LEVELS]; -+ unsigned long SwitchProbeTick[ELAN_MAX_LEVELS]; -+ -+ /* Node disconnecting/connecting state */ -+ EP_CALLBACK *CallbackList[EP_CB_COUNT]; /* List of callbacks */ -+ kmutex_t CallbackLock; /* and lock for it. */ -+ unsigned CallbackStep; /* step through UpdateConnectionState. */ -+ -+ /* back pointer for cluster membership */ -+ void *ClusterRail; -+ -+ /* Per node state for message passing */ -+ EP_NODE_RAIL *Nodes; /* array of per-node state */ -+ statemap_t *NodeSet; /* per-rail statemap of connected nodes */ -+ statemap_t *NodeChangeMap; /* statemap of nodes to being connected/disconnected */ -+ statemap_t *NodeChangeTmp; /* and temporary copies */ -+ -+ struct list_head NetworkErrorList; /* list of nodes resolving network errors */ -+ struct list_head LocalPassivateList; /* list of nodes in state LOCAL_PASSIVATE */ -+ struct list_head RemotePassivateList; /* list of nodes waiting for remote network error flush */ -+ struct list_head PassivatedList; /* list of nodes performing message relocation */ -+ struct list_head DisconnectingList; /* list of nodes transitioning to disconnected */ -+ -+ EP_XID_CACHE XidCache; /* XID cache for node messages (single threaded access) */ -+ -+ /* Manager messages */ -+ EP_INPUTQ *ManagerInputQ; -+ EP_OUTPUTQ *ManagerOutputQ; -+ unsigned ManagerOutputQNextSlot; -+ spinlock_t ManagerOutputQLock; -+ -+ /* /proc entries */ -+ struct proc_dir_entry *ProcDir; -+ struct proc_dir_entry *SvcIndicatorDir; -+ int CallbackRegistered; -+}; -+ -+/* values for State */ -+#define EP_RAIL_STATE_UNINITIALISED 0 /* device uninitialised */ -+#define EP_RAIL_STATE_STARTED 1 /* device started but network position unknown */ -+#define EP_RAIL_STATE_RUNNING 2 /* device started and position known */ -+#define EP_RAIL_STATE_INCOMPATIBLE 3 /* device started, but position incompatible */ -+ -+typedef struct ep_rail_entry -+{ -+ struct list_head Link; -+ EP_RAIL *Rail; -+} EP_RAIL_ENTRY; -+ -+typedef struct ep_subsys -+{ -+ EP_SYS *Sys; -+ -+ struct list_head Link; /* Linked on sys->Subsystems */ -+ char *Name; /* Name to lookup */ -+ -+ void (*Destroy) (struct ep_subsys *subsys, EP_SYS *sys); -+ -+ int (*AddRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+ void (*RemoveRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+} EP_SUBSYS; -+ -+typedef struct ep_node -+{ -+ EP_RAILMASK ConnectedRails; -+} EP_NODE; -+ -+struct ep_sys -+{ -+ EP_RAIL *Rails[EP_MAX_RAILS]; /* array of all available devices */ -+ -+ kmutex_t StartStopLock; /* lock for starting stopping rails */ -+ -+ ELAN_POSITION Position; /* primary node position */ -+ -+ EP_NMH_TABLE MappingTable; /* Network mapping handle table */ -+ -+ EP_ALLOC Allocator; /* shared main memory allocator */ -+ -+ EP_DVMA_STATE DvmaState; /* dvma state */ -+ -+ kmutex_t SubsysLock; /* lock on the Subsytems list */ -+ struct list_head Subsystems; /* list of subsystems */ -+ -+ /* device manager state */ -+ struct list_head ManagedRails; /* list of managed devices */ -+ EP_KTHREAD ManagerThread; /* place for manager thread to sleep */ -+ -+ /* global node state */ -+ spinlock_t NodeLock; /* spinlock for node state (including per-device node state) */ -+ EP_NODE *Nodes; /* system wide node state */ -+ statemap_t *NodeSet; /* system wide nodeset */ -+ struct list_head NodesetCallbackList; /* list of "callbacks" */ -+ -+ /* Transaction Id */ -+ struct list_head XidCacheList; /* list of XID caches */ -+ uint32_t XidGeneration; /* XID generation number (distinguishes reboots) */ -+ uint32_t XidHandle; /* XID handles (distinguishes XID caches) */ -+ uint64_t XidNext; /* next XID to prime cache */ -+ spinlock_t XidLock; /* and it's spinlock */ -+ -+ /* Shutdown/Panic */ -+ unsigned int Shutdown; /* node has shutdown/panic'd */ -+}; -+ -+#if defined(DEBUG_ASSERT) -+extern int ep_assfail (EP_RAIL *rail, const char *string, const char *func, const char *file, const int line); -+extern int sdram_assert; -+extern int assfail_mode; -+ -+#define EP_ASSERT(rail, EX) do { \ -+ if (!(EX) && ep_assfail ((EP_RAIL *) (rail), #EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define EP_ASSFAIL(rail,EX) do { \ -+ if (ep_assfail ((EP_RAIL *) (rail), EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define SDRAM_ASSERT(EX) (sdram_assert ? (EX) : 1) -+#else -+#define EP_ASSERT(rail, EX) ((void) 0) -+#define EP_ASSFAIL(rail,str) ((void) 0) -+#define SDRAM_ASSERT(EX) (1) -+#endif -+ -+/* conf_osdep.c */ -+extern EP_SYS *ep_system(void); -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+/* procfs_osdep.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+extern struct proc_dir_entry *ep_config_root; -+ -+/* kcomm.c */ -+extern int ep_sys_init (EP_SYS *sys); -+extern void ep_sys_fini (EP_SYS *sys); -+extern void ep_shutdown (EP_SYS *sys); -+extern int ep_init_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_destroy_rail (EP_RAIL *rail); -+extern int ep_start_rail (EP_RAIL *rail); -+extern void ep_stop_rail (EP_RAIL *rail); -+ -+extern void ep_connect_node (EP_RAIL *rail, int nodeId); -+extern int ep_disconnect_node (EP_RAIL *rail, int nodeId); -+ -+extern EP_XID ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache); -+ -+extern int ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body); -+ -+extern void ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason); -+ -+extern void ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys); -+extern void ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys); -+extern EP_SUBSYS *ep_subsys_find (EP_SYS *sys, char *name); -+ -+extern void DisplayNodes (EP_RAIL *rail); -+ -+extern void ep_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* neterr.c */ -+extern void ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie); -+ -+/* kcomm_elan3.c */ -+extern unsigned int ep3_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* kcomm_elan4.c */ -+extern unsigned int ep4_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* probenetwork.c */ -+extern int ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos); -+extern void CheckPosition (EP_RAIL *rail); -+ -+extern uint16_t CheckSum (char *msg, int nob); -+ -+/* threadcode.c */ -+extern EP_ADDR ep_symbol (EP_CODE *code, char *name); -+extern int ep_loadcode (EP_RAIL *rail, EP_CODE *code); -+extern void ep_unloadcode (EP_RAIL *rail, EP_CODE *code); -+ -+/* Public interface */ -+/* debug.c */ -+extern int ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int count, int off); -+extern void ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits); -+ -+/* epcomms.c */ -+extern int ep_waitfor_nodeid (EP_SYS *sys); -+extern int ep_nodeid (EP_SYS *sys); -+extern int ep_numnodes (EP_SYS *sys); -+ -+/* railhints.c */ -+extern int ep_pickRail(EP_RAILMASK railmask); -+ -+/* support.c */ -+extern int ep_register_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_nodeset_callbacks (EP_SYS *sys, statemap_t *map); -+ -+extern int ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *); -+extern unsigned int ep_backoff (EP_BACKOFF *backoff, int type); -+ -+#endif /* !__ELAN__ */ -+ -+typedef struct display_info { -+ void (*func)(long, char *, ...); -+ long arg; -+} DisplayInfo; -+ -+extern DisplayInfo di_ep_debug; -+ -+ -+#endif /* __ELAN_KCOMM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/kcomm_stats.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kcomm_stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kcomm_stats.h 2005-06-01 23:12:54.712421928 -0400 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_EPSTATS_H -+#define __EP_EPSTATS_H -+ -+#ident "$Id: kcomm_stats.h,v 1.4.8.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_stats.h,v $ */ -+ -+#define EP_BUCKET_SLOTS 8 -+ -+#define BucketStat(obj,stat,size) ((size) < 128 ? (obj)->Stats.stat[0]++ : \ -+ (size) < 512 ? (obj)->Stats.stat[1]++ : \ -+ (size) < 1024 ? (obj)->Stats.stat[2]++ : \ -+ (size) < 8192 ? (obj)->Stats.stat[3]++ : \ -+ (size) < 16384 ? (obj)->Stats.stat[4]++ : \ -+ (size) < 32768 ? (obj)->Stats.stat[5]++ : \ -+ (size) < 65536 ? (obj)->Stats.stat[6]++ : \ -+ (obj)->Stats.stat[7]++) -+#define IncrStat(obj,stat) ((obj)->Stats.stat++) -+ -+ -+#define EP3_NUM_DMA_FAIL 11 /* NOTE - the same as EP_NUM_RETRIES */ -+ -+#define ADD_STAT(STATS,STAT,VALUE) { unsigned long now = lbolt;\ -+ STATS.STAT.total += VALUE; \ -+ if ( ( now - STATS.STAT.last_time ) > HZ ) { \ -+ STATS.STAT.last_per_sec = ( STATS.STAT.total - STATS.STAT.last_count)/ ( (( now - STATS.STAT.last_time ) + (HZ/2)) / HZ);\ -+ STATS.STAT.last_time = now; \ -+ STATS.STAT.last_count = STATS.STAT.total; \ -+ }} \ -+ -+#define INC_STAT(STATS,STAT) ADD_STAT(STATS,STAT,1) -+ -+#define GET_STAT_PER_SEC(STATS, STAT) ( (( lbolt - STATS.STAT.last_time ) < (HZ * 5)) ? STATS.STAT.last_per_sec : 0 ) -+#define GET_STAT_TOTAL(STATS, STAT) ( STATS.STAT.total ) -+ -+struct ep_stats_count -+{ -+ unsigned long total; -+ unsigned long last_time; -+ unsigned long last_count; -+ unsigned long last_per_sec; -+}; -+ -+typedef struct ep_stats_count EP_STATS_COUNT; -+ -+typedef struct ep3_rail_stats -+{ -+ unsigned long IssueDmaFail[EP3_NUM_DMA_FAIL]; -+ -+ unsigned long DmaQueueLength[EP_BUCKET_SLOTS]; -+ unsigned long CprocDmaQueueOverflow; -+ unsigned long DprocDmaQueueOverflow; -+ unsigned long IprocDmaQueueOverflow; -+ unsigned long CprocEventQueueOverflow; -+ unsigned long DprocEventQueueOverflow; -+ unsigned long IprocEventQueueOverflow; -+ -+ unsigned long QueueingPacketTrap; -+ unsigned long DmaIdentifyTrap; -+ unsigned long ThreadIdentifyTrap; -+ unsigned long DmaPacketTrap; -+} EP3_RAIL_STATS; -+ -+typedef struct ep4_rail_stats -+{ -+ unsigned long somestatsgohere; -+} EP4_RAIL_STATS; -+ -+typedef struct ep_rail_stats -+{ -+ unsigned long SendMessageFailed; -+ unsigned long NeterrAtomicPacket; -+ unsigned long NeterrDmaPacket; -+ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+ -+} EP_RAIL_STATS; -+ -+typedef struct ep_cm_rail_stats -+{ -+ /* cluster membership statistics */ -+ unsigned long HeartbeatsSent; -+ unsigned long HeartbeatsRcvd; -+ -+ unsigned long RetryHeartbeat; -+ unsigned long RejoinRequest; -+ unsigned long RejoinTooSlow; -+ unsigned long LaunchMessageFail; -+ unsigned long MapChangesSent; -+ -+ /* Heartbeat scheduling stats */ -+ unsigned long HeartbeatOverdue; -+} EP_CM_RAIL_STATS; -+ -+typedef struct ep_comms_rail_stats -+{ -+ /* kernel comms large message statistics */ -+ unsigned long TxEnveEvent; -+ unsigned long TxDataEvent; -+ unsigned long TxDoneEvent; -+ unsigned long RxDoneEvent; -+ unsigned long MulticastTxDone; -+ unsigned long QueueReceive; -+ -+ unsigned long TxEnveRetry; -+ unsigned long TxDataRetry; -+ unsigned long TxDoneRetry; -+ unsigned long RxThrdEvent; -+ unsigned long RxDataRetry; -+ unsigned long RxDoneRetry; -+ unsigned long StallThread; -+ unsigned long ThrdWaiting; -+ unsigned long CompleteEnvelope; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+ -+ unsigned long LockRcvrTrapped; -+} EP_COMMS_RAIL_STATS; -+ -+typedef struct ep_comms_stats -+{ -+ unsigned long DataXmit[8]; -+ unsigned long McastXmit[8]; -+ unsigned long RPCXmit[8]; -+ unsigned long RPCPut[8]; -+ unsigned long RPCGet[8]; -+ unsigned long CompleteRPC[8]; -+ unsigned long RxData[8]; -+ unsigned long RxMcast[8]; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+} EP_COMMS_STATS; -+ -+#endif /* __EP_EPSTATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/kmap.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kmap.h 2005-06-01 23:12:54.713421776 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMAP_H -+#define __ELAN_KMAP_H -+ -+#ident "$Id: kmap.h,v 1.3.8.1 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.h,v $ */ -+ -+#include -+ -+extern void ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t vaddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len); -+extern void ep_perrail_dvma_sync (EP_RAIL *rail); -+ -+typedef struct ep_dvma_nmh -+{ -+ EP_NMH dvma_nmh; -+ -+ struct list_head dvma_link; /* chained on ep_dvma_state */ -+ unsigned dvma_perm; /* permissions for region */ -+ -+ spinlock_t dvma_lock; -+ EP_RAILMASK dvma_railmask; /* bitmap of rails */ -+ EP_RAIL *dvma_rails[EP_MAX_RAILS]; /* assoicated rails */ -+ void *dvma_private[EP_MAX_RAILS]; /* pointers to rail private data */ -+ unsigned int dvma_attrs[1]; /* bitmap of which rails pages are loaded NOTE - max 32 rails */ -+} EP_DVMA_NMH; -+ -+/* values for dvma_perm */ -+#define EP_PERM_EXECUTE 0 -+#define EP_PERM_READ 1 -+#define EP_PERM_WRITE 2 -+#define EP_PERM_ALL 3 -+ -+typedef struct ep_dvma_state -+{ -+ kmutex_t dvma_lock; -+ struct list_head dvma_handles; -+ struct list_head dvma_rails; -+ EP_RMAP *dvma_rmap; -+} EP_DVMA_STATE; -+ -+extern void ep_dvma_init (EP_SYS *sys); -+extern void ep_dvma_fini (EP_SYS *sys); -+extern EP_NMH *ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm); -+extern void ep_dvma_release (EP_SYS *sys, EP_NMH *nmh); -+extern void ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, -+ EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset); -+extern void ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd); -+ -+extern void ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+extern int ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern uint16_t rolling_check_sum (char *msg, int nob, uint16_t sum); -+ -+#endif /* __ELAN_KMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/kmsg.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kmsg.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kmsg.h 2005-06-01 23:12:54.713421776 -0400 -@@ -0,0 +1,14 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMSG_H -+#define __ELAN_KMSG_H -+ -+#ident "@(#)$Id: kmsg.h,v 1.1 2003/09/23 13:55:12 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg.h,v $ */ -+ -+#endif /* __ELAN_KMSG_H */ -Index: linux-2.4.21/include/elan/kthread.h -=================================================================== ---- linux-2.4.21.orig/include/elan/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/kthread.h 2005-06-01 23:12:54.713421776 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/nmh.h -=================================================================== ---- linux-2.4.21.orig/include/elan/nmh.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/nmh.h 2005-06-01 23:12:54.714421624 -0400 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NMH_H -+#define __ELAN3_NMH_H -+ -+#ident "@(#)$Id: nmh.h,v 1.7 2004/01/06 10:29:55 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.h,v $*/ -+ -+ -+/* Forward declarations */ -+typedef struct ep_nmd EP_NMD; -+typedef struct ep_nmh_ops EP_NMH_OPS; -+typedef struct ep_nmh EP_NMH; -+ -+/* Railmask held in 16 bit field (packs with nodeId into NMD */ -+typedef uint16_t EP_RAILMASK; -+ -+#define EP_RAIL2RAILMASK(rnum) (1 << (rnum)) -+#define EP_RAILMASK_ALL 0xffff -+ -+/* kernel comms elan network address */ -+typedef uint32_t EP_ADDR; -+ -+/* network mapping descriptor - this is returned to the user from a map operation, -+ * and is what is passed to all communication functions */ -+struct ep_nmd -+{ -+ EP_ADDR nmd_addr; /* base address */ -+ uint32_t nmd_len; /* size in bytes */ -+ uint32_t nmd_attr; /* nodeid << 16 | railmask */ -+}; -+ -+#define EP_NMD_ATTR(nodeid,railmask) (((nodeid) << 16) | (railmask)) -+#define EP_NMD_NODEID(nmd) ((nmd)->nmd_attr >> 16) -+#define EP_NMD_RAILMASK(nmd) ((nmd)->nmd_attr & EP_RAILMASK_ALL) -+ -+#if !defined(__ELAN__) -+ -+struct ep_nmh_ops -+{ -+ int (*op_map_rails) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); /* add mappings to different rail(s) */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ uint16_t (*op_calc_check_sum) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); /* calculates check sum */ -+#endif -+}; -+ -+struct ep_nmh -+{ -+ EP_NMD nmh_nmd; /* public field */ -+ struct list_head nmh_link; /* linked on hash table */ -+ EP_NMH_OPS *nmh_ops; /* operations to perform on object */ -+}; -+ -+#define EP_NMH_NUMHASH (32 - 11 + 1) /* one hash table for each power of 2 above pagesize */ -+#define EP_NMH_HASHSIZE (64) /* max size of each hash table */ -+ -+typedef struct ep_nmh_table -+{ -+ struct list_head *tbl_hash[EP_NMH_NUMHASH]; -+ unsigned tbl_size[EP_NMH_NUMHASH]; -+} EP_NMH_TABLE; -+ -+extern int ep_nmh_init (EP_NMH_TABLE *tbl); -+extern void ep_nmh_fini (EP_NMH_TABLE *tbl); -+ -+extern void ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern void ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern EP_NMH *ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmh); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern uint32_t ep_nmd_calc_data_check_sum(EP_SYS *sys, EP_NMD *nmd, int nFrags); -+#endif -+ -+/* Public interface */ -+extern EP_RAILMASK ep_nmd2railmask (EP_NMD *frags, int nFrags); -+extern void ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len); -+extern int ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b); -+extern int ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask); -+ -+#endif /* __ELAN__ */ -+ -+#endif /* __ELAN3_NMH_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/rmap.h -=================================================================== ---- linux-2.4.21.orig/include/elan/rmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/rmap.h 2005-06-01 23:12:54.714421624 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_RMAP_H -+#define __ELAN_RMAP_H -+ -+#ident "$Id: rmap.h,v 1.8 2004/05/19 10:24:40 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.h,v $ */ -+ -+ -+typedef struct ep_rmap_entry -+{ -+ size_t m_size; -+ u_long m_addr; -+} EP_RMAP_ENTRY; -+ -+typedef struct ep_rmap -+{ -+ spinlock_t m_lock; -+ kcondvar_t m_wait; -+ u_int m_size; -+ u_int m_free; -+ u_int m_want; -+ char *m_name; -+ EP_RMAP_ENTRY m_map[1]; -+} EP_RMAP; -+ -+extern void ep_display_rmap (EP_RMAP *map); -+ -+extern void ep_rmapinit (EP_RMAP *rmap, char *name, u_int mapsize); -+extern unsigned long ep_rmalloc (EP_RMAP *rmap, size_t size, int cansleep); -+extern unsigned long ep_rmalloc_constrained (EP_RMAP *mp, size_t size, unsigned long alo, unsigned long ahi, unsigned long align, int cansleep); -+extern void ep_rmfree (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern unsigned long ep_rmget (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern EP_RMAP *ep_rmallocmap (size_t size, char *name, int cansleep); -+extern void ep_rmfreemap (EP_RMAP *map); -+ -+#endif /* __ELAN3_RMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/statemap.h -=================================================================== ---- linux-2.4.21.orig/include/elan/statemap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/statemap.h 2005-06-01 23:12:54.714421624 -0400 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_STATEMAP_H -+#define __ELAN_STATEMAP_H -+ -+#ident "$Id: statemap.h,v 1.8 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.h,v $ */ -+ -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+typedef struct -+{ -+ unsigned int size; -+ unsigned int nob; -+ unsigned int changemap_nob; -+ unsigned int bitmap_nob; -+ bitmap_t *changemap0; -+ bitmap_t *changemap1; -+ bitmap_t *changemap2; -+ bitmap_t *bitmap; -+} statemap_t; -+ -+extern bitmap_t statemap_getseg (statemap_t *map, unsigned int offset); -+extern void statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg); -+extern bitmap_t statemap_getbits (statemap_t *map, unsigned int offset, int nbits); -+extern void statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits); -+extern void statemap_zero (statemap_t *map); -+extern void statemap_setmap (statemap_t *dst, statemap_t *src); -+extern void statemap_ormap (statemap_t *dst, statemap_t *src); -+extern int statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange); -+extern int statemap_changed (statemap_t *map); -+extern void statemap_reset (statemap_t *map); -+extern void statemap_copy (statemap_t *dst, statemap_t *src); -+extern void statemap_clearchanges (statemap_t *map); -+extern bitmap_t *statemap_tobitmap (statemap_t *map); -+extern statemap_t *statemap_create (int size); -+extern void statemap_destroy (statemap_t *map); -+ -+#endif /* __ELAN_STATEMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan/stats.h -=================================================================== ---- linux-2.4.21.orig/include/elan/stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan/stats.h 2005-06-01 23:12:54.715421472 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.h,v $*/ -+ -+#ifndef __ELAN_STATS_H -+#define __ELAN_STATS_H -+ -+ -+/* non-kernel headings */ -+#define ELAN_STATS_NAME_MAX_LEN ((uint)64) -+typedef unsigned int ELAN_STATS_IDX; -+ -+typedef struct elan_stats_map -+{ -+ char entry_name[ELAN_STATS_NAME_MAX_LEN]; -+ int index; -+} ELAN_STATS_MAP; -+ -+#if defined(__KERNEL__) -+ -+/* stats callbacks */ -+#define ELAN_STATS_OPS_VERSION ((u_int)1) -+typedef struct elan_stats_ops -+{ -+ u_int ops_version; -+ -+ int (*elan_stats_get_name) (void * arg, uint index, caddr_t name); -+ int (*elan_stats_get_block) (void * arg, uint entries, ulong *values); -+ int (*elan_stats_clear_block) (void * arg); -+ -+} ELAN_STATS_OPS; -+ -+typedef struct elan_stats_struct -+{ -+ struct list_head node; -+ -+ ELAN_STATS_IDX statidx; -+ char block_name[ELAN_STATS_NAME_MAX_LEN]; -+ uint num_entries; -+ ELAN_STATS_OPS *ops; -+ void *arg; -+ -+} ELAN_STATS_STRUCT; -+ -+/* stats.c */ -+extern int elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg); -+ -+extern int elan_stats_deregister (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find_by_name(caddr_t block_name); -+extern ELAN_STATS_STRUCT *elan_stats_find_next (ELAN_STATS_IDX statidx); -+ -+ -+/* elan_stats.c */ -+extern int elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_statidx); -+ -+extern int elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries); -+ -+extern int elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries); -+ -+extern int elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name); -+ -+extern int elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values); -+ -+extern int elan_stats_clear_block (ELAN_STATS_IDX statidx); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_STATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/compat.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/compat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/compat.h 2005-06-01 23:12:54.715421472 -0400 -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.4 2004/06/09 09:07:03 mike Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/compat.h,v $*/ -+ -+#ifndef __ELAN3_COMPAT_H -+#define __ELAN3_COMPAT_H -+ -+/* compatibility header to allow Eagle branch QSNETLIBS -+ * to compile against head kernel */ -+ -+#define ELAN_EAGLE_COMPAT -+ -+/* vmseg.h */ -+#define ELAN_FLAGSTATS ELAN3_FLAGSTATS -+ -+/* uregs.h */ -+#define ELAN_STATS_NAME ELAN3_STATS_NAME -+#define elan3_stats_names elan_stats_names -+ -+/* spinlock.h */ -+#define ELAN_SPINLOCK ELAN3_SPINLOCK -+#define ELAN_SPINLOCK_MAIN ELAN3_SPINLOCK_MAIN -+#define ELAN_SPINLOCK_ELAN ELAN3_SPINLOCK_ELAN -+#define ELAN_ME_SPINENTER ELAN3_ME_SPINENTER -+#define ELAN_ME_FORCEENTER ELAN3_ME_FORCEENTER -+#define ELAN_ME_SPINEXIT ELAN3_ME_SPINEXIT -+#define ELAN_SPINENTER ELAN3_SPINENTER -+#define ELAN_SPINEXIT ELAN3_SPINEXIT -+#define elan3_me_spinblock elan_me_spinblock -+#define elan3_spinenter elan_spinenter -+ -+/* elanio.h */ -+#define ELANIO_CONTROL_PATHNAME ELAN3IO_CONTROL_PATHNAME -+#define ELANIO_USER_PATHNAME ELAN3IO_USER_PATHNAME -+#define ELANIO_SDRAM_PATHNAME ELAN3IO_SDRAM_PATHNAME -+#define ELANIO_MAX_PATHNAMELEN ELAN3IO_MAX_PATHNAMELEN -+ -+#define ELANIO_SET_BOUNDARY_SCAN ELAN3IO_SET_BOUNDARY_SCAN -+#define ELANIO_CLEAR_BOUNDARY_SCAN ELAN3IO_CLEAR_BOUNDARY_SCAN -+#define ELANIO_READ_LINKVAL ELAN3IO_READ_LINKVAL -+#define ELANIO_WRITE_LINKVAL ELAN3IO_WRITE_LINKVAL -+#define ELANIO_SET_DEBUG_STRUCT ELAN3IO_SET_DEBUG_STRUCT -+#define ELANIO_SET_DEBUG ELAN3IO_SET_DEBUG -+#define ELANIO_DEBUG_BUFFER_STRUCT ELAN3IO_DEBUG_BUFFER_STRUCT -+#define ELANIO_DEBUG_BUFFER ELAN3IO_DEBUG_BUFFER -+#define ELANIO_NETERR_SERVER_STRUCT ELAN3IO_NETERR_SERVER_STRUCT -+#define ELANIO_NETERR_SERVER ELAN3IO_NETERR_SERVER -+#define ELANIO_NETERR_FIXUP ELAN3IO_NETERR_FIXUP -+ -+#define ELANIO_FREE ELAN3IO_FREE -+#define ELANIO_ATTACH ELAN3IO_ATTACH -+#define ELANIO_DETACH ELAN3IO_DETACH -+#define ELANIO_ADDVP_STRUCT ELAN3IO_ADDVP_STRUCT -+#define ELANIO_ADDVP ELAN3IO_ADDVP -+#define ELANIO_REMOVEVP ELAN3IO_REMOVEVP -+#define ELANIO_BCASTVP_STRUCT ELAN3IO_BCASTVP_STRUCT -+#define ELANIO_BCASTVP ELAN3IO_BCASTVP -+#define ELANIO_LOAD_ROUTE_STRUCT ELAN3IO_LOAD_ROUTE_STRUCT -+#define ELANIO_LOAD_ROUTE ELAN3IO_LOAD_ROUTE -+#define ELANIO_PROCESS ELAN3IO_PROCESS -+#define ELANIO_SETPERM_STRUCT ELAN3IO_SETPERM_STRUCT -+#define ELANIO_SETPERM ELAN3IO_SETPERM -+#define ELANIO_CLEARPERM_STRUCT ELAN3IO_CLEARPERM_STRUCT -+#define ELANIO_CLEARPERM ELAN3IO_CLEARPERM -+#define ELANIO_CHANGEPERM_STRUCT ELAN3IO_CHANGEPERM_STRUCT -+#define ELANIO_CHANGEPERM ELAN3IO_CHANGEPERM -+#define ELANIO_HELPER_THREAD ELAN3IO_HELPER_THREAD -+#define ELANIO_WAITCOMMAND ELAN3IO_WAITCOMMAND -+#define ELANIO_BLOCK_INPUTTER ELAN3IO_BLOCK_INPUTTER -+#define ELANIO_SET_FLAGS ELAN3IO_SET_FLAGS -+#define ELANIO_WAITEVENT ELAN3IO_WAITEVENT -+#define ELANIO_ALLOC_EVENTCOOKIE ELAN3IO_ALLOC_EVENTCOOKIE -+#define ELANIO_FREE_EVENTCOOKIE ELAN3IO_FREE_EVENTCOOKIE -+#define ELANIO_ARM_EVENTCOOKIE ELAN3IO_ARM_EVENTCOOKIE -+#define ELANIO_WAIT_EVENTCOOKIE ELAN3IO_WAIT_EVENTCOOKIE -+#define ELANIO_SWAPSPACE ELAN3IO_SWAPSPACE -+#define ELANIO_EXCEPTION_SPACE ELAN3IO_EXCEPTION_SPACE -+#define ELANIO_GET_EXCEPTION ELAN3IO_GET_EXCEPTION -+#define ELANIO_UNLOAD_STRUCT ELAN3IO_UNLOAD_STRUCT -+#define ELANIO_UNLOAD ELAN3IO_UNLOAD -+#define ELANIO_GET_ROUTE_STRUCT ELAN3IO_GET_ROUTE_STRUCT -+#define ELANIO_GET_ROUTE ELAN3IO_GET_ROUTE -+#define ELANIO_RESET_ROUTE_STRUCT ELAN3IO_RESET_ROUTE_STRUCT -+#define ELANIO_RESET_ROUTE ELAN3IO_RESET_ROUTE -+#define ELANIO_CHECK_ROUTE_STRUCT ELAN3IO_CHECK_ROUTE_STRUCT -+#define ELANIO_CHECK_ROUTE ELAN3IO_CHECK_ROUTE -+#define ELANIO_VP2NODEID_STRUCT ELAN3IO_VP2NODEID_STRUCT -+#define ELANIO_VP2NODEID ELAN3IO_VP2NODEID -+#define ELANIO_SET_SIGNAL ELAN3IO_SET_SIGNAL -+#define ELANIO_PROCESS_2_LOCATION_STRUCT ELAN3IO_PROCESS_2_LOCATION_STRUCT -+#define ELANIO_PROCESS_2_LOCATION ELAN3IO_PROCESS_2_LOCATION -+#define ELANIO_GET_DEVINFO_STRUCT ELAN3IO_GET_DEVINFO_STRUCT -+#define ELANIO_GET_DEVINFO ELAN3IO_GET_DEVINFO -+#define ELANIO_GET_POSITION_STRUCT ELAN3IO_GET_POSITION_STRUCT -+#define ELANIO_GET_POSITION ELAN3IO_GET_POSITION -+#define ELANIO_STATS_STRUCT ELAN3IO_STATS_STRUCT -+#define ELANIO_STATS ELAN3IO_STATS -+# define ELAN_SYS_STATS_DEVICE ELAN3_SYS_STATS_DEVICE -+# define ELAN_SYS_STATS_ELAN3MMU ELAN3_SYS_STATS_MMU -+ -+#define ELANIO_OFF_FLAG_PAGE ELAN3IO_OFF_FLAG_PAGE -+#define ELANIO_OFF_UREG_PAGE ELAN3IO_OFF_UREG_PAGE -+#define ELANIO_OFF_COMMAND_PAGE ELAN3IO_OFF_COMMAND_PAGE -+ -+ -+/* elanvp.h */ -+#define ELAN_ROUTE_SUCCESS ELAN3_ROUTE_SUCCESS -+#define ELAN_ROUTE_SYSCALL_FAILED ELAN3_ROUTE_SYSCALL_FAILED -+#define ELAN_ROUTE_INVALID ELAN3_ROUTE_INVALID -+#define ELAN_ROUTE_TOO_LONG ELAN3_ROUTE_TOO_LONG -+#define ELAN_ROUTE_LOAD_FAILED ELAN3_ROUTE_LOAD_FAILED -+#define ELAN_ROUTE_PROC_RANGE ELAN3_ROUTE_PROC_RANGE -+#define ELAN_ROUTE_INVALID_LEVEL ELAN3_ROUTE_INVALID_LEVEL -+#define ELAN_ROUTE_OCILATES ELAN3_ROUTE_OCILATES -+#define ELAN_ROUTE_WRONG_DEST ELAN3_ROUTE_WRONG_DEST -+#define ELAN_ROUTE_TURN_LEVEL ELAN3_ROUTE_TURN_LEVEL -+#define ELAN_ROUTE_NODEID_UNKNOWN ELAN3_ROUTE_NODEID_UNKNOWN -+ -+/* elandev.h */ -+#define ELAN_STATS ELAN3_STATS -+#define ELAN_STATS_VERSION ELAN3_STATS_VERSION -+ -+/* perm.h */ -+#define ELAN_PERM_NOREMOTE ELAN3_PERM_NOREMOTE -+#define ELAN_PERM_LOCAL_READ ELAN3_PERM_LOCAL_READ -+#define ELAN_PERM_REMOTEALL ELAN3_PERM_REMOTEALL -+ -+/* threadsyscall.h */ -+#define ELAN_ABORT_TRAPNUM ELAN3_ABORT_TRAPNUM -+#define ELAN_ELANCALL_TRAPNUM ELAN3_ELANCALL_TRAPNUM -+#define ELAN_SYSCALL_TRAPNUM ELAN3_SYSCALL_TRAPNUM -+#define ELAN_SYS_close ELAN3_SYS_close -+#define ELAN_SYS_getpid ELAN3_SYS_getpid -+#define ELAN_SYS_ioctl ELAN3_SYS_ioctl -+#define ELAN_SYS_kill ELAN3_SYS_kill -+#define ELAN_SYS_lseek ELAN3_SYS_lseek -+#define ELAN_SYS_mmap ELAN3_SYS_mmap -+#define ELAN_SYS_munmap ELAN3_SYS_munmap -+#define ELAN_SYS_open ELAN3_SYS_open -+#define ELAN_SYS_poll ELAN3_SYS_poll -+#define ELAN_SYS_read ELAN3_SYS_read -+#define ELAN_SYS_write ELAN3_SYS_write -+#define ELAN_T_SYSCALL_CODE ELAN3_T_SYSCALL_CODE -+#define ELAN_T_SYSCALL_ERRNO ELAN3_T_SYSCALL_ERRNO -+ -+/* elansyscall.h */ -+#define ELAN_SYS_FLAG_DMA_BADVP ELAN3_SYS_FLAG_DMA_BADVP -+#define ELAN_SYS_FLAG_THREAD_BADVP ELAN3_SYS_FLAG_THREAD_BADVP -+#define ELAN_SYS_FLAG_DMAFAIL ELAN3_SYS_FLAG_DMAFAIL -+#define ELAN_SYS_FLAG_NETERR ELAN3_SYS_FLAG_NETERR -+ -+/* intrinsics.h */ -+#define elan_copy64w elan3_copy64w -+#define elan_read64dw elan3_read64dw -+#define elan_write64dw elan3_write64dw -+ -+#ifndef ELAN_POLL_EVENT -+#define ELAN_POLL_EVENT ELAN3_POLL_EVENT -+#endif -+#ifndef ELAN_WAIT_EVENT -+#define ELAN_WAIT_EVENT ELAN3_WAIT_EVENT -+#endif -+ -+#endif /* __ELAN3_COMPAT_H */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.4.21/include/elan3/dma.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/dma.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/dma.h 2005-06-01 23:12:54.716421320 -0400 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_DMA_H -+#define __ELAN3_DMA_H -+ -+#ident "$Id: dma.h,v 1.38 2002/08/21 12:43:27 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/dma.h,v $ */ -+ -+#include -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E3_DMA_ALIGN (32) -+ -+/* The maximum size a DMA can be (i.e. < 2GB) */ -+#define E3_MAX_DMA_SIZE 0x7fffffff -+ -+/* This macro returns TRUE if a fixup for the ELAN_REVB_BUG_2 problem is required -+ * i.e. if the DMA begins in the last 64-bytes of a page and its size causes it to enter the -+ * next page, hence causing the Elan to issue 2 (64-byte) block reads to different pages. -+ * See GNAT hw-elan3/3263 -+ */ -+#define E3_DMA_REVB_BUG_2(SIZE, ADDR, PAGESIZE) \ -+ ( (((int) (ADDR) & (PAGESIZE-64)) == (PAGESIZE-64)) && (-(((int) (ADDR) | ~(PAGESIZE-1))) < (SIZE)) ) -+ -+/* There is a point where a dma runs quicker from main memory than -+ * when running from sdram and having to copy all the data down -+ * first. -+ */ -+#define E3_DMA_SDRAM_CUTOFF 128 -+ -+typedef union _e3_DmaType -+{ -+ E3_uint32 type; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+#endif -+ } s; -+} E3_DmaType; -+ -+#define E3_DMA_CONTEXT_MASK (ALL_CONTEXT_BITS << 16) -+ -+#define E3_DMA_CONTEXT(type) (((type) >> 16) & ALL_CONTEXT_BITS) -+#define E3_DMA_ISREMOTE(type) (((type) >> 15) & 1) -+#define E3_DMA_FAILCOUNT(type) (((type) >> 9) & 0x3F) -+#define E3_DMA_OPCODE(type) (((type) >> 5) & 0xF) -+#define E3_DMA_DIRECTION(type) (((type) >> 2) & 0x7) -+#define EP_DMA_DATATYPE(type) (((type) >> 0) & 0x3) -+ -+#define E3_DMA_TYPE(dataType, direction, opCode, failCount) \ -+ (((dataType) & 0x3) | (((direction) & 7) << 2) | (((opCode) & 0xF) << 5) | (((failCount) & 0x3F) << 9)) -+ -+ -+typedef union _e3_CookieVProc -+{ -+ E3_uint32 cookie_vproc; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+#else -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+#endif -+ } s; -+} E3_CookieVProc; -+ -+#define E3_DMA_COOKIE_PROC(Cookie, VProc) (((VProc) & 0xffff) | (((Cookie) << 16))) -+ -+#define DMA_COOKIE_MASK (0xffff0000) -+#define DMA_PROCESS_MASK (0x0000ffff) -+ -+/* We use the bottom bit of the cookie to -+ * distinguish main/thread generated cookies -+ */ -+#define DMA_COOKIE_THREAD (0x01 << 16) -+ -+/* We use the next bit of the cookie to -+ * distinguish locally/remotely generated cookies -+ */ -+#define DMA_COOKIE_REMOTE (0x02 << 16) -+ -+/* Assign and increment cookie (NB: we have reserved the bottom two bits) -+ */ -+#define DMA_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | VPROC) -+#define DMA_REMOTE_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | DMA_COOKIE_REMOTE | VPROC) -+ -+#define DMA_COOKIE_REFRESH(COOKIEVP, COOKIE) \ -+do { \ -+ COOKIEVP &= ~DMA_COOKIE_MASK; /* Clear cookie */ \ -+ COOKIEVP |= DMA_COOKIE(COOKIE,0); /* Assign new cookie */ \ -+} while (0) -+ -+typedef struct e3_dma -+{ -+ E3_DmaType dma_u; -+ E3_uint32 dma_size; -+ E3_Addr dma_source; -+ E3_Addr dma_dest; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_srcEvent; -+ E3_CookieVProc dma_srcCookieProc; -+} E3_DMA; -+ -+ -+/* -+ * Word-swapped version of DMA descriptor. -+ * This is used by the UltraSPARC code to format the descriptor -+ * in main memory before block-copying it down to Elan SDRAM. -+ * In the process it does a dword (64-bit) conversion and so swaps -+ * the word order on a double-word pair basis -+ */ -+typedef struct e3_dma_swapped -+{ -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+} E3_DMA_SWAPPED; -+ -+/* Define a Main memory structure for DMA desc based on Endianess of machine */ -+#if defined(__LITTLE_ENDIAN__) -+#define E3_DMA_MAIN E3_DMA -+#else -+#define E3_DMA_MAIN E3_DMA_SWAPPED; -+#endif -+ -+#define dma_type dma_u.type -+#define dma_failCount dma_u.s.failCount -+#define dma_isRemote dma_u.s.isRemote -+#define dma_opCode dma_u.s.opCode -+#define dma_direction dma_u.s.direction -+#define dma_dataType dma_u.s.dataType -+#define dma_queueContext dma_u.s.Context -+ -+#define dma_destCookieVProc dma_destCookieProc.cookie_vproc -+#define dma_destVProc dma_destCookieProc.s.vproc -+#define dma_destCookie dma_destCookieProc.s.cookie -+#define dma_srcCookieVProc dma_srcCookieProc.cookie_vproc -+#define dma_srcVProc dma_srcCookieProc.s.vproc -+#define dma_srcCookie dma_srcCookieProc.s.cookie -+ -+/* -+ * Values for dma_opCode -+ */ -+#define DMA_NORMAL 0 -+#define DMA_QUEUED 1 -+#define DMA_NORMAL_BROADCAST 2 -+#define DMA_QUEUED_BROADCAST 3 -+#define DMA_NORMAL_UNSAFE 4 -+#define DMA_QUEUED_UNSAFE 5 -+#define DMA_NORMAL_BROADCAST_UNSAFE 6 -+#define DMA_QUEUED_BROADCAST_UNSAFE 7 -+ -+/* -+ * Values for dma_direction -+ */ -+#define DMA_WRITE 0 -+#define DMA_READ_REQUEUE 1 -+#define DMA_READ 3 -+#define DMA_READ_BROADCAST 7 -+ -+/* -+ * Values for dma_dataType -+ */ -+#define DMA_BYTE 0 -+#define DMA_HALFWORD 1 -+#define DMA_WORD 2 -+#define DMA_DOUBLE 3 -+ -+/* OUT OF DATE ? -+ #define DMA_OPCODE_SHIFT 3 -+ #define DMA_FAILCOUNT_SHIFT 9 -+*/ -+#define DMA_TYPE_ISREMOTE (1 << 15) -+#define DMA_TYPE_READ (3 << 2) -+#define DMA_TYPE_READ_REQUEUE (1 << 2) -+#define DMA_TYPE_DIRECTION_MASK (3 << 2) -+ -+#endif /* __ELAN3_DMA_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/e3types.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/e3types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/e3types.h 2005-06-01 23:12:54.716421320 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_E3TYPES_H -+#define __ELAN3_E3TYPES_H -+ -+#ident "$Id: e3types.h,v 1.18 2002/08/09 11:23:33 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/e3types.h,v $ */ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E3_int; -+typedef unsigned int E3_uint; -+ -+typedef signed char E3_int8; -+typedef unsigned char E3_uint8; -+ -+typedef signed short E3_int16; -+typedef unsigned short E3_uint16; -+ -+typedef signed int E3_int32; -+typedef unsigned int E3_uint32; -+ -+#ifdef __ELAN3__ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+#ifdef _MAIN_LP64 -+/* NOTE: If the Main is 64-bit we declare the Elan thread's -+ * E3_uintptr to be 64-bits too -+ */ -+typedef unsigned long long E3_uintptr; -+#else -+typedef unsigned long E3_uintptr; -+#endif -+ -+#else -+ -+#ifdef _LP64 -+typedef signed long E3_int64; -+typedef unsigned long E3_uint64; -+typedef unsigned long E3_uintptr; -+#else /* _ILP32 */ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+typedef unsigned long E3_uintptr; -+#endif -+ -+#endif /* __ELAN3__ */ -+ -+/* 32-bit Elan3 address */ -+typedef E3_uint32 E3_Addr; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN3_E3TYPES_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elan3mmu.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elan3mmu.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elan3mmu.h 2005-06-01 23:12:54.717421168 -0400 -@@ -0,0 +1,346 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3MMU_H -+#define __ELAN3_ELAN3MMU_H -+ -+#ident "$Id: elan3mmu.h,v 1.40.2.1 2004/12/14 10:19:48 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu.h,v $*/ -+ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct elan3mmu_global_stats -+{ -+ int version; -+ int pteload; -+ int pteunload; -+ int ptereload; -+ -+ int streamable_alloc; -+ int streamable_free; -+ int streamable_alloc_failed; -+ -+ int num_ptbl_level[4]; /* number of level N ptbls */ -+ -+ int create_ptbl_failed; /* count of ptbl creation failure */ -+ -+ int lX_alloc_l3; /* count of l3 ptbls used as lX */ -+ int lX_freed_l3; /* count of lX ptbls freed as l3 */ -+ -+ int l2_alloc_l3; /* count of l3 ptbls used as l2 */ -+ int l2_freed_l3; /* count of l2 ptbls freed as l3 */ -+ -+ int stolen_ptbls; /* count of l3 ptbls stolen */ -+} ELAN3MMU_GLOBAL_STATS; -+ -+#define ELAN3MMU_STATS_VERSION 1 -+ -+#define ELAN3MMU_STAT(what) (elan3mmu_global_stats.what++) -+#define ELAN3MMU_SET_STAT(what,count) (elan3mmu_global_stats.what = count) -+ -+#ifdef __KERNEL__ -+ -+#define ELAN3_PT_SHIFT (ELAN3_L2_SHIFT + 2) -+ -+typedef struct elan3_ptbl -+{ -+ struct elan3_ptbl *ptbl_parent; /* Parent page table, or next on freelist */ -+ struct elan3mmu *ptbl_elan3mmu; /* elan3mmu we're allocated for */ -+ E3_Addr ptbl_base; /* Virtual address we're mapping */ -+ u_char ptbl_index; /* Index in ptbl group */ -+ u_char ptbl_valid; /* Number of valid entries */ -+ u_char ptbl_flags; /* Flags, defined below. */ -+ u_char ptbl_spare; -+} ELAN3_PTBL; -+ -+#define ptbl_next ptbl_parent /* Parent pointer is next pointer when on free list */ -+ -+#define PTBL_LEVEL_X 0x00 -+#define PTBL_LEVEL_1 0x01 -+#define PTBL_LEVEL_2 0x02 -+#define PTBL_LEVEL_3 0x03 -+#define PTBL_LEVEL_MASK 0x03 -+#define PTBL_LOCKED 0x04 /* Page table is locked, protects all fields */ -+#define PTBL_KEEP 0x08 /* This ptbl is not to be stolen */ -+#define PTBL_ALLOCED 0x10 /* This ptbl has been allocated, and is not free */ -+#define PTBL_GROUPED 0x20 /* This ptbl is a member of a group of ptbls */ -+#define PTBL_KERNEL 0x80 /* This ptbl is allocated for the kernel */ -+ -+#define PTBL_LEVEL(flags) ((flags) & PTBL_LEVEL_MASK) -+#define PTBL_IS_LOCKED(flags) (((flags) & (PTBL_LOCKED|PTBL_ALLOCED)) == (PTBL_LOCKED|PTBL_ALLOCED)) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBL_GROUP_SIZE 8192 /* page table groups are 8k bytes */ -+# define PTBLS_PER_GROUP_L1 8 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 32 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 32 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 32 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 32 /* max of l1,l2,l3,lX */ -+#else -+# define PTBL_GROUP_SIZE 4096 /* page table groups are 4k bytes */ -+# define PTBLS_PER_GROUP_L1 4 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 16 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 8 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 16 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 16 /* max of l1,l2,l3,lX */ -+#endif -+ -+#define HMES_PER_GROUP (PTBLS_PER_GROUP_L3*ELAN3_L3_ENTRIES) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 1 /* 32 PTEs */ -+#else -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 2 /* 64 PTEs */ -+#endif -+ -+#define ELAN3_LX_ENTRIES (32) -+#define PTBLS_PER_PTBL_LX (1) -+ -+#define L1_VA_PER_PTBL (ELAN3_L1_SIZE*(ELAN3_L1_ENTRIES/PTBLS_PER_PTBL_L1)) /* 4 ptbl for L1 */ -+#define L2_VA_PER_PTBL (ELAN3_L2_SIZE*(ELAN3_L2_ENTRIES/PTBLS_PER_PTBL_L2)) /* 1 ptbl for L2 */ -+#define L3_VA_PER_PTBL (ELAN3_L3_SIZE*(ELAN3_L3_ENTRIES/PTBLS_PER_PTBL_L3)) /* 1 ptbl for L3 */ -+ -+typedef struct elan3_ptbl_gr -+{ -+ struct elan3_ptbl_gr *pg_next; /* Next in list. */ -+ int pg_level; /* Level PG allocated for */ -+ sdramaddr_t pg_addr; /* sdram offset of ptes/ptps */ -+ ELAN3_PTBL pg_ptbls[PTBLS_PER_GROUP_MAX]; /* The actual page tables */ -+} ELAN3_PTBL_GR; -+ -+ -+/* -+ * The elan3mmu structure is the mmu dependant hardware address translation -+ * structure linked to the address space structure to show the translatioms -+ * provided by the elan for an address sapce. -+ * -+ * We also have a doubly linked list of 'regions' which allow the -+ * elan3mmu code to determine the access permissions for the elan -+ * dependant on the virtual address that the translation is being -+ * loaded at. -+ */ -+ -+typedef struct elan3mmu_rgn -+{ -+ struct elan3mmu_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_mprev; /* sorted on main address */ -+ caddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct elan3mmu_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_eprev; /* sorted on elan address */ -+ E3_Addr rgn_ebase; /* elan address of base of region */ -+ -+ u_int rgn_len; /* length of region */ -+ u_int rgn_perm; /* elan access permission */ -+} ELAN3MMU_RGN; -+ -+typedef struct elan3mmu -+{ -+ spinlock_t elan3mmu_lock; /* spinlock lock for regions */ -+ ELAN3MMU_RGN *elan3mmu_mrgns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_mtail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_mrgnlast; /* Last region 'hit' */ -+ -+ ELAN3MMU_RGN *elan3mmu_ergns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_etail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_ergnlast; /* Last region 'hit' */ -+ -+ struct elan3_dev *elan3mmu_dev; /* Elan device we're using. */ -+ struct elan3_ctxt *elan3mmu_ctxt; /* Elan ctxt we're associated with */ -+ -+ sdramaddr_t elan3mmu_ctp; /* Context table entry for our context */ -+ ELAN3_PTBL *elan3mmu_l1ptbl; /* Level 1 Page table (first of 4) */ -+ -+ spinlock_t elan3mmu_lXptbl_lock; /* spinlock for level X table list */ -+ ELAN3_PTBL *elan3mmu_lXptbl; /* Level X Page table list */ -+ -+#ifdef LINUX -+ struct mm_struct *elan3mmu_coproc_mm; /* Linux mm we're mapping */ -+#endif -+} ELAN3MMU; -+ -+_NOTE(LOCK_ORDER(elan3mmu::elan3mmu_lock elan3_dev::IntrLock)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3mmu::elan3mmu_lock, -+ elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+/* protected by dev->IntrLock for read by device driver */ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+ -+_NOTE(SCHEME_PROTECTS_DATA("only set to valid region", -+ elan3mmu::elan3mmu_ergnlast elan3mmu::elan3mmu_mrgnlast)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+/* -+ * Macros for accessing ptes/ptbls/ptbl_grs -+ */ -+ -+#define OFFSETOF(object,member) /* calculate offset of structure member */ \ -+ ((size_t) (&(((object *)0)->member))) -+#define PTBL_TO_GR(ptbl) /* convert ptbl to ptbl group */ \ -+ ((ELAN3_PTBL_GR *) ((caddr_t) ((ptbl) - (ptbl)->ptbl_index) - OFFSETOF(ELAN3_PTBL_GR,pg_ptbls[0]))) -+#define PTBL_TO_PTADDR(ptbl) /* convert ptbl to a ptp pointing at it */ \ -+ (PTBL_TO_GR(ptbl)->pg_addr + ((ptbl)->ptbl_index<pg_hmes + ((pte) - (ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr)) -+#define HME_TO_PTE(ptebl,hme) /* convert hme to corresponding pte */ \ -+ ((ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr + ((hme) - (PTBL_TO_GR(ptbl)->pg_hmes))) -+ -+ -+/* Flags for lock_ptbl */ -+#define LK_PTBL_NOWAIT 0x1 -+#define LK_PTBL_FAILOK 0x2 -+ -+/* Return values for lock_ptbl */ -+#define LK_PTBL_OK 0x0 -+#define LK_PTBL_MISMATCH 0x1 -+#define LK_PTBL_FAILED 0x2 -+ -+/* Flags for elan3mmu_ptesync */ -+#define NO_MLIST_LOCK 0 -+#define MLIST_LOCKED 1 -+ -+/* Flags for elan3mmu_pteload */ -+#define PTE_LOAD 0x00 -+#define PTE_LOAD_LOCK 0x01 /* translation should be locked */ -+#define PTE_LOAD_NOSYNC 0x02 /* ref/mod bits should not be sync'ed to page */ -+#define PTE_NO_SLEEP 0x04 /* true if we cant sleep */ -+#define PTE_NO_STEAL 0x08 /* true if we don't want to steal ptbls */ -+ -+#define PTE_LOAD_ENDIAN_MASK 0x10 /* mask for endian-ness */ -+#define PTE_LOAD_LITTLE_ENDIAN 0x00 /* translation is to little-endian memory */ -+#define PTE_LOAD_BIG_ENDIAN 0x10 /* translation is to big-endian memory */ -+ -+ -+/* Flags for elan3mmu_unload */ -+#define PTE_UNLOAD 0x00 -+#define PTE_UNLOAD_UNLOCK 0x01 -+#define PTE_UNLOAD_NOFLUSH 0x02 -+#define PTE_UNLOAD_NOSYNC 0x04 -+ -+extern int elan3mmu_debug; -+#ifdef DEBUG_PRINTF -+# define HAT_PRINTF0(n,msg) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg) : (void) 0) -+# define HAT_PRINTF1(n,msg,a) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a) : (void) 0) -+# define HAT_PRINTF2(n,msg,a,b) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b) : (void) 0) -+# define HAT_PRINTF3(n,msg,a,b,c) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c) : (void) 0) -+# define HAT_PRINTF4(n,msg,a,b,c,d) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d) : (void) 0) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e) : (void) 0) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e,f) : (void) 0) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) ((elan3mmu_debug & n) ? (void) elan3_debugf(NULL, DBG_HAT, ##args) : (void) 0) -+# endif -+#else -+# define HAT_PRINTF0(n,msg) -+# define HAT_PRINTF1(n,msg,a) -+# define HAT_PRINTF2(n,msg,a,b) -+# define HAT_PRINTF3(n,msg,a,b,c) -+# define HAT_PRINTF4(n,msg,a,b,c,d) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) -+# endif -+#endif -+ -+/* elan3mmu_generic.c */ -+extern ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+extern void elan3mmu_init (void); -+extern void elan3mmu_fini (void); -+ -+extern ELAN3MMU *elan3mmu_alloc (struct elan3_ctxt *ctxt); -+extern void elan3mmu_free (ELAN3MMU *elan3mmu); -+ -+extern void elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp); -+extern int elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask); -+extern void elan3mmu_detach (ELAN3_DEV *dev, int ctx); -+ -+extern ELAN3MMU_RGN *elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, E3_Addr addr, int tail); -+extern int elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, caddr_t addr, int tail); -+extern int elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern int elan3mmu_setperm (ELAN3MMU *elan3mmu, caddr_t maddr, E3_Addr eaddr, u_int len, u_int perm); -+extern void elan3mmu_clrperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len); -+extern int elan3mmu_checkperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int access); -+extern caddr_t elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern E3_Addr elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern void elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr); -+extern void elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+extern void elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+ -+extern void elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr); -+extern void elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int flags); -+extern void elan3mmu_sync (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int clearflag); -+extern void elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern void elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern sdramaddr_t elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level); -+extern sdramaddr_t elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr, int *level, ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags); -+extern sdramaddr_t elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr, int level, ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags); -+extern void elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int flags); -+extern int elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int flags, E3_Addr addr, spinlock_t **pl2lock, unsigned long *lock_flags); -+extern int elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int flags, E3_Addr addr, spinlock_t **pl3lock, unsigned long *lock_flags); -+ -+extern void elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+extern int elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags); -+extern int elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags); -+extern void elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+/* elan3mmu_osdep.c */ -+extern void elan3mmu_init_osdep (void); -+extern void elan3mmu_fini_osdep (void); -+extern void elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu); -+extern void elan3mmu_free_osdep (ELAN3MMU *elan3mmu); -+extern ELAN3_PTE elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm); -+extern ELAN3_PTE elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu); -+ -+#if defined (DIGITAL_UNIX) -+# include -+#elif defined (LINUX) -+# include -+#endif -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELAN3MMU_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elan3mmu_linux.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elan3mmu_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elan3mmu_linux.h 2005-06-01 23:12:54.717421168 -0400 -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_MMU_LINUX_H -+#define __ELAN3_MMU_LINUX_H -+ -+#ident "$Id: elan3mmu_linux.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu_linux.h,v $*/ -+ -+/* XXX copy of elan3mmu_dunix.h */ -+ -+#define ALLOC_ELAN3MMU(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU *, sizeof (ELAN3MMU), cansleep) -+#define ALLOC_PTBL_GR(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3_PTBL_GR *, sizeof (ELAN3_PTBL_GR), cansleep) -+#define ALLOC_ELAN3MMU_RGN(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU_RGN *, sizeof (ELAN3MMU_RGN), cansleep) -+#define ALLOC_HMENTS(ptr,cansleep) KMEM_ALLOC((ptr,ELAN3_HMENT *, sizeof (ELAN3_HMENT), cansleep) -+ -+#define FREE_ELAN3MMU(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU)) -+#define FREE_PTBL_GR(ptr) KMEM_FREE(ptr,sizeof (ELAN3_PTBL_GR)) -+#define FREE_ELAN3MMU_RGN(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU_RGN)) -+#define FREE_HMENTS(ptr) KMEM_FREE(ptr,sizeof (ELAN3_HMENT)) -+ -+extern void elan3mmu_init_osdep(void); -+extern void elan3mmu_fini_osdep(void); -+ -+extern void elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu); -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elan3ops.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elan3ops.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elan3ops.h 2005-06-01 23:12:54.718421016 -0400 -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: elan3ops.h,v 1.3 2003/09/24 13:57:24 david Exp $ */ -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3ops.h,v $ */ -+ -+#ifndef _ELAN3_OPS_H -+#define _ELAN3_OPS_H -+ -+int get_position (void *arg, ELAN_POSITION *position); -+int set_position (void *arg, unsigned short nodeId, unsigned short numNodes); -+ -+int elan3mod_create_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+int elan3mod_destroy_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+ -+int elan3mod_create_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+int elan3mod_destroy_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+int elan3mod_attach_cap (void *arg_ctxt, ELAN_CAPABILITY *cap); -+int elan3mod_detach_cap (void *arg_ctxt); -+ -+extern ELAN_DEV_OPS elan3_dev_ops; -+ -+int stats_get_index_name (void *arg, uint index, caddr_t name); -+int stats_get_block (void *arg, uint entries, ulong *value); -+int stats_clear_block (void *arg); -+ -+int elan3_register_dev_stats (ELAN3_DEV * dev); -+void elan3_deregister_dev_stats (ELAN3_DEV * dev); -+ -+ -+#endif /* __ELAN3_OPS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elanctxt.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elanctxt.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elanctxt.h 2005-06-01 23:12:54.719420864 -0400 -@@ -0,0 +1,856 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANCTXT_H -+#define _ELAN3_ELANCTXT_H -+ -+#ident "$Id: elanctxt.h,v 1.81 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanctxt.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+ -+#define BumpUserStat(ctxt, stat) ((ctxt)->FlagPage->stat++) -+ -+#if defined(__LITTLE_ENDIAN__) -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Areg; -+ E3_uint32 Breg; -+ } r; -+ struct -+ { -+ E3_uint32 Addr; -+ E3_uint32 ContextType; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef E3_EventInt E3_EventInt_BE; -+typedef E3_IprocTrapHeader E3_IprocTrapHeader_BE; -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+typedef E3_FaultSave E3_FaultSave_BE; -+ -+typedef union -+{ -+ E3_uint64 Align64; -+ E3_DMA s; -+} E3_DMA_BE; -+ -+typedef E3_ThreadQueue E3_ThreadQueue_BE; -+ -+#else -+ -+/* "Big-Endian" data structures copied by 64 bit loads, these are 32 bit word flipped */ -+/* from the corresponding data structure. */ -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Breg; -+ E3_uint32 Areg; -+ } r; -+ struct -+ { -+ E3_uint32 ContextType; -+ E3_uint32 Addr; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef union _E3_EventInt_BE -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ E3_uint32 IntCookie; -+ } s; -+} E3_EventInt_BE; -+ -+typedef union _E3_IprocTrapHeader_BE -+{ -+ E3_uint64 Align64; -+ -+ struct -+ { -+ E3_uint32 TrAddr; -+ E3_TrTypeCntx TrTypeCntx; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ E3_uint32 TrData0; -+ } s; -+} E3_IprocTrapHeader_BE; -+ -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+ -+typedef union _E3_FaultSave_be -+{ -+ E3_uint64 Align64; -+ struct { -+ volatile E3_uint32 FaultContext; -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 EventAddress; -+ volatile E3_uint32 FaultAddress; -+ } s; -+} E3_FaultSave_BE; -+ -+typedef union _e3_dma_be -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+ } s; -+} E3_DMA_BE; -+ -+typedef union _E3_ThreadQueue_BE -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ /* copied by 64 bit copy from elan to main */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_Addr Thread; /* Bits 32 to 63 */ -+ } s; -+} E3_ThreadQueue_BE; -+ -+#endif /* defined(LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) */ -+ -+typedef struct neterr_msg -+{ -+ E3_uint32 Rail; /* Rail error received on */ -+ ELAN_CAPABILITY SrcCapability; /* Capability of source of packet */ -+ ELAN_CAPABILITY DstCapability; /* Capability of dest of packet */ -+ -+ E3_uint32 DstProcess; /* Virtual Process of dest of packet */ -+ E3_Addr CookieAddr; /* Cookie Address (or NULL for DMA) */ -+ E3_uint32 CookieVProc; /* Cookie and VP (identifies DMA) */ -+ E3_uint32 NextCookie; /* Next Cookie value (for thread) */ -+ E3_uint32 WaitForEop; /* Wait for EOP transaction */ -+} NETERR_MSG; -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Associated with each input channel can be a network error -+ * resolver structure, which can be queued on the network -+ * error resolver threads to perform RPCs to the other kernels -+ * when a network error occurs with an identify transaction -+ * included -+ */ -+typedef struct neterr_resolver -+{ -+ struct neterr_resolver *Next; -+ -+ spinlock_t Lock; -+ -+ struct elan3_ctxt *Ctxt; -+ ELAN_LOCATION Location; -+ -+ int Completed; -+ int Status; -+ long Timestamp; -+ -+ NETERR_MSG Message; -+} NETERR_RESOLVER; -+ -+ -+typedef struct neterr_fixup -+{ -+ struct neterr_fixup *Next; -+ -+ kcondvar_t Wait; -+ int Completed; -+ int Status; -+ -+ NETERR_MSG Message; -+} NETERR_FIXUP; -+ -+#endif /* __KERNEL__ */ -+ -+/* Each of the following structures must be padded to a whole */ -+/* number of 64 bit words since the kernel uses 64 bit load/stores */ -+/* to transfer the elan register state. */ -+typedef struct command_trap -+{ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_uint32 Pad; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ CProcTrapBuf_BE TrapBuf; /* 8 bytes */ -+} COMMAND_TRAP; -+ -+typedef struct thread_trap -+{ -+ E3_uint32 Registers[32]; /* 128 bytes */ -+#define REG_GLOBALS 0 -+#define REG_OUTS 8 -+#define REG_LOCALS 16 -+#define REG_INS 24 -+ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE DataFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE InstFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE OpenFaultSave; /* 16 bytes */ -+ -+ E3_Status_Reg Status; /* 4 bytes */ -+ -+ E3_Addr pc; /* 4 bytes */ -+ E3_Addr npc; /* 4 bytes */ -+ E3_Addr StartPC; /* 4 bytes */ -+ E3_Addr sp; /* 4 bytes */ -+ E3_uint32 mi; /* 4 bytes */ -+ E3_TrapBits TrapBits; /* 4 bytes */ -+ E3_DirtyBits DirtyBits; /* 4 bytes */ -+} THREAD_TRAP; -+ -+typedef struct dma_trap -+{ -+ E3_DMA_BE Desc; /* 32 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE Data0; /* 16 bytes */ -+ E3_FaultSave_BE Data1; /* 16 bytes */ -+ E3_FaultSave_BE Data2; /* 16 bytes */ -+ E3_FaultSave_BE Data3; /* 16 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_DmaInfo PacketInfo; /* 4 bytes */ -+} DMA_TRAP; -+ -+typedef struct input_trap -+{ -+ E3_uint32 State; /* 4 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ -+ u_int NumTransactions; /* 4 bytes */ -+ u_int Overflow; /* 4 bytes */ -+ u_int AckSent; /* 4 bytes */ -+ u_int BadTransaction; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE *TrappedTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *TrappedDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *WaitForEopTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *WaitForEopDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *DmaIdentifyTransaction; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *ThreadIdentifyTransaction; /* 4 bytes */ -+ E3_Addr LockQueuePointer; /* 4 bytes */ -+ E3_Addr UnlockQueuePointer; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE Transactions[MAX_TRAPPED_TRANS]; /* n * 8 bytes */ -+ E3_IprocTrapData_BE DataBuffers[MAX_TRAPPED_TRANS]; /* n * 64 bytes */ -+} INPUT_TRAP; -+ -+typedef struct input_fault_save -+{ -+ struct input_fault_save *Next; -+ E3_Addr Addr; -+ E3_uint32 Count; -+} INPUT_FAULT_SAVE; -+ -+#define NUM_INPUT_FAULT_SAVE 32 -+#define MIN_INPUT_FAULT_PAGES 8 -+#define MAX_INPUT_FAULT_PAGES 128 -+ -+typedef E3_uint32 EVENT_COOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct event_cookie_entry -+{ -+ struct event_cookie_entry *ent_next; -+ struct event_cookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ EVENT_COOKIE ent_cookie; -+ EVENT_COOKIE ent_fired; -+ kcondvar_t ent_wait; -+} EVENT_COOKIE_ENTRY; -+ -+typedef struct event_cookie_table -+{ -+ struct event_cookie_table *tbl_next; -+ struct event_cookie_table *tbl_prev; -+ -+ unsigned long tbl_task; -+ unsigned long tbl_handle; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ EVENT_COOKIE_ENTRY *tbl_entries; -+} EVENT_COOKIE_TABLE; -+ -+#define NBYTES_PER_SMALL_ROUTE 8 -+#define NBYTES_PER_LARGE_ROUTE 16 -+ -+#define ROUTE_BLOCK_SIZE ELAN3_PAGE_SIZE -+#define NROUTES_PER_BLOCK (ROUTE_BLOCK_SIZE/NBYTES_PER_LARGE_ROUTE) -+ -+typedef struct elan3_routes -+{ -+ struct elan3_routes *Next; /* Can be chained together */ -+ -+ sdramaddr_t Routes; /* sdram offset of route entries */ -+ bitmap_t Bitmap[BT_BITOUL(NROUTES_PER_BLOCK)]; /* Bitmap of which entries are used */ -+} ELAN3_ROUTES; -+ -+ -+typedef struct elan3_route_table -+{ -+ spinlock_t Lock; /* Route lock */ -+ sdramaddr_t Table; /* Kernel address for route table */ -+ u_int Size; /* # entries in route table */ -+ -+ ELAN3_ROUTES *LargeRoutes; /* Large routes */ -+} ELAN3_ROUTE_TABLE; -+ -+typedef struct elan3_vpseg -+{ -+ struct elan3_vpseg *Next; -+ int Process; /* Virtual process */ -+ int Entries; /* and # processes */ -+ int Type; /* Type of cookie */ -+ -+ union -+ { -+ -+ ELAN_CAPABILITY Capability; /* Capability of remote segment */ -+# define SegCapability SegUnion.Capability -+ struct { -+ u_short LowProc; /* Base process number */ -+ u_short HighProc; /* and high process number */ -+# define SegLowProc SegUnion.BROADCAST.LowProc -+# define SegHighProc SegUnion.BROADCAST.HighProc -+ } BROADCAST; -+ } SegUnion; -+} ELAN3_VPSEG; -+ -+#define ELAN3_VPSEG_UNINT 0 /* Unitialised */ -+#define ELAN3_VPSEG_P2P 1 /* Point to Point */ -+#define ELAN3_VPSEG_BROADCAST 2 /* Broadcast */ -+ -+#define NUM_LISTS 7 /* Number of "swap" lists */ -+ -+typedef struct elan3_ctxt -+{ -+ struct elan3_ctxt *Next; /* can be queued on a task */ -+ struct elan3_ctxt *Prev; -+ -+ CtxtHandle Handle; /* user handle */ -+ int RefCnt; /* reference count */ -+ -+ ELAN3MMU *Elan3mmu; /* elan3mmu allocated for Elan translations */ -+ -+ struct elan3_ops *Operations; /* User supplied helper functions */ -+ void *Private; /* Users private pointer */ -+ -+ int Status; /* Status (guarded by dev_mutex) */ -+ int OthersState; /* State of halt queueing for dma/thread */ -+ int LwpCount; /* Number of lwp's running */ -+ -+ ELAN3_DEV *Device; /* Elan device */ -+ -+ ELAN_CAPABILITY Capability; /* Capability I've attached as */ -+ ELAN_POSITION Position; /* Position when I was created */ -+ -+ ELAN3_VPSEG *VpSegs; /* List of virtual process segments */ -+ ELAN3_ROUTE_TABLE *RouteTable; -+ -+ krwlock_t VpLock; /* Reader/writer lock for vp list */ -+ kmutex_t SwapListsLock; /* mutex to lock swap lists */ -+ kmutex_t CmdLock; /* mutex to lock trapped dma command */ -+ kmutex_t CmdPortLock; /* mutex to load/unload commandport xlation */ -+ -+ kcondvar_t Wait; /* Condition variable to sleep on */ -+ kcondvar_t CommandPortWait; /* Condition variable to wait for commandport */ -+ kcondvar_t LwpWait; /* Condition variable to wait for lwps to stop */ -+ kcondvar_t HaltWait; /* Condition variable to wait for halt */ -+ int Halted; /* and flag for halt cv */ -+ -+ caddr_t CommandPageMapping; /* user virtual address for command page mapping */ -+ ioaddr_t CommandPage; /* Elan command port mapping page */ -+ DeviceMappingHandle CommandPageHandle; /* DDI Handle */ -+ ioaddr_t CommandPort; /* Elan command port */ -+ void *CommandPortItem; /* Item we're re-issuing to commandport */ -+ -+ ELAN3_FLAGSTATS *FlagPage; /* Page visible to user process */ -+ -+ COMMAND_TRAP *CommandTraps; /* Command port traps */ -+ ELAN3_SPLIT_QUEUE CommandTrapQ; -+ -+ CProcTrapBuf_BE *Commands; /* Overflowed commands */ -+ ELAN3_QUEUE CommandQ; -+ -+ THREAD_TRAP *ThreadTraps; /* Thread processor traps */ -+ ELAN3_QUEUE ThreadTrapQ; -+ -+ DMA_TRAP *DmaTraps; /* Dma processor tra[ed */ -+ ELAN3_QUEUE DmaTrapQ; -+ -+ INPUT_TRAP Input0Trap; /* Inputter channel 0 trap */ -+ INPUT_TRAP Input1Trap; /* Inputter channel 1 trap */ -+ NETERR_RESOLVER *Input0Resolver; /* Inputter channel 0 network error resolver */ -+ NETERR_RESOLVER *Input1Resolver; /* Inputter channel 1 network error resolver */ -+ -+ INPUT_FAULT_SAVE InputFaults[NUM_INPUT_FAULT_SAVE]; /* stored writeblock addresses */ -+ INPUT_FAULT_SAVE *InputFaultList; /* organized in list for LRU */ -+ spinlock_t InputFaultLock; /* and lock for list */ -+ -+ kmutex_t NetworkErrorLock; -+ NETERR_FIXUP *NetworkErrorFixups; -+ -+ EVENT_COOKIE *EventCookies; /* Event cookies. */ -+ ELAN3_QUEUE EventCookieQ; -+ -+ E3_Addr *SwapThreads; /* Swapped Thread Queue */ -+ ELAN3_QUEUE SwapThreadQ; -+ -+ E3_DMA_BE *SwapDmas; /* Swapped Dmas Queue */ -+ ELAN3_QUEUE SwapDmaQ; -+ -+ int ItemCount[NUM_LISTS]; /* Count of items on each swap list */ -+ int inhibit; /* if set lwp not to reload translations */ -+ -+ int Disabled; -+} ELAN3_CTXT; -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_ctxt::Status elan3_ctxt::OthersState -+ elan3_ctxt::CommandTrapQ elan3_ctxt::CommandQ elan3_ctxt::ThreadTrapQ elan3_ctxt::DmaTrapQ -+ elan3_ctxt::Input0Trap elan3_ctxt::Input1Trap elan3_ctxt::EventCookieQ elan3_ctxt::SwapThreadQ -+ elan3_ctxt::SwapDmaQ elan3_ctxt::CommandPortItem elan3_ctxt::LwpCount)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_ctxt::SwapListsLock, -+ elan3_ctxt::ItemCount)) -+_NOTE(RWLOCK_PROTECTS_DATA(elan3_ctxt::VpLock, -+ elan3_ctxt::VpSegs elan3_vpseg::Next elan3_vpseg::Process -+ elan3_vpseg::Entries elan3_vpseg::Type)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_ctxt::ItemCount elan3_ctxt::Status elan3_ctxt::CommandPortItem)) -+ -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock elan3_ctxt::CmdLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock as::a_lock)) /* implicit by pagefault */ -+ -+#define CTXT_DETACHED (1 << 0) /* Context is detached. */ -+#define CTXT_NO_LWPS (1 << 1) /* No lwp's to handle faults */ -+#define CTXT_EXITING (1 << 2) /* User process is exiting */ -+ -+#define CTXT_SWAPPING_OUT (1 << 3) /* Context is swapping out */ -+#define CTXT_SWAPPED_OUT (1 << 4) /* Context is swapped out */ -+ -+#define CTXT_SWAP_FREE (1 << 5) /* Swap buffer is free */ -+#define CTXT_SWAP_VALID (1 << 6) /* Swap buffer has queue entries in it */ -+ -+#define CTXT_DMA_QUEUE_FULL (1 << 7) /* Dma trap queue is full */ -+#define CTXT_THREAD_QUEUE_FULL (1 << 8) /* Thread trap queue is full */ -+#define CTXT_EVENT_QUEUE_FULL (1 << 9) /* Event interrupt queue is full */ -+#define CTXT_COMMAND_OVERFLOW_ERROR (1 << 10) /* Trap queue overflow */ -+ -+#define CTXT_SWAP_WANTED (1 << 11) /* Some one wanted to swap */ -+#define CTXT_WAITING_SWAPIN (1 << 12) /* Someone waiting on swapin */ -+ -+#define CTXT_WAITING_COMMAND (1 << 13) /* swgelan waiting on command port */ -+#define CTXT_COMMAND_MAPPED_MAIN (1 << 14) /* segelan has mapped command port */ -+ -+#define CTXT_QUEUES_EMPTY (1 << 15) /* dma/thread run queues are empty */ -+#define CTXT_QUEUES_EMPTYING (1 << 16) /* dma/thread run queues are being emptied */ -+ -+#define CTXT_USER_FILTERING (1 << 17) /* user requested context filter */ -+ -+#define CTXT_KERNEL (1 << 18) /* context is a kernel context */ -+#define CTXT_COMMAND_MAPPED_ELAN (1 << 19) /* command port is mapped for elan */ -+#define CTXT_FIXUP_NETERR (1 << 20) /* fixing up a network error */ -+ -+ -+#define CTXT_SWAPPED_REASONS (CTXT_NO_LWPS | \ -+ CTXT_DETACHED | \ -+ CTXT_EXITING | \ -+ CTXT_FIXUP_NETERR) -+ -+#define CTXT_OTHERS_REASONS (CTXT_EVENT_QUEUE_FULL | \ -+ CTXT_DMA_QUEUE_FULL | \ -+ CTXT_THREAD_QUEUE_FULL | \ -+ CTXT_COMMAND_OVERFLOW_ERROR | \ -+ CTXT_SWAPPED_REASONS) -+ -+#define CTXT_INPUTTER_REASONS (CTXT_USER_FILTERING | \ -+ CTXT_OTHERS_REASONS) -+ -+#define CTXT_COMMAND_MAPPED (CTXT_COMMAND_MAPPED_MAIN | \ -+ CTXT_COMMAND_MAPPED_ELAN) -+ -+#define CTXT_IS_KERNEL(ctxt) ((ctxt)->Status & CTXT_KERNEL) -+ -+/* -+ * State values for ctxt_inputterState/ctxt_commandportStats -+ */ -+#define CTXT_STATE_OK 0 -+#define CTXT_STATE_TRAPPED 1 /* Inputter channel 0 trapped */ -+#define CTXT_STATE_RESOLVING 2 /* An LWP is resolving the trap */ -+#define CTXT_STATE_NEEDS_RESTART 3 /* Th trapped packet needs to be executed */ -+#define CTXT_STATE_NETWORK_ERROR 4 /* We're waiting on an RPC for the identify transaction */ -+#define CTXT_STATE_EXECUTING 5 /* An LWP is executing the trapped packet */ -+ -+/* -+ * State values for OthersState. -+ */ -+#define CTXT_OTHERS_RUNNING 0 -+#define CTXT_OTHERS_HALTING 1 -+#define CTXT_OTHERS_SWAPPING 2 -+#define CTXT_OTHERS_HALTING_MORE 3 -+#define CTXT_OTHERS_SWAPPING_MORE 4 -+#define CTXT_OTHERS_SWAPPED 5 -+ -+typedef struct elan3_ops -+{ -+ u_int Version; -+ -+ int (*Exception) (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+ -+ /* swap item list functions */ -+ int (*GetWordItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+ int (*GetBlockItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+ void (*PutWordItem) (ELAN3_CTXT *ctxt, int list, E3_Addr value); -+ void (*PutBlockItem) (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+ void (*PutbackItem) (ELAN3_CTXT *ctxt, int list, void *item); -+ void (*FreeWordItem) (ELAN3_CTXT *ctxt, void *item); -+ void (*FreeBlockItem) (ELAN3_CTXT *ctxt, void *item); -+ int (*CountItems) (ELAN3_CTXT *ctxt, int list); -+ -+ /* event interrupt cookie */ -+ int (*Event) (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+ -+ /* swapin/swapout functions. */ -+ void (*Swapin) (ELAN3_CTXT *ctxt); -+ void (*Swapout) (ELAN3_CTXT *ctxt); -+ -+ /* Free of private data */ -+ void (*FreePrivate) (ELAN3_CTXT *ctxt); -+ -+ /* Fixup a network error */ -+ int (*FixupNetworkError) (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+ -+ /* Interrupt handler trap interface */ -+ int (*DProcTrap) (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+ int (*TProcTrap) (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+ int (*IProcTrap) (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+ int (*CProcTrap) (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+ int (*CProcReissue) (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *TrapBuf); -+ -+ /* User memory access functions */ -+ int (*StartFaultCheck)(ELAN3_CTXT *ctxt); -+ void (*EndFaultCheck) (ELAN3_CTXT *ctxt); -+ -+ E3_uint8 (*Load8) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store8) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+ E3_uint16 (*Load16) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store16) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+ E3_uint32 (*Load32) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store32) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+ E3_uint64 (*Load64) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store64) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+} ELAN3_OPS; -+ -+#define ELAN3_OPS_VERSION 0xdeef0001 -+ -+/* -+ * Flags for ops_event. -+ */ -+#define OP_INTR 0 /* Called from interrupt handler */ -+#define OP_LWP 1 /* Called from "lwp" */ -+ -+/* -+ * Return codes for "ops" functions. -+ */ -+#define OP_DEFER 0 /* Defer to next lower interrupt */ -+#define OP_IGNORE 1 /* No event hander, so ignore it */ -+#define OP_HANDLED 2 /* Handled event (resume thread) */ -+#define OP_FAILED 3 /* Failed */ -+ -+#define ELAN3_CALL_OP(ctxt,fn) ((ctxt)->Operations && (ctxt)->Operations->fn) ? (ctxt)->Operations->fn -+ -+#define ELAN3_OP_EXCEPTION(ctxt,type,proc,trap,ap) (ELAN3_CALL_OP(ctxt,Exception) (ctxt,type,proc,trap,ap) : OP_IGNORE) -+#define ELAN3_OP_GET_WORD_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetWordItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_GET_BLOCK_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetBlockItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_PUT_WORD_ITEM(ctxt,list,value) (ELAN3_CALL_OP(ctxt,PutWordItem) (ctxt,list,value) : (void)0) -+#define ELAN3_OP_PUT_BLOCK_ITEM(ctxt,list,ptr) (ELAN3_CALL_OP(ctxt,PutBlockItem) (ctxt,list,ptr) : (void)0) -+#define ELAN3_OP_PUTBACK_ITEM(ctxt,list,item) (ELAN3_CALL_OP(ctxt,PutbackItem) (ctxt,list,item) : (void)0) -+#define ELAN3_OP_FREE_WORD_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeWordItem) (ctxt,item) : (void)0) -+#define ELAN3_OP_FREE_BLOCK_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeBlockItem)(ctxt,item) : (void)0) -+#define ELAN3_OP_COUNT_ITEMS(ctxt,list) (ELAN3_CALL_OP(ctxt,CountItems)(ctxt,list) : 0) -+#define ELAN3_OP_EVENT(ctxt,cookie,flag) (ELAN3_CALL_OP(ctxt,Event)(ctxt,cookie,flag) : OP_IGNORE) -+#define ELAN3_OP_SWAPIN(ctxt) (ELAN3_CALL_OP(ctxt,Swapin)(ctxt) : (void)0) -+#define ELAN3_OP_SWAPOUT(ctxt) (ELAN3_CALL_OP(ctxt,Swapout)(ctxt) : (void)0) -+#define ELAN3_OP_FREE_PRIVATE(ctxt) (ELAN3_CALL_OP(ctxt,FreePrivate)(ctxt) : (void)0) -+#define ELAN3_OP_FIXUP_NETWORK_ERROR(ctxt, nef) (ELAN3_CALL_OP(ctxt,FixupNetworkError)(ctxt,nef) : OP_FAILED) -+ -+#define ELAN3_OP_DPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,DProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_TPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,TProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_IPROC_TRAP(ctxt, trap, chan) (ELAN3_CALL_OP(ctxt,IProcTrap)(ctxt,trap,chan) : OP_DEFER) -+#define ELAN3_OP_CPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,CProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_CPROC_REISSUE(ctxt,tbuf) (ELAN3_CALL_OP(ctxt,CProcReissue)(ctxt, tbuf) : OP_DEFER) -+ -+#define ELAN3_OP_START_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,StartFaultCheck)(ctxt) : 0) -+#define ELAN3_OP_END_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,EndFaultCheck)(ctxt) : (void)0) -+#define ELAN3_OP_LOAD8(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load8)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE8(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store8)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD16(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load16)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE16(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store16)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD32(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load32)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE32(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store32)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD64(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load64)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE64(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store64)(ctxt,addr,val) : (void)0) -+ -+#endif /* __KERNEL__ */ -+ -+/* "list" arguement to ops functions */ -+#define LIST_DMA_PTR 0 -+#define LIST_DMA_DESC 1 -+#define LIST_THREAD 2 -+#define LIST_COMMAND 3 -+#define LIST_SETEVENT 4 -+#define LIST_FREE_WORD 5 -+#define LIST_FREE_BLOCK 6 -+ -+#define MAX_LISTS 7 -+ -+#if defined(__KERNEL__) && MAX_LISTS != NUM_LISTS -+# error Check NUM_LISTS == MAX_LISTS -+#endif -+ -+/* -+ * Values for the 'type' field to PostException(). -+ */ -+#define EXCEPTION_INVALID_ADDR 1 /* FaultArea, res */ -+#define EXCEPTION_UNIMP_INSTR 2 /* instr */ -+#define EXCEPTION_INVALID_PROCESS 3 /* proc, res */ -+#define EXCEPTION_SIMULATION_FAILED 4 /* */ -+#define EXCEPTION_UNIMPLEMENTED 5 /* */ -+#define EXCEPTION_SWAP_FAULT 6 /* */ -+#define EXCEPTION_SWAP_FAILED 7 /* */ -+#define EXCEPTION_BAD_PACKET 8 /* */ -+#define EXCEPTION_FAULTED 9 /* addr */ -+#define EXCEPTION_QUEUE_OVERFLOW 10 /* FaultArea, TrapType */ -+#define EXCEPTION_COMMAND_OVERFLOW 11 /* count */ -+#define EXCEPTION_DMA_RETRY_FAIL 12 /* */ -+#define EXCEPTION_CHAINED_EVENT 13 /* EventAddr */ -+#define EXCEPTION_THREAD_KILLED 14 /* */ -+#define EXCEPTION_CANNOT_SAVE_THREAD 15 -+#define EXCEPTION_BAD_SYSCALL 16 /* */ -+#define EXCEPTION_DEBUG 17 -+#define EXCEPTION_BAD_EVENT 18 /* */ -+#define EXCEPTION_NETWORK_ERROR 19 /* rvp */ -+#define EXCEPTION_BUS_ERROR 20 -+#define EXCEPTION_COOKIE_ERROR 21 -+#define EXCEPTION_PACKET_TIMEOUT 22 -+#define EXCEPTION_BAD_DMA 23 /* */ -+#define EXCEPTION_ENOMEM 24 -+ -+/* -+ * Values for the 'proc' field to ElanException(). -+ */ -+#define COMMAND_PROC 1 -+#define THREAD_PROC 2 -+#define DMA_PROC 3 -+#define INPUT_PROC 4 -+#define EVENT_PROC 5 -+ -+/* Flags to IssueDmaCommand */ -+#define ISSUE_COMMAND_FOR_CPROC 1 -+#define ISSUE_COMMAND_CANT_WAIT 2 -+ -+/* Return code from IssueDmaCommand.*/ -+#define ISSUE_COMMAND_OK 0 -+#define ISSUE_COMMAND_TRAPPED 1 -+#define ISSUE_COMMAND_RETRY 2 -+#define ISSUE_COMMAND_WAIT 3 -+ -+#ifdef __KERNEL__ -+ -+extern ELAN3_CTXT *elan3_alloc(ELAN3_DEV *dev, int kernel); -+extern void elan3_free (ELAN3_CTXT *ctxt); -+ -+extern int elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern int elan3_doattach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern void elan3_detach (ELAN3_CTXT *ctxt); -+extern void elan3_dodetach (ELAN3_CTXT *ctxt); -+ -+extern int elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap); -+extern int elan3_removevp (ELAN3_CTXT *ctxt, int process); -+extern int elan3_addbcastvp(ELAN3_CTXT *ctxt, int process, int base, int count); -+ -+extern int elan3_process (ELAN3_CTXT *ctxt); -+ -+extern int elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_check_route(ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError); -+ -+extern int elan3_lwp (ELAN3_CTXT *ctxt); -+ -+extern void elan3_swapin (ELAN3_CTXT *ctxt, int reason); -+extern void elan3_swapout (ELAN3_CTXT *ctxt, int reason); -+extern int elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages); -+extern void elan3_block_inputter (ELAN3_CTXT *ctxt, int block); -+ -+ -+extern E3_Addr elan3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, int stackSize, int nargs, ...); -+ -+extern void SetInputterState (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void UnloadCommandPageMapping (ELAN3_CTXT *ctxt); -+extern void StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+ -+extern int HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags); -+extern int IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int flags); -+extern int IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int flags); -+extern int WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int flags); -+extern void FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, -+ E3_FaultSave_BE *FaultSaveArea, int flags); -+extern int SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress); -+extern void ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr,int flags); -+extern int SetEventsNeedRestart (ELAN3_CTXT *ctxt); -+extern void RestartSetEvents (ELAN3_CTXT *ctxt); -+extern int RunEventType (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType); -+extern void WakeupLwp (ELAN3_DEV *dev, void *arg); -+extern void QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie); -+extern int WaitForCommandPort (ELAN3_CTXT *ctxt); -+ -+extern int ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...); -+ -+/* context_osdep.c */ -+extern int LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr elanAddr, int len, int protFault, int writeable); -+extern void LoadCommandPortTranslation (ELAN3_CTXT *ctxt); -+ -+#if defined(DIGITAL_UNIX) -+/* seg_elan.c */ -+extern caddr_t elan3_segelan3_create (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_destroy (ELAN3_CTXT *ctxt); -+extern int elan3_segelan3_map (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_unmap (ELAN3_CTXT *ctxt); -+ -+/* seg_elanmem.c */ -+extern int elan3_segelanmem_create (ELAN3_DEV *dev, unsigned object, unsigned off, vm_offset_t *addrp, int len); -+#endif /* defined(DIGITAL_UNIX) */ -+ -+/* route_table.c */ -+extern ELAN3_ROUTE_TABLE *AllocateRouteTable (ELAN3_DEV *dev, int size); -+extern void FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl); -+extern int LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp, int ctxnum, int nflits, E3_uint16 *flits); -+extern int GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits); -+extern void InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+ -+extern int GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri); -+extern int GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive); -+extern int GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive); -+ -+/* virtual_process.c */ -+extern ELAN_LOCATION ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap); -+extern int ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process); -+extern caddr_t CapabilityString (ELAN_CAPABILITY *cap); -+extern void UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+ -+extern int elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_reset_route (ELAN3_CTXT *ctxt, int process); -+ -+/* cproc.c */ -+extern int NextCProcTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+extern void ResolveCProcTrap (ELAN3_CTXT *ctxt); -+extern int RestartCProcTrap (ELAN3_CTXT *ctxt); -+ -+/* iproc.c */ -+extern void InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern void ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvp); -+extern int RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern char *IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData *datap); -+extern void SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck); -+ -+/* tproc.c */ -+extern int NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern void ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern int TProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartTProcItems (ELAN3_CTXT *ctxt); -+extern E3_Addr SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction); -+extern void ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer); -+ -+/* tprocinsts.c */ -+extern int RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal); -+ -+/* tproc_osdep.c */ -+extern int ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+extern int ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+ -+/* dproc.c */ -+extern int NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern int DProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartDProcItems (ELAN3_CTXT *ctxt); -+extern void RestartDmaDesc (ELAN3_CTXT *ctxt, E3_DMA_BE *desc); -+extern void RestartDmaTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr); -+ -+/* network_error.c */ -+extern void InitialiseNetworkErrorResolver (void); -+extern void FinaliseNetworkErrorResolver (void); -+extern int QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp); -+extern void FreeNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern void CancelNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern int ExecuteNetworkErrorFixup (NETERR_MSG *msg); -+extern void CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status); -+ -+extern int AddNeterrServerSyscall (int elanId, void *configp, void *addrp, char *namep); -+ -+/* eventcookie.c */ -+extern void cookie_init(void); -+extern void cookie_fini(void); -+extern EVENT_COOKIE_TABLE *cookie_alloc_table (unsigned long task, unsigned long handle); -+extern void cookie_free_table (EVENT_COOKIE_TABLE *tbl); -+extern int cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+ -+/* routecheck.c */ -+extern int elan3_route_check (ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNode); -+extern int elan3_route_broadcast_check(ELAN3_CTXT *ctxt, E3_uint16 *flitsA, int lowNode, int highNode); -+ -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANCTXT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elandebug.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elandebug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elandebug.h 2005-06-01 23:12:54.720420712 -0400 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANDEBUG_H -+#define _ELAN3_ELANDEBUG_H -+ -+#ident "$Id: elandebug.h,v 1.38 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+extern u_int elan3_debug; -+extern u_int elan3_debug_console; -+extern u_int elan3_debug_buffer; -+extern u_int elan3_debug_ignore_dev; -+extern u_int elan3_debug_ignore_kcomm; -+extern u_int elan3_debug_ignore_ctxt; -+extern u_int elan3_debug_display_ctxt; -+ -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_HAT 0x00000002 -+#define DBG_FN 0x00000004 -+#define DBG_SEG 0x00000008 -+#define DBG_INTR 0x00000010 -+#define DBG_LWP 0x00000020 -+#define DBG_FAULT 0x00000040 -+#define DBG_EVENT 0x00000080 -+#define DBG_CPROC 0x00000100 -+#define DBG_TPROC 0x00000200 -+#define DBG_DPROC 0x00000400 -+#define DBG_IPROC 0x00000800 -+#define DBG_SWAP 0x00001000 -+#define DBG_CMD 0x00002000 -+#define DBG_VP 0x00004000 -+#define DBG_SYSCALL 0x00008000 -+#define DBG_BSCAN 0x00010000 -+#define DBG_LINKERR 0x00020000 -+#define DBG_NETERR 0x00040000 -+#define DBG_NETRPC 0x00080000 -+#define DBG_EVENTCOOKIE 0x00100000 -+#define DBG_SDRAM 0x00200000 -+ -+#define DBG_EP 0x10000000 -+#define DBG_EPCONSOLE 0x20000000 -+ -+#define DBG_EIP 0x40000000 -+#define DBG_EIPFAIL 0x80000000 -+ -+#define DBG_ALL 0xffffffff -+ -+/* values to pass as "ctxt" rather than a "ctxt" pointer */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_KCOMM ((void *) 1) -+#define DBG_ICS ((void *) 2) -+#define DBG_USER ((void *) 3) -+#define DBG_NTYPES 64 -+ -+#if defined(DEBUG_PRINTF) -+# define DBG(m,fn) ((elan3_debug&(m)) ? (void)(fn) : (void)0) -+# define PRINTF0(ctxt,m,fmt) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt) : (void)0) -+# define PRINTF1(ctxt,m,fmt,a) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a) : (void)0) -+# define PRINTF2(ctxt,m,fmt,a,b) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b) : (void)0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define DBG(m, fn) do { ; } while (0) -+# define PRINTF0(ctxt,m,fmt) do { ; } while (0) -+# define PRINTF1(ctxt,m,fmt,a) do { ; } while (0) -+# define PRINTF2(ctxt,m,fmt,a,b) do { ; } while (0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) do { ; } while (0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) do { ; } while (0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) do { ; } while (0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) do { ; } while (0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) do { ; } while (0) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+#ifdef __GNUC__ -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...) -+ __attribute__ ((format (printf,3,4))); -+#else -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...); -+#endif -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN3_ELANDEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elandev.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elandev.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elandev.h 2005-06-01 23:12:54.721420560 -0400 -@@ -0,0 +1,581 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANDEV_H -+#define __ELAN3_ELANDEV_H -+ -+#ident "$Id: elandev.h,v 1.74.2.2 2004/12/10 11:10:19 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev.h,v $ */ -+ -+#include -+#include -+#include -+ -+#if defined(DIGITAL_UNIX) -+# include -+#elif defined(LINUX) -+# include -+#elif defined(SOLARIS) -+# include -+#endif -+ -+#ifndef TRUE -+# define TRUE 1 -+#endif -+#ifndef FALSE -+# define FALSE 0 -+#endif -+ -+/* -+ * Elan base address registers defined as follows : -+ */ -+#define ELAN3_BAR_SDRAM 0 -+#define ELAN3_BAR_COMMAND_PORT 1 -+#define ELAN3_BAR_REGISTERS 2 -+#define ELAN3_BAR_EBUS 3 -+ -+/* Macro to generate 'offset' to mmap "mem" device */ -+#define OFF_TO_SPACE(off) ((off) >> 28) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(space,off) (((space) << 28) | ((off) & 0x0FFFFFFF)) -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Elan EBUS is configured as follows : -+ */ -+#define ELAN3_EBUS_ROM_OFFSET 0x000000 /* rom */ -+#define ELAN3_EBUS_INTPAL_OFFSET 0x180000 /* interrupt pal (write only) */ -+ -+#define ELAN3_EBUS_ROM_SIZE 0x100000 -+ -+/* -+ * Elan SDRAM is arranged as follows : -+ */ -+#define ELAN3_TANDQ_SIZE 0x0020000 /* Trap And Queue Size */ -+#define ELAN3_CONTEXT_SIZE 0x0010000 /* Context Table Size */ -+#define ELAN3_COMMAND_TRAP_SIZE 0x0010000 /* Command Port Trap Size */ -+ -+#ifdef MPSAS -+#define ELAN3_LN2_NUM_CONTEXTS 8 /* Support 256 contexts */ -+#else -+#define ELAN3_LN2_NUM_CONTEXTS 12 /* Support 4096 contexts */ -+#endif -+#define ELAN3_NUM_CONTEXTS (1 << ELAN3_LN2_NUM_CONTEXTS) /* Entries in context table */ -+ -+#define ELAN3_SDRAM_NUM_BANKS 4 /* Elan supports 4 Banks of Sdram */ -+#define ELAN3_SDRAM_BANK_SHIFT 26 /* each of which can be 64 mbytes ? */ -+#define ELAN3_SDRAM_BANK_SIZE (1 << ELAN3_SDRAM_BANK_SHIFT) -+ -+#define ELAN3_MAX_CACHE_SIZE (64 * 1024) /* Maximum cache size */ -+#define ELAN3_CACHE_SIZE (64 * 4 * E3_CACHELINE_SIZE) /* Elan3 has 8K cache */ -+ -+#ifndef offsetof -+#define offsetof(s, m) (size_t)(&(((s *)0)->m)) -+#endif -+ -+/* -+ * circular queue and macros to access members. -+ */ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_QUEUE; -+ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_middle; /* Middle pointer */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_SPLIT_QUEUE; -+ -+#define ELAN3_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop)+1, (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define ELAN3_QUEUE_FULL(q) ((q).q_count == ((q).q_size - (q).q_slop)) -+#define ELAN3_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size - 1) -+#define ELAN3_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define ELAN3_QUEUE_FRONT_EMPTY(q) ((q).q_front == (q).q_middle) -+#define ELAN3_QUEUE_BACK_EMPTY(q) ((q).q_middle == (q).q_back) -+#define ELAN3_QUEUE_ADD(q) ((q).q_back = ((q).q_back+1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_REMOVE(q) ((q).q_front = ((q).q_front+1) % (q).q_size, (q).q_count--) -+#define ELAN3_QUEUE_ADD_FRONT(q) ((q).q_front = ((q).q_front-1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_CONSUME(q) ((q).q_middle = ((q).q_middle+1) % (q).q_size) -+#define ELAN3_QUEUE_FRONT(q,qArea) (&(qArea)[(q).q_front]) -+#define ELAN3_QUEUE_MIDDLE(q,qArea) (&(qArea)[(q).q_middle]) -+#define ELAN3_QUEUE_BACK(q,qArea) (&(qArea)[(q).q_back]) -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 17 /* allows max 64Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+#define SDRAM_FREELIST_TRIGGER 32 -+ -+typedef struct elan3_sdram_bank -+{ -+ u_int Size; /* Size of bank of memory */ -+ -+ ioaddr_t Mapping; /* Where mapped in the kernel */ -+ DeviceMappingHandle Handle; /* and mapping handle */ -+ -+ struct elan3_ptbl_gr **PtblGroups; -+ -+ bitmap_t *Bitmaps[SDRAM_NUM_FREE_LISTS]; -+} ELAN3_SDRAM_BANK; -+ -+typedef struct elan3_haltop -+{ -+ struct elan3_haltop *Next; /* Chain to next in list. */ -+ E3_uint32 Mask; /* Interrupt mask to see before calling function */ -+ -+ void (*Function)(void *, void *); /* Function to call */ -+ void *Arguement; /* Arguement to pass to function */ -+} ELAN3_HALTOP; -+ -+#define HALTOP_BATCH 32 -+ -+#endif /* __KERNEL__ */ -+ -+typedef struct elan3_stats -+{ -+ u_long Version; /* version field */ -+ u_long Interrupts; /* count of elan interrupts */ -+ u_long TlbFlushes; /* count of tlb flushes */ -+ u_long InvalidContext; /* count of traps with invalid context */ -+ u_long ComQueueHalfFull; /* count of interrupts due to com queue being half full */ -+ -+ u_long CProcTraps; /* count of cproc traps */ -+ u_long DProcTraps; /* count of dproc traps */ -+ u_long TProcTraps; /* cound of tproc traps */ -+ u_long IProcTraps; /* count of iproc traps */ -+ u_long EventInterrupts; /* count of event interrupts */ -+ -+ u_long PageFaults; /* count of elan page faults */ -+ -+ /* inputter related */ -+ u_long EopBadAcks; /* count of EOP_BAD_ACKs */ -+ u_long EopResets; /* count of EOP_ERROR_RESET */ -+ u_long InputterBadLength; /* count of BadLength */ -+ u_long InputterCRCDiscards; /* count of CRC_STATUS_DISCARD */ -+ u_long InputterCRCErrors; /* count of CRC_STATUS_ERROR */ -+ u_long InputterCRCBad; /* count of CRC_STATUS_BAD */ -+ u_long DmaNetworkErrors; /* count of errors in dma data */ -+ u_long DmaIdentifyNetworkErrors; /* count of errors after dma identify */ -+ u_long ThreadIdentifyNetworkErrors; /* count of errors after thread identify */ -+ -+ /* dma related */ -+ u_long DmaRetries; /* count of dma retries (due to retry fail count) */ -+ u_long DmaOutputTimeouts; /* count of dma output timeouts */ -+ u_long DmaPacketAckErrors; /* count of dma packet ack errors */ -+ -+ /* thread related */ -+ u_long ForcedTProcTraps; /* count of forced tproc traps */ -+ u_long TrapForTooManyInsts; /* count of too many instruction traps */ -+ u_long ThreadOutputTimeouts; /* count of thread output timeouts */ -+ u_long ThreadPacketAckErrors; /* count of thread packet ack errors */ -+ -+ /* link related */ -+ u_long LockError; /* count of RegPtr->Exts.LinkErrorTypes:LS_LockError */ -+ u_long DeskewError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DeskewError */ -+ u_long PhaseError; /* count of RegPtr->Exts.LinkErrorTypes:LS_PhaseError */ -+ u_long DataError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DataError */ -+ u_long FifoOvFlow0; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow0 */ -+ u_long FifoOvFlow1; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow1 */ -+ u_long LinkErrorValue; /* link error value on data error */ -+ -+ /* memory related */ -+ u_long CorrectableErrors; /* count of correctable ecc errors */ -+ u_long UncorrectableErrors; /* count of uncorrectable ecc errors */ -+ u_long MultipleErrors; /* count of multiple ecc errors */ -+ u_long SdramBytesFree; /* count of sdram bytes free */ -+ -+ /* Interrupt related */ -+ u_long LongestInterrupt; /* length of longest interrupt in ticks */ -+ -+ u_long EventPunts; /* count of punts of event interrupts to thread */ -+ u_long EventRescheds; /* count of reschedules of event interrupt thread */ -+} ELAN3_STATS; -+ -+#define ELAN3_STATS_VERSION (ulong)2 -+#define ELAN3_NUM_STATS (sizeof (ELAN3_STATS)/sizeof (u_long)) -+ -+#define ELAN3_STATS_DEV_FMT "elan3_stats_dev_%d" -+ -+#ifdef __KERNEL__ -+ -+#define BumpStat(dev,stat) ((dev)->Stats.stat++) -+ -+typedef struct elan3_level_ptbl_block -+{ -+ spinlock_t PtblLock; /* Page table freelist lock */ -+ int PtblTotal; /* Count of level N page tables allocated */ -+ int PtblFreeCount; /* Count of free level N page tables */ -+ struct elan3_ptbl *PtblFreeList; /* Free level N page tables */ -+ struct elan3_ptbl_gr *PtblGroupList; /* List of Groups of level N page tables */ -+} ELAN3_LEVEL_PTBL_BLOCK; -+ -+typedef struct elan3_dev -+{ -+ ELAN3_DEV_OSDEP Osdep; /* OS specific entries */ -+ int Instance; /* Device number */ -+ ELAN_DEVINFO Devinfo; -+ ELAN_POSITION Position; /* position in switch network (for user code) */ -+ ELAN_DEV_IDX DeviceIdx; /* device index registered with elanmod */ -+ -+ int ThreadsShouldStop; /* flag that kernel threads should stop */ -+ -+ spinlock_t IntrLock; -+ spinlock_t TlbLock; -+ spinlock_t CProcLock; -+ kcondvar_t IntrWait; /* place event interrupt thread sleeps */ -+ unsigned EventInterruptThreadStarted:1; /* event interrupt thread started */ -+ unsigned EventInterruptThreadStopped:1; /* event interrupt thread stopped */ -+ -+ DeviceMappingHandle RegHandle; /* DDI Handle */ -+ ioaddr_t RegPtr; /* Elan Registers */ -+ -+ volatile E3_uint32 InterruptMask; /* copy of RegPtr->InterruptMask */ -+ volatile E3_uint32 Event_Int_Queue_FPtr; /* copy of RegPtr->Event_Int_Queue_FPtr */ -+ volatile E3_uint32 SchCntReg; /* copy of RegPtr->SchCntReg */ -+ volatile E3_uint32 Cache_Control_Reg; /* true value for RegPtr->Cache_Control_Reg */ -+ -+ ELAN3_SDRAM_BANK SdramBanks[ELAN3_SDRAM_NUM_BANKS]; /* Elan sdram banks */ -+ spinlock_t SdramLock; /* Sdram allocator */ -+ sdramaddr_t SdramFreeLists[SDRAM_NUM_FREE_LISTS]; -+ unsigned SdramFreeCounts[SDRAM_NUM_FREE_LISTS]; -+ -+ sdramaddr_t TAndQBase; /* Trap and Queue area */ -+ sdramaddr_t ContextTable; /* Elan Context Table */ -+ u_int ContextTableSize; /* # entries in context table */ -+ -+ struct elan3_ctxt **CtxtTable; /* array of ctxt pointers or nulls */ -+ -+ sdramaddr_t CommandPortTraps[2]; /* Command port trap overflow */ -+ int CurrentCommandPortTrap; /* Which overflow queue we're using */ -+ -+ u_int HaltAllCount; /* Count of reasons to halt context 0 queues */ -+ u_int HaltNonContext0Count; /* Count of reasons to halt non-context 0 queues */ -+ u_int HaltDmaDequeueCount; /* Count of reasons to halt dma from dequeuing */ -+ u_int HaltThreadCount; /* Count of reasons to halt the thread processor */ -+ u_int FlushCommandCount; /* Count of reasons to flush command queues */ -+ u_int DiscardAllCount; /* Count of reasons to discard context 0 */ -+ u_int DiscardNonContext0Count; /* Count of reasons to discard non context 0 */ -+ -+ struct thread_trap *ThreadTrap; /* Thread Processor trap space */ -+ struct dma_trap *DmaTrap; /* DMA Processor trap space */ -+ -+ spinlock_t FreeHaltLock; /* Lock for haltop free list */ -+ ELAN3_HALTOP *FreeHaltOperations; /* Free list of haltops */ -+ u_int NumHaltOperations; /* Number of haltops allocated */ -+ u_int ReservedHaltOperations; /* Number of haltops reserved */ -+ -+ ELAN3_HALTOP *HaltOperations; /* List of operations to call */ -+ ELAN3_HALTOP **HaltOperationsTailpp; /* Pointer to last "next" pointer in list */ -+ E3_uint32 HaltOperationsMask; /* Or of all bits in list of operations */ -+ -+ physaddr_t SdramPhysBase; /* Physical address of SDRAM */ -+ physaddr_t SdramPhysMask; /* and mask of significant bits */ -+ -+ physaddr_t PciPhysBase; /* physical address of local PCI segment */ -+ physaddr_t PciPhysMask; /* and mask of significant bits */ -+ -+ long ErrorTime; /* lbolt at last error (link,ecc etc) */ -+ long ErrorsPerTick; /* count of errors for this tick */ -+ timer_fn_t ErrorTimeoutId; /* id of timeout when errors masked out */ -+ timer_fn_t DmaPollTimeoutId; /* id of timeout to poll for "bad" dmas */ -+ int FilterHaltQueued; -+ -+ /* -+ * HAT layer specific entries. -+ */ -+ ELAN3_LEVEL_PTBL_BLOCK Level[4]; -+ spinlock_t PtblGroupLock; /* Lock for Page Table group lists */ -+ struct elan3_ptbl_gr *Level3PtblGroupHand; /* Hand for ptbl stealing */ -+ -+ /* -+ * Per-Context Information structures. -+ */ -+ struct elan3_info *Infos; /* List of "infos" for this device */ -+ -+ char LinkShutdown; /* link forced into reset by panic/shutdown/dump */ -+ -+ /* -+ * Device statistics. -+ */ -+ ELAN3_STATS Stats; -+ ELAN_STATS_IDX StatsIndex; -+ -+ struct { -+ E3_Regs *RegPtr; -+ char *Sdram[ELAN3_SDRAM_NUM_BANKS]; -+ } PanicState; -+} ELAN3_DEV; -+ -+#define ELAN3_DEV_CTX_TABLE(dev,ctxtn) ( (dev)->CtxtTable[ (ctxtn) & MAX_ROOT_CONTEXT_MASK] ) -+ -+/* macros for accessing dev->RegPtr.Tags/Sets. */ -+#define write_cache_tag(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Tags.what)) -+#define read_cache_tag(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Tags.what)) -+#define write_cache_set(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Sets.what)) -+#define read_cache_set(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Sets.what)) -+ -+/* macros for accessing dev->RegPtr.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define write_reg32(dev,what,val) writel (val, dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define read_reg64(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define read_reg32(dev,what) readl (dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+ -+/* macros for accessing dev->RegPtr.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define write_ureg32(dev,what,val) writel (val, dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define read_ureg64(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define read_ureg32(dev,what) readl (dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+ -+/* macros for accessing dma descriptor/thread regs */ -+#define copy_dma_regs(dev, desc) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < sizeof (E3_DMA)/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) desc)[i] = readq (dev->RegPtr + offsetof (E3_Regs, Regs.Dma_Desc) + i*sizeof (E3_uint64)); \ -+MACRO_END -+ -+#define copy_thread_regs(dev, regs) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < (32*sizeof (E3_uint32))/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) regs)[i] = readq (dev->RegPtr + offsetof (E3_Regs, Regs.Globals[0]) + i*sizeof (E3_uint64)); \ -+MACRO_END -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ _E3_DataBusMap::Exts _E3_DataBusMap::Input_Context_Fil_Flush -+ elan3_dev::CurrentCommandPortTrap elan3_dev::HaltAllCount elan3_dev::HaltDmaDequeueCount -+ elan3_dev::FlushCommandCount elan3_dev::DiscardAllCount elan3_dev::DiscardNonContext0Count -+ elan3_dev::HaltOperations elan3_dev::HaltOperationsMask)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::TlbLock, -+ _E3_DataBusMap::Cache_Control_Reg)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_dev::Infos elan3_dev::InfoTable)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::FreeHaltLock, -+ elan3_dev::FreeHaltOperations elan3_dev::NumHaltOperations elan3_dev::ReservedHaltOperations)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PageFreeListLock, -+ elan3_dev::PageFreeList elan3_dev::PageFreeListSize)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level1PtblLock, -+ elan3_dev::Level1PtblTotal elan3_dev::Level1PtblFreeCount elan3_dev::Level1PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level2PtblLock, -+ elan3_dev::Level2PtblTotal elan3_dev::Level2PtblFreeCount elan3_dev::Level2PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level3PtblLock, -+ elan3_dev::Level3PtblTotal elan3_dev::Level3PtblFreeCount elan3_dev::Level3PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PtblGroupLock, -+ elan3_dev::Level1PtblGroupList elan3_dev::Level2PtblGroupList elan3_dev::Level3PtblGroupList)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_dev::InfoTable elan3_dev::Level1PtblFreeList -+ elan3_dev::Level2PtblFreeList elan3_dev::Level3PtblFreeList)) -+ -+_NOTE(LOCK_ORDER(elan3_dev::InfoLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::InfoLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::IntrLock)) -+ -+#define SET_INT_MASK(dev,Mask) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev)->InterruptMask = (Mask))); mmiob(); MACRO_END -+#define ENABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask |= (bits)))); mmiob(); MACRO_END -+#define DISABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask &= ~(bits)))); mmiob(); MACRO_END -+ -+#define INIT_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg |= (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob (); \ -+MACRO_END -+ -+#define CLEAR_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg &= ~(val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define MODIFY_SCHED_STATUS(dev, SetBits, ClearBits) \ -+MACRO_BEGIN \ -+ ASSERT ((((SetBits)|(ClearBits)) & HaltStopAndExtTestMask) == ((SetBits)|(ClearBits))); \ -+ (dev)->SchCntReg = (((dev)->SchCntReg | (SetBits)) & ~(ClearBits)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define PULSE_SCHED_STATUS(dev, RestartBits) \ -+MACRO_BEGIN \ -+ ASSERT (((RestartBits) & HaltStopAndExtTestMask) == 0); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg | (RestartBits)); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_LINK_VALUE(dev, enabled, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (((dev)->SchCntReg & HaltAndStopMask) | ((enabled) ? LinkBoundaryScan : 0) | LinkSetValue(val, 0)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#ifdef DEBUG_ASSERT -+# define ELAN3_ASSERT(dev, EX) ((void)((EX) || elan3_assfail(dev, #EX, __FILE__, __LINE__))) -+#else -+# define ELAN3_ASSERT(dev, EX) -+#endif -+ -+/* elandev_generic.c */ -+extern int InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort); -+extern void FinaliseElan (ELAN3_DEV *dev); -+extern int InterruptHandler (ELAN3_DEV *dev); -+extern void PollForDmaHungup (void *arg); -+ -+extern int SetLinkBoundaryScan (ELAN3_DEV *dev); -+extern void ClearLinkBoundaryScan (ELAN3_DEV *dev); -+extern int WriteBoundaryScanValue (ELAN3_DEV *dev, int value); -+extern int ReadBoundaryScanValue(ELAN3_DEV *dev, int link); -+ -+extern int ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency); -+ -+extern struct elan3_ptbl_gr *ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset); -+extern void ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, struct elan3_ptbl_gr *ptg); -+ -+extern void ElanFlushTlb (ELAN3_DEV *dev); -+ -+extern void SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp); -+extern void FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op); -+extern int ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep); -+extern void ReleaseHaltOperations (ELAN3_DEV *dev, int count); -+extern void ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend); -+extern void QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement); -+ -+extern int ComputePosition (ELAN_POSITION *pos, unsigned NodeId, unsigned NumNodes, unsigned numDownLinksVal); -+ -+extern caddr_t MiToName (int mi); -+extern void ElanBusError (ELAN3_DEV *dev); -+ -+extern void TriggerLsa (ELAN3_DEV *dev); -+ -+extern ELAN3_DEV *elan3_device (int instance); -+extern int DeviceRegisterSize (ELAN3_DEV *dev, int rnumber, int *sizep); -+extern int MapDeviceRegister (ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, int offset, -+ int len, DeviceMappingHandle *handlep); -+extern void UnmapDeviceRegister (ELAN3_DEV *dev, DeviceMappingHandle *handlep); -+ -+ -+/* sdram.c */ -+/* sdram accessing functions - define 4 different types for 8,16,32,64 bit accesses */ -+extern unsigned char elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern void elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+extern void elan3_sdram_init (ELAN3_DEV *dev); -+extern void elan3_sdram_fini (ELAN3_DEV *dev); -+extern void elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top); -+extern sdramaddr_t elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes); -+extern void elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern physaddr_t elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t addr); -+ -+/* cproc.c */ -+extern void HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Mask); -+ -+/* iproc.c */ -+extern void HandleIProcTrap (ELAN3_DEV *dev, int Channel, E3_uint32 Pend, sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, sdramaddr_t DataOff); -+ -+/* tproc.c */ -+extern int HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverTProcTrap (ELAN3_DEV *dev, struct thread_trap *threadTrap, E3_uint32 Pend); -+ -+/* dproc.c */ -+extern int HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverDProcTrap (ELAN3_DEV *dev, struct dma_trap *dmaTrap, E3_uint32 Pend); -+ -+#if defined(LINUX) -+/* procfs_linux.h */ -+extern struct proc_dir_entry *elan3_procfs_root; -+extern struct proc_dir_entry *elan3_config_root; -+ -+extern void elan3_procfs_init(void); -+extern void elan3_procfs_fini(void); -+extern void elan3_procfs_device_init (ELAN3_DEV *dev); -+extern void elan3_procfs_device_fini (ELAN3_DEV *dev); -+#endif /* defined(LINUX) */ -+ -+/* elan3_osdep.c */ -+extern int BackToBackMaster; -+extern int BackToBackSlave; -+ -+#define ELAN_REG_REC_MAX (100) -+#define ELAN_REG_REC(REG) { \ -+elan_reg_rec_file [elan_reg_rec_index] = __FILE__; \ -+elan_reg_rec_line [elan_reg_rec_index] = __LINE__; \ -+elan_reg_rec_reg [elan_reg_rec_index] = REG; \ -+elan_reg_rec_cpu [elan_reg_rec_index] = smp_processor_id(); \ -+elan_reg_rec_lbolt[elan_reg_rec_index] = lbolt; \ -+elan_reg_rec_index = ((elan_reg_rec_index+1) % ELAN_REG_REC_MAX);} -+ -+extern char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+extern long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+extern E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_index; -+ -+#endif /* __KERNEL__ */ -+ -+ -+#define ELAN3_PROCFS_ROOT "/proc/qsnet/elan3" -+#define ELAN3_PROCFS_VERSION "/proc/qsnet/elan3/version" -+#define ELAN3_PROCFS_DEBUG "/proc/qsnet/elan3/config/elandebug" -+#define ELAN3_PROCFS_DEBUG_CONSOLE "/proc/qsnet/elan3/config/elandebug_console" -+#define ELAN3_PROCFS_DEBUG_BUFFER "/proc/qsnet/elan3/config/elandebug_buffer" -+#define ELAN3_PROCFS_MMU_DEBUG "/proc/qsnet/elan3/config/elan3mmu_debug" -+#define ELAN3_PROCFS_PUNT_LOOPS "/proc/qsnet/elan3/config/eventint_punt_loops" -+ -+#define ELAN3_PROCFS_DEVICE_STATS_FMT "/proc/qsnet/elan3/device%d/stats" -+#define ELAN3_PROCFS_DEVICE_POSITION_FMT "/proc/qsnet/elan3/device%d/position" -+#define ELAN3_PROCFS_DEVICE_NODESET_FMT "/proc/qsnet/elan3/device%d/nodeset" -+ -+#endif /* __ELAN3_ELANDEV_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elandev_linux.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elandev_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elandev_linux.h 2005-06-01 23:12:54.721420560 -0400 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELANDEV_LINUX_H -+#define __ELANDEV_LINUX_H -+ -+#ident "$Id: elandev_linux.h,v 1.11 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev_linux.h,v $*/ -+ -+#ifdef __KERNEL__ -+#include -+#include -+#include -+#endif -+ -+#define ELAN3_MAJOR 60 -+#define ELAN3_NAME "elan3" -+#define ELAN3_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+#define ELAN3_MINOR_DEVNUM(m) ((m) & 0x0f) /* card number */ -+#define ELAN3_MINOR_DEVFUN(m) (((m) >> 4) & 0x0f) /* function */ -+#define ELAN3_MINOR_CONTROL 0 /* function values */ -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+ -+typedef void *DeviceMappingHandle; -+ -+/* task and ctxt handle types */ -+typedef struct mm_struct *TaskHandle; -+typedef int CtxtHandle; -+ -+#define ELAN3_MY_TASK_HANDLE() (current->mm) -+#define KERNEL_TASK_HANDLE() (get_kern_mm()) -+ -+/* -+ * OS-dependent component of ELAN3_DEV struct. -+ */ -+typedef struct elan3_dev_osdep -+{ -+ struct pci_dev *pci; /* PCI config data */ -+ int ControlDeviceOpen; /* flag to indicate control */ -+ /* device open */ -+ struct proc_dir_entry *procdir; -+} ELAN3_DEV_OSDEP; -+ -+#endif /* __ELANDEV_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elanio.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elanio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elanio.h 2005-06-01 23:12:54.722420408 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3IO_H -+#define __ELAN3_ELAN3IO_H -+ -+#ident "$Id: elanio.h,v 1.19 2003/12/08 15:40:26 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanio.h,v $*/ -+ -+#define ELAN3IO_CONTROL_PATHNAME "/dev/elan3/control%d" -+#define ELAN3IO_MEM_PATHNAME "/dev/elan3/mem%d" -+#define ELAN3IO_USER_PATHNAME "/dev/elan3/user%d" -+#define ELAN3IO_SDRAM_PATHNAME "/dev/elan3/sdram%d" -+#define ELAN3IO_MAX_PATHNAMELEN 32 -+ -+/* ioctls on /dev/elan3/control */ -+#define ELAN3IO_CONTROL_BASE 0 -+ -+#define ELAN3IO_SET_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 0) -+#define ELAN3IO_CLEAR_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 1) -+#define ELAN3IO_READ_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 2, E3_uint32) -+#define ELAN3IO_WRITE_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 3, E3_uint32) -+ -+typedef struct elanio_set_debug_struct -+{ -+ char what[32]; -+ u_long value; -+} ELAN3IO_SET_DEBUG_STRUCT; -+#define ELAN3IO_SET_DEBUG _IOW ('e', ELAN3IO_CONTROL_BASE + 4, ELAN3IO_SET_DEBUG_STRUCT) -+ -+typedef struct elanio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} ELAN3IO_DEBUG_BUFFER_STRUCT; -+#define ELAN3IO_DEBUG_BUFFER _IOWR ('e', ELAN3IO_CONTROL_BASE + 5, ELAN3IO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct elanio_neterr_server_struct -+{ -+ u_int elanid; -+ void *addr; -+ char *name; -+} ELAN3IO_NETERR_SERVER_STRUCT; -+#define ELAN3IO_NETERR_SERVER _IOW ('e', ELAN3IO_CONTROL_BASE + 6, ELAN3IO_NETERR_SERVER_STRUCT) -+#define ELAN3IO_NETERR_FIXUP _IOWR ('e', ELAN3IO_CONTROL_BASE + 7, NETERR_MSG) -+ -+typedef struct elanio_set_position_struct -+{ -+ u_int device; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELAN3IO_SET_POSITION_STRUCT; -+#define ELAN3IO_SET_POSITION _IOW ('e', ELAN3IO_CONTROL_BASE + 8, ELAN3IO_SET_POSITION_STRUCT) -+ -+#if defined(LINUX) -+ -+/* ioctls on /dev/elan3/sdram */ -+#define ELAN3IO_SDRAM_BASE 20 -+ -+/* ioctls on /dev/elan3/user */ -+#define ELAN3IO_USER_BASE 30 -+ -+#define ELAN3IO_FREE _IO ('e', ELAN3IO_USER_BASE + 0) -+ -+#define ELAN3IO_ATTACH _IOWR('e', ELAN3IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN3IO_DETACH _IO ('e', ELAN3IO_USER_BASE + 2) -+ -+typedef struct elanio_addvp_struct -+{ -+ u_int process; -+ ELAN_CAPABILITY capability; -+} ELAN3IO_ADDVP_STRUCT; -+#define ELAN3IO_ADDVP _IOWR('e', ELAN3IO_USER_BASE + 3, ELAN3IO_ADDVP_STRUCT) -+#define ELAN3IO_REMOVEVP _IOW ('e', ELAN3IO_USER_BASE + 4, int) -+ -+typedef struct elanio_bcastvp_struct -+{ -+ u_int process; -+ u_int lowvp; -+ u_int highvp; -+} ELAN3IO_BCASTVP_STRUCT; -+#define ELAN3IO_BCASTVP _IOW ('e', ELAN3IO_USER_BASE + 5, ELAN3IO_BCASTVP_STRUCT) -+ -+typedef struct elanio_loadroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_LOAD_ROUTE_STRUCT; -+#define ELAN3IO_LOAD_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 6, ELAN3IO_LOAD_ROUTE_STRUCT) -+ -+#define ELAN3IO_PROCESS _IO ('e', ELAN3IO_USER_BASE + 7) -+ -+typedef struct elanio_setperm_struct -+{ -+ caddr_t maddr; -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_SETPERM_STRUCT; -+#define ELAN3IO_SETPERM _IOW ('e', ELAN3IO_USER_BASE + 8, ELAN3IO_SETPERM_STRUCT) -+ -+typedef struct elanio_clearperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+} ELAN3IO_CLEARPERM_STRUCT; -+#define ELAN3IO_CLEARPERM _IOW ('e', ELAN3IO_USER_BASE + 9, ELAN3IO_CLEARPERM_STRUCT) -+ -+typedef struct elanio_changeperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_CHANGEPERM_STRUCT; -+#define ELAN3IO_CHANGEPERM _IOW ('e', ELAN3IO_USER_BASE + 10, ELAN3IO_CHANGEPERM_STRUCT) -+ -+ -+#define ELAN3IO_HELPER_THREAD _IO ('e', ELAN3IO_USER_BASE + 11) -+#define ELAN3IO_WAITCOMMAND _IO ('e', ELAN3IO_USER_BASE + 12) -+#define ELAN3IO_BLOCK_INPUTTER _IOW ('e', ELAN3IO_USER_BASE + 13, int) -+#define ELAN3IO_SET_FLAGS _IOW ('e', ELAN3IO_USER_BASE + 14, int) -+ -+#define ELAN3IO_WAITEVENT _IOW ('e', ELAN3IO_USER_BASE + 15, E3_Event) -+#define ELAN3IO_ALLOC_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 16, EVENT_COOKIE) -+#define ELAN3IO_FREE_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 17, EVENT_COOKIE) -+#define ELAN3IO_ARM_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 18, EVENT_COOKIE) -+#define ELAN3IO_WAIT_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 19, EVENT_COOKIE) -+ -+#define ELAN3IO_SWAPSPACE _IOW ('e', ELAN3IO_USER_BASE + 20, SYS_SWAP_SPACE) -+#define ELAN3IO_EXCEPTION_SPACE _IOW ('e', ELAN3IO_USER_BASE + 21, SYS_EXCEPTION_SPACE) -+#define ELAN3IO_GET_EXCEPTION _IOR ('e', ELAN3IO_USER_BASE + 22, SYS_EXCEPTION) -+ -+typedef struct elanio_unload_struct -+{ -+ void *addr; -+ size_t len; -+} ELAN3IO_UNLOAD_STRUCT; -+#define ELAN3IO_UNLOAD _IOW ('e', ELAN3IO_USER_BASE + 23, ELAN3IO_UNLOAD_STRUCT) -+ -+ -+ -+typedef struct elanio_getroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_GET_ROUTE_STRUCT; -+#define ELAN3IO_GET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 24, ELAN3IO_GET_ROUTE_STRUCT) -+ -+typedef struct elanio_resetroute_struct -+{ -+ u_int process; -+} ELAN3IO_RESET_ROUTE_STRUCT; -+#define ELAN3IO_RESET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 25, ELAN3IO_RESET_ROUTE_STRUCT) -+ -+typedef struct elanio_checkroute_struct -+{ -+ u_int process; -+ E3_uint32 routeError; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_CHECK_ROUTE_STRUCT; -+#define ELAN3IO_CHECK_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 26, ELAN3IO_CHECK_ROUTE_STRUCT) -+ -+typedef struct elanio_vp2nodeId_struct -+{ -+ u_int process; -+ unsigned short nodeId; -+ ELAN_CAPABILITY cap; -+} ELAN3IO_VP2NODEID_STRUCT; -+#define ELAN3IO_VP2NODEID _IOWR('e', ELAN3IO_USER_BASE + 27, ELAN3IO_VP2NODEID_STRUCT) -+ -+#define ELAN3IO_SET_SIGNAL _IOW ('e', ELAN3IO_USER_BASE + 28, int) -+ -+typedef struct elanio_process_2_location_struct -+{ -+ u_int process; -+ ELAN_LOCATION loc; -+} ELAN3IO_PROCESS_2_LOCATION_STRUCT; -+#define ELAN3IO_PROCESS_2_LOCATION _IOW ('e', ELAN3IO_USER_BASE + 29, ELAN3IO_PROCESS_2_LOCATION_STRUCT) -+ -+ -+ -+/* ioctls on all device */ -+#define ELAN3IO_GENERIC_BASE 100 -+typedef struct elanio_get_devinfo_struct -+{ -+ ELAN_DEVINFO *devinfo; -+} ELAN3IO_GET_DEVINFO_STRUCT; -+#define ELAN3IO_GET_DEVINFO _IOR ('e', ELAN3IO_GENERIC_BASE + 0, ELAN_DEVINFO) -+ -+typedef struct elanio_get_position_struct -+{ -+ ELAN_POSITION *position; -+} ELAN3IO_GET_POSITION_STRUCT; -+#define ELAN3IO_GET_POSITION _IOR ('e', ELAN3IO_GENERIC_BASE + 1, ELAN_POSITION) -+ -+typedef struct elanio_stats_struct -+{ -+ int which; -+ void *ptr; -+} ELAN3IO_STATS_STRUCT; -+#define ELAN3IO_STATS _IOR ('e', ELAN3IO_GENERIC_BASE + 2, ELAN3IO_STATS_STRUCT) -+# define ELAN3_SYS_STATS_DEVICE 0 -+# define ELAN3_SYS_STATS_MMU 1 -+ -+/* offsets on /dev/elan3/control */ -+ -+/* offsets on /dev/elan3/mem */ -+ -+/* page numbers on /dev/elan3/user */ -+#define ELAN3IO_OFF_COMMAND_PAGE 0 -+#define ELAN3IO_OFF_FLAG_PAGE 1 -+#define ELAN3IO_OFF_UREG_PAGE 2 -+ -+#endif /* LINUX */ -+ -+#endif /* __ELAN3_ELAN3IO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elanregs.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elanregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elanregs.h 2005-06-01 23:12:54.724420104 -0400 -@@ -0,0 +1,1063 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * Header file for internal slave mapping of the ELAN3 registers -+ */ -+ -+#ifndef _ELAN3_ELANREGS_H -+#define _ELAN3_ELANREGS_H -+ -+#ident "$Id: elanregs.h,v 1.87 2004/04/22 12:27:21 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanregs.h,v $*/ -+ -+#include -+#include -+#include -+ -+#define MAX_ROOT_CONTEXT_MASK 0xfff -+#define SYS_CONTEXT_BIT 0x1000 -+#define ALL_CONTEXT_BITS (MAX_ROOT_CONTEXT_MASK | SYS_CONTEXT_BIT) -+#define ROOT_TAB_OFFSET(Cntxt) (((Cntxt) & MAX_ROOT_CONTEXT_MASK) << 4) -+#define CLEAR_SYS_BIT(Cntxt) ((Cntxt) & ~SYS_CONTEXT_BIT) -+ -+#define E3_CACHELINE_SIZE (32) -+#define E3_CACHE_SIZE (8192) -+ -+typedef volatile struct _E3_CacheSets -+{ -+ E3_uint64 Set0[256]; /* 2k bytes per set */ -+ E3_uint64 Set1[256]; /* 2k bytes per set */ -+ E3_uint64 Set2[256]; /* 2k bytes per set */ -+ E3_uint64 Set3[256]; /* 2k bytes per set */ -+} E3_CacheSets; -+ -+typedef union e3_cache_tag -+{ -+ E3_uint64 Value; -+ struct { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 pad2:8; /* Undefined value when read */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 pad0; /* Undefined value when read */ -+#else -+ E3_uint32 pad0; /* Undefined value when read */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 pad2:8; /* Undefined value when read */ -+#endif -+ } s; -+} E3_CacheTag; -+ -+#define E3_NumCacheLines 64 -+#define E3_NumCacheSets 4 -+ -+typedef volatile struct _E3_CacheTags -+{ -+ E3_CacheTag Tags[E3_NumCacheLines][E3_NumCacheSets]; /* 2k bytes per set */ -+} E3_CacheTags; -+ -+typedef union E3_IProcStatus_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 Unused:2; -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+ E3_uint32 Unused:2; -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+#endif -+ } s; -+} E3_IProcStatus_Reg; -+ -+#define CRC_STATUS_GOOD (0 << 21) -+#define CRC_STATUS_DISCARD (1 << 21) -+#define CRC_STATUS_ERROR (2 << 21) -+#define CRC_STATUS_BAD (3 << 21) -+ -+#define CRC_MASK (3 << 21) -+ -+#define EOP_GOOD (1 << 16) -+#define EOP_BADACK (2 << 16) -+#define EOP_ERROR_RESET (3 << 16) -+ -+#define E3_IPS_LastTrans (1 << 26) -+#define E3_IPS_FirstTrans (1 << 25) -+#define E3_IPS_VChan1 (1 << 24) -+#define E3_IPS_BadLength (1 << 23) -+#define E3_IPS_CrcMask (3 << 21) -+#define E3_IPS_Rejected (1 << 20) -+#define E3_IPS_AckSent (1 << 19) -+#define E3_IPS_QueueingPacket (1 << 18) -+#define E3_IPS_EopType (3 << 16) -+ -+typedef union E3_Status_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+#endif -+ } s; -+} E3_Status_Reg; -+ -+/* values for WakeupFunction */ -+#define SleepOneTick 0 -+#define WakeupToSendTransOrEop 1 -+#define SleepOneTickThenRunnable 2 -+#define WakeupNever 4 -+/* extra dma wakeup functions */ -+#define WakupeToSendTransOrEop 1 -+#define WakeupForPacketAck 3 -+#define WakeupToSendTrans 5 -+/* extra thread wakup function */ -+#define WakeupStopped 3 -+/* extra cproc wakup function */ -+#define WakeupSetEvent 3 -+ -+#define GET_STATUS_CONTEXT(Ptr) ((Ptr.Status >> 16) & 0x1fff) -+#define GET_STATUS_SUSPEND_ADDR(Ptr) ((Ptr.Status >> 8) & 0xff) -+#define GET_STATUS_TRAPTYPE(Ptr) ((E3_uint32)(Ptr.Status & 0xff)) -+ -+/* -+ * Interrupt register bits -+ */ -+#define INT_PciMemErr (1<<15) /* Pci memory access error */ -+#define INT_SDRamInt (1<<14) /* SDRam ECC interrupt */ -+#define INT_EventInterrupt (1<<13) /* Event Interrupt */ -+#define INT_LinkError (1<<12) /* Link Error */ -+#define INT_ComQueue (1<<11) /* a comm queue half full */ -+#define INT_TProcHalted (1<<10) /* Tproc Halted */ -+#define INT_DProcHalted (1<<9) /* Dmas Halted */ -+#define INT_DiscardingNonSysCntx (1<<8) /* Inputters Discarding Non-SysCntx */ -+#define INT_DiscardingSysCntx (1<<7) /* Inputters Discarding SysCntx */ -+#define INT_TProc (1<<6) /* tproc interrupt */ -+#define INT_CProc (1<<5) /* cproc interrupt */ -+#define INT_DProc (1<<4) /* dproc interrupt */ -+#define INT_IProcCh1NonSysCntx (1<<3) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh1SysCntx (1<<2) /* iproc SysCntx interrupt */ -+#define INT_IProcCh0NonSysCntx (1<<1) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh0SysCntx (1<<0) /* iproc SysCntx interrupt */ -+ -+#define INT_Inputters (INT_IProcCh0SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh1SysCntx | INT_IProcCh1NonSysCntx) -+#define INT_Discarding (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx) -+#define INT_Halted (INT_DProcHalted | INT_TProcHalted) -+#define INT_ErrorInterrupts (INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_PllDelValue 0x9 -+#define LRS_ClockEven 0xA -+#define LRS_ClockOdd 0xB -+#define LRS_ErrorLSW 0xC -+#define LRS_ErrorMSW 0xD -+#define LRS_FinCoarseDeskew 0xE -+#define LRS_LinkInValue 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+union Sched_Status -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StopNonSysCntxs:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 :3; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkBoundaryScan:1; -+#else -+ E3_uint32 LinkBoundaryScan:1; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 :3; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 StopNonSysCntxs:1; -+#endif -+ } s; -+}; -+ -+#define LinkBoundaryScan ((E3_uint32) 1<<31) /* Clears the link error interrupt */ -+#define FixLinkDelays ((E3_uint32) 1<<30) /* Clears the link error interrupt */ -+#define LinkSetValue(Val, OldVal) ((E3_uint32) (((Val) & 0x3ff) << 20) | ((OldVal) & ((~0x3ff) << 20))) -+ -+#define ClearLinkErrorInt ((E3_uint32) 1<<16) /* Clears the link error interrupt */ -+#define RestartCProc ((E3_uint32) 1<<15) /* Clears command proc interrupt */ -+#define RestartTProc ((E3_uint32) 1<<14) /* Clears thread interrupt */ -+#define RestartDProc ((E3_uint32) 1<<13) /* Clears dma0 interrupt */ -+#define RestartCh1NonSysCntx ((E3_uint32) 1<<12) /* Clears interrupt */ -+#define RestartCh1SysCntx ((E3_uint32) 1<<11) /* Clears interrupt */ -+#define RestartCh0NonSysCntx ((E3_uint32) 1<<10) /* Clears interrupt */ -+#define RestartCh0SysCntx ((E3_uint32) 1<<9) /* Clears interrupt */ -+#define CProcStopped ((E3_uint32) 1<<9) /* Read value only */ -+ -+#define TraceSetEvents ((E3_uint32) 1<<8) -+#define DiscardNonSysCntxIn ((E3_uint32) 1<<7) -+#define DiscardSysCntxIn ((E3_uint32) 1<<6) -+#define CProcStop ((E3_uint32) 1<<5) /* Will empty all the command port queues. */ -+#define HaltThread ((E3_uint32) 1<<4) /* Will stop the thread proc and clear the tproc command queue */ -+#define HaltDmaDequeue ((E3_uint32) 1<<3) /* Will stop the dmaers starting new dma's. */ -+#define HaltDmas ((E3_uint32) 1<<2) /* Will stop the dmaers and clear the dma command queues */ -+#define FlushCommandQueues ((E3_uint32) 1<<1) /* Causes the command ports to be flushed. */ -+#define StopNonSysCntxs ((E3_uint32) 1<<0) /* Prevents a non-SysCntx from starting. */ -+ -+/* Initial value of schedule status register */ -+#define LinkResetToken 0x00F -+ -+#define Sched_Initial_Value (LinkBoundaryScan | (LinkResetToken << 20) | \ -+ DiscardSysCntxIn | DiscardNonSysCntxIn | HaltThread | HaltDmas) -+ -+#define StopDmaQueues (HaltDmaDequeue | HaltDmas | \ -+ DiscardNonSysCntxIn | DiscardSysCntxIn) -+#define CheckDmaQueueStopped (INT_DiscardingNonSysCntx | INT_DiscardingSysCntx | INT_DProcHalted) -+ -+#define HaltStopAndExtTestMask 0xfff001ff -+#define HaltAndStopMask 0x000001ff -+ -+ -+#define DmaComQueueNotEmpty (1<<0) -+#define ThreadComQueueNotEmpty (1<<1) -+#define EventComQueueNotEmpty (1<<2) -+#define DmaComQueueHalfFull (1<<3) -+#define ThreadComQueueHalfFull (1<<4) -+#define EventComQueueHalfFull (1<<5) -+#define DmaComQueueError (1<<6) -+#define ThreadComQueueError (1<<7) -+#define EventComQueueError (1<<8) -+ -+#define ComQueueNotEmpty (DmaComQueueNotEmpty | ThreadComQueueNotEmpty | EventComQueueNotEmpty) -+#define ComQueueError (DmaComQueueError | ThreadComQueueError | EventComQueueError) -+ -+typedef union _E3_DmaInfo -+{ -+ E3_uint32 Value; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+ E3_uint32 :7; -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 :16; /* read as Zero */ -+#else -+ E3_uint32 :16; /* read as Zero */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 :7; -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+#endif -+ } s; -+} E3_DmaInfo; -+ -+typedef volatile struct _E3_DmaRds -+{ -+ E3_uint32 DMA_Source4to0AndTwoReads; -+ E3_uint32 pad13; -+ E3_uint32 DMA_BytesToRead; -+ E3_uint32 pad14; -+ E3_uint32 DMA_MinusPacketSize; -+ E3_uint32 pad15; -+ E3_uint32 DMA_MaxMinusPacketSize; -+ E3_uint32 pad16; -+ E3_uint32 DMA_DmaOutputOpen; -+ E3_uint32 pad16a; -+ E3_DmaInfo DMA_PacketInfo; -+ E3_uint32 pad17[7]; -+ E3_uint32 IProcTrapBase; -+ E3_uint32 pad18; -+ E3_uint32 IProcBlockTrapBase; -+ E3_uint32 pad19[11]; -+} E3_DmaRds; -+ -+typedef volatile struct _E3_DmaWrs -+{ -+ E3_uint64 pad0; -+ E3_uint64 LdAlignment; -+ E3_uint64 ResetAckNLdBytesToWr; -+ E3_uint64 SetAckNLdBytesToWr; -+ E3_uint64 LdBytesToRd; -+ E3_uint64 LdDmaType; -+ E3_uint64 SendRoutes; -+ E3_uint64 SendEop; -+ E3_uint64 pad1[8]; -+} E3_DmaWrs; -+ -+typedef volatile struct _E3_Exts -+{ -+ E3_uint32 CurrContext; /* 0x12a00 */ -+ E3_uint32 pad0; -+ E3_Status_Reg DProcStatus; /* 0x12a08 */ -+ E3_uint32 pad1; -+ E3_Status_Reg CProcStatus; /* 0x12a10 */ -+ E3_uint32 pad2; -+ E3_Status_Reg TProcStatus; /* 0x12a18 */ -+ E3_uint32 pad3; -+ E3_IProcStatus_Reg IProcStatus; /* 0x12a20 */ -+ E3_uint32 pad4[3]; -+ -+ E3_uint32 IProcTypeContext; /* 0x12a30 */ -+ E3_uint32 pad5; -+ E3_uint32 IProcTransAddr; /* 0x12a38 */ -+ E3_uint32 pad6; -+ E3_uint32 IProcCurrTransData0; /* 0x12a40 */ -+ E3_uint32 pad7; -+ E3_uint32 IProcCurrTransData1; /* 0x12a48 */ -+ E3_uint32 pad8; -+ -+ E3_uint32 SchCntReg; /* 0x12a50 */ -+ E3_uint32 pad9; -+ E3_uint32 InterruptReg; /* 0x12a58 */ -+ E3_uint32 pad10; -+ E3_uint32 InterruptMask; /* 0x12a60 */ -+ E3_uint32 pad11; -+ E3_uint32 LinkErrorTypes; /* 0x12a68 */ -+ E3_uint32 pad12[3]; -+ E3_uint32 LinkState; /* a read here returens the DataDel value for the */ -+ /* link that has just been defined by a write to */ -+ /* Regs.Exts.SchCntReg.LinkSetValue */ -+ E3_uint32 pad13; -+ -+ union /* 0x12a80 */ -+ { -+ E3_DmaWrs DmaWrs; -+ E3_DmaRds DmaRds; -+ } Dmas; -+} E3_Exts; -+ -+typedef union com_port_entry -+{ -+ E3_uint64 type; -+ struct -+ { -+ E3_uint32 Address; /* Command VAddr */ -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :4; /* read as Zero */ -+#else -+ E3_uint32 :4; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+#endif -+ } s; -+} E3_ComPortEntry; -+ -+/* control reg bits */ -+#define CONT_MMU_ENABLE (1 << 0) /* bit 0 enables mmu */ -+#define CONT_ENABLE_8K_PAGES (1 << 1) /* When set smallest page is 8k instead of 4k. */ -+#define CONT_EN_ALL_SETS (1 << 2) /* enable cache */ -+#define CONT_CACHE_LEVEL0 (1 << 3) /* cache context table */ -+#define CONT_CACHE_LEVEL1 (1 << 4) /* cache up level 1 PTD/PTE */ -+#define CONT_CACHE_LEVEL2 (1 << 5) /* cache up level 2 PTD/PTE */ -+#define CONT_CACHE_LEVEL3 (1 << 6) /* cache up level 3 PTD/PTE */ -+#define CONT_CACHE_TRAPS (1 << 7) /* cache up traps */ -+#define CONT_CACHE_LEV0_ROUTES (1 << 8) /* cache up small routes */ -+#define CONT_CACHE_LEV1_ROUTES (1 << 9) /* cache up large routes */ -+#define CONT_CACHE_ALL (CONT_CACHE_LEVEL0 | CONT_CACHE_LEVEL1 | CONT_CACHE_LEVEL2 | \ -+ CONT_CACHE_LEVEL3 | CONT_CACHE_TRAPS | \ -+ CONT_CACHE_LEV0_ROUTES | CONT_CACHE_LEV1_ROUTES) -+ -+#define CONT_SYNCHRONOUS (1 << 10) /* PCI running sync */ -+#define CONT_SER (1 << 11) /* Single bit output (Elan1 SER bit) */ -+#define CONT_SIR (1 << 12) /* Writing 1 resets elan. */ -+ -+#define CONT_PSYCHO_MODE (1 << 13) /* Enables all the perversion required by psycho */ -+#define CONT_ENABLE_ECC (1 << 14) /* Enables error detecting on the ECC */ -+#define CONT_SDRAM_TESTING (1 << 15) /* Switches to test mode for checking EEC data bits */ -+ -+/* defines SDRam CasLatency. Once set will not change again unless reset is reasserted. */ -+/* 1 = Cas Latency is 3, 0 = Cas Latency is 2 */ -+#define CAS_LATENCY_2 (0 << 16) -+#define CAS_LATENCY_3 (1 << 16) -+#define REFRESH_RATE_2US (0 << 17) /* defines 2us SDRam Refresh rate. */ -+#define REFRESH_RATE_4US (1 << 17) /* defines 4us SDRam Refresh rate. */ -+#define REFRESH_RATE_8US (2 << 17) /* defines 8us SDRam Refresh rate. */ -+#define REFRESH_RATE_16US (3 << 17) /* defines 16us SDRam Refresh rate. */ -+ -+#define CONT_PCI_ERR (1 << 19) /* Read 1 if PCI Error */ -+#define CONT_CLEAR_PCI_ERROR (1 << 19) /* Clears an PCI error. */ -+ -+/* Will cause the PCI error bit to become set. This is used to force the threads proc -+ and the uProc to start to stall. */ -+#define CONT_SET_PCI_ERROR (1 << 20) -+ -+/* Writes SDram control reg when set. Also starts SDram memory system refreshing. */ -+#define SETUP_SDRAM (1 << 21) -+ -+/* Flushes the tlb */ -+#define MMU_FLUSH (1 << 22) -+/* and read back when it's finished */ -+#define MMU_FLUSHED (1 << 0) -+ -+/* Clears any ECC error detected by SDRam interface */ -+#define CLEAR_SDRAM_ERROR (1 << 23) -+ -+#define ECC_ADDR_MASK 0x0ffffff8 -+#define ECC_UE_MASK 0x1 -+#define ECC_CE_MASK 0x2 -+#define ECC_ME_MASK 0x4 -+#define ECC_SYN_MASK 0xff -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E3_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+typedef union _E3_CacheContReg -+{ -+ E3_uint32 ContReg; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 :11; -+#else -+ E3_uint32 :11; -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+#endif -+ } s; -+} E3_CacheContReg; -+ -+typedef union _E3_TrapBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 :17; -+#else -+ E3_uint32 :17; -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+#endif -+ } s; -+} E3_TrapBits; -+ -+typedef union _E3_DirtyBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 GlobalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 InsDirty:8; -+#else -+ E3_uint32 InsDirty:8; -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 GlobalsDirty:8; -+#endif -+ } s; -+} E3_DirtyBits; -+ -+#define E3_TProcDescheduleMask 0x6000 -+#define E3_TProcDescheduleWait 0x2000 -+#define E3_TProcDescheduleSuspend 0x4000 -+#define E3_TProcDescheduleBreak 0x6000 -+ -+#define E3_TrapBitsMask 0x7fff -+ -+#define ThreadRestartFromTrapBit 1 -+#define ThreadReloadAllRegs 2 -+ -+#define E3_PAckOk 0 -+#define E3_PAckTestFail 1 -+#define E3_PAckDiscard 2 -+#define E3_PAckError 3 -+ -+typedef volatile struct _E3_DataBusMap -+{ -+ E3_uint64 Dma_Alignment_Port[8]; /* 0x00002800 */ -+ E3_uint32 pad0[0x30]; /* 0x00002840 */ -+ -+ E3_uint32 Input_Trans0_Data[0x10]; /* 0x00002900 */ -+ E3_uint32 Input_Trans1_Data[0x10]; -+ E3_uint32 Input_Trans2_Data[0x10]; -+ E3_uint32 Input_Trans3_Data[0x10]; -+ -+/* this is the start of the exts directly addressable from the ucode. */ -+ E3_Exts Exts; /* 0x00002a00 */ -+ -+/* this is the start of the registers directly addressable from the ucode. */ -+ E3_DMA Dma_Desc; /* 0x00002b00 */ -+ -+ E3_uint32 Dma_Last_Packet_Size; /* 0x00002b20 */ -+ E3_uint32 Dma_This_Packet_Size; /* 0x00002b24 */ -+ E3_uint32 Dma_Tmp_Source; /* 0x00002b28 */ -+ E3_uint32 Dma_Tmp_Dest; /* 0x00002b2c */ -+ -+ E3_Addr Thread_SP_Save_Ptr; /* points to the thread desched save word. */ -+ E3_uint32 Dma_Desc_Size_InProg; /* 0x00002b34 */ -+ -+ E3_uint32 Thread_Desc_SP; /* 0x00002b38 */ -+ E3_uint32 Thread_Desc_Context; /* 0x00002b3c */ -+ -+ E3_uint32 uCode_TMP[0x10]; /* 0x00002b40 */ -+ -+ E3_uint32 TProc_NonSysCntx_FPtr; /* 0x00002b80 */ -+ E3_uint32 TProc_NonSysCntx_BPtr; /* 0x00002b84 */ -+ E3_uint32 TProc_SysCntx_FPtr; /* 0x00002b88 */ -+ E3_uint32 TProc_SysCntx_BPtr; /* 0x00002b8c */ -+ E3_uint32 DProc_NonSysCntx_FPtr; /* 0x00002b90 */ -+ E3_uint32 DProc_NonSysCntx_BPtr; /* 0x00002b94 */ -+ E3_uint32 DProc_SysCntx_FPtr; /* 0x00002b98 */ -+ E3_uint32 DProc_SysCntx_BPtr; /* 0x00002b9c */ -+ -+ E3_uint32 Input_Trap_Base; /* 0x00002ba0 */ -+ E3_uint32 Input_Queue_Offset; /* 0x00002ba4 */ -+ E3_uint32 CProc_TrapSave_Addr; /* 0x00002ba8 */ -+ E3_uint32 Input_Queue_Addr; /* 0x00002bac */ -+ E3_uint32 uCode_TMP10; /* 0x00002bb0 */ -+ E3_uint32 uCode_TMP11; /* 0x00002bb4 */ -+ E3_uint32 Event_Trace_Ptr; /* 0x00002bb8 */ -+ E3_uint32 Event_Trace_Mask; /* 0x00002bbc */ -+ -+ E3_ComPortEntry DmaComQueue[3]; /* 0x00002bc0 */ -+ -+ E3_uint32 Event_Int_Queue_FPtr; /* 0x00002bd8 */ -+ E3_uint32 Event_Int_Queue_BPtr; /* 0x00002bdc */ -+ -+ E3_ComPortEntry ThreadComQueue[2]; /* 0x00002be0 */ -+ E3_ComPortEntry SetEventComQueue[2]; /* 0x00002bf0 */ -+ -+ E3_uint32 pad1[96]; /* 0x00002c00 */ -+ E3_uint32 ComQueueStatus; /* 0x00002d80 */ -+ E3_uint32 pad2[31]; /* 0x00002d84 */ -+ -+/* These are the internal registers of the threads proc. */ -+ E3_uint32 Globals[8]; /* 0x00002e00 */ -+ E3_uint32 Outs[8]; -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ -+ E3_uint32 pad3[16]; -+ -+ E3_uint32 IBufferReg[4]; -+ -+ E3_uint32 ExecuteNPC; -+ E3_uint32 ExecutePC; -+ -+ E3_uint32 StartPC; -+ E3_uint32 pad4; -+ -+ E3_uint32 StartnPC; -+ E3_uint32 pad5; -+ -+ E3_TrapBits TrapBits; -+ E3_DirtyBits DirtyBits; -+ E3_uint64 LoadDataReg; -+ E3_uint64 StoreDataReg; -+ -+ E3_uint32 ECC_STATUS0; -+ E3_uint32 ECC_STATUS1; -+ E3_uint32 pad6[0xe]; -+ -+/* Pci slave port regs */ -+ E3_uint32 PciSlaveReadCache[0x10]; -+ -+ E3_uint32 Fault_Base_Ptr; -+ E3_uint32 pad7; -+ E3_uint32 Context_Ptr; -+ E3_uint32 pad8; -+ E3_uint32 Input_Context_Filter; /* write only, No data */ -+ E3_uint32 Input_Context_Fil_Flush; /* write only, No data */ -+ E3_CacheContReg Cache_Control_Reg; -+ E3_uint32 pad9; -+ -+ E3_uint64 Tlb_Line_Value; -+ -+ E3_uint32 Walk_Datareg1; -+ E3_uint32 Walk_VAddr_Tab_Base; -+ E3_uint32 Walk_Datareg; -+ E3_uint32 Walk_ContextReg; -+ E3_uint32 Walk_FaultAddr; -+ E3_uint32 Walk_EventAddr; -+ -+/* outputers output cont ext registers. */ -+ E3_uint64 Dma_Route_012345_Context; -+ E3_uint64 pad10; -+ E3_uint64 Dma_Route_01234567; -+ E3_uint64 Dma_Route_89ABCDEF; -+ -+ E3_uint64 Thread_Route_012345_Context; -+ E3_uint64 pad11; -+ E3_uint64 Thread_Route_01234567; -+ E3_uint64 Thread_Route_89ABCDEF; -+} E3_DataBusMap; -+ -+typedef volatile struct _E3_Regs -+{ -+ E3_CacheSets Sets; /* 0x00000000 */ -+ E3_CacheTags Tags; /* 0x00002000 */ -+ E3_DataBusMap Regs; /* 0x00002800 */ -+ E3_uint32 pad1[0x400]; -+ E3_User_Regs URegs; -+} E3_Regs; -+ -+#define MAX_TRAPPED_TRANS 16 -+#define TRANS_DATA_WORDS 16 -+#define TRANS_DATA_BYTES 64 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E3_EventInt -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_uint32 IntCookie; -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E3_EventInt; -+ -+#define GET_EVENT_CONTEXT(Ptr) ((Ptr->s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK) -+ -+typedef volatile union _E3_ThreadQueue -+{ -+ E3_uint64 ForceAlign; -+ struct -+ { -+ E3_Addr Thread; -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+#endif -+ } s; -+} E3_ThreadQueue; -+ -+typedef volatile union _E3_FaultStatusReg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 :10; /* Bits 22 to 31 */ -+#else -+ E3_uint32 :10; /* Bits 22 to 31 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+#endif -+ } s; -+} E3_FaultStatusReg; -+ -+typedef union _E3_FaultSave -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 FaultContext; -+ volatile E3_uint32 FaultAddress; -+ volatile E3_uint32 EventAddress; -+ } s; -+} E3_FaultSave; -+ -+/* MMU fault status reg bit positions. */ -+#define FSR_WritePermBit 0 /* 1=Write access perm, 0=Read access perm */ -+#define FSR_RemotePermBit 1 /* 1=Remote access perm, 0=local access perm */ -+#define FSR_EventPermBit 2 /* 1=Event access perm, 0=data access perm */ -+#define FSR_Size0Bit 3 -+#define FSR_Size1Bit 4 -+#define FSR_Size2Bit 5 -+#define FSR_Size3Bit 6 -+#define FSR_WriteAccBit 7 /* 1=Write access, 0=Read access. */ -+#define FSR_NonAllocBit 8 /* 1=Do not fill cache with this data */ -+#define FSR_BlkDataTy0Bit 9 -+#define FSR_BlkDataTy1Bit 10 -+#define FSR_ReadLineBit 11 -+#define FSR_ReadMultipleBit 12 -+ -+#define FSR_PermMask (0xf << FSR_WritePermBit) -+#define FSR_SizeMask (0xf << FSR_Size0Bit) -+#define FSR_AccTypeMask (3 << FSR_WriteAccBit) -+#define FSR_BlkDataTyMask (3 << FSR_BlkDataTy0Bit) -+#define FSR_PciAccTyMask (3 << FSR_ReadLineBit) -+#define FSR_Walking (0x1 << 13) -+#define FSR_Level_Mask (0x3 << 14) -+#define FSR_ProtFault (0x1 << 16) -+#define FSR_FaultPTEType (0x2 << 17) -+#define FSR_AddrSizeError (0x1 << 19) -+#define FSR_VProcSizeError (0x1 << 20) -+#define FSR_WalkBadData (0x1 << 21) -+ -+#define FSR_PermRead 0 -+#define FSR_PermWrite 1 -+#define FSR_PermRemoteRead 2 -+#define FSR_PermRemoteWrite 3 -+#define FSR_PermEventRd 4 -+#define FSR_PermEventWr 5 -+#define FSR_PermRemoteEventRd 6 -+#define FSR_PermRemoteEventWr 7 -+ -+/* AT size values for each access type */ -+#define FSR_Word (0x0 << FSR_Size0Bit) -+#define FSR_DWord (0x1 << FSR_Size0Bit) -+#define FSR_QWord (0x2 << FSR_Size0Bit) -+#define FSR_Block32 (0x3 << FSR_Size0Bit) -+#define FSR_ReservedBlock (0x6 << FSR_Size0Bit) -+#define FSR_Block64 (0x7 << FSR_Size0Bit) -+#define FSR_GetCntxFilter (0x8 << FSR_Size0Bit) -+#define FSR_QueueDWord (0x9 << FSR_Size0Bit) -+#define FSR_RouteFetch (0xa << FSR_Size0Bit) -+#define FSR_QueueBlock (0xb << FSR_Size0Bit) -+#define FSR_Block32PartWrite (0xe << FSR_Size0Bit) -+#define FSR_Block64PartWrite (0xf << FSR_Size0Bit) -+ -+#define FSR_AllocRead (0 << FSR_WriteAccBit) -+#define FSR_AllocWrite (1 << FSR_WriteAccBit) -+#define FSR_NonAllocRd (2 << FSR_WriteAccBit) -+#define FSR_NonAllocWr (3 << FSR_WriteAccBit) -+ -+#define FSR_TypeByte (0 << FSR_BlkDataTy0Bit) -+#define FSR_TypeHWord (1 << FSR_BlkDataTy0Bit) -+#define FSR_TypeWord (2 << FSR_BlkDataTy0Bit) -+#define FSR_TypeDWord (3 << FSR_BlkDataTy0Bit) -+ -+typedef union E3_TrTypeCntx -+{ -+ E3_uint32 TypeContext; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Type:16; /* Transaction type field */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E3_TrTypeCntx; -+ -+#define GET_TRAP_TYPE(Ptr) (Ptr.TypeContext & 0xfff) -+#define GET_TRAP_CONTEXT(Ptr) ((Ptr.TypeContext >> 16) & 0x1fff) -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef union _E3_IprocTrapHeader -+{ -+ E3_uint64 forceAlign; -+ -+ struct -+ { -+ E3_TrTypeCntx TrTypeCntx; -+ E3_uint32 TrAddr; -+ E3_uint32 TrData0; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ } s; -+} E3_IprocTrapHeader; -+ -+#define IProcTrapStatus ipsotd.u_IProcStatus -+#define TrData1 ipsotd.u_TrData1 -+ -+typedef struct E3_IprocTrapData -+{ -+ E3_uint32 TrData[TRANS_DATA_WORDS]; -+} E3_IprocTrapData; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E3_NonSysCntxQueueSize 0x400 -+#define E3_SysCntxQueueSize 0x100 -+ -+typedef struct _E3_TrapAndQueue -+{ -+ E3_DMA NonSysCntxDmaQueue[E3_NonSysCntxQueueSize]; /* 0x000000 */ -+ E3_DMA SysCntxDmaQueue[E3_SysCntxQueueSize]; /* 0x008000 */ -+ E3_EventInt EventIntQueue[E3_NonSysCntxQueueSize]; /* 0x00A000 */ -+ E3_ThreadQueue NonSysCntxThreadQueue[E3_NonSysCntxQueueSize]; /* 0x00C000 */ -+ E3_ThreadQueue SysCntxThreadQueue[E3_SysCntxQueueSize]; /* 0x00E000 */ -+ E3_FaultSave IProcSysCntx; /* 0x00E800 */ -+ E3_Addr Thread_SP_Save; /* 0x00E810 */ -+ E3_uint32 dummy0[3]; /* 0x00E814 */ -+ E3_FaultSave ThreadProcData; /* 0x00E820 */ -+ E3_FaultSave ThreadProcInst; /* 0x00E830 */ -+ E3_FaultSave dummy1[2]; /* 0x00E840 */ -+ E3_FaultSave ThreadProcOpen; /* 0x00E860 */ -+ E3_FaultSave dummy2; /* 0x00E870 */ -+ E3_FaultSave IProcNonSysCntx; /* 0x00E880 */ -+ E3_FaultSave DProc; /* 0x00E890 */ -+ E3_FaultSave CProc; /* 0x00E8A0 */ -+ E3_FaultSave TProc; /* 0x00E8B0 */ -+ E3_FaultSave DProcData0; /* 0x00E8C0 */ -+ E3_FaultSave DProcData1; /* 0x00E8D0 */ -+ E3_FaultSave DProcData2; /* 0x00E8E0 */ -+ E3_FaultSave DProcData3; /* 0x00E8F0 */ -+ E3_uint32 dummy3[0xc0]; /* 0x00E900 */ -+ E3_IprocTrapHeader VCh0_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh0_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_uint64 DmaOverflowQueueSpace[0x1000]; -+ E3_uint64 ThreadOverflowQueueSpace[0x800]; -+ E3_uint64 EventOverflowQueueSpace[0x800]; -+} E3_TrapAndQueue; -+ -+ -+typedef struct _E3_ContextControlBlock -+{ -+ E3_uint32 rootPTP; -+ E3_uint32 filter; -+ E3_uint32 VPT_ptr; -+ E3_uint32 VPT_mask; -+} E3_ContextControlBlock; -+ -+#define E3_CCB_CNTX0 (0x20000000) -+#define E3_CCB_DISCARD_ALL (0x40000000) -+#define E3_CCB_ACKOK_ALL (0x80000000) -+#define E3_CCB_MASK (0xc0000000) -+ -+#define E3_NUM_CONTEXT_0 (0x20) -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E3_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E3_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+ -+#endif /* notdef _ELAN3_ELANREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elansyscall.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elansyscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elansyscall.h 2005-06-01 23:12:54.724420104 -0400 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANSYSCALL_H -+#define __ELAN3_ELANSYSCALL_H -+ -+#ident "$Id: elansyscall.h,v 1.34 2004/06/07 13:50:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elansyscall.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifndef _ASM -+ -+typedef struct sys_word_item -+{ -+ struct sys_word_item *Next; -+ E3_uint32 Value; -+} SYS_WORD_ITEM; -+ -+typedef struct sys_block_item -+{ -+ struct sys_block_item *Next; -+ E3_uint32 *Pointer; -+} SYS_BLOCK_ITEM; -+ -+typedef struct sys_swap_space -+{ -+ int Magic; -+ void *ItemListsHead[MAX_LISTS]; -+ void **ItemListsTailp[MAX_LISTS]; -+} SYS_SWAP_SPACE; -+ -+typedef struct sys_exception -+{ -+ int Type; -+ int Proc; -+ u_long Res; -+ u_long Value; -+ E3_FaultSave_BE FaultArea; -+ -+ union -+ { -+ DMA_TRAP Dma; -+ THREAD_TRAP Thread; -+ COMMAND_TRAP Command; -+ INPUT_TRAP Input; -+ } Union; -+} SYS_EXCEPTION; -+ -+typedef struct sys_exception_space -+{ -+ struct sys_exception_space *Next; -+ int Magic; -+ int Front; -+ int Back; -+ int Count; -+ int Overflow; -+ SYS_EXCEPTION Exceptions[1]; -+} SYS_EXCEPTION_SPACE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct sys_ctxt -+{ -+ SYS_SWAP_SPACE *Swap; -+ SYS_EXCEPTION_SPACE *Exceptions; -+ kmutex_t Lock; -+ -+ spinlock_t WaitLock; -+ kcondvar_t NetworkErrorWait; -+ -+ int Armed; -+ int Backoff; -+ long Time; -+ -+ u_long Flags; -+ int signal; -+ -+ EVENT_COOKIE_TABLE *Table; -+} SYS_CTXT; -+ -+extern SYS_CTXT *sys_init (ELAN3_CTXT *ctxt); -+extern int sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event); -+extern void sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t ptr, int size, -+ E3_FaultSave_BE *, u_long res, u_long value); -+extern int sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex); -+ -+/* returns -ve error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+extern int elan3_validate_cap (ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM */ -+ -+/* values for "Flags" */ -+#define ELAN3_SYS_FLAG_DMA_BADVP 1 -+#define ELAN3_SYS_FLAG_THREAD_BADVP 2 -+#define ELAN3_SYS_FLAG_DMAFAIL 4 -+#define ELAN3_SYS_FLAG_NETERR 8 -+ -+#define SYS_SWAP_MAGIC 0xB23C52DF -+#define SYS_EXCEPTION_MAGIC 0xC34D63E0 -+ -+#define EXCEPTION_GLOBAL_STRING "elan3_exceptions" -+#define EXCEPTION_ABORT_STRING "elan3_abortstring" -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANSYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elanuregs.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elanuregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elanuregs.h 2005-06-01 23:12:54.725419952 -0400 -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANUREGS_H -+#define __ELAN3_ELANUREGS_H -+ -+#ident "$Id: elanuregs.h,v 1.10 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanuregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+ -+/* Count reg 0 */ -+#define STC_INPUT_TRANSACTIONS 0 -+#define STP_DMA_EOP_WAIT_ACK 1 -+#define STP_THREAD_RUNNING 2 -+#define STP_UCODE_WAIT_MEM 3 -+#define STC_CACHE_WRITE_BACKS 4 -+#define STC_PCI_SLAVE_READS 5 -+#define STC_REG0_UNUSED6 6 -+#define STP_REG0_UNUSED7 7 -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_TRANSACTIONS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_THREAD_RUNNING", \ -+ "STP_UCODE_WAIT_MEM", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STC_REG0_UNUSED6", \ -+ "STP_REG0_UNUSED7" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0 << 4) -+#define STP_DMA_DATA_TRANSMITTING (1 << 4) -+#define STP_THEAD_WAITING_INST (2 << 4) -+#define STC_REG1_UNUSED3 (3 << 4) -+#define STP_FETCHING_ROUTES (4 << 4) -+#define STC_REG1_UNUSED5 (5 << 4) -+#define STC_PCI_SLAVE_WRITES (6 << 4) -+#define STP_PCI_SLAVE_READ_WAITING (7 << 4) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STP_THEAD_WAITING_INST", \ -+ "STC_REG1_UNUSED3", \ -+ "STP_FETCHING_ROUTES", \ -+ "STC_REG1_UNUSED5", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STP_PCI_SLAVE_READ_WAITING" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0 << 8) -+#define STP_DMA_WAITING_MEM (1 << 8) -+#define STP_THREAD_WAIT_OPEN_PKT (2 << 8) -+#define STC_REG2_UNUSED3 (3 << 8) -+#define STC_ROUTE_FETCHES (4 << 8) -+#define STC_CACHE_NON_ALLOC_MISSES (5 << 8) -+#define STC_REG2_UNUSED6 (6 << 8) -+#define STP_PCI_SLAVE_WRITE_WAITING (7 << 8) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STP_THREAD_WAIT_OPEN_PKT", \ -+ "STC_REG2_UNUSED3", \ -+ "STC_ROUTE_FETCHES", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STC_REG2_UNUSED6", \ -+ "STP_PCI_SLAVE_WRITE_WAITING" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0 << 12) -+#define STP_DMA_WAIT_NETWORK_BUSY (1 << 12) -+#define STP_THREAD_WAIT_PACK (2 << 12) -+#define STP_UCODE_BLOCKED_UCODE (3 << 12) -+#define STC_TLB_HITS (4 << 12) -+#define STC_REG3_UNUSED5 (5 << 12) -+#define STC_PCI_MASTER_READS (6 << 12) -+#define STP_PCI_MASTER_WRITE_WAITING (7 << 12) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STP_THREAD_WAIT_PACK", \ -+ "STP_UCODE_BLOCKED_UCODE", \ -+ "STC_TLB_HITS", \ -+ "STC_REG3_UNUSED5", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_MASTER_WRITE_WAITING"\ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0 << 16) -+#define STC_DMA_NON_CTX0_PKTS (1 << 16) -+#define STP_THREAD_EOP_WAIT_ACK (2 << 16) -+#define STP_UCODE_DPROC_RUNNING (3 << 16) -+#define STC_TLB_MEM_WALKS (4 << 16) -+#define STC_REG4_UNUSED5 (5 << 16) -+#define STC_PCI_MASTER_WRITES (6 << 16) -+#define STP_PCI_MASTER_READ_WAITING (7 << 16) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_NON_CTX0_PKTS", \ -+ "STP_THREAD_EOP_WAIT_ACK", \ -+ "STP_UCODE_DPROC_RUNNING", \ -+ "STC_TLB_MEM_WALKS", \ -+ "STC_REG4_UNUSED5", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_READ_WAITING" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0 << 20) -+#define STC_DMA_NON_CTX0_PKTS_REJECTED (1 << 20) -+#define STP_THREAD_WAITING_DATA (2 << 20) -+#define STP_UCODE_CPROC_RUNNING (3 << 20) -+#define STP_THREAD_TRANSMITTING_DATA (4 << 20) -+#define STP_PCI_WAITING_MAIN (5 << 20) -+#define STC_REG5_UNUSED6 (6 << 20) -+#define STC_REG5_UNUSED7 (7 << 20) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_NON_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAITING_DATA", \ -+ "STP_UCODE_CPROC_RUNNING", \ -+ "STP_THREAD_TRANSMITTING_DATA", \ -+ "STP_PCI_WAITING_MAIN", \ -+ "STC_REG5_UNUSED6", \ -+ "STC_REG5_UNUSED7" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_WAITING_MEMORY (0 << 24) -+#define STC_DMA_CTX0_PKTS (1 << 24) -+#define STP_THREAD_WAITING_MEMORY (2 << 24) -+#define STP_UCODE_TPROC_RUNNING (3 << 24) -+#define STC_CACHE_HITS (4 << 24) -+#define STP_PCI_WAITING_ELAN (5 << 24) -+#define STC_REG6_UNUSED4 (6 << 24) -+#define STC_REG6_UNUSED7 (7 << 24) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_WAITING_MEMORY", \ -+ "STC_DMA_CTX0_PKTS", \ -+ "STP_THREAD_WAITING_MEMORY", \ -+ "STP_UCODE_TPROC_RUNNING", \ -+ "STC_CACHE_HITS", \ -+ "STP_PCI_WAITING_ELAN", \ -+ "STC_REG6_UNUSED4", \ -+ "STC_REG6_UNUSED7" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0 << 28) -+#define STC_DMA_CTX0_PKTS_REJECTED (1 << 28) -+#define STP_THREAD_WAIT_NETWORK_BUSY (2 << 28) -+#define STP_UCODE_IPROC_RUNNING (3 << 28) -+#define STP_TLB_MEM_WALKING (4 << 28) -+#define STC_CACHE_ALLOC_MISSES (5 << 28) -+#define STP_PCI_DATA_TRANSFER (6 << 28) -+#define STC_REG7_UNUSED7 (7 << 28) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STC_DMA_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAIT_NETWORK_BUSY",\ -+ "STP_UCODE_IPROC_RUNNING", \ -+ "STP_TLB_MEM_WALKING", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_DATA_TRANSFER", \ -+ "STC_REG7_UNUSED7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+extern const char *elan3_stats_names[8][8]; -+ -+#define ELAN3_STATS_NAME(COUNT, CONTROL) (elan3_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+typedef volatile union e3_StatsControl -+{ -+ E3_uint32 StatsControl; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StatCont0:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont7:4; -+#else -+ E3_uint32 StatCont7:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont0:4; -+#endif -+ } s; -+} E3_StatsControl; -+ -+typedef volatile union e3_StatsCount -+{ -+ E3_uint64 ClockStat; -+ struct -+ { -+ E3_uint32 ClockLSW; /* read only */ -+ E3_uint32 StatsCount; -+ } s; -+} E3_StatsCount; -+ -+typedef volatile union e3_clock -+{ -+ E3_uint64 NanoSecClock; -+ struct -+ { -+ E3_uint32 ClockLSW; -+ E3_uint32 ClockMSW; -+ } s; -+} E3_Clock; -+#define E3_TIME( X ) ((X).NanoSecClock) -+ -+typedef volatile struct _E3_User_Regs -+{ -+ E3_StatsCount StatCounts[8]; -+ E3_StatsCount InstCount; -+ E3_uint32 pad0; -+ E3_StatsControl StatCont; -+ E3_Clock Clock; -+ E3_uint32 pad1[0x7ea]; -+} E3_User_Regs; -+ -+typedef volatile struct _E3_CommandPort -+{ -+ E3_Addr PutDma; /* 0x000 */ -+ E3_uint32 Pad1; -+ E3_Addr GetDma; /* 0x008 */ -+ E3_uint32 Pad2; -+ E3_Addr RunThread; /* 0x010 */ -+ E3_uint32 Pad3[3]; -+ E3_Addr WaitEvent0; /* 0x020 */ -+ E3_uint32 Pad4; -+ E3_Addr WaitEvent1; /* 0x028 */ -+ E3_uint32 Pad5; -+ E3_Addr SetEvent; /* 0x030 */ -+ E3_uint32 Pad6[3]; -+ E3_uint32 Pad7[0x7f0]; /* Fill out to an 8K page */ -+} E3_CommandPort; -+/* Should have the new structures for the top four pages of the elan3 space */ -+ -+#define E3_COMMANDPORT_SIZE (sizeof (E3_CommandPort)) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANUREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/elanvp.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/elanvp.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/elanvp.h 2005-06-01 23:12:54.726419800 -0400 -@@ -0,0 +1,165 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANVP_H -+#define _ELAN3_ELANVP_H -+ -+#ident "$Id: elanvp.h,v 1.45 2004/06/18 09:28:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanvp.h,v $ */ -+ -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Context number allocation. -+ * [0-31] system contexts -+ * [32-63] hardware test -+ * [64-1023] available -+ * [1024-2047] RMS allocatable -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN3_KCOMM_CONTEXT_NUM 0x001 /* old kernel comms context (system) */ -+#define ELAN3_CM_CONTEXT_NUM 0x002 /* new cluster member ship comms context (system) */ -+#define ELAN3_MRF_CONTEXT_NUM 0x003 /* multi-rail kernel comms context */ -+#define ELAN3_DMARING_BASE_CONTEXT_NUM 0x010 /* 16 contexts for dma ring issue (system) */ -+#define ELAN3_DMARING_TOP_CONTEXT_NUM 0x01f -+ -+#define ELAN3_HWTEST_BASE_CONTEXT_NUM 0x020 /* reserved for hardware test */ -+#define ELAN3_HWTEST_TOP_CONTEXT_NUM 0x03f -+ -+#define ELAN3_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN3_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN3_HWTEST_CONTEXT(ctx) ((ctx) >= ELAN3_HWTEST_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN3_HWTEST_TOP_CONTEXT_NUM) -+ -+#define ELAN3_SYSTEM_CONTEXT(ctx) (((ctx) & SYS_CONTEXT_BIT) != 0 || \ -+ (ctx) < E3_NUM_CONTEXT_0 || \ -+ (ctx) >= ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* Maximum number of virtual processes */ -+#define ELAN3_MAX_VPS (16384) -+ -+#define ELAN3_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN3_INVALID_NODE (0xFFFF) -+#define ELAN3_INVALID_CONTEXT (0xFFFF) -+ -+ -+ -+#if defined(__KERNEL__) && !defined(__ELAN3__) -+ -+/* -+ * Contexts are accessible via Elan capabilities, -+ * for each context that can be "attached" to there -+ * is a ELAN3_CTXT_INFO structure created by its -+ * "owner". This also "remembers" all remote -+ * segments that have "blazed" a trail to it. -+ * -+ * If the "owner" goes away the soft info is -+ * destroyed when it is no longer "attached" or -+ * "referenced" by a remote segment. -+ * -+ * If the owner changes the capability, then -+ * the soft info must be not "referenced" or -+ * "attached" before a new process can "attach" -+ * to it. -+ */ -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_info::Next elan3_info::Prev elan3_info::Device elan3_info::Owner -+ elan3_info::Capability elan3_info::AttachedCapability elan3_info::Context)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_info::Nacking elan3_info::Disabled)) -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_info::Context elan3_info::Device elan3_info::Capability)) -+ -+#endif /* __KERNEL__ */ -+ -+#define LOW_ROUTE_PRIORITY 0 -+#define HIGH_ROUTE_PRIORITY 1 -+ -+#define DEFAULT_ROUTE_TIMEOUT 3 -+#define DEFAULT_ROUTE_PRIORITY LOW_ROUTE_PRIORITY -+ -+ -+/* a small route is 4 flits (8 bytes), a big route */ -+/* is 8 flits (16 bytes) - each packed route is 4 bits */ -+/* so giving us a maximum of 28 as flit0 does not contain */ -+/* packed routes */ -+#define MAX_FLITS 8 -+#define MAX_PACKED 28 -+ -+/* bit definitions for 64 bit route pointer */ -+#define ROUTE_VALID (1ULL << 63) -+#define ROUTE_PTR (1ULL << 62) -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_PTR_MASK ((1ull << ROUTE_CTXT_SHIFT)-1) -+#define ROUTE_GET_CTXT ((VAL >> ROUTE_CTXT_SHIFT) & 0x3fff ) -+ -+#define SMALL_ROUTE(flits, context) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (context) << ROUTE_CTXT_SHIFT) | \ -+ ROUTE_VALID) -+ -+#define BIG_ROUTE_PTR(paddr, context) ((E3_uint64) (paddr) | ((E3_uint64) context << ROUTE_CTXT_SHIFT) | ROUTE_VALID | ROUTE_PTR) -+ -+#define BIG_ROUTE0(flits) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (flits)[3] << 48)) -+#define BIG_ROUTE1(flits) (((E3_uint64) (flits)[4] << 0) | ((E3_uint64) (flits)[5] << 16) | \ -+ ((E3_uint64) (flits)[6] << 32) | ((E3_uint64) (flits)[7] << 48)) -+ -+ -+/* defines for first flit of a route */ -+#define FIRST_HIGH_PRI (1 << 15) -+#define FIRST_AGE(Val) ((Val) << 11) -+#define FIRST_TIMEOUT(Val) ((Val) << 9) -+#define FIRST_PACKED(X) ((X) << 7) -+#define FIRST_ROUTE(Val) (Val) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+/* ---------------------------------------------------------- -+ * elan3_route functions -+ * return ELAN3_ROUTE_xxx codes -+ * ---------------------------------------------------------- */ -+ -+#define ELAN3_ROUTE_SUCCESS (0x00) -+#define ELAN3_ROUTE_SYSCALL_FAILED (0x01) -+#define ELAN3_ROUTE_INVALID (0x02) -+#define ELAN3_ROUTE_TOO_LONG (0x04) -+#define ELAN3_ROUTE_LOAD_FAILED (0x08) -+#define ELAN3_ROUTE_PROC_RANGE (0x0f) -+#define ELAN3_ROUTE_INVALID_LEVEL (0x10) -+#define ELAN3_ROUTE_OCILATES (0x20) -+#define ELAN3_ROUTE_WRONG_DEST (0x40) -+#define ELAN3_ROUTE_TURN_LEVEL (0x80) -+#define ELAN3_ROUTE_NODEID_UNKNOWN (0xf0) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANVP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/events.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/events.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/events.h 2005-06-01 23:12:54.726419800 -0400 -@@ -0,0 +1,183 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EVENTS_H -+#define _ELAN3_EVENTS_H -+ -+#ident "$Id: events.h,v 1.45 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/events.h,v $*/ -+ -+/* -+ * Alignments for events, event queues and blockcopy blocks. -+ */ -+#define E3_EVENT_ALIGN (8) -+#define E3_QUEUE_ALIGN (32) -+#define E3_BLK_ALIGN (64) -+#define E3_BLK_SIZE (64) -+#define E3_BLK_PATTERN (0xfeedface) -+ -+#define E3_EVENT_FREE ((0 << 4) | EV_WCOPY) -+#define E3_EVENT_PENDING ((1 << 4) | EV_WCOPY) -+#define E3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+#define E3_EVENT_FIRED ((3 << 4) | EV_WCOPY) -+#define E3_EVENT_FAILED ((4 << 4) | EV_WCOPY) -+#define E3_EVENT_DONE ((5 << 4) | EV_WCOPY) -+#define E3_EVENT_PRIVATE ((6 << 4) | EV_WCOPY) -+ -+/* -+ * Event values and masks -+ * -+ * Block Copy event xxxxxxxxxxxxxxxx1 -+ * Chained event 30 bit ptr ....0x -+ * Event interrupt 29 bit cookie 01x -+ * Dma event 28 bit ptr 011x -+ * thread event 28 bit ptr 111x -+ */ -+#define EV_CLEAR (0x00000000) -+#define EV_TYPE_BCOPY (0x00000001) -+#define EV_TYPE_CHAIN (0x00000000) -+#define EV_TYPE_EVIRQ (0x00000002) -+#define EV_TYPE_DMA (0x00000006) -+#define EV_TYPE_THREAD (0x0000000e) -+ -+#define EV_TYPE_BCOPY_BYTE (0) -+#define EV_TYPE_BCOPY_HWORD (1) -+#define EV_TYPE_BCOPY_WORD (2) -+#define EV_TYPE_BCOPY_DWORD (3) -+ -+/* -+ * Data type is in the lowest two bits of the Dest pointer. -+ */ -+#define EV_BCOPY_DTYPE_MASK (3) -+#define EV_WCOPY (1) /* [DestWord] = Source */ -+#define EV_BCOPY (0) /* [DestBlock] = [SourceBlock] */ -+ -+#define EV_TYPE_MASK (0x0000000e) -+#define EV_TYPE_MASK_BCOPY (0x00000001) -+#define EV_TYPE_MASK_CHAIN (0x00000002) -+#define EV_TYPE_MASK_EVIRQ (0x00000006) -+#define EV_TYPE_MASK_DMA (0x0000000e) -+#define EV_TYPE_MASK_THREAD (0x0000000e) -+#define EV_TYPE_MASK2 (0x0000000f) -+ -+/* -+ * Min/Max size for Elan queue entries -+ */ -+#define E3_QUEUE_MIN E3_BLK_SIZE -+#define E3_QUEUE_MAX (E3_BLK_SIZE * 5) -+ -+/* -+ * Elan queue state bits -+ */ -+#define E3_QUEUE_FULL (1<<0) -+#define E3_QUEUE_LOCKED (1<<8) -+ -+#ifndef _ASM -+ -+typedef union _E3_Event -+{ -+ E3_uint64 ev_Int64; -+ struct { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ } ev_u; -+} E3_Event; -+ -+typedef union _E3_BlockCopyEvent -+{ -+ E3_uint64 ev_ForceAlign; -+ struct E3_BlockCopyEvent_u { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ E3_Addr u_Source; -+ E3_Addr u_Dest; /* lowest bits are the data type for endian conversion */ -+ } ev_u; -+} E3_BlockCopyEvent; -+ -+#define ev_Type ev_u.u_Type -+#define ev_Count ev_u.u_Count -+#define ev_Source ev_u.u_Source -+#define ev_Dest ev_u.u_Dest -+ -+typedef union _E3_WaitEvent0 -+{ -+ E3_uint64 we_ForceAlign; -+ struct { -+ E3_Addr u_EventLoc; -+ E3_int32 u_WaitCount; -+ } we_u; -+} E3_WaitEvent0; -+#define we_EventLoc we_u.u_EventLoc -+#define we_WaitCount we_u.u_WaitCount -+ -+typedef union _E3_Event_Blk -+{ -+ E3_uint8 eb_Bytes[E3_BLK_SIZE]; -+ E3_uint32 eb_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+ E3_uint64 eb_Int64[E3_BLK_SIZE/sizeof (E3_uint64)]; -+} E3_Event_Blk; -+ -+/* We make eb_done the last word of the blk -+ * so that we can guarantee the rest of the blk is -+ * correct when this value is set. -+ * However, when the TPORT code copies the envelope -+ * info into the blk, it uses a dword endian type. -+ * Thus we must correct for this when initialising -+ * the pattern in the Elan SDRAM blk (eeb_done) -+ */ -+#define eb_done eb_Int32[15] -+#define eeb_done eb_Int32[15^WordEndianFlip] -+ -+#define EVENT_WORD_READY(WORD) (*((volatile E3_uint32 *) WORD) != 0) -+#define EVENT_BLK_READY(BLK) (((volatile E3_Event_Blk *) (BLK))->eb_done != 0) -+#define EVENT_READY(EVENT) (((volatile E3_Event *) (EVENT))->ev_Count <= 0) -+ -+#define ELAN3_WAIT_EVENT (0) -+#define ELAN3_POLL_EVENT (-1) -+ -+#define SETUP_EVENT_TYPE(ptr,typeval) (((unsigned long)(ptr)) | (typeval)) -+ -+#define E3_RESET_BCOPY_BLOCK(BLK) \ -+ do { \ -+ (BLK)->eb_done = 0; \ -+ } while (0) -+ -+typedef struct e3_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_Event q_event; /* queue event */ -+} E3_Queue; -+ -+typedef struct e3_blockcopy_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue event */ -+ E3_uint32 q_pad[6]; -+} E3_BlockCopyQueue; -+ -+#define E3_QUEUE_EVENT_OFFSET 24 -+#define QUEUE_FULL(Q) ((Q)->q_state & E3_QUEUE_FULL) -+ -+#endif /* ! _ASM */ -+ -+#endif /* _ELAN3_EVENTS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/intrinsics.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/intrinsics.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/intrinsics.h 2005-06-01 23:12:54.727419648 -0400 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_INTRINSICS_H -+#define _ELAN3_INTRINSICS_H -+ -+#ident "$Id: intrinsics.h,v 1.35 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/intrinsics.h,v $ */ -+ -+#include -+#include -+ -+/* -+ * This file contains definitions of the macros for accessing the QSW -+ * specific instructions, as if they were functions. -+ * The results from the function -+ */ -+ -+#define C_ACK_OK 0 /* return from c_close() */ -+#define C_ACK_TESTFAIL 1 /* return from c_close() */ -+#define C_ACK_DISCARD 2 /* return from c_close() */ -+#define C_ACK_ERROR 3 /* return from c_close() */ -+ -+/* -+ * Elan asi's for tproc block accesses -+ */ -+#define EASI_BYTE 0 -+#define EASI_HALF 1 -+#define EASI_WORD 2 -+#define EASI_DOUBLE 3 -+ -+#if defined(__ELAN3__) && !defined (_ASM) -+ -+extern inline void c_abort(void) -+{ -+ asm volatile (".word 0x0000 ! die you thread you " : : ); -+} -+ -+extern inline void c_suspend(void) -+{ -+ asm volatile ( -+ "set 1f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 1f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" -+ "1: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" : : ); -+} -+ -+extern inline int c_close(void) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0" : "=r" (rc) : ); -+ -+ return (rc); -+} -+ -+extern inline int c_close_cookie(volatile E3_uint32 *cookiep, E3_uint32 next) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0 ! close the packet\n" -+ "bz,a 1f ! ack received\n" -+ "st %1, [%2] ! update cookie on ack\n" -+ "1: ! label for not-ack\n" -+ : "=r" (rc) : "r" (next), "r" (cookiep)); -+ -+ return (rc); -+} -+ -+extern inline void c_break_busywait(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bpos 1f ! no other thread ready\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_break(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bne 1f ! haven't exceeded our inst count yet\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_open( const int arg ) -+{ -+ asm volatile ("open %0" : : "r" (arg) ); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+} -+ -+extern inline void c_waitevent( volatile E3_Event *const ptr, -+ const int count) -+{ -+ register volatile E3_Event *a_unlikely asm("o0") = ptr; -+ register int a_very_unlikely asm("o1") = count; -+ -+ asm volatile ( -+ "sub %%sp,1*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%i0,[%%sp+0] ! save the ins\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+0],%%i0 ! RevB bug fix. restore ins in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "ldblock [%%sp+0],%%i0 ! restore ins\n" -+ "4: add %%sp,1*8*4,%%sp ! restore stack pointer\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (a_unlikely), "r" (a_very_unlikely) -+ : /* clobbered */ "g0", "g1", "g2", "g3", "g4", "g5", "g6", "g7", -+ "l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7" ); -+ -+} -+ -+#define c_sendtrans0(type,dest) \ -+ asm volatile ("sendtrans %0, %%g0, %1" : : "i" (type), "r" (dest)) -+ -+#define c_sendtrans1(type,dest,arg) \ -+ asm volatile ("sendtrans %0, %2, %1" : : "i" (type), "r" (dest), "r" (arg)) -+ -+#define c_sendtrans2(type,dest,arg1,arg2) \ -+ do { \ -+ register const unsigned long a_unlikely_1 asm("o4") = arg1; \ -+ register const unsigned long a_unlikely_2 asm("o5") = arg2; \ -+ asm volatile ("sendtrans %0, %2, %1" \ -+ : : "i" (type), "r" (dest), "r" (a_unlikely_1), "r" (a_unlikely_2)); \ -+ } while(0) -+ -+#define c_sendmem(type,dest,ptr) \ -+ asm volatile ("sendtrans %0, [%2], %1" : : "i" (type), "r" (dest), "r" (ptr)) -+ -+/* Copy a single 64-byte block (src blk is read using a BYTE endian type) */ -+extern inline void elan3_copy64b(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_BYTE) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Copy a single 64-byte block (src blk is read using a WORD endian type) */ -+extern inline void elan3_copy64w(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_WORD) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Read a 64-bit value with a WORD (32-bit) endian type */ -+extern inline E3_uint64 elan3_read64w( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_WORD) ); -+ -+ return( result ); -+} -+ -+/* Read a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline E3_uint64 elan3_read64dw( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_DOUBLE) ); -+ -+ return( result ); -+} -+ -+/* Write a 32-bit value with a WORD (32-bit) endian type */ -+extern inline void elan3_write64w( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_WORD) ); -+} -+ -+/* Write a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline void elan3_write64dw( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_DOUBLE) ); -+} -+ -+extern inline E3_uint32 c_swap(volatile E3_uint32 *source, E3_uint32 result) -+{ -+ asm volatile("swap [%1],%0\n" -+ : "=r" (result) -+ : "r" (source) ,"0" (result) -+ : "memory"); -+ return result; -+} -+ -+extern inline E3_uint32 c_swap_save(volatile E3_uint32 *source, const E3_uint32 result) -+{ -+ register E3_uint32 a_unlikely; -+ asm volatile("" : "=r" (a_unlikely) : ); -+ -+ asm volatile("mov %2,%0; swap [%1],%0\n" -+ : "=r" (a_unlikely) -+ : "r" (source) ,"r" (result), "0" (a_unlikely) -+ : "memory"); -+ return a_unlikely; -+} -+#endif /* (__ELAN3__) && !(_ASM) */ -+ -+#endif /* _ELAN3_INTRINSICS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/minames.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/minames.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/minames.h 2005-06-01 23:12:54.728419496 -0400 -@@ -0,0 +1,256 @@ -+{MI_WaitForRemoteDescRead, "MI_WaitForRemoteDescRead"}, -+{MI_WaitForRemoteDescRead2, "MI_WaitForRemoteDescRead2"}, -+{MI_WaitForRemoteDescRead2_seq1, "MI_WaitForRemoteDescRead2_seq1"}, -+{MI_SendRemoteDmaRoutes, "MI_SendRemoteDmaRoutes"}, -+{MI_IProcTrapped, "MI_IProcTrapped"}, -+{MI_DProcTrapped, "MI_DProcTrapped"}, -+{MI_CProcTrapped, "MI_CProcTrapped"}, -+{MI_TProcTrapped, "MI_TProcTrapped"}, -+{MI_TestWhichDmaQueue, "MI_TestWhichDmaQueue"}, -+{MI_TestWhichDmaQueue_seq1, "MI_TestWhichDmaQueue_seq1"}, -+{MI_InputRemoteDmaUpdateBPtr, "MI_InputRemoteDmaUpdateBPtr"}, -+{MI_FixupQueueContextAndRemoteBit, "MI_FixupQueueContextAndRemoteBit"}, -+{MI_FixupQueueContextAndRemoteBit_seq1, "MI_FixupQueueContextAndRemoteBit_seq1"}, -+{MI_FixupQueueContextAndRemoteBit_seq2, "MI_FixupQueueContextAndRemoteBit_seq2"}, -+{MI_FixupQueueContextAndRemoteBit_seq3, "MI_FixupQueueContextAndRemoteBit_seq3"}, -+{MI_FixupQueueContextAndRemoteBit_seq4, "MI_FixupQueueContextAndRemoteBit_seq4"}, -+{MI_RunDmaCommand, "MI_RunDmaCommand"}, -+{MI_DoSendRemoteDmaDesc, "MI_DoSendRemoteDmaDesc"}, -+{MI_DequeueNonSysCntxDma, "MI_DequeueNonSysCntxDma"}, -+{MI_WaitForRemoteDescRead1, "MI_WaitForRemoteDescRead1"}, -+{MI_RemoteDmaCommand, "MI_RemoteDmaCommand"}, -+{MI_WaitForRemoteRoutes, "MI_WaitForRemoteRoutes"}, -+{MI_DequeueSysCntxDma, "MI_DequeueSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForQueue, "MI_ExecuteDmaDescriptorForQueue"}, -+{MI_ExecuteDmaDescriptor1, "MI_ExecuteDmaDescriptor1"}, -+{MI_ExecuteDmaDescriptor1_seq1, "MI_ExecuteDmaDescriptor1_seq1"}, -+{MI_ExecuteDmaDescriptor1_seq2, "MI_ExecuteDmaDescriptor1_seq2"}, -+{MI_ExecuteDmaDescriptor1_seq3, "MI_ExecuteDmaDescriptor1_seq3"}, -+{MI_GetNewSizeInProg, "MI_GetNewSizeInProg"}, -+{MI_GetNewSizeInProg_seq1, "MI_GetNewSizeInProg_seq1"}, -+{MI_FirstBlockRead, "MI_FirstBlockRead"}, -+{MI_ExtraFirstBlockRead, "MI_ExtraFirstBlockRead"}, -+{MI_UnimplementedError, "MI_UnimplementedError"}, -+{MI_UpdateDescriptor, "MI_UpdateDescriptor"}, -+{MI_UpdateDescriptor_seq1, "MI_UpdateDescriptor_seq1"}, -+{MI_UpdateDescriptor_seq2, "MI_UpdateDescriptor_seq2"}, -+{MI_UpdateDescriptor_seq3, "MI_UpdateDescriptor_seq3"}, -+{MI_UpdateDescriptor_seq4, "MI_UpdateDescriptor_seq4"}, -+{MI_UpdateDescriptor_seq5, "MI_UpdateDescriptor_seq5"}, -+{MI_GetNextSizeInProg, "MI_GetNextSizeInProg"}, -+{MI_DoStopThisDma, "MI_DoStopThisDma"}, -+{MI_DoStopThisDma_seq1, "MI_DoStopThisDma_seq1"}, -+{MI_GenNewBytesToRead, "MI_GenNewBytesToRead"}, -+{MI_WaitForEventReadTy1, "MI_WaitForEventReadTy1"}, -+{MI_WaitUpdateEvent, "MI_WaitUpdateEvent"}, -+{MI_WaitUpdateEvent_seq1, "MI_WaitUpdateEvent_seq1"}, -+{MI_DoSleepOneTickThenRunable, "MI_DoSleepOneTickThenRunable"}, -+{MI_RunEvent, "MI_RunEvent"}, -+{MI_EnqueueThread, "MI_EnqueueThread"}, -+{MI_CheckContext0, "MI_CheckContext0"}, -+{MI_EnqueueDma, "MI_EnqueueDma"}, -+{MI_CprocTrapping, "MI_CprocTrapping"}, -+{MI_CprocTrapping_seq1, "MI_CprocTrapping_seq1"}, -+{MI_WaitForRemoteRoutes1, "MI_WaitForRemoteRoutes1"}, -+{MI_SetEventCommand, "MI_SetEventCommand"}, -+{MI_DoSetEvent, "MI_DoSetEvent"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma, "MI_DoRemoteSetEventNowOrTrapQueueingDma"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1, "MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1"}, -+{MI_SendRemoteDmaRoutes2, "MI_SendRemoteDmaRoutes2"}, -+{MI_WaitForRemoteRoutes2, "MI_WaitForRemoteRoutes2"}, -+{MI_WaitEventCommandTy0, "MI_WaitEventCommandTy0"}, -+{MI_DequeueNonSysCntxDma2, "MI_DequeueNonSysCntxDma2"}, -+{MI_WaitEventCommandTy1, "MI_WaitEventCommandTy1"}, -+{MI_WaitEventCommandTy1_seq1, "MI_WaitEventCommandTy1_seq1"}, -+{MI_DequeueNonSysCntxThread, "MI_DequeueNonSysCntxThread"}, -+{MI_DequeueSysCntxDma1, "MI_DequeueSysCntxDma1"}, -+{MI_DequeueSysCntxThread, "MI_DequeueSysCntxThread"}, -+{MI_TestNonSysCntxDmaQueueEmpty, "MI_TestNonSysCntxDmaQueueEmpty"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq1, "MI_TestNonSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq2, "MI_TestNonSysCntxDmaQueueEmpty_seq2"}, -+{MI_RunThreadCommand, "MI_RunThreadCommand"}, -+{MI_SetEventWaitForLastAcess, "MI_SetEventWaitForLastAcess"}, -+{MI_SetEventReadWait, "MI_SetEventReadWait"}, -+{MI_SetEventReadWait_seq1, "MI_SetEventReadWait_seq1"}, -+{MI_TestEventType, "MI_TestEventType"}, -+{MI_TestEventType_seq1, "MI_TestEventType_seq1"}, -+{MI_TestEventBit2, "MI_TestEventBit2"}, -+{MI_DmaDescOrBlockCopyOrChainedEvent, "MI_DmaDescOrBlockCopyOrChainedEvent"}, -+{MI_RunThread, "MI_RunThread"}, -+{MI_RunThread1, "MI_RunThread1"}, -+{MI_RunThread1_seq1, "MI_RunThread1_seq1"}, -+{MI_IncDmaSysCntxBPtr, "MI_IncDmaSysCntxBPtr"}, -+{MI_IncDmaSysCntxBPtr_seq1, "MI_IncDmaSysCntxBPtr_seq1"}, -+{MI_IncDmaSysCntxBPtr_seq2, "MI_IncDmaSysCntxBPtr_seq2"}, -+{MI_WaitForCntxDmaDescRead, "MI_WaitForCntxDmaDescRead"}, -+{MI_FillInContext, "MI_FillInContext"}, -+{MI_FillInContext_seq1, "MI_FillInContext_seq1"}, -+{MI_WriteNewDescToQueue, "MI_WriteNewDescToQueue"}, -+{MI_WriteNewDescToQueue_seq1, "MI_WriteNewDescToQueue_seq1"}, -+{MI_TestForQueueWrap, "MI_TestForQueueWrap"}, -+{MI_TestForQueueWrap_seq1, "MI_TestForQueueWrap_seq1"}, -+{MI_TestQueueIsFull, "MI_TestQueueIsFull"}, -+{MI_TestQueueIsFull_seq1, "MI_TestQueueIsFull_seq1"}, -+{MI_TestQueueIsFull_seq2, "MI_TestQueueIsFull_seq2"}, -+{MI_CheckPsychoShitFixup, "MI_CheckPsychoShitFixup"}, -+{MI_PsychoShitFixupForcedRead, "MI_PsychoShitFixupForcedRead"}, -+{MI_PrepareDMATimeSlice, "MI_PrepareDMATimeSlice"}, -+{MI_PrepareDMATimeSlice_seq1, "MI_PrepareDMATimeSlice_seq1"}, -+{MI_TProcRestartFromTrapOrTestEventBit2, "MI_TProcRestartFromTrapOrTestEventBit2"}, -+{MI_TProcRestartFromTrapOrTestEventBit2_seq1, "MI_TProcRestartFromTrapOrTestEventBit2_seq1"}, -+{MI_WaitForGlobalsRead, "MI_WaitForGlobalsRead"}, -+{MI_WaitForNPCRead, "MI_WaitForNPCRead"}, -+{MI_EventInterrupt, "MI_EventInterrupt"}, -+{MI_EventInterrupt_seq1, "MI_EventInterrupt_seq1"}, -+{MI_EventInterrupt_seq2, "MI_EventInterrupt_seq2"}, -+{MI_EventInterrupt_seq3, "MI_EventInterrupt_seq3"}, -+{MI_TestSysCntxDmaQueueEmpty, "MI_TestSysCntxDmaQueueEmpty"}, -+{MI_TestSysCntxDmaQueueEmpty_seq1, "MI_TestSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestIfRemoteDesc, "MI_TestIfRemoteDesc"}, -+{MI_DoDmaLocalSetEvent, "MI_DoDmaLocalSetEvent"}, -+{MI_DoDmaLocalSetEvent_seq1, "MI_DoDmaLocalSetEvent_seq1"}, -+{MI_DoDmaLocalSetEvent_seq2, "MI_DoDmaLocalSetEvent_seq2"}, -+{MI_DmaLoop1, "MI_DmaLoop1"}, -+{MI_ExitDmaLoop, "MI_ExitDmaLoop"}, -+{MI_ExitDmaLoop_seq1, "MI_ExitDmaLoop_seq1"}, -+{MI_RemoteDmaTestPAckType, "MI_RemoteDmaTestPAckType"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0, "MI_PacketDiscardOrTestFailRecIfCCis0"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0_seq1, "MI_PacketDiscardOrTestFailRecIfCCis0_seq1"}, -+{MI_TestNackFailIsZero2, "MI_TestNackFailIsZero2"}, -+{MI_TestNackFailIsZero3, "MI_TestNackFailIsZero3"}, -+{MI_DmaFailCountError, "MI_DmaFailCountError"}, -+{MI_TestDmaForSysCntx, "MI_TestDmaForSysCntx"}, -+{MI_TestDmaForSysCntx_seq1, "MI_TestDmaForSysCntx_seq1"}, -+{MI_TestDmaForSysCntx_seq2, "MI_TestDmaForSysCntx_seq2"}, -+{MI_TestAeqB2, "MI_TestAeqB2"}, -+{MI_TestAeqB2_seq1, "MI_TestAeqB2_seq1"}, -+{MI_GetNextDmaDescriptor, "MI_GetNextDmaDescriptor"}, -+{MI_DequeueSysCntxDma2, "MI_DequeueSysCntxDma2"}, -+{MI_InputSetEvent, "MI_InputSetEvent"}, -+{MI_PutBackSysCntxDma, "MI_PutBackSysCntxDma"}, -+{MI_PutBackSysCntxDma_seq1, "MI_PutBackSysCntxDma_seq1"}, -+{MI_PutBackSysCntxDma_seq2, "MI_PutBackSysCntxDma_seq2"}, -+{MI_InputRemoteDma, "MI_InputRemoteDma"}, -+{MI_InputRemoteDma_seq1, "MI_InputRemoteDma_seq1"}, -+{MI_WaitOneTickForWakeup1, "MI_WaitOneTickForWakeup1"}, -+{MI_SendRemoteDmaDesc, "MI_SendRemoteDmaDesc"}, -+{MI_InputLockQueue, "MI_InputLockQueue"}, -+{MI_CloseTheTrappedPacketIfCCis1, "MI_CloseTheTrappedPacketIfCCis1"}, -+{MI_CloseTheTrappedPacketIfCCis1_seq1, "MI_CloseTheTrappedPacketIfCCis1_seq1"}, -+{MI_PostDmaInterrupt, "MI_PostDmaInterrupt"}, -+{MI_InputUnLockQueue, "MI_InputUnLockQueue"}, -+{MI_WaitForUnLockDescRead, "MI_WaitForUnLockDescRead"}, -+{MI_SendEOPforRemoteDma, "MI_SendEOPforRemoteDma"}, -+{MI_LookAtRemoteAck, "MI_LookAtRemoteAck"}, -+{MI_InputWriteBlockQueue, "MI_InputWriteBlockQueue"}, -+{MI_WaitForSpStore, "MI_WaitForSpStore"}, -+{MI_TProcNext, "MI_TProcNext"}, -+{MI_TProcStoppedRunning, "MI_TProcStoppedRunning"}, -+{MI_InputWriteBlock, "MI_InputWriteBlock"}, -+{MI_RunDmaOrDeqNonSysCntxDma, "MI_RunDmaOrDeqNonSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForRun, "MI_ExecuteDmaDescriptorForRun"}, -+{MI_ConfirmQueueLock, "MI_ConfirmQueueLock"}, -+{MI_DmaInputIdentify, "MI_DmaInputIdentify"}, -+{MI_TProcStoppedRunning2, "MI_TProcStoppedRunning2"}, -+{MI_TProcStoppedRunning2_seq1, "MI_TProcStoppedRunning2_seq1"}, -+{MI_TProcStoppedRunning2_seq2, "MI_TProcStoppedRunning2_seq2"}, -+{MI_ThreadInputIdentify, "MI_ThreadInputIdentify"}, -+{MI_InputIdWriteAddrAndType3, "MI_InputIdWriteAddrAndType3"}, -+{MI_IProcTrappedWriteStatus, "MI_IProcTrappedWriteStatus"}, -+{MI_FinishTrappingEop, "MI_FinishTrappingEop"}, -+{MI_InputTestTrans, "MI_InputTestTrans"}, -+{MI_TestAeqB3, "MI_TestAeqB3"}, -+{MI_ThreadUpdateNonSysCntxBack, "MI_ThreadUpdateNonSysCntxBack"}, -+{MI_ThreadQueueOverflow, "MI_ThreadQueueOverflow"}, -+{MI_RunContext0Thread, "MI_RunContext0Thread"}, -+{MI_RunContext0Thread_seq1, "MI_RunContext0Thread_seq1"}, -+{MI_RunContext0Thread_seq2, "MI_RunContext0Thread_seq2"}, -+{MI_RunDmaDesc, "MI_RunDmaDesc"}, -+{MI_RunDmaDesc_seq1, "MI_RunDmaDesc_seq1"}, -+{MI_RunDmaDesc_seq2, "MI_RunDmaDesc_seq2"}, -+{MI_TestAeqB, "MI_TestAeqB"}, -+{MI_WaitForNonCntxDmaDescRead, "MI_WaitForNonCntxDmaDescRead"}, -+{MI_DmaQueueOverflow, "MI_DmaQueueOverflow"}, -+{MI_BlockCopyEvent, "MI_BlockCopyEvent"}, -+{MI_BlockCopyEventReadBlock, "MI_BlockCopyEventReadBlock"}, -+{MI_BlockCopyWaitForReadData, "MI_BlockCopyWaitForReadData"}, -+{MI_InputWriteWord, "MI_InputWriteWord"}, -+{MI_TraceSetEvents, "MI_TraceSetEvents"}, -+{MI_TraceSetEvents_seq1, "MI_TraceSetEvents_seq1"}, -+{MI_TraceSetEvents_seq2, "MI_TraceSetEvents_seq2"}, -+{MI_InputWriteDoubleWd, "MI_InputWriteDoubleWd"}, -+{MI_SendLockTransIfCCis1, "MI_SendLockTransIfCCis1"}, -+{MI_WaitForDmaRoutes1, "MI_WaitForDmaRoutes1"}, -+{MI_LoadDmaContext, "MI_LoadDmaContext"}, -+{MI_InputTestAndSetWord, "MI_InputTestAndSetWord"}, -+{MI_InputTestAndSetWord_seq1, "MI_InputTestAndSetWord_seq1"}, -+{MI_GetDestEventValue, "MI_GetDestEventValue"}, -+{MI_SendDmaIdentify, "MI_SendDmaIdentify"}, -+{MI_InputAtomicAddWord, "MI_InputAtomicAddWord"}, -+{MI_LoadBFromTransD0, "MI_LoadBFromTransD0"}, -+{MI_ConditionalWriteBackCCTrue, "MI_ConditionalWriteBackCCTrue"}, -+{MI_WaitOneTickForWakeup, "MI_WaitOneTickForWakeup"}, -+{MI_SendFinalUnlockTrans, "MI_SendFinalUnlockTrans"}, -+{MI_SendDmaEOP, "MI_SendDmaEOP"}, -+{MI_GenLastAddrForPsycho, "MI_GenLastAddrForPsycho"}, -+{MI_FailedAckIfCCis0, "MI_FailedAckIfCCis0"}, -+{MI_FailedAckIfCCis0_seq1, "MI_FailedAckIfCCis0_seq1"}, -+{MI_WriteDmaSysCntxDesc, "MI_WriteDmaSysCntxDesc"}, -+{MI_TimesliceDmaQueueOverflow, "MI_TimesliceDmaQueueOverflow"}, -+{MI_DequeueNonSysCntxThread1, "MI_DequeueNonSysCntxThread1"}, -+{MI_DequeueNonSysCntxThread1_seq1, "MI_DequeueNonSysCntxThread1_seq1"}, -+{MI_TestThreadQueueEmpty, "MI_TestThreadQueueEmpty"}, -+{MI_ClearThreadQueueIfCC, "MI_ClearThreadQueueIfCC"}, -+{MI_DequeueSysCntxThread1, "MI_DequeueSysCntxThread1"}, -+{MI_DequeueSysCntxThread1_seq1, "MI_DequeueSysCntxThread1_seq1"}, -+{MI_TProcStartUpGeneric, "MI_TProcStartUpGeneric"}, -+{MI_WaitForPCload2, "MI_WaitForPCload2"}, -+{MI_WaitForNPCWrite, "MI_WaitForNPCWrite"}, -+{MI_WaitForEventWaitAddr, "MI_WaitForEventWaitAddr"}, -+{MI_WaitForWaitEventAccess, "MI_WaitForWaitEventAccess"}, -+{MI_WaitForWaitEventAccess_seq1, "MI_WaitForWaitEventAccess_seq1"}, -+{MI_WaitForWaitEventDesc, "MI_WaitForWaitEventDesc"}, -+{MI_WaitForEventReadTy0, "MI_WaitForEventReadTy0"}, -+{MI_SendCondTestFail, "MI_SendCondTestFail"}, -+{MI_InputMoveToNextTrans, "MI_InputMoveToNextTrans"}, -+{MI_ThreadUpdateSysCntxBack, "MI_ThreadUpdateSysCntxBack"}, -+{MI_FinishedSetEvent, "MI_FinishedSetEvent"}, -+{MI_EventIntUpdateBPtr, "MI_EventIntUpdateBPtr"}, -+{MI_EventQueueOverflow, "MI_EventQueueOverflow"}, -+{MI_MaskLowerSource, "MI_MaskLowerSource"}, -+{MI_DmaLoop, "MI_DmaLoop"}, -+{MI_SendNullSetEvent, "MI_SendNullSetEvent"}, -+{MI_SendFinalSetEvent, "MI_SendFinalSetEvent"}, -+{MI_TestNackFailIsZero1, "MI_TestNackFailIsZero1"}, -+{MI_DmaPacketTimedOutOrPacketError, "MI_DmaPacketTimedOutOrPacketError"}, -+{MI_NextPacketIsLast, "MI_NextPacketIsLast"}, -+{MI_TestForZeroLengthDma, "MI_TestForZeroLengthDma"}, -+{MI_WaitForPCload, "MI_WaitForPCload"}, -+{MI_ReadInIns, "MI_ReadInIns"}, -+{MI_WaitForInsRead, "MI_WaitForInsRead"}, -+{MI_WaitForLocals, "MI_WaitForLocals"}, -+{MI_WaitForOutsWrite, "MI_WaitForOutsWrite"}, -+{MI_WaitForWaitEvWrBack, "MI_WaitForWaitEvWrBack"}, -+{MI_WaitForLockRead, "MI_WaitForLockRead"}, -+{MI_TestQueueLock, "MI_TestQueueLock"}, -+{MI_InputIdWriteAddrAndType, "MI_InputIdWriteAddrAndType"}, -+{MI_InputIdWriteAddrAndType2, "MI_InputIdWriteAddrAndType2"}, -+{MI_ThreadInputIdentify2, "MI_ThreadInputIdentify2"}, -+{MI_WriteIntoTrapArea0, "MI_WriteIntoTrapArea0"}, -+{MI_GenQueueBlockWrAddr, "MI_GenQueueBlockWrAddr"}, -+{MI_InputDiscardFreeLock, "MI_InputDiscardFreeLock"}, -+{MI_WriteIntoTrapArea1, "MI_WriteIntoTrapArea1"}, -+{MI_WriteIntoTrapArea2, "MI_WriteIntoTrapArea2"}, -+{MI_ResetBPtrToBase, "MI_ResetBPtrToBase"}, -+{MI_InputDoTrap, "MI_InputDoTrap"}, -+{MI_RemoteDmaCntxt0Update, "MI_RemoteDmaCntxt0Update"}, -+{MI_ClearQueueLock, "MI_ClearQueueLock"}, -+{MI_IProcTrappedBlockWriteData, "MI_IProcTrappedBlockWriteData"}, -+{MI_FillContextFilter, "MI_FillContextFilter"}, -+{MI_IProcTrapped4, "MI_IProcTrapped4"}, -+{MI_RunSysCntxDma, "MI_RunSysCntxDma"}, -+{MI_ChainedEventError, "MI_ChainedEventError"}, -+{MI_InputTrappingEOP, "MI_InputTrappingEOP"}, -+{MI_CheckForRunIfZero, "MI_CheckForRunIfZero"}, -+{MI_TestForBreakOrSuspend, "MI_TestForBreakOrSuspend"}, -+{MI_SwapForRunable, "MI_SwapForRunable"}, -Index: linux-2.4.21/include/elan3/neterr_rpc.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/neterr_rpc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/neterr_rpc.h 2005-06-01 23:12:54.728419496 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NETERR_RPC_H -+#define __ELAN3_NETERR_RPC_H -+ -+#ident "$Id: neterr_rpc.h,v 1.20 2003/06/26 16:05:22 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/neterr_rpc.h,v $*/ -+ -+#define NETERR_SERVICE "neterr-srv" -+#define NETERR_PROGRAM ((u_long) 170002) -+#define NETERR_VERSION ((u_long) 1) -+ -+#define NETERR_NULL_RPC 0 -+#define NETERR_FIXUP_RPC 1 -+ -+/* network error rpc timeout */ -+#define NETERR_RPC_TIMEOUT 5 -+ -+/* -+ * XDR functions for Tru64 and Linux in userspace. -+ * NB Linux kernelspace xdr routines are in network_error. -+ * and *must* be kept consistent. -+ */ -+#if defined(DIGITAL_UNIX) || !defined(__KERNEL__) -+bool_t -+xdr_capability (XDR *xdrs, void *arg) -+{ -+ ELAN_CAPABILITY *cap = (ELAN_CAPABILITY *) arg; -+ -+ return (xdr_opaque (xdrs, (caddr_t) &cap->cap_userkey, sizeof (cap->cap_userkey)) && -+ xdr_int (xdrs, &cap->cap_version) && -+ xdr_u_short (xdrs, &cap->cap_type) && -+ xdr_int (xdrs, &cap->cap_lowcontext) && -+ xdr_int (xdrs, &cap->cap_highcontext) && -+ xdr_int (xdrs, &cap->cap_mycontext) && -+ xdr_int (xdrs, &cap->cap_lownode) && -+ xdr_int (xdrs, &cap->cap_highnode) && -+ xdr_u_int (xdrs, &cap->cap_railmask) && -+ xdr_opaque (xdrs, (caddr_t) &cap->cap_bitmap[0], sizeof (cap->cap_bitmap))); -+} -+ -+bool_t -+xdr_neterr_msg (XDR *xdrs, void *req) -+{ -+ NETERR_MSG *msg = (NETERR_MSG *) req; -+ -+ return (xdr_u_int (xdrs, &msg->Rail) && -+ xdr_capability (xdrs, &msg->SrcCapability) && -+ xdr_capability (xdrs, &msg->DstCapability) && -+ xdr_u_int (xdrs, &msg->DstProcess) && -+ xdr_u_int (xdrs, &msg->CookieAddr) && -+ xdr_u_int (xdrs, &msg->CookieVProc) && -+ xdr_u_int (xdrs, &msg->NextCookie) && -+ xdr_u_int (xdrs, &msg->WaitForEop)); -+} -+#endif /* INCLUDE_XDR_INLINE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN3_NETERR_RPC_H */ -Index: linux-2.4.21/include/elan3/perm.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/perm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/perm.h 2005-06-01 23:12:54.728419496 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PERM_H -+#define __ELAN3_PERM_H -+ -+#ident "$Id: perm.h,v 1.7 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/perm.h,v $*/ -+ -+#define ELAN3_PERM_NULL 0x00 -+#define ELAN3_PERM_LOCAL_READ 0x04 -+#define ELAN3_PERM_READ 0x08 -+#define ELAN3_PERM_NOREMOTE 0x0c -+#define ELAN3_PERM_REMOTEREAD 0x10 -+#define ELAN3_PERM_REMOTEWRITE 0x14 -+#define ELAN3_PERM_REMOTEEVENT 0x18 -+#define ELAN3_PERM_REMOTEALL 0x1c -+ -+#endif /* __ELAN3_PERM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/pte.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/pte.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/pte.h 2005-06-01 23:12:54.729419344 -0400 -@@ -0,0 +1,139 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PTE_H -+#define __ELAN3_PTE_H -+ -+#ident "$Id: pte.h,v 1.26 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/pte.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" -+{ -+#endif -+ -+#include -+#include -+ -+typedef E3_uint64 ELAN3_PTE; -+typedef E3_uint32 ELAN3_PTP; -+ -+#define ELAN3_PTE_SIZE (8) -+#define ELAN3_PTP_SIZE (4) -+ -+#define ELAN3_PTE_REF ((E3_uint64) 1 << 63) /* 63 - referenced bit */ -+#define ELAN3_PTE_MOD ((E3_uint64) 1 << 55) /* 55 - modified bit */ -+#define ELAN3_RM_MASK (ELAN3_PTE_REF | ELAN3_PTE_MOD) -+ -+#define ELAN3_PTE_PFN_MASK 0x0000fffffffff000ull /* [12:48] - Physical address */ -+ -+#define ELAN3_PTE_BIG_ENDIAN 0x80 /* 7 - big endian */ -+#define ELAN3_PTE_64_BIT 0x40 /* 6 - 64 bit pci address */ -+#define ELAN3_PTE_LOCAL 0x20 /* 5 - local sdram */ -+ -+#define ELAN3_PTE_PERM_MASK 0x1c /* [2:4] - Permissions */ -+#define ELAN3_PTE_PERM_SHIFT 2 -+ -+#define ELAN3_ET_MASK 0x3 -+#define ELAN3_ET_INVALID 0x0 /* [0:1] */ -+#define ELAN3_ET_PTP 0x1 -+#define ELAN3_ET_PTE 0x2 -+ -+#define ELAN3_INVALID_PTP ((ELAN3_PTP) 0) -+#define ELAN3_INVALID_PTE ((ELAN3_PTE) 0) -+ -+#define ELAN3_PTP_TYPE(ptp) ((ptp) & ELAN3_ET_MASK) -+#define ELAN3_PTE_TYPE(pte) ((pte) & ELAN3_ET_MASK) -+#define ELAN3_PTE_PERM(pte) ((pte) & ELAN3_PTE_PERM_MASK) -+#define ELAN3_PTE_VALID(pte) (((pte) & ELAN3_ET_MASK) == ELAN3_ET_PTE) -+#define ELAN3_PTE_ISREF(pte) ((pte) & ELAN3_PTE_REF) -+#define ELAN3_PTE_ISMOD(pte) ((pte) & ELAN3_PTE_MOD) -+#define ELAN3_PTE_WRITEABLE(pte) (ELAN3_PERM_WRITEABLE(ELAN3_PTE_PERM(pte))) -+ -+#define ELAN3_PERM_WRITEABLE(perm) ((perm) == ELAN3_PERM_NOREMOTE || (perm) > ELAN3_PERM_REMOTEREAD) -+#define ELAN3_PERM_REMOTE(perm) ((perm) > ELAN3_PERM_NOREMOTE) -+ -+#define ELAN3_PERM_READONLY(perm) ((perm) == ELAN3_PERM_NOREMOTE ? ELAN3_PERM_LOCAL_READ : \ -+ (perm) > ELAN3_PERM_REMOTEREAD ? ELAN3_PERM_READ : (perm)) -+#if PAGE_SHIFT == 12 -+# define ELAN3_PAGE_SHIFT 12 -+#else -+# define ELAN3_PAGE_SHIFT 13 -+#endif -+ -+#define ELAN3_PAGE_SIZE (1 << ELAN3_PAGE_SHIFT) -+#define ELAN3_PAGE_OFFSET (ELAN3_PAGE_SIZE-1) -+#define ELAN3_PAGE_MASK (~ELAN3_PAGE_OFFSET) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define ELAN3_L3_SHIFT 5 -+#else -+# define ELAN3_L3_SHIFT 6 -+#endif -+#define ELAN3_L2_SHIFT 6 -+#define ELAN3_L1_SHIFT 8 -+ -+/* Number of entries in a given level ptbl */ -+#define ELAN3_L3_ENTRIES (1 << ELAN3_L3_SHIFT) -+#define ELAN3_L2_ENTRIES (1 << ELAN3_L2_SHIFT) -+#define ELAN3_L1_ENTRIES (1 << ELAN3_L1_SHIFT) -+ -+/* Virtual address spanned by each entry */ -+#define ELAN3_L3_SIZE (1 << (ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L1_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_L2_SHIFT+ELAN3_PAGE_SHIFT)) -+ -+/* Virtual address size of page table */ -+#define ELAN3_L1_PTSIZE (ELAN3_L1_ENTRIES * ELAN3_L1_SIZE) -+#define ELAN3_L3_PTSIZE (ELAN3_L3_ENTRIES * ELAN3_L3_SIZE) -+#define ELAN3_L2_PTSIZE (ELAN3_L2_ENTRIES * ELAN3_L2_SIZE) -+ -+/* Mask for offset into page table */ -+#define ELAN3_L1_PTOFFSET ((ELAN3_L1_SIZE*ELAN3_L1_ENTRIES)-1) -+#define ELAN3_L3_PTOFFSET ((ELAN3_L3_SIZE*ELAN3_L3_ENTRIES)-1) -+#define ELAN3_L2_PTOFFSET ((ELAN3_L2_SIZE*ELAN3_L2_ENTRIES)-1) -+ -+#define ELAN3_L1_INDEX(addr) (((E3_Addr) (addr) & 0xFF000000) >> (ELAN3_L2_SHIFT+ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_INDEX(addr) (((E3_Addr) (addr) & 0x00FD0000) >> (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L3_INDEX(addr) (((E3_Addr) (addr) & 0x0003F000) >> ELAN3_PAGE_SHIFT) -+ -+#define ELAN3_L1_BASE(addr) (((E3_Addr)(addr)) & 0x00000000) -+#define ELAN3_L2_BASE(addr) (((E3_Addr)(addr)) & 0xFF000000) -+#define ELAN3_L3_BASE(addr) (((E3_Addr)(addr)) & 0xFFFC0000) -+ -+/* Convert a page table pointer entry to the PT */ -+#define PTP_TO_PT_PADDR(ptp) ((E3_Addr)(ptp & 0xFFFFFFFC)) -+ -+#ifdef __KERNEL__ -+/* -+ * incompatible access for permission macro. -+ */ -+extern u_char elan3mmu_permissionTable[8]; -+#define ELAN3_INCOMPAT_ACCESS(perm,access) (! (elan3mmu_permissionTable[(perm)>>ELAN3_PTE_PERM_SHIFT] & (1 << (access)))) -+ -+#define elan3_readptp(dev, ptp) (elan3_sdram_readl (dev, ptp)) -+#define elan3_writeptp(dev, ptp, value) (elan3_sdram_writel (dev, ptp, value)) -+#define elan3_readpte(dev, pte) (elan3_sdram_readq (dev, pte)) -+#define elan3_writepte(dev,pte, value) (elan3_sdram_writeq (dev, pte, value)) -+ -+#define elan3_invalidatepte(dev, pte) (elan3_sdram_writel (dev, pte, 0)) -+#define elan3_modifypte(dev,pte,new) (elan3_sdram_writel (dev, pte, (int) (new))) -+#define elan3_clrref(dev,pte) (elan3_sdram_writeb (dev, pte + 7) -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_PTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/spinlock.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/spinlock.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/spinlock.h 2005-06-01 23:12:54.729419344 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_SPINLOCK_ -+#define _ELAN3_SPINLOCK_ -+ -+#ident "$Id: spinlock.h,v 1.31 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/spinlock.h,v $*/ -+ -+/* -+ * This spinlock is designed for main/elan processor interactions. -+ * The lock is split over Elan/Main memory in such a way that -+ * we don't end up busy-polling over the PCI. -+ * In the Elan memory we have two words; one is a sequence number -+ * and the other is a lock word for main. -+ * In main memory we have a copy of the sequence number which main polls when it is -+ * waiting for the Elan to drop the lock. Main polls this word until it becomes -+ * equal to the sequence number it sampled. -+ * The Elan drops the lock by writing the current sequence number to main memory. -+ * It is coded to always give priority to the Elan thread, and so when both go for the -+ * lock, main will back off first. -+ * -+ * 18/3/98 -+ * This has been extended to avoid a starvation case where both the main and thread claim the -+ * lock and so both backoff (thread does a break). So now, main attempts to claim the -+ * lock by writing 'mainLock' then samples the 'sl_seq' and if it has the lock -+ * it sets 'mainGotLock'. The thread will now see the 'sl_mainLock' set, but will only -+ * backoff with a c_break_busywait() if 'mainGotLock' is set too. -+ */ -+typedef struct elan3_spinlock_elan { -+ union { -+ volatile E3_uint64 mainLocks; /* main writes this dble word */ -+ struct { -+ volatile E3_uint32 mainLock; /* main wants a lock */ -+ volatile E3_uint32 mainGotLock; /* main has the lock */ -+ } s; -+ } sl_u; -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ volatile E3_uint32 sl_mainWait; /* performance counter */ -+ volatile E3_uint32 sl_elanWait; /* performance counter */ -+ volatile E3_uint32 sl_elanBusyWait; /* performance counter */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[5]; /* pad to 64-bytes */ -+} ELAN3_SPINLOCK_ELAN; -+ -+#define sl_mainLocks sl_u.mainLocks -+#define sl_mainLock sl_u.s.mainLock -+#define sl_mainGotLock sl_u.s.mainGotLock -+ -+#define SL_MAIN_RECESSIVE 1 -+#define SL_MAIN_DOMINANT 2 -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef union elan3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+} ELAN3_SPINLOCK_MAIN; -+ -+/* Main/Main or Elan/Elan lock word */ -+typedef volatile int ELAN3_SPINLOCK; -+ -+#ifdef __ELAN3__ -+ -+/* Main/Elan interlock */ -+ -+#define ELAN3_ME_SPINENTER(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_mainLock) \ -+ elan3_me_spinblock(SLE, SL);\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+#define ELAN3_ME_SPINEXIT(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+ -+ -+/* Elan/Elan interlock */ -+#define ELAN3_SPINENTER(L) do {\ -+ asm volatile ("! store barrier");\ -+ if (c_swap ((L), 1)) elan3_spinenter(L);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ asm volatile ("! store barrier");\ -+ c_swap((L), 0);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+ -+extern void elan3_me_spinblock (ELAN3_SPINLOCK_ELAN *sle, ELAN3_SPINLOCK_MAIN *sl); -+extern void elan3_spinenter (ELAN3_SPINLOCK *l); -+ -+#else -+ -+/* Main/Elan interlock */ -+#ifdef DEBUG -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_int32 maxLoops = 0x7fffffff; \ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1) && maxLoops--) ; \ -+ if (maxLoops < 0) { \ -+ printf("Failed to get ME lock %lx/%lx seq %d sle_seq %d sl_seq %d\n", \ -+ SL, SLE, seq, \ -+ elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)), \ -+ (SL)->sl_seq); \ -+ } \ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#else -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1)) ; \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#endif -+#define ELAN3_ME_FORCEENTER(SDRAM,SLE,SL) do { \ -+ register E3_uint32 seq; \ -+ MEMBAR_STORELOAD(); \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_DOMINANT); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) \ -+ { \ -+ /* NOTE: we MUST call elan3_usecspin here for kernel comms */\ -+ while ((SL)->sl_seq == (seq)-1) \ -+ elan3_usecspin (1); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ } \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD(); \ -+} while (0) -+ -+#define ELAN3_ME_TRYENTER(SDRAM,SLE,SL,SEQ) do { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ SEQ = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+} while (0) -+ -+#define ELAN3_ME_CHECKENTER(SDRAM,SLE,SL,SEQ) do { \ -+ if ((SEQ) == ((SL)->sl_seq)) { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } \ -+ else ELAN3_ME_SPINENTER(SLE,SL); \ -+} while (0) -+ -+#define ELAN3_ME_SPINEXIT(SDRAM,SLE,SL) do {\ -+ MEMBAR_STORESTORE(); \ -+ elan3_write64_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLocks), 0); \ -+ MEMBAR_STORESTORE(); \ -+ } while (0) -+ -+ -+/* Main/Main */ -+#define ELAN3_SPINENTER(L) do {\ -+ while (c_swap ((L), 1)) ; \ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ c_swap((L), 0);\ -+ } while (0) -+#endif /* _ELAN3_ */ -+ -+#endif /* _ELAN3_SPINLOCK_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/thread.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/thread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/thread.h 2005-06-01 23:12:54.730419192 -0400 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_THREAD_H -+#define _ELAN3_THREAD_H -+ -+#ident "$Id: thread.h,v 1.17 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/thread.h,v $*/ -+ -+/* Alignment for a stack frame */ -+#define E3_STACK_ALIGN (64) -+ -+typedef struct _E3_Frame { -+ E3_uint32 fr_local[8]; /* saved locals (not used) */ -+ E3_uint32 fr_arg[6]; /* saved arguements o0 -> o5 */ -+ E3_Addr fr_savefp; /* saved frame pointer o6 */ -+ E3_Addr fr_savepc; /* saved program counter o7 */ -+ E3_Addr fr_stret; /* stuct return addr */ -+ E3_uint32 fr_argd[6]; /* arg dump area */ -+ E3_uint32 fr_argx[1]; /* array of args past the sixth */ -+} E3_Frame; -+ -+typedef struct _E3_Stack { -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ E3_uint32 Globals[8]; -+ E3_uint32 Outs[8]; -+} E3_Stack; -+ -+typedef struct _E3_OutsRegs { -+ E3_uint32 o[8]; /* o6 == pc, o7 == fptr */ -+} E3_OutsRegs; -+ -+/* -+ * "Magic" value for stack pointer to be ignored. -+ */ -+#define VanishingStackPointer 0x42 -+ -+ -+/* -+ * When the Elan traps the N & Z CC bits are held in the NPC -+ * and the V & C bits are in the PC -+ */ -+#define PSR_C_BIT (1) -+#define PSR_V_BIT (2) -+#define PSR_Z_BIT (1) -+#define PSR_N_BIT (2) -+#define CC_MASK (3) -+#define PC_MASK (~3) -+#define SP_MASK (~3) -+ -+/* -+ * Threads processor Opcodes. -+ */ -+#define OPCODE_MASK (0xC1F80000) -+#define OPCODE_IMM (1 << 13) -+ -+#define OPCODE_CLASS(instr) ((instr) & 0xC0000000) -+#define OPCODE_CLASS_0 0x00000000 -+#define OPCODE_CLASS_1 0x40000000 -+#define OPCODE_CLASS_2 0x80000000 -+#define OPCODE_CLASS_3 0xC0000000 -+ -+#define OPCODE_CPOP 0x81B00000 -+#define OPCODE_Ticc 0x81D00000 -+ -+#define OPCODE_FCODE_SHIFT 19 -+#define OPCODE_FCODE_MASK 0x1f -+#define OPCODE_NOT_ALUOP 0x01000000 -+ -+#define OPCODE_SLL 0x81280000 -+#define OPCODE_SRL 0x81300000 -+#define OPCODE_SRA 0x81380000 -+ -+#define OPCODE_OPEN 0x81600000 -+#define OPCODE_CLOSE 0x81680000 -+#define OPCODE_BREAKTEST 0x81700000 -+ -+#define OPCODE_BREAK 0x81a00000 -+#define OPCODE_SUSPEND 0x81a80000 -+#define OPCODE_WAIT 0x81b00000 -+ -+#define OPCODE_JMPL 0x81c00000 -+ -+#define OPCODE_LD 0xC0000000 -+#define OPCODE_LDD 0xC0180000 -+ -+#define OPCODE_LDBLOCK16 0xC0900000 -+#define OPCODE_LDBLOCK32 0xC0800000 -+#define OPCODE_LDBLOCK64 0xC0980000 -+ -+#define OPCODE_ST 0xC0200000 -+#define OPCODE_STD 0xC0380000 -+ -+#define OPCODE_SWAP 0xC0780000 -+ -+#define OPCODE_STBLOCK16 0xC0b00000 -+#define OPCODE_STBLOCK32 0xC0a00000 -+#define OPCODE_STBLOCK64 0xC0b80000 -+ -+#define OPCODE_CLASS0_MASK 0xC1C00000 -+#define OPCODE_SETHI 0x01000000 -+#define OPCODE_BICC 0x00800000 -+#define OPCODE_SENDREG 0x01800000 -+#define OPCODE_SENDMEM 0x01c00000 -+ -+#define OPCODE_BICC_BN 0x00000000 -+#define OPCODE_BICC_BE 0x02000000 -+#define OPCODE_BICC_BLE 0x04000000 -+#define OPCODE_BICC_BL 0x06000000 -+#define OPCODE_BICC_BLEU 0x08000000 -+#define OPCODE_BICC_BCS 0x0A000000 -+#define OPCODE_BICC_BNEG 0x0C000000 -+#define OPCODE_BICC_BVS 0x0E000000 -+ -+#define OPCODE_BICC_MASK 0x0E000000 -+#define OPCODE_BICC_ANNUL 0x20000000 -+ -+#define INSTR_RS2(instr) (((instr) >> 0) & 0x1F) -+#define INSTR_RS1(instr) (((instr) >> 14) & 0x1F) -+#define INSTR_RD(instr) (((instr) >> 25) & 0x1F) -+#define INSTR_IMM(instr) (((instr) & 0x1000) ? ((instr) & 0xFFF) | 0xFFFFF000 : (instr) & 0xFFF) -+ -+#define Ticc_COND(instr) INSTR_RD(instr) -+#define Ticc_TA 8 -+ -+#endif /* _ELAN3_THREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/threadlinkage.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/threadlinkage.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/threadlinkage.h 2005-06-01 23:12:54.730419192 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_THREADLINKAGE_H -+#define __ELAN3_THREADLINKAGE_H -+ -+#ident "$Id: threadlinkage.h,v 1.6 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadlinkage.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(_ASM) || defined(__LANGUAGE_ASSEMBLY__) -+ -+/* -+ * Macro to define weak symbol aliases. These are similar to the ANSI-C -+ * #pragma weak name = _name -+ * except a compiler can determine type. The assembler must be told. Hence, -+ * the second parameter must be the type of the symbol (i.e.: function,...) -+ */ -+#define ANSI_PRAGMA_WEAK(sym, stype) \ -+ .weak sym; \ -+ .type sym, #stype; \ -+/* CSTYLED */ \ -+sym = _/**/sym -+ -+/* -+ * ENTRY provides the standard procedure entry code -+ */ -+#define ENTRY(x) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x; \ -+x: -+ -+/* -+ * ENTRY2 is identical to ENTRY but provides two labels for the entry point. -+ */ -+#define ENTRY2(x, y) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x, y; \ -+/* CSTYLED */ \ -+x: ; \ -+y: -+ -+ -+/* -+ * ALTENTRY provides for additional entry points. -+ */ -+#define ALTENTRY(x) \ -+ .global x; \ -+x: -+ -+/* -+ * DGDEF and DGDEF2 provide global data declarations. -+ * -+ * DGDEF provides a word aligned word of storage. -+ * -+ * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This -+ * implies this macro is best used for byte arrays. -+ * -+ * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. -+ */ -+#define DGDEF2(name, sz) \ -+ .section ".data"; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF3(name, sz, algn) \ -+ .section ".data"; \ -+ .align algn; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF(name) DGDEF3(name, 4, 4) -+ -+/* -+ * SET_SIZE trails a function and set the size for the ELF symbol table. -+ */ -+#define SET_SIZE(x) \ -+ .size x, (.-x) -+ -+#endif /* _ASM || __LANGUAGE_ASSEMBLY__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_THREADLINKAGE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/threadsyscall.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/threadsyscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/threadsyscall.h 2005-06-01 23:12:54.730419192 -0400 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_SYSCALL_H -+#define __ELAN3_SYSCALL_H -+ -+#ident "$Id: threadsyscall.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadsyscall.h,v $*/ -+ -+/* -+ * This file contains the system calls supported from the Elan. -+ */ -+#define ELAN3_DEBUG_TRAPNUM 5 /* thread debugging trap */ -+#define ELAN3_ABORT_TRAPNUM 6 /* bad abort trap */ -+#define ELAN3_ELANCALL_TRAPNUM 7 /* elansyscall trap */ -+#define ELAN3_SYSCALL_TRAPNUM 8 /* new syscall trap */ -+ -+#define ELAN3_T_SYSCALL_CODE 0 /* offsets in struct elan3_t_syscall */ -+#define ELAN3_T_SYSCALL_ERRNO 4 -+ -+#define ELAN3_SYS_open 1 -+#define ELAN3_SYS_close 2 -+#define ELAN3_SYS_write 3 -+#define ELAN3_SYS_read 4 -+#define ELAN3_SYS_poll 5 -+#define ELAN3_SYS_ioctl 6 -+#define ELAN3_SYS_lseek 7 -+#define ELAN3_SYS_mmap 8 -+#define ELAN3_SYS_munmap 9 -+#define ELAN3_SYS_kill 10 -+#define ELAN3_SYS_getpid 11 -+ -+#if !defined(SYS_getpid) && defined(__NR_getxpid) -+#define SYS_getpid __NR_getxpid /* for linux */ -+#endif -+ -+#if !defined(_ASM) && !defined(__LANGUAGE_ASSEMBLY__) -+ -+extern int elan3_t_open (const char *, int, ...); -+extern ssize_t elan3_t_write (int, const void *, unsigned); -+extern ssize_t elan3_t_read(int, void *, unsigned); -+extern int elan3_t_ioctl(int, int, ...); -+extern int elan3_t_close(int); -+extern off_t elan3_t_lseek(int filedes, off_t offset, int whence); -+ -+extern caddr_t elan3_t_mmap(caddr_t, size_t, int, int, int, off_t); -+extern int elan3_t_munmap(caddr_t, size_t); -+ -+extern int elan3_t_getpid(void); -+extern void elan3_t_abort(char *str); -+ -+#endif /* !_ASM && ! __LANGUAGE_ASSEMBLY__ */ -+ -+#endif /* __ELAN3_SYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/trtype.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/trtype.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/trtype.h 2005-06-01 23:12:54.731419040 -0400 -@@ -0,0 +1,116 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_TRTYPE_H -+#define _ELAN3_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.13 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/trtype.h,v $ */ -+ -+/*<15> ackNow */ -+#define TR_SENDACK (1 << 15) -+ -+#define TR_SIZE_SHIFT 12 -+#define TR_SIZE_MASK 7 -+ -+/*<14:12> Size 0, 1, 2, 4, 8, 16, 32, 64 Double Words -+ Bit 14 is forced to zero currently so that only size 0, 1, 2, 4 are -+ allowed */ -+ -+#define TR_SIZE0 (0 << TR_SIZE_SHIFT) -+#define TR_SIZE1 (1 << TR_SIZE_SHIFT) -+#define TR_SIZE2 (2 << TR_SIZE_SHIFT) -+#define TR_SIZE4 (3 << TR_SIZE_SHIFT) -+#define TR_SIZE8 (4 << TR_SIZE_SHIFT) -+ -+#define TR_64_BIT_ADDR (1 << 11) -+#define TR_LAST_TRANS (1 << 10) -+ -+#define TR_WRITEBLOCK_BIT (1 << 9) -+#define TR_WRITEBLOCK (TR_WRITEBLOCK_BIT | TR_SIZE8) -+ -+ -+#define TR_WRITEBLOCK_SIZE 64 -+ -+/* -+ * write-block -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_TYPE_SHIFT 7 -+#define TR_TYPE_MASK ((1 << 2) - 1) -+ -+#define TR_TYPE_BYTE 0 -+#define TR_TYPE_SHORT 1 -+#define TR_TYPE_WORD 2 -+#define TR_TYPE_DWORD 3 -+ -+#define TR_PARTSIZE_MASK ((1 << 7) -1) -+ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * trace-route format -+ */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision ID */ -+#define TR_TRACEROUTE0_BCAST_TOP_PIN(val) (((val) >> 7) & 1) /* 7 Broadcast Top Pin (REV B) */ -+#define TR_TRACEROUTE0_LNR(val) ((val) >> 8) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_PRIO(val) ((val & 0xF)) /* 0:3 Arrival Priority (REV A) */ -+#define TR_TRACEROUTE1_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Priority Held(Age) (REV A) */ -+#define TR_TRACEROUTE1_ROUTE_SELECTED(val) ((val) & 0xFF) /* 0:7 Arrival age (REV B) */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_ADAPT(val) (((val) >> 12) & 3) /* 12:13 This Adaptive Value (REV A) */ -+#define TR_TRACEROUTE1_BCAST_BOT(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom (REV B) */ -+ -+#define TR_TRACEROUTE2_ARRIVAL_AGE(val) ((val) & 0xF) /* 0:3 Arrival Age (REV B) */ -+#define TR_TRACEROUTE2_CURR_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Current Age (REV B) */ -+#define TR_TRACEROUTE2_BUSY(val) (((val) >> 8) & 0xFF) /* 8:15 Busy (REV B) */ -+ -+#define TR_TRACEROUTE_SIZE 32 -+#define TR_TRACEROUTE_ENTRIES (TR_TRACEROUTE_SIZE/2) -+ -+/* -+ * non-write block -+ */ -+#define TR_OPCODE_MASK (((1 << 8) - 1) | \ -+ (TR_SIZE_MASK << TR_SIZE_SHIFT) | \ -+ TR_WRITEBLOCK_BIT) -+ -+#define TR_NOP_TRANS (0x0 | TR_SIZE0) -+#define TR_SETEVENT (0x0 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_REMOTEDMA (0x1 | TR_SIZE4 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_LOCKQUEUE (0x2 | TR_SIZE0) -+#define TR_UNLOCKQUEUE (0x3 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+ -+#define TR_SENDDISCARD (0x4 | TR_SIZE0) -+#define TR_TRACEROUTE (0x5 | TR_SIZE4) -+ -+#define TR_DMAIDENTIFY (0x6 | TR_SIZE0) -+#define TR_THREADIDENTIFY (0x7 | TR_SIZE1) -+ -+#define TR_GTE (0x8 | TR_SIZE1) -+#define TR_LT (0x9 | TR_SIZE1) -+#define TR_EQ (0xA | TR_SIZE1) -+#define TR_NEQ (0xB | TR_SIZE1) -+ -+#define TR_WRITEWORD (0xC | TR_SIZE1) -+#define TR_WRITEDOUBLEWORD (0xD | TR_SIZE1) -+#define TR_TESTANDWRITE (0xE | TR_SIZE1) -+#define TR_ATOMICADDWORD (0xF | TR_SIZE1 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_OPCODE_TYPE_MASK 0xff -+ -+ -+#endif /* notdef _ELAN3_TRTYPE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/urom_addrs.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/urom_addrs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/urom_addrs.h 2005-06-01 23:12:54.731419040 -0400 -@@ -0,0 +1,262 @@ -+#define MI_WaitForRemoteDescRead 0x0 -+#define MI_WaitForRemoteDescRead2 0x1 -+#define MI_WaitForRemoteDescRead2_seq1 0x2 -+#define MI_SendRemoteDmaRoutes 0x3 -+#define MI_IProcTrapped 0x4 -+#define MI_DProcTrapped 0x5 -+#define MI_CProcTrapped 0x6 -+#define MI_TProcTrapped 0x7 -+#define MI_TestWhichDmaQueue 0x8 -+#define MI_TestWhichDmaQueue_seq1 0x9 -+#define MI_InputRemoteDmaUpdateBPtr 0xa -+#define MI_FixupQueueContextAndRemoteBit 0xb -+#define MI_FixupQueueContextAndRemoteBit_seq1 0xc -+#define MI_FixupQueueContextAndRemoteBit_seq2 0xd -+#define MI_FixupQueueContextAndRemoteBit_seq3 0xe -+#define MI_FixupQueueContextAndRemoteBit_seq4 0xf -+#define MI_RunDmaCommand 0x10 -+#define MI_DoSendRemoteDmaDesc 0x11 -+#define MI_DequeueNonSysCntxDma 0x12 -+#define MI_WaitForRemoteDescRead1 0x13 -+#define MI_RemoteDmaCommand 0x14 -+#define MI_WaitForRemoteRoutes 0x15 -+#define MI_DequeueSysCntxDma 0x16 -+#define MI_ExecuteDmaDescriptorForQueue 0x17 -+#define MI_ExecuteDmaDescriptor1 0x18 -+#define MI_ExecuteDmaDescriptor1_seq1 0x19 -+#define MI_ExecuteDmaDescriptor1_seq2 0x1a -+#define MI_ExecuteDmaDescriptor1_seq3 0x1b -+#define MI_GetNewSizeInProg 0x1c -+#define MI_GetNewSizeInProg_seq1 0x1d -+#define MI_FirstBlockRead 0x1e -+#define MI_ExtraFirstBlockRead 0x1f -+#define MI_UnimplementedError 0x20 -+#define MI_UpdateDescriptor 0x21 -+#define MI_UpdateDescriptor_seq1 0x22 -+#define MI_UpdateDescriptor_seq2 0x23 -+#define MI_UpdateDescriptor_seq3 0x24 -+#define MI_UpdateDescriptor_seq4 0x25 -+#define MI_UpdateDescriptor_seq5 0x26 -+#define MI_GetNextSizeInProg 0x27 -+#define MI_DoStopThisDma 0x28 -+#define MI_DoStopThisDma_seq1 0x29 -+#define MI_GenNewBytesToRead 0x2a -+#define MI_WaitForEventReadTy1 0x2b -+#define MI_WaitUpdateEvent 0x2c -+#define MI_WaitUpdateEvent_seq1 0x2d -+#define MI_DoSleepOneTickThenRunable 0x2e -+#define MI_RunEvent 0x2f -+#define MI_EnqueueThread 0x30 -+#define MI_CheckContext0 0x31 -+#define MI_EnqueueDma 0x32 -+#define MI_CprocTrapping 0x33 -+#define MI_CprocTrapping_seq1 0x34 -+#define MI_WaitForRemoteRoutes1 0x35 -+#define MI_SetEventCommand 0x36 -+#define MI_DoSetEvent 0x37 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma 0x38 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1 0x39 -+#define MI_SendRemoteDmaRoutes2 0x3a -+#define MI_WaitForRemoteRoutes2 0x3b -+#define MI_WaitEventCommandTy0 0x3c -+#define MI_DequeueNonSysCntxDma2 0x3d -+#define MI_WaitEventCommandTy1 0x3e -+#define MI_WaitEventCommandTy1_seq1 0x3f -+#define MI_DequeueNonSysCntxThread 0x40 -+#define MI_DequeueSysCntxDma1 0x41 -+#define MI_DequeueSysCntxThread 0x42 -+#define MI_TestNonSysCntxDmaQueueEmpty 0x43 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq1 0x44 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq2 0x45 -+#define MI_RunThreadCommand 0x46 -+#define MI_SetEventWaitForLastAcess 0x47 -+#define MI_SetEventReadWait 0x48 -+#define MI_SetEventReadWait_seq1 0x49 -+#define MI_TestEventType 0x4a -+#define MI_TestEventType_seq1 0x4b -+#define MI_TestEventBit2 0x4c -+#define MI_DmaDescOrBlockCopyOrChainedEvent 0x4d -+#define MI_RunThread 0x4e -+#define MI_RunThread1 0x4f -+#define MI_RunThread1_seq1 0x50 -+#define MI_IncDmaSysCntxBPtr 0x51 -+#define MI_IncDmaSysCntxBPtr_seq1 0x52 -+#define MI_IncDmaSysCntxBPtr_seq2 0x53 -+#define MI_WaitForCntxDmaDescRead 0x54 -+#define MI_FillInContext 0x55 -+#define MI_FillInContext_seq1 0x56 -+#define MI_WriteNewDescToQueue 0x57 -+#define MI_WriteNewDescToQueue_seq1 0x58 -+#define MI_TestForQueueWrap 0x59 -+#define MI_TestForQueueWrap_seq1 0x5a -+#define MI_TestQueueIsFull 0x5b -+#define MI_TestQueueIsFull_seq1 0x5c -+#define MI_TestQueueIsFull_seq2 0x5d -+#define MI_CheckPsychoShitFixup 0x5e -+#define MI_PsychoShitFixupForcedRead 0x5f -+#define MI_PrepareDMATimeSlice 0x60 -+#define MI_PrepareDMATimeSlice_seq1 0x61 -+#define MI_TProcRestartFromTrapOrTestEventBit2 0x62 -+#define MI_TProcRestartFromTrapOrTestEventBit2_seq1 0x63 -+#define MI_WaitForGlobalsRead 0x64 -+#define MI_WaitForNPCRead 0x65 -+#define MI_EventInterrupt 0x66 -+#define MI_EventInterrupt_seq1 0x67 -+#define MI_EventInterrupt_seq2 0x68 -+#define MI_EventInterrupt_seq3 0x69 -+#define MI_TestSysCntxDmaQueueEmpty 0x6a -+#define MI_TestSysCntxDmaQueueEmpty_seq1 0x6b -+#define MI_TestIfRemoteDesc 0x6c -+#define MI_DoDmaLocalSetEvent 0x6d -+#define MI_DoDmaLocalSetEvent_seq1 0x6e -+#define MI_DoDmaLocalSetEvent_seq2 0x6f -+#define MI_DmaLoop1 0x70 -+#define MI_ExitDmaLoop 0x71 -+#define MI_ExitDmaLoop_seq1 0x72 -+#define MI_RemoteDmaTestPAckType 0x73 -+#define MI_PacketDiscardOrTestFailRecIfCCis0 0x74 -+#define MI_PacketDiscardOrTestFailRecIfCCis0_seq1 0x75 -+#define MI_TestNackFailIsZero2 0x76 -+#define MI_TestNackFailIsZero3 0x77 -+#define MI_DmaFailCountError 0x78 -+#define MI_TestDmaForSysCntx 0x79 -+#define MI_TestDmaForSysCntx_seq1 0x7a -+#define MI_TestDmaForSysCntx_seq2 0x7b -+#define MI_TestAeqB2 0x7c -+#define MI_TestAeqB2_seq1 0x7d -+#define MI_GetNextDmaDescriptor 0x7e -+#define MI_DequeueSysCntxDma2 0x7f -+#define MI_InputSetEvent 0x80 -+#define MI_PutBackSysCntxDma 0x81 -+#define MI_PutBackSysCntxDma_seq1 0x82 -+#define MI_PutBackSysCntxDma_seq2 0x83 -+#define MI_InputRemoteDma 0x84 -+#define MI_InputRemoteDma_seq1 0x85 -+#define MI_WaitOneTickForWakeup1 0x86 -+#define MI_SendRemoteDmaDesc 0x87 -+#define MI_InputLockQueue 0x88 -+#define MI_CloseTheTrappedPacketIfCCis1 0x89 -+#define MI_CloseTheTrappedPacketIfCCis1_seq1 0x8a -+#define MI_PostDmaInterrupt 0x8b -+#define MI_InputUnLockQueue 0x8c -+#define MI_WaitForUnLockDescRead 0x8d -+#define MI_SendEOPforRemoteDma 0x8e -+#define MI_LookAtRemoteAck 0x8f -+#define MI_InputWriteBlockQueue 0x90 -+#define MI_WaitForSpStore 0x91 -+#define MI_TProcNext 0x92 -+#define MI_TProcStoppedRunning 0x93 -+#define MI_InputWriteBlock 0x94 -+#define MI_RunDmaOrDeqNonSysCntxDma 0x95 -+#define MI_ExecuteDmaDescriptorForRun 0x96 -+#define MI_ConfirmQueueLock 0x97 -+#define MI_DmaInputIdentify 0x98 -+#define MI_TProcStoppedRunning2 0x99 -+#define MI_TProcStoppedRunning2_seq1 0x9a -+#define MI_TProcStoppedRunning2_seq2 0x9b -+#define MI_ThreadInputIdentify 0x9c -+#define MI_InputIdWriteAddrAndType3 0x9d -+#define MI_IProcTrappedWriteStatus 0x9e -+#define MI_FinishTrappingEop 0x9f -+#define MI_InputTestTrans 0xa0 -+#define MI_TestAeqB3 0xa1 -+#define MI_ThreadUpdateNonSysCntxBack 0xa2 -+#define MI_ThreadQueueOverflow 0xa3 -+#define MI_RunContext0Thread 0xa4 -+#define MI_RunContext0Thread_seq1 0xa5 -+#define MI_RunContext0Thread_seq2 0xa6 -+#define MI_RunDmaDesc 0xa7 -+#define MI_RunDmaDesc_seq1 0xa8 -+#define MI_RunDmaDesc_seq2 0xa9 -+#define MI_TestAeqB 0xaa -+#define MI_WaitForNonCntxDmaDescRead 0xab -+#define MI_DmaQueueOverflow 0xac -+#define MI_BlockCopyEvent 0xad -+#define MI_BlockCopyEventReadBlock 0xae -+#define MI_BlockCopyWaitForReadData 0xaf -+#define MI_InputWriteWord 0xb0 -+#define MI_TraceSetEvents 0xb1 -+#define MI_TraceSetEvents_seq1 0xb2 -+#define MI_TraceSetEvents_seq2 0xb3 -+#define MI_InputWriteDoubleWd 0xb4 -+#define MI_SendLockTransIfCCis1 0xb5 -+#define MI_WaitForDmaRoutes1 0xb6 -+#define MI_LoadDmaContext 0xb7 -+#define MI_InputTestAndSetWord 0xb8 -+#define MI_InputTestAndSetWord_seq1 0xb9 -+#define MI_GetDestEventValue 0xba -+#define MI_SendDmaIdentify 0xbb -+#define MI_InputAtomicAddWord 0xbc -+#define MI_LoadBFromTransD0 0xbd -+#define MI_ConditionalWriteBackCCTrue 0xbe -+#define MI_WaitOneTickForWakeup 0xbf -+#define MI_SendFinalUnlockTrans 0xc0 -+#define MI_SendDmaEOP 0xc1 -+#define MI_GenLastAddrForPsycho 0xc2 -+#define MI_FailedAckIfCCis0 0xc3 -+#define MI_FailedAckIfCCis0_seq1 0xc4 -+#define MI_WriteDmaSysCntxDesc 0xc5 -+#define MI_TimesliceDmaQueueOverflow 0xc6 -+#define MI_DequeueNonSysCntxThread1 0xc7 -+#define MI_DequeueNonSysCntxThread1_seq1 0xc8 -+#define MI_TestThreadQueueEmpty 0xc9 -+#define MI_ClearThreadQueueIfCC 0xca -+#define MI_DequeueSysCntxThread1 0xcb -+#define MI_DequeueSysCntxThread1_seq1 0xcc -+#define MI_TProcStartUpGeneric 0xcd -+#define MI_WaitForPCload2 0xce -+#define MI_WaitForNPCWrite 0xcf -+#define MI_WaitForEventWaitAddr 0xd0 -+#define MI_WaitForWaitEventAccess 0xd1 -+#define MI_WaitForWaitEventAccess_seq1 0xd2 -+#define MI_WaitForWaitEventDesc 0xd3 -+#define MI_WaitForEventReadTy0 0xd4 -+#define MI_SendCondTestFail 0xd5 -+#define MI_InputMoveToNextTrans 0xd6 -+#define MI_ThreadUpdateSysCntxBack 0xd7 -+#define MI_FinishedSetEvent 0xd8 -+#define MI_EventIntUpdateBPtr 0xd9 -+#define MI_EventQueueOverflow 0xda -+#define MI_MaskLowerSource 0xdb -+#define MI_DmaLoop 0xdc -+#define MI_SendNullSetEvent 0xdd -+#define MI_SendFinalSetEvent 0xde -+#define MI_TestNackFailIsZero1 0xdf -+#define MI_DmaPacketTimedOutOrPacketError 0xe0 -+#define MI_NextPacketIsLast 0xe1 -+#define MI_TestForZeroLengthDma 0xe2 -+#define MI_WaitForPCload 0xe3 -+#define MI_ReadInIns 0xe4 -+#define MI_WaitForInsRead 0xe5 -+#define MI_WaitForLocals 0xe6 -+#define MI_WaitForOutsWrite 0xe7 -+#define MI_WaitForWaitEvWrBack 0xe8 -+#define MI_WaitForLockRead 0xe9 -+#define MI_TestQueueLock 0xea -+#define MI_InputIdWriteAddrAndType 0xeb -+#define MI_InputIdWriteAddrAndType2 0xec -+#define MI_ThreadInputIdentify2 0xed -+#define MI_WriteIntoTrapArea0 0xee -+#define MI_GenQueueBlockWrAddr 0xef -+#define MI_InputDiscardFreeLock 0xf0 -+#define MI_WriteIntoTrapArea1 0xf1 -+#define MI_WriteIntoTrapArea2 0xf2 -+#define MI_ResetBPtrToBase 0xf3 -+#define MI_InputDoTrap 0xf4 -+#define MI_RemoteDmaCntxt0Update 0xf5 -+#define MI_ClearQueueLock 0xf6 -+#define MI_IProcTrappedBlockWriteData 0xf7 -+#define MI_FillContextFilter 0xf8 -+#define MI_IProcTrapped4 0xf9 -+#define MI_RunSysCntxDma 0xfa -+#define MI_ChainedEventError 0xfb -+#define MI_InputTrappingEOP 0xfc -+#define MI_CheckForRunIfZero 0xfd -+#define MI_TestForBreakOrSuspend 0xfe -+#define MI_SwapForRunable 0xff -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/vmseg.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/vmseg.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/vmseg.h 2005-06-01 23:12:54.732418888 -0400 -@@ -0,0 +1,75 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _VM_SEG_ELAN3_H -+#define _VM_SEG_ELAN3_H -+ -+#ident "$Id: vmseg.h,v 1.20 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vmseg.h,v $*/ -+ -+#include -+ -+/* -+ * This segment maps Elan registers, it is fixed size and has 8K -+ * pages split up as follows -+ * -+ * ---------------------------------------- -+ * | Performance Counters (read-only) | -+ * ---------------------------------------- -+ * | Flag Page (read-only) | -+ * ---------------------------------------- -+ * | Command Port | -+ * ---------------------------------------- -+ */ -+typedef volatile struct elan3_flagstats -+{ -+ u_int CommandFlag; -+ u_int PageFaults; -+ u_int CProcTraps; -+ u_int DProcTraps; -+ u_int TProcTraps; -+ u_int IProcTraps; -+ u_int EopBadAcks; -+ u_int EopResets; -+ u_int DmaNetworkErrors; -+ u_int DmaIdentifyNetworkErrors; -+ u_int ThreadIdentifyNetworkErrors; -+ u_int DmaRetries; -+ u_int ThreadSystemCalls; -+ u_int ThreadElanCalls; -+ u_int LoadVirtualProcess; -+} ELAN3_FLAGSTATS; -+ -+#ifdef DIGITAL_UNIX -+typedef volatile union elan3_flagpage -+{ -+ u_char Padding[8192]; -+ ELAN3_FLAGSTATS Stats; -+} ELAN3_FLAGPAGE; -+ -+typedef volatile struct elan3_vmseg -+{ -+ E3_CommandPort CommandPort; -+ ELAN3_FLAGPAGE FlagPage; -+ E3_User_Regs UserRegs; -+} ELAN3_VMSEG; -+ -+#define SEGELAN3_SIZE (sizeof (ELAN3_VMSEG)) -+ -+#define SEGELAN3_COMMAND_PORT 0 -+#define SEGELAN3_FLAG_PAGE 1 -+#define SEGELAN3_PERF_COUNTERS 2 -+ -+#endif /* DIGITAL_UNIX */ -+ -+#endif /* _VM_SEG_ELAN3_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan3/vpd.h -=================================================================== ---- linux-2.4.21.orig/include/elan3/vpd.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan3/vpd.h 2005-06-01 23:12:54.732418888 -0400 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: vpd.h,v 1.5 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vpd.h,v $*/ -+ -+#ifndef __ELAN3_VPD_H -+#define __ELAN3_VPD_H -+ -+#define LARGE_RESOURCE_BIT 0x80 -+ -+#define SMALL_RESOURCE_COMPATIBLE_DEVICE_ID 0x3 -+#define SMALL_RESOURCE_VENDOR_DEFINED 0xE -+#define SMALL_RESOURCE_END_TAG 0xF -+ -+#define LARGE_RESOURCE_STRING 0x2 -+#define LARGE_RESOURCE_VENDOR_DEFINED 0x4 -+#define LARGE_RESOURCE_VITAL_PRODUCT_DATA 0x10 -+ -+#define VPD_PART_NUMBER "PN" -+#define VPD_FRU_PART_NUMBER "FN" -+#define VPD_EC_LEVEL "EC" -+#define VPD_MANUFACTURE_ID "MN" -+#define VPD_SERIAL_NUMBER "SN" -+ -+#define VPD_LOAD_ID "LI" -+#define VPD_ROM_LEVEL "RL" -+#define VPD_ALTERABLE_ROM_LEVEL "RM" -+#define VPD_NETWORK_ADDRESS "NA" -+#define VPD_DEVICE_DRIVER_LEVEL "DD" -+#define VPD_DIAGNOSTIC_LEVEL "DG" -+#define VPD_LOADABLE_MICROCODE_LEVEL "LL" -+#define VPD_VENDOR_ID "VI" -+#define VPD_FUNCTION_NUMBER "FU" -+#define VPD_SUBSYSTEM_VENDOR_ID "SI" -+ -+#endif /* __ELAN3_VPD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/elan4/commands.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/commands.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/commands.h 2005-06-01 23:12:54.733418736 -0400 -@@ -0,0 +1,247 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_COMMANDS_H -+#define __ELAN4_COMMANDS_H -+ -+#ident "$Id: commands.h,v 1.29 2004/06/16 15:45:02 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/commands.h,v $*/ -+ -+/* -+ * This header file describes the command format for the Elan 4 -+ * See CommandFormat.doc -+ */ -+ -+/* -+ * Number of channels in traced elanlib_trace.c -+ */ -+#define TRACE_MAX_CHANNELS 2 -+ -+/* -+ * Define encoding for the commands issued into the command queues -+ */ -+#define RUN_THREAD_CMD 0x00 -+#define OPEN_STEN_PKT_CMD 0x01 -+#define WRITE_DWORD_CMD 0x02 -+#define ADD_DWORD_CMD 0x03 -+#define COPY64_CMD 0x05 -+#define GUARD_CMD 0x06 -+#define SET_EVENT_CMD 0x07 -+#define SEND_TRANS_CMD 0x09 -+#define INTERRUPT_CMD 0x0d -+#define RUN_DMA_CMD 0x0e -+#define SET_EVENTN_CMD 0x0f -+#define NOP_CMD 0x17 -+#define MAKE_EXT_CLEAN_CMD 0x37 -+#define WAIT_EVENT_CMD 0x1f -+ -+/* -+ * Define the portion of the data word the user is NOT -+ * allowed to use. This varies with Commmand type -+ */ -+#define RUN_THREAD_CMD_MASK 0x03 -+#define OPEN_STEN_PKT_CMD_MASK 0x0f -+#define WRITE_DWORD_CMD_MASK 0x07 -+#define ADD_DWORD_CMD_MASK 0x07 -+#define COPY64_CMD_MASK 0x0f -+#define GUARD_CMD_MASK 0x0f -+#define SET_EVENT_CMD_MASK 0x1f -+#define SEND_TRANS_CMD_MASK 0x1f -+#define INTERRUPT_CMD_MASK 0x0f -+#define RUN_DMA_CMD_MASK 0x0f -+#define SET_EVENTN_CMD_MASK 0x1f -+#define NOP_CMD_MASK 0x3f -+#define MAKE_EXT_CLEAN_MASK 0x3f -+#define WAIT_EVENT_CMD_MASK 0x1f -+ -+#define COPY64_DATA_TYPE_SHIFT 0x4 -+#define COPY64_DTYPE_BYTE (0 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_SHORT (1 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_WORD (2 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_LONG (3 << COPY64_DATA_TYPE_SHIFT) -+ -+/* -+ * SET_EVENTN - word 1 has following form -+ * [63:5] Event Address -+ * [4:0] Part Set Value. -+ */ -+#define SET_EVENT_PART_SET_MASK 0x1f -+ -+/* OPEN_STEN_PKT_CMD -+ * [63:32] Vproc -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] Acceptable PAck code -+ * [8:4] Ack Channel Number (1 bit on Elan4) -+ * [3:0] Command type -+ */ -+/* Acceptable PAck code */ -+#define PACK_OK (1 << 0) -+#define PACK_TESTFAIL (1 << 1) -+#define PACK_DISCARD (1 << 2) -+#define RESTART_COUNT_ZERO (1 << 3) -+#define PACK_ERROR (1 << 7) -+#define PACK_TIMEOUT (1 << 8) -+ -+/* -+ *#ifndef USE_DIRTY_COMMANDS -+ *#define USE_DIRTY_COMMANDS -+ *#endif -+ */ -+#ifdef USE_DIRTY_COMMANDS -+#define OPEN_PACKET_USED_MASK 0x00000000780f00e0ULL -+#define SEND_TRANS_USED_MASK 0xffffffff0000fff0ULL -+#define COPY64_WRITE_USED_MASK 0x000000000000000fULL -+#define MAIN_INT_USED_MASK 0x0000000000003ff0ULL -+#define GUARD_USED_MASK 0xfffffe007000fde0ULL -+#define DMA_TYPESIZE_USED_MASK 0x000000000000fff0ULL -+#define SETEVENTN_USED_MASK 0xffffffffffffffe0ULL -+#define NOP_USED_MASK 0xffffffffffffffc0ULL -+#define EXT_CLEAN_USED_MASK 0xffffffffffffffc0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x00000000fffff800ULL -+#else -+#define OPEN_PACKET_USED_MASK 0x0ULL -+#define SEND_TRANS_USED_MASK 0x0ULL -+#define COPY64_WRITE_USED_MASK 0x0ULL -+#define MAIN_INT_USED_MASK 0x0ULL -+#define GUARD_USED_MASK 0x0ULL -+#define DMA_TYPESIZE_USED_MASK 0x0ULL -+#define SETEVENTN_USED_MASK 0x0ULL -+#define NOP_USED_MASK 0x0ULL -+#define EXT_CLEAN_USED_MASK 0x0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x0ULL -+#endif -+ -+#define OPEN_PACKET(chan, code, vproc) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | OPEN_STEN_PKT_CMD) -+ -+#define OPEN_PACKET_TEST(chan, code, vproc, tchan, tcode) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | \ -+ (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | (((E4_uint64) 1) << 31) | OPEN_STEN_PKT_CMD) -+ -+/* -+ * GUARD_CMD -+ * [63:41] unused -+ * [40] Reset Restart Fail Count // only performed if the Guard executes the next command. -+ * [39:32] New Restart Fail Count value -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] unused -+ * [8:4] Ack Channel Number -+ * [3:0] Command type -+ */ -+/* GUARD_CHANNEL(chan) -+ */ -+#define GUARD_ALL_CHANNELS ((1 << 9) | GUARD_CMD) -+#define GUARD_CHANNEL(chan) ((((chan) & 1) << 4) | GUARD_CMD) -+#define GUARD_TEST(chan,code) ((1ull << 31) | (((code) & 0x7f) << 21) | (((chan) & 1) << 16)) -+#define GUARD_RESET(count) ((1ull << 40) | ((((E4_uint64) count) & 0xff) << 32)) -+ -+#define GUARD_CHANNEL_TEST(chan,tchan,tcode) \ -+ ((((chan) & 1) << 4) | (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | \ -+ (((E4_uint64) 1) << 31) | GUARD_CMD) -+ -+/* -+ * SEND_TRANS_CMD -+ * [63:32] unused -+ * [31:16] transaction type -+ * [15:4] unused -+ * [3:0] Command type -+ */ -+#define SEND_TRANS(TransType) (((TransType) << 16) | SEND_TRANS_CMD) -+ -+/* -+ * Command port trace debug levels -+ */ -+#define TRACE_CMD_BUFFER 0x01 -+#define TRACE_CMD_TYPE 0x02 -+#define TRACE_CHANNEL_OPENS 0x04 -+#define TRACE_GUARDED_ATOMICS 0x08 -+#define TRACE_CMD_TIMEOUT 0x10 -+ -+/* -+ * Commands that should be preceeded by a GUARD_CMD. -+ */ -+#define IS_ATOMIC_CMD(cmd) \ -+ ((cmd) == RUN_THREAD_CMD || (cmd) == ADD_DWORD_CMD || (cmd) == INTERRUPT_CMD || \ -+ (cmd) == RUN_DMA_CMD || (cmd) == SET_EVENT_CMD || (cmd) == SET_EVENTN_CMD || \ -+ (cmd) == WAIT_EVENT_CMD) -+ -+#ifndef _ASM -+ -+/* -+ * These structures are used to build event copy command streams. They are intended to be included -+ * in a larger structure to form a self documenting command sequence that can be easily coped and manipulated. -+ */ -+ -+typedef struct e4_runthreadcmd -+{ -+ E4_Addr PC; -+ E4_uint64 r[6]; -+} E4_RunThreadCmd; -+ -+typedef E4_uint64 E4_OpenCmd; -+ -+typedef struct e4_writecmd -+{ -+ E4_Addr WriteAddr; -+ E4_uint64 WriteValue; -+} E4_WriteCmd; -+ -+typedef struct e4_addcmd -+{ -+ E4_Addr AddAddr; -+ E4_uint64 AddValue; -+} E4_AddCmd; -+ -+typedef struct e4_copycmd -+{ -+ E4_Addr SrcAddr; -+ E4_Addr DstAddr; -+} E4_CopyCmd; -+ -+typedef E4_uint64 E4_GaurdCmd; -+typedef E4_uint64 E4_SetEventCmd; -+ -+/* -+ * The data to this command must be declared as a vector after the use of this. -+ */ -+typedef struct e4_sendtranscmd -+{ -+ E4_Addr Type; -+ E4_Addr Addr; -+} E4_SendTransCmd; -+ -+typedef E4_uint64 E4_IntCmd; -+ -+/* The normal Dma struc can be used here. */ -+ -+typedef struct e4_seteventncmd -+{ -+ E4_Addr Event; -+ E4_Addr SetCount; -+} E4_SetEventNCmd; -+ -+typedef E4_uint64 E4_NopCmd; -+typedef E4_uint64 E4_MakeExtCleanCmd; -+ -+typedef struct e4_waitcmd -+{ -+ E4_Addr ev_Event; -+ E4_Addr ev_CountType; -+ E4_Addr ev_Params[2]; -+} E4_WaitCmd; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_COMMANDS_H */ -+ -Index: linux-2.4.21/include/elan4/debug.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/debug.h 2005-06-01 23:12:54.733418736 -0400 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_ELANDEBUG_H -+#define _ELAN4_ELANDEBUG_H -+ -+#ident "$Id: debug.h,v 1.19.6.1 2005/01/18 14:36:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.h,v $ */ -+ -+/* values for "type" field - note a "ctxt" is permissible */ -+/* and BUFFER/CONSOLE are for explict calls to elan4_debugf() */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_USER ((void *) 1) -+ -+#define DBG_BUFFER ((void *) 62) -+#define DBG_CONSOLE ((void *) 63) -+#define DBG_NTYPES 64 -+ -+/* values for "mode" field */ -+#define DBG_CONFIG 0x00000001 -+#define DBG_INTR 0x00000002 -+#define DBG_MAININT 0x00000004 -+#define DBG_SDRAM 0x00000008 -+#define DBG_MMU 0x00000010 -+#define DBG_REGISTER 0x00000020 -+#define DBG_CQ 0x00000040 -+#define DBG_NETWORK_CTX 0x00000080 -+ -+#define DBG_FLUSH 0x00000100 -+#define DBG_FILE 0x00000200 -+#define DBG_CONTROL 0x00000400 -+#define DBG_MEM 0x00000800 -+ -+#define DBG_PERM 0x00001000 -+#define DBG_FAULT 0x00002000 -+#define DBG_SWAP 0x00004000 -+#define DBG_TRAP 0x00008000 -+#define DBG_DDCQ 0x00010000 -+#define DBG_VP 0x00020000 -+#define DBG_RESTART 0x00040000 -+#define DBG_RESUME 0x00080000 -+#define DBG_CPROC 0x00100000 -+#define DBG_DPROC 0x00200000 -+#define DBG_EPROC 0x00400000 -+#define DBG_IPROC 0x00800000 -+#define DBG_TPROC 0x01000000 -+#define DBG_IOPROC 0x02000000 -+#define DBG_ROUTE 0x04000000 -+#define DBG_NETERR 0x08000000 -+ -+#define DBG_ALL 0x7FFFFFFF -+ -+ -+#ifdef DEBUG_PRINTF -+ -+# define PRINTF0(type,m,fmt) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt) : (void)0) -+# define PRINTF1(type,m,fmt,a) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a) : (void)0) -+# define PRINTF2(type,m,fmt,a,b) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b) : (void)0) -+# define PRINTF3(type,m,fmt,a,b,c) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(type,m,fmt,a,b,c,d) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f) : (void)0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h,i): (void)0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m, ##args) : (void)0) -+#endif -+# define DBGCMD(type,m,cmd) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? (void) (cmd) : (void) 0) -+ -+#else -+ -+# define PRINTF0(type,m,fmt) (0) -+# define PRINTF1(type,m,fmt,a) (0) -+# define PRINTF2(type,m,fmt,a,b) (0) -+# define PRINTF3(type,m,fmt,a,b,c) (0) -+# define PRINTF4(type,m,fmt,a,b,c,d) (0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) (0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) (0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) (0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) (0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) (0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) -+#endif -+# define DBGCMD(type,m,cmd) ((void) 0) -+ -+#endif /* DEBUG_PRINTF */ -+ -+extern unsigned elan4_debug; -+extern unsigned elan4_debug_toconsole; -+extern unsigned elan4_debug_tobuffer; -+extern unsigned elan4_debug_display_ctxt; -+extern unsigned elan4_debug_ignore_ctxt; -+extern unsigned elan4_debug_ignore_type; -+ -+extern void elan4_debug_init(void); -+extern void elan4_debug_fini(void); -+extern void elan4_debugf (void *type, int mode, char *fmt, ...); -+extern int elan4_debug_snapshot (caddr_t ubuffer, int len); -+extern int elan4_debug_display (void); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_ELANDEBUG_H */ -Index: linux-2.4.21/include/elan4/device.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/device.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/device.h 2005-06-01 23:12:54.735418432 -0400 -@@ -0,0 +1,781 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_H -+#define __ELAN4_ELANDEV_H -+ -+#ident "$Id: device.h,v 1.68.2.1 2004/11/03 14:24:32 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.h,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#elif defined(TRU64UNIX) -+#include -+#elif defined(SOLARIS) -+#include -+#endif -+ -+/* -+ * Network context number allocation. -+ * [0] neterr fixup system context -+ * [1] kernel comms system context -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN4_NETERR_CONTEXT_NUM 0x00 /* network error fixup context number */ -+#define ELAN4_KCOMM_CONTEXT_NUM 0x01 /* kernel comms context number */ -+#define ELAN4_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN4_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN4_SYSTEM_CONTEXT(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+typedef void (ELAN4_HALTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_haltop -+{ -+ struct list_head op_link; /* chain on a list */ -+ E4_uint32 op_mask; /* Interrupt mask to see before calling function */ -+ -+ ELAN4_HALTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_HALTOP; -+ -+typedef void (ELAN4_DMA_FLUSHFN)(struct elan4_dev *dev, void *arg, int qfull); -+ -+typedef struct elan4_dma_flushop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_DMA_FLUSHFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_DMA_FLUSHOP; -+ -+typedef void (ELAN4_INTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_intop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_INTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+ E4_uint64 op_cookie; /* and main interrupt cookie */ -+} ELAN4_INTOP; -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 19 /* allows max 256 Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+ -+#if PAGE_SHIFT < 13 -+#define SDRAM_PAGE_SIZE 8192 -+#define SDRAM_PGOFF_OFFSET 1 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#else -+#define SDRAM_PAGE_SIZE PAGE_SIZE -+#define SDRAM_PGOFF_OFFSET 0 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#endif -+ -+typedef struct elan4_sdram -+{ -+ sdramaddr_t b_base; /* offset in sdram bar */ -+ unsigned b_size; /* size of bank */ -+ ioaddr_t b_ioaddr; /* ioaddr where mapped into the kernel */ -+ ELAN4_MAP_HANDLE b_handle; /* and mapping handle */ -+ bitmap_t *b_bitmaps[SDRAM_NUM_FREE_LISTS]; /* buddy allocator bitmaps */ -+} ELAN4_SDRAM_BANK; -+ -+/* command queue */ -+typedef struct elan4_cq -+{ -+ struct elan4_cqa *cq_cqa; /* command queue allocator this belongs to */ -+ unsigned cq_idx; /* and which command queue this is */ -+ -+ sdramaddr_t cq_space; /* sdram backing up command queue */ -+ unsigned cq_size; /* size value */ -+ unsigned cq_perm; /* permissions */ -+ ioaddr_t cq_mapping; /* mapping of command queue page */ -+ ELAN4_MAP_HANDLE cq_handle; /* and mapping handle */ -+} ELAN4_CQ; -+ -+/* cqtype flags to elan4_alloccq() */ -+#define CQ_Priority (1 << 0) -+#define CQ_Reorder (1 << 1) -+ -+/* command queues are allocated in chunks,so that all the -+ * command ports are in a single system page */ -+#define ELAN4_CQ_PER_CQA MAX(1, (PAGESIZE/CQ_CommandMappingSize)) -+ -+/* maximum number of command queues per context */ -+#define ELAN4_MAX_CQA (256 / ELAN4_CQ_PER_CQA) -+ -+typedef struct elan4_cqa -+{ -+ struct list_head cqa_link; /* linked together */ -+ bitmap_t cqa_bitmap[BT_BITOUL(ELAN4_CQ_PER_CQA)]; /* bitmap of which are free */ -+ unsigned int cqa_type; /* allocation type */ -+ unsigned int cqa_cqnum; /* base cq number */ -+ unsigned int cqa_ref; /* "mappings" to a queue */ -+ unsigned int cqa_idx; /* index number */ -+ ELAN4_CQ cqa_cq[ELAN4_CQ_PER_CQA]; /* command queue entries */ -+} ELAN4_CQA; -+ -+#define elan4_cq2num(cq) ((cq)->cq_cqa->cqa_cqnum + (cq)->cq_idx) -+#define elan4_cq2idx(cq) ((cq)->cq_cqa->cqa_idx * ELAN4_CQ_PER_CQA + (cq)->cq_idx) -+ -+typedef struct elan4_ctxt -+{ -+ struct elan4_dev *ctxt_dev; /* device we're associated with */ -+ struct list_head ctxt_link; /* chained on device */ -+ -+ struct elan4_trap_ops *ctxt_ops; /* client specific operations */ -+ -+ signed ctxt_num; /* local context number */ -+ -+ struct list_head ctxt_cqalist; /* link list of command queue allocators */ -+ bitmap_t ctxt_cqamap[BT_BITOUL(ELAN4_MAX_CQA)]; /* bitmap for allocating cqa_idx */ -+ -+ ELAN4_HASH_ENTRY **ctxt_mmuhash[2]; /* software hash tables */ -+ spinlock_t ctxt_mmulock; /* and spinlock. */ -+} ELAN4_CTXT; -+ -+typedef struct elan4_trap_ops -+{ -+ void (*op_eproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_cproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum); -+ void (*op_dproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_tproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_iproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_interrupt) (ELAN4_CTXT *ctxt, E4_uint64 cookie); -+ void (*op_neterrmsg) (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg); -+} ELAN4_TRAP_OPS; -+ -+typedef struct elan4_route_table -+{ -+ spinlock_t tbl_lock; -+ unsigned tbl_size; -+ sdramaddr_t tbl_entries; -+} ELAN4_ROUTE_TABLE; -+ -+#ifdef ELAN4_LARGE_PAGE_SUPPORT -+#define NUM_HASH_TABLES 2 -+#else -+#define NUM_HASH_TABLES 1 -+#endif -+ -+#define DEV_STASH_ROUTE_COUNT 20 -+ -+typedef struct elan4_route_ringbuf { -+ int start; -+ int end; -+ E4_VirtualProcessEntry routes[DEV_STASH_ROUTE_COUNT]; -+} ELAN4_ROUTE_RINGBUF; -+ -+#define elan4_ringbuf_init(ringbuf) memset(&ringbuf, 0, sizeof(ELAN4_ROUTE_RINGBUF)); -+ -+typedef struct elan4_dev -+{ -+ ELAN4_CTXT dev_ctxt; /* context for device operations */ -+ -+ ELAN4_DEV_OSDEP dev_osdep; /* OS specific entries */ -+ -+ int dev_instance; /* device number */ -+ ELAN_DEVINFO dev_devinfo; /* device information (revision etc */ -+ ELAN_POSITION dev_position; /* position connected to switch */ -+ ELAN_DEV_IDX dev_idx; /* device idx registered with elanmod */ -+ -+ kmutex_t dev_lock; /* lock for device state/references */ -+ unsigned dev_state; /* device state */ -+ unsigned dev_references; /* # references */ -+ unsigned dev_features; /* features supported */ -+ -+ ioaddr_t dev_regs; /* Mapping of device registers */ -+ ELAN4_MAP_HANDLE dev_regs_handle; -+ ioaddr_t dev_rom; /* Mapping of rom */ -+ ELAN4_MAP_HANDLE dev_rom_handle; -+ ioaddr_t dev_i2c; /* Mapping of I2C registers */ -+ ELAN4_MAP_HANDLE dev_i2c_handle; -+ -+ E4_uint64 dev_sdram_cfg; /* SDRAM config value (from ROM) */ -+ E4_uint64 dev_sdram_initial_ecc_val; /* power on ECC register value */ -+ int dev_sdram_numbanks; /* # banks of sdram */ -+ ELAN4_SDRAM_BANK dev_sdram_banks[SDRAM_MAX_BANKS]; /* Mapping of sdram banks */ -+ spinlock_t dev_sdram_lock; /* spinlock for buddy allocator */ -+ sdramaddr_t dev_sdram_freelists[SDRAM_NUM_FREE_LISTS]; -+ unsigned dev_sdram_freecounts[SDRAM_NUM_FREE_LISTS]; -+ -+ sdramaddr_t dev_cacheflush_space; /* sdram reserved for cache flush operation */ -+ -+ sdramaddr_t dev_faultarea; /* fault areas for each unit */ -+ sdramaddr_t dev_inputtraparea; /* trap area for trapped transactions */ -+ sdramaddr_t dev_ctxtable; /* context table (E4_ContextControlBlock) */ -+ int dev_ctxtableshift; /* and size (in bits) */ -+ -+ E4_uint32 dev_syscontrol; /* copy of system control register */ -+ spinlock_t dev_syscontrol_lock; /* spinlock to sequentialise modifications */ -+ unsigned dev_direct_map_pci_writes; /* # counts for CONT_DIRECT_MAP_PCI_WRITES */ -+ -+ volatile E4_uint32 dev_intmask; /* copy of interrupt mask register */ -+ spinlock_t dev_intmask_lock; /* spinlock to sequentialise modifications */ -+ -+ /* i2c section */ -+ spinlock_t dev_i2c_lock; /* spinlock for i2c operations */ -+ unsigned int dev_i2c_led_disabled; /* count of reasons led auto update disabled */ -+ -+ /* mmu section */ -+ unsigned dev_pagesizeval[NUM_HASH_TABLES]; /* page size value */ -+ unsigned dev_pageshift[NUM_HASH_TABLES]; /* pageshift in bits. */ -+ unsigned dev_hashsize[NUM_HASH_TABLES]; /* # entries in mmu hash table */ -+ sdramaddr_t dev_hashtable[NUM_HASH_TABLES]; /* mmu hash table */ -+ ELAN4_HASH_ENTRY *dev_mmuhash[NUM_HASH_TABLES]; /* and software shadow */ -+ ELAN4_HASH_ENTRY **dev_mmufree[NUM_HASH_TABLES]; /* and partially free blocks */ -+ ELAN4_HASH_ENTRY *dev_mmufreelist; /* and free blocks */ -+ spinlock_t dev_mmulock; -+ E4_uint16 dev_topaddr[4]; /* top address values */ -+ unsigned char dev_topaddrvalid; -+ unsigned char dev_topaddrmode; -+ unsigned char dev_pteval; /* allow setting of relaxed order/dont snoop attributes */ -+ -+ unsigned dev_rsvd_hashmask[NUM_HASH_TABLES]; -+ unsigned dev_rsvd_hashval[NUM_HASH_TABLES]; -+ -+ /* run queues */ -+ sdramaddr_t dev_comqlowpri; /* CProc low & high pri run queues */ -+ sdramaddr_t dev_comqhighpri; -+ -+ sdramaddr_t dev_dmaqlowpri; /* DProc,TProc,Interrupt queues */ -+ sdramaddr_t dev_dmaqhighpri; -+ sdramaddr_t dev_threadqlowpri; -+ sdramaddr_t dev_threadqhighpri; -+ sdramaddr_t dev_interruptq; -+ -+ E4_uint32 dev_interruptq_nfptr; /* cache next main interrupt fptr */ -+ struct list_head dev_interruptq_list; /* list of operations to call when space in interruptq*/ -+ -+ /* command queue section */ -+ sdramaddr_t dev_cqaddr; /* SDRAM address of command queues */ -+ unsigned dev_cqoffset; /* offset for command queue alignment constraints */ -+ unsigned dev_cqcount; /* number of command queue descriptors */ -+ bitmap_t *dev_cqamap; /* bitmap for allocation */ -+ spinlock_t dev_cqlock; /* spinlock to protect bitmap */ -+#ifdef CONFIG_MTRR -+ unsigned dev_cqreorder; /* offset for first re-ordering queue on revb */ -+#endif -+ -+ /* halt operation section */ -+ struct list_head dev_haltop_list; /* list of operations to call when units halted */ -+ E4_uint32 dev_haltop_mask; /* mask of which ones to halt */ -+ E4_uint32 dev_haltop_active; /* mask of which haltops are executing */ -+ spinlock_t dev_haltop_lock; /* and their spinlock */ -+ -+ struct { -+ struct list_head list; /* list of halt operations for DMAs */ -+ ELAN4_CQ *cq; /* and command queue's */ -+ ELAN4_INTOP intop; /* and main interrupt op */ -+ E4_uint64 status; /* status register (when waiting for intop)*/ -+ } dev_dma_flushop[2]; -+ -+ unsigned dev_halt_all_count; /* count of reasons to halt all units */ -+ unsigned dev_halt_lowpri_count; /* count of reasons to halt lowpri queues */ -+ unsigned dev_halt_cproc_count; /* count of reasons to halt command processor */ -+ unsigned dev_halt_dproc_count; /* count of reasons to halt dma processor */ -+ unsigned dev_halt_tproc_count; /* count of reasons to halt thread processor */ -+ unsigned dev_discard_all_count; /* count of reasons to discard all packets */ -+ unsigned dev_discard_lowpri_count; /* count of reasons to discard non-system packets */ -+ unsigned dev_discard_highpri_count; /* count of reasons to discard system packets */ -+ -+ E4_uint32 dev_schedstatus; /* copy of schedule status register */ -+ -+ /* local context allocation section */ -+ spinlock_t dev_ctxlock; /* spinlock to protect bitmap */ -+ bitmap_t *dev_ctxmap; /* bitmap for local context allocation */ -+ -+ spinlock_t dev_ctxt_lock; /* spinlock to protect context list */ -+ struct list_head dev_ctxt_list; /* linked list of contexts */ -+ -+ /* locks to sequentialise interrupt handling */ -+ spinlock_t dev_trap_lock; /* spinlock while handling a trap */ -+ spinlock_t dev_requeue_lock; /* spinlock sequentialising cproc requeue */ -+ -+ /* error rate interrupt section */ -+ long dev_error_time; /* lbolt at start of sampling period */ -+ unsigned dev_errors_per_period; /* errors so far this sampling period */ -+ timer_fn_t dev_error_timeoutid; /* timeout to re-enable error interrupts */ -+ timer_fn_t dev_linkerr_timeoutid; /* timeout to clear link error led */ -+ -+ /* kernel threads */ -+ unsigned dev_stop_threads:1; /* kernel threads should exit */ -+ -+ /* main interrupt thread */ -+ kcondvar_t dev_mainint_wait; /* place for mainevent interrupt thread to sleep */ -+ spinlock_t dev_mainint_lock; /* and it's spinlock */ -+ unsigned dev_mainint_started:1; -+ unsigned dev_mainint_stopped:1; -+ -+ /* device context - this is used to flush insert cache/instruction cache/dmas & threads */ -+ ELAN4_CPROC_TRAP dev_cproc_trap; /* space to extract cproc trap into */ -+ -+ struct list_head dev_intop_list; /* list of main interrupt operations */ -+ spinlock_t dev_intop_lock; /* and spinlock */ -+ E4_uint64 dev_intop_cookie; /* and next cookie to use */ -+ -+ spinlock_t dev_flush_lock; /* spinlock for flushing */ -+ kcondvar_t dev_flush_wait; /* and place to sleep */ -+ -+ ELAN4_CQ *dev_flush_cq[COMMAND_INSERTER_CACHE_ENTRIES]; /* command queues to flush the insert cache */ -+ ELAN4_INTOP dev_flush_op[COMMAND_INSERTER_CACHE_ENTRIES]; /* and a main interrupt operation for each one */ -+ unsigned dev_flush_finished; /* flush command finished */ -+ -+ ELAN4_HALTOP dev_iflush_haltop; /* halt operation for icache flush */ -+ unsigned dev_iflush_queued:1; /* icache haltop queued */ -+ -+ ELAN4_ROUTE_TABLE *dev_routetable; /* virtual process table (for dma queue flush)*/ -+ sdramaddr_t dev_sdrampages[2]; /* pages of sdram to hold suspend code sequence */ -+ E4_Addr dev_tproc_suspend; /* st8suspend instruction */ -+ E4_Addr dev_tproc_space; /* and target memory */ -+ -+ sdramaddr_t dev_neterr_inputq; /* network error input queue descriptor & event */ -+ sdramaddr_t dev_neterr_slots; /* network error message slots */ -+ ELAN4_CQ *dev_neterr_msgcq; /* command queue for sending messages */ -+ ELAN4_CQ *dev_neterr_intcq; /* command queue for message received interrupt */ -+ ELAN4_INTOP dev_neterr_intop; /* and it's main interrupt operation */ -+ E4_uint64 dev_neterr_queued; /* # message queued in msgcq */ -+ spinlock_t dev_neterr_lock; /* and spinlock .... */ -+ -+ ELAN4_DEV_STATS dev_stats; /* device statistics */ -+ E4_uint64 dev_sdramerrs[30]; /* last few sdram errors for procfs */ -+ -+ spinlock_t dev_error_routes_lock; -+ unsigned int *dev_ack_errors; /* Map of source of dproc ack errors */ -+ ELAN4_ROUTE_RINGBUF dev_ack_error_routes; -+ unsigned int *dev_dproc_timeout; /* Ditto dproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_dproc_timeout_routes; -+ unsigned int *dev_cproc_timeout; /* Ditto cproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_cproc_timeout_routes; -+ -+ struct list_head dev_hc_list; /* list of the allocated hash_chunks */ -+ -+ ELAN4_IPROC_TRAP dev_iproc_trap; /* space for iproc trap */ -+} ELAN4_DEV; -+ -+/* values for dev_state */ -+#define ELAN4_STATE_STOPPED (1 << 0) /* device initialised but not started */ -+#define ELAN4_STATE_STARTING (1 << 1) /* device in process of starting */ -+#define ELAN4_STATE_STARTED (1 << 2) /* device started */ -+#define ELAN4_STATE_STOPPING (1 << 3) /* device in process of stopping */ -+ -+/* values for dev_features */ -+#define ELAN4_FEATURE_NO_WRITE_COMBINE (1 << 0) /* don't allow write combinig at all */ -+#define ELAN4_FEATURE_PCI_MAP (1 << 1) /* must use pci mapping functions */ -+#define ELAN4_FEATURE_NO_DWORD_READ (1 << 2) /* must perform 64 bit PIO reads */ -+ -+extern __inline__ unsigned int -+__elan4_readb (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_features & ELAN4_FEATURE_NO_DWORD_READ) -+ { -+ uint64_t val = readq ((void *) ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xff); -+ } -+ return readb (addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readw (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_features & ELAN4_FEATURE_NO_DWORD_READ) -+ { -+ uint64_t val = readq ((void *) ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffff); -+ } -+ return readw (addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readl (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_features & ELAN4_FEATURE_NO_DWORD_READ) -+ { -+ uint64_t val = readq ((void *) ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffffffff); -+ } -+ return readl (addr); -+} -+ -+/* macros for accessing dev->dev_regs.Tags. */ -+#define write_tag(dev,what,val) writeq (val, dev->dev_regs + offsetof (E4_Registers, Tags.what)) -+#define read_tag(dev,what) readq (dev->dev_regs + offsetof (E4_Registers, Tags.what)) -+ -+/* macros for accessing dev->dev_regs.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+#define write_reg32(dev,what,val) writel (val, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+#define read_reg64(dev,what) readq (dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+#define read_reg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+ -+/* macros for accessing dev->dev_regs.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+#define write_ureg32(dev,what,val) writel (val, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+#define read_ureg64(dev,what) readq (dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+#define read_ureg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+ -+/* macros for accessing dev->dev_i2c */ -+#define write_i2c(dev,what,val) writeb (val, dev->dev_i2c + offsetof (E4_I2C, what)) -+#define read_i2c(dev,what) __elan4_readb (dev, dev->dev_i2c + offsetof (E4_I2C, what)) -+ -+/* macros for accessing dev->dev_rom */ -+#define read_ebus_rom(dev,off) __elan4_readb (dev, dev->dev_rom + off) -+ -+/* PIO flush operations - ensure writes to registers/sdram are ordered */ -+#ifdef CONFIG_IA64_SGI_SN2 -+#define pioflush_reg(dev) read_reg32(dev,InterruptReg) -+#define pioflush_sdram(dev) elan4_sdram_readl(dev, 0) -+#else -+#define pioflush_reg(dev) mb() -+#define pioflush_sdram(dev) mb() -+#endif -+ -+/* macros for manipulating the interrupt mask register */ -+#define SET_INT_MASK(dev,value) \ -+do { \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask = (value)); \ -+ pioflush_reg(dev);\ -+} while (0) -+ -+#define CHANGE_INT_MASK(dev, value) \ -+do { \ -+ if ((dev)->dev_intmask != (value)) \ -+ {\ -+ write_reg32 (dev, InterruptMask, (dev)->dev_intmask = (value));\ -+ pioflush_reg(dev);\ -+ }\ -+} while (0) -+ -+#define ENABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define DISABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask &= ~(value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define SET_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if ((dev)->what++ == 0) \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CLEAR_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if (--(dev)->what == 0)\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol &= ~(value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define PULSE_SYSCONTROL(dev,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol | (value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CHANGE_SYSCONTROL(dev,add,sub) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ dev->dev_syscontrol |= (add);\ -+ dev->dev_syscontrol &= ~(sub);\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol);\ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define SET_SCHED_STATUS(dev, value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+#define CHANGE_SCHED_STATUS(dev, value)\ -+do {\ -+ if ((dev)->dev_schedstatus != (value))\ -+ {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+ }\ -+} while (0) -+ -+#define PULSE_SCHED_RESTART(dev,value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Restart, value);\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+/* device context elan address space */ -+#define DEVICE_TPROC_SUSPEND_ADDR (0x1000000000000000ull) -+#define DEVICE_TPROC_SPACE_ADDR (0x1000000000000000ull + SDRAM_PAGE_SIZE) -+#if defined(__LITTLE_ENDIAN__) -+# define DEVICE_TPROC_SUSPEND_INSTR 0xd3f040c0 /* st64suspend %r16, [%r1] */ -+#else -+# define DEVICE_TPROC_SUSPEND_INSTR 0xc040f0d3 /* st64suspend %r16, [%r1] */ -+#endif -+ -+#define DEVICE_NETERR_INPUTQ_ADDR (0x2000000000000000ull) -+#define DEVICE_NETERR_INTCQ_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE) -+#define DEVICE_NETERR_SLOTS_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE*2) -+ -+/* -+ * Interrupt operation cookie space -+ * [50:48] type -+ * [47:0] value -+ */ -+#define INTOP_PERSISTENT (0x1000000000000ull) -+#define INTOP_ONESHOT (0x2000000000000ull) -+#define INTOP_TYPE_MASK (0x3000000000000ull) -+#define INTOP_VALUE_MASK (0x0ffffffffffffull) -+ -+/* functions for accessing sdram - sdram.c */ -+extern unsigned char elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern void elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+/* device.c - configuration */ -+extern unsigned int elan4_hash_0_size_val; -+extern unsigned int elan4_hash_1_size_val; -+extern unsigned int elan4_ctxt_table_shift; -+extern unsigned int elan4_ln2_max_cqs; -+extern unsigned int elan4_dmaq_highpri_size; -+extern unsigned int elan4_threadq_highpri_size; -+extern unsigned int elan4_dmaq_lowpri_size; -+extern unsigned int elan4_threadq_lowpri_size; -+extern unsigned int elan4_interruptq_size; -+ -+extern unsigned int elan4_mainint_punt_loops; -+extern unsigned int elan4_mainint_resched_ticks; -+ -+ -+/* device.c */ -+extern void elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg); -+extern void elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op); -+extern void elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op); -+extern void elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri); -+extern void elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+ -+extern int elan4_1msi0 (ELAN4_DEV *dev); -+ -+extern int elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops); -+extern void elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt); -+extern ELAN4_CTXT *elan4_localctxt (ELAN4_DEV *dev, unsigned num); -+extern ELAN4_CTXT *elan4_networkctxt (ELAN4_DEV *dev, unsigned num); -+ -+extern int elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state); -+extern void elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl); -+ -+extern ELAN4_CQA * elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern void elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern ELAN4_CQ *elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned cqperm, unsigned cqtype); -+extern void elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq); -+extern void elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart); -+ -+extern void elan4_flush_icache (ELAN4_CTXT *ctxt); -+extern void elan4_flush_icache_halted (ELAN4_CTXT *ctxt); -+ -+extern int elan4_initialise_device (ELAN4_DEV *dev); -+extern void elan4_finalise_device (ELAN4_DEV *dev); -+extern int elan4_start_device (ELAN4_DEV *dev); -+extern void elan4_stop_device (ELAN4_DEV *dev); -+ -+extern int elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned aritiyval); -+extern int elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern int elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern void elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask); -+extern void elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask); -+ -+ -+extern int elan4_read_vpd(ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) ; -+ -+ -+/* device_osdep.c */ -+extern unsigned int elan4_pll_cfg; -+extern int elan4_pll_div; -+extern int elan4_mod45disable; -+ -+extern int elan4_pciinit (ELAN4_DEV *dev); -+extern void elan4_pcifini (ELAN4_DEV *dev); -+extern void elan4_pcierror (ELAN4_DEV *dev); -+ -+extern ELAN4_DEV *elan4_reference_device (int instance, int state); -+extern void elan4_dereference_device (ELAN4_DEV *dev); -+ -+extern ioaddr_t elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern void elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern unsigned long elan4_resource_len (ELAN4_DEV *dev, unsigned bar); -+ -+extern void elan4_configure_mtrr (ELAN4_DEV *dev); -+extern void elan4_unconfigure_mtrr (ELAN4_DEV *dev); -+ -+/* i2c.c */ -+extern int i2c_disable_auto_led_update (ELAN4_DEV *dev); -+extern void i2c_enable_auto_led_update (ELAN4_DEV *dev); -+extern int i2c_write (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_read (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_writereg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_readreg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+ -+#if defined(__linux__) -+/* procfs_Linux.c */ -+extern void elan4_procfs_device_init (ELAN4_DEV *dev); -+extern void elan4_procfs_device_fini (ELAN4_DEV *dev); -+extern void elan4_procfs_init(void); -+extern void elan4_procfs_fini(void); -+ -+extern struct proc_dir_entry *elan4_procfs_root; -+extern struct proc_dir_entry *elan4_config_root; -+#endif -+ -+/* sdram.c */ -+extern void elan4_sdram_init (ELAN4_DEV *dev); -+extern void elan4_sdram_fini (ELAN4_DEV *dev); -+extern void elan4_sdram_setup_delay_lines (ELAN4_DEV *dev); -+extern int elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern sdramaddr_t elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes); -+extern void elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t base, int nbytes); -+extern char *elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, char *str); -+ -+/* traps.c */ -+extern void elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap); -+extern void elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap); -+extern void elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap); -+extern void elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap); -+extern void elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap); -+ -+ -+extern void elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent); -+extern void elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum); -+extern void elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit); -+extern void elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap); -+extern void elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit); -+extern void elan4_ringbuf_store(ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev); -+extern int cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq); -+ -+extern void elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap); -+extern E4_uint64 elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq); -+ -+/* mmu.c */ -+extern void elan4mmu_flush_tlb (ELAN4_DEV *dev); -+extern ELAN4_HASH_ENTRY *elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp); -+extern int elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, E4_uint64 pte); -+extern void elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len); -+extern void elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt); -+ -+extern ELAN4_HASH_CACHE *elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep); -+extern void elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc); -+extern void elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte); -+extern E4_uint64 elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+extern void elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+ -+/* mmu_osdep.c */ -+extern int elan4mmu_categorise_paddr (ELAN4_DEV *dev, physaddr_t *physp); -+extern int elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type); -+extern E4_uint64 elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t paddr, unsigned perm); -+extern physaddr_t elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte); -+ -+/* neterr.c */ -+extern int elan4_neterr_init (ELAN4_DEV *dev); -+extern void elan4_neterr_destroy (ELAN4_DEV *dev); -+extern int elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap); -+ -+/* routetable.c */ -+extern ELAN4_ROUTE_TABLE *elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size); -+extern void elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl); -+extern void elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp); -+extern int elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctxnum, -+ unsigned lowid, unsigned highid, unsigned options); -+extern int elan4_check_route (ELAN_POSITION *pos, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags); -+ -+/* user.c */ -+extern int __categorise_command (E4_uint64 command, int *cmdSize); -+extern int __whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_H */ -Index: linux-2.4.21/include/elan4/device_Linux.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/device_Linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/device_Linux.h 2005-06-01 23:12:54.735418432 -0400 -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_LINUX_H -+#define __ELAN4_ELANDEV_LINUX_H -+ -+#ident "$Id: device_Linux.h,v 1.19 2004/08/09 14:02:37 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.h,v $*/ -+ -+#include -+ -+#if defined(MPSAS) -+#include -+#endif -+ -+#if defined(CONFIG_DEVFS_FS) -+#include -+#endif -+ -+#define ELAN4_MAJOR 61 -+#define ELAN4_NAME "elan4" -+#define ELAN4_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+/* OS dependant component of ELAN4_DEV struct */ -+typedef struct elan4_dev_osdep -+{ -+ struct pci_dev *pdev; /* PCI config data */ -+ -+ struct proc_dir_entry *procdir; -+ struct proc_dir_entry *configdir; -+ struct proc_dir_entry *statsdir; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle_t devfs_control; -+ devfs_handle_t devfs_sdram; -+ devfs_handle_t devfs_user; -+#endif -+ -+#if defined(CONFIG_MTRR) -+ int sdram_mtrr; -+ int regs_mtrr; -+#endif -+} ELAN4_DEV_OSDEP; -+ -+/* /dev/elan/rmsX */ -+ -+/* /dev/elan4/controlX */ -+typedef struct control_private -+{ -+ struct elan4_dev *pr_dev; -+ unsigned pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+/* /dev/elan4/sdramX */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ unsigned long pg_pgoff; -+ unsigned pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ struct elan4_dev *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+/* /dev/elan4/userX */ -+typedef struct user_private -+{ -+ atomic_t pr_ref; -+ struct user_ctxt *pr_uctx; -+ struct mm_struct *pr_mm; -+ coproc_ops_t pr_coproc; -+} USER_PRIVATE; -+ -+/* No mapping handles on linux */ -+typedef void *ELAN4_MAP_HANDLE; -+ -+#define ELAN4_TASK_HANDLE() ((unsigned long) current->mm) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_LINUX_H */ -Index: linux-2.4.21/include/elan4/dma.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/dma.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/dma.h 2005-06-01 23:12:54.736418280 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_DMA_H -+#define __ELAN4_DMA_H -+ -+#ident "$Id: dma.h,v 1.16 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/dma.h,v $*/ -+ -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E4_DMA_ALIGN (64) -+ -+/* Maximum size of a single DMA ((1 << 31)-1) */ -+#define E4_MAX_DMA_SIZE (0x7fffffff) -+ -+/* -+ * dma_typeSize -+ * -+ * [63:32] Size -+ * [31] unused -+ * [30] IsRemote -+ * [29] QueueWrite -+ * [28] ShmemWrite -+ * [27:26] DataType -+ * [25] Broadcast -+ * [24] AlignPackets -+ * [23:16] FailCount -+ * [15:14] unused -+ * [13:0] Context -+ */ -+ -+#define DMA_FailCount(val) (((val) & 0xff) << 16) -+#define DMA_AlignPackets (1 << 24) -+#define DMA_Broadcast (1 << 25) -+#define DMA_ShMemWrite (1 << 28) -+#define DMA_QueueWrite (1 << 29) -+#define DMA_IsRemote (1 << 30) -+#define DMA_Context(val) ((unsigned) (val) & 0x3ff) -+#define DMA_ContextMask 0x3fffull -+#define Dma_TypeSizeMask 0xfffffffffff00000ull -+ -+#define DMA_DataTypeByte (E4_DATATYPE_BYTE << 26) -+#define DMA_DataTypeShort (E4_DATATYPE_SHORT << 26) -+#define DMA_DataTypeWord (E4_DATATYPE_WORD << 26) -+#define DMA_DataTypeLong (E4_DATATYPE_DWORD << 26) -+ -+#define E4_DMA_TYPE_SIZE(size, dataType, flags, failCount) \ -+ ((((E4_uint64)(size)) << 32) | ((dataType) & DMA_DataTypeLong) | \ -+ (flags) | DMA_FailCount(failCount)) -+ -+typedef volatile struct e4_dma -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+} E4_DMA; -+ -+/* Same as above but padded to 64-bytes */ -+typedef volatile struct e4_dma64 -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+ E4_Addr dma_pad; -+} E4_DMA64; -+ -+#endif /* __ELAN4_DMA_H */ -Index: linux-2.4.21/include/elan4/events.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/events.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/events.h 2005-06-01 23:12:54.736418280 -0400 -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_EVENTS_H -+#define __ELAN4_EVENTS_H -+ -+#ident "$Id: events.h,v 1.22 2004/06/23 11:07:18 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/events.h,v $*/ -+ -+#define E4_EVENT_ALIGN 32 -+#define E4_EVENTBLOCK_SIZE 64 -+ -+#ifndef _ASM -+/* -+ * Event locations must be aligned to a 32 byte boundary. It is very much more efficent to place -+ * them in elan local memory but is not essential. -+ */ -+typedef struct _E4_Event -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+} E4_Event; -+ -+/* Same as above but padded to correct Event alignment */ -+typedef struct _E4_Event32 -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+ E4_uint64 ev_pad; -+} E4_Event32; -+ -+/* -+ * An E4_EVENTBLOCK_SIZE aligned block of Main or Elan memory -+ */ -+typedef union _E4_Event_Blk -+{ -+ /* Padded to 64-bytes in case a cache-line write is more efficient */ -+ volatile E4_uint8 eb_unit8[E4_EVENTBLOCK_SIZE]; -+ volatile E4_uint32 eb_uint32[E4_EVENTBLOCK_SIZE/sizeof(E4_uint32)]; -+ volatile E4_uint64 eb_uint64[E4_EVENTBLOCK_SIZE/sizeof(E4_uint64)]; -+} E4_Event_Blk; -+#define eb_done eb_uint32[14] -+#define eb_done_dword eb_uint64[7] -+ -+#endif /* ! _ASM */ -+ -+/* -+ * ev_CountAndType -+ * [63:31] Count -+ * [10] CopyType -+ * [9:8] DataType -+ * [7:0] CopySize -+ */ -+#define E4_EVENT_TYPE_MASK 0x00000000ffffffffull -+#define E4_EVENT_COUNT_MASK 0xffffffff00000000ull -+#define E4_EVENT_COUNT_SHIFT 32 -+#define E4_EVENT_COPY_TYPE_MASK (1 << 10) -+#define E4_EVENT_DATA_TYPE_MASK (3 << 8) -+#define E4_EVENT_COPY_SIZE_MASK (0xff) -+ -+/* CopyType */ -+#define E4_EVENT_COPY (0 << 10) -+#define E4_EVENT_WRITE (1 << 10) -+ -+/* DataType */ -+#define E4_EVENT_DTYPE_BYTE (0 << 8) -+#define E4_EVENT_DTYPE_SHORT (1 << 8) -+#define E4_EVENT_DTYPE_WORD (2 << 8) -+#define E4_EVENT_DTYPE_LONG (3 << 8) -+ -+#define EVENT_COUNT(EventPtr) ((E4_int32)(elan4_load64 (&(EventPtr)->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT)) -+#define EVENT_TYPE(EventPtr) ((E4_uint32)(elan4_load64 (&(EventPtr)->ev_CountAndType) & E4_EVENT_TYPE_MASK)) -+ -+#define E4_WAITEVENT_COUNT_TYPE_VALUE(Count, EventType, DataType, CopySize) \ -+ (((E4_uint64)(Count) << E4_EVENT_COUNT_SHIFT) | (EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize) \ -+ ((EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_INIT_VALUE(InitialCount, EventType, DataType, CopySize) \ -+ (((E4_uint64)(InitialCount) << E4_EVENT_COUNT_SHIFT) | E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize)) -+ -+#define ev_CopySource ev_Params[0] -+#define ev_CopyDest ev_Params[1] -+#define ev_WritePtr ev_Params[0] -+#define ev_WriteValue ev_Params[1] -+ -+#define EVENT_BLK_READY(BLK) ((BLK)->eb_done != 0) -+#define EVENT_READY(EVENT) ((E4_uint32)((((volatile E4_Event *) (EVENT))->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT) >= 0) -+ -+#define ELAN_WAIT_EVENT (0) -+#define ELAN_POLL_EVENT (-1) -+ -+#define E4_BLK_PATTERN ((E4_uint32)0xfeedface) -+ -+#define E4_INIT_COPY_EVENT(EVENT, BLK_ELAN, BLK, SIZE) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, SIZE), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((BLK_ELAN), &(EVENT)->ev_CopySource); \ -+ elan4_store64 ((BLK), &(EVENT)->ev_CopyDest); \ -+ } while (0) -+ -+#define E4_INIT_WRITE_EVENT(EVENT, DWORD) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((DWORD), &(EVENT)->ev_WritePtr); \ -+ elan4_store64 ((E4_Addr) (E4_BLK_PATTERN), &(EVENT)->ev_WriteValue); \ -+ } while (0) -+ -+#define E4_RESET_BLK_EVENT(BLK) \ -+ do { \ -+ (BLK)->eb_done = (0); \ -+ } while (0) -+ -+#define E4_PRIME_BLK_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_COPY_EVENT(EVENT, SIZE, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, (SIZE >> 3)), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_WRITE_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#ifndef _ASM -+ -+#define E4_INPUTQ_ALIGN 32 /* Descriptor must be 32-byte aligned */ -+ -+typedef struct _E4_InputQueue -+{ -+ volatile E4_Addr q_bptr; /* 64 bit aligned ptr to current back item */ -+ E4_Addr q_fptr; /* 64 bit aligned ptr to current front item */ -+ E4_uint64 q_control; /* this defines the last item, item size, and offset back to the first item. */ -+ E4_Addr q_event; /* queue event */ -+} E4_InputQueue; -+ -+#define E4_INPUTQ_LASTITEM_MASK 0x00000000ffffffffULL -+#define E4_INPUTQ_ITEMSIZE_MASK 0x000000ff00000000ULL -+#define E4_INPUTQ_LASTITEM_OFFSET_MASK 0xffffff0000000000ULL -+#define E4_INPUTQ_LASTITEM_SHIFT 0 -+#define E4_INPUTQ_ITEMSIZE_SHIFT 32 -+#define E4_INPUTQ_LASTITEM_OFFSET_SHIFT 40 -+ -+/* -+ * Macro to initialise the InputQueue control word given the FirstItem, LastItem & ItemSize -+ * FirstItem and LastItem are 64 bit double word aligned elan addresses. -+ */ -+#define E4_InputQueueControl(FirstItem, LastItem, ItemSizeInBytes)\ -+ (((((E4_uint64)(LastItem))) & E4_INPUTQ_LASTITEM_MASK) |\ -+ ((((E4_uint64)(ItemSizeInBytes)) << (E4_INPUTQ_ITEMSIZE_SHIFT-3)) & E4_INPUTQ_ITEMSIZE_MASK) |\ -+ ((((E4_uint64)((FirstItem)-(LastItem))) << (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)) & E4_INPUTQ_LASTITEM_OFFSET_MASK)) -+ -+/* -+ * LastItemOffset is a sign extended -ve quantity with LastItemOffset[26:3] == q_control[63:40] -+ * we sign extend this by setting LastItemOffset[63:27] to be #one. -+ */ -+#define E4_InputQueueLastItemOffset(control) ((((E4_int64) -1) << (64 - (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3))) | \ -+ ((E4_int64) (((control) & E4_INPUTQ_LASTITEM_OFFSET_MASK) >> (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)))) -+#define E4_InputQueueItemSize(control) (((control) & E4_INPUTQ_ITEMSIZE_MASK) >> (E4_INPUTQ_ITEMSIZE_SHIFT-3)) -+ -+/* -+ * Macro to increment the InputQ front pointer taking into account wrap -+ */ -+#define E4_InputQueueFptrIncrement(Q, FirstItem, LastItem, ItemSizeInBytes) \ -+ ((Q)->q_fptr = ( ((Q)->q_fptr == (LastItem)) ? (FirstItem) : ((Q)->q_fptr + (ItemSizeInBytes))) ) -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_EVENTS_H */ -Index: linux-2.4.21/include/elan4/i2c.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/i2c.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/i2c.h 2005-06-01 23:12:54.736418280 -0400 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_I2C_H -+#define _ELAN4_I2C_H -+ -+#ident "@(#)$Id: i2c.h,v 1.10 2003/12/02 16:11:22 lee Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/i2c.h,v $*/ -+ -+/* I2C address space - bits[7:1] */ -+#define I2C_LED_I2C_ADDR 0x20 -+#define I2C_TEMP_ADDR 0x48 -+#define I2C_EEPROM_ADDR 0x50 -+ -+#define I2C_WRITE_ADDR(addr) ((addr) << 1 | 0) -+#define I2C_READ_ADDR(addr) ((addr) << 1 | 1) -+ -+/* I2C EEPROM appears as 8 I2C 256 byte devices */ -+#define I2C_24LC16B_BLOCKSIZE (256) -+#define I2C_24LC16B_BLOCKADDR(addr) ((addr) >> 8) -+#define I2C_24LC16B_BLOCKOFFSET(addr) ((addr) & 0xff) -+ -+#define I2C_ELAN_EEPROM_PCI_BASEADDR 0 /* PCI config starts at addr 0 in the EEPROM */ -+#define I2C_ELAN_EEPROM_VPD_BASEADDR 256 /* VPD data start */ -+#define I2C_ELAN_EEPROM_PCI_SIZE 256 /* PCI data max size */ -+#define I2C_ELAN_EEPROM_VPD_SIZE 256 /* VPD data max size */ -+ -+#define I2C_ELAN_EEPROM_SIZE 2048 -+ -+#define I2C_ELAN_EEPROM_DEVICE_ID 0xA0 -+#define I2C_ELAN_EEPROM_FAIL_LIMIT 8 -+ -+#define I2C_ELAN_EEPROM_ADDR_BLOCKSIZE_SHIFT 0x8 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_MASK 0x7 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_SHIFT 0x1 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_I2C_H */ -Index: linux-2.4.21/include/elan4/intcookie.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/intcookie.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/intcookie.h 2005-06-01 23:12:54.737418128 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.h,v 1.10 2004/08/09 14:02:37 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.h,v $*/ -+ -+#ifndef __ELAN4_INTCOOKIE_H -+#define __ELAN4_INTCOOKIE_H -+ -+typedef E4_uint64 ELAN4_INTCOOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct intcookie_entry -+{ -+ struct intcookie_entry *ent_next; -+ struct intcookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ ELAN4_INTCOOKIE ent_cookie; -+ ELAN4_INTCOOKIE ent_fired; -+ kcondvar_t ent_wait; -+} INTCOOKIE_ENTRY; -+ -+typedef struct intcookie_table -+{ -+ struct intcookie_table *tbl_next; -+ struct intcookie_table *tbl_prev; -+ -+ ELAN_CAPABILITY *tbl_cap; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ INTCOOKIE_ENTRY *tbl_entries; -+} INTCOOKIE_TABLE; -+ -+extern void intcookie_init(void); -+extern void intcookie_fini(void); -+extern INTCOOKIE_TABLE *intcookie_alloc_table (ELAN_CAPABILITY *cap); -+extern void intcookie_free_table (INTCOOKIE_TABLE *tbl); -+extern int intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie); -+extern int intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+ -+#endif /* __KERNEL */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_INTCOOKIE_H */ -Index: linux-2.4.21/include/elan4/ioctl.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/ioctl.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/ioctl.h 2005-06-01 23:12:54.738417976 -0400 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_IOCTL_H -+#define __ELAN4_IOCTL_H -+ -+#ident "@(#)$Id: ioctl.h,v 1.27.6.2 2005/01/11 12:15:39 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/ioctl.h,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define ELAN4IO_CONTROL_PATHNAME "/dev/elan4/control%d" -+#define ELAN4IO_USER_PATHNAME "/dev/elan4/user%d" -+#define ELAN4IO_SDRAM_PATHNAME "/dev/elan4/sdram%d" -+#define ELAN4IO_MAX_PATHNAMELEN 32 -+ -+/* -+ * NOTE - ioctl values 0->0x1f are defined for -+ * generic/control usage. -+ */ -+ -+/* Macro to generate 'offset' to mmap "control" device */ -+#define OFF_TO_BAR(off) (((off) >> 28) & 0xF) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(bar,off) (((bar) << 28) | ((off) & 0x0FFFFFFF)) -+ -+/* Definiations for generic ioctls */ -+#define ELAN4IO_GENERIC_BASE 0x00 -+ -+typedef struct elan4io_stats_struct -+{ -+ int which; -+ unsigned long long ptr; /* always pass pointer as 64 bit */ -+} ELAN4IO_STATS_STRUCT; -+ -+#define ELAN4IO_STATS _IOR ('e', ELAN4IO_GENERIC_BASE + 0, ELAN4IO_STATS_STRUCT) -+#define ELAN4IO_DEVINFO _IOR ('e', ELAN4IO_GENERIC_BASE + 1, ELAN_DEVINFO) -+#define ELAN4IO_POSITION _IOR ('e', ELAN4IO_GENERIC_BASE + 2, ELAN_POSITION) -+ -+ -+/* -+ * Definitions for /dev/elan4/controlX -+ */ -+#define ELAN4IO_CONTROL_BASE 0x20 -+ -+#define ELAN4IO_GET_POSITION _IOR ('e', ELAN4IO_CONTROL_BASE + 0, ELAN_POSITION) -+#define ELAN4IO_SET_POSITION _IOW ('e', ELAN4IO_CONTROL_BASE + 1, ELAN_POSITION) -+#define ELAN4IO_DEBUG_SNAPSHOT _IOW ('e', ELAN4IO_CONTROL_BASE + 2, ) -+ -+typedef struct elan4io_params_mask_struct -+{ -+ unsigned short p_mask; -+ ELAN_PARAMS p_params; -+} ELAN4IO_PARAMS_STRUCT; -+#define ELAN4IO_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN4IO_PARAMS_STRUCT) -+#define ELAN4IO_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN4IO_PARAMS_STRUCT) -+ -+/* old versions - implicit p_mask == 3 */ -+#define ELAN4IO_OLD_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN_PARAMS) -+#define ELAN4IO_OLD_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN_PARAMS) -+ -+/* -+ * Definitions for /dev/elan4/userX -+ */ -+#define ELAN4IO_USER_BASE 0x40 -+ -+#define ELAN4IO_FREE _IO ('e', ELAN4IO_USER_BASE + 0) -+#define ELAN4IO_ATTACH _IOWR ('e', ELAN4IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN4IO_DETACH _IOWR ('e', ELAN4IO_USER_BASE + 2, ELAN_CAPABILITY) -+#define ELAN4IO_BLOCK_INPUTTER _IO ('e', ELAN4IO_USER_BASE + 3) -+ -+typedef struct elan4io_add_p2pvp_struct -+{ -+ unsigned vp_process; -+ ELAN_CAPABILITY vp_capability; -+} ELAN4IO_ADD_P2PVP_STRUCT; -+ -+#define ELAN4IO_ADD_P2PVP _IOW ('e', ELAN4IO_USER_BASE + 4, ELAN4IO_ADD_P2PVP_STRUCT) -+ -+typedef struct elan4io_add_bcastvp_struct -+{ -+ unsigned int vp_process; -+ unsigned int vp_lowvp; -+ unsigned int vp_highvp; -+} ELAN4IO_ADD_BCASTVP_STRUCT; -+ -+#define ELAN4IO_ADD_BCASTVP _IOW ('e', ELAN4IO_USER_BASE + 5, ELAN4IO_ADD_BCASTVP_STRUCT) -+ -+#define ELAN4IO_REMOVEVP _IO ('e', ELAN4IO_USER_BASE + 6) -+ -+typedef struct elan4io_route_struct -+{ -+ unsigned int rt_process; -+ unsigned int rt_error; -+ E4_VirtualProcessEntry rt_route; -+} ELAN4IO_ROUTE_STRUCT; -+ -+#define ELAN4IO_SET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 7, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_RESET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 9, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_GET_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 8, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_CHECK_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 10, ELAN4IO_ROUTE_STRUCT) -+ -+typedef struct elan4io_alloc_cq_struct -+{ -+ unsigned int cq_size; /* input: size of queue */ -+ unsigned int cq_perm; /* input: requested permissions */ -+ unsigned int cq_type; /* input: queue type */ -+ unsigned int cq_indx; /* output: queue number */ -+} ELAN4IO_ALLOCCQ_STRUCT; -+ -+#define ELAN4IO_ALLOCCQ _IOWR ('e', ELAN4IO_USER_BASE + 11, ELAN4IO_ALLOCCQ_STRUCT) -+#define ELAN4IO_FREECQ _IOWR ('e', ELAN4IO_USER_BASE + 12, unsigned) -+ -+#define ELAN4IO_CQ_TYPE_REORDER 1 /* revb reordering command queue */ -+ -+typedef struct elan4io_perm_struct -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned long ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT; -+ -+typedef struct elan4io_perm_struct32 -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned int ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT32; -+ -+#define ELAN4IO_SETPERM _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_SETPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT32) -+#define ELAN4IO_CLRPERM _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_CLRPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT32) -+ -+typedef struct elan4io_trapsig_struct -+{ -+ int ts_signo; -+} ELAN4IO_TRAPSIG_STRUCT; -+#define ELAN4IO_TRAPSIG _IOW ('e', ELAN4IO_USER_BASE + 15, ELAN4IO_TRAPSIG_STRUCT) -+ -+typedef struct elan4io_traphandler_struct -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned long th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT; -+ -+typedef struct elan4io_traphandler_struct32 -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned int th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT32; -+ -+#define ELAN4IO_TRAPHANDLER _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT) -+#define ELAN4IO_TRAPHANDLER32 _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT32) -+ -+typedef struct elan4io_required_mappings_struct -+{ -+ E4_Addr rm_upage_addr; /* elan address of user page */ -+ E4_Addr rm_trestart_addr; /* elan address of tproc restart trampoline */ -+} ELAN4IO_REQUIRED_MAPPINGS_STRUCT; -+#define ELAN4IO_REQUIRED_MAPPINGS _IOW ('e', ELAN4IO_USER_BASE + 17, ELAN4IO_REQUIRED_MAPPINGS_STRUCT) -+ -+typedef struct elan4io_resume_eproc_trap_struct -+{ -+ E4_Addr rs_addr; -+} ELAN4IO_RESUME_EPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_EPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 18, ELAN4IO_RESUME_EPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_cproc_trap_struct -+{ -+ unsigned int rs_indx; -+} ELAN4IO_RESUME_CPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_CPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 19, ELAN4IO_RESUME_CPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_dproc_trap_struct -+{ -+ E4_DMA rs_desc; -+} ELAN4IO_RESUME_DPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_DPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 20, ELAN4IO_RESUME_DPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_tproc_trap_struct -+{ -+ E4_ThreadRegs rs_regs; -+} ELAN4IO_RESUME_TPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_TPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 21, ELAN4IO_RESUME_TPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_iproc_trap_struct -+{ -+ unsigned int rs_channel; -+ unsigned int rs_trans; -+ E4_IprocTrapHeader rs_header; -+ E4_IprocTrapData rs_data; -+} ELAN4IO_RESUME_IPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_IPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 22, ELAN4IO_RESUME_IPROC_TRAP_STRUCT) -+ -+#define ELAN4IO_FLUSH_ICACHE _IO ('e', ELAN4IO_USER_BASE + 23) -+#define ELAN4IO_STOP_CTXT _IO ('e', ELAN4IO_USER_BASE + 24) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 25, ELAN4_INTCOOKIE) -+#define ELAN4IO_FREE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 26, ELAN4_INTCOOKIE) -+#define ELAN4IO_ARM_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 27, ELAN4_INTCOOKIE) -+#define ELAN4IO_WAIT_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 28, ELAN4_INTCOOKIE) -+ -+typedef struct elan4io_alloc_trap_queues_struct -+{ -+ unsigned int tq_ndproc_traps; -+ unsigned int tq_neproc_traps; -+ unsigned int tq_ntproc_traps; -+ unsigned int tq_nthreads; -+ unsigned int tq_ndmas; -+} ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT; -+#define ELAN4IO_ALLOC_TRAP_QUEUES _IOW ('e', ELAN4IO_USER_BASE + 29, ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT) -+ -+typedef struct elan4io_neterr_msg_struct -+{ -+ unsigned int nm_vp; -+ unsigned int nm_nctx; -+ unsigned int nm_retries; -+ unsigned int nm_pad; -+ ELAN4_NETERR_MSG nm_msg; -+} ELAN4IO_NETERR_MSG_STRUCT; -+#define ELAN4IO_NETERR_MSG _IOW ('e', ELAN4IO_USER_BASE + 30, ELAN4IO_NETERR_MSG_STRUCT) -+ -+typedef struct elan4io_neterr_timer_struct -+{ -+ unsigned int nt_usecs; -+} ELAN4IO_NETERR_TIMER_STUCT; -+ -+#define ELAN4IO_NETERR_TIMER _IO ('e', ELAN4IO_USER_BASE + 31) -+ -+typedef struct elan4io_neterr_fixup_struct -+{ -+ E4_uint64 nf_cookie; -+ unsigned int nf_waitforeop; -+ unsigned int nf_sten; -+ unsigned int nf_vp; -+ unsigned int nf_pad; -+} ELAN4IO_NETERR_FIXUP_STRUCT; -+ -+#define ELAN4IO_NETERR_FIXUP _IOW ('e', ELAN4IO_USER_BASE + 32, ELAN4IO_NETERR_FIXUP_STRUCT) -+ -+typedef struct elan4io_firecap_struct -+{ -+ ELAN_CAPABILITY fc_capability; -+ ELAN4_INTCOOKIE fc_cookie; -+} ELAN4IO_FIRECAP_STRUCT; -+ -+#define ELAN4IO_FIRE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 33, ELAN4IO_FIRECAP_STRUCT) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE_TABLE _IOW ('e', ELAN4IO_USER_BASE + 34, ELAN_CAPABILITY) -+#define ELAN4IO_FREE_INTCOOKIE_TABLE _IO ('e', ELAN4IO_USER_BASE + 35) -+ -+typedef struct elan4io_translation -+{ -+ E4_Addr tr_addr; -+ unsigned long tr_len; -+ unsigned int tr_access; -+} ELAN4IO_TRANSLATION_STRUCT; -+ -+#define ELAN4IO_LOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 36, ELAN4IO_TRANSLATION_STRUCT) -+#define ELAN4IO_UNLOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 37, ELAN4IO_TRANSLATION_STRUCT) -+ -+typedef struct elan4io_dumpcq_struct32 -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned int buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT32; -+ -+typedef struct elan4io_dumpcq_struct -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned long buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT; -+ -+#define ELAN4IO_DUMPCQ _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT) -+#define ELAN4IO_DUMPCQ32 _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT32) -+ -+/* mmap offsets - - we define the file offset space as follows: -+ * -+ * page 0 - 4095 - command queues -+ * page 4096 - device user registers -+ * page 4097 - flag page/user stats -+ * page 4098 - device stats -+ * page 4099 - tproc trampoline -+ */ -+ -+#define ELAN4_OFF_COMMAND_QUEUES 0 -+#define ELAN4_OFF_USER_REGS 4096 -+#define ELAN4_OFF_USER_PAGE 4097 -+#define ELAN4_OFF_DEVICE_STATS 4098 -+#define ELAN4_OFF_TPROC_TRAMPOLINE 4099 -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_IOCTL_H */ -Index: linux-2.4.21/include/elan4/mmu.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/mmu.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/mmu.h 2005-06-01 23:12:54.738417976 -0400 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.h,v 1.11 2004/04/21 12:04:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.h,v $*/ -+ -+ -+#ifndef __ELAN4_MMU_H -+#define __ELAN4_MMU_H -+ -+typedef struct elan4_hash_entry -+{ -+ struct elan4_hash_entry *he_next; -+ struct elan4_hash_entry *he_prev; -+ -+ sdramaddr_t he_entry; -+ -+ struct elan4_hash_entry *he_chain[2]; -+ E4_uint64 he_tag[2]; -+ E4_uint32 he_pte[2]; -+} ELAN4_HASH_ENTRY; -+ -+#define ELAN4_HENT_CHUNKS 16 /* SDRAM_MIN_BLOCK_SIZE/sizeof (E4_HashTableEntry) */ -+ -+typedef struct elan4_hash_chunk -+{ -+ struct list_head hc_link; -+ ELAN4_HASH_ENTRY hc_hents[ELAN4_HENT_CHUNKS]; -+} ELAN4_HASH_CHUNK; -+ -+typedef struct elan4_hash_cache -+{ -+ E4_Addr hc_start; -+ E4_Addr hc_end; -+ int hc_tbl; -+ -+ ELAN4_HASH_ENTRY *hc_hes[1]; -+} ELAN4_HASH_CACHE; -+ -+/* -+ * he_pte is really 4 bytes of pte "type" one for each pte -+ * entry - however we declare it as an "int" so we can -+ * easily determine that all 4 entries are invalid -+ */ -+#define HE_SET_PTE(he,tagidx,pteidx,val) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx] = (val)) -+#define HE_GET_PTE(he,tagidx,pteidx) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx]) -+ -+/* -+ * he_tag has the following form : -+ * [63:27] tag -+ * [20:17] pte valid -+ * [16] locked -+ * [15] copy -+ * [14] valid -+ * [13:0] context -+ */ -+ -+#define HE_TAG_VALID (1 << 14) -+#define HE_TAG_COPY (1 << 15) -+#define HE_TAG_LOCKED (1 << 16) -+ -+#define INVALID_CONTEXT 0 -+ -+extern u_char elan4_permtable[]; -+#define ELAN4_INCOMPAT_ACCESS(perm,access) ((elan4_permtable[(perm)] & (1 << (access))) == 0) -+extern u_char elan4_permreadonly[]; -+#define ELAN4_PERM_READONLY(perm) (elan4_permreadonly[(perm)]) -+ -+/* return code from elan4mmu_categorise_paddr */ -+#define ELAN4MMU_PADDR_SDRAM 0 -+#define ELAN4MMU_PADDR_COMMAND 1 -+#define ELAN4MMU_PADDR_LOCALPCI 2 -+#define ELAN4MMU_PADDR_PAGE 3 -+#define ELAN4MMU_PADDR_OTHER 4 -+ -+extern int elan4_debug_mmu; -+ -+#ifdef DEBUG_PRINTF -+# define MPRINTF(ctxt,lvl,args...) (elan4_debug_mmu > (lvl) ? elan4_debugf(ctxt,DBG_MMU, ##args) : (void)0) -+#else -+# define MPRINTF(ctxt,lvl,args...) ((void) 0) -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_MMU_H */ -Index: linux-2.4.21/include/elan4/neterr.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/neterr.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/neterr.h 2005-06-01 23:12:54.738417976 -0400 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_NETERR_H -+#define __ELAN4_NETERR_H -+ -+#ident "@(#)$Id: neterr.h,v 1.1 2004/01/19 14:38:34 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.h,v $*/ -+ -+typedef struct elan4_neterr_msg -+{ -+ E4_uint8 msg_type; -+ E4_uint8 msg_waitforeop; -+ E4_uint16 msg_context; /* network context # message sent to */ -+ E4_int16 msg_found; /* # cookie found (response) */ -+ -+ ELAN_LOCATION msg_sender; /* nodeid/context # message sent from */ -+ E4_uint32 msg_pad; -+ -+ E4_uint64 msg_cookies[6]; /* 64 bit cookies from identify packets */ -+} ELAN4_NETERR_MSG; -+ -+#define ELAN4_NETERR_MSG_SIZE sizeof (ELAN4_NETERR_MSG) -+#define ELAN4_NETERR_MSG_REQUEST 1 -+#define ELAN4_NETERR_MSG_RESPONSE 2 -+ -+#define ELAN4_NETERR_MAX_COOKIES (sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies) / \ -+ sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies[0])) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_NETERR_H */ -Index: linux-2.4.21/include/elan4/pci.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/pci.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/pci.h 2005-06-01 23:12:54.739417824 -0400 -@@ -0,0 +1,227 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_PCI_H -+#define __ELAN4_PCI_H -+ -+#ident "$Id: pci.h,v 1.32 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/pci.h,v $*/ -+ -+/* Elan has 2 64 bit bars */ -+#define ELAN4_BAR_SDRAM 0 -+#define ELAN4_BAR_REGISTERS 2 -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+/* support standard pseudo bars */ -+#define ELAN4_PSEUDO_BAR_ROM 8 -+ -+/* Elan PCI control -+ configuration space register. ElanControlRegister */ -+#define PCI_ELAN_PARITY_ADDR_LO 0x40 -+#define PCI_ELAN_PARITY_ADDR_HI 0x44 -+#define PCI_ELAN_PARITY_TYPE 0x48 -+#define PCI_ELAN_CONTROL 0x4c -+#define PCI_ELAN_PLL_CONTROL 0x50 -+#define PCI_ELAN_SPLIT_MESSAGE_ATTR 0x54 -+#define PCI_ELAN_SPLIT_MESSAGE_VALUE 0x54 -+#define PCI_ELAN_RAMBIST_FAILED 0x54 -+#define PCI_ELAN_TOPPHYSADDR(i) (0x58 + ((i)<<1)) -+ -+/* -+ * [31] PciM66EN This is set it the bus is running in PCI2.3 - 66MHz mode. -+ * [30:28] InitPattern This gives the PCI-X startup mode. See "Pci intialisation patterns" below. -+ * [27] notBusIs64Bits If set the bus is running 32 bits wide. If Clear it is a 64 bit bus. -+ * [26:24] RamBistCntl Used to control the Elan4 RAM BIST. Not acitive it zero. -+ * [23] RamBistFinished Only used when performing the RAM BIST test. -+ * [22] SelectSplitMessAttr See ECTRL_SELECT_SPLIT_MESS_ATTR below. -+ * [21] ReceivedSplitCompError See ECTRL_REC_SPLIT_COMP_MESSAGE below -+ * [20:16] WriteHighPriTime Used with ReadHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of WriteHighPriTime the longer -+ * the PCI write bursts will be allowed without interruption from a read transfer. -+ * [15] DisableCouplingTest This is only used as part of the RAM BIST test. It effects the testing of the main -+ * cache tag RAMS. -+ * [14:13] Not used Will read as zero. -+ * [12:8] ReadHighPriTime Used with WriteHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of ReadHighPriTime the longer -+ * the PCI read bursts will be allowed without interruption from a write transfer. -+ * [7] EnableLatencyCountReset This bit effect the behaviour of disconnects due to the removal of GNT# after the latency -+ * counter has expired. If set it will allow the latency counter to be reset each time the -+ * GNT# is reasserted. If asserted it should provided improved bandwidth on the PCI bus -+ * without increasing the maximum latency another device would have for access to the bus. -+ * It will increase the average latency of other devices. -+ * [6] ExtraMasterAddrBits This bit used to control the physical PCI addresses generated by the MMU. -+ * [5] ReducedPciDecode If set the PCI local memory BAR will decode 256Mbytes of PCI address space. If clear it -+ * will decode 2Gbyte of PCI address space. -+ * [4] ConfigInEBusRom If set the constant values of the Elan4 PCI configuration space will be taken from the -+ * EEPROM. If clear the internal values will be used. -+ * [3] EnableRd2_2Bursts This bit only effects the behaviour of burst reads when the PCI bus is operating in -+ * PCI-2.2 mode. It allows adjacent reads to be merged into longer bursts for higher -+ * performance. -+ * [2] SoftIntReset If set this bit will cause the Elan4 to reset itself with the exception of the PCI -+ * configuation space. All internal state machines will be put into the reset state. -+ * [1] EnableWrBursts This bit allows much longer PCI-X write bursts. If set it will stop the Elan4 from -+ * being completely PCI-X compliant as the Elan4 may request a long PCI-X write burst that -+ * it does not complete. However it should significantly increase the maximum PCI-X write -+ * bandwidth and is unlikely to cause problems with many PCI-X bridge chips. -+ * [0] InvertMSIPriority This bit effect the way MSI interrupts are generated. It provides flexiblity to generate -+ * the MSI interrupts in a different way to allow for different implimentations of MSI -+ * logic and still give the correct priority of Elan4 interrupts. -+ * -+ * {PciM66EN, InitPattern, notBusIs64Bits, RamBistCntl, RamBistFinished, -+ * SelectSplitMessAttr, ReceivedSplitCompError, WriteHighPriTime, -+ * DisableCouplingTest, 2'h0, ReadHighPriTime, -+ * EnableLatencyCountReset, ExtraMasterAddrBits, ReducedPciDecode, ConfigInEBusRom, -+ * EnableRd2_2Bursts, SoftIntReset, EnableWrBursts, InvertMSIPriority} -+ */ -+ -+#define ECTRL_INVERT_MSI_PRIO (1 << 0) -+#define ECTRL_ENABLE_WRITEBURSTS (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_ENABLE_2_2READBURSTS (1 << 3) -+#define ECTRL_CONFIG_IN_EBUS_ROM (1 << 4) -+#define ECTRL_28_NOT_30_BIT_LOCAL_BAR (1 << 5) -+#define ECTRL_ExtraMasterAddrBits (1 << 6) -+#define ECTRL_ENABLE_LATENCY_RESET (1 << 7) -+#define ECTRL_DISABLE_COUPLING_TEST (1 << 15) -+ -+/* -+ * Ratio of the following two registers set the relative bandwidth given to intputer data -+ * versus other PCI pci traffic when scheduling new PCI master accesses. -+ */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_SHIFT (8) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_MASK (0x1f) -+ -+ -+#define ECTRL_IPROC_HIGH_PRI_TIME_SHIFT (16) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_IPROC_HIGH_PRI_TIME_MASK (0x1f) -+ -+/* -+ * This is set if a split completion message is received. -+ * This will cause a PCI error interrupt. -+ * This error is cleared by writting a 1 to this bit. -+ */ -+#define ECTRL_REC_SPLIT_COMP_MESSAGE (1 << 21) -+/* -+ * This bit is used to select reading of either the Split message attribute value when -+ * set or the split completion message data value from 0x54 in the config space -+ * if the ECTRL_REC_SPLIT_COMP_MESSAGE bit is set. 0x54 returns the the BistFailed flags -+ * if any of the BIST control bits are set (bits 26 to 24) -+ */ -+#define ECTRL_SELECT_SPLIT_MESS_ATTR (1 << 22) -+ -+// Internal RAM bist control bits. -+// Three bits of state control the RAM BIST (Built in self test). -+// -+// These bits must not be set unless the ECTRL_SOFTWARE_INTERNAL_RESET bit has also been set! -+// -+// For a normal fast ram test assert ECTRL_BIST_FAST_TEST. -+// For a data retention test first write ECTRL_START_RETENTION_TEST then wait the retention period of -+// at least 1ms and preferably much longer then write ECTRL_CONTINUE_RETENTION_TEST then wait -+// again and finallly write ECTRL_FINISH_RETENTION_TEST. -+// -+// The read only bit ECTRL_BIST_FINISHED_TEST can be polled to check that the test has compleated. -+#define ECTRL_BIST_CTRL_SHIFT (24) -+#define ECTRL_BIST_CTRL_MASK (7 << 24) -+ -+#define ECTRL_BIST_FAST_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // old scheme -+#define ECTRL_START_RETENTION_TEST ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_CONTINUE_RETENTION_TEST ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_FINISH_RETENTION_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_BIST_KICK_OFF ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // new scheme -+#define ECTRL_BIST_MOVE_ON_ODD ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_MOVE_ON_EVEN ((5 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_SCREAM_THROUGH ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_CLEAR_BIST_TEST (0 << 24) -+#define ECTRL_BIST_FINISHED_TEST (1 << 23) -+ -+// Read only current PCI bus type. -+#define ECTRL_RUNNING_32BIT_MODE (1 << 27) -+#define ECTRL_INITIALISATION_MODE (7 << 28) -+#define ECTRL_RUNNING_M66EN_MODE (1 << 31) -+ -+#define ECTRL_INIT_PATTERN_SHIFT (28) -+#define ECTRL_INIT_PATTERN_MASK (0x7) -+ -+// Pci intialisation patterns -+#define Pci2_2 (0 << 28) -+#define PciX50To66MHz (1 << 28) -+#define PciX66to100MHz (2 << 28) -+#define PciX100to133MHz (3 << 28) -+#define PciXReserved1 (4 << 28) -+#define PciXReserved2 (5 << 28) -+#define PciXReserved3 (6 << 28) -+#define PciXReserved4 (7 << 28) -+ -+/* Elan PCI pll and pad control configuration space register. ElanPllControlReg */ -+// This overrides the default PCI pll control settings. -+#define PciPll_FeedForwardISel0 (1 << 0) // Lsi name Z0 -+#define PciPll_FeedForwardISel1 (1 << 1) // Lsi name Z1 -+#define PciPll_ChargePumpISel0 (1 << 2) // Lsi name P0 -+#define PciPll_ChargePumpISel1 (1 << 3) // Lsi name P1 -+#define PciPll_EnableAutoReset (1 << 4) // Lsi name ENARST -+#define PciPll_RSEL200500 (1 << 5) // Lsi name Range Select, 0: 100 - 250MHz, 1: 200 - 500MHz -+#define PciPll_DivideFeedback (1 << 6) // Just used for test - This divides the shortcut feedback to the PCI PLL so that it can lock to the tester clock. -+#define PciPll_CutFeedback (1 << 7) // Just used for test - This disables the shortcut feedback. -+ -+// This overrides the default PCI BZ controler settings. -+#define PciBZ_UPDI (0xf << 8) -+#define PciBZ_WAIT_INT (0xf << 12) -+ -+// This overrides the default Sys and SDRam pll control settings. -+#define SysPll_FeedForwardISel0 (1 << 16) // Lsi name P0 -+#define SysPll_FeedForwardISel1 (1 << 17) // Lsi name P1 -+#define SysPll_ChargePumpISel0 (1 << 18) // Lsi name Z0 -+#define SysPll_ChargePumpISel1 (1 << 19) // Lsi name Z1 -+#define SysPll_EnableAutoReset (1 << 20) // Lsi name ENARST -+#define SysPll_DivPhaseCompInBy2 (1 << 21) // Lsi name NODIV (Should be DIV) -+#define SysPll_PllTestClkSel (1 << 22) // If asserted the master clock source is not taken from the pll. -+ -+#define Pll_ForceEBusADTristate (1 << 23) // Required to enable the testing of EnableAutoReset. Enables use of EBusAD[7] (rev A) -+#define Pll_LinkErrDirectToSDA (1 << 23) // Access to link error flag for triggering (rev B) -+ -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+// Config: with 800MHz Speeds are 266 200 160 133. -+// 0 = 133/133 (1:1) 6:6 1 -+// 1 = 160/133 (6:5) 5:6 1.2 -+// 2 = 200/133 (3:2) 4:6 1.5 -+// 3 = 266/133 (2:1) 3:6 2 -+// 4 = 200/200 (1:1) 4:4 1 -+// 5 = 266/200 (4:3) 3:4 1.33 -+ -+// Config: with 600MHz Speeds are 200 150 120 100 -+// 0 = 100/100 (1:1) 6:6 1 -+// 1 = 120/100 (6:5) 5:6 1.2 -+// 2 = 150/100 (3:2) 4:6 1.5 -+// 3 = 200/100 (2:1) 3:6 2 -+// 4 = 150/150 (1:1) 4:4 1 -+// 5 = 200/150 (4:3) 3:4 1.33 -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Slow (0 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_6_5 (1 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_3_2 (2 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_2_1 (3 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Fast (4 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_4_3 (5 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_MAX_NORMAL (6) /* used to generate a valid random value */ -+#define GET_RANDOM_CLOCK_RATIO (Random(ECTRL_SYS_CLOCK_MAX_NORMAL) << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_PLL_TEST (6 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_TEST (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_MASK (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+ -+#endif /* __ELAN4_PCI_H */ -Index: linux-2.4.21/include/elan4/registers.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/registers.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/registers.h 2005-06-01 23:12:54.742417368 -0400 -@@ -0,0 +1,1588 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_REGISTERS_H -+#define _ELAN4_REGISTERS_H -+ -+#ident "$Id: registers.h,v 1.117.2.1 2004/10/04 14:26:18 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/registers.h,v $*/ -+ -+/* -+ * Header file for internal slave mapping of the ELAN4 registers -+ */ -+ -+#define E4_CACHELINE_SIZE (64) -+#define E4_STACK_ALIGN (64) -+ -+#ifndef _ASM -+ -+#include -+#include -+#include -+ -+typedef volatile struct _E4_CacheSets -+{ -+ E4_uint64 Set0[1024]; /* 8k bytes per set */ -+ E4_uint64 Set1[1024]; /* 8k bytes per set */ -+ E4_uint64 Set2[1024]; /* 8k bytes per set */ -+ E4_uint64 Set3[1024]; /* 8k bytes per set */ -+} E4_CacheSets; -+ -+typedef union e4_cache_tag -+{ -+ struct { -+ E4_uint32 pad0; /* Undefined value when read */ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 :10; /* 0-9 - reserved */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 :1; /* 31 - */ -+#else -+ E4_uint32 :1; /* 31 - */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 :10; /* 0-9 - reserved */ -+#endif -+ } s; -+ E4_uint64 Value; -+} E4_CacheTag; -+ -+typedef volatile struct _E4_CacheTags -+{ -+ E4_CacheTag Tags[4][128]; /* 8k bytes per set, 64 byte cache line */ -+} E4_CacheTags; -+ -+#define E4_NumCacheSets 4 -+#define E4_NumCacheLines 128 -+#define E4_CacheLineSize 64 -+#define E4_CacheSize (E4_NumCacheSets * E4_NumCacheLines * E4_CacheLineSize) -+#define E4_CacheSetSize (E4_NumCacheLines * E4_CacheLineSize) -+ -+/* -+ * Run Queue pointers -+ * -+ * [62:35] FrontPointer[30:3] -+ * [33:32] Size Value -+ * [30:3] BackPointer[30:3] -+ */ -+#define E4_QueuePtrMask (0x7ffffff8ULL) -+#define E4_QueueSizeMask 3 -+#define E4_QueueEntrySize sizeof (E4_uint64) -+ -+#define E4_Queue8KBytes 0 -+#define E4_Queue64KBytes 1 -+#define E4_Queue512KBytes 2 -+#define E4_Queue4MBytes 3 -+ -+#define E4_QueueFrontValue(val,size) ((val) | (size)) -+#define E4_QueueValue(queue,size) (((E4_uint64) E4_QueueFrontValue(queue,size)) << 32 | ((E4_uint64) (queue))) -+ -+#define E4_QueueFrontPointer(val) /* extract queue front pointer from register */\ -+ (((val) >> 32) & E4_QueuePtrMask) -+#define E4_QueueBackPointer(val) /* extract queue back pointer from register */ \ -+ ((val) & E4_QueuePtrMask) -+#define E4_QueueSizeValue(val) /* extract queue size value from register */ \ -+ (((val) >> 32) & E4_QueueSizeMask) -+#define E4_QueueSize(value) /* queue size in bytes from size value */ \ -+ (1 << (((value)*3) + 13)) -+#define E4_QueueOffsetMask(fptr)\ -+ ((8192 << (((fptr) & E4_QueueSizeMask) << 3)) - 1) -+#define E4_QueueOffset(fptr)\ -+ ((fptr) & E4_QueueOffsetMask(fptr)) -+#define E4_QueueFrontPointerInc(fptr) \ -+ ( ((fptr) & ~E4_QueueOffsetMask(fptr)) | ((E4_QueueOffset(fptr) + 8) & E4_QueueOffsetMask(fptr)) ) -+ -+typedef union _E4_QueuePtr -+{ -+ E4_uint64 Value; -+ struct { -+ E4_uint32 Back; -+ E4_uint32 Front; -+ } s; -+} E4_QueuePtr; -+ -+/* -+ * DMA processor status register. -+ * -+ * [48] FirstSendTrans Set for the first packet of a dma. -+ * [47:46] TimeSliceCount Time left to timeslice. -+ * [45] DmaLastPacket Set for the last packet of a dma. -+ * [44] CurrPrefetchDma Dma descriptor the prefetcher is valid for. -+ * [43:39] PrefetcherState Dma prefetcher's state machines value. -+ * [38:33] PacketAssemblyState Packet assembler's state machines value. -+ * [32:31] PrefetcherWakeupFnt Dma prefetcher's wakeup function. -+ * [30:28] PacketAssWakeupFnt Packet assembler's wakeup function. -+ * [27] AckBufferValid Packet ack is valid. -+ * [26] PrefetchedDataProblem Had either a data read fault or data error. Valid if AckBufferValid. -+ * [25] PrefetcherHalting Prefetch data about to stop for halt. Valid if AckBufferValid. -+ * [24] PacketTimeout Packet timeout. Sent an EopError. Valid if AckBufferValid set. -+ * [23:22] PacketAckValue Packet ack type. Valid if AckBufferValid set. -+ * [21:20] FaultUnitNo Set if the dma prefetcher has faulted. -+ * [19:17] TrapType Packet assembler's trap type. -+ * [16] PrefetcherFault Set if the dma prefetcher has faulted for this DMA unit. -+ * [15] Remote The Dma had been issued remotly -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define DPROC_FirstSendTrans(s) ((unsigned)((s) >> 48) & 1) -+#define DPROC_TimeSliceCount(s) ((unsigned)(((s) >> 46) & 3) -+#define DPROC_DmaLastPacket(s) ((unsigned)((s) >> 45) & 1) -+#define DPROC_CurrPrefetchDma(s) ((unsigned)((s) >> 44) & 1) -+#define DPROC_PrefetcerState(s) ((unsigned)((s) >> 39) & 0x1f) -+#define DPROC_PacketAssemblerState(s) ((unsigned)((s) >> 33) & 0x1f) -+#define DPROC_PrefetcherWakeupFn(s) ((unsigned)((s) >> 31) & 3) -+#define DPROC_PacketAssemblerWakeupFn(s)((unsigned)((s) >> 28) & 3) -+#define DPROC_AckBufferValid(s) ((unsigned)((s) >> 27) & 1) -+#define DPROC_PrefetcherDataProblem(s) ((unsigned)((s) >> 26) & 1) -+#define DPROC_PrefetcherHalting(s) ((unsigned)((s) >> 25) & 1) -+#define DPROC_PacketTimeout(s) ((unsigned)((s) >> 24) & 1) -+#define DPROC_PacketAckValue(s) ((unsigned)((s) >> 22) & 3) -+#define DPROC_FaultUnitNo(s) ((unsigned)((s) >> 20) & 3) -+#define DPROC_TrapType(s) ((unsigned)((s) >> 17) & 7) -+#define DPROC_PrefetcherFault(s) ((unsigned)((s) >> 16) & 1) -+#define DPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define DPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define DPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Command processor status register. -+ * -+ * [26:21] CPState procs current state. -+ * [20] WakeupFnt procs wakeup function. -+ * [19:16] TrapValue procs trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define CPROC_TrapType(s) ((unsigned)((s) >> 16) & 0xf) -+#define CPROC_Remote(s) ((unsigned)((s) >> 15) & 0x1) -+#define CPROC_Priority(s) ((unsigned)((s) >> 14) & 0x1) -+#define CPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Event processor status register. -+ * -+ * [34:30] CPState event procs current state. -+ * [29:28] WakeupFnt event procs wakeup function. -+ * [27:20] EventCopySize This is the number of DWords to still be copied on a copy dword event. -+ * [19] EProcPort1Fault CUN_EventProc1 has taken a translation fault. -+ * [18] EProcPort0Fault CUN_EventProc0 has taken a translation fault. -+ * [17:16] TrapValue event proc's trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define EPROC_CPState(s) ((unsigned)((s) >> 30) & 0x1f) -+#define EPROC_WakeupFunction(s) ((unsigned)((s) >> 28) & 3) -+#define EPROC_CopySize(s) ((unsigned)((s) >> 20) & 0xFF) -+#define EPROC_Port1Fault(s) ((unsigned)((s) >> 19) & 1) -+#define EPROC_Port0Fault(s) ((unsigned)((s) >> 18) & 1) -+#define EPROC_TrapType(s) ((unsigned)((s) >> 16) & 3) -+#define EPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define EPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define EPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Thread processor status register. -+ * -+ * [39:24] MemPortBusy 16 bits of port busy flags for all FFU memory ports. -+ * [23:21] Reads as zero -+ * [20:18] TQState State vector for thread queuing proc. -+ * [17] HighRunQueueFull High priority run queue is full -+ * [16] LowRunQueueFull Low priority run queue is full -+ * [15] ReadyHigh More runable threads at high priority -+ * [14] ReadyLow More runable threads at low priority -+ * [13:0] Context procs current context. -+ */ -+#define TPROC_HighRunQueueFull(s) ((unsigned)((s) >> 17) & 1) -+#define TPROC_LowRunQueueFull(s) ((unsigned)((s) >> 16) & 1) -+#define TPROC_ReadyHigh(s) ((unsigned)((s) >> 15) & 1) -+#define TPROC_ReadyLow(s) ((unsigned)((s) >> 14) & 1) -+#define TPROC_Context(s) ((unsigned)((s) & 0x3fff)) -+ -+/* -+ * Input processor status register -+ * -+ * [55] Last Trans (~EOP) -+ * [54] First Trans (~EOP) -+ * [53] Channel (~EOP) -+ * [52] Bad Length (~EOP) -+ * [51:50] Trans CRC Status (~EOP) -+ * [49:48] EOP type -+ * [47] EOP trap -+ * [46] Trapping priority -+ * [45] Trapping Channel -+ * [44:43] Bad ack sent -+ * [42:41] Good ack sent -+ * [40] Queueing Packet (~EOP) -+ * [39:36] Channel trapped bits -+ * [35:32] IProc Trap Value -+ * [31:16] Network Context (~EOP) -+ * [15:0] Transaction Type (~EOP) -+ */ -+#define IPROC_LastTrans(s) ((unsigned)((s) >> 55) & 0x1) -+#define IPROC_FirstTrans(s) ((unsigned)((s) >> 54) & 0x1) -+#define IPROC_Channel(s) ((unsigned)((s) >> 53) & 0x1) -+#define IPROC_BadLength(s) ((unsigned)((s) >> 52) & 0x1) -+#define IPROC_TransCRCStatus(s) ((unsigned)((s) >> 50) & 0x3) -+#define IPROC_EOPType(s) ((unsigned)((s) >> 48) & 0x3) -+#define IPROC_EOPTrap(s) ((unsigned)((s) >> 47) & 0x1) -+#define IPROC_InputterPri(s) ((unsigned)((s) >> 46) & 0x1) -+#define IPROC_InputterChan(s) ((unsigned)((s) >> 45) & 0x1) -+#define IPROC_BadAckSent(s) ((unsigned)((s) >> 43) & 0x3) -+#define IPROC_GoodAckSent(s) ((unsigned)((s) >> 41) & 0x3) -+#define IPROC_QueueingPacket(s) ((unsigned)((s) >> 40) & 0x1) -+#define IPROC_ChannelTrapped(s) ((unsigned)((s) >> 36) & 0xF) -+#define IPROC_TrapValue(s) ((unsigned)((s) >> 32) & 0xF) -+#define IPROC_NetworkContext(s) ((unsigned)((s) >> 16) & 0xFFFF) -+#define IPROC_TransactionType(s) ((unsigned)(s) & 0xFFFF) -+ -+/* values for IPROC_TransCRCStatus */ -+#define CRC_STATUS_GOOD (0) -+#define CRC_STATUS_DISCARD (1) -+#define CRC_STATUS_ERROR (2) -+#define CRC_STATUS_BAD (3) -+ -+/* values for IPROC_EOPType */ -+#define EOP_GOOD (1) -+#define EOP_BADACK (2) -+#define EOP_ERROR_RESET (3) -+ -+/* -+ * Interrupt register bits -+ * -+ * There are up to four sources of interrupt for the MSI port. -+ * The Elan will request 4 ports but may only get either 2 or 1 port. The Interrupts are assigned -+ * as shown below: -+ * No Of MSI ints Low Prioity High Prioity -+ * 4 Event Ints OtherInts Inputer Ints Hard Error ints. -+ * i.e. Dproc, Tproc, Sten. HighPri and LowPri Link errs, ECC errs, -+ * -+ * 2 Event Ints All other interrupts. -+ * 1 All together. -+ * -+ * It is not safe to change the number of sources of interrupt while there may be outstanding, -+ * unserviced interrupts pending. -+ * There two forms of encoding. This has been provided in case an MSI implimentation assumes either -+ * a high value to have a high priority or a low value to have a high priority. This is controled -+ * by a bit in the Elan Pci Control register. -+ */ -+#define INT_LinkPortKeyFail (1<<18) -+#define INT_PciMemErr (1<<17) -+#define INT_SDRamInt (1<<16) -+#define INT_LinkError (1<<15) -+#define INT_IProcCh1HighPri (1<<14) -+#define INT_IProcCh0HighPri (1<<13) -+#define INT_IProcCh1LowPri (1<<12) -+#define INT_IProcCh0LowPri (1<<11) -+#define INT_DiscardingHighPri (1<<10) -+#define INT_DiscardingLowPri (1<<9) -+#define INT_CProcHalted (1<<8) -+#define INT_TProcHalted (1<<7) -+#define INT_DProcHalted (1<<6) -+#define INT_EProc (1<<5) -+#define INT_TProc (1<<4) -+#define INT_CProc (1<<3) -+#define INT_Dma1Proc (1<<2) -+#define INT_Dma0Proc (1<<1) -+#define INT_MainInterrupt (1<<0) -+ -+#define INT_Units (INT_EProc | INT_TProc | INT_CProc | INT_Dma1Proc | INT_Dma0Proc) -+#define INT_Inputters (INT_IProcCh1HighPri | INT_IProcCh0HighPri | INT_IProcCh1LowPri | INT_IProcCh0LowPri) -+#define INT_Discarding (INT_DiscardingHighPri | INT_DiscardingLowPri) -+#define INT_Halted (INT_CProcHalted | INT_TProcHalted | INT_DProcHalted) -+#define INT_ErrorInterrupts (INT_LinkPortKeyFail | INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+#define INT_MSI0 INT_MainInterrupt -+#define INT_MSI1 (INT_Units | INT_Discarding | INT_Halted) -+#define INT_MSI2 (INT_Inputters) -+#define INT_MSI3 (INT_ErrorInterrupts) -+ -+#define E4_INTERRUPT_REG_SHIFT 32 -+#define E4_INTERRUPT_MASK_MASK (0xffffffffULL) -+ -+/* -+ * Trap type values - see trapvalues.v -+ */ -+ -+#define CommandProcInserterError 0x1 -+#define CommandProcPermissionTrap 0x2 -+#define CommandProcSendTransInvalid 0x3 -+#define CommandProcSendTransExpected 0x4 -+#define CommandProcDmaQueueOverflow 0x5 -+#define CommandProcInterruptQueueOverflow 0x6 -+#define CommandProcMemoryFault 0x7 -+#define CommandProcRouteFetchFault 0x8 -+#define CommandProcFailCountZero 0x9 -+#define CommandProcAddressAlignment 0xa -+#define CommandProcWaitTrap 0xb -+#define CommandProcMultipleGuards 0xc -+#define CommandProcOpenOnGuardedChan 0xd -+#define CommandProcThreadQueueOverflow 0xe -+#define CommandProcBadData 0xf -+ -+#define DmaProcNoFault 0x0 -+#define DmaProcRouteFetchFault 0x1 -+#define DmaProcFailCountError 0x2 -+#define DmaProcPacketAckError 0x3 -+#define DmaProcRunQueueReadFault 0x4 -+#define DmaProcQueueOverflow 0x5 -+ -+#define EventProcNoFault 0x0 -+#define EventProcAddressAlignment 0x1 -+#define EventProcMemoryFault 0x2 -+#define EventProcCountWrapError 0x3 -+ -+#define InputNoFault 0x0 -+#define InputAddressAlignment 0x1 -+#define InputMemoryFault 0x2 -+#define InputInvalidTransType 0x3 -+#define InputDmaQueueOverflow 0x4 -+#define InputEventEngineTrapped 0x5 -+#define InputCrcErrorAfterPAckOk 0x6 -+#define InputEopErrorOnWaitForEop 0x7 -+#define InputEopErrorTrap 0x8 -+#define InputDiscardAfterAckOk 0x9 -+ -+typedef struct _E4_Sched_Status -+{ -+ E4_uint32 Status; -+ E4_uint32 Restart; -+} E4_Sched_Status; -+ -+typedef struct _E4_Input_Ptrs -+{ -+ E4_uint32 ContextFilterTable; -+ E4_uint32 TrapBasePtr; -+} E4_Input_Ptrs; -+ -+#define SCH_StopLowPriQueues (1 << 0) -+#define SCH_DProcHalt (1 << 1) -+#define SCH_TProcHalt (1 << 2) -+#define SCH_CProcHalt (1 << 3) -+ -+#define SCH_CProcTimeout600ns (1 << 4) -+#define SCH_CProcTimeout1p4us (2 << 4) -+#define SCH_CProcTimeout3p0us (3 << 4) -+#define SCH_CProcTimeout6p2us (4 << 4) -+#define SCH_CProcTimeout12p6us (5 << 4) -+#define SCH_CProcTimeout25p4us (6 << 4) -+#define SCH_CProcTimeout51p0us (7 << 4) -+#define SCH_DiscardLowPriInput (1 << 7) -+#define SCH_DiscardHighPriInput (1 << 8) -+ -+#define SCH_DProcTimeslice64us (0 << 9) -+#define SCH_DProcTimeslice128us (1 << 9) -+#define SCH_DProcTimeslice256us (2 << 9) -+#define SCH_DProcTimeslice512us (3 << 9) -+ -+#define SCH_Halt (SCH_StopLowPriQueues | SCH_DProcHalt | SCH_TProcHalt | SCH_CProcHalt) -+#define SCH_Discard (SCH_DiscardLowPriInput | SCH_DiscardHighPriInput) -+ -+#define SCH_RestartCProc (1 << 0) -+#define SCH_RestartTProc (1 << 1) -+#define SCH_RestartEProc (1 << 2) -+#define SCH_RestartDma0Proc (1 << 3) -+#define SCH_RestartDma1Proc (1 << 4) -+#define SCH_RestartDmaPrefetchProc (1 << 5) -+#define SCH_RestartCh0LowPriInput (1 << 6) -+#define SCH_RestartCh1LowPriInput (1 << 7) -+#define SCH_RestartCh0HighPriInput (1 << 8) -+#define SCH_RestartCh1HighPriInput (1 << 9) -+#define SCH_ClearLinkErrorInt (1 << 10) -+#define SCH_ContextFilterFlush (1 << 11) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+#define LS_Mod45Changed (1 << 8) /* Mod45 bit has changed. Error setr to force reset. */ -+#define LS_PAckNotSeenError (1 << 9) /* PAck value not returned for this packet. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_LinkInValue 0x9 -+#define LRS_PllDelValue 0xA -+#define LRS_ClockEven 0xB -+#define LRS_ErrorVal8to0 0xC -+#define LRS_ErrorVal17to9 0xD -+#define LRS_ErrorVal26to18 0xE -+#define LRS_ErrorVal35to27 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+typedef struct _E4_CommandControl -+{ -+ volatile E4_uint32 CommandQueueDescsBase; -+ volatile E4_uint32 CommandRequeuePtr; -+} E4_CommandControl; -+ -+#define E4_CommandRequeueBusy 0x80000000 /* Test against read value of CommandRequeuePtr */ -+#define E4_CommandRequeueHighPri 0x1 /* Will requeue onto the high pri queue */ -+#define E4_QueueDescPtrMask 0x7fffffe0 -+ -+typedef struct _E4_CommandQueueDesc -+{ -+ E4_uint64 CQ_QueuePtrs; -+ E4_uint64 CQ_HoldingValue; /* 32 bit value for 32 bit accesses or OutOfOrderMask*/ -+ E4_uint64 CQ_AckBuffers; /* Space for 32 4 bit ack buffer values. */ -+ E4_uint64 CQ_Control; -+} E4_CommandQueueDesc; -+ -+/* -+ * Rev A - CQ_QueuePtrs -+ * [63] Unused Should be set to zero. -+ * [62:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] TimedOut Will be set if the queue timedout executing a command. -+ * [1] Priority When set the queue runs at high priority. -+ * [0] Error If this becomes set all new data written to the queue is * discarded. -+ * -+ * Rev B - CQ_QueuePtrs -+ * [63] TimedOut Will be set if the queue timedout executing a command. -+ * [62] Priority When set the queue runs at high priority. -+ * [61] QueueType 1=will accept unordered 64 bit PCI writes. 0=will accept ordered 32 or 64 bit PCI writes. -+ * [60:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] OrderControl Holds bit 8 of last PCI accesses. Used by a reordering queue. -+ * [1:0] ErrorType This field has the current error status of the queue. -+ */ -+ -+/* Common between revA and RevB */ -+#define CQ_PtrMask (0x7ffffff8) /* 31 bit sdram address */ -+#define CQ_PtrOffsetMask (0x7fff8) -+#define CQ_PtrBaseMask (0x7ff80000) -+ -+#define CQ_InsertPtrShift (3 - 3) /* InsertPtr is 64 bit aligned */ -+#define CQ_SizeShift (32) -+# define CQ_Size1K 0 -+# define CQ_Size8K 1 -+# define CQ_Size64K 2 -+# define CQ_Size512K 3 -+# define CQ_SizeMask 3 -+ -+#define CQ_CompletedPtrShift (35 - 3) /* CompletedPtr is 64 but aligned */ -+ -+#define CQ_Used (1ull << 31) -+#define CQ_Trapped (1ull << 34) -+ -+#define CQ_QueuePtrsValue(Size,Inserter,Completer) \ -+ (((E4_uint64) (Size) << CQ_SizeShift) | \ -+ ((E4_uint64) (Inserter) << CQ_InsertPtrShift) | \ -+ ((E4_uint64) (Completer) << CQ_CompletedPtrShift)) -+ -+#define CQ_InsertPtr(QueuePtrs) \ -+ (((E4_uint64) QueuePtrs) & CQ_PtrMask) -+ -+#define CQ_CompletedPtr(QueuePtrs) \ -+ (((E4_uint32)((QueuePtrs) >> CQ_CompletedPtrShift) & CQ_PtrOffsetMask) | \ -+ (CQ_InsertPtr(QueuePtrs) & CQ_PtrBaseMask)) -+ -+#define CQ_Size(SizeVal) (1024 * (1 << ((SizeVal)*3))) -+ -+/* Rev A specific */ -+#define CQ_RevA_Error (1 << 0) -+#define CQ_RevA_Priority (1 << 1) -+#define CQ_RevA_TimedOut (1 << 2) -+ -+/* Rev B specific */ -+#define CQ_RevB_ErrorType(QueuePtr) ((QueuePtr) & (3 << 0)) -+# define CQ_RevB_NoError (0ull << 0) -+# define CQ_RevB_Overflowed (1ull << 0) -+# define CQ_RevB_InvalidWriteSize (2ull << 0) -+# define CQ_RevB_InvalidWriteOrder (3ull << 0) -+#define CQ_RevB_OrderControl (1ull << 2) -+ -+#define CQ_RevB_QueueType(QueuePtr) ((QueuePtr) & (1ull << 61)) -+# define CQ_RevB_ReorderingQueue (1ull << 61) -+# define CQ_RevB_32bitWriteQueue (0ull << 61) -+ -+#define CQ_RevB_Priority (1ull << 62) -+#define CQ_RevB_TimedOut (1ull << 62) -+ -+/* -+ * CQ_AckBuffers - Packet Ack Values -+ */ -+#define PackOk (0x0) -+#define PackTestFail (0x1) -+#define PackDiscard (0x2) -+#define PackError (0x7) -+#define PackTimeout (0x8) -+#define PackWaiting (0xF) -+#define PackValue(val,chan) (((val) >> ((chan) * 4)) & 0xf) -+ -+/* -+ * CQ_Control -+ * [63:35] ExtractPtr -+ * [34] Unused -+ * [33:32] ChannelNotCompleted -+ * [31:24] Permissions -+ * [23:16] RestartCount Decremented after each restart. Will trap when zero -+ * [15:14] Unused Should be set to zero -+ * [13:0] Context -+ */ -+#define CQ_Context(Control) ((E4_uint32) ((Control) >> 0) & 0x3fff) -+#define CQ_RestartCount(Control) ((E4_uint32) ((Control) >> 16) & 0x7f) -+#define CQ_ChannelNotCompleted(Control) ((E4_uint32) ((Control) >> 32) & 3) -+#define CQ_ExtractPtr(Control) ((E4_uint32) ((Control) >> 32) & 0xFFFFFFF8) -+ -+#define CQ_RestartCountShift 16 -+ -+#define CQ_SetEventEnableBit (1 << 24) -+#define CQ_WaitEventEnableBit (1 << 25) -+#define CQ_ModifyEnableBit (1 << 26) -+#define CQ_WriteEnableBit (1 << 27) -+#define CQ_ThreadStartEnableBit (1 << 28) -+#define CQ_DmaStartEnableBit (1 << 29) -+#define CQ_STENEnableBit (1 << 30) -+#define CQ_InterruptEnableBit (1 << 31) -+#define CQ_EnableAllBits (0xFF000000) -+#define CQ_PermissionMask (0xFF000000) -+ -+#define CQ_ControlValue(Cntx, RestartCount, Permissions) \ -+ (((Cntx) & 0x3fff) | (((RestartCount) & 0xff) << 16) | ((Permissions) & CQ_PermissionMask)) -+ -+/* -+ * This file describes the slave address map of Elan4. -+ * -+ * Elan4 has two PCI 64 bit base address registers. One is setup for elan -+ * local memory and the other is for the command port, elan registers and ebus. -+ * -+ * This file describes the command port, elan registers and ebus BAR. This is a -+ * 26 bit base address register and is split up as follows: -+ * 1 The ebus requires 21 bits of address. 26'h3e00000 to 26'h3ffffff -+ * 2 The control regsiters requires 16 bits of address. 26'h3df0000 to 26'h3dfffff -+ * 3 The command port has the rest. This give just under 8k command ports or about 123 per -+ * processor of a 64 node SMP. -+ */ -+ -+/* BAR1 contains the command queues followed by the registers and the Ebus - and is 26 bits */ -+/* each command queue has an 8K page associated with it */ -+#define CQ_CommandMappingSize (1 << 13) -+#define CQ_NumCommandDescs ((1 << (26 - 13))) -+#define CQ_CommandDescsAlignment ((1 << (26 - 13)) * sizeof (E4_CommandQueueDesc)) -+ -+/* control reg bits i.e. E4_DataBusMap.SysControlReg */ -+#define CONT_EN_ALL_SETS (1ULL << 0) /* enable cache */ -+#define CONT_MMU_ENABLE (1ULL << 1) /* bit 0 enables mmu */ -+#define CONT_CACHE_HASH_TABLE (1ULL << 2) /* cache up hash table entries */ -+#define CONT_CACHE_CHAINS (1ULL << 3) /* cache up chain entries */ -+#define CONT_CACHE_ROOT_CNTX (1ULL << 4) /* cache root context table for routes and filters. */ -+#define CONT_CACHE_STEN_ROUTES (1ULL << 5) /* cache up sten packet routes */ -+#define CONT_CACHE_DMA_ROUTES (1ULL << 6) /* cache up dma packet routes */ -+ -+#define CONT_CACHE_NONE 0ULL -+#define CONT_CACHE_ALL (CONT_CACHE_HASH_TABLE | CONT_CACHE_CHAINS | CONT_CACHE_ROOT_CNTX | \ -+ CONT_CACHE_STEN_ROUTES | CONT_CACHE_DMA_ROUTES) -+ -+/* This controls the format size and position of the MMU hash tables. */ -+#define CONT_INHIBIT_MAX_CHAIN_ITEMS (1ULL << 7) /* Prevents the MaxChainItems value of 1024 from forcing a translation miss */ -+#define CONT_TABLE0_MASK_SIZE_SHIFT 8 /* Defines the size of hash table 0 */ -+#define CONT_TABLE0_PAGE_SIZE_SHIFT 13 /* Set the page size for hash table 0 */ -+#define CONT_TABLE1_MASK_SIZE_SHIFT 16 /* Defines the size of hash table 1 */ -+#define CONT_TABLE1_PAGE_SIZE_SHIFT 21 /* Set the page size for hash table 1 */ -+#define CONT_TWO_HASH_TABLES (1ULL << 24) /* Sets the MMU to use two hash tables. If not set only 0 used. */ -+#define CONT_2K_NOT_1K_DMA_PACKETS (1ULL << 25) /* Used to select the default DMA packet size. */ -+#define CONT_ALIGN_ALL_DMA_PACKETS (1ULL << 26) /* Will force all dma packets to be aligned to a page.*/ -+#define CONT_DIRECT_MAP_PCI_WRITES (1ULL << 27) /* Will force pci writes to write and flush the dcache.*/ -+#define CONT_TLB_FLUSH (1ULL << 28) /* Invalidates the TLB and indicates when flushed */ -+#define CONT_CLEAR_WALK_WROTE_TABLES (1ULL << 29) /* Used to guarantee that the elan is using new PTE values. */ -+#define CONT_ROUTE_FLUSH (1ULL << 30) /* Invalidates all route cache entries. */ -+#define CONT_CLEAR_LINKPORT_INT (1ULL << 31) /* Clears the Linkport key fail interrupt. Reads as 0. */ -+#define CONT_CLEAR_SDRAM_ERROR (1ULL << 32) /* Clears an EEC error interrupt. Reads as 0. */ -+ -+/* -+ * These are extra control bits used for testing the DLLs of the SDRAM interface. Most of the Sdram -+ * control bits are defined in xsdram.h -+ */ -+#define SDRAM_FIXED_DLL_DELAY_SHIFT 47 -+#define SDRAM_FIXED_DLL_DELAY_BITS 5 -+#define SDRAM_FIXED_DLL_DELAY_MASK ((1ULL << SDRAM_FIXED_DLL_DELAY_BITS) - 1ULL) -+#define SDRAM_FIXED_DLL_DELAY(Value) ((SDRAM_FIXED_DLL_DELAY_MASK & (Value)) << SDRAM_FIXED_DLL_DELAY_SHIFT) -+#define SDRAM_FIXED_DELAY_ENABLE (1ULL << 52) -+#define SDRAM_GET_DLL_DELAY(Value) (((Value) >> SDRAM_FIXED_DLL_DELAY_SHIFT) & SDRAM_FIXED_DLL_DELAY_MASK) -+ -+#define SDRAM_DLL_CORRECTION_FACTOR 3 /* This is to allow for SSO and ringing on the DQ lines */ -+ -+#define PAGE_SIZE_4K 0x0 -+#define PAGE_SIZE_8K 0x1 -+#define PAGE_SIZE_64K 0x2 -+#define PAGE_SIZE_512K 0x3 -+#define PAGE_SIZE_2M 0x4 -+#define PAGE_SIZE_4M 0x5 -+#define PAGE_SIZE_64M 0x6 -+#define PAGE_SIZE_512M 0x7 -+ -+#define PAGE_SIZE_MASK 0x7 -+#define PAGE_MASK_MASK 0x1f -+ -+/* control reg bits i.e. E4_DataBusMap.LinkControlReg */ -+#define LCONT_REVA_GREEN_LED (1 << 0) -+#define LCONT_REVA_YELLOW_LED (1 << 1) -+#define LCONT_REVA_RED_LED (1 << 2) -+#define LCONT_REVA_ENABLE_LED_DRIVE (1 << 3) /* Enable manual setting of the Leds to the bits set above. */ -+ -+#define LCONT_REVB_DISABLE_TLB_PREFETCH (1 << 0) -+#define LCONT_REVB_DISABLE_CRC_ERROR_CHECKING (1 << 1) -+ -+ -+#define LCONT_EN_SYS_WRITES (1 << 4) /* Enable linkport writes to sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_SYS_READS (1 << 5) /* Enable linkport reads from sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_USER_WRITES (1 << 6) /* Enable linkport writes to user registers. i.e. all of E4_User_Regs. */ -+#define LCONT_EN_USER_READS (1 << 7) /* Enable linkport reads from user registers. i.e. all of E4_User_Regs. */ -+ -+#define LCONT_TEST_VALUE_MASK 0x3ff /* Value used for test writes and link boundary scan. */ -+#define LCONT_TEST_VALUE_SHIFT 8 -+#define LCONT_TEST_VALUE(Value) ((LCONT_LINK_STATE_MASK & (Value)) << LCONT_TEST_VALUE_SHIFT) -+ -+/* -+ * State read from LINK_STATE when TEST_VALUE is set to the following values. -+ * TEST_VALUE LINK_STATE read TEST_VALUE LINK_STATE read -+ * 000 - Data delay count 0 008 - Data delay count 8 -+ * 001 - Data delay count 1 009 - Link in value -+ * 002 - Data delay count 2 00a - PLL delay -+ * 003 - Data delay count 3 00b - Clock Delay -+ * 004 - Data delay count 4 00c ? ErrorVal8to0 -+ * 005 - Data delay count 5 00d ? ErrorVal17to9 -+ * 006 - Data delay count 6 00e ? ErrorVal26to18 -+ * 007 - Data delay count 7 00f ? ErrorVal35to27 -+ */ -+ -+#define LCONT_TEST_CONTROL_MASK 0x3 /* Selects and controls the action of the LINK_STATE value. */ -+#define LCONT_TEST_CONTROL_SHIFT 18 -+ -+#define LCONT_READ_ERRORS 0 /* {Mod45RequestChanged, FifoOverflowError, DataError, PhaseError, -+ * DeskewError, LockError, Locked, LinkNotReady} */ -+#define LCONT_READ_STATE 1 /* Read valus addressed by TEST_CONTROL value */ -+#define LCONT_FIX_LINK_DELAYS 2 /* Sets delays to TEST_CONTROL value */ -+#define LCONT_BOUNDARY_SCAN 3 /* Puts link into boundary scan. Outputs TEST_CONTROL value to link, -+ * reads LINK_STATE from link. */ -+ -+#define LCONT_LINK_STATE_MASK 0x3ff /* Read only */ -+#define LCONT_LINK_STATE_SHIFT 20 /* Read only */ -+#define LCONT_LINK_STATE(ControlRegValue) (LCONT_LINK_STATE_MASK & ((ControlRegValue) >> LCONT_LINK_STATE_SHIFT)) -+ -+/* control reg bits i.e. E4_DataBusMap.LinkContSettings */ -+#define LCONT_MOD45_DISABLE (1 << 0) /* is set the link will try to run in TNB mode. */ -+#define LCONT_CONFIG_PHASE_MASK 0x7 /* This set the delay through the phase alignment buffer. */ -+#define LCONT_CONFIG_PHASE_SHIFT 1 -+ -+#define LCONT_PLL_REF_VAL_BITS_MASK 0x7f /* This is the divide value on the LinkIn clock to form the comms PLL */ -+#define LCONT_PLL_REF_VAL_BITS_SHIFT 4 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_FORCE_COMMSCLK_LOCAL (1 << 11) /* This must be set at one end of a back to back Elan configuration. */ -+#define LCONT_LVDS_VOLTAGE_BITS_MASK 0x3 /* This is used to set the voltage swing on the LVDS link output pads. */ -+#define LCONT_LVDS_VOLTAGE_BITS_SHIFT 12 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_VOD_170 0 /* Approximate differential voltage swing in mV of link outputs into */ -+#define LCONT_VOD_360 1 /* a 100 ohm diferential load. */ -+#define LCONT_VOD_460 2 -+#define LCONT_VOD_550 3 -+ -+#define LCONT_LVDS_TERMINATION_MASK 0x3 /* This set the resistor values of the internal single ended termation */ -+#define LCONT_LVDS_TERMINATION_SHIFT 14 /* resistors of the link input and comms input clcok. */ -+ -+#define LCONT_TERM_55_OHM 0 /* Resistor values for internal termination of LVDS pads. */ -+#define LCONT_TERM_50_OHM 1 -+#define LCONT_TERM_AUTO_OHM 2 /* Should normally be set to auto. */ -+#define LCONT_TERM_45_OHM 3 -+ -+#define LCONT_LVDS_EN_TERM_UPDATE (1 << 47) /* This should be asserted and deasserted if LCONT_LVDS_TERMINATION is changed. */ -+ -+/* Macros used to access and construct MMU hash table and chain entries. */ -+/* -+ * Each hash entry is made up of a 64 byte block. Each entry hash two tags where each -+ * tag has 4 PTE's. PTE's 0 to 2 use the bottom 48 bits of a 64 bit word and PTE 3 -+ * uses the top 16 bits of 3 64 bit words. -+ * -+ * These macros can be used to build a single PTE. PTE3 needs to be built into a 48 bit -+ * object before they can be used. -+ */ -+#define PTE_ENTRY_MASK 0x0000ffffffffffffULL -+#define PTE_TYPE_MASK 0x000000000000000fULL -+#define PTE_PERM_MASK 0x00000000000000f0ULL -+#define PTE_PERM_TYPE_MASK 0x00000000000000ffULL -+#define PTE_REF_MASK 0x0000000000000100ULL -+#define PTE_PPN_MASK 0x00007ffffffffe00ULL -+#define PTE_MOD_MASK 0x0000800000000000ULL -+#define PTE_TOPADDR_MASK 0x0000600000000000ULL -+ -+#define PTE_MOD_SHIFT 47 -+#define PTE_PPN_SHIFT 9 -+#define PTE_REF_SHIFT 8 -+#define PTE_PERM_SHIFT 4 -+#define PTE_TYPE_SHIFT 0 -+ -+#define PTE_PADDR_SHIFT (12 - 9) /* Physical addresses are shifted down 3 this to go into the PTE */ -+ -+ -+/* Values required for tag 3 */ -+#define PTE_REF_3 0x0100000000000000ULL -+#define PTE_MOD_3 0x8000000000000000ULL -+#define PTE_ENTRY_MASK_3 0xffff000000000000ULL -+#define PTE_PERM_TYPE_MASK_3 0x00ff000000000000ULL -+#define PTE_ENTRY_3_FOR_0(NewPte) ((NewPte << (48)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_1(NewPte) ((NewPte << (32)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_2(NewPte) ((NewPte << (16)) & PTE_ENTRY_MASK_3) -+ -+/* Values required for the tags */ -+#define TAG_CONTEXT_MASK 0x0000000000003fffULL -+#define TAG_ADDRESS_MASK 0xfffffffff8000000ULL -+#define TAG_CHAINPTR_18TO6_MASK 0x0000000007ffc000ULL -+#define TAG_CHAINPTR_LOW_SHIFT (14 - 6) -+#define TAG_CHAINPTR_30TO19_MASK 0x0000000003ffc000ULL -+#define TAG_CHAINPTR_HIGH_SHIFT (19 - 14) -+#define TAG_COPY_BIT 0x0000000004000000ULL -+ -+/* -+ * This takes number loaded into the control register and returns the page size as a power of two. -+ */ -+ -+#define E4_PAGE_SIZE_TABLE E4_uint32 const PageSizeTable[] = {12, 13, 16, 19, 21, 22, 26, 29} -+#define E4_PAGE_SIZE_TABLE_SIZE (sizeof(PageSizeTable)/sizeof(PageSizeTable[0])) -+ -+/* -+ * This macro generates a hash block index. -+ * -+ * Cntx This is the 14 bit context. It should not be larger than 14 bits. -+ * VAddr This is the 64 bit virtual address. It does not require any masking and can be a byte address. -+ * PageSize This is the value loaded into the control register for this hash table. -+ * HashTableMask This should be set mask out upper bits past the end of the hash table. -+ */ -+#define E4MMU_SHIFT_ADDR(VAddr, Shift) \ -+ ((((E4_uint32)(VAddr)) >> (Shift)) | (((E4_uint32)((VAddr) >> 32)) << (32 - (Shift)))) -+ -+#define E4MMU_CONTEXT_SCRAMBLE(Cntx) \ -+ ((((Cntx) << 8) | ((Cntx) >> 6)) ^ (((Cntx) << 15) | ((Cntx) << 1))) -+ -+#define E4MMU_HASH_INDEX(Cntx, VAddr, PageShift, HashTableMask) \ -+ ((E4MMU_SHIFT_ADDR(VAddr, (PageShift) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(Cntx)) & (HashTableMask)) -+ -+#define E4MMU_TAG(vaddr,ctx) (((vaddr) & TAG_ADDRESS_MASK) | ((ctx) & TAG_CONTEXT_MASK)) -+ -+#define E4MMU_TAG2VADDR(tag,hashidx,PageShift,HashTableMask) \ -+ (((tag) & TAG_ADDRESS_MASK) | ((((hashidx) ^ E4MMU_CONTEXT_SCRAMBLE((tag) & TAG_CONTEXT_MASK)) & (HashTableMask)) << ((PageShift + 2)))) -+ -+/* -+ * Detailed bit descriptions for the tags and PTE's are better done with the macros -+ * defined above. -+ */ -+typedef struct _E4_HashTableEntry -+{ -+ E4_uint64 Tag[2]; -+ E4_uint64 TagPTE[2][3]; -+} E4_HashTableEntry; -+ -+#define E4MMU_TAG_OFFSET(tag) ((tag) << 3) -+#define E4MMU_PTE_LOW_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3)) -+#define E4MMU_PTE_HIGH_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3) + 4) -+#define E4MMU_PTE3_WORD0_OFFSET(tag) ((((tag)*3 + 2) << 3) + 6) -+#define E4MMU_PTE3_WORD1_OFFSET(tag) ((((tag)*3 + 3) << 3) + 6) -+#define E4MMU_PTE3_WORD2_OFFSET(tag) ((((tag)*3 + 4) << 3) + 6) -+ -+ -+/* -+ * Hash0AddrBits is the size of the hash table in bytes as a power of 2. -+ * e.g. 11 would give 32 hash entries where each entry is 64 bytes. -+ */ -+#define SETUP_HASH_TABLES(Hash0PageSize, Hash0AddrBits, Hash1PageSize, Hash1AddrBits) \ -+ (((Hash0PageSize) << CONT_TABLE0_PAGE_SIZE_SHIFT) | \ -+ ((Hash0AddrBits) << CONT_TABLE0_MASK_SIZE_SHIFT) | \ -+ ((Hash1PageSize) << CONT_TABLE1_PAGE_SIZE_SHIFT) | \ -+ ((Hash1AddrBits) << CONT_TABLE1_MASK_SIZE_SHIFT)) -+ -+/* ECC status register */ -+#define ECC_Addr(s) ((s) & 0x7ffffff8ULL) -+#define ECC_Syndrome(s) (((s) >> 32) & 0xffffULL) -+#define ECC_RisingDQSSyndrome(s) (((s) >> 32) & 0xffULL) -+#define ECC_FallingDQSSyndrome(s) (((s) >> 40) & 0xffULL) -+#define ECC_UncorrectableErr(s) (((s) >> 48) & 1ULL) -+#define ECC_MultUncorrectErrs(s) (((s) >> 49) & 1ULL) -+#define ECC_CorrectableErr(s) (((s) >> 50) & 1ULL) -+#define ECC_MultCorrectErrs(s) (((s) >> 51) & 1ULL) -+ -+/* Permission type saved in a PTE. This is a four bit field */ -+#define PERM_Disabled 0x0 -+#define PERM_Unused 0x1 -+#define PERM_LocDataRead 0x2 -+#define PERM_LocDataWrite 0x3 -+#define PERM_LocRead 0x4 -+#define PERM_LocExecute 0x5 -+#define PERM_ReadOnly 0x6 -+#define PERM_LocWrite 0x7 -+#define PERM_LocEventOnly 0x8 -+#define PERM_LocEventWrite 0x9 -+#define PERM_RemoteEvent 0xa -+#define PERM_RemoteAll 0xb -+#define PERM_RemoteReadOnly 0xc -+#define PERM_RemoteWriteLocRead 0xd -+#define PERM_DataReadWrite 0xe -+#define PERM_NoFault 0xf -+ -+#define PERM_Mask 0xf -+ -+/* Permission type hints to device driver */ -+#define PERM_Preload 0x10 -+ -+#define PTE_SetPerm(Perm) (((Perm) & PERM_Mask) << 4) -+ -+/* Control info saved in the lookup field of the TLB */ -+#define PTE_PciNotLocal (1ULL << 0) /* Directs the access to the PCI interface */ -+#define PTE_BigEndian (1ULL << 1) /* Valid for PCI entries only */ -+#define PTE_RelaxedOrder (1ULL << 2) /* Valid for PCI entries only */ -+#define PTE_DontSnoop (1ULL << 3) /* Valid for PCI entries only */ -+ -+#define PTE_UseFixedSet (1ULL << 1) /* Value for non PCI entries only */ -+#define PTE_CommandQueue (1ULL << 2) /* Value for non PCI entries only */ -+#define PTE_SetFixedSetNo(Set) ((((Set) & 3) << 2) | PTE_UseFixedSet) -+ -+#define PTE_TypeBitsMask (0xfULL) -+#define PTE_PermissionTypeMask (0xfULL << 4) -+#define PTE_Referenced (1ULL << 8) -+#define PTE_PhysicalPageNoMask (0x7ffffffffe00ULL) -+#define PTE_Modified (1ULL << 47) -+ -+#define PTE_PhysicalAddrShiftIntoPTE (12 - 9) -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E4_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+#define TLB_NUM_ENTRIES 16 -+/* -+ * The following macros are used with the test access port (TlbLineValue) for the TLBs. -+ */ -+#define TLV_DoPciAccess (1ULL << 0) -+#define TLV_CommandAccess (1ULL << 1) -+#define TLV_DoCacheAccess (1ULL << 2) -+#define TLV_notStartTLBWalk (1ULL << 3) -+#define TLV_UseFixedSet (1ULL << 4) -+#define TLV_BigEndian (1ULL << 4) -+#define TLV_RelaxedOrder (1ULL << 5) -+#define TLV_DontSnoop (1ULL << 6) -+#define TLV_FixedSetNo_MASK (3ULL << 5) -+#define TLV_PciTypeBits_MASK (7ULL << 4) -+#define TLV_LookupBits_MASK (0x7fULL) -+#define TLV_MissErr (1ULL << 7) -+#define TLV_TypeBits (0xffULL) -+ -+#define TLV_PhysicalAddr_MASK (0x3fffffffff000ULL) -+ -+#define TLV_TlbTesting (1ULL << 51) -+#define TLV_SelectUnitsTlbRead (1ULL << 52) -+#define TLV_SelectTProcTlbRead (1ULL << 53) -+ -+#define TLV_TlbLineSelect_MASK (0xf) -+#define TLV_UnitsTlbLineSelect_SHIFT (54) -+#define TLV_TProcTlbLineSelect_SHIFT (59) -+#define TLV_EnableUnitsTlbRead (1ULL << 58) -+#define TLV_EnableTProcTlbRead (1ULL << 63) -+ -+/* -+ * Use this macro to enable direct testing of the Units TLB. -+ * When Line is in the range 0 to 15 a TLB line is selected for reading or writing. -+ * When Line is set to -1 the tlb will be activated to perform a match. -+ */ -+#define TLV_UnitsTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableUnitsTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_UnitsTlbLineSelect_SHIFT))) -+#define TLV_TProcTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableTProcTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_TProcTlbLineSelect_SHIFT))) -+ -+/* -+ * Thread_Trap_State -+ * see f_RegFileControl.v TProcStatus -+ */ -+#define TS_HaltThread (1 << 0) -+#define TS_TrapForTooManyInstructions (1 << 1) -+#define TS_InstAccessException (1 << 2) -+#define TS_Unimplemented (1 << 3) -+#define TS_DataAccessException (1 << 4) -+#define TS_DataAlignmentError (1 << 5) -+#define TS_TrapForUsingBadData (1 << 6) -+#define TS_TrapTypeMask (0x7f) -+#define TS_DataPortNo(ts) (((ts) >> 7) & 7) -+#define TS_TrappedFlag (1 << 10) -+#define TS_MemLock (1 << 11) -+#define TS_XCCshift 12 -+#define TS_XCCmask 0xff -+#define TS_ICC(ts) (((ts) >> 12) & 15) -+#define TS_XCC(ts) (((ts) >> 16) & 15) -+#define TS_InstValid_F (1 << 20) -+#define TS_InstValid_R (1 << 21) -+#define TS_InstValid_E (1 << 22) -+#define TS_InstValid_W (1 << 23) -+#define TS_HighPriority (1 << 24) -+#define TS_RemoteThread (1 << 25) -+#define TS_TProcTranslationInProgress (1 << 26) -+#define TS_MemLock_E (1 << 27) -+ -+/* Thread run queue entries */ -+typedef struct E4_ThreadRegs -+{ -+ E4_uint64 Registers[7]; -+} E4_ThreadRegs; -+ -+typedef struct E4_TProcQueueEntry -+{ -+ E4_ThreadRegs Regs; /* XXXX: jon check this */ -+ E4_uint64 Context; /* XXXX: jon check this */ -+} E4_TProcQueueEntry; -+ -+typedef struct E4_DProcQueueEntry -+{ -+ E4_DMA Desc; -+ E4_uint64 Pad; -+} E4_DProcQueueEntry; -+ -+/* -+ * Packet acknowledge values. -+ */ -+#define E4_PAckOk 0 -+#define E4_PAckTestFail 1 -+#define E4_PAckDiscard 2 -+#define E4_PAckError 3 -+ -+/* -+ * return values from breaktest instruction. -+ */ -+#define ICC_CARRY_BIT (0x1ULL << 0) /* Breaktest: Load pending */ -+#define ICC_ZERO_BIT (0x1ULL << 1) /* Breaktest: Time to break */ -+#define ICC_SIGNED_BIT (0x1ULL << 2) /* Breaktest: Another thread ready */ -+#define ICC_TPROC_RDY_LOW_PRI (0x1ULL << 3) -+#define ICC_TPROC_RDY_HIGH_PRI (0x1ULL << 4) -+#define ICC_RUNNING_HIGH_PRI (0x1ULL << 5) -+#define ICC_RUNNING_AS_REMOTE (0x1ULL << 6) -+#define ICC_TIME_TO_BREAK (0x1ULL << 7) -+#define ICC_RS1LOAD_PENDING (0x1ULL << 8) -+#define ICC_TPROC_HALT (0x1ULL << 9) -+ -+/* -+ * Main Interrupt cookies -+ * [63:14] user cookie -+ * [13:0] context -+ */ -+#define E4_MAIN_INT_SHIFT 14 -+#define E4_MAIN_INT_COOKIE(cookie) ((cookie) >> E4_MAIN_INT_SHIFT) -+#define E4_MAIN_INT_CTX(cookie) ((cookie) & 0x3FFF) -+ -+typedef E4_uint64 E4_MainIntEntry; -+ -+#define E4_MainIntEntrySize sizeof (E4_MainIntEntry) -+ -+/* -+ * The internal databus is 64 bits wide. -+ * All writes to the internal registers MUST be made with 64 bit write operations. -+ * These can be made up of pairs 32 bit writes on the PCI bus. The writes will be -+ * treated as nops if they are performed with two separate 32 bit writes. -+ */ -+typedef volatile struct _E4_DataBusMap -+{ -+ E4_uint64 InputTrans[4][16]; /* 0x000 */ -+ -+ E4_uint64 Dma0TransAddr; /* 0x200 */ -+ E4_DMA Dma0Desc; /* Current Dma0 registers */ /* 0x208 */ -+ -+ E4_uint64 Dma1TransAddr; /* 0x240 */ -+ E4_DMA Dma1Desc; /* Current Dma1 registers */ /* 0x248 */ -+ -+ E4_uint64 Dma0LastPacketSize; /* 0x280 */ -+ E4_uint64 Dma0ThisPacketSize; /* 0x288 */ -+ E4_uint64 Dma0DescSizeInProg; /* 0x290 */ -+ E4_uint64 Dma0BytesToPrefetch; /* 0x298 */ -+ E4_uint64 Dma0PrefetchAddr; /* 0x2a0 */ -+ E4_uint64 EventCountAndType; /* 0x2a8 */ -+ E4_uint64 EventParameters[2]; /* 0x2b0 */ -+ -+ E4_uint64 Dma1LastPacketSize; /* 0x2c0 */ -+ E4_uint64 Dma1ThisPacketSize; /* 0x2c8 */ -+ E4_uint64 Dma1DescSizeInProg; /* 0x2d0 */ -+ E4_uint64 Dma1BytesToPrefetch; /* 0x2d8 */ -+ E4_uint64 Dma1PrefetchAddr; /* 0x2e0 */ -+ E4_Input_Ptrs InputTrapAndFilter; /* 0x2e8 */ -+ E4_uint64 EventAddress; /* 0x2f0 */ -+ E4_QueuePtr MainIntQueuePtrs; /* 0x2f8 */ -+ -+ E4_uint64 Event_Copy[16]; /* 0x300 */ -+ -+ E4_uint64 CommandCopy[7]; /* 0x380 */ -+ E4_uint64 CommandHold; /* 0x3b8 */ -+ -+ E4_uint64 InputQueueDesc[4]; /* 0x3c0 */ -+ -+ /* Run queue Pointers */ -+ E4_uint64 DProcLowPriPtrs; /* 0x3e0 */ -+ E4_uint64 DProcHighPriPtrs; /* 0x3e8 */ -+ E4_uint64 TProcLowPriPtrs; /* 0x3f0 */ -+ E4_uint64 TProcHighPriPtrs; /* 0x3f8 */ -+ -+ E4_uint64 CProcStatus; /* 0x400 */ -+ E4_uint64 TProcStatus; /* 0x408 */ -+ E4_uint64 IProcStatus; /* 0x410 */ -+ E4_uint64 EProcStatus; /* 0x418 */ -+ E4_uint64 DProc0Status; /* 0x420 */ -+ E4_uint64 DProc1Status; /* 0x428 */ -+ E4_Sched_Status SchedStatus; /* 0x430 */ -+ -+ E4_uint64 LoadIProcCntxFilter; /* Will load one of 4 cntx filter regs. Write only */ /* 0x438 */ -+ -+ E4_CommandControl CommandControl; /* 0x440 */ -+ E4_uint64 CommandCacheTestPort; /* 0x448 */ -+ E4_uint64 CommandLowPriRunPtrs; /* 0x450 */ -+ E4_uint64 CommandHighPriRunPtrs; /* 0x458 */ -+ E4_uint64 CommandSchedDataPort[4]; /* 0x460 */ -+ -+ E4_uint64 DmaRouteBuffer[2][2]; /* Write only. Should not be written to. */ /* 0x480 */ -+ E4_uint64 StenRouteBuffer[2]; /* Write only. Should not be written to. */ /* 0x4a0 */ -+ E4_uint64 pad4[0x098 - 0x096]; /* 0x4b0 */ -+ -+ E4_uint64 DmaAlignmentPort[8]; /* Write only. Should only be written to clear the prev reg. */ /* 0x4c0 */ -+ -+ E4_uint64 MmuBlockEntry[8]; /* Used for hash table and chain fetches */ /* 0x500 */ -+ E4_uint64 WriteUnitsTlbLine[3]; /* 0x550 */ -+ E4_uint64 pad5; /* 0x540 */ -+ E4_uint64 WriteTProcTlbLine[3]; /* 0x568 */ -+ E4_uint64 pad6; /* 0x540 */ -+ -+ E4_uint64 MmuTableBasePtrs; /* Both tables packed into a single 64 bit value */ /* 0x580 */ -+ E4_uint64 MmuFaultAndRootCntxPtr; /* Both packed into a single 64 bit value */ /* 0x588 */ -+ E4_uint64 UnitsVAddr; /* 0x590 */ -+ E4_uint64 TProcVAddr; /* 0x598 */ -+ E4_uint64 UnitsCntx; /* 0x5a0 */ -+ E4_uint64 TProcCntx; /* Read only. Writes access VProcCacheWritePort */ /* 0x5a8 */ -+ E4_uint64 FaultAddrReg; /* 0x5b0 */ -+ E4_uint64 FaultTypeAndContextReg; /* 0x5b8 */ -+ -+ E4_uint32 SysControlReg; /* 0x5c0 */ -+ E4_uint32 CacheTagValue; /* 0x5c4 */ -+ E4_uint64 TlbLineValue; /* 0x5c8 */ -+ E4_uint64 SDRamConfigReg; /* 0x5d0 */ -+ E4_uint32 InterruptMask; /* 0x5d8 */ -+ E4_uint32 InterruptReg; /* 0x5dc */ -+ E4_uint64 SDRamECCStatus; /* 0x5e0 */ -+ E4_uint32 LinkControlReg; /* 0x5e8 */ -+ E4_uint32 LinkContSettings; /* 0x5ec */ -+ E4_uint64 LinkPortKey; /* 0x5f0 */ -+ E4_uint64 LinkPortLock; /* 0x5f8 */ -+ -+ E4_uint64 SDRamWriteBuffer[4][8]; /* 0x600 */ -+ E4_uint64 SDRamReadBuffer[4][8]; /* 0x700 */ -+ -+ E4_uint64 TProcRegs[64]; /* 0x800 */ -+ E4_uint64 TProcStartUp[8]; /* Not to be used except by the elan itself */ /* 0xa00 */ -+ -+ E4_uint64 LoadPending; /* 0xa40 */ -+ E4_uint64 StortPending; /* 0xa48 */ -+ E4_uint64 DirtyBits; /* 0xa50 */ -+ E4_uint64 BadBits; /* 0xa58 */ -+ -+ E4_uint64 ICachePort_Cntl_Addr; /* 0xa60 */ -+ E4_uint64 Thread_Trap_State; /* 0xa68 */ -+ -+/* Instruction buffer (4 * 32 bit words) */ -+ E4_uint64 nPC_W; /* 0xa70 */ -+ E4_uint64 PC_W; /* 0xa78 */ -+ -+ E4_uint64 ICacheFillData[8]; /* 0xa80 */ -+ E4_uint64 ICachePort[8]; /* 0xac0 */ -+ -+ E4_uint64 PciDataBufs[4][8]; /* 0xb00 */ -+ -+ E4_uint64 CommandQueueBuffer[128]; /* 0xc00 */ -+} E4_DataBusMap; -+ -+#define LINK_PORT_LOCK_VALUE 0x123456789abcdef0ULL -+ -+/* -+ * These macros are used to setup the thread pcoessors ICache. -+ */ -+#define E4_ICacheTagAddrShift 6 -+#define E4_AccessICacheRams 1 -+#define E4_InvalidTagValue 0xffffffffffffffffULL -+#define E4_ICacheSizeInBytes (1024*16) -+#define E4_ICacheLineSizeInBytes (64) -+#define E4_ICacheLines (E4_ICacheSizeInBytes/E4_ICacheLineSizeInBytes) -+#define E4_ICachePortSize ( (sizeof((E4_DataBusMap *) 0)->ICachePort) / \ -+ (sizeof((E4_DataBusMap *) 0)->ICachePort[0])) -+ -+#define E4_ICacheFixupInsn 0xc0b02f95ull /* st1 [%r0 + 0xf95] */ -+#define E4_ICacheFixupAddr 0xf95ull -+#define E4_ICacheFixupOffset 0xfc0 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E4_EventInt -+{ -+ E4_uint64 ForceAlign; -+ struct { -+ E4_uint32 IntCookie; -+ E4_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E4_EventInt; -+ -+/* -+ * The following are used to interpret a fault status register. -+ */ -+ -+/* -+ * FSR[14:0] - AccessType -+ * -+ * T = Type bit -+ * S = size bit. Size is in units of 64 bits or 8 bytes. -+ * E = Byte end pointer. Used to define the last written byte of the last 64 bits written. -+ * D = Data type bit. Used for endian conversion in the PCI interface. -+ * C = Used by the cache to decide if this access should allocate a cache line. -+ * d = Set if dma read or write data data. This is used to guarantee order at the PCI interface. -+ * A = Access type used to check permissions by the MMU in a virtual access. -+ * P = Part Write. If set some byte enables may be used. Effects the action of a cache miss. -+ */ -+ -+/* FSR[7:0] */ -+/* bit 7 => virtual write */ -+#define AT_VirtualWriteAccBit (1 << 7) /* AAADDdC1EEESSSS = Virtual Write */ -+#define AT_VirtualWriteSizeMask 0xf /* size of write access (0 => 128 bytes) */ -+#define AT_VirtualWriteEndPtrShift 4 /* end byte pointer for part write block */ -+#define AT_VirtualWriteEndPtrMask 0x7 -+ -+/* else bit 6 => virtual read */ -+#define AT_VirtualReadAccBit (1 << 6) /* AAADDdC01SSSSSS = Virtual Read */ -+#define AT_VirtualReadSizeMask 0x3f /* size of read access (0 => 512 bytes) */ -+ -+/* else => special access */ -+#define AT_SelBitsMask 0xf /* Bits to select the type of acces from */ -+#define AT_SelBitsShift 0x4 -+#define AT_SpecialRd (0x0 << 4) /* AAADDdC0000TTTT = Special read Access */ -+#define AT_SpecialWr (0x1 << 4) /* AAADDdC0001TTTT = Special write Access */ -+#define AT_PhysicalRd (0x2 << 4) /* AAADDdC00100SSS = Physical Read */ -+#define AT_PhysicalWr (0x3 << 4) /* AAADDdC0011PSSS = Physical write */ -+ -+#define AT_OtherSizeMask 0xf /* Size bits used by all other accesses. 0=128 bytes */ -+#define AT_SpecialBitsMask 0xf /* Bits used to define the special access types */ -+#define AT_CacheSizeBitsMask 0x7 /* Size bits used for local accesses. 0=64 */ -+#define AT_CachePhysPartWriteBit 0x8 /* This bit is set if the access is a part write to the cache */ -+ -+/* Special memory access operations */ -+#define AT_RegAccess 0x0 -+#define AT_GetCntxFilter 0xe /* Only used by special reads */ -+#define AT_RouteFetch 0xf /* Only used by special reads */ -+ -+/* FSR[9:8] */ -+#define AT_NonAlloc (1 << 8) /* 1=Do not fill cache with this data */ -+#define AT_DmaData (1 << 9) /* This is a DMA read access. Required to guarantee dma read order. */ -+ -+/* FSR[11:10] - Data Type - defines data type for endian conversion in PCI interface*/ -+#define AT_BlkDataTyMask 0x3 -+#define AT_BlkDataTyShift 10 -+ -+#define AT_BlkDataType(FSR) (((FSR) >> AT_BlkDataTyShift) & AT_BlkDataTyMask) -+#define AT_TypeByte 0x0 -+#define AT_TypeHWord 0x1 -+#define AT_TypeWord 0x2 -+#define AT_TypeDWord 0x3 -+ -+/* FSR[14:12] - Access Permissions */ -+#define AT_PermBitsMask 0x7 -+#define AT_PermBitsShift 12 -+ -+#define AT_Perm(FSR) (((FSR) >> AT_PermBitsShift) & AT_PermBitsMask) -+#define AT_PermLocalDataRead 0x0 -+#define AT_PermLocalDataWrite 0x1 -+#define AT_PermRemoteRead 0x2 -+#define AT_PermRemoteWrite 0x3 -+#define AT_PermExecute 0x4 -+#define AT_PermLocalEvent 0x5 -+#define AT_PermRemoteEvent 0x7 -+ -+/* FSR[22:15] - reason for fault */ -+ -+#define FSR_WalkForThread (1 << 15) /* The thread processor caused the fault */ -+#define FSR_Walking (1 << 16) /* The fault was caused during a hash table access */ -+#define FSR_NoTranslationsFound (1 << 17) /* The hash table did not contain a matching tag */ -+#define FSR_WalkingProtectionFault (1 << 18) /* A protection fault was detected while walking */ -+#define FSR_HashTable1 (1 << 19) /* Was accessing hash table 1 not 0 */ -+#define FSR_RouteVProcErr (1 << 20) /* This is an invalid vproc for a route fetch */ -+#define FSR_FaultForBadData (1 << 21) /* Bad data (double bit ECC error) while performing a walk access */ -+#define FSR_FaultForMaxChainCount (1 << 22) /* The Elan4 has walked a chain of 1024 items. */ -+ -+typedef volatile struct _E4_FaultSave -+{ -+ E4_uint64 FSRAndFaultContext; /* Bits 0-31 : FaultContext. Bits 32-63 : FaultStatus Register */ -+ E4_uint64 FaultAddress; -+} E4_FaultSave; -+ -+#define FaultSaveContext(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) & 0xFFFFFFFF)) -+#define FaultSaveFSR(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) >> 32)) -+ -+typedef union E4_TrTypeCntx -+{ -+ E4_uint32 TypeContext; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 Type:16; /* Transaction type field */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E4_TrTypeCntx; -+ -+#define MAX_TRAPPED_TRANS 28 -+#define TRANS_DATA_DWORDS 16 -+#define TRANS_DATA_BYTES 128 -+#define NO_OF_INPUT_CHANNELS 4 -+ -+#define CH0_LOW_PRI_CHAN 0 -+#define CH1_LOW_PRI_CHAN 1 -+#define CH0_HIGH_PRI_CHAN 2 -+#define CH1_HIGH_PRI_CHAN 3 -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef struct _E4_IprocTrapHeader -+{ -+ E4_uint64 TrAddr; -+ E4_uint64 IProcStatusCntxAndTrType; -+} E4_IprocTrapHeader; -+ -+typedef struct _E4_IprocTrapData -+{ -+ E4_uint64 Data[TRANS_DATA_DWORDS]; -+} E4_IprocTrapData; -+ -+/* -+ * This struct defines the trap state for the inputers. It requires a contiguous 16K byte block of local memory. -+ * The channel bits have been grouped to the low end of the address to force all Identify cookies to use the -+ * same cache line. -+ */ -+typedef struct _E4_IprocTrapState -+{ -+ E4_IprocTrapData TrData[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_IprocTrapHeader TrHeader[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_uint64 pad[8*NO_OF_INPUT_CHANNELS]; -+} E4_IprocTrapState; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E4_LowPriQueueSize 0x400 -+#define E4_HighPriQueueSize 0x100 -+ -+typedef struct _E4_FaultSaveArea -+{ -+ E4_FaultSave TProcData[8]; -+ E4_FaultSave TProcInst; -+ E4_FaultSave Dummy[7]; -+ E4_FaultSave SchedProc; -+ E4_FaultSave DProc; -+ E4_FaultSave EventProc; -+ E4_FaultSave IProc; -+ E4_FaultSave DProcData[4]; -+ E4_FaultSave QReadData[8]; -+} E4_FaultSaveArea; -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E4_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E4_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+typedef struct _E4_CommandPort -+{ -+ volatile E4_uint64 Command[1024]; /* a whole 8k page */ -+} E4_CommandPort; -+ -+/* -+ * This is the allocation of unit numbers within the ELAN. It is used to extract the fault address -+ * and fault type after a unit has trapped on a memory fetch. Only units that can generate traps -+ * have been included. -+ */ -+#define CUN_TProcData0 0x00 -+#define CUN_TProcData1 0x01 -+#define CUN_TProcData2 0x02 -+#define CUN_TProcData3 0x03 -+#define CUN_TProcData4 0x04 -+#define CUN_TProcData5 0x05 -+#define CUN_TProcData6 0x06 -+#define CUN_TProcData7 0x07 -+#define CUN_TProcInst 0x08 -+ -+/* memory current unit numbers -+ * TProc data bus */ -+#define CUN_DProcPA0 0x10 -+#define CUN_DProcPA1 0x11 -+#define CUN_DProcPrefetch 0x12 -+#define CUN_CommandProc 0x13 -+#define CUN_DProcData0 0x14 /* Dma prefetch reads. */ -+#define CUN_DProcData1 0x15 /* Dma prefetch reads. */ -+#define CUN_DProcData2 0x16 /* Dma prefetch reads. */ -+#define CUN_DProcData3 0x17 /* Dma prefetch reads. */ -+ -+#define CUN_IProcLowPri 0x18 -+#define CUN_IProcHighPri 0x19 -+#define CUN_Spare0 0x1A -+#define CUN_Spare1 0x1B -+#define CUN_Spare2 0x1C -+#define CUN_ThreadQueue 0x1D -+#define CUN_EventProc0 0x1e -+#define CUN_EventProc1 0x1f -+ -+#define CUN_Entries 0x20 -+ -+typedef struct E4_Registers -+{ -+ E4_CacheTags Tags; /* 4k bytes c000 -> cfff */ -+ E4_DataBusMap Regs; /* 4k bytes d000 -> dfff */ -+ E4_User_Regs uRegs; /* 8k bytes e000 -> ffff */ -+} E4_Registers; -+ -+#define I2cCntl_I2cPortWrite (0 << 0) -+#define I2cCntl_I2cPortRead (1 << 0) -+#define I2cCntl_I2cPortGenStopBit (1 << 1) -+#define I2cCntl_I2cPortGenRestartBit (1 << 2) -+#define I2cCntl_I2cPortAccFailed (1 << 3) -+#define I2cCntl_I2cStopped (1 << 4) -+#define I2cCntl_I2cWakeupFailed (1 << 5) -+#define I2cCntl_I2cFastMode (1 << 6) -+#define I2cCntl_I2cPortBusy (1 << 7) -+ -+#define I2cCntl_LedI2cRegBase_Mask 0x7f -+#define I2cCntl_I2cUpdatingLedReg (1 << 7) -+ -+#define I2cCntl_InvertLedValues (1 << 0) /* read/write */ -+#define I2cCntl_LedRegWriteFailed (1 << 1) /* read only */ -+#define I2cCntl_EEPromLoadFailed (1 << 2) /* read only */ -+#define I2cCntl_InhibitI2CRom (1 << 3) /* read only */ -+#define I2cCntl_BadRomCrc (1 << 4) /* read only */ -+#define I2cCntl_MapInI2cConfigData (1 << 5) /* read/write */ -+#define I2cCntl_SampleNewLedValues (1 << 6) /* read/write */ -+#define I2cCntl_ClearLinkError (1 << 7) /* write only */ -+ -+typedef struct E4_I2C -+{ -+ volatile E4_uint8 I2cWrData; -+ volatile E4_uint8 I2cRdData; -+ volatile E4_uint8 I2cPortControl; -+ volatile E4_uint8 I2cLedBase; -+ volatile E4_uint8 I2cStatus; -+ volatile E4_uint8 I2cLedsValue; -+ volatile E4_uint16 I2cPad; -+ -+ E4_uint8 pad[256 - sizeof(E4_uint64)]; -+ -+ E4_uint8 UnchangedElan4ConfigRegs[256]; -+ E4_uint8 I2cRomConfigShadowValues[256]; -+ E4_uint8 ChangedElan4ConfigRegs[256]; -+} E4_I2C; -+ -+typedef struct _E4_ContextControlBlock -+{ -+ E4_uint32 Filter; /* Use a Network context to index for this value */ -+ E4_uint32 VirtualProcessTable; /* Use a local context to index for this value */ -+} E4_ContextControlBlock; -+ -+/* -+ * Filter -+ * [13:0] Context -+ * [14] DiscardAll -+ * [15] AckAll -+ * [16] HighPri -+ * [17] CountStats -+ * [31:18] Unused -+ */ -+#define E4_FILTER_STATS (1 << 17) -+#define E4_FILTER_HIGH_PRI (1 << 16) -+#define E4_FILTER_ACKOK_ALL (1 << 15) -+#define E4_FILTER_DISCARD_ALL (1 << 14) -+#define E4_FILTER_CONTEXT_MASK (0x3FFF) -+ -+/* -+ * VirtualProcessTable -+ * [8:0] Unused -+ * [12:9] Size num vp entries = 512 << Size -+ * [30:13] Pointer -+ * [31] Valid -+ */ -+#define E4_VPT_MIN_ENTRIES 512 -+#define E4_VPT_VALID ((unsigned)1 << 31) -+#define E4_VPT_PTR_SHIFT 0 -+#define E4_VPT_SIZE_SHIFT 9 -+#define E4_VPT_SIZE_MASK 0xf -+#define E4_VPT_NUM_VP(vpt_val) (E4_VPT_MIN_ENTRIES << (((vpt_val) >> E4_VPT_SIZE_SHIFT) & E4_VPT_SIZE_MASK)) -+#define E4_VPT_VALUE(ptr,size) (((ptr) << E4_VPT_PTR_SHIFT) | ((size) << E4_VPT_SIZE_SHIFT)) -+ -+ -+/* Virtual Process Table */ -+typedef struct _E4_VirtualProcessEntry -+{ -+ E4_uint64 Values[2]; -+} E4_VirtualProcessEntry; -+ -+/* -+ * Entries have the following format - rtX is a packed route -+ * -+ * |rt11|rt10|rt9 |rt8 |rt7 |rt6 |rt5 |rt4 |rt3 |rt2 |rt2 |rt0 |PAAADD RRRRRR| -+ * |output context |rt23|rt22|rt21|rt20|rt19|rt18|rt17|rt16|rt15|rt14|rt13|rt12| -+ */ -+ -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_CTXT_MASK (~((1ull << ROUTE_CTXT_SHIFT)-1)) -+#define ROUTE_CTXT_VALUE(ctx) (((E4_uint64) ctx) << ROUTE_CTXT_SHIFT) -+ -+#define ROUTE_PACKED_OFFSET 16 -+#define ROUTE_NUM_PACKED 24 -+ -+/* defines for first flit of a route */ -+#define FIRST_TIMEOUT(Val) ((Val) << 14) /* [15:14] */ -+#define FIRST_SYSTEM_PACKET (1 << 13) /* [13] */ -+#define FIRST_FLOOD_PACKET (1 << 12) /* [12] */ -+#define FIRST_HIGH_PRI (1 << 11) /* [11] */ -+#define FIRST_AGE(Val) ((Val) << 7) /* [10:7] */ -+#define FIRST_OPTIONS_MASK (0xFF80) -+ -+/* [6:0] unpacked 1st route value */ -+#define FIRST_INVALID (0) -+#define FIRST_ROUTE(Val) (0x08 | (Val)) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_INVALID (0) -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+#endif /* _ASM */ -+/* The MMU root context pointer has a mask to bounds check -+ * it - this is computed as follows. -+ */ -+#define E4_CONTEXT_MASK(num) (((num) >= 0x2000) ? 0x00 : \ -+ ((num) >= 0x1000) ? 0x80 : \ -+ ((num) >= 0x0800) ? 0xc0 : \ -+ ((num) >= 0x0400) ? 0xe0 : \ -+ ((num) >= 0x0200) ? 0xf0 : \ -+ ((num) >= 0x0100) ? 0xf8 : \ -+ ((num) >= 0x0080) ? 0xfc : \ -+ ((num) >= 0x0040) ? 0xfe : 0xff) -+/* -+ * This generates the size field for a virtual process table. -+ * Size defined as 2^n no of 8K pages. -+ * Single cycle route fetches are possible if the minimum vproc table size is 8k. -+ */ -+#define E4_GEN_VPT_SIZE(Size) (((Size) & E4_VPT_SIZE_MASK) << E4_VPT_SIZE_SHIFT) -+ -+#define COMMAND_RUN_QUEUE_BITS (13 + 2) /* 8K entries of 4 bytes. This is fixed in hardware. */ -+#define COMMAND_DESCS_SPACE_BITS (13 + 5) /* 8K entries of 32 bytes. This is fixed in hardware. */ -+#define COMMAND_INSERTER_CACHE_ENTRIES 16 -+ -+#define COM_TEST_PORT_ADDR_MASK 0xfULL -+#define COM_TEST_PORT_ADDR_SH 0 -+ -+/* -+ * The flush register is accessed through the CommandControl register. -+ * The address is naturally alligned. It also positions the command descriptors in memory. -+ * When no command queues need flushing it should be or with COM_FLUSH_INVALID. This sets -+ * it to the top command queue descriptor. This cannot be accessed from the PCI. -+ */ -+#define COM_ENABLE_DEQUEUE (1 << 4) -+#define COM_FLUSH_DESCRIPTOR_MASK 0x7fffffe0ULL -+#define COM_FLUSH_INVALID 0x0003ffe0ULL -+ -+ -+/* -+ * Elan4 BAR1 is split up as follows : -+ * -+ * RevA -+ * 0x3f00000 EBUS other -+ * 0x3e00000 EBUS ROM -+ * 0x3dfc000 registers -+ * 0x0000000 command ports -+ * -+ * RevB -+ * 0x3ffc000 registers -+ * 0x3ff8000 padding -+ * 0x3ff6000 i2c registers -+ * 0x0000000 command ports -+ */ -+#define ELAN4_BAR1_SIZE (1 << 26) /* 64M */ -+#define ELAN4_REG_SIZE (1 << 14) /* 16K */ -+ -+#define ELAN4_REVA_EBUS_SIZE (1 << 21) /* 2M */ -+#define ELAN4_REVA_EBUS_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REVA_EBUS_SIZE) -+#define ELAN4_REVA_REG_OFFSET (ELAN4_REVA_EBUS_OFFSET - ELAN4_REG_SIZE) -+#define ELAN4_REVA_NUM_COMMAND_QUEUES (ELAN4_REVA_REG_OFFSET >> 13) -+ -+#define ELAN4_REVA_EBUS_ROM_SIZE (1 << 20) /* 1M */ -+#define ELAN4_REVA_EBUS_ROM_OFFSET 0 -+ -+#define ELAN4_REVB_I2C_PADDING (1 << 14) /* 16K */ -+#define ELAN4_REVB_I2C_SIZE (1 << 13) /* 8k */ -+#define ELAN4_REVB_REG_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REG_SIZE) -+#define ELAN4_REVB_I2C_OFFSET (ELAN4_REVB_REG_OFFSET - ELAN4_REVB_I2C_PADDING - ELAN4_REVB_I2C_SIZE) -+#define ELAN4_REVB_NUM_COMMAND_QUEUES (ELAN4_REVB_I2C_OFFSET >> 13) -+ -+#endif /* notdef _ELAN4_REGISTERS_H */ -Index: linux-2.4.21/include/elan4/sdram.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/sdram.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/sdram.h 2005-06-01 23:12:54.743417216 -0400 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_SDRAM_H -+#define __ELAN4_SDRAM_H -+ -+#ident "$Id: sdram.h,v 1.8 2003/09/24 13:55:55 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/sdram.h,v $*/ -+ -+/* Include header file generated by sdram configuration program */ -+#include -+ -+/* SDRAM bank shift definitions */ -+#define SDRAM_0_CS_SHIFT 25 -+#define SDRAM_1_CS_SHIFT 27 -+#define SDRAM_2_CS_SHIFT 28 -+#define SDRAM_3_CS_SHIFT 29 -+ -+#define SDRAM_BANK_SHIFT(cfg) \ -+ (((cfg >> SDRAM_RamSize_SH) & 3) == 0 ? SDRAM_0_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 1 ? SDRAM_1_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 2 ? SDRAM_2_CS_SHIFT : SDRAM_3_CS_SHIFT) -+ -+#define SDRAM_BANK_SIZE(cfg) (1ULL << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_BANK_OFFSET(cfg,bank) ((unsigned long long)(bank) << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_NUM_BANKS(cfg) (4) -+#define SDRAM_MAX_BANKS 4 -+ -+/* When the elan access sdram it passes eaddr[12] as sdramaddr[12] when -+ * running with a 4k page size, however PCI accesses pass paddr[12], so -+ * we must ensure that sdram pages are allocated such that eaddr[12] is the -+ * same as paddr[12] - the easiest way is to allocate sdram in 8k chunks and -+ * ensure that maddr[12] == eaddr[12] == pgoff[0] */ -+#define SDRAM_MIN_PAGE_SIZE (8192) -+ -+#endif /* __ELAN4_SDRAM_H */ -Index: linux-2.4.21/include/elan4/stats.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/stats.h 2005-06-01 23:12:54.743417216 -0400 -@@ -0,0 +1,83 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.10.12.1 2004/10/06 11:09:12 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/stats.h,v $*/ -+ -+#ifndef __ELAN4_STATS_H -+#define __ELAN4_STATS_H -+ -+#define ELAN4_DEV_STATS_BUCKETS 8 -+ -+ -+typedef struct elan4_dev_stats -+{ -+ unsigned long s_interrupts; -+ -+ unsigned long s_mainints[ELAN4_DEV_STATS_BUCKETS]; -+ unsigned long s_mainint_punts; -+ unsigned long s_mainint_rescheds; -+ -+ unsigned long s_haltints; -+ -+ unsigned long s_cproc_traps; -+ unsigned long s_dproc_traps; -+ unsigned long s_eproc_traps; -+ unsigned long s_iproc_traps; -+ unsigned long s_tproc_traps; -+ -+ unsigned long s_cproc_trap_types[0x10]; -+ unsigned long s_dproc_trap_types[6]; -+ unsigned long s_eproc_trap_types[4]; -+ unsigned long s_iproc_trap_types[0xa]; -+ unsigned long s_tproc_trap_types[7]; -+ -+ unsigned long s_correctable_errors; -+ unsigned long s_multiple_errors; -+ -+ unsigned long s_link_errors; -+ unsigned long s_lock_errors; -+ unsigned long s_deskew_errors; -+ unsigned long s_phase_errors; -+ unsigned long s_data_errors; -+ unsigned long s_fifo_overflow0; -+ unsigned long s_fifo_overflow1; -+ unsigned long s_mod45changed; -+ unsigned long s_pack_not_seen; -+ unsigned long s_linkport_keyfail; -+ -+ unsigned long s_eop_reset; -+ unsigned long s_bad_length; -+ unsigned long s_crc_bad; -+ unsigned long s_crc_error; -+ -+ unsigned long s_cproc_timeout; -+ unsigned long s_dproc_timeout; -+ -+ unsigned long s_sdram_bytes_free; -+} ELAN4_DEV_STATS; -+ -+#define MainIntBuckets ((int[ELAN4_DEV_STATS_BUCKETS-1]) {1, 2, 3, 4, 8, 16, 32}) -+ -+#define BumpDevStat(dev,stat) ((dev)->dev_stats.stat++) -+#define BucketDevStat(dev,stat,n,bucket) ((n) <= (bucket)[0] ? (dev)->dev_stats.stat[0]++ : \ -+ (n) <= (bucket)[1] ? (dev)->dev_stats.stat[1]++ : \ -+ (n) <= (bucket)[2] ? (dev)->dev_stats.stat[2]++ : \ -+ (n) <= (bucket)[3] ? (dev)->dev_stats.stat[3]++ : \ -+ (n) <= (bucket)[4] ? (dev)->dev_stats.stat[4]++ : \ -+ (n) <= (bucket)[5] ? (dev)->dev_stats.stat[5]++ : \ -+ (n) <= (bucket)[6] ? (dev)->dev_stats.stat[6]++ : \ -+ (dev)->dev_stats.stat[7]++) -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /*__ELAN4_STATS_H */ -Index: linux-2.4.21/include/elan4/tprintf.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/tprintf.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/tprintf.h 2005-06-01 23:12:54.743417216 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TPRINTF_H -+#define __ELAN4_TPRINTF_H -+ -+#ident "$Id: tprintf.h,v 1.6 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/tprintf.h,v $*/ -+ -+ -+#ifdef _ASM -+#define TPRINTF0(string) add %r0, __LINE__, %r0 -+#define TPRINTF1(string,reg) add reg, __LINE__, %r0 -+#else -+#define TPRINTF0(string) asm volatile ("add %%r0, %0, %%r0" : : "i" (__LINE__)) -+#define TPRINTF1(string, value) asm volatile ("add %0, %1, %%r0" : : "r" (value), "i" (__LINE__)) -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TPRINTF_H */ -Index: linux-2.4.21/include/elan4/trap.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/trap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/trap.h 2005-06-01 23:12:54.743417216 -0400 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.h,v 1.10 2003/10/07 12:11:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.h,v $*/ -+ -+#ifndef __ELAN4_TRAP_H -+#define __ELAN4_TRAP_H -+ -+/* -+ * If the EProc Faults whilst performing an action (e.g. Read/Write on the data src or dest Addr) -+ * the Eproc increments the Addr(s) by a block size (64 bytes): -+ * 1: Fault on Read: -+ * Src EventAddr = Read Addr + block -+ * 2: Fault on Write: -+ * Src EventAddr = Read Addr + block -+ * Dst EventAddr = Read Addr + block -+ * Size = Size - block ndwords -+ * We must rewind the addr correctly to completely the transfer successfully -+ */ -+#define EVENT_COPY_NDWORDS 0x8 -+#define EVENT_COPY_BLOCK_SIZE 0x40 -+ -+typedef struct elan4_eproc_trap -+{ -+ E4_uint64 tr_status; -+ E4_FaultSave tr_faultarea; -+ E4_Event tr_event; -+ E4_Addr tr_eventaddr; -+} ELAN4_EPROC_TRAP; -+ -+typedef struct elan4_cproc_trap -+{ -+ E4_uint64 tr_status; /* cproc status register */ -+ E4_uint64 tr_command; /* cproc command */ -+ E4_CommandQueueDesc tr_qdesc; /* copy of command queue descriptor */ -+ E4_FaultSave tr_faultarea; /* fault area for mmu traps */ -+ ELAN4_EPROC_TRAP tr_eventtrap; /* associated event trap (waitevent) */ -+} ELAN4_CPROC_TRAP; -+ -+typedef struct elan4_dproc_trap -+{ -+ E4_DMA tr_desc; -+ E4_FaultSave tr_packAssemFault; -+ E4_FaultSave tr_prefetchFault; -+ E4_uint64 tr_status; -+} ELAN4_DPROC_TRAP; -+ -+typedef struct elan4_tproc_trap -+{ -+ E4_uint64 tr_regs[64]; -+ E4_FaultSave tr_dataFault; -+ E4_FaultSave tr_instFault; -+ E4_uint64 tr_status; -+ E4_uint64 tr_state; -+ E4_Addr tr_pc; -+ E4_Addr tr_npc; -+ E4_uint64 tr_dirty; -+ E4_uint64 tr_bad; -+} ELAN4_TPROC_TRAP; -+ -+typedef struct elan4_iproc_trap -+{ -+ E4_uint32 tr_numTransactions; -+ E4_uint32 tr_flags; -+ E4_uint32 tr_trappedTrans; -+ E4_uint32 tr_waitForEopTrans; -+ E4_uint32 tr_identifyTrans; -+ E4_uint32 tr_pad; -+ -+ E4_FaultSave tr_faultarea; -+ E4_IprocTrapHeader tr_transactions[MAX_TRAPPED_TRANS]; -+ E4_IprocTrapData tr_dataBuffers[MAX_TRAPPED_TRANS]; -+} ELAN4_IPROC_TRAP; -+ -+#define TR_FLAG_ACK_SENT (1 << 0) -+#define TR_FLAG_EOP_ERROR (1 << 1) -+#define TR_FLAG_BAD_TRANS (1 << 2) -+#define TR_FLAG_DMA_PACKET (1 << 3) -+#define TR_FLAG_EOP_BAD (1 << 4) -+#define TR_FLAG_TOOMANY_TRANS (1 << 5) -+ -+#define TR_TRANS_INVALID (0xffffffff) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_TRAP_H */ -Index: linux-2.4.21/include/elan4/trtype.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/trtype.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/trtype.h 2005-06-01 23:12:54.744417064 -0400 -@@ -0,0 +1,112 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_TRTYPE_H -+#define _ELAN4_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.20 2004/02/06 10:38:21 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/trtype.h,v $*/ -+ -+/*<15:11> Size field is used to give the number of additional 64 bit data values. -+ A value from 0 to 16 inclusive is valid. */ -+ -+#include -+ -+#define TR_SIZE_SHIFT (11) -+#define TR_SIZE_MASK (0x1f << TR_SIZE_SHIFT) -+#define SET_TR_SIZE(Size) (((Size) << TR_SIZE_SHIFT) & TR_SIZE_MASK) -+ -+/* <10:9> Last Transaction and AckNow bits, marks the last transaction and -+ enables a PACK_OK to be sent. */ -+#define TR_LAST_AND_SEND_ACK (3 << 9) -+ -+ -+/* <8> Only valid on the last transaction. Delays execution until an EOP_GOOD is received. -+ * Any other EOP type will abort execution of this transaction. */ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * Data type. This is used by transactions of variable data type. It controls any endian -+ * converion required if the destiantion host processor has a big endian memory format. -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_DATATYPE_SHIFT (6) -+#define TR_DATATYPE_MASK ((1 << 2) - 1) -+ -+#define TR_DATATYPE_BYTE E4_DATATYPE_BYTE -+#define TR_DATATYPE_SHORT E4_DATATYPE_SHORT -+#define TR_DATATYPE_WORD E4_DATATYPE_WORD -+#define TR_DATATYPE_DWORD E4_DATATYPE_DWORD -+ -+/* <5:0> Transaction Type -+ * For Writeblock <5:3> 000 => Write, 0001 => Read -+ * <2:0> End Byte Addr */ -+#define TR_OPCODE_MASK 0x3F -+#define TR_BLOCK_OPCODE_MASK 0x38 -+ -+#define TR_WRITEBLOCK 0x0 -+#define TR_ENDBYTE_MASK 0x7 -+#define TR_WRITE(Size, EndByte, DataType) \ -+ (0x0 | SET_TR_SIZE(Size) | ((EndByte) & TR_ENDBYTE_MASK) | \ -+ (((DataType) & TR_DATATYPE_MASK) << TR_DATATYPE_SHIFT)) -+ -+#define TR_NOP_TRANS (0x10 | SET_TR_SIZE(0)) -+#define TR_SETEVENT 0x10 -+#define TR_SETEVENT_NOIDENT (TR_SETEVENT | SET_TR_SIZE(0) | TR_LAST_AND_SEND_ACK) -+#define TR_SETEVENT_IDENTIFY (TR_SETEVENT | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_REMOTEDMA (0x11 | SET_TR_SIZE(7) | TR_LAST_AND_SEND_ACK) -+#define TR_SENDDISCARD (0x12 | SET_TR_SIZE(0)) -+ -+/* -+ * Conditional transactions that might return PAckTestFail. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where a definite TRUE/FALSE answer is required. -+ */ -+#define TR_GTE (0x14 | SET_TR_SIZE(1)) -+#define TR_LT (0x15 | SET_TR_SIZE(1)) -+#define TR_EQ (0x16 | SET_TR_SIZE(1)) -+#define TR_NEQ (0x17 | SET_TR_SIZE(1)) -+ -+/* -+ * Conditional transactions that might return PAckDiscard. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where eventually a TRUE answer is expected but the node might not be ready yet. -+ * These can be mixed with the normal conditionals to allow a single packet to test for readyness and -+ * a TRUE/FALSE answer. -+ */ -+#define TR_GTE_DISCARD (0x34 | SET_TR_SIZE(1)) -+#define TR_LT_DISCARD (0x35 | SET_TR_SIZE(1)) -+#define TR_EQ_DISCARD (0x36 | SET_TR_SIZE(1)) -+#define TR_NEQ_DISCARD (0x37 | SET_TR_SIZE(1)) -+ -+#define TR_TRACEROUTE_TRANS 0x18 -+#define TR_TRACEROUTE(Size) (TR_TRACEROUTE_TRANS | (TR_DATATYPE_WORD << TR_DATATYPE_SHIFT) |SET_TR_SIZE(Size)) -+#define TR_IDENTIFY (0x19 | SET_TR_SIZE(0)) -+ -+#define TR_ADDWORD (0x1c | SET_TR_SIZE(2) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_COMMIT (0x1d | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_TESTANDWRITE (0x1e | SET_TR_SIZE(3) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_GETINDEX (0x1f | SET_TR_SIZE(0)) -+ -+ -+ -+/* TraceRoute formate */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision Id */ -+#define TR_TRACEROUTE0_BCAST_PIN(val) (((val) >> 7) & 1) /* 7 Bcast Top Pin */ -+#define TR_TRACEROUTE0_LNR(val) (((val) >> 8) & 0xFF) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_ROUTES_SELECTED(val) ((val & 0xFF)) /* 0:7 Routes Selected */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_BCAST_BOTTOM(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom */ -+ -+#endif /* _ELAN4_TRANSACTIONTYPE_H */ -Index: linux-2.4.21/include/elan4/types.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/types.h 2005-06-01 23:12:54.744417064 -0400 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TYPES_H -+#define __ELAN4_TYPES_H -+ -+#ident "@(#)$Id: types.h,v 1.9 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/types.h,v $*/ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E4_int; -+typedef unsigned int E4_uint; -+ -+typedef signed char E4_int8; -+typedef unsigned char E4_uint8; -+ -+typedef signed short E4_int16; -+typedef unsigned short E4_uint16; -+ -+typedef signed int E4_int32; -+typedef unsigned int E4_uint32; -+ -+#ifdef _LP64 -+typedef signed long E4_int64; -+typedef unsigned long E4_uint64; -+#else -+typedef signed long long E4_int64; -+typedef unsigned long long E4_uint64; -+#endif -+ -+/* 64-bit Elan4 */ -+typedef E4_uint64 E4_Addr; -+typedef E4_uint32 E4_LocPhysAddr; /* Really 31 bits */ -+ -+#define OneK (1024) -+#define EightK (8*OneK) -+ -+#define E4_DATATYPE_BYTE 0 -+#define E4_DATATYPE_SHORT 1 -+#define E4_DATATYPE_WORD 2 -+#define E4_DATATYPE_DWORD 3 -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TYPES_H */ -+ -Index: linux-2.4.21/include/elan4/user.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/user.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/user.h 2005-06-01 23:12:54.745416912 -0400 -@@ -0,0 +1,344 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.h,v 1.37.2.2 2004/11/18 17:54:17 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.h,v $*/ -+ -+#ifndef __ELAN4_USER_H -+#define __ELAN4_USER_H -+ -+#include -+#include -+#include -+ -+typedef struct trap_queue -+{ -+ unsigned q_back; /* Next free space */ -+ unsigned q_front; /* First object to remove */ -+ unsigned q_size; /* Size of queue */ -+ unsigned q_count; /* Current number of entries */ -+ unsigned q_slop; /* FULL <=> (count+slop) == size */ -+} RING_QUEUE; -+ -+#define RING_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop), (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define RING_QUEUE_FULL(q) ((q).q_count >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size) -+#define RING_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define RING_QUEUE_NEXT(q,indx) ((indx) = (((indx)+1) % (q).q_size)) -+#define RING_QUEUE_PREV(q,indx) ((indx) = (((indx)+(q).q_size-1) % (q).q_size)) -+#define RING_QUEUE_ADD(q) (RING_QUEUE_NEXT(q ,(q).q_back), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REMOVE(q) (RING_QUEUE_NEXT(q, (q).q_front), (--(q).q_count) == 0) -+#define RING_QUEUE_ADD_FRONT(q) (RING_QUEUE_PREV(q, (q).q_front), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_ENTRY(qArea,indx) (&(qArea)[(indx)]) -+#define RING_QUEUE_FRONT(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_front) -+#define RING_QUEUE_BACK(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_back) -+#define RING_QUEUE_ITERATE(q,idx) for (idx = (q).q_front; idx != (q).q_back; idx = (((idx) + 1) % (q).q_size)) -+ -+typedef struct user_rgn -+{ -+ struct user_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_mprev; /* sorted on main address */ -+ virtaddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct user_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_eprev; /* sorted on elan address */ -+ E4_Addr rgn_ebase; /* elan address of base of region */ -+ -+ unsigned long rgn_len; /* length of region */ -+ unsigned rgn_perm; /* elan access permission */ -+} USER_RGN; -+ -+typedef struct user_vpseg -+{ -+ struct list_head vps_link; -+ -+ unsigned short vps_process; /* virtual process number */ -+ unsigned short vps_entries; /* and # virtual processes */ -+ -+ unsigned vps_type; -+ union -+ { -+ struct { -+ ELAN_CAPABILITY *cap; -+ E4_VirtualProcessEntry *routes; -+ } p2p; -+#define vps_p2p_cap vps_u.p2p.cap -+#define vps_p2p_routes vps_u.p2p.routes -+ -+ struct { -+ unsigned short lowvp; -+ unsigned short highvp; -+ } bcast; -+#define vps_bcast_lowvp vps_u.bcast.lowvp -+#define vps_bcast_highvp vps_u.bcast.highvp -+ } vps_u; -+} USER_VPSEG; -+ -+/* values for vps_type */ -+#define USER_VPSEG_P2P 0 -+#define USER_VPSEG_BCAST 1 -+ -+typedef struct user_cq -+{ -+ struct list_head ucq_link; -+ -+ ELAN4_CQ *ucq_cq; /* the real command queue */ -+ -+ unsigned char ucq_state; /* command queue state */ -+ unsigned char ucq_errored; /* command queue has errored */ -+ unsigned char ucq_flags; /* flags */ -+ ELAN4_CPROC_TRAP ucq_trap; /* trap state */ -+ -+ atomic_t ucq_ref; /* # references to this cq (mmaps) */ -+} USER_CQ; -+ -+/* values for ucq_state */ -+#define UCQ_RUNNING 0 /* command queue is running */ -+#define UCQ_TRAPPED 1 /* command queue has trapped */ -+#define UCQ_NEEDS_RESTART 2 /* command queue has trapped, and needs restarting */ -+#define UCQ_STOPPED 3 /* command queue has trapped, and delivered to user */ -+ -+/* values for ucq_flags */ -+#define UCQ_SYSTEM (1 << 0) -+#define UCQ_REORDER (1 << 1) -+ -+extern int num_fault_save; -+extern int min_fault_pages; -+extern int max_fault_pages; -+ -+typedef struct fault_save -+{ -+ struct fault_save *next; -+ E4_Addr addr; -+ E4_uint32 count; -+} FAULT_SAVE; -+ -+typedef struct user_iproc_trap -+{ -+ unsigned char ut_state; -+ ELAN4_IPROC_TRAP ut_trap; -+} USER_IPROC_TRAP; -+ -+/* values for ut_state */ -+#define UTS_IPROC_RUNNING 0 -+#define UTS_IPROC_TRAPPED 1 -+#define UTS_IPROC_RESOLVING 2 -+#define UTS_IPROC_EXECUTE_PACKET 3 -+#define UTS_IPROC_EXECUTING 4 -+#define UTS_IPROC_NETWORK_ERROR 5 -+#define UTS_IPROC_STOPPED 6 -+ -+typedef struct user_ctxt_entry -+{ -+ struct list_head cent_link; /* entry chained on context */ -+ ELAN_CAPABILITY *cent_cap; /* capability we attached with */ -+} USER_CTXT_ENTRY; -+ -+typedef struct user_ctxt -+{ -+ ELAN4_CTXT uctx_ctxt; /* is also an elan context */ -+ -+ spinlock_t uctx_spinlock; /* spinlock for items used with interrupt handler */ -+ kcondvar_t uctx_wait; /* place to sleep (traphandler/swapout/swapin/neterr fixup) */ -+ -+ unsigned uctx_status; /* status (uctx_spinlock) */ -+ -+ pid_t uctx_trap_pid; /* pid to deliver signals to on trap */ -+ int uctx_trap_signo; /* signal number to deliver */ -+ unsigned uctx_trap_state; /* state of trap handling code */ -+ unsigned uctx_trap_count; /* count of "thread" in user_trap_handler() */ -+ -+ unsigned uctx_int_count; /* # interrupts since last zeroed */ -+ unsigned long uctx_int_start; /* tick when int_count last zeroed */ -+ unsigned long uctx_int_delay; /* # ticks to delay next wakeup */ -+ struct timer_list uctx_int_timer; /* and timer to use to delay signal */ -+ -+ struct timer_list uctx_neterr_timer; /* network error timer */ -+ -+ struct list_head uctx_vpseg_list; /* list of vp segments we've got */ -+ kmutex_t uctx_vpseg_lock; /* and lock to protect it. */ -+ ELAN4_ROUTE_TABLE *uctx_routetable; /* our virtual process table */ -+ ELAN_POSITION uctx_position; /* position in network */ -+ -+ struct list_head uctx_cent_list; /* list of attached network contexts */ -+ -+ USER_CQ *uctx_ddcq; /* command queue for re-issueing traps */ -+ E4_uint64 uctx_ddcq_insertcnt; /* # dwords inserted into command queue */ -+ E4_uint64 uctx_ddcq_completed; /* last "completed" write was here */ -+ int uctx_ddcq_intr; /* count of outstanding ddcq interrupts */ -+ -+ ELAN4_HALTOP uctx_haltop; /* halt operation for flushing */ -+ ELAN4_DMA_FLUSHOP uctx_dma_flushop; /* flush operation for flushing dma runqueue */ -+ -+ INTCOOKIE_TABLE *uctx_intcookie_table; /* table of interrupt cookies (shared with other uctxs for this task) */ -+ -+ kmutex_t uctx_cqlock; /* lock for create/destory cqs */ -+ struct list_head uctx_cqlist; /* list of command queues (uctx_cqlock,uctx_spinlock) */ -+ -+ ELAN4_DPROC_TRAP *uctx_dprocTraps; /* queue of dproc traps to resolve/reissue */ -+ RING_QUEUE uctx_dprocTrapQ; -+ -+ ELAN4_TPROC_TRAP *uctx_tprocTraps; /* queue of tproc traps to resolve/reissue */ -+ RING_QUEUE uctx_tprocTrapQ; -+ -+ ELAN4_EPROC_TRAP *uctx_eprocTraps; /* queue of eproc traps to resolve */ -+ RING_QUEUE uctx_eprocTrapQ; -+ -+ USER_IPROC_TRAP uctx_iprocTrap[2]; /* input trap state, 1 per virtual channel */ -+ -+ E4_DMA *uctx_dmas; /* queue of dmas to restart */ -+ RING_QUEUE uctx_dmaQ; -+ -+ E4_ThreadRegs *uctx_threads; /* queue of threads to restart */ -+ RING_QUEUE uctx_threadQ; -+ -+ ELAN4_NETERR_MSG *uctx_msgs; /* queue of neterr messages */ -+ RING_QUEUE uctx_msgQ; -+ kmutex_t uctx_rgnmutex; /* lock for create/destroy regions */ -+ spinlock_t uctx_rgnlock; /* spinlock to protect linked lists */ -+ USER_RGN *uctx_mrgns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_mtail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_mrgnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ USER_RGN *uctx_ergns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_etail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_ergnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ ELAN4_USER_PAGE *uctx_upage; /* kernel page shared with user */ -+ sdramaddr_t uctx_trampoline; /* sdram page for tproc trampoline */ -+ -+ E4_Addr uctx_upage_addr; /* elan addr page mapped into */ -+ E4_Addr uctx_trestart_addr; /* address of thread restart code */ -+ FAULT_SAVE *uctx_faults; -+ FAULT_SAVE *uctx_fault_list; -+ int uctx_num_fault_save; -+ spinlock_t uctx_fault_lock; -+} USER_CTXT; -+ -+/* bit values for uctx_status */ -+#define UCTX_EXITING (1 << 0) /* context is exiting. */ -+#define UCTX_USER_FILTERING (1 << 1) /* user requested context filter */ -+#define UCTX_USER_STOPPED (1 << 2) /* user requested stop */ -+ -+#define UCTX_SWAPPING (1 << 3) /* context is swapping out */ -+#define UCTX_SWAPPED (1 << 4) /* context is swapped out */ -+ -+#define UCTX_STOPPING (1 << 5) /* stopping elan from running this context */ -+#define UCTX_STOPPED (1 << 6) /* elan no longer running this context */ -+ -+#define UCTX_EPROC_QUEUE_FULL (1 << 7) /* reasons for stopping running */ -+#define UCTX_DPROC_QUEUE_FULL (1 << 8) -+#define UCTX_TPROC_QUEUE_FULL (1 << 9) -+#define UCTX_IPROC_CH0_TRAPPED (1 << 10) -+#define UCTX_IPROC_CH1_TRAPPED (1 << 11) -+ -+#define UCTX_NETERR_TIMER (1 << 12) -+#define UCTX_NETERR_FIXUP (1 << 13) -+ -+#define UCTX_EPROC_QUEUE_OVERFLOW (1 << 14) -+#define UCTX_DPROC_QUEUE_OVERFLOW (1 << 15) -+#define UCTX_TPROC_QUEUE_OVERFLOW (1 << 16) -+ -+#define UCTX_EPROC_QUEUE_ERROR (1 << 17) -+#define UCTX_DPROC_QUEUE_ERROR (1 << 18) -+#define UCTX_TPROC_QUEUE_ERROR (1 << 19) -+ -+#define UCTX_STOPPED_REASONS (UCTX_EPROC_QUEUE_FULL | UCTX_DPROC_QUEUE_FULL | UCTX_TPROC_QUEUE_FULL) -+#define UCTX_SWAPPED_REASONS (UCTX_EXITING | UCTX_USER_STOPPED | UCTX_NETERR_FIXUP) -+#define UCTX_NACKING_REASONS (UCTX_USER_FILTERING | UCTX_IPROC_CH0_TRAPPED | UCTX_IPROC_CH1_TRAPPED) -+ -+#define UCTX_OVERFLOW_REASONS (UCTX_EPROC_QUEUE_OVERFLOW | UCTX_DPROC_QUEUE_OVERFLOW | UCTX_TPROC_QUEUE_OVERFLOW) -+#define UCTX_ERROR_REASONS (UCTX_EPROC_QUEUE_ERROR | UCTX_DPROC_QUEUE_ERROR | UCTX_TPROC_QUEUE_ERROR) -+ -+#define UCTX_RUNNABLE(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS)) == 0) -+#define UCTX_NACKING(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS | UCTX_NACKING_REASONS)) != 0) -+ -+/* values for uctx_trap_signalled */ -+#define UCTX_TRAP_IDLE 0 -+#define UCTX_TRAP_SLEEPING 1 -+#define UCTX_TRAP_SIGNALLED 2 -+#define UCTX_TRAP_ACTIVE 3 -+ -+extern int user_p2p_route_options; -+extern int user_bcast_route_options; -+extern int user_dproc_retry_count; -+extern int user_cproc_retry_count; -+ -+extern USER_CTXT *user_alloc (ELAN4_DEV *dev); -+extern void user_free (USER_CTXT *uctx); -+extern void user_swapout (USER_CTXT *uctx, unsigned reason); -+extern void user_swapin (USER_CTXT *uctx, unsigned reason); -+extern int user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_block_inputter (USER_CTXT *uctx, unsigned blocked); -+extern int user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas); -+ -+extern int user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap); -+extern int user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp); -+extern int user_removevp (USER_CTXT *uctx, unsigned process); -+ -+extern int user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_reset_route (USER_CTXT *uctx, unsigned process); -+extern int user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error); -+extern int user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+extern int user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+ -+extern int user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr); -+extern int user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx); -+extern int user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma); -+extern int user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern int user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap); -+ -+extern int user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks); -+extern USER_CQ *user_findcq (USER_CTXT *uctx, unsigned num); -+extern USER_CQ *user_alloccq (USER_CTXT *uctx, unsigned size, unsigned perm, unsigned flags); -+extern void user_freecq (USER_CTXT *uctx, USER_CQ *cq); -+extern void user_dropcq (USER_CTXT *uctx, USER_CQ *cq); -+ -+/* user_osdep.c */ -+extern int user_load_range (USER_CTXT *uctx, E4_Addr addr, unsigned long nbytes, E4_uint32 fsr); -+extern void user_update_main (USER_CTXT *uctx, struct mm_struct *mm, unsigned long start, unsigned long len); -+extern void user_unload_main (USER_CTXT *uctx, unsigned long start, unsigned long len); -+ -+ -+/* regions.c */ -+extern USER_RGN *user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail); -+extern USER_RGN *user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail); -+extern USER_RGN *user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr); -+extern USER_RGN *user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr); -+extern int user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm); -+extern void user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len); -+extern int user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access); -+extern virtaddr_t user_elan2main (USER_CTXT *uctx, E4_Addr addr); -+extern E4_Addr user_main2elan (USER_CTXT *uctx, virtaddr_t addr); -+extern void user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len); -+extern void user_freergns (USER_CTXT *uctx); -+ -+/* user_ddcq.c */ -+extern int user_ddcq_check (USER_CTXT *uctx, unsigned num); -+extern int user_ddcq_flush (USER_CTXT *uctx); -+extern void user_ddcq_intr (USER_CTXT *uctx); -+extern void user_ddcq_write_dword (USER_CTXT *uctx, E4_Addr addr, E4_uint64 value); -+extern void user_ddcq_interrupt (USER_CTXT *uctx, E4_uint64 cookie); -+extern void user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma); -+extern void user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern void user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr); -+extern void user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count); -+extern void user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USER_H */ -Index: linux-2.4.21/include/elan4/userregs.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/userregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/userregs.h 2005-06-01 23:12:54.746416760 -0400 -@@ -0,0 +1,383 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_USERREGS_H -+#define __ELAN4_USERREGS_H -+ -+#ident "$Id: userregs.h,v 1.14.2.1 2004/10/07 10:57:40 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/userregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+#define COUNT_REG0_SHIFT 32ull -+#define COUNT_REG1_SHIFT 36ull -+#define COUNT_REG2_SHIFT 40ull -+#define COUNT_REG3_SHIFT 44ull -+#define COUNT_REG4_SHIFT 48ull -+#define COUNT_REG5_SHIFT 52ull -+#define COUNT_REG6_SHIFT 56ull -+#define COUNT_REG7_SHIFT 60ull -+ -+ -+/* Count reg 0 */ -+#define STC_INPUT_NON_WRITE_BLOCKS (0x0ull << COUNT_REG0_SHIFT) -+#define STP_DMA_EOP_WAIT_ACK (0x1ull << COUNT_REG0_SHIFT) -+#define STP_TPROC_RUNNING (0x2ull << COUNT_REG0_SHIFT) -+#define STC_STEN_PKTS_OPEN (0x3ull << COUNT_REG0_SHIFT) -+#define STP_CPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG0_SHIFT) -+#define STC_TLB_TABLE_WALKS (0x5ull << COUNT_REG0_SHIFT) -+#define STC_CACHE_HITS (0x6ull << COUNT_REG0_SHIFT) -+#define STC_PCI_SLAVE_READS (0x7ull << COUNT_REG0_SHIFT) -+#define STP_PCI_WAITING_FOR_GNT (0x8ull << COUNT_REG0_SHIFT) -+#define STP_SYS_CLOCK_RATE0 (0xfull << COUNT_REG0_SHIFT) -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_NON_WRITE_BLOCKS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_TPROC_RUNNING", \ -+ "STC_STEN_PKTS_OPEN", \ -+ "STP_CPROC_HOLDS_FFU_DP", \ -+ "STC_TLB_TABLE_WALKS", \ -+ "STC_CACHE_HITS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STP_PCI_WAITING_FOR_GNT", \ -+ "STP_SYS_CLOCK_RATE0" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0x0ull << COUNT_REG1_SHIFT) -+#define STP_DMA_DATA_TRANSMITTING (0x1ull << COUNT_REG1_SHIFT) -+#define STC_CPROC_VALUES_EXE (0x2ull << COUNT_REG1_SHIFT) -+#define STC_STEN_TRANS_SENT (0x3ull << COUNT_REG1_SHIFT) -+#define STP_TPROC_DQ_HOLDS_FFU_DP (0x4ull << COUNT_REG1_SHIFT) -+#define STC_TPROC_TLB_HITS (0x5ull << COUNT_REG1_SHIFT) -+#define STC_CACHE_ALLOC_MISSES (0x6ull << COUNT_REG1_SHIFT) -+#define STP_PCI_MASTER_READ_WAITING (0x7ull << COUNT_REG1_SHIFT) -+#define STP_PCI_WAITING_FOR_DEVSEL (0x8ull << COUNT_REG1_SHIFT) -+#define STP_SYS_CLOCK_RATE1 (0xfull << COUNT_REG1_SHIFT) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STC_CPROC_VALUES_EXE", \ -+ "STC_STEN_TRANS_SENT", \ -+ "STP_TPROC_DQ_HOLDS_FFU_DP", \ -+ "STC_TPROC_TLB_HITS", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_READ_WAITING", \ -+ "STP_PCI_WAITING_FOR_DEVSEL", \ -+ "STP_SYS_CLOCK_RATE1" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0x0ull << COUNT_REG2_SHIFT) -+#define STP_DMA_WAITING_MEM (0x1ull << COUNT_REG2_SHIFT) -+#define STC_CPROC_TRANSFERS (0x2ull << COUNT_REG2_SHIFT) -+#define STP_STEN_WAIT_NETWORK_BUSY (0x3ull << COUNT_REG2_SHIFT) -+#define STP_IPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG2_SHIFT) -+#define STC_UNITS_TLB_HITS (0x5ull << COUNT_REG2_SHIFT) -+#define STC_CACHE_NON_ALLOC_MISSES (0x6ull << COUNT_REG2_SHIFT) -+#define STP_PCI_MASTER_WRITE_WAITING (0x7ull << COUNT_REG2_SHIFT) -+#define STC_PCI_OUT_OF_ORDER_SPLIT_COMP (0x8ull << COUNT_REG2_SHIFT) -+#define STP_SYS_CLOCK_RATE2 (0xfull << COUNT_REG2_SHIFT) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STC_CPROC_TRANSFERS", \ -+ "STP_STEN_WAIT_NETWORK_BUSY", \ -+ "STP_IPROC_HOLDS_FFU_DP", \ -+ "STC_UNITS_TLB_HITS", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_WRITE_WAITING", \ -+ "STC_PCI_OUT_OF_ORDER_SPLIT_COMP", \ -+ "STP_SYS_CLOCK_RATE2" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0x0ull << COUNT_REG3_SHIFT) -+#define STP_DMA_WAIT_NETWORK_BUSY (0x1ull << COUNT_REG3_SHIFT) -+#define STC_CPROC_PREFETCH_SDRAM (0x2ull << COUNT_REG3_SHIFT) -+#define STP_STEN_BLOCKED_ACKS_OR_VC (0x3ull << COUNT_REG3_SHIFT) -+#define STP_EPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG3_SHIFT) -+#define STP_TPROC_BLOCKED_MEMSYS (0x5ull << COUNT_REG3_SHIFT) -+#define STC_CACHE_WRITE_BACKS (0x6ull << COUNT_REG3_SHIFT) -+#define STP_PCI_SLAVE_READ_WAITING (0x7ull << COUNT_REG3_SHIFT) -+#define STP_PCI_IDLE_CYCLES (0x8ull << COUNT_REG3_SHIFT) -+#define STP_SYS_CLOCK_RATE3 (0xfull << COUNT_REG3_SHIFT) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STC_CPROC_PREFETCH_SDRAM", \ -+ "STP_STEN_BLOCKED_ACKS_OR_VC", \ -+ "STP_EPROC_HOLDS_FFU_DP", \ -+ "STP_TPROC_BLOCKED_MEMSYS", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STP_PCI_SLAVE_READ_WAITING", \ -+ "STP_PCI_IDLE_CYCLES", \ -+ "STP_SYS_CLOCK_RATE3" \ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0x0ull << COUNT_REG4_SHIFT) -+#define STC_DMA_PKTS_ACCEPTED (0x1ull << COUNT_REG4_SHIFT) -+#define STC_CPROC_FLUSH_REQ_SDRAM (0x2ull << COUNT_REG4_SHIFT) -+#define STP_STEN_EOP_WAIT_ACK (0x3ull << COUNT_REG4_SHIFT) -+#define STP_DMA_HOLDS_FFU_DP (0x4ull << COUNT_REG4_SHIFT) -+#define STP_UNIT_BLOCKED_MEMSYS (0x5ull << COUNT_REG4_SHIFT) -+#define STC_PCI_MASTER_READS (0x6ull << COUNT_REG4_SHIFT) -+#define STP_PCI_SLAVE_WRITE_WAITING (0x7ull << COUNT_REG4_SHIFT) -+#define STC_INPUT_PACKETS_DISCARDED (0x8ull << COUNT_REG4_SHIFT) -+#define STP_SYS_CLOCK_RATE4 (0xfull << COUNT_REG4_SHIFT) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_PKTS_ACCEPTED", \ -+ "STC_CPROC_FLUSH_REQ_SDRAM", \ -+ "STP_STEN_EOP_WAIT_ACK", \ -+ "STP_DMA_HOLDS_FFU_DP", \ -+ "STP_UNIT_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_SLAVE_WRITE_WAITING", \ -+ "STC_INPUT_PACKETS_DISCARDED", \ -+ "STP_SYS_CLOCK_RATE4" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0x0ull << COUNT_REG5_SHIFT) -+#define STC_DMA_PKTS_REJECTED (0x1ull << COUNT_REG5_SHIFT) -+#define STC_CPROC_INSERT_CACHE_MISSES (0x2ull << COUNT_REG5_SHIFT) -+#define STP_STEN_TRANSMITTING_DATA (0x3ull << COUNT_REG5_SHIFT) -+#define FFU_BLOCKED_DIFF_FFU_PROC (0x4ull << COUNT_REG5_SHIFT) -+#define STP_TABLE_WALKS_BLOCKED_MEMSYS (0x5ull << COUNT_REG5_SHIFT) -+#define STC_PCI_MASTER_WRITES (0x6ull << COUNT_REG5_SHIFT) -+#define STP_PCI_MASTER_HOLDS_BUS (0x7ull << COUNT_REG5_SHIFT) -+#define STC_PCI_NO_SPLIT_COMPS (0x8ull << COUNT_REG5_SHIFT) -+#define STP_SYS_CLOCK_RATE5 (0xfull << COUNT_REG5_SHIFT) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_PKTS_REJECTED", \ -+ "STC_CPROC_INSERT_CACHE_MISSES", \ -+ "STP_STEN_TRANSMITTING_DATA", \ -+ "FFU_BLOCKED_DIFF_FFU_PROC", \ -+ "STP_TABLE_WALKS_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_HOLDS_BUS", \ -+ "STC_PCI_NO_SPLIT_COMPS", \ -+ "STP_SYS_CLOCK_RATE5" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_BLOCKED_WAITING_TRANS (0x0ull << COUNT_REG6_SHIFT) -+#define STP_TPROC_INST_STALL (0x1ull << COUNT_REG6_SHIFT) -+#define STP_CPROC_WAITING_DESCHED (0x2ull << COUNT_REG6_SHIFT) -+#define STP_STEN_PKT_OPEN_WAITING_DATA (0x3ull << COUNT_REG6_SHIFT) -+#define STP_TLB_HASH_TABLE_ACCESSES (0x4ull << COUNT_REG6_SHIFT) -+#define STP_PCI_SLAVE_BLOCKED_MEMSYS (0x5ull << COUNT_REG6_SHIFT) -+#define STP_PCI_TRANSFERRING_DATA (0x6ull << COUNT_REG6_SHIFT) -+#define STP_PCI_MASTER_WAITING_BUS (0x7ull << COUNT_REG6_SHIFT) -+#define STP_PCI_READ_LATENCY (0x8ull << COUNT_REG6_SHIFT) -+#define STP_SYS_CLOCK_RATE6 (0xfull << COUNT_REG6_SHIFT) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_BLOCKED_WAITING_TRANS", \ -+ "STP_TPROC_INST_STALL", \ -+ "STP_CPROC_WAITING_DESCHED", \ -+ "STP_STEN_PKT_OPEN_WAITING_DATA", \ -+ "STP_TLB_HASH_TABLE_ACCESSES", \ -+ "STP_PCI_SLAVE_BLOCKED_MEMSYS", \ -+ "STP_PCI_TRANSFERRING_DATA", \ -+ "STP_PCI_MASTER_WAITING_BUS", \ -+ "STP_PCI_READ_LATENCY", \ -+ "STP_SYS_CLOCK_RATE6" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0x0ull << COUNT_REG7_SHIFT) -+#define STP_TPROC_LOAD_STORE_STALL (0x1ull << COUNT_REG7_SHIFT) -+#define STC_CPROC_TIMEOUTS (0x2ull << COUNT_REG7_SHIFT) -+#define STP_STEN_BLOCKED_NETWORK (0x3ull << COUNT_REG7_SHIFT) -+#define STP_TLB_CHAIN_ACCESSES (0x4ull << COUNT_REG7_SHIFT) -+#define STP_CPROC_SCHED_BLOCKED_MEMSYS (0x5ull << COUNT_REG7_SHIFT) -+#define STC_PCI_SLAVE_WRITES (0x6ull << COUNT_REG7_SHIFT) -+#define STC_PCI_DISCONNECTS_RETRIES (0x7ull << COUNT_REG7_SHIFT) -+#define STC_RING_OSCILLATOR (0x8ull << COUNT_REG7_SHIFT) -+#define STP_SYS_CLOCK_RATE7 (0xfull << COUNT_REG7_SHIFT) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STP_TPROC_LOAD_STORE_STALL", \ -+ "STC_CPROC_TIMEOUTS", \ -+ "STP_STEN_BLOCKED_NETWORK", \ -+ "STP_TLB_CHAIN_ACCESSES", \ -+ "STP_CPROC_SCHED_BLOCKED_MEMSYS", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STC_PCI_DISCONNECTS_RETRIES", \ -+ "STC_RING_OSCILLATOR", \ -+ "STP_SYS_CLOCK_RATE7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+ -+#define INPUT_PERF_STATS (STC_INPUT_NON_WRITE_BLOCKS | STC_INPUT_WRITE_BLOCKS | \ -+ STC_INPUT_PKTS | STC_INPUT_PKTS_REJECTED | \ -+ STC_INPUT_CTX_FILTER_FILL | STP_INPUT_DATA_TRANSMITTING | \ -+ STP_INPUT_WAITING_NETWORK_DATA | STP_INPUT_BLOCKED_WAITING_TRANS | STC_INPUT_PACKETS_DISCARDED) -+ -+#define DMA_PERF_STATS (STC_DMA_PKTS_ACCEPTED | STC_DMA_PKTS_REJECTED | \ -+ STP_DMA_EOP_WAIT_ACK | STP_DMA_DATA_TRANSMITTING | \ -+ STP_DMA_WAITING_MEM | STP_DMA_WAIT_NETWORK_BUSY) -+ -+ -+#define TPROC_PERF_STATS (STP_TPROC_RUNNING | STP_TPROC_INST_STALL | \ -+ STP_TPROC_LOAD_STORE_STALL) -+ -+#define CPROC_PERF_STATS (STC_CPROC_VALUES_EXE | STC_CPROC_TRANSFERS | \ -+ STC_CPROC_PREFETCH_SDRAM | STC_CPROC_FLUSH_REQ_SDRAM | \ -+ STC_CPROC_INSERT_CACHE_MISSES | STP_CPROC_WAITING_DESCHED | \ -+ STC_CPROC_TIMEOUTS) -+ -+#define STEN_PERF_STATS (STC_STEN_PKTS_OPEN | STC_STEN_TRANS_SENT | \ -+ STP_STEN_WAIT_NETWORK_BUSY | STP_STEN_BLOCKED_ACKS_OR_VC | \ -+ STP_STEN_EOP_WAIT_ACK | STP_STEN_TRANSMITTING_DATA | \ -+ STP_STEN_PKT_OPEN_WAITING_DATA | STP_STEN_BLOCKED_NETWORK) -+ -+#define FFU_PREF_STATS (STP_CPROC_HOLDS_FFU_DP | STP_TPROC_DQ_HOLDS_FFU_DP | \ -+ STP_IPROC_HOLDS_FFU_DP | STP_EPROC_HOLDS_FFU_DP | \ -+ STP_DMA_HOLDS_FFU_DP | FFU_BLOCKED_DIFF_FFU_PROC) -+ -+#define TABLE_WALK_PERF_STATS (STC_TPROC_TLB_HITS | STC_UNITS_TLB_HITS | \ -+ STP_TLB_HASH_TABLE_ACCESSES | STP_TLB_CHAIN_ACCESSES | \ -+ STC_TLB_TABLE_WALKS) -+ -+#define ADDRESS_ARB_PERF_STATS (STP_UNIT_BLOCKED_MEMSYS | STP_TPROC_BLOCKED_MEMSYS | \ -+ STP_TABLE_WALKS_BLOCKED_MEMSYS | STP_CPROC_SCHED_BLOCKED_MEMSYS | \ -+ STP_PCI_SLAVE_BLOCKED_MEMSYS) -+ -+#define CACHE_PERF_STATS (STC_CACHE_HITS | STC_CACHE_ALLOC_MISSES | \ -+ STC_CACHE_NON_ALLOC_MISSES | STC_CACHE_WRITE_BACKS) -+ -+ -+#define PCI_PERF_STATS (STC_PCI_SLAVE_READS | STP_PCI_MASTER_READ_WAITING | \ -+ STP_PCI_MASTER_WRITE_WAITING | STP_PCI_SLAVE_READ_WAITING | \ -+ STP_PCI_SLAVE_WRITE_WAITING | STC_PCI_MASTER_WRITES | \ -+ STP_PCI_TRANSFERRING_DATA | STC_PCI_SLAVE_WRITES) -+ -+#define PCIBUS_PERF_STATS (STP_PCI_WAITING_FOR_GNT | STP_PCI_WAITING_FOR_DEVSEL | \ -+ STC_PCI_OUT_OF_ORDER_SPLIT_COMP | STP_PCI_IDLE_CYCLES | \ -+ STC_PCI_MASTER_READS | STP_PCI_MASTER_HOLDS_BUS | \ -+ STP_PCI_MASTER_WAITING_BUS | STC_PCI_DISCONNECTS_RETRIES) -+ -+ -+ extern const char *elan_stats_names[8][10]; -+ -+#define ELAN_STATS_NAME(COUNT, CONTROL) (elan_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+ typedef volatile union e4_StatsControl -+ { -+ E4_uint64 StatsControl; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 StatCont0:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont5:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont7:4; -+#else -+ E4_uint32 StatCont7:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont5:4; -+ -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont0:4; -+#endif -+ E4_uint32 pad; -+ } s; -+ } E4_StatsControl; -+ -+typedef volatile union e4_StatsCount -+{ -+ E4_uint64 ClockStat; -+ struct -+ { -+ E4_uint32 ClockLSW; /* read only */ -+ E4_uint32 StatsCount; -+ } s; -+} E4_StatsCount; -+ -+typedef volatile union e4_clock -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 ClockLSW; -+ E4_uint32 ClockMSW; -+ } s; -+} E4_Clock; -+#define E4_TIME( X ) ((X).NanoSecClock) -+ -+#define ELAN4_COMMS_CLOCK_FREQUENCY 660 /* In Mhz. This is half the bit rate. */ -+#define ELAN4_CLOCK_ADD_VALUE 200 /* For 200ns increment rate */ -+#define ELAN4_CLOCK_COMMS_DIV_VALUE (((ELAN4_COMMS_CLOCK_FREQUENCY * ELAN4_CLOCK_ADD_VALUE) / (1000 * 4)) - 1) -+#define ELAN4_CLOCK_TICK_RATE ((ELAN4_CLOCK_ADD_VALUE << 8) + ELAN4_CLOCK_COMMS_DIV_VALUE) -+ -+typedef volatile union e4_clocktickrate -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 pad1; -+ E4_uint32 TickRates; -+ } s; -+} E4_ClockTickRate; -+ -+/* -+ * This is made into an 8k byte object. -+ */ -+typedef volatile struct _E4_User_Regs -+{ -+ E4_StatsCount StatCounts[8]; -+ E4_StatsCount InstCount; -+ E4_Clock Clock; -+ E4_StatsControl StatCont; -+ E4_ClockTickRate ClockTickRate; -+ E4_uint8 pad1[EightK - ((sizeof(E4_StatsCount)*9)+sizeof(E4_StatsControl)+ -+ sizeof(E4_Clock)+sizeof(E4_ClockTickRate))]; -+} E4_User_Regs; -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN4_USERREGS_H */ -Index: linux-2.4.21/include/elan4/usertrap.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/usertrap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/usertrap.h 2005-06-01 23:12:54.746416760 -0400 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: usertrap.h,v 1.17 2004/05/05 09:08:35 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/usertrap.h,v $*/ -+ -+#ifndef __ELAN4_USERTRAP_H -+#define __ELAN4_USERTRAP_H -+ -+#ifndef _ASM -+typedef struct elan4_user_page -+{ -+ E4_uint64 upage_ddcq_completed; -+} ELAN4_USER_PAGE; -+ -+typedef struct elan4_user_trap -+{ -+ int ut_type; -+ unsigned ut_proc; -+ unsigned ut_args[4]; -+ -+ union { -+ ELAN4_EPROC_TRAP eproc; -+ ELAN4_CPROC_TRAP cproc; -+ ELAN4_DPROC_TRAP dproc; -+ ELAN4_IPROC_TRAP iproc; -+ ELAN4_TPROC_TRAP tproc; -+ ELAN4_NETERR_MSG msg; -+ } ut_trap; -+} ELAN4_USER_TRAP; -+ -+#endif /* _ASM */ -+ -+ -+/* value for ut_type */ -+#define UTS_FINISHED 0 /* all pending traps have been handled */ -+#define UTS_RESCHEDULE 1 /* must return to user mode and re-enter */ -+#define UTS_UNIMP_INSTR 2 /* unimplemented thread instruction */ -+#define UTS_EXECUTE_PACKET 3 /* iproc trap needs packet executing */ -+#define UTS_NETWORK_ERROR_TRAP 4 /* network error on this trap */ -+#define UTS_NETWORK_ERROR_MSG 5 /* network error message */ -+#define UTS_NETWORK_ERROR_TIMER 6 /* network error timer expired */ -+ -+#define UTS_EFAULT -1 /* failed to copyout trap */ -+#define UTS_INVALID_ADDR -2 /* all -ve codes mean trap could not be resolved. */ -+#define UTS_INVALID_VPROC -3 -+#define UTS_INVALID_COMMAND -4 -+#define UTS_BAD_TRAP -5 -+#define UTS_ALIGNMENT_ERROR -6 -+#define UTS_QUEUE_OVERFLOW -7 -+#define UTS_QUEUE_ERROR -8 -+#define UTS_INVALID_TRANS -9 -+#define UTS_PERMISSION_DENIED -10 -+#define UTS_CPROC_ERROR -11 -+#define UTS_INVALID_COOKIE -12 -+#define UTS_NETERR_ERROR -13 -+ -+/* "special" values for registering handlers */ -+#define UTS_ALL_TRAPS -9999 -+ -+/* value for ut_proc */ -+#define UTS_NOPROC 0 -+#define UTS_EPROC 1 -+#define UTS_CPROC 2 -+#define UTS_DPROC 3 -+#define UTS_TPROC 4 -+#define UTS_IPROC 5 -+#define UTS_NETERR_MSG 6 -+ -+/* unimplemented trap numbers for thread processor */ -+#define ELAN4_T_TRAP_INSTR(t) (0x80202000 | ((t) & 0xFF)) -+ -+#define ELAN4_T_SYSCALL_TRAP 1 -+# define ELAN4_T_OPEN 0 -+# define ELAN4_T_WRITE 1 -+# define ELAN4_T_READ 2 -+# define ELAN4_T_IOCTL 3 -+# define ELAN4_T_LSEEK 4 -+# define ELAN4_T_POLL 5 -+# define ELAN4_T_CLOSE 6 -+# define ELAN4_T_KILL 7 -+# define ELAN4_T_MMAP 8 -+# define ELAN4_T_MUNMAP 9 -+# define ELAN4_T_ABORT 100 -+# define ELAN4_T_DEBUG 101 -+# define ELAN4_T_REGDUMP 102 -+ -+#define ELAN4_T_REGDUMP_TRAP 2 -+ -+#define ELAN4_T_LIBELAN_TRAP 3 -+# define ELAN4_T_TPORT_NEWBUF 0 -+# define ELAN4_T_TPORT_GC 1 -+# define ELAN4_T_TPORT_DEBUG 2 -+ -+#define ELAN4_T_ALLOC_TRAP 4 -+# define ELAN4_T_ALLOC_ELAN 0 -+# define ELAN4_T_ALLOC_MAIN 1 -+# define ELAN4_T_FREE_ELAN 2 -+# define ELAN4_T_FREE_MAIN 3 -+ -+/* reserved main interrupt cookies */ -+#define ELAN4_INT_COOKIE_DDCQ 0 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USERTRAP_H */ -Index: linux-2.4.21/include/elan4/xsdram.h -=================================================================== ---- linux-2.4.21.orig/include/elan4/xsdram.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/elan4/xsdram.h 2005-06-01 23:12:54.747416608 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_XSDRAM_H -+#define __ELAN4_XSDRAM_H -+ -+#ident "@(#)$Id: xsdram.h,v 1.13 2004/03/05 12:32:04 jon Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/xsdram.h,v $*/ -+ -+/* SAMSUNG K4H281638D-TCB3 */ -+ -+#define SDRAM_tRCF_1_SH 0 -+#define SDRAM_tRP_1_SH 4 -+#define SDRAM_tRCD_SH 8 -+#define SDRAM_tRRD_SH 12 -+#define SDRAM_tEndWr_SH 16 -+#define SDRAM_tEndRd_SH 20 -+#define SDRAM_Burst_SH 24 -+#define SDRAM_CL_SH 28 -+#define SDRAM_DsblBypass (1ULL << 31) -+#define SDRAM_RefreshRate_SH 32 -+#define SDRAM_RamSize_SH 34 -+#define SDRAM_ReadLtncy_1_SH 36 -+#define SDRAM_RdOffset_SH 40 -+#define SDRAM_FlightDelay_SH 42 -+ -+#define SDRAM_ENABLE_ECC (1ULL << 44) // Enables error detecting on the ECC. -+#define SDRAM_SDRAM_TESTING (1ULL << 45) // Switches to test mode for checking EEC data bits -+#define SDRAM_SETUP (1ULL << 46) // Writes SDram control reg when set. Also starts -+ -+#define SDRAM_CS_MODE0 0ULL // 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) -+#define SDRAM_CS_MODE1 1ULL // 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) -+#define SDRAM_CS_MODE2 2ULL // 2Gbit (16-bit output) or 1Gbit (8-bit output) -+#define SDRAM_CS_MODE3 3ULL // 4Gbit (16-bit output) or 2Gbit (8-bit output) -+ -+#if defined(LINUX) && !defined(CONFIG_MPSAS) -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x2ULL << SDRAM_RefreshRate_SH) | (0x3ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH)) -+#else -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x0ULL << SDRAM_RefreshRate_SH) | (0x0ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH) | SDRAM_ENABLE_ECC | SDRAM_SETUP) -+#endif -+ -+#endif /* __ELAN4_XSDRAM_H */ -Index: linux-2.4.21/include/jtag/jtagio.h -=================================================================== ---- linux-2.4.21.orig/include/jtag/jtagio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/jtag/jtagio.h 2005-06-01 23:12:54.747416608 -0400 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: jtagio.h,v 1.7.8.1 2005/01/27 15:21:47 lee Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagio.h,v $*/ -+ -+ -+#ifndef __SYS_JTAGMOD_H -+#define __SYS_JTAGMOD_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define JTAG_MAX_CHIPS 8 -+#define JTAG_MAX_INSTR_LEN 8 -+#define JTAG_MAX_BITS (JTAG_MAX_CHIPS * JTAG_MAX_INSTR_LEN) -+#define JTAG_MAX_DATA_LEN 1024 -+ -+#define JTAG_BYPASS 0xFF -+ -+#define I2C_ADDR_LEN 7 /* 7 bits of address */ -+#define I2C_DATA_LEN 8 /* 8 bits of data */ -+#define I2C_MAX_DATA_LEN 9 /* and upto 9 bytes worth */ -+ -+#define BITS_PER_BYTE 8 -+#define JTAG_NBYTES(nbits) (((nbits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) -+#define JTAG_BIT(v, num) (((v)[(num) / BITS_PER_BYTE] >> ((num) % BITS_PER_BYTE)) & 1) -+#define JTAG_SET_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] |= (1 << ((num) % BITS_PER_BYTE))) -+#define JTAG_CLR_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] &= ~(1 << ((num) % BITS_PER_BYTE))) -+ -+#define RING_CLOCK_CARD (0x3D) -+#define RING_CLOCK_SHIFT (0x3E) -+#define RING_JTAG_LOOPBACK (0x3F) -+#define RING_MAX (0x40) -+ -+#define RING_QUAD_BIT (0x40) -+#define RING_I2C_BIT (0x80) -+ -+#define VALID_JTAG_RING(ring) ((ring) < 0x20 || (ring) == RING_JTAG_LOOPBACK) -+#define VALID_I2C_RING(ring) ((ring) < 0x20 || (ring) == RING_CLOCK_CARD) -+ -+ -+typedef struct jtag_value -+{ -+ u_char bytes[JTAG_NBYTES(JTAG_MAX_DATA_LEN)]; -+} JTAG_VALUE; -+ -+/* arguements to JTAG_SHIFT_IR/JTAG_SHIFT_DR */ -+typedef struct jtag_reset_args -+{ -+ u_int ring; -+} JTAG_RESET_ARGS; -+ -+typedef struct jtag_shift_args -+{ -+ u_int ring; -+ u_int nbits; -+ u_char *value; -+} JTAG_SHIFT_ARGS; -+ -+typedef struct i2c_args -+{ -+ u_int ring; -+ u_int device; -+ u_int reg; -+ u_int count; -+ u_int ok; -+ u_char data[I2C_MAX_DATA_LEN]; -+} I2C_ARGS; -+ -+/* values for 'ok' - the return value from i2c_xx functions */ -+#define I2C_OP_SUCCESS 0 -+#define I2C_OP_ERROR 1 -+#define I2C_OP_NOT_IDLE 2 -+#define I2C_OP_NO_DEVICE 3 -+#define I2C_OP_WRITE_TO_BIG 4 -+#define I2C_OP_BAD_RESOURCE 5 -+ -+typedef struct i2c_clock_shift_args -+{ -+ u_int t; -+ u_int n; -+ u_int m; -+} I2C_CLOCK_SHIFT_ARGS; -+ -+#define JTAG_RESET _IOWR('j', '0', JTAG_RESET_ARGS) -+#define JTAG_SHIFT_IR _IOWR('j', '1', JTAG_SHIFT_ARGS) -+#define JTAG_SHIFT_DR _IOWR('j', '2', JTAG_SHIFT_ARGS) -+ -+#define I2C_CLOCK_SHIFT _IOWR('j', '4', I2C_CLOCK_SHIFT_ARGS) -+#define I2C_WRITE _IOWR('j', '5', I2C_ARGS) -+#define I2C_READ _IOWR('j', '6', I2C_ARGS) -+#define I2C_WRITEREG _IOWR('j', '7', I2C_ARGS) -+#define I2C_READREG _IOWR('j', '8', I2C_ARGS) -+ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __SYS_JTAGMOD_H */ -Index: linux-2.4.21/include/linux/coproc.h -=================================================================== ---- linux-2.4.21.orig/include/linux/coproc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/linux/coproc.h 2005-06-01 23:12:54.748416456 -0400 -@@ -0,0 +1,206 @@ -+/* -+ * Copyright (C) 2002, 2003 Quadrics Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * -+ */ -+ -+/* -+ * Callbacks for coprocessor page table updates. -+ */ -+ -+#ifndef __LINUX_COPROC_H__ -+#define __LINUX_COPROC_H__ -+ -+#include -+#include -+#include -+#include /* kmalloc */ -+ -+typedef struct coproc_ops_struct { -+ struct list_head list; -+ void *arg; -+ -+ void (*release)(void *arg, struct mm_struct *mm); -+ void (*sync_range)(void *arg, struct mm_struct *mm, unsigned long start, unsigned long end); -+ void (*invalidate_range)(void *arg, struct mm_struct *mm, unsigned long start, unsigned long end); -+ void (*update_range)(void *arg, struct mm_struct *mm, unsigned long start, unsigned long end); -+ -+ void (*change_protection)(void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot); -+ -+ void (*sync_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*invalidate_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*update_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ -+} coproc_ops_t; -+ -+extern __inline__ void -+register_coproc_ops(struct mm_struct *mm, coproc_ops_t *cp) -+{ -+ if (mm->coproc_ops == NULL) { -+ mm->coproc_ops = (struct list_head *) -+ kmalloc(sizeof(struct list_head), GFP_KERNEL); -+ INIT_LIST_HEAD(mm->coproc_ops); -+ } -+ list_add(&cp->list, mm->coproc_ops); -+} -+ -+extern __inline__ void -+unregister_coproc_ops(struct mm_struct *mm, coproc_ops_t *cp) -+{ -+ list_del(&cp->list); -+ if (list_empty(mm->coproc_ops)) { -+ kfree(mm->coproc_ops); -+ mm->coproc_ops = NULL; -+ } -+} -+ -+extern __inline__ void -+coproc_release(struct mm_struct *mm) -+{ -+ struct list_head *head = mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ while (! list_empty(head)) { -+ lp = head->next; -+ cp = list_entry(lp, coproc_ops_t, list); -+ -+ list_del (&cp->list); -+ -+ if (cp->release) -+ cp->release(cp->arg, mm); -+ } -+ kfree(head); -+ mm->coproc_ops = NULL; -+ } -+} -+ -+extern __inline__ void -+coproc_sync_range(struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ struct list_head *head = mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->sync_range) -+ cp->sync_range(cp->arg, mm, start, end); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ struct list_head *head = mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->invalidate_range) -+ cp->invalidate_range(cp->arg, mm, start, end); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_update_range(struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ struct list_head *head = mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->update_range) -+ cp->update_range(cp->arg, mm, start, end); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_change_protection (struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ struct list_head *head = mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->change_protection) -+ cp->change_protection(cp->arg, mm, start, end, newprot); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_sync_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct list_head *head = vma->vm_mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->sync_page) -+ cp->sync_page(cp->arg, vma, addr); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct list_head *head = vma->vm_mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->invalidate_page) -+ cp->invalidate_page(cp->arg, vma, addr); -+ } -+ } -+} -+ -+extern __inline__ void -+coproc_update_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct list_head *head = vma->vm_mm->coproc_ops; -+ struct list_head *lp; -+ coproc_ops_t *cp; -+ -+ if (head) { -+ for (lp = head->next; lp != head; lp = lp->next) { -+ cp = list_entry(lp, coproc_ops_t, list); -+ if (cp->update_page) -+ cp->update_page(cp->arg, vma, addr); -+ } -+ } -+} -+ -+ -+#endif /* __LINUX_COPROC_H__ */ -Index: linux-2.4.21/include/linux/ptrack.h -=================================================================== ---- linux-2.4.21.orig/include/linux/ptrack.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/linux/ptrack.h 2005-06-01 23:12:54.748416456 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Derived from exit_actn.c by -+ * Copyright (C) 2003 Quadrics Ltd. -+ * -+ */ -+#ifndef __LINUX_PTRACK_H -+#define __LINUX_PTRACK_H -+ -+/* -+ * Process tracking - this allows a module to keep track of processes -+ * in order that it can manage all tasks derived from a single process. -+ */ -+ -+#define PTRACK_PHASE_CLONE 1 -+#define PTRACK_PHASE_CLONE_FAIL 2 -+#define PTRACK_PHASE_EXEC 3 -+#define PTRACK_PHASE_EXIT 4 -+ -+typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child); -+ -+#define PTRACK_FINISHED 0 -+#define PTRACK_INNHERIT 1 -+#define PTRACK_DENIED 2 -+ -+struct ptrack_desc { -+ struct list_head link; -+ ptrack_callback_t callback; -+ void *arg; -+}; -+ -+extern int ptrack_register (ptrack_callback_t callback, void *arg); -+extern void ptrack_deregister (ptrack_callback_t callback, void *arg); -+extern int ptrack_registered (ptrack_callback_t callback, void *arg); -+ -+extern int ptrack_call_callbacks (int phase, struct task_struct *child); -+ -+#endif /* __LINUX_PTRACK_H */ -Index: linux-2.4.21/include/linux/sched.h -=================================================================== ---- linux-2.4.21.orig/include/linux/sched.h 2005-06-01 22:52:05.000000000 -0400 -+++ linux-2.4.21/include/linux/sched.h 2005-06-01 23:12:54.749416304 -0400 -@@ -30,6 +30,8 @@ - #include - #include - -+#include -+ - struct exec_domain; - extern int exec_shield; - extern int exec_shield_randomize; -@@ -322,6 +324,9 @@ - #endif - /* Architecture-specific MM context */ - mm_context_t context; -+ -+ /* Support page table updates on adapter cards with on-board MMU */ -+ struct list_head *coproc_ops; - - /* coredumping support */ - int core_waiters; -@@ -342,6 +347,7 @@ - mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ - page_table_lock: SPIN_LOCK_UNLOCKED, \ - mmlist: LIST_HEAD_INIT(name.mmlist), \ -+ coproc_ops: NULL, \ - rlimit_rss: RLIM_INFINITY, \ - } - -@@ -572,6 +578,9 @@ - /* context-switch lock */ - spinlock_t switch_lock; - -+/* process tracking callbacks */ -+ struct list_head ptrack_list; -+ - /* journalling filesystem info */ - void *journal_info; - -@@ -740,6 +749,7 @@ - blocked: {{0}}, \ - alloc_lock: SPIN_LOCK_UNLOCKED, \ - switch_lock: SPIN_LOCK_UNLOCKED, \ -+ ptrack_list: LIST_HEAD_INIT(tsk.ptrack_list), \ - journal_info: NULL, \ - real_stack: &tsk, \ - } -Index: linux-2.4.21/include/qsnet/autoconf.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/autoconf.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/autoconf.h 2005-06-01 23:12:54.750416152 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * NOTE: This file has been automatically generated: -+ * node : milano -+ * kernel : /src/linux/qsnet/linux-2.4.21 -+ * date : Wed May 4 18:24:23 EDT 2005 -+ * -+ */ -+ -+#include -+#undef NO_RMAP -+#define AC -+#undef NO_O1_SCHED -+#undef NO_NPTL -+#define NO_ABI -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+#define PROCESS_ACCT -+#endif -+#undef RSS_ATOMIC -+#undef NO_COPROC -+#define NO_IOPROC -+#undef NO_PTRACK -+#define NO_PANIC_NOTIFIER -+#undef NO_SHM_CLEANUP -+#undef NO_PDE -+ -+ -+#define CONFIG_EIP -+#define CONFIG_ELAN -+#define CONFIG_ELAN3 -+#define CONFIG_ELAN4 -+#define CONFIG_EP -+#define CONFIG_JTAG -+#define CONFIG_QSNET -+#define CONFIG_RMS -Index: linux-2.4.21/include/qsnet/condvar.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/condvar.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/condvar.h 2005-06-01 23:12:54.750416152 -0400 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_CONDVAR_H) -+#define _LINUX_CONDVAR_H -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+ -+#define CV_RET_SIGPENDING 0 -+#define CV_RET_TIMEOUT (-1) -+#define CV_RET_NORMAL 1 -+ -+struct kcondvar_task { -+ struct task_struct *task; /* need to wrap task in this */ -+ struct list_head list; /* to thread as a list */ -+ int blocked; -+}; -+ -+typedef struct { -+ struct list_head task_list; /* list of kcondvar_task's */ -+} kcondvar_t; -+ -+#define kcondvar_wait(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_UNINTERRUPTIBLE) -+#define kcondvar_waitsig(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_INTERRUPTIBLE) -+#define kcondvar_timedwait(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_UNINTERRUPTIBLE) -+#define kcondvar_timedwaitsig(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_INTERRUPTIBLE) -+#define kcondvar_wakeupone(c,l) kcondvar_wakeup(c, l, 0) -+#define kcondvar_wakeupall(c,l) kcondvar_wakeup(c, l, 1) -+ -+extern __inline__ void -+kcondvar_init(kcondvar_t *c) -+{ -+ INIT_LIST_HEAD(&c->task_list); -+} -+ -+extern __inline__ void -+kcondvar_destroy(kcondvar_t *c) -+{ -+ ASSERT(list_empty(&c->task_list)); -+} -+ -+/* -+ * We thread a struct kcondvar_task, allocated on the stack, onto the kcondvar_t's -+ * task_list, and take it off again when we wake up. -+ */ -+extern __inline__ int -+debug_kcondvar_wait(kcondvar_t *c, spinlock_t *l, unsigned long *fl, long tmo, int state) -+{ -+ struct kcondvar_task cvt; -+ int ret = CV_RET_NORMAL; -+ -+ ASSERT(!in_interrupt()); /* we can block */ -+ ASSERT(SPINLOCK_HELD(l)); /* enter holding lock */ -+ -+ cvt.task = current; -+ cvt.blocked = 1; -+ list_add(&cvt.list, &c->task_list); -+ do { -+ /* Note: we avoid using TASK_UNINTERRUPTIBLE here because avenrun() -+ * (linux/kernel/timer.c:calc_load()) -+ * computation treats it like TASK_RUNNABLE hence creates false high -+ * load averages when we create kernel threads. -+ * The cvt.blocked flag distinguishes a signal wakeup from a kcondvar_wakeup. -+ * -+ * However, if we do take a signal we could end up busily spinning here, if -+ * we ignore it (state == TASK_UNINTERRUPTIBLE) so once we see a signal -+ * pending we do sleep TASK_UNINTERRUPTIBLE to stop a busy spin. -+ * I have now blocked all signals for kernel threads to prevent this -+ * happening but other users of kcondvar_wait may still hit this spin. -+ */ -+ set_current_state (signal_pending(current) ? state : TASK_INTERRUPTIBLE); -+ -+ if (fl) -+ spin_unlock_irqrestore(l, *fl); -+ else -+ spin_unlock(l); -+ if (tmo) { -+ if (tmo <= jiffies || !schedule_timeout(tmo - jiffies)) -+ ret = CV_RET_TIMEOUT; -+ } else -+ schedule(); -+ if (fl) -+ spin_lock_irqsave (l, *fl); -+ else -+ spin_lock(l); -+ -+ /* signal_pending - Only exit the loop if the user was waiting TASK_INTERRUPTIBLE */ -+ if ((state == TASK_INTERRUPTIBLE) && signal_pending(current)) -+ ret = CV_RET_SIGPENDING; -+ -+ } while (cvt.blocked && ret == CV_RET_NORMAL); -+ list_del(&cvt.list); -+ -+ /* Reset task state in case we didn't sleep above */ -+ set_current_state (TASK_RUNNING); -+ -+ return ret; /* return holding lock */ -+} -+ -+extern __inline__ void -+kcondvar_wakeup(kcondvar_t *c, spinlock_t *l, int wakeall) -+{ -+ struct list_head *lp; -+ struct kcondvar_task *cvtp; -+ -+ ASSERT(SPINLOCK_HELD(l)); /* already holding lock */ -+ for (lp = c->task_list.next; lp != &c->task_list; lp = lp->next) { -+ cvtp = list_entry(lp, struct kcondvar_task, list); -+ if (cvtp->blocked) { -+ cvtp->blocked = 0; -+ /* wake_up_process added to kernel/ksyms.c */ -+ wake_up_process(cvtp->task); -+ if (!wakeall) -+ break; -+ } -+ } -+} /* return still holding lock */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_CONDVAR_H */ -Index: linux-2.4.21/include/qsnet/config.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/config.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/config.h 2005-06-01 23:12:54.751416000 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _QSNET_CONFIG_H -+#define _QSNET_CONFIG_H -+ -+#ident "$Id: config.h,v 1.23 2003/07/24 21:31:19 robin Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/config.h,v $*/ -+ -+ -+/* -+ * QSNET standard defines : -+ * -+ * Target operating system defines -+ * SOLARIS -+ * TRU64UNIX/DIGITAL_UNIX -+ * LINUX -+ * -+ * Target processor defines -+ * SPARC -+ * ALPHA -+ * I386 -+ * IA64 -+ * X86_64 -+ * -+ * Byte order defines -+ * __LITTLE_ENDIAN__ -+ * __BIG_ENDIAN__ -+ * -+ * Data size defines -+ * _LP64 - LP64 - long/pointer is 64 bits -+ * _ILP32 - LP32 - long/pointer is 32 bits -+ * -+ * Elan defines for main processor -+ * __MAIN_LITTLE_ENDIAN__ - main byte order (for thread code) -+ * __MAIN_BIG_ENDIAN__ -+ * _MAIN_LP64 - main long size (for thread code) -+ * _MAIN_ILP32 -+ * -+ * Compiling for kernel (defined in makefile) -+ * _KERNEL -+ * -+ */ -+ -+#if defined(__LP64__) && !defined(_LP64) -+# define _LP64 -+#endif -+ -+#if defined(__arch64__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if defined(__alpha__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if !defined(__arch64__) && !defined(_ILP32) && !defined(_LP64) -+# define _ILP32 -+#endif -+ -+#if defined(__ELAN__) || defined(__ELAN3__) -+ -+#define __LITTLE_ENDIAN__ -+ -+#if defined(__host_solaris) && defined(__host_sparc) -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define _MAIN_ILP32 -+#define __MAIN_BIG_ENDIAN__ -+ -+#elif defined(__host_osf) -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_alpha) -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_sparc) -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __MAIN_BIG_ENDIAN__ -+#ifdef __KERNEL__ -+# define _MAIN_LP64 -+#else -+# define _MAIN_ILP32 -+#endif -+ -+#elif defined(__host_linux) && defined(__host_i386) -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define _MAIN_ILP32 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_ia64) -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_x86_64) -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#else /* !defined(__ELAN3__) */ -+ -+#if (defined(sun) || defined(__sun)) && defined(sparc) && !defined(__sparcv9) /* Sun Solaris 5.6 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#ifndef __BIG_ENDIAN__ -+#define __BIG_ENDIAN__ -+#endif -+ -+#elif (defined(sun) || defined(__sun)) && defined(sparc) && defined(__sparcv9) /* Sun Solaris 5.7 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif defined(__osf__) && defined(__alpha) /* Digital Unix */ -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__alpha) /* Linux Alpha */ -+ -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__sparc) /* Linux Sparc */ -+ -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__i386) /* Linux i386 */ -+ -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__ia64) /* Linux ia64 */ -+ -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__x86_64) /* Linux x86_64 */ -+ -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define __LITTLE_ENDIAN__ -+ -+#elif defined(__QNXNTO__) -+#define QNX -+#define I386 -+#define __LITTLE_ENDIAN__ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#endif -+ -+#include -+ -+#endif /* _QSNET_CONFIG_H */ -Index: linux-2.4.21/include/qsnet/crwlock.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/crwlock.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/crwlock.h 2005-06-01 23:12:54.751416000 -0400 -@@ -0,0 +1,207 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+/* -+ * Complex - Reader/Writer locks -+ * Ref: "UNIX Systems for Modern Architectures", by Curt Schimmel, -+ * sec 11.6.3. -+ * -+ * This implementation is based on semaphores and may not be called from -+ * interrupt handlers. -+ * -+ */ -+ -+#if !defined(_LINUX_RWLOCK_H) -+#define _LINUX_RWLOCK_H -+ -+#if defined(__KERNEL__) -+ -+typedef enum { RD, WRT, ANY } crwlock_type_t; -+ -+#define crwlock_write_held(l) debug_crwlock_held(l, WRT, __BASE_FILE__,__LINE__) -+#define crwlock_read_held(l) debug_crwlock_held(l, RD, __BASE_FILE__, __LINE__) -+#define crwlock_held(l) debug_crwlock_held(l, ANY, __BASE_FILE__, __LINE__) -+ -+#define crwlock_read(l) debug_crwlock_read(l, __BASE_FILE__, __LINE__) -+#define crwlock_write(l) debug_crwlock_write(l, __BASE_FILE__, __LINE__) -+#define crwlock_done(l) debug_crwlock_done(l, __BASE_FILE__, __LINE__) -+ -+#if defined(DEBUG_RWLOCK) && defined(__alpha__) && !defined(DEBUG_SPINLOCK) -+#define DEBUG_SPINLOCK -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if !defined(DEBUG_SPINLOCK) -+#define debug_spin_lock(lock, file, line) spin_lock(lock) -+#endif -+ -+typedef struct { -+ spinlock_t m_lock; /* protects cnt fields below */ -+ int m_rdcnt; /* # of rdrs in crit section */ -+ int m_wrcnt; /* # of wrtrs in crit section */ -+ int m_rdwcnt; /* # of waiting readers */ -+ int m_wrwcnt; /* # of waiting writers */ -+ struct semaphore m_rdwait; /* sema where readers wait */ -+ struct semaphore m_wrwait; /* sema where writers wait */ -+ pid_t m_wrholder; /* task holding write lock */ -+} crwlock_t; -+ -+extern __inline__ void -+crwlock_init(crwlock_t *l) -+{ -+ l->m_lock = SPIN_LOCK_UNLOCKED; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->m_rdwait = MUTEX_LOCKED; -+ l->m_wrwait = MUTEX_LOCKED; -+#else -+ sema_init(&l->m_rdwait,0); -+ sema_init(&l->m_wrwait,0); -+#endif -+ l->m_rdcnt = l->m_wrcnt = l->m_rdwcnt = l->m_wrwcnt = 0; -+ l->m_wrholder = PID_NONE; -+} -+ -+extern __inline__ void -+crwlock_destroy(crwlock_t *l) -+{ -+ ASSERT(l->m_rdcnt == 0 && l->m_wrcnt == 0); -+} -+ -+/* -+ * If a writer has the lock presently or there are writers waiting, -+ * then we have to wait. -+ */ -+extern __inline__ void -+debug_crwlock_read(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_wrwcnt) { -+ l->m_rdwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_rdwait); /* P */ -+ } else { -+ l->m_rdcnt++; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+/* -+ * If we're the last reader, and a writer is waiting, -+ * then let the writer go now. -+ */ -+/* private */ -+extern __inline__ void -+debug_crwlock_read_done(crwlock_t *l, char *file, int line) -+{ -+ spin_lock(&l->m_lock); -+ l->m_rdcnt--; -+ if (l->m_wrwcnt && l->m_rdcnt == 0) { -+ l->m_wrcnt = 1; -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ return; -+ } -+ spin_unlock(&l->m_lock); -+} -+ -+extern __inline__ void -+debug_crwlock_write(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_rdcnt) { /* block if lock is in use */ -+ l->m_wrwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_wrwait); /* P */ -+ } else { /* lock is not in use */ -+ l->m_wrcnt = 1; -+ spin_unlock(&l->m_lock); -+ } -+ l->m_wrholder = current->pid; -+} -+ -+/* private */ -+extern __inline__ void -+debug_crwlock_write_done(crwlock_t *l, char *file, int line) -+{ -+ int rdrs; -+ -+ spin_lock(&l->m_lock); -+ l->m_wrholder = PID_NONE; -+ if (l->m_rdwcnt) { /* let any readers go first */ -+ l->m_wrcnt = 0; -+ rdrs = l->m_rdwcnt; -+ l->m_rdcnt = rdrs; -+ l->m_rdwcnt = 0; -+ spin_unlock(&l->m_lock); -+ while (rdrs--) -+ up(&l->m_rdwait); /* V */ -+ } else if (l->m_wrwcnt) { /* or let any writer go */ -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ } else { /* nobody waiting, unlock */ -+ l->m_wrcnt = 0; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+extern __inline__ void -+debug_crwlock_done(crwlock_t *l, char *file, int line) -+{ -+ if (l->m_wrholder == current->pid) -+ debug_crwlock_write_done(l, file, line); -+ else -+ debug_crwlock_read_done(l, file, line); -+} -+ -+/* -+ * Return nonzero if lock is held -+ */ -+extern __inline__ int -+debug_crwlock_held(crwlock_t *l, crwlock_type_t t, char *file, int line) -+{ -+ int res; -+ -+ spin_lock(&l->m_lock); -+ switch(t) { -+ case RD: -+ res = l->m_rdcnt; -+ break; -+ case WRT: -+ res = l->m_wrcnt; -+ break; -+ case ANY: -+ res = l->m_wrcnt + l->m_rdcnt; -+ break; -+ } -+ spin_unlock(&l->m_lock); -+ -+ return res; -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_RWLOCK_H */ -Index: linux-2.4.21/include/qsnet/ctrl_linux.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/ctrl_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/ctrl_linux.h 2005-06-01 23:12:54.751416000 -0400 -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_CTRL_LINUX_H -+#define __QSNET_CTRL_LINUX_H -+ -+#ident "$Id: ctrl_linux.h,v 1.3 2003/03/26 09:32:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/ctrl_linux.h,v $*/ -+ -+#define QSNETIO_USER_BASE 0x40 -+ -+#define QSNETIO_DEBUG_DUMP _IO ('e', QSNETIO_USER_BASE + 0) -+ -+typedef struct qsnetio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} QSNETIO_DEBUG_BUFFER_STRUCT; -+#define QSNETIO_DEBUG_BUFFER _IOWR ('e', QSNETIO_USER_BASE + 1, QSNETIO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct qsnetio_debug_kmem_struct -+{ -+ void *handle; -+} QSNETIO_DEBUG_KMEM_STRUCT; -+#define QSNETIO_DEBUG_KMEM _IOWR ('e', QSNETIO_USER_BASE + 2, QSNETIO_DEBUG_KMEM_STRUCT) -+ -+#endif /* __QSNET_CTRL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/debug.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/debug.h 2005-06-01 23:12:54.752415848 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+#ifndef _QSNET_DEBUG_H -+#define _QSNET_DEBUG_H -+ -+#if defined(DIGITAL_UNIX) -+#include -+#elif defined(LINUX) -+extern int qsnet_assfail (char *ex, const char *func, char *file, int line); -+ -+#define ASSERT(EX) do { \ -+ if (!(EX) && qsnet_assfail (#EX, __FUNCTION__, __BASE_FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#endif /* DIGITAL_UNIX */ -+ -+/* debug.c */ -+extern void qsnet_debug_init(void); -+extern void qsnet_debug_fini(void); -+extern void qsnet_debug_disable(int); -+extern void qsnet_debug_alloc(void); -+ -+#define QSNET_DEBUG_BUFFER ((unsigned int)(0x01)) -+#define QSNET_DEBUG_CONSOLE ((unsigned int)(0x02)) -+#define QSNET_DEBUG_BUF_CON ( QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE ) -+ -+#ifdef __GNUC__ -+extern void qsnet_debugf (unsigned int mode, char *fmt, ...) -+ __attribute__ ((format (printf,2,3))); -+extern void kqsnet_debugf (char *fmt, ...) -+ __attribute__ ((format (printf,1,2))); -+#else -+extern void qsnet_debugf (unsigned int mode, char *fmt, ...); -+extern void kqsnet_debugf (char *fmt, ...); -+#endif -+extern void qsnet_vdebugf (unsigned int mode, char * prefix, char *fmt, va_list ap); -+extern int qsnet_debug_buffer(caddr_t ubuffer, int len); -+extern int qsnet_debug_dump (void); -+extern int qsnet_debug_kmem (void *handle); -+ -+extern void qsnet_debug_buffer_on(void); -+extern void qsnet_debug_buffer_clear(void); -+extern void qsnet_debug_buffer_mark(char *str); -+ -+#endif /* _QSNET_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/fence.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/fence.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/fence.h 2005-06-01 23:12:54.752415848 -0400 -@@ -0,0 +1,178 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: fence.h,v 1.21.6.4 2004/11/23 14:34:45 addy Exp $ */ -+/* $Source: /cvs/master/quadrics/qsnet/fence.h,v $*/ -+ -+#ifndef _CONFIG_FENCE_H -+#define _CONFIG_FENCE_H -+ -+#ident "$Id: fence.h,v 1.21.6.4 2004/11/23 14:34:45 addy Exp $" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(__ELAN__) || defined(__ELAN3__) -+ -+/* no memory barriers required on elan3/elan4 */ -+ -+#elif defined QSNET_MEMBARS_ASSERT -+ -+#include -+#define MEMBAR_MEMISSUE() assert(0); -+#define MEMBAR_SYNC() assert(0); -+#define MEMBAR_STORELOAD() assert(0); -+#define MEMBAR_LOADSTORE() assert(0); -+#define MEMBAR_STORESTORE() assert(0); -+#define MEMBAR_LOADLOAD() assert(0); -+#define MEMBAR_VISIBLE() assert(0); -+#define MEMBAR_DRAIN() assert(0); -+ -+#elif defined(__alpha) -+ -+/* Memory barrier instructions */ -+#if defined(__DECC) || defined(__DECXX) -+long asm( const char *,...); -+#pragma intrinsic( asm ) -+#define MEMBAR_MEMISSUE() asm("mb") -+#define MEMBAR_SYNC() asm("mb") -+#define MEMBAR_STORELOAD() asm("wmb") -+#define MEMBAR_LOADSTORE() asm("mb") -+#define MEMBAR_STORESTORE() asm("wmb") -+#define MEMBAR_LOADLOAD() asm("mb") -+#define MEMBAR_VISIBLE() asm("") -+#define MEMBAR_DRAIN() asm("wmb") -+ -+#else -+/* Assume gcc */ -+#define MEMBAR_MEMISSUE() asm volatile ("mb"::) -+#define MEMBAR_SYNC() asm volatile ("mb"::) -+#define MEMBAR_STORELOAD() asm volatile ("wmb"::) -+#define MEMBAR_LOADSTORE() asm volatile ("mb"::) -+#define MEMBAR_STORESTORE() asm volatile ("wmb"::) -+#define MEMBAR_LOADLOAD() asm volatile ("mb"::) -+#define MEMBAR_VISIBLE() asm volatile ("" ::: "memory") -+#define MEMBAR_DRAIN() asm volatile ("wmb"::: "memory") -+ -+#endif /* __DECC */ -+ -+#elif defined(__sparc) -+ -+/* UltraSPARC with WRITE MERGING enabled */ -+#define MEMBAR_MEMISSUE() asm volatile ("membar #MemIssue"); -+#define MEMBAR_SYNC() asm volatile ("membar #Sync"); -+#define MEMBAR_STORELOAD() asm volatile ("membar #StoreLoad"); -+#define MEMBAR_LOADSTORE() asm volatile ("membar #LoadStore"); -+#define MEMBAR_STORESTORE() asm volatile ("membar #StoreStore"); -+#define MEMBAR_LOADLOAD() asm volatile ("membar #LoadLoad"); -+#define MEMBAR_VISIBLE() asm volatile (""::: "memory") -+#define MEMBAR_DRAIN() asm volatile (""::: "memory") -+ -+#elif defined(__linux__) -+ -+#if defined(__INTEL_COMPILER) -+ -+/* NB: Intel compiler version 8.0 now also defines __GNUC__ unless you set the -no-gcc cmdline option -+ * I've moved the check for __INTEL_COMPILER to be first to get around this -+ */ -+#ifdef __ECC -+ -+#include -+ -+#define MEMBAR_MEMISSUE() __mf() -+#define MEMBAR_SYNC() __mf() -+#define MEMBAR_STORELOAD() __mf() -+#define MEMBAR_LOADSTORE() __mf() -+#define MEMBAR_STORESTORE() __mf() -+#define MEMBAR_LOADLOAD() __mf() -+#define MEMBAR_VISIBLE() __mf() -+#define MEMBAR_DRAIN() __mf() -+ -+#else -+ -+#warning Membars not implemented with this compiler. -+#define MEMBAR_MEMISSUE() ; -+#define MEMBAR_SYNC() ; -+#define MEMBAR_STORELOAD() ; -+#define MEMBAR_LOADSTORE() ; -+#define MEMBAR_STORESTORE() ; -+#define MEMBAR_LOADLOAD() ; -+#define MEMBAR_VISIBLE() ; -+#define MEMBAR_DRAIN() ; -+ -+#endif /* __ECC */ -+ -+#elif defined(__GNUC__) -+ -+#ifndef __ia64 -+ -+/* These are needed by on AMD64 */ -+#include -+#include -+ -+#ifndef __cplusplus -+/* this header file has a parameter called "new" - great huh */ -+#include -+#endif -+ -+#else -+# define mb() __asm__ __volatile__ ("mf" ::: "memory") -+# define rmb() mb() -+# define wmb() mb() -+#endif /* !__ia64 */ -+ -+#if defined(__x86_64) || defined(__i386) -+/* For some reason the AMD64 definition (glibc-devel 2.3.X) of this -+ * is not useful (compiler only directive) so we overload it here -+ */ -+/* I don't trust the IA32 header files either as with mtrr enabled -+ * we really need a membar and not a compiler directive -+ * NB: sfence is only available with X86_FEATURE_XMM CPUs -+ */ -+#undef wmb -+#define wmb() asm volatile("sfence":::"memory"); -+#endif /* __x86_64 */ -+ -+#define MEMBAR_MEMISSUE() mb() -+#define MEMBAR_SYNC() mb() -+#define MEMBAR_STORELOAD() wmb() -+#define MEMBAR_LOADSTORE() mb() -+#define MEMBAR_STORESTORE() wmb() -+#define MEMBAR_LOADLOAD() mb() -+ -+#ifdef __ia64 -+#define MEMBAR_VISIBLE() asm volatile ("mf.a;;mf;;"::: "memory") -+#define MEMBAR_DRAIN() asm volatile ("mf;"::: "memory") -+#else -+#define MEMBAR_VISIBLE() asm volatile (""::: "memory") -+#define MEMBAR_DRAIN() wmb() -+#endif -+ -+#else /* elif __GNUC__ */ -+ -+#error Membars not implemented for this architecture/compiler. -+ -+#endif /* __INTEL_COMPILER */ -+ -+#else /* elif __linux__ */ -+ -+#error Membars not implemented for this architecture/compiler. -+ -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _CONFIG_FENCE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/kernel.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/kernel.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/kernel.h 2005-06-01 23:12:54.752415848 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_H -+#define __QSNET_KERNEL_H -+ -+#ident "$Id: kernel.h,v 1.8 2003/03/14 10:18:22 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel.h,v $*/ -+ -+#include -+#include -+ -+#if defined(SOLARIS) -+#include -+#endif -+ -+#if defined(DIGITAL_UNIX) -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#endif -+ -+#include -+ -+#endif /* __QSNET_KERNEL_H */ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.4.21/include/qsnet/kernel_linux.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/kernel_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/kernel_linux.h 2005-06-01 23:12:54.753415696 -0400 -@@ -0,0 +1,354 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_LINUX_H -+#define __QSNET_KERNEL_LINUX_H -+ -+#ident "$Id: kernel_linux.h,v 1.62.6.5 2005/01/18 14:37:22 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.h,v $*/ -+ -+#if defined(MODVERSIONS) -+#include -+#endif -+ -+#include -+#include -+ -+ -+/* ASSERT(spin_is_locked(l)) would always fail on UP kernels */ -+#if defined(CONFIG_SMP) -+#define SPINLOCK_HELD(l) spin_is_locked(l) -+#else -+#define SPINLOCK_HELD(l) (1) -+#endif -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include /* Quadrics added */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#if defined(LINUX_ALPHA) -+# include /* for TSUNAMI_MEM */ -+#endif -+ -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) -+# undef MOD_INC_USE_COUNT -+# undef MOD_DEC_USE_COUNT -+# define MOD_INC_USE_COUNT -+# define MOD_DEC_USE_COUNT -+#endif -+ -+#define MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define MAX(a,b) ((a) > (b) ? (a) : (b)) -+ -+/* stray types */ -+typedef u64 u_longlong_t; -+typedef unsigned long uintptr_t; -+typedef int bool_t; -+ -+typedef unsigned long virtaddr_t; /* virtual address */ -+typedef unsigned long ioaddr_t; /* io address */ -+typedef unsigned long sdramaddr_t; /* elan sdram offset */ -+ -+/* 386 kernel can be compiled with PAE enabled to use a 44 bit physical address */ -+#if defined(CONFIG_X86_PAE) -+typedef unsigned long long physaddr_t; -+#else -+typedef unsigned long physaddr_t; -+#endif -+ -+/* ticks since reboot, and tick freq */ -+#define lbolt jiffies -+#define hz HZ -+ -+/* System page size and friends */ -+#define PAGESIZE PAGE_SIZE -+#define PAGESHIFT PAGE_SHIFT -+#define PAGEOFFSET (PAGE_SIZE - 1) -+#define PAGEMASK PAGE_MASK -+ -+#define PAGE_ALIGNED(a) (((a) & PAGE_MASK) == a) -+ -+/* convert between bytes and pages */ -+#define btop(b) ((unsigned long)(b) >> PAGE_SHIFT) /* rnd down */ -+#define btopr(b) btop(PAGE_ALIGN((unsigned long) b)) /* rnd up */ -+#define ptob(p) ((unsigned long)(p) << PAGE_SHIFT) -+ -+/* round up sz to the nearest multiple of blk */ -+#define roundup(sz,blk) ((blk) * ((sz) / (blk) + ((sz) % (blk) ? 1 : 0))) -+ -+/* send a signal to a process */ -+#define psignal(pr,sig) send_sig(sig,pr,0) -+ -+/* microsecond delay */ -+#define DELAY(us) udelay(us) -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+ -+/* D-Unix compatable errno values */ -+#define ESUCCESS 0 -+#define EFAIL 255 -+ -+/* ASSERT(NO_LOCKS_HELD) will be a no-op */ -+#define NO_LOCKS_HELD 1 -+ -+/* misc */ -+typedef int label_t; -+#define on_fault(ljp) ((ljp) == NULL) -+#define _NOTE(X) -+#define no_fault() ((void) 0) -+#define panicstr 0 -+ -+/* return from system call is -EXXX on linux */ -+#define set_errno(e) (-(e)) -+ -+/* -+ * BSD-style byte ops -+ */ -+ -+#define bcmp(src1,src2,len) memcmp(src1,src2,len) -+#define bzero(dst,len) memset(dst,0,len) -+#define bcopy(src,dst,len) memcpy(dst,src,len) -+ -+#define preemptable_start do { long must_yield_at = lbolt + (hz/10); -+#define preemptable_end } while (0) -+#define preemptable_check() do {\ -+ if ((lbolt - must_yield_at) > 0)\ -+ {\ -+ preemptable_yield() ; \ -+ must_yield_at = lbolt + (hz/10);\ -+ }\ -+ } while (0) -+ -+#define preemptable_yield() schedule() -+ -+#define CURPROC() current -+#define CURTHREAD() current -+#define SUSER() suser() -+ -+/* 64 bit IO operations on 32 bit intel cpus using MMX */ -+#if defined(LINUX_I386) -+extern u64 qsnet_readq (volatile u64 *ptr); -+extern void qsnet_writeq (u64 value, volatile u64 *ptr); -+ -+#define readq(ptr) qsnet_readq((void *) ptr) -+#define writeq(val,ptr) qsnet_writeq(val, (void *)ptr) -+#endif -+ -+/* -+ * Memory barriers -+ */ -+#ifndef mmiob -+# define mmiob() mb() -+#endif -+ -+/* -+ * Exit handlers -+ */ -+#define HANDLER_REGISTER(func,arg,flags) xa_handler_register(func,arg,flags) -+#define HANDLER_UNREGISTER(func,arg,flags) xa_handler_unregister(func,arg,flags) -+ -+/* -+ * KMEM_GETPAGES and KMEM_ALLOC both call kmem_alloc, which -+ * translates the call to kmalloc if < PAGE_SIZE, or vmalloc -+ * if >= PAGE_SIZE. vmalloc will always return a page-aligned -+ * region rounded up to the nearest page, while kmalloc will -+ * return bits and pieces of a page. -+ */ -+ -+#ifdef KMEM_DEBUG -+extern void *qsnet_kmem_alloc_debug(int len, int sleep, int zerofill, char *file, int line); -+extern void qsnet_kmem_free_debug(void *ptr, int len, char *file, int line); -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,0,__FILE__,__LINE__); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,1,__FILE__,__LINE__); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free_debug((void *)ptr,len,__FILE__,__LINE__) -+ -+#else -+ -+extern void *qsnet_kmem_alloc(int len, int sleep, int zerofill); -+extern void qsnet_kmem_free(void *ptr, int len); -+ -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,0); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,1); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free((void *)ptr,len) -+ -+#endif -+extern void qsnet_kmem_display(void *handle); -+extern physaddr_t kmem_to_phys(void *ptr); -+ -+#define KMEM_ASSERT(sleep) ASSERT(!(in_interrupt() && sleep)) -+ -+ -+#define KMEM_GETPAGES(ptr,type,pgs,sleep) KMEM_ZALLOC(ptr,type,ptob(pgs),sleep) -+#define KMEM_FREEPAGES(ptr,pgs) KMEM_FREE(ptr,ptob(pgs)); -+ -+/* -+ * Copying from user space -> kernel space (perms checked) -+ */ -+#define copyin(up,kp,size) copy_from_user(kp,up,size) -+#define copyin_noerr(up,kp,size) copy_from_user(kp,up,size) -+ -+/* get_user() gets xfer width right */ -+#define fulinux(ret, up) (get_user(ret, (up)) == 0 ? ret : -1) -+#define fulinuxp(ret, up) (get_user(ret, (up)) == 0 ? ret : NULL) -+ -+extern __inline__ int fubyte (u8 *up) { u8 ret; return fulinux(ret, up);} -+extern __inline__ int fusword (u16 *up) { u16 ret; return fulinux(ret, up);} -+extern __inline__ int fuword (u32 *up) { u32 ret; return fulinux(ret, up);} -+#if BITS_PER_LONG > 32 -+extern __inline__ u64 fulonglong(u64 *up) { u64 ret; return fulinux(ret, up);} -+#else -+extern __inline__ u64 fulonglong(u64 *up) { return ((u64) fuword((u32 *)up) | (((u64) fuword(((u32 *)up)+1))<<32)); } -+#endif -+extern __inline__ void *fuptr (void **up) { void *ret; return fulinuxp(ret,up);} -+ -+#define fubyte_noerr(up) fubyte(up) -+#define fusword_noerr(up) fusword(up) -+#define fuword_noerr(up) fuword(up) -+#define fulonglong_noerr(up) fulonglong(up) -+#define fuptr_noerr(up) fuptr(up) -+ -+extern __inline__ int copyinstr(char *up, char *kp, int max, int *size) -+{ -+ for (*size = 1; *size <= max; (*size)++) { -+ if (get_user(*kp, up++) != 0) -+ return EFAULT; /* bad user space addr */ -+ if (*kp++ == '\0') -+ return 0; /* success */ -+ } -+ *size = max; -+ return ENAMETOOLONG; /* runaway string */ -+} -+ -+/* -+ * Copying from kernel space -> user space (perms checked) -+ */ -+ -+#define copyout(kp,up,size) copy_to_user(up,kp,size) -+#define copyout_noerr(kp,up,size) copy_to_user(up,kp,size) -+ -+/* put_user() gets xfer width right */ -+#define sulinux(val, up) (put_user(val, (up)) == 0 ? 0 : -1) -+ -+extern __inline__ int subyte (u8 *up, u8 val) { return sulinux(val, up); } -+extern __inline__ int susword (u16 *up, u16 val) { return sulinux(val, up); } -+extern __inline__ int suword (u32 *up, u32 val) { return sulinux(val, up); } -+#if BITS_PER_LONG > 32 -+extern __inline__ int sulonglong(u64 *up, u64 val) { return sulinux(val, up); } -+#else -+extern __inline__ int sulonglong(u64 *up, u64 val) { return (suword((u32 *) up, (u32) val) == 0 ? -+ suword(((u32 *) up)+1, (u32) (val >> 32)) : -1); } -+#endif -+extern __inline__ int suptr (void **up,void *val){ return sulinux(val, up); } -+ -+#define subyte_noerr(up,val) subyte(up,val) -+#define susword_noerr(up,val) susword(up,val) -+#define suword_noerr(up,val) suword(up,val) -+#define sulonglong_noerr(up,val) sulonglong(up,val) -+#define suptr_noerr(up,val) suptr(up,val) -+ -+/* -+ * /proc/qsnet interface -+ */ -+extern inline int -+str_append(char *buf, char *add, int size) -+{ -+#define TRUNC_MSG "[Output truncated]\n" -+ int full = 0; -+ int max = size - strlen(TRUNC_MSG) - strlen(add) - 1; -+ -+ if (strlen(buf) > max) { -+ strcat(buf, TRUNC_MSG); -+ full = 1; -+ } else -+ strcat(buf, add); -+ return full; -+} -+ -+/* Spinlocks */ -+#define spin_lock_destroy(l) ((void) 0) -+ -+/* Complex - Reader/Writer locks - we added */ -+typedef crwlock_t krwlock_t; -+#define krwlock_init(l) crwlock_init(l) -+#define krwlock_destroy(l) crwlock_destroy(l) -+#define krwlock_write(l) crwlock_write(l) -+#define krwlock_read(l) crwlock_read(l) -+#define krwlock_done(l) crwlock_done(l) -+#define krwlock_is_locked(l) crwlock_held(l) -+#define krwlock_is_write_locked(l) crwlock_write_held(l) -+#define krwlock_is_read_locked(l) crwlock_read_held(l) -+ -+/* -+ * Timeouts - Solaris style. -+ */ -+typedef struct timer_list timer_fn_t; -+ -+extern inline void -+schedule_timer_fn(timer_fn_t *timer, void (*fun)(void *), void *arg, long hz_delay) -+{ -+ init_timer(timer); -+ -+ timer->function = (void (*)(unsigned long)) fun; -+ timer->data = (unsigned long) arg; -+ timer->expires = jiffies + hz_delay; -+ -+ add_timer(timer); -+} -+ -+/* returns 1 if timer_fn was cancelled */ -+extern inline int -+cancel_timer_fn(timer_fn_t *timer) -+{ -+ return (del_timer_sync(timer)); -+} -+ -+extern inline int -+timer_fn_queued(timer_fn_t *timer) -+{ -+ return (timer_pending (timer)); -+} -+/* -+ * Hold/release CPU's. -+ */ -+ -+extern void cpu_hold_all(void); -+extern void cpu_release_all(void); -+#define CAPTURE_CPUS() cpu_hold_all() -+#define RELEASE_CPUS() cpu_release_all() -+ -+#define IASSERT ASSERT -+ -+#endif /* __QSNET_KERNEL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/kpte.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/kpte.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/kpte.h 2005-06-01 23:12:54.753415696 -0400 -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KPTE_H -+#define __QSNET_KPTE_H -+ -+#ident "@(#)$Id: kpte.h,v 1.1.2.1 2004/11/02 10:45:29 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/qsnet/kpte.h,v $*/ -+ -+#include -+ -+#ifdef NO_RMAP -+# define pte_offset_kernel pte_offset -+# define pte_offset_map pte_offset -+# define pte_unmap(A) do { ; } while (0) -+#endif -+ -+/* -+ * Pte stuff -+ */ -+static __inline__ struct mm_struct * -+get_kern_mm(void) -+{ -+ return &init_mm; -+} -+ -+static __inline__ pte_t * -+find_pte_map(struct mm_struct *mm, unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *ptep; -+ -+/* XXXX - handle hugh tlb code */ -+ pgd = pgd_offset(mm, vaddr); -+ if (pgd_none(*pgd) || pgd_bad(*pgd)) -+ goto out; -+ -+ pmd = pmd_offset(pgd, vaddr); -+ if (pmd_none(*pmd) || pmd_bad (*pmd)) -+ goto out; -+ -+ ptep = pte_offset_map (pmd, vaddr); -+ if (! ptep) -+ goto out; -+ -+ if (pte_present (*ptep)) -+ return ptep; -+ -+ pte_unmap (ptep); -+out: -+ return NULL; -+} -+ -+static __inline__ pte_t * -+find_pte_kernel(unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = pgd_offset_k(vaddr); -+ if (pgd && !pgd_none(*pgd)) { -+ pmd = pmd_offset(pgd, vaddr); -+ if (pmd && pmd_present(*pmd)) { -+ pte = pte_offset_kernel(pmd, vaddr); -+ if (pte && pte_present(*pte)) -+ return (pte); -+ } -+ } -+ return (NULL); -+} -+ -+static __inline__ physaddr_t -+pte_phys(pte_t pte) -+{ -+#if defined(LINUX_ALPHA) -+ /* RedHat 7.1 2.4.3-12 -+ * They have now enabled Monster windows on Tsunami -+ * and so can use the Main's phys pte value -+ */ -+ return (pte_val(pte) >> (32-PAGE_SHIFT)); -+#elif defined(LINUX_I386) -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1)); -+#elif defined(LINUX_SPARC) -+ return (pte_val(pte) & _PAGE_PADDR); -+#elif defined(LINUX_IA64) -+ return (pte_val(pte) & _PFN_MASK); -+#elif defined(LINUX_X86_64) -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1) & ~_PAGE_NX); -+#else -+#error Unknown architecture -+#endif -+} -+ -+#endif /* __QSNET_KPTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/kthread.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/kthread.h 2005-06-01 23:12:54.754415544 -0400 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KTHREAD_H -+#define __QSNET_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.1 2004/10/28 11:50:29 david Exp $ $Name: QSNETMODULES-4-30_20050128 $" -+/* $Source: /cvs/master/quadrics/qsnet/kthread.h,v $*/ -+ -+#include -+ -+/* -+ * kernel threads -+ */ -+extern __inline__ void -+kernel_thread_init(char *comm) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#ifndef NO_NPTL -+# define sigmask_lock sighand->siglock -+#endif -+ lock_kernel(); -+ daemonize(); -+ reparent_to_init(); -+ -+ /* avoid getting signals */ -+ spin_lock_irq(¤t->sigmask_lock); -+ flush_signals(current); -+ sigfillset(¤t->blocked); -+ -+#ifdef NO_NPTL -+ recalc_sigpending(current); -+#else -+ recalc_sigpending(); -+#endif -+ -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ /* set our name for identification purposes */ -+ strncpy(current->comm, comm, sizeof(current->comm)); -+ -+ unlock_kernel(); -+#else -+ daemonize(comm); -+#endif -+} -+ -+extern __inline__ void * -+kernel_thread_wrap(caddr_t stk, int stksize, void (*proc)(void *), void *arg) -+{ -+ ASSERT(stk == NULL && stksize == 0); -+ kernel_thread((int (*)(void *))proc, arg, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ return (void *)1; /* non-null value */ -+} -+ -+#define kernel_thread_create(proc,arg) kernel_thread_wrap(NULL,0,(void (*)(void *))proc,arg) -+#define kernel_thread_exit() ((void) 0) -+#define kernel_thread_become_highpri() ((void) 0) -+ -+#endif /* __QSNET_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/list.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/list.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/list.h 2005-06-01 23:12:54.754415544 -0400 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: list.h,v 1.5 2003/10/27 13:55:33 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/list.h,v $*/ -+ -+#ifndef __QSNET_LIST_H -+#define __QSNET_LIST_H -+ -+/* Implementation of doubly linked lists - compatible with linux */ -+struct list_head -+{ -+ struct list_head *next; -+ struct list_head *prev; -+}; -+ -+#if !defined(LINUX) -+#if ! defined( offsetof ) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+#endif -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+#endif -+ -+#define list_entry(ptr, type, off) \ -+ ((type *) ((unsigned long)(ptr) - offsetof (type,off))) -+ -+#define INIT_LIST_HEAD(list) \ -+MACRO_BEGIN \ -+ (list)->next = (list)->prev = (list); \ -+MACRO_END -+ -+#define list_add(new, list) \ -+MACRO_BEGIN \ -+ (list)->next->prev = (new); \ -+ (new)->next = (list)->next; \ -+ (new)->prev = (list); \ -+ (list)->next = (new); \ -+MACRO_END -+ -+#define list_add_tail(new, list) \ -+MACRO_BEGIN \ -+ (list)->prev->next = new; \ -+ (new)->prev = (list)->prev; \ -+ (new)->next = (list); \ -+ (list)->prev = (new); \ -+MACRO_END -+ -+#define list_del(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+MACRO_END -+ -+#define list_del_init(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+ (entry)->next = (entry)->prev = (entry); \ -+MACRO_END -+ -+#define list_empty(list) \ -+ ((list)->next == (list)) -+ -+#define list_for_each(pos,list) \ -+ for (pos = (list)->next; pos != (list); \ -+ pos = (pos)->next) -+ -+#define list_for_each_safe(pos,n,list) \ -+ for (pos = (list)->next, n = (pos)->next; pos != (list); \ -+ pos = n, n = (pos)->next) -+ -+#endif /* __QSNET_LIST_H */ -Index: linux-2.4.21/include/qsnet/mutex.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/mutex.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/mutex.h 2005-06-01 23:12:54.754415544 -0400 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_MUTEX_H) -+#define _LINUX_MUTEX_H -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PID_NONE 0 -+ -+typedef struct -+{ -+ struct semaphore sem; -+ pid_t holder; -+} kmutex_t; -+ -+extern __inline__ void -+kmutex_init (kmutex_t *l) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->sem = MUTEX; -+#else -+ init_MUTEX(&l->sem); -+#endif -+ l->holder = PID_NONE; -+} -+ -+extern __inline__ void -+kmutex_destroy (kmutex_t *l) -+{ -+ ASSERT (l->holder == PID_NONE); -+} -+ -+extern __inline__ void -+kmutex_lock (kmutex_t *l) -+{ -+ ASSERT(l->holder != current->pid); -+ down (&l->sem); -+ l->holder = current->pid; -+} -+ -+extern __inline__ void -+kmutex_unlock (kmutex_t *l) -+{ -+ ASSERT(l->holder == current->pid); -+ -+ l->holder = PID_NONE; -+ up (&l->sem); -+} -+ -+extern __inline__ int -+kmutex_trylock (kmutex_t *l) -+{ -+ if (down_trylock (&l->sem) == 0) -+ { -+ l->holder = current->pid; -+ return (1); -+ } -+ return (0); -+} -+ -+extern __inline__ int -+kmutex_is_locked (kmutex_t *l) -+{ -+ return (l->holder == current->pid); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_MUTEX_H */ -Index: linux-2.4.21/include/qsnet/procfs_linux.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/procfs_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/procfs_linux.h 2005-06-01 23:12:54.755415392 -0400 -@@ -0,0 +1,234 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __PROCFS_LINUX_H -+#define __PROCFS_LINUX_H -+ -+#ident "$Id: procfs_linux.h,v 1.6.2.6 2004/12/06 17:36:24 robin Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/procfs_linux.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+ -+extern gid_t qsnet_procfs_gid; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static inline int -+qsnet_proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static inline int -+qsnet_proc_write_int(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_int(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "%d\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_int(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_int; -+ p->read_proc = qsnet_proc_read_int; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+static inline int -+qsnet_proc_write_hex(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_hex(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "0x%x\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_hex(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_hex; -+ p->read_proc = qsnet_proc_read_hex; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+#define QSNET_PROC_STR_LEN_MAX ((int)256) -+ -+static inline int -+qsnet_proc_write_str(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int res = count; -+ -+ if (count > (QSNET_PROC_STR_LEN_MAX - 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user((char *)data, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ ((char *)data)[count] = '\0'; -+ /* remove linefeed */ -+ if ( (count) && (((char *)data)[count -1] == '\n')) -+ ((char *)data)[count -1] = '\0'; -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_str(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ if ( strlen(data) > (count + 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ /* cant output too much */ -+ if ( strlen(data) > (count + 1)) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ -+ len = sprintf(page, "%s\n", (char *)data); -+ if (len > count) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_str(struct proc_dir_entry *dir, char *path, char *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_str; -+ p->read_proc = qsnet_proc_read_str; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+extern struct proc_dir_entry *qsnet_procfs_root; -+extern struct proc_dir_entry *qsnet_procfs_config; -+ -+#ifdef NO_PDE -+static inline struct proc_dir_entry *PDE(const struct inode *inode) -+{ -+ return inode->u.generic_ip; -+} -+#endif -+#endif /* __KERNEL__ */ -+ -+#define QSNET_PROCFS_IOCTL "/proc/qsnet/ioctl" -+#define QSNET_PROCFS_KMEM_DEBUG "/proc/qsnet/kmem_debug" -+#define QSNET_PROCFS_VERSION "/proc/qsnet/version" -+ -+#endif /* __PROCFS_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/pthread.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/pthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/pthread.h 2005-06-01 23:12:54.755415392 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: pthread.h,v 1.5 2004/06/07 10:47:06 addy Exp $ */ -+/* $Source: /cvs/master/quadrics/qsnet/pthread.h,v $*/ -+ -+#ifndef _CONFIG_PTHREAD_H -+#define _CONFIG_PTHREAD_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(__ELAN__) -+ -+/* No pthread support on Elan co-processor */ -+ -+#define MUTEX unsigned long long -+#define MUTEX_INIT(X) ; -+#define MUTEX_LOCK(X) ; -+#define MUTEX_UNLOCK(X) ; -+ -+#else -+#if defined(DIGITAL_UNIX) -+#include -+#define MUTEX pthread_mutex_t -+#define MUTEX_INIT(X) tis_mutex_init(X) -+#define MUTEX_LOCK(X) tis_mutex_lock(X) -+#define MUTEX_UNLOCK(X) tis_mutex_unlock(X) -+#define MUTEX_TRYLOCK(X) (tis_mutex_trylock(X) == 0) -+ -+#else /* Linux... */ -+ -+/* Use standard pthread calls */ -+#include -+#define MUTEX pthread_mutex_t -+#define MUTEX_INIT(X) pthread_mutex_init(X, NULL) -+#define MUTEX_LOCK(X) pthread_mutex_lock(X) -+#define MUTEX_UNLOCK(X) pthread_mutex_unlock(X) -+#define MUTEX_TRYLOCK(X) (pthread_mutex_trylock(X) == 0) -+ -+#endif /* DIGITAL_UNIX */ -+#endif /* __ELAN__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _CONFIG_PTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.4.21/include/qsnet/statsformat.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/statsformat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/statsformat.h 2005-06-01 23:12:54.756415240 -0400 -@@ -0,0 +1,25 @@ -+#ifndef _QSNET_STATSFORMAT_H -+#define _QSNET_STATSFORMAT_H -+ -+#ident "$Id: statsformat.h,v 1.2 2003/05/22 19:37:14 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/statsformat.h,v $*/ -+ -+#include -+ -+/* -+ * format of an Elan stats record -+ * -+ * type char(8), type of statistic, e.g. FPAGE, ELAN3, TPORT -+ * time uint64, 10 digits, time in millisecs since counters initialised -+ * device uint, 2 digits, Elan device id -+ * name char(32), name of the statistic -+ * value uint64, current value of statistic -+ */ -+ -+#ifdef _ILP32 -+#define ELAN_STATSFORMAT "%-8s %10llu %2d %-32s %llu\n" -+#else -+#define ELAN_STATSFORMAT "%-8s %10lu %2d %-32s %lu\n" -+#endif -+ -+#endif -Index: linux-2.4.21/include/qsnet/types.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/types.h 2005-06-01 23:12:54.756415240 -0400 -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_TYPES_H -+#define __QSNET_TYPES_H -+ -+#ident "$Id: types.h,v 1.16 2003/08/01 16:21:38 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/types.h,v $*/ -+ -+/* -+ * Include typedefs for ISO/IEC 9899:1990 standard types -+ * -+ * -+ * The following integer typedefs are used: -+ * -+ * int8_t, int16_t, int32_t, int64_t, intptr_t -+ * uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t -+ * uchar_t, ushort_t, uint_t, ulong_t -+ * -+ * also defines the following: -+ * u_char, u_short, u_int, u_long, caddr_t -+ */ -+ -+#include -+ -+#if defined(SOLARIS) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(SOLARIS) && !defined(__KERNEL__) -+# include -+# include -+#endif -+ -+#if defined(DIGITAL_UNIX) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(DIGITAL_UNIX) && !defined(__KERNEL__) -+# include -+# include -+#endif -+ -+#if defined(LINUX) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(LINUX) && !defined(__KERNEL__) -+# include -+# include -+# include -+ -+typedef unsigned char uchar_t; -+typedef unsigned short ushort_t; -+typedef unsigned int uint_t; -+typedef unsigned long ulong_t; -+#endif -+ -+#if defined(QNX) -+# include -+# include -+#endif -+ -+/* Define a type that will represent a Main CPU pointer -+ * on both the Main and the Elan -+ */ -+#ifdef __ELAN__ -+ -+#if defined(_MAIN_LP64) -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif -+ -+#else -+ -+#ifdef _LP64 -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif -+ -+#endif -+ -+ -+#endif /* __QSNET_TYPES_H */ -Index: linux-2.4.21/include/qsnet/workarounds.h -=================================================================== ---- linux-2.4.21.orig/include/qsnet/workarounds.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/qsnet/workarounds.h 2005-06-01 23:12:54.756415240 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _QSNET_WORKAROUNDS_H -+#define _QSNET_WORKAROUNDS_H -+ -+#ident "$Id: workarounds.h,v 1.11 2002/08/09 11:15:55 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/workarounds.h,v $ */ -+ -+/* Elan workarounds */ -+#undef ELAN_REVA_SUPPORTED /* rev a elans no longer supported. */ -+#undef ELITE_REVA_SUPPORTED /* removed since RMS disables broadcast on rev A elites. */ -+#define ELAN_REVB_BUG_1 -+/* WORKAROUND for GNAT hw-elan3/3263 */ -+#define ELAN_REVB_BUG_2 -+ -+/* WORKAROUND for GNATs ic-elan3/3637 & ic-elan3/3550 */ -+#define ELAN_REVB_BUG_3 -+ -+#endif /* _QSNET_WORKAROUNDS_H */ -Index: linux-2.4.21/include/rms/rmscall.h -=================================================================== ---- linux-2.4.21.orig/include/rms/rmscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/rms/rmscall.h 2005-06-01 23:12:54.757415088 -0400 -@@ -0,0 +1,144 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * rmscall.h: user interface to rms kernel module -+ * -+ * $Id: rmscall.h,v 1.25 2004/05/14 08:55:57 duncan Exp $ -+ * $Source: /cvs/master/quadrics/rmsmod/rmscall.h,v $ -+ * -+ */ -+ -+#ifndef RMSCALL_H_INCLUDED -+#define RMSCALL_H_INCLUDED 1 -+ -+#ident "$Id: rmscall.h,v 1.25 2004/05/14 08:55:57 duncan Exp $" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * flags for rms_fork_register -+ * -+ * RMS_IOF is not in a public header file -+ */ -+#define RMS_IOF 1 /* inherit on fork */ -+ -+#ifndef __KERNEL__ -+#include -+#endif -+ -+#include -+#include -+ -+#define MAXCOREPATHLEN 32 -+ -+#if defined(SOLARIS) -+typedef long long rmstime_t; -+#else /* DIGITAL_UNIX */ -+typedef long rmstime_t; -+#endif -+ -+typedef enum { -+ -+ PRG_RUNNING = 0x01, /* program is running */ -+ PRG_ZOMBIE = 0x02, /* last process on a node has exited */ -+ PRG_NODE = 0x04, /* stats are complete for this node */ -+ PRG_KILLED = 0x08, /* program was killed */ -+ PRG_SUSPEND = 0x10 /* program is suspended */ -+ -+} PRGSTATUS_FLAGS; -+ -+/* -+ * program time statistics extended in version 5 of the kernel module -+ */ -+typedef struct { -+ rmstime_t etime; /* elapsed cpu time (milli-secs) */ -+ rmstime_t atime; /* allocated cpu time (cpu milli-secs) */ -+ rmstime_t utime; /* user cpu time (cpu milli-secs) */ -+ rmstime_t stime; /* system cpu time (cpu milli-secs) */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int pageflts; /* number of page faults */ -+ rmstime_t memint; /* memory integral */ -+} prgstats_old_t; -+ -+typedef struct { -+ uint64_t etime; /* elapsed cpu time (milli-secs) */ -+ uint64_t atime; /* allocated cpu time (cpu milli-secs) */ -+ uint64_t utime; /* user cpu time (cpu milli-secs) */ -+ uint64_t stime; /* system cpu time (cpu milli-secs) */ -+ uint64_t pageflts; /* number of page faults */ -+ uint64_t memint; /* memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ uint64_t spare64[4]; /* expansion space */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int spare32[5]; /* expansion space */ -+} prgstats_t; -+ -+int rmsmod_init(void); -+void rmsmod_fini(void); -+ -+int rms_setcorepath(caddr_t path); -+int rms_getcorepath(pid_t pid, caddr_t path, int maxlen); -+int rms_prgcreate(int id, uid_t uid, int cpus); -+int rms_prgdestroy(int id); -+int rms_prgids(int maxids, int *prgids, int *nprgs); -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs); -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap); -+ -+int rms_prgsuspend(int id); -+int rms_prgresume(int id); -+int rms_prgsignal(int id, int signo); -+ -+int rms_getprgid(pid_t pid, int *id); -+int rms_ncaps(int *ncaps); -+int rms_getcap(int index, ELAN_CAPABILITY *cap); -+int rms_mycap(int *index); -+int rms_setcap(int index, int ctx); -+int rms_prefcap(int nprocess, int *index); -+ -+int rms_prggetstats(int id, prgstats_t *stats); -+void rms_accumulatestats(prgstats_t *total, prgstats_t *stats); -+char *rms_statsreport(prgstats_t *stats, char *buf); -+ -+int rms_elaninitdone(int vp); -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids); -+int rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers); -+ -+int rms_setpset(int psid); -+int rms_getpset(int id, int *psid); -+int rms_modversion(); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+ -+#if defined(__KERNEL__) -+ -+int rms_init(void); -+int rms_fini(void); -+int rms_reconfigure(void); -+ -+extern int rms_debug; -+ -+#if 1 -+#define DBG(x) do if (rms_debug) x ; while (0) -+#else -+#define DBG(x) -+#endif -+ -+#endif -+ -+#endif /* RMSCALL_H_INCLUDED */ -+ -+ -+ -+ -Index: linux-2.4.21/include/rms/rmsio.h -=================================================================== ---- linux-2.4.21.orig/include/rms/rmsio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/include/rms/rmsio.h 2005-06-01 23:12:54.757415088 -0400 -@@ -0,0 +1,185 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmsio.h,v 1.6 2004/05/14 08:55:57 duncan Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rmsio.h,v $*/ -+ -+ -+#ifndef __RMSMOD_RMSIO_H -+#define __RMSMOD_RMSIO_H -+ -+/* arg is corepath string */ -+#define RMSIO_SETCOREPATH _IOW ('r', 1, char) -+ -+typedef struct rmsio_getcorepath_struct -+{ -+ pid_t pid; -+ char *corepath; -+ int maxlen; -+} RMSIO_GETCOREPATH_STRUCT; -+#define RMSIO_GETCOREPATH _IOW ('r', 2, RMSIO_GETCOREPATH_STRUCT) -+ -+typedef struct rmsio_prgcreate_struct -+{ -+ int id; -+ uid_t uid; -+ int cpus; -+} RMSIO_PRGCREATE_STRUCT; -+#define RMSIO_PRGCREATE _IOW ('r', 3, RMSIO_PRGCREATE_STRUCT) -+ -+typedef struct rmsio_prginfo_struct -+{ -+ int id; -+ int maxpids; -+ pid_t *pids; -+ int *nprocs; -+} RMSIO_PRGINFO_STRUCT; -+#define RMSIO_PRGINFO _IOW ('r', 4, RMSIO_PRGINFO_STRUCT) -+ -+typedef struct rmsio_prgsignal_struct -+{ -+ int id; -+ int signo; -+} RMSIO_PRGSIGNAL_STRUCT; -+#define RMSIO_PRGSIGNAL _IOW ('r', 5, RMSIO_PRGSIGNAL_STRUCT) -+ -+typedef struct rmsio_prgaddcap_struct -+{ -+ int id; -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_PRGADDCAP_STRUCT; -+#define RMSIO_PRGADDCAP _IOW ('r', 6, RMSIO_PRGADDCAP_STRUCT) -+typedef struct rmsio_setcap_struct -+{ -+ int index; -+ int ctx; -+} RMSIO_SETCAP_STRUCT; -+#define RMSIO_SETCAP _IOW ('r', 7, RMSIO_SETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct -+{ -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_GETCAP_STRUCT; -+#define RMSIO_GETCAP _IOW ('r', 8, RMSIO_GETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct32 -+{ -+ int index; -+ unsigned int capptr; -+} RMSIO_GETCAP_STRUCT32; -+#define RMSIO_GETCAP32 _IOW ('r', 8, RMSIO_GETCAP_STRUCT32) -+ -+/* arg is pointer to ncaps */ -+#define RMSIO_NCAPS _IOW ('r', 9, int) -+ -+typedef struct rmsio_prggetstats_struct -+{ -+ int id; -+ prgstats_old_t *stats; -+} RMSIO_PRGGETSTATS_STRUCT; -+#define RMSIO_PRGGETSTATS _IOW ('r', 10, RMSIO_PRGGETSTATS_STRUCT) -+ -+/* arg is program id */ -+#define RMSIO_PRGSUSPEND _IOW ('r', 11, int) -+#define RMSIO_PRGRESUME _IOW ('r', 12, int) -+#define RMSIO_PRGDESTROY _IOW ('r', 13, int) -+ -+typedef struct rmsio_getprgid_struct -+{ -+ pid_t pid; -+ int *id; -+} RMSIO_GETPRGID_STRUCT; -+#define RMSIO_GETPRGID _IOW ('r', 14, RMSIO_GETPRGID_STRUCT) -+ -+typedef struct rmsio_getprgid_struct32 -+{ -+ pid_t pid; -+ unsigned int idptr; -+} RMSIO_GETPRGID_STRUCT32; -+#define RMSIO_GETPRGID32 _IOW ('r', 14, RMSIO_GETPRGID_STRUCT32) -+ -+/* arg is pointer to index */ -+#define RMSIO_GETMYCAP _IOW ('r', 15, int) -+ -+typedef struct rmsio_prgids_struct -+{ -+ int maxids; -+ int *prgids; -+ int *nprgs; -+} RMSIO_PRGIDS_STRUCT; -+#define RMSIO_PRGIDS _IOW ('r', 16, RMSIO_PRGIDS_STRUCT) -+ -+/* arg is pointer to vp */ -+#define RMSIO_ELANINITDONE _IOW ('r', 17, int) -+ -+typedef struct rmsio_prgelanpids_struct -+{ -+ int id; -+ int maxpids; -+ int *vps; -+ int *pids; -+ int *npids; -+} RMSIO_PRGELANPIDS_STRUCT; -+#define RMSIO_PRGELANPIDS _IOW ('r', 18, RMSIO_PRGELANPIDS_STRUCT) -+ -+typedef struct rmsio_setpset_struct -+{ -+ int id; -+ int psid; -+} RMSIO_SETPSET_STRUCT; -+#define RMSIO_SETPSET _IOW ('r', 19, RMSIO_SETPSET_STRUCT) -+ -+typedef struct rmsio_getpset_struct -+{ -+ int id; -+ int *psid; -+} RMSIO_GETPSET_STRUCT; -+#define RMSIO_GETPSET _IOW ('r', 20, RMSIO_GETPSET_STRUCT) -+ -+/* -+ * have to pass a pointer to the stats, the switch -+ * statement goes wrong in the module of the size -+ * is too large -+ */ -+typedef struct { -+ uint64_t ebytes; -+ uint64_t exfers; -+} elanstats_t; -+ -+typedef struct rmsio_setelanstats_struct -+{ -+ int id; -+ elanstats_t *estats; -+} RMSIO_SETELANSTATS_STRUCT; -+#define RMSIO_SETELANSTATS _IOW ('r', 21, RMSIO_SETELANSTATS_STRUCT) -+ -+typedef struct rmsio_prggetstats2_struct -+{ -+ int id; -+ prgstats_t *stats; -+} RMSIO_PRGGETSTATS2_STRUCT; -+#define RMSIO_PRGGETSTATS2 _IOW ('r', 22, RMSIO_PRGGETSTATS2_STRUCT) -+ -+typedef struct rmsio_modversion_struct -+{ -+ int *version; -+} RMSIO_MODVERSION_STRUCT; -+#define RMSIO_MODVERSION _IOW ('r', 23, RMSIO_MODVERSION_STRUCT) -+ -+ -+#endif /* __RMSMOD_RMSIO_H */ -+ -+ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.4.21/ipc/shm.c -=================================================================== ---- linux-2.4.21.orig/ipc/shm.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/ipc/shm.c 2005-06-01 23:12:54.758414936 -0400 -@@ -723,6 +723,44 @@ - return retval; - } - -+/* -+ * Mark all segments created by this process for destruction -+ */ -+asmlinkage int shm_cleanup () -+{ -+ int i; -+ -+ down(&shm_ids.sem); -+ -+ for(i = 0; i <= shm_ids.max_id; i++) { -+ struct shmid_kernel* shp; -+ -+ shp = shm_lock(i); -+ if(shp!=NULL) { -+ -+ /* Mark this segment for destruction if we created it */ -+ if (current->pid == shp->shm_cprid) -+ { -+ /* Copy of IPC_RMID code */ -+ if (shp->shm_nattch){ -+ shp->shm_flags |= SHM_DEST; -+ /* Do not find it any more */ -+ shp->shm_perm.key = IPC_PRIVATE; -+ } else { -+ shm_destroy(shp); -+ continue; -+ } -+ } -+ -+ shm_unlock(i); -+ } -+ } -+ -+ up(&shm_ids.sem); -+ -+ return 0; -+} -+ - #ifdef CONFIG_PROC_FS - static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) - { -Index: linux-2.4.21/kernel/exit.c -=================================================================== ---- linux-2.4.21.orig/kernel/exit.c 2005-06-01 22:58:09.055062312 -0400 -+++ linux-2.4.21/kernel/exit.c 2005-06-01 23:12:54.759414784 -0400 -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -705,6 +706,10 @@ - if (current->tux_info) - current->tux_exit(); - acct_process(code); -+ -+ /* Notify any ptrack callbacks of the process exit */ -+ ptrack_call_callbacks(PTRACK_PHASE_EXIT, NULL); -+ - if (isaudit(tsk)) - audit_exit(tsk, code); - __exit_mm(tsk); -Index: linux-2.4.21/kernel/fork.c -=================================================================== ---- linux-2.4.21.orig/kernel/fork.c 2005-06-01 22:58:09.055062312 -0400 -+++ linux-2.4.21/kernel/fork.c 2005-06-01 23:12:54.760414632 -0400 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -308,6 +309,7 @@ - /* unlimited stack is larger than TASK_SIZE */ - mm->non_executable_cache = NON_EXECUTABLE_CACHE(current); - mm->pgd = pgd_alloc(mm); -+ mm->coproc_ops = NULL; - mm->def_flags = 0; - if (mm->pgd) - return mm; -@@ -1110,6 +1112,12 @@ - p->vfork_done = &vfork; - init_completion(&vfork); - } -+ -+ if (ptrack_call_callbacks (PTRACK_PHASE_CLONE, p)) { -+ /* start up with an immediate SIGKILL. */ -+ sigaddset (&p->pending.signal, SIGKILL); -+ p->sigpending = 1; -+ } - - if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { - /* -Index: linux-2.4.21/kernel/ksyms.c -=================================================================== ---- linux-2.4.21.orig/kernel/ksyms.c 2005-06-01 23:12:40.911519984 -0400 -+++ linux-2.4.21/kernel/ksyms.c 2005-06-01 23:12:54.760414632 -0400 -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -104,6 +105,10 @@ - - #endif - -+EXPORT_SYMBOL_GPL(ptrack_register); -+EXPORT_SYMBOL_GPL(ptrack_deregister); -+EXPORT_SYMBOL_GPL(ptrack_registered); -+ - /* process memory management */ - EXPORT_SYMBOL(do_mmap_pgoff); - EXPORT_SYMBOL(do_munmap); -@@ -113,6 +118,7 @@ - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(make_pages_present); - EXPORT_SYMBOL(unshare_files); - EXPORT_SYMBOL(mmput); - -@@ -589,6 +595,10 @@ - EXPORT_SYMBOL(kernel_read); - EXPORT_SYMBOL(open_exec); - -+/* QSW Shared-memory cleanup hook for rmsmod */ -+extern int shm_cleanup(); -+EXPORT_SYMBOL_GPL(shm_cleanup); -+ - /* Miscellaneous access points */ - EXPORT_SYMBOL(si_meminfo); - -Index: linux-2.4.21/kernel/Makefile -=================================================================== ---- linux-2.4.21.orig/kernel/Makefile 2005-06-01 22:51:53.000000000 -0400 -+++ linux-2.4.21/kernel/Makefile 2005-06-01 23:12:54.760414632 -0400 -@@ -18,6 +18,10 @@ - signal.o sys.o kmod.o context.o \ - futex.o pid.o kksymoops.o - -+# Quadrics additions -+export-objs += ptrack.o -+obj-y += ptrack.o -+ - obj-$(CONFIG_UID16) += uid16.o - obj-$(CONFIG_MODULES) += ksyms.o - obj-$(CONFIG_COMPAT) += compat.o -Index: linux-2.4.21/kernel/ptrack.c -=================================================================== ---- linux-2.4.21.orig/kernel/ptrack.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.4.21/kernel/ptrack.c 2005-06-01 23:12:54.761414480 -0400 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Derived from exit_actn.c by -+ * Copyright (C) 2003 Quadrics Ltd. -+ */ -+ -+ -+#include -+#include -+#include -+#include -+#include -+ -+int -+ptrack_register (ptrack_callback_t callback, void *arg) -+{ -+ struct ptrack_desc *desc = kmalloc (sizeof (struct ptrack_desc), GFP_KERNEL); -+ -+ if (desc == NULL) -+ return -ENOMEM; -+ -+ desc->callback = callback; -+ desc->arg = arg; -+ -+ list_add_tail (&desc->link, ¤t->ptrack_list); -+ -+ return 0; -+} -+ -+void -+ptrack_deregister (ptrack_callback_t callback, void *arg) -+{ -+ struct list_head *el, *nel; -+ -+ list_for_each_safe (el, nel, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ if (desc->callback == callback && desc->arg == arg) { -+ list_del (&desc->link); -+ kfree (desc); -+ } -+ } -+} -+ -+int -+ptrack_registered (ptrack_callback_t callback, void *arg) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ if (desc->callback == callback && desc->arg == arg) -+ return 1; -+ } -+ return 0; -+} -+ -+int -+ptrack_call_callbacks (int phase, struct task_struct *child) -+{ -+ struct list_head *el, *nel; -+ struct ptrack_desc *new; -+ int res; -+ -+ if (phase == PTRACK_PHASE_CLONE) -+ INIT_LIST_HEAD (&child->ptrack_list); -+ -+ /* if init process, ignore */ -+ if (current->pid == 0) -+ return 0; -+ -+ list_for_each_safe (el, nel, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ res = desc->callback (desc->arg, phase, child); -+ -+ switch (phase) -+ { -+ case PTRACK_PHASE_EXIT: -+ list_del (&desc->link); -+ kfree (desc); -+ break; -+ -+ case PTRACK_PHASE_CLONE: -+ switch (res) -+ { -+ case PTRACK_FINISHED: -+ break; -+ -+ case PTRACK_INNHERIT: -+ if ((new = kmalloc (sizeof (struct ptrack_desc), GFP_ATOMIC)) == NULL) -+ { -+ /* allocation failed - notify that this process is not going -+ * to be started by signalling clone failure. -+ */ -+ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); -+ -+ goto failed; -+ } -+ -+ new->callback = desc->callback; -+ new->arg = desc->arg; -+ -+ list_add_tail (&new->link, &child->ptrack_list); -+ break; -+ -+ case PTRACK_DENIED: -+ goto failed; -+ } -+ break; -+ } -+ } -+ -+ return 0; -+ -+ failed: -+ while (! list_empty (&child->ptrack_list)) -+ { -+ struct ptrack_desc *desc = list_entry (child->ptrack_list.next, struct ptrack_desc, link); -+ -+ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); -+ -+ list_del (&desc->link); -+ kfree (desc); -+ } -+ return 1; -+} -Index: linux-2.4.21/mm/filemap.c -=================================================================== ---- linux-2.4.21.orig/mm/filemap.c 2005-06-01 23:12:41.100491256 -0400 -+++ linux-2.4.21/mm/filemap.c 2005-06-01 23:12:54.763414176 -0400 -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2468,6 +2469,7 @@ - flush_cache_range(vma, end - size, end); - if (address >= end) - BUG(); -+ coproc_sync_range (vma->vm_mm, address, end); - do { - error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags); - address = (address + PGDIR_SIZE) & PGDIR_MASK; -Index: linux-2.4.21/mm/memory.c -=================================================================== ---- linux-2.4.21.orig/mm/memory.c 2005-06-01 22:52:04.000000000 -0400 -+++ linux-2.4.21/mm/memory.c 2005-06-01 23:13:59.371592240 -0400 -@@ -42,6 +42,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -632,6 +633,7 @@ - BUG_ON(address >= end); - - spin_lock(&mm->page_table_lock); -+ coproc_invalidate_range (mm, address, end); - flush_cache_range(vma, start, end); - tlb = tlb_gather_mmu(vma); - -@@ -1302,6 +1304,7 @@ - BUG(); - - spin_lock(&mm->page_table_lock); -+ coproc_invalidate_range (mm, beg, end); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - error = -ENOMEM; -@@ -1313,6 +1316,7 @@ - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); -+ coproc_update_range(mm, beg, end); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(vma, beg, end); - return error; -@@ -1391,6 +1395,7 @@ - BUG(); - - spin_lock(&mm->page_table_lock); -+ coproc_invalidate_range(mm, beg, end); - do { - pmd_t *pmd = pmd_alloc(mm, dir, from); - error = -ENOMEM; -@@ -1402,6 +1407,7 @@ - from = (from + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (from && (from < end)); -+ coproc_update_range(mm, beg, end); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(vma, beg, end); - return error; -@@ -1497,8 +1503,10 @@ - unlock_page(old_page); - flush_cache_page(vma, address); - entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)), vma); -+ coproc_invalidate_page(vma, address); - establish_pte(vma, address, page_table, entry); - pte_unmap(page_table); -+ coproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - return 1; /* Minor fault */ - } -@@ -1528,6 +1536,7 @@ - if (PageReserved(old_page)) - ++mm->rss; - page_remove_rmap(old_page, page_table); -+ coproc_invalidate_page(vma, address); - break_cow(vma, new_page, address, page_table); - pte_chain = page_add_rmap(new_page, page_table, pte_chain); - lru_cache_add(new_page); -@@ -1536,6 +1545,7 @@ - new_page = old_page; - } - pte_unmap(page_table); -+ coproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - if (old_page_locked) - unlock_page(old_page); -@@ -1748,6 +1758,7 @@ - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, pte); - pte_unmap(page_table); -+ coproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return ret; -@@ -1804,6 +1815,7 @@ - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, entry); - pte_unmap(page_table); -+ coproc_update_page(vma, addr); - spin_unlock(&mm->page_table_lock); - ret = 1; /* Minor fault */ - goto out; -@@ -1902,6 +1914,7 @@ - - /* no need to invalidate: a not-present page shouldn't be cached */ - update_mmu_cache(vma, address, entry); -+ coproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return 2; /* Major fault */ -@@ -1958,8 +1971,10 @@ - entry = pte_mkdirty(entry); - } - entry = pte_mkyoung(entry); -+ coproc_invalidate_page(vma, address); - establish_pte(vma, address, pte, entry); - pte_unmap(pte); -+ coproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - return 1; - } -Index: linux-2.4.21/mm/mmap.c -=================================================================== ---- linux-2.4.21.orig/mm/mmap.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/mm/mmap.c 2005-06-01 23:12:54.767413568 -0400 -@@ -30,6 +30,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1450,6 +1451,7 @@ - release_segments(mm); - - spin_lock(&mm->page_table_lock); -+ coproc_release(mm); - mpnt = mm->mmap; - mm->mmap = mm->mmap_cache = NULL; - mm->mm_rb = RB_ROOT; -Index: linux-2.4.21/mm/mprotect.c -=================================================================== ---- linux-2.4.21.orig/mm/mprotect.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/mm/mprotect.c 2005-06-01 23:12:54.767413568 -0400 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -106,6 +107,7 @@ - if (start >= end) - BUG(); - spin_lock(¤t->mm->page_table_lock); -+ coproc_change_protection (current->mm, start, end, newprot); - do { - change_pmd_range(vma, dir, start, end - start, newprot); - start = (start + PGDIR_SIZE) & PGDIR_MASK; -Index: linux-2.4.21/mm/mremap.c -=================================================================== ---- linux-2.4.21.orig/mm/mremap.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/mm/mremap.c 2005-06-01 23:12:54.768413416 -0400 -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -160,7 +161,10 @@ - unsigned long new_addr, unsigned long old_addr, unsigned long len) - { - unsigned long offset = len; -+ struct mm_struct *mm = vma->vm_mm; - -+ coproc_invalidate_range(mm, old_addr, old_addr+len); -+ coproc_invalidate_range(mm, new_addr, new_addr+len); - flush_cache_range(vma, old_addr, old_addr + len); - - /* -Index: linux-2.4.21/mm/rmap.c -=================================================================== ---- linux-2.4.21.orig/mm/rmap.c 2005-06-01 22:51:50.000000000 -0400 -+++ linux-2.4.21/mm/rmap.c 2005-06-01 23:12:54.768413416 -0400 -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -449,6 +450,7 @@ - } - - /* Nuke the page table entry. */ -+ coproc_invalidate_page(vma, address); - pte = vm_ptep_get_and_clear(vma, address, ptep); - flush_tlb_page(vma, address); - flush_cache_page(vma, address); diff --git a/lustre/kernel_patches/patches/qsnet-rhel4-2.6.patch b/lustre/kernel_patches/patches/qsnet-rhel4-2.6.patch index a17f058..b4a8280 100644 --- a/lustre/kernel_patches/patches/qsnet-rhel4-2.6.patch +++ b/lustre/kernel_patches/patches/qsnet-rhel4-2.6.patch @@ -1,7 +1,79 @@ -diff -urN clean/arch/i386/defconfig linux-2.6.9/arch/i386/defconfig ---- clean/arch/i386/defconfig 2004-10-18 17:54:38.000000000 -0400 -+++ linux-2.6.9/arch/i386/defconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -119,6 +119,8 @@ +Index: linux-269-5502/fs/open.c +=================================================================== +--- linux-269-5502.orig/fs/open.c ++++ linux-269-5502/fs/open.c +@@ -1029,6 +1029,8 @@ out_error: + goto out; + } + ++EXPORT_SYMBOL(sys_open); ++ + #ifndef __alpha__ + + /* +Index: linux-269-5502/fs/read_write.c +=================================================================== +--- linux-269-5502.orig/fs/read_write.c ++++ linux-269-5502/fs/read_write.c +@@ -145,6 +145,7 @@ asmlinkage off_t sys_lseek(unsigned int + bad: + return retval; + } ++EXPORT_SYMBOL(sys_lseek); + + #ifdef __ARCH_WANT_SYS_LLSEEK + asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, +Index: linux-269-5502/fs/select.c +=================================================================== +--- linux-269-5502.orig/fs/select.c ++++ linux-269-5502/fs/select.c +@@ -539,3 +539,4 @@ out_fds: + poll_freewait(&table); + return err; + } ++EXPORT_SYMBOL_GPL(sys_poll); +Index: linux-269-5502/fs/exec.c +=================================================================== +--- linux-269-5502.orig/fs/exec.c ++++ linux-269-5502/fs/exec.c +@@ -56,6 +56,8 @@ + #include + #endif + ++#include ++ + int core_uses_pid; + char core_pattern[65] = "core"; + int suid_dumpable = 0; +@@ -1214,6 +1216,9 @@ int do_execve(char * filename, + if (retval < 0) + goto out; + ++ /* notify any ptrack callbacks of the process exec */ ++ ptrack_call_callbacks(PTRACK_PHASE_EXEC, NULL); ++ + retval = search_binary_handler(bprm,regs); + if (retval >= 0) { + free_arg_pages(bprm); +Index: linux-269-5502/arch/i386/Kconfig +=================================================================== +--- linux-269-5502.orig/arch/i386/Kconfig ++++ linux-269-5502/arch/i386/Kconfig +@@ -960,6 +960,9 @@ config REGPARM + generate incorrect output with certain kernel constructs when + -mregparm=3 is used. + ++source "mm/Kconfig" ++source "kernel/Kconfig" ++ + endmenu + + +Index: linux-269-5502/arch/i386/defconfig +=================================================================== +--- linux-269-5502.orig/arch/i386/defconfig ++++ linux-269-5502/arch/i386/defconfig +@@ -119,6 +119,8 @@ CONFIG_MTRR=y CONFIG_IRQBALANCE=y CONFIG_HAVE_DEC_LOCK=y # CONFIG_REGPARM is not set @@ -10,23 +82,25 @@ diff -urN clean/arch/i386/defconfig linux-2.6.9/arch/i386/defconfig # # Power management options (ACPI, APM) -diff -urN clean/arch/i386/Kconfig linux-2.6.9/arch/i386/Kconfig ---- clean/arch/i386/Kconfig 2005-05-13 13:39:03.000000000 -0400 -+++ linux-2.6.9/arch/i386/Kconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -946,6 +946,9 @@ - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. +Index: linux-269-5502/arch/ia64/Kconfig +=================================================================== +--- linux-269-5502.orig/arch/ia64/Kconfig ++++ linux-269-5502/arch/ia64/Kconfig +@@ -316,6 +316,9 @@ config IA64_PALINFO + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. +source "mm/Kconfig" +source "kernel/Kconfig" -+ - endmenu - ++ + source "drivers/firmware/Kconfig" -diff -urN clean/arch/ia64/defconfig linux-2.6.9/arch/ia64/defconfig ---- clean/arch/ia64/defconfig 2004-10-18 17:53:12.000000000 -0400 -+++ linux-2.6.9/arch/ia64/defconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -83,6 +83,8 @@ + source "fs/Kconfig.binfmt" +Index: linux-269-5502/arch/ia64/defconfig +=================================================================== +--- linux-269-5502.orig/arch/ia64/defconfig ++++ linux-269-5502/arch/ia64/defconfig +@@ -83,6 +83,8 @@ CONFIG_IA32_SUPPORT=y CONFIG_COMPAT=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y @@ -35,23 +109,25 @@ diff -urN clean/arch/ia64/defconfig linux-2.6.9/arch/ia64/defconfig # # Firmware Drivers -diff -urN clean/arch/ia64/Kconfig linux-2.6.9/arch/ia64/Kconfig ---- clean/arch/ia64/Kconfig 2005-05-13 13:39:00.000000000 -0400 -+++ linux-2.6.9/arch/ia64/Kconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -299,6 +299,9 @@ - To use this option, you have to ensure that the "/proc file system - support" (CONFIG_PROC_FS) is enabled, too. +Index: linux-269-5502/arch/x86_64/Kconfig +=================================================================== +--- linux-269-5502.orig/arch/x86_64/Kconfig ++++ linux-269-5502/arch/x86_64/Kconfig +@@ -401,6 +401,9 @@ config X86_MCE_AMD + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. +source "mm/Kconfig" +source "kernel/Kconfig" + - source "drivers/firmware/Kconfig" + endmenu - source "fs/Kconfig.binfmt" -diff -urN clean/arch/x86_64/defconfig linux-2.6.9/arch/x86_64/defconfig ---- clean/arch/x86_64/defconfig 2004-10-18 17:54:39.000000000 -0400 -+++ linux-2.6.9/arch/x86_64/defconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -87,6 +87,8 @@ + +Index: linux-269-5502/arch/x86_64/defconfig +=================================================================== +--- linux-269-5502.orig/arch/x86_64/defconfig ++++ linux-269-5502/arch/x86_64/defconfig +@@ -87,6 +87,8 @@ CONFIG_NR_CPUS=8 CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y @@ -60,106 +136,357 @@ diff -urN clean/arch/x86_64/defconfig linux-2.6.9/arch/x86_64/defconfig # # Power management options -diff -urN clean/arch/x86_64/Kconfig linux-2.6.9/arch/x86_64/Kconfig ---- clean/arch/x86_64/Kconfig 2005-05-13 13:39:03.000000000 -0400 -+++ linux-2.6.9/arch/x86_64/Kconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -327,6 +327,9 @@ - machine check error logs. See - ftp://ftp.x86-64.org/pub/linux/tools/mcelog - -+source "mm/Kconfig" -+source "kernel/Kconfig" -+ - endmenu - - -diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc.txt ---- clean/Documentation/vm/ioproc.txt 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/Documentation/vm/ioproc.txt 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,468 @@ -+Linux IOPROC patch overview -+=========================== +Index: linux-269-5502/kernel/ptrack.c +=================================================================== +--- /dev/null ++++ linux-269-5502/kernel/ptrack.c +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (C) 2000 Regents of the University of California ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Derived from exit_actn.c by ++ * Copyright (C) 2003 Quadrics Ltd. ++ */ + -+The network interface for an HPC network differs significantly from -+network interfaces for traditional IP networks. HPC networks tend to -+be used directly from user processes and perform large RDMA transfers -+between theses processes address space. They also have a requirement -+for low latency communication, and typically achieve this by OS bypass -+techniques. This then requires a different model to traditional -+interconnects, in that a process may need to expose a large amount of -+it's address space to the network RDMA. + -+Locking down of memory has been a common mechanism for performing -+this, together with a pin-down cache implemented in user -+libraries. The disadvantage of this method is that large portions of -+the physical memory can be locked down for a single process, even if -+it's working set changes over the different phases of it's -+execution. This leads to inefficient memory utilisation - akin to the -+disadvantage of swapping compared to paging. ++#include ++#include ++#include ++#include ++#include ++#include + -+This model also has problems where memory is being dynamically -+allocated and freed, since the pin down cache is unaware that memory -+may have been released by a call to munmap() and so it will still be -+locking down the now unused pages. ++#include + -+Some modern HPC network interfaces implement their own MMU and are -+able to handle a translation fault during a network access. The -+Quadrics (http://www.quadrics.com) devices (Elan3 and Elan4) have done -+this for some time and we expect others to follow the same route in -+the relatively near future. These NICs are able to operate in an -+environment where paging occurs and do not require memory to be locked -+down. The advantage of this is that the user process can expose large -+portions of it's address space without having to worry about physical -+memory constraints. ++int ++ptrack_register (ptrack_callback_t callback, void *arg) ++{ ++ struct ptrack_desc *desc = kmalloc (sizeof (struct ptrack_desc), GFP_KERNEL); ++ ++ if (desc == NULL) ++ return -ENOMEM; + -+However should the operating system decide to swap a page to disk, -+then the NIC must be made aware that it should no longer read/write -+from this memory, but should generate a translation fault instead. ++ desc->callback = callback; ++ desc->arg = arg; ++ ++ list_add_tail (&desc->link, ¤t->ptrack_list); ++ ++ return 0; ++} + -+The ioproc patch has been developed to provide a mechanism whereby the -+device driver for a NIC can be aware of when a user process's address -+translations change, either by paging or by explicitly mapping or -+unmapping memory. ++void ++ptrack_deregister (ptrack_callback_t callback, void *arg) ++{ ++ struct list_head *el, *nel; ++ ++ list_for_each_safe (el, nel, ¤t->ptrack_list) { ++ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); ++ ++ if (desc->callback == callback && desc->arg == arg) { ++ list_del (&desc->link); ++ kfree (desc); ++ } ++ } ++} + -+The patch involves inserting callbacks where translations are being -+invalidated to notify the NIC that the memory behind those -+translations is no longer visible to the application (and so should -+not be visible to the NIC). This callback is then responsible for -+ensuring that the NIC will not access the physical memory that was -+being mapped. ++int ++ptrack_registered (ptrack_callback_t callback, void *arg) ++{ ++ struct list_head *el; ++ ++ list_for_each (el, ¤t->ptrack_list) { ++ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); ++ ++ if (desc->callback == callback && desc->arg == arg) ++ return 1; ++ } ++ return 0; ++} ++ ++int ++ptrack_call_callbacks (int phase, struct task_struct *child) ++{ ++ struct list_head *el, *nel; ++ struct ptrack_desc *new; ++ int res; + -+An ioproc invalidate callback in the kswapd code could be utilised to -+prevent memory from being paged out if the NIC is unable to support -+network page faulting. ++ if (phase == PTRACK_PHASE_CLONE) ++ INIT_LIST_HEAD (&child->ptrack_list); + -+For NICs which support network page faulting, there is no requirement -+for a user level pin down cache, since they are able to page-in their -+translations on the first communication using a buffer. However this -+is likely to be inefficient, resulting in slow first use of the -+buffer. If the communication buffers were continually allocated and -+freed using mmap based malloc() calls then this would lead to all -+communications being slower than desirable. ++ list_for_each_safe (el, nel, ¤t->ptrack_list) { ++ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); ++ ++ res = desc->callback (desc->arg, phase, child); ++ ++ switch (phase) ++ { ++ case PTRACK_PHASE_EXIT: ++ list_del (&desc->link); ++ kfree (desc); ++ break; ++ ++ case PTRACK_PHASE_CLONE: ++ switch (res) ++ { ++ case PTRACK_FINISHED: ++ break; + -+To optimise these warm-up cases the ioproc patch adds calls to -+ioproc_update wherever the kernel is creating translations for a user -+process. These then allows the device driver to preload translations -+so that they are already present for the first network communication -+from a buffer. ++ case PTRACK_INNHERIT: ++ if ((new = kmalloc (sizeof (struct ptrack_desc), GFP_ATOMIC)) == NULL) ++ { ++ /* allocation failed - notify that this process is not going ++ * to be started by signalling clone failure. ++ */ ++ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); ++ ++ goto failed; ++ } + -+Linux 2.6 IOPROC implementation details -+======================================= ++ new->callback = desc->callback; ++ new->arg = desc->arg; ++ ++ list_add_tail (&new->link, &child->ptrack_list); ++ break; + -+The Linux IOPROC patch adds hooks to the Linux VM code whenever page -+table entries are being created and/or invalidated. IOPROC device -+drivers can register their interest in being informed of such changes -+by registering an ioproc_ops structure which is defined as follows; ++ case PTRACK_DENIED: ++ goto failed; ++ } ++ break; ++ } ++ } + -+extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); -+extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); ++ return 0; + -+typedef struct ioproc_ops { -+ struct ioproc_ops *next; -+ void *arg; ++ failed: ++ while (! list_empty (&child->ptrack_list)) ++ { ++ struct ptrack_desc *desc = list_entry (child->ptrack_list.next, struct ptrack_desc, link); ++ ++ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); ++ ++ list_del (&desc->link); ++ kfree (desc); ++ } ++ return 1; ++} ++EXPORT_SYMBOL(ptrack_register); ++EXPORT_SYMBOL(ptrack_deregister); ++EXPORT_SYMBOL(ptrack_registered); +Index: linux-269-5502/kernel/signal.c +=================================================================== +--- linux-269-5502.orig/kernel/signal.c ++++ linux-269-5502/kernel/signal.c +@@ -2329,6 +2329,7 @@ sys_kill(int pid, int sig) + + return kill_something_info(sig, &info, pid); + } ++EXPORT_SYMBOL_GPL(sys_kill); + + /** + * sys_tgkill - send signal to one specific thread +Index: linux-269-5502/kernel/Kconfig +=================================================================== +--- /dev/null ++++ linux-269-5502/kernel/Kconfig +@@ -0,0 +1,14 @@ ++# ++# Kernel subsystem specific config ++# ++ ++# Support for Process Tracking callbacks ++# ++config PTRACK ++ bool "Enable PTRACK process tracking hooks" ++ default y ++ help ++ This option enables hooks to be called when processes are ++ created and destoryed in order for a resource management ++ system to know which processes are a member of a "job" and ++ to be able to clean up when the job is terminated. +Index: linux-269-5502/kernel/Makefile +=================================================================== +--- linux-269-5502.orig/kernel/Makefile ++++ linux-269-5502/kernel/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_PTRACK) += ptrack.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +Index: linux-269-5502/kernel/exit.c +=================================================================== +--- linux-269-5502.orig/kernel/exit.c ++++ linux-269-5502/kernel/exit.c +@@ -32,6 +32,8 @@ + #include + #include + ++#include ++ + extern void sem_exit (void); + extern struct task_struct *child_reaper; + +@@ -825,6 +827,9 @@ asmlinkage NORET_TYPE void do_exit(long + current->tux_exit(); + } + ++ /* Notify any ptrack callbacks of the process exit */ ++ ptrack_call_callbacks(PTRACK_PHASE_EXIT, NULL); ++ + if (unlikely(tsk->audit_context)) + audit_free(tsk); + __exit_mm(tsk); +Index: linux-269-5502/kernel/fork.c +=================================================================== +--- linux-269-5502.orig/kernel/fork.c ++++ linux-269-5502/kernel/fork.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -443,6 +444,9 @@ static struct mm_struct * mm_init(struct + mm->page_table_lock = SPIN_LOCK_UNLOCKED; + mm->ioctx_list_lock = RW_LOCK_UNLOCKED; + mm->ioctx_list = NULL; ++#ifdef CONFIG_IOPROC ++ mm->ioproc_ops = NULL; ++#endif + mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); + mm->free_area_cache = TASK_UNMAPPED_BASE; + +@@ -1312,6 +1316,11 @@ long do_fork(unsigned long clone_flags, + set_tsk_thread_flag(p, TIF_SIGPENDING); + } + ++ if (ptrack_call_callbacks(PTRACK_PHASE_CLONE, p)) { ++ sigaddset(&p->pending.signal, SIGKILL); ++ set_tsk_thread_flag(p, TIF_SIGPENDING); ++ } ++ + if (!(clone_flags & CLONE_STOPPED)) + wake_up_new_task(p, clone_flags); + else +Index: linux-269-5502/Makefile +=================================================================== +--- linux-269-5502.orig/Makefile ++++ linux-269-5502/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 9 +-EXTRAVERSION = -prep ++EXTRAVERSION = -prep.qp3.5.34.4qsnet + RHEL_VERSION = 4 + RHEL_UPDATE = 5 + NAME=AC 1 +Index: linux-269-5502/Documentation/vm/ioproc.txt +=================================================================== +--- /dev/null ++++ linux-269-5502/Documentation/vm/ioproc.txt +@@ -0,0 +1,467 @@ ++Linux IOPROC patch overview ++=========================== ++ ++The network interface for an HPC network differs significantly from ++network interfaces for traditional IP networks. HPC networks tend to ++be used directly from user processes and perform large RDMA transfers ++between theses processes address space. They also have a requirement ++for low latency communication, and typically achieve this by OS bypass ++techniques. This then requires a different model to traditional ++interconnects, in that a process may need to expose a large amount of ++it's address space to the network RDMA. ++ ++Locking down of memory has been a common mechanism for performing ++this, together with a pin-down cache implemented in user ++libraries. The disadvantage of this method is that large portions of ++the physical memory can be locked down for a single process, even if ++it's working set changes over the different phases of it's ++execution. This leads to inefficient memory utilisation - akin to the ++disadvantage of swapping compared to paging. ++ ++This model also has problems where memory is being dynamically ++allocated and freed, since the pin down cache is unaware that memory ++may have been released by a call to munmap() and so it will still be ++locking down the now unused pages. ++ ++Some modern HPC network interfaces implement their own MMU and are ++able to handle a translation fault during a network access. The ++Quadrics (http://www.quadrics.com) devices (Elan3 and Elan4) have done ++this for some time and we expect others to follow the same route in ++the relatively near future. These NICs are able to operate in an ++environment where paging occurs and do not require memory to be locked ++down. The advantage of this is that the user process can expose large ++portions of it's address space without having to worry about physical ++memory constraints. ++ ++However should the operating system decide to swap a page to disk, ++then the NIC must be made aware that it should no longer read/write ++from this memory, but should generate a translation fault instead. ++ ++The ioproc patch has been developed to provide a mechanism whereby the ++device driver for a NIC can be aware of when a user process's address ++translations change, either by paging or by explicitly mapping or ++unmapping memory. ++ ++The patch involves inserting callbacks where translations are being ++invalidated to notify the NIC that the memory behind those ++translations is no longer visible to the application (and so should ++not be visible to the NIC). This callback is then responsible for ++ensuring that the NIC will not access the physical memory that was ++being mapped. ++ ++An ioproc invalidate callback in the kswapd code could be utilised to ++prevent memory from being paged out if the NIC is unable to support ++network page faulting. ++ ++For NICs which support network page faulting, there is no requirement ++for a user level pin down cache, since they are able to page-in their ++translations on the first communication using a buffer. However this ++is likely to be inefficient, resulting in slow first use of the ++buffer. If the communication buffers were continually allocated and ++freed using mmap based malloc() calls then this would lead to all ++communications being slower than desirable. ++ ++To optimise these warm-up cases the ioproc patch adds calls to ++ioproc_update wherever the kernel is creating translations for a user ++process. These then allows the device driver to preload translations ++so that they are already present for the first network communication ++from a buffer. ++ ++Linux 2.6 IOPROC implementation details ++======================================= ++ ++The Linux IOPROC patch adds hooks to the Linux VM code whenever page ++table entries are being created and/or invalidated. IOPROC device ++drivers can register their interest in being informed of such changes ++by registering an ioproc_ops structure which is defined as follows; ++ ++extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); ++extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); ++ ++typedef struct ioproc_ops { ++ struct ioproc_ops *next; ++ void *arg; + + void (*release)(void *arg, struct mm_struct *mm); + void (*sync_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -177,7 +504,7 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +ioproc_register_ops +=================== +This function should be called by the IOPROC device driver to register -+its interest in PTE changes for the process associated with the passed ++it's interest in PTE changes for the process associated with the passed +in mm_struct. + +The ioproc registration is not inherited across fork() and should be @@ -200,7 +527,7 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +ioproc_ops struct +================= +A linked list ioproc_ops structures is hung off the user process -+mm_struct (linux/sched.h). At each hook point in the patched kernel ++mm_struct (linux/sched.h). At each hook point in the patched kernel, +the ioproc patch will call the associated ioproc_ops callback function +pointer in turn for each registered structure. + @@ -209,12 +536,12 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +(e.g. find_pte_map()). These callbacks should not modify the Linux +kernel VM state or PTE entries. + -+The ioproc_ops callback function pointers are defined as follows; ++The ioproc_ops callback function pointers are: + +ioproc_release +============== -+The release hook is called when a program exits and all its vma areas -+are torn down and unmapped. i.e. during exit_mmap(). Before each ++The release hook is called when a program exits and all it's vma areas ++are torn down and unmapped, i.e. during exit_mmap(). Before each +release hook is called the ioproc_ops structure is unlinked from the +mm_struct. + @@ -228,7 +555,7 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +or write by the IOPROC device to the associated pages should cause the +page to be marked as referenced or modified. + -+Called holding the mm->page_table_lock ++Called holding the mm->page_table_lock. + +ioproc_invalidate_[range|page] +============================== @@ -237,7 +564,7 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +kernel. After this call the IOPROC must not access the physical memory +again unless a new translation is loaded. + -+Called holding the mm->page_table_lock ++Called holding the mm->page_table_lock. + +ioproc_update_[range|page] +========================== @@ -247,7 +574,7 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +opportunity to load translations speculatively, which can improve +performance by avoiding device translation faults. + -+Called holding the mm->page_table_lock ++Called holding the mm->page_table_lock. + +ioproc_change_protection +======================== @@ -257,16 +584,16 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +The IOPROC must not be able to write to a read-only page, so if the +permissions are downgraded then it must honour them. If they are +upgraded it can treat this in the same way as the -+ioproc_update_[range|page]() calls ++ioproc_update_[range|page]() calls. + -+Called holding the mm->page_table_lock ++Called holding the mm->page_table_lock. + + +Linux 2.6 IOPROC patch details +============================== + +Here are the specific details of each ioproc hook added to the Linux -+2.6 VM system and the reasons for doing so; ++2.6 VM system and the reasons for doing so: + +++++ FILE + mm/fremap.c @@ -543,96440 +870,381 @@ diff -urN clean/Documentation/vm/ioproc.txt linux-2.6.9/Documentation/vm/ioproc. +ADDED HOOK + ioproc_invalidate_range + -+ -+-- Last update DavidAddison - 17 Aug 2004 -diff -urN clean/drivers/net/qsnet/eip/eip_linux.c linux-2.6.9/drivers/net/qsnet/eip/eip_linux.c ---- clean/drivers/net/qsnet/eip/eip_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/eip_linux.c 2005-09-07 10:34:58.000000000 -0400 -@@ -0,0 +1,1575 @@ ++-- Last update Daniel J Blueman - 24 Mar 2006 +Index: linux-269-5502/mm/ioproc.c +=================================================================== +--- /dev/null ++++ linux-269-5502/mm/ioproc.c +@@ -0,0 +1,52 @@ +/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file ++ * Copyright (C) 2006 Quadrics Ltd ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + -+#ident "@(#)$Id: eip_linux.c,v 1.96.2.3 2005/09/07 14:34:58 mike Exp $" -+ -+#include -+#include ++/* ++ * Registration for IO processor page table updates. ++ */ + -+#include -+#include -+#include -+#include -+#include -+#include +#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#undef ASSERT -+#include -+#include -+ -+ -+ -+#include -+#include -+ -+#include "eip_linux.h" -+#include "eip_stats.h" -+ -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *); -+#endif -+static void eip_iph_display(struct iphdr *); -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER *); -+static void eip_packet_display(unsigned char *); -+#endif -+static void eip_tmd_display(EIP_TMD *); -+static void eip_tmd_head_display(EIP_TMD_HEAD *); -+static void eip_rmd_display(EIP_RMD *); -+static void eip_rmd_head_display(EIP_RMD_HEAD *); -+ -+static void eip_rmd_reclaim(EIP_RMD *); -+ -+static inline EP_NMH *eip_dma_reserve(int, int); -+static inline void __eip_tmd_load(EIP_TMD *, EP_RAILMASK *); -+static inline void __eip_tmd_unload(EIP_TMD *); -+static inline unsigned long eip_buff_alloc(int, int); -+static inline void eip_buff_free(unsigned long, int); -+static struct iphdr *eip_ipfrag_get(char *); -+static inline void eip_rmd_free(EIP_RMD *); -+static inline void eip_skb_load(EIP_RMD *); -+static inline void eip_skb_unload(EIP_RMD *); -+static inline void eip_rmd_requeue(EIP_RMD *); -+static EIP_RMD *eip_rmd_alloc(int, int); -+static int eip_rmd_alloc_replace(EIP_RMD *, int, int); -+static int eip_rmd_alloc_queue(int, int, int, int); -+static int eip_rmds_alloc(void); -+static void eip_rxhandler(EP_RXD *); -+static void eip_rx_tasklet(unsigned long); -+static inline void eip_tmd_init(EIP_TMD *, unsigned long, EIP_TMD_HEAD *, unsigned long, int); -+static inline EIP_TMD *eip_tmd_get(int); -+static inline void eip_tmd_put(EIP_TMD *); -+static inline void eip_tmd_load(EIP_TMD *); -+static inline void eip_tmd_unload(EIP_TMD *); -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD *, EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD *, int); -+static int eip_tmds_alloc(void); -+int eip_hard_start_xmit(struct sk_buff *, struct net_device *); -+static inline int eip_do_xmit(EIP_TMD *, EP_NMD *i, EP_PAYLOAD *); -+static void eip_txhandler(EP_TXD *, void *, EP_STATUS); -+static void eip_tx_tasklet(unsigned long); -+void eip_stop_queue(void); -+void eip_start_queue(void); -+static int eip_open(struct net_device *); -+static int eip_close(struct net_device *); -+static struct net_device_stats *eip_get_stats(struct net_device *); -+static int eip_change_mtu(struct net_device *, int); -+ -+static int eip_rx_dropping = 0; -+static int eip_rx_tasklet_locked = 1; -+ -+/* Global */ -+struct timer_list eip_rx_tasklet_timer; -+ -+EIP_RX *eip_rx = NULL; -+EIP_TX *eip_tx = NULL; -+int eip_checksum_state=CHECKSUM_NONE; ++#include + -+int tmd_max = EIP_TMD_MAX_NR; -+int rmd_max = EIP_RMD_MAX_NR; -+int rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+int rx_granularity = EIP_RX_GRANULARITY; -+int tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+EP_RAILMASK tx_railmask = EP_RAILMASK_ALL; -+int eipdebug = 0; ++#include ++#include + -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *skb) -+{ -+ if (skb) { -+ __EIP_DBG_PRINTF("SKB [%p] : len %d truesize %d proto %x pkt type %x cloned %d users %d summed %d\n", -+ skb, skb->len, skb->truesize, skb->protocol, skb->pkt_type, skb->cloned, atomic_read(&skb->users), skb->ip_summed); -+ __EIP_DBG_PRINTF("SKB [%p] : skb_shinfo dataref %d nr_frags %d frag_list[%p] (device %p)\n", skb, -+ atomic_read(&skb_shinfo(skb)->dataref), skb_shinfo(skb)->nr_frags, skb_shinfo(skb)->frag_list, skb->dev); -+ __EIP_DBG_PRINTF("SKB [%p] : head[%p] data[%p] tail [%p] end [%p] data_len [%d]\n", skb, skb->head, skb->data, -+ skb->tail, skb->end, skb->data_len); -+ __EIP_DBG_PRINTF("SKB [%p] : Transport Layer h.(th, uh, icmph, raw)[%p]\n", skb, skb->h.th); -+ __EIP_DBG_PRINTF("SKB [%p] : Network Layer nh.(iph, arph, raw)[%p]\n", skb, skb->nh.iph); -+ __EIP_DBG_PRINTF("SKB [%p] : Link Layer mac.(ethernet, raw)[%p]\n", skb, skb->mac.ethernet); -+ return; -+ } -+ EIP_ERR_PRINTF("SKB IS NULL - NO SKB TO DISPLAY\n"); -+} -+#endif -+static void eip_iph_display(struct iphdr *iph) -+{ -+ if (iph) { -+ __EIP_DBG_PRINTF("IPH [%p] : version %d header len %d TOS 0x%x Total len %d\n", -+ iph, iph->version, iph->ihl, htons(iph->tos), htons(iph->tot_len)); -+ __EIP_DBG_PRINTF("IPH [%p] : id %d frag flags 0x%x offset %d\n", -+ iph, htons(iph->id), (iph->frag_off & htons(IP_CE | IP_DF | IP_MF)) >> 4, -+ (htons(iph->frag_off) << 3) & IP_OFFSET); -+ __EIP_DBG_PRINTF("IPH [%p] : TTL %d proto %d header checksum 0x%x\n", iph, iph->ttl, iph->protocol, iph->check); -+ __EIP_DBG_PRINTF("IPH [%p] : IP src %u.%u.%u.%u dest %u.%u.%u.%u\n", iph, -+ ((unsigned char *)&(iph->saddr))[0],((unsigned char *)&(iph->saddr))[1], ((unsigned char *)&(iph->saddr))[2],((unsigned char *)&(iph->saddr))[3], -+ ((unsigned char *)&(iph->daddr))[0],((unsigned char *)&(iph->daddr))[1], ((unsigned char *)&(iph->daddr))[2],((unsigned char *)&(iph->daddr))[3]); -+ return; -+ } -+ EIP_ERR_PRINTF("IPH IS NULL - NO IPH TO DISPLAY\n"); -+} -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER * eiph) -+{ -+ if (eiph) { -+ __EIP_DBG_PRINTF("EIPH [%p] : dhost %04x.%04x.%04x sap %x\n", eiph, eiph->h_dhost.ip_bcast, eiph->h_dhost.ip_inst, -+ eiph->h_dhost.ip_addr, eiph->h_sap); -+ __EIP_DBG_PRINTF("EIPH [%p] : shost %04x.%04x.%04x \n", eiph, eiph->h_shost.ip_bcast, eiph->h_shost.ip_inst, -+ eiph->h_shost.ip_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("EIPH IS NULL - NO EIPH TO DISPLAY\n"); -+} -+static void eip_packet_display(unsigned char *data) -+{ -+ eip_eiph_display((EIP_HEADER *) data); -+ eip_iph_display((struct iphdr *) (data + EIP_HEADER_PAD + ETH_HLEN)); -+} -+#endif -+static void eip_tmd_display(EIP_TMD * tmd) -+{ -+ if (tmd) { -+ __EIP_DBG_PRINTF("\t\tTMD [%p] : next[%p] skb[%p] DVMA[%d]\n", tmd, tmd->chain.next, tmd->skb, tmd->dvma_idx); -+ if (tmd->dma_base) -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] *data 0x%lx\n", tmd, tmd->head, *((unsigned long *) tmd->dma_base)); -+ else -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] NO DATA !!!\n", tmd, tmd->head); -+ __EIP_DBG_PRINTF("TMD [%p] : DMA(%lx,%d,%d) ebase[%x]\n",tmd, tmd->dma_base, tmd->dma_len, tmd->nmd.nmd_len, -+ tmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD IS NULL - NO TMD TO DISPLAY\n"); -+ -+} -+static void eip_ipf_display(EIP_IPFRAG * ipf) ++int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip) +{ -+ if (ipf) { -+ __EIP_DBG_PRINTF("IPF[%p] : datagram len %d dma correction %d uts %lx frag_nr %d\n", ipf, ipf->datagram_len, -+ ipf->dma_correction, ipf->timestamp.tv_usec, ipf->frag_nr); -+ eip_tmd_display((EIP_TMD *) ipf); -+ return; -+ } -+ EIP_ERR_PRINTF("IPF IS NULL - NO IPF TO DISPLAY\n"); -+} ++ ip->next = mm->ioproc_ops; ++ mm->ioproc_ops = ip; + -+static void eip_tmd_head_display(EIP_TMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("TMD HEAD [%p] : handle[%p] tmds[%p] %3.3d/%3.3d/%3.3d\n", head, head->handle, head->tmd, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), -+ eip_tx->tmd_max_nr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD HEAD IS NULL - NO TMD HEAD TO DISPLAY\n"); -+} -+static void eip_rmd_display(EIP_RMD * rmd) -+{ -+ if (rmd) { -+ __EIP_DBG_PRINTF("RMD [%p] : next[%p] rxd[%p] DVMA[%d]\n", rmd, rmd->chain.next, rmd->rxd, rmd->dvma_idx); -+ __EIP_DBG_PRINTF("RMD [%p] : head[%p]\n", rmd, rmd->head); -+ __EIP_DBG_PRINTF("RMD [%p] : ebase[%x]\n", rmd, rmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("RMD IS NULL - NO RMD TO DISPLAY\n"); -+} -+static void eip_rmd_head_display(EIP_RMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : rcvr[%p] handle[%p] busy list[%p]\n", head, head->rcvr, head->handle, head->busy_list); -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : %3.3d/%3.3d/%3.3d\n", head, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), eip_rx->rmd_max_nr); -+ return; -+ } -+ EIP_ERR_PRINTF("RMD HEAD IS NULL - NO RMD HEAD TO DISPLAY\n"); ++ return 0; +} + -+/* END - DISPLAY FUNCTIONS */ -+static inline EP_NMH *eip_dma_reserve(int pages_nr, int perm) -+{ -+ EP_NMH *handle = ep_dvma_reserve(eip_tx->ep_system, pages_nr, perm); -+ -+ if (handle) -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HANDLE [%p] %d pages of elan address space reserved\n", -+ handle, pages_nr); -+ else -+ EIP_ERR_PRINTF("cannot reserve %d page(s) of elan address space\n", pages_nr); -+ -+ return handle; -+} ++EXPORT_SYMBOL_GPL(ioproc_register_ops); + -+static inline void __eip_tmd_load(EIP_TMD * tmd, EP_RAILMASK *rmask) ++int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip) +{ -+ EIP_ASSERT(tmd->nmd.nmd_len > 0); -+ -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) tmd->dma_base, tmd->nmd.nmd_len, tmd->head->handle, -+ tmd->dvma_idx, rmask, &tmd->nmd); -+} ++ struct ioproc_ops **tmp; + -+static inline void __eip_tmd_unload(EIP_TMD * tmd) -+{ -+ EIP_ASSERT(tmd->nmd.nmd_addr && tmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, tmd->head->handle, &tmd->nmd); -+ tmd->nmd.nmd_addr = 0; -+} -+static inline unsigned long eip_buff_alloc(int buff_len, int gfp) -+{ -+ unsigned long buff_base = (buff_len < PAGE_SIZE) ? -+ (unsigned long) kmalloc(buff_len, gfp) : -+ __get_dma_pages(gfp, get_order(buff_len)); -+ -+ if (likely(buff_base)) -+ return buff_base; ++ for (tmp = &mm->ioproc_ops; *tmp && *tmp != ip; tmp = &(*tmp)->next) ; ++ if (*tmp) { ++ *tmp = ip->next; ++ return 0; ++ } + -+ EIP_ERR_PRINTF("cannot allocate %db of memory\n", buff_len); -+ return 0; -+} -+static inline void eip_buff_free(unsigned long buff_base, int buff_len) -+{ -+ (buff_len < PAGE_SIZE) ? kfree((void *) buff_base) : -+ free_pages(buff_base, get_order(buff_len)); ++ return -EINVAL; +} -+static struct iphdr *eip_ipfrag_get(char *data) -+{ -+ struct ethhdr *eh = (struct ethhdr *) (data); -+ struct iphdr *iph; -+ -+ if (eh->h_proto == htons(ETH_P_IP)) { -+ iph = (struct iphdr *) ((char *) eh + ETH_HLEN); -+ -+ /* EIP_DBG(eip_iph_display(iph)); */ -+ -+ if ((iph->frag_off & htons(IP_MF | IP_OFFSET))) -+ return iph; -+ } -+ return NULL; -+} -+ -+static inline void eip_rmd_free(EIP_RMD * rmd) -+{ -+ EIP_ASSERT2(rmd->nmd.nmd_addr == 0, eip_rmd_display, rmd); -+ -+ if ( rmd->skb != NULL) -+ kfree_skb (rmd->skb); -+ -+ kfree(rmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "RMD [%p] : FREED\n", rmd); -+} -+static inline void eip_skb_load(EIP_RMD * rmd) -+{ -+ EP_RAILMASK rmask = rmd->rxd ? ep_rxd_railmask (rmd->rxd) : 0; -+ -+ EIP_ASSERT(skb_tailroom(rmd->skb) > 0); -+ -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) rmd->skb->data, skb_tailroom(rmd->skb), rmd->head->handle, -+ rmd->dvma_idx, &rmask, &rmd->nmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : LOADED\n", rmd); -+} -+static inline void eip_skb_unload(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->nmd.nmd_addr && rmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, rmd->head->handle, &rmd->nmd); -+ rmd->nmd.nmd_addr = 0; -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : UNLOADED\n", rmd); -+} -+static inline void eip_rmd_requeue(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->rxd); -+ -+ rmd->chain.next = NULL; -+ -+ ep_requeue_receive(rmd->rxd, eip_rxhandler, rmd, &rmd->nmd, EP_NO_ALLOC|EP_NO_SLEEP ); -+ -+ atomic_inc(&rmd->head->stats); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : REQUEUED\n", rmd); -+} -+static EIP_RMD * eip_rmd_alloc(int svc, int gfp) -+{ -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ EIP_RMD *rmd; -+ struct sk_buff *skb; -+ -+ if (!(skb = alloc_skb((buff_len - EIP_EXTRA), gfp))) -+ return NULL; -+ -+ skb_reserve(skb, 2); -+ -+ if (!(rmd = (EIP_RMD *) kmalloc(buff_len, gfp))) { -+ kfree_skb(skb); -+ return NULL; -+ } -+ -+ rmd->skb = skb; -+ -+ rmd->chain.next = NULL; -+ rmd->rxd = NULL; -+ rmd->head = &eip_rx->head[svc]; -+ -+ return rmd; -+} -+ -+static int eip_rmd_alloc_replace(EIP_RMD *rmd, int svc, int gfp) -+{ -+ struct sk_buff *skb,*old; -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ -+ if (!(skb = alloc_skb(buff_len, gfp))) -+ return 1; -+ -+ skb_reserve(skb, 2); -+ -+ eip_skb_unload(rmd); -+ -+ old = rmd->skb; -+ rmd->skb = skb; -+ -+ eip_skb_load(rmd); -+ -+ eip_rmd_requeue(rmd); -+ -+ kfree_skb(old); -+ -+ return 0; -+} -+ -+static int eip_rmd_alloc_queue(int svc, int dvma_idx, int gfp, int attr) -+{ -+ EIP_RMD * rmd = eip_rmd_alloc(svc, gfp); -+ -+ if (!rmd) -+ return 1; -+ -+ EIP_STAT_ALLOC_ADD(&rmd->head->stats, 1); -+ -+ rmd->dvma_idx = dvma_idx; -+ eip_skb_load(rmd); -+ -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "RMD [%p] : ALLOCATED for SVC 0x%x\n", rmd, svc); -+ -+ if (ep_queue_receive(rmd->head->rcvr, eip_rxhandler, (void *) rmd, &rmd->nmd, attr) == ESUCCESS) { -+ atomic_inc(&rmd->head->stats); -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : QUEUED on SVC 0x%x\n", rmd, svc); -+ return 0; -+ } -+ -+ EIP_ERR_PRINTF("RMD [%p] : couldn't be QUEUED on SVC 0x%x\n", rmd, svc); -+ -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ -+ eip_skb_unload(rmd); -+ eip_rmd_free(rmd); -+ -+ return 1; -+} -+ -+static int eip_rmds_alloc(void) -+{ -+ int idx, svc; -+ -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ eip_rx->head[svc].rcvr = ep_alloc_rcvr(eip_tx->ep_system, EIP_SVC_EP(svc), rx_envelope_nr); -+ if (!eip_rx->head[svc].rcvr) { -+ EIP_ERR_PRINTF("Cannot install receiver for SVC 0x%x - maybe cable is disconnected\n", svc); -+ return -EAGAIN; -+ } -+ -+ eip_rx->head[svc].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)) * eip_rx->rmd_max_nr, -+ EP_PERM_WRITE); -+ if (!eip_rx->head[svc].handle) -+ return -ENOMEM; -+ -+ EIP_DBG(EIP_DBG_RMD_HEAD, eip_rmd_head_display, &eip_rx->head[svc]); -+ -+ for (idx = 0; idx < EIP_RMD_NR; idx++) { -+ if (eip_rmd_alloc_queue(svc, idx * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_KERNEL, EP_NO_SLEEP)) -+ return -ENOMEM; -+ } -+ } -+ return 0; -+} -+static void eip_rmds_free(void) -+{ -+ unsigned long flags; -+ EIP_RMD *rmd; -+ int svc; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ for (svc = 0; svc < EIP_SVC_NR ; svc++) { -+ -+ while ((rmd = eip_rx->head[svc].busy_list)) { -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ if (eip_rx->head[svc].busy_list) { -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "Still RMD [%p] on BUSY list SVC 0x%d - Scheduling\n", rmd, svc); -+ schedule(); -+ } -+ } -+ -+ EIP_ASSERT(EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) == EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats)); -+ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "HEAD[%p] : FREEING RCVR [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].rcvr); -+ -+ ep_free_rcvr(eip_rx->head[svc].rcvr); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HEAD[%p] : RELEASING DVMA [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_rx->head[svc].handle); -+ } -+ -+} -+static int eip_rx_queues_low (void) { -+ int svc; -+ for (svc = 0; svc < EIP_SVC_NR; svc++) -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) < EIP_RMD_ALLOC_THRESH) -+ return (1); -+ return (0); -+} -+static void eip_rxhandler(EP_RXD * rxd) -+{ -+ EIP_RMD *rmd = (EIP_RMD *) ep_rxd_arg(rxd); -+ EP_STATUS ret = ep_rxd_status(rxd); -+ EP_PAYLOAD * payload = ep_rxd_payload(rxd); -+ unsigned long data = (unsigned long) rmd->skb->data; -+ int frag_nr = 0; -+ int len; -+ -+ struct sk_buff *skb; -+ static char count = 0; -+ -+ atomic_dec(&rmd->head->stats); -+ rmd->rxd = rxd; -+ -+ if (likely(ret == EP_SUCCESS)) { -+ -+ rmd->head->dma++; -+ -+ if ( eip_rx_dropping) { -+ eip_rmd_requeue(rmd); -+ return; -+ } -+ -+ len = (payload) ? payload->Data[frag_nr++] : ep_rxd_len(rxd); -+ -+ EIP_DBG(EIP_DBG_RMD, eip_rmd_display, rmd); -+ -+again: -+ if ( (skb = skb_clone(rmd->skb, GFP_ATOMIC)) ) { -+ unsigned int off = (data - (unsigned long) rmd->skb->data); -+ -+ /* have to set the length before calling -+ * skb pull as it will not allow you to -+ * pull past the end */ -+ -+ skb_put (skb, off + len); -+ skb_pull (skb, off); -+ -+ skb->protocol = eth_type_trans(skb, eip_rx->net_device); -+ skb->ip_summed = eip_checksum_state; -+ skb->dev = eip_rx->net_device; -+ -+ /* Fabien/David/Mike this is a hack/fix to allow aggrigation of packets to work. -+ * The problem is ip_frag looks at the truesize to see if it is caching too much space. -+ * As we are reusing a large skb (cloned) for a number of small fragments, they appear to take up alot of space. -+ * so ip_frag dropped them after 4 frags (not good). So we lie and set the truesize to just bigger than the data. -+ */ -+ if (payload) -+ skb->truesize = SKB_DATA_ALIGN(skb->len + EIP_HEADER_PAD) +sizeof(struct sk_buff); -+ -+ } -+ if ( (skb) && -+ (netif_rx(skb) != NET_RX_DROP)){ -+ -+ eip_rx->bytes += len; -+ -+ if (payload && payload->Data[frag_nr] ) { -+ data += EIP_IP_ALIGN(len); -+ len = payload->Data[frag_nr++]; -+ goto again; -+ } -+ eip_rx->packets += ++frag_nr; -+ } else if ( (eip_rx->dropped++ % 20) == 0) -+ __EIP_DBG_PRINTK("Packet dropped by the TCP/IP stack - increase /proc/sys/net/core/netdev_max_backlog\n"); -+ } else if (ret == EP_SHUTDOWN ) { -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "ABORTING\n"); -+ ep_complete_receive(rxd); -+ eip_skb_unload(rmd); -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ eip_rmd_free(rmd); -+ return; -+ } else { -+ EP_ENVELOPE *env = ep_rxd_envelope(rxd); -+ EP_NMD *nmd ; -+ -+ EIP_ERR_PRINTF("RMD[%p] : RECEIVE ret = %d\n", rmd, ret); -+ -+ for (len = 0 ; len < env->nFrags ; len++) { -+ nmd = &env->Frags[len]; -+ EIP_ERR_PRINTF("RMD[%p] : ep_frag #%d nmd_addr [%x] nmd_len %d\n", rmd, len, -+ (unsigned int) nmd->nmd_addr, nmd->nmd_len); -+ } -+ eip_rx->errors++; -+ EIP_ASSERT2(atomic_read(&skb_shinfo(rmd->skb)->dataref) == 1, eip_rmd_display, rmd); -+ } -+ -+ /* data is used to store the irq flags */ -+ spin_lock_irqsave(&eip_rx->lock, data); -+ rmd->chain.next = eip_rx->irq_list; -+ eip_rx->irq_list = rmd; -+ eip_rx->irq_list_nr++; -+ spin_unlock_irqrestore(&eip_rx->lock, data); -+ -+ if (((count++ % eip_rx->sysctl_granularity) == 0) /* and either we have passed up a number of them */ -+ || eip_rx_queues_low()) /* or we are low */ -+ tasklet_schedule(&eip_rx->tasklet); -+ else -+ { -+ if ( !timer_pending (&eip_rx_tasklet_timer) ) /* the timer not already set */ -+ mod_timer (&eip_rx_tasklet_timer, lbolt); -+ } -+} -+ -+/* dest ; if the buffer still reference on it mocve the rmd to the dest list */ -+static void eip_rmd_reclaim(EIP_RMD *rmd) -+{ -+ EIP_RMD *rmd_next = rmd; -+ int dataref; -+ -+ while (rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ dataref = atomic_read(&skb_shinfo(rmd->skb)->dataref); -+ EIP_ASSERT(dataref > 0); -+ -+ if (dataref == 1) { -+ eip_rmd_requeue(rmd); -+ } else { -+ rmd->chain.next = rmd->head->busy_list; -+ rmd->head->busy_list = rmd; -+ } -+ } -+} -+static void eip_rx_tasklet(unsigned long arg) -+{ -+ EIP_RMD *rmd, *rmd_next; -+ unsigned long flags; -+ short svc, queued; -+ int needs_reschedule; -+ -+ if (eip_rx_tasklet_locked) /* we dont want the tasklet to do anything when we are finishing */ -+ return; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ rmd = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ } -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ needs_reschedule = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ /* the plan is : allocate some more if possible or steall some dvma space from those on the EIP_BUSY_LIST */ -+ queued = EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats); -+ -+ EIP_ASSERT(queued >= 0 && queued <= EIP_RMD_MAX_NR); -+ -+ if (queued < EIP_RMD_ALLOC_THRESH) { -+ short allocated = EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats); -+ short how_many; -+ -+ EIP_ASSERT(allocated >= 0 && allocated <= EIP_RMD_MAX_NR); -+ -+ if (likely(allocated < eip_rx->rmd_max_nr)) { -+ -+ how_many = (((allocated / EIP_RMD_ALLOC_STEP) + 1) * EIP_RMD_ALLOC_STEP); -+ if (how_many > eip_rx->rmd_max_nr) -+ how_many = eip_rx->rmd_max_nr; -+ -+ for (; allocated < how_many && -+ (eip_rmd_alloc_queue(svc, allocated * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_ATOMIC, EP_NO_ALLOC|EP_NO_SLEEP) == 0) ; allocated++); -+ if ( allocated != how_many ) { -+ eip_rx->reschedule++; -+ needs_reschedule = 1; -+ } -+ } else { -+ /* steal how_many rmds and put them on the aside list */ -+ how_many = EIP_RMD_ALLOC_THRESH - queued; -+ -+ EIP_ASSERT(how_many >= 0 && how_many <= EIP_RMD_ALLOC_THRESH); -+ -+ rmd_next = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ -+ while (how_many-- && rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ if (eip_rmd_alloc_replace(rmd, svc, GFP_ATOMIC)) { -+ rmd_next = rmd; -+ break; -+ } -+ } -+ eip_rx->head[svc].busy_list = rmd_next; -+ if ( how_many ) -+ needs_reschedule = 1; -+ } -+ } -+ } -+ -+ if (needs_reschedule) -+ { -+ if ( !timer_pending (&eip_rx_tasklet_timer)) -+ mod_timer (&eip_rx_tasklet_timer, lbolt); -+ } -+} -+static void eip_rx_tasklet_resched(unsigned long arg) -+{ -+ tasklet_schedule(&eip_rx->tasklet); -+} -+ -+static inline void eip_tmd_init(EIP_TMD * tmd, unsigned long buff_base, EIP_TMD_HEAD * head, unsigned long buff_len, -+ int dvma_idx) -+{ -+ tmd->dvma_idx = dvma_idx; -+ tmd->dma_base = buff_base; -+ tmd->dma_len = -1; -+ tmd->skb = NULL; -+ tmd->head = head; -+ tmd->chain.next = NULL; -+ -+ if (tmd->head != &eip_tx->head[EIP_TMD_STD]) { -+ tmd->nmd.nmd_len = buff_len; -+ eip_tmd_load(tmd); -+ } else { -+ tmd->nmd.nmd_len = -1; -+ tmd->nmd.nmd_addr = 0; -+ } -+} -+ -+static inline EIP_TMD *eip_tmd_get(int id) -+{ -+ unsigned long flags; -+ EIP_TMD *tmd = NULL; -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ while ((tmd = eip_tx->head[id].tmd) == NULL) { -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ if (ep_enable_txcallbacks(eip_tx->xmtr) == 0) { -+ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ if (eip_tx->head[id].tmd == NULL) { -+ __EIP_DBG_PRINTF("Cannot get a TMD on head %d ... stopping queue\n", id); -+ -+ eip_stop_queue (); -+ -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ return NULL; -+ } -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ } -+ -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ } -+ eip_tx->head[id].tmd = tmd->chain.next; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_dec(&tmd->head->stats); -+ return tmd; -+} -+ -+static inline void eip_tmd_put(EIP_TMD * tmd) -+{ -+ unsigned long flags; -+ -+ tmd->skb = NULL; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ tmd->chain.next = tmd->head->tmd; -+ tmd->head->tmd = tmd; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_inc(&tmd->head->stats); -+ -+ eip_start_queue(); -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_QUEUE, "TMD [%p] : REQUEUED\n", tmd); -+} -+static inline void eip_tmd_load(EIP_TMD * tmd) -+{ -+ EP_RAILMASK rmask = tx_railmask; -+ -+ __eip_tmd_load(tmd, &rmask); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : LOADED\n", tmd); -+} -+static inline void eip_tmd_unload(EIP_TMD * tmd) -+{ -+ __eip_tmd_unload(tmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : UNLOADED\n", tmd); -+} -+static inline void eip_tmd_free(EIP_TMD * tmd) -+{ -+ eip_buff_free(tmd->dma_base, tmd->nmd.nmd_len); -+ -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "TMD [%p] : FREED\n", tmd); -+ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, 1); -+} -+ -+/* tmd on a separate block */ -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD * tmd, EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ eip_tmd_init(tmd, 0, head, -1, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+/* tmd on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(tx_copybreak_max + sizeof(EIP_TMD), GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + tx_copybreak_max); -+ eip_tmd_init(tmd, buff_base, head, tx_copybreak_max, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+/* ipf are on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(EIP_SVC_BIGGEST_LEN, GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG)); -+ eip_tmd_init(tmd, buff_base, head, EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG), dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+static int eip_tmds_alloc() -+{ -+ int i; -+ int page_nr; -+ EIP_TMD *tmd; -+ -+ page_nr = EIP_DVMA_PAGES(tx_copybreak_max); -+ -+ eip_tx->head[EIP_TMD_COPYBREAK].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_COPYBREAK]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_copybreak(&eip_tx->head[EIP_TMD_COPYBREAK], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ eip_tx->head[EIP_TMD_STD].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN) * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_STD]); -+ -+ tmd = kmalloc(sizeof(EIP_TMD) * EIP_TMD_NR, GFP_KERNEL); -+ if (!tmd) { -+ EIP_ERR_PRINTF("Cannot ALLOCATE %d of tmds\n", (int) sizeof(EIP_TMD) * EIP_TMD_NR); -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ for (i = 0; i < EIP_TMD_NR; i++, tmd++) { -+ if (!eip_tmd_alloc_queue(tmd, &eip_tx->head[EIP_TMD_STD], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ eip_tx->head[EIP_TMD_AGGREG].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_AGGREG]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_aggreg(&eip_tx->head[EIP_TMD_AGGREG], i * page_nr)) -+ return -ENOMEM; -+ } -+ return 0; -+} -+ -+static void eip_tmds_free(void) -+{ -+ EIP_TMD *tmd; -+ EIP_TMD *tmd_next; -+ int i; -+ -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ for (i = 0 ; i < 3 ; i++) { -+again: -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats) < EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats)) { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "Polling XMTR [%p]\n", eip_tx->xmtr); -+ ep_poll_transmits(eip_tx->xmtr); -+ goto again; -+ } -+ } -+ /* everything should be queued */ -+ if ((tmd = eip_tx->head[EIP_TMD_COPYBREAK].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ eip_tmd_unload(tmd); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_COPYBREAK] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ /* these ones have been allocated as a block */ -+ if ((tmd = eip_tx->head[EIP_TMD_STD].tmd)) { -+ do { -+ if (tmd->dvma_idx == 0 ) { -+ kfree(tmd); -+ /* eip_tmd_free(tmd); */ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, EIP_TMD_NR); -+ tmd_next = NULL; -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_STD] BLOCK FREED\n", tmd); -+ } else -+ tmd_next = tmd->chain.next; -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_STD] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_STD].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_STD].handle); -+ -+ if ((tmd = eip_tx->head[EIP_TMD_AGGREG].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_unload(tmd); -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_AGGREG] release DVMA\n", -+ eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_free_xmtr(eip_tx->xmtr); -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "XMTR[%p] : FREED\n", eip_tx->xmtr); -+} -+ -+static inline void eip_ipf_skb_add(EIP_IPFRAG * ipf, struct sk_buff *skb) -+{ -+ int align = EIP_IP_ALIGN(skb->len); -+ -+ -+ if (ipf->dma_len == -1) { /* like a virgin; touched for the very first time */ -+ do_gettimeofday(&ipf->timestamp); -+ /* FIXE ME put that in release tmd code */ -+ ipf->frag_nr = 0; -+ ipf->dma_len = 0; -+ ipf->datagram_len = -1; -+ ipf->dma_correction = 0; -+ } -+ -+ memcpy((void *) (ipf->dma_base + ipf->dma_len), skb->data, skb->len); -+ -+ if (ipf->datagram_len == -1) { -+ struct iphdr * iph = skb->nh.iph; -+ int offset = ntohs(iph->frag_off); -+ -+ /* last one ? ; offset & ~IP_OFFSET = IP fragment flags */ -+ if (((offset & ~IP_OFFSET) & IP_MF) == 0) { -+ offset &= IP_OFFSET; -+ offset <<= 3; -+ ipf->datagram_len = offset + htons(iph->tot_len) - sizeof(struct iphdr); -+ } -+ } -+ -+ skb->next = ipf->skb; -+ ipf->skb = skb; -+ ipf->payload.Data[ipf->frag_nr] = skb->len; -+ ipf->dma_len += align; -+ ipf->dma_correction += align - skb->len + ETH_HLEN + sizeof(struct iphdr); -+ /* FIXME ; Count got wrong if ip header has options */ -+ -+ ipf->frag_nr++; -+ -+ EIP_DBG2(EIP_DBG_TMD, eip_ipf_display, ipf, "ADDED skb[%p] len %db ALIGNED(%db)\n", skb, skb->len, EIP_IP_ALIGN(skb->len)); -+} -+ -+#define eip_ipf_hasroom(ipf, skb) ((ipf->dma_len + EIP_IP_ALIGN(skb->len) < eip_tx->sysctl_ipfrag_copybreak)) -+int eip_hard_start_xmit(struct sk_buff *skb, struct net_device *devnet) -+{ -+ -+ EIP_TMD *tmd; -+ EP_NMD nmd; -+ struct iphdr *iph; -+ int j; -+ -+ if (skb->destructor){ -+ atomic_inc(&eip_tx->destructor); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ -+ if (!(iph = eip_ipfrag_get(skb->data)) || (eip_tx->sysctl_aggregation == 0)) { /* not ip fragment */ -+no_aggreg: -+ j = (skb->len < eip_tx->sysctl_copybreak) ? EIP_TMD_COPYBREAK : EIP_TMD_STD; /* j = head id */ -+ -+ if (!(tmd = eip_tmd_get(j))) { -+ if (skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ return 1; -+ } -+ -+ tmd->dma_len = skb->len; -+ tmd->skb = skb; -+ tmd->skb->next = NULL; -+ tmd->chain.next = NULL; -+ -+ if (j == EIP_TMD_COPYBREAK) { -+ memcpy((void *) tmd->dma_base, skb->data, skb->len); -+ -+ ep_nmd_subset(&nmd, &tmd->nmd, 0, skb->len); -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak++; -+#endif -+ return eip_do_xmit(tmd, &nmd, NULL); -+ } -+ tmd->dma_base = (unsigned long) skb->data; -+ tmd->nmd.nmd_len = skb->len; -+ eip_tmd_load(tmd); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_std++; -+#endif -+ return eip_do_xmit(tmd, &tmd->nmd, NULL); -+ } else if ( skb->len > EIP_SVC_BIGGEST_LEN/2 ) { -+ /* don't aggregate when we have a full mtu of data */ -+ /* or more than 32k ; in this case it is cheaper */ -+ /* to just map the buffer and send it */ -+ goto no_aggreg; -+ } else { -+ EIP_IPFRAG *ipf = NULL; -+ unsigned long flags; -+ struct list_head *l; -+ struct iphdr *iph2; -+ int i; -+ __u16 id = iph->id; -+ __u32 saddr = iph->saddr; -+ __u32 daddr = iph->daddr; -+ __u8 protocol = iph->protocol; -+ -+ EIP_DBG(EIP_DBG_IPH, eip_iph_display, iph); -+ -+ j = 0; -+ -+ /* here we can't have full mtu size aggregated packet */ -+ EIP_ASSERT_RET(skb->len < eip_tx->sysctl_ipfrag_copybreak, 0); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_for_each(l, &eip_tx->ipfrag) { -+ ipf = list_entry(l, EIP_IPFRAG, list); -+ iph2 = eip_ipfrag_get((char *) ipf->dma_base); -+ -+ EIP_ASSERT(iph2); -+ -+ if ((iph2->id == id) && -+ (get_unaligned(&iph2->saddr) == saddr) && -+ (get_unaligned(&iph2->daddr) == daddr) && -+ (iph2->protocol == protocol)) { -+ /* || timeout */ -+ if (eip_ipf_hasroom(ipf, skb)) { -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ if ((ipf->datagram_len != -1) && -+ (ipf->dma_len == (ipf->datagram_len + ipf->dma_correction) || -+ ipf->frag_nr == (128 / sizeof(uint32_t)))) { -+send_aggreg: -+ ipf->payload.Data[ipf->frag_nr] = 0; -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ ep_nmd_subset(&nmd, &ipf->nmd, 0, ipf->dma_len); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ if ((i = eip_do_xmit((EIP_TMD *) ipf, &nmd, &ipf->payload)) != EP_SUCCESS) -+ return i; -+ if (j) -+ goto new; -+ return 0; -+ } -+ -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ return 0; -+ } else { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF[%p] : FULL %db full - sending it\n", ipf, ipf->dma_len); -+ j = 1; -+ goto send_aggreg; -+ } -+ } -+ } -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+new: -+ if (!(ipf = (EIP_IPFRAG *) eip_tmd_get(EIP_TMD_AGGREG))) -+ goto no_aggreg; -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_add_tail(&ipf->list, &eip_tx->ipfrag); -+ eip_tx->ipfrag_count++; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ return 0; -+} -+static int eip_do_xmit(EIP_TMD * tmd, EP_NMD *nmd, EP_PAYLOAD *payload) -+{ -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ int attr = EP_SET_DATA((EP_NO_SLEEP | EP_NO_INTERRUPT | EP_NO_FAILOVER), EP_TYPE_SVC_INDICATOR, EP_SVC_EIP); -+ unsigned long flags; -+ int svc, rnum; -+ -+ SIZE_TO_SVC(nmd->nmd_len, svc); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ /* EIP_DBG(eip_eiph_display(eiph)); */ -+ -+ if (unlikely (eiph->h_dhost.ip_bcast)) -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_availrails(eip_tx->xmtr)); -+ else -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_noderails(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr))); -+ -+ if (rnum >= 0) -+ attr = EP_SET_PREFRAIL(attr, rnum); -+ -+ /* add to inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_add_tail (&tmd->chain.link, &eip_tx->inuse); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ /* ENOMEM EINVAL ECONNREFUSED ESUCCESS */ -+ svc = (unlikely(eiph->h_dhost.ip_bcast)) ? -+ ep_multicast_message(eip_tx->xmtr, -1, -1, NULL, EIP_SVC_EP(svc), attr | EP_NOT_MYSELF, eip_txhandler, tmd, payload, nmd, 1) : -+ -+ ep_transmit_message(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr), EIP_SVC_EP(svc), attr, eip_txhandler, tmd, payload, nmd, 1); -+ -+ if (likely(svc == EP_SUCCESS)) -+ return 0; -+ else if (svc == ENOMEM) { -+ EIP_ERR_PRINTF("%s", "Memory allocation error ...\n"); -+ eip_tx->errors++; -+ } -+ else -+ { -+ /* EP_EINVAL occurs when the svc has a bad value or the iovec has too many frag; */ -+ /* we don't use the latter option here */ -+ __EIP_DBG_PRINTF("TMD [%p] : DROPPED skb[%p] status = %d from ep_?_message\n", tmd, tmd->skb, svc); -+ -+ eip_tx->dropped++; -+ } -+ -+ eip_txhandler(NULL, tmd, -99); -+ -+ /* Quadrics GNAT sw-elan/4397 - since we will "never" be able to send this packet to the */ -+ /* destination node, we drop it and feign success - this has the same behaviour as an */ -+ /* ethernet where it sticks the packet on the wire, but no-one receives it. */ -+ return 0; -+} -+ -+static void eip_txhandler(EP_TXD * txd, void *arg, EP_STATUS status) -+{ -+ EIP_TMD *tmd = (EIP_TMD *) arg; -+ struct sk_buff *skb_next; -+ unsigned long flags; -+ int svc = 0; -+ -+ if (likely(status == EP_SUCCESS)) { -+ SIZE_TO_SVC(tmd->dma_len, svc); -+ eip_tx->dma[svc]++; -+ eip_tx->bytes += tmd->dma_len; -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->packets += ipf->frag_nr; -+ } else -+ eip_tx->packets++; -+ } else { -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->dropped += ipf->frag_nr; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler aggreg packet dropped status = %d\n", status); -+ } else { -+ eip_tx->dropped++; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler packet dropped status = %d\n", status); -+ } -+ } -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_STD]) { -+ eip_tmd_unload(tmd); -+ tmd->dma_base = 0; -+ tmd->nmd.nmd_len = -1; -+ } -+ -+ tmd->dma_len = -1; -+ -+ svc = 0; -+ while (tmd->skb) { -+ svc++; -+ -+ if (tmd->skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ -+ skb_next = tmd->skb->next; -+ dev_kfree_skb_any(tmd->skb); -+ tmd->skb = skb_next; -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF/TMD [%p] : %d skb RELEASE/FREED\n", tmd, svc); -+ -+ /* remove from inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_del (&tmd->chain.link); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ eip_tmd_put(tmd); -+} -+ -+static void eip_tx_tasklet(unsigned long arg) -+{ -+ struct timeval now; -+ unsigned long flags; -+ EIP_IPFRAG *ipf, *ipfq = NULL; -+ EP_NMD nmd; -+ struct list_head *list; -+ struct list_head *tmp; -+ char resched = 0; -+ char poll = 1; -+ -+ do_gettimeofday(&now); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ if (eip_tx->ipfrag_count) { -+ list_for_each_safe(list, tmp, &eip_tx->ipfrag) { -+ ipf = list_entry(list, EIP_IPFRAG, list); -+ /* delta = (((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - ipf->timestamp.tv_usec; */ -+ if (((((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - -+ ipf->timestamp.tv_usec) >= (1000UL * eip_tx->sysctl_ipfrag_to)) { -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ ipf->chain.next = (EIP_TMD *) ipfq; -+ ipfq = ipf; -+ } -+ } -+ } -+ if (eip_tx->ipfrag_count) -+ resched = 1; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ while (ipfq) { -+ poll = 0; -+ -+ ep_nmd_subset(&nmd, &ipfq->nmd, 0, ipfq->dma_len); -+ -+ ipfq->payload.Data[ipfq->frag_nr] = 0; -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ ipf = (EIP_IPFRAG *) ipfq->chain.next; -+ eip_do_xmit((EIP_TMD *) ipfq, &nmd, &ipfq->payload); -+ ipfq = ipf; -+ } -+ -+ if (poll) -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ if (atomic_read(&eip_tx->destructor) || resched ) -+ tasklet_schedule(&eip_tx->tasklet); -+} -+void eip_start_queue() -+{ -+ if (netif_queue_stopped(eip_tx->net_device)) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Waking up %s queue\n", eip_tx->net_device->name); -+ netif_wake_queue(eip_tx->net_device); -+ } -+} -+void eip_stop_queue() -+{ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Stopping %s queue\n", eip_tx->net_device->name); -+ netif_stop_queue(eip_tx->net_device); -+} -+ -+static int eip_open(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s entering in promiscuous mode\n", devnet->name); -+ -+ netif_start_queue(devnet); -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x up\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static int eip_close(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s leaving promiscuous mode\n", devnet->name); -+ -+ netif_stop_queue(devnet); -+ -+ eip_rx_tasklet(0); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x down\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static struct net_device_stats *eip_get_stats(struct net_device *devnet) -+{ -+ static struct net_device_stats stats; -+ -+ stats.rx_packets = eip_rx->packets; -+ stats.rx_bytes = eip_rx->bytes; -+ stats.rx_errors = eip_rx->errors; -+ stats.rx_dropped = eip_rx->dropped; -+ -+ stats.tx_packets = eip_tx->packets; -+ stats.tx_bytes = eip_tx->bytes; -+ stats.tx_errors = eip_tx->errors; -+ stats.tx_dropped = eip_tx->dropped; -+ return &stats; -+} -+ -+static int eip_change_mtu(struct net_device *devnet, int mtu) -+{ -+ if (mtu <= EIP_MTU_MAX) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "MTU size changed from %d to %d\n", devnet->mtu, mtu); -+ devnet->mtu = mtu; -+ } -+ return 0; -+} -+ -+#ifdef MODULE -+int eip_init(void) -+{ -+ struct net_device *devnet; -+ int errno = 0; -+ -+ eip_rx_dropping = 0; -+ eip_rx_tasklet_locked = 1; -+ -+ /* timer up but not started */ -+ init_timer (&eip_rx_tasklet_timer); -+ eip_rx_tasklet_timer.function = eip_rx_tasklet_resched; -+ eip_rx_tasklet_timer.data = (unsigned long) 0; -+ eip_rx_tasklet_timer.expires = lbolt + hz; -+ -+ devnet = alloc_etherdev(sizeof(EIP_RX) + sizeof(EIP_TX)); -+ if (!devnet) { -+ EIP_ERR_PRINTF("Unable to ALLOCATE etherdev structure\n"); -+ return -ENOMEM; -+ } -+ strcpy (devnet->name, "eip0"); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Enabling aggregation code\n"); -+ devnet->change_mtu = eip_change_mtu; -+ devnet->mtu = EIP_MTU_MAX; -+ devnet->open = eip_open; -+ devnet->stop = eip_close; -+ devnet->hard_start_xmit = eip_hard_start_xmit; -+ devnet->get_stats = eip_get_stats; -+ -+ /* devnet->features |= (NETIF_F_DYNALLOC); */ -+ /* devnet->features = (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA); */ -+ /* devnet->features |= (NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_HIGHDMA|NETIF_F_HW_CSUM); */ -+ -+ eip_rx = (EIP_RX *) devnet->priv; -+ eip_tx = (EIP_TX *) (eip_rx + 1); -+ -+ /* instance 0 */ -+ eip_tx->ep_system = ep_system(); -+ if (eip_tx->ep_system == NULL) { -+ EIP_ERR_PRINTF("kernel comms for iface %s does not exist\n", devnet->name); -+ errno = -ENXIO; -+ goto out; -+ } -+ if (ep_waitfor_nodeid(eip_tx->ep_system) == ELAN_INVALID_NODE) { -+ EIP_ERR_PRINTF("network position not found\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ eip_tx->xmtr = ep_alloc_xmtr(eip_tx->ep_system); -+ if (!eip_tx->xmtr) { -+ EIP_ERR_PRINTF("Cannot create allocated transmitter - maybe cable is disconnected\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ /* assign MAC address */ -+ *((int *) &devnet->dev_addr[4]) = htons(ep_nodeid(eip_tx->ep_system)); -+ eip_rx->net_device = devnet; -+ eip_tx->net_device = devnet; -+ -+ atomic_set(&eip_tx->destructor, 0); -+ -+ if ((tmd_max >= EIP_TMD_MIN_NR) && (tmd_max <= EIP_TMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tmd_max_nr to %d\n", tmd_max); -+ eip_tx->tmd_max_nr = tmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= tmd_max(%d) <= %d using default %d\n", -+ EIP_TMD_MIN_NR, tmd_max, EIP_TMD_MAX_NR, EIP_TMD_MAX_NR); -+ eip_tx->tmd_max_nr = EIP_TMD_MAX_NR; -+ } -+ -+ if ((rmd_max >= EIP_RMD_MIN_NR) && (rmd_max <= EIP_RMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting rmd_max_nr to %d\n", rmd_max); -+ eip_rx->rmd_max_nr = rmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= rmd_max(%d) <= %d using default %d\n", EIP_RMD_MIN_NR, -+ rmd_max, EIP_RMD_MAX_NR, EIP_RMD_MAX_NR); -+ eip_rx->rmd_max_nr = EIP_RMD_MAX_NR; -+ } -+ -+ if ((rx_envelope_nr > 0) && (rx_envelope_nr <= 1024)) { /* > 1024 don't be silly */ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting rx_envelope_nr to %d\n", rx_envelope_nr); -+ } else { -+ EIP_ERR_PRINTF("parameter error : 0 < rx_envelope_nr(%d) <= 1024 using default %d\n", -+ rx_envelope_nr, EIP_RX_ENVELOPE_NR); -+ rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+ } -+ -+ if (tx_copybreak_max <= EIP_TX_COPYBREAK_MAX) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tx_copybreak_max to %d\n", tx_copybreak_max); -+ } else { -+ EIP_ERR_PRINTF("parameter error : tx_copybreak_max > %d using default %d\n", -+ EIP_TX_COPYBREAK_MAX, EIP_TX_COPYBREAK_MAX); -+ tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+ } -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ -+ eip_tx->ipfrag_count = 0; -+ eip_aggregation_set(1); -+ eip_rx_granularity_set(rx_granularity); -+ eip_tx_copybreak_set(EIP_TX_COPYBREAK); -+ eip_ipfrag_to_set(EIP_IPFRAG_TO); -+ eip_ipfrag_copybreak_set(EIP_IPFRAG_COPYBREAK); -+ -+ spin_lock_init(&eip_tx->lock); -+ spin_lock_init(&eip_tx->ipfraglock); -+ spin_lock_init(&eip_rx->lock); -+ tasklet_init(&eip_rx->tasklet, eip_rx_tasklet, 0); -+ tasklet_init(&eip_tx->tasklet, eip_tx_tasklet, 0); -+ INIT_LIST_HEAD(&eip_tx->ipfrag); -+ INIT_LIST_HEAD(&eip_tx->inuse); -+ -+ /* if we fail here cannot do much yet; waiting for rcvr remove code in ep. */ -+ errno = eip_tmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_rmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_stats_init(); -+ if (errno) -+ goto out; -+ -+ if (ep_svc_indicator_set(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot set the service indicator\n"); -+ errno = -EINVAL; -+ goto out; -+ } -+ -+ eip_rx_tasklet_locked = 0; -+ tasklet_schedule(&eip_rx->tasklet); -+ -+ SET_MODULE_OWNER(eip_tx->net_device); -+ -+ if (register_netdev(devnet)) { -+ printk("eip: failed to register netdev\n"); -+ goto out; -+ } -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x ready\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ -+ return 0; -+ out: -+ unregister_netdev(devnet); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 25) -+ kfree(devnet); -+#else -+ free_netdev(devnet); -+#endif -+ -+ return errno; -+} -+void eip_exit(void) -+{ -+ int i; -+ -+ eip_rx_dropping = 1; /* means that new messages wont be sent to tcp stack */ -+ eip_rx_tasklet_locked = 1; -+ -+ netif_stop_queue(eip_tx->net_device); -+ -+ if (ep_svc_indicator_clear(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot unset the service indicator\n"); -+ } -+ -+ schedule_timeout(10); -+ -+ del_timer_sync (&eip_rx_tasklet_timer); -+ -+ tasklet_disable(&eip_rx->tasklet); -+ tasklet_disable(&eip_tx->tasklet); -+ -+ tasklet_kill(&eip_tx->tasklet); -+ tasklet_kill(&eip_rx->tasklet); -+ -+ eip_rmds_free(); -+ eip_tmds_free(); -+ -+ /* that things freed */ -+ for (i = 0 ; i < EIP_SVC_NR ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d RMDs not FREED on SVC[%d]\n", EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), i); -+ } -+ for (i = 0 ; i < 3 ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d TMDs not freed on TX HEAD[%d]\n", EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), i); -+ -+ } -+ unregister_netdev(eip_tx->net_device); -+ kfree(eip_tx->net_device); -+ -+ eip_stats_cleanup(); -+} -+ -+module_init(eip_init); -+module_exit(eip_exit); -+ -+module_param(eipdebug, uint, 0); -+MODULE_PARM_DESC(eipdebug, "Set debug flags"); -+ -+module_param(rx_envelope_nr, uint, 0); -+MODULE_PARM_DESC(rx_enveloppe_nr, "Number of allocated enveloppe on the rx side"); -+ -+module_param(tx_copybreak_max, uint, 0); -+MODULE_PARM_DESC(tx_copybreak_max, "Maximum size of the tx copybreak limit (default 512)"); -+ -+module_param(tmd_max, uint, 0); -+module_param(rmd_max, uint, 0); -+MODULE_PARM_DESC(tmd_max, "Maximun number of transmit buffers (default 64)"); -+MODULE_PARM_DESC(rmd_max, "Maximun number of receive buffers (default 64)"); -+ -+module_param(tx_railmask, ushort, 0); -+MODULE_PARM_DESC(tx_railmask, "Mask of which rails transmits can be queued on"); -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan IP driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/eip/eip_linux.h linux-2.6.9/drivers/net/qsnet/eip/eip_linux.h ---- clean/drivers/net/qsnet/eip/eip_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/eip_linux.h 2004-10-01 06:49:29.000000000 -0400 -@@ -0,0 +1,399 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_linux.h,v 1.47 2004/10/01 10:49:29 mike Exp $" -+ -+#ifndef __EIP_LINUX_H -+#define __EIP_LINUX_H -+ -+#define EIP_WATERMARK (0xfab1e) -+ -+#define EIP_PAGES(s) (((s - 1) >> PAGE_SHIFT) + 1) -+#define EIP_DVMA_PAGES(s) ((s < PAGE_SIZE) ? EIP_PAGES(s) + 1 : EIP_PAGES(s)) -+ -+#define EIP_SVC_SMALLEST_LEN (1 << 9) /* 512 */ -+#define EIP_SVC_BIGGEST_LEN (1 << 16) /* 64k */ -+ -+#define EIP_SVC_SMALLEST (0) -+#define EIP_SVC_BIGGEST (7) -+ -+#define EIP_SVC_NR (8) -+#define EIP_SVC_EP(s) (s + EP_MSG_SVC_EIP512) -+ -+#define EIP_STAT_ALLOC_SHIFT (8) -+#define EIP_STAT_ALLOC_GET(atomicp) ((int) atomic_read(atomicp) >> EIP_STAT_ALLOC_SHIFT) -+#define EIP_STAT_ALLOC_ADD(atomicp, v) (atomic_add((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+#define EIP_STAT_ALLOC_SUB(atomicp, v) (atomic_sub((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+ -+#define EIP_STAT_QUEUED_MASK (0xff) -+#define EIP_STAT_QUEUED_GET(atomicp) ((int) atomic_read(atomicp) & EIP_STAT_QUEUED_MASK) -+ -+#define EIP_RMD_NR (8) -+#define EIP_RMD_MIN_NR (8) -+#define EIP_RMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_RMD_ALLOC_STEP (8) -+#define EIP_RMD_ALLOC_THRESH (16) -+ -+#define EIP_RMD_ALLOC (1) -+#define EIP_RMD_REPLACE (0) -+ -+#define EIP_TMD_NR (64) -+#define EIP_TMD_MIN_NR (16) -+#define EIP_TMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_TMD_TYPE_NR (3) -+#define EIP_TMD_COPYBREAK (0x0) -+#define EIP_TMD_STD (0x1) -+#define EIP_TMD_AGGREG (0x2) -+ -+#define EIP_TX_COPYBREAK (512) -+#define EIP_TX_COPYBREAK_MAX (1024) -+ -+#define EIP_IPFRAG_TO (50) /* time out before a frag is sent in msec */ -+#define EIP_IPFRAG_COPYBREAK (EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG) - EIP_HEADER_PAD) -+ -+#define EIP_RX_ENVELOPE_NR ((EIP_RMD_MAX_NR*EIP_SVC_NR)/2) -+#define EIP_RX_GRANULARITY (1) -+ -+#define EIP_IP_ALIGN(X) (((X) + (15)) & ~(15)) -+#define EIP_EXTRA roundup (sizeof(EIP_RMD), 256) -+#define EIP_RCV_DMA_LEN(s) (s - EIP_EXTRA - EIP_HEADER_PAD) -+#define EIP_MTU_MAX (EIP_RCV_DMA_LEN(EIP_SVC_BIGGEST_LEN) - (ETH_HLEN)) -+ -+#define SIZE_TO_SVC(s, svc) \ -+ do { \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 9))) {svc = 0;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 10))) {svc = 1;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 11))) {svc = 2;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 12))) {svc = 3;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 13))) {svc = 4;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 14))) {svc = 5;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 15))) {svc = 6;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 16))) {svc = 7;break;} \ -+ svc = -666; \ -+ EIP_ASSERT(1 == 0); \ -+ } while (0) -+ -+extern int eipdebug; -+#define EIP_ASSERT_ON -+/* #define NO_DEBUG */ -+ -+ -+/* ######################## */ -+#ifdef NO_DEBUG -+#define __EIP_DBG_PRINTF(fmt, args...) -+#define EIP_DBG_PRINTF(flag, fmt, args...) -+#else -+ -+#define EIP_DBG_RMD 0x1 -+#define EIP_DBG_TMD 0x2 -+#define EIP_DBG_RMD_HEAD 0x4 -+#define EIP_DBG_TMD_HEAD 0x8 -+#define EIP_DBG_EIPH 0x10 -+#define EIP_DBG_IPH 0x20 -+#define EIP_DBG_RMD_EP_DVMA 0x40 -+#define EIP_DBG_TMD_EP_DVMA 0x80 -+#define EIP_DBG_EP_DVMA (EIP_DBG_RMD_EP_DVMA|EIP_DBG_TMD_EP_DVMA) -+#define EIP_DBG_MEMALLOC 0x100 -+#define EIP_DBG_MEMFREE 0x200 -+#define EIP_DBG_RMD_QUEUE 0x400 -+#define EIP_DBG_TMD_QUEUE 0x800 -+#define EIP_DBG_GEN 0x1000 -+#define EIP_DBG_DEBUG 0x2000 -+ -+#define __EIP_DBG_PRINTF(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUFFER, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTF(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define __EIP_DBG_PRINTK(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUF_CON, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTK(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define EIP_ERR_PRINTF(fmt, args...) __EIP_DBG_PRINTK("!!! ERROR !!! - " fmt, ## args) -+ -+ -+#define EIP_DBG2(flag, fn, fn_arg, fmt, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ (void)(fn)(fn_arg); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ } -+ -+ -+#define EIP_DBG(flag, fn, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s\n", smp_processor_id(), __func__); \ -+ (void)(fn)(args); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s :\n", smp_processor_id(), __func__); \ -+ } -+#endif /* NO_DEBUG */ -+ -+ -+#ifdef EIP_ASSERT_ON -+ -+#define __EIP_ASSERT_PRINT(exp) \ -+ eipdebug = 0xffff; \ -+ EIP_ERR_PRINTF("ASSERT : %s, %s::%d\n", \ -+ #exp, __BASE_FILE__, __LINE__); -+ -+#define EIP_ASSERT(exp) \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ netif_stop_queue(eip_tx->net_device); \ -+ } -+ -+#define EIP_ASSERT2(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_BUG(exp) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ BUG(); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_GOTO(exp, label, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ goto label; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RET(exp, ret) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ return ret; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETURN(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETNULL(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return NULL; \ -+ } \ -+ } while (0) -+ -+#else -+ -+#define EIP_ASSERT(exp) do {} while(0) -+#define EIP_ASSERT_OUT(exp) do {} while(0) -+#define EIP_ASSERT_RETURN(exp) do {} while(0) -+#define EIP_ASSERT_RETNULL(exp) do {} while(0) -+#define EIP_ASSERT_BUG(exp) do {} while(0) -+ -+#endif /* EIP_ASSERT */ -+ -+ -+ -+typedef struct { -+ u_short ip_bcast; -+ u_short ip_inst; -+ u_short ip_addr; -+} EIP_ADDRESS; -+ -+typedef struct { -+ EIP_ADDRESS h_dhost; -+ EIP_ADDRESS h_shost; -+ u_short h_sap; -+} EIP_HEADER; -+#define EIP_HEADER_PAD (2) -+ -+typedef struct eip_proc_fs { -+ const char *name; -+ struct proc_dir_entry **parent; -+ read_proc_t *read; -+ write_proc_t *write; -+ unsigned char allocated; -+ struct proc_dir_entry *entry; -+} EIP_PROC_FS; -+ -+#define EIP_PROC_ROOT_DIR "eip" -+ -+#define EIP_PROC_DEBUG_DIR "debug" -+#define EIP_PROC_DEBUG_RX_FLUSH "rx_flush" -+#define EIP_PROC_DEBUG_TX_FLUSH "tx_flush" -+ -+#define EIP_PROC_AGGREG_DIR "aggregation" -+#define EIP_PROC_AGGREG_ONOFF "enable" -+#define EIP_PROC_AGGREG_TO "timeout" -+#define EIP_PROC_AGGREG_COPYBREAK "copybreak" -+ -+#define EIP_PROC_TX_COPYBREAK "tx_copybreak" -+#define EIP_PROC_STATS "stats" -+#define EIP_PROC_RX_GRAN "rx_granularity" -+#define EIP_PROC_TX_RAILMASK "tx_railmask" -+#define EIP_PROC_TMD_INUSE "tmd_inuse" -+#define EIP_PROC_EIPDEBUG "eipdebug" -+#define EIP_PROC_CHECKSUM "checksum" -+ -+/* RX */ -+/* dma_len is used to keep the len of a received packet */ -+/* nmd.nmd_len is the max dma that can be received */ -+/* */ -+struct eip_rmd { -+ struct sk_buff *skb; -+ -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ EP_RXD *rxd; -+ struct eip_rmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_rmd *next; /* all other lists */ -+ } chain; -+}; -+typedef struct eip_rmd EIP_RMD; -+struct eip_rmd_head { -+ EP_NMH *handle; -+ -+ EP_RCVR *rcvr; -+ EIP_RMD *busy_list; -+ -+ /* stats */ -+ atomic_t stats; -+ unsigned long dma; -+}; -+ -+typedef struct eip_rmd_head EIP_RMD_HEAD; -+typedef struct eip_rx { -+ struct eip_rmd_head head[EIP_SVC_NR]; -+ -+ EIP_RMD *irq_list; -+ short irq_list_nr; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long reschedule; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+ unsigned char rmd_max_nr; -+ unsigned char sysctl_granularity; -+ struct net_device *net_device; -+} EIP_RX; -+ -+/* TX */ -+/* dma_len_max is the maximum len for a given DMA */ -+/* where mnd.nmd_len is the len of the packet to send ~> than skb->len */ -+typedef struct eip_ipfrag_handle { -+ /* common with tmd */ -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+ -+ /* private */ -+ struct list_head list; -+ struct timeval timestamp; -+ unsigned int frag_nr; -+ int datagram_len; /* Ip data */ -+ int dma_correction; -+ EP_PAYLOAD payload; -+} EIP_IPFRAG; -+ -+struct eip_tmd { -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+}; -+ -+struct eip_tmd_head { -+ EP_NMH *handle; -+ -+ struct eip_tmd *tmd; -+ atomic_t stats; -+}; -+ -+typedef struct eip_tmd EIP_TMD; -+typedef struct eip_tmd_head EIP_TMD_HEAD; -+ -+/* #define EIP_MORE_STATS */ -+ -+typedef struct eip_tx { -+ struct net_device *net_device; -+ EP_XMTR *xmtr; -+ EP_SYS *ep_system; -+ -+ struct eip_tmd_head head[EIP_TMD_TYPE_NR]; -+ struct list_head inuse; -+ atomic_t destructor; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long dma[EIP_SVC_NR]; -+ -+#ifdef EIP_MORE_STATS -+ unsigned long sent_copybreak; -+ unsigned long sent_std; -+ unsigned long sent_aggreg; -+#endif -+ -+ unsigned char tmd_max_nr; -+ -+ unsigned short sysctl_copybreak; -+ unsigned short sysctl_ipfrag_to; -+ unsigned short sysctl_ipfrag_copybreak; -+ unsigned short sysctl_aggregation; -+ -+ unsigned short ipfrag_count; -+ struct list_head ipfrag; -+ spinlock_t ipfraglock; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+} EIP_TX; -+ -+/* =============================================== */ -+ /* unsigned long multicast; */ -+#endif /* __EIP_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/eip/eip_stats.c linux-2.6.9/drivers/net/qsnet/eip/eip_stats.c ---- clean/drivers/net/qsnet/eip/eip_stats.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/eip_stats.c 2005-09-07 10:34:58.000000000 -0400 -@@ -0,0 +1,374 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: eip_stats.c,v 1.36.2.2 2005/09/07 14:34:58 mike Exp $ -+ * $Source: /cvs/master/quadrics/eipmod/eip_stats.c,v $ -+ */ -+ -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "eip_linux.h" -+#include "eip_stats.h" -+ -+extern EIP_RX *eip_rx; -+extern EIP_TX *eip_tx; -+extern int tx_copybreak_max; -+extern EP_RAILMASK tx_railmask; -+extern int eip_checksum_state; -+extern void eip_stop_queue(void); -+extern void eip_start_queue(void); -+ -+static int eip_stats_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int i, outlen = 0; -+ -+ *buf = '\0'; -+ strcat(buf, "\n"); -+ strcat(buf, "--------------------------------------------+------------+-----------------+\n"); -+ strcat(buf, " SKB/DMA | | Rx | Tx | TMD TYPE |\n"); -+ strcat(buf, "--------------------------------------------+------------|-----------------+\n"); -+ -+ i = 0; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #1[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #2[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #3[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld +-----------------+\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ sprintf(buf + strlen(buf), " RMD IRQ %4.4d %10lu | %10lu |\n", -+ eip_rx->irq_list_nr, -+ eip_rx->packets, eip_tx->packets); -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ -+#ifdef EIP_MORE_STATS -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), " Copybreak %10ld Std %10ld Aggreg %10ld\n", -+ eip_tx->sent_copybreak, eip_tx->sent_std, eip_tx->sent_aggreg); -+#endif -+ -+ -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), "Rx bytes: %lu (%lu Mb) errors: %lu dropped: %lu reschedule: %lu\n", -+ eip_rx->bytes, eip_rx->bytes / (1024 * 1024), eip_rx->errors, eip_rx->dropped, eip_rx->reschedule); -+ sprintf(buf + strlen(buf), "Tx bytes: %lu (%lu Mb) errors: %lu dropped: %lu\n", -+ eip_tx->bytes, eip_tx->bytes / (1024 * 1024), eip_tx->errors, eip_tx->dropped); -+ strcat(buf, "\n"); -+ -+ outlen = strlen(buf); -+ ASSERT(outlen < PAGE_SIZE); -+ *eof = 1; -+ return outlen; -+} -+ -+void eip_stats_dump(void) -+{ -+ int eof; -+ -+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ -+ if (buf == NULL) -+ { -+ printk("no memory to produce eip_stats\n"); -+ return; -+ } -+ -+ eip_stats_read(buf, NULL, 0, 0, &eof, NULL); -+ -+ printk(buf); -+ -+ kfree(buf); -+} -+ -+static int eip_stats_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ eip_rx->packets = 0; -+ eip_rx->bytes = 0; -+ eip_rx->errors = 0; -+ eip_rx->dropped = 0; -+ eip_rx->reschedule = 0; -+ for (i = 0; i < EIP_SVC_NR; eip_rx->head[i].dma = 0, i++); -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ eip_tx->packets = 0; -+ eip_tx->bytes = 0; -+ eip_tx->errors = 0; -+ eip_tx->dropped = 0; -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ for (i = 0; i < EIP_SVC_NR; eip_tx->dma[i] = 0, i++); -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return count; -+} -+ -+#define eip_stats_var_write(name) \ -+static int eip_stats_##name##_write(struct file *file, const char *buf, unsigned long count, void *data) \ -+{ \ -+ char * b = (char *) buf; \ -+ *(b + count) = '\0'; \ -+ eip_##name##_set((int) simple_strtoul(b, NULL, 10)); \ -+ return count; \ -+} -+ -+#define eip_stats_var_read(name, var) \ -+static int eip_stats_##name##_read(char *buf, char **start, off_t off, int count, int *eof, void *data) \ -+{ \ -+ sprintf(buf, "%d\n", var); \ -+ *eof = 1; \ -+ return strlen(buf); \ -+} -+ -+ -+#define eip_stats_var_set(name, min, max, default, var) \ -+void eip_##name##_set(int i) \ -+{ \ -+ if ( (i >= min) && (i <= max)) { \ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting " #name " to %d\n", i); \ -+ var =(unsigned short) i; \ -+ } \ -+ else { \ -+ EIP_ERR_PRINTF("parameter error : %d <= " #name "(%d) <= %d using default %d\n", min, i, (int) max, (int) default); \ -+ } \ -+} -+ -+eip_stats_var_set(tx_copybreak, 0, tx_copybreak_max, EIP_TX_COPYBREAK, eip_tx->sysctl_copybreak); -+eip_stats_var_set(rx_granularity, 1, EIP_RMD_MIN_NR, EIP_RX_GRANULARITY, eip_rx->sysctl_granularity); -+eip_stats_var_set(tx_railmask, 0, EP_RAILMASK_ALL, EP_RAILMASK_ALL, tx_railmask); -+eip_stats_var_set(ipfrag_to, 0, (1 << 16), EIP_IPFRAG_TO, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_set(aggregation, 0, 1, 1, eip_tx->sysctl_aggregation); -+eip_stats_var_set(ipfrag_copybreak, 0, EIP_IPFRAG_COPYBREAK, EIP_IPFRAG_COPYBREAK, eip_tx->sysctl_ipfrag_copybreak); -+/* eip_stats_var_set(eipdebug, 0, , 0, eipdebug); */ -+ -+eip_stats_var_read(aggregation, eip_tx->sysctl_aggregation); -+eip_stats_var_read(ipfrag_count, eip_tx->ipfrag_count); -+eip_stats_var_read(ipfrag_to, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_read(ipfrag_copybreak, eip_tx->sysctl_ipfrag_copybreak); -+eip_stats_var_read(tx_copybreak, eip_tx->sysctl_copybreak); -+eip_stats_var_read(rx_granularity, eip_rx->sysctl_granularity); -+eip_stats_var_read(tx_railmask, tx_railmask); -+ -+eip_stats_var_write(aggregation); -+eip_stats_var_write(ipfrag_to); -+eip_stats_var_write(ipfrag_copybreak); -+eip_stats_var_write(tx_copybreak); -+eip_stats_var_write(rx_granularity); -+eip_stats_var_write(tx_railmask); -+ -+ -+static int eip_checksum_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * b = (char *) buf; -+ int value; -+ -+ *(b + count) = '\0'; -+ -+ value = (int) simple_strtoul(b, NULL, 10); -+ if ((value >= CHECKSUM_NONE) && (value <= CHECKSUM_UNNECESSARY)) -+ eip_checksum_state = value; -+ else -+ EIP_ERR_PRINTF("%d <= checksum(%d) <= %d using old value %d\n", CHECKSUM_NONE, value, CHECKSUM_UNNECESSARY, eip_checksum_state); -+ -+ return count; -+} -+ -+static int eip_checksum_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ switch ( eip_checksum_state ) -+ { -+ case 0 : sprintf(buf, "0 CHECKSUM_NONE\n"); break; -+ case 1 : sprintf(buf, "1 CHECKSUM_HW\n"); break; -+ case 2 : sprintf(buf, "2 CHECKSUM_UNNECESSARY\n"); break; -+ default : sprintf(buf, "%d INVALID VALUE\n", eip_checksum_state); break; -+ } -+ *eof = 1; -+ return strlen(buf); -+} -+ -+static int eip_stats_eipdebug_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ *buf = '\0'; -+ sprintf(buf + strlen(buf), "0x%x\n", eipdebug); -+ *eof = 1; -+ return strlen(buf); -+} -+static int eip_stats_eipdebug_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * p = (char *) buf; -+ *(p + count - 1) = '\0'; -+ eipdebug = simple_strtoul(p, NULL, 0); -+ __EIP_DBG_PRINTK("Setting eipdebug to 0x%x\n", eipdebug); -+ return count; -+} -+ -+static int eip_stats_tmd_inuse_read(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ struct list_head *lp; -+ unsigned long flags; -+ unsigned int len = 0; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ list_for_each (lp, &eip_tx->inuse) { -+ EIP_TMD *tmd = list_entry (lp, EIP_TMD, chain.link); -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ -+ len += sprintf(page+len, "tmd=%p id=%d len=%d\n", -+ tmd, eiph ? ntohs(eiph->h_dhost.ip_addr) : -1, -+ tmd->dma_len); -+ -+ if (len + 40 >= count) -+ break; -+ } -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, len); -+} -+ -+static int eip_stats_debug_rx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing rx ...\n"); -+ tasklet_schedule(&eip_rx->tasklet); -+ return count; -+} -+static int eip_stats_debug_tx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing tx ... %d tmds reclaimed\n", ep_enable_txcallbacks(eip_tx->xmtr)); -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ tasklet_schedule(&eip_tx->tasklet); -+ return count; -+} -+ -+#define EIP_PROC_PARENT_NR (3) -+/* NOTE : the parents should be declared b4 the children */ -+static EIP_PROC_FS eip_procs[] = { -+ /* {name, parent, read fn, write fn, allocated, entry}, */ -+ {EIP_PROC_ROOT_DIR, &qsnet_procfs_root, NULL, NULL, 0, NULL}, -+ {EIP_PROC_DEBUG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, /* end of parents */ -+ {EIP_PROC_STATS, &eip_procs[0].entry, eip_stats_read, eip_stats_write, 0, NULL}, -+ {EIP_PROC_TX_COPYBREAK, &eip_procs[0].entry, eip_stats_tx_copybreak_read, eip_stats_tx_copybreak_write, 0, NULL}, -+ {EIP_PROC_RX_GRAN, &eip_procs[0].entry, eip_stats_rx_granularity_read, eip_stats_rx_granularity_write, 0, NULL}, -+ {EIP_PROC_TX_RAILMASK, &eip_procs[0].entry, eip_stats_tx_railmask_read, eip_stats_tx_railmask_write, 0, NULL}, -+ {EIP_PROC_TMD_INUSE, &eip_procs[0].entry, eip_stats_tmd_inuse_read, NULL, 0, NULL}, -+ {EIP_PROC_EIPDEBUG, &eip_procs[0].entry, eip_stats_eipdebug_read, eip_stats_eipdebug_write, 0, NULL}, -+ {EIP_PROC_CHECKSUM, &eip_procs[0].entry, eip_checksum_read, eip_checksum_write, 0, NULL}, -+ {EIP_PROC_DEBUG_RX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_rx_flush, 0, NULL}, -+ {EIP_PROC_DEBUG_TX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_tx_flush, 0, NULL}, -+ {"ipfrag_count", &eip_procs[2].entry, eip_stats_ipfrag_count_read, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_TO, &eip_procs[2].entry, eip_stats_ipfrag_to_read, eip_stats_ipfrag_to_write, 0, NULL}, -+ {EIP_PROC_AGGREG_ONOFF, &eip_procs[2].entry, eip_stats_aggregation_read, eip_stats_aggregation_write, 0, NULL}, -+ {EIP_PROC_AGGREG_COPYBREAK, &eip_procs[2].entry, eip_stats_ipfrag_copybreak_read, eip_stats_ipfrag_copybreak_write, 0, NULL}, -+ {NULL, NULL, NULL, NULL, 1, NULL}, -+}; -+ -+int eip_stats_init(void) -+{ -+ int p; -+ -+ for (p = 0; !eip_procs[p].allocated; p++) { -+ if (p < EIP_PROC_PARENT_NR) -+ eip_procs[p].entry = proc_mkdir(eip_procs[p].name, *eip_procs[p].parent); -+ else -+ eip_procs[p].entry = create_proc_entry(eip_procs[p].name, 0, *eip_procs[p].parent); -+ -+ if (!eip_procs[p].entry) { -+ EIP_ERR_PRINTF("%s\n", "Cannot allocate proc entry"); -+ eip_stats_cleanup(); -+ return -ENOMEM; -+ } -+ -+ eip_procs[p].entry->owner = THIS_MODULE; -+ eip_procs[p].entry->write_proc = eip_procs[p].write; -+ eip_procs[p].entry->read_proc = eip_procs[p].read; -+ eip_procs[p].allocated = 1; -+ } -+ eip_procs[p].allocated = 0; -+ return 0; -+} -+ -+void eip_stats_cleanup(void) -+{ -+ int p; -+ for (p = (sizeof (eip_procs)/sizeof (eip_procs[0]))-1; p >= 0; p--) -+ if (eip_procs[p].allocated) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Removing %s from proc\n", eip_procs[p].name); -+ remove_proc_entry(eip_procs[p].name, *eip_procs[p].parent); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/eip/eip_stats.h linux-2.6.9/drivers/net/qsnet/eip/eip_stats.h ---- clean/drivers/net/qsnet/eip/eip_stats.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/eip_stats.h 2004-05-10 10:47:47.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_stats.h,v 1.14 2004/05/10 14:47:47 daniel Exp $" -+ -+#ifndef __EIP_STATS_H -+#define __EIP_STATS_H -+ -+int eip_stats_init(void); -+void eip_stats_cleanup(void); -+void eip_rx_granularity_set(int); -+void eip_tx_copybreak_set(int); -+void eip_ipfrag_to_set(int); -+void eip_aggregation_set(int); -+void eip_ipfrag_copybreak_set(int); -+void eip_stats_dump(void); -+ -+#endif /* __EIP_STATS_H */ -diff -urN clean/drivers/net/qsnet/eip/Makefile linux-2.6.9/drivers/net/qsnet/eip/Makefile ---- clean/drivers/net/qsnet/eip/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/Makefile 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/eip/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_EIP) += eip.o -+eip-objs := eip_linux.o eip_stats.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/eip/Makefile.conf linux-2.6.9/drivers/net/qsnet/eip/Makefile.conf ---- clean/drivers/net/qsnet/eip/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/Makefile.conf 2005-09-07 10:39:48.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = eip.o -+MODULENAME = eip -+KOBJFILES = eip_linux.o eip_stats.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_EIP -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/eip/quadrics_version.h linux-2.6.9/drivers/net/qsnet/eip/quadrics_version.h ---- clean/drivers/net/qsnet/eip/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/eip/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/elan/bitmap.c linux-2.6.9/drivers/net/qsnet/elan/bitmap.c ---- clean/drivers/net/qsnet/elan/bitmap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/bitmap.c 2004-01-20 12:32:17.000000000 -0500 -@@ -0,0 +1,287 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: bitmap.c,v 1.5 2004/01/20 17:32:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/bitmap.c,v $*/ -+ -+#if defined(__KERNEL__) -+#include -+#endif -+#include -+#include -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_freebit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit off */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != ~((bitmap_t) 0)) -+ break; -+ -+ if (i <= last) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == 0) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+ -+} -+ -+/* -+ * bt_lowbit: -+ * Return the index of the lowest set bit in the -+ * bitmap, or -1 for failure. -+ */ -+int -+bt_lowbit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit on */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != 0) -+ break; -+ if (i <= last) -+ { -+ /* found a word bit a bit on, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if (bitmap[i] & (1 << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ -+ return (-1); -+} -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset) -+{ -+ int first = ((last+1) + BT_NBIPUL-1) >> BT_ULSHIFT; -+ int end = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for bits before the first whole word */ -+ if (((last+1) & BT_ULMASK) != 0) -+ { -+ maxbit = ((first-1) == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = ((last+1) & BT_ULMASK); j <= maxbit; j++) -+ if ((bitmap[first-1] & (1 << j)) == (isset << j)) -+ return (((first-1) << BT_ULSHIFT) | j); -+ } -+ -+ /* look for a word with a bit off */ -+ for (i = first; i <= end; i++) -+ if (bitmap[i] != (isset ? 0 : ~((bitmap_t) 0))) -+ break; -+ -+ if (i <= end) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == end) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == (isset << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+} -+ -+void -+bt_copy (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ b[i] = a[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(a, i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR(b,i); -+} -+ -+void -+bt_zero (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = 0; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_CLEAR(bitmap,i); -+} -+ -+void -+bt_fill (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = ~((bitmap_t) 0); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_SET(bitmap,i); -+} -+ -+int -+bt_cmp (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ if (a[i] != b[i]) -+ return (1); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) != BT_TEST(b, i)) -+ return (1); -+ return (0); -+} -+ -+void -+bt_intersect (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) && BT_TEST (b, i)) -+ BT_SET (a, i); -+ else -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_remove (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= ~b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (b, i)) -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_add (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] |= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(b, i)) -+ BT_SET (a, i); -+} -+ -+/* -+ * bt_spans : partition a spans partition b -+ * == all bits set in 'b' are set in 'a' -+ */ -+int -+bt_spans (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ if (BT_TEST (b, i) && !BT_TEST (a, i)) -+ return (0); -+ return (1); -+} -+ -+/* -+ * bt_subset: copy [base,base+nbits-1] from 'a' to 'b' -+ */ -+void -+bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, base+i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR (b,i); -+ } -+} -+ -+void -+bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (!BT_TEST (a, i) && BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+void -+bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, i) && !BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+int -+bt_nbits (bitmap_t *a, int nbits) -+{ -+ int i, c; -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (a, i)) -+ c++; -+ return (c); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/capability.c linux-2.6.9/drivers/net/qsnet/elan/capability.c ---- clean/drivers/net/qsnet/elan/capability.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/capability.c 2005-07-21 06:42:36.000000000 -0400 -@@ -0,0 +1,796 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.c,v 1.19.2.2 2005/07/21 10:42:36 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.c,v $ */ -+ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_cap_list); -+ -+typedef struct elan_vp_struct -+{ -+ struct list_head list; -+ ELAN_CAPABILITY vp; -+} ELAN_VP_NODE_STRUCT; -+ -+/* There is an array of these structs for each process/context in the CAP -+ * This is then replicated for each rail. The usercopy handle stuff is -+ * only maintained in rail 0 though -+ */ -+typedef struct elan_attached_struct -+{ -+ void *cb_args; -+ ELAN_DESTROY_CB cb_func; -+ struct task_struct *handle; /* usercopy: attached task handle */ -+ struct task_struct *owner; /* usercopy: attached task handle owner */ -+} ELAN_ATTACHED_STRUCT; -+ -+typedef struct elan_cap_node_struct -+{ -+ struct list_head list; -+ ELAN_CAP_STRUCT node; -+ ELAN_ATTACHED_STRUCT *attached[ELAN_MAX_RAILS]; -+ struct list_head vp_list; -+} ELAN_CAP_NODE_STRUCT; -+ -+ -+ELAN_CAP_NODE_STRUCT * -+find_cap_node(ELAN_CAPABILITY *cap) -+{ -+ struct list_head *tmp; -+ ELAN_CAP_NODE_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ /* is it an exact match (key not checked) */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)) { -+ return ptr; -+ } -+ } -+ return ptr; -+} -+ -+ELAN_VP_NODE_STRUCT * -+find_vp_node( ELAN_CAP_NODE_STRUCT *cap_node,ELAN_CAPABILITY *map) -+{ -+ struct list_head * tmp; -+ ELAN_VP_NODE_STRUCT * ptr = NULL; -+ -+ list_for_each(tmp, &cap_node->vp_list) { -+ ptr = list_entry(tmp, ELAN_VP_NODE_STRUCT , list); -+ /* is it an exact match (key not checked) */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->vp,map) -+ && ELAN_CAP_GEOM_MATCH(&ptr->vp,map)){ -+ return ptr; -+ } -+ } -+ return ptr; -+} -+ -+int -+elan_validate_cap(ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* check versions */ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (EINVAL); -+ } -+ -+ /* check its not HWTEST */ -+ if ( cap->cap_type & ELAN_CAP_TYPE_HWTEST ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_HWTEST \n"); -+ return (EINVAL); -+ } -+ -+ /* check its type */ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_KERNEL : -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_KERNEL \n"); -+ return (EINVAL); -+ -+ /* check it has a valid type */ -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ break; -+ -+ /* all others are failed as well */ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed unknown type = %x \n", (cap->cap_type & ELAN_CAP_TYPE_MASK)); -+ return (EINVAL); -+ } -+ -+ if ((cap->cap_lowcontext == ELAN_CAP_UNINITIALISED) || (cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ || (cap->cap_lownode == ELAN_CAP_UNINITIALISED) || (cap->cap_highnode == ELAN_CAP_UNINITIALISED)) -+ { -+ -+ ELAN_DEBUG4 (ELAN_DBG_VP,"elan_validate_cap: ELAN_CAP_UNINITIALISED LowNode %d HighNode %d LowContext %d highContext %d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lownode > cap->cap_highnode) %d %d\n",cap->cap_lownode, cap->cap_highnode); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_mycontext != ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed cap->cap_mycontext is set %d \n", cap->cap_mycontext); -+ return (EINVAL); -+ } -+ -+ -+ if ((ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)) > ELAN_MAX_VPS) -+ { -+ ELAN_DEBUG6 (ELAN_DBG_VP,"elan_validate_cap: too many vps LowNode %d HighNode %d LowContext %d highContext %d, %d >% d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext, -+ (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)), -+ ELAN_MAX_VPS); -+ -+ return (EINVAL); -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+elan_validate_map(ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vptr = NULL; -+ char space[256]; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map \n"); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map cap = %s \n",elan_capability_string(cap,space)); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map map = %s \n",elan_capability_string(map,space)); -+ -+ /* does cap exist */ -+ ptr = find_cap_node(cap); -+ if ( ptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not found \n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ /* is it active */ -+ if ( ! ptr->node.active ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not active \n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ /* are they the same */ -+ if ( ELAN_CAP_TYPE_MATCH(cap,map) -+ && ELAN_CAP_GEOM_MATCH(cap,map)) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap == map passed\n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ -+ /* is map in map list */ -+ vptr = find_vp_node(ptr, map); -+ if ( vptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map not found\n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map passed\n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_create_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_create_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* need to check that the cap does not over lap another one -+ or is an exact match with only the userkey changing */ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match (key not checked) */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) -+ && (ptr->node.owner == owner)) { -+ if ( ptr->node.active ) { -+ /* dont inc attached count as its like a create */ -+ ptr->node.cap.cap_userkey = cap->cap_userkey; -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ else -+ { -+ ELAN_DEBUG (ELAN_DBG_VP,"elan_create_cap failed %s\n", -+ elan_capability_string(&ptr->node.cap,space)); -+ ELAN_DEBUG (ELAN_DBG_VP,"elan_create_cap failed ptr %p owner %p attached %d\n", -+ ptr, owner, ptr->node.attached); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ } -+ -+ /* does it overlap, even with ones being destroyed */ -+ if (elan_cap_overlap(&ptr->node.cap,cap)) -+ { -+ ELAN_DEBUG (ELAN_DBG_VP,"elan_create_cap failed overlap %s\n", -+ elan_capability_string(&ptr->node.cap,space)); -+ ELAN_DEBUG (ELAN_DBG_VP,"elan_create_cap failed overlap ptr %p owner %p attached %d active %d\n", -+ ptr, owner, ptr->node.attached, ptr->node.active); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EACCES; -+ } -+ } -+ -+ /* create it */ -+ KMEM_ALLOC(ptr, ELAN_CAP_NODE_STRUCT *, sizeof(ELAN_CAP_NODE_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ENOMEM; -+ } -+ -+ /* create per rail space for the attached array */ -+ for(rail=0;railattached[rail]=NULL; -+ /* GNAT 7685: Always need to allocate an attached structure in rail 0 for the usercopy device */ -+ if ( ELAN_CAP_IS_RAIL_SET(cap,rail) || rail == 0 ) -+ { -+ KMEM_ALLOC(ptr->attached[rail], ELAN_ATTACHED_STRUCT *, sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap), 1); -+ if (ptr->attached[rail] == NULL) -+ { -+ for(;rail>=0;rail--) -+ if ( ptr->attached[rail] ) -+ KMEM_FREE(ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap)); -+ -+ KMEM_FREE(ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ENOMEM; -+ } -+ /* blank the per context attached array */ -+ for(i=0;iattached[rail][i].cb_func = NULL; -+ /* user-to-user copy */ -+ ptr->attached[rail][i].handle = NULL; -+ ptr->attached[rail][i].owner = NULL; -+ } -+ } -+ } -+ -+ ptr->node.owner = owner; -+ ptr->node.cap = *cap; -+ ptr->node.attached = 1; /* creator counts as attached */ -+ ptr->node.active = 1; -+ ptr->vp_list.next = &(ptr->vp_list); -+ ptr->vp_list.prev = &(ptr->vp_list); -+ -+ list_add_tail(&ptr->list, &elan_cap_list); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+void -+elan_destroy_cap_test(ELAN_CAP_NODE_STRUCT *cap_ptr) -+{ -+ /* called by someone holding the mutex */ -+ struct list_head * vp_tmp; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int rail; -+ -+ ASSERT(cap_ptr->node.attached >= 0); -+ -+ /* check to see if it can be deleted now */ -+ if ( cap_ptr->node.attached == 0 ) { -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elan_destroy_cap_test: %p attached == 0\n", cap_ptr); -+ -+ /* delete the vp list */ -+ list_for_each(vp_tmp, &(cap_ptr->vp_list)) { -+ vp_ptr = list_entry(vp_tmp, ELAN_VP_NODE_STRUCT , list); -+ list_del(&vp_ptr->list); -+ KMEM_FREE( vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ } -+ -+ list_del(&cap_ptr->list); -+ -+ /* delete space for the attached array */ -+ for(rail=0;railattached[rail]) -+ KMEM_FREE(cap_ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(&(cap_ptr->node.cap))); -+ -+ KMEM_FREE(cap_ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ } -+ else -+ ELAN_DEBUG2(ELAN_DBG_CAP,"elan_destroy_cap_test: %p attached = %d\n", -+ cap_ptr, cap_ptr->node.attached); -+ -+} -+ -+int -+elan_destroy_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * el; -+ struct list_head * nel; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ int found = 0; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_destroy_cap %s\n",elan_capability_string(cap,space)); -+ -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if ( (ptr->node.owner == owner ) -+ && ( (cap == NULL) -+ || (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)))) { -+ -+ if ( ptr->node.active ) { -+ -+ /* mark as in active and dec attached count */ -+ ptr->node.active = 0; -+ ptr->node.attached--; -+ ptr->node.owner = 0; /* no one own's it now */ -+ -+ ASSERT(ptr->node.attached >= 0); -+ -+ /* need to tell any one who was attached that this has been destroy'd */ -+ for(rail=0;railnode.cap), rail)) { -+ for(i=0;i< ELAN_CAP_NUM_CONTEXTS(&(ptr->node.cap));i++) -+ if ( ptr->attached[rail][i].cb_func != NULL) -+ ptr->attached[rail][i].cb_func(ptr->attached[rail][i].cb_args, cap, NULL); -+ } -+ -+ /* now try to destroy it */ -+ elan_destroy_cap_test(ptr); -+ -+ /* found it */ -+ found = 1; -+ } -+ } -+ } -+ -+ if ( found ) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ -+ /* failed */ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elan_destroy_cap: %p didnt find it \n", cap); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+} -+ -+int -+elan_get_caps(uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps) -+{ -+ uint results = 0; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_get_caps\n"); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ copyout(&ptr->node, &caps[results], sizeof (ELAN_CAP_STRUCT)); -+ -+ results++; -+ -+ if ( results >= array_size ) -+ { -+ copyout(&results, number_of_results, sizeof(uint)); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ } -+ -+ copyout(&results, number_of_results, sizeof(uint)); -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_create_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_create_vp\n"); -+ -+ /* the railmasks must match */ -+ if ( cap->cap_railmask != map->cap_railmask) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ /* does the cap exist */ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr == NULL) || ( cap_ptr->node.owner != owner ) || (! cap_ptr->node.active) ) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ /* is there already a mapping */ -+ vp_ptr = find_vp_node(cap_ptr,map); -+ if ( vp_ptr != NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ /* create space for mapping */ -+ KMEM_ALLOC(vp_ptr, ELAN_VP_NODE_STRUCT *, sizeof(ELAN_VP_NODE_STRUCT), 1); -+ if (vp_ptr == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ENOMEM; -+ } -+ -+ /* copy map */ -+ vp_ptr->vp = *map; -+ list_add_tail(&vp_ptr->list, &(cap_ptr->vp_list)); -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_destroy_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int i, rail; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_vp\n"); -+ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr!=NULL) && (cap_ptr->node.owner == owner) && ( cap_ptr->node.active)) -+ { -+ vp_ptr = find_vp_node( cap_ptr, map ); -+ if ( vp_ptr != NULL ) -+ { -+ list_del(&vp_ptr->list); -+ KMEM_FREE(vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ -+ /* need to tell those who are attached that map is nolonger in use */ -+ for(rail=0;railnode.cap));i++) -+ if ( cap_ptr->attached[rail][i].cb_func != NULL) -+ cap_ptr->attached[rail][i].cb_func( cap_ptr->attached[rail][i].cb_args, cap, map); -+ } -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ } -+ -+ /* didnt find it */ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+} -+ -+int -+elan_attach_cap(ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB func) -+{ -+ char space[127]; -+ struct list_head *el; -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_attach_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* currently must provide a call back, as null mean something */ -+ if ( func == NULL) -+ return (EINVAL); -+ -+ /* mycontext must be set and correct */ -+ if ( ! ELAN_CAP_VALID_MYCONTEXT(cap)) -+ return (EINVAL); -+ -+ /* rail must be one of the rails in railmask */ -+ if (((1 << rail) & cap->cap_railmask) == 0) -+ return (EINVAL); -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ list_for_each(el, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *cap_ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_MATCH(&cap_ptr->node.cap,cap) && cap_ptr->node.active) { -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( cap_ptr->attached[rail][attached_index].cb_func != NULL ) /* only one per ctx per rail */ -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EBUSY; -+ } -+ -+ /* keep track of who attached as we might need to tell them when */ -+ /* cap or maps get destroyed */ -+ cap_ptr->attached[rail][ attached_index ].cb_func = func; -+ cap_ptr->attached[rail][ attached_index ].cb_args = args; -+ cap_ptr->node.attached++; -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: passed\n"); -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: failed to find \n"); -+ -+ /* didnt find one */ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+} -+ -+int -+elan_detach_cap(ELAN_CAPABILITY *cap, unsigned int rail) -+{ -+ struct list_head *el, *nel; -+ char space[256]; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elan_detach_cap %s\n",elan_capability_string(cap,space)); -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *ptr = list_entry (el, ELAN_CAP_NODE_STRUCT, list); -+ -+ /* is it an exact match (key not checked) */ -+ if (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && -+ ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) && -+ (ptr->node.cap.cap_railmask & cap->cap_railmask) == cap->cap_railmask) { -+ -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( ptr->attached[rail][ attached_index ].cb_func == NULL ) { -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elanmod_detach_cap already removed \n"); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ -+ ptr->attached[rail][ attached_index ].cb_func = NULL; -+ ptr->attached[rail][ attached_index ].cb_args = (void *)0; -+ -+ ptr->node.attached--; -+ -+ ASSERT(ptr->node.attached >= 0); -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elanmod_detach_cap new attach count %d \n", ptr->node.attached); -+ -+ elan_destroy_cap_test(ptr); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: success\n"); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: failed to find\n"); -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+} -+ -+int -+elan_cap_dump() -+{ -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ ELAN_DEBUG2 (ELAN_DBG_ALL, "cap dump: owner %p type %x\n", ptr->node.owner, ptr->node.cap.cap_type); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL, "cap dump: LowNode %d HighNode %d LowContext %d mycontext %d highContext %d\n", -+ ptr->node.cap.cap_lownode , ptr->node.cap.cap_highnode, -+ ptr->node.cap.cap_lowcontext , ptr->node.cap.cap_mycontext, ptr->node.cap.cap_highcontext); -+ -+ } -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_usercopy_attach(ELAN_CAPABILITY *cap, ELAN_CAP_NODE_STRUCT **node_ptr, void *handle, void *owner) -+{ -+ struct list_head *el; -+ -+ /* mycontext must be set and correct */ -+ if ( ! ELAN_CAP_VALID_MYCONTEXT(cap)) -+ return -EINVAL; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ /* Search all cap node structs looking for an exact match (including key) */ -+ list_for_each(el, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *cap_ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_MATCH(&cap_ptr->node.cap,cap) && cap_ptr->node.active) { -+ char space[127]; -+ /* Work out which local process index we are */ -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ ELAN_DEBUG(ELAN_DBG_CAP, "usercopy_attach: %s\n", -+ elan_capability_string(cap,space)); -+ -+ ELAN_DEBUG(ELAN_DBG_CAP, -+ "usercopy_attach: cap_ptr %p handle %p owner %p idx %d\n", -+ cap_ptr, handle, owner, attached_index); -+ -+ /* Check we're not being called multiple times for the same local process */ -+ if (cap_ptr->attached[0][attached_index].handle) -+ { -+ ELAN_DEBUG(ELAN_DBG_CAP, -+ "usercopy_attach: cap_ptr %p idx %d already attached handle %p owner %p\n", -+ cap_ptr, attached_index, -+ cap_ptr->attached[0][attached_index].handle, -+ cap_ptr->attached[0][attached_index].owner); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return -EAGAIN; -+ } -+ -+ /* Reference count node struct */ -+ cap_ptr->node.attached++; -+ -+ /* Stash our task handle/owner off the cap node array */ -+ cap_ptr->attached[0][attached_index].handle = handle; -+ cap_ptr->attached[0][attached_index].owner = owner; -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ -+ /* Return node pointer to caller */ -+ *node_ptr = cap_ptr; -+ -+ return ESUCCESS; -+ } -+ } -+ -+ /* failed to match a cap */ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return -EINVAL; -+} -+ -+int -+elan_usercopy_detach(ELAN_CAP_NODE_STRUCT *cap_ptr, void *owner) -+{ -+ int i; -+ -+ /* NB: The usercopy code holds a read lock on this rwlock and -+ * hence we will block here if exit_fs() gets called during a -+ * copy to this process -+ */ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ /* Find this process in the attached task handle/owner array */ -+ for(i=0; i< ELAN_CAP_NUM_CONTEXTS((&cap_ptr->node.cap)); i++) -+ { -+ if (cap_ptr->attached[0][i].owner == owner) -+ { -+ ELAN_DEBUG(ELAN_DBG_CAP, -+ "usercopy_detach: cap_ptr %p handle %p owner %p id %d\n", -+ cap_ptr, cap_ptr->attached[0][i].handle, owner, i); -+ -+ /* Clear our task handle/owner off the cap node array */ -+ cap_ptr->attached[0][i].handle = NULL; -+ cap_ptr->attached[0][i].owner = NULL; -+ -+ /* Reference count node struct */ -+ cap_ptr->node.attached--; -+ -+ ASSERT(cap_ptr->node.attached >= 0); -+ -+ /* May need to destroy cap if reference count has hit zero */ -+ elan_destroy_cap_test(cap_ptr); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG(ELAN_DBG_CAP, "usercopy_detach: cap_ptr %p[%d] failed owner %p\n", -+ cap_ptr, cap_ptr->node.attached, owner); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ -+ return -EINVAL; -+} -+ -+/* Returns the associated handle for the supplied ctxId process in the cap node */ -+/* Should be called holding a read lock on the elan_rwlock */ -+int -+elan_usercopy_handle(ELAN_CAP_NODE_STRUCT *cap_ptr, int ctxId, void **handlep) -+{ -+ int res = ESUCCESS; -+ void *handle; -+ -+ /* Sanity check argument */ -+ if (ctxId < 0 || ctxId >= ELAN_CAP_NUM_CONTEXTS(&(cap_ptr->node.cap))) -+ return -EINVAL; -+ -+// ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ /* Get the task handle for the remote process */ -+ if ((handle = cap_ptr->attached[0][ctxId].handle) == NULL) -+ res = -EAGAIN; -+ -+// ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ -+ *handlep = handle; -+ -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/capability_general.c linux-2.6.9/drivers/net/qsnet/elan/capability_general.c ---- clean/drivers/net/qsnet/elan/capability_general.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/capability_general.c 2004-02-25 08:47:59.000000000 -0500 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability_general.c,v 1.10 2004/02/25 13:47:59 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/capability_general.c,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+#else -+ -+#include -+#include -+#include -+ -+#endif -+ -+#include -+ -+ -+void -+elan_nullcap (ELAN_CAPABILITY *cap) -+{ -+ register int i; -+ -+ for (i = 0; i < sizeof (cap->cap_userkey)/sizeof(cap->cap_userkey.key_values[0]); i++) -+ cap->cap_userkey.key_values[i] = ELAN_CAP_UNINITIALISED; -+ -+ cap->cap_lowcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_highcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_mycontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_lownode = ELAN_CAP_UNINITIALISED; -+ cap->cap_highnode = ELAN_CAP_UNINITIALISED; -+ cap->cap_railmask = ELAN_CAP_UNINITIALISED; -+ cap->cap_type = ELAN_CAP_UNINITIALISED; -+ cap->cap_spare = 0; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ -+ for (i = 0; i < sizeof (cap->cap_bitmap)/sizeof (cap->cap_bitmap[0]); i++) -+ cap->cap_bitmap[i] = 0; -+} -+ -+char * -+elan_capability_string (ELAN_CAPABILITY *cap, char *str) -+{ -+ if (cap == NULL) -+ sprintf (str, "[-.-.-.-] cap = NULL\n"); -+ else -+ sprintf (str, "[%x.%x.%x.%x] Version %x Type %x \n" -+ "Context %x.%x.%x Node %x.%x\n", -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3], -+ cap->cap_version, cap->cap_type, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, -+ cap->cap_lownode, cap->cap_highnode); -+ -+ return (str); -+} -+ -+ELAN_LOCATION -+elan_vp2location (u_int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int i, vp, node, context, nnodes, nctxs; -+ -+ vp = 0; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0, i = 0; node < nnodes; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if (vp == process) -+ { -+ /* Return relative indices within the capability box */ -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0, i = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if (vp == process) -+ { -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ return( location ); -+} -+ -+int -+elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap) -+{ -+ int vp, node, context, nnodes, nctxs; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ vp = 0; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0 ; node < nnodes ; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ /* Failed to find it */ -+ return( -1 ); -+} -+ -+/* Return the number of processes as described by a capability */ -+int -+elan_nvps (ELAN_CAPABILITY *cap) -+{ -+ int i, c, nbits = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) -+ return (nbits); -+ -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (cap->cap_bitmap, i)) -+ c++; -+ -+ return (c); -+} -+ -+/* Return the number of local processes on a given node as described by a capability */ -+int -+elan_nlocal (int node, ELAN_CAPABILITY *cap) -+{ -+ int vp; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ for (vp = 0; vp < elan_nvps(cap); vp++) -+ { -+ loc = elan_vp2location(vp, cap); -+ if (loc.loc_node == node) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the maximum number of local processes on any node as described by a capability */ -+int -+elan_maxlocal (ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext - cap->cap_lowcontext + 1); -+} -+ -+/* Return the vps of the local processes on a given node as described by a capability */ -+int -+elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size) -+{ -+ int context; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ loc.loc_node = node; -+ -+ for (context = 0; context < MIN(size, elan_maxlocal(cap)); context++) -+ { -+ loc.loc_context = context; -+ -+ /* Should return -1 if none found */ -+ if ( (vps[context] = elan_location2vp( loc, cap )) != -1) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the number of rails that this capability utilises */ -+int -+elan_nrails (ELAN_CAPABILITY *cap) -+{ -+ int nrails = 0; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ while (railmask) -+ { -+ if (railmask & 1) -+ nrails++; -+ -+ railmask >>= 1; -+ } -+ } -+ else -+ /* Default to just one rail */ -+ nrails = 1; -+ -+ return (nrails); -+} -+ -+/* Fill out an array giving the physical rail numbers utilised by a capability */ -+int -+elan_rails (ELAN_CAPABILITY *cap, int *rails) -+{ -+ int nrails, rail; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ nrails = rail = 0; -+ while (railmask) -+ { -+ if (railmask & 1) -+ rails[nrails++] = rail; -+ -+ rail++; -+ railmask >>= 1; -+ } -+ } -+ else -+ { -+ /* Default to just one rail */ -+ rails[0] = 0; -+ nrails = 1; -+ } -+ -+ return( nrails ); -+} -+ -+int -+elan_cap_overlap(ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2) -+{ -+ /* by context */ -+ if ( cap1->cap_highcontext < cap2->cap_lowcontext ) return (0); -+ if ( cap1->cap_lowcontext > cap2->cap_highcontext) return (0); -+ -+ /* by node */ -+ if ( cap1->cap_highnode < cap2->cap_lownode ) return (0); -+ if ( cap1->cap_lownode > cap2->cap_highnode) return (0); -+ -+ /* by rail */ -+ /* they overlap if they have a rail in common */ -+ return (cap1->cap_railmask & cap2->cap_railmask); -+} -+ -+#if !defined(__KERNEL__) -+ -+/* Fill out an array that hints at the best use of the rails on a -+ * per process basis. The library user can then decide whether or not -+ * to take this into account (e.g. TPORTs) -+ * All processes calling this fn will be returned the same information. -+ */ -+int -+elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp) -+{ -+ int i; -+ int nrails = elan_nrails(cap); -+ int maxlocal = elan_maxlocal(cap); -+ -+ /* Test for a multi-rail capability */ -+ if (! (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL)) -+ { -+ /* Default to just one rail */ -+ for (i = 0; i < nvp; i++) -+ pref[i] = 0; -+ -+ return( 0 ); -+ } -+ -+ /* -+ * We allocate rails on a per node basis sharing our the rails -+ * equally amongst the local processes. However, if there is only -+ * one process per node and multiple rails, then we use a different -+ * algorithm where rails are allocated across all the processes in -+ * a round-robin fashion -+ */ -+ -+ if (maxlocal == 1) -+ { -+ /* Allocate rails in a round-robin manner */ -+ for (i = 0; i < nvp; i++) -+ *pref++ = i % nrails; -+ } -+ else -+ { -+ int node; -+ int *vps; -+ int nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ -+ vps = (int *) malloc(sizeof(int)*maxlocal); -+ -+ /* Grab the local process info for each node and allocate -+ * rails to those vps on an equal basis -+ */ -+ for (node = 0; node < nnodes; node++) -+ { -+ int nlocal; -+ int pprail; -+ -+ /* Grab an array of local vps */ -+ nlocal = elan_localvps(node, cap, vps, maxlocal); -+ -+ /* Calculate the number processes per rail */ -+ if ((pprail = nlocal/nrails) == 0) -+ pprail = 1; -+ -+ /* Allocate processes to rails */ -+ for (i = 0; i < nlocal; i++) -+ { -+ pref[vps[i]] = (i / pprail) % nrails; -+ } -+ } -+ -+ free(vps); -+ } -+ -+ return( 0 ); -+} -+ -+void -+elan_get_random_key(ELAN_USERKEY *key) -+{ -+ int i; -+ for (i = 0; i < sizeof(key->key_values) / sizeof(key->key_values[0]); i++) -+ key->key_values[i] = lrand48(); -+} -+ -+int elan_lowcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lowcontext); -+} -+ -+int elan_mycontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_mycontext); -+} -+ -+int elan_highcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext); -+} -+ -+int elan_lownode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lownode); -+} -+ -+int elan_highnode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highnode); -+} -+ -+int elan_captype(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_type); -+} -+ -+int elan_railmask(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_railmask); -+} -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/device.c linux-2.6.9/drivers/net/qsnet/elan/device.c ---- clean/drivers/net/qsnet/elan/device.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/device.c 2005-04-13 05:31:47.000000000 -0400 -@@ -0,0 +1,147 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.6 2005/04/13 09:31:47 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_dev_list); -+ -+ELAN_DEV_STRUCT * -+elan_dev_find (ELAN_DEV_IDX devidx) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ if (ptr->devidx == devidx) -+ return ptr; -+ if (ptr->devidx > devidx) -+ return ERR_PTR(-ENXIO); -+ } -+ -+ return ERR_PTR(-EINVAL); -+} -+ -+ELAN_DEV_STRUCT * -+elan_dev_find_byrail (unsigned short deviceid, unsigned rail) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"elan_dev_find_byrail devidx %d - %04x %04x, %d %d \n", ptr->devidx, -+ ptr->devinfo->dev_device_id, deviceid, ptr->devinfo->dev_rail, rail); -+ -+ if (ptr->devinfo->dev_device_id == deviceid && ptr->devinfo->dev_rail == rail) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+ELAN_DEV_IDX -+elan_dev_register (ELAN_DEVINFO *devinfo, ELAN_DEV_OPS *ops, void * user_data) -+{ -+ ELAN_DEV_STRUCT *ptr; -+ ELAN_DEV_IDX devidx = 0; -+ struct list_head *tmp; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ /* is it already registered */ -+ if ((ptr = elan_dev_find_byrail(devinfo->dev_device_id, devinfo->dev_rail)) != NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ /* find a free device idx */ -+ list_for_each (tmp, &elan_dev_list) { -+ if (list_entry (tmp, ELAN_DEV_STRUCT, node)->devidx != devidx) -+ break; -+ devidx++; -+ } -+ -+ /* create it and add */ -+ KMEM_ALLOC(ptr, ELAN_DEV_STRUCT *, sizeof(ELAN_DEV_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ENOMEM; -+ } -+ -+ ptr->devidx = devidx; -+ ptr->ops = ops; -+ ptr->devinfo = devinfo; -+ ptr->user_data = user_data; -+ -+ /* insert this entry *before* the last entry we've found */ -+ list_add_tail(&ptr->node, tmp); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_deregister (ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ if ((target = elan_dev_find_byrail (devinfo->dev_device_id, devinfo->dev_rail)) == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return EINVAL; -+ } -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_DEV_STRUCT)); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_dump () -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG3 (ELAN_DBG_ALL,"dev dump: index %u rail %u elan%c\n", -+ ptr->devidx, ptr->devinfo->dev_rail, '3' + ptr->devinfo->dev_device_id); -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"dev dump: Vid %x Did %x Rid %x DR %d DVal %x\n", -+ ptr->devinfo->dev_vendor_id, -+ ptr->devinfo->dev_device_id, -+ ptr->devinfo->dev_revision_id, -+ ptr->devinfo->dev_driver_version, -+ ptr->devinfo->dev_num_down_links_value); -+ -+ } -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/devinfo.c linux-2.6.9/drivers/net/qsnet/elan/devinfo.c ---- clean/drivers/net/qsnet/elan/devinfo.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/devinfo.c 2005-04-13 05:31:47.000000000 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.c,v 1.6 2005/04/13 09:31:47 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.c,v $*/ -+ -+#include -+#include -+ -+int -+elan_get_devinfo(ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ target = elan_dev_find (devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ { -+ copyout(target->devinfo, devinfo, sizeof(ELAN_DEVINFO)); -+ res = ESUCCESS; -+ } -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+int -+elan_get_position(ELAN_DEV_IDX devidx, ELAN_POSITION *position) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ res = target->ops->get_position(target->user_data, position); -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+int -+elan_set_position(ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR (target); -+ else -+ res = target->ops->set_position(target->user_data, nodeId, numNodes); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/elanmod.c linux-2.6.9/drivers/net/qsnet/elan/elanmod.c ---- clean/drivers/net/qsnet/elan/elanmod.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/elanmod.c 2005-04-13 05:31:47.000000000 -0400 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: elanmod.c,v 1.12 2005/04/13 09:31:47 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.c,v $*/ -+ -+#include -+#include -+ -+ELANMOD_RWLOCK elan_rwlock; -+ -+int -+elan_init() -+{ -+ ELANMOD_RWLOCK_INIT(&elan_rwlock); -+ return (ESUCCESS); -+} -+ -+int -+elan_fini() -+{ -+ ELANMOD_RWLOCK_DESTROY(&elan_rwlock); -+ return (ESUCCESS); -+} -+ -+int -+elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext == ELAN_CAP_UNINITIALISED || cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: LowContext %d HighContext %d MyContext %d\n", -+ cap->cap_lowcontext , cap->cap_highcontext, cap->cap_mycontext); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if (position->pos_mode == ELAN_POS_UNKNOWN) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: Position Unknown \n"); -+ return (-EAGAIN); -+ } -+ -+ if ( ! ( ELAN_USER_CONTEXT(cap->cap_lowcontext) && ELAN_USER_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG4 (ELAN_DBG_VP, "elanmod_classify_cap: USER_BASE_CONTEXT %d %d %d %d \n" , ELAN_USER_BASE_CONTEXT_NUM,cap->cap_lowcontext, cap->cap_highcontext ,ELAN_USER_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ if (cap->cap_lownode == ELAN_CAP_UNINITIALISED) -+ cap->cap_lownode = position->pos_nodeid; -+ if (cap->cap_highnode == ELAN_CAP_UNINITIALISED) -+ cap->cap_highnode = position->pos_nodeid; -+ -+ if (cap->cap_lownode < 0 || cap->cap_highnode >= position->pos_nodes || cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG3 ( ELAN_DBG_VP,"elanmod_classify_cap: low %d high %d pos %d \n" , cap->cap_lownode ,cap->cap_highnode, position->pos_nodes); -+ -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_highnode < position->pos_nodeid) || (cap->cap_lownode > position->pos_nodeid)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: node not i range low %d high %d this %d\n", -+ cap->cap_lownode, cap->cap_highnode, position->pos_nodeid); -+ return (-EINVAL); -+ } -+ -+ break; -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: cant decode type %x \n", cap->cap_type & ELAN_CAP_TYPE_MASK); -+ return (-EINVAL); -+ -+ } -+ -+ switch (use) -+ { -+ case ELAN_USER_ATTACH: -+ case ELAN_USER_DETACH: -+ if (cap->cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext == ELAN_CAP_UNINITIALISED"); -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_mycontext != ELAN_CAP_UNINITIALISED) && -+ (cap->cap_mycontext < cap->cap_lowcontext || cap->cap_mycontext > cap->cap_highcontext)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext out of range %d %d %d \n", cap->cap_lowcontext,cap->cap_mycontext,cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ break; -+ -+ case ELAN_USER_P2P: -+ break; -+ -+ case ELAN_USER_BROADCAST: -+ if (! (cap->cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: use ELAN_USER_BROADCAST but cap not ELAN_CAP_TYPE_BROADCASTABLE\n"); -+ return (-EINVAL); -+ } -+ break; -+ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: unknown use (%d)\n",use); -+ return (-EINVAL); -+ } -+ -+ -+ -+ /* is any ctxt an rms one ?? */ -+ if (ELAN_RMS_CONTEXT(cap->cap_lowcontext) || ELAN_RMS_CONTEXT(cap->cap_highcontext)) -+ { -+ /* so both low and high must be */ -+ if (!(ELAN_RMS_CONTEXT(cap->cap_lowcontext) && ELAN_RMS_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: not rms ctxt %x %x\n",cap->cap_lowcontext,cap->cap_highcontext ); -+ return (-EINVAL); -+ } -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_RMS\n"); -+ return (ELAN_CAP_RMS); -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_OK\n"); -+ return (ELAN_CAP_OK); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/elanmod_linux.c linux-2.6.9/drivers/net/qsnet/elan/elanmod_linux.c ---- clean/drivers/net/qsnet/elan/elanmod_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/elanmod_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,544 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.c,v 1.23.2.6 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan support module"); -+ -+MODULE_LICENSE("GPL"); -+ -+/* elanmod.c */ -+EXPORT_SYMBOL(elanmod_classify_cap); -+ -+/* bitmap.c */ -+#include -+ -+EXPORT_SYMBOL(bt_freebit); -+EXPORT_SYMBOL(bt_lowbit); -+EXPORT_SYMBOL(bt_nextbit); -+EXPORT_SYMBOL(bt_copy); -+EXPORT_SYMBOL(bt_zero); -+EXPORT_SYMBOL(bt_fill); -+EXPORT_SYMBOL(bt_cmp); -+EXPORT_SYMBOL(bt_intersect); -+EXPORT_SYMBOL(bt_remove); -+EXPORT_SYMBOL(bt_add); -+EXPORT_SYMBOL(bt_spans); -+EXPORT_SYMBOL(bt_subset); -+EXPORT_SYMBOL(bt_up); -+EXPORT_SYMBOL(bt_down); -+EXPORT_SYMBOL(bt_nbits); -+ -+/* capability.c */ -+EXPORT_SYMBOL(elan_nullcap); -+EXPORT_SYMBOL(elan_detach_cap); -+EXPORT_SYMBOL(elan_attach_cap); -+EXPORT_SYMBOL(elan_validate_map); -+ -+/* stats.c */ -+EXPORT_SYMBOL(elan_stats_register); -+EXPORT_SYMBOL(elan_stats_deregister); -+ -+/* device.c */ -+EXPORT_SYMBOL(elan_dev_deregister); -+EXPORT_SYMBOL(elan_dev_register); -+ -+/* debug */ -+int elan_debug_mode = QSNET_DEBUG_BUFFER; -+int elan_debug_mask; -+ -+static struct proc_dir_entry *elan_procfs_root; -+ -+extern void elan_procfs_init(void); -+extern void elan_procfs_fini(void); -+ -+static int elan_open (struct inode *ino, struct file *fp); -+static int elan_release (struct inode *ino, struct file *fp); -+static int elan_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static int elan_user_open (struct inode *ino, struct file *fp); -+static int elan_user_release (struct inode *ino, struct file *fp); -+static int elan_user_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations elan_fops = -+{ -+ ioctl: elan_ioctl, -+ open: elan_open, -+ release: elan_release, -+}; -+ -+static struct file_operations elan_user_fops = -+{ -+ ioctl: elan_user_ioctl, -+ open: elan_user_open, -+ release: elan_user_release, -+}; -+ -+static int __init elan_start(void) -+{ -+ int res; -+ -+ elan_procfs_init(); -+ -+ if ((res = elan_init()) != ESUCCESS) -+ { -+ elan_procfs_fini(); -+ return (-res); -+ } -+ -+ return (0); -+} -+ -+static void __exit elan_exit(void) -+{ -+ elan_fini(); -+ elan_procfs_fini(); -+} -+ -+ -+/* Declare the module init and exit functions */ -+void -+elan_procfs_init() -+{ -+ struct proc_dir_entry *p; -+ -+ elan_procfs_root = proc_mkdir("elan", qsnet_procfs_root); -+ -+ if (elan_procfs_root == NULL) -+ return; -+ -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mask", &elan_debug_mask, 0); -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mode", &elan_debug_mode, 0); -+ -+ if ((p = create_proc_entry ("ioctl", 0, elan_procfs_root)) != NULL) -+ { -+ p->proc_fops = &elan_fops; -+ p->data = 0; -+ p->owner = THIS_MODULE; -+ } -+ -+ /* user entry point */ -+ if ((p = create_proc_entry ("user", 0, elan_procfs_root)) != NULL) -+ { -+ p->proc_fops = &elan_user_fops; -+ p->data = 0; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+elan_procfs_fini() -+{ -+ if (elan_procfs_root == NULL) -+ return; -+ -+ remove_proc_entry ("debug_mask", elan_procfs_root); -+ remove_proc_entry ("debug_mode", elan_procfs_root); -+ -+ remove_proc_entry ("ioctl", elan_procfs_root); -+ -+ /* remove user entry point */ -+ remove_proc_entry ("user", elan_procfs_root); -+ -+ remove_proc_entry ("elan", qsnet_procfs_root); -+} -+ -+module_init(elan_start); -+module_exit(elan_exit); -+ -+static int -+elan_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+elan_release (struct inode *inode, struct file *fp) -+{ -+ /* mark all caps owned by fp to be destroyed */ -+ elan_destroy_cap(fp,NULL); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+elan_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int rep = 0; -+ -+ switch (cmd) -+ { -+ case ELANCTRL_STATS_GET_NEXT : -+ { -+ ELANCTRL_STATS_GET_NEXT_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_NEXT_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_next_index(args.statidx, args.next_statidx) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_FIND_INDEX : -+ { -+ ELANCTRL_STATS_FIND_INDEX_STRUCT args; -+ char block_name[ELAN_STATS_NAME_MAX_LEN+1]; -+ int res; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_FIND_INDEX_STRUCT))) -+ return (-EFAULT); -+ -+ res = strncpy_from_user (block_name, args.block_name, sizeof (block_name)); -+ -+ if (res == 0 || res == sizeof (block_name)) -+ return -ERANGE; -+ if (res < 0) -+ return res; -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_find_index(block_name, args.statidx, args.num_entries) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK_INFO : -+ { -+ ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block_info(args.statidx, args.block_name, args.num_entries) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_INDEX_NAME : -+ { -+ ELANCTRL_STATS_GET_INDEX_NAME_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_INDEX_NAME_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_index_name(args.statidx, args.index, args.name) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_CLEAR_BLOCK : -+ { -+ ELANCTRL_STATS_CLEAR_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_CLEAR_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* statidx is not a pointer */ -+ if (elan_stats_clear_block(args.statidx) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK : -+ { -+ ELANCTRL_STATS_GET_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block(args.statidx, args.entries, args.values) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_DEVINFO : -+ { -+ ELANCTRL_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_devinfo(args.devidx, args.devinfo) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_POSITION : -+ { -+ ELANCTRL_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_position(args.devidx, args.position) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_SET_POSITION : -+ { -+ ELANCTRL_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_set_position(args.devidx, args.nodeId, args.numNodes) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_CREATE_CAP : -+ { -+ ELANCTRL_CREATE_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_CAP_STRUCT *, sizeof(ELANCTRL_CREATE_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_CAP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if (((rep = elan_validate_cap(&args->cap)) != 0) || ((rep = elan_create_cap(fp,&args->cap)) != 0)) -+ rep = (-rep); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_CAP : -+ { -+ ELANCTRL_DESTROY_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_CAP_STRUCT *, sizeof(ELANCTRL_DESTROY_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_CAP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_cap(fp, &args->cap) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_CREATE_VP : -+ { -+ ELANCTRL_CREATE_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_VP_STRUCT *, sizeof(ELANCTRL_CREATE_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_VP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if ((elan_validate_cap( &args->map) != 0) || (elan_create_vp(fp, &args->cap, &args->map) != 0 )) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_VP_STRUCT )); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_VP : -+ { -+ ELANCTRL_DESTROY_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_VP_STRUCT *, sizeof(ELANCTRL_DESTROY_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_VP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_vp(fp, &args->cap, &args->map) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_VP_STRUCT )); -+ -+ break; -+ } -+ -+ case ELANCTRL_GET_CAPS : -+ { -+ ELANCTRL_GET_CAPS_STRUCT args; -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_CAPS_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_caps(args.number_of_results, args.array_size, args.caps) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_DEBUG_DUMP : -+ { -+ elan_cap_dump(); -+ elan_dev_dump(); -+ -+ break; -+ } -+ case ELANCTRL_DEBUG_BUFFER : -+ { -+ ELANCTRL_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if ((args.size = qsnet_debug_buffer (args.buffer, args.size)) != -1 && -+ copy_to_user ((void *) arg, &args, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ break; -+ } -+ default: -+ return (-EINVAL); -+ break; -+ } -+ -+ return (rep); -+} -+ -+ -+static int -+elan_user_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+elan_user_release (struct inode *inode, struct file *fp) -+{ -+ struct elan_cap_node_struct *cap_ptr = (struct elan_cap_node_struct *)fp->private_data; -+ -+ if (cap_ptr) { -+ /* Remove this process from usercopy system */ -+ /* GNAT 7498: New to pass in a common owner pointer */ -+ if (elan_usercopy_detach (cap_ptr, fp) == 0) -+ fp->private_data = NULL; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+elan_user_ioctl (struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int rep = 0; -+#if !defined(NO_MMPUT) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9) -+ struct elan_cap_node_struct *cap_ptr = (struct elan_cap_node_struct *)fp->private_data; -+#endif -+ -+ switch (cmd) -+ { -+#if !defined(NO_MMPUT) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9) -+ case ELANCTRL_USERCOPY_ATTACH: -+ { -+ ELANCTRL_USERCOPY_ATTACH_STRUCT args; -+ -+ /* Are we already attached ? */ -+ if (cap_ptr != NULL) -+ return -EAGAIN; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_USERCOPY_ATTACH_STRUCT))) -+ return -EFAULT; -+ -+ /* Lookup the associated cap node and can check we are allowed to -+ * access it using the supplied capability. If allowed, then associate -+ * our task with that cap node -+ * We also reference count it as we then hang it off the file pointer -+ */ -+ /* GNAT 7498: New to pass in a common owner pointer */ -+ if ((rep = elan_usercopy_attach(&args.cap, &cap_ptr, current, fp)) < 0) -+ return -EAGAIN; -+ -+ /* Hang cap node off file pointer for future usercopy ioctls */ -+ fp->private_data = (void *) cap_ptr; -+ -+ break; -+ } -+ case ELANCTRL_USERCOPY_DETACH: -+ { -+ /* Detach process */ -+ if (cap_ptr) { -+ /* Remove this process from usercopy system */ -+ /* GNAT 7498: New to pass in a common owner pointer */ -+ if ((rep = elan_usercopy_detach (cap_ptr, fp)) == 0) -+ fp->private_data = NULL; -+ } -+ else -+ rep = -EINVAL; -+ -+ break; -+ } -+ case ELANCTRL_USERCOPY: -+ { -+ ELANCTRL_USERCOPY_STRUCT args; -+ -+ /* Check that we have previously successfully attached */ -+ if (cap_ptr == NULL) -+ return -EAGAIN; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_USERCOPY_STRUCT))) -+ return (-EFAULT); -+ -+ /* Perform user-to-user copy */ -+ rep = elan_usercopy(args.remote, args.local, args.len, args.write, args.ctxId, cap_ptr); -+ -+ break; -+ } -+#endif /* !defined(NO_MMPUT) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9) */ -+ default: -+ return (-EINVAL); -+ break; -+ } -+ -+ return (rep); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/Makefile linux-2.6.9/drivers/net/qsnet/elan/Makefile ---- clean/drivers/net/qsnet/elan/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/Makefile 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_QSNET) += elan.o -+elan-objs := elanmod.o device.o stats.o devinfo.o capability.o usercopy.o elanmod_linux.o capability_general.o bitmap.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/elan/Makefile.conf linux-2.6.9/drivers/net/qsnet/elan/Makefile.conf ---- clean/drivers/net/qsnet/elan/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/Makefile.conf 2005-09-07 10:39:36.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan.o -+MODULENAME = elan -+KOBJFILES = elanmod.o device.o stats.o devinfo.o capability.o usercopy.o elanmod_linux.o capability_general.o bitmap.o -+EXPORT_KOBJS = elanmod_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/elan/quadrics_version.h linux-2.6.9/drivers/net/qsnet/elan/quadrics_version.h ---- clean/drivers/net/qsnet/elan/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/elan/stats.c linux-2.6.9/drivers/net/qsnet/elan/stats.c ---- clean/drivers/net/qsnet/elan/stats.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/stats.c 2005-04-13 05:31:47.000000000 -0400 -@@ -0,0 +1,277 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.c,v 1.7 2005/04/13 09:31:47 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_stats_list); -+static ELAN_STATS_IDX elan_next_statidx=0; -+ -+ELAN_STATS_STRUCT * -+elan_stats_find(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if ( ptr->statidx == statidx ) -+ return ptr; -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find failed %d\n", statidx); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_by_name(caddr_t block_name) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if (!strcmp(ptr->block_name, block_name)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_CTRL, "elan_stats_find_by_name found %s (%d,%d)\n", block_name, ptr->statidx, ptr->num_entries); -+ return ptr; -+ } -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find_by_name failed %s\n", block_name); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_next(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ -+ if ( ptr->statidx > statidx ) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+int -+elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_block) -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX next = 0; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ if ((target = elan_stats_find_next(statidx)) != NULL) -+ next = target->statidx; -+ -+ copyout(&next, next_block, sizeof(ELAN_STATS_IDX) ); -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return 0; -+} -+ -+int -+elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries) -+ -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX index = 0; -+ uint entries = 0; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_find_index %s \n", block_name); -+ -+ if ((target = elan_stats_find_by_name(block_name)) != NULL) -+ { -+ index = target->statidx; -+ entries = target->num_entries; -+ } -+ -+ ELAN_DEBUG3(ELAN_DBG_CTRL, "elan_stats_find_index found %d %d (target=%p)\n", index, entries, target); -+ -+ copyout(&index, statidx, sizeof(ELAN_STATS_IDX)); -+ copyout(&entries, num_entries, sizeof(uint)); -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ESUCCESS; -+} -+ -+int -+elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_block_info statidx %d\n",statidx); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_block_info name %s entries %d\n",block_name, *num_entries); -+ -+ copyout( target->block_name, block_name, ELAN_STATS_NAME_MAX_LEN); -+ copyout(&target->num_entries, num_entries, sizeof(uint)); -+ -+ res = ESUCCESS; -+ } -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+int -+elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_index_name statidx %d index %d\n",statidx, index); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_name== NULL) -+ { -+ ELAN_DEBUG0(ELAN_DBG_CTRL, "elan_stats_get_index_name no callback\n"); -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+ } -+ -+ if ((res = target->ops->elan_stats_get_name(target->arg, index, name)) == 0) -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_index_name name %s\n",name); -+ -+ } -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+int -+elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_block == NULL) -+ { -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_get_block(target->arg, entries, values); -+ } -+ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+int -+elan_stats_clear_block (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_clear_block == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_clear_block(target->arg); -+ } -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return res; -+} -+ -+void -+elan_stats_next_statidx(void) -+{ -+ /* XXXXX need to put not in use check here incase we loop MRH */ -+ /* tho its a bigish loop :) */ -+ elan_next_statidx++; -+ if (!elan_next_statidx) -+ elan_next_statidx++; -+} -+ -+int -+elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ -+ /* create it and add */ -+ KMEM_ALLOC(target, ELAN_STATS_STRUCT *, sizeof(ELAN_STATS_STRUCT), 1); -+ if (target == NULL) -+ { -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return ENOMEM; -+ } -+ -+ elan_stats_next_statidx(); -+ -+ *statidx = elan_next_statidx; -+ -+ target->statidx = elan_next_statidx; -+ target->num_entries = num_entries; -+ target->ops = ops; -+ target->arg = arg; -+ strcpy(target->block_name, block_name); -+ -+ list_add_tail(&target->node, &elan_stats_list); -+ -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ return 0; -+} -+ -+int -+elan_stats_deregister (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ ELANMOD_RWLOCK_WRITE(&elan_rwlock); -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_STATS_STRUCT)); -+ } -+ ELANMOD_RWLOCK_WRITE_UNLOCK(&elan_rwlock); -+ -+ return target == NULL ? EINVAL : 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan/usercopy.c linux-2.6.9/drivers/net/qsnet/elan/usercopy.c ---- clean/drivers/net/qsnet/elan/usercopy.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan/usercopy.c 2005-09-06 05:06:58.000000000 -0400 -@@ -0,0 +1,198 @@ -+/* -+ * Copyright (c) 2005 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: usercopy.c,v 1.10.2.6 2005/09/06 09:06:58 addy Exp $" -+/*$Source: /cvs/master/quadrics/elanmod/modsrc/usercopy.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Access another process' address space copying directly to/from user space (current) -+ * -+ * Remote is the non-local process memory address, which we access using get_user_pages() and kmap() -+ * For the local memory (i.e. owned by current task) we use the standard copy_[to|from]_user interfaces -+ * -+ * Code based on linux/kernel/ptrace.c -+ */ -+ -+#if defined(NO_MMPUT) || LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) -+static size_t -+rw_process_vm (struct task_struct *tsk, unsigned long remote, void *local, size_t len, int write) -+{ -+#warning "NO EXPORTED MMPUT - usercopy not possible" -+ -+ /* Without an exported mmput() function we cannot make this -+ * safe as the remote process may be torn down during the copy -+ * I experimented with taking a write lock on the remote mmap_sem -+ * but this seemed to lead to deadlocks when pagefaulting -+ */ -+ /* GNAT 7768: We have also found that some older versions of the get_task_mm() code -+ * in linux/sched.h call mmgrab() which is not exported in any 2.6.X kernel -+ */ -+ return 0; -+} -+ -+#else -+static size_t -+rw_process_vm (struct task_struct *tsk, unsigned long remote, void *local, size_t len, int write) -+{ -+ struct mm_struct *mm; -+ struct vm_area_struct *vma; -+ struct page *page; -+ void *old_buf = local; -+ -+ if (write) -+ ELAN_DEBUG5(ELAN_DBG_USERCOPY, "%p remote write from %p to %lx len %ld tsk %p\n", -+ current, local, remote, (long)len, tsk); -+ else -+ ELAN_DEBUG5(ELAN_DBG_USERCOPY, "%p remote read from %lx to %p len %ld tsk %p\n", -+ current, remote, local, (long)len, tsk); -+ -+ /* This locks the task, grabs a reference to the mm and then unlocks the task */ -+ mm = get_task_mm(tsk); -+ -+ if (!mm) -+ { -+ /* GNAT 7777: Must drop lock before returning */ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return 0; -+ } -+ -+ /* Do not try and copy from ourselves! */ -+ if (mm == current->mm) -+ { -+ /* GNAT 7777: Must now drop the elanmod lock as otherwise we can create a deadlock -+ * during the mmput() due it it calling exit_mmap() for the remote process -+ */ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ mmput(mm); -+ return 0; -+ } -+ -+ down_read(&mm->mmap_sem); -+ -+ /* ignore errors, just check how much was sucessfully transfered */ -+ while (len) { -+ size_t bytes, ret, offset; -+ void *maddr; -+ -+ ret = get_user_pages(tsk, mm, remote, 1, write, 1, &page, &vma); -+ if (ret <= 0) -+ break; -+ -+ bytes = len; -+ offset = remote & (PAGE_SIZE-1); -+ if (bytes > PAGE_SIZE-offset) -+ bytes = PAGE_SIZE-offset; -+ -+ maddr = kmap(page); -+ if (write) { -+ if (copy_from_user(/* remote to */maddr + offset, /* user from */local, bytes)) { -+ kunmap(page); -+ page_cache_release(page); -+ break; -+ } -+ set_page_dirty_lock(page); -+ } else { -+ if (copy_to_user(/* user to */local, /* remote from */maddr + offset, bytes)) { -+ kunmap(page); -+ page_cache_release(page); -+ break; -+ } -+ } -+ kunmap(page); -+ page_cache_release(page); -+ len -= bytes; -+ local += bytes; -+ remote += bytes; -+ } -+ -+ up_read(&mm->mmap_sem); -+ -+ /* GNAT 7777: Must now drop the elanmod lock as otherwise can we create a deadlock -+ * during the mmput() due it it calling exit_mmap() in the remote process -+ */ -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ mmput(mm); -+ -+ /* Return num bytes copied */ -+ return local - old_buf; -+} -+#endif /* !defined(NO_MMPUT) || LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) */ -+ -+int -+elan_usercopy (void *remote, void *local, size_t len, int write, int ctxId, struct elan_cap_node_struct *cap_ptr) -+{ -+ int ret = 0; -+ size_t bytes; -+ -+ struct task_struct *tsk; -+ -+ /* Grab a read lock on elanmod lock -+ * -+ * This prevents any process from exiting whilst the copy is in progress -+ * as it will need to take a write lock on the elanmod lock in order to do so -+ * As exit_fs() is called before the task is destroyed this should prevent -+ * the remote tsk from being torn down during the copy -+ * -+ * It would be much easier if we could just use get_task_struct()/put_task_struct() -+ * but __put_task_struct() is not exported by the 2.6.X kernels - sigh. -+ */ -+ ELANMOD_RWLOCK_READ(&elan_rwlock); -+ -+ /* Get the task handle from the cap node for the supplied ctxId */ -+ if ((ret = elan_usercopy_handle(cap_ptr, ctxId, (void **)&tsk)) < 0) -+ { -+ ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ return ret; -+ } -+ -+ ELAN_DEBUG6(ELAN_DBG_USERCOPY, -+ "elan_usercopy: remote %p local %p len %ld write %d ctxId %d tsk %p\n", -+ remote, local, (long) len, write, ctxId, tsk); -+ -+ ASSERT(tsk); -+ -+ /* The BKL - why ??? (arch/[i386|ia64]/kernel/ptrace.c seems to hold it) */ -+// lock_kernel(); -+ -+ bytes = rw_process_vm(tsk, (unsigned long)remote, local, len, write); -+ -+ if (bytes != len) -+ { -+ ELAN_DEBUG2(ELAN_DBG_USERCOPY, "elan_usercopy: Failed to read %ld bytes (%ld copied)\n", -+ (long)len, (long)bytes); -+ ret = -EPERM; -+ } -+ -+ /* The BKL - why ??? (arch/[i386|ia64]/kernel/ptrace.c seems to hold it) */ -+// unlock_kernel(); -+ -+ /* GNAT 7777: rw_process_vm() now drops the elanmod lock -+ * -+ * ELANMOD_RWLOCK_READ_UNLOCK(&elan_rwlock); -+ */ -+ -+ return ret; -+} -+ -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/context.c linux-2.6.9/drivers/net/qsnet/elan3/context.c ---- clean/drivers/net/qsnet/elan3/context.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/context.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,2101 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context.c,v 1.117.2.1 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context.c,v $ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+/* -+ * Global variables configurable from /etc/system file -+ * (OR /etc/sysconfigtab on Digital UNIX) -+ */ -+int ntrapped_threads = 64; -+int ntrapped_dmas = 64; -+int ntrapped_events = E3_NonSysCntxQueueSize + 128; -+int ntrapped_commands = 64; -+int noverflow_commands = 1024; -+int nswapped_threads = 64; -+int nswapped_dmas = 64; -+ -+#define NUM_HALTOPS 8 -+ -+void *SwapListsLockInfo; -+void *CmdLockInfo; -+ -+static void HaltSwapContext (ELAN3_DEV *dev, void *arg); -+ -+static char *OthersStateStrings[] = {"others_running", "others_halting", "others_swapping", -+ "others_halting_more", "others_swapping_more", "others_swapped"}; -+ -+ELAN3_CTXT * -+elan3_alloc (ELAN3_DEV *dev, int kernel) -+{ -+ ELAN3_CTXT *ctxt; -+ int i; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_FN, "elan3_alloc: %s\n", kernel ? "kernel" : "user"); -+ -+ KMEM_ZALLOC (ctxt, ELAN3_CTXT *, sizeof (ELAN3_CTXT), TRUE); -+ -+ if (ctxt == NULL) -+ return (NULL); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+ ctxt->Device = dev; -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ ctxt->RefCnt = 1; -+ ctxt->Position = dev->Position; -+ -+ if (kernel) -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_KERNEL; -+ else -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_NO_LWPS; -+ -+ ctxt->Elan3mmu = elan3mmu_alloc (ctxt); -+ -+ kcondvar_init (&ctxt->Wait); -+ kcondvar_init (&ctxt->CommandPortWait); -+ kcondvar_init (&ctxt->LwpWait); -+ kcondvar_init (&ctxt->HaltWait); -+ -+ spin_lock_init (&ctxt->InputFaultLock); -+ -+ kmutex_init (&ctxt->SwapListsLock); -+ kmutex_init (&ctxt->CmdPortLock); -+ kmutex_init (&ctxt->NetworkErrorLock); -+ kmutex_init (&ctxt->CmdLock); -+ -+ krwlock_init (&ctxt->VpLock); -+ -+ KMEM_GETPAGES (ctxt->FlagPage, ELAN3_FLAGSTATS *, 1, TRUE); -+ if (!ctxt->FlagPage) -+ goto error; -+ bzero ((char *) ctxt->FlagPage, PAGESIZE); -+ -+ KMEM_ZALLOC (ctxt->CommandTraps, COMMAND_TRAP *, sizeof (COMMAND_TRAP) * ntrapped_commands, TRUE); -+ if (!ctxt->CommandTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->ThreadTraps, THREAD_TRAP *, sizeof (THREAD_TRAP) * ntrapped_threads, TRUE); -+ if (!ctxt->ThreadTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->DmaTraps, DMA_TRAP *, sizeof (DMA_TRAP) * ntrapped_dmas, TRUE); -+ if (!ctxt->DmaTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->EventCookies, EVENT_COOKIE *, sizeof (EVENT_COOKIE) * ntrapped_events, TRUE); -+ if (!ctxt->EventCookies) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->Commands, CProcTrapBuf_BE *, sizeof (CProcTrapBuf_BE) * noverflow_commands,TRUE); -+ if (!ctxt->Commands) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapThreads, E3_Addr *, sizeof (E3_Addr) * nswapped_threads, TRUE); -+ if (!ctxt->SwapThreads) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapDmas, E3_DMA_BE *, sizeof (E3_DMA_BE) * nswapped_dmas, TRUE); -+ if (!ctxt->SwapDmas) -+ goto error; -+ -+ /* -+ * "slop" is defined as follows : -+ * number of entries REQUIRED to be left spare to consume all other traps -+ * up until the time that the context can be swapped out. -+ * -+ * CommandTrapQ : 1 command issued by main + 1 issued by the thread processor per elan -+ * ThreadTrapQ : 2 from command + 2 input -+ * DmaTrapQ : 2 from command + 2 input -+ * EventTrapQ : 2 from command + 1 thread + 1 dma + 2 input + E3_NonSysCntxQueueSize -+ */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ELAN3_QUEUE_INIT (ctxt->CommandTrapQ, ntrapped_commands, 2); -+ ELAN3_QUEUE_INIT (ctxt->ThreadTrapQ, ntrapped_threads, 4); -+ ELAN3_QUEUE_INIT (ctxt->DmaTrapQ, ntrapped_dmas, 4); -+ ELAN3_QUEUE_INIT (ctxt->EventCookieQ, ntrapped_events, MIN(E3_NonSysCntxQueueSize + 6, ntrapped_events - 6)); -+ ELAN3_QUEUE_INIT (ctxt->CommandQ, noverflow_commands, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapThreadQ, nswapped_threads, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapDmaQ, nswapped_dmas, 0); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+#if defined(DIGITAL_UNIX) -+ /* Allocate the segelan for the command port */ -+ if (! kernel && elan3_segelan3_create (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+#endif -+ -+ /* -+ * Initialise the Input Fault list -+ */ -+ spin_lock (&ctxt->InputFaultLock); -+ for (i = 0; i < NUM_INPUT_FAULT_SAVE; i++) -+ ctxt->InputFaults[i].Next = (i == (NUM_INPUT_FAULT_SAVE-1)) ? NULL : &ctxt->InputFaults[i+1]; -+ ctxt->InputFaultList = &ctxt->InputFaults[0]; -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ ReserveHaltOperations (dev, NUM_HALTOPS, TRUE); -+ -+ if ((ctxt->RouteTable = AllocateRouteTable (ctxt->Device, ELAN3_MAX_VPS)) == NULL) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_FN, "elan3_alloc: cannot map route table\n"); -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+ -+ return (ctxt); -+ -+ -+ error: -+ -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ if (ctxt->FlagPage) -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ if (ctxt->CommandTraps) -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ if (ctxt->ThreadTraps) -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ if (ctxt->DmaTraps) -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ if (ctxt->EventCookies) -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ if (ctxt->Commands) -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ if (ctxt->SwapThreads) -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ if (ctxt->SwapDmas) -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+ -+ return (NULL); -+} -+ -+void -+elan3_free (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_free: %p \n", ctxt); -+ -+ elan3_removevp (ctxt, ELAN3_INVALID_PROCESS); /* Remove any virtual process mappings */ -+ -+#if defined(DIGITAL_UNIX) -+ WaitForContext (ctxt); /* wait for all references to this context to go away */ -+#endif -+ -+ if (ctxt->RouteTable) -+ FreeRouteTable (dev, ctxt->RouteTable); -+ ctxt->RouteTable = NULL; -+ -+ elan3mmu_free (ctxt->Elan3mmu); /* free of our Elan3mmu */ -+ -+ if (ctxt->Private) /* Call back to "user" to free off */ -+ ELAN3_OP_FREE_PRIVATE (ctxt); /* private data */ -+ -+#if defined(DIGITAL_UNIX) -+ if (! CTXT_IS_KERNEL(ctxt)) -+ elan3_segelan3_destroy (ctxt); /* Unmap the command port from the users address space. */ -+#endif -+ -+ ReleaseHaltOperations (dev, NUM_HALTOPS); -+ -+ if (ctxt->Input0Resolver) -+ CancelNetworkErrorResolver (ctxt->Input0Resolver); -+ -+ if (ctxt->Input1Resolver) -+ CancelNetworkErrorResolver (ctxt->Input1Resolver); -+ -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ -+ CompleteNetworkErrorFixup (ctxt, nef, ESRCH); -+ } -+ -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+} -+ -+int -+elan3_doattach(ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ unsigned long pgnum = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ /* Map in the command port for this context */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_COMMAND_PORT, &ctxt->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ctxt->CommandPageHandle) != ESUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_doattach: MapDeviceRegister failed"); -+ return (EINVAL); -+ } -+ -+ ctxt->CommandPort = ctxt->CommandPage + pgoff; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = 0; -+ if (ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) != NULL) -+ res = EBUSY; -+ else -+ { -+ if ((res = elan3mmu_attach (ctxt->Device, cap->cap_mycontext, ctxt->Elan3mmu, -+ ctxt->RouteTable->Table, ctxt->RouteTable->Size-1)) == 0) -+ { -+ ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) = ctxt; -+ ctxt->Capability = *cap; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (res == ESUCCESS) -+ elan3_swapin (ctxt, CTXT_DETACHED); -+ else -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ ctxt->CommandPort = (ioaddr_t) 0; -+ } -+ -+ return (res); -+} -+ -+void -+elan3_destroy_callback( void * args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ if (map == NULL) -+ { -+ /* the cap is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the cap is being destroyed \n"); -+ } -+ else -+ { -+ /* the map is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the map is being destroyed \n"); -+ } -+} -+ -+int -+elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int type; -+ int res; -+ -+ switch (type = elan3_validate_cap (dev, cap, ELAN_USER_ATTACH)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ((res = elan_attach_cap(cap, dev->Devinfo.dev_rail, ctxt, elan3_destroy_callback)) != 0) -+ return res; -+ break; -+ -+ default: -+ return (EINVAL); -+ } -+ -+ if (((res = elan3_doattach(ctxt,cap)) != ESUCCESS) && (type == ELAN_CAP_RMS)) -+ elan_detach_cap(cap, dev->Devinfo.dev_rail); -+ -+ return res; -+} -+ -+void -+elan3_detach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int need_to_call_elanmod_detach = 0; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_detach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_detach: context not attached \n"); -+ return ; -+ } -+ -+ /* must you be in the ctx_table ?? */ -+ -+ switch (ctxt->Capability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ if (ELAN3_SYSTEM_CONTEXT (ctxt->Capability.cap_mycontext)) -+ return ; -+ -+ if (! (ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ need_to_call_elanmod_detach = 1; -+ -+ break; -+ } -+ default: -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ if (need_to_call_elanmod_detach) -+ elan_detach_cap(&ctxt->Capability, dev->Devinfo.dev_rail); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+} -+ -+void -+elan3_dodetach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_dodetach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_dodetach: context not attached \n"); -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ elan_nullcap (&ctxt->Capability); -+} -+ -+void -+elan3_swapin (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->Status & CTXT_SWAPPED_REASONS); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapin: status %x State %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* In transition */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); -+ -+ if (reason == CTXT_NO_LWPS && ctxt->LwpCount++ != 0) /* Added another LWP */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ if ((ctxt->Status & ~reason) & CTXT_SWAPPED_REASONS) -+ ctxt->Status &= ~reason; -+ else -+ { -+ ASSERT (ctxt->Status & CTXT_SWAPPED_OUT); -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_SWAPPED); -+ -+ /* -+ * Will not be swapped out anymore, so ask the "user" to perform -+ * any swapping in he needs before letting the context run again. -+ */ -+ -+ ctxt->Status &= ~(CTXT_SWAPPED_OUT | CTXT_QUEUES_EMPTY | reason); -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_OK && ctxt->Input1Trap.State == CTXT_STATE_OK) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapin: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+void -+elan3_swapout (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int cansleep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapout: status %x state %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ if (reason == CTXT_NO_LWPS) -+ { -+ if (--ctxt->LwpCount != 0) /* Still other LWPs running */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); /* Wakeup anyone waiting on LwpCount */ -+ } -+ -+ ctxt->Status |= reason; -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* wait for someone else to finish swapping */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* out */ -+ -+ if (ctxt->Status & CTXT_SWAPPED_OUT) -+ { -+ if (reason == CTXT_NO_LWPS) /* Wakeup other thread waiting on LWP exit */ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * mark the context as swapping out. -+ */ -+ ctxt->Status |= CTXT_SWAPPING_OUT; -+ -+ if (reason != CTXT_FIXUP_NETERR) -+ { -+ /* -+ * Stop all of the lwps. -+ */ -+ while (ctxt->LwpCount) -+ { -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); /* Wake up any lwps */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* then wait for them to enter elan3_swapout */ -+ } -+ } -+ -+ StartSwapoutContext (ctxt, 0, NULL); -+ for (;;) -+ { -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: HandleExceptions\n"); -+ -+ cansleep = (HandleExceptions(ctxt, &flags) == ESUCCESS); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: OthersState=%d cansleep=%d\n", ctxt->OthersState, cansleep); -+ -+ if (ctxt->OthersState == CTXT_OTHERS_SWAPPED) -+ break; -+ -+ if (cansleep) -+ kcondvar_wait (&ctxt->Wait, &dev->IntrLock, &flags); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: swapped out\n"); -+ -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)); -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)); -+ -+ ctxt->Status |= CTXT_SWAPPED_OUT; -+ ctxt->Status &= ~CTXT_SWAPPING_OUT; -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages) -+{ -+ E3_Addr elanAddr = FaultSave->s.FaultAddress; -+ int writeable; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_FAULT, "elan3_pagefault: elanAddr %08x FSR %08x : %s\n", elanAddr, FaultSave->s.FSR.Status, -+ FaultSave->s.FSR.s.ProtFault ? "protection fault" : "pte invalid"); -+ -+ /* Look at the FSR to determine the fault type etc */ -+ -+ if (FaultSave->s.FSR.Status == 0) /* this is a target abort/parity error, so look */ -+ { /* at the PCI config space registers to determine */ -+ ElanBusError (ctxt->Device); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.AlignmentErr) /* Alignment errors are always fatal. */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Alignment error\n"); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.WalkBadData) /* Memory ECC error during a walk */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Memory ECC error during walk\n"); -+ return (EFAULT); -+ } -+ -+ if (!FaultSave->s.FSR.s.ProtFault && /* DMA memory type changed */ -+ !FaultSave->s.FSR.s.Walking) -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: DMA memory type changed\n"); -+ return (EFAULT); -+ } -+ -+ ASSERT (FaultSave->s.FSR.s.ProtFault ? /* protection errors, should always have a valid pte */ -+ (!FaultSave->s.FSR.s.Walking || !(FaultSave->s.FSR.s.Level==3) || FaultSave->s.FSR.s.FaultPte == ELAN3_ET_PTE) : -+ FaultSave->s.FSR.s.FaultPte == ELAN3_ET_INVALID); /* otherwise it must be an invalid pte */ -+ -+ /* -+ * Determine whether to fault for a 'write' from the access permissions we need, and not -+ * from the access type (WrAcc). -+ */ -+ writeable = (FaultSave->s.FSR.s.AccTypePerm & (1 << FSR_WritePermBit)); -+ -+ /* Check that we have the right permissions for this access type. */ -+ if ((res = elan3mmu_checkperm (ctxt->Elan3mmu, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.AccTypePerm)) != 0) -+ { -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: %s\n", (res == ENOMEM) ? "no protection mapping" : "protection error"); -+ -+ return (res); -+ } -+ -+ res = LoadElanTranslation (ctxt, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.ProtFault, writeable); -+ -+ if (res == ESUCCESS) -+ { -+ BumpStat (ctxt->Device, PageFaults); -+ BumpUserStat (ctxt, PageFaults); -+ } -+ -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: -> %d\n", res); -+ -+ return (res); -+} -+ -+void -+elan3_block_inputter (ELAN3_CTXT *ctxt, int block) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (block) -+ ctxt->Status |= CTXT_USER_FILTERING; -+ else -+ ctxt->Status &= ~CTXT_USER_FILTERING; -+ -+ if (ctxt->Capability.cap_mycontext != ELAN_CAP_UNINITIALISED) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+FixupNetworkErrors (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ctxt->NetworkErrorFixups == NULL) -+ return (ESUCCESS); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ kmutex_lock (&ctxt->NetworkErrorLock); /* single thread while fixing up errors */ -+ elan3_swapout (ctxt, CTXT_FIXUP_NETERR); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ if (ELAN3_OP_FIXUP_NETWORK_ERROR (ctxt, nef) == OP_FAILED) -+ CompleteNetworkErrorFixup (ctxt, nef, EINVAL); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ elan3_swapin (ctxt, CTXT_FIXUP_NETERR); -+ -+ kmutex_unlock (&ctxt->NetworkErrorLock); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+} -+ -+int -+CompleteNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER *rvp) -+{ -+ int state; -+ -+ switch (rvp->Status) -+ { -+ case ESUCCESS: -+ /* -+ * the item still existed at the source - if it's a wait for EOP transaction -+ * then the source will retry - otherwise the remote event will have been -+ * cleared and we should execute it -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESUCCESS zero WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ -+ state = trap->WaitForEopTransaction ? CTXT_STATE_OK : CTXT_STATE_NEEDS_RESTART; -+ -+ break; -+ -+ case ESRCH: -+ /* -+ * the item was not found at the source - we should always execute the transaction -+ * since it will never be resent -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESRCH execute WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ state = CTXT_STATE_NEEDS_RESTART; -+ break; -+ -+ default: /* other errors */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: %d\n", rvp->Status); -+ if (ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, &rvp) == OP_HANDLED) -+ state = CTXT_STATE_NEEDS_RESTART; -+ else -+ state = CTXT_STATE_OK; -+ break; -+ } -+ -+ FreeNetworkErrorResolver (rvp); -+ -+ return (state); -+} -+ -+int -+HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ THREAD_TRAP tproc; -+ DMA_TRAP dproc; -+ NETERR_RESOLVER *rvp; -+ int state; -+ -+ if (ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) -+ { -+ ctxt->Status &= ~CTXT_COMMAND_OVERFLOW_ERROR; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ElanException (ctxt, EXCEPTION_COMMAND_OVERFLOW, COMMAND_PROC, NULL); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ /* XXXX: unmap translations to the command port */ -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input0Trap, &ctxt->Input0Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input1Trap, &ctxt->Input1Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input0Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input0Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input0Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt, &ctxt->Input0Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input0Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input1Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input1Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input1Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt,&ctxt->Input1Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input1Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if (NextTProcTrap (ctxt, &tproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveTProcTrap (ctxt, &tproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_THREAD_QUEUE_FULL; -+ -+ if (NextDProcTrap (ctxt, &dproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveDProcTrap (ctxt, &dproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_DMA_QUEUE_FULL; -+ -+ /* Handle all event interrupts. */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ E3_uint32 cookie = *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->EventCookieQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_LWP) != OP_DEFER) -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ else -+ { -+ spin_lock_irqsave (&dev->IntrLock, *flags); /* place the cookie back on the queue. */ -+ /* note we place it on the front to ensure */ -+ ELAN3_QUEUE_ADD_FRONT (ctxt->EventCookieQ); /* event ordering. */ -+ *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies) = cookie; -+ } -+ } -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_EVENT_QUEUE_FULL; -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ E3_DMA_BE DmaDesc = *ELAN3_QUEUE_FRONT (ctxt->SwapDmaQ, ctxt->SwapDmas); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapDmaQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDmaDesc (ctxt, &DmaDesc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ E3_Addr StackPointer = *ELAN3_QUEUE_FRONT (ctxt->SwapThreadQ, ctxt->SwapThreads); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapThreadQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ReissueStackPointer (ctxt, StackPointer); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_SWAPPING: -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ else -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ -+ break; -+ -+ case CTXT_OTHERS_SWAPPING_MORE: -+ ctxt->OthersState = CTXT_OTHERS_HALTING_MORE; -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping_more -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ break; -+ } -+ return (ESUCCESS); -+} -+ -+int -+RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: status %x\n", ctxt->Status); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ) || ! ELAN3_QUEUE_EMPTY(ctxt->CommandQ)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input0Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input0Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input0Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input1Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input1Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input1Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (SetEventsNeedRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartSetEvents (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ if (TProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ LoadCommandPortTranslation (ctxt); -+ RestartTProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (DProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: setting Command Flag at %p to 0\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 0; -+ -+ if (ctxt->Status & CTXT_WAITING_COMMAND) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "RestartContext: waking up threads waiting for commandport\n"); -+ -+ ctxt->Status &= ~CTXT_WAITING_COMMAND; -+ -+ kcondvar_wakeupall (&ctxt->CommandPortWait, &dev->IntrLock); -+ } -+ } -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+HaltSwapContext (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ E3_ThreadQueue_BE thread; -+ E3_DMA_BE dma; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ u_int *runCount; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_HALTING || ctxt->OthersState == CTXT_OTHERS_HALTING_MORE); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "HaltSwapContext: status %x state %s\n", ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: no more reason to swap -> others_running\n"); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * Capture all other processors since we're not being responsive to -+ * the command processor interrupt. -+ */ -+ CAPTURE_CPUS(); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, TProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, TProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, (void *) &thread, sizeof (E3_ThreadQueue_BE)); -+ -+ if (thread.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK(ctxt->SwapThreadQ, ctxt->SwapThreads) = thread.s.Thread; -+ ELAN3_QUEUE_ADD (ctxt->SwapThreadQ); -+ -+ /* -+ * Remove this entry from the queue by replacing it with -+ * the "magic" thread value. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ thread.s.Context = SysCntx ? SYS_CONTEXT_BIT : 0; -+ thread.s.Thread = VanishingStackPointer; -+ -+ elan3_sdram_copyq_to_sdram (dev, (void *) &thread, FPtr, sizeof (E3_ThreadQueue_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_ThreadQueue); -+ } -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (dma.s.dma_u.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapDmaQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK (ctxt->SwapDmaQ, ctxt->SwapDmas) = dma; -+ ELAN3_QUEUE_ADD (ctxt->SwapDmaQ); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = ((SysCntx ? SYS_CONTEXT_BIT : 0) << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = (E3_Addr) 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* -+ * Release the other processors now before signalling the LWP. -+ */ -+ RELEASE_CPUS(); -+ -+ if (! ELAN3_QUEUE_FULL (ctxt->SwapDmaQ) && !ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ { -+ /* -+ * We've compleletly emptied the elan queues of items in this -+ * context, so we now mark it as fully swapped out. -+ */ -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues emptied -> others_swapping\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING; -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ else -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues not emptied -> others_swapping_more\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+UnloadCommandPageMapping (ELAN3_CTXT *ctxt) -+{ -+ /* -+ * Unload the Elan translations, and flag the main processor to stall after -+ * issueing its next command. -+ */ -+ if (ctxt->CommandPageMapping != NULL && (ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_rgnat_main (ctxt->Elan3mmu, ctxt->CommandPageMapping); -+ -+ if (rgn != NULL) -+ { -+ E3_Addr eaddr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ -+ PRINTF1 (ctxt, DBG_INTR, "UnloadCommandPageMapping: unmapping command port at addr %08x\n", eaddr); -+ -+ elan3mmu_unload (ctxt->Elan3mmu, eaddr, PAGESIZE, PTE_UNLOAD); -+ } -+ -+ ctxt->Status &= ~CTXT_COMMAND_MAPPED_ELAN; -+ } -+} -+ -+void -+StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ u_int *runCount; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "StartSwapoutContext: Status %x OthersState %s\n", -+ ctxt->Status, OthersStateStrings [ctxt->OthersState]); -+ /* -+ * Disable the inputters, we should already have a reason for it. -+ */ -+ SetInputterStateForContext (ctxt, Pend, Maskp); -+ -+ UnloadCommandPageMapping (ctxt); -+ -+ /* -+ * Flag main processor to stall after issueing next command -+ */ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ -+ /* -+ * And queue a haltop to stop the queues and clear it out. -+ */ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_RUNNING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_halting\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_HALTING; -+ -+ QueueHaltOperation (dev, Pend, Maskp, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ break; -+ -+ case CTXT_OTHERS_SWAPPING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_swapping_more\n"); -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ break; -+ default: -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ break; -+ } -+} -+ -+#if defined(DIGITAL_UNIX) -+/* temporary tweaks to priority bump */ -+int lwp_do_prio = 1; -+int lwp_do_nxm = 1; -+int lwp_prio = BASEPRI_USER-1; -+#elif defined(LINUX) -+/* This is the default nice level for the helper LWP */ -+int LwpNice = -1; -+#endif -+ -+int -+elan3_lwp (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: started, context 0x%x\n", ctxt->Capability.cap_mycontext); -+ -+#if defined(DIGITAL_UNIX) -+ { -+ thread_t mythread = current_thread(); -+ if (lwp_do_prio && (lwp_do_nxm || !IS_NXM_TASK(mythread->task))) -+ { -+ mythread->priority = mythread->sched_pri = lwp_prio; -+ mythread->max_priority = BASEPRI_HIGHEST; -+ (void) thread_priority(mythread, lwp_prio, 0, 1); -+ } -+ } -+#elif defined(LINUX) -+ { -+ /* Do the priority trick for the helper LWP so that it -+ * runs in preferance to the user threads which may be -+ * burning CPU waiting for a trap to be fixed up -+ */ -+#ifdef NO_O1_SCHED -+ if (LwpNice >= -20 && LwpNice < 20) -+ current->nice = LwpNice; -+#else -+ set_user_nice(current, LwpNice); -+#endif -+ } -+#endif -+ -+ elan3_swapin (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ /* If we're swapped out, and not detached (or exiting) then wait until we're swapped back in */ -+ /* since otherwise we could "spin" forever continually calling elan3_lwp() */ -+ if ((ctxt->Status & CTXT_SWAPPED_REASONS) && ! (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING))) -+ kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags); -+ -+ for (;;) -+ { -+#if defined(DIGITAL_UNIX) -+ if (thread_should_halt(current_thread()) || -+ CURSIG_CHECK(task_to_proc(current_thread()->task), u.np_uthread)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: exiting on %s\n", -+ thread_should_halt(current_thread()) ? "halt" : "signal"); -+ break; -+ } -+#endif -+ -+ if (ctxt->Status & CTXT_SWAPPED_REASONS) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting on swapped reasons\n"); -+ break; -+ } -+ -+ if (! (ctxt->inhibit)) -+ { -+ if (FixupNetworkErrors (ctxt, &flags) == ESUCCESS && -+ HandleExceptions (ctxt, &flags) == ESUCCESS && -+ RestartContext (ctxt, &flags) == ESUCCESS) -+ { -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ } -+ else -+ { -+ printk("elan3_lwp :: skipping as inhibited\n"); -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ -+ } -+ -+ /* Return EINVAL to elan3_syscall_lwp() when we want it to exit */ -+ res = (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING)) ? EINVAL : 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_swapout (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ FixupNetworkErrors (ctxt, &flags); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = NULL; -+ int new_disabled = 0; -+ int ctxnum; -+ -+ ASSERT (ctxt != NULL); -+ dev = ctxt->Device; -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ new_disabled = (ctxt->Input0Trap.State != CTXT_STATE_OK || -+ ctxt->Input1Trap.State != CTXT_STATE_OK || -+ (ctxt->Status & CTXT_INPUTTER_REASONS) != 0); -+ -+ -+ ctxnum = ctxt->Capability.cap_mycontext; -+ -+#ifndef __lock_lint -+ PRINTF2 (ctxt , DBG_IPROC, "SetInputterState: ctxnum %x %s attached\n", ctxnum, ctxt->Disabled ? "disabled " : ""); -+#endif /* __lock_lint */ -+ -+ if (ctxt->Disabled != new_disabled) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "SetInputterState: ctxnum %x change %s\n", ctxnum, new_disabled ? "enabled to disabled" : "disabled to enabled"); -+ -+ ctxt->Disabled = new_disabled; -+ -+ /* synchronize the context filter for this context */ -+ elan3mmu_set_context_filter (dev, ctxnum, new_disabled, Pend, Maskp); -+ } -+} -+ -+int -+CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int delay = 1; -+ int i, SeenComQueueEmpty; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT (cflags != DmaComQueueNotEmpty || dev->HaltDmaDequeueCount != 0); -+ -+ /* -+ * Flush the command processor queues and poll the queue to see it it empties. -+ */ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * Ensure previous writes have been flushed through the write buffers -+ */ -+ wmb(); mmiob(); -+ -+ /* -+ * If the command processor traps, or it's taking too long to observe -+ * the queue as emtpy, then we need to force the interrupt handler to -+ * run for us. So queue a halt operation for the dma processor. -+ */ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ for (i = 20; i > 0 || (how & ISSUE_COMMAND_CANT_WAIT); i--) -+ { -+ if (SeenComQueueEmpty || (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ break; -+ -+ mb(); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * If we've seen the command queue that we're interested in with nothing in it -+ * and the command processor has not trapped then the commands we've -+ * issued have been successfully processed. -+ */ -+ if (SeenComQueueEmpty && ! (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ { -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: observed dma queue empty and command proc not trapped\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ISSUE_COMMAND_OK); -+ } -+ -+ if ((how & ISSUE_COMMAND_CANT_WAIT) != 0) -+ return (ISSUE_COMMAND_WAIT); -+ -+ /* -+ * Halt the dma processor and wait for it to halt, if the command we've issued has -+ * trapped then the interrupt handler will have moved it to the context structure. -+ */ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for dproc to halt\n"); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, WakeupLwp, ctxt); -+ while (! ctxt->Halted) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for Halted - %d\n", ctxt->Halted); -+ -+ kcondvar_wait (&ctxt->HaltWait, &dev->IntrLock, flags); -+ -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: woken for Halted - %d\n", ctxt->Halted); -+ } -+ ctxt->Halted = 0; -+ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: dproc halted, checking for trap\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ) ? ISSUE_COMMAND_OK : ISSUE_COMMAND_TRAPPED); -+} -+ -+int -+WaitForCommandPort (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (ctxt->Status & CTXT_DETACHED) -+ res = EINVAL; -+ else -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ ctxt->Status |= CTXT_WAITING_COMMAND; -+ if (CTXT_IS_KERNEL(ctxt)) -+ kcondvar_wait (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ else -+ kcondvar_waitsig (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ } -+ -+ res = (!ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) ? EAGAIN : 0; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static char * -+CommandName (int offset) -+{ -+ switch (offset) -+ { -+ case offsetof (E3_CommandPort, PutDma): return ("PutDma"); -+ case offsetof (E3_CommandPort, GetDma): return ("GetDma"); -+ case offsetof (E3_CommandPort, RunThread): return ("RunThread"); -+ case offsetof (E3_CommandPort, WaitEvent0): return ("WaitEvent0"); -+ case offsetof (E3_CommandPort, WaitEvent1): return ("WaitEvent1"); -+ case offsetof (E3_CommandPort, SetEvent): return ("SetEvent"); -+ default: return ("Bad Command"); -+ } -+} -+ -+int -+IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int cflags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (cflags & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_RETRY\n", -+ CommandName (cmdoff), value); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_OK\n", -+ CommandName (cmdoff), value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, (void *)(ctxt->CommandPort + cmdoff)); /* issue command */ -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ /* -+ * Since we may be issuing a command that could trap, and we're interested in -+ * the outcome, the command port trap resolving code must be locked out. -+ */ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (how & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_RETRY\n", value, item); -+ -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ -+ ASSERT (item == NULL || ctxt->CommandPortItem == NULL); -+ -+ /* -+ * Stop the DMA processor from removing entries from the -+ * command port, and force the command processor to do this. -+ * This means that if a trap occurs then it will be the command -+ * processor that traps. -+ */ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p]\n", value, item); -+ -+ /* -+ * Always issue the DMA to the 'write' command, since we've asserted HaltDmaDequeue -+ * the command processor will read the descriptor and transfer it to the run queue. -+ * The command processor looks at the dma_direction field to determine whether it is -+ * a read or a write and whether to alter the dma_souce of the descriptr on the run -+ * queue -+ */ -+ mb(); /* ensure writes to main memory ccompleted */ -+ writel (value, (void *) (ctxt->CommandPort + offsetof (E3_CommandPort, PutDma))); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_TRAPPED\n", value, item); -+ /* -+ * Remember the item we're issueing so that if the command port traps the item will not -+ * get freed off until the descriptor has been read after the command trap has been fixed -+ * up. -+ */ -+ if (item != NULL) -+ ctxt->CommandPortItem = item; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ -+ return (res); -+} -+ -+int -+WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED && item != NULL) -+ ctxt->CommandPortItem = item; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, E3_FaultSave_BE *FaultSaveArea, int flags) -+{ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ /* -+ * This code re-issues the part of the set event that trapped. -+ */ -+ switch (TrapType) -+ { -+ case MI_ChainedEventError: -+ ElanException (ctxt, EXCEPTION_CHAINED_EVENT, proc, trap, FaultSaveArea->s.EventAddress); -+ break; -+ -+ -+ case MI_SetEventReadWait: -+ /* -+ * Fault occured on the read for the event location. Just re-issue -+ * setevent using EventAddress in E3_FaultSave -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_SetEventReadWait: re-issuing setevent %08x\n", -+ FaultSaveArea->s.EventAddress); -+ -+ ReissueEvent (ctxt, (E3_Addr) FaultSaveArea->s.EventAddress, flags); -+ break; -+ -+ case MI_DoSetEvent: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * perform the set. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check that the event has the block copy bit -+ * set in it, since we couldn't trap here if it -+ * didn't -+ */ -+ if ((EventType & EV_TYPE_BCOPY) != EV_TYPE_BCOPY) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: RunEventType %x\n", EventType); -+ -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ -+ break; -+ } -+ -+ case MI_ThreadUpdateNonSysCntxBack: -+ case MI_ThreadUpdateSysCntxBack: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the thread. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_THREAD)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: Unexpected type=%x for setevent trap. Should be thread\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: RunEventType %x\n", EventType); -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventIntUpdateBPtr: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_EVIRQ)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_RunDmaDesc: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_DMA)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", FaultSaveArea->s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, FaultSaveArea->s.FaultAddress); -+ break; -+ -+ case MI_FinishedSetEvent: -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Simulate the block copy. -+ */ -+ if (SimulateBlockCopy (ctxt, FaultSaveArea->s.EventAddress)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ case MI_BlockCopyEvent: -+ case MI_BlockCopyWaitForReadData: -+ { -+ /* -+ * Fault occured on the read or write of the data for a block copy -+ * event. Simulate the block copy using EventAddress in E3_FaultSave. Must also sample -+ * the event type and then perform a run. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: MI_BlockCopyWaitForReadData: BCopy read fault in BCopy event. Simulating BCopy.\n"); -+ -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ /* XXXX: should handle queue overflow */ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: Queue overflow\n"); -+ -+ ElanException (ctxt, EXCEPTION_QUEUE_OVERFLOW, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BUS_ERROR, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+} -+ -+int -+SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress) -+{ -+ E3_Addr SourcePtrElan; -+ E3_Addr DestPtrElan; -+ unsigned DataType; -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_FAULTED, EVENT_PROC, NULL, EventAddress); -+ return (TRUE); -+ } -+ -+ SourcePtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Source)); -+ DestPtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ DataType = DestPtrElan & EV_BCOPY_DTYPE_MASK; -+ DestPtrElan &= ~EV_BCOPY_DTYPE_MASK; -+ -+ -+ PRINTF3 (ctxt, DBG_EVENT, "SimulateBlockCopy: Event %08x SourcePtr %08x DestPtr %08x\n", -+ EventAddress, SourcePtrElan, DestPtrElan); -+ -+ if (SourcePtrElan & EV_WCOPY) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan, SourcePtrElan); -+ else -+ { -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+ /* -+ * For little endian cpu's we don't need to worry about the data type. -+ */ -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+#else -+ switch (DataType) -+ { -+ case EV_TYPE_BCOPY_BYTE: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_HWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_WORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_DWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ break; -+ } -+#endif -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (FALSE); -+} -+ -+void -+ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr, int flags) -+{ -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent : Event=%08x\n", addr); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), addr, flags) == ISSUE_COMMAND_RETRY) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent: queue event %08x\n", addr); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_SETEVENT]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_SETEVENT, addr); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+} -+ -+int -+SetEventsNeedRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_SETEVENT] != 0); -+} -+ -+void -+RestartSetEvents (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 EventPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_SETEVENT]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_SETEVENT, &item, &EventPointer)) -+ ctxt->ItemCount[LIST_SETEVENT] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), EventPointer, FALSE) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_SETEVENT, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_SETEVENT]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+int -+RunEventType(ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType) -+{ -+ int failed = FALSE; -+ -+ if ((EventType & EV_TYPE_BCOPY) != 0) -+ failed = SimulateBlockCopy(ctxt, FaultSaveArea->s.EventAddress); -+ -+ if ((EventType & EV_TYPE_MASK) == EV_TYPE_THREAD) -+ ReissueStackPointer (ctxt, EventType & ~(EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_MASK) == EV_TYPE_DMA) -+ RestartDmaPtr (ctxt, EventType & ~(EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_EVIRQ) != 0) -+ QueueEventInterrupt (ctxt, EventType & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)); -+ else /* Chained event */ -+ { -+ if ((EventType & ~EV_TYPE_BCOPY) != 0) /* not null setevent */ -+ ReissueEvent (ctxt, EventType & ~(EV_TYPE_MASK_CHAIN|EV_TYPE_MASK_BCOPY), FALSE); -+ } -+ -+ return (failed); -+} -+ -+void -+WakeupLwp (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_INTR, "WakeupLwp: %d\n", SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ctxt->Halted = 1; -+ kcondvar_wakeupone (&ctxt->HaltWait, &dev->IntrLock); -+ -+ PRINTF0 (ctxt, DBG_INTR, "WakeupLwp: woken up context\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_EVENT, "QueueEventInterrupt: cookie %08x\n", cookie); -+ -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_INTR) == OP_DEFER) -+ { -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ else -+ { -+ *(ELAN3_QUEUE_BACK (ctxt->EventCookieQ, ctxt->EventCookies)) = cookie; -+ -+ ELAN3_QUEUE_ADD (ctxt->EventCookieQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ if (ELAN3_QUEUE_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_EVENT_QUEUE_FULL; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+} -+ -+int -+ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...) -+{ -+ int res; -+ va_list ap; -+ -+ va_start (ap, trap); -+ -+ PRINTF2 (ctxt, DBG_FN, "ElanException: proc %d type %d\n", proc, type); -+ -+ res = ELAN3_OP_EXCEPTION (ctxt, type, proc, trap, ap); -+ -+ va_end (ap); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/context_linux.c linux-2.6.9/drivers/net/qsnet/elan3/context_linux.c ---- clean/drivers/net/qsnet/elan3/context_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/context_linux.c 2004-10-28 07:51:00.000000000 -0400 -@@ -0,0 +1,229 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context_linux.c,v 1.32 2004/10/28 11:51:00 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr addr, int len, int protFault, int writeable) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ caddr_t mainAddr; -+ int perm; -+ unsigned int off; -+ unsigned long flags; -+ -+ ASSERT (PAGE_ALIGNED (addr) && PAGE_ALIGNED (len)); -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr %08x len %08x%s%s\n", -+ addr, len, protFault ? " prot fault" : "", writeable ? " writeable" : ""); -+ -+ /* Ensure there's enough elan mmu tables for us to use */ -+ elan3mmu_expand (elan3mmu, addr, len, PTBL_LEVEL_3, 0); -+ -+ while (len > 0) -+ { -+ /* -+ * Retrieve permission region and calculate main address -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_elan (elan3mmu, addr); -+ if (rgn == NULL) { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: no permission region at %lx %p\n", -+ (u_long) addr, rgn); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EFAULT); -+ } -+ mainAddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ ASSERT (PAGE_ALIGNED ((unsigned long)mainAddr)); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ /* -+ * If we're tying to load a translation to the elan command port, -+ * then don't do it now, but mark the context to have it reloaded -+ * just before we restart any threads. We do this because we don't -+ * want to call into the segment driver since we could then block -+ * waiting for the command port to become available. -+ */ -+ if (mainAddr == ctxt->CommandPageMapping) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr=%08x maps command port\n", addr); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ UnloadCommandPageMapping (ctxt); -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ struct vm_area_struct *area; -+ struct mm_struct *mm = current->mm; -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((area = find_vma_intersection(mm, (unsigned long)mainAddr, (unsigned long)mainAddr + PAGESIZE)) == NULL) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p no vma\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable && !(area->vm_flags & VM_WRITE)) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p not writeable\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p %s %s\n", -+ mainAddr, writeable ? "writeable" : "readonly", -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ writeable && !pte_write(ptep_value) ? "COW" : "OK"); -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ -+ get_user_pages (current, current->mm, (unsigned long) mainAddr, PAGE_SIZE, -+ (area->vm_flags & VM_WRITE), 0, NULL, NULL); -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ } -+ -+ /* don't allow user write access to kernel pages if not kernel */ -+ if (!pte_read(ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable) -+ pte_mkdirty(ptep_value); -+ pte_mkyoung (ptep_value); -+ -+ /* now load the elan pte */ -+ if (writeable) -+ perm = rgn->rgn_perm; -+ else -+ perm = ELAN3_PERM_READONLY(rgn->rgn_perm & ELAN3_PTE_PERM_MASK) | (rgn->rgn_perm & ~ELAN3_PTE_PERM_MASK); -+ -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, pte_phys(ptep_value) + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ } -+ -+ len -= PAGESIZE; -+ addr += PAGESIZE; -+ } -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * LoadCommandPortTranslation: -+ * explicitly load an elan translation to the command port. -+ * but only do it if the command port is accessible. -+ * -+ * we call this function just after we have restarted -+ * and trapped commands, since when a command traps -+ * the elan translation to the command port is unloaded. -+ */ -+void -+LoadCommandPortTranslation (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ E3_Addr addr; -+ int perm; -+ physaddr_t phys; -+ unsigned int off; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadCommandPortTranslation: SegAddr=%p Status=%x\n", ctxt->CommandPageMapping, ctxt->Status); -+ -+ if (ctxt->CommandPageMapping != NULL && !(ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_main (elan3mmu, ctxt->CommandPageMapping); -+ if (rgn == (ELAN3MMU_RGN *) NULL) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: no permission for command port\n"); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return; -+ } -+ -+ addr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ perm = rgn->rgn_perm; -+ phys = kmem_to_phys((caddr_t) ctxt->CommandPage); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ if (ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) && !(ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: load xlation addr=%08x phys=%llx perm=%d\n", -+ addr, (unsigned long long)phys, perm); -+ -+ ctxt->Status |= CTXT_COMMAND_MAPPED_ELAN; -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, phys + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/cproc.c linux-2.6.9/drivers/net/qsnet/elan3/cproc.c ---- clean/drivers/net/qsnet/elan3/cproc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/cproc.c 2004-02-10 10:05:10.000000000 -0500 -@@ -0,0 +1,539 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cproc.c,v 1.46 2004/02/10 15:05:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/cproc.c,v $ */ -+ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+void -+HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ E3_FaultSave_BE FaultSave; -+ CProcTrapBuf_BE TrapBuf; -+ COMMAND_TRAP *trap; -+ ELAN3_CTXT *ctxt; -+ sdramaddr_t CurrTrap; -+ sdramaddr_t LastTrapAddr; -+ int NTrapEntries; -+ int NewPend; -+ unsigned long flags; -+ -+ /* -+ * Temporarily mask out the command processor interrupt, since -+ * we may cause it be re-asserted when we re-issue the commands -+ * from the overflow queue area. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ -+ do { -+ if (NewPend & INT_ComQueue) -+ { -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueError) != 0) -+ { -+ printk ("elan%d: InterruptReg=%x ComQueueStatus=%x\n", dev->Instance, -+ read_reg32 (dev, Exts.InterruptReg), read_reg32 (dev, ComQueueStatus)); -+ panic ("elan: command queue has overflowed !!"); -+ /* NOTREACHED */ -+ } -+ -+ BumpStat (dev, ComQueueHalfFull); -+ -+ /* -+ * Capture the other cpus and stop the threads processor then -+ * allow the command processor to eagerly flush the command queue. -+ */ -+ dev->FlushCommandCount++; dev->HaltThreadCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ /* -+ * Let the threads processor run again, and release the cross call. -+ */ -+ RELEASE_CPUS(); -+ -+ dev->FlushCommandCount--; dev->HaltThreadCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * Re-sample the interrupt register to see if the command processor -+ * has trapped while flushing the queue. Preserve the INT_ComQueue -+ * bit, so we can clear the ComQueueStatus register later. -+ */ -+ NewPend = (read_reg32 (dev, Exts.InterruptReg) | INT_ComQueue); -+ } -+ -+ CurrTrap = dev->CommandPortTraps[dev->CurrentCommandPortTrap]; -+ -+ if (NewPend & INT_CProc) -+ { -+ BumpStat (dev, CProcTraps); -+ -+ /* -+ * Copy the MMU Fault Save area and zero it out for future traps. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), &FaultSave, sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), sizeof (E3_FaultSave)); -+ -+ /* -+ * First entry in the cproc trap save area is the value of Areg and Breg for the -+ * uWord before the address fault. -+ */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf.Align64); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.r.Breg >> 16)); -+ if (ctxt == NULL) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context invalid [%08x.%08x]\n", TrapBuf.r.Areg, TrapBuf.r.Breg); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ { -+ if ((ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) == 0) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->CommandTrapQ, ctxt->CommandTraps); -+ -+ trap->FaultSave = FaultSave; -+ trap->Status.Status = read_reg32 (dev, Exts.CProcStatus.Status); -+ trap->TrapBuf = TrapBuf; -+ -+ /* -+ * The command processor does not stop after it has trapped. It will continue -+ * to save commands for other contexts into the commands port save area. -+ * The valid context for the trap is held in FaultSave. As some of this -+ * trap code uses the context in the status register the local copy must be -+ * updated with the trap context. -+ */ -+ trap->Status.s.Context = (TrapBuf.r.Breg >> 16); -+ -+ PRINTF4 (ctxt, DBG_INTR, "HandleCProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName(trap->Status.s.TrapType)); -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: Areg=%08x Breg=%08x\n", -+ trap->TrapBuf.r.Areg, trap->TrapBuf.r.Breg); -+ -+ if (ELAN3_OP_CPROC_TRAP (ctxt, trap) == OP_DEFER) -+ { -+ ELAN3_QUEUE_ADD (ctxt->CommandTrapQ); -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleCProcTrap: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ } -+ -+ UnloadCommandPageMapping (ctxt); -+ } -+ } -+ -+ /* -+ * Now change the CommandPortTrap queue. -+ * Must stop the command processor, wait for it to stop, find the final -+ * entry in the current cproc trap save area, reset the comm port -+ * trap save address to the other queue, clear the command port interrupt and -+ * set it running normally again, and then let it go again. This is not very -+ * time critical but it would be a good idea to prevent a higher priority -+ * interrupt from slowing down the process to prevent to fifos filling. -+ */ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ SET_SCHED_STATUS (dev, CProcStop); -+ -+ while ((read_reg32 (dev, Exts.SchCntReg) & CProcStopped) == 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for command processor to stop\n"); -+ mb(); -+ } -+ -+ /* -+ * Remember how many entries are in the saved command queue, and -+ * re-initialise it, before restarting the command processor. -+ */ -+ NTrapEntries = (read_reg32 (dev, CProc_TrapSave_Addr) - dev->CommandPortTraps[dev->CurrentCommandPortTrap])/sizeof (E3_uint64); -+ LastTrapAddr = dev->CommandPortTraps[dev->CurrentCommandPortTrap] + NTrapEntries*sizeof (TrapBuf); -+ -+ dev->CurrentCommandPortTrap ^= 1; -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: command trap queue has %d entries\n", NTrapEntries); -+ -+ if (NTrapEntries > ELAN3_COMMAND_TRAP_SIZE/sizeof (E3_uint64)) -+ panic ("HandleCProcTrap: command trap queue has overflowed\n"); -+ -+ if (NewPend & INT_CProc) -+ { -+ /* -+ * Clear the CProc interrupt and set it running normally again. Nothing should -+ * be running now that could issue commands apart from this trap handler. -+ */ -+ PULSE_SCHED_STATUS (dev, RestartCProc); -+ } -+ -+ if (NewPend & INT_ComQueue) -+ { -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. This will also remove the overflow interrupt. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ } -+ -+ /* -+ * And let the command processor start again -+ */ -+ CLEAR_SCHED_STATUS (dev, CProcStop); -+ -+ /* -+ * Now re-issue all the commands that were issued after the command port trapped. -+ * Should halt the dma processor and force command sto be put onto the run queues -+ * to ensure that a remote re-issued command is handled correctly. NOTE it is -+ * not necessary to wait for the dma processor to stop and this will reduce the -+ * performance impact. As CProcHalt is asserted all commands will be flushed -+ * to the queues. -+ */ -+ dev->HaltDmaDequeueCount++; dev->FlushCommandCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * XXXX: should we do a capture/release if the trap overflow -+ * area has a "large" number of commands in it, since -+ * we will just stuff them all back in, together with -+ * all those issued by the other cpus/thread processors. -+ */ -+ while (CurrTrap != LastTrapAddr) -+ { -+ /* Read the next saved (but not trapped) command */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf); -+ -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.s.ContextType >> 16)); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context %x invalid\n", TrapBuf.s.ContextType >> 16); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (!ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF3 (ctxt, DBG_INTR, "HandleCProcTrap: save command %x context %x - %08x\n", -+ (TrapBuf.s.ContextType>>3) & 0x3ff, TrapBuf.s.ContextType >> 17, TrapBuf.s.Addr); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ else -+ { -+ *ELAN3_QUEUE_BACK(ctxt->CommandQ, ctxt->Commands) = TrapBuf; -+ -+ ELAN3_QUEUE_ADD (ctxt->CommandQ); -+ } -+ continue; -+ } -+ -+ /* Reissue the command to the command port for this context */ -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf.s.ContextType>>5) & 0xff, TrapBuf.s.Addr); -+ -+ mb(); -+ if (ELAN3_OP_CPROC_REISSUE(ctxt, &TrapBuf) != OP_HANDLED) -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf.s.ContextType>>5) & 0xff] = TrapBuf.s.Addr; -+ mmiob(); -+ } -+ } -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for queues to empty after reissueing commands\n"); -+ mb(); -+ } -+ -+ dev->HaltDmaDequeueCount--; dev->FlushCommandCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ /* -+ * Re-read the interrupt register and see if we've got another command -+ * port interrupt -+ */ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ } while ((NewPend & (INT_CProc | INT_ComQueue)) != 0); -+ -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+} -+ -+void -+ResolveCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP *trap; -+ int res; -+ unsigned long flags; -+ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = ELAN3_QUEUE_MIDDLE(ctxt->CommandTrapQ, ctxt->CommandTraps); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_EventIntUpdateBPtr: -+ case MI_ChainedEventError: -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: %s\n", MiToName (trap->Status.s.TrapType)); -+ break; -+ -+ default: -+ /* All other traps are MMU related, we should have a fault address and FSR */ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: elan3_pagefault failed for address %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, COMMAND_PROC, trap, &trap->FaultSave, res); -+ -+ /* Set the trap type to 0 so the command does not get re-issued */ -+ trap->Status.s.TrapType = 0; -+ } -+ break; -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ELAN3_QUEUE_CONSUME (ctxt->CommandTrapQ); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+} -+ -+int -+RestartCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP trap; -+ void *item; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)); -+ ELAN3_QUEUE_REMOVE (ctxt->CommandTrapQ); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ BumpUserStat (ctxt, CProcTraps); -+ -+ switch (trap.Status.s.TrapType) -+ { -+ case 0: -+ res = ISSUE_COMMAND_OK; -+ break; -+ -+ case MI_WaitForWaitEventDesc: -+ /* -+ * Fault occured on the read of wait event descriptor for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 desc read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy0: -+ /* -+ * Fault occured on the read of event location for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 event loc fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy1: -+ /* -+ * Fault occured on the read of the event location for wait event type 1. -+ * Areg has the original ptr and count. -+ * Fault already fixed. Just re-issue the wait command using Areg and context. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type1 event location read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent1), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", -+ trap.FaultSave.s.FaultAddress); -+ -+ res = IssueDmaCommand (ctxt, trap.FaultSave.s.FaultAddress, NULL, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ default: -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ FixupEventTrap (ctxt, COMMAND_PROC, &trap, trap.Status.s.TrapType, &trap.FaultSave, ISSUE_COMMAND_FOR_CPROC); -+ -+ res = ISSUE_COMMAND_OK; -+ break; -+ } -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: /* command re-issued ok*/ -+ break; -+ -+ case ISSUE_COMMAND_TRAPPED: /* command trapped, it will have been copied */ -+ return (EAGAIN); /* to the back of the trap queue */ -+ -+ case ISSUE_COMMAND_RETRY: /* didn't issue command, so place back at front for */ -+ spin_lock_irqsave (&dev->IntrLock, flags); /* later (after resolving other traps */ -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ else -+ { -+ ELAN3_QUEUE_ADD_FRONT(ctxt->CommandTrapQ); -+ (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)) = trap; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ -+ default: -+ return (EINVAL); -+ } -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ } -+ -+ /* -+ * GNAT 5409 - if CommandPortItem was not NULL, but other reasons were set, -+ * then we'd not free the CommandPortItem even though we'd re- -+ * issued all trapped and overflowed commands. Hence only return -+ * without clearing CommandPortItem if we will be called again as -+ * either CommandTrapQ or CommandQ is not empty. -+ */ -+ -+ /* Now run the overflowed commands for this context */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: cannot issue overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ -+ /* -+ * Just re-issue the commands, if one traps then the remainder will -+ * just get placed in the overflow queue again and the interrupt handler -+ * will copy them back in here. -+ * -+ * Stop the dma processor from taking commands, since one of the commands -+ * could be a re-issued remote dma, which must be processed by the command -+ * processor. -+ */ -+ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ while (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ CProcTrapBuf_BE *TrapBuf = ELAN3_QUEUE_FRONT (ctxt->CommandQ, ctxt->Commands); -+ -+ PRINTF2 (ctxt, DBG_CPROC, "RestartCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf->s.ContextType>>5) & 0xff, TrapBuf->s.Addr); -+ mb(); /* ensure writes to main memory completed */ -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf->s.ContextType>>5) & 0xff] = TrapBuf->s.Addr; -+ mmiob(); /* and flush through IO writes */ -+ -+ ELAN3_QUEUE_REMOVE (ctxt->CommandQ); -+ } -+ -+ /* observe the command processor having halted */ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, 0, &flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: trapped after issueing overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ } -+ -+ /* remove the command port item, while holding the lock */ -+ item = ctxt->CommandPortItem; -+ ctxt->CommandPortItem = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (item != NULL) /* Free of any item that may have been stored */ -+ { /* because of the commandport trap */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: commandPortItem %p\n", item); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+ -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/dproc.c linux-2.6.9/drivers/net/qsnet/elan3/dproc.c ---- clean/drivers/net/qsnet/elan3/dproc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/dproc.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,553 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: dproc.c,v 1.52 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/dproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DMA_RETRY_FAIL_COUNT 8 -+ -+static void PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr); -+ -+int -+HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ /* Scoop out the trap information, before restarting the Elan */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ ASSERT(trap->Status.s.WakeupFunction == WakeupNever); -+ -+ /* copy the normal dma access fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), &trap->FaultSave, sizeof (E3_FaultSave_BE)); -+ -+ /* copy all 4 of the dma data fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), &trap->Data0, 4*sizeof (E3_FaultSave_BE)); -+ -+ /* Copy the DMA descriptor */ -+ copy_dma_regs (dev, &trap->Desc); -+ -+ /* Copy the packet info */ -+ trap->PacketInfo.Value = read_reg32 (dev, Exts.Dmas.DmaRds.DMA_PacketInfo.Value); -+ -+ /* update device statistics */ -+ BumpStat (dev, DProcTraps); -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->PacketInfo.s.PacketTimeout) -+ BumpStat (dev, DmaOutputTimeouts); -+ else if (trap->PacketInfo.s.PacketAckValue == C_ACK_ERROR) -+ BumpStat (dev, DmaPacketAckErrors); -+ break; -+ -+ case MI_DmaFailCountError: -+ BumpStat (dev, DmaRetries); -+ break; -+ } -+ -+ /* Must now zero all the FSRs so that a subsequent fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 4*sizeof (E3_FaultSave)); -+ -+ *RestartBits |= RestartDProc; -+ return (TRUE); -+} -+ -+void -+DeliverDProcTrap (ELAN3_DEV *dev, DMA_TRAP *dmaTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ E3_FaultSave_BE *FaultArea; -+ DMA_TRAP *trap; -+ register int i; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, dmaTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverDProcTrap: context %x invalid\n", dmaTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_DPROC_TRAP (ctxt, dmaTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->DmaTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ -+ bcopy (dmaTrap, trap, sizeof (DMA_TRAP)); -+ -+ PRINTF5 (ctxt, DBG_INTR, "DeliverDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x PacketInfo=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, trap->PacketInfo.Value, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_INTR, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_INTR, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_INTR, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_INTR, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ ELAN3_QUEUE_ADD (ctxt->DmaTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->DmaTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverDProcTrap: dma queue full, must swap out\n"); -+ ctxt->Status |= CTXT_DMA_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->DmaTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ E3_FaultSave_BE *FaultArea; -+ int FaultHandled = 0; -+ int res; -+ register int i; -+ -+ PRINTF4 (ctxt, DBG_DPROC, "ResolveDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_DPROC, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_DPROC, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_DPROC, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_DPROC, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_DPROC, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ BumpUserStat (ctxt, DProcTraps); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ /* -+ * Faulted due to packet timeout or a PAckError. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: got a PAckError or the output timed out. Rescheduling dma.\n"); -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_DmaFailCountError: -+ /* -+ * Faulted due to dma fail count. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: Reset dma fail count to %d\n", DMA_RETRY_FAIL_COUNT); -+ -+ if (ElanException (ctxt, EXCEPTION_DMA_RETRY_FAIL, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: dma timeslice queue overflow\n"); -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_UnimplementedError: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: unimplemented dma trap\n"); -+ if (ElanException (ctxt, EXCEPTION_UNIMPLEMENTED, DMA_PROC, trap) == OP_IGNORE) -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ return; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return; -+ -+ case MI_WaitForRemoteDescRead2: -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * The DMA processor has trapped while fetching the dma descriptor, so -+ * zero it out to not confuse the user on an error -+ */ -+ bzero (&trap->Desc, sizeof (trap->Desc)); -+ break; -+ } -+ -+ /* -+ * All other uWords will have updated one of the fault areas, so fix -+ * any faults found in them. If there were no faults found then it -+ * must have been a bus error -+ */ -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT ((FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block64 || -+ (FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block32); -+ -+ ASSERT (FaultArea->s.FaultContext == trap->Status.s.Context); -+ -+ if (((trap->Desc.s.dma_source & PAGEOFFSET) >= (PAGESIZE-E3_BLK_SIZE)) && -+ ((trap->Desc.s.dma_source & PAGEMASK) != ((trap->Desc.s.dma_source + trap->Desc.s.dma_size-1) & PAGEMASK))) -+ { -+ /* XXXX: dma started within last 64 bytes of the page -+ * terminate the process if it has pagefaulted */ -+ if (FaultArea->s.FaultAddress == (trap->Desc.s.dma_source & ~(E3_BLK_SIZE-1))) -+ { -+ printk ("elan%d: invalid dma - context=%x source=%x\n", ctxt->Device->Instance, -+ ctxt->Capability.cap_mycontext, trap->Desc.s.dma_source); -+ -+ if (ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0) != OP_IGNORE) -+ return; -+ } -+ } -+ -+ if (trap->Desc.s.dma_size != 0 && (res = elan3_pagefault (ctxt, FaultArea, 1)) != ESUCCESS) -+ { -+ /* XXXX: Rev B Elans can prefetch data passed the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress < (trap->Desc.s.dma_source+trap->Desc.s.dma_size) || -+ FaultArea->s.FaultAddress > (trap->Desc.s.dma_source+trap->Desc.s.dma_size+E3_BLK_SIZE*2)) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ FaultArea->s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, FaultArea, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ } -+ -+ if (trap->FaultSave.s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT (trap->FaultSave.s.FaultContext == trap->Status.s.Context); -+ -+ if ((trap->FaultSave.s.FSR.Status & FSR_SizeMask) == FSR_RouteFetch) -+ { -+ res = ResolveVirtualProcess (ctxt, trap->FaultSave.s.FaultAddress & 0xffff); /* mask out cookie */ -+ -+ switch (res) -+ { -+ default: -+ if (ElanException (ctxt, EXCEPTION_INVALID_PROCESS, DMA_PROC, trap, trap->FaultSave.s.FaultAddress, res) != OP_IGNORE) -+ return; -+ -+ case EAGAIN: -+ /* XXXX; wait on trail blazing code */ -+ -+ case 0: -+ break; -+ } -+ } -+ else -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ -+ if (! FaultHandled) -+ { -+ ElanBusError (ctxt->Device); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, EFAULT) != OP_IGNORE) -+ return; -+ } -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_WaitForRemoteDescRead2: -+ /* -+ * Faulted while trying to read the dma descriptor for a read dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a remote dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * Faulted while trying to read the dma descriptor for a write dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a write dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_WaitForRemoteRoutes1: -+ case MI_WaitForRemoteRoutes2: -+ case MI_SendRemoteDmaDesc: -+ case MI_SendDmaIdentify: -+ case MI_SendRemoteDmaRoutes2: -+ case MI_WaitForDmaRoutes1: -+ case MI_DmaLoop: -+ case MI_ExitDmaLoop: -+ case MI_GetDestEventValue: -+ case MI_SendFinalUnlockTrans: -+ case MI_SendNullSetEvent: -+ case MI_SendFinalSetEvent: -+ case MI_SendDmaEOP: -+ /* -+ * Faulted either fetching routes or fetching dma data. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ -+ case MI_SendEOPforRemoteDma: -+ case MI_LookAtRemoteAck: -+ case MI_FailedAckIfCCis0: -+ /* -+ * Possible fault when reading the remote desc into the dma data buffers -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a dma data or fetching a route\n"); -+ RestartDmaTrap (ctxt, trap); -+ break; -+ -+ case MI_DequeueSysCntxDma: -+ case MI_DequeueNonSysCntxDma: -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ /* -+ * It is possible that a dma can get back onto the queue while outstanding dma -+ * have not finished trapping. In this case the trap can be ignored as the dma -+ * state has been saved. It might trap again the next time it comes to the front -+ * of the queue and be fixed then. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trap after dma has finished. ignored\n"); -+ break; -+ -+ default: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+DProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_DMA_PTR] != 0 || -+ ctxt->ItemCount[LIST_DMA_DESC] != 0); -+} -+ -+void -+RestartDProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_Addr value; -+ int res; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ while (ctxt->ItemCount[LIST_DMA_PTR]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_DMA_PTR, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_PTR] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue write dma at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, NULL, 0); -+ -+ if (res == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_PTR, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_DMA_PTR]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ -+ while (ctxt->ItemCount[LIST_DMA_DESC]) -+ { -+ if (! ELAN3_OP_GET_BLOCK_ITEM (ctxt, LIST_DMA_DESC, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_DESC] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue dma desc at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, item, 0); -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ break; -+ -+ case ISSUE_COMMAND_RETRY: -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_DESC, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ -+ case ISSUE_COMMAND_TRAPPED: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ /* The item will be freed off when the command port trap */ -+ /* fixed up and the command successfully re-issued */ -+ break; -+ } -+ } -+ } -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaDesc(ELAN3_CTXT *ctxt, E3_DMA_BE *desc) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ if (desc->s.dma_direction != DMA_WRITE) -+ desc->s.dma_direction = (desc->s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ ELAN3_OP_PUT_BLOCK_ITEM (ctxt, LIST_DMA_DESC, (E3_uint32 *) desc); -+ ctxt->ItemCount[LIST_DMA_DESC]++; -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaTrap(ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ /* Negative length DMAs are illegal, since they hangup the dma processor, -+ * if they got generated then they will have been spotted by PollForDmahungup, -+ * and delivered to us with a Dequeue suspend address, -+ * -+ * GNAT sw-elan3/3908: Moved this check into this new function to avoid -+ * it sampling old or invalid register state -+ */ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0); -+ else -+ RestartDmaDesc (ctxt, &trap->Desc); -+} -+ -+void -+RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_DMA_PTR, ptr); -+ ctxt->ItemCount[LIST_DMA_PTR]++; -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+static void -+PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_DMA *dma; -+ -+ /* Dont call a function which takes locks unless we need to */ -+ if (!(elan3_debug & DBG_DPROC)) -+ return; -+ -+ dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: type %08x size %08x source %08x dest %08x\n", -+ fuword ((int *) &dma->dma_type), fuword ((int *) &dma->dma_size), -+ fuword ((int *) &dma->dma_source), fuword ((int *) &dma->dma_dest)); -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: Dest %08x %08x Local %08x %08x\n", -+ fuword ((int *) &dma->dma_destEvent), fuword ((int *) &dma->dma_destCookieProc), -+ fuword ((int *) &dma->dma_srcEvent), fuword ((int *) &dma->dma_srcCookieProc)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elan3mmu_generic.c linux-2.6.9/drivers/net/qsnet/elan3/elan3mmu_generic.c ---- clean/drivers/net/qsnet/elan3/elan3mmu_generic.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elan3mmu_generic.c 2004-12-14 05:19:38.000000000 -0500 -@@ -0,0 +1,3255 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_generic.c,v 1.76 2004/12/14 10:19:38 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_generic.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+# define zero_all_ptbls -+#endif -+ -+/* -+ * Debugging -+ */ -+int elan3mmu_debug = 0; -+ -+#define N_L3PTBL_MTX (0x20) -+#define N_L2PTBL_MTX (0x40) -+#define N_L1PTBL_MTX (0x20) -+ -+#define L3PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L3PTBL_MTX - 1)) -+static spinlock_t l3ptbl_lock[N_L3PTBL_MTX]; -+ -+#define L2PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L2PTBL_MTX - 1)) -+static spinlock_t l2ptbl_lock[N_L2PTBL_MTX]; -+ -+#define L1PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L1PTBL_MTX - 1)) -+static spinlock_t l1ptbl_lock[N_L1PTBL_MTX]; -+ -+ -+#define BASE2VA(p) ((E3_Addr)((p)->ptbl_base << 16)) -+#define VA2BASE(v) ((u_short)(((uintptr_t)(v)) >> 16)) -+ -+ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+static void elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *); -+static void elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags); -+ -+static ELAN3_PTBL *elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep); -+static ELAN3_PTBL *elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp); -+ -+static ELAN3_PTBL *elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx); -+void elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl); -+ -+void elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx); -+ -+static ELAN3_PTBL *elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu); -+static ELAN3_PTBL *elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+static ELAN3_PTBL *elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+ -+static int elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl); -+static ELAN3_PTBL *elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr); -+ -+static spinlock_t *elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl); -+ -+/* -+ * Encoding of MMU permissions against access type, -+ * to allow quick permission checking against access -+ * type. -+ */ -+u_char elan3mmu_permissionTable[] = -+{ -+ 0xcc, /* 11001100 ELAN3_PERM_NULL */ -+ 0x01, /* 00000001 ELAN3_PERM_LOCALREAD */ -+ 0x05, /* 00000101 ELAN3_PERM_READ */ -+ 0x33, /* 00110011 ELAN3_PERM_NOREMOTE */ -+ 0x37, /* 00110111 ELAN3_PERM_REMOTEREAD */ -+ 0x3f, /* 00111111 ELAN3_PERM_REMOTEWRITE */ -+ 0xf7, /* 11110111 ELAN3_PERM_REMOTEEVENT */ -+ 0xff, /* 11111111 ELAN3_PERM_REMOTEALL */ -+} ; -+ -+void -+elan3mmu_init() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_init: initialising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_init (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_init (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_init (&l3ptbl_lock[i]); -+ -+ elan3mmu_global_stats.version = ELAN3MMU_STATS_VERSION; -+ -+ elan3mmu_init_osdep(); -+} -+ -+void -+elan3mmu_fini() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_fini: finalising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_destroy (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_destroy (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_destroy (&l3ptbl_lock[i]); -+ -+ elan3mmu_fini_osdep(); -+} -+ -+ELAN3MMU * -+elan3mmu_alloc (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu; -+ ELAN3_PTBL *l1ptbl; -+ -+ ALLOC_ELAN3MMU (elan3mmu, TRUE); -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lock); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ elan3mmu->elan3mmu_ergns = NULL; -+ elan3mmu->elan3mmu_etail = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ elan3mmu->elan3mmu_mrgns = NULL; -+ elan3mmu->elan3mmu_mtail = NULL; -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ctxt = ctxt; -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lXptbl_lock); -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ l1ptbl = elan3mmu_alloc_l1ptbl(ctxt->Device, 0, elan3mmu); -+ -+ elan3mmu->elan3mmu_ctp = (sdramaddr_t) 0; -+ elan3mmu->elan3mmu_dev = ctxt->Device; -+ elan3mmu->elan3mmu_l1ptbl = l1ptbl; -+ -+ /* Ensure that there are at least some level 3 page tables, since if a level 2 and */ -+ /* a level 3 table are allocated together, then the level 3 is allocated with the NO_ALLOC */ -+ /* flag, thus there MUST be at least one that can be stolen or on the free list */ -+ if (elan3mmu->elan3mmu_dev->Level[PTBL_LEVEL_3].PtblFreeList == NULL) -+ elan3mmu_create_ptbls (elan3mmu->elan3mmu_dev, PTBL_LEVEL_3, 0, 0); -+ -+ HAT_PRINTF1 (1, "elan3mmu_alloc: elan3mmu %p\n", elan3mmu); -+ -+ elan3mmu_alloc_osdep (elan3mmu); -+ -+ return (elan3mmu); -+} -+ -+void -+elan3mmu_free (ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ ELAN3_PTBL *l1ptbl; -+ spinlock_t *l1lock; -+ unsigned long l1flags; -+ unsigned long flags; -+ -+ HAT_PRINTF1 (1, "elan3mmu_free : elan3mmu %p\n", elan3mmu); -+ -+ /* -+ * Invalidate the level1 page table, since it's already removed -+ * from the context table, there is no need to flush the tlb. -+ */ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ elan3mmu->elan3mmu_l1ptbl = NULL; -+ -+ if (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, PTBL_LEVEL_1, &l1lock, &l1flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval (elan3mmu, l1ptbl, PTE_UNLOAD_NOFLUSH); -+ elan3mmu_free_l1ptbl (elan3mmu->elan3mmu_dev, l1ptbl, l1lock, l1flags); -+ } -+ -+ /* -+ * Free of any permission regions. -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ while ((rgn = elan3mmu->elan3mmu_mrgns) != NULL) -+ { -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ -+ /* -+ * Free the lXptbl list -+ */ -+ ASSERT (elan3mmu->elan3mmu_lXptbl == NULL); /* XXXX MRH need to add list removal */ -+ -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ spin_lock_destroy (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ spin_lock_destroy (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU (elan3mmu); -+} -+ -+/*================================================================================*/ -+/* Interface routines to device driver */ -+static void -+elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ASSERT ((read_reg32 (dev, Exts.InterruptReg) & (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)) == -+ (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)); -+ -+ dev->FilterHaltQueued = 0; -+ -+ write_reg32 (dev, Input_Context_Fil_Flush, 0); -+ -+ HAT_PRINTF0 (1, "elan3mmu_flush_context_filter completed\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ int mctx = ctx & MAX_ROOT_CONTEXT_MASK; -+ sdramaddr_t ctp = dev->ContextTable + mctx * sizeof (E3_ContextControlBlock); -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ASSERT ((mctx < 32 || mctx >= ELAN3_KCOMM_BASE_CONTEXT_NUM) ? (ctx & SYS_CONTEXT_BIT) : ! (ctx & SYS_CONTEXT_BIT)); -+ -+ elan3_sdram_writel (dev, ctp + offsetof (E3_ContextControlBlock, filter), -+ ((ctx & SYS_CONTEXT_BIT) ? E3_CCB_CNTX0 : 0) | (disabled ? E3_CCB_DISCARD_ALL : 0)); -+ -+ HAT_PRINTF4 (1, "elan3mmu_set_context_filter: ctx %x [%lx] -> %s (%x)\n", ctx, ctp, -+ disabled ? "up" : "down", elan3_sdram_readl (dev, ctp + offsetof (E3_ContextControlBlock, filter))); -+ -+ /* queue a halt operation to flush the context filter while the inputter is halted */ -+ if (dev->FilterHaltQueued == 0) -+ { -+ dev->FilterHaltQueued = 1; -+ QueueHaltOperation (dev, Pend, Maskp, INT_DiscardingSysCntx | INT_DiscardingNonSysCntx, -+ elan3mmu_flush_context_filter, NULL); -+ } -+} -+ -+int -+elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask) -+{ -+ sdramaddr_t ctp; -+ ELAN3_PTP trootptp; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3mmu->elan3mmu_ctp = ctp; -+ -+ trootptp = PTBL_TO_PTADDR (elan3mmu->elan3mmu_l1ptbl) | ELAN3_ET_PTP; -+ -+ HAT_PRINTF4 (1, "elan3mmu_attach: ctp at %08lx : trootptp=%08x VPT_ptr=%08lx VPT_mask=%08x\n", -+ ctp, trootptp, routeTable, routeMask); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), trootptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), routeTable); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), routeMask); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_detach (ELAN3_DEV *dev, int ctx) -+{ -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t ctp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return; -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ HAT_PRINTF1 (1, "elan3mmu_detach: clearing ptp at %lx\n", ctp); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), invalidptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), 0); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), 0); -+ -+ ElanFlushTlb (dev); -+} -+ -+int -+elan3mmu_reference (ELAN3MMU *elan3mmu, int ctx) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t ctp; -+ E3_ContextControlBlock ccb; -+ ELAN3_PTP trootptp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3_sdram_copyl_from_sdram (dev, elan3mmu->elan3mmu_ctp, &ccb, sizeof (E3_ContextControlBlock)); -+ elan3_sdram_copyl_to_sdram (dev, &ccb, ctp, sizeof (E3_ContextControlBlock)); -+ -+ return (ESUCCESS); -+ -+} -+/*================================================================================*/ -+/* Elan permission regions. */ -+ -+/* elan address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, -+ E3_Addr addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ E3_Addr base; -+ E3_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_ergns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_ergnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_ergns; -+ -+ forward = 0; -+ if ((u_long) (base = rgn->rgn_ebase) < (u_long)addr) -+ { -+ if ((u_long)addr <= ((u_long) base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_etail; -+ -+ if ((u_long) (lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < (u_long) addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((u_long) (addr - base) > (u_long) (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_ergns; -+ -+ if ((u_long)lorgn->rgn_ebase > (u_long) addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((u_long)(addr - lorgn->rgn_ebase) < (u_long) (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((u_long)(rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ rgn = rgn->rgn_enext; -+ -+ if ((u_long)rgn->rgn_ebase <= (u_long)addr) -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while ((u_long)rgn->rgn_ebase > (u_long)addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_eprev; -+ } -+ -+ if ((u_long) (rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, nrgn->rgn_ebase, 1); -+ E3_Addr nbase = nrgn->rgn_ebase; -+ E3_Addr ntop = nbase + nrgn->rgn_len - 1; /* avoid wrap */ -+ E3_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_ergns = elan3mmu->elan3mmu_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((u_long)(base + rgn->rgn_len - 1) < (u_long)nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = elan3mmu->elan3mmu_etail = nrgn; -+ } -+ else -+ { -+ if ((u_long)nbase >= (u_long)base || (u_long)ntop >= (u_long)base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (elan3mmu->elan3mmu_ergns == rgn) -+ elan3mmu->elan3mmu_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_ebase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_ergnlast = rgn->rgn_enext; -+ if (rgn == elan3mmu->elan3mmu_etail) -+ elan3mmu->elan3mmu_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == elan3mmu->elan3mmu_ergns) -+ elan3mmu->elan3mmu_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ E3_Addr base; -+ -+ if (rgn != NULL && (u_long)(base = rgn->rgn_ebase) <= (u_long)addr && (u_long)addr <= (u_long)(base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+/* main address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, -+ caddr_t addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ caddr_t lastaddr; -+ caddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_mrgns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_mrgnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_mprev; -+ } -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, nrgn->rgn_mbase, 1); -+ caddr_t nbase = nrgn->rgn_mbase; -+ caddr_t ntop = nbase + nrgn->rgn_len - 1; -+ caddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_mrgns = elan3mmu->elan3mmu_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = elan3mmu->elan3mmu_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (elan3mmu->elan3mmu_mrgns == rgn) -+ elan3mmu->elan3mmu_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_mbase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_mrgnlast = rgn->rgn_mnext; -+ if (rgn == elan3mmu->elan3mmu_mtail) -+ elan3mmu->elan3mmu_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == elan3mmu->elan3mmu_mrgns) -+ elan3mmu->elan3mmu_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ caddr_t base; -+ -+ if (rgn != NULL && (base = rgn->rgn_mbase) <= addr && addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+elan3mmu_setperm (ELAN3MMU *elan3mmu, -+ caddr_t maddr, -+ E3_Addr eaddr, -+ u_int len, -+ u_int perm) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3MMU_RGN *nrgn; -+ unsigned long flags; -+ -+ HAT_PRINTF4 (1, "elan3mmu_setperm: user %p elan %08x len %x perm %x\n", maddr, eaddr, len, perm); -+ -+ if ((((uintptr_t) maddr) & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: alignment failure\n"); -+ return (EINVAL); -+ } -+ -+ if (((uintptr_t) maddr + len - 1) < (uintptr_t) maddr || ((u_long)eaddr + len - 1) < (u_long)eaddr) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: range failure\n"); -+ return (EINVAL); -+ } -+ -+ ALLOC_ELAN3MMU_RGN(nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if (elan3mmu_addrgn_elan (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: elan address exists\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ -+ if (elan3mmu_addrgn_main (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: main address exists\n"); -+ elan3mmu_removergn_elan (elan3mmu, eaddr); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_clrperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len) -+{ -+ E3_Addr raddr; -+ E3_Addr rtop; -+ ELAN3MMU_RGN *nrgn; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *rgn_next; -+ u_int ssize; -+ unsigned long flags; -+ int res; -+ -+ HAT_PRINTF2 (1, "elan3mmu_clrperm: elan %08x len %x\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ -+ ALLOC_ELAN3MMU_RGN (nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, rgn->rgn_len, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ -+ elan3mmu_unload (elan3mmu, raddr, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ elan3mmu_unload (elan3mmu, raddr, rtop - raddr + 1, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ ASSERT (nrgn != NULL); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1);; -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = ((rgn->rgn_ebase + rgn->rgn_len - 1) - rtop); -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ res = elan3mmu_addrgn_elan (elan3mmu, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = elan3mmu_addrgn_main (elan3mmu, nrgn); -+ ASSERT (res == 0); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ nrgn = NULL; -+ } -+ } -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ if (nrgn != NULL) -+ FREE_ELAN3MMU_RGN (nrgn); -+} -+ -+int -+elan3mmu_checkperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len, -+ u_int access) -+{ -+ E3_Addr raddr = (((E3_Addr) addr) & PAGEMASK); -+ u_int rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ u_int rsize = rtop - raddr + 1; -+ ELAN3MMU_RGN *rgn; -+ -+ HAT_PRINTF3 (1, "elan3mmu_checkperm: user %08x len %x access %x\n", addr, len, access); -+ -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (ENOMEM); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, raddr)) == (ELAN3MMU_RGN *) NULL) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ HAT_PRINTF4 (1, "elan3mmu_checkperm : rgn %x -> %x perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN3_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EACCES); -+ } -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+caddr_t -+elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ caddr_t raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = NULL; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+E3_Addr -+elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ E3_Addr raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_main (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = (E3_Addr) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+void -+elan3mmu_displayrgns(ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: main regions\n"); -+ for (rgn = elan3mmu->elan3mmu_mrgns; rgn; rgn = (rgn->rgn_mnext == elan3mmu->elan3mmu_mrgns) ? NULL : rgn->rgn_mnext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: elan regions\n"); -+ for (rgn = elan3mmu->elan3mmu_ergns; rgn; rgn = (rgn->rgn_enext == elan3mmu->elan3mmu_ergns) ? NULL : rgn->rgn_enext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/*============================================================================*/ -+/* Private functions */ -+#define ELAN3_PTE_IS_VALID(ptbl, pte) \ -+ ((ptbl->ptbl_flags & PTBL_KERNEL) ? \ -+ (pte&(~ELAN3_PTE_REF)) != elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu) : \ -+ ELAN3_PTE_VALID(pte)) -+ -+void -+elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_expand: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + len); -+ -+ for ( ; len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ break; -+ case PTBL_LEVEL_2: -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ break; -+ default: -+ span = len; -+ break; -+ } -+ -+ if (pte != (sdramaddr_t) 0) -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_reserve: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, 3, &ptbl, &lock, 0, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptes != NULL) -+ { -+ for (i = 0; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ *ptes++ = pte; -+ ptbl->ptbl_valid += (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF4 (2, "elan3mmu_reserve: inc valid for level %d ptbl %p to %d (%d)\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid, (span >> ELAN3_PAGE_SHIFT)); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ int level; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_release: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ if (ptes == NULL) -+ return; -+ -+ tpte = elan3mmu_kernel_invalid_pte (elan3mmu); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ ASSERT (level == PTBL_LEVEL_3); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ -+ for (i = 0 ; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ elan3_writepte (dev, pte, tpte); -+ ptbl->ptbl_valid -= (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF3 (2, "elan3mmu_release: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+} -+ -+void -+elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr) -+ -+{ -+ ELAN3_DEV *dev; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long flags; -+ ELAN3_PTE newpte; -+ ELAN3_PTE oldpte; -+ sdramaddr_t pte; -+ -+ ASSERT((level == PTBL_LEVEL_2) || (level == PTBL_LEVEL_3)); -+ -+ /* Generate the new pte which we're going to load */ -+ dev = elan3mmu->elan3mmu_dev; -+ -+ newpte = elan3mmu_phys_to_pte (dev, paddr, perm); -+ -+ if (attr & PTE_LOAD_BIG_ENDIAN) -+ newpte |= ELAN3_PTE_BIG_ENDIAN; -+ -+ HAT_PRINTF4 (1, "elan3mmu_pteload: elan3mmu %p level %d addr %x pte %llx\n", elan3mmu, level, addr, (long long) newpte); -+ HAT_PRINTF5 (1, "elan3mmu_pteload:%s%s%s perm=%d phys=%llx\n", -+ (newpte & ELAN3_PTE_LOCAL) ? " local" : "", -+ (newpte & ELAN3_PTE_64_BIT) ? " 64 bit" : "", -+ (newpte & ELAN3_PTE_BIG_ENDIAN) ? " big-endian" : " little-endian", -+ (u_int) (newpte & ELAN3_PTE_PERM_MASK) >> ELAN3_PTE_PERM_SHIFT, -+ (unsigned long long) (newpte & ELAN3_PTE_PFN_MASK)); -+ -+ if (level == PTBL_LEVEL_3) -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ else -+ { -+ sdramaddr_t ptp = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ pte = elan3mmu_ptp2pte (elan3mmu, ptp, level); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: level %d ptp at %lx => pte at %lx\n", level, ptp, pte); -+ } -+ -+ if (pte == (sdramaddr_t) 0) -+ { -+ ASSERT (level == PTBL_LEVEL_3 && (attr & (PTE_NO_SLEEP | PTE_NO_STEAL)) == (PTE_NO_SLEEP | PTE_NO_STEAL)); -+ return; -+ } -+ -+ ASSERT (ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == level); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ oldpte = elan3_readpte (dev, pte); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: modify pte at %lx from %llx to %llx\n", pte, (long long) oldpte, (long long) newpte); -+ -+ if (ELAN3_PTE_IS_VALID(ptbl, oldpte)) -+ { -+ ELAN3MMU_STAT(ptereload); -+ -+ ASSERT ((newpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK)) == (oldpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK))); -+ -+ if ((newpte & ~ELAN3_RM_MASK) != (oldpte & ~ELAN3_RM_MASK)) -+ { -+ /* We're modifying a valid translation, it must be mapping the same page */ -+ /* so we use elan3_modifypte to not affect the referenced and modified bits */ -+ elan3_modifypte (dev, pte, newpte); -+ -+ -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ } -+ } -+ else -+ { -+ ELAN3MMU_STAT(pteload); -+ -+ ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_pteload: write pte %lx to %llx\n", pte, (long long) newpte); -+ -+ elan3_writepte (dev, pte, newpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+} -+ -+void -+elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t ptp; -+ spinlock_t *lock; -+ int level; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3(1, "elan3mmu_unload (elan3mmu %p addr %x -> %x)\n", elan3mmu, addr, addr+len-1); -+ -+ for (; len != 0; addr += span, len -= span) -+ { -+ ptp = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptp != (sdramaddr_t) 0) -+ { -+ HAT_PRINTF2 (2, "elan3mmu_unload: unload [%x,%x]\n", addr, addr + span); -+ -+ if ( level == PTBL_LEVEL_3 ) -+ elan3mmu_unload_loop (elan3mmu, ptbl, ptp - PTBL_TO_PTADDR(ptbl), span >> ELAN3_PAGE_SHIFT, attr); -+ else -+ { -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *lXptbl; -+ ELAN3_PTP tptp; -+ int idx; -+ -+ tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tptp); -+ idx = (PTP_TO_PT_PADDR(tptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ if ( level == PTBL_LEVEL_1) -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ else -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ /* invalidate the ptp. */ -+ elan3_writeptp (dev, ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_unload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ } -+} -+ -+static void -+elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ int last_valid = first_valid + nptes; -+ int i; -+ -+ HAT_PRINTF3 (1, "elan3mmu_unloadloop: ptbl %p entries [%d->%d]\n", ptbl, first_valid, last_valid); -+ -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ pte = PTBL_TO_PTADDR(ptbl) + first_valid; -+ -+ for (i = first_valid; i < last_valid; i++, pte += ELAN3_PTE_SIZE) -+ { -+ if (ptbl->ptbl_valid == 0) -+ break; -+ -+ tpte = elan3_readpte (dev, pte); -+ if (! ELAN3_PTE_IS_VALID(ptbl, tpte)) -+ continue; -+ -+ elan3mmu_pteunload (ptbl, pte, flags, NO_MLIST_LOCK); -+ } -+} -+ -+void -+elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ ELAN3_PTE tpte; -+ -+ ASSERT (PTBL_LEVEL (ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ HAT_PRINTF2 (1, "elan3mmu_pteunload: ptbl %p pte %lx\n", ptbl, pte); -+ -+ ELAN3MMU_STAT (pteunload); -+ -+ elan3_invalidatepte (dev, pte); -+ -+ if (! (flags & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ tpte = ELAN3_INVALID_PTE; -+ elan3_writepte (dev, pte, tpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ { -+ tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ -+ elan3_writepte (dev, pte, tpte); -+ } -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteunload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+} -+ -+void -+elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ -+} -+ -+/* -+ * Create more page tables at a given level for this Elan. -+ */ -+static ELAN3_PTBL * -+elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep) -+{ -+ sdramaddr_t pts; -+ ELAN3_PTBL *ptbl; -+ ELAN3_PTBL *first; -+ ELAN3_PTBL *last; -+ ELAN3_PTBL_GR *ptg; -+ register int i; -+ register int inc; -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: create level %d ptbls\n", level); -+ -+ pts = elan3_sdram_alloc (dev, PTBL_GROUP_SIZE); -+ if (pts == (sdramaddr_t) 0) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot map elan pages\n"); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: pts at %lx\n", pts); -+ -+ ALLOC_PTBL_GR (ptg, !(attr & PTE_NO_SLEEP)); /* Allocate the group of page tables */ -+ if (ptg == NULL) /* for this page */ -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot allocate page table group\n"); -+ -+ elan3_sdram_free (dev, pts, PTBL_GROUP_SIZE); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: ptg is %p\n", ptg); -+ -+ ElanSetPtblGr (dev, pts, ptg); -+ -+ HAT_PRINTF4 (2, "elan3mmu_create_ptbls: zeroing %d bytes at %lx, %d bytes at %p\n", -+ PTBL_GROUP_SIZE, pts, (int) sizeof (ELAN3_PTBL_GR), ptg); -+ -+#ifndef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, pts, PTBL_GROUP_SIZE); /* Ensure that all PTEs/PTPs are invalid */ -+#endif -+ bzero ((caddr_t) ptg, sizeof (ELAN3_PTBL_GR)); -+ -+ ptg->pg_addr = pts; -+ ptg->pg_level = level; -+ -+ ptbl = ptg->pg_ptbls; /* Initialise the index in all page tables */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i++) -+ { -+ ptbl->ptbl_index = (u_char) i; -+ ptbl->ptbl_next = (ELAN3_PTBL *) 0xdeaddead; -+ ptbl++; -+ } -+ -+ switch (level) /* Determine the number of ptbls we can */ -+ { /* allocate from this page, by jumping */ -+ case PTBL_LEVEL_X: inc = PTBLS_PER_PTBL_LX; break; /* multiples of the smallest. */ -+ case PTBL_LEVEL_1: inc = PTBLS_PER_PTBL_L1; break; -+ case PTBL_LEVEL_2: inc = PTBLS_PER_PTBL_L2; break; -+ case PTBL_LEVEL_3: inc = PTBLS_PER_PTBL_L3; break; -+ default: inc = PTBLS_PER_PTBL_L3; break; -+ } -+ -+ ptbl = ptg->pg_ptbls; /* Chain them together */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i += inc, ptbl += inc) -+ ptbl->ptbl_next = ptbl + inc; -+ -+ first = ptg->pg_ptbls; /* Determine list of */ -+ last = first + PTBLS_PER_GROUP_MAX - inc; /* ptbls to add to free list */ -+ if (! keep) -+ ptbl = NULL; -+ else -+ { -+ ptbl = first; -+ first = first->ptbl_next; -+ } -+ -+ spin_lock (&dev->Level[level].PtblLock); -+ dev->Level[level].PtblTotal += PTBLS_PER_GROUP_MAX/inc; /* Increment the counts */ -+ dev->Level[level].PtblFreeCount += PTBLS_PER_GROUP_MAX/inc; -+ -+ ELAN3MMU_SET_STAT (num_ptbl_level[level], dev->Level[level].PtblTotal); -+ -+ if (keep) -+ dev->Level[level].PtblFreeCount--; -+ -+ last->ptbl_next = dev->Level[level].PtblFreeList; /* And add to free list */ -+ dev->Level[level].PtblFreeList = first; -+ spin_unlock (&dev->Level[level].PtblLock); -+ -+ spin_lock (&dev->PtblGroupLock); -+ ptg->pg_next = dev->Level[level].PtblGroupList; -+ dev->Level[level].PtblGroupList = ptg; -+ spin_unlock (&dev->PtblGroupLock); -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: returning ptbl %p\n", ptbl); -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp) -+{ -+ E3_Addr ptpa = PTP_TO_PT_PADDR(*ptp); -+ ELAN3_PTBL_GR *pg = ElanGetPtblGr (elan3mmu->elan3mmu_dev, (sdramaddr_t)ptpa & ~(PTBL_GROUP_SIZE-1)); -+ -+ return (pg->pg_ptbls + ((ptpa - pg->pg_addr) >> ELAN3_PT_SHIFT)); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_lXptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ if (dev->Level[PTBL_LEVEL_X].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_X, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: created level X ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_lXptbl: not allowed to steal ptbl for use at level 2\n"); -+ return NULL; -+ } -+ -+ ELAN3MMU_STAT(lX_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ ptbl->ptbl_elan3mmu = elan3mmu; -+ ptbl->ptbl_base = 0; -+ ptbl->ptbl_parent = 0; -+ ptbl->ptbl_flags = PTBL_LEVEL_X | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_lXptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_LX_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx) -+{ -+ ELAN3_PTBL * ptbl_ptr; -+ int index; -+ -+ /* lock whilst looking for space */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* walk the lXptbl list */ -+ ptbl_ptr = elan3mmu->elan3mmu_lXptbl; -+ while ( ptbl_ptr != NULL ) -+ { -+ /* does this ptlb have any free ones */ -+ if ( (index = ptbl_ptr->ptbl_valid) < ELAN3_LX_ENTRIES) -+ { -+ /* better to search from valid count as its likly to be free */ -+ index = ptbl_ptr->ptbl_valid; -+ do { -+ if ((ptbl_ptr->ptbl_base & (1 << index)) == 0) -+ goto found; -+ -+ /* move index on and wrap back to start if needed */ -+ if ((++index) == ELAN3_LX_ENTRIES) -+ index = 0; -+ } while (index != ptbl_ptr->ptbl_valid); -+ -+ panic ("elan3mmu_alloc_pte: has ptbl valid < 32 when but no free pte's"); -+ } -+ ptbl_ptr = ptbl_ptr->ptbl_parent; -+ } -+ -+ /* unlock so we can create space */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* if create some more */ -+ ptbl_ptr = elan3mmu_alloc_lXptbl(dev, 0, elan3mmu); -+ -+ /* get the lock again */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* add to front of list as its obviously got free ones on it */ -+ ptbl_ptr->ptbl_parent = elan3mmu->elan3mmu_lXptbl; -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr; -+ -+ /* grap the first one */ -+ index = 0; -+ -+ found: -+ ptbl_ptr->ptbl_base |= (1 << index); -+ ptbl_ptr->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_pte: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* release the loc and return it */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ *idx = index; -+ return (ptbl_ptr); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i,j; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ if (dev->Level[PTBL_LEVEL_1].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ if (ptbl == NULL) -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_1, attr, 1); -+ -+ if (ptbl == NULL) -+ panic ("elan3mmu_alloc_l1ptbl: cannot alloc ptbl"); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L1; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (j); -+ p->ptbl_flags = PTBL_LEVEL_1 | PTBL_GROUPED; -+ p->ptbl_parent = NULL; -+ -+ j += L1_VA_PER_PTBL; -+ } -+ -+ /* Now mark the real page table as allocated */ -+ /* level 1 ptbls are returned unlocked */ -+ ptbl->ptbl_flags = PTBL_LEVEL_1 | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_l1ptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L1_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_2].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_2, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: created level 2 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l2ptbl: not allowted to steal ptbl for use at level 2\n"); -+ return (NULL); -+ } -+ -+ ELAN3MMU_STAT(l2_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_2, ptbl); -+ spin_lock_irqsave (*plock, *flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L2; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_2 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L2_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_2 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l2ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L2_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: found ptbl %p on free list\n", ptbl); -+ -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: created level 3 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l3ptbl: not allowed to steal ptbl for use at level 3\n"); -+ return (NULL); -+ } -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: stolen level3 ptbl %p\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_3, ptbl); -+ spin_lock_irqsave (*plock,*flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L3; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_3 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L3_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_3 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l3ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+void -+elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx) -+{ -+ sdramaddr_t pte = PTBL_TO_PTADDR (ptbl_ptr) | (idx * sizeof (ELAN3_PTE)); -+ ELAN3_PTE tpte = ELAN3_INVALID_PTE; -+ ELAN3_PTBL *prev; -+ -+ /* ensure that the pte is invalid when free */ -+ elan3_writepte (dev, pte, tpte); -+ -+ /* lock whilst removing */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ HAT_PRINTF4 (2, "elan3mmu_free_pte idx %d ptbl_ptr %p ptbl_base %x ptbl_ptr->ptbl_valid %d \n", -+ idx, ptbl_ptr, ptbl_ptr->ptbl_base, ptbl_ptr->ptbl_valid); -+ /* make sure it was set */ -+ ASSERT ( ptbl_ptr->ptbl_base & (1 << idx) ); -+ ASSERT ( ptbl_ptr->ptbl_valid > 0 ); -+ -+ ptbl_ptr->ptbl_base &= ~(1 << idx); -+ ptbl_ptr->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_free_pte: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* was that the last one on this page */ -+ if ( ! ptbl_ptr->ptbl_valid ) -+ { -+ /* so no bits should be set then */ -+ ASSERT ( ptbl_ptr->ptbl_base == 0 ); -+ -+ /* is this the first page ?? */ -+ if ( elan3mmu->elan3mmu_lXptbl == ptbl_ptr ) -+ { -+ /* make the list start at the second element */ -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ -+ /* scan thro list looking for this page */ -+ prev = elan3mmu->elan3mmu_lXptbl; -+ while ( prev->ptbl_parent != NULL ) -+ { -+ if ( prev->ptbl_parent == ptbl_ptr ) /* its the next one */ -+ { -+ /* remove element from chain */ -+ prev->ptbl_parent = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ prev = prev->ptbl_parent; -+ } -+ -+ panic ("elan3mmu_free_pte: failed to find ptbl in chain"); -+ /* NOTREACHED */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+} -+ -+void -+elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_lXptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_X); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(lX_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_lXptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ } -+} -+ -+void -+elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ HAT_PRINTF3 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p ptbl->ptbl_valid %x \n", dev, ptbl, ptbl->ptbl_valid); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_1); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l2ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(l2_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_l2ptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ } -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l3ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) /* if the ptbl has been used by the kernel */ -+ { /* then zero all the pte's, since they will */ -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+ } -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_kernel_l3ptbl (ELAN3_PTBL *ptbl) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte = PTBL_TO_PTADDR(ptbl); -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ int i; -+ -+ ptbl->ptbl_flags |= PTBL_KERNEL; -+ for (i = 0; i < ELAN3_L3_ENTRIES; i++, pte += ELAN3_PTE_SIZE) -+ { -+ elan3_writepte (dev, pte, tpte); -+ } -+} -+ -+#define PTBL_CAN_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+#define PTBL_MAY_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP|PTBL_LOCKED)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+ -+static int -+elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl) -+{ -+ ELAN3_PTBL *l2ptbl = l3ptbl->ptbl_parent; -+ E3_Addr l2addr = BASE2VA(l2ptbl); -+ E3_Addr l3addr = BASE2VA(l3ptbl); -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t l2ptp; -+ spinlock_t *l2lock; -+ unsigned long l2flags; -+ -+ HAT_PRINTF5 (1, "elan3mmu_steal_this_ptbl: l3ptbl %p (%x) l2ptbl %p (%x) l2addr %x\n", -+ l3ptbl, l3ptbl->ptbl_flags, l2ptbl, l2ptbl->ptbl_flags, l2addr); -+ -+ if (PTBL_CAN_STEAL (l3ptbl->ptbl_flags) && -+ elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_NOWAIT, l3ptbl->ptbl_elan3mmu, l2addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_OK) -+ { -+ ELAN3MMU_STAT(stolen_ptbls); -+ -+ /* Locked both L3 and L2 page tables. */ -+ l2ptp = PTBL_TO_PTADDR (l2ptbl) + ELAN3_L2_INDEX(l3addr)*ELAN3_PTP_SIZE; -+ -+ /* detach the level 3 page table */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_steal_this_ptbl: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_unload_loop (l3ptbl->ptbl_elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, PTE_UNLOAD_NOFLUSH); -+ -+ ASSERT (l3ptbl->ptbl_valid == 0); -+ -+ l3ptbl->ptbl_flags = 0; -+ return (1); -+ } -+ return (0); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long group_flags; -+ unsigned long ptbl_flags; -+ register int i; -+ -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: attr %x\n", attr); -+ -+ spin_lock_irqsave (&dev->PtblGroupLock, group_flags); -+ -+ ptg = dev->Level3PtblGroupHand; -+ -+ if (ptg == NULL) -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ -+ for (;;) -+ { -+ while (ptg) -+ { -+ for (i = 0, ptbl = ptg->pg_ptbls; i < PTBLS_PER_GROUP_MAX; i++, ptbl++) -+ { -+ if (PTBL_MAY_STEAL (ptbl->ptbl_flags) && -+ elan3mmu_lock_this_ptbl (ptbl, LK_PTBL_NOWAIT, &lock, &ptbl_flags) == LK_PTBL_OK) -+ { -+ if (elan3mmu_steal_this_ptbl (dev, ptbl )) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: stolen ptbl %p\n", ptbl); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock,ptbl_flags); -+ -+ dev->Level3PtblGroupHand = ptg->pg_next; -+ -+ spin_unlock_irqrestore (&dev->PtblGroupLock, group_flags); -+ -+ return (ptbl); -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, ptbl_flags); -+ } -+ } -+ ptg = ptg->pg_next; -+ } -+ -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ if (ptbl != NULL) -+ { -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: found ptbl %p on free list\n", ptbl); -+ break; -+ } -+ } -+ -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: created new ptbl %p\n", ptbl); -+ break; -+ } -+ -+ HAT_PRINTF0 (1, "elan3mmu_steal_l3ptbl: cannot find a ptbl, retrying\n"); -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ return (ptbl); -+} -+ -+sdramaddr_t -+elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr addr, int *level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ spinlock_t *l1lock; -+ spinlock_t *l2lock; -+ spinlock_t *l3lock; -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ *level = 0; -+ -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) NULL); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *level = 1; -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return with l1lock */ -+ return (l1ptp); -+ -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *level = 2; -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return with l2lock */ -+ return (l2ptp); -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: l3ptbl %p l3pte %lx\n", l3ptbl, l3pte); -+ -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags)) -+ { -+ case LK_PTBL_OK: -+ *level = 3; -+ *plock = l3lock; -+ *pptbl = l3ptbl; -+ *flags = l3flags; -+ -+ return (l3pte); -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptefind: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ /* NOTREACHED */ -+ return ((sdramaddr_t) 0); -+} -+ -+sdramaddr_t -+elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level) -+{ -+ ELAN3_PTP tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (level != 3 && ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ return PTP_TO_PT_PADDR(tptp); -+} -+ -+sdramaddr_t -+elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr addr, int level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ spinlock_t *l1lock; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ E3_Addr l3base; -+ spinlock_t *l3lock; -+ -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) 0); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l1ptbl %p 1ptp %lx l1base %x (%x) : tl1ptp %x\n", -+ l1ptbl, l1ptp, l1base, l1ptbl->ptbl_base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_1) -+ { -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ panic ("elan3mmu_ptealloc: found pte in level 1 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ if (level == PTBL_LEVEL_1) -+ panic ("elan3mmu_ptealloc: found PTP when loading a level 1 PTE\n"); -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_1) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ /* raced with someone else, whose got there first */ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ /* drop the l1lock and retry */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ tl1ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ { -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ return ((sdramaddr_t) 0); -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* Now need to lock the l1 ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2 ptbl/lx pte\n"); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ goto retryl1; -+ } -+ -+ /* Now have L1 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l2ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l2flags); /* need to unlock with the l2flags to keep irq order correct */ -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l1flags; /* return the l1flags here as we have released the l2flags already to keep order */ -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocating level 2 and level 3 page tables\n"); -+ -+ /* Allocate a level 2 and level 3 page table and link them together */ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr | PTE_NO_SLEEP, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: l2ptbl %p (%x,%x) l3ptbl %p (%x,%x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_base, -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_base); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ { -+ l2ptbl->ptbl_flags |= PTBL_KERNEL; -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ } -+ -+ /* -+ * Connect L3 ptbl to the new L2 ptbl. -+ */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ -+ l2ptbl->ptbl_valid = 1; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* -+ * Now need to lock the l1 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have l1 and l3 ptbls locked, so install the new l2 ptbl into the l1. */ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l1ptp %lx is %x\n", l1ptp, tl1ptp); -+ -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2/l3 ptbls\n"); -+ -+ /* free off the level 3 page table */ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l3 ptbl %p (%x)\n", l3ptbl, l3ptbl->ptbl_flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* and unlock the level 1 ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ /* lock the level 2 page table, and clear out the PTP, then free it */ -+ (void) elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: locked l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ l2ptbl->ptbl_valid = 0; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set to 0 valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l1ptbl is %p (%x), l3ptbl is %p (%x)\n", -+ l1ptbl, l1ptbl->ptbl_flags, l3ptbl, l3ptbl->ptbl_flags); -+ -+ /* Now have L1 and L3 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l3ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l3flags); /* free using l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR (l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l1flags; /* return l1flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l2ptbl %p l2ptp %lx l2base %x (%x) : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, l2ptbl->ptbl_base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_2) { -+ /* this is a pointer to a pte, we should just return it */ -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holdind l2lock */ -+ return (l2ptp); -+ } -+ panic ("elan3mmu: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free lx pte\n"); -+ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l2ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocate level 3 page table\n"); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ -+ /* -+ * Now need to lock the l2 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ -+ if (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_MISMATCH) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: l2ptbl freed, free l3 ptbl and try again\n"); -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ goto retryl1; -+ } -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have L2 and L3 ptbls locked, see if someone has beaten us to it. */ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l2ptp at %lx is %x\n", l2ptp, tl2ptp); -+ -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l3 ptbl and try again\n"); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* Someone has allocated the ptbl before us */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ -+ /* Install the L3 ptbl into the L2 one */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ l2ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ /* unordered unlock - lock l2ptbl, lock l3ptbl, unlock l2ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l3flags); /* free with the l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l2flags; /* return l2flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ l3base = ELAN3_L3_BASE(addr); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l3ptbl %p 3pte %lx l3base %x (%x)\n", -+ l3ptbl, l3pte, l3base, l3ptbl->ptbl_base); -+ -+ if (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags) == LK_PTBL_OK) -+ { -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l3flags; -+ -+ return (l3pte); -+ } -+ -+ /* got all the way down here, but its been nicked before we could lock it */ -+ /* so try all over again */ -+ goto retryl1; -+} -+ -+void -+elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int attr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl1ptp; -+ sdramaddr_t l1ptp; -+ E3_Addr addr; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l2ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ unsigned long flags; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl); -+ -+ HAT_PRINTF2 (1, "elan3mmu_l1inval: l1ptbl %p l1ptp %lx\n", l1ptbl, l1ptp); -+ -+ for (i = 0, addr = 0; i < ELAN3_L1_ENTRIES; i++, l1ptp += ELAN3_PTP_SIZE) -+ { -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: l1ptbl %p : lXptbl %p idx %d\n", -+ l1ptbl, lXptbl, idx); -+ -+ /* invalidate the L1 pte. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l1inval: l1ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l1ptbl, l1ptp, tl1ptp, addr, i); -+ -+ /* invalidate the L1 ptp. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* invalidate the level 2 page table */ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ ret = elan3mmu_l2inval (elan3mmu, l2ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l2lock, &flags); -+ -+ ASSERT ((l2ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if (((l2ptbl->ptbl_flags & PTBL_KEEP) == 0) && l2ptbl->ptbl_valid == 0) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: free l2ptbl %p\n", l2ptbl); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_l2ptbl (elan3mmu->elan3mmu_dev, l2ptbl, l2lock, flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: keep l2ptbl %p\n", l2ptbl); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ } -+ } -+ else -+ { -+ l1ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l1inval: found invalid entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l1ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L1_SIZE; -+ } -+} -+ -+int -+elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int attr, E3_Addr addr, spinlock_t **pl2lock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl2ptp; -+ sdramaddr_t l2ptp; -+ spinlock_t *l3lock; -+ unsigned long l3flags; -+ ELAN3_PTBL *l3ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ -+ HAT_PRINTF2 (1, "elan3mmu_l2inval: l2ptbl %p addr %x\n", l2ptbl, addr); -+ -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_1); -+ -+ ret = elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, pl2lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (l2ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l2ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl); -+ -+ for (i = 0; i < ELAN3_L2_ENTRIES; i++, l2ptp += ELAN3_PTP_SIZE) -+ { -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: l2ptbl %p : lXptbl %p idx %d\n", -+ l2ptbl, lXptbl, idx); -+ -+ /* invalidate the L2 pte. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l2inval: l2ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l2ptbl, l2ptp, tl2ptp, addr, i); -+ -+ /* invalidate the L2 ptp. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* unload the level 3 page table */ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ ret = elan3mmu_l3inval (elan3mmu, l3ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l3lock, &l3flags); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if ((l3ptbl->ptbl_flags & PTBL_KEEP) == 0 && l3ptbl->ptbl_valid == 0) -+ { -+ /* decrement the valid count of the level 2 page table, and */ -+ /* free off the level 3 page table */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: free l3ptbl %p\n", l3ptbl); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_l3ptbl (elan3mmu->elan3mmu_dev, l3ptbl, l3lock, l3flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: keep l3ptbl %p\n", l3ptbl); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ } -+ } -+ else -+ { -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l2inval: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l2ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L2_SIZE; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED(l2ptbl->ptbl_flags)); -+ -+ return (ret); -+} -+ -+int -+elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int attr, E3_Addr addr, spinlock_t **pl3lock, unsigned long *flags) -+{ -+ int ret; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l3inval: l3ptbl %p parent %p addr %x\n", l3ptbl, l3ptbl->ptbl_parent, addr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_parent->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_base == VA2BASE (ELAN3_L2_BASE(addr))); -+ -+ ret = elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, pl3lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ elan3mmu_unload_loop (elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, attr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ -+ return (ret); -+ } -+ -+int -+elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags) -+{ -+ int level = PTBL_LEVEL (ptbl->ptbl_flags); -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (! spin_trylock (lock)) { -+ local_irq_restore (*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (level != PTBL_LEVEL (ptbl->ptbl_flags)) -+ { -+ spin_unlock (lock); -+ local_irq_restore (*flags); -+ return (LK_PTBL_MISMATCH); -+ } -+ -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ *plock = lock; -+ return (LK_PTBL_OK); -+} -+ -+int -+elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags) -+{ -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ int res = LK_PTBL_MISMATCH; -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (spin_trylock (lock) == 0) { -+ local_irq_restore(*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (PTBL_LEVEL (ptbl->ptbl_flags) != level) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ /* We have the right mutex, so check that its the ptbl we want. */ -+ switch (level) -+ { -+ case PTBL_LEVEL_1: va = ELAN3_L1_BASE(va); break; -+ case PTBL_LEVEL_2: va = ELAN3_L2_BASE(va); break; -+ case PTBL_LEVEL_3: va = ELAN3_L3_BASE(va); break; -+ } -+ -+ if (ptbl->ptbl_elan3mmu != elan3mmu || ptbl->ptbl_base != VA2BASE(va)) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ ASSERT ((ptbl->ptbl_flags & PTBL_LOCKED) == 0); -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ -+ *plock = lock; -+ return (LK_PTBL_OK); -+ -+mismatch: -+ if (! (flag & LK_PTBL_FAILOK)) -+ panic ("elan3mmu: failed to lock ptbl\n"); -+ -+ spin_unlock (lock); -+ local_irq_restore(*flags); -+ return (res); -+} -+ -+void -+elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ptbl->ptbl_flags &= ~PTBL_LOCKED; -+ spin_unlock_irqrestore (lock,flags); -+} -+ -+static spinlock_t * -+elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl) -+{ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: return (&l3ptbl_lock[L3PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_2: return (&l2ptbl_lock[L2PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_1: return (&l1ptbl_lock[L1PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_X: -+ panic ("elan3mmu: ptbl_to_lock, bad level X"); -+ default: -+ panic ("elan3mmu: ptbl_to_lock, bad level"); -+ /* NOTREACHED */ -+ } -+ return (NULL); -+} -+ -+void -+elan3mmu_display (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ spinlock_t *l1lock; -+ ELAN3_PTE tl1pte; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTE tl2pte; -+ spinlock_t *l2lock; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ ELAN3_PTE tl3pte; -+ spinlock_t *l3lock; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ unsigned long flags; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ -+ if (l1ptbl == NULL) -+ return; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 1 page table for pte %x\n", tl1ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl1pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 1 l1pte matches value %llx\n", (long long) tl1pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l1 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 1 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l1ptbl, l1ptbl->ptbl_flags, l1ptbl->ptbl_elan3mmu, l1ptbl->ptbl_base, elan3mmu, addr, (long long)tl1pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 1 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 2 page table for pte %x\n", tl2ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl2pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 2 l1pte matches value %llx\n", (long long)tl2pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 2 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr, (long long) tl2pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 2 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3ptbl %p l3pte %lx\n",l3ptbl, l3pte); -+ -+ tl3pte = elan3_readpte (dev, l3pte); -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3pte matches value %llx\n", (long long) tl3pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr, (long long) tl3pte); -+ -+ break; -+ -+ default: -+ panic ("elan3mmu_display: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elan3mmu_linux.c linux-2.6.9/drivers/net/qsnet/elan3/elan3mmu_linux.c ---- clean/drivers/net/qsnet/elan3/elan3mmu_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elan3mmu_linux.c 2004-12-14 05:19:38.000000000 -0500 -@@ -0,0 +1,284 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_linux.c,v 1.53 2004/12/14 10:19:38 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Strategy for syncing main <-> elan pte's: -+ * -+ * Install callbacks for linux flush_tlb_page(), flush_tlb_range(), -+ * flush_tlb_all(), and flush_tlb_mm() so when a main PTE changes, -+ * the elan translations, if any, are invalidated. They can then be -+ * faulted in again with the correct physical page, perms, etc., on demand. -+ * -+ * Callbacks are stacked on the mm_struct, one per context. We also stack -+ * a ctxt pointer so we don't have to do lookups on every call. -+ * -+ * Sanity check -- we clearly want to flush the elan PTEs in these -+ * situations, all of which are covered by tlb_flush_{page,range}() -+ * -+ * 1) kernel/vmscan.c::try_to_swap_out() swaps out a page -+ * -+ * 2) kernel/mremap.c::copy_one_pte() moves a page as a result of the -+ * mremap system call -+ * -+ * 3) kernel/mprotect.c::change_pte_range() changes the permissions of a -+ * page as the result of the mprotect system call -+ * -+ * Other Notes: -+ * -+ * Dirty a page in the mains page tables when it is faulted into the elan. -+ * This way it will not be thrown away by the swapper. -+ * -+ * Pages write protected for COW are copied by elan3mmu_main_pagefault() -+ * when a writeable translation is loaded into the elan. -+ */ -+ -+caddr_t elan3mmu_kernel_invalid_space; -+ELAN3_PTE elan3mmu_kernel_invalid_pte_val; -+ -+void -+elan3mmu_init_osdep (void) -+{ -+ pte_t *pte; -+ -+ KMEM_GETPAGES (elan3mmu_kernel_invalid_space, caddr_t, 1, TRUE); -+ -+ ASSERT(elan3mmu_kernel_invalid_space != NULL); -+ -+ pte = find_pte_kernel ((unsigned long) elan3mmu_kernel_invalid_space); -+ -+ elan3mmu_kernel_invalid_pte_val = ELAN3_PTE_64_BIT | (pte_phys(*pte) & ELAN3_PTE_PFN_MASK) | ELAN3_PERM_REMOTEREAD | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ elan3mmu_kernel_invalid_pte_val |= (1ull << 48); -+ else -+ elan3mmu_kernel_invalid_pte_val |= alpha_mv.pci_dac_offset; -+#endif -+ -+ HAT_PRINTF(0x10, "elan3mmu_invalid_space at %p phys=%llx pte=%llx\n", elan3mmu_kernel_invalid_space, -+ (unsigned long long) pte_phys(*pte), (unsigned long long) elan3mmu_kernel_invalid_pte_val); -+} -+ -+void -+elan3mmu_fini_osdep() -+{ -+ KMEM_FREEPAGES (elan3mmu_kernel_invalid_space, 1); -+} -+ -+void -+elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu) -+{ -+ elan3mmu->elan3mmu_coproc_mm = current->mm; -+} -+ -+/* -+ * Convert physical page frame number to elan pte. -+ */ -+ELAN3_PTE -+elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm) -+{ -+ ELAN3_PTE newpte; -+ -+ ASSERT (paddr != 0); -+ -+ if ((paddr & dev->SdramPhysMask) == dev->SdramPhysBase) /* SDRAM, turn on PTE_LOCAL bit */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx SDRAM\n", (unsigned long long) paddr); -+ -+ newpte = ELAN3_PTE_LOCAL | (paddr & ELAN3_PTE_PFN_MASK & ~dev->SdramPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#if defined(LINUX_ALPHA) -+ else if ((paddr & dev->PciPhysMask) == dev->PciPhysBase) -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx PCI\n", (unsigned long long) paddr); -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK & ~dev->PciPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#endif -+ else /* main memory, must convert to PCI view */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx is main memory\n", (unsigned long long) paddr); -+ -+ /* main memory, just set the architecture specific PTE_BYPASS bit */ -+ /* This requires the Tsunami chipset being programmed to support -+ * the monster window option. This is in linux-2.4.5 and later kernels -+ * and is also patched into the RH 7.1/2.4.3-12 Alpha kernel -+ */ -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK) | perm | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ newpte |= (1ull << 48); -+ else -+ newpte |= alpha_mv.pci_dac_offset; -+#endif -+ } -+ -+ if ( ELAN3_PERM_WRITEABLE( perm )) -+ newpte |= ( ELAN3_PTE_MOD | ELAN3_PTE_REF ); -+ else -+ newpte |= ( ELAN3_PTE_REF ) ; -+ -+ return (newpte); -+} -+ -+ELAN3_PTE -+elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu) -+{ -+ if (elan3mmu->elan3mmu_dev->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVB) -+ return (elan3mmu_kernel_invalid_pte_val); -+ return (ELAN3_INVALID_PTE); -+} -+ -+/* -+ * Invalidate a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, addr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " unloading eaddr %x main %p (%ld pages)\n", -+ eaddr, addr, btopr(span)); -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } /* takes care of elan tlb flush also */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * -+ */ -+void -+elan3mmu_update_range (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t vaddr, E3_Addr eaddr, u_int len, u_int perm) -+{ -+ u_int roperm = ELAN3_PERM_READONLY(perm & ELAN3_PTE_PERM_MASK) | (perm & ~ELAN3_PTE_PERM_MASK); -+ u_int off; -+ -+ HAT_PRINTF3(1, "elan3mmu_update_range (elan3mmu %p addr %p -> %p)\n", elan3mmu, vaddr, vaddr+len-1); -+ -+ while (len > 0) -+ { -+ pte_t *pte_ptr; -+ pte_t pte_value; -+ -+ pte_ptr = find_pte_map(mm, (unsigned long)vaddr); -+ if (pte_ptr) { -+ pte_value = *pte_ptr; -+ pte_unmap(pte_ptr); -+ } -+ -+ HAT_PRINTF(0x10, " elan3mmu_update_range %x (%p) %s\n", eaddr, vaddr, -+ !pte_ptr ? "invalid" : pte_none(pte_value) ? "none " : !pte_present(pte_value) ? "swapped " : -+ !pte_write(pte_value) ? "RO/COW" : "OK"); -+ -+ if (pte_ptr && !pte_none(pte_value) && pte_present(pte_value)) -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, eaddr + off, pte_phys(pte_value) + off, pte_write(pte_value) ? perm : roperm, PTE_LOAD|PTE_NO_SLEEP|PTE_NO_STEAL); -+ vaddr += PAGESIZE; -+ eaddr += PAGESIZE; -+ len -= PAGESIZE; -+ } -+} -+ -+/* -+ * Update a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm,caddr_t vaddr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, vaddr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, vaddr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < vaddr) -+ span = len; -+ else if (rgn->rgn_mbase > vaddr) -+ span = MIN(len, rgn->rgn_mbase - vaddr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - vaddr); -+ eaddr = rgn->rgn_ebase + (vaddr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " updating eaddr %u main %p (%ld pages)\n", -+ eaddr, vaddr, btopr(span)); -+ -+ elan3mmu_update_range(elan3mmu, mm, vaddr, eaddr, span, rgn->rgn_perm); -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * Invalidate all ptes for the given context. -+ */ -+void -+elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *l1ptbl = (elan3mmu ? elan3mmu->elan3mmu_l1ptbl : NULL); -+ spinlock_t *l1mtx; -+ unsigned long flags; -+ -+ if (l1ptbl && elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, 1, &l1mtx, &flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval(elan3mmu, elan3mmu->elan3mmu_l1ptbl, 0); -+ elan3mmu_unlock_ptbl (l1ptbl, l1mtx, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elan3ops.c linux-2.6.9/drivers/net/qsnet/elan3/elan3ops.c ---- clean/drivers/net/qsnet/elan3/elan3ops.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elan3ops.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,170 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3ops.c,v 1.4 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elan3ops.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+extern ELAN_STATS_OPS elan3_device_stats_ops; -+ -+ELAN_DEV_OPS elan3_dev_ops = { -+ -+ get_position, -+ set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+ELAN_STATS_OPS elan3_device_stats_ops = { -+ ELAN_STATS_OPS_VERSION, -+ -+ stats_get_index_name, -+ stats_get_block, -+ stats_clear_block -+}; -+ -+static char *elan3_device_stats_names[ELAN3_NUM_STATS] = -+{ -+ "version field", /* not cleared */ -+ "elan interrupts", -+ "tlb flushes", -+ "traps with invalid context", -+ "interrupts com queue half full", -+ "cproc traps", -+ "dproc traps", -+ "tproc traps", -+ "iproc traps", -+ "event interrupts", -+ "elan page faults", -+ "EopBadAcks", -+ "EopResets", -+ "InputterBadLength", -+ "InputterCRCDiscards", -+ "InputterCRCErrors", -+ "InputterCRCBad", -+ "errors in dma data", -+ "errors after dma identify", -+ "errors after thread identify", -+ "dma retries", -+ "dma output timeouts", -+ "dma packet ack errors", -+ "forced tproc traps", -+ "too many instruction traps", -+ "output timeouts", -+ "packet ack errors", -+ "LockError", -+ "DeskewError", -+ "PhaseError", -+ "DataError", -+ "FifoOvFlow0", -+ "FifoOvFlow1", -+ "link error value on data error", -+ "correctable ecc errors", -+ "uncorrectable ecc errors", -+ "multiple ecc errors", -+ "sdram bytes free", /* not cleared */ -+ "longest interrupt in ticks", -+ "punts of event int's to thread", -+ "reschedules of event int's thread" -+}; -+ -+int -+stats_get_index_name (void *arg, uint index, caddr_t name) -+{ -+ copyout (elan3_device_stats_names[index], name, strlen (elan3_device_stats_names[index]) + 1 /* with \0 */); -+ -+ return (0); -+} -+ -+int -+stats_get_block (void *arg, uint entries, ulong *value) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ -+ if ( entries > ELAN3_NUM_STATS ) /* if space too big only send valid portion */ -+ entries = ELAN3_NUM_STATS; -+ -+ copyout(&dev->Stats, value, sizeof(ulong) * entries); -+ -+ return (0); -+} -+ -+int -+stats_clear_block (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ u_long *ptr = (u_long *) &dev->Stats; -+ int n; -+ -+ for (n = 0; n < ELAN3_NUM_STATS; n++) -+ { -+ switch (n) -+ { -+ case offsetof (ELAN3_STATS, Version)/sizeof(u_long): -+ case offsetof (ELAN3_STATS, SdramBytesFree)/sizeof(u_long): -+ break; -+ default: -+ ptr[n] = (ulong)0; -+ } -+ } -+ return (0); -+} -+ -+int -+get_position (void *user_data, ELAN_POSITION *position) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ copyout(&dev->Position, position, sizeof(ELAN_POSITION)); -+ -+ return (0); -+} -+ -+int -+set_position (void *user_data, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ if (ComputePosition (&dev->Position, nodeId, numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (EINVAL); -+ -+ return (0); -+} -+ -+int -+elan3_register_dev_stats(ELAN3_DEV * dev) -+{ -+ char name[ELAN_STATS_NAME_MAX_LEN+1]; -+ -+ sprintf (name, ELAN3_STATS_DEV_FMT, dev->Instance); -+ -+ elan_stats_register(&dev->StatsIndex, -+ name, -+ sizeof (elan3_device_stats_names)/sizeof (elan3_device_stats_names[0]), -+ &elan3_device_stats_ops, -+ (void *)dev); -+ -+ return (0); -+} -+ -+void -+elan3_deregister_dev_stats(ELAN3_DEV * dev) -+{ -+ elan_stats_deregister(dev->StatsIndex); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elandebug.c linux-2.6.9/drivers/net/qsnet/elan3/elandebug.c ---- clean/drivers/net/qsnet/elan3/elandebug.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elandebug.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,151 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandebug.c,v 1.25 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandebug.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+void -+elan3_debugf (void *p, unsigned int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ -+#if defined (DIGITAL_UNIX) -+#define PREFIX_FMT "[%lx.%08x]" -+#define PREFIX_VAL (int)CURTHREAD() -+#else -+#define PREFIX_FMT "[%lx.%04d]" -+#define PREFIX_VAL (current->pid) -+#endif -+ -+ if ((unsigned long) p > DBG_NTYPES) -+ { -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) p; -+ -+ if (elan3_debug_display_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) != elan3_debug_display_ctxt) -+ return; -+ if (elan3_debug_ignore_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) == elan3_debug_ignore_ctxt) -+ return; -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ sprintf (prefix, PREFIX_FMT " (XXX) ", lbolt, PREFIX_VAL); -+ else -+ sprintf (prefix, PREFIX_FMT " (%03x) ", lbolt, PREFIX_VAL, -+ ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK); -+ } -+ else -+ { -+ char *what; -+ -+ if (elan3_debug_ignore_dev & (1 << ((unsigned long) p))) -+ return; -+ -+ switch ((unsigned long) p) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_KCOMM: what = "kcm"; break; -+ case (int) DBG_ICS: what = "ics"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, PREFIX_FMT " [%s] ", lbolt, PREFIX_VAL, what); -+ else -+ sprintf (prefix, PREFIX_FMT " [%3d] ", lbolt, PREFIX_VAL, (int)(long)what); -+ } -+ -+ { -+ va_list ap; -+ -+ va_start (ap, fmt); -+ qsnet_vdebugf ((((mode & elan3_debug_buffer)?QSNET_DEBUG_BUFFER:0)|((mode & elan3_debug_console)?QSNET_DEBUG_CONSOLE:0)) , prefix, fmt, ap); -+ va_end (ap); -+ } -+} -+ -+ -+void -+elan3_alloc_panicstate (ELAN3_DEV *dev, int allocsdram) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr == NULL) -+ KMEM_ZALLOC (dev->PanicState.RegPtr, E3_Regs *, sizeof (E3_Regs), 1); -+ -+ if (allocsdram) -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] == NULL && dev->SdramBanks[bank].Size) -+ KMEM_ZALLOC (dev->PanicState.Sdram[bank], char *, dev->SdramBanks[bank].Size, 1); -+} -+ -+void -+elan3_free_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr != NULL) -+ KMEM_FREE (dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] != NULL && dev->SdramBanks[bank].Size) -+ KMEM_FREE (dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ bzero (&dev->PanicState, sizeof (dev->PanicState)); -+} -+ -+void -+elan3_save_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr) -+ { -+ printk ("elan%d: saving state on panic .....\n", dev->Devinfo.dev_instance); -+ -+ bcopy ((void *) dev->RegPtr, (void *) dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->SdramBanks[bank].Size && dev->PanicState.Sdram[bank]) -+ elan3_sdram_copyq_from_sdram (dev, (bank << ELAN3_SDRAM_BANK_SHIFT), dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ } -+} -+ -+int -+elan3_assfail (ELAN3_DEV *dev, char *string, char *file, int line) -+{ -+ if (panicstr) -+ return (0); -+ -+ printk ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+ -+#if defined(LINUX) -+ elan3_save_panicstate (dev); -+ -+ panic ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#else -+ cmn_err (CE_PANIC, "elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#endif -+ /*NOTREACHED*/ -+ return (0); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elandev_generic.c linux-2.6.9/drivers/net/qsnet/elan3/elandev_generic.c ---- clean/drivers/net/qsnet/elan3/elandev_generic.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elandev_generic.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,1867 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandev_generic.c,v 1.115.2.2 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_generic.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Module globals, configurable from system file. -+ */ -+u_int elan3_debug = 0; -+u_int elan3_debug_console = 0; -+u_int elan3_debug_buffer = -1; -+u_int elan3_debug_ignore_dev = 0; -+u_int elan3_debug_ignore_kcomm = 0; -+u_int elan3_debug_ignore_ctxt = 0; -+u_int elan3_debug_display_ctxt = 0; -+ -+int eventint_punt_loops; -+int eventint_punt_ticks; -+int eventint_resched_ticks; -+ -+static void InitialiseDmaBuffers (ELAN3_DEV *dev, ioaddr_t CmdPort); -+static int ProbeSdram (ELAN3_DEV *dev); -+static void InitialiseSdram (ELAN3_DEV *dev); -+static void ReEnableErrorInterrupts (void *arg); -+void PollForDmaHungup (void *arg); -+static void elan3_event_interrupt (ELAN3_DEV *dev); -+ -+/* -+ * BaseAddr is ptr to the start of a table aligned on a power of two byte address. -+ * SizePower must be in the range of 6 to 12. It defines the number of valid contexts as -+ * shown below. -+ * -+ * SizePower Valid Contexts Table size in bytes. -+ * 6 64 1k -+ * 7 128 2k -+ * 8 256 4K -+ * 9 512 8k -+ * 10 1024 16k -+ * 11 2048 32k -+ * 12 4096 64k -+ */ -+#define GEN_CONTEXT_PTR(BaseAddr, SizePower) (((E3_uint32) BaseAddr) | \ -+ (~((1 << ((SizePower) - 6)) - 1) & 0x3f)) -+ -+int -+InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ E3_IprocTrapHeader_BE TrapCleanup[4]; -+ E3_ContextControlBlock ContextControlBlock; -+ sdramaddr_t ptr; -+ int res; -+ int i; -+ -+ eventint_punt_loops = 100; -+ eventint_punt_ticks = (hz/100); -+ eventint_resched_ticks = (hz/4); -+ -+ dev->Stats.Version = ELAN3_STATS_VERSION; -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* -+ * The elan should have already been reset, so the interrupt mask -+ * should be 0 and the schedule status register should be set to -+ * its initial state -+ */ -+ ASSERT (dev->InterruptMask == 0); -+ ASSERT ((read_reg32 (dev, Exts.SchCntReg) & HaltStopAndExtTestMask) == Sched_Initial_Value); -+ -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ -+ /* Initialise the cache tags before touching the SDRAM */ -+ /* we initialise them to "map" the bottom of SDRAM */ -+ for (i = 0; i < E3_NumCacheLines; i++) -+ { -+ write_cache_tag (dev, Tags[i][0].Value, 0x0000000000000000ULL); -+ write_cache_tag (dev, Tags[i][1].Value, 0x0000080000000000ULL); -+ write_cache_tag (dev, Tags[i][2].Value, 0x0000100000000000ULL); -+ write_cache_tag (dev, Tags[i][3].Value, 0x0000180000000000ULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ for (i = 0; i < E3_NumCacheLines*(E3_CACHELINE_SIZE/sizeof(E3_uint64)); i++) -+ { -+ write_cache_set (dev, Set0[i], 0xcac1ecac1ecac1e0ULL); -+ write_cache_set (dev, Set1[i], 0xcac1ecac1ecac1e1ULL); -+ write_cache_set (dev, Set2[i], 0xcac1ecac1ecac1e2ULL); -+ write_cache_set (dev, Set3[i], 0xcac1ecac1ecac1e3ULL); -+ } -+#endif -+ -+ if ((res = ProbeSdram(dev)) != ESUCCESS) -+ return (res); -+ -+ /* Enable all cache sets before initialising the sdram allocators */ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg |= CONT_EN_ALL_SETS)); -+ -+ InitialiseSdram (dev); -+ -+ dev->TAndQBase = elan3_sdram_alloc (dev, ELAN3_TANDQ_SIZE); -+ dev->ContextTable = elan3_sdram_alloc (dev, ELAN3_CONTEXT_SIZE); -+ dev->ContextTableSize = ELAN3_NUM_CONTEXTS; -+ dev->CommandPortTraps[0] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CommandPortTraps[1] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CurrentCommandPortTrap = 0; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "InitialiseElan: ContextTable %08lx TAndQ %08lx CommandPortTrap %08lx\n", -+ dev->ContextTable, dev->TAndQBase, dev->CommandPortTraps[0]); -+ -+ /* Allocate the thread amd dma trap areas */ -+ KMEM_ZALLOC (dev->ThreadTrap, THREAD_TRAP *, sizeof (THREAD_TRAP), TRUE); -+ KMEM_ZALLOC (dev->DmaTrap, DMA_TRAP *, sizeof (DMA_TRAP), TRUE); -+ -+ /* Allocate the ctxt table */ -+ KMEM_ZALLOC (dev->CtxtTable, ELAN3_CTXT **, dev->ContextTableSize * sizeof ( ELAN3_CTXT *), TRUE); -+ -+ /* Initialise halt queue list */ -+ dev->HaltOperationsTailpp = &dev->HaltOperations; -+ -+ /* From elan3/code/harness/elanstuff.c */ -+ /* Init the clock. */ -+ write_ureg64 (dev, Clock.NanoSecClock, 0); -+ -+ /* Init the instruction count reg. */ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Init the stats control reg. Must be done before the count regs.*/ -+ write_ureg32 (dev, StatCont.StatsControl, 0); -+ -+ /* Init the stats count regs. */ -+ write_ureg32 (dev, StatCounts[0].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[1].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[2].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[3].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[4].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[5].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[6].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[7].s.StatsCount, 0); -+ -+ /* -+ * Initialise the Context_Ptr and Fault_Base_Ptr -+ */ -+ write_reg32 (dev, Fault_Base_Ptr, dev->TAndQBase + offsetof(E3_TrapAndQueue, IProcSysCntx)); -+ write_reg32 (dev, Context_Ptr, GEN_CONTEXT_PTR (dev->ContextTable, ELAN3_LN2_NUM_CONTEXTS)); -+ -+ /* scrub the TProc Registers */ -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Globals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Outs[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Locals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Ins[i], 0xdeadbabe); -+ -+ /* -+ * Initialise the Queue pointers. Arrange them so that the starting positions are -+ * farthest apart in one set of the cache. Thus 512 bytes apart, but with cntx0 -+ * thread the same as the interrupt queue. -+ */ -+ write_reg32 (dev, TProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ write_reg32 (dev, TProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ -+ write_reg32 (dev, DProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ write_reg32 (dev, DProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ -+ dev->Event_Int_Queue_FPtr = dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80]); -+ write_reg32 (dev, Event_Int_Queue_FPtr, dev->Event_Int_Queue_FPtr); -+ write_reg32 (dev, Event_Int_Queue_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80])); -+ -+ -+ /* Initialise Input_Trap_Base to last 8 Kbytes of trap area, uCode adds the right offset */ -+ write_reg32 (dev, Input_Trap_Base, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0])); -+ -+ /* Ptr to word used to save the SP to when a thread deschedules */ -+ write_reg32 (dev, Thread_SP_Save_Ptr, dev->TAndQBase + offsetof (E3_TrapAndQueue, Thread_SP_Save)); -+ -+ /* Initialise the command trap base */ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[0]); -+ -+ /* Initialise the set event tracing registers */ -+ write_reg32 (dev, Event_Trace_Ptr, 0); -+ write_reg32 (dev, Event_Trace_Mask, 0); -+ -+ /* Initialise Tlb_Line_Value to zero. The TLB cannot be read while either the */ -+ /* uCode or thread proc might be running. Must be set to 0. */ -+ write_reg64 (dev, Tlb_Line_Value, 0); -+ -+ /* Control register. Cache everything, Enable MMU, RefreshRate=3, CasLatency=1, StartSDR */ -+ dev->Cache_Control_Reg |= CONT_MMU_ENABLE | CONT_EN_ALL_SETS | CONT_CACHE_ALL | CONT_ENABLE_ECC; -+ -+#if ELAN3_PAGE_SHIFT == 13 -+ dev->Cache_Control_Reg |= CONT_ENABLE_8K_PAGES; -+#endif -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg); -+ -+ /* -+ * Initialise the context table to be discard for all contexts -+ */ -+ ContextControlBlock.rootPTP = 0; -+ ContextControlBlock.filter = E3_CCB_DISCARD_ALL; -+ ContextControlBlock.VPT_mask = 0; -+ ContextControlBlock.VPT_ptr = 0; -+ -+ for (i = 0, ptr = dev->ContextTable; i < ELAN3_NUM_CONTEXTS; i++, ptr += sizeof (E3_ContextControlBlock)) -+ elan3_sdram_copyl_to_sdram (dev, &ContextControlBlock, ptr, sizeof (E3_ContextControlBlock)); -+ -+ /* From elan3/code/trap_handler/init.c */ -+ /* -+ * Initialise the Trap And Queue area in Elan SDRAM. -+ */ -+ TrapCleanup[0].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[0].s.TrAddr = 0; -+ TrapCleanup[0].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[0].s.TrData0 = 0; -+ TrapCleanup[1].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[1].s.TrAddr = 0; -+ TrapCleanup[1].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[1].s.TrData0 = 0; -+ TrapCleanup[2].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[2].s.TrAddr = 0; -+ TrapCleanup[2].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[2].s.TrData0 = 0; -+ TrapCleanup[3].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[3].s.TrAddr = 0; -+ TrapCleanup[3].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[3].s.TrData0 = 0; -+ -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FSR.Status), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FSR.Status), 0); -+ -+ /* Must now zero all the FSRs so that a subsequent Fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), 16); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 64); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), 64); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), 64); -+ -+ InitialiseDmaBuffers(dev, CmdPort); -+ -+ /* reserve a halt operation for flushing the context filter */ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ /* Allow the Thread/Dma to run */ -+ CLEAR_SCHED_STATUS (dev, HaltThread | HaltDmas); -+ -+ /* Enable All Interrrupts */ -+ SET_INT_MASK (dev, (INT_PciMemErr | INT_SDRamInt | INT_EventInterrupt | INT_LinkError | INT_ComQueue | -+ INT_TProc | INT_CProc | INT_DProc | INT_IProcCh1NonSysCntx | -+ INT_IProcCh1SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh0SysCntx)); -+ -+ /* Take the link out of boundary scan */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* And clear any link errors */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* XXXX: clear discard context 0, AFTER setting up the kernel comms */ -+ CLEAR_SCHED_STATUS (dev, DiscardSysCntxIn | DiscardNonSysCntxIn); -+ -+ /* Start a thread to handle excessive Event Interrrupts */ -+ if (kernel_thread_create (elan3_event_interrupt, (caddr_t) dev) == NULL) -+ { -+ panic ("InitialiseElan: cannot start elan3_event_interrupt\n"); -+ return (EFAIL); -+ } -+ dev->EventInterruptThreadStarted = 1; -+ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ PollForDmaHungup (dev); -+ -+#if defined(IOPROC_PATCH_APPLIED) && !defined(NO_PTRACK) -+ /* Register the device and stats with elanmod for RMS -+ * but only if we've got the coproc patch applied */ -+ dev->DeviceIdx = elan_dev_register(&dev->Devinfo, &elan3_dev_ops, (void *) dev); -+ -+ elan3_register_dev_stats(dev); -+#endif -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseDmaBuffers(ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ register int i; -+ -+ /* GNAT sw-elan3/3908: -+ * Clear down the power on state of the Dma_Desc registers to make sure we don't -+ * try and interpret them when a trap happens. -+ */ -+ write_reg32 (dev, Dma_Desc.dma_type, 0); -+ write_reg32 (dev, Dma_Desc.dma_size, 0); -+ write_reg32 (dev, Dma_Desc.dma_source, 0); -+ write_reg32 (dev, Dma_Desc.dma_dest, 0); -+ write_reg32 (dev, Dma_Desc.dma_destEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_destCookieVProc, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcCookieVProc, 0); -+ -+ /* -+ * The following is a sequence of writes to remove X's from the dma buffers and -+ * registers. It is only safe to write these registers after reset and before any -+ * dma's have been issued. The chip will NOT function corectly if they are written at -+ * any other time or in a different order. -+ */ -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.ResetAckNLdBytesToWr, ((u_longlong_t)0x1000) << 32); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdBytesToRd, ((u_longlong_t)0x100) << 32); -+ -+ for (i=0;i<(4*8);i++) -+ write_reg64 (dev, Dma_Alignment_Port[0], 0); -+ -+ /* -+ * This is used to clear out X's from some of the trap registers. This is required to -+ * prevent the first traps from possibly writting X's into the SDram and upsetting the -+ * ECC value. It requires that the trap save area registers have been set up but does -+ * not require any translations to be ready. -+ */ -+ writel (-1, (void *)(CmdPort + offsetof (E3_CommandPort, SetEvent))); -+ while ((read_reg32 (dev, Exts.InterruptReg) & INT_CProc) == 0) -+ { -+ mb(); -+ DELAY (1); -+ } -+ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PULSE_SCHED_STATUS(dev, RestartCProc); -+} -+ -+void -+FinaliseElan (ELAN3_DEV *dev) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP *chain = NULL; -+ int bank; -+ int indx; -+ int size; -+ unsigned long flags; -+ int level; -+ -+#if defined(IOPROC_PATCH_APPLIED) && !defined(NO_PTRACK) -+ elan_stats_deregister (dev->StatsIndex); -+ elan_dev_deregister(&dev->Devinfo); -+#endif -+ -+ /* Cancel the dma poller */ -+ cancel_timer_fn (&dev->DmaPollTimeoutId); -+ -+ /* release it's halt operation */ -+ ReleaseHaltOperations (dev, 1); -+ -+ /* stop all kernel threads */ -+ dev->ThreadsShouldStop = 1; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ while (dev->EventInterruptThreadStarted && !dev->EventInterruptThreadStopped) -+ { -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Set the interrupt mask to 0 and the schedule control register to run nothing */ -+ SET_INT_MASK (dev, 0); -+ SET_SCHED_STATUS (dev, DiscardNonSysCntxIn | DiscardSysCntxIn | HaltThread | HaltDmas); -+ -+ /* Cancel any link error timeout */ -+ if (timer_fn_queued(&dev->ErrorTimeoutId)) -+ cancel_timer_fn (&dev->ErrorTimeoutId); -+ -+ /* Free of and page tables that have been allocated */ -+ spin_lock (&dev->PtblGroupLock); -+ for(level=0; level<4; level++) -+ { -+ while ((ptg = dev->Level[level].PtblGroupList) != NULL) -+ { -+ dev->Level[level].PtblGroupList = ptg->pg_next; -+ -+ elan3_sdram_free (dev, ptg->pg_addr, PTBL_GROUP_SIZE); -+ FREE_PTBL_GR(ptg); -+ } -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ -+ /* Free of all halt operations */ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((op = dev->FreeHaltOperations) != NULL) -+ { -+ dev->FreeHaltOperations = op->Next; -+ -+ /* Keep a list of 'freed' ops for later KMEM_FREE call */ -+ op->Next = chain; -+ chain = op; -+ } -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ /* Have now dropped the spinlock - can call KMEM_FREE */ -+ while ((op = chain) != NULL) -+ { -+ chain = op->Next; -+ -+ KMEM_FREE (op, sizeof (ELAN3_HALTOP)); -+ } -+ -+ /* Free of the ctxt table */ -+ KMEM_FREE (dev->CtxtTable, dev->ContextTableSize * sizeof (ELAN3_CTXT *)); -+ -+ /* Free of the thread and dma atrap areas */ -+ KMEM_FREE (dev->ThreadTrap, sizeof (THREAD_TRAP)); -+ KMEM_FREE (dev->DmaTrap, sizeof (DMA_TRAP)); -+ -+ /* Free of the memsegs and pages */ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ { -+ if (dev->SdramBanks[bank].Size) -+ { -+ UnmapDeviceRegister (dev, &dev->SdramBanks[bank].Handle); -+ -+ KMEM_FREE (dev->SdramBanks[bank].PtblGroups, sizeof (ELAN3_PTBL_GR *) * (dev->SdramBanks[bank].Size / PTBL_GROUP_SIZE)); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= dev->SdramBanks[bank].Size; indx++, size <<= 1) -+ KMEM_FREE (dev->SdramBanks[bank].Bitmaps[indx], sizeof (bitmap_t)*BT_BITOUL(dev->SdramBanks[bank].Size/size)); -+ } -+ } -+ elan3_sdram_fini (dev); -+} -+ -+#define INIT_PATTERN(offset) (0xBEEC000000000011ull | ((u_longlong_t)(offset)) << 16) -+#define FREE_PATTERN(offset) (0xBEEC000000000022ull | ((u_longlong_t)(offset)) << 16) -+ -+static int -+ProbeSdram (ELAN3_DEV *dev) -+{ -+ int Instance; -+ u_int Bank; -+ int MemSpaceSize; -+ int BankMaxSize; -+ int BankOffset; -+ int BankSize; -+ ioaddr_t BankBase; -+ ioaddr_t PageBase; -+ ioaddr_t PageBase1; -+ ioaddr_t PageBase2; -+ DeviceMappingHandle BankHandle; -+ DeviceMappingHandle PageHandle; -+ DeviceMappingHandle PageHandle1; -+ DeviceMappingHandle PageHandle2; -+ register int i; -+ u_longlong_t value; -+ extern int sdram_bank_limit; -+ -+ /* NOTE: The Cache control register is set to only enable cache set 0 */ -+ /* and has ECC disabled */ -+ Instance = dev->Instance; -+ -+ /* Determine the size of the SDRAM from the BAR register */ -+ if (DeviceRegisterSize (dev, ELAN3_BAR_SDRAM, &MemSpaceSize) != ESUCCESS) -+ { -+ printk ("elan%d: cannot determine SDRAM size\n", Instance); -+ return (EFAIL); -+ } -+ -+ elan3_sdram_init (dev); -+ -+ BankMaxSize = MemSpaceSize / ELAN3_SDRAM_NUM_BANKS; -+ -+ for (Bank = 0; Bank < ELAN3_SDRAM_NUM_BANKS; Bank++) -+ { -+ BankOffset = Bank * BankMaxSize; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: Probing RAM Bank %d (max size %08x)\n", Instance, Bank, BankMaxSize); -+ -+ /* Probe the memory bank by mapping two pages that are the size of the cache apart */ -+ /* this guarantees that when we store the second pattern we displace the first pattern */ -+ /* from the cache, also store the second pattern again the size of the cache up again */ -+ /* to ensure that the SDRAM wires don't stay floating at pattern1 */ -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, PAGESIZE, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase1, BankOffset + ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle1) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase2, BankOffset + 2*ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle2) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ continue; -+ } -+ -+#define PATTERN0 (0x5555555555555555ull) -+#define PATTERN1 (0xAAAAAAAAAAAAAAAAull) -+ writeq (PATTERN0, (u_longlong_t *) BankBase); -+ writeq (PATTERN1, (u_longlong_t *) PageBase1); -+ writeq (PATTERN1, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ -+ if (value != PATTERN0) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ -+ writeq (PATTERN1, (u_longlong_t *) BankBase); -+ writeq (PATTERN0, (u_longlong_t *) PageBase1); -+ writeq (PATTERN0, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ if (value != PATTERN1) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ -+ /* Bank is present, so work out its size, we store tha maximum size at the base */ -+ /* and then store the address at each address on every power of two address until */ -+ /* we reach the minimum mappable size (PAGESIZE), we then read back the value at the */ -+ /* base to determine the bank size */ -+ writeq ((u_longlong_t) BankMaxSize, (u_longlong_t *) BankBase); -+ -+ for (BankSize = (BankMaxSize>>1); BankSize > PAGESIZE; BankSize >>= 1) -+ { -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase, BankOffset + BankSize, PAGESIZE, &PageHandle) == ESUCCESS) -+ { -+ writeq (BankSize, (u_longlong_t *) PageBase); -+ UnmapDeviceRegister (dev, &PageHandle); -+ } -+ } -+ mmiob(); -+ -+ BankSize = (u_long) readq ((u_longlong_t *) BankBase); -+ -+ if (sdram_bank_limit == 0 || BankSize <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %dK\n", Instance, Bank, BankSize / 1024); -+ else -+ { -+ BankSize = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit memory bank %d to %dK\n", Instance, Bank, BankSize / 1024); -+ } -+ -+ UnmapDeviceRegister (dev, &BankHandle); -+ -+ /* Now map all of this bank into the kernel */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, BankSize, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot initialise memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ dev->SdramBanks[Bank].Size = BankSize; -+ dev->SdramBanks[Bank].Mapping = BankBase; -+ dev->SdramBanks[Bank].Handle = BankHandle; -+ -+#ifndef CONFIG_MPSAS -+ /* Initialise it for ECC */ -+ preemptable_start { -+ for (i = 0; i < BankSize; i += 8) -+ { -+ elan3_sdram_writeq (dev, (Bank << ELAN3_SDRAM_BANK_SHIFT) | i, INIT_PATTERN(BankOffset+i)); -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+#endif -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseSdram (ELAN3_DEV *dev) -+{ -+ int indx, size, b; -+ -+ for (b = 0; b < ELAN3_SDRAM_NUM_BANKS; b++) -+ { -+ ELAN3_SDRAM_BANK *bank = &dev->SdramBanks[b]; -+ -+ if (bank->Size == 0) -+ continue; -+ -+ /* allocate a ptbl group pointer for each possible ptbl group in this bank */ -+ KMEM_ZALLOC (bank->PtblGroups, ELAN3_PTBL_GR **, sizeof (ELAN3_PTBL_GR *) * bank->Size/PTBL_GROUP_SIZE, TRUE); -+ -+ /* allocate the buddy allocator bitmaps */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->Size; indx++, size <<= 1) -+ KMEM_ZALLOC (bank->Bitmaps[indx], bitmap_t *, sizeof (bitmap_t)*BT_BITOUL(bank->Size/size), TRUE); -+ -+ /* and add it to the sdram buddy allocator */ -+ elan3_sdram_add (dev, (b << ELAN3_SDRAM_BANK_SHIFT), (b << ELAN3_SDRAM_BANK_SHIFT) + bank->Size); -+ } -+} -+ -+#include -+ -+int -+ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency) -+{ -+ DeviceMappingHandle RomHandle; -+ unsigned char *RomBase; -+ unsigned char *PCIDataPtr; -+ unsigned char *VPDPtr; -+ unsigned char *lim; -+ int type; -+ int i, len, len2; -+ char name[3] = "XX"; -+ char value[256]; -+ int finished = 0; -+ -+ -+ /* default valud for CAS latency is 3 */ -+ (*CasLatency) = CAS_LATENCY_3; -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_EBUS, (ioaddr_t *) &RomBase, ELAN3_EBUS_ROM_OFFSET, ELAN3_EBUS_ROM_SIZE, &RomHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot map ROM\n", dev->Instance); -+ return (EFAIL); -+ } -+ -+ /* Check the ROM signature */ -+ if (RomBase[0] != 0x55 || RomBase[1] != 0xAA) -+ { -+ printk ("elan%d: Invalid ROM signature %02x %02x\n", dev->Instance, RomBase[0], RomBase[1]); -+ return (ESUCCESS); -+ } -+ -+ PCIDataPtr = RomBase + ((RomBase[0x19] << 8) | RomBase[0x18]); -+ -+ /* check the pci data structure */ -+ if (PCIDataPtr[0] != 'P' || PCIDataPtr[1] != 'C' || PCIDataPtr[2] != 'I' || PCIDataPtr[3] != 'R') -+ { -+ printk ("elan%d: Invalid PCI Data structure\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ /* Extract the VPD pointer */ -+ VPDPtr = RomBase + ((PCIDataPtr[9] << 8) | PCIDataPtr[8]); -+ -+ if (VPDPtr == RomBase) -+ { -+ printk ("elan%d: No Vital Product Data\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ while (! finished) -+ { -+ type = *VPDPtr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(VPDPtr++); -+ len += *(VPDPtr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ printk ("elan%d: ", dev->Instance); -+ for (i = 0; i < len; i++) -+ printk ("%c", *VPDPtr++); -+ printk ("\n"); -+ break; -+ -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = VPDPtr + len; VPDPtr < lim; ) -+ { -+ name[0] = *VPDPtr++; -+ name[1] = *VPDPtr++; -+ len2 = *VPDPtr++; -+ -+ for (i = 0; i < len2 && VPDPtr < lim; i++) -+ value[i] = *VPDPtr++; -+ value[i] = '\0'; -+ -+ if (! strcmp (name, "SN")) -+ printk ("elan%d: Serial Number - %s\n", dev->Instance, value); -+ -+ if (! strcmp (name, "Z0")) -+ (*CasLatency) = (strcmp (value, "CAS_LATENCY_2") ? CAS_LATENCY_3 : CAS_LATENCY_2); -+ } -+ break; -+ -+ default: -+ printk ("elan%d: unknown large resource %x\n", dev->Instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ printk ("elan%d: unknown small resource %x\n", dev->Instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ UnmapDeviceRegister (dev, &RomHandle); -+ return (ESUCCESS); -+} -+ -+void -+ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, ELAN3_PTBL_GR *ptg) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE] = ptg; -+} -+ -+ELAN3_PTBL_GR * -+ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ return (dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE]); -+} -+ -+void -+ElanFlushTlb (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ BumpStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+KillNegativeDma (ELAN3_DEV *dev, void *arg) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ E3_Status_Reg status; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (read_reg32 (dev, Exts.InterruptReg) & INT_DProcHalted); -+ -+ /* Initialise the trap to deliver to the offending user process */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ trap->PacketInfo.Value = 0; -+ -+ bzero (&trap->FaultSave, sizeof (trap->FaultSave)); -+ bzero (&trap->Data0, sizeof (trap->Data0)); -+ bzero (&trap->Data1, sizeof (trap->Data1)); -+ bzero (&trap->Data2, sizeof (trap->Data2)); -+ bzero (&trap->Data3, sizeof (trap->Data3)); -+ -+ /* run down the kernel dma run queue and panic on a -ve length dma */ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ panic ("KillNegativeDma: -ve sized kernel dma\n"); -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* run down the user dma run queue and "remove" and -ve length dma's */ -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ { -+ PRINTF3 (NULL, DBG_INTR, "KillNegativeDma: remove dma - context %d size %d SuspendAddr %x\n", -+ trap->Desc.s.dma_u.s.Context, trap->Desc.s.dma_size, trap->Status.s.SuspendAddr); -+ -+ trap->Status.s.TrapType = trap->Status.s.SuspendAddr; -+ trap->Status.s.Context = trap->Desc.s.dma_u.s.Context; -+ -+ DeliverDProcTrap (dev, trap, 0); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ trap->Desc.s.dma_type = 0; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = (E3_Addr) 0; -+ trap->Desc.s.dma_srcEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &trap->Desc, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: unlock dma processor\n"); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ -+ DELAY (10); -+ -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ } -+ -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: dma processor restarted\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+ForceTProcTrap (ELAN3_DEV *dev, void *arg) -+{ -+ printk ("elan%d: forced tproc trap .....\n", dev->Instance); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+PollForDmaHungup (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ E3_Status_Reg status; -+ E3_uint32 insn1, insn3; -+ register int i; -+ -+ if (read_reg32 (dev, Dma_Desc.dma_size) > E3_MAX_DMA_SIZE) -+ { -+ status.Status = read_reg32 (dev, Exts.DProcStatus); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: size %x SuspendAddr %x\n", read_reg32 (dev, Dma_Desc.dma_size), status.s.SuspendAddr); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ printk ("elan%d: PollForDmaHungup: size %x context %d SuspendAddr %x\n", -+ dev->Instance, read_reg32 (dev, Dma_Desc.dma_size), -+ status.s.Context, status.s.SuspendAddr); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: dma_size %x status %x\n", -+ read_reg32 (dev, Dma_Desc.dma_size), status.Status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, KillNegativeDma, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return; -+ } -+ } -+ -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ if (status.s.WakeupFunction == WakeupStopped) -+ { -+ E3_uint32 PC = read_reg32 (dev, ExecutePC); -+ -+ /* See if it's likely that the thread is really "stuck" on a waitevent/break -+ * instruction ......... */ -+ for (i = 0; i < 10; i++) -+ { -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ insn1 = read_reg32 (dev, IBufferReg[1]); -+ insn3 = read_reg32 (dev, IBufferReg[3]); -+ -+ if (! (status.s.WakeupFunction == WakeupStopped && read_reg32 (dev, ExecutePC) == PC && /* stopping and it could be a break/waitevent */ -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000 || /* break instruction */ -+ insn1 == 0x81b00000 || insn3 == 0x81b00000))) /* waitevent instruction */ -+ break; -+ } -+ -+ if (i == 10) -+ { -+ printk ("elan%d: forcing tproc trap from %s instruction at pc %x\n", dev->Instance, -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000) ? "break" : "waitevent", PC); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_TProcHalted, ForceTProcTrap, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ } -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 10); -+} -+ -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+ReEnableErrorInterrupts (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ReEnableErrorInterrupts: IntMask=%x\n", read_reg32 (dev, Exts.InterruptMask)); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+CheckForExcessiveErrorRate (ELAN3_DEV *dev) -+{ -+ if (dev->ErrorTime == (lbolt/hz)) -+ { -+ if (dev->ErrorsPerTick++ > 100) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "CheckForExcessiveErrorRate: too many links errors, disabling interrupt\n"); -+ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->ErrorTimeoutId, ReEnableErrorInterrupts, (void *) dev, hz); -+ } -+ } -+ else -+ { -+ dev->ErrorTime = (lbolt/hz); -+ dev->ErrorsPerTick = 0; -+ } -+} -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+HandlePciMemErr (ELAN3_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandlePciMemErr : masking out interrupt\n"); -+ -+ ElanBusError (dev); -+ panic ("elan pci memory error\n"); -+} -+ -+static void -+HandleSDRamInterrupt (ELAN3_DEV *dev) -+{ -+ E3_uint32 EccStatus0 = read_reg32 (dev, ECC_STATUS0); -+ E3_uint32 EccStatus1 = read_reg32 (dev, ECC_STATUS1); -+ unsigned long flags; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "elan: ECC error - Addr=%x UE=%x CE=%x ME=%x Syn=%x\n", -+ EccStatus0 & ECC_ADDR_MASK, EccStatus0 & ECC_UE_MASK, -+ EccStatus0 & ECC_CE_MASK, EccStatus0 & ECC_ME_MASK, -+ EccStatus1 & ECC_SYN_MASK); -+ -+ if (EccStatus0 & (ECC_UE_MASK|ECC_CE_MASK)) -+ { -+ printk ("elan%d: ECC memory error (Address=%08x Syndrome=%02x %s%s%s)\n", -+ dev->Instance, -+ (EccStatus0 & ECC_ADDR_MASK), (EccStatus1 & ECC_SYN_MASK), -+ (EccStatus0 & ECC_UE_MASK) ? "Uncorrectable " : "", -+ (EccStatus0 & ECC_CE_MASK) ? "Correctable " : "", -+ (EccStatus0 & ECC_ME_MASK) ? "Multiple Errors " : ""); -+ } -+ -+ if (EccStatus0 & ECC_UE_MASK) -+ panic ("elan: Uncorrectable ECC memory error"); -+ if (EccStatus0 & ECC_CE_MASK) -+ BumpStat (dev, CorrectableErrors); -+ if (EccStatus0 & ECC_ME_MASK) -+ BumpStat (dev, MultipleErrors); -+ -+ /* -+ * Clear the interrupt and reset the error flags. -+ * Note. Might loose an UE or CE if it occurs between reading the status and -+ * clearing the interrupt. I don't think this matters very much as the -+ * status reg will only be used to identify a bad simm. -+ */ -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | CLEAR_SDRAM_ERROR); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static int -+HandleEventInterrupt (ELAN3_DEV *dev, int nticks, unsigned long *flags) -+{ -+ E3_uint32 Fptr = dev->Event_Int_Queue_FPtr; -+ E3_uint32 Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ long tlim = lbolt + nticks; -+ long count = 0; -+ ELAN3_CTXT *ctxt; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT ((dev->InterruptMask & INT_EventInterrupt) == 0); -+ -+ while (Fptr != Bptr) -+ { -+ while (Fptr != Bptr) -+ { -+ E3_EventInt_BE EvInt; -+ E3_uint32 Context; -+ -+ /* If we're running in the interrupt handler and have seen a high -+ * rate of event interrupts then punt to the thread - however on -+ * Linux the elan interrupt handler can block the timer interrupt, -+ * and so lbolt (jiffies) is not incremented, hence we punt after -+ a number of loops instead */ -+#if defined(LINUX) -+ if (in_interrupt() && ++count > eventint_punt_loops) -+ return (EAGAIN); -+#endif -+ -+ if (nticks && ((int) (lbolt - tlim)) > 0) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x punting to thread\n", Fptr, Bptr); -+ return (EAGAIN); -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, Fptr, (void *) &EvInt, 8); /* PCI read */ -+ -+ /* The context number is held in the top 16 bits of the EventContext */ -+ Context = (EvInt.s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Context %d : Cookie %x\n", Context, EvInt.s.IntCookie); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Context); -+ -+ /* Work out new fptr, and store it in the device, since we'll be dropping the IntrLock */ -+ Fptr = E3_EVENT_INTQ_NEXT(Fptr); -+ dev->Event_Int_Queue_FPtr = Fptr; -+ -+ if (ctxt == NULL) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x context %d invalid\n", -+ Fptr, Bptr, Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ BumpStat (dev, EventInterrupts); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ QueueEventInterrupt (ctxt, EvInt.s.IntCookie); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ -+ /* Re-read the FPtr, since we've dropped the IntrLock */ -+ Fptr = dev->Event_Int_Queue_FPtr; -+ -+ /* Store the new FPtr to the elan, this also clears the interrupt. */ -+ write_reg32 (dev, Event_Int_Queue_FPtr, Fptr); /* PCI write */ -+ -+ mmiob(); -+ } -+ -+ mb(); -+ Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+SetLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ res = EAGAIN; -+ else -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "SetLinkBoundaryScan: setting link into boundary scan mode\n"); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+ClearLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "ClearLinkBoundaryScan: taking link out of boundary scan mode\n"); -+ -+ /* -+ * Take the link out of boundary scan -+ */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* -+ * Clear any link errors. -+ */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* -+ * Re-enable the error interrupts. -+ */ -+ if (! timer_fn_queued(&dev->ErrorTimeoutId)) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And stop the inputter from discarding all packets. -+ */ -+ if (--dev->DiscardAllCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+WriteBoundaryScanValue (ELAN3_DEV *dev, int value) -+{ -+ int res = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: driving value 0x%x onto link\n", value); -+ SET_SCHED_LINK_VALUE (dev, 1, value); -+ -+ res = read_reg32 (dev, Exts.LinkState); -+ -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: return 0x%x\n", res); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+ReadBoundaryScanValue(ELAN3_DEV *dev, int link) -+{ -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: set linkval 0x%x\n", link); -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ } -+ res = read_reg32 (dev, Exts.LinkState); -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: return 0x%x\n", res); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static int -+ReadLinkVal (ELAN3_DEV *dev, int link) -+{ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ -+ return (read_reg32 (dev, Exts.LinkState)); -+} -+ -+static void -+HandleLinkError (ELAN3_DEV *dev) -+{ -+ E3_uint32 value = read_reg32 (dev, Exts.LinkErrorTypes); -+ -+ PRINTF1 (DBG_DEVICE, DBG_LINKERR, "HandleLinkError: LinkErrorTypes %08x - clearing\n", value); -+ -+ if (value & LS_LockError) BumpStat (dev, LockError); -+ if (value & LS_DeskewError) BumpStat (dev, DeskewError); -+ if (value & LS_PhaseError) BumpStat (dev, PhaseError); -+ if (value & LS_DataError) BumpStat (dev, DataError); -+ if (value & LS_FifoOvFlow0) BumpStat (dev, FifoOvFlow0); -+ if (value & LS_FifoOvFlow1) BumpStat (dev, FifoOvFlow1); -+ -+ if (value & LS_DataError) -+ dev->Stats.LinkErrorValue = ReadLinkVal (dev, 12) | (ReadLinkVal (dev, 13) << 9); -+ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static void -+HandleErrorInterrupt (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ if (Pend & INT_PciMemErr) -+ HandlePciMemErr (dev); -+ -+ if (Pend & INT_SDRamInt) -+ HandleSDRamInterrupt (dev); -+ -+ if (Pend & INT_LinkError) -+ HandleLinkError (dev); -+} -+ -+static void -+HandleAnyIProcTraps (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 RestartBits = 0; -+ -+ if (Pend & INT_IProcCh0SysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrData[0])); -+ -+ RestartBits |= RestartCh0SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh1SysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrData[0])); -+ -+ RestartBits |= RestartCh1SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh0NonSysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrData[0])); -+ -+ RestartBits |= RestartCh0NonSysCntx; -+ } -+ -+ -+ if (Pend & INT_IProcCh1NonSysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrData[0])); -+ RestartBits |= RestartCh1NonSysCntx; -+ } -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+} -+ -+static void -+elan3_event_interrupt (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_event_int"); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ for (;;) -+ { -+ /* Make sure we never sleep with the EventInterrupt disabled */ -+ if (! (dev->InterruptMask & INT_EventInterrupt)) -+ { -+ if (HandleEventInterrupt (dev, eventint_resched_ticks, &flags) != ESUCCESS) -+ BumpStat (dev, EventRescheds); -+ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ } -+ -+ if (dev->ThreadsShouldStop) -+ break; -+ -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ -+ dev->EventInterruptThreadStopped = 1; -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ kernel_thread_exit (); -+} -+ -+int -+InterruptHandler (ELAN3_DEV *dev) -+{ -+ E3_uint32 Mask; -+ E3_uint32 Pend; -+ E3_uint32 RestartBits; -+ int deliverDProcTrap; -+ int deliverTProcTrap; -+ static long lboltsave; -+ int loop_count = 0; -+ unsigned long flags; -+ int tproc_delivered; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ BumpStat (dev, Interrupts); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ /* Save the lbolt so we know how long in do loop or in event handling */ -+ lboltsave = lbolt; -+ -+ if ((Pend & Mask) == INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (ESUCCESS); -+ } -+ -+ if ((Pend & Mask) == 0) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Spurious Pend %x Mask %x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EFAIL); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ do { -+ loop_count++; -+ RestartBits = 0; -+ -+ if (Pend & Mask & (INT_CProc | INT_ComQueue)) -+ HandleCProcTrap (dev, Pend, &Mask); -+ -+ tproc_delivered = 0; -+ -+ if (Pend & Mask & INT_TProc) { -+ ELAN_REG_REC(Pend); -+ tproc_delivered = 1; -+ deliverTProcTrap = HandleTProcTrap (dev, &RestartBits); -+ } -+ else -+ deliverTProcTrap = 0; -+ -+ if (Pend & Mask & INT_DProc) -+ deliverDProcTrap = HandleDProcTrap (dev, &RestartBits); -+ else -+ deliverDProcTrap = 0; -+ -+ ASSERT ((RestartBits & RestartDProc) == 0 || (read_reg32 (dev, Exts.DProcStatus.Status) >> 29) == 4); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); /* Restart any processors which had trapped. */ -+ SET_INT_MASK (dev, Mask); /* And install the new interrupt mask */ -+ -+ if ((Pend & Mask & INT_TProc) && deliverTProcTrap) -+ DeliverTProcTrap (dev, dev->ThreadTrap, Pend); -+ -+ if ((Pend & Mask & INT_DProc) && deliverDProcTrap) -+ DeliverDProcTrap (dev, dev->DmaTrap, Pend); -+ -+ if (Pend & Mask & INT_Inputters) -+ HandleAnyIProcTraps (dev, Pend); -+ -+ if (Pend & Mask & INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (loop_count == 1 && HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) /* always punt to the thread if we've */ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); /* been round the loop once */ -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ } -+ -+ if (Pend & (INT_Halted | INT_Discarding)) -+ ProcessHaltOperations (dev, Pend); -+ -+ if (Pend & Mask & INT_ErrorInterrupts) -+ HandleErrorInterrupt (dev, Pend); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ if (tproc_delivered) -+ ELAN_REG_REC(Pend); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ } while ((Pend & Mask) != 0); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "InterruptHandler: lbolt is %lx; start lbolt is %lx\n", -+ lbolt, lboltsave); -+ -+ return (ESUCCESS); -+} -+ -+void -+SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp) -+{ -+ E3_uint32 HaltMask = dev->HaltOperationsMask; -+ E3_uint32 Mask = Maskp ? *Maskp : dev->InterruptMask; -+ E3_uint32 ClearBits = 0; -+ E3_uint32 SetBits = 0; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: HaltOperationsMask=%x HaltAll=%d HaltDmaDequeue=%d HaltThread=%d DiscardAll=%d\n", -+ HaltMask, dev->HaltAllCount, dev->HaltDmaDequeueCount, dev->HaltThreadCount, dev->DiscardAllCount); -+ -+ if (dev->FlushCommandCount) -+ SetBits |= FlushCommandQueues; -+ -+ if ((HaltMask & INT_DProcHalted) || dev->HaltAllCount) -+ { -+ SetBits |= HaltDmas | HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if (dev->HaltDmaDequeueCount) -+ { -+ SetBits |= HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if ((HaltMask & INT_TProcHalted) || dev->HaltAllCount || dev->HaltThreadCount) -+ { -+ SetBits |= HaltThread; -+ if (Pend & INT_TProcHalted) -+ Mask &= ~INT_TProcHalted; -+ else -+ Mask |= INT_TProcHalted; -+ } -+ -+ if ((HaltMask & INT_DiscardingSysCntx) || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardSysCntxIn; -+ if (Pend & INT_DiscardingSysCntx) -+ Mask &= ~INT_DiscardingSysCntx; -+ else -+ Mask |= INT_DiscardingSysCntx; -+ } -+ -+ if ((HaltMask & INT_DiscardingNonSysCntx) || dev->DiscardNonContext0Count || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardNonSysCntxIn; -+ if (Pend & INT_DiscardingNonSysCntx) -+ Mask &= ~INT_DiscardingNonSysCntx; -+ else -+ Mask |= INT_DiscardingNonSysCntx; -+ } -+ -+ if (dev->HaltNonContext0Count) -+ SetBits |= StopNonSysCntxs; -+ -+ ClearBits = SetBits ^ (FlushCommandQueues | HaltDmas | HaltDmaDequeue | HaltThread | -+ DiscardSysCntxIn | DiscardNonSysCntxIn | StopNonSysCntxs); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: SetBits=%x InterruptMask=%x InterruptReg=%x Mask=%x\n", -+ SetBits, dev->InterruptMask, read_reg32 (dev, Exts.InterruptReg), Mask); -+ -+ MODIFY_SCHED_STATUS (dev, SetBits, ClearBits); -+ -+ if (Maskp) -+ *Maskp = Mask; /* copyback new interrupt mask */ -+ else -+ SET_INT_MASK(dev, Mask); -+} -+ -+void -+FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+int -+ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep) -+{ -+ ELAN3_HALTOP *op; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((dev->NumHaltOperations - dev->ReservedHaltOperations) < count) -+ { -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ KMEM_ZALLOC (op, ELAN3_HALTOP *, sizeof (ELAN3_HALTOP), cansleep); -+ -+ if (op == NULL) -+ return (FALSE); -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ -+ dev->NumHaltOperations++; -+ -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ } -+ -+ dev->ReservedHaltOperations += count; -+ -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ return (TRUE); -+} -+ -+void -+ReleaseHaltOperations (ELAN3_DEV *dev, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ dev->ReservedHaltOperations -= count; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+void -+QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement) -+{ -+ ELAN3_HALTOP *op; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock (&dev->FreeHaltLock); -+ op = dev->FreeHaltOperations; -+ -+ ASSERT (op != NULL); -+ -+ dev->FreeHaltOperations = op->Next; -+ spin_unlock (&dev->FreeHaltLock); -+ -+ op->Mask = ReqMask; -+ op->Function = (void (*)(void *, void *))Function; -+ op->Arguement = Arguement; -+ -+ dev->HaltOperationsMask |= ReqMask; /* Add our bits to the global bits needed. */ -+ SetSchedStatusRegister (dev, Pend, Maskp); /* Set the control register and the interrupt mask */ -+ -+ /* -+ * If the condition is already satisfied, then SetSchedStatusRegister will -+ * have masked out the interrupt, so re-enable it now to take it straight -+ * away -+ */ -+ if (Maskp == NULL) -+ { -+ if ((read_reg32 (dev, Exts.InterruptReg) & ReqMask) == ReqMask) -+ ENABLE_INT_MASK (dev, ReqMask); -+ } -+ else -+ { -+ if ((Pend & ReqMask) == ReqMask) -+ *Maskp |= ReqMask; -+ } -+ -+ *dev->HaltOperationsTailpp = op; /* Queue at end of list, since ProcessHaltOperations */ -+ dev->HaltOperationsTailpp = &op->Next; /* drops the IntrLock while running down the list */ -+ op->Next = NULL; -+} -+ -+void -+ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 Mask; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP **prevp; -+ E3_uint32 haltMask; -+ ELAN3_HALTOP *next; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: Pend %x\n", Pend); -+ -+ for (;;) -+ { -+ ELAN3_HALTOP *head = NULL; -+ ELAN3_HALTOP **tailp = &head; -+ -+ /* -+ * Generate a list of halt operations which can be called now. -+ */ -+ for (haltMask = 0, prevp = &dev->HaltOperations; (op = *prevp) != NULL; ) -+ { -+ if ((Pend & op->Mask) != op->Mask) -+ { -+ haltMask |= op->Mask; -+ prevp = &op->Next; -+ } -+ else -+ { -+ *prevp = op->Next; /* remove from list */ -+ if (op->Next == NULL) -+ dev->HaltOperationsTailpp = prevp; -+ -+ *tailp = op; /* add to local list */ -+ op->Next = NULL; -+ tailp = &op->Next; -+ } -+ } -+ -+ if (head == NULL) /* nothing to do, so update */ -+ { /* the schedule status register */ -+ dev->HaltOperationsMask = haltMask; /* and the interrupt mask */ -+ SetSchedStatusRegister (dev, Pend, NULL); -+ return; -+ } -+ -+ /* -+ * flush the command queues, before calling any operations -+ */ -+ Mask = dev->InterruptMask; -+ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ if (dev->HaltThreadCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ RELEASE_CPUS(); -+ -+ if (--dev->HaltThreadCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ } -+ -+ if (read_reg32 (dev, Exts.InterruptReg) & INT_CProc) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: command processor has trapped\n"); -+ HandleCProcTrap (dev, Pend, &Mask); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: interrupt mask %08x -> %08x\n", -+ dev->InterruptMask, Mask); -+ -+ SET_INT_MASK (dev, Mask); -+ spin_unlock (&dev->IntrLock); -+ -+ /* -+ * now process the list of operations -+ * we have -+ */ -+ for (op = head; op != NULL; op = next) -+ { -+ next = op->Next; -+ -+ op->Function (dev, op->Arguement); -+ -+ FreeHaltOperation (dev, op); -+ } -+ -+ spin_lock (&dev->IntrLock); -+ } -+} -+ -+int -+ComputePosition (ELAN_POSITION *pos, unsigned nodeId, unsigned numNodes, unsigned numDownLinksVal) -+{ -+ int i, lvl, n; -+ char numDownLinks[ELAN_MAX_LEVELS]; -+ -+ if (nodeId >= numNodes) -+ return (EINVAL); -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, numDownLinksVal >>= 4) -+ numDownLinks[i] = numDownLinksVal & 7; -+ -+ for (lvl = 0, n = numNodes; n > ((lvl % 3) == 2 ? 8 : 4) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = 4; -+ -+ if ((n % numDownLinks[lvl]) != 0) -+ return (EINVAL); -+ -+ n /= numDownLinks[lvl]; -+ } -+ -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = n; -+ -+ if (numDownLinks[lvl] != n) -+ return (EINVAL); -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = numDownLinks[lvl - i]; -+ -+ pos->pos_nodes = numNodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeId; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elandev_linux.c linux-2.6.9/drivers/net/qsnet/elan3/elandev_linux.c ---- clean/drivers/net/qsnet/elan3/elandev_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elandev_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,2395 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: elandev_linux.c,v 1.112.2.7 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,2,0) -+#error please use a 2.2 series kernel or newer -+#endif -+ -+/* Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN3_DEVICE_MASK 0x3F -+ -+#define ELAN3_MINOR_CONTROL 0 -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+#define ELAN3_MINOR_SHIFT 6 -+ -+#define ELAN3_DEVICE(inode) (MINOR(inode->i_rdev) & ELAN3_DEVICE_MASK) -+#define ELAN3_MINOR(inode) (MINOR(inode->i_rdev) >> ELAN3_MINOR_SHIFT) -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -+# define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+# define IRQ_RETVAL(x) -+#endif -+ -+#if defined(LINUX_SPARC) || defined(LINUX_PPC64) -+#define __io_remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#elif defined(NO_RMAP) -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#else -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(vma,from,offset,size,prot) -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) -+#define __remap_page_range(from,offset,size,prot) remap_pfn_range(vma,from,(offset)>>PAGE_SHIFT,size,prot) -+#else -+#define __remap_page_range(from,offset,size,prot) remap_page_range(vma,from,offset,size,prot) -+#endif -+#endif -+ -+/* -+ * Function prototypes. -+ */ -+static int elanattach(int instance, struct pci_dev *pcidev); -+static int elandetach(int instance); -+ -+static int elan3_open (struct inode *inode, struct file *file); -+static int elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan3_mmap (struct file *file, struct vm_area_struct *vm_area); -+static int elan3_release (struct inode *inode, struct file *file); -+ -+static int elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer); -+static int elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static irqreturn_t InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs); -+ -+static int ConfigurePci(ELAN3_DEV *dev); -+static int ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr); -+ -+static void elan3_shutdown_devices(int panicing); -+ -+/* -+ * Globals. -+ */ -+static ELAN3_DEV *elan3_devices[ELAN3_MAX_CONTROLLER]; -+static int NodeId = ELAN3_INVALID_NODE; -+static int NumNodes; -+static int DownLinks; -+static int RandomRoutingDisabled; -+int BackToBackMaster; -+int BackToBackSlave; -+int enable_sdram_writecombining; -+int sdram_bank_limit; -+extern int LwpNice; -+ -+char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+int elan_reg_rec_index; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan3 Device Driver"); -+ -+MODULE_LICENSE("GPL"); -+ -+module_param(NodeId,uint, 0); -+module_param(NumNodes,uint, 0); -+module_param(RandomRoutingDisabled,uint, 0); -+module_param(DownLinks,uint, 0); -+module_param(BackToBackMaster,uint, 0); -+module_param(BackToBackSlave,uint, 0); -+module_param(LwpNice, uint, 0); -+module_param(elan3_debug, uint, 0); -+module_param(elan3_debug_console, uint, 0); -+module_param(elan3_debug_buffer, uint, 0); -+module_param(elan3mmu_debug, uint, 0); -+module_param(sdram_bank_limit, uint, 0); -+ -+/* elan3/os/context.c */ -+EXPORT_SYMBOL(elan3_alloc); -+EXPORT_SYMBOL(elan3_attach); -+EXPORT_SYMBOL(elan3_doattach); -+EXPORT_SYMBOL(elan3_free); -+EXPORT_SYMBOL(elan3_detach); -+EXPORT_SYMBOL(elan3_dodetach); -+EXPORT_SYMBOL(elan3_block_inputter); -+EXPORT_SYMBOL(CheckCommandQueueFlushed); -+ -+/* elan3/os/sdram.c */ -+EXPORT_SYMBOL(elan3_sdram_alloc); -+EXPORT_SYMBOL(elan3_sdram_free); -+EXPORT_SYMBOL(elan3_sdram_to_phys); -+EXPORT_SYMBOL(elan3_sdram_writeb); -+EXPORT_SYMBOL(elan3_sdram_writew); -+EXPORT_SYMBOL(elan3_sdram_writel); -+EXPORT_SYMBOL(elan3_sdram_writeq); -+EXPORT_SYMBOL(elan3_sdram_readb); -+EXPORT_SYMBOL(elan3_sdram_readw); -+EXPORT_SYMBOL(elan3_sdram_readl); -+EXPORT_SYMBOL(elan3_sdram_readq); -+EXPORT_SYMBOL(elan3_sdram_zerob_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerow_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerol_sdram); -+EXPORT_SYMBOL(elan3_sdram_zeroq_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_from_sdram); -+ -+/* elan3/os/tproc.c */ -+EXPORT_SYMBOL(DeliverTProcTrap); -+EXPORT_SYMBOL(HandleTProcTrap); -+EXPORT_SYMBOL(SaveThreadToStack); -+ -+/* elan3/os/tprocinsts.c */ -+EXPORT_SYMBOL(RollThreadToClose); -+ -+/* elan3/os/iproc.c */ -+EXPORT_SYMBOL(InspectIProcTrap); -+EXPORT_SYMBOL(IProcTrapString); -+EXPORT_SYMBOL(SimulateUnlockQueue); -+ -+/* elan3/os/cproc.c */ -+EXPORT_SYMBOL(HandleCProcTrap); -+ -+/* elan3/os/route_table.c */ -+EXPORT_SYMBOL(GenerateRoute); -+EXPORT_SYMBOL(LoadRoute); -+EXPORT_SYMBOL(InvalidateRoute); -+EXPORT_SYMBOL(ValidateRoute); -+EXPORT_SYMBOL(ClearRoute); -+EXPORT_SYMBOL(GenerateProbeRoute); -+EXPORT_SYMBOL(GenerateCheckRoute); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(elan3_debug); -+EXPORT_SYMBOL(QueueHaltOperation); -+EXPORT_SYMBOL(ReleaseHaltOperations); -+EXPORT_SYMBOL(ReserveHaltOperations); -+ -+/* elan3/vm/elan3mmu_generic.c */ -+EXPORT_SYMBOL(elan3mmu_pteload); -+EXPORT_SYMBOL(elan3mmu_unload); -+EXPORT_SYMBOL(elan3mmu_set_context_filter); -+EXPORT_SYMBOL(elan3mmu_reserve); -+EXPORT_SYMBOL(elan3mmu_attach); -+EXPORT_SYMBOL(elan3mmu_detach); -+EXPORT_SYMBOL(elan3mmu_release); -+/* elan3/vm/elan3mmu_linux.c */ -+EXPORT_SYMBOL(elan3mmu_phys_to_pte); -+EXPORT_SYMBOL(elan3mmu_kernel_invalid_pte); -+ -+/* elan3/os/elan3_debug.c */ -+EXPORT_SYMBOL(elan3_debugf); -+ -+/* elan3/os/minames.c */ -+EXPORT_SYMBOL(MiToName); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(MapDeviceRegister); -+EXPORT_SYMBOL(UnmapDeviceRegister); -+ -+EXPORT_SYMBOL(elan_reg_rec_lbolt); -+EXPORT_SYMBOL(elan_reg_rec_file); -+EXPORT_SYMBOL(elan_reg_rec_index); -+EXPORT_SYMBOL(elan_reg_rec_cpu); -+EXPORT_SYMBOL(elan_reg_rec_reg); -+EXPORT_SYMBOL(elan_reg_rec_line); -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static struct notifier_block elan3_dump_notifier = -+{ -+ notifier_call: elan3_dump_event, -+ priority: 0, -+}; -+ -+static int -+elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ( event == DUMP_BEGIN ) -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#endif -+ -+static struct file_operations elan3_fops = { -+ ioctl: elan3_ioctl, /* ioctl */ -+ mmap: elan3_mmap, /* mmap */ -+ open: elan3_open, /* open */ -+ release: elan3_release, /* release */ -+}; -+ -+static struct notifier_block elan3_reboot_notifier = -+{ -+ notifier_call: elan3_reboot_event, -+ priority: 0, -+}; -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ -+static int elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static struct notifier_block elan3_panic_notifier = -+{ -+ notifier_call: elan3_panic_event, -+ priority: 0, -+}; -+ -+static int -+elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan3_shutdown_devices (TRUE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#endif /* !defined(NO_PANIC_NOTIFIER) */ -+ -+ELAN3_DEV * -+elan3_device (int instance) -+{ -+ if (instance < 0 || instance >= ELAN3_MAX_CONTROLLER) -+ return ((ELAN3_DEV *) NULL); -+ return elan3_devices[instance]; -+} -+EXPORT_SYMBOL(elan3_device); -+ -+/* -+ * Called at rmmod time. elandetach() for each card + general cleanup. -+ */ -+#ifdef MODULE -+static void __exit elan3_exit(void) -+{ -+ int i; -+ -+ printk("elan: preparing to remove module\n"); -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan3_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan3_reboot_notifier); -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &elan3_panic_notifier); -+#endif -+ -+ /* call elandetach() for each device configured. */ -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ if (elan3_devices[i] != NULL) -+ elandetach(i); -+ -+ FinaliseNetworkErrorResolver(); -+ elan3mmu_fini(); -+ -+ cookie_fini(); -+ unregister_chrdev(ELAN3_MAJOR, ELAN3_NAME); -+ -+ elan3_procfs_fini(); -+ -+ printk("elan: module removed\n"); -+} -+ -+/* -+ * Called at insmod time. First we perform general driver initialization, -+ * then call elanattach() for each card. -+ */ -+#ifdef MODULE -+static int __init elan3_init(void) -+#else -+__initfunc(int elan3_init(void)) -+#endif -+{ -+ int e; -+ int boards; -+ struct pci_dev *dev; -+ char revid; -+ -+ elan_reg_rec_index=0; -+ { -+ int i; -+ for(i=0;islot_name); -+#else -+ printk ("elan at pci %s - RevA device not supported\n", pci_name(dev)); -+#endif -+ else -+ { -+ if (boards < ELAN3_MAX_CONTROLLER) -+ /* Count successfully attached devices */ -+ boards += ((elanattach(boards, dev) == 0) ? 1 : 0); -+ else -+ { -+ printk ("elan: max controllers = %d\n", ELAN3_MAX_CONTROLLER); -+ break; -+ } -+ } -+ } -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan3_dump_notifier); -+#endif -+ register_reboot_notifier (&elan3_reboot_notifier); -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &elan3_panic_notifier); -+#endif -+ -+ return 0; -+} -+ -+/* Declare the module init and exit functions */ -+module_init(elan3_init); -+module_exit(elan3_exit); -+ -+#endif -+ -+static void -+elan3_shutdown_devices(int panicing) -+{ -+ ELAN3_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan3_devices[i]) != NULL) -+ { -+ if (! panicing) spin_lock (&dev->IntrLock); -+ -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->Instance); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ dev->LinkShutdown = 1; -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ -+ if (! panicing) spin_unlock (&dev->IntrLock); -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+static int -+elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (! (event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ return (NOTIFY_DONE); -+ -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#include -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elanattach(int instance, struct pci_dev *pcidev) -+{ -+ ELAN3_DEV *dev; -+ int ramSize; -+ int level; -+ ioaddr_t sdramAddr, cmdPortAddr, intPalAddr; -+ DeviceMappingHandle handle; -+ -+ printk("elan%d: attach, irq=%d\n", instance, pcidev->irq); -+ -+ /* -+ * Allocate the ELAN3_DEV structure. -+ */ -+ KMEM_ZALLOC(dev, ELAN3_DEV *, sizeof(ELAN3_DEV), TRUE); -+ if (dev == NULL) { -+ printk ("elan%d: KMEM_ALLOC failed\n", instance); -+ return (-ENOMEM); -+ } -+ elan3_devices[instance] = dev; -+ dev->Osdep.pci = pcidev; -+ -+ dev->Instance = instance; -+ -+ /* Initialise the device information */ -+ pci_read_config_word (pcidev, PCI_VENDOR_ID, &dev->Devinfo.dev_vendor_id); -+ pci_read_config_word (pcidev, PCI_DEVICE_ID, &dev->Devinfo.dev_device_id); -+ pci_read_config_byte (pcidev, PCI_REVISION_ID, &dev->Devinfo.dev_revision_id); -+ -+ dev->Devinfo.dev_instance = instance; -+ dev->Devinfo.dev_rail = instance; -+ dev->Devinfo.dev_driver_version = 0; -+ dev->Devinfo.dev_num_down_links_value = DownLinks; -+ -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_random_disabled = RandomRoutingDisabled; -+ -+ /* -+ * Set up PCI config regs. -+ */ -+ if (ConfigurePci(dev) != ESUCCESS) -+ goto fail0; -+ -+ /* -+ * Determine the PFnums of the SDRAM and command port -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_SDRAM, &sdramAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail1; -+ -+ DeviceRegisterSize(dev, ELAN3_BAR_SDRAM, &ramSize); -+ -+ dev->SdramPhysMask = ~((physaddr_t) ramSize - 1); -+ dev->SdramPhysBase = kmem_to_phys((void *) sdramAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+#if defined(LINUX_ALPHA) -+ /* -+ * consider a physical address to be on the same pci bus -+ * as us if it's physical address is "close" to our sdram -+ * physical address. -+ * this is almost certainly incorrect for large memory (> 2Gb) -+ * i386 machines - and is only correct for alpha for 32 bit -+ * base address registers. -+ * -+ * Modified this to match the Tru64 driver value; -+ * i.e. PciPhysMask = 0xfffffffffffc0000 -+ */ -+# define PCI_ADDR_MASK (0x7FFFFFFFl) -+ -+ dev->PciPhysMask = ~PCI_ADDR_MASK; -+ dev->PciPhysBase = dev->SdramPhysBase & dev->PciPhysMask; -+#endif -+ /* -+ * Now reset the elan chip. -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_REGISTERS, &dev->RegPtr, 0, 0, &dev->RegHandle) != ESUCCESS) -+ goto fail1; -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_EBUS, &intPalAddr, ELAN3_EBUS_INTPAL_OFFSET, PAGESIZE, -+ &handle) != ESUCCESS) -+ goto fail2; -+ -+ ResetElan(dev, intPalAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* -+ * Initialise the device mutex's which must be accessible from the -+ * interrupt handler. -+ */ -+ kcondvar_init (&dev->IntrWait); -+ spin_lock_init (&dev->IntrLock); -+ spin_lock_init (&dev->TlbLock); -+ spin_lock_init (&dev->CProcLock); -+ spin_lock_init (&dev->FreeHaltLock); -+ for(level=0; level<4; level++) -+ spin_lock_init (&dev->Level[level].PtblLock); -+ spin_lock_init (&dev->PtblGroupLock); -+ -+ /* -+ * Add the interrupt handler, -+ */ -+ if (request_irq(dev->Osdep.pci->irq, InterruptHandlerWrapper, -+ SA_SHIRQ, "elan3", dev) != 0) { -+ printk ("elan%d: request_irq failed\n", instance); -+ goto fail3; -+ } -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_COMMAND_PORT, &cmdPortAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail4; -+ -+ if (InitialiseElan(dev, cmdPortAddr) == EFAIL) { -+ printk ("elan%d: InitialiseElan failed\n", instance); -+ UnmapDeviceRegister (dev, &handle); -+ goto fail4; -+ } -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* If our nodeid is defined, then set it now */ -+ if (NodeId != ELAN3_INVALID_NODE && ComputePosition (&dev->Position, NodeId, NumNodes, DownLinks) == 0) -+ { -+ if (RandomRoutingDisabled & ((1 << (dev->Position.pos_levels-1))-1)) -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing disabled 0x%x)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes, RandomRoutingDisabled); -+ else -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing ok)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ } -+ -+ if (BackToBackMaster || BackToBackSlave) -+ { -+ dev->Position.pos_mode = ELAN_POS_MODE_BACKTOBACK; -+ dev->Position.pos_nodeid = (BackToBackMaster == 0); -+ dev->Position.pos_nodes = 2; -+ dev->Position.pos_levels = 1; -+ dev->Position.pos_arity[0] = 2; -+ -+ printk ("elan%d: back-to-back %s - elan node %d\n", dev->Instance, -+ BackToBackMaster ? "master" : "slave", dev->Position.pos_nodeid); -+ } -+ -+ elan3_procfs_device_init (dev); -+ -+ /* Success */ -+ return (0); -+ -+fail4: -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+fail3: -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+fail2: -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+fail1: -+ pci_disable_device (dev->Osdep.pci); -+fail0: -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ -+ elan3_devices[instance] = NULL; -+ -+ /* Failure */ -+ return (-ENODEV); -+} -+ -+/* -+ * Called by elan3_exit() for each board found on PCI. -+ */ -+static int -+elandetach(int instance) -+{ -+ ELAN3_DEV *dev = elan3_devices[instance]; -+ -+ printk("elan%d: detach\n", instance); -+ -+ elan3_procfs_device_fini (dev); -+ -+ FinaliseElan (dev); -+ -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+ pci_disable_device(dev->Osdep.pci); -+ -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ elan3_devices[instance] = NULL; -+ -+ return 0; -+} -+ -+/* -+ * generic ioctls - available on control and user devices. -+ */ -+ -+static int -+device_stats_ioctl (ELAN3_DEV *dev, unsigned long arg) -+{ -+ ELAN3IO_STATS_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_STATS_STRUCT *, sizeof(ELAN3IO_STATS_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_STATS_STRUCT))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ switch (args->which) -+ { -+ case ELAN3_SYS_STATS_DEVICE: -+ if (copy_to_user (args->ptr, &dev->Stats, sizeof (ELAN3_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ case ELAN3_SYS_STATS_MMU: -+ if (copy_to_user (args->ptr, &elan3mmu_global_stats, sizeof (ELAN3MMU_GLOBAL_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ default: -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EINVAL); -+ } -+} -+ -+/* -+ * /dev/elan3/controlX - control device -+ * -+ */ -+ -+typedef struct control_private -+{ -+ u_int pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr; -+ -+ KMEM_ALLOC(pr, CONTROL_PRIVATE *, sizeof (CONTROL_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ if (pr->pr_boundary_scan) -+ ClearLinkBoundaryScan(dev); -+ -+ KMEM_FREE (pr, sizeof(CONTROL_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ int res; -+ -+ switch (cmd) -+ { -+ case ELAN3IO_SET_BOUNDARY_SCAN: -+ if (SetLinkBoundaryScan (dev) == 0) -+ pr->pr_boundary_scan = 1; -+ return (0); -+ -+ case ELAN3IO_CLEAR_BOUNDARY_SCAN: -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ pr->pr_boundary_scan = 0; -+ -+ ClearLinkBoundaryScan (dev); -+ return (0); -+ -+ case ELAN3IO_READ_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = ReadBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_WRITE_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = WriteBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_POSITION: -+ { -+ ELAN3IO_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (ComputePosition (&dev->Position, args.nodeId, args.numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (-EINVAL); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_DEBUG: -+ { -+ ELAN3IO_SET_DEBUG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_DEBUG_STRUCT))) -+ return (-EFAULT); -+ -+ if (! strcmp (args.what, "elan3_debug")) -+ elan3_debug = args.value; -+ else if (! strcmp (args.what, "elan3_debug_console")) -+ elan3_debug_console = args.value; -+ else if (! strcmp (args.what, "elan3_debug_buffer")) -+ elan3_debug_buffer = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_dev")) -+ elan3_debug_ignore_dev = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_ctxt")) -+ elan3_debug_ignore_ctxt = args.value; -+ else if (! strcmp (args.what, "elan3mmu_debug")) -+ elan3mmu_debug = args.value; -+ -+ return (0); -+ } -+ -+ case ELAN3IO_NETERR_SERVER: -+ { -+ ELAN3IO_NETERR_SERVER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_NETERR_SERVER_STRUCT))) -+ return (-EFAULT); -+ -+ res = AddNeterrServerSyscall (args.elanid, args.addr, args.name, NULL); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_NETERR_FIXUP: -+ { -+ NETERR_MSG *msg; -+ -+ KMEM_ALLOC(msg, NETERR_MSG *, sizeof (NETERR_MSG), TRUE); -+ -+ if (msg == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (msg, (void *) arg, sizeof (NETERR_MSG))) -+ res = EFAULT; -+ else -+ res = ExecuteNetworkErrorFixup (msg); -+ -+ KMEM_FREE (msg, sizeof (NETERR_MSG)); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_STATS: -+ return (device_stats_ioctl (dev, arg)); -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ if (copy_to_user ((void *) arg, &dev->Devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ if (copy_to_user ((void *) arg, &dev->Position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ default: -+ return (-EINVAL); -+ } -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(file->f_dentry->d_inode)]; -+ int space = OFF_TO_SPACE(vma->vm_pgoff << PAGE_SHIFT); -+ int off = OFF_TO_OFFSET(vma->vm_pgoff << PAGE_SHIFT); -+ int size; -+ ioaddr_t addr; -+ DeviceMappingHandle handle; -+ physaddr_t phys; -+ -+ if (space < ELAN3_BAR_SDRAM || space > ELAN3_BAR_EBUS) -+ return (-EINVAL); -+ -+ if (off < 0 || DeviceRegisterSize (dev, space, &size) != ESUCCESS || off > size) -+ return (-EINVAL); -+ -+ if (MapDeviceRegister(dev, space, &addr, off, PAGESIZE, &handle) != ESUCCESS) -+ return (-EINVAL); -+ -+ phys = kmem_to_phys((caddr_t) addr); -+ UnmapDeviceRegister(dev, &handle); -+ -+ if (__remap_page_range(vma->vm_start, phys, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan3/sdramX - sdram access device -+ */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ u_long pg_pgoff; -+ u_int pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ ELAN3_DEV *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan3_sdram_free (pr->pr_dev, pg->pg_addr, PAGE_SIZE); -+ KMEM_FREE (pg, sizeof(MEM_PAGE)); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, u_long pgoff, virtaddr_t addr) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx\n", pr, pgoff, addr); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx -> found %p addr=%lx\n", pr, pgoff, addr, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ KMEM_ALLOC(npg, MEM_PAGE *, sizeof (MEM_PAGE), TRUE); -+ -+ if (npg == NULL) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan3_sdram_alloc (pr->pr_dev, PAGE_SIZE)) == 0) -+ { -+ KMEM_FREE (npg, sizeof (MEM_PAGE)); -+ return (NULL); -+ } -+ -+ /* zero the page before returning it to the user */ -+ elan3_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, PAGE_SIZE); -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, u_long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ KMEM_ALLOC(pr, MEM_PRIVATE *, sizeof (MEM_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ KMEM_FREE (pr, sizeof (MEM_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void mem_vma_open(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) { -+ mem_getpage (pr, pgoff, addr); -+ preemptable_check(); -+ } -+ } preemptable_end; -+} -+ -+static void mem_vma_close(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occured -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff, 0); -+} -+ -+static struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx prot=%llx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, (long long)vma->vm_page_prot.pgprot , file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff, addr)) == NULL) -+ goto failed; -+ -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+#if defined(__ia64__) -+ if (enable_sdram_writecombining) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+#endif -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: addr %lx -> pg=%p addr=%lx phys=%llx flags=%lx prot=%llx\n", -+ addr, pg, pg->pg_addr, (long long) elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), vma->vm_flags, (long long)vma->vm_page_prot.pgprot); -+ -+ if (__remap_page_range(addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ mem_droppage (pr, pgoff, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+ -+ /* Don't try to swap out Elan SDRAM pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump SDRAM pages to a core file -+ * (Pity I would really like to do this but it crashes in elf_core_dump() as -+ * it can only handle pages that are in the mem_map area (addy 11/01/2002)) -+ */ -+ vma->vm_flags |= VM_IO; -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+ -+ failed: -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: failed\n"); -+ -+ /* free of any pages we've already allocated/referenced */ -+ while (pgoff-- > vma->vm_pgoff) -+ mem_droppage (pr, pgoff, 0); -+ -+ return (-ENOMEM); -+} -+ -+#if !defined(NO_PTRACK) && defined(IOPROC_PATCH_APPLIED) -+ -+#include -+ -+/* -+ * /dev/elan3/userX - control device -+ * -+ * "user_private" can be referenced from a number of places -+ * 1) the "file" structure. -+ * 2) the "mm" ioproc ops -+ * 3) the "mmap" of the command port. -+ * -+ */ -+typedef struct user_private -+{ -+ spinlock_t pr_lock; -+ atomic_t pr_mappings; -+ atomic_t pr_ref; -+ ELAN3_CTXT *pr_ctxt; -+ struct mm_struct *pr_mm; -+ struct ioproc_ops pr_ioproc; -+} USER_PRIVATE; -+ -+static void -+user_free (USER_PRIVATE *pr) -+{ -+ /* Have to unreserve the FlagPage or else we leak memory like a sieve! */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) pr->pr_ctxt->FlagPage))); -+ -+ elan3_detach(pr->pr_ctxt); -+ elan3_free (pr->pr_ctxt); -+ -+ KMEM_FREE (pr, sizeof(USER_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+static void -+user_ioproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_SEG, "user_ioproc_release: ctxt=%p pr=%p ref=%d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref)); -+ -+ elan3mmu_pte_ctxt_unload (pr->pr_ctxt->Elan3mmu); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+} -+ -+/* -+ * On 2.4 kernels we get passed a mm_struct, whereas on 2.6 kernels -+ * we get the vma which is more usefull -+ */ -+#if defined(IOPROC_MM_STRUCT_ARG) -+static void -+user_ioproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ ASSERT(start <= end && ((start & PAGEOFFSET) == 0) && ((end & PAGEOFFSET) == 0)); -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu, mm,(caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+#else -+ -+static void -+user_ioproc_sync_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ ASSERT(start <= end && ((start & PAGEOFFSET) == 0) && ((end & PAGEOFFSET) == 0)); -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+#endif /* defined(IOPROC_NO_VMA_RANGE) */ -+ -+static void -+user_ioproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_ioproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_ioproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu,vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+int -+user_ptrack_handler (void *arg, int phase, struct task_struct *child) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ -+ PRINTF5 (pr->pr_ctxt, DBG_FN, "user_ptrack_handler: ctxt=%p pr=%p ref=%d phase %d mm->ref %d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref), phase, atomic_read (¤t->mm->mm_count)); -+ -+ if (phase == PTRACK_PHASE_EXIT) -+ { -+ /* this will force the helper thread to exit */ -+ elan3_swapout (ctxt, CTXT_EXITING); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ } -+ return PTRACK_FINISHED; -+} -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ USER_PRIVATE *pr; -+ ELAN3_CTXT *ctxt; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC(pr, USER_PRIVATE *, sizeof (USER_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ if ((ctxt = elan3_alloc (dev, 0)) == NULL) -+ { -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ if (sys_init (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* initialise refcnt to 3 - one for "file", one for XA handler, one for the ioproc ops */ -+ atomic_set (&pr->pr_ref, 3); -+ -+ atomic_set (&pr->pr_mappings, 0); -+ spin_lock_init (&pr->pr_lock); -+ -+ pr->pr_ctxt = ctxt; -+ pr->pr_mm = current->mm; -+ -+ /* register an ptrack handler to force the helper thread to exit when we do */ -+ if (ptrack_register (user_ptrack_handler, pr) < 0) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* register a ioproc callback to notify us of translation changes */ -+ -+ pr->pr_ioproc.arg = (void *) pr; -+ pr->pr_ioproc.release = user_ioproc_release; -+ pr->pr_ioproc.sync_range = user_ioproc_sync_range; -+ pr->pr_ioproc.invalidate_range = user_ioproc_invalidate_range; -+ pr->pr_ioproc.update_range = user_ioproc_update_range; -+ pr->pr_ioproc.change_protection = user_ioproc_change_protection; -+ pr->pr_ioproc.sync_page = user_ioproc_sync_page; -+ pr->pr_ioproc.invalidate_page = user_ioproc_invalidate_page; -+ pr->pr_ioproc.update_page = user_ioproc_update_page; -+ -+ spin_lock (¤t->mm->page_table_lock); -+ ioproc_register_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ file->private_data = (void *) pr; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_FN, "user_open: done ctxt=%p pr=%p\n", ctxt, pr); -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_FN, "user_release: ctxt=%p pr=%p ref=%d\n", pr->pr_ctxt, pr, -+ atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res = 0; -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ PRINTF4 (ctxt, DBG_FN, "user_ioctl: ctxt=%p cmd=%x(%d) arg=%lx\n", ctxt, cmd, _IOC_NR(cmd), arg); -+ -+ switch (cmd) -+ { -+ case ELAN3IO_FREE: -+ if (atomic_read (&pr->pr_mappings) > 0) -+ return (-EINVAL); -+ -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ ioproc_unregister_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_ioproc_release (pr, current->mm); -+ } -+ -+ if (ptrack_registered (user_ptrack_handler, pr)) -+ { -+ ptrack_deregister (user_ptrack_handler, pr); -+ user_ptrack_handler (pr, PTRACK_PHASE_EXIT, NULL); -+ } -+ break; -+ -+ case ELAN3IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC(cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ if (cap == NULL) -+ return (set_errno (EFAULT)); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = EFAULT; -+ else -+ { -+ if ((res = elan3_attach (ctxt, cap)) == 0) -+ { -+ if (copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ elan3_detach (ctxt); -+ res = EFAULT; -+ } -+ } -+ } -+ KMEM_FREE (cap, sizeof(ELAN_CAPABILITY)); -+ break; -+ } -+ -+ case ELAN3IO_DETACH: -+ spin_lock (&pr->pr_lock); -+ if (atomic_read (&pr->pr_mappings) > 0) -+ res = EINVAL; -+ else -+ elan3_detach (ctxt); -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_ADDVP: -+ { -+ ELAN3IO_ADDVP_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_ADDVP_STRUCT *, sizeof (ELAN3IO_ADDVP_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_ADDVP_STRUCT))) -+ res = EFAULT; -+ else -+ { -+ if ( (res=elan3_addvp (ctxt, args->process, &args->capability)) != 0) -+ PRINTF0 (ctxt, DBG_FN, "ELAN3IO_ADDVP elan3_addvp failed \n"); -+ } -+ -+ KMEM_FREE (args, sizeof (ELAN3IO_ADDVP_STRUCT)); -+ break; -+ } -+ -+ case ELAN3IO_REMOVEVP: -+ res = elan3_removevp (ctxt, arg); -+ break; -+ -+ case ELAN3IO_BCASTVP: -+ { -+ ELAN3IO_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_addbcastvp (ctxt, args.process, args.lowvp, args.highvp); -+ break; -+ } -+ -+ case ELAN3IO_LOAD_ROUTE: -+ { -+ ELAN3IO_LOAD_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_load_route (ctxt, args.process, args.flits); -+ break; -+ } -+ -+ case ELAN3IO_CHECK_ROUTE: -+ { -+ ELAN3IO_CHECK_ROUTE_STRUCT args; -+ -+ args.routeError = 0; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_check_route (ctxt, args.process, args.flits, & args.routeError)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_PROCESS_2_LOCATION: -+ { -+ ELAN3IO_PROCESS_2_LOCATION_STRUCT args; -+ ELAN_LOCATION loc; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ krwlock_write (&ctxt->VpLock); -+ loc = ProcessToLocation (ctxt, NULL, args.process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ args.loc = loc; -+ -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ break; -+ } -+ -+ case ELAN3IO_GET_ROUTE: -+ { -+ ELAN3IO_GET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_get_route (ctxt, args.process, args.flits)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_RESET_ROUTE: -+ { -+ ELAN3IO_RESET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_RESET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_reset_route (ctxt, args.process); -+ break; -+ } -+ -+ case ELAN3IO_VP2NODEID: -+ { -+ ELAN3IO_VP2NODEID_STRUCT *vp2nodeId; -+ ELAN_LOCATION location; -+ -+ KMEM_ALLOC (vp2nodeId, ELAN3IO_VP2NODEID_STRUCT *, sizeof(ELAN3IO_VP2NODEID_STRUCT), TRUE); -+ if (vp2nodeId == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (vp2nodeId, (void *) arg, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ krwlock_write (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, vp2nodeId->process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ vp2nodeId->nodeId = location.loc_node; -+ if (copy_to_user ( (void *) arg, vp2nodeId, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ -+ break; -+ } -+ -+ case ELAN3IO_PROCESS: -+ return (elan3_process (ctxt)); -+ -+ case ELAN3IO_SETPERM: -+ { -+ ELAN3IO_SETPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SETPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3mmu_setperm (ctxt->Elan3mmu, args.maddr, args.eaddr, args.len, args.perm); -+ break; -+ } -+ -+ case ELAN3IO_CLEARPERM: -+ { -+ ELAN3IO_CLEARPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CLEARPERM_STRUCT))) -+ return (-EFAULT); -+ -+ elan3mmu_clrperm (ctxt->Elan3mmu, args.eaddr, args.len); -+ break; -+ } -+ -+ case ELAN3IO_CHANGEPERM: -+ { -+ ELAN3IO_CHANGEPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CHANGEPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = EINVAL; -+ break; -+ } -+ -+ case ELAN3IO_HELPER_THREAD: -+ res = elan3_lwp (ctxt); -+ break; -+ -+ case ELAN3IO_WAITCOMMAND: -+ res = WaitForCommandPort (ctxt); -+ break; -+ -+ case ELAN3IO_BLOCK_INPUTTER: -+ elan3_block_inputter (ctxt, arg); -+ break; -+ -+ case ELAN3IO_SET_FLAGS: -+ sctx->Flags = arg; -+ break; -+ -+ case ELAN3IO_SET_SIGNAL: -+ sctx->signal = arg; -+ break; -+ -+ case ELAN3IO_WAITEVENT: -+ res = sys_waitevent (ctxt, (E3_Event *) arg); -+ break; -+ -+ case ELAN3IO_ALLOC_EVENTCOOKIE: -+ res = cookie_alloc_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_FREE_EVENTCOOKIE: -+ res = cookie_free_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_ARM_EVENTCOOKIE: -+ res = cookie_arm_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_WAIT_EVENTCOOKIE: -+ res = cookie_wait_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_SWAPSPACE: -+ if (fuword (&((SYS_SWAP_SPACE *) arg)->Magic) != SYS_SWAP_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Swap = (SYS_SWAP_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_EXCEPTION_SPACE: -+ if (fuword (&((SYS_EXCEPTION_SPACE *) arg)->Magic) != SYS_EXCEPTION_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Exceptions = (SYS_EXCEPTION_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_GET_EXCEPTION: -+ { -+ SYS_EXCEPTION *exception; -+ -+ if (((SYS_CTXT *) ctxt->Private)->Exceptions == NULL) -+ return (set_errno (EINVAL)); -+ -+ KMEM_ALLOC(exception, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (exception == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if ((res = sys_getException (((SYS_CTXT *) ctxt->Private), exception)) == 0 && -+ copy_to_user ((void *) arg, exception, sizeof (SYS_EXCEPTION))) -+ res = EFAULT; -+ -+ KMEM_FREE (exception, sizeof (SYS_EXCEPTION)); -+ break; -+ } -+ -+ case ELAN3IO_UNLOAD: -+ { -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3IO_UNLOAD_STRUCT args; -+ int span; -+ unsigned long flags; -+ E3_Addr eaddr; -+ caddr_t addr; -+ size_t len; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_UNLOAD_STRUCT))) -+ return (-EFAULT); -+ -+ addr = (caddr_t) args.addr; -+ len = args.len; -+ -+ if (((unsigned long) addr & PAGEMASK) || (len & PAGEMASK) || (len < 0)) -+ return -EINVAL; -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_lock, flags); -+ for (; len; len -= span, addr += span) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } -+ } -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_lock, flags); -+ -+ return 0; -+ } -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ ELAN3IO_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.devinfo, &ctxt->Device->Devinfo, sizeof (ELAN_DEVINFO))) -+ res = EFAULT; -+ break; -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ ELAN3IO_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.position, &ctxt->Device->Position, sizeof (ELAN_POSITION))) -+ res = EFAULT; -+ break; -+ } -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ return (res ? set_errno (res) : 0); -+} -+ -+static void user_vma_open(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ if (atomic_dec_and_test (&pr->pr_mappings)) -+ pr->pr_ctxt->CommandPageMapping = NULL; -+} -+ -+static void user_vma_close(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ atomic_inc (&pr->pr_mappings); -+} -+ -+static struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ ioaddr_t ioaddr; -+ -+ /* -+ * NOTE - since we need to maintain the reference count on -+ * the user_private we only permit single page -+ * mmaps - this means that we will certainly see -+ * the correct number of closes to maintain the -+ * the reference count correctly. -+ */ -+ -+ if ((vma->vm_end - vma->vm_start) != PAGE_SIZE) -+ return (-EINVAL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx flags=%lx prot=%llx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, (long long)vma->vm_page_prot.pgprot, vma->vm_file); -+ -+ switch (vma->vm_pgoff) -+ { -+ default: -+ return (-EINVAL); -+ -+ case ELAN3IO_OFF_COMMAND_PAGE: -+ spin_lock (&pr->pr_lock); -+ if (ctxt->CommandPage == (ioaddr_t) 0 || atomic_read (&pr->pr_mappings) != 0) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: command port - %s\n", ctxt->CommandPort ? "already mapped" : "not attached"); -+ spin_unlock (&pr->pr_lock); -+ return (-EINVAL); -+ } -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: commandport at %lx phys %llx prot %llx\n", -+ vma->vm_start, (unsigned long long) kmem_to_phys ((void *) ctxt->CommandPort), (long long)vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+ if (__remap_page_range(vma->vm_start, kmem_to_phys ((void *) ctxt->CommandPage), vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ { -+ spin_unlock (&pr->pr_lock); -+ return (-EAGAIN); -+ } -+ ctxt->CommandPageMapping = (void *) vma->vm_start; -+ -+ atomic_inc (&pr->pr_mappings); -+ -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_OFF_UREG_PAGE: -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ ioaddr = ctxt->Device->RegPtr + (offsetof (E3_Regs, URegs) & PAGEMASK); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: user_regs at %lx phys %llx prot %llx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ioaddr), (long long)vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ if (__remap_page_range (vma->vm_start, kmem_to_phys ((void *) ioaddr), -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ -+ case ELAN3IO_OFF_FLAG_PAGE: -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: flag page at %lx phys %llx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ctxt->FlagPage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) ctxt->FlagPage))); -+ -+ if (__remap_page_range (vma->vm_start, kmem_to_phys ((void *) ctxt->FlagPage), -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ } -+ -+ ASSERT (vma->vm_ops == NULL); -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+} -+ -+#else /* defined(NO_PTRACK) || !defined(IOPROC_PATCH_APPLIED) */ -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ return -ENXIO; -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return 0; -+} -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ return 0; -+} -+#endif -+ -+/* driver entry points */ -+static int -+elan3_open (struct inode *inode, struct file *file) -+{ -+ if (elan3_devices[ELAN3_DEVICE(inode)] == NULL) -+ return (-ENXIO); -+ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_open: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_release: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+ -+static int -+elan3_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "elan3_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx flags=%lx prot=%llx\n", -+ ELAN3_DEVICE (file->f_dentry->d_inode), ELAN3_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, (long long)vma->vm_page_prot.pgprot); -+ -+ switch (ELAN3_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN3_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN3_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static irqreturn_t -+InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ if (InterruptHandler ((ELAN3_DEV *)dev_id) == 0) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ -+/* -+ * Elan specific PCI configuration registers. -+ */ -+ -+#define PCI_CONF_PARITY_PHYS_LO 0x40 -+#define PCI_CONF_PARITY_PHYS_HI 0x44 -+#define PCI_CONF_PARITY_PHASE_ADDR 0x46 -+#define PCI_CONF_PARITY_MASTER_TYPE 0x47 -+#define PCI_CONF_ELAN3_CTRL 0x48 -+ -+#define ECTRL_EXTEND_LATENCY (1 << 0) -+#define ECTRL_ENABLE_PREFETCH (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_REDUCED_RETRY_RATE (1 << 3) -+#define ECTRL_CLOCK_DIVIDE_RATE_SHIFT 4 -+#define ECTRL_COMMS_DIVIDE_RATE_SHIFT 10 -+#define ECTRL_FORCE_COMMSCLK_LOCAL (1 << 14) -+ -+/* -+ * Configure PCI. -+ */ -+static int -+ConfigurePci(ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u32 rom_address; -+ -+ if (pci_enable_device(pci)) -+ return (ENXIO); -+ -+ /* disable ROM */ -+ pci_read_config_dword(pci, PCI_ROM_ADDRESS, &rom_address); -+ rom_address &= ~PCI_ROM_ADDRESS_ENABLE; -+ pci_write_config_dword(pci, PCI_ROM_ADDRESS, rom_address); -+ mb(); -+ -+ /* this is in 32-bit WORDS */ -+ pci_write_config_byte(pci, PCI_CACHE_LINE_SIZE, (64 >> 2)); -+ mb(); -+ -+ /* allow 40 ticks to respond, 16 data phases */ -+ pci_write_config_byte(pci, PCI_LATENCY_TIMER, 255); -+ mb(); -+ -+ /* don't enable PCI_COMMAND_SERR--see note in elandev_dunix.c */ -+ pci_write_config_word(pci, PCI_COMMAND, PCI_COMMAND_MEMORY -+ | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY); -+ mb(); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Reset chip to a known state. -+ */ -+static int -+ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ int instance = dev->Instance; -+ u32 val; -+ u8 revid; -+ int CasLatency; -+ int res; -+ -+ /* determine rev of board */ -+ pci_read_config_byte(pci, PCI_REVISION_ID, &revid); -+ -+ /* GNAT 2328 - don't set ECTRL_ENABLE_PREFETCH on Elan rev A */ -+ val = ECTRL_EXTEND_LATENCY | (39 << ECTRL_CLOCK_DIVIDE_RATE_SHIFT) -+ | (6 << ECTRL_COMMS_DIVIDE_RATE_SHIFT); -+ switch (revid) -+ { -+ case PCI_REVISION_ID_ELAN3_REVA: -+ printk("elan%d: is an elan3 (revision a) - not supported\n", instance); -+ return (EFAIL); -+ -+ case PCI_REVISION_ID_ELAN3_REVB: -+ val |= ECTRL_ENABLE_PREFETCH; -+ if (BackToBackMaster) -+ val |= ECTRL_FORCE_COMMSCLK_LOCAL; -+ printk("elan%d: is an elan3 (revision b)\n", instance); -+ break; -+ default: -+ printk("elan%d: unsupported elan3 revision %d\n", -+ instance, revid); -+ return EFAIL; -+ } -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * GNAT: 2474 -+ * Hit reset on the Elan, then we MUST initialise the schedule status -+ * register to drive reset on the link before the link can come out -+ * of reset (15 uS). We need to keep it like this until we've -+ * initialised SDRAM -+ */ -+ pci_read_config_dword(pci, PCI_CONF_ELAN3_CTRL, &val); -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, -+ val | ECTRL_SOFTWARE_INTERNAL_RESET); -+ mb(); -+ -+ /* Read the Vital Product Data to determine the cas latency */ -+ if ((res = ReadVitalProductData (dev, &CasLatency)) != ESUCCESS) -+ return (res); -+ -+ /* -+ * Now clear the Software internal reset bit, and start the sdram -+ */ -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * Enable SDRAM before sizing and initalising it for ECC. -+ * NOTE: don't enable all sets of the cache (yet), nor ECC -+ */ -+ dev->Cache_Control_Reg = (CasLatency | REFRESH_RATE_16US); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg | SETUP_SDRAM)); -+ mb(); -+ -+ INIT_SCHED_STATUS(dev, Sched_Initial_Value); -+ -+ /* -+ * Set the interrupt mask to 0 and enable the interrupt PAL -+ * by writing any value to it. -+ */ -+ SET_INT_MASK (dev, 0); -+ writeb (0, (void *) intPalAddr); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Determine the size of elan PCI address spaces. EFAIL is returned if -+ * unused or invalid BAR is specified, or if board reports I/O mapped space. -+ */ -+int -+DeviceRegisterSize(ELAN3_DEV *dev, int rnumber, int *sizep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ *sizep = pci_resource_size(pdev, rnumber); -+#else -+ *sizep = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+#endif -+ return ESUCCESS; -+} -+ -+/* -+ * Map PCI memory into kernel virtual address space. On the alpha, -+ * we just return appropriate kseg address, and Unmap is a no-op. -+ */ -+int -+MapDeviceRegister(ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, -+ int off, int len, DeviceMappingHandle *handlep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ u64 base = pci_get_base_address(pdev, rnumber); -+ *addrp = (ioaddr_t) pci_base_to_kseg(base + off, pdev->bus->number); -+ -+#else -+ if (len == 0) -+ len = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+ -+ if (len == 0) -+ return (EINVAL); -+ -+ *addrp = (ioaddr_t) ioremap_nocache (pci_resource_start(pdev, rnumber) + off, len); -+#endif -+ -+ *handlep = (void *) *addrp; -+ -+ return (*addrp ? ESUCCESS : ENOMEM); -+} -+void -+UnmapDeviceRegister(ELAN3_DEV *dev, DeviceMappingHandle *handlep) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+ iounmap (*handlep); -+#endif -+} -+ -+void -+ElanBusError (ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u8 phaseaddr, type; -+ u16 status, cmd, physhi; -+ u32 physlo; -+ -+ printk("elan%d: bus error occured\n", dev->Instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword(pci, PCI_CONF_PARITY_PHYS_LO, &physlo); -+ pci_read_config_word (pci, PCI_CONF_PARITY_PHYS_HI, &physhi); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_PHASE_ADDR, &phaseaddr); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_MASTER_TYPE, &type); -+ -+#define PCI_CONF_STAT_FORMAT "\20" \ -+ "\6SIXTY_SIX_MHZ\7UDF\10FAST_BACK\11PARITY" \ -+ "\14SIG_TARGET_ABORT\15REC_TARGET_ABORT\16REC_MASTER_ABORT" \ -+ "\17SIG_SYSTEM_ERROR\20DETECTED_PARITY" -+ -+ printk ("elan%d: status %x cmd %4x physaddr %04x%08x phase %x type %x\n", -+ dev->Instance, status, cmd, physhi, physlo, phaseaddr, type); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/elansyscall.c linux-2.6.9/drivers/net/qsnet/elan3/elansyscall.c ---- clean/drivers/net/qsnet/elan3/elansyscall.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/elansyscall.c 2004-11-01 13:01:51.000000000 -0500 -@@ -0,0 +1,1230 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elansyscall.c,v 1.100 2004/11/01 18:01:51 robin Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elansyscall.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static int sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+static int sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+static int sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+static void sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_uint32 value); -+static void sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+static void sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *item); -+static void sys_freeWordItem (ELAN3_CTXT *ctxt, void *item); -+static void sys_freeBlockItem (ELAN3_CTXT *ctxt, void *item); -+static int sys_countItems (ELAN3_CTXT *ctxt, int list); -+static int sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static void sys_swapin (ELAN3_CTXT *ctxt); -+static void sys_swapout (ELAN3_CTXT *ctxt); -+static void sys_freePrivate (ELAN3_CTXT *ctxt); -+static int sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+static int sys_startFaultCheck (ELAN3_CTXT *ctxt); -+static void sys_endFaultCheck (ELAN3_CTXT *ctxt); -+static E3_uint8 sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+static ELAN3_OPS elan3_sys_ops = { -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ sys_exception, /* Exception */ -+ sys_getWordItem, /* GetWordItem */ -+ sys_getBlockItem, /* GetBlockItem */ -+ sys_putWordItem, /* PutWordItem */ -+ sys_putBlockItem, /* PutBlockItem */ -+ sys_putbackItem, /* PutbackItem */ -+ sys_freeWordItem, /* FreeWordItem */ -+ sys_freeBlockItem, /* FreeBlockItem */ -+ sys_countItems, /* CountItems */ -+ sys_event, /* Event */ -+ sys_swapin, /* Swapin */ -+ sys_swapout, /* Swapout */ -+ sys_freePrivate, /* FreePrivate */ -+ sys_fixupNetworkError, /* FixupNetworkError */ -+ NULL, /* DProcTrap */ -+ NULL, /* TProcTrap */ -+ NULL, /* IProcTrap */ -+ NULL, /* CProcTrap */ -+ NULL, /* CProcReissue */ -+ sys_startFaultCheck, /* StartFaultCheck */ -+ sys_endFaultCheck, /* EndFaultCheck */ -+ sys_load8, /* Load8 */ -+ sys_store8, /* Store8 */ -+ sys_load16, /* Load16 */ -+ sys_store16, /* Store16 */ -+ sys_load32, /* Load32 */ -+ sys_store32, /* Store32 */ -+ sys_load64, /* Load64 */ -+ sys_store64 /* Store64 */ -+}; -+ -+va_list null_valist; -+ -+SYS_CTXT * -+sys_init (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (sctx, SYS_CTXT *, sizeof (SYS_CTXT), TRUE); -+ -+ if (sctx == NULL) -+ return ((SYS_CTXT *) NULL); -+ -+ sctx->Swap = NULL; -+ sctx->Armed = 0; -+ sctx->Backoff = 1; -+ sctx->Table = cookie_alloc_table ((unsigned long) ELAN3_MY_TASK_HANDLE(), 0); -+ sctx->signal = SIGSEGV; -+ -+ if (sctx->Table == NULL) -+ { -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ return ((SYS_CTXT *) NULL); -+ } -+ -+ kmutex_init (&sctx->Lock); -+ spin_lock_init (&sctx->WaitLock); -+ kcondvar_init (&sctx->NetworkErrorWait); -+ -+ /* Install my context operations and private data */ -+ ctxt->Operations = &elan3_sys_ops; -+ ctxt->Private = (void *) sctx; -+ -+ return (sctx); -+} -+ -+/* returns -ve on error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+int -+elan3_validate_cap(ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN3_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN3_SYSTEM_CONTEXT (cap->cap_highcontext) -+ || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: lctx %x hctx %x \n",cap->cap_lowcontext, cap->cap_highcontext); -+ PRINTF3 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: bit %x low %x high %x\n", ((cap->cap_lowcontext) & SYS_CONTEXT_BIT), -+ E3_NUM_CONTEXT_0, ELAN3_KCOMM_BASE_CONTEXT_NUM); -+ -+ -+ PRINTF0 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: user process cant attach to system cap\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_HWTEST) -+ { -+ if (!(cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP)) /* cant have a bit map */ -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST must have ELAN_CAP_TYPE_NO_BITMAP\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext != cap->cap_highcontext) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST (cap->cap_lowcontext != cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext) ; -+ return (-EINVAL); -+ } -+ -+ if ( ! (ELAN3_HWTEST_CONTEXT(cap->cap_lowcontext) && ELAN3_HWTEST_CONTEXT(cap->cap_highcontext))) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST HWTEST_BASE_CONTEXT %d %d %d \n" , ELAN3_HWTEST_BASE_CONTEXT_NUM,cap->cap_lowcontext ,ELAN3_HWTEST_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lownode != ELAN_CAP_UNINITIALISED || cap->cap_highnode != ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST nodes != ELAN_CAP_UNINITIALISED\n"); -+ return (-EINVAL); -+ } -+ -+ return ELAN_CAP_OK; -+ } -+ -+ return elanmod_classify_cap(&dev->Position, cap, use); -+} -+ -+int -+sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ EVENT_COOKIE cookie; -+ -+ if (ctxt->Device->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVA) -+ return (EINVAL); -+ -+ cookie = fuword ((int *) &event->ev_Type) & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY); -+ -+ if (cookie_alloc_cookie (sctx->Table, cookie) != ESUCCESS) -+ return (EINVAL); -+ -+ cookie_arm_cookie (sctx->Table, cookie); -+ -+ if (fuword ((int *) &event->ev_Count) > 0) -+ cookie_wait_cookie (sctx->Table, cookie); -+ -+ cookie_free_cookie (sctx->Table, cookie); -+ -+ return (ESUCCESS); -+} -+ -+static void * -+sys_getItem (SYS_SWAP_SPACE *sp, int list) -+{ -+ void *itemp = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ void *next; -+ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_getItem: sp=%p list=%d head=%p itemp=%p\n", -+ sp, list, &sp->ItemListsHead[list], itemp); -+ -+ if (itemp == NULL) -+ return (NULL); -+ -+ next = (void *) fuptr_noerr ((void *) itemp); -+ -+ suptr_noerr ((void *) &sp->ItemListsHead[list], (void *) next); -+ if (next == NULL) -+ suptr_noerr ((void *) &sp->ItemListsTailp[list], (void *)&sp->ItemListsHead[list]); -+ return (itemp); -+} -+ -+static void -+sys_putItemBack (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemBack: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, NULL); /* item->Next = NULL */ -+ suptr_noerr ((void **) fuptr_noerr ((void **) &sp->ItemListsTailp[list]), (void *)itemp); /* *Tailp = item */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &item->Next */ -+} -+ -+static void -+sys_putItemFront (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemFront: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, fuptr_noerr ((void **) &sp->ItemListsHead[list])); /* item->Next = Head */ -+ suptr_noerr ((void **) &sp->ItemListsHead[list], (void *) itemp); /* Head = item */ -+ -+ if (fuptr_noerr ((void **) &sp->ItemListsTailp[list]) == (void *) &sp->ItemListsHead[list]) /* if (Tailp == &Head) */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &Item->Next */ -+} -+ -+ -+static int -+sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = (SYS_WORD_ITEM *) sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ if (list == LIST_DMA_PTR) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = (E3_Addr) fuword_noerr ((E3_int32 *) &item->Value); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getWordItem: list=%d -> item=%p value=%08x\n", list, *itemp, *valuep); -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static int -+sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ E3_uint32 *dest = fuptr_noerr ((void **) &item->Pointer); -+ -+ if (list == LIST_DMA_DESC) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t) dest); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getBlockItem: list=%d -> item=%p addr=%08x\n", list, *itemp, *valuep); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+ -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static void -+sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_Addr value) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ PRINTF2 (ctxt,DBG_SYSCALL, "sys_putWordItem: list=%x value=%x\n", list, value); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_WORD); -+ -+ PRINTF1 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p\n", item); -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: storing value=%08x at %p\n", value, &item->Value); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p value=%08x\n", item, value); -+ -+ suword_noerr ((E3_int32 *) &item->Value, value); /* write "value" into item */ -+ -+ sys_putItemBack (sp, list, item); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ label_t ljp; -+ E3_uint32 *source; -+ E3_uint32 *dest; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: list=%x ptr=%p\n", list, ptr); -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_BLOCK); /* get an item from the freelist. */ -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ /* -+ * The block will have been read using 64 bit reads, since we have -+ * to write it to user memory using 32 bit writes, we need to perform -+ * an endian swap on the Ultrasparc. -+ */ -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ source = (E3_uint32 *) ptr; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: item=%p dest=%p\n",item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[0^WordEndianFlip], source[1^WordEndianFlip], source[2^WordEndianFlip], source[3^WordEndianFlip]); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[4^WordEndianFlip], source[5^WordEndianFlip], source[6^WordEndianFlip], source[7^WordEndianFlip]); -+ -+ suword_noerr ((E3_int32 *) &dest[7], (E3_int32) source[7^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[6], (E3_int32) source[6^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[5], (E3_int32) source[5^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[4], (E3_int32) source[4^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[3], (E3_int32) source[3^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[2], (E3_int32) source[2^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[1], (E3_int32) source[1^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[0], (E3_int32) source[0^WordEndianFlip]); -+ -+ sys_putItemBack (sp, list, item); /* chain onto list of items. */ -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeWordItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_WORD, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemBack (sp, LIST_FREE_WORD, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeBlockItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item = (SYS_BLOCK_ITEM *)itemp; -+ E3_uint32 *dest; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_BLOCK, (void *) NULL, null_valist); -+ return; -+ } -+#ifdef DEBUG_PRINTF -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_freeBlockItem: item=%p dest=%p\n", item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+#endif -+ -+ sys_putItemBack (sp, LIST_FREE_BLOCK, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemFront (sp, list, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static int -+sys_countItems (ELAN3_CTXT *ctxt, int list) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int count = 0; -+ void *item; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ for (item = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ item != NULL; -+ item = (void *) fuptr_noerr ((void **) item)) -+ { -+ count++; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (count); -+} -+ -+ -+long sys_longTime; -+long sys_shortTime; -+int sys_waitTicks; -+int sys_maxBackoff; -+ -+#define SYS_LONG_TIME MAX((hz * 5) / 1000, 1) /* 5 ms */ -+#define SYS_SHORT_TIME MAX((hz * 2) / 1000, 1) /* 2 ms */ -+#define SYS_WAIT_TICKS MAX((hz * 1) / 1000, 1) /* 1 ms - backoff granularity */ -+#define SYS_MAX_BACKOFF MAX((hz * 5) / 1000, 1) /* 5 ms - max backoff for "nacked" packets*/ -+#define SYS_TIMEOUT_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - backoff for output timeout (point to point) */ -+#define SYS_BCAST_BACKOFF MAX((hz * 50) / 1000, 1) /* 50 ms - backoff for output timeout (broadcast) */ -+#define SYS_NETERR_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - delay for network error in dma data */ -+ -+static void -+sys_backoffWait (ELAN3_CTXT *ctxt, int ticks) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ long t; -+ -+ spin_lock (&sctx->WaitLock); -+ -+ t = lbolt - sctx->Time; -+ -+ if (sys_longTime == 0) sys_longTime = SYS_LONG_TIME; -+ if (sys_shortTime == 0) sys_shortTime = SYS_SHORT_TIME; -+ if (sys_waitTicks == 0) sys_waitTicks = SYS_WAIT_TICKS; -+ if (sys_maxBackoff == 0) sys_maxBackoff = SYS_MAX_BACKOFF; -+ -+ if (t > sys_longTime) /* It's a long time since the last trap */ -+ sctx->Backoff = 0; /* so set the backoff back down to 0 */ -+ -+ if (ticks) -+ { -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d ticks [%lx]\n", ticks, t); -+ kcondvar_timedwait (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + ticks); -+ } -+ else if (sctx->Armed) -+ { -+ if (t < sys_shortTime) /* It's been a short time since the last */ -+ { /* trap, so increase the backoff */ -+ sctx->Backoff++; -+ -+ if (sctx->Backoff > sys_maxBackoff) -+ sctx->Backoff = sys_maxBackoff; -+ } -+ -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d [%lx]\n", sctx->Backoff, t); -+ -+ if (sctx->Backoff) -+ kcondvar_timedwaitsig (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + sctx->Backoff * sys_waitTicks); -+ -+ sctx->Armed = 0; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "sys_backoffWait : Not Waiting - %d\n", sctx->Backoff); -+ -+ } -+ sctx->Time = lbolt; -+ -+ spin_unlock (&sctx->WaitLock); -+} -+ -+static int -+trapSize (int proc) -+{ -+ switch (proc) -+ { -+ case DMA_PROC: return (sizeof (DMA_TRAP)); -+ case THREAD_PROC: return (sizeof (THREAD_TRAP)); -+ case COMMAND_PROC: return (sizeof (COMMAND_TRAP)); -+ case INPUT_PROC: return (sizeof (INPUT_TRAP)); -+ default: return (0); -+ } -+} -+ -+static int -+sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trapp, va_list ap) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_exception: type %d proc %d\n", type, proc); -+ -+ switch (type) -+ { -+ case EXCEPTION_INVALID_ADDR: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int res = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, res, 0); -+ break; -+ } -+ -+ case EXCEPTION_UNIMP_INSTR: -+ { -+ E3_uint32 instr = va_arg (ap, E3_uint32); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, instr); -+ break; -+ } -+ -+ case EXCEPTION_INVALID_PROCESS: -+ { -+ E3_uint32 vproc = va_arg (ap, E3_uint32); -+ int res = va_arg (ap, int); -+ -+ switch (proc) -+ { -+ case DMA_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMA_BADVP) -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ -+ case THREAD_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_THREAD_BADVP) -+ { -+ THREAD_TRAP *trap = (THREAD_TRAP *) trapp; -+ -+ trap->TrapBits.s.PacketAckValue = E3_PAckError; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ } -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, vproc); -+ break; -+ } -+ -+ case EXCEPTION_FAULTED: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_QUEUE_OVERFLOW: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int trapType = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, 0, trapType); -+ break; -+ } -+ -+ case EXCEPTION_COMMAND_OVERFLOW: -+ { -+ int count = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, count); -+ break; -+ } -+ -+ case EXCEPTION_CHAINED_EVENT: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_DMA_RETRY_FAIL: -+ case EXCEPTION_PACKET_TIMEOUT: -+ if (proc != DMA_PROC) -+ sys_backoffWait (ctxt, SYS_TIMEOUT_BACKOFF); -+ else -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMAFAIL) -+ { -+ E3_BlockCopyEvent *event; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ /* change the source word to be E3_EVENT_FAILED */ -+ if ((event = (E3_BlockCopyEvent *) elan3mmu_mainaddr (ctxt->Elan3mmu, trap->Desc.s.dma_srcEvent)) == NULL) -+ { -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ suword (&event->ev_Source, E3_EVENT_FAILED); -+ wmb(); mmiob(); -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ -+ if (type == EXCEPTION_DMA_RETRY_FAIL) -+ sys_backoffWait (ctxt, 0); -+ else -+ { -+ ELAN_LOCATION location; -+ -+ krwlock_read (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, trap->Desc.s.dma_direction == DMA_WRITE ? -+ trap->Desc.s.dma_destVProc : trap->Desc.s.dma_srcVProc, NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ sys_backoffWait (ctxt, location.loc_node == ELAN3_INVALID_NODE ? SYS_BCAST_BACKOFF : SYS_TIMEOUT_BACKOFF); -+ } -+ } -+ return (OP_IGNORE); -+ -+ case EXCEPTION_NETWORK_ERROR: -+ { -+ INPUT_TRAP *trap = (INPUT_TRAP *) trapp; -+ NETERR_RESOLVER **rvpp = va_arg (ap, NETERR_RESOLVER **); -+ -+ ASSERT (trap->State == CTXT_STATE_NETWORK_ERROR); -+ -+ if (! (sctx->Flags & ELAN3_SYS_FLAG_NETERR) && (trap->DmaIdentifyTransaction || trap->ThreadIdentifyTransaction)) -+ { -+ if ((*rvpp) != (NETERR_RESOLVER *) NULL) -+ res = (*rvpp)->Status; -+ else if ((res = QueueNetworkErrorResolver (ctxt, trap, rvpp)) == ESUCCESS) -+ { -+ /* Successfully queued the network error resolver */ -+ return (OP_HANDLED); -+ } -+ -+ /* network error resolution has failed - either a bad cookie or */ -+ /* an rpc error has occured */ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, 0); -+ } -+ else -+ { -+ /* Must be an overlaped dma packet. Must wait long enough to -+ * ensure that the sending dma'er has tried to send the next -+ * packet and had it discarded. In the real world this should -+ * be greater than an output timeout. (About 8mSec) */ -+ -+ sys_backoffWait (ctxt, SYS_NETERR_BACKOFF); -+ -+ /* set this inputter state to be ok, since we've been called -+ * by the lwp it will lower the context filter for us, so -+ * re-enabling the inputter, note we don't need to execute -+ * any of the packet since the dma process will re-transmit -+ * it after receiving a nack for the next packet */ -+ trap->State = CTXT_STATE_OK; -+ -+ return (OP_HANDLED); -+ } -+ break; -+ } -+ -+ default: -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ if (type != EXCEPTION_DEBUG) -+#ifdef LINUX -+#ifdef NO_NPTL -+ psignal (CURPROC()->p_opptr, sctx->signal); -+#else -+ psignal (CURPROC()->parent, sctx->signal); -+#endif -+#else -+ psignal (CURPROC(), sctx->signal); -+#endif -+ return (OP_HANDLED); -+} -+ -+static int -+sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_fire_cookie (sctx->Table, cookie); -+ -+ return (OP_HANDLED); -+} -+ -+static void -+sys_swapin (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapin\n"); -+} -+ -+static void -+sys_swapout (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapout\n"); -+} -+ -+static void -+sys_freePrivate (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_free_table (sctx->Table); -+ -+ kmutex_destroy (&sctx->Lock); -+ spin_lock_destroy (&sctx->WaitLock); -+ kcondvar_destroy (&sctx->NetworkErrorWait); -+ -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ ctxt->Private = NULL; -+} -+ -+static int -+sys_checkThisDma (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, E3_DMA *dma) -+{ -+ E3_DmaType type; -+ E3_uint32 cookie; -+ E3_uint32 cvproc; -+ int ignore; -+ int match; -+ -+ type.type = fuword_noerr ((int *) &dma->dma_type); -+ -+ if (type.s.direction == DMA_WRITE) -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ } -+ else -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ } -+ -+ PRINTF5 (ctxt, DBG_NETERR, "sys_checkThisDma: dir = %d cookie = %08x cvproc = %08x CookieVProc %08x DstProcess %04x\n", -+ type.s.direction, cookie, cvproc, nef->Message.CookieVProc, nef->Message.DstProcess); -+ -+ /* A DMA matches a network errror fixup if it's going to the right place (or is a broadcast) -+ * and the approriate cookie matches, except that we ignore DMA's which don't have a destEvent -+ * since they don't have any atomic behaviour (though they still send the identify) */ -+ -+ ignore = (type.s.direction == DMA_WRITE && cookie == 0 && -+ fuword_noerr ((int *) &dma->dma_destEvent) == 0); -+ match = (nef->Message.CookieVProc == cookie && -+ (nef->Message.DstProcess == (cvproc & DMA_PROCESS_MASK) || nef->Message.WaitForEop)); -+ -+ PRINTF2 (ctxt, DBG_NETERR, " -> %s %s\n", ignore ? "ignore" : match ? "matched" : "not-matched", nef->Message.WaitForEop ? "wait for eop" : ""); -+ -+ if (match && !ignore && !nef->Message.WaitForEop) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "sys_checkThisDma: nuking the dma\n"); -+ -+ /* NOTE - we access the dma descriptor backwards since it could exist in sdram */ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_srcEvent, 0); -+ -+ suword_noerr ((int *) &dma->dma_destEvent, 0); -+ suword_noerr ((int *) &dma->dma_dest, 0); -+ suword_noerr ((int *) &dma->dma_source, 0); -+ suword_noerr ((int *) &dma->dma_size, 0); -+ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_type, fuword_noerr ((int *) &dma->dma_type) & E3_DMA_CONTEXT_MASK); -+ -+ wmb(); mmiob(); -+ } -+ -+ return (match && !ignore); -+} -+ -+static int -+sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int matched = 0; -+ SYS_WORD_ITEM *wordp; -+ SYS_BLOCK_ITEM *blockp; -+ label_t ljb; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError %08x %08x %08x\n", -+ nef->Message.CookieAddr, nef->Message.CookieVProc, nef->Message.NextCookie); -+ -+ if (nef->Message.CookieAddr == (E3_Addr) 0) /* It's a DMA which requires fixing up */ -+ { -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ res = EFAULT; -+ else -+ { -+ /* scan the dma ptr list */ -+ for (wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_PTR]); -+ wordp != NULL; -+ wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &wordp->Next)) -+ { -+ E3_uint32 value = fuword_noerr ((int *) &wordp->Value); -+ E3_DMA *dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, value); -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Value %08x dma %p\n", wordp, value, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* scan the dma desc list */ -+ for (blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_DESC]); -+ blockp != NULL; -+ blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &blockp->Next)) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &blockp->Pointer); -+ -+ PRINTF2 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Pointer %p\n", blockp, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* If we've still not found it, then check the command port item */ -+ /* it MUST be present as a command waiting to be executed, as */ -+ /* otherwise it could have already happened and we will claim to */ -+ /* have found it, but not realy */ -+ if (ctxt->CommandPortItem != NULL) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &((SYS_BLOCK_ITEM *) ctxt->CommandPortItem)->Pointer); -+ -+ if (sys_checkThisDma (ctxt, nef, dma)) -+ { -+ printk ("!!! it's the command port item - need to ensure that the command exists\n"); -+ matched++; -+ } -+ } -+ -+ res = matched ? ESUCCESS : ESRCH; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (matched > 1) -+ ElanException (ctxt, EXCEPTION_COOKIE_ERROR, DMA_PROC, NULL, NULL, nef->Message.CookieVProc); -+ } -+ else /* It's a thread which requires fixing up */ -+ { -+ E3_int32 *cookiePtr = (E3_int32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, nef->Message.CookieAddr); -+ E3_uint32 curval = fuword_noerr (cookiePtr); -+ -+ if (curval == nef->Message.CookieVProc) /* thread doesn't think it's been done */ -+ { -+ if (! nef->Message.WaitForEop) -+ { -+ suword_noerr (cookiePtr, nef->Message.NextCookie); -+ mb(); mmiob(); -+ } -+ -+ res = ESUCCESS; -+ } -+ else /* thread thinks that it's been executed */ -+ { -+ res = ESRCH; -+ } -+ } -+ -+ CompleteNetworkErrorFixup (ctxt, nef, res); -+ -+ return (OP_HANDLED); -+} -+ -+ -+static int -+sys_startFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ return (0); -+} -+ -+static void -+sys_endFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ wmb(); -+} -+ -+static E3_uint8 -+sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fubyte_noerr (maddr)); -+} -+ -+static void -+sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ subyte_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint16 -+sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fusword_noerr (maddr)); -+} -+ -+static void -+sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ susword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint32 -+sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fuword_noerr (maddr)); -+} -+ -+static void -+sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ suword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint64 -+sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fulonglong_noerr ((long long *) maddr)); -+} -+ -+static void -+sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ sulonglong_noerr ((long long *) maddr, val); -+ wmb(); mmiob(); -+} -+ -+ -+void -+sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t trapp, int size, -+ E3_FaultSave_BE *faultSave, u_long res, u_long value) -+{ -+ SYS_EXCEPTION *ex_ptr; -+ int front; -+ int back; -+ int count; -+ label_t ljp; -+ -+ PRINTF4 (DBG_DEVICE, DBG_FN, "sys_addException: type %d proc %d res %ld value %ld\n", -+ type, proc, res, value); -+ -+ KMEM_ZALLOC (ex_ptr, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (ex_ptr != NULL) -+ { -+ bzero ((caddr_t) ex_ptr, sizeof (SYS_EXCEPTION)); -+ -+ ex_ptr->Type = type; -+ ex_ptr->Proc = proc; -+ ex_ptr->Res = res; -+ ex_ptr->Value = value; -+ -+ if (trapp && size) -+ bcopy (trapp, (caddr_t) &ex_ptr->Union, size); -+ if (faultSave) -+ bcopy ((caddr_t) faultSave, (caddr_t) &ex_ptr->FaultArea, sizeof (E3_FaultSave_BE)); -+ } -+ -+ kmutex_lock (&sctx->Lock); -+ if (! on_fault (&ljp)) -+ { -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else if (((front+1) % count ) == back) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else -+ { -+ if (ex_ptr != NULL) -+ copyout_noerr ((caddr_t) ex_ptr, (caddr_t) &sctx->Exceptions->Exceptions[front], sizeof (SYS_EXCEPTION)); -+ else -+ { -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Type, EXCEPTION_ENOMEM); -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Proc, 0); -+ } -+ suword_noerr (&sctx->Exceptions->Front, (front + 1) % count); -+ } -+ -+ /* always reset the magic number in case it's been overwritten */ -+ /* so that 'edb' can find the exception page in the core file */ -+ suword_noerr (&sctx->Exceptions->Magic, SYS_EXCEPTION_MAGIC); -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (ex_ptr != NULL) -+ KMEM_FREE (ex_ptr, sizeof (SYS_EXCEPTION)); -+} -+ -+int -+sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex) -+{ -+ int front; -+ int back; -+ int count; -+ int res; -+ label_t ljp; -+ -+ if (sctx->Exceptions == NULL) -+ return (EINVAL); -+ -+ kmutex_lock (&sctx->Lock); -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ return (EFAULT); -+ } -+ -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count || back == front) -+ res = EINVAL; -+ else -+ { -+ copyin_noerr ((caddr_t) &sctx->Exceptions->Exceptions[back], (caddr_t) ex, sizeof (SYS_EXCEPTION)); -+ suword_noerr (&sctx->Exceptions->Back, (back+1) % count); -+ -+ res = ESUCCESS; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/eventcookie.c linux-2.6.9/drivers/net/qsnet/elan3/eventcookie.c ---- clean/drivers/net/qsnet/elan3/eventcookie.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/eventcookie.c 2003-08-13 06:03:03.000000000 -0400 -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: eventcookie.c,v 1.7 2003/08/13 10:03:03 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/eventcookie.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static EVENT_COOKIE_TABLE *cookie_tables; -+static spinlock_t cookie_table_lock; -+ -+/* -+ * cookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+cookie_drop_entry (EVENT_COOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+} -+ -+void -+cookie_init() -+{ -+ spin_lock_init (&cookie_table_lock); -+} -+ -+void -+cookie_fini() -+{ -+ spin_lock_destroy (&cookie_table_lock); -+} -+ -+EVENT_COOKIE_TABLE * -+cookie_alloc_table (unsigned long task, unsigned long handle) -+{ -+ EVENT_COOKIE_TABLE *tbl, *ntbl; -+ -+ KMEM_ZALLOC (ntbl, EVENT_COOKIE_TABLE *, sizeof (EVENT_COOKIE_TABLE), TRUE); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ spin_lock (&cookie_table_lock); -+ -+ for (tbl = cookie_tables; tbl; tbl = tbl->tbl_next) -+ if (tbl->tbl_task == task && tbl->tbl_handle == handle) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_task = task; -+ ntbl->tbl_handle = handle; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ if ((ntbl->tbl_next = cookie_tables) != NULL) -+ cookie_tables->tbl_prev = ntbl; -+ cookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&cookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (EVENT_COOKIE_TABLE)); -+ return (tbl); -+ } -+} -+ -+void -+cookie_free_table (EVENT_COOKIE_TABLE *tbl) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ -+ spin_lock (&cookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&cookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ cookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&cookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ cookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (EVENT_COOKIE_TABLE)); -+} -+ -+int -+cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, EVENT_COOKIE_ENTRY *, sizeof (EVENT_COOKIE_ENTRY), TRUE); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (ESUCCESS); -+ else -+ { -+ KMEM_FREE (nent, sizeof (EVENT_COOKIE_ENTRY)); -+ return (EINVAL); -+ } -+} -+ -+int -+cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ cookie_drop_entry (ent); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (ESUCCESS); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+ return (ESUCCESS); -+} -+ -+int -+cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/iproc.c linux-2.6.9/drivers/net/qsnet/elan3/iproc.c ---- clean/drivers/net/qsnet/elan3/iproc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/iproc.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,925 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: iproc.c,v 1.47 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/iproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static int TrSizeTable[] = {0, 8, 16, 32, 64}; -+ -+static void ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr); -+static void SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp); -+ -+void -+HandleIProcTrap (ELAN3_DEV *dev, -+ int Channel, -+ E3_uint32 Pend, -+ sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, -+ sdramaddr_t DataOff) -+{ -+ E3_IprocTrapHeader_BE Transaction0; -+ ELAN3_CTXT *ctxt; -+ INPUT_TRAP *trap; -+ register int i; -+ -+ /* -+ * Read the 1st set of transactions, so we can determine the -+ * context for the trap -+ */ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff, (void *) &Transaction0, 16); -+ -+ BumpStat (dev, IProcTraps); -+ BumpInputterStats (dev, &Transaction0); -+ -+ if (Transaction0.s.TrTypeCntx.s.TypeCntxInvalid) -+ { -+ /* -+ * The context is not valid. This will occur if the packet -+ * trapped for an EopError with no IdentTrans or an error corrupted the context -+ * giving a CRC error on the first transaction and the Ack had not been returned. -+ */ -+ if (Transaction0.s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: Error on EOP without a good context, ignoring trap\n"); -+ } -+ else -+ { -+ /* Check that only crap has been received. If not then die. */ -+ if (! Transaction0.s.IProcTrapStatus.s.BadLength && -+ (Transaction0.s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_GOOD) -+ { -+ printk ("iproc: Did not have a valid context for the trap area.\n"); -+ printk ("iproc: TrTypeCntx=%x TrAddr=%x TrData0=%x IProcTrapStatus=%x\n", -+ Transaction0.s.TrTypeCntx.TypeContext, Transaction0.s.TrAddr, -+ Transaction0.s.TrData0, Transaction0.s.IProcTrapStatus.Status); -+ panic ("elan3: iproc did not have a valid context"); -+ /* NOTREACHED */ -+ } -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: First transaction is bad, ignoring trap\n"); -+ } -+ } -+ else -+ { -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Transaction0.s.TrTypeCntx.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleIProcTrap: context %x invalid\n", -+ Transaction0.s.TrTypeCntx.s.Context); -+ -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ trap = (Channel == 0) ? &ctxt->Input0Trap : &ctxt->Input1Trap; -+ -+ ASSERT (trap->State == CTXT_STATE_OK); -+ -+ trap->Transactions[0] = Transaction0; -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleIProcTrap: %s\n", IProcTrapString (&trap->Transactions[0], NULL)); -+ /* -+ * Copy the rest of the transactions into the trap area. -+ */ -+ for (i = 0; !(trap->Transactions[i].s.TrTypeCntx.s.LastTrappedTrans);) -+ { -+ if (++i >= MAX_TRAPPED_TRANS) -+ { -+ trap->Overflow = 1; -+ break; -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff + i*sizeof (E3_IprocTrapHeader), (void *) &trap->Transactions[i], 16); -+ -+ PRINTF1 (ctxt, DBG_INTR, " %s\n", IProcTrapString (&trap->Transactions[i], NULL)); -+ -+ BumpInputterStats (dev, &trap->Transactions[i]); -+ } -+ -+ /* -+ * Remember the number of transactions we've copied. -+ */ -+ trap->NumTransactions = i+1; -+ -+ PRINTF1 (ctxt, DBG_INTR, " NumTransactions = %d\n", trap->NumTransactions); -+ -+ /* -+ * Copy all the data blocks in one go to let the Elan prefetcher work -+ */ -+ elan3_sdram_copyq_from_sdram (dev, DataOff, trap->DataBuffers, trap->NumTransactions*sizeof (E3_IprocTrapData)); -+ -+ /* -+ * Copy fault save area and clear out for next time round. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, FaultSaveOff, (void *) &trap->FaultSave, 16); -+ elan3_sdram_zeroq_sdram (dev, FaultSaveOff, 16); -+ -+ if (ELAN3_OP_IPROC_TRAP (ctxt, trap, Channel) == OP_DEFER) -+ { -+ /* -+ * Mark the trap as valid and set the inputter state to -+ * raise the context filter. -+ */ -+ trap->State = CTXT_STATE_TRAPPED; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ SetInputterStateForContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+} -+ -+void -+InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ int i; -+ int StatusValid; -+ -+ trap->AckSent = 0; -+ trap->BadTransaction = 0; -+ -+ trap->TrappedTransaction = NULL; -+ trap->TrappedDataBuffer = NULL; -+ trap->WaitForEopTransaction = NULL; -+ trap->WaitForEopDataBuffer = NULL; -+ trap->DmaIdentifyTransaction = NULL; -+ trap->ThreadIdentifyTransaction = NULL; -+ trap->LockQueuePointer = (E3_Addr) 0; -+ trap->UnlockQueuePointer = (E3_Addr) 0; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < trap->NumTransactions ; i++) -+ { -+ E3_IprocTrapHeader_BE *hdrp = &trap->Transactions[i]; -+ E3_IprocTrapData_BE *datap = &trap->DataBuffers[i]; -+ -+ StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid != 0; -+ -+ if (StatusValid && hdrp->s.IProcTrapStatus.s.AckSent) /* Remember if we've sent the ack back */ -+ trap->AckSent = 1; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* Check for EOP */ -+ { -+ ASSERT (i == trap->NumTransactions - 1); -+ -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ BumpUserStat (ctxt, EopBadAcks); -+ -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. We can clear tinfo.AckSent. */ -+ if (trap->AckSent == 1) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: Network error destroyed PAckOk\n"); -+ trap->AckSent = 0; -+ } -+ break; -+ -+ case EOP_ERROR_RESET: -+ BumpUserStat (ctxt, EopResets); -+ -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->BadTransaction = 1; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid EOP type in status register\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ PRINTF2 (ctxt, DBG_IPROC, "InspectIProcTrap: %2d: %s\n", i, IProcTrapString (hdrp, datap)); -+ -+ if (! StatusValid) /* We're looking at transactions stored before the trap */ -+ { /* these should only be identifies and lock transactions */ -+ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ panic ("InspectIProcTrap: writeblock transaction found in input trap header before trap occured\n"); -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->LockQueuePointer) /* Already seen a LOCKQUEUE transaction in this packet, */ -+ { /* the user program should not have done this !! */ -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ -+ trap->LockQueuePointer = (E3_Addr) hdrp->s.TrAddr; /* Remember the queue pointer in case we need to unlock it */ -+ break; -+ -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->DmaIdentifyTransaction = hdrp; -+ break; -+ -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->ThreadIdentifyTransaction = hdrp; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid transaction found in input trap header before trap occured\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ if (StatusValid && trap->TrappedTransaction == NULL) /* Remember the transaction which caused the */ -+ { /* trap */ -+ trap->TrappedTransaction = hdrp; -+ trap->TrappedDataBuffer = datap; -+ } -+ -+ if(hdrp->s.IProcTrapStatus.s.BadLength || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD)) -+ { -+ int j; -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: transaction has a bad crc\n"); -+ for (j=0; jTrData[j], datap->TrData[j+1], datap->TrData[j+2], datap->TrData[j+3]); -+ trap->BadTransaction = 1; -+ continue; -+ } -+ -+ /* No more to do if it's a writeblock transaction */ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ continue; -+ -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap && -+ (hdrp->s.TrTypeCntx.s.Type & TR_WAIT_FOR_EOP) != 0) -+ { -+ /* -+ * This is a wait for eop transaction that has trapped because the inputer -+ * then received a EopError. The next transaction saved should always be an -+ * EopError. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: got a trapped WaitForEop transaction due to EopError\n"); -+ -+ trap->WaitForEopTransaction = hdrp; -+ trap->WaitForEopDataBuffer = datap; -+ continue; -+ } -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->UnlockQueuePointer) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->UnlockQueuePointer = (E3_Addr) hdrp->s.TrAddr; -+ break; -+ } -+ } -+} -+ -+void -+ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ BumpUserStat (ctxt, IProcTraps); -+ -+ InspectIProcTrap (ctxt, trap); -+ -+ /* -+ * fixup page fault if we've trapped because of one. -+ */ -+ if (trap->FaultSave.s.FaultContext != 0) -+ { -+ /* -+ * If it's a WRITEBLOCK transaction, then see if we remember faulting -+ * before it, and try and prefault in a sensible amount past it. -+ */ -+ int fixedFault = FALSE; -+ INPUT_FAULT_SAVE *entry; -+ INPUT_FAULT_SAVE **predp; -+ int npages; -+ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0 && /* a DMA packet */ -+ trap->LockQueuePointer == (E3_Addr) 0 && /* but not a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr != 0) /* and not a DMA to 0 */ -+ { -+ spin_lock (&ctxt->InputFaultLock); -+ -+ for (predp = &ctxt->InputFaultList; (entry = *predp)->Next != NULL ; predp = &entry->Next) -+ { -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ break; -+ } -+ -+ *predp = entry->Next; -+ entry->Next = ctxt->InputFaultList; -+ ctxt->InputFaultList = entry; -+ -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ { -+ if ((entry->Count <<= 1) > MAX_INPUT_FAULT_PAGES) -+ entry->Count = MAX_INPUT_FAULT_PAGES; -+ } -+ else -+ { -+ entry->Count = MIN_INPUT_FAULT_PAGES; -+ } -+ -+ entry->Addr = trap->TrappedTransaction->s.TrAddr + (entry->Count * PAGESIZE); -+ npages = entry->Count; -+ -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ if (elan3_pagefault (ctxt, &trap->FaultSave, npages) != ESUCCESS) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - failed\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - succeeded\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ -+ fixedFault = TRUE; -+ } -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ /* the packet will have been nacked */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ fixedFault = TRUE; -+ } -+ -+ if (! fixedFault) -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "ResolveIProcTrap: elan3_pagefault failed at %x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, INPUT_PROC, trap, &trap->FaultSave, res); -+ return; -+ } -+ } -+ } -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Queued DMA */ -+ { /* The ack was not sent, so the queue will be locked. */ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); /* We must unlock it. */ -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ if (trap->DmaIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Dma identify needs network resultion\n"); -+ -+ BumpStat (dev, DmaIdentifyNetworkErrors); -+ BumpUserStat (ctxt, DmaIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else if (trap->ThreadIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Thread identify needs network resolution\n"); -+ -+ BumpStat (dev, ThreadIdentifyNetworkErrors); -+ BumpUserStat (ctxt, ThreadIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else -+ { -+ BumpStat (dev, DmaNetworkErrors); -+ BumpUserStat (ctxt, DmaNetworkErrors); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (! trap->AckSent) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack not sent, lowering context filter\n"); -+ -+ trap->State = CTXT_STATE_OK; -+ } -+ else -+ { -+ if (trap->BadTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on bad transaction\n"); -+ trap->State = CTXT_STATE_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on packet to be re-executed\n"); -+ trap->State = CTXT_STATE_NEEDS_RESTART; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (trap->AckSent && trap->BadTransaction) -+ ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, rvpp); -+} -+ -+int -+RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: %d transactions\n", trap->NumTransactions); -+ -+ if (trap->TrappedTransaction == NULL) /* No transaction trapped - probably a network */ -+ return (ESUCCESS); /* error */ -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ PRINTF2 (ctxt, DBG_IPROC, "RestartIProc: TrType=0x%x Status=0x%x\n", -+ hdrp->s.TrTypeCntx.TypeContext, hdrp->s.IProcTrapStatus.Status); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+ SimulateBlockWrite (ctxt, hdrp, datap); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: SETEVENT : %x\n", hdrp->s.TrAddr); -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_InputDoTrap) -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus), &trap->FaultSave, FALSE); -+ else if (hdrp->s.TrAddr) -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), hdrp->s.TrAddr, FALSE) != ISSUE_COMMAND_OK) -+ return (EAGAIN); -+ } -+ break; -+ -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteDWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap) -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ else -+ { -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_WaitForUnLockDescRead: -+ /* -+ * Fault occured on the read of the queue descriptor - since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TR_UNLOCKQUEUE : desc read fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EAGAIN); -+ } -+ break; -+ -+ case MI_DoSetEvent: -+ /* -+ * Fault occured on either the write to unlock the queue or during -+ * processing of the event. Test the fault address against the -+ * queue address to find out which - in this case, since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ if (trap->FaultSave.s.FaultAddress == trap->LockQueuePointer) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: fixed unlock queue write to unlock fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EFAIL); -+ } -+ break; -+ } -+ /*DROPTHROUGH*/ -+ -+ default: -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus), -+ &trap->FaultSave, FALSE); -+ break; -+ } -+ trap->LockQueuePointer = trap->UnlockQueuePointer = 0; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: ignore SENDDISCARD\n"); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: REMOTEDMA\n"); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ ((E3_DMA_BE *) datap)->s.dma_type &= ~(DMA_TYPE_READ | E3_DMA_CONTEXT_MASK); -+ ((E3_DMA_BE *) datap)->s.dma_type |= DMA_TYPE_ISREMOTE; -+ -+ RestartDmaDesc (ctxt, (E3_DMA_BE *) datap); -+ break; -+ -+ case TR_TRACEROUTE & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TRACEROUTE\n"); -+ SimulateTraceRoute (ctxt, hdrp, datap); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr) -+{ -+ hdrp->s.TrTypeCntx.s.Type = TR_SETEVENT; -+ hdrp->s.TrTypeCntx.s.StatusRegValid = 0; -+ hdrp->s.TrAddr = Addr; -+} -+ -+void -+SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ void *saddr = (void *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nbytes = (hdrp->s.TrTypeCntx.s.Type) & TR_PARTSIZE_MASK; -+ int i; -+ -+ if (nbytes == 0) -+ nbytes = sizeof (E3_IprocTrapData_BE); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateBlockWrite: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+ switch (((hdrp->s.TrTypeCntx.s.Type) >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: /* 8 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_SHORT: /* 16 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_WORD: /* 32 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_DWORD: /* 64 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ break; -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr, ((E3_uint32 *) datap)[WordEndianFlip]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteDWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr, ((E3_uint64 *) datap)[0]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ E3_uint32 *saddr = (E3_uint32 *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nwords = TrSizeTable[(hdrp->s.TrTypeCntx.s.Type >> TR_SIZE_SHIFT) & TR_SIZE_MASK] / sizeof (E3_uint32); -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateTraceRoute: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ for (i = nwords-2; i >= 0; i--) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ i = nwords-1; -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck) -+{ -+ E3_uint32 QueueLock; -+ E3_Addr QueueBPTR; -+ E3_Addr QueueFPTR; -+ E3_uint64 QueueStateAndBPTR; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "UnlockQueue: faulted with QueuePointer %x\n", QueuePointer); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, QueuePointer); -+ return; -+ } -+ -+ if (SentAck) -+ { -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_bptr)); -+ QueueFPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_fptr)); -+ -+ if (QueueBPTR == ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_top))) /* move on back pointer */ -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_base)); -+ else -+ QueueBPTR += ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_size)); -+ -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ if (QueueBPTR == QueueFPTR) /* and set full bit if fptr == bptr */ -+ QueueLock |= E3_QUEUE_FULL; -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ QueueStateAndBPTR = (E3_uint64)QueueLock << 32 | QueueBPTR; -+ -+ ELAN3_OP_STORE64 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueStateAndBPTR); -+ } -+ else -+ { -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ ELAN3_OP_STORE32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueLock); -+ } -+ -+ no_fault(); -+} -+ -+static void -+BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp) -+{ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* EOP */ -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_BADACK: -+ BumpStat (dev, EopBadAcks); -+ break; -+ case EOP_ERROR_RESET: -+ BumpStat (dev, EopResets); -+ break; -+ } -+ } -+ else if (hdrp->s.TrTypeCntx.s.StatusRegValid) -+ { -+ /* -+ * Errors are tested in order of badness. i.e. badlength will prevent a BadCrc and so on... -+ */ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ BumpStat (dev, InputterBadLength); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD) -+ BumpStat (dev, InputterCRCBad); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) -+ BumpStat (dev, InputterCRCErrors); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_DISCARD) -+ BumpStat (dev, InputterCRCDiscards); -+ } -+} -+ -+char * -+IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ static char buffer[256]; -+ static char typeString[256]; -+ static char statusString[256]; -+ char *ptr; -+ E3_Addr Addr = hdrp->s.TrAddr; -+ E3_uint32 Type = hdrp->s.TrTypeCntx.s.Type; -+ E3_uint32 Context = hdrp->s.TrTypeCntx.s.Context; -+ E3_uint32 StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: sprintf (typeString, "EOP GOOD"); break; -+ case EOP_BADACK: sprintf (typeString, "EOP BADACK"); break; -+ case EOP_ERROR_RESET: sprintf (typeString, "EOP ERROR RESET"); break; -+ default: sprintf (typeString, "EOP - bad status"); break; -+ } -+ sprintf (buffer, "%15s Cntx=%08x", typeString, Context); -+ } -+ else -+ { -+ if (Type & TR_WRITEBLOCK_BIT) -+ { -+ switch ((Type >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: ptr = "Byte"; break; -+ case TR_TYPE_SHORT: ptr = "Short"; break; -+ case TR_TYPE_WORD: ptr = "Word"; break; -+ case TR_TYPE_DWORD: ptr = "Double"; break; -+ default: ptr = "Unknown"; break; -+ } -+ -+ sprintf (typeString, "WriteBlock Type=%s Size=%2d", ptr, Type & TR_PARTSIZE_MASK); -+ } -+ else -+ { -+ switch (Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Setevent"); break; -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Remote DMA"); break; -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Lock Queue"); break; -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Unlock Queue"); break; -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Send Discard"); break; -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "DMA Identify"); break; -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Thread Identify"); break; -+ case TR_GTE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "GTE"); break; -+ case TR_LT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "LT"); break; -+ case TR_EQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "EQ"); break; -+ case TR_NEQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "NEQ"); break; -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Word"); break; -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Double"); break; -+ case TR_ATOMICADDWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Atomic Add"); break; -+ case TR_TESTANDWRITE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Test and Write"); break; -+ default: sprintf (typeString, "Type=%d", Type & TR_OPCODE_TYPE_MASK); break; -+ } -+ } -+ sprintf (buffer, "%15s Addr=%08x Cntx=%08x", typeString, Addr, Context); -+ /*(Type & TR_SENDACK) ? " Sendack" : "", */ -+ /*(Type & TR_LAST_TRANS) ? " LastTrans" : "", */ -+ /*(Type & TR_WAIT_FOR_EOP) ? " WaitForEop" : ""); */ -+ } -+ -+ if (StatusValid) -+ { -+ sprintf (statusString, " Type=%s %x", MiToName (hdrp->s.IProcTrapStatus.s.TrapType), hdrp->s.IProcTrapStatus.Status); -+ strcat (buffer, statusString); -+ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ strcat (buffer, " BadLength"); -+ switch (hdrp->s.IProcTrapStatus.Status & CRC_MASK) -+ { -+ case CRC_STATUS_DISCARD: -+ strcat (buffer, " CRC Discard"); -+ break; -+ case CRC_STATUS_ERROR: -+ strcat (buffer, " CRC Error"); -+ break; -+ -+ case CRC_STATUS_BAD: -+ strcat (buffer, " CRC Bad"); -+ break; -+ } -+ } -+ -+ return (buffer); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/Makefile linux-2.6.9/drivers/net/qsnet/elan3/Makefile ---- clean/drivers/net/qsnet/elan3/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/Makefile 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan3/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_ELAN3) += elan3.o -+elan3-objs := context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/elan3/Makefile.conf linux-2.6.9/drivers/net/qsnet/elan3/Makefile.conf ---- clean/drivers/net/qsnet/elan3/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/Makefile.conf 2005-09-07 10:39:38.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan3.o -+MODULENAME = elan3 -+KOBJFILES = context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+EXPORT_KOBJS = elandev_linux.o procfs_linux.o -+CONFIG_NAME = CONFIG_ELAN3 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/elan3/minames.c linux-2.6.9/drivers/net/qsnet/elan3/minames.c ---- clean/drivers/net/qsnet/elan3/minames.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/minames.c 2003-06-07 11:57:49.000000000 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: minames.c,v 1.12 2003/06/07 15:57:49 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/minames.c,v $*/ -+ -+#include -+#include -+ -+caddr_t -+MiToName (int mi) -+{ -+ static char space[32]; -+ static struct { -+ int mi; -+ char *name; -+ } info[] = { -+#include -+ }; -+ register int i; -+ -+ -+ for (i = 0; i < sizeof(info)/sizeof(info[0]); i++) -+ if (info[i].mi == mi) -+ return (info[i].name); -+ sprintf (space, "MI %x", mi); -+ return (space); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/network_error.c linux-2.6.9/drivers/net/qsnet/elan3/network_error.c ---- clean/drivers/net/qsnet/elan3/network_error.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/network_error.c 2004-10-28 07:51:00.000000000 -0400 -@@ -0,0 +1,777 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: network_error.c,v 1.33 2004/10/28 11:51:00 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/network_error.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DIGITAL_UNIX -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+typedef xdrproc_t kxdrproc_t; -+#endif -+ -+#ifdef LINUX -+#include -+#include -+#include -+#include -+ -+#include -+#define SYS_NMLN __NEW_UTS_LEN -+#endif -+ -+#include -+ -+spinlock_t ResolveRequestLock; -+kcondvar_t ResolveRequestWait; -+ -+NETERR_RESOLVER *ResolveRequestHead; -+NETERR_RESOLVER **ResolveRequestTailp = &ResolveRequestHead; -+int ResolveRequestCount; -+int ResolveRequestThreads; -+int ResolveRequestMaxThreads = 4; -+int ResolveRequestTimeout = 60; -+ -+typedef struct neterr_server -+{ -+ struct neterr_server *Next; -+ struct neterr_server *Prev; -+ unsigned ElanId; -+ -+ char *Name; -+ int RefCount; -+ struct sockaddr_in Addr; -+} NETERR_SERVER; -+ -+#define NETERR_HASH_ENTRIES 64 -+#define NETERR_HASH(elanid) (((unsigned) elanid) % NETERR_HASH_ENTRIES) -+NETERR_SERVER *NeterrServerHash[NETERR_HASH_ENTRIES]; -+kmutex_t NeterrServerLock; -+ -+static NETERR_SERVER *FindNeterrServer (int elanId); -+static void DereferenceNeterrServer (NETERR_SERVER *server); -+static int CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg); -+ -+void -+InitialiseNetworkErrorResolver () -+{ -+ spin_lock_init (&ResolveRequestLock); -+ kcondvar_init (&ResolveRequestWait); -+ -+ ResolveRequestHead = NULL; -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ kmutex_init (&NeterrServerLock); -+} -+ -+void -+FinaliseNetworkErrorResolver () -+{ -+ spin_lock_destroy (&ResolveRequestLock); -+ kcondvar_destroy (&ResolveRequestWait); -+ -+ kmutex_destroy (&NeterrServerLock); -+} -+ -+static NETERR_RESOLVER * -+AllocateNetworkErrorResolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ -+ KMEM_ZALLOC (rvp, NETERR_RESOLVER *, sizeof (NETERR_RESOLVER), TRUE); -+ spin_lock_init (&rvp->Lock); -+ -+ return (rvp); -+} -+ -+void -+FreeNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock_destroy (&rvp->Lock); -+ KMEM_FREE (rvp, sizeof (NETERR_RESOLVER)); -+} -+ -+static void -+elan3_neterr_resolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ NETERR_SERVER *server; -+ int status; -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_neterr_resolver"); -+ spin_lock (&ResolveRequestLock); -+ -+ while ((rvp = ResolveRequestHead) != NULL) -+ { -+ if ((ResolveRequestHead = rvp->Next) == NULL) -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ spin_unlock (&ResolveRequestLock); -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: rvp = %p\n", rvp); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", rvp->Message.Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&rvp->Message.SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&rvp->Message.DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", rvp->Message.CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", rvp->Message.CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", rvp->Message.NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", rvp->Message.WaitForEop); -+ -+ if ((server = FindNeterrServer (rvp->Location.loc_node)) == NULL) -+ status = ECONNREFUSED; -+ else if (ResolveRequestTimeout && ((int)(lbolt - rvp->Timestamp)) > (ResolveRequestTimeout*HZ)) -+ { -+ printk ("elan_neterr: rpc to '%s' timedout - context %d killed\n", server->Name, rvp->Message.SrcCapability.cap_mycontext); -+ status = ECONNABORTED; -+ } -+ else -+ { -+ status = CallNeterrServer (server, &rvp->Message); -+ -+ DereferenceNeterrServer (server); -+ } -+ -+ if ((status == EINTR || status == ETIMEDOUT) && rvp->Ctxt != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: retry rvp=%p\n", rvp); -+ spin_lock (&ResolveRequestLock); -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ } -+ else -+ { -+ rvp->Status = status; -+ -+ spin_lock (&rvp->Lock); -+ -+ if (rvp->Ctxt != NULL) -+ { -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for ctxt %p\n", rvp, rvp->Ctxt); -+ spin_lock_irqsave (&rvp->Ctxt->Device->IntrLock, flags); -+ -+ rvp->Completed = TRUE; -+ -+ kcondvar_wakeupall (&rvp->Ctxt->Wait, &rvp->Ctxt->Device->IntrLock); -+ -+ /* -+ * drop the locks out of order since the rvp can get freeed -+ * as soon as we drop the IntrLock - so cannot reference the -+ * rvp after this. -+ */ -+ -+ spin_unlock (&rvp->Lock); -+ spin_unlock_irqrestore (&rvp->Ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for deceased ctxt %p\n", rvp, rvp->Ctxt); -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestCount--; -+ } -+ } -+ -+ ResolveRequestThreads--; -+ -+ spin_unlock (&ResolveRequestLock); -+ kernel_thread_exit(); -+} -+ -+int -+QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ int isdma = trap->DmaIdentifyTransaction != NULL; -+ E3_IprocTrapHeader_BE *hdrp = isdma ? trap->DmaIdentifyTransaction : trap->ThreadIdentifyTransaction; -+ E3_uint32 process = isdma ? (hdrp->s.TrAddr & 0xFFFF) : (hdrp->s.TrData0 & 0xFFFF); -+ NETERR_RESOLVER *rvp; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: process = %d %s\n", process, isdma ? "(dma)" : "(thread)"); -+ -+ if ((rvp = AllocateNetworkErrorResolver()) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot allocate resolver\n"); -+ return (ENOMEM); -+ } -+ -+ rvp->Message.Rail = ctxt->Device->Devinfo.dev_rail; -+ -+ krwlock_read (&ctxt->VpLock); -+ rvp->Location = ProcessToLocation (ctxt, NULL, process, &rvp->Message.SrcCapability); -+ krwlock_done (&ctxt->VpLock); -+ -+ if (rvp->Location.loc_node == ELAN3_INVALID_NODE) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: invalid elan id\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (EINVAL); -+ } -+ -+ rvp->Message.DstCapability = ctxt->Capability; -+ rvp->Message.DstProcess = elan3_process (ctxt); -+ rvp->Message.WaitForEop = (trap->WaitForEopTransaction != NULL); -+ -+ if (isdma) -+ { -+ rvp->Message.CookieAddr = 0; -+ rvp->Message.CookieVProc = hdrp->s.TrAddr; -+ rvp->Message.NextCookie = 0; -+ } -+ else -+ { -+ rvp->Message.CookieAddr = hdrp->s.TrAddr; -+ rvp->Message.CookieVProc = hdrp->s.TrData0; -+ rvp->Message.NextCookie = hdrp->s.TrData1; -+ } -+ -+ rvp->Completed = FALSE; -+ rvp->Ctxt = ctxt; -+ rvp->Timestamp = lbolt; -+ -+ spin_lock (&ResolveRequestLock); -+ -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ ResolveRequestCount++; -+ -+ kcondvar_wakeupone (&ResolveRequestWait, &ResolveRequestLock); -+ -+ if (ResolveRequestCount < ResolveRequestThreads || ResolveRequestThreads >= ResolveRequestMaxThreads) -+ spin_unlock (&ResolveRequestLock); -+ else -+ { -+ ResolveRequestThreads++; -+ -+ spin_unlock (&ResolveRequestLock); -+ if (kernel_thread_create (elan3_neterr_resolver, NULL) == NULL) -+ { -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestThreads--; -+ spin_unlock (&ResolveRequestLock); -+ -+ if (ResolveRequestThreads == 0) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot thread pool\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (ENOMEM); -+ } -+ } -+ } -+ -+ *rvpp = rvp; -+ return (ESUCCESS); -+} -+ -+void -+CancelNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock (&rvp->Lock); -+ -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "CancelNetworkErrorResolver: rvp=%p %s\n", rvp, rvp->Completed ? "Completed" : "Pending"); -+ -+ if (rvp->Completed) -+ { -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ else -+ { -+ rvp->Ctxt = NULL; -+ spin_unlock (&rvp->Lock); -+ } -+} -+ -+static NETERR_FIXUP * -+AllocateNetworkErrorFixup (void) -+{ -+ NETERR_FIXUP *nef; -+ -+ KMEM_ZALLOC (nef, NETERR_FIXUP *, sizeof (NETERR_FIXUP), TRUE); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (NULL); -+ -+ kcondvar_init (&nef->Wait); -+ -+ return (nef); -+} -+ -+static void -+FreeNetworkErrorFixup (NETERR_FIXUP *nef) -+{ -+ kcondvar_destroy (&nef->Wait); -+ KMEM_FREE (nef, sizeof (NETERR_FIXUP)); -+} -+ -+int -+ExecuteNetworkErrorFixup (NETERR_MSG *msg) -+{ -+ ELAN3_DEV *dev; -+ ELAN3_CTXT *ctxt; -+ NETERR_FIXUP *nef; -+ NETERR_FIXUP **predp; -+ int rc; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "ExecuteNetworkErrorFixup: msg = %p\n", msg); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", msg->Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&msg->SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&msg->DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", msg->CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", msg->CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", msg->NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", msg->WaitForEop); -+ -+ if ((dev = elan3_device (msg->Rail)) == NULL) -+ return (ESRCH); -+ -+ if ((nef = AllocateNetworkErrorFixup()) == NULL) -+ return (ENOMEM); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (ENOMEM); -+ -+ bcopy (msg, &nef->Message, sizeof (NETERR_MSG)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, msg->SrcCapability.cap_mycontext); -+ -+ if (ctxt == NULL) -+ rc = ESRCH; -+ else if (!ELAN_CAP_MATCH (&msg->SrcCapability, &ctxt->Capability)) -+ rc = EPERM; -+ else -+ { -+ if (ctxt->Status & CTXT_NO_LWPS) -+ rc = EAGAIN; -+ else -+ { -+ for (predp = &ctxt->NetworkErrorFixups; *predp != NULL; predp = &(*predp)->Next) -+ ; -+ nef->Next = NULL; -+ *predp = nef; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ while (! nef->Completed) -+ kcondvar_wait (&nef->Wait, &dev->IntrLock, &flags); -+ -+ rc = nef->Status; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ FreeNetworkErrorFixup (nef); -+ -+ return (rc); -+} -+ -+void -+CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "CompleteNetworkErrorFixup: %p %d\n", nef, status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ nef->Status = status; -+ nef->Completed = TRUE; -+ kcondvar_wakeupone (&nef->Wait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+static NETERR_SERVER * -+NewNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ -+ KMEM_ZALLOC (server, NETERR_SERVER *, sizeof (NETERR_SERVER), TRUE); -+ KMEM_ALLOC (server->Name, char *, strlen (name)+1, TRUE); -+ -+ bcopy (addr, &server->Addr, sizeof (struct sockaddr_in)); -+ bcopy (name, server->Name, strlen (name)+1); -+ -+ server->ElanId = elanId; -+ server->RefCount = 1; -+ -+ return (server); -+} -+ -+static void -+DeleteNeterrServer (NETERR_SERVER *server) -+{ -+ KMEM_FREE (server->Name, strlen(server->Name)+1); -+ KMEM_FREE (server, sizeof (NETERR_SERVER)); -+} -+ -+static NETERR_SERVER * -+FindNeterrServer (int elanId) -+{ -+ NETERR_SERVER *server; -+ -+ kmutex_lock (&NeterrServerLock); -+ -+ for (server = NeterrServerHash[NETERR_HASH(elanId)]; server != NULL; server = server->Next) -+ if (server->ElanId == elanId) -+ break; -+ -+ if (server != NULL) -+ server->RefCount++; -+ kmutex_unlock (&NeterrServerLock); -+ -+ return (server); -+} -+ -+static void -+DereferenceNeterrServer (NETERR_SERVER *server) -+{ -+ kmutex_lock (&NeterrServerLock); -+ if ((--server->RefCount) == 0) -+ DeleteNeterrServer (server); -+ kmutex_unlock (&NeterrServerLock); -+} -+ -+int -+AddNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ NETERR_SERVER *old; -+ int hashval = NETERR_HASH(elanId); -+ -+ server = NewNeterrServer (elanId, addr, name); -+ -+ if (server == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&NeterrServerLock); -+ for (old = NeterrServerHash[hashval]; old != NULL; old = old->Next) -+ if (old->ElanId == elanId) -+ break; -+ -+ /* remove "old" server from hash table */ -+ if (old != NULL) -+ { -+ if (old->Prev) -+ old->Prev->Next = old->Next; -+ else -+ NeterrServerHash[hashval] = old->Next; -+ if (old->Next) -+ old->Next->Prev = old->Prev; -+ } -+ -+ /* insert "new" server into hash table */ -+ if ((server->Next = NeterrServerHash[hashval]) != NULL) -+ server->Next->Prev = server; -+ server->Prev = NULL; -+ NeterrServerHash[hashval] = server; -+ -+ kmutex_unlock (&NeterrServerLock); -+ -+ if (old != NULL) -+ DereferenceNeterrServer (old); -+ -+ return (ESUCCESS); -+} -+ -+int -+AddNeterrServerSyscall (int elanId, void *addrp, void *namep, char *unused) -+{ -+ struct sockaddr_in addr; -+ char *name; -+ int error; -+ int nob; -+ -+ /* Sanity check the supplied elanId argument */ -+ if (elanId < 0) -+ return ( set_errno(EINVAL) ); -+ -+ KMEM_ALLOC (name, caddr_t, SYS_NMLN, TRUE); -+ -+ if (copyin ((caddr_t) addrp, (caddr_t) &addr, sizeof (addr)) || -+ copyinstr ((caddr_t) namep, name, SYS_NMLN, &nob)) -+ { -+ error = EFAULT; -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "AddNeterrServer: '%s' at elanid %d\n", name, elanId); -+ -+ error = AddNeterrServer (elanId, &addr, name); -+ } -+ KMEM_FREE (name, SYS_NMLN); -+ -+ return (error ? set_errno(error) : ESUCCESS); -+} -+ -+ -+#if defined(DIGITAL_UNIX) -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ cred_t *cr = crget(); -+ struct rpc_err rpcerr; -+ extern cred_t *kcred; -+ struct timeval wait; -+ enum clnt_stat rc; -+ int status; -+ CLIENT *clnt; -+ int error; -+ -+ PRINTF4 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) - family=%d port=%d addr=%08x\n", server->Name, -+ server->Addr.sin_family, server->Addr.sin_port, server->Addr.sin_addr.s_addr); -+ -+ if ((clnt = clntkudp_create (&server->Addr, (struct sockaddr_in *)0, NETERR_PROGRAM, NETERR_VERSION, 1, cr)) == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): clntkudp_create error\n", server->Name); -+ -+ return (ENOMEM); -+ } -+ -+ wait.tv_sec = NETERR_RPC_TIMEOUT; -+ wait.tv_usec = 0; -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL timeout = %d\n", server->Name, NETERR_RPC_TIMEOUT); -+ -+ rc = CLNT_CALL(clnt, NETERR_FIXUP_RPC, xdr_neterr_msg, (void *)msg, xdr_int, (void *) &status, wait); -+ -+ PRINTF3 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL -> %d (%s)\n", server->Name, rc, clnt_sperrno(rc));; -+ -+ switch (rc) -+ { -+ case RPC_SUCCESS: -+ break; -+ -+ case RPC_INTR: -+ status = EINTR; -+ break; -+ -+ case RPC_TIMEDOUT: -+ status = ETIMEDOUT; -+ break; -+ -+ default: -+ printf ("CallNeterrServer(%s): %s\n", server->Name, clnt_sperrno(status)); -+ status = ENOENT; -+ break; -+ } -+ -+ CLNT_DESTROY(clnt); -+ -+ crfree(cr); -+ -+ ASSERT(rc == RPC_SUCCESS || status != 0); -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): status=%d\n", server->Name, status); -+ -+ return (status); -+} -+#endif -+ -+#if defined(LINUX) -+ -+#define xdrsize(type) ((sizeof(type) + 3) >> 2) -+ -+static int -+xdr_error(struct rpc_rqst *req, u32 *p, void *dummy) -+{ -+ return -EIO; -+} -+ -+static int -+xdr_decode_int(struct rpc_rqst *req, u32 *p, int *res) -+{ -+ *res = ntohl(*p++); -+ return 0; -+} -+ -+#define XDR_capability_sz ((12 + BT_BITOUL(ELAN3_MAX_VPS)) * sizeof (u32)) -+ -+static int -+xdr_encode_capability(u32 *p, ELAN_CAPABILITY *cap) -+{ -+ u32 *pp = p; -+ -+ /* basic xdr unit is u32 - for opaque types we must round up to that */ -+ memcpy(p, &cap->cap_userkey, sizeof(cap->cap_userkey)); -+ p += xdrsize(cap->cap_userkey); -+ -+ *p++ = htonl(cap->cap_version); -+ ((u16 *) (p++))[1] = htons(cap->cap_type); -+ *p++ = htonl(cap->cap_lowcontext); -+ *p++ = htonl(cap->cap_highcontext); -+ *p++ = htonl(cap->cap_mycontext); -+ *p++ = htonl(cap->cap_lownode); -+ *p++ = htonl(cap->cap_highnode); -+ *p++ = htonl(cap->cap_railmask); -+ -+ memcpy(p, &cap->cap_bitmap[0], sizeof(cap->cap_bitmap)); -+ p += xdrsize(cap->cap_bitmap); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_capability_sz); -+ -+ return (p - pp); -+} -+ -+ -+#define XDR_neterr_sz (((1 + 5) * sizeof (u32)) + (2*XDR_capability_sz)) -+ -+static int -+xdr_encode_neterr_msg(struct rpc_rqst *req, u32 *p, NETERR_MSG *msg) -+{ -+ u32 *pp = p; -+ -+ *p++ = htonl(msg->Rail); -+ -+ p += xdr_encode_capability(p, &msg->SrcCapability); -+ p += xdr_encode_capability(p, &msg->DstCapability); -+ -+ *p++ = htonl(msg->DstProcess); -+ *p++ = htonl(msg->CookieAddr); -+ *p++ = htonl(msg->CookieVProc); -+ *p++ = htonl(msg->NextCookie); -+ *p++ = htonl(msg->WaitForEop); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_neterr_sz); -+ -+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); -+ -+ return 0; -+} -+ -+static struct rpc_procinfo neterr_procedures[2] = -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define RPC_ID_NULL "neterr_null" -+# define RPC_ID_FIXUP_RPC "neterr_fixup_rpc" -+#else -+# define RPC_ID_NULL NETERR_NULL_RPC -+# define RPC_ID_FIXUP_RPC NETERR_FIXUP_RPC -+#endif -+ { -+ RPC_ID_NULL, /* procedure name or number*/ -+ (kxdrproc_t) xdr_error, /* xdr encode fun */ -+ (kxdrproc_t) xdr_error, /* xdr decode fun */ -+ 0, /* req buffer size */ -+ 0, /* call count */ -+ }, -+ { -+ RPC_ID_FIXUP_RPC, -+ (kxdrproc_t) xdr_encode_neterr_msg, -+ (kxdrproc_t) xdr_decode_int, -+ XDR_neterr_sz, -+ 0, -+ }, -+}; -+ -+static struct rpc_version neterr_version1 = -+{ -+ 1, /* version */ -+ 2, /* number of procedures */ -+ neterr_procedures /* procedures */ -+}; -+ -+static struct rpc_version *neterr_version[] = -+{ -+ NULL, -+ &neterr_version1, -+}; -+ -+static struct rpc_stat neterr_stats; -+ -+static struct rpc_program neterr_program = -+{ -+ NETERR_SERVICE, -+ NETERR_PROGRAM, -+ sizeof(neterr_version)/sizeof(neterr_version[0]), -+ neterr_version, -+ &neterr_stats, -+}; -+ -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ struct rpc_xprt *xprt; -+ struct rpc_clnt *clnt; -+ struct rpc_timeout to; -+ int rc, status; -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s)\n", server->Name); -+ -+ xprt_set_timeout(&to, 1, NETERR_RPC_TIMEOUT * HZ); -+ -+ if ((xprt = xprt_create_proto(IPPROTO_UDP, &server->Addr, &to)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) xprt_create_proto failed\n", server->Name); -+ return EFAIL; -+ } -+ -+ if ((clnt = rpc_create_client(xprt, server->Name, &neterr_program, NETERR_VERSION, RPC_AUTH_NULL)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) rpc_create_client failed\n", server->Name); -+ xprt_destroy (xprt); -+ -+ return EFAIL; -+ } -+ -+ clnt->cl_softrtry = 1; -+ clnt->cl_chatty = 0; -+ clnt->cl_oneshot = 1; -+ clnt->cl_intr = 0; -+ -+ if ((rc = rpc_call(clnt, NETERR_FIXUP_RPC, msg, &status, 0)) < 0) -+ { -+ /* RPC error has occured - determine whether we should retry */ -+ -+ status = ETIMEDOUT; -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): -> %d\n", server->Name, status); -+ -+ return (status); -+} -+ -+#endif /* defined(LINUX) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/procfs_linux.c linux-2.6.9/drivers/net/qsnet/elan3/procfs_linux.c ---- clean/drivers/net/qsnet/elan3/procfs_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/procfs_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.21.8.2 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/procfs_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *elan3_procfs_root; -+struct proc_dir_entry *elan3_config_root; -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ int len; -+ -+ if (dev->Position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ unsigned nodeid = ELAN3_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_nodeid = ELAN3_INVALID_NODE; -+ dev->Position.pos_nodes = 0; -+ dev->Position.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (ComputePosition (&dev->Position, nodeid, numnodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->Instance, nodeid, numnodes); -+ else -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->Instance, dev->Position.pos_nodeid, -+ dev->Position.pos_nodes, dev->Position.pos_levels); -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+ -+void -+elan3_procfs_device_init (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir, *p; -+ char name[NAME_MAX]; -+ -+ sprintf (name, "device%d", dev->Instance); -+ dir = dev->Osdep.procdir = proc_mkdir (name, elan3_procfs_root); -+ -+ if ((p = create_proc_entry ("position", 0, dir)) != NULL) -+ { -+ p->read_proc = proc_read_position; -+ p->write_proc = proc_write_position; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ -+} -+ -+void -+elan3_procfs_device_fini (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir = dev->Osdep.procdir; -+ char name[NAME_MAX]; -+ -+ remove_proc_entry ("position", dir); -+ -+ sprintf (name, "device%d", dev->Instance); -+ remove_proc_entry (name, elan3_procfs_root); -+} -+ -+void -+elan3_procfs_init() -+{ -+ extern int eventint_punt_loops; -+ extern int ResolveRequestTimeout; -+ -+ elan3_procfs_root = proc_mkdir("elan3", qsnet_procfs_root); -+ -+ elan3_config_root = proc_mkdir("config", elan3_procfs_root); -+ -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug", &elan3_debug, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_console", &elan3_debug_console, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_buffer", &elan3_debug_buffer, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3mmu_debug", &elan3mmu_debug, 0); -+ qsnet_proc_register_int (elan3_config_root, "eventint_punt_loops", &eventint_punt_loops, 0); -+ qsnet_proc_register_int (elan3_config_root, "neterr_timeout", &ResolveRequestTimeout, 0); -+ -+#if defined(__ia64__) -+ { -+ extern int enable_sdram_writecombining; -+ qsnet_proc_register_int (elan3_config_root, "enable_sdram_writecombining", &enable_sdram_writecombining, 0); -+ } -+#endif -+} -+ -+void -+elan3_procfs_fini() -+{ -+#if defined(__ia64__) -+ remove_proc_entry ("enable_sdram_writecombining", elan3_config_root); -+#endif -+ remove_proc_entry ("neterr_timeout", elan3_config_root); -+ remove_proc_entry ("eventint_punt_loops", elan3_config_root); -+ remove_proc_entry ("elan3mmu_debug", elan3_config_root); -+ remove_proc_entry ("elan3_debug_buffer", elan3_config_root); -+ remove_proc_entry ("elan3_debug_console", elan3_config_root); -+ remove_proc_entry ("elan3_debug", elan3_config_root); -+ -+ remove_proc_entry ("config", elan3_procfs_root); -+ remove_proc_entry ("version", elan3_procfs_root); -+ -+ remove_proc_entry ("elan3", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan3_procfs_root); -+EXPORT_SYMBOL(elan3_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/quadrics_version.h linux-2.6.9/drivers/net/qsnet/elan3/quadrics_version.h ---- clean/drivers/net/qsnet/elan3/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/elan3/routecheck.c linux-2.6.9/drivers/net/qsnet/elan3/routecheck.c ---- clean/drivers/net/qsnet/elan3/routecheck.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/routecheck.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,313 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* ------------------------------------------------------------- */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* ---------------------------------------------------------------------- */ -+typedef struct elan3_net_location { -+ int netid; -+ int plane; -+ int level; -+} ELAN3_NET_LOCATION; -+/* ---------------------------------------------------------------------- */ -+#define FLIT_LINK_ARRAY_MAX (ELAN3_MAX_LEVELS*2) -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_follow_link( ELAN3_CTXT *ctxt, ELAN3_NET_LOCATION *loc, int link) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ if ((link<0) || (link>7)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_follow_link: link (%d) out of range \n",link); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* going up or down ? */ -+ if ( link >= pos->pos_arity[loc->level] ) -+ { -+ /* Up */ -+ if (loc->level >= pos->pos_levels) -+ loc->plane = 0; -+ else -+ { -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (16 * ( loc->plane / 8 )) + (4 * ( loc->plane % 4)) -+ +(link - pos->pos_arity[loc->level]); -+ else -+ loc->plane = (loc->plane * (8 - pos->pos_arity[loc->level])) -+ +(link - pos->pos_arity[loc->level]); -+ } -+ loc->level--; -+ if ( loc->level < 0 ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the top\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ loc->netid = loc->netid / pos->pos_arity[loc->level]; -+ } -+ else -+ { -+ /* going down */ -+ if ((loc->level == 0) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->netid = link % 2; -+ else -+ loc->netid =(loc->netid * pos->pos_arity[loc->level])+link; -+ -+ loc->level++; -+ if (loc->level > pos->pos_levels) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the bottom\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( loc->level >= (pos->pos_levels-1)) -+ loc->plane = 0; -+ else -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (((loc->plane)>>2)*2) - ( ((loc->plane)>>2) & 3 ) + ((link<2)?0:4); /* ((p/4) % 4) */ -+ else -+ loc->plane = loc->plane/(8-pos->pos_arity[loc->level]); -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int /* assumes they are connected, really only used for finding the MyLink */ -+elan3_route_get_mylink (ELAN_POSITION *pos, ELAN3_NET_LOCATION *locA, ELAN3_NET_LOCATION *locB) -+{ -+ /* whats the My Link for locA to LocB */ -+ if ( locA->level > locB->level ) -+ return locB->plane - (locA->plane * (8 - pos->pos_arity[locA->level])) + pos->pos_arity[locA->level]; -+ -+ return locB->netid - (locA->netid * pos->pos_arity[locA->level]); -+} -+/* ---------------------------------------------------------------------- */ -+#define FIRST_GET_HIGH_PRI(FLIT) (FLIT & FIRST_HIGH_PRI) -+#define FIRST_GET_AGE(FLIT) ((FLIT & FIRST_AGE(15))>>11) -+#define FIRST_GET_TIMEOUT(FLIT) ((FLIT & FIRST_TIMEOUT(3))>>9) -+#define FIRST_GET_NEXT(FLIT) ((FLIT & FIRST_PACKED(3))>>7) -+#define FIRST_GET_ROUTE(FLIT) (FLIT & 0x7f) -+#define FIRST_GET_BCAST(FLIT) (FLIT & 0x40) -+#define FIRST_GET_IS_INVALID(FLIT) ((FLIT & 0x78) == 0x08) -+#define FIRST_GET_TYPE(FLIT) ((FLIT & 0x30)>>4) -+#define PRF_GET_ROUTE(FLIT,N) ((FLIT >> (N*4)) & 0x0F) -+#define PRF_GET_IS_MYLINK(ROUTE) (ROUTE == PACKED_MYLINK) -+#define PRF_GET_IS_NORMAL(ROUTE) (ROUTE & 0x8) -+#define PRF_GET_NORMAL_LINK(ROUTE) (ROUTE & 0x7) -+#define PRF_MOVE_ON(INDEX,NEXT) do { if (NEXT==3) {NEXT=0;INDEX++;} else {NEXT++; }} while (0); -+/* ---------------------------------------------------------------------- */ -+int /* turn level needed or -1 if not possible */ -+elan3_route_get_min_turn_level( ELAN_POSITION *pos, int nodeId) -+{ -+ int l,range = 1; -+ -+ for(l=pos->pos_levels-1;l>=0;l--) -+ { -+ range = range * pos->pos_arity[l]; -+ -+ if ( ((pos->pos_nodeid - (pos->pos_nodeid % range)) <= nodeId ) -+ && (nodeId <= (pos->pos_nodeid - (pos->pos_nodeid % range)+range -1))) -+ return l; -+ } -+ return -1; -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_check(ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNodeId) -+{ -+ ELAN3_NET_LOCATION lastLoc,currLoc; -+ int err; -+ int turnLevel; -+ int goingDown; -+ int lnk,index,next,val; -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ /* is the dest possible */ -+ if ( (destNodeId <0 ) || (destNodeId >= pos->pos_nodes)) -+ return (ELAN3_ROUTE_PROC_RANGE); -+ -+ /* -+ * walk the route, -+ * - to see if we get there -+ * - checking we dont turn around -+ */ -+ currLoc.netid = pos->pos_nodeid; /* the elan */ -+ currLoc.plane = 0; -+ currLoc.level = pos->pos_levels; -+ -+ turnLevel = currLoc.level; /* track the how far the route goes in */ -+ goingDown = 0; /* once set we cant go up again ie only one change of direction */ -+ -+ /* move onto the network from the elan */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,4)) != ELAN3_ROUTE_SUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: initial elan3_route_follow_link failed\n"); -+ return err; -+ } -+ /* do the first part of flit */ -+ switch ( FIRST_GET_TYPE(flits[0]) ) -+ { -+ case 0 /* sent */ : { lnk = (flits[0] & 0x7); break; } -+ case PACKED_MYLINK : { lnk = pos->pos_nodeid % pos->pos_arity[pos->pos_levels-1]; break; } -+ case PACKED_ADAPTIVE : { lnk = 7; /* all routes are the same just check one */ break; } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected first flit (%d)\n",flits[0]); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: elan3_route_follow_link failed\n"); -+ return err; -+ } -+ if ((currLoc.level > pos->pos_levels) || (currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ if ( lastLoc.level < currLoc.level ) -+ { -+ turnLevel = lastLoc.level; -+ goingDown = 1; -+ } -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* loop on doing the remaining flits */ -+ index = 1; -+ next = FIRST_GET_NEXT(flits[0]); -+ val = PRF_GET_ROUTE(flits[index],next); -+ while(val) -+ { -+ if (PRF_GET_IS_NORMAL(val) ) -+ lnk = PRF_GET_NORMAL_LINK(val); -+ else -+ { -+ switch ( val ) -+ { -+ case PACKED_MYLINK : -+ { -+ lnk = elan3_route_get_mylink(pos, &currLoc,&lastLoc); -+ break; -+ } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected packed flit (%d)\n",val); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS) -+ return err; -+ -+ if ((currLoc.level > pos->pos_levels ) || ( currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( lastLoc.level < currLoc.level ) -+ goingDown = 1; -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* move to next part of flit */ -+ PRF_MOVE_ON(index,next); -+ if ( index >= MAX_FLITS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route too long\n"); -+ return (ELAN3_ROUTE_TOO_LONG); -+ } -+ /* extract the new value */ -+ val = PRF_GET_ROUTE(flits[index],next); -+ } -+ -+ /* have we got to where we want ? */ -+ if ((currLoc.level != pos->pos_levels) || (currLoc.netid != destNodeId)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: goes to %d instead of %d\n",currLoc.netid , destNodeId ); -+ return (ELAN3_ROUTE_WRONG_DEST); -+ } -+ -+ /* -+ * there is the case of src == dest -+ * getTurnLevel returns pos->pos_levels, and turnLevel is (pos->pos_levels -1) -+ * then we assume they really want to go onto the network. -+ * otherwise we check that the turn at the appriate level -+ */ -+ if ( (pos->pos_nodeid != destNodeId) || ( turnLevel != (pos->pos_levels -1)) ) -+ { -+ int lev; -+ if ((lev = elan3_route_get_min_turn_level(pos,destNodeId)) == -1) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: cant calculate turn level\n"); -+ return (ELAN3_ROUTE_INVALID); /* not sure this can happen here as checks above should protect me */ -+ } -+ if (turnLevel != lev) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: turn level should be %d but is %d \n", lev, turnLevel); -+ return (ELAN3_ROUTE_TURN_LEVEL); -+ } -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_broadcast_check(ELAN3_CTXT *ctxt , E3_uint16 *flits, int lowNode, int highNode ) -+{ -+ E3_uint16 flitsTmp[MAX_FLITS]; -+ int nflits,i; -+ -+ nflits = GenerateRoute (&ctxt->Position, flitsTmp, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ for(i=0;i -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static sdramaddr_t -+AllocateLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int ctxnum, E3_uint64 *smallRoute) -+{ -+ int bit = -1; -+ ELAN3_ROUTES *rent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if ((bit = bt_freebit (rent->Bitmap, NROUTES_PER_BLOCK)) != -1) -+ break; -+ } -+ -+ if (bit == -1) /* No spare entries in large routes */ -+ { /* so allocate a new page */ -+ PRINTF0 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: allocate route entries\n"); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ KMEM_ZALLOC(rent, ELAN3_ROUTES *, sizeof (ELAN3_ROUTES), TRUE); -+ -+ if (rent == (ELAN3_ROUTES *) NULL) -+ return ((sdramaddr_t) 0); -+ -+ rent->Routes = elan3_sdram_alloc (dev, PAGESIZE); -+ if (rent->Routes == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (rent, sizeof (ELAN3_ROUTES)); -+ return ((sdramaddr_t) 0); -+ } -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* Add to list of large routes */ -+ rent->Next = tbl->LargeRoutes; -+ tbl->LargeRoutes = rent; -+ -+ /* and use entry 0 */ -+ bit = 0; -+ } -+ -+ /* Set the bit in the bitmap to mark this route as allocated */ -+ BT_SET (rent->Bitmap, bit); -+ -+ /* And generate the small route pointer and the pointer to the large routes */ -+ (*smallRoute) = BIG_ROUTE_PTR(rent->Routes + (bit*NBYTES_PER_LARGE_ROUTE), ctxnum); -+ -+ PRINTF4 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: rent %p using entry %d at %lx with route pointer %llx\n", -+ rent, bit, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), (long long) (*smallRoute)); -+ -+ /* Invalidate the large route */ -+ elan3_sdram_zeroq_sdram (dev, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), NBYTES_PER_LARGE_ROUTE); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ return (rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE)); -+} -+ -+static void -+FreeLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, E3_uint64 smallRoute) -+{ -+ E3_Addr addr = (E3_Addr) (smallRoute & ((1ULL << ROUTE_CTXT_SHIFT)-1)); -+ ELAN3_ROUTES *rent; -+ -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: free route %llx\n", (long long) smallRoute); -+ -+ ASSERT (SPINLOCK_HELD (&tbl->Lock)); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if (rent->Routes <= addr && (rent->Routes + ROUTE_BLOCK_SIZE) > addr) -+ { -+ int indx = (addr - rent->Routes)/NBYTES_PER_LARGE_ROUTE; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: rent=%p indx=%d\n", rent, indx); -+ -+ BT_CLEAR(rent->Bitmap, indx); -+ return; -+ } -+ } -+ -+ panic ("elan: FreeLargeRoute - route not found in large route tables"); -+} -+ -+static void -+FreeLargeRoutes (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ ELAN3_ROUTES *rent; -+ -+ while ((rent = tbl->LargeRoutes) != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoutes: free rent %p\n", rent); -+ -+ tbl->LargeRoutes = rent->Next; -+ -+ elan3_sdram_free (dev, rent->Routes, PAGESIZE); -+ -+ KMEM_FREE (rent, sizeof(ELAN3_ROUTES)); -+ } -+} -+ -+int -+GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ sdramaddr_t largeRouteOff; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if (routeValue & ROUTE_PTR) -+ { -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 0); -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ flits[3] = (routeValue >> 48) & 0xffff; -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 8); -+ flits[4] = routeValue & 0xffff; -+ flits[5] = (routeValue >> 16) & 0xffff; -+ flits[6] = (routeValue >> 32) & 0xffff; -+ flits[6] = (routeValue >> 48) & 0xffff; -+ } -+ else -+ { -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ } -+ -+ return (ESUCCESS); -+} -+ -+ELAN3_ROUTE_TABLE * -+AllocateRouteTable (ELAN3_DEV *dev, int size) -+{ -+ ELAN3_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN3_ROUTE_TABLE *, sizeof (ELAN3_ROUTE_TABLE), TRUE); -+ -+ if (tbl == (ELAN3_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->Size = size; -+ tbl->Table = elan3_sdram_alloc (dev, size*NBYTES_PER_SMALL_ROUTE); -+ -+ if (tbl->Table == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+ return (NULL); -+ } -+ spin_lock_init (&tbl->Lock); -+ -+ /* zero the route table */ -+ elan3_sdram_zeroq_sdram (dev, tbl->Table, size*NBYTES_PER_SMALL_ROUTE); -+ -+ return (tbl); -+} -+ -+void -+FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ elan3_sdram_free (dev, tbl->Table, tbl->Size*NBYTES_PER_SMALL_ROUTE); -+ -+ FreeLargeRoutes (dev, tbl); -+ -+ spin_lock_destroy (&tbl->Lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+} -+ -+int -+LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, int ctxnum, int nflits, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ E3_uint64 largeRouteValue; -+ sdramaddr_t largeRouteOff; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: table %lx process %d ctxnum %x\n", tbl->Table ,process, ctxnum); -+ -+ if (nflits < 4) -+ { -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* See if we're replacing a "large" route */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ routeValue = SMALL_ROUTE(flits, ctxnum); -+ -+ if ( routeValue & ROUTE_PTR) -+ PRINTF0 (DBG_DEVICE, DBG_VP, "SHOULD BE A SMALL ROUTE !!!!!!!\n"); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "LoadRoute: loading small route %d %llx\n", process, (long long) routeValue); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, routeValue); -+ } -+ else -+ { -+ E3_uint64 value0 = BIG_ROUTE0(flits); -+ E3_uint64 value1 = BIG_ROUTE1(flits); -+ -+ if ((largeRouteOff = AllocateLargeRoute (dev, tbl, ctxnum, &largeRouteValue)) == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if ((routeValue & ROUTE_PTR) == 0) -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, largeRouteValue); -+ else -+ { -+ FreeLargeRoute (dev, tbl, largeRouteValue); -+ -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: loading large route %d - %llx %llx\n", process, -+ (long long) value0, (long long) value1); -+ -+ elan3_sdram_writeq (dev, largeRouteOff + 0, value0); -+ elan3_sdram_writeq (dev, largeRouteOff + 8, value1); -+ } -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ return (ESUCCESS); -+} -+void -+InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* unset ROUTE_VALID -+ * does not matter if its short or long, will check when we re-use it -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue & (~ROUTE_VALID))); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ValidateRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* set ROUTE_VALID -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue | ROUTE_VALID)); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ClearRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, 0); -+ -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+ -+static int -+ElanIdEqual (ELAN_POSITION *pos, int level, int ida, int idb) -+{ -+ int l; -+ -+ for (l = pos->pos_levels-1; l >= level; l--) -+ { -+ ida /= pos->pos_arity[l]; -+ idb /= pos->pos_arity[l]; -+ } -+ -+ return (ida == idb); -+} -+ -+static int -+RouteDown (ELAN_POSITION *pos, int level, int elanid) -+{ -+ int l; -+ -+ for (l = (pos->pos_levels - 1); level < pos->pos_levels - 1; level++, l--) -+ { -+ if ( pos->pos_arity[l] ) -+ elanid /= pos->pos_arity[l]; -+ } -+ elanid %= pos->pos_arity[l]; -+ -+ return elanid; -+} -+ -+static int -+InitPackedAndFlits (u_char *packed, E3_uint16 *flits) -+{ -+ int rb = 0; -+ -+ bzero ((caddr_t) packed, MAX_PACKED+4); -+ bzero ((caddr_t) flits, MAX_FLITS * sizeof (E3_uint16)); -+ -+ /* Initialise 4 bytes of packed, so that the "padding" */ -+ /* NEVER terminates with 00, as this is recognised as */ -+ /* as CRC flit */ -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ -+ return (rb); -+} -+ -+static int -+PackThemRoutesUp (E3_uint16 *flits, u_char *packed, int rb, int timeout, int highPri) -+{ -+ int i, nflits; -+ -+ flits[0] |= FIRST_TIMEOUT(timeout); -+ if (highPri) -+ flits[0] |= FIRST_HIGH_PRI; -+ -+ /* round up the number of route bytes to flits */ -+ /* and subtract the 4 extra we've padded out with */ -+ nflits = (rb-1)/4; -+ -+ for (i = nflits; i > 0; i--) -+ { -+ flits[i] = (packed[rb-1] << 12 | -+ packed[rb-2] << 8 | -+ packed[rb-3] << 4 | -+ packed[rb-4] << 0); -+ rb -= 4; -+ } -+ -+ /* Now set the position of the first packed route */ -+ /* byte in the 2nd 16 bit flit, taking account of the */ -+ /* 4 byte padding */ -+ flits[0] |= FIRST_PACKED (4-rb); -+ -+ return (nflits+1); -+} -+ -+int -+GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri) -+{ -+ int broadcast = (lowid != highid); -+ int rb = 0; -+ int first = 1; -+ int noRandom = 0; -+ int level; -+ u_char packed[MAX_PACKED+4]; -+ int numDownLinks; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ noRandom |= pos->pos_random_disabled & (1 << (pos->pos_levels-1-level)); -+ } -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ numDownLinks = pos->pos_arity [level]; -+ if (first) -+ { -+ if (broadcast || noRandom) -+ flits[0] = FIRST_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ flits[0] = FIRST_ADAPTIVE; -+ else -+ flits[0] = FIRST_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ first = 0; -+ } -+ else -+ { -+ if (broadcast || noRandom) -+ packed[rb++] = PACKED_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ packed[rb++] = PACKED_ADAPTIVE; -+ else -+ packed[rb++] = PACKED_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ } -+ } -+ -+ while (level < pos->pos_levels) -+ { -+ int lowRoute = RouteDown (pos, level, lowid); -+ int highRoute = RouteDown (pos, level, highid); -+ -+ if (first) -+ { -+ if (broadcast) -+ flits[0] = FIRST_BCAST(highRoute, lowRoute); -+ else -+ flits[0] = FIRST_ROUTE(lowRoute); -+ -+ first = 0; -+ } -+ else -+ { -+ if (broadcast) -+ { -+ packed[rb++] = PACKED_BCAST0(highRoute, lowRoute); -+ packed[rb++] = PACKED_BCAST1(highRoute, lowRoute); -+ } -+ else -+ packed[rb++] = PACKED_ROUTE(lowRoute); -+ } -+ -+ level++; -+ } -+ -+#ifdef ELITE_REVA_SUPPORTED -+ if (broadcast && (pos->pos_levels == 3)) -+ { -+ packed[rb++] = PACKED_BCAST0(0, 0); -+ packed[rb++] = PACKED_BCAST1(0, 0); -+ } -+#endif -+ -+ return (PackThemRoutesUp (flits, packed, rb, timeout, highPri)); -+} -+ -+int -+GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive) -+{ -+ int notfirst = 0; -+ int l, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (l = pos->pos_levels-1; l > level; l--) -+ if (! notfirst++) -+ flits[0] = adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ -+ if (! notfirst++ ) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (l++ /* consume mylink */; l < pos->pos_levels; l++) -+ if (! notfirst++) -+ flits[0] = FIRST_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ else -+ packed[rb++] = PACKED_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * In this case "level" is the number of levels counted from the bottom. -+ */ -+int -+GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive ) -+{ -+ int first = 1; -+ int i, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ { -+ if (first) -+ flits[0] = linkup ? FIRST_ROUTE(linkup[i]) : adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ first = 0; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = PACKED_ROUTE(linkdown[i]); -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/sdram.c linux-2.6.9/drivers/net/qsnet/elan3/sdram.c ---- clean/drivers/net/qsnet/elan3/sdram.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/sdram.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,807 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.17 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/sdram.c,v $*/ -+ -+ -+#include -+ -+#include -+#include -+#include -+ -+/* sdram access functions */ -+#define sdram_off_to_bank(dev,off) (&dev->SdramBanks[(off) >> ELAN3_SDRAM_BANK_SHIFT]) -+#define sdram_off_to_offset(dev,off) ((off) & (ELAN3_SDRAM_BANK_SIZE-1)) -+#define sdram_off_to_bit(dev,indx,off) (sdram_off_to_offset(dev,off) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))) -+ -+#define sdram_off_to_mapping(dev,off) (sdram_off_to_bank(dev,off)->Mapping + sdram_off_to_offset(dev,off)) -+ -+unsigned char -+elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readb ((unsigned char *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned short -+elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readw ((unsigned short *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned int -+elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readl ((unsigned int *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned long long -+elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readq ((unsigned long long *) sdram_off_to_mapping(dev, off))); -+} -+ -+void -+elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, (unsigned char *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, (unsigned short *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, (unsigned int *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, (unsigned long long *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+} -+ -+void -+elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+#define E3_WRITEBUFFER_SIZE 16 -+#define E3_WRITEBUFFER_OFFSET(x) (((unsigned long) x) & (E3_WRITEBUFFER_SIZE-1)) -+#define E3_WRITEBUFFER_BASE(x) (((unsigned long) x) & ~((unsigned long) (E3_WRITEBUFFER_SIZE-1))) -+ -+void -+elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ for (i = 0; i < E3_WRITEBUFFER_SIZE/sizeof (uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (((uint16_t *) slim)[i], &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writew (((uint16_t *) slim)[0], &((uint16_t *) dlim)[0]); -+ writew (((uint16_t *) slim)[1], &((uint16_t *) dlim)[1]); -+ writew (((uint16_t *) slim)[2], &((uint16_t *) dlim)[2]); -+ writew (((uint16_t *) slim)[3], &((uint16_t *) dlim)[3]); -+ writew (((uint16_t *) slim)[4], &((uint16_t *) dlim)[4]); -+ writew (((uint16_t *) slim)[5], &((uint16_t *) dlim)[5]); -+ writew (((uint16_t *) slim)[6], &((uint16_t *) dlim)[6]); -+ writew (((uint16_t *) slim)[7], &((uint16_t *) dlim)[7]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (((uint32_t *) slim)[i], &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writel (((uint32_t *) slim)[0], &((uint32_t *) dlim)[0]); -+ writel (((uint32_t *) slim)[1], &((uint32_t *) dlim)[1]); -+ writel (((uint32_t *) slim)[2], &((uint32_t *) dlim)[2]); -+ writel (((uint32_t *) slim)[3], &((uint32_t *) dlim)[3]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ writeq (((uint64_t *) slim)[1], &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+physaddr_t -+elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+#if defined(DIGITAL_UNIX) -+ return (KSEG_TO_PHYS (sdram_off_to_mapping (dev, off))); -+#elif defined(LINUX) -+ return (kmem_to_phys ((void *) sdram_off_to_mapping (dev, off))); -+#endif -+} -+ -+/* sdram buddy allocator */ -+#define read_next(dev, block) elan3_sdram_readl(dev, block + 0) -+#define read_prev(dev, block) elan3_sdram_readl(dev, block + 4) -+#define write_next(dev, block, val) (elan3_sdram_writel(dev, block + 0, val), val) -+#define write_prev(dev, block, val) (elan3_sdram_writel(dev, block + 4, val), val) -+ -+#define freelist_insert(dev,idx,block)\ -+do {\ -+ sdramaddr_t next = dev->SdramFreeLists[(idx)];\ -+\ -+ /*\ -+ * block->prev = NULL;\ -+ * block->next = next;\ -+ * if (next != NULL)\ -+ * next->prev = block;\ -+ * freelist = block;\ -+ */\ -+ write_prev (dev, block, (sdramaddr_t) 0);\ -+ write_next (dev, block, next);\ -+ if (next != (sdramaddr_t) 0)\ -+ write_prev (dev, next, block);\ -+ dev->SdramFreeLists[idx] = block;\ -+\ -+ dev->SdramFreeCounts[idx]++;\ -+ dev->Stats.SdramBytesFree += (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_remove(dev,idx,block)\ -+do {\ -+ /*\ -+ * if (block->prev)\ -+ * block->prev->next = block->next;\ -+ * else\ -+ * dev->SdramFreeLists[idx] = block->next;\ -+ * if (block->next)\ -+ * block->next->prev = block->prev;\ -+ */\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+ sdramaddr_t blockprev = read_prev (dev, block);\ -+\ -+ if (blockprev)\ -+ write_next (dev, blockprev, blocknext);\ -+ else\ -+ dev->SdramFreeLists[idx] = blocknext;\ -+ if (blocknext)\ -+ write_prev (dev, blocknext, blockprev);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_removehead(dev,idx,block)\ -+do {\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+\ -+ if ((dev->SdramFreeLists[idx] = blocknext) != 0)\ -+ write_prev (dev, blocknext, 0);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#if defined(DEBUG) -+static int -+display_blocks (ELAN3_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ printk ("%s - indx %d\n", string, indx); -+ for (block = dev->SdramFreeLists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ printk (" %lx", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ printk ("\n"); -+ -+ return (nbytes); -+} -+ -+ -+void -+elan3_sdram_display (ELAN3_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ printk ("elan3_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->SdramFreeLists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ printk ("\n%d bytes free\n", nbytes); -+} -+ -+void -+elan3_sdram_verify (ELAN3_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->SdramFreeLists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned off = sdram_off_to_offset (dev, block); -+ int bit = sdram_off_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->Size) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->Bitmaps[indx], bit) == 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdram_off_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->Bitmaps[i], bit + b)) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->SdramFreeCounts[indx] != count) -+ printk ("elan3_sdram_verify: indx=%x expected %d got %d\n", indx, dev->SdramFreeCounts[indx], count); -+ } -+} -+ -+#endif /* defined(DEBUG) */ -+ -+static void -+free_block (ELAN3_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned bit = sdram_off_to_bit(dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->Bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->Bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%lx buddy=%lx indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->Bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->Bitmaps[indx], bit); -+} -+ -+void -+elan3_sdram_init (ELAN3_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->SdramLock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->SdramFreeLists[indx] = (sdramaddr_t) 0; -+ dev->SdramFreeCounts[indx] = 0; -+ } -+} -+ -+void -+elan3_sdram_fini (ELAN3_DEV *dev) -+{ -+ spin_lock_destroy (&dev->SdramLock); -+} -+ -+void -+elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top) -+{ -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = E3_CACHE_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* find the smallest block which is big enough for this allocation */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->SdramFreeLists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: use block=%lx indx=%d\n", dev->SdramFreeLists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->SdramFreeLists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdram_off_to_bank (dev, block)->Bitmaps[i], sdram_off_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ PRINTF1 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: return block=%lx\n", block); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_free: indx=%d block=%lx\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+} -+ -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/tproc.c linux-2.6.9/drivers/net/qsnet/elan3/tproc.c ---- clean/drivers/net/qsnet/elan3/tproc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/tproc.c 2004-11-15 06:14:12.000000000 -0500 -@@ -0,0 +1,778 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tproc.c,v 1.52 2004/11/15 11:14:12 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ THREAD_TRAP *trap = dev->ThreadTrap; -+ int delay = 1; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ trap->Status.Status = read_reg32 (dev, Exts.TProcStatus); -+ trap->sp = read_reg32 (dev, Thread_Desc_SP); -+ trap->pc = read_reg32 (dev, ExecutePC); -+ trap->npc = read_reg32 (dev, ExecuteNPC); -+ trap->StartPC = read_reg32 (dev, StartPC); -+ trap->mi = GET_STATUS_TRAPTYPE(trap->Status); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ trap->DirtyBits.Bits = read_reg32 (dev, DirtyBits.Bits); -+ -+ if ( ! (trap->Status.s.WakeupFunction == SleepOneTick) ) { -+ int p,i; -+ E3_uint32 reg = read_reg32 (dev, Exts.InterruptReg); -+ -+ ELAN_REG_REC(reg); -+ p = elan_reg_rec_index; -+ for(i=0;iStatus.s.WakeupFunction == SleepOneTick); -+ -+ /* copy the four access fault areas */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), (void *) &trap->FaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), (void *) &trap->DataFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), (void *) &trap->InstFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), (void *) &trap->OpenFaultSave, 16); -+ -+ /* copy the registers, note the endian swap flips the odd registers into the even registers -+ and visa versa. */ -+ copy_thread_regs (dev, trap->Registers); -+ -+ /* -+ * If the output was open then the ack may not have returned yet. Must wait for the -+ * ack to become valid and update trap_dirty with the new value. Will simulate the -+ * instructions later. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen) -+ { -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ while (! trap->TrapBits.s.AckBufferValid) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "tproc: waiting for ack to become valid\n"); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ } -+ } -+ -+ /* update device statistics */ -+ BumpStat (dev, TProcTraps); -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ BumpStat (dev, ForcedTProcTraps); -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (trap->TrapBits.s.PacketTimeout) -+ BumpStat (dev, ThreadOutputTimeouts); -+ else if (trap->TrapBits.s.PacketAckValue == E3_PAckError) -+ BumpStat (dev, ThreadPacketAckErrors); -+ } -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ BumpStat (dev, TrapForTooManyInsts); -+ break; -+ } -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ *RestartBits |= RestartTProc; -+ -+ return (TRUE); -+} -+ -+void -+DeliverTProcTrap (ELAN3_DEV *dev, THREAD_TRAP *threadTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ THREAD_TRAP *trap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, threadTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverTProcTrap: context %x invalid\n", threadTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_TPROC_TRAP (ctxt, threadTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->ThreadTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ -+ bcopy (threadTrap, trap, sizeof (THREAD_TRAP)); -+ -+ PRINTF4 (ctxt, DBG_INTR, "DeliverTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_INTR, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_INTR, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ELAN3_QUEUE_ADD (ctxt->ThreadTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->ThreadTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverTProcTrap: thread queue full, must swap out\n"); -+ ctxt->Status |= CTXT_THREAD_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->ThreadTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ int i; -+ int res; -+ E3_Addr StackPointer; -+ -+ PRINTF4 (ctxt, DBG_TPROC, "ResolveTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_TPROC, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_TPROC, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ -+ BumpUserStat (ctxt, TProcTraps); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ { -+ /* -+ * This occurs if the threads processor trapped. All other cases will be for the ucode -+ * thread trapping. -+ */ -+ int restart = 1; -+ int skip = 0; -+ -+ PRINTF1 (ctxt, DBG_TPROC, "TProc: Mi=Unimp. Using trap->TrapBits=%x\n", trap->TrapBits.Bits); -+ -+ /* -+ * Data Access Exception. -+ */ -+ if (trap->TrapBits.s.DataAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL(ctxt) || trap->DataFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->DataFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: DataAccessException %08x\n", trap->DataFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->DataFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for data %08x\n", -+ trap->DataFaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Instruction Access Exception. -+ */ -+ if (trap->TrapBits.s.InstAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL (ctxt) || trap->InstFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->InstFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: InstAccessException %08x\n", trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->InstFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for inst %08x\n", -+ trap->InstFaultSave.s.FaultAddress); -+ -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->InstFaultSave, res); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Forced TProc trap/Unimplemented instruction -+ * -+ * If there is a force tproc trap then don't look at -+ * the unimplemented instruction bit - since it can -+ * be set in obscure circumstances. -+ */ -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: forced tproc trap, restarting\n"); -+ else if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case ELAN3_ELANCALL_TRAPNUM: -+ /* -+ * Since the thread cannot easily access the global variable which holds -+ * the elan system call number, we provide a different trap for the elan -+ * system call, and copy the system call number into %g1 before calling -+ * ThreadSyscall(). -+ */ -+ BumpUserStat (ctxt, ThreadElanCalls); -+ -+ if (ThreadElancall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_SYSCALL_TRAPNUM: -+ BumpUserStat (ctxt, ThreadSystemCalls); -+ -+ if (ThreadSyscall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_DEBUG_TRAPNUM: -+ ElanException (ctxt, EXCEPTION_DEBUG, THREAD_PROC, trap); -+ skip = 1; -+ break; -+ -+ case ELAN3_ABORT_TRAPNUM: -+ default: -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ break; -+ } -+ -+ } -+ else -+ { -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Faulted fetching routes. -+ */ -+ if (trap->TrapBits.s.OpenRouteFetch) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: OpenRouteFetch %08x\n", trap->OpenFaultSave.s.FaultAddress); -+ -+ if ((res = ResolveVirtualProcess (ctxt, trap->OpenFaultSave.s.FaultAddress)) != ESUCCESS && -+ ElanException (ctxt, EXCEPTION_INVALID_PROCESS, THREAD_PROC, trap, trap->DataFaultSave.s.FaultAddress, res) != OP_IGNORE) -+ { -+ restart = 0; -+ } -+ else if (RollThreadToClose (ctxt, trap, E3_PAckDiscard) != ESUCCESS) /* Force a discard */ -+ { -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Thread Timeout -+ */ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ else -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: timeout or PAckError!\n"); -+ -+ /* Might deschedule the thread for a while or mark the link error here. */ -+ if (! trap->TrapBits.s.OutputWasOpen && RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ restart = 0; -+ } -+ } -+ } -+ -+ /* -+ * Open exception -+ */ -+ if (trap->TrapBits.s.OpenException) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: open exception\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ /* -+ * Too many instructions. -+ */ -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: too many instructions\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ if (restart) -+ { -+ /* -+ * If the output was open when the trap was taken then the trap code must move -+ * the PC on past the close instruction and simulate the effect of all the instructions -+ * that do not output onto the link. The value of the ack received is then used to -+ * simulate the close instruction. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen && RollThreadToClose(ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ /* -+ * Don't restart if we couldn't roll it forweards -+ * to a close instruction. -+ */ -+ break; -+ } -+ -+ /* -+ * We must check back 3 instructions from the PC, and if we see the -+ * c_close_cookie() sequence then we must execute the instructions to -+ * the end of it. -+ */ -+ /* XXXX: code to be written */ -+ -+ StackPointer = SaveThreadToStack (ctxt, trap, skip); -+ -+ ReissueStackPointer (ctxt, StackPointer); -+ } -+ -+ break; -+ } -+ -+ /* -+ * This case is different from the others as %o6 has been overwritten with -+ * the SP. The real PC can be read from StartPC and written back -+ * into %o6 on the stack. -+ */ -+ case MI_TProcNext: /* Reading the outs block */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing StartPc to o6\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->StartPC & PC_MASK); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ /* DROPTHROUGH */ -+ } -+ /* -+ * all of these will be generated when starting up a thread. -+ * Just re-issue the command after fixing the trap. The ucode keeps the startup -+ * from trap information in Thread_Desc_SP while it is still loading the regs. -+ */ -+ case MI_WaitForGlobalsRead: /* Reading the globals block (trap restart) */ -+ case MI_WaitForNPCRead: /* Reading the nPC, V and C (trap restart) */ -+ case MI_WaitForPCload: /* Reading the PC, N and Z (trap restart) */ -+ case MI_WaitForInsRead: /* Reading the ins block (trap restart) */ -+ case MI_WaitForLocals: /* Reading the ins block (trap restart) */ -+ case MI_WaitForPCload2: /* Reading the PC (normal thread start) */ -+ case MI_WaitForSpStore: /* Writing the SP to the outs block */ -+ PRINTF2 (ctxt, DBG_TPROC, "ResolveTProcTrap: %s %08x\n", MiToName (trap->mi), trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, &trap->FaultSave, trap, res) != OP_IGNORE) -+ break; -+ } -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ -+ /* -+ * These traps could occur after the threads proc has stopped (either for a wait, -+ * break, or suspend, but not a trap). Must simulate the uCode's job. -+ */ -+ case MI_WaitForOutsWrite: /* Writing the outs block */ -+ case MI_WaitForNPCWrite: /* Writing the nPC block */ -+ { -+ E3_uint32 DeschedBits = (trap->TrapBits.Bits & E3_TProcDescheduleMask); -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: trapped on %s while stopping a thread\n", MiToName(trap->mi)); -+ -+ /* -+ * Copy npc into o6. -+ */ -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing outs to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ /* -+ * Now write the outs back to the stack. NOTE then endian flip is undone. -+ */ -+ for (i = 0; i < 8; i++) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ /* -+ * thread has been saved. Now find out why the thread proc stopped. -+ */ -+ if (DeschedBits == E3_TProcDescheduleSuspend) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: suspend instruction executed\n"); -+ break; -+ } -+ -+ /* -+ * Break. Just reissue the command. -+ */ -+ if (DeschedBits == E3_TProcDescheduleBreak) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: break instruction, reissue sp %08x\n", trap->sp); -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ ASSERT (DeschedBits == E3_TProcDescheduleWait); -+ -+ /* DROPTHROUGH to fix up a wait event */ -+ } -+ -+ /* -+ * Trapped here trying to execute a wait instruction. All the thread state has already -+ * been saved and the trap has been fixed so simplest thing to do is to start the -+ * thread up at the wait instruction again. -+ */ -+ case MI_WaitForEventWaitAddr: /* Reading back the %o0,%o1 pair for a -+ wait event instr. */ -+ case MI_WaitForWaitEventAccess: /* Locked dword read of the event location. -+ Note that this read is done with write -+ permissions so we never get a trap on the write */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ break; -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing pc to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->pc); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ default: -+ FixupEventTrap (ctxt, THREAD_PROC, trap, trap->mi, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+TProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_THREAD] != 0); -+} -+ -+void -+RestartTProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 StackPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_THREAD]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_THREAD, &item, &StackPointer)) -+ ctxt->ItemCount[LIST_THREAD] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, RunThread), StackPointer, 0) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_THREAD, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_THREAD]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+E3_Addr -+SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction) -+{ -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ E3_Addr orflag; -+ register int i; -+ -+ /* -+ * When the thread deschedules normally, the N & Z flags are written -+ * to the stack in o6, and the V & C flags are lost. -+ * Since the Elan will store the NPC into o6 (to skip the instruction), -+ * the CC flags are visible to the trap handler in the trapped PC and NPC. -+ * If the instruction needs to be re-executed then the CC flags need to be -+ * kept in the right place to be read in when the thread re-starts. -+ * -+ * PC has N & Z from trapped NPC. -+ * NPC has V & C from trapped PC. -+ */ -+ if (SkipInstruction) -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = ((trap->npc & PC_MASK) + 4) | (trap->pc & CC_MASK); -+ } -+ else -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = (trap->pc & PC_MASK) | (trap->npc & CC_MASK); -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = (trap->npc & PC_MASK) | (trap->pc & CC_MASK); -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK(ctxt)) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "RestartThread: faulted writing out thread\n"); -+ ELAN3_OP_END_FAULT_CHECK(ctxt); -+ -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ return ((E3_Addr) 0); -+ } -+ -+ -+#ifdef DEBUG_PRINTF -+ PRINTF4 (ctxt, DBG_TPROC, "SaveThreadToStack: SP=%08x PC=%08x NPC=%08x DIRTY=%08x\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits); -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+#endif -+ -+ PRINTF1 (ctxt, DBG_TPROC, "flushing registers to stack %08x\n", stack); -+ -+ /* -+ * NOTE - store the register to the stack in reverse order, since the stack -+ * will be allocated in sdram, and we cannot use the sdram accessing functions -+ * here, as it is "mapped" in user-space. -+ */ -+ for (i = 0; i < 8; i++) -+ { -+ if (trap->DirtyBits.s.GlobalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Globals[i]), trap->Registers[REG_GLOBALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.OutsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.LocalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Locals[i]), trap->Registers[REG_LOCALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.InsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Ins[i]), trap->Registers[REG_INS+(i^WordEndianFlip)]); -+ } -+ -+ /* always restore all registers */ -+ orflag = ThreadRestartFromTrapBit | ThreadReloadAllRegs; -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (trap->sp | orflag); -+} -+ -+void -+ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer) -+{ -+ PRINTF1 (ctxt, DBG_TPROC, "ReissueStackPointer : Queue SP %08x\n", StackPointer); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_THREAD]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_THREAD, StackPointer); -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/tprocinsts.c linux-2.6.9/drivers/net/qsnet/elan3/tprocinsts.c ---- clean/drivers/net/qsnet/elan3/tprocinsts.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/tprocinsts.c 2003-09-24 09:57:25.000000000 -0400 -@@ -0,0 +1,401 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tprocinsts.c,v 1.20 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tprocinsts.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MAXINSTR 256 /* # Instructions to look at while looking for close */ -+ -+static E3_uint32 ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V); -+ -+char *OpcodeNames[] = -+{ -+ "ADD ", -+ "AND ", -+ "OR ", -+ "XOR ", -+ "SUB ", -+ "ANDN ", -+ "ORN ", -+ "XNOR ", -+ "ADDX ", -+ "UNIP ", -+ "UMUL ", -+ "SMUL ", -+ "SUBX ", -+ "UNIP ", -+ "UDIV ", -+ "SDIV ", -+ "ADDcc ", -+ "ANDcc ", -+ "ORcc ", -+ "XORcc ", -+ "SUBcc ", -+ "ANDNcc", -+ "ORNcc ", -+ "XNORcc", -+ "ADDXcc", -+ "UNIPcc", -+ "UMULcc", -+ "SMULcc", -+ "SUBXcc", -+ "UNIPcc", -+ "UDIVcc", -+ "SDIVcc" -+}; -+ -+#define REGISTER_VALUE(trap, rN) (((rN) == 0) ? 0 : (trap)->Registers[(rN)^WordEndianFlip]) -+#define ASSIGN_REGISTER(trap, rN, value) ((rN) != 0 ? trap->Registers[(rN)^WordEndianFlip] = (value) : 0) -+ -+int -+RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal) -+{ -+ E3_Addr pc = (trap->pc & PC_MASK); -+ E3_Addr npc = (trap->npc & PC_MASK); -+ E3_uint32 Z = (trap->npc & PSR_Z_BIT) ? 1 : 0; -+ E3_uint32 N = (trap->npc & PSR_N_BIT) ? 1 : 0; -+ E3_uint32 C = (trap->pc & PSR_C_BIT) ? 1 : 0; -+ E3_uint32 V = (trap->pc & PSR_V_BIT) ? 1 : 0; -+ E3_uint32 instr; -+ E3_Addr addr; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ failed: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_SIMULATION_FAILED, THREAD_PROC, trap); -+ return (EFAULT); -+ } -+ -+ /* -+ * Thread trapped with output open, or while closing, -+ * so roll the PC forwards to the instruction after the -+ * next c_close, and execute that with the register -+ * specified in c_close set to the trap which occured. -+ * (This is not 1 which means an ACK) -+ */ -+ PRINTF1 (ctxt, DBG_TPROC, "RollThreadToClose: roll pc %x to c_close\n", pc); -+ -+ for (;;) -+ { -+ instr = ELAN3_OP_LOAD32 (ctxt, pc); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "RollThreadToClose: PC=%x INSTR=%x\n", pc, instr); -+ -+ switch (OPCODE_CLASS(instr)) -+ { -+ case OPCODE_CLASS_0: -+ switch ((instr) & OPCODE_CLASS0_MASK) -+ { -+ case OPCODE_SETHI: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : sethi r%d = %x\n", pc, INSTR_RD(instr), instr << 10); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), instr << 10); -+ break; -+ -+ case OPCODE_SENDREG: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendreg\n", pc); -+ break; -+ -+ case OPCODE_SENDMEM: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendmem\n", pc); -+ break; -+ -+ case OPCODE_BICC: -+ { -+ int DoBranch = (instr >> 28) & 1; -+ int CondBranch = 1; -+ E3_Addr OldnPC = npc; -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : Bicc Z=%x N=%x C=%x V=%x ", pc, Z, N, C, V); -+ switch (instr & OPCODE_BICC_MASK) -+ { -+ case OPCODE_BICC_BN: CondBranch = 0; break; -+ case OPCODE_BICC_BE: DoBranch ^= Z; break; -+ case OPCODE_BICC_BLE: DoBranch ^= Z | (N ^ V); break; -+ case OPCODE_BICC_BL: DoBranch ^= N ^ V; break; -+ case OPCODE_BICC_BLEU: DoBranch ^= C | Z; break; -+ case OPCODE_BICC_BCS: DoBranch ^= C; break; -+ case OPCODE_BICC_BNEG: DoBranch ^= N; break; -+ case OPCODE_BICC_BVS: DoBranch ^= V; break; -+ } -+ -+ /* Do the branch */ -+ if (DoBranch != 0) -+ { -+ npc = pc + (((instr & 0x3fffff) << 2) | -+ (((instr & 0x200000) != 0) ? 0xff000000 : 0)); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : branch taken to %x\n", pc, npc); -+ } -+ else -+ { -+ npc = npc + 4; -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch not taken\n", pc); -+ } -+ pc = OldnPC; -+ -+ /* Test if the next is annuled */ -+ if (((instr & OPCODE_BICC_ANNUL) != 0) & -+ ((DoBranch == 0) | (CondBranch == 0))) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch annulled\n", pc); -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+ /* -+ * we've already consumed the instruction - so continue rather -+ * than break; -+ */ -+ continue; -+ } -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 0 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ -+ case OPCODE_CLASS_1: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 1 instr %x\n", pc, instr); -+ goto failed; -+ -+ case OPCODE_CLASS_2: -+ { -+ E3_uint32 X = REGISTER_VALUE (trap, INSTR_RS1(instr)); -+ E3_uint32 Y = (instr & OPCODE_IMM) ? INSTR_IMM(instr) : REGISTER_VALUE (trap, INSTR_RS2(instr)); -+ -+ if ((instr & OPCODE_NOT_ALUOP) == 0) -+ { -+ E3_uint32 fcode = (instr >> OPCODE_FCODE_SHIFT) & OPCODE_FCODE_MASK; -+ E3_uint32 result = ALU (ctxt, fcode, X, Y, &Z, &N, &C, &V); -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : %s %x %x -> %x", pc, OpcodeNames[fcode], X, Y, result); -+ PRINTF4 (ctxt, DBG_TPROC, " Z=%x N=%x C=%x V=%x\n", Z, N, C, V); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), result); -+ } -+ else -+ { -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_OPEN: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_open\n", pc); -+ break; -+ -+ case OPCODE_CLOSE: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ goto found_close; -+ -+ case OPCODE_SLL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SLL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X << Y); -+ break; -+ -+ case OPCODE_SRL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_SRA: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRA\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_BREAKTEST: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAKTEST not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_BREAK: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAK not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_SUSPEND: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SUSPEND not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_WAIT: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : WAIT not allowed while open\n", pc); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 2 instr %x\n", pc, instr); -+ goto failed; -+ } -+ } -+ break; -+ } -+ -+ case OPCODE_CLASS_3: -+ { -+ if ((instr & OPCODE_IMM) != 0) -+ addr = REGISTER_VALUE (trap, INSTR_RS1(instr)) + INSTR_IMM(instr); -+ else -+ addr = (REGISTER_VALUE (trap, INSTR_RS1(instr)) + -+ REGISTER_VALUE (trap, INSTR_RS2(instr))); -+ -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_LD: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : LD [%x], r%d\n", pc, addr, INSTR_RD(instr)); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), ELAN3_OP_LOAD32 (ctxt, addr)); -+ break; -+ -+ case OPCODE_LDD: -+ case OPCODE_LDBLOCK16: -+ case OPCODE_LDBLOCK32: -+ case OPCODE_LDBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : LDBLOCKx @ %x is not possible while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_ST: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : ST @ %x\n", pc, addr); -+ -+ ELAN3_OP_STORE32 (ctxt, addr, REGISTER_VALUE (trap, INSTR_RD(instr))); -+ break; -+ -+ case OPCODE_STD: -+ case OPCODE_STBLOCK16: -+ case OPCODE_STBLOCK32: -+ case OPCODE_STBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : STD @ %x is not posisble while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_SWAP: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : SWAP @ %x is not posible while output open\n", pc, addr); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 3 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ }} -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+found_close: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ -+ /* -+ * Found the new pc, and have the close instruction in *instr -+ */ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), PAckVal); -+ -+ /* -+ * Move to instruction after close. -+ */ -+ trap->pc = npc; -+ -+ /* Insert the value of Z and N from the close inst */ -+ trap->npc = (npc + 4) | ((PAckVal == E3_PAckOk) ? 1 : -+ (PAckVal == E3_PAckTestFail) ? 2 : 0); -+ -+ return (ESUCCESS); -+} -+ -+E3_uint32 -+ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V) -+{ -+ E3_uint32 XMSB, YMSB, ZMSB, Cprime; -+ E3_uint32 Yprime; -+ E3_uint32 Result=0; -+ -+ Yprime = ((fcode >> 2) & 1) ? ~Y : Y; -+ Cprime = ((fcode >> 2) & 1) ^ (*C & ((fcode >> 3) & 1)); -+ XMSB = (X >> 31) & 1; -+ YMSB = (Yprime >> 31) & 1; -+ /* mul or div */ -+ if ((fcode & 0xa) == 0xa) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ALU: tried a multiply or a divide\n"); -+ return (0); -+ } -+ -+ switch (fcode & 3) -+ { -+ /*ADD */ -+ case 0: -+ Result = X + Yprime + Cprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ ZMSB = Result >> 31; -+ *V = ((XMSB & YMSB & ~ZMSB) | (~XMSB &~YMSB & ZMSB)); -+ *C = ((fcode >> 2) & 1) ^ ( (XMSB & YMSB) | (~ZMSB & (XMSB | YMSB))); -+ break; -+ -+ /*AND */ -+ case 1: -+ Result = X & Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*OR */ -+ case 2: -+ Result = X | Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*XOR */ -+ case 3: -+ Result = X ^ Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ } -+ -+ *Z = (Result == 0) ? 1 : 0; -+ *N = (Result >> 31) & 1; -+ -+ return (Result); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/tproc_linux.c linux-2.6.9/drivers/net/qsnet/elan3/tproc_linux.c ---- clean/drivers/net/qsnet/elan3/tproc_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/tproc_linux.c 2005-05-31 06:29:07.000000000 -0400 -@@ -0,0 +1,223 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: tproc_linux.c,v 1.22.2.1 2005/05/31 10:29:07 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+#ifdef NO_ABI -+#include -+extern asmlinkage long sys_open(const char *, int, int); -+extern asmlinkage ssize_t sys_write(unsigned int, const char *, size_t); -+extern asmlinkage ssize_t sys_read(unsigned int, char *, size_t); -+extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+extern asmlinkage long sys_kill(int, int); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+# include -+#else -+# include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * NOTE: system calls from kernel on Linux are different on alpha and i386 -+ * on alpha they return -errno on failure -+ * on i386 they return -1 on failure and set errno -+ */ -+ -+static void -+ReturnSyscall (THREAD_TRAP *trap, unsigned long rc, int *skip) -+{ -+ if (rc >= (unsigned long) (-130)) -+ { -+ trap->pc |= PSR_C_BIT; /* clear carry to indicate failure */ -+ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = -rc; -+ } -+ else -+ { -+ trap->pc &= ~PSR_C_BIT; /* set carry to indicate success */ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = rc; -+ } -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)] = 0; -+ *skip = 1; -+} -+ -+static void -+dump_regs(ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ PRINTF (ctxt, DBG_TPROC, " OUTS %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF (ctxt, DBG_TPROC, " %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+} -+ -+int -+ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int code; -+ caddr_t maddr; -+ struct file *file; -+ unsigned long rc; -+ int i; -+ uintptr_t av[6]; -+ uintptr_t ptr; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadSyscall: PC %08x G1 %08x\n", -+ trap->pc, trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]); -+ dump_regs(ctxt, trap); -+ -+ code = trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]; -+ -+ /* Copy the system call arguments from %o0-%o5 */ -+ for (i = 0; i < 6; i++) -+ av[i] = trap->Registers[REG_OUTS+(i^WordEndianFlip)]; -+ -+ rc = (unsigned long) -EINVAL; -+ -+ switch (code) { -+#if defined(IOPROC_PATCH_APPLIED) -+ case ELAN3_SYS_open: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_open((const char *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_close: -+ rc = sys_close(av[0]); -+ break; -+ -+ case ELAN3_SYS_write: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_write(av[0], (const char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_read: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_read(av[0], (char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_poll: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_poll((struct pollfd *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_lseek: -+ rc = sys_lseek(av[0], av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_mmap: -+ if ((E3_Addr) av[0] == (E3_Addr) 0) -+ maddr = NULL; -+ else if ((maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0])) == NULL) -+ break; -+ -+ file = NULL; -+ /* GNAT 5515: If *not* anonymous memory need to do fget */ -+ if ((av[3] & MAP_ANONYMOUS) == 0 && (file = fget (av[4])) == NULL) -+ { -+ rc = -EBADF; -+ break; -+ } -+ -+ down_write (¤t->mm->mmap_sem); -+ ptr = do_mmap_pgoff (file, (unsigned long) maddr, av[1], av[2], av[3], av[5] >>PAGE_SHIFT); -+ up_write (¤t->mm->mmap_sem); -+ -+ if (file) -+ fput (file); -+ -+ if (IS_ERR((void *) ptr)) -+ rc = PTR_ERR((void *) ptr); -+ else -+ rc = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t)ptr); -+ -+ break; -+ -+ case ELAN3_SYS_munmap: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ -+#ifdef AC -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1], 1); -+#else -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1]); -+#endif -+ break; -+ -+ case ELAN3_SYS_kill: -+ rc = sys_kill(av[0], av[1]); -+ break; -+ -+ case ELAN3_SYS_getpid: -+ rc = current->pid; -+ break; -+#else -+ -+#warning "NO IOPROC patch applied - thread cannot perform system calls" -+ -+#endif /* defined(IOPROC_PATCH_APPLIED) */ -+ -+ default: -+ return EINVAL; -+ } -+ ReturnSyscall(trap, rc, skip); -+ return ESUCCESS; -+} -+ -+ -+int -+ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int ret = ESUCCESS; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadElancall: PC %08x\n", trap->pc); -+ dump_regs(ctxt, trap); -+ -+ /* -+ * Elan system call 'type' is passed in o0 -+ */ -+ switch (trap->Registers[REG_OUTS+(0^WordEndianFlip)]) -+ { -+ default: -+ ret = EINVAL; -+ break; -+ } -+ return ret; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan3/virtual_process.c linux-2.6.9/drivers/net/qsnet/elan3/virtual_process.c ---- clean/drivers/net/qsnet/elan3/virtual_process.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan3/virtual_process.c 2004-06-07 09:50:10.000000000 -0400 -@@ -0,0 +1,884 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: virtual_process.c,v 1.68 2004/06/07 13:50:10 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/virtual_process.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static ELAN3_VPSEG * -+InstallSegment (ELAN3_CTXT *ctxt, int process, int entries) -+{ -+ ELAN3_VPSEG **prevSeg, *seg; -+ int lastTop = -1; -+ int top = process + entries-1; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (seg = (*prevSeg)) != NULL; prevSeg = &seg->Next) -+ { -+ int thisTop = seg->Process + seg->Entries - 1; -+ -+ if (process < seg->Process && (process <= lastTop || top >= seg->Process)) -+ { -+ /* -+ * Overlaps with last segment, or this one -+ */ -+ return (NULL); -+ } -+ if (seg->Process > process) -+ break; -+ -+ lastTop = thisTop; -+ } -+ -+ KMEM_ZALLOC (seg, ELAN3_VPSEG *, sizeof (ELAN3_VPSEG), TRUE); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ return (NULL); -+ -+ seg->Process = process; -+ seg->Entries = entries; -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "InstallSegment: add seg %p before %p\n", seg, *prevSeg); -+ -+ seg->Next = *prevSeg; -+ *prevSeg = seg; -+ -+ return (seg); -+} -+ -+static int -+RemoveSegment (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg) -+{ -+ ELAN3_VPSEG **prevSeg, *thisSeg; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (thisSeg = (*prevSeg)) != NULL; prevSeg = &thisSeg->Next) -+ { -+ if (thisSeg == seg) -+ break; -+ } -+ -+ if (thisSeg == (ELAN3_VPSEG *) NULL) -+ return (EINVAL); -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "RemoveSegment: remove seg %p next %p\n", thisSeg, thisSeg->Next); -+ -+ *prevSeg = thisSeg->Next; -+ -+ KMEM_FREE ((caddr_t) seg, sizeof (ELAN3_VPSEG)); -+ -+ return (ESUCCESS); -+} -+ -+static ELAN3_VPSEG * -+FindSegment (ELAN3_CTXT *ctxt, int low, int high) -+{ -+ ELAN3_VPSEG *seg; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ if (seg->Process <= low && (seg->Process + seg->Entries) > high) -+ return (seg); -+ } -+ -+ return ((ELAN3_VPSEG *) NULL); -+} -+ -+ELAN_LOCATION -+ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ location.loc_node = ELAN3_INVALID_NODE; -+ location.loc_context = -1; -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d seg %p cap %p\n", process, seg, cap); -+ -+ if (seg == NULL) -+ seg = FindSegment (ctxt, process, process); -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return (location); -+ -+ cap = &seg->SegCapability; -+ nnodes = ELAN_CAP_NUM_NODES (cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if (( seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, ctx + (node * nctxs))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, node + (ctx * nnodes))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ found: -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d -> Node %d Context %d\n", process, location.loc_node, location.loc_context); -+ -+ if (cap != NULL) -+ { -+ bcopy ((caddr_t) &seg->SegCapability, (caddr_t) cap, sizeof (ELAN_CAPABILITY)); -+ cap->cap_mycontext = location.loc_context; -+ } -+ -+ return (location); -+} -+ -+int -+LocationToProcess (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, ELAN_LOCATION loc, ELAN_CAPABILITY *cap) -+{ -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ if (seg == NULL) -+ return ELAN3_INVALID_PROCESS; -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return ELAN3_INVALID_PROCESS; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctx + (node * nctxs))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (ctx * nnodes))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ return ELAN3_INVALID_PROCESS; -+} -+ -+int -+elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ int i; -+ int nodeOff; -+ int ctxOff; -+ int nnodes; -+ int nctxs; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ int entries; -+ -+ PRINTF2 (ctxt, DBG_VP, "elan3_addvp: %d -> %s\n", process, CapabilityString (cap)); -+ -+ entries = ELAN_CAP_ENTRIES(cap); -+ if (entries <= 0 || (process + entries) > ELAN3_MAX_VPS) -+ return (EINVAL); -+ -+ /* -+ * Scan the virtual process segment list, to add this entry, and ensure that -+ * the ranges don't overlap. -+ */ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, cap, ELAN_USER_P2P)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(cap, cap) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, entries)) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_addvp: failed to find a seg\n"); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_P2P; -+ seg->SegCapability = *cap; -+ seg->SegCapability.cap_mycontext = ELAN_CAP_UNINITIALISED; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: segment type %x %d %d\n", -+ seg->SegCapability.cap_type, seg->Process, entries); -+ -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ /* position not determined, so cannot load any routes, the hwtest -+ * process must explicitly set it's own routes */ -+ -+ if (!(cap->cap_type & ELAN_CAP_TYPE_HWTEST) && (pos->pos_mode != ELAN_POS_UNKNOWN)) -+ { -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, i = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode +nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext + ctxOff, nflits, flits); -+ } -+ -+ i++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, i = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode + nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext +ctxOff, nflits, flits); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_removevp (ELAN3_CTXT *ctxt, int process) -+{ -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *next; -+ int i; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_removevp: remove process %d\n", process); -+ -+ if (process == ELAN3_INVALID_PROCESS) -+ seg = ctxt->VpSegs; -+ else -+ seg = FindSegment (ctxt, process, process); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ do { -+ PRINTF3 (ctxt, DBG_VP, "elan3_removevp: segment is %p [%x,%x]\n", -+ seg, seg->Process, seg->Process+seg->Entries); -+ -+ for (i = 0; i < seg->Entries; i++) -+ ClearRoute (ctxt->Device, ctxt->RouteTable, seg->Process+i); -+ -+ /* get Next pointer value before structure is free'd */ -+ next = seg->Next; -+ RemoveSegment (ctxt, seg); -+ -+ } while (process == ELAN3_INVALID_PROCESS && (seg = next) != NULL); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_addbcastvp (ELAN3_CTXT *ctxt, int process, int lowProc, int highProc) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ int virtualProcess; -+ E3_uint64 routeValue; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addbcastvp: process %d [%d,%d]\n", process, lowProc, highProc); -+ -+ if (lowProc > highProc || pos->pos_mode != ELAN_POS_MODE_SWITCHED) -+ return (EINVAL); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if ((aseg = FindSegment (ctxt, lowProc, highProc)) == NULL || (aseg->Type != ELAN3_VPSEG_P2P)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to p2p segment\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check aseg->SegCapability */ -+ switch (elan3_validate_cap (ctxt->Device, &aseg->SegCapability, ELAN_USER_BROADCAST)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &aseg->SegCapability) != ESUCCESS ) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ( ProcessToLocation (ctxt, aseg, lowProc, NULL).loc_context != -+ ProcessToLocation (ctxt, aseg, highProc, NULL).loc_context) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to single context\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, 1)) == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_BROADCAST; -+ seg->SegLowProc = lowProc; -+ seg->SegHighProc = highProc; -+ -+ PRINTF4 (ctxt, DBG_VP, "elan3_addbcastvp: installed seg %p Type %d LowProc %d HighProc %d\n", -+ seg, seg->Type, seg->SegLowProc, seg->SegHighProc); -+ -+ for (virtualProcess = lowProc; virtualProcess <= highProc; virtualProcess++) -+ { -+ if (virtualProcess < 0 || virtualProcess >= ctxt->RouteTable->Size) -+ routeValue = 0; -+ else -+ routeValue = elan3_sdram_readq ( ctxt->Device, ctxt->RouteTable->Table + virtualProcess * NBYTES_PER_SMALL_ROUTE); -+ -+ if (! (routeValue & ROUTE_VALID)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "loadvp[%x]: broadcast %x not valid\n", -+ ctxt->Capability.cap_mycontext, virtualProcess); -+ break; -+ } -+ } -+ -+ if (virtualProcess > highProc) /* All vps now present */ -+ { /* so load up broadcast route */ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN_LOCATION low = ProcessToLocation (ctxt, aseg, lowProc, NULL); -+ ELAN_LOCATION high = ProcessToLocation (ctxt, aseg, highProc, NULL); -+ int nflits = GenerateRoute (pos, flits, low.loc_node, high.loc_node, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ PRINTF6 (ctxt, DBG_VP, "loadvp[%x]: broadcast %d -> %x.%x [%x.%x]\n", ctxt->Capability.cap_mycontext, -+ seg->Process, low.loc_node, high.loc_node, -+ low.loc_context, high.loc_context); -+ -+ LoadRoute ( ctxt->Device, ctxt->RouteTable, seg->Process, low.loc_context, nflits, flits); -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_process (ELAN3_CTXT *ctxt) -+{ -+ int res = ELAN3_INVALID_PROCESS; -+ ELAN3_VPSEG *seg; -+ ELAN_LOCATION loc; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ loc.loc_node = ctxt->Position.pos_nodeid; -+ loc.loc_context = ctxt->Capability.cap_mycontext; -+ -+ for (seg = ctxt->VpSegs ; seg; seg = seg->Next) -+ { -+ if (seg->Type == ELAN3_VPSEG_P2P && -+ seg->SegCapability.cap_lowcontext <= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_highcontext >= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_lownode <= ctxt->Position.pos_nodeid && -+ seg->SegCapability.cap_highnode >= ctxt->Position.pos_nodeid) -+ { -+ if ((res=LocationToProcess (ctxt,seg,loc,&ctxt->Capability)) != ELAN3_INVALID_PROCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return res; -+ } -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_check_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError) -+{ -+ PRINTF5 (ctxt, DBG_VP, "elan3_check_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_read (&ctxt->VpLock); -+ *routeError=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node); -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); /* the call is a success tho the errorcode may be set */ -+} -+ -+int -+elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ int nflits; -+ int err; -+ -+ PRINTF5 (ctxt, DBG_VP, "elan3_load_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check the route is valid */ -+ if (!(ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ { -+ /* must have already attached to define my context number */ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((err=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node)) != ELAN3_ROUTE_SUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) == NULL || seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* Calculate number of flits in this route */ -+ for (nflits = 0; nflits < MAX_FLITS && flits[nflits]; nflits++) -+ ; -+ -+ res = LoadRoute (ctxt->Device, ctxt->RouteTable, process, ProcessToLocation (ctxt, seg, process, NULL).loc_context, nflits, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_get_route: vp=%d \n", process); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if (ctxt->RouteTable == NULL) /* is there a route table */ -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) != NULL && seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (seg == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ res = GetRoute (ctxt->Device, ctxt->RouteTable, process, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_reset_route (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_reset_route: vp=%d \n", process); -+ -+ GenerateRoute (&ctxt->Position, flits, process, process, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ return elan3_load_route(ctxt,process,flits); -+} -+ -+int -+ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ E3_uint64 routeValue; -+ -+ krwlock_read (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: vp=%d \n", process); -+ -+ if (ctxt->RouteTable == NULL || process < 0 || process >= ctxt->RouteTable->Size) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (! (seg = FindSegment (ctxt, process, process))) -+ { -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: cannot find segment for virtual process %d\n", process); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, &seg->SegCapability, ((seg->Type == ELAN3_VPSEG_P2P) ? ELAN_USER_P2P : ELAN_USER_BROADCAST))) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &seg->SegCapability) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ BumpUserStat (ctxt, LoadVirtualProcess); -+ -+ routeValue = elan3_sdram_readq (dev, ctxt->RouteTable->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_VALID) /* Virtual process already */ -+ { /* loaded */ -+ krwlock_done (&ctxt->VpLock); -+ return (ESUCCESS); -+ } -+ -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if ((res = elan_validate_map (&ctxt->Capability,&seg->SegCapability)) == ESUCCESS && -+ (res = GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_check(ctxt, flits, ProcessToLocation (ctxt, seg, process, NULL).loc_node)) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ default: -+ res = EINVAL; -+ break; -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ /* Find the segment that this broadcast range spans. */ -+ aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg == NULL || (aseg->Type != ELAN3_VPSEG_P2P) || !(aseg->SegCapability.cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "resolveVirtualProcess: %d -> EINVAL (%s)\n", process, -+ (aseg == NULL ? "no segment" : ((seg->Type != ELAN3_VPSEG_P2P) ? "not point to point" : -+ "not broadcastable"))); -+ res = EINVAL; -+ break; -+ } -+ -+ switch (aseg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ ELAN_LOCATION lowNode = ProcessToLocation (ctxt,aseg,seg->SegLowProc , NULL); -+ ELAN_LOCATION highNode = ProcessToLocation (ctxt,aseg,seg->SegHighProc , NULL); -+ -+ -+ if ((res = elan_validate_map (&ctxt->Capability,&aseg->SegCapability)) == ESUCCESS && -+ (res=GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_broadcast_check(ctxt,flits, lowNode.loc_node , highNode.loc_node ) != ELAN3_ROUTE_SUCCESS ) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ return (res); -+} -+ -+void -+UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN3_VPSEG *seg; -+ ELAN_CAPABILITY *scap; -+ int i; -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ scap = &seg->SegCapability; -+ -+ if (cap == NULL || ELAN_CAP_MATCH (scap, cap)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "unloadvp: segment [%x.%x]\n", -+ seg->Process, seg->Process + seg->Entries-1); -+ -+ for (i = 0; i < seg->Entries; i++) -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ for (i = 0; i < seg->Entries; i++) -+ { -+ ELAN3_VPSEG *aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg != NULL && ELAN_CAP_MATCH(&aseg->SegCapability, cap)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "unloadvp: broadcast vp %d\n", seg->Process); -+ -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ } -+ } -+ } -+} -+ -+caddr_t -+CapabilityString (ELAN_CAPABILITY *cap) -+{ -+#define CAPSTR_LEN 200 -+#define NCAPSTRS 4 -+ static char space[CAPSTR_LEN*NCAPSTRS]; -+ static int bufnum; -+ static spinlock_t lock; -+ static int lockinitialised; -+ int num; -+ unsigned long flags; -+ -+ if (! lockinitialised) -+ { -+ spin_lock_init (&lock); -+ lockinitialised = 1; -+ } -+ -+ spin_lock_irqsave (&lock, flags); -+ -+ if ((num = ++bufnum) == NCAPSTRS) -+ num = bufnum = 0; -+ spin_unlock_irqrestore (&lock, flags); -+ -+ sprintf (space + (num * CAPSTR_LEN), "%4x %4x %4x %4x %4x %4x %4x [%x.%x.%x.%x]", cap->cap_type, -+ cap->cap_lownode, cap->cap_highnode, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, ELAN_CAP_ENTRIES(cap), -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3]); -+ -+ return (space + (num * CAPSTR_LEN)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/debug.c linux-2.6.9/drivers/net/qsnet/elan4/debug.c ---- clean/drivers/net/qsnet/elan4/debug.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/debug.c 2005-03-23 06:06:15.000000000 -0500 -@@ -0,0 +1,146 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.17 2005/03/23 11:06:15 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+unsigned elan4_debug = 0; -+unsigned elan4_debug_toconsole = 0; -+unsigned elan4_debug_tobuffer = DBG_ALL; -+ -+unsigned elan4_debug_display_ctxt; -+unsigned elan4_debug_ignore_ctxt; -+unsigned elan4_debug_ignore_type; -+ -+void -+elan4_debug_init() -+{ -+ if ((elan4_debug & elan4_debug_tobuffer) != 0) -+ qsnet_debug_alloc(); -+} -+ -+void -+elan4_debug_fini() -+{ -+} -+ -+void -+elan4_debugf (void *type, int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ int where = 0; -+ va_list ap; -+ -+ if ((mode & elan4_debug_tobuffer) != 0 || type == DBG_BUFFER) -+ where |= QSNET_DEBUG_BUFFER; -+ if ((mode & elan4_debug_toconsole) != 0 || type == DBG_CONSOLE) -+ where |= QSNET_DEBUG_CONSOLE; -+ -+ if (where == 0) -+ return; -+ -+ if ((unsigned long) type > DBG_NTYPES) -+ { -+ ELAN4_CTXT *ctxt = (ELAN4_CTXT *) type; -+ -+ if (elan4_debug_display_ctxt && ctxt->ctxt_num != elan4_debug_display_ctxt) -+ return; -+ if (elan4_debug_ignore_ctxt && ctxt->ctxt_num == elan4_debug_ignore_ctxt) -+ return; -+ -+ sprintf (prefix, "[%08ld.%04d] elan4 (%03x) ", lbolt, current->pid, ctxt->ctxt_num); -+ } -+ else if ((unsigned long) type == (int) DBG_CONSOLE) -+ prefix[0] = '\0'; -+ else -+ { -+ char *what; -+ -+ if (elan4_debug_ignore_type & (1 << ((unsigned long) type))) -+ return; -+ -+ switch ((unsigned long) type) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, "[%08ld.%04d] elan4 [%s] ", lbolt, current->pid, what); -+ else -+ sprintf (prefix, "[%08ld.%04d] elan4 [%3d] ", lbolt, current->pid, (int)(long)type); -+ } -+ -+ va_start(ap,fmt); -+ qsnet_vdebugf (where, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+int -+elan4_assfail (ELAN4_CTXT *ctxt, const char *ex, const char *func, const char *file, const int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "elan%d: assertion failure: %s, function: %s, file %s, line: %d\n", -+ ctxt->ctxt_dev->dev_instance, ex, func, file, line); -+ -+ printk (KERN_EMERG "elan%d: assertion failure: %s, function: %s, file %s, line: %d\n", -+ ctxt->ctxt_dev->dev_instance, ex, func, file, line); -+ -+ if (panicstr) -+ return 0; -+ -+ if (assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (assfail_mode & 2) -+ panic ("elan%d: assertion failure: %s, function: %s, file %s, line: %d\n", -+ ctxt->ctxt_dev->dev_instance, ex, func, file, line); -+ -+ if (assfail_mode & 4) -+ elan4_debug = 0; -+ -+ return 0; -+ -+} -+ -+int -+elan4_debug_trigger (ELAN4_CTXT *ctxt, const char *func, const char *file, const int line, const char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start (ap, fmt); -+ qsnet_vdebugf (QSNET_DEBUG_CONSOLE|QSNET_DEBUG_BUFFER, "", fmt, ap); -+ va_end (ap); -+ -+ printk (KERN_EMERG "elan%d: debug trigger: function: %s, file %s, line: %d\n", ctxt->ctxt_dev->dev_instance, func, file, line); -+ -+ if (panicstr) -+ return 0; -+ -+ if (assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (assfail_mode & 2) -+ panic ("elan%d: debug trigger: function: %s, file %s, line: %d\n", ctxt->ctxt_dev->dev_instance, func, file, line); -+ -+ if (assfail_mode & 4) -+ elan4_debug = 0; -+ -+ return 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/device.c linux-2.6.9/drivers/net/qsnet/elan4/device.c ---- clean/drivers/net/qsnet/elan4/device.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/device.c 2005-08-09 05:57:04.000000000 -0400 -@@ -0,0 +1,3127 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.106.2.5 2005/08/09 09:57:04 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define ELAN_DEV_OPS ELANMOD_DEV_OPS -+#define ELAN_DEV_OPS_VERSION ELANMOD_DEV_OPS_VERSION -+#define elan_dev_register elanmod_dev_register -+#define elan_dev_deregister elanmod_dev_deregister -+#endif -+ -+/* XXXX configurational defines */ -+ -+#if defined (CONFIG_MPSAS) -+#define HASH_0_SIZE_VAL (12 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 8 -+#define LN2_MAX_CQS 8 /* 256 */ -+#else -+#define HASH_0_SIZE_VAL (13 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 12 -+#define LN2_MAX_CQS 10 /* 1024 */ -+#endif -+ -+unsigned int elan4_hash_0_size_val = HASH_0_SIZE_VAL; -+unsigned int elan4_hash_1_size_val = HASH_1_SIZE_VAL; -+unsigned int elan4_ctxt_table_shift = CTXT_TABLE_SHIFT; -+unsigned int elan4_ln2_max_cqs = LN2_MAX_CQS; -+unsigned int elan4_dmaq_highpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_highpri_size = 1; /* 1024 entries */ -+unsigned int elan4_dmaq_lowpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_lowpri_size = 1; /* 1024 entries */ -+unsigned int elan4_interruptq_size = 0; /* 1024 entries */ -+unsigned int elan4_mainint_punt_loops = 1; -+unsigned int elan4_mainint_resched_ticks = 0; -+unsigned int elan4_linkport_lock = 0xbe0fcafe; /* default link port lock */ -+unsigned int elan4_eccerr_recheck = 1; -+ -+static int -+elan4_op_get_position (void *arg, ELAN_POSITION *ptr) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *)arg; -+ ELAN_POSITION pos; -+ -+ elan4_get_position (dev, &pos); -+ -+ return copyout (&pos, ptr, sizeof (ELAN_POSITION)); -+} -+ -+static int -+elan4_op_set_position (void *arg, unsigned short nodeid, unsigned short numnodes) -+{ -+ /* XXXXX -+ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ compute_position (&pos, nodeid, numnode, num_down_links_value); -+ -+ return elan4_set_position (dev, pos); -+ */ -+ return EINVAL; -+} -+ -+ELAN_DEV_OPS elan4_dev_ops = -+{ -+ elan4_op_get_position, -+ elan4_op_set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+static E4_uint32 -+elan4_read_filter (ELAN4_DEV *dev, unsigned networkctx) -+{ -+ return (elan4_sdram_readl (dev, dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter))); -+} -+ -+static void -+elan4_write_filter (ELAN4_DEV *dev, unsigned networkctx, E4_uint32 value) -+{ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter)), value); -+ pioflush_sdram(dev); -+} -+ -+void -+elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ E4_uint32 setbits = 0; -+ E4_uint32 intmask = 0; -+ E4_uint32 haltmask; -+ E4_uint32 next_sched; -+ E4_uint32 next_intmask; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intmask_lock, flags); -+ -+ haltmask = (dev->dev_haltop_mask | dev->dev_haltop_active); -+ -+ if ((haltmask & INT_DProcHalted) || dev->dev_halt_all_count || dev->dev_halt_dproc_count) -+ setbits |= SCH_DProcHalt; -+ -+ if ((haltmask & INT_TProcHalted) || dev->dev_halt_all_count || dev->dev_halt_tproc_count) -+ setbits |= SCH_TProcHalt; -+ -+ if ((haltmask & INT_CProcHalted) || dev->dev_halt_all_count || dev->dev_halt_cproc_count) -+ setbits |= SCH_CProcHalt; -+ -+ if ((haltmask & INT_DiscardingLowPri) || dev->dev_discard_all_count || dev->dev_discard_lowpri_count) -+ setbits |= SCH_DiscardLowPriInput; -+ -+ if ((haltmask & INT_DiscardingHighPri) || dev->dev_discard_all_count || dev->dev_discard_highpri_count) -+ setbits |= SCH_DiscardHighPriInput; -+ -+ if (dev->dev_halt_lowpri_count) -+ setbits |= SCH_StopLowPriQueues; -+ -+ if (haltmask & INT_DProcHalted) intmask |= INT_DProcHalted; -+ if (haltmask & INT_TProcHalted) intmask |= INT_TProcHalted; -+ if (haltmask & INT_CProcHalted) intmask |= INT_CProcHalted; -+ if (haltmask & INT_DiscardingLowPri) intmask |= INT_DiscardingLowPri; -+ if (haltmask & INT_DiscardingHighPri) intmask |= INT_DiscardingHighPri; -+ -+ next_intmask = (dev->dev_intmask & ~(INT_Halted | INT_Discarding)) | (intmask & ~intreg); -+ next_sched = (dev->dev_schedstatus & ~(SCH_Halt | SCH_Discard)) | setbits; -+ -+ PRINTF5 (DBG_DEVICE, DBG_REGISTER, "elan4_set_schedstatus: haltmask=%x setbits=%x intmask=%x next_sched=%x next_intmask=%x\n", -+ haltmask, setbits, intmask, next_sched, next_intmask); -+ -+ CHANGE_INT_MASK (dev, next_intmask); -+ CHANGE_SCHED_STATUS (dev, next_sched); -+ -+ spin_unlock_irqrestore (&dev->dev_intmask_lock, flags); -+} -+ -+int -+elan4_route2str (E4_VirtualProcessEntry *route, char *routeStr) -+{ -+ int part = 0; -+ int shift; -+ int broadcast; -+ E4_uint64 value; -+ char *ptr = routeStr; -+ int b; -+ -+ /* unpack first */ -+ value = route->Values[part] & 0x7f; -+ if ( (value & 0x78) == 0) { -+ /* empty route */ -+ strcpy(routeStr,"Invalid lead route"); -+ return (-EINVAL); -+ } -+ -+ if ( value & 0x40 ) { -+ /* broad cast */ -+ strcpy(routeStr,"Broadcast"); -+ return (-EINVAL); -+ } else { -+ switch ((value & 0x30) >> 4) { -+ case 0: { *ptr++ = '0' + (value & 0x7); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ -+ shift = 16; -+ broadcast = 0; -+ while ( 1 ) { -+ b = (route->Values[part] >> shift) & 0xf; -+ -+ if ( broadcast ) { -+ /* about to pick up the second byte of a broadcast pair */ -+ broadcast = 0; -+ } else { -+ if ( b & 0x8) { -+ /* output link */ -+ *ptr++ = '0' + (b & 0x7); -+ } else { -+ if ( b & 0x4) { -+ /* broad cast */ -+ broadcast = 1; -+ } else { -+ switch ( b & 0x3 ) { -+ case 0: { *ptr++ = 0 ; return (0); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ } -+ } -+ -+ shift += 4; -+ if ( part != 0 ) { -+ if ( shift > 36) { -+ /* too far, now in the crc value */ -+ strcpy(routeStr,"Invalid route length"); -+ return (-EINVAL); -+ } -+ } else { -+ if ( shift >= 64) { -+ /* move to the next 64 bits */ -+ part = 1; -+ shift = 2; -+ } -+ } -+ } -+ -+ /* never reached */ -+ return (-EINVAL); -+} -+ -+static int elan4_hardware_lock_count = 0; -+ -+void -+elan4_hardware_lock_check(ELAN4_DEV *dev, char *from) -+{ -+ -+ int reg = read_reg32 (dev, CommandSchedDataPort[2]); -+ -+ /* dont spam too much */ -+ if ( elan4_hardware_lock_count++ > 10) return; -+ -+ printk ("elan%d: %s timed out intmask=0x%x InterruptReg=0x%x (%d)\n", dev->dev_instance, from, dev->dev_intmask, read_reg32 (dev, InterruptReg), elan4_hardware_lock_count); -+ -+ /* an 0xF in either and we need to output more */ -+ if ((reg & 0xf0) || ( reg & 0x0f)) { -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ printk ("elan%d: CommandSchedDataPort[0] 0x%016x 0x%016x 0x%016x 0x%016x\n", -+ dev->dev_instance, -+ read_reg32 (dev, CommandSchedDataPort[0]), -+ read_reg32 (dev, CommandSchedDataPort[1]), -+ reg, -+ read_reg32 (dev, CommandSchedDataPort[3]) -+ ); -+ /* dump out /proc/qsnet/elan4/deviceN/stats/cproctimeoutroutes */ -+ printk ("elan%d: cat of /proc/qsnet/elan4/device%d/stats/cproctimeoutroutes\n", dev->dev_instance, dev->dev_instance); -+ -+ ringbuf = &dev->dev_cproc_timeout_routes; -+ -+ if (!ringbuf) -+ printk ("elan%d: No stats available\n", dev->dev_instance); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ printk ( "elan%d: Route %llx %llx->%s\n", dev->dev_instance, (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ printk ( "elan%d: Route %llx %llx->%s\n", dev->dev_instance, (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ } -+ } -+} -+static void -+dev_haltop_timer_func (unsigned long arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ elan4_hardware_lock_check(dev,"haltop"); -+} -+ -+void -+elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ /* add to the end of the halt operations list */ -+ list_add_tail (&op->op_link, &dev->dev_haltop_list); -+ -+ if ((dev->dev_haltop_mask & op->op_mask) != op->op_mask) -+ { -+ dev->dev_haltop_mask |= op->op_mask; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ mod_timer (&dev->dev_haltop_timer, (jiffies + (HZ*10))); /* 10 seconds */ -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+void -+elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_ONESHOT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ writeq ((op->op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, (void *)(cq->cq_mapping)); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_PERSISTENT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_del (&op->op_link); -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+static __inline__ void -+__issue_dma_flushop_cmd (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ E4_uint64 eventaddr = dev->dev_tproc_space + 64; -+ -+ writeq (WAIT_EVENT_CMD | eventaddr, (void *)(cq->cq_mapping)); -+ writeq (0, (void *)(cq->cq_mapping)); -+ writeq (0, (void *)(cq->cq_mapping)); -+ writeq (0, (void *)(cq->cq_mapping)); -+ -+ writeq (DMA_ShMemWrite | RUN_DMA_CMD, (void *)(cq->cq_mapping)); -+ writeq (0 /* cookie */, (void *)(cq->cq_mapping)); -+ writeq (0 /* vproc */, (void *)(cq->cq_mapping)); -+ writeq (0 /* srcAddr */, (void *)(cq->cq_mapping)); -+ writeq (0 /* dstAddr */, (void *)(cq->cq_mapping)); -+ writeq (0 /* srcEvent */, (void *)(cq->cq_mapping)); -+ writeq (0 /* dstEvent */, (void *)(cq->cq_mapping)); -+ writeq (SET_EVENT_CMD, (void *)(cq->cq_mapping)); -+} -+ -+static void -+handle_dma_flushops_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned int hipri = ((unsigned long) arg & 1); -+ E4_uint64 status = dev->dev_dma_flushop[hipri].status; -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ E4_uint32 completedPtr = CQ_CompletedPtr(queuePtrs); -+ E4_uint32 size = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ unsigned long flags; -+ -+ /* -+ * Since we're called from a main interrupt which was issued through the approriate -+ * flushcq the command queue descriptor for dma flushing can no longer be in the -+ * insert cache, nor can it be in the extractor (as it's trapped), hence it is -+ * safe to modify the completed pointer -+ */ -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (status != 0); -+ -+ /* skip over either the DMA/SETEVENT or just the SETEVENT depending on the trap type */ -+ if (CPROC_TrapType (status) == CommandProcDmaQueueOverflow) -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 64) & (size - 1)); -+ else -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 8) & (size - 1)); -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs) + 4, -+ ((queuePtrs >> 32) & ~CQ_PtrOffsetMask) | (completedPtr & CQ_PtrOffsetMask)); -+ -+ elan4_restartcq (dev, dev->dev_dma_flushop[hipri].cq); -+ -+ if (! list_empty (&dev->dev_dma_flushop[hipri].list)) -+ __issue_dma_flushop_cmd (dev, dev->dev_dma_flushop[hipri].cq); -+ -+ dev->dev_dma_flushop[hipri].status = 0; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+} -+ -+static void -+handle_dma_flushops (ELAN4_DEV *dev, E4_uint64 status, int cqnum) -+{ -+ unsigned int hipri = (cqnum == elan4_cq2num(dev->dev_dma_flushop[1].cq) ? 1 : 0); -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ ELAN4_CQ *flushq = dev->dev_flush_cq[elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1)]; -+ struct list_head *ops; -+ unsigned long flags; -+ int qfull,count; -+ E4_uint64 queuePtrs; -+ LIST_HEAD(list); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (cqnum == elan4_cq2num (dev->dev_dma_flushop[hipri].cq)); -+ ASSERT (! list_empty (&dev->dev_dma_flushop[hipri].list)); -+ ASSERT (dev->dev_dma_flushop[hipri].status == 0); -+ -+ /* remove the whole list */ -+ ops = dev->dev_dma_flushop[hipri].list.next; -+ -+ list_del_init (&dev->dev_dma_flushop[hipri].list); -+ -+ /* and add it to our local list */ -+ list_add_tail (&list, ops); -+ -+ /* now determine whether the queue was full - since it cannot be empty -+ * then if the front and back pointers are the same then it is full */ -+ queuePtrs = hipri ? read_reg64 (dev, DProcHighPriPtrs) : read_reg64 (dev, DProcLowPriPtrs); -+ qfull = (E4_QueueFrontPointer (queuePtrs) == E4_QueueBackPointer (queuePtrs)); -+ -+ if (CPROC_TrapType(status) == CommandProcDmaQueueOverflow && !qfull) -+ printk (" ******* queue overflow trap - but queue not full\n"); -+ -+ if (qfull && CPROC_TrapType(status) != CommandProcDmaQueueOverflow) -+ printk (" ****** queue full - but not overflow trap : %llx %llx %x\n", -+ read_reg64 (dev, DProcLowPriPtrs), read_reg64 (dev, DProcHighPriPtrs), CPROC_TrapType(status)); -+ -+ /* Store the status register, this also indicates that the intop is pending */ -+ dev->dev_dma_flushop[hipri].status = status; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ /* Issue a main interrupt command to the approriate flush command queue, -+ * which will then safely update the completed pointer to skip over the -+ * command which has trapped, also prevent any new commands to be issued -+ * to the command queue. -+ */ -+ dev->dev_dma_flushop[hipri].intop.op_function = handle_dma_flushops_intop; -+ dev->dev_dma_flushop[hipri].intop.op_arg = (void *) (unsigned long) hipri; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_dma_flushop[hipri].intop); -+ -+ /* now execute all operations */ -+ for (count = 0; ! list_empty (&list); count++) -+ { -+ ELAN4_DMA_FLUSHOP *op = list_entry (list.next, ELAN4_DMA_FLUSHOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg, qfull); -+ } -+ -+ /* finally release the "reasons" for halting */ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ if ((dev->dev_halt_dproc_count -= count) == 0) -+ elan4_set_schedstatus (dev, 0); -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ return; -+} -+ -+void -+elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ if (dev->dev_halt_dproc_count++ == 0) /* ensure that the DMA processor cannot */ -+ elan4_set_schedstatus (dev, 0); /* execute the DMA we issue. */ -+ -+ if (list_empty (&dev->dev_dma_flushop[hipri].list) && dev->dev_dma_flushop[hipri].status == 0) -+ __issue_dma_flushop_cmd (dev, dev->dev_dma_flushop[hipri].cq); -+ -+ list_add_tail (&op->op_link, &dev->dev_dma_flushop[hipri].list); -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static void -+enable_elan_errors (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+} -+ -+#define ERROR_DISABLE_PERIOD (hz/2) -+#define ERROR_SAMPLE_PERIOD (hz/10) -+#define ERROR_LIMIT (100) -+ -+static __inline__ void -+check_error_rate (ELAN4_DEV *dev) -+{ -+ if (dev->dev_error_time == (lbolt/ERROR_SAMPLE_PERIOD)) -+ { -+ if (++dev->dev_errors_per_period >= ERROR_LIMIT && (dev->dev_intmask & INT_ErrorInterrupts)) -+ { -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->dev_error_timeoutid, enable_elan_errors, (void *) dev, ERROR_DISABLE_PERIOD); -+ } -+ } -+ else -+ { -+ dev->dev_error_time = (lbolt/ERROR_SAMPLE_PERIOD); -+ dev->dev_errors_per_period = 0; -+ } -+} -+ -+static __inline__ int -+handle_mainints (ELAN4_DEV *dev, int nticks, int nintr) -+{ -+ E4_uint32 nfptr = dev->dev_interruptq_nfptr; -+ E4_uint32 bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ E4_uint32 qsize = E4_QueueSize(elan4_interruptq_size); -+ E4_uint32 qmask = qsize - 1; -+ long tlim = lbolt + nticks; -+ int done = 0; -+ unsigned long flags; -+ -+ do { -+ int todo = ((bptr - nfptr) & qmask) / E4_MainIntEntrySize; -+ -+ ASSERT (todo > 0); -+ -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: fptr %x nfptr %x bptr %x : %d todo\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr, todo); -+ -+ if (nintr >= 0 && (done + todo) > nintr) /* punt because too may to do in interrupt */ -+ { -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: punting (done %d todo %d) (bptr %x fptr %x)\n", -+ done, todo, bptr, read_reg32 (dev, MainIntQueuePtrs.s.Front)); -+ -+ return 1; -+ } -+ -+ BucketDevStat (dev, s_mainints, todo, MainIntBuckets); -+ -+ /* consume all the entries in the queue which we think are there */ -+ do { -+ E4_uint64 value = elan4_sdram_readq (dev, nfptr); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, E4_MAIN_INT_CTX (value)); -+ E4_uint32 fptr = nfptr; -+ -+ PRINTF2 (DBG_DEVICE, DBG_MAININT, "handle_mainints: process cookie %llx - write fptr=%x\n", value, nfptr); -+ -+ if (ctxt == NULL) -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_mainints: context %d invalid\n", E4_MAIN_INT_CTX (value)); -+ else -+ ctxt->ctxt_ops->op_interrupt (ctxt, E4_MAIN_INT_COOKIE(value)); -+ -+ /* compute the next queue front pointer, before updating the front pointer -+ * since we need to ensure that elan4_queue_mainintop doesn't see the queue -+ * as being empty if an extra interrupt is queued in between */ -+ dev->dev_interruptq_nfptr = nfptr = (nfptr & ~qmask) | ((nfptr + sizeof (E4_uint64)) & qmask); -+ -+ /* update the queue front pointer, doing this will clear the -+ * interrupt for *all* interrupt cookies which have previously -+ * been added to the queue */ -+ write_reg32 (dev, MainIntQueuePtrs.s.Front, E4_QueueFrontValue (fptr, elan4_interruptq_size)); -+ pioflush_reg (dev); -+ } while (bptr != nfptr); -+ -+ /* re-sample the back pointer and if it's different from the previous -+ * queue front pointer, then the queue has something on it again */ -+ done += todo; -+ -+ if ((nticks > 0 && ((int) (lbolt - tlim)) > 0)) /* been executing for too long in thread */ -+ return 1; -+ -+ bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ -+ PRINTF3 (DBG_DEVICE, DBG_MAININT, "handle_mainints: resample : fptr %x nfptr %x bptr %x\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr); -+ -+ /* at this point we've made some space in the interrupt queue, -+ * so check to see if we've got anything to restart */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ while (! list_empty (&dev->dev_interruptq_list)) -+ { -+ ELAN4_INTOP *op = list_entry (dev->dev_interruptq_list.next, ELAN4_INTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ op->op_function (dev, op->op_arg); -+ } -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ } while (bptr != nfptr); -+ -+ return 0; -+} -+ -+static void -+elan4_mainint_thread (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init ("elan4_mainint"); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ for (;;) -+ { -+ if (dev->dev_stop_threads) -+ break; -+ -+ if (! (dev->dev_intmask & INT_MainInterrupt)) -+ { -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ if (handle_mainints (dev, elan4_mainint_resched_ticks, -1)) -+ BumpDevStat (dev, s_mainint_rescheds); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ } -+ -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ -+ dev->dev_mainint_stopped = 1; -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ if (dev->dev_interruptq_nfptr == read_reg32 (dev, MainIntQueuePtrs.s.Back)) -+ op->op_function (dev, op->op_arg); -+ else -+ list_add_tail (&op->op_link, &dev->dev_interruptq_list); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_cproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint32 cqptr = read_reg32 (dev, CommandControl.CommandQueueDescsBase) & E4_QueueDescPtrMask; -+ unsigned cqnum = ((cqptr - dev->dev_cqaddr) / sizeof (E4_CommandQueueDesc)); -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint64 status = read_reg64 (dev, CProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, CQ_Context (control)); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "handle_cproc_trap: cqnum=%d status=%016llx control=%016llx TrapType\n", -+ cqnum, status, control, CPROC_TrapType (status)); -+ PRINTF4 (DBG_DEVICE, DBG_INTR, " %016llx %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control))); -+ -+ BumpDevStat (dev, s_cproc_traps); -+ -+ if (ctxt == NULL) -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_cproc_trap: context %d is invalid\n", CQ_Context (control)); -+ else -+ ctxt->ctxt_ops->op_cproc_trap (ctxt, status, cqnum); -+ -+ return (CPROC_TrapType (status) == CommandProcWaitTrap ? SCH_RestartCProc | SCH_RestartEProc : SCH_RestartCProc); -+} -+ -+static __inline__ E4_uint32 -+handle_dproc_trap (ELAN4_DEV *dev, int unit) -+{ -+ E4_uint64 status = (unit == 0) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ E4_uint32 restart = (unit == 0) ? SCH_RestartDma0Proc : SCH_RestartDma1Proc; -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, DPROC_Context (status)); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "handle_dproc_trap: unit %d context %d%s\n", unit, DPROC_Context(status), -+ DPROC_PrefetcherFault(status) ? " (prefetcher)" : ""); -+ -+ if (DPROC_PrefetcherFault (status)) -+ restart |= SCH_RestartDmaPrefetchProc; -+ -+ BumpDevStat (dev, s_dproc_traps); -+ -+ if (ctxt == NULL) -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_dproc_trap: context %d is invalid\n", DPROC_Context (status)); -+ else -+ ctxt->ctxt_ops->op_dproc_trap (ctxt, status, unit); -+ -+ return (restart); -+} -+ -+static __inline__ E4_uint32 -+handle_eproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, EProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, EPROC_Context (status)); -+ -+ BumpDevStat (dev, s_eproc_traps); -+ -+ if (ctxt == NULL) -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_eproc_trap: context %d is invalid\n", EPROC_Context (status)); -+ else -+ ctxt->ctxt_ops->op_eproc_trap (ctxt, status); -+ -+ return (SCH_RestartEProc); -+} -+ -+static __inline__ E4_uint32 -+handle_tproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, TProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, TPROC_Context (status)); -+ -+ BumpDevStat (dev, s_tproc_traps); -+ -+ if (ctxt == NULL) -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_tproc_trap: context %d is invalid\n", TPROC_Context (status)); -+ else -+ ctxt->ctxt_ops->op_tproc_trap (ctxt, status); -+ -+ return (SCH_RestartTProc); -+} -+ -+static __inline__ void -+handle_haltints (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ struct list_head list = LIST_HEAD_INIT(list); -+ E4_uint32 mask = 0; -+ E4_uint32 active = 0; -+ struct list_head *entry; -+ struct list_head *next; -+ unsigned long flags; -+ -+ BumpDevStat (dev, s_haltints); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ list_for_each_safe (entry, next, &dev->dev_haltop_list) { -+ ELAN4_HALTOP *op = list_entry (entry, ELAN4_HALTOP, op_link); -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "handle_haltints: op=%p op_mask=%x intreg=%x\n", op, op->op_mask, intreg); -+ -+ if ((op->op_mask & intreg) != op->op_mask) -+ mask |= op->op_mask; -+ else -+ { -+ list_del (&op->op_link); /* remove from list */ -+ list_add_tail (&op->op_link, &list); /* add to local list */ -+ -+ active |= op->op_mask; -+ } -+ } -+ -+ ASSERT (dev->dev_haltop_mask == (mask | active)); -+ -+ dev->dev_haltop_mask = mask; -+ -+ if (list_empty (&dev->dev_haltop_list)) { -+ del_timer(&dev->dev_haltop_timer); -+ } -+ -+ if (list_empty (&list)) -+ elan4_set_schedstatus (dev, intreg); -+ else -+ { -+ dev->dev_haltop_active = active; -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ ELAN4_HALTOP *op = list_entry (list.next, ELAN4_HALTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg); -+ } -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ dev->dev_haltop_active = 0; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_iproc_trap (ELAN4_DEV *dev, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ E4_uint64 status = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, IProcStatusCntxAndTrType)); -+ E4_uint32 filter = elan4_read_filter (dev, IPROC_NetworkContext (status)); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+ -+ /* -+ * The context is not valid in the following case : -+ * ack not been sent AND bad CRC/bad length. -+ * -+ * NOTE TransCRCStatus and BadLength only valid if NOT an EopTrap. -+ */ -+ ASSERT ((IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) || IPROC_EOPTrap (status) || -+ (IPROC_TransCRCStatus (status) == CRC_STATUS_GOOD && !IPROC_BadLength (status))); -+ -+ BumpDevStat (dev, s_iproc_traps); -+ -+ if (ctxt == NULL) -+ { -+ ELAN4_DEBUG_TRIGGER (&dev->dev_ctxt, "elan4:handle_iproc_trap: network %d context %d (%x) is invalid\n", IPROC_NetworkContext (status), -+ filter & E4_FILTER_CONTEXT_MASK, filter); -+ -+ elan4_write_filter (dev, IPROC_NetworkContext (status), E4_FILTER_DISCARD_ALL); -+ } -+ else -+ ctxt->ctxt_ops->op_iproc_trap (ctxt, status, unit); -+ -+ return (SCH_RestartCh0LowPriInput << unit); -+} -+ -+void -+handle_pcimemerr (ELAN4_DEV *dev) -+{ -+ elan4_pcierror (dev); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_sdramint (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, SDRamECCStatus); -+ E4_uint64 ConfigRegValue = read_reg64 (dev, SDRamConfigReg); -+ char errstr[200]; -+ int i; -+ int Found = 0; -+ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_sdramint\n"); -+ -+ printk ("elan%d: ECC Error %s status=%llx\n", -+ dev->dev_instance, elan4_sdramerr2str (dev, status, ConfigRegValue, errstr), (long long)status); -+ -+ if (!ECC_UncorrectableErr(status) && !ECC_MultUncorrectErrs(status)) -+ printk ("elan%d: ECC error data=%016llx\n", dev->dev_instance, elan4_sdram_readq (dev, ECC_Addr(status))); -+ -+ if (ECC_CorrectableErr (status)) -+ BumpDevStat (dev, s_correctable_errors); -+ if (ECC_MultCorrectErrs (status)) -+ BumpDevStat (dev, s_multiple_errors); -+ -+ if (ECC_UncorrectableErr(status)) -+ panic ("elan%d: uncorrectable ECC error\n", dev->dev_instance); -+ if (ECC_MultUncorrectErrs(status)) -+ panic ("elan%d: muliple uncorrectable ECC error\n", dev->dev_instance); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_SDRAM_ERROR); -+ -+ /* -+ * Now try to test for a read/write error type. -+ * This can only be done if it was a correctable error as an uncorrectable error might lockup the node. -+ * It should not be attempted if the data is in the dcache because fetching again would not generate an -+ * error even if the problem was a read, and flushing the cache line would fix a write probelm. -+ * Reading the same location again should cause a new error if the problem was caused by a bad write. -+ */ -+ if (elan4_eccerr_recheck && -+ (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) && -+ ECC_CorrectableErr(status) && !ECC_UncorrectableErr(status)) -+ { -+ E4_uint64 status2; -+ E4_uint64 Addr = ECC_Addr(status) & ~(E4_CACHELINE_SIZE-1); -+ E4_uint32 SetIndex = (Addr >> 6) & ~(E4_NumCacheLines-1); -+ int InCache = 0; -+ -+ /* check the cache tags to see if the data has been read into a cache line. */ -+ for (i=0; idev_regs + offsetof(E4_Registers, Tags.Tags[i][SetIndex].Value)) & 0x7fffe000) == (Addr & 0x7fffe000)) -+ { -+ InCache = 1; -+ break; -+ } -+ -+ if (InCache == 0) -+ { -+ printk ("elan%d: checking if ECC error was read or write\n", dev->dev_instance); -+ -+ /* Now read and throw away the answer. A read of a word will schedule a block read of sdram */ -+ elan4_sdram_readq (dev, Addr); -+ status2 = read_reg64 (dev, SDRamECCStatus); -+ if ((Addr == (ECC_Addr(status2) & ~(E4_CACHELINE_SIZE-1))) && ECC_CorrectableErr(status2)) // Write error. -+ { -+ status = (status & ~0x0030000000000000ULL) | 0x0010000000000000ULL; -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_SDRAM_ERROR); -+ } -+ else -+ status = (status & ~0x0030000000000000ULL) | 0x0020000000000000ULL; -+ } -+ else -+ status = status | 0x0030000000000000ULL; -+ } -+ else -+ status &= ~0x0030000000000000ULL; -+ -+ /* search for this error already being logged */ -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i >= 0; i--) -+ if ((dev->dev_sdramerrs[i].EccStatus == status) && (dev->dev_sdramerrs[i].ConfigReg == ConfigRegValue)) -+ { -+ Found = 1; -+ dev->dev_sdramerrs[i].ErrorCount += 1; // Keep a count. -+ break; -+ } -+ -+ /* stash the status for /proc */ -+ if (!Found) -+ { -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i > 0; i--) -+ dev->dev_sdramerrs[i] = dev->dev_sdramerrs[i-1]; -+ dev->dev_sdramerrs[0].EccStatus = status; -+ dev->dev_sdramerrs[0].ConfigReg = ConfigRegValue; -+ dev->dev_sdramerrs[0].ErrorCount = 1; // First error -+ } -+ -+ check_error_rate (dev); -+} -+ -+static void -+clear_linkerr_led (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+} -+ -+void -+handle_linkerror (ELAN4_DEV *dev) -+{ -+ E4_uint32 LinkState; -+ E4_uint32 CurrState = read_reg32 (dev, LinkControlReg); -+ -+ /* Set for reading errors. */ -+ write_reg32 (dev, LinkControlReg, -+ (CurrState = CurrState & ~((LCONT_TEST_CONTROL_MASK << LCONT_TEST_CONTROL_SHIFT) | -+ (LCONT_TEST_VALUE_MASK << LCONT_TEST_VALUE_SHIFT)))); -+ LinkState = LCONT_LINK_STATE(CurrState = read_reg32 (dev, LinkControlReg)); -+ -+#ifdef DEBUG -+ { -+ E4_uint8 ErrorMsg[256], DataErrorVal[64]; -+ -+ strcpy (ErrorMsg, "handle_linkerror:"); -+ if (LinkState & LS_LockError) strcat (ErrorMsg, " LockError"); -+ if (LinkState & LS_DeskewError) strcat (ErrorMsg, " DeskewError"); -+ if (LinkState & LS_PhaseError) strcat (ErrorMsg, " PhaseError"); -+ if (LinkState & LS_DataError) -+ { -+ E4_uint32 error[4]; -+ E4_uint32 i; -+ strcat (ErrorMsg, " DataError"); -+ /* Errors */ -+ for(i = LRS_ErrorVal8to0; i <= LRS_ErrorVal35to27; i++) -+ { -+ write_reg32 (dev, LinkControlReg, -+ CurrState | LCONT_TEST_VALUE(i) | (LCONT_READ_STATE << LCONT_TEST_CONTROL_SHIFT)); -+ error[i - LRS_ErrorVal8to0] = LCONT_LINK_STATE(read_reg32 (dev, LinkControlReg)); -+ } -+ sprintf (DataErrorVal, " Link State Error Val: %09llx %03x %03x %03x %03x", -+ (unsigned long long) ((error[0] & 0x1ffUL) | ((error[1] & 0x1ffUL) << 9) | -+ ((error[2] & 0x1ffUL) << 18) | ((error[3] & 0x1ffUL) << 27)), -+ error[3], error[2], error[1], error[0]); -+ strcat (ErrorMsg, DataErrorVal); -+ } -+ if (LinkState & LS_FifoOvFlow0) strcat (ErrorMsg, " FifoOvFlow0"); -+ if (LinkState & LS_FifoOvFlow1) strcat (ErrorMsg, " FifoOvFlow1"); -+ if (LinkState & LS_Mod45Changed) strcat (ErrorMsg, " Mod45Changed"); -+ if (LinkState & LS_PAckNotSeenError) strcat (ErrorMsg, " PAckNotSeenError"); -+ strcat (ErrorMsg, "\n"); -+ PRINTF0 (DBG_DEVICE, DBG_INTR, ErrorMsg); -+ } -+#endif -+ -+ BumpDevStat (dev, s_link_errors); -+ -+ if (LinkState & LS_LockError) BumpDevStat (dev, s_lock_errors); -+ if (LinkState & LS_DeskewError) BumpDevStat (dev, s_deskew_errors); -+ if (LinkState & LS_PhaseError) BumpDevStat (dev, s_phase_errors); -+ if (LinkState & LS_DataError) BumpDevStat (dev, s_data_errors); -+ if (LinkState & LS_FifoOvFlow0) BumpDevStat (dev, s_fifo_overflow0); -+ if (LinkState & LS_FifoOvFlow1) BumpDevStat (dev, s_fifo_overflow1); -+ if (LinkState & LS_Mod45Changed) BumpDevStat (dev, s_mod45changed); -+ if (LinkState & LS_PAckNotSeenError) BumpDevStat (dev, s_pack_not_seen); -+ -+ PULSE_SCHED_RESTART (dev, SCH_ClearLinkErrorInt); -+ -+ /* schedule a timer to clear the link error LED, so that it stays on -+ * for a second for every link error that occurs */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && !timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ schedule_timer_fn (&dev->dev_linkerr_timeoutid, clear_linkerr_led, (void *) dev, HZ); -+ -+ /* -+ * Signal the link error to the switch by -+ * enabling the INT_LinkPortKeyFail bit. -+ * Always clear the error bit as the switch -+ * might have produced a spurious "ack" ... -+ */ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_LINKPORT_INT); -+ -+ if (dev->dev_linkerr_signalled == 0) -+ dev->dev_linkerr_signalled = 1; -+ else -+ dev->dev_linkerr_signalled = 2; -+ -+ ENABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_linkportkeyfail (ELAN4_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_linkportkeyfail\n"); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_LINKPORT_INT); -+ -+ if (! dev->dev_linkerr_signalled) -+ { -+ /* Hmmm - they're not playing ball */ -+ BumpDevStat (dev, s_linkport_keyfail); -+ -+ DISABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ } -+ else -+ { -+ /* If more link errors have occured since we -+ * signalled the error, then leave it signalled. */ -+ if (--dev->dev_linkerr_signalled == 0) -+ DISABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ } -+} -+ -+ -+static __inline__ void -+__elan4_4msi0 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ unsigned long flags; -+ -+ if (intreg & intmask & INT_MainInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_MainInterrupt); -+ -+ if (handle_mainints (dev, -1, elan4_mainint_punt_loops) == 0) -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ else -+ { -+ BumpDevStat (dev, s_mainint_punts); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ kcondvar_wakeupone (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ } -+ } -+} -+ -+static __inline__ void -+__elan4_4msi1 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi1: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ -+ if (intreg & intmask & INT_CProc) -+ restart |= handle_cproc_trap (dev); -+ if (intreg & intmask & INT_EProc) -+ restart |= handle_eproc_trap (dev); -+ if (intreg & intmask & INT_Dma0Proc) -+ restart |= handle_dproc_trap (dev, 0); -+ if (intreg & intmask & INT_Dma1Proc) -+ restart |= handle_dproc_trap (dev, 1); -+ if (intreg & intmask & INT_TProc) -+ restart |= handle_tproc_trap (dev); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+ -+ if (intreg & (INT_Halted|INT_Discarding)) -+ handle_haltints (dev, intreg); -+} -+ -+static __inline__ void -+__elan4_4msi2 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi2: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ if (intreg & intmask & INT_IProcCh0LowPri) -+ restart |= handle_iproc_trap (dev, 0); -+ -+ if (intreg & intmask & INT_IProcCh1LowPri) -+ restart |= handle_iproc_trap (dev, 1); -+ -+ if (intreg & intmask & INT_IProcCh0HighPri) -+ restart |= handle_iproc_trap (dev, 2); -+ -+ if (intreg & intmask & INT_IProcCh1HighPri) -+ restart |= handle_iproc_trap (dev, 3); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+} -+ -+static __inline__ void -+__elan4_4msi3 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi3: %x\n", intreg); -+ -+ if (intreg & intmask & INT_PciMemErr) -+ handle_pcimemerr (dev); -+ -+ if (intreg & intmask & INT_SDRamInt) -+ handle_sdramint (dev); -+ -+ if (intreg & intmask & INT_LinkError) -+ handle_linkerror (dev); -+ -+ if (intreg & intmask & INT_LinkPortKeyFail) -+ handle_linkportkeyfail (dev); -+} -+ -+int -+elan4_1msi0 (ELAN4_DEV *dev) -+{ -+ E4_uint32 intmask = dev->dev_intmask; -+ E4_uint32 intreg; -+ -+ if (intmask == 0 || ((intreg = read_reg32 (dev, InterruptReg)) & intmask) == 0) -+ return (0); -+ -+ BumpDevStat (dev, s_interrupts); -+ -+ do { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "elan4_1msi0: %x\n", intreg); -+ -+ if (intreg & intmask & INT_MSI0) -+ __elan4_4msi0(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI1) -+ __elan4_4msi1(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI2) -+ __elan4_4msi2(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI3) -+ __elan4_4msi3(dev, intreg, intmask); -+ -+ if (intreg & INT_LinkPortKeyFail) -+ handle_linkportkeyfail (dev); -+ -+ /* must ensure that the read of the interrupt mask -+ * completes before the read of the interrupt register -+ * since the main interrupt thread clears it's interrupt -+ * and then re-enables it in the interrupt mask. */ -+ intmask = dev->dev_intmask; -+ mb(); -+ intreg = read_reg32 (dev, InterruptReg); -+ -+ } while ((intreg & intmask) != 0); -+ -+ return (1); -+} -+ -+/* local context management */ -+int -+elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops) -+{ -+ unsigned long flags; -+ int tbl; -+ int i; -+ -+ ctxt->ctxt_dev = dev; -+ ctxt->ctxt_ops = ops; -+ ctxt->ctxt_features = dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES]; -+ -+ INIT_LIST_HEAD (&ctxt->ctxt_cqalist); -+ spin_lock_init (&ctxt->ctxt_mmulock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ ctxt->shuffle_needed[tbl] = 0; -+ for(i=0; ishuffle[tbl][i] = -1; -+ -+ KMEM_ZALLOC (ctxt->ctxt_mmuhash[tbl], ELAN4_HASH_ENTRY **, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *), 1); -+ -+ if (ctxt->ctxt_mmuhash[tbl] == NULL) -+ { -+ if (tbl != 0) -+ KMEM_FREE (ctxt->ctxt_mmuhash[0], dev->dev_hashsize[0] * sizeof (ELAN4_HASH_ENTRY *)); -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+ return (-ENOMEM); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ if ((ctxt->ctxt_num = bt_freebit (dev->dev_ctxmap, (1 << dev->dev_ctxtableshift))) >= 0) -+ { -+ /* chain onto the lists of all contexts */ -+ list_add (&ctxt->ctxt_link, &dev->dev_ctxt_list); -+ -+ BT_SET (dev->dev_ctxmap, ctxt->ctxt_num); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ if (ctxt->ctxt_num >= 0) -+ proc_insertctxt(dev, ctxt); -+ -+ return (ctxt->ctxt_num < 0 ? -ENOMEM : 0); -+} -+ -+void -+elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt) -+{ -+ unsigned long flags; -+ int tbl; -+ -+ proc_removectxt(dev, ctxt); -+ -+ /* remove from list of contexts */ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_del (&ctxt->ctxt_link); -+ -+ BT_CLEAR (dev->dev_ctxmap, ctxt->ctxt_num); -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ spin_lock_destroy (&ctxt->ctxt_info_lock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ KMEM_FREE (ctxt->ctxt_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *)); -+ -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_CTXT * -+elan4_localctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_for_each (entry, &dev->dev_ctxt_list) { -+ ELAN4_CTXT *ctxt = list_entry (entry, ELAN4_CTXT, ctxt_link); -+ -+ if (ctxt->ctxt_num == num) -+ { -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ return (ctxt); -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return ((ELAN4_CTXT *) NULL); -+} -+ -+ELAN4_CTXT * -+elan4_networkctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ E4_uint32 filter = elan4_read_filter (dev, num); -+ -+ if ((filter & E4_FILTER_CONTEXT_MASK) == INVALID_CONTEXT) -+ return NULL; -+ else -+ return elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+} -+ -+/* network context management */ -+int -+elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int res = 0; -+ E4_uint32 filter; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ filter = elan4_read_filter (dev, ctxnum); -+ if ((filter & E4_FILTER_CONTEXT_MASK) != INVALID_CONTEXT) -+ { -+ PRINTF2 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d filter=%x -> EBUSY\n", ctxnum, filter); -+ res = -EBUSY; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d - SUCCESS\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return (res); -+} -+ -+void -+elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_detach_filter: detach from network context %d\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, INVALID_CONTEXT | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF6 (ctxt, DBG_NETWORK_CTX, "elan4_set_filter: set filter state %x for network context %d <%s%s%s%s>\n", state, ctxnum, -+ (state & E4_FILTER_DISCARD_ALL) ? "discard," : "", -+ (state & E4_FILTER_ACKOK_ALL) ? "ack-ok," : "", -+ (state & E4_FILTER_HIGH_PRI) ? "high-pri," : "", -+ (state & E4_FILTER_STATS) ? "stats," : ""); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | state); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_uint32 value = tbl ? (E4_VPT_VALID | E4_VPT_VALUE(tbl->tbl_entries, tbl->tbl_size)) : 0; -+ -+ /* and insert into the vp table */ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (ctxt->ctxt_num * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, VirtualProcessTable)), value); -+ pioflush_sdram(dev); -+ -+ PULSE_SYSCONTROL (dev, CONT_ROUTE_FLUSH); -+} -+ -+/* command queue management */ -+ELAN4_CQA * -+elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ cqa->cqa_ref++; -+ -+ spin_unlock (&dev->dev_cqlock); -+ return cqa; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ return NULL; -+} -+ -+void -+elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el, *nel; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each_safe (el, nel, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ if (--cqa->cqa_ref || bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+ return; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ printk ("elan4_putcqa: idx %d not found\n", idx); -+ BUG(); -+} -+ -+static ELAN4_CQ * -+elan4_getcq (ELAN4_CTXT *ctxt, unsigned int type) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa; -+ struct list_head *el; -+ int cidx, didx; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_type == type && (cidx = bt_freebit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA)) >=0) -+ { -+ BT_SET (cqa->cqa_bitmap, cidx); -+ -+ spin_unlock (&dev->dev_cqlock); -+ return &cqa->cqa_cq[cidx]; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ /* allocate a new cqa and it's chunk of command queue descriptors */ -+ KMEM_ZALLOC (cqa, ELAN4_CQA *, sizeof (ELAN4_CQA), 1); -+ if (cqa == NULL) -+ return NULL; -+ -+ spin_lock (&dev->dev_cqlock); -+ cidx = bt_freebit (ctxt->ctxt_cqamap, ELAN4_MAX_CQA); -+ -+ /* On architectures which have MTRR registers for write-combinig -+ * the top command queues from dev->dev_cqreorder upwards are -+ * used for reordered queues. Without MTRR registers any page -+ * sized group can use write combinig through the ptes. */ -+ if (dev->dev_cqreorder == 0) -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqcount/ELAN4_CQ_PER_CQA); -+ else -+ { -+ if ((type & CQ_Reorder) != 0) -+ didx = bt_nextbit (dev->dev_cqamap, dev->dev_cqcount/ELAN4_CQ_PER_CQA, (dev->dev_cqreorder/ELAN4_CQ_PER_CQA) - 1, 0); -+ else -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqreorder/ELAN4_CQ_PER_CQA); -+ } -+ -+ if (cidx < 0 || didx < 0) -+ { -+ spin_unlock (&dev->dev_cqlock); -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ return NULL; -+ } -+ -+ BT_SET (ctxt->ctxt_cqamap, cidx); -+ BT_SET (dev->dev_cqamap, didx); -+ -+ cqa->cqa_idx = cidx; -+ cqa->cqa_type = type; -+ cqa->cqa_cqnum = (didx * ELAN4_CQ_PER_CQA); -+ -+ list_add_tail (&cqa->cqa_link, &ctxt->ctxt_cqalist); -+ -+ /* initialise the cqa struct */ -+ for (cidx = 0; cidx < ELAN4_CQ_PER_CQA; cidx++) -+ { -+ cqa->cqa_cq[cidx].cq_idx = cidx; -+ cqa->cqa_cq[cidx].cq_cqa = cqa; -+ } -+ -+ /* no mappings yet */ -+ cqa->cqa_ref = 0; -+ -+ /* we're going to return entry zero */ -+ BT_SET (cqa->cqa_bitmap, 0); -+ spin_unlock (&dev->dev_cqlock); -+ -+ return &cqa->cqa_cq[0]; -+} -+ -+static void -+elan4_putcq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa = cq->cq_cqa; -+ -+ spin_lock (&dev->dev_cqlock); -+ -+ BT_CLEAR (cqa->cqa_bitmap, cq->cq_idx); -+ -+ if (bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1 || cqa->cqa_ref) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+} -+ -+ELAN4_CQ * -+elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned perm, unsigned cqtype) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQ *cq; -+ int cqnum; -+ sdramaddr_t cqdesc; -+ unsigned offset; -+ E4_uint64 value; -+ -+ if ((cq = elan4_getcq (ctxt, cqtype)) == NULL) -+ return NULL; -+ -+ cqnum = elan4_cq2num(cq); -+ -+ cq->cq_space = elan4_sdram_alloc (dev, CQ_Size(cqsize)); -+ if (cq->cq_space == (virtaddr_t) 0) -+ { -+ elan4_putcq (ctxt, cq); -+ return (NULL); -+ } -+ -+ cq->cq_size = cqsize; -+ cq->cq_perm = perm; -+ -+ /* and finally initialise the command queue descriptor */ -+ cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ -+ value = CQ_QueuePtrsValue (cqsize, cq->cq_space, cq->cq_space); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ value |= ((cqtype & CQ_Priority) ? CQ_RevA_Priority : 0); -+ else -+ value |= (((cqtype & CQ_Priority) ? CQ_RevB_Priority : 0) | -+ ((cqtype & CQ_Reorder) ? CQ_RevB_ReorderingQueue : CQ_RevB_32bitWriteQueue)); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs), value); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), CQ_ControlValue (ctxt->ctxt_num, 2, perm)); -+ pioflush_sdram (dev); -+ -+ offset = (cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ cq->cq_mapping = elan4_map_device (dev, ELAN4_BAR_REGISTERS, (offset & ~(PAGE_SIZE-1)), -+ PAGE_SIZE, &cq->cq_handle) + (offset & (PAGE_SIZE-1)); -+#ifdef CONFIG_MPSAS -+ if (ctxt == &dev->dev_ctxt) -+ return (cq); -+#endif -+ -+ elan4_sdram_flushcache (dev, cq->cq_space, CQ_Size(cqsize)); -+ -+ return (cq); -+} -+ -+void -+elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned offset = (elan4_cq2num(cq) + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ elan4_flushcq (dev, cq); -+ -+ elan4_unmap_device (dev, cq->cq_mapping - (offset & (PAGE_SIZE-1)), PAGE_SIZE, &cq->cq_handle); -+ elan4_sdram_free (dev, cq->cq_space, CQ_Size (cq->cq_size)); -+ -+ elan4_putcq (ctxt, cq); -+} -+ -+void -+elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ int hipri; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restarting cq %p\n", cq); -+ -+ spin_lock_irqsave (&dev->dev_requeue_lock, flags); -+ -+ while (read_reg32 (dev, CommandControl.CommandRequeuePtr) & E4_CommandRequeueBusy) -+ ; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevA_Priority) != 0; -+ else -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevB_Priority) != 0; -+ -+ if (hipri) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as high pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc | E4_CommandRequeueHighPri); -+ } -+ else -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as low pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc); -+ } -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_requeue_lock, flags); -+} -+ -+static void -+flushcq_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ dev->dev_flush_finished |= (1 << (unsigned long) arg); -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+void -+elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ int flushqnum = elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1); -+ ELAN4_CQ *flushq = dev->dev_flush_cq[flushqnum]; -+ unsigned long flags; -+ -+ PRINTF (DBG_DEVICE, DBG_FLUSH, "elan4_flushcq: cqnum=%d\n", elan4_cq2num(cq)); -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ dev->dev_flush_finished &= ~(1 << flushqnum); -+ -+ dev->dev_flush_op[flushqnum].op_function = flushcq_intop; -+ dev->dev_flush_op[flushqnum].op_arg = (void *) (unsigned long) flushqnum; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_flush_op[flushqnum]); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint32 control = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ /* Write the command queues control word, but ensure that the ChannelNotCompleted fields -+ * are not modified. We use this to just alter the RestartCount/Permissions fields */ -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), -+ CQ_ControlValue (CQ_Context (control), restart ? restart : CQ_RestartCount (control), perm)); -+} -+ -+/* instruction cache flush */ -+static __inline__ void -+elan4_flush_icache_locked (ELAN4_DEV *dev) -+{ -+ int i, j; -+ -+ PRINTF0 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache_locked: flushing icache\n"); -+ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ -+ /* Errata 24: must ensure that the DCache is flushed after loading -+ * code for the thread processor. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ pioflush_reg (dev); -+} -+ -+static void -+device_iflush_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ dev->dev_iflush_queued = 0; -+ -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache_halted (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ PRINTF1 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache: queued=%d\n", dev->dev_iflush_queued); -+ -+ if (! dev->dev_iflush_queued) -+ { -+ dev->dev_iflush_queued = 1; -+ -+ elan4_queue_haltop (dev, &dev->dev_iflush_haltop); -+ } -+ -+ while (dev->dev_iflush_queued) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+/* device context operations */ -+static void -+device_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &dev->dev_cproc_trap; -+ -+ elan4_extract_cproc_trap (dev, status, trap, cqnum); -+ -+ DBGCMD (DBG_DEVICE, DBG_FLUSH, elan4_display_cproc_trap (DBG_DEVICE, DBG_FLUSH, "device_cproc_trap", trap)); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (ctxt, DBG_FLUSH, "device_cproc_trap: cqnum=%d\n", cqnum); -+ -+ /* XXXX: we could either just hit restart (and hope) - or we could extract -+ * the event interrupt cookie out and "complete" the command before -+ * restarting it */ -+ elan4_restartcq (dev, dev->dev_flush_cq[cqnum]); -+ return; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcPermissionTrap: -+ handle_dma_flushops (dev, status, cqnum); -+ return; -+ -+ default: -+ printk ("device_cproc_trap: status=%llx control=%llx TrapType=%x cqnum=%d\n", (long long) trap->tr_status, -+ elan4_sdram_readq (dev, dev->dev_cqaddr + cqnum * sizeof (E4_CommandQueueDesc) + -+ offsetof (E4_CommandQueueDesc, CQ_Control)), -+ (int) CPROC_TrapType(trap->tr_status), cqnum); -+ ELAN4_DEBUG_TRIGGER (ctxt, "elan4:device_cproc_trap\n"); -+ } -+} -+ -+static void -+device_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ ELAN4_TPROC_TRAP trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, &trap); -+ -+ elan4_display_tproc_trap (DBG_CONSOLE, DBG_TRAP, "device_tproc_trap", &trap); -+ ELAN4_DEBUG_TRIGGER (ctxt, "elan4:device_tproc_trap\n"); -+} -+ -+static void -+device_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ elan4_display_dproc_trap (DBG_CONSOLE, DBG_TRAP, "device_dproc_trap", &trap); -+ -+ ELAN4_DEBUG_TRIGGER (ctxt, "elan4:device_dproc_trap\n"); -+} -+ -+static void -+device_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) ctxt; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FLUSH, "device_interrupt: cookie=%llx\n", cookie); -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_for_each_safe (el, nel, &dev->dev_intop_list) { -+ ELAN4_INTOP *op = list_entry (el, ELAN4_INTOP, op_link); -+ -+ if (op->op_cookie == cookie) -+ { -+ if ((op->op_cookie & INTOP_TYPE_MASK) == INTOP_ONESHOT) -+ list_del (&op->op_link); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ (*op->op_function)(dev, op->op_arg); -+ return; -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ panic ("device_interrupt: interrupt cookie %llx not found\n", (long long)cookie); -+} -+ -+static void -+device_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_IPROC_TRAP *trap = &dev->dev_iproc_trap; -+ -+ elan4_extract_iproc_trap (dev, status, trap, unit); -+ elan4_inspect_iproc_trap (trap); -+ -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "device_iproc_trap", trap)); -+ -+ if (elan4_neterr_iproc_trap (dev, trap)) -+ return; -+ -+ elan4_display_iproc_trap (DBG_CONSOLE, DBG_TRAP, "device_iproc_trap", trap); -+ panic ("device_iproc_trap: unexpected trap\n"); -+} -+ -+static void -+device_needs_shuffle (ELAN4_CTXT *ctxt, int tbl, int hashidx) -+{ -+ /* XXXX currently this doesnt need to do anything -+ as the chains have only 2 entries */ -+} -+ -+ELAN4_TRAP_OPS device_trap_ops = -+{ -+ NULL, -+ device_cproc_trap, -+ device_dproc_trap, -+ device_tproc_trap, -+ device_iproc_trap, -+ device_interrupt, -+ NULL, -+ device_needs_shuffle, -+}; -+ -+/* -+ * elan4_initialise_device -+ * initialise the ELAN4_DEV struct - spinlocks,cvs etc. -+ * map the registers, sdram etc -+ */ -+int -+elan4_initialise_device (ELAN4_DEV *dev) -+{ -+ int i, bit; -+ -+ if (elan4_mainint_resched_ticks == 0) -+ elan4_mainint_resched_ticks = (hz/4); -+ -+ /* map the registers */ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ -+ dev->dev_rom = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_EBUS_OFFSET + ELAN4_REVA_EBUS_ROM_OFFSET, -+ ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ dev->dev_rom = (ioaddr_t) 0; -+ dev->dev_i2c = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_I2C_OFFSET, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ /* XXXX: parse the ebus rom to determine the sdram configuration */ -+ { -+ extern long long sdram_cfg; -+ -+ if (sdram_cfg == 0) -+ dev->dev_sdram_cfg = SDRAM_STARTUP_VALUE; -+ else -+ dev->dev_sdram_cfg = sdram_cfg; -+ } -+ -+ for (bit = 0; ((1 << bit) & elan4_resource_len (dev, ELAN4_BAR_SDRAM)) == 0; bit++) -+ ; -+ -+ switch ((dev->dev_sdram_cfg >> SDRAM_RamSize_SH) & 3) -+ { -+ case 0: /* 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 1: /* 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = ((i & 2) << (bit)) | ((i & 1) << (bit-1)); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 2: /* 2Gbit (16-bit output) or 1Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 2; bit--; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 3: /* 4Gbit (16-bit output) or 2Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 1; -+ dev->dev_sdram_banks[0].b_base = 0; -+ dev->dev_sdram_banks[0].b_size = (1 << bit); -+ break; -+ } -+ -+ elan4_sdram_init (dev); -+ -+ /* initialise locks for classes of interrupts */ -+ spin_lock_init (&dev->dev_trap_lock); -+ spin_lock_init (&dev->dev_intop_lock); -+ spin_lock_init (&dev->dev_haltop_lock); -+ spin_lock_init (&dev->dev_mainint_lock); -+ -+ init_timer (&dev->dev_haltop_timer); -+ dev->dev_haltop_timer.function = dev_haltop_timer_func; -+ dev->dev_haltop_timer.data = (unsigned long) dev; -+ -+ /* initialise other locks */ -+ spin_lock_init (&dev->dev_i2c_lock); -+ -+ spin_lock_init (&dev->dev_mmulock); -+ spin_lock_init (&dev->dev_cqlock); -+ spin_lock_init (&dev->dev_ctxlock); -+ -+ spin_lock_init (&dev->dev_intmask_lock); -+ spin_lock_init (&dev->dev_syscontrol_lock); -+ -+ spin_lock_init (&dev->dev_ctxt_lock); -+ spin_lock_init (&dev->dev_flush_lock); -+ spin_lock_init (&dev->dev_requeue_lock); -+ -+ kmutex_init (&dev->dev_lock); -+ -+ kcondvar_init (&dev->dev_mainint_wait); -+ kcondvar_init (&dev->dev_flush_wait); -+ -+ /* initialsie lists */ -+ INIT_LIST_HEAD (&dev->dev_ctxt_list); -+ INIT_LIST_HEAD (&dev->dev_intop_list); -+ INIT_LIST_HEAD (&dev->dev_interruptq_list); -+ INIT_LIST_HEAD (&dev->dev_hc_list); -+ INIT_LIST_HEAD (&dev->dev_haltop_list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[0].list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[1].list); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ return (0); -+} -+ -+void -+elan4_finalise_device (ELAN4_DEV *dev) -+{ -+ kcondvar_destroy (&dev->dev_flush_wait); -+ kcondvar_destroy (&dev->dev_mainint_wait); -+ -+ kmutex_destroy (&dev->dev_lock); -+ -+ spin_lock_destroy (&dev->dev_requeue_lock); -+ spin_lock_destroy (&dev->dev_flush_lock); -+ spin_lock_destroy (&dev->dev_ctxt_lock); -+ -+ spin_lock_destroy (&dev->dev_syscontrol_lock); -+ spin_lock_destroy (&dev->dev_intmask_lock); -+ -+ spin_lock_destroy (&dev->dev_ctxlock); -+ spin_lock_destroy (&dev->dev_cqlock); -+ spin_lock_destroy (&dev->dev_mmulock); -+ -+ spin_lock_destroy (&dev->dev_i2c_lock); -+ -+ spin_lock_destroy (&dev->dev_mainint_lock); -+ spin_lock_destroy (&dev->dev_haltop_lock); -+ spin_lock_destroy (&dev->dev_intop_lock); -+ spin_lock_destroy (&dev->dev_trap_lock); -+ -+ del_timer_sync (&dev->dev_haltop_timer); -+ -+ while (! list_empty (&dev->dev_hc_list)) -+ { -+ ELAN4_HASH_CHUNK *hc = list_entry (dev->dev_hc_list.next, ELAN4_HASH_CHUNK, hc_link); -+ -+ list_del (&hc->hc_link); -+ -+ KMEM_FREE(hc, sizeof (ELAN4_HASH_CHUNK)); -+ } -+ -+ elan4_sdram_fini (dev); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ elan4_unmap_device (dev, dev->dev_rom, ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ case PCI_REVISION_ID_ELAN4_REVB: -+ elan4_unmap_device (dev, dev->dev_i2c, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ } -+} -+ -+static int -+measure_sysclk (ELAN4_DEV *dev) -+{ -+ E4_uint64 val0, val1; -+ E4_uint32 ticks, ns; -+ -+ write_ureg64 (dev, StatCont, STP_SYS_CLOCK_RATE0); -+ -+ val0 = read_ureg64 (dev, StatCounts[0]); -+ udelay (1000); -+ val1 = read_ureg64 (dev, StatCounts[0]); -+ -+ -+ ticks = ((val1 >> 32) - (val0 >> 32)); -+ ns = ((val1 & 0xffffffff) - (val0 & 0xffffffff)); -+ -+ return (ticks / (ns / 1000)); -+} -+ -+static void -+initialise_cache (ELAN4_DEV *dev) -+{ -+ register int set, line; -+ -+ /* Initialise the cache to "map" the bottom of sdram - we will use -+ * this space for cache flushing, so require the cache to be set -+ * up so that cachelines for this are in the correct set. -+ * -+ * XXXX: for MPSAS we set bit 28, to ensure that any access to -+ * sdram causes the line to be filled first to expunge any -+ * Xs. */ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], (((E4_uint64) set) << 29) | (1 << 28) | (line << 16)); -+} -+ -+#ifndef CONFIG_MPSAS -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+} -+ -+static void -+initialise_ecc (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ register int i, addr; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ initialise_cache_tags (dev, E4_CacheSize); -+ for (addr = 0; addr < bank->b_size; addr += E4_CacheSize) -+ { -+ for (i = 0; i < E4_CacheSize; i += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr | i, (void *)(bank->b_ioaddr + addr + i)); -+ initialise_cache_tags (dev, addr); -+ } -+ } -+ else -+ { -+ /* Write the whole of this bank of sdram. */ -+ for (addr = 0; addr < bank->b_size; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, (void *)(bank->b_ioaddr + addr)); -+ -+ /* Now flush out the top out of the cache */ -+ for (addr = 0; addr < E4_CacheSize; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, (void *)(bank->b_ioaddr + addr)); -+ -+ /* Now read the top value of sdram to guarantee the write has occured before the ecc is enabled */ -+ __elan4_readq (dev, bank->b_ioaddr + bank->b_size - sizeof (E4_uint64)); -+ } -+} -+#endif -+ -+#ifdef CONFIG_MPSAS -+static void -+do_initdma (ELAN4_DEV *dev) -+{ -+#define VIRTUAL_ADDRESS 0x10000000ull -+ ELAN4_CQ *cq = dev->dev_flush_cq[0]; -+ E4_uint64 value; -+ E4_uint32 intreg; -+ E4_uint64 status; -+ -+ PRINTF (DBG_DEVICE, DBG_CONFIG, "elan: performing initialising dma\n"); -+ -+ DISABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+ -+ /* initialise the context filter */ -+ elan4_attach_filter (&dev->dev_ctxt, 0); -+ -+ /* now issue a DMA - we expect this to trap */ -+ writeq (E4_DMA_TYPE_SIZE (128*4, DMA_DataTypeByte, 0, 0) | RUN_DMA_CMD, cq->cq_mapping + (0 << 3)); -+ writeq (0, cq->cq_mapping + (1 << 3)); -+ writeq (0, cq->cq_mapping + (2 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (3 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (4 << 3)); -+ writeq (0, cq->cq_mapping + (5 << 3)); -+ writeq (0, cq->cq_mapping + (6 << 3)); -+ -+ /* spin waiting for it to trap - then restart the dma processor */ -+ do { -+ value = read_reg64 (dev, IntAndMaskReg); -+ intreg = (value >> E4_INTERRUPT_REG_SHIFT); -+ } while ((intreg & (INT_Dma0Proc | INT_Dma1Proc)) == 0); -+ -+ /* check it trapped for the right reason */ -+ status = (intreg & INT_Dma0Proc) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ -+ if (DPROC_PrefetcherFault (status) || (DPROC_TrapType(status) != DmaProcFailCountError && DPROC_TrapType(status) != DmaProcPacketAckError)) -+ { -+ printk ("elan: bad dma trap, status = %lx\n", (long)status); -+ panic ("elan: bad dma trap\n"); -+ } -+ -+ PULSE_SCHED_RESTART (dev, SCH_RestartDma0Proc | SCH_RestartDma1Proc | SCH_RestartDmaPrefetchProc); -+ -+ elan4_detach _filter (&dev->dev_ctxt, 0); -+ -+ ENABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+#undef VIRTUAL_ADDRESS -+} -+#endif -+ -+static int -+ebus_read_vpd (ELAN4_DEV *dev, unsigned char *data, unsigned int nob) -+{ -+ unsigned int pci_data_ptr; -+ unsigned int vpd_ptr; -+ register int i; -+ -+ if (read_ebus_rom (dev, 0) != 0x55 || read_ebus_rom (dev, 1) != 0xaa) -+ { -+ printk ("elan%d: invalid rom signature in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ pci_data_ptr = (read_ebus_rom (dev, 0x19) << 8) | read_ebus_rom (dev, 0x18); -+ -+ /* check the pci data structure */ -+ if (read_ebus_rom (dev, pci_data_ptr + 0) != 'P' || -+ read_ebus_rom (dev, pci_data_ptr + 1) != 'C' || -+ read_ebus_rom (dev, pci_data_ptr + 2) != 'I' || -+ read_ebus_rom (dev, pci_data_ptr + 3) != 'R') -+ { -+ printk ("elan%d: invalid pci data structure in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* extract the VPD pointer */ -+ vpd_ptr = (read_ebus_rom (dev, pci_data_ptr + 9) << 8) | read_ebus_rom (dev, pci_data_ptr + 8); -+ -+ if (vpd_ptr == 0) -+ { -+ printk ("elan%d: no vital product data in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* read the vpd data */ -+ for (i = 0; i < nob; i++) -+ data[i] = read_ebus_rom (dev, vpd_ptr + i); -+ -+ return 0; -+} -+ -+int -+elan4_read_vpd (ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) -+{ -+ unsigned char vpd[I2C_ELAN_EEPROM_VPD_SIZE]; -+ unsigned char *ptr = vpd; -+ unsigned int finished = 0; -+ unsigned char *lim; -+ unsigned char name[3]; -+ unsigned char value[256]; -+ unsigned char type; -+ unsigned int len, len2; -+ register int i; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ if (ebus_read_vpd (dev, vpd, I2C_ELAN_EEPROM_VPD_SIZE) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from EBUS rom\n", dev->dev_instance); -+ return -EINVAL ; -+ } -+ } -+ else -+ { -+ if (i2c_read_rom (dev, I2C_ELAN_EEPROM_VPD_BASEADDR, I2C_ELAN_EEPROM_VPD_SIZE, vpd) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from I2C rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ } -+ -+ result[0] = 0; -+ while (! finished) -+ { -+ type = *ptr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(ptr++); -+ len += *(ptr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = ptr + len; ptr < lim; ) -+ { -+ name[0] = *ptr++; -+ name[1] = *ptr++; -+ name[2] = '\0'; -+ len2 = *ptr++; -+ -+ for (i = 0; i < len2 && ptr < lim; i++) -+ value[i] = *ptr++; -+ value[i] = '\0'; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, %s: $s\n", dev->dev_instance, name, value); -+ -+ if (tag != NULL) -+ { /* looking for just one tag */ -+ if (!strcmp (name, tag)) -+ strcpy(result, value); -+ } -+ else -+ { /* get all tags */ -+ strcat(result,name); -+ strcat(result,": "); -+ strcat(result,value); -+ strcat(result,"\n"); -+ } -+ } -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown large resource %x\n", dev->dev_instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown small resource %x\n", dev->dev_instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ if ( result[0] == 0 ) { -+ if ( tag != 0 ) -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find tag %s\n", dev->dev_instance, tag); -+ else -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find any tags\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ return (0); -+} -+ -+int -+elan4_start_device (ELAN4_DEV *dev) -+{ -+ E4_VirtualProcessEntry entry; -+ unsigned pagesizeval[2]; -+ unsigned hashsizeval[2]; -+ register int i, j, tbl, res; -+ unsigned attempts = 0; -+ E4_PAGE_SIZE_TABLE; -+ unsigned char serial[256]; -+ unsigned int sdram_factor = SDRAM_166_DLL_CORRECTION_FACTOR; -+ -+ PRINTF (DBG_DEVICE, DBG_ALL, "elan4_start_device: entered\n"); -+ -+ dev->dev_state = ELAN4_STATE_STARTING; -+ -+ tryagain: -+ /* Initialise the pci config space */ -+ if ((res = elan4_pciinit (dev)) < 0) -+ return (res); -+ -+ /* Display the serial number */ -+ if (elan4_read_vpd (dev, "SN", serial)) -+ printk("elan%d: SN: failed to read\n", dev->dev_instance); -+ else -+ printk("elan%d: SN: %s\n", dev->dev_instance, serial); -+ -+ /* initialise the interrupt mask to zero */ -+ SET_INT_MASK (dev, 0); -+ -+ /* Initialise the device registers */ -+ write_reg64 (dev, TlbLineValue, 0); -+ write_reg64 (dev, SysControlReg, 0); -+ -+ /* Initialise the SDRAM using the configuration value from the ROM */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_SETUP); -+ -+ /* Setup the linkport registers */ -+ write_reg64 (dev, LinkPortLock, elan4_linkport_lock); -+ -+ /* Setup the tick rates, start the clock, and init the stats registers */ -+ write_ureg32 (dev, ClockTickRate.s.TickRates, ELAN4_CLOCK_TICK_RATE); -+ write_ureg64 (dev, Clock, 0); -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ for (i = 0; i < 8; i++) -+ write_ureg32 (dev, StatCounts[i].s.StatsCount, 0); -+ -+ /* Initialise the Link Control register - disable the TLB prefetcher on RevB -+ * as it can cause very occasional data corruption. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ write_reg32 (dev, LinkControlReg, LCONT_EN_SYS_READS | LCONT_REVB_DISABLE_TLB_PREFETCH); -+ else -+ write_reg32 (dev, LinkControlReg, LCONT_EN_SYS_READS); -+ -+ /* Initialise the Link Control Settings to set the PLL Reference Value */ -+ write_reg32 (dev, LinkContSettings, -+ (elan4_mod45disable ? LCONT_MOD45_DISABLE : 0) | -+ (3 << LCONT_CONFIG_PHASE_SHIFT) | -+ ((elan4_pll_div & LCONT_PLL_REF_VAL_BITS_MASK) << LCONT_PLL_REF_VAL_BITS_SHIFT) | -+ (LCONT_VOD_360 << LCONT_LVDS_VOLTAGE_BITS_SHIFT) | -+ (LCONT_TERM_AUTO_OHM << LCONT_LVDS_TERMINATION_SHIFT)); -+ -+ /* Clear the link error LED on RevB and above */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+ -+ /* Compute the SysClk frequency and update the PLL if necessary */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) -+ { -+ int mhz = measure_sysclk (dev); -+ -+ if (elan4_pll_cfg != 0 || mhz > 190 || mhz < 170) -+ printk ("elan%d: SysClk running at %d Mhz\n", dev->dev_instance, measure_sysclk (dev)); -+ else -+ { -+ sdram_factor = SDRAM_150_DLL_CORRECTION_FACTOR; -+ -+ elan4_updatepll (dev, ECTRL_SYS_CLOCK_RATIO_4_3); -+ -+ printk ("elan%d: SysClk now running at %d Mhz\n", dev->dev_instance, measure_sysclk (dev)); -+ } -+ } -+ -+ initialise_cache (dev); -+ -+ /* Initialise the MMU hash table parameters */ -+ /* Select the largest elan pagesize which is spanned by the -+ * system pagesize for mmu table 0*/ -+ for (i = 0; i < E4_PAGE_SIZE_TABLE_SIZE; i++) -+ if (PageSizeTable[i] > PAGE_SHIFT) -+ break; -+ -+ pagesizeval[0] = i - 1; -+ hashsizeval[0] = elan4_hash_0_size_val; -+ -+ /* Select a suitable elan pagesize to match any "large" page -+ * support that the OS provides. */ -+ pagesizeval[1] = PAGE_SIZE_4M; -+ hashsizeval[1] = elan4_hash_1_size_val; -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_pagesizeval[tbl] = pagesizeval[tbl]; -+ dev->dev_pageshift[tbl] = PageSizeTable[pagesizeval[tbl]]; -+ dev->dev_hashsize[tbl] = (1 << hashsizeval[tbl])/sizeof (E4_HashTableEntry); -+ dev->dev_rsvd_hashmask[tbl] = ((1 << (27 - dev->dev_pageshift[tbl]))-1) & ~((1 << hashsizeval[tbl])-1); -+ dev->dev_rsvd_hashval[tbl] = 0xFFFFFFFF; -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: pageshifts %d,%d\n", dev->dev_pageshift[0], -+ NUM_HASH_TABLES == 2 ? dev->dev_pageshift[1] : 0); -+ -+ /* Initialise the control register to the desired value */ -+ dev->dev_syscontrol = (CONT_EN_ALL_SETS | CONT_MMU_ENABLE | CONT_CACHE_ALL | CONT_2K_NOT_1K_DMA_PACKETS | -+ (pagesizeval[0] << CONT_TABLE0_PAGE_SIZE_SHIFT) | (hashsizeval[0] << CONT_TABLE0_MASK_SIZE_SHIFT)); -+ -+ if (NUM_HASH_TABLES == 2) -+ dev->dev_syscontrol |= CONT_TWO_HASH_TABLES | (pagesizeval[1] << CONT_TABLE1_PAGE_SIZE_SHIFT) | (hashsizeval[1] << CONT_TABLE1_MASK_SIZE_SHIFT); -+ -+ write_reg64 (dev, SysControlReg, dev->dev_syscontrol); -+ -+ /* use direct mapped pci writes during sdram initialisation, since for -+ * cache flushing to work, we need to ensure that the cacheflush page -+ * never gets lines into the incorrect cache set. */ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ elan4_sdram_setup_delay_lines(dev, sdram_factor); -+ -+ for (i = res = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_size) -+ res |= elan4_sdram_init_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ if (! res) -+ { -+ if (dev->dev_devinfo.dev_device_id == PCI_REVISION_ID_ELAN4_REVB && ++attempts < 5) -+ { -+ printk ("elan%d: sdram not working, resetting\n", dev->dev_instance); -+ goto tryagain; -+ } -+ -+ printk ("elan%d: could not find any sdram banks\n", dev->dev_instance); -+ goto failed; -+ } -+ -+#ifndef CONFIG_MPSAS -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialising for ECC\n"); -+ -+ for (i = 0 ; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ initialise_ecc (dev, &dev->dev_sdram_banks[i]); -+#endif -+ -+ dev->dev_sdram_initial_ecc_val = read_reg64 (dev, SDRamECCStatus); -+ -+ /* Now enable ECC after we've scrubbed the memory */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_ENABLE_ECC); -+ -+ /* clear any errors, and flush the tlb/route cache */ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH | CONT_ROUTE_FLUSH | CONT_CLEAR_LINKPORT_INT | CONT_CLEAR_SDRAM_ERROR); -+ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Initialise the thread processor's register file */ -+ for (i = 0; i < 64; i++) -+ write_reg64 (dev, TProcRegs[i], 0); -+ -+ /* Initialise the thread processor's ICache tags */ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the ICache with a sethi %hi(addr << 7), %r0 -+ * writing 8 64 bit values per loop of sethi %g0 values ending in 77 for something different?? -+ */ -+ for (i = 0; i < E4_ICacheSizeInBytes; i += (E4_ICachePortSize << 3)) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_AccessICacheRams | (i >> 3)); -+ -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], -+ (E4_uint64) (((E4_uint64)i << (4+7)) + ((E4_uint64)j << (1+7)) + (0x077)) | -+ (E4_uint64) (((E4_uint64)i << (4+7+32)) + ((E4_uint64)j << (1+7+32)) + (0x0e7)) << 32); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ for (i = 0; i < E4_ICachePortSize; i++) -+ write_reg64 (dev, ICachePort[i], E4_ICacheFixupInsn | (E4_ICacheFixupInsn << 32)); -+ -+ /* create the buddy allocator for SDRAM */ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_add_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ dev->dev_ctxtableshift = elan4_ctxt_table_shift; -+ dev->dev_cqcount = (1 << elan4_ln2_max_cqs); -+ dev->dev_cqreorder = 0; -+ -+ /* allocate the sdram for cache flushing whilst still in direct mapped mode */ -+ dev->dev_cacheflush_space = elan4_sdram_alloc (dev, E4_CacheSize); -+ -+ /* and longer need direct mapped pci writes */ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* allocate the hash tables, command queues, context tables etc */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: allocating hash tables, command queueus, context tables\n"); -+ -+ dev->dev_comqlowpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_comqhighpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_cqaddr = elan4_sdram_alloc (dev, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ dev->dev_dmaqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_highpri_size)); -+ dev->dev_dmaqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ dev->dev_threadqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_highpri_size)); -+ dev->dev_threadqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_lowpri_size)); -+ dev->dev_interruptq = elan4_sdram_alloc (dev, E4_QueueSize(elan4_interruptq_size)); -+ -+ dev->dev_ctxtable = elan4_sdram_alloc (dev, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ dev->dev_faultarea = elan4_sdram_alloc (dev, CUN_Entries * sizeof (E4_FaultSave)); -+ dev->dev_inputtraparea = elan4_sdram_alloc (dev, sizeof (E4_IprocTrapState)); -+ -+ dev->dev_sdrampages[0] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ dev->dev_sdrampages[1] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_hashtable[tbl] = elan4_sdram_alloc (dev, dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#ifndef CONFIG_MPSAS -+ /* Initialise hash tables to invalid (zero) */ -+ elan4_sdram_zeroq_sdram (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#endif -+ } -+ -+ /* Initialise all context filters to discard */ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, dev->dev_ctxtable, -+ E4_FILTER_DISCARD_ALL, (1 << (dev->dev_ctxtableshift-1))) < 0) -+ { -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+ } -+#else -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+#endif -+ -+ PRINTF4 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: hashtables %x,%x, %x,%x\n", dev->dev_hashtable[0], -+ dev->dev_hashsize[0], dev->dev_hashtable[1], dev->dev_hashsize[1]); -+ -+ /* install the hash table pointers */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialise registers with table addresses\n"); -+ write_reg64 (dev, MmuTableBasePtrs, (((E4_uint64) dev->dev_hashtable[0]) | ((E4_uint64) dev->dev_hashtable[1]) << 32)); -+ write_reg64 (dev, MmuFaultAndRootCntxPtr, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_faultarea) << 32)); -+ write_reg64 (dev, InputTrapAndFilter, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_inputtraparea) << 32)); -+ /* -+ * The run ptrs have this format: (Front << 32) | Back -+ * The base for both the front and back is uses the high bits of the back pointer. -+ * So writting just the base value is good enough. -+ */ -+ write_reg64 (dev, CommandLowPriRunPtrs, dev->dev_comqlowpri); -+ write_reg64 (dev, CommandHighPriRunPtrs, dev->dev_comqhighpri); -+ -+ /* Initialise the run queues */ -+ write_reg64 (dev, DProcHighPriPtrs, E4_QueueValue (dev->dev_dmaqhighpri, elan4_dmaq_highpri_size)); -+ write_reg64 (dev, DProcLowPriPtrs, E4_QueueValue (dev->dev_dmaqlowpri, elan4_dmaq_lowpri_size)); -+ write_reg64 (dev, TProcHighPriPtrs, E4_QueueValue (dev->dev_threadqhighpri, elan4_threadq_highpri_size)); -+ write_reg64 (dev, TProcLowPriPtrs, E4_QueueValue (dev->dev_threadqlowpri, elan4_threadq_lowpri_size)); -+ -+ /* Initialise the interrupt queue as "empty" - this is actually with one entry on it */ -+ write_reg64 (dev, MainIntQueuePtrs.Value, (((E4_uint64) E4_QueueFrontValue (dev->dev_interruptq, elan4_interruptq_size) << 32) | -+ ((E4_uint64) E4_QueueBackPointer(dev->dev_interruptq + E4_MainIntEntrySize)))); -+ -+ dev->dev_interruptq_nfptr = dev->dev_interruptq + E4_MainIntEntrySize; -+ -+ /* -+ * Flush the context filter before dropping the Discard all bits in the schedule status register. -+ * Also hit the SCH_RestartTProc to clear out X's from the trap state and -+ * hit the SCH_RestartDmaPrefetchProc to clear out X's from the prev register. -+ */ -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush | SCH_RestartTProc | SCH_RestartDmaPrefetchProc); -+ -+ /* setup the schedule status register. */ -+ SET_SCHED_STATUS (dev, SCH_CProcTimeout6p2us | SCH_DProcTimeslice512us); -+ -+ /* -+ * Now initialise the inserter cache.s -+ * Bit 31 of the first word of the descriptor is a valid bit. This must be cleared. -+ * Bit 31 becomes a used bit in the descriptors in memory. -+ */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, i); /* select a cache line */ -+ write_reg64 (dev, CommandCacheTestPort, 0); /* Mark it invalid */ -+ } -+ -+ /* Setup the pointer to the command descriptors */ -+ /* the table must be aligned on a CQ_CommandDescsAlignement boundary */ -+ /* since we've allocated a small table - we work out the offset of the */ -+ /* first entry in our table for mapping in the command ports later */ -+ dev->dev_cqoffset = (dev->dev_cqaddr & (CQ_CommandDescsAlignment-1)) / sizeof (E4_CommandQueueDesc); -+ -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, (dev->dev_cqaddr & ~(CQ_CommandDescsAlignment-1)) | COM_ENABLE_DEQUEUE); -+ -+ /* allocate the bitmaps for cq,ctxt allocation */ -+ KMEM_ZALLOC (dev->dev_cqamap, bitmap_t *, BT_BITOUL(dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t), 1); -+ KMEM_ZALLOC (dev->dev_ctxmap, bitmap_t *, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t), 1); -+ -+ if (dev->dev_cqamap == NULL || dev->dev_ctxmap == NULL) -+ goto failed; -+ -+ /* Make every fourth context be invalid for ICache fixup. -+ * context 0 is also invalid - since it is used to indicate -+ * an invalid tag. */ -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i += 4) -+ BT_SET (dev->dev_ctxmap, i); -+ -+ /* initialise the halt operations */ -+ dev->dev_haltop_mask = 0; -+ dev->dev_haltop_active = 0; -+ -+ /* allocate the hash table shadow structures - and place all blocks on the free lists */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ KMEM_ZALLOC (dev->dev_mmuhash[tbl], ELAN4_HASH_ENTRY *, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY), 1); -+ -+ if (dev->dev_mmuhash[tbl] == NULL) -+ goto failed; -+ -+ for (i = 0; i < dev->dev_hashsize[tbl]; i++) -+ dev->dev_mmuhash[tbl][i].he_entry = dev->dev_hashtable[tbl] + (i * sizeof (E4_HashTableEntry)); -+ } -+ -+ /* setup the interrupt mask register */ -+ SET_INT_MASK (dev, (INT_MSI0 | INT_MSI1 | INT_MSI2 | INT_MSI3) & ~(INT_Discarding | INT_Halted | INT_LinkPortKeyFail)); -+ -+ /* start a thread to handle excessive main interrupts */ -+ if (kernel_thread_create (elan4_mainint_thread, (caddr_t) dev) == NULL) -+ goto failed; -+ dev->dev_mainint_started = 1; -+ -+ /* install the device context - and allocate the first 16 command queues */ -+ if (elan4_insertctxt (dev, &dev->dev_ctxt, &device_trap_ops) != 0) -+ goto failed; -+ -+ /* Allocate command queues, one for each entry in the inserter cache, -+ * we'll use these queues to flush the insert cache */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ if ((dev->dev_flush_cq[i] = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit | CQ_InterruptEnableBit, -+ CQ_Priority)) == NULL) -+ goto failed; -+ -+ ASSERT (elan4_cq2num(dev->dev_flush_cq[i]) == i); -+ -+ dev->dev_flush_finished |= (1 << i); -+ } -+ -+ /* Allocate command queues for dma halt operations */ -+ if ((dev->dev_dma_flushop[0].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit|CQ_WaitEventEnableBit, 0)) == NULL || -+ (dev->dev_dma_flushop[1].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit|CQ_WaitEventEnableBit, CQ_Priority)) == NULL) -+ goto failed; -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+#endif -+ -+ /* initialise halt operation for flushing the icache */ -+ dev->dev_iflush_haltop.op_function = device_iflush_haltop; -+ dev->dev_iflush_haltop.op_arg = dev; -+ dev->dev_iflush_haltop.op_mask = INT_TProcHalted; -+ -+ /* Allocate a route table, and create a valid route for vp==0, this is used -+ * when a DMA is removed from the dma run queue */ -+ if ((dev->dev_routetable = elan4_alloc_routetable (dev, 0)) == NULL) -+ goto failed; -+ -+ elan4_set_routetable (&dev->dev_ctxt, dev->dev_routetable); -+ -+ entry.Values[0] = FIRST_MYLINK; -+ entry.Values[1] = 0; -+ -+ elan4_write_route (dev, dev->dev_routetable, 0, &entry); -+ -+ /* map the sdram pages into the elan */ -+ dev->dev_tproc_suspend = DEVICE_TPROC_SUSPEND_ADDR; -+ dev->dev_tproc_space = DEVICE_TPROC_SPACE_ADDR; -+ -+ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, HE_TYPE_SDRAM, (dev->dev_sdrampages[0] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocExecute)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_space, HE_TYPE_SDRAM, (dev->dev_sdrampages[1] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocEventWrite)); -+ -+ /* and store the thread suspend sequence in it for use when a thread is removed from the run queue */ -+ elan4_sdram_writel (dev, dev->dev_sdrampages[0], DEVICE_TPROC_SUSPEND_INSTR); -+ -+ /* and initialise the dma flush event in sdrampage[1] */ -+ elan4_sdram_writeq (dev, dev->dev_sdrampages[1] + 64, E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+#ifdef CONFIG_MPSAS -+ do_initdma (dev); -+#endif -+ -+ if (!elan4_neterr_init (dev)) -+ goto failed; -+ -+ elan4_configure_writecombining (dev); -+ -+ /* finally register the device with elanmod for rms */ -+ dev->dev_idx = elan_dev_register (&dev->dev_devinfo, &elan4_dev_ops, (void *) dev); -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ -+ return (0); -+ -+ failed: -+ printk ("elan%d: failed to start elan4 device - stopping\n", dev->dev_instance); -+ -+ elan4_stop_device (dev); -+ return (-ENOMEM); -+} -+ -+void -+elan4_stop_device (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ int i, tbl; -+ -+ dev->dev_state = ELAN4_STATE_STOPPING; -+ -+ elan_dev_deregister (&dev->dev_devinfo); -+ -+ elan4_unconfigure_writecombining (dev); -+ -+ elan4_neterr_destroy (dev); -+ -+ if (dev->dev_tproc_suspend) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_tproc_space) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_space, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_routetable) -+ { -+ elan4_set_routetable (&dev->dev_ctxt, NULL); -+ elan4_free_routetable (dev, dev->dev_routetable); -+ } -+ -+ for (i = 0; i < 2; i++) -+ if (dev->dev_dma_flushop[i].cq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_dma_flushop[i].cq); -+ -+ /* free of the device context - and insert cache flushing command queues */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ if (dev->dev_flush_cq[i]) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_flush_cq[i]); -+ -+ if (dev->dev_ctxt.ctxt_dev) -+ elan4_removectxt (dev, &dev->dev_ctxt); -+ -+ /* stop the mainint thread */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ dev->dev_stop_threads = 1; -+ -+ while (dev->dev_mainint_started && !dev->dev_mainint_stopped) -+ { -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ dev->dev_mainint_started = dev->dev_mainint_stopped = 0; -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ /* cancel any error interrupt timeouts */ -+ if (timer_fn_queued (&dev->dev_error_timeoutid)) -+ cancel_timer_fn (&dev->dev_error_timeoutid); -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ cancel_timer_fn (&dev->dev_linkerr_timeoutid); -+ -+ /* reset the interrupt mask register to zero */ -+ if (dev->dev_regs) -+ SET_INT_MASK (dev, 0); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ if (dev->dev_mmuhash[tbl]) -+ KMEM_FREE (dev->dev_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY)); -+ if (dev->dev_hashtable[tbl]) -+ elan4_sdram_free (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+ } -+ -+ if (dev->dev_cqamap) -+ KMEM_FREE (dev->dev_cqamap, BT_BITOUL (dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t)); -+ if (dev->dev_ctxmap) -+ KMEM_FREE (dev->dev_ctxmap, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t)); -+ -+ if (dev->dev_comqlowpri) -+ elan4_sdram_free (dev, dev->dev_comqlowpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_comqhighpri) -+ elan4_sdram_free (dev, dev->dev_comqhighpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_cqaddr) -+ elan4_sdram_free (dev, dev->dev_cqaddr, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ if (dev->dev_dmaqhighpri) -+ elan4_sdram_free (dev, dev->dev_dmaqhighpri, E4_QueueSize(elan4_dmaq_highpri_size)); -+ if (dev->dev_dmaqlowpri) -+ elan4_sdram_free (dev, dev->dev_dmaqlowpri, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ if (dev->dev_threadqhighpri) -+ elan4_sdram_free (dev, dev->dev_threadqhighpri, E4_QueueSize(elan4_threadq_highpri_size)); -+ if (dev->dev_threadqlowpri) -+ elan4_sdram_free (dev, dev->dev_threadqlowpri, E4_QueueSize(elan4_threadq_lowpri_size)); -+ if (dev->dev_interruptq) -+ elan4_sdram_free (dev, dev->dev_interruptq, E4_QueueSize(elan4_interruptq_size)); -+ -+ if (dev->dev_ctxtable) -+ elan4_sdram_free (dev, dev->dev_ctxtable, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ if (dev->dev_faultarea) -+ elan4_sdram_free (dev, dev->dev_faultarea, CUN_Entries * sizeof (E4_FaultSave)); -+ if (dev->dev_inputtraparea) -+ elan4_sdram_free (dev, dev->dev_inputtraparea, sizeof (E4_IprocTrapState)); -+ -+ if (dev->dev_sdrampages[0]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[0], SDRAM_PAGE_SIZE); -+ if (dev->dev_sdrampages[1]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[1], SDRAM_PAGE_SIZE); -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_fini_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ elan4_pcifini (dev); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ if (dev->dev_ack_errors) -+ kfree(dev->dev_ack_errors); -+ if (dev->dev_dproc_timeout) -+ kfree(dev->dev_dproc_timeout); -+ if (dev->dev_cproc_timeout) -+ kfree(dev->dev_cproc_timeout); -+} -+ -+static __inline__ int -+compute_arity (int lvl, unsigned n, char *arity) -+{ -+ if (arity[lvl] == 0) -+ { -+ if (n <= 8) -+ arity[lvl] = n; -+ else -+ arity[lvl] = 4; -+ } -+ -+ return (arity[lvl]); -+} -+ -+int -+elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned arityval) -+{ -+ int i, lvl, n; -+ char arity[ELAN_MAX_LEVELS]; -+ -+ if (nodeid >= numnodes) -+ return -EINVAL; -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, arityval >>= 4) -+ arity[i] = arityval & 7; -+ -+ for (lvl = 0, n = numnodes; n > compute_arity(lvl, n, arity) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if ((n % arity[lvl]) != 0) -+ return -EINVAL; -+ -+ n /= arity[lvl]; -+ } -+ -+ if (arity[lvl] != n) -+ return -EINVAL; -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = arity[lvl - i]; -+ -+ pos->pos_nodes = numnodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeid; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return 0; -+} -+ -+int -+elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ kmutex_lock (&dev->dev_lock); -+ *pos = dev->dev_position; -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (pos->pos_mode); -+} -+ -+int -+elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ int forceLocal = 0; -+ int nnodes, i; -+ unsigned int *ack_errors; -+ unsigned int *dproc_timeout; -+ unsigned int *cproc_timeout; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_UNKNOWN: -+ break; -+ -+ case ELAN_POS_MODE_SWITCHED: -+ if (pos->pos_levels > ELAN_MAX_LEVELS) -+ return (-EINVAL); -+ -+ for (i = 0, nnodes = 1; i < pos->pos_levels; i++) -+ { -+ -+ if (pos->pos_arity[i] <= 0 || (i == 0 ? pos->pos_arity[i] > 8 : pos->pos_arity[i] >= 8)) /* allow an 8 way top-switch */ -+ return (-EINVAL); -+ -+ nnodes *= pos->pos_arity[i]; -+ } -+ -+ if (pos->pos_nodes > nnodes || pos->pos_nodeid >= pos->pos_nodes) -+ return (-EINVAL); -+ break; -+ -+ case ELAN_POS_MODE_LOOPBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 1 || pos->pos_nodeid != 0 || pos->pos_arity[0] != 1) -+ return (-EINVAL); -+ -+ forceLocal = 1; -+ break; -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 2 || pos->pos_nodeid >= 2 || pos->pos_arity[0] != 2) -+ return (-EINVAL); -+ -+ forceLocal = (pos->pos_nodeid == 0); -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ ack_errors = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!ack_errors) -+ return (-EINVAL); -+ memset(ack_errors, 0, pos->pos_nodes * sizeof(unsigned int)); -+ dproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!dproc_timeout) -+ { -+ kfree(ack_errors); -+ return (-EINVAL); -+ } -+ memset(dproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ cproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!cproc_timeout) -+ { -+ kfree(ack_errors); -+ kfree(dproc_timeout); -+ return (-EINVAL); -+ } -+ memset(cproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_position = *pos; -+ dev->dev_ack_errors = ack_errors; -+ dev->dev_dproc_timeout = dproc_timeout; -+ dev->dev_cproc_timeout = cproc_timeout; -+ -+ if (forceLocal) -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) | LCONT_FORCE_COMMSCLK_LOCAL); -+ else -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) & ~LCONT_FORCE_COMMSCLK_LOCAL); -+ -+ pioflush_reg (dev); -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (0); -+} -+ -+void -+elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask) -+{ -+ kmutex_lock (&dev->dev_lock); -+ -+ *mask = dev->dev_devinfo.dev_params_mask; -+ memcpy (params, &dev->dev_devinfo.dev_params, sizeof (ELAN_PARAMS)); -+ -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+void -+elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask) -+{ -+ int i; -+ -+ kmutex_lock (&dev->dev_lock); -+ for (i = 0; i < ELAN4_PARAM_COUNT; i++) -+ if (mask & (1 << i)) -+ dev->dev_devinfo.dev_params.values[i] = params->values[i]; -+ -+ dev->dev_devinfo.dev_params_mask |= mask; -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+ -+EXPORT_SYMBOL(elan4_get_position); -+EXPORT_SYMBOL(elan4_set_position); -+ -+EXPORT_SYMBOL(elan4_queue_haltop); -+EXPORT_SYMBOL(elan4_queue_dma_flushop); -+EXPORT_SYMBOL(elan4_queue_mainintop); -+ -+EXPORT_SYMBOL(elan4_insertctxt); -+EXPORT_SYMBOL(elan4_removectxt); -+ -+EXPORT_SYMBOL(elan4_attach_filter); -+EXPORT_SYMBOL(elan4_detach_filter); -+EXPORT_SYMBOL(elan4_set_filter); -+EXPORT_SYMBOL(elan4_set_routetable); -+ -+EXPORT_SYMBOL(elan4_alloccq); -+EXPORT_SYMBOL(elan4_freecq); -+EXPORT_SYMBOL(elan4_restartcq); -+ -+EXPORT_SYMBOL(elan4_flush_icache); -+EXPORT_SYMBOL(elan4_hardware_lock_check); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/device_Linux.c linux-2.6.9/drivers/net/qsnet/elan4/device_Linux.c ---- clean/drivers/net/qsnet/elan4/device_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/device_Linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,3034 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device_Linux.c,v 1.110.2.9 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#ifdef CONFIG_MTRR -+#include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+#error please use a 2.4.0 series kernel or newer -+#endif -+ -+ -+#if defined(LINUX_SPARC) || defined(LINUX_PPC64) -+#define __io_remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#elif defined(NO_RMAP) -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#else -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(vma,from,offset,size,prot) -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9) -+#define __remap_page_range(from,offset,size,prot) remap_pfn_range(vma,from,(offset)>>PAGE_SHIFT,size,prot) -+#else -+#define __remap_page_range(from,offset,size,prot) remap_page_range(vma,from,offset,size,prot) -+#endif -+#endif -+ -+#if defined (X86_FEATURE_PAT) -+static unsigned int pat_pteval = -1; -+#endif -+ -+#if defined(__alpha) -+static inline physaddr_t bar2phys (unsigned long addr) -+{ -+ return virt_to_phys((void *) ioremap(addr, PAGE_SIZE)); -+} -+#elif defined(__ia64) -+static inline physaddr_t bar2phys (unsigned long addr) -+{ -+ return ((addr) & ~__IA64_UNCACHED_OFFSET); -+} -+#elif defined(__powerpc64__) -+ -+#ifdef CONFIG_PPC_PSERIES -+#include -+ -+static inline physaddr_t bar2phys (unsigned long addr) -+{ -+ return eeh_token_to_phys (addr); -+} -+#endif -+ -+#else -+static inline physaddr_t bar2phys (unsigned long addr) -+{ -+ return (addr); -+} -+#endif -+ -+#ifndef pgprot_noncached -+static inline pgprot_t pgprot_noncached(pgprot_t _prot) -+{ -+ unsigned long prot = pgprot_val(_prot); -+#if defined(__powerpc__) -+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; -+#elif defined(__sparc__) -+ prot &= ~(_PAGE_CACHE); -+ prot |= _PAGE_IE; -+#endif -+ return __pgprot(prot); -+} -+#endif -+ -+#ifndef pgprot_writecombine -+static inline pgprot_t pgprot_writecombine (pgprot_t _prot) -+{ -+ unsigned long prot = pgprot_val(_prot); -+ -+#if defined (X86_FEATURE_PAT) -+ if (pat_pteval != -1) -+ prot = (prot & ~(_PAGE_PCD | _PAGE_PWT | _PAGE_PSE)) | pat_pteval; -+#endif -+ return __pgprot (prot); -+} -+#endif -+ -+#define ELAN4_DRIVER_VERSION 0x103 /* 16 bit value */ -+ -+/* -+ * Function prototypes. -+ */ -+static int elan4_attach_device (int instance, struct pci_dev *pdev); -+static void elan4_detach_device (ELAN4_DEV *dev); -+ -+static int elan4_open (struct inode *inode, struct file *file); -+static int elan4_release(struct inode *inode, struct file *file); -+static int elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan4_mmap (struct file *file, struct vm_area_struct *vm_area); -+ -+static irqreturn_t elan4_irq (int irq, void *arg, struct pt_regs *regs); -+ -+static void elan4_shutdown_devices(int panicing); -+ -+static int disabled; /* bitmask of which devices not to start */ -+unsigned int elan4_pll_cfg = 0; -+int elan4_pll_div = 31; /* RevC PCB */ -+int elan4_mod45disable = 0; -+static int optimise_pci_bus = 1; /* 0 => don't, 1 => if ok, 2 => always */ -+static int default_features = 0; /* default values for dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] */ -+int assfail_mode = 0; -+ -+long long sdram_cfg = SDRAM_STARTUP_VALUE; -+static int sdram_cfg_lo; -+static int sdram_cfg_hi; -+int sdram_bank_limit; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan 4 Device Driver"); -+MODULE_LICENSE("GPL"); -+ -+module_param(elan4_debug, uint, 0); -+module_param(elan4_debug_toconsole, uint, 0); -+module_param(elan4_debug_tobuffer, uint, 0); -+module_param(elan4_debug_mmu, uint, 0); -+module_param(elan4_pll_cfg, uint, 0); -+module_param(elan4_pll_div, uint, 0); -+module_param(elan4_mod45disable, uint, 0); -+module_param(optimise_pci_bus, uint, 0); -+module_param(default_features, uint, 0); -+module_param(assfail_mode, uint, 0); -+ -+module_param(disabled, uint, 0); -+module_param(sdram_cfg_lo, uint, 0); -+module_param(sdram_cfg_hi, uint, 0); -+module_param(sdram_bank_limit, uint, 0); -+ -+module_param(elan4_hash_0_size_val, uint, 0); -+module_param(elan4_hash_1_size_val, uint, 0); -+module_param(elan4_ctxt_table_shift, uint, 0); -+module_param(elan4_ln2_max_cqs, uint, 0); -+module_param(elan4_dmaq_highpri_size, uint, 0); -+module_param(elan4_threadq_highpri_size, uint, 0); -+module_param(elan4_dmaq_lowpri_size, uint, 0); -+module_param(elan4_threadq_lowpri_size, uint, 0); -+module_param(elan4_interruptq_size, uint, 0); -+ -+module_param(elan4_mainint_punt_loops, uint, 0); -+module_param(elan4_mainint_resched_ticks, uint, 0); -+module_param(elan4_linkport_lock, uint, 0); -+module_param(elan4_eccerr_recheck, uint, 0); -+ -+module_param(user_p2p_route_options, uint, 0); -+module_param(user_bcast_route_options, uint, 0); -+module_param(user_dproc_retry_count, uint, 0); -+module_param(user_cproc_retry_count, uint, 0); -+module_param(user_ioproc_enabled, uint, 0); -+module_param(user_pagefault_enabled, uint, 0); -+ -+/* -+ * Standard device entry points. -+ */ -+static struct file_operations elan4_fops = { -+ ioctl: elan4_ioctl, -+ mmap: elan4_mmap, -+ open: elan4_open, -+ release: elan4_release, -+}; -+ -+ELAN4_DEV *elan4_devices[ELAN4_MAX_CONTROLLER]; -+ -+#if defined(CONFIG_DEVFS_FS) -+static devfs_handle_t devfs_handle; -+#endif -+ -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32_cmds[] = -+{ /* /dev/elan/control */ -+ ELAN4IO_DEVINFO, -+ ELAN4IO_GET_POSITION, -+ ELAN4IO_SET_POSITION, -+ ELAN4IO_GET_PARAMS, -+ ELAN4IO_SET_PARAMS, -+ -+ /* /dev/elan4/user */ -+ ELAN4IO_POSITION, -+ ELAN4IO_FREE, -+ ELAN4IO_ATTACH, -+ ELAN4IO_DETACH, -+ ELAN4IO_BLOCK_INPUTTER, -+ -+ ELAN4IO_ADD_P2PVP, -+ ELAN4IO_ADD_BCASTVP, -+ ELAN4IO_REMOVEVP, -+ ELAN4IO_SET_ROUTE, -+ ELAN4IO_RESET_ROUTE, -+ ELAN4IO_GET_ROUTE, -+ ELAN4IO_CHECK_ROUTE, -+ -+ ELAN4IO_ALLOCCQ, -+ ELAN4IO_FREECQ, -+ ELAN4IO_SETPERM32, -+ ELAN4IO_CLRPERM32, -+ ELAN4IO_TRAPSIG, -+ ELAN4IO_TRAPHANDLER32, -+ ELAN4IO_REQUIRED_MAPPINGS, -+ -+ ELAN4IO_RESUME_EPROC_TRAP, -+ ELAN4IO_RESUME_CPROC_TRAP, -+ ELAN4IO_RESUME_DPROC_TRAP, -+ ELAN4IO_RESUME_TPROC_TRAP, -+ ELAN4IO_RESUME_IPROC_TRAP, -+ -+ ELAN4IO_FLUSH_ICACHE, -+ -+ ELAN4IO_STOP_CTXT, -+ -+ ELAN4IO_ALLOC_INTCOOKIE, -+ ELAN4IO_FREE_INTCOOKIE, -+ ELAN4IO_ARM_INTCOOKIE, -+ ELAN4IO_WAIT_INTCOOKIE, -+ -+ ELAN4IO_ALLOC_TRAP_QUEUES, -+ ELAN4IO_NETERR_MSG, -+ ELAN4IO_NETERR_TIMER, -+ ELAN4IO_NETERR_FIXUP, -+ -+ ELAN4IO_DUMPCQ32, -+}; -+ -+static int elan4_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+elan4_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ elan4_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block elan4_dump_notifier = -+{ -+ notifier_call: elan4_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+elan4_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ elan4_shutdown_devices (0); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_reboot_notifier = -+{ -+ notifier_call: elan4_reboot_event, -+ priority: 0, -+}; -+ -+#if !defined(NO_PANIC_NOTIFIER) -+static int -+elan4_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan4_shutdown_devices (1); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_panic_notifier = -+{ -+ notifier_call: elan4_panic_event, -+ priority: 0, -+}; -+#endif -+ -+static int __init -+elan4_init (void) -+{ -+ int err; -+ struct pci_dev *pdev; -+ int count; -+#if defined(__ia64) -+ int seenRevA = 0; -+#endif -+ -+ if ((err = register_chrdev (ELAN4_MAJOR, ELAN4_NAME, &elan4_fops)) < 0) -+ return (err); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle = devfs_mk_dir (NULL, "elan4", NULL); -+#endif -+ -+ intcookie_init(); -+ elan4_debug_init(); -+ elan4_procfs_init(); -+ -+#ifdef CONFIG_MPSAS -+ sas_init(); -+#endif -+ -+ if (sdram_cfg_lo != 0 && sdram_cfg_hi != 0) -+ sdram_cfg = (((unsigned long long) sdram_cfg_hi) << 32) | ((unsigned long long) sdram_cfg_lo); -+ -+ for (count = 0, pdev = NULL; (pdev = pci_find_device(PCI_VENDOR_ID_QUADRICS, PCI_DEVICE_ID_ELAN4, pdev)) != NULL ; count++) -+ { -+#if defined(__ia64) -+ unsigned char revid; -+ -+ pci_read_config_byte (pdev, PCI_REVISION_ID, &revid); -+ -+ if (revid == PCI_REVISION_ID_ELAN4_REVA && seenRevA++ != 0 && pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL)) -+ { -+ printk ("elan: only a single elan4a supported on rx2600\n"); -+ continue; -+ } -+#endif -+ -+ if (count < ELAN4_MAX_CONTROLLER) -+ elan4_attach_device (count, pdev); -+ } -+ -+ if (count >= ELAN4_MAX_CONTROLLER) -+ printk ("elan: found %d elan4 devices - only support %d\n", count, ELAN4_MAX_CONTROLLER); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (elan4_ioctl32_cmds[i], elan4_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan4_dump_notifier); -+#endif -+ register_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+#ifdef MODULE -+static void __exit -+elan4_exit (void) -+{ -+ int i; -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (elan4_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan4_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ if (elan4_devices[i] != NULL) -+ elan4_detach_device (elan4_devices[i]); -+ -+ elan4_procfs_fini(); -+ elan4_debug_fini(); -+ intcookie_fini(); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (devfs_handle); -+#endif -+ -+ unregister_chrdev(ELAN4_MAJOR, ELAN4_NAME); -+} -+ -+module_init (elan4_init); -+module_exit (elan4_exit); -+ -+#else -+__initcall (elan4_init); -+#endif -+ -+/* -+ * Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN4_DEVICE_MASK 0x3F -+#define ELAN4_DEVICE(inode) (MINOR((inode)->i_rdev) & ELAN4_DEVICE_MASK) -+ -+#define ELAN4_MINOR_CONTROL 0 -+#define ELAN4_MINOR_MEM 1 -+#define ELAN4_MINOR_USER 2 -+ -+#define ELAN4_MINOR_SHIFT 6 -+#define ELAN4_MINOR(inode) (MINOR((inode)->i_rdev) >> ELAN4_MINOR_SHIFT) -+ -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elan4_attach_device (int instance, struct pci_dev *pdev) -+{ -+ ELAN4_DEV *dev; -+ int res; -+ -+ KMEM_ALLOC (dev, ELAN4_DEV *, sizeof (ELAN4_DEV), 1); -+ if ((dev == NULL)) -+ return (-ENOMEM); -+ memset (dev, 0, sizeof (ELAN4_DEV)); -+ -+ /* setup os dependent section of ELAN4_DEV */ -+ dev->dev_instance = instance; -+ dev->dev_osdep.pdev = pdev; -+ -+#if !defined(IOPROC_PATCH_APPLIED) -+ printk ("elan%d: pinning down pages as no ioproc patch\n", dev->dev_instance); -+ -+ default_features |= ELAN4_FEATURE_NO_IOPROC | ELAN4_FEATURE_PIN_DOWN; -+ -+ /* Also change this flag so as to make the /proc entry consistent */ -+ user_ioproc_enabled = 0; -+#endif -+ -+ /* initialise the devinfo */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_VENDOR_ID, &dev->dev_devinfo.dev_vendor_id); -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_DEVICE_ID, &dev->dev_devinfo.dev_device_id); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_REVISION_ID, &dev->dev_devinfo.dev_revision_id); -+ -+ dev->dev_devinfo.dev_rail = instance; -+ dev->dev_devinfo.dev_driver_version = ELAN4_DRIVER_VERSION; -+ dev->dev_devinfo.dev_num_down_links_value = 0; -+ dev->dev_devinfo.dev_params_mask = (1 << ELAN4_PARAM_DRIVER_FEATURES); -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] = default_features; -+ -+ dev->dev_position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ dev->dev_regs_phys = bar2phys (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ dev->dev_sdram_phys = bar2phys (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ /* initialise the data structures and map the device */ -+ if ((res = elan4_initialise_device (dev)) != 0) -+ { -+ kfree (dev); -+ return res; -+ } -+ -+#if defined(CONFIG_DEVFS_FS) -+ { -+ char name[16]; -+ -+ sprintf (name, "control%d", dev->dev_instance); -+ dev->dev_osdep.devfs_control = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_CONTROL << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR | S_IWUSR, -+ &elan4_fops, NULL); -+ sprintf (name, "sdram%d", dev->dev_instance); -+ dev->dev_osdep.devfs_sdram = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_MEM << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ sprintf (name, "user%d", dev->dev_instance); -+ dev->dev_osdep.devfs_user = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_USER << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ } -+#endif -+ -+ /* add the procfs entry */ -+ elan4_procfs_device_init (dev); -+ -+ /* allow the device to be referenced now */ -+ elan4_devices[instance] = dev; -+ -+ if ((disabled & (1 << instance)) == 0) -+ { -+ if (elan4_start_device (dev) != 0) -+ { -+ printk ("elan%d: auto-start of device failed\n", dev->dev_instance); -+ -+ elan4_detach_device (dev); -+ return (-ENXIO); -+ } -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ } -+ -+#if defined (__sparc) -+ printk ("elan%d: at pci %s (irq = %s)\n", instance, pdev->slot_name, __irq_itoa(pdev->irq)); -+#else -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ printk ("elan%d: at pci %s (irq = %d)\n", instance, pdev->slot_name, pdev->irq); -+#else -+ printk ("elan%d: at pci %s (irq = %d)\n", instance, pci_name (pdev), pdev->irq); -+#endif -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Called by cleanup_module() for each board found on PCI. -+ */ -+static void -+elan4_detach_device (ELAN4_DEV *dev) -+{ -+ /* stop the chip and free of resources */ -+ if (dev->dev_state == ELAN4_STATE_STARTED) -+ elan4_stop_device (dev); -+ -+ elan4_devices[dev->dev_instance] = NULL; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (dev->dev_osdep.devfs_control); -+ devfs_unregister (dev->dev_osdep.devfs_sdram); -+ devfs_unregister (dev->dev_osdep.devfs_user); -+#endif -+ -+ /* remove the procfs entry */ -+ elan4_procfs_device_fini (dev); -+ -+ /* unmap the device and finalise the data structures */ -+ elan4_finalise_device (dev); -+ -+ KMEM_FREE (dev, sizeof(*dev)); -+} -+ -+/* -+ * Maintain reference counts on the device -+ */ -+ELAN4_DEV * -+elan4_reference_device (int instance, int state) -+{ -+ ELAN4_DEV *dev = elan4_devices[instance]; -+ -+ if (dev == NULL) -+ return (NULL); -+ -+ kmutex_lock (&dev->dev_lock); -+ -+ if ((dev->dev_state & state) == 0) -+ { -+ kmutex_unlock (&dev->dev_lock); -+ return (NULL); -+ } -+ -+ dev->dev_references++; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MPSAS -+ sas_set_position(dev); -+#endif -+ -+ return (dev); -+} -+ -+void -+elan4_dereference_device (ELAN4_DEV *dev) -+{ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_references--; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+static void -+elan4_shutdown_devices(int panicing) -+{ -+ ELAN4_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan4_devices[i]) != NULL) -+ { -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->dev_instance); -+ -+ /* set the inputters to discard everything */ -+ if (! panicing) spin_lock (&dev->dev_haltop_lock); -+ -+ if (dev->dev_discard_lowpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ if (dev->dev_discard_highpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ -+ if (! panicing) spin_unlock (&dev->dev_haltop_lock); -+ -+ /* ideally we'd like to halt all the outputters too, -+ * however this will prevent the kernel comms flushing -+ * to work correctly ..... -+ */ -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+/* -+ * /dev/elan4/controlX - control device -+ * -+ */ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STOPPED | ELAN4_STATE_STARTED); -+ CONTROL_PRIVATE *pr; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, CONTROL_PRIVATE *, sizeof (CONTROL_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ -+ return (-ENOMEM); -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_open: dev=%p pr=%p\n", dev, pr); -+ -+ pr->pr_dev = dev; -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ ELAN4_DEV *dev = pr->pr_dev; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_release: pr=%p\n", pr); -+ -+ //if (pr->pr_boundary_scan) -+ // elan4_clear_boundary_scan (dev, pr); -+ -+ elan4_dereference_device (dev); -+ -+ KMEM_FREE (pr, sizeof(*pr)); -+ -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ if (copy_to_user ((void *) arg, &pr->pr_dev->dev_devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ -+ case ELAN4IO_GET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ elan4_get_position (pr->pr_dev, &pos); -+ -+ if (copy_to_user ((void *) arg, &pos, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ if (copy_from_user (&pos, (void *) arg, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (elan4_set_position (pr->pr_dev, &pos)); -+ } -+ -+ case ELAN4IO_OLD_GET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ unsigned short mask; -+ -+ elan4_get_params (pr->pr_dev, ¶ms, &mask); -+ -+ if (copy_to_user ((void *) arg, ¶ms, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_OLD_SET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ -+ if (copy_from_user (¶ms, (void *) arg, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, ¶ms, 3); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PARAMS_STRUCT))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, &args.p_params, args.p_mask); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_GET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ elan4_get_params (pr->pr_dev, &args.p_params, &args.p_mask); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ unsigned bar = OFF_TO_BAR (vma->vm_pgoff << PAGE_SHIFT); -+ unsigned long off = OFF_TO_OFFSET (vma->vm_pgoff << PAGE_SHIFT); -+ long len = vma->vm_end - vma->vm_start; -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_mmap: pr=%p bar=%x off=%x\n", pr, bar, off); -+ -+ /* check bar number and translate the standard psuedo bars */ -+ switch (bar) -+ { -+ case ELAN4_BAR_SDRAM: -+ case ELAN4_BAR_REGISTERS: -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ if (off < 0 || (off + len) > pci_resource_len (pr->pr_dev->dev_osdep.pdev, bar)) -+ return (-EINVAL); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (vma->vm_start, pci_resource_start (pr->pr_dev->dev_osdep.pdev, bar) + off, len, vma->vm_page_prot)) -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan4/sdramX - sdram access device -+ */ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_USER, DBG_MEM, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan4_sdram_free (pr->pr_dev, pg->pg_addr, SDRAM_PAGE_SIZE); -+ -+ KMEM_FREE(pg, sizeof(*pg)); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, unsigned long pgoff) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ ASSERT ((pgoff & SDRAM_PGOFF_OFFSET) == 0); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx\n", pr, pgoff); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx -> found %p addr=%x\n", pr, pgoff, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ KMEM_ALLOC (npg, MEM_PAGE *, sizeof (MEM_PAGE), 1); -+ if ((npg == NULL)) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan4_sdram_alloc (pr->pr_dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ KMEM_FREE(npg, sizeof(*npg)); -+ return (NULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ /* zero the page before returning it to the user */ -+ elan4_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, SDRAM_PAGE_SIZE); -+#endif -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, unsigned long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED); -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, MEM_PRIVATE *, sizeof (MEM_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ elan4_dereference_device (pr->pr_dev); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void -+mem_vma_open (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK); -+} -+ -+static void -+mem_vma_close (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occurred -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+} -+ -+struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: vma=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, file); -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK)) == NULL) -+ goto failed; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: addr %lx -> pg=%p sdram=%x+%x bar=%lx\n", -+ addr, pg, pg->pg_addr, (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, -+ pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (pr->pr_dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ pg->pg_addr + (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, PAGE_SIZE, vma->vm_page_prot)) -+ { -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+#if defined(conditional_schedule) -+ conditional_schedule(); -+#endif -+ } -+ -+ return (0); -+ -+ failed: -+ /* free of any pages we've already allocated/referenced */ -+ while (pgoff-- > vma->vm_pgoff) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+ -+ return (-ENOMEM); -+} -+ -+int -+mem_pteload (struct vm_area_struct *vma, unsigned long maddr, USER_CTXT *uctx, E4_Addr eaddr, int perm) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ unsigned long pgoff = vma->vm_pgoff + ((maddr - vma->vm_start) >> PAGE_SHIFT); -+ sdramaddr_t pgaddr; -+ MEM_PAGE *pg; -+ register int i, res; -+ -+ if (pr->pr_dev != dev) -+ return -EINVAL; -+ -+ if ((pg = mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK)) == NULL) -+ return -ENOMEM; -+ -+ pgaddr = pg->pg_addr + ((pgoff & SDRAM_PGOFF_OFFSET) << PAGE_SHIFT); -+ -+ if (! elan4mmu_sdram_aliascheck (&uctx->uctx_ctxt, eaddr, pgaddr)) -+ return -EINVAL; -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((pgaddr + i) >> PTE_PADDR_SHIFT) | PTE_SetPerm (perm); -+ -+ if ((res = elan4mmu_pteload (&uctx->uctx_ctxt, 0, eaddr + i, HE_TYPE_SDRAM, newpte)) < 0) -+ return res; -+ } -+ -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+ -+ return 0; -+} -+ -+ -+/* -+ * /dev/elan4/userX - control device -+ * -+ */ -+static inline void -+user_private_free (USER_PRIVATE *pr) -+{ -+ ELAN4_DEV *dev = pr->pr_uctx->uctx_ctxt.ctxt_dev; -+ -+ ASSERT (atomic_read (&pr->pr_ref) == 0); -+ -+ user_free (pr->pr_uctx); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ elan4_dereference_device (dev); -+} -+ -+#if defined(IOPROC_PATCH_APPLIED) -+static void -+user_ioproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ elan4mmu_invalidate_ctxt (&pr->pr_uctx->uctx_ctxt); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+} -+ -+/* -+ * On 2.4 kernels we get passed a mm_struct, whereas on 2.6 kernels -+ * we get the vma which is more usefull -+ */ -+#if defined(IOPROC_MM_STRUCT_ARG) -+static void -+user_ioproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ /* XXXX: this is intended to sync the modified bit from our page tables, -+ * into the main cpu's modified bits - however since we do not -+ * syncronize our modified bit on a ioproc_invalidate_page() call, -+ * then it could get lost if we modify the page after the last -+ * modification and writepage has occurred. Hence we invalidate -+ * all translations and allow it to refault. -+ */ -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ struct vm_area_struct *vma; -+ -+ if (pr->pr_uctx->uctx_ctxt.ctxt_dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_IOPROC_UPDATE) -+ return; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ vma = find_vma_intersection (mm, start, end); -+ -+ user_update_main (pr->pr_uctx, mm, vma, start, end - start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+ -+#else -+ -+static void -+user_ioproc_sync_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ /* XXXX: this is intended to sync the modified bit from our page tables, -+ * into the main cpu's modified bits - however since we do not -+ * syncronize our modified bit on a ioproc_invalidate_page() call, -+ * then it could get lost if we modify the page after the last -+ * modification and writepage has occurred. Hence we invalidate -+ * all translations and allow it to refault. -+ */ -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ if (pr->pr_uctx->uctx_ctxt.ctxt_dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_IOPROC_UPDATE) -+ return; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ user_update_main (pr->pr_uctx, vma->vm_mm, vma, start, end - start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+#endif /* defined(IOPROC_NO_VMA_RANGE) */ -+ -+static void -+user_ioproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_ioproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_ioproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ if (pr->pr_uctx->uctx_ctxt.ctxt_dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_IOPROC_UPDATE) -+ return; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_page: addr=%lx\n", addr); -+ -+ user_update_main (pr->pr_uctx, vma->vm_mm, vma, addr & PAGE_MASK, PAGE_SIZE); -+} -+#endif /* defined(IOPROC_PATCH_APPLIED) */ -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev; -+ USER_PRIVATE *pr; -+ USER_CTXT *uctx; -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_open: mm=%p users=%d count=%d\n", current->mm, -+ atomic_read (¤t->mm->mm_users), atomic_read (¤t->mm->mm_count)); -+ -+ if ((dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED)) == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, USER_PRIVATE *, sizeof (USER_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ uctx = user_alloc (dev); -+ -+ if (IS_ERR(uctx)) -+ { -+ elan4_dereference_device (dev); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ return PTR_ERR(uctx); -+ } -+ -+ /* initialise refcnt to 1 - one for "file" */ -+ atomic_set (&pr->pr_ref, 1); -+ -+ pr->pr_uctx = uctx; -+ pr->pr_mm = current->mm; -+ -+#if defined(IOPROC_PATCH_APPLIED) -+ if (! (uctx->uctx_ctxt.ctxt_features & ELAN4_FEATURE_NO_IOPROC)) -+ { -+ /* register a ioproc callback to notify us of translation changes */ -+ pr->pr_ioproc.arg = (void *) pr; -+ pr->pr_ioproc.release = user_ioproc_release; -+ pr->pr_ioproc.sync_range = user_ioproc_sync_range; -+ pr->pr_ioproc.invalidate_range = user_ioproc_invalidate_range; -+ pr->pr_ioproc.update_range = user_ioproc_update_range; -+ pr->pr_ioproc.change_protection = user_ioproc_change_protection; -+ pr->pr_ioproc.sync_page = user_ioproc_sync_page; -+ pr->pr_ioproc.invalidate_page = user_ioproc_invalidate_page; -+ pr->pr_ioproc.update_page = user_ioproc_update_page; -+ -+ /* add an extra reference for the ioproc ops */ -+ atomic_inc (&pr->pr_ref); -+ -+ spin_lock (¤t->mm->page_table_lock); -+ ioproc_register_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ } -+#endif -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF (pr->pr_uctx, DBG_FILE, "user_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ int res = 0; -+ -+ PRINTF (uctx, DBG_FILE, "user_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ { -+ ELAN_DEVINFO devinfo = uctx->uctx_ctxt.ctxt_dev->dev_devinfo; -+ -+ if ((devinfo.dev_params_mask & (1 << ELAN4_PARAM_DRIVER_FEATURES)) != 0) -+ devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] = uctx->uctx_ctxt.ctxt_features; -+ -+ if (copy_to_user ((void *) arg, &devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN4IO_POSITION: -+ { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ if (copy_to_user ((void *) arg, &dev->dev_position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN4IO_FREE: -+#if defined(IOPROC_PATCH_APPLIED) -+ if (! (uctx->uctx_ctxt.ctxt_features & ELAN4_FEATURE_NO_IOPROC)) -+ { -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ ioproc_unregister_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_ioproc_release (pr, current->mm); -+ } -+ } -+#endif -+ return (0); -+ -+ case ELAN4IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else if ((res = user_attach (uctx, cap)) == 0 && -+ copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ user_detach (uctx, cap); -+ res = -EFAULT; -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ return (res); -+ } -+ -+ case ELAN4IO_DETACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ user_detach (uctx, cap); -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ return (res); -+ } -+ -+ case ELAN4IO_BLOCK_INPUTTER: -+ user_block_inputter (uctx, arg); -+ return (0); -+ -+ case ELAN4IO_ADD_P2PVP: -+ { -+ ELAN4IO_ADD_P2PVP_STRUCT *args; -+ -+ KMEM_ALLOC (args, ELAN4IO_ADD_P2PVP_STRUCT *, sizeof (ELAN4IO_ADD_P2PVP_STRUCT), 1); -+ if ((args == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_ADD_P2PVP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = user_add_p2pvp (uctx, args->vp_process, &args->vp_capability); -+ -+ KMEM_FREE(args, sizeof(*args)); -+ return (res); -+ } -+ -+ case ELAN4IO_ADD_BCASTVP: -+ { -+ ELAN4IO_ADD_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ADD_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_add_bcastvp (uctx, args.vp_process, args.vp_lowvp, args.vp_highvp)); -+ } -+ -+ case ELAN4IO_REMOVEVP: -+ return (user_removevp (uctx, arg)); -+ -+ case ELAN4IO_SET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_set_route (uctx, args.rt_process, &args.rt_route)); -+ } -+ -+ case ELAN4IO_RESET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_reset_route (uctx, args.rt_process)); -+ } -+ -+ case ELAN4IO_GET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_get_route (uctx, args.rt_process, &args.rt_route)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_CHECK_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_check_route (uctx, args.rt_process, &args.rt_route, &args.rt_error)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_ALLOCCQ: -+ { -+ ELAN4IO_ALLOCCQ_STRUCT args; -+ USER_CQ *ucq; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ return (-EFAULT); -+ -+ ucq = user_alloccq (uctx, args.cq_size & CQ_SizeMask, args.cq_perm & CQ_PermissionMask, -+ (args.cq_type & ELAN4IO_CQ_TYPE_REORDER) ? UCQ_REORDER : 0); -+ if (IS_ERR (ucq)) -+ return PTR_ERR (ucq); -+ -+ args.cq_indx = elan4_cq2idx (ucq->ucq_cq); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ { -+ user_dropcq (uctx, ucq); -+ return (-EFAULT); -+ } -+ -+ /* don't drop the reference on the cq until the context is freed, -+ * or the caller explicitly frees the cq */ -+ return (0); -+ } -+ -+ case ELAN4IO_FREECQ: -+ { -+ USER_CQ *ucq; -+ unsigned indx; -+ -+ if (copy_from_user (&indx, (void *) arg, sizeof (unsigned))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, indx)) == NULL) /* can't free unallocated cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ if ((ucq->ucq_flags & UCQ_SYSTEM)) /* can't free device driver cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* and the one held from the alloccq call */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_DUMPCQ: -+ { -+ ELAN4IO_DUMPCQ_STRUCT args; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq; -+ void *buf; -+ int i; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, args.cq_indx)) == NULL) -+ return (-EINVAL); -+ -+ if (args.bufsize) -+ { -+ E4_uint32 usedBufSize = min(args.cq_size, args.bufsize); -+ -+ KMEM_ALLOC (buf, void *, usedBufSize, 1); -+ -+ if (buf == NULL) -+ return (-ENOMEM); -+ -+ for (i=0; iucq_cq->cq_space + i); -+ -+ if (copy_to_user((void *)args.buffer, buf, usedBufSize)) -+ { -+ KMEM_FREE(buf, args.bufsize); -+ return (-EFAULT); -+ } -+ KMEM_FREE(buf, usedBufSize); -+ args.bufsize = usedBufSize; -+ } -+ -+ args.cq_size = CQ_Size(ucq->ucq_cq->cq_size); -+ args.cq_space = ucq->ucq_cq->cq_space; -+ -+ -+ if (copy_to_user((void *)arg, &args, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ { -+ return (-EFAULT); -+ } -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SETPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPSIG: -+ { -+ ELAN4IO_TRAPSIG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPSIG_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_trap_pid = current->pid; -+ pr->pr_uctx->uctx_trap_signo = args.ts_signo; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)args.th_trapp, args.th_nticks)); -+ } -+ -+ case ELAN4IO_REQUIRED_MAPPINGS: -+ { -+ ELAN4IO_REQUIRED_MAPPINGS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_REQUIRED_MAPPINGS_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_upage_addr = args.rm_upage_addr; -+ pr->pr_uctx->uctx_trestart_addr = args.rm_trestart_addr; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_ALLOC_TRAP_QUEUES: -+ { -+ ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_alloc_trap_queues (uctx, args.tq_ndproc_traps, args.tq_neproc_traps, -+ args.tq_ntproc_traps, args.tq_nthreads, args.tq_ndmas)); -+ } -+ -+ case ELAN4IO_RESUME_EPROC_TRAP: -+ { -+ ELAN4IO_RESUME_EPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_EPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_eproc_trap (pr->pr_uctx, args.rs_addr)); -+ } -+ -+ case ELAN4IO_RESUME_CPROC_TRAP: -+ { -+ ELAN4IO_RESUME_CPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_CPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_cproc_trap (pr->pr_uctx, args.rs_indx)); -+ } -+ -+ case ELAN4IO_RESUME_DPROC_TRAP: -+ { -+ ELAN4IO_RESUME_DPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_DPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_dproc_trap (pr->pr_uctx, &args.rs_desc)); -+ } -+ -+ case ELAN4IO_RESUME_TPROC_TRAP: -+ { -+ ELAN4IO_RESUME_TPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_TPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_tproc_trap (pr->pr_uctx, &args.rs_regs)); -+ } -+ -+ case ELAN4IO_RESUME_IPROC_TRAP: -+ { -+ ELAN4IO_RESUME_IPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_IPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_iproc_trap (pr->pr_uctx, args.rs_channel, args.rs_trans, -+ &args.rs_header, &args.rs_data)); -+ } -+ -+ case ELAN4IO_FLUSH_ICACHE: -+ elan4_flush_icache (&uctx->uctx_ctxt); -+ return (0); -+ -+ case ELAN4IO_STOP_CTXT: -+ if (arg) -+ user_swapout (uctx, UCTX_USER_STOPPED); -+ else -+ user_swapin (uctx, UCTX_USER_STOPPED); -+ return (0); -+ -+ case ELAN4IO_ALLOC_INTCOOKIE_TABLE: -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ { -+ tbl = intcookie_alloc_table(cap); -+ -+ if (tbl == NULL) -+ res = -ENOMEM; -+ else -+ { -+ /* Install the intcookie table we've just created */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table != NULL) -+ res = -EBUSY; -+ else -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ /* drop the table we created if there already was one */ -+ if (res != 0) -+ intcookie_free_table (tbl); -+ } -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE_TABLE: -+ { -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&uctx->uctx_spinlock); -+ tbl = uctx->uctx_intcookie_table; -+ uctx->uctx_intcookie_table = NULL; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ if (tbl != NULL) -+ intcookie_free_table (tbl); -+ -+ return (tbl == NULL ? -EINVAL : 0); -+ } -+ -+ case ELAN4IO_ALLOC_INTCOOKIE: -+ { -+ /* For backwards compatibility with the old libs (pre 1.8.0) -+ * we allocate an intcookie table on the first cookie -+ * alloc if one hasn't be created already -+ */ -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ /* Create a dummy capability */ -+ elan_nullcap(cap); -+ -+ /* Must be unique for each process on a node */ -+ cap->cap_mycontext = (int) ELAN4_TASK_HANDLE(); -+ -+ /* Create a new intcookie table */ -+ tbl = intcookie_alloc_table(cap); -+ -+ /* Hang intcookie table off uctx */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ } -+ else -+ { -+ spin_unlock (&uctx->uctx_spinlock); -+ intcookie_free_table(tbl); -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ } -+ -+ return (intcookie_alloc (uctx->uctx_intcookie_table, arg)); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_free (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_ARM_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_arm (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_WAIT_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_wait (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_FIRE_INTCOOKIE: -+ { -+ ELAN4IO_FIRECAP_STRUCT *args; -+ -+ KMEM_ALLOC (args, ELAN4IO_FIRECAP_STRUCT *, sizeof (ELAN4IO_FIRECAP_STRUCT), 1); -+ if ((args == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_FIRECAP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = intcookie_fire_cap (&args->fc_capability, args->fc_cookie); -+ -+ KMEM_FREE(args, sizeof(*args)); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_NETERR_MSG: -+ { -+ ELAN4IO_NETERR_MSG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_MSG_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_send_neterr_msg (uctx, args.nm_vp, args.nm_nctx, args.nm_retries, &args.nm_msg)); -+ } -+ -+ case ELAN4IO_NETERR_TIMER: -+ { -+ unsigned long ticks = ((unsigned long) arg * HZ) / 1000; -+ -+ PRINTF (uctx, DBG_NETERR, "elan4_neterr_timer: arg %ld inc %ld\n", arg, ticks); -+ -+ mod_timer (&uctx->uctx_neterr_timer, (jiffies + (ticks > 0 ? ticks : 1))); -+ return 0; -+ } -+ -+ case ELAN4IO_NETERR_FIXUP: -+ { -+ ELAN4IO_NETERR_FIXUP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_FIXUP_STRUCT))) -+ return (-EFAULT); -+ -+ if (args.nf_sten) -+ return (user_neterr_sten (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ else -+ return (user_neterr_dma (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ } -+ -+ case ELAN4IO_LOAD_TRANSLATION: -+ { -+ ELAN4IO_TRANSLATION_STRUCT args; -+ unsigned long base, top; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRANSLATION_STRUCT))) -+ return (-EFAULT); -+ -+ top = (args.tr_addr + args.tr_len - 1) | (PAGE_SIZE-1); -+ base = args.tr_addr & PAGE_MASK; -+ -+ return user_load_range (uctx, base, top - base + 1, args.tr_access); -+ } -+ case ELAN4IO_UNLOAD_TRANSLATION: -+ { -+ ELAN4IO_TRANSLATION_STRUCT args; -+ unsigned long base, top; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRANSLATION_STRUCT))) -+ return (-EFAULT); -+ -+ top = (args.tr_addr + args.tr_len - 1) | (PAGE_SIZE-1); -+ base = args.tr_addr & PAGE_MASK; -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, base, top - base + 1); -+ -+ return 0; -+ } -+ -+ default: -+ PRINTF (uctx, DBG_FILE, "user_ioctl: invalid ioctl %x\n", cmd); -+ return (-EINVAL); -+ } -+} -+ -+static void -+user_vma_open (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ elan4_getcqa (&uctx->uctx_ctxt, pgoff); -+} -+ -+static void -+user_vma_close (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the same comments apply as mem_vma_close */ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ if (elan4_getcqa (&uctx->uctx_ctxt, pgoff) != NULL) -+ { -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* and the one held by the mmap */ -+ } -+} -+ -+struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CQA *cqa; -+ unsigned long addr; -+ unsigned long pgoff; -+ int res; -+ ioaddr_t ioaddr; -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ switch (pgoff) -+ { -+ default: -+ PRINTF (uctx, DBG_FILE, "user_mmap: command queue %ld mapping at %lx\n", pgoff, addr); -+ -+ if ((cqa = elan4_getcqa (&uctx->uctx_ctxt, pgoff)) == NULL) -+ { -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: cqa=%p idx=%d num=%d ref=%d\n", cqa, cqa->cqa_idx, cqa->cqa_cqnum, cqa->cqa_ref); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE) && (cqa->cqa_type & CQ_Reorder) != 0) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (%lx, %lx, %lx, %lx)\n", -+ addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, PAGE_SIZE, -+ vma->vm_page_prot); -+ -+ if (__io_remap_page_range (addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range failed\n"); -+ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_USER_REGS: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVA_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVB_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ default: -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: user_regs at %lx ioaddr %lx prot %lx\n", -+ addr, ioaddr, vma->vm_page_prot.pgprot); -+ -+ if (__io_remap_page_range (addr, (ioaddr & PAGEMASK), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ res = -EAGAIN; -+ goto failed; -+ } -+ -+ break; -+ -+ case ELAN4_OFF_USER_PAGE: -+ PRINTF (uctx, DBG_FILE, "user_mmap: shared user page - kaddr=%lx uaddr=%lx phys=%lx\n", -+ uctx->uctx_upage, addr, kmem_to_phys (uctx->uctx_upage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ -+ if (__remap_page_range (addr, kmem_to_phys (uctx->uctx_upage), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (user_page) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_TPROC_TRAMPOLINE: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: tproc trampoline - kaddr=%lx uaddr=%lx phys=%lx\n", uctx->uctx_trampoline, addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)), -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (tproc_trampoline) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_DEVICE_STATS: -+ printk ("user_mmap: device_stats\n"); -+ break; -+ } -+ -+ } -+ -+ return (0); -+ -+ failed: -+ for (addr -= PAGE_SIZE, pgoff--; addr >= vma->vm_start; addr -= PAGE_SIZE, pgoff--) -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ return (res); -+} -+ -+int -+user_pteload (struct vm_area_struct *vma, unsigned long maddr, USER_CTXT *uctx, E4_Addr eaddr, int perm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ unsigned long pgoff = vma->vm_pgoff + ((maddr - vma->vm_start) >> PAGE_SHIFT); -+ register int i, res; -+ -+ if (pr->pr_uctx != uctx) -+ return -EINVAL; -+ -+ switch (pgoff) -+ { -+ default: -+ { -+ ELAN4_CQA *cqa; -+ unsigned long cqaddr; -+ -+ if ((cqa = elan4_getcqa (&uctx->uctx_ctxt, pgoff)) == NULL) -+ return -EINVAL; -+ -+ cqaddr = (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((cqaddr + i) >> PTE_PADDR_SHIFT) | PTE_SetPerm (perm) | PTE_CommandQueue; -+ -+ if ((res = elan4mmu_pteload (&uctx->uctx_ctxt, 0, eaddr + i, HE_TYPE_COMMAND, newpte)) < 0) -+ return res; -+ } -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); -+ -+ return 0; -+ } -+ -+ case ELAN4_OFF_USER_REGS: -+ { -+ u32 blow, bhigh; -+ physaddr_t ioaddr; -+ -+ /* compute a local pci address from our register BAR */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_BASE_ADDRESS_2, &blow); -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_BASE_ADDRESS_3, &bhigh); -+ -+ ioaddr = (((physaddr_t) bhigh) << 32) | (blow & PCI_BASE_ADDRESS_MEM_MASK); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ ioaddr |= ELAN4_REVA_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ ioaddr |= ELAN4_REVB_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = (((ioaddr & PAGE_MASK) | i) >> PTE_PADDR_SHIFT) | PTE_SetPerm (perm) | PTE_PciNotLocal; -+ -+ if ((res = elan4mmu_pteload (&uctx->uctx_ctxt, 0, eaddr + i, HE_TYPE_REGS, newpte)) < 0) -+ return res; -+ } -+ -+ return 0; -+ } -+ -+ case ELAN4_OFF_USER_PAGE: -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ if ((res = elan4mmu_pteload_page (&uctx->uctx_ctxt, 0, eaddr, pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage)), perm)) < 0) -+ return res; -+ return 0; -+ -+ case ELAN4_OFF_TPROC_TRAMPOLINE: -+ { -+ sdramaddr_t trampoline = uctx->uctx_trampoline + (maddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)); -+ -+ if (! elan4mmu_sdram_aliascheck (&uctx->uctx_ctxt, eaddr, trampoline)) -+ return -EINVAL; -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((trampoline + i) >> PTE_PADDR_SHIFT) | PTE_SetPerm (perm); -+ -+ if ((res = elan4mmu_pteload (&uctx->uctx_ctxt, 0, eaddr + i, HE_TYPE_SDRAM, newpte)) < 0) -+ return res; -+ } -+ return 0; -+ } -+ } -+} -+ -+/* driver entry points */ -+static int -+elan4_open (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_open: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_release: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ struct inode *inode = file->f_dentry->d_inode; -+ extern int sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg); -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ if (ELAN4_MINOR (inode) == ELAN4_MINOR_USER) -+ { -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ -+ if (current->mm != pr->pr_mm) -+ return -EINVAL; -+ -+ switch (cmd) -+ { -+ case ELAN4IO_SETPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: setperm maddr=%x eaddr=%llx len=%llxx perm=%d\n", -+ args.ps_maddr, args.ps_eaddr,args.ps_len, args.ps_perm); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: clrperm eaddr=%llx len=%ll\n", -+ args.ps_eaddr, args.ps_len); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER32: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: traphandler trapp=%x nticks=%d\n", -+ args.th_trapp, args.th_nticks); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)(unsigned long)args.th_trapp, args.th_nticks)); -+ } -+ } -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: fd=%d cmd=%x arg=%lx file=%p\n", fd, cmd, arg, file); -+ return (sys_ioctl (fd, cmd, arg)); -+} -+#endif -+ -+ -+ -+static int -+elan4_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx\n", -+ ELAN4_DEVICE (file->f_dentry->d_inode), ELAN4_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff); -+ -+ switch (ELAN4_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN4_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN4_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+void -+elan4_update_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *bridge) -+{ -+ u16 cnf; -+ -+ pci_read_config_word (bridge, 0x40 /* CNF */, &cnf); -+ -+ /* We expect the CNF register to be configured as follows -+ * -+ * [8] == 1 PMODE PCI Mode -+ * [7:6] == 2/3 PFREQ PCI Frequency (100/133) -+ * [5] == 0 RSDIS Restreaming Disable -+ * [4:3] == 0x PP Prefetch Policy -+ * [2] == 0 DTD Delayed Transaction Depth -+ * [1:0] == 10 MDT MaximumDelaedTransactions -+ */ -+ -+ if ((cnf & (1 << 8)) == 0) -+ printk ("elan%d: strangeness - elan reports PCI-X but P64H2 reports PCI mode !\n", dev->dev_instance); -+ else if ((cnf & 0xb7) != 0x82 && (cnf & 0xb7) != 0x84 && optimise_pci_bus < 2) -+ printk ("elan%d: P64H2 CNF is not configured as expected : RSDIS=%d PP=%d DTD=%d MDT=%d\n", -+ dev->dev_instance, (cnf >> 5) & 1, (cnf >> 3) & 3, (cnf >> 2) & 1, cnf & 3); -+ else -+ { -+ switch ((cnf >> 6) & 3) -+ { -+ case 2: /* PCI-X 100 */ -+ pci_write_config_word (bridge, 0xfc /* PC100 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 100\n", dev->dev_instance); -+ -+ break; -+ -+ case 3: /* PCI-X 133 */ -+ pci_write_config_word (bridge, 0xfe /* PC133 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 133\n", dev->dev_instance); -+ break; -+ } -+ -+ pci_write_config_word (bridge, 0x40 /* CNF */, (cnf & 0xfff8) | 0x4); /* DTD=1 MDT=0 */ -+ } -+} -+ -+int -+elan4_optimise_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+ struct pci_bus *bus = pdev->bus; -+ struct pci_dev *bridge = bus->self; -+ unsigned int devcount = 0; -+ u8 revision; -+ u32 ectrl; -+ struct list_head *el; -+ -+ pci_read_config_dword (pdev, PCI_ELAN_CONTROL, &ectrl); -+ -+ /* We can only run in PCI-Xmode with a B1 stepping P64H2 because of P64H2 Errata 3 */ -+ pci_read_config_byte (bridge, PCI_REVISION_ID, &revision); -+ if (revision < 0x04) -+ { -+ if ((ectrl & ECTRL_INITIALISATION_MODE) != Pci2_2) -+ { -+ static const char *p64h2_stepping[4] = {"UNKNOWN", "UNKNOWN", "UNKNOWN", "B0"}; -+ -+ printk ("elan%d: unable to use device because of P64H2 Errata 3 on\n" -+ " %s stepping part and running in a PCI-X slot\n", -+ dev->dev_instance, p64h2_stepping[revision]); -+ return -EINVAL; -+ } -+ } -+ -+ /* We can only alter the bus configuration registers if the Elan is the only device -+ * on the bus ... */ -+ list_for_each (el, &bus->devices) { -+ struct pci_dev *pcip = list_entry (el, struct pci_dev, bus_list); -+ -+ if (pcip == pdev || (pcip->vendor == PCI_VENDOR_ID_INTEL && pcip->device == 0x1462 /* P64H2 HOTPLUG */)) -+ continue; -+ -+ devcount++; -+ } -+ -+ if (devcount > 0 || !list_empty (&bus->children)) -+ { -+ printk ("elan%d: unable to optimise P64H2 settings as %s%s\n", dev->dev_instance, -+ (devcount > 0) ? "more than one device on bus" : "", -+ ! list_empty (&bus->children) ? "has child buses" : ""); -+ return 0; -+ } -+ -+#ifdef __ia64 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ { -+ struct pci_dev *pcip; -+ unsigned int sioh_good = 0; -+ unsigned int sioh_downgrade = 0; -+ unsigned int snc_good = 0; -+ unsigned int snc_downgrade = 0; -+ -+ /* Search for the associated SIOH and SNC on ia64, -+ * if we have a C2 SIOH and a C0/C1 SNC, then we can -+ * reconfigure the P64H2 as follows: -+ * CNF:MDT = 0 -+ * CNF:DTD = 1 -+ * CNF:PC133 = 7777 -+ * -+ * if not, then issue a warning that down rev parts -+ * affect bandwidth. -+ */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x500, pcip)); ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ if (revision >= 0x21) -+ snc_good++; -+ else -+ { -+ printk ("elan%d: SNC revision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x00 ? "A0" : revision == 0x01 ? "A1" : -+ revision == 0x02 ? "A2" : revision == 0x03 ? "A3" : -+ revision == 0x10 ? "B0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : "UNKNOWN"); -+ -+ snc_downgrade++; -+ } -+ } -+ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x510, pcip)) != NULL; ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ -+ if (revision >= 0x22) -+ sioh_good++; -+ else -+ { -+ printk ("elan%d: SIOH revsision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x10 ? "C0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : revision == 0x22 ? "C2" : "UNKNOWN"); -+ -+ sioh_downgrade++; -+ } -+ } -+ -+ if (optimise_pci_bus < 2 && (sioh_downgrade || snc_downgrade)) -+ printk ("elan%d: unable to optimise as SNC/SIOH below required C1/C2 steppings\n", dev->dev_instance); -+ else if (optimise_pci_bus < 2 && (sioh_good == 0 || snc_good == 0)) -+ printk ("elan%d: unable to optimise as cannot determine SNC/SIOH revision\n", dev->dev_instance); -+ else -+ elan4_update_intel_p64h2 (dev, bridge); -+ } -+#endif -+ -+#ifdef __i386 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ elan4_update_intel_p64h2 (dev, bridge); -+#endif -+ return 0; -+} -+ -+int -+elan4_optimise_intel_pxh (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= ELAN4_FEATURE_64BIT_READ; -+ -+ return 0; -+} -+ -+void -+elan4_optimise_serverworks_ciobx2 (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ struct pci_dev *pcip; -+ unsigned char bus; -+ unsigned int dor; -+ -+ /* Find the CIOBX2 for our bus number */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, pcip)) != NULL;) -+ { -+ pci_read_config_byte (pcip, 0x44 /* BUSNUM */, &bus); -+ -+ if (pdev->bus->number == bus) -+ { -+ printk ("elan%d: optimise CIOBX2 : setting DOR to disable read pipe lining\n", dev->dev_instance); -+ -+ pci_read_config_dword (pcip, 0x78 /* DOR */, &dor); -+ pci_write_config_dword (pcip, 0x78 /* DOR */, dor | (1 << 16)); -+ -+ printk ("elan%d: disabling write-combining on ServerWorks chipset\n", dev->dev_instance); -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= ELAN4_FEATURE_NO_WRITE_COMBINE; -+ } -+ } -+} -+ -+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_PPC_PSERIES) || defined(__alpha) -+int -+elan4_optimise_pci_map (ELAN4_DEV *dev, unsigned int features) -+{ -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= features; -+ -+ if (pci_set_dma_mask (dev->dev_osdep.pdev, (u64) 0XFFFFFFFFFFFFFFFFull) || -+ pci_set_consistent_dma_mask (dev->dev_osdep.pdev, (u64) 0XFFFFFFFFFFFFFFFFull)) -+ { -+ printk (KERN_ERR "elan%d: unable to set DAC mode\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ -+int -+elan4_optimise_bus (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ -+ if (pdev->bus && pdev->bus->self) -+ { -+ struct pci_dev *bridge = pdev->bus->self; -+ -+ if (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x1460 /* Intel P64H2 */) -+ return elan4_optimise_intel_p64h2 (dev, pdev); -+ -+ /* See http://pciids.sourceforge.net/iii/?i=8086 */ -+ -+ if ((bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0329) /* Intel 6700PXH Fn 0 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032a) /* Intel 6700PXH Fn 2 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032c) /* Intel 6702PXH */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0320) /* Intel PXH-D */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0330) /* Intel 80332 (A segment) */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0332) /* Intel 80332 (B segment) */ -+ ) -+ return elan4_optimise_intel_pxh (dev, pdev); -+ } -+ -+ if (pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL) != NULL) /* on HP ZX1 set the relaxed ordering */ -+ dev->dev_pteval = PTE_RelaxedOrder; /* bit to get better DMA bandwidth. */ -+ -+ if (pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, NULL) != NULL) /* ServerWorks CIOBX2 */ -+ elan4_optimise_serverworks_ciobx2 (dev); -+ -+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_PPC_PESERIES) -+ return elan4_optimise_pci_map (dev, ELAN4_FEATURE_NO_WRITE_COMBINE | ELAN4_FEATURE_PCI_MAP | ELAN4_FEATURE_NO_PREFETCH); -+#endif -+ -+#ifdef __alpha -+ return elan4_optimise_pci_map (dev, ELAN4_FEATURE_PCI_MAP); -+#endif -+ -+#ifdef __sparc -+ if (! (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_PCI_MAP)) -+ dev->dev_pteval |= (0xfffe000000000000 >> PTE_PADDR_SHIFT); -+#endif -+ -+ return 0; -+} -+ -+int -+elan4_pciinit (ELAN4_DEV *dev) -+{ -+ int res; -+ u32 value; -+ u16 command; -+ u8 cacheline; -+ unsigned long flags; -+ -+ if (optimise_pci_bus && (res = elan4_optimise_bus (dev)) <0) -+ return (res); -+ -+ if ((res = pci_enable_device (dev->dev_osdep.pdev)) < 0) -+ return (res); -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ if ((value & ECTRL_INITIALISATION_MODE) == Pci2_2) -+ printk ("elan%d: is an elan4%c (PCI-2.2)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ else -+ { -+ switch (value & ECTRL_INITIALISATION_MODE) -+ { -+ case PciX50To66MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 50-66)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX66to100MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 66-100)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX100to133MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 100-133)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ default: -+ printk ("elan%d: Invalid PCI-X mode\n", dev->dev_instance); -+ return (-EINVAL); -+ } -+ } -+ -+ /* initialise the elan pll control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, &value); -+ -+ if (elan4_pll_cfg) -+ { -+ printk ("elan%d: setting pll control to %08x\n", dev->dev_instance, elan4_pll_cfg); -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, elan4_pll_cfg); -+ } -+ else -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_4_3); -+ else -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_6_5 | SysPll_FeedForwardISel0 | SysPll_FeedForwardISel1); -+ } -+ -+ /* initialise the elan control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ -+ value = ((15 << ECTRL_IPROC_HIGH_PRI_TIME_SHIFT) | -+ (15 << ECTRL_OTHER_HIGH_PRI_TIME_SHIFT) | -+ (value & ECTRL_28_NOT_30_BIT_LOCAL_BAR) | -+ (dev->dev_topaddrmode ? ECTRL_ExtraMasterAddrBits : 0) | -+ ECTRL_ENABLE_LATENCY_RESET | -+ ECTRL_ENABLE_WRITEBURSTS | -+ ECTRL_ENABLE_2_2READBURSTS); -+ -+#ifdef LINUX_SPARC -+ value &= ~(ECTRL_ENABLE_LATENCY_RESET | ECTRL_ENABLE_WRITEBURSTS); -+#endif -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ /* Delay 10ms here if we've changed the sysclock ratio */ -+ /* to allow the PLL to stabalise before proceeding */ -+ udelay (10000); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ { -+ unsigned char val = read_i2c (dev, I2cLedsValue); -+ -+ /* On RevB we have to explicitly reset the PLLs */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ -+ write_i2c (dev, I2cLedsValue, val | 0x80); -+ udelay (1000); -+ -+ /* Issue the PLL counter reset and immediately inhibit all pci interaction -+ * while the PLL is recovering. The write to the PCI_COMMAND register has -+ * to occur within 50uS of the write to the i2c registers */ -+ local_irq_save (flags); -+ write_i2c (dev, I2cLedsValue, val & ~0x80); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, (1 << 10) /* PCI_COMMAND_DISABLE_INT */); -+ local_irq_restore (flags); -+ -+ /* Wait for the write to occur and for the PLL to regain lock */ -+ udelay (20000); udelay (20000); -+ -+ /* Re-enable pci interaction and clear any spurious errors deteced */ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_STATUS, PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command); -+ break; -+ } -+ } -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ /* Enable master accesses */ -+ pci_set_master (dev->dev_osdep.pdev); -+ -+ /* Verify that the memWrInvalidate bit is set */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_CACHE_LINE_SIZE, &cacheline); -+ -+ if ((command & PCI_COMMAND_INVALIDATE) == 0) -+ { -+ printk ("elan%d: enable MemWrInvalidate (cacheline %d)\n", -+ dev->dev_instance, cacheline * 4); -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command | PCI_COMMAND_INVALIDATE); -+ } -+ -+ if (pci_request_regions(dev->dev_osdep.pdev, "elan4")) -+ return -ENODEV; -+ -+ /* add the interrupt handler */ -+ if (request_irq (dev->dev_osdep.pdev->irq, elan4_irq, SA_SHIRQ, "elan4", dev) != 0) -+ { -+ pci_release_regions (dev->dev_osdep.pdev); -+ return -ENXIO; -+ } -+ -+ return (0); -+} -+ -+void -+elan4_updatepll (ELAN4_DEV *dev, unsigned int val) -+{ -+ u32 value; -+ -+ if (elan4_pll_cfg == 0) -+ { -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, &value); -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | val); -+ -+ /* Delay 10ms here if we've changed the sysclock ratio */ -+ /* to allow the PLL to stabalise before proceeding */ -+ udelay (10000); -+ } -+} -+ -+void -+elan4_pcifini (ELAN4_DEV *dev) -+{ -+ u32 value; -+ -+ /* release the interrupt handler */ -+ free_irq (dev->dev_osdep.pdev->irq, dev); -+ -+ /* release the address space */ -+ pci_release_regions (dev->dev_osdep.pdev); -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ pci_disable_device (dev->dev_osdep.pdev); -+} -+ -+void -+elan4_pcierror (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pci = dev->dev_osdep.pdev; -+ u8 type; -+ u16 status, cmd; -+ u32 physlo, physhi, control; -+ -+ printk("elan%d: pci error has occurred\n", dev->dev_instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword (pci, PCI_ELAN_CONTROL, &control); -+ -+ if (control & ECTRL_REC_SPLIT_COMP_MESSAGE) -+ { -+ u32 message, attr; -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control & ~ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &message); -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &attr); -+ -+ printk ("elan%d: pcierror - received split completion message - attr=%08x, message=%08x\n", -+ dev->dev_instance, attr, message); -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_REC_SPLIT_COMP_MESSAGE); /* clear the error */ -+ } -+ else -+ { -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_LO, &physlo); -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_HI, &physhi); -+ pci_read_config_byte (pci, PCI_ELAN_PARITY_TYPE, &type); -+ -+ printk ("elan%d: pcierror - status %x cmd %4x physaddr %08x%08x type %x\n", -+ dev->dev_instance, status, cmd, physhi, physlo, type); -+ -+ if (status & PCI_STATUS_PARITY) -+ printk ("elan%d: parity error signalled (PERR)\n", dev->dev_instance); -+ if (status & PCI_STATUS_DETECTED_PARITY) -+ printk ("elan%d: detected parity error\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_MASTER_ABORT) -+ printk ("elan%d: received master abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_TARGET_ABORT) -+ printk ("elan%d: received target abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_SYSTEM_ERROR) -+ printk ("elan%d: signalled SERR\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_TARGET_ABORT) -+ printk ("elan%d: signalled target abort\n", dev->dev_instance); -+ -+ pci_write_config_word (pci, PCI_STATUS, status); /* clear the errors */ -+ } -+ -+ DISABLE_INT_MASK (dev, INT_PciMemErr); -+ -+#ifdef notdef -+ panic ("elan%d: pcierror\n", dev->dev_instance); /* better panic ! */ -+#endif -+} -+ -+static irqreturn_t -+elan4_irq (int irq, void *arg, struct pt_regs *regs) -+{ -+ if (elan4_1msi0 ((ELAN4_DEV *) arg)) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ioaddr_t -+elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ return (ioaddr_t) ioremap_nocache (pci_resource_start (dev->dev_osdep.pdev, bar) + off, size); -+} -+ -+void -+elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ iounmap ((void *) ptr); -+} -+ -+unsigned long -+elan4_resource_len (ELAN4_DEV *dev, unsigned bar) -+{ -+ return (pci_resource_len (dev->dev_osdep.pdev, bar)); -+} -+ -+void -+elan4_configure_writecombining (ELAN4_DEV *dev) -+{ -+#ifdef CONFIG_MTRR -+ dev->dev_osdep.sdram_mtrr = dev->dev_osdep.regs_mtrr = -1; -+#endif -+ -+ if ((dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ return; -+ -+#if (defined(__i386) || defined(__x86_64)) -+ -+#if defined (X86_FEATURE_PAT) -+ -+#ifndef boot_cpu_has -+# define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) -+#endif -+ -+ /* Try to utilise PAT entries which already exist */ -+ if (boot_cpu_has (X86_FEATURE_PAT)) -+ { -+ unsigned int val0, val1, i; -+ int slot = -1; -+ -+ /* Read the IA32CR_PAT MSR register and see if a slot is -+ * set for write-combinig. Note we assume that all CPUs -+ * are configured the same like they're supposed to. */ -+ rdmsr (0x277, val0, val1); -+ -+ /* Check for PAT write combining entry (value 0x01) */ -+ for (i = 0; i < 4; i++, val0 >>= 8) -+ if ((val0 & 0xff) == 0x01) -+ slot = i; -+ for (i = 4; i < 8; i++, val1 >>= 8) -+ if ((val1 & 0xff) == 0x01) -+ slot = i; -+ -+ if (slot >= 0) -+ { -+ printk ("elan%d: using PAT for write combining (slot %d)\n", dev->dev_instance, slot); -+ -+ pat_pteval = ((slot & 4) ? _PAGE_PSE : 0) | ((slot & 2) ? _PAGE_PCD : 0) | ((slot & 1) ? _PAGE_PWT : 0); -+ return; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_MTRR -+ /* try and initialise the MTRR registers to enable write-combining */ -+ dev->dev_osdep.sdram_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ MTRR_TYPE_WRCOMB, 1); -+ if (dev->dev_osdep.sdram_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for sdram\n", dev->dev_instance); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ { -+ unsigned int cqreorder = dev->dev_cqcount >> 1; -+ unsigned int cqcount = dev->dev_cqcount - cqreorder; -+ -+ dev->dev_osdep.regs_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * cqcount, -+ MTRR_TYPE_WRCOMB, 1); -+ -+ if (dev->dev_osdep.regs_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for command ports\n", dev->dev_instance); -+ else -+ { -+ dev->dev_cqreorder = cqreorder; -+ return; -+ } -+ } -+#endif -+ -+ /* Set flag so that userspace knows write-combining is disabled */ -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= ELAN4_FEATURE_NO_WRITE_COMBINE; -+#endif -+ -+} -+ -+void -+elan4_unconfigure_writecombining (ELAN4_DEV *dev) -+{ -+#if defined (X86_FEATURE_PAT) -+ if (pat_pteval != -1) return; -+#endif -+ -+#ifdef CONFIG_MTRR -+ if (dev->dev_osdep.sdram_mtrr >=0 ) -+ mtrr_del (dev->dev_osdep.sdram_mtrr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ if (dev->dev_cqreorder && dev->dev_osdep.regs_mtrr >= 0) -+ mtrr_del (dev->dev_osdep.regs_mtrr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + dev->dev_cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * (dev->dev_cqcount >> 1)); -+#endif -+} -+ -+EXPORT_SYMBOL(elan4_reference_device); -+EXPORT_SYMBOL(elan4_dereference_device); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/i2c.c linux-2.6.9/drivers/net/qsnet/elan4/i2c.c ---- clean/drivers/net/qsnet/elan4/i2c.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/i2c.c 2004-01-07 08:37:45.000000000 -0500 -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i2c.c,v 1.4 2004/01/07 13:37:45 jon Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/i2c.c,v $*/ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define I2C_POLL_LIMIT 8 -+ -+static int -+i2c_poll_busy (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ volatile unsigned char val; -+ -+ /* wait for any led I2C operation to finish */ -+ while (((val = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cPortBusy) && loop++ < I2C_POLL_LIMIT) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ } -+ if (loop >= I2C_POLL_LIMIT) -+ { -+ printk ("elan%d: I2c has timed out waiting for I2cPortBusy to clear!\n", dev->dev_instance); -+ printk ("elan%d: I2cPortControl=%x I2cLedBase=%x I2cStatus=%x\n", -+ dev->dev_instance, val, read_i2c (dev, I2cLedBase), read_i2c (dev, I2cStatus)); -+ } -+ -+ return val; -+} -+ -+static int -+i2c_poll_stopped (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ unsigned char val=0, newval; -+ -+ /* wait for any led I2C operation to finish. Must see it stopped at least twice */ -+ while (!(((newval = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cStopped) && -+ (val & I2cCntl_I2cStopped)) && -+ (loop++ < I2C_POLL_LIMIT)) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ val = newval; -+ } -+ -+ return val; -+} -+ -+int -+i2c_disable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ -+ if (dev->dev_i2c_led_disabled++ == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) & ~I2cCntl_I2cUpdatingLedReg); -+ -+ if (! (i2c_poll_stopped (dev) & I2cCntl_I2cStopped)) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return -EAGAIN; -+ } -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) & ~I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return 0; -+} -+ -+void -+i2c_enable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ if (--dev->dev_i2c_led_disabled == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+} -+ -+int -+i2c_write (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | (i == (count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_read (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_READ_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, 0xff); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortRead | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ i2c_poll_busy (dev); -+ -+ data[i] = read_i2c (dev, I2cRdData); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_writereg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ printk (" i2c_writereg: off %d failed\n", i); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_readreg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | I2cCntl_I2cPortGenStopBit); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ return i2c_read (dev, address, count, data); -+} -+ -+int -+i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int len, unsigned char *data) -+{ -+ unsigned int top = addr + len; -+ int res; -+ -+ if ((res = i2c_disable_auto_led_update (dev)) == 0) -+ { -+ /* read the rom in chunks that don't span the block boundary */ -+ while (addr < top) -+ { -+ unsigned int thisnob = top - addr; -+ unsigned int blocknob = I2C_24LC16B_BLOCKSIZE - I2C_24LC16B_BLOCKOFFSET(addr); -+ -+ if (thisnob > blocknob) -+ thisnob = blocknob; -+ -+ if ((res = i2c_readreg (dev, I2C_EEPROM_ADDR + I2C_24LC16B_BLOCKADDR(addr), -+ I2C_24LC16B_BLOCKOFFSET(addr), thisnob, data)) < 0) -+ break; -+ -+ addr += thisnob; -+ data += thisnob; -+ } -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/intcookie.c linux-2.6.9/drivers/net/qsnet/elan4/intcookie.c ---- clean/drivers/net/qsnet/elan4/intcookie.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/intcookie.c 2005-02-03 11:24:44.000000000 -0500 -@@ -0,0 +1,371 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.c,v 1.15 2005/02/03 16:24:44 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+static INTCOOKIE_TABLE *intcookie_tables; -+static spinlock_t intcookie_table_lock; -+ -+/* -+ * intcookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+intcookie_drop_entry (INTCOOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+} -+ -+void -+intcookie_init() -+{ -+ spin_lock_init (&intcookie_table_lock); -+} -+ -+void -+intcookie_fini() -+{ -+ spin_lock_destroy (&intcookie_table_lock); -+} -+ -+INTCOOKIE_TABLE * -+intcookie_alloc_table (ELAN_CAPABILITY *cap) -+{ -+ INTCOOKIE_TABLE *tbl, *ntbl; -+ ELAN_CAPABILITY *ncap; -+ -+ KMEM_ZALLOC (ntbl, INTCOOKIE_TABLE *, sizeof (INTCOOKIE_TABLE), 1); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ -+ if (ncap == NULL) -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ return (NULL); -+ } -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_cap = ncap; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ /* Save supplied cap */ -+ memcpy (ncap, cap, ELAN_CAP_SIZE(cap)); -+ -+ if ((ntbl->tbl_next = intcookie_tables) != NULL) -+ intcookie_tables->tbl_prev = ntbl; -+ intcookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&intcookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ KMEM_FREE (ncap, ELAN_CAP_SIZE(cap)); -+ return (tbl); -+ } -+} -+ -+void -+intcookie_free_table (INTCOOKIE_TABLE *tbl) -+{ -+ INTCOOKIE_ENTRY *ent; -+ -+ spin_lock (&intcookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&intcookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ intcookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ intcookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl->tbl_cap, ELAN_CAP_SIZE(tbl->tbl_cap)); -+ KMEM_FREE (tbl, sizeof (INTCOOKIE_TABLE)); -+} -+ -+int -+intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, INTCOOKIE_ENTRY *, sizeof (INTCOOKIE_ENTRY), 1); -+ -+ if (nent == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (0); -+ else -+ { -+ KMEM_FREE (nent, sizeof (INTCOOKIE_ENTRY)); -+ return (-EINVAL); -+ } -+} -+ -+int -+intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ intcookie_drop_entry (ent); -+ -+ return (0); -+} -+ -+/* -+ * intcookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+int -+intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie) -+{ -+ int res; -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* No matching table found */ -+ if (tbl == NULL) -+ return (-EINVAL); -+ -+ /* Fire the correct cookie */ -+ res = intcookie_fire (tbl, cookie); -+ -+ /* Decrement reference count (and free if necessary) */ -+ intcookie_free_table (tbl); -+ -+ return (res); -+} -+ -+/* -+ * intcookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (0); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ res = ent->ent_fired ? 0 : -EINTR; -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+ -+ return (res); -+} -+ -+int -+intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/Makefile linux-2.6.9/drivers/net/qsnet/elan4/Makefile ---- clean/drivers/net/qsnet/elan4/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/Makefile 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan4/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_ELAN4) += elan4.o -+elan4-objs := device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/elan4/Makefile.conf linux-2.6.9/drivers/net/qsnet/elan4/Makefile.conf ---- clean/drivers/net/qsnet/elan4/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/Makefile.conf 2005-09-07 10:39:42.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan4.o -+MODULENAME = elan4 -+KOBJFILES = device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+EXPORT_KOBJS = device.o device_Linux.o mmu.o mmu_Linux.o procfs_Linux.o routetable.o sdram.o trap.o -+CONFIG_NAME = CONFIG_ELAN4 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/elan4/mmu.c linux-2.6.9/drivers/net/qsnet/elan4/mmu.c ---- clean/drivers/net/qsnet/elan4/mmu.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/mmu.c 2005-07-14 09:34:12.000000000 -0400 -@@ -0,0 +1,1552 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.c,v 1.47.2.3 2005/07/14 13:34:12 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+int elan4_debug_mmu; -+int elan4_mmuhash_chain_reduction = 1; -+int elan4_mmuhash_chain_end_reduce = 0; -+int elan4_mmuhash_chain_middle_reduce = 0; -+int elan4_mmuhash_chain_middle_fail = 0; -+int elan4_mmuhash_shuffle_attempts = 0; -+int elan4_mmuhash_shuffle_done = 0; -+ -+/* Permission table - see ELAN4 MMU documentation */ -+u_char elan4_permtable[] = -+{ -+ 0x00, /* 0x000000 - Disable */ -+ 0x00, /* 0x000000 - Unused */ -+ 0x01, /* 0x000001 - Local Data Read */ -+ 0x03, /* 0x000011 - Local Data Write */ -+ 0x11, /* 0x010001 - Local Read */ -+ 0x10, /* 0x010000 - Local Execute */ -+ 0x05, /* 0x000101 - Read Only */ -+ 0x13, /* 0x010011 - Local Write */ -+ 0x20, /* 0x100000 - Local Event Access */ -+ 0x23, /* 0x100011 - Local Event Write Ac */ -+ 0xa3, /* 1x100011 - Remote Ev Loc Write */ -+ 0xaf, /* 1x101111 - Remote All */ -+ 0x07, /* 0x000111 - Remote Read Only */ -+ 0x0d, /* 0x001101 - Remote Write Only */ -+ 0x0f, /* 0x001111 - Remote Read/Write */ -+ 0xbf, /* 1x111111 - No Fault */ -+}; -+ -+u_char elan4_permreadonly[] = -+{ -+ PERM_Disabled, /* PERM_Disabled */ -+ PERM_Disabled, /* PERM_Unused */ -+ PERM_LocDataRead, /* PERM_LocDataRead */ -+ PERM_LocDataRead, /* PERM_LocDataWrite */ -+ PERM_LocRead, /* PERM_LocRead */ -+ PERM_LocExecute, /* PERM_LocExecute */ -+ PERM_ReadOnly, /* PERM_ReadOnly */ -+ PERM_LocRead, /* PERM_LocWrite */ -+ PERM_LocEventOnly, /* PERM_LocEventOnly */ -+ PERM_LocDataRead, /* PERM_LocEventWrite */ -+ PERM_LocDataRead, /* PERM_RemoteEvent */ -+ PERM_ReadOnly, /* PERM_RemoteAll */ -+ PERM_RemoteReadOnly, /* PERM_RemoteReadOnly */ -+ PERM_ReadOnly, /* PERM_RemoteWriteLocRead */ -+ PERM_ReadOnly, /* PERM_DataReadWrite */ -+ PERM_ReadOnly, /* PERM_NoFault */ -+}; -+ -+static void -+elan4mmu_synctag (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ E4_uint64 value = (he->he_tag[tagidx] & HE_TAG_VALID) ? he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK) : INVALID_CONTEXT; -+ -+ if (he->he_next) -+ value |= ((tagidx == 0) ? -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) : -+ ((he->he_next->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK)); -+ else if (tagidx == 0) -+ value |= TAG_CHAINPTR_30TO19_MASK; -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_synctag: he=%p tagidx=%d he->he_tag=%llx -> value=%llx\n", he, tagidx, he->he_tag[tagidx], value); -+ -+ elan4_sdram_writeq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx), value); -+} -+ -+static void -+elan4mmu_chain_hents (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *phe, ELAN4_HASH_ENTRY *he) -+{ -+ ASSERT ((elan4_sdram_readq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0)) & TAG_CHAINPTR_30TO19_MASK) == TAG_CHAINPTR_30TO19_MASK); -+ -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(1), -+ ((phe->he_tag[1] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK))); -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0), -+ ((phe->he_tag[0] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))); -+} -+ -+static void -+elan4mmu_writepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx, E4_uint64 value) -+{ -+ /* -+ * NOTE - we can only change a valid PTE if we're upgrading it's permissions, -+ * any other changes should have invalidated it first. */ -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_writepte: he=%p tagidx=%d pteidx=%x value=%llx\n", he, tagidx, pteidx, (unsigned long long) value); -+ -+ if (pteidx == 3) -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx), (value >> 16) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), (value >> 0) & 0xFFFF); -+ } -+ else -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writel (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), value & 0xFFFFFFFF); -+ } -+} -+ -+static void -+elan4mmu_invalidatepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), PTE_SetPerm (PERM_Disabled)); -+ else -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), PTE_SetPerm (PERM_Disabled)); -+} -+ -+static E4_uint64 -+elan4mmu_readpte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ return (((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx)) << 0) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx)) << 16) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx)) << 32)); -+ else -+ return ((E4_uint64) elan4_sdram_readl (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx)) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx)) << 32)); -+} -+ -+ -+void -+elan4mmu_flush_tlb (ELAN4_DEV *dev) -+{ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while (read_reg64 (dev, SysControlReg) & CONT_TLB_FLUSH) -+ DELAY (1); -+} -+ -+/* -+ * elanmmu_flush_tlb_hash - this flushes the hash copy entries and the elan -+ * tlb. However after the write to the hash copy entry if the elan was -+ * in the process of walking, then it could write the hash copy with a valid -+ * entry which we had just invalidated. However once we've seen the tlb flushed -+ * then if the walk engine had done a write - then we need to invaldate the -+ * hash copy entries again and reflush the tlb. -+ * -+ * If we're invalidating a lot of hash blocks, then the chances are that the -+ * walk engine will perform a write - so we flush the tlb first, then invalidate -+ * the hash copy entries, then flush the tlb again. -+ */ -+static void -+elan4mmu_flush_tlb_hash (ELAN4_DEV *dev, int tbl, unsigned baseidx, unsigned topidx) -+{ -+ int notmany = (abs(topidx - baseidx) < 5) ? 1 : 0; -+ int hashidx; -+ E4_uint32 reg; -+ -+ if (notmany) -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_WALK_WROTE_TABLES); -+ else -+ elan4mmu_flush_tlb(dev); -+ -+ do { -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ if (dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_COPY) -+ { -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_VALID) == 0); -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[1] & HE_TAG_VALID) == 0); -+ -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 0); -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 1); -+ } -+ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while ((reg = read_reg64 (dev, SysControlReg)) & CONT_TLB_FLUSH) -+ DELAY (1); -+ -+ } while (notmany-- && (reg & CONT_CLEAR_WALK_WROTE_TABLES) != 0); -+} -+ -+void -+elan4mmu_display_hent (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int hashidx) -+{ -+ int tagidx; -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "elan4mmu_display_hent: hashidx=%d he=%p entry at %lx\n", hashidx, he, he->he_entry); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " next=%p prev=%p chain=%p,%p\n", he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1]); -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ E4_uint64 tag = elan4_sdram_readq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx)); -+ E4_uint64 pte0 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 0)); -+ E4_uint64 pte1 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 1)); -+ E4_uint64 pte2 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 2)); -+ E4_uint64 pte3 = ((pte0 >> 48) | (pte1 >> 32) | (pte2 >> 16)); -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Tag %d (%llx,%08x) context=%04x vaddr=%llx\n", tagidx, he->he_tag[tagidx], he->he_pte[tagidx], (int) (tag & TAG_CONTEXT_MASK), (tag & TAG_ADDRESS_MASK)); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 0 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte0 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte0 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte0 & PTE_TYPE_MASK), (pte0 & PTE_MOD_MASK) ? " mod" : "", (pte0 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 1 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte1 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte1 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte1 & PTE_TYPE_MASK), (pte1 & PTE_MOD_MASK) ? " mod" : "", (pte1 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 2 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte2 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte2 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte2 & PTE_TYPE_MASK), (pte2 & PTE_MOD_MASK) ? " mod" : "", (pte2 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 3 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte3 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte3 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte3 & PTE_TYPE_MASK), (pte3 & PTE_MOD_MASK) ? " mod" : "", (pte3 & PTE_REF_MASK) ? " ref" : ""); -+ } -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_next (ELAN4_HASH_ENTRY *he, int ctxnum) -+{ -+ return ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum) ? he->he_chain[0] : he->he_chain[1]; -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_unlink (ELAN4_CTXT *ctxt, int tbl, int hashidx, ELAN4_HASH_ENTRY *prevhe, ELAN4_HASH_ENTRY *he, ELAN4_HASH_ENTRY *next) -+{ -+ /* Check whether either tag is in use by this context */ -+ if ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ return he; -+ -+ if (prevhe == NULL) -+ ctxt->ctxt_mmuhash[tbl][hashidx] = next; -+ else -+ { -+ /* previous he, ensure that both chain pointers are changed is this ctxt is using both tags */ -+ ASSERT ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num); -+ -+ if ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[0] = next; -+ if ((prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[1] = next; -+ } -+ -+ return prevhe; -+} -+ -+void -+elan4mmu_display (ELAN4_CTXT *ctxt, int tbl, const char *tag) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_HASH_ENTRY *he; -+ int hashidx; -+ -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxt->ctxt_num)) -+ { -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "%s: hashidx=%d he=%p tags <%llx,%llx>\n", tag, hashidx, he, -+ (he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0, -+ (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[1], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0); -+ elan4mmu_display_hent (dev, he, hashidx); -+ } -+} -+static ELAN4_HASH_ENTRY * -+elan4mmu_find_next_free (ELAN4_HASH_ENTRY *he) -+{ -+ /* the current one could be free */ -+ /* return NULL if not free one */ -+ while ( he ) -+ { -+ if ( ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) || ((he->he_tag[1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT)) -+ return he; -+ he = he->he_next; -+ } -+ return (NULL); -+} -+static ELAN4_HASH_ENTRY * -+elan4mmu_alloc_hent (ELAN4_DEV *dev, int tbl, int hashidx, E4_uint64 newtag, int *tagidx) -+{ -+ ELAN4_HASH_ENTRY *he, *phe; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ /* see if there are any partial free blocks */ -+ if ((he = elan4mmu_find_next_free (&dev->dev_mmuhash[tbl][hashidx])) != NULL) -+ { -+ *tagidx = ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) ? 0 : 1; -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_alloc_hent: allocate he=%p idx=%d%s\n", he, *tagidx, (he == &dev->dev_mmuhash[tbl][hashidx]) ? " hash-block" : ""); -+ -+ he->he_tag[*tagidx] = newtag | HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, *tagidx); -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+ } -+ -+ if ((he = dev->dev_mmufreelist) != NULL) -+ dev->dev_mmufreelist = he->he_next; -+ else -+ { -+ ELAN4_HASH_CHUNK *hc; -+ sdramaddr_t entry; -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CHUNK *, sizeof (ELAN4_HASH_CHUNK), 0); -+ -+ if (hc == NULL) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ if ((entry = elan4_sdram_alloc (dev, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS)) == (sdramaddr_t) 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ -+ KMEM_FREE (hc, sizeof (ELAN4_HASH_CHUNK)); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ list_add_tail (&hc->hc_link, &dev->dev_hc_list); -+ -+ elan4_sdram_zeroq_sdram (dev, entry, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS); -+ -+ /* no initialise all chunks and chain all but the first onto the freelist */ -+ for (i = 0; i < ELAN4_HENT_CHUNKS; i++, entry += sizeof (E4_HashTableEntry)) -+ { -+ hc->hc_hents[i].he_entry = entry; -+ -+ if (i == 0) -+ he = &hc->hc_hents[0]; -+ else -+ { -+ hc->hc_hents[i].he_next = dev->dev_mmufreelist; -+ dev->dev_mmufreelist = &hc->hc_hents[i]; -+ } -+ } -+ } -+ -+ /* Initialise hash entry, using slot 0 */ -+ *tagidx = 0; -+ -+ he->he_next = NULL; -+ he->he_prev = NULL; -+ he->he_chain[0] = NULL; -+ he->he_chain[1] = NULL; -+ he->he_tag[0] = newtag | HE_TAG_VALID; -+ he->he_tag[1] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[0] = 0; -+ he->he_pte[1] = 0; -+ -+ elan4mmu_synctag (dev, he, 0); -+ -+ /* add to mmuhash lists */ -+ for (phe = &dev->dev_mmuhash[tbl][hashidx]; phe->he_next; phe = phe->he_next) -+ ; -+ phe->he_next = he; -+ he->he_prev = phe; -+ he->he_next = NULL; -+ -+ /* finally chain the hash block into the hash tables */ -+ elan4mmu_chain_hents (dev, phe, he); -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+} -+void -+elan4mmu_set_shuffle(ELAN4_CTXT *ctxt, int tbl, int hashidx) -+{ -+ int i; -+ -+ for(i=0; (ishuffle[tbl][i]>=0) && (ctxt->shuffle[tbl][i]!=hashidx); i++) -+ ; -+ if (ishuffle_needed[tbl] = 1; -+ ctxt->shuffle[tbl][i] = hashidx; -+ } -+} -+static int -+elan4mmm_try_to_free_hent(ELAN4_DEV *dev, int tbl, int hashidx, ELAN4_HASH_ENTRY *he) -+{ -+ ELAN4_HASH_ENTRY *prev; -+ int t; -+ ELAN4_CTXT *ctxt; -+ -+ -+ while (he) { -+ if ( ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) -+ && ((he->he_tag[1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT)) { -+ /* Both tags are now free */ -+ -+ if (he != &dev->dev_mmuhash[tbl][hashidx]) { -+ /* its not the hash entry block */ -+ -+ if ( he->he_next == NULL ) { -+ /* its the end one so just remove it */ -+ prev = he->he_prev; -+ -+ /* make the previous entry the end one and sync it */ -+ prev->he_next = NULL; -+ elan4mmu_synctag (dev, prev, 0); -+ -+ /* make sure the elan had finished traversing the list */ -+ elan4mmu_flush_tlb(dev); -+ -+ /* now we have a free he in our hands put it onto the free list */ -+ he->he_next = dev->dev_mmufreelist; -+ dev->dev_mmufreelist = he; -+ -+ elan4_mmuhash_chain_end_reduce++; -+ -+ he = prev; -+ } else { -+ /* can only remove if my he_entry high bits = next he_entry high bits. */ -+ -+ if (((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) == -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK)) -+ { -+ prev = he->he_prev; -+ -+ /* make the previous entry jump over us and sync it */ -+ prev->he_next = he->he_next; -+ elan4mmu_synctag (dev, prev, 1); -+ he->he_next->he_prev = prev; -+ -+ /* make sure the elan had finished traversing the list */ -+ elan4mmu_flush_tlb(dev); -+ -+ /* now we have a free he in our hands put it onto the free list */ -+ he->he_next = dev->dev_mmufreelist; -+ dev->dev_mmufreelist = he; -+ -+ elan4_mmuhash_chain_middle_reduce++; -+ -+ he = prev; -+ -+ } else { -+ elan4_mmuhash_chain_middle_fail++; -+ /* at this point we wanted to remove an entry but we cant because this would mean -+ chanaging the high bits of the perivious pointer. -+ -+ It is assumed that this is a fairly rare occurance. -+ -+ The plan is. to tell the ctxt's in the end entry (which can always be removed) -+ to shuffle down. They need to do this as its guarded by a ctxt lock i dont have. -+ -+ Note the ctxt entry might not exist by the time they get round to shuffling. -+ and/or the empty node we want to shuffle to might have gone. so there is no -+ value in storing info about what you want to shuffle. -+ -+ just tell the ctxt to shuffle this hashidx. rather than allocate a block -+ of memory the size of the number of hashidx's to handle this we will use -+ a short array. assuming its rarely going to fill. if it does the all the ctxt's -+ hashidx's are shuffled (as its really unlikely to happen -+ */ -+ -+ /* mark all up to the end as needing shuffle */ -+ while (he->he_next) { -+ for(t=0;t<2;t++) { -+ if ((he->he_tag[t] & TAG_CONTEXT_MASK)!=INVALID_CONTEXT) { -+ ctxt = elan4_localctxt (dev, (he->he_tag[t] & TAG_CONTEXT_MASK)); -+ if (ctxt) { -+ ASSERT(ctxt->ctxt_ops); -+ if (ctxt->ctxt_ops->op_need_shuffle) -+ ctxt->ctxt_ops->op_need_shuffle (ctxt, tbl, hashidx); -+ } -+ } -+ } -+ he = he->he_next; -+ } -+ -+ he = NULL; -+ } -+ } -+ } else he = NULL; -+ } else he = NULL; -+ } -+ return (0); -+} -+static void -+elan4mmu_free_hent_nolock (ELAN4_DEV *dev, int tbl, int hashidx, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ /* assumes some one has the mmulock before this is called */ -+ int pteidx; -+ -+ /* Invalidate the tag, and zero all ptes */ -+ for (pteidx = 0; pteidx < 4; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx) != HE_TYPE_INVALID) -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+ -+ he->he_tag[tagidx] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[tagidx] = 0; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ -+ if ((he->he_tag[tagidx^1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) /* Both tags are now free */ -+ { -+ if (he == &dev->dev_mmuhash[tbl][hashidx]) /* it's the hash block entry */ -+ { /* so as it's already on the freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; /* just copy it's chain pointers */ -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free but hashblk\n", tbl, hashidx, tagidx, he); -+ } -+ else -+ { -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free\n", tbl, hashidx, tagidx, he); -+ -+ /* remove it from the hash table, and place back on the anonymous freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ -+ if (elan4_mmuhash_chain_reduction) { -+ elan4mmm_try_to_free_hent (dev, tbl, hashidx, he); -+ } -+ } -+ } -+ else -+ { -+ /* Other tag still in use */ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => other tag in use\n", tbl, hashidx, tagidx, he); -+ } -+} -+static void -+elan4mmu_free_hent (ELAN4_DEV *dev, int tbl, int hashidx, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ elan4mmu_free_hent_nolock (dev, tbl, hashidx, he, tagidx); -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+} -+void -+print_dev(ELAN4_DEV *dev, int tbl, int index) -+{ -+ ELAN4_HASH_ENTRY *he = &dev->dev_mmuhash[tbl][index]; -+ int count=0; -+ -+ while (he) { -+ qsnet_debugf(1,"(dev) he%s %p entry 0x%010lx he_(%p,%p) chain(%p,%p) tag(0x%016llx,0x%016llx) pte(0x%010x,0x%010x)%s\n", -+ (he==&dev->dev_mmuhash[tbl][index])?"*":" ", he, -+ he->he_entry, he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1], -+ (long long)he->he_tag[0], (long long)he->he_tag[1], he->he_pte[0], he->he_pte[1], -+ (he->he_next)? (( ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) == -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))?" ":"*"):" "); -+ he = he->he_next; -+ -+ if (count++ > 1000) { -+ qsnet_debugf(1,"List Failed\n"); -+ he = NULL; -+ elan4_mmuhash_chain_reduction = 0; -+ } -+ } -+} -+void -+print_ctx(ELAN4_CTXT *ctxt, int tbl, int index) -+{ -+ ELAN4_HASH_ENTRY *he = ctxt->ctxt_mmuhash[tbl][index]; -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int count=0; -+ -+ while (he) { -+ qsnet_debugf(1,"(%04d) he%s %p entry 0x%010lx he_(%p,%p) chain(%p,%p) tag(0x%016llx,0x%016llx) pte(0x%010x,0x%010x)%s\n", -+ ctxt->ctxt_num, (he==&dev->dev_mmuhash[tbl][index])?"*":" ", he, -+ he->he_entry, he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1], -+ (long long)he->he_tag[0], (long long)he->he_tag[1], he->he_pte[0], he->he_pte[1], -+ (he->he_next)?(( ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) == -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))?" ":"*"):" "); -+ -+ if (((he->he_tag[0] & TAG_CONTEXT_MASK) != ctxt->ctxt_num) && ((he->he_tag[1] & TAG_CONTEXT_MASK) != ctxt->ctxt_num)) { -+ qsnet_debugf(1,"(%04d) neither tag is us so stopping 0x%llx 0x%llx \n", ctxt->ctxt_num, (long long)(he->he_tag[0] & TAG_CONTEXT_MASK), (long long)(he->he_tag[1] & TAG_CONTEXT_MASK)); -+ he = NULL; -+ } else { -+ he = he_ctxt_next (he, ctxt->ctxt_num); -+ } -+ if (count++ > 1000) { -+ qsnet_debugf(1,"List Failed\n"); -+ he = NULL; -+ elan4_mmuhash_chain_reduction = 0; -+ } -+ } -+} -+int -+dev_count(ELAN4_DEV *dev, int tbl, int index, int ctxt_num) -+{ -+ ELAN4_HASH_ENTRY *he = &dev->dev_mmuhash[tbl][index]; -+ int count = 0; -+ while (he) { -+ -+ if ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt_num) count++; -+ if ((he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt_num) count++; -+ -+ he = he->he_next; -+ } -+ return (count); -+} -+int -+ctx_count(ELAN4_CTXT *ctxt, int tbl, int index) -+{ -+ ELAN4_HASH_ENTRY *he = ctxt->ctxt_mmuhash[tbl][index]; -+ int count = 0; -+ while (he) { -+ -+ if ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) count++; -+ if ((he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) count++; -+ -+ if (((he->he_tag[0] & TAG_CONTEXT_MASK) != ctxt->ctxt_num) && ((he->he_tag[1] & TAG_CONTEXT_MASK) != ctxt->ctxt_num)) { -+ he = NULL; -+ } else { -+ he = he_ctxt_next (he, ctxt->ctxt_num); -+ } -+ } -+ return (count); -+} -+void -+elan4mmu_shuffle_up (ELAN4_CTXT *ctxt, int tbl, int hashidx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_HASH_ENTRY *ctxt_prev = NULL; -+ ELAN4_HASH_ENTRY *ctxt_he = NULL; -+ ELAN4_HASH_ENTRY *ctxt_next = NULL; -+ ELAN4_HASH_ENTRY *hole; -+ ELAN4_HASH_ENTRY *tmp; -+ ELAN4_HASH_ENTRY *ctxt_remember; -+ int hole_tagidx; -+ int ctxt_tagidx; -+ int pteidx; -+ E4_uint64 value; -+ -+ elan4_mmuhash_shuffle_attempts++; -+ -+ /* find the first hole */ -+ hole = elan4mmu_find_next_free ( &dev->dev_mmuhash[tbl][hashidx] ) ; -+ if (hole == NULL) return; -+ -+ /* find the last ctx */ -+ /* 1 move tmp to the end */ -+ for(ctxt_he = hole; (ctxt_he->he_next != NULL); ctxt_he = ctxt_he->he_next) -+ ; -+ /* 2 move tmp back looking for either hole or ctxt */ -+ while ((ctxt_he != hole) -+ && ((ctxt_he->he_tag[0] & TAG_CONTEXT_MASK) != ctxt->ctxt_num ) -+ && ((ctxt_he->he_tag[1] & TAG_CONTEXT_MASK) != ctxt->ctxt_num )) -+ ctxt_he = ctxt_he->he_prev; -+ -+ /* if we found hole then there is not usefull swap to do */ -+ if (ctxt_he == hole) return; -+ -+ while (ctxt_he != hole) { -+ /***********/ -+ /* do swap */ -+ /***********/ -+ elan4_mmuhash_shuffle_done++; -+ -+ /* now we can move this ctxt's entry in ctxt_he to hole */ -+ if ( (hole->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT ) hole_tagidx = 0; -+ else hole_tagidx = 1; -+ -+ if ( (ctxt_he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ) ctxt_tagidx = 0; -+ else ctxt_tagidx = 1; -+ -+ /* find us in list do this now before list is played with */ -+ ctxt_prev = NULL; -+ tmp = ctxt->ctxt_mmuhash[tbl][hashidx]; -+ ctxt_next = he_ctxt_next (tmp, ctxt->ctxt_num); -+ while (tmp != ctxt_he) { -+ ctxt_prev = tmp; -+ tmp = ctxt_next; -+ ctxt_next = he_ctxt_next (tmp, ctxt->ctxt_num); -+ } -+ -+ /* copy over software chain and pte */ -+ hole->he_pte[hole_tagidx] = ctxt_he->he_pte[ctxt_tagidx]; -+ -+ /* copy over the valid elan pte's */ -+ /* not preserving the modified and referene bits */ -+ for (pteidx = 0; pteidx <= 3; pteidx++) -+ if (HE_GET_PTE(hole, hole_tagidx, pteidx)) -+ { -+ /* copy the pg_page and pg_dma_addr */ -+ hole->he_pg[hole_tagidx][pteidx] = ctxt_he->he_pg[ctxt_tagidx][pteidx]; -+ -+ value = elan4mmu_readpte (dev, ctxt_he, ctxt_tagidx, pteidx); -+ elan4mmu_writepte (dev, hole, hole_tagidx, pteidx, value); -+ } -+ -+ /* copy over tag and sync it*/ -+ hole->he_tag[hole_tagidx] = ctxt_he->he_tag[ctxt_tagidx]; -+ elan4mmu_synctag (dev, hole, hole_tagidx); -+ -+ /* before we remove it check if its going to get free'd */ -+ if ((ctxt_he->he_tag[ctxt_tagidx ^ 1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) { -+ /* this is ok as the existence of a hole guards agains falling off front of list */ -+ ctxt_remember = ctxt_he->he_prev; -+ } else ctxt_remember = ctxt_he; -+ -+ /* invalidate pte and tag */ -+ ctxt_he->he_tag[ctxt_tagidx] = E4MMU_TAG(0, INVALID_CONTEXT); -+ elan4mmu_synctag (dev, ctxt_he, ctxt_tagidx); -+ -+ /* should ensure that any walk in progress has completed */ -+ elan4mmu_flush_tlb(dev); -+ -+ for (pteidx = 0; pteidx <= 3; pteidx++) -+ if (HE_GET_PTE(ctxt_he, ctxt_tagidx, pteidx)) -+ elan4mmu_invalidatepte (dev, ctxt_he, ctxt_tagidx, pteidx); -+ -+ /* remove from the source end */ -+ elan4mmu_free_hent_nolock (dev, tbl, hashidx, ctxt_he, ctxt_tagidx); -+ -+ -+ /* sort out the ctxt links */ -+ /* first the hole */ -+ if ((hole->he_tag[hole_tagidx^1]& TAG_CONTEXT_MASK) == ctxt->ctxt_num) { -+ /* already in the list */ -+ hole->he_chain[hole_tagidx] = hole->he_chain[hole_tagidx^1]; -+ } else { -+ /* hole not in list */ -+ hole->he_chain[hole_tagidx] = ctxt->ctxt_mmuhash[tbl][hashidx]; -+ ctxt->ctxt_mmuhash[tbl][hashidx] = hole; -+ -+ /* this is one i missed for a bit */ -+ /* if we put the hole onto the list it might become the previous */ -+ if (ctxt_prev == NULL) ctxt_prev = hole; -+ } -+ -+ /* second remove the old one */ -+ if ((ctxt_he->he_tag[ctxt_tagidx^1]& TAG_CONTEXT_MASK) == ctxt->ctxt_num) { -+ /* Nothing to do as still in list as other tag is ours */ -+ } else { -+ if (ctxt_prev == NULL) { -+ ctxt->ctxt_mmuhash[tbl][hashidx] = ctxt_next; -+ } else { -+ if ((ctxt_prev->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ ctxt_prev->he_chain[0] = ctxt_next; -+ if ((ctxt_prev->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ ctxt_prev->he_chain[1] = ctxt_next; -+ } -+ } -+ -+ /***********/ -+ /* move on */ -+ /***********/ -+ ctxt_he = ctxt_remember; -+ -+ /* the hole is still a valid place to start looking */ -+ /* cant use elan4mmu_find_next_free as we need to stop if we pass ctxt_he */ -+ if (hole == ctxt_he) return; -+ while ( hole -+ && ((hole->he_tag[0] & TAG_CONTEXT_MASK) != INVALID_CONTEXT) -+ && ((hole->he_tag[1] & TAG_CONTEXT_MASK) != INVALID_CONTEXT)) -+ { -+ hole = hole->he_next; -+ if (hole == ctxt_he) return; -+ } -+ if (hole == NULL) return; -+ -+ /* start looking for the next ctxt */ -+ while ((ctxt_he != hole) -+ && ((ctxt_he->he_tag[0] & TAG_CONTEXT_MASK) != ctxt->ctxt_num ) -+ && ((ctxt_he->he_tag[1] & TAG_CONTEXT_MASK) != ctxt->ctxt_num )) -+ ctxt_he = ctxt_he->he_prev; -+ } -+ -+ /* if we found hole then there is not usefull swap to do */ -+ return; -+} -+void -+elan4mmu_do_shuffle (ELAN4_CTXT *ctxt, int tbl) -+{ -+ int i; -+ ELAN4_DEV *dev; -+ unsigned long flags; -+ -+ if (!ctxt) return; -+ dev = ctxt->ctxt_dev; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ for(i=0; i < ELAN4_CTXT_MAX_SHUFFLE ;i++) -+ { -+ if (ctxt->shuffle[tbl][i] != -1) -+ { -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ elan4mmu_shuffle_up(ctxt, tbl, ctxt->shuffle[tbl][i]); -+ ctxt->shuffle[tbl][i] = -1; -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ } -+ } -+ -+ ctxt->shuffle_needed[tbl] = 0; -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_HASH_ENTRY * -+elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxnum, vaddr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_uint64 newtag = E4MMU_TAG(vaddr, ctxnum); -+ ELAN4_HASH_ENTRY *he = &dev->dev_mmuhash[tbl][hashidx]; -+ unsigned tagidx; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: tbl=%d ctxnum=%d vaddr=%llx -> hashidx %d\n", tbl, ctxnum, vaddr, hashidx); -+ -+ /* 1st) check whether we're reloading an existing entry */ -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ { -+ ASSERT ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ if ((he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK | HE_TAG_VALID)) == (newtag | HE_TAG_VALID)) -+ { -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return old he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ return he; -+ } -+ } -+ } -+ -+ if ((he = elan4mmu_alloc_hent (dev, tbl, hashidx, newtag, &tagidx)) == NULL) -+ return NULL; -+ -+ /* chain onto context hash */ -+ if ((he->he_tag[tagidx ^ 1] & TAG_CONTEXT_MASK) == ctxnum) /* already chained using other link */ -+ { /* so ensure both slots are chained the same */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ } -+ else -+ { -+ he->he_chain[tagidx] = ctxt->ctxt_mmuhash[tbl][hashidx]; -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he; -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return new he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ -+ return he; -+} -+ -+int -+elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, int type, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(vaddr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx; -+ ELAN4_HASH_ENTRY *he; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_pteload: ctx=%d tbl=%d pteidx=%d vaddr=%llx type=%d pte=%llx\n", -+ ctxt->ctxt_num, tbl, pteidx, (unsigned long long)vaddr, type, newpte); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ if ((he = elan4mmu_ptealloc (ctxt, tbl, vaddr, &tagidx)) == NULL) -+ { -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -ENOMEM; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload: %s he=%p tagidx=%d pteidx=%d\n", HE_GET_PTE(he,0,pteidx) ? "reloading" : "loading", he, tagidx, pteidx); -+ -+ if (HE_GET_PTE(he,tagidx,pteidx) != HE_TYPE_INVALID && /* invalid -> valid */ -+ (elan4mmu_readpte (dev, he, tagidx, pteidx) & PTE_PPN_MASK) != (newpte & PTE_PPN_MASK)) /* or same phys address */ -+ { -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -EINVAL; -+ } -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+ -+ HE_SET_PTE(he, tagidx, pteidx, type); -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return 0; -+} -+ -+int -+elan4mmu_pteload_page (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, struct page *page, int perm) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int pteidx = E4MMU_SHIFT_ADDR(vaddr, dev->dev_pageshift[tbl]) & 3; -+ unsigned int tagidx; -+ unsigned int type; -+ E4_uint64 newpte; -+ int topaddr; -+ ELAN4_HASH_ENTRY *he; -+ ELAN4_PTE_PAGE *pg; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload_page: ctx=%d tbl=%d pteidx=%d vaddr=%llx page=%p\n", -+ ctxt->ctxt_num, tbl, pteidx, (unsigned long long)vaddr, page); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ if ((he = elan4mmu_ptealloc (ctxt, tbl, vaddr, &tagidx)) == NULL) -+ { -+ MPRINTF (ctxt, 1, "elan4mmu_pteload_page: ctx=%d failed ENOMEM\n", ctxt->ctxt_num); -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -ENOMEM; -+ } -+ -+ pg = &he->he_pg[tagidx][pteidx]; -+ -+ if (HE_GET_PTE(he,tagidx,pteidx) != HE_TYPE_INVALID && pg->pg_page != page) /* invalid -> valid, or same page*/ -+ { -+ MPRINTF (ctxt, 1, "elan4mmu_pteload_page: ctx=%d failed: pg_page=%p page=%p PTE=%x EINVAL\n", -+ ctxt->ctxt_num, pg->pg_page, page, HE_GET_PTE(he,tagidx,pteidx)); -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -EINVAL; -+ } -+ -+ if (HE_GET_PTE (he,tagidx,pteidx) == HE_TYPE_INVALID) -+ { -+ if ((ctxt->ctxt_features & ELAN4_FEATURE_PIN_DOWN) != 0) -+ page_cache_get (page); -+ -+ pg->pg_page = page; -+ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_PCI_MAP) -+ { -+ struct scatterlist sg; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ sg.address = NULL; -+#endif -+ sg.page = page; -+ sg.offset = 0; -+ sg.length = PAGE_SIZE; -+ -+ if (pci_map_sg (dev->dev_osdep.pdev, &sg, 1, PCI_DMA_BIDIRECTIONAL) == 0 || sg.length == 0) -+ { -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -EFAULT; -+ } -+ -+ pg->pg_dma_addr = sg.dma_address | (vaddr & (PAGE_SIZE-1)); -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload_page: pci_map_sg -> %lx\n", pg->pg_dma_addr); -+ } -+ else -+ { -+ pg->pg_dma_addr = (page_to_pfn (page) << PAGE_SHIFT) | (vaddr & (PAGE_SIZE-1)); -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload_page: directmap -> %lx\n", pg->pg_dma_addr); -+ } -+ } -+ -+#if defined(__BIG_ENDIAN__) -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal | PTE_BigEndian; -+#else -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal; -+#endif -+ -+ topaddr = elan4mmu_alloc_topaddr (dev, pg->pg_dma_addr, type); -+ -+ if (dev->dev_topaddrmode) -+ newpte = dev->dev_pteval | (pg->pg_dma_addr >> PTE_PADDR_SHIFT) | (type & ~0xc) | (topaddr << 2); -+ else -+ newpte = dev->dev_pteval | ((pg->pg_dma_addr >> PTE_PADDR_SHIFT) & ~PTE_TOPADDR_MASK) | (((E4_uint64) topaddr) << 45) | type; -+ -+ ASSERT (HE_GET_PTE(he,tagidx,pteidx) == HE_TYPE_INVALID || /* invalid -> valid */ -+ (elan4mmu_readpte (dev, he, tagidx, pteidx) & PTE_PPN_MASK) == (newpte & PTE_PPN_MASK)); /* or same phys address */ -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+ -+ HE_SET_PTE (he, tagidx, pteidx, HE_TYPE_PAGE); -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ return 0; -+} -+ -+void -+elan4mmu_pteunload (ELAN4_CTXT *ctxt, ELAN4_HASH_ENTRY *he, unsigned int tagidx, unsigned int pteidx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_PTE_PAGE *pg = &he->he_pg[tagidx][pteidx]; -+ -+ switch (HE_GET_PTE(he,tagidx,pteidx)) -+ { -+ case HE_TYPE_PAGE: -+ MPRINTF (DBG_DEVICE, 1, "elan4mmu_pteunload: he=%p tagidx=%d pteidx=%d page=%p -> %lx\n", -+ he, tagidx, pteidx, pg->pg_page, pg->pg_dma_addr); -+ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_PCI_MAP) -+ { -+ struct scatterlist sg; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ sg.address = NULL; -+#endif -+ sg.page = pg->pg_page; -+ sg.offset = 0; -+ sg.length = PAGE_SIZE; -+ sg.dma_address = pg->pg_dma_addr; -+ -+ pci_unmap_sg (dev->dev_osdep.pdev, &sg, 1, PCI_DMA_BIDIRECTIONAL); -+ } -+ -+ if ((ctxt->ctxt_features & ELAN4_FEATURE_PIN_DOWN) != 0) -+ page_cache_release (pg->pg_page); -+ -+ pg->pg_page = NULL; -+ pg->pg_dma_addr = 0; -+ break; -+ } -+ -+ HE_SET_PTE(he, tagidx, pteidx, HE_TYPE_INVALID); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+} -+ -+ -+void -+elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned long tagspan = (1 << (dev->dev_pageshift[tbl] + 2)); -+ E4_Addr end = start + len - 1; -+ int needflush = 0; -+ unsigned baseidx, topidx; -+ unsigned hashidx, tagidx, pteidx; -+ ELAN4_HASH_ENTRY *he, *prevhe, *next; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_unload_range: tbl=%d start=%llx end=%llx len=%lx\n", tbl, start, end, len); -+ -+ /* determine how much of the hash table we've got to scan */ -+ -+ /* GNAT 6760: When we have a Main page size which maps onto multiple Elan pages -+ * we need to do something a bit more clever here or else it takes ms per page invalidate -+ * This change helps in the meantime -+ */ -+ /* if (len <= (1 << dev->dev_pageshift[tbl])) */ -+ if (len <= PAGE_SIZE) -+ { -+ baseidx = E4MMU_HASH_INDEX (ctxnum, start, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ topidx = E4MMU_HASH_INDEX (ctxnum, end, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ -+ if (baseidx != topidx) -+ { -+ /* GNAT 6760: Need to search whole of the hash table (slow!) */ -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ } -+ else -+ { -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: baseidx=%d topidx=%d\n", baseidx, topidx); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st - invalidate the tag for all hash blocks which are completely invalidated, -+ * and remember the first/last hash blocks */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx hashidx=%d bidx=%d tidx=%d\n", he, base, top, hashidx, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx) != HE_TYPE_INVALID) -+ { -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+ needflush = 1; -+ } -+ } -+ else if (base >= start && top <= end) /* hash entry completely spanned */ -+ { /* so invalidate the tag */ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned\n", he, base, top); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ needflush = 1; -+ } -+ } -+ -+ if (needflush) -+ { -+ /* 2nd invalidate the first/last hash blocks if they are partially invalidated -+ * and flush the tlb/hash copy blocks */ -+ elan4mmu_flush_tlb_hash (dev, tbl, baseidx, topidx); -+ -+ /* 3rd free off the hash entries which are completely invalidated */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (prevhe = NULL, he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = next) -+ { -+ next = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx bidx=%d tidx=%d\n", he, base, top, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx) != HE_TYPE_INVALID) -+ elan4mmu_pteunload (ctxt, he,tagidx, pteidx); -+ } -+ -+ if ((base >= start && top <= end) || he->he_pte[tagidx] == 0) /* hash entry completely spanned or all pte's cleared */ -+ { /* so invalidate the pte's and free it */ -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned or empty\n", he, base, top); -+ -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ } -+ -+ prevhe = he_ctxt_unlink (ctxt, tbl, hashidx, prevhe, he, next); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int ctxnum = ctxt->ctxt_num; -+ ELAN4_HASH_ENTRY *he; -+ int tbl, hashidx, tagidx, pteidx; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_invalidate_ctxt: invalidating ctxnum=%d\n", ctxnum); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st invalidate all tags belonging to me */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) /* own tag block */ -+ { -+ MPRINTF (ctxt, 1, "elan4mmu_invalidate_ctxt: he=%p addr=%llx hashidx=%d tagidx=%d\n", -+ he, he->he_tag[tagidx] & TAG_ADDRESS_MASK, hashidx, tagidx); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ } -+ -+ /* 2nd flush the tlb & cached hash block */ -+ elan4mmu_flush_tlb (dev); -+ -+ /* 3rd invalidate all pte's and free off the hash entries */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ while ((he = ctxt->ctxt_mmuhash[tbl][hashidx]) != NULL) -+ { -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ for (pteidx = 0; pteidx < 4; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx) != HE_TYPE_INVALID) -+ elan4mmu_pteunload (ctxt, he, tagidx, pteidx); -+ -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_HASH_CACHE * -+elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr end = start + (npages << dev->dev_pageshift[tbl]) - 1; -+ unsigned long tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_CACHE *hc; -+ unsigned int tagidx, pteidx; -+ E4_Addr addr; -+ int i; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_reserve: start=%llx npages=%d\n", start, npages); -+ MPRINTF (ctxt, 0, " pageshift=%d tagspan=%lx base=%llx top=%llx end=%llx nhes=%d\n", -+ dev->dev_pageshift[tbl], tagspan, base, top, end, nhes); -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CACHE *, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes]), cansleep); -+ -+ if (hc == NULL) -+ return NULL; -+ -+ hc->hc_start = start; -+ hc->hc_end = end; -+ hc->hc_tbl = tbl; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ -+ -+ if ((hc->hc_hes[i] = elan4mmu_ptealloc (ctxt, tbl, addr & ~(tagspan-1), &tagidx)) == NULL) -+ goto failed; -+ -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: tbl=%d addr=%llx -> hashidx=%d tagidx=%d\n", tbl, addr & ~(tagspan-1), -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, (addr & ~(tagspan-1)), dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1), tagidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ ASSERT (HE_GET_PTE (hc->hc_hes[i], tagidx, pteidx) == HE_TYPE_INVALID); -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: i=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", -+ i, addr, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE (hc->hc_hes[i], tagidx, pteidx, HE_TYPE_RESERVED); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ return hc; -+ -+ failed: -+ for (i--, addr -= tagspan; i >= 0; i--, addr -= tagspan) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, HE_TYPE_INVALID); -+ -+ if (hc->hc_hes[i]->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, tbl, hashidx, hc->hc_hes[i], tagidx); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ KMEM_FREE (hc, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes])); -+ -+ return NULL; -+} -+ -+void -+elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr start = hc->hc_start; -+ E4_Addr end = hc->hc_end; -+ unsigned long tagshift = dev->dev_pageshift[hc->hc_tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_ENTRY *prevhe, *he, *next; -+ E4_Addr addr; -+ unsigned int pteidx; -+ int i; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ MPRINTF (ctxt, 0, "elan4mmu_release: base=%llx top=%llx\n", base, top); -+ -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ elan4mmu_invalidatepte (dev, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, HE_TYPE_INVALID); -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_release: i=%d addr=%llx he=%p (hashidx=%d tagidx=%d pteidx=%d) pte=%x\n", -+ i, addr, hc->hc_hes[i], hashidx, tagidx, pteidx, hc->hc_hes[i]->he_pte[tagidx]); -+ -+ /* remove from context hash */ -+ /* need to move to the hc->hc_hes[i] in the ctxt list and set prevhe, he, next */ -+ prevhe = NULL; -+ he = ctxt->ctxt_mmuhash[hc->hc_tbl][hashidx]; -+ next = he_ctxt_next (he, ctxt->ctxt_num); -+ -+ while(he != hc->hc_hes[i]) { -+ prevhe = he; -+ he = next; -+ next = he_ctxt_next (he, ctxt->ctxt_num); -+ } -+ -+ if (he->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, hc->hc_tbl, hashidx, he, tagidx); -+ -+ he_ctxt_unlink (ctxt, hc->hc_tbl, hashidx, prevhe, he, next); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_set_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d) newpte=%llx\n", idx, addr, he, tagidx, pteidx, newpte); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+} -+ -+E4_uint64 -+elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ return elan4mmu_readpte (dev, he, tagidx, pteidx); -+} -+ -+void -+elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_clear_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", idx, addr, he, tagidx, pteidx); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+} -+ -+int -+elan4mmu_display_mmuhash(ELAN4_DEV *dev, int tbl, int *index_ptr, char *page, int count) -+{ -+ char *p = page; -+ unsigned long flags; -+ ELAN4_HASH_ENTRY *he; -+ int index = *index_ptr; -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ he = &dev->dev_mmuhash[tbl][index]; -+ -+ /* move to the next entry that actually has contents in its chain */ -+ while ((he->he_next == NULL) && ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) -+ && ((he->he_tag[1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT)) -+ { -+ index++; -+ if ( index >= dev->dev_hashsize[tbl] ) { -+ /* didnt find anything and have looped */ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ *index_ptr = dev->dev_hashsize[tbl]; -+ return (p - page); -+ } -+ he = &dev->dev_mmuhash[tbl][index]; -+ } -+ *index_ptr = index; /* the actual one we will print */ -+ -+ -+ while (he) { -+ if ( ((p - page)+175) > count ) { -+ /* might not fit in */ -+ p += sprintf( p , "...\n"); -+ he = NULL; -+ } else { -+ int ctxt0_bit = 0; -+ int ctxt1_bit = 0; -+ ELAN4_CTXT *ctxt0; -+ ELAN4_CTXT *ctxt1; -+ -+ if ( (he->he_tag[0] & TAG_CONTEXT_MASK) != INVALID_CONTEXT) { -+ ctxt0 = elan4_localctxt (dev, (he->he_tag[0] & TAG_CONTEXT_MASK)); -+ ctxt0_bit = ctxt0->shuffle_needed[0]; -+ } -+ if ( (he->he_tag[1] & TAG_CONTEXT_MASK) != INVALID_CONTEXT) { -+ ctxt1 = elan4_localctxt (dev, (he->he_tag[1] & TAG_CONTEXT_MASK)); -+ ctxt1_bit = ctxt1->shuffle_needed[0]; -+ } -+ -+ p += sprintf(p ,"(%4d,%1d) he%s %p entry 0x%010lx he_(%p,%p) chain(%p,%p) tag(0x%016llx-%d,0x%016llx-%d) pte(0x%010x,0x%010x)%s\n", -+ index,tbl, (he==&dev->dev_mmuhash[tbl][index])?"*":" ", he, -+ he->he_entry, he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1], -+ (long long)he->he_tag[0], ctxt0_bit, (long long)he->he_tag[1], ctxt1_bit, he->he_pte[0], he->he_pte[1], -+ (he->he_next)? (( ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) == -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))?" ":"*"):" "); -+ he = he->he_next; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ -+ return (p - page); -+} -+ -+int -+elan4mmu_display_ctxt_mmuhash(ELAN4_CTXT *ctxt, int tbl, int *index_ptr, char *page, int count) -+{ -+ ELAN4_HASH_ENTRY *he; -+ char *p = page; -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int index = *index_ptr; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ he = ctxt->ctxt_mmuhash[tbl][index]; -+ while (! he ) { -+ index++; -+ if ( index >= dev->dev_hashsize[tbl] ) { -+ /* didnt find anything and have looped */ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ *index_ptr = dev->dev_hashsize[tbl]; -+ return (p - page); -+ } -+ he = ctxt->ctxt_mmuhash[tbl][index]; -+ } -+ *index_ptr = index; /* the actual one we will print */ -+ -+ while (he) { -+ if ( ((p - page)+175) > count ) { -+ /* might not fit in */ -+ p += sprintf( p , "...\n"); -+ he = NULL; -+ } else { -+ p += sprintf(p ,"(%4d,%1d) he%s %p entry 0x%010lx he_(%p,%p) chain(%p,%p) tag(0x%016llx,0x%016llx) pte(0x%010x,0x%010x)%s\n", -+ index,tbl, (he==&dev->dev_mmuhash[tbl][index])?"*":" ", he, -+ he->he_entry, he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1], -+ (long long)he->he_tag[0], (long long)he->he_tag[1], he->he_pte[0], he->he_pte[1], -+ (he->he_next)?(( ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) == -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))?" ":"*"):" "); -+ -+ he = he_ctxt_next (he, ctxt->ctxt_num); -+ } -+ } -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ return (p - page); -+} -+ -+int -+elan4mmu_display_bucket_mmuhash(ELAN4_DEV *dev, int tbl, int *buckets, int nBuckets, char *page, int c) -+{ -+ ELAN4_HASH_ENTRY *he; -+ unsigned long flags; -+ char *p = page; -+ int b; -+ int index; -+ int count; -+ int totals[nBuckets]; -+ -+ for (b=0;bdev_hashsize[tbl]; index++) { -+ -+ /* how long is this chain */ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ he = &dev->dev_mmuhash[tbl][index]; -+ count = 0; -+ while (he) { -+ count++; -+ ASSERT(count < 1000000); /* seems we have a loop */ -+ he = he->he_next; -+ } -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ -+ /* bucket the lenth */ -+ for(b=0;b -+ -+#include -+#include -+ -+#include -+#include -+ -+int -+elan4mmu_sdram_aliascheck (ELAN4_CTXT *ctxt, E4_Addr addr, sdramaddr_t phys) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ /* -+ * On MPSAS we don't allocate a large enough context table, so -+ * if we see an address/context pair which would "alias" because -+ * they differ in unchecked hash bits to a previous pteload, -+ * then we kill the application. -+ */ -+ unsigned hashval = (E4MMU_SHIFT_ADDR(addr, (dev->dev_pageshift[0]) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(ctxt->ctxt_num)); -+ -+ if (dev->dev_rsvd_hashval[0] == 0xFFFFFFFF) -+ dev->dev_rsvd_hashval[0] = hashval & dev->dev_rsvd_hashmask[0]; -+ -+ if ((hashval & dev->dev_rsvd_hashmask[0]) != dev->dev_rsvd_hashval[0]) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx ctxnum=%x -> [%x] overlaps %x - %x [hashidx=%x]\n", (unsigned long long) addr, -+ ctxt->ctxt_num, hashval, hashval & dev->dev_rsvd_hashmask[0], dev->dev_rsvd_hashval[0], -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[0], dev->dev_hashsize[0]-1)); -+ -+ return 0; -+ } -+ -+ if (((addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (phys & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)))) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx incorrectly alias sdram at %lx\n", (unsigned long long) addr, phys); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+int -+elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type) -+{ -+#if defined(__i386) && !defined(CONFIG_X86_PAE) -+ if (dev->dev_topaddrvalid == 0) -+ { -+ dev->dev_topaddrvalid = 1; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(0), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(1), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(2), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(3), 0); -+ } -+ return (0); -+#else -+ register int i; -+ E4_uint16 match; -+ -+ if (dev->dev_topaddrmode) /* ExtraMasterAddrBits=1 => match {paddr[63:50],type[3:2]} */ -+ match = ((paddr >> 48) & ~3) | ((type >> 2) & 3); -+ else /* ExtraMasterAddrBits=0 => match {paddr[63:48]} */ -+ match = (paddr >> 48); -+ -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: mode=%d paddr=%lx type=%x match=%x [%x %x.%x.%x.%x]\n", -+ dev->dev_topaddrmode, paddr, type, match, dev->dev_topaddrvalid, -+ dev->dev_topaddr[0], dev->dev_topaddr[1], dev->dev_topaddr[2], dev->dev_topaddr[3]); -+ -+ for (i = 0; i < 4; i++) -+ if ((dev->dev_topaddrvalid & (1 << i)) && dev->dev_topaddr[i] == match) -+ return (i); -+ -+ for (i = 0; i < 4; i++) -+ { -+ if ((dev->dev_topaddrvalid & (1 << i)) == 0) -+ { -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: allocate slot %d for %x\n", i, match); -+ -+ dev->dev_topaddrvalid |= (1 << i); -+ dev->dev_topaddr[i] = match; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(i), match); -+ return (i); -+ } -+ } -+ -+ panic ("elan4mmu_alloc_topaddr: all topaddrs in use\n"); -+ return (0); -+#endif -+} -+ -+/* -+ * Convert a physical address into an pte. This should generate a "local" pte for -+ * physical addresses which are elan4 sdram or elan4 command queues. For elan4 -+ * registers and other addresses on the same bus, this should be the local pci -+ * bus address. All other addresses should access the physical address via the -+ * PCI bridge. -+ */ -+ -+int -+elan4mmu_categorise_paddr (ELAN4_DEV *dev, physaddr_t *physp) -+{ -+ physaddr_t sdram_base = dev->dev_sdram_phys; -+ physaddr_t sdram_top = dev->dev_sdram_phys + pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM); -+ physaddr_t regs_base = dev->dev_regs_phys; -+ physaddr_t regs_top = dev->dev_regs_phys + pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS); -+ physaddr_t phys = *physp; -+ int iscommand; -+ -+ if (phys >= sdram_base && phys <= sdram_top) -+ { -+ (*physp) = (phys ^ sdram_base); -+ return HE_TYPE_SDRAM; -+ } -+ -+ if (phys >= regs_base && phys < regs_top) -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ iscommand = (phys < (regs_base + ELAN4_REVA_REG_OFFSET)); -+ else -+ iscommand = (phys < (regs_base + ELAN4_REVB_I2C_OFFSET)); -+ -+ if (iscommand) -+ { -+ (*physp) = phys ^ regs_base; -+ -+ return HE_TYPE_COMMAND; -+ } -+ else -+ { -+ u32 blow, bhigh; -+ -+ /* compute a local pci address from our register BAR */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_BASE_ADDRESS_2, &blow); -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_BASE_ADDRESS_3, &bhigh); -+ -+ (*physp) = (((physaddr_t) bhigh) << 32) | (blow & PCI_BASE_ADDRESS_MEM_MASK) | (phys ^ regs_base); -+ -+ return HE_TYPE_REGS; -+ } -+ } -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ if (VALID_PAGE (virt_to_page (phys_to_virt (phys)))) -+#else -+ if (virt_addr_valid (phys_to_virt (phys))) -+#endif -+ return HE_TYPE_PAGE; -+ -+ return HE_TYPE_OTHER; -+} -+ -+E4_uint64 -+elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t phys, unsigned perm) -+{ -+ unsigned int type = 0; -+ E4_uint64 pte; -+ -+ switch (elan4mmu_categorise_paddr (dev, &phys)) -+ { -+ case HE_TYPE_SDRAM: -+ type = PTE_SetPerm (perm); -+ break; -+ -+ case HE_TYPE_COMMAND: -+ type = PTE_SetPerm (perm) | PTE_CommandQueue; -+ break; -+ -+ case HE_TYPE_REGS: -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal; -+ break; -+ -+ case HE_TYPE_PAGE: -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_PCI_MAP) -+ { -+ struct scatterlist list; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ list.address = NULL; -+#endif -+ list.page = virt_to_page (phys_to_virt (phys));; -+ list.offset = (phys & (PAGE_SIZE-1)); -+ list.length = (1 << dev->dev_pageshift[0]); -+ -+ if (pci_map_sg (dev->dev_osdep.pdev, &list, 1, PCI_DMA_BIDIRECTIONAL) == 0) -+ { -+ printk ("elan4mmu_phys2pte: pci_map_sg failed\n"); -+ return -EFAULT; -+ } -+ -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal | dev->dev_pteval; -+ phys = list.dma_address; -+ break; -+ } -+ /* DROPTHROUGH */ -+ -+ case HE_TYPE_OTHER: -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_PCI_MAP) -+ return -EFAULT; -+ -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal | dev->dev_pteval; -+ break; -+ } -+ -+ if ((type & PTE_PciNotLocal) == 0) -+ pte = (phys >> PTE_PADDR_SHIFT) | type; -+ else -+ { -+ unsigned topaddr = elan4mmu_alloc_topaddr (dev, phys, type); -+ -+ if (dev->dev_topaddrmode) -+ pte = (phys >> PTE_PADDR_SHIFT) | (type & ~0xc) | (topaddr << 2); -+ else -+ pte = ((phys >> PTE_PADDR_SHIFT) & ~PTE_TOPADDR_MASK) | (((E4_uint64) topaddr) << 45) | type; -+ } -+ -+ return pte; -+} -+ -+physaddr_t -+elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte) -+{ -+ physaddr_t sdram_base = dev->dev_sdram_phys; -+ physaddr_t regs_base = dev->dev_regs_phys; -+ physaddr_t phys; -+ -+ if (pte & PTE_PciNotLocal) -+ { -+ if (dev->dev_topaddrmode) -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 2) & 3] & 0xfffc) << 48) | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT); -+ else -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 45) & 3] & 0xffff) << 48)| ((pte & PTE_PPN_MASK & ~PTE_TOPADDR_MASK) << PTE_PADDR_SHIFT); -+ -+#if defined(__alpha) -+ phys ^= alpha_mv.pci_dac_offset; -+#elif defined(__sparc) -+ phys ^= 0xfffe000000000000; -+#endif -+ return phys; -+ } -+ -+ if (pte & PTE_CommandQueue) -+ return (regs_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+ -+ /* sdram */ -+ return (sdram_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+} -+ -+EXPORT_SYMBOL(elan4mmu_phys2pte); -+EXPORT_SYMBOL(elan4mmu_pte2phys); -diff -urN clean/drivers/net/qsnet/elan4/neterr.c linux-2.6.9/drivers/net/qsnet/elan4/neterr.c ---- clean/drivers/net/qsnet/elan4/neterr.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/neterr.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,270 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.8.2.1 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct neterr_inputq -+{ -+ E4_InputQueue inputq; /* input queue */ -+ E4_Event32 qevent; /* input queue event */ -+ E4_uint64 sent; /* # messages sent (cq flow control)*/ -+} NETERR_INPUTQ; -+ -+#define NETERR_NSLOTS 64 /* single page of queue space (4Kb) */ -+ -+#define NETERR_RETRIES 16 -+#define NETERR_CQ_SIZE CQ_Size8K -+#define NETERR_CQ_MSGS (CQ_Size(NETERR_CQ_SIZE) / (21*8)) -+#define NETERR_VP_COUNT 64 /* this *must* be > NETERR_CQ_MSGS */ -+#define NETERR_VP_BASE 1 /* use vp 1 upwards */ -+ -+void -+elan4_neterr_interrupt (ELAN4_DEV *dev, void *arg) -+{ -+ E4_Addr qfptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr)); -+ E4_Addr qbptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr)); -+ E4_Addr qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ E4_Addr qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ ELAN4_CQ *cq = dev->dev_neterr_intcq; -+ int count = 0; -+ ELAN4_CTXT *ctxt; -+ ELAN4_NETERR_MSG msg; -+ -+ while (qfptr != qbptr) -+ { -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_neterr_slots + (qfptr - qfirst), &msg, ELAN4_NETERR_MSG_SIZE); -+ -+ ctxt = elan4_networkctxt (dev, msg.msg_context); -+ -+ if (ctxt != NULL && ctxt->ctxt_ops->op_neterrmsg) -+ ctxt->ctxt_ops->op_neterrmsg (ctxt, &msg); -+ else -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_interrupt: no process - sender %d.%d\n", msg.msg_sender.loc_node, msg.msg_sender.loc_context); -+ -+ count++; -+ -+ /* move on the from pointer */ -+ qfptr = (qfptr == qlast) ? qfirst : qfptr + ELAN4_NETERR_MSG_SIZE; -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfptr); -+ } -+ -+ if (count == 0) -+ { -+ printk ("elan4_neterr_interrupt: spurious\n"); -+ return; -+ } -+ -+ /* Issue the waitevent to the interrupt queue */ -+ writeq (WAIT_EVENT_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)), (void *)cq->cq_mapping); -+ writeq ( E4_EVENT_INIT_VALUE (-32 * count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), (void *)cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INTCQ_ADDR, (void *)cq->cq_mapping); -+ writeq (INTERRUPT_CMD | (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT), (void *)cq->cq_mapping); -+ -+ pioflush_reg (dev); -+} -+ -+int -+elan4_neterr_init (ELAN4_DEV *dev) -+{ -+ unsigned int intqaddr; -+ E4_Addr qfirst, qlast; -+ -+ if ((dev->dev_neterr_inputq = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_slots = elan4_sdram_alloc (dev, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE))) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_msgcq = elan4_alloccq (&dev->dev_ctxt, NETERR_CQ_SIZE, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ if ((dev->dev_neterr_intcq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_WaitEventEnableBit | CQ_InterruptEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ intqaddr = (dev->dev_cqoffset + elan4_cq2num (dev->dev_neterr_intcq)) * CQ_CommandMappingSize; -+ qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ -+ spin_lock_init (&dev->dev_neterr_lock); -+ -+ /* Register an interrupt operation */ -+ dev->dev_neterr_intop.op_function = elan4_neterr_interrupt; -+ dev->dev_neterr_intop.op_arg = NULL; -+ -+ elan4_register_intop (dev, &dev->dev_neterr_intop); -+ -+ /* Initialise the inputq descriptor and event */ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_control), E4_InputQueueControl (qfirst, qlast, ELAN4_NETERR_MSG_SIZE)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_event), DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_CountAndType), E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WritePtr), DEVICE_NETERR_INTCQ_ADDR); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WriteValue), (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent), 0); -+ -+ /* Map them all into the device context */ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, HE_TYPE_SDRAM, (dev->dev_neterr_inputq >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_RemoteAll)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, HE_TYPE_SDRAM, (intqaddr >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocDataWrite) | PTE_CommandQueue); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, HE_TYPE_SDRAM, (dev->dev_neterr_slots >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_DataReadWrite)); -+ -+ /* finally attach to the neterr context */ -+ if (elan4_attach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM) != 0) -+ panic ("elan4_neterr_init: failed to attach to neterr context\n"); -+ -+ /* and drop the context filter */ -+ elan4_set_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 1; -+} -+ -+void -+elan4_neterr_destroy (ELAN4_DEV *dev) -+{ -+ if (dev->dev_neterr_intcq) -+ { -+ elan4_detach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM); -+ -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, 1 << dev->dev_pageshift[0]); -+ -+ spin_lock_destroy (&dev->dev_neterr_lock); -+ } -+ -+ if (dev->dev_neterr_intcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_intcq); -+ dev->dev_neterr_intcq = NULL; -+ -+ if (dev->dev_neterr_msgcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_msgcq); -+ dev->dev_neterr_msgcq = NULL; -+ -+ if (dev->dev_neterr_slots) -+ elan4_sdram_free (dev, dev->dev_neterr_slots, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE)); -+ dev->dev_neterr_slots = 0; -+ -+ if (dev->dev_neterr_inputq) -+ elan4_sdram_free (dev, dev->dev_neterr_inputq, SDRAM_PAGE_SIZE); -+ dev->dev_neterr_inputq = 0; -+} -+ -+int -+elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ ELAN4_CQ *cq = dev->dev_neterr_msgcq; -+ E4_uint64 sent; -+ E4_VirtualProcessEntry route; -+ unsigned int vp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ -+ sent = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent)); -+ -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_sendmsg: nodeid=%d retries=%d cookie=%llx sender=%d,%d%s\n", -+ nodeid, retries, msg->msg_cookies[0], msg->msg_sender.loc_node, msg->msg_sender.loc_context, -+ (dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS ? " - no cq space" : ""); -+ -+ if ((dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ vp = NETERR_VP_BASE + (dev->dev_neterr_queued % NETERR_VP_COUNT); -+ -+ if (elan4_generate_route (&dev->dev_position, &route, ELAN4_NETERR_CONTEXT_NUM, nodeid, nodeid, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI) < 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ elan4_write_route (dev, dev->dev_routetable, vp, &route); -+ -+ writeq ((GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(retries)), (void *)cq->cq_mapping); -+ writeq (NOP_CMD, (void *)cq->cq_mapping); -+ -+ writeq (OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp), (void *)cq->cq_mapping); -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_GETINDEX << 16), (void *)cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), (void *)cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD) << 16), (void *)cq->cq_mapping); -+ writeq ( 0 /* address */, (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[0], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[1], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[2], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[3], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[4], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[5], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[6], (void *)cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[7], (void *)cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_COMMIT << 16), (void *)cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), (void *)cq->cq_mapping); -+ writeq ( 0 /* cookie */, (void *)cq->cq_mapping); -+ -+ writeq (GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(NETERR_RETRIES), (void *)cq->cq_mapping); -+ writeq (WRITE_DWORD_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, sent)), (void *)cq->cq_mapping); -+ writeq ( ++dev->dev_neterr_queued, (void *)cq->cq_mapping); -+ -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ -+ return 1; -+} -+ -+int -+elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap) -+{ -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ unsigned long flags; -+ -+ switch (IPROC_TrapValue (hdrp->IProcStatusCntxAndTrType)) -+ { -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ return 1; -+ -+ case InputEventEngineTrapped: -+ printk ("elan%d: device_iproc_trap: InputEventEngineTrapped - Trans=%x TrAddr=%llx\n", -+ dev->dev_instance, (int)IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType), (long long) hdrp->TrAddr); -+ -+ if ((IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ hdrp->TrAddr == DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq)) -+ { -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ writeq ((DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)) | SET_EVENT_CMD, (void *)(dev->dev_neterr_msgcq->cq_mapping)); -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 1; -+ } -+ -+ default: -+ return 0; -+ } -+} -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/procfs_Linux.c linux-2.6.9/drivers/net/qsnet/elan4/procfs_Linux.c ---- clean/drivers/net/qsnet/elan4/procfs_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/procfs_Linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,1426 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_Linux.c,v 1.43.2.6 2005/09/07 14:35:03 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elan4mod/procfs_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+/* -+ * -+ * procfs format for elan4: -+ * -+ * /proc/qsnet/elan4/config -+ * elan4_debug -+ * elan4_debug_toconsole -+ * elan4_debug_tobuffer -+ * elan4_debug_display_ctxt -+ * elan4_debug_ignore_ctxt -+ * elan4_debug_ignore_type -+ * elan4_debug_mmu -+ * elan4_mainint_punt_loops -+ * user_p2p_route_options -+ * user_bcast_route_options -+ * -+ * /proc/qsnet/elan4/deviceN -+ * stats -+ * position -+ * vpd -+ */ -+ -+struct proc_dir_entry *elan4_procfs_root; -+struct proc_dir_entry *elan4_config_root; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static int -+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static int -+proc_read_devinfo (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len = 0; -+ -+ if (! dev) -+ len = sprintf (page, "\n"); -+ else -+ { -+ len += sprintf (page + len, "dev_vendor_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_device_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_revision_id 0x%x\n", dev->dev_devinfo.dev_revision_id); -+ len += sprintf (page + len, "dev_instance 0x%x\n", dev->dev_devinfo.dev_instance); -+ len += sprintf (page + len, "dev_rail 0x%x\n", dev->dev_devinfo.dev_rail); -+ len += sprintf (page + len, "dev_driver_version 0x%x\n", dev->dev_devinfo.dev_driver_version); -+ len += sprintf (page + len, "dev_params_mask 0x%x\n", dev->dev_devinfo.dev_params_mask); -+ len += sprintf (page + len, "dev_params: \n"); -+ len += sprintf (page + len, " 0 - PciCmdQPadFlag 0x%x\n", dev->dev_devinfo.dev_params.values[0]); -+ len += sprintf (page + len, " 1 - EventCopyWinPt 0x%x\n", dev->dev_devinfo.dev_params.values[1]); -+ len += sprintf (page + len, " 2 - PciWriteCombining 0x%x\n", dev->dev_devinfo.dev_params.values[2]); -+ len += sprintf (page + len, " 3 - 0x%x\n", dev->dev_devinfo.dev_params.values[3]); -+ len += sprintf (page + len, " 4 - 0x%x\n", dev->dev_devinfo.dev_params.values[4]); -+ len += sprintf (page + len, " 5 - 0x%x\n", dev->dev_devinfo.dev_params.values[5]); -+ len += sprintf (page + len, " 6 - 0x%x\n", dev->dev_devinfo.dev_params.values[6]); -+ len += sprintf (page + len, " 7 - 0x%x\n", dev->dev_devinfo.dev_params.values[7]); -+ len += sprintf (page + len, " 8 - 0x%x\n", dev->dev_devinfo.dev_params.values[8]); -+ len += sprintf (page + len, " 9 - 0x%x\n", dev->dev_devinfo.dev_params.values[9]); -+ len += sprintf (page + len, " 10 - 0x%x\n", dev->dev_devinfo.dev_params.values[10]); -+ len += sprintf (page + len, " 11 - features 0x%x\n", dev->dev_devinfo.dev_params.values[11]); -+ len += sprintf (page + len, "dev_num_down_links_value 0x%x\n", dev->dev_devinfo.dev_num_down_links_value); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if (dev->dev_position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->dev_position.pos_nodeid, -+ dev->dev_position.pos_levels, -+ dev->dev_position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned nodeid = ELAN_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ ELAN_POSITION pos; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ pos.pos_mode = ELAN_POS_UNKNOWN; -+ pos.pos_nodeid = ELAN_INVALID_NODE; -+ pos.pos_nodes = 0; -+ pos.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (elan4_compute_position (&pos, nodeid, numnodes, dev->dev_devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->dev_instance, nodeid, numnodes); -+ else -+ { -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->dev_instance, pos.pos_nodeid, -+ pos.pos_nodes, pos.pos_levels); -+ -+ if (elan4_set_position (dev, &pos) < 0) -+ printk ("elan%d: failed to set device position\n", dev->dev_instance); -+ } -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+static int -+proc_read_temp (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned char values[2]; -+ int len; -+ -+ if (i2c_disable_auto_led_update (dev) < 0) -+ len = sprintf (page, ""); -+ else -+ { -+ if (i2c_read (dev, I2C_TEMP_ADDR, 2, values) < 0) -+ len = sprintf (page, ""); -+ else -+ len = sprintf (page, "%s%d%s\n", (values[0] & 0x80) ? "-" : "", -+ (values[0] & 0x80) ? -((signed char)values[0]) - 1 : values[0], -+ (values[1] & 0x80) ? ".5" : ".0"); -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_eccerr (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char errstr[200]; -+ register int i, len = 0; -+ -+ *page = '\0'; -+ -+ for (i = 0; i < sizeof (dev->dev_sdramerrs)/sizeof(dev->dev_sdramerrs[0]); i++) -+ if (dev->dev_sdramerrs[i].ErrorCount != 0) -+ len += sprintf (page + len, "%s occured %0d times\n", -+ elan4_sdramerr2str (dev, dev->dev_sdramerrs[i].EccStatus, dev->dev_sdramerrs[i].ConfigReg, errstr), -+ dev->dev_sdramerrs[i].ErrorCount); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_vpd (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if ( elan4_read_vpd (dev, NULL, page) ) -+ len = sprintf (page, "no vpd tags found\n"); -+ else -+ len = strlen(page)+1; -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_linkportkey (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ len = sprintf (page, "%llx\n", read_reg64 (dev, LinkPortLock)); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_linkportkey (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int res = 0; -+ char tmpbuf[30]; -+ -+ if (count > sizeof (tmpbuf) - 1) -+ return -EINVAL; -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ write_reg64 (dev, LinkPortLock, simple_strtoull (tmpbuf, NULL, 16)); -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (count); -+} -+ -+static int -+proc_read_stats_translations (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_TRANS_STATS *pr = (ELAN4_TRANS_STATS *)data; -+ int tbl = pr->tbl; -+ ELAN4_DEV *dev = list_entry(pr, ELAN4_DEV, trans_stats[tbl] ); -+ char *p = page; -+ -+ if (off) return (0); -+ -+ p += elan4mmu_display_bucket_mmuhash(dev, tbl, pr->buckets, ELAN4_TRANS_STATS_NUM_BUCKETS , p, count); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_write_stats_translations (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_TRANS_STATS *pr = (ELAN4_TRANS_STATS *)data; -+ int b0, b1, b2, b3, b4, b5, b6; -+ -+ int res = 0; -+ char tmpbuf[30]; -+ -+ if (count > sizeof (tmpbuf) - 1) -+ return -EINVAL; -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ sscanf(tmpbuf,"%d %d %d %d %d %d %d\n", &b0, &b1, &b2, &b3, &b4, &b5, &b6); -+ -+ pr->buckets[0] = b0; -+ pr->buckets[1] = b1; -+ pr->buckets[2] = b2; -+ pr->buckets[3] = b3; -+ pr->buckets[4] = b4; -+ pr->buckets[5] = b5; -+ pr->buckets[6] = b6; -+ pr->buckets[6] = 99999999; -+ -+ b0 = 1; -+ for(b1=0 ; b1 < ELAN4_TRANS_STATS_NUM_BUCKETS; b1++) { -+ if ( pr->buckets[b1] < b0) -+ pr->buckets[b1] = 99999999; -+ b0 = pr->buckets[b1]; -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (count); -+} -+static int -+elan4_read_mmuhash_reduction_func (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ char *p = page; -+ -+ if (off) return (0); -+ -+ p += sprintf(p ,"elan4mmu hash reduction : %s\nend reductions %d\nmiddle reductions %d\nmiddle failed %d\n", -+ elan4_mmuhash_chain_reduction?"On":"Off", -+ elan4_mmuhash_chain_end_reduce, -+ elan4_mmuhash_chain_middle_reduce, -+ elan4_mmuhash_chain_middle_fail); -+ p += sprintf(p ,"shuffle attempts %d\nshuffle done %d\n", -+ elan4_mmuhash_shuffle_attempts, -+ elan4_mmuhash_shuffle_done -+ ); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+elan4_write_mmuhash_reduction_func (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int res = 0; -+ char tmpbuf[30]; -+ -+ if (count > sizeof (tmpbuf) - 1) -+ return -EINVAL; -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ if (tmpbuf[0] == '0') elan4_mmuhash_chain_reduction = 0; -+ if (tmpbuf[0] == '1') elan4_mmuhash_chain_reduction = 1; -+ -+ tmpbuf[count] = '\0'; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (count); -+} -+ -+typedef struct elan4_trans_private -+{ -+ ELAN4_DEV *pr_dev; -+ ELAN4_CTXT *pr_ctxt; -+ int pr_index; -+ int pr_tbl; -+ char *pr_page; -+ unsigned pr_off; -+ unsigned pr_len; -+ -+ int pr_changed; -+} ELAN4_TRANS_PRIVATE; -+ -+static int -+elan4_ctxt_trans_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_CTXT_TRANS_INDEX *trans = (ELAN4_CTXT_TRANS_INDEX *)( PDE(inode)->data ); -+ ELAN4_TRANS_PRIVATE *pr; -+ -+ if ((pr = kmalloc (sizeof (ELAN4_TRANS_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_tbl = trans->tbl; -+ pr->pr_ctxt = list_entry(trans, ELAN4_CTXT, trans_index[trans->tbl] ); -+ -+ pr->pr_index = 0; -+ -+ pr->pr_len = 0; -+ pr->pr_off = 0; -+ pr->pr_changed = 1; -+ pr->pr_page = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+elan4_ctxt_trans_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ ELAN4_TRANS_PRIVATE *pr = (ELAN4_TRANS_PRIVATE *) file->private_data; -+ ELAN4_CTXT *ctxt = pr->pr_ctxt; -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int error; -+ -+ if ( pr->pr_index >= dev->dev_hashsize[pr->pr_tbl] ) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (pr->pr_page == NULL && (pr->pr_page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ if (elan4mmu_display_ctxt_mmuhash(ctxt, pr->pr_tbl, &pr->pr_index, pr->pr_page, count)) -+ pr->pr_len = strlen (pr->pr_page); -+ else -+ pr->pr_len = 0; -+ -+ pr->pr_off = 0; -+ pr->pr_changed = 0; -+ pr->pr_index++; -+ } -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_page + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ free_page ((unsigned long) pr->pr_page); -+ pr->pr_page = NULL; -+ } -+ -+ return (count); -+} -+ -+static int -+elan4_ctxt_trans_release (struct inode *inode, struct file *file) -+{ -+ ELAN4_TRANS_PRIVATE *pr = (ELAN4_TRANS_PRIVATE *) file->private_data; -+ -+ if (pr->pr_page) -+ free_page ((unsigned long) pr->pr_page); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static struct file_operations qsnet_ctxt_trans_fops = -+{ -+ open: elan4_ctxt_trans_open, -+ release: elan4_ctxt_trans_release, -+ read: elan4_ctxt_trans_read, -+}; -+ -+void -+proc_insertctxt(ELAN4_DEV *dev, ELAN4_CTXT *ctxt) -+{ -+ struct proc_dir_entry *p; -+ char name[32]; -+ int t; -+ -+ /* GNAT 7565: Need to hold kernel lock when adding/removing -+ * procfs entries outside the module init/fini paths -+ */ -+ lock_kernel(); -+ -+ if (dev->dev_osdep.ctxtdir) -+ { -+ sprintf(name,"%d", ctxt->ctxt_num); -+ if ((ctxt->procdir = proc_mkdir (name, dev->dev_osdep.ctxtdir)) != NULL) -+ { -+ for (t = 0; t < NUM_HASH_TABLES; t++) -+ { -+ sprintf(name , "translations_%d", t); -+ -+ ctxt->trans_index[t].tbl = t; -+ ctxt->trans_index[t].index = 0; -+ -+ if ((p = create_proc_entry (name, 0, ctxt->procdir)) != NULL) -+ { -+ p->proc_fops = &qsnet_ctxt_trans_fops; -+ p->data = & ctxt->trans_index[t]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ } -+ } -+ -+ unlock_kernel(); -+} -+ -+void -+proc_removectxt(ELAN4_DEV *dev, ELAN4_CTXT *ctxt) -+{ -+ int t; -+ char name[32]; -+ -+ /* GNAT 7565: Need to hold kernel lock when adding/removing -+ * procfs entries outside the module init/fini paths -+ */ -+ lock_kernel(); -+ -+ if (dev->dev_osdep.ctxtdir && ctxt->procdir != NULL) -+ { -+ for (t = 0; t < NUM_HASH_TABLES; t++) -+ { -+ sprintf(name , "translations_%d", t); -+ remove_proc_entry (name, ctxt->procdir); -+ } -+ -+ sprintf(name,"%d", ctxt->ctxt_num); -+ remove_proc_entry (name, dev->dev_osdep.ctxtdir); -+ } -+ -+ unlock_kernel(); -+} -+ -+static struct device_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+ unsigned minrev; -+} device_info[] = { -+ {"devinfo", proc_read_devinfo, NULL, 0}, -+ {"position", proc_read_position, proc_write_position, 0}, -+ {"temp", proc_read_temp, NULL, 1}, -+ {"eccerr", proc_read_eccerr, NULL, 0}, -+ {"vpd", proc_read_vpd, NULL, 0}, -+ {"linkportkey", proc_read_linkportkey, proc_write_linkportkey, 0}, -+}; -+ -+static int -+proc_read_link_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "link_errors", dev->dev_stats.s_link_errors); -+ p += sprintf (p, "%20s %ld\n", "lock_errors", dev->dev_stats.s_lock_errors); -+ p += sprintf (p, "%20s %ld\n", "deskew_errors", dev->dev_stats.s_deskew_errors); -+ p += sprintf (p, "%20s %ld\n", "phase_errors", dev->dev_stats.s_phase_errors); -+ -+ p += sprintf (p, "%20s %ld\n", "data_errors", dev->dev_stats.s_data_errors); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow0", dev->dev_stats.s_fifo_overflow0); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow1", dev->dev_stats.s_fifo_overflow1); -+ p += sprintf (p, "%20s %ld\n", "mod45changed", dev->dev_stats.s_mod45changed); -+ p += sprintf (p, "%20s %ld\n", "pack_not_seen", dev->dev_stats.s_pack_not_seen); -+ -+ p += sprintf (p, "%20s %ld\n", "linkport_keyfail", dev->dev_stats.s_linkport_keyfail); -+ p += sprintf (p, "%20s %ld\n", "eop_reset", dev->dev_stats.s_eop_reset); -+ p += sprintf (p, "%20s %ld\n", "bad_length", dev->dev_stats.s_bad_length); -+ p += sprintf (p, "%20s %ld\n", "crc_error", dev->dev_stats.s_crc_error); -+ p += sprintf (p, "%20s %ld\n", "crc_bad", dev->dev_stats.s_crc_bad); -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_timeout", dev->dev_stats.s_cproc_timeout); -+ p += sprintf (p, "%20s %ld\n", "dproc_timeout", dev->dev_stats.s_dproc_timeout); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static char * -+proc_sprintf_bucket_stat (char *p, char *name, unsigned long *stats, int *buckets) -+{ -+ int i; -+ -+ p += sprintf (p, "%20s ", name); -+ -+ for (i = 0; i < ELAN4_DEV_STATS_BUCKETS-1; i++) -+ p += sprintf (p, "%ld(<=%d) ", stats[i], buckets[i]); -+ p += sprintf (p, "%ld(>%d)\n", stats[i], buckets[i-1]); -+ -+ return p; -+} -+ -+static int -+proc_read_intr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "interrupts", dev->dev_stats.s_interrupts); -+ p += sprintf (p, "%20s %ld\n", "haltints", dev->dev_stats.s_haltints); -+ -+ p += sprintf (p, "%20s %ld\n", "mainint_punts", dev->dev_stats.s_mainint_punts); -+ p += sprintf (p, "%20s %ld\n", "mainint_rescheds", dev->dev_stats.s_mainint_rescheds); -+ -+ p = proc_sprintf_bucket_stat (p, "mainints", dev->dev_stats.s_mainints, MainIntBuckets); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_traps", dev->dev_stats.s_cproc_traps); -+ p += sprintf (p, "%20s %ld\n", "dproc_traps", dev->dev_stats.s_dproc_traps); -+ p += sprintf (p, "%20s %ld\n", "eproc_traps", dev->dev_stats.s_eproc_traps); -+ p += sprintf (p, "%20s %ld\n", "iproc_traps", dev->dev_stats.s_iproc_traps); -+ p += sprintf (p, "%20s %ld\n", "tproc_traps", dev->dev_stats.s_tproc_traps); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const CProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_cproc_trap_types)/sizeof(dev->dev_stats.s_cproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", CProcTrapNames[i], dev->dev_stats.s_cproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_dproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const DProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_dproc_trap_types)/sizeof(dev->dev_stats.s_dproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", DProcTrapNames[i], dev->dev_stats.s_dproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_eproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const EProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_eproc_trap_types)/sizeof(dev->dev_stats.s_eproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", EProcTrapNames[i], dev->dev_stats.s_eproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_iproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const IProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_iproc_trap_types)/sizeof(dev->dev_stats.s_iproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", IProcTrapNames[i], dev->dev_stats.s_iproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_tproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const TProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_tproc_trap_types)/sizeof(dev->dev_stats.s_tproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", TProcTrapNames[i], dev->dev_stats.s_tproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_sdram_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "correctable_errors", dev->dev_stats.s_correctable_errors); -+ p += sprintf (p, "%20s %ld\n", "multiple_errors", dev->dev_stats.s_multiple_errors); -+ p += sprintf (p, "%20s %ldK\n", "sdram_bytes_free", dev->dev_stats.s_sdram_bytes_free/1024); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+void -+elan4_ringbuf_store (ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev) -+{ -+ int newend; -+ -+ ASSERT (kmutex_is_locked (&dev->dev_lock)); -+ -+ memcpy(&ringbuf->routes[ringbuf->end], route, sizeof(E4_VirtualProcessEntry)); -+ newend = ringbuf->end + 1; -+ if (newend >= DEV_STASH_ROUTE_COUNT) -+ newend -= DEV_STASH_ROUTE_COUNT; -+ if (newend == ringbuf->start) -+ ringbuf->start += 1; -+ if (ringbuf->start >= DEV_STASH_ROUTE_COUNT) -+ ringbuf->start -= DEV_STASH_ROUTE_COUNT; -+ ringbuf->end = newend; -+} -+ -+static int -+proc_read_dproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *dproc_timeout; -+ -+ dproc_timeout = dev->dev_dproc_timeout; -+ -+ if (!dproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (dproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, dproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_dproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_dproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+ -+static int -+proc_read_cproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *cproc_timeout; -+ -+ cproc_timeout = dev->dev_cproc_timeout; -+ -+ if (!cproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (cproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, cproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_cproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_traperr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *ack_errors; -+ -+ ack_errors = dev->dev_ack_errors; -+ -+ if (!ack_errors) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (ack_errors[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, ack_errors[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_ackerror_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_ack_error_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", (long long)ringbuf->routes[i].Values[0], (long long)ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct stats_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} stats_info[] = { -+ {"link", proc_read_link_stats, NULL}, -+ {"intr", proc_read_intr_stats, NULL}, -+ {"trap", proc_read_trap_stats, NULL}, -+ {"cproc", proc_read_cproc_trap_stats, NULL}, -+ {"dproc", proc_read_dproc_trap_stats, NULL}, -+ {"eproc", proc_read_eproc_trap_stats, NULL}, -+ {"iproc", proc_read_iproc_trap_stats, NULL}, -+ {"tproc", proc_read_tproc_trap_stats, NULL}, -+ {"sdram", proc_read_sdram_stats, NULL}, -+ {"trapdmaerr", proc_read_traperr_stats, NULL}, -+ {"dproctimeout", proc_read_dproc_timeout_stats, NULL}, -+ {"cproctimeout", proc_read_cproc_timeout_stats, NULL}, -+ {"dproctimeoutroutes", proc_read_dproc_timeout_routes, NULL}, -+ {"cproctimeoutroutes", proc_read_cproc_timeout_routes, NULL}, -+ {"ackerrroutes", proc_read_ackerror_routes, NULL}, -+}; -+ -+static int -+proc_read_sysconfig (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ E4_uint32 syscontrol = dev->dev_syscontrol; -+ int len = 0; -+ -+ *eof = 1; -+ if (off != 0) -+ return (0); -+ -+ if (syscontrol & CONT_EN_ALL_SETS) -+ len += sprintf (page + len, "%sEN_ALL_SETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_MMU_ENABLE) -+ len += sprintf (page + len, "%sMMU_ENABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_HASH_TABLE) -+ len += sprintf (page + len, "%sCACHE_HASH_TABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_CHAINS) -+ len += sprintf (page + len, "%sCACHE_CHAINS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_ROOT_CNTX) -+ len += sprintf (page + len, "%sCACHE_ROOT_CNTX", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_STEN_ROUTES) -+ len += sprintf (page + len, "%sCACHE_STEN_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_DMA_ROUTES) -+ len += sprintf (page + len, "%sCACHE_DMA_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_INHIBIT_MAX_CHAIN_ITEMS) -+ len += sprintf (page + len, "%sINHIBIT_MAX_CHAIN_ITEMS", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "%sTABLE0_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE0_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ len += sprintf (page + len, "%sTABLE1_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE1_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ -+ if (syscontrol & CONT_2K_NOT_1K_DMA_PACKETS) -+ len += sprintf (page + len, "%s2K_NOT_1K_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_ALIGN_ALL_DMA_PACKETS) -+ len += sprintf (page + len, "%sALIGN_ALL_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_DIRECT_MAP_PCI_WRITES) -+ len += sprintf (page + len, "%sDIRECT_MAP_PCI_WRITES", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "\n"); -+ -+ *start = page; -+ return (len); -+} -+ -+static int -+proc_write_sysconfig (struct file *file, const char *ubuffer, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned long page = __get_free_page (GFP_KERNEL); -+ char *buffer = (char *)page; -+ int add = 0; -+ int sub = 0; -+ -+ count = MIN (count, PAGE_SIZE - 1); -+ if (copy_from_user (buffer, ubuffer, count)) -+ { -+ free_page (page); -+ return (-EFAULT); -+ } -+ -+ buffer[count] = 0; /* terminate string */ -+ -+ while (*buffer != 0) -+ { -+ char *ptr; -+ char *end; -+ int ch; -+ int val; -+ int op; -+ -+ ch = *buffer; -+ if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') -+ { -+ buffer++; -+ continue; -+ } -+ -+ op = *buffer; -+ if (op == '+' || op == '-') -+ buffer++; -+ -+ for (end = buffer; *end != 0; end++) -+ if (*end == ' ' || *end == '\t' || -+ *end == '\r' || *end == '\n') -+ break; -+ -+ if (end == buffer) -+ break; -+ -+ ch = *end; -+ *end = 0; -+ -+ for (ptr = buffer; *ptr != 0; ptr++) -+ if ('a' <= *ptr && *ptr <= 'z') -+ *ptr = *ptr + 'A' - 'a'; -+ -+ if (!strcmp (buffer, "EN_ALL_SETS")) -+ val = CONT_EN_ALL_SETS; -+ if (!strcmp (buffer, "CACHE_HASH_TABLE")) -+ val = CONT_CACHE_HASH_TABLE; -+ else if (!strcmp (buffer, "CACHE_CHAINS")) -+ val = CONT_CACHE_CHAINS; -+ else if (!strcmp (buffer, "CACHE_ROOT_CNTX")) -+ val = CONT_CACHE_ROOT_CNTX; -+ else if (!strcmp (buffer, "CACHE_STEN_ROUTES")) -+ val = CONT_CACHE_STEN_ROUTES; -+ else if (!strcmp (buffer, "CACHE_DMA_ROUTES")) -+ val = CONT_CACHE_DMA_ROUTES; -+ else if (!strcmp (buffer, "2K_NOT_1K_DMA_PACKETS")) -+ val = CONT_2K_NOT_1K_DMA_PACKETS; -+ else if (!strcmp (buffer, "ALIGN_ALL_DMA_PACKETS")) -+ val = CONT_ALIGN_ALL_DMA_PACKETS; -+ else -+ val = 0; -+ -+ if (op == '+') -+ add |= val; -+ else if (op == '-') -+ sub |= val; -+ -+ *end = ch; -+ buffer = end; -+ } -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ CHANGE_SYSCONTROL (dev, add, sub); -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ free_page (page); -+ return (count); -+} -+ -+static struct config_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} config_info[] = { -+ {"sysconfig", proc_read_sysconfig, proc_write_sysconfig}, -+}; -+ -+static int -+elan4_trans_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_TRANS_INDEX *trans = (ELAN4_TRANS_INDEX *)( PDE(inode)->data ); -+ ELAN4_TRANS_PRIVATE *pr; -+ -+ if ((pr = kmalloc (sizeof (ELAN4_TRANS_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_tbl = trans->tbl; -+ pr->pr_dev = list_entry(trans, ELAN4_DEV, trans_index[trans->tbl] ); -+ pr->pr_index = 0; -+ -+ pr->pr_len = 0; -+ pr->pr_off = 0; -+ pr->pr_changed = 1; -+ pr->pr_page = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+elan4_trans_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ ELAN4_TRANS_PRIVATE *pr = (ELAN4_TRANS_PRIVATE *) file->private_data; -+ ELAN4_DEV *dev = pr->pr_dev; -+ int error; -+ -+ if ( pr->pr_index >= dev->dev_hashsize[pr->pr_tbl] ) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (pr->pr_page == NULL && (pr->pr_page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ if (elan4mmu_display_mmuhash(dev, pr->pr_tbl, &pr->pr_index, pr->pr_page, count)) -+ pr->pr_len = strlen (pr->pr_page); -+ else -+ pr->pr_len = 0; -+ -+ pr->pr_off = 0; -+ pr->pr_changed = 0; -+ pr->pr_index++; -+ } -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_page + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ free_page ((unsigned long) pr->pr_page); -+ pr->pr_page = NULL; -+ } -+ -+ return (count); -+} -+ -+static int -+elan4_trans_release (struct inode *inode, struct file *file) -+{ -+ ELAN4_TRANS_PRIVATE *pr = (ELAN4_TRANS_PRIVATE *) file->private_data; -+ -+ if (pr->pr_page) -+ free_page ((unsigned long) pr->pr_page); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static struct file_operations qsnet_trans_fops = -+{ -+ open: elan4_trans_open, -+ release: elan4_trans_release, -+ read: elan4_trans_read, -+}; -+ -+void -+elan4_procfs_device_init (ELAN4_DEV *dev) -+{ -+ struct proc_dir_entry *p; -+ char name[NAME_MAX]; -+ int i; -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ dev->dev_osdep.procdir = proc_mkdir (name, elan4_procfs_root); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ if ((p = create_proc_entry (device_info[i].name, 0, dev->dev_osdep.procdir)) != NULL) -+ { -+ p->read_proc = device_info[i].read_func; -+ p->write_proc = device_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ for(i = 0; i < NUM_HASH_TABLES; i++) { -+ sprintf (name, "translations_%d",i); -+ -+ dev->trans_index[i].tbl = i; -+ -+ if ((p = create_proc_entry (name, 0, dev->dev_osdep.procdir)) != NULL) -+ { -+ p->proc_fops = &qsnet_trans_fops; -+ p->data = & dev->trans_index[i]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.configdir = proc_mkdir ("config", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ { -+ if ((p = create_proc_entry (config_info[i].name, 0, dev->dev_osdep.configdir)) != NULL) -+ { -+ p->read_proc = config_info[i].read_func; -+ p->write_proc = config_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.statsdir = proc_mkdir ("stats", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ { -+ if ((p = create_proc_entry (stats_info[i].name, 0, dev->dev_osdep.statsdir)) != NULL) -+ { -+ p->read_proc = stats_info[i].read_func; -+ p->write_proc = stats_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ for(i = 0; i < NUM_HASH_TABLES; i++) { -+ sprintf (name, "translations_%d",i); -+ -+ dev->trans_stats[i].tbl = i; -+ dev->trans_stats[i].buckets[0] = 1; -+ dev->trans_stats[i].buckets[1] = 5; -+ dev->trans_stats[i].buckets[2] = 10; -+ dev->trans_stats[i].buckets[3] = 50; -+ dev->trans_stats[i].buckets[4] = 100; -+ dev->trans_stats[i].buckets[5] = 200; -+ dev->trans_stats[i].buckets[6] = 99999999; -+ -+ if ((p = create_proc_entry (name, 0, dev->dev_osdep.statsdir)) != NULL) -+ { -+ p->read_proc = proc_read_stats_translations; -+ p->write_proc = proc_write_stats_translations; -+ p->data = & dev->trans_stats[i]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.ctxtdir = proc_mkdir ("ctxt", dev->dev_osdep.procdir); -+} -+ -+void -+elan4_procfs_device_fini (ELAN4_DEV *dev) -+{ -+ char name[NAME_MAX]; -+ int i; -+ -+ if (dev->dev_osdep.ctxtdir) -+ remove_proc_entry ("ctxt", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ remove_proc_entry (stats_info[i].name, dev->dev_osdep.statsdir); -+ -+ for (i = 0; i < NUM_HASH_TABLES; i++) { -+ sprintf(name , "translations_%d", i); -+ remove_proc_entry (name, dev->dev_osdep.statsdir); -+ } -+ remove_proc_entry ("stats", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ remove_proc_entry (config_info[i].name, dev->dev_osdep.configdir); -+ remove_proc_entry ("config", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ remove_proc_entry (device_info[i].name, dev->dev_osdep.procdir); -+ } -+ -+ for (i = 0; i < NUM_HASH_TABLES; i++) { -+ sprintf(name , "translations_%d", i); -+ remove_proc_entry (name, dev->dev_osdep.procdir); -+ } -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ remove_proc_entry (name, elan4_procfs_root); -+} -+ -+void -+elan4_procfs_init(void) -+{ -+ struct proc_dir_entry *p; -+ -+ elan4_procfs_root = proc_mkdir("elan4", qsnet_procfs_root); -+ elan4_config_root = proc_mkdir("config", elan4_procfs_root); -+ -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug", &elan4_debug, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_toconsole", &elan4_debug_toconsole, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_tobuffer", &elan4_debug_tobuffer, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_debug_mmu", &elan4_debug_mmu, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_mainint_punt_loops", &elan4_mainint_punt_loops, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_p2p_route_options", &user_p2p_route_options, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_bcast_route_options", &user_bcast_route_options, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_dproc_retry_count", &user_dproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_cproc_retry_count", &user_cproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_pagefault_enabled", &user_pagefault_enabled, 0); -+ qsnet_proc_register_int (elan4_config_root, "num_fault_save", &num_fault_save, 0); -+ qsnet_proc_register_int (elan4_config_root, "min_fault_pages", &min_fault_pages, 0); -+ qsnet_proc_register_int (elan4_config_root, "max_fault_pages", &max_fault_pages, 0); -+ qsnet_proc_register_int (elan4_config_root, "assfail_mode", &assfail_mode, 0); -+ -+ if ((p = create_proc_entry ("mmuhash_reduction", 0, elan4_config_root)) != NULL) -+ { -+ p->read_proc = elan4_read_mmuhash_reduction_func; -+ p->write_proc = elan4_write_mmuhash_reduction_func; -+ p->data = NULL; -+ p->owner = THIS_MODULE; -+ } -+ -+#if defined(IOPROC_PATCH_APPLIED) -+ qsnet_proc_register_int (elan4_config_root, "user_ioproc_enabled", &user_ioproc_enabled, 0); -+#endif -+} -+ -+void -+elan4_procfs_fini(void) -+{ -+#if defined(IOPROC_PATCH_APPLIED) -+ remove_proc_entry ("user_ioproc_enabled", elan4_config_root); -+#endif -+ -+ remove_proc_entry ("mmuhash_reduction", elan4_config_root); -+ -+ remove_proc_entry ("assfail_mode", elan4_config_root); -+ remove_proc_entry ("max_fault_pages", elan4_config_root); -+ remove_proc_entry ("min_fault_pages", elan4_config_root); -+ remove_proc_entry ("num_fault_save", elan4_config_root); -+ remove_proc_entry ("user_pagefault_enabled", elan4_config_root); -+ remove_proc_entry ("user_cproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_dproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_bcast_route_options", elan4_config_root); -+ remove_proc_entry ("user_p2p_route_options", elan4_config_root); -+ remove_proc_entry ("elan4_mainint_punt_loops", elan4_config_root); -+ remove_proc_entry ("elan4_debug_mmu", elan4_config_root); -+ remove_proc_entry ("elan4_debug_tobuffer", elan4_config_root); -+ remove_proc_entry ("elan4_debug_toconsole", elan4_config_root); -+ remove_proc_entry ("elan4_debug", elan4_config_root); -+ -+ remove_proc_entry ("config", elan4_procfs_root); -+ remove_proc_entry ("elan4", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan4_procfs_root); -+EXPORT_SYMBOL(elan4_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/quadrics_version.h linux-2.6.9/drivers/net/qsnet/elan4/quadrics_version.h ---- clean/drivers/net/qsnet/elan4/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/elan4/regions.c linux-2.6.9/drivers/net/qsnet/elan4/regions.c ---- clean/drivers/net/qsnet/elan4/regions.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/regions.c 2004-10-21 11:31:12.000000000 -0400 -@@ -0,0 +1,609 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: regions.c,v 1.22 2004/10/21 15:31:12 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/regions.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+/*================================================================================*/ -+/* elan address region management */ -+USER_RGN * -+user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ E4_Addr base; -+ E4_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_ergns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_ergnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_ergns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_ebase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_etail; -+ -+ if ((lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_ergns; -+ -+ if (lorgn->rgn_ebase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_ebase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_enext; -+ -+ if (rgn->rgn_ebase <= addr) -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_ebase > addr) -+ rgn = rgn->rgn_eprev; -+ -+ if ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_enext); -+ else -+ { -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_elan (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, nrgn->rgn_ebase, 1); -+ E4_Addr nbase = nrgn->rgn_ebase; -+ E4_Addr ntop = nbase + nrgn->rgn_len - 1; -+ E4_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_ergns = uctx->uctx_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = uctx->uctx_etail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (uctx->uctx_ergns == rgn) -+ uctx->uctx_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ uctx->uctx_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_elan (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_ergnlast = rgn->rgn_enext; -+ if (rgn == uctx->uctx_etail) -+ uctx->uctx_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == uctx->uctx_ergns) -+ uctx->uctx_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+USER_RGN * -+user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_ebase <= addr && addr <= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ return (rgn); -+ -+ return (NULL); -+} -+ -+/* main address region management */ -+USER_RGN * -+user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ virtaddr_t lastaddr; -+ virtaddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_mrgns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_mrgnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ rgn = rgn->rgn_mprev; -+ -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_mnext); -+ else -+ { -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_main (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, nrgn->rgn_mbase, 1); -+ virtaddr_t nbase = nrgn->rgn_mbase; -+ virtaddr_t ntop = nbase + nrgn->rgn_len - 1; -+ virtaddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_mrgns = uctx->uctx_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = uctx->uctx_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (uctx->uctx_mrgns == rgn) -+ uctx->uctx_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ uctx->uctx_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_main (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_mrgnlast = rgn->rgn_mnext; -+ if (rgn == uctx->uctx_mtail) -+ uctx->uctx_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == uctx->uctx_mrgns) -+ uctx->uctx_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+/* Remove whole region from both lists */ -+static void -+user_removergn (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ -+ user_removergn_elan (uctx, rgn); -+ user_removergn_main (uctx, rgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ KMEM_FREE (rgn, sizeof (USER_RGN)); -+} -+ -+/* Remove all allocated regions */ -+void -+user_freergns (USER_CTXT *uctx) -+{ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (uctx->uctx_mrgns) -+ user_removergn(uctx, uctx->uctx_mrgns); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ ASSERT (uctx->uctx_ergns == NULL); -+} -+ -+USER_RGN * -+user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_mbase <= addr && addr <= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm) -+{ -+ USER_RGN *nrgn; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_setperm: user %lx elan %llx len %lx perm %x\n", maddr, (long long) eaddr, len, perm); -+ -+ if ((maddr & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: alignment failure\n"); -+ return (-EINVAL); -+ } -+ -+ if ((maddr + len - 1) <= maddr || (eaddr + len - 1) <= eaddr) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: range failure\n"); -+ return (-EINVAL); -+ } -+ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ if (nrgn == NULL) -+ return (-ENOMEM); -+ -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if (user_addrgn_elan (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: elan address exists\n"); -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ -+ if (user_addrgn_main (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: main address exists\n"); -+ user_removergn_elan (uctx, nrgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ if ((perm & PERM_Preload)) -+ user_preload_main (uctx, maddr, len); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+void -+user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len) -+{ -+ E4_Addr raddr; -+ E4_Addr rtop; -+ USER_RGN *nrgn; -+ USER_RGN *rgn; -+ USER_RGN *rgn_next; -+ unsigned long ssize; -+ int res; -+ -+ PRINTF2 (uctx, DBG_PERM, "user_clrperm: elan %llx len %lx\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + (PAGESIZE-1); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ for (rgn = user_findrgn_elan (uctx, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ PRINTF (uctx, DBG_PERM, " elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ -+ PRINTF (uctx, DBG_PERM, " whole region\n"); -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1); -+ user_removergn (uctx, rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at beginning %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, ssize); -+ -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = (rgn->rgn_ebase + rgn->rgn_len - 1) - raddr + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at end %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, raddr+ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, ssize); -+ -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, rtop); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, rtop - raddr + 1); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1); -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = (rgn->rgn_ebase + rgn->rgn_len - 1) - rtop; -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ PRINTF (uctx, DBG_PERM, " new elan %llx->%llx main %p->%p\n", -+ nrgn->rgn_ebase, nrgn->rgn_ebase + nrgn->rgn_len-1, -+ nrgn->rgn_mbase, nrgn->rgn_mbase + nrgn->rgn_len-1); -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ PRINTF (uctx, DBG_PERM, " old elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ res = user_addrgn_elan (uctx, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = user_addrgn_main (uctx, nrgn); -+ ASSERT (res == 0); -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+} -+ -+int -+user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access) -+{ -+ USER_RGN *rgn; -+ -+ PRINTF3 (uctx, DBG_PERM, "user_checkperm: elan %lx len %lx access %x\n", raddr, rsize, access); -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (-ENOMEM); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ if ((rgn = user_rgnat_elan (uctx, raddr)) == (USER_RGN *) NULL) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_checkperm : rgn %lx -> %lx perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + (E4_Addr)rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EACCES); -+ } -+ } -+ } -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+virtaddr_t -+user_elan2main (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn; -+ virtaddr_t raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_elan (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+E4_Addr -+user_main2elan (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn; -+ E4_Addr raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_main (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/routetable.c linux-2.6.9/drivers/net/qsnet/elan4/routetable.c ---- clean/drivers/net/qsnet/elan4/routetable.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/routetable.c 2005-04-15 08:38:22.000000000 -0400 -@@ -0,0 +1,254 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: routetable.c,v 1.17 2005/04/15 12:38:22 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/routetable.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+ELAN4_ROUTE_TABLE * -+elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size) -+{ -+ ELAN4_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN4_ROUTE_TABLE *, sizeof (ELAN4_ROUTE_TABLE), 1); -+ -+ if (tbl == (ELAN4_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->tbl_size = (size & E4_VPT_SIZE_MASK); -+ tbl->tbl_entries = elan4_sdram_alloc (dev, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ if (tbl->tbl_entries == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+ return ((ELAN4_ROUTE_TABLE *) NULL); -+ } -+ -+ spin_lock_init (&tbl->tbl_lock); -+ -+ /* zero the route table */ -+ elan4_sdram_zeroq_sdram (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ return (tbl); -+} -+ -+void -+elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl) -+{ -+ elan4_sdram_free (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+} -+ -+void -+elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), entry->Values[1]); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), entry->Values[0]); -+ pioflush_sdram (dev); -+} -+ -+void -+elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ entry->Values[0] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0])); -+ entry->Values[1] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1])); -+} -+ -+void -+elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), 0); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), 0); -+ pioflush_sdram (dev); -+} -+ -+static void -+pack_them_routes (E4_VirtualProcessEntry *entry, E4_uint16 first, E4_uint8 *packed, unsigned ctx) -+{ -+ E4_uint64 value0 = first; -+ E4_uint64 value1 = ROUTE_CTXT_VALUE(ctx); -+ E4_uint32 ThirdRouteBCastVal; -+ register int i; -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ value0 |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ value1 |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ /* DMA fix for large broadcast route values that fall into the double issue of route value 3 bug. */ -+ /* NOTE - this is only required when the link is running in Mod45 mode, it could be automatically -+ * disabled when Mod44 is detected */ -+ -+ /* First seach for the alignment type. The bug is only sensitive to an odd bcast aligment on the 3rd word. */ -+ for (i=4;i<16;i++) -+ if (((value0 >> (i*4)) & 0xc) == 4) -+ i++; -+ -+ if (i == 17) -+ { -+ ThirdRouteBCastVal = value1 & 0xcccccccc; -+ if (((value1 & 0xfffff0000000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x04444444)) -+ value1 |= 0x140000000ULL; -+ else if (((value1 & 0xfffffff00000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00044444)) -+ value1 |= 0x1400000ULL; -+ else if (((value1 & 0xfffffffff000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000444)) -+ value1 |= 0x14000ULL; -+ else if (((value1 & 0xfffffffffff0ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000004)) -+ value1 |= 0x140ULL; -+ } -+ -+ entry->Values[0] = value0; -+ entry->Values[1] = value1; -+} -+ -+int -+elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctx, unsigned lowid, unsigned highid, unsigned options) -+{ -+ unsigned int broadcast = (lowid != highid); -+ unsigned int noadaptive = 0; -+ int padbcast = 0; -+ E4_uint16 first; -+ int rb; -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ int level, llink, hlink; -+ -+ /* sanity check on lowid highid */ -+ if (highid < lowid) return (-EINVAL); -+ if (lowid < 0) return (-EINVAL); -+ if (highid >= pos->pos_nodes) return (-EINVAL); -+ -+ regenerate_routes: -+ first = 0; -+ rb = 0; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_MODE_LOOPBACK: -+ if (lowid != highid || lowid != pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (lowid != highid || lowid == pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_SWITCHED: -+ { -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ unsigned int broadcasting = 0; -+ -+ bzero (packed, sizeof (packed)); -+ -+ /* XXXX compute noadaptive ? */ -+ -+ for (level = 0; -+ level < pos->pos_levels && ! ((pos->pos_nodeid / spanned) == (lowid / spanned) && -+ (pos->pos_nodeid / spanned) == (highid / spanned)); -+ level++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = (broadcast || noadaptive) ? FIRST_BCAST_TREE : FIRST_ADAPTIVE; -+ else if (broadcast && padbcast) -+ { -+ padbcast = 0; -+ packed[rb++] = PACKED_BCAST0(4, 4); -+ packed[rb++] = PACKED_BCAST1(4, 4); -+ } -+ else -+ packed[rb++] = (broadcast || noadaptive) ? PACKED_BCAST_TREE : PACKED_ADAPTIVE; -+ } -+ -+ while (level >= 0) -+ { -+ spanned /= *arityp; -+ -+ llink = (lowid / spanned) % *arityp; -+ hlink = (highid / spanned) % *arityp; -+ -+ if (llink != hlink || broadcasting) -+ { -+ broadcasting = 1; -+ -+ if (first == 0) -+ first = FIRST_BCAST (hlink, llink); -+ else -+ { -+ packed[rb++] = PACKED_BCAST0(hlink, llink); -+ -+ if ((rb % 4) == 0 && PACKED_BCAST1(hlink, llink) == 0) -+ { -+ padbcast = 1; -+ goto regenerate_routes; -+ } -+ -+ packed[rb++] = PACKED_BCAST1(hlink, llink); -+ } -+ } -+ else -+ { -+ if (first == 0) -+ first = FIRST_ROUTE(llink); -+ else -+ packed[rb++] = PACKED_ROUTE(llink); -+ } -+ -+ level--; -+ arityp++; -+ } -+ -+ pack_them_routes (route, first | (options & FIRST_OPTIONS_MASK), packed, ctx); -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+int -+elan4_check_route (ELAN_POSITION *postiion, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags) -+{ -+ /* XXXX - TBD */ -+ return (0); -+} -+ -+EXPORT_SYMBOL(elan4_alloc_routetable); -+EXPORT_SYMBOL(elan4_free_routetable); -+EXPORT_SYMBOL(elan4_write_route); -+EXPORT_SYMBOL(elan4_read_route); -+EXPORT_SYMBOL(elan4_invalidate_route); -+EXPORT_SYMBOL(elan4_generate_route); -+EXPORT_SYMBOL(elan4_check_route); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/sdram.c linux-2.6.9/drivers/net/qsnet/elan4/sdram.c ---- clean/drivers/net/qsnet/elan4/sdram.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/sdram.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,1039 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.34.2.1 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/sdram.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+EXPORT_SYMBOL_GPL(elan4_sdram_readb); -+EXPORT_SYMBOL_GPL(elan4_sdram_readw); -+EXPORT_SYMBOL_GPL(elan4_sdram_readl); -+EXPORT_SYMBOL_GPL(elan4_sdram_readq); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeb); -+EXPORT_SYMBOL_GPL(elan4_sdram_writew); -+EXPORT_SYMBOL_GPL(elan4_sdram_writel); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeq); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerob_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerow_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerol_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zeroq_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_alloc); -+EXPORT_SYMBOL_GPL(elan4_sdram_free); -+EXPORT_SYMBOL_GPL(elan4_sdram_flushcache); -+ -+#define SDRAM_MIN_BANK_SIZE ((1 << 15) * 8) /* 256 Kbytes */ -+ -+static inline ELAN4_SDRAM_BANK * -+sdramaddr_to_bank (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ register int i; -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ ELAN4_SDRAM_BANK *bank = &dev->dev_sdram_banks[i]; -+ -+ if (saddr >= bank->b_base && saddr < (bank->b_base + bank->b_size)) -+ return (bank); -+ } -+ printk ("sdramaddr_to_bank: sdram address %lx not in a sdram bank\n", saddr); -+ BUG(); -+ -+ return (NULL); /* NOTREACHED */ -+} -+ -+static inline int -+sdramaddr_to_bankoffset (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ return (saddr & (sdramaddr_to_bank (dev, saddr)->b_size-1)); -+} -+ -+static inline int -+sdramaddr_to_bit(ELAN4_DEV *dev, int indx, sdramaddr_t saddr) -+{ -+ return (sdramaddr_to_bankoffset(dev, saddr) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))); -+} -+ -+static inline ioaddr_t -+sdramaddr_to_ioaddr (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, saddr); -+ -+ return (bank->b_ioaddr + (saddr - bank->b_base)); -+} -+ -+unsigned char -+elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readb (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned short -+elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readw (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned int -+elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readl (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned long long -+elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readq (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+void -+elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, (void *) sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, (void *) sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr(dev, off))); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, (void *) (sdramaddr_to_ioaddr(dev, off))); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u32)) -+ writel (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, 0, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (0, (void *) dest); -+} -+ -+void -+elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u8 *dest = (u8 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u8)) -+ *dest++ = __elan4_readb (dev, src); -+} -+ -+void -+elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u16 *dest = (u16 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u16)) -+ *dest++ = __elan4_readw (dev, src); -+} -+ -+void -+elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u32 *dest = (u32 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u32)) -+ *dest++ = __elan4_readl (dev, src); -+} -+ -+void -+elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u64 *dest = (u64 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, from, (unsigned long) to, nbytes) == 0) -+ return; -+#endif -+ -+ for (; src < lim; src += sizeof (u64)) -+ *dest++ = __elan4_readq (dev, src); -+} -+ -+void -+elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u8 *src = (u8 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u16 *src = (u16 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u32 *src = (u32 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u64 *src = (u64 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyto_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, (unsigned long) from, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+/* sdram buddy allocator */ -+typedef struct sdramblock -+{ -+ sdramaddr_t next; -+ sdramaddr_t prev; -+} sdramblock_t; -+ -+static inline sdramaddr_t -+read_next (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next))); -+} -+ -+static inline sdramaddr_t -+read_prev (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev))); -+} -+ -+static inline void -+write_next (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next)))); -+} -+ -+static inline void -+write_prev (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev)))); -+} -+ -+static inline void -+freelist_insert (ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t next = dev->dev_sdram_freelists[(idx)]; -+ -+ /* -+ * block->prev = NULL; -+ * block->next = next; -+ * if (next != NULL) -+ * next->prev = block; -+ * freelist = block; -+ */ -+ write_prev (dev, block, (sdramaddr_t) 0); -+ write_next (dev, block, next); -+ if (next != (sdramaddr_t) 0) -+ write_prev (dev, next, block); -+ dev->dev_sdram_freelists[idx] = block; -+ -+ dev->dev_sdram_freecounts[idx]++; -+ dev->dev_stats.s_sdram_bytes_free += (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_remove (ELAN4_DEV *dev,int idx, sdramaddr_t block) -+{ -+ /* -+ * if (block->prev) -+ * block->prev->next = block->next; -+ * else -+ * dev->dev_sdram_freelists[idx] = block->next; -+ * if (block->next) -+ * block->next->prev = block->prev; -+ */ -+ sdramaddr_t blocknext = read_next (dev, block); -+ sdramaddr_t blockprev = read_prev (dev, block); -+ -+ if (blockprev) -+ write_next (dev, blockprev, blocknext); -+ else -+ dev->dev_sdram_freelists[idx] = blocknext; -+ if (blocknext) -+ write_prev (dev, blocknext, blockprev); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_removehead(ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t blocknext = read_next (dev, block); -+ -+ if ((dev->dev_sdram_freelists[idx] = blocknext) != 0) -+ write_prev (dev, blocknext, 0); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+#ifdef DEBUG -+static int -+display_blocks (ELAN4_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "%s - indx %d\n", string, indx); -+ for (block = dev->dev_sdram_freelists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ PRINTF (DBG_DEVICE, DBG_SDRAM, " %x\n", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ -+ return (nbytes); -+} -+ -+void -+elan4_sdram_display (ELAN4_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->dev_sdram_freelists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "\n%d bytes free - %d pages free\n", nbytes, nbytes/SDRAM_PAGE_SIZE); -+} -+ -+void -+elan4_sdram_verify (ELAN4_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->dev_sdram_freelists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned off = sdramaddr_to_bankoffset (dev, block); -+ int bit = sdramaddr_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->b_size) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->b_bitmaps[indx], bit) == 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdramaddr_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->b_bitmaps[i], bit + b)) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->dev_sdram_freecounts[indx] != count) -+ printk ("elan4_sdram_verify: indx=%x expected %d got %d\n", indx, dev->dev_sdram_freecounts[indx], count); -+ } -+} -+ -+#endif -+ -+static void -+free_block (ELAN4_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned bit = sdramaddr_to_bit (dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->b_bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->b_bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%x buddy=%x indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->b_bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->b_bitmaps[indx], bit); -+} -+ -+void -+elan4_sdram_init (ELAN4_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->dev_sdram_lock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->dev_sdram_freelists[indx] = (sdramaddr_t) 0; -+ dev->dev_sdram_freecounts[indx] = 0; -+ } -+} -+ -+void -+elan4_sdram_fini (ELAN4_DEV *dev) -+{ -+ spin_lock_destroy (&dev->dev_sdram_lock); -+} -+ -+#ifdef CONFIG_MPSAS -+/* size of Elan SDRAM in simulation */ -+#define SDRAM_used_addr_bits (16) -+#define SDRAM_SIMULATION_BANK_SIZE ((1 << SDRAM_used_addr_bits) * 8) /* 128 kbytes */ -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ printk ("elan%d: memory bank %d is %d Kb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (SDRAM_SIMULATION_BANK_SIZE / 1024)); -+ -+ bank->b_size = SDRAM_SIMULATION_BANK_SIZE; -+ -+ return 1; -+} -+ -+#else -+ -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ mb(); -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+ -+ read_tag (dev, Tags[set][line]); /* read it back to guarantee the memory system is quite again */ -+ mb(); -+} -+ -+static __inline__ int -+sdram_GreyToBinary(int GreyVal, int NoOfBits) -+{ -+ int Bit; -+ int BinaryVal=0; -+ for (Bit=(1 << (NoOfBits-1)); Bit != 0; Bit >>= 1) -+ BinaryVal ^= (GreyVal & Bit) ^ ((BinaryVal >> 1) & Bit); -+ return (BinaryVal); -+} -+ -+static __inline__ int -+sdram_BinaryToGrey(int BinaryVal) -+{ -+ return (BinaryVal ^ (BinaryVal >> 1)); -+} -+ -+void -+elan4_sdram_setup_delay_lines (ELAN4_DEV *dev, int factor) -+{ -+ /* This is used to fix the SDRAM delay line values */ -+ int i, AutoGenDelayValue=0; -+ int NewDelayValue; -+ -+ if (dev->dev_sdram_cfg & SDRAM_FIXED_DELAY_ENABLE) /* already setup. */ -+ return; -+ -+ /* now get an average of 10 dll values */ -+ for (i=0;i<10;i++) -+ AutoGenDelayValue += sdram_GreyToBinary(SDRAM_GET_DLL_DELAY(read_reg64 (dev, SDRamConfigReg)), -+ SDRAM_FIXED_DLL_DELAY_BITS); -+ -+ NewDelayValue = factor + (AutoGenDelayValue / 10); /* Mean of 10 values */ -+ -+ dev->dev_sdram_cfg = (dev->dev_sdram_cfg & ~(SDRAM_FIXED_DLL_DELAY_MASK << SDRAM_FIXED_DLL_DELAY_SHIFT)) | -+ SDRAM_FIXED_DELAY_ENABLE | SDRAM_FIXED_DLL_DELAY(sdram_BinaryToGrey(NewDelayValue)); -+ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg); /* Put back the new value */ -+ -+ pioflush_reg (dev); -+} -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ unsigned long mappedsize = bank->b_size; -+ ioaddr_t ioaddr; -+ unsigned long long value, size; -+ register int i; -+ extern int sdram_bank_limit; -+ -+ if (mappedsize > SDRAM_MAX_BLOCK_SIZE) -+ mappedsize = SDRAM_MAX_BLOCK_SIZE; -+ -+ while ((ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, mappedsize, &bank->b_handle)) == 0) -+ { -+ if (mappedsize <= (64*1024*1024)) /* boards normally populated with 64mb, so winge if we can't see this much */ -+ printk ("elan%d: could not map bank %d size %dMb\n", dev->dev_instance, (int)(bank - dev->dev_sdram_banks), (int)mappedsize/(1024*1024)); -+ -+ if ((mappedsize >>= 1) < (1024*1024)) -+ return 0; -+ } -+ -+ /* first probe to see if the memory bank is present */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ for (i = 0; i < 64; i++) -+ { -+ unsigned long long pattern = (1ull << i); -+ -+ writeq (pattern, (void *)ioaddr); /* write pattern at base */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ writeq (~pattern, (void *)(ioaddr + E4_CacheSize)); /* write ~pattern at cachesize */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ writeq (~pattern, (void *)(ioaddr + 2*E4_CacheSize)); /* write ~pattern at 2*cachesize */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 2*E4_CacheSize); -+ -+ value = __elan4_readq (dev, ioaddr); /* read pattern back at 0 */ -+ -+ if (value != pattern) -+ { -+ printk ("elan%d: sdram bank %d not present\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ } -+ -+ /* sdram bank is present, so work out it's size. We store the maximum size at the base -+ * and then store the address at each address on every power of two address until -+ * we reach the minimum mappable size (PAGESIZE), we then read back the value at the -+ * base to determine the bank size */ -+ writeq (mappedsize, (void *)(ioaddr)); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ for (size = mappedsize >> 1; size > PAGE_SIZE; size >>= 1) -+ { -+ writeq (size, (void *)(ioaddr + (long)size)); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, size); -+ } -+ -+ if ((size = __elan4_readq (dev, ioaddr)) < SDRAM_MIN_BANK_SIZE) -+ { -+ printk ("elan%d: memory bank %d dubious\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ -+ if (sdram_bank_limit == 0 || size <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ else -+ { -+ size = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit bank %d to %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ } -+ -+ bank->b_size = size; -+ -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 1; -+} -+#endif -+ -+int -+elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ bank->b_ioaddr = 0; -+ -+ if (! elan4_sdram_probe_bank (dev, bank)) -+ return 0; -+ -+ if ((bank->b_ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, bank->b_size, &bank->b_handle)) == (ioaddr_t) 0) -+ { -+ printk ("elan%d: could not map sdrambank %d\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ return 0; -+ } -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) /* allocate the buddy allocator bitmaps */ -+ KMEM_ZALLOC (bank->b_bitmaps[indx], bitmap_t *, sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size), 1); -+ -+ return 1; -+} -+ -+void -+elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) -+ KMEM_FREE (bank->b_bitmaps[indx], sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size)); -+ -+ elan4_unmap_device (dev, bank->b_ioaddr, bank->b_size, &bank->b_handle); -+} -+ -+void -+elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ sdramaddr_t base = bank->b_base; -+ sdramaddr_t top = bank->b_base + bank->b_size; -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = SDRAM_MIN_BLOCK_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* need to split a bigger block up */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->dev_sdram_freelists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ printk ("elan4_sdram_alloc: %d bytes failed\n", nbytes); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: use block=%x indx=%d\n", dev->dev_sdram_freelists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->dev_sdram_freelists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdramaddr_to_bank (dev, block)->b_bitmaps[i], sdramaddr_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, block, sizeof (sdramblock_t)); -+#endif -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_free: indx=%d block=%x\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+} -+ -+void -+elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t addr, int len) -+{ -+ int set, off; -+ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* -+ * if flushing more than a single set (8K), then you have to flush the whole cache. -+ * NOTE - in the real world we will probably want to generate a burst across -+ * the pci bus. -+ */ -+ if (len >= E4_CacheSetSize) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => whole cache\n", addr, len, addr + len); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space, E4_CacheSize); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = 0; off < E4_CacheSetSize; off += E4_CacheLineSize) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ else -+ { -+ unsigned base = addr & ~(E4_CACHELINE_SIZE-1); -+ unsigned top = (addr + len + (E4_CACHELINE_SIZE-1)) & ~(E4_CACHELINE_SIZE-1); -+ unsigned baseoff = base & (E4_CacheSetSize-1); -+ unsigned topoff = top & (E4_CacheSetSize-1); -+ -+ if ((base ^ top) & E4_CacheSetSize) /* wraps */ -+ { -+ PRINTF7 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => split cache (%x,%x %x,%x)\n", -+ addr, len, addr + len, 0, topoff, baseoff, E4_CacheSetSize); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize), topoff); -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, E4_CacheSetSize - baseoff); -+ } -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ for (off = 0; off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ -+ for (off = (base & (E4_CacheSetSize-1)); off < E4_CacheSetSize; off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ } -+#endif -+ } -+ else -+ { -+ PRINTF5 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => part cache (%x,%x)\n", -+ addr, len, addr + len, baseoff, topoff); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, topoff - baseoff); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = (base & (E4_CacheSetSize-1)); off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ } -+ pioflush_sdram (dev); -+ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+} -+ -+static char * -+get_correctableErr_bitpos(uint SyndromeBits) -+{ -+ switch (SyndromeBits) -+ { -+ case 0x00: return ("NoErr"); -+ case 0x31: return ("00"); -+ case 0x32: return ("01"); -+ case 0xc4: return ("02"); -+ case 0xc8: return ("03"); -+ case 0x26: return ("04"); -+ case 0x91: return ("05"); -+ case 0x89: return ("06"); -+ case 0x64: return ("07"); -+ case 0xc1: return ("08"); -+ case 0xf2: return ("09"); -+ case 0x34: return ("10"); -+ case 0xf8: return ("11"); -+ case 0xf1: return ("12"); -+ case 0xc2: return ("13"); -+ case 0xf4: return ("14"); -+ case 0x38: return ("15"); -+ case 0xd6: return ("16"); -+ case 0xa1: return ("17"); -+ case 0x79: return ("18"); -+ case 0xa4: return ("19"); -+ case 0xd9: return ("20"); -+ case 0xa2: return ("21"); -+ case 0x76: return ("22"); -+ case 0xa8: return ("23"); -+ case 0xe6: return ("24"); -+ case 0x51: return ("25"); -+ case 0xb9: return ("26"); -+ case 0x54: return ("27"); -+ case 0xe9: return ("28"); -+ case 0x52: return ("29"); -+ case 0xb6: return ("30"); -+ case 0x58: return ("31"); -+ case 0x13: return ("32"); -+ case 0x23: return ("33"); -+ case 0x4c: return ("34"); -+ case 0x8c: return ("35"); -+ case 0x62: return ("36"); -+ case 0x19: return ("37"); -+ case 0x98: return ("38"); -+ case 0x46: return ("39"); -+ case 0x1c: return ("40"); -+ case 0x2f: return ("41"); -+ case 0x43: return ("42"); -+ case 0x8f: return ("43"); -+ case 0x1f: return ("44"); -+ case 0x2c: return ("45"); -+ case 0x4f: return ("46"); -+ case 0x83: return ("47"); -+ case 0x6d: return ("48"); -+ case 0x1a: return ("49"); -+ case 0x97: return ("50"); -+ case 0x4a: return ("51"); -+ case 0x9d: return ("52"); -+ case 0x2a: return ("53"); -+ case 0x67: return ("54"); -+ case 0x8a: return ("55"); -+ case 0x6e: return ("56"); -+ case 0x15: return ("57"); -+ case 0x9b: return ("58"); -+ case 0x45: return ("59"); -+ case 0x9e: return ("60"); -+ case 0x25: return ("61"); -+ case 0x6b: return ("62"); -+ case 0x85: return ("63"); -+ case 0x01: return ("C0"); -+ case 0x02: return ("C1"); -+ case 0x04: return ("C2"); -+ case 0x08: return ("C3"); -+ case 0x10: return ("C4"); -+ case 0x20: return ("C5"); -+ case 0x40: return ("C6"); -+ case 0x80: return ("C7"); -+ -+ case 0x07: case 0x0b: case 0x0d: case 0x0e: case 0x3d: case 0x3e: case 0x70: case 0x7c: // T -+ case 0xb0: case 0xbc: case 0xc7: case 0xcb: case 0xd0: case 0xd3: case 0xe0: case 0xe3: // T -+ return ("triple"); -+ -+ case 0x0f: case 0x55: case 0x5a: case 0xa5: case 0xaa: case 0xf0: case 0xff: // Q -+ return ("quadruple"); -+ -+ case 0x16: case 0x29: case 0x37: case 0x3b: case 0x49: case 0x57: case 0x5b: case 0x5d: case 0x5e: case 0x61: // M -+ case 0x68: case 0x73: case 0x75: case 0x7a: case 0x7f: case 0x86: case 0x92: case 0x94: case 0xa7: case 0xab: // M -+ case 0xad: case 0xae: case 0xb3: case 0xb5: case 0xba: case 0xbf: case 0xcd: case 0xce: case 0xd5: case 0xda: // M -+ case 0xdc: case 0xdf: case 0xe5: case 0xea: case 0xec: case 0xef: case 0xf7: case 0xfb: case 0xfd: case 0xfe: // M -+ return ("multiple"); -+ -+ default: // all other cases -+ return ("double"); -+ } -+} -+ -+char * -+elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, E4_uint64 ConfigReg, char *str) -+{ -+ E4_uint64 StartupSyndrome = dev->dev_sdram_initial_ecc_val; -+ int RisingDQSsyndrome = ((ECC_RisingDQSSyndrome(status) == ECC_RisingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_RisingDQSSyndrome(status)); -+ int FallingDQSsyndrome = ((ECC_FallingDQSSyndrome(status) == ECC_FallingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_FallingDQSSyndrome(status)); -+ E4_uint64 Addr = ECC_Addr(status); -+ int Bank = (Addr >> 6) & 3; -+ int Cas = ((Addr >> 3) & 7) | ((Addr >> (8 - 3)) & 0xf8) | ((Addr >> (25 - 8)) & 0x100) | -+ ((Addr >> (27 - 9)) & 0x200) | ((Addr >> (29 - 10)) & 0xc00); -+ int Ras = ((Addr >> 13) & 0xfff) | ((Addr >> (26 - 12)) & 0x1000) | ((Addr >> (28 - 13)) & 0x2000) | -+ ((Addr >> (30 - 14)) & 0x4000); -+ -+ sprintf (str, "Addr=%07llx Bank=%x Ras=%x Cas=%x Falling DQS=%s Rising DQS=%s Syndrome=%x%s%s%s%s Type=%s SDRamDelay=%s,%0d", /* 41 + 16 + 8 + 15 + 24 + 13 + 22 + 10 + 10 == 151 */ -+ (long long)Addr, Bank, Ras, Cas, -+ get_correctableErr_bitpos(FallingDQSsyndrome), -+ get_correctableErr_bitpos(RisingDQSsyndrome), -+ (int)ECC_Syndrome(status), -+ ECC_UncorrectableErr(status) ? " Uncorrectable" : "", -+ ECC_MultUncorrectErrs(status) ? " Multiple-Uncorrectable" : "", -+ ECC_CorrectableErr(status) ? " Correctable" : "", -+ ECC_MultCorrectErrs(status) ? " Multiple-Correctable" : "", -+ (status & 0x0010000000000000ull) ? "W" : -+ (status & 0x0020000000000000ull) ? "R" : -+ (status & 0x0030000000000000ull) ? "C" : "-", -+ (ConfigReg & SDRAM_FIXED_DELAY_ENABLE) ? "F" : "A", -+ sdram_GreyToBinary(SDRAM_GET_DLL_DELAY(ConfigReg), SDRAM_FIXED_DLL_DELAY_BITS)); -+ -+ return str; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/trap.c linux-2.6.9/drivers/net/qsnet/elan4/trap.c ---- clean/drivers/net/qsnet/elan4/trap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/trap.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,781 @@ -+/* -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.c,v 1.23.2.1 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+char * const PermTypes[16] = -+{ -+ "Disabled", "Unused", "LocalDataRead", "LocalDataWrite", -+ "LocalRead", "LocalExecute", "ReadOnly", "LocalWrite", -+ "LocalEventOnly", "LocalEventWrite", "RemoteEvent", "RemoteAll", -+ "RemoteReadOnly", "RemoteWriteOnly", "DataReadWrite", "NoFault", -+}; -+ -+char * const AccTypes[] = -+{ -+ "LocalDataRead ", "LocalDataWrite", "RemoteRead ", "RemoteWrite ", -+ "Execute ", "LocalEvent ", "Unused ", "RemoteEvent " -+}; -+char * const DataTypes[] = {"Byte ", "HWord", "Word ", "DWord"}; -+char * const PhysTypes[] = {"Special Read", "Special Write", "Physical Read", "Physical Write"}; -+ -+char * const EProcTrapNames[] = { -+ "EventProcNoFault", -+ "EventProcAddressAlignment", -+ "EventProcMemoryFault", -+ "EventProcCountWrapError", -+}; -+ -+char * const CProcTrapNames[] = { -+ "CommandProcNoFault", -+ "CommandProcInserterError", -+ "CommandProcPermissionTrap", -+ "CommandProcSendTransInvalid", -+ "CommandProcSendTransExpected", -+ "CommandProcDmaQueueOverflow", -+ "CommandProcInterruptQueueOverflow", -+ "CommandProcMemoryFault", -+ "CommandProcRouteFetchFault", -+ "CommandProcFailCountZero", -+ "CommandProcAddressAlignment", -+ "CommandProcWaitTrap", -+ "CommandProcMultipleGuards", -+ "CommandProcOpenOnGuardedChan", -+ "CommandProcThreadQueueOverflow", -+ "CommandProcBadData", -+}; -+ -+char *const CProcInsertError[] = { -+ "No Error", -+ "Overflowed", -+ "Invalid Write Size", -+ "Invalid Write Order", -+}; -+ -+char * const DProcTrapNames[] = { -+ "DmaProcNoFault", -+ "DmaProcRouteFetchFault", -+ "DmaProcFailCountError", -+ "DmaProcPacketAckError", -+ "DmaProcRunQueueReadFault", -+ "DmaProcQueueOverFlow", -+ "DmaProcPrefetcherFault", /* addy: Added new trap type for Prefetcher faults */ -+}; -+ -+char *const IProcTrapNames[] = { -+ "InputNoFault", -+ "InputAddressAlignment", -+ "InputMemoryFault", -+ "InputInvalidTransType", -+ "InputDmaQueueOverflow", -+ "InputEventEngineTrapped", -+ "InputCrcErrorAfterPAckOk", -+ "InputEopErrorOnWaitForEop", -+ "InputEopErrorTrap", -+ "InputDiscardAfterAckOk", -+}; -+ -+char *const TProcTrapNames[] = { -+ "HaltThread", -+ "TrapForTooManyInstructions", -+ "InstAccessException", -+ "Unimplemented", -+ "DataAccessException", -+ "DataAlignmentError", -+ "TrapForUsingBadData", -+}; -+ -+#define declare_spaces(space, str) char space[64]; do { int i; for (i = 0; i < strlen(str); i++) spaces[i] = ' '; space[i] = '\0'; } while (0) -+#define declare_prefix(space, spaces, str) char space[64]; do { strcpy (space, spaces); strcat (space, str); } while (0) -+ -+void -+elan4_display_farea (void *type, int mode, char *str, E4_FaultSave *farea) -+{ -+ E4_uint32 FSR = FaultSaveFSR(farea->FSRAndFaultContext); -+ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Fault occurred at %016llx for context %4x\n", str, -+ farea->FaultAddress, FaultSaveContext(farea->FSRAndFaultContext)); -+ -+ if (FSR & AT_VirtualWriteAccBit) /* Virtual write access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Write. DWSize=0x%x EndP=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualWriteSizeMask, -+ (FSR >> AT_VirtualWriteEndPtrShift) & AT_VirtualWriteEndPtrMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else if (FSR & AT_VirtualReadAccBit) /* Virtual read access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Read. DWSize=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualReadSizeMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else -+ elan4_debugf (type, mode, "%s FSR=%x: %s. Size=0x%x\n", spaces, -+ FSR, PhysTypes[(FSR >> AT_SelBitsShift) & AT_SelBitsMask], -+ FSR & AT_OtherSizeMask); -+ elan4_debugf (type, mode, "%s FSR: %s %s%s %sWalking\n", spaces, -+ (FSR & AT_NonAlloc) ? "NonAlloc" : "Alloc", -+ (FSR & AT_DmaData) ? "Dma " : "", -+ (FSR & FSR_WalkForThread) ? "ThreadAcc" : "UnitsAcc", -+ (FSR & FSR_Walking) ? "" : "Not"); -+ PRINTF (type, mode, "%s FSR: %s%sHashTable=%s\n", spaces, -+ (FSR & FSR_NoTranslationsFound) ? "NoTranslationsFound " : "", -+ (FSR & FSR_WalkingProtectionFault) ? "WalkingProtectionFault " : "", -+ (FSR & FSR_HashTable1) ? "1" : "0"); -+ if (FSR & (FSR_RouteVProcErr | FSR_FaultForBadData)) -+ elan4_debugf (type, mode, "%s FSR: %s%s\n", spaces, -+ (FSR & FSR_RouteVProcErr) ? "RouteVProcErr " : "", -+ (FSR & FSR_FaultForBadData) ? "FaultForBadData " : ""); -+} -+ -+void -+elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%016llx %s EventAddr=%016llx CountAndType=%016llx\n", str, -+ trap->tr_status, EProcTrapNames[EPROC_TrapType(trap->tr_status)], -+ trap->tr_eventaddr, trap->tr_event.ev_CountAndType); -+ elan4_debugf (type, mode, "%s Param=%016llx.%016llx\n", spaces, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ -+ elan4_display_farea (type, mode, strcat (spaces, EPROC_Port0Fault(trap->tr_status) ? " EPROC0" : " EPROC1"), &trap->tr_faultarea); -+} -+ -+void -+elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap) -+{ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%llx %s Command=%llx\n", str, trap->tr_status, -+ CProcTrapNames[CPROC_TrapType(trap->tr_status)], trap->tr_command); -+ elan4_debugf (type, mode, "%s Desc=%016llx %016llx %016llx %016llx\n", str, -+ trap->tr_qdesc.CQ_QueuePtrs, trap->tr_qdesc.CQ_HoldingValue, -+ trap->tr_qdesc.CQ_AckBuffers, trap->tr_qdesc.CQ_Control); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInserterError: -+ elan4_debugf (type, mode, "%s %s\n", str, CProcInsertError[CQ_RevB_ErrorType(trap->tr_qdesc.CQ_QueuePtrs)]); -+ break; -+ -+ case CommandProcWaitTrap: -+ elan4_display_eproc_trap (type, mode, spaces, &trap->tr_eventtrap); -+ break; -+ -+ default: -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+ break; -+ } -+} -+ -+void -+elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s status %llx - %s\n", str, -+ trap->tr_status, DProcTrapNames[DPROC_TrapType(trap->tr_status)]); -+ -+ elan4_debugf (type, mode, "%s DESC %016llx %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_typeSize, -+ trap->tr_desc.dma_cookie, trap->tr_desc.dma_vproc, trap->tr_desc.dma_srcAddr); -+ elan4_debugf (type, mode, "%s %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_dstAddr, -+ trap->tr_desc.dma_srcEvent, trap->tr_desc.dma_dstEvent); -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ elan4_display_farea (type, mode, spaces, &trap->tr_prefetchFault); -+} -+ -+void -+elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s PC=%016llx nPC=%016llx State=%016llx Status=%016llx -%s%s%s%s\n", str, -+ trap->tr_pc, trap->tr_npc, trap->tr_state, trap->tr_status, -+ (trap->tr_state & TS_TrapForTooManyInstructions) ? " TrapForTooManyInstructions" : "", -+ (trap->tr_state & TS_Unimplemented) ? " Unimplemented" : "", -+ (trap->tr_state & TS_DataAlignmentError) ? " DataAlignmentError" : "", -+ (trap->tr_state & TS_InstAccessException) ? " InstAccessException" : "", -+ (trap->tr_state & TS_DataAccessException) ? " DataAlignmentError" : ""); -+ -+ for (i = 0; i < 64; i += 4) -+ elan4_debugf (type, mode, "%s r%d - %016llx %016llx %016llx %016llx\n", spaces, i, -+ trap->tr_regs[i], trap->tr_regs[i+1], trap->tr_regs[i+2], trap->tr_regs[i+3]); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ { -+ declare_prefix (prefix, spaces, "Inst"); -+ -+ elan4_display_farea (type, mode, prefix, &trap->tr_instFault); -+ } -+ -+ if (trap->tr_state & TS_DataAccessException) -+ { -+ declare_prefix (prefix, spaces, "Data"); -+ elan4_display_farea (type, mode, prefix, &trap->tr_dataFault); -+ } -+} -+ -+void -+elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ for (i = 0; i < trap->tr_numTransactions; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ E4_Addr addr = hdrp->TrAddr; -+ char *typeString; -+ char buffer[256]; -+ char *ptr = buffer; -+ -+ if (IPROC_EOPTrap(status)) -+ { -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: typeString = "EopGood"; break; -+ case EOP_BADACK: typeString = "EopBadAck"; break; -+ case EOP_ERROR_RESET: typeString = "EopReset"; break; -+ default: typeString = "EopBad"; break; -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d", typeString, IPROC_NetworkContext(status)); -+ } -+ else -+ { -+ if (IPROC_BadLength(status)) -+ typeString = "BadLength"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_DISCARD) -+ typeString = "DiscardCrc"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_ERROR) -+ typeString = "ErrorCrc Remote Network error"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_BAD) -+ typeString = "BadCrc Cable error into this node."; -+ else -+ { -+ if ((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) -+ typeString = "WriteBlock"; -+ else -+ { -+ switch (IPROC_TransactionType(status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: typeString = "SetEvent"; break; -+ case TR_REMOTEDMA & TR_OPCODE_MASK: typeString = "RemoteDma"; break; -+ case TR_SENDDISCARD & TR_OPCODE_MASK: typeString = "SendDiscard"; break; -+ case TR_GTE & TR_OPCODE_MASK: typeString = "GTE"; break; -+ case TR_LT & TR_OPCODE_MASK: typeString = "LT"; break; -+ case TR_EQ & TR_OPCODE_MASK: typeString = "EQ"; break; -+ case TR_NEQ & TR_OPCODE_MASK: typeString = "NEQ"; break; -+ case TR_IDENTIFY & TR_OPCODE_MASK: typeString = "Idenfity"; break; -+ case TR_ADDWORD & TR_OPCODE_MASK: typeString = "AddWord"; break; -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: typeString = "InputQCommit"; break; -+ case TR_TESTANDWRITE & TR_OPCODE_MASK: typeString = "TestAndWrite"; break; -+ case TR_INPUT_Q_GETINDEX & TR_OPCODE_MASK: typeString = "InputQGetIndex"; break; -+ case TR_TRACEROUTE_TRANS & TR_OPCODE_MASK: typeString = "TraceRoute"; break; -+ default: typeString = "Unknown"; break; -+ } -+ } -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d Addr=%016llx", typeString, IPROC_NetworkContext(status), (unsigned long long) addr); -+ } -+ -+ -+ if (IPROC_TrapValue(status) != InputNoFault) -+ { -+ ptr += sprintf (ptr, " TrType=%2d ChanTrapped=%x GoodAck=%x BadAck=%x InputterChan=%d", IPROC_TrapValue(status), -+ IPROC_ChannelTrapped(status), IPROC_GoodAckSent(status), IPROC_BadAckSent(status), -+ IPROC_InputterChan(status)); -+ if (IPROC_EOPTrap(status)) -+ ptr += sprintf (ptr, " EOPType=%d", IPROC_EOPType(status)); -+ else -+ ptr += sprintf (ptr, " %s%s%s%s", -+ IPROC_FirstTrans(status) ? " FirstTrans" : "", -+ IPROC_LastTrans(status) ? " LastTrans" : "", -+ (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP) ? " WaitForEop" : "", -+ (IPROC_GoodAckSent(status) & (1 << IPROC_Channel(status))) ? " AckSent" : ""); -+ } -+ -+ elan4_debugf (type, mode, "%s %s\n", str, buffer); -+ -+ str = spaces; -+ } -+ -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+} -+ -+#define elan4_sdram_copy_faultarea(dev, unit, farea) \ -+ elan4_sdram_copyq_from_sdram ((dev), (dev)->dev_faultarea + (unit) * sizeof (E4_FaultSave), (E4_uint64 *) farea, sizeof (E4_FaultSave)); -+ -+void -+elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent) -+{ -+ /* only one of the memory ports can fault at a time */ -+ ASSERT (EPROC_TrapType(status) != EventProcMemoryFault || (EPROC_Port0Fault(status) ^ EPROC_Port1Fault(status)) == 1); -+ -+ trap->tr_status = status; -+ -+ if (EPROC_Port0Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc0, &trap->tr_faultarea); -+ if (EPROC_Port1Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc1, &trap->tr_faultarea); -+ -+ if (iswaitevent) -+ { -+ /* -+ * for waitevents the Event address is always taken from the command processor -+ * -+ * if we trapped during the copy then we take the "Event" from the event processor -+ * since we need to complete the copy. Otherwise we'll be reissuing the original -+ * command again -+ */ -+ E4_uint32 fsr = FaultSaveFSR(trap->tr_faultarea.FSRAndFaultContext); -+ -+ trap->tr_eventaddr = read_reg64 (dev, CommandHold) ^ WAIT_EVENT_CMD; -+ -+ if (EPROC_TrapType(trap->tr_status) == EventProcMemoryFault && -+ (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite)) -+ { -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ else -+ { -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, CommandCopy[5]); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, CommandCopy[4]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, CommandCopy[6]); -+ -+ } -+ } -+ else -+ { -+ trap->tr_eventaddr = read_reg64 (dev, EventAddress); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ -+ BumpDevStat (dev, s_eproc_trap_types[EPROC_TrapType(status)]); -+} -+ -+int -+cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq, int chan) -+{ -+ /* cq = ucq->ucq_cq */ -+ if ((cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: /* open */ -+ if (((chan << 4) == (command & (1<<4)))) -+ /* Matches supplied channel */ -+ return (command >> 32); -+ else -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 2: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ case 3: -+ printk ("cproc_open_extract_vp: invalid command %llx\n", (long long)command); -+ return -1; -+ } -+ } /* while */ -+ } -+ -+ return -1; -+} -+ -+void -+elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum) -+{ -+ /* extract the state from the device */ -+ elan4_sdram_copy_faultarea (dev, CUN_CommandProc, &trap->tr_faultarea); -+ -+ trap->tr_status = status; -+ trap->tr_command = read_reg64 (dev, CommandHold); -+ -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)), &trap->tr_qdesc, sizeof (E4_CommandQueueDesc)); -+ -+ if (CPROC_TrapType (status) == CommandProcWaitTrap) -+ elan4_extract_eproc_trap (dev, read_reg64 (dev, EProcStatus), &trap->tr_eventtrap, 1); -+ -+ BumpDevStat (dev, s_cproc_trap_types[CPROC_TrapType(status)]); -+ -+ if (PackValue(trap->tr_qdesc.CQ_AckBuffers, 0) == PackTimeout || PackValue(trap->tr_qdesc.CQ_AckBuffers, 1) == PackTimeout) -+ BumpDevStat (dev, s_cproc_timeout); -+} -+ -+void -+elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit) -+{ -+ trap->tr_status = status; -+ -+ if (unit == 0) -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma0Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma0Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma0Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma0Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma0Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma0Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma0Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA0, &trap->tr_packAssemFault); -+ } -+ else -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma1Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma1Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma1Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma1Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma1Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma1Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma1Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA1, &trap->tr_packAssemFault); -+ } -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ { -+ elan4_sdram_copy_faultarea (dev, (CUN_DProcData0 | DPROC_FaultUnitNo(trap->tr_status)), &trap->tr_prefetchFault); -+ /* addy: Added new trap type for Prefetcher faults */ -+ BumpDevStat (dev, s_dproc_trap_types[6]); -+ } -+ else if (DPROC_PacketTimeout (trap->tr_status)) -+ BumpDevStat (dev, s_dproc_timeout); -+ else -+ BumpDevStat (dev, s_dproc_trap_types[DPROC_TrapType(status)]); -+} -+ -+void -+elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_status = status; -+ trap->tr_state = read_reg64 (dev, Thread_Trap_State); -+ trap->tr_pc = read_reg64 (dev, PC_W); -+ trap->tr_npc = read_reg64 (dev, nPC_W); -+ trap->tr_dirty = read_reg64 (dev, DirtyBits); -+ trap->tr_bad = read_reg64 (dev, BadBits); -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS, -+ ((dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) ? ELAN4_REVA_REG_OFFSET : ELAN4_REVB_REG_OFFSET) + -+ offsetof (E4_Registers, Regs.TProcRegs), (unsigned long) &trap->tr_regs, 64*sizeof (E4_uint64)) < 0) -+ { -+ for (i = 0; i < 64; i++) -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ } -+ -+ for (i = 0; i < 64; i++) -+ if (! (trap->tr_dirty & ((E4_uint64) 1 << i))) -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+#else -+ for (i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ else -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+ } -+#endif -+ -+ if (trap->tr_state & TS_DataAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcData0 | TS_DataPortNo (trap->tr_state), &trap->tr_dataFault); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcInst, &trap->tr_instFault); -+ -+ for (i = 0; i < 7; i++) -+ if (trap->tr_state & (1 << i)) -+ BumpDevStat (dev, s_tproc_trap_types[i]); -+} -+ -+void -+elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ sdramaddr_t dataoff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrData[0][unit]); -+ register int i, j; -+ int CurrUnitNo = (unit >= 2) ? CUN_IProcHighPri : CUN_IProcLowPri; -+ sdramaddr_t CurrFaultArea = dev->dev_faultarea + (CurrUnitNo * sizeof (E4_FaultSave)); -+ -+ /* Finally copy the fault area */ -+ elan4_sdram_copy_faultarea (dev, CurrUnitNo, &trap->tr_faultarea); -+ -+ /* -+ * Clear out the fault save area after reading to allow a fault on the write of the back pointer of -+ * an InputQCommit to be obsurved if a simultaneous event proc trap occurs. -+ */ -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FSRAndFaultContext), 0x0ULL); -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FaultAddress), 0x0ULL); -+ -+ /* copy the transaction headers */ -+ trap->tr_transactions[0].IProcStatusCntxAndTrType = status; -+ trap->tr_transactions[0].TrAddr = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, TrAddr)); -+ -+ for (i = 0; !IPROC_EOPTrap(trap->tr_transactions[i].IProcStatusCntxAndTrType);) -+ { -+ if (IPROC_BadLength (trap->tr_transactions[i].IProcStatusCntxAndTrType)) -+ BumpDevStat (dev, s_bad_length); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_BAD) -+ BumpDevStat (dev, s_crc_bad); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_ERROR) -+ BumpDevStat (dev, s_crc_error); -+ -+ BumpDevStat (dev, s_iproc_trap_types[IPROC_TrapValue (trap->tr_transactions[i].IProcStatusCntxAndTrType)]); -+ -+ hdroff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapHeader); -+ -+ if (++i == MAX_TRAPPED_TRANS) -+ break; -+ -+ elan4_sdram_copyq_from_sdram (dev, hdroff, &trap->tr_transactions[i], sizeof (E4_IprocTrapHeader)); -+ } -+ -+ if (IPROC_EOPType (trap->tr_transactions[i].IProcStatusCntxAndTrType) == EOP_ERROR_RESET) -+ BumpDevStat (dev, s_eop_reset); -+ -+ /* Remember the number of transactions we've copied */ -+ trap->tr_numTransactions = i + 1; -+ -+ /* Copy all the data blocks in one go */ -+ for (i = 0; i < MIN (trap->tr_numTransactions, MAX_TRAPPED_TRANS); i++, dataoff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapData)) -+ { -+ if (IPROC_BadLength(status) || IPROC_TransCRCStatus (status) != CRC_STATUS_GOOD) -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, TRANS_DATA_DWORDS*sizeof(E4_uint64)); -+ else -+ { -+ int trtype = IPROC_TransactionType(trap->tr_transactions[i].IProcStatusCntxAndTrType); -+ int ndwords = (trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT; -+ -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, ndwords*sizeof(E4_uint64)); -+ -+ for (j = ndwords; j < TRANS_DATA_DWORDS; j++) -+ trap->tr_dataBuffers[i].Data[j] = 0xbeec0f212345678ull; -+ } -+ } -+ -+} -+ -+void -+elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_flags = 0; -+ trap->tr_trappedTrans = TR_TRANS_INVALID; -+ trap->tr_waitForEopTrans = TR_TRANS_INVALID; -+ trap->tr_identifyTrans = TR_TRANS_INVALID; -+ -+ if (trap->tr_numTransactions > MAX_TRAPPED_TRANS) -+ trap->tr_flags = TR_FLAG_TOOMANY_TRANS; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < MIN(trap->tr_numTransactions, MAX_TRAPPED_TRANS) ; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ -+ if (trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ switch (IPROC_TransactionType (status) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ trap->tr_identifyTrans = i; -+ break; -+ } -+ } -+ -+ if (IPROC_TrapValue(status) == InputNoFault) /* We're looking at transactions stored before the trap */ -+ continue; /* these should only be identifies */ -+ -+ if (trap->tr_trappedTrans == TR_TRANS_INVALID) /* Remember the transaction which caused the */ -+ trap->tr_trappedTrans = i; /* trap */ -+ -+ if (IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) -+ trap->tr_flags |= TR_FLAG_ACK_SENT; -+ -+ if (IPROC_EOPTrap(status)) /* Check for EOP */ -+ { -+ ASSERT (i == trap->tr_numTransactions - 1); -+ -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. WFlag this to ignore the AckSent. */ -+ trap->tr_flags |= TR_FLAG_EOP_BAD; -+ break; -+ -+ case EOP_ERROR_RESET: -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->tr_flags |= TR_FLAG_EOP_ERROR; -+ break; -+ -+ default: -+ printk ("elan4_inspect_iproc_trap: unknown eop type %d", IPROC_EOPType(status)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ else -+ { -+ if (IPROC_BadLength(status) || (IPROC_TransCRCStatus (status) == CRC_STATUS_ERROR || -+ IPROC_TransCRCStatus (status) == CRC_STATUS_BAD)) -+ { -+ { -+ register int j; -+ if (IPROC_BadLength(status)) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped on bad length data. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ else -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped with bad CRC. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ for (j = 0; j < TRANS_DATA_DWORDS; j++) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: DataBuffers[%d] : %016llx\n", j, trap->tr_dataBuffers[i].Data[j]); -+ } -+ -+ trap->tr_flags |= TR_FLAG_BAD_TRANS; -+ continue; -+ } -+ -+ if (IPROC_TransCRCStatus (status) == CRC_STATUS_DISCARD) -+ continue; -+ -+ if ((((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) || -+ (IPROC_TransactionType(status) == TR_TRACEROUTE_TRANS)) && -+ (trap->tr_flags & TR_FLAG_ACK_SENT) && trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ /* -+ * Writeblock after the ack is sent without an identify transaction - this is -+ * considered to be a DMA packet and requires the next packet to be nacked - since -+ * the DMA processor will send this in a deterministic time and there's an upper -+ * limit on the network latency (the output timeout) we just need to hold the context -+ * filter up for a while. -+ */ -+ trap->tr_flags |= TR_FLAG_DMA_PACKET; -+ } -+ -+ if (IPROC_LastTrans(status) && (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP)) -+ { -+ /* -+ * WaitForEop transactions - if we have to do network error fixup -+ * then we may need to execute/ignore this transaction dependant -+ * on whether the source will be resending it. -+ */ -+ trap->tr_waitForEopTrans = i; -+ } -+ -+ /* -+ * This is a special case caused by a minor input processor bug. -+ * If simultaneous InputMemoryFault and InputEventEngineTrapped occur then the chip will probably return -+ * InputEventEngineTrapped even though the write of the back pointer has not occured and must be done by -+ * the trap handler. -+ * In this case the fault address will equal q->q_bptr. If there has been only EventEngineTrap then the -+ * the fault address should be zero as the trap handler now always zeros this after every input trap. -+ */ -+ if ((IPROC_TransactionType (status) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ trap->tr_faultarea.FaultAddress == hdrp->TrAddr + offsetof(E4_InputQueue, q_bptr) && -+ IPROC_TrapValue(status) == InputEventEngineTrapped) -+ { -+ hdrp->IProcStatusCntxAndTrType = (status & 0xFFFFFFF0FFFFFFFFull) | ((E4_uint64) InputMemoryFault << 32); -+ } -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "inspect[%d] status=%llx TrapValue=%d -> flags %x\n", i, status, IPROC_TrapValue(status), trap->tr_flags); -+ } -+} -+ -+E4_uint64 -+elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc); -+ E4_uint64 cqcontrol = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint32 extractOff = CQ_ExtractPtr (cqcontrol) & (CQ_Size(cq->cq_size)-1); -+ -+ if (extractOff == 0) -+ extractOff = CQ_Size(cq->cq_size) - sizeof (E4_uint64); -+ else -+ extractOff -= sizeof (E4_uint64); -+ -+ return (elan4_sdram_readq (dev, cq->cq_space + extractOff)); -+} -+ -+EXPORT_SYMBOL(elan4_extract_eproc_trap); -+EXPORT_SYMBOL(elan4_display_eproc_trap); -+EXPORT_SYMBOL(elan4_extract_cproc_trap); -+EXPORT_SYMBOL(elan4_display_cproc_trap); -+EXPORT_SYMBOL(elan4_extract_dproc_trap); -+EXPORT_SYMBOL(elan4_display_dproc_trap); -+EXPORT_SYMBOL(elan4_extract_tproc_trap); -+EXPORT_SYMBOL(elan4_display_tproc_trap); -+EXPORT_SYMBOL(elan4_extract_iproc_trap); -+EXPORT_SYMBOL(elan4_inspect_iproc_trap); -+EXPORT_SYMBOL(elan4_display_iproc_trap); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/user.c linux-2.6.9/drivers/net/qsnet/elan4/user.c ---- clean/drivers/net/qsnet/elan4/user.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/user.c 2005-07-19 09:45:36.000000000 -0400 -@@ -0,0 +1,3443 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.c,v 1.89.2.2 2005/07/19 13:45:36 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define elan_attach_cap(cap,rnum,args,func) elanmod_attach_cap(cap,args,func) -+#define elan_detach_cap(cap,rnum) elanmod_detach_cap(cap) -+#endif -+ -+#define NETERR_MSGS 16 -+ -+int user_p2p_route_options = FIRST_TIMEOUT(3); -+int user_bcast_route_options = FIRST_TIMEOUT(3); -+int user_dproc_retry_count = 15; -+int user_cproc_retry_count = 2; -+int user_ioproc_enabled = 1; -+int user_pagefault_enabled = 1; -+ -+int num_fault_save = 30; -+int min_fault_pages = 1; -+int max_fault_pages = 128; -+ -+static int -+user_validate_cap (USER_CTXT *uctx, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN4_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN4_SYSTEM_CONTEXT (cap->cap_highcontext)) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP,"user_validate_cap: lctx %x hctx %x high %x\n", cap->cap_lowcontext, cap->cap_highcontext, ELAN4_KCOMM_BASE_CONTEXT_NUM); -+ PRINTF0 (DBG_DEVICE, DBG_VP,"user_validate_cap: user process cant attach to system cap\n"); -+ return (EINVAL); -+ } -+ -+ return elanmod_classify_cap(&uctx->uctx_position, cap, use); -+} -+ -+static __inline__ void -+__user_signal_trap (USER_CTXT *uctx) -+{ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_IDLE: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deliver signal %d to pid %d\n", uctx->uctx_trap_signo, uctx->uctx_trap_pid); -+ -+ if (uctx->uctx_trap_signo) -+ kill_proc (uctx->uctx_trap_pid, uctx->uctx_trap_signo, 1); -+ break; -+ -+ case UCTX_TRAP_SLEEPING: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: wakeup sleeping trap handler\n"); -+ -+ kcondvar_wakeupone (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ break; -+ } -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+} -+ -+static void -+user_signal_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_timer: state=%d pid=%d signal=%d (now %d start %d)\n", -+ uctx->uctx_trap_state, uctx->uctx_trap_pid, uctx->uctx_trap_signo, jiffies, -+ uctx->uctx_int_start); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ __user_signal_trap (uctx); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+#define MAX_INTS_PER_TICK 50 -+#define MIN_INTS_PER_TICK 20 -+ -+static void -+user_shuffle_signal_trap (USER_CTXT *uctx) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ PRINTF (uctx, DBG_TRAP, "user_shuffle_signal_trap: signal=%d%s\n", -+ uctx->uctx_trap_signo, timer_pending(&uctx->uctx_shuffle_timer) ? " (timer-pending)" : ""); -+ -+ if (timer_pending (&uctx->uctx_shuffle_timer)) -+ return; -+ -+ uctx->uctx_shuffle_timer.expires = jiffies + (HZ*2); -+ -+ add_timer (&uctx->uctx_shuffle_timer); -+} -+ -+static void -+user_signal_trap (USER_CTXT *uctx) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: state=%d pid=%d signal=%d%s\n", uctx->uctx_trap_state, -+ uctx->uctx_trap_pid, uctx->uctx_trap_signo, timer_pending(&uctx->uctx_int_timer) ? " (timer-pending)" : ""); -+ -+ uctx->uctx_int_count++; -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ return; -+ -+ if (uctx->uctx_int_count > ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deferring signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)); -+ -+ /* We're interrupting too fast, so defer this signal */ -+ uctx->uctx_int_timer.expires = jiffies + (++uctx->uctx_int_delay); -+ -+ add_timer (&uctx->uctx_int_timer); -+ } -+ else -+ { -+ __user_signal_trap (uctx); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: check signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ (int)(jiffies - uctx->uctx_int_start) * MIN_INTS_PER_TICK); -+ -+ if (uctx->uctx_int_count < ((int) (jiffies - uctx->uctx_int_start)) * MIN_INTS_PER_TICK) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: reset interrupt throttle (count %d ticks %d)\n", -+ uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start)); -+ -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ } -+ } -+} -+ -+static void -+user_neterr_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_status |= UCTX_NETERR_TIMER; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush_dma_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, DProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (typeSize) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ PRINTF4 (uctx, DBG_SWAP, "user_flush_dma_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Desc.dma_typeSize, -+ qentry.Desc.dma_cookie, qentry.Desc.dma_vproc, qentry.Desc.dma_srcAddr); -+ PRINTF3 (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", qentry.Desc.dma_dstAddr, -+ qentry.Desc.dma_srcEvent, qentry.Desc.dma_dstEvent); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dmaQ)) -+ { -+ PRINTF (uctx, DBG_SWAP, "user_flush_dma_runqueue: queue overflow\n"); -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_OVERFLOW; -+ } -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = qentry.Desc; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_thread_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, TProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_TProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 context = elan4_sdram_readq (dev, qfptr + offsetof (E4_TProcQueueEntry, Context)); -+ -+ if (TPROC_Context (context) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_TProcQueueEntry)); -+ -+ PRINTF (uctx, DBG_SWAP, "user_flush_thread_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Regs.Registers[0], -+ qentry.Regs.Registers[1], qentry.Regs.Registers[2], qentry.Regs.Registers[3]); -+ PRINTF (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", -+ qentry.Regs.Registers[4], qentry.Regs.Registers[5], qentry.Regs.Registers[6]); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_threadQ)) -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = qentry.Regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ } -+ -+ /* change the thread to execute the suspend sequence */ -+ qentry.Regs.Registers[0] = dev->dev_tproc_suspend; -+ qentry.Regs.Registers[1] = dev->dev_tproc_space; -+ qentry.Context = dev->dev_ctxt.ctxt_num; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_TProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_TProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_dmas (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & INT_DProcHalted) != 0); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ user_flush_dma_runqueue (dev, uctx, qfull); -+ -+ uctx->uctx_status = (uctx->uctx_status | UCTX_STOPPED) & ~UCTX_STOPPING; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: statux %x - stopped\n", uctx->uctx_status); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush (ELAN4_DEV *dev, void *arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & (INT_Halted|INT_Discarding)) == (INT_Halted|INT_Discarding)); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - flushing context\n", uctx->uctx_status); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_RUNNING) -+ { -+ /* NOTE: since the inserter can still be running we modify the permissions -+ * to zero then when the extractor starts up again it will trap */ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: stopping cq indx=%d\n", elan4_cq2idx(ucq->ucq_cq)); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, 0, 0); -+ } -+ } -+ -+ user_flush_thread_runqueue (dev, uctx, TPROC_LowRunQueueFull(read_reg64 (dev, TProcStatus))); -+ -+ /* since we can't determine whether the dma run queue is full or empty, we use a dma -+ * halt operation to do the flushing - as the reason for halting the dma processor -+ * will be released when we return, we keep it halted until the flush has completed */ -+ elan4_queue_dma_flushop (dev, &uctx->uctx_dma_flushop, 0); -+ -+ if (uctx->uctx_status & UCTX_EXITING) -+ elan4_flush_icache_halted (&uctx->uctx_ctxt); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_set_filter (USER_CTXT *uctx, E4_uint32 state) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ elan4_set_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext, state); -+ } -+} -+ -+static void -+user_start_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ if (UCTX_NACKING(uctx)) -+ uctx->uctx_status |= reason; -+ else -+ { -+ uctx->uctx_status |= reason; -+ -+ user_set_filter (uctx, E4_FILTER_STATS | E4_FILTER_DISCARD_ALL); -+ } -+} -+ -+static void -+user_stop_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ uctx->uctx_status &= ~reason; -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+} -+ -+static void -+user_start_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev =uctx->uctx_ctxt.ctxt_dev; -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (! (uctx->uctx_status & UCTX_STOPPED)); -+ -+ user_start_nacking (uctx, reason); -+ -+ if ((uctx->uctx_status & UCTX_STOPPING) != 0) -+ return; -+ -+ uctx->uctx_status |= UCTX_STOPPING; -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+} -+ -+static void -+user_stop_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_stop_stopping: no more reasons => %x\n", uctx->uctx_status); -+ -+ user_signal_trap (uctx); -+ } -+} -+ -+void -+user_swapout (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapout: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_start_nacking (uctx, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING) && /* wait for someone else to finish */ -+ uctx->uctx_trap_count > 0) /* and for trap handlers to notice */ -+ { /* and exit */ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: waiting for %d trap handlers to exit/previous swapout\n", uctx->uctx_trap_count); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_SWAPPED) /* already swapped out */ -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return; -+ } -+ -+ uctx->uctx_status |= (UCTX_SWAPPING|UCTX_STOPPING); /* mark the context as swapping & stopping */ -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+ -+ while (! (uctx->uctx_status & UCTX_STOPPED)) -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ -+ /* all state has been removed from the elan - we can now "tidy" it up */ -+ -+ PRINTF0 (uctx, DBG_SWAP, "user_swapout: swapped out\n"); -+ -+ uctx->uctx_status = (uctx->uctx_status & ~UCTX_SWAPPING) | UCTX_SWAPPED; -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_swapin (USER_CTXT *uctx, unsigned reason) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (uctx->uctx_status & UCTX_SWAPPED_REASONS); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapin: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING)) /* wait until other threads have */ -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); /* completed their swap operation */ -+ -+ ASSERT (uctx->uctx_status & (UCTX_SWAPPED | UCTX_STOPPED)); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (! (uctx->uctx_status & UCTX_SWAPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_SWAPPED; -+ -+ /* no longer swapped out - wakeup anyone sleeping waiting for swapin */ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ if (! (uctx->uctx_status & UCTX_STOPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ user_signal_trap (uctx); -+ } -+ } -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapin: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_destroy_callback (void *arg, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ -+ PRINTF (uctx, DBG_VP, "user_destroy_callback: %s\n", map == NULL ? "cap destoyed" : "map destroyed"); -+} -+ -+int -+user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CTXT_ENTRY *cent; -+ unsigned long flags; -+ int ctype, res; -+ -+ if ((ctype = user_validate_cap (uctx, cap, ELAN_USER_ATTACH)) < 0) -+ return ctype; -+ -+ if ((ctype == ELAN_CAP_RMS) && (res = elan_attach_cap (cap, dev->dev_devinfo.dev_rail, uctx, user_destroy_callback)) != 0) -+ { -+ /* NOTE: elan_attach_cap returns +ve errnos */ -+ return -res; -+ } -+ -+ KMEM_ALLOC (cent, USER_CTXT_ENTRY *, sizeof (USER_CTXT_ENTRY), 1); -+ if (cent == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ return -ENOMEM; -+ } -+ -+ KMEM_ALLOC (cent->cent_cap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ if (cent->cent_cap == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ return -ENOMEM; -+ } -+ -+ memcpy (cent->cent_cap, cap, ELAN_CAP_SIZE(cap)); -+ -+ if ((res = elan4_attach_filter (&uctx->uctx_ctxt, cap->cap_mycontext)) != 0) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ -+ return res; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_add_tail (¢->cent_link, &uctx->uctx_cent_list); -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (0); -+ -+} -+ -+void -+user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct list_head *entry; -+ struct list_head *next; -+ struct list_head list; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF (uctx, DBG_NETWORK_CTX, cap ? "user_detach: network context %d\n" : "user_detach: all network contexts\n", cap ? cap->cap_mycontext : 0); -+ -+ list_for_each_safe (entry, next, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ if (cap == NULL || ELAN_CAP_MATCH (cap, cent->cent_cap)) -+ { -+ PRINTF1 (uctx, DBG_NETWORK_CTX, "user_detach: detach from network context %d\n", cent->cent_cap->cap_mycontext); -+ -+ elan4_detach_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext); -+ -+ list_del (¢->cent_link); -+ list_add_tail (¢->cent_link, &list); -+ } -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ USER_CTXT_ENTRY *cent = list_entry (list.next, USER_CTXT_ENTRY, cent_link); -+ -+ list_del (¢->cent_link); -+ -+ if (user_validate_cap (uctx, cent->cent_cap, ELAN_USER_DETACH) == ELAN_CAP_RMS) -+ elan_detach_cap (cent->cent_cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cent->cent_cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ } -+} -+ -+void -+user_block_inputter (USER_CTXT *uctx, unsigned blocked) -+{ -+ unsigned long flags; -+ int isblocked; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ isblocked = (uctx->uctx_status & UCTX_USER_FILTERING); -+ -+ if (blocked && !isblocked) -+ user_start_nacking (uctx, UCTX_USER_FILTERING); -+ -+ if (!blocked && isblocked) -+ user_stop_nacking (uctx, UCTX_USER_FILTERING); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static USER_VPSEG * -+user_install_vpseg (USER_CTXT *uctx, unsigned process, unsigned entries) -+{ -+ struct list_head *entry; -+ USER_VPSEG *seg; -+ -+ if ((process + entries) > (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (NULL); -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (process <= (seg->vps_process + seg->vps_entries-1) && -+ (process + entries - 1) >= seg->vps_process) -+ return ((USER_VPSEG *) NULL); -+ } -+ -+ KMEM_ZALLOC (seg, USER_VPSEG *, sizeof (USER_VPSEG), 1); -+ -+ if (seg == (USER_VPSEG *) NULL) -+ return ((USER_VPSEG *) NULL); -+ -+ seg->vps_process = process; -+ seg->vps_entries = entries; -+ -+ list_add_tail (&seg->vps_link, &uctx->uctx_vpseg_list); -+ -+ return (seg); -+} -+ -+static void -+user_remove_vpseg (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_del (&seg->vps_link); -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ /* These pointers (union) are only valid for P2P segs */ -+ if (seg->vps_p2p_routes) -+ KMEM_FREE (seg->vps_p2p_routes, sizeof (E4_VirtualProcessEntry) * seg->vps_entries); -+ -+ if (seg->vps_p2p_cap) -+ KMEM_FREE (seg->vps_p2p_cap, ELAN_CAP_SIZE(seg->vps_p2p_cap)); -+ -+ break; -+ -+ case USER_VPSEG_BCAST: -+ ; -+ } -+ -+ KMEM_FREE (seg, sizeof (USER_VPSEG)); -+} -+ -+static USER_VPSEG * -+user_find_vpseg (USER_CTXT *uctx, unsigned low, unsigned high) -+{ -+ struct list_head *entry; -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_process <= low && (seg->vps_process + seg->vps_entries) > high) -+ return (seg); -+ } -+ -+ return ((USER_VPSEG *) NULL); -+} -+ -+static ELAN_LOCATION -+user_process2location (USER_CTXT *uctx, USER_VPSEG *seg, unsigned process) -+{ -+ ELAN_LOCATION location; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ if (seg == NULL) -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL || (seg->vps_type != USER_VPSEG_P2P)) -+ return (location); -+ -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ -+ found: -+ return (location); -+} -+ -+static unsigned -+user_location2process (USER_CTXT *uctx, ELAN_LOCATION location) -+{ -+ unsigned int process = ELAN_INVALID_PROCESS; -+ struct list_head *entry; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_type != USER_VPSEG_P2P) -+ continue; -+ -+ if (location.loc_node >= seg->vps_p2p_cap->cap_lownode && location.loc_node <= seg->vps_p2p_cap->cap_highnode && -+ location.loc_context >= seg->vps_p2p_cap->cap_lowcontext && location.loc_context <= seg->vps_p2p_cap->cap_highcontext) -+ { -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+ } -+ } -+ found: -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (process); -+} -+ -+static void -+user_loadroute_vpseg (USER_CTXT *uctx, USER_VPSEG *seg, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_CAPABILITY *cap = seg->vps_p2p_cap; -+ unsigned nnodes = ELAN_CAP_NUM_NODES (cap); -+ unsigned nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ E4_VirtualProcessEntry route; -+ unsigned nodeOff; -+ unsigned ctxOff; -+ unsigned vpOff; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+} -+ -+static int -+user_loadroute_bcast (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_POSITION *pos = &uctx->uctx_position; -+ E4_VirtualProcessEntry route; -+ USER_VPSEG *aseg; -+ int res; -+ ELAN_LOCATION low; -+ ELAN_LOCATION high; -+ -+ if ((aseg = user_find_vpseg (uctx, seg->vps_bcast_lowvp, seg->vps_bcast_highvp)) == NULL || aseg->vps_type != USER_VPSEG_P2P) -+ return (-EINVAL); -+ -+#ifdef use_elanmod -+ if ((res = user_validate_cap (dev, aseg->vps_p2p_cap, ELAN_USER_BROADCAST)) < 0) -+ return (res); -+#endif -+ -+ low = user_process2location (uctx, aseg, seg->vps_bcast_lowvp); -+ high = user_process2location (uctx, aseg, seg->vps_bcast_highvp); -+ -+ if (low.loc_context != high.loc_context) -+ return (-EINVAL); -+ -+ /* NOTE: if loopback can only broadcast to ourself - -+ * if back-to-back can only broadcast to other node */ -+ if ((pos->pos_mode == ELAN_POS_MODE_LOOPBACK && low.loc_node != high.loc_node && low.loc_node != pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && low.loc_node != high.loc_node && low.loc_node == pos->pos_nodeid)) -+ { -+ return (-EINVAL); -+ } -+ -+ if ((res = elan4_generate_route (pos, &route, low.loc_context, low.loc_node, high.loc_node, user_bcast_route_options)) < 0) -+ return (res); -+ -+ PRINTF (uctx, DBG_VP, "user_loadroute_bcast: virtual process %d -> nodes %d.%d context %d [%016llx.%016llx]\n", -+ seg->vps_process, low.loc_node, high.loc_node, low.loc_context, route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process, &route); -+ return (0); -+} -+ -+int -+user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap) -+{ -+ USER_VPSEG *seg; -+ ELAN_CAPABILITY *ncap; -+ unsigned entries; -+ -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) == 0) -+ entries = bt_nbits (cap->cap_bitmap , ELAN_CAP_BITMAPSIZE(cap)); -+ else -+ entries = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if ((process + entries) > (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE (cap), 1); -+ -+ if (ncap == NULL) -+ return (-ENOMEM); -+ -+ memcpy (ncap, cap, ELAN_CAP_SIZE (cap)); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, entries)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_P2P; -+ seg->vps_p2p_cap = ncap; -+ seg->vps_p2p_routes = NULL; -+ -+ user_loadroute_vpseg (uctx, seg, &uctx->uctx_position); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp) -+{ -+ USER_VPSEG *seg; -+ int res; -+ -+ if (lowvp > highvp || process >= (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, 1)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_BCAST; -+ seg->vps_bcast_lowvp = lowvp; -+ seg->vps_bcast_highvp = highvp; -+ -+ if ((res = user_loadroute_bcast (uctx, seg)) < 0) -+ user_remove_vpseg (uctx, seg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+int -+user_removevp (USER_CTXT *uctx, unsigned process) -+{ -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if (process == ELAN_INVALID_PROCESS) -+ seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link); -+ else -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ do { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int i; -+ -+ for (i = 0; i < seg->vps_entries; i++) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, seg->vps_process + i); -+ -+ user_remove_vpseg (uctx, seg); -+ -+ } while (process == ELAN_INVALID_PROCESS && (seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)) != NULL); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ /* check that the route supplied is valid and goes to the correct place */ -+ location = user_process2location (uctx, seg, process); -+ -+ if (elan4_check_route (&uctx->uctx_position, location, route, 0) != 0) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes == NULL) -+ KMEM_ZALLOC (seg->vps_p2p_routes, E4_VirtualProcessEntry *, sizeof (E4_VirtualProcessEntry) * seg->vps_entries, 1); -+ -+ if (seg->vps_p2p_routes == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-ENOMEM); -+ } -+ -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = route->Values[0]; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = ROUTE_CTXT_VALUE(location.loc_context) | (route->Values[1] & ~ROUTE_CTXT_MASK); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_set_route: vp=%d -> %016llx%016llx\n", process, -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1], seg->vps_p2p_routes[process - seg->vps_process].Values[0]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, process, &seg->vps_p2p_routes[process - seg->vps_process]); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_reset_route (USER_CTXT *uctx, unsigned process) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ ELAN_LOCATION location; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes != NULL) -+ { -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = 0; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = 0; -+ } -+ -+ /* generate the default route to this location */ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_reset_route: vp=%d\n", process); -+ -+ if (elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, 0) < 0) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, process); -+ else -+ elan4_write_route (dev, uctx->uctx_routetable, process, &route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ *error = elan4_check_route (&uctx->uctx_position, user_process2location (uctx, seg, process), route, 0); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ unsigned long flags; -+ int res, found = 0; -+ struct list_head *el; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ /* determine the location of the virtual process */ -+ if ((seg = user_find_vpseg (uctx, vp, vp)) == NULL) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d has no vpseg\n", vp); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ location = user_process2location (uctx, seg, vp); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d is a bcast vp\n", vp); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ /* check that we're attached to the network context */ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (el , &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (el, USER_CTXT_ENTRY, cent_link); -+ -+ if (cent->cent_cap->cap_mycontext == nctx) -+ found++; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if (! found) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: nctx=%d not attached\n", nctx); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return -EINVAL; -+ } -+ -+ /* Update the fields which the user might have "faked" */ -+ msg->msg_context = location.loc_context; -+ msg->msg_sender.loc_node = uctx->uctx_position.pos_nodeid; -+ msg->msg_sender.loc_context = nctx; -+ -+ res = elan4_neterr_sendmsg (uctx->uctx_ctxt.ctxt_dev, location.loc_node, retries, msg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (res); -+} -+ -+ -+static int -+user_resolvevp (USER_CTXT *uctx, unsigned process) -+{ -+ int res = 0; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ E4_VirtualProcessEntry route; -+ -+ PRINTF1 (uctx, DBG_VP, "user_resolvevp: process=%d\n", process); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+#ifdef use_elanmod -+ if ((res = user_validate_cap (uctx, seg->vps_p2p_cap, ELAN_USER_P2P)) != 0) -+ break; -+#endif -+ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_VP, "user_resolvevp: vp=%d -> node=%d ctx=%d\n", process, location.loc_node, location.loc_context); -+ -+ if (seg->vps_p2p_routes != NULL && seg->vps_p2p_routes[process - seg->vps_process].Values[0] != 0) -+ route = seg->vps_p2p_routes[process - seg->vps_process]; -+ else if ((res = elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, user_p2p_route_options)) < 0) -+ break;; -+ -+ elan4_write_route (uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, process, &route); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ res = user_loadroute_bcast (uctx, seg); -+ break; -+ -+ default: -+ res = -EINVAL; -+ break; -+ } -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+static void -+user_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_eprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_EPROC, "user_eproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps), 0); -+ -+ DBGCMD (ctxt, DBG_EPROC, elan4_display_eproc_trap (ctxt, DBG_EPROC, "user_eproc_trap", RING_QUEUE_BACK(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_eprocTrapQ)) -+ user_start_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_CQ *ucq = NULL; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2num(ucq->ucq_cq) == cqnum) -+ break; -+ } -+ -+ ASSERT (ucq != NULL); -+ -+ if (ucq->ucq_state != UCQ_RUNNING && CPROC_TrapType (status) == CommandProcInserterError) -+ { -+ PRINTF (ctxt, DBG_TRAP, "user_cproc_trap CommandProcInserterError\n"); -+ ucq->ucq_errored++; -+ } -+ else -+ { -+ ASSERT (ucq->ucq_state == UCQ_RUNNING); -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &ucq->ucq_trap, cqnum); -+ -+ DBGCMD (ctxt, DBG_CPROC, elan4_display_cproc_trap (ctxt, DBG_CPROC, "user_cproc_trap", &ucq->ucq_trap)); -+ -+ ucq->ucq_state = UCQ_TRAPPED; -+ -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_DPROC, "user_dproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ ELAN4_DPROC_TRAP *trap = RING_QUEUE_BACK (uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ DBGCMD (ctxt, DBG_DPROC, elan4_display_dproc_trap (ctxt, DBG_DPROC, "user_dproc_trap", trap)); -+ -+ if (!DPROC_PrefetcherFault (status) && DPROC_TrapType(status) == DmaProcFailCountError && !RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = trap->tr_desc; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ else -+ { -+ if (RING_QUEUE_ADD (uctx->uctx_dprocTrapQ)) -+ user_start_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ } -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_tprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_TPROC, "user_tproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps)); -+ -+ DBGCMD (ctxt, DBG_TPROC, elan4_display_tproc_trap (ctxt, DBG_TPROC, "user_tproc_trap", RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_tprocTrapQ)) -+ user_start_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ } -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[unit & 1]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (utrap->ut_state == UTS_IPROC_RUNNING); -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, &utrap->ut_trap, unit); -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "user_iproc_trap", &utrap->ut_trap)); -+ -+ utrap->ut_state = UTS_IPROC_TRAPPED; -+ -+ user_start_nacking (uctx, unit ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx\n", cookie); -+ -+ switch (cookie) -+ { -+ case ELAN4_INT_COOKIE_DDCQ: -+ uctx->uctx_ddcq_intr--; -+ -+ user_signal_trap (uctx); -+ break; -+ -+ default: -+ if (uctx->uctx_intcookie_table == NULL || intcookie_fire (uctx->uctx_intcookie_table, cookie) != 0) -+ { -+ PRINTF2 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx %s\n", cookie, uctx->uctx_intcookie_table ? "not found" : "no table"); -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ user_signal_trap (uctx); -+ } -+ break; -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+static void -+user_needs_shuffle (ELAN4_CTXT *ctxt, int tbl, int hashidx) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ elan4mmu_set_shuffle(ctxt, tbl, hashidx); -+ -+ if (ctxt->shuffle_needed[tbl]) -+ user_shuffle_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+} -+static void -+user_neterrmsg (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (! RING_QUEUE_FULL (uctx->uctx_msgQ)) -+ { -+ memcpy (RING_QUEUE_BACK (uctx->uctx_msgQ, uctx->uctx_msgs), msg, sizeof (ELAN4_NETERR_MSG)); -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_msgQ); -+ -+ user_signal_trap (uctx); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+ELAN4_TRAP_OPS user_trap_ops = -+{ -+ user_eproc_trap, -+ user_cproc_trap, -+ user_dproc_trap, -+ user_tproc_trap, -+ user_iproc_trap, -+ user_interrupt, -+ user_neterrmsg, -+ user_needs_shuffle, -+}; -+ -+static int -+deliver_trap (ELAN4_USER_TRAP *utrapp, int type, unsigned proc, void *trap, ...) -+{ -+ register int i, len; -+ va_list ap; -+ -+ PRINTF (NULL, DBG_TRAP, "deliver_trap: type=%d proc=%d\n", type, proc); -+ -+ switch (proc) -+ { -+ case UTS_CPROC: len = sizeof (ELAN4_CPROC_TRAP); break; -+ case UTS_DPROC: len = sizeof (ELAN4_DPROC_TRAP); break; -+ case UTS_EPROC: len = sizeof (ELAN4_EPROC_TRAP); break; -+ case UTS_IPROC: len = sizeof (ELAN4_IPROC_TRAP); break; -+ case UTS_TPROC: len = sizeof (ELAN4_TPROC_TRAP); break; -+ case UTS_NETERR_MSG: len = sizeof (ELAN4_NETERR_MSG); break; -+ default: len = 0; break; -+ } -+ -+ if (put_user (type, &utrapp->ut_type) || put_user (proc, &utrapp->ut_proc) || copy_to_user (&utrapp->ut_trap, trap, len)) -+ return (UTS_EFAULT); -+ -+ va_start (ap, trap); -+ for (i = 0; i < sizeof (utrapp->ut_args)/sizeof (utrapp->ut_args[0]); i++) -+ if (put_user (va_arg (ap, unsigned long), &utrapp->ut_args[i])) -+ return (UTS_EFAULT); -+ va_end (ap); -+ -+ return (type); -+} -+ -+static int -+user_pagefault (USER_CTXT *uctx, E4_FaultSave *farea) -+{ -+ E4_Addr addr = farea->FaultAddress; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ FAULT_SAVE *entry; -+ FAULT_SAVE **predp; -+ int count; -+ -+ PRINTF2 (uctx, DBG_FAULT, "user_pagefault: addr=%llx fsr %x\n", (unsigned long long) addr, fsr); -+ -+ if ((fsr & FSR_FaultForBadData) != 0) /* Memory ECC error during walk */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: ECC error during walk\n"); -+ return (-EFAULT); -+ } -+ -+ if ((fsr & FSR_FaultForMaxChainCount) != 0) /* Have walked a chain of 1024 items */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: pte chain too long\n"); -+ return (-EFAULT); -+ } -+ -+ if (! user_pagefault_enabled) -+ return (-EFAULT); -+ -+ if (uctx->uctx_num_fault_save) -+ { -+ spin_lock (&uctx->uctx_fault_lock); -+ for( predp = &uctx->uctx_fault_list; (entry = *predp)->next != NULL; predp = &entry->next) -+ { -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ break; -+ } -+ -+ *predp = entry->next; -+ entry->next = uctx->uctx_fault_list; -+ uctx->uctx_fault_list = entry; -+ -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ { -+ if ((entry->count <<= 1) > max_fault_pages) -+ entry->count = max_fault_pages; -+ } -+ else -+ entry->count = min_fault_pages; -+ -+ entry->addr = (addr & ~((E4_Addr) PAGE_SIZE-1))+(entry->count * PAGE_SIZE); -+ count = entry->count; -+ spin_unlock (&uctx->uctx_fault_lock); -+ -+ if (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), count * PAGESIZE, fsr) == 0) -+ return 0; -+ -+ /* else pre-faulting has failed, try just this page */ -+ } -+ -+ return (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), PAGE_SIZE, fsr)); -+ -+} -+ -+static int -+queue_dma_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_DMA *dma) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF (uctx, DBG_DPROC, "queue_dma_for_retry: overflow\n"); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_DPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+queue_thread_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF (uctx, DBG_TPROC, "queue_thread_for_retry: overflow\n"); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_TPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+fixup_eproc_trap (USER_CTXT *uctx, ELAN4_EPROC_TRAP *trap, int waitevent) -+{ -+ E4_FaultSave *farea = &trap->tr_faultarea; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ E4_uint64 CountAndType; -+ E4_uint64 CopySource; -+ E4_uint64 CopyDest; -+ -+ /* -+ * Event processor can trap as follows : -+ * 1) Event location read (faddr == event location & Event Permission) -+ * 2) Event location write (faddr == event location & Event Permission) -+ * 3) Copy Source read Read Access -+ * 4) Copy/Write dest write other -+ * -+ * NOTE - it is possible to see both 3) and 4) together - but only with physical errors. -+ */ -+ if (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite) -+ { -+ /* -+ * We complete the copy/write by issuing a waitevent 0 of the approriate type. -+ * - NB mask off bottom bits of EventAddr in case of partial setevent -+ */ -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ -+ if (! user_ddcq_check (uctx, 4)) -+ return (0); -+ -+ if ((trap->tr_event.ev_CountAndType & E4_EVENT_COPY_TYPE_MASK) == E4_EVENT_WRITE) -+ { -+ /* case 4) faulted on write word to destination */ -+ -+ CountAndType = trap->tr_event.ev_CountAndType & E4_EVENT_TYPE_MASK; -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: write Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " WritePtr=%llx WriteValue=%llx\n", -+ trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ } -+ else -+ { -+ /* case 3) or case 4) faulted on read/write of copy */ -+ if (AT_Perm (fsr) == AT_PermLocalDataRead) -+ { -+ CountAndType = (trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | EPROC_CopySize(trap->tr_status); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest; -+ } -+ else -+ { -+ CountAndType = ((trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | -+ ((EPROC_CopySize(trap->tr_status) + EVENT_COPY_NDWORDS) & E4_EVENT_COPY_SIZE_MASK)); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest - EVENT_COPY_BLOCK_SIZE; -+ } -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: copy Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " CopySource=%llx CopyDest=%llx\n", CopySource, CopyDest); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, CopySource, CopyDest); -+ } -+ } -+ else -+ { -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ E4_uint32 EventCount = trap->tr_eventaddr & (E4_EVENT_ALIGN-1); -+ -+ /* case 1) or 2) - just reissue the event */ -+ if (! waitevent) -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: setevent EventAddr=%llx EventCount=%x\n", EventAddr, EventCount); -+ else -+ { -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: waitevent Event=%llx CountAndType=%llx\n", EventAddr, trap->tr_event.ev_CountAndType); -+ PRINTF (uctx, DBG_TRAP, " Param[0]=%llx Param[1]=%llx\n", -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ } -+ -+ if (! user_ddcq_check (uctx, waitevent ? 4 : 2)) -+ return (0); -+ -+ if (waitevent) -+ user_ddcq_waitevent (uctx, EventAddr, trap->tr_event.ev_CountAndType, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ else -+ user_ddcq_seteventn (uctx, EventAddr, EventCount); -+ } -+ -+ return (1); -+} -+ -+ -+static int -+resolve_eproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_EPROC_TRAP *trap) -+{ -+ switch (EPROC_TrapType (trap->tr_status)) -+ { -+ case EventProcNoFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcNoFault\n"); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_EPROC, trap)); -+ -+ case EventProcMemoryFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcMemoryFault @ %llx\n", trap->tr_faultarea.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_EPROC, trap)); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcCountWrapError: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_EPROC, trap)); -+ -+ default: -+ printk ("resolve_eproc_trap: bad trap type %d\n", EPROC_TrapType (trap->tr_status)); -+ BUG(); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_cproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, USER_CQ *ucq) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &ucq->ucq_trap; -+ E4_uint64 command; -+ int res; -+ int chan; -+ -+ ELAN_LOCATION location; -+ int vp, node; -+ -+ PRINTF2 (uctx, DBG_CPROC, "resolve_cproc_trap: cq %p is trapped - Status %lx\n", ucq, trap->tr_status); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcDmaQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcDmaQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcInterruptQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcWaitTrap: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcWaitTrap\n"); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap->tr_eventtrap)) != UTS_FINISHED) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (res); -+ } -+ -+ if (fixup_eproc_trap (uctx, &trap->tr_eventtrap, 1) == 0) -+ return UTS_RESCHEDULE; -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcMemoryFault: -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcMemoryFault at %llx\n", trap->tr_faultarea.FaultAddress); -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcRouteFetchFault: -+ command = elan4_trapped_open_command (dev, ucq->ucq_cq); -+ -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcRouteFetchFault to vp %d\n", (int) (command >> 32)); -+ -+ if (user_resolvevp (uctx, (unsigned) (command >> 32)) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq), (long) (command >> 32))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcFailCountZero: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcFailCountZero - reset failcount\n"); -+ -+ /* Update CPROC timeout route statistics */ -+ for (chan = 0; chan <= 1; chan++) -+ { -+ /* Was there a timeout on this channel ? */ -+ if (PackValue(trap->tr_qdesc.CQ_AckBuffers, chan) == PackTimeout) -+ { -+ /* Find the last open command for that channel to extract the relevant vp */ -+ if ((vp = cproc_open_extract_vp(uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq, chan)) != -1) -+ { -+ E4_VirtualProcessEntry route; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, vp); -+ elan4_read_route (uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, vp, &route); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ -+ if ((node >= 0) && (node < uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_cproc_timeout[node]++; -+ -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_cproc_timeout_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ } -+ } -+ -+ /* NOTE - we must not modify the ChannelNotCompleted bits - so modify */ -+ /* the restart count with a part-word store */ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, user_cproc_retry_count); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcAddressAlignment: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ case CommandProcPermissionTrap: -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcPermissionTrap - %s\n", -+ (control & CQ_PermissionMask) != ucq->ucq_cq->cq_perm ? "resume from stop" : "permission denied"); -+ -+ if ((control & CQ_PermissionMask) == ucq->ucq_cq->cq_perm) -+ return (deliver_trap (utrapp, UTS_PERMISSION_DENIED, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, 0); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ } -+ -+ case CommandProcBadData: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_COMMAND, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ default: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_DPROC_TRAP *trap) -+{ -+ ELAN_LOCATION location; -+ int node; -+ E4_VirtualProcessEntry route; -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ { -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: PrefetcherFault at %llx\n", trap->tr_prefetchFault.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_prefetchFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_DPROC, trap)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc)); -+ } -+ -+ switch (DPROC_TrapType (trap->tr_status)) -+ { -+ case DmaProcRouteFetchFault: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcRouteFetchFault vp %d\n", trap->tr_desc.dma_vproc); -+ -+ if (user_resolvevp (uctx, trap->tr_desc.dma_vproc) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_DPROC, trap, trap->tr_desc.dma_vproc)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* immediate */)); -+ -+ case DmaProcFailCountError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcFailCountError - vp %d cookie %llx\n", -+ trap->tr_desc.dma_vproc, trap->tr_desc.dma_cookie); -+ -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcPacketAckError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcPacketAckError - %d%s\n", DPROC_PacketAckValue (trap->tr_status), -+ DPROC_PacketTimeout (trap->tr_status) ? " timeout" : ""); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, trap->tr_desc.dma_vproc); -+ elan4_read_route(uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, trap->tr_desc.dma_vproc, &route); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ /* Update dproc route timeout statistics */ -+ if ((node >= 0) && (node < uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ -+ if ((route.Values[0] != 0) || (route.Values[1] != 0)) -+ { -+ if (DPROC_PacketTimeout (trap->tr_status)) -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout[node]++; -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ else -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_ack_errors[node]++; -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_ack_error_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ } -+ -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcQueueOverflow: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcQueueOverflow\n"); -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcRunQueueReadFault: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_DPROC, trap)); -+ -+ default: -+ printk ("resolve_dproc_trap: unknown trap type : %d\n", DPROC_TrapType(trap->tr_status)); -+ BUG(); -+ } -+ return UTS_FINISHED; -+} -+ -+int -+resolve_tproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_TPROC_TRAP *trap) -+{ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trap state = %lx\n", trap->tr_state); -+ -+ if (trap->tr_state & TS_TrapForTooManyInstructions) -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_Unimplemented) -+ return (deliver_trap (utrapp, UTS_UNIMP_INSTR, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_DataAlignmentError) -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_InstAccessException) && user_pagefault (uctx, &trap->tr_instFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_DataAccessException) && user_pagefault (uctx, &trap->tr_dataFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ /* If we're restarting from trap - then just need to re-issue it */ -+ if (trap->tr_pc == uctx->uctx_trestart_addr || (trap->tr_state & TS_TrappedFlag)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trapped in trap code PC=%llx SP=%llx\n", trap->tr_pc, trap->tr_regs[1]); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ } -+ else -+ { -+ E4_uint64 *sp = (E4_uint64 *) user_elan2main (uctx, trap->tr_regs[1]); -+ int i, reload; -+ -+ /* need to store the register on the stack see */ -+ /* lib_tproc_trampoline_elan4_thread.S for stack layout */ -+#define TS_STACK_OFF(REG) ((((REG)&7)) - (((REG)>>3)*8) - 8) -+ for (reload = 0, i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: %%r%d [%016llx] -> %p\n", i, trap->tr_regs[i], &sp[TS_STACK_OFF(i)]); -+ -+ sulonglong ((u64 *) &sp[TS_STACK_OFF(i)], trap->tr_regs[i]); -+ -+ reload |= (1 << (i >> 3)); -+ } -+ } -+#undef TS_STACK_OFF -+ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: pc %llx npc %llx\n", trap->tr_pc, trap->tr_npc); -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: CC %x reload %x\n", (int) (trap->tr_state >> TS_XCCshift), reload); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ trap->tr_regs[2] = trap->tr_pc; -+ trap->tr_regs[3] = trap->tr_npc; -+ trap->tr_regs[4] = (trap->tr_state >> TS_XCCshift) & TS_XCCmask; -+ trap->tr_regs[5] = reload; -+ } -+ -+ return (queue_thread_for_retry (uctx, utrapp, (E4_ThreadRegs *) trap->tr_regs)); -+} -+ -+static int -+resolve_iproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int channel) -+{ -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[channel]; -+ ELAN4_IPROC_TRAP *trap = &utrap->ut_trap; -+ unsigned long flags; -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ if (trap->tr_flags & TR_FLAG_TOOMANY_TRANS) -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ ASSERT (trap->tr_trappedTrans >= 0 && trap->tr_trappedTrans < trap->tr_numTransactions); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputMemoryFault: -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ utrap->ut_state = UTS_IPROC_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_IPROC, trap, channel)); -+ } -+ break; -+ -+ case InputDmaQueueOverflow: -+ case InputEventEngineTrapped: -+ /* nothing to do for these 2 - restarting will simulate the transactions */ -+ break; -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ break; -+ -+ case InputCrcErrorAfterPAckOk: -+ PRINTF (DBG_DEVICE, DBG_IPROC, "InputCrcErrorAfterPAckOk: flags %x\n", trap->tr_flags); -+ -+ ASSERT ((trap->tr_flags & TR_FLAG_ACK_SENT) && ((trap->tr_flags & (TR_FLAG_DMA_PACKET|TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && trap->tr_identifyTrans == TR_TRANS_INVALID))); -+ break; -+ -+ case InputDiscardAfterAckOk: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ case InputAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_IPROC, trap, channel)); -+ -+ case InputInvalidTransType: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ default: -+ printk ("resolve_iproc_trap: unknown trap type %d\n", IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ /* -+ * TR_FLAG_DMA_PACKET means a DMA packet has faulted. -+ * -+ * TR_FLAG_BAD_TRANS means we have a transaction with a bad CRC after the transaction -+ * which sent the ack - this implies it's an overlapped ack DMA packet -+ * -+ * TR_FLAG_EOP_ERROR means we've received an EOP reset - if we hadn't seen an identify -+ * transaction then it's a DMA packet. -+ * -+ * To ensure that the DMA processor works correctly the next packet must be NACKed to -+ * cause it to resend this one. -+ */ -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: %s during DMA packet\n", -+ (trap->tr_flags & TR_FLAG_BAD_TRANS) ? "BadTransaction" : (trap->tr_flags & TR_FLAG_EOP_ERROR) ? "EopError" : "trap"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (trap->tr_flags & TR_FLAG_DMA_PACKET) -+ { -+ if (! (trap->tr_flags & TR_FLAG_BAD_TRANS)) -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ else -+ { -+ kcondvar_t waithere; -+ -+ /* We must ensure that the next packet is always nacked, so -+ * we wait here for an output timeout before dropping the -+ * context filter - we just pause here for 4 mS */ -+ kcondvar_init (&waithere); -+ kcondvar_timedwait (&waithere, &uctx->uctx_spinlock, &flags, lbolt + (HZ/250) + 1);; -+ kcondvar_destroy (&waithere); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ else -+ { -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: EopError with identify\n"); -+ -+ utrap->ut_state = UTS_IPROC_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: execute packet\n"); -+ -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ } -+ -+ return UTS_FINISHED; -+} -+ -+ -+static int -+resolve_cproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ struct list_head *entry; -+ int res = UTS_FINISHED; -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_TRAPPED) -+ { -+ res = resolve_cproc_trap (uctx, utrapp, ucq); -+ -+ if (res != UTS_FINISHED) -+ break; -+ } -+ -+ if (ucq->ucq_errored) -+ { -+ ucq->ucq_errored = 0; -+ res = deliver_trap (utrapp, UTS_CPROC_ERROR, UTS_CPROC, &ucq->ucq_trap, elan4_cq2idx(ucq->ucq_cq)); -+ break; -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (res); -+} -+ -+static int -+resolve_eproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_eprocTrapQ)) -+ { -+ ELAN4_EPROC_TRAP trap = *RING_QUEUE_FRONT (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_eprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ if (fixup_eproc_trap (uctx, &trap, 0) == 0) -+ { -+ PRINTF (uctx, DBG_EPROC, "resolve_eproc_trap: could not fixup eproc trap - requeue it\n"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_REALLY_FULL(uctx->uctx_eprocTrapQ)) -+ { -+ PRINTF (uctx, DBG_EPROC, "resolve_eproc_trap: queue overflow\n"); -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_OVERFLOW; -+ } -+ else -+ { -+ /* Requeue at front to preserve setevent ordering */ -+ /* GNAT 7504: Must move fptr before writing over it */ -+ (void) RING_QUEUE_ADD_FRONT(uctx->uctx_eprocTrapQ); -+ -+ *RING_QUEUE_FRONT(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps) = trap; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return UTS_RESCHEDULE; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_EPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dprocTrapQ)) -+ { -+ ELAN4_DPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_dproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_DPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_tproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_tprocTrapQ)) -+ { -+ ELAN4_TPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_tprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_tproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_TPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ if (uctx->uctx_iprocTrap[i].ut_state == UTS_IPROC_TRAPPED) -+ { -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_RESOLVING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_iproc_trap (uctx, utrapp, i)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_all_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ int res; -+ -+ if ((res = resolve_iproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_cproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_eproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_dproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_tproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ return (res); -+ -+ if (uctx->uctx_status & UCTX_OVERFLOW_REASONS) -+ { -+ PRINTF (uctx, DBG_TRAP, "resolve_all_traps: overflow reasons %x\n", uctx->uctx_status); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, uctx->uctx_status)); -+ } -+ -+ if (uctx->uctx_status & UCTX_ERROR_REASONS) -+ return (deliver_trap (utrapp, UTS_QUEUE_ERROR, UTS_NOPROC, NULL, uctx->uctx_status)); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+execute_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ switch (uctx->uctx_iprocTrap[i].ut_state) -+ { -+ case UTS_IPROC_EXECUTE_PACKET: -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_EXECUTING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_EXECUTE_PACKET, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ -+ case UTS_IPROC_NETWORK_ERROR: -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_NETWORK_ERROR_TRAP, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+progress_neterr (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! RING_QUEUE_EMPTY (uctx->uctx_msgQ)) -+ { -+ ELAN4_NETERR_MSG msg = *RING_QUEUE_FRONT (uctx->uctx_msgQ, uctx->uctx_msgs); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_msgQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_MSG, UTS_NETERR_MSG, &msg, user_location2process (uctx, msg.msg_sender)); -+ } -+ -+ if (uctx->uctx_status & UCTX_NETERR_TIMER) -+ { -+ uctx->uctx_status &= ~UCTX_NETERR_TIMER; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_TIMER, UTS_NOPROC, NULL); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static void -+restart_command_queues (USER_CTXT *uctx) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_NEEDS_RESTART) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ -+ elan4_restartcq (uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq); -+ } -+ } -+} -+ -+static int -+restart_dmas (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_dmas: back=%d front=%d\n", uctx->uctx_dmaQ.q_back, uctx->uctx_dmaQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dmaQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_dma (uctx, RING_QUEUE_FRONT(uctx->uctx_dmaQ, uctx->uctx_dmas)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dmaQ); -+ } -+ -+ return (1); -+} -+ -+static int -+restart_threads (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_threads: back=%d front=%d\n", uctx->uctx_threadQ.q_back, uctx->uctx_threadQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_threadQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_thread (uctx, RING_QUEUE_FRONT(uctx->uctx_threadQ, uctx->uctx_threads)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_threadQ); -+ } -+ -+ return (1); -+} -+ -+int -+user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr) -+{ -+ PRINTF2 (uctx, DBG_RESUME, "user_resume_eproc_trap: addr=%llx -> %s\n", addr, user_ddcq_check(uctx, 2) ? "success" : "EAGAIN"); -+ -+ if (! user_ddcq_check (uctx, 2)) -+ return (-EAGAIN); -+ -+ user_ddcq_setevent (uctx, addr); -+ -+ return (0); -+} -+ -+int -+user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_cproc_trap: indx=%d\n", indx); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == indx && ucq->ucq_state == UCQ_STOPPED && !(ucq->ucq_flags & UCQ_SYSTEM)) -+ { -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (0); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (-EINVAL); -+} -+ -+int -+user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ if (channel >= 2) -+ return (-EINVAL); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_STOPPED && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_EXECUTING && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_NETWORK_ERROR) -+ res = -EINVAL; -+ else -+ { -+ ELAN4_IPROC_TRAP *trap = &uctx->uctx_iprocTrap[channel].ut_trap; -+ -+ if (trans < trap->tr_numTransactions) -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> execute\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_EXECUTE_PACKET; -+ trap->tr_trappedTrans = trans; -+ trap->tr_transactions[trans] = *hdrp; -+ trap->tr_dataBuffers[trans] = *datap; -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> running\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (res); -+} -+ -+int -+__categorise_command (E4_uint64 command, int *cmdSize) -+{ -+ switch (command & 0x3) -+ { -+ case RUN_THREAD_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x7) -+ { -+ case WRITE_DWORD_CMD: *cmdSize = 2; break; -+ case ADD_DWORD_CMD: *cmdSize = 2; break; -+ -+ default: -+ switch (command & 0xF) -+ { -+ case OPEN_STEN_PKT_CMD: -+ *cmdSize = 1; -+ return 1; -+ -+ case COPY64_CMD: *cmdSize = 2; break; -+ case GUARD_CMD: *cmdSize = 1; break; -+ case INTERRUPT_CMD: *cmdSize = 1; break; -+ case RUN_DMA_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x1f) -+ { -+ case SEND_TRANS_CMD: -+ *cmdSize = 2 + (((command >> 16) & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ return 2; -+ -+ case SET_EVENT_CMD: *cmdSize = 1; break; -+ case SET_EVENTN_CMD: *cmdSize = 2; break; -+ case WAIT_EVENT_CMD: *cmdSize = 4; break; -+ -+ default: -+ switch (command & 0x3f) -+ { -+ case NOP_CMD: *cmdSize = 1; break; -+ case MAKE_EXT_CLEAN_CMD: *cmdSize = 1; break; -+ default: -+ return 3; -+ } -+ break; -+ } -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+int -+__whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize) -+{ -+ /* Move onto next command */ -+ while (cmdSize-- && (*commandPtr) != insertPtr) -+ *commandPtr = ((*commandPtr) & ~(cqSize-1)) | (((*commandPtr) + sizeof (E4_uint64)) & (cqSize-1)); -+ -+ return cmdSize == -1; -+} -+ -+int -+user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int found = 0; -+ struct list_head *el; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (el, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (el, USER_CQ, ucq_link); -+ -+ if ((ucq->ucq_cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ E4_uint64 openCommand = 0; -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ sdramaddr_t identifyPtr; -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d OPEN %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if ((command >> 32) == vp) -+ openCommand = command; -+ -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 2: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d SENDTRANS %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if (openCommand == 0) -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ else -+ { -+ switch ((command >> 16) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_IDENTIFY/TR_REMOTEDMA\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_SETEVENT_IDENTIFY/TR_INPUT_Q_COMMIT\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 2*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_ADDWORD\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 3*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_TESTANDWRITE\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 4*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ default: -+ identifyPtr = 0; -+ } -+ -+ if (! __whole_command (&commandPtr, insertPtr, cqSize, cmdSize)) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: not whole command\n"); -+ openCommand = 0; -+ } -+ -+ else if (identifyPtr) -+ { -+ E4_uint64 tcookie = elan4_sdram_readq (dev, identifyPtr); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie=%llx [%llx]\n", tcookie, cookie); -+ -+ if (tcookie == cookie) -+ { -+ unsigned int vchan = (openCommand >> 4) & 0x1f; -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie matches - vchan=%d\n", vchan); -+ -+ if (! waitforeop) -+ { -+ /* Alter the CQ_AckBuffer for this channel to indicate an -+ * ack was received */ -+ E4_uint64 value = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)); -+ E4_uint64 nvalue = ((value & ~((E4_uint64)0xf << ((vchan & 0xf) << 2))) | -+ ((E4_uint64) PackOk << ((vchan & 0xf) << 2))); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: CQ_AckBuffers %llx -> %llx\n", value, nvalue); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), nvalue); -+ pioflush_sdram (dev); -+ } -+ -+ found++; -+ } -+ openCommand = 0; -+ } -+ -+ if ((command >> 16) & TR_LAST_AND_SEND_ACK) -+ openCommand = 0; -+ } -+ break; -+ -+ case 3: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: invalid command %llx\n", command); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return -EINVAL; -+ } -+ -+ } -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ unsigned long flags; -+ int found = 0; -+ int idx; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ RING_QUEUE_ITERATE (uctx->uctx_dmaQ, idx) { -+ E4_DMA *dma = &uctx->uctx_dmas[idx]; -+ -+ if (dma->dma_vproc == vp && dma->dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ dma->dma_dstEvent = 0; -+ dma->dma_typeSize = DMA_ShMemWrite | DMA_Context (dma->dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ RING_QUEUE_ITERATE (uctx->uctx_dprocTrapQ, idx) { -+ ELAN4_DPROC_TRAP *trap = &uctx->uctx_dprocTraps[idx]; -+ -+ if (trap->tr_desc.dma_vproc == vp && trap->tr_desc.dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaTrapQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ trap->tr_desc.dma_dstEvent = 0; -+ trap->tr_desc.dma_typeSize = DMA_ShMemWrite | DMA_Context (trap->tr_desc.dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks) -+{ -+ unsigned long entered = jiffies; -+ unsigned int need_reenter = 0; -+ unsigned long flags; -+ int res; -+ int tbl; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_trap_handler: entered state=%d\n", uctx->uctx_trap_state); -+ -+ uctx->uctx_trap_count++; -+ -+ for (;;) -+ { -+ if (uctx->uctx_status & UCTX_SWAPPED_REASONS) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting on swapped reasons\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ if ((long) (jiffies - entered) > HZ) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting for reschedule\n"); -+ res = UTS_RESCHEDULE; -+ goto no_more_to_do; -+ } -+ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_ACTIVE: -+ uctx->uctx_trap_state = UCTX_TRAP_SLEEPING; -+ -+ if (nticks == 0 || need_reenter || kcondvar_timedwaitsig (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags, lbolt + nticks) != CV_RET_NORMAL) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting by kcondvar_timedwaitsig\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ /* Have slept above, so resample entered */ -+ entered = jiffies; -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ continue; -+ -+ case UCTX_TRAP_IDLE: -+ case UCTX_TRAP_SIGNALLED: -+ uctx->uctx_trap_state = UCTX_TRAP_ACTIVE; -+ break; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: resolve traps - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ switch ((res = resolve_all_traps (uctx, utrapp))) -+ { -+ case UTS_FINISHED: -+ break; -+ -+ case UTS_RESCHEDULE: -+ need_reenter++; -+ break; -+ -+ default: -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! user_ddcq_flush (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq not flushed - re-enter\n"); -+ need_reenter++; -+ continue; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ for(tbl=0; tbl < NUM_HASH_TABLES; tbl++) -+ if (uctx->uctx_ctxt.shuffle_needed[tbl]) -+ elan4mmu_do_shuffle (&uctx->uctx_ctxt, tbl); -+ -+ if ((res = progress_neterr (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ if ((res = execute_iproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: restart items - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ restart_command_queues (uctx); -+ -+ if (! restart_threads (uctx) || ! restart_dmas (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq full - re-enter\n"); -+ need_reenter++; -+ } -+ } -+ } -+ no_more_to_do: -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ -+ /* -+ * Always ensure that the command queue is flushed with a flow control -+ * write, so that on the next trap we (hopefully) find it empty and so -+ * can immediately restart the context. Also if we need to be re-enter -+ * the trap handler and don't have an interrupt outstanding, then issue -+ * one now. -+ */ -+ user_ddcq_flush (uctx); -+ if (need_reenter && uctx->uctx_ddcq_intr == 0) -+ { -+ uctx->uctx_ddcq_intr++; -+ user_ddcq_intr (uctx); -+ } -+ -+ if (--uctx->uctx_trap_count == 0 && (uctx->uctx_status & UCTX_SWAPPING)) -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ /* Should update the user trap area in this case as deliver_trap() -+ * has not been called -+ */ -+ if (res == UTS_RESCHEDULE) -+ put_user (res, &utrapp->ut_type); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: finished state=%d res=%d\n", uctx->uctx_trap_state, res); -+ -+ return (res == UTS_EFAULT ? -EFAULT : 0); -+} -+ -+USER_CQ * -+user_alloccq (USER_CTXT *uctx, unsigned cqsize, unsigned perm, unsigned uflags) -+{ -+ USER_CQ *ucq; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (ucq, USER_CQ *, sizeof (USER_CQ), 1); -+ -+ if (ucq == (USER_CQ *) NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ /* NOTE - do not allow the user to create high-priority queues as we only flush through the low-priority run queues */ -+ if ((ucq->ucq_cq = elan4_alloccq (&uctx->uctx_ctxt, cqsize, perm, (uflags & UCQ_REORDER) ? CQ_Reorder : 0)) == NULL) -+ { -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+ -+ PRINTF2 (uctx, DBG_CQ, "user_alloccq: failed elan4_allocq cqsize %d uflags %x\n", cqsize, uflags); -+ -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ atomic_set (&ucq->ucq_ref, 1); -+ -+ ucq->ucq_state = UCQ_RUNNING; -+ ucq->ucq_flags = uflags; -+ -+ PRINTF3 (uctx, DBG_CQ, "user_alloccq: ucq=%p idx=%d cqnum=%d\n", ucq, elan4_cq2idx (ucq->ucq_cq), elan4_cq2num(ucq->ucq_cq)); -+ -+ /* chain it onto the context */ -+ kmutex_lock (&uctx->uctx_cqlock); -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_add (&ucq->ucq_link, &uctx->uctx_cqlist); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (ucq); -+} -+ -+USER_CQ * -+user_findcq (USER_CTXT *uctx, unsigned idx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == idx) -+ { -+ atomic_inc (&ucq->ucq_ref); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (ucq); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (NULL); -+} -+ -+void -+user_dropcq (USER_CTXT *uctx, USER_CQ *ucq) -+{ -+ unsigned long flags; -+ -+ PRINTF2 (uctx, DBG_CQ, "user_dropcq: ucq=%p ref=%d\n", ucq, atomic_read (&ucq->ucq_ref)); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ if (! atomic_dec_and_test (&ucq->ucq_ref)) -+ { -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_del (&ucq->ucq_link); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ elan4_freecq (&uctx->uctx_ctxt, ucq->ucq_cq); -+ -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+} -+ -+int -+user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas) -+{ -+ ELAN4_DPROC_TRAP *dprocs; -+ ELAN4_EPROC_TRAP *eprocs; -+ ELAN4_TPROC_TRAP *tprocs; -+ E4_DMA *dmas; -+ E4_ThreadRegs *threads; -+ ELAN4_NETERR_MSG *msgs; -+ unsigned long flags; -+ -+ int nmsgs = NETERR_MSGS; -+ -+ /* bounds check the values that have been passed in */ -+ if (ndproc_traps < 2 || ndproc_traps > 10000 || -+ ntproc_traps < 1 || ntproc_traps > 100 || -+ neproc_traps < 6 || neproc_traps > 10000 || -+ nthreads < 2 || nthreads > 10000 || -+ ndmas < 2 || ndmas > 10000) -+ return -EINVAL; -+ -+ if (uctx->uctx_dmas != NULL) -+ return -EBUSY; -+ -+ KMEM_ZALLOC (dprocs, ELAN4_DPROC_TRAP *, ndproc_traps * sizeof (ELAN4_DPROC_TRAP), 1); -+ KMEM_ZALLOC (eprocs, ELAN4_EPROC_TRAP *, neproc_traps * sizeof (ELAN4_EPROC_TRAP), 1); -+ KMEM_ZALLOC (tprocs, ELAN4_TPROC_TRAP *, ntproc_traps * sizeof (ELAN4_TPROC_TRAP), 1); -+ KMEM_ZALLOC (threads, E4_ThreadRegs *, nthreads * sizeof (E4_ThreadRegs), 1); -+ KMEM_ZALLOC (dmas, E4_DMA *, ndmas * sizeof (E4_DMA), 1); -+ KMEM_ZALLOC (msgs, ELAN4_NETERR_MSG *, nmsgs * sizeof (ELAN4_NETERR_MSG), 1); -+ -+ if (dprocs == NULL || eprocs == NULL || tprocs == NULL || dmas == NULL || threads == NULL || msgs == NULL) -+ { -+ if (dprocs != NULL) KMEM_FREE (dprocs, ndproc_traps * sizeof (ELAN4_DPROC_TRAP)); -+ if (eprocs != NULL) KMEM_FREE (eprocs, neproc_traps * sizeof (ELAN4_EPROC_TRAP)); -+ if (tprocs != NULL) KMEM_FREE (tprocs, ntproc_traps * sizeof (ELAN4_TPROC_TRAP)); -+ if (threads != NULL) KMEM_FREE (threads, nthreads * sizeof (E4_ThreadRegs)); -+ if (dmas != NULL) KMEM_FREE (dmas, ndmas * sizeof (E4_DMA)); -+ if (msgs != NULL) KMEM_FREE (msgs, nmsgs * sizeof (ELAN4_NETERR_MSG)); -+ -+ return -ENOMEM; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_dprocTraps = dprocs; -+ uctx->uctx_eprocTraps = eprocs; -+ uctx->uctx_tprocTraps = tprocs; -+ uctx->uctx_threads = threads; -+ uctx->uctx_dmas = dmas; -+ uctx->uctx_msgs = msgs; -+ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, ndproc_traps, 1 /* 1 for 2nd dma */); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, ntproc_traps, 0); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, neproc_traps, 5 /* 1 for command, 2 for dma, 2 for inputter */); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, nthreads, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, ndmas, 1); -+ RING_QUEUE_INIT (uctx->uctx_msgQ, nmsgs, 0); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return 0; -+} -+ -+USER_CTXT * -+user_alloc (ELAN4_DEV *dev) -+{ -+ USER_CTXT *uctx; -+ int res; -+ int i; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (uctx, USER_CTXT *, sizeof (USER_CTXT), 1); -+ -+ if (uctx == NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ if (elan4_get_position (dev, &uctx->uctx_position) == ELAN_POS_UNKNOWN) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-EAGAIN); -+ } -+ -+ if ((res = elan4_insertctxt (dev, &uctx->uctx_ctxt, &user_trap_ops)) != 0) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(res); -+ } -+ -+ if (! user_ioproc_enabled) -+ uctx->uctx_ctxt.ctxt_features |= ELAN4_FEATURE_NO_IOPROC | ELAN4_FEATURE_PIN_DOWN; -+ if (! user_pagefault_enabled) -+ uctx->uctx_ctxt.ctxt_features |= ELAN4_FEATURE_NO_PAGEFAULT; -+ -+ KMEM_GETPAGES (uctx->uctx_upage, ELAN4_USER_PAGE *, btopr (sizeof (ELAN4_USER_PAGE)), 1); -+ if (uctx->uctx_upage == NULL) -+ { -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_trampoline = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_routetable = elan4_alloc_routetable (dev, 4 /* 512 << 4 == 8192 entries */)) == NULL) -+ { -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, uctx->uctx_routetable); -+ -+ /* initialise the trap and swap queues to be really full */ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, 0, 1); -+ -+ INIT_LIST_HEAD (&uctx->uctx_cent_list); -+ INIT_LIST_HEAD (&uctx->uctx_vpseg_list); -+ INIT_LIST_HEAD (&uctx->uctx_cqlist); -+ -+ uctx->uctx_haltop.op_function = user_flush; -+ uctx->uctx_haltop.op_arg = uctx; -+ uctx->uctx_haltop.op_mask = INT_Halted|INT_Discarding; -+ -+ uctx->uctx_dma_flushop.op_function = user_flush_dmas; -+ uctx->uctx_dma_flushop.op_arg = uctx; -+ -+ kmutex_init (&uctx->uctx_vpseg_lock); -+ kmutex_init (&uctx->uctx_cqlock); -+ kmutex_init (&uctx->uctx_rgnmutex); -+ -+ spin_lock_init (&uctx->uctx_spinlock); -+ spin_lock_init (&uctx->uctx_rgnlock); -+ spin_lock_init (&uctx->uctx_fault_lock); -+ -+ kcondvar_init (&uctx->uctx_wait); -+ -+ if ((uctx->uctx_ddcq = user_alloccq (uctx, CQ_Size1K, CQ_EnableAllBits, UCQ_SYSTEM)) == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ uctx->uctx_trap_count = 0; -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ uctx->uctx_status = 0 /* UCTX_DETACHED | UCTX_SWAPPED | UCTX_STOPPED */; -+ -+ init_timer (&uctx->uctx_int_timer); -+ -+ uctx->uctx_int_timer.function = user_signal_timer; -+ uctx->uctx_int_timer.data = (unsigned long) uctx; -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ -+ init_timer (&uctx->uctx_shuffle_timer); -+ -+ uctx->uctx_shuffle_timer.function = user_signal_timer; -+ uctx->uctx_shuffle_timer.data = (unsigned long) uctx; -+ -+ -+ init_timer (&uctx->uctx_neterr_timer); -+ uctx->uctx_neterr_timer.function = user_neterr_timer; -+ uctx->uctx_neterr_timer.data = (unsigned long) uctx; -+ -+ uctx->uctx_upage->upage_ddcq_completed = 0; -+ uctx->uctx_ddcq_completed = 0; -+ uctx->uctx_ddcq_insertcnt = 0; -+ -+ uctx->uctx_num_fault_save = num_fault_save; -+ if (uctx->uctx_num_fault_save) -+ { -+ KMEM_ZALLOC (uctx->uctx_faults, FAULT_SAVE *, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save), 1); -+ if ( uctx->uctx_faults == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ for (i = 0; i < uctx->uctx_num_fault_save; i++) -+ uctx->uctx_faults[i].next = (i == (uctx->uctx_num_fault_save-1) ? NULL : &uctx->uctx_faults[i+1]); -+ -+ } -+ uctx->uctx_fault_list = uctx->uctx_faults; -+ -+ return (uctx); -+} -+ -+void -+user_free (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ user_swapout (uctx, UCTX_EXITING); -+ -+ /* Detach from all input contexts */ -+ user_detach (uctx, NULL); -+ -+ /* since we're single threaded here - (called from close()) */ -+ /* we don't need to hold the lock to drop the command queues */ -+ /* since they cannot be mapped into user space */ -+ while (! list_empty (&uctx->uctx_cqlist)) -+ user_dropcq (uctx, list_entry (uctx->uctx_cqlist.next, USER_CQ, ucq_link)); -+ -+ /* Free off all of vpseg_list */ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ while (! list_empty (&uctx->uctx_vpseg_list)) -+ user_remove_vpseg(uctx, list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ del_timer_sync (&uctx->uctx_int_timer); -+ -+ if (timer_pending (&uctx->uctx_shuffle_timer)) -+ del_timer_sync (&uctx->uctx_shuffle_timer); -+ -+ if (timer_pending (&uctx->uctx_neterr_timer)) -+ del_timer_sync (&uctx->uctx_neterr_timer); -+ -+ if (uctx->uctx_dprocTraps) -+ KMEM_FREE (uctx->uctx_dprocTraps, uctx->uctx_dprocTrapQ.q_size * sizeof (ELAN4_DPROC_TRAP)); -+ if (uctx->uctx_tprocTraps) -+ KMEM_FREE (uctx->uctx_tprocTraps, uctx->uctx_tprocTrapQ.q_size * sizeof (ELAN4_TPROC_TRAP)); -+ if (uctx->uctx_eprocTraps) -+ KMEM_FREE (uctx->uctx_eprocTraps, uctx->uctx_eprocTrapQ.q_size * sizeof (ELAN4_EPROC_TRAP)); -+ if (uctx->uctx_dmas) -+ KMEM_FREE (uctx->uctx_dmas, uctx->uctx_dmaQ.q_size * sizeof (E4_DMA)); -+ if (uctx->uctx_msgs) -+ KMEM_FREE (uctx->uctx_msgs, NETERR_MSGS * sizeof (ELAN4_NETERR_MSG)); -+ if (uctx->uctx_threads) -+ KMEM_FREE (uctx->uctx_threads, uctx->uctx_threadQ.q_size * sizeof (E4_ThreadRegs)); -+ if (uctx->uctx_faults) -+ KMEM_FREE (uctx->uctx_faults, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save)); -+ -+ if (uctx->uctx_intcookie_table) -+ intcookie_free_table (uctx->uctx_intcookie_table); -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, NULL); -+ elan4_free_routetable (dev, uctx->uctx_routetable); -+ -+ /* Free off all USER_RGNs */ -+ user_freergns(uctx); -+ -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ -+ /* Clear the PG_Reserved bit before free to avoid a memory leak */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ kcondvar_destroy (&uctx->uctx_wait); -+ -+ spin_lock_destroy (&uctx->uctx_rgnlock); -+ spin_lock_destroy (&uctx->uctx_spinlock); -+ -+ kmutex_destroy (&uctx->uctx_rgnmutex); -+ kmutex_destroy (&uctx->uctx_cqlock); -+ kmutex_destroy (&uctx->uctx_vpseg_lock); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/user_ddcq.c linux-2.6.9/drivers/net/qsnet/elan4/user_ddcq.c ---- clean/drivers/net/qsnet/elan4/user_ddcq.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/user_ddcq.c 2005-07-20 07:35:36.000000000 -0400 -@@ -0,0 +1,230 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_ddcq.c,v 1.16.2.1 2005/07/20 11:35:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_ddcq.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#if PAGE_SIZE < CQ_CommandMappingSize -+# define ELAN4_COMMAND_QUEUE_MAPPING PAGE_SIZE -+#else -+# define ELAN4_COMMAND_QUEUE_MAPPING CQ_CommandMappingSize -+#endif -+ -+/* The user device driver command queue is used for re-issuing -+ * trapped items. It is allocated as a 1K command queue, and -+ * we insert command flow writes event 256 bytes (32 dwords). -+ */ -+#define USER_CTRLFLOW_COUNT 32 -+ -+/* Flow control of the device driver command queue is handled by periodically -+ * inserting dword writes into the command stream. When you need to know -+ * that the queue has been flushed, then you insert an extra contorl flow -+ * write into the command queue. Should the queue not be flushed, but the -+ * trap handler be returning to user space, then it will also insert and -+ * extra interrupt command to ensure that it is re-entered after the queue -+ * has been flushed. -+ * -+ * Note - we account the space for the interrupt command on each control -+ * flow write so that we do not overflow the queue even if we end up -+ * inserting an interrupt for every command flow write. In general only -+ * a single interrupt should get inserted.... -+ */ -+ -+#define user_ddcq_command_write(value,off) do { \ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_write: cmdptr=%x off=%d value=%llx\n", cmdptr, off, value);\ -+ writeq(value, (void *)(cmdptr + (off << 3))); \ -+} while (0) -+ -+#define user_ddcq_command_space(uctx) \ -+ ((CQ_Size (uctx->uctx_ddcq->ucq_cq->cq_size)>>3) - ((uctx)->uctx_ddcq_insertcnt - (uctx)->uctx_upage->upage_ddcq_completed)) -+ -+#define user_ddcq_command_flow_write(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ (uctx)->uctx_ddcq_completed = ((uctx)->uctx_ddcq_insertcnt += 3);\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_write: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (GUARD_CMD | GUARD_ALL_CHANNELS, 0);\ -+ user_ddcq_command_write (WRITE_DWORD_CMD | (uctx)->uctx_upage_addr, 1);\ -+ user_ddcq_command_write ((uctx)->uctx_ddcq_completed, 2);\ -+} while (0) -+ -+#define user_ddcq_command_flow_intr(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_intr: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (INTERRUPT_CMD | ELAN4_INT_COOKIE_DDCQ, 3);\ -+} while (0) -+ -+#define user_ddcq_command_prologue(uctx, count) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_prologue: iptr=%llx cmdptr=%x\n", iptr, cmdptr); -+ -+#define user_ddcq_command_epilogue(uctx, count, extra) \ -+ (uctx)->uctx_ddcq_insertcnt = iptr + (count);\ -+\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_epilogue: iptr=%llx + %x + %x - completed %llx\n", iptr, count, extra, (uctx)->uctx_ddcq_completed);\ -+ if (((iptr) + (count) + (extra)) > ((uctx)->uctx_ddcq_completed + USER_CTRLFLOW_COUNT))\ -+ user_ddcq_command_flow_write(uctx); \ -+} while (0) -+ -+int -+user_ddcq_check (USER_CTXT *uctx, unsigned num) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: insert=%llx completed=%llx [%llx] space=%d num=%d\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_ddcq_completed, -+ uctx->uctx_upage->upage_ddcq_completed, -+ user_ddcq_command_space (uctx), -+ num -+ ); -+ -+ /* Ensure that there is enough space for the command we want to issue, -+ * PLUS the guard/writeword for the control flow flush. -+ * PLUS the interrupt command for rescheduling */ -+ if (user_ddcq_command_space (uctx) > (num + 4)) -+ { -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: loads of space\n"); -+ -+ return (1); -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: not enough space - reschedule\n"); -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ return (0); -+} -+ -+int -+user_ddcq_flush (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq = uctx->uctx_ddcq; -+ -+ switch (ucq->ucq_state) -+ { -+ case UCQ_TRAPPED: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: command queue is trapped\n"); -+ return (0); -+ -+ case UCQ_NEEDS_RESTART: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: restarting command queue\n"); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ elan4_restartcq (dev, ucq->ucq_cq); -+ } -+ break; -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: insertcnt=%llx completed=%llx [%llx]\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_ddcq_completed, uctx->uctx_upage->upage_ddcq_completed); -+ -+ if (uctx->uctx_ddcq_completed != uctx->uctx_ddcq_insertcnt) -+ user_ddcq_command_flow_write (uctx); -+ -+ return (uctx->uctx_ddcq_completed == uctx->uctx_upage->upage_ddcq_completed); -+} -+ -+void -+user_ddcq_intr (USER_CTXT *uctx) -+{ -+ user_ddcq_command_flow_intr (uctx); -+} -+ -+void -+user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_dma: cookie=%llx vproc=%llx\n", dma->dma_cookie, dma->dma_vproc); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write ((dma->dma_typeSize & ~DMA_ContextMask) | RUN_DMA_CMD, 0); -+ user_ddcq_command_write (dma->dma_cookie, 1); -+ user_ddcq_command_write (dma->dma_vproc, 2); -+ user_ddcq_command_write (dma->dma_srcAddr, 3); -+ user_ddcq_command_write (dma->dma_dstAddr, 4); -+ user_ddcq_command_write (dma->dma_srcEvent, 5); -+ user_ddcq_command_write (dma->dma_dstEvent, 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_thread: PC=%llx SP=%llx\n", regs->Registers[0], regs->Registers[1]); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write (regs->Registers[0] | RUN_THREAD_CMD, 0); -+ user_ddcq_command_write (regs->Registers[1], 1); -+ user_ddcq_command_write (regs->Registers[2], 2); -+ user_ddcq_command_write (regs->Registers[3], 3); -+ user_ddcq_command_write (regs->Registers[4], 4); -+ user_ddcq_command_write (regs->Registers[5], 5); -+ user_ddcq_command_write (regs->Registers[6], 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr) -+{ -+ user_ddcq_command_prologue (uctx, 1) { -+ -+ user_ddcq_command_write (SET_EVENT_CMD | addr, 0); -+ -+ } user_ddcq_command_epilogue (uctx, 1, 0); -+} -+ -+void -+user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_seteventn: addr=%llx count=%lx\n", addr, count); -+ -+ user_ddcq_command_prologue (uctx, 2) { -+ -+ user_ddcq_command_write (SET_EVENTN_CMD, 0); -+ user_ddcq_command_write (addr | count, 1); -+ -+ } user_ddcq_command_epilogue (uctx, 2, 0); -+} -+ -+void -+user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_waitevent: addr=%llx CountAndType=%llx Param=%llx,%llx\n", addr, CountAndType, Param0, Param1); -+ -+ user_ddcq_command_prologue (uctx, 4) { -+ -+ user_ddcq_command_write (WAIT_EVENT_CMD | addr, 0); -+ user_ddcq_command_write (CountAndType, 1); -+ user_ddcq_command_write (Param0, 2); -+ user_ddcq_command_write (Param1, 3); -+ -+ } user_ddcq_command_epilogue (uctx, 4, 0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/elan4/user_Linux.c linux-2.6.9/drivers/net/qsnet/elan4/user_Linux.c ---- clean/drivers/net/qsnet/elan4/user_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/elan4/user_Linux.c 2005-06-09 10:46:55.000000000 -0400 -@@ -0,0 +1,349 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_Linux.c,v 1.35.2.1 2005/06/09 14:46:55 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_Linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+#ifdef CONFIG_HUGETLB_PAGE -+#include -+#endif -+ -+#include -+#include -+#include -+ -+extern struct vm_operations_struct mem_vm_ops; -+extern struct vm_operations_struct user_vm_ops; -+extern int mem_pteload (struct vm_area_struct *vma, unsigned long pgoff, ELAN4_CTXT *ctxt, E4_Addr eaddr, int perm); -+extern int user_pteload (struct vm_area_struct *vma, unsigned long pgoff, ELAN4_CTXT *ctxt, E4_Addr eaddr, int perm); -+ -+static inline int -+user_load_page (USER_CTXT *uctx, struct vm_area_struct *vma, unsigned long maddr, E4_Addr eaddr, int perm, int writeable) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct page *page = NULL; -+ int i, res = 0; -+ -+ if (get_user_pages (current, current->mm, maddr, 1, writeable, 0, &page, NULL) == 1) -+ { -+ /* NOTE - the page can't be paged out since we've pinned it down. -+ * it also can't be munmap'd since we hold the mmap_sem */ -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_page: %lx %s page %p\n", maddr, writeable ? "writeable" : "readonly", page); -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ if ((res = elan4mmu_pteload_page (&uctx->uctx_ctxt, 0, eaddr + i, page, perm)) < 0) -+ break; -+ -+ page_cache_release (page); -+ } -+ else -+ { -+ if (vma && vma->vm_ops == &mem_vm_ops) -+ res = mem_pteload (vma, maddr, &uctx->uctx_ctxt, eaddr, perm); -+ else if (vma && vma->vm_ops == &user_vm_ops) -+ res = user_pteload (vma, maddr, &uctx->uctx_ctxt, eaddr, perm); -+ else -+ res = -EINVAL; -+ } -+ -+ return res; -+} -+ -+int -+user_load_range (USER_CTXT *uctx, E4_Addr eaddr, unsigned long nbytes, E4_uint32 fsr) -+{ -+ struct mm_struct *mm = current->mm; -+ int writeable = (AT_Perm(fsr) == AT_PermLocalDataWrite || -+ AT_Perm(fsr) == AT_PermRemoteWrite || -+ AT_Perm(fsr) == AT_PermLocalEvent || -+ AT_Perm(fsr) == AT_PermRemoteEvent); -+ struct vm_area_struct *vma; -+ int perm; -+ unsigned long len; -+ unsigned long maddr; -+ int res = 0; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (nbytes > 0) -+ { -+ USER_RGN *rgn = user_rgnat_elan (uctx, eaddr); -+ -+ if (rgn == NULL || ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, AT_Perm (fsr))) -+ { -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx -> %s\n", eaddr, rgn == NULL ? "no mapping" : "no permission"); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (rgn == NULL ? -EFAULT : -EPERM); -+ } -+ -+ if (writeable) -+ perm = rgn->rgn_perm; -+ else if (AT_Perm(fsr) == AT_PermExecute) -+ perm = PERM_LocRead | (rgn->rgn_perm & ~PERM_Mask); -+ else -+ perm = ELAN4_PERM_READONLY (rgn->rgn_perm & PERM_Mask) | (rgn->rgn_perm & ~PERM_Mask); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: rgn=%p [%llx.%lx.%x]\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len); -+ -+ len = ((rgn->rgn_ebase + rgn->rgn_len) - eaddr); -+ if (len > nbytes) -+ len = nbytes; -+ nbytes -= len; -+ -+ maddr = rgn->rgn_mbase + (eaddr - rgn->rgn_ebase); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx->%llx -> %lx->%lx len=%x perm=%x\n", eaddr, -+ eaddr + len, maddr, maddr + len, len, perm); -+ -+ down_read (&mm->mmap_sem); -+ while (len > 0) -+ { -+#if defined(conditional_schedule) -+ conditional_schedule(); -+#endif -+ if ((vma = find_vma_intersection (mm, maddr, maddr + PAGE_SIZE)) == NULL || -+ (writeable && !(vma->vm_flags & VM_WRITE))) -+ { -+ PRINTF (DBG_USER, DBG_FAULT, "user_load_range: %s %lx\n", vma ? "not writeble at" : "no vma for", maddr); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ -+ if ((res = user_load_page (uctx, vma, maddr, eaddr, perm, writeable)) < 0) -+ { -+ PRINTF (DBG_USER, DBG_FAULT, "user_load_range: failed to load page res=%d\n", res); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return res; -+ } -+ -+ eaddr += PAGE_SIZE; -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+ up_read (&mm->mmap_sem); -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: alldone\n"); -+ -+ return (0); -+} -+ -+void -+user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len) -+{ -+ virtaddr_t lim = addr + len - 1; -+ struct vm_area_struct *vma; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((vma = find_vma (current->mm, addr)) != NULL) -+ { -+ do { -+ unsigned long start = vma->vm_start; -+ unsigned long end = vma->vm_end; -+ -+ if ((start-1) >= lim) -+ break; -+ -+ if (start < addr) start = addr; -+ if ((end-1) > lim) end = lim+1; -+ -+ if (vma->vm_flags & VM_IO) -+ continue; -+ -+ user_unload_main (uctx, start, end - start); -+ -+ if (get_user_pages (current, current->mm, start, (end - start)/PAGE_SIZE, -+ (vma->vm_flags & VM_WRITE) != 0, 0, NULL, NULL) > 0) -+ user_update_main (uctx, vma->vm_mm, vma, start, end - start); -+ -+ else if (vma->vm_ops == &mem_vm_ops) -+ user_update_main (uctx, vma->vm_mm, vma, start, end - start); -+ else if (vma->vm_ops == &user_vm_ops) -+ user_update_main (uctx, vma->vm_mm, vma, start, end - start); -+ -+ } while ((vma = find_vma (current->mm, vma->vm_end)) != NULL); -+ } -+ up_read (¤t->mm->mmap_sem); -+} -+ -+static void -+user_update_range (USER_CTXT *uctx, int tbl, struct mm_struct *mm, struct vm_area_struct *vma, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, int perm) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int roperm = ELAN4_PERM_READONLY(perm & PERM_Mask) | (perm & ~PERM_Mask); -+ int i, write; -+ pte_t *ptep; -+ struct page *page; -+ -+ if (vma && vma->vm_ops == &mem_vm_ops) -+ { -+ mem_pteload (vma, maddr, &uctx->uctx_ctxt, eaddr, perm); -+ return; -+ } -+ -+ if (vma && vma->vm_ops == &user_vm_ops) -+ { -+ user_pteload (vma, maddr, &uctx->uctx_ctxt, eaddr, perm); -+ return; -+ } -+ -+#ifdef CONFIG_HUGETLB_PAGE -+ /* If the kernel has hugetlb support compiled in, then -+ * we can't walk the pte's unless we know for sure that -+ * they're normal ptes. */ -+ -+ if (vma == NULL || is_vm_hugetlb_page (vma)) -+ return; -+#endif -+ -+ while (len > 0) -+ { -+ if ((ptep = find_pte_map (mm, maddr)) != NULL) -+ { -+ write = (pte_write(*ptep) && pte_dirty(*ptep)); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION (2, 5, 0) -+ page = pte_page (*ptep); -+ if (! VALID_PAGE (page)) -+ page = NULL; -+#else -+ { -+ unsigned long pfn; -+ -+ pfn = pte_pfn (*ptep); -+ page = pfn_valid (pfn) ? pfn_to_page (pfn) : NULL; -+ } -+#endif -+ pte_unmap (ptep); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_range: maddr=%lx eaddr=%llx -> page %p %lx %s\n", maddr, eaddr, page, page_to_pfn (page), write ? "writeable" : "read-only"); -+ -+ if (page != NULL) -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[tbl])) -+ elan4mmu_pteload_page (&uctx->uctx_ctxt, tbl, eaddr + i, page, write ? perm : roperm); -+ } -+ -+ eaddr += PAGE_SIZE; -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+} -+ -+void -+user_update_main (USER_CTXT *uctx, struct mm_struct *mm, struct vm_area_struct *vma, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, vma, rgn->rgn_mbase, rgn->rgn_ebase, rgn->rgn_len, rgn->rgn_perm); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, vma, rgn->rgn_mbase, rgn->rgn_ebase, ssize, rgn->rgn_perm); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: end %lx -> %lx\n", start, start + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, vma, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize, rgn->rgn_perm); -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: middle %lx -> %lx\n", start, end); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, vma, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), len, rgn->rgn_perm); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+ -+void -+user_unload_main (USER_CTXT *uctx, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, ssize); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: end %lx -> %lx\n", start, start + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize); -+ } -+ else -+ { -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: middle %lx -> %lx\n", start, end); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), len); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/asm_elan4_thread.S linux-2.6.9/drivers/net/qsnet/ep/asm_elan4_thread.S ---- clean/drivers/net/qsnet/ep/asm_elan4_thread.S 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/asm_elan4_thread.S 2003-09-23 09:55:11.000000000 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: asm_elan4_thread.S,v 1.1 2003/09/23 13:55:11 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+/* -+ * c_reschedule (E4_uint64 *commandport) -+ */ -+ .global c_reschedule -+c_reschedule: -+ add %sp, -128, %sp -+ st64 %r16, [%sp] // preserve call preserved registers -+ st64 %r24, [%sp + 64] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ mov NOP_CMD, %r23 // "nop" command -+ st64suspend %r16, [%r8] -+3: ld64 [%sp], %r16 -+ ld64 [%sp + 64], %r24 // restore call preserved register -+ jmpl %r2+8, %r0 // and return -+ add %sp, 128, %sp -+ -+ -+/* -+ * c_waitevent (E4_uint64 *commandport, E4_Event *event, E4_uint64 count) -+ */ -+ .global c_waitevent -+c_waitevent: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 // "flush command queue desc" command -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ -+ or %r9, WAIT_EVENT_CMD, %r16 -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 -+ mov %sp, %r18 -+ mov %r8, %r19 -+ -+ st32suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -diff -urN clean/drivers/net/qsnet/ep/assym_elan4.h linux-2.6.9/drivers/net/qsnet/ep/assym_elan4.h ---- clean/drivers/net/qsnet/ep/assym_elan4.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/assym_elan4.h 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: genassym_elan4.c,v 1.3 2004/04/25 11:26:07 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/genassym_elan4.c,v $*/ -+ -+/* Generated by genassym_elan4 - do not modify */ -+ -+#define EP4_RCVR_THREAD_STALL 0 -+#define EP4_RCVR_PENDING_TAILP 128 -+#define EP4_RCVR_PENDING_HEAD 136 -+#define EP4_RCVR_DEBUG 176 -+#define EP4_RXD_NEXT 664 -+#define EP4_RXD_QUEUED 728 -+#define EP4_RXD_DEBUG 944 -diff -urN clean/drivers/net/qsnet/ep/cm.c linux-2.6.9/drivers/net/qsnet/ep/cm.c ---- clean/drivers/net/qsnet/ep/cm.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/cm.c 2005-05-24 05:19:12.000000000 -0400 -@@ -0,0 +1,2835 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm.c,v 1.90 2005/05/24 09:19:12 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+#if defined(LINUX) -+#include "conf_linux.h" -+#endif -+ -+int BranchingRatios[CM_MAX_LEVELS]; -+ -+int MachineId = -1; -+int BrokenLevel = -1; /* Simulates Broken Network */ -+int RejoinCheck = 1; -+int RejoinPanic = 0; -+ -+static int -+SegmentNo (CM_RAIL *cmRail, u_int nodeid, u_int lvl) -+{ -+ int i; -+ -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < lvl; i++) -+ nodeid /= cmRail->Levels[i].NumSegs; -+ -+ return (nodeid % cmRail->Levels[lvl].NumSegs); -+} -+ -+static int -+ClusterIds (CM_RAIL *cmRail, int clvl, int *clmin, int *clmax) -+{ -+ int clid = cmRail->Rail->Position.pos_nodeid - cmRail->Levels[clvl].MinNodeId; -+ -+ if (clvl == 0) -+ *clmin = *clmax = clid; -+ else -+ { -+ *clmin = cmRail->Levels[clvl - 1].MinNodeId - cmRail->Levels[clvl].MinNodeId; -+ *clmax = *clmin + cmRail->Levels[clvl - 1].NumNodes - 1; -+ } -+ return (clid); -+} -+ -+static void -+__Schedule_Timer (CM_RAIL *cmRail, long tick) -+{ -+ if (! timer_pending (&cmRail->HeartbeatTimer) || AFTER (cmRail->NextRunTime, tick)) -+ { -+ cmRail->NextRunTime = tick; -+ -+ mod_timer (&cmRail->HeartbeatTimer, tick); -+ } -+} -+ -+static void -+__Schedule_Discovery (CM_RAIL *cmRail) /* we urgently need to schedule discovery */ -+{ -+ __Schedule_Timer (cmRail, cmRail->NextDiscoverTime = lbolt); -+} -+ -+static int -+MsgBusy (CM_RAIL *cmRail, int msgNumber) -+{ -+ switch (ep_outputq_state (cmRail->Rail, cmRail->MsgQueue, msgNumber)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ uint8_t type = msg->Hdr.Type; -+ uint16_t nmaps = msg->Hdr.NumMaps; -+ int16_t off = msg->Payload.Statemaps[CM_MSG_MAP(0)].offset; -+ -+ CPRINTF4 (((type == CM_MSG_TYPE_DISCOVER_LEADER) || (type == CM_MSG_TYPE_DISCOVER_SUBORDINATE)) ? 6 : 3, /* we expect broadcasts to be NACKed */ -+ "%s: msg %d type %d failed%s\n", cmRail->Rail->Name, msgNumber, type, -+ (type != CM_MSG_TYPE_HEARTBEAT) ? "" : nmaps == 0 ? ": null heartbeat" : -+ off == STATEMAP_RESET ? ": heartbeat with R statemaps" : ": heartbeat with statemaps"); -+#endif -+ return 0; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: -+ return 0; -+ -+ default: -+ panic ("MsgBusy - bad return code from ep_outputq_state\n"); -+ /* NOTREACHED */ -+ } -+ return 0; -+} -+ -+static void -+LaunchMessage (CM_RAIL *cmRail, int msgNumber, int vp, int qnum, int retries, int type, int lvl, int nmaps) -+{ -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (nmaps >= 0 && nmaps <= CM_MSG_MAXMAPS); -+ ASSERT (SPINLOCK_HELD (&cmRail->Lock)); -+ -+ hdr->Version = CM_MSG_VERSION; -+ hdr->ParamHash = cmRail->ParamHash; -+ hdr->Timestamp = cmRail->Timestamp; -+ hdr->Checksum = 0; -+ hdr->NodeId = cmRail->Rail->Position.pos_nodeid; -+ hdr->MachineId = MachineId; -+ hdr->NumMaps = nmaps; -+ hdr->Level = lvl; -+ hdr->Type = type; -+ hdr->Checksum = CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)); -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ return; -+ -+ if (ep_outputq_send (cmRail->Rail, cmRail->MsgQueue, msgNumber, -+ CM_MSG_SIZE(nmaps), vp, qnum, retries)); -+ IncrStat (cmRail, LaunchMessageFail); -+} -+ -+static int -+SendMessage (CM_RAIL *cmRail, int nodeId, int lvl, int type) -+{ -+ int msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ int n = CM_NUM_SPARE_MSG_BUFFERS; -+ int retries; -+ -+ ASSERT (type == CM_MSG_TYPE_IMCOMING || /* other types must use SendToSgmt */ -+ type == CM_MSG_TYPE_REJOIN); -+ -+ while (n-- > 0 && MsgBusy (cmRail, msgNumber)) /* search for idle "spare" buffer */ -+ { -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) -+ cmRail->NextSpareMsg = 0; -+ -+ msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ } -+ -+ if (n == 0) /* all "spare" message buffers busy */ -+ { -+ CPRINTF3 (3, "%s: all spare message buffers busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, nodeId); -+ return (0); -+ } -+ -+ /* NB IMCOMING may be echoed by MANY nodes, so we don't (and musn't) have any retries */ -+ retries = (type == CM_MSG_TYPE_IMCOMING) ? 0 : CM_P2P_DMA_RETRIES; -+ -+ LaunchMessage (cmRail, msgNumber, EP_VP_NODE (nodeId), EP_SYSTEMQ_INTR, /* eager receive */ -+ retries, type, lvl, 0); -+ -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) /* check this one last next time */ -+ cmRail->NextSpareMsg = 0; -+ -+ return (1); -+} -+ -+static int -+SendToSgmt (CM_RAIL *cmRail, CM_SGMT *sgmt, int type) -+{ -+ bitmap_t seg; -+ int offset; -+ int nmaps; -+ int sidx; -+ int clvl; -+ -+ ASSERT (sgmt->Level <= cmRail->TopLevel); -+ -+ if (MsgBusy (cmRail, sgmt->MsgNumber)) /* previous message still busy */ -+ { -+ CPRINTF3 (3, "%s: node message buffer busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, sgmt->NodeId); -+ -+ return (0); -+ } -+ -+ switch (type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ ASSERT (sgmt->State == CM_SGMT_ABSENT); -+ ASSERT (sgmt->Level == ((cmRail->Role == CM_ROLE_LEADER_CANDIDATE) ? cmRail->TopLevel : cmRail->TopLevel - 1)); -+ ASSERT (sgmt->Level < cmRail->NumLevels); -+ ASSERT (sgmt->Sgmt == cmRail->Levels[sgmt->Level].MySgmt); -+ -+ /* broadcast to me and all my peers at this level (== my segment in the level above) */ -+ sidx = (sgmt->Level == cmRail->NumLevels - 1) ? 0 : cmRail->Levels[sgmt->Level + 1].MySgmt; -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level + 1, sidx), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ type, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ ASSERT (sgmt->State == CM_SGMT_WAITING); -+ ASSERT (sgmt->Level > 0); /* broadcasting just to subtree */ -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level, sgmt->Sgmt), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ CM_MSG_TYPE_DISCOVER_SUBORDINATE, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_NOTIFY: -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_INTR, CM_P2P_DMA_RETRIES, /* eager rx; lots of retries */ -+ CM_MSG_TYPE_NOTIFY, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ { -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, sgmt->MsgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ hdr->AckSeq = sgmt->AckSeq; -+ -+ if (!sgmt->MsgAcked) /* Current message not acknowledged */ -+ { -+ /* must have been something significant to require an ack */ -+ ASSERT (sgmt->SendMaps); -+ ASSERT (sgmt->NumMaps > 0); -+ -+ CPRINTF3 (3, "%s: retrying heartbeat to %d (%d entries)\n", cmRail->Rail->Name, sgmt->NodeId, sgmt->NumMaps); -+ -+ IncrStat (cmRail, RetryHeartbeat); -+ -+ nmaps = sgmt->NumMaps; -+ } -+ else -+ { -+ nmaps = 0; -+ -+ if (sgmt->SendMaps) /* can send maps */ -+ { -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!sgmt->Maps[clvl].OutputMapValid) -+ continue; -+ -+ while ((offset = statemap_findchange (sgmt->Maps[clvl].OutputMap, &seg, 1)) >= 0) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 1; -+ -+ map->level = clvl; -+ map->offset = offset; -+ map->seg[0] = seg & 0xffff; -+ map->seg[1] = (seg >> 16) & 0xffff; -+#if (BT_ULSHIFT == 6) -+ map->seg[2] = (seg >> 32) & 0xffff; -+ map->seg[3] = (seg >> 48) & 0xffff; -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ -+ if (sgmt->Maps[clvl].SentChanges) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ map->level = clvl; -+ map->offset = STATEMAP_NOMORECHANGES; -+ -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ } -+ } -+ -+ ASSERT (nmaps < CM_MSG_MAXMAPS); -+ -+ msg_full: -+ sgmt->NumMaps = nmaps; /* remember how many incase we retry */ -+ -+ if (nmaps == 0) /* no changes to send */ -+ hdr->Seq = sgmt->MsgSeq; /* this one can be dropped */ -+ else -+ { -+ hdr->Seq = ++(sgmt->MsgSeq); /* on to next message number */ -+ sgmt->MsgAcked = 0; /* need this one to be acked before I can send another */ -+ -+ IncrStat (cmRail, MapChangesSent); -+ } -+ } -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_POLLED, CM_P2P_DMA_RETRIES, /* polled receive, lots of retries */ -+ CM_MSG_TYPE_HEARTBEAT, sgmt->Level, nmaps); -+ -+ IncrStat (cmRail, HeartbeatsSent); -+ -+ return (1); -+ } -+ -+ default: /* other types must use SendMessage */ -+ printk ("SendToSgmt: invalid type %d\n", type); -+ ASSERT (0); -+ -+ return (1); -+ } -+} -+ -+static char * -+GlobalStatusString (statemap_t *map, int idx) -+{ -+ char *strings[] = {"....", "S...", "C...", "R...", -+ ".s..", "Ss..", "Cs..", "Rs..", -+ "..r.", "S.r.", "C.r.", "R.r.", -+ ".sr.", "Ssr.", "Csr.", "Rsr.", -+ "...R", "S..R", "C..R", "R..R", -+ ".s.R", "Ss.R", "Cs.R", "Rs.R", -+ "..rR", "S.rR", "C.rR", "R.rR", -+ ".srR", "SsrR", "CsrR", "RsrR"}; -+ -+ return (strings[statemap_getbits (map, idx * CM_GSTATUS_BITS, CM_GSTATUS_BITS)]); -+} -+ -+static char * -+MapString (char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ static char *space; -+ int i; -+ -+ if (space == NULL) -+ KMEM_ALLOC (space, char *, EP_MAX_NODES*(CM_GSTATUS_BITS+1), 0); -+ -+ if (space == NULL) -+ return (""); -+ else -+ { -+ char *ptr = space; -+ -+ sprintf (space, "%s ", name); ptr += strlen (ptr); -+ for (i = 0; i < nnodes; i++, ptr += strlen (ptr)) -+ sprintf (ptr, "%s%s", i == 0 ? "" : ",", GlobalStatusString (map, i)); -+ sprintf (ptr, " %s", trailer); -+ return (space); -+ } -+} -+ -+void -+DisplayMap (DisplayInfo *di, CM_RAIL *cmRail, char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ char linebuf[256]; -+ char *ptr = linebuf; -+ int i; -+ -+#define NODES_PER_LINE 32 -+ for (i = 0; i < nnodes; i++) -+ { -+ if (ptr == linebuf) -+ { -+ sprintf (ptr, "%4d", i); -+ ptr += strlen (ptr); -+ } -+ -+ sprintf (ptr, ",%s", GlobalStatusString (map, i)); -+ ptr += strlen (ptr); -+ -+ if ((i % NODES_PER_LINE) == (NODES_PER_LINE-1) || (i == (nnodes-1))) -+ { -+ (di->func)(di->arg, "%s: %s %s %s\n", cmRail->Rail->Name, name, linebuf, trailer); -+ ptr = linebuf; -+ } -+ } -+#undef NODES_PER_LINE -+} -+ -+void -+DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int clvl; -+ char mapname[128]; -+ -+ (di->func)(di->arg, "%s: Node %d maps...\n", cmRail->Rail->Name, cmRail->Rail->Position.pos_nodeid); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nnodes = cmRail->Levels[clvl].NumNodes; -+ -+ (di->func)(di->arg, "%s: Cluster level %d: Connected %ld - %s%s\n", -+ cmRail->Rail->Name, clvl, cmRail->Levels[clvl].Connected, -+ cmRail->Levels[clvl].Online ? "Online" : "Offline", -+ cmRail->Levels[clvl].Restarting ? ", Restarting" : ""); -+ -+ for (lvl = 0; lvl < cmRail->TopLevel && lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ sprintf (mapname, "%10s%2d", "Level", lvl); -+ DisplayMap (di, cmRail, mapname, level->SubordinateMap[clvl], nnodes, -+ level->SubordinateMapValid[clvl] ? "" : "(invalid)"); -+ } -+ -+ sprintf (mapname, "%12s", "Local"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LocalMap, nnodes, ""); -+ -+ sprintf (mapname, "%12s", "Subtree"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].SubTreeMap, nnodes, -+ cmRail->Levels[clvl].SubTreeMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "Global"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].GlobalMap, nnodes, -+ cmRail->Levels[clvl].GlobalMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "LastGlobal"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LastGlobalMap, nnodes, ""); -+ } -+} -+ -+void -+DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ (di->func)(di->arg, "%s: Node %d segments...\n", cmRail->Rail->Name, cmRail->NodeId); -+ -+ for (lvl = 0; lvl <= cmRail->TopLevel && lvl < cmRail->NumLevels; lvl++) -+ { -+ (di->func)(di->arg, " level %d: ", lvl); -+ -+ for (sidx = 0; sidx < ((lvl == cmRail->TopLevel) ? 1 : cmRail->Levels[lvl].NumSegs); sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ (di->func)(di->arg, "[%d, in: %d out: %d %s%s]", -+ sgmt->NodeId, -+ sgmt->AckSeq, -+ sgmt->MsgSeq, -+ sgmt->MsgAcked ? "A" : "-", -+ sgmt->SendMaps ? "!" : "-"); -+ else -+ (di->func)(di->arg, "[%s]", (sgmt->State == CM_SGMT_ABSENT ? "absent" : -+ sgmt->State == CM_SGMT_WAITING ? "waiting" : -+ sgmt->State == CM_SGMT_COMING ? "coming" : "UNKNOWN")); -+ } -+ (di->func)(di->arg, "\n"); -+ } -+} -+ -+ -+static void -+StartConnecting (CM_RAIL *cmRail, CM_SGMT *sgmt, int NodeId, int Timestamp) -+{ -+ int clvl; -+ -+ CPRINTF4 (2, "%s: lvl %d subtree %d node %d -> connecting\n", cmRail->Rail->Name, sgmt->Level, sgmt->Sgmt, NodeId); -+ -+ /* Only reconnect the same guy if he was reborn */ -+ ASSERT (sgmt->State != CM_SGMT_PRESENT || -+ (sgmt->NodeId == NodeId && sgmt->Timestamp != Timestamp)); -+ -+ /* After we've connected to a new peer, we wait to receive -+ * STATEMAP_RESET before we accumulate changes and we wait for a -+ * complete map to be received before we propagate changes to other -+ * nodes. -+ * -+ * If I'm the subordinate, I can start sending maps right away, since -+ * the leader is ready for them already. If I'm the leader, I hold off -+ * sending maps until I've seen the subordinate's first heartbeat, -+ * because the subordinate might miss my NOTIFY message, still think -+ * she's a leader candidate and ignore my heartbeats. -+ */ -+ sgmt->SendMaps = (sgmt->Level == cmRail->TopLevel); /* I can send maps to my leader (she NOTIFIED me) */ -+ -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_reset (sgmt->Maps[clvl].CurrentInputMap); -+ statemap_reset (sgmt->Maps[clvl].InputMap); -+ statemap_reset (sgmt->Maps[clvl].OutputMap); -+ -+ sgmt->Maps[clvl].InputMapValid = 0; -+ sgmt->Maps[clvl].OutputMapValid = 0; -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* connection to leader */ -+ { -+ ASSERT (sgmt->Sgmt == 0); -+ ASSERT (cmRail->Role == CM_ROLE_SUBORDINATE); -+ -+ if (cmRail->Levels[clvl].SubTreeMapValid) /* already got a subtree map to send up */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ } -+ } -+ else /* connection to subordinate */ -+ { -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ -+ if (cmRail->Levels[clvl].GlobalMapValid) /* already got a global map to broadcast */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ } -+ } -+ } -+ -+ /* Initialise sequence counters */ -+ sgmt->MsgSeq = sgmt->AckSeq = 0; -+ sgmt->MsgAcked = 1; /* ready to send a new sequenced message */ -+ -+ sgmt->State = CM_SGMT_PRESENT; -+ sgmt->NodeId = NodeId; -+ sgmt->UpdateTick = lbolt; -+ sgmt->Timestamp = Timestamp; -+} -+ -+static void -+StartSubTreeDiscovery (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ sgmt->WaitingTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+} -+ -+void -+StartSubordinateDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int lvl = cmRail->TopLevel - 1; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ ASSERT (lvl >= 0 && lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ if (i != level->MySgmt) /* No-one should connect here */ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ } -+} -+ -+void -+StartLeaderDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int clvl; -+ CM_LEVEL *level = &cmRail->Levels[cmRail->TopLevel]; -+ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ for (clvl = cmRail->TopLevel; clvl < cmRail->NumLevels; clvl++) -+ { -+ cmRail->Levels[clvl].GlobalMapValid = 0; -+ cmRail->Levels[clvl].SubTreeMapValid = 0; -+ level->SubordinateMapValid[clvl] = 0; -+ } -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ } -+ -+ cmRail->DiscoverStartTick = lbolt; -+ cmRail->Role = CM_ROLE_LEADER_CANDIDATE; -+ -+ __Schedule_Discovery (cmRail); -+} -+ -+static void -+RaiseTopLevel (CM_RAIL *cmRail) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: RaiseTopLevel %d\n", cmRail->Rail->Name, cmRail->TopLevel + 1); -+ -+ if (++cmRail->TopLevel == cmRail->NumLevels) /* whole machine leader? */ -+ cmRail->Role = CM_ROLE_LEADER; -+ else -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+ -+ StartSubordinateDiscovery (cmRail); /* and any direct subordinates */ -+} -+ -+static void -+LowerTopLevel (CM_RAIL *cmRail, int lvl) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: LowerTopLevel %d\n", cmRail->Rail->Name, lvl); -+ -+ if (lvl == 0) -+ cmRail->Timestamp = lbolt; -+ -+ cmRail->TopLevel = lvl; -+ -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+} -+ -+static int -+IShouldLead (CM_RAIL *cmRail, CM_MSG *msg) -+{ -+ /* NB, this function MUST be consistently calculated on any nodes, just -+ * from the info supplied in the message. Otherwise leadership -+ * arbitration during concurrent discovery will fail. -+ */ -+ return (cmRail->NodeId < msg->Hdr.NodeId); -+} -+ -+static int -+SumCheck (CM_MSG *msg) -+{ -+ CM_HDR *hdr = &msg->Hdr; -+ uint16_t sum = hdr->Checksum; -+ uint16_t nmaps = hdr->NumMaps; -+ -+ if (nmaps > CM_MSG_MAXMAPS) { -+ printk ("SumCheck: nmaps %d > CM_MSG_MAXMAPS\n", nmaps); -+ return 0; -+ } -+ -+ if ((hdr->Type != CM_MSG_TYPE_HEARTBEAT) && nmaps != 0) { -+ printk ("SumCheck: type(%d) not HEARTBEAT and nmaps(%d) != 0\n", hdr->Type, nmaps); -+ return 0; -+ } -+ -+ hdr->Checksum = 0; -+ -+ if (CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)) != sum) { -+ printk ("SumCheck: checksum failed %x %x\n", CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)), sum); -+ -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static void -+ProcessMessage (EP_RAIL *rail, void *arg, void *msgbuf) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ CM_MSG *msg = (CM_MSG *) msgbuf; -+ CM_HDR *hdr = &msg->Hdr; -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *sgmt; -+ bitmap_t seg; -+ int i; -+ int delay; -+ static long tlast; -+ static int count; -+ -+ /* Poll the message Version field until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; hdr->Version == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Display a message every 60 seconds if we see an "old" format message */ -+ if (hdr->Version == EP_SYSTEMQ_UNRECEIVED && (((lbolt - tlast) > 60*HZ) ? (count = 0) : ++count) < 1) -+ { -+ printk ("%s: received old protocol message (type %d from node %d)\n", cmRail->Rail->Name, -+ ((uint8_t *) msg)[20], ((uint16_t *) msg)[4]); -+ -+ tlast = lbolt; -+ goto finished; -+ } -+ -+ if (hdr->Version != CM_MSG_VERSION || hdr->ParamHash != cmRail->ParamHash || hdr->MachineId != MachineId) -+ { -+ CPRINTF8 (1, "%s: invalid message : Version %08x (%08x) ParamHash %08x (%08x) MachineId %04x (%04x) Nodeid %d\n", cmRail->Rail->Name, -+ hdr->Version, CM_MSG_VERSION, hdr->ParamHash, cmRail->ParamHash, hdr->MachineId, MachineId, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (!SumCheck (msg)) -+ { -+ printk ("%s: checksum failed on msg from %d?\n", cmRail->Rail->Name, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (hdr->NodeId == cmRail->NodeId) /* ignore my own broadcast */ -+ { -+ CPRINTF3 (6, "%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ -+ if (hdr->Type != CM_MSG_TYPE_DISCOVER_LEADER && hdr->Type != CM_MSG_TYPE_RESOLVE_LEADER) -+ printk ("%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ goto finished; -+ } -+ -+ lvl = hdr->Level; -+ level = &cmRail->Levels[lvl]; -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ goto finished; -+ -+ if (lvl >= cmRail->NumLevels || /* from outer space */ -+ hdr->NodeId < level->MinNodeId || /* from outside this level's subtree */ -+ hdr->NodeId >= level->MinNodeId + level->NumNodes) -+ { -+ printk ("%s: lvl %d node %d type %d: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, hdr->Type, -+ lvl >= cmRail->NumLevels ? "level too big for machine" : "outside subtree"); -+ goto finished; -+ } -+ -+ sidx = SegmentNo (cmRail, hdr->NodeId, lvl); -+ sgmt = &level->Sgmts[sidx]; -+ -+ switch (hdr->Type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ if (lvl >= cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ /* someone else thinks they lead at the same level as me */ -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ -+ SendMessage (cmRail, hdr->NodeId, lvl, CM_MSG_TYPE_REJOIN); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ if (sidx == level->MySgmt) /* someone I led thinks they lead some of my subtrees */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (lvl < cmRail->TopLevel) /* I'm the leader of this level */ -+ { -+ if (sgmt->State == CM_SGMT_PRESENT && /* someone thinks someone I lead is dead */ -+ sgmt->NodeId != hdr->NodeId) -+ { -+ /* My subordinate's death could be noticed by one of her peers -+ * before I do. If she _is_ dead, I'll notice before long and -+ * NOTIFY this discover. If this discover completes before I -+ * detect my subordinate's death, the discovering node will -+ * try to take over from me, and then I'll RESET her. -+ */ -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (got established subordinate)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ return; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* New connection */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation */ -+ StartConnecting (cmRail, sgmt, hdr->NodeId, hdr->Timestamp); -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !NOTIFY)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_NOTIFY); -+ break; -+ } -+ -+ ASSERT (lvl == cmRail->TopLevel); -+ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ /* I think my leader is alive, in which case she'll NOTIFY this -+ * DISCOVER. If she's dead, I'll start to become a leader -+ * candidate and handle this appropriately. -+ */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: ignored (I'm a subordinate)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ ASSERT (cmRail->Role == CM_ROLE_LEADER_CANDIDATE); -+ -+ /* A peer at this level is bidding for leadership along with me */ -+ if (IShouldLead (cmRail, msg)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: but I should lead\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ /* So there _is_ someone there; She'll be seeing my DISCOVER -+ * messages and extending her discovery period, so that when I -+ * become leader, I'll NOTIFY her. In the meantime I'll flag her -+ * activity, so she remains WAITING. -+ */ -+ sgmt->UpdateTick = lbolt; -+ break; -+ } -+ -+ /* Defer to sender... */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: delaying me becoming leader\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ if (lvl <= cmRail->TopLevel) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (from my subtree)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (I'm not looking for a leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (hdr->Level > cmRail->BroadcastLevel && AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (broadcast level too low)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d DISCOVER_SUBORDINATE: !IMCOMING\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_IMCOMING); -+ break; -+ -+ case CM_MSG_TYPE_IMCOMING: -+ if (lvl > cmRail->TopLevel || /* from peer or node above me */ -+ sgmt->State == CM_SGMT_PRESENT || /* already got a subtree */ -+ sgmt->State == CM_SGMT_ABSENT) /* already written off this subtree */ -+ { -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: ignored\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: waiting...\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ sgmt->State = CM_SGMT_COMING; -+ sgmt->UpdateTick = lbolt; -+ break; -+ -+ case CM_MSG_TYPE_NOTIFY: -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE || /* I'm not looking for a leader */ -+ lvl != cmRail->TopLevel) /* at this level */ -+ { -+ /* If this person really should be my leader, my existing leader -+ * will time out, and I'll discover this one. */ -+ CPRINTF4 (2, "%s: lvl %d node %d NOTIFY: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, -+ lvl < cmRail->TopLevel ? "already leader" : -+ lvl > cmRail->TopLevel ? "lvl too high" : "already subordinate"); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d NOTIFY: becoming subordinate\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ cmRail->Role = CM_ROLE_SUBORDINATE; /* Now I've found my level */ -+ StartConnecting (cmRail, &level->Sgmts[0], hdr->NodeId, hdr->Timestamp); -+ break; -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT: ignored (lvl too high)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (lvl == cmRail->TopLevel) /* heartbeat from my leader */ -+ { -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE) /* but I've not got one */ -+ { -+ /* I'm probably a new incarnation of myself; I'll keep doing -+ * discovery until my previous existence's leader NOTIFY's me. -+ * If I was this node's leader, she'll time me out (I'm not -+ * sending heartbeats to her) and we'll fight it out for -+ * leadership. */ -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT ignored (no leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ sidx = 0; -+ sgmt = &level->Sgmts[0]; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* not fully connected with this guy */ -+ sgmt->NodeId != hdr->NodeId || /* someone else impersonating my peer */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation of my peer */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d H/BEAT: !REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d H/BEAT: !REJOIN %s\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, -+ sgmt->State != CM_SGMT_PRESENT ? "not present" : -+ sgmt->NodeId != hdr->NodeId ? "someone else" : "new incarnation"); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (!((hdr->Seq == sgmt->AckSeq) || /* NOT duplicate message or */ -+ (hdr->Seq == (CM_SEQ)(sgmt->AckSeq + 1))) || /* expected message */ -+ !((hdr->AckSeq == sgmt->MsgSeq) || /* NOT expected ack or */ -+ (hdr->AckSeq == (CM_SEQ)(sgmt->MsgSeq - 1)))) /* duplicate ack */ -+ { -+ CPRINTF9 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ printk ("%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ IncrStat (cmRail, HeartbeatsRcvd); -+ -+ sgmt->UpdateTick = lbolt; -+ sgmt->SendMaps = 1; -+ -+ if (sgmt->MsgSeq == hdr->AckSeq) /* acking current message */ -+ sgmt->MsgAcked = 1; /* can send the next one */ -+ -+ if (hdr->Seq == sgmt->AckSeq) /* discard duplicate (or NULL heartbeat) */ -+ { -+ CPRINTF6 (6, "%s: lvl %d sidx %d node %d type %d: %s H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ hdr->NumMaps == 0 ? "null" : "duplicate"); -+ break; -+ } -+ -+ CPRINTF7 (6, "%s: lvl %d sidx %d node %d type %d: seq %d maps %d H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, hdr->Seq, hdr->NumMaps); -+ -+ sgmt->AckSeq = hdr->Seq; /* ready to receive next one */ -+ -+ for (i = 0; i < hdr->NumMaps; i++) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(i)]; -+ int clvl = map->level; -+ -+ if (clvl < 0) /* end of message */ -+ break; -+ -+ if (clvl < sgmt->Level) /* bad level */ -+ { -+ CPRINTF6 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (bad clevel %d)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, clvl); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ goto finished; -+ } -+ -+ if (map->offset == STATEMAP_NOMORECHANGES) /* end of atomic changes */ -+ { -+ if (!sgmt->Maps[clvl].InputMapValid || /* not set InputMap yet */ -+ statemap_changed (sgmt->Maps[clvl].CurrentInputMap)) /* previously applied changes */ -+ { -+ CPRINTF3 (4, "%s: received new clvl %d map from %d\n", cmRail->Rail->Name, clvl, sgmt->NodeId); -+ -+ statemap_setmap (sgmt->Maps[clvl].InputMap, sgmt->Maps[clvl].CurrentInputMap); -+ sgmt->Maps[clvl].InputMapValid = 1; -+ -+ statemap_clearchanges (sgmt->Maps[clvl].CurrentInputMap); -+ } -+ continue; -+ } -+ -+ seg = ((bitmap_t)map->seg[0]) -+ | (((bitmap_t)map->seg[1]) << 16) -+#if (BT_ULSHIFT == 6) -+ | (((bitmap_t)map->seg[2]) << 32) -+ | (((bitmap_t)map->seg[3]) << 48) -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ ; -+ statemap_setseg (sgmt->Maps[clvl].CurrentInputMap, map->offset, seg); -+ } -+ break; -+ -+ case CM_MSG_TYPE_REJOIN: -+ CPRINTF5 (1, "%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ printk ("%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinRequest); -+ break; -+ -+ default: -+ printk ("%s: lvl=%d unknown message type %d\n", cmRail->Rail->Name, lvl, hdr->Type); -+ break; -+ } -+ finished: -+ hdr->Version = EP_SYSTEMQ_UNRECEIVED; -+} -+ -+static void -+PollInputQueues (CM_RAIL *cmRail) -+{ -+ ep_poll_inputq (cmRail->Rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ ep_poll_inputq (cmRail->Rail, cmRail->PolledQueue, 0, ProcessMessage, cmRail); -+} -+ -+static void -+IntrQueueCallback (EP_RAIL *rail, void *arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ /* If the lock is held, then don't bother spinning for it, -+ * since the messages will be received at this, or the -+ * next heartbeat */ -+ local_irq_save (flags); -+ if (spin_trylock (&cmRail->Lock)) -+ { -+ if (AFTER (lbolt, cmRail->NextRunTime + MSEC2TICKS(CM_TIMER_SCHEDULE_TIMEOUT))) -+ printk ("%s: heartbeat timer stuck - scheduled\n", cmRail->Rail->Name); -+ else -+ ep_poll_inputq (rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ spin_unlock (&cmRail->Lock); -+ } -+ local_irq_restore (flags); -+} -+ -+char * -+sprintClPeers (char *str, CM_RAIL *cmRail, int clvl) -+{ -+ int clLo = cmRail->Levels[clvl].MinNodeId; -+ int clHi = clLo + cmRail->Levels[clvl].NumNodes - 1; -+ int subClLo = (clvl == 0) ? cmRail->NodeId : cmRail->Levels[clvl - 1].MinNodeId; -+ int subClHi = subClLo + ((clvl == 0) ? 0 : cmRail->Levels[clvl - 1].NumNodes - 1); -+ -+ if (subClHi == clHi) -+ sprintf (str, "[%d-%d]", clLo, subClLo - 1); -+ else if (subClLo == clLo) -+ sprintf (str, "[%d-%d]", subClHi + 1, clHi); -+ else -+ sprintf (str, "[%d-%d][%d-%d]", clLo, subClLo - 1, subClHi + 1, clHi); -+ -+ return (str); -+} -+ -+static void -+RestartComms (CM_RAIL *cmRail, int clvl) -+{ -+ int base; -+ int nodeId; -+ int lstat; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ base = myClId * CM_GSTATUS_BITS; -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ cmRail->Levels[clvl].Restarting = 1; -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ cmRail->Levels[clvl].Online = 0; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster; it's just someone in this cluster */ -+ { /* that wants me to restart */ -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ /* gstat must == RUNNING */ -+ cmRail->Levels[clvl].Connected--; -+ break; -+ case EP_NODE_DISCONNECTED: -+ /* CLOSING || STARTING || (lstat & RESTART) */ -+ break; -+ } -+ } -+ } -+ } -+} -+ -+static void -+UpdateGlobalStatus (CM_RAIL *cmRail) -+{ -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ int nodeId; -+ int offset; -+ int base; -+ bitmap_t gstat; -+ bitmap_t lgstat; -+ bitmap_t lstat; -+ int clvl; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ int lastClId; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!cmRail->Levels[clvl].GlobalMapValid || /* not got the global map yet */ -+ !statemap_changed (cmRail->Levels[clvl].GlobalMap)) /* no changes to respond to */ -+ { -+ CPRINTF2 (6, "%s: Got invalid or unchanged clvl %d global map\n", cmRail->Rail->Name, clvl); -+ continue; -+ } -+ -+ CPRINTF2 (5, "%s: Got valid changed clvl %d global map\n", cmRail->Rail->Name, clvl); -+ -+ lastClId = -1; -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ while ((offset = statemap_findchange (cmRail->Levels[clvl].GlobalMap, &gstat, 1)) >= 0) -+ { -+ /* -+ * Check every node that this segment covers - however -+ * if the last node we checked in the previous segmemt -+ * is also the first node in this segment, then skip -+ * it. -+ */ -+ if ((thisClId = (offset/CM_GSTATUS_BITS)) == lastClId) -+ thisClId++; -+ lastClId = (offset + BT_NBIPUL - 1)/CM_GSTATUS_BITS; -+ -+ /* check each node that might have changed */ -+ for ( ; thisClId <= lastClId && thisClId < numClNodes; thisClId++) -+ { -+ base = thisClId * CM_GSTATUS_BITS; -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ if (thisClId >= subClMin && thisClId <= subClMax) /* skip sub-cluster */ -+ continue; -+ -+ /* This isn't me; I need to sense what this node is driving -+ * (just the starting and running bits) and respond -+ * appropriately... -+ */ -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ if (lgstat == gstat) /* no change in peer state */ -+ continue; -+ -+ CPRINTF5 (3, "%s: Node %d: lgstat %s, gstat %s, lstat %s\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ /* What I'm currently driving as my acknowledgement */ -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ switch (gstat) -+ { -+ case CM_GSTATUS_STARTING: -+ if ((lgstat == CM_GSTATUS_ABSENT || lgstat == CM_GSTATUS_CLOSING) && lstat == CM_GSTATUS_MAY_START) -+ { -+ CPRINTF2 (1, "%s: ===================node %d STARTING\n", cmRail->Rail->Name, nodeId); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_RUNNING: -+ if ((lgstat == CM_GSTATUS_ABSENT && lstat == CM_GSTATUS_MAY_START) || -+ (lgstat == CM_GSTATUS_STARTING && lstat == CM_GSTATUS_MAY_RUN)) -+ { -+ CPRINTF3 (1, "%s: ===================node %d%s RUNNING\n", cmRail->Rail->Name, nodeId, -+ lgstat == CM_GSTATUS_ABSENT ? " Already" : ""); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_CLOSING: -+ CPRINTF4 (1, "%s: ===================node %d CLOSING%s%s\n", cmRail->Rail->Name, nodeId, -+ (lstat & CM_GSTATUS_RESTART) ? " for Restart" : "", -+ cmRail->Levels[clvl].Online ? "" : " (offline)"); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_START) /* clear restart if we've disconnected */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ continue; -+ -+ default: -+ break; -+ } -+ -+ /* "unexpected" state change forces me to ask her to restart */ -+ if (! (lstat & CM_GSTATUS_RESTART)) /* not requesting restart already */ -+ { -+ CPRINTF5 (1, "%s: ===================node %d %s, old %s new %s\n", cmRail->Rail->Name, nodeId, -+ (gstat == CM_GSTATUS_ABSENT) ? "ABSENT" : "REQUEST RESTART", -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId)); -+ -+ /* request restart */ -+ if (cmRail->Levels[clvl].Online && lstat == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ continue; -+ } -+ -+ continue; -+ } -+ } -+ -+ /* Now check myself - see what everyone else thinks I'm doing */ -+ base = myClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS); -+ -+ if (lgstat == gstat) /* my state in this cluster hasn't changed */ -+ { -+ CPRINTF3 (6, "%s: my clvl %d global status unchanged from %s\n", cmRail->Rail->Name, -+ clvl, GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId)); -+ goto all_done; -+ } -+ -+ if ((gstat & CM_GSTATUS_RESTART) != 0) /* someone wants me to restart */ -+ { -+ if ((lstat & CM_GSTATUS_STATUS_MASK) == CM_GSTATUS_CLOSING) /* I'm already restarting */ -+ goto all_done; -+ -+ CPRINTF2 (1, "%s: ===================RESTART REQUEST from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ printk ("%s: Restart Request from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ goto all_done; -+ } -+ -+ CPRINTF6 (5, "%s: clvl %d: lgstat %s gstat %s, lstat %s%s\n", cmRail->Rail->Name, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, myClId), -+ (gstat != lstat) ? " (IGNORED)" : ""); -+ -+ if (gstat != lstat) /* not everyone agrees with me */ -+ goto all_done; -+ -+ switch (lstat) -+ { -+ default: -+ ASSERT (0); /* I never drive this */ -+ -+ case CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START: /* I can restart now (have seen restart go away) */ -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1,"%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN: -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN: -+ if (! cmRail->Levels[clvl].Online) -+ { -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ cmRail->Levels[clvl].Online = 1; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster */ -+ { -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ /* Only connect to her if I see her as running and I'm not requesting her -+ * to restart - this means that I was offline when I saw her transition -+ * to running and haven't seen her in a "bad" state since. */ -+ if (gstat == CM_GSTATUS_RUNNING && ! (lstat & CM_GSTATUS_RESTART)) -+ { -+ CPRINTF5 (1, "%s: node %d lgstat %s gstat %s, lstat %s -> CONNECT\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lstat == CM_GSTATUS_MAY_START) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ } -+ } -+ goto all_done; -+ } -+ -+ all_done: -+ statemap_setmap (cmRail->Levels[clvl].LastGlobalMap, cmRail->Levels[clvl].GlobalMap); -+ } -+} -+ -+static void -+ReduceGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ int recompute; -+ CM_LEVEL *level; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ /* Update cmRail->Levels[*].SubordinateMap[clvl] for all subordinate levels */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ /* We need to recompute this level's statemap if... -+ * . Previous level's statemap has changes to propagate OR -+ * . This level's statemap has not been computed yet OR -+ * . A subordinate at this level has sent me a change. -+ * Note that we can only do this if all subordinates from this -+ * level down are present with valid statemaps, or absent (i.e. not -+ * timing out). -+ */ -+ -+ ASSERT (lvl == 0 || cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ recompute = !level->SubordinateMapValid[clvl] || -+ (lvl > 0 && statemap_changed (cmRail->Levels[lvl - 1].SubordinateMap[clvl])); -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (!(sgmt->State == CM_SGMT_ABSENT || /* absent nodes contribute zeros */ -+ (sgmt->State == CM_SGMT_PRESENT && /* present nodes MUST have received a map to contribute */ -+ sgmt->Maps[clvl].InputMapValid))) -+ { -+ CPRINTF5 (5, "%s: waiting for clvl %d lvl %d seg %d node %d\n", cmRail->Rail->Name, -+ clvl, lvl, sidx, sgmt->NodeId); -+ -+ /* Gotta wait for this guy, so we can't compute this level, -+ * or any higher levels. */ -+ return; -+ } -+ -+ if (statemap_changed (sgmt->Maps[clvl].InputMap)) -+ { -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ -+ recompute = 1; -+ -+ CPRINTF7 (5, "%s: %s clvl %d map from @ %d %d (%d) - %s\n", -+ cmRail->Rail->Name, sgmt->State == CM_SGMT_ABSENT ? "newly absent" : "got new", -+ clvl, lvl, sidx, sgmt->NodeId, -+ MapString ("Input", sgmt->Maps[clvl].InputMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (recompute) -+ { -+ if (lvl == 0) -+ statemap_reset (cmRail->Levels[clvl].TmpMap); -+ else -+ { -+ ASSERT (cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ } -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State != CM_SGMT_ABSENT) /* absent nodes contribute zeroes */ -+ { -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, sgmt->Maps[clvl].InputMap); -+ } -+ statemap_clearchanges (sgmt->Maps[clvl].InputMap); -+ } -+ -+ statemap_setmap (level->SubordinateMap[clvl], cmRail->Levels[clvl].TmpMap); -+ level->SubordinateMapValid[clvl] = 1; -+ -+ CPRINTF4 (5, "%s: recompute clvl %d level %d statemap - %s\n", cmRail->Rail->Name, clvl, lvl, -+ MapString ("level", level->SubordinateMap[clvl], cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (cRole == CM_ROLE_LEADER_CANDIDATE) /* don't know this cluster's leader yet */ -+ return; -+ -+ ASSERT (cTopLevel == 0 || cmRail->Levels[cTopLevel - 1].SubordinateMapValid[clvl]); -+ -+ /* Update SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid || -+ statemap_changed (cmRail->Levels[clvl].LocalMap) || -+ (cTopLevel > 0 && statemap_changed (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]))) -+ { -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[clvl].LocalMap); -+ statemap_clearchanges (cmRail->Levels[clvl].LocalMap); -+ -+ if (cTopLevel > 0) -+ { -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ } -+ -+ statemap_setmap (cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].TmpMap); -+ cmRail->Levels[clvl].SubTreeMapValid = 1; -+ -+ CPRINTF3 (5, "%s: recompute clvl %d subtree map - %s\n", cmRail->Rail->Name, clvl, -+ MapString ("subtree", cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ -+ if (cRole == CM_ROLE_SUBORDINATE) /* got a leader (Not me) */ -+ { /* => send SubTreeMap to her */ -+ CM_SGMT *leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ ASSERT (cmRail->Levels[clvl].SubTreeMapValid); -+ -+ if (!leader->Maps[clvl].OutputMapValid || -+ statemap_changed (cmRail->Levels[clvl].SubTreeMap)) -+ { -+ statemap_setmap (leader->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ leader->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF3 (5, "%s: sending clvl %d subtree map to leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ } -+ } -+} -+ -+void -+BroadcastGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *leader; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ switch (cRole) -+ { -+ default: -+ ASSERT (0); -+ -+ case CM_ROLE_LEADER_CANDIDATE: /* don't know this cluster's leader yet */ -+ return; -+ -+ case CM_ROLE_LEADER: /* cluster leader: */ -+ ASSERT (clvl < cmRail->TopLevel); /* set GlobalMap from SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (cmRail->Levels[clvl].SubTreeMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].SubTreeMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF2 (5, "%s: whole cluster %d leader setting global map\n", cmRail->Rail->Name, clvl); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: /* cluster subordinate: */ -+ ASSERT (clvl >= cmRail->TopLevel); /* receive GlobalMap from leader */ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ -+ if (!leader->Maps[clvl].InputMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (leader->Maps[clvl].InputMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, leader->Maps[clvl].InputMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (leader->Maps[clvl].InputMap); -+ -+ CPRINTF3 (5, "%s: getting clvl %d global map from leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ } -+ -+ CPRINTF3 (5, "%s: clvl %d %s\n", cmRail->Rail->Name, clvl, -+ MapString ("global", cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].NumNodes, "")); -+ -+ /* Broadcast global map to all subordinates */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ CPRINTF5 (5, "%s: sending clvl %d global map to subordinate %d %d (%d)\n", -+ cmRail->Rail->Name, clvl, lvl, sidx, sgmt->NodeId); -+ } -+ } -+ } -+} -+ -+static void -+CheckPeerPulse (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ int clvl, sendRejoin; -+ -+ switch (sgmt->State) -+ { -+ case CM_SGMT_ABSENT: -+ break; -+ -+ case CM_SGMT_WAITING: /* waiting for a subtree */ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d contains no live nodes\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ -+ case CM_SGMT_COMING: /* lost/waiting subtree sent me IMCOMING */ -+ ASSERT (sgmt->Level > 0); /* we only do subtree discovery below our own level */ -+ -+ if (AFTER (lbolt, sgmt->WaitingTick + MSEC2TICKS(CM_WAITING_TIMEOUT))) -+ { -+ CPRINTF3 (1, "%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ printk ("%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ } -+ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d hasn't connected yet\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+ break; -+ -+ case CM_SGMT_PRESENT: -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_HEARTBEAT_TIMEOUT))) -+ break; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* leader died */ -+ { -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF4 (1, "%s: leader (%d) node %d JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ printk ("%s: lvl %d leader (%d) JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ } -+ -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF5 (2, "%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ printk ("%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ break; -+ -+ default: -+ ASSERT (0); -+ } -+} -+ -+static void -+CheckPeerPulses (CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ /* check children are alive */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ for (sidx = 0; sidx < cmRail->Levels[lvl].NumSegs; sidx++) -+ CheckPeerPulse (cmRail, &cmRail->Levels[lvl].Sgmts[sidx]); -+ -+ /* check leader is alive */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ ASSERT (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT); -+ -+ CheckPeerPulse (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0]); -+ } -+} -+ -+static void -+SendHeartbeats (CM_RAIL *cmRail) -+{ -+ int lvl; -+ -+ /* Send heartbeats to my children */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ int sidx; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_HEARTBEAT); -+ } -+ } -+ -+ /* Send heartbeat to my leader */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ SendToSgmt (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0], CM_MSG_TYPE_HEARTBEAT); -+ } -+} -+ -+static int -+BroadcastDiscover (CM_RAIL *cmRail) -+{ -+ int sidx; -+ int lvl; -+ int msgType; -+ CM_LEVEL *level; -+ int urgent; -+ -+ ASSERT (cmRail->TopLevel <= cmRail->NumLevels); -+ ASSERT ((cmRail->Role == CM_ROLE_LEADER) ? (cmRail->TopLevel == cmRail->NumLevels) : -+ (cmRail->Role == CM_ROLE_SUBORDINATE) ? (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT) : -+ (cmRail->Role == CM_ROLE_LEADER_CANDIDATE)); -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) /* got a leader/lead whole machine */ -+ { -+ urgent = 0; /* non-urgent leader discovery */ -+ lvl = cmRail->TopLevel - 1; /* on nodes I lead (resolves leader conflicts) */ -+ msgType = CM_MSG_TYPE_RESOLVE_LEADER; -+ } -+ else -+ { -+ urgent = 1; /* urgent leader discovery */ -+ lvl = cmRail->TopLevel; /* on nodes I'd like to lead */ -+ msgType = CM_MSG_TYPE_DISCOVER_LEADER; -+ } -+ -+ if (lvl >= 0) -+ { -+ if (lvl > cmRail->BroadcastLevel) -+ { -+ /* Unable to broadcast at this level in the spanning tree, so we -+ * just continue doing discovery until we are able to broadcast */ -+ CPRINTF4 (6, "%s: broadcast level %d too low to discover %d at level %d\n", -+ cmRail->Rail->Name, cmRail->BroadcastLevel, msgType, lvl); -+ -+ cmRail->DiscoverStartTick = lbolt; -+ } -+ else -+ { -+ level = &cmRail->Levels[lvl]; -+ SendToSgmt (cmRail, &level->Sgmts[level->MySgmt], msgType); -+ } -+ } -+ -+ while (lvl > 0) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_WAITING) -+ { -+ ASSERT (sidx != level->MySgmt); -+ /* Do subordinate discovery. Existing subordinates will -+ * ignore it, but leader candidates will send IMCOMING. -+ * This is always urgent since we'll assume a subtree is -+ * absent if I don't get IMCOMING within the timeout. -+ */ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_DISCOVER_SUBORDINATE); -+ urgent = 1; -+ } -+ } -+ lvl--; -+ } -+ -+ return (urgent); -+} -+ -+static void -+CheckBroadcast (CM_RAIL *cmRail) -+{ -+ int clvl; -+ -+ for (clvl = cmRail->NumLevels-1; clvl >= 0 && cmRail->Rail->SwitchBroadcastLevel < cmRail->Levels[clvl].SwitchLevel; clvl--) -+ ; -+ -+ if (cmRail->OfflineReasons || cmRail->Rail->System->Shutdown) -+ clvl = -1; -+ -+ /* if the level at which we can broadcast drops, then we must rejoin the -+ * spanning tree at the highest level for which broadcast is good. */ -+ if (cmRail->BroadcastLevel > clvl && clvl < (int)(cmRail->Role == CM_ROLE_LEADER ? cmRail->TopLevel - 1 : cmRail->TopLevel)) -+ { -+ printk ("%s: REJOINING at level %d because %s\n", cmRail->Rail->Name, clvl+1, -+ (cmRail->OfflineReasons & CM_OFFLINE_MANAGER) ? "of manager thread" : -+ (cmRail->OfflineReasons & CM_OFFLINE_PROCFS) ? "force offline" : -+ cmRail->Rail->System->Shutdown ? "system shutdown" : "broadcast level changed"); -+ LowerTopLevel (cmRail, clvl+1); -+ } -+ -+ if (cmRail->BroadcastLevel != clvl) -+ { -+ cmRail->BroadcastLevel = clvl; -+ cmRail->BroadcastLevelTick = lbolt; -+ } -+ -+ /* schedule the update thread, to withdraw from comms with -+ * nodes "outside" of the valid broadcastable range. */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (cmRail->BroadcastLevel < clvl) -+ { -+ if (AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT) && -+ !(cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST)) -+ { -+ printk ("%s: Withdraw at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons |= CM_OFFLINE_BROADCAST; -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST) -+ { -+ printk ("%s: Rejoin at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons &= ~CM_OFFLINE_BROADCAST; -+ } -+ } -+ } -+ -+} -+ -+static void -+CheckManager (CM_RAIL *cmRail) -+{ -+ long time, state = ep_kthread_state (&cmRail->Rail->System->ManagerThread, &time); -+ -+ if (state == KT_STATE_RUNNING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_RUNNING_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ if (state != KT_STATE_SLEEPING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ -+ if ((cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state == KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread unstuck\n", cmRail->Rail->Name); -+ -+ cmRail->OfflineReasons &= ~CM_OFFLINE_MANAGER; -+ } -+ -+ if (!(cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state != KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread stuck - %s\n", cmRail->Rail->Name, -+ state == KT_STATE_SCHEDULED ? "scheduled" : -+ state == KT_STATE_RUNNING ? "running" : -+ state == KT_STATE_STALLED ? "stalled" : "unknown"); -+ -+ cmRail->OfflineReasons |= CM_OFFLINE_MANAGER; -+ } -+} -+ -+static void -+CheckOfflineReasons (CM_RAIL *cmRail, int clvl) -+{ -+ int subClMin, subClMax, myClId; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ -+ if (cmRail->Levels[clvl].OfflineReasons) -+ { -+ if (cmRail->Levels[clvl].Online) -+ { -+ printk ("%s: Withdraw from %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].Restarting && cmRail->Levels[clvl].Connected == 0) -+ { -+ printk ("%s: Rejoin with %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ -+ ASSERT (statemap_getbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, CM_GSTATUS_BITS) == -+ (CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ cmRail->Levels[clvl].Restarting = 0; -+ } -+ } -+} -+ -+void -+DoHeartbeatWork (CM_RAIL *cmRail) -+{ -+ long now = lbolt; -+ int clvl; -+ -+ if ((RejoinCheck || RejoinPanic) && -+ AFTER (now, cmRail->NextRunTime + MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT))) /* If I've been unresponsive for too long */ -+ { -+ /* I'd better reconnect to the network because I've not been playing the game */ -+ CPRINTF4 (1, "%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ printk ("%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinTooSlow); -+ -+ if (RejoinPanic) -+ panic ("ep: REJOINING because I was too slow (heartbeat)\n"); -+ } -+ -+ PollInputQueues (cmRail); -+ -+ if (! BEFORE (now, cmRail->NextDiscoverTime)) -+ { -+ if (BroadcastDiscover (cmRail)) /* urgent discovery required? */ -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_URGENT_DISCOVER_INTERVAL); -+ else -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_PERIODIC_DISCOVER_INTERVAL); -+ -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE && AFTER (now, cmRail->DiscoverStartTick + MSEC2TICKS (CM_DISCOVER_TIMEOUT))) -+ RaiseTopLevel (cmRail); -+ } -+ -+ if (! BEFORE (now, cmRail->NextHeartbeatTime)) -+ { -+ CheckPosition (cmRail->Rail); -+ CheckPeerPulses (cmRail); -+ CheckBroadcast (cmRail); -+ CheckManager (cmRail); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ CheckOfflineReasons (cmRail, clvl); -+ ReduceGlobalMap (cmRail, clvl); -+ BroadcastGlobalMap (cmRail, clvl); -+ } -+ -+ SendHeartbeats (cmRail); -+ -+ /* Compute the next heartbeat time, but "drift" it towards the last -+ * periodic discovery time we saw from the whole machine leader */ -+ cmRail->NextHeartbeatTime = now + MSEC2TICKS (CM_HEARTBEAT_INTERVAL); -+ } -+ -+ if (AFTER (cmRail->NextHeartbeatTime, cmRail->NextDiscoverTime)) -+ cmRail->NextRunTime = cmRail->NextDiscoverTime; -+ else -+ cmRail->NextRunTime = cmRail->NextHeartbeatTime; -+} -+ -+#define CM_SVC_INDICATOR_OFFSET(CMRAIL,CLVL,IND,NODEID) ( ( CMRAIL->Levels[CLVL].NumNodes * CM_GSTATUS_BITS ) \ -+ + ( CMRAIL->Levels[CLVL].NumNodes * IND ) \ -+ + ( NODEID - CMRAIL->Levels[CLVL].MinNodeId ) ) -+int -+cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC,"cm_svc_indicator_set: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC,"cm_svc_indicator_set: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 1, 1); -+ EPRINTF3 (DBG_SVC,"cm_svc_indicator_set: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_clear: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_clear: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 0, 1); -+ EPRINTF3 (DBG_SVC, "cm_svc_indicator_clear: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ bitmap_t bits; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: rail %p ind %d nodeId %d (me=%d)\n", rail, svc_indicator, nodeId, cmRail->NodeId); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: service indicator %d not registered\n", svc_indicator); -+ return (0); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (0); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ if ( clvl == cmRail->NumLevels) { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: node out of range %d \n", nodeId); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ return (0); -+ } -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: clvl %d nodeId %d offset %d %x\n", clvl, nodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), bits); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return ( (bits == 0) ? (0) : (1) ); -+} -+ -+int -+cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ /* or in the bit map */ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int nodeId, clvl; -+ bitmap_t bits; -+ unsigned long flags; -+ int clip_out_low, clip_out_high; -+ int curr_low, curr_high; -+ int check_low, check_high; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_bitmap: rail %p ind %d low %d high %d\n", rail, svc_indicator, low, (low + nnodes)); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_bitmap: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ clip_out_low = clip_out_high = -1; /* all in */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ -+ /* curr_high/low is the range of the current lvl */ -+ curr_low = cmRail->Levels[clvl].MinNodeId; -+ curr_high = cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes; -+ -+ /* find out how much of low high is in this range and only check that part */ -+ check_low = ( low < curr_low) ? curr_low : low; -+ check_high = ( (low + nnodes) > curr_high) ? curr_high : (low + nnodes); -+ -+ EPRINTF6 (DBG_SVC, "cm_svc_indicator_bitmap: curr(%d,%d) check(%d,%d) clip(%d,%d)\n", curr_low, curr_high, check_low, check_high, clip_out_low, clip_out_high); -+ -+ for(nodeId = check_low; nodeId < check_high; nodeId++) { -+ -+ if ( (clip_out_low <= nodeId) && (nodeId <= clip_out_high)) -+ nodeId = clip_out_high; /* step over the cliped out section */ -+ else { -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ if ( bits ) { -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_bitmap: its set nodeId %d (clvl %d)\n", nodeId, clvl); -+ BT_SET ( bitmap , nodeId - low ); -+ } -+ } -+ } -+ -+ /* widen the clip out range */ -+ clip_out_low = curr_low; -+ clip_out_high = curr_high -1; -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+ -+static void -+cm_heartbeat_timer (unsigned long arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ ASSERT (cmRail->Rail->State == EP_RAIL_STATE_RUNNING); -+ -+ DoHeartbeatWork (cmRail); -+ -+ __Schedule_Timer (cmRail, cmRail->NextRunTime); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+DisplayRailDo (DisplayInfo *di, EP_RAIL *rail) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int i, j; -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ (di->func)(di->arg, "NodeId=%d NodeLevel=%d NumLevels=%d NumNodes=%d\n", -+ cmRail->NodeId, cmRail->TopLevel, cmRail->NumLevels, cmRail->Rail->Position.pos_nodes); -+ -+ (di->func)(di->arg, "["); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ if (i > 0) -+ (di->func)(di->arg, ","); -+ -+ if (i < cmRail->TopLevel) -+ { -+ (di->func)(di->arg, "L "); -+ -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ switch (cmRail->Levels[i].Sgmts[j].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[j].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ } -+ else -+ switch (cmRail->Role) -+ { -+ case CM_ROLE_LEADER_CANDIDATE: -+ (di->func)(di->arg,"l "); -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg," "); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: -+ switch (cmRail->Levels[i].Sgmts[0].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[0].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ for (j = 1; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg, " "); -+ break; -+ -+ default: -+ (di->func)(di->arg, "####"); -+ break; -+ } -+ } -+ (di->func)(di->arg, "]\n"); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+DisplayRail (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ DisplayRailDo (&di_ep_debug, rail); -+} -+ -+void -+DisplayStatus (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeMaps (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+void -+DisplaySegs (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeSgmts (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+static void -+LoadBroadcastRoute (CM_RAIL *cmRail, int lvl, int sidx) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int nsegs = cmRail->Levels[0].NumSegs; -+ int vp = EP_VP_BCAST(lvl, sidx); -+ int nodes = 1; -+ int baseNode; -+ int i; -+ -+ ASSERT (lvl > 0 && lvl <= cmRail->NumLevels); -+ ASSERT (sidx == 0 || lvl < cmRail->NumLevels); -+ -+ ASSERT (vp >= EP_VP_BCAST_BASE && vp < EP_VP_BCAST_BASE + EP_VP_BCAST_COUNT); -+ -+ for (i = 1; i <= lvl; i++) -+ { -+ nodes *= nsegs; -+ nsegs = (i == cmRail->NumLevels) ? 1 : cmRail->Levels[i].NumSegs; -+ } -+ -+ baseNode = ((cmRail->NodeId / (nodes * nsegs)) * nsegs + sidx) * nodes; -+ -+ CPRINTF5 (2, "%s: broadcast vp lvl %d sidx %d [%d,%d]\n", -+ cmRail->Rail->Name, lvl, sidx, baseNode, baseNode + nodes - 1); -+ -+ rail->Operations.LoadSystemRoute (rail, vp, baseNode, baseNode + nodes - 1); -+} -+ -+static void -+LoadRouteTable (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i, j; -+ -+ if (cmRail->NumNodes > EP_MAX_NODES) -+ { -+ printk ("More nodes (%d) than point-to-point virtual process table entries (%d)\n", cmRail->NumNodes, EP_MAX_NODES); -+ panic ("LoadRouteTable\n"); -+ } -+ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.LoadSystemRoute (rail, EP_VP_NODE(i), i, i); -+ -+ /* Generate broadcast routes for subtrees */ -+ for (i = 1; i < cmRail->NumLevels; i++) -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ LoadBroadcastRoute (cmRail, i, j); -+ -+ /* Generate broadcast route for whole machine */ -+ LoadBroadcastRoute (cmRail, cmRail->NumLevels, 0); -+ -+ /* Finally invalidate all the data routes */ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.UnloadNodeRoute (cmRail->Rail, i); -+} -+ -+void -+cm_node_disconnected (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ ASSERT (nodeId != cmRail->NodeId); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ ASSERT ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN); -+ -+ CPRINTF7 (2, "%s: cm_node_disconnected: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s -> %sMAY_START\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId), -+ ((lgstat != CM_GSTATUS_CLOSING) && (lstat & CM_GSTATUS_RESTART)) ? "RESTART|" : ""); -+ -+ switch (lgstat) -+ { -+ case CM_GSTATUS_CLOSING: -+ /* delayed ack of closing - set MAY_START and clear RESTART */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ break; -+ case CM_GSTATUS_STARTING: -+ case CM_GSTATUS_RUNNING: -+ IASSERT (! cmRail->Levels[clvl].Online || lstat & CM_GSTATUS_RESTART); -+ break; -+ case CM_GSTATUS_ABSENT: -+ IASSERT (lstat & CM_GSTATUS_RESTART); -+ } -+ -+ cmRail->Levels[clvl].Connected--; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_restart_node (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (nodeId == rail->Position.pos_nodeid) -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ RestartComms (cmRail, clvl); -+ } -+ else -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ CPRINTF6 (2, "%s: cm_restart_node: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lgstat != CM_GSTATUS_CLOSING) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (offline) -+ cmRail->OfflineReasons |= reason; -+ else -+ cmRail->OfflineReasons &= ~reason; -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+static void -+cm_remove_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ CM_RAIL *cmRail = sys->Rails[rail->Number]; -+ int i, lvl, clvl; -+ -+ cm_procfs_rail_fini (cmRail); -+ -+ sys->Rails[rail->Number] = NULL; -+ rail->ClusterRail = NULL; -+ -+ del_timer_sync (&cmRail->HeartbeatTimer); -+ -+ cmRail->NextRunTime = 0; -+ cmRail->NextDiscoverTime = 0; -+ cmRail->NextHeartbeatTime = 0; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ statemap_destroy (level->SubordinateMap[clvl]); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ statemap_destroy (level->Sgmts[i].Maps[clvl].CurrentInputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].InputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].OutputMap); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ statemap_destroy (cmRail->Levels[clvl].TmpMap); -+ statemap_destroy (cmRail->Levels[clvl].GlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].LastGlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].SubTreeMap); -+ statemap_destroy (cmRail->Levels[clvl].LocalMap); -+ } -+ -+ spin_lock_destroy (&cmRail->Lock); -+ -+ ep_free_inputq (cmRail->Rail, cmRail->PolledQueue); -+ ep_free_inputq (cmRail->Rail, cmRail->IntrQueue); -+ ep_free_outputq (cmRail->Rail, cmRail->MsgQueue); -+ -+ KMEM_FREE (cmRail, sizeof (CM_RAIL)); -+} -+ -+static int -+cm_add_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ ELAN_POSITION *pos = &rail->Position; -+ CM_RAIL *cmRail; -+ int lvl, n, nn, clvl, span, i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (cmRail, CM_RAIL *, sizeof (CM_RAIL), 1); -+ -+ if (cmRail == NULL) -+ return (ENOMEM); -+ -+ cmRail->Rail = rail; -+ cmRail->NodeId = pos->pos_nodeid; -+ cmRail->NumNodes = pos->pos_nodes; -+ -+ spin_lock_init (&cmRail->Lock); -+ -+ if ((cmRail->IntrQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_INTR, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, IntrQueueCallback, cmRail)) == NULL || -+ (cmRail->PolledQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_POLLED, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, NULL, 0)) == NULL || -+ (cmRail->MsgQueue = ep_alloc_outputq (rail, sizeof (CM_MSG), CM_NUM_MSG_BUFFERS)) == NULL) -+ { -+ goto failed; -+ } -+ -+ /* point to first "spare" message buffer */ -+ cmRail->NextSpareMsg = 0; -+ -+ /* Compute the branching ratios from the switcy arity */ -+ for (lvl = 0; lvl < CM_MAX_LEVELS; lvl++) -+ BranchingRatios[lvl] = (lvl < pos->pos_levels) ? pos->pos_arity[pos->pos_levels - lvl - 1] : 4; -+ -+ /* now determine the number of levels of hierachy we have */ -+ /* and how many nodes per level there are */ -+ for (lvl = 0, nn = 1, n = pos->pos_nodes; -+ n > 1; -+ nn *= BranchingRatios[lvl], n = n / BranchingRatios[lvl], lvl++) -+ { -+ int nSegs = (n > BranchingRatios[lvl]) ? BranchingRatios[lvl] : n; -+ int nNodes = nn * nSegs; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ for (clvl = 0, span = pos->pos_arity[pos->pos_levels - clvl - 1]; -+ span < nNodes && clvl < pos->pos_levels - 1; -+ clvl++, span *= pos->pos_arity[pos->pos_levels - clvl - 1]) -+ ; -+ -+ level->SwitchLevel = clvl; -+ level->MinNodeId = (pos->pos_nodeid / nNodes) * nNodes; -+ level->NumNodes = nNodes; -+ level->NumSegs = nSegs; -+ } -+ -+ cmRail->NumLevels = lvl; -+ cmRail->BroadcastLevel = lvl-1; -+ -+ CPRINTF4 (2, "%s: NodeId=%d NumNodes=%d NumLevels=%d\n", -+ rail->Name, pos->pos_nodeid, pos->pos_nodes, cmRail->NumLevels); -+ -+ LoadRouteTable (cmRail); -+ -+ /* Init SGMT constants */ -+ for (lvl = 0; lvl < cmRail->NumLevels; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->MySgmt = SegmentNo (cmRail, cmRail->NodeId, lvl); -+ -+ for (i = 0; i < CM_SGMTS_PER_LEVEL; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->MsgNumber = lvl * CM_SGMTS_PER_LEVEL + i; -+ sgmt->Level = lvl; -+ sgmt->Sgmt = i; -+ } -+ } -+ -+ /* Init maps for each cluster level */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nNodes = cmRail->Levels[clvl].NumNodes; -+ int mapBits = (nNodes * CM_GSTATUS_BITS) + (nNodes * EP_SVC_NUM_INDICATORS); -+ int clmin; -+ int clmax; -+ int clid = ClusterIds (cmRail, clvl, &clmin, &clmax); -+ -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->SubordinateMap[clvl] = statemap_create (mapBits); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ level->Sgmts[i].Maps[clvl].CurrentInputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].InputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].OutputMap = statemap_create (mapBits); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ cmRail->Levels[clvl].TmpMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].GlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LastGlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].SubTreeMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LocalMap = statemap_create (mapBits); -+ -+ /* Flag everyone outside my next lower cluster as sensed offline... */ -+ for (i = 0; i < clmin; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ for (i = clmax + 1; i < nNodes; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ /* ...and set my own state */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, clid * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ } -+ -+ /* compute parameter hash to add to messages */ -+ cmRail->ParamHash = EP_PROTOCOL_VERSION; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_PERIODIC_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_URGENT_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_DMA_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_BCAST_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_TIMER_SCHEDULE_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_DISCOVER_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BT_NBIPUL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_GSTATUS_BITS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + EP_SVC_NUM_INDICATORS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumLevels; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumNodes; -+ for (i = 0; i < cmRail->NumLevels; i++) -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BranchingRatios[i]; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ /* Initialise the timer, but don't add it yet, since -+ * __Schedule_Heartbeat() will do this. */ -+ -+ init_timer (&cmRail->HeartbeatTimer); -+ -+ cmRail->HeartbeatTimer.function = cm_heartbeat_timer; -+ cmRail->HeartbeatTimer.data = (unsigned long) cmRail; -+ cmRail->HeartbeatTimer.expires = lbolt + hz; -+ -+ /* Indicate that heartbeats should be sent -+ * as soon as the timer is run from inside -+ * LowerTopLevel */ -+ cmRail->NextHeartbeatTime = lbolt; -+ -+ /* start discovering who else is out there */ -+ LowerTopLevel (cmRail, 0); -+ -+ /* connect to myself straight away - I know I'm here */ -+ ep_connect_node (rail, cmRail->NodeId); -+ -+ /* add to all rails */ -+ sys->Rails[rail->Number] = cmRail; -+ rail->ClusterRail = (void *) cmRail; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ /* Enable the input queues */ -+ ep_enable_inputq (rail, cmRail->PolledQueue); -+ ep_enable_inputq (rail, cmRail->IntrQueue); -+ -+ /* Create the procfs entries */ -+ cm_procfs_rail_init (cmRail); -+ -+ return 0; -+ -+ failed: -+ cm_remove_rail (subsys, epsys, rail); -+ return -ENOMEM; -+} -+ -+static void -+cm_fini (EP_SUBSYS *subsys, EP_SYS *epsys) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ -+ cm_procfs_fini(sys); -+ -+ KMEM_FREE (sys, sizeof (CM_SUBSYS)); -+} -+ -+int -+cm_init (EP_SYS *sys) -+{ -+ CM_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, CM_SUBSYS *, sizeof (CM_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "cm"; -+ subsys->Subsys.Destroy = cm_fini; -+ subsys->Subsys.AddRail = cm_add_rail; -+ subsys->Subsys.RemoveRail = cm_remove_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ -+ cm_procfs_init (subsys); -+ -+ /* -+ * Initialise the machineid if it wasn't specified by -+ * the modules.conf file - otherwise truncate it to -+ * 16 bits. -+ */ -+ if (MachineId != -1) -+ MachineId = (uint16_t) MachineId; -+ else -+ { -+#if defined(LINUX_ALPHA) -+ MachineId = (uint16_t)((5 << 12) | HZ); -+#elif defined(LINUX_SPARC) -+ MachineId = (uint16_t)((4 << 12) | HZ); -+#elif defined(LINUX_I386) -+ MachineId = (uint16_t)((3 << 12) | HZ); -+#elif defined( LINUX_IA64) -+ MachineId = (uint16_t)((2 << 12) | HZ); -+#elif defined(LINUX_X86_64) -+ MachineId = (uint16_t)((1 << 12) | HZ); -+#else -+ MachineId = (uint16_t)((0 << 12) | HZ); -+#endif -+ } -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/cm.h linux-2.6.9/drivers/net/qsnet/ep/cm.h ---- clean/drivers/net/qsnet/ep/cm.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/cm.h 2005-03-30 09:06:34.000000000 -0500 -@@ -0,0 +1,396 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.16 2005/03/30 14:06:34 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -diff -urN clean/drivers/net/qsnet/ep/cm_procfs.c linux-2.6.9/drivers/net/qsnet/ep/cm_procfs.c ---- clean/drivers/net/qsnet/ep/cm_procfs.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/cm_procfs.c 2004-05-14 05:23:13.000000000 -0400 -@@ -0,0 +1,254 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2005 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm_procfs.c,v 1.5 2004/05/14 09:23:13 daniel Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm_procfs.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+extern char *sprintClPeers (char *str, CM_RAIL *cmRail, int clvl); -+ -+static int -+proc_read_cluster(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) data; -+ char *p = page; -+ -+ page[0] = 0; -+ -+ if (cmRail->Rail->State != EP_RAIL_STATE_RUNNING) -+ p += sprintf(p, "\n"); -+ else -+ { -+ CM_LEVEL *cmLevel; -+ unsigned long flags; -+ int i, j; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ char seperate_with; -+ -+ struct { int val; char *name; } bitvals[] = { -+ {CM_OFFLINE_BROADCAST, "Broadcast"}, -+ {CM_OFFLINE_PROCFS, "Offline"}, -+ {CM_OFFLINE_MANAGER, "Manager"}}; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ cmLevel = &cmRail->Levels[i]; -+ -+ p += sprintf(p, "%23s %7s ", sprintClPeers (clNodeStr, cmRail, i), cmLevel->Online?"Online":"Offline"); -+ -+ if ((cmLevel->Online ) | ( cmLevel->Connected > 0)) -+ p += sprintf(p, "Connected=%lu ", cmLevel->Connected); -+ -+ seperate_with = '<'; -+ -+ if ( cmLevel->Restarting ) { -+ p += sprintf(p, "%cRestarting", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( ! (cmLevel->GlobalMapValid & cmLevel->SubTreeMapValid )) { -+ p += sprintf(p, "%cMap Not Valid", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( cmLevel->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmLevel->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ if ( cmRail->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmRail->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ -+ if ( seperate_with != '<' ) -+ p += sprintf(p,">\n"); -+ else -+ p += sprintf(p,"\n"); -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, p - page); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"cluster", proc_read_cluster, NULL}, -+}; -+ -+struct proc_dir_entry *svc_indicators_root; -+ -+typedef struct svc_indicator_data -+{ -+ int svc_indicator; -+ EP_RAIL *rail; -+} SVC_INDICATOR_DATA; -+ -+static SVC_INDICATOR_DATA svc_indicator_data[EP_SVC_NUM_INDICATORS][EP_MAX_RAILS]; -+static char *svc_indicator_names[EP_SVC_NUM_INDICATORS] = EP_SVC_NAMES; -+ -+static int -+proc_read_svc_indicator_rail_bitmap (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ SVC_INDICATOR_DATA *svc_data = (SVC_INDICATOR_DATA *)data; -+ unsigned int nnodes = ep_numnodes (ep_system()); -+ bitmap_t *bitmap; -+ -+ KMEM_ZALLOC (bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ cm_svc_indicator_bitmap (svc_data->rail, svc_data->svc_indicator, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_svc_indicator_bitmap(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned int num = (unsigned long) data; -+ EP_SYS *sys = ep_system(); -+ unsigned int nnodes = ep_numnodes (sys); -+ bitmap_t *bitmap; -+ -+ KMEM_ALLOC(bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ ep_svc_indicator_bitmap (sys, num, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+cm_procfs_rail_init (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ struct proc_dir_entry *p; -+ int i; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, cmRail->Rail->ProcDir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = cmRail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((rail->SvcIndicatorDir = proc_mkdir ("svc_indicators", cmRail->Rail->ProcDir)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, rail->SvcIndicatorDir)) != NULL) -+ { -+ svc_indicator_data[i][rail->Number].svc_indicator = i; -+ svc_indicator_data[i][rail->Number].rail = rail; -+ -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_rail_bitmap; -+ p->data = (void *)&svc_indicator_data[i][rail->Number]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ } -+} -+ -+void -+cm_procfs_rail_fini (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i; -+ -+ if (rail->SvcIndicatorDir) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], rail->SvcIndicatorDir); -+ -+ remove_proc_entry ("svc_indicators", cmRail->Rail->ProcDir); -+ } -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, cmRail->Rail->ProcDir); -+} -+ -+void -+cm_procfs_init (CM_SUBSYS *subsys) -+{ -+ struct proc_dir_entry *p; -+ int i; -+ -+ qsnet_proc_register_hex (ep_config_root, "machine_id", &MachineId, 0); -+ -+ if ((svc_indicators_root = proc_mkdir("svc_indicators", ep_procfs_root)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, svc_indicators_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_bitmap; -+ p->data = (void *)(long) i; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ } -+} -+ -+void -+cm_procfs_fini (CM_SUBSYS *subsys) -+{ -+ int i; -+ -+ if (svc_indicators_root) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], svc_indicators_root); -+ -+ remove_proc_entry ("svc_indicators", ep_procfs_root); -+ } -+ -+ remove_proc_entry ("machine_id", ep_config_root); -+} -diff -urN clean/drivers/net/qsnet/ep/commands_elan4.c linux-2.6.9/drivers/net/qsnet/ep/commands_elan4.c ---- clean/drivers/net/qsnet/ep/commands_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/commands_elan4.c 2005-07-20 08:01:33.000000000 -0400 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: commands_elan4.c,v 1.2.10.1 2005/07/20 12:01:33 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/commands_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+ -+static __inline__ void -+elan4_command_write (ELAN4_CQ *cq, E4_uint64 val, unsigned off) -+{ -+ writeq (val, (void *)(cq->cq_mapping + offsetof (E4_CommandPort, Command[off]))); -+} -+ -+void -+elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag) -+{ -+ elan4_command_write (cq, tag | NOP_CMD, 0); -+} -+ -+void -+elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | WRITE_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | ADD_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype) -+{ -+ elan4_command_write (cq, from | (datatype << COPY64_DATA_TYPE_SHIFT) | COPY64_CMD, 0); -+ elan4_command_write (cq, to | (datatype << COPY64_DATA_TYPE_SHIFT), 1); -+} -+ -+void -+elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie) -+{ -+ elan4_command_write (cq, (cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, 0); -+} -+ -+ -+void -+elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs) -+{ -+ elan4_command_write (cq, regs->Registers[0] | RUN_THREAD_CMD, 0); -+ elan4_command_write (cq, regs->Registers[1], 1); -+ elan4_command_write (cq, regs->Registers[2], 2); -+ elan4_command_write (cq, regs->Registers[3], 3); -+ elan4_command_write (cq, regs->Registers[4], 4); -+ elan4_command_write (cq, regs->Registers[5], 5); -+ elan4_command_write (cq, regs->Registers[6], 6); -+} -+ -+void -+elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma) -+{ -+ E4_uint64 *dmaptr = (E4_uint64 *) dma; -+ -+ elan4_command_write (cq, dmaptr[0] | RUN_DMA_CMD, 0); -+ elan4_command_write (cq, dmaptr[1], 1); -+ elan4_command_write (cq, dmaptr[2], 2); -+ elan4_command_write (cq, dmaptr[3], 3); -+ elan4_command_write (cq, dmaptr[4], 4); -+ elan4_command_write (cq, dmaptr[5], 5); -+ elan4_command_write (cq, dmaptr[6], 6); -+} -+ -+void -+elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event) -+{ -+ elan4_command_write (cq, event | SET_EVENT_CMD, 0); -+} -+ -+void -+elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count) -+{ -+ elan4_command_write (cq, SET_EVENTN_CMD,0); -+ elan4_command_write (cq, event | count, 1); -+} -+ -+void -+elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ elan4_command_write (cq, event | WAIT_EVENT_CMD, 0); -+ elan4_command_write (cq, candt, 1); -+ elan4_command_write (cq, param0, 2); -+ elan4_command_write (cq, param1, 3); -+} -+ -+void -+elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | OPEN_STEN_PKT_CMD, 0); -+} -+ -+void -+elan4_guard (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | GUARD_CMD, 0); -+} -+ -+void -+elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+} -+ -+void -+elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+} -+ -+void -+elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+ elan4_command_write (cq, p1, 3); -+} -+ -+void -+elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...) -+{ -+ E4_uint32 ndword = ((trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ va_list ap; -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ -+ va_start (ap, addr); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, va_arg (ap, E4_uint64), i); -+ va_end (ap); -+} -+ -+void -+elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr) -+{ -+ E4_uint32 ndword = ((trtype &TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, *ptr++, i); -+} -+ -diff -urN clean/drivers/net/qsnet/ep/conf_linux.c linux-2.6.9/drivers/net/qsnet/ep/conf_linux.c ---- clean/drivers/net/qsnet/ep/conf_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/conf_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,311 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.c,v 1.40.2.3 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+ -+#include "conf_linux.h" -+ -+#include -+#include -+#include -+#include -+ -+/* Module parameters */ -+unsigned int epdebug = 0; -+unsigned int epdebug_console = 0; -+unsigned int epdebug_cmlevel = 0; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+unsigned int epdebug_check_sum = 0; -+#endif -+int disabled = 0; -+int sdram_assert = 0; -+int assfail_mode = 0; -+int txd_stabilise = 7; -+int portals_envelopes = 0; -+ -+/* External module parameters */ -+extern int MaxSwitchLevels; -+extern int RejoinCheck; -+extern int RejoinPanic; -+extern int PositionCheck; -+extern int MachineId; -+ -+/* Module globals */ -+EP_SYS epsys; -+ -+#ifdef MODULE -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("Elan Kernel Comms"); -+ -+MODULE_LICENSE("GPL"); -+ -+module_param(epdebug, uint, 0); -+module_param(epdebug_console, uint, 0); -+module_param(epdebug_cmlevel, uint, 0); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+module_param(epdebug_check_sum, uint, 0); -+#endif -+module_param(disabled, uint, 0); -+ -+module_param(MachineId, uint, 0); -+module_param(RejoinPanic, uint, 0); -+module_param(RejoinCheck, uint, 0); -+module_param(PositionCheck, uint, 0); -+module_param(MaxSwitchLevels, uint, 0); -+ -+module_param(sdram_assert, uint, 0); -+module_param(assfail_mode, uint, 0); -+module_param(txd_stabilise, uint, 0); -+module_param(portals_envelopes,uint, 0); -+ -+/* epcomms.c large message service functions */ -+EXPORT_SYMBOL(ep_alloc_xmtr); -+EXPORT_SYMBOL(ep_free_xmtr); -+EXPORT_SYMBOL(ep_transmit_message); -+EXPORT_SYMBOL(ep_multicast_message); -+EXPORT_SYMBOL(ep_transmit_rpc); -+ -+EXPORT_SYMBOL(ep_alloc_rcvr); -+EXPORT_SYMBOL(ep_free_rcvr); -+EXPORT_SYMBOL(ep_queue_receive); -+EXPORT_SYMBOL(ep_requeue_receive); -+EXPORT_SYMBOL(ep_rpc_put); -+EXPORT_SYMBOL(ep_rpc_get); -+EXPORT_SYMBOL(ep_complete_rpc); -+EXPORT_SYMBOL(ep_complete_receive); -+ -+EXPORT_SYMBOL(ep_poll_transmits); -+EXPORT_SYMBOL(ep_enable_txcallbacks); -+EXPORT_SYMBOL(ep_disable_txcallbacks); -+ -+/* epcomms.c functions for accessing fields of rxds/txds */ -+EXPORT_SYMBOL(ep_rxd_arg); -+EXPORT_SYMBOL(ep_rxd_len); -+EXPORT_SYMBOL(ep_rxd_isrpc); -+EXPORT_SYMBOL(ep_rxd_envelope); -+EXPORT_SYMBOL(ep_rxd_payload); -+EXPORT_SYMBOL(ep_rxd_node); -+EXPORT_SYMBOL(ep_rxd_status); -+EXPORT_SYMBOL(ep_rxd_statusblk); -+EXPORT_SYMBOL(ep_txd_node); -+EXPORT_SYMBOL(ep_txd_statusblk); -+ -+/* kmap.c, nmh.c - handling mapping of pages into network memory */ -+EXPORT_SYMBOL(ep_dvma_reserve); -+EXPORT_SYMBOL(ep_dvma_release); -+EXPORT_SYMBOL(ep_dvma_load); -+EXPORT_SYMBOL(ep_dvma_unload); -+EXPORT_SYMBOL(ep_nmd_subset); -+EXPORT_SYMBOL(ep_nmd_merge); -+ -+EXPORT_SYMBOL(ep_system); -+ -+/* kcomm.c */ -+EXPORT_SYMBOL(ep_nodeid); -+EXPORT_SYMBOL(ep_numnodes); -+EXPORT_SYMBOL(ep_waitfor_nodeid); -+ -+/* railhints.c */ -+EXPORT_SYMBOL(ep_pickRail); -+EXPORT_SYMBOL(ep_xmtr_bcastrail); -+EXPORT_SYMBOL(ep_xmtr_prefrail); -+EXPORT_SYMBOL(ep_xmtr_availrails); -+EXPORT_SYMBOL(ep_xmtr_noderails); -+EXPORT_SYMBOL(ep_rcvr_prefrail); -+EXPORT_SYMBOL(ep_rcvr_availrails); -+EXPORT_SYMBOL(ep_rxd_railmask); -+ -+EXPORT_SYMBOL(ep_svc_indicator_bitmap); -+EXPORT_SYMBOL(ep_svc_indicator_is_set); -+EXPORT_SYMBOL(ep_svc_indicator_clear); -+EXPORT_SYMBOL(ep_svc_indicator_set); -+ -+/* cm.c */ -+EXPORT_SYMBOL(cm_svc_indicator_clear); -+EXPORT_SYMBOL(cm_svc_indicator_set); -+EXPORT_SYMBOL(cm_svc_indicator_is_set); -+EXPORT_SYMBOL(cm_svc_indicator_bitmap); -+ -+#endif -+ -+EP_SYS * -+ep_system() -+{ -+ return (&epsys); -+} -+ -+void -+ep_mod_inc_usecount() -+{ -+ MOD_INC_USE_COUNT; -+} -+ -+void -+ep_mod_dec_usecount() -+{ -+ MOD_DEC_USE_COUNT; -+} -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+ep_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block ep_dump_notifier = -+{ -+ notifier_call: ep_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+ep_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_reboot_notifier = -+{ -+ notifier_call: ep_reboot_event, -+ priority: 0, -+}; -+ -+#if !defined(NO_PANIC_NOTIFIER) -+static int -+ep_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_panic_notifier = -+{ -+ notifier_call: ep_panic_event, -+ priority: 0, -+}; -+#endif -+ -+/* -+ * Module configuration. -+ */ -+#ifdef MODULE -+static int __init ep_init(void) -+#else -+__initfunc(int ep_init(void)) -+#endif -+{ -+ register int rmask = 0; -+ -+ ep_procfs_init (); -+ -+ ep_sys_init (&epsys); -+ -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ rmask = ep4_create_rails (&epsys, disabled); -+#endif -+ -+ /* If we've brought up an elan4 rail, then disable all elan3 rails. */ -+ if ((rmask & ~disabled) != 0) -+ disabled = ~rmask; -+ -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ rmask = ep3_create_rails (&epsys, disabled); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&ep_dump_notifier); -+#endif -+ register_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Module removal. -+ */ -+#ifdef MODULE -+static void -+__exit ep_exit(void) -+{ -+ register int i; -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&ep_dump_notifier); -+#endif -+ unregister_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (epsys.Rails[i]) -+ { -+ switch (epsys.Rails[i]->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ break; -+ -+ case EP_RAIL_STATE_STARTED: -+ case EP_RAIL_STATE_RUNNING: -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ /* remove per-rail CM proc entries */ -+ ep_stop_rail (epsys.Rails[i]); -+ break; -+ } -+ -+ /* remove EP proc rail entries after per-rail CM entries */ -+ ep_procfs_rail_fini (epsys.Rails[i]); -+ ep_destroy_rail (epsys.Rails[i]); -+ } -+ } -+ -+ ep_sys_fini (&epsys); -+ -+ ep_procfs_fini (); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(ep_init); -+module_exit(ep_exit); -+ -+#endif -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/conf_linux.h linux-2.6.9/drivers/net/qsnet/ep/conf_linux.h ---- clean/drivers/net/qsnet/ep/conf_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/conf_linux.h 2003-10-02 10:16:07.000000000 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.h,v 1.6 2003/10/02 14:16:07 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.h,v $*/ -+ -+#ifndef __ELAN_CONF_LINUX_H -+#define __ELAN_CONF_LINUX_H -+ -+extern void ep_procfs_init(void); -+extern void ep_procfs_fini(void); -+extern void ep_procfs_rail_init(EP_RAIL *rail); -+extern void ep_procfs_rail_fini(EP_RAIL *rail); -+ -+extern void ep_procfs_svc_indicator_create(int svc_indicator, char *name); -+extern void ep_procfs_svc_indicator_remove(int svc_indicator, char *name); -+ -+#endif /* __ELAN_CONF_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/debug.c linux-2.6.9/drivers/net/qsnet/ep/debug.c ---- clean/drivers/net/qsnet/ep/debug.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/debug.c 2004-11-12 05:55:03.000000000 -0500 -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.30 2004/11/12 10:55:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+DisplayInfo di_ep_debug = {ep_debugf, DBG_DEBUG}; -+ -+/* -+ * Generate a partial bitmap string, for the bitmap from offset "off" for "count" bits, -+ * to allow for displaying of subsets, treat entry 0 of the bitmap as having value "base". -+ */ -+int -+ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int off, int nbits) -+{ -+ char entry[12]; /* space for N-N */ -+ register int i, j, len; -+ register int notstart = off; -+ register int notfirst = 0; -+ char *p = str; -+ -+ for (i = off; i < nbits; i++) -+ { -+ if (BT_TEST (bitmap, i)) -+ { -+ for (j = i+1; j < nbits; j++) -+ if (! BT_TEST (bitmap, j)) -+ break; -+ -+ if (j == (i+1)) -+ len = (int)sprintf (entry, "%d", base + i); -+ else -+ len = (int)sprintf (entry, "%d-%d", base + i, base + j-1); -+ -+ /* NOTE the 2 is for: one for comma, one for (possible) closing bracket */ -+ if ((p - str) <= (nbytes - (len+3))) -+ p += (int)sprintf (p, "%c%s", notfirst++ ? ',' : notstart ? ' ' : '[', entry); -+ else -+ { -+ /* no more space on this line, so move onto next */ -+ sprintf (p, "%c", notfirst++ ? ',' : '['); -+ -+ return (i); -+ } -+ -+ i = j; -+ } -+ } -+ -+ if (!notfirst) -+ sprintf (str, ""); -+ else -+ strcpy (p, "]"); -+ -+ return (-1); -+} -+ -+void -+ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits) -+{ -+ /* Tru64 kernel printf() truncates lines at 128 bytes - the man pages for printf (9) -+ * do not mention this restriction, nor that it does not terminate the line with a -+ * carriage return, this is pretty naff. -+ * Linux has a similar limit though is much more generous at 1024 - and you can just -+ * look at the code to see why this has been done. -+ * -+ * Our nodeset information could well be longer than 128 characters, so we're going to -+ * have to split it into a number of lines. */ -+ -+#define LINEBUF_SIZE 128 -+ char *p, linebuf[LINEBUF_SIZE+1]; /* +1 for null termination */ -+ int i, noff, off = 0; -+ -+ do { -+ if (off == 0) -+ p = linebuf + (int)sprintf (linebuf, "%s: %s ", prefix, tag); -+ else -+ { -+ p = linebuf + (int)sprintf (linebuf, "%s: ", prefix); -+ for (i = 0; tag[i] != '\0'; i++) -+ *p++ = ' '; -+ } -+ -+ noff = ep_sprintf_bitmap (p, &linebuf[LINEBUF_SIZE-1]-p, bitmap, base, off, nbits); -+ -+ printk ("%s\n", linebuf); -+ -+ } while ((off = noff) != -1); -+ -+#undef LINEBUF_SIZE -+} -+ -+void -+ep_debugf (long mode, char *fmt, ...) -+{ -+ va_list ap; -+ char prefix[32]; -+ -+ va_start (ap, fmt); -+#if defined(LINUX) -+ sprintf (prefix, "[%08d.%04d] ", (int) lbolt, current->pid); -+#else -+ sprintf (prefix, "[%08d.----] ", (int) lbolt); -+#endif -+ qsnet_vdebugf ((mode & epdebug_console ? QSNET_DEBUG_CONSOLE: 0) | QSNET_DEBUG_BUFFER, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+int -+ep_assfail (EP_RAIL *rail, const char *ex, const char *func, const char *file, const int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (assfail_mode & 2) -+ panic ("ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (assfail_mode & 4) -+ epdebug = 0; -+ -+ return 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/debug_elan4.c linux-2.6.9/drivers/net/qsnet/ep/debug_elan4.c ---- clean/drivers/net/qsnet/ep/debug_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/debug_elan4.c 2004-05-19 06:21:04.000000000 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug_elan4.c,v 1.1 2004/05/19 10:21:04 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/debug_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+#include "debug.h" -+ -+static void -+ep4_display_ecqs (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep_debugf (DBG_DEBUG, "ECQ: type %d: avail %d cqnum %d\n", i, ecq->ecq_avail, elan4_cq2num (ecq->ecq_cq)); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_debug_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_debugf (DBG_DEBUG, "ep%d: is elan4 %d rev %c\n", rail->r_generic.Number, -+ rail->r_generic.Devinfo.dev_instance, 'a' + rail->r_generic.Devinfo.dev_revision_id); -+ -+ ep4_display_ecqs (rail); -+ -+ ep_display_alloc (&sys->Allocator); -+ ep_display_rmap (sys->Allocator.ResourceMap); -+ -+ ep_display_alloc (&rail->r_generic.ElanAllocator); -+ ep_display_alloc (&rail->r_generic.MainAllocator); -+ -+ ep_display_rmap (rail->r_generic.ElanAllocator.ResourceMap); -+} -+ -diff -urN clean/drivers/net/qsnet/ep/debug.h linux-2.6.9/drivers/net/qsnet/ep/debug.h ---- clean/drivers/net/qsnet/ep/debug.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/debug.h 2005-04-05 12:36:28.000000000 -0400 -@@ -0,0 +1,111 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EPDEBUG_H -+#define _ELAN3_EPDEBUG_H -+ -+#ident "$Id: debug.h,v 1.21 2005/04/05 16:36:28 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.h,v $ */ -+ -+extern unsigned int epdebug; -+extern unsigned int epdebug_console; -+extern unsigned int epdebug_cmlevel; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern unsigned int epdebug_check_sum; -+#endif -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_PROBE 0x00000002 -+#define DBG_ROUTETABLE 0x00000004 -+#define DBG_STATEMAP 0x00000008 -+ -+#define DBG_CM 0x00000020 -+#define DBG_XMTR 0x00000040 -+#define DBG_RCVR 0x00000080 -+#define DBG_FORWARD 0x00000100 -+#define DBG_DISCON 0x00000200 -+#define DBG_EPTRAP 0x00000400 -+#define DBG_COMMAND 0x00000800 -+#define DBG_RETRY 0x00001000 -+#define DBG_DEBUG 0x00002000 -+#define DBG_NETWORK_ERROR 0x00004000 -+#define DBG_MSGSYS 0x00008000 -+#define DBG_MANAGER 0x00010000 -+#define DBG_KMAP 0x00020000 -+#define DBG_FAILOVER 0x00040000 -+#define DBG_MAPNMD 0x00080000 -+#define DBG_KMSG 0x00100000 -+#define DBG_SVC 0x00200000 -+#define DBG_STABILISE 0x00400000 -+ -+#if defined(DEBUG_PRINTF) -+ -+# define EPRINTF0(m,fmt) ((epdebug&(m)) ? ep_debugf(m,fmt) : (void)0) -+# define EPRINTF1(m,fmt,a) ((epdebug&(m)) ? ep_debugf(m,fmt,a) : (void)0) -+# define EPRINTF2(m,fmt,a,b) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b) : (void)0) -+# define EPRINTF3(m,fmt,a,b,c) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c) : (void)0) -+# define EPRINTF4(m,fmt,a,b,c,d) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d) : (void)0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e) : (void)0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f) : (void)0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+# define EPRINTF10(m,fmt,a,b,c,d,e,f,g,h,i,j) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i,j) : (void)0) -+ -+# define CPRINTF0(lvl,fmt) (((lvl) <= epdebug_cmlevel) ? EPRINTF0(DBG_CM,fmt) : (void)0) -+# define CPRINTF1(lvl,fmt,a) (((lvl) <= epdebug_cmlevel) ? EPRINTF1(DBG_CM,fmt,a) : (void)0) -+# define CPRINTF2(lvl,fmt,a,b) (((lvl) <= epdebug_cmlevel) ? EPRINTF2(DBG_CM,fmt,a,b) : (void)0) -+# define CPRINTF3(lvl,fmt,a,b,c) (((lvl) <= epdebug_cmlevel) ? EPRINTF3(DBG_CM,fmt,a,b,c) : (void)0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (((lvl) <= epdebug_cmlevel) ? EPRINTF4(DBG_CM,fmt,a,b,c,d) : (void)0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (((lvl) <= epdebug_cmlevel) ? EPRINTF5(DBG_CM,fmt,a,b,c,d,e) : (void)0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (((lvl) <= epdebug_cmlevel) ? EPRINTF6(DBG_CM,fmt,a,b,c,d,e,f) : (void)0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (((lvl) <= epdebug_cmlevel) ? EPRINTF7(DBG_CM,fmt,a,b,c,d,e,f,g) : (void)0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (((lvl) <= epdebug_cmlevel) ? EPRINTF8(DBG_CM,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (((lvl) <= epdebug_cmlevel) ? EPRINTF9(DBG_CM,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+ -+#if defined __GNUC__ -+extern void ep_debugf (long mode, char *fmt, ...) __attribute__ ((format (printf,2,3))); -+#else -+extern void ep_debugf (long mode, char *fmt, ...); -+#endif -+ -+#else -+ -+# define EPRINTF0(m,fmt) (0) -+# define EPRINTF1(m,fmt,a) (0) -+# define EPRINTF2(m,fmt,a,b) (0) -+# define EPRINTF3(m,fmt,a,b,c) (0) -+# define EPRINTF4(m,fmt,a,b,c,d) (0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) (0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) (0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) (0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i,j) (0) -+ -+# define CPRINTF0(lvl,fmt) (0) -+# define CPRINTF1(lvl,fmt,a) (0) -+# define CPRINTF2(lvl,fmt,a,b) (0) -+# define CPRINTF3(lvl,fmt,a,b,c) (0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (0) -+ -+#endif /* DEBUG */ -+ -+extern DisplayInfo di_ep_debug; -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN3_EPDEBUG_H */ -+ -diff -urN clean/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S linux-2.6.9/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S ---- clean/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 2004-04-25 07:25:43.000000000 -0400 -@@ -0,0 +1,133 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_asm_elan4_thread.S,v 1.5 2004/04/25 11:25:43 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+#include "assym_elan4.h" -+ -+/* XXXXX - registers.h */ -+#define E4_MAIN_INT_SHIFT 14 -+ -+/* -+ * c_waitevent_interrupt (E4_uint64 *commandport, E4_Event *event, E4_uint64 count, E4_uint64 intcookie) -+ */ -+ .global c_waitevent_interrupt -+c_waitevent_interrupt: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ or %r9, WAIT_EVENT_CMD, %r16 ! WAIT_EVENT_CMD | event -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 ! ev_CountAndType -+ mov %sp, %r18 ! ev_Source -+ mov %r8, %r19 ! ev_Dest -+ sll8 %r11, E4_MAIN_INT_SHIFT, %r20 -+ or %r20, INTERRUPT_CMD, %r20 ! INTERRUPT_CMD | (cookie << E4_MAIN_INT_SHIFT) -+ mov NOP_CMD, %r21 -+ mov NOP_CMD, %r22 -+ mov NOP_CMD, %r23 -+ -+ st64suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -+ -+#define EP4_RCVR_PENDING_STALLED 1 /* indicates thread has stalled for no descriptor (rcvr_pending_head) */ -+ -+#define RXD_DEBUG(VAL,RXD,TMP) \ -+ mov VAL, TMP; \ -+ st8 TMP, [RXD + EP4_RXD_DEBUG] -+ -+ -+ /* -+ * %r2 - rcvr elan -+ * %r3 - rxd elan -+ */ -+ .global c_queue_rxd -+c_queue_rxd: -+ RXD_DEBUG(1, %r3, %r23) -+ -+ ld16 [%r2 + EP4_RCVR_PENDING_TAILP], %r18 /* r18 == tailp, r19 = head */ -+ add %r3, EP4_RXD_NEXT, %r4 -+ -+ st8 %r0, [%r3 + EP4_RXD_NEXT] /* rxd->rxd_next = NULL */ -+ st8 %r4, [%r2 + EP4_RCVR_PENDING_TAILP] /* tailp = &rxd->rxd_next */ -+ st8 %r3, [%r18] /* *tailp = rxd */ -+ -+ cmp %r19, EP4_RCVR_PENDING_STALLED /* thread stalled ? */ -+ beq 1f -+ mov %r18, %r16 /* must have used %r16, %r19, %r23 */ -+ mov %r3, %r23 -+ -+ RXD_DEBUG(2, %r3, %r23) -+ -+ st8suspend %r16, [%r3 + EP4_RXD_QUEUED] /* no - mark as queued - all done */ -+ -+1: st8 %r16, [%r3 + EP4_RXD_QUEUED] /* mark as queued */ -+ -+ RXD_DEBUG(3, %r3, %r23) -+ -+ mov %r3, %r8 /* return rxd from c_stall_thread */ -+ ba .epcomms_resume_thread /* resume the thread */ -+ ld64 [%r2 + EP4_RCVR_THREAD_STALL], %r0 -+ -+ /* -+ * c_stall_thread (EP4_RCVR_ELAN *rcvrElan) -+ */ -+ .global c_stall_thread -+c_stall_thread: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov EP4_RCVR_PENDING_STALLED, %r9 // Mark rcvr as stalled -+ st8 %r9, [%r8 + EP4_RCVR_PENDING_HEAD] -+ -+ // XXXX _ TBD should generate interrupt -+ -+ mov %r1, %r17 // SP -+ mov %r7, %r23 // return pc -+ -+ st64suspend %r16, [%r8 + EP4_RCVR_THREAD_STALL] -+ -+.epcomms_resume_thread: -+ /* %r8 == rxdElan */ -+ -+ ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r7+8, %r0 // and return -+ add %sp, 192, %sp -+ -diff -urN clean/drivers/net/qsnet/ep/epcomms.c linux-2.6.9/drivers/net/qsnet/ep/epcomms.c ---- clean/drivers/net/qsnet/ep/epcomms.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms.c 2004-11-30 07:02:06.000000000 -0500 -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms.c,v 1.77 2004/11/30 12:02:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.c,v $ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include "cm.h" -+#include "debug.h" -+ -+static void -+ep_comms_thread (void *arg) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) arg; -+ struct list_head *el; -+ -+ kernel_thread_init ("ep_comms"); -+ -+ /* since ep_alloc_xmtr() has incremented the module use count, -+ * we would be preventing the module from being unloaded, so -+ * we decrement the use count since this thread must terminate -+ * during unload of the module. -+ */ -+ ep_mod_dec_usecount(); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ /* NOTE - subsys->Lock serializes us against flush/relocations -+ * caused by rail nodeset transitions. -+ */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ nextRunTime = ep_check_xmtr (list_entry (el, EP_XMTR, Link), nextRunTime); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ nextRunTime = ep_check_rcvr (list_entry (el, EP_RCVR, Link), nextRunTime); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_csum_rxds (subsys); -+#endif -+ nextRunTime = ep_forward_rxds (subsys, nextRunTime); -+ -+ if (ep_kthread_sleep (&subsys->Thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_mod_inc_usecount(); -+ -+ ep_kthread_stopped (&subsys->Thread); -+ kernel_thread_exit(); -+} -+ -+int -+ep_comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail; -+ struct list_head *el; -+ -+ printk ("%s: vendorid=%x deviceid=%x\n", rail->Name, rail->Devinfo.dev_vendor_id, rail->Devinfo.dev_device_id); -+ -+ switch (rail->Devinfo.dev_device_id) -+ { -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ case PCI_DEVICE_ID_ELAN3: -+ commsRail = ep3comms_add_rail (s, sys, rail); -+ break; -+#endif -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ case PCI_DEVICE_ID_ELAN4: -+ commsRail = ep4comms_add_rail (s, sys, rail); -+ break; -+#endif -+ default: -+ return 0; -+ } -+ -+ if (commsRail == NULL) -+ return 1; -+ -+ commsRail->Rail = rail; -+ commsRail->Subsys = subsys; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&commsRail->Link, &subsys->Rails); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP (commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ return 0; -+} -+ -+void -+ep_comms_del_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail = NULL; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ /* find out rail entry and remove from system list */ -+ list_for_each (el, &subsys->Rails) { -+ if ((commsRail = list_entry (el, EP_COMMS_RAIL, Link))->Rail == rail) -+ break; -+ } -+ -+ list_del (&commsRail->Link); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP(commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP(commsRail,Xmtr.DelRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ EP_RAIL_OP (commsRail, DelRail) (commsRail); -+} -+ -+void -+ep_comms_fini (EP_SUBSYS *s, EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ -+ ep_kthread_stop (&subsys->Thread); -+ ep_kthread_destroy (&subsys->Thread); -+ -+ if (subsys->ForwardXmtr) -+ ep_free_xmtr (subsys->ForwardXmtr); -+ -+ spin_lock_destroy (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spin_lock_destroy (&subsys->CheckSumDescLock); -+#endif -+ -+ kmutex_destroy (&subsys->Lock); -+ -+ KMEM_FREE (subsys, sizeof (EP_COMMS_SUBSYS)); -+} -+ -+int -+ep_comms_init (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, EP_COMMS_SUBSYS *, sizeof (EP_COMMS_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ INIT_LIST_HEAD (&subsys->Rails); -+ INIT_LIST_HEAD (&subsys->Receivers); -+ INIT_LIST_HEAD (&subsys->Transmitters); -+ INIT_LIST_HEAD (&subsys->ForwardDescList); -+ -+ kmutex_init (&subsys->Lock); -+ spin_lock_init (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&subsys->CheckSumDescList); -+ spin_lock_init (&subsys->CheckSumDescLock); -+#endif -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "epcomms"; -+ subsys->Subsys.Destroy = ep_comms_fini; -+ subsys->Subsys.AddRail = ep_comms_add_rail; -+ subsys->Subsys.RemoveRail = ep_comms_del_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ ep_kthread_init (&subsys->Thread); -+ -+ if ((subsys->ForwardXmtr = ep_alloc_xmtr (subsys->Subsys.Sys)) == NULL) -+ goto failed; -+ -+ if (kernel_thread_create (ep_comms_thread, subsys) == NULL) -+ goto failed; -+ ep_kthread_started (&subsys->Thread); -+ -+ return (0); -+ -+ failed: -+ ep_subsys_del (sys, &subsys->Subsys); -+ ep_comms_fini (&subsys->Subsys, sys); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_comms_display (EP_SYS *sys, char *how) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME); -+ struct list_head *el; -+ -+ if (how == NULL || !strncmp (how, "rail", 4)) -+ { -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, DisplayRail) (commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ } -+ -+ if (how == NULL || !strncmp (how, "xmtr", 4)) -+ list_for_each (el, &subsys->Transmitters) -+ ep_display_xmtr (&di_ep_debug, list_entry (el, EP_XMTR, Link)); -+ -+ if (how == NULL || !strncmp (how, "rcvr", 4)) -+ list_for_each (el, &subsys->Receivers) -+ ep_display_rcvr (&di_ep_debug, list_entry (el, EP_RCVR, Link), (how && how[4] == ',') ? 1 : 0); -+} -+ -+int -+ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_set: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_set(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_clear: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_clear(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ int set = 0; -+ -+ EPRINTF2 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d \n", svc_indicator, nodeId); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_is_set: ep_subsys_find failed\n"); -+ return (0); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ set |= cm_svc_indicator_is_set(commsRail->Rail, svc_indicator, nodeId); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF3 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d returning %d\n", svc_indicator, nodeId, set); -+ return set; -+} -+ -+int -+ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_bitmap: ep_subsys_find failed\n"); -+ return (-2); -+ } -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (commsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (0); -+} -+ -+int -+ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ int i; -+ -+ EPRINTF1 (DBG_SVC,"ep_xmtr_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (xmtr->Rails[i]->CommsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ } -+ -+ return (0); -+} -+ -+EP_RAILMASK -+ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ EP_RAILMASK rmask=0; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and reading info from Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId) -+{ -+ EP_RAILMASK rmask=0; -+ EP_COMMS_RAIL *commsRail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ commsRail = xmtr->Rails[i]->CommsRail; -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ } -+ -+ EPRINTF3 (DBG_SVC, "ep_xmtr_svc_indicator_railmask: svc %d node %d mask 0x%x\n", svc_indicator, nodeId, rmask); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RAILMASK rmask=0; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == service) -+ rmask |= rcvr->RailMask; -+ } -+ kmutex_unlock(&subsys->Lock); -+ -+ return (rmask); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+uint32_t -+ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags) -+{ -+ EP_NMH *nmh; -+ int i; -+ uint16_t check_data = 0; -+ uint16_t check_env = 0; -+ -+ for (i = 0; i < nFrags; i++) { -+ /* find the nmh for this frag */ -+ nmh = ep_nmh_find (&sys->MappingTable, &nmd[i]); -+ -+ ASSERT( nmh != NULL); -+ -+ /* add the next frag to the check sum */ -+ check_data = nmh->nmh_ops->op_calc_check_sum (sys, nmh, &nmd[i], check_data); -+ } -+ -+ check_env = rolling_check_sum ((char *) env, offsetof(EP_ENVELOPE, CheckSum), 0); -+ -+ return (EP_ENVELOPE_CHECK_SUM | ( (check_env & 0x7FFF) << 16) | (check_data & 0xFFFF)); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan3.c linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3.c ---- clean/drivers/net/qsnet/ep/epcomms_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3.c 2004-08-03 07:34:34.000000000 -0400 -@@ -0,0 +1,191 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3.c,v 1.60 2004/08/03 11:34:34 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_flush_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_flush_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_failover_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_failover_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_disconnect_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_disconnect_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+EP_COMMS_RAIL * -+ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COMMS_RAIL *commsRail; -+ EP3_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP3_COMMS_RAIL *, sizeof (EP3_COMMS_RAIL), TRUE); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->Generic.Ops.DelRail = ep3comms_del_rail; -+ commsRail->Generic.Ops.DisplayRail = ep3comms_display_rail; -+ commsRail->Generic.Ops.Rcvr.AddRail = ep3rcvr_add_rail; -+ commsRail->Generic.Ops.Rcvr.DelRail = ep3rcvr_del_rail; -+ commsRail->Generic.Ops.Rcvr.Check = ep3rcvr_check; -+ commsRail->Generic.Ops.Rcvr.QueueRxd = ep3rcvr_queue_rxd; -+ commsRail->Generic.Ops.Rcvr.RpcPut = ep3rcvr_rpc_put; -+ commsRail->Generic.Ops.Rcvr.RpcGet = ep3rcvr_rpc_get; -+ commsRail->Generic.Ops.Rcvr.RpcComplete = ep3rcvr_rpc_complete; -+ -+ commsRail->Generic.Ops.Rcvr.StealRxd = ep3rcvr_steal_rxd; -+ -+ commsRail->Generic.Ops.Rcvr.FillOutRailStats = ep3rcvr_fillout_rail_stats; -+ -+ commsRail->Generic.Ops.Rcvr.DisplayRcvr = ep3rcvr_display_rcvr; -+ commsRail->Generic.Ops.Rcvr.DisplayRxd = ep3rcvr_display_rxd; -+ -+ commsRail->Generic.Ops.Xmtr.AddRail = ep3xmtr_add_rail; -+ commsRail->Generic.Ops.Xmtr.DelRail = ep3xmtr_del_rail; -+ commsRail->Generic.Ops.Xmtr.Check = ep3xmtr_check; -+ commsRail->Generic.Ops.Xmtr.BindTxd = ep3xmtr_bind_txd; -+ commsRail->Generic.Ops.Xmtr.UnbindTxd = ep3xmtr_unbind_txd; -+ commsRail->Generic.Ops.Xmtr.PollTxd = ep3xmtr_poll_txd; -+ commsRail->Generic.Ops.Xmtr.CheckTxdState = ep3xmtr_check_txd_state; -+ -+ commsRail->Generic.Ops.Xmtr.DisplayXmtr = ep3xmtr_display_xmtr; -+ commsRail->Generic.Ops.Xmtr.DisplayTxd = ep3xmtr_display_txd; -+ -+ commsRail->Generic.Ops.Xmtr.FillOutRailStats = ep3xmtr_fillout_rail_stats; -+ -+ /* Allocate the input queues at their fixed elan address */ -+ if (! (commsRail->QueueDescs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * sizeof (EP3_InputQueue), PAGESIZE), EP_PERM_ALL, 0))) -+ { -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = 0; -+ qdesc.q_top = 0; -+ qdesc.q_fptr = 0; -+ qdesc.q_bptr = 0; -+ qdesc.q_size = 0; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan3_sdram_copyl_to_sdram (dev, &qdesc, commsRail->QueueDescs + (i * sizeof (EP3_InputQueue)), sizeof (EP3_InputQueue)); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep3comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) r; -+ EP_RAIL *rail = commsRail->Generic.Rail; -+ -+ ep_remove_callback (rail, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ ep_free_memory_elan (rail, EP_EPCOMMS_QUEUE_BASE); -+ -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+} -+ -+void -+ep3comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan3.h linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3.h ---- clean/drivers/net/qsnet/ep/epcomms_elan3.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3.h 2004-11-12 05:55:03.000000000 -0500 -@@ -0,0 +1,330 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN3_H -+#define __EPCOMMS_ELAN3_H -+ -+#ident "@(#)$Id: epcomms_elan3.h,v 1.28 2004/11/12 10:55:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.h,v $ */ -+ -+#define EP3_DMAFAILCOUNT 3 -+ -+ -+/* Main/Elan spinlock */ -+typedef struct ep3_spinlock_elan -+{ -+ volatile E3_uint32 sl_lock; /* main wants a lock */ -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[14]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_ELAN; -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef struct ep3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_pad[15]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_MAIN; -+ -+#if defined (__ELAN3__) -+ -+extern void ep3_spinblock (EP3_SPINLOCK_ELAN *, EP3_SPINLOCK_MAIN *); -+ -+#define EP3_SPINENTER(SLE,SL) \ -+do {\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_lock) \ -+ ep3_spinblock(SLE, SL);\ -+} while (0) -+ -+#define EP3_SPINEXIT(SLE,SL) \ -+do {\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+} while (0) -+ -+#else -+ -+#define EP3_SPINENTER(DEV,SLE,SL) do { \ -+ E3_uint32 seq; \ -+\ -+ mb();\ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1);\ -+ mb();\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ while (seq != (SL)->sl_seq)\ -+ {\ -+ while ((SL)->sl_seq == (seq - 1))\ -+ {\ -+ mb();\ -+\ -+ DELAY (1); \ -+ }\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ }\ -+} while (0) -+ -+#define EP3_SPINEXIT(DEV,SLE,SL) do { \ -+ wmb(); \ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0);\ -+ mmiob(); \ -+} while (0) -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail elan memory portion receive descriptor */ -+typedef struct ep3_rxd_rail_elan -+{ -+ E3_DMA Dmas[EP_MAXFRAG+1]; /* Dma's for fetching data/putting data & status blk */ -+ E3_Event ChainEvent[EP_MAXFRAG]; /* Events to chain dmas */ -+ E3_BlockCopyEvent DataEvent; /* message received block event */ -+ E3_BlockCopyEvent DoneEvent; /* RPC status block event */ -+ -+ EP_NMD Data; /* Network mapping handle for receive data */ -+ -+ E3_Addr RxdMain; /* pointer to main memory portion */ -+ -+ E3_Addr Next; /* linked list when on pending list (elan address) */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rxd_main */ -+} EP3_RXD_RAIL_ELAN; -+ -+#define EP3_RXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_RXD_RAIL_ELAN), E3_DMA_ALIGN) -+ -+/* per-rail main memory portion of receive descriptor */ -+typedef struct ep3_rxd_rail_main -+{ -+ E3_uint32 DataEvent; /* dest for done event */ -+ E3_uint32 DoneEvent; /* dest for done event */ -+} EP3_RXD_RAIL_MAIN; -+ -+#define EP3_RXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_RXD_RAIL_MAIN), sizeof (E3_uint32)) -+ -+#if !defined(__ELAN3__) -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep3_rxd_rail -+{ -+ EP_RXD_RAIL Generic; /* generic rxd rail */ -+ -+ EP3_COOKIE DataCookie; /* Event cookie */ -+ EP3_COOKIE DoneCookie; /* Event cookie */ -+ EP3_COOKIE ChainCookie[EP_MAXFRAG]; /* Event cookie */ -+ -+ sdramaddr_t RxdElan; /* per-rail elan receive descriptor */ -+ E3_Addr RxdElanAddr; /* and elan address */ -+ -+ EP3_RXD_RAIL_MAIN *RxdMain; /* per-rail main receive descriptor */ -+ E3_Addr RxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_RXD_RAIL; -+ -+#define EP3_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep3_rxd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_RXD_RAIL Rxd[EP3_NUM_RXD_PER_BLOCK]; -+} EP3_RXD_RAIL_BLOCK; -+ -+#endif /* ! __ELAN3__ */ -+ -+typedef struct ep3_rcvr_rail_elan /* Elan memory service structure */ -+{ -+ EP3_SPINLOCK_ELAN ThreadLock; /* elan memory portion of spin lock */ -+ EP3_SPINLOCK_ELAN PendingLock; /* spin lock for pending rx list */ -+ -+ E3_Addr PendingDescs; /* list of pending receive descriptors */ -+ E3_uint32 ThreadShouldHalt; /* marks that the thread should halt */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rcvr (for StallThreadForNoDescs)*/ -+} EP3_RCVR_RAIL_ELAN; -+ -+typedef struct ep3_rcvr_rail_main /* Main memory service strucure */ -+{ -+ EP3_SPINLOCK_MAIN ThreadLock; /* main memory portion of spin lock */ -+ EP3_SPINLOCK_MAIN PendingLock; /* spinlock for pending rx list */ -+ -+ volatile unsigned PendingDescsTailp; /* next pointer of last receive descriptor on pending list */ -+} EP3_RCVR_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_RCVR_RAIL_STATS; -+ -+typedef struct ep3_rcvr_rail -+{ -+ EP_RCVR_RAIL Generic; /* generic portion */ -+ -+ EP3_RCVR_RAIL_MAIN *RcvrMain; -+ E3_Addr RcvrMainAddr; -+ sdramaddr_t RcvrElan; -+ E3_Addr RcvrElanAddr; -+ -+ sdramaddr_t InputQueueBase; /* base of receive queue */ -+ E3_Addr InputQueueAddr; /* elan address of receive queue */ -+ -+ E3_Addr ThreadStack; /* Thread processor stack */ -+ E3_Addr ThreadWaiting; /* Elan thread is waiting as no receive descriptors pending (sp stored here ) */ -+ E3_Addr ThreadHalted; /* Elan thread is waiting as it was requested to halt */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and sleep here */ -+ -+ EP3_RCVR_RAIL_STATS stats; /* elan3 specific rcvr_rail stats */ -+} EP3_RCVR_RAIL; -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail portion of transmit descriptor */ -+typedef struct ep3_txd_rail_elan -+{ -+ EP_ENVELOPE Envelope; /* message envelope */ -+ EP_PAYLOAD Payload; /* message payload */ -+ -+ E3_BlockCopyEvent EnveEvent; /* envelope event */ -+ E3_BlockCopyEvent DataEvent; /* data transfer event */ -+ E3_BlockCopyEvent DoneEvent; /* rpc done event */ -+} EP3_TXD_RAIL_ELAN; -+ -+#define EP3_TXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_TXD_RAIL_ELAN), E3_BLK_ALIGN) -+ -+typedef struct ep3_txd_rail_main -+{ -+ E3_uint32 EnveEvent; /* dest for envelope event */ -+ E3_uint32 DataEvent; /* dest for data transfer event */ -+ E3_uint32 DoneEvent; /* dest for rpc done event */ -+} EP3_TXD_RAIL_MAIN; -+ -+#define EP3_TXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_TXD_RAIL_MAIN), E3_BLK_ALIGN) -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_txd_rail -+{ -+ EP_TXD_RAIL Generic; /* generic txd rail */ -+ -+ EP3_COOKIE EnveCookie; /* Event cookies */ -+ EP3_COOKIE DataCookie; -+ EP3_COOKIE DoneCookie; -+ -+ sdramaddr_t TxdElan; /* Elan TX descriptor */ -+ E3_Addr TxdElanAddr; /* and elan address */ -+ -+ EP3_TXD_RAIL_MAIN *TxdMain; /* Elan Main memory tx descriptor */ -+ E3_Addr TxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_TXD_RAIL; -+ -+ -+#define EP3_NUM_TXD_PER_BLOCK 16 -+ -+typedef struct ep3_txd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_TXD_RAIL Txd[EP3_NUM_TXD_PER_BLOCK]; -+} EP3_TXD_RAIL_BLOCK; -+ -+typedef struct ep3_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_XMTR_RAIL_STATS; -+ -+typedef struct ep3_xmtr_rail -+{ -+ EP_XMTR_RAIL Generic; /* generic portion */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ EP3_XMTR_RAIL_STATS stats; /* elan3 specific xmtr rail stats */ -+} EP3_XMTR_RAIL; -+ -+typedef struct ep3_comms_rail -+{ -+ EP_COMMS_RAIL Generic; /* generic comms rail */ -+ sdramaddr_t QueueDescs; /* input queue descriptors */ -+} EP3_COMMS_RAIL; -+ -+/* epcommxTx_elan3.c */ -+extern void ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+ -+/* epcommsRx_elan3.c */ -+extern void CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdMainAddr, E3_uint32 PAckVal); -+extern void StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+extern void StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+ -+extern void ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep3comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep3comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan3.c */ -+extern int ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep3xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep3xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep3xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep3xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan3.c */ -+extern int ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep3rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep3rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep3rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN3_H */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan3_thread.c linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3_thread.c ---- clean/drivers/net/qsnet/ep/epcomms_elan3_thread.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan3_thread.c 2004-01-20 06:03:15.000000000 -0500 -@@ -0,0 +1,296 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3_thread.c,v 1.4 2004/01/20 11:03:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3_thread.c,v $ */ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long long int64_t; -+typedef unsigned long long uint64_t; -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+#ifndef offsetof -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+ -+EP3_RAIL_ELAN *rail; -+EP3_RCVR_RAIL_ELAN *r; -+EP3_RCVR_RAIL_MAIN *rm; -+ -+void -+ep3comms_rcvr (EP3_RAIL_ELAN *rail, EP3_RCVR_RAIL_ELAN *rcvrElan, EP3_RCVR_RAIL_MAIN *rcvrMain, -+ EP3_InputQueue *q, unsigned int *cookies) -+{ -+ int count = 1; -+ E3_Addr nfptr = q->q_fptr + q->q_size; -+ E3_uint32 tmp; -+ int i; -+ E3_Addr buffer; -+ int len; -+ E3_DMA *dma; -+ E3_Event *event; -+ -+ /* clear the queue state to allow envelopes to arrive */ -+ q->q_state = 0; -+ -+ for (;;) -+ { -+ if (! rcvrElan->ThreadShouldHalt) -+ c_waitevent ((E3_Event *) &q->q_event, count); /* HALT POINT */ -+ -+ if (rcvrElan->ThreadShouldHalt && nfptr == q->q_bptr) -+ { -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_THREAD_HALTED)); /* HALT POINT */ -+ continue; -+ } -+ -+ count = 0; -+ do { -+ /* Process the message at nfptr */ -+ EP_ENVELOPE *env = (EP_ENVELOPE *) nfptr; -+ EP3_RXD_RAIL_ELAN *rxd; -+ int ack; -+ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ -+ while ((rxd = (EP3_RXD_RAIL_ELAN *)rcvrElan->PendingDescs) == 0) -+ { -+ /* no receive descriptors, so trap to the kernel to wait -+ * for receive descriptor to be queued, we pass the rcvr -+ * in %g1, so that the trap handler can restart us. */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_NO_DESCS)); /* HALT POINT */ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ } -+ -+ if (env->Version != EP_ENVELOPE_VERSION) -+ { -+ /* This envelope has been cancelled - so just consume it */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ goto consume_envelope; -+ } -+ -+ dma = rxd->Dmas; -+ event = rxd->ChainEvent; -+ -+ if (EP_IS_MULTICAST(env->Attr)) -+ { -+ dma->dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t); -+ dma->dma_source = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, Bitmap); -+ dma->dma_dest = (E3_Addr) &((EP_RXD_MAIN *) rxd->RxdMain)->Bitmap; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ dma++; event++; -+ } -+ -+ if (env->nFrags == 0) -+ { -+ /* Generate a "get" DMA to accept the envelope and fire the rx handler */ -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = 0; -+ dma->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ len = 0; -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = rxd->Data.nmd_addr, len = 0; i < env->nFrags; i++, dma++, event++) -+ { -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = env->Frags[i].nmd_len; -+ dma->dma_source = env->Frags[i].nmd_addr; -+ dma->dma_dest = buffer; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ buffer += dma->dma_size; -+ len += dma->dma_size; -+ } -+ -+ /* Point the last dma at the done event */ -+ (--dma)->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ -+ if (rxd->Data.nmd_len < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = 0, dma = rxd->Dmas; i < env->nFrags; i++, dma++) -+ dma->dma_size = 0; -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Store the received message length in the rxdElan for CompleteEnvelope */ -+ rxd->Data.nmd_len = len; -+ -+ /* Initialise %g1 with the "rxd" so the trap handler can -+ * complete the envelope processing if we trap while sending the -+ * packet */ -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rxd)); -+ -+ /* Generate a packet to start the data transfer */ -+ c_open (EP_VP_DATA (env->NodeId)); -+ c_sendtrans2 (TR_THREADIDENTIFY, rxd->Dmas->dma_destCookieVProc, 0, 0); -+ c_sendmem (TR_SENDACK | TR_REMOTEDMA, 0, rxd->Dmas); -+ ack = c_close(); -+ -+ /* -+ * If we trapped for an output timeout, then the trap handler will have -+ * completed processing this envelope and cleared the spinlock, so we just -+ * need to update the queue descriptor. -+ */ -+ if (ack == EP3_PAckStolen) -+ goto consume_envelope; -+ -+ if (ack != E3_PAckOk) -+ { -+ /* our packet got nacked, so trap into the kernel so that -+ * it can complete processing of this envelope. -+ */ -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_PACKET_NACKED)); /* HALT POINT */ -+ goto consume_envelope; -+ } -+ -+ /* remove the RXD from the pending list */ -+ EP3_SPINENTER (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ if ((rcvrElan->PendingDescs = rxd->Next) == 0) -+ rcvrMain->PendingDescsTailp = 0; -+ EP3_SPINEXIT (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ -+ /* Copy the envelope information - as 5 64 byte chunks. -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ if (EP_HAS_PAYLOAD(env->Attr)) -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" /* copy envelope */ -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%0 + 128],%%l0 ! load 64-byte block into locals/ins\n" /* copy payload */ -+ "stblock64 %%l0,[%1 + 128] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 192],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 192] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ else -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ -+ /* Store the message length to indicate that I've finished */ -+ ((EP_RXD_MAIN *) rxd->RxdMain)->Len = rxd->Data.nmd_len; /* PCI write */ -+ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ -+ consume_envelope: -+ /* Sample the queue full bit *BEFORE* moving the fptr. -+ * Then only clear it if it was full before, otherwise, -+ * as soon as the fptr is moved on the queue could fill -+ * up, and so clearing it could mark a full queue as -+ * empty. -+ * -+ * While the full bit is set, the queue is in a 'steady -+ * state', so it is safe to set the q_state -+ * -+ */ -+ if (((tmp = q->q_state) & E3_QUEUE_FULL) == 0) -+ q->q_fptr = nfptr; /* update queue */ -+ else -+ { -+ q->q_fptr = nfptr; /* update queue */ -+ q->q_state = tmp &~E3_QUEUE_FULL; /* and clear full flag */ -+ } -+ -+ count++; /* bump message count */ -+ if (nfptr == q->q_top) /* queue wrap */ -+ nfptr = q->q_base; -+ else -+ nfptr += q->q_size; -+ -+ c_break_busywait(); /* be nice HALT POINT */ -+ -+ } while (nfptr != q->q_bptr); /* loop until Fptr == Bptr */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan4.c linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4.c ---- clean/drivers/net/qsnet/ep/epcomms_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4.c 2005-08-09 05:57:14.000000000 -0400 -@@ -0,0 +1,393 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4.c,v 1.12.2.1 2005/08/09 09:57:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+static void -+ep4comms_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ commsRail->r_flush_count = 0; -+ kcondvar_wakeupall (&commsRail->r_flush_sleep, &commsRail->r_flush_lock); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_start (EP4_COMMS_RAIL *commsRail) -+{ -+ kmutex_lock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail) -+{ -+ unsigned long flags; -+ -+ ep4_wait_event_cmd (commsRail->r_flush_mcq, -+ commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * commsRail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ commsRail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (commsRail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ while (commsRail->r_flush_count != 0) -+ if (kcondvar_timedwait (&commsRail->r_flush_sleep, &commsRail->r_flush_lock, &flags, (lbolt + (HZ*10))) == -1) -+ elan4_hardware_lock_check(((EP4_RAIL *)(commsRail->r_generic.Rail))->r_ctxt.ctxt_dev, "flush_wait"); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+ -+ kmutex_unlock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ -+ elan4_set_event_cmd (cq, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event)); -+ -+ commsRail->r_flush_count++; -+ -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->r_generic.Subsys; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ unsigned int rnum = rail->r_generic.Number; -+ struct list_head *el; -+ -+ /* -+ * We stall the retry thread from CB_FLUSH_FILTERING until -+ * we've finished CB_FLUSH_FLUSHING to ensure that sten -+ * packets can not be being retried while we flush them -+ * through. -+ */ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ break; -+ } -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_flush_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_flush_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4comms_flush_wait (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep_kthread_resume (&rail->r_retry_thread); -+ break; -+ } -+} -+ -+void -+ep4comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_failover_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_failover_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_disconnect_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_disconnect_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_neterr_callback (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any sten packets from the retry lists */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Second - flush through all command queues for xmtrs and rcvrs */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_flush (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_flush (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ /* Third - wait for flush to complete */ -+ ep4comms_flush_wait ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Fourth - flush through all command queues */ -+ ep4_flush_ecqs (rail); -+ -+ /* Fifth - search all the retry lists for the network error cookies */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_check (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_check (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+ -+EP_COMMS_RAIL * -+ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *)r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_COMMS_RAIL *commsRail; -+ E4_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP4_COMMS_RAIL *,sizeof (EP4_COMMS_RAIL), 1); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->r_generic.Ops.DelRail = ep4comms_del_rail; -+ commsRail->r_generic.Ops.DisplayRail = ep4comms_display_rail; -+ commsRail->r_generic.Ops.Rcvr.AddRail = ep4rcvr_add_rail; -+ commsRail->r_generic.Ops.Rcvr.DelRail = ep4rcvr_del_rail; -+ commsRail->r_generic.Ops.Rcvr.Check = ep4rcvr_check; -+ commsRail->r_generic.Ops.Rcvr.QueueRxd = ep4rcvr_queue_rxd; -+ commsRail->r_generic.Ops.Rcvr.RpcPut = ep4rcvr_rpc_put; -+ commsRail->r_generic.Ops.Rcvr.RpcGet = ep4rcvr_rpc_get; -+ commsRail->r_generic.Ops.Rcvr.RpcComplete = ep4rcvr_rpc_complete; -+ -+ commsRail->r_generic.Ops.Rcvr.StealRxd = ep4rcvr_steal_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.DisplayRcvr = ep4rcvr_display_rcvr; -+ commsRail->r_generic.Ops.Rcvr.DisplayRxd = ep4rcvr_display_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.FillOutRailStats = ep4rcvr_fillout_rail_stats; -+ -+ commsRail->r_generic.Ops.Xmtr.AddRail = ep4xmtr_add_rail; -+ commsRail->r_generic.Ops.Xmtr.DelRail = ep4xmtr_del_rail; -+ commsRail->r_generic.Ops.Xmtr.Check = ep4xmtr_check; -+ commsRail->r_generic.Ops.Xmtr.BindTxd = ep4xmtr_bind_txd; -+ commsRail->r_generic.Ops.Xmtr.UnbindTxd = ep4xmtr_unbind_txd; -+ commsRail->r_generic.Ops.Xmtr.PollTxd = ep4xmtr_poll_txd; -+ commsRail->r_generic.Ops.Xmtr.CheckTxdState = ep4xmtr_check_txd_state; -+ -+ commsRail->r_generic.Ops.Xmtr.DisplayXmtr = ep4xmtr_display_xmtr; -+ commsRail->r_generic.Ops.Xmtr.DisplayTxd = ep4xmtr_display_txd; -+ -+ commsRail->r_generic.Ops.Xmtr.FillOutRailStats = ep4xmtr_fillout_rail_stats; -+ -+ /* Allocate command queue space for flushing (1 dword for interrupt + 4 dwords for waitevent) */ -+ if ((commsRail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == NULL) -+ { -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ if ((commsRail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == NULL) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ /* Allocate and initialise the elan memory part */ -+ if ((commsRail->r_elan = ep_alloc_elan (r, EP4_COMMS_RAIL_ELAN_SIZE, 0, &commsRail->r_elan_addr)) == (sdramaddr_t) 0) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ ep4_register_intcookie (rail, &commsRail->r_flush_intcookie, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ ep4comms_flush_interrupt, commsRail); -+ -+ elan4_sdram_writeq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ -+ /* Allocate and initialise all the queue desriptors as "full" with no event */ -+ if ((commsRail->r_descs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * EP_QUEUE_DESC_SIZE, SDRAM_PAGE_SIZE), EP_PERM_ALL, 0)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (r, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl (qdesc.q_bptr,qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qdesc, commsRail->r_descs + (i * EP_QUEUE_DESC_SIZE), -+ sizeof (E4_InputQueue)); -+ -+ kmutex_init (&commsRail->r_flush_mutex); -+ spin_lock_init (&commsRail->r_flush_lock); -+ kcondvar_init (&commsRail->r_flush_sleep); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ commsRail->r_neterr_ops.op_func = ep4comms_neterr_callback; -+ commsRail->r_neterr_ops.op_arg = commsRail; -+ -+ ep4_add_neterr_ops (rail, &commsRail->r_neterr_ops); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep4comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ kcondvar_destroy (&commsRail->r_flush_sleep); -+ spin_lock_destroy (&commsRail->r_flush_lock); -+ kmutex_destroy (&commsRail->r_flush_mutex); -+ -+ ep_free_memory_elan (&rail->r_generic, EP_EPCOMMS_QUEUE_BASE); -+ ep_free_elan (&rail->r_generic, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ -+ ep4_deregister_intcookie (rail, &commsRail->r_flush_intcookie); -+ -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+} -+ -+void -+ep4comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ ep4_display_rail (rail); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%d mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ commsRail->r_flush_count, commsRail->r_flush_mcq, commsRail->r_flush_ecq, -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan4.h linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4.h ---- clean/drivers/net/qsnet/ep/epcomms_elan4.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4.h 2005-03-22 11:47:36.000000000 -0500 -@@ -0,0 +1,471 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN4_H -+#define __EPCOMMS_ELAN4_H -+ -+#ident "@(#)$Id: epcomms_elan4.h,v 1.15 2005/03/22 16:47:36 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.h,v $ */ -+ -+ -+#include -+ -+/* -+ * Elan4 spinlocks are a pair of 64 bit words, one in elan sdram and one in main memory -+ * the sdram word holds the thread sequence number in the bottom 32 bits and the main -+ * lock in the top 32 bits. The main memory word holds the sequence number only in -+ * it's bottom 32 bits */ -+ -+typedef volatile E4_uint64 EP4_SPINLOCK_MAIN; -+typedef volatile E4_uint64 EP4_SPINLOCK_ELAN; -+ -+#define EP4_SPINLOCK_SEQ 0 -+#define EP4_SPINLOCK_MLOCK 4 -+ -+#if defined(__elan4__) -+ -+#define EP4_SPINENTER(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "inc %0\n" \ -+ "st4 %0, [%1]\n" \ -+ "ld4 [%1 + 4], %0\n" \ -+ "srl8,byte %0, 4, %0\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+\ -+ if (tmp) \ -+ ep4_spinblock (CPORT,SLE, SLM); \ -+} while (0) -+ -+extern void ep4_spinblock(E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm); -+ -+#define EP4_SPINEXIT(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "st4 %0, [%2]\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+} while (0) -+ -+#else -+ -+#define EP4_SPINENTER(DEV,SLE,SLM) \ -+do { \ -+ uint32_t seq; \ -+\ -+ mb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 1); \ -+ mb(); \ -+ while ((seq = elan4_sdram_readl (DEV, (SLE) + EP4_SPINLOCK_SEQ)) != *((uint32_t *) (SLM))) \ -+ { \ -+ while (*((uint32_t *) (SLM)) == (seq - 1)) \ -+ { \ -+ mb(); \ -+ DELAY(1); \ -+ } \ -+ } \ -+} while (0) -+ -+#define EP4_SPINEXIT(DEV,SLE,SLM) \ -+do { \ -+ wmb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 0); \ -+} while (0) -+ -+#endif /* !defined(__elan4__) */ -+ -+#define EP4_TXD_STEN_RETRYCOUNT 16 -+#define EP4_RXD_STEN_RETRYCOUNT 1 -+#define EP4_DMA_RETRYCOUNT 16 -+ -+typedef struct ep4_intr_cmd -+{ -+ E4_uint64 c_write_cmd; -+ E4_uint64 c_write_value; -+ E4_uint64 c_intr_cmd; -+} EP4_INTR_CMD; -+ -+#define EP4_INTR_CMD_NDWORDS (sizeof (EP4_INTR_CMD) / 8) -+ -+typedef struct ep4_rxd_sten_cmd -+{ -+ E4_uint64 c_open; -+ -+ E4_uint64 c_trans; -+ E4_uint64 c_cookie; -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ -+ E4_uint64 c_ok_guard; -+ E4_uint64 c_ok_write_cmd; -+ E4_uint64 c_ok_write_value; -+ -+ E4_uint64 c_fail_guard; -+ E4_uint64 c_fail_setevent; -+ -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_STEN_CMD; -+ -+#define EP4_RXD_STEN_CMD_NDWORDS (sizeof (EP4_RXD_STEN_CMD) / 8) -+ -+typedef struct ep4_rxd_dma_cmd -+{ -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_DMA_CMD; -+ -+#define EP4_RXD_DMA_CMD_NDWORDS (sizeof (EP4_RXD_DMA_CMD) / 8) -+#define EP4_RXD_START_CMD_NDWORDS (sizeof (E4_ThreadRegs) / 8) -+ -+typedef struct ep4_rxd_rail_elan -+{ -+ EP4_RXD_STEN_CMD rxd_sten[EP_MAXFRAG+1]; -+ -+ EP4_INTR_CMD rxd_done_cmd; /* command stream issued by done event (aligned to 64 bytes) */ -+ E4_Addr rxd_next; /* linked list when on pending list (pad to 32 bytes)*/ -+ E4_Event32 rxd_failed; /* event set when sten packet fails */ -+ -+ EP4_INTR_CMD rxd_failed_cmd; /* command stream issued by fail event (aligned to 64 bytes) */ -+ E4_uint64 rxd_queued; /* rxd queuing thread has executed (pad to 32 bytes)*/ -+ -+ E4_Event32 rxd_start; /* event to set to fire off and event chain (used as chain[0]) */ -+ E4_Event32 rxd_chain[EP_MAXFRAG]; /* chained events (aligned to 32 bytes) */ -+ E4_Event32 rxd_done; /* event to fire done command stream causing interrupt (used as chain[EP_MAXFRAG]) */ -+ -+ E4_Addr rxd_rxd; /* elan address of EP4_RXD_MAIN */ -+ E4_Addr rxd_main; /* elan address of EP4_RXD_RAIL_MAIN */ -+ E4_uint64 rxd_debug; /* thread debug value */ -+ -+ EP_NMD rxd_buffer; /* Network mapping descriptor for receive data */ -+} EP4_RXD_RAIL_ELAN; -+ -+#define EP4_RXD_RAIL_ELAN_SIZE roundup(sizeof (EP4_RXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_rxd_rail_main -+{ -+ E4_uint64 rxd_sent[EP_MAXFRAG+1]; /* sten packet sent */ -+ E4_uint64 rxd_failed; /* sten packet failed */ -+ E4_uint64 rxd_done; /* operation complete */ -+ -+ E4_Addr rxd_scq; /* command port for scq */ -+} EP4_RXD_RAIL_MAIN; -+ -+#define EP4_RXD_RAIL_MAIN_SIZE roundup(sizeof (EP4_RXD_RAIL_MAIN), 8) -+ -+#if !defined(__elan4__) -+typedef struct ep4_rxd_rail -+{ -+ EP_RXD_RAIL rxd_generic; -+ -+ struct list_head rxd_retry_link; -+ unsigned long rxd_retry_time; -+ -+ EP4_INTCOOKIE rxd_intcookie; -+ -+ sdramaddr_t rxd_elan; -+ EP_ADDR rxd_elan_addr; -+ -+ EP4_RXD_RAIL_MAIN *rxd_main; -+ EP_ADDR rxd_main_addr; -+ -+ EP4_ECQ *rxd_ecq; /* cq with 128 bytes targetted by event */ -+ EP4_ECQ *rxd_scq; /* cq with 8 bytes targetted by main/thread store */ -+} EP4_RXD_RAIL; -+ -+#define EP4_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep4_rxd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_RXD_RAIL blk_rxds[EP4_NUM_RXD_PER_BLOCK]; -+} EP4_RXD_RAIL_BLOCK; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_rcvr_rail_elan -+{ -+ E4_uint64 rcvr_thread_stall[8]; /* place for thread to stall */ -+ E4_Event32 rcvr_qevent; /* Input queue event */ -+ E4_Event32 rcvr_thread_halt; /* place for thread to halt */ -+ -+ volatile E4_Addr rcvr_pending_tailp; /* list of pending rxd's (elan addr) */ -+ volatile E4_Addr rcvr_pending_head; /* -- this pair aligned to 16 bytes */ -+ -+ EP4_SPINLOCK_ELAN rcvr_thread_lock; /* spinlock for thread processing loop */ -+ -+ E4_uint64 rcvr_stall_intcookie; /* interrupt cookie to use when requseted to halt */ -+ -+ E4_uint64 rcvr_qbase; /* base of input queue */ -+ E4_uint64 rcvr_qlast; /* last item in input queue */ -+ -+ E4_uint64 rcvr_debug; /* thread debug value */ -+} EP4_RCVR_RAIL_ELAN; -+ -+typedef struct ep4_rcvr_rail_main -+{ -+ EP4_SPINLOCK_MAIN rcvr_thread_lock; /* spinlock for thread processing loop */ -+} EP4_RCVR_RAIL_MAIN; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_RCVR_RAIL_STATS; -+ -+typedef struct ep4_rcvr_rail -+{ -+ EP_RCVR_RAIL rcvr_generic; /* generic portion */ -+ -+ sdramaddr_t rcvr_elan; -+ EP_ADDR rcvr_elan_addr; -+ -+ EP4_RCVR_RAIL_MAIN *rcvr_main; -+ EP_ADDR rcvr_main_addr; -+ -+ sdramaddr_t rcvr_slots; /* input queue slots */ -+ EP_ADDR rcvr_slots_addr; /* and elan address */ -+ -+ EP_ADDR rcvr_stack; /* stack for thread */ -+ -+ EP4_ECQ *rcvr_ecq; /* command queue space for thread STEN packets */ -+ EP4_ECQ *rcvr_resched; /* command queue space to reschedule the thread */ -+ -+ struct list_head rcvr_freelist; /* freelist of per-rail receive descriptors */ -+ unsigned int rcvr_freecount; /* and number on free list */ -+ unsigned int rcvr_totalcount; /* total number created */ -+ spinlock_t rcvr_freelock; /* and lock for free list */ -+ struct list_head rcvr_blocklist; /* list of receive descriptor blocks */ -+ -+ unsigned int rcvr_freewaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t rcvr_freesleep; /* and sleep here */ -+ -+ EP4_INTCOOKIE rcvr_stall_intcookie; /* interrupt cookie for thread halt */ -+ unsigned char rcvr_thread_halted; /* thread has been halted */ -+ unsigned char rcvr_cleanup_waiting; /* waiting for cleanup */ -+ kcondvar_t rcvr_cleanup_sleep; /* and sleep here */ -+ -+ EP4_RETRY_OPS rcvr_retryops; -+ -+ struct list_head rcvr_retrylist; /* list of txd's to retry envelopes for */ -+ struct list_head rcvr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t rcvr_retrylock; -+ -+ EP4_RCVR_RAIL_STATS rcvr_stats; /* elan4 specific rcvr_rail stats */ -+ -+} EP4_RCVR_RAIL; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_txd_rail_elan -+{ -+ EP4_INTR_CMD txd_env_cmd; /* command stream for envelope event (64 byte aligned) */ -+ E4_uint64 txd_pad0; /* pad to 32 bytes */ -+ E4_Event32 txd_env; /* event set when STEN packet fails */ -+ -+ EP4_INTR_CMD txd_done_cmd; /* command stream for done event (64 byte aligned) */ -+ E4_uint64 txd_pad1; /* pad to 32 bytes */ -+ E4_Event32 txd_done; /* event set when transmit complete */ -+ -+ E4_Event32 txd_data; /* event set when xmit completes (=> phase becomes passive) */ -+} EP4_TXD_RAIL_ELAN; -+ -+#define EP4_TXD_RAIL_ELAN_SIZE roundup(sizeof(EP4_TXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_txd_rail_main -+{ -+ E4_uint64 txd_env; -+ E4_uint64 txd_data; -+ E4_uint64 txd_done; -+} EP4_TXD_RAIL_MAIN; -+ -+#define EP4_TXD_RAIL_MAIN_SIZE roundup(sizeof(EP4_TXD_RAIL_MAIN), 8) -+ -+#if !defined (__elan4__) -+typedef struct ep4_txd_rail -+{ -+ EP_TXD_RAIL txd_generic; -+ -+ struct list_head txd_retry_link; -+ unsigned long txd_retry_time; -+ -+ EP4_INTCOOKIE txd_intcookie; -+ -+ sdramaddr_t txd_elan; -+ EP_ADDR txd_elan_addr; -+ -+ EP4_TXD_RAIL_MAIN *txd_main; -+ EP_ADDR txd_main_addr; -+ -+ EP4_ECQ *txd_ecq; -+ -+ E4_uint64 txd_cookie; -+} EP4_TXD_RAIL; -+ -+#define EP4_NUM_TXD_PER_BLOCK 21 -+ -+typedef struct ep4_txd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_TXD_RAIL blk_txds[EP4_NUM_TXD_PER_BLOCK]; -+} EP4_TXD_RAIL_BLOCK; -+ -+typedef struct ep4_xmtr_rail_main -+{ -+ E4_int64 xmtr_flowcnt; -+} EP4_XMTR_RAIL_MAIN; -+ -+typedef struct ep4_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_XMTR_RAIL_STATS; -+ -+#define EP4_TXD_LIST_POLL 0 -+#define EP4_TXD_LIST_STALLED 1 -+#define EP4_TXD_LIST_RETRY 2 -+#define EP4_TXD_NUM_LISTS 3 -+typedef struct ep4_xmtr_rail -+{ -+ EP_XMTR_RAIL xmtr_generic; -+ -+ EP4_XMTR_RAIL_MAIN *xmtr_main; -+ EP_ADDR xmtr_main_addr; -+ -+ struct list_head xmtr_freelist; -+ unsigned int xmtr_freecount; -+ unsigned int xmtr_totalcount; -+ spinlock_t xmtr_freelock; -+ struct list_head xmtr_blocklist; -+ unsigned int xmtr_freewaiting; -+ kcondvar_t xmtr_freesleep; -+ -+ EP4_INTCOOKIE xmtr_intcookie; /* interrupt cookie for "polled" descriptors */ -+ -+ ELAN4_CQ *xmtr_cq; -+ E4_int64 xmtr_flowcnt; -+ -+ EP4_RETRY_OPS xmtr_retryops; -+ -+ struct list_head xmtr_retrylist[EP4_TXD_NUM_LISTS]; /* list of txd's to retry envelopes for */ -+ struct list_head xmtr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t xmtr_retrylock; -+ -+ EP4_XMTR_RAIL_STATS stats; /* elan4 specific xmtr rail stats */ -+} EP4_XMTR_RAIL; -+ -+#define EP4_XMTR_CQSIZE CQ_Size64K /* size of command queue for xmtr */ -+#define EP4_XMTR_FLOWCNT (CQ_Size(EP4_XMTR_CQSIZE) / 512) /* # of STEN packets which can fit in */ -+ -+typedef struct ep4_comms_rail_elan -+{ -+ E4_Event32 r_flush_event; -+} EP4_COMMS_RAIL_ELAN; -+ -+#define EP4_COMMS_RAIL_ELAN_SIZE roundup(sizeof (EP4_COMMS_RAIL_ELAN), 32) -+ -+typedef struct ep4_comms_rail -+{ -+ EP_COMMS_RAIL r_generic; /* generic comms rail */ -+ sdramaddr_t r_descs; /* input queue descriptors */ -+ -+ sdramaddr_t r_elan; /* elan portion */ -+ EP_ADDR r_elan_addr; -+ -+ kmutex_t r_flush_mutex; /* sequentialise flush usage */ -+ EP4_INTCOOKIE r_flush_intcookie; /* interrupt cookie to generate */ -+ -+ kcondvar_t r_flush_sleep; /* place to sleep waiting */ -+ spinlock_t r_flush_lock; /* and spinlock to use */ -+ -+ unsigned int r_flush_count; /* # setevents issued */ -+ EP4_ECQ *r_flush_ecq; /* command queue for interrupt */ -+ EP4_ECQ *r_flush_mcq; /* command queeu to issue waitevent */ -+ -+ EP4_NETERR_OPS r_neterr_ops; /* network error fixup ops */ -+} EP4_COMMS_RAIL; -+ -+/* epcommsTx_elan4.c */ -+extern void ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+ -+extern void ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcommsRx_elan4.c */ -+extern void ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+ -+extern void ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcomms_elan4.c */ -+extern void ep4comms_flush_start (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq); -+ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep4comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep4comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan4.c */ -+extern int ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep4xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep4xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep4xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep4xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan4.c */ -+extern int ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep4rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep4rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep4rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__elan4__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN4_H */ -diff -urN clean/drivers/net/qsnet/ep/epcomms_elan4_thread.c linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4_thread.c ---- clean/drivers/net/qsnet/ep/epcomms_elan4_thread.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcomms_elan4_thread.c 2005-03-22 09:41:55.000000000 -0500 -@@ -0,0 +1,347 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4_thread.c,v 1.13 2005/03/22 14:41:55 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4_thread.c,v $*/ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long int64_t; -+typedef unsigned long uint64_t; -+ -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+/* assembler in epcomms_asm_elan4_thread.S */ -+extern void c_waitevent_interrupt (E4_uint64 *cport, E4_Event32 *event, E4_uint64 count, E4_uint64 intcookie); -+extern EP4_RXD_RAIL_ELAN *c_stall_thread (EP4_RCVR_RAIL_ELAN *rcvrRail); -+ -+#define R32_to_R47 "%r32", "%r33", "%r34", "%r35", "%r36", "%r37", "%r38", "%r39", \ -+ "%r40", "%r41", "%r42", "%r43", "%r44", "%r45", "%r46", "%r47" -+#define R48_to_R63 "%r48", "%r49", "%r50", "%r51", "%r52", "%r53", "%r54", "%r55", \ -+ "%r56", "%r57", "%r58", "%r59", "%r60", "%r61", "%r62", "%r63" -+ -+/* proto types for code in asm_elan4_thread.S */ -+extern void c_waitevent (E4_uint64 *commandport, E4_Addr event, E4_uint64 count); -+extern void c_reschedule(E4_uint64 *commandport); -+ -+static inline unsigned long -+c_load_u16(unsigned short *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld2 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 48, %0\n" -+ "srl8 %0, 48, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline unsigned long -+c_load_u32(unsigned int *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld4 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 32, %0\n" -+ "srl8 %0, 32, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline void -+c_store_u32(unsigned int *ptr, unsigned long value) -+{ -+ asm volatile ("sll8,byte %0, %1, %%r2\n" -+ "st4 %%r2, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (value), "r" (ptr) -+ : /* clobbered */ "%r2"); -+} -+ -+/* Reschedule the current Elan thread to the back of the run queue -+ * if there is another one ready to run */ -+static inline void -+c_yield (E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_SIGNED_BIT) -+ c_reschedule(commandport); -+} -+ -+/* Reschedule the current thread if we're in danger of exceeding the -+ * thread instruction count */ -+static inline void -+c_insn_check(E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_ZERO_BIT) -+ c_reschedule(commandport); -+} -+ -+void -+ep4_spinblock (E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm) -+{ -+ do { -+ unsigned long val = *sle & 0xfffffffff; -+ -+ *slm = val; /* Release my lock */ -+ -+ while (*sle >> 32) /* Wait until the main */ -+ c_yield(cport); /* releases the lock */ -+ -+ c_store_u32 ((unsigned int *) sle, val + 1); /* and try and relock */ -+ } while (*sle >> 32); -+} -+ -+#define RESCHED_AFTER_PKTS ((CQ_Size(CQ_Size64K) / 128) - 1) -+ -+void -+ep4comms_rcvr (EP4_RAIL_ELAN *rail, EP4_RCVR_RAIL_ELAN *rcvrElan, EP4_RCVR_RAIL_MAIN *rcvrMain, -+ E4_InputQueue *inputq, E4_uint64 *cport, E4_uint64 *resched) -+{ -+ long count = 1; -+ long fptr = inputq->q_fptr; -+ -+ for (;;) -+ { -+ c_waitevent (cport, inputq->q_event, -count << 5); -+ -+ count = 0; -+ -+ while (fptr != inputq->q_bptr) -+ { -+ EP_ENVELOPE *env = (EP_ENVELOPE *) fptr; -+ unsigned long nodeid = c_load_u32 (&env->NodeId); -+ unsigned long opencmd = OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(nodeid)); -+ unsigned long vproc = EP_VP_DATA(rail->r_nodeid); -+ EP_ATTRIBUTE attr = c_load_u32 (&env->Attr); -+ unsigned long txdRail = c_load_u32 (&env->TxdRail); -+ unsigned long nFrags = c_load_u32 (&env->nFrags); -+ unsigned long srcevent = (EP_IS_RPC(attr) ? txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_data) : -+ txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done)); -+ E4_uint64 cookie; -+ EP4_RXD_RAIL_ELAN *rxdElan; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_RXD_MAIN *rxd; -+ EP4_RXD_STEN_CMD *sten; -+ E4_Event32 *event; -+ unsigned long first; -+ unsigned long buffer; -+ unsigned long len; -+ unsigned long i; -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ if ((rxdElan = (EP4_RXD_RAIL_ELAN *) rcvrElan->rcvr_pending_head) == 0) -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ rxdElan = c_stall_thread (rcvrElan); -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ } -+ -+ if (c_load_u32 (&env->Version) != EP_ENVELOPE_VERSION) /* envelope has been cancelled */ -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ goto consume_envelope; -+ } -+ -+ rxd = (EP_RXD_MAIN *) rxdElan->rxd_rxd; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) rxdElan->rxd_main; -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(attr) ? 1 : 0) + (nFrags == 0 ? 1 : nFrags)); -+ sten = &rxdElan->rxd_sten[first]; -+ event = &rxdElan->rxd_chain[first]; -+ cookie = rail->r_cookies[nodeid]; -+ -+ if (EP_IS_MULTICAST(attr)) /* need to fetch broadcast bitmap */ -+ { -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t), DMA_DataTypeWord, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->TxdMain.nmd_addr) + offsetof(EP_TXD_MAIN, Bitmap); -+ sten->c_dma_dstAddr = (E4_Addr) &rxd->Bitmap; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ if (nFrags == 0) -+ { -+ /* Generate an empty "get" DMA to accept the envelope and fire the rx handler */ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ len = 0; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = c_load_u32 (&rxdElan->rxd_buffer.nmd_addr), len = 0; i < nFrags; i++) -+ { -+ unsigned long fragLen = c_load_u32 (&env->Frags[i].nmd_len); -+ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(fragLen, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->Frags[i].nmd_addr); -+ sten->c_dma_dstAddr = buffer; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ buffer += fragLen; -+ len += fragLen; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ (--event)->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ if (c_load_u32 (&rxdElan->rxd_buffer.nmd_len) < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = first, sten = &rxdElan->rxd_sten[first]; i <= EP_MAXFRAG; i++, sten++) -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Stuff the first STEN packet into the command queue, there's always enough space, -+ * since we will insert a waitevent at least once for the queue size */ -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0 + 64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (&rxdElan->rxd_sten[first]), "r" (cport) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* remove the RXD from the pending list */ -+ if ((rcvrElan->rcvr_pending_head = rxdElan->rxd_next) == 0) -+ rcvrElan->rcvr_pending_tailp = (E4_Addr)&rcvrElan->rcvr_pending_head; -+ -+ /* mark as not queued */ -+ rxdElan->rxd_queued = 0; -+ -+ /* copy down the envelope */ -+ if (EP_HAS_PAYLOAD(attr)) -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "ld64 [%0+128], %%r32\n" -+ "st64 %%r48, [%1+64]\n" -+ "ld64 [%0+192], %%r48\n" -+ "st64 %%r32, [%1 + 128]\n" -+ "st64 %%r48, [%1 + 192]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ else -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1+64]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* Store the message length to indicate that I've finished */ -+ c_store_u32 (&rxd->Len, len); -+ -+ /* Finally update the network error cookie */ -+ rail->r_cookies[nodeid] = cookie; -+ -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ consume_envelope: -+ if (fptr != rcvrElan->rcvr_qlast) -+ fptr += EP_INPUTQ_SIZE; -+ else -+ fptr = rcvrElan->rcvr_qbase; -+ -+ if (! rcvrElan->rcvr_stall_intcookie) -+ inputq->q_fptr = fptr; -+ -+ if (++count >= RESCHED_AFTER_PKTS) -+ break; -+ -+ c_insn_check (cport); -+ } -+ -+ if (rcvrElan->rcvr_stall_intcookie) -+ { -+ c_waitevent_interrupt (cport, &rcvrElan->rcvr_thread_halt, -(1 << 5), rcvrElan->rcvr_stall_intcookie); -+ inputq->q_fptr = fptr; -+ -+ count++; /* one extra as we were given an extra set to wake us up */ -+ } -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsFwd.c linux-2.6.9/drivers/net/qsnet/ep/epcommsFwd.c ---- clean/drivers/net/qsnet/ep/epcommsFwd.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsFwd.c 2005-07-20 08:01:34.000000000 -0400 -@@ -0,0 +1,310 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsFwd.c,v 1.12.4.1 2005/07/20 12:01:34 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsFwd.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int epcomms_forward_limit = 8; -+ -+static void -+GenerateTree (unsigned nodeId, unsigned lowId, unsigned highId, bitmap_t *bitmap, -+ unsigned *parentp, unsigned *childrenp, int *nchildrenp) -+{ -+ int i; -+ int count; -+ int branch; -+ int nSub; -+ int branchIndex; -+ int parent; -+ int nBranch; -+ int rem; -+ int self; -+ int branchRatio; -+ int node; -+ int x, y, z; -+ -+ -+#ifdef DEBUG_PRINTF -+ { -+#define OVERFLOW "...]" -+#define LINESZ 128 -+ char space[LINESZ+1]; -+ -+ if (ep_sprintf_bitmap (space, LINESZ-strlen(OVERFLOW), bitmap, 0, 0, (highId - lowId)+1) != -1) -+ strcat (space, OVERFLOW); -+ -+ EPRINTF3 (DBG_FORWARD, "GenerateTree; elan node low=%d node high=%d bitmap=%s\n", lowId, highId, space); -+#undef OVERFLOW -+#undef LINESZ -+ } -+#endif -+ -+ /* Count the number of nodes in the partition */ -+ /* and work out which one I am */ -+ for (count = 0, self = ELAN_INVALID_NODE, i = lowId; i <= highId; i++) -+ { -+ if (BT_TEST (bitmap, i-lowId)) -+ { -+ if (i == nodeId) -+ self = count; -+ count++; -+ } -+ } -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: count=%d self=%d\n", count, self); -+ -+ if (count == 0 || self == ELAN_INVALID_NODE) -+ { -+ *parentp = ELAN_INVALID_NODE; -+ *nchildrenp = 0; -+ return; -+ } -+ -+ /* search for position in tree */ -+ branchRatio = EP_TREE_ARITY; /* branching ratio */ -+ branch = 0; /* start with process 0 */ -+ nSub = count; /* and whole tree */ -+ branchIndex = -1; /* my branch # in parent */ -+ parent = -1; /* my parent's group index # */ -+ -+ while (branch != self) /* descend process tree */ -+ { /* until I find myself */ -+ parent = branch; -+ branch++; /* parent + 1 = first born */ -+ nSub--; /* set # descendents */ -+ -+ rem = nSub % branchRatio; -+ nSub = nSub / branchRatio + 1; -+ x = rem * nSub; -+ y = self - branch; -+ -+ if (y < x) /* my first 'rem' branches have */ -+ { /* 1 more descendent... */ -+ branchIndex = y / nSub; -+ branch += branchIndex * nSub; -+ } -+ else /* than the rest of my branches */ -+ { -+ nSub--; -+ z = (y - x) / nSub; -+ branchIndex = rem + z; -+ branch += x + z * nSub; -+ } -+ } -+ -+ branch++; /* my first born */ -+ nSub--; /* total # of my descendents */ -+ /* leaves + their parents may have # children < branchRatio */ -+ nBranch = (nSub < branchRatio) ? nSub : branchRatio; -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: parent=%d nBranch=%d\n", parent, nBranch); -+ -+ /* Now calculate the real elan id's of the parent and my children */ -+ if (parent == -1) -+ *parentp = ELAN_INVALID_NODE; -+ else -+ { -+ for (i = lowId, node = 0; i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ if (node++ == parent) -+ break; -+ } -+ *parentp = i; -+ } -+ -+ for (i = lowId, branchIndex = 0, node = 0; branchIndex < nBranch && i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ { -+ if (node == branch) -+ { -+ branch = branch + nSub / branchRatio + ((branchIndex < (nSub % branchRatio)) ? 1 : 0); -+ -+ childrenp[branchIndex++] = i; -+ } -+ node++; -+ } -+ } -+ -+ *nchildrenp = branchIndex; -+} -+ -+static void -+ForwardTxDone (EP_TXD *txd, void *arg, EP_STATUS status) -+{ -+ EP_FWD_DESC *desc = (EP_FWD_DESC *) arg; -+ EP_RXD *rxd = desc->Rxd; -+ EP_COMMS_SUBSYS *subsys = rxd->Rcvr->Subsys; -+ unsigned long flags; -+ -+ /* XXXX: if transmit fails, could step to next node in this subtree ? */ -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ -+ if (--desc->NumChildren > 0) -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ else -+ { -+ rxd->Rcvr->ForwardRxdCount--; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+long -+ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ while (! list_empty (&subsys->ForwardDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->ForwardDescList.next, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_FWD_DESC *desc; -+ -+ EPRINTF2 (DBG_FORWARD, "ep: forwarding rxd %p to range %x\n", rxd, env->Range); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Rcvr->ForwardRxdCount++; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_ALLOC (desc, EP_FWD_DESC *, sizeof (EP_FWD_DESC), 1); -+ -+ if (desc == NULL) -+ { -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ /* compute the spanning tree for this message */ -+ unsigned int destLo = EP_RANGE_LOW (env->Range); -+ unsigned int destHi = EP_RANGE_HIGH (env->Range); -+ unsigned int parent; -+ -+ GenerateTree (subsys->Subsys.Sys->Position.pos_nodeid, destLo, destHi, rxdMain->Bitmap, &parent, desc->Children, &desc->NumChildren); -+ -+ if (desc->NumChildren == 0 || (epcomms_forward_limit && (rxd->Rcvr->ForwardRxdCount >= epcomms_forward_limit))) -+ { -+ EPRINTF5 (DBG_FORWARD, "ep; don't forward rxd %p to /%d (%d children/ %d forwarding (%d))\n", -+ rxd, rxd->Rcvr->Service, desc->NumChildren, rxd->Rcvr->ForwardRxdCount, epcomms_forward_limit); -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ ep_nmd_subset (&desc->Data, &rxd->Data, 0, ep_rxd_len (rxd)); -+ desc->Rxd = rxd; -+ -+ /* NOTE - cannot access 'desc' after last call to multicast, since it could complete -+ * and free the desc before we access it again. Hence the reverse loop. */ -+ for (i = desc->NumChildren-1; i >= 0; i--) -+ { -+ ASSERT (desc->Children[i] < subsys->Subsys.Sys->Position.pos_nodes); -+ -+ EPRINTF3 (DBG_FORWARD, "ep: forwarding rxd %p to node %d/%d\n", rxd, desc->Children[i], rxd->Rcvr->Service); -+ -+ if ((res = ep_multicast_forward (subsys->ForwardXmtr, desc->Children[i], rxd->Rcvr->Service, 0, -+ ForwardTxDone, desc, env, EP_HAS_PAYLOAD(env->Attr) ? &rxdMain->Payload : NULL, -+ rxdMain->Bitmap, &desc->Data, 1)) != EP_SUCCESS) -+ { -+ ep_debugf (DBG_FORWARD, "ep: ep_multicast_forward failed\n"); -+ ForwardTxDone (NULL, desc, res); -+ } -+ } -+ -+ } -+ } -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ return (nextRunTime); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_csum_rxds (EP_COMMS_SUBSYS *subsys) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ while (! list_empty (&subsys->CheckSumDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->CheckSumDescList.next, EP_RXD, CheckSumLink); -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ list_del_init (&rxd->CheckSumLink); -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+ -+ if (env->CheckSum) { -+ EP_NMD nmd; -+ uint32_t csum; -+ -+ ep_nmd_subset ( &nmd, &rxd->Data, 0, ep_rxd_len (rxd)); -+ -+ csum = ep_calc_check_sum(subsys->Subsys.Sys, env, &nmd, 1); -+ if ( env->CheckSum != csum ) { -+ int f; -+ -+ -+ printk("Check Sum Error: env(0x%x,0x%x) data(0x%x,0x%x)\n", ((csum >> 16) & 0x7FFF), ((env->CheckSum >> 16) & 0x7FFF), -+ (csum & 0xFFFF), (env->CheckSum & 0xFFFF)); -+ printk("Check Sum Error: Sent : NodeId %u Range 0x%x Service %u Version 0x%x Attr 0x%x\n", env->NodeId, env->Range, rxd->Rcvr->Service, env->Version, env->Attr); -+ printk("Check Sum Error: Sent : Xid Generation 0x%x Handle 0x%x Unique 0x%llx\n", env->Xid.Generation, env->Xid.Handle, (long long)env->Xid.Unique); -+ printk("Check Sum Error: Sent : TxdRail 0x%x TxdMain nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", env->TxdRail, env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr ); -+ printk("Check Sum Error: Sent : nFrags %d \n", env->nFrags); -+ for(f=0;fnFrags;f++) -+ printk("Check Sum Error: Sent (%d): nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", f, -+ env->Frags[f].nmd_addr, env->Frags[f].nmd_len, env->Frags[f].nmd_attr); -+ printk("Check Sum Error: Recv : nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", -+ nmd.nmd_addr, nmd.nmd_len, nmd.nmd_attr); -+ -+ } -+ } -+ ep_rxd_received_now(rxd); -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsRx.c linux-2.6.9/drivers/net/qsnet/ep/epcommsRx.c ---- clean/drivers/net/qsnet/ep/epcommsRx.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsRx.c 2004-11-30 07:02:06.000000000 -0500 -@@ -0,0 +1,1205 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx.c,v 1.33 2004/11/30 12:02:06 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int ep_rxd_lowat = 5; -+ -+static int -+AllocateRxdBlock (EP_RCVR *rcvr, EP_ATTRIBUTE attr, EP_RXD **rxdp) -+{ -+ EP_RXD_BLOCK *blk; -+ EP_RXD *rxd; -+ EP_RXD_MAIN *pRxdMain; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP_RXD_BLOCK *, sizeof (EP_RXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return (ENOMEM); -+ -+ if ((pRxdMain = ep_shared_alloc_main (rcvr->Subsys->Subsys.Sys, EP_RXD_MAIN_SIZE * EP_NUM_RXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+ return (ENOMEM); -+ } -+ -+ for (rxd = &blk->Rxd[0], i = 0; i < EP_NUM_RXD_PER_BLOCK; i++, rxd++) -+ { -+ rxd->Rcvr = rcvr; -+ rxd->RxdMain = pRxdMain; -+ -+ ep_nmd_subset (&rxd->NmdMain, &blk->NmdMain, (i * EP_RXD_MAIN_SIZE), EP_RXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pRxdMain = (EP_RXD_MAIN *) ((unsigned long) pRxdMain + EP_RXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvr->DescBlockList); -+ -+ rcvr->TotalDescCount += EP_NUM_RXD_PER_BLOCK; -+ -+ for (i = rxdp ? 1 : 0; i < EP_NUM_RXD_PER_BLOCK; i++) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[i].CheckSumLink); -+#endif -+ -+ list_add (&blk->Rxd[i].Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) -+ { -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (rxdp) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[0].CheckSumLink); -+#endif -+ -+ *rxdp = &blk->Rxd[0]; -+ } -+ return (ESUCCESS); -+} -+ -+static void -+FreeRxdBlock (EP_RCVR *rcvr, EP_RXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ rcvr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_shared_free_main (rcvr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+} -+ -+static EP_RXD * -+GetRxd (EP_RCVR *rcvr, EP_ATTRIBUTE attr) -+{ -+ EP_RXD *rxd; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ while (list_empty (&rcvr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (AllocateRxdBlock (rcvr, attr, &rxd) == ESUCCESS) -+ return (rxd); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ IncrStat (rcvr->Subsys, NoFreeRxds); -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ return (NULL); -+ } -+ -+ rcvr->FreeDescWanted++; -+ kcondvar_wait (&rcvr->FreeDescSleep, &rcvr->FreeDescLock, &flags); -+ } -+ -+ rxd = list_entry (rcvr->FreeDescList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (--rcvr->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ -+ return (rxd); -+} -+ -+static void -+FreeRxd (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ unsigned long flags; -+ -+ ASSERT (EP_XID_INVALID(rxd->MsgXid)); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ASSERT(list_empty(&rxd->CheckSumLink)); -+#endif -+ -+ list_add (&rxd->Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+} -+ -+int -+ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR_RAIL *rcvrRail; -+ EP_RXD *rxd; -+ int rnum; -+ unsigned long flags; -+ -+ if ((rxd = GetRxd (rcvr, attr)) == NULL) -+ return (ENOMEM); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ else -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0 || !(EP_NMD_RAILMASK(nmd) & EP_RAIL2RAILMASK(rnum) & rcvr->RailMask)) -+ rcvrRail = NULL; -+ else -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ EPRINTF7 (DBG_RCVR,"ep_queue_receive: rxd=%p svc %d nmd=%08x,%d,%x rnum=%d rcvrRail=%p\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ int rnum = ep_pickRail(EP_NMD_RAILMASK(&rxd->Data)); -+ EP_RCVR_RAIL *rcvrRail; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF5 (DBG_RCVR,"ep_requeue_receive: rxd=%p svc %d nmd=%08x,%d,%x\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ /* -+ * Rail selection: if they've asked for a particular rail, then use it, otherwise if -+ * the rail it was last received on is mapped for the nmd and is available -+ * then use that one, otherwise pick one that is mapped by the nmd. -+ */ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ -+ if (rnum < 0 || ! (EP_RAIL2RAILMASK (rnum) & EP_NMD_RAILMASK(nmd) & ep_rcvr_availrails (rcvr))) -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0) -+ rcvrRail = NULL; -+ else -+ { -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ if (! (EP_NMD_RAILMASK(&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) < 0) -+ rcvrRail = NULL; -+ } -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP(rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR, "ep_requeue_receive: rcvrRail=%p - setting unbound\n", rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ -+ep_complete_receive (EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL && rxd->State == EP_RXD_COMPLETED); -+ -+ FreeRxd (rcvr, rxd); -+ -+ /* if we're waiting for cleanup, then wake them up */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_put: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ /* rxd no longer on active list - just free it */ -+ /* off and return an error */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * rail or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_put: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_PUT_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *remote, EP_NMD *local, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_get: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_get: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_GET_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_complete_rpc: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ if (blk == NULL) -+ bzero (&rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ else -+ bcopy (blk, &rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_complete_rpc: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_COMPLETE_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* functions for accessing fields of rxds */ -+void *ep_rxd_arg(EP_RXD *rxd) { return (rxd->Arg); } -+int ep_rxd_len(EP_RXD *rxd) { return (rxd->RxdMain->Len); } -+EP_STATUS ep_rxd_status(EP_RXD *rxd) { return (rxd->RxdMain->Len < 0 ? rxd->RxdMain->Len : EP_SUCCESS); } -+int ep_rxd_isrpc(EP_RXD *rxd) { return (EP_IS_RPC(rxd->RxdMain->Envelope.Attr) != 0); } -+EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd) { return (&rxd->RxdMain->Envelope); } -+EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd) { return (EP_HAS_PAYLOAD(rxd->RxdMain->Envelope.Attr) ? &rxd->RxdMain->Payload : NULL); } -+int ep_rxd_node(EP_RXD *rxd) { return (rxd->RxdMain->Envelope.NodeId); } -+EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd) { return (&rxd->RxdMain->StatusBlk); } -+EP_RAILMASK ep_rxd_railmask(EP_RXD *rxd) { return (rxd->Data.nmd_attr); } -+ -+static void -+ProcessNmdMapResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[rxd->RxdMain->Envelope.NodeId]; -+ int i; -+ -+ ASSERT (msg->Body.MapNmd.nFrags == rxd->nFrags); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ rxd->Remote[i] = msg->Body.MapNmd.Nmd[i]; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* node is still connected on this rail */ -+ (ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* NMDs are now valid for this rail */ -+ { -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcComplete) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ default: -+ panic ("ProcessNmdMapResponse: XID match but rxd in invalid state\n"); -+ break; -+ } -+ -+ rxd->NextRunTime = 0; -+ } -+ else -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr, rxd); -+} -+ -+static void -+ProcessFailoverResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ /* XXXX - TBD */ -+#ifdef NOTYET -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_RCVR_RAIL *nRcvrRail; -+ EP_RXD_RAIL *nRxdRail; -+ -+ ASSERT (rxd->RxdMain->Envelope.Attr & EP_RPC); -+ -+ EPRINTF6 (DBG_RCVR, "ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p Xid=%016llx state %x.%x - txd on rail %d\n", rcvr, rxd, -+ rxd->MsgXid.Unique, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, msg->Body.FailoverTxd.Rail); -+ -+ if ((nRcvrRail = rcvr->Rails[msg->Body.FailoverTxd.Rail]) == NULL || -+ (nRcvrRail->Rcvr->RailMask & EP_RAIL2RAILMASK (rail->Number)) == NULL) -+ { -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr,rxd); -+ return; -+ } -+ -+ -+ nRxdRail = EP_RCVR_OP (nrcvrRail, GetRxd) (rcvr, nRcvrRail); -+ -+ -+ /* If the RPC was in progress, then rollback and mark it as flagged, -+ * this will then get treated as though the NMDs were not mapped -+ * for the rail when the user initiated the operation. -+ */ -+ switch (rxdRail->RxdMain->DataEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_PUT: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_GET: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_GET: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_GET; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_PRIVATE: -+ switch (rxdRail->RxdMain->DoneEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_COMPLETE: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE: -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE; -+ break; -+ -+ case EP_EVENT_PENDING: -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid state\n"); -+ } -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid staten"); -+ } -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP_EVENT_PRIVATE; -+ -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ BindRxdToRail (rxd, nRxdRail); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+#endif -+} -+ -+void -+ep_rcvr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *) arg; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ -+ if (EP_XIDS_MATCH (msg->Hdr.Xid, rxd->MsgXid)) -+ { -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessNmdMapResponse (rcvr, rxd, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessFailoverResponse (rcvr, rxd, msg); -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+ -+EP_RCVR * -+ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvs) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RCVR *rcvr; -+ struct list_head *el; -+ extern int portals_envelopes; -+ -+ if (portals_envelopes && (svc == EP_MSG_SVC_PORTALS_SMALL || svc == EP_MSG_SVC_PORTALS_LARGE)) -+ { -+ printk ("ep: use %d envelopes rather than %d for portals %s message service\n", sys->Position.pos_nodes * 16, nenvs, -+ svc == EP_MSG_SVC_PORTALS_SMALL ? "small" : "large"); -+ -+ nenvs = portals_envelopes; -+ } -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (rcvr, EP_RCVR *, sizeof (EP_RCVR), 1); -+ -+ if (rcvr == NULL) -+ return (NULL); -+ -+ rcvr->Subsys = subsys; -+ rcvr->Service = svc; -+ rcvr->InputQueueEntries = nenvs; -+ rcvr->FreeDescCount = 0; -+ rcvr->TotalDescCount = 0; -+ rcvr->ForwardRxdCount = 0; -+ -+ spin_lock_init (&rcvr->Lock); -+ INIT_LIST_HEAD (&rcvr->ActiveDescList); -+ -+ kcondvar_init (&rcvr->CleanupSleep); -+ kcondvar_init (&rcvr->FreeDescSleep); -+ spin_lock_init (&rcvr->FreeDescLock); -+ INIT_LIST_HEAD (&rcvr->FreeDescList); -+ INIT_LIST_HEAD (&rcvr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &rcvr->XidCache); -+ -+ rcvr->XidCache.MessageHandler = ep_rcvr_xid_msg_handler; -+ rcvr->XidCache.Arg = rcvr; -+ -+ kmutex_lock (&subsys->Lock); -+ /* See if this service is already in use */ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == svc) -+ { -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ kmutex_unlock (&subsys->Lock); -+ return NULL; -+ } -+ } -+ -+ -+ list_add_tail (&rcvr->Link, &subsys->Receivers); -+ -+ ep_procfs_rcvr_add(rcvr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (rcvr); -+} -+ -+void -+ep_free_rcvr (EP_RCVR *rcvr) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head list; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ ep_procfs_rcvr_del(rcvr); -+ -+ list_del (&rcvr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ INIT_LIST_HEAD (&list); -+ -+ /* abort all rxds - should not be bound to a rail */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (! list_empty (&rcvr->ActiveDescList)) -+ { -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ ASSERT (rxd->RxdRail == NULL); -+ ASSERT (rxd->RxdMain->Len == EP_RXD_PENDING); -+ -+ rxd->State = EP_RXD_COMPLETED; -+ rxd->RxdMain->Len = EP_SHUTDOWN; -+ -+ list_del (&rxd->Link); -+ list_add_tail (&rxd->Link, &list); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ EP_RXD *rxd = list_entry (list.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ if (rxd->Handler) -+ rxd->Handler (rxd); -+ } -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ continue; -+ } -+ -+ if (rcvr->FreeDescCount == rcvr->TotalDescCount) -+ break; -+ -+ rcvr->CleanupWaiting++; -+ kcondvar_wait (&rcvr->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* must all be in free list */ -+ ASSERT( rcvr->FreeDescCount == rcvr->TotalDescCount); -+ -+ while (! list_empty(& rcvr->DescBlockList) ) -+ FreeRxdBlock (rcvr, list_entry (rcvr->DescBlockList.next, EP_RXD_BLOCK, Link)); -+ -+ /* had better be all gone now */ -+ ASSERT((rcvr->FreeDescCount == 0) && (rcvr->TotalDescCount == 0)); -+ -+ ep_xid_cache_destroy (sys, &rcvr->XidCache); -+ -+ spin_lock_destroy (&rcvr->Lock); -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+EP_RXD * -+StealRxdFromOtherRail (EP_RCVR *rcvr) -+{ -+ EP_RXD *rxd; -+ int i; -+ -+ /* looking at the the rcvr railmask to find a rail to try to steal rxd from */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ if ((rxd = EP_RCVR_OP (rcvr->Rails[i], StealRxd) (rcvr->Rails[i])) != NULL) -+ return rxd; -+ -+ return NULL; -+} -+ -+long -+CheckUnboundRxd (EP_RCVR *rcvr, EP_RXD *rxd, long nextRunTime) -+{ -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RCVR_RAIL *rcvrRail; -+ int rnum; -+ -+ if ((rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(&rxd->Data))) < 0) -+ rnum = ep_rcvr_prefrail (rcvr, ep_rcvr_availrails (rcvr)); -+ -+ if ( rnum < 0 ) { -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ -+ return (nextRunTime); -+ } -+ -+ ASSERT ( rnum >= 0 ); -+ -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ ASSERT ( rcvrRail != NULL); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && /* not mapped already and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) == 0) || /* failed mapping, or */ -+ !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) /* failed to queue */ -+ { -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF4 (DBG_RCVR,"CheckUnboundRxd: rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", rcvr, rxd, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return (nextRunTime); -+} -+ -+int -+CheckRxdNmdsMapped (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RAIL *rail = rxdRail->RcvrRail->CommsRail->Rail; -+ int i; -+ -+ /* Try and map the local NMDs before checking to see if we can proceed */ -+ if (! (ep_nmd2railmask (rxd->Local, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Local NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&rxd->Local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ if (ep_nmd_map_rails (sys, &rxd->Local[i], EP_RAIL2RAILMASK(rail->Number))) -+ rxd->NextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ /* Try and map remote NMDs if they are not valid for this rail */ -+ if (! (ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EP_MANAGER_MSG_BODY msgBody; -+ -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Remote NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ msgBody.MapNmd.nFrags = rxd->nFrags; -+ msgBody.MapNmd.Railmask = EP_RAIL2RAILMASK (rail->Number); -+ for (i = 0; i < rxd->nFrags; i++) -+ msgBody.MapNmd.Nmd[i] = rxd->Remote[i]; -+ -+ if (ep_send_message (rail, env->NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST, rxd->MsgXid, &msgBody) == 0) -+ rxd->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ rxd->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ -+ return 0; -+ } -+ -+ if ((ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number)) != 0) -+ { -+ rxd->NextRunTime = 0; -+ return 1; -+ } -+ -+ return 0; -+} -+ -+long -+ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime) -+{ -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* Check to see if we're low on rxds */ -+ if (rcvr->FreeDescCount < ep_rxd_lowat) -+ AllocateRxdBlock (rcvr, 0, NULL); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ nextRunTime = EP_RCVR_OP (rcvr->Rails[i], Check) (rcvr->Rails[i], nextRunTime); -+ -+ /* See if we have any rxd's which need to be handled */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxdRail == NULL) -+ nextRunTime = CheckUnboundRxd (rcvr, rxd, nextRunTime); -+ else -+ { -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || /* envelope not received yet */ -+ rail->Nodes[env->NodeId].State != EP_NODE_CONNECTED) /* will be failing over */ -+ continue; -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete)(rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ } -+ -+ if (rxd->NextRunTime && (nextRunTime == 0 || AFTER (nextRunTime, rxd->NextRunTime))) -+ nextRunTime = rxd->NextRunTime; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (nextRunTime); -+} -+ -+void -+ep_display_rxd (DisplayInfo *di, EP_RXD *rxd) -+{ -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ (di->func)(di->arg, " RXD: %p State=%x RxdMain=%p(%x.%x.%x) Data=%x.%x.%x %s\n", rxd, -+ rxd->State, rxd->RxdMain, rxd->NmdMain.nmd_addr, rxd->NmdMain.nmd_len, -+ rxd->NmdMain.nmd_attr, rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, -+ rxd->RxdMain->Len == EP_RXD_PENDING ? "Pending" : "Active"); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d XID=%08x.%08x.%016llx\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, env->TxdMain.nmd_addr, -+ env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags, env->Xid.Generation, env->Xid.Handle, env->Xid.Unique);; -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (rxdRail) EP_RCVR_OP (rxdRail->RcvrRail, DisplayRxd) (di, rxdRail); -+} -+ -+void -+ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ int pendingCount = 0; -+ int railCounts[EP_MAX_RAILS]; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ for (i = 0; i FreeDescLock, flags); -+ list_for_each (el, &rcvr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ pendingCount++; -+ else -+ activeCount++; -+ -+ if (rxdRail) -+ railCounts[rxdRail->RcvrRail->CommsRail->Rail->Number]++; -+ } -+ -+ (di->func)(di->arg, "RCVR: rcvr=%p number=%d\n", rcvr, rcvr->Service); -+ (di->func)(di->arg, " RXDS Free=%d (%d) Pending=%d Active=%d Rails=%d.%d.%d.%d\n", -+ freeCount, rcvr->FreeDescCount, pendingCount, activeCount, railCounts[0], railCounts[1], -+ railCounts[2], railCounts[3]); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->Rails[i] != NULL) -+ EP_RCVR_OP (rcvr->Rails[i], DisplayRcvr) (di, rcvr->Rails[i]); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ if (rxd->RxdMain->Len != EP_RXD_PENDING || full) -+ ep_display_rxd (di, rxd); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep_rxd_received_now(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ INC_STAT(rcvr->stats,rx); -+ ADD_STAT(rcvr->stats,rx_len, rxd->RxdMain->Len); -+ -+ if (rxd->RxdMain->Len < 0 || !EP_IS_MULTICAST(env->Attr)) -+ { -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ EPRINTF5 (DBG_RCVR, "ep_rxd_received: forward rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->ForwardDescLock, flags); -+ list_add_tail (&rxd->Link, &rcvr->Subsys->ForwardDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->ForwardDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+#if defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ ep_rxd_received_now(rxd); -+} -+ -+#else -+ -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (env->CheckSum) -+ ep_rxd_queue_csum(rxd); -+ else -+ ep_rxd_received_now(rxd); -+} -+ -+void -+ep_rxd_queue_csum(EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_RCVR, "ep_rxd_queue_csum: rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->CheckSumDescLock, flags); -+ list_add_tail (&rxd->CheckSumLink, &rcvr->Subsys->CheckSumDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->CheckSumDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+} -+#endif -+ -+void -+ep_rcvr_fillout_stats(EP_RCVR *rcvr, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr->stats,rx), GET_STAT_PER_SEC(rcvr->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr->stats,rx_len) / (1024*1024)); -+} -+ -+void -+ep_rcvr_rail_fillout_stats(EP_RCVR_RAIL *rcvr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx), GET_STAT_PER_SEC(rcvr_rail->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr_rail->stats,rx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsRx_elan3.c linux-2.6.9/drivers/net/qsnet/ep/epcommsRx_elan3.c ---- clean/drivers/net/qsnet/ep/epcommsRx_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsRx_elan3.c 2005-03-10 10:25:52.000000000 -0500 -@@ -0,0 +1,1776 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan3.c,v 1.24 2005/03/10 15:25:52 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define RCVR_TO_RAIL(rcvrRail) ((EP3_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->Device) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+static void RxDataEvent (EP3_RAIL *rail, void *arg); -+static void RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDataCookieOps = -+{ -+ RxDataEvent, -+ RxDataRetry, -+ NULL, /* DmaCancelled */ -+ RxDataVerify, -+}; -+ -+static void RxDoneEvent (EP3_RAIL *rail, void *arg); -+static void RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDoneCookieOps = -+{ -+ RxDoneEvent, -+ RxDoneRetry, -+ NULL, /* DmaCancelled */ -+ RxDoneVerify, -+}; -+ -+static int -+AllocateRxdRailBlock (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL_BLOCK *blk; -+ EP3_RXD_RAIL *rxdRail; -+ sdramaddr_t pRxdElan; -+ EP3_RXD_RAIL_MAIN *pRxdMain; -+ E3_Addr pRxdElanAddr; -+ E3_Addr pRxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i, j; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_RXD_RAIL_BLOCK *, sizeof (EP3_RXD_RAIL_BLOCK), 1); -+ if (blk == NULL) -+ return 0; -+ -+ if ((pRxdElan = ep_alloc_elan (&rail->Generic, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pRxdMain = ep_alloc_main (&rail->Generic, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdMainAddr)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pRxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->Generic.RcvrRail = (EP_RCVR_RAIL *) rcvrRail; -+ rxdRail->RxdElan = pRxdElan; -+ rxdRail->RxdElanAddr = pRxdElanAddr; -+ rxdRail->RxdMain = pRxdMain; -+ rxdRail->RxdMainAddr = pRxdMainAddr; -+ -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), 0); -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); -+ elan3_sdram_writeq (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr), (long) rxdRail); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ RegisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j], pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), &RxDataCookieOps, (void *) rxdRail); -+ -+ event.ev_Type = EV_TYPE_DMA | (pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[j+1])); -+ event.ev_Count = 0; -+ -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ RegisterCookie (&rail->CookieTable, &rxdRail->DataCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), &RxDataCookieOps, (void *) rxdRail); -+ RegisterCookie (&rail->CookieTable, &rxdRail->DoneCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), &RxDoneCookieOps, (void *) rxdRail); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DataCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DataEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DoneCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DoneEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pRxdMain->DataEvent = EP3_EVENT_FREE; -+ pRxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pRxdElan += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdElanAddr += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdMain = (EP3_RXD_RAIL_MAIN *) ((unsigned long) pRxdMain + EP3_RXD_RAIL_MAIN_SIZE); -+ pRxdMainAddr += EP3_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvrRail->DescBlockList); -+ rcvrRail->TotalDescCount += EP3_NUM_RXD_PER_BLOCK; -+ rcvrRail->FreeDescCount += EP3_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->Rxd[i].Generic.Link, &rcvrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeRxdRailBlock (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvrRail->TotalDescCount -= EP3_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ -+ rcvrRail->FreeDescCount--; -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ DeregisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j]); -+ -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Rxd[0].RxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Rxd[0].RxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+} -+ -+static EP3_RXD_RAIL * -+GetRxdRail (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ if (list_empty (&rcvrRail->FreeDescList)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->FreeDescList.next, EP3_RXD_RAIL, Generic.Link); -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ rcvrRail->FreeDescCount--; -+ } -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&RCVR_TO_SUBSYS(rcvrRail)->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+FreeRxdRail (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = (EP_RAIL *) RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ -+ EP_ASSERT (rail, rxdRail->Generic.RcvrRail == &rcvrRail->Generic); -+ -+ EP_ASSERT (rail, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_FREE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&rxdRail->Generic.Link, &rcvrRail->FreeDescList); -+ -+ rcvrRail->FreeDescCount++; -+ -+ if (rcvrRail->FreeDescWaiting) -+ { -+ rcvrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+} -+ -+static void -+BindRxdToRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: BindRxdToRail: rxd=%p rxdRail=%p\n", rail->Generic.Name, rxd, rxdRail); -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->Generic; -+ rxdRail->Generic.Rxd = rxd; -+} -+ -+static void -+UnbindRxdFromRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->Generic && rxdRail->Generic.Rxd == rxd); -+ -+ EPRINTF3 (DBG_RCVR, "%s: UnbindRxdFromRail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rxdRail->Generic.RcvrRail)->Generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->Generic.Rxd = NULL; -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupall (&rcvrRail->CleanupSleep, &rxd->Rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+} -+ -+static void -+LockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ E3_uint32 RestartBits = 0; -+ int delay = 1; -+ E3_uint32 seq; -+ E3_uint32 reg; -+ -+ ASSERT (SPINLOCK_HELD (&rcvrRail->Generic.Rcvr->Lock)); -+ -+ mb(); -+ elan3_sdram_writel (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1); -+ mb(); -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ while (seq != sl->sl_seq) -+ { -+ while (sl->sl_seq == (seq - 1)) -+ { -+ mb(); -+ -+ if ((read_reg32 (dev, Exts.InterruptReg) & (INT_TProc | INT_TProcHalted)) != 0 && spin_trylock (&dev->IntrLock)) -+ { -+ reg=read_reg32 (dev, Exts.InterruptReg); -+ ELAN_REG_REC(reg); -+ -+ if ((reg & (INT_TProc | INT_TProcHalted)) != 0&& -+ elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)) != sl->sl_seq) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: LockRcvrThread - thread trapped\n", rail->Generic.Name); -+ -+ /* The thread processor has *really* trapped, and the spinlock is still held. -+ * thus is must have trapped due to a network error - we need to complete the -+ * actions required for this envelope, since we may be spin-locking the receiver -+ * to search the dma retry lists for a particular dma. So must ensure that -+ * if the thread had trapped then the dma has been queued onto the retry list -+ * *before* we inspect them. -+ */ -+ IncrStat (commsRail, LockRcvrTrapped); -+ -+ /* We're going to generate a spurious interrupt here - since we will -+ * handle the thread processor trap directly */ -+ ELAN_REG_REC(reg); -+ if (HandleTProcTrap (dev, &RestartBits)) -+ { -+ /* NOTE - this is not an assert, since the "store" to unlock the lock could -+ * be held up on the PCI interface, whilst the thread processor has -+ * gone on and switched to a new thread, which has then trapped, and -+ * our read of the InterruptReg can overtake the unlock write. -+ * -+ * ASSERT (dev->ThreadTrap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == -+ * elan3_sdram_readl (dev, rcvr->RcvrElan + offsetof (EP_RCVR_ELAN, PendingRxDescsElan))); -+ */ -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+ -+ DeliverTProcTrap (dev, dev->ThreadTrap, INT_TProc); -+ } -+ } -+ spin_unlock (&dev->IntrLock); -+ } -+ -+ DELAY (delay); delay++; -+ } -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ } -+} -+ -+static void -+UnlockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ -+ mb(); -+ elan3_sdram_writel (rail->Device, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0); -+ mmiob(); -+} -+ -+void -+CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdElanAddr, E3_uint32 PAckVal) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD_MAIN *rxdMain = rxdRail->Generic.Rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ sdramaddr_t queue = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ int nodeId; -+ EP_NODE_RAIL *nodeRail; -+ E3_DMA_BE dma; -+ E3_Addr nfptr; -+ E3_Addr next; -+ -+ ASSERT (commsRail->Rail == &rail->Generic); -+ ASSERT (rxdElanAddr == elan3_sdram_readl (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))); -+ -+ IncrStat (commsRail, CompleteEnvelope); -+ -+ /* We don't need to aquire the NodeLock here (however we might be holding it), -+ * since this can only get called while the node is connected, or disconnecting. -+ * If the node is disconnecting, then we can get called from FlushDisconnecting() -+ * while holding the NodeLock - after we cannot get called again until the node -+ * has reconnected from scratch. -+ */ -+ /* Copy the envelope information */ -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ -+ if (nfptr == elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top))) -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ else -+ nfptr += elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ -+ /* Copy the envelope and payload (unconditionally) */ -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), env, EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION); -+ -+ /* Copy the received message length */ -+ rxdMain->Len = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len)); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ -+ /* Copy the DMA descriptor to queue on the approriate retry list */ -+ elan3_sdram_copyq_from_sdram (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]), &dma, sizeof (E3_DMA)); /* PCI read block */ -+ -+ EP_ASSERT (&rail->Generic, dma.s.dma_direction == DMA_READ);; -+ -+#if defined(DEBUG_ASSERT) && defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the thread packet could have successfully -+ * transferred the "put" dma to the far side - which could then have -+ * completed - but the far side will see a network error which will -+ * cause the virtual circuit to be dropped by the far side and this -+ * DMA will be removed */ -+ if (rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ printk ("CompleteEnvelope: suspicious dma : Node=%d DataBlock=%d Event=%d\n", -+ env->NodeId, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif -+ -+ EPRINTF6 (DBG_RCVR, "%s: CompleteEnvelope: rxd=%p NodeId=%d Xid=%llx Cookies=%08x,%08x\n", commsRail->Rail->Name, -+ rxdRail, env->NodeId, (long long) env->Xid.Unique, dma.s.dma_srcCookieVProc, dma.s.dma_destCookieVProc); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the original DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dma.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ nodeId = EP_VP_TO_NODE(dma.s.dma_srcVProc); -+ nodeRail = &rail->Generic.Nodes[nodeId]; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (PAckVal != E3_PAckOk) -+ { -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ QueueDmaForRetry (rail, &dma, EP_RETRY_LOW_PRI_RETRY); -+ else -+ QueueDmaOnStalledList (rail, &dma); -+ } -+ -+ /* Finaly forcefully drop the spinlock for the thread */ -+ sl->sl_seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ -+ wmb(); -+} -+ -+void -+StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ -+ EPRINTF3 (DBG_RCVR, "%s: StallThreadForNoDescs - rcvrRail=%p sp=%x\n", commsRail->Rail->Name, rcvrRail, sp); -+ -+ IncrStat (commsRail, StallThread); -+ -+ /* NOTE: spin lock not required as thread is trapped */ -+ -+ if (rcvrRail->RcvrMain->PendingDescsTailp != 0) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - pending descriptors, wakeup thread\n", commsRail->Rail->Name); -+ -+ /* -+ * A receive buffer was queued after the thread had decided to go to -+ * sleep, but before the event interrupt occured. Just restart the -+ * thread to consume the envelope. -+ */ -+ IssueRunThread (rail, sp); -+ } -+ else -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - set ThreadWaiting\n", commsRail->Rail->Name); -+ -+ IncrStat (commsRail, ThrdWaiting); -+ -+ /* Mark the rcvr as waiting for a rxd, and schedule a call of ep_check_rcvr -+ * to attempt to "steal" a descriptor from a different rail */ -+ rcvrRail->ThreadWaiting = sp; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+void -+StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ unsigned long flags = 0; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ rcvrRail->ThreadHalted = sp; -+ -+ EPRINTF2 (DBG_EPTRAP, "%s: StallThreadForHalted: sp=%08x\n", rail->Generic.Name, sp); -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupone (&rcvrRail->CleanupSleep, &rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+/* -+ * RxDataEvent: arg == EP3_RXD_RAIL -+ * Called on completion of receiving data. -+ */ -+static void -+RxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int delay = 1; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("%s: RxDataEvent: rxd %p not complete [%x,%x,%x]\n", rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ /* -+ * Note, since the thread will have sent the "get" dma before copying the -+ * envelope, we must check that it has completed doing this, if not then -+ * it might be that the thread trapped due to a network error, so we must -+ * spinlock against the thread -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ LockRcvrThread (rcvrRail); -+ UnlockRcvrThread (rcvrRail); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF7 (DBG_RCVR, "%s: RxDataEvent: rxd=%p rxdRail=%p completed from elan node %d [XID=%llx] Length %d State %x\n", -+ rail->Generic.Name, rxd, rxdRail, env->NodeId, (long long) env->Xid.Unique, rxd->RxdMain->Len, rxd->State); -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_PUT_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->Generic.Number); -+ -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+} -+ -+/* -+ * RxDataRetry: arg == EP3_RXD_RAIL -+ * Called on retry of "get" dma of large transmit data -+ * and rpc_get/rpc_put and "put" of datavec of rpc completion. -+ */ -+static void -+RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDataVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDataRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDataRetry: rcvr %p rxd %p [XID=%llx]\n", rail->Generic.Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ EP_ASSERT (&rail->Generic, -+ (rxd->State == EP_RXD_RECEIVE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_PUT_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (rxd->State == EP_RXD_COMPLETE_ACTIVE ? -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1: /* PCI read */ -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 1)); /* PCI read */ -+ } -+ else -+ { -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_READ_REQUEUE); -+ -+#if defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the "get" DMA can still be running if -+ * it's packet got a network error - and then the "put" from the -+ * far side has completed - however the virtual circuit should -+ * then be dropped by the far side and this DMA will be removed */ -+ if (EP_VP_TO_NODE(dma->s.dma_srcVProc) != ep_rxd_node(rxd) || -+ (rxd->State != EP_RXD_RECEIVE_ACTIVE && rxd->State != EP_RXD_GET_ACTIVE) || -+ rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ EPRINTF6 (DBG_RCVR, "%s: RxDataRetry: suspicious dma : VProc=%d NodeId=%d State=%d DataBlock=%x Event=%d\n", -+ rail->Generic.Name, EP_VP_TO_NODE(dma->s.dma_srcVProc), ep_rxd_node(rxd), rxd->State, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif /* defined(DEBUG_SDRAM_ASSERT) */ -+ } -+#endif /* DEBUG_ASSERT */ -+} -+ -+/* -+ * RxDoneEvent: arg == EP_RXD -+ * Called on completion of large receive. -+ */ -+static void -+RxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ int delay = 1; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("RxDoneEvent: rxd %p not complete [%x,%x.%x]\n", rxd, rxdRail->RxdMain->DoneEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneEvent: rxd %p completed from elan node %d [XID=%llx]\n", -+ commsRail->Rail->Name, rxd, rxd->RxdMain->Envelope.NodeId, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ IncrStat (commsRail, RxDoneEvent); -+ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* mark rxd as private */ -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+} -+ -+/* -+ * RxDoneRetry: arg == EP_RXD -+ * Called on retry of "put" of RPC completion status block -+ */ -+static void -+RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDoneVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDoneRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneRetry: rcvr %p rxd %p [XID=%llx]\n", commsRail->Rail->Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DONE)); -+} -+ -+static void -+RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == ep_rxd_node(rxd)); -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1)); /* PCI read */ -+#endif /* defined(DEBUG_ASSERT) */ -+} -+ -+int -+ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL *rxdRail; -+ -+ ASSERT ( SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = GetRxdRail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Bind the rxdRail and rxd together */ -+ BindRxdToRail (rxd, rxdRail); -+ -+ /* Mark as active */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* Interlock with StallThreadForNoDescs */ -+ spin_lock (&dev->IntrLock); -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p\n", rail->Generic.Name, rxd->Rcvr, rxd, rxdRail); -+ -+ EP3_SPINENTER (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); /* PCI write */ -+ if (rcvrRail->RcvrMain->PendingDescsTailp == 0) -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), rxdRail->RxdElanAddr); /* PCI write */ -+ else -+ elan3_sdram_writel (dev, rcvrRail->RcvrMain->PendingDescsTailp, rxdRail->RxdElanAddr); /* PCI write */ -+ rcvrRail->RcvrMain->PendingDescsTailp = rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next); -+ -+ EP3_SPINEXIT (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ /* If the thread has paused because it was woken up with no receive buffer */ -+ /* ready, then wake it up to process the one we've just added */ -+ if (rcvrRail->ThreadWaiting) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: DoReceive: ThreadWaiting - restart thread\n", rail->Generic.Name); -+ -+ IssueRunThread (rail, rcvrRail->ThreadWaiting); -+ -+ rcvrRail->ThreadWaiting = (E3_Addr) 0; -+ } -+ -+ spin_unlock (&dev->IntrLock); -+ -+ return 1; -+} -+ -+void -+ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to put the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), local += (nFrags-1), remote += (nFrags-1); i >= 0; len += local->nmd_len, i--, local--, remote--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ if (i == (nFrags-1)) -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, dmabe.s.dma_srcCookieVProc); -+ -+ if (i != 0) -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_put: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to get the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), remote += (nFrags-1), local += (nFrags-1); i >= 0; len += remote->nmd_len, i--, remote--, local--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = remote->nmd_len; -+ dmabe.s.dma_source = remote->nmd_addr; -+ dmabe.s.dma_dest = local->nmd_addr; -+ if (i == (nFrags-1)) -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_destCookieVProc = LocalCookie (rail, env->NodeId); -+ dmabe.s.dma_srcEvent = (E3_Addr) 0; -+ dmabe.s.dma_srcCookieVProc = RemoteCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ /* -+ * Always copy down the dma descriptor, since we issue it as a READ_REQUEUE -+ * dma, and the elan will fetch the descriptor to send out of the link from -+ * the rxdElan->Dmas[i] location, before issueing the DMA chain we modify -+ * the dma_source. -+ */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the orignal DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dmabe.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_get: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCGet, len); -+} -+ -+void -+ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the status block dma */ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = sizeof (EP_STATUSBLK); -+ dmabe.s.dma_source = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ dmabe.s.dma_dest = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ dmabe.s.dma_destEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA(env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF8 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, dmabe.s.dma_source, dmabe.s.dma_dest, dmabe.s.dma_size, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ for (len = 0, i = EP_MAXFRAG, remote += (nFrags-1), local += (nFrags-1); i > EP_MAXFRAG-nFrags; len += local->nmd_len, i--, local--, remote--) -+ { -+ /* copy down previous dma */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i-1]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ } -+ -+ for (i = EP_MAXFRAG-nFrags; i < EP_MAXFRAG; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the done event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+void -+ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ sdramaddr_t qdescs = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs; -+ EP3_RCVR_RAIL *rcvrRail; -+ EP3_InputQueue qdesc; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP3_RCVR_RAIL *, sizeof (EP3_RCVR_RAIL), TRUE); -+ -+ kcondvar_init (&rcvrRail->CleanupSleep); -+ spin_lock_init (&rcvrRail->FreeDescLock); -+ INIT_LIST_HEAD (&rcvrRail->FreeDescList); -+ INIT_LIST_HEAD (&rcvrRail->DescBlockList); -+ -+ rcvrRail->Generic.CommsRail = commsRail; -+ rcvrRail->Generic.Rcvr = rcvr; -+ -+ rcvrRail->RcvrMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RCVR_RAIL_MAIN), 0, &rcvrRail->RcvrMainAddr); -+ rcvrRail->RcvrElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RCVR_RAIL_ELAN), 0, &rcvrRail->RcvrElanAddr); -+ rcvrRail->InputQueueBase = ep_alloc_elan (&rail->Generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->InputQueueAddr); -+ stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rcvrRail->ThreadStack); -+ -+ rcvrRail->TotalDescCount = 0; -+ rcvrRail->FreeDescCount = 0; -+ -+ /* Initialise the main/elan spin lock */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_seq), 0); -+ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_seq), 0); -+ -+ /* Initialise the receive lists */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), 0); -+ -+ /* Initialise the ThreadShould Halt */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), 0); -+ -+ /* Initialise pointer to the ep_rcvr_rail */ -+ elan3_sdram_writeq (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr), (unsigned long) rcvrRail); -+ -+ /* Initialise elan visible main memory */ -+ rcvrRail->RcvrMain->ThreadLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ -+ /* initialise and copy down the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = rcvrRail->InputQueueAddr; -+ qdesc.q_top = rcvrRail->InputQueueAddr + (rcvr->InputQueueEntries-1) * EP_INPUTQ_SIZE; -+ qdesc.q_fptr = rcvrRail->InputQueueAddr; -+ qdesc.q_bptr = rcvrRail->InputQueueAddr + EP_INPUTQ_SIZE; -+ qdesc.q_size = EP_INPUTQ_SIZE; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, qdescs + rcvr->Service * sizeof (EP3_InputQueue), sizeof (EP3_InputQueue)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = &rcvrRail->Generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* initialise and run the Elan thread to process the queue */ -+ IssueRunThread (rail, ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "ep3comms_rcvr"), -+ rcvrRail->ThreadStack, stack, EP3_STACK_SIZE, 5, -+ rail->RailElanAddr, rcvrRail->RcvrElanAddr, rcvrRail->RcvrMainAddr, -+ EP_MSGQ_ADDR(rcvr->Service), -+ rail->ElanCookies)); -+} -+ -+void -+ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rcvr->Rails[rail->Generic.Number]; -+ unsigned long flags; -+ struct list_head *el, *nel; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: removing rail\n", rail->Generic.Name); -+ -+ /* flag the rail as no longer available */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* mark the input queue descriptor as full */ -+ SetQueueLocked(rail, ((EP3_COMMS_RAIL *)commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue)); -+ -+ /* need to halt the thread first */ -+ /* set ThreadShouldHalt in elan memory */ -+ /* then trigger the event */ -+ /* and wait on haltWait */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), TRUE); -+ -+ IssueSetevent (rail, EP_MSGQ_ADDR(rcvr->Service) + offsetof(EP3_InputQueue, q_event)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ while (rcvrRail->ThreadHalted == 0) -+ { -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point the thread is halted and it has no envelopes */ -+ -+ /* we need to wait until all the rxd's in the list that are -+ * bound to the rail we are removing are not pending -+ */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: waiting for active rxd's to be returned\n", rail->Generic.Name); -+ -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are not pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ /* here we need to unbind the remaining rxd's */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail(rcvrRail, rxdRail ); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ while (rcvrRail->FreeDescCount != rcvrRail->TotalDescCount) -+ { -+ rcvrRail->FreeDescWaiting++; -+ kcondvar_wait (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->DescBlockList)) -+ FreeRxdRailBlock (rcvrRail, list_entry(rcvrRail->DescBlockList.next, EP3_RXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->TotalDescCount == 0) && (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount)); -+ -+ ep_free_elan (&rail->Generic, rcvrRail->ThreadStack, EP3_STACK_SIZE); -+ ep_free_elan (&rail->Generic, rcvrRail->InputQueueAddr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->Generic, rcvrRail->RcvrElanAddr, sizeof (EP3_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->Generic, rcvrRail->RcvrMainAddr, sizeof (EP3_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP3_RCVR_RAIL)); -+} -+ -+EP_RXD * -+ep3rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ E3_Addr rxdElanAddr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ LockRcvrThread (rcvrRail); -+ if ((rxdElanAddr = elan3_sdram_readl (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))) != 0) -+ { -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ sdramaddr_t next; -+ -+ EPRINTF2 (DBG_RCVR, "%s: StealRxdFromOtherRail stealing rxd %p\n", rail->Generic.Name, rail); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ UnlockRcvrThread (rcvrRail); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ return rxd; -+ } -+ -+ UnlockRcvrThread (rcvrRail); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return NULL; -+} -+ -+long -+ep3rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ EP_RXD *rxd; -+ unsigned long flags; -+ -+ if (rcvrRail->FreeDescCount < ep_rxd_lowat && !AllocateRxdRailBlock (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ if (rcvrRail->ThreadWaiting && (rxd = StealRxdFromOtherRail (rcvr)) != NULL) -+ { -+ /* Map the receive buffer into this rail as well */ -+ EPRINTF4 (DBG_RCVR, "%s: mapping rxd->Data (%08x.%08x.%08x) into this rails\n", -+ rail->Generic.Name, rxd->Data.nmd_addr,rxd->Data.nmd_len, rxd->Data.nmd_attr); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rail->Generic.Number)) && /* not already mapped and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rail->Generic.Number)) == 0) || /* failed to map it */ -+ ep3rcvr_queue_rxd (rxd, &rcvrRail->Generic)) /* or failed to queue it */ -+ { -+ EPRINTF5 (DBG_RCVR,"%s: stolen rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", -+ rail->Generic.Name, rcvr, rxd, rail->Generic.Number, rcvrRail); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ } -+ -+ return nextRunTime; -+} -+ -+static void -+ep3rcvr_flush_filtering (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Generic.Rail; -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t qdesc = commsRail->QueueDescs + rcvr->Service*sizeof (EP3_InputQueue); -+ E3_Addr qTop = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_top)); -+ E3_Addr qBase = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qSize = elan3_sdram_readl (dev,qdesc + offsetof (EP3_InputQueue, q_size)); -+ E3_uint32 nfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ nfptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_fptr)); -+ qbptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_bptr)); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ -+ while (nfptr != qbptr) -+ { -+ unsigned nodeId = elan3_sdram_readl (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep3rcvr_flush_filtering: nodeId=%d State=%d\n", rail->Generic.Name, nodeId, rail->Generic.Nodes[nodeId].State); -+ -+ if (rail->Generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan3_sdram_writel (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep3rcvr_flush_flushing (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF6 (DBG_DISCON, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p state %x.%x elan node %d\n", rail->Generic.Name, -+ rcvr, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep3rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep3rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep3rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_MANAGER_MSG_BODY msgBody; -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF6 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p elan node %d state %x.%x\n", rail->Generic.Name, rcvr, rxd, env->NodeId, -+ rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent) && !(EP_IS_NO_FAILOVER(env->Attr))) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF7 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p State %x.%x Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, -+ (long long) env->Xid.Unique, (long long) rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->Generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep3rcvr_failover_callback: rxd state is aborted but bound to a rail\n"); -+ break; -+ } -+ -+ EPRINTF3 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->Generic.Name, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF4 (DBG_DISCON, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p elan node %d\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_disconnect_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - not able to failover\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ /* Mark as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Type))); -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Type))); -+ (di->func)(di->arg, " DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ (di->func)(di->arg, " Data=%x Len=%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len))); -+} -+ -+void -+ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t queue = commsRail->QueueDescs + rcvrRail->Generic.Rcvr->Service * sizeof (EP3_InputQueue); -+ E3_Addr qbase = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qtop = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)); -+ E3_uint32 qsize = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ int freeCount = 0; -+ int blockCount = 0; -+ unsigned long flags; -+ struct list_head *el; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ list_for_each (el, &rcvrRail->FreeDescList) -+ freeCount++; -+ list_for_each (el, &rcvrRail->DescBlockList) -+ blockCount++; -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail %d FreeDesc %d (%d) Total %d Blocks %d %s\n", -+ rail->Generic.Number, rcvrRail->FreeDescCount, freeCount, rcvrRail->TotalDescCount, blockCount, -+ rcvrRail->ThreadWaiting ? "ThreadWaiting" : ""); -+ -+ (di->func)(di->arg, " InputQueue state=%x bptr=%x size=%x top=%x base=%x fptr=%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_state)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_bptr)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr))); -+ (di->func)(di->arg, " event=%x.%x [%x.%x] wevent=%x.%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Type)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Count)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Source)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Dest)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wevent)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wcount))); -+ -+ LockRcvrThread (rcvrRail); -+ { -+ E3_Addr nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ EP_ENVELOPE env; -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ -+ while (nfptr != elan3_sdram_readl (dev, queue + offsetof (E3_Queue, q_bptr))) -+ { -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), -+ &env, sizeof (EP_ENVELOPE)); -+ -+ (di->func)(di->arg, " ENVELOPE Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", -+ env.Version, env.Attr, env.Xid.Generation, env.Xid.Handle, (long long) env.Xid.Unique); -+ (di->func)(di->arg, " NodeId=%x Range=%x TxdRail=%x TxdMain=%x.%x.%x\n", -+ env.NodeId, env.Range, env.TxdRail, env.TxdMain.nmd_addr, -+ env.TxdMain.nmd_len, env.TxdMain.nmd_attr); -+ -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ } -+ } -+ UnlockRcvrThread (rcvrRail); -+} -+ -+void -+ep3rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_RCVR_RAIL * ep4rcvr_rail = (EP3_RCVR_RAIL *) rcvr_rail; */ -+} -+ -diff -urN clean/drivers/net/qsnet/ep/epcommsRx_elan4.c linux-2.6.9/drivers/net/qsnet/ep/epcommsRx_elan4.c ---- clean/drivers/net/qsnet/ep/epcommsRx_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsRx_elan4.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,1765 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan4.c,v 1.35.2.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define RCVR_TO_COMMS(rcvrRail) ((EP4_COMMS_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail) -+#define RCVR_TO_RAIL(rcvrRail) ((EP4_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->r_ctxt.ctxt_dev) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+#define RXD_TO_RCVR(txdRail) ((EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail) -+#define RXD_TO_RAIL(txdRail) RCVR_TO_RAIL(RXD_TO_RCVR(rxdRail)) -+ -+static void rxd_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ void -+__ep4_rxd_assert_free (EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int i, failed = 0; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ if (((rxdRail)->rxd_main->rxd_sent[i] != EP4_STATE_FREE)) -+ failed |= (1 << i); -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_FREE)) -+ failed |= (1 << 5); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_FREE)) -+ failed |= (1 << 6); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 7); -+ for (i = 0; i < EP_MAXFRAG; i++) -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << (8 + i)); -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 12); -+ if (((int)(elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) -+ failed |= (1 << 13); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ (rxdRail)->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_FREE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, -+ (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType) + 4, 0); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_pending(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rcvrRail); -+ register int failed = 0; -+ -+ failed |= ((rxdRail)->rxd_main->rxd_done != EP4_STATE_ACTIVE); -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_pending"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_private(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int failed = 0; -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_ACTIVE)) failed |= (1 << 0); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_PRIVATE)) failed |= (1 << 1); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) failed |= (1 << 2); -+ if (((int) (elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) failed |= (1 << 3); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_private: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_private: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_private"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_private_to_free (EP4_RXD_RAIL *rxdRail) -+{ -+ register int i; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FREE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_FREE; -+} -+ -+static __inline__ void -+__ep4_rxd_force_private (EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RXD_TO_RAIL(rxdRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ elan4_sdram_writeq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+} -+ -+#define EP4_RXD_ASSERT_FREE(rxdRail) __ep4_rxd_assert_free(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PENDING(rxdRail) __ep4_rxd_assert_pending(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PRIVATE(rxdRail) __ep4_rxd_assert_private(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_PRIVATE_TO_FREE(rxdRail) __ep4_rxd_private_to_free(rxdRail) -+#define EP4_RXD_FORCE_PRIVATE(rxdRail) __ep4_rxd_force_private(rxdRail) -+ -+static int -+alloc_rxd_block (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL_BLOCK *blk; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_ADDR rxdMainAddr; -+ sdramaddr_t rxdElan; -+ EP_ADDR rxdElanAddr; -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ KMEM_ZALLOC (blk, EP4_RXD_RAIL_BLOCK *, sizeof (EP4_RXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((rxdElan = ep_alloc_elan (&rail->r_generic, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((rxdMain = ep_alloc_main (&rail->r_generic, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdMainAddr)) == (EP4_RXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+ } -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->rxd_generic.RcvrRail = &rcvrRail->rcvr_generic; -+ rxdRail->rxd_elan = rxdElan; -+ rxdRail->rxd_elan_addr = rxdElanAddr; -+ rxdRail->rxd_main = rxdMain; -+ rxdRail->rxd_main_addr = rxdMainAddr; -+ -+ /* reserve 128 bytes of "event" cq space for the chained STEN packets */ -+ if ((rxdRail->rxd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_RXD_STEN_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* allocate a single word of "setevent" command space */ -+ if ((rxdRail->rxd_scq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ goto failed; -+ } -+ -+ /* initialise the completion events */ -+ for (j = 0; j <= EP_MAXFRAG; j++) -+ rxdMain->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdMain->rxd_done = EP4_STATE_FREE; -+ rxdMain->rxd_failed = EP4_STATE_FREE; -+ -+ /* initialise the scq for the thread */ -+ rxdMain->rxd_scq = rxdRail->rxd_scq->ecq_addr; -+ -+ /* initialise the "start" event to copy the first STEN packet into the command queue */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the "chain" events to copy the next STEN packet into the command queue */ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j+1])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ } -+ -+ /* initialise the portions of the sten packets which don't change */ -+ for (j = 0; j < EP_MAXFRAG+1; j++) -+ { -+ if (j < EP_MAXFRAG) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j])); -+ else -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_RXD_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_sent[j]))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_RXD_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_setevent), -+ SET_EVENT_CMD | (rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_nop_cmd), -+ NOP_CMD); -+ } -+ -+ /* register a main interrupt cookie */ -+ ep4_register_intcookie (rail, &rxdRail->rxd_intcookie, rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ rxd_interrupt, rxdRail); -+ -+ /* initialise the command stream for the done event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_done))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the command stream for the fail event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the done and fail events */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the pointer to the main memory portion */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main), -+ rxdMainAddr); -+ -+ /* move onto next descriptor */ -+ rxdElan += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdElanAddr += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) ((unsigned long) rxdMain + EP4_RXD_RAIL_MAIN_SIZE); -+ rxdMainAddr += EP4_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&blk->blk_link, &rcvrRail->rcvr_blocklist); -+ -+ rcvrRail->rcvr_totalcount += EP4_NUM_RXD_PER_BLOCK; -+ rcvrRail->rcvr_freecount += EP4_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->blk_rxds[i].rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ rxdRail--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ } -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+ -+static void -+free_rxd_block (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ rcvrRail->rcvr_totalcount -= EP4_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rcvrRail->rcvr_freecount--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_rxds[0].rxd_elan_addr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+} -+ -+static EP4_RXD_RAIL * -+get_rxd_rail (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = RCVR_TO_SUBSYS(rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ if (list_empty (&rcvrRail->rcvr_freelist)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->rcvr_freelist.next, EP4_RXD_RAIL, rxd_generic.Link); -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ -+ rcvrRail->rcvr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->rcvr_freecount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+free_rxd_rail (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&rxdRail->rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ rcvrRail->rcvr_freecount++; -+ -+ if (rcvrRail->rcvr_freewaiting) -+ { -+ rcvrRail->rcvr_freewaiting--; -+ kcondvar_wakeupall (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+} -+ -+static void -+bind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: bind_rxd_rail: rxd=%p rxdRail=%p\n", rail->r_generic.Name, rxd, rxdRail); -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_rxd), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->rxd_generic; -+ rxdRail->rxd_generic.Rxd = rxd; -+} -+ -+static void -+unbind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->rxd_generic && rxdRail->rxd_generic.Rxd == rxd); -+ -+ EP4_RXD_ASSERT_PRIVATE (rxdRail); -+ -+ EPRINTF3 (DBG_RCVR, "%s: unbind_rxd_rail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rcvrRail)->r_generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->rxd_generic.Rxd = NULL; -+ -+ if (rcvrRail->rcvr_cleanup_waiting) -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rxd->Rcvr->Lock); -+ rcvrRail->rcvr_cleanup_waiting = 0; -+ -+ EP4_RXD_PRIVATE_TO_FREE (rxdRail); -+} -+ -+ -+static void -+rcvr_stall_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ EPRINTF1 (DBG_RCVR, "rcvr_stall_interrupt: rcvrRail %p thread halted\n", rcvrRail); -+ -+ rcvrRail->rcvr_thread_halted = 1; -+ -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+rcvr_stall_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ sdramaddr_t qdesc = ((EP4_COMMS_RAIL *) commsRail)->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_uint64 qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ /* Mark the queue as full by writing the fptr */ -+ if (qbptr == (rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1))) -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), rcvrRail->rcvr_slots_addr); -+ else -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), qbptr + EP_INPUTQ_SIZE); -+ -+ /* Notify the thread that it should stall after processing any outstanding envelopes */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvrRail->rcvr_stall_intcookie.int_val); -+ -+ /* Issue a swtevent to the queue event to wake the thread up */ -+ ep4_set_event_cmd (rcvrRail->rcvr_resched, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent)); -+} -+ -+static void -+rxd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) arg; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ EP4_RXD_RAIL_MAIN *rxdMain = rxdRail->rxd_main; -+ unsigned long delay = 1; -+ EP_RXD *rxd; -+ EP_ENVELOPE *env; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ for (;;) -+ { -+ if (rxdMain->rxd_done == EP4_STATE_FINISHED || rxdMain->rxd_failed == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to rxd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "rxd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY(delay); -+ delay <<= 1; -+ } -+ -+ if (rxdMain->rxd_done != EP4_STATE_FINISHED) -+ { -+ EPRINTF8 (DBG_RETRY, "%s: rxd_interrupt: rxdRail %p retry: done=%d failed=%d NodeId=%d XID=%08x.%08x.%016llx\n", -+ rail->r_generic.Name, rxdRail, (int)rxdMain->rxd_done, (int)rxdMain->rxd_failed, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.NodeId, -+ rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Generation, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Handle, -+ (long long)rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Unique); -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX backoff ? */ -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ -+ rxd = rxdRail->rxd_generic.Rxd; -+ env = &rxd->RxdMain->Envelope; -+ -+ /* -+ * Note, since the thread will have sent the remote dma packet before copying -+ * the envelope, we must check that it has completed doing this, we do this -+ * by acquiring the spinlock against the thread which it only drops once it's -+ * completed. -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ EP4_SPINENTER (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ EP4_SPINEXIT (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ EP4_ASSERT (rail, env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF8 (DBG_RCVR, "%s: rxd_interrupt: rxd %p finished from %d XID %08x.%08x.%016llx len %d attr %x\n", rail->r_generic.Name, -+ rxd, rxd->RxdMain->Envelope.NodeId, rxd->RxdMain->Envelope.Xid.Generation, rxd->RxdMain->Envelope.Xid.Handle, -+ (long long)rxd->RxdMain->Envelope.Xid.Unique, rxd->RxdMain->Len, rxd->RxdMain->Envelope.Attr); -+ -+ rxdMain->rxd_done = EP4_STATE_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ -+ if (rxd->RxdMain->Len >= 0) { -+ INC_STAT(rcvrRail->rcvr_generic.stats,rx); -+ ADD_STAT(rcvrRail->rcvr_generic.stats,rx_len,rxd->RxdMain->Len); -+ INC_STAT(rail->r_generic.Stats,rx); -+ ADD_STAT(rail->r_generic.Stats,rx_len,rxd->RxdMain->Len); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+ break; -+ -+ case EP_RXD_PUT_ACTIVE: -+ case EP_RXD_GET_ACTIVE: -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+ break; -+ -+ case EP_RXD_COMPLETE_ACTIVE: -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler(rxd); -+ break; -+ -+ default: -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ printk ("%s: rxd_interrupt: rxd %p in invalid state %d\n", rail->r_generic.Name, rxd, rxd->State); -+ /* NOTREACHED */ -+ } -+} -+ -+static void -+ep4rcvr_flush_filtering (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_Addr qbase = rcvrRail->rcvr_slots_addr; -+ E4_Addr qlast = qbase + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1); -+ E4_uint64 qfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* zip down the input queue and invalidate any envelope we find to a node which is locally passivated */ -+ qfptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)); -+ qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ while (qfptr != qbptr) -+ { -+ unsigned int nodeId = elan4_sdram_readl (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep4rcvr_flush_filtering: nodeId=%d State=%d\n", rail->r_generic.Name, nodeId, rail->r_generic.Nodes[nodeId].State); -+ -+ if (rail->r_generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan4_sdram_writel (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (qfptr != qlast) -+ qfptr += EP_INPUTQ_SIZE; -+ else -+ qfptr = qbase; -+ } -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep4rcvr_flush_flushing (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ /* remove any sten packates which are retrying to nodes which are being passivated */ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ list_for_each_safe (el, nel, &rcvrRail->rcvr_retrylist) { -+ EP4_RXD_RAIL *rxdRail = list_entry (el, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4rcvr_flush_flushing: removing rxdRail %p from retry list\n", rail->r_generic.Name, rxdRail); -+ -+ list_del (&rxdRail->rxd_retry_link); -+ } -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL (rxdRail, rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF6 (DBG_DISCON, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p state %d elan node %d state %d\n", -+ rail->r_generic.Name, rcvr, rxd, (int)rxdRail->rxd_main->rxd_done, env->NodeId, rxd->State); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep4rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#if SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+#if SUPPORT_RAIL_FAILOVER -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+ EP_MANAGER_MSG_BODY msgBody; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF5 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p elan node %d state %d\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, (int)rxdRail->rxd_main->rxd_done); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a rail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE(rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+#if SUPPORT_RAIL_FAILOVER -+ /* XXXX - no rail failover for now .... */ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE && !EP_IS_NO_FAILOVER(env->Attr)) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF6 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p State %d Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->r_generic.Name, rxd, rxd->State, (long long)env->Xid.Unique, (long long)rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->r_generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ EPRINTF3 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->r_generic.Name, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF5 (DBG_DISCON, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p elan node %d state %x\n", rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_disconnect_callback: rxd state is free but bound to a rail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE || rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE) /* incomplete RPC */ -+ { -+ EPRINTF5 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d state %x - not able to failover\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_disconnect_callback: rxd state is aborted but bound to a rail\n"); -+ break; -+ } -+ -+ printk ("%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || env->NodeId != nodeId) -+ continue; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE) -+ { -+ EP_NETERR_COOKIE cookie; -+ unsigned int first, this; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE) -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(env->Attr) ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ else -+ first = (EP_MAXFRAG+1) - rxd->nFrags; -+ -+ for (this = first; this < (EP_MAXFRAG+1); this++) -+ if (rxdRail->rxd_main->rxd_sent[this] == EP4_STATE_ACTIVE) -+ break; -+ -+ if (this > first) -+ { -+ /* Look at the last completed STEN packet and if it's neterr cookie matches, then change -+ * the rxd to look the same as if the sten packet had failed and then schedule it for retry */ -+ cookie = elan4_sdram_readq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[--this].c_cookie)); -+ -+ if (cookie == cookies[0] || cookie == cookies[1]) -+ { -+ EP_NETERR_COOKIE ncookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_STEN; -+ -+ EPRINTF6 (DBG_NETWORK_ERROR, "%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d new cookie <%lld%s%s%s%s>\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this, EP4_COOKIE_STRING(ncookie)); -+ -+ printk ("%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d new cookie <%lld%s%s%s%s>\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this, EP4_COOKIE_STRING(ncookie)); -+ -+ /* Allocate a new cookie for this sten packet, since this message could be received more than once. -+ * If the second arrives after we've sucessfully sent the response and the packet completes, then we -+ * could try and requeue it after the next sten packet got nacked. */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[this].c_cookie), ncookie); -+ -+ rxdRail->rxd_main->rxd_sent[this] = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FAILED; -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ EP4_ASSERT (rail, rxdRail->rxd_retry_time == 0); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ } -+ } -+ } -+ } -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL *rxdRail; -+ register int i; -+ -+ ASSERT (SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = get_rxd_rail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p buffer %x len %x\n", -+ rail->r_generic.Name, rxd->Rcvr, rxd, rxdRail, rxd->Data.nmd_addr, rxd->Data.nmd_len); -+ -+ /* bind the rxdRail and rxd together */ -+ bind_rxd_rail (rxd, rxdRail); -+ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Mark as active */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x00, /* %r0 */ -+ ep_symbol (&rail->r_threadcode, "c_queue_rxd")); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x10, /* %r2 */ -+ rcvrRail->rcvr_elan_addr); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x18, /* %r3 */ -+ rxdRail->rxd_elan_addr); -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_start)); -+ -+ return 1; -+} -+ -+void -+ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ if (i == (nFrags-1)) -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ else -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, (long long)env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ EP4_ASSERT (rail, rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_GET_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to get the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, (long long)env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(env->NodeId))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_trans), -+ SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_STEN); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_typeSize), -+ E4_DMA_TYPE_SIZE (local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_vproc), -+ EP_VP_DATA (rail->r_generic.Position.pos_nodeid)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcAddr), -+ remote->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstAddr), -+ local->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcEvent), -+ 0); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstEvent), -+ i == (nFrags-1) ? rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done) : -+ rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i])); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ EP4_ASSERT (rail, rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags - 1; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, (long long)env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the status block dma */ -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(EP_STATUSBLK_SIZE, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ cmd.c_dma_dstAddr = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ cmd.c_dma_dstEvent = env->TxdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done);; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%llx\n", -+ rail->r_generic.Name, rxd, (long long)env->Xid.Unique, (int) cmd.c_dma_srcAddr, (int) cmd.c_dma_dstAddr, (long long)EP_STATUSBLK_SIZE); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[EP_MAXFRAG]), sizeof (EP4_RXD_DMA_CMD)); -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ EP4_ASSERT (rail, rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+EP_RXD * -+ep4rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ /* XXXX - TBD */ -+ return NULL; -+} -+ -+long -+ep4rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ -+ if (rcvrRail->rcvr_freecount < ep_rxd_lowat && !alloc_rxd_block (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4rcvr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ while (! list_empty (&rcvrRail->rcvr_retrylist)) -+ { -+ EP4_RXD_RAIL *rxdRail = list_entry (rcvrRail->rcvr_retrylist.next, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ unsigned int first = (EP_MAXFRAG+1) - ((env->Attr & EP_MULTICAST ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ -+ if (BEFORE (lbolt, rxdRail->rxd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, rxdRail->rxd_retry_time)) -+ nextRunTime = rxdRail->rxd_retry_time; -+ -+ break; -+ } -+ -+ list_del (&rxdRail->rxd_retry_link); -+ rxdRail->rxd_retry_time = 0; -+ -+ /* determine which sten packet to resubmit */ -+ for (; first < (EP_MAXFRAG+1); first++) -+ if (rxdRail->rxd_main->rxd_sent[first] == EP4_STATE_ACTIVE) -+ break; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4rcvr_retry: rxdRail %p, reissuing sten[%d]\n", rail->r_generic.Name, rxdRail, first); -+ -+ /* re-initialise the fail event */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ -+ /* re-initialise the chain event to resubmit this sten packet */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first-1].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ /* finally issue the setevent to start the chain again */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdescs = ((EP4_COMMS_RAIL *) commsRail)->r_descs; -+ EP4_RCVR_RAIL *rcvrRail; -+ E4_InputQueue qdesc; -+ E4_ThreadRegs tregs; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP4_RCVR_RAIL *, sizeof (EP4_RCVR_RAIL), 1); -+ -+ spin_lock_init (&rcvrRail->rcvr_freelock); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_freelist); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_blocklist); -+ -+ kcondvar_init (&rcvrRail->rcvr_cleanup_sleep); -+ kcondvar_init (&rcvrRail->rcvr_freesleep); -+ -+ INIT_LIST_HEAD (&rcvrRail->rcvr_retrylist); -+ spin_lock_init (&rcvrRail->rcvr_retrylock); -+ -+ rcvrRail->rcvr_generic.CommsRail = commsRail; -+ rcvrRail->rcvr_generic.Rcvr = rcvr; -+ -+ rcvrRail->rcvr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RCVR_RAIL_MAIN), 0, &rcvrRail->rcvr_main_addr); -+ rcvrRail->rcvr_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RCVR_RAIL_ELAN), 0, &rcvrRail->rcvr_elan_addr); -+ rcvrRail->rcvr_slots = ep_alloc_elan (&rail->r_generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->rcvr_slots_addr); -+ stack = ep_alloc_elan (&rail->r_generic, EP4_STACK_SIZE, 0, &rcvrRail->rcvr_stack); -+ -+ /* allocate a command queue for the thread to use, plus space for it to wait/reschedule */ -+ rcvrRail->rcvr_ecq = ep4_alloc_ecq (rail, CQ_Size64K); -+ rcvrRail->rcvr_resched = ep4_get_ecq (rail, EP4_ECQ_ATOMIC, 8); -+ -+ ep4_register_intcookie (rail, &rcvrRail->rcvr_stall_intcookie, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvr_stall_interrupt, rcvrRail); -+ -+ /* Initialise the elan portion */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_halt.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp), -+ rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head)); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qbase), rcvrRail->rcvr_slots_addr); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qlast), -+ rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)); -+ -+ /* Initialise the main memory portion */ -+ rcvrRail->rcvr_main->rcvr_thread_lock = 0; -+ -+ /* Install our retry handler */ -+ rcvrRail->rcvr_retryops.op_func = ep4rcvr_retry; -+ rcvrRail->rcvr_retryops.op_arg = rcvrRail; -+ -+ ep4_add_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ /* Update the queue desriptor */ -+ qdesc.q_bptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_fptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_control = E4_InputQueueControl (rcvrRail->rcvr_slots_addr, rcvrRail->rcvr_slots_addr + (EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)), EP_INPUTQ_SIZE); -+ qdesc.q_event = rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ -+ ep4_write_qdesc (rail, qdescs + (rcvr->Service * EP_QUEUE_DESC_SIZE), &qdesc); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = &rcvrRail->rcvr_generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ { -+ sdramaddr_t stackTop = stack + EP4_STACK_SIZE; -+ E4_Addr stackTopAddr = rcvrRail->rcvr_stack + EP4_STACK_SIZE; -+ -+ ep4_init_thread (rail, &tregs, stackTop, stackTopAddr, ep_symbol (&rail->r_threadcode, "ep4comms_rcvr"), 6, -+ (E4_uint64) rail->r_elan_addr, (E4_uint64) rcvrRail->rcvr_elan_addr, (E4_uint64) rcvrRail->rcvr_main_addr, -+ (E4_uint64) EP_MSGQ_ADDR(rcvr->Service), (E4_uint64) rcvrRail->rcvr_ecq->ecq_addr, (E4_uint64) rcvrRail->rcvr_resched->ecq_addr); -+ } -+ -+ /* Issue the command to the threads private command queue */ -+ elan4_run_thread_cmd (rcvrRail->rcvr_ecq->ecq_cq, &tregs); -+ -+ ep_procfs_rcvr_add_rail(&(rcvrRail->rcvr_generic)); -+} -+ -+void -+ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rcvr->Rails[rail->r_generic.Number]; -+ ELAN4_HALTOP haltop; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ ep_procfs_rcvr_del_rail(&(rcvrRail->rcvr_generic)); -+ -+ /* Run a halt operation to mark the input queue as full and -+ * request the thread to halt */ -+ haltop.op_mask = INT_DiscardingHighPri | INT_TProcHalted; -+ haltop.op_function = rcvr_stall_haltop; -+ haltop.op_arg = rcvrRail; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &haltop); -+ -+ /* Wait for the thread to tell us it's processed the input queue */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ while (! rcvrRail->rcvr_thread_halted) -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ rcvrRail->rcvr_thread_halted = 0; -+ -+ /* flag the rail as no longer available */ -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ /* wait for all active communications to terminate */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ rcvrRail->rcvr_cleanup_waiting++; -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ EP4_RXD_ASSERT_PENDING (rxdRail); -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ while (rcvrRail->rcvr_freecount != rcvrRail->rcvr_totalcount) -+ { -+ rcvrRail->rcvr_freewaiting++; -+ kcondvar_wait (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->rcvr_blocklist)) -+ free_rxd_block (rcvrRail, list_entry(rcvrRail->rcvr_blocklist.next, EP4_RXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->rcvr_totalcount == 0) && (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount)); -+ -+ ep4_remove_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ ep4_deregister_intcookie (rail, &rcvrRail->rcvr_stall_intcookie); -+ -+ ep4_put_ecq (rail, rcvrRail->rcvr_resched, 8); -+ ep4_free_ecq (rail, rcvrRail->rcvr_ecq); -+ -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_stack, EP4_STACK_SIZE); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_slots_addr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_elan_addr, sizeof (EP4_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->r_generic, rcvrRail->rcvr_main_addr, sizeof (EP4_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP4_RCVR_RAIL)); -+} -+ -+void -+ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int i; -+ -+ (di->func)(di->arg, " Rail %d rxd %p elan %lx(%x) main %p(%x) ecq %d scq %d debug %llx\n", rail->r_generic.Number, -+ rxdRail, rxdRail->rxd_elan, rxdRail->rxd_elan_addr, rxdRail->rxd_main, rxdRail->rxd_main_addr, -+ elan4_cq2num(rxdRail->rxd_ecq->ecq_cq), elan4_cq2num(rxdRail->rxd_scq->ecq_cq), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_debug))); -+ (di->func)(di->arg, " start %016llx %016llx %016llx [%016llx %016llx]\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_dma_cookie))); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ (di->func)(di->arg, " chain[%d] %016llx %016llx %016llx [%016llx %016llx]\n", i, -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_dma_cookie))); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[1])), -+ rxdRail->rxd_main->rxd_done); -+ (di->func)(di->arg, " fail %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[1])), -+ rxdRail->rxd_main->rxd_failed); -+ (di->func)(di->arg, " next %016llx queued %016llx main %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_next)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_queued)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main))); -+ (di->func)(di->arg, " sent %016llx %016llx %016llx %016llx %016llx\n", -+ rxdRail->rxd_main->rxd_sent[0], rxdRail->rxd_main->rxd_sent[1], rxdRail->rxd_main->rxd_sent[2], -+ rxdRail->rxd_main->rxd_sent[3], rxdRail->rxd_main->rxd_sent[4]); -+} -+ -+void -+ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP_RCVR *rcvr = r->Rcvr; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t rcvrElan = rcvrRail->rcvr_elan; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ sdramaddr_t event = rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ unsigned int freeCount = 0; -+ unsigned int blockCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ list_for_each (el, &rcvrRail->rcvr_freelist) -+ freeCount++; -+ list_for_each (el, &rcvrRail->rcvr_blocklist) -+ blockCount++; -+ spin_unlock_irqrestore(&rcvrRail->rcvr_freelock, flags); -+ -+ (di->func)(di->arg, " Rail %d elan %lx(%x) main %p(%x) ecq %d resched %d debug %llx\n", -+ rail->r_generic.Number, rcvrRail->rcvr_elan, rcvrRail->rcvr_elan_addr, -+ rcvrRail->rcvr_main, rcvrRail->rcvr_main_addr, elan4_cq2num(rcvrRail->rcvr_ecq->ecq_cq), -+ elan4_cq2num (rcvrRail->rcvr_resched->ecq_cq), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_debug))); -+ (di->func)(di->arg, " free %d (%d) total %d blocks %d\n", -+ rcvrRail->rcvr_freecount, freeCount, rcvrRail->rcvr_totalcount, blockCount); -+ (di->func)(di->arg, " spinlock %016llx %016llx\n", rcvrRail->rcvr_main->rcvr_thread_lock, -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock))); -+ (di->func)(di->arg, " queue: bptr %016llx fptr %016llx control %016llx (base %lx %x)\n", -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_control)), -+ rcvrRail->rcvr_slots, rcvrRail->rcvr_slots_addr); -+ (di->func)(di->arg, " event %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[0])), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[1]))); -+ (di->func)(di->arg, " pending_tailp %016llx pending_head %016llx\n", -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp)), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head))); -+} -+ -+void -+ep4rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_RCVR_RAIL * ep4rcvr_rail = (EP4_RCVR_RAIL *) rcvr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsTx.c linux-2.6.9/drivers/net/qsnet/ep/epcommsTx.c ---- clean/drivers/net/qsnet/ep/epcommsTx.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsTx.c 2005-09-02 07:04:02.000000000 -0400 -@@ -0,0 +1,919 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx.c,v 1.30.2.2 2005/09/02 11:04:02 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+unsigned int ep_txd_lowat = 5; -+ -+static int -+AllocateTxdBlock (EP_XMTR *xmtr, EP_ATTRIBUTE attr, EP_TXD **txdp) -+{ -+ EP_TXD_BLOCK *blk; -+ EP_TXD *txd; -+ EP_TXD_MAIN *pTxdMain; -+ int i; -+ unsigned long flags; -+ -+ EPRINTF1 (DBG_XMTR, "AllocateTxdBlock: xmtr=%p\n", xmtr); -+ -+ KMEM_ZALLOC (blk, EP_TXD_BLOCK *, sizeof (EP_TXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return -ENOMEM; -+ -+ if ((pTxdMain = ep_shared_alloc_main (xmtr->Subsys->Subsys.Sys, EP_TXD_MAIN_SIZE * EP_NUM_TXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+ return -ENOMEM; -+ } -+ -+ for (txd = &blk->Txd[0], i = 0; i < EP_NUM_TXD_PER_BLOCK; i++, txd++) -+ { -+ txd->Xmtr = xmtr; -+ txd->TxdMain = pTxdMain; -+ -+ ep_nmd_subset (&txd->NmdMain, &blk->NmdMain, (i * EP_TXD_MAIN_SIZE), EP_TXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pTxdMain = (EP_TXD_MAIN *) ((unsigned long) pTxdMain + EP_TXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtr->DescBlockList); -+ xmtr->TotalDescCount += EP_NUM_TXD_PER_BLOCK; -+ -+ for (i = txdp ? 1 : 0; i < EP_NUM_TXD_PER_BLOCK; i++) -+ { -+ list_add (&blk->Txd[i].Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) -+ { -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (txdp) -+ *txdp = &blk->Txd[0]; -+ -+ return 0; -+} -+ -+static void -+FreeTxdBlock (EP_XMTR *xmtr, EP_TXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_del (&blk->Link); -+ -+ xmtr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ xmtr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ ep_shared_free_main (xmtr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+} -+ -+static EP_TXD * -+GetTxd (EP_XMTR *xmtr, EP_ATTRIBUTE attr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_TXD *txd; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ while (list_empty (&xmtr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (AllocateTxdBlock (xmtr, attr, &txd) == ESUCCESS) -+ return (txd); -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ return (NULL); -+ } -+ -+ xmtr->FreeDescWanted++; -+ kcondvar_wait (&xmtr->FreeDescSleep, &xmtr->FreeDescLock, &flags); -+ } -+ -+ txd = list_entry (xmtr->FreeDescList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (--xmtr->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txd); -+} -+ -+void -+FreeTxd (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&txd->Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+} -+ -+int -+TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail) -+{ -+ EP_TXD *txd = txdRail->Txd; -+ EP_XMTR *xmtr = txd->Xmtr; -+ EP_ATTRIBUTE attr = txd->Envelope.Attr; -+ int stabilise; -+ extern int txd_stabilise; -+ -+ switch (EP_ATTR2TYPE (attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: /* is the rail in the current service indicator rail mask */ -+ if ((txd_stabilise & 4) == 0) -+ return 0; -+ -+ stabilise = (ep_xmtr_svc_indicator_railmask (xmtr, EP_ATTR2DATA (attr), txd->NodeId) & EP_RAIL2RAILMASK (rail->Number)) == 0; -+ break; -+ -+ case EP_TYPE_TIMEOUT: -+ if ((txd_stabilise & 2) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_ATTR2DATA(attr)); -+ break; -+ -+ default: -+ if ((txd_stabilise & 1) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_DEFAULT_TIMEOUT); -+ break; -+ } -+ -+ if (stabilise) -+ { -+ txd->Envelope.Attr = EP_SET_TXD_STABALISING(txd->Envelope.Attr); -+ txd->RetryTime = lbolt; -+ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ -+ return stabilise; -+} -+ -+void ep_xmtr_txd_stat(EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ int f; -+ unsigned long size; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ size = 0; -+ for (f=0; f < txd->Envelope.nFrags; f++) -+ size += txd->Envelope.Frags[f].nmd_len; -+ -+ INC_STAT(xmtr->stats,tx); -+ ADD_STAT(xmtr->stats,tx_len, size); -+ -+ if ((txdRail != NULL) && (txdRail->XmtrRail != NULL)){ -+ INC_STAT(txdRail->XmtrRail->stats,tx); -+ ADD_STAT(txdRail->XmtrRail->stats,tx_len, size); -+ -+ if ((txdRail->XmtrRail->CommsRail != NULL) && ( txdRail->XmtrRail->CommsRail->Rail != NULL)) { -+ INC_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx); -+ ADD_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx_len, size); -+ } -+ } -+} -+ -+static int -+PollActiveTransmitList (EP_XMTR *xmtr, int flag) -+{ -+ struct list_head *el, *nel; -+ struct list_head list; -+ unsigned long flags; -+ int count; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail == NULL) -+ continue; -+ -+ ASSERT (txdRail->Txd == txd); -+ -+ if (EP_XMTR_OP (txdRail->XmtrRail,PollTxd) (txdRail->XmtrRail, txdRail, flag)) -+ { -+ list_del (&txd->Link); /* remove from active transmit list */ -+ list_add_tail (&txd->Link, &list); /* and add to list to call handlers */ -+ } -+ } -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ for (count = 0; !list_empty (&list); count++) -+ { -+ EP_TXD *txd = list_entry (list.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ return (count); -+} -+ -+static inline void -+DoTransmit (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ EP_RAILMASK nmdRailMask = ep_nmd2railmask (txd->Envelope.Frags, txd->Envelope.nFrags); -+ EP_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int rnum; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ if (EP_IS_SVC_INDICATOR(txd->Envelope.Attr)) -+ nmdRailMask = nmdRailMask & ep_xmtr_svc_indicator_railmask(xmtr, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId); -+ -+ if (EP_IS_PREFRAIL_SET(txd->Envelope.Attr)) -+ rnum = EP_ATTR2PREFRAIL(txd->Envelope.Attr); -+ else -+ rnum = ep_xmtr_prefrail (xmtr, nmdRailMask, txd->NodeId); -+ -+ if (rnum < 0 || !(nmdRailMask & EP_RAIL2RAILMASK(rnum))) -+ xmtrRail = NULL; -+ else -+ xmtrRail = xmtr->Rails[rnum]; -+ -+ /* Allocate the XID while holding the xmtr->Lock from our XID cache */ -+ txd->Envelope.Xid = ep_xid_cache_alloc (xmtr->Subsys->Subsys.Sys, &xmtr->XidCache); -+ -+ EPRINTF7 (DBG_XMTR, "ep: transmit txd %p to %d/%d: Xid %llx nFrags %d [%08x.%d]\n", -+ txd, txd->NodeId, txd->Service, (long long) txd->Envelope.Xid.Unique, -+ txd->Envelope.nFrags, txd->Envelope.Frags[0].nmd_addr, txd->Envelope.Frags[0].nmd_len); -+ -+ /* Store time transmit started to timeout if not received */ -+ txd->TimeStamp = lbolt; -+ -+ /* Initialise the retry backoff */ -+ txd->Backoff.type = EP_BACKOFF_FREE; -+ -+ list_add_tail (&txd->Link, &xmtr->ActiveDescList); -+ -+ if (xmtrRail == NULL || !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ PollActiveTransmitList (xmtr, POLL_TX_LIST); -+} -+ -+EP_STATUS -+ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_CLEAR_LOCAL_ATTR(attr); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, DataXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, EP_SERVICE service, -+ EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ EP_TXD *txd; -+ int nnodes; -+ int i, len; -+ unsigned long flags; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if (destLo == -1) -+ destLo = sys->Position.pos_nodeid & ~(EP_MAX_NODES-1); -+ -+ if (destHi == -1 && (destHi = ((sys->Position.pos_nodeid + EP_MAX_NODES) & ~(EP_MAX_NODES-1)) - 1) >= sys->Position.pos_nodes) -+ destHi = sys->Position.pos_nodes-1; -+ -+ nnodes = (destHi-destLo+1); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (destLo, destHi); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_IS_SVC_INDICATOR(attr)) -+ ep_xmtr_svc_indicator_bitmap(xmtr, EP_ATTR2DATA(attr), txd->TxdMain->Bitmap, destLo, nnodes); -+ else -+ bt_subset (statemap_tobitmap(sys->NodeSet), txd->TxdMain->Bitmap, destLo, nnodes); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (bitmap != NULL) /* bitmap supplied, so intersect it with */ -+ bt_intersect (txd->TxdMain->Bitmap, bitmap, nnodes); /* the current node set map */ -+ -+ if ((attr & EP_NOT_MYSELF) && destLo <= sys->Position.pos_nodeid && sys->Position.pos_nodeid <= destHi) -+ BT_CLEAR (txd->TxdMain->Bitmap, (sys->Position.pos_nodeid-destLo)); /* clear myself if not wanted */ -+ -+ if ((i = bt_lowbit (txd->TxdMain->Bitmap, nnodes)) < 0) -+ { -+ FreeTxd (xmtr, txd); -+ return (EP_NODE_DOWN); -+ } -+ -+ txd->NodeId = (unsigned short) i; -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_RPC(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, RPCXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_ENVELOPE *env, EP_PAYLOAD *payload, bitmap_t *bitmap, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = env->Range; -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ bt_copy (bitmap, txd->TxdMain->Bitmap, EP_RANGE_HIGH(env->Range) - EP_RANGE_LOW(env->Range) + 1); -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+int -+ep_poll_transmits (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, POLL_TX_LIST)); -+} -+ -+int -+ep_enable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, ENABLE_TX_CALLBACK)); -+} -+ -+int -+ep_disable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, DISABLE_TX_CALLBACK)); -+} -+ -+/* functions for accessing fields of txds */ -+int ep_txd_node(EP_TXD *txd) { return (txd->NodeId); } -+EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd) { return (&txd->TxdMain->StatusBlk); } -+ -+void -+ep_xmtr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *) arg; -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Body.Failover.Xid, txd->Envelope.Xid)) -+ { -+ EP_XMTR_RAIL *xmtrRail = txdRail->XmtrRail; -+ EP_RAIL *rail = xmtrRail->CommsRail->Rail; -+ EP_MANAGER_MSG_BODY msgBody; -+ int rnum; -+ -+ if (! (msg->Body.Failover.Railmask & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ /* Need to failover this txd to a different rail, select a rail from -+ * the set that she has asked us to use and which is connected to her -+ * on this transmitter. If there are no such rails, then in all probability -+ * we're offline on all common rails and eventually she will see we have no -+ * rails in common and abort the receive. */ -+ if ((rnum = ep_xmtr_prefrail (xmtr, msg->Body.Failover.Railmask, txd->NodeId)) < 0) -+ ep_debugf (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST but can't determine rail (%04x,%04x,%d,%04x)\n", -+ rail->Name, msg->Body.Failover.Railmask, xmtr->RailMask, txd->NodeId, sys->Nodes[txd->NodeId].ConnectedRails); -+ else -+ { -+ EP_XMTR_RAIL *nXmtrRail = xmtr->Rails[rnum]; -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST txd=%p XID=%llx-> rail %d\n", rail->Name, txd, (long long) txd->Envelope.Xid.Unique, rnum); -+ -+ /* Bind the txd rail onto the new rail - it doesn't matter if we fail -+ * as it will remain bound to the original rail */ -+ (void) EP_XMTR_OP (nXmtrRail, BindTxd) (txd, nXmtrRail, EP_TXD_PHASE_PASSIVE); -+ } -+ } -+ -+ /* Send a failover response including an envelope update */ -+ msgBody.FailoverTxd.Rail = rail->Number; -+ msgBody.FailoverTxd.Xid = txd->Envelope.Xid; -+ msgBody.FailoverTxd.TxdRail = txd->Envelope.TxdRail; -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE, msg->Hdr.Xid, &msgBody); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: { -+ int txd_has_not_sent_envelope = 0; -+ EP_TXD *txd = NULL; -+ EP_TXD_RAIL *txdRail = NULL; -+ -+ if (msg->Body.NodeState.NetworkErrorState != 0) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt + MESSAGE_RETRY_TIME); -+ else -+ { -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ -+ txd = list_entry (el, EP_TXD, Link); -+ txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Hdr.Xid, txd->Envelope.Xid)) { -+ txd_has_not_sent_envelope = EP_XMTR_OP(txdRail->XmtrRail,CheckTxdState)(txd); -+ break; -+ } -+ } -+ -+ if (txd_has_not_sent_envelope) { -+ EPRINTF2 (DBG_STABILISE, "ep_xmtr_xid_msg_handler: GET_NODE_STATE_RESPONSE txd=%p XID=%llx not sent envelope\n", -+ txd, (long long) txd->Envelope.Xid.Unique); -+ -+ /* at this point it has finished stabalising */ -+ txd->Envelope.Attr = EP_CLEAR_TXD_STABALISING(txd->Envelope.Attr); -+ -+ /* store railmask into txd if not a service indicator or timeout */ -+ if (EP_IS_NO_TYPE(txd->Envelope.Attr)) -+ txd->Envelope.Attr = EP_SET_DATA(txd->Envelope.Attr, EP_TYPE_RAILMASK, msg->Body.NodeState.Railmask); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* TXD is now no longer bound to a rail , so let ep_check_xmtr() handle it */ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ else -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ break; -+ } -+ default: -+ panic ("ep_xmtr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+} -+ -+EP_XMTR * -+ep_alloc_xmtr (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_XMTR *xmtr; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (xmtr, EP_XMTR *, sizeof (EP_XMTR), 1); -+ -+ if (xmtr == NULL) -+ return (NULL); -+ -+ xmtr->Subsys = subsys; -+ -+ spin_lock_init (&xmtr->Lock); -+ INIT_LIST_HEAD (&xmtr->ActiveDescList); -+ -+ kcondvar_init (&xmtr->FreeDescSleep); -+ spin_lock_init (&xmtr->FreeDescLock); -+ INIT_LIST_HEAD (&xmtr->FreeDescList); -+ INIT_LIST_HEAD (&xmtr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &xmtr->XidCache); -+ -+ xmtr->XidCache.MessageHandler = ep_xmtr_xid_msg_handler; -+ xmtr->XidCache.Arg = xmtr; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&xmtr->Link, &subsys->Transmitters); -+ -+ ep_procfs_xmtr_add(xmtr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (xmtr); -+} -+ -+void -+ep_free_xmtr (EP_XMTR *xmtr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *rail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(rail,Xmtr.DelRail) (xmtr, rail); -+ } -+ -+ list_del (&xmtr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ /* all the desc's must be free */ -+ ASSERT(xmtr->FreeDescCount == xmtr->TotalDescCount); -+ -+ /* delete the descs */ -+ while (!list_empty (&xmtr->DescBlockList)) -+ FreeTxdBlock( xmtr, list_entry(xmtr->DescBlockList.next, EP_TXD_BLOCK , Link)); -+ -+ /* they had better all be gone now */ -+ ASSERT((xmtr->FreeDescCount == 0) && (xmtr->TotalDescCount == 0)); -+ -+ ep_procfs_xmtr_del(xmtr); -+ -+ ep_xid_cache_destroy (sys, &xmtr->XidCache); -+ -+ spin_lock_destroy (&xmtr->Lock); -+ KMEM_FREE (xmtr, sizeof (EP_XMTR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+long -+ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ int timed_out=0; -+ int i; -+ EP_MANAGER_MSG_BODY body; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ /* See if we have any txd's which need to be bound to a rail */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_NODE *node = &sys->Nodes[txd->NodeId]; -+ EP_RAILMASK nodeRails = node->ConnectedRails & xmtr->RailMask; -+ EP_ENVELOPE *env = &txd->Envelope; -+ -+ if (EP_IS_TXD_STABALISING(txd->Envelope.Attr)) -+ { -+ ASSERT(txd->TxdRail != NULL); -+ -+ if (AFTER (lbolt, txd->RetryTime)) -+ { -+ EPRINTF6 (DBG_STABILISE, "ep_check_xmtr txd=%p txdRail=%p send get node state to %d Xid=%08x.%08x.%016llx\n", -+ txd, txd->TxdRail, txd->NodeId, env->Xid.Generation, env->Xid.Handle, (long long)env->Xid.Unique); -+ -+ body.Service = txd->Service; -+ if (ep_send_message ( txd->TxdRail->XmtrRail->CommsRail->Rail, txd->NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE, env->Xid, &body) == 0) -+ txd->RetryTime = lbolt + (MESSAGE_RETRY_TIME << ep_backoff (&txd->Backoff, EP_BACKOFF_STABILISE)); -+ else -+ txd->RetryTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ ep_kthread_schedule (&subsys->Thread, txd->RetryTime); -+ continue; -+ } -+ -+ if (txd->TxdRail != NULL) -+ continue; -+ -+ switch (EP_ATTR2TYPE(txd->Envelope.Attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: -+ { -+ EP_RAILMASK rmask=0; -+ struct list_head *tmp; -+ -+ list_for_each (tmp, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (tmp, EP_COMMS_RAIL, Link); -+ if ( cm_svc_indicator_is_set(commsRail->Rail, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ nodeRails &= rmask; -+ break; -+ } -+ case EP_TYPE_TIMEOUT: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_ATTR2DATA(txd->Envelope.Attr)) ? (1) : (0); -+ break; -+ case EP_TYPE_RAILMASK: -+ nodeRails &= EP_ATTR2DATA(txd->Envelope.Attr); -+ break; -+ default: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_DEFAULT_TIMEOUT) ? (1) : (0); -+ break; -+ } -+ -+ if (nodeRails == 0 || timed_out || (EP_IS_NO_FAILOVER(env->Attr) && EP_IS_PREFRAIL_SET(env->Attr) && -+ (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr))) == 0)) -+ { -+ EPRINTF5 (timed_out ? DBG_STABILISE : DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx to %d no rails connected or cannot failover (nodeRails=0x%x,timed_out=%d\n", -+ txd, (long long) env->Xid.Unique, txd->NodeId, nodeRails, timed_out); -+ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ } -+ else -+ { -+ EP_XMTR_RAIL *xmtrRail; -+ int i, len, rnum; -+ -+ if (EP_IS_PREFRAIL_SET(env->Attr) && (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr)))) -+ rnum = EP_ATTR2PREFRAIL(env->Attr); -+ else -+ rnum = ep_pickRail (nodeRails); -+ -+ EPRINTF3 (DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx mapping NMDs onto rail %d \n", txd, (long long) env->Xid.Unique, rnum); -+ -+ for (i = len = 0; i < env->nFrags; i++, len += env->Frags[i].nmd_len) -+ ep_nmd_map_rails (sys, &env->Frags[i], nodeRails); -+ -+ if ((xmtrRail = xmtr->Rails[rnum]) == NULL || -+ !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&subsys->Thread, lbolt + RESOURCE_RETRY_TIME); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_NODE_DOWN); -+ FreeTxd (xmtr, txd); -+ } -+ -+ /* Check to see if we're low on txds */ -+ if (xmtr->FreeDescCount < ep_txd_lowat) -+ AllocateTxdBlock (xmtr, 0, NULL); -+ -+ /* Then check each rail */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->RailMask & (1 << i) ) -+ nextRunTime = EP_XMTR_OP (xmtr->Rails[i],Check) (xmtr->Rails[i], nextRunTime); -+ return (nextRunTime); -+} -+ -+void -+ep_display_txd (DisplayInfo *di, EP_TXD *txd) -+{ -+ EP_ENVELOPE *env = &txd->Envelope; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ (di->func)(di->arg, "TXD: %p Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", txd, -+ env->Version, env->Attr, env->Xid.Generation, env->Xid.Handle, (long long) env->Xid.Unique); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, -+ env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags); -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (txdRail != NULL) EP_XMTR_OP (txdRail->XmtrRail, DisplayTxd) (di, txdRail); -+} -+ -+void -+ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_for_each (el, &xmtr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) -+ activeCount++; -+ -+ (di->func)(di->arg, "ep_display_xmtr: xmtr=%p Free=%d Active=%d\n", xmtr, freeCount, activeCount); -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->Rails[i]) EP_XMTR_OP (xmtr->Rails[i], DisplayXmtr) (di, xmtr->Rails[i]); -+ -+ list_for_each (el,&xmtr->ActiveDescList) -+ ep_display_txd (di, list_entry (el, EP_TXD, Link)); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep_xmtr_fillout_stats(EP_XMTR *xmtr, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr->stats,tx), GET_STAT_PER_SEC(xmtr->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr->stats,tx_len) / (1024*1024)); -+} -+ -+void -+ep_xmtr_rail_fillout_stats(EP_XMTR_RAIL *xmtr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx), GET_STAT_PER_SEC(xmtr_rail->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr_rail->stats,tx_len) / (1024*1024)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsTx_elan3.c linux-2.6.9/drivers/net/qsnet/ep/epcommsTx_elan3.c ---- clean/drivers/net/qsnet/ep/epcommsTx_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsTx_elan3.c 2004-11-12 05:55:03.000000000 -0500 -@@ -0,0 +1,1173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan3.c,v 1.19 2004/11/12 10:55:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define XMTR_TO_RAIL(xmtrRail) ((EP3_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->Device) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+static void TxEnveEvent (EP3_RAIL *rail, void *arg); -+static void TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS EnveCookieOps = -+{ -+ TxEnveEvent, -+ TxEnveRetry, -+ NULL, /* DmaCancelled */ -+ TxEnveVerify -+}; -+ -+static void TxDataEvent (EP3_RAIL *rail, void *arg); -+static void TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DataCookieOps = -+{ -+ TxDataEvent, -+ TxDataRetry, -+ NULL, /* DmaCancelled */ -+ TxDataVerify -+}; -+ -+static void TxDoneEvent (EP3_RAIL *dev, void *arg); -+static void TxDoneRetry (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma, int status); -+static void TxDoneVerify (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DoneCookieOps = -+{ -+ TxDoneEvent, -+ TxDoneRetry, -+ NULL, /* DmaCancelled */ -+ TxDoneVerify, -+} ; -+ -+static int -+AllocateTxdRailBlock (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_TXD_RAIL_BLOCK *blk; -+ EP3_TXD_RAIL *txdRail; -+ sdramaddr_t pTxdElan; -+ EP3_TXD_RAIL_MAIN *pTxdMain; -+ E3_Addr pTxdElanAddr; -+ E3_Addr pTxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_TXD_RAIL_BLOCK *, sizeof (EP3_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((pTxdElan = ep_alloc_elan (&rail->Generic, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pTxdMain = ep_alloc_main (&rail->Generic, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdMainAddr)) == (EP3_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pTxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->Generic.XmtrRail = &xmtrRail->Generic; -+ txdRail->TxdElan = pTxdElan; -+ txdRail->TxdElanAddr = pTxdElanAddr; -+ txdRail->TxdMain = pTxdMain; -+ txdRail->TxdMainAddr = pTxdMainAddr; -+ -+ RegisterCookie (&rail->CookieTable, &txdRail->EnveCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), &EnveCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DataCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), &DataCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DoneCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), &DoneCookieOps, (void *) txdRail); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->EnveCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, EnveEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DataCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DataEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DoneCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DoneEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pTxdMain->EnveEvent = EP3_EVENT_FREE; -+ pTxdMain->DataEvent = EP3_EVENT_FREE; -+ pTxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pTxdElan += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdElanAddr += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdMain = (EP3_TXD_RAIL_MAIN *) ((unsigned long) pTxdMain + EP3_TXD_RAIL_MAIN_SIZE); -+ pTxdMainAddr += EP3_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtrRail->DescBlockList); -+ xmtrRail->TotalDescCount += EP3_NUM_TXD_PER_BLOCK; -+ xmtrRail->FreeDescCount += EP3_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->Txd[i].Generic.Link, &xmtrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeTxdRailBlock (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ xmtrRail->TotalDescCount -= EP3_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->FreeDescCount--; -+ -+ list_del (&txdRail->Generic.Link); -+ -+ DeregisterCookie (&rail->CookieTable, &txdRail->EnveCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Txd[0].TxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Txd[0].TxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+} -+ -+static EP3_TXD_RAIL * -+GetTxdRail (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtrRail->Generic.Xmtr->Subsys; -+ EP3_TXD_RAIL *txdRail; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ if (list_empty (&xmtrRail->FreeDescList)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->FreeDescList.next, EP3_TXD_RAIL, Generic.Link); -+ -+#if defined(DEBUG) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ } -+#endif -+ -+ list_del (&txdRail->Generic.Link); -+ -+ xmtrRail->FreeDescCount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txdRail); -+} -+ -+static void -+FreeTxdRail (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->Generic.XmtrRail == &xmtrRail->Generic); -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&txdRail->Generic.Link, &xmtrRail->FreeDescList); -+ -+ xmtrRail->FreeDescCount++; -+ -+ if (xmtrRail->FreeDescWaiting) -+ { -+ xmtrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+} -+ -+static void -+BindTxdToRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ -+ EPRINTF6 (DBG_XMTR, "%s: BindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->Generic; -+ txdRail->Generic.Txd = txd; -+} -+ -+static void -+UnbindTxdFromRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ ASSERT (txd->TxdRail == &txdRail->Generic && txdRail->Generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: UnbindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ txd->TxdRail = NULL; -+ txdRail->Generic.Txd = NULL; -+} -+ -+/* -+ * TxEnveEvent: arg == EP_TXD -+ * Called when envelope delivered -+ */ -+static void -+TxEnveEvent (EP3_RAIL *rail, void *arg) -+{ -+ panic ("TxEnveEvent"); -+} -+ -+/* -+ * TxEnveRetry: arg == EP3_TXD_RAIL -+ * Called on retry of dma of large message envelope. -+ */ -+static void -+TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ -+ EPRINTF3 (DBG_XMTR, "%s: TxEnveRetry: xmtr %p txd %p\n", rail->Generic.Name, xmtrRail, txdRail); -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+ -+ if (! TxdShouldStabalise (&txdRail->Generic, &rail->Generic)) -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_ENVELOPE)); -+ else -+ QueueDmaForRetry (rail, dma, EP_RETRY_STABALISING); /* place dma on stabilising list for neterr fixup */ -+} -+ -+static void -+TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+} -+ -+/* -+ * TxDataEvent: arg == EP3_TXD -+ * Called on completion of a large transmit. -+ */ -+static void -+TxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DataCookie, txdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdMain->DataEvent)) /* PCI read */ -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p previously collecting by polling\n", -+ rail->Generic.Name, xmtrRail, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) || /* as no interrupt, or been reused as an RPC, */ -+ (EP_IS_RPC(txd->Envelope.Attr))) /* then we were either called as a result of a previous */ -+ { /* tx which was completed by polling or as a result */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ ASSERT (EP3_EVENT_FIRED (txdRail->EnveCookie, txdMain->EnveEvent)); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataEvent : xmtrRail=%p txdRail=%p tx=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit lists */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDataRetry: arg == EP3_TXD -+ * Called on retry of remote "put" dma of large transmit data. -+ */ -+static void -+TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataRetry: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+} -+ -+/* -+ * TxDoneEvent: arg == EP3_TXD -+ * Called on completion of a RPC. -+ */ -+static void -+TxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txdRail %p previously collecting by polling\n", -+ rail->Generic.Name, xmtr, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr) || EP_IS_RPC(txd->Envelope.Attr))) /* marked as no interrupt, or been reused as an transmit, */ -+ { /* then we were either called as a result of a previous */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* tx which was completed by polling or as a result */ -+ /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDoneEvent: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (txd->Handler) -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDoneRetry: arg == EP3_TXD -+ */ -+static void -+TxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ panic ("TxDoneRetry"); -+} -+ -+static void -+TxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ panic ("TxDoneVerify"); -+} -+ -+static void -+EnableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: EnableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_SET_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DoneCookie.Cookie); -+ } -+ else -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DataCookie.Cookie); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+ } -+} -+ -+static void -+DisableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: DisableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_CLEAR_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+} -+ -+static void -+InitialiseTxdRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail, int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->TxdElanAddr; -+ txd->Envelope.NodeId = rail->Generic.Position.pos_nodeid; -+ -+ /* Initialise the dma backoff */ -+ txdRail->Backoff.type = EP_BACKOFF_FREE; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 1); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), -+ (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + (EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_ACTIVE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ ASSERT (EP_IS_RPC(txd->Envelope.Attr)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ -+ txdRail->TxdMain->EnveEvent = txdRail->EnveCookie.Cookie; -+ txdRail->TxdMain->DataEvent = txdRail->DataCookie.Cookie; -+ break; -+ } -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ txdRail->TxdMain->DoneEvent = txdRail->DoneCookie.Cookie; -+ else -+ { -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 1); -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ else -+ EnableTransmitCallback (txd, txdRail); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* copy the envelope and payload if present down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Envelope, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Envelope), EP_ENVELOPE_SIZE); -+ -+ if (EP_HAS_PAYLOAD(txd->Envelope.Attr)) -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Payload, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Payload), EP_PAYLOAD_SIZE); -+} -+ -+void -+ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* only need to acquire/release the Lock to ensure that -+ * the node state transition has been noticed. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (! EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep3xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+#endif -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ /* Transmit data not been sent, so just restart on different rail */ -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d unbind an retry\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ /* epcomms thread will restart on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ -+ if (EP_IS_RPC(txd->Envelope.Attr) && !EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ if (EP_IS_NO_FAILOVER(txd->Envelope.Attr)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d - not able to failover\n", -+ rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ list_del (&txd->Link); -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* envelope and data events must have been set, so only clear the done event */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ list_add_tail (&txd->Link, &txdList); -+ continue; -+ } -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d passive\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+#endif -+ -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p completed to node %d\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ continue; -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+int -+ep3xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ EnableTransmitCallback (txd, txdRail); -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ break; -+ } -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep3xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->Generic.Name, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtrRail->Generic.Xmtr,txd); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ E3_DMA_BE dmabe; -+ -+ if ((txdRail = GetTxdRail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->Generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: TransmitTxdOnRail: node %u not connected on this rail\n", rail->Generic.Name, txd->NodeId); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ InitialiseTxdRail (txd, txdRail, phase); -+ -+ /* Initialise the dma descriptor */ -+ dmabe.s.dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_WRITE, DMA_QUEUED, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = (EP_HAS_PAYLOAD(txd->Envelope.Attr) ? EP_INPUTQ_SIZE : EP_ENVELOPE_SIZE); -+ dmabe.s.dma_source = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, Envelope); -+ dmabe.s.dma_dest = (E3_Addr) 0; -+ dmabe.s.dma_destEvent = EP_MSGQ_ADDR(txd->Service); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (txd->NodeId); -+ dmabe.s.dma_srcEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, txd->NodeId); -+ -+ EPRINTF8 (DBG_XMTR, "%s: TransmitTxdOnRail: txd=%p txdRail=%p @ %x XID=%llx dest=%u srcEvent=%x srcCookie=%x\n", rail->Generic.Name, -+ txd, txdRail, txdRail->TxdElanAddr, (long long) txd->Envelope.Xid.Unique, txd->NodeId, dmabe.s.dma_srcEvent, dmabe.s.dma_srcCookieVProc); -+ -+ BindTxdToRail (txd, txdRail); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ InitialiseTxdRail (txd, txdRail, EP_TXD_PHASE_PASSIVE); /* initialise as passive (updated envelope) */ -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ BindTxdToRail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* XXXX - TBD assertions on phase */ -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+} -+ -+long -+ep3xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ -+ if (xmtrRail->FreeDescCount < ep_txd_lowat && !AllocateTxdRailBlock(xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (xmtrRail, EP3_XMTR_RAIL *, sizeof (EP3_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->FreeDescLock); -+ kcondvar_init (&xmtrRail->FreeDescSleep); -+ INIT_LIST_HEAD (&xmtrRail->FreeDescList); -+ INIT_LIST_HEAD (&xmtrRail->DescBlockList); -+ -+ xmtrRail->Generic.CommsRail = commsRail; -+ xmtrRail->Generic.Xmtr = xmtr; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->Generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ while (xmtrRail->FreeDescCount != xmtrRail->TotalDescCount) -+ { -+ xmtrRail->FreeDescWaiting++; -+ kcondvar_wait (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* need to free up the txd's and blocks */ -+ /* all the txd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (xmtrRail->TotalDescCount == xmtrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->DescBlockList)) -+ FreeTxdRailBlock (xmtrRail, list_entry(xmtrRail->DescBlockList.next, EP3_TXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->FreeDescCount == 0) && (xmtrRail->TotalDescCount == 0)); -+ -+ spin_lock_destroy (&xmtrRail->FreeDescLock); -+ kcondvar_destroy (&xmtrRail->FreeDescSleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP3_XMTR_RAIL)); -+} -+ -+void -+ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ int freeCount = 0; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ list_for_each (el, &xmtrRail->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail=%d Free=%d Total=%d (%d)\n", -+ rail->Generic.Number, xmtrRail->FreeDescCount, xmtrRail->TotalDescCount, freeCount); -+} -+ -+void -+ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ EP3_RAIL *rail = (EP3_RAIL *) xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " EnveEvent=%x DataEvent=%x DoneEvent=%x Rail=%s\n", -+ txdMain->EnveEvent, txdMain->DataEvent, txdMain->DoneEvent, rail->Generic.Name); -+ (di->func)(di->arg, " EnveEvent=%x.%x DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type))); -+} -+ -+int -+ep3xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ E3_Addr enveEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ EP3_RETRY_DMA *retry = NULL; -+ -+ struct list_head *el; -+ struct list_head *nel; -+ unsigned long flags; -+ -+ /* is enevelope event is really not set */ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent )) -+ return (0); -+ -+ /* remove matching dma from stalled list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_for_each_safe(el, nel, &rail->DmaRetries[EP_RETRY_STABALISING]) { -+ retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if ( retry->Dma.s.dma_srcEvent == enveEvent ) { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ break; /* there can only be one */ -+ } -+ } -+ ASSERT ( retry != NULL); /* must find one in list */ -+ ASSERT ( retry->Dma.s.dma_srcEvent == enveEvent ); /* better still be the right type then */ -+ -+ /* add to free list */ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return (1); -+} -+ -+void -+ep3xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_XMTR_RAIL * ep3xmtr_rail = (EP3_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/epcommsTx_elan4.c linux-2.6.9/drivers/net/qsnet/ep/epcommsTx_elan4.c ---- clean/drivers/net/qsnet/ep/epcommsTx_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/epcommsTx_elan4.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,1389 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan4.c,v 1.32.2.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define XMTR_TO_COMMS(xmtrRail) ((EP4_COMMS_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail) -+#define XMTR_TO_RAIL(xmtrRail) ((EP4_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->r_ctxt.ctxt_dev) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+#define TXD_TO_XMTR(txdRail) ((EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail) -+#define TXD_TO_RAIL(txdRail) XMTR_TO_RAIL(TXD_TO_XMTR(txdRail)) -+ -+static void txd_interrupt (EP4_RAIL *rail, void *arg); -+static void poll_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ int -+on_list (struct list_head *ent, struct list_head *list) -+{ -+ struct list_head *el; -+ unsigned int count = 0; -+ list_for_each (el, list) { -+ if (el == ent) -+ count++; -+ } -+ return count; -+} -+ -+static __inline__ void -+__ep4_txd_assert_free (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FREE) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FREE) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FREE) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_txd_assert_finished (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FINISHED) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FINISHED) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FINISHED) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FINISHED; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_finished"); -+ } -+} -+ -+static __inline__ int -+__ep4_txd_assfail (EP4_TXD_RAIL *txdRail, const char *expr, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ -+ printk ("__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL (xmtrRail), "__ep4_txd_assfail"); -+ -+ return 0; -+} -+ -+#define EP4_TXD_ASSERT(txdRail, EX) ((void) ((EX) || (__ep4_txd_assfail(txdRail, #EX, __FILE__, __LINE__)))) -+#define EP4_TXD_ASSERT_FREE(txdRail) __ep4_txd_assert_free(txdRail, __FILE__, __LINE__) -+#define EP4_TXD_ASSERT_FINISHED(txdRail) __ep4_txd_assert_finished(txdRail, __FILE__, __LINE__) -+ -+static int -+alloc_txd_block (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ EP4_TXD_RAIL_BLOCK *blk; -+ EP4_TXD_RAIL_MAIN *txdMain; -+ EP_ADDR txdMainAddr; -+ sdramaddr_t txdElan; -+ EP_ADDR txdElanAddr; -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (blk, EP4_TXD_RAIL_BLOCK *, sizeof (EP4_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((txdElan = ep_alloc_elan (&rail->r_generic, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((txdMain = ep_alloc_main (&rail->r_generic, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdMainAddr)) == (EP4_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->txd_generic.XmtrRail = &xmtrRail->xmtr_generic; -+ txdRail->txd_elan = txdElan; -+ txdRail->txd_elan_addr = txdElanAddr; -+ txdRail->txd_main = txdMain; -+ txdRail->txd_main_addr = txdMainAddr; -+ -+ /* We only need to reserve space for one command stream, since the sten packet -+ * can only be retrying *before* the dma source event is set. -+ * reserve bytes of "event" cq space for the completion write + interrupt */ -+ if ((txdRail->txd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_INTR_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* register the main interrupt cookies */ -+ ep4_register_intcookie (rail, &txdRail->txd_intcookie, txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done), txd_interrupt, txdRail); -+ -+ /* initialise the events */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WritePtr), -+ txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_data)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WriteValue), -+ EP4_STATE_FINISHED); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ /* Initialise the command streams */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_env))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_done))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txdMain->txd_env = EP4_STATE_FREE; -+ txdMain->txd_data = EP4_STATE_FREE; -+ txdMain->txd_done = EP4_STATE_FREE; -+ -+ /* move onto next descriptor */ -+ txdElan += EP4_TXD_RAIL_ELAN_SIZE; -+ txdElanAddr += EP4_TXD_RAIL_ELAN_SIZE; -+ txdMain = (EP4_TXD_RAIL_MAIN *) ((unsigned long) txdMain + EP4_TXD_RAIL_MAIN_SIZE); -+ txdMainAddr += EP4_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&blk->blk_link, &xmtrRail->xmtr_blocklist); -+ -+ xmtrRail->xmtr_totalcount += EP4_NUM_TXD_PER_BLOCK; -+ xmtrRail->xmtr_freecount += EP4_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->blk_txds[i].txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ } -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+static void -+free_txd_block (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ xmtrRail->xmtr_totalcount -= EP4_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->xmtr_freecount--; -+ -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ -+ list_del (&txdRail->txd_generic.Link); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+} -+ -+static EP4_TXD_RAIL * -+get_txd_rail (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int low_on_txds; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ if (list_empty (&xmtrRail->xmtr_freelist)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->xmtr_freelist.next, EP4_TXD_RAIL, txd_generic.Link); -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ list_del (&txdRail->txd_generic.Link); -+ -+ xmtrRail->xmtr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->xmtr_freecount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ -+ return (txdRail); -+} -+ -+static void -+free_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&txdRail->txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ xmtrRail->xmtr_freecount++; -+ -+ if (xmtrRail->xmtr_freewaiting) -+ { -+ xmtrRail->xmtr_freewaiting--; -+ kcondvar_wakeupall (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+} -+ -+static void -+bind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EPRINTF6 (DBG_XMTR, "%s: bind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long)txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->txd_generic; -+ txdRail->txd_generic.Txd = txd; -+} -+ -+static void -+unbind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_TXD_ASSERT (txdRail, txd->TxdRail == &txdRail->txd_generic && txdRail->txd_generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: unbind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long)txd->Envelope.Xid.Unique); -+ -+ -+ txdRail->txd_generic.Txd = NULL; -+ txd->TxdRail = NULL; -+} -+ -+static void -+initialise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->txd_elan_addr; -+ txd->Envelope.NodeId = rail->r_generic.Position.pos_nodeid; -+ -+ /* Allocate a network error fixup cookie */ -+ txdRail->txd_cookie = ep4_neterr_cookie (rail, txd->NodeId) | EP4_COOKIE_STEN; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ { -+ unsigned int nsets = (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + ( EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0); -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32 * nsets, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32 * nsets , E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_ACTIVE; -+ } -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ break; -+ } -+ -+ case EP_TXD_PHASE_PASSIVE: -+ EP4_TXD_ASSERT (txdRail, EP_IS_RPC(txd->Envelope.Attr)); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ break; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+} -+ -+static void -+terminate_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->txd_main->txd_env = EP4_STATE_FREE; -+ txdRail->txd_main->txd_data = EP4_STATE_FREE; -+ txdRail->txd_main->txd_done = EP4_STATE_FREE; -+ -+#if defined(DEBUG_ASSERT) -+ if (sdram_assert) -+ { -+ ELAN4_DEV *dev = XMTR_TO_RAIL (xmtrRail)->r_ctxt.ctxt_dev; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+#endif -+} -+ -+static void -+defer_txd_rail (EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ -+ EPRINTF5 (DBG_XMTR, "%s: defer_txd_rail: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ rail->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ /* transmit has completed, but the data dma has not completed -+ * (because of network error fixup), we queue the txdRail onto a list -+ * to be polled for completion later. -+ */ -+ if (txdRail->txd_retry_time) -+ { -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+ -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+} -+ -+static void -+finalise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ -+ EP4_TXD_ASSERT_FINISHED (txdRail); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+} -+ -+static void -+txd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) arg; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (txdRail->txd_main->txd_done == EP4_STATE_FINISHED || txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to txd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "txd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY (delay); -+ delay <<= 1; -+ } -+ -+ txd = txdRail->txd_generic.Txd; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ { -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time == 0); /* cannot be on retry/poll list */ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_done != EP4_STATE_FINISHED); /* data xfer cannot have finished */ -+ -+ if (TxdShouldStabalise (&txdRail->txd_generic, &rail->r_generic)) -+ { -+ EPRINTF6 (DBG_STABILISE, "%s: txd_interrupt: stablise xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, (long long)txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt; /* indicate on retry list */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ else -+ { -+ EPRINTF6 (DBG_RETRY, "%s: txd_interrupt: retry xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, (long long)txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX: backoff ? */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ -+ EP4_TXD_ASSERT (txdRail, txd != NULL && !(EP_IS_NO_INTERRUPT(txd->Envelope.Attr))); -+ -+ EPRINTF6 (DBG_XMTR, "%s: txd_interrupt: xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, (long long)txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ defer_txd_rail (txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ else -+ { -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+static void -+poll_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ -+ ep_poll_transmits (xmtrRail->xmtr_generic.Xmtr); -+} -+ -+void -+issue_envelope_packet (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ ELAN4_CQ *cq = xmtrRail->xmtr_cq; -+ E4_uint64 *blk0 = (E4_uint64 *) &txd->Envelope; -+ E4_uint64 *blk1 = EP_HAS_PAYLOAD(txd->Envelope.Attr) ? (E4_uint64 *) &txd->Payload : NULL; -+ E4_Addr qaddr = EP_MSGQ_ADDR(txd->Service); -+ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ elan4_open_packet (cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(txd->NodeId))); -+ elan4_sendtrans0 (cq, TR_INPUT_Q_GETINDEX, EP_MSGQ_ADDR(txd->Service)); -+ -+ /* send the payload if present */ -+ if (blk0) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 0, blk0); -+ if (blk1) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 128, blk1); -+ -+ elan4_sendtrans1 (cq, TR_INPUT_Q_COMMIT, qaddr, txdRail->txd_cookie); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_TXD_STEN_RETRYCOUNT)); -+ elan4_write_dword_cmd (cq, txdRail->txd_main_addr + offsetof (EP4_TXD_RAIL_MAIN, txd_env), EP4_STATE_FINISHED); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_TXD_STEN_RETRYCOUNT)); -+ elan4_set_event_cmd (cq, txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_env)); -+ -+ elan4_write_dword_cmd (cq, xmtrRail->xmtr_main_addr + offsetof (EP4_XMTR_RAIL_MAIN, xmtr_flowcnt), ++xmtrRail->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* need to acquire/release the Lock to ensure that the node state -+ * transition has been noticed and no new envelopes are queued to -+ * nodes which are passivating. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* Then we insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ /* remove any envelopes which are retrying to nodes which are going down */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_env == EP4_STATE_FAILED); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4xmtr_flush_callback: removing txdRail %p from retry list\n", rail->r_generic.Name, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ list_del (&txdRail->txd_retry_link); -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* Determine whether we have active or passive messages to -+ * any node which is passivating */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if (txdRail == NULL || txdRail->txd_generic.XmtrRail != &xmtrRail->xmtr_generic || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF5 (DBG_XMTR, "%s: flush txd=%p txdRail=%p data=%llx done=%llx\n", rail->r_generic.Name, -+ txd, txdRail, (long long)txdRail->txd_main->txd_data, (long long)txdRail->txd_main->txd_done); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep4xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ /* XXXX - no rail failover for now ....*/ -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (txdRail->txd_main->txd_done == EP4_STATE_ACTIVE) -+ { -+ -+ EPRINTF8 (DBG_DISCON, "ep4xmtr_disconnect_callback: txdRail=%p : events %llx,%llx,%llx done %llx,%llx,%llx retry %lx\n",txdRail, -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ (long long)txdRail->txd_main->txd_env, (long long)txdRail->txd_main->txd_data, (long long)txdRail->txd_main->txd_done, -+ txdRail->txd_retry_time); -+ -+ if (txdRail->txd_retry_time) -+ { -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+void -+ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || txd->NodeId != nodeId) -+ continue; -+ -+ /* The only non-dma associated with a txd is the initial sten packet, if it has been acked -+ * and the neterr cookie matches, then change it to look like it's been acked since the -+ * INPUT_Q_COMMIT transaction has already been executed */ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED && (txdRail->txd_cookie == cookies[0] || txdRail->txd_cookie == cookies[1])) -+ { -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4xmtr_neterr_callback: cookie <%lld%s%s%s%s> matches txd %p txdRail %p\n", -+ rail->r_generic.Name, (long long)EP4_COOKIE_STRING(txdRail->txd_cookie), txd, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+int -+ep4xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ if (! EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ return 0; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (!EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (xmtrRail->xmtr_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txd->Envelope.Attr |= EP_INTERRUPT_ENABLED; -+ } -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr & EP_INTERRUPT_ENABLED)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+ -+ txd->Envelope.Attr &= ~EP_INTERRUPT_ENABLED; -+ } -+ } -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED && txdRail->txd_main->txd_data == EP4_STATE_FINISHED && txdRail->txd_main->txd_done == EP4_STATE_FINISHED) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep4xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->r_generic.Name, txd, (long long)txd->Envelope.Xid.Unique); -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ -+ ep_xmtr_txd_stat(xmtrRail->xmtr_generic.Xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ -+ if ((txdRail = get_txd_rail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->r_generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: ep4xmtr_bind_txd: node %u not connected on this rail\n", rail->r_generic.Name, txd->NodeId); -+ -+ free_txd_rail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_ACTIVE); -+ -+ bind_txd_rail (txd, txdRail); -+ -+ /* generate the STEN packet to transfer the envelope */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ issue_envelope_packet (xmtrRail, txdRail); -+ else -+ { -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_PASSIVE); -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ bind_txd_rail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ /* XXXX - TBD */ -+} -+ -+long -+ep4xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ if (xmtrRail->xmtr_freecount < ep_txd_lowat && !alloc_txd_block (xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ nextRunTime = lbolt + HZ; -+ } -+ else -+ { -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ EPRINTF5 (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ EPRINTF3 (DBG_XMTR, "%s: done %x data %x\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name, -+ (uint)(txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_done)), -+ (uint)(txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_data))); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ /* remove txd from active list and add to list to call handlers */ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ -+ /* remove and free of txdRail */ -+ txdRail->txd_retry_time = 0; -+ list_del (&txdRail->txd_retry_link); -+ -+ finalise_txd (txd, txdRail); -+ -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat (xmtr,txd); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4xmtr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ while (! list_empty (&xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ { -+ EP4_TXD_RAIL *txdRail = list_entry (xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY].next, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (BEFORE (lbolt, txdRail->txd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ { -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4xmtr_retry: re-issue envelope packet to %d for txdRail=%p\n", -+ rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId, txdRail); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ -+ issue_envelope_packet (xmtrRail, txdRail); -+ } -+ else -+ { -+ EPRINTF2 (DBG_RETRY, "%s: ep4xmtr_retry: cannot re-issue envelope packet to %d\n", rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP4_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (xmtrRail, EP4_XMTR_RAIL *, sizeof (EP4_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->xmtr_freelock); -+ kcondvar_init (&xmtrRail->xmtr_freesleep); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_freelist); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_blocklist); -+ -+ for (i = 0; i < EP4_TXD_NUM_LISTS; i++) -+ INIT_LIST_HEAD (&xmtrRail->xmtr_retrylist[i]); -+ spin_lock_init (&xmtrRail->xmtr_retrylock); -+ -+ xmtrRail->xmtr_generic.CommsRail = commsRail; -+ xmtrRail->xmtr_generic.Xmtr = xmtr; -+ -+ xmtrRail->xmtr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_XMTR_RAIL_MAIN), 0, &xmtrRail->xmtr_main_addr); -+ xmtrRail->xmtr_cq = elan4_alloccq (&rail->r_ctxt, EP4_XMTR_CQSIZE, CQ_EnableAllBits, CQ_Priority); -+ -+ xmtrRail->xmtr_retryops.op_func = ep4xmtr_retry; -+ xmtrRail->xmtr_retryops.op_arg = xmtrRail; -+ -+ ep4_add_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ ep4_register_intcookie (rail, &xmtrRail->xmtr_intcookie, xmtrRail->xmtr_main_addr, -+ poll_interrupt, xmtrRail); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->xmtr_generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ ep_procfs_xmtr_add_rail(&(xmtrRail->xmtr_generic)); -+} -+ -+void -+ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_procfs_xmtr_del_rail(&(xmtrRail->xmtr_generic)); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ while (xmtrRail->xmtr_freecount != xmtrRail->xmtr_totalcount) -+ { -+ xmtrRail->xmtr_freewaiting++; -+ kcondvar_wait (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* all the txd's accociated with DescBlocks must be in the freelist */ -+ ASSERT (xmtrRail->xmtr_totalcount == xmtrRail->xmtr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->xmtr_blocklist)) -+ free_txd_block (xmtrRail, list_entry(xmtrRail->xmtr_blocklist.next, EP4_TXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->xmtr_freecount == 0) && (xmtrRail->xmtr_totalcount == 0)); -+ -+ ep4_deregister_intcookie (rail, &xmtrRail->xmtr_intcookie); -+ -+ ep4_remove_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ elan4_freecq (&rail->r_ctxt, xmtrRail->xmtr_cq); -+ ep_free_main (&rail->r_generic, xmtrRail->xmtr_main_addr, sizeof (EP4_XMTR_RAIL_MAIN)); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_retrylock); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_freelock); -+ kcondvar_destroy (&xmtrRail->xmtr_freesleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP4_XMTR_RAIL)); -+} -+ -+void -+ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ unsigned int freeCount = 0; -+ unsigned int pollCount = 0; -+ unsigned int stalledCount = 0; -+ unsigned int retryCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ list_for_each (el, &xmtrRail->xmtr_freelist) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) -+ pollCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) -+ stalledCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) -+ retryCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " rail=%d free=%d total=%d (%d) (retry %d,%d,%d)\n", -+ rail->r_generic.Number, xmtrRail->xmtr_freecount, xmtrRail->xmtr_totalcount, -+ freeCount, pollCount, stalledCount, retryCount); -+ (di->func)(di->arg, " cq %d flowcnt %lld,%lld\n", elan4_cq2num (xmtrRail->xmtr_cq), xmtrRail->xmtr_flowcnt, xmtrRail->xmtr_main->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_TXD_RAIL_MAIN *txdMain = txdRail->txd_main; -+ sdramaddr_t txdElan = txdRail->txd_elan; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ char *list = ""; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (txdRail->txd_retry_time) -+ { -+ if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL])) -+ list = " poll"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED])) -+ list = " stalled"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ list = " retry"; -+ else -+ list = " ERROR"; -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " Rail %d txd %p elan %lx (%x) main %p (%x) cookie <%lld%s%s%s%s> ecq %d %s\n", rail->r_generic.Number, -+ txdRail, txdRail->txd_elan, txdRail->txd_elan_addr, txdRail->txd_main, txdRail->txd_main_addr, -+ EP4_COOKIE_STRING(txdRail->txd_cookie), elan4_cq2num (txdRail->txd_ecq->ecq_cq), list); -+ -+ (di->func)(di->arg, " env %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[1])), -+ txdMain->txd_env); -+ (di->func)(di->arg, " data %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[1])), -+ txdMain->txd_data); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[1])), -+ txdMain->txd_done); -+} -+ -+int -+ep4xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ unsigned long flags; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED) -+ return 0; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ list_del (&txdRail->txd_retry_link); -+ txdRail->txd_retry_time = 0; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ return 1; -+} -+ -+void -+ep4xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_XMTR_RAIL * ep4xmtr_rail = (EP4_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/ep_procfs.c linux-2.6.9/drivers/net/qsnet/ep/ep_procfs.c ---- clean/drivers/net/qsnet/ep/ep_procfs.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/ep_procfs.c 2005-03-30 09:06:34.000000000 -0500 -@@ -0,0 +1,331 @@ -+ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: ep_procfs.c,v 1.10 2005/03/30 14:06:34 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/ep_procfs.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_xmtr_root; -+struct proc_dir_entry *ep_procfs_rcvr_root; -+ -+static int -+ep_proc_open (struct inode *inode, struct file *file) -+{ -+ QSNET_PROC_PRIVATE *pr; -+ int pages = 4; -+ -+ if ((pr = kmalloc (sizeof (QSNET_PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ pr->pr_di.func = qsnet_proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("debug_xmtr", file->f_dentry->d_iname)) -+ { -+ EP_XMTR *xmtr = (EP_XMTR *)(PDE(inode)->data); -+ ep_display_xmtr (&pr->pr_di, xmtr); -+ } -+ -+ if (!strcmp("debug_rcvr", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 0); -+ } -+ -+ if (!strcmp("debug_full", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 1); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations ep_proc_operations = -+{ -+ read: qsnet_proc_read, -+ open: ep_proc_open, -+ release: qsnet_proc_release, -+}; -+ -+static int -+proc_read_rcvr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *)data; -+ -+ if (rcvr == NULL) -+ sprintf(page,"proc_read_rcvr_stats rcvr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_rcvr_fillout_stats(rcvr,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_rcvr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR_RAIL *rcvr_rail = (EP_RCVR_RAIL *)data; -+ -+ if (rcvr_rail == NULL) { -+ strcpy(page,"proc_read_rcvr_rail_stats rcvr_rail=NULL"); -+ } else { -+ page[0] = 0; -+ ep_rcvr_rail_fillout_stats(rcvr_rail, page); -+ EP_RCVR_OP(rcvr_rail,FillOutRailStats)(rcvr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_rcvr_add(EP_RCVR *rcvr) -+{ -+ /* ep/rcvr/service_number/stats */ -+ /* ep/rcvr/service_number/debug_rcvr */ -+ /* ep/rcvr/service_number/debug_full */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%d", rcvr->Service); -+ -+ rcvr->procfs_root = proc_mkdir (str, ep_procfs_rcvr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_stats; -+ p->data = rcvr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_rcvr", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+ -+ if ((p = create_proc_entry ("debug_full", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+} -+ -+void -+ep_procfs_rcvr_del(EP_RCVR *rcvr) -+{ -+ char str[32]; -+ sprintf(str,"%d", rcvr->Service); -+ -+ remove_proc_entry ("debug_full", rcvr->procfs_root); -+ remove_proc_entry ("debug_rcvr", rcvr->procfs_root); -+ remove_proc_entry ("stats", rcvr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_rcvr_root); -+} -+ -+void -+ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ /* ep/rcvr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ rcvrRail->procfs_root = proc_mkdir (str, rcvrRail->Rcvr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_rail_stats; -+ p->data = rcvrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", rcvrRail->procfs_root); -+ -+ remove_proc_entry (str, rcvrRail->Rcvr->procfs_root); -+} -+ -+ -+ -+ -+static int -+proc_read_xmtr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *)data; -+ -+ if (xmtr == NULL) -+ strcpy(page,"proc_read_xmtr_stats xmtr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_fillout_stats(xmtr, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_xmtr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR_RAIL *xmtr_rail = (EP_XMTR_RAIL *)data; -+ -+ if (xmtr_rail == NULL) -+ strcpy(page,"proc_read_xmtr_rail_stats xmtr_rail=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_rail_fillout_stats(xmtr_rail, page); -+ EP_XMTR_OP(xmtr_rail,FillOutRailStats)(xmtr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_xmtr_add(EP_XMTR *xmtr) -+{ -+ /* ep/xmtr/service_number/stats */ -+ /* ep/xmtr/service_number/debug_xmtr */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ xmtr->procfs_root = proc_mkdir (str, ep_procfs_xmtr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_stats; -+ p->data = xmtr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_xmtr", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = xmtr; -+ } -+} -+ -+void -+ep_procfs_xmtr_del(EP_XMTR *xmtr) -+{ -+ char str[32]; -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ remove_proc_entry ("stats", xmtr->procfs_root); -+ remove_proc_entry ("debug_xmtr", xmtr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_xmtr_root); -+} -+ -+void -+ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ /* ep/xmtr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ xmtrRail->procfs_root = proc_mkdir (str, xmtrRail->Xmtr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_rail_stats; -+ p->data = xmtrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", xmtrRail->procfs_root); -+ -+ remove_proc_entry (str, xmtrRail->Xmtr->procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_init(void) -+{ -+ ep_procfs_rcvr_root = proc_mkdir ("rcvr", ep_procfs_root); -+ ep_procfs_xmtr_root = proc_mkdir ("xmtr", ep_procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_fini(void) -+{ -+ remove_proc_entry ("rcvr", ep_procfs_root); -+ remove_proc_entry ("xmtr", ep_procfs_root); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kalloc.c linux-2.6.9/drivers/net/qsnet/ep/kalloc.c ---- clean/drivers/net/qsnet/ep/kalloc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kalloc.c 2004-12-14 05:19:23.000000000 -0500 -@@ -0,0 +1,677 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kalloc.c,v 1.19 2004/12/14 10:19:23 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+static void -+HashInPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ int idx0 = HASH (pool->Handle.nmh_nmd.nmd_addr); -+ int idx1 = HASH (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ list_add (&pool->HashBase, &alloc->HashBase[idx0]); -+ list_add (&pool->HashTop, &alloc->HashTop[idx1]); -+} -+ -+static void -+HashOutPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+} -+ -+static EP_POOL * -+LookupPool (EP_ALLOC *alloc, EP_ADDR addr) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &alloc->HashBase[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ list_for_each (el, &alloc->HashTop[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashTop); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ return (NULL); -+} -+ -+static EP_POOL * -+AllocatePool (EP_ALLOC *alloc, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_ADDR base = 0; -+ EP_POOL *pool; -+ EP_RAIL *rail; -+ int i, railmask = 0; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (pool, EP_POOL *, sizeof (EP_POOL), !(attr & EP_NO_SLEEP)); -+ -+ if (pool == NULL) -+ return (NULL); -+ -+ if (addr != 0) -+ base = addr; -+ else -+ { -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ { -+ KMEM_ZALLOC (pool->Bitmaps[i - LN2_MIN_SIZE], bitmap_t *, BT_BITOUL(1 << (LN2_MAX_SIZE-i)) * sizeof (bitmap_t), !(attr & EP_NO_SLEEP)); -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] == NULL) -+ goto failed; -+ } -+ -+ if ((base = ep_rmalloc (alloc->ResourceMap, size, !(attr & EP_NO_SLEEP))) == 0) -+ goto failed; -+ } -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ rail = alloc->Data.Private.Rail; -+ -+ if ((pool->Buffer.Sdram = rail->Operations.SdramAlloc (rail, base, size)) == 0) -+ goto failed; -+ -+ ep_perrail_sdram_map (rail, base, pool->Buffer.Sdram, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ ep_perrail_kaddr_map (alloc->Data.Private.Rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_kaddr_map (rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ railmask |= (1 << rail->Number); -+ } -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ pool->Handle.nmh_nmd.nmd_attr = EP_NMD_ATTR (alloc->Data.Shared.System->Position.pos_nodeid, railmask); -+ -+ ep_nmh_insert (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ break; -+ -+ default: -+ goto failed; -+ } -+ -+ return (pool); -+ -+ failed: -+ if (addr == 0 && base) -+ ep_rmfree (alloc->ResourceMap, size, base); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] != NULL) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+ return (NULL); -+} -+ -+static void -+FreePool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ struct list_head *el; -+ int i; -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ alloc->Data.Private.Rail->Operations.SdramFree (alloc->Data.Private.Rail, pool->Buffer.Sdram, pool->Handle.nmh_nmd.nmd_len); -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ ep_nmh_remove (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ } -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ } -+ -+ if (pool->Bitmaps[0]) -+ { -+ ep_rmfree (alloc->ResourceMap, pool->Handle.nmh_nmd.nmd_len, pool->Handle.nmh_nmd.nmd_addr); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ } -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+} -+ -+static int -+AddRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ EP_RAIL_ENTRY *l; -+ unsigned long flags; -+ int i; -+ -+ ASSERT (alloc->Type == EP_ALLOC_TYPE_SHARED_MAIN); -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ l->Rail = rail; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_kaddr_map (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Buffer.Ptr, -+ pool->Handle.nmh_nmd.nmd_len, EP_PERM_WRITE, EP_NO_SLEEP); -+ -+ pool->Handle.nmh_nmd.nmd_attr |= EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_add (&l->Link, &alloc->Data.Shared.Rails); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ return (0); -+} -+ -+static void -+RemoveRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ pool->Handle.nmh_nmd.nmd_attr &= ~EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ KMEM_FREE(tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+static EP_POOL * -+AllocateBlock (EP_ALLOC *alloc, unsigned size, EP_ATTRIBUTE attr, int *offset) -+{ -+ int block, j, k; -+ unsigned long flags; -+ EP_POOL *pool; -+ -+ -+ if (size > MAX_SIZE) -+ { -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, size, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ HashInPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = 0; -+ -+ return pool; -+ } -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* k now has ln2 of the size to allocate. */ -+ /* find the free list with the smallest block we can use*/ -+ for (j = k; j <= LN2_MAX_SIZE && list_empty (&alloc->Freelists[j - LN2_MIN_SIZE]); j++) -+ ; -+ -+ /* j has ln2 of the smallest size block we can use */ -+ if (j < LN2_MAX_SIZE) -+ { -+ int nbits = 1 << (LN2_MAX_SIZE-j); -+ -+ pool = list_entry (alloc->Freelists[j - LN2_MIN_SIZE].next, EP_POOL, Link[j - LN2_MIN_SIZE]); -+ block = (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) << j); -+ -+ BT_CLEAR (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ if (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) == -1) -+ list_del (&pool->Link[j - LN2_MIN_SIZE]); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, MAX_SIZE, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ block = 0; -+ j = LN2_MAX_SIZE; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ HashInPool (alloc, pool); -+ } -+ -+ /* Split it until the buddies are the correct size, putting one -+ * buddy back on the free list and continuing to split the other */ -+ while (--j >= k) -+ { -+ list_add (&pool->Link[j - LN2_MIN_SIZE], &alloc->Freelists[j - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ block += (1 << j); -+ } -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = block; -+ -+ return (pool); -+} -+ -+static void -+FreeBlock (EP_ALLOC *alloc, EP_ADDR addr, unsigned size) -+{ -+ EP_POOL *pool; -+ int k, block = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* Find the pool containing this block */ -+ pool = LookupPool (alloc, addr); -+ -+ /* It must exist */ -+ ASSERT (pool != NULL); -+ -+ /* If we're freeing a subset of it, then update the bitmaps */ -+ if (size <= MAX_SIZE) -+ { -+ ASSERT (BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (addr - pool->Handle.nmh_nmd.nmd_addr) >> k) == 0); -+ -+ block = addr - pool->Handle.nmh_nmd.nmd_addr; -+ -+ while (k < LN2_MAX_SIZE && BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1)) -+ { -+ BT_CLEAR (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1); -+ -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_del (&pool->Link[k - LN2_MIN_SIZE]); -+ -+ k++; -+ } -+ } -+ -+ if (k >= LN2_MAX_SIZE) -+ { -+ HashOutPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ FreePool (alloc, pool); -+ } -+ else -+ { -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_add (&pool->Link[k - LN2_MIN_SIZE], &alloc->Freelists[k - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[k - LN2_MIN_SIZE], block >> k); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ } -+} -+ -+static void -+InitialiseAllocator (EP_ALLOC *alloc, EP_ALLOC_TYPE type, unsigned int perm, EP_RMAP *rmap) -+{ -+ int i; -+ -+ spin_lock_init (&alloc->Lock); -+ -+ alloc->Type = type; -+ alloc->ResourceMap = rmap; -+ alloc->Perm = perm; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ (&alloc->HashBase[i])->next = &alloc->HashBase[i]; -+ -+ INIT_LIST_HEAD (&alloc->HashBase[i]); -+ INIT_LIST_HEAD (&alloc->HashTop[i]); -+ } -+ -+ for (i = 0; i < NUM_FREELISTS; i++) -+ INIT_LIST_HEAD (&alloc->Freelists[i]); -+} -+ -+static void -+DestroyAllocator (EP_ALLOC *alloc) -+{ -+ struct list_head *el, *next; -+ int i; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each_safe (el, next, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ printk ("!!DestroyAllocator: pool=%p type=%d addr=%x len=%x\n", pool, alloc->Type, -+ pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+ -+ // XXXX: FreePool (alloc, pool); -+ } -+ } -+ -+ spin_lock_destroy (&alloc->Lock); -+} -+ -+void -+ep_display_alloc (EP_ALLOC *alloc) -+{ -+ struct list_head *el; -+ int i; -+ int npools = 0; -+ int nbytes = 0; -+ int nfree = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ ep_debugf (DBG_DEBUG, "Kernel comms memory allocator %p type %d\n", alloc, alloc->Type); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_debugf (DBG_DEBUG, " POOL %4x: %p -> %x.%x\n", i, pool, pool->Handle.nmh_nmd.nmd_addr, -+ pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ npools++; -+ nbytes += pool->Handle.nmh_nmd.nmd_len; -+ } -+ } -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i++) -+ { -+ int n = 0; -+ -+ list_for_each (el, &alloc->Freelists[i - LN2_MIN_SIZE]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, Link[i - LN2_MIN_SIZE]); -+ int nbits = bt_nbits (pool->Bitmaps[i - LN2_MIN_SIZE], 1 << (LN2_MAX_SIZE - i)); -+ -+ n += nbits; -+ nfree += (nbits << i); -+ } -+ -+ if (n != 0) -+ ep_debugf (DBG_DEBUG, " SIZE %5d : num %d\n", (1 << i), n); -+ } -+ ep_debugf (DBG_DEBUG, "%d pools with %d bytes and %d bytes free\n", npools, nbytes, nfree); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+/* per-rail allocators */ -+void -+ep_alloc_init (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_PRIVATE_RMAP_SIZE, "PrivateMap", 1); -+ -+ ep_rmfree (rmap, EP_PRIVATE_TOP-EP_PRIVATE_BASE, EP_PRIVATE_BASE); -+ -+ InitialiseAllocator (&rail->ElanAllocator, EP_ALLOC_TYPE_PRIVATE_SDRAM, EP_PERM_ALL, rmap); -+ InitialiseAllocator (&rail->MainAllocator, EP_ALLOC_TYPE_PRIVATE_MAIN, EP_PERM_WRITE, rmap); -+ -+ rail->ElanAllocator.Data.Private.Rail = rail; -+ rail->MainAllocator.Data.Private.Rail = rail; -+} -+ -+void -+ep_alloc_fini (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = rail->ElanAllocator.ResourceMap; -+ -+ DestroyAllocator (&rail->ElanAllocator); -+ DestroyAllocator (&rail->MainAllocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+sdramaddr_t -+ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_POOL *pool = AllocatePool (&rail->ElanAllocator, addr, size, perm, attr); -+ unsigned long flags; -+ -+ if (pool == NULL) -+ return (0); -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ HashInPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (pool->Buffer.Sdram); -+} -+ -+void -+ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ pool = LookupPool (&rail->ElanAllocator, addr); -+ -+ HashOutPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ FreePool (&rail->ElanAllocator, pool); -+} -+ -+sdramaddr_t -+ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->ElanAllocator, size, attr, &offset)) == NULL) -+ return (0); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return (pool->Buffer.Sdram + offset); -+} -+ -+void -+ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->ElanAllocator, addr, size); -+} -+ -+void * -+ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->MainAllocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->MainAllocator, addr, size); -+} -+ -+sdramaddr_t -+ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ sdramaddr_t res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->ElanAllocator, addr)) == NULL) -+ res = 0; -+ else -+ res = pool->Buffer.Sdram + (addr - pool->Handle.nmh_nmd.nmd_addr); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+void * -+ep_elan2main (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ void *res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->MainAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->MainAllocator, addr)) == NULL) -+ res = NULL; -+ else -+ res = (void *) ((unsigned long) pool->Buffer.Ptr + (addr - pool->Handle.nmh_nmd.nmd_addr)); -+ spin_unlock_irqrestore (&rail->MainAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+/* shared allocators */ -+int -+ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ return (AddRail (&sys->Allocator, rail)); -+} -+ -+void -+ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ RemoveRail (&sys->Allocator, rail); -+} -+ -+void -+ep_shared_alloc_init (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_SHARED_RMAP_SIZE, "shared_alloc_map", 1); -+ -+ ep_rmfree (rmap, EP_SHARED_TOP - EP_SHARED_BASE, EP_SHARED_BASE); -+ -+ InitialiseAllocator (&sys->Allocator, EP_ALLOC_TYPE_SHARED_MAIN, EP_PERM_WRITE, rmap); -+ -+ INIT_LIST_HEAD (&sys->Allocator.Data.Shared.Rails); -+ -+ sys->Allocator.Data.Shared.System = sys; -+} -+ -+void -+ep_shared_alloc_fini (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = sys->Allocator.ResourceMap; -+ -+ DestroyAllocator (&sys->Allocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+void * -+ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&sys->Allocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ ep_nmd_subset (nmd, &pool->Handle.nmh_nmd, offset, size); -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd) -+{ -+ FreeBlock (&sys->Allocator, nmd->nmd_addr, nmd->nmd_len); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kcomm.c linux-2.6.9/drivers/net/qsnet/ep/kcomm.c ---- clean/drivers/net/qsnet/ep/kcomm.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm.c 2005-07-20 08:01:34.000000000 -0400 -@@ -0,0 +1,1447 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm.c,v 1.61.2.2 2005/07/20 12:01:34 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+int MaxSwitchLevels = 5; /* Max 1024 sized machine */ -+ -+static char *NodeStateNames[EP_NODE_NUM_STATES] = -+{ -+ "Disconnected", -+ "Connecting", -+ "Connnected", -+ "LeavingConnected", -+ "LocalPassivate", -+ "RemotePassivate", -+ "Passivated", -+ "Disconnecting", -+}; -+ -+static void -+ep_xid_cache_fill (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->XidLock, flags); -+ -+ cache->Current = sys->XidNext; -+ cache->Last = cache->Current + EP_XID_CACHE_CHUNKS-1; -+ -+ sys->XidNext += EP_XID_CACHE_CHUNKS; -+ -+ spin_unlock_irqrestore (&sys->XidLock, flags); -+} -+ -+EP_XID -+ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ EP_XID xid; -+ -+ if (cache->Current == cache->Last) -+ ep_xid_cache_fill (sys, cache); -+ -+ xid.Generation = sys->XidGeneration; -+ xid.Handle = cache->Handle; -+ xid.Unique = cache->Current++; -+ -+ return (xid); -+} -+ -+void -+ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ cache->Handle = ++sys->XidHandle; -+ -+ list_add_tail (&cache->Link, &sys->XidCacheList); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ list_del (&cache->Link); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+EP_XID_CACHE * -+ep_xid_cache_find (EP_SYS *sys, EP_XID xid) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &sys->XidCacheList) { -+ EP_XID_CACHE *cache = list_entry (el, EP_XID_CACHE, Link); -+ -+ if (sys->XidGeneration == xid.Generation && cache->Handle == xid.Handle) -+ return (cache); -+ } -+ -+ return (NULL); -+} -+ -+static int -+MsgBusy (EP_RAIL *rail, EP_OUTPUTQ *outputq, int slotNum) -+{ -+ switch (rail->Operations.OutputQState (rail, outputq, slotNum)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ EP_MANAGER_MSG *msg = rail->Operations.OutputQMsg (rail, outputq, slotNum); -+ -+ EPRINTF4 (DBG_MANAGER, "%s: kcomm msg %d type %d to %d failed\n", rail->Name, slotNum, msg->Hdr.Type, msg->Hdr.DestId); -+#endif -+ break; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: /* anything else is finished */ -+ break; -+ } -+ -+ return 0; -+} -+ -+int -+ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ int n = EP_MANAGER_OUTPUTQ_SLOTS; -+ int slotNum; -+ int rnum; -+ EP_RAIL *msgRail; -+ EP_MANAGER_MSG *msg; -+ unsigned long flags; -+ -+ ASSERT (! EP_XID_INVALID (xid)); -+ -+ if ((rnum = ep_pickRail (node->ConnectedRails)) >= 0) -+ msgRail = sys->Rails[rnum]; -+ else -+ { -+ if (EP_MANAGER_MSG_TYPE_CONNECTED(type)) -+ { -+ ep_debugf (DBG_MANAGER, "%s: no rails available, trying to send type %d to %d\n", rail->Name, type, nodeId); -+ return -EHOSTDOWN; -+ } -+ -+ ep_debugf (DBG_MANAGER, "%s: no rails connected to %d - using receiving rail\n", rail->Name, nodeId); -+ -+ msgRail = rail; -+ } -+ -+ -+ spin_lock_irqsave (&msgRail->ManagerOutputQLock, flags); -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ -+ while (n-- > 0 && MsgBusy (msgRail, msgRail->ManagerOutputQ, slotNum)) /* search for idle message buffer */ -+ { -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ } -+ -+ if (n == 0) /* all message buffers busy */ -+ { -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ ep_debugf (DBG_MANAGER, "%s: all message buffers busy: trying to send type %d to %d\n", msgRail->Name, type, nodeId); -+ return -EBUSY; -+ } -+ -+ msg = msgRail->Operations.OutputQMsg (msgRail, msgRail->ManagerOutputQ, slotNum); -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ep_send_message: type=%d nodeId=%d rail=%d xid=%08x.%08x.%016llx\n", -+ msgRail->Name, type, nodeId, rail->Number, xid.Generation, xid.Handle, (long long) xid.Unique); -+ -+ msg->Hdr.Version = EP_MANAGER_MSG_VERSION; -+ msg->Hdr.Type = type; -+ msg->Hdr.Rail = rail->Number; -+ msg->Hdr.NodeId = msgRail->Position.pos_nodeid; -+ msg->Hdr.DestId = nodeId; -+ msg->Hdr.Xid = xid; -+ msg->Hdr.Checksum = 0; -+ -+ if (body) bcopy (body, &msg->Body, sizeof (EP_MANAGER_MSG_BODY)); -+ -+ msg->Hdr.Checksum = CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE); -+ -+ if (msgRail->Operations.OutputQSend (msgRail, msgRail->ManagerOutputQ, slotNum, EP_MANAGER_MSG_SIZE, -+ nodeId, EP_SYSTEMQ_MANAGER, EP_MANAGER_OUTPUTQ_RETRIES) < 0) -+ IncrStat (msgRail, SendMessageFailed); -+ -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) /* check this one last next time */ -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ return 0; -+} -+ -+void -+ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason) -+{ -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_MANAGER_MSG_BODY body; -+ EP_XID xid; -+ kcondvar_t sleep; -+ int rnum; -+ unsigned long flags; -+ -+ if (nodeId > sys->Position.pos_nodes) -+ return; -+ -+ strncpy (body.PanicReason, reason, sizeof (body.PanicReason)); -+ -+ kcondvar_init (&sleep); -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ for (;;) -+ { -+ if (node->ConnectedRails == 0) -+ break; -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (node->ConnectedRails & (1 << rnum)) -+ break; -+ -+ xid = ep_xid_cache_alloc(sys, &sys->Rails[rnum]->XidCache); -+ -+ if (ep_send_message (sys->Rails[rnum], nodeId, EP_MANAGER_MSG_TYPE_REMOTE_PANIC, xid, &body) == 0) -+ break; -+ -+ if (kcondvar_timedwaitsig (&sleep, &sys->NodeLock, &flags, lbolt + hz) == CV_RET_SIGPENDING) -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ kcondvar_destroy (&sleep); -+} -+ -+static void -+ProcessNeterrRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr request - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, (long long)msg->Body.Cookies[0], (long long)msg->Body.Cookies[1]); -+ -+ rail->Operations.NeterrFixup (rail, msg->Hdr.NodeId, msg->Body.Cookies); -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_NETERR_RESPONSE, msg->Hdr.Xid, &msg->Body); -+} -+ -+ -+static void -+ProcessNeterrResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned long flags; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr response - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, (long long)msg->Body.Cookies[0], (long long)msg->Body.Cookies[1]); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ if (nodeRail->NetworkErrorCookies[0] != 0 && nodeRail->NetworkErrorCookies[0] == msg->Body.Cookies[0]) -+ nodeRail->NetworkErrorCookies[0] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[1] != 0 && nodeRail->NetworkErrorCookies[1] == msg->Body.Cookies[1]) -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[0] == 0 && nodeRail->NetworkErrorCookies[1] == 0) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_ATOMIC_PACKET; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+ -+static void -+ProcessGetNodeState (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned int service = msg->Body.Service; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessGetNodeState: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ msg->Body.NodeState.State = nodeRail->State; -+ msg->Body.NodeState.NetworkErrorState = nodeRail->NetworkErrorState; -+ msg->Body.NodeState.Railmask = ep_rcvr_railmask (rail->System, service); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: get node state for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessFlushRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushRequest: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_REMOTE_PASSIVATE: -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; /* retransmit our flush request quickly */ -+ EPRINTF3 (DBG_MANAGER, "%s: ProcessFlushRequest: NextRunTime -> %lx (%lx)\n", rail->Name, nodeRail->NextRunTime, lbolt); -+ /* DROPTHROUGH */ -+ -+ case EP_NODE_PASSIVATED: -+ case EP_NODE_DISCONNECTED: -+ if (nodeRail->NetworkErrorState != 0) -+ break; -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE, msg->Hdr.Xid, NULL) < 0) -+ printk ("%s: flush request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+ break; -+ -+ default: -+ EPRINTF4 (DBG_MANAGER, "%s: flush request for %s[%d] - node not in approriate state - %s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, NodeStateNames[nodeRail->State]); -+ break; -+ } -+} -+ -+static void -+ProcessFlushResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail= &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushResponse: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid) ? " (XIDS match)" : ""); -+ -+ if (nodeRail->State == EP_NODE_REMOTE_PASSIVATE && EP_XIDS_MATCH(nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ printk ("%s: flush response from %d - move to passivated list\n", rail->Name, msg->Hdr.NodeId); -+ list_del (&nodeRail->Link); -+ -+ /* Node is now passivated - attempt to failover messages */ -+ list_add_tail (&nodeRail->Link, &rail->PassivatedList); -+ nodeRail->State = EP_NODE_PASSIVATED; -+ } -+ else -+ { -+ printk ("%s: flush response from %d - not passivating (%s) or XIDs mismatch (%llx %llx)\n", rail->Name, -+ msg->Hdr.NodeId, NodeStateNames[nodeRail->State], (long long) nodeRail->MsgXid.Unique, (long long) msg->Hdr.Xid.Unique); -+ } -+} -+ -+static void -+ProcessMapNmdRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_MAP_NMD_BODY *msgBody = &msg->Body.MapNmd; -+ int i; -+ -+ EPRINTF4 (DBG_MANAGER, "%s: Map NMD request from %d for %d NMDs to railmask %x\n", rail->Name, msg->Hdr.NodeId, msgBody->nFrags, msgBody->Railmask); -+ -+ for (i = 0; i < msgBody->nFrags; i++) -+ ep_nmd_map_rails (sys, &msgBody->Nmd[i], msgBody->Railmask); -+ -+ /* Must flush TLBs before responding */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (sys->Rails[i] && sys->Rails[i]->TlbFlushRequired) -+ ep_perrail_dvma_sync (sys->Rails[i]); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: map nmd request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessXidMessage (EP_RAIL *msgRail, EP_MANAGER_MSG *msg, EP_XID xid) -+{ -+ EP_XID_CACHE *xidCache = ep_xid_cache_find (msgRail->System, xid); -+ -+ EPRINTF6 (DBG_MANAGER, "%s: ProcessXidMessage: XID=%08x.%0x8.%016llx -> %p(%p)\n", -+ msgRail->Name, xid.Generation, xid.Handle, (long long) xid.Unique, -+ xidCache ? xidCache->MessageHandler : 0, xidCache ? xidCache->Arg : 0); -+ -+ if (xidCache != NULL) -+ xidCache->MessageHandler (xidCache->Arg, msg); -+} -+ -+static void -+ProcessMessage (EP_RAIL *msgRail, void *arg, void *msgbuf) -+{ -+ EP_SYS *sys = msgRail->System; -+ EP_MANAGER_MSG *msg = (EP_MANAGER_MSG *) msgbuf; -+ uint16_t csum = msg->Hdr.Checksum; -+ EP_RAIL *rail; -+ -+ if (msg->Hdr.Version != EP_MANAGER_MSG_VERSION) -+ return; -+ -+ msg->Hdr.Checksum= 0; -+ if (CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE) != csum) -+ { -+ printk ("%s: checksum failed on msg from %d (%d) (%x != %x) ?\n", msgRail->Name, msg->Hdr.NodeId, msg->Hdr.Type, csum, CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE)); -+ return; -+ } -+ -+ if ((rail = sys->Rails[msg->Hdr.Rail]) == NULL) -+ { -+ printk ("%s: rail no longer exists for msg from %d?\n", msgRail->Name, msg->Hdr.NodeId); -+ return; -+ } -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ProcessMessage (%s) type=%d node=%d XID=%08x.%0x8.%016llx\n", -+ msgRail->Name, rail->Name, msg->Hdr.Type, msg->Hdr.NodeId, -+ msg->Hdr.Xid.Generation, msg->Hdr.Xid.Handle, (long long)msg->Hdr.Xid.Unique); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_REMOTE_PANIC: -+ msg->Body.PanicReason[EP_PANIC_STRLEN] = '\0'; /* ensure string terminated */ -+ -+ printk ("%s: remote panic call from elan node %d - %s\n", msgRail->Name, msg->Hdr.NodeId, msg->Body.PanicReason); -+ panic ("ep: remote panic request\n"); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_REQUEST: -+ ProcessNeterrRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_RESPONSE: -+ ProcessNeterrResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_REQUEST: -+ ProcessFlushRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE: -+ ProcessFlushResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST: -+ ProcessMapNmdRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ ProcessXidMessage (msgRail, msg, msg->Body.Failover.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE: -+ ProcessGetNodeState (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ default: -+ printk ("%s: Unknown message type %d from %d\n", msgRail->Name, msg->Hdr.Type, msg->Hdr.NodeId); -+ break; -+ } -+} -+ -+ -+static void -+ManagerQueueEvent (EP_RAIL *rail, void *arg) -+{ -+ ep_kthread_schedule ((EP_KTHREAD *) arg, lbolt); -+} -+ -+void -+UpdateConnectionState (EP_RAIL *rail, statemap_t *map) -+{ -+ EP_SYS *sys = rail->System; -+ bitmap_t seg; -+ int offset, nodeId; -+ unsigned long flags; -+ -+ while ((offset = statemap_findchange (map, &seg, 1)) >= 0) -+ { -+ for (nodeId = offset; nodeId < (offset + BT_NBIPUL) && nodeId < rail->Position.pos_nodes; nodeId++) -+ { -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ -+ if (statemap_getbits (map, nodeId, 1)) -+ { -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_DISCONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnected \n", rail->Name, nodeId); -+ break; -+ -+ case EP_NODE_CONNECTING: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Connect\n", rail->Name, nodeId); -+ -+ /* load the route table entry *before* setting the state -+ * to connected, since DMA's can be initiated as soon as -+ * the node is marked as connected */ -+ rail->Operations.LoadNodeRoute (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_CONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 1, 1); -+ if (statemap_getbits (sys->NodeSet, nodeId, 1) == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 1, 1); -+ -+ /* Add to rails connected to this node */ -+ node->ConnectedRails |= (1 << rail->Number); -+ -+ /* Finally lower the per-node context filter */ -+ rail->Operations.LowerFilter (rail, nodeId); -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Local Passivate\n", rail->Name, nodeId); -+ -+ /* Raise the per-node context filter */ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ /* If it's resolving network errors it will be on the NodeNeterrList, -+ * remove if from this list before placing it on the LocalPassivateList -+ * as we'll resolve the network error later in RemotePassivate */ -+ if (nodeRail->NetworkErrorState) -+ list_del (&nodeRail->Link); -+ -+ list_add_tail (&nodeRail->Link, &rail->LocalPassivateList); -+ nodeRail->State = EP_NODE_LOCAL_PASSIVATE; -+ -+ /* Remove from rails connected to this node */ -+ node->ConnectedRails &= ~(1 << rail->Number); -+ break; -+ -+ default: -+ printk ("%s: Node %d - in NodeChangeMap with state %d\n", rail->Name, nodeId, nodeRail->State); -+ panic ("Node in NodeChangeMap with invalid state\n"); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ } -+ } -+ } -+} -+ -+void -+ProgressNetworkError (EP_RAIL *rail, EP_NODE_RAIL *nodeRail) -+{ -+ EP_SYS *sys = rail->System; -+ int nodeId = nodeRail - rail->Nodes; -+ EP_MANAGER_MSG_BODY msg; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_REMOTE_PASSIVATE); -+ -+ if (BEFORE (lbolt, nodeRail->NextRunTime)) -+ return; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_DMA_PACKET) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_DMA_PACKET; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_ATOMIC_PACKET) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ msg.Cookies[0] = nodeRail->NetworkErrorCookies[0]; -+ msg.Cookies[1] = nodeRail->NetworkErrorCookies[1]; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: progress neterr - node %d cookies %llx %llx\n", rail->Name, nodeId, (long long)msg.Cookies[0], (long long)msg.Cookies[1]); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_NETERR_REQUEST, nodeRail->MsgXid, &msg) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+} -+ -+long -+ProgressNodeLists (EP_RAIL *rail, long nextRunTime) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ list_for_each_safe (el, nel, &rail->NetworkErrorList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ ProgressNetworkError (rail, nodeRail); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: lower context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.LowerFilter (rail, nodeId); -+ -+ list_del (&nodeRail->Link); -+ continue; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (! list_empty (&rail->LocalPassivateList)) -+ { -+ EPRINTF1 (DBG_MANAGER, "%s: Locally Passivating Nodes\n", rail->Name); -+ -+ /* We have disconnected from some nodes or have left ourselves -+ * flush through all communications and determine whether we -+ * need to perform rail failover */ -+ rail->Operations.FlushFilters (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FILTERING, rail->NodeSet); -+ -+ rail->Operations.FlushQueues (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FLUSHING, rail->NodeSet); -+ -+ while (! list_empty (&rail->LocalPassivateList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->LocalPassivateList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.UnloadNodeRoute (rail, nodeId); -+ -+ if (nodeRail->NetworkErrorState == 0 && nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnecting\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ } -+ else -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Remote Passivate\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->RemotePassivateList); -+ nodeRail->State = EP_NODE_REMOTE_PASSIVATE; -+ -+ if (nodeRail->NetworkErrorState == 0) -+ nodeRail->NextRunTime = lbolt; -+ } -+ } -+ -+ ep_call_callbacks (rail, EP_CB_PASSIVATED, rail->NodeSet); -+ } -+ -+ list_for_each_safe (el, nel, &rail->RemotePassivateList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ if (node->ConnectedRails == 0) /* no rails connected to this node (anymore) */ -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Remote Passivate -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->NetworkErrorState) -+ { -+ ProgressNetworkError (rail, nodeRail); -+ } -+ else if (! BEFORE (lbolt, nodeRail->NextRunTime)) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_FLUSH_REQUEST, nodeRail->MsgXid, NULL) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ -+ if (! list_empty (&rail->PassivatedList)) -+ { -+ ep_call_callbacks (rail, EP_CB_FAILOVER, rail->NodeSet); -+ -+ list_for_each_safe (el, nel, &rail->PassivatedList) { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->PassivatedList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ ASSERT (nodeRail->NetworkErrorState == 0); -+ -+ if (node->ConnectedRails == 0) -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Passivated -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no messages, Passivated -> Disconnecting\n", rail->Name,nodeId); -+ -+ list_del (&nodeRail->Link); -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ nodeRail->MessageState = 0; -+ nodeRail->NextRunTime = lbolt + FAILOVER_RETRY_TIME; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ } -+ -+ if (! list_empty (&rail->DisconnectingList)) -+ { -+ ep_call_callbacks (rail, EP_CB_DISCONNECTING, rail->NodeSet); -+ -+ while (! list_empty (&rail->DisconnectingList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->DisconnectingList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, Disconnecting -> Disconnected\n", rail->Name, nodeId); -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.NodeDisconnected (rail, nodeId); -+ -+ /* Clear the network error state */ -+ nodeRail->NextRunTime = 0; -+ nodeRail->NetworkErrorState = 0; -+ nodeRail->NetworkErrorCookies[0] = 0; -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ /* Clear the message state */ -+ nodeRail->MessageState = 0; -+ -+ cm_node_disconnected (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_DISCONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 0, 1); -+ -+ if (node->ConnectedRails == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 0, 1); -+ } -+ -+ ep_call_callbacks (rail, EP_CB_DISCONNECTED, rail->NodeSet); -+ } -+ -+ return (nextRunTime); -+} -+ -+void -+DisplayNodes (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ int i, state, count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ for (state = 0; state < EP_NODE_NUM_STATES; state++) -+ { -+ for (count = i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ ASSERT (rail->Nodes[i].State < EP_NODE_NUM_STATES); -+ -+ if (rail->Nodes[i].State == state) -+ if (state != EP_NODE_DISCONNECTED) -+ printk ("%s %d", !count++ ? NodeStateNames[state] : "", i); -+ } -+ if (count) -+ printk ("%s (%d total)\n", state == EP_NODE_DISCONNECTED ? NodeStateNames[state] : "", count); -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+static void -+PositionFound (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ int i; -+ -+ /* only called from the ep_managage whilst rail->State == EP_RAIL_STATE_STARTED */ -+ ASSERT ( rail->State == EP_RAIL_STATE_STARTED ); -+ -+#if defined(PER_CPU_TIMEOUT) -+ /* -+ * On Tru64 - if we're running in a "funnelled" thread, then we will be -+ * unable to start the per-cpu timeouts, so if we return then eventually -+ * the ep_manager() thread will find the network position and we're -+ * in control of our own destiny. -+ */ -+ if (THREAD_IS_FUNNELED(current_thread())) -+ { -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ return; -+ } -+#endif -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ -+ if (pos->pos_levels > MaxSwitchLevels) -+ { -+ for (i = 0; i < (pos->pos_levels - MaxSwitchLevels); i++) -+ pos->pos_nodes /= pos->pos_arity[i]; -+ -+ for (i = 0; i < MaxSwitchLevels; i++) -+ pos->pos_arity[i] = pos->pos_arity[i + (pos->pos_levels - MaxSwitchLevels)]; -+ -+ pos->pos_levels = MaxSwitchLevels; -+ pos->pos_nodeid = pos->pos_nodeid % pos->pos_nodes; -+ -+ printk ("%s: limiting switch levels to %d\n", rail->Name, MaxSwitchLevels); -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", rail->Name, pos->pos_nodeid, pos->pos_levels, pos->pos_nodes); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ } -+ -+ if (rail->Position.pos_mode != ELAN_POS_UNKNOWN && rail->Position.pos_nodeid != pos->pos_nodeid) -+ { -+ printk ("%s: NodeId has changed from %d to %d\n", rail->Name, rail->Position.pos_nodeid, pos->pos_nodeid); -+ panic ("ep: PositionFound: NodeId has changed\n"); -+ } -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN && (sys->Position.pos_nodeid != pos->pos_nodeid || sys->Position.pos_nodes != pos->pos_nodes)) -+ { -+ printk ("%s: position incompatible - disabling rail\n", rail->Name); -+ rail->State = EP_RAIL_STATE_INCOMPATIBLE; -+ return; -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ { -+ sys->Position = *pos; -+ sys->NodeSet = statemap_create (pos->pos_nodes); -+ KMEM_ZALLOC (sys->Nodes, EP_NODE *, pos->pos_nodes * sizeof (EP_NODE), 1); -+ } -+ -+ rail->Position = *pos; -+ rail->SwitchBroadcastLevel = pos->pos_levels - 1; -+ rail->State = EP_RAIL_STATE_RUNNING; -+ -+ for (i = 0; i < pos->pos_levels; i++) -+ { -+ rail->SwitchProbeTick[i] = lbolt; -+ rail->SwitchLast[i].uplink = 4; -+ } -+ -+ rail->Operations.PositionFound (rail, pos); -+ -+ INIT_LIST_HEAD (&rail->NetworkErrorList); -+ INIT_LIST_HEAD (&rail->LocalPassivateList); -+ INIT_LIST_HEAD (&rail->RemotePassivateList); -+ INIT_LIST_HEAD (&rail->PassivatedList); -+ INIT_LIST_HEAD (&rail->DisconnectingList); -+ -+ rail->NodeSet = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeMap = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeTmp = statemap_create (rail->Position.pos_nodes); -+ -+ KMEM_ZALLOC (rail->Nodes, EP_NODE_RAIL *, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL), 1); -+ -+ for (i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ spin_lock_init (&rail->Nodes[i].CookieLock); -+ -+ INIT_LIST_HEAD (&rail->Nodes[i].StalledDmas); -+ -+ rail->Nodes[i].State = EP_NODE_DISCONNECTED; -+ } -+ -+ /* Notify all subsystems that a new rail has been enabled */ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->AddRail) -+ subsys->AddRail (subsys, sys, rail); -+ -+ /* XXXX: what to do if the subsystem refused to add the rail ? */ -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ /* Now enable the manager input queue */ -+ ep_enable_inputq (rail, rail->ManagerInputQ); -+} -+ -+static void -+ep_manager (void *arg) -+{ -+ EP_SYS *sys = (EP_SYS *) arg; -+ struct list_head *el; -+ ELAN_POSITION pos; -+ unsigned long flags; -+ -+ kernel_thread_init ("ep_manager"); -+ kernel_thread_become_highpri(); -+ -+ for (;;) -+ { -+ long nextRunTime = lbolt + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT); -+ -+ list_for_each (el, &sys->ManagedRails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL, ManagerLink); -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_STARTED: -+ if (ProbeNetwork (rail, &pos) == 0) -+ { -+ PositionFound (rail, &pos); -+ break; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + HZ)) -+ nextRunTime = lbolt + HZ; -+ break; -+ -+ case EP_RAIL_STATE_RUNNING: -+ if (ep_poll_inputq (rail, rail->ManagerInputQ, 100, ProcessMessage, rail) >= 100) -+ nextRunTime = lbolt; -+ -+ /* Handle any nodes which the cluster membership subsystem -+ * has indicated are to begin connecting or disconnecting */ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (! statemap_changed (rail->NodeChangeMap)) -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ else -+ { -+ /* -+ * Take a copy of the statemap, and zero all entries so -+ * we only see new requests next time -+ */ -+ statemap_copy (rail->NodeChangeTmp, rail->NodeChangeMap); -+ statemap_zero (rail->NodeChangeMap); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ UpdateConnectionState (rail, rail->NodeChangeTmp); -+ } -+ -+ nextRunTime = ProgressNodeLists (rail, nextRunTime); -+ -+ if (statemap_changed (rail->NodeSet)) -+ { -+ ep_call_callbacks (rail, EP_CB_NODESET, rail->NodeSet); -+ -+ statemap_clearchanges (rail->NodeSet); -+ } -+ break; -+ -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ break; -+ } -+ } -+ -+ -+ EPRINTF5 (DBG_MANAGER, "ep_manager: sleep now=%lx nextRunTime=%lx (%ld) [%lx (%ld)]\n", -+ lbolt, nextRunTime, nextRunTime ? nextRunTime - lbolt : 0, sys->ManagerThread.next_run, -+ sys->ManagerThread.next_run ? sys->ManagerThread.next_run - lbolt : 0); -+ -+ if (ep_kthread_sleep (&sys->ManagerThread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&sys->ManagerThread); -+ kernel_thread_exit(); -+} -+ -+void -+ep_connect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: ep_connect_node: nodeId %d\n", rail->Name, nodeId); -+ -+ ASSERT (node->State == EP_NODE_DISCONNECTED && statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ -+ node->State = EP_NODE_CONNECTING; -+ -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+} -+ -+int -+ep_disconnect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ int state; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF3 (DBG_MANAGER, "%s: ep_disconnect_node: nodeId %d - %s\n", rail->Name, nodeId, NodeStateNames[node->State]); -+ -+ switch (state = node->State) -+ { -+ case EP_NODE_CONNECTING: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 0, 1); -+ -+ node->State = EP_NODE_DISCONNECTED; -+ break; -+ -+ case EP_NODE_CONNECTED: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ node->State = EP_NODE_LEAVING_CONNECTED; -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ /* no assert on NodeChangeMap as the map could have been taken but not acted on */ -+ break; -+ -+ default: -+ ASSERT (statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (state == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ -+ return state; -+} -+ -+int -+ep_manager_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if ((rail->ManagerOutputQ = ep_alloc_outputq (rail, EP_MANAGER_MSG_SIZE, EP_MANAGER_OUTPUTQ_SLOTS)) == NULL) -+ return -ENOMEM; -+ -+ if ((rail->ManagerInputQ = ep_alloc_inputq (rail, EP_SYSTEMQ_MANAGER, EP_MANAGER_MSG_SIZE, EP_MANAGER_INPUTQ_SLOTS, -+ ManagerQueueEvent, &sys->ManagerThread)) == NULL) -+ { -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init (&rail->ManagerOutputQLock); -+ -+ ep_xid_cache_init (sys, &rail->XidCache); -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_add_tail (&rail->ManagerLink, &sys->ManagedRails); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if (rail->ManagerInputQ != NULL) -+ { -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&rail->ManagerLink); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ ep_xid_cache_destroy (sys, &rail->XidCache); -+ -+ spin_lock_destroy (&rail->ManagerOutputQLock); -+ -+ ep_disable_inputq (rail, rail->ManagerInputQ); -+ ep_free_inputq (rail, rail->ManagerInputQ); -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ } -+} -+ -+int -+ep_manager_init (EP_SYS *sys) -+{ -+ INIT_LIST_HEAD (&sys->ManagedRails); -+ -+ ep_kthread_init (&sys->ManagerThread); -+ -+ if (kernel_thread_create (ep_manager, (void *) sys) == 0) -+ return (ENOMEM); -+ -+ ep_kthread_started (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_fini (EP_SYS *sys) -+{ -+ ep_kthread_stop (&sys->ManagerThread); -+ ep_kthread_destroy (&sys->ManagerThread); -+} -+ -+int -+ep_sys_init (EP_SYS *sys) -+{ -+ kmutex_init (&sys->SubsysLock); -+ kmutex_init (&sys->StartStopLock); -+ spin_lock_init (&sys->NodeLock); -+ -+ INIT_LIST_HEAD (&sys->Subsystems); -+ -+ /* initialise the xid allocators */ -+ spin_lock_init (&sys->XidLock); -+ INIT_LIST_HEAD (&sys->XidCacheList); -+ -+ /* initially don't know where we are in the network */ -+ sys->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* initialise the network mapping descriptor hash tables */ -+ ep_nmh_init (&sys->MappingTable); -+ -+ /* intialise the shared allocators */ -+ ep_shared_alloc_init (sys); -+ -+ /* initialise the dvma space */ -+ ep_dvma_init (sys); -+ -+ /* intiialise the rail manager */ -+ ep_manager_init (sys); -+ -+ /* initialise all subsystems */ -+ cm_init (sys); -+ ep_comms_init (sys); -+ //ep_msgsys_init (sys); -+ -+ return (0); -+} -+ -+void -+ep_sys_fini (EP_SYS *sys) -+{ -+ /* Destroy the subsystems in the reverse order to their creation */ -+ while (! list_empty (&sys->Subsystems)) -+ { -+ EP_SUBSYS *subsys = list_entry (sys->Subsystems.prev, EP_SUBSYS, Link); -+ -+ list_del (&subsys->Link); -+ -+ subsys->Destroy (subsys, sys); -+ } -+ -+ ep_manager_fini(sys); -+ ep_dvma_fini (sys); -+ ep_shared_alloc_fini (sys); -+ -+ ep_nmh_fini (&sys->MappingTable); -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN) { -+ statemap_destroy (sys->NodeSet); -+ KMEM_FREE(sys->Nodes, sys->Position.pos_nodes * sizeof (EP_NODE)); -+ } -+ -+ spin_lock_destroy (&sys->XidLock); -+ -+ spin_lock_destroy (&sys->NodeLock); -+ kmutex_destroy (&sys->SubsysLock); -+ kmutex_destroy (&sys->StartStopLock); -+} -+ -+void -+ep_shutdown (EP_SYS *sys) -+{ -+ sys->Shutdown = 1; -+} -+ -+int -+ep_init_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ static int rnum; -+ -+ rail->System = sys; -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+ rail->Number = rnum++; -+ rail->Position.pos_mode = ELAN_POS_UNKNOWN; -+ rail->Position.pos_nodeid = ELAN_INVALID_NODE; -+ -+ rail->CallbackRegistered = 0; -+ -+ sprintf (rail->Name, "ep%d", rail->Number); -+ -+ /* Initialise externally visible locks */ -+ kmutex_init (&rail->CallbackLock); -+ -+ ep_alloc_init (rail); -+ -+ sys->Rails[rail->Number] = rail; -+ -+ return 0; -+} -+ -+void -+ep_destroy_rail (EP_RAIL *rail) -+{ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ ep_alloc_fini (rail); -+ -+ kmutex_destroy (&rail->CallbackLock); -+ -+ rail->System->Rails[rail->Number] = NULL; -+ -+ rail->Operations.DestroyRail (rail); -+} -+ -+/* We need to traverse the Subsystems lists backwards -+ * but it's not defined in */ -+#define list_for_each_backwards(pos,list) \ -+ for (pos = (list)->prev; pos != (list); \ -+ pos = (pos)->prev) -+ -+void -+__ep_stop_rail (EP_RAIL *rail) -+{ -+ /* called holding the sys->Lock */ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ -+ rail->Operations.StallRail (rail); -+ -+ /* Notify all subsystems that this rail is being stopped */ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each_backwards (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->RemoveRail) -+ subsys->RemoveRail (subsys, sys, rail); -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ ep_manager_remove_rail (sys, rail); -+ -+ KMEM_FREE (rail->Nodes, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL)); -+ -+ statemap_destroy (rail->NodeChangeTmp); -+ statemap_destroy (rail->NodeChangeMap); -+ statemap_destroy (rail->NodeSet); -+ } -+ -+ ep_dvma_remove_rail (sys, rail); -+ ep_shared_alloc_remove_rail (sys, rail); -+ -+ rail->Operations.StopRail (rail); -+ -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+} -+ -+void -+ep_stop_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ /* stall ep_manager */ -+ /* and remove the rail from the manaager */ -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ if ( rail->State == EP_RAIL_STATE_STARTED ) -+ ep_manager_remove_rail (sys, rail); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ __ep_stop_rail (rail); -+} -+ -+int -+ep_start_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ if (rail->Operations.StartRail (rail) < 0) -+ return -ENXIO; -+ -+ kmutex_lock (&sys->StartStopLock); -+ /* Add this rail to the shared allocator */ -+ if (ep_shared_alloc_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* Add this rail to dvma kmap */ -+ if (ep_dvma_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* rail is now started */ -+ rail->State = EP_RAIL_STATE_STARTED; -+ -+ /* notify the rail manager of the new rail */ -+ if (ep_manager_add_rail (rail->System, rail)) -+ goto failed; -+ -+ kmutex_unlock (&sys->StartStopLock); -+ return (ESUCCESS); -+ -+ failed: -+ printk ("%s: start failed\n", rail->Name); -+ kmutex_unlock (&sys->StartStopLock); -+ __ep_stop_rail (rail); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_add_tail (&subsys->Link, &sys->Subsystems); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+void -+ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_del (&subsys->Link); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+EP_SUBSYS * -+ep_subsys_find (EP_SYS *sys, char *name) -+{ -+ struct list_head *el; -+ -+ ASSERT ( !in_interrupt()); -+ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (! strcmp (subsys->Name, name)) -+ { -+ kmutex_unlock (&sys->SubsysLock); -+ return (subsys); -+ } -+ } -+ -+ kmutex_unlock (&sys->SubsysLock); -+ return (NULL); -+} -+ -+int -+ep_waitfor_nodeid (EP_SYS *sys) -+{ -+ int i, printed = 0; -+ kcondvar_t Sleep; -+ spinlock_t Lock; -+ -+ kcondvar_init (&Sleep); -+ spin_lock_init (&Lock); -+ -+#define TICKS_TO_WAIT (10*hz) -+#define TICKS_PER_LOOP (hz/10) -+ for (i = 0; sys->Position.pos_mode == ELAN_POS_UNKNOWN && i < TICKS_TO_WAIT; i += TICKS_PER_LOOP) -+ { -+ if (! printed++) -+ printk ("ep: waiting for network position to be found\n"); -+ -+ spin_lock (&Lock); -+ kcondvar_timedwait (&Sleep, &Lock, NULL, lbolt + TICKS_PER_LOOP); -+ spin_unlock (&Lock); -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ printk ("ep: network position not found after waiting\n"); -+ else if (printed) -+ printk ("ep: network position found at nodeid %d\n", sys->Position.pos_nodeid); -+ -+ spin_lock_destroy (&Lock); -+ kcondvar_destroy (&Sleep); -+ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_nodeid (EP_SYS *sys) -+{ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_numnodes (EP_SYS *sys) -+{ -+ return (sys->Position.pos_nodes); -+} -+ -+void -+ep_fillout_stats(EP_RAIL *r, char *str) -+{ -+ sprintf(str+strlen(str),"SendMessageFailed %lu NeterrAtomicPacket %lu NeterrDmaPacket %lu \n", r->Stats.SendMessageFailed, r->Stats.NeterrAtomicPacket, r->Stats.NeterrDmaPacket); -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,rx), GET_STAT_PER_SEC(r->Stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,rx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,rx_len) / (1024*1024)); -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,tx), GET_STAT_PER_SEC(r->Stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,tx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,tx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kcomm_elan3.c linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan3.c ---- clean/drivers/net/qsnet/ep/kcomm_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan3.c 2004-11-30 07:02:06.000000000 -0500 -@@ -0,0 +1,504 @@ -+ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan3.c,v 1.34 2004/11/30 12:02:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan3; -+ -+unsigned int -+ep3_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN3_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan3_device (i)) != NULL) -+ { -+ if ((rail = ep3_create_rail (sys, dev)) != NULL) -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev) -+{ -+ EP3_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP3_RAIL *, sizeof (EP3_RAIL), TRUE); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->Generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->Device = dev; -+ -+ /* Install our rail operations */ -+ rail->Generic.Operations.DestroyRail = ep3_destroy_rail; -+ rail->Generic.Operations.StartRail = ep3_start_rail; -+ rail->Generic.Operations.StallRail = ep3_stall_rail; -+ rail->Generic.Operations.StopRail = ep3_stop_rail; -+ -+ rail->Generic.Operations.SdramAlloc = ep3_sdram_alloc; -+ rail->Generic.Operations.SdramFree = ep3_sdram_free; -+ rail->Generic.Operations.SdramWriteb = ep3_sdram_writeb; -+ -+ rail->Generic.Operations.KaddrMap = ep3_kaddr_map; -+ rail->Generic.Operations.SdramMap = ep3_sdram_map; -+ rail->Generic.Operations.Unmap = ep3_unmap; -+ -+ rail->Generic.Operations.DvmaReserve = ep3_dvma_reserve; -+ rail->Generic.Operations.DvmaRelease = ep3_dvma_release; -+ rail->Generic.Operations.DvmaSetPte = ep3_dvma_set_pte; -+ rail->Generic.Operations.DvmaReadPte = ep3_dvma_read_pte; -+ rail->Generic.Operations.DvmaUnload = ep3_dvma_unload; -+ rail->Generic.Operations.FlushTlb = ep3_flush_tlb; -+ -+ rail->Generic.Operations.ProbeRoute = ep3_probe_route; -+ rail->Generic.Operations.PositionFound = ep3_position_found; -+ rail->Generic.Operations.CheckPosition = ep3_check_position; -+ rail->Generic.Operations.NeterrFixup = ep3_neterr_fixup; -+ -+ rail->Generic.Operations.LoadSystemRoute = ep3_load_system_route; -+ -+ rail->Generic.Operations.LoadNodeRoute = ep3_load_node_route; -+ rail->Generic.Operations.UnloadNodeRoute = ep3_unload_node_route; -+ rail->Generic.Operations.LowerFilter = ep3_lower_filter; -+ rail->Generic.Operations.RaiseFilter = ep3_raise_filter; -+ rail->Generic.Operations.NodeDisconnected = ep3_node_disconnected; -+ -+ rail->Generic.Operations.FlushFilters = ep3_flush_filters; -+ rail->Generic.Operations.FlushQueues = ep3_flush_queues; -+ -+ rail->Generic.Operations.AllocInputQ = ep3_alloc_inputq; -+ rail->Generic.Operations.FreeInputQ = ep3_free_inputq; -+ rail->Generic.Operations.EnableInputQ = ep3_enable_inputq; -+ rail->Generic.Operations.DisableInputQ = ep3_disable_inputq; -+ rail->Generic.Operations.PollInputQ = ep3_poll_inputq; -+ -+ rail->Generic.Operations.AllocOutputQ = ep3_alloc_outputq; -+ rail->Generic.Operations.FreeOutputQ = ep3_free_outputq; -+ rail->Generic.Operations.OutputQMsg = ep3_outputq_msg; -+ rail->Generic.Operations.OutputQState = ep3_outputq_state; -+ rail->Generic.Operations.OutputQSend = ep3_outputq_send; -+ -+ rail->Generic.Operations.FillOutStats = ep3_fillout_stats; -+ -+ rail->Generic.Devinfo = dev->Devinfo; -+ -+ printk ("%s: connected via elan3 rev%c device %d\n", rail->Generic.Name, -+ 'a' + dev->Devinfo.dev_revision_id, dev->Instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep3_destroy_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+} -+ -+static int -+ep3_attach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ ELAN3_CTXT *ctxt; -+ ELAN_CAPABILITY *cap; -+ int ctx; -+ unsigned long flags; -+ -+ if ((ctxt = elan3_alloc (dev, TRUE)) == (ELAN3_CTXT *) NULL) -+ { -+ printk ("%s: cannot allocate elan context\n", rail->Generic.Name); -+ return -ENXIO; -+ } -+ -+ ctxt->Operations = &ep3_elan3_ops; -+ ctxt->Private = (void *) rail; -+ -+ /* Initialise a capability and attach to the elan*/ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ elan_nullcap (cap); -+ -+ cap->cap_type = ELAN_CAP_TYPE_KERNEL; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ cap->cap_mycontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_lowcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_highcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_railmask = 1 << dev->Devinfo.dev_rail; -+ -+ /* Ensure the context filter is raised while we initialise */ -+ elan3_block_inputter (ctxt, TRUE); -+ -+ if (elan3_doattach (ctxt, cap) != 0) -+ { -+ printk ("%s: cannot attach to kernel context\n", rail->Generic.Name); -+ -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ elan3_free (ctxt); -+ return -ENXIO; -+ } -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ -+ /* now attach to all the kernel comms input/dmaring/data contexts */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Stash the ctxt,commandport, mmu and route table */ -+ rail->Ctxt = ctxt; -+ rail->CommandPort = ctxt->CommandPort; -+ rail->Elan3mmu = ctxt->Elan3mmu; -+ rail->RouteTable = ctxt->RouteTable; -+ -+ return 0; -+} -+ -+static void -+ep3_detach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int ctx; -+ -+ /* detach from the elan */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_dodetach(rail->Ctxt); -+ elan3_free (rail->Ctxt); -+ -+ rail->Ctxt = NULL; -+ rail->CommandPort = 0; -+ rail->Elan3mmu = NULL; -+ rail->RouteTable = NULL; -+} -+ -+int -+ep3_start_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int i, res; -+ unsigned long flags; -+ -+ if ((res = ep3_attach_rail (rail)) != 0) -+ return res; -+ -+ spin_lock_init (&rail->CookieLock); -+ kmutex_init (&rail->HaltOpMutex); -+ kcondvar_init (&rail->HaltOpSleep); -+ -+ /* Initialise event interrupt cookie table */ -+ InitialiseCookieTable (&rail->CookieTable); -+ -+ /* Load and map the thread code */ -+ rail->ThreadCode = threadcode_elan3; -+ if (ep_loadcode (&rail->Generic, &rail->ThreadCode) != ESUCCESS) -+ goto failed; -+ -+ /* Map the command port to be visible to the Elan */ -+ ep3_ioaddr_map (&rail->Generic, EP3_COMMANDPORT_ADDR, rail->Ctxt->CommandPage, PAGESIZE, EP_PERM_WRITE); -+ rail->CommandPortAddr = EP3_COMMANDPORT_ADDR + (rail->Ctxt->CommandPort - rail->Ctxt->CommandPage); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->RailElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RAIL_ELAN), 0, &rail->RailElanAddr)) == 0 || -+ (rail->RailMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RAIL_MAIN), 0, &rail->RailMainAddr)) == 0) -+ { -+ goto failed; -+ } -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ if (! (rail->QueueDescs = ep_alloc_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE, PAGESIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan3_sdram_writel (rail->Device, EP_SYSTEMQ_DESC(rail->QueueDescs, i) + offsetof (EP3_InputQueue, q_state), E3_QUEUE_FULL); -+ -+ /* initialise the dma rings */ -+ if (DmaRingsCreate (rail)) -+ goto failed; -+ -+ if (InitialiseDmaRetries (rail)) -+ goto failed; -+ -+ if (ep3_init_probenetwork (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, FALSE, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ return 0; -+ -+ failed: -+ printk ("ep3_start_rail: failed for rail %d\n", rail->Generic.Number); -+ ep3_stop_rail (&rail->Generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep3_stall_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int ctx; -+ unsigned long flags; -+ -+ /* raise all the context filters */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_stop_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ep3_destroy_probenetwork (rail); -+ -+ if (rail->DmaRetryInitialised) -+ DestroyDmaRetries (rail); -+ -+ DmaRingsRelease(rail); -+ -+ if (rail->Generic.State == EP_RAIL_STATE_RUNNING) -+ { -+ KMEM_FREE (rail->MainCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ -+ ep_free_elan (&rail->Generic, rail->ElanCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ } -+ -+ if (rail->QueueDescs) -+ ep_free_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE); -+ rail->QueueDescs = 0; -+ -+ if (rail->RailMain) -+ ep_free_main (&rail->Generic, rail->RailMainAddr, sizeof (EP3_RAIL_MAIN)); -+ rail->RailMain = 0; -+ -+ if (rail->RailElan) -+ ep_free_elan (&rail->Generic, rail->RailElanAddr, sizeof (EP3_RAIL_ELAN)); -+ rail->RailElan = 0; -+ -+ ep_unloadcode (&rail->Generic, &rail->ThreadCode); -+ -+ DestroyCookieTable (&rail->CookieTable); -+ -+ ep_perrail_unmap (&rail->Generic, rail->Ctxt->CommandPage, PAGESIZE); -+ -+ kcondvar_destroy (&rail->HaltOpSleep); -+ kmutex_destroy (&rail->HaltOpMutex); -+ spin_lock_destroy (&rail->CookieLock); -+ -+ ep3_detach_rail (rail); -+} -+ -+void -+ep3_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t addr; -+ -+ rail->SwitchBroadcastLevelTick = lbolt; -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, NodeId), pos->pos_nodeid); -+ -+ /* Allocate Network Identify cookie state */ -+ KMEM_ZALLOC (rail->MainCookies, E3_uint32 *, pos->pos_nodes * sizeof (E3_uint32), 1); -+ -+ if (! (addr = ep_alloc_elan (&rail->Generic, pos->pos_nodes * sizeof (E3_uint32), 0, &rail->ElanCookies))) -+ panic ("ep: PositionFound: cannot allocate elan cookies array\n"); -+ -+ elan3_sdram_zeroq_sdram (rail->Device, addr, pos->pos_nodes * sizeof (E3_uint32)); -+ -+ ep3_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep3_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ return elan3_sdram_alloc (((EP3_RAIL *) r)->Device, size); -+} -+ -+void -+ep3_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan3_sdram_free (((EP3_RAIL *) r)->Device, addr, size); -+} -+ -+void -+ep3_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan3_sdram_writeb (((EP3_RAIL *) r)->Device, addr, val); -+} -+ -+void -+ep3_flush_tlb (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ -+ IncrStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob (); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+ep3_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, vp, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ /* XXXX: whilst LoadRoute() can fail - it is not likely. */ -+ panic ("ep3_load_system_route: cannot load p2p route entry\n"); -+ } -+} -+ -+void -+ep3_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, nodeId, nodeId, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId), EP3_CONTEXT_NUM(rail->Generic.Position.pos_nodeid), nflits, flits) != 0) -+ panic ("ep3_load_node_route: cannot load p2p data route entry\n"); -+} -+ -+void -+ep3_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ClearRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep3_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 0, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 1, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ FreeStalledDmas ((EP3_RAIL *) r, nodeId); -+} -+ -+void -+ep3_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP3_RAIL *ep3rail = (EP3_RAIL *)r; */ -+} -diff -urN clean/drivers/net/qsnet/ep/kcomm_elan3.h linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan3.h ---- clean/drivers/net/qsnet/ep/kcomm_elan3.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan3.h 2004-12-14 05:19:23.000000000 -0500 -@@ -0,0 +1,431 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN3_H -+#define __EP_KCOMM_ELAN3_H -+ -+#ident "@(#)$Id: kcomm_elan3.h,v 1.53 2004/12/14 10:19:23 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.h,v $*/ -+ -+#if !defined(__ELAN3__) -+#include -+#include -+#include -+#include -+#include -+#include -+#endif /* !defined(__ELAN3__) */ -+ -+#include -+ -+/* private address allocation */ -+#define EP3_TEXT_BASE 0xFF000000 /* base address for thread code (defined in makerules.elan3) */ -+#define EP3_COMMANDPORT_ADDR 0xFFF00000 /* mapping address for elan command port */ -+ -+#define EP3_STACK_SIZE 1024 /* default thread code stack size */ -+ -+#define EP3_PACEMAKER_EVENTADDR 0xfeedbeef /* mis-aligned address used by heartbeat pacemaker */ -+ -+/* context number allocation */ -+#define EP3_CONTEXT_NUM(nodeId) ((ELAN3_KCOMM_BASE_CONTEXT_NUM + (nodeId)) | SYS_CONTEXT_BIT) -+#define EP3_CONTEXT_ISDATA(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) >= ELAN3_KCOMM_BASE_CONTEXT_NUM && \ -+ ((ctx) & MAX_ROOT_CONTEXT_MASK) <= ELAN3_KCOMM_TOP_CONTEXT_NUM) -+#define EP3_CONTEXT_TO_NODE(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) - ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* DMA issueing rings */ -+#define EP3_RING_CRITICAL 0 -+#define EP3_RING_CRITICAL_LEN 128 -+#define EP3_RING_HIGH_PRI 1 -+#define EP3_RING_HIGH_PRI_LEN 64 -+#define EP3_RING_LOW_PRI 2 -+#define EP3_RING_LOW_PRI_LEN 32 -+#define EP3_NUM_RINGS 3 -+ -+/* Value to "return" from c_close() when envelope handled by the trap handler */ -+#define EP3_PAckStolen 4 -+ -+/* unimplemented instruction trap types for thread code */ -+#define EP3_UNIMP_TRAP_NO_DESCS 0 -+#define EP3_UNIMP_TRAP_PACKET_NACKED 1 -+#define EP3_UNIMP_THREAD_HALTED 2 -+#define EP3_NUM_UNIMP_TRAPS 3 -+ -+/* forward declarations */ -+typedef struct ep3_rail EP3_RAIL; -+ -+/* block copy elan3 inputter queue - with waitvent0 */ -+typedef struct ep3_inputqueue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue block copy event */ -+ E3_uint32 q_pad[4]; /* pad to 64 bytes */ -+ E3_Addr q_wevent; /* WaitEvent0 struct */ -+ E3_int32 q_wcount; -+} EP3_InputQueue; -+ -+ -+#if !defined(__ELAN3__) -+ -+/* dma retries types and retry times */ -+typedef struct ep3_retry_dma -+{ -+ struct list_head Link; /* chained on free/retry list */ -+ long RetryTime; /* "lbolt" to retry at */ -+ E3_DMA_BE Dma; /* DMA (in main memory) */ -+} EP3_RETRY_DMA; -+ -+typedef struct ep3_dma_ring -+{ -+ sdramaddr_t pEvent; -+ E3_Addr epEvent; -+ -+ sdramaddr_t pDma; -+ E3_Addr epDma; -+ -+ E3_uint32 *pDoneBlk; -+ E3_Addr epDoneBlk; -+ -+ int Entries; /* number of slots in array */ -+ int Position; /* current position in array */ -+ -+ ioaddr_t CommandPort; -+ ioaddr_t CommandPage; -+ DeviceMappingHandle CommandPageHandle; -+} EP3_DMA_RING; -+ -+#define DMA_RING_EVENT(ring,n) ((ring)->pEvent + (n)*sizeof (E3_BlockCopyEvent)) -+#define DMA_RING_EVENT_ELAN(ring,n) ((ring)->epEvent + (n)*sizeof (E3_BlockCopyEvent)) -+ -+#define DMA_RING_DMA(ring,n) ((ring)->pDma + (n)*sizeof (E3_DMA)) -+#define DMA_RING_DMA_ELAN(ring,n) ((ring)->epDma + (n)*sizeof (E3_DMA)) -+ -+#define DMA_RING_DONE_ELAN(ring,n) ((ring)->epDoneBlk + (n)*sizeof (E3_uint32)) -+ -+/* Event interrupt cookie operations and lookup table */ -+typedef struct ep3_cookie_ops -+{ -+ void (*Event) (EP3_RAIL *rail, void *arg); /* called from the interrupt handler when an event is "set" */ -+ void (*DmaRetry) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error); /* called from the interrupt handler when a DMA is "nacked" */ -+ void (*DmaCancelled)(EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from the interrupt handler/flush disconnecting when cancelled. */ -+ void (*DmaVerify) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from multiple places, to check dma is consistent with state. */ -+} EP3_COOKIE_OPS; -+ -+typedef struct ep3_cookie -+{ -+ struct ep3_cookie *Next; /* Cookies are chained in hash table. */ -+ E3_uint32 Cookie; /* Cooke store in ev_Type */ -+ EP3_COOKIE_OPS *Operations; /* Cookie operations */ -+ void *Arg; /* Users arguement. */ -+} EP3_COOKIE; -+ -+#define EP3_COOKIE_HASH_SIZE (256) -+#define EP3_HASH_COOKIE(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP3_COOKIE_HASH_SIZE-1)) -+ -+typedef struct ep3_cookie_table -+{ -+ spinlock_t Lock; -+ EP3_COOKIE *Entries[EP3_COOKIE_HASH_SIZE]; -+} EP3_COOKIE_TABLE; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#define EP3_EVENT_FREE ((1 << 4) | EV_WCOPY) -+#define EP3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+/* DONE == Cookie */ -+#define EP3_EVENT_FAILED ((3 << 4) | EV_WCOPY) -+#define EP3_EVENT_PRIVATE ((4 << 4) | EV_WCOPY) -+ -+/* The event cookie can get posted (and seen) before the write has */ -+/* hit main memory - in this case the event count is <= 0 and the block */ -+/* will be marked as ACTIVE - but could transition to DONE at any time */ -+/* Also for a word copy event, the value written into the "done" word */ -+/* can be the event interrupt cookie rather than the "source" value */ -+/* this happens since the uCode does not wait for the write to have */ -+/* occured before overwriting TMP_0 with the cookie */ -+#define EP3_EVENT_FIRING(edev, event, cookie, done) \ -+ (((((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) || (done) == EP3_EVENT_ACTIVE) && \ -+ (int) elan3_sdram_readl (edev, (event) + offsetof (E3_BlockCopyEvent, ev_Count)) <= 0) -+#define EP3_EVENT_FIRED(cookie, done) \ -+ (((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) -+ -+ -+/* Time limit to wait while event is firing and block write has not occured */ -+#define EP3_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+#define EP3_INIT_COPY_EVENT(event, cookie, dest, intr) \ -+{ \ -+ (event).ev_Count = 0; \ -+ (event).ev_Type = (intr) ? EV_TYPE_BCOPY | EV_TYPE_EVIRQ | (cookie).Cookie : EV_TYPE_BCOPY; \ -+ (event).ev_Source = (cookie).Cookie | EV_WCOPY; \ -+ (event).ev_Dest = (dest) | EV_TYPE_BCOPY_WORD; \ -+} -+ -+#if !defined(__ELAN3__) -+ -+/* Generic input queues which can be polled */ -+typedef struct ep3_inputq -+{ -+ EP3_COOKIE q_cookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ E3_Addr q_descAddr; -+ -+ E3_Addr q_base; -+ E3_Addr q_top; -+ E3_Addr q_fptr; -+ -+ E3_uint32 q_waitCount; -+} EP3_INPUTQ; -+ -+typedef struct ep3_outputq -+{ -+ EP3_COOKIE q_cookie; -+ -+ unsigned int q_slotCount; /* # slots allocated */ -+ unsigned int q_slotSize; /* size of each slot (rounded up) */ -+ -+ sdramaddr_t q_elan; -+ E3_Addr q_elanAddr; -+ -+ void *q_main; -+ E3_Addr q_mainAddr; -+} EP3_OUTPUTQ; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* per-rail elan memory portion of device */ -+typedef struct ep3_rail_elan -+{ -+ E3_uint16 ProbeSource0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeSource1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_BlockCopyEvent ProbeDone; /* 16 byte aligned */ -+ E3_Event ProbeStart; /* 8 byte aligned */ -+ -+ E3_uint32 ProbeType; /* 4 byte aligned */ -+ E3_uint32 ProbeLevel; -+ -+ E3_uint32 NodeId; -+} EP3_RAIL_ELAN; -+ -+/* values for ProbeType */ -+#define PROBE_SINGLE 0 -+#define PROBE_MULTIPLE 1 -+/* number of attempts for each type */ -+#define PROBE_SINGLE_ATTEMPTS 10 -+#define PROBE_SINGLE_TIMEOUTS 5 -+#define PROBE_MULTIPLE_ATTEMPTS 20 -+#define PROBE_MULTIPLE_TIMEOUTS 10 -+ -+/* per-rail elan memory portsion of device */ -+typedef struct ep3_rail_main -+{ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_uint32 ProbeDone; /* 4 byte aligned */ -+ E3_uint32 ProbeResult; -+ E3_uint32 ProbeLevel; -+} EP3_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+struct ep3_rail -+{ -+ EP_RAIL Generic; /* Generic rail */ -+ -+ ELAN3_DEV *Device; /* Elan device we're using */ -+ ELAN3_CTXT *Ctxt; /* Elan context struct */ -+ ioaddr_t CommandPort; /* commandport from context */ -+ E3_Addr CommandPortAddr; /* and address mapped into elan */ -+ -+ ELAN3_ROUTE_TABLE *RouteTable; /* routetable from context */ -+ ELAN3MMU *Elan3mmu; /* elanmmu from context */ -+ -+ EP3_COOKIE_TABLE CookieTable; /* Event cookie table */ -+ -+ EP_CODE ThreadCode; /* copy of thread code */ -+ unsigned int CommandPortEventTrap; /* flag to indicate command port eventint queue overflow trap */ -+ -+ sdramaddr_t RailElan; /* Elan visible main/sdram portions of */ -+ E3_Addr RailElanAddr; /* device structure */ -+ EP3_RAIL_MAIN *RailMain; -+ E3_Addr RailMainAddr; -+ -+ /* small system message queues */ -+ sdramaddr_t QueueDescs; /* Input Queue descriptors */ -+ -+ /* Network position prober */ -+ E3_Addr ProbeStack; /* Network position thread command structure */ -+ EP3_COOKIE ProbeCookie; /* event cookie for Done event */ -+ kcondvar_t ProbeWait; /* place to wait on probe thread */ -+ spinlock_t ProbeLock; /* and lock */ -+ volatile int ProbeDone; /* and flag to indicate it's done */ -+ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* last result of CheckNetworkPosition */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ E3_uint32 ProbeResult; -+ -+ long ProbeLevelTick[ELAN_MAX_LEVELS]; -+ long SwitchBroadcastLevelTick; -+ -+ /* rings for issueing dmas */ -+ EP3_DMA_RING DmaRings[EP3_NUM_RINGS]; -+ -+ /* retry lists for dmas */ -+ struct list_head DmaRetries[EP_NUM_RETRIES]; /* Dma retry lists */ -+ struct list_head DmaRetryFreeList; /* and free list */ -+ u_int DmaRetryCount; /* and total retry count */ -+ u_int DmaRetryReserved; /* and number reserved */ -+ u_int DmaRetryThreadShouldStall; /* count of reasons to stall retries */ -+ u_int DmaRetryThreadStarted:1; /* dma retry thread running */ -+ u_int DmaRetryThreadShouldStop:1; /* but should stop */ -+ u_int DmaRetryThreadStopped:1; /* and now it's stopped */ -+ u_int DmaRetryInitialised:1; /* have initialise dma retries */ -+ -+ spinlock_t DmaRetryLock; /* spinlock protecting lists */ -+ kcondvar_t DmaRetryWait; /* place retry thread sleeps */ -+ long DmaRetryTime; /* and when it will next wakeup */ -+ unsigned int DmaRetrySleeping; /* and it's sleeping there */ -+ -+ /* Network Identify Cookies */ -+ E3_uint32 *MainCookies; /* One cookie allocator per-node for main*/ -+ E3_Addr ElanCookies; /* and one for elan */ -+ spinlock_t CookieLock; /* spinlock to protect main cookies */ -+ -+ /* Halt operation flags for flushing. */ -+ kmutex_t HaltOpMutex; /* serialize access to halt operations */ -+ unsigned int HaltOpCompleted; /* flag to indicate halt operation completed */ -+ kcondvar_t HaltOpSleep; /* place to wait for it to complete */ -+ -+ /* Network error state */ -+ kcondvar_t NetworkErrorSleep; /* place to sleep for network error halt operation */ -+ u_int NetworkErrorFlushed; /* and flag to indicate flushed */ -+ -+ -+ EP3_RAIL_STATS Stats; /* statistics */ -+}; -+ -+/* support.c */ -+ -+extern ELAN3_OPS ep3_elan3_ops; -+ -+extern E3_uint32 LocalCookie (EP3_RAIL *rail, unsigned int remoteNode); -+extern E3_uint32 RemoteCookie (EP3_RAIL *rail, unsigned int remoteNode); -+ -+extern void InitialiseCookieTable (EP3_COOKIE_TABLE *table); -+extern void DestroyCookieTable (EP3_COOKIE_TABLE *table); -+extern void RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie, -+ E3_Addr event, EP3_COOKIE_OPS *ops, void *arg); -+extern void DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie); -+extern EP3_COOKIE *LookupCookie (EP3_COOKIE_TABLE *table, uint32_t cookie); -+extern EP3_COOKIE *LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr); -+ -+extern int DmaRingsCreate (EP3_RAIL *rail); -+extern void DmaRingsRelease (EP3_RAIL *rail); -+extern int IssueDma (EP3_RAIL *rail, E3_DMA_BE *dma, int type, int retryThread); -+ -+extern int IssueWaitevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueSetevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueRunThread (EP3_RAIL *rail, E3_Addr value); -+extern long DmaRetryTime (int type); -+extern int InitialiseDmaRetries (EP3_RAIL *rail); -+extern void DestroyDmaRetries (EP3_RAIL *rail); -+extern int ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr); -+extern void ReleaseDmaRetries (EP3_RAIL *rail, int count); -+extern void StallDmaRetryThread (EP3_RAIL *rail); -+extern void ResumeDmaRetryThread (EP3_RAIL *rail); -+extern void QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval); -+extern void QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma); -+extern void FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId); -+ -+extern void SetQueueLocked(EP3_RAIL *rail, sdramaddr_t qaddr); -+ -+/* threadcode_elan3.c */ -+extern E3_Addr ep3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, -+ int stackSize, int nargs, ...); -+ -+/* probenetwork.c */ -+extern int ep3_init_probenetwork (EP3_RAIL *rail); -+extern void ep3_destroy_probenetwork (EP3_RAIL *rail); -+extern void ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos); -+extern int ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep3_check_position (EP_RAIL *rail); -+ -+/* neterr_elan3.c */ -+extern void ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* kcomm_elan3.c */ -+extern EP_RAIL *ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev); -+extern void ep3_destroy_rail (EP_RAIL *rail); -+ -+extern int ep3_start_rail (EP_RAIL *rail); -+extern void ep3_stall_rail (EP_RAIL *rail); -+extern void ep3_stop_rail (EP_RAIL *rail); -+ -+extern void ep3_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep3_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep3_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep3_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep3_flush_tlb (EP_RAIL *r); -+extern void ep3_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep3_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep3_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+extern void ep3_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* kmap_elan3.c */ -+extern void ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm); -+extern void ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep3_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan3.c */ -+extern EP_INPUTQ *ep3_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep3_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* support_elan3.c */ -+extern void ep3_flush_filters (EP_RAIL *r); -+extern void ep3_flush_queues (EP_RAIL *r); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#endif /* __EP_KCOMM_ELAN3_H */ -diff -urN clean/drivers/net/qsnet/ep/kcomm_elan4.c linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan4.c ---- clean/drivers/net/qsnet/ep/kcomm_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan4.c 2004-11-30 07:02:06.000000000 -0500 -@@ -0,0 +1,526 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan4.c,v 1.19 2004/11/30 12:02:06 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan4; -+ -+unsigned int -+ep4_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN4_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan4_reference_device (i, ELAN4_STATE_STARTED)) != NULL) -+ { -+ if ((rail = ep4_create_rail (sys, dev)) == NULL) -+ elan4_dereference_device (dev); -+ else -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ if (rmask) -+ qsnet_debug_alloc(); -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev) -+{ -+ EP4_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP4_RAIL *, sizeof (EP4_RAIL), 1); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->r_generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->r_ctxt.ctxt_dev = dev; -+ -+ /* install our rail operations */ -+ rail->r_generic.Operations.DestroyRail = ep4_destroy_rail; -+ rail->r_generic.Operations.StartRail = ep4_start_rail; -+ rail->r_generic.Operations.StallRail = ep4_stall_rail; -+ rail->r_generic.Operations.StopRail = ep4_stop_rail; -+ -+ rail->r_generic.Operations.SdramAlloc = ep4_sdram_alloc; -+ rail->r_generic.Operations.SdramFree = ep4_sdram_free; -+ rail->r_generic.Operations.SdramWriteb = ep4_sdram_writeb; -+ -+ rail->r_generic.Operations.KaddrMap = ep4_kaddr_map; -+ rail->r_generic.Operations.SdramMap = ep4_sdram_map; -+ rail->r_generic.Operations.Unmap = ep4_unmap; -+ -+ rail->r_generic.Operations.DvmaReserve = ep4_dvma_reserve; -+ rail->r_generic.Operations.DvmaRelease = ep4_dvma_release; -+ rail->r_generic.Operations.DvmaSetPte = ep4_dvma_set_pte; -+ rail->r_generic.Operations.DvmaReadPte = ep4_dvma_read_pte; -+ rail->r_generic.Operations.DvmaUnload = ep4_dvma_unload; -+ rail->r_generic.Operations.FlushTlb = ep4_flush_tlb; -+ -+ rail->r_generic.Operations.ProbeRoute = ep4_probe_route; -+ -+ rail->r_generic.Operations.PositionFound = ep4_position_found; -+ rail->r_generic.Operations.CheckPosition = ep4_check_position; -+ rail->r_generic.Operations.NeterrFixup = ep4_neterr_fixup; -+ -+ rail->r_generic.Operations.LoadSystemRoute = ep4_load_system_route; -+ -+ rail->r_generic.Operations.LoadNodeRoute = ep4_load_node_route; -+ rail->r_generic.Operations.UnloadNodeRoute = ep4_unload_node_route; -+ rail->r_generic.Operations.LowerFilter = ep4_lower_filter; -+ rail->r_generic.Operations.RaiseFilter = ep4_raise_filter; -+ rail->r_generic.Operations.NodeDisconnected = ep4_node_disconnected; -+ -+ rail->r_generic.Operations.FlushFilters = ep4_flush_filters; -+ rail->r_generic.Operations.FlushQueues = ep4_flush_queues; -+ -+ rail->r_generic.Operations.AllocInputQ = ep4_alloc_inputq; -+ rail->r_generic.Operations.FreeInputQ = ep4_free_inputq; -+ rail->r_generic.Operations.EnableInputQ = ep4_enable_inputq; -+ rail->r_generic.Operations.DisableInputQ = ep4_disable_inputq; -+ rail->r_generic.Operations.PollInputQ = ep4_poll_inputq; -+ -+ rail->r_generic.Operations.AllocOutputQ = ep4_alloc_outputq; -+ rail->r_generic.Operations.FreeOutputQ = ep4_free_outputq; -+ rail->r_generic.Operations.OutputQMsg = ep4_outputq_msg; -+ rail->r_generic.Operations.OutputQState = ep4_outputq_state; -+ rail->r_generic.Operations.OutputQSend = ep4_outputq_send; -+ -+ rail->r_generic.Operations.FillOutStats = ep4_fillout_stats; -+ rail->r_generic.Operations.Debug = ep4_debug_rail; -+ -+ rail->r_generic.Devinfo = dev->dev_devinfo; -+ -+ printk ("%s: connected via elan4 rev%c device %d\n", rail->r_generic.Name, -+ 'a' + dev->dev_devinfo.dev_revision_id, dev->dev_instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep4_destroy_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_dereference_device (rail->r_ctxt.ctxt_dev); -+ -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+} -+ -+static int -+ep4_attach_rail (EP4_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ if (elan4_insertctxt (dev, &rail->r_ctxt, &ep4_trap_ops) != 0) -+ return -ENOMEM; -+ -+ if ((rail->r_routetable = elan4_alloc_routetable (dev, 4)) == NULL) /* 512 << 4 == 8192 entries */ -+ { -+ elan4_removectxt (dev, &rail->r_ctxt); -+ return -ENOMEM; -+ } -+ elan4_set_routetable (&rail->r_ctxt, rail->r_routetable); -+ -+ /* Attach to the kernel comms nextwork context */ -+ if (elan4_attach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM) < 0) -+ { -+ elan4_free_routetable (dev, rail->r_routetable); -+ elan4_removectxt (dev, &rail->r_ctxt); -+ -+ return -EBUSY; -+ } -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_attach_filter (&rail->r_ctxt, ctx); -+ -+ return 0; -+} -+ -+static void -+ep4_detach_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ elan4_detach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_detach_filter (&rail->r_ctxt, ctx); -+ -+ if (rail->r_routetable) -+ { -+ elan4_set_routetable (&rail->r_ctxt, NULL); -+ elan4_free_routetable (dev, rail->r_routetable); -+ } -+ -+ elan4_removectxt (dev, &rail->r_ctxt); -+} -+ -+int -+ep4_start_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_InputQueue qdesc; -+ int i, res; -+ -+ if ((res = ep4_attach_rail (rail)) < 0) -+ return res; -+ -+ /* Initialise main interrupt cookie table */ -+ spin_lock_init (&rail->r_intcookie_lock); -+ for (i = 0; i < EP4_INTCOOKIE_HASH_SIZE; i++) -+ INIT_LIST_HEAD (&rail->r_intcookie_hash[i]); -+ -+ kmutex_init (&rail->r_haltop_mutex); -+ kcondvar_init (&rail->r_haltop_sleep); -+ spin_lock_init (&rail->r_haltop_lock); -+ -+ spin_lock_init (&rail->r_cookie_lock); -+ -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_EVENT]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_ATOMIC]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_SINGLE]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_MAIN]); -+ spin_lock_init (&rail->r_ecq_lock); -+ -+ ep_kthread_init (&rail->r_retry_thread); -+ INIT_LIST_HEAD (&rail->r_retry_ops); -+ -+ INIT_LIST_HEAD (&rail->r_neterr_ops); -+ -+ kmutex_init (&rail->r_flush_mutex); -+ kcondvar_init (&rail->r_flush_sleep); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->r_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RAIL_ELAN), 0, &rail->r_elan_addr)) == 0 || -+ (rail->r_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RAIL_MAIN), 0, &rail->r_main_addr)) == 0) -+ { -+ goto failed; -+ } -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[i].ev_CountAndType), 0); -+ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ /* avoid sdram address aliasing by allocating the min sdram pagesize */ -+ if (! (rail->r_queuedescs= ep_alloc_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE, SDRAM_PAGE_SIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_copyq_to_sdram (dev, &qdesc, EP_SYSTEMQ_DESC (rail->r_queuedescs, i), sizeof (E4_InputQueue)); -+ -+ /* Allocate the resource map for command queue mappings */ -+ if ((rail->r_ecq_rmap = ep_rmallocmap (EP4_ECQ_RMAPSIZE, "r_ecq_rmap", 1)) == NULL) -+ goto failed; -+ -+ ep_rmfree (rail->r_ecq_rmap, EP4_ECQ_TOP - EP4_ECQ_BASE, EP4_ECQ_BASE); -+ -+ /* register an interrupt cookie & allocate command queues for command queue flushing */ -+ rail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4); -+ rail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1); -+ -+ if (rail->r_flush_mcq == NULL || rail->r_flush_ecq == NULL) -+ goto failed; -+ -+ ep4_register_intcookie (rail, &rail->r_flush_intcookie, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), ep4_flush_interrupt, rail); -+ -+ /* startup the retry thread */ -+ if (kernel_thread_create (ep4_retry_thread, (void *) rail) == 0) -+ goto failed; -+ ep_kthread_started (&rail->r_retry_thread); -+ -+ ep4_initialise_dma_retries (rail); -+ -+ if ((rail->r_event_ecq = ep4_alloc_ecq (rail, CQ_Size1K)) == NULL) -+ goto failed; -+ -+ rail->r_threadcode = threadcode_elan4; -+ if (ep_loadcode (&rail->r_generic, &rail->r_threadcode)) -+ goto failed; -+ -+ elan4_flush_icache (&rail->r_ctxt); -+ -+ if (ep4_probe_init (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 0; -+ -+ failed: -+ printk ("ep4_start_rail: failed for rail '%s'\n", rail->r_generic.Name); -+ ep4_stop_rail (&rail->r_generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep4_stall_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ unsigned ctx; -+ -+ /* Raise all the context filters */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_DISCARD_ALL); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_set_filter (&rail->r_ctxt, ctx, E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_stop_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ if (rail->r_generic.State == EP_RAIL_STATE_RUNNING) /* undo ep4_position_found() */ -+ { -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ EP_ADDR addr = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies)); -+ -+ ep_free_elan (&rail->r_generic, addr, pos->pos_nodes * sizeof (E4_uint64)); -+ -+ KMEM_FREE (rail->r_cookies, pos->pos_nodes * sizeof (E4_uint64)); -+ } -+ -+ ep4_probe_destroy (rail); -+ -+ ep_unloadcode (&rail->r_generic, &rail->r_threadcode); -+ -+ if (rail->r_event_ecq) -+ ep4_free_ecq (rail, rail->r_event_ecq); -+ rail->r_event_ecq = NULL; -+ -+ ep4_finalise_dma_retries (rail); -+ -+ ep_kthread_stop (&rail->r_retry_thread); -+ ep_kthread_destroy (&rail->r_retry_thread); -+ -+ if (rail->r_flush_intcookie.int_arg) -+ ep4_deregister_intcookie (rail, &rail->r_flush_intcookie); -+ rail->r_flush_intcookie.int_arg = NULL; -+ -+ if (rail->r_flush_mcq) -+ ep4_put_ecq (rail, rail->r_flush_mcq, 4); -+ rail->r_flush_mcq = NULL; -+ -+ if (rail->r_flush_ecq) -+ ep4_put_ecq (rail, rail->r_flush_ecq, 1); -+ rail->r_flush_ecq = NULL; -+ -+ if (rail->r_ecq_rmap) -+ ep_rmfreemap (rail->r_ecq_rmap); -+ -+ if (rail->r_queuedescs) -+ ep_free_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE); -+ rail->r_queuedescs = 0; -+ -+ if (rail->r_elan) -+ ep_free_elan (&rail->r_generic, rail->r_elan_addr, sizeof (EP4_RAIL_ELAN)); -+ rail->r_elan = 0; -+ -+ if (rail->r_main) -+ ep_free_main (&rail->r_generic, rail->r_main_addr, sizeof (EP4_RAIL_MAIN)); -+ rail->r_main = NULL; -+ -+ kcondvar_destroy (&rail->r_flush_sleep); -+ kmutex_destroy (&rail->r_flush_mutex); -+ -+ spin_lock_destroy (&rail->r_ecq_lock); -+ spin_lock_destroy (&rail->r_cookie_lock); -+ -+ spin_lock_destroy (&rail->r_haltop_lock); -+ kcondvar_destroy(&rail->r_haltop_sleep); -+ kmutex_destroy (&rail->r_haltop_mutex); -+ spin_lock_destroy (&rail->r_intcookie_lock); -+ -+ ep4_detach_rail (rail); -+} -+ -+void -+ep4_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ sdramaddr_t cookies; -+ EP_ADDR addr; -+ int i; -+ -+ KMEM_ZALLOC (rail->r_cookies, E4_uint64 *, pos->pos_nodes * sizeof (E4_uint64), 1); -+ -+ if (! (cookies = ep_alloc_elan (&rail->r_generic, pos->pos_nodes * sizeof (E4_uint64), 0, &addr))) -+ panic ("ep4_position_found: cannot allocate elan cookies array\n"); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, cookies + (i * sizeof (E4_uint64)), 0); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ rail->r_cookies[i] = 0; -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_nodeid), pos->pos_nodeid); -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies), addr); -+ -+ ep4_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep4_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ ELAN4_DEV *dev = ((EP4_RAIL *) r)->r_ctxt.ctxt_dev; -+ -+ if (size >= SDRAM_PAGE_SIZE) -+ return elan4_sdram_alloc (dev, size); -+ else -+ { -+ sdramaddr_t block = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ sdramaddr_t sdram = block + (addr & (SDRAM_PAGE_SIZE-1)); -+ -+ /* free of the portion before sdram */ -+ if (sdram > block) -+ elan4_sdram_free (dev, block, sdram - block); -+ -+ /* free of the portion after sdram */ -+ if ((block + SDRAM_PAGE_SIZE) > (sdram + size)) -+ elan4_sdram_free (dev, sdram + size, block + SDRAM_PAGE_SIZE - (sdram + size)); -+ -+ return sdram; -+ } -+} -+ -+void -+ep4_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan4_sdram_free (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, size); -+} -+ -+void -+ep4_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan4_sdram_writeb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, val); -+} -+ -+void -+ep4_flush_tlb (EP_RAIL *r) -+{ -+ elan4mmu_flush_tlb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev); -+} -+ -+void -+ep4_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, ELAN4_KCOMM_CONTEXT_NUM, -+ lowNode, highNode, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_system_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, vp, &route); -+} -+ -+void -+ep4_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, EP4_CONTEXT_NUM(rail->r_generic.Position.pos_nodeid), -+ nodeId, nodeId, FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_node_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, EP_VP_DATA(nodeId), &route); -+} -+ -+void -+ep4_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ elan4_invalidate_route (dev, rail->r_routetable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep4_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_HIGH_PRI); -+} -+ -+void -+ep4_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ ep4_free_stalled_dmas ((EP4_RAIL *) r, nodeId); -+} -+ -+void -+ep4_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP4_RAIL *ep4rail = (EP4_RAIL *)r; */ -+} -diff -urN clean/drivers/net/qsnet/ep/kcomm_elan4.h linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan4.h ---- clean/drivers/net/qsnet/ep/kcomm_elan4.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm_elan4.h 2005-07-20 08:01:34.000000000 -0400 -@@ -0,0 +1,443 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN4_H -+#define __EP_KCOMM_ELAN4_H -+ -+#ident "@(#)$Id: kcomm_elan4.h,v 1.19.2.1 2005/07/20 12:01:34 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.h,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#if !defined(__elan4__) -+#include -+#endif /* ! defined(__elan4__) */ -+ -+/* private address allocation */ -+#define EP4_TEXT_BASE 0xF8000000 /* base address for thread code (defined in makerules.elan4) */ -+#define EP4_ECQ_BASE 0xFF000000 /* address space for mapping command queues */ -+#define EP4_ECQ_TOP 0xFF800000 -+ -+#define EP4_ECQ_RMAPSIZE 128 -+#define EP4_STACK_SIZE 1024 /* default thread code stack size */ -+#define EP4_MAX_LEVELS 8 /* same as ELAN_MAX_LEVELS */ -+ -+/* context number allocation */ -+#define EP4_CONTEXT_NUM(nodeId) (ELAN4_KCOMM_BASE_CONTEXT_NUM + (nodeId)) -+#define EP4_CONTEXT_ISDATA(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN4_KCOMM_TOP_CONTEXT_NUM) -+#define EP4_CONTEXT_TO_NODE(ctx) ((ctx) - ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+/* -+ * network error cookie format: -+ * ------------------------------------------------- -+ * | unique cookie value | Remote | DMA | Location | -+ * ------------------------------------------------- -+ * [63:4] Cookie - unique cookie number -+ * [3] Thread - cookie generated by thread code -+ * [2] Remote - cookie generated by remote end -+ * [1] STEN - cookie is for a STEN packet -+ * [0] DMA - cookie is for a DMA -+ */ -+#define EP4_COOKIE_DMA (1 << 0) -+#define EP4_COOKIE_STEN (1 << 1) -+#define EP4_COOKIE_REMOTE (1 << 2) -+#define EP4_COOKIE_THREAD (1 << 3) -+#define EP4_COOKIE_INC (1ull << 4) -+ -+#define EP4_COOKIE_STRING(val) (long long)(((val) & ~(EP4_COOKIE_INC-1)) >> 4), \ -+ ((val) & EP4_COOKIE_DMA) ? ",dma" : "", \ -+ ((val) & EP4_COOKIE_REMOTE) ? ",remote" : "", \ -+ ((val) & EP4_COOKIE_THREAD) ? ",thread" : "", \ -+ ((val) & EP4_COOKIE_STEN) ? ",sten" : "" -+/* -+ * Done "word" values -+ */ -+#define EP4_STATE_FREE 0 -+#define EP4_STATE_ACTIVE 1 -+#define EP4_STATE_FINISHED 2 -+#define EP4_STATE_FAILED 3 -+#define EP4_STATE_PRIVATE 4 -+ -+#define EP4_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+/* forward declarations */ -+typedef struct ep4_rail EP4_RAIL; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_intcookie -+{ -+ struct list_head int_link; -+ E4_uint64 int_val; -+ void (*int_callback)(EP4_RAIL *rail, void *arg); -+ void *int_arg; -+} EP4_INTCOOKIE; -+ -+#define EP4_INTCOOKIE_HASH_SIZE 256 -+#define EP4_INTCOOKIE_HASH(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP4_INTCOOKIE_HASH_SIZE-1)) -+ -+typedef struct ep4_ecq -+{ -+ struct list_head ecq_link; /* linked on r_ecq_list */ -+ ELAN4_INTOP ecq_intop; /* main interrupt op space */ -+ ELAN4_CQ *ecq_cq; /* command queue */ -+ E4_Addr ecq_addr; /* address mapped into elan */ -+ unsigned int ecq_avail; /* # dwords still available */ -+ -+ spinlock_t ecq_lock; /* spinlock for main accesses */ -+ sdramaddr_t ecq_event; /* event for flushing "event" queues */ -+ EP_ADDR ecq_event_addr; -+ struct ep4_ecq *ecq_flushcq; /* and command port to issue setevent to */ -+} EP4_ECQ; -+ -+#define EP4_ECQ_EVENT 0 /* command queues targetted by multi-blocks events */ -+#define EP4_ECQ_ATOMIC 1 /* command queues targetted by atomic store operations */ -+#define EP4_ECQ_SINGLE 2 /* command queues targetted by single word commands from main */ -+#define EP4_ECQ_MAIN 3 /* command queues targetted by multi word commands from main */ -+#define EP4_NUM_ECQ 4 -+ -+#define EP4_ECQ_Size(which) ((which) == EP4_ECQ_EVENT ? CQ_Size64K : \ -+ (which) == EP4_ECQ_ATOMIC ? CQ_Size8K : \ -+ (which) == EP4_ECQ_SINGLE ? CQ_Size1K : \ -+ (which) == EP4_ECQ_MAIN ? CQ_Size8K : \ -+ CQ_Size1K) -+ -+typedef struct ep4_dma_retry -+{ -+ struct list_head retry_link; /* chained on free/retry list */ -+ unsigned long retry_time; /* "lbolt" to retry at */ -+ E4_DMA retry_dma; /* DMA (in main memory) */ -+} EP4_DMA_RETRY; -+ -+#define EP4_DMA_RETRY_CQSIZE CQ_Size8K /* size of command queue for dma retry */ -+#define EP4_DMA_RETRY_FLOWCNT (CQ_Size(EP4_DMA_RETRY_CQSIZE)/72) /* # of reissued DMA's which can fit in */ -+ -+typedef struct ep4_inputq -+{ -+ EP4_INTCOOKIE q_intcookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ EP_ADDR q_descAddr; -+ EP_ADDR q_eventAddr; -+ EP4_ECQ *q_wcq; /* command queue to issue waitevent to */ -+ EP4_ECQ *q_ecq; /* command queue targetted by event to generate interrupt */ -+ -+ EP_ADDR q_fptr; /* cached current front pointer */ -+ EP_ADDR q_last; /* elan addr for last queue slot */ -+ -+ atomic_t q_fired; /* atomic flag that interrupt received */ -+ unsigned int q_count; /* count of slots consumed */ -+} EP4_INPUTQ; -+ -+typedef struct ep4_outputq -+{ -+ spinlock_t q_lock; -+ unsigned int q_slotCount; -+ unsigned int q_slotSize; -+ unsigned int q_dwords; -+ ELAN4_CQ *q_cq; -+ void *q_main; -+ EP_ADDR q_mainAddr; -+ unsigned int q_retries; -+} EP4_OUTPUTQ; -+ -+#endif /* ! defined(__elan4__) */ -+ -+typedef struct ep4_check_sten -+{ -+ E4_uint64 c_reset_event_cmd; /* WRITEDWORD to reset start event */ -+ E4_uint64 c_reset_event_value; -+ -+ E4_uint64 c_open; /* OPEN VP_PROBE(lvl) */ -+ E4_uint64 c_trans_traceroute0; /* SENDTRANS TR_TRACEROUTE 0s */ -+ E4_uint64 c_addr_traceroute0; -+ E4_uint64 c_data_traceroute0[8]; -+ E4_uint64 c_trans_traceroute1; /* SENDTRANS TR_TRACEROUTE 1s */ -+ E4_uint64 c_addr_traceroute1; -+ E4_uint64 c_data_traceroute1[8]; -+ E4_uint64 c_trans_sendack; /* SENDTRANS SENDACK */ -+ E4_uint64 c_addr_sendack; -+ -+ E4_uint64 c_guard_ok; /* GUARD OK - write level */ -+ E4_uint64 c_writedword_ok; -+ E4_uint64 c_value_ok; -+ -+ E4_uint64 c_guard_fail; /* GUARD FAIL - chain setevent/write fail */ -+ E4_uint64 c_setevent_fail; -+ E4_uint64 c_setevent_nop; -+ E4_uint64 c_nop_pad; -+} EP4_CHECK_STEN; -+ -+#define EP4_CHECK_STEN_NDWORDS (sizeof (EP4_CHECK_STEN) >> 3) -+ -+typedef struct ep4_rail_elan -+{ -+ EP4_CHECK_STEN r_check_sten[EP4_MAX_LEVELS]; -+ E4_Event32 r_check_fail; /* Check failed (== r_check_start[-1]) */ -+ E4_Event32 r_check_start[EP4_MAX_LEVELS]; -+ -+ E4_Event32 r_qevents[EP_NUM_SYSTEMQ]; -+ E4_Event32 r_flush_event; -+ -+ E4_uint64 r_nodeid; -+#ifdef __elan4__ -+ E4_uint64 *r_cookies; -+#else -+ E4_Addr r_cookies; -+#endif -+} EP4_RAIL_ELAN; -+ -+#define TRACEROUTE_ENTRIES 16 /* 2 * ELAN_MAX_LEVELS */ -+#define TRACEROUTE_NDWORDS (TRACEROUTE_ENTRIES/2) -+ -+typedef struct ep4_rail_main -+{ -+ E4_uint32 r_probe_dest0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_dest1[TRACEROUTE_ENTRIES]; -+ E4_uint64 r_probe_result; -+ E4_uint64 r_probe_level; -+ -+ E4_uint64 r_dma_flowcnt; /* count of dma's queued */ -+} EP4_RAIL_MAIN; -+ -+#define EP4_PROBE_ACTIVE (0xffff) -+#define EP4_PROBE_FAILED (0xfffe) -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_retry_ops -+{ -+ struct list_head op_link; -+ unsigned long (*op_func)(EP4_RAIL *rail, void *arg, unsigned long nextRunTime); -+ void *op_arg; -+} EP4_RETRY_OPS; -+ -+typedef struct ep4_neterr_ops -+{ -+ struct list_head op_link; -+ void (*op_func) (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ void *op_arg; -+} EP4_NETERR_OPS; -+ -+struct ep4_rail -+{ -+ EP_RAIL r_generic; -+ ELAN4_CTXT r_ctxt; -+ ELAN4_ROUTE_TABLE *r_routetable; -+ -+ spinlock_t r_intcookie_lock; -+ struct list_head r_intcookie_hash[EP4_INTCOOKIE_HASH_SIZE]; -+ -+ sdramaddr_t r_elan; -+ EP_ADDR r_elan_addr; -+ EP4_RAIL_MAIN *r_main; -+ EP_ADDR r_main_addr; -+ -+ EP_CODE r_threadcode; /* copy of thread code */ -+ -+ sdramaddr_t r_queuedescs; /* systemq queue descriptors */ -+ -+ E4_uint64 *r_cookies; /* network error cookies */ -+ spinlock_t r_cookie_lock; /* and spin lock */ -+ -+ kcondvar_t r_probe_wait; /* network position probing */ -+ spinlock_t r_probe_lock; -+ volatile int r_probe_done; -+ EP4_INTCOOKIE r_probe_intcookie; -+ EP4_ECQ *r_probe_cq; -+ E4_uint32 r_probe_source0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_source1[TRACEROUTE_ENTRIES]; -+ -+ kmutex_t r_haltop_mutex; /* halt/flush operations */ -+ ELAN4_HALTOP r_haltop; -+ ELAN4_DMA_FLUSHOP r_flushop; -+ kcondvar_t r_haltop_sleep; -+ spinlock_t r_haltop_lock; -+ -+ struct list_head r_ecq_list[EP4_NUM_ECQ]; /* list of statically allocated command queues */ -+ EP_RMAP *r_ecq_rmap; /* resource map for command queue mappings */ -+ spinlock_t r_ecq_lock; /* spinlock for list/space management */ -+ -+ kmutex_t r_flush_mutex; /* serialize command queue flushing */ -+ unsigned long r_flush_count; /* # setevents issued for flushing */ -+ EP4_ECQ *r_flush_mcq; /* and command queue for waitevent */ -+ EP4_ECQ *r_flush_ecq; /* and command queue for interrupt */ -+ EP4_INTCOOKIE r_flush_intcookie; /* and interrupt cookie */ -+ kcondvar_t r_flush_sleep; /* and place to sleep ... */ -+ -+ EP_KTHREAD r_retry_thread; /* retry thread */ -+ struct list_head r_retry_ops; /* list of retry operations */ -+ -+ EP4_RETRY_OPS r_dma_ops; /* dma retry operations */ -+ EP4_ECQ *r_dma_ecq; /* command queue to reissue DMAs */ -+ E4_uint64 r_dma_flowcnt; /* count of dma's reissued */ -+ struct list_head r_dma_retrylist[EP_NUM_RETRIES]; /* retry lists */ -+ struct list_head r_dma_freelist; /* and free list */ -+ spinlock_t r_dma_lock; /* and spinlock to protect lists */ -+ unsigned long r_dma_allocated; /* # retries allocated*/ -+ unsigned long r_dma_reserved; /* # retries reserved */ -+ -+ EP4_ECQ *r_event_ecq; /* command queue for occasional setevents */ -+ -+ struct list_head r_neterr_ops; /* list of neterr fixup operations */ -+ -+ ELAN4_IPROC_TRAP r_iproc_trap; -+ ELAN4_TPROC_TRAP r_tproc_trap; -+} ; -+ -+#define EP4_CTXT_TO_RAIL(ctxt) ((EP4_RAIL *) (((unsigned long) (ctxt)) - offsetof (EP4_RAIL, r_ctxt))) -+ -+#if defined(DEBUG_ASSERT) -+#define EP4_ASSERT(rail,EXPR) EP_ASSERT(&((rail)->r_generic), EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) EP4_ASSERT(rail, (sdram_assert ? elan4_sdram_readq ((rail)->r_ctxt.ctxt_dev, (off)) == (value) : 1)) -+#else -+#define EP4_ASSERT(rail,EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) -+#endif -+ -+/* kcomm_elan4.c */ -+extern EP_RAIL *ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev); -+extern void ep4_destroy_rail (EP_RAIL *rail); -+ -+extern int ep4_start_rail (EP_RAIL *rail); -+extern void ep4_stall_rail (EP_RAIL *rail); -+extern void ep4_stop_rail (EP_RAIL *rail); -+ -+extern void ep4_debug_rail (EP_RAIL *rail); -+ -+extern void ep4_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep4_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep4_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep4_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep4_flush_tlb (EP_RAIL *r); -+extern void ep4_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep4_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep4_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+/* kmap_elan4.c */ -+extern void ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_cport_map (EP_RAIL *r, EP_ADDR eaddr, unsigned long cqaddr, unsigned int len, unsigned int perm); -+extern void ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan4.c */ -+extern EP_INPUTQ *ep4_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep4_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* probenetwork_elan4.c */ -+extern int ep4_probe_init (EP4_RAIL *r); -+extern void ep4_probe_destroy (EP4_RAIL *r); -+extern void ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos); -+extern int ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep4_check_position (EP_RAIL *rail); -+ -+/* support_elan4.c */ -+extern ELAN4_TRAP_OPS ep4_trap_ops; -+extern void ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg); -+extern void ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp); -+extern EP4_INTCOOKIE *ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie); -+extern E4_uint64 ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node); -+ -+extern void ep4_flush_filters (EP_RAIL *r); -+extern void ep4_flush_queues (EP_RAIL *r); -+extern void ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc); -+ -+extern EP4_ECQ *ep4_alloc_ecq (EP4_RAIL *rail, unsigned int cqsize); -+extern void ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq); -+extern EP4_ECQ *ep4_get_ecq (EP4_RAIL *rail, unsigned int which, unsigned int ndwords); -+extern void ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned int ndwords); -+ -+extern void ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag); -+extern void ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event); -+extern void ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+ -+extern void ep4_flush_interrupt (EP4_RAIL *rail, void *arg); -+extern void ep4_flush_ecqs (EP4_RAIL *rail); -+ -+extern void ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...); -+ -+extern void ep4_initialise_dma_retries (EP4_RAIL *rail); -+extern void ep4_finalise_dma_retries (EP4_RAIL *rail); -+extern int ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, unsigned int attr); -+extern void ep4_release_dma_retries(EP4_RAIL *rail, unsigned int count); -+extern void ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval); -+extern void ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma); -+extern void ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId); -+extern void ep4_display_rail (EP4_RAIL *rail); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+/* neterr_elan4.c */ -+extern void ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* commands_elan4.c */ -+extern void elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag); -+extern void elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype); -+extern void elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie); -+extern void elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs); -+extern void elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma); -+extern void elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event); -+extern void elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count); -+extern void elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+extern void elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_guard (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr); -+extern void elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0); -+extern void elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1); -+extern void elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...); -+extern void elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+extern void ep4_fillout_stats(EP_RAIL *rail, char *str); -+ -+#endif /* ! defined(__elan4__) */ -+ -+#endif /* __EP_KCOMM_ELAN4_H */ -diff -urN clean/drivers/net/qsnet/ep/kcomm_vp.h linux-2.6.9/drivers/net/qsnet/ep/kcomm_vp.h ---- clean/drivers/net/qsnet/ep/kcomm_vp.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kcomm_vp.h 2004-03-24 06:32:56.000000000 -0500 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_VP_H -+#define __EP_KCOMM_VP_H -+ -+#ident "@(#)$Id: kcomm_vp.h,v 1.2 2004/03/24 11:32:56 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_vp.h,v $*/ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+ -+/* virtual process allocation */ -+#define EP_VP_NODE_BASE (0) -+#define EP_VP_DATA_BASE (EP_VP_NODE_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_BASE (EP_VP_DATA_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_COUNT ELAN_MAX_LEVELS -+ -+#define EP_VP_BCAST_BASE (EP_VP_PROBE_BASE + EP_VP_PROBE_COUNT) -+#define EP_VP_BCAST_COUNT (CM_SGMTS_PER_LEVEL * (CM_MAX_LEVELS - 1) + 1) -+ -+#define EP_VP_NODE(nodeId) (EP_VP_NODE_BASE + (nodeId)) -+#define EP_VP_DATA(nodeId) (EP_VP_DATA_BASE + (nodeId)) -+#define EP_VP_PROBE(lvl) (EP_VP_PROBE_BASE + (lvl)) -+#define EP_VP_BCAST(lvl,sgmt) (EP_VP_BCAST_BASE + ((lvl) - 1)*CM_SGMTS_PER_LEVEL + (sgmt)) -+ -+#define EP_VP_TO_NODE(vp) ((vp) & (EP_MAX_NODES-1)) -+#define EP_VP_ISDATA(vp) ((vp) >= EP_VP_DATA_BASE && (vp) < (EP_VP_DATA_BASE + EP_MAX_NODES)) -+ -+#endif /* __EP_KCOMM_VP_H */ -+ -+ -diff -urN clean/drivers/net/qsnet/ep/kmap.c linux-2.6.9/drivers/net/qsnet/ep/kmap.c ---- clean/drivers/net/qsnet/ep/kmap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kmap.c 2004-12-14 05:19:23.000000000 -0500 -@@ -0,0 +1,561 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap.c,v 1.12 2004/12/14 10:19:23 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "debug.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+void -+ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.KaddrMap (rail, eaddr, kaddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.SdramMap (rail, eaddr, saddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len) -+{ -+ rail->Operations.Unmap (rail, eaddr, len); -+} -+ -+void -+ep_perrail_dvma_sync (EP_RAIL *rail) -+{ -+ if (rail->TlbFlushRequired) -+ { -+ rail->TlbFlushRequired = 0; -+ -+ rail->Operations.FlushTlb (rail); -+ } -+} -+ -+ -+static int ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+static uint16_t ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); -+#endif -+ -+EP_NMH_OPS ep_dvma_nmh_ops = -+{ -+ ep_dvma_map_rails, -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_dvma_calc_check_sum -+#endif -+}; -+ -+extern void -+ep_dvma_init (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ kmutex_init (&d->dvma_lock); -+ -+ INIT_LIST_HEAD (&d->dvma_handles); -+ INIT_LIST_HEAD (&d->dvma_rails); -+ -+ d->dvma_rmap = ep_rmallocmap (EP_DVMA_RMAP_SIZE, "dvma_rmap", 1); -+ -+ ep_rmfree (d->dvma_rmap, EP_DVMA_TOP - EP_DVMA_BASE, EP_DVMA_BASE); -+} -+ -+extern void -+ep_dvma_fini (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ ep_rmfreemap (d->dvma_rmap); -+ -+ kmutex_destroy (&d->dvma_lock); -+} -+ -+extern int -+ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_RAIL_ENTRY *l; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ l->Rail = rail; -+ -+ list_add_tail (&l->Link, &d->dvma_rails); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= ( 1 << rail->Number); -+ -+ desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages); -+ } -+ -+ kmutex_unlock (&d->dvma_lock); -+ return (0); -+} -+ -+extern void -+ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ struct list_head *el; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = NULL; -+ desc->dvma_railmask &= ~(1 << rail->Number); -+ -+ rail->Operations.DvmaRelease (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages, desc->dvma_private[rail->Number]); -+ } -+ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ -+ KMEM_FREE (tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+EP_NMH * -+ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc; -+ EP_ADDR addr; -+ struct list_head *el; -+ int i; -+ -+ KMEM_ZALLOC (desc, EP_DVMA_NMH *, offsetof (EP_DVMA_NMH, dvma_attrs[npages]), 1); -+ -+ if (desc == NULL) -+ return (NULL); -+ -+ if ((addr = ep_rmalloc (d->dvma_rmap, npages << PAGESHIFT, 0)) == 0) -+ { -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+ } -+ -+ spin_lock_init (&desc->dvma_lock); -+ -+ desc->dvma_perm = perm; -+ -+ kmutex_lock (&d->dvma_lock); -+ /* reserve the mapping resource */ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ EPRINTF4 (DBG_KMAP, "%s: ep_dvma_reserve desc=%p npages=%d rail=%p\n", rail->Name, desc, npages, rail); -+ -+ if ((desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, addr, npages)) == NULL) -+ { -+ printk ("%s: !!ep_dvma_reserve - rail->DvmaReserve failed\n", rail->Name); -+ goto failed; -+ } -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= (1 << rail->Number); -+ } -+ -+ /* insert into the network mapping handle table */ -+ desc->dvma_nmh.nmh_nmd.nmd_addr = addr; -+ desc->dvma_nmh.nmh_nmd.nmd_len = npages << PAGESHIFT; -+ desc->dvma_nmh.nmh_nmd.nmd_attr = EP_NMD_ATTR (sys->Position.pos_nodeid, 0); -+ desc->dvma_nmh.nmh_ops = &ep_dvma_nmh_ops; -+ -+ ep_nmh_insert (&sys->MappingTable, &desc->dvma_nmh); -+ -+ list_add (&desc->dvma_link, &d->dvma_handles); -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ return (&desc->dvma_nmh); -+ -+ failed: -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (desc->dvma_rails[i] != NULL) -+ desc->dvma_rails[i]->Operations.DvmaRelease (desc->dvma_rails[i], addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+} -+ -+void -+ep_dvma_release (EP_SYS *sys, EP_NMH *nmh) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr; -+ int npages = nmh->nmh_nmd.nmd_len >> PAGESHIFT; -+ EP_RAIL *rail; -+ int i; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_del (&desc->dvma_link); -+ -+ ep_nmh_remove (&sys->MappingTable, nmh); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if ((rail = desc->dvma_rails[i]) != NULL) -+ rail->Operations.DvmaRelease (rail, addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, offsetof (EP_DVMA_NMH, dvma_attrs[npages])); -+ -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+void -+ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = (unsigned long) vaddr & PAGEOFFSET; -+ unsigned npages = btopr (len + offset); -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr + (index << PAGESHIFT); -+ int rmask = *hints; -+ EP_RAIL *rail; -+ register int i, rnum; -+ unsigned long flags; -+ -+ EPRINTF7 (DBG_KMAP, "ep_dvma_load: map=%p vaddr=%p len=%x nmh=%p(%x,%x) index=%d\n", -+ map, vaddr, len, nmh, nmh->nmh_nmd.nmd_addr, nmh->nmh_nmd.nmd_len, index); -+ -+ /* If no rail specified, then map into all rails */ -+ if (rmask == 0) -+ rmask = desc->dvma_railmask; -+ -+ ASSERT ((index + npages) <= (nmh->nmh_nmd.nmd_len >> PAGESHIFT)); -+ -+ /* If not map specified then use the kernel map */ -+ if (map == NULL) -+ map = kernel_map; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ /* Now map each of the specified pages (backwards) */ -+ -+ vaddr = (vaddr - offset) + (npages-1)*PAGESIZE; -+ for (i = npages-1; i >= 0; i--, vaddr -= PAGESIZE) -+ { -+ physaddr_t paddr = vaddr_to_phys (map, vaddr); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ { -+ if (! (rmask & (1 << rnum)) || (rail = desc->dvma_rails[rnum]) == NULL) -+ rmask &= ~(1 << rnum); -+ else -+ { -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], index + i, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[index + i] |= (1 << rnum); -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->TlbFlushRequired = 1; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ /* Construct the network mapping handle to be returned. */ -+ subset->nmd_addr = addr + offset; -+ subset->nmd_len = len; -+ subset->nmd_attr = EP_NMD_ATTR(sys->Position.pos_nodeid, rmask); -+} -+ -+void -+ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ EP_RAIL *rail; -+ int rnum; -+ int rmask; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ /* compute which rails we need to unload on */ -+ for (rmask = 0, i = 0; i < npages; i++) -+ { -+ rmask |= desc->dvma_attrs[index + i]; -+ -+ desc->dvma_attrs[index + i] = 0; -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->Operations.DvmaUnload (rail, desc->dvma_private[rnum], index, npages); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+} -+ -+int -+ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ int r, rnum; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x mask=%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ if ((mask &= desc->dvma_railmask) == 0) -+ { -+ printk ("ep_dvma_map_rails: no intersecting rails %04x.%04x\n", mask, desc->dvma_railmask); -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ return (-1); -+ } -+ -+ for (i = npages-1; i >= 0; i--) -+ { -+ int pgidx = (index + i); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[pgidx] & (1 << rnum)) -+ break; -+ -+ if (rnum == EP_MAX_RAILS) -+ { -+ EPRINTF3 (DBG_KMAP, "ep_dvma_map_rails: nmh=%p idx=%x [%08x] not ptes valid\n", nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + ((pgidx) << PAGESHIFT)); -+ mask = 0; -+ } -+ else -+ { -+ EP_RAIL *rail = desc->dvma_rails[rnum]; -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ -+ for (r = 0; r < EP_MAX_RAILS; r++) -+ { -+ if ((mask & (1 << r)) == 0) -+ continue; -+ -+ if ((desc->dvma_attrs[pgidx] & (1 << r)) == 0) -+ { -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr=%llx\n", -+ desc->dvma_rails[rnum]->Name, nmh, pgidx, nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), -+ (long long) paddr); -+ -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], pgidx, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[pgidx] |= (1 << r); -+ } -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((mask & (1 << rnum)) != 0) -+ desc->dvma_rails[rnum]->TlbFlushRequired = 1; -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x|%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ /* Finally update the network memory descriptor */ -+ nmd->nmd_attr |= mask; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (0); -+} -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#include -+ -+/* Generic rolling checksum algorithm */ -+uint16_t -+rolling_check_sum (char *msg, int nob, uint16_t sum) -+{ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+#if ! defined(NO_RMAP) -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ kunmap(pfn_to_page(pfn)); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ return kmap(pfn_to_page(pfn)); -+ -+ return NULL; -+} -+#else -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ kunmap(p); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ return kmap(p); -+ -+ return NULL; -+} -+#endif -+ -+uint16_t -+ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum) -+{ -+ /* cant be called from an interupt */ -+ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ unsigned start, len; -+ int rnum; -+ register int i; -+ unsigned long flags; -+ EP_RAIL *rail; -+ -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF3 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x \n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ /* find a rail */ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[index] & (1 << rnum)) -+ break; -+ -+ ASSERT (rnum != EP_MAX_RAILS); -+ -+ rail = desc->dvma_rails[rnum]; -+ -+ for (i = 0; i <= (npages-1); i++) -+ { -+ int pgidx = (index + i); -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ void * virt; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); /* unlock for check sum calc */ -+ -+ virt = map_phys_address(paddr); -+ -+ if (!virt) -+ printk("ep_dvma_calc_check_sum: virt = NULL ! \n"); -+ else { -+ if ( i == 0 ) { -+ /* last bit of the first page */ -+ start = (nmd->nmd_addr & (PAGESIZE - 1)) ; -+ len = PAGESIZE - start; -+ if ( len > nmd->nmd_len) /* less than the remaining page */ -+ len = nmd->nmd_len; -+ } else { -+ if ( i != (npages-1)) { -+ /* all of the middle pages */ -+ start = 0; -+ len = PAGESIZE; -+ } else { -+ /* first bit of the last page */ -+ start = 0; -+ len = ((nmd->nmd_addr + nmd->nmd_len -1) & (PAGESIZE -1)) +1; -+ } -+ } -+ -+ check_sum = rolling_check_sum (((char *)virt)+start, len, check_sum); -+ unmap_phys_address(paddr); -+ -+ /* re aquire the lock */ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ } -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_calc_check_sum: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ } -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x = %d\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, check_sum); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (check_sum); -+} -+#endif -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kmap_elan3.c linux-2.6.9/drivers/net/qsnet/ep/kmap_elan3.c ---- clean/drivers/net/qsnet/ep/kmap_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kmap_elan3.c 2004-12-14 05:19:23.000000000 -0500 -@@ -0,0 +1,209 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan3.c,v 1.4 2004/12/14 10:19:23 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+#define ELAN3_PTES_PER_PAGE (PAGESIZE/ELAN3_PAGE_SIZE) -+ -+#if defined(__LITTLE_ENDIAN__) -+#define PERM_ENDIAN 0 -+#else -+#define PERM_ENDIAN ELAN3_PTE_BIG_ENDIAN -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int io_permtable[] = -+{ -+ ELAN3_PERM_LOCAL_READ, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+void -+ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ main_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0)); -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = elan3_sdram_to_phys (rail->Device, saddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr+off, paddr+off, -+ sdram_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0) ); -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned len, unsigned int perm) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (ioaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) ioaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ io_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC); -+ -+ eaddr += PAGESIZE; -+ ioaddr += PAGESIZE; -+ } -+} -+void -+ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned len) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan3mmu_unload (rail->Elan3mmu, eaddr, len, PTE_UNLOAD_UNLOCK | PTE_UNLOAD_NOSYNC); -+} -+ -+void * -+ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ void *private; -+ -+ KMEM_ALLOC (private, void *, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t), 1); -+ -+ if (private == NULL) -+ return NULL; -+ -+ elan3mmu_reserve (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ return private; -+} -+ -+void -+ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned npages, void *private) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ elan3mmu_release (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ KMEM_FREE (private, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t)); -+} -+ -+void -+ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned index, physaddr_t paddr, unsigned int perm) -+{ -+ ELAN3_DEV *dev = ((EP3_RAIL *) r)->Device; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ int off; -+ -+ for (off =0 ; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ { -+ ELAN3_PTE newpte = elan3mmu_phys_to_pte (dev, paddr + off, main_permtable[perm]) | ELAN3_PTE_REF | ELAN3_PTE_MOD; -+ -+ elan3_writepte (dev, *ptep, newpte); -+ -+ ptep++; -+ } -+} -+ -+physaddr_t -+ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned index) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE pte = elan3_readpte (rail->Device, *ptep); -+ -+ return pte & ELAN3_PTE_PFN_MASK; -+} -+ -+void -+ep3_dvma_unload (EP_RAIL *r, void *private, unsigned index, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte (rail->Elan3mmu); -+ int i; -+ -+ for (i = (npages * ELAN3_PTES_PER_PAGE) - 1; i >= 0; i--) -+ elan3_writepte (rail->Device, ptep[i], tpte); -+} -diff -urN clean/drivers/net/qsnet/ep/kmap_elan4.c linux-2.6.9/drivers/net/qsnet/ep/kmap_elan4.c ---- clean/drivers/net/qsnet/ep/kmap_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kmap_elan4.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,224 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan4.c,v 1.12.2.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_DataReadWrite, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ PERM_LocExecute, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_RemoteAll, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int cport_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_Unused, /* EP_PERM_ALL */ -+}; -+ -+void -+ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, HE_TYPE_OTHER, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ if ((eaddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (saddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))) -+ printk ("ep4_sdram_map: eaddr=%x saddr=%lx - incorrectly alised\n", eaddr, saddr); -+ -+ for (i = 0; i < npages; i++) -+ { -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((saddr + off) >> PTE_PADDR_SHIFT) | PTE_SetPerm (sdram_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, HE_TYPE_OTHER, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_cport_map (EP_RAIL *r, EP_ADDR eaddr, unsigned long cqaddr, unsigned int len, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (cqaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((cqaddr + off) >> PTE_PADDR_SHIFT) | PTE_SetPerm(cport_permtable[perm]) | PTE_CommandQueue; -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, HE_TYPE_OTHER, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ cqaddr += PAGESIZE; -+ } -+} -+void -+ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan4mmu_unload_range (&rail->r_ctxt, 0, eaddr, len); -+} -+ -+void * -+ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_reserve: eaddr=%x npages=%d (=> %d)\n", eaddr, npages, (npages << (PAGE_SHIFT - dev->dev_pageshift[0]))); -+ -+ return elan4mmu_reserve (&rail->r_ctxt, 0, (E4_Addr) eaddr, (npages << (PAGE_SHIFT - dev->dev_pageshift[0])), 1); -+} -+ -+void -+ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_release: eaddr=%x npages=%d private=%p\n", eaddr, npages, private); -+ -+ elan4mmu_release (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private); -+} -+ -+void -+ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int off; -+ unsigned long flags; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_set_pte: index %x -> eaddr %llx paddr %llx\n", -+ index, (long long)(((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE)), (long long) paddr); -+ -+ local_irq_save (flags); -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_set_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, (index << (PAGE_SHIFT - dev->dev_pageshift[0])) + -+ (off >> dev->dev_pageshift[0]), newpte); -+ } -+ local_irq_restore (flags); -+} -+ -+physaddr_t -+ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_uint64 pte; -+ unsigned long flags; -+ -+ local_irq_save (flags); -+ pte = elan4mmu_get_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ local_irq_restore (flags); -+ -+ return elan4mmu_pte2phys (dev, pte); -+} -+ -+void -+ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_ADDR eaddr = ((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE); -+ unsigned long idx = (index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long lim = idx + (npages << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_KMAP, "ep4_dvma_unload: eaddr %x -> %lx : index=%d idx=%ld lim=%ld\n", -+ eaddr, (unsigned long)(eaddr + (npages * PAGE_SIZE)), index, idx, lim); -+ -+ local_irq_save (flags); -+ for (; idx < lim; idx++) -+ elan4mmu_clear_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, idx); -+ local_irq_restore (flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kmsg_elan3.c linux-2.6.9/drivers/net/qsnet/ep/kmsg_elan3.c ---- clean/drivers/net/qsnet/ep/kmsg_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kmsg_elan3.c 2005-07-19 10:26:48.000000000 -0400 -@@ -0,0 +1,348 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan3.c,v 1.4.2.1 2005/07/19 14:26:48 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+static void -+ep3_inputq_event (EP3_RAIL *rail, void *arg) -+{ -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) arg; -+ -+ (*inputq->q_callback)((EP_RAIL *)rail, inputq->q_arg); -+} -+ -+static EP3_COOKIE_OPS ep3_inputq_cookie_ops = -+{ -+ ep3_inputq_event, -+}; -+ -+EP_INPUTQ * -+ep3_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq; -+ EP3_InputQueue qdesc; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP3_INPUTQ *, sizeof (EP3_INPUTQ), TRUE); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->Generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_base = inputq->q_slotsAddr; -+ inputq->q_top = inputq->q_base + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_base; -+ inputq->q_desc = EP_SYSTEMQ_DESC(rail->QueueDescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ -+ if (callback) -+ RegisterCookie (&rail->CookieTable, &inputq->q_cookie, inputq->q_descAddr, &ep3_inputq_cookie_ops, inputq); -+ -+ /* Initialise the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = inputq->q_base + slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ ep_free_main (&rail->Generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ DeregisterCookie (&rail->CookieTable, &inputq->q_cookie); -+ -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+} -+ -+void -+ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ elan3_sdram_writel (rail->Device, inputq->q_desc + offsetof (EP3_InputQueue, q_state), 0); -+} -+ -+void -+ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ EP3_InputQueue qdesc; -+ -+ /* mark the queue as locked */ -+ SetQueueLocked (rail, inputq->q_desc); -+ -+ /* re-initialise the queue as empty */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = (E3_Addr) inputq->q_base + inputq->q_slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = inputq->q_slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = inputq->q_callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+} -+ -+int -+ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E3_Addr nfptr; -+ int count = 0; -+ E3_uint32 state; -+ int delay; -+ -+ run_again_because_of_eventqueue_overflow: -+ nfptr = inputq->q_fptr + inputq->q_slotSize; -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ while (nfptr != elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_bptr))) /* PCI read */ -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (nfptr - inputq->q_base); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ /* reset the last word of the slot to "unreceived" */ -+ ((uint32_t *) (slot + inputq->q_slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ state = elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state)); /* PCI read */ -+ if ((state & E3_QUEUE_FULL) == 0) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ else -+ { -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state), (state & ~E3_QUEUE_FULL)); /* PCI write */ -+ } -+ inputq->q_fptr = nfptr; -+ -+ nfptr += roundup (inputq->q_slotSize, E3_BLK_ALIGN); -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ if (++count >= maxCount && maxCount) -+ break; -+ } -+ -+ if (inputq->q_callback && count != 0) -+ { -+ if (count != inputq->q_waitCount) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_wcount), inputq->q_waitCount = count); -+ -+ if (IssueWaitevent (rail, inputq->q_descAddr + offsetof (EP3_InputQueue, q_wevent)) == ISSUE_COMMAND_TRAPPED) -+ goto run_again_because_of_eventqueue_overflow; -+ } -+ -+ return count; -+} -+ -+#define Q_EVENT(q,slotNum) ((q)->q_elan + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_EVENT_ADDR(q,slotNum) ((q)->q_elanAddr + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_MSG(q,slotNum) (void *)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) (*((int *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)) -+ -+#define Q_ELAN_SIZE(q) ((q)->q_slotCount * sizeof (E3_BlockCopyEvent)) -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E3_uint32))) -+ -+static void -+ep3_outputq_retry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error) -+{ -+ E3_DMA_BE *dmabe = (E3_DMA_BE *) dma; -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, dmabe->s.dma_srcEvent); -+ E3_Addr done = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ E3_uint32 *donep = ep_elan2main (&rail->Generic, done & ~EV_BCOPY_DTYPE_MASK); -+ -+ EPRINTF1 (DBG_KMSG, "ep3_ouputq_retry: donep at %p -> FAILED\n", donep); -+ -+ *donep = EP3_EVENT_FAILED; -+} -+ -+static EP3_COOKIE_OPS ep3_outputq_cookie_ops = -+{ -+ NULL, /* Event */ -+ ep3_outputq_retry, -+ NULL, /* DmaCancelled */ -+ NULL, /* DmaVerify */ -+}; -+ -+EP_OUTPUTQ * -+ep3_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq; -+ int i; -+ E3_BlockCopyEvent event; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP3_OUTPUTQ *, sizeof (EP3_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ -+ outputq->q_elan = ep_alloc_elan (r, Q_ELAN_SIZE(outputq), 0, &outputq->q_elanAddr); -+ -+ if (outputq->q_elan == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (void *) NULL) -+ { -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ RegisterCookie (&rail->CookieTable, &outputq->q_cookie, outputq->q_elanAddr, &ep3_outputq_cookie_ops, outputq); -+ -+ for (i = 0; i < slotCount; i++) -+ { -+ EP3_INIT_COPY_EVENT (event, outputq->q_cookie, Q_DONE_ADDR(outputq, i), 0); -+ -+ Q_DONE(outputq, i) = outputq->q_cookie.Cookie; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, Q_EVENT(outputq, i), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ -+ DeregisterCookie (&rail->CookieTable, &outputq->q_cookie); -+ -+ ep_free_main (r, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+} -+ -+void * -+ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return Q_MSG ((EP3_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ switch (Q_DONE((EP3_OUTPUTQ *) q, slotNum)) -+ { -+ case EP3_EVENT_ACTIVE: -+ return EP_OUTPUTQ_BUSY; -+ -+ case EP3_EVENT_FAILED: -+ return EP_OUTPUTQ_FAILED; -+ -+ default: -+ return EP_OUTPUTQ_FINISHED; -+ } -+} -+ -+int -+ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ unsigned base = outputq->q_slotSize - roundup (size, E3_BLK_ALIGN); -+ E3_DMA_BE dmabe; -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_QUEUED, retries); -+ dmabe.s.dma_size = roundup (size, E3_BLK_ALIGN); -+ dmabe.s.dma_source = Q_MSG_ADDR(outputq, slotNum) + base; -+ dmabe.s.dma_dest = base; -+ dmabe.s.dma_destEvent = EP_SYSTEMQ_ADDR(qnum); -+ dmabe.s.dma_destCookieVProc = vp; -+ dmabe.s.dma_srcEvent = Q_EVENT_ADDR(outputq, slotNum); -+ dmabe.s.dma_srcCookieVProc = 0; -+ -+ Q_DONE(outputq, slotNum) = EP3_EVENT_ACTIVE; -+ -+ elan3_sdram_writel (rail->Device, Q_EVENT(outputq, slotNum), 1); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_CRITICAL, FALSE) != ISSUE_COMMAND_OK) -+ { -+ Q_DONE(outputq, slotNum) = EP3_EVENT_FAILED; -+ return FALSE; -+ } -+ -+ return TRUE; -+} -diff -urN clean/drivers/net/qsnet/ep/kmsg_elan4.c linux-2.6.9/drivers/net/qsnet/ep/kmsg_elan4.c ---- clean/drivers/net/qsnet/ep/kmsg_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kmsg_elan4.c 2005-02-28 09:05:38.000000000 -0500 -@@ -0,0 +1,418 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan4.c,v 1.10 2005/02/28 14:05:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+ -+#include -+ -+static void -+ep4_inputq_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) arg; -+ -+ /* mark the queue as "fired" to cause a single waitevent -+ * to be issued next time the queue is polled */ -+ atomic_inc (&inputq->q_fired); -+ -+ (*inputq->q_callback)(&rail->r_generic, inputq->q_arg); -+} -+ -+EP_INPUTQ * -+ep4_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq; -+ E4_Event32 qevent; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP4_INPUTQ *, sizeof (EP4_INPUTQ), 1); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->r_generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_last = inputq->q_slotsAddr + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_slotsAddr; -+ inputq->q_desc = EP_SYSTEMQ_DESC (rail->r_queuedescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ inputq->q_eventAddr = rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]); -+ -+ if (callback) -+ { -+ if ((inputq->q_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == 0) -+ { -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ if ((inputq->q_wcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == 0) -+ { -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ ep4_register_intcookie (rail, &inputq->q_intcookie, inputq->q_descAddr, ep4_inputq_interrupt, inputq); -+ -+ inputq->q_count = 0; -+ -+ atomic_set (&inputq->q_fired, 0); -+ -+ /* Initialise the queue event */ -+ qevent.ev_CountAndType = E4_EVENT_INIT_VALUE (callback ? -32 : 0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0); -+ qevent.ev_WritePtr = inputq->q_ecq->ecq_addr; -+ qevent.ev_WriteValue = (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD; -+ } -+ -+ /* copy the event down to sdram */ -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qevent, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]), sizeof (E4_Event32)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ { -+ ep4_deregister_intcookie (rail, &inputq->q_intcookie); -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep4_put_ecq (rail, inputq->q_wcq, 4); -+ } -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+} -+ -+void -+ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ EP_ADDR lastSlot = inputq->q_slotsAddr + (inputq->q_slotCount-1) * inputq->q_slotSize; -+ E4_InputQueue qdesc; -+ -+ qdesc.q_bptr = inputq->q_slotsAddr; -+ qdesc.q_fptr = inputq->q_slotsAddr; -+ qdesc.q_control = E4_InputQueueControl (inputq->q_slotsAddr, lastSlot, inputq->q_slotSize); -+ qdesc.q_event = inputq->q_callback ? inputq->q_eventAddr : 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+ -+ EPRINTF5 (DBG_KMSG, "ep_enable_inputq: %x - %016llx %016llx %016llx %016llx\n", (int) inputq->q_descAddr, -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 0), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 8), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 16), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 24)); -+} -+ -+void -+ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ E4_InputQueue qdesc; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+} -+ -+int -+ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E4_Addr fptr = inputq->q_fptr; -+ E4_Addr bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ int count = 0; -+ int delay; -+ -+ while (bptr != 0 && fptr != bptr) -+ { -+ while (fptr != bptr) -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (fptr - inputq->q_slotsAddr); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ EPRINTF4(DBG_KMSG, "ep4_poll_inputq: %x slot %d of %d [%08x]\n", (int)inputq->q_descAddr, -+ ((int)(fptr - inputq->q_slotsAddr))/inputq->q_slotSize, -+ inputq->q_slotCount, ((uint32_t *) (slot + inputq->q_slotSize))[-1]); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ /* reset the last word of the slot to "unreceived" */ -+ ((uint32_t *) (slot + inputq->q_slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ /* move on the front pointer */ -+ fptr = (fptr == inputq->q_last) ? inputq->q_slotsAddr : fptr + inputq->q_slotSize; -+ -+ elan4_sdram_writel (dev, qdesc + offsetof (E4_InputQueue, q_fptr), fptr); -+ -+ inputq->q_count++; -+ -+ if (++count >= maxCount && maxCount) -+ { -+ inputq->q_fptr = fptr; -+ -+ return count; -+ } -+ } -+ -+ bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ } -+ -+ inputq->q_fptr = fptr; -+ -+ /* Only insert a single wait event command if the callback has -+ * occured, otherwise just acrue the count as we've just periodically -+ * polled it. -+ */ -+ if (inputq->q_callback && atomic_read (&inputq->q_fired)) -+ { -+ atomic_dec (&inputq->q_fired); -+ -+ ep4_wait_event_cmd (inputq->q_wcq, inputq->q_eventAddr, -+ E4_EVENT_INIT_VALUE (-inputq->q_count << 5, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ inputq->q_ecq->ecq_addr, -+ (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ inputq->q_count = 0; -+ } -+ -+ return count; -+} -+ -+#define Q_MSG(q,slotNum) (unsigned long)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) *((E4_uint64 *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64)) -+ -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E4_uint64))) -+ -+#define Q_DONE_VAL(val,cnt) ((cnt) << 16 | (val)) -+#define Q_DONE_RET(done) ((int) ((done) & 0xffff)) -+#define Q_DONE_CNT(done) ((int) ((done) >> 16)) -+ -+EP_OUTPUTQ * -+ep4_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP4_OUTPUTQ *, sizeof (EP4_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ spin_lock_init (&outputq->q_lock); -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (E4_uint64 *) NULL) -+ { -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_cq = elan4_alloccq (&rail->r_ctxt, CQ_Size64K, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority); -+ -+ if (outputq->q_cq == (ELAN4_CQ *) NULL) -+ { -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ } -+ -+ outputq->q_dwords = CQ_Size (outputq->q_cq->cq_size) >> 3; -+ -+ /* mark all the queue slots as finished */ -+ for (i = 0; i < slotCount; i++) -+ Q_DONE(outputq, i) = Q_DONE_VAL (EP_OUTPUTQ_FINISHED, 0); -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ -+ elan4_freecq (&rail->r_ctxt, outputq->q_cq); -+ -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ spin_lock_destroy (&outputq->q_lock); -+ -+ KMEM_FREE (outputq, sizeof (EP4_OUTPUTQ)); -+} -+ -+void * -+ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return (void *) Q_MSG ((EP4_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ EPRINTF2 (DBG_KMSG, "ep4_outputq_state: slotNum %d state %x\n", slotNum, (int)Q_DONE((EP4_OUTPUTQ *) q, slotNum)); -+ -+ return Q_DONE_RET(Q_DONE((EP4_OUTPUTQ *)q, slotNum)); -+} -+ -+int -+ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ unsigned int nbytes = roundup (size, 32); -+ unsigned int base = outputq->q_slotSize - nbytes; -+ unsigned int i, dwords; -+ unsigned long flags; -+ E4_uint64 val; -+ -+ spin_lock_irqsave (&outputq->q_lock, flags); -+ -+ EPRINTF4 (DBG_KMSG, "ep4_outputq_send: slotNum=%d size=%d vp=%d qnum=%d\n", slotNum, size, vp, qnum); -+ -+ /* compute command queue size as follows - each slot uses -+ * overhead: 14 dwords + -+ * data > 128 ? 36 dwords -+ * data > 64 ? 18 dwords -+ * data > 32 ? 10 dwords -+ * else 6 dwords -+ */ -+ dwords = 14 + (size > 128 ? 36 : -+ size > 64 ? 18 : -+ size ? 10 : 6); -+ -+ outputq->q_dwords += Q_DONE_CNT (Q_DONE(outputq, slotNum)); -+ -+ if (dwords > outputq->q_dwords) -+ { -+ /* attempt to reclaim command queue space from other slots */ -+ i = slotNum; -+ do { -+ if (++i == outputq->q_slotCount) -+ i = 0; -+ -+ val = Q_DONE(outputq, i); -+ -+ if ((Q_DONE_RET (val) == EP_OUTPUTQ_FINISHED || Q_DONE_RET (val) == EP_OUTPUTQ_FAILED) && Q_DONE_CNT(val) > 0) -+ { -+ outputq->q_dwords += Q_DONE_CNT (val); -+ -+ Q_DONE(outputq, i) = Q_DONE_VAL(Q_DONE_RET(val), 0); -+ } -+ } while (i != slotNum && dwords > outputq->q_dwords); -+ } -+ -+ if (dwords > outputq->q_dwords) -+ { -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ EPRINTF0 (DBG_KMSG, "ep4_outputq_state: no command queue space\n"); -+ return 0; -+ } -+ -+ outputq->q_dwords -= dwords; -+ -+ Q_DONE(outputq, slotNum) = Q_DONE_VAL (EP_OUTPUTQ_BUSY, dwords); -+ -+ if (outputq->q_retries != retries) -+ { -+ outputq->q_retries = retries; -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL(1) | GUARD_RESET(retries)); -+ elan4_nop_cmd (outputq->q_cq, 0); -+ } -+ -+ /* transfer the top "size" bytes from message buffer to top of input queue */ -+ elan4_open_packet (outputq->q_cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp)); -+ elan4_sendtrans0 (outputq->q_cq, TR_INPUT_Q_GETINDEX, EP_SYSTEMQ_ADDR(qnum)); -+ -+ /* send upto EP_SYSTEMQ_MSG_MAX (256) bytes of message to the top of the slot */ -+ if (size > 128) -+ { -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 0, (void *) (Q_MSG(outputq, slotNum) + base + 0)); -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 128, (void *) (Q_MSG(outputq, slotNum) + base + 128)); -+ } -+ else if (size > 64) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else if (size > 32) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (32 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ elan4_sendtrans1 (outputq->q_cq, TR_INPUT_Q_COMMIT, EP_SYSTEMQ_ADDR(qnum), 0 /* no cookie */); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FINISHED, dwords)); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FAILED, dwords)); -+ -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ return 1; -+} -diff -urN clean/drivers/net/qsnet/ep/kthread.c linux-2.6.9/drivers/net/qsnet/ep/kthread.c ---- clean/drivers/net/qsnet/ep/kthread.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kthread.c 2004-05-19 04:54:57.000000000 -0400 -@@ -0,0 +1,186 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kthread.c,v 1.5 2004/05/19 08:54:57 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.c,v $*/ -+ -+#include -+ -+#include -+ -+void -+ep_kthread_init (EP_KTHREAD *kt) -+{ -+ spin_lock_init (&kt->lock); -+ kcondvar_init (&kt->wait); -+ -+ kt->next_run = 0; -+ kt->should_stall = 0; -+ kt->started = 0; -+ kt->should_stop = 0; -+ kt->stopped = 0; -+ kt->state = KT_STATE_RUNNING; -+} -+ -+void -+ep_kthread_destroy (EP_KTHREAD *kt) -+{ -+ spin_lock_destroy (&kt->lock); -+ kcondvar_destroy (&kt->wait); -+} -+ -+void -+ep_kthread_started (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->started = 1; -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+void -+ep_kthread_stopped (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->stopped = 1; -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+int -+ep_kthread_should_stall (EP_KTHREAD *kth) -+{ -+ return (kth->should_stall); -+} -+ -+int -+ep_kthread_sleep (EP_KTHREAD *kt, long next_run) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (next_run && (kt->next_run == 0 || BEFORE (next_run, kt->next_run))) -+ kt->next_run = next_run; -+ -+ if (kt->should_stop) -+ { -+ spin_unlock_irqrestore (&kt->lock, flags); -+ return (-1); -+ } -+ -+ do { -+ if (kt->should_stall) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ kt->state = KT_STATE_SLEEPING; -+ kt->running = 0; -+ if (kt->should_stall || kt->next_run == 0) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ else -+ kcondvar_timedwait (&kt->wait,&kt->lock, &flags, kt->next_run); -+ kt->state = KT_STATE_RUNNING; -+ kt->running = lbolt; -+ } while (kt->should_stall); -+ kt->next_run = 0; -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return (0); -+} -+ -+void -+ep_kthread_schedule (EP_KTHREAD *kt, long tick) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->next_run == 0 || BEFORE (tick, kt->next_run)) -+ { -+ kt->next_run = tick; -+ if (!kt->should_stall && kt->state == KT_STATE_SLEEPING) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stall (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->should_stall++ == 0) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ while (kt->state != KT_STATE_SLEEPING) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_resume (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (--kt->should_stall == 0) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stop (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->should_stop = 1; -+ while (kt->started && !kt->stopped) -+ { -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+int -+ep_kthread_state (EP_KTHREAD *kt, long *time) -+{ -+ unsigned long flags; -+ int res = KT_STATE_SLEEPING; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ -+ if (kt->next_run) { -+ *time = kt->next_run; -+ res = kt->should_stall ? KT_STATE_STALLED : KT_STATE_SCHEDULED; -+ } -+ -+ if (kt->running) { -+ *time = kt->running; -+ res = KT_STATE_RUNNING; -+ } -+ -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/kthread.h linux-2.6.9/drivers/net/qsnet/ep/kthread.h ---- clean/drivers/net/qsnet/ep/kthread.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/kthread.h 2004-05-06 10:24:08.000000000 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/Makefile linux-2.6.9/drivers/net/qsnet/ep/Makefile ---- clean/drivers/net/qsnet/ep/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/Makefile 2005-10-10 17:47:31.000000000 -0400 -@@ -0,0 +1,17 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/ep/Makefile -+# -+ -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# -+ -+obj-$(CONFIG_EP) += ep.o -+ep-objs := cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o $(ep3-$(CONFIG_EP)) $(ep4-$(CONFIG_EP)) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/ep/Makefile.conf linux-2.6.9/drivers/net/qsnet/ep/Makefile.conf ---- clean/drivers/net/qsnet/ep/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/Makefile.conf 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,12 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = ep.o -+MODULENAME = ep -+KOBJFILES = cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o \$\(ep3-\$\(CONFIG_EP\)\) \$\(ep4-\$\(CONFIG_EP\)\) -+EXPORT_KOBJS = conf_linux.o -+CONFIG_NAME = CONFIG_EP -+SGALFC = -+# EXTRALINES START -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/ep/neterr.c linux-2.6.9/drivers/net/qsnet/ep/neterr.c ---- clean/drivers/net/qsnet/ep/neterr.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/neterr.c 2005-07-20 08:01:34.000000000 -0400 -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.27.2.1 2005/07/20 12:01:34 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: raise context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ printk ("%s: node %d is flushing - deferring network error fixup\n", rail->Name, nodeId); -+ else -+ list_add_tail (&nodeRail->Link, &rail->NetworkErrorList); -+ } -+ -+ switch (what) -+ { -+ case EP_NODE_NETERR_ATOMIC_PACKET: -+ ASSERT (nodeRail->NetworkErrorCookies[channel] == 0); -+ -+ /* Need to raise the approriate context filter for this node, -+ * and periodically send a neterr fixup message to it until -+ * we receive an ack from it -+ */ -+ IncrStat (rail, NeterrAtomicPacket); -+ -+ nodeRail->NetworkErrorCookies[channel] = cookie; -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_ATOMIC_PACKET; -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: atomic packet destroyed - node %d cookie %llx\n", rail->Name, nodeId, (long long)cookie); -+ break; -+ -+ case EP_NODE_NETERR_DMA_PACKET: -+ /* Must be an overlapped dma packet, raise the context filter, -+ * and hold it up for a NETWORK_ERROR_TIMEOUT */ -+ IncrStat (rail, NeterrDmaPacket); -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_DMA_PACKET; -+ break; -+ } -+ -+ nodeRail->NextRunTime = lbolt + NETWORK_ERROR_TIMEOUT; -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, nodeRail->NextRunTime); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -diff -urN clean/drivers/net/qsnet/ep/neterr_elan3.c linux-2.6.9/drivers/net/qsnet/ep/neterr_elan3.c ---- clean/drivers/net/qsnet/ep/neterr_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/neterr_elan3.c 2003-11-17 08:26:45.000000000 -0500 -@@ -0,0 +1,326 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan3.c,v 1.24 2003/11/17 13:26:45 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+typedef struct neterr_halt_args -+{ -+ EP3_RAIL *Rail; -+ unsigned int NodeId; -+ EP_NETERR_COOKIE *Cookies; -+} NETERR_HALT_ARGS; -+ -+static int -+DmaMatchesCookie (EP3_RAIL *rail, E3_DMA_BE *dma, int nodeId, EP_NETERR_COOKIE *cookies, char *where) -+{ -+ E3_uint32 cvproc; -+ E3_uint32 cookie; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ cvproc = dma->s.dma_destCookieVProc; -+ cookie = dma->s.dma_srcCookieVProc; -+ } -+ else -+ { -+ cvproc = dma->s.dma_srcCookieVProc; -+ cookie = dma->s.dma_destCookieVProc; -+ } -+ -+ EPRINTF6 (DBG_NETWORK_ERROR, "%s: Neterr - %s: DMA %08x %08x %08x %08x\n", rail->Generic.Name, where, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_NETWORK_ERROR, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ if (EP_VP_ISDATA((cvproc & DMA_PROCESS_MASK)) && EP_VP_TO_NODE(cvproc & DMA_PROCESS_MASK) == nodeId) -+ { -+ /* -+ * This is a DMA going to the node which has a network fixup -+ * request pending, so check if the cookie matches. -+ */ -+ if ((cookie == cookies[0] || cookie == cookies[1]) /* && !WaitForEop */) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %08x on %s\n", rail->Generic.Name, cookie, where); -+ -+ return (TRUE); -+ } -+ } -+ -+ return (FALSE); -+} -+ -+ -+static void -+NetworkErrorHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ NETERR_HALT_ARGS *args = (NETERR_HALT_ARGS *) arg; -+ EP3_RAIL *rail = args->Rail; -+ EP_SYS *sys = rail->Generic.System; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (DmaMatchesCookie (rail, &dma, args->NodeId, args->Cookies, "runq ")) -+ { -+ /* -+ * Transfer the DMA to the node, it's source event will -+ * get executed later. -+ */ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ rail->NetworkErrorFlushed = TRUE; -+ kcondvar_wakeupall (&rail->NetworkErrorSleep, &sys->NodeLock); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+void -+ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP_SYS *sys = rail->Generic.System; -+ ELAN3_DEV *dev = rail->Device; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ E3_DMA_BE dmabe; -+ EP3_COOKIE *cp; -+ E3_uint32 vp; -+ NETERR_HALT_ARGS args; -+ struct list_head *el, *nel, matchedList; -+ int i; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ StallDmaRetryThread (rail); -+ -+ args.Rail = rail; -+ args.NodeId = nodeId; -+ args.Cookies = cookies; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ QueueHaltOperation (rail->Device, 0, NULL, INT_TProcHalted | INT_DProcHalted, NetworkErrorHaltOperation, &args); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ while (! rail->NetworkErrorFlushed) -+ kcondvar_wait (&rail->NetworkErrorSleep, &sys->NodeLock, &flags); -+ rail->NetworkErrorFlushed = FALSE; -+ -+ spin_lock (&rail->DmaRetryLock); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ -+ spin_unlock (&rail->DmaRetryLock); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ResumeDmaRetryThread (rail); -+ -+ /* Now "set" the source event of any write DMA's */ -+ while (! list_empty (&matchedList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (matchedList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_srcEvent) -+ { -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, retry->Dma.s.dma_srcEvent); -+ -+ /* Block local interrupts, since we need to atomically -+ * decrement the event count and perform the word write -+ */ -+ local_irq_save (flags); -+ { -+ E3_uint32 type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type)); -+ E3_uint32 count = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Count)); -+ -+ elan3_sdram_writel (dev, event + offsetof (E3_Event, ev_Count), count - 1); -+ -+ if (count == 1) -+ { -+ if (type & EV_TYPE_MASK_BCOPY) -+ { -+ E3_Addr srcVal = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Source)); -+ E3_Addr dstAddr = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Dest)) & ~EV_BCOPY_DTYPE_MASK; -+ -+ ASSERT ((srcVal & EV_WCOPY) != 0); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: neterr perform event word write at %08x with %08x\n", rail->Generic.Name, dstAddr, srcVal); -+ -+ ELAN3_OP_STORE32 (rail->Ctxt, dstAddr, srcVal); -+ } -+ -+ if ((type & ~EV_TYPE_MASK_BCOPY) != 0) -+ { -+ if ((type & EV_TYPE_MASK_CHAIN) == EV_TYPE_CHAIN) -+ { -+ printk ("%s: event at %08x - chained event %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ else if ((type & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr event interrupt - cookie %08x\n", rail->Generic.Name, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ cp = LookupCookie (&rail->CookieTable, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ } -+ else if ((type & EV_TYPE_MASK_DMA) == EV_TYPE_DMA) -+ { -+ sdramaddr_t dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2)); -+ -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr chained dma - %08x\n", rail->Generic.Name, (type & ~EV_TYPE_MASK2)); -+ -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ { -+ vp = dmabe.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ } -+ else -+ { -+ vp = dmabe.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ ASSERT (EP_VP_ISDATA(vp)); -+ -+ nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_CONNECTED: -+ case EP_NODE_LEAVING_CONNECTED: -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ -+ case EP_NODE_LOCAL_PASSIVATE: -+ QueueDmaOnStalledList (rail, &dmabe); -+ break; -+ -+ default: -+ panic ("ep: neterr incorrect state for node\n"); -+ } -+ } -+ else if ((type & EV_TYPE_MASK_THREAD) == EV_TYPE_THREAD) -+ { -+ printk ("%s: event at %08x - thread waiting %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ } -+ } -+ } -+ local_irq_restore(flags); -+ } -+ -+ /* add to free list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -diff -urN clean/drivers/net/qsnet/ep/neterr_elan4.c linux-2.6.9/drivers/net/qsnet/ep/neterr_elan4.c ---- clean/drivers/net/qsnet/ep/neterr_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/neterr_elan4.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,264 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan4.c,v 1.3.2.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+struct neterr_desc -+{ -+ EP4_RAIL *rail; -+ unsigned int nodeid; -+ EP_NETERR_COOKIE *cookies; -+ int done; -+} ; -+ -+static int -+dma_matches_cookie (EP4_RAIL *rail, E4_uint64 vproc, E4_uint64 cookie, unsigned int nodeId, EP_NETERR_COOKIE *cookies, const char *where) -+{ -+ if ((EP_VP_ISDATA (vproc) && EP_VP_TO_NODE (vproc) == nodeId) && (cookie == cookies[0] || cookie == cookies[1])) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %016llx on %s\n", rail->r_generic.Name, (long long)cookie, where); -+ -+ return 1; -+ } -+ return 0; -+} -+ -+static void -+ep4_neterr_dma_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ E4_uint64 vproc = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ -+ if (dma_matches_cookie (rail, vproc, cookie, desc->nodeid, desc->cookies, "runq ")) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ /* Replace the dma with one which will "disappear" */ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_neterr_dma_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_neterr_fixup_dmas (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct neterr_desc desc; -+ struct list_head matchedList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ register int i; -+ -+ desc.rail = rail; -+ desc.nodeid = nodeId; -+ desc.cookies = cookies; -+ desc.done = 0; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_neterr_dma_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_neterr_dma_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Now "set" the source event of any put DMA#'s we can use the dma -+ * retry command queue as the retry thread is stalled */ -+ while (! list_empty (&matchedList)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (matchedList.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ elan4_set_event_cmd (rail->r_dma_ecq->ecq_cq, retry->retry_dma.dma_srcEvent); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ /* Flush through the command queues to ensure that all the setevents have executed */ -+ ep4_flush_ecqs (rail); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ /* we're called from the ManagerThread, so no need to stall it */ -+ list_add_tail (&ops->op_link, &rail->r_neterr_ops); -+} -+void -+ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep4_neterr_fixup_sten (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ struct list_head *el; -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any sten packets from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ list_for_each (el, &rail->r_neterr_ops) { -+ EP4_NETERR_OPS *op = list_entry (el, EP4_NETERR_OPS, op_link); -+ -+ (op->op_func) (rail, op->op_arg, nodeId, cookies); -+ } -+ -+ /* Flush through the command queues to ensure that all the setevents have executed */ -+ ep4_flush_ecqs (rail); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ /* network error cookies can come from the following : -+ * -+ * DMA engine -+ * if a DMA matches a network error cookie, then we just need to -+ * execute the local setevent *before* returning. -+ * -+ * STEN packet -+ * if the STEN packet was generated with as a WAIT_FOR_EOP -+ * and it's not present on the retry lists, then re-create -+ * it. -+ * -+ */ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4_neterr_fixup: node %d cookies <%lld%s%s%s%s> <%lld%s%s%s%s>\n", -+ rail->r_generic.Name, nodeId, EP4_COOKIE_STRING(cookies[0]), EP4_COOKIE_STRING(cookies[1])); -+ -+ if ((cookies[0] & EP4_COOKIE_DMA) || (cookies[1] & EP4_COOKIE_DMA)) -+ ep4_neterr_fixup_dmas (rail, nodeId, cookies); -+ -+ if ((cookies[0] & EP4_COOKIE_STEN) || (cookies[1] & EP4_COOKIE_STEN)) -+ ep4_neterr_fixup_sten (rail, nodeId, cookies); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -diff -urN clean/drivers/net/qsnet/ep/nmh.c linux-2.6.9/drivers/net/qsnet/ep/nmh.c ---- clean/drivers/net/qsnet/ep/nmh.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/nmh.c 2004-01-05 08:48:08.000000000 -0500 -@@ -0,0 +1,181 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: nmh.c,v 1.6 2004/01/05 13:48:08 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.c,v $*/ -+ -+#include -+ -+#include -+ -+#define EP_NMD_SPANS(nmd, base, top) ((nmd)->nmd_addr <= (base) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (top)) -+ -+#define EP_NMD_OVERLAPS(nmd, addr, len) ((nmd)->nmd_addr <= ((addr) + (len)) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (addr)) -+ -+#define EP_NMH_HASH(tbl,idx,addr) ((addr) % (tbl)->tbl_size[idx]) -+ -+int -+ep_nmh_init (EP_NMH_TABLE *tbl) -+{ -+ int i, idx, hsize = 1; -+ -+ for (idx = EP_NMH_NUMHASH-1; idx >= 0; idx--, hsize <<= 1) -+ { -+ tbl->tbl_size[idx] = (hsize < EP_NMH_HASHSIZE) ? hsize : EP_NMH_HASHSIZE; -+ -+ KMEM_ZALLOC (tbl->tbl_hash[idx], struct list_head *, sizeof (struct list_head) * tbl->tbl_size[idx], 1); -+ -+ if (tbl->tbl_hash == NULL) -+ { -+ while (++idx < EP_NMH_NUMHASH) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ return (ENOMEM); -+ } -+ -+ for (i = 0; i < tbl->tbl_size[idx]; i++) -+ INIT_LIST_HEAD (&tbl->tbl_hash[idx][i]); -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmh_fini (EP_NMH_TABLE *tbl) -+{ -+ int idx; -+ -+ for (idx = 0; idx < EP_NMH_NUMHASH; idx++) -+ if (tbl->tbl_hash[idx]) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ -+ bzero (tbl, sizeof (EP_NMH_TABLE)); -+} -+ -+void -+ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ EP_ADDR base = nmh->nmh_nmd.nmd_addr; -+ EP_ADDR top = base + nmh->nmh_nmd.nmd_len - 1; -+ int idx; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ list_add_tail (&nmh->nmh_link, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]); -+} -+ -+void -+ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ list_del (&nmh->nmh_link); -+} -+ -+EP_NMH * -+ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmd) -+{ -+ EP_ADDR base = nmd->nmd_addr; -+ EP_ADDR top = base + nmd->nmd_len - 1; -+ int idx; -+ struct list_head *le; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ for (; idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) { -+ -+ list_for_each (le, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]) { -+ EP_NMH *nmh = list_entry (le, EP_NMH, nmh_link); -+ -+ if (EP_NMD_SPANS (&nmh->nmh_nmd, nmd->nmd_addr, nmd->nmd_addr + nmd->nmd_len - 1)) -+ return (nmh); -+ } -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len) -+{ -+ ASSERT ((off + len - 1) <= nmd->nmd_len); -+ -+ subset->nmd_addr = nmd->nmd_addr + off; -+ subset->nmd_len = len; -+ subset->nmd_attr = nmd->nmd_attr; -+} -+ -+int -+ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b) -+{ -+ if (EP_NMD_NODEID (a) != EP_NMD_NODEID (b)) /* not generated on the same node */ -+ return 0; -+ -+ if ((EP_NMD_RAILMASK (a) & EP_NMD_RAILMASK (b)) == 0) /* no common rails */ -+ return 0; -+ -+ if (b->nmd_addr == (a->nmd_addr + a->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = a->nmd_addr; -+ merged->nmd_len = a->nmd_len + b->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(a), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ return 1; -+ } -+ -+ if (a->nmd_addr == (b->nmd_addr + b->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = b->nmd_addr; -+ merged->nmd_len = b->nmd_len + a->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(b), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask) -+{ -+ EP_NMH *nmh = ep_nmh_find (&sys->MappingTable, nmd); -+ -+ if (nmh == NULL) -+ { -+ printk ("ep_nmd_map_rails: nmd=%08x.%08x.%08x cannot be found\n", -+ nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ return (-1); -+ } -+ -+ return (nmh->nmh_ops->op_map_rails (sys, nmh, nmd, railmask)); -+} -+ -+EP_RAILMASK -+ep_nmd2railmask (EP_NMD *frags, int nFrags) -+{ -+ EP_RAILMASK mask; -+ -+ if (nFrags == 0) -+ return ((EP_RAILMASK)-1); -+ -+ for (mask = EP_NMD_RAILMASK(frags); --nFrags; ) -+ mask &= EP_NMD_RAILMASK(++frags); -+ -+ return (mask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/probenetwork.c linux-2.6.9/drivers/net/qsnet/ep/probenetwork.c ---- clean/drivers/net/qsnet/ep/probenetwork.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/probenetwork.c 2004-04-19 11:43:15.000000000 -0400 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork.c,v 1.43 2004/04/19 15:43:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork.c,v $ */ -+ -+#include -+ -+#include -+#include "debug.h" -+ -+int PositionCheck = 1; -+ -+#define NUM_DOWN_FROM_VAL(NumDownLinksVal, level) (((NumDownLinksVal) >> ((level) << 2)) & 0xF) -+ -+int -+ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ int lvl, i; -+ int level; -+ int nodeid; -+ int numnodes; -+ int randomRoutingDisabled; -+ int sw; -+ int nacks; -+ int nowayup; -+ int nalias; -+ int upmask; -+ int partial; -+ int link; -+ int invalid; -+ int linkdown[ELAN_MAX_LEVELS]; -+ int linkup[ELAN_MAX_LEVELS]; -+ EP_SWITCH *switches[ELAN_MAX_LEVELS]; -+ int switchCount[ELAN_MAX_LEVELS+1]; -+ int lowestBcast; -+ int numUpLinks[ELAN_MAX_LEVELS]; -+ int routedown [ELAN_MAX_LEVELS]; -+ -+ EPRINTF1 (DBG_PROBE, "%s: ProbeNetwork started\n", rail->Name); -+ -+ switchCount[0] = 1; -+ numUpLinks [0] = 4; -+ -+ for (level = 0; level < ELAN_MAX_LEVELS; level++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, level); -+ -+ KMEM_ZALLOC (switches[level], EP_SWITCH *, sizeof (EP_SWITCH) * switchCount[level], 1); -+ -+ for (sw = 0, nacks = 0, nowayup = 0, lowestBcast=7; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ int good = 1; -+ int tsw; -+ -+ for (nodeid = 0,tsw = sw, lvl = level-1 ; lvl >= 0 ; lvl--) -+ { -+ EP_SWITCH *lsw; -+ int link = (8-numUpLinks[lvl]) + (tsw % numUpLinks[lvl]); -+ -+ tsw = tsw / numUpLinks[lvl]; -+ lsw = &switches[lvl][tsw]; -+ -+ if (lsw->present == 0 || (lsw->lnr & (1 << link))) -+ { -+ EPRINTF4 (DBG_PROBE, "lvl %d sw %d present=%d lnr=%x\n", lvl, sw, lsw->present, lsw->lnr); -+ good = 0; -+ } -+ -+ linkup[lvl] = link; -+ linkdown[lvl] = lsw->link; -+ -+ if ( lvl ) nodeid = ((nodeid + linkdown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += linkdown[0]; -+ -+ } -+ -+ /* -+ * don't bother probing routes which we we've already seen are unreachable -+ * because a link upwards was in reset or the switch previously nacked us. -+ */ -+ if (! good) -+ { -+ lsw->present = 0; -+ -+ nacks++; -+ nowayup++; -+ -+ continue; -+ } -+ -+ lsw->present = rail->Operations.ProbeRoute (rail, level, sw, nodeid, linkup, linkdown, 5, lsw); -+ -+ if (! lsw->present) -+ { -+ EPRINTF3 (DBG_PROBE, "%s: level %d switch %d - unexpected nack\n", rail->Name, level, sw); -+ -+ nacks++; -+ nowayup++; -+ } -+ else -+ { -+ EPRINTF5 (DBG_PROBE, "%s: level %d switch %d - link %d bcast %d\n", rail->Name, level, sw, lsw->link, lsw->bcast); -+ -+ if (level == 2 && rail->Devinfo.dev_device_id == PCI_DEVICE_ID_ELAN3) -+ { -+ /* If we see broadcast top as 7, and we came in on a low link, then we can't -+ * determine whether we're in a 128 way or a un-configured 64u64d switch, so -+ * we treat it as a 64u64d and detect the 128 way case by "going over the top" -+ * below. Unless we've been told what it really is by NumDownLinksVal. -+ */ -+ if (lsw->bcast == 7 && lsw->link < 4) -+ lsw->bcast = ndown ? (ndown - 1) : 3; -+ } -+ -+ if ( lowestBcast > lsw->bcast ) -+ lowestBcast = lsw->bcast; -+ -+ if (lsw->link > (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast))) -+ { -+ /* We've arrived on a "up-link" - this could be either -+ * we're in the top half of a x8 top-switch - or we're -+ * in the bottom half and have gone "over the top". We -+ * differentiate these cases since the switches below -+ * a x8 top-switch will have broadcast top set to 3, -+ * and the x8 topswitch have broadcast top set to 7. -+ */ -+ if (lsw->bcast == 7) -+ nowayup++; -+ else -+ { -+ EPRINTF2 (DBG_PROBE, "%s: level %d - gone over the top\n", -+ rail->Name, level); -+ -+ if (level > 0) -+ { -+ KMEM_FREE (switches[level], sizeof (EP_SWITCH) * switchCount[level] ); -+ level--; -+ } -+ -+ numUpLinks[level] = 0; -+ goto finished; -+ } -+ } -+ -+ } -+ } -+ -+ numUpLinks[level] = ndown ? (8 - ndown) : (7 - lowestBcast); -+ switchCount[level+1] = switchCount[level] * numUpLinks[level]; -+ -+ /* Now we know which links are uplinks, we can see whether there is -+ * any possible ways up */ -+ upmask = (ndown ? (0xFF << ndown) & 0xFF : (0xFF << (8 - numUpLinks[level])) & 0xFF); -+ -+ for (sw = 0; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ -+ if (lsw->present && lsw->link <= (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast)) && (switches[level][sw].lnr & upmask) == upmask) -+ nowayup++; -+ } -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d - sw=%d nacks=%d nowayup=%d bcast=%d numup=%d\n", -+ rail->Name, level, sw, nacks, nowayup, lowestBcast, numUpLinks[level]); -+ -+ if (nacks == sw) -+ { -+ static bitmap_t printed[BT_BITOUL(EP_MAX_RAILS)]; -+ -+ if (! BT_TEST (printed, rail->Number)) -+ printk ("%s: cannot determine network position\n", rail->Name); -+ BT_SET (printed, rail->Number); -+ goto failed; -+ } -+ -+ if (nowayup == sw) -+ goto finished; -+ } -+ -+ printk ("%s: exceeded number of levels\n", rail->Name); -+ level = ELAN_MAX_LEVELS - 1; -+ -+ failed: -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ return -EAGAIN; -+ -+ finished: -+ /* we've successfully probed the network - now calculate our node -+ * positon and what level of random routing is possible */ -+ nalias = 1; -+ for (lvl = 0, invalid = 0, partial = 0, randomRoutingDisabled = 0; lvl <= level; lvl++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, lvl); -+ int upmask = ndown ? (0xFF << ndown) & 0xFF : 0xF0; -+ -+ for (sw = 0, nalias = 0; sw < switchCount[lvl]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[lvl][sw]; -+ -+ /* You can only use adaptive routing if links 4-7 are uplinks, and at least one of them is -+ * not in reset. Otherwise you can randomly select an "uplink" if all the uplinks are not -+ * in reset. */ -+ if (lsw->present && ((upmask == 0xF0) ? (lsw->lnr & upmask) == upmask : (lsw->lnr & upmask) != 0)) -+ randomRoutingDisabled |= (1 << lvl); -+ -+ if (!lsw->present) -+ partial++; -+ else -+ { -+ if (lsw->invalid) -+ { -+ printk ("%s: invalid switch detected (level %d switch %d)\n", rail->Name, lvl, sw); -+ invalid++; -+ } -+ -+ for (i = 0; i < nalias; i++) -+ if (linkdown[i] == lsw->link) -+ break; -+ if (i == nalias) -+ linkdown[nalias++] = lsw->link; -+ } -+ } -+ -+ link = linkdown[0]; -+ for (i = 1; i < nalias; i++) -+ if (linkdown[i] < link) -+ link = linkdown[i]; -+ -+ if (nalias > 1 && lvl != level) -+ { -+ printk ("%s: switch aliased below top level (level %d)\n", rail->Name, lvl); -+ invalid++; -+ } -+ -+ routedown[lvl] = link; -+ } -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ if (invalid) -+ { -+ printk ("%s: invalid switch configuration\n", rail->Name); -+ return (EINVAL); -+ } -+ -+ /* Handle the aliasing case where a 16 way is used as multiple smaller switches */ -+ if (nalias == 1) -+ level++; -+ else if (nalias == 2) /* a 16 way as 2x8 ways */ -+ numUpLinks[level++] = 6; /* only 2 down links */ -+ else if (nalias > 4) /* a 16 way as 8x2 ways */ -+ numUpLinks[level-1] = 6; -+ -+ /* -+ * Compute my nodeid and number of nodes in the machine -+ * from the routedown and the number of downlinks at each level. -+ */ -+ for(nodeid=0, lvl = level - 1; lvl >= 0; lvl--) -+ { -+ if (lvl) nodeid = ((nodeid + routedown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += routedown[0]; -+ } -+ -+ for (numnodes = 1, lvl = 0; lvl < level; lvl++) -+ numnodes *= (8 - numUpLinks[lvl]); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, nodeid); -+ -+ if (randomRoutingDisabled & ((1 << (level-1))-1)) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing disabled 0x%x)\n", -+ rail->Name, nodeid, level, numnodes, randomRoutingDisabled); -+ else if (partial) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing ok)\n", -+ rail->Name, nodeid, level, numnodes); -+ else -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", -+ rail->Name, nodeid, level, numnodes); -+ -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ pos->pos_nodeid = nodeid; -+ pos->pos_levels = level; -+ pos->pos_nodes = numnodes; -+ pos->pos_random_disabled = randomRoutingDisabled; -+ -+ for(lvl = 0; lvl < level; lvl++) -+ pos->pos_arity[level -lvl - 1] = (8-numUpLinks[lvl]); -+ pos->pos_arity[level] = 1; /* XXXX why does this need to be 1 ? */ -+ -+ return 0; -+} -+ -+/* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+#define BCAST_TOP_INVALID(lvl, bcast, ndown) ((lvl) == 0 ? (bcast) < ((ndown)-1) : (bcast) != ((ndown) - 1)) -+ -+void -+CheckPosition (EP_RAIL *rail) -+{ -+ ELAN_POSITION *pos = &rail->Position; -+ unsigned int nodeid = pos->pos_nodeid; -+ unsigned int invalid = 0; -+ unsigned int changed = 0; -+ int lvl, slvl; -+ -+ if (! PositionCheck) -+ return; -+ -+ if (rail->Operations.CheckPosition(rail)) /* is update ready for this rail */ -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: check position: SwitchProbeLevel=%d\n", rail->Name, rail->SwitchProbeLevel); -+ -+ for (lvl = 0, slvl = pos->pos_levels-1; lvl <= rail->SwitchProbeLevel; lvl++, slvl--) -+ { -+ EP_SWITCHSTATE *state = &rail->SwitchState[lvl]; -+ EP_SWITCHSTATE *lstate = &rail->SwitchLast[lvl]; -+ unsigned int ndown = pos->pos_arity[slvl]; -+ unsigned int upmask = (0xFF << ndown) & 0xFF; -+ unsigned int mylink = nodeid % ndown; -+ unsigned int error = 0; -+ unsigned int binval = 0; -+ -+ nodeid /= ndown; -+ -+ /* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+ if (BCAST_TOP_INVALID(lvl, state->bcast, ndown) || (state->LNR & upmask) == upmask) -+ { -+ /* no way up from here - we'd better be at the top */ -+ if (lvl != (pos->pos_levels-1)) -+ { -+ if (state->bcast != (ndown-1)) -+ printk ("%s: invalid broadcast top %d at level %d\n", rail->Name, state->bcast, lvl); -+ else if ((state->LNR & upmask) == upmask && (lstate->LNR & upmask) == upmask) -+ printk ("%s: no way up to switch at level %d (turned off ?)\n", rail->Name, lvl+1); -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ printk ("%s: moved at top level was connected to link %d now connected to %d\n", rail->Name, mylink, state->linkid); -+ } -+ -+ if (state->linkid != mylink) -+ error++; -+ -+ if (BCAST_TOP_INVALID (lvl, state->bcast, ndown)) -+ binval++; -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ { -+ if (state->linkid != rail->SwitchLast[lvl].linkid) -+ printk ("%s: moved at lvl %d was connected to link %d now connected to %d\n", rail->Name, lvl, mylink, state->linkid); -+ -+ error++; -+ } -+ } -+ -+ if (error == 0 && invalid == 0) -+ rail->SwitchProbeTick[lvl] = lbolt; -+ -+ EPRINTF10 (DBG_ROUTETABLE, "%s: lvl=%d (slvl=%d) linkid=%d bcast=%d lnr=%02x uplink=%d : error=%d binval=%d invalid=%d\n", -+ rail->Name, lvl, slvl, state->linkid, state->bcast, state->LNR, state->uplink, error, binval, invalid); -+ -+ invalid |= (error | binval); -+ } -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ if (rail->SwitchState[lvl].uplink != rail->SwitchLast[lvl].uplink) -+ changed++; -+ -+ if (changed) -+ { -+ printk ("%s: broadcast tree has changed from", rail->Name); -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%c%d", lvl == 0 ? ' ' : ',', rail->SwitchLast[lvl].uplink); -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%s%d", lvl == 0 ? " to " : ",", rail->SwitchState[lvl].uplink); -+ printk ("\n"); -+ } -+ -+ if (rail->SwitchProbeLevel > 0) -+ bcopy (rail->SwitchState, rail->SwitchLast, rail->SwitchProbeLevel * sizeof (EP_SWITCHSTATE)); -+ } -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ EPRINTF4 (DBG_ROUTETABLE, "%s: level %d lbolt=%lx ProbeLevelTick=%lx\n", -+ rail->Name, lvl, lbolt, rail->SwitchProbeTick[lvl]); -+ -+ if (AFTER (lbolt, rail->SwitchProbeTick[lvl] + EP_POSITION_TIMEOUT)) -+ { -+ if (lvl < rail->SwitchBroadcastLevel+1) -+ { -+ if (lvl == 0) -+ printk ("%s: cable disconnected\n", rail->Name); -+ else -+ printk ("%s: broadcast level has dropped to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ break; -+ } -+ } -+ -+ if (lvl > rail->SwitchBroadcastLevel+1) -+ { -+ if (rail->SwitchBroadcastLevel < 0) -+ printk ("%s: cable reconnected\n", rail->Name); -+ if (lvl == rail->Position.pos_levels) -+ printk ("%s: broadcast level has recovered\n", rail->Name); -+ else -+ printk ("%s: broadcast level has recovered to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ -+ if (rail->SwitchBroadcastLevel != (lvl - 1)) -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: setting SwitchBroadcastLevel to %d\n", rail->Name, lvl-1); -+ -+ rail->SwitchBroadcastLevel = lvl - 1; -+ rail->SwitchBroadcastLevelTick = lbolt; -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/probenetwork_elan3.c linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan3.c ---- clean/drivers/net/qsnet/ep/probenetwork_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan3.c 2005-04-26 05:36:19.000000000 -0400 -@@ -0,0 +1,302 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3.c,v 1.41 2005/04/26 09:36:19 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+static void ep3_probe_event (EP3_RAIL *rail, void *arg); -+static EP3_COOKIE_OPS ep3_probe_ops = -+{ -+ ep3_probe_event -+} ; -+ -+int -+ep3_init_probenetwork (EP3_RAIL *rail) -+{ -+ sdramaddr_t stack; -+ E3_Addr sp; -+ E3_BlockCopyEvent event; -+ int i; -+ -+ if (! (stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rail->ProbeStack))) -+ return -ENOMEM; -+ -+ spin_lock_init (&rail->ProbeLock); -+ kcondvar_init (&rail->ProbeWait); -+ -+ /* Initialise the probe command structure */ -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[i]), 0); -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[i]), 1); -+ -+ RegisterCookie (&rail->CookieTable, &rail->ProbeCookie, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeDone), &ep3_probe_ops, rail); -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Type), 0); -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Count), 0); -+ -+ EP3_INIT_COPY_EVENT (event, rail->ProbeCookie, rail->RailMainAddr + offsetof (EP3_RAIL_MAIN, ProbeDone), 1); -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeDone), sizeof (E3_BlockCopyEvent)); -+ -+ rail->RailMain->ProbeDone = EP3_EVENT_FREE; -+ -+ sp = ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "kcomm_probe"), -+ rail->ProbeStack, stack, EP3_STACK_SIZE, -+ 3, rail->CommandPortAddr, rail->RailElanAddr, rail->RailMainAddr); -+ -+ IssueRunThread (rail, sp); -+ -+ return 0; -+} -+ -+void -+ep3_destroy_probenetwork (EP3_RAIL *rail) -+{ -+ if (rail->ProbeStack == (sdramaddr_t) 0) -+ return; -+ -+ /* XXXX: ensure that the network probe thread is stopped */ -+ -+ DeregisterCookie (&rail->CookieTable, &rail->ProbeCookie); -+ -+ kcondvar_destroy (&rail->ProbeWait); -+ spin_lock_destroy (&rail->ProbeLock); -+ -+ ep_free_elan (&rail->Generic, rail->ProbeStack, EP3_STACK_SIZE); -+} -+ -+static void -+ep3_probe_event (EP3_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ rail->ProbeDone = 1; -+ kcondvar_wakeupone (&rail->ProbeWait, &rail->ProbeLock); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+} -+ -+int -+ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ E3_uint16 flits[MAX_FLITS]; -+ E3_uint32 result; -+ int nflits; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ -+ nflits = GenerateProbeRoute ( flits, nodeid, level, linkup, linkdown, 0); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_PROBE(level), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ EPRINTF0 (DBG_ROUTETABLE, "ProbeRoute: cannot load route entry\n"); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ return (EINVAL); -+ } -+ -+ do { -+ /* Initialise the probe source to include our partially computed nodeid */ -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_SINGLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ rail->ProbeDone = 0; -+ -+ /* And wakeup the thread to do the probe */ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ -+ /* Now wait for it to complete */ -+ while (! rail->ProbeDone) -+ kcondvar_wait (&rail->ProbeWait, &rail->ProbeLock, &flags); -+ -+ /* wait for block copy event to flush write buffers */ -+ while (! EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone)) -+ if (! EP3_EVENT_FIRING(rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone), rail->ProbeCookie, railMain->ProbeDone)) -+ panic ("ProbeRoute: network probe event failure\n"); -+ -+ result = railMain->ProbeResult; -+ -+ if (result == C_ACK_ERROR) -+ kcondvar_timedwait (&rail->ProbeWait, &rail->ProbeLock, &flags, lbolt + (hz/8)); -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ -+ } while (result != C_ACK_OK && --attempts); -+ -+ if (result == C_ACK_OK) -+ { -+ if (railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid || -+ railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid) -+ { -+ static unsigned long printed = 0; -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %x\n", rail->Generic.Name, level, sw, -+ railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1], nodeid); -+ printed = lbolt; -+ } -+ result = C_ACK_ERROR; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - level - 1]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - level - 1]; -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d switch %d - linkid=%d bcast=%d LNR=%02x%s\n", -+ rail->Generic.Name, level, sw, TR_TRACEROUTE0_LINKID(val0), -+ TR_TRACEROUTE1_BCAST_TOP(val1), TR_TRACEROUTE0_LNR(val0), -+ TR_TRACEROUTE0_REVID(val0) ? "" : " RevA Part"); -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = (TR_TRACEROUTE0_REVID(val0) == 0); -+ } -+ } -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ -+ return (result == C_ACK_OK); -+} -+ -+void -+ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ int lvl, nflits; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ nflits = GenerateCheckRoute (pos, flits, pos->pos_levels - lvl - 1, 0); -+ -+ if (LoadRoute (rail->Device, rail->Ctxt->RouteTable, EP_VP_PROBE(lvl), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ panic ("ep3_probe_position_found: cannot load probe route entry\n"); -+ } -+ -+ /* Initialise the traceroute source data with our nodeid */ -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+} -+ -+int -+ep3_check_position (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ ELAN_POSITION *pos = &rail->Generic.Position; -+ unsigned int level = rail->RailMain->ProbeLevel; -+ unsigned int updated = EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone); -+ unsigned int lvl; -+ -+ if (updated) -+ { -+ if (railMain->ProbeResult != C_ACK_OK) -+ { -+ EPRINTF2 (DBG_PROBE, "%s: CheckNetworkPosition: packet nacked result=%d\n", rail->Generic.Name, railMain->ProbeResult); -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != pos->pos_nodeid || val1 != pos->pos_nodeid) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep3_check_position - level %d lost nodeid\n", rail->Generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0; lvl <= level; lvl++) -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->Generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID(val0); -+ rail->Generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(val0); -+ rail->Generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ rail->Generic.SwitchState[lvl].uplink = 4; -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->Generic.SwitchState[lvl].linkid, -+ rail->Generic.SwitchState[lvl].LNR, rail->Generic.SwitchState[lvl].bcast ,rail->Generic.SwitchState[lvl].uplink); -+ } -+ rail->Generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ } -+ -+ if (railMain->ProbeDone == EP3_EVENT_FREE) -+ { -+ if (rail->Generic.SwitchBroadcastLevel == rail->Generic.Position.pos_levels-1) -+ level = rail->Generic.Position.pos_levels - 1; -+ else -+ level = rail->Generic.SwitchBroadcastLevel + 1; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep3_check_postiion: level %d\n", rail->Generic.Name, level); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_MULTIPLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ railMain->ProbeLevel = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ } -+ -+ return updated; -+} -+ -diff -urN clean/drivers/net/qsnet/ep/probenetwork_elan3_thread.c linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan3_thread.c ---- clean/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 2004-03-24 06:32:56.000000000 -0500 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3_thread.c,v 1.19 2004/03/24 11:32:56 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3_thread.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+ -+static int -+kcomm_probe_vp (EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain, int vp, int attempts, int timeouts) -+{ -+ int rc; -+ -+ /* Since we use %g1 to hold the "rxd" so the trap handler can -+ * complete the envelope processing - we pass zero to indicate we're -+ * not a receiver thread */ -+ asm volatile ("mov %g0, %g1"); -+ -+ while (attempts && timeouts) -+ { -+ c_open (vp); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest0, &railElan->ProbeSource0); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest1, &railElan->ProbeSource1); -+ c_sendtrans0 (TR_SENDACK | TR_SETEVENT, (E3_Addr) 0); -+ -+ switch (rc = c_close()) -+ { -+ case C_ACK_OK: -+ return (C_ACK_OK); -+ -+ case C_ACK_DISCARD: -+ attempts--; -+ break; -+ -+ default: /* output timeout */ -+ timeouts--; -+ } -+ -+ c_break_busywait(); -+ } -+ -+ return (timeouts == 0 ? C_ACK_ERROR : C_ACK_DISCARD); -+} -+ -+void -+kcomm_probe (E3_CommandPort *cport, EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain) -+{ -+ int level; -+ -+ for (;;) -+ { -+ c_waitevent (&railElan->ProbeStart, 1); -+ -+ switch (railElan->ProbeType) -+ { -+ case PROBE_SINGLE: -+ railMain->ProbeResult = kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(railElan->ProbeLevel), -+ PROBE_SINGLE_ATTEMPTS, PROBE_SINGLE_TIMEOUTS); -+ -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ -+ case PROBE_MULTIPLE: -+ for (level = railElan->ProbeLevel; level >= 0; level--) -+ { -+ if (kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(level), -+ PROBE_MULTIPLE_ATTEMPTS, PROBE_MULTIPLE_TIMEOUTS) == C_ACK_OK) -+ { -+ railMain->ProbeLevel = level; -+ railMain->ProbeResult = C_ACK_OK; -+ break; -+ } -+ -+ c_break_busywait(); -+ } -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ } -+ -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/probenetwork_elan4.c linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan4.c ---- clean/drivers/net/qsnet/ep/probenetwork_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/probenetwork_elan4.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,401 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan4.c,v 1.10.2.1 2005/07/20 11:35:37 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+static void -+probe_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ rail->r_probe_done = 1; -+ kcondvar_wakeupone (&rail->r_probe_wait, &rail->r_probe_lock); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+} -+ -+int -+ep4_probe_init (EP4_RAIL *rail) -+{ -+ spin_lock_init (&rail->r_probe_lock); -+ kcondvar_init (&rail->r_probe_wait); -+ -+ rail->r_probe_cq = ep4_alloc_ecq (rail, CQ_Size1K); -+ -+ if (rail->r_probe_cq == NULL) -+ return -ENOMEM; -+ -+ ep4_register_intcookie (rail, &rail->r_probe_intcookie, rail->r_elan_addr, probe_interrupt, rail); -+ -+ return 0; -+} -+ -+void -+ep4_probe_destroy (EP4_RAIL *rail) -+{ -+ if (rail->r_probe_cq) -+ ep4_free_ecq (rail, rail->r_probe_cq); -+ -+ if (rail->r_probe_intcookie.int_arg == NULL) -+ return; -+ ep4_deregister_intcookie (rail, &rail->r_probe_intcookie); -+ -+ kcondvar_destroy (&rail->r_probe_wait); -+ spin_lock_destroy (&rail->r_probe_lock); -+} -+ -+#define LINKDOWN(nodeid, level) ((nodeid >> (level << 1)) & 3) -+#define PROBE_PATTERN0(nodeid) (0xaddebabe ^ nodeid) -+#define PROBE_PATTERN1(nodeid) (0xfeedbeef ^ nodeid) -+ -+#define EP4_PROBE_RETRIES 4 -+ -+int -+ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_RAIL_MAIN *rmain = rail->r_main; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ unsigned long flags; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ if (first == 0) -+ first = linkup ? FIRST_ROUTE(linkup[i]) : FIRST_ADAPTIVE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : PACKED_ADAPTIVE; -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ /* Generate the "down" routes */ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = linkdown ? PACKED_ROUTE(linkdown[i]) : PACKED_ROUTE(LINKDOWN(nodeid, i)); -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(level), &route); -+ -+ while (attempts--) -+ { -+ rail->r_probe_done = 0; -+ -+ /* generate the STEN packet - note we use a datatype of dword as we're copying to elan in dwords -+ * NB - no flow control is required, since the max packet size is less than the command queue -+ * size and it's dedicated for network probing. -+ */ -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_nop_cmd (rail->r_probe_cq->ecq_cq, 0); -+ -+ elan4_open_packet (rail->r_probe_cq->ecq_cq, OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(level))); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0), -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull | ((E4_uint64)PROBE_PATTERN0(nodeid) << 32)); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1), -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000000000001ull | ((E4_uint64)PROBE_PATTERN1(nodeid) << 32)); -+ elan4_sendtrans0 (rail->r_probe_cq->ecq_cq, TR_NOP_TRANS | TR_LAST_AND_SEND_ACK, 0); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FINISHED); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FAILED); -+ -+ elan4_interrupt_cmd (rail->r_probe_cq->ecq_cq, rail->r_probe_intcookie.int_val); -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ while (! rail->r_probe_done) -+ kcondvar_wait (&rail->r_probe_wait, &rail->r_probe_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+ -+ if (rmain->r_probe_result == EP4_STATE_FINISHED) -+ { -+ if (rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN0(nodeid) || -+ rmain->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN1(nodeid)) -+ { -+ static unsigned long printed = 0; -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %x\n", rail->r_generic.Name, level, sw, -+ rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1], PROBE_PATTERN0(nodeid)); -+ printed = lbolt; -+ } -+ } -+ else -+ { -+ E4_uint32 val0 = rmain->r_probe_dest0[TRACEROUTE_ENTRIES - level - 1]; -+ E4_uint32 val1 = rmain->r_probe_dest1[TRACEROUTE_ENTRIES - level - 1]; -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = 0; -+ -+ return 1; -+ } -+ } -+ -+ rmain->r_probe_result = EP4_STATE_FREE; -+ } -+ -+ return 0; -+} -+ -+ -+void -+ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int lvl; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ /* Initialise the "probe" route to use the broadcast tree */ -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < lvl; i++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = FIRST_BCAST_TREE; -+ else -+ packed[rb++] = PACKED_BCAST_TREE; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ spanned /= *arityp++; -+ -+ /* Generate the "down" routes */ -+ for (i = lvl-1; i >= 0; i--) -+ { -+ spanned /= *arityp; -+ packed[rb++] = PACKED_ROUTE((pos->pos_nodeid / spanned) % *arityp); -+ arityp++; -+ } -+ -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(lvl), &route); -+ -+ /* Initialise "start" event for this level */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopySource), -+ rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl])); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopyDest), -+ rail->r_probe_cq->ecq_addr); -+ -+ /* Initiailise command stream - reset the start event */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_cmd), -+ WRITE_DWORD_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_value), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ -+ /* Initiailise command stream - sten traceroute packet */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(lvl))); -+ -+ /* Initiailise command stream - traceroute 0 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute0), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute0), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull | ((E4_uint64) PROBE_PATTERN0(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - traceroute 1 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute1), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute1), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000100000001ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000000000001ull | ((E4_uint64) PROBE_PATTERN1(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - null sendack */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_sendack), -+ SEND_TRANS_CMD | ((TR_NOP_TRANS | TR_LAST_AND_SEND_ACK) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_sendack), -+ 0); -+ -+ /* Initiailise command stream - guard ok, write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_ok), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_writedword_ok), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_value_ok), -+ lvl); -+ -+ /* Initiailise command stream - guard fail, chain to next or write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_fail), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ -+ if (lvl > 0) -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl-1]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ NOP_CMD); -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ EP4_PROBE_FAILED); -+ } -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_nop_pad), -+ NOP_CMD); -+ } -+ -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ -+ mb(); -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[pos->pos_levels-1])); -+} -+ -+int -+ep4_check_position (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned int level = rail->r_main->r_probe_level; -+ unsigned int lvl; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep4_check_position: level=%lld\n", rail->r_generic.Name, (long long)rail->r_main->r_probe_level); -+ -+ if (rail->r_main->r_probe_level != EP4_PROBE_ACTIVE) -+ { -+ if (rail->r_main->r_probe_level == EP4_PROBE_FAILED) -+ { -+ EPRINTF1 (DBG_PROBE, "%s: ep4_check_position: packets all nacked\n", rail->r_generic.Name); -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E4_uint32 val0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E4_uint32 val1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != PROBE_PATTERN0 (pos->pos_nodeid) || val1 != PROBE_PATTERN1 (pos->pos_nodeid)) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep4_check_position - level %d lost nodeid\n", rail->r_generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0 ; lvl <= level; lvl++) -+ { -+ E4_uint32 uval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - lvl - 1]; -+ E4_uint32 dval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E4_uint32 dval1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->r_generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID (dval0); -+ rail->r_generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(dval0); -+ rail->r_generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP (dval1); -+ rail->r_generic.SwitchState[lvl].uplink = TR_TRACEROUTE0_LINKID (uval0); -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->r_generic.SwitchState[lvl].linkid, -+ rail->r_generic.SwitchState[lvl].LNR, rail->r_generic.SwitchState[lvl].bcast ,rail->r_generic.SwitchState[lvl].uplink); -+ -+ } -+ -+ rail->r_generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ mb(); -+ -+ if (rail->r_generic.SwitchBroadcastLevel == rail->r_generic.Position.pos_levels-1) -+ level = rail->r_generic.Position.pos_levels - 1; -+ else -+ level = rail->r_generic.SwitchBroadcastLevel + 1; -+ -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[level])); -+ -+ return 1; -+ } -+ -+ return 0; -+} -diff -urN clean/drivers/net/qsnet/ep/procfs_linux.c linux-2.6.9/drivers/net/qsnet/ep/procfs_linux.c ---- clean/drivers/net/qsnet/ep/procfs_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/procfs_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,632 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.60.2.3 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/procfs_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+#include -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_root; -+struct proc_dir_entry *ep_config_root; -+ -+/* -+ * We provide a slightly "special" interface for /proc/elan/device%d/nodeset, -+ * so that it can be included in a "poll" system call. On each "read" on the -+ * file, we generate a new nodeset if a) the previous one has been completely -+ * read and b) if it has changed since it was generated. -+ * -+ * Unfortunately ... this doesn't allow "tail -f" to work, since this uses -+ * fstat() on the fd, as we only hold the last nodeset string, we could not -+ * handle the case where two processes were reading a different rates. -+ * We could maybe have implemented this as a "sliding window", so that we -+ * add a new nodeset string, when it has changed and someone reads past -+ * end of the last one. Then if someone read from before out "window" -+ * we would produce "padding" data. The problem with this, is that a -+ * simple "cat" on /proc/elan/device%d/nodeset will read the whole "file" -+ * which will be mostly padding ! -+ * -+ * Just to not that the purpose of this interface is: -+ * 1) to allow cat /proc/elan/device%d/nodeset to show the current -+ * nodeset. -+ * 2) to allow rms (or similar) to poll() on the file, and when the -+ * nodeset changes read a new one. -+ * -+ * so ... we don't bother solving the troublesome "tail -f" problem. -+ */ -+ -+typedef struct nodeset_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ unsigned pr_changed; -+ char *pr_page; -+ unsigned pr_off; -+ unsigned pr_len; -+} NODESET_PRIVATE; -+ -+NODESET_PRIVATE *ep_nodeset_list; -+wait_queue_head_t ep_nodeset_wait; -+spinlock_t ep_nodeset_lock; -+ -+static int -+proc_write_state(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "start") && rail->State == EP_RAIL_STATE_UNINITIALISED) -+ ep_start_rail (rail); -+ -+ if (! strcmp (tmpbuf, "stop") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ ep_stop_rail (rail); -+ -+ if (! strcmp (tmpbuf, "offline") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 1, CM_OFFLINE_PROCFS); -+ -+ if (! strcmp (tmpbuf, "online") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 0, CM_OFFLINE_PROCFS); -+ -+ if (! strncmp (tmpbuf, "restart=", 8) && rail->State == EP_RAIL_STATE_RUNNING) -+ cm_restart_node (rail, simple_strtol (tmpbuf + 8, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "panic=", 6)) -+ ep_panic_node (rail->System, simple_strtol(tmpbuf + 6, NULL, 0), -+ strchr (tmpbuf, ',') ? strchr(tmpbuf, ',') + 1 : "remote panic request"); -+ -+ if (! strncmp (tmpbuf, "raise=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.RaiseFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "lower=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.LowerFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_state(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ int len; -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ len = sprintf (page, "uninitialised\n"); -+ break; -+ case EP_RAIL_STATE_STARTED: -+ len = sprintf (page, "started\n"); -+ break; -+ case EP_RAIL_STATE_RUNNING: -+ len = sprintf (page, "running NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ len = sprintf (page, "incompatible NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ default: -+ len = sprintf (page, "\n"); -+ break; -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_display(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "rail")) -+ DisplayRail (rail); -+ if (! strcmp (tmpbuf, "segs")) -+ DisplaySegs (rail); -+ if (! strcmp (tmpbuf, "nodes")) -+ DisplayNodes (rail); -+ if (! strcmp (tmpbuf, "status")) -+ DisplayStatus (rail); -+ if (! strcmp (tmpbuf, "debug") && rail->Operations.Debug) -+ rail->Operations.Debug (rail); -+ if (! strncmp (tmpbuf, "epcomms", 7)) -+ ep_comms_display (rail->System, tmpbuf[7] == '=' ? tmpbuf + 8 : NULL); -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_display(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+ -+static int -+proc_read_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ -+ if ( rail == NULL ) { -+ strcpy(page,"proc_read_stats rail=NULL\n"); -+ } else { -+ page[0] = 0; -+ ep_fillout_stats(rail, page); -+ rail->Operations.FillOutStats (rail, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_devinfo(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ ELAN_DEVINFO *devinfo = &rail->Devinfo; -+ ELAN_POSITION *pos = &rail->Position; -+ char *p = page; -+ -+ switch (devinfo->dev_device_id) -+ { -+ case PCI_DEVICE_ID_ELAN3: -+ p += sprintf (p, "ep%d is elan3 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ -+ case PCI_DEVICE_ID_ELAN4: -+ p += sprintf (p, "ep%d is elan4 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ default: -+ p += sprintf (p, "ep%d is unknown %x/%x\n", rail->Number, devinfo->dev_vendor_id, devinfo->dev_device_id); -+ break; -+ } -+ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ p += sprintf (p, "ep%d nodeid %d numnodes %d\n", rail->Number, pos->pos_nodeid, pos->pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"state", proc_read_state, proc_write_state}, -+ {"display", proc_read_display, proc_write_display}, -+ {"stats", proc_read_stats, NULL}, -+ {"devinfo", proc_read_devinfo, NULL}, -+}; -+ -+static int -+nodeset_open (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr; -+ -+ if ((pr = kmalloc (sizeof (NODESET_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_changed = 1; -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_page = NULL; -+ pr->pr_rail = (EP_RAIL *)( PDE(inode)->data ); -+ -+ spin_lock (&ep_nodeset_lock); -+ pr->pr_next = ep_nodeset_list; -+ ep_nodeset_list = pr; -+ spin_unlock (&ep_nodeset_lock); -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+nodeset_release (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ NODESET_PRIVATE **ppr; -+ -+ spin_lock (&ep_nodeset_lock); -+ for (ppr = &ep_nodeset_list; (*ppr) != pr; ppr = &(*ppr)->pr_next) -+ ; -+ (*ppr) = pr->pr_next; -+ spin_unlock (&ep_nodeset_lock); -+ -+ if (pr->pr_page) -+ free_page ((unsigned long) pr->pr_page); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+nodeset_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ EP_RAIL *rail = pr->pr_rail; -+ int error; -+ unsigned long flags; -+ -+ if (!pr->pr_changed && pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (pr->pr_page == NULL && (pr->pr_page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ kmutex_lock (&rail->CallbackLock); -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ spin_lock_irqsave (&rail->System->NodeLock, flags); -+ ep_sprintf_bitmap (pr->pr_page, PAGESIZE, statemap_tobitmap(rail->NodeSet), 0, 0, rail->Position.pos_nodes); -+ spin_unlock_irqrestore (&rail->System->NodeLock, flags); -+ -+ if (rail->SwitchBroadcastLevel == -1) -+ strcat (pr->pr_page, ""); -+ else if (rail->SwitchBroadcastLevel < (rail->Position.pos_levels-1)) -+ sprintf (pr->pr_page + strlen (pr->pr_page), "<%d>", rail->SwitchBroadcastLevel); -+ strcat (pr->pr_page, "\n"); -+ } -+ else -+ strcpy (pr->pr_page, "\n"); -+ kmutex_unlock (&rail->CallbackLock); -+ -+ pr->pr_len = strlen (pr->pr_page); -+ pr->pr_off = 0; -+ pr->pr_changed = 0; -+ } -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_page + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ free_page ((unsigned long) pr->pr_page); -+ pr->pr_page = NULL; -+ } -+ -+ return (count); -+} -+ -+static unsigned int -+nodeset_poll (struct file *file, poll_table *wait) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ -+ poll_wait (file, &ep_nodeset_wait, wait); -+ if (pr->pr_changed || pr->pr_off < pr->pr_len) -+ return (POLLIN | POLLRDNORM); -+ return (0); -+} -+ -+static void -+nodeset_callback (void *arg, statemap_t *map) -+{ -+ EP_RAIL *rail = (EP_RAIL *) arg; -+ NODESET_PRIVATE *pr; -+ -+ ep_display_bitmap (rail->Name, "Nodeset", statemap_tobitmap(map), 0, ep_numnodes(rail->System)); -+ -+ spin_lock (&ep_nodeset_lock); -+ for (pr = ep_nodeset_list; pr; pr = pr->pr_next) -+ if (pr->pr_rail == rail) -+ pr->pr_changed = 1; -+ spin_unlock (&ep_nodeset_lock); -+ -+ wake_up_interruptible (&ep_nodeset_wait); -+} -+ -+static int -+proc_open (struct inode *inode, struct file *file) -+{ -+ QSNET_PROC_PRIVATE *pr; -+ CM_RAIL *cmRail; -+ EP_RAIL *epRail; -+ int pages = 4; -+ unsigned long flags; -+ -+ if ((pr = kmalloc (sizeof (QSNET_PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ epRail = (EP_RAIL *)(PDE(inode)->data); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ if (epRail->State != EP_RAIL_STATE_RUNNING) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Rail not Running\n"); -+ break; -+ } -+ else -+ { -+ pr->pr_di.func = qsnet_proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("maps", file->f_dentry->d_iname)) -+ { -+ cmRail = epRail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeMaps (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("segs", file->f_dentry->d_iname)) -+ { -+ cmRail = epRail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeSgmts (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("tree", file->f_dentry->d_iname)) -+ DisplayRailDo (&pr->pr_di, epRail); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations proc_nodeset_operations = -+{ -+ read: nodeset_read, -+ poll: nodeset_poll, -+ open: nodeset_open, -+ release: nodeset_release, -+}; -+ -+struct file_operations proc_operations = -+{ -+ read: qsnet_proc_read, -+ open: proc_open, -+ release: qsnet_proc_release, -+}; -+ -+void -+ep_procfs_rail_init (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir; -+ struct proc_dir_entry *p; -+ char name[10]; -+ int i; -+ -+ sprintf (name, "rail%d", rail->Number); -+ -+ if ((dir = rail->ProcDir = proc_mkdir (name, ep_procfs_root)) == NULL) -+ return; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, dir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = rail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((p = create_proc_entry ("nodeset", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_nodeset_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ -+ rail->CallbackRegistered = 1; -+ ep_register_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ } -+ -+ if ((p = create_proc_entry ("maps", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("segs", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("tree", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+} -+ -+void -+ep_procfs_rail_fini (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir = rail->ProcDir; -+ char name[10]; -+ int i; -+ -+ if (dir == NULL) -+ return; -+ -+ if (rail->CallbackRegistered) -+ { -+ ep_remove_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ -+ remove_proc_entry ("nodeset", dir); -+ } -+ -+ remove_proc_entry ("maps", dir); -+ remove_proc_entry ("segs", dir); -+ remove_proc_entry ("tree", dir); -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, dir); -+ -+ sprintf (name, "rail%d", rail->Number); -+ remove_proc_entry (name, ep_procfs_root); -+} -+ -+#include "quadrics_version.h" -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+void -+ep_procfs_init() -+{ -+ extern int txd_stabilise; -+ extern int MaxSwitchLevels; -+ -+ printk ("ep Module (version %s)\n", quadrics_version); -+ -+ spin_lock_init (&ep_nodeset_lock); -+ init_waitqueue_head (&ep_nodeset_wait); -+ -+ ep_procfs_root = proc_mkdir ("ep", qsnet_procfs_root); -+ ep_config_root = proc_mkdir ("config", ep_procfs_root); -+ -+ qsnet_proc_register_str (ep_procfs_root, "version", quadrics_version, 1); -+ -+ qsnet_proc_register_hex (ep_config_root, "epdebug", &epdebug, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_console", &epdebug_console, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_cmlevel", &epdebug_cmlevel, 0); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ qsnet_proc_register_hex (ep_config_root, "epdebug_check_sum", &epdebug_check_sum, 0); -+#endif -+ qsnet_proc_register_hex (ep_config_root, "epcomms_forward_limit", &epcomms_forward_limit, 0); -+ qsnet_proc_register_int (ep_config_root, "txd_stabilise", &txd_stabilise, 0); -+ qsnet_proc_register_int (ep_config_root, "assfail_mode", &assfail_mode, 0); -+ qsnet_proc_register_int (ep_config_root, "max_switch_levels", &MaxSwitchLevels, 1); -+ -+ ep_procfs_rcvr_xmtr_init(); -+} -+ -+void -+ep_procfs_fini(void) -+{ -+ ep_procfs_rcvr_xmtr_fini(); -+ -+ remove_proc_entry ("max_switch_levels", ep_config_root); -+ remove_proc_entry ("assfail_mode", ep_config_root); -+ remove_proc_entry ("txd_stabilise", ep_config_root); -+ remove_proc_entry ("epcomms_forward_limit", ep_config_root); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ remove_proc_entry ("epdebug_check_sum", ep_config_root); -+#endif -+ remove_proc_entry ("epdebug_cmlevel", ep_config_root); -+ remove_proc_entry ("epdebug_console", ep_config_root); -+ remove_proc_entry ("epdebug", ep_config_root); -+ -+ remove_proc_entry ("version", ep_procfs_root); -+ -+ remove_proc_entry ("config", ep_procfs_root); -+ remove_proc_entry ("ep", qsnet_procfs_root); -+ -+ spin_lock_destroy (&ep_nodeset_lock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/quadrics_version.h linux-2.6.9/drivers/net/qsnet/ep/quadrics_version.h ---- clean/drivers/net/qsnet/ep/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/ep/railhints.c linux-2.6.9/drivers/net/qsnet/ep/railhints.c ---- clean/drivers/net/qsnet/ep/railhints.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/railhints.c 2004-02-06 17:37:06.000000000 -0500 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: railhints.c,v 1.5 2004/02/06 22:37:06 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/railhints.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+int -+ep_pickRail(EP_RAILMASK railmask) -+{ -+ static volatile int lastGlobal; -+ int i, rnum, last = lastGlobal; -+ -+ /* Pick a single rail out of the railmask */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (railmask & (1 << ((last + i) % EP_MAX_RAILS))) -+ break; -+ -+ if (i == EP_MAX_RAILS) -+ return (-1); -+ -+ rnum = (last + i) % EP_MAX_RAILS; -+ -+ lastGlobal = (rnum + 1) % EP_MAX_RAILS; -+ -+ ASSERT (railmask & (1 << rnum)); -+ -+ return (rnum); -+} -+ -+int -+ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails) -+{ -+ /* Retrun a single rail out of allowed mask with the best connectivity for broadcast. */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask)); -+} -+ -+int -+ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ EPRINTF5 (DBG_XMTR, "ep_xmtr_prefrail: xmtr=%p allowedRails=%x nodeId=%d xmtr->RailMaks=%x Connected=%x\n", -+ xmtr, allowedRails, nodeId, xmtr->RailMask, node->ConnectedRails); -+ -+ /* Return a single rail which is currently connected to nodeId (limited to rails -+ * in allowedmask) - if more than one rail is possible, then round-robin between -+ * them */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask & node->ConnectedRails)); -+} -+ -+EP_RAILMASK -+ep_xmtr_availrails (EP_XMTR *xmtr) -+{ -+ /* Return which rails can be used to transmit one. */ -+ -+ return (xmtr->RailMask); -+} -+ -+EP_RAILMASK -+ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ /* Return which rails can be used to transmit to this node. */ -+ -+ return (xmtr->RailMask & node->ConnectedRails); -+} -+ -+int -+ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails) -+{ -+ /* Return the "best" rail for queueing a receive buffer out on - this will be a -+ * rail with ThreadWaiting set or the rail with the least descriptors queued -+ * on it. */ -+ -+ return (ep_pickRail (allowedRails & rcvr->RailMask)); -+} -+ -+EP_RAILMASK -+ep_rcvr_availrails (EP_RCVR *rcvr) -+{ -+ /* Return which rails can be used to queue receive buffers. */ -+ return (rcvr->RailMask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/rmap.c linux-2.6.9/drivers/net/qsnet/ep/rmap.c ---- clean/drivers/net/qsnet/ep/rmap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/rmap.c 2004-05-19 06:24:38.000000000 -0400 -@@ -0,0 +1,365 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmap.c,v 1.15 2004/05/19 10:24:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_display_rmap (EP_RMAP *mp) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ ep_debugf (DBG_DEBUG, "map: %s size %d free %d\n", mp->m_name, mp->m_size, mp->m_free); -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ ep_debugf (DBG_DEBUG, " [%lx - %lx]\n", bp->m_addr, bp->m_addr+bp->m_size-1); -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+void -+ep_mapinit (EP_RMAP *mp, char *name, u_int mapsize) -+{ -+ spin_lock_init (&mp->m_lock); -+ kcondvar_init (&mp->m_wait); -+ -+ /* The final segment in the array has size 0 and acts as a delimiter -+ * we insure that we never use segments past the end of the array by -+ * maintaining a free segment count in m_free. When excess segments -+ * occur we discard some resources */ -+ -+ mp->m_size = mapsize; -+ mp->m_free = mapsize; -+ mp->m_name = name; -+ -+ bzero (mp->m_map, sizeof (EP_RMAP_ENTRY) * (mapsize+1)); -+} -+ -+EP_RMAP * -+ep_rmallocmap (size_t mapsize, char *name, int cansleep) -+{ -+ EP_RMAP *mp; -+ -+ KMEM_ZALLOC (mp, EP_RMAP *, sizeof (EP_RMAP) + mapsize*sizeof (EP_RMAP_ENTRY), cansleep); -+ -+ if (mp != NULL) -+ ep_mapinit (mp, name, mapsize); -+ -+ return (mp); -+} -+ -+void -+ep_rmfreemap (EP_RMAP *mp) -+{ -+ spin_lock_destroy (&mp->m_lock); -+ kcondvar_destroy (&mp->m_wait); -+ -+ KMEM_FREE (mp, sizeof (EP_RMAP) + mp->m_size * sizeof (EP_RMAP_ENTRY)); -+} -+ -+static u_long -+ep_rmalloc_locked (EP_RMAP *mp, size_t size) -+{ -+ EP_RMAP_ENTRY *bp; -+ u_long addr; -+ -+ ASSERT (size > 0); -+ ASSERT (SPINLOCK_HELD (&mp->m_lock)); -+ -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ if (bp->m_size >= size) -+ { -+ addr = bp->m_addr; -+ bp->m_addr += size; -+ -+ if ((bp->m_size -= size) == 0) -+ { -+ /* taken all of this slot - so shift the map down */ -+ do { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ } while (((bp-1)->m_size = bp->m_size) != 0); -+ -+ mp->m_free++; -+ } -+ return (addr); -+ } -+ } -+ -+ return (0); -+} -+ -+u_long -+ep_rmalloc (EP_RMAP *mp, size_t size, int cansleep) -+{ -+ unsigned long addr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ while ((addr = ep_rmalloc_locked (mp, size)) == 0 && cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ -+ return (addr); -+} -+ -+ -+ -+u_long -+ep_rmalloc_constrained (EP_RMAP *mp, size_t size, u_long alo, u_long ahi, u_long align, int cansleep) -+{ -+ EP_RMAP_ENTRY *bp, *bp2, *lbp; -+ unsigned long addr=0; -+ size_t delta; -+ int ok; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ again: -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ delta = 0; -+ -+ if (alo < bp->m_addr) -+ { -+ addr = bp->m_addr; -+ -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = (bp->m_size >= (size + delta)); -+ else -+ ok = ((bp->m_addr + size + delta) <= ahi); -+ } -+ else -+ { -+ addr = alo; -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = ((alo + size + delta) <= (bp->m_addr + bp->m_size)); -+ else -+ ok = ((alo + size + delta) <= ahi); -+ } -+ -+ if (ok) -+ break; -+ } -+ -+ if (bp->m_size == 0) -+ { -+ if (cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ goto again; -+ } -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (0); -+ } -+ -+ /* found an approriate map entry - so take the bit out which we want */ -+ if (bp->m_addr == addr) -+ { -+ if (bp->m_size == size) -+ { -+ /* allocate entire segment and compress map */ -+ bp2 = bp; -+ while (bp2->m_size) -+ { -+ bp2++; -+ (bp2-1)->m_addr = bp2->m_addr; -+ (bp2-1)->m_size = bp2->m_size; -+ } -+ mp->m_free++; -+ } -+ else -+ { -+ /* take from start of segment */ -+ bp->m_addr += size; -+ bp->m_size -= size; -+ } -+ } -+ else -+ { -+ if (bp->m_addr + bp->m_size == addr + size) -+ { -+ /* take from end of segment */ -+ bp->m_size -= size; -+ } -+ else -+ { -+ /* split the segment loosing the last entry if there's no space */ -+ if (mp->m_free == 0) -+ { -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ } -+ -+ for (bp2 = bp; bp2->m_size != 0; bp2++) -+ continue; -+ -+ for (bp2--; bp2 > bp; bp2--) -+ { -+ (bp2+1)->m_addr = bp2->m_addr; -+ (bp2+1)->m_size = bp2->m_size; -+ } -+ -+ mp->m_free--; -+ -+ (bp+1)->m_addr = addr + size; -+ (bp+1)->m_size = bp->m_addr + bp->m_size - (addr + size); -+ bp->m_size = addr - bp->m_addr; -+ } -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (addr); -+} -+ -+void -+ep_rmfree (EP_RMAP *mp, size_t size, u_long addr) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long t; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ -+ ASSERT (addr != 0 && size > 0); -+ -+again: -+ /* find the piece of the map which starts after the returned space -+ * or the end of the map */ -+ for (bp = &mp->m_map[0]; bp->m_addr <= addr && bp->m_size != 0; bp++) -+ ; -+ -+ /* bp points to the piece to the right of where we want to go */ -+ -+ if (bp > &mp->m_map[0] && (bp-1)->m_addr + (bp-1)->m_size >= addr) -+ { -+ /* merge with piece on the left */ -+ -+ ASSERT ((bp-1)->m_addr + (bp-1)->m_size <= addr); -+ -+ (bp-1)->m_size += size; -+ -+ ASSERT (bp->m_size == 0 || addr+size <= bp->m_addr); -+ -+ if (bp->m_size && (addr + size) == bp->m_addr) -+ { -+ /* merge witht he piece on the right by -+ * growing the piece on the left and shifting -+ * the map down */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ (bp-1)->m_size += bp->m_size; -+ while (bp->m_size) -+ { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ (bp-1)->m_size = bp->m_size; -+ } -+ -+ mp->m_free++; -+ } -+ } -+ else if (addr + size >= bp->m_addr && bp->m_size) -+ { -+ /* merge with piece to the right */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ bp->m_addr -= size; -+ bp->m_size += size; -+ } -+ else -+ { -+ /* doesn't join with left or right - check for map -+ overflow and discard the smallest of the last or -+ next to last entries */ -+ -+ if (mp->m_free == 0) -+ { -+ EP_RMAP_ENTRY *lbp; -+ -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ goto again; -+ } -+ -+ /* make a new entry and push the remaining ones up */ -+ do { -+ t = bp->m_addr; -+ bp->m_addr = addr; -+ addr = t; -+ t = bp->m_size; -+ bp->m_size = size; -+ bp++; -+ } while ((size = t) != 0); -+ -+ mp->m_free--; -+ } -+ -+ /* if anyone blocked on rmalloc failure, wake 'em up */ -+ if (mp->m_want) -+ { -+ mp->m_want = 0; -+ kcondvar_wakeupall (&mp->m_wait, &mp->m_lock); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/spinlock_elan3_thread.c linux-2.6.9/drivers/net/qsnet/ep/spinlock_elan3_thread.c ---- clean/drivers/net/qsnet/ep/spinlock_elan3_thread.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/spinlock_elan3_thread.c 2003-10-07 09:22:38.000000000 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: spinlock_elan3_thread.c,v 1.9 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/spinlock_elan3_thread.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3_spinblock (EP3_SPINLOCK_ELAN *sle, EP3_SPINLOCK_MAIN *sl) -+{ -+ do { -+ sl->sl_seq = sle->sl_seq; /* Release my lock */ -+ -+ while (sle->sl_lock) /* Wait until the main */ -+ c_break(); /* releases the lock */ -+ -+ sle->sl_seq++; /* and try and relock */ -+ } while (sle->sl_lock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/statemap.c linux-2.6.9/drivers/net/qsnet/ep/statemap.c ---- clean/drivers/net/qsnet/ep/statemap.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/statemap.c 2004-09-01 12:13:43.000000000 -0400 -@@ -0,0 +1,385 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statemap.c,v 1.12 2004/09/01 16:13:43 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.c,v $ */ -+ -+#include -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+static int -+statemap_setmapbit (bitmap_t *map, int offset, int bit) -+{ -+ bitmap_t *e = &map[offset >> BT_ULSHIFT]; -+ bitmap_t mask = ((bitmap_t)1) << (offset & BT_ULMASK); -+ int rc = ((*e) & mask) != 0; -+ -+ if (bit) -+ { -+ *e |= mask; -+ return (!rc); -+ } -+ -+ *e &= ~mask; -+ return (rc); -+} -+ -+static int -+statemap_firstsegbit (bitmap_t seg) -+{ -+ int bit = 0; -+ -+ if (seg == 0) -+ return (-1); -+ -+#if (BT_ULSHIFT == 6) -+ if ((seg & 0xffffffffL) == 0) -+ { -+ seg >>= 32; -+ bit += 32; -+ } -+#elif (BT_ULSHIFT != 5) -+# error "Unexpected value of BT_ULSHIFT" -+#endif -+ -+ if ((seg & 0xffff) == 0) -+ { -+ seg >>= 16; -+ bit += 16; -+ } -+ -+ if ((seg & 0xff) == 0) -+ { -+ seg >>= 8; -+ bit += 8; -+ } -+ -+ if ((seg & 0xf) == 0) -+ { -+ seg >>= 4; -+ bit += 4; -+ } -+ -+ if ((seg & 0x3) == 0) -+ { -+ seg >>= 2; -+ bit += 2; -+ } -+ -+ return (((seg & 0x1) == 0) ? bit + 1 : bit); -+} -+ -+bitmap_t -+statemap_getseg (statemap_t *map, unsigned int offset) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ return (map->bitmap[offset >> BT_ULSHIFT]); -+} -+ -+void -+statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ offset >>= BT_ULSHIFT; -+ if (map->bitmap[offset] == seg) -+ return; -+ -+ map->bitmap[offset] = seg; -+ -+ if (statemap_setmapbit (map->changemap2, offset, 1) && -+ statemap_setmapbit (map->changemap1, offset >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, offset >>= BT_ULSHIFT, 1); -+} -+ -+bitmap_t -+statemap_getbits (statemap_t *map, unsigned int offset, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask = (nbits == BT_NBIPUL) ? (bitmap_t) -1 : (((bitmap_t)1) << nbits) - 1; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ return ((map->bitmap[index] >> offset) & mask); -+ -+ return (((map->bitmap[index] >> offset) | -+ (map->bitmap[index + 1] << (BT_NBIPUL - offset))) & mask); -+} -+ -+void -+statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask; -+ bitmap_t seg; -+ bitmap_t newseg; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ { -+ mask = ((nbits == BT_NBIPUL) ? -1 : ((((bitmap_t)1) << nbits) - 1)) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+ return; -+ } -+ -+ mask = ((bitmap_t)-1) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg != newseg) -+ { -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >> BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >> (2 * BT_ULSHIFT), 1); -+ } -+ -+ index++; -+ offset = BT_NBIPUL - offset; -+ mask = (((bitmap_t)1) << (nbits - offset)) - 1; -+ seg = map->bitmap[index]; -+ newseg = ((bits >> offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+} -+ -+void -+statemap_zero (statemap_t *dst) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, offset += BT_NBIPUL) -+ { -+ *dstmap = 0; -+ *changemap2 |= bit2; -+ } -+ *changemap1 |= bit1; -+ } -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_setmap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ if (*dstmap != *srcmap) -+ { -+ *dstmap = *srcmap; -+ *changemap2 |= bit2; -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_ormap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ bitmap_t seg; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ { -+ seg = *dstmap | *srcmap; -+ if (*dstmap != seg) -+ { -+ *dstmap = seg; -+ *changemap2 |= bit2; -+ } -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+int -+statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange) -+{ -+ int bit0; -+ bitmap_t *cm1; -+ int bit1; -+ bitmap_t *cm2; -+ int bit2; -+ unsigned int offset; -+ -+ bit0 = statemap_firstsegbit (*(map->changemap0)); -+ if (bit0 < 0) -+ return (-1); -+ -+ offset = bit0; -+ cm1 = map->changemap1 + offset; -+ bit1 = statemap_firstsegbit (*cm1); -+ ASSERT (bit1 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit1; -+ cm2 = map->changemap2 + offset; -+ bit2 = statemap_firstsegbit (*cm2); -+ ASSERT (bit2 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit2; -+ *newseg = map->bitmap[offset]; -+ -+ if (clearchange && -+ (*cm2 &= ~(((bitmap_t)1) << bit2)) == 0 && -+ (*cm1 &= ~(((bitmap_t)1) << bit1)) == 0) -+ map->changemap0[0] &= ~(((bitmap_t)1) << bit0); -+ -+ return (offset << BT_ULSHIFT); -+} -+ -+int -+statemap_changed (statemap_t *map) -+{ -+ return ((*(map->changemap0) != 0)); -+} -+ -+void -+statemap_reset (statemap_t *map) -+{ -+ bzero (map->changemap0, map->changemap_nob + map->bitmap_nob); -+} -+ -+void -+statemap_copy (statemap_t *dst, statemap_t *src) -+{ -+ ASSERT (dst->size == src->size); -+ bcopy (src->changemap0, dst->changemap0, src->changemap_nob + src->bitmap_nob); -+} -+ -+void -+statemap_clearchanges (statemap_t *map) -+{ -+ if (statemap_changed (map)) -+ bzero (map->changemap0, map->changemap_nob); -+} -+ -+bitmap_t * -+statemap_tobitmap (statemap_t *map) -+{ -+ return (map->bitmap); -+} -+ -+statemap_t * -+statemap_create (int size) -+{ -+ int struct_entries = (sizeof (statemap_t) * 8 + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int bitmap_entries = (size + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap2_entries = (bitmap_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap1_entries = (changemap2_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap0_entries = (changemap1_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap_entries = changemap0_entries + changemap1_entries + changemap2_entries; -+ int nob = (struct_entries + bitmap_entries + changemap_entries) * sizeof (bitmap_t); -+ statemap_t *map; -+ -+ ASSERT ((1 << BT_ULSHIFT) == BT_NBIPUL); -+ ASSERT (changemap0_entries == 1); -+ -+ KMEM_ZALLOC (map, statemap_t *, nob, 1); -+ -+ map->size = size; -+ map->nob = nob; -+ map->changemap_nob = changemap_entries * sizeof (bitmap_t); -+ map->bitmap_nob = bitmap_entries * sizeof (bitmap_t); -+ map->changemap0 = ((bitmap_t *)map) + struct_entries; -+ map->changemap1 = map->changemap0 + changemap0_entries; -+ map->changemap2 = map->changemap1 + changemap1_entries; -+ map->bitmap = map->changemap2 + changemap2_entries; -+ -+ return (map); -+} -+ -+void -+statemap_destroy (statemap_t *map) -+{ -+ KMEM_FREE (map, map->nob); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/statusmon.h linux-2.6.9/drivers/net/qsnet/ep/statusmon.h ---- clean/drivers/net/qsnet/ep/statusmon.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/statusmon.h 2003-10-07 09:22:38.000000000 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statusmon.h,v 1.6 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statusmon.h,v $*/ -+ -+#ifndef __ELAN3_STATUSMON_H -+#define __ELAN3_STATUSMON_H -+ -+typedef struct statusmon_node -+{ -+ u_int NodeId; -+ u_int State; -+} STATUSMON_SGMT; -+ -+typedef struct statusmon_level -+{ -+ unsigned Width; -+ STATUSMON_SGMT Nodes[CM_SGMTS_PER_LEVEL]; -+} STATUSMON_LEVEL; -+ -+typedef struct statusmon_msg -+{ -+ unsigned Type; -+ unsigned NodeId; -+ unsigned NumLevels; -+ unsigned TopLevel; -+ unsigned Role; -+ STATUSMON_LEVEL Levels[CM_MAX_LEVELS]; -+} STATUSMON_MSG; -+ -+ -+#endif /* __ELAN3_STATUSMON_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/support.c linux-2.6.9/drivers/net/qsnet/ep/support.c ---- clean/drivers/net/qsnet/ep/support.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/support.c 2004-09-30 10:59:15.000000000 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support.c,v 1.39 2004/09/30 14:59:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support.c,v $ */ -+ -+#include -+#include -+ -+/****************************************************************************************/ -+/* -+ * Nodeset/flush callbacks. -+ */ -+int -+ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ -+ KMEM_ALLOC (cb, EP_CALLBACK *, sizeof (EP_CALLBACK), 1); -+ -+ cb->Routine = routine; -+ cb->Arg = arg; -+ -+ kmutex_lock (&rail->CallbackLock); -+ cb->Next = rail->CallbackList[idx]; -+ rail->CallbackList[idx] = cb; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ EP_CALLBACK **predp; -+ -+ kmutex_lock (&rail->CallbackLock); -+ for (predp = &rail->CallbackList[idx]; (cb = *predp); predp = &cb->Next) -+ if (cb->Routine == routine && cb->Arg == arg) -+ break; -+ -+ if (cb == NULL) -+ panic ("ep_remove_member_callback"); -+ -+ *predp = cb->Next; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ KMEM_FREE (cb, sizeof (EP_CALLBACK)); -+} -+ -+void -+ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *map) -+{ -+ EP_CALLBACK *cb; -+ -+ kmutex_lock (&rail->CallbackLock); -+ -+ rail->CallbackStep = idx; -+ -+ for (cb = rail->CallbackList[idx]; cb; cb = cb->Next) { -+ (cb->Routine) (cb->Arg, map); -+ } -+ kmutex_unlock (&rail->CallbackLock); -+} -+ -+unsigned int -+ep_backoff (EP_BACKOFF *backoff, int type) -+{ -+ static int bcount[EP_NUM_BACKOFF] = {1, 16, 32, 64, 128, 256, 512, 1024}; -+ -+ if (backoff->type != type) -+ { -+ backoff->type = type; -+ backoff->indx = 0; -+ backoff->count = 0; -+ } -+ -+ if (++backoff->count > bcount[backoff->indx] && backoff->indx < (EP_NUM_BACKOFF-1)) -+ { -+ backoff->indx++; -+ backoff->count = 0; -+ } -+ -+ return (backoff->indx); -+} -+ -+/* Generic checksum algorithm */ -+uint16_t -+CheckSum (char *msg, int nob) -+{ -+ uint16_t sum = 0; -+ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/support_elan3.c linux-2.6.9/drivers/net/qsnet/ep/support_elan3.c ---- clean/drivers/net/qsnet/ep/support_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/support_elan3.c 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,2123 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan3.c,v 1.47.2.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan3.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#include -+#include -+ -+/****************************************************************************************/ -+#define DMA_RING_NEXT_POS(ring) ((ring)->Position+1 == ring->Entries ? 0 : ((ring)->Position+1)) -+#define DMA_RING_PREV_POS(ring,pos) ((pos) == 0 ? (ring)->Entries-1 : (pos) - 1) -+ -+static int -+DmaRingCreate (EP3_RAIL *rail, EP3_DMA_RING *ring, int ctxnum, int entries) -+{ -+ unsigned long pgnum = (ctxnum * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = (ctxnum * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ int s; -+ -+ /* set up the initial position */ -+ ring->Entries = entries; -+ ring->Position = 0; -+ -+ if (! (ring->pEvent = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_BlockCopyEvent), 0, &ring->epEvent))) -+ { -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDma = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_DMA), 0, &ring->epDma))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDoneBlk = ep_alloc_main (&rail->Generic, entries * sizeof (E3_uint32), 0, &ring->epDoneBlk))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (MapDeviceRegister (rail->Device, ELAN3_BAR_COMMAND_PORT, &ring->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ring->CommandPageHandle) != ESUCCESS) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, entries * sizeof (E3_uint32)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ ring->CommandPort = ring->CommandPage + pgoff; -+ -+ for (s = 0; s < entries; s++) -+ { -+ /* setup the event */ -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Type), -+ EV_TYPE_BCOPY | EV_TYPE_DMA | DMA_RING_DMA_ELAN(ring, s)); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Source), DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Dest), DMA_RING_DONE_ELAN(ring,s) | EV_TYPE_BCOPY_WORD ); -+ -+ /* need to set all the doneBlks to appear that they have completed */ -+ ring->pDoneBlk[s] = DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY; -+ } -+ -+ return 0; /* success */ -+} -+ -+static void -+DmaRingRelease(EP3_RAIL *rail, EP3_DMA_RING *ring) -+{ -+ if (ring->CommandPage != (ioaddr_t) 0) -+ { -+ UnmapDeviceRegister(rail->Device, &ring->CommandPageHandle); -+ -+ ep_free_elan (&rail->Generic, ring->epEvent, ring->Entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, ring->Entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, ring->Entries * sizeof (E3_uint32)); -+ } -+ ring->CommandPage = (ioaddr_t) 0; -+} -+ -+void -+DmaRingsRelease (EP3_RAIL *rail) -+{ -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_CRITICAL]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_HIGH_PRI]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_LOW_PRI]); -+} -+ -+int -+DmaRingsCreate (EP3_RAIL *rail) -+{ -+ if (DmaRingCreate (rail, &rail->DmaRings[EP3_RING_CRITICAL], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_CRITICAL, EP3_RING_CRITICAL_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_HIGH_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_HIGH_PRI, EP3_RING_HIGH_PRI_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_LOW_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_LOW_PRI, EP3_RING_LOW_PRI_LEN)) -+ { -+ DmaRingsRelease (rail); -+ return (ENOMEM); -+ } -+ -+ return 0; -+} -+ -+static int -+DmaRingNextSlot (EP3_DMA_RING *ring) -+{ -+ int pos = ring->Position; -+ int npos = DMA_RING_NEXT_POS(ring); -+ -+ if (ring->pDoneBlk[npos] == EP3_EVENT_ACTIVE) -+ return (-1); -+ -+ ring->pDoneBlk[pos] = EP3_EVENT_ACTIVE; -+ -+ ring->Position = npos; /* move on one */ -+ -+ return (pos); -+} -+ -+ -+/****************************************************************************************/ -+/* -+ * Dma/event command issueing - these handle cproc queue overflow traps. -+ */ -+static int -+DmaRunQueueSizeCheck (EP3_RAIL *rail, E3_uint32 len) -+{ -+ E3_uint64 FandBPtr = read_reg64 (rail->Device, DProc_SysCntx_FPtr); -+ E3_uint32 FPtr, BPtr; -+ E3_uint32 qlen; -+ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ FPtr = (FandBPtr & 0xFFFFFFFFull); -+ BPtr = (FandBPtr >> 32); -+#else -+ FPtr = (FandBPtr >> 32); -+ BPtr = (FandBPtr & 0xFFFFFFFFull); -+#endif -+ -+ qlen = (((BPtr - FPtr)/sizeof (E3_DMA)) & (E3_SysCntxQueueSize-1)); -+ -+ if (qlen < 4) IncrStat (rail, DmaQueueLength[0]); -+ else if (qlen < 8) IncrStat (rail, DmaQueueLength[1]); -+ else if (qlen < 16) IncrStat (rail, DmaQueueLength[2]); -+ else if (qlen < 32) IncrStat (rail, DmaQueueLength[3]); -+ else if (qlen < 64) IncrStat (rail, DmaQueueLength[4]); -+ else if (qlen < 128) IncrStat (rail, DmaQueueLength[5]); -+ else if (qlen < 240) IncrStat (rail, DmaQueueLength[6]); -+ else IncrStat (rail, DmaQueueLength[7]); -+ -+ return (qlen < len); -+} -+ -+int -+IssueDma (EP3_RAIL *rail, E3_DMA_BE * dmabe, int type, int retryThread) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RETRY_DMA *retry; -+ EP3_DMA_RING *ring; -+ int slot; -+ int i, res; -+ unsigned long flags; -+ -+ ASSERT (dmabe->s.dma_direction == DMA_WRITE || dmabe->s.dma_direction == DMA_READ_REQUEUE); -+ -+ ASSERT (! EP_VP_ISDATA(dmabe->s.dma_destVProc) || -+ (dmabe->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dmabe->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dmabe->s.dma_destVProc) == rail->Generic.Position.pos_nodeid)); -+ -+ /* -+ * If we're not the retry thread - then don't issue this DMA -+ * if there are any already queued on the retry lists with -+ * higher or equal priority than this one that are ready to -+ * retry. -+ */ -+ if (! retryThread) -+ { -+ for (i = EP_RETRY_BASE; i < type; i++) -+ { -+ if (list_empty (&rail->DmaRetries[i])) -+ continue; -+ -+ retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (AFTER (lbolt, retry->RetryTime)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ } -+ } -+ -+ /* -+ * Depending on the type of DMA we're issuing - throttle back -+ * issueing of it if the DMA run queue is too full. This then -+ * prioritises the "special" messages and completing data -+ * transfers which have matched a receive buffer. -+ */ -+ -+ if (type >= EP_RETRY_LOW_PRI_RETRY) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 2)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type == EP_RETRY_LOW_PRI) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 3)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type >= EP_RETRY_HIGH_PRI) -+ ring = &rail->DmaRings[EP3_RING_HIGH_PRI]; -+ else -+ ring = &rail->DmaRings[EP3_RING_CRITICAL]; -+ -+ local_irq_save (flags); -+ if (! spin_trylock (&dev->CProcLock)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ if ((slot = DmaRingNextSlot (ring)) == -1) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ EPRINTF4 (DBG_COMMAND, "IssueDma: type %08x size %08x Elan source %08x Elan dest %08x\n", -+ dmabe->s.dma_type, dmabe->s.dma_size, dmabe->s.dma_source, dmabe->s.dma_dest); -+ EPRINTF2 (DBG_COMMAND, " dst event %08x cookie/proc %08x\n", -+ dmabe->s.dma_destEvent, dmabe->s.dma_destCookieVProc); -+ EPRINTF2 (DBG_COMMAND, " src event %08x cookie/proc %08x\n", -+ dmabe->s.dma_srcEvent, dmabe->s.dma_srcCookieVProc); -+ -+ elan3_sdram_copyq_to_sdram (dev, dmabe, DMA_RING_DMA(ring, slot), sizeof (E3_DMA)); /* PCI write block */ -+ elan3_sdram_writel (dev, DMA_RING_EVENT(ring, slot) + offsetof (E3_BlockCopyEvent, ev_Count), 1); /* PCI write */ -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (DMA_RING_EVENT_ELAN(ring,slot), (void *)(ring->CommandPort + offsetof (E3_CommandPort, SetEvent))); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock (&dev->CProcLock); -+ } -+ local_irq_restore (flags); -+ -+ return (res); -+} -+ -+int -+IssueWaitevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (rail->CommandPortEventTrap == FALSE); -+ -+ /* -+ * Disable the command processor interrupts, so that we don't see -+ * spurious interrupts appearing. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, (void *)(rail->CommandPort + offsetof (E3_CommandPort, WaitEvent0))); -+ mmiob(); /* and flush through IO writes */ -+ -+ do { -+ res = CheckCommandQueueFlushed (rail->Ctxt, EventComQueueNotEmpty, ISSUE_COMMAND_CANT_WAIT, &flags); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: CheckCommandQueueFlushed -> %d\n", res); -+ -+ if (res == ISSUE_COMMAND_WAIT) -+ HandleCProcTrap (dev, 0, NULL); -+ } while (res != ISSUE_COMMAND_OK); -+ -+ if (! rail->CommandPortEventTrap) -+ res = ISSUE_COMMAND_OK; -+ else -+ { -+ rail->CommandPortEventTrap = FALSE; -+ res = ISSUE_COMMAND_TRAPPED; -+ } -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: -> %d\n", res); -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+IssueSetevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueSetevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, (void *)(rail->CommandPort + offsetof (E3_CommandPort, SetEvent))); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+void -+IssueRunThread (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueRunThread: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, (void *)(rail->CommandPort + offsetof (E3_CommandPort, RunThread))); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+/****************************************************************************************/ -+/* -+ * DMA retry list management -+ */ -+static unsigned DmaRetryTimes[EP_NUM_RETRIES]; -+ -+static void -+ep3_dma_retry (EP3_RAIL *rail) -+{ -+ EP3_COOKIE *cp; -+ int res; -+ int vp; -+ unsigned long flags; -+ int i; -+ -+ kernel_thread_init("ep3_dma_retry"); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ for (;;) -+ { -+ long yieldAt = lbolt + (hz/10); -+ long retryTime = 0; -+ -+ if (rail->DmaRetryThreadShouldStop) -+ break; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (! AFTER (lbolt, retry->RetryTime)) -+ break; -+ -+ if (rail->DmaRetryThreadShouldStall || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF2 (DBG_RETRY, "%s: DmaRetryThread: retry %p\n", rail->Generic.Name, retry); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+#if defined(DEBUG) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_srcEvent); -+ else -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_destEvent); -+ -+ ASSERT (cp != NULL || (retry->Dma.s.dma_srcEvent == 0 && retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_isRemote)); -+ -+ if (cp && cp->Operations->DmaVerify) -+ cp->Operations->DmaVerify (rail, cp->Arg, &retry->Dma); -+#endif -+ -+#if defined(DEBUG_ASSERT) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || -+ (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ res = IssueDma (rail, &(retry->Dma), i, TRUE); -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ goto cant_do_more; -+ -+ /* Command issued, so remove from list, and add to free list */ -+ list_del (&retry->Link); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ } -+ } -+ cant_do_more: -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (!list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ retryTime = retryTime ? MIN(retryTime, retry->RetryTime) : retry->RetryTime; -+ } -+ } -+ -+ if (retryTime && !AFTER (retryTime, lbolt)) -+ retryTime = lbolt + 1; -+ -+ do { -+ EPRINTF3 (DBG_RETRY, "%s: ep_cm_retry: %s %lx\n", rail->Generic.Name, rail->DmaRetryThreadShouldStall ? "stalled" : "sleeping", retryTime); -+ -+ if (rail->DmaRetryTime == 0 || (retryTime != 0 && retryTime < rail->DmaRetryTime)) -+ rail->DmaRetryTime = retryTime; -+ -+ rail->DmaRetrySleeping = TRUE; -+ -+ if (rail->DmaRetryThreadShouldStall) /* wakeup threads waiting in StallDmaRetryThread */ -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); /* for us to really go to sleep for good. */ -+ -+ if (rail->DmaRetryTime == 0 || rail->DmaRetryThreadShouldStall) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ else -+ kcondvar_timedwait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags, rail->DmaRetryTime); -+ -+ rail->DmaRetrySleeping = FALSE; -+ -+ } while (rail->DmaRetryThreadShouldStall); -+ -+ rail->DmaRetryTime = 0; -+ } -+ -+ rail->DmaRetryThreadStopped = 1; -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+StallDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStall++; -+ -+ while (! rail->DmaRetrySleeping) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+ResumeDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ ASSERT (rail->DmaRetrySleeping); -+ -+ if (--rail->DmaRetryThreadShouldStall == 0) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+int -+InitialiseDmaRetries (EP3_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->DmaRetryLock); -+ kcondvar_init (&rail->DmaRetryWait); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->DmaRetries[i]); -+ -+ INIT_LIST_HEAD (&rail->DmaRetryFreeList); -+ -+ DmaRetryTimes[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ DmaRetryTimes[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->DmaRetryInitialised = 1; -+ -+ if (kernel_thread_create (ep3_dma_retry, (void *) rail) == 0) -+ { -+ spin_lock_destroy (&rail->DmaRetryLock); -+ return (ENOMEM); -+ } -+ -+ rail->DmaRetryThreadStarted = 1; -+ -+ return (ESUCCESS); -+} -+ -+void -+DestroyDmaRetries (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStop = 1; -+ while (rail->DmaRetryThreadStarted && !rail->DmaRetryThreadStopped) -+ { -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ } -+ rail->DmaRetryThreadStarted = 0; -+ rail->DmaRetryThreadStopped = 0; -+ rail->DmaRetryThreadShouldStop = 0; -+ rail->DmaRetryInitialised = 0; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ ASSERT (rail->DmaRetryReserved == 0); -+ -+ while (! list_empty (&rail->DmaRetryFreeList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ KMEM_FREE (retry, sizeof (EP3_RETRY_DMA)); -+ } -+ -+ kcondvar_destroy (&rail->DmaRetryWait); -+ spin_lock_destroy (&rail->DmaRetryLock); -+} -+ -+int -+ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr) -+{ -+ EP3_RETRY_DMA *retry; -+ int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (remaining <= (rail->DmaRetryCount - rail->DmaRetryReserved)) -+ { -+ rail->DmaRetryReserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ESUCCESS); -+ } -+ -+ remaining -= (rail->DmaRetryCount - rail->DmaRetryReserved); -+ -+ rail->DmaRetryReserved = rail->DmaRetryCount; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ while (remaining) -+ { -+ KMEM_ALLOC (retry, EP3_RETRY_DMA *, sizeof (EP3_RETRY_DMA), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ /* clear E3_DMA */ -+ bzero((char *)(&(retry->Dma.s)), sizeof(E3_DMA)); -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ rail->DmaRetryCount++; -+ rail->DmaRetryReserved++; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+ return (ESUCCESS); -+ -+ failed: -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ENOMEM); -+} -+ -+void -+ReleaseDmaRetries (EP3_RAIL *rail, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= count; -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval) -+{ -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaForRetry: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n",rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ retry->RetryTime = lbolt + DmaRetryTimes[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->Link, &rail->DmaRetries[interval]); -+ -+ /* now wakeup the retry thread */ -+ if (rail->DmaRetryTime == 0 || retry->RetryTime < rail->DmaRetryTime) -+ rail->DmaRetryTime = retry->RetryTime; -+ -+ if (rail->DmaRetrySleeping && !rail->DmaRetryThreadShouldStall) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) : -+ EP_VP_TO_NODE(dma->s.dma_destVProc)]; -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaOnStalledList: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->Link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->DmaRetryFreeList); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+/****************************************************************************************/ -+/* -+ * Connection management. -+ */ -+static void -+DiscardingHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+typedef struct { -+ EP3_RAIL *rail; -+ sdramaddr_t qaddr; -+} SetQueueFullData; -+ -+static void -+SetQueueLockedOperation (ELAN3_DEV *dev, void *arg) -+{ -+ SetQueueFullData *data = (SetQueueFullData *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3_sdram_writel (dev, data->qaddr, E3_QUEUE_LOCKED | elan3_sdram_readl(dev, data->qaddr)); -+ -+ data->rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&data->rail->HaltOpSleep, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+static void -+FlushDmaQueuesHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ EP_NODE_RAIL *node; -+ int vp; -+ unsigned long flags; -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_type, dma.s.dma_size, dma.s.dma_source, dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_destEvent, dma.s.dma_destCookieVProc, -+ dma.s.dma_srcEvent, dma.s.dma_srcCookieVProc); -+ -+ ASSERT ((dma.s.dma_u.s.Context & SYS_CONTEXT_BIT) != 0); -+ -+ if (dma.s.dma_direction == DMA_WRITE) -+ vp = dma.s.dma_destVProc; -+ else -+ vp = dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ EPRINTF1 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: move dma to cancelled list\n", rail->Generic.Name); -+ -+ if (dma.s.dma_direction != DMA_WRITE) -+ { -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+SetQueueLocked (EP3_RAIL *rail, sdramaddr_t qaddr) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ SetQueueFullData data; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ data.rail = rail; -+ data.qaddr = qaddr; -+ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx | INT_TProcHalted, SetQueueLockedOperation, &data); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_filters (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx, DiscardingHaltOperation, rail); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_queues (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el; -+ struct list_head *nel; -+ EP_NODE_RAIL *node; -+ unsigned long flags; -+ int vp, i; -+ -+ ASSERT (NO_LOCKS_HELD); -+ -+ /* First - stall the dma retry thread, so that it will no longer -+ * restart any dma's from the rety lists. */ -+ StallDmaRetryThread (rail); -+ -+ /* Second - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queue. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, FlushDmaQueuesHaltOperation, rail); -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+ -+ /* Third - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueues: %08x %08x %08x %08x\n",rail->Generic.Name, -+ retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+ -+ list_del (&retry->Link); -+ -+ list_add_tail (&retry->Link, &node->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Finally - allow the dma retry thread to run again */ -+ ResumeDmaRetryThread (rail); -+} -+ -+/****************************************************************************************/ -+/* NOTE - we require that all cookies are non-zero, which is -+ * achieved because EP_VP_DATA() is non-zero for all -+ * nodes */ -+E3_uint32 -+LocalCookie (EP3_RAIL *rail, unsigned remoteNode) -+{ -+ E3_uint32 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(rail->Generic.Position.pos_nodeid)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for srcCookie - this is what is sent -+ * to the remote node along with the setevent from the put -+ * or the dma descriptor for a get */ -+ return (cookie); -+} -+ -+E3_uint32 -+RemoteCookie (EP3_RAIL *rail, u_int remoteNode) -+{ -+ uint32_t cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_REMOTE_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(remoteNode)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for dstCookie - this is the cookie -+ * that the "remote put" dma uses for it's setevent packets for -+ * a get dma */ -+ -+ return (cookie); -+} -+ -+/****************************************************************************************/ -+/* -+ * Event Cookie management. -+ * -+ * We find the ep_cookie in one of two ways: -+ * 1) for block copy events -+ * the cookie value is stored in the ev_Source - for EVIRQ events -+ * it is also stored in the ev_Type -+ * 2) for normal events -+ * we just use the event address. -+ */ -+void -+InitialiseCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ spin_lock_init (&table->Lock); -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ table->Entries[i] = NULL; -+} -+ -+void -+DestroyCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ if (table->Entries[i]) -+ printk ("DestroyCookieTable: entry %d not empty\n", i); -+ -+ spin_lock_destroy (&table->Lock); -+} -+ -+void -+RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp, E3_uint32 cookie, EP3_COOKIE_OPS *ops, void *arg) -+{ -+ EP3_COOKIE *tcp; -+ int hashval = EP3_HASH_COOKIE(cookie); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ cp->Operations = ops; -+ cp->Arg = arg; -+ cp->Cookie = cookie; -+ -+#if defined(DEBUG) -+ /* Check that the cookie is unique */ -+ for (tcp = table->Entries[hashval]; tcp; tcp = tcp->Next) -+ if (tcp->Cookie == cookie) -+ panic ("RegisterEventCookie: non unique cookie\n"); -+#endif -+ cp->Next = table->Entries[hashval]; -+ -+ table->Entries[hashval] = cp; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+} -+ -+void -+DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp) -+{ -+ EP3_COOKIE **predCookiep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (predCookiep = &table->Entries[EP3_HASH_COOKIE (cp->Cookie)]; *predCookiep; predCookiep = &(*predCookiep)->Next) -+ { -+ if (*predCookiep == cp) -+ { -+ *predCookiep = cp->Next; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ -+ cp->Operations = NULL; -+ cp->Arg = NULL; -+ cp->Cookie = 0; -+ cp->Next = NULL; -+} -+ -+EP3_COOKIE * -+LookupCookie (EP3_COOKIE_TABLE *table, E3_Addr cookie) -+{ -+ EP3_COOKIE *cp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (cp = table->Entries[EP3_HASH_COOKIE(cookie)]; cp; cp = cp->Next) -+ if (cp->Cookie == cookie) -+ break; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ return (cp); -+} -+ -+EP3_COOKIE * -+LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr eaddr) -+{ -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ if ((event = ep_elan2sdram (&rail->Generic, eaddr)) != (sdramaddr_t) 0) -+ { -+ type = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Type)); -+ -+ if (type & EV_TYPE_BCOPY) -+ return (LookupCookie (table, elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Source)) & ~EV_WCOPY)); -+ else -+ return (LookupCookie (table, eaddr)); -+ } -+ -+ return (NULL); -+} -+ -+/****************************************************************************************/ -+/* -+ * Elan context operations - note only support interrupt ops. -+ */ -+static int ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static int ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+static int ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+static int ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+static int ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+static int ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf); -+ -+static E3_uint8 ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+ELAN3_OPS ep3_elan3_ops = -+{ -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ NULL, /* Exception */ -+ NULL, /* GetWordItem */ -+ NULL, /* GetBlockItem */ -+ NULL, /* PutWordItem */ -+ NULL, /* PutBlockItem */ -+ NULL, /* PutbackItem */ -+ NULL, /* FreeWordItem */ -+ NULL, /* FreeBlockItem */ -+ NULL, /* CountItems */ -+ ep3_event, /* Event */ -+ NULL, /* SwapIn */ -+ NULL, /* SwapOut */ -+ NULL, /* FreePrivate */ -+ NULL, /* FixupNetworkError */ -+ ep3_dprocTrap, /* DProcTrap */ -+ ep3_tprocTrap, /* TProcTrap */ -+ ep3_iprocTrap, /* IProcTrap */ -+ ep3_cprocTrap, /* CProcTrap */ -+ ep3_cprocReissue, /* CProcReissue */ -+ NULL, /* StartFaultCheck */ -+ NULL, /* EndFaulCheck */ -+ ep3_load8, /* Load8 */ -+ ep3_store8, /* Store8 */ -+ ep3_load16, /* Load16 */ -+ ep3_store16, /* Store16 */ -+ ep3_load32, /* Load32 */ -+ ep3_store32, /* Store32 */ -+ ep3_load64, /* Load64 */ -+ ep3_store64, /* Store64 */ -+}; -+ -+static int -+ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ EP3_COOKIE *cp = LookupCookie (&rail->CookieTable, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep3_event: cannot find event cookie for %x\n", cookie); -+ return (OP_HANDLED); -+ } -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ -+ return (OP_HANDLED); -+} -+ -+/* Trap interface */ -+int -+ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COOKIE *cp; -+ E3_FaultSave_BE *FaultArea; -+ E3_uint16 vp; -+ int validTrap; -+ int numFaults; -+ int i; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ int status = EAGAIN; -+ -+ EPRINTF4 (DBG_EPTRAP, "ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ EPRINTF4 (DBG_EPTRAP, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ EPRINTF2 (DBG_EPTRAP, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ EPRINTF2 (DBG_EPTRAP, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ vp = trap->Desc.s.dma_destVProc; -+ else -+ vp = trap->Desc.s.dma_srcVProc; -+ -+ if (! trap->PacketInfo.s.PacketTimeout) -+ status = ETIMEDOUT; -+ else -+ { -+ status = EHOSTDOWN; -+ -+ /* XXXX: dma timedout - might want to "restart" tree ? */ -+ } -+ goto retry_dma; -+ -+ case MI_DmaFailCountError: -+ goto retry_dma; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ goto retry_dma; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ IncrStat (rail, DprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof(E3_Event,ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&ctxt->Device->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&ctxt->Device->IntrLock); -+ } -+ return (OP_HANDLED); -+ -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ /* We only chain together DMA's of the same direction, so since -+ * we took a DmaQueueOverflow trap - this means that DMA which -+ * trapped was a WRITE dma - hence the one we chain to must also -+ * be a WRITE dma. -+ */ -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmabe.s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (cp != NULL && (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE))); -+ } -+#endif -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ -+ return (OP_HANDLED); -+ } -+ -+ panic ("ep3_dprocTrap\n"); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ } -+ -+ /* If it's a dma which traps past the end of the source, then */ -+ /* just re-issue it */ -+ numFaults = validTrap = (trap->FaultSave.s.FSR.Status != 0); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ numFaults++; -+ -+ /* XXXX: Rev B Elans can prefetch data past the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress >= (trap->Desc.s.dma_source+trap->Desc.s.dma_size)) -+ { -+ static int i; -+ if (i < 10 && i++ < 10) -+ printk ("ep3_dprocTrap: Rev B prefetch trap error %08x %08x\n", -+ FaultArea->s.FaultAddress, (trap->Desc.s.dma_source+trap->Desc.s.dma_size)); -+ continue; -+ } -+ -+ validTrap++; -+ } -+ } -+ -+ /* -+ * NOTE: for physical errors (uncorrectable ECC/PCI parity errors) the FSR will -+ * be zero - hence we will not see any faults - and none will be valid, -+ * so only ignore a Rev B prefetch trap if we've seen some faults. Otherwise -+ * we can reissue a DMA which has already sent it's remote event ! -+ */ -+ if (numFaults != 0 && validTrap == 0) -+ { -+ retry_dma: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ { -+ vp = trap->Desc.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_srcEvent); -+ } -+ else -+ { -+ ASSERT (EP3_CONTEXT_ISDATA(trap->Desc.s.dma_queueContext) || trap->Desc.s.dma_direction == DMA_READ_REQUEUE); -+ -+ vp = trap->Desc.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_destEvent); -+ -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ trap->Desc.s.dma_direction = (trap->Desc.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &trap->Desc, status); -+ else -+ { -+ ASSERT (trap->Desc.s.dma_direction == DMA_WRITE && trap->Desc.s.dma_srcEvent == 0 && trap->Desc.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &trap->Desc, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ printk ("ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ printk (" %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ printk (" type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ printk (" Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ printk (" Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+// panic ("ep3_dprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ -+ EPRINTF6 (DBG_EPTRAP, "ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, trap->TrapBits.Bits, MiToName (trap->mi)); -+ EPRINTF4 (DBG_EPTRAP, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ { -+ ASSERT (trap->TrapBits.s.OutputWasOpen == 0); -+ -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ForcedTProcTrap\n"); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ThreadTimeout\n"); -+ -+ if (trap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == 0) -+ RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue); -+ else -+ { -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], trap->TrapBits.s.PacketAckValue); -+ -+ RollThreadToClose (ctxt, trap, EP3_PAckStolen); -+ } -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_EPTRAP, "ep3_tprocTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case EP3_UNIMP_TRAP_NO_DESCS: -+ StallThreadForNoDescs (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_TRAP_PACKET_NACKED: -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], E3_PAckDiscard); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_THREAD_HALTED: -+ StallThreadForHalted (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ -+ } -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+ -+ /* All other traps should not happen for kernel comms */ -+ printk ("ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, -+ trap->TrapBits.Bits, MiToName (trap->mi)); -+ printk (" FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ printk (" DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ printk (" InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ printk (" OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ printk (" g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ printk (" g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ printk (" o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ printk (" o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ printk (" l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ printk (" l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ printk (" i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ printk (" i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+ -+// panic ("ep3_tprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int channel) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ EP3_COOKIE *cp; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ -+ ASSERT (trap->Transactions[0].s.TrTypeCntx.s.Context & SYS_CONTEXT_BIT); -+ -+ /* -+ * first process the trap to determine the cause -+ */ -+ InspectIProcTrap (ctxt, trap); -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Must be a network error in a queueing DMA */ -+ { /* packet - unlock the queue */ -+ IncrStat (rail, QueueingPacketTrap); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ spin_unlock (&dev->IntrLock); -+ -+ /* NOTE - no network error fixup is necessary for system context -+ * messages since they are idempotent and are single packet -+ * dmas -+ */ -+ if (EP3_CONTEXT_ISDATA (trap->Transactions[0].s.TrTypeCntx.s.Context)) -+ { -+ int nodeId = EP3_CONTEXT_TO_NODE(trap->Transactions[0].s.TrTypeCntx.s.Context); -+ -+ if (trap->DmaIdentifyTransaction) -+ { -+ printk ("%s: network error on dma identify <%x> from node %d\n", rail->Generic.Name, trap->DmaIdentifyTransaction->s.TrAddr, nodeId); -+ -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->DmaIdentifyTransaction->s.TrAddr); -+ } -+ else if (trap->ThreadIdentifyTransaction) -+ { -+ printk ("%s: network error on thread identify <%x> from node %d\n", rail->Generic.Name, trap->ThreadIdentifyTransaction->s.TrAddr, nodeId); -+ -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->ThreadIdentifyTransaction->s.TrAddr); -+ } -+ else -+ { -+ printk ("%s: network error on dma packet from node %d\n", rail->Generic.Name, nodeId); -+ -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_DMA_PACKET, channel, 0); -+ } -+ } -+ -+ spin_lock (&dev->IntrLock); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent) -+ { -+ if (trap->TrappedTransaction == NULL) -+ return (OP_HANDLED); -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ printk ("ep3_iprocTrap: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP3_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = (dmabe.s.dma_direction == DMA_WRITE ? dmabe.s.dma_destVProc : dmabe.s.dma_srcVProc); -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find dma to restart\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ -+ case MI_EventQueueOverflow: -+ { -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ IncrStat (rail, IprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&dev->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&dev->IntrLock); -+ -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find event\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ -+ default: -+ printk ("ep3_iprocTrap: SETEVENT : %x MI=%x\n", hdrp->s.TrAddr, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ { -+ E3_DMA_BE *dmap = (E3_DMA_BE *) datap; -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_DmaQueueOverflow) -+ { -+ printk ("ep3_iprocTrap: MI=%x\n", GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+ break; -+ } -+ -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmap->s.dma_srcEvent); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ dmap->s.dma_type = (dmap->s.dma_type & ~DMA_TYPE_READ) | DMA_TYPE_ISREMOTE; -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmap->s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, dmap, EAGAIN); -+ else -+ { -+ ASSERT (dmap->s.dma_direction == DMA_WRITE && dmap->s.dma_srcEvent == 0 && dmap->s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, dmap, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ default: -+ printk ("ep3_iprocTrap: %s\n", IProcTrapString (hdrp, datap)); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ printk ("ep3_iprocTrap: missed lockqueue transaction for queue %x\n", trap->UnlockQueuePointer); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->FaultSave.s.FaultContext != 0) -+ printk ("ep3_iprocTrap: pagefault at %08x in context %x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.FaultContext); -+ -+// panic ("ep3_iprocTrap: unexpected inputter trap\n"); -+ -+ return (OP_HANDLED); -+} -+ -+/* -+ * Command processor trap -+ * kernel comms should only be able to generate -+ * queue overflow traps -+ */ -+int -+ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ int ctxnum = (trap->TrapBuf.r.Breg >> 16) & MAX_ROOT_CONTEXT_MASK; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_DMA_RING *ring; -+ EP3_COOKIE *cp; -+ E3_DMA_BE dmabe; -+ int vp, slot; -+ unsigned long flags; -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, CprocDmaQueueOverflow); -+ -+ /* Use the context number that the setevent was issued in, -+ * to find the appropriate dma ring, then since they are guaranteed -+ * to be issued in order, we just search backwards till we find the -+ * last one which has completed its word copy - this must be the -+ * one which had caused the DmaQueueOverflow trap ! */ -+ -+ ASSERT (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)); -+ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ slot = DMA_RING_PREV_POS(ring, ring->Position); -+ -+ while (ring->pDoneBlk[slot] == EP3_EVENT_ACTIVE) -+ slot = DMA_RING_PREV_POS(ring, slot); -+ -+ elan3_sdram_copyq_from_sdram (rail->Device , DMA_RING_DMA(ring,slot), &dmabe, sizeof (E3_DMA)); -+ -+#if defined(DEBUG_ASSERT) -+ while (slot != DMA_RING_PREV_POS(ring, ring->Position)) -+ { -+ ASSERT (ring->pDoneBlk[slot] != EP3_EVENT_ACTIVE); -+ -+ slot = DMA_RING_PREV_POS(ring, slot); -+ } -+#endif -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction = DMA_READ_REQUEUE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ } -+ -+#if defined(DEBUG_ASSERT) -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ vp = dmabe.s.dma_destVProc; -+ else -+ vp = dmabe.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ IncrStat (rail, CprocEventQueueOverflow); -+ -+ rail->CommandPortEventTrap = TRUE; -+ return (OP_HANDLED); -+ -+#if defined(PER_CPU_TIMEOUT) -+ case MI_SetEventReadWait: -+ if (ctxnum == ELAN3_MRF_CONTEXT_NUM && trap->FaultSave.s.EventAddress == EP_PACEMAKER_EVENTADDR) -+ { -+ HeartbeatPacemaker (rail); -+ return (OP_HANDLED); -+ } -+#endif -+ -+ default: -+ printk ("ep3_cprocTrap : Context=%x Status=%x TrapType=%x\n", ctxnum, trap->Status.Status, trap->Status.s.TrapType); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ break; -+ } -+ -+// panic ("ep3_cprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+static int -+ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ unsigned cmdoff = (tbuf->s.ContextType >> 5) & 0xFF; -+ int ctxnum = (tbuf->s.ContextType >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ if (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)) -+ { -+ EP3_DMA_RING *ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ -+ ASSERT ((cmdoff << 2) == offsetof (E3_CommandPort, SetEvent)); /* can only be setevent commands! */ -+ ASSERT (tbuf->s.Addr >= DMA_RING_EVENT_ELAN(ring,0) && tbuf->s.Addr < DMA_RING_EVENT_ELAN(ring, ring->Entries)); -+ -+ writel (tbuf->s.Addr, (void *)(ring->CommandPort + (cmdoff << 2))); -+ } -+ else -+ { -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ writel (tbuf->s.Addr, (void *)(ctxt->CommandPort + (cmdoff << 2))); -+ } -+ -+ return (OP_HANDLED); -+} -+ -+static E3_uint8 -+ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readb (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != NULL) -+ return (*ptr); -+ -+ printk ("ep3_load8: %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeb (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store8 %08x\n", addr); -+} -+ -+static E3_uint16 -+ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readw (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load16 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writew (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store16 %08x\n", addr); -+} -+ -+static E3_uint32 -+ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readl(dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load32 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writel (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store32 %08x\n", addr); -+} -+ -+static E3_uint64 -+ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readq (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load64 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeq (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store64 %08x\n", addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/support_elan4.c linux-2.6.9/drivers/net/qsnet/ep/support_elan4.c ---- clean/drivers/net/qsnet/ep/support_elan4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/support_elan4.c 2005-08-09 05:57:14.000000000 -0400 -@@ -0,0 +1,1192 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan4.c,v 1.24.2.2 2005/08/09 09:57:14 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+void -+ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg) -+{ -+ unsigned long flags; -+ -+ cp->int_val = cookie; -+ cp->int_callback = callback; -+ cp->int_arg = arg; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_add_tail (&cp->int_link, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+void -+ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_del (&cp->int_link); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+ -+EP4_INTCOOKIE * -+ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_for_each (el, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]) { -+ EP4_INTCOOKIE *cp = list_entry (el, EP4_INTCOOKIE, int_link); -+ -+ if (cp->int_val == cookie) -+ { -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return cp; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return NULL; -+} -+ -+E4_uint64 -+ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node) -+{ -+ E4_uint64 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_cookie_lock, flags); -+ cookie = rail->r_cookies[node]; -+ -+ rail->r_cookies[node] += EP4_COOKIE_INC; -+ -+ spin_unlock_irqrestore (&rail->r_cookie_lock, flags); -+ -+ return cookie; -+} -+ -+void -+ep4_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_EPROC_TRAP trap; -+ -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, &trap, 0); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_eproc_trap (DBG_BUFFER, 0, "ep4_eproc_trap", &trap); -+ -+ switch (EPROC_TrapType (status)) -+ { -+ case EventProcNoFault: -+ EPRINTF1 (DBG_EPTRAP, "%s: EventProcNoFault\n", rail->r_generic.Name); -+ return; -+ -+ default: -+ printk ("%s: unhandled eproc trap %d\n", rail->r_generic.Name, EPROC_TrapType (status)); -+ elan4_display_eproc_trap (DBG_CONSOLE, 0, "ep4_eproc_trap", &trap); -+ } -+} -+ -+void -+ep4_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_CPROC_TRAP trap; -+ struct list_head *el; -+ register int i; -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &trap, cqnum); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_cproc_trap (DBG_BUFFER, 0, "ep4_cproc_trap", &trap); -+ -+ switch (CPROC_TrapType (status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ /* -+ * Try and handle a bunch of elan main interrupts -+ */ -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: defer command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_queue_mainintop (ctxt->ctxt_dev, &ecq->ecq_intop); -+ return; -+ } -+ } -+ } -+ break; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcThreadQueueOverflow: -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: restart command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_restartcq (ctxt->ctxt_dev, ecq->ecq_cq); -+ return; -+ } -+ } -+ } -+ break; -+ } -+ -+ printk ("%s: unhandled cproc trap %d for cqnum %d\n", rail->r_generic.Name, CPROC_TrapType (status), cqnum); -+ elan4_display_cproc_trap (DBG_CONSOLE, 0, "ep4_cproc_trap", &trap); -+} -+ -+void -+ep4_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_dproc_trap (DBG_BUFFER, 0, "ep4_dproc_trap", &trap); -+ -+ if (! DPROC_PrefetcherFault (trap.tr_status)) -+ { -+ switch (DPROC_TrapType (trap.tr_status)) -+ { -+ case DmaProcFailCountError: -+ goto retry_this_dma; -+ -+ case DmaProcPacketAckError: -+ goto retry_this_dma; -+ -+ case DmaProcQueueOverflow: -+ goto retry_this_dma; -+ } -+ } -+ -+ printk ("%s: unhandled dproc trap\n", rail->r_generic.Name); -+ elan4_display_dproc_trap (DBG_CONSOLE, 0, "ep4_dproc_trap", &trap); -+ return; -+ -+ retry_this_dma: -+ /*XXXX implement backoff .... */ -+ -+ ep4_queue_dma_retry (rail, &trap.tr_desc, EP_RETRY_LOW_PRI); -+} -+ -+void -+ep4_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_TPROC_TRAP *trap = &rail->r_tproc_trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, trap); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_tproc_trap (DBG_BUFFER, 0, "ep4_tproc_trap", trap); -+ -+ printk ("%s: unhandled tproc trap\n", rail->r_generic.Name); -+ elan4_display_tproc_trap (DBG_CONSOLE, 0, "ep4_tproc_trap", trap); -+} -+ -+void -+ep4_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_IPROC_TRAP *trap = &rail->r_iproc_trap; -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_iproc_trap (DBG_BUFFER, 0, "ep4_iproc_trap", trap); -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputDmaQueueOverflow: -+ ep4_queue_dma_retry (rail, (E4_DMA *) &trap->tr_dataBuffers[trap->tr_trappedTrans], EP_RETRY_LOW_PRI); -+ return; -+ -+ case InputEventEngineTrapped: -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ sdramaddr_t inputq; -+ E4_Addr event; -+ -+ /* XXXX: flow control on the command queue which we issue to is -+ * rather difficult, we don't want to have space for an event -+ * for each possible context, nor the mechanism to hold the -+ * context filter up until the event has been executed. Given -+ * that the event engine will be restarted by this same interrupt -+ * and we're using high priority command queues, then we just use -+ * a single small command queue for this. -+ */ -+ switch (IPROC_TransactionType(hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_MASK: -+ if (hdrp->TrAddr != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, hdrp->TrAddr); -+ return; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if ((inputq = ep_elan2sdram (&rail->r_generic, hdrp->TrAddr)) == 0) -+ printk ("%s: TR_INPUT_Q_COMMIT at %llx is not sdram\n", rail->r_generic.Name, (long long)hdrp->TrAddr); -+ else -+ { -+ if ((event = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq + offsetof (E4_InputQueue, q_event))) != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, event); -+ return; -+ } -+ } -+ break; -+ } -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ return; -+ -+ if (EP4_CONTEXT_ISDATA (IPROC_NetworkContext (status))) -+ { -+ unsigned int nodeId = EP4_CONTEXT_TO_NODE (IPROC_NetworkContext (status)); -+ -+ if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: network error on dma packet from node %d\n", rail->r_generic.Name, nodeId); -+ printk ("%s: network error on dma packet from node %d\n", rail->r_generic.Name, nodeId); -+ -+ ep_queue_network_error (&rail->r_generic, EP4_CONTEXT_TO_NODE(IPROC_NetworkContext (status)), EP_NODE_NETERR_DMA_PACKET, unit & 1, 0); -+ return; -+ } -+ -+ if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ E4_uint64 status = trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType; -+ EP_NETERR_COOKIE cookie = 0; -+ -+ switch (IPROC_TransactionType (status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ EPRINTF3(DBG_NETWORK_ERROR, "%s: network error on setevent <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ printk ("%s: network error on setevent <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: network error on queue commit <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ printk ("%s: network error on queue commit <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: network error on remote dma <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ printk ("%s: network error on remote dma <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_IDENTIFY & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: network error on identify <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ printk ("%s: network error on identify <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ default: -+ panic ("%s: unknown identify transaction type %x for eop error from node %d\n", rail->r_generic.Name, -+ IPROC_TransactionType (trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType), nodeId); -+ break; -+ } -+ -+ ep_queue_network_error (&rail->r_generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, unit & 1, cookie); -+ } -+ } -+ return; -+ } -+ -+ printk ("%s: unhandled iproc trap\n", rail->r_generic.Name); -+ elan4_display_iproc_trap (DBG_CONSOLE, 0, "ep4_iproc_trap", trap); -+} -+ -+void -+ep4_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ EP4_INTCOOKIE *cp = ep4_lookup_intcookie (rail, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep4_interrupt: cannot find event cookie for %016llx\n", (long long) cookie); -+ return; -+ } -+ -+ cp->int_callback (rail, cp->int_arg); -+} -+ -+ELAN4_TRAP_OPS ep4_trap_ops = -+{ -+ ep4_eproc_trap, -+ ep4_cproc_trap, -+ ep4_dproc_trap, -+ ep4_tproc_trap, -+ ep4_iproc_trap, -+ ep4_interrupt, -+}; -+ -+void -+ep4_flush_filters (EP_RAIL *r) -+{ -+ /* nothing to do here as elan4_set_filter() flushes the context filter */ -+} -+ -+struct flush_queues_desc -+{ -+ EP4_RAIL *rail; -+ volatile int done; -+} ; -+ -+static void -+ep4_flush_queues_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (qentry.Desc.dma_typeSize) == rail->r_ctxt.ctxt_num) -+ { -+ E4_uint64 vp = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ EP4_ASSERT (rail, !EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ qentry.Desc.dma_typeSize = typeSize; -+ qentry.Desc.dma_cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ qentry.Desc.dma_vproc = vp; -+ qentry.Desc.dma_srcAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcAddr)); -+ qentry.Desc.dma_dstAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstAddr)); -+ qentry.Desc.dma_srcEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcEvent)); -+ qentry.Desc.dma_dstEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstEvent)); -+ -+ EPRINTF4 (DBG_RETRY, "ep4_flush_dmas: %016llx %016llx %016llx %016llx\n", (long long)qentry.Desc.dma_typeSize, -+ (long long)qentry.Desc.dma_cookie, (long long)qentry.Desc.dma_vproc, (long long)qentry.Desc.dma_srcAddr); -+ EPRINTF3 (DBG_RETRY, " %016llx %016llx %016llx\n", (long long)qentry.Desc.dma_dstAddr, -+ (long long)qentry.Desc.dma_srcEvent, (long long)qentry.Desc.dma_dstEvent); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_flush_queues_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_flush_queues (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ struct flush_queues_desc desc; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.done = 0; -+ -+ /* First - stall the dma retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_flush_queues_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_flush_queues_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(retry->retry_dma.dma_vproc)]; -+ -+ EP4_ASSERT (rail, nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ list_del (&retry->retry_link); -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+struct write_qdesc_desc -+{ -+ EP4_RAIL *rail; -+ sdramaddr_t qaddr; -+ E4_InputQueue *qdesc; -+ volatile int done; -+} ; -+ -+static void -+ep4_write_qdesc_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct write_qdesc_desc *desc = (struct write_qdesc_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ unsigned long flags; -+ -+ elan4_sdram_copyq_to_sdram (dev, desc->qdesc, desc->qaddr, sizeof (E4_InputQueue)); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+void -+ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc) -+{ -+ struct write_qdesc_desc desc; -+ unsigned long flags; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.qaddr = qaddr; -+ desc.qdesc = qdesc; -+ desc.done = 0; -+ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DiscardingHighPri; -+ rail->r_haltop.op_function = ep4_write_qdesc_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ -+ kmutex_unlock (&rail->r_haltop_mutex); -+} -+#define CQ_SIZE_NWORDS ((CQ_Size (ecq->ecq_cq->cq_size) >> 3) - 8) /* available number of dwords (less enough to flush) */ -+EP4_ECQ * -+ep4_alloc_ecq (EP4_RAIL *rail, unsigned cqsize) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_ECQ *ecq; -+ unsigned long pgoff, cqaddr; -+ -+ /* no space available, so allocate a new entry */ -+ KMEM_ZALLOC (ecq, EP4_ECQ *, sizeof (EP4_ECQ), 1); -+ -+ if (ecq == NULL) -+ return 0; -+ -+ if ((ecq->ecq_cq = elan4_alloccq (&rail->r_ctxt, cqsize, CQ_EnableAllBits, CQ_Priority)) == NULL) -+ { -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+ return 0; -+ } -+ -+ pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ cqaddr = (ecq->ecq_cq->cq_cqa->cqa_cqnum + ecq->ecq_cq->cq_idx + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ ecq->ecq_addr = ep_rmalloc (rail->r_ecq_rmap, PAGESIZE, 0) + pgoff; -+ ecq->ecq_avail = CQ_SIZE_NWORDS; /* available number of dwords (less enough to flush) */ -+ -+ ecq->ecq_intop.op_function = (ELAN4_HALTFN *) elan4_restartcq; -+ ecq->ecq_intop.op_arg = ecq->ecq_cq; -+ -+ ep4_cport_map (&rail->r_generic, ecq->ecq_addr - pgoff, cqaddr - pgoff, PAGESIZE, EP_PERM_WRITE); -+ -+ spin_lock_init (&ecq->ecq_lock); -+ -+ return ecq; -+} -+ -+void -+ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq) -+{ -+ unsigned long pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ -+ spin_lock_destroy (&ecq->ecq_lock); -+ -+ ep4_unmap (&rail->r_generic, ecq->ecq_addr - pgoff, PAGESIZE); -+ ep_rmfree (rail->r_ecq_rmap, PAGESIZE, ecq->ecq_addr - pgoff); -+ -+ elan4_freecq (&rail->r_ctxt, ecq->ecq_cq); -+ -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+} -+ -+EP4_ECQ * -+ep4_get_ecq (EP4_RAIL *rail, unsigned which, unsigned ndwords) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ EP4_ECQ *ecq; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_for_each (el, &rail->r_ecq_list[which]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (ecq->ecq_avail >= ndwords) -+ { -+ ecq->ecq_avail -= ndwords; -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if ((ecq = ep4_alloc_ecq (rail, EP4_ECQ_Size (which))) == NULL) -+ return NULL; -+ -+ if (which == EP4_ECQ_EVENT) -+ { -+ if ((ecq->ecq_event = ep_alloc_elan (&rail->r_generic, sizeof (E4_Event32), 0, &ecq->ecq_event_addr)) == 0) -+ { -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr), -+ ecq->ecq_addr); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event))); -+ -+ if ((ecq->ecq_flushcq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ } -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_add (&ecq->ecq_link, &rail->r_ecq_list[which]); -+ -+ ecq->ecq_avail -= ndwords; -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+} -+ -+void -+ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned ndwords) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ -+ ecq->ecq_avail += ndwords; -+ -+ if (ecq->ecq_avail != CQ_SIZE_NWORDS) -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ else -+ { -+ list_del (&ecq->ecq_link); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if (ecq->ecq_flushcq) -+ ep4_put_ecq (rail, ecq->ecq_flushcq, 1); -+ if (ecq->ecq_event_addr) -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ -+ ep4_free_ecq (rail, ecq); -+ } -+} -+ -+void -+ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_nop_cmd (ecq->ecq_cq, tag); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+ -+} -+ -+void -+ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_set_event_cmd (ecq->ecq_cq, event); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_wait_event_cmd (ecq->ecq_cq, event, candt, param0, param1); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ rail->r_flush_count = 0; -+ kcondvar_wakeupone (&rail->r_flush_sleep, &rail->r_ecq_lock); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_flush_ecqs (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ kmutex_lock (&rail->r_flush_mutex); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ /* first flush all the "event" queues */ -+ list_for_each (el, &rail->r_ecq_list[EP4_ECQ_EVENT]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ ep4_set_event_cmd (ecq->ecq_flushcq, ecq->ecq_event_addr); -+ -+ rail->r_flush_count++; -+ } -+ -+ /* next issue the setevents to all the other queues */ -+ for (i = EP4_ECQ_ATOMIC; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep4_set_event_cmd (ecq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event)); -+ -+ rail->r_flush_count++; -+ } -+ } -+ -+ /* issue the waitevent command */ -+ ep4_wait_event_cmd (rail->r_flush_mcq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * rail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0), -+ rail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (rail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ while (rail->r_flush_count) -+ if (kcondvar_timedwait (&rail->r_flush_sleep, &rail->r_ecq_lock, &flags, (lbolt + (HZ*10))) == -1) -+ elan4_hardware_lock_check(dev, "flush_ecqs"); -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ kmutex_unlock (&rail->r_flush_mutex); -+} -+ -+void -+ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...) -+{ -+ sdramaddr_t sp = stackTop - roundup (nargs * sizeof (E4_uint64), E4_STACK_ALIGN); -+ int i; -+ va_list ap; -+ -+ /* -+ * the thread start code expects the following : -+ * %r1 = stack pointer -+ * %r6 = frame pointer -+ * %r2 = function to call -+ * -+ * function args are store on stack above %sp -+ */ -+ -+ va_start(ap, nargs); -+ for (i = 0; i < nargs; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, sp + (i * sizeof (E4_uint64)), va_arg (ap, E4_uint64)); -+ va_end (ap); -+ -+ regs->Registers[0] = ep_symbol (&rail->r_threadcode, ".thread_start"); /* %r0 - PC */ -+ regs->Registers[1] = stackAddr - (stackTop - sp); /* %r1 - stack pointer */ -+ regs->Registers[2] = startpc; /* %r2 - start pc */ -+ regs->Registers[3] = 0; -+ regs->Registers[4] = 0; -+ regs->Registers[5] = 0; -+ regs->Registers[6] = stackTop; /* %r6 - frame pointer */ -+} -+ -+/* retransmission thread */ -+ -+void -+ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_add_tail (&ops->op_link, &rail->r_retry_ops); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_retry_thread (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ -+ kernel_thread_init ("ep4_retry"); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ list_for_each (el, &rail->r_retry_ops) { -+ EP4_RETRY_OPS *ops = list_entry (el, EP4_RETRY_OPS, op_link); -+ -+ nextRunTime = ops->op_func (rail, ops->op_arg, nextRunTime); -+ } -+ -+ if (ep_kthread_sleep (&rail->r_retry_thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&rail->r_retry_thread); -+ -+ kernel_thread_exit(); -+} -+ -+/* DMA retransmission */ -+static unsigned ep4_dma_retry_times[EP_NUM_RETRIES]; -+ -+static unsigned long -+ep4_retry_dmas (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ unsigned long yieldAt = lbolt + (hz/10); -+ unsigned long flags; -+ int i; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ if (! AFTER(lbolt, retry->retry_time)) -+ break; -+ -+ if (ep_kthread_should_stall (&rail->r_retry_thread) || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4_retry_dmas: flowcnt %llx %llx\n", rail->r_generic.Name, (long long)rail->r_dma_flowcnt, (long long)rail->r_main->r_dma_flowcnt); -+ -+ if ((rail->r_dma_flowcnt - rail->r_main->r_dma_flowcnt) > EP4_DMA_RETRY_FLOWCNT) -+ { -+ printk ("ep4_retry_dmas: flowcnt %llx %llx\n", (long long)rail->r_dma_flowcnt, (long long)rail->r_main->r_dma_flowcnt); -+ -+ goto cant_do_more; -+ } -+ -+ EPRINTF4 (DBG_RETRY, "%s: ep4_retry_dmas: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ (long long)retry->retry_dma.dma_typeSize, (long long)retry->retry_dma.dma_cookie, (long long)retry->retry_dma.dma_vproc); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ (long long)retry->retry_dma.dma_srcAddr, (long long)retry->retry_dma.dma_dstAddr, (long long)retry->retry_dma.dma_srcEvent, -+ (long long)retry->retry_dma.dma_dstEvent); -+ -+ elan4_run_dma_cmd (rail->r_dma_ecq->ecq_cq, &retry->retry_dma); -+ elan4_write_dword_cmd (rail->r_dma_ecq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_dma_flowcnt), ++rail->r_dma_flowcnt); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_del (&retry->retry_link); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ } -+ cant_do_more: -+ -+ /* re-compute the next retry time */ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ SET_NEXT_RUN_TIME (nextRunTime, retry->retry_time); -+ } -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep4_initialise_dma_retries (EP4_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->r_dma_lock); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->r_dma_retrylist[i]); -+ -+ INIT_LIST_HEAD (&rail->r_dma_freelist); -+ -+ rail->r_dma_ecq = ep4_alloc_ecq (rail, EP4_DMA_RETRY_CQSIZE); -+ -+ rail->r_dma_allocated = 0; -+ rail->r_dma_reserved = 0; -+ -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ ep4_dma_retry_times[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->r_dma_ops.op_func = ep4_retry_dmas; -+ rail->r_dma_ops.op_arg = NULL; -+ -+ ep4_add_retry_ops (rail, &rail->r_dma_ops); -+} -+ -+void -+ep4_finalise_dma_retries (EP4_RAIL *rail) -+{ -+ ep4_remove_retry_ops (rail, &rail->r_dma_ops); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ EP4_ASSERT (rail, rail->r_dma_reserved == 0); -+ -+ while (! list_empty (&rail->r_dma_freelist)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ KMEM_FREE (retry, sizeof (EP4_DMA_RETRY)); -+ } -+ -+ ep4_free_ecq (rail, rail->r_dma_ecq); -+ -+ spin_lock_destroy (&rail->r_dma_lock); -+} -+ -+int -+ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, EP_ATTRIBUTE attr) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ if (remaining <= (rail->r_dma_allocated - rail->r_dma_reserved)) -+ { -+ rail->r_dma_reserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 0; -+ } -+ -+ remaining -= (rail->r_dma_allocated - rail->r_dma_reserved); -+ -+ rail->r_dma_reserved = rail->r_dma_allocated; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ while (remaining > 0) -+ { -+ KMEM_ALLOC (retry, EP4_DMA_RETRY *, sizeof (EP4_DMA_RETRY), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ -+ rail->r_dma_allocated++; -+ rail->r_dma_reserved++; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ return 0; -+ -+ failed: -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 1; -+} -+ -+void -+ep4_release_dma_retries (EP4_RAIL *rail, unsigned int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= count; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_retry: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ (long long)dma->dma_typeSize, (long long)dma->dma_cookie, (long long)dma->dma_vproc, (long long)dma->dma_srcAddr); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx (%d)\n", rail->r_generic.Name, -+ (long long)dma->dma_dstAddr, (long long)dma->dma_srcEvent, (long long)dma->dma_dstEvent, interval); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ retry->retry_time = lbolt + ep4_dma_retry_times[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->retry_link, &rail->r_dma_retrylist[interval]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, retry->retry_time); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(dma->dma_vproc)]; -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_stalled: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ (long long)dma->dma_typeSize, (long long)dma->dma_cookie, (long long)dma->dma_vproc, (long long)dma->dma_srcAddr); -+ EPRINTF4 (DBG_RETRY, "%s: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ (long long)dma->dma_dstAddr, (long long)dma->dma_srcEvent, (long long)dma->dma_dstEvent); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->r_dma_freelist); -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_display_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ register int i; -+ unsigned long flags; -+ -+ ep_debugf (DBG_DEBUG, "%s: vendorid=%x deviceid=%x\n", rail->r_generic.Name, -+ rail->r_generic.Devinfo.dev_vendor_id, rail->r_generic.Devinfo.dev_device_id); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i < EP4_NUM_ECQ; i++) -+ { -+ list_for_each (el, &rail->r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (i == EP4_ECQ_EVENT) -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d event=%llx,%llx,%llx\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), (long long)ecq->ecq_addr, ecq->ecq_avail, -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr))); -+ -+ else -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), (long long)ecq->ecq_addr, ecq->ecq_avail); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%ld mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ rail->r_flush_count, rail->r_flush_mcq, rail->r_flush_ecq, -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each (el, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ ep_debugf (DBG_DEBUG, " RETRY[%d] typeSize %llx cookie %llx vproc %llx events %llx %llx\n", -+ i, (long long)retry->retry_dma.dma_typeSize, (long long)retry->retry_dma.dma_cookie, -+ (long long)retry->retry_dma.dma_vproc, (long long)retry->retry_dma.dma_srcEvent, (long long)retry->retry_dma.dma_dstEvent); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -diff -urN clean/drivers/net/qsnet/ep/threadcode.c linux-2.6.9/drivers/net/qsnet/ep/threadcode.c ---- clean/drivers/net/qsnet/ep/threadcode.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode.c 2003-10-07 09:22:38.000000000 -0400 -@@ -0,0 +1,146 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode.c,v $ */ -+ -+#include -+ -+#include -+ -+EP_ADDR -+ep_symbol (EP_CODE *code, char *name) -+{ -+ EP_SYMBOL *s = code->symbols; -+ -+ while (s->name && strcmp (s->name, name)) -+ s++; -+ -+ return (s->name ? s->value : (EP_ADDR) 0); -+} -+ -+int -+ep_loadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ register int i; -+ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _etext = ep_symbol (code, "_etext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ EP_ADDR _edata = ep_symbol (code, "_edata"); -+ EP_ADDR _end = ep_symbol (code, "_end"); -+ EP_ADDR _rodata = roundup (_etext, sizeof (uint64_t)); -+ -+ if (_stext == (EP_ADDR) 0 || _etext == (EP_ADDR) 0 || -+ _sdata == (EP_ADDR) 0 || _edata == (EP_ADDR) 0 || -+ _end == (EP_ADDR) 0) -+ { -+ printk ("ep_loadcode: symbols not defined correctly for code at %p\n", code); -+ return (EINVAL); -+ } -+ -+ /* -+ * Include the rodata in the text segment -+ */ -+ _etext = _rodata + code->rodata_size; -+ -+ /* -+ * If _etext is in the same page as _sdata, then allocate a contiguous -+ * chunk of memory and map it as read/write. otherwise allocate two chunks -+ * and map the code in as read-only. -+ */ -+ if ((_etext & PAGEMASK) == (_sdata & PAGEMASK)) -+ { -+ code->ntext = btopr (_end - (_stext & PAGEMASK)); -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ code->_sdata = code->_stext + (_sdata - _stext); -+ } -+ else -+ { -+ code->ntext = btopr (_etext - (_stext & PAGEMASK)); -+ code->ndata = btopr (_end - (_sdata & PAGEMASK)); -+ -+ if (code->ntext) -+ { -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ } -+ -+ if (code->ndata) -+ { -+ code->ppdata = ep_alloc_memory_elan (rail, _sdata & PAGEMASK, ptob (code->ndata), EP_PERM_WRITE, 0); -+ -+ if (code->ppdata == (sdramaddr_t) 0) -+ { -+ if (code->ntext) ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->ntext = 0; -+ -+ return (ENOMEM); -+ } -+ -+ code->_sdata = code->ppdata + (_sdata & PAGEOFFSET); -+ } -+ } -+ -+#ifdef __LITTLE_ENDIAN__ -+# define Flip 3 -+#else -+# define Flip 0 -+#endif -+ -+ /* -+ * Now copy the text and rodata into the SDRAM -+ * this is linked into the module to be byte -+ * copied to the SDRAM, since we want to copy -+ * with word accesses we have to do the byte -+ * assembly correctly. -+ */ -+ for (i = 0; i < code->text_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_stext + i, code->text[i^Flip]); -+ -+ for (i = 0; i < code->rodata_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_rodata + i, code->rodata[i^Flip]); -+ -+ /* -+ * And the initialised data segment. -+ */ -+ for (i = 0; i < code->data_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_sdata + i, code->data[i^Flip]); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_unloadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ -+ if (code->pptext) -+ ep_free_memory_elan (rail, _stext & PAGEMASK); -+ if (code->ppdata) -+ ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->pptext = code->ppdata = 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/threadcode_elan3.c linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3.c ---- clean/drivers/net/qsnet/ep/threadcode_elan3.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3.c 2003-10-07 09:22:38.000000000 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode_elan3.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+E3_Addr -+ep3_init_thread (ELAN3_DEV *dev, -+ E3_Addr fn, /* Elan address of function */ -+ E3_Addr addr, /* Elan address of stack */ -+ sdramaddr_t stack, /* sdram address of stack */ -+ int stackSize, /* stack size (in bytes) */ -+ int nargs, -+ ...) -+{ -+ sdramaddr_t frame; -+ sdramaddr_t regs; -+ sdramaddr_t argsp; -+ int i; -+ va_list ap; -+ -+ /* -+ * Align the stack pointer at the top of the stack and leave space for a stack frame -+ */ -+ stack = ((stack + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ addr = ((addr + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ -+ va_start (ap, nargs); -+ -+ if (nargs > 6) -+ { -+ stack -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ addr -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ } -+ -+ frame = stack; -+ regs = stack - sizeof (E3_OutsRegs); -+ -+ /* -+ * Initialise the registers, and stack frame. -+ */ -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[6]), fn); -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[7]), 0); -+ -+ if (nargs <= 6) -+ { -+ for (i = 0; i < nargs; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ } -+ else -+ { -+ for (i = 0; i < 6; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ -+ for (argsp = frame + offsetof (E3_Frame, fr_argx[0]); i < nargs; i++, argsp += sizeof (E3_uint32)) -+ elan3_sdram_writel (dev, argsp, va_arg (ap, int)); -+ } -+ -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savefp), 0); -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savepc), 0); -+ -+ va_end (ap); -+ -+ return (addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/ep/threadcode_elan3_Linux.c linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3_Linux.c ---- clean/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,112 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan3.h" -+static uint32_t threadcode_elan3_text[] = { -+0x80a0239c, 0x00001082, 0x00e0a280, 0x47008002, 0x0020a380, 0x20600288, 0x20200286, 0x43008002, -+0x00000001, 0x0a006081, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0xa800c613, 0xa300c609, 0x0020108a, 0x0080900b, 0x00006885, 0x0580a080, -+0x06008002, 0x02a0a080, 0x06008022, 0xffff0296, 0x04008010, 0xff3f0398, 0x1f008010, 0x00201090, -+0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x00c03f3f, -+0xf8e017be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, -+0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x00e0a280, -+0xbfffbf12, 0x0020a380, 0x03008012, 0x02201090, 0x03201090, 0x08e0c381, 0x80a0039c, 0xe0a0239c, -+0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x080010b8, 0x090010b0, 0x0a0010b2, 0x04000037, 0x402006b4, -+0x50200690, 0x01201092, 0x20a0239c, 0x00a0a3f0, 0x00c03f3f, 0x8ce117be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ff8, 0x0000b081, 0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, -+0x00a083f0, 0x20a0039c, 0x582006d0, 0x0020a280, 0x05008002, 0x0900a280, 0x10008002, 0x50200690, -+0xeaffbf30, 0x5c2006d4, 0x18001090, 0x19001092, 0x1b800294, 0x0a201096, 0x8affff7f, 0x05201098, -+0x446026d0, 0x302027f4, 0xdfffbf10, 0x50200690, 0xfdffbf10, 0x446026c0, 0x5c2006e0, 0x0020a480, -+0xf9ffbf06, 0x18001090, 0x19001092, 0x1b000494, 0x14201096, 0x7bffff7f, 0x0a201098, 0x0020a280, -+0xf4ffbf22, 0x486026e0, 0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, -+0x40a0a3e0, 0x00c03f3f, 0x60e217be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, -+0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, -+0x60a0039c, 0xff3f84a0, 0xe0ffbf1c, 0x18001090, 0xd5ffbf30, 0x60a003de, 0x80a083e0, 0xa0a083f0, -+0x08e0c381, 0xe0a0039c, 0x00a1239c, 0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x44a123d0, 0x090010b0, -+0x0a0010b6, 0x0b0010b8, 0x0c0010b4, 0x012010ba, 0xdca023fa, 0x142007d2, 0x082007d0, 0x084002b2, -+0x000027c0, 0xf42006d0, 0x0020a280, 0x15008032, 0xf42006d0, 0x18200790, 0xdca003d2, 0x20a0239c, -+0x00a0a3f0, 0x00c03f3f, 0x20e317be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ff8, 0x0000b081, -+0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, 0x00a083f0, 0x20a0039c, 0xf42006d0, -+0x0020a280, 0x0a008022, 0xdca023c0, 0x042007d0, 0x0840a680, 0x06008032, 0xdca023c0, 0x18001082, -+0x0220d091, 0xe1ffbf10, 0xf42006d0, 0x06008010, 0x190010a2, 0x042006d0, 0x00c026d0, 0x18001082, -+0x0020d091, 0x042006d0, 0x01200290, 0x042026d0, 0x000006d0, 0x0020a280, 0x04008002, 0x18001090, -+0x4f010040, 0x1b001092, 0xf02006e0, 0x0020a480, 0xf1ffbf02, 0x40b03611, 0x004004d2, 0x01201290, -+0x0840a280, 0x0e018012, 0x10001096, 0x046004d0, 0x01208a80, 0x33008002, 0xa0200484, 0x0c2610ba, -+0x000024fa, 0x00211090, 0x042024d0, 0x246004d0, 0x80200290, 0x082024d0, 0xec2004d0, 0x00210290, -+0x0c2024d0, 0x102024c4, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, -+0xc0ff3f3b, 0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, -+0x142024d0, 0x206004d0, 0x10210290, 0x182024d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, -+0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, -+0x08401292, 0x4000003b, 0x1d401292, 0x1c2024d2, 0x01201090, 0xa02024d0, 0x20200496, 0xa8200484, -+0x306004d0, 0x0020a280, 0x2b008012, 0x00201098, 0x0c2610ba, 0x00c022fa, 0x04e022c0, 0xc0200490, -+0x10e022d0, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, 0xc0ff3f3b, -+0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, 0x14e022d0, -+0x206004d0, 0x10210290, 0x18e022d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, 0x1d400292, -+0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, 0x08401292, -+0x4000003b, 0x1d401292, 0x1ce022d2, 0x4f008010, 0x0020109a, 0x0c00109a, 0x306004d0, 0x0840a380, -+0x3b00801a, 0xe02004c6, 0x0c2610ba, 0x00c022fa, 0x01202b91, 0x0c000290, 0x02202a91, 0x08400490, -+0x382002d2, 0x04e022d2, 0x342002d0, 0x08e022d0, 0x0ce022c6, 0x10e022c4, 0x186004d0, 0x02202a91, -+0x088006d2, 0x0001003b, 0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x44a103fa, 0x606007d0, -+0x00280290, 0x08401292, 0x4000003b, 0x1d401292, 0x14e022d2, 0x206004d0, 0x10210290, 0x18e022d0, -+0x186004d0, 0x02202a91, 0x088006d4, 0x0001003b, 0x1d800294, 0x088026d4, 0xc0ff3f3b, 0x1d800a94, -+0x186004d0, 0x00280290, 0x80000013, 0x09001290, 0x08801294, 0x4000003b, 0x1d801294, 0x1ce022d4, -+0x01201090, 0x008020d0, 0x04e002d0, 0x08c00086, 0x0840039a, 0x01200398, 0x20e00296, 0x306004d0, -+0x0800a380, 0xc9ffbf0a, 0x08a00084, 0xc0200490, 0xf0ff22d0, 0xe42004d0, 0x0d00a280, 0x0b00801a, -+0x00201098, 0x04008010, 0x10001096, 0x01200398, 0x20e00296, 0x306004d0, 0x0800a380, 0xfcffbf2a, -+0x04e022c0, 0xfc3f109a, 0xe42024da, 0x10001082, 0x186004d0, 0x00280290, 0x08006081, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00201098, -+0x0c00109a, 0x142004fa, 0xec00823b, 0x3080d61b, 0x00006891, 0x0420a280, 0x3b008002, 0x0c00a280, -+0x04008002, 0x00000001, 0x0120d091, 0x36008030, 0x7c2006d0, 0x01200290, 0x7c2026d0, 0x782006d0, -+0x0020a280, 0x04008002, 0x78200690, 0x64000040, 0x40e00692, 0xf02004d0, 0x0020a280, 0x03008012, -+0xf02026d0, 0x80e026c0, 0x7c2006d0, 0x40e026d0, 0x046004d0, 0x04208a80, 0x13008002, 0x1100108a, -+0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, 0x406099e0, -+0x40a0b9e0, 0x806099e0, 0x80a0b9e0, 0xc06099e0, 0xc0a0b9e0, 0x00809be0, 0x0780039c, 0x0e008010, -+0xec2004d2, 0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, -+0x406099e0, 0x40a0b9e0, 0x00809be0, 0x0780039c, 0xec2004d2, 0xe42004d0, 0x886222d0, 0x042006d0, -+0x00c026d0, 0x000007d0, 0x01208a80, 0x05008012, 0x00000001, 0x142027f2, 0x06008010, 0xdca003fa, -+0x142027f2, 0xfe3f0a90, 0x000027d0, 0xdca003fa, 0x016007ba, 0xdca023fa, 0x0c2007d0, 0x0840a680, -+0x04008032, 0x082007d0, 0x03008010, 0x102007f2, 0x084006b2, 0x00007081, 0x1600801c, 0x00000001, -+0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x8ce017be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, -+0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x042007d0, 0x0840a680, 0xb3febf12, 0x190010a2, -+0x8afebf10, 0xf42006d0, 0x60a003de, 0x80a083e0, 0xa0a083f0, 0x08e0c381, 0x00a1039c, 0x80a0239c, -+0x042002c4, 0x004022c4, 0x18008030, 0x00007081, 0x16008012, 0x00000001, 0x60a0239c, 0x00a0a3c0, -+0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x24e117be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, -+0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, -+0x00a083c0, 0x60a0039c, 0x000002c4, 0x00a0a080, 0xe7ffbf12, 0x00000001, 0x042002c4, 0x01a00084, -+0x042022c4, 0x000002c4, 0x00a0a080, 0xddffbf12, 0x00000001, 0x08e0c381, 0x80a0039c, }; -+#define threadcode_elan3_text_size 0x97c -+static uint32_t threadcode_elan3_data[] = { -+0}; -+#define threadcode_elan3_data_size 0x0 -+static uint32_t threadcode_elan3_rodata[] = { -+0}; -+#define threadcode_elan3_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan3_symbols[] = { -+ {"__bss_start", 0xff00297c}, -+ {"_edata", 0xff00297c}, -+ {"_end", 0xff002988}, -+ {"_etext", 0xff00097c}, -+ {"_sdata", 0xff00297c}, -+ {"_stext", 0xff000000}, -+ {"ep3_spinblock", 0xff0008dc}, -+ {"ep3comms_rcvr", 0xff0002a8}, -+ {"kcomm_probe", 0xff00013c}, -+ {"r", 0xff00297c}, -+ {"rail", 0xff002984}, -+ {"rm", 0xff002980}, -+ {0, 0}}; -+EP_CODE threadcode_elan3 = { -+ (unsigned char *) threadcode_elan3_text, -+ threadcode_elan3_text_size, -+ (unsigned char *) threadcode_elan3_data, -+ threadcode_elan3_data_size, -+ (unsigned char *) threadcode_elan3_rodata, -+ threadcode_elan3_rodata_size, -+ threadcode_elan3_symbols, -+}; -diff -urN clean/drivers/net/qsnet/ep/threadcode_elan3_Linux.code.dis linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3_Linux.code.dis ---- clean/drivers/net/qsnet/ep/threadcode_elan3_Linux.code.dis 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan3_Linux.code.dis 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,620 @@ -+ -+threadcode_elan3_Linux.code: file format elf32-elan -+ -+Disassembly of section .text: -+ -+ff000000 <_stext>: -+ff000000: 80 a0 23 9c sub %sp, 0x80, %sp -+ff000004: 00 00 10 82 mov %g0, %g1 -+ff000008: 00 e0 a2 80 cmp %o3, 0 -+ff00000c: 47 00 80 02 be ff000128 <_stext+0x128> -+ff000010: 00 20 a3 80 cmp %o4, 0 -+ff000014: 20 60 02 88 add %o1, 0x20, %g4 -+ff000018: 20 20 02 86 add %o0, 0x20, %g3 -+ff00001c: 43 00 80 02 be ff000128 <_stext+0x128> -+ff000020: 00 00 00 01 nop -+ff000024: 0a 00 60 81 open %o2 -+ff000028: 00 00 00 01 nop -+ff00002c: 00 00 00 01 nop -+ff000030: 00 00 00 01 nop -+ff000034: 00 00 00 01 nop -+ff000038: 00 00 00 01 nop -+ff00003c: 00 00 00 01 nop -+ff000040: 00 00 00 01 nop -+ff000044: 00 00 00 01 nop -+ff000048: 00 00 00 01 nop -+ff00004c: 00 00 00 01 nop -+ff000050: 00 00 00 01 nop -+ff000054: 00 00 00 01 nop -+ff000058: 00 00 00 01 nop -+ff00005c: 00 00 00 01 nop -+ff000060: 00 00 00 01 nop -+ff000064: 00 00 00 01 nop -+ff000068: 00 00 00 01 nop -+ff00006c: 00 00 00 01 nop -+ff000070: 00 00 00 01 nop -+ff000074: 00 00 00 01 nop -+ff000078: 00 00 00 01 nop -+ff00007c: 00 00 00 01 nop -+ff000080: 00 00 00 01 nop -+ff000084: 00 00 00 01 nop -+ff000088: a8 00 c6 13 sendtrans 0x3005, [ %o0 ], %o1 -+ff00008c: a3 00 c6 09 sendtrans 0x3005, [ %g3 ], %g4 -+ff000090: 00 20 10 8a clr %g5 -+ff000094: 00 80 90 0b sendtrans 0x8400, %g0, %g5 -+ff000098: 00 00 68 85 close %g2 -+ff00009c: 05 80 a0 80 cmp %g2, %g5 -+ff0000a0: 06 00 80 02 be ff0000b8 <_stext+0xb8> -+ff0000a4: 02 a0 a0 80 cmp %g2, 2 -+ff0000a8: 06 00 80 22 be,a ff0000c0 <_stext+0xc0> -+ff0000ac: ff ff 02 96 add %o3, -1, %o3 -+ff0000b0: 04 00 80 10 b ff0000c0 <_stext+0xc0> -+ff0000b4: ff 3f 03 98 add %o4, -1, %o4 -+ff0000b8: 1f 00 80 10 b ff000134 <_stext+0x134> -+ff0000bc: 00 20 10 90 clr %o0 -+ff0000c0: 00 00 70 81 breaktest -+ff0000c4: 16 00 80 1c bpos ff00011c <_stext+0x11c> -+ff0000c8: 00 00 00 01 nop -+ff0000cc: 60 a0 23 9c sub %sp, 0x60, %sp -+ff0000d0: 00 a0 a3 c0 stblock32 %g0, [ %sp ] -+ff0000d4: 20 a0 a3 f0 stblock32 %i0, [ %sp + 0x20 ] -+ff0000d8: 40 a0 a3 e0 stblock32 %l0, [ %sp + 0x40 ] -+ff0000dc: 00 c0 3f 3f sethi %hi(0xff000000), %i7 -+ff0000e0: f8 e0 17 be or %i7, 0xf8, %i7 ! ff0000f8 <_stext+0xf8> -+ff0000e4: 04 e0 8f 80 btst 4, %i7 -+ff0000e8: 06 00 80 12 bne ff000100 <_stext+0x100> -+ff0000ec: 00 00 00 01 nop -+ff0000f0: 00 c0 1f fc ldd [ %i7 ], %fp -+ff0000f4: 00 00 a0 81 break -+ff0000f8: 06 00 80 10 b ff000110 <_stext+0x110> -+ff0000fc: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff000100: 14 e0 07 be add %i7, 0x14, %i7 -+ff000104: 00 c0 1f fc ldd [ %i7 ], %fp -+ff000108: 00 00 a0 81 break -+ff00010c: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff000110: 20 a0 83 f0 ldblock32 [ %sp + 0x20 ], %i0 -+ff000114: 00 a0 83 c0 ldblock32 [ %sp ], %g0 -+ff000118: 60 a0 03 9c add %sp, 0x60, %sp -+ff00011c: 00 e0 a2 80 cmp %o3, 0 -+ff000120: bf ff bf 12 bne ff00001c <_stext+0x1c> -+ff000124: 00 20 a3 80 cmp %o4, 0 -+ff000128: 03 00 80 12 bne ff000134 <_stext+0x134> -+ff00012c: 02 20 10 90 mov 2, %o0 -+ff000130: 03 20 10 90 mov 3, %o0 -+ff000134: 08 e0 c3 81 retl -+ff000138: 80 a0 03 9c add %sp, 0x80, %sp -+ -+ff00013c : -+ff00013c: e0 a0 23 9c sub %sp, 0xe0, %sp -+ff000140: 60 a0 23 de st %o7, [ %sp + 0x60 ] -+ff000144: 80 a0 a3 e0 stblock32 %l0, [ %sp + 0x80 ] -+ff000148: a0 a0 a3 f0 stblock32 %i0, [ %sp + 0xa0 ] -+ff00014c: 08 00 10 b8 mov %o0, %i4 -+ff000150: 09 00 10 b0 mov %o1, %i0 -+ff000154: 0a 00 10 b2 mov %o2, %i1 -+ff000158: 04 00 00 37 sethi %hi(0x1000), %i3 -+ff00015c: 40 20 06 b4 add %i0, 0x40, %i2 -+ff000160: 50 20 06 90 add %i0, 0x50, %o0 -+ff000164: 01 20 10 92 mov 1, %o1 -+ff000168: 20 a0 23 9c sub %sp, 0x20, %sp -+ff00016c: 00 a0 a3 f0 stblock32 %i0, [ %sp ] -+ff000170: 00 c0 3f 3f sethi %hi(0xff000000), %i7 -+ff000174: 8c e1 17 be or %i7, 0x18c, %i7 ! ff00018c -+ff000178: 04 e0 8f 80 btst 4, %i7 -+ff00017c: 06 00 80 12 bne ff000194 -+ff000180: 00 00 00 01 nop -+ff000184: 00 c0 1f f8 ldd [ %i7 ], %i4 -+ff000188: 00 00 b0 81 waitevent -+ff00018c: 06 00 80 10 b ff0001a4 -+ff000190: 00 a0 83 f0 ldblock32 [ %sp ], %i0 -+ff000194: 14 e0 07 be add %i7, 0x14, %i7 -+ff000198: 00 c0 1f f8 ldd [ %i7 ], %i4 -+ff00019c: 00 00 b0 81 waitevent -+ff0001a0: 00 a0 83 f0 ldblock32 [ %sp ], %i0 -+ff0001a4: 20 a0 03 9c add %sp, 0x20, %sp -+ff0001a8: 58 20 06 d0 ld [ %i0 + 0x58 ], %o0 -+ff0001ac: 00 20 a2 80 cmp %o0, 0 -+ff0001b0: 05 00 80 02 be ff0001c4 -+ff0001b4: 09 00 a2 80 cmp %o0, %o1 -+ff0001b8: 10 00 80 02 be ff0001f8 -+ff0001bc: 50 20 06 90 add %i0, 0x50, %o0 -+ff0001c0: ea ff bf 30 b,a ff000168 -+ff0001c4: 5c 20 06 d4 ld [ %i0 + 0x5c ], %o2 -+ff0001c8: 18 00 10 90 mov %i0, %o0 -+ff0001cc: 19 00 10 92 mov %i1, %o1 -+ff0001d0: 1b 80 02 94 add %o2, %i3, %o2 -+ff0001d4: 0a 20 10 96 mov 0xa, %o3 -+ff0001d8: 8a ff ff 7f call ff000000 <_stext> -+ff0001dc: 05 20 10 98 mov 5, %o4 -+ff0001e0: 44 60 26 d0 st %o0, [ %i1 + 0x44 ] -+ff0001e4: 30 20 27 f4 st %i2, [ %i4 + 0x30 ] -+ff0001e8: df ff bf 10 b ff000164 -+ff0001ec: 50 20 06 90 add %i0, 0x50, %o0 -+ff0001f0: fd ff bf 10 b ff0001e4 -+ff0001f4: 44 60 26 c0 clr [ %i1 + 0x44 ] -+ff0001f8: 5c 20 06 e0 ld [ %i0 + 0x5c ], %l0 -+ff0001fc: 00 20 a4 80 cmp %l0, 0 -+ff000200: f9 ff bf 06 bl ff0001e4 -+ff000204: 18 00 10 90 mov %i0, %o0 -+ff000208: 19 00 10 92 mov %i1, %o1 -+ff00020c: 1b 00 04 94 add %l0, %i3, %o2 -+ff000210: 14 20 10 96 mov 0x14, %o3 -+ff000214: 7b ff ff 7f call ff000000 <_stext> -+ff000218: 0a 20 10 98 mov 0xa, %o4 -+ff00021c: 00 20 a2 80 cmp %o0, 0 -+ff000220: f4 ff bf 22 be,a ff0001f0 -+ff000224: 48 60 26 e0 st %l0, [ %i1 + 0x48 ] -+ff000228: 00 00 70 81 breaktest -+ff00022c: 16 00 80 1c bpos ff000284 -+ff000230: 00 00 00 01 nop -+ff000234: 60 a0 23 9c sub %sp, 0x60, %sp -+ff000238: 00 a0 a3 c0 stblock32 %g0, [ %sp ] -+ff00023c: 20 a0 a3 f0 stblock32 %i0, [ %sp + 0x20 ] -+ff000240: 40 a0 a3 e0 stblock32 %l0, [ %sp + 0x40 ] -+ff000244: 00 c0 3f 3f sethi %hi(0xff000000), %i7 -+ff000248: 60 e2 17 be or %i7, 0x260, %i7 ! ff000260 -+ff00024c: 04 e0 8f 80 btst 4, %i7 -+ff000250: 06 00 80 12 bne ff000268 -+ff000254: 00 00 00 01 nop -+ff000258: 00 c0 1f fc ldd [ %i7 ], %fp -+ff00025c: 00 00 a0 81 break -+ff000260: 06 00 80 10 b ff000278 -+ff000264: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff000268: 14 e0 07 be add %i7, 0x14, %i7 -+ff00026c: 00 c0 1f fc ldd [ %i7 ], %fp -+ff000270: 00 00 a0 81 break -+ff000274: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff000278: 20 a0 83 f0 ldblock32 [ %sp + 0x20 ], %i0 -+ff00027c: 00 a0 83 c0 ldblock32 [ %sp ], %g0 -+ff000280: 60 a0 03 9c add %sp, 0x60, %sp -+ff000284: ff 3f 84 a0 addcc %l0, -1, %l0 -+ff000288: e0 ff bf 1c bpos ff000208 -+ff00028c: 18 00 10 90 mov %i0, %o0 -+ff000290: d5 ff bf 30 b,a ff0001e4 -+ff000294: 60 a0 03 de ld [ %sp + 0x60 ], %o7 -+ff000298: 80 a0 83 e0 ldblock32 [ %sp + 0x80 ], %l0 -+ff00029c: a0 a0 83 f0 ldblock32 [ %sp + 0xa0 ], %i0 -+ff0002a0: 08 e0 c3 81 retl -+ff0002a4: e0 a0 03 9c add %sp, 0xe0, %sp -+ -+ff0002a8 : -+ff0002a8: 00 a1 23 9c sub %sp, 0x100, %sp -+ff0002ac: 60 a0 23 de st %o7, [ %sp + 0x60 ] -+ff0002b0: 80 a0 a3 e0 stblock32 %l0, [ %sp + 0x80 ] -+ff0002b4: a0 a0 a3 f0 stblock32 %i0, [ %sp + 0xa0 ] -+ff0002b8: 44 a1 23 d0 st %o0, [ %sp + 0x144 ] -+ff0002bc: 09 00 10 b0 mov %o1, %i0 -+ff0002c0: 0a 00 10 b6 mov %o2, %i3 -+ff0002c4: 0b 00 10 b8 mov %o3, %i4 -+ff0002c8: 0c 00 10 b4 mov %o4, %i2 -+ff0002cc: 01 20 10 ba mov 1, %i5 -+ff0002d0: dc a0 23 fa st %i5, [ %sp + 0xdc ] -+ff0002d4: 14 20 07 d2 ld [ %i4 + 0x14 ], %o1 -+ff0002d8: 08 20 07 d0 ld [ %i4 + 8 ], %o0 -+ff0002dc: 08 40 02 b2 add %o1, %o0, %i1 -+ff0002e0: 00 00 27 c0 clr [ %i4 ] -+ff0002e4: f4 20 06 d0 ld [ %i0 + 0xf4 ], %o0 -+ff0002e8: 00 20 a2 80 cmp %o0, 0 -+ff0002ec: 15 00 80 32 bne,a ff000340 -+ff0002f0: f4 20 06 d0 ld [ %i0 + 0xf4 ], %o0 -+ff0002f4: 18 20 07 90 add %i4, 0x18, %o0 -+ff0002f8: dc a0 03 d2 ld [ %sp + 0xdc ], %o1 -+ff0002fc: 20 a0 23 9c sub %sp, 0x20, %sp -+ff000300: 00 a0 a3 f0 stblock32 %i0, [ %sp ] -+ff000304: 00 c0 3f 3f sethi %hi(0xff000000), %i7 -+ff000308: 20 e3 17 be or %i7, 0x320, %i7 ! ff000320 -+ff00030c: 04 e0 8f 80 btst 4, %i7 -+ff000310: 06 00 80 12 bne ff000328 -+ff000314: 00 00 00 01 nop -+ff000318: 00 c0 1f f8 ldd [ %i7 ], %i4 -+ff00031c: 00 00 b0 81 waitevent -+ff000320: 06 00 80 10 b ff000338 -+ff000324: 00 a0 83 f0 ldblock32 [ %sp ], %i0 -+ff000328: 14 e0 07 be add %i7, 0x14, %i7 -+ff00032c: 00 c0 1f f8 ldd [ %i7 ], %i4 -+ff000330: 00 00 b0 81 waitevent -+ff000334: 00 a0 83 f0 ldblock32 [ %sp ], %i0 -+ff000338: 20 a0 03 9c add %sp, 0x20, %sp -+ff00033c: f4 20 06 d0 ld [ %i0 + 0xf4 ], %o0 -+ff000340: 00 20 a2 80 cmp %o0, 0 -+ff000344: 0a 00 80 22 be,a ff00036c -+ff000348: dc a0 23 c0 clr [ %sp + 0xdc ] -+ff00034c: 04 20 07 d0 ld [ %i4 + 4 ], %o0 -+ff000350: 08 40 a6 80 cmp %i1, %o0 -+ff000354: 06 00 80 32 bne,a ff00036c -+ff000358: dc a0 23 c0 clr [ %sp + 0xdc ] -+ff00035c: 18 00 10 82 mov %i0, %g1 -+ff000360: 02 20 d0 91 ta 2 -+ff000364: e1 ff bf 10 b ff0002e8 -+ff000368: f4 20 06 d0 ld [ %i0 + 0xf4 ], %o0 -+ff00036c: 06 00 80 10 b ff000384 -+ff000370: 19 00 10 a2 mov %i1, %l1 -+ff000374: 04 20 06 d0 ld [ %i0 + 4 ], %o0 -+ff000378: 00 c0 26 d0 st %o0, [ %i3 ] -+ff00037c: 18 00 10 82 mov %i0, %g1 -+ff000380: 00 20 d0 91 ta 0 -+ff000384: 04 20 06 d0 ld [ %i0 + 4 ], %o0 -+ff000388: 01 20 02 90 inc %o0 -+ff00038c: 04 20 26 d0 st %o0, [ %i0 + 4 ] -+ff000390: 00 00 06 d0 ld [ %i0 ], %o0 -+ff000394: 00 20 a2 80 cmp %o0, 0 -+ff000398: 04 00 80 02 be ff0003a8 -+ff00039c: 18 00 10 90 mov %i0, %o0 -+ff0003a0: 4f 01 00 40 call ff0008dc -+ff0003a4: 1b 00 10 92 mov %i3, %o1 -+ff0003a8: f0 20 06 e0 ld [ %i0 + 0xf0 ], %l0 -+ff0003ac: 00 20 a4 80 cmp %l0, 0 -+ff0003b0: f1 ff bf 02 be ff000374 -+ff0003b4: 40 b0 36 11 sethi %hi(0xdac10000), %o0 -+ff0003b8: 00 40 04 d2 ld [ %l1 ], %o1 -+ff0003bc: 01 20 12 90 or %o0, 1, %o0 -+ff0003c0: 08 40 a2 80 cmp %o1, %o0 -+ff0003c4: 0e 01 80 12 bne ff0007fc -+ff0003c8: 10 00 10 96 mov %l0, %o3 -+ff0003cc: 04 60 04 d0 ld [ %l1 + 4 ], %o0 -+ff0003d0: 01 20 8a 80 btst 1, %o0 -+ff0003d4: 33 00 80 02 be ff0004a0 -+ff0003d8: a0 20 04 84 add %l0, 0xa0, %g2 -+ff0003dc: 0c 26 10 ba mov 0x60c, %i5 -+ff0003e0: 00 00 24 fa st %i5, [ %l0 ] -+ff0003e4: 00 21 10 90 mov 0x100, %o0 -+ff0003e8: 04 20 24 d0 st %o0, [ %l0 + 4 ] -+ff0003ec: 24 60 04 d0 ld [ %l1 + 0x24 ], %o0 -+ff0003f0: 80 20 02 90 add %o0, 0x80, %o0 -+ff0003f4: 08 20 24 d0 st %o0, [ %l0 + 8 ] -+ff0003f8: ec 20 04 d0 ld [ %l0 + 0xec ], %o0 -+ff0003fc: 00 21 02 90 add %o0, 0x100, %o0 -+ff000400: 0c 20 24 d0 st %o0, [ %l0 + 0xc ] -+ff000404: 10 20 24 c4 st %g2, [ %l0 + 0x10 ] -+ff000408: 18 60 04 d2 ld [ %l1 + 0x18 ], %o1 -+ff00040c: 02 60 2a 93 sll %o1, 2, %o1 -+ff000410: 09 80 06 d0 ld [ %i2 + %o1 ], %o0 -+ff000414: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff000418: 1d 00 02 90 add %o0, %i5, %o0 -+ff00041c: 09 80 26 d0 st %o0, [ %i2 + %o1 ] -+ff000420: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff000424: 1d 00 0a 90 and %o0, %i5, %o0 -+ff000428: 44 a1 03 fa ld [ %sp + 0x144 ], %i5 -+ff00042c: 60 60 07 d2 ld [ %i5 + 0x60 ], %o1 -+ff000430: 00 68 02 92 add %o1, 0x800, %o1 -+ff000434: 09 00 12 90 or %o0, %o1, %o0 -+ff000438: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff00043c: 1d 00 12 90 or %o0, %i5, %o0 -+ff000440: 14 20 24 d0 st %o0, [ %l0 + 0x14 ] -+ff000444: 20 60 04 d0 ld [ %l1 + 0x20 ], %o0 -+ff000448: 10 21 02 90 add %o0, 0x110, %o0 -+ff00044c: 18 20 24 d0 st %o0, [ %l0 + 0x18 ] -+ff000450: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000454: 02 20 2a 91 sll %o0, 2, %o0 -+ff000458: 08 80 06 d2 ld [ %i2 + %o0 ], %o1 -+ff00045c: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff000460: 1d 40 02 92 add %o1, %i5, %o1 -+ff000464: 08 80 26 d2 st %o1, [ %i2 + %o0 ] -+ff000468: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff00046c: 1d 40 0a 92 and %o1, %i5, %o1 -+ff000470: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000474: 00 28 02 90 add %o0, 0x800, %o0 -+ff000478: 80 00 00 15 sethi %hi(0x20000), %o2 -+ff00047c: 0a 00 12 90 or %o0, %o2, %o0 -+ff000480: 08 40 12 92 or %o1, %o0, %o1 -+ff000484: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff000488: 1d 40 12 92 or %o1, %i5, %o1 -+ff00048c: 1c 20 24 d2 st %o1, [ %l0 + 0x1c ] -+ff000490: 01 20 10 90 mov 1, %o0 -+ff000494: a0 20 24 d0 st %o0, [ %l0 + 0xa0 ] -+ff000498: 20 20 04 96 add %l0, 0x20, %o3 -+ff00049c: a8 20 04 84 add %l0, 0xa8, %g2 -+ff0004a0: 30 60 04 d0 ld [ %l1 + 0x30 ], %o0 -+ff0004a4: 00 20 a2 80 cmp %o0, 0 -+ff0004a8: 2b 00 80 12 bne ff000554 -+ff0004ac: 00 20 10 98 clr %o4 -+ff0004b0: 0c 26 10 ba mov 0x60c, %i5 -+ff0004b4: 00 c0 22 fa st %i5, [ %o3 ] -+ff0004b8: 04 e0 22 c0 clr [ %o3 + 4 ] -+ff0004bc: c0 20 04 90 add %l0, 0xc0, %o0 -+ff0004c0: 10 e0 22 d0 st %o0, [ %o3 + 0x10 ] -+ff0004c4: 18 60 04 d2 ld [ %l1 + 0x18 ], %o1 -+ff0004c8: 02 60 2a 93 sll %o1, 2, %o1 -+ff0004cc: 09 80 06 d0 ld [ %i2 + %o1 ], %o0 -+ff0004d0: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff0004d4: 1d 00 02 90 add %o0, %i5, %o0 -+ff0004d8: 09 80 26 d0 st %o0, [ %i2 + %o1 ] -+ff0004dc: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff0004e0: 1d 00 0a 90 and %o0, %i5, %o0 -+ff0004e4: 44 a1 03 fa ld [ %sp + 0x144 ], %i5 -+ff0004e8: 60 60 07 d2 ld [ %i5 + 0x60 ], %o1 -+ff0004ec: 00 68 02 92 add %o1, 0x800, %o1 -+ff0004f0: 09 00 12 90 or %o0, %o1, %o0 -+ff0004f4: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff0004f8: 1d 00 12 90 or %o0, %i5, %o0 -+ff0004fc: 14 e0 22 d0 st %o0, [ %o3 + 0x14 ] -+ff000500: 20 60 04 d0 ld [ %l1 + 0x20 ], %o0 -+ff000504: 10 21 02 90 add %o0, 0x110, %o0 -+ff000508: 18 e0 22 d0 st %o0, [ %o3 + 0x18 ] -+ff00050c: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000510: 02 20 2a 91 sll %o0, 2, %o0 -+ff000514: 08 80 06 d2 ld [ %i2 + %o0 ], %o1 -+ff000518: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff00051c: 1d 40 02 92 add %o1, %i5, %o1 -+ff000520: 08 80 26 d2 st %o1, [ %i2 + %o0 ] -+ff000524: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff000528: 1d 40 0a 92 and %o1, %i5, %o1 -+ff00052c: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000530: 00 28 02 90 add %o0, 0x800, %o0 -+ff000534: 80 00 00 15 sethi %hi(0x20000), %o2 -+ff000538: 0a 00 12 90 or %o0, %o2, %o0 -+ff00053c: 08 40 12 92 or %o1, %o0, %o1 -+ff000540: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff000544: 1d 40 12 92 or %o1, %i5, %o1 -+ff000548: 1c e0 22 d2 st %o1, [ %o3 + 0x1c ] -+ff00054c: 4f 00 80 10 b ff000688 -+ff000550: 00 20 10 9a clr %o5 -+ff000554: 0c 00 10 9a mov %o4, %o5 -+ff000558: 30 60 04 d0 ld [ %l1 + 0x30 ], %o0 -+ff00055c: 08 40 a3 80 cmp %o5, %o0 -+ff000560: 3b 00 80 1a bcc ff00064c -+ff000564: e0 20 04 c6 ld [ %l0 + 0xe0 ], %g3 -+ff000568: 0c 26 10 ba mov 0x60c, %i5 -+ff00056c: 00 c0 22 fa st %i5, [ %o3 ] -+ff000570: 01 20 2b 91 sll %o4, 1, %o0 -+ff000574: 0c 00 02 90 add %o0, %o4, %o0 -+ff000578: 02 20 2a 91 sll %o0, 2, %o0 -+ff00057c: 08 40 04 90 add %l1, %o0, %o0 -+ff000580: 38 20 02 d2 ld [ %o0 + 0x38 ], %o1 -+ff000584: 04 e0 22 d2 st %o1, [ %o3 + 4 ] -+ff000588: 34 20 02 d0 ld [ %o0 + 0x34 ], %o0 -+ff00058c: 08 e0 22 d0 st %o0, [ %o3 + 8 ] -+ff000590: 0c e0 22 c6 st %g3, [ %o3 + 0xc ] -+ff000594: 10 e0 22 c4 st %g2, [ %o3 + 0x10 ] -+ff000598: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff00059c: 02 20 2a 91 sll %o0, 2, %o0 -+ff0005a0: 08 80 06 d2 ld [ %i2 + %o0 ], %o1 -+ff0005a4: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff0005a8: 1d 40 02 92 add %o1, %i5, %o1 -+ff0005ac: 08 80 26 d2 st %o1, [ %i2 + %o0 ] -+ff0005b0: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff0005b4: 1d 40 0a 92 and %o1, %i5, %o1 -+ff0005b8: 44 a1 03 fa ld [ %sp + 0x144 ], %i5 -+ff0005bc: 60 60 07 d0 ld [ %i5 + 0x60 ], %o0 -+ff0005c0: 00 28 02 90 add %o0, 0x800, %o0 -+ff0005c4: 08 40 12 92 or %o1, %o0, %o1 -+ff0005c8: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff0005cc: 1d 40 12 92 or %o1, %i5, %o1 -+ff0005d0: 14 e0 22 d2 st %o1, [ %o3 + 0x14 ] -+ff0005d4: 20 60 04 d0 ld [ %l1 + 0x20 ], %o0 -+ff0005d8: 10 21 02 90 add %o0, 0x110, %o0 -+ff0005dc: 18 e0 22 d0 st %o0, [ %o3 + 0x18 ] -+ff0005e0: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff0005e4: 02 20 2a 91 sll %o0, 2, %o0 -+ff0005e8: 08 80 06 d4 ld [ %i2 + %o0 ], %o2 -+ff0005ec: 00 01 00 3b sethi %hi(0x40000), %i5 -+ff0005f0: 1d 80 02 94 add %o2, %i5, %o2 -+ff0005f4: 08 80 26 d4 st %o2, [ %i2 + %o0 ] -+ff0005f8: c0 ff 3f 3b sethi %hi(0xffff0000), %i5 -+ff0005fc: 1d 80 0a 94 and %o2, %i5, %o2 -+ff000600: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000604: 00 28 02 90 add %o0, 0x800, %o0 -+ff000608: 80 00 00 13 sethi %hi(0x20000), %o1 -+ff00060c: 09 00 12 90 or %o0, %o1, %o0 -+ff000610: 08 80 12 94 or %o2, %o0, %o2 -+ff000614: 40 00 00 3b sethi %hi(0x10000), %i5 -+ff000618: 1d 80 12 94 or %o2, %i5, %o2 -+ff00061c: 1c e0 22 d4 st %o2, [ %o3 + 0x1c ] -+ff000620: 01 20 10 90 mov 1, %o0 -+ff000624: 00 80 20 d0 st %o0, [ %g2 ] -+ff000628: 04 e0 02 d0 ld [ %o3 + 4 ], %o0 -+ff00062c: 08 c0 00 86 add %g3, %o0, %g3 -+ff000630: 08 40 03 9a add %o5, %o0, %o5 -+ff000634: 01 20 03 98 inc %o4 -+ff000638: 20 e0 02 96 add %o3, 0x20, %o3 -+ff00063c: 30 60 04 d0 ld [ %l1 + 0x30 ], %o0 -+ff000640: 08 00 a3 80 cmp %o4, %o0 -+ff000644: c9 ff bf 0a bcs ff000568 -+ff000648: 08 a0 00 84 add %g2, 8, %g2 -+ff00064c: c0 20 04 90 add %l0, 0xc0, %o0 -+ff000650: f0 ff 22 d0 st %o0, [ %o3 + -16 ] -+ff000654: e4 20 04 d0 ld [ %l0 + 0xe4 ], %o0 -+ff000658: 0d 00 a2 80 cmp %o0, %o5 -+ff00065c: 0b 00 80 1a bcc ff000688 -+ff000660: 00 20 10 98 clr %o4 -+ff000664: 04 00 80 10 b ff000674 -+ff000668: 10 00 10 96 mov %l0, %o3 -+ff00066c: 01 20 03 98 inc %o4 -+ff000670: 20 e0 02 96 add %o3, 0x20, %o3 -+ff000674: 30 60 04 d0 ld [ %l1 + 0x30 ], %o0 -+ff000678: 08 00 a3 80 cmp %o4, %o0 -+ff00067c: fc ff bf 2a bcs,a ff00066c -+ff000680: 04 e0 22 c0 clr [ %o3 + 4 ] -+ff000684: fc 3f 10 9a mov -4, %o5 -+ff000688: e4 20 24 da st %o5, [ %l0 + 0xe4 ] -+ff00068c: 10 00 10 82 mov %l0, %g1 -+ff000690: 18 60 04 d0 ld [ %l1 + 0x18 ], %o0 -+ff000694: 00 28 02 90 add %o0, 0x800, %o0 -+ff000698: 08 00 60 81 open %o0 -+ff00069c: 00 00 00 01 nop -+ff0006a0: 00 00 00 01 nop -+ff0006a4: 00 00 00 01 nop -+ff0006a8: 00 00 00 01 nop -+ff0006ac: 00 00 00 01 nop -+ff0006b0: 00 00 00 01 nop -+ff0006b4: 00 00 00 01 nop -+ff0006b8: 00 00 00 01 nop -+ff0006bc: 00 00 00 01 nop -+ff0006c0: 00 00 00 01 nop -+ff0006c4: 00 00 00 01 nop -+ff0006c8: 00 00 00 01 nop -+ff0006cc: 00 00 00 01 nop -+ff0006d0: 00 00 00 01 nop -+ff0006d4: 00 00 00 01 nop -+ff0006d8: 00 00 00 01 nop -+ff0006dc: 00 00 00 01 nop -+ff0006e0: 00 00 00 01 nop -+ff0006e4: 00 00 00 01 nop -+ff0006e8: 00 00 00 01 nop -+ff0006ec: 00 00 00 01 nop -+ff0006f0: 00 00 00 01 nop -+ff0006f4: 00 00 00 01 nop -+ff0006f8: 00 00 00 01 nop -+ff0006fc: 00 20 10 98 clr %o4 ! 0 <*ABS*> -+ff000700: 0c 00 10 9a mov %o4, %o5 -+ff000704: 14 20 04 fa ld [ %l0 + 0x14 ], %i5 -+ff000708: ec 00 82 3b sendtrans 0x1007, %o4, %i5 -+ff00070c: 30 80 d6 1b sendtrans 0xb401, [ %l0 ], %o5 -+ff000710: 00 00 68 91 close %o0 -+ff000714: 04 20 a2 80 cmp %o0, 4 -+ff000718: 3b 00 80 02 be ff000804 -+ff00071c: 0c 00 a2 80 cmp %o0, %o4 -+ff000720: 04 00 80 02 be ff000730 -+ff000724: 00 00 00 01 nop -+ff000728: 01 20 d0 91 ta 1 -+ff00072c: 36 00 80 30 b,a ff000804 -+ff000730: 7c 20 06 d0 ld [ %i0 + 0x7c ], %o0 -+ff000734: 01 20 02 90 inc %o0 -+ff000738: 7c 20 26 d0 st %o0, [ %i0 + 0x7c ] -+ff00073c: 78 20 06 d0 ld [ %i0 + 0x78 ], %o0 -+ff000740: 00 20 a2 80 cmp %o0, 0 -+ff000744: 04 00 80 02 be ff000754 -+ff000748: 78 20 06 90 add %i0, 0x78, %o0 -+ff00074c: 64 00 00 40 call ff0008dc -+ff000750: 40 e0 06 92 add %i3, 0x40, %o1 -+ff000754: f0 20 04 d0 ld [ %l0 + 0xf0 ], %o0 -+ff000758: 00 20 a2 80 cmp %o0, 0 -+ff00075c: 03 00 80 12 bne ff000768 -+ff000760: f0 20 26 d0 st %o0, [ %i0 + 0xf0 ] -+ff000764: 80 e0 26 c0 clr [ %i3 + 0x80 ] -+ff000768: 7c 20 06 d0 ld [ %i0 + 0x7c ], %o0 -+ff00076c: 40 e0 26 d0 st %o0, [ %i3 + 0x40 ] -+ff000770: 04 60 04 d0 ld [ %l1 + 4 ], %o0 -+ff000774: 04 20 8a 80 btst 4, %o0 -+ff000778: 13 00 80 02 be ff0007c4 -+ff00077c: 11 00 10 8a mov %l1, %g5 -+ff000780: ec 20 04 cc ld [ %l0 + 0xec ], %g6 -+ff000784: 3f a0 0b 8e and %sp, 0x3f, %g7 -+ff000788: 40 e0 01 8e add %g7, 0x40, %g7 -+ff00078c: 07 80 23 9c sub %sp, %g7, %sp -+ff000790: 00 80 bb e0 stblock64 %l0, [ %sp ] -+ff000794: 00 60 99 e0 ldblock64 [ %g5 ], %l0 -+ff000798: 00 a0 b9 e0 stblock64 %l0, [ %g6 ] -+ff00079c: 40 60 99 e0 ldblock64 [ %g5 + 0x40 ], %l0 -+ff0007a0: 40 a0 b9 e0 stblock64 %l0, [ %g6 + 0x40 ] -+ff0007a4: 80 60 99 e0 ldblock64 [ %g5 + 0x80 ], %l0 -+ff0007a8: 80 a0 b9 e0 stblock64 %l0, [ %g6 + 0x80 ] -+ff0007ac: c0 60 99 e0 ldblock64 [ %g5 + 0xc0 ], %l0 -+ff0007b0: c0 a0 b9 e0 stblock64 %l0, [ %g6 + 0xc0 ] -+ff0007b4: 00 80 9b e0 ldblock64 [ %sp ], %l0 -+ff0007b8: 07 80 03 9c add %sp, %g7, %sp -+ff0007bc: 0e 00 80 10 b ff0007f4 -+ff0007c0: ec 20 04 d2 ld [ %l0 + 0xec ], %o1 -+ff0007c4: ec 20 04 cc ld [ %l0 + 0xec ], %g6 -+ff0007c8: 3f a0 0b 8e and %sp, 0x3f, %g7 -+ff0007cc: 40 e0 01 8e add %g7, 0x40, %g7 -+ff0007d0: 07 80 23 9c sub %sp, %g7, %sp -+ff0007d4: 00 80 bb e0 stblock64 %l0, [ %sp ] -+ff0007d8: 00 60 99 e0 ldblock64 [ %g5 ], %l0 -+ff0007dc: 00 a0 b9 e0 stblock64 %l0, [ %g6 ] -+ff0007e0: 40 60 99 e0 ldblock64 [ %g5 + 0x40 ], %l0 -+ff0007e4: 40 a0 b9 e0 stblock64 %l0, [ %g6 + 0x40 ] -+ff0007e8: 00 80 9b e0 ldblock64 [ %sp ], %l0 -+ff0007ec: 07 80 03 9c add %sp, %g7, %sp -+ff0007f0: ec 20 04 d2 ld [ %l0 + 0xec ], %o1 -+ff0007f4: e4 20 04 d0 ld [ %l0 + 0xe4 ], %o0 -+ff0007f8: 88 62 22 d0 st %o0, [ %o1 + 0x288 ] -+ff0007fc: 04 20 06 d0 ld [ %i0 + 4 ], %o0 -+ff000800: 00 c0 26 d0 st %o0, [ %i3 ] -+ff000804: 00 00 07 d0 ld [ %i4 ], %o0 -+ff000808: 01 20 8a 80 btst 1, %o0 -+ff00080c: 05 00 80 12 bne ff000820 -+ff000810: 00 00 00 01 nop -+ff000814: 14 20 27 f2 st %i1, [ %i4 + 0x14 ] -+ff000818: 06 00 80 10 b ff000830 -+ff00081c: dc a0 03 fa ld [ %sp + 0xdc ], %i5 -+ff000820: 14 20 27 f2 st %i1, [ %i4 + 0x14 ] -+ff000824: fe 3f 0a 90 and %o0, -2, %o0 -+ff000828: 00 00 27 d0 st %o0, [ %i4 ] -+ff00082c: dc a0 03 fa ld [ %sp + 0xdc ], %i5 -+ff000830: 01 60 07 ba inc %i5 -+ff000834: dc a0 23 fa st %i5, [ %sp + 0xdc ] -+ff000838: 0c 20 07 d0 ld [ %i4 + 0xc ], %o0 -+ff00083c: 08 40 a6 80 cmp %i1, %o0 -+ff000840: 04 00 80 32 bne,a ff000850 -+ff000844: 08 20 07 d0 ld [ %i4 + 8 ], %o0 -+ff000848: 03 00 80 10 b ff000854 -+ff00084c: 10 20 07 f2 ld [ %i4 + 0x10 ], %i1 -+ff000850: 08 40 06 b2 add %i1, %o0, %i1 -+ff000854: 00 00 70 81 breaktest -+ff000858: 16 00 80 1c bpos ff0008b0 -+ff00085c: 00 00 00 01 nop -+ff000860: 60 a0 23 9c sub %sp, 0x60, %sp -+ff000864: 00 a0 a3 c0 stblock32 %g0, [ %sp ] -+ff000868: 20 a0 a3 f0 stblock32 %i0, [ %sp + 0x20 ] -+ff00086c: 40 a0 a3 e0 stblock32 %l0, [ %sp + 0x40 ] -+ff000870: 02 c0 3f 3f sethi %hi(0xff000800), %i7 -+ff000874: 8c e0 17 be or %i7, 0x8c, %i7 ! ff00088c -+ff000878: 04 e0 8f 80 btst 4, %i7 -+ff00087c: 06 00 80 12 bne ff000894 -+ff000880: 00 00 00 01 nop -+ff000884: 00 c0 1f fc ldd [ %i7 ], %fp -+ff000888: 00 00 a0 81 break -+ff00088c: 06 00 80 10 b ff0008a4 -+ff000890: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff000894: 14 e0 07 be add %i7, 0x14, %i7 -+ff000898: 00 c0 1f fc ldd [ %i7 ], %fp -+ff00089c: 00 00 a0 81 break -+ff0008a0: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff0008a4: 20 a0 83 f0 ldblock32 [ %sp + 0x20 ], %i0 -+ff0008a8: 00 a0 83 c0 ldblock32 [ %sp ], %g0 -+ff0008ac: 60 a0 03 9c add %sp, 0x60, %sp -+ff0008b0: 04 20 07 d0 ld [ %i4 + 4 ], %o0 -+ff0008b4: 08 40 a6 80 cmp %i1, %o0 -+ff0008b8: b3 fe bf 12 bne ff000384 -+ff0008bc: 19 00 10 a2 mov %i1, %l1 -+ff0008c0: 8a fe bf 10 b ff0002e8 -+ff0008c4: f4 20 06 d0 ld [ %i0 + 0xf4 ], %o0 -+ff0008c8: 60 a0 03 de ld [ %sp + 0x60 ], %o7 -+ff0008cc: 80 a0 83 e0 ldblock32 [ %sp + 0x80 ], %l0 -+ff0008d0: a0 a0 83 f0 ldblock32 [ %sp + 0xa0 ], %i0 -+ff0008d4: 08 e0 c3 81 retl -+ff0008d8: 00 a1 03 9c add %sp, 0x100, %sp -+ -+ff0008dc : -+ff0008dc: 80 a0 23 9c sub %sp, 0x80, %sp -+ff0008e0: 04 20 02 c4 ld [ %o0 + 4 ], %g2 -+ff0008e4: 00 40 22 c4 st %g2, [ %o1 ] -+ff0008e8: 18 00 80 30 b,a ff000948 -+ff0008ec: 00 00 70 81 breaktest -+ff0008f0: 16 00 80 12 bne ff000948 -+ff0008f4: 00 00 00 01 nop -+ff0008f8: 60 a0 23 9c sub %sp, 0x60, %sp -+ff0008fc: 00 a0 a3 c0 stblock32 %g0, [ %sp ] -+ff000900: 20 a0 a3 f0 stblock32 %i0, [ %sp + 0x20 ] -+ff000904: 40 a0 a3 e0 stblock32 %l0, [ %sp + 0x40 ] -+ff000908: 02 c0 3f 3f sethi %hi(0xff000800), %i7 -+ff00090c: 24 e1 17 be or %i7, 0x124, %i7 ! ff000924 -+ff000910: 04 e0 8f 80 btst 4, %i7 -+ff000914: 06 00 80 12 bne ff00092c -+ff000918: 00 00 00 01 nop -+ff00091c: 00 c0 1f fc ldd [ %i7 ], %fp -+ff000920: 00 00 a0 81 break -+ff000924: 06 00 80 10 b ff00093c -+ff000928: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff00092c: 14 e0 07 be add %i7, 0x14, %i7 -+ff000930: 00 c0 1f fc ldd [ %i7 ], %fp -+ff000934: 00 00 a0 81 break -+ff000938: 40 a0 83 e0 ldblock32 [ %sp + 0x40 ], %l0 -+ff00093c: 20 a0 83 f0 ldblock32 [ %sp + 0x20 ], %i0 -+ff000940: 00 a0 83 c0 ldblock32 [ %sp ], %g0 -+ff000944: 60 a0 03 9c add %sp, 0x60, %sp -+ff000948: 00 00 02 c4 ld [ %o0 ], %g2 -+ff00094c: 00 a0 a0 80 cmp %g2, 0 -+ff000950: e7 ff bf 12 bne ff0008ec -+ff000954: 00 00 00 01 nop -+ff000958: 04 20 02 c4 ld [ %o0 + 4 ], %g2 -+ff00095c: 01 a0 00 84 inc %g2 -+ff000960: 04 20 22 c4 st %g2, [ %o0 + 4 ] -+ff000964: 00 00 02 c4 ld [ %o0 ], %g2 -+ff000968: 00 a0 a0 80 cmp %g2, 0 -+ff00096c: dd ff bf 12 bne ff0008e0 -+ff000970: 00 00 00 01 nop -+ff000974: 08 e0 c3 81 retl -+ff000978: 80 a0 03 9c add %sp, 0x80, %sp -+Disassembly of section .data: -diff -urN clean/drivers/net/qsnet/ep/threadcode_elan4_Linux.c linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan4_Linux.c ---- clean/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,107 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan4.h" -+static uint32_t threadcode_elan4_text[] = { -+0x00a00087, 0xc04060cb, 0x00003080, 0x80001080, 0x02606180, 0x02004032, 0x807f60cb, 0x04606180, -+0x02004032, 0x407f60d3, 0x08606180, 0x02004032, 0x007f60db, 0x10606180, 0x02004032, 0xc07e60e3, -+0x20606180, 0x02004032, 0x807e60eb, 0x40606180, 0x02004032, 0x407e60f3, 0x80606180, 0x02004032, -+0x007e60fb, 0x40001180, 0xc3801080, 0xc07f60c3, 0x20002000, 0x20002000, 0x20002000, 0x20002000, -+0x407f8001, 0x6860c0c7, 0x4060e0d1, 0x00208490, 0x00208080, 0x00208080, 0x6060c0d4, 0x00208292, -+0x00608290, 0x00a08294, 0xff3f8088, 0x1c381293, 0xc00044c8, 0x13004291, 0xc000c5d1, 0xc00044c8, -+0x20381288, 0x0020b200, 0x0e004003, 0x01608408, 0x00001088, 0x04204288, 0x0020b200, 0x04004003, -+0x00208080, 0x74010040, 0x00a08488, 0xc00044c8, 0x20381288, 0x0020b200, 0xf6ff7f13, 0x01608408, -+0x10161282, 0x800094c2, 0xc00044c8, 0x20381288, 0x0020b200, 0xe7ff7f13, 0x00208080, 0x686040c7, -+0x406060d1, 0x606040d4, 0x08e00180, 0xc0608001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0xc07e8001, 0xc060c0c7, 0x4060e0d3, 0x00208490, 0x00208080, 0x00208080, 0x8060e0db, 0x00208698, -+0x00208080, 0x00208080, 0x4061c0c8, 0x00608295, 0x00a0829b, 0x5861c0cb, 0x6061c0cc, 0x6861c0cd, -+0x0120809c, 0x08e042d1, 0x1c00900a, 0x05b4128a, 0x606140c8, 0x586140cb, 0x58010040, 0x18e042c9, -+0x0020809c, 0x586140cd, 0xc04043c8, 0x0840b400, 0x30014003, 0xffff3f08, 0xe023829f, 0x20f4179f, -+0x10e3879f, 0xe023829e, 0x20b4179e, 0x03a3879e, 0x00a0879d, 0x00608493, 0x18608408, 0x800012c2, -+0x089a109a, 0x20b4169a, 0x20b8169a, 0x00a88609, 0x20741289, 0x01120008, 0x0a381288, 0x08408297, -+0x45208088, 0x06341288, 0x406140c9, 0xc84042c8, 0x00288218, 0x04608408, 0x800012c2, 0x089a1088, -+0x20341288, 0x20381288, 0x00208299, 0x20608408, 0x800012c2, 0x089a1089, 0x20741289, 0x20781289, -+0x30608408, 0x800012c2, 0x089a1094, 0x20341594, 0x20381594, 0x02604688, 0x0020b200, 0x03004012, -+0x80608216, 0x60608216, 0x90608509, 0x804012c8, 0x01208208, 0x804092c8, 0x046012c8, 0x043a1288, -+0x0020b200, 0x04004003, 0x686140c8, 0x7dffff7f, 0x00e0868a, 0x886045d0, 0x0020b400, 0x12004013, -+0x90608512, 0x808014c8, 0x80c096c8, 0x64010040, 0x00608588, 0x00208290, 0x808014c8, 0x01208208, -+0x808094c8, 0x04a014c8, 0x043a1288, 0x0020b200, 0x05004003, 0x686140c8, 0x00a08489, 0x69ffff7f, -+0x00e0868a, 0x80c014c2, 0x139a1089, 0x20741289, 0x20781289, 0x40b03608, 0x01208288, 0x0840b200, -+0x06004003, 0x90608508, 0x800012c8, 0x80c096c8, 0xbf004010, 0xa86045c8, 0xa02344c3, 0x01604688, -+0x0020b500, 0x03004013, 0x14008209, 0x01208209, 0x05208088, 0x09009221, 0x0774188a, 0x0a00840b, -+0x05741888, 0x0800840c, 0x406140cd, 0x29228088, 0x03341288, 0xc84043c9, 0x03b41688, 0xc84042cf, -+0x01604688, 0x0020b200, 0x1d004002, 0x0023830c, 0xca00c4d7, 0x40c40f09, 0x09608289, 0x08e0c2c9, -+0x0ae08388, 0x10e0c2c8, 0x81001008, 0x0a341288, 0x18e0c2c8, 0x1de08388, 0x20e0c2c8, 0x28e0c2d8, -+0x24e08408, 0x800012c2, 0x089a1088, 0x20341288, 0x20381288, 0x80208208, 0x30e0c2c8, 0x00e18008, -+0x38e0c2c8, 0x40e0c2d6, 0x48e0c2cc, 0xc000c3df, 0x20e0830f, 0x80e0820b, 0x2020830c, 0x0020b500, -+0x13004033, 0x0020808d, 0xc0c0c2d7, 0x40c40f0a, 0x09a0828a, 0x08e0c2ca, 0x0ae08388, 0x10e0c2c8, -+0x00040008, 0x18e0c2c8, 0x1de08388, 0x20e0c2c8, 0x28e0c2d8, 0x40e0c2d6, 0x48e0c2cc, 0xc000c3de, -+0x00208092, 0x4b004010, 0x20e0830f, 0xb8238408, 0x800012c2, 0x089a108e, 0x20b4138e, 0x20b8138e, -+0x00208092, 0x1480b400, 0x2d00401b, 0x40c40f08, 0x092082a3, 0x00040022, 0xffff3f08, 0xe02382a0, -+0x203418a0, 0x102388a0, 0x0d408309, 0x0d408209, 0x02741289, 0x09c08409, 0x3860820a, 0x808012c2, -+0x0a9a108a, 0x20b4128a, 0x20b8128a, 0xc0c0c2d7, 0x08e0c2e3, 0x0ae08388, 0x10e0c2c8, 0x20b41288, -+0x22008288, 0x18e0c2c8, 0x1de08388, 0x20e0c2c8, 0x28e0c2d8, 0x34608209, 0x804012c2, 0x099a1089, -+0x20741289, 0x20781289, 0x30e0c2c9, 0x38e0c2ce, 0x40e0c2d6, 0x48e0c2cc, 0xc000c3e0, 0x0a80830e, -+0x0a808412, 0x20e0830f, 0x80e0820b, 0x0160830d, 0x1440b300, 0xddff7f0b, 0x2020830c, 0xe03f830c, -+0xc000c3dd, 0xbc238408, 0x800012c2, 0x089a1088, 0x20341288, 0x20381288, 0x1200b200, 0x0e00401b, -+0x07741888, 0x0060888d, 0x0460b800, 0x08004019, 0x0800840b, 0x00040008, 0x18e0c2c8, 0x0160830d, -+0x0460b300, 0xfdff7f09, 0x80e0820b, 0xfc3f8092, 0x07741888, 0x08008408, 0x606140cb, 0xc00062e3, -+0x402062f3, 0xc0c0e2e3, 0xc0c0e2f3, 0x982244c8, 0x8860c5c8, 0x886045c8, 0x0020b200, 0x05004033, -+0xd82294c0, 0x88608508, 0x8060c5c8, 0xd82294c0, 0x04604688, 0x0020b200, 0x0c004002, 0xdc2294c0, -+0xc0c064e3, 0x40e064f3, 0xc0c0e0e3, 0x80e064e3, 0x40e0e0f3, 0xc0e064f3, 0x80e0e0e3, 0xc0e0e0f3, -+0x07004010, 0x88e28008, 0xc0c064e3, 0x40e064f3, 0xc0c0e0e3, 0x40e0e0f3, 0x88e28008, 0x08961482, -+0x800092c2, 0x406140cd, 0x29228088, 0x03341288, 0xc84043c9, 0x03b41688, 0xc840c2cf, 0x90608508, -+0x800012c8, 0x80c096c8, 0xa86045c8, 0x0840b400, 0x03004013, 0x00618411, 0xa06045d1, 0x986045c8, -+0x0020b200, 0x04004013, 0x0120871c, 0x586140c9, 0x0860c2d1, 0xfe21b700, 0x0f004035, 0x986045cb, -+0x00001088, 0x02204288, 0x0020b200, 0x05004003, 0x586140ca, 0x18000040, 0x606140c8, 0x586140ca, -+0xc08042c8, 0x0840b400, 0xdcfe7f13, 0x00608493, 0x986045cb, 0x00e0b200, 0xc5fe7f03, 0x1c00900a, -+0x606140c8, 0x60608509, 0x38000040, 0xe03f808a, 0x586140cb, 0x08e0c2d1, 0xbcfe7f10, 0x0120871c, -+0xc06040c7, 0x406060d3, 0x806060db, 0x08e00180, 0x40618001, 0x807f8001, 0xc040e0d3, 0x4060e0db, -+0x00208490, 0x00208698, 0x00208080, 0x00208080, 0x00e08192, 0x02000040, 0x00608091, 0x14e08110, -+0x17208097, 0xc000f2d3, 0xc04060d3, 0x406060db, 0x08a00080, 0x80608001, 0x407f8001, 0x4060e0d3, -+0x8060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, 0x00e08192, 0x02000040, 0x00608091, -+0x40e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, 0x00208490, 0x00e08597, 0x00208080, 0x00208080, -+0x1f608290, 0x20b41291, 0x08638491, 0x00608092, 0x00208293, 0xc000f2d1, 0x406060d3, 0x806060db, -+0x08a00080, 0xc0608001, 0x407f8001, 0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, 0x00208080, -+0x00208080, 0x00e08192, 0x02000040, 0x00608091, 0x54e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, -+0x00208490, 0x00e08597, 0x00208080, 0x00208080, 0x1f608290, 0x20b41291, 0x08638491, 0x00608092, -+0x00208293, 0x0ef41294, 0x0d208594, 0x17208095, 0x17208096, 0x17208097, 0xc000f2d3, 0x406060d3, -+0x806060db, 0x08a00080, 0xc0608001, 0x01208097, 0xb0e3c0d7, 0x80a060d2, 0x98e28004, 0x98e2c0c0, -+0x80a0c0c4, 0xc080c4c3, 0x01e0b400, 0x06004002, 0x00a08490, 0x00e08097, 0x02208097, 0xb0e3c0d7, -+0xd8e2d0d0, 0xd8e2c0d0, 0x03208097, 0xb0e3c0d7, 0x00e08088, 0x0e004010, 0x00a060c3, 0x407f8001, -+0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, 0x01208089, 0x8820c2c9, -+0x00608091, 0x00e08197, 0x0020f2d3, 0x406060d3, 0x806060db, 0x08e00180, 0xc0608001, }; -+#define threadcode_elan4_text_size 0x87c -+static uint32_t threadcode_elan4_data[] = { -+0}; -+#define threadcode_elan4_data_size 0x0 -+static uint32_t threadcode_elan4_rodata[] = { -+0}; -+#define threadcode_elan4_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan4_symbols[] = { -+ {".thread_restart", 0x00000000f800000c}, -+ {".thread_start", 0x00000000f8000000}, -+ {"__bss_start", 0x00000000f810087c}, -+ {"_edata", 0x00000000f810087c}, -+ {"_end", 0x00000000f8100880}, -+ {"_etext", 0x00000000f800087c}, -+ {"_sdata", 0x00000000f810087c}, -+ {"_stext", 0x00000000f8000000}, -+ {"c_queue_rxd", 0x00000000f80007ec}, -+ {"c_reschedule", 0x00000000f80006b4}, -+ {"c_stall_thread", 0x00000000f800083c}, -+ {"c_waitevent", 0x00000000f80006f8}, -+ {"c_waitevent_interrupt", 0x00000000f8000768}, -+ {"ep4_spinblock", 0x00000000f8000080}, -+ {"ep4comms_rcvr", 0x00000000f8000140}, -+ {0, 0}}; -+EP_CODE threadcode_elan4 = { -+ (unsigned char *) threadcode_elan4_text, -+ threadcode_elan4_text_size, -+ (unsigned char *) threadcode_elan4_data, -+ threadcode_elan4_data_size, -+ (unsigned char *) threadcode_elan4_rodata, -+ threadcode_elan4_rodata_size, -+ threadcode_elan4_symbols, -+}; -diff -urN clean/drivers/net/qsnet/ep/threadcode_elan4_Linux.code.dis linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan4_Linux.code.dis ---- clean/drivers/net/qsnet/ep/threadcode_elan4_Linux.code.dis 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/ep/threadcode_elan4_Linux.code.dis 2005-09-07 10:39:44.000000000 -0400 -@@ -0,0 +1,628 @@ -+ -+threadcode_elan4_Linux.code: file format elf64-elan -+ -+Disassembly of section .text: -+ -+00000000f8000000 <_stext>: -+ f8000000: 00 a0 00 87 call %r2 -+ f8000004: c0 40 60 cb ld64 [ %sp ], %r8 -+ f8000008: 00 00 30 80 unimp -+ -+00000000f800000c <.thread_restart>: -+ f800000c: 80 00 10 80 setflg -+ f8000010: 02 60 61 80 btst 2, %r5 -+ f8000014: 02 00 40 32 bne,a f800001c <.thread_restart+0x10> -+ f8000018: 80 7f 60 cb ld64 [ %sp + -128 ], %r8 -+ f800001c: 04 60 61 80 btst 4, %r5 -+ f8000020: 02 00 40 32 bne,a f8000028 <.thread_restart+0x1c> -+ f8000024: 40 7f 60 d3 ld64 [ %sp + -192 ], %r16 -+ f8000028: 08 60 61 80 btst 8, %r5 -+ f800002c: 02 00 40 32 bne,a f8000034 <.thread_restart+0x28> -+ f8000030: 00 7f 60 db ld64 [ %sp + -256 ], %r24 -+ f8000034: 10 60 61 80 btst 0x10, %r5 -+ f8000038: 02 00 40 32 bne,a f8000040 <.thread_restart+0x34> -+ f800003c: c0 7e 60 e3 ld64 [ %sp + -320 ], %r32 -+ f8000040: 20 60 61 80 btst 0x20, %r5 -+ f8000044: 02 00 40 32 bne,a f800004c <.thread_restart+0x40> -+ f8000048: 80 7e 60 eb ld64 [ %sp + -384 ], %r40 -+ f800004c: 40 60 61 80 btst 0x40, %r5 -+ f8000050: 02 00 40 32 bne,a f8000058 <.thread_restart+0x4c> -+ f8000054: 40 7e 60 f3 ld64 [ %sp + -448 ], %r48 -+ f8000058: 80 60 61 80 btst 0x80, %r5 -+ f800005c: 02 00 40 32 bne,a f8000064 <.thread_restart+0x58> -+ f8000060: 00 7e 60 fb ld64 [ %sp + -512 ], %r56 -+ f8000064: 40 00 11 80 ldcc %r4 -+ f8000068: c3 80 10 80 rti %r2, %r3 -+ f800006c: c0 7f 60 c3 ld64 [ %sp + -64 ], %r0 -+ f8000070: 20 00 20 00 sethi %hi(0x80008000), %r0 -+ f8000074: 20 00 20 00 sethi %hi(0x80008000), %r0 -+ f8000078: 20 00 20 00 sethi %hi(0x80008000), %r0 -+ f800007c: 20 00 20 00 sethi %hi(0x80008000), %r0 -+ -+00000000f8000080 : -+ f8000080: 40 7f 80 01 add %sp, -192, %sp -+ f8000084: 68 60 c0 c7 st8 %r7, [ %sp + 0x68 ] -+ f8000088: 40 60 e0 d1 st32 %r16, [ %sp + 0x40 ] -+ f800008c: 00 20 84 90 mov %r16, %r16 -+ f8000090: 00 20 80 80 nop -+ f8000094: 00 20 80 80 nop -+ f8000098: 60 60 c0 d4 st8 %r20, [ %sp + 0x60 ] -+ f800009c: 00 20 82 92 mov %r8, %r18 -+ f80000a0: 00 60 82 90 mov %r9, %r16 -+ f80000a4: 00 a0 82 94 mov %r10, %r20 -+ f80000a8: ff 3f 80 88 mov -1, %r8 -+ f80000ac: 1c 38 12 93 srl8 %r8, 0x1c, %r19 -+ -+00000000f80000b0 : -+ f80000b0: c0 00 44 c8 ld8 [ %r16 ], %r8 -+ f80000b4: 13 00 42 91 and %r8, %r19, %r17 -+ f80000b8: c0 00 c5 d1 st8 %r17, [ %r20 ] -+ f80000bc: c0 00 44 c8 ld8 [ %r16 ], %r8 -+ f80000c0: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f80000c4: 00 20 b2 00 cmp %r8, 0 -+ f80000c8: 0e 00 40 03 be %xcc, f8000100 -+ f80000cc: 01 60 84 08 add %r17, 1, %r8 -+ -+00000000f80000d0 : -+ f80000d0: 00 00 10 88 breaktest %r8 -+ f80000d4: 04 20 42 88 and %r8, 4, %r8 -+ f80000d8: 00 20 b2 00 cmp %r8, 0 -+ f80000dc: 04 00 40 03 be %xcc, f80000ec -+ f80000e0: 00 20 80 80 nop -+ f80000e4: 74 01 00 40 call f80006b4 -+ f80000e8: 00 a0 84 88 mov %r18, %r8 -+ -+00000000f80000ec : -+ f80000ec: c0 00 44 c8 ld8 [ %r16 ], %r8 -+ f80000f0: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f80000f4: 00 20 b2 00 cmp %r8, 0 -+ f80000f8: f6 ff 7f 13 bne %xcc, f80000d0 -+ f80000fc: 01 60 84 08 add %r17, 1, %r8 -+ -+00000000f8000100 : -+ f8000100: 10 16 12 82 sll8, byte %r8, %r16, %r2 -+ f8000104: 80 00 94 c2 st4 %r2, [ %r16 ] -+ f8000108: c0 00 44 c8 ld8 [ %r16 ], %r8 -+ f800010c: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f8000110: 00 20 b2 00 cmp %r8, 0 -+ f8000114: e7 ff 7f 13 bne %xcc, f80000b0 -+ f8000118: 00 20 80 80 nop -+ f800011c: 68 60 40 c7 ld8 [ %sp + 0x68 ], %r7 -+ f8000120: 40 60 60 d1 ld32 [ %sp + 0x40 ], %r16 -+ f8000124: 60 60 40 d4 ld8 [ %sp + 0x60 ], %r20 -+ f8000128: 08 e0 01 80 retl -+ f800012c: c0 60 80 01 add %sp, 0xc0, %sp -+ f8000130: 00 00 00 01 sethi %hi(0), %sp -+ f8000134: 00 00 00 01 sethi %hi(0), %sp -+ f8000138: 00 00 00 01 sethi %hi(0), %sp -+ f800013c: 00 00 00 01 sethi %hi(0), %sp -+ -+00000000f8000140 : -+ f8000140: c0 7e 80 01 add %sp, -320, %sp -+ f8000144: c0 60 c0 c7 st8 %r7, [ %sp + 0xc0 ] -+ f8000148: 40 60 e0 d3 st64 %r16, [ %sp + 0x40 ] -+ f800014c: 00 20 84 90 mov %r16, %r16 -+ f8000150: 00 20 80 80 nop -+ f8000154: 00 20 80 80 nop -+ f8000158: 80 60 e0 db st64 %r24, [ %sp + 0x80 ] -+ f800015c: 00 20 86 98 mov %r24, %r24 -+ f8000160: 00 20 80 80 nop -+ f8000164: 00 20 80 80 nop -+ f8000168: 40 61 c0 c8 st8 %r8, [ %sp + 0x140 ] -+ f800016c: 00 60 82 95 mov %r9, %r21 -+ f8000170: 00 a0 82 9b mov %r10, %r27 -+ f8000174: 58 61 c0 cb st8 %r11, [ %sp + 0x158 ] -+ f8000178: 60 61 c0 cc st8 %r12, [ %sp + 0x160 ] -+ f800017c: 68 61 c0 cd st8 %r13, [ %sp + 0x168 ] -+ f8000180: 01 20 80 9c mov 1, %r28 -+ f8000184: 08 e0 42 d1 ld8 [ %r11 + 8 ], %r17 -+ -+00000000f8000188 : -+ f8000188: 1c 00 90 0a neg %r28, %r10 -+ -+00000000f800018c : -+ f800018c: 05 b4 12 8a sll8 %r10, 5, %r10 -+ f8000190: 60 61 40 c8 ld8 [ %sp + 0x160 ], %r8 -+ f8000194: 58 61 40 cb ld8 [ %sp + 0x158 ], %r11 -+ f8000198: 58 01 00 40 call f80006f8 -+ f800019c: 18 e0 42 c9 ld8 [ %r11 + 0x18 ], %r9 -+ f80001a0: 00 20 80 9c mov %r0, %r28 -+ f80001a4: 58 61 40 cd ld8 [ %sp + 0x158 ], %r13 -+ f80001a8: c0 40 43 c8 ld8 [ %r13 ], %r8 -+ f80001ac: 08 40 b4 00 cmp %r17, %r8 -+ f80001b0: 30 01 40 03 be %xcc, f8000670 -+ f80001b4: ff ff 3f 08 sethi %hi(0xfffffc00), %r8 -+ f80001b8: e0 23 82 9f or %r8, 0x3e0, %r31 -+ f80001bc: 20 f4 17 9f sll8 %r31, 0x20, %r31 -+ f80001c0: 10 e3 87 9f or %r31, 0x310, %r31 -+ f80001c4: e0 23 82 9e or %r8, 0x3e0, %r30 -+ f80001c8: 20 b4 17 9e sll8 %r30, 0x20, %r30 -+ f80001cc: 03 a3 87 9e or %r30, 0x303, %r30 -+ f80001d0: 00 a0 87 9d mov %r30, %r29 -+ f80001d4: 00 60 84 93 mov %r17, %r19 -+ -+00000000f80001d8 : -+ f80001d8: 18 60 84 08 add %r17, 0x18, %r8 -+ f80001dc: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f80001e0: 08 9a 10 9a srl8, byte %r2, %r8, %r26 -+ f80001e4: 20 b4 16 9a sll8 %r26, 0x20, %r26 -+ f80001e8: 20 b8 16 9a srl8 %r26, 0x20, %r26 -+ f80001ec: 00 a8 86 09 add %r26, 0x800, %r9 -+ f80001f0: 20 74 12 89 sll8 %r9, 0x20, %r9 -+ f80001f4: 01 12 00 08 sethi %hi(0x480400), %r8 -+ f80001f8: 0a 38 12 88 srl8 %r8, 0xa, %r8 -+ f80001fc: 08 40 82 97 or %r9, %r8, %r23 -+ f8000200: 45 20 80 88 mov 0x45, %r8 -+ f8000204: 06 34 12 88 sll8 %r8, 6, %r8 -+ f8000208: 40 61 40 c9 ld8 [ %sp + 0x140 ], %r9 -+ f800020c: c8 40 42 c8 ld8 [ %r9 + %r8 ], %r8 -+ f8000210: 00 28 82 18 add %r8, 0x800, %r24 -+ f8000214: 04 60 84 08 add %r17, 4, %r8 -+ f8000218: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f800021c: 08 9a 10 88 srl8, byte %r2, %r8, %r8 -+ f8000220: 20 34 12 88 sll8 %r8, 0x20, %r8 -+ f8000224: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f8000228: 00 20 82 99 mov %r8, %r25 -+ f800022c: 20 60 84 08 add %r17, 0x20, %r8 -+ f8000230: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f8000234: 08 9a 10 89 srl8, byte %r2, %r8, %r9 -+ f8000238: 20 74 12 89 sll8 %r9, 0x20, %r9 -+ f800023c: 20 78 12 89 srl8 %r9, 0x20, %r9 -+ f8000240: 30 60 84 08 add %r17, 0x30, %r8 -+ f8000244: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f8000248: 08 9a 10 94 srl8, byte %r2, %r8, %r20 -+ f800024c: 20 34 15 94 sll8 %r20, 0x20, %r20 -+ f8000250: 20 38 15 94 srl8 %r20, 0x20, %r20 -+ f8000254: 02 60 46 88 and %r25, 2, %r8 -+ f8000258: 00 20 b2 00 cmp %r8, 0 -+ f800025c: 03 00 40 12 bne f8000268 -+ f8000260: 80 60 82 16 add %r9, 0x80, %r22 -+ f8000264: 60 60 82 16 add %r9, 0x60, %r22 -+ -+00000000f8000268 : -+ f8000268: 90 60 85 09 add %r21, 0x90, %r9 -+ f800026c: 80 40 12 c8 ld4 [ %r9 ], %r8 -+ f8000270: 01 20 82 08 inc %r8 -+ f8000274: 80 40 92 c8 st4 %r8, [ %r9 ] -+ f8000278: 04 60 12 c8 ld4 [ %r9 + 4 ], %r8 -+ f800027c: 04 3a 12 88 srl8, byte %r8, 4, %r8 -+ f8000280: 00 20 b2 00 cmp %r8, 0 -+ f8000284: 04 00 40 03 be %xcc, f8000294 -+ f8000288: 68 61 40 c8 ld8 [ %sp + 0x168 ], %r8 -+ f800028c: 7d ff ff 7f call f8000080 -+ f8000290: 00 e0 86 8a mov %r27, %r10 -+ -+00000000f8000294 : -+ f8000294: 88 60 45 d0 ld8 [ %r21 + 0x88 ], %r16 -+ f8000298: 00 20 b4 00 cmp %r16, 0 -+ f800029c: 12 00 40 13 bne %xcc, f80002e4 -+ f80002a0: 90 60 85 12 add %r21, 0x90, %r18 -+ f80002a4: 80 80 14 c8 ld4 [ %r18 ], %r8 -+ f80002a8: 80 c0 96 c8 st4 %r8, [ %r27 ] -+ f80002ac: 64 01 00 40 call f800083c -+ f80002b0: 00 60 85 88 mov %r21, %r8 -+ f80002b4: 00 20 82 90 mov %r8, %r16 -+ f80002b8: 80 80 14 c8 ld4 [ %r18 ], %r8 -+ f80002bc: 01 20 82 08 inc %r8 -+ f80002c0: 80 80 94 c8 st4 %r8, [ %r18 ] -+ f80002c4: 04 a0 14 c8 ld4 [ %r18 + 4 ], %r8 -+ f80002c8: 04 3a 12 88 srl8, byte %r8, 4, %r8 -+ f80002cc: 00 20 b2 00 cmp %r8, 0 -+ f80002d0: 05 00 40 03 be %xcc, f80002e4 -+ f80002d4: 68 61 40 c8 ld8 [ %sp + 0x168 ], %r8 -+ f80002d8: 00 a0 84 89 mov %r18, %r9 -+ f80002dc: 69 ff ff 7f call f8000080 -+ f80002e0: 00 e0 86 8a mov %r27, %r10 -+ -+00000000f80002e4 : -+ f80002e4: 80 c0 14 c2 ld4 [ %r19 ], %r2 -+ f80002e8: 13 9a 10 89 srl8, byte %r2, %r19, %r9 -+ f80002ec: 20 74 12 89 sll8 %r9, 0x20, %r9 -+ f80002f0: 20 78 12 89 srl8 %r9, 0x20, %r9 -+ f80002f4: 40 b0 36 08 sethi %hi(0xdac10000), %r8 -+ f80002f8: 01 20 82 88 or %r8, 1, %r8 -+ f80002fc: 08 40 b2 00 cmp %r9, %r8 -+ f8000300: 06 00 40 03 be %xcc, f8000318 -+ f8000304: 90 60 85 08 add %r21, 0x90, %r8 -+ f8000308: 80 00 12 c8 ld4 [ %r8 ], %r8 -+ f800030c: 80 c0 96 c8 st4 %r8, [ %r27 ] -+ f8000310: bf 00 40 10 b f800060c -+ f8000314: a8 60 45 c8 ld8 [ %r21 + 0xa8 ], %r8 -+ -+00000000f8000318 : -+ f8000318: a0 23 44 c3 ld8 [ %r16 + 0x3a0 ], %r3 -+ f800031c: 01 60 46 88 and %r25, 1, %r8 -+ f8000320: 00 20 b5 00 cmp %r20, 0 -+ f8000324: 03 00 40 13 bne %xcc, f8000330 -+ f8000328: 14 00 82 09 add %r8, %r20, %r9 -+ f800032c: 01 20 82 09 add %r8, 1, %r9 -+ -+00000000f8000330 : -+ f8000330: 05 20 80 88 mov 5, %r8 -+ f8000334: 09 00 92 21 sub %r8, %r9, %r33 -+ f8000338: 07 74 18 8a sll8 %r33, 7, %r10 -+ f800033c: 0a 00 84 0b add %r16, %r10, %r11 -+ f8000340: 05 74 18 88 sll8 %r33, 5, %r8 -+ f8000344: 08 00 84 0c add %r16, %r8, %r12 -+ f8000348: 40 61 40 cd ld8 [ %sp + 0x140 ], %r13 -+ f800034c: 29 22 80 88 mov 0x229, %r8 -+ f8000350: 03 34 12 88 sll8 %r8, 3, %r8 -+ f8000354: c8 40 43 c9 ld8 [ %r13 + %r8 ], %r9 -+ f8000358: 03 b4 16 88 sll8 %r26, 3, %r8 -+ f800035c: c8 40 42 cf ld8 [ %r9 + %r8 ], %r15 -+ f8000360: 01 60 46 88 and %r25, 1, %r8 -+ f8000364: 00 20 b2 00 cmp %r8, 0 -+ f8000368: 1d 00 40 02 be f80003dc -+ f800036c: 00 23 83 0c add %r12, 0x300, %r12 -+ f8000370: ca 00 c4 d7 st8 %r23, [ %r16 + %r10 ] -+ f8000374: 40 c4 0f 09 sethi %hi(0x3f110000), %r9 -+ f8000378: 09 60 82 89 or %r9, 9, %r9 ! 3f110009 <*ABS*+0x3f110009> -+ f800037c: 08 e0 c2 c9 st8 %r9, [ %r11 + 8 ] -+ f8000380: 0a e0 83 88 or %r15, 0xa, %r8 -+ f8000384: 10 e0 c2 c8 st8 %r8, [ %r11 + 0x10 ] -+ f8000388: 81 00 10 08 sethi %hi(0x40020400), %r8 -+ f800038c: 0a 34 12 88 sll8 %r8, 0xa, %r8 -+ f8000390: 18 e0 c2 c8 st8 %r8, [ %r11 + 0x18 ] -+ f8000394: 1d e0 83 88 or %r15, 0x1d, %r8 -+ f8000398: 20 e0 c2 c8 st8 %r8, [ %r11 + 0x20 ] -+ f800039c: 28 e0 c2 d8 st8 %r24, [ %r11 + 0x28 ] -+ f80003a0: 24 e0 84 08 add %r19, 0x24, %r8 -+ f80003a4: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f80003a8: 08 9a 10 88 srl8, byte %r2, %r8, %r8 -+ f80003ac: 20 34 12 88 sll8 %r8, 0x20, %r8 -+ f80003b0: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f80003b4: 80 20 82 08 add %r8, 0x80, %r8 -+ f80003b8: 30 e0 c2 c8 st8 %r8, [ %r11 + 0x30 ] -+ f80003bc: 00 e1 80 08 add %r3, 0x100, %r8 -+ f80003c0: 38 e0 c2 c8 st8 %r8, [ %r11 + 0x38 ] -+ f80003c4: 40 e0 c2 d6 st8 %r22, [ %r11 + 0x40 ] -+ f80003c8: 48 e0 c2 cc st8 %r12, [ %r11 + 0x48 ] -+ f80003cc: c0 00 c3 df st8 %r31, [ %r12 ] -+ f80003d0: 20 e0 83 0f add %r15, 0x20, %r15 -+ f80003d4: 80 e0 82 0b add %r11, 0x80, %r11 -+ f80003d8: 20 20 83 0c add %r12, 0x20, %r12 -+ -+00000000f80003dc : -+ f80003dc: 00 20 b5 00 cmp %r20, 0 -+ f80003e0: 13 00 40 33 bne,a %xcc, f800042c -+ f80003e4: 00 20 80 8d mov %r0, %r13 -+ f80003e8: c0 c0 c2 d7 st8 %r23, [ %r11 ] -+ f80003ec: 40 c4 0f 0a sethi %hi(0x3f110000), %r10 -+ f80003f0: 09 a0 82 8a or %r10, 9, %r10 -+ f80003f4: 08 e0 c2 ca st8 %r10, [ %r11 + 8 ] -+ f80003f8: 0a e0 83 88 or %r15, 0xa, %r8 -+ f80003fc: 10 e0 c2 c8 st8 %r8, [ %r11 + 0x10 ] -+ f8000400: 00 04 00 08 sethi %hi(0x100000), %r8 -+ f8000404: 18 e0 c2 c8 st8 %r8, [ %r11 + 0x18 ] -+ f8000408: 1d e0 83 88 or %r15, 0x1d, %r8 -+ f800040c: 20 e0 c2 c8 st8 %r8, [ %r11 + 0x20 ] -+ f8000410: 28 e0 c2 d8 st8 %r24, [ %r11 + 0x28 ] -+ f8000414: 40 e0 c2 d6 st8 %r22, [ %r11 + 0x40 ] -+ f8000418: 48 e0 c2 cc st8 %r12, [ %r11 + 0x48 ] -+ f800041c: c0 00 c3 de st8 %r30, [ %r12 ] -+ f8000420: 00 20 80 92 mov %r0, %r18 -+ f8000424: 4b 00 40 10 b f8000550 -+ f8000428: 20 e0 83 0f add %r15, 0x20, %r15 -+ -+00000000f800042c : -+ f800042c: b8 23 84 08 add %r16, 0x3b8, %r8 -+ f8000430: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f8000434: 08 9a 10 8e srl8, byte %r2, %r8, %r14 -+ f8000438: 20 b4 13 8e sll8 %r14, 0x20, %r14 -+ f800043c: 20 b8 13 8e srl8 %r14, 0x20, %r14 -+ f8000440: 00 20 80 92 mov %r0, %r18 -+ f8000444: 14 80 b4 00 cmp %r18, %r20 -+ f8000448: 2d 00 40 1b bcc %xcc, f80004fc -+ f800044c: 40 c4 0f 08 sethi %hi(0x3f110000), %r8 -+ f8000450: 09 20 82 a3 or %r8, 9, %r35 -+ f8000454: 00 04 00 22 sethi %hi(0x100000), %r34 -+ f8000458: ff ff 3f 08 sethi %hi(0xfffffc00), %r8 -+ f800045c: e0 23 82 a0 or %r8, 0x3e0, %r32 -+ f8000460: 20 34 18 a0 sll8 %r32, 0x20, %r32 -+ f8000464: 10 23 88 a0 or %r32, 0x310, %r32 -+ -+00000000f8000468 : -+ f8000468: 0d 40 83 09 add %r13, %r13, %r9 -+ f800046c: 0d 40 82 09 add %r9, %r13, %r9 -+ f8000470: 02 74 12 89 sll8 %r9, 2, %r9 -+ f8000474: 09 c0 84 09 add %r19, %r9, %r9 -+ f8000478: 38 60 82 0a add %r9, 0x38, %r10 -+ f800047c: 80 80 12 c2 ld4 [ %r10 ], %r2 -+ f8000480: 0a 9a 10 8a srl8, byte %r2, %r10, %r10 -+ f8000484: 20 b4 12 8a sll8 %r10, 0x20, %r10 -+ f8000488: 20 b8 12 8a srl8 %r10, 0x20, %r10 -+ f800048c: c0 c0 c2 d7 st8 %r23, [ %r11 ] -+ f8000490: 08 e0 c2 e3 st8 %r35, [ %r11 + 8 ] -+ f8000494: 0a e0 83 88 or %r15, 0xa, %r8 -+ f8000498: 10 e0 c2 c8 st8 %r8, [ %r11 + 0x10 ] -+ f800049c: 20 b4 12 88 sll8 %r10, 0x20, %r8 -+ f80004a0: 22 00 82 88 or %r8, %r34, %r8 -+ f80004a4: 18 e0 c2 c8 st8 %r8, [ %r11 + 0x18 ] -+ f80004a8: 1d e0 83 88 or %r15, 0x1d, %r8 -+ f80004ac: 20 e0 c2 c8 st8 %r8, [ %r11 + 0x20 ] -+ f80004b0: 28 e0 c2 d8 st8 %r24, [ %r11 + 0x28 ] -+ f80004b4: 34 60 82 09 add %r9, 0x34, %r9 -+ f80004b8: 80 40 12 c2 ld4 [ %r9 ], %r2 -+ f80004bc: 09 9a 10 89 srl8, byte %r2, %r9, %r9 -+ f80004c0: 20 74 12 89 sll8 %r9, 0x20, %r9 -+ f80004c4: 20 78 12 89 srl8 %r9, 0x20, %r9 -+ f80004c8: 30 e0 c2 c9 st8 %r9, [ %r11 + 0x30 ] -+ f80004cc: 38 e0 c2 ce st8 %r14, [ %r11 + 0x38 ] -+ f80004d0: 40 e0 c2 d6 st8 %r22, [ %r11 + 0x40 ] -+ f80004d4: 48 e0 c2 cc st8 %r12, [ %r11 + 0x48 ] -+ f80004d8: c0 00 c3 e0 st8 %r32, [ %r12 ] -+ f80004dc: 0a 80 83 0e add %r14, %r10, %r14 -+ f80004e0: 0a 80 84 12 add %r18, %r10, %r18 -+ f80004e4: 20 e0 83 0f add %r15, 0x20, %r15 -+ f80004e8: 80 e0 82 0b add %r11, 0x80, %r11 -+ f80004ec: 01 60 83 0d inc %r13 -+ f80004f0: 14 40 b3 00 cmp %r13, %r20 -+ f80004f4: dd ff 7f 0b bcs %xcc, f8000468 -+ f80004f8: 20 20 83 0c add %r12, 0x20, %r12 -+ -+00000000f80004fc : -+ f80004fc: e0 3f 83 0c add %r12, -32, %r12 -+ f8000500: c0 00 c3 dd st8 %r29, [ %r12 ] -+ f8000504: bc 23 84 08 add %r16, 0x3bc, %r8 -+ f8000508: 80 00 12 c2 ld4 [ %r8 ], %r2 -+ f800050c: 08 9a 10 88 srl8, byte %r2, %r8, %r8 -+ f8000510: 20 34 12 88 sll8 %r8, 0x20, %r8 -+ f8000514: 20 38 12 88 srl8 %r8, 0x20, %r8 -+ f8000518: 12 00 b2 00 cmp %r8, %r18 -+ f800051c: 0e 00 40 1b bcc %xcc, f8000554 -+ f8000520: 07 74 18 88 sll8 %r33, 7, %r8 -+ f8000524: 00 60 88 8d mov %r33, %r13 -+ f8000528: 04 60 b8 00 cmp %r33, 4 -+ f800052c: 08 00 40 19 bgu %xcc, f800054c -+ f8000530: 08 00 84 0b add %r16, %r8, %r11 -+ f8000534: 00 04 00 08 sethi %hi(0x100000), %r8 -+ -+00000000f8000538 : -+ f8000538: 18 e0 c2 c8 st8 %r8, [ %r11 + 0x18 ] -+ f800053c: 01 60 83 0d inc %r13 -+ f8000540: 04 60 b3 00 cmp %r13, 4 -+ f8000544: fd ff 7f 09 bleu %xcc, f8000538 -+ f8000548: 80 e0 82 0b add %r11, 0x80, %r11 -+ -+00000000f800054c : -+ f800054c: fc 3f 80 92 mov -4, %r18 -+ -+00000000f8000550 : -+ f8000550: 07 74 18 88 sll8 %r33, 7, %r8 -+ -+00000000f8000554 : -+ f8000554: 08 00 84 08 add %r16, %r8, %r8 -+ f8000558: 60 61 40 cb ld8 [ %sp + 0x160 ], %r11 -+ f800055c: c0 00 62 e3 ld64 [ %r8 ], %r32 -+ f8000560: 40 20 62 f3 ld64 [ %r8 + 0x40 ], %r48 -+ f8000564: c0 c0 e2 e3 st64 %r32, [ %r11 ] -+ f8000568: c0 c0 e2 f3 st64 %r48, [ %r11 ] -+ f800056c: 98 22 44 c8 ld8 [ %r16 + 0x298 ], %r8 -+ f8000570: 88 60 c5 c8 st8 %r8, [ %r21 + 0x88 ] -+ f8000574: 88 60 45 c8 ld8 [ %r21 + 0x88 ], %r8 -+ f8000578: 00 20 b2 00 cmp %r8, 0 -+ f800057c: 05 00 40 33 bne,a %xcc, f8000590 -+ f8000580: d8 22 94 c0 st4 %r0, [ %r16 + 0x2d8 ] -+ f8000584: 88 60 85 08 add %r21, 0x88, %r8 -+ f8000588: 80 60 c5 c8 st8 %r8, [ %r21 + 0x80 ] -+ f800058c: d8 22 94 c0 st4 %r0, [ %r16 + 0x2d8 ] -+ -+00000000f8000590 : -+ f8000590: 04 60 46 88 and %r25, 4, %r8 -+ f8000594: 00 20 b2 00 cmp %r8, 0 -+ f8000598: 0c 00 40 02 be f80005c8 -+ f800059c: dc 22 94 c0 st4 %r0, [ %r16 + 0x2dc ] -+ f80005a0: c0 c0 64 e3 ld64 [ %r19 ], %r32 -+ f80005a4: 40 e0 64 f3 ld64 [ %r19 + 0x40 ], %r48 -+ f80005a8: c0 c0 e0 e3 st64 %r32, [ %r3 ] -+ f80005ac: 80 e0 64 e3 ld64 [ %r19 + 0x80 ], %r32 -+ f80005b0: 40 e0 e0 f3 st64 %r48, [ %r3 + 0x40 ] -+ f80005b4: c0 e0 64 f3 ld64 [ %r19 + 0xc0 ], %r48 -+ f80005b8: 80 e0 e0 e3 st64 %r32, [ %r3 + 0x80 ] -+ f80005bc: c0 e0 e0 f3 st64 %r48, [ %r3 + 0xc0 ] -+ f80005c0: 07 00 40 10 b f80005dc -+ f80005c4: 88 e2 80 08 add %r3, 0x288, %r8 -+ -+00000000f80005c8 : -+ f80005c8: c0 c0 64 e3 ld64 [ %r19 ], %r32 -+ f80005cc: 40 e0 64 f3 ld64 [ %r19 + 0x40 ], %r48 -+ f80005d0: c0 c0 e0 e3 st64 %r32, [ %r3 ] -+ f80005d4: 40 e0 e0 f3 st64 %r48, [ %r3 + 0x40 ] -+ f80005d8: 88 e2 80 08 add %r3, 0x288, %r8 -+ -+00000000f80005dc : -+ f80005dc: 08 96 14 82 sll8, byte %r18, %r8, %r2 -+ f80005e0: 80 00 92 c2 st4 %r2, [ %r8 ] -+ f80005e4: 40 61 40 cd ld8 [ %sp + 0x140 ], %r13 -+ f80005e8: 29 22 80 88 mov 0x229, %r8 -+ f80005ec: 03 34 12 88 sll8 %r8, 3, %r8 -+ f80005f0: c8 40 43 c9 ld8 [ %r13 + %r8 ], %r9 -+ f80005f4: 03 b4 16 88 sll8 %r26, 3, %r8 -+ f80005f8: c8 40 c2 cf st8 %r15, [ %r9 + %r8 ] -+ f80005fc: 90 60 85 08 add %r21, 0x90, %r8 -+ f8000600: 80 00 12 c8 ld4 [ %r8 ], %r8 -+ f8000604: 80 c0 96 c8 st4 %r8, [ %r27 ] -+ -+00000000f8000608 : -+ f8000608: a8 60 45 c8 ld8 [ %r21 + 0xa8 ], %r8 -+ -+00000000f800060c : -+ f800060c: 08 40 b4 00 cmp %r17, %r8 -+ f8000610: 03 00 40 13 bne %xcc, f800061c -+ f8000614: 00 61 84 11 add %r17, 0x100, %r17 -+ f8000618: a0 60 45 d1 ld8 [ %r21 + 0xa0 ], %r17 -+ -+00000000f800061c : -+ f800061c: 98 60 45 c8 ld8 [ %r21 + 0x98 ], %r8 -+ f8000620: 00 20 b2 00 cmp %r8, 0 -+ f8000624: 04 00 40 13 bne %xcc, f8000634 -+ f8000628: 01 20 87 1c inc %r28 -+ f800062c: 58 61 40 c9 ld8 [ %sp + 0x158 ], %r9 -+ f8000630: 08 60 c2 d1 st8 %r17, [ %r9 + 8 ] -+ -+00000000f8000634 : -+ f8000634: fe 21 b7 00 cmp %r28, 0x1fe -+ f8000638: 0f 00 40 35 bg,a %xcc, f8000674 -+ f800063c: 98 60 45 cb ld8 [ %r21 + 0x98 ], %r11 -+ f8000640: 00 00 10 88 breaktest %r8 -+ f8000644: 02 20 42 88 and %r8, 2, %r8 -+ f8000648: 00 20 b2 00 cmp %r8, 0 -+ f800064c: 05 00 40 03 be %xcc, f8000660 -+ f8000650: 58 61 40 ca ld8 [ %sp + 0x158 ], %r10 -+ f8000654: 18 00 00 40 call f80006b4 -+ f8000658: 60 61 40 c8 ld8 [ %sp + 0x160 ], %r8 -+ f800065c: 58 61 40 ca ld8 [ %sp + 0x158 ], %r10 -+ -+00000000f8000660 : -+ f8000660: c0 80 42 c8 ld8 [ %r10 ], %r8 -+ f8000664: 08 40 b4 00 cmp %r17, %r8 -+ f8000668: dc fe 7f 13 bne %xcc, f80001d8 -+ f800066c: 00 60 84 93 mov %r17, %r19 -+ -+00000000f8000670 : -+ f8000670: 98 60 45 cb ld8 [ %r21 + 0x98 ], %r11 -+ -+00000000f8000674 : -+ f8000674: 00 e0 b2 00 cmp %r11, 0 -+ f8000678: c5 fe 7f 03 be %xcc, f800018c -+ f800067c: 1c 00 90 0a neg %r28, %r10 -+ f8000680: 60 61 40 c8 ld8 [ %sp + 0x160 ], %r8 -+ f8000684: 60 60 85 09 add %r21, 0x60, %r9 -+ f8000688: 38 00 00 40 call f8000768 -+ f800068c: e0 3f 80 8a mov -32, %r10 -+ f8000690: 58 61 40 cb ld8 [ %sp + 0x158 ], %r11 -+ f8000694: 08 e0 c2 d1 st8 %r17, [ %r11 + 8 ] -+ f8000698: bc fe 7f 10 b f8000188 -+ f800069c: 01 20 87 1c inc %r28 -+ f80006a0: c0 60 40 c7 ld8 [ %sp + 0xc0 ], %r7 -+ f80006a4: 40 60 60 d3 ld64 [ %sp + 0x40 ], %r16 -+ f80006a8: 80 60 60 db ld64 [ %sp + 0x80 ], %r24 -+ f80006ac: 08 e0 01 80 retl -+ f80006b0: 40 61 80 01 add %sp, 0x140, %sp -+ -+00000000f80006b4 : -+ f80006b4: 80 7f 80 01 add %sp, -128, %sp -+ f80006b8: c0 40 e0 d3 st64 %r16, [ %sp ] -+ f80006bc: 40 60 e0 db st64 %r24, [ %sp + 0x40 ] -+ f80006c0: 00 20 84 90 mov %r16, %r16 -+ f80006c4: 00 20 86 98 mov %r24, %r24 -+ f80006c8: 00 20 80 80 nop -+ f80006cc: 00 20 80 80 nop -+ f80006d0: 00 e0 81 92 mov %r7, %r18 -+ f80006d4: 02 00 00 40 call f80006dc -+ f80006d8: 00 60 80 91 mov %sp, %r17 -+ f80006dc: 14 e0 81 10 add %r7, 0x14, %r16 -+ f80006e0: 17 20 80 97 mov 0x17, %r23 -+ f80006e4: c0 00 f2 d3 st64suspend %r16, [ %r8 ] -+ f80006e8: c0 40 60 d3 ld64 [ %sp ], %r16 -+ f80006ec: 40 60 60 db ld64 [ %sp + 0x40 ], %r24 -+ f80006f0: 08 a0 00 80 jmp %r2 + 8 -+ f80006f4: 80 60 80 01 add %sp, 0x80, %sp -+ -+00000000f80006f8 : -+ f80006f8: 40 7f 80 01 add %sp, -192, %sp -+ f80006fc: 40 60 e0 d3 st64 %r16, [ %sp + 0x40 ] -+ f8000700: 80 60 e0 db st64 %r24, [ %sp + 0x80 ] -+ f8000704: 00 20 84 90 mov %r16, %r16 -+ f8000708: 00 20 86 98 mov %r24, %r24 -+ f800070c: 00 20 80 80 nop -+ f8000710: 00 20 80 80 nop -+ f8000714: 00 e0 81 92 mov %r7, %r18 -+ f8000718: 02 00 00 40 call f8000720 -+ f800071c: 00 60 80 91 mov %sp, %r17 -+ f8000720: 40 e0 81 10 add %r7, 0x40, %r16 -+ f8000724: c0 40 e0 d1 st32 %r16, [ %sp ] -+ f8000728: 37 20 80 97 mov 0x37, %r23 -+ f800072c: 38 60 c0 d7 st8 %r23, [ %sp + 0x38 ] -+ f8000730: 00 20 84 90 mov %r16, %r16 -+ f8000734: 00 e0 85 97 mov %r23, %r23 -+ f8000738: 00 20 80 80 nop -+ f800073c: 00 20 80 80 nop -+ f8000740: 1f 60 82 90 or %r9, 0x1f, %r16 -+ f8000744: 20 b4 12 91 sll8 %r10, 0x20, %r17 -+ f8000748: 08 63 84 91 or %r17, 0x308, %r17 -+ f800074c: 00 60 80 92 mov %sp, %r18 -+ f8000750: 00 20 82 93 mov %r8, %r19 -+ f8000754: c0 00 f2 d1 st32suspend %r16, [ %r8 ] -+ f8000758: 40 60 60 d3 ld64 [ %sp + 0x40 ], %r16 -+ f800075c: 80 60 60 db ld64 [ %sp + 0x80 ], %r24 -+ f8000760: 08 a0 00 80 jmp %r2 + 8 -+ f8000764: c0 60 80 01 add %sp, 0xc0, %sp -+ -+00000000f8000768 : -+ f8000768: 40 7f 80 01 add %sp, -192, %sp -+ f800076c: 40 60 e0 d3 st64 %r16, [ %sp + 0x40 ] -+ f8000770: 80 60 e0 db st64 %r24, [ %sp + 0x80 ] -+ f8000774: 00 20 84 90 mov %r16, %r16 -+ f8000778: 00 20 86 98 mov %r24, %r24 -+ f800077c: 00 20 80 80 nop -+ f8000780: 00 20 80 80 nop -+ f8000784: 00 e0 81 92 mov %r7, %r18 -+ f8000788: 02 00 00 40 call f8000790 -+ f800078c: 00 60 80 91 mov %sp, %r17 -+ f8000790: 54 e0 81 10 add %r7, 0x54, %r16 -+ f8000794: c0 40 e0 d1 st32 %r16, [ %sp ] -+ f8000798: 37 20 80 97 mov 0x37, %r23 -+ f800079c: 38 60 c0 d7 st8 %r23, [ %sp + 0x38 ] -+ f80007a0: 00 20 84 90 mov %r16, %r16 -+ f80007a4: 00 e0 85 97 mov %r23, %r23 -+ f80007a8: 00 20 80 80 nop -+ f80007ac: 00 20 80 80 nop -+ f80007b0: 1f 60 82 90 or %r9, 0x1f, %r16 -+ f80007b4: 20 b4 12 91 sll8 %r10, 0x20, %r17 -+ f80007b8: 08 63 84 91 or %r17, 0x308, %r17 -+ f80007bc: 00 60 80 92 mov %sp, %r18 -+ f80007c0: 00 20 82 93 mov %r8, %r19 -+ f80007c4: 0e f4 12 94 sll8 %r11, 0xe, %r20 -+ f80007c8: 0d 20 85 94 or %r20, 0xd, %r20 -+ f80007cc: 17 20 80 95 mov 0x17, %r21 -+ f80007d0: 17 20 80 96 mov 0x17, %r22 -+ f80007d4: 17 20 80 97 mov 0x17, %r23 -+ f80007d8: c0 00 f2 d3 st64suspend %r16, [ %r8 ] -+ f80007dc: 40 60 60 d3 ld64 [ %sp + 0x40 ], %r16 -+ f80007e0: 80 60 60 db ld64 [ %sp + 0x80 ], %r24 -+ f80007e4: 08 a0 00 80 jmp %r2 + 8 -+ f80007e8: c0 60 80 01 add %sp, 0xc0, %sp -+ -+00000000f80007ec : -+ f80007ec: 01 20 80 97 mov 1, %r23 -+ f80007f0: b0 e3 c0 d7 st8 %r23, [ %r3 + 0x3b0 ] -+ f80007f4: 80 a0 60 d2 ld16 [ %r2 + 0x80 ], %r18 -+ f80007f8: 98 e2 80 04 add %r3, 0x298, %r4 -+ f80007fc: 98 e2 c0 c0 st8 %r0, [ %r3 + 0x298 ] -+ f8000800: 80 a0 c0 c4 st8 %r4, [ %r2 + 0x80 ] -+ f8000804: c0 80 c4 c3 st8 %r3, [ %r18 ] -+ f8000808: 01 e0 b4 00 cmp %r19, 1 -+ f800080c: 06 00 40 02 be f8000824 -+ f8000810: 00 a0 84 90 mov %r18, %r16 -+ f8000814: 00 e0 80 97 mov %r3, %r23 -+ f8000818: 02 20 80 97 mov 2, %r23 -+ f800081c: b0 e3 c0 d7 st8 %r23, [ %r3 + 0x3b0 ] -+ f8000820: d8 e2 d0 d0 st8suspend %r16, [ %r3 + 0x2d8 ] -+ f8000824: d8 e2 c0 d0 st8 %r16, [ %r3 + 0x2d8 ] -+ f8000828: 03 20 80 97 mov 3, %r23 -+ f800082c: b0 e3 c0 d7 st8 %r23, [ %r3 + 0x3b0 ] -+ f8000830: 00 e0 80 88 mov %r3, %r8 -+ f8000834: 0e 00 40 10 b f800086c <.epcomms_resume_thread> -+ f8000838: 00 a0 60 c3 ld64 [ %r2 ], %r0 -+ -+00000000f800083c : -+ f800083c: 40 7f 80 01 add %sp, -192, %sp -+ f8000840: 40 60 e0 d3 st64 %r16, [ %sp + 0x40 ] -+ f8000844: 80 60 e0 db st64 %r24, [ %sp + 0x80 ] -+ f8000848: 00 20 84 90 mov %r16, %r16 -+ f800084c: 00 20 86 98 mov %r24, %r24 -+ f8000850: 00 20 80 80 nop -+ f8000854: 00 20 80 80 nop -+ f8000858: 01 20 80 89 mov 1, %r9 -+ f800085c: 88 20 c2 c9 st8 %r9, [ %r8 + 0x88 ] -+ f8000860: 00 60 80 91 mov %sp, %r17 -+ f8000864: 00 e0 81 97 mov %r7, %r23 -+ f8000868: 00 20 f2 d3 st64suspend %r16, [ %r8 ] -+ -+00000000f800086c <.epcomms_resume_thread>: -+ f800086c: 40 60 60 d3 ld64 [ %sp + 0x40 ], %r16 -+ f8000870: 80 60 60 db ld64 [ %sp + 0x80 ], %r24 -+ f8000874: 08 e0 01 80 retl -+ f8000878: c0 60 80 01 add %sp, 0xc0, %sp -+Disassembly of section .data: -diff -urN clean/drivers/net/qsnet/jtag/jtagdrv.c linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv.c ---- clean/drivers/net/qsnet/jtag/jtagdrv.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv.c 2003-06-07 12:02:35.000000000 -0400 -@@ -0,0 +1,451 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv.c,v 1.12 2003/06/07 16:02:35 david Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.c,v $*/ -+ -+#include -+ -+#include "jtagdrv.h" -+#include -+ -+int -+jtagdrv_strobe_data (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_strobe_data: %s %s %s -> ", (data & LPT_DATA_TRST) ? "TRST" : "trst", -+ (data & LPT_DATA_TDI) ? "TDI" : "tdi", (data & LPT_DATA_TMS) ? "TMS" : "tms")); -+ -+ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* Drive NEW values on data wires */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(5); /* Drive strobe low */ -+ LPT_READ_STAT (dev, dsr); DELAY(5); /* Sample TDI from ring */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+ -+ PRINTF (DBG_ECPP, ("%s\n", (dsr & LPT_STAT_PE) ? "TDO" : "tdo")); -+ -+ return ((dsr & LPT_STAT_PE) ? 1 : 0); -+} -+ -+void -+jtagdrv_select_ring (JTAG_DEV *dev, u_int ring) -+{ -+ PRINTF (DBG_ECPP, ("jtagdrv_select_ring: ring=0x%x\n", ring)); -+ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe and TCLK high */ -+ LPT_WRITE_DATA (dev, ring); DELAY(5); /* Drive ring address */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_RCLK); DELAY(5); /* Drive strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+} -+ -+void -+jtagdrv_reset (JTAG_DEV *dev) -+{ -+ register int i; -+ -+ for (i = 0; i < 5; i++) -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* 5 clocks to Reset from any state */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+void -+jtagdrv_shift_ir (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select IR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-IR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the instruction bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+ -+void -+jtagdrv_shift_dr (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-DR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the data bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+static int -+jtagdrv_i2c_start (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start\n")); -+ -+ /* Issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* sample the line to see if we're idle */ -+ LPT_READ_STAT (dev, dsr); /* sample SDA */ -+ if ((dsr & LPT_STAT_SDA) == 0) /* Cannot start if SDA already driven */ -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start: cannot start - sda driven low\n")); -+ -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - stopped after %d clocks\n", i)); -+ break; -+ } -+ } -+ -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - cannot start - not idle\n")); -+ return (0); -+ } -+ -+ /* seen SDA float high, so issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(4); /* drive SDA low */ -+ return (1); -+} -+ -+static void -+jtagdrv_i2c_stop (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop\n")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* -+ * bug fix for temperature sensor chip -+ * if it's still driving SDA, then clock -+ * it until it stops driving it -+ */ -+ LPT_READ_STAT (dev, dsr); -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - slave not stodeved\n")); -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ break; -+ } -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - stodeved after %d clocks\n", i)); -+ } -+} -+ -+static int -+jtagdrv_i2c_strobe (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_strobe : %s", (data & LPT_DATA_SDA) ? "SDA" : "sda")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* write data */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, (" -> %s\n", (dsr & LPT_STAT_SDA) ? "SDA" : "sda")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static int -+jtagdrv_i2c_get_ack (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_get_ack -> %s\n", (dsr & LPT_STAT_SDA) ? "no ack" : "ack")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 0 : 1); -+} -+ -+static int -+jtagdrv_i2c_drive_ack (JTAG_DEV *dev, int nack) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, nack ? LPT_DATA_SDA : 0); DELAY(5); /* SDA low for ack, high for nack */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA for ack */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_drive_ack %d -> %s\n", nack, (dsr & LPT_STAT_SDA) ? "done" : "more")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static void -+jtagdrv_i2c_shift_addr (JTAG_DEV *dev, u_int address, int readNotWrite) -+{ -+ register int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_addr: %x\n", address)); -+ -+ for (i = I2C_ADDR_LEN-1; i >= 0; i--) -+ jtagdrv_i2c_strobe (dev, (address & (1 << i)) ? LPT_DATA_SDA : 0); -+ -+ jtagdrv_i2c_strobe (dev, readNotWrite ? LPT_DATA_SDA : 0); -+} -+ -+static u_char -+jtagdrv_i2c_shift_data (JTAG_DEV *dev, u_char data) -+{ -+ register int i; -+ u_char val = 0; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : %02x\n", data)); -+ -+ for (i = I2C_DATA_LEN-1; i >= 0; i--) -+ if (jtagdrv_i2c_strobe (dev, data & (1 << i) ? LPT_DATA_SDA : 0)) -+ val |= (1 << i); -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : -> %02x\n", val)); -+ -+ return (val); -+} -+ -+int -+jtagdrv_i2c_write (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: address=%x count=%d data=%02x\n", address, count, data[0])); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on data phase %d\n", i)); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_read (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 1); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ data[i] = jtagdrv_i2c_shift_data (dev, 0xff); -+ -+ jtagdrv_i2c_drive_ack (dev, (i == (count-1) ? 1 : 0)); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_writereg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writedate: no ack on byte %d\n", i)); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_readreg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (jtagdrv_i2c_read (dev, address, count, data)); -+} -+ -+void -+jtagdrv_i2c_clock_shift (JTAG_DEV *dev, u_int t, u_int n, u_int m) -+{ -+ int i; -+ -+ for (i = 2; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 1; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)| LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 6; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(1); /* clock low | 0 */ -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(1); /* strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(1); /* strobe low */ -+} -+ -diff -urN clean/drivers/net/qsnet/jtag/jtagdrv.h linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv.h ---- clean/drivers/net/qsnet/jtag/jtagdrv.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv.h 2002-08-09 07:18:37.000000000 -0400 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __JTAGDRV_COMMON_H -+#define __JTAGDRV_COMMON_H -+ -+#ident "@(#)$Id: jtagdrv.h,v 1.5 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.h,v $*/ -+ -+#include -+ -+/* include OS specific header file */ -+#if defined(LINUX) -+# include "jtagdrv_Linux.h" -+#elif defined(DIGITAL_UNIX) -+# include "jtagdrv_OSF1.h" -+#elif defined(QNX) -+# include "jtagdrv_QNX.h" -+#else -+# error cannot determint os type -+#endif -+ -+extern int jtagdebug; -+ -+#define DBG_CFG (1 << 0) -+#define DBG_OPEN (1 << 1) -+#define DBG_IOCTL (1 << 2) -+#define DBG_ECPP (1 << 3) -+#define DBG_FN (1 << 4) -+ -+#define DRIVER_NAME "jtag" -+ -+#if defined(LINUX) -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printk X : (void) 0) -+#define PRINTMSG(fmt, arg...) printk(KERN_INFO DRIVER_NAME ": " fmt, ##arg) -+#else -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printf X : (void) 0) -+#define PRINTMSG(M, A) printf ("jtag: " M, A) -+#endif -+ -+extern void jtagdrv_select_ring (JTAG_DEV *pp, u_int ring); -+extern void jtagdrv_reset (JTAG_DEV *pp); -+extern void jtagdrv_shift_ir (JTAG_DEV *pp, u_char *value, int nbits); -+extern void jtagdrv_shift_dr (JTAG_DEV *pp, u_char *value, int nbits); -+ -+extern int jtagdrv_i2c_write (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_read (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_writereg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern int jtagdrv_i2c_readreg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern void jtagdrv_i2c_clock_shift (JTAG_DEV *pp, u_int t, u_int n, u_int m); -+ -+ -+#endif /* __JTAGDRV_COMMON_H */ -diff -urN clean/drivers/net/qsnet/jtag/jtagdrv_Linux.c linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv_Linux.c ---- clean/drivers/net/qsnet/jtag/jtagdrv_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv_Linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,326 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: jtagdrv_Linux.c,v 1.19.2.3 2005/09/07 14:35:03 mike Exp $ -+ * $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.c,v $ -+ */ -+ -+#include "jtagdrv.h" -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("JTAG Parallel port QsNet switch interface"); -+ -+MODULE_LICENSE("GPL"); -+ -+#define MAJOR_INSTANCE 0 /* 0 is dynamic assign of device major */ -+#define MAX_JTAG_DEV 4 -+ -+int jtag_major = MAJOR_INSTANCE; -+int jtagdebug = 0; -+module_param(jtag_major, uint, 0); -+module_param(jtagdebug, uint, 0); -+ -+JTAG_DEV jtag_devs[MAX_JTAG_DEV]; -+ -+int io[MAX_JTAG_DEV]= { 0, }; -+MODULE_PARM(io, "1-4i"); -+ -+ -+/* The fops functions */ -+int jtag_open(struct inode *, struct file *); -+int jtag_close(struct inode *, struct file *); -+int jtag_ioctl(struct inode *, struct file *, unsigned int, unsigned long ); -+ -+struct file_operations jtag_fops = { -+ ioctl: jtag_ioctl, -+ open: jtag_open, -+ release: jtag_close, -+}; -+ -+int -+jtag_probe(void) -+{ -+ int i=0; -+ int default_io = 1; -+ JTAG_DEV *dev; -+ unsigned char value=0xff; -+ -+ -+ /* see if there are any user supplied io addr */ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ if ( io[i] != 0x00) -+ default_io = 0; -+ jtag_devs[i].base = io[i]; -+ } -+ -+ if ( default_io ) { -+ jtag_devs[0].base = 0x3bc; -+ jtag_devs[1].base = 0x378; -+ jtag_devs[2].base = 0x278; -+ jtag_devs[3].base = 0x268; -+ } -+ -+ for ( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ if ( jtag_devs[i].base == 0x3bc ) -+ jtag_devs[i].region = 3; -+ else -+ jtag_devs[i].region = 8; -+ jtag_devs[i].present = 0; -+ } -+ -+ -+ if( default_io ) -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag")) { -+ LPT_WRITE(dev, 0,0); -+ LPT_READ(dev, 0,value); -+ if ( value != 0xff) { -+ PRINTMSG("(%d , %d) present, io=0x%04lx\n",jtag_major,i,dev->base); -+ -+ dev->present=1; -+ } -+ else -+ release_region(dev->base, dev->region); -+ } -+ else -+ { -+ PRINTMSG("failed to request_region (%d , %d), io=0x%04lx\n",jtag_major,i,dev->base); -+ return -1; -+ } -+ } -+ return 0; -+ } -+ else /* Force the region to be present, this makes the PCI parallel cards work */ -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) -+ { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag") && (dev->base != 0)) -+ { -+ PRINTMSG("(%d , %d) forced by user, io=0x%04lx\n",jtag_major,i,dev->base); -+ dev->present=1; -+ } -+ else -+ { -+ if( dev->base != 0) -+ release_region(dev->base, dev->region); -+ } -+ } -+ return 0; -+ } -+} -+ -+int init_module(void) -+{ -+ int result,i; -+ result = register_chrdev(jtag_major, DRIVER_NAME, &jtag_fops); -+ if (result < 0) { -+ PRINTMSG("Couldn't register char device err == %d\n",jtag_major); -+ return -1; -+ } -+ -+ if ( jtag_major == 0 ) -+ jtag_major = result; -+ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ jtag_devs[i].base=io[i]; -+ } -+ -+ jtag_probe(); -+ -+ PRINTMSG("Registered character device, major == %d\n",jtag_major); -+ return 0; -+} -+ -+void cleanup_module(void) -+{ -+ int i=0; -+ -+ for( i = 0; i < MAX_JTAG_DEV; i++) { -+ if( jtag_devs[i].present) -+ release_region(jtag_devs[i].base, jtag_devs[i].region); -+ } -+ -+ unregister_chrdev(jtag_major, DRIVER_NAME); -+ PRINTMSG("Unloaded char device\n"); -+} -+ -+ -+int -+jtag_open (struct inode *inode, struct file *filp) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ /* -+ * Only allow a single open at a time -+ */ -+ if (dev->open) -+ return (-EBUSY); -+ dev->open = 1; -+ -+ /* -+ * Initialise the hardware registers -+ */ -+ -+ LPT_WRITE (dev, LPT_CTRL, 0); -+ DELAY(50); -+ LPT_WRITE (dev, LPT_CTRL, LPT_CTRL_INIT); -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_close(struct inode *inode, struct file *filp) -+{ -+ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ dev->open = 0; -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_ioctl (struct inode *inode, struct file *filp, unsigned int io_cmd, unsigned long io_data) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ JTAG_RESET_ARGS *resetargs; -+ JTAG_SHIFT_ARGS *shiftargs; -+ I2C_ARGS *i2cargs; -+ I2C_CLOCK_SHIFT_ARGS *clockargs; -+ u_char *buf; -+ int freq; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ PRINTF (DBG_IOCTL, ("jtag_ioctl: device %d cmd=%x\n", unit, io_cmd)); -+ -+ switch (io_cmd) -+ { -+ case JTAG_RESET: -+ resetargs = (JTAG_RESET_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (resetargs->ring)) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, resetargs->ring); -+ jtagdrv_reset (dev); -+ return (0); -+ -+ case JTAG_SHIFT_IR: -+ case JTAG_SHIFT_DR: -+ shiftargs = (JTAG_SHIFT_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (shiftargs->ring) || shiftargs->nbits > (JTAG_MAX_DATA_LEN*JTAG_MAX_CHIPS)) { -+ return (-EFAULT); -+ } -+ -+ buf = (u_char *) kmalloc (JTAG_NBYTES(shiftargs->nbits), GFP_KERNEL); -+ -+ if (buf == (u_char *) NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (buf, shiftargs->value, JTAG_NBYTES(shiftargs->nbits))) -+ { -+ kfree(buf); -+ return (-EFAULT); -+ } -+ -+ -+ jtagdrv_select_ring (dev, shiftargs->ring); -+ -+ if (io_cmd == JTAG_SHIFT_IR) -+ jtagdrv_shift_ir (dev, buf, shiftargs->nbits); -+ else -+ jtagdrv_shift_dr (dev, buf, shiftargs->nbits); -+ -+ if (copy_to_user (shiftargs->value, buf, JTAG_NBYTES (shiftargs->nbits))) -+ { -+ kfree (buf); -+ return (-EFAULT); -+ } -+ -+ kfree (buf); -+ return (0); -+ -+ case I2C_WRITE: -+ case I2C_READ: -+ case I2C_WRITEREG: -+ case I2C_READREG: -+ i2cargs = (I2C_ARGS *) io_data; -+ -+ if (! VALID_I2C_RING(i2cargs->ring) || i2cargs->count > I2C_MAX_DATA_LEN) -+ return (-EFAULT); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | i2cargs->ring); -+ switch (io_cmd) -+ { -+ case I2C_WRITE: -+ i2cargs->ok = jtagdrv_i2c_write (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READ: -+ i2cargs->ok = jtagdrv_i2c_read (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_WRITEREG: -+ i2cargs->ok = jtagdrv_i2c_writereg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READREG: -+ i2cargs->ok = jtagdrv_i2c_readreg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ } -+ return (0); -+ -+ case I2C_CLOCK_SHIFT: -+ clockargs = (I2C_CLOCK_SHIFT_ARGS *) io_data; -+ -+ freq = (10 * clockargs->m / (1 << (((clockargs->n + 1) & 3)))); -+ -+ /* validate the value, and initialise the ring */ -+ if (clockargs->t != 0 || clockargs->n > 3 || clockargs->m > 127) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | RING_CLOCK_SHIFT); -+ jtagdrv_i2c_clock_shift (dev, clockargs->t, clockargs->n, clockargs->m); -+ jtagdrv_select_ring (dev, 0); -+ return (0); -+ -+ default: -+ return (-EINVAL); -+ } -+ return (-EINVAL); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/jtag/jtagdrv_Linux.h linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv_Linux.h ---- clean/drivers/net/qsnet/jtag/jtagdrv_Linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/jtagdrv_Linux.h 2002-08-09 07:18:37.000000000 -0400 -@@ -0,0 +1,174 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv_Linux.h,v 1.3 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.h,v $*/ -+ -+#ifndef __JTAGDRV_LINUX_H -+#define __JTAGDRV_LINUX_H -+ -+#include -+#include -+ -+typedef struct jtag_dev -+{ -+ unsigned long base; -+ int region; -+ -+ u_int present:1; -+ u_int open:1; -+} JTAG_DEV; -+ -+/* -+** -+** Hardware Defines -+** -+*/ -+ -+/* -+ * Assume that bit 4 of the Control Register is set to 1 (by default) -+ * to enable the printer port (CS3). -+ * -+ * The default base address is 3BC-3BF. -+ */ -+ -+#define LPT0 0x3BC /* CSR Base Address - note this can -+ * change depending on the setting -+ * in the Control Register 0. -+ * -+ * LPT1 0x378 -+ * LPT2 0x278 -+ * LPT3 0x268 -+ */ -+ -+/* -+ * Register offsets from the port base address -+ */ -+ -+#define LPT_REGISTER_0 0 -+#define LPT_REGISTER_1 1 -+#define LPT_REGISTER_2 2 -+#define LPT_REGISTER_3 0x400 -+#define LPT_REGISTER_4 0x401 -+#define LPT_REGISTER_5 0x402 -+ -+/* -+ * Chip control registers -+ */ -+ /* Base address for Super I/O National*/ -+ -+#define SIO_BASE_ADDR 0x26e /* Semiconductor PC87332VLJ combo-chip*/ -+#define CR4_REG 0x04 /* index 4, printer control reg 4 */ -+ -+#define LPT_EPP 0x01 /* Enable bit for epp */ -+#define LPT_ECP 0x04 /* Enable bit for ecp */ -+ -+/* -+ * Registers for use with centronics, nibble and byte modes. -+ */ -+ -+#define LPT_DATA LPT_REGISTER_0 /* line printer port data */ -+#define LPT_STAT LPT_REGISTER_1 /* LPT port status */ -+#define LPT_CTRL LPT_REGISTER_2 /* LPT port control */ -+ -+/* -+ * Registers for use with ECP mode. -+ */ -+ -+#define LPT_DFIFO LPT_REGISTER_3 /* r/w fifo register */ -+#define LPT_CFGB LPT_REGISTER_4 /* Configuration B */ -+#define LPT_ECR LPT_REGISTER_5 /* Exteded control */ -+ -+/* -+ * Bit assignments for ECR register. -+ */ -+ -+ /* Bits 0-4 */ -+ -+#define LPT_ECR_EMPTY 0x01 /* FIFO is empty */ -+#define LPT_ECR_FULL 0x02 /* FIFO is full */ -+#define LPT_ECR_SERV 0x04 /* Service bit */ -+#define LPT_ECR_DMA 0x08 /* DMA enable */ -+#define LPT_ECR_nINTR 0x10 /* Interrupt disable */ -+ -+ /* -+ * Bits 5-7 are ECR modes. -+ */ -+ -+#define LPT_ECR_PAR 0x20 /* Parallel port FIFO mode */ -+#define LPT_ECR_ECP 0x60 /* ECP mode */ -+#define LPT_ECR_CFG 0xE0 /* Configuration mode */ -+#define LPT_ECR_CLEAR ~0xE0 /* Cear mode bits */ -+ -+/* -+ * Bit assignments for the parallel port STATUS register: -+ */ -+ -+#define LPT_STAT_BIT0 0X1 /* Reserved. Bit always set. */ -+#define LPT_STAT_BIT1 0X2 /* Reserved. Bit always set. */ -+#define LPT_STAT_IRQ 0x4 /* interrupt status bit */ -+#define LPT_STAT_ERROR 0x8 /* set to 0 to indicate error */ -+#define LPT_STAT_SLCT 0x10 /* status of SLCT lead from printer */ -+#define LPT_STAT_PE 0x20 /* set to 1 when out of paper */ -+#define LPT_STAT_ACK 0x40 /* acknowledge - set to 0 when ready */ -+#define LPT_STAT_nBUSY 0x80 /* busy status bit, 0=busy, 1=ready */ -+ -+/* -+ * Bit assignments for the parallel port CONTROL register: -+ */ -+ -+#define LPT_CTRL_nSTROBE 0x1 /* Printer Strobe Control */ -+#define LPT_CTRL_nAUTOFD 0x2 /* Auto Feed Control */ -+#define LPT_CTRL_INIT 0x4 /* Initialize Printer Control */ -+#define LPT_CTRL_nSLCTIN 0x8 /* 0=select printer, 1=not selected */ -+#define LPT_CTRL_IRQ 0x10 /* Interrupt Request Enable Control */ -+#define LPT_CTRL_DIR 0x20 /* Direction control */ -+#define LPT_CTRL_BIT6 0X40 /* Reserved. Bit always set. */ -+#define LPT_CTRL_BIT7 0X80 /* Reserved. Bit always set. */ -+ -+ -+#define LPT_WRITE(dev, regname, value) do { outb(value, (dev)->base + regname); } while (0) -+#define LPT_READ(dev, regname,value) do { value = inb((dev)->base + regname); } while (0) -+ -+ -+ -+/* Standard register access macros */ -+#define LPT_WRITE_CTRL(dev, value) LPT_WRITE(dev, LPT_CTRL, LPT_CTRL_INIT | value) -+#define LPT_WRITE_DATA(dev, value) LPT_WRITE(dev, LPT_DATA, value) -+#define LPT_READ_STAT(dev, value) LPT_READ(dev, LPT_STAT, value) -+ -+/* -+ * The jtag signals are connected to the parallel port as follows : -+ * -+ * TRST bit 0 -+ * TDI bit 1 -+ * TMS bit 2 -+ * TCLK AFX -+ * TDO PE -+ */ -+#define LPT_DATA_TRST 1 -+#define LPT_DATA_TDI 2 -+#define LPT_DATA_TMS 4 -+#define LPT_CTRL_TCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_TDO LPT_STAT_PE -+ -+/* -+ * The I2C signals are connected as follows : -+ */ -+#define LPT_DATA_SDA 2 -+#define LPT_CTRL_SCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_SDA LPT_STAT_PE -+ -+/* -+ * The ring selection signals are as follows : -+ * addr bit 0-7 -+ * clock nSLCTIN -+ */ -+#define LPT_CTRL_RCLK LPT_CTRL_nSLCTIN -+ -+ -+#endif /* __JTAGDRV_LINUX_H */ -diff -urN clean/drivers/net/qsnet/jtag/Makefile linux-2.6.9/drivers/net/qsnet/jtag/Makefile ---- clean/drivers/net/qsnet/jtag/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/Makefile 2005-10-10 17:47:31.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/jtag/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_JTAG) += jtag.o -+jtag-objs := jtagdrv_Linux.o jtagdrv.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/jtag/Makefile.conf linux-2.6.9/drivers/net/qsnet/jtag/Makefile.conf ---- clean/drivers/net/qsnet/jtag/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/Makefile.conf 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = jtag.o -+MODULENAME = jtag -+KOBJFILES = jtagdrv_Linux.o jtagdrv.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_JTAG -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/jtag/quadrics_version.h linux-2.6.9/drivers/net/qsnet/jtag/quadrics_version.h ---- clean/drivers/net/qsnet/jtag/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/jtag/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/Kconfig linux-2.6.9/drivers/net/qsnet/Kconfig ---- clean/drivers/net/qsnet/Kconfig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/Kconfig 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,79 @@ -+# -+# Kconfig for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd -+# -+# File: driver/net/qsnet/Kconfig -+# -+ -+menu "Quadrics QsNet" -+ depends on NETDEVICES -+ -+config QSNET -+ tristate "Quadrics QsNet support" -+ default m -+ depends on PCI -+ ---help--- -+ Quadrics QsNet is a high bandwidth, ultra low latency cluster interconnect -+ which provides both user and kernel programmers with secure, direct access -+ to the Quadrics network. -+ -+config ELAN3 -+ tristate "Elan 3 device driver" -+ default m -+ depends on QSNET -+ ---help--- -+ This is the main device driver for the Quadrics QsNet (Elan3) PCI device. -+ This is a high bandwidth, ultra low latency interconnect which provides -+ both user and kernel programmers with secure, direct access to the -+ Quadrics network. -+ -+config ELAN4 -+ tristate "Elan 4 device driver" -+ default m -+ depends on QSNET -+ ---help--- -+ This is the main device driver for the Quadrics QsNetII (Elan4) PCI-X device. -+ This is a high bandwidth, ultra low latency interconnect which provides -+ both user and kernel programmers with secure, direct access to the -+ Quadrics network. -+ -+config EP -+ tristate "Elan Kernel Comms" -+ default m -+ depends on QSNET && (ELAN4 || ELAN3) -+ ---help--- -+ This module implements the QsNet kernel communications layer. This -+ is used to layer kernel level facilities on top of the basic Elan -+ device drivers. These can be used to implement subsystems such as -+ TCP/IP and remote filing systems over the QsNet interconnect. -+ -+config EIP -+ tristate "Elan IP device driver" -+ default m -+ depends on QSNET && EP && NET -+ ---help--- -+ This is a network IP device driver for the Quadrics QsNet device. -+ It allows the TCP/IP protocol to be run over the Quadrics interconnect. -+ -+config RMS -+ tristate "Resource Management System support" -+ default m -+ depends on QSNET -+ ---help--- -+ This is a support module for the Quadrics RMS resource manager. It provides kernel -+ services for monitoring and controlling user job execution, termination and cleanup. -+ -+config JTAG -+ tristate "Switch monitoring" -+ default m -+ depends on QSNET -+ ---help--- -+ The jtag interface is used to allow processes to send and retrieve jtag -+ information to a Quadrics QsNet Elite switch via the parallel port. -+ The module requires a /dev/jtag[0-3] entry (usually there is only a /dev/jtag0) -+ device and a particular device only allows one process at a time to access this -+ resource. -+ For more information about JTag interface, please refer to the IEEE document on -+ http://www.ieee.org/ -+endmenu -diff -urN clean/drivers/net/qsnet/Makefile linux-2.6.9/drivers/net/qsnet/Makefile ---- clean/drivers/net/qsnet/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/Makefile 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2005 Quadrics Ltd. -+# -+# File: driver/net/qsnet/Makefile -+# -+ -+obj-$(CONFIG_QSNET) += qsnet/ elan/ -+obj-$(CONFIG_ELAN3) += elan3/ -+obj-$(CONFIG_ELAN4) += elan4/ -+obj-$(CONFIG_EP) += ep/ -+obj-$(CONFIG_EIP) += eip/ -+obj-$(CONFIG_RMS) += rms/ -+obj-$(CONFIG_JTAG) += jtag/ -diff -urN clean/drivers/net/qsnet/qsnet/debug.c linux-2.6.9/drivers/net/qsnet/qsnet/debug.c ---- clean/drivers/net/qsnet/qsnet/debug.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/debug.c 2005-03-23 06:04:54.000000000 -0500 -@@ -0,0 +1,583 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.22 2005/03/23 11:04:54 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/debug.c,v $ */ -+ -+#include -+#include -+#include -+ -+caddr_t qsnet_debug_buffer_ptr = NULL; -+int qsnet_debug_front = 0; -+int qsnet_debug_back = 0; -+int qsnet_debug_lost_lines = 0; -+int qsnet_debug_disabled = 0; -+ -+int qsnet_debug_line_size = 256; -+int qsnet_debug_num_lines = 8192; -+ -+int qsnet_assfail_mode = 1; /* default to BUG() */ -+ -+int qsnet_debug_running = 0; -+int kqsnet_debug_running = 0; -+ -+static spinlock_t qsnet_debug_lock; -+static kcondvar_t qsnet_debug_wait; -+static char qsnet_debug_buffer_space[8192]; -+ -+#define QSNET_DEBUG_PREFIX_MAX_SIZE 32 -+#define QSNET_DEBUG_MAX_WORDWRAP 15 -+ -+/* must be larger than QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2 */ -+#if defined(DIGITAL_UNIX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 80 -+#elif defined(LINUX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 128 -+#endif -+ -+#define isspace(CH) ((CH==' ') | (CH=='\t') | (CH=='\n')) -+ -+#ifdef LINUX -+#define ALLOC_DEBUG_BUFFER(ptr) do { (ptr) = (void *)__get_free_pages (GFP_KERNEL, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)); } while (0) -+#define FREE_DEBUG_BUFFER(ptr) free_pages ((unsigned long) ptr, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)) -+#else -+#define ALLOC_DEBUG_BUFFER(ptr) KMEM_ALLOC (ptr, caddr_t, qsnet_debug_num_lines * qsnet_debug_line_size, 1) -+#define FREE_DEBUG_BUFFER(ptr) KMEM_FREE (ptr, qsnet_debug_num_lines * qsnet_debug_line_size) -+#endif -+ -+void -+qsnet_debug_init () -+{ -+ spin_lock_init (&qsnet_debug_lock); -+ kcondvar_init (&qsnet_debug_wait); -+ -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_debug_lost_lines = 0; -+ -+ if (qsnet_debug_line_size < (QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2)) -+ qsnet_debug_line_size = 256; -+ -+ qsnet_debug_running = 1; -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "assfail_mode", &qsnet_assfail_mode, 0); -+} -+ -+void -+qsnet_debug_fini() -+{ -+ if (!qsnet_debug_running) return; -+ -+ remove_proc_entry ("assfail_mode", qsnet_procfs_config); -+ -+ spin_lock_destroy (&qsnet_debug_lock); -+ kcondvar_destroy (&qsnet_debug_wait); -+ -+ if (qsnet_debug_buffer_ptr) -+ FREE_DEBUG_BUFFER (qsnet_debug_buffer_ptr); -+ -+ qsnet_debug_buffer_ptr = NULL; -+ qsnet_debug_lost_lines = 0; -+ qsnet_debug_running = 0; -+} -+ -+void -+qsnet_debug_disable(int val) -+{ -+ qsnet_debug_disabled = val; -+} -+ -+void -+qsnet_debug_alloc() -+{ -+ caddr_t ptr; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ ALLOC_DEBUG_BUFFER (ptr); -+ -+ if (ptr != NULL) -+ { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ qsnet_debug_buffer_ptr = ptr; -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ FREE_DEBUG_BUFFER (ptr); -+ } -+ } -+ } -+ -+} -+ -+static void -+qsnet_prefix_debug(unsigned int mode, const char *prefix, char *buffer) -+{ -+ /* assumes caller has lock */ -+ -+ int prefixlen = strlen(prefix); -+ char pref[QSNET_DEBUG_PREFIX_MAX_SIZE]; -+ int prefix_done = 0; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_disabled) -+ return; -+ -+ if (prefixlen >= QSNET_DEBUG_PREFIX_MAX_SIZE) -+ { -+ strncpy(pref,prefix,QSNET_DEBUG_PREFIX_MAX_SIZE -2); -+ strcpy (&pref[QSNET_DEBUG_PREFIX_MAX_SIZE-5],"... "); -+ -+ prefix = pref; -+ prefixlen = strlen(prefix); -+ } -+ -+#ifdef CONFIG_MPSAS -+ { -+ char *p; -+#define TRAP_PUTCHAR_B (0x17a - 256) -+#define SAS_PUTCHAR(c) do {\ -+ register int o0 asm ("o0") = (c);\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+\ -+ if (o0 == '\n') {\ -+ o0 = '\r';\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+ }\ -+ } while(0) -+ -+ for (p = prefix; *p; p++) -+ SAS_PUTCHAR (*p); -+ -+ for (p = buffer; *p; p++) -+ SAS_PUTCHAR (*p); -+ } -+#else -+ if (mode & QSNET_DEBUG_BUFFER) -+ { -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_lost_lines++; -+ else -+ { -+ caddr_t base = &qsnet_debug_buffer_ptr[qsnet_debug_line_size * qsnet_debug_back]; -+ caddr_t lim = base + qsnet_debug_line_size - 2; -+ caddr_t p; -+ -+ p = buffer; -+ prefix_done = 0; -+ while (*p) -+ { -+ /* sort out prefix */ -+ if ( prefix_done++ ) -+ { -+ int i; -+ for(i=0;i 0 ) -+ { -+ int i; -+ for(i=0;i remaining) len = remaining; -+ -+ strncpy(line, p, len); -+ line[len] = 0; -+ p += len; -+ -+ /* word wrap */ -+ if ((len == remaining) && *p && !isspace(*p)) -+ { -+ /* lets see if we can back track and find a white space to break on */ -+ char * ptr = &line[len-1]; -+ int count = 1; -+ -+ while ( ( !isspace(*ptr) ) && ( count < QSNET_DEBUG_MAX_WORDWRAP )) -+ { -+ count++; -+ ptr--; -+ } -+ -+ if ( isspace(*ptr) ) -+ { -+ /* found somewhere to wrap to */ -+ p -= (count-1); /* need to loose the white space */ -+ len -= count; -+ } -+ } -+ -+ if (line[len-1] != '\n' ) -+ { -+ line[len] = '\n'; -+ line[len+1] = 0; -+ } -+ -+ /* we put a \n in so dont need another one next */ -+ if ( *p == '\n') -+ p++; -+ -+#if defined(DIGITAL_UNIX) -+ { -+ char *pr; -+ -+ for (pr = pref; *pr; pr++) -+ cnputc (*pr); -+ -+ for (pr = line; *pr; pr++) -+ cnputc (*pr); -+ } -+#elif defined(LINUX) -+ printk("%s%s",pref,line); -+#endif -+ } -+ } -+#endif /* CONFIG_MPSAS */ -+} -+ -+void -+qsnet_vdebugf (unsigned int mode, const char *prefix, const char *fmt, va_list ap) -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ -+ if (prefix == NULL) -+ printk ("qsnet_vdebugf: prefix==NULL\n"); -+ else -+ qsnet_prefix_debug(mode, prefix, qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+void kqsnet_debugf(char *fmt,...) -+{ -+ if ( kqsnet_debug_running ) { -+ va_list ap; -+ char string[20]; -+ -+ sprintf (string, "mm=%p:", current->mm); -+ va_start(ap, fmt); -+ qsnet_vdebugf(QSNET_DEBUG_BUFFER, string, fmt, ap); -+ va_end(ap); -+ } -+} -+void -+qsnet_debugf(unsigned int mode, const char *fmt,...) -+{ -+ va_list ap; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+ va_start (ap, fmt); -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ va_end (ap); -+ -+ qsnet_prefix_debug(mode, "", qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+int -+qsnet_debug_buffer (caddr_t ubuffer, int len) -+{ -+ caddr_t buffer, ptr, base; -+ int remain, len1; -+ unsigned long flags; -+ static char qsnet_space[65536]; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (len < qsnet_debug_line_size) -+ return (-1); -+ -+ if (len > (qsnet_debug_line_size * qsnet_debug_num_lines)) -+ len = qsnet_debug_line_size * qsnet_debug_num_lines; -+ -+ if ( len > 65536 ) { -+ KMEM_ZALLOC (buffer, caddr_t, len, 1); -+ } else -+ buffer = qsnet_space; -+ -+ if (buffer == NULL) -+ return (-1); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ return (-1); -+ } -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (!qsnet_debug_lost_lines && (qsnet_debug_back == qsnet_debug_front)) -+ if (kcondvar_waitsig (&qsnet_debug_wait, &qsnet_debug_lock, &flags) == 0) -+ break; -+ -+ ptr = buffer; -+ remain = len; -+ -+ if (qsnet_debug_lost_lines) -+ { -+ qsnet_debug_lost_lines = 0; -+ strcpy (ptr, "Debug Buffer has overflowed!!\n"); -+ len1 = strlen (ptr); -+ -+ remain -= len1; -+ ptr += len1; -+ } -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ /* copy the line from DebugFront */ -+ base = &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]; -+ -+ len1 = strlen (base); -+ -+ if (len1 > remain) -+ break; -+ -+ bcopy (base, ptr, len1); -+ -+ ptr += len1; -+ remain -= len1; -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ len1 = ptr - buffer; -+ -+ if (len1 != 0 && copyout (buffer, ubuffer, len1)) -+ len1 = -1; -+ -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ -+ return (len1); -+} -+ -+void -+qsnet_debug_buffer_on() -+{ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+} -+ -+void -+qsnet_debug_buffer_clear() -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL){ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Clear",""); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+ -+void -+qsnet_debug_buffer_mark(char *str) -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL) { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Mark",str); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+int -+qsnet_debug_dump () -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ return (-1); -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ printk ("%s", &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]); -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ if (qsnet_debug_lost_lines) -+ printk ("\n**** Debug buffer has lost %d lines\n****\n",qsnet_debug_lost_lines); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ return (0); -+} -+ -+int -+qsnet_debug_kmem (void *handle) -+{ -+ if (!qsnet_debug_running) return (0); -+ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(handle); -+#endif -+ return (0); -+} -+ -+int -+qsnet_assfail (char *ex, const char *func, char *file, int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (qsnet_assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (qsnet_assfail_mode & 2) -+ panic ("qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (qsnet_assfail_mode & 4) -+ qsnet_debug_disable (1); -+ -+ return 0; -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/qsnet/i686_mmx.c linux-2.6.9/drivers/net/qsnet/qsnet/i686_mmx.c ---- clean/drivers/net/qsnet/qsnet/i686_mmx.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/i686_mmx.c 2004-01-05 07:08:25.000000000 -0500 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i686_mmx.c,v 1.11 2004/01/05 12:08:25 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/i686_mmx.c,v $*/ -+ -+#include -+ -+#if defined(LINUX_I386) -+ -+#include -+#include -+#include -+#include -+ -+int mmx_disabled = 0; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+/* These functions are lifted from arch/i386/kernel/i387.c -+ * and MUST be kept in step with the kernel (currently 2.4.17) -+ * alternatively we should export the kernel_fpu_begin() function -+ */ -+static inline void __save_init_fpu( struct task_struct *tsk ) -+{ -+ if ( cpu_has_fxsr ) { -+ asm volatile( "fxsave %0 ; fnclex" -+ : "=m" (tsk->thread.i387.fxsave) ); -+ } else { -+ asm volatile( "fnsave %0 ; fwait" -+ : "=m" (tsk->thread.i387.fsave) ); -+ } -+ tsk->flags &= ~PF_USEDFPU; -+} -+#if defined(MODULE) -+void kernel_fpu_begin(void) -+{ -+ struct task_struct *tsk = current; -+ -+ if (tsk->flags & PF_USEDFPU) { -+ __save_init_fpu(tsk); -+ return; -+ } -+ clts(); -+} -+#endif -+#endif -+ -+extern inline int -+mmx_preamble(void) -+{ -+ if (mmx_disabled || in_interrupt()) -+ return (0); -+ -+ kernel_fpu_begin(); -+ -+ return (1); -+} -+ -+extern inline void -+mmx_postamble(void) -+{ -+ kernel_fpu_end(); -+} -+ -+extern u64 -+qsnet_readq (volatile u64 *ptr) -+{ -+ u64 value; -+ -+ if (! mmx_preamble()) -+ value = *ptr; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (ptr), "r" (&value) : "memory"); -+ mmx_postamble(); -+ } -+ return (value); -+} -+ -+void -+qsnet_writeq(u64 value, volatile u64 *ptr) -+{ -+ if (! mmx_preamble()) -+ *ptr = value; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (&value), "r" (ptr) : "memory"); -+ mmx_postamble(); -+ } -+} -+#endif -diff -urN clean/drivers/net/qsnet/qsnet/kernel_linux.c linux-2.6.9/drivers/net/qsnet/qsnet/kernel_linux.c ---- clean/drivers/net/qsnet/qsnet/kernel_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/kernel_linux.c 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,902 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kernel_linux.c,v 1.74.2.5 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include /* for smp_call_function() prototype */ -+#include -+#include -+ -+#include -+ -+extern int mmx_disabled; -+extern int qsnet_debug_line_size; -+extern int qsnet_debug_num_lines; -+ -+gid_t qsnet_procfs_gid; -+struct proc_dir_entry *qsnet_procfs_root; -+struct proc_dir_entry *qsnet_procfs_config; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("QsNet Kernel support code"); -+ -+MODULE_LICENSE("GPL"); -+ -+#if defined(LINUX_I386) -+module_param(mmx_disabled, uint, 0); -+#endif -+ -+module_param(qsnet_debug_line_size, uint, 0); -+module_param(qsnet_debug_num_lines, uint, 0); -+ -+module_param(qsnet_procfs_gid, uint, 0); -+ -+#ifdef KMEM_DEBUG -+EXPORT_SYMBOL(qsnet_kmem_alloc_debug); -+EXPORT_SYMBOL(qsnet_kmem_free_debug); -+#else -+EXPORT_SYMBOL(qsnet_kmem_alloc); -+EXPORT_SYMBOL(qsnet_kmem_free); -+#endif -+ -+EXPORT_SYMBOL(qsnet_kmem_display); -+EXPORT_SYMBOL(kmem_to_phys); -+ -+EXPORT_SYMBOL(cpu_hold_all); -+EXPORT_SYMBOL(cpu_release_all); -+ -+#if defined(LINUX_I386) -+EXPORT_SYMBOL(qsnet_readq); -+EXPORT_SYMBOL(qsnet_writeq); -+#endif -+ -+/* debug.c */ -+EXPORT_SYMBOL(qsnet_debugf); -+EXPORT_SYMBOL(kqsnet_debugf); -+EXPORT_SYMBOL(qsnet_vdebugf); -+EXPORT_SYMBOL(qsnet_debug_buffer); -+EXPORT_SYMBOL(qsnet_debug_alloc); -+EXPORT_SYMBOL(qsnet_debug_dump); -+EXPORT_SYMBOL(qsnet_debug_kmem); -+EXPORT_SYMBOL(qsnet_debug_disable); -+ -+EXPORT_SYMBOL(qsnet_assfail); -+ -+EXPORT_SYMBOL(qsnet_procfs_gid); -+EXPORT_SYMBOL(qsnet_procfs_root); -+ -+static int qsnet_open (struct inode *ino, struct file *fp); -+static int qsnet_release (struct inode *ino, struct file *fp); -+static int qsnet_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations qsnet_ioctl_fops = -+{ -+ ioctl: qsnet_ioctl, -+ open: qsnet_open, -+ release: qsnet_release, -+}; -+ -+static int -+qsnet_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+qsnet_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res=0; -+ -+ switch (cmd) -+ { -+ case QSNETIO_DEBUG_KMEM: -+ { -+ QSNETIO_DEBUG_KMEM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_KMEM_STRUCT))) -+ return (-EFAULT); -+ -+ /* doesnt use handle as a pointer */ -+ qsnet_kmem_display(args.handle); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_DUMP : -+ { -+ res = qsnet_debug_dump(); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_BUFFER : -+ { -+ QSNETIO_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* qsnet_debug_buffer uses copyout */ -+ if ((res = qsnet_debug_buffer (args.addr, args.len)) != -1) -+ { -+ args.len = res; -+ if (copy_to_user ((void *) arg, &args, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ res = 0; -+ } -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#ifdef KMEM_DEBUG -+static int qsnet_kmem_open (struct inode *ino, struct file *fp); -+static int qsnet_kmem_release (struct inode *ino, struct file *fp); -+static ssize_t qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+static struct file_operations qsnet_kmem_fops = -+{ -+ open: qsnet_kmem_open, -+ release: qsnet_kmem_release, -+ read: qsnet_kmem_read, -+}; -+ -+typedef struct qsnet_private_space -+{ -+ char * space; -+ int size; -+ struct qsnet_private_space *next; -+} QSNET_PRIVATE_SPACE; -+ -+typedef struct qsnet_private -+{ -+ QSNET_PRIVATE_SPACE *space_chain; -+ QSNET_PRIVATE_SPACE *current_space; -+ int current_pos; -+ -+} QSNET_PRIVATE; -+ -+#define QSNET_KMEM_DEBUG_LINE_SIZE ((int)512) -+#define QSNET_PRIVATE_PAGE_SIZE ((int)(4*1024)) -+ -+static int qsnet_kmem_fill(QSNET_PRIVATE *pd); -+ -+void -+destroy_chain(QSNET_PRIVATE * pd) -+{ -+ QSNET_PRIVATE_SPACE *mem, *next; -+ -+ if (pd == NULL) return; -+ -+ for(mem = pd->space_chain ; mem != NULL; ) -+ { -+ next = mem->next; -+ if ( mem->space ) -+ kfree ( mem->space); -+ kfree(mem); -+ mem = next; -+ } -+ kfree (pd); -+} -+ -+QSNET_PRIVATE * -+make_chain(int len) -+{ -+ QSNET_PRIVATE * pd; -+ QSNET_PRIVATE_SPACE * mem; -+ int i; -+ -+ /* make the private data block */ -+ if ((pd = kmalloc (sizeof (QSNET_PRIVATE), GFP_KERNEL)) == NULL) -+ return NULL; -+ pd->space_chain = NULL; -+ -+ /* first make the holders */ -+ for(i=0;inext = pd->space_chain; -+ mem->size = 0; -+ mem->space = 0; -+ pd->space_chain = mem; -+ -+ /* now add the space */ -+ if ((mem->space = kmalloc (QSNET_PRIVATE_PAGE_SIZE, GFP_KERNEL)) == NULL) -+ { -+ destroy_chain(pd); -+ return (NULL); -+ } -+ -+ mem->space[0] = 0; -+ -+ } -+ -+ pd->current_space = pd->space_chain; -+ pd->current_pos = 0; -+ -+ return pd; -+} -+ -+static int -+qsnet_kmem_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_kmem_release (struct inode *inode, struct file *fp) -+{ -+ if ( fp->private_data ) -+ { -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) fp->private_data; -+ -+ /* free the space */ -+ if (pd->space_chain) -+ kfree (pd->space_chain); -+ -+ /* free struct */ -+ kfree (pd); -+ } -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) file->private_data; -+ int error; -+ int output_count; -+ int num_of_links=10; -+ -+ /* make a buffer to output count bytes in */ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if ( pd == NULL) -+ { -+ /* first time */ -+ -+ /* ok we have to guess at how much space we are going to need */ -+ /* if it fails we up the space and carry try again */ -+ /* we have to do it this way as we cant get more memory whilst */ -+ /* holding the lock */ -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ -+ while ( qsnet_kmem_fill(pd) ) -+ { -+ destroy_chain(pd); -+ num_of_links += 10; -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ } -+ -+ /* we have the space and filled it */ -+ file->private_data = (void *)pd; -+ } -+ -+ /* output buffer */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ return (0); /* finished */ -+ -+ output_count = pd->current_space->size - pd->current_pos; -+ if ( output_count > count ) -+ output_count = count; -+ -+ copy_to_user(buf, (pd->current_space->space + pd->current_pos), output_count); -+ -+ pd->current_pos += output_count; -+ ppos += output_count; -+ -+ /* just check to see if we have finished the current space */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ { -+ if ( pd->current_space->next ) -+ { -+ pd->current_space = pd->current_space->next; -+ pd->current_pos = 0; -+ } -+ } -+ -+ return (output_count); -+} -+#endif /* KMEM_DEBUG */ -+ -+static int -+proc_write_qsnetdebug(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "on")) -+ qsnet_debug_buffer_on(); -+ -+ if (! strcmp (tmpbuf, "clear")) -+ qsnet_debug_buffer_clear(); -+ -+ if (! strncmp (tmpbuf, "mark",4)) -+ qsnet_debug_buffer_mark( &tmpbuf[4] ); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_qsnetdebug(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "echo command > /proc/qsnet/config/qsnetdebug\ncommand = on | off | clear | mark text\n"); -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+#include "quadrics_version.h" -+extern int kqsnet_debug_running; -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+static int __init qsnet_init(void) -+{ -+ struct proc_dir_entry *p; -+ -+ printk ("qsnet Module (version %s)\n", quadrics_version); -+ if ((qsnet_procfs_root = proc_mkdir ("qsnet", 0)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet \n"); -+ return (-ENXIO); -+ } -+ -+ if ((p = create_proc_entry ("ioctl", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/ioctl\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_ioctl_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ qsnet_proc_register_str (qsnet_procfs_root, "version", quadrics_version, S_IRUGO); -+ -+ if ((qsnet_procfs_config = proc_mkdir ("config", qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet/config \n"); -+ return (-ENXIO); -+ } -+ -+#ifdef KMEM_DEBUG -+ if ((p = create_proc_entry ("kmem_debug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/kmem_debug\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_kmem_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+#endif -+ -+ qsnet_debug_init(); -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "kqsnet_debug_running", &kqsnet_debug_running, 0); -+ -+ if ((p = create_proc_entry ("qsnetdebug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/qsnetdebug\n"); -+ return (-ENXIO); -+ } -+ p->read_proc = proc_read_qsnetdebug; -+ p->write_proc = proc_write_qsnetdebug; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ return (0); -+} -+ -+static void __exit qsnet_exit(void) -+{ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(0); -+#endif -+ qsnet_debug_fini(); -+ -+ remove_proc_entry ("qsnetdebug", qsnet_procfs_config); -+ remove_proc_entry ("kqsnet_debug_running", qsnet_procfs_config); -+#ifdef KMEM_DEBUG -+ remove_proc_entry ("kmem_debug", qsnet_procfs_config); -+#endif -+ remove_proc_entry ("config", qsnet_procfs_root); -+ -+ remove_proc_entry ("version", qsnet_procfs_root); -+ remove_proc_entry ("ioctl", qsnet_procfs_root); -+ -+ remove_proc_entry ("qsnet", 0); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(qsnet_init); -+module_exit(qsnet_exit); -+ -+#ifdef KMEM_DEBUG -+/* -+ * Kernel memory allocation. We maintain our own list of allocated mem -+ * segments so we can free them on module cleanup. -+ * -+ * We use kmalloc for allocations less than one page in size; vmalloc for -+ * larger sizes. -+ */ -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int line; -+ char filename[20]; -+} kmalloc_t; -+ -+static LIST_HEAD(kmalloc_head); -+ -+static spinlock_t kmalloc_lock = SPIN_LOCK_UNLOCKED; -+ -+/* -+ * Kernel memory allocation. We use kmalloc for allocations less -+ * than one page in size; vmalloc for larger sizes. -+ */ -+ -+static int -+qsnet_kmem_fill(QSNET_PRIVATE *pd) -+{ -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ char str[QSNET_KMEM_DEBUG_LINE_SIZE]; -+ QSNET_PRIVATE_SPACE * current_space; -+ int current_pos; -+ int len; -+ current_space = pd->space_chain; -+ current_pos = 0; -+ -+ -+ current_space->space[0] = 0; -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ /* make the next line */ -+ sprintf(str,"%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->filename); -+ len = strlen(str); -+ -+ /* does it fit on the current page */ -+ if ( (current_pos + len + 1) >= QSNET_PRIVATE_PAGE_SIZE) -+ { -+ /* move onto next page */ -+ if ((current_space = current_space->next) == NULL) -+ { -+ /* run out of space !!!! */ -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ return (1); -+ } -+ current_space->space[0] = 0; -+ current_pos = 0; -+ } -+ strcat( current_space->space + current_pos, str); -+ current_pos += len; -+ -+ /* remember how much we wrote to this page */ -+ current_space->size = current_pos; -+ -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return (0); -+} -+ -+void * -+qsnet_kmem_alloc_debug(int len, int cansleep, int zerofill, char *file, int line) -+{ -+ void *new; -+ unsigned long flags; -+ kmalloc_t *kp; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((uintptr_t) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ /* record allocation */ -+ kp = kmalloc(sizeof(kmalloc_t), cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ ASSERT(kp != NULL); -+ kp->len = len; -+ kp->ptr = new; -+ kp->used_vmalloc = (len >= PAGE_SIZE || cansleep); -+ kp->owner = current; -+ kp->caller = __builtin_return_address(0); -+ kp->time = lbolt; -+ kp->line = line; -+ len = strlen(file); -+ -+ if (len > 18) -+ strcpy(kp->filename,&file[len-18]); -+ else -+ strcpy(kp->filename,file); -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ list_add(&kp->list, &kmalloc_head); -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free_debug(void *ptr, int len, char *file, int line) -+{ -+ unsigned long flags; -+ kmalloc_t *kp; -+ struct list_head *lp; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ if (kp->ptr == ptr) { -+ if (kp->len != len) -+ printk("qsnet_kmem_free_debug(%p) ptr %p len %d mismatch: expected %d caller %p owner %p (%s:%d)\n", -+ current, ptr, len, kp->len, __builtin_return_address(0), kp->caller, file, line); -+ list_del(lp); -+ kfree(kp); /* free off descriptor */ -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ if (lp == &kmalloc_head) /* segment must be found */ -+ { -+ printk( "qsnet_kmem_free_debug(%p) ptr %p len %d not found: caller %p (%s:%d)\n", -+ current, ptr, len, __builtin_return_address(0), file, line); -+ } -+ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+ -+#else /* !KMEM_DEBUG */ -+ -+void * -+qsnet_kmem_alloc(int len, int cansleep, int zerofill) -+{ -+ void *new; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((unsigned long) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free(void *ptr, int len) -+{ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+#endif /* !KMEM_DEBUG */ -+ -+void -+qsnet_kmem_display(void *handle) -+{ -+#ifdef KMEM_DEBUG -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ int count = 0, totsize = 0; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ if (!handle || handle == kp->owner) -+ { -+ printk("qsnet_kmem_display(%p): mem %p len %d unfreed caller %p (%p) \n", -+ handle, kp->ptr, kp->len, kp->caller, kp->owner); -+ -+ count++; -+ totsize += kp->len; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ printk("qsnet_kmem_display(%p): %d bytes left in %d objects\n", handle, totsize, count); -+#endif -+} -+ -+physaddr_t -+kmem_to_phys(void *ptr) -+{ -+ virtaddr_t virt = (virtaddr_t) ptr; -+ physaddr_t phys; -+ pte_t *pte; -+ -+ if ((virt >= VMALLOC_START && virt < VMALLOC_END)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#if defined(PKMAP_BASE) -+ else if (virt >= PKMAP_BASE && virt < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#endif -+#if defined(__ia64) -+ else if (virt >= __IA64_UNCACHED_OFFSET && virt < PAGE_OFFSET) -+ { -+ /* ia64 non-cached KSEG */ -+ phys = ((physaddr_t) ptr - __IA64_UNCACHED_OFFSET); -+ } -+#endif -+ else /* otherwise it's KSEG */ -+ { -+ phys = __pa(virt); -+ } -+ -+#if defined(CONFIG_ALPHA_GENERIC) || (defined(CONFIG_ALPHA_EV6) && !defined(USE_48_BIT_KSEG)) -+ /* -+ * with TS_BIAS as bit 40 - the tsunami pci space is mapped into -+ * the kernel at 0xfffff500.00000000 however we need to convert -+ * this to the true physical address 0x00000800.00000000. -+ * -+ * there is no need for PHYS_TWIDDLE since we knew we'd get a kernel -+ * virtual address already and handled this with __pa(). -+ */ -+ if (phys & (1ul << 40)) { -+ phys &= ~(1ul << 40); /* clear bit 40 (kseg I/O select) */ -+ phys |= (1ul << 43); /* set bit 43 (phys I/O select) */ -+ } -+#endif -+ return phys; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ -+EXPORT_SYMBOL(pci_resource_size); -+EXPORT_SYMBOL(pci_get_base_address); -+EXPORT_SYMBOL(pci_base_to_kseg); -+ -+ -+/* -+ * PCI stuff. -+ * -+ * XXX pci_base_to_kseg() and pci_kseg_to_phys() are problematic -+ * in that they may not work on non-Tsunami (DS20, ES40, etc) -+ * architectures, and may not work in non-zero PCI bus numbers. -+ */ -+ -+unsigned long -+pci_get_base_address(struct pci_dev *pdev, int index) -+{ -+ unsigned long base; -+ -+ ASSERT(index >= 0 && index <= 5); -+ /* borrowed in part from drivers/scsi/sym53c8xx.c */ -+ base = pdev->base_address[index++]; -+ -+#if BITS_PER_LONG > 32 -+ if ((base & 0x7) == 0x4) -+ base |= (((unsigned long)pdev->base_address[index]) << 32); -+#endif -+ return base; -+} -+ -+unsigned long -+pci_resource_size(struct pci_dev *pdev, int index) -+{ -+ u32 addr, mask, size; -+ -+ static u32 bar_addr[] = { -+ PCI_BASE_ADDRESS_0, -+ PCI_BASE_ADDRESS_1, -+ PCI_BASE_ADDRESS_2, -+ PCI_BASE_ADDRESS_3, -+ PCI_BASE_ADDRESS_4, -+ PCI_BASE_ADDRESS_5, -+ }; -+ ASSERT(index >= 0 && index <= 5); -+ -+ /* algorithm from Rubini book */ -+ pci_read_config_dword (pdev, bar_addr[index], &addr); -+ pci_write_config_dword(pdev, bar_addr[index], ~0); -+ pci_read_config_dword (pdev, bar_addr[index], &mask); -+ pci_write_config_dword(pdev, bar_addr[index], addr); -+ -+ mask &= PCI_BASE_ADDRESS_MEM_MASK; -+ size = ~mask + 1; -+ return size; -+} -+ -+/* -+ * Convert BAR register value to KSEG address. -+ */ -+void * -+pci_base_to_kseg(u64 baddr, int bus) -+{ -+ u64 kseg; -+ -+ /* XXX tsunami specific */ -+ baddr &= ~(u64)0x100000000; /* mask out hose bit */ -+ kseg = TSUNAMI_MEM(bus) + baddr; -+ return (void *)kseg; -+} -+ -+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,0) */ -+ -+/* -+ * Spin the other CPU's in an SMP system. -+ * smp_call_function() needed to be exported to modules. It will be -+ * papered over in if running on a non-SMP box. -+ */ -+static spinlock_t hold_lock = SPIN_LOCK_UNLOCKED; -+ -+#if 0 -+static void cpu_hold(void *unused) -+{ -+ spin_lock(&hold_lock); -+ spin_unlock(&hold_lock); -+} -+#endif -+ -+void cpu_hold_all(void) -+{ -+ spin_lock(&hold_lock); -+ -+#if 0 -+ { -+ int res; -+ int retries = 10; -+ -+ /* XXXXX: cannot call smp_call_function() from interrupt context */ -+ -+ do { -+ /* only request blocking retry if not in interrupt context */ -+ res = smp_call_function(cpu_hold, NULL, !in_interrupt(), 0); -+ if (res) -+ mdelay(5); -+ } while (res && retries--); -+ -+ if (res) -+ printk("cpu_hold_all: IPI timeout\n"); -+ } -+#endif -+} -+ -+void cpu_release_all(void) -+{ -+ spin_unlock(&hold_lock); -+} -+ -+void -+qsnet_proc_character_fill (long mode, char *fmt, ...) -+{ -+ int len; -+ va_list ap; -+ QSNET_PROC_PRIVATE *private = (QSNET_PROC_PRIVATE *)mode; -+ -+ /* is the buffer already full */ -+ if (private->pr_len >= private->pr_data_len) -+ return; -+ -+ /* attempt to fill up to the remaining space */ -+ va_start (ap, fmt); -+ len = vsnprintf ( & private->pr_data[private->pr_len], (private->pr_data_len - private->pr_len), fmt, ap); -+ va_end (ap); -+ -+ if (len < 0 ) -+ { -+ /* we have reached the end of buffer and need to fail all future writes -+ * the caller can check (pr_len >= pr_data_len) and recall with more space -+ */ -+ private->pr_len = private->pr_data_len; -+ return; -+ } -+ -+ /* move the length along */ -+ private->pr_len += len; -+} -+ -+int -+qsnet_proc_release (struct inode *inode, struct file *file) -+{ -+ QSNET_PROC_PRIVATE *pr = (QSNET_PROC_PRIVATE *) file->private_data; -+ -+ if (pr->pr_data) -+ KMEM_FREE (pr->pr_data, pr->pr_data_len); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+EXPORT_SYMBOL(qsnet_proc_character_fill); -+EXPORT_SYMBOL(qsnet_proc_release); -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/qsnet/Makefile linux-2.6.9/drivers/net/qsnet/qsnet/Makefile ---- clean/drivers/net/qsnet/qsnet/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/Makefile 2005-10-10 17:47:31.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/qsnet/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_QSNET) += qsnet.o -+qsnet-objs := debug.o kernel_linux.o i686_mmx.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/qsnet/Makefile.conf linux-2.6.9/drivers/net/qsnet/qsnet/Makefile.conf ---- clean/drivers/net/qsnet/qsnet/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/Makefile.conf 2005-09-07 10:39:34.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = qsnet.o -+MODULENAME = qsnet -+KOBJFILES = debug.o kernel_linux.o i686_mmx.o -+EXPORT_KOBJS = kernel_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/qsnet/qsnetkmem_linux.c linux-2.6.9/drivers/net/qsnet/qsnet/qsnetkmem_linux.c ---- clean/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 2003-08-13 06:03:27.000000000 -0400 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: qsnetkmem_linux.c,v 1.3 2003/08/13 10:03:27 fabien Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/qsnetkmem_linux.c,v $*/ -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int mark; -+ int line; -+ char file[256]; -+ -+} kmalloc_t; -+ -+ -+static LIST_HEAD(current_kmem); -+static LIST_HEAD(stored_kmem); -+ -+void -+count_kmem(struct list_head * list, long * count, long * size ) -+{ -+ long c,s; -+ struct list_head *tmp; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ -+ c = s = 0L; -+ -+ list_for_each(tmp, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ c++; -+ s += kmem_ptr->len; -+ } -+ -+ *count = c; -+ *size = s; -+} -+ -+void -+clear_kmem(struct list_head * list) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ list_for_each_safe(tmp, tmp2, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kmem_ptr->list); -+ free( kmem_ptr ); -+ } -+} -+ -+void -+move_kmem(struct list_head * dest, struct list_head *src) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kp= NULL; -+ -+ list_for_each_safe(tmp, tmp2, src) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kp->list); -+ -+/* -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+*/ -+ -+ list_add_tail(&kp->list, dest); -+ } -+} -+ -+void -+read_kmem(struct list_head * list) -+{ -+ FILE * fd; -+ char line[1024]; -+ int line_size = 100; -+ char * rep; -+ kmalloc_t * kp; -+ -+ clear_kmem(list); -+ -+ fd = fopen(QSNET_PROCFS_KMEM_DEBUG,"r"); -+ if ( fd == NULL) -+ { -+ printf("No Kmem Debug\n"); -+ return; -+ } -+ -+ rep = fgets(line,line_size, fd); -+ -+ while ( rep != NULL ) -+ { -+ kp = malloc(sizeof(kmalloc_t)); -+ -+ sscanf(line,"%p %d %d %p %p %u %d %s\n", -+ &kp->ptr, &kp->len, &kp->used_vmalloc, &kp->caller, &kp->owner, &kp->time, &kp->line, &kp->file[0]); -+ -+/* -+ printf(">>%s<<\n",line); -+ printf("%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->file); -+*/ -+ -+ list_add_tail(&kp->list, list); -+ -+ rep = fgets(line,line_size, fd); -+ } -+ fclose(fd); -+} -+ -+void -+mark_kmem(struct list_head * list, int mark) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ kp->mark = mark; -+ } -+} -+ -+kmalloc_t * -+find_kmem(kmalloc_t * value, struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if ( (kp->ptr == value->ptr) -+ && (kp->len == value->len) -+ && (kp->used_vmalloc == value->used_vmalloc ) -+ && (kp->owner == value->owner ) -+ && (kp->caller == value->caller ) -+ && (kp->time == value->time ) -+ && (kp->line == value->line ) -+ && !(strcmp(kp->file,value->file) )) -+ return kp; -+ } -+ return NULL; -+} -+ -+void -+diff_kmem(struct list_head *curr, struct list_head *stored) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ long c,s; -+ -+ mark_kmem(stored, 0); -+ mark_kmem(curr, 0); -+ -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, curr) != NULL) -+ kp->mark = 1; -+ } -+ -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, stored) != NULL) -+ kp->mark = 1; -+ } -+ -+ c=s=0L; -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("-- mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("-- %4ld %10ld \n",c,s); -+ -+ c=s=0L; -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("++ mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("++ %4ld %10ld \n",c,s); -+} -+ -+ -+void -+print_kmem(struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ -+ } -+} -+ -+void -+print_cmds() -+{ -+ long c,s; -+ -+ printf("q : quits \n"); -+ printf("r : read\n"); -+ printf("c : print current\n"); -+ printf("o : print stored\n"); -+ printf("s : store\n"); -+ -+ count_kmem(¤t_kmem, &c, &s ); -+ printf("\ncurrent : %4ld %10ld\n", c , s); -+ -+ count_kmem(&stored_kmem, &c, &s ); -+ printf("store : %4ld %10ld\n", c , s); -+ -+} -+ -+int -+main() -+{ -+ char line[128]; -+ int line_size=127; -+ int len; -+ -+ -+ while (1) -+ { -+ -+ printf(">> "); -+ fgets(line,line_size, stdin); -+ -+ -+ len = strlen( line ) -1; -+ if ( len ) -+ { -+ switch ( tolower(line[0]) ) -+ { -+ case 'q': -+ exit(0); -+ -+ case 'r' : -+ read_kmem(¤t_kmem); -+ break; -+ -+ case 'c' : -+ print_kmem(¤t_kmem); -+ break; -+ -+ case 'o' : -+ print_kmem(&stored_kmem); -+ break; -+ -+ case 's' : -+ clear_kmem(&stored_kmem); -+ move_kmem(&stored_kmem, ¤t_kmem); -+ break; -+ -+ case 'd' : -+ diff_kmem(¤t_kmem, &stored_kmem); -+ break; -+ -+ default: -+ print_cmds(); -+ } -+ -+ -+ -+ } -+ else -+ print_cmds(); -+ } -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/drivers/net/qsnet/qsnet/quadrics_version.h linux-2.6.9/drivers/net/qsnet/qsnet/quadrics_version.h ---- clean/drivers/net/qsnet/qsnet/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/qsnet/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/rms/Makefile linux-2.6.9/drivers/net/qsnet/rms/Makefile ---- clean/drivers/net/qsnet/rms/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/rms/Makefile 2005-10-10 17:47:31.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/rms/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_RMS) += rms.o -+rms-objs := rms_kern.o rms_kern_Linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -diff -urN clean/drivers/net/qsnet/rms/Makefile.conf linux-2.6.9/drivers/net/qsnet/rms/Makefile.conf ---- clean/drivers/net/qsnet/rms/Makefile.conf 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/rms/Makefile.conf 2005-09-07 10:39:48.000000000 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = rms.o -+MODULENAME = rms -+KOBJFILES = rms_kern.o rms_kern_Linux.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_RMS -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -diff -urN clean/drivers/net/qsnet/rms/quadrics_version.h linux-2.6.9/drivers/net/qsnet/rms/quadrics_version.h ---- clean/drivers/net/qsnet/rms/quadrics_version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/rms/quadrics_version.h 2005-09-07 10:39:49.000000000 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "5.11.3qsnet" -diff -urN clean/drivers/net/qsnet/rms/rms_kern.c linux-2.6.9/drivers/net/qsnet/rms/rms_kern.c ---- clean/drivers/net/qsnet/rms/rms_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/rms/rms_kern.c 2005-09-07 10:35:04.000000000 -0400 -@@ -0,0 +1,1484 @@ -+/* -+ * Copyright (c) 1996-2003 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2004-2006 by Quadrics Ltd -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * rms_kern.c: RMS kernel module -+ * -+ * $Source: /cvs/master/quadrics/rmsmod/rms_kern.c,v $ -+ */ -+ -+#ident "@(#)$Id: rms_kern.c,v 1.77.2.8 2005/09/07 14:35:04 mike Exp $" -+ -+#include -+#include -+#include -+#include -+ -+ -+/* -+ * extended stats added in version 5 -+ * patch free kernel support added in version 6 -+ */ -+#define RMS_MODVERSION 6 -+ -+#ifdef PROCESS_ACCT -+#define TIMEVAL_TO_MSEC(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000) -+#define TIMEVAL_TO_CT(tv) ((tv)->tv_sec * HZ + (tv)->tv_usec / (1000000L / HZ)) -+#endif -+ -+#ifdef get_mm_counter -+#define PROC_RSS(proc) ((proc)->mm ? get_mm_counter(proc->mm, rss) : 0) -+#else -+#ifdef RSS_ATOMIC -+#define PROC_RSS(proc) ((proc)->mm ? atomic_read(&(proc)->mm->rss) : 0) -+#else -+#define PROC_RSS(proc) ((proc)->mm ? (proc)->mm->rss : 0) -+#endif -+#endif -+ -+/* -+ * 2.6 kernels don't consistently export put_task_struct -+ */ -+ -+#ifdef free_task_struct -+#undef NO_PUTGET_TASK -+#else -+#define NO_PUTGET_TASK -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+# define RMS_NCPUS() smp_num_cpus -+#else -+# define RMS_NCPUS() num_online_cpus() -+#endif -+ -+#define CURUID() CURPROC()->uid -+#define p_pid pid -+ -+ -+/* care needed with conversion to millisecs on 32-bit Linux */ -+#ifdef LINUX_I386 -+#define CT_TO_MSEC(x) ct_to_msec(x) -+ -+uint64_t ct_to_msec(clock_t t) -+{ -+ uint64_t msecs; -+ if (t < 2000000) -+ { -+ t = (1000 * t)/HZ; -+ msecs = t; -+ } -+ else -+ { -+ t = t / HZ; -+ msecs = t * 1000; -+ } -+ return(msecs); -+} -+#else -+#define CT_TO_MSEC(x) (((x) * 1000)/HZ) -+#endif -+ -+#ifndef FALSE -+#define FALSE (0) -+#define TRUE (!FALSE) -+#endif -+ -+#include -+#include -+#ifndef NO_PTRACK -+#include -+#endif -+#include -+#ifndef NO_SHM_CLEANUP -+extern int shm_cleanup(void); -+#endif -+ -+struct cap_desc { -+ -+ struct cap_desc *next; -+ int index; /* index of capability in program */ -+ ELAN_CAPABILITY cap; /* elan capability */ -+ -+}; -+ -+struct proc_desc { -+ -+ struct proc_desc *next; -+ struct task_struct *task; -+ struct prg_desc *program; /* controlling program */ -+ int mycap; /* index of my capability */ -+ int myctx; /* context number for process */ -+ int flags; -+ int vp; /* elan virtual process number */ -+ int pid; /* process id */ -+ -+ /* last set of stats sampled */ -+ uint64_t utime; -+ uint64_t stime; -+ uint64_t majflt; -+ int maxrss; -+ -+}; -+ -+struct prg_desc { -+ -+ struct prg_desc *next; -+ int id; /* program id */ -+ int flags; /* program status flags */ -+ uid_t uid; /* user id */ -+ int ncpus; /* number of cpus allocated to program */ -+ int nprocs; /* number of processes in program */ -+ struct proc_desc *pdescs; /* processes in this program */ -+ int ncaps; /* number of capabilities */ -+ struct cap_desc *caps; /* elan capabilities */ -+ char *corepath; /* core path for parallel program */ -+ int psid; /* processor set id */ -+ -+ uint64_t cutime; /* user time accumulated by children */ -+ uint64_t cstime; /* system time accumulated by children */ -+ uint64_t start_time; /* time program created */ -+ uint64_t end_time; /* time last process exited */ -+ uint64_t sched_time; /* last time job was scheduled */ -+ uint64_t accum_atime; /* allocated time last deschedule */ -+ uint64_t memint; /* accumulated memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ uint64_t majflt; /* number of major faults */ -+ int maxrss; /* maximum size to date */ -+ -+ struct proc_dir_entry *proc_entry; -+ -+}; -+ -+static int rms_ptrack_callback (void *arg, int phase, struct task_struct *child); -+ -+static void prgsignal(struct prg_desc *program, int signo); -+static uint64_t gettime(void); -+static void freeProgram(struct prg_desc *program); -+ -+static struct prg_desc *programs = 0; -+ -+kmutex_t rms_lock; -+ -+#ifdef NO_PTRACK -+int ptrack_enabled = 0; -+#else -+int ptrack_enabled = 1; -+#endif -+ -+int rms_init(void) -+{ -+ kmutex_init (&rms_lock); -+ -+ DBG(printk("rms: initialising ptrack %d\n", ptrack_enabled)); -+ -+ return(ESUCCESS); -+} -+ -+int rms_reconfigure(void) -+{ -+ return(ESUCCESS); -+} -+ -+int rms_programs_registered(void) -+{ -+ /* -+ ** Called when trying to unload rms.mod will not succeed -+ ** if programs registered -+ */ -+ -+ struct prg_desc *program, **pp; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ { -+ if (program->nprocs != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ return(EBUSY); -+ } -+ } -+ -+ /* -+ ** We have traversed the programs list and no processes registered -+ ** Now free the memory -+ */ -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ *pp = program->next; -+ freeProgram(program); -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(ESUCCESS); -+ -+} -+ -+int rms_fini(void) -+{ -+ /* -+ * don't allow an unload if there are programs registered -+ */ -+ if (rms_programs_registered()) -+ return(EBUSY); -+ -+ kmutex_destroy (&rms_lock); -+ -+ DBG(printk("rms: removed\n")); -+ -+ return(ESUCCESS); -+} -+ -+extern struct proc_dir_entry *rms_procfs_programs; -+ -+/* -+ * display one pid per line if there isn't enough space -+ * for another pid then add "...\n" and stop -+ */ -+int pids_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ struct proc_desc *pdesc; -+ char *ptr = page; -+ int bytes = 0, nb; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ if (bytes > count - 15) -+ { -+ bytes += sprintf(ptr,"...\n"); -+ break; -+ } -+ nb = sprintf(ptr, "%d %d\n", pdesc->pid, pdesc->vp); -+ bytes += nb; -+ ptr += nb; -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(bytes); -+} -+ -+int status_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ int bytes; -+ if (program->flags & PRG_KILLED) -+ bytes = sprintf(page, "killed\n"); -+ else if (program->flags & PRG_SUSPEND) -+ bytes = sprintf(page, "suspended\n"); -+ else -+ bytes = sprintf(page, "running\n"); -+ return(bytes); -+} -+ -+void rms_create_proc_entry(struct prg_desc *program) -+{ -+ struct proc_dir_entry *p; -+ char name[32]; -+ -+ if (rms_procfs_programs) -+ { -+ sprintf(name,"%d", program->id); -+ if ((program->proc_entry = proc_mkdir(name, rms_procfs_programs)) != NULL) -+ { -+ if ((p = create_proc_entry ("pids", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = pids_callback; -+ } -+ if ((p = create_proc_entry ("status", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = status_callback; -+ } -+ } -+ } -+} -+ -+void rms_remove_proc_entry(struct prg_desc *program) -+{ -+ char name[32]; -+ if (rms_procfs_programs) -+ { -+ if (program->proc_entry) -+ { -+ remove_proc_entry ("pids", program->proc_entry); -+ remove_proc_entry ("status", program->proc_entry); -+ } -+ sprintf(name,"%d", program->id); -+ remove_proc_entry (name, rms_procfs_programs); -+ } -+} -+ -+/* -+ * find a program from its index/pid -+ */ -+static struct prg_desc *findProgram(const int id) -+{ -+ struct prg_desc *program; -+ for (program = programs; program; program = program->next) -+ if (program->id == id) -+ return(program); -+ return(0); -+} -+ -+static struct proc_desc *findProcess(const int pid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ -+ for (program = programs; program; program = program->next) -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->pid == pid) -+ return(pdesc); -+ -+ return(0); -+} -+ -+static void freeProgram(struct prg_desc *program) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ -+ rms_remove_proc_entry(program); -+ -+ while ((pdesc = program->pdescs) != NULL) -+ { -+ program->pdescs = pdesc->next; -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ } -+ -+ while ((cdesc = program->caps) != NULL) -+ { -+ program->caps = cdesc->next; -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ } -+ -+ if (program->corepath) -+ KMEM_FREE(program->corepath, MAXCOREPATHLEN + 1); -+ -+ KMEM_FREE(program, sizeof(struct prg_desc)); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+/* -+ * rms_prgcreate -+ * -+ * create a new program description -+ */ -+int rms_prgcreate(int id, uid_t uid, int cpus) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_prgcreate :: program %d pid %d uid %d cpus %d\n", id, CURPROC()->p_pid, uid, cpus)); -+ -+ /* -+ * parallel programs are created as root by the rmsd as it forks the loader -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ /* -+ * program ids must be unique -+ */ -+ kmutex_lock(&rms_lock); -+ program = findProgram(id); -+ kmutex_unlock(&rms_lock); -+ if (program) -+ return(EINVAL); -+ -+ /* -+ * create a new program description -+ */ -+ KMEM_ALLOC(program, struct prg_desc *, sizeof(struct prg_desc), TRUE); -+ if (!program) -+ return(ENOMEM); -+ -+ program->id = id; -+ program->flags = PRG_RUNNING; -+ program->ncpus = cpus; -+ program->nprocs = 1; -+ program->uid = uid; -+ program->ncaps = 0; -+ program->caps = 0; -+ program->corepath = 0; -+ program->psid = 0; -+ program->start_time = program->sched_time = gettime(); -+ program->end_time = 0; -+ program->accum_atime = 0; -+ program->cutime = 0; -+ program->cstime = 0; -+ program->maxrss = 0; -+ program->memint = 0; -+ program->majflt = 0; -+ program->ebytes = 0; -+ program->exfers = 0; -+ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (!pdesc) -+ { -+ KMEM_FREE (program,sizeof (struct prg_desc)); -+ return(ENOMEM); -+ } -+ -+ /* bump the reference count on the caller */ -+ pdesc->task = ptrack_enabled ? CURPROC() : NULL; -+ -+ pdesc->next = 0; -+ pdesc->mycap = ELAN_CAP_UNINITIALISED; -+ pdesc->myctx = ELAN_CAP_UNINITIALISED; -+ pdesc->vp = -1; /* rmsloader */ -+ pdesc->program = program; -+ pdesc->pid = CURPROC()->p_pid; -+ program->pdescs = pdesc; -+ -+ rms_create_proc_entry(program); -+ -+ kmutex_lock(&rms_lock); -+ -+#ifndef NO_PTRACK -+ -+ if (ptrack_enabled) -+ { -+ DBG(printk("rms_prgcreate :: ptrack enabled - fork callback\n")); -+ if (ptrack_register (rms_ptrack_callback, NULL) != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ KMEM_FREE(pdesc,sizeof(struct proc_desc)); -+ KMEM_FREE(program,sizeof(struct prg_desc)); -+ return(ENOMEM); -+ } -+ } -+ else -+ { -+ DBG(printk("rms_prgcreate :: ptrack disabled - no fork callback\n")); -+ } -+#endif -+ -+ program->next = programs; -+ programs = program; -+ -+ MOD_INC_USE_COUNT; -+ -+ kmutex_unlock(&rms_lock); -+ return(ESUCCESS); -+} -+ -+ -+/* -+ * can't rely on put_task_struct being exported -+ * so we need to make sure that a proc is valid -+ * before extracting the stats -+ */ -+ -+int getProcessStats(struct proc_desc *pdesc) -+{ -+ struct task_struct *task = 0; -+ -+ if (ptrack_enabled) -+ task = pdesc->task; -+ else -+ { -+ read_lock(&tasklist_lock); -+ task = find_task_by_pid(pdesc->pid); -+ } -+ -+ if (task) -+ { -+#ifdef PROCESS_ACCT -+ pdesc->utime = TIMEVAL_TO_MSEC(&task->utime); -+ pdesc->stime = TIMEVAL_TO_MSEC(&task->stime); -+ -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ pdesc->utime = CT_TO_MSEC(task->times.tms_utime); -+ pdesc->stime = CT_TO_MSEC(task->times.tms_stime); -+ -+#else -+ pdesc->utime = CT_TO_MSEC(task->utime); -+ pdesc->stime = CT_TO_MSEC(task->stime); -+#endif -+ pdesc->majflt = task->maj_flt; -+ -+ /* -+ * the ptrack exit callbacks occur before exit_mm -+ * but if ptrack is not present we can get called -+ * with task->mm = 0 -+ */ -+ pdesc->maxrss = PROC_RSS(task) >> (20 - PAGE_SHIFT); -+ } -+ -+ if (!ptrack_enabled) -+ read_unlock(&tasklist_lock); -+ -+ return(task ? 0 : -1); -+} -+ -+ -+ -+ -+/* -+ * remove a process descriptor keeping track of the -+ * accumulated resource usage -+ */ -+ -+static void removeProcDesc(struct prg_desc *program, struct proc_desc *pdesc) -+{ -+#ifndef NO_PTRACK -+ struct proc_desc *p; -+#endif -+ int maxrss; -+ -+ /* -+ * keep track of the resources used by processes that have -+ * exited, if ptrack is enabled then we will be called -+ * as the process exists, otherwise we will have the last -+ * sample -+ */ -+ getProcessStats(pdesc); -+ -+ program->cutime += pdesc->utime; -+ program->cstime += pdesc->stime; -+ program->majflt += pdesc->majflt; -+ maxrss = pdesc->maxrss; -+ -+ /* -+ * process specific shared memory cleanup requires the shm_cleanup -+ * patch, otherwise the run time system is left to do the job with -+ * a blunt axe -+ */ -+#ifndef NO_SHM_CLEANUP -+ shm_cleanup(); -+#endif -+ -+ /* -+ * remove process from program -+ */ -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ program->nprocs--; -+ -+ /* -+ * update the memory high water mark for the program -+ * -+ * safe to access the task structures if we have incremented -+ * their reference counts as they were added to the program -+ * however, the mm can be zero -+ */ -+#ifndef NO_PTRACK -+ for (p = program->pdescs; p; p = p->next) -+ maxrss += PROC_RSS(p->task) >> (20 - PAGE_SHIFT); -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+#endif -+ -+ DBG(printk("rms_removproc :: program %d procs %d cutime %lld cstime %lld mem %d\n", -+ program->id, program->nprocs, -+ (long long)program->cutime, (long long)program->cstime, -+ program->maxrss)); -+ -+ /* -+ * final update to the program if this is the last process -+ */ -+ if (program->nprocs == 0) -+ { -+ program->end_time = gettime(); -+ program->flags &= ~PRG_RUNNING; -+ program->accum_atime += program->ncpus * (program->end_time - program->sched_time); -+ DBG(printk("rms_removproc :: last process has gone\n")); -+ } -+} -+ -+ -+/* -+ * rms_prgdestroy -+ * -+ * destroy a program description -+ */ -+int rms_prgdestroy(int id) -+{ -+ struct prg_desc *program, **pp; -+ struct proc_desc *pdesc; -+ -+ int status = ESRCH; -+ -+ /* -+ * parallel programs are created and destroyed by the rmsd -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ if (program->id == id) -+ { -+ /* -+ * with ptrack disabled there won't be any exit callbacks -+ */ -+ if (!ptrack_enabled) -+ { -+ while ((pdesc = program->pdescs)) -+ { -+ program->pdescs = pdesc->next; -+ removeProcDesc(program, pdesc); -+ } -+ } -+ -+ if (program->nprocs == 0) -+ { -+ DBG(printk("rms_prgdestro :: removing program %d\n", program->id)); -+ *pp = program->next; -+ freeProgram(program); -+ status = ESUCCESS; -+ } -+ else -+ { -+ DBG(printk("rms_prgdestro :: failed to remove program %d: %d\n", program->id, program->nprocs)); -+ status = ECHILD; -+ pp = &program->next; -+ } -+ } -+ else -+ pp = &program->next; -+ } -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+/* -+ * rms_prgids -+ */ -+int rms_prgids(int maxids, int *prgids, int *nprgs) -+{ -+ struct prg_desc *program; -+ int count = 0, *buf, *bufp; -+ int status = ESUCCESS; -+ -+ if (maxids < 1) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ count++; -+ count = MIN(count, maxids); -+ -+ if (count > 0) -+ { -+ KMEM_ALLOC(buf, int *, count * sizeof(int), TRUE); -+ if (buf) -+ { -+ for (program = programs, bufp=buf; bufp < buf + count; -+ program = program->next) -+ *bufp++ = program->id; -+ -+ if (copyout(buf, prgids, sizeof(int) * count)) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, count * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, nprgs, sizeof(int))) -+ status = EFAULT; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * rms_prginfo -+ */ -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidp, *buf; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(buf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ if (buf) -+ { -+ for (pidp = buf, pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ *pidp++ = pdesc->pid; -+ -+ if (copyout(buf, pids, sizeof(pid_t) * MIN(program->nprocs, maxpids))) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, program->nprocs * sizeof(pid_t)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&program->nprocs, nprocs, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * Deliver a signal to all the processes in a program -+ */ -+static void prgsignal(struct prg_desc *program, int signo) -+{ -+ struct proc_desc *pdesc; -+ DBG(printk("rms_prgsignal :: program %d signal %d\n", program->id, signo)); -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ kill_proc(pdesc->pid, signo, 1); -+} -+ -+int rms_prgsignal(int id, int signo) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ prgsignal(program, signo); -+ if (signo == SIGKILL) -+ program->flags |= PRG_KILLED; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap) -+{ -+ struct prg_desc *program; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ if (cap == NULL) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ KMEM_ALLOC(cdesc, struct cap_desc *, sizeof(struct cap_desc), TRUE); -+ if (cdesc) -+ { -+ cdesc->index = index; -+ if (copyin(cap, &cdesc->cap, sizeof(ELAN_CAPABILITY))) -+ { -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ status = EFAULT; -+ } -+ else -+ { -+ DBG(printk("rms_prgaddcap :: program %d index %d context %d<-->%d\n", -+ program->id, index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ cdesc->next = program->caps; -+ program->caps = cdesc; -+ program->ncaps++; -+ } -+ } -+ else -+ status = ENOMEM; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+static uint64_t gettime(void) -+{ -+ uint64_t now; -+ -+ struct timeval tv; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) -+ get_fast_time(&tv); -+#else -+ do_gettimeofday(&tv); -+#endif -+ now = tv.tv_sec * 1000 + tv.tv_usec / 1000; -+ return(now); -+} -+ -+ -+/* -+ * new stats collection interface, 64-bit with addition of Elan stats -+ */ -+int rms_prggetstats(int id, prgstats_t *stats) -+{ -+ struct prg_desc *program = 0; -+ struct proc_desc *pdesc; -+ -+ int maxrss, status = ESUCCESS; -+ prgstats_t totals; -+ uint64_t now = gettime(); -+ -+ kmutex_lock(&rms_lock); -+ -+ if (id < 0) -+ { -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ program = pdesc->program; -+ } -+ else -+ program = findProgram(id); -+ -+ if (program) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ totals.flags = program->flags; -+ totals.ncpus = program->ncpus; -+ maxrss = 0; -+ -+ if (program->nprocs > 0) -+ totals.etime = now - program->start_time; -+ else -+ totals.etime = program->end_time - program->start_time; -+ -+ totals.atime = program->accum_atime; -+ if (program->flags & PRG_RUNNING) -+ totals.atime += program->ncpus * (now - program->sched_time); -+ -+ totals.utime = program->cutime; -+ totals.stime = program->cstime; -+ totals.pageflts = program->majflt; -+ totals.memint = program->memint; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ getProcessStats(pdesc); -+ totals.utime += pdesc->utime; -+ totals.stime += pdesc->stime; -+ totals.pageflts += pdesc->majflt; -+ maxrss += pdesc->maxrss; -+ } -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ totals.mem = program->maxrss; -+ totals.ebytes = program->ebytes; -+ totals.exfers = program->exfers; -+ -+ DBG(printk("rms_prggetsta :: program %d utime %lld stime %lld mem %d flt %lld\n", -+ program->id, (long long)totals.utime, (long long)totals.stime, -+ totals.mem, (long long)totals.pageflts)); -+ -+ if (copyout(&totals, stats, sizeof(prgstats_t))) -+ status = EFAULT; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_prgsuspend(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_RUNNING; -+ program->flags |= PRG_SUSPEND; -+ program->accum_atime += program->ncpus * (gettime() - program->sched_time); -+ -+ /* suspend/resume just use signals for now */ -+ prgsignal(program, SIGSTOP); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_prgresume(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_SUSPEND; -+ program->flags |= PRG_RUNNING; -+ program->sched_time = gettime(); -+ prgsignal(program, SIGCONT); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_ncaps(int *ncaps) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->ncaps, ncaps, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getprgid(pid_t pid, int *id) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (pid == 0) -+ pid = CURPROC()->p_pid; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->id, id, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_setcap(int index, int ctx) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = EINVAL; -+ -+ DBG(printk("rms_setcap :: process %d cap %d ctx %d\n",CURPROC()->p_pid,index,ctx)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index && 0 <= ctx && ctx <= (cdesc->cap.cap_highcontext - cdesc->cap.cap_lowcontext + 1)) -+ { -+ pdesc->mycap = index; -+ pdesc->myctx = cdesc->cap.cap_lowcontext + ctx; -+ status = ESUCCESS; -+ } -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_mycap(int *index) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_mycap :: process %d\n", CURPROC()->p_pid)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ DBG(printk("rms_mycap :: found process %d mycap = %d\n", CURPROC()->p_pid, pdesc->mycap)); -+ if (copyout(&pdesc->mycap, index, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getcap(int index, ELAN_CAPABILITY *cap) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index) -+ break; -+ -+ if (cdesc) -+ { -+ /* tell each process about its own context */ -+ cdesc->cap.cap_mycontext = pdesc->myctx; -+ -+ if (copyout(&cdesc->cap, cap, ELAN_CAP_SIZE(&cdesc->cap))) -+ status = EFAULT; -+ -+ DBG(printk("rms_getcap :: program %d index %d context %d<-->%d\n", pdesc->program->id, -+ cdesc->index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ } -+ else -+ status = EINVAL; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+static void -+addProcDesc (struct proc_desc *pdesc, -+ struct prg_desc *program, -+ struct proc_desc *parent, -+ int pid) -+{ -+ pdesc->mycap = (parent ? parent->mycap : ELAN_CAP_UNINITIALISED); -+ pdesc->myctx = (parent ? parent->myctx : ELAN_CAP_UNINITIALISED); -+ pdesc->program = program; -+ pdesc->vp = -1; /* assigned by elaninitdone */ -+ pdesc->pid = pid; -+ -+ pdesc->next = program->pdescs; -+ program->pdescs = pdesc; -+ program->nprocs++; -+} -+ -+static int -+rms_fork_callback (struct task_struct *curproc, struct task_struct *child) -+{ -+ struct prg_desc *program; -+ struct proc_desc *parent; -+ struct proc_desc *pdesc = NULL; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_fork_func :: phase is fork pid %d child %d\n", curproc->p_pid, child->p_pid)); -+ -+ /* -+ * find the process that forked -+ */ -+ if ((parent = findProcess(curproc->p_pid)) != NULL) -+ { -+ program = parent->program; -+ -+ DBG(printk("rms_fork_func :: program is %d flags %d\n", program->id, program->flags)); -+ -+ /* -+ * processes can be blocked in fork while prgsignal is in progress -+ * so check to see if the PRG_KILLED flag is set -+ */ -+ if (program->flags & PRG_KILLED) -+ DBG(printk("rms_fork_func :: fork handler called after program killed\n")); -+ else -+ { -+ /* -+ * create a new process description and add to program -+ */ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (pdesc == NULL) -+ printk("rms_creatproc :: memory allocation failed\n"); -+ else -+ { -+ addProcDesc (pdesc, program, parent, child->p_pid); -+ -+ pdesc->task = child; -+ -+ } -+ } -+ } -+ else -+ DBG(printk("rms_fork_func :: no program\n")); -+ -+ kmutex_unlock (&rms_lock); -+ -+ return(pdesc == NULL); -+} -+ -+static void -+rms_exit_callback (struct task_struct *curproc) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc, **pdescp; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_exit_func :: process %d exiting\n", curproc->p_pid)); -+ -+ /* -+ * find the process that exited and accumulate -+ * resource usage in its parent program -+ */ -+ for (program = programs, pdesc = 0; program && !pdesc; program = program->next) -+ { -+ pdescp = &program->pdescs; -+ while ((pdesc = *pdescp) != NULL) -+ { -+ if (pdesc->pid == curproc->p_pid) -+ { -+ *pdescp = pdesc->next; -+ removeProcDesc(program, pdesc); -+ break; -+ } -+ else -+ pdescp = &pdesc->next; -+ } -+ } -+ kmutex_unlock (&rms_lock); -+} -+ -+#ifndef NO_PTRACK -+ -+static int -+rms_ptrack_callback (void *arg, int phase, struct task_struct *child) -+{ -+ switch (phase) -+ { -+ case PTRACK_PHASE_CLONE: -+ if (rms_fork_callback (current, child)) -+ return PTRACK_DENIED; -+ else -+ return PTRACK_INNHERIT; -+ -+ case PTRACK_PHASE_CLONE_FAIL: -+ DBG(printk("rms_fork_func :: fork failed pid %d child %d\n", current->p_pid, child->p_pid)); -+ rms_exit_callback(child); -+ break; -+ -+ case PTRACK_PHASE_EXIT: -+ rms_exit_callback(current); -+ break; -+ } -+ return PTRACK_FINISHED; -+} -+ -+#endif -+ -+/* -+ * rms_elaninitdone - mark a process as having successfully completed elan initialisation -+ */ -+int rms_elaninitdone(int vp) -+{ -+ int status = ESUCCESS; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_elaninit :: process %d vp %d\n", CURPROC()->p_pid, vp)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ pdesc->vp = vp; -+ else -+ status = ESRCH; -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+/* -+ * rms_prgelanpids - return the ids of processes that have completed elan initialisation -+ */ -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidbuf; -+ int status = ESUCCESS, count = 0, *vpbuf; -+ -+ DBG(printk("rms_elanpids :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(pidbuf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ KMEM_ALLOC(vpbuf, int *, program->nprocs * sizeof(int), TRUE); -+ if (pidbuf && vpbuf) -+ { -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->vp >= 0) -+ { -+ pidbuf[count] = pdesc->pid; -+ vpbuf[count] = pdesc->vp; -+ count++; -+ } -+ -+ if (count > 0 && (copyout(pidbuf, pids, sizeof(pid_t) * MIN(count, maxpids)) || -+ copyout(vpbuf, vps, sizeof(int) * MIN(count, maxpids)))) -+ status = EFAULT; -+ -+ KMEM_FREE(pidbuf, program->nprocs * sizeof(pid_t)); -+ KMEM_FREE(vpbuf, program->nprocs * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, npids, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+ -+} -+ -+int rms_setpset(int psid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ program = pdesc->program; -+ program->psid = psid; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_getpset(int id, int *psid) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (copyout(&program->psid, psid, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int -+rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_setelanst :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->ebytes = ebytes; -+ program->exfers = exfers; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int -+rms_modversion(void) -+{ -+ return(RMS_MODVERSION); -+} -+ -+int -+rms_addproc(int id, pid_t pid) -+{ -+ struct prg_desc *program; -+ struct task_struct *task; -+ struct proc_desc *parent; -+ struct proc_desc *pdesc; -+ int status; -+ -+ DBG(printk("rms_addproc :: program %d proc %d pid %d\n", id, CURPROC()->p_pid, pid)); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ if (findProcess(pid)) -+ status = ESRCH; -+ else -+ { -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ -+ if (pdesc == NULL) -+ status = ENOMEM; -+ else -+ { -+ read_lock(&tasklist_lock); -+ -+ if ((task = find_task_by_pid(pid)) == NULL) -+ status = ESRCH; -+ else -+ { -+#ifdef NO_NPTL -+ pid_t ppid = task->p_pptr->pid; -+#else -+ pid_t ppid = task->parent->pid; -+#endif -+ for (parent = program->pdescs; parent; parent = parent->next) -+ if (parent->pid == ppid) -+ break; -+ -+ addProcDesc (pdesc, program, parent, pid); -+ status = ESUCCESS; -+ } -+ -+ read_unlock (&tasklist_lock); -+ -+ if (status != ESUCCESS) -+ KMEM_FREE (pdesc, sizeof (struct proc_desc)); -+ } -+ } -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int -+rms_removeproc(int id, pid_t pid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc, **pdescp; -+ int status; -+ -+ DBG(printk("rms_removproc :: program %d proc %d pid %d\n", id, CURPROC()->p_pid, pid)); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ status = ESRCH; -+ pdescp = &program->pdescs; -+ while ((pdesc = *pdescp) != NULL) -+ { -+ if (pdesc->pid == pid) -+ { -+ -+ *pdescp = pdesc->next; -+ removeProcDesc(program, pdesc); -+ status = ESUCCESS; -+ break; -+ } -+ else -+ pdescp = &pdesc->next; -+ } -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -+ -+ -+ -+ -+ -+ -diff -urN clean/drivers/net/qsnet/rms/rms_kern_Linux.c linux-2.6.9/drivers/net/qsnet/rms/rms_kern_Linux.c ---- clean/drivers/net/qsnet/rms/rms_kern_Linux.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/drivers/net/qsnet/rms/rms_kern_Linux.c 2005-09-07 10:35:04.000000000 -0400 -@@ -0,0 +1,489 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: rms_kern_Linux.c,v 1.25.2.3 2005/09/07 14:35:04 mike Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rms_kern_Linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#ifndef NO_PTRACK -+#include -+#endif -+ -+#include -+#include -+ -+extern int ptrack_enabled; -+ -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("RMS support module"); -+MODULE_LICENSE("GPL"); -+ -+#ifndef NO_PTRACK -+module_param(ptrack_enabled, uint, 0); -+#endif -+ -+int rms_debug = 0; -+ -+ctl_table rms_table[] = { -+ { -+ .ctl_name = 1, -+ .procname = "rms_debug", -+ .data = &rms_debug, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .child = NULL, -+ .proc_handler = &proc_dointvec, -+ }, -+ {0} -+}; -+ -+ctl_table rms_root_table[] = { -+ { -+ .ctl_name = CTL_DEBUG, -+ .procname = "rms", -+ .data = NULL, -+ .maxlen = 0, -+ .mode = 0555, -+ .child = rms_table, -+ }, -+ {0} -+}; -+ -+static struct ctl_table_header *rms_sysctl_header; -+ -+static int rms_open (struct inode *ino, struct file *fp); -+static int rms_release (struct inode *ino, struct file *fp); -+static int rms_ioctl (struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32_cmds[] = -+{ -+ RMSIO_GETPRGID32, -+ RMSIO_GETCAP32 -+}; -+ -+static int rms_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+static struct file_operations rms_fops = -+{ -+ .owner = THIS_MODULE, -+ .ioctl = rms_ioctl, -+ .open = rms_open, -+ .release = rms_release, -+}; -+ -+struct proc_dir_entry *rms_procfs_programs; -+static struct proc_dir_entry *rms_procfs_root; -+ -+int version_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ return(sprintf(page, "$Id: rms_kern_Linux.c,v 1.25.2.3 2005/09/07 14:35:04 mike Exp $\n")); -+} -+ -+int ptrack_callback(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ if (ptrack_enabled) -+ return(sprintf(page, "enabled\n")); -+ else -+ return(sprintf(page, "disabled\n")); -+} -+ -+ -+static int __init rms_start(void) -+{ -+ struct proc_dir_entry *p; -+ int res; -+ -+ if ((rms_sysctl_header = register_sysctl_table(rms_root_table, 1)) == 0) -+ { -+ printk ("rmsmod: failed to register sysctl table\n"); -+ return (-ENXIO); -+ } -+ -+ if ((rms_procfs_root = proc_mkdir("rms", NULL)) == NULL || -+ (rms_procfs_programs = proc_mkdir("programs", rms_procfs_root)) == NULL || -+ (p = create_proc_entry ("control", S_IRUGO, rms_procfs_root)) == NULL) -+ { -+ unregister_sysctl_table (rms_sysctl_header); -+ printk ("rmsmod: failed to register /proc/rms\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &rms_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ -+ if ((p = create_proc_entry ("version", S_IRUGO, rms_procfs_root)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->read_proc = version_callback; -+ } -+ -+#ifndef NO_PTRACK -+ if ((p = create_proc_entry ("ptrack", S_IRUGO, rms_procfs_root)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->read_proc = ptrack_callback; -+ } -+#endif -+ -+ if ((res = rms_init()) != ESUCCESS) -+ { -+#ifndef NO_PTRACK -+ remove_proc_entry ("ptrack", rms_procfs_root); -+#endif -+ remove_proc_entry ("version", rms_procfs_root); -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table (rms_sysctl_header); -+ return (-res); -+ } -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (rms_ioctl32_cmds[i], rms_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ return (0); -+} -+ -+static void __exit rms_exit(void) -+{ -+ rms_fini(); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ register int i; -+ -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (rms_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+#ifndef NO_PTRACK -+ remove_proc_entry ("ptrack", rms_procfs_root); -+#endif -+ remove_proc_entry ("version", rms_procfs_root); -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table(rms_sysctl_header); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(rms_start); -+module_exit(rms_exit); -+ -+static int -+rms_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ -+ return (0); -+} -+ -+static int -+rms_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+rms_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res; -+ -+ /* printk ("rmsmod: ioctl %x\n", cmd); */ -+ -+ switch (cmd) -+ { -+/* no corepath support in Linux yet */ -+#if 0 -+ case RMSIO_SETCOREPATH: -+ res = rms_setcorepath((caddr_t)arg); -+ break; -+ -+ case RMSIO_GETCOREPATH: -+ { -+ RMSIO_GETCOREPATH_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcorepath(args.pid, args.corepath, args.maxlen); -+ break; -+ } -+#endif -+ -+ case RMSIO_PRGCREATE: -+ { -+ RMSIO_PRGCREATE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgcreate(args.id, args.uid, args.cpus); -+ break; -+ } -+ -+ case RMSIO_PRGDESTROY: -+ res = rms_prgdestroy(arg); -+ break; -+ -+ case RMSIO_PRGIDS: -+ { -+ RMSIO_PRGIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgids(args.maxids, args.prgids, args.nprgs); -+ break; -+ } -+ -+ case RMSIO_PRGINFO: -+ { -+ RMSIO_PRGINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prginfo(args.id, args.maxpids, args.pids, args.nprocs); -+ break; -+ } -+ -+ case RMSIO_PRGSIGNAL: -+ { -+ RMSIO_PRGSIGNAL_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgsignal(args.id, args.signo); -+ break; -+ } -+ -+ case RMSIO_PRGADDCAP: -+ { -+ RMSIO_PRGADDCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgaddcap(args.id, args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_SETCAP: -+ { -+ RMSIO_SETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_setcap(args.index, args.ctx); -+ break; -+ } -+ -+ case RMSIO_NCAPS: -+ res = rms_ncaps((int *)arg); -+ break; -+ -+ case RMSIO_GETPRGID: -+ { -+ RMSIO_GETPRGID_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, args.id); -+ break; -+ } -+ -+ case RMSIO_GETMYCAP: -+ res = rms_mycap((int *)arg); -+ break; -+ -+ case RMSIO_GETCAP: -+ { -+ RMSIO_GETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS: -+ { -+ /* no longer supported */ -+ res = EINVAL; -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS2: -+ { -+ RMSIO_PRGGETSTATS2_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prggetstats(args.id, args.stats); -+ break; -+ } -+ -+ case RMSIO_PRGSUSPEND: -+ res = rms_prgsuspend(arg); -+ break; -+ -+ case RMSIO_PRGRESUME: -+ res = rms_prgresume(arg); -+ break; -+ -+ case RMSIO_ELANINITDONE: -+ res = rms_elaninitdone(arg); -+ break; -+ -+ case RMSIO_PRGELANPIDS: -+ { -+ RMSIO_PRGELANPIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgelanpids(args.id, args.maxpids, args.vps, args.pids, args.npids); -+ break; -+ } -+ -+ case RMSIO_SETELANSTATS: -+ { -+ RMSIO_SETELANSTATS_STRUCT args; -+ elanstats_t estats; -+ -+ if (copy_from_user(&args, (void *)arg, sizeof(args)) || -+ copy_from_user(&estats, (void *)args.estats, sizeof(estats))) -+ return(-EFAULT); -+ -+ res = rms_setelanstats(args.id, estats.ebytes, estats.exfers); -+ break; -+ } -+ -+ case RMSIO_MODVERSION: -+ { -+ RMSIO_MODVERSION_STRUCT args; -+ int version = rms_modversion(); -+ -+ if (copy_from_user (&args, (void *)arg, sizeof (args))) -+ return (-EFAULT); -+ -+ if (copyout(&version, args.version, sizeof(int))) -+ res = EFAULT; -+ else -+ res = ESUCCESS; -+ -+ break; -+ } -+ -+ /* -+ * Patch free kernel support, proc entries added manually -+ */ -+ case RMSIO_ADDPROC: -+ { -+ RMSIO_PROC_STRUCT args; -+ -+ if (copy_from_user (&args, (void *)arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_addproc(args.id, args.pid); -+ break; -+ } -+ case RMSIO_REMOVEPROC: -+ { -+ RMSIO_PROC_STRUCT args; -+ -+ if (copy_from_user (&args, (void *)arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_removeproc(args.id, args.pid); -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ /* printk ("rmsmod: ioctl %x res %d\n", cmd, res); */ -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ int res; -+ -+ switch (cmd) -+ { -+ case RMSIO_GETPRGID32: -+ { -+ RMSIO_GETPRGID_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, (int *)(unsigned long) args.idptr); -+ break; -+ } -+ -+ case RMSIO_GETCAP32: -+ { -+ RMSIO_GETCAP_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, (ELAN_CAPABILITY *)(unsigned long) args.capptr); -+ break; -+ } -+ -+ default: -+ return (sys_ioctl (fd, cmd, arg)); -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/drivers/net/Kconfig linux-2.6.9/drivers/net/Kconfig ---- clean/drivers/net/Kconfig 2005-10-10 18:50:31.000000000 -0400 -+++ linux-2.6.9/drivers/net/Kconfig 2005-10-10 18:50:34.000000000 -0400 -@@ -2271,6 +2271,8 @@ - - source "drivers/net/tokenring/Kconfig" - -+source "drivers/net/qsnet/Kconfig" -+ - source "drivers/net/wireless/Kconfig" - - source "drivers/net/pcmcia/Kconfig" ---- clean/drivers/net/Makefile 2005-10-10 18:59:11.000000000 -0400 -+++ linux-2.6.9/drivers/net/Makefile 2005-10-10 18:59:28.000000000 -0400 -@@ -197,3 +197,4 @@ - - obj-$(CONFIG_NETCONSOLE) += netconsole.o - obj-$(CONFIG_NETDUMP) += netdump.o -+obj-$(CONFIG_QSNET) += qsnet/ -diff -urN clean/fs/exec.c linux-2.6.9/fs/exec.c ---- clean/fs/exec.c 2005-10-10 17:43:57.000000000 -0400 -+++ linux-2.6.9/fs/exec.c 2005-10-10 17:47:17.000000000 -0400 -@@ -54,6 +54,8 @@ - #include - #endif - -+#include -+ - int core_uses_pid; - char core_pattern[65] = "core"; - int suid_dumpable = 0; -@@ -1175,6 +1177,9 @@ - if (retval < 0) - goto out; - -+ /* notify any ptrack callbacks of the process exec */ -+ ptrack_call_callbacks(PTRACK_PHASE_EXEC, NULL); -+ - retval = search_binary_handler(bprm,regs); - if (retval >= 0) { - free_arg_pages(bprm); -diff -urN clean/fs/open.c linux-2.6.9/fs/open.c ---- clean/fs/open.c 2005-10-10 17:43:57.000000000 -0400 -+++ linux-2.6.9/fs/open.c 2005-10-10 17:47:17.000000000 -0400 -@@ -1029,6 +1029,8 @@ - goto out; - } - -+EXPORT_SYMBOL(sys_open); -+ - #ifndef __alpha__ - - /* -diff -urN clean/fs/read_write.c linux-2.6.9/fs/read_write.c ---- clean/fs/read_write.c 2005-05-13 13:39:11.000000000 -0400 -+++ linux-2.6.9/fs/read_write.c 2005-10-10 17:47:17.000000000 -0400 -@@ -145,6 +145,7 @@ - bad: - return retval; - } -+EXPORT_SYMBOL(sys_lseek); - - #ifdef __ARCH_WANT_SYS_LLSEEK - asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, -diff -urN clean/fs/select.c linux-2.6.9/fs/select.c ---- clean/fs/select.c 2005-05-13 13:39:11.000000000 -0400 -+++ linux-2.6.9/fs/select.c 2005-10-10 17:47:17.000000000 -0400 -@@ -529,3 +529,4 @@ - poll_freewait(&table); - return err; - } -+EXPORT_SYMBOL_GPL(sys_poll); -diff -urN clean/include/elan/bitmap.h linux-2.6.9/include/elan/bitmap.h ---- clean/include/elan/bitmap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/bitmap.h 2004-01-20 12:32:15.000000000 -0500 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_BITMAP_H -+#define __QSNET_BITMAP_H -+ -+#ident "$Id: bitmap.h,v 1.5 2004/01/20 17:32:15 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/bitmap.h,v $ */ -+ -+typedef unsigned int bitmap_t; -+ -+#define BT_NBIPUL 32 /* n bits per bitmap_t */ -+#define BT_ULSHIFT 5 /* log 2 BT_NBIPUL to extract word index */ -+#define BT_ULMASK 0x1f /* to extract bit index */ -+ -+#define BT_WIM(bitmap,bitindex) ((bitmap)[(bitindex) >> BT_ULSHIFT]) /* word in map */ -+#define BT_BIW(bitindex) (1 << ((bitindex) & BT_ULMASK)) /* bit in word */ -+ -+/* BT_BITOUL -- n bits to n words */ -+#define BT_BITOUL(nbits) (((nbits) + BT_NBIPUL -1) / BT_NBIPUL) -+ -+#define BT_TEST(bitmap,bitindex) ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0) -+#define BT_SET(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } while (0) -+#define BT_CLEAR(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } while (0) -+ -+/* return first free bit in the bitmap, or -1 for failure */ -+extern int bt_freebit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the lowest set bit in the bitmap or -1 for failure */ -+extern int bt_lowbit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the next set/clear bit in the bitmap or -1 for failure */ -+extern int bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset); -+ -+/* copy/zero/fill/compare a bit map */ -+extern void bt_copy (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_zero (bitmap_t *a, int nbits); -+extern void bt_fill (bitmap_t *a, int nbits); -+extern int bt_cmp (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* intersect bitmap 'a' with bitmap 'b' and return in 'a' */ -+extern void bt_intersect (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* remove/add bitmap 'b' from bitmap 'a' */ -+extern void bt_remove (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_add (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* check whether bitmap 'a' spans bitmap 'b' */ -+extern int bt_spans (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* copy [base,base+nbits-1] from 'a' to 'b' */ -+extern void bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits); -+ -+/* find bits clear in 'a' and set in 'b', put result in 'c' */ -+extern void bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* find bits set in 'a' and clear in 'b', put result in 'c' */ -+extern void bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* return number of bits set in bitmap */ -+extern int bt_nbits (bitmap_t *a, int nbits); -+ -+ -+#endif /* __QSNET_BITMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/capability.h linux-2.6.9/include/elan/capability.h ---- clean/include/elan/capability.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/capability.h 2005-05-17 05:52:53.000000000 -0400 -@@ -0,0 +1,198 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.h,v 1.18 2005/05/17 09:52:53 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.h,v $*/ -+ -+#ifndef __ELAN_CAPABILITY_H -+#define __ELAN_CAPABILITY_H -+ -+#include -+ -+/* Maximum number of rails */ -+#define ELAN_MAX_RAILS (31) -+/* Maximum number of virtual processes we support */ -+#define ELAN_MAX_VPS (16384) -+ -+/* Number of words in a bitmap capability */ -+#define ELAN_BITMAPSIZE BT_BITOUL(ELAN_MAX_VPS) -+ -+/* Guaranteed invalid values */ -+#define ELAN_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN_INVALID_NODE (0xFFFF) -+#define ELAN_INVALID_CONTEXT (0xFFFF) -+ -+/* Number of values in a user key */ -+#define ELAN_USERKEY_ENTRIES 4 -+ -+typedef void * ELAN_CAP_OWNER; -+ -+/* -+ * When used in userspace this is relative to the base of -+ * the capabality but is an absolute location for kernel space. -+ */ -+typedef struct elan_location -+{ -+ unsigned short loc_node; -+ unsigned short loc_context; -+} ELAN_LOCATION; -+ -+typedef struct elan_userkey -+{ -+ unsigned key_values[ELAN_USERKEY_ENTRIES]; -+} ELAN_USERKEY; -+ -+typedef struct elan_capability -+{ -+ ELAN_USERKEY cap_userkey; /* User defined protection */ -+ -+ int cap_version; /* Version number */ -+ unsigned short cap_type; /* Capability Type */ -+ unsigned short cap_spare; /* spare was cap_elan_type */ -+ -+ int cap_lowcontext; /* low context number in block */ -+ int cap_highcontext; /* high context number in block */ -+ int cap_mycontext; /* my context number */ -+ -+ int cap_lownode; /* low elan id of group */ -+ int cap_highnode; /* high elan id of group */ -+ -+ unsigned int cap_railmask; /* which rails this capability is valid for */ -+ -+ bitmap_t cap_bitmap[ELAN_BITMAPSIZE]; /* Bitmap of process to processor translation */ -+} ELAN_CAPABILITY; -+ -+#define ELAN_CAP_UNINITIALISED (-1) -+ -+#define ELAN_CAP_VERSION_NUMBER (0x00010002) -+ -+#define ELAN_CAP_NUM_NODES(cap) ((cap)->cap_highnode - (cap)->cap_lownode + 1) -+#define ELAN_CAP_NUM_CONTEXTS(cap) ((cap)->cap_highcontext - (cap)->cap_lowcontext + 1) -+ -+/* using or defining our own MIN/MAX had confilicts with dunix so we define ELAN_ ones */ -+#define ELAN_MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define ELAN_MAX(a,b) ((a) > (b) ? (a) : (b)) -+#define ELAN_CAP_BITMAPSIZE(cap) (ELAN_MAX (ELAN_MIN (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap), ELAN_MAX_VPS), 0)) -+ -+#define ELAN_CAP_SIZE(cap) (offsetof (ELAN_CAPABILITY, cap_bitmap[BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap))])) -+#define ELAN_CAP_ENTRIES(cap) (((cap)->cap_type & ELAN_CAP_TYPE_NO_BITMAP) ? ELAN_CAP_BITMAPSIZE((cap)) : bt_nbits((cap)->cap_bitmap, ELAN_CAP_BITMAPSIZE((cap)))) -+ -+#define ELAN_CAP_IS_RAIL_SET(cap,rail) ((cap)->cap_railmask & (1<cap_userkey.key_values[0] == (cap2)->cap_userkey.key_values[0] && \ -+ (cap1)->cap_userkey.key_values[1] == (cap2)->cap_userkey.key_values[1] && \ -+ (cap1)->cap_userkey.key_values[2] == (cap2)->cap_userkey.key_values[2] && \ -+ (cap1)->cap_userkey.key_values[3] == (cap2)->cap_userkey.key_values[3]) -+ -+#define ELAN_CAP_TYPE_MATCH(cap1,cap2) ((cap1)->cap_version == (cap2)->cap_version && \ -+ (cap1)->cap_type == (cap2)->cap_type) -+ -+#define ELAN_CAP_GEOM_MATCH(cap1,cap2) ((cap1)->cap_lowcontext == (cap2)->cap_lowcontext && \ -+ (cap1)->cap_highcontext == (cap2)->cap_highcontext && \ -+ (cap1)->cap_lownode == (cap2)->cap_lownode && \ -+ (cap1)->cap_highnode == (cap2)->cap_highnode && \ -+ (cap1)->cap_railmask == (cap2)->cap_railmask && \ -+ !bcmp (&(cap1)->cap_bitmap[0], &(cap2)->cap_bitmap[0], \ -+ BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap1)*sizeof(bitmap_t)))) -+ -+#define ELAN_CAP_MATCH(cap1,cap2) (ELAN_CAP_KEY_MATCH (cap1, cap2) && \ -+ ELAN_CAP_TYPE_MATCH (cap1, cap2) && \ -+ ELAN_CAP_GEOM_MATCH (cap1, cap2)) -+ -+#define ELAN_CAP_VALID_MYCONTEXT(cap) ( ((cap)->cap_lowcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_mycontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_highcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_lowcontext <= (cap)->cap_mycontext) \ -+ && ((cap)->cap_mycontext <= (cap)->cap_highcontext)) -+ -+/* -+ * Definitions for type -+ */ -+#define ELAN_CAP_TYPE_BLOCK 1 /* Block distribution */ -+#define ELAN_CAP_TYPE_CYCLIC 2 /* Cyclic distribution */ -+#define ELAN_CAP_TYPE_KERNEL 3 /* Kernel capability */ -+ -+#define ELAN_CAP_TYPE_MASK (0xFFF) /* Mask for type */ -+ -+/* OR these bits in for extra features */ -+#define ELAN_CAP_TYPE_HWTEST (1 << 12) /* Hardware test capability type */ -+#define ELAN_CAP_TYPE_MULTI_RAIL (1 << 13) /* "new" multi rail capability */ -+#define ELAN_CAP_TYPE_NO_BITMAP (1 << 14) /* don't use bit map */ -+#define ELAN_CAP_TYPE_BROADCASTABLE (1 << 15) /* broadcastable */ -+ -+ -+extern void elan_nullcap (ELAN_CAPABILITY *cap); -+extern char *elan_capability_string (ELAN_CAPABILITY *cap, char *str); -+extern ELAN_LOCATION elan_vp2location (unsigned process, ELAN_CAPABILITY *cap); -+extern int elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap); -+extern int elan_nvps (ELAN_CAPABILITY *cap); -+extern int elan_nlocal (int node, ELAN_CAPABILITY *cap); -+extern int elan_maxlocal (ELAN_CAPABILITY *cap); -+extern int elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size); -+extern int elan_nrails (ELAN_CAPABILITY *cap); -+extern int elan_rails (ELAN_CAPABILITY *cap, int *rails); -+extern int elan_cap_overlap (ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2); -+ -+/* -+ * capability creation/access fns provide for running -+ * new libelan code on old OS releases -+ */ -+extern int elan_lowcontext(ELAN_CAPABILITY *cap); -+extern int elan_mycontext(ELAN_CAPABILITY *cap); -+extern int elan_highcontext(ELAN_CAPABILITY *cap); -+extern int elan_lownode(ELAN_CAPABILITY *cap); -+extern int elan_highnode(ELAN_CAPABILITY *cap); -+extern int elan_captype(ELAN_CAPABILITY *cap); -+extern int elan_railmask(ELAN_CAPABILITY *cap); -+ -+extern int elan_getenvCap (ELAN_CAPABILITY *cap, int index); -+extern ELAN_CAPABILITY *elan_createCapability(void); -+extern ELAN_CAPABILITY *elan_copyCapability(ELAN_CAPABILITY *from, int ctxShift); -+extern int elan_generateCapability(char *string); -+extern int elan_getMachinesCap (char *filename, ELAN_CAPABILITY *cap); -+ -+typedef struct elan_cap_struct -+{ -+ ELAN_CAP_OWNER owner; -+ ELAN_CAPABILITY cap; -+ -+ int attached; /* count of people attached */ -+ unsigned int active; /* ie not being destroyed */ -+} ELAN_CAP_STRUCT; -+ -+#if ! defined(__KERNEL__) -+extern void elan_get_random_key(ELAN_USERKEY *key); -+extern int elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp); -+#endif -+ -+#if defined(__KERNEL__) -+/* capability.c */ -+extern int elan_validate_cap (ELAN_CAPABILITY *cap); -+extern int elan_validate_map (ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_create_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_destroy_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_create_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+extern int elan_destroy_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+typedef void (*ELAN_DESTROY_CB)(void *args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_attach_cap (ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB callback); -+extern int elan_detach_cap (ELAN_CAPABILITY *cap, unsigned int rail); -+ -+extern int elan_get_caps (uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps); -+extern int elan_cap_dump (void); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_CAPABILITY_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/cm.h linux-2.6.9/include/elan/cm.h ---- clean/include/elan/cm.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/cm.h 2005-03-30 09:06:34.000000000 -0500 -@@ -0,0 +1,396 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.16 2005/03/30 14:06:34 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -diff -urN clean/include/elan/compat.h linux-2.6.9/include/elan/compat.h ---- clean/include/elan/compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/compat.h 2003-12-03 08:18:48.000000000 -0500 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.1 2003/12/03 13:18:48 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/compat.h,v $*/ -+ -+#ifndef __ELAN_COMPAT_H -+#define __ELAN_COMPAT_H -+ -+#define ELANMOD_STATS_MAP ELAN_STATS_MAP -+ -+#endif /* __ELAN_COMPAT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/device.h linux-2.6.9/include/elan/device.h ---- clean/include/elan/device.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/device.h 2003-09-24 09:55:37.000000000 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.h,v $*/ -+ -+#ifndef __ELAN_DEVICE_H -+#define __ELAN_DEVICE_H -+ -+/* non-kernel headings */ -+typedef unsigned int ELAN_DEV_IDX; -+ -+#if defined(__KERNEL__) -+ -+/* device callbacks */ -+#define ELAN_DEV_OPS_VERSION ((u_int)1) -+ -+typedef struct elan_dev_ops -+{ -+ /* dev info */ -+ int (*get_position) (void *user_data, ELAN_POSITION *position); -+ int (*set_position) (void *user_data, unsigned short nodeId, unsigned short numNodes); -+ -+ /* cap */ -+ -+ u_int ops_version; -+} ELAN_DEV_OPS; -+ -+typedef struct elan_dev_struct -+{ -+ struct list_head node; -+ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; -+ void *user_data; -+ ELAN_DEV_OPS *ops; -+} ELAN_DEV_STRUCT; -+ -+/* device.c */ -+extern ELAN_DEV_IDX elan_dev_register (ELAN_DEVINFO *devinfo, -+ ELAN_DEV_OPS *ops, -+ void *userdata); -+extern int elan_dev_deregister (ELAN_DEVINFO *devinfo); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find (ELAN_DEV_IDX devidx); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find_byrail(unsigned short deviceid, unsigned rail); -+extern int elan_dev_dump (void); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_DEVICE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/devinfo.h linux-2.6.9/include/elan/devinfo.h ---- clean/include/elan/devinfo.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/devinfo.h 2005-02-01 07:35:53.000000000 -0500 -@@ -0,0 +1,92 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.h,v 1.16 2005/02/01 12:35:53 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.h,v $*/ -+ -+#ifndef __ELAN_DEVINFO_H -+#define __ELAN_DEVINFO_H -+ -+#define ELAN_MAX_LEVELS 8 /* maximum number of levels in switch network */ -+ -+typedef struct elan_position -+{ -+ unsigned pos_mode; /* mode we're operating in */ -+ unsigned pos_nodeid; /* port this device connected to */ -+ unsigned pos_levels; /* number of levels to top switch */ -+ unsigned pos_nodes; /* number of nodes in the machine */ -+ unsigned pos_random_disabled; /* levels at which "random" routing is not possible */ -+ unsigned char pos_arity[ELAN_MAX_LEVELS]; /* number of downlinks per switch level */ -+} ELAN_POSITION; -+ -+#define ELAN4_PARAM_PCI_PADDING_FLAGS 0 /* A bit field, representing good places to burst across the pci */ -+#define ELAN4_PARAM_EVENT_COPY_WIN 1 /* The num of cmds when it becomes quicker to send via event copy than write directly */ -+#define ELAN4_PARAM_WRITE_COMBINING 2 /* If set the device supports bursts accesses across the pci bus */ -+#define ELAN4_PARAM_DRIVER_FEATURES 11 /* device driver features */ -+#define ELAN4_PARAM_COUNT 12 -+ -+/* values for ELAN4_PARAM_DRIVER_FEATURES, dev_features */ -+#define ELAN4_FEATURE_PCI_MAP (1 << 0) /* must use pci mapping functions */ -+#define ELAN4_FEATURE_64BIT_READ (1 << 1) /* must perform 64 bit PIO reads */ -+#define ELAN4_FEATURE_PIN_DOWN (1 << 2) /* must pin down pages */ -+#define ELAN4_FEATURE_NO_WRITE_COMBINE (1 << 3) /* don't allow write combinig at all */ -+#define ELAN4_FEATURE_NO_IOPROC (1 << 4) /* unpatched kernel or disabled by procfs */ -+#define ELAN4_FEATURE_NO_IOPROC_UPDATE (1 << 5) /* don't do coproc update xlation loading */ -+#define ELAN4_FEATURE_NO_PAGEFAULT (1 << 6) /* don't do pagefaulting */ -+#define ELAN4_FEATURE_NO_PREFETCH (1 << 7) /* don't allow prefetching of elan sdram/cports */ -+ -+typedef struct elan_params -+{ -+ unsigned values[ELAN4_PARAM_COUNT]; -+} ELAN_PARAMS; -+ -+/* values for pos_mode */ -+#define ELAN_POS_UNKNOWN 0 /* network position unknown */ -+#define ELAN_POS_MODE_SWITCHED 1 /* connected to a switch */ -+#define ELAN_POS_MODE_LOOPBACK 2 /* loopback connector */ -+#define ELAN_POS_MODE_BACKTOBACK 3 /* cabled back-to-back to another node */ -+ -+typedef struct elan_devinfo -+{ -+ unsigned short dev_vendor_id; /* pci vendor id */ -+ unsigned short dev_device_id; /* pci device id */ -+ unsigned char dev_revision_id; /* pci revision id */ -+ unsigned char dev_instance; /* device instance number */ -+ unsigned char dev_rail; /* device rail number */ -+ -+ unsigned short dev_driver_version; /* device driver version */ -+ unsigned short dev_params_mask; /* mask for valid entries in dev_params array */ -+ ELAN_PARAMS dev_params; /* device parametization */ -+ -+ unsigned dev_num_down_links_value; /* hint to machine size */ -+} ELAN_DEVINFO; -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+#if defined(__KERNEL__) -+/* devinfo.c */ -+#include -+#include -+extern int elan_get_devinfo (ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo); -+extern int elan_get_position (ELAN_DEV_IDX devidx, ELAN_POSITION *position); -+extern int elan_set_position (ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_DEVINFO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/elanmoddebug.h linux-2.6.9/include/elan/elanmoddebug.h ---- clean/include/elan/elanmoddebug.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/elanmoddebug.h 2005-05-24 13:07:44.000000000 -0400 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN_DEBUG_H -+#define _ELAN_DEBUG_H -+ -+ -+#ident "$Id: elanmoddebug.h,v 1.6 2005/05/24 17:07:44 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmoddebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+/* 0 | QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE */ -+extern int elan_debug_mode; -+extern int elan_debug_mask; -+ -+#define ELAN_DBG_VP 0x00000001 -+#define ELAN_DBG_CAP 0x00000002 -+#define ELAN_DBG_CTRL 0x00000004 -+#define ELAN_DBG_SYS_FN 0x00000008 -+#define ELAN_DBG_USERCOPY 0x00000010 -+#define ELAN_DBG_ALL 0xffffffff -+ -+ -+#if defined(DEBUG_PRINTF) -+# define ELAN_DEBUG0(m,fmt) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt) : (void)0) -+# define ELAN_DEBUG1(m,fmt,a) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a) : (void)0) -+# define ELAN_DEBUG2(m,fmt,a,b) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b) : (void)0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c) : (void)0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d) : (void)0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e) : (void)0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define ELAN_DEBUG0(m,fmt) (0) -+# define ELAN_DEBUG1(m,fmt,a) (0) -+# define ELAN_DEBUG2(m,fmt,a,b) (0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) (0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) (0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) (0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) (0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/elanmod.h linux-2.6.9/include/elan/elanmod.h ---- clean/include/elan/elanmod.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/elanmod.h 2005-05-26 12:14:21.000000000 -0400 -@@ -0,0 +1,83 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod.h,v 1.13 2005/05/26 16:14:21 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.h,v $*/ -+ -+#ifndef __ELAN_MOD_H -+#define __ELAN_MOD_H -+ -+#include -+#include -+#include -+#include -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+/* Linux RW semaphores */ -+#include -+#include -+ -+#define ELANMOD_RWLOCK struct rw_semaphore -+#define ELANMOD_RWLOCK_INIT(l) init_rwsem(l) -+#define ELANMOD_RWLOCK_DESTROY(l) -+#define ELANMOD_RWLOCK_READ(l) down_read(l) -+#define ELANMOD_RWLOCK_WRITE(l) down_write(l) -+#define ELANMOD_RWLOCK_READ_UNLOCK(l) up_read(l) -+#define ELANMOD_RWLOCK_WRITE_UNLOCK(l) up_write(l) -+ -+extern ELANMOD_RWLOCK elan_rwlock; -+ -+/* elan_general.c */ -+extern int elan_init(void); -+extern int elan_fini(void); -+ -+/* return codes, -ve => errno, +ve => success */ -+#define ELAN_CAP_OK (0) -+#define ELAN_CAP_RMS (1) -+ -+#define ELAN_USER_ATTACH (1) -+#define ELAN_USER_DETACH (2) -+#define ELAN_USER_P2P (3) -+#define ELAN_USER_BROADCAST (4) -+ -+extern int elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use); -+ -+#define ELAN_USER_BASE_CONTEXT_NUM 0x000 /* first user allowable context */ -+#define ELAN_USER_TOP_CONTEXT_NUM 0x7FF /* last user allowable context */ -+ -+#define ELAN_RMS_BASE_CONTEXT_NUM 0x400 /* reserved for RMS allocation */ -+#define ELAN_RMS_TOP_CONTEXT_NUM 0x7FF -+ -+#define ELAN_USER_CONTEXT(ctx) ((ctx) >= ELAN_USER_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_USER_TOP_CONTEXT_NUM) -+ -+#define ELAN_RMS_CONTEXT(ctx) ((ctx) >= ELAN_RMS_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_RMS_TOP_CONTEXT_NUM) -+ -+ -+/* capability.c */ -+struct elan_cap_node_struct; -+extern int elan_usercopy_attach (ELAN_CAPABILITY *cap, struct elan_cap_node_struct **node_ptr, void *handle, void *owner); -+extern int elan_usercopy_detach (struct elan_cap_node_struct *cap_ptr, void *owner); -+extern int elan_usercopy_handle (struct elan_cap_node_struct *cap_ptr, int ctxId, void **handlep); -+ -+/* usercopy.c */ -+extern int elan_usercopy (void *remote, void *local, size_t len, int write, -+ int ctxId, struct elan_cap_node_struct *cap_ptr); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_MOD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/elanmod_linux.h linux-2.6.9/include/elan/elanmod_linux.h ---- clean/include/elan/elanmod_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/elanmod_linux.h 2005-02-22 07:29:22.000000000 -0500 -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.h,v 1.7 2005/02/22 12:29:22 addy Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.h,v $*/ -+ -+#ifndef __ELAN_MOD_LINUX_H -+#define __ELAN_MOD_LINUX_H -+ -+#define ELANCRTL_USER_BASE 0x40 -+ -+/* stats */ -+typedef struct elanctrl_stats_get_next_struct -+{ -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; /* return value */ -+} ELANCTRL_STATS_GET_NEXT_STRUCT; -+#define ELANCTRL_STATS_GET_NEXT _IOR ('e', ELANCRTL_USER_BASE + 0, ELANCTRL_STATS_GET_NEXT_STRUCT) -+ -+typedef struct elanctrl_stats_find_index_struct -+{ -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_FIND_INDEX_STRUCT; -+#define ELANCTRL_STATS_FIND_INDEX _IOR ('e', ELANCRTL_USER_BASE + 1, ELANCTRL_STATS_FIND_INDEX_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_info_struct -+{ -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK_INFO _IOR ('e', ELANCRTL_USER_BASE + 2, ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT) -+ -+typedef struct elanctrl_stats_get_index_name_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} ELANCTRL_STATS_GET_INDEX_NAME_STRUCT; -+#define ELANCTRL_STATS_GET_INDEX_NAME _IOR ('e', ELANCRTL_USER_BASE + 3, ELANCTRL_STATS_GET_INDEX_NAME_STRUCT) -+ -+typedef struct elanctrl_stats_clear_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+} ELANCTRL_STATS_CLEAR_BLOCK_STRUCT; -+#define ELANCTRL_STATS_CLEAR_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 4, ELANCTRL_STATS_CLEAR_BLOCK_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} ELANCTRL_STATS_GET_BLOCK_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 5, ELANCTRL_STATS_GET_BLOCK_STRUCT) -+ -+ -+typedef struct elanctrl_get_devinfo_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} ELANCTRL_GET_DEVINFO_STRUCT; -+#define ELANCTRL_GET_DEVINFO _IOR ('e', ELANCRTL_USER_BASE + 6, ELANCTRL_GET_DEVINFO_STRUCT) -+ -+typedef struct elanctrl_get_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} ELANCTRL_GET_POSITION_STRUCT; -+#define ELANCTRL_GET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 7, ELANCTRL_GET_POSITION_STRUCT) -+ -+typedef struct elanctrl_set_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELANCTRL_SET_POSITION_STRUCT; -+#define ELANCTRL_SET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 8, ELANCTRL_SET_POSITION_STRUCT) -+ -+typedef struct elanctrl_create_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_CREATE_CAP_STRUCT; -+#define ELANCTRL_CREATE_CAP _IOW ('e', ELANCRTL_USER_BASE + 9, ELANCTRL_CREATE_CAP_STRUCT) -+ -+typedef struct elanctrl_destroy_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_DESTROY_CAP_STRUCT; -+#define ELANCTRL_DESTROY_CAP _IOW ('e', ELANCRTL_USER_BASE + 10, ELANCTRL_DESTROY_CAP_STRUCT) -+ -+typedef struct elanctrl_create_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_CREATE_VP_STRUCT; -+#define ELANCTRL_CREATE_VP _IOW ('e', ELANCRTL_USER_BASE + 11, ELANCTRL_CREATE_VP_STRUCT) -+ -+typedef struct elanctrl_destroy_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_DESTROY_VP_STRUCT; -+#define ELANCTRL_DESTROY_VP _IOW ('e', ELANCRTL_USER_BASE + 12, ELANCTRL_DESTROY_VP_STRUCT) -+ -+#define ELANCTRL_DEBUG_DUMP _IO ('e', ELANCRTL_USER_BASE + 13) -+ -+typedef struct elanctrl_get_caps_struct -+{ -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} ELANCTRL_GET_CAPS_STRUCT; -+#define ELANCTRL_GET_CAPS _IOW ('e', ELANCRTL_USER_BASE + 14, ELANCTRL_GET_CAPS_STRUCT) -+ -+ -+typedef struct elanctrl_debug_buffer_struct -+{ -+ caddr_t buffer; -+ int size; -+} ELANCTRL_DEBUG_BUFFER_STRUCT; -+#define ELANCTRL_DEBUG_BUFFER _IOW ('e', ELANCRTL_USER_BASE + 15, ELANCTRL_DEBUG_BUFFER_STRUCT) -+ -+ -+/* -+ * Usercopy ioctl definitions -+ */ -+typedef struct elanctrl_usercopy_attach_struct -+{ -+ ELAN_CAPABILITY cap; /* process capability (for security checks) */ -+} ELANCTRL_USERCOPY_ATTACH_STRUCT; -+#define ELANCTRL_USERCOPY_ATTACH _IOR ('u', ELANCRTL_USER_BASE + 0, ELANCTRL_USERCOPY_ATTACH_STRUCT) -+#define ELANCTRL_USERCOPY_DETACH _IO ('u', ELANCRTL_USER_BASE + 1) -+ -+typedef struct elanctrl_usercopy_struct -+{ -+ void *remote; /* remote process buffer */ -+ void *local; /* local process buffer */ -+ size_t len; -+ int write; /* Direction */ -+ -+ int ctxId; /* remote process context id (0 .. nlocal-1) */ -+ -+} ELANCTRL_USERCOPY_STRUCT; -+#define ELANCTRL_USERCOPY _IOR ('u', ELANCRTL_USER_BASE + 2, ELANCTRL_USERCOPY_STRUCT) -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_USER_IOCTL "/proc/qsnet/elan/user" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_MOD_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/elanmod_subsystem.h linux-2.6.9/include/elan/elanmod_subsystem.h ---- clean/include/elan/elanmod_subsystem.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/elanmod_subsystem.h 2003-09-29 11:35:13.000000000 -0400 -@@ -0,0 +1,138 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_SUBSYSTEM_H -+#define __ELAN_SUBSYSTEM_H -+ -+#include -+#include -+ -+#if defined( __KERNEL__) -+int elan_configure( -+ cfg_op_t op, -+ caddr_t indata, -+ ulong indata_size, -+ caddr_t outdata, -+ ulong outdata_size); -+#endif -+ -+#define ELAN_KMOD_CODE(x) ((x)+CFG_OP_SUBSYS_MIN) -+#define ELAN_MAX_KMOD_CODES 100 -+ -+#define ELAN_SUBSYS "elan" -+ -+#define ELAN_STATS_GET_NEXT 0x01 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; -+} elan_stats_get_next_struct; -+ -+ -+#define ELAN_STATS_FIND_INDEX 0x02 -+typedef struct { -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_find_index_struct; -+ -+#define ELAN_STATS_GET_BLOCK_INFO 0x03 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_get_block_info_struct; -+ -+#define ELAN_STATS_GET_INDEX_NAME 0x04 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} elan_stats_get_index_name_struct; -+ -+#define ELAN_STATS_CLEAR_BLOCK 0x05 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+} elan_stats_clear_block_struct; -+ -+#define ELAN_STATS_GET_BLOCK 0x06 -+typedef struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} elan_stats_get_block_struct; -+ -+#define ELAN_GET_DEVINFO 0x07 -+typedef struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} elan_get_devinfo_struct; -+ -+#define ELAN_GET_POSITION 0x08 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} elan_get_position_struct; -+ -+#define ELAN_SET_POSITION 0x09 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} elan_set_position_struct; -+ -+#define ELAN_CREATE_CAP 0x0a -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_create_cap_struct; -+ -+#define ELAN_DESTROY_CAP 0x0b -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_destroy_cap_struct; -+ -+#define ELAN_CREATE_VP 0x0c -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_create_vp_struct; -+ -+#define ELAN_DESTROY_VP 0x0d -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_destroy_vp_struct; -+ -+ -+#define ELAN_DEBUG_DUMP 0x0e -+ -+#define ELAN_GET_CAPS 0x0f -+typedef struct { -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} elan_get_caps_struct; -+ -+#define ELAN_DEBUG_BUFFER 0x10 -+typedef struct { -+ caddr_t addr; -+ int len; -+} elan_debug_buffer_struct; -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_SUBSYSTEM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/epcomms.h linux-2.6.9/include/elan/epcomms.h ---- clean/include/elan/epcomms.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/epcomms.h 2004-11-12 05:55:03.000000000 -0500 -@@ -0,0 +1,635 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPCOMMS_H -+#define __ELAN_EPCOMMS_H -+ -+#ident "$Id: epcomms.h,v 1.46 2004/11/12 10:55:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.h,v $ */ -+ -+#include -+#include -+ -+#define EPCOMMS_SUBSYS_NAME "epcomms" -+ -+/* message service numbers */ -+#define EP_MSG_SVC_EIP512 0x00 /* Quadrics EIP services */ -+#define EP_MSG_SVC_EIP1K 0x01 -+#define EP_MSG_SVC_EIP2K 0x02 -+#define EP_MSG_SVC_EIP4K 0x03 -+#define EP_MSG_SVC_EIP8K 0x04 -+#define EP_MSG_SVC_EIP16K 0x05 -+#define EP_MSG_SVC_EIP32K 0x06 -+#define EP_MSG_SVC_EIP64K 0x07 -+#define EP_MSG_SVC_EIP128K 0x08 -+ -+#define EP_MSG_SVC_PFS 0x09 /* Quadrics PFS rpc service */ -+ -+#define EP_MSG_SVC_PORTALS_SMALL 0x10 /* Lustre Portals */ -+#define EP_MSG_SVC_PORTALS_LARGE 0x11 -+ -+#define EP_MSG_NSVC 0x40 /* Max number of services */ -+ -+#define EP_MSGQ_ADDR(qnum) (EP_EPCOMMS_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+/* -+ * EP_ENVELOPE -+ * Messages are sent by sending an envelope to the destination -+ * describing the source buffers to transfer. The receiving thread -+ * then allocates a receive buffer and fetches the data by issuing -+ * "get" dmas. -+ * -+ * NOTE: envelopes are not explicitly converted to network byte order -+ * since they are always transferred little endian as they are -+ * copied to/from elan memory using word operations. -+ */ -+typedef struct ep_envelope -+{ -+ uint32_t Version; /* Protocol version field */ -+ -+ EP_ATTRIBUTE Attr; /* Attributes */ -+ -+ EP_XID Xid; /* transaction id */ -+ -+ uint32_t NodeId; /* Source processor */ -+ uint32_t Range; /* range we're sending to (high << 16 | low) */ -+ -+ EP_ADDR TxdRail; /* address of per-rail txd */ -+ EP_NMD TxdMain; /* address of main memory portion of txd */ -+ -+ uint32_t nFrags; /* # fragments */ -+ EP_NMD Frags[EP_MAXFRAG]; /* network mapping handles of source data */ -+ -+ uint32_t CheckSum; /* holds the check sum value when active -+ * must be after all members to be checksum'd -+ */ -+ -+ uint32_t Pad[6]; /* Pad to 128 bytes */ -+} EP_ENVELOPE; -+ -+#define EP_ENVELOPE_VERSION 0xdac10001 -+#define EP_ENVELOPE_SIZE roundup (sizeof (EP_ENVELOPE), EP_BLK_SIZE) -+ -+/* -+ * RPC payload - this small amount of data is transfered in -+ * the envelope for RPCs -+ */ -+typedef struct ep_payload -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_PAYLOAD; -+ -+#define EP_PAYLOAD_SIZE roundup (sizeof (EP_PAYLOAD), EP_BLK_SIZE) -+ -+#define EP_INPUTQ_SIZE (EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE) -+ -+/* -+ * EP_STATUSBLK -+ * RPC completion transfers a status block to the client. -+ */ -+typedef struct ep_statusblk -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_STATUSBLK; -+ -+#define EP_STATUSBLK_SIZE roundup (sizeof(EP_STATUSBLK), EP_BLK_SIZE) -+ -+#define EP_RANGE(low,high) ((high) << 16 | (low)) -+#define EP_RANGE_LOW(range) ((range) & 0xFFFF) -+#define EP_RANGE_HIGH(range) (((range) >> 16) & 0xFFFF) -+ -+/* return codes from functions, + 'res' parameter to txd callback, ep_rxd_status() */ -+typedef enum -+{ -+ EP_SUCCESS = 0, /* message sent/received successfully */ -+ EP_RXD_PENDING = -1, /* rxd not completed by thread */ -+ EP_CONN_RESET = -2, /* virtual circuit reset */ -+ EP_NODE_DOWN = -3, /* node down - transmit not attempted */ -+ EP_MSG_TOO_BIG = -4, /* received message larger than buffer */ -+ EP_ENOMEM = -5, /* memory alloc failed */ -+ EP_EINVAL = -6, /* invalid parameters */ -+ EP_SHUTDOWN = -7, /* receiver is being shut down */ -+} EP_STATUS; -+ -+/* forward declarations */ -+typedef struct ep_rxd EP_RXD; -+typedef struct ep_txd EP_TXD; -+typedef struct ep_rcvr_rail EP_RCVR_RAIL; -+typedef struct ep_rcvr EP_RCVR; -+typedef struct ep_xmtr_rail EP_XMTR_RAIL; -+typedef struct ep_xmtr EP_XMTR; -+typedef struct ep_comms_rail EP_COMMS_RAIL; -+typedef struct ep_comms_subsys EP_COMMS_SUBSYS; -+ -+typedef struct ep_rcvr_stats EP_RCVR_STATS; -+typedef struct ep_xmtr_stats EP_XMTR_STATS; -+typedef struct ep_rcvr_rail_stats EP_RCVR_RAIL_STATS; -+typedef struct ep_xmtr_rail_stats EP_XMTR_RAIL_STATS; -+ -+typedef void (EP_RXH)(EP_RXD *rxd); /* callback function from receive completion */ -+typedef void (EP_TXH)(EP_TXD *txd, void *arg, EP_STATUS res); /* callback function from transmit completion */ -+ -+/* Main memory portion shared descriptor */ -+typedef struct ep_rxd_main -+{ -+ EP_ENVELOPE Envelope; /* 128 byte aligned envelope */ -+ EP_PAYLOAD Payload; /* 128 byte aligned payload */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+ EP_STATUSBLK StatusBlk; /* RPC status block to return */ -+ uint64_t Next; /* linked list when on active list (main address) */ -+ int32_t Len; /* Length of message received */ -+} EP_RXD_MAIN; -+ -+#define EP_RXD_MAIN_SIZE roundup (sizeof (EP_RXD_MAIN), EP_BLK_SIZE) -+ -+/* Phases for message/rpc */ -+#ifndef __ELAN__ -+ -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep_rxd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_RCVR_RAIL *RcvrRail; /* rvcr we're associated with */ -+ -+ EP_RXD *Rxd; /* receive descriptor we're bound to */ -+} EP_RXD_RAIL; -+ -+#define RXD_BOUND2RAIL(rxdRail,rcvrRail) ((rxdRail) != NULL && ((EP_RXD_RAIL *) (rxdRail))->RcvrRail == (EP_RCVR_RAIL *) rcvrRail) -+ -+struct ep_rxd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_RCVR *Rcvr; /* owning receiver */ -+ -+ EP_RXD_MAIN *RxdMain; /* shared main memory portion. */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_RXD_RAIL *RxdRail; /* per-rail rxd we're bound to */ -+ -+ EP_RXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned int State; /* RXD status (active,stalled,failed) */ -+ -+ EP_NMD Data; /* network mapping descriptor for user buffer */ -+ -+ int nFrags; /* network mapping descriptor for put/get/complete */ -+ EP_NMD Local[EP_MAXFRAG]; -+ EP_NMD Remote[EP_MAXFRAG]; -+ -+ long NextRunTime; /* time to resend failover/map requests */ -+ EP_XID MsgXid; /* and transaction id */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ struct list_head CheckSumLink; /* linked on check sum list */ -+#endif -+}; -+ -+#define EP_NUM_RXD_PER_BLOCK 16 -+ -+/* rxd->State */ -+#define EP_RXD_FREE 0 -+ -+#define EP_RXD_RECEIVE_UNBOUND 1 -+#define EP_RXD_RECEIVE_ACTIVE 2 -+ -+#define EP_RXD_PUT_ACTIVE 3 -+#define EP_RXD_PUT_STALLED 4 -+#define EP_RXD_GET_ACTIVE 5 -+#define EP_RXD_GET_STALLED 6 -+ -+#define EP_RXD_COMPLETE_ACTIVE 7 -+#define EP_RXD_COMPLETE_STALLED 8 -+ -+#define EP_RXD_RPC_IN_PROGRESS 9 -+#define EP_RXD_COMPLETED 10 -+ -+#define EP_RXD_BEEN_ABORTED 11 /* rxd was aborted while in a private state */ -+ -+typedef struct ep_rxd_block -+{ -+ struct list_head Link; -+ -+ EP_NMD NmdMain; -+ -+ EP_RXD Rxd[EP_NUM_RXD_PER_BLOCK]; -+} EP_RXD_BLOCK; -+ -+struct ep_rcvr_rail_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr_rail -+{ -+ EP_RCVR *Rcvr; /* associated receiver */ -+ EP_COMMS_RAIL *CommsRail; /* comms rail */ -+ -+ struct proc_dir_entry *procfs_root; /* root of this rcvr_rail's procfs entry */ -+ EP_RCVR_RAIL_STATS stats; /* generic rcvr_rail stats */ -+}; -+ -+struct ep_rcvr_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr -+{ -+ struct list_head Link; /* queued on subsystem */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ EP_SERVICE Service; /* service number */ -+ -+ unsigned int InputQueueEntries; /* # entries on receive queue */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_RCVR_RAIL *Rails[EP_MAX_RAILS]; -+ -+ spinlock_t Lock; /* spinlock for rails/receive lists */ -+ -+ struct list_head ActiveDescList; /* List of pending/active receive descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ unsigned int ForwardRxdCount; /* count of rxd's being forwarded */ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and place to sleep */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_RCVR_STATS stats; -+}; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#define EP_ENVELOPE_CHECK_SUM (1<<31) -+extern uint32_t ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags); -+#endif -+ -+#endif /* ! __ELAN__ */ -+ -+typedef struct ep_txd_main -+{ -+ EP_STATUSBLK StatusBlk; /* RPC status block */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+} EP_TXD_MAIN; -+ -+#define EP_TXD_MAIN_SIZE roundup (sizeof (EP_TXD_MAIN), EP_BLK_SIZE) -+ -+#ifndef __ELAN__ -+typedef struct ep_txd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_XMTR_RAIL *XmtrRail; /* xmtr we're associated with */ -+ -+ EP_TXD *Txd; /* txd we're bound to */ -+} EP_TXD_RAIL; -+ -+#define TXD_BOUND2RAIL(rxdRail,xmtrRail) ((txdRail) != NULL && ((EP_TXD_RAIL *) (txdRail))->XmtrRail == (EP_XMTR_RAIL *) xmtrRail) -+ -+struct ep_txd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_XMTR *Xmtr; /* service we're associated with */ -+ -+ EP_TXD_MAIN *TxdMain; /* shared main memory portion */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_TXD_RAIL *TxdRail; /* per-rail txd for this phase */ -+ -+ EP_TXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned short NodeId; /* node transmit is to. */ -+ EP_SERVICE Service; /* and seervice */ -+ -+ long TimeStamp; /* time we where created at, to find sends taking too long */ -+ long RetryTime; -+ EP_BACKOFF Backoff; -+ -+ EP_ENVELOPE Envelope; /* envelope for transmit */ -+ EP_PAYLOAD Payload; /* payload for transmit */ -+}; -+ -+#define EP_NUM_TXD_PER_BLOCK 16 -+ -+/* "phase" parameter to BindTxd */ -+#define EP_TXD_PHASE_ACTIVE 1 -+#define EP_TXD_PHASE_PASSIVE 2 -+ -+typedef struct ep_txd_block -+{ -+ struct list_head Link; -+ EP_NMD NmdMain; -+ EP_TXD Txd[EP_NUM_TXD_PER_BLOCK]; /* transmit descriptors */ -+} EP_TXD_BLOCK; -+ -+struct ep_xmtr_rail_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr_rail -+{ -+ EP_COMMS_RAIL *CommsRail; /* associated comms rail */ -+ EP_XMTR *Xmtr; /* associated transmitter */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this xmtr's proc entry is */ -+ -+ EP_XMTR_RAIL_STATS stats; -+}; -+ -+struct ep_xmtr_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_XMTR_RAIL *Rails[EP_MAX_RAILS]; /* per-rail state */ -+ -+ spinlock_t Lock; /* lock for active descriptor list */ -+ -+ struct list_head ActiveDescList; /* list of active transmit descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_XMTR_STATS stats; -+}; -+ -+/* forward descriptor */ -+#define EP_TREE_ARITY 3 -+ -+typedef struct ep_fwd_desc -+{ -+ struct list_head Link; /* linked on forward/free lists */ -+ EP_RXD *Rxd; /* rxd to forward */ -+ EP_NMD Data; /* nmd of subset of receive buffer */ -+ unsigned NumChildren; /* number of places we're forwarding */ -+ unsigned Children[EP_TREE_ARITY]; -+} EP_FWD_DESC; -+ -+typedef struct ep_comms_ops -+{ -+ void (*DelRail) (EP_COMMS_RAIL *rail); -+ void (*DisplayRail) (EP_COMMS_RAIL *rail); -+ -+ struct { -+ void (*AddRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+ -+ int (*QueueRxd) (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+ void (*RpcPut)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcGet)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcComplete)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+ EP_RXD *(*StealRxd)(EP_RCVR_RAIL *rcvrRail); -+ -+ void (*DisplayRcvr) (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+ void (*DisplayRxd) (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+ void (*FillOutRailStats) (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+ } Rcvr; -+ -+ struct { -+ void (*AddRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+ -+ int (*BindTxd) (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+ void (*UnbindTxd) (EP_TXD *txd, unsigned int phase); -+ int (*PollTxd) (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+ -+ void (*DisplayXmtr) (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+ void (*DisplayTxd) (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+ int (*CheckTxdState) (EP_TXD *txd); -+ -+ void (*FillOutRailStats) (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+ } Xmtr; -+} EP_COMMS_OPS; -+ -+#define EP_RAIL_OP(commsRail, Which) (commsRail)->Ops.Which -+#define EP_RCVR_OP(rcvrRail, Which) (rcvrRail)->CommsRail->Ops.Rcvr.Which -+#define EP_XMTR_OP(xmtrRail, Which) (xmtrRail)->CommsRail->Ops.Xmtr.Which -+ -+/* "how" parameter to PollTxd */ -+#define POLL_TX_LIST 0 -+#define ENABLE_TX_CALLBACK 1 -+#define DISABLE_TX_CALLBACK 2 -+ -+struct ep_comms_rail -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_RAIL *Rail; /* kernel comms rail */ -+ EP_COMMS_SUBSYS *Subsys; -+ EP_COMMS_OPS Ops; -+ -+ EP_COMMS_RAIL_STATS Stats; /* statistics */ -+}; -+ -+struct ep_comms_subsys -+{ -+ EP_SUBSYS Subsys; /* is a kernel comms subsystem */ -+ -+ kmutex_t Lock; /* global lock */ -+ -+ EP_COMMS_STATS Stats; /* statistics */ -+ -+ struct list_head Rails; /* list of all rails */ -+ -+ struct list_head Receivers; /* list of receivers */ -+ struct list_head Transmitters; /* and transmitters */ -+ -+ /* forward/allocator thread */ -+ EP_KTHREAD Thread; /* place thread sleeps */ -+ -+ /* message passing "broadcast" forward lists */ -+ spinlock_t ForwardDescLock; /* Lock for broadcast forwarding */ -+ struct list_head ForwardDescList; /* List of rxd's to forward */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spinlock_t CheckSumDescLock; /* Lock for CheckSums */ -+ struct list_head CheckSumDescList; /* List of rxd's to be CheckSumed */ -+#endif -+ -+ EP_XMTR *ForwardXmtr; /* and transmitter to forward with */ -+}; -+ -+/* epcomms.c subsystem initialisation */ -+extern unsigned int epcomms_forward_limit; -+ -+extern int ep_comms_init (EP_SYS *sys); -+extern void ep_comms_display (EP_SYS *sys, char *how); -+extern EP_RAILMASK ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcomms_elan4.c */ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcommsTx.c */ -+extern int TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail); -+extern void FreeTxd (EP_XMTR *xmtr, EP_TXD *txd); -+ -+extern unsigned int ep_txd_lowat; -+extern long ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime); -+extern void ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr); -+extern void ep_xmtr_flush_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+extern void ep_xmtr_reloc_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+ -+extern void ep_xmtr_fillout_stats (EP_XMTR *xmtr, char *str); -+extern void ep_xmtr_rail_fillout_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+extern void ep_xmtr_txd_stat (EP_XMTR *xmtr, EP_TXD *txd); -+ -+/* epcommsRx.c */ -+extern EP_RXD *StealRxdFromOtherRail (EP_RCVR *rcvr); -+ -+extern unsigned int ep_rxd_lowat; -+extern long ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime); -+extern void ep_rcvr_flush_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_rcvr_reloc_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full); -+ -+extern long ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime); -+ -+extern void ep_rcvr_fillout_stats (EP_RCVR *rcvr, char *str); -+extern void ep_rcvr_rail_fillout_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern void ep_csum_rxds (EP_COMMS_SUBSYS *subsys); -+extern void ep_rxd_queue_csum (EP_RXD *rxd); -+#endif -+ -+extern void ep_rxd_received (EP_RXD *rxd); -+extern void ep_rxd_received_now (EP_RXD *rxd); -+ -+/* ep_procfs.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+ -+extern void ep_procfs_rcvr_xmtr_init(void); -+extern void ep_procfs_rcvr_xmtr_fini(void); -+ -+extern void ep_procfs_rcvr_add(EP_RCVR *rcvr); -+extern void ep_procfs_rcvr_del(EP_RCVR *rcvr); -+ -+extern void ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail); -+extern void ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail); -+ -+extern void ep_procfs_xmtr_add(EP_XMTR *xmtr); -+extern void ep_procfs_xmtr_del(EP_XMTR *xmtr); -+ -+extern void ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail); -+extern void ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail); -+ -+ -+/* Public Interface */ -+ -+ -+/* epcomms.c message xmtr functions */ -+extern EP_XMTR *ep_alloc_xmtr (EP_SYS *sys); -+extern void ep_free_xmtr (EP_XMTR *xmtr); -+ -+extern EP_STATUS ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, -+ EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_PAYLOAD *payload, EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_ENVELOPE *env, EP_PAYLOAD *payload, -+ bitmap_t *bitmap, EP_NMD *nmd, int nFrags); -+ -+/* epcomms.c functions for use with polled transmits */ -+extern int ep_poll_transmits (EP_XMTR *xmtr); -+extern int ep_enable_txcallbacks (EP_XMTR *xmtr); -+extern int ep_disable_txcallbacks (EP_XMTR *xmtr); -+ -+/* epcomms.c message rcvr functions */ -+extern EP_RCVR *ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvelopes); -+extern void ep_free_rcvr (EP_RCVR *rcvr); -+ -+extern EP_STATUS ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern void ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern EP_STATUS ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, -+ EP_NMD *from, EP_NMD *to, int nFrags); -+extern void ep_complete_receive (EP_RXD *rxd); -+ -+/* railhints.c */ -+extern int ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails); -+extern int ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId); -+extern EP_RAILMASK ep_xmtr_availrails (EP_XMTR *xmtr); -+extern EP_RAILMASK ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId); -+extern int ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails); -+extern EP_RAILMASK ep_rcvr_availrails (EP_RCVR *rcvr); -+extern EP_RAILMASK ep_rxd_railmask (EP_RXD *rxd); -+ -+/* epcomms.c functions for accessing fields of rxds */ -+extern void *ep_rxd_arg(EP_RXD *rxd); -+extern int ep_rxd_len(EP_RXD *rxd); -+extern EP_STATUS ep_rxd_status(EP_RXD *rxd); -+extern int ep_rxd_isrpc(EP_RXD *rxd); -+extern EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd); -+extern EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd); -+extern int ep_rxd_node(EP_RXD *rxd); -+extern EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd); -+ -+/* functions for accessing fields of txds */ -+extern int ep_txd_node(EP_TXD *txd); -+extern EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd); -+ -+/* functions for controlling how many processes are using module */ -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+extern EP_RAILMASK ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId); -+extern int ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+#endif /* ! __ELAN__ */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_EPCOMMS_H */ -+ -diff -urN clean/include/elan/epsvc.h linux-2.6.9/include/elan/epsvc.h ---- clean/include/elan/epsvc.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/epsvc.h 2004-02-13 05:03:27.000000000 -0500 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPSVC_H -+#define __ELAN_EPSVC_H -+ -+#ident "@(#)$Id: epsvc.h,v 1.9 2004/02/13 10:03:27 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epsvc.h,v $ */ -+ -+ -+#define EP_SVC_NUM_INDICATORS 8 -+#define EP_SVC_INDICATOR_MAX_NAME 32 -+ -+#define EP_SVC_EIP 0 -+#define EP_SVC_NAMES {"eip", "1", "2", "3", "4", "5", "6", "7"}; -+ -+#if defined(__KERNEL__) -+extern int ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId); -+extern int ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+extern EP_RAILMASK ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId); -+#endif -+ -+#endif /* __ELAN_EPSVC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/kalloc.h linux-2.6.9/include/elan/kalloc.h ---- clean/include/elan/kalloc.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kalloc.h 2004-05-19 06:23:59.000000000 -0400 -@@ -0,0 +1,108 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KALLOC_H -+#define __ELAN3_KALLOC_H -+ -+#ident "$Id: kalloc.h,v 1.11 2004/05/19 10:23:59 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.h,v $ */ -+ -+#include -+ -+/* -+ * Memory allocator -+ */ -+#define LN2_MIN_SIZE 6 /* 64 bytes */ -+#define LN2_MAX_SIZE 16 /* 64k bytes */ -+#define NUM_FREELISTS (LN2_MAX_SIZE-LN2_MIN_SIZE + 1) -+#define MIN_SIZE (1 << LN2_MIN_SIZE) -+#define MAX_SIZE (1 << LN2_MAX_SIZE) -+ -+#define HASHSHIFT LN2_MAX_SIZE -+#define NHASH 32 -+#define HASH(addr) (((addr) >> HASHSHIFT) & (NHASH-1)) -+ -+typedef enum -+{ -+ EP_ALLOC_TYPE_PRIVATE_SDRAM, -+ EP_ALLOC_TYPE_PRIVATE_MAIN, -+ EP_ALLOC_TYPE_SHARED_MAIN, -+} EP_ALLOC_TYPE; -+ -+typedef struct ep_pool -+{ -+ EP_NMH Handle; /* network mapping handle */ -+ -+ struct list_head HashBase; /* linked on hash lists */ -+ struct list_head HashTop; /* linked on hash lists */ -+ -+ struct list_head Link[NUM_FREELISTS]; /* linked on free lists */ -+ bitmap_t *Bitmaps[NUM_FREELISTS]; /* bitmaps for each size */ -+ -+ union { -+ sdramaddr_t Sdram; -+ unsigned long Ptr; -+ } Buffer; -+} EP_POOL; -+ -+typedef struct ep_alloc -+{ -+ spinlock_t Lock; -+ -+ EP_ALLOC_TYPE Type; -+ unsigned int Perm; -+ -+ EP_RMAP *ResourceMap; -+ -+ struct list_head HashBase[NHASH]; -+ struct list_head HashTop[NHASH]; -+ struct list_head Freelists[NUM_FREELISTS]; -+ -+ union { -+ struct { -+ EP_SYS *System; -+ struct list_head Rails; -+ } Shared; -+ -+ struct { -+ EP_RAIL *Rail; -+ } Private; -+ } Data; -+} EP_ALLOC; -+ -+extern void ep_display_alloc (EP_ALLOC *alloc); -+ -+extern void ep_alloc_init (EP_RAIL *rail); -+extern void ep_alloc_fini (EP_RAIL *rail); -+ -+extern sdramaddr_t ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr); -+extern void ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr); -+ -+extern sdramaddr_t ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp); -+extern void ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+extern void *ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addr); -+extern void ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ -+extern sdramaddr_t ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr); -+extern void *ep_elan2main (EP_RAIL *rail, EP_ADDR addr); -+ -+extern void ep_shared_alloc_init (EP_SYS *sys); -+extern void ep_shared_alloc_fini (EP_SYS *sys); -+extern int ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern void *ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd); -+extern void ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd); -+ -+#endif /* __ELAN_KALLOC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/kcomm.h linux-2.6.9/include/elan/kcomm.h ---- clean/include/elan/kcomm.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kcomm.h 2005-04-05 12:36:28.000000000 -0400 -@@ -0,0 +1,831 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KCOMM_H -+#define __ELAN_KCOMM_H -+ -+#ident "$Id: kcomm.h,v 1.82 2005/04/05 16:36:28 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.h,v $*/ -+#define EP_KCOMM_MAJOR_VERSION 3 -+#define EP_KCOMM_MINOR_VERSION 1 -+ -+#define EP_PROTOCOL_VERSION 1 /* CM/KCOMM protocol revision */ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+#define EP_MAX_RAILS 16 /* max number of rails (we use an unsigned short for bitmaps !) */ -+#define EP_MAXFRAG 4 /* max number of fragments */ -+ -+#define EP_BLK_SIZE 64 /* align objects for elan access */ -+ -+/* Elan virtual address address space */ -+#define EP_SYSTEM_QUEUE_BASE 0x00010000 /* Base address for system queues */ -+#define EP_MSGSYS_QUEUE_BASE 0x00020000 /* Base address for msgsys queues */ -+#define EP_EPCOMMS_QUEUE_BASE 0x00030000 /* Base address for message queues */ -+#define EP_DVMA_BASE 0x10000000 /* elan address range for dvma mapping. */ -+#define EP_DVMA_TOP 0xE0000000 -+ -+#define EP_SHARED_BASE 0xE0000000 /* shared main/elan allocators */ -+#define EP_SHARED_TOP 0xF0000000 -+ -+#define EP_PRIVATE_BASE 0xF0000000 /* private main/elan allocators */ -+#define EP_PRIVATE_TOP 0xF8000000 -+ -+#define EP_DVMA_RMAP_SIZE 1024 /* size of resource map for dvma address space */ -+#define EP_SHARED_RMAP_SIZE 1024 /* size of resource map for shared address space */ -+#define EP_PRIVATE_RMAP_SIZE 1024 /* size of resource map for private address space */ -+ -+/* Input queue descriptors fit into 64 bytes */ -+#define EP_QUEUE_DESC_SIZE 64 -+ -+/* Timeouts for checking network position */ -+#define EP_POSITION_TIMEOUT (4*HZ) /* 1s time to notice CheckNetworkPosition changes */ -+#define EP_WITHDRAW_TIMEOUT (2*HZ) /* 2s time before withdrawing from unreachable nodes */ -+ -+/* Time to try again due to resource failue (eg malloc etc) */ -+#define RESOURCE_RETRY_TIME (HZ/20) -+ -+/* Time to retransmit message when send failed */ -+#define MSGBUSY_RETRY_TIME (HZ/20) -+ -+/* Time between retransmits of messages network flush requests */ -+#define MESSAGE_RETRY_TIME (HZ/5) -+ -+/* time to hold the context filter up to ensure that the -+ * next packet of a dma is guaranteed to get nacked (8mS) */ -+#define NETWORK_ERROR_TIMEOUT (1 + roundup (HZ * 8 / 1000, 1)) -+ -+/* Time between retransmits of message failover requests */ -+#define FAILOVER_RETRY_TIME (HZ/5) -+ -+/* compute earliest time */ -+#define SET_NEXT_RUN_TIME(nextRunTime, time) \ -+do { \ -+ if ((nextRunTime) == 0 || AFTER(nextRunTime, (time)))\ -+ (nextRunTime) = (time);\ -+} while (0) -+ -+/* DMA retry backoff/priorities/issue rings */ -+#define EP_NUM_BACKOFF 8 -+#define EP_RETRY_STABALISING 0 -+#define EP_RETRY_BASE 1 -+ -+#define EP_RETRY_CRITICAL EP_RETRY_BASE -+#define EP_RETRY_HIGH_PRI (EP_RETRY_CRITICAL + 1) -+#define EP_RETRY_HIGH_PRI_TIME (1) -+#define EP_RETRY_HIGH_PRI_RETRY (EP_RETRY_HIGH_PRI + 1) -+#define EP_RETRY_HIGH_PRI_RETRY_TIME (2) -+#define EP_RETRY_LOW_PRI (EP_RETRY_HIGH_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_LOW_PRI_TIME (2) -+#define EP_RETRY_LOW_PRI_RETRY (EP_RETRY_LOW_PRI + 1) -+#define EP_RETRY_LOW_PRI_RETRY_TIME (4) -+#define EP_RETRY_ANONYMOUS (EP_RETRY_LOW_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_ANONYMOUS_TIME (10) -+#define EP_RETRY_NETERR (EP_RETRY_ANONYMOUS + EP_NUM_BACKOFF) -+#define EP_RETRY_NETERR_TIME (10) -+#define EP_NUM_RETRIES (EP_RETRY_NETERR + 1) -+ -+typedef unsigned short EP_SERVICE; -+ -+/* EP_ATTRIBUTE 32 bits -+ * -+ * 0-2 -+ * for initial call :- -+ * 0 (0x1) EP_NO_ALLOC used once -+ * 1 (0x2) EP_NO_SLEEP used once -+ * 2 (0x4) EP_NOT_MYSELF used once -+ * -+ * when stored and transmited :- -+ * 0 (0x0) EP_MULTICAST envelope -+ * 1 (0x2) EP_RPC envelope -+ * 2 (0x4) EP_HAS_PAYLOAD envelope -+ * -+ * 3-11 -+ * 3 (0x08) EP_PREFRAIL_SET preserved -+ * 4-7 (0xf0) Pref Rail -+ * 8 (0x100) EP_NO_INTERUPT -+ * 9 (0x200) EP_NO_FAILOVER -+ * -+ * 10 (0x400) EP_INTERRUPT_ENABLED internal -+ * 11 (0x800) EP_TXD_STABALISING internal -+ * -+ * 12-13 Not Used. -+ * -+ * 14-15 (0xC000) Data Type. passed in -+ * 00 none. -+ * 01 Service Indicator. -+ * 10 TimeOut. -+ * 11 RailMask -+ * -+ * 16-31 (0x10000) Data. Service Indicator, TimeOut, RailMask, Pref Rail. -+ * -+*/ -+ -+typedef uint32_t EP_ATTRIBUTE; -+ -+#define EP_LOCAL_ATTR_MASK 0x07 -+#define EP_CLEAR_LOCAL_ATTR(ATTR) ( (ATTR) & ~EP_LOCAL_ATTR_MASK ) -+ -+#define EP_NO_ALLOC 0x01 /* Don't call allocators if no free descriptors */ -+#define EP_NO_SLEEP 0x02 /* Don't sleep if no free descriptors */ -+#define EP_NOT_MYSELF 0x04 /* Don't send multicast to me */ -+ -+#define EP_MULTICAST 0x01 /* Message is a multicast */ -+#define EP_RPC 0x02 /* Wait for RPC reply */ -+#define EP_HAS_PAYLOAD_BIT 0x04 /* transfer payload */ -+ -+ -+#define EP_PREFRAIL_SET 0x08 /* preferred rail is set (otherwise pick one from the NMDs) */ -+ -+#define EP_PREFRAIL_SHIFT (4) -+#define EP_PREFRAIL_MASK 0xf0 -+#define EP_IS_PREFRAIL_SET(ATTR) (((ATTR) & EP_PREFRAIL_SET) != 0) -+#define EP_CLEAR_PREFRAIL(ATTR) (((ATTR) & ~EP_PREFRAIL_SET) & ~EP_PREFRAIL_MASK) -+#define EP_SET_PREFRAIL(ATTR,RAIL) (EP_CLEAR_PREFRAIL(ATTR) | (((RAIL) << EP_PREFRAIL_SHIFT ) & EP_PREFRAIL_MASK ) | EP_PREFRAIL_SET) -+ -+ -+#define EP_ATTR2PREFRAIL(ATTR) (((ATTR) & EP_PREFRAIL_MASK) >> EP_PREFRAIL_SHIFT) -+ -+ -+#define EP_INTERRUPT_ENABLED 0x400 /* event interrupt enabled on EP_NO_INTERRUPT */ -+#define EP_TXD_STABALISING 0x800 /* flag to indicate this is attempting to stabalise */ -+ -+#define EP_IS_MULTICAST(ATTR) (((ATTR) & EP_MULTICAST) != 0) -+#define EP_SET_MULTICAST(ATTR) ( (ATTR) | EP_MULTICAST) -+#define EP_CLEAR_MULTICAST(ATTR) ( (ATTR) & ~EP_MULTICAST) -+ -+#define EP_IS_RPC(ATTR) (((ATTR) & EP_RPC) != 0) -+#define EP_SET_RPC(ATTR) ( (ATTR) | EP_RPC) -+#define EP_CLEAR_RPC(ATTR) ( (ATTR) & ~EP_RPC) -+ -+#define EP_HAS_PAYLOAD(ATTR) (((ATTR) & EP_HAS_PAYLOAD_BIT) != 0) -+#define EP_SET_HAS_PAYLOAD(ATTR) ( (ATTR) | EP_HAS_PAYLOAD_BIT) -+#define EP_CLEAR_HAS_PAYLOAD(ATTR) ( (ATTR) & ~EP_HAS_PAYLOAD_BIT) -+ -+#define EP_IS_INTERRUPT_ENABLED(ATTR) (((ATTR) & EP_INTERRUPT_ENABLED) != 0) -+#define EP_SET_INTERRUPT_ENABLED(ATTR) ( (ATTR) | EP_INTERRUPT_ENABLED) -+#define EP_CLEAR_INTERRUPT_ENABLED(ATTR) ( (ATTR) & ~EP_INTERRUPT_ENABLED) -+ -+#define EP_IS_TXD_STABALISING(ATTR) (((ATTR) & EP_TXD_STABALISING) != 0) -+#define EP_SET_TXD_STABALISING(ATTR) ( (ATTR) | EP_TXD_STABALISING) -+#define EP_CLEAR_TXD_STABALISING(ATTR) ( (ATTR) & ~EP_TXD_STABALISING) -+ -+#define EP_NO_INTERRUPT 0x100 /* Don't generate completion interrupt (tx) */ -+#define EP_NO_FAILOVER 0x200 /* don't attempt rail failover, just abort */ -+ -+#define EP_IS_NO_INTERRUPT(ATTR) (((ATTR) & EP_NO_INTERRUPT) != 0) -+#define EP_SET_NO_INTERRUPT(ATTR) ( (ATTR) | EP_NO_INTERRUPT) -+#define EP_CLEAR_NO_INTERRUPT(ATTR) ( (ATTR) & ~EP_NO_INTERRUPT) -+ -+#define EP_IS_NO_FAILOVER(ATTR) (((ATTR) & EP_NO_FAILOVER) != 0) -+#define EP_SET_NO_FAILOVER(ATTR) ( (ATTR) | EP_NO_FAILOVER) -+#define EP_CLEAR_NO_FAILOVER(ATTR) ( (ATTR) & ~EP_NO_FAILOVER) -+ -+#define EP_TYPE_MASK 0xC000 -+#define EP_TYPE_SVC_INDICATOR 0x4000 -+#define EP_TYPE_TIMEOUT 0x8000 -+#define EP_TYPE_RAILMASK 0xC000 -+ -+#define EP_ATTR2TYPE(ATTR) ( (ATTR) & EP_TYPE_MASK ) -+ -+#define EP_IS_SVC_INDICATOR(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_SVC_INDICATOR) -+#define EP_IS_TIMEOUT(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_TIMEOUT) -+#define EP_IS_RAILMASK(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_RAILMASK) -+#define EP_IS_NO_TYPE(ATTR) (EP_ATTR2TYPE(ATTR) == 0) -+ -+#define EP_DATA_SHIFT (16) -+#define EP_DATA_MASK 0xffff0000 -+ -+#define EP_ATTR2DATA(ATTR) (((ATTR) & EP_DATA_MASK) >> EP_DATA_SHIFT) -+#define EP_DATA2ATTR(DATA) (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK) -+ -+#define EP_CLEAR_DATA(ATTR) (((ATTR) & ~EP_TYPE_MASK) & ~EP_DATA_MASK) -+#define EP_SET_DATA(ATTR,TYPE,DATA) (EP_CLEAR_DATA(ATTR) | ((TYPE) & EP_TYPE_MASK) | (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK)) -+ -+#define EP_DEFAULT_TIMEOUT (HZ*30) -+ -+#if !defined(offsetof) -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+#if !defined(roundup) -+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) -+#endif -+ -+/* -+ * Message transaction ID's - these are unique 64 bts -+ * numbers which include the initial rail number. -+ */ -+typedef struct ep_xid -+{ -+ uint32_t Generation; -+ uint32_t Handle; -+ uint64_t Unique; -+} EP_XID; -+ -+#define EP_INVALIDATE_XID(xid) ((xid).Generation = (xid).Handle = (xid).Unique = 0) -+ -+#define EP_XID_INVALID(xid) ((xid).Generation == 0 && (xid).Handle == 0 && (xid).Unique == 0) -+#define EP_XIDS_MATCH(a,b) ((a).Generation == (b).Generation && (a).Handle == (b).Handle && (a).Unique == (b).Unique) -+ -+typedef struct ep_backoff -+{ -+ unsigned char type; -+ unsigned char indx; -+ unsigned short count; -+} EP_BACKOFF; -+ -+/* values for "type" */ -+#define EP_BACKOFF_FREE 0 -+#define EP_BACKOFF_ENVELOPE 1 -+#define EP_BACKOFF_FETCH 2 -+#define EP_BACKOFF_DATA 3 -+#define EP_BACKOFF_DONE 4 -+#define EP_BACKOFF_STABILISE 5 -+ -+#ifndef __ELAN__ -+ -+/* forward declaration of types */ -+typedef struct ep_rail EP_RAIL; -+typedef struct ep_sys EP_SYS; -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct ep_callback -+{ -+ struct ep_callback *Next; -+ void (*Routine)(void *, statemap_t *); -+ void *Arg; -+} EP_CALLBACK; -+ -+#define EP_CB_FLUSH_FILTERING 0 -+#define EP_CB_FLUSH_FLUSHING 1 -+#define EP_CB_PASSIVATED 2 -+#define EP_CB_FAILOVER 3 -+#define EP_CB_DISCONNECTING 4 -+#define EP_CB_DISCONNECTED 5 -+#define EP_CB_NODESET 6 -+#define EP_CB_COUNT 7 -+ -+#endif /* !defined(__ELAN__) */ -+ -+/* Small unreliable system message queues */ -+#define EP_SYSTEMQ_INTR 0 /* input queue for cluster membership generating an interrupt */ -+#define EP_SYSTEMQ_POLLED 1 /* input queue for cluster membership polled on clock tick */ -+#define EP_SYSTEMQ_MANAGER 2 /* input queue for manager messages */ -+#define EP_NUM_SYSTEMQ 64 -+ -+#define EP_SYSTEMQ_ADDR(qnum) (EP_SYSTEM_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+#define EP_SYSTEMQ_DESC(base,qnum) ((base) + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+#define EP_SYSTEMQ_MSG_ALIGN 64 /* message sizes aligned to 64 byte boundaries */ -+#define EP_SYSTEMQ_MSG_MAX (4*64) /* max message size */ -+ -+/* Special flag for Version field to indicate message not -+ * seen in main memory yet and time limit to poll for it */ -+#define EP_SYSTEMQ_UNRECEIVED 0xdeadbabe -+#define EP_SYSTEMQ_UNRECEIVED_TLIMIT 16384 /* 1023 uS */ -+ -+#ifndef __ELAN__ -+ -+typedef void (EP_INPUTQ_HANDLER) (EP_RAIL *rail, void *arg, void *msg); -+typedef void (EP_INPUTQ_CALLBACK) (EP_RAIL *rail, void *arg); -+ -+typedef struct ep_inputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_INPUTQ; -+ -+typedef struct ep_outputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_OUTPUTQ; -+ -+/* returned values for ep_outputq_state */ -+#define EP_OUTPUTQ_BUSY 0 -+#define EP_OUTPUTQ_FAILED 1 -+#define EP_OUTPUTQ_FINISHED 2 -+ -+typedef struct ep_switch -+{ -+ unsigned present:1; -+ unsigned invalid:1; -+ unsigned link:3; -+ unsigned bcast:3; -+ unsigned lnr; -+} EP_SWITCH; -+ -+/* -+ * Network error fixup, flush, relocation messges -+ */ -+typedef struct ep_map_nmd_body -+{ -+ uint32_t nFrags; -+ EP_RAILMASK Railmask; -+ EP_NMD Nmd[EP_MAXFRAG]; -+} EP_MAP_NMD_BODY; -+ -+typedef struct ep_failover_body -+{ -+ EP_XID Xid; -+ EP_RAILMASK Railmask; -+} EP_FAILOVER_BODY; -+ -+typedef struct ep_failover_txd -+{ -+ EP_XID Xid; -+ uint32_t Rail; -+ EP_ADDR TxdRail; -+} EP_FAILOVER_TXD; -+ -+typedef uint64_t EP_NETERR_COOKIE; -+ -+#define EP_PANIC_STRLEN 31 -+ -+typedef struct ep_node_state -+{ -+ unsigned char State; -+ unsigned char NetworkErrorState; -+ EP_RAILMASK Railmask; -+} EP_NODE_STATE; -+ -+#define EP_MANAGER_MSG_SIZE (2 * EP_SYSTEMQ_MSG_ALIGN) -+ -+typedef struct ep_manager_msg_hdr -+{ -+ EP_XID Xid; /* Message transaction id */ -+ -+ uint16_t NodeId; /* Originating node number */ -+ uint16_t DestId; /* destination node id */ -+ -+ uint16_t Checksum; /* Message checksum */ -+ uint8_t Rail; /* Rail message associated with */ -+ uint8_t Type; /* Message type */ -+ -+ uint32_t Pad; /* pad to 32 bytes */ -+ -+ uint32_t Version; /* Message Version */ -+} EP_MANAGER_MSG_HDR; -+ -+typedef union ep_manager_msg_body -+{ -+ unsigned char Space[EP_MANAGER_MSG_SIZE - sizeof (EP_MANAGER_MSG_HDR)]; -+ -+ EP_NETERR_COOKIE Cookies[2]; /* EP_MSG_TYPE_NETERR */ -+ EP_MAP_NMD_BODY MapNmd; /* EP_MSG_TYPE_MAP_NMD */ -+ EP_FAILOVER_BODY Failover; /* EP_MSG_TYPE_FAILOVER_REQUEST */ -+ EP_FAILOVER_TXD FailoverTxd; /* EP_MSG_TYPE_FAILOVER_RESPONSE */ -+ unsigned char PanicReason[EP_PANIC_STRLEN+1]; /* EP_MSG_TYPE_REMOTE_PANIC */ -+ EP_NODE_STATE NodeState; /* EP_MSG_TYPE_GET_NODE_STATE_RESPONSE */ -+ EP_SERVICE Service; /* EP_MSG_TYPE_GET_NODE_STATE */ -+} EP_MANAGER_MSG_BODY; -+ -+typedef struct ep_manager_msg -+{ -+ EP_MANAGER_MSG_BODY Body; -+ EP_MANAGER_MSG_HDR Hdr; -+} EP_MANAGER_MSG; -+ -+#define EP_MANAGER_MSG_VERSION 0xcad01000 -+#define EP_MANAGER_MSG_TYPE_REMOTE_PANIC 0x00 -+#define EP_MANAGER_MSG_TYPE_NETERR_REQUEST 0x01 -+#define EP_MANAGER_MSG_TYPE_NETERR_RESPONSE 0x02 -+#define EP_MANAGER_MSG_TYPE_FLUSH_REQUEST 0x03 -+#define EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE 0x04 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST 0x05 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE 0x06 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST 0x07 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE 0x08 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE 0x09 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE 0x0a -+ -+/* Message types which should only be sent when a rail is connected */ -+#define EP_MANAGER_MSG_TYPE_CONNECTED(type) (((type) & 1) == 1) -+ -+#define EP_MANAGER_OUTPUTQ_SLOTS 128 /* # entries in outputq */ -+#define EP_MANAGER_INPUTQ_SLOTS 128 /* # entries in inputq */ -+#define EP_MANAGER_OUTPUTQ_RETRIES 31 /* # retries for manager messages */ -+ -+/* XID's are allocated from a cache, which doesn't -+ * require locking since it relies on the caller to -+ * manage the locking for us. -+ */ -+typedef struct ep_xid_cache -+{ -+ struct list_head Link; -+ -+ uint32_t Handle; /* my XID cache handle */ -+ uint64_t Current; /* range of XID.Unique we can allocate from */ -+ uint64_t Last; -+ -+ void (*MessageHandler)(void *arg, EP_MANAGER_MSG *); -+ void *Arg; -+} EP_XID_CACHE; -+ -+#define EP_XID_CACHE_CHUNKS (10000) -+ -+typedef struct ep_node_rail -+{ -+ struct list_head Link; /* can be linked on work lists */ -+ -+ unsigned char State; /* node connection state */ -+ unsigned char NetworkErrorState; /* reasons for keeping the context filter up */ -+ unsigned char MessageState; /* state of messages during passivate/relocate */ -+ -+ EP_XID MsgXid; /* neterr/flush transaction id */ -+ long NextRunTime; /* time to drop context filter for destroyed dma packet, or to send next request */ -+ EP_NETERR_COOKIE NetworkErrorCookies[2]; /* identify cookie for destroyed atomic packet */ -+ -+ uint32_t Cookie; /* per-node network error cookie */ -+ spinlock_t CookieLock; /* and spinlock for it. */ -+ -+ struct list_head StalledDmas; /* list of stalled DMAs */ -+} EP_NODE_RAIL; -+ -+#define EP_NODE_DISCONNECTED 0 /* node is disconnected */ -+#define EP_NODE_CONNECTING 1 /* awaiting connection */ -+#define EP_NODE_CONNECTED 2 /* node is connected */ -+#define EP_NODE_LEAVING_CONNECTED 3 /* node is starting to disconnect */ -+#define EP_NODE_LOCAL_PASSIVATE 4 /* flushing context filter/run queues */ -+#define EP_NODE_REMOTE_PASSIVATE 5 /* stalling for neterr flush */ -+#define EP_NODE_PASSIVATED 6 /* relocating active/passive messages */ -+#define EP_NODE_DISCONNECTING 7 /* entering disconncted - abort remaining comms */ -+#define EP_NODE_NUM_STATES 8 -+ -+#define EP_NODE_NETERR_ATOMIC_PACKET (1 << 0) -+#define EP_NODE_NETERR_DMA_PACKET (1 << 1) -+ -+#define EP_NODE_PASSIVE_MESSAGES (1 << 0) -+#define EP_NODE_ACTIVE_MESSAGES (1 << 1) -+ -+/* -+ * Kernel thread code is loaded as a table. -+ */ -+typedef struct ep_symbol -+{ -+ char *name; -+ EP_ADDR value; -+} EP_SYMBOL; -+ -+typedef struct ep_code -+{ -+ u_char *text; -+ u_int text_size; -+ u_char *data; -+ u_int data_size; -+ u_char *rodata; -+ u_int rodata_size; -+ EP_SYMBOL *symbols; -+ -+ int ntext; -+ sdramaddr_t pptext; -+ EP_ADDR etext; -+ sdramaddr_t _stext; -+ sdramaddr_t _rodata; -+ -+ int ndata; -+ sdramaddr_t ppdata; -+ EP_ADDR edata; -+ sdramaddr_t _sdata; -+} EP_CODE; -+ -+typedef struct ep_switchstate -+{ -+ unsigned char linkid; -+ unsigned char LNR; -+ unsigned char bcast; -+ unsigned char uplink; -+} EP_SWITCHSTATE; -+ -+typedef struct ep_rail_ops -+{ -+ void (*DestroyRail) (EP_RAIL *rail); -+ -+ int (*StartRail) (EP_RAIL *rail); -+ void (*StallRail) (EP_RAIL *rail); -+ void (*StopRail) (EP_RAIL *rail); -+ -+ sdramaddr_t (*SdramAlloc) (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ void (*SdramFree) (EP_RAIL *rail, sdramaddr_t addr, unsigned size); -+ void (*SdramWriteb) (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+ void (*KaddrMap) (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*SdramMap) (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*Unmap) (EP_RAIL *rail, EP_ADDR eaddr, unsigned len); -+ -+ void *(*DvmaReserve) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages); -+ void (*DvmaRelease) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages, void *private); -+ void (*DvmaSetPte) (EP_RAIL *rail, void *private, unsigned index, physaddr_t phys, unsigned int perm); -+ physaddr_t (*DvmaReadPte) (EP_RAIL *rail, void *private, unsigned index); -+ void (*DvmaUnload)(EP_RAIL *rail, void *private, unsigned index, unsigned npages); -+ void (*FlushTlb) (EP_RAIL *rail); -+ -+ int (*ProbeRoute) (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, -+ int *linkdown, int attempts, EP_SWITCH *lsw); -+ void (*PositionFound) (EP_RAIL *rail, ELAN_POSITION *pos); -+ int (*CheckPosition) (EP_RAIL *rail); -+ void (*NeterrFixup) (EP_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+ void (*LoadSystemRoute) (EP_RAIL *rail, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+ -+ void (*LoadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*UnloadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*LowerFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*RaiseFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*NodeDisconnected) (EP_RAIL *rail, unsigned nodeId); -+ -+ void (*FlushFilters) (EP_RAIL *rail); -+ void (*FlushQueues) (EP_RAIL *rail); -+ -+ -+ EP_INPUTQ *(*AllocInputQ) (EP_RAIL *rail, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ void (*callback)(EP_RAIL *rail, void *arg), void *arg); -+ void (*FreeInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*EnableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*DisableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ int (*PollInputQ) (EP_RAIL *rail, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+ -+ EP_OUTPUTQ *(*AllocOutputQ) (EP_RAIL *rail, unsigned slotSize, unsigned slotCount); -+ void (*FreeOutputQ) (EP_RAIL *rail, EP_OUTPUTQ *outputq); -+ void *(*OutputQMsg) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQState) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQSend) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries); -+ -+ void (*FillOutStats) (EP_RAIL *rail, char *str); -+ void (*Debug) (EP_RAIL *rail); -+ -+} EP_RAIL_OPS; -+ -+#define ep_alloc_inputq(rail,qnum,slotSize,slotCount,callback,arg) \ -+ (rail)->Operations.AllocInputQ(rail,qnum,slotSize,slotCount,callback,arg) -+#define ep_free_inputq(rail,inputq) \ -+ (rail)->Operations.FreeInputQ(rail,inputq) -+#define ep_enable_inputq(rail,inputq) \ -+ (rail)->Operations.EnableInputQ(rail,inputq) -+#define ep_disable_inputq(rail,inputq) \ -+ (rail)->Operations.DisableInputQ(rail,inputq) -+#define ep_poll_inputq(rail,inputq,maxCount,handler,arg) \ -+ (rail)->Operations.PollInputQ(rail,inputq,maxCount,handler,arg) -+#define ep_alloc_outputq(rail,slotSize,slotCount)\ -+ (rail)->Operations.AllocOutputQ(rail,slotSize,slotCount) -+#define ep_free_outputq(rail,outputq)\ -+ (rail)->Operations.FreeOutputQ(rail,outputq) -+#define ep_outputq_msg(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQMsg(rail,outputq,slotNum) -+#define ep_outputq_state(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQState(rail,outputq,slotNum) -+#define ep_outputq_send(rail,outputq,slotNum,size,vp,qnum,retries)\ -+ (rail)->Operations.OutputQSend(rail,outputq,slotNum,size,vp,qnum,retries) -+ -+struct ep_rail -+{ -+ EP_SYS *System; /* "system" we've attached to */ -+ -+ unsigned char Number; /* Rail number */ -+ unsigned char State; /* Rail state */ -+ char Name[32]; /* Rail name */ -+ -+ struct list_head ManagerLink; /* linked on ManagedRails list */ -+ -+ ELAN_DEVINFO Devinfo; /* Device information for this rail */ -+ ELAN_POSITION Position; /* Position on switch device is connected to */ -+ -+ EP_RAIL_OPS Operations; /* device specific operations */ -+ EP_RAIL_STATS Stats; /* statistics */ -+ -+ EP_ALLOC ElanAllocator; /* per-rail elan memory allocator */ -+ EP_ALLOC MainAllocator; /* per-rail main memory allocator */ -+ -+ unsigned TlbFlushRequired; /* lazy TLB flushing */ -+ -+ int SwitchBroadcastLevel; /* current switch level ok for broadcast */ -+ unsigned long SwitchBroadcastLevelTick; -+ -+ int SwitchProbeLevel; /* result of last switch probe */ -+ EP_SWITCHSTATE SwitchState[ELAN_MAX_LEVELS]; -+ EP_SWITCHSTATE SwitchLast[ELAN_MAX_LEVELS]; -+ unsigned long SwitchProbeTick[ELAN_MAX_LEVELS]; -+ -+ /* Node disconnecting/connecting state */ -+ EP_CALLBACK *CallbackList[EP_CB_COUNT]; /* List of callbacks */ -+ kmutex_t CallbackLock; /* and lock for it. */ -+ unsigned CallbackStep; /* step through UpdateConnectionState. */ -+ -+ /* back pointer for cluster membership */ -+ void *ClusterRail; -+ -+ /* Per node state for message passing */ -+ EP_NODE_RAIL *Nodes; /* array of per-node state */ -+ statemap_t *NodeSet; /* per-rail statemap of connected nodes */ -+ statemap_t *NodeChangeMap; /* statemap of nodes to being connected/disconnected */ -+ statemap_t *NodeChangeTmp; /* and temporary copies */ -+ -+ struct list_head NetworkErrorList; /* list of nodes resolving network errors */ -+ struct list_head LocalPassivateList; /* list of nodes in state LOCAL_PASSIVATE */ -+ struct list_head RemotePassivateList; /* list of nodes waiting for remote network error flush */ -+ struct list_head PassivatedList; /* list of nodes performing message relocation */ -+ struct list_head DisconnectingList; /* list of nodes transitioning to disconnected */ -+ -+ EP_XID_CACHE XidCache; /* XID cache for node messages (single threaded access) */ -+ -+ /* Manager messages */ -+ EP_INPUTQ *ManagerInputQ; -+ EP_OUTPUTQ *ManagerOutputQ; -+ unsigned ManagerOutputQNextSlot; -+ spinlock_t ManagerOutputQLock; -+ -+ /* /proc entries */ -+ struct proc_dir_entry *ProcDir; -+ struct proc_dir_entry *SvcIndicatorDir; -+ int CallbackRegistered; -+}; -+ -+/* values for State */ -+#define EP_RAIL_STATE_UNINITIALISED 0 /* device uninitialised */ -+#define EP_RAIL_STATE_STARTED 1 /* device started but network position unknown */ -+#define EP_RAIL_STATE_RUNNING 2 /* device started and position known */ -+#define EP_RAIL_STATE_INCOMPATIBLE 3 /* device started, but position incompatible */ -+ -+typedef struct ep_rail_entry -+{ -+ struct list_head Link; -+ EP_RAIL *Rail; -+} EP_RAIL_ENTRY; -+ -+typedef struct ep_subsys -+{ -+ EP_SYS *Sys; -+ -+ struct list_head Link; /* Linked on sys->Subsystems */ -+ char *Name; /* Name to lookup */ -+ -+ void (*Destroy) (struct ep_subsys *subsys, EP_SYS *sys); -+ -+ int (*AddRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+ void (*RemoveRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+} EP_SUBSYS; -+ -+typedef struct ep_node -+{ -+ EP_RAILMASK ConnectedRails; -+} EP_NODE; -+ -+struct ep_sys -+{ -+ EP_RAIL *Rails[EP_MAX_RAILS]; /* array of all available devices */ -+ -+ kmutex_t StartStopLock; /* lock for starting stopping rails */ -+ -+ ELAN_POSITION Position; /* primary node position */ -+ -+ EP_NMH_TABLE MappingTable; /* Network mapping handle table */ -+ -+ EP_ALLOC Allocator; /* shared main memory allocator */ -+ -+ EP_DVMA_STATE DvmaState; /* dvma state */ -+ -+ kmutex_t SubsysLock; /* lock on the Subsytems list */ -+ struct list_head Subsystems; /* list of subsystems */ -+ -+ /* device manager state */ -+ struct list_head ManagedRails; /* list of managed devices */ -+ EP_KTHREAD ManagerThread; /* place for manager thread to sleep */ -+ -+ /* global node state */ -+ spinlock_t NodeLock; /* spinlock for node state (including per-device node state) */ -+ EP_NODE *Nodes; /* system wide node state */ -+ statemap_t *NodeSet; /* system wide nodeset */ -+ struct list_head NodesetCallbackList; /* list of "callbacks" */ -+ -+ /* Transaction Id */ -+ struct list_head XidCacheList; /* list of XID caches */ -+ uint32_t XidGeneration; /* XID generation number (distinguishes reboots) */ -+ uint32_t XidHandle; /* XID handles (distinguishes XID caches) */ -+ uint64_t XidNext; /* next XID to prime cache */ -+ spinlock_t XidLock; /* and it's spinlock */ -+ -+ /* Shutdown/Panic */ -+ unsigned int Shutdown; /* node has shutdown/panic'd */ -+}; -+ -+#if defined(DEBUG_ASSERT) -+extern int ep_assfail (EP_RAIL *rail, const char *string, const char *func, const char *file, const int line); -+extern int sdram_assert; -+extern int assfail_mode; -+ -+#define EP_ASSERT(rail, EX) do { \ -+ if (!(EX) && ep_assfail ((EP_RAIL *) (rail), #EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define EP_ASSFAIL(rail,EX) do { \ -+ if (ep_assfail ((EP_RAIL *) (rail), EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define SDRAM_ASSERT(EX) (sdram_assert ? (EX) : 1) -+#else -+#define EP_ASSERT(rail, EX) ((void) 0) -+#define EP_ASSFAIL(rail,str) ((void) 0) -+#define SDRAM_ASSERT(EX) (1) -+#endif -+ -+/* conf_osdep.c */ -+extern EP_SYS *ep_system(void); -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+/* procfs_osdep.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+extern struct proc_dir_entry *ep_config_root; -+ -+/* kcomm.c */ -+extern int ep_sys_init (EP_SYS *sys); -+extern void ep_sys_fini (EP_SYS *sys); -+extern void ep_shutdown (EP_SYS *sys); -+extern int ep_init_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_destroy_rail (EP_RAIL *rail); -+extern int ep_start_rail (EP_RAIL *rail); -+extern void ep_stop_rail (EP_RAIL *rail); -+ -+extern void ep_connect_node (EP_RAIL *rail, int nodeId); -+extern int ep_disconnect_node (EP_RAIL *rail, int nodeId); -+ -+extern EP_XID ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache); -+ -+extern int ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body); -+ -+extern void ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason); -+ -+extern void ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys); -+extern void ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys); -+extern EP_SUBSYS *ep_subsys_find (EP_SYS *sys, char *name); -+ -+extern void DisplayNodes (EP_RAIL *rail); -+ -+extern void ep_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* neterr.c */ -+extern void ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie); -+ -+/* kcomm_elan3.c */ -+extern unsigned int ep3_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* kcomm_elan4.c */ -+extern unsigned int ep4_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* probenetwork.c */ -+extern int ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos); -+extern void CheckPosition (EP_RAIL *rail); -+ -+extern uint16_t CheckSum (char *msg, int nob); -+ -+/* threadcode.c */ -+extern EP_ADDR ep_symbol (EP_CODE *code, char *name); -+extern int ep_loadcode (EP_RAIL *rail, EP_CODE *code); -+extern void ep_unloadcode (EP_RAIL *rail, EP_CODE *code); -+ -+/* Public interface */ -+/* debug.c */ -+extern int ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int count, int off); -+extern void ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits); -+ -+/* epcomms.c */ -+extern int ep_waitfor_nodeid (EP_SYS *sys); -+extern int ep_nodeid (EP_SYS *sys); -+extern int ep_numnodes (EP_SYS *sys); -+ -+/* railhints.c */ -+extern int ep_pickRail(EP_RAILMASK railmask); -+ -+/* support.c */ -+extern int ep_register_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_nodeset_callbacks (EP_SYS *sys, statemap_t *map); -+ -+extern int ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *); -+extern unsigned int ep_backoff (EP_BACKOFF *backoff, int type); -+ -+#endif /* !__ELAN__ */ -+ -+#endif /* __ELAN_KCOMM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/kcomm_stats.h linux-2.6.9/include/elan/kcomm_stats.h ---- clean/include/elan/kcomm_stats.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kcomm_stats.h 2005-05-31 07:42:43.000000000 -0400 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_EPSTATS_H -+#define __EP_EPSTATS_H -+ -+#ident "$Id: kcomm_stats.h,v 1.5.2.1 2005/05/31 11:42:43 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_stats.h,v $ */ -+ -+#define EP_BUCKET_SLOTS 8 -+ -+#define BucketStat(obj,stat,size) ((size) < 128 ? (obj)->Stats.stat[0]++ : \ -+ (size) < 512 ? (obj)->Stats.stat[1]++ : \ -+ (size) < 1024 ? (obj)->Stats.stat[2]++ : \ -+ (size) < 8192 ? (obj)->Stats.stat[3]++ : \ -+ (size) < 16384 ? (obj)->Stats.stat[4]++ : \ -+ (size) < 32768 ? (obj)->Stats.stat[5]++ : \ -+ (size) < 65536 ? (obj)->Stats.stat[6]++ : \ -+ (obj)->Stats.stat[7]++) -+#define IncrStat(obj,stat) ((obj)->Stats.stat++) -+ -+ -+#define EP3_NUM_DMA_FAIL 11 /* NOTE - the same as EP_NUM_RETRIES */ -+ -+#define ADD_STAT(STATS,STAT,VALUE) { unsigned long now = lbolt;\ -+ STATS.STAT.total += VALUE; \ -+ if ( (long)( now - STATS.STAT.last_time) > HZ ){ \ -+ STATS.STAT.last_per_sec = ( STATS.STAT.total - STATS.STAT.last_count)/ ( ( (long)( now - STATS.STAT.last_time) + (HZ/2)) / HZ);\ -+ STATS.STAT.last_time = now; \ -+ STATS.STAT.last_count = STATS.STAT.total; \ -+ }} \ -+ -+#define INC_STAT(STATS,STAT) ADD_STAT(STATS,STAT,1) -+ -+#define GET_STAT_PER_SEC(STATS, STAT) ( (( lbolt - STATS.STAT.last_time ) < (HZ * 5)) ? STATS.STAT.last_per_sec : 0 ) -+#define GET_STAT_TOTAL(STATS, STAT) ( STATS.STAT.total ) -+ -+struct ep_stats_count -+{ -+ unsigned long total; -+ unsigned long last_time; -+ unsigned long last_count; -+ unsigned long last_per_sec; -+}; -+ -+typedef struct ep_stats_count EP_STATS_COUNT; -+ -+typedef struct ep3_rail_stats -+{ -+ unsigned long IssueDmaFail[EP3_NUM_DMA_FAIL]; -+ -+ unsigned long DmaQueueLength[EP_BUCKET_SLOTS]; -+ unsigned long CprocDmaQueueOverflow; -+ unsigned long DprocDmaQueueOverflow; -+ unsigned long IprocDmaQueueOverflow; -+ unsigned long CprocEventQueueOverflow; -+ unsigned long DprocEventQueueOverflow; -+ unsigned long IprocEventQueueOverflow; -+ -+ unsigned long QueueingPacketTrap; -+ unsigned long DmaIdentifyTrap; -+ unsigned long ThreadIdentifyTrap; -+ unsigned long DmaPacketTrap; -+} EP3_RAIL_STATS; -+ -+typedef struct ep4_rail_stats -+{ -+ unsigned long somestatsgohere; -+} EP4_RAIL_STATS; -+ -+typedef struct ep_rail_stats -+{ -+ unsigned long SendMessageFailed; -+ unsigned long NeterrAtomicPacket; -+ unsigned long NeterrDmaPacket; -+ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+ -+} EP_RAIL_STATS; -+ -+typedef struct ep_cm_rail_stats -+{ -+ /* cluster membership statistics */ -+ unsigned long HeartbeatsSent; -+ unsigned long HeartbeatsRcvd; -+ -+ unsigned long RetryHeartbeat; -+ unsigned long RejoinRequest; -+ unsigned long RejoinTooSlow; -+ unsigned long LaunchMessageFail; -+ unsigned long MapChangesSent; -+ -+ /* Heartbeat scheduling stats */ -+ unsigned long HeartbeatOverdue; -+} EP_CM_RAIL_STATS; -+ -+typedef struct ep_comms_rail_stats -+{ -+ /* kernel comms large message statistics */ -+ unsigned long TxEnveEvent; -+ unsigned long TxDataEvent; -+ unsigned long TxDoneEvent; -+ unsigned long RxDoneEvent; -+ unsigned long MulticastTxDone; -+ unsigned long QueueReceive; -+ -+ unsigned long TxEnveRetry; -+ unsigned long TxDataRetry; -+ unsigned long TxDoneRetry; -+ unsigned long RxThrdEvent; -+ unsigned long RxDataRetry; -+ unsigned long RxDoneRetry; -+ unsigned long StallThread; -+ unsigned long ThrdWaiting; -+ unsigned long CompleteEnvelope; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+ -+ unsigned long LockRcvrTrapped; -+} EP_COMMS_RAIL_STATS; -+ -+typedef struct ep_comms_stats -+{ -+ unsigned long DataXmit[8]; -+ unsigned long McastXmit[8]; -+ unsigned long RPCXmit[8]; -+ unsigned long RPCPut[8]; -+ unsigned long RPCGet[8]; -+ unsigned long CompleteRPC[8]; -+ unsigned long RxData[8]; -+ unsigned long RxMcast[8]; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+} EP_COMMS_STATS; -+ -+#endif /* __EP_EPSTATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/kmap.h linux-2.6.9/include/elan/kmap.h ---- clean/include/elan/kmap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kmap.h 2004-12-14 05:19:23.000000000 -0500 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMAP_H -+#define __ELAN_KMAP_H -+ -+#ident "$Id: kmap.h,v 1.4 2004/12/14 10:19:23 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.h,v $ */ -+ -+#include -+ -+extern void ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t vaddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len); -+extern void ep_perrail_dvma_sync (EP_RAIL *rail); -+ -+typedef struct ep_dvma_nmh -+{ -+ EP_NMH dvma_nmh; -+ -+ struct list_head dvma_link; /* chained on ep_dvma_state */ -+ unsigned dvma_perm; /* permissions for region */ -+ -+ spinlock_t dvma_lock; -+ EP_RAILMASK dvma_railmask; /* bitmap of rails */ -+ EP_RAIL *dvma_rails[EP_MAX_RAILS]; /* assoicated rails */ -+ void *dvma_private[EP_MAX_RAILS]; /* pointers to rail private data */ -+ unsigned int dvma_attrs[1]; /* bitmap of which rails pages are loaded NOTE - max 32 rails */ -+} EP_DVMA_NMH; -+ -+/* values for dvma_perm */ -+#define EP_PERM_EXECUTE 0 -+#define EP_PERM_READ 1 -+#define EP_PERM_WRITE 2 -+#define EP_PERM_ALL 3 -+ -+typedef struct ep_dvma_state -+{ -+ kmutex_t dvma_lock; -+ struct list_head dvma_handles; -+ struct list_head dvma_rails; -+ EP_RMAP *dvma_rmap; -+} EP_DVMA_STATE; -+ -+extern void ep_dvma_init (EP_SYS *sys); -+extern void ep_dvma_fini (EP_SYS *sys); -+extern EP_NMH *ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm); -+extern void ep_dvma_release (EP_SYS *sys, EP_NMH *nmh); -+extern void ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, -+ EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset); -+extern void ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd); -+ -+extern void ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+extern int ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern uint16_t rolling_check_sum (char *msg, int nob, uint16_t sum); -+ -+#endif /* __ELAN_KMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/kmsg.h linux-2.6.9/include/elan/kmsg.h ---- clean/include/elan/kmsg.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kmsg.h 2003-09-23 09:55:12.000000000 -0400 -@@ -0,0 +1,14 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMSG_H -+#define __ELAN_KMSG_H -+ -+#ident "@(#)$Id: kmsg.h,v 1.1 2003/09/23 13:55:12 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg.h,v $ */ -+ -+#endif /* __ELAN_KMSG_H */ -diff -urN clean/include/elan/kthread.h linux-2.6.9/include/elan/kthread.h ---- clean/include/elan/kthread.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/kthread.h 2004-05-06 10:24:08.000000000 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan/nmh.h linux-2.6.9/include/elan/nmh.h ---- clean/include/elan/nmh.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/nmh.h 2004-01-06 05:29:55.000000000 -0500 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NMH_H -+#define __ELAN3_NMH_H -+ -+#ident "@(#)$Id: nmh.h,v 1.7 2004/01/06 10:29:55 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.h,v $*/ -+ -+ -+/* Forward declarations */ -+typedef struct ep_nmd EP_NMD; -+typedef struct ep_nmh_ops EP_NMH_OPS; -+typedef struct ep_nmh EP_NMH; -+ -+/* Railmask held in 16 bit field (packs with nodeId into NMD */ -+typedef uint16_t EP_RAILMASK; -+ -+#define EP_RAIL2RAILMASK(rnum) (1 << (rnum)) -+#define EP_RAILMASK_ALL 0xffff -+ -+/* kernel comms elan network address */ -+typedef uint32_t EP_ADDR; -+ -+/* network mapping descriptor - this is returned to the user from a map operation, -+ * and is what is passed to all communication functions */ -+struct ep_nmd -+{ -+ EP_ADDR nmd_addr; /* base address */ -+ uint32_t nmd_len; /* size in bytes */ -+ uint32_t nmd_attr; /* nodeid << 16 | railmask */ -+}; -+ -+#define EP_NMD_ATTR(nodeid,railmask) (((nodeid) << 16) | (railmask)) -+#define EP_NMD_NODEID(nmd) ((nmd)->nmd_attr >> 16) -+#define EP_NMD_RAILMASK(nmd) ((nmd)->nmd_attr & EP_RAILMASK_ALL) -+ -+#if !defined(__ELAN__) -+ -+struct ep_nmh_ops -+{ -+ int (*op_map_rails) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); /* add mappings to different rail(s) */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ uint16_t (*op_calc_check_sum) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); /* calculates check sum */ -+#endif -+}; -+ -+struct ep_nmh -+{ -+ EP_NMD nmh_nmd; /* public field */ -+ struct list_head nmh_link; /* linked on hash table */ -+ EP_NMH_OPS *nmh_ops; /* operations to perform on object */ -+}; -+ -+#define EP_NMH_NUMHASH (32 - 11 + 1) /* one hash table for each power of 2 above pagesize */ -+#define EP_NMH_HASHSIZE (64) /* max size of each hash table */ -+ -+typedef struct ep_nmh_table -+{ -+ struct list_head *tbl_hash[EP_NMH_NUMHASH]; -+ unsigned tbl_size[EP_NMH_NUMHASH]; -+} EP_NMH_TABLE; -+ -+extern int ep_nmh_init (EP_NMH_TABLE *tbl); -+extern void ep_nmh_fini (EP_NMH_TABLE *tbl); -+ -+extern void ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern void ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern EP_NMH *ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmh); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern uint32_t ep_nmd_calc_data_check_sum(EP_SYS *sys, EP_NMD *nmd, int nFrags); -+#endif -+ -+/* Public interface */ -+extern EP_RAILMASK ep_nmd2railmask (EP_NMD *frags, int nFrags); -+extern void ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len); -+extern int ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b); -+extern int ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask); -+ -+#endif /* __ELAN__ */ -+ -+#endif /* __ELAN3_NMH_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/rmap.h linux-2.6.9/include/elan/rmap.h ---- clean/include/elan/rmap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/rmap.h 2004-05-19 06:24:40.000000000 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_RMAP_H -+#define __ELAN_RMAP_H -+ -+#ident "$Id: rmap.h,v 1.8 2004/05/19 10:24:40 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.h,v $ */ -+ -+ -+typedef struct ep_rmap_entry -+{ -+ size_t m_size; -+ u_long m_addr; -+} EP_RMAP_ENTRY; -+ -+typedef struct ep_rmap -+{ -+ spinlock_t m_lock; -+ kcondvar_t m_wait; -+ u_int m_size; -+ u_int m_free; -+ u_int m_want; -+ char *m_name; -+ EP_RMAP_ENTRY m_map[1]; -+} EP_RMAP; -+ -+extern void ep_display_rmap (EP_RMAP *map); -+ -+extern void ep_rmapinit (EP_RMAP *rmap, char *name, u_int mapsize); -+extern unsigned long ep_rmalloc (EP_RMAP *rmap, size_t size, int cansleep); -+extern unsigned long ep_rmalloc_constrained (EP_RMAP *mp, size_t size, unsigned long alo, unsigned long ahi, unsigned long align, int cansleep); -+extern void ep_rmfree (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern unsigned long ep_rmget (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern EP_RMAP *ep_rmallocmap (size_t size, char *name, int cansleep); -+extern void ep_rmfreemap (EP_RMAP *map); -+ -+#endif /* __ELAN3_RMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/statemap.h linux-2.6.9/include/elan/statemap.h ---- clean/include/elan/statemap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/statemap.h 2003-10-07 09:22:38.000000000 -0400 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_STATEMAP_H -+#define __ELAN_STATEMAP_H -+ -+#ident "$Id: statemap.h,v 1.8 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.h,v $ */ -+ -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+typedef struct -+{ -+ unsigned int size; -+ unsigned int nob; -+ unsigned int changemap_nob; -+ unsigned int bitmap_nob; -+ bitmap_t *changemap0; -+ bitmap_t *changemap1; -+ bitmap_t *changemap2; -+ bitmap_t *bitmap; -+} statemap_t; -+ -+extern bitmap_t statemap_getseg (statemap_t *map, unsigned int offset); -+extern void statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg); -+extern bitmap_t statemap_getbits (statemap_t *map, unsigned int offset, int nbits); -+extern void statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits); -+extern void statemap_zero (statemap_t *map); -+extern void statemap_setmap (statemap_t *dst, statemap_t *src); -+extern void statemap_ormap (statemap_t *dst, statemap_t *src); -+extern int statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange); -+extern int statemap_changed (statemap_t *map); -+extern void statemap_reset (statemap_t *map); -+extern void statemap_copy (statemap_t *dst, statemap_t *src); -+extern void statemap_clearchanges (statemap_t *map); -+extern bitmap_t *statemap_tobitmap (statemap_t *map); -+extern statemap_t *statemap_create (int size); -+extern void statemap_destroy (statemap_t *map); -+ -+#endif /* __ELAN_STATEMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan/stats.h linux-2.6.9/include/elan/stats.h ---- clean/include/elan/stats.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan/stats.h 2003-09-24 09:55:37.000000000 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.h,v $*/ -+ -+#ifndef __ELAN_STATS_H -+#define __ELAN_STATS_H -+ -+ -+/* non-kernel headings */ -+#define ELAN_STATS_NAME_MAX_LEN ((uint)64) -+typedef unsigned int ELAN_STATS_IDX; -+ -+typedef struct elan_stats_map -+{ -+ char entry_name[ELAN_STATS_NAME_MAX_LEN]; -+ int index; -+} ELAN_STATS_MAP; -+ -+#if defined(__KERNEL__) -+ -+/* stats callbacks */ -+#define ELAN_STATS_OPS_VERSION ((u_int)1) -+typedef struct elan_stats_ops -+{ -+ u_int ops_version; -+ -+ int (*elan_stats_get_name) (void * arg, uint index, caddr_t name); -+ int (*elan_stats_get_block) (void * arg, uint entries, ulong *values); -+ int (*elan_stats_clear_block) (void * arg); -+ -+} ELAN_STATS_OPS; -+ -+typedef struct elan_stats_struct -+{ -+ struct list_head node; -+ -+ ELAN_STATS_IDX statidx; -+ char block_name[ELAN_STATS_NAME_MAX_LEN]; -+ uint num_entries; -+ ELAN_STATS_OPS *ops; -+ void *arg; -+ -+} ELAN_STATS_STRUCT; -+ -+/* stats.c */ -+extern int elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg); -+ -+extern int elan_stats_deregister (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find_by_name(caddr_t block_name); -+extern ELAN_STATS_STRUCT *elan_stats_find_next (ELAN_STATS_IDX statidx); -+ -+ -+/* elan_stats.c */ -+extern int elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_statidx); -+ -+extern int elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries); -+ -+extern int elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries); -+ -+extern int elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name); -+ -+extern int elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values); -+ -+extern int elan_stats_clear_block (ELAN_STATS_IDX statidx); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_STATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan3/compat.h linux-2.6.9/include/elan3/compat.h ---- clean/include/elan3/compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/compat.h 2004-06-09 05:07:03.000000000 -0400 -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.4 2004/06/09 09:07:03 mike Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/compat.h,v $*/ -+ -+#ifndef __ELAN3_COMPAT_H -+#define __ELAN3_COMPAT_H -+ -+/* compatibility header to allow Eagle branch QSNETLIBS -+ * to compile against head kernel */ -+ -+#define ELAN_EAGLE_COMPAT -+ -+/* vmseg.h */ -+#define ELAN_FLAGSTATS ELAN3_FLAGSTATS -+ -+/* uregs.h */ -+#define ELAN_STATS_NAME ELAN3_STATS_NAME -+#define elan3_stats_names elan_stats_names -+ -+/* spinlock.h */ -+#define ELAN_SPINLOCK ELAN3_SPINLOCK -+#define ELAN_SPINLOCK_MAIN ELAN3_SPINLOCK_MAIN -+#define ELAN_SPINLOCK_ELAN ELAN3_SPINLOCK_ELAN -+#define ELAN_ME_SPINENTER ELAN3_ME_SPINENTER -+#define ELAN_ME_FORCEENTER ELAN3_ME_FORCEENTER -+#define ELAN_ME_SPINEXIT ELAN3_ME_SPINEXIT -+#define ELAN_SPINENTER ELAN3_SPINENTER -+#define ELAN_SPINEXIT ELAN3_SPINEXIT -+#define elan3_me_spinblock elan_me_spinblock -+#define elan3_spinenter elan_spinenter -+ -+/* elanio.h */ -+#define ELANIO_CONTROL_PATHNAME ELAN3IO_CONTROL_PATHNAME -+#define ELANIO_USER_PATHNAME ELAN3IO_USER_PATHNAME -+#define ELANIO_SDRAM_PATHNAME ELAN3IO_SDRAM_PATHNAME -+#define ELANIO_MAX_PATHNAMELEN ELAN3IO_MAX_PATHNAMELEN -+ -+#define ELANIO_SET_BOUNDARY_SCAN ELAN3IO_SET_BOUNDARY_SCAN -+#define ELANIO_CLEAR_BOUNDARY_SCAN ELAN3IO_CLEAR_BOUNDARY_SCAN -+#define ELANIO_READ_LINKVAL ELAN3IO_READ_LINKVAL -+#define ELANIO_WRITE_LINKVAL ELAN3IO_WRITE_LINKVAL -+#define ELANIO_SET_DEBUG_STRUCT ELAN3IO_SET_DEBUG_STRUCT -+#define ELANIO_SET_DEBUG ELAN3IO_SET_DEBUG -+#define ELANIO_DEBUG_BUFFER_STRUCT ELAN3IO_DEBUG_BUFFER_STRUCT -+#define ELANIO_DEBUG_BUFFER ELAN3IO_DEBUG_BUFFER -+#define ELANIO_NETERR_SERVER_STRUCT ELAN3IO_NETERR_SERVER_STRUCT -+#define ELANIO_NETERR_SERVER ELAN3IO_NETERR_SERVER -+#define ELANIO_NETERR_FIXUP ELAN3IO_NETERR_FIXUP -+ -+#define ELANIO_FREE ELAN3IO_FREE -+#define ELANIO_ATTACH ELAN3IO_ATTACH -+#define ELANIO_DETACH ELAN3IO_DETACH -+#define ELANIO_ADDVP_STRUCT ELAN3IO_ADDVP_STRUCT -+#define ELANIO_ADDVP ELAN3IO_ADDVP -+#define ELANIO_REMOVEVP ELAN3IO_REMOVEVP -+#define ELANIO_BCASTVP_STRUCT ELAN3IO_BCASTVP_STRUCT -+#define ELANIO_BCASTVP ELAN3IO_BCASTVP -+#define ELANIO_LOAD_ROUTE_STRUCT ELAN3IO_LOAD_ROUTE_STRUCT -+#define ELANIO_LOAD_ROUTE ELAN3IO_LOAD_ROUTE -+#define ELANIO_PROCESS ELAN3IO_PROCESS -+#define ELANIO_SETPERM_STRUCT ELAN3IO_SETPERM_STRUCT -+#define ELANIO_SETPERM ELAN3IO_SETPERM -+#define ELANIO_CLEARPERM_STRUCT ELAN3IO_CLEARPERM_STRUCT -+#define ELANIO_CLEARPERM ELAN3IO_CLEARPERM -+#define ELANIO_CHANGEPERM_STRUCT ELAN3IO_CHANGEPERM_STRUCT -+#define ELANIO_CHANGEPERM ELAN3IO_CHANGEPERM -+#define ELANIO_HELPER_THREAD ELAN3IO_HELPER_THREAD -+#define ELANIO_WAITCOMMAND ELAN3IO_WAITCOMMAND -+#define ELANIO_BLOCK_INPUTTER ELAN3IO_BLOCK_INPUTTER -+#define ELANIO_SET_FLAGS ELAN3IO_SET_FLAGS -+#define ELANIO_WAITEVENT ELAN3IO_WAITEVENT -+#define ELANIO_ALLOC_EVENTCOOKIE ELAN3IO_ALLOC_EVENTCOOKIE -+#define ELANIO_FREE_EVENTCOOKIE ELAN3IO_FREE_EVENTCOOKIE -+#define ELANIO_ARM_EVENTCOOKIE ELAN3IO_ARM_EVENTCOOKIE -+#define ELANIO_WAIT_EVENTCOOKIE ELAN3IO_WAIT_EVENTCOOKIE -+#define ELANIO_SWAPSPACE ELAN3IO_SWAPSPACE -+#define ELANIO_EXCEPTION_SPACE ELAN3IO_EXCEPTION_SPACE -+#define ELANIO_GET_EXCEPTION ELAN3IO_GET_EXCEPTION -+#define ELANIO_UNLOAD_STRUCT ELAN3IO_UNLOAD_STRUCT -+#define ELANIO_UNLOAD ELAN3IO_UNLOAD -+#define ELANIO_GET_ROUTE_STRUCT ELAN3IO_GET_ROUTE_STRUCT -+#define ELANIO_GET_ROUTE ELAN3IO_GET_ROUTE -+#define ELANIO_RESET_ROUTE_STRUCT ELAN3IO_RESET_ROUTE_STRUCT -+#define ELANIO_RESET_ROUTE ELAN3IO_RESET_ROUTE -+#define ELANIO_CHECK_ROUTE_STRUCT ELAN3IO_CHECK_ROUTE_STRUCT -+#define ELANIO_CHECK_ROUTE ELAN3IO_CHECK_ROUTE -+#define ELANIO_VP2NODEID_STRUCT ELAN3IO_VP2NODEID_STRUCT -+#define ELANIO_VP2NODEID ELAN3IO_VP2NODEID -+#define ELANIO_SET_SIGNAL ELAN3IO_SET_SIGNAL -+#define ELANIO_PROCESS_2_LOCATION_STRUCT ELAN3IO_PROCESS_2_LOCATION_STRUCT -+#define ELANIO_PROCESS_2_LOCATION ELAN3IO_PROCESS_2_LOCATION -+#define ELANIO_GET_DEVINFO_STRUCT ELAN3IO_GET_DEVINFO_STRUCT -+#define ELANIO_GET_DEVINFO ELAN3IO_GET_DEVINFO -+#define ELANIO_GET_POSITION_STRUCT ELAN3IO_GET_POSITION_STRUCT -+#define ELANIO_GET_POSITION ELAN3IO_GET_POSITION -+#define ELANIO_STATS_STRUCT ELAN3IO_STATS_STRUCT -+#define ELANIO_STATS ELAN3IO_STATS -+# define ELAN_SYS_STATS_DEVICE ELAN3_SYS_STATS_DEVICE -+# define ELAN_SYS_STATS_ELAN3MMU ELAN3_SYS_STATS_MMU -+ -+#define ELANIO_OFF_FLAG_PAGE ELAN3IO_OFF_FLAG_PAGE -+#define ELANIO_OFF_UREG_PAGE ELAN3IO_OFF_UREG_PAGE -+#define ELANIO_OFF_COMMAND_PAGE ELAN3IO_OFF_COMMAND_PAGE -+ -+ -+/* elanvp.h */ -+#define ELAN_ROUTE_SUCCESS ELAN3_ROUTE_SUCCESS -+#define ELAN_ROUTE_SYSCALL_FAILED ELAN3_ROUTE_SYSCALL_FAILED -+#define ELAN_ROUTE_INVALID ELAN3_ROUTE_INVALID -+#define ELAN_ROUTE_TOO_LONG ELAN3_ROUTE_TOO_LONG -+#define ELAN_ROUTE_LOAD_FAILED ELAN3_ROUTE_LOAD_FAILED -+#define ELAN_ROUTE_PROC_RANGE ELAN3_ROUTE_PROC_RANGE -+#define ELAN_ROUTE_INVALID_LEVEL ELAN3_ROUTE_INVALID_LEVEL -+#define ELAN_ROUTE_OCILATES ELAN3_ROUTE_OCILATES -+#define ELAN_ROUTE_WRONG_DEST ELAN3_ROUTE_WRONG_DEST -+#define ELAN_ROUTE_TURN_LEVEL ELAN3_ROUTE_TURN_LEVEL -+#define ELAN_ROUTE_NODEID_UNKNOWN ELAN3_ROUTE_NODEID_UNKNOWN -+ -+/* elandev.h */ -+#define ELAN_STATS ELAN3_STATS -+#define ELAN_STATS_VERSION ELAN3_STATS_VERSION -+ -+/* perm.h */ -+#define ELAN_PERM_NOREMOTE ELAN3_PERM_NOREMOTE -+#define ELAN_PERM_LOCAL_READ ELAN3_PERM_LOCAL_READ -+#define ELAN_PERM_REMOTEALL ELAN3_PERM_REMOTEALL -+ -+/* threadsyscall.h */ -+#define ELAN_ABORT_TRAPNUM ELAN3_ABORT_TRAPNUM -+#define ELAN_ELANCALL_TRAPNUM ELAN3_ELANCALL_TRAPNUM -+#define ELAN_SYSCALL_TRAPNUM ELAN3_SYSCALL_TRAPNUM -+#define ELAN_SYS_close ELAN3_SYS_close -+#define ELAN_SYS_getpid ELAN3_SYS_getpid -+#define ELAN_SYS_ioctl ELAN3_SYS_ioctl -+#define ELAN_SYS_kill ELAN3_SYS_kill -+#define ELAN_SYS_lseek ELAN3_SYS_lseek -+#define ELAN_SYS_mmap ELAN3_SYS_mmap -+#define ELAN_SYS_munmap ELAN3_SYS_munmap -+#define ELAN_SYS_open ELAN3_SYS_open -+#define ELAN_SYS_poll ELAN3_SYS_poll -+#define ELAN_SYS_read ELAN3_SYS_read -+#define ELAN_SYS_write ELAN3_SYS_write -+#define ELAN_T_SYSCALL_CODE ELAN3_T_SYSCALL_CODE -+#define ELAN_T_SYSCALL_ERRNO ELAN3_T_SYSCALL_ERRNO -+ -+/* elansyscall.h */ -+#define ELAN_SYS_FLAG_DMA_BADVP ELAN3_SYS_FLAG_DMA_BADVP -+#define ELAN_SYS_FLAG_THREAD_BADVP ELAN3_SYS_FLAG_THREAD_BADVP -+#define ELAN_SYS_FLAG_DMAFAIL ELAN3_SYS_FLAG_DMAFAIL -+#define ELAN_SYS_FLAG_NETERR ELAN3_SYS_FLAG_NETERR -+ -+/* intrinsics.h */ -+#define elan_copy64w elan3_copy64w -+#define elan_read64dw elan3_read64dw -+#define elan_write64dw elan3_write64dw -+ -+#ifndef ELAN_POLL_EVENT -+#define ELAN_POLL_EVENT ELAN3_POLL_EVENT -+#endif -+#ifndef ELAN_WAIT_EVENT -+#define ELAN_WAIT_EVENT ELAN3_WAIT_EVENT -+#endif -+ -+#endif /* __ELAN3_COMPAT_H */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -diff -urN clean/include/elan3/dma.h linux-2.6.9/include/elan3/dma.h ---- clean/include/elan3/dma.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/dma.h 2002-08-21 08:43:27.000000000 -0400 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_DMA_H -+#define __ELAN3_DMA_H -+ -+#ident "$Id: dma.h,v 1.38 2002/08/21 12:43:27 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/dma.h,v $ */ -+ -+#include -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E3_DMA_ALIGN (32) -+ -+/* The maximum size a DMA can be (i.e. < 2GB) */ -+#define E3_MAX_DMA_SIZE 0x7fffffff -+ -+/* This macro returns TRUE if a fixup for the ELAN_REVB_BUG_2 problem is required -+ * i.e. if the DMA begins in the last 64-bytes of a page and its size causes it to enter the -+ * next page, hence causing the Elan to issue 2 (64-byte) block reads to different pages. -+ * See GNAT hw-elan3/3263 -+ */ -+#define E3_DMA_REVB_BUG_2(SIZE, ADDR, PAGESIZE) \ -+ ( (((int) (ADDR) & (PAGESIZE-64)) == (PAGESIZE-64)) && (-(((int) (ADDR) | ~(PAGESIZE-1))) < (SIZE)) ) -+ -+/* There is a point where a dma runs quicker from main memory than -+ * when running from sdram and having to copy all the data down -+ * first. -+ */ -+#define E3_DMA_SDRAM_CUTOFF 128 -+ -+typedef union _e3_DmaType -+{ -+ E3_uint32 type; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+#endif -+ } s; -+} E3_DmaType; -+ -+#define E3_DMA_CONTEXT_MASK (ALL_CONTEXT_BITS << 16) -+ -+#define E3_DMA_CONTEXT(type) (((type) >> 16) & ALL_CONTEXT_BITS) -+#define E3_DMA_ISREMOTE(type) (((type) >> 15) & 1) -+#define E3_DMA_FAILCOUNT(type) (((type) >> 9) & 0x3F) -+#define E3_DMA_OPCODE(type) (((type) >> 5) & 0xF) -+#define E3_DMA_DIRECTION(type) (((type) >> 2) & 0x7) -+#define EP_DMA_DATATYPE(type) (((type) >> 0) & 0x3) -+ -+#define E3_DMA_TYPE(dataType, direction, opCode, failCount) \ -+ (((dataType) & 0x3) | (((direction) & 7) << 2) | (((opCode) & 0xF) << 5) | (((failCount) & 0x3F) << 9)) -+ -+ -+typedef union _e3_CookieVProc -+{ -+ E3_uint32 cookie_vproc; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+#else -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+#endif -+ } s; -+} E3_CookieVProc; -+ -+#define E3_DMA_COOKIE_PROC(Cookie, VProc) (((VProc) & 0xffff) | (((Cookie) << 16))) -+ -+#define DMA_COOKIE_MASK (0xffff0000) -+#define DMA_PROCESS_MASK (0x0000ffff) -+ -+/* We use the bottom bit of the cookie to -+ * distinguish main/thread generated cookies -+ */ -+#define DMA_COOKIE_THREAD (0x01 << 16) -+ -+/* We use the next bit of the cookie to -+ * distinguish locally/remotely generated cookies -+ */ -+#define DMA_COOKIE_REMOTE (0x02 << 16) -+ -+/* Assign and increment cookie (NB: we have reserved the bottom two bits) -+ */ -+#define DMA_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | VPROC) -+#define DMA_REMOTE_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | DMA_COOKIE_REMOTE | VPROC) -+ -+#define DMA_COOKIE_REFRESH(COOKIEVP, COOKIE) \ -+do { \ -+ COOKIEVP &= ~DMA_COOKIE_MASK; /* Clear cookie */ \ -+ COOKIEVP |= DMA_COOKIE(COOKIE,0); /* Assign new cookie */ \ -+} while (0) -+ -+typedef struct e3_dma -+{ -+ E3_DmaType dma_u; -+ E3_uint32 dma_size; -+ E3_Addr dma_source; -+ E3_Addr dma_dest; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_srcEvent; -+ E3_CookieVProc dma_srcCookieProc; -+} E3_DMA; -+ -+ -+/* -+ * Word-swapped version of DMA descriptor. -+ * This is used by the UltraSPARC code to format the descriptor -+ * in main memory before block-copying it down to Elan SDRAM. -+ * In the process it does a dword (64-bit) conversion and so swaps -+ * the word order on a double-word pair basis -+ */ -+typedef struct e3_dma_swapped -+{ -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+} E3_DMA_SWAPPED; -+ -+/* Define a Main memory structure for DMA desc based on Endianess of machine */ -+#if defined(__LITTLE_ENDIAN__) -+#define E3_DMA_MAIN E3_DMA -+#else -+#define E3_DMA_MAIN E3_DMA_SWAPPED; -+#endif -+ -+#define dma_type dma_u.type -+#define dma_failCount dma_u.s.failCount -+#define dma_isRemote dma_u.s.isRemote -+#define dma_opCode dma_u.s.opCode -+#define dma_direction dma_u.s.direction -+#define dma_dataType dma_u.s.dataType -+#define dma_queueContext dma_u.s.Context -+ -+#define dma_destCookieVProc dma_destCookieProc.cookie_vproc -+#define dma_destVProc dma_destCookieProc.s.vproc -+#define dma_destCookie dma_destCookieProc.s.cookie -+#define dma_srcCookieVProc dma_srcCookieProc.cookie_vproc -+#define dma_srcVProc dma_srcCookieProc.s.vproc -+#define dma_srcCookie dma_srcCookieProc.s.cookie -+ -+/* -+ * Values for dma_opCode -+ */ -+#define DMA_NORMAL 0 -+#define DMA_QUEUED 1 -+#define DMA_NORMAL_BROADCAST 2 -+#define DMA_QUEUED_BROADCAST 3 -+#define DMA_NORMAL_UNSAFE 4 -+#define DMA_QUEUED_UNSAFE 5 -+#define DMA_NORMAL_BROADCAST_UNSAFE 6 -+#define DMA_QUEUED_BROADCAST_UNSAFE 7 -+ -+/* -+ * Values for dma_direction -+ */ -+#define DMA_WRITE 0 -+#define DMA_READ_REQUEUE 1 -+#define DMA_READ 3 -+#define DMA_READ_BROADCAST 7 -+ -+/* -+ * Values for dma_dataType -+ */ -+#define DMA_BYTE 0 -+#define DMA_HALFWORD 1 -+#define DMA_WORD 2 -+#define DMA_DOUBLE 3 -+ -+/* OUT OF DATE ? -+ #define DMA_OPCODE_SHIFT 3 -+ #define DMA_FAILCOUNT_SHIFT 9 -+*/ -+#define DMA_TYPE_ISREMOTE (1 << 15) -+#define DMA_TYPE_READ (3 << 2) -+#define DMA_TYPE_READ_REQUEUE (1 << 2) -+#define DMA_TYPE_DIRECTION_MASK (3 << 2) -+ -+#endif /* __ELAN3_DMA_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/e3types.h linux-2.6.9/include/elan3/e3types.h ---- clean/include/elan3/e3types.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/e3types.h 2002-08-09 07:23:33.000000000 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_E3TYPES_H -+#define __ELAN3_E3TYPES_H -+ -+#ident "$Id: e3types.h,v 1.18 2002/08/09 11:23:33 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/e3types.h,v $ */ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E3_int; -+typedef unsigned int E3_uint; -+ -+typedef signed char E3_int8; -+typedef unsigned char E3_uint8; -+ -+typedef signed short E3_int16; -+typedef unsigned short E3_uint16; -+ -+typedef signed int E3_int32; -+typedef unsigned int E3_uint32; -+ -+#ifdef __ELAN3__ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+#ifdef _MAIN_LP64 -+/* NOTE: If the Main is 64-bit we declare the Elan thread's -+ * E3_uintptr to be 64-bits too -+ */ -+typedef unsigned long long E3_uintptr; -+#else -+typedef unsigned long E3_uintptr; -+#endif -+ -+#else -+ -+#ifdef _LP64 -+typedef signed long E3_int64; -+typedef unsigned long E3_uint64; -+typedef unsigned long E3_uintptr; -+#else /* _ILP32 */ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+typedef unsigned long E3_uintptr; -+#endif -+ -+#endif /* __ELAN3__ */ -+ -+/* 32-bit Elan3 address */ -+typedef E3_uint32 E3_Addr; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN3_E3TYPES_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elan3mmu.h linux-2.6.9/include/elan3/elan3mmu.h ---- clean/include/elan3/elan3mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elan3mmu.h 2004-12-14 05:19:32.000000000 -0500 -@@ -0,0 +1,346 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3MMU_H -+#define __ELAN3_ELAN3MMU_H -+ -+#ident "$Id: elan3mmu.h,v 1.41 2004/12/14 10:19:32 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu.h,v $*/ -+ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct elan3mmu_global_stats -+{ -+ int version; -+ int pteload; -+ int pteunload; -+ int ptereload; -+ -+ int streamable_alloc; -+ int streamable_free; -+ int streamable_alloc_failed; -+ -+ int num_ptbl_level[4]; /* number of level N ptbls */ -+ -+ int create_ptbl_failed; /* count of ptbl creation failure */ -+ -+ int lX_alloc_l3; /* count of l3 ptbls used as lX */ -+ int lX_freed_l3; /* count of lX ptbls freed as l3 */ -+ -+ int l2_alloc_l3; /* count of l3 ptbls used as l2 */ -+ int l2_freed_l3; /* count of l2 ptbls freed as l3 */ -+ -+ int stolen_ptbls; /* count of l3 ptbls stolen */ -+} ELAN3MMU_GLOBAL_STATS; -+ -+#define ELAN3MMU_STATS_VERSION 1 -+ -+#define ELAN3MMU_STAT(what) (elan3mmu_global_stats.what++) -+#define ELAN3MMU_SET_STAT(what,count) (elan3mmu_global_stats.what = count) -+ -+#ifdef __KERNEL__ -+ -+#define ELAN3_PT_SHIFT (ELAN3_L2_SHIFT + 2) -+ -+typedef struct elan3_ptbl -+{ -+ struct elan3_ptbl *ptbl_parent; /* Parent page table, or next on freelist */ -+ struct elan3mmu *ptbl_elan3mmu; /* elan3mmu we're allocated for */ -+ E3_Addr ptbl_base; /* Virtual address we're mapping */ -+ u_char ptbl_index; /* Index in ptbl group */ -+ u_char ptbl_valid; /* Number of valid entries */ -+ u_char ptbl_flags; /* Flags, defined below. */ -+ u_char ptbl_spare; -+} ELAN3_PTBL; -+ -+#define ptbl_next ptbl_parent /* Parent pointer is next pointer when on free list */ -+ -+#define PTBL_LEVEL_X 0x00 -+#define PTBL_LEVEL_1 0x01 -+#define PTBL_LEVEL_2 0x02 -+#define PTBL_LEVEL_3 0x03 -+#define PTBL_LEVEL_MASK 0x03 -+#define PTBL_LOCKED 0x04 /* Page table is locked, protects all fields */ -+#define PTBL_KEEP 0x08 /* This ptbl is not to be stolen */ -+#define PTBL_ALLOCED 0x10 /* This ptbl has been allocated, and is not free */ -+#define PTBL_GROUPED 0x20 /* This ptbl is a member of a group of ptbls */ -+#define PTBL_KERNEL 0x80 /* This ptbl is allocated for the kernel */ -+ -+#define PTBL_LEVEL(flags) ((flags) & PTBL_LEVEL_MASK) -+#define PTBL_IS_LOCKED(flags) (((flags) & (PTBL_LOCKED|PTBL_ALLOCED)) == (PTBL_LOCKED|PTBL_ALLOCED)) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBL_GROUP_SIZE 8192 /* page table groups are 8k bytes */ -+# define PTBLS_PER_GROUP_L1 8 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 32 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 32 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 32 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 32 /* max of l1,l2,l3,lX */ -+#else -+# define PTBL_GROUP_SIZE 4096 /* page table groups are 4k bytes */ -+# define PTBLS_PER_GROUP_L1 4 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 16 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 8 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 16 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 16 /* max of l1,l2,l3,lX */ -+#endif -+ -+#define HMES_PER_GROUP (PTBLS_PER_GROUP_L3*ELAN3_L3_ENTRIES) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 1 /* 32 PTEs */ -+#else -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 2 /* 64 PTEs */ -+#endif -+ -+#define ELAN3_LX_ENTRIES (32) -+#define PTBLS_PER_PTBL_LX (1) -+ -+#define L1_VA_PER_PTBL (ELAN3_L1_SIZE*(ELAN3_L1_ENTRIES/PTBLS_PER_PTBL_L1)) /* 4 ptbl for L1 */ -+#define L2_VA_PER_PTBL (ELAN3_L2_SIZE*(ELAN3_L2_ENTRIES/PTBLS_PER_PTBL_L2)) /* 1 ptbl for L2 */ -+#define L3_VA_PER_PTBL (ELAN3_L3_SIZE*(ELAN3_L3_ENTRIES/PTBLS_PER_PTBL_L3)) /* 1 ptbl for L3 */ -+ -+typedef struct elan3_ptbl_gr -+{ -+ struct elan3_ptbl_gr *pg_next; /* Next in list. */ -+ int pg_level; /* Level PG allocated for */ -+ sdramaddr_t pg_addr; /* sdram offset of ptes/ptps */ -+ ELAN3_PTBL pg_ptbls[PTBLS_PER_GROUP_MAX]; /* The actual page tables */ -+} ELAN3_PTBL_GR; -+ -+ -+/* -+ * The elan3mmu structure is the mmu dependant hardware address translation -+ * structure linked to the address space structure to show the translatioms -+ * provided by the elan for an address sapce. -+ * -+ * We also have a doubly linked list of 'regions' which allow the -+ * elan3mmu code to determine the access permissions for the elan -+ * dependant on the virtual address that the translation is being -+ * loaded at. -+ */ -+ -+typedef struct elan3mmu_rgn -+{ -+ struct elan3mmu_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_mprev; /* sorted on main address */ -+ caddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct elan3mmu_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_eprev; /* sorted on elan address */ -+ E3_Addr rgn_ebase; /* elan address of base of region */ -+ -+ u_int rgn_len; /* length of region */ -+ u_int rgn_perm; /* elan access permission */ -+} ELAN3MMU_RGN; -+ -+typedef struct elan3mmu -+{ -+ spinlock_t elan3mmu_lock; /* spinlock lock for regions */ -+ ELAN3MMU_RGN *elan3mmu_mrgns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_mtail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_mrgnlast; /* Last region 'hit' */ -+ -+ ELAN3MMU_RGN *elan3mmu_ergns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_etail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_ergnlast; /* Last region 'hit' */ -+ -+ struct elan3_dev *elan3mmu_dev; /* Elan device we're using. */ -+ struct elan3_ctxt *elan3mmu_ctxt; /* Elan ctxt we're associated with */ -+ -+ sdramaddr_t elan3mmu_ctp; /* Context table entry for our context */ -+ ELAN3_PTBL *elan3mmu_l1ptbl; /* Level 1 Page table (first of 4) */ -+ -+ spinlock_t elan3mmu_lXptbl_lock; /* spinlock for level X table list */ -+ ELAN3_PTBL *elan3mmu_lXptbl; /* Level X Page table list */ -+ -+#ifdef LINUX -+ struct mm_struct *elan3mmu_coproc_mm; /* Linux mm we're mapping */ -+#endif -+} ELAN3MMU; -+ -+_NOTE(LOCK_ORDER(elan3mmu::elan3mmu_lock elan3_dev::IntrLock)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3mmu::elan3mmu_lock, -+ elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+/* protected by dev->IntrLock for read by device driver */ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+ -+_NOTE(SCHEME_PROTECTS_DATA("only set to valid region", -+ elan3mmu::elan3mmu_ergnlast elan3mmu::elan3mmu_mrgnlast)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+/* -+ * Macros for accessing ptes/ptbls/ptbl_grs -+ */ -+ -+#define OFFSETOF(object,member) /* calculate offset of structure member */ \ -+ ((size_t) (&(((object *)0)->member))) -+#define PTBL_TO_GR(ptbl) /* convert ptbl to ptbl group */ \ -+ ((ELAN3_PTBL_GR *) ((caddr_t) ((ptbl) - (ptbl)->ptbl_index) - OFFSETOF(ELAN3_PTBL_GR,pg_ptbls[0]))) -+#define PTBL_TO_PTADDR(ptbl) /* convert ptbl to a ptp pointing at it */ \ -+ (PTBL_TO_GR(ptbl)->pg_addr + ((ptbl)->ptbl_index<pg_hmes + ((pte) - (ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr)) -+#define HME_TO_PTE(ptebl,hme) /* convert hme to corresponding pte */ \ -+ ((ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr + ((hme) - (PTBL_TO_GR(ptbl)->pg_hmes))) -+ -+ -+/* Flags for lock_ptbl */ -+#define LK_PTBL_NOWAIT 0x1 -+#define LK_PTBL_FAILOK 0x2 -+ -+/* Return values for lock_ptbl */ -+#define LK_PTBL_OK 0x0 -+#define LK_PTBL_MISMATCH 0x1 -+#define LK_PTBL_FAILED 0x2 -+ -+/* Flags for elan3mmu_ptesync */ -+#define NO_MLIST_LOCK 0 -+#define MLIST_LOCKED 1 -+ -+/* Flags for elan3mmu_pteload */ -+#define PTE_LOAD 0x00 -+#define PTE_LOAD_LOCK 0x01 /* translation should be locked */ -+#define PTE_LOAD_NOSYNC 0x02 /* ref/mod bits should not be sync'ed to page */ -+#define PTE_NO_SLEEP 0x04 /* true if we cant sleep */ -+#define PTE_NO_STEAL 0x08 /* true if we don't want to steal ptbls */ -+ -+#define PTE_LOAD_ENDIAN_MASK 0x10 /* mask for endian-ness */ -+#define PTE_LOAD_LITTLE_ENDIAN 0x00 /* translation is to little-endian memory */ -+#define PTE_LOAD_BIG_ENDIAN 0x10 /* translation is to big-endian memory */ -+ -+ -+/* Flags for elan3mmu_unload */ -+#define PTE_UNLOAD 0x00 -+#define PTE_UNLOAD_UNLOCK 0x01 -+#define PTE_UNLOAD_NOFLUSH 0x02 -+#define PTE_UNLOAD_NOSYNC 0x04 -+ -+extern int elan3mmu_debug; -+#ifdef DEBUG_PRINTF -+# define HAT_PRINTF0(n,msg) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg) : (void) 0) -+# define HAT_PRINTF1(n,msg,a) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a) : (void) 0) -+# define HAT_PRINTF2(n,msg,a,b) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b) : (void) 0) -+# define HAT_PRINTF3(n,msg,a,b,c) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c) : (void) 0) -+# define HAT_PRINTF4(n,msg,a,b,c,d) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d) : (void) 0) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e) : (void) 0) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e,f) : (void) 0) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) ((elan3mmu_debug & n) ? (void) elan3_debugf(NULL, DBG_HAT, ##args) : (void) 0) -+# endif -+#else -+# define HAT_PRINTF0(n,msg) -+# define HAT_PRINTF1(n,msg,a) -+# define HAT_PRINTF2(n,msg,a,b) -+# define HAT_PRINTF3(n,msg,a,b,c) -+# define HAT_PRINTF4(n,msg,a,b,c,d) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) -+# endif -+#endif -+ -+/* elan3mmu_generic.c */ -+extern ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+extern void elan3mmu_init (void); -+extern void elan3mmu_fini (void); -+ -+extern ELAN3MMU *elan3mmu_alloc (struct elan3_ctxt *ctxt); -+extern void elan3mmu_free (ELAN3MMU *elan3mmu); -+ -+extern void elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp); -+extern int elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask); -+extern void elan3mmu_detach (ELAN3_DEV *dev, int ctx); -+ -+extern ELAN3MMU_RGN *elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, E3_Addr addr, int tail); -+extern int elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, caddr_t addr, int tail); -+extern int elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern int elan3mmu_setperm (ELAN3MMU *elan3mmu, caddr_t maddr, E3_Addr eaddr, u_int len, u_int perm); -+extern void elan3mmu_clrperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len); -+extern int elan3mmu_checkperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int access); -+extern caddr_t elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern E3_Addr elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern void elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr); -+extern void elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+extern void elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+ -+extern void elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr); -+extern void elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int flags); -+extern void elan3mmu_sync (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int clearflag); -+extern void elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern void elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern sdramaddr_t elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level); -+extern sdramaddr_t elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr, int *level, ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags); -+extern sdramaddr_t elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr, int level, ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags); -+extern void elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int flags); -+extern int elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int flags, E3_Addr addr, spinlock_t **pl2lock, unsigned long *lock_flags); -+extern int elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int flags, E3_Addr addr, spinlock_t **pl3lock, unsigned long *lock_flags); -+ -+extern void elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+extern int elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags); -+extern int elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags); -+extern void elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+/* elan3mmu_osdep.c */ -+extern void elan3mmu_init_osdep (void); -+extern void elan3mmu_fini_osdep (void); -+extern void elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu); -+extern void elan3mmu_free_osdep (ELAN3MMU *elan3mmu); -+extern ELAN3_PTE elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm); -+extern ELAN3_PTE elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu); -+ -+#if defined (DIGITAL_UNIX) -+# include -+#elif defined (LINUX) -+# include -+#endif -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELAN3MMU_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elan3mmu_linux.h linux-2.6.9/include/elan3/elan3mmu_linux.h ---- clean/include/elan3/elan3mmu_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elan3mmu_linux.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_MMU_LINUX_H -+#define __ELAN3_MMU_LINUX_H -+ -+#ident "$Id: elan3mmu_linux.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu_linux.h,v $*/ -+ -+/* XXX copy of elan3mmu_dunix.h */ -+ -+#define ALLOC_ELAN3MMU(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU *, sizeof (ELAN3MMU), cansleep) -+#define ALLOC_PTBL_GR(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3_PTBL_GR *, sizeof (ELAN3_PTBL_GR), cansleep) -+#define ALLOC_ELAN3MMU_RGN(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU_RGN *, sizeof (ELAN3MMU_RGN), cansleep) -+#define ALLOC_HMENTS(ptr,cansleep) KMEM_ALLOC((ptr,ELAN3_HMENT *, sizeof (ELAN3_HMENT), cansleep) -+ -+#define FREE_ELAN3MMU(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU)) -+#define FREE_PTBL_GR(ptr) KMEM_FREE(ptr,sizeof (ELAN3_PTBL_GR)) -+#define FREE_ELAN3MMU_RGN(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU_RGN)) -+#define FREE_HMENTS(ptr) KMEM_FREE(ptr,sizeof (ELAN3_HMENT)) -+ -+extern void elan3mmu_init_osdep(void); -+extern void elan3mmu_fini_osdep(void); -+ -+extern void elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu); -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elan3ops.h linux-2.6.9/include/elan3/elan3ops.h ---- clean/include/elan3/elan3ops.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elan3ops.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: elan3ops.h,v 1.3 2003/09/24 13:57:24 david Exp $ */ -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3ops.h,v $ */ -+ -+#ifndef _ELAN3_OPS_H -+#define _ELAN3_OPS_H -+ -+int get_position (void *arg, ELAN_POSITION *position); -+int set_position (void *arg, unsigned short nodeId, unsigned short numNodes); -+ -+int elan3mod_create_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+int elan3mod_destroy_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+ -+int elan3mod_create_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+int elan3mod_destroy_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+int elan3mod_attach_cap (void *arg_ctxt, ELAN_CAPABILITY *cap); -+int elan3mod_detach_cap (void *arg_ctxt); -+ -+extern ELAN_DEV_OPS elan3_dev_ops; -+ -+int stats_get_index_name (void *arg, uint index, caddr_t name); -+int stats_get_block (void *arg, uint entries, ulong *value); -+int stats_clear_block (void *arg); -+ -+int elan3_register_dev_stats (ELAN3_DEV * dev); -+void elan3_deregister_dev_stats (ELAN3_DEV * dev); -+ -+ -+#endif /* __ELAN3_OPS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/elan3/elanctxt.h linux-2.6.9/include/elan3/elanctxt.h ---- clean/include/elan3/elanctxt.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elanctxt.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,856 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANCTXT_H -+#define _ELAN3_ELANCTXT_H -+ -+#ident "$Id: elanctxt.h,v 1.81 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanctxt.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+ -+#define BumpUserStat(ctxt, stat) ((ctxt)->FlagPage->stat++) -+ -+#if defined(__LITTLE_ENDIAN__) -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Areg; -+ E3_uint32 Breg; -+ } r; -+ struct -+ { -+ E3_uint32 Addr; -+ E3_uint32 ContextType; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef E3_EventInt E3_EventInt_BE; -+typedef E3_IprocTrapHeader E3_IprocTrapHeader_BE; -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+typedef E3_FaultSave E3_FaultSave_BE; -+ -+typedef union -+{ -+ E3_uint64 Align64; -+ E3_DMA s; -+} E3_DMA_BE; -+ -+typedef E3_ThreadQueue E3_ThreadQueue_BE; -+ -+#else -+ -+/* "Big-Endian" data structures copied by 64 bit loads, these are 32 bit word flipped */ -+/* from the corresponding data structure. */ -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Breg; -+ E3_uint32 Areg; -+ } r; -+ struct -+ { -+ E3_uint32 ContextType; -+ E3_uint32 Addr; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef union _E3_EventInt_BE -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ E3_uint32 IntCookie; -+ } s; -+} E3_EventInt_BE; -+ -+typedef union _E3_IprocTrapHeader_BE -+{ -+ E3_uint64 Align64; -+ -+ struct -+ { -+ E3_uint32 TrAddr; -+ E3_TrTypeCntx TrTypeCntx; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ E3_uint32 TrData0; -+ } s; -+} E3_IprocTrapHeader_BE; -+ -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+ -+typedef union _E3_FaultSave_be -+{ -+ E3_uint64 Align64; -+ struct { -+ volatile E3_uint32 FaultContext; -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 EventAddress; -+ volatile E3_uint32 FaultAddress; -+ } s; -+} E3_FaultSave_BE; -+ -+typedef union _e3_dma_be -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+ } s; -+} E3_DMA_BE; -+ -+typedef union _E3_ThreadQueue_BE -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ /* copied by 64 bit copy from elan to main */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_Addr Thread; /* Bits 32 to 63 */ -+ } s; -+} E3_ThreadQueue_BE; -+ -+#endif /* defined(LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) */ -+ -+typedef struct neterr_msg -+{ -+ E3_uint32 Rail; /* Rail error received on */ -+ ELAN_CAPABILITY SrcCapability; /* Capability of source of packet */ -+ ELAN_CAPABILITY DstCapability; /* Capability of dest of packet */ -+ -+ E3_uint32 DstProcess; /* Virtual Process of dest of packet */ -+ E3_Addr CookieAddr; /* Cookie Address (or NULL for DMA) */ -+ E3_uint32 CookieVProc; /* Cookie and VP (identifies DMA) */ -+ E3_uint32 NextCookie; /* Next Cookie value (for thread) */ -+ E3_uint32 WaitForEop; /* Wait for EOP transaction */ -+} NETERR_MSG; -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Associated with each input channel can be a network error -+ * resolver structure, which can be queued on the network -+ * error resolver threads to perform RPCs to the other kernels -+ * when a network error occurs with an identify transaction -+ * included -+ */ -+typedef struct neterr_resolver -+{ -+ struct neterr_resolver *Next; -+ -+ spinlock_t Lock; -+ -+ struct elan3_ctxt *Ctxt; -+ ELAN_LOCATION Location; -+ -+ int Completed; -+ int Status; -+ long Timestamp; -+ -+ NETERR_MSG Message; -+} NETERR_RESOLVER; -+ -+ -+typedef struct neterr_fixup -+{ -+ struct neterr_fixup *Next; -+ -+ kcondvar_t Wait; -+ int Completed; -+ int Status; -+ -+ NETERR_MSG Message; -+} NETERR_FIXUP; -+ -+#endif /* __KERNEL__ */ -+ -+/* Each of the following structures must be padded to a whole */ -+/* number of 64 bit words since the kernel uses 64 bit load/stores */ -+/* to transfer the elan register state. */ -+typedef struct command_trap -+{ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_uint32 Pad; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ CProcTrapBuf_BE TrapBuf; /* 8 bytes */ -+} COMMAND_TRAP; -+ -+typedef struct thread_trap -+{ -+ E3_uint32 Registers[32]; /* 128 bytes */ -+#define REG_GLOBALS 0 -+#define REG_OUTS 8 -+#define REG_LOCALS 16 -+#define REG_INS 24 -+ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE DataFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE InstFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE OpenFaultSave; /* 16 bytes */ -+ -+ E3_Status_Reg Status; /* 4 bytes */ -+ -+ E3_Addr pc; /* 4 bytes */ -+ E3_Addr npc; /* 4 bytes */ -+ E3_Addr StartPC; /* 4 bytes */ -+ E3_Addr sp; /* 4 bytes */ -+ E3_uint32 mi; /* 4 bytes */ -+ E3_TrapBits TrapBits; /* 4 bytes */ -+ E3_DirtyBits DirtyBits; /* 4 bytes */ -+} THREAD_TRAP; -+ -+typedef struct dma_trap -+{ -+ E3_DMA_BE Desc; /* 32 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE Data0; /* 16 bytes */ -+ E3_FaultSave_BE Data1; /* 16 bytes */ -+ E3_FaultSave_BE Data2; /* 16 bytes */ -+ E3_FaultSave_BE Data3; /* 16 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_DmaInfo PacketInfo; /* 4 bytes */ -+} DMA_TRAP; -+ -+typedef struct input_trap -+{ -+ E3_uint32 State; /* 4 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ -+ u_int NumTransactions; /* 4 bytes */ -+ u_int Overflow; /* 4 bytes */ -+ u_int AckSent; /* 4 bytes */ -+ u_int BadTransaction; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE *TrappedTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *TrappedDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *WaitForEopTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *WaitForEopDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *DmaIdentifyTransaction; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *ThreadIdentifyTransaction; /* 4 bytes */ -+ E3_Addr LockQueuePointer; /* 4 bytes */ -+ E3_Addr UnlockQueuePointer; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE Transactions[MAX_TRAPPED_TRANS]; /* n * 8 bytes */ -+ E3_IprocTrapData_BE DataBuffers[MAX_TRAPPED_TRANS]; /* n * 64 bytes */ -+} INPUT_TRAP; -+ -+typedef struct input_fault_save -+{ -+ struct input_fault_save *Next; -+ E3_Addr Addr; -+ E3_uint32 Count; -+} INPUT_FAULT_SAVE; -+ -+#define NUM_INPUT_FAULT_SAVE 32 -+#define MIN_INPUT_FAULT_PAGES 8 -+#define MAX_INPUT_FAULT_PAGES 128 -+ -+typedef E3_uint32 EVENT_COOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct event_cookie_entry -+{ -+ struct event_cookie_entry *ent_next; -+ struct event_cookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ EVENT_COOKIE ent_cookie; -+ EVENT_COOKIE ent_fired; -+ kcondvar_t ent_wait; -+} EVENT_COOKIE_ENTRY; -+ -+typedef struct event_cookie_table -+{ -+ struct event_cookie_table *tbl_next; -+ struct event_cookie_table *tbl_prev; -+ -+ unsigned long tbl_task; -+ unsigned long tbl_handle; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ EVENT_COOKIE_ENTRY *tbl_entries; -+} EVENT_COOKIE_TABLE; -+ -+#define NBYTES_PER_SMALL_ROUTE 8 -+#define NBYTES_PER_LARGE_ROUTE 16 -+ -+#define ROUTE_BLOCK_SIZE ELAN3_PAGE_SIZE -+#define NROUTES_PER_BLOCK (ROUTE_BLOCK_SIZE/NBYTES_PER_LARGE_ROUTE) -+ -+typedef struct elan3_routes -+{ -+ struct elan3_routes *Next; /* Can be chained together */ -+ -+ sdramaddr_t Routes; /* sdram offset of route entries */ -+ bitmap_t Bitmap[BT_BITOUL(NROUTES_PER_BLOCK)]; /* Bitmap of which entries are used */ -+} ELAN3_ROUTES; -+ -+ -+typedef struct elan3_route_table -+{ -+ spinlock_t Lock; /* Route lock */ -+ sdramaddr_t Table; /* Kernel address for route table */ -+ u_int Size; /* # entries in route table */ -+ -+ ELAN3_ROUTES *LargeRoutes; /* Large routes */ -+} ELAN3_ROUTE_TABLE; -+ -+typedef struct elan3_vpseg -+{ -+ struct elan3_vpseg *Next; -+ int Process; /* Virtual process */ -+ int Entries; /* and # processes */ -+ int Type; /* Type of cookie */ -+ -+ union -+ { -+ -+ ELAN_CAPABILITY Capability; /* Capability of remote segment */ -+# define SegCapability SegUnion.Capability -+ struct { -+ u_short LowProc; /* Base process number */ -+ u_short HighProc; /* and high process number */ -+# define SegLowProc SegUnion.BROADCAST.LowProc -+# define SegHighProc SegUnion.BROADCAST.HighProc -+ } BROADCAST; -+ } SegUnion; -+} ELAN3_VPSEG; -+ -+#define ELAN3_VPSEG_UNINT 0 /* Unitialised */ -+#define ELAN3_VPSEG_P2P 1 /* Point to Point */ -+#define ELAN3_VPSEG_BROADCAST 2 /* Broadcast */ -+ -+#define NUM_LISTS 7 /* Number of "swap" lists */ -+ -+typedef struct elan3_ctxt -+{ -+ struct elan3_ctxt *Next; /* can be queued on a task */ -+ struct elan3_ctxt *Prev; -+ -+ CtxtHandle Handle; /* user handle */ -+ int RefCnt; /* reference count */ -+ -+ ELAN3MMU *Elan3mmu; /* elan3mmu allocated for Elan translations */ -+ -+ struct elan3_ops *Operations; /* User supplied helper functions */ -+ void *Private; /* Users private pointer */ -+ -+ int Status; /* Status (guarded by dev_mutex) */ -+ int OthersState; /* State of halt queueing for dma/thread */ -+ int LwpCount; /* Number of lwp's running */ -+ -+ ELAN3_DEV *Device; /* Elan device */ -+ -+ ELAN_CAPABILITY Capability; /* Capability I've attached as */ -+ ELAN_POSITION Position; /* Position when I was created */ -+ -+ ELAN3_VPSEG *VpSegs; /* List of virtual process segments */ -+ ELAN3_ROUTE_TABLE *RouteTable; -+ -+ krwlock_t VpLock; /* Reader/writer lock for vp list */ -+ kmutex_t SwapListsLock; /* mutex to lock swap lists */ -+ kmutex_t CmdLock; /* mutex to lock trapped dma command */ -+ kmutex_t CmdPortLock; /* mutex to load/unload commandport xlation */ -+ -+ kcondvar_t Wait; /* Condition variable to sleep on */ -+ kcondvar_t CommandPortWait; /* Condition variable to wait for commandport */ -+ kcondvar_t LwpWait; /* Condition variable to wait for lwps to stop */ -+ kcondvar_t HaltWait; /* Condition variable to wait for halt */ -+ int Halted; /* and flag for halt cv */ -+ -+ caddr_t CommandPageMapping; /* user virtual address for command page mapping */ -+ ioaddr_t CommandPage; /* Elan command port mapping page */ -+ DeviceMappingHandle CommandPageHandle; /* DDI Handle */ -+ ioaddr_t CommandPort; /* Elan command port */ -+ void *CommandPortItem; /* Item we're re-issuing to commandport */ -+ -+ ELAN3_FLAGSTATS *FlagPage; /* Page visible to user process */ -+ -+ COMMAND_TRAP *CommandTraps; /* Command port traps */ -+ ELAN3_SPLIT_QUEUE CommandTrapQ; -+ -+ CProcTrapBuf_BE *Commands; /* Overflowed commands */ -+ ELAN3_QUEUE CommandQ; -+ -+ THREAD_TRAP *ThreadTraps; /* Thread processor traps */ -+ ELAN3_QUEUE ThreadTrapQ; -+ -+ DMA_TRAP *DmaTraps; /* Dma processor tra[ed */ -+ ELAN3_QUEUE DmaTrapQ; -+ -+ INPUT_TRAP Input0Trap; /* Inputter channel 0 trap */ -+ INPUT_TRAP Input1Trap; /* Inputter channel 1 trap */ -+ NETERR_RESOLVER *Input0Resolver; /* Inputter channel 0 network error resolver */ -+ NETERR_RESOLVER *Input1Resolver; /* Inputter channel 1 network error resolver */ -+ -+ INPUT_FAULT_SAVE InputFaults[NUM_INPUT_FAULT_SAVE]; /* stored writeblock addresses */ -+ INPUT_FAULT_SAVE *InputFaultList; /* organized in list for LRU */ -+ spinlock_t InputFaultLock; /* and lock for list */ -+ -+ kmutex_t NetworkErrorLock; -+ NETERR_FIXUP *NetworkErrorFixups; -+ -+ EVENT_COOKIE *EventCookies; /* Event cookies. */ -+ ELAN3_QUEUE EventCookieQ; -+ -+ E3_Addr *SwapThreads; /* Swapped Thread Queue */ -+ ELAN3_QUEUE SwapThreadQ; -+ -+ E3_DMA_BE *SwapDmas; /* Swapped Dmas Queue */ -+ ELAN3_QUEUE SwapDmaQ; -+ -+ int ItemCount[NUM_LISTS]; /* Count of items on each swap list */ -+ int inhibit; /* if set lwp not to reload translations */ -+ -+ int Disabled; -+} ELAN3_CTXT; -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_ctxt::Status elan3_ctxt::OthersState -+ elan3_ctxt::CommandTrapQ elan3_ctxt::CommandQ elan3_ctxt::ThreadTrapQ elan3_ctxt::DmaTrapQ -+ elan3_ctxt::Input0Trap elan3_ctxt::Input1Trap elan3_ctxt::EventCookieQ elan3_ctxt::SwapThreadQ -+ elan3_ctxt::SwapDmaQ elan3_ctxt::CommandPortItem elan3_ctxt::LwpCount)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_ctxt::SwapListsLock, -+ elan3_ctxt::ItemCount)) -+_NOTE(RWLOCK_PROTECTS_DATA(elan3_ctxt::VpLock, -+ elan3_ctxt::VpSegs elan3_vpseg::Next elan3_vpseg::Process -+ elan3_vpseg::Entries elan3_vpseg::Type)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_ctxt::ItemCount elan3_ctxt::Status elan3_ctxt::CommandPortItem)) -+ -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock elan3_ctxt::CmdLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock as::a_lock)) /* implicit by pagefault */ -+ -+#define CTXT_DETACHED (1 << 0) /* Context is detached. */ -+#define CTXT_NO_LWPS (1 << 1) /* No lwp's to handle faults */ -+#define CTXT_EXITING (1 << 2) /* User process is exiting */ -+ -+#define CTXT_SWAPPING_OUT (1 << 3) /* Context is swapping out */ -+#define CTXT_SWAPPED_OUT (1 << 4) /* Context is swapped out */ -+ -+#define CTXT_SWAP_FREE (1 << 5) /* Swap buffer is free */ -+#define CTXT_SWAP_VALID (1 << 6) /* Swap buffer has queue entries in it */ -+ -+#define CTXT_DMA_QUEUE_FULL (1 << 7) /* Dma trap queue is full */ -+#define CTXT_THREAD_QUEUE_FULL (1 << 8) /* Thread trap queue is full */ -+#define CTXT_EVENT_QUEUE_FULL (1 << 9) /* Event interrupt queue is full */ -+#define CTXT_COMMAND_OVERFLOW_ERROR (1 << 10) /* Trap queue overflow */ -+ -+#define CTXT_SWAP_WANTED (1 << 11) /* Some one wanted to swap */ -+#define CTXT_WAITING_SWAPIN (1 << 12) /* Someone waiting on swapin */ -+ -+#define CTXT_WAITING_COMMAND (1 << 13) /* swgelan waiting on command port */ -+#define CTXT_COMMAND_MAPPED_MAIN (1 << 14) /* segelan has mapped command port */ -+ -+#define CTXT_QUEUES_EMPTY (1 << 15) /* dma/thread run queues are empty */ -+#define CTXT_QUEUES_EMPTYING (1 << 16) /* dma/thread run queues are being emptied */ -+ -+#define CTXT_USER_FILTERING (1 << 17) /* user requested context filter */ -+ -+#define CTXT_KERNEL (1 << 18) /* context is a kernel context */ -+#define CTXT_COMMAND_MAPPED_ELAN (1 << 19) /* command port is mapped for elan */ -+#define CTXT_FIXUP_NETERR (1 << 20) /* fixing up a network error */ -+ -+ -+#define CTXT_SWAPPED_REASONS (CTXT_NO_LWPS | \ -+ CTXT_DETACHED | \ -+ CTXT_EXITING | \ -+ CTXT_FIXUP_NETERR) -+ -+#define CTXT_OTHERS_REASONS (CTXT_EVENT_QUEUE_FULL | \ -+ CTXT_DMA_QUEUE_FULL | \ -+ CTXT_THREAD_QUEUE_FULL | \ -+ CTXT_COMMAND_OVERFLOW_ERROR | \ -+ CTXT_SWAPPED_REASONS) -+ -+#define CTXT_INPUTTER_REASONS (CTXT_USER_FILTERING | \ -+ CTXT_OTHERS_REASONS) -+ -+#define CTXT_COMMAND_MAPPED (CTXT_COMMAND_MAPPED_MAIN | \ -+ CTXT_COMMAND_MAPPED_ELAN) -+ -+#define CTXT_IS_KERNEL(ctxt) ((ctxt)->Status & CTXT_KERNEL) -+ -+/* -+ * State values for ctxt_inputterState/ctxt_commandportStats -+ */ -+#define CTXT_STATE_OK 0 -+#define CTXT_STATE_TRAPPED 1 /* Inputter channel 0 trapped */ -+#define CTXT_STATE_RESOLVING 2 /* An LWP is resolving the trap */ -+#define CTXT_STATE_NEEDS_RESTART 3 /* Th trapped packet needs to be executed */ -+#define CTXT_STATE_NETWORK_ERROR 4 /* We're waiting on an RPC for the identify transaction */ -+#define CTXT_STATE_EXECUTING 5 /* An LWP is executing the trapped packet */ -+ -+/* -+ * State values for OthersState. -+ */ -+#define CTXT_OTHERS_RUNNING 0 -+#define CTXT_OTHERS_HALTING 1 -+#define CTXT_OTHERS_SWAPPING 2 -+#define CTXT_OTHERS_HALTING_MORE 3 -+#define CTXT_OTHERS_SWAPPING_MORE 4 -+#define CTXT_OTHERS_SWAPPED 5 -+ -+typedef struct elan3_ops -+{ -+ u_int Version; -+ -+ int (*Exception) (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+ -+ /* swap item list functions */ -+ int (*GetWordItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+ int (*GetBlockItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+ void (*PutWordItem) (ELAN3_CTXT *ctxt, int list, E3_Addr value); -+ void (*PutBlockItem) (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+ void (*PutbackItem) (ELAN3_CTXT *ctxt, int list, void *item); -+ void (*FreeWordItem) (ELAN3_CTXT *ctxt, void *item); -+ void (*FreeBlockItem) (ELAN3_CTXT *ctxt, void *item); -+ int (*CountItems) (ELAN3_CTXT *ctxt, int list); -+ -+ /* event interrupt cookie */ -+ int (*Event) (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+ -+ /* swapin/swapout functions. */ -+ void (*Swapin) (ELAN3_CTXT *ctxt); -+ void (*Swapout) (ELAN3_CTXT *ctxt); -+ -+ /* Free of private data */ -+ void (*FreePrivate) (ELAN3_CTXT *ctxt); -+ -+ /* Fixup a network error */ -+ int (*FixupNetworkError) (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+ -+ /* Interrupt handler trap interface */ -+ int (*DProcTrap) (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+ int (*TProcTrap) (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+ int (*IProcTrap) (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+ int (*CProcTrap) (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+ int (*CProcReissue) (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *TrapBuf); -+ -+ /* User memory access functions */ -+ int (*StartFaultCheck)(ELAN3_CTXT *ctxt); -+ void (*EndFaultCheck) (ELAN3_CTXT *ctxt); -+ -+ E3_uint8 (*Load8) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store8) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+ E3_uint16 (*Load16) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store16) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+ E3_uint32 (*Load32) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store32) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+ E3_uint64 (*Load64) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store64) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+} ELAN3_OPS; -+ -+#define ELAN3_OPS_VERSION 0xdeef0001 -+ -+/* -+ * Flags for ops_event. -+ */ -+#define OP_INTR 0 /* Called from interrupt handler */ -+#define OP_LWP 1 /* Called from "lwp" */ -+ -+/* -+ * Return codes for "ops" functions. -+ */ -+#define OP_DEFER 0 /* Defer to next lower interrupt */ -+#define OP_IGNORE 1 /* No event hander, so ignore it */ -+#define OP_HANDLED 2 /* Handled event (resume thread) */ -+#define OP_FAILED 3 /* Failed */ -+ -+#define ELAN3_CALL_OP(ctxt,fn) ((ctxt)->Operations && (ctxt)->Operations->fn) ? (ctxt)->Operations->fn -+ -+#define ELAN3_OP_EXCEPTION(ctxt,type,proc,trap,ap) (ELAN3_CALL_OP(ctxt,Exception) (ctxt,type,proc,trap,ap) : OP_IGNORE) -+#define ELAN3_OP_GET_WORD_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetWordItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_GET_BLOCK_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetBlockItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_PUT_WORD_ITEM(ctxt,list,value) (ELAN3_CALL_OP(ctxt,PutWordItem) (ctxt,list,value) : (void)0) -+#define ELAN3_OP_PUT_BLOCK_ITEM(ctxt,list,ptr) (ELAN3_CALL_OP(ctxt,PutBlockItem) (ctxt,list,ptr) : (void)0) -+#define ELAN3_OP_PUTBACK_ITEM(ctxt,list,item) (ELAN3_CALL_OP(ctxt,PutbackItem) (ctxt,list,item) : (void)0) -+#define ELAN3_OP_FREE_WORD_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeWordItem) (ctxt,item) : (void)0) -+#define ELAN3_OP_FREE_BLOCK_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeBlockItem)(ctxt,item) : (void)0) -+#define ELAN3_OP_COUNT_ITEMS(ctxt,list) (ELAN3_CALL_OP(ctxt,CountItems)(ctxt,list) : 0) -+#define ELAN3_OP_EVENT(ctxt,cookie,flag) (ELAN3_CALL_OP(ctxt,Event)(ctxt,cookie,flag) : OP_IGNORE) -+#define ELAN3_OP_SWAPIN(ctxt) (ELAN3_CALL_OP(ctxt,Swapin)(ctxt) : (void)0) -+#define ELAN3_OP_SWAPOUT(ctxt) (ELAN3_CALL_OP(ctxt,Swapout)(ctxt) : (void)0) -+#define ELAN3_OP_FREE_PRIVATE(ctxt) (ELAN3_CALL_OP(ctxt,FreePrivate)(ctxt) : (void)0) -+#define ELAN3_OP_FIXUP_NETWORK_ERROR(ctxt, nef) (ELAN3_CALL_OP(ctxt,FixupNetworkError)(ctxt,nef) : OP_FAILED) -+ -+#define ELAN3_OP_DPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,DProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_TPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,TProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_IPROC_TRAP(ctxt, trap, chan) (ELAN3_CALL_OP(ctxt,IProcTrap)(ctxt,trap,chan) : OP_DEFER) -+#define ELAN3_OP_CPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,CProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_CPROC_REISSUE(ctxt,tbuf) (ELAN3_CALL_OP(ctxt,CProcReissue)(ctxt, tbuf) : OP_DEFER) -+ -+#define ELAN3_OP_START_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,StartFaultCheck)(ctxt) : 0) -+#define ELAN3_OP_END_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,EndFaultCheck)(ctxt) : (void)0) -+#define ELAN3_OP_LOAD8(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load8)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE8(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store8)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD16(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load16)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE16(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store16)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD32(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load32)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE32(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store32)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD64(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load64)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE64(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store64)(ctxt,addr,val) : (void)0) -+ -+#endif /* __KERNEL__ */ -+ -+/* "list" arguement to ops functions */ -+#define LIST_DMA_PTR 0 -+#define LIST_DMA_DESC 1 -+#define LIST_THREAD 2 -+#define LIST_COMMAND 3 -+#define LIST_SETEVENT 4 -+#define LIST_FREE_WORD 5 -+#define LIST_FREE_BLOCK 6 -+ -+#define MAX_LISTS 7 -+ -+#if defined(__KERNEL__) && MAX_LISTS != NUM_LISTS -+# error Check NUM_LISTS == MAX_LISTS -+#endif -+ -+/* -+ * Values for the 'type' field to PostException(). -+ */ -+#define EXCEPTION_INVALID_ADDR 1 /* FaultArea, res */ -+#define EXCEPTION_UNIMP_INSTR 2 /* instr */ -+#define EXCEPTION_INVALID_PROCESS 3 /* proc, res */ -+#define EXCEPTION_SIMULATION_FAILED 4 /* */ -+#define EXCEPTION_UNIMPLEMENTED 5 /* */ -+#define EXCEPTION_SWAP_FAULT 6 /* */ -+#define EXCEPTION_SWAP_FAILED 7 /* */ -+#define EXCEPTION_BAD_PACKET 8 /* */ -+#define EXCEPTION_FAULTED 9 /* addr */ -+#define EXCEPTION_QUEUE_OVERFLOW 10 /* FaultArea, TrapType */ -+#define EXCEPTION_COMMAND_OVERFLOW 11 /* count */ -+#define EXCEPTION_DMA_RETRY_FAIL 12 /* */ -+#define EXCEPTION_CHAINED_EVENT 13 /* EventAddr */ -+#define EXCEPTION_THREAD_KILLED 14 /* */ -+#define EXCEPTION_CANNOT_SAVE_THREAD 15 -+#define EXCEPTION_BAD_SYSCALL 16 /* */ -+#define EXCEPTION_DEBUG 17 -+#define EXCEPTION_BAD_EVENT 18 /* */ -+#define EXCEPTION_NETWORK_ERROR 19 /* rvp */ -+#define EXCEPTION_BUS_ERROR 20 -+#define EXCEPTION_COOKIE_ERROR 21 -+#define EXCEPTION_PACKET_TIMEOUT 22 -+#define EXCEPTION_BAD_DMA 23 /* */ -+#define EXCEPTION_ENOMEM 24 -+ -+/* -+ * Values for the 'proc' field to ElanException(). -+ */ -+#define COMMAND_PROC 1 -+#define THREAD_PROC 2 -+#define DMA_PROC 3 -+#define INPUT_PROC 4 -+#define EVENT_PROC 5 -+ -+/* Flags to IssueDmaCommand */ -+#define ISSUE_COMMAND_FOR_CPROC 1 -+#define ISSUE_COMMAND_CANT_WAIT 2 -+ -+/* Return code from IssueDmaCommand.*/ -+#define ISSUE_COMMAND_OK 0 -+#define ISSUE_COMMAND_TRAPPED 1 -+#define ISSUE_COMMAND_RETRY 2 -+#define ISSUE_COMMAND_WAIT 3 -+ -+#ifdef __KERNEL__ -+ -+extern ELAN3_CTXT *elan3_alloc(ELAN3_DEV *dev, int kernel); -+extern void elan3_free (ELAN3_CTXT *ctxt); -+ -+extern int elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern int elan3_doattach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern void elan3_detach (ELAN3_CTXT *ctxt); -+extern void elan3_dodetach (ELAN3_CTXT *ctxt); -+ -+extern int elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap); -+extern int elan3_removevp (ELAN3_CTXT *ctxt, int process); -+extern int elan3_addbcastvp(ELAN3_CTXT *ctxt, int process, int base, int count); -+ -+extern int elan3_process (ELAN3_CTXT *ctxt); -+ -+extern int elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_check_route(ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError); -+ -+extern int elan3_lwp (ELAN3_CTXT *ctxt); -+ -+extern void elan3_swapin (ELAN3_CTXT *ctxt, int reason); -+extern void elan3_swapout (ELAN3_CTXT *ctxt, int reason); -+extern int elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages); -+extern void elan3_block_inputter (ELAN3_CTXT *ctxt, int block); -+ -+ -+extern E3_Addr elan3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, int stackSize, int nargs, ...); -+ -+extern void SetInputterState (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void UnloadCommandPageMapping (ELAN3_CTXT *ctxt); -+extern void StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+ -+extern int HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags); -+extern int IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int flags); -+extern int IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int flags); -+extern int WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int flags); -+extern void FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, -+ E3_FaultSave_BE *FaultSaveArea, int flags); -+extern int SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress); -+extern void ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr,int flags); -+extern int SetEventsNeedRestart (ELAN3_CTXT *ctxt); -+extern void RestartSetEvents (ELAN3_CTXT *ctxt); -+extern int RunEventType (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType); -+extern void WakeupLwp (ELAN3_DEV *dev, void *arg); -+extern void QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie); -+extern int WaitForCommandPort (ELAN3_CTXT *ctxt); -+ -+extern int ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...); -+ -+/* context_osdep.c */ -+extern int LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr elanAddr, int len, int protFault, int writeable); -+extern void LoadCommandPortTranslation (ELAN3_CTXT *ctxt); -+ -+#if defined(DIGITAL_UNIX) -+/* seg_elan.c */ -+extern caddr_t elan3_segelan3_create (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_destroy (ELAN3_CTXT *ctxt); -+extern int elan3_segelan3_map (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_unmap (ELAN3_CTXT *ctxt); -+ -+/* seg_elanmem.c */ -+extern int elan3_segelanmem_create (ELAN3_DEV *dev, unsigned object, unsigned off, vm_offset_t *addrp, int len); -+#endif /* defined(DIGITAL_UNIX) */ -+ -+/* route_table.c */ -+extern ELAN3_ROUTE_TABLE *AllocateRouteTable (ELAN3_DEV *dev, int size); -+extern void FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl); -+extern int LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp, int ctxnum, int nflits, E3_uint16 *flits); -+extern int GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits); -+extern void InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+ -+extern int GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri); -+extern int GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive); -+extern int GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive); -+ -+/* virtual_process.c */ -+extern ELAN_LOCATION ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap); -+extern int ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process); -+extern caddr_t CapabilityString (ELAN_CAPABILITY *cap); -+extern void UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+ -+extern int elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_reset_route (ELAN3_CTXT *ctxt, int process); -+ -+/* cproc.c */ -+extern int NextCProcTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+extern void ResolveCProcTrap (ELAN3_CTXT *ctxt); -+extern int RestartCProcTrap (ELAN3_CTXT *ctxt); -+ -+/* iproc.c */ -+extern void InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern void ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvp); -+extern int RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern char *IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData *datap); -+extern void SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck); -+ -+/* tproc.c */ -+extern int NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern void ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern int TProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartTProcItems (ELAN3_CTXT *ctxt); -+extern E3_Addr SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction); -+extern void ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer); -+ -+/* tprocinsts.c */ -+extern int RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal); -+ -+/* tproc_osdep.c */ -+extern int ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+extern int ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+ -+/* dproc.c */ -+extern int NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern int DProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartDProcItems (ELAN3_CTXT *ctxt); -+extern void RestartDmaDesc (ELAN3_CTXT *ctxt, E3_DMA_BE *desc); -+extern void RestartDmaTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr); -+ -+/* network_error.c */ -+extern void InitialiseNetworkErrorResolver (void); -+extern void FinaliseNetworkErrorResolver (void); -+extern int QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp); -+extern void FreeNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern void CancelNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern int ExecuteNetworkErrorFixup (NETERR_MSG *msg); -+extern void CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status); -+ -+extern int AddNeterrServerSyscall (int elanId, void *configp, void *addrp, char *namep); -+ -+/* eventcookie.c */ -+extern void cookie_init(void); -+extern void cookie_fini(void); -+extern EVENT_COOKIE_TABLE *cookie_alloc_table (unsigned long task, unsigned long handle); -+extern void cookie_free_table (EVENT_COOKIE_TABLE *tbl); -+extern int cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+ -+/* routecheck.c */ -+extern int elan3_route_check (ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNode); -+extern int elan3_route_broadcast_check(ELAN3_CTXT *ctxt, E3_uint16 *flitsA, int lowNode, int highNode); -+ -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANCTXT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elandebug.h linux-2.6.9/include/elan3/elandebug.h ---- clean/include/elan3/elandebug.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elandebug.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANDEBUG_H -+#define _ELAN3_ELANDEBUG_H -+ -+#ident "$Id: elandebug.h,v 1.38 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+extern u_int elan3_debug; -+extern u_int elan3_debug_console; -+extern u_int elan3_debug_buffer; -+extern u_int elan3_debug_ignore_dev; -+extern u_int elan3_debug_ignore_kcomm; -+extern u_int elan3_debug_ignore_ctxt; -+extern u_int elan3_debug_display_ctxt; -+ -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_HAT 0x00000002 -+#define DBG_FN 0x00000004 -+#define DBG_SEG 0x00000008 -+#define DBG_INTR 0x00000010 -+#define DBG_LWP 0x00000020 -+#define DBG_FAULT 0x00000040 -+#define DBG_EVENT 0x00000080 -+#define DBG_CPROC 0x00000100 -+#define DBG_TPROC 0x00000200 -+#define DBG_DPROC 0x00000400 -+#define DBG_IPROC 0x00000800 -+#define DBG_SWAP 0x00001000 -+#define DBG_CMD 0x00002000 -+#define DBG_VP 0x00004000 -+#define DBG_SYSCALL 0x00008000 -+#define DBG_BSCAN 0x00010000 -+#define DBG_LINKERR 0x00020000 -+#define DBG_NETERR 0x00040000 -+#define DBG_NETRPC 0x00080000 -+#define DBG_EVENTCOOKIE 0x00100000 -+#define DBG_SDRAM 0x00200000 -+ -+#define DBG_EP 0x10000000 -+#define DBG_EPCONSOLE 0x20000000 -+ -+#define DBG_EIP 0x40000000 -+#define DBG_EIPFAIL 0x80000000 -+ -+#define DBG_ALL 0xffffffff -+ -+/* values to pass as "ctxt" rather than a "ctxt" pointer */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_KCOMM ((void *) 1) -+#define DBG_ICS ((void *) 2) -+#define DBG_USER ((void *) 3) -+#define DBG_NTYPES 64 -+ -+#if defined(DEBUG_PRINTF) -+# define DBG(m,fn) ((elan3_debug&(m)) ? (void)(fn) : (void)0) -+# define PRINTF0(ctxt,m,fmt) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt) : (void)0) -+# define PRINTF1(ctxt,m,fmt,a) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a) : (void)0) -+# define PRINTF2(ctxt,m,fmt,a,b) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b) : (void)0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define DBG(m, fn) do { ; } while (0) -+# define PRINTF0(ctxt,m,fmt) do { ; } while (0) -+# define PRINTF1(ctxt,m,fmt,a) do { ; } while (0) -+# define PRINTF2(ctxt,m,fmt,a,b) do { ; } while (0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) do { ; } while (0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) do { ; } while (0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) do { ; } while (0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) do { ; } while (0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) do { ; } while (0) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+#ifdef __GNUC__ -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...) -+ __attribute__ ((format (printf,3,4))); -+#else -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...); -+#endif -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN3_ELANDEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elandev.h linux-2.6.9/include/elan3/elandev.h ---- clean/include/elan3/elandev.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elandev.h 2005-07-20 07:35:20.000000000 -0400 -@@ -0,0 +1,581 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANDEV_H -+#define __ELAN3_ELANDEV_H -+ -+#ident "$Id: elandev.h,v 1.76.2.1 2005/07/20 11:35:20 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev.h,v $ */ -+ -+#include -+#include -+#include -+ -+#if defined(DIGITAL_UNIX) -+# include -+#elif defined(LINUX) -+# include -+#elif defined(SOLARIS) -+# include -+#endif -+ -+#ifndef TRUE -+# define TRUE 1 -+#endif -+#ifndef FALSE -+# define FALSE 0 -+#endif -+ -+/* -+ * Elan base address registers defined as follows : -+ */ -+#define ELAN3_BAR_SDRAM 0 -+#define ELAN3_BAR_COMMAND_PORT 1 -+#define ELAN3_BAR_REGISTERS 2 -+#define ELAN3_BAR_EBUS 3 -+ -+/* Macro to generate 'offset' to mmap "mem" device */ -+#define OFF_TO_SPACE(off) ((off) >> 28) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(space,off) (((space) << 28) | ((off) & 0x0FFFFFFF)) -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Elan EBUS is configured as follows : -+ */ -+#define ELAN3_EBUS_ROM_OFFSET 0x000000 /* rom */ -+#define ELAN3_EBUS_INTPAL_OFFSET 0x180000 /* interrupt pal (write only) */ -+ -+#define ELAN3_EBUS_ROM_SIZE 0x100000 -+ -+/* -+ * Elan SDRAM is arranged as follows : -+ */ -+#define ELAN3_TANDQ_SIZE 0x0020000 /* Trap And Queue Size */ -+#define ELAN3_CONTEXT_SIZE 0x0010000 /* Context Table Size */ -+#define ELAN3_COMMAND_TRAP_SIZE 0x0010000 /* Command Port Trap Size */ -+ -+#ifdef MPSAS -+#define ELAN3_LN2_NUM_CONTEXTS 8 /* Support 256 contexts */ -+#else -+#define ELAN3_LN2_NUM_CONTEXTS 12 /* Support 4096 contexts */ -+#endif -+#define ELAN3_NUM_CONTEXTS (1 << ELAN3_LN2_NUM_CONTEXTS) /* Entries in context table */ -+ -+#define ELAN3_SDRAM_NUM_BANKS 4 /* Elan supports 4 Banks of Sdram */ -+#define ELAN3_SDRAM_BANK_SHIFT 26 /* each of which can be 64 mbytes ? */ -+#define ELAN3_SDRAM_BANK_SIZE (1 << ELAN3_SDRAM_BANK_SHIFT) -+ -+#define ELAN3_MAX_CACHE_SIZE (64 * 1024) /* Maximum cache size */ -+#define ELAN3_CACHE_SIZE (64 * 4 * E3_CACHELINE_SIZE) /* Elan3 has 8K cache */ -+ -+#ifndef offsetof -+#define offsetof(s, m) (size_t)(&(((s *)0)->m)) -+#endif -+ -+/* -+ * circular queue and macros to access members. -+ */ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_QUEUE; -+ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_middle; /* Middle pointer */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_SPLIT_QUEUE; -+ -+#define ELAN3_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop)+1, (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define ELAN3_QUEUE_FULL(q) ((q).q_count == ((q).q_size - (q).q_slop)) -+#define ELAN3_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size - 1) -+#define ELAN3_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define ELAN3_QUEUE_FRONT_EMPTY(q) ((q).q_front == (q).q_middle) -+#define ELAN3_QUEUE_BACK_EMPTY(q) ((q).q_middle == (q).q_back) -+#define ELAN3_QUEUE_ADD(q) ((q).q_back = ((q).q_back+1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_REMOVE(q) ((q).q_front = ((q).q_front+1) % (q).q_size, (q).q_count--) -+#define ELAN3_QUEUE_ADD_FRONT(q) ((q).q_front = ((q).q_front-1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_CONSUME(q) ((q).q_middle = ((q).q_middle+1) % (q).q_size) -+#define ELAN3_QUEUE_FRONT(q,qArea) (&(qArea)[(q).q_front]) -+#define ELAN3_QUEUE_MIDDLE(q,qArea) (&(qArea)[(q).q_middle]) -+#define ELAN3_QUEUE_BACK(q,qArea) (&(qArea)[(q).q_back]) -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 17 /* allows max 64Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+#define SDRAM_FREELIST_TRIGGER 32 -+ -+typedef struct elan3_sdram_bank -+{ -+ u_int Size; /* Size of bank of memory */ -+ -+ ioaddr_t Mapping; /* Where mapped in the kernel */ -+ DeviceMappingHandle Handle; /* and mapping handle */ -+ -+ struct elan3_ptbl_gr **PtblGroups; -+ -+ bitmap_t *Bitmaps[SDRAM_NUM_FREE_LISTS]; -+} ELAN3_SDRAM_BANK; -+ -+typedef struct elan3_haltop -+{ -+ struct elan3_haltop *Next; /* Chain to next in list. */ -+ E3_uint32 Mask; /* Interrupt mask to see before calling function */ -+ -+ void (*Function)(void *, void *); /* Function to call */ -+ void *Arguement; /* Arguement to pass to function */ -+} ELAN3_HALTOP; -+ -+#define HALTOP_BATCH 32 -+ -+#endif /* __KERNEL__ */ -+ -+typedef struct elan3_stats -+{ -+ u_long Version; /* version field */ -+ u_long Interrupts; /* count of elan interrupts */ -+ u_long TlbFlushes; /* count of tlb flushes */ -+ u_long InvalidContext; /* count of traps with invalid context */ -+ u_long ComQueueHalfFull; /* count of interrupts due to com queue being half full */ -+ -+ u_long CProcTraps; /* count of cproc traps */ -+ u_long DProcTraps; /* count of dproc traps */ -+ u_long TProcTraps; /* cound of tproc traps */ -+ u_long IProcTraps; /* count of iproc traps */ -+ u_long EventInterrupts; /* count of event interrupts */ -+ -+ u_long PageFaults; /* count of elan page faults */ -+ -+ /* inputter related */ -+ u_long EopBadAcks; /* count of EOP_BAD_ACKs */ -+ u_long EopResets; /* count of EOP_ERROR_RESET */ -+ u_long InputterBadLength; /* count of BadLength */ -+ u_long InputterCRCDiscards; /* count of CRC_STATUS_DISCARD */ -+ u_long InputterCRCErrors; /* count of CRC_STATUS_ERROR */ -+ u_long InputterCRCBad; /* count of CRC_STATUS_BAD */ -+ u_long DmaNetworkErrors; /* count of errors in dma data */ -+ u_long DmaIdentifyNetworkErrors; /* count of errors after dma identify */ -+ u_long ThreadIdentifyNetworkErrors; /* count of errors after thread identify */ -+ -+ /* dma related */ -+ u_long DmaRetries; /* count of dma retries (due to retry fail count) */ -+ u_long DmaOutputTimeouts; /* count of dma output timeouts */ -+ u_long DmaPacketAckErrors; /* count of dma packet ack errors */ -+ -+ /* thread related */ -+ u_long ForcedTProcTraps; /* count of forced tproc traps */ -+ u_long TrapForTooManyInsts; /* count of too many instruction traps */ -+ u_long ThreadOutputTimeouts; /* count of thread output timeouts */ -+ u_long ThreadPacketAckErrors; /* count of thread packet ack errors */ -+ -+ /* link related */ -+ u_long LockError; /* count of RegPtr->Exts.LinkErrorTypes:LS_LockError */ -+ u_long DeskewError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DeskewError */ -+ u_long PhaseError; /* count of RegPtr->Exts.LinkErrorTypes:LS_PhaseError */ -+ u_long DataError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DataError */ -+ u_long FifoOvFlow0; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow0 */ -+ u_long FifoOvFlow1; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow1 */ -+ u_long LinkErrorValue; /* link error value on data error */ -+ -+ /* memory related */ -+ u_long CorrectableErrors; /* count of correctable ecc errors */ -+ u_long UncorrectableErrors; /* count of uncorrectable ecc errors */ -+ u_long MultipleErrors; /* count of multiple ecc errors */ -+ u_long SdramBytesFree; /* count of sdram bytes free */ -+ -+ /* Interrupt related */ -+ u_long LongestInterrupt; /* length of longest interrupt in ticks */ -+ -+ u_long EventPunts; /* count of punts of event interrupts to thread */ -+ u_long EventRescheds; /* count of reschedules of event interrupt thread */ -+} ELAN3_STATS; -+ -+#define ELAN3_STATS_VERSION (ulong)2 -+#define ELAN3_NUM_STATS (sizeof (ELAN3_STATS)/sizeof (u_long)) -+ -+#define ELAN3_STATS_DEV_FMT "elan3_stats_dev_%d" -+ -+#ifdef __KERNEL__ -+ -+#define BumpStat(dev,stat) ((dev)->Stats.stat++) -+ -+typedef struct elan3_level_ptbl_block -+{ -+ spinlock_t PtblLock; /* Page table freelist lock */ -+ int PtblTotal; /* Count of level N page tables allocated */ -+ int PtblFreeCount; /* Count of free level N page tables */ -+ struct elan3_ptbl *PtblFreeList; /* Free level N page tables */ -+ struct elan3_ptbl_gr *PtblGroupList; /* List of Groups of level N page tables */ -+} ELAN3_LEVEL_PTBL_BLOCK; -+ -+typedef struct elan3_dev -+{ -+ ELAN3_DEV_OSDEP Osdep; /* OS specific entries */ -+ int Instance; /* Device number */ -+ ELAN_DEVINFO Devinfo; -+ ELAN_POSITION Position; /* position in switch network (for user code) */ -+ ELAN_DEV_IDX DeviceIdx; /* device index registered with elanmod */ -+ -+ int ThreadsShouldStop; /* flag that kernel threads should stop */ -+ -+ spinlock_t IntrLock; -+ spinlock_t TlbLock; -+ spinlock_t CProcLock; -+ kcondvar_t IntrWait; /* place event interrupt thread sleeps */ -+ unsigned EventInterruptThreadStarted:1; /* event interrupt thread started */ -+ unsigned EventInterruptThreadStopped:1; /* event interrupt thread stopped */ -+ -+ DeviceMappingHandle RegHandle; /* DDI Handle */ -+ ioaddr_t RegPtr; /* Elan Registers */ -+ -+ volatile E3_uint32 InterruptMask; /* copy of RegPtr->InterruptMask */ -+ volatile E3_uint32 Event_Int_Queue_FPtr; /* copy of RegPtr->Event_Int_Queue_FPtr */ -+ volatile E3_uint32 SchCntReg; /* copy of RegPtr->SchCntReg */ -+ volatile E3_uint32 Cache_Control_Reg; /* true value for RegPtr->Cache_Control_Reg */ -+ -+ ELAN3_SDRAM_BANK SdramBanks[ELAN3_SDRAM_NUM_BANKS]; /* Elan sdram banks */ -+ spinlock_t SdramLock; /* Sdram allocator */ -+ sdramaddr_t SdramFreeLists[SDRAM_NUM_FREE_LISTS]; -+ unsigned SdramFreeCounts[SDRAM_NUM_FREE_LISTS]; -+ -+ sdramaddr_t TAndQBase; /* Trap and Queue area */ -+ sdramaddr_t ContextTable; /* Elan Context Table */ -+ u_int ContextTableSize; /* # entries in context table */ -+ -+ struct elan3_ctxt **CtxtTable; /* array of ctxt pointers or nulls */ -+ -+ sdramaddr_t CommandPortTraps[2]; /* Command port trap overflow */ -+ int CurrentCommandPortTrap; /* Which overflow queue we're using */ -+ -+ u_int HaltAllCount; /* Count of reasons to halt context 0 queues */ -+ u_int HaltNonContext0Count; /* Count of reasons to halt non-context 0 queues */ -+ u_int HaltDmaDequeueCount; /* Count of reasons to halt dma from dequeuing */ -+ u_int HaltThreadCount; /* Count of reasons to halt the thread processor */ -+ u_int FlushCommandCount; /* Count of reasons to flush command queues */ -+ u_int DiscardAllCount; /* Count of reasons to discard context 0 */ -+ u_int DiscardNonContext0Count; /* Count of reasons to discard non context 0 */ -+ -+ struct thread_trap *ThreadTrap; /* Thread Processor trap space */ -+ struct dma_trap *DmaTrap; /* DMA Processor trap space */ -+ -+ spinlock_t FreeHaltLock; /* Lock for haltop free list */ -+ ELAN3_HALTOP *FreeHaltOperations; /* Free list of haltops */ -+ u_int NumHaltOperations; /* Number of haltops allocated */ -+ u_int ReservedHaltOperations; /* Number of haltops reserved */ -+ -+ ELAN3_HALTOP *HaltOperations; /* List of operations to call */ -+ ELAN3_HALTOP **HaltOperationsTailpp; /* Pointer to last "next" pointer in list */ -+ E3_uint32 HaltOperationsMask; /* Or of all bits in list of operations */ -+ -+ physaddr_t SdramPhysBase; /* Physical address of SDRAM */ -+ physaddr_t SdramPhysMask; /* and mask of significant bits */ -+ -+ physaddr_t PciPhysBase; /* physical address of local PCI segment */ -+ physaddr_t PciPhysMask; /* and mask of significant bits */ -+ -+ long ErrorTime; /* lbolt at last error (link,ecc etc) */ -+ long ErrorsPerTick; /* count of errors for this tick */ -+ timer_fn_t ErrorTimeoutId; /* id of timeout when errors masked out */ -+ timer_fn_t DmaPollTimeoutId; /* id of timeout to poll for "bad" dmas */ -+ int FilterHaltQueued; -+ -+ /* -+ * HAT layer specific entries. -+ */ -+ ELAN3_LEVEL_PTBL_BLOCK Level[4]; -+ spinlock_t PtblGroupLock; /* Lock for Page Table group lists */ -+ struct elan3_ptbl_gr *Level3PtblGroupHand; /* Hand for ptbl stealing */ -+ -+ /* -+ * Per-Context Information structures. -+ */ -+ struct elan3_info *Infos; /* List of "infos" for this device */ -+ -+ char LinkShutdown; /* link forced into reset by panic/shutdown/dump */ -+ -+ /* -+ * Device statistics. -+ */ -+ ELAN3_STATS Stats; -+ ELAN_STATS_IDX StatsIndex; -+ -+ struct { -+ E3_Regs *RegPtr; -+ char *Sdram[ELAN3_SDRAM_NUM_BANKS]; -+ } PanicState; -+} ELAN3_DEV; -+ -+#define ELAN3_DEV_CTX_TABLE(dev,ctxtn) ( (dev)->CtxtTable[ (ctxtn) & MAX_ROOT_CONTEXT_MASK] ) -+ -+/* macros for accessing dev->RegPtr.Tags/Sets. */ -+#define write_cache_tag(dev,what,val) writeq (val, (void *) (dev->RegPtr + offsetof (E3_Regs, Tags.what))) -+#define read_cache_tag(dev,what) readq ((void *) (dev->RegPtr + offsetof (E3_Regs, Tags.what))) -+#define write_cache_set(dev,what,val) writeq (val, (void *) (dev->RegPtr + offsetof (E3_Regs, Sets.what))) -+#define read_cache_set(dev,what) readq ((void *) (dev->RegPtr + offsetof (E3_Regs, Sets.what))) -+ -+/* macros for accessing dev->RegPtr.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, (void *) (dev->RegPtr + offsetof (E3_Regs, Regs.what))) -+#define write_reg32(dev,what,val) writel (val, (void *) (dev->RegPtr + offsetof (E3_Regs, Regs.what))) -+#define read_reg64(dev,what) readq ((void *) (dev->RegPtr + offsetof (E3_Regs, Regs.what))) -+#define read_reg32(dev,what) readl ((void *) (dev->RegPtr + offsetof (E3_Regs, Regs.what))) -+ -+/* macros for accessing dev->RegPtr.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, (void *) (dev->RegPtr + offsetof (E3_Regs, URegs.what))) -+#define write_ureg32(dev,what,val) writel (val, (void *) (dev->RegPtr + offsetof (E3_Regs, URegs.what))) -+#define read_ureg64(dev,what) readq ((void *) (dev->RegPtr + offsetof (E3_Regs, URegs.what))) -+#define read_ureg32(dev,what) readl ((void *) (dev->RegPtr + offsetof (E3_Regs, URegs.what))) -+ -+/* macros for accessing dma descriptor/thread regs */ -+#define copy_dma_regs(dev, desc) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < sizeof (E3_DMA)/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) desc)[i] = readq ((void *)(dev->RegPtr + offsetof (E3_Regs, Regs.Dma_Desc) + i*sizeof (E3_uint64))); \ -+MACRO_END -+ -+#define copy_thread_regs(dev, regs) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < (32*sizeof (E3_uint32))/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) regs)[i] = readq ((void *)(dev->RegPtr + offsetof (E3_Regs, Regs.Globals[0]) + i*sizeof (E3_uint64))); \ -+MACRO_END -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ _E3_DataBusMap::Exts _E3_DataBusMap::Input_Context_Fil_Flush -+ elan3_dev::CurrentCommandPortTrap elan3_dev::HaltAllCount elan3_dev::HaltDmaDequeueCount -+ elan3_dev::FlushCommandCount elan3_dev::DiscardAllCount elan3_dev::DiscardNonContext0Count -+ elan3_dev::HaltOperations elan3_dev::HaltOperationsMask)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::TlbLock, -+ _E3_DataBusMap::Cache_Control_Reg)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_dev::Infos elan3_dev::InfoTable)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::FreeHaltLock, -+ elan3_dev::FreeHaltOperations elan3_dev::NumHaltOperations elan3_dev::ReservedHaltOperations)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PageFreeListLock, -+ elan3_dev::PageFreeList elan3_dev::PageFreeListSize)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level1PtblLock, -+ elan3_dev::Level1PtblTotal elan3_dev::Level1PtblFreeCount elan3_dev::Level1PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level2PtblLock, -+ elan3_dev::Level2PtblTotal elan3_dev::Level2PtblFreeCount elan3_dev::Level2PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level3PtblLock, -+ elan3_dev::Level3PtblTotal elan3_dev::Level3PtblFreeCount elan3_dev::Level3PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PtblGroupLock, -+ elan3_dev::Level1PtblGroupList elan3_dev::Level2PtblGroupList elan3_dev::Level3PtblGroupList)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_dev::InfoTable elan3_dev::Level1PtblFreeList -+ elan3_dev::Level2PtblFreeList elan3_dev::Level3PtblFreeList)) -+ -+_NOTE(LOCK_ORDER(elan3_dev::InfoLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::InfoLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::IntrLock)) -+ -+#define SET_INT_MASK(dev,Mask) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev)->InterruptMask = (Mask))); mmiob(); MACRO_END -+#define ENABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask |= (bits)))); mmiob(); MACRO_END -+#define DISABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask &= ~(bits)))); mmiob(); MACRO_END -+ -+#define INIT_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg |= (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob (); \ -+MACRO_END -+ -+#define CLEAR_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg &= ~(val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define MODIFY_SCHED_STATUS(dev, SetBits, ClearBits) \ -+MACRO_BEGIN \ -+ ASSERT ((((SetBits)|(ClearBits)) & HaltStopAndExtTestMask) == ((SetBits)|(ClearBits))); \ -+ (dev)->SchCntReg = (((dev)->SchCntReg | (SetBits)) & ~(ClearBits)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define PULSE_SCHED_STATUS(dev, RestartBits) \ -+MACRO_BEGIN \ -+ ASSERT (((RestartBits) & HaltStopAndExtTestMask) == 0); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg | (RestartBits)); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_LINK_VALUE(dev, enabled, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (((dev)->SchCntReg & HaltAndStopMask) | ((enabled) ? LinkBoundaryScan : 0) | LinkSetValue(val, 0)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#ifdef DEBUG_ASSERT -+# define ELAN3_ASSERT(dev, EX) ((void)((EX) || elan3_assfail(dev, #EX, __FILE__, __LINE__))) -+#else -+# define ELAN3_ASSERT(dev, EX) -+#endif -+ -+/* elandev_generic.c */ -+extern int InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort); -+extern void FinaliseElan (ELAN3_DEV *dev); -+extern int InterruptHandler (ELAN3_DEV *dev); -+extern void PollForDmaHungup (void *arg); -+ -+extern int SetLinkBoundaryScan (ELAN3_DEV *dev); -+extern void ClearLinkBoundaryScan (ELAN3_DEV *dev); -+extern int WriteBoundaryScanValue (ELAN3_DEV *dev, int value); -+extern int ReadBoundaryScanValue(ELAN3_DEV *dev, int link); -+ -+extern int ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency); -+ -+extern struct elan3_ptbl_gr *ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset); -+extern void ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, struct elan3_ptbl_gr *ptg); -+ -+extern void ElanFlushTlb (ELAN3_DEV *dev); -+ -+extern void SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp); -+extern void FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op); -+extern int ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep); -+extern void ReleaseHaltOperations (ELAN3_DEV *dev, int count); -+extern void ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend); -+extern void QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement); -+ -+extern int ComputePosition (ELAN_POSITION *pos, unsigned NodeId, unsigned NumNodes, unsigned numDownLinksVal); -+ -+extern caddr_t MiToName (int mi); -+extern void ElanBusError (ELAN3_DEV *dev); -+ -+extern void TriggerLsa (ELAN3_DEV *dev); -+ -+extern ELAN3_DEV *elan3_device (int instance); -+extern int DeviceRegisterSize (ELAN3_DEV *dev, int rnumber, int *sizep); -+extern int MapDeviceRegister (ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, int offset, -+ int len, DeviceMappingHandle *handlep); -+extern void UnmapDeviceRegister (ELAN3_DEV *dev, DeviceMappingHandle *handlep); -+ -+ -+/* sdram.c */ -+/* sdram accessing functions - define 4 different types for 8,16,32,64 bit accesses */ -+extern unsigned char elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern void elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+extern void elan3_sdram_init (ELAN3_DEV *dev); -+extern void elan3_sdram_fini (ELAN3_DEV *dev); -+extern void elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top); -+extern sdramaddr_t elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes); -+extern void elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern physaddr_t elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t addr); -+ -+/* cproc.c */ -+extern void HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Mask); -+ -+/* iproc.c */ -+extern void HandleIProcTrap (ELAN3_DEV *dev, int Channel, E3_uint32 Pend, sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, sdramaddr_t DataOff); -+ -+/* tproc.c */ -+extern int HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverTProcTrap (ELAN3_DEV *dev, struct thread_trap *threadTrap, E3_uint32 Pend); -+ -+/* dproc.c */ -+extern int HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverDProcTrap (ELAN3_DEV *dev, struct dma_trap *dmaTrap, E3_uint32 Pend); -+ -+#if defined(LINUX) -+/* procfs_linux.h */ -+extern struct proc_dir_entry *elan3_procfs_root; -+extern struct proc_dir_entry *elan3_config_root; -+ -+extern void elan3_procfs_init(void); -+extern void elan3_procfs_fini(void); -+extern void elan3_procfs_device_init (ELAN3_DEV *dev); -+extern void elan3_procfs_device_fini (ELAN3_DEV *dev); -+#endif /* defined(LINUX) */ -+ -+/* elan3_osdep.c */ -+extern int BackToBackMaster; -+extern int BackToBackSlave; -+ -+#define ELAN_REG_REC_MAX (100) -+#define ELAN_REG_REC(REG) { \ -+elan_reg_rec_file [elan_reg_rec_index] = __FILE__; \ -+elan_reg_rec_line [elan_reg_rec_index] = __LINE__; \ -+elan_reg_rec_reg [elan_reg_rec_index] = REG; \ -+elan_reg_rec_cpu [elan_reg_rec_index] = smp_processor_id(); \ -+elan_reg_rec_lbolt[elan_reg_rec_index] = lbolt; \ -+elan_reg_rec_index = ((elan_reg_rec_index+1) % ELAN_REG_REC_MAX);} -+ -+extern char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+extern long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+extern E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_index; -+ -+#endif /* __KERNEL__ */ -+ -+ -+#define ELAN3_PROCFS_ROOT "/proc/qsnet/elan3" -+#define ELAN3_PROCFS_VERSION "/proc/qsnet/elan3/version" -+#define ELAN3_PROCFS_DEBUG "/proc/qsnet/elan3/config/elandebug" -+#define ELAN3_PROCFS_DEBUG_CONSOLE "/proc/qsnet/elan3/config/elandebug_console" -+#define ELAN3_PROCFS_DEBUG_BUFFER "/proc/qsnet/elan3/config/elandebug_buffer" -+#define ELAN3_PROCFS_MMU_DEBUG "/proc/qsnet/elan3/config/elan3mmu_debug" -+#define ELAN3_PROCFS_PUNT_LOOPS "/proc/qsnet/elan3/config/eventint_punt_loops" -+ -+#define ELAN3_PROCFS_DEVICE_STATS_FMT "/proc/qsnet/elan3/device%d/stats" -+#define ELAN3_PROCFS_DEVICE_POSITION_FMT "/proc/qsnet/elan3/device%d/position" -+#define ELAN3_PROCFS_DEVICE_NODESET_FMT "/proc/qsnet/elan3/device%d/nodeset" -+ -+#endif /* __ELAN3_ELANDEV_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elandev_linux.h linux-2.6.9/include/elan3/elandev_linux.h ---- clean/include/elan3/elandev_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elandev_linux.h 2005-04-05 11:28:37.000000000 -0400 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELANDEV_LINUX_H -+#define __ELANDEV_LINUX_H -+ -+#ident "$Id: elandev_linux.h,v 1.14 2005/04/05 15:28:37 robin Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev_linux.h,v $*/ -+ -+#ifdef __KERNEL__ -+#include -+#include -+#include -+ -+#include -+ -+#if !defined(NO_COPROC) /* The older coproc kernel patch is applied */ -+#include -+ -+#define ioproc_ops coproc_ops_struct -+#define ioproc_register_ops register_coproc_ops -+#define ioproc_unregister_ops unregister_coproc_ops -+ -+#define IOPROC_MM_STRUCT_ARG 1 -+#define IOPROC_PATCH_APPLIED 1 -+ -+#elif !defined(NO_IOPROC) /* The new ioproc kernel patch is applied */ -+#include -+ -+#define IOPROC_PATCH_APPLIED 1 -+#endif -+#endif -+ -+#define ELAN3_MAJOR 60 -+#define ELAN3_NAME "elan3" -+#define ELAN3_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+#define ELAN3_MINOR_DEVNUM(m) ((m) & 0x0f) /* card number */ -+#define ELAN3_MINOR_DEVFUN(m) (((m) >> 4) & 0x0f) /* function */ -+#define ELAN3_MINOR_CONTROL 0 /* function values */ -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+ -+typedef void *DeviceMappingHandle; -+ -+/* task and ctxt handle types */ -+typedef struct mm_struct *TaskHandle; -+typedef int CtxtHandle; -+ -+#define ELAN3_MY_TASK_HANDLE() (current->mm) -+#define KERNEL_TASK_HANDLE() (get_kern_mm()) -+ -+/* -+ * OS-dependent component of ELAN3_DEV struct. -+ */ -+typedef struct elan3_dev_osdep -+{ -+ struct pci_dev *pci; /* PCI config data */ -+ int ControlDeviceOpen; /* flag to indicate control */ -+ /* device open */ -+ struct proc_dir_entry *procdir; -+} ELAN3_DEV_OSDEP; -+ -+#endif /* __ELANDEV_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elanio.h linux-2.6.9/include/elan3/elanio.h ---- clean/include/elan3/elanio.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elanio.h 2003-12-08 10:40:26.000000000 -0500 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3IO_H -+#define __ELAN3_ELAN3IO_H -+ -+#ident "$Id: elanio.h,v 1.19 2003/12/08 15:40:26 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanio.h,v $*/ -+ -+#define ELAN3IO_CONTROL_PATHNAME "/dev/elan3/control%d" -+#define ELAN3IO_MEM_PATHNAME "/dev/elan3/mem%d" -+#define ELAN3IO_USER_PATHNAME "/dev/elan3/user%d" -+#define ELAN3IO_SDRAM_PATHNAME "/dev/elan3/sdram%d" -+#define ELAN3IO_MAX_PATHNAMELEN 32 -+ -+/* ioctls on /dev/elan3/control */ -+#define ELAN3IO_CONTROL_BASE 0 -+ -+#define ELAN3IO_SET_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 0) -+#define ELAN3IO_CLEAR_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 1) -+#define ELAN3IO_READ_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 2, E3_uint32) -+#define ELAN3IO_WRITE_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 3, E3_uint32) -+ -+typedef struct elanio_set_debug_struct -+{ -+ char what[32]; -+ u_long value; -+} ELAN3IO_SET_DEBUG_STRUCT; -+#define ELAN3IO_SET_DEBUG _IOW ('e', ELAN3IO_CONTROL_BASE + 4, ELAN3IO_SET_DEBUG_STRUCT) -+ -+typedef struct elanio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} ELAN3IO_DEBUG_BUFFER_STRUCT; -+#define ELAN3IO_DEBUG_BUFFER _IOWR ('e', ELAN3IO_CONTROL_BASE + 5, ELAN3IO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct elanio_neterr_server_struct -+{ -+ u_int elanid; -+ void *addr; -+ char *name; -+} ELAN3IO_NETERR_SERVER_STRUCT; -+#define ELAN3IO_NETERR_SERVER _IOW ('e', ELAN3IO_CONTROL_BASE + 6, ELAN3IO_NETERR_SERVER_STRUCT) -+#define ELAN3IO_NETERR_FIXUP _IOWR ('e', ELAN3IO_CONTROL_BASE + 7, NETERR_MSG) -+ -+typedef struct elanio_set_position_struct -+{ -+ u_int device; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELAN3IO_SET_POSITION_STRUCT; -+#define ELAN3IO_SET_POSITION _IOW ('e', ELAN3IO_CONTROL_BASE + 8, ELAN3IO_SET_POSITION_STRUCT) -+ -+#if defined(LINUX) -+ -+/* ioctls on /dev/elan3/sdram */ -+#define ELAN3IO_SDRAM_BASE 20 -+ -+/* ioctls on /dev/elan3/user */ -+#define ELAN3IO_USER_BASE 30 -+ -+#define ELAN3IO_FREE _IO ('e', ELAN3IO_USER_BASE + 0) -+ -+#define ELAN3IO_ATTACH _IOWR('e', ELAN3IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN3IO_DETACH _IO ('e', ELAN3IO_USER_BASE + 2) -+ -+typedef struct elanio_addvp_struct -+{ -+ u_int process; -+ ELAN_CAPABILITY capability; -+} ELAN3IO_ADDVP_STRUCT; -+#define ELAN3IO_ADDVP _IOWR('e', ELAN3IO_USER_BASE + 3, ELAN3IO_ADDVP_STRUCT) -+#define ELAN3IO_REMOVEVP _IOW ('e', ELAN3IO_USER_BASE + 4, int) -+ -+typedef struct elanio_bcastvp_struct -+{ -+ u_int process; -+ u_int lowvp; -+ u_int highvp; -+} ELAN3IO_BCASTVP_STRUCT; -+#define ELAN3IO_BCASTVP _IOW ('e', ELAN3IO_USER_BASE + 5, ELAN3IO_BCASTVP_STRUCT) -+ -+typedef struct elanio_loadroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_LOAD_ROUTE_STRUCT; -+#define ELAN3IO_LOAD_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 6, ELAN3IO_LOAD_ROUTE_STRUCT) -+ -+#define ELAN3IO_PROCESS _IO ('e', ELAN3IO_USER_BASE + 7) -+ -+typedef struct elanio_setperm_struct -+{ -+ caddr_t maddr; -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_SETPERM_STRUCT; -+#define ELAN3IO_SETPERM _IOW ('e', ELAN3IO_USER_BASE + 8, ELAN3IO_SETPERM_STRUCT) -+ -+typedef struct elanio_clearperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+} ELAN3IO_CLEARPERM_STRUCT; -+#define ELAN3IO_CLEARPERM _IOW ('e', ELAN3IO_USER_BASE + 9, ELAN3IO_CLEARPERM_STRUCT) -+ -+typedef struct elanio_changeperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_CHANGEPERM_STRUCT; -+#define ELAN3IO_CHANGEPERM _IOW ('e', ELAN3IO_USER_BASE + 10, ELAN3IO_CHANGEPERM_STRUCT) -+ -+ -+#define ELAN3IO_HELPER_THREAD _IO ('e', ELAN3IO_USER_BASE + 11) -+#define ELAN3IO_WAITCOMMAND _IO ('e', ELAN3IO_USER_BASE + 12) -+#define ELAN3IO_BLOCK_INPUTTER _IOW ('e', ELAN3IO_USER_BASE + 13, int) -+#define ELAN3IO_SET_FLAGS _IOW ('e', ELAN3IO_USER_BASE + 14, int) -+ -+#define ELAN3IO_WAITEVENT _IOW ('e', ELAN3IO_USER_BASE + 15, E3_Event) -+#define ELAN3IO_ALLOC_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 16, EVENT_COOKIE) -+#define ELAN3IO_FREE_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 17, EVENT_COOKIE) -+#define ELAN3IO_ARM_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 18, EVENT_COOKIE) -+#define ELAN3IO_WAIT_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 19, EVENT_COOKIE) -+ -+#define ELAN3IO_SWAPSPACE _IOW ('e', ELAN3IO_USER_BASE + 20, SYS_SWAP_SPACE) -+#define ELAN3IO_EXCEPTION_SPACE _IOW ('e', ELAN3IO_USER_BASE + 21, SYS_EXCEPTION_SPACE) -+#define ELAN3IO_GET_EXCEPTION _IOR ('e', ELAN3IO_USER_BASE + 22, SYS_EXCEPTION) -+ -+typedef struct elanio_unload_struct -+{ -+ void *addr; -+ size_t len; -+} ELAN3IO_UNLOAD_STRUCT; -+#define ELAN3IO_UNLOAD _IOW ('e', ELAN3IO_USER_BASE + 23, ELAN3IO_UNLOAD_STRUCT) -+ -+ -+ -+typedef struct elanio_getroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_GET_ROUTE_STRUCT; -+#define ELAN3IO_GET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 24, ELAN3IO_GET_ROUTE_STRUCT) -+ -+typedef struct elanio_resetroute_struct -+{ -+ u_int process; -+} ELAN3IO_RESET_ROUTE_STRUCT; -+#define ELAN3IO_RESET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 25, ELAN3IO_RESET_ROUTE_STRUCT) -+ -+typedef struct elanio_checkroute_struct -+{ -+ u_int process; -+ E3_uint32 routeError; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_CHECK_ROUTE_STRUCT; -+#define ELAN3IO_CHECK_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 26, ELAN3IO_CHECK_ROUTE_STRUCT) -+ -+typedef struct elanio_vp2nodeId_struct -+{ -+ u_int process; -+ unsigned short nodeId; -+ ELAN_CAPABILITY cap; -+} ELAN3IO_VP2NODEID_STRUCT; -+#define ELAN3IO_VP2NODEID _IOWR('e', ELAN3IO_USER_BASE + 27, ELAN3IO_VP2NODEID_STRUCT) -+ -+#define ELAN3IO_SET_SIGNAL _IOW ('e', ELAN3IO_USER_BASE + 28, int) -+ -+typedef struct elanio_process_2_location_struct -+{ -+ u_int process; -+ ELAN_LOCATION loc; -+} ELAN3IO_PROCESS_2_LOCATION_STRUCT; -+#define ELAN3IO_PROCESS_2_LOCATION _IOW ('e', ELAN3IO_USER_BASE + 29, ELAN3IO_PROCESS_2_LOCATION_STRUCT) -+ -+ -+ -+/* ioctls on all device */ -+#define ELAN3IO_GENERIC_BASE 100 -+typedef struct elanio_get_devinfo_struct -+{ -+ ELAN_DEVINFO *devinfo; -+} ELAN3IO_GET_DEVINFO_STRUCT; -+#define ELAN3IO_GET_DEVINFO _IOR ('e', ELAN3IO_GENERIC_BASE + 0, ELAN_DEVINFO) -+ -+typedef struct elanio_get_position_struct -+{ -+ ELAN_POSITION *position; -+} ELAN3IO_GET_POSITION_STRUCT; -+#define ELAN3IO_GET_POSITION _IOR ('e', ELAN3IO_GENERIC_BASE + 1, ELAN_POSITION) -+ -+typedef struct elanio_stats_struct -+{ -+ int which; -+ void *ptr; -+} ELAN3IO_STATS_STRUCT; -+#define ELAN3IO_STATS _IOR ('e', ELAN3IO_GENERIC_BASE + 2, ELAN3IO_STATS_STRUCT) -+# define ELAN3_SYS_STATS_DEVICE 0 -+# define ELAN3_SYS_STATS_MMU 1 -+ -+/* offsets on /dev/elan3/control */ -+ -+/* offsets on /dev/elan3/mem */ -+ -+/* page numbers on /dev/elan3/user */ -+#define ELAN3IO_OFF_COMMAND_PAGE 0 -+#define ELAN3IO_OFF_FLAG_PAGE 1 -+#define ELAN3IO_OFF_UREG_PAGE 2 -+ -+#endif /* LINUX */ -+ -+#endif /* __ELAN3_ELAN3IO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elanregs.h linux-2.6.9/include/elan3/elanregs.h ---- clean/include/elan3/elanregs.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elanregs.h 2004-04-22 08:27:21.000000000 -0400 -@@ -0,0 +1,1063 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * Header file for internal slave mapping of the ELAN3 registers -+ */ -+ -+#ifndef _ELAN3_ELANREGS_H -+#define _ELAN3_ELANREGS_H -+ -+#ident "$Id: elanregs.h,v 1.87 2004/04/22 12:27:21 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanregs.h,v $*/ -+ -+#include -+#include -+#include -+ -+#define MAX_ROOT_CONTEXT_MASK 0xfff -+#define SYS_CONTEXT_BIT 0x1000 -+#define ALL_CONTEXT_BITS (MAX_ROOT_CONTEXT_MASK | SYS_CONTEXT_BIT) -+#define ROOT_TAB_OFFSET(Cntxt) (((Cntxt) & MAX_ROOT_CONTEXT_MASK) << 4) -+#define CLEAR_SYS_BIT(Cntxt) ((Cntxt) & ~SYS_CONTEXT_BIT) -+ -+#define E3_CACHELINE_SIZE (32) -+#define E3_CACHE_SIZE (8192) -+ -+typedef volatile struct _E3_CacheSets -+{ -+ E3_uint64 Set0[256]; /* 2k bytes per set */ -+ E3_uint64 Set1[256]; /* 2k bytes per set */ -+ E3_uint64 Set2[256]; /* 2k bytes per set */ -+ E3_uint64 Set3[256]; /* 2k bytes per set */ -+} E3_CacheSets; -+ -+typedef union e3_cache_tag -+{ -+ E3_uint64 Value; -+ struct { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 pad2:8; /* Undefined value when read */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 pad0; /* Undefined value when read */ -+#else -+ E3_uint32 pad0; /* Undefined value when read */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 pad2:8; /* Undefined value when read */ -+#endif -+ } s; -+} E3_CacheTag; -+ -+#define E3_NumCacheLines 64 -+#define E3_NumCacheSets 4 -+ -+typedef volatile struct _E3_CacheTags -+{ -+ E3_CacheTag Tags[E3_NumCacheLines][E3_NumCacheSets]; /* 2k bytes per set */ -+} E3_CacheTags; -+ -+typedef union E3_IProcStatus_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 Unused:2; -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+ E3_uint32 Unused:2; -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+#endif -+ } s; -+} E3_IProcStatus_Reg; -+ -+#define CRC_STATUS_GOOD (0 << 21) -+#define CRC_STATUS_DISCARD (1 << 21) -+#define CRC_STATUS_ERROR (2 << 21) -+#define CRC_STATUS_BAD (3 << 21) -+ -+#define CRC_MASK (3 << 21) -+ -+#define EOP_GOOD (1 << 16) -+#define EOP_BADACK (2 << 16) -+#define EOP_ERROR_RESET (3 << 16) -+ -+#define E3_IPS_LastTrans (1 << 26) -+#define E3_IPS_FirstTrans (1 << 25) -+#define E3_IPS_VChan1 (1 << 24) -+#define E3_IPS_BadLength (1 << 23) -+#define E3_IPS_CrcMask (3 << 21) -+#define E3_IPS_Rejected (1 << 20) -+#define E3_IPS_AckSent (1 << 19) -+#define E3_IPS_QueueingPacket (1 << 18) -+#define E3_IPS_EopType (3 << 16) -+ -+typedef union E3_Status_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+#endif -+ } s; -+} E3_Status_Reg; -+ -+/* values for WakeupFunction */ -+#define SleepOneTick 0 -+#define WakeupToSendTransOrEop 1 -+#define SleepOneTickThenRunnable 2 -+#define WakeupNever 4 -+/* extra dma wakeup functions */ -+#define WakupeToSendTransOrEop 1 -+#define WakeupForPacketAck 3 -+#define WakeupToSendTrans 5 -+/* extra thread wakup function */ -+#define WakeupStopped 3 -+/* extra cproc wakup function */ -+#define WakeupSetEvent 3 -+ -+#define GET_STATUS_CONTEXT(Ptr) ((Ptr.Status >> 16) & 0x1fff) -+#define GET_STATUS_SUSPEND_ADDR(Ptr) ((Ptr.Status >> 8) & 0xff) -+#define GET_STATUS_TRAPTYPE(Ptr) ((E3_uint32)(Ptr.Status & 0xff)) -+ -+/* -+ * Interrupt register bits -+ */ -+#define INT_PciMemErr (1<<15) /* Pci memory access error */ -+#define INT_SDRamInt (1<<14) /* SDRam ECC interrupt */ -+#define INT_EventInterrupt (1<<13) /* Event Interrupt */ -+#define INT_LinkError (1<<12) /* Link Error */ -+#define INT_ComQueue (1<<11) /* a comm queue half full */ -+#define INT_TProcHalted (1<<10) /* Tproc Halted */ -+#define INT_DProcHalted (1<<9) /* Dmas Halted */ -+#define INT_DiscardingNonSysCntx (1<<8) /* Inputters Discarding Non-SysCntx */ -+#define INT_DiscardingSysCntx (1<<7) /* Inputters Discarding SysCntx */ -+#define INT_TProc (1<<6) /* tproc interrupt */ -+#define INT_CProc (1<<5) /* cproc interrupt */ -+#define INT_DProc (1<<4) /* dproc interrupt */ -+#define INT_IProcCh1NonSysCntx (1<<3) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh1SysCntx (1<<2) /* iproc SysCntx interrupt */ -+#define INT_IProcCh0NonSysCntx (1<<1) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh0SysCntx (1<<0) /* iproc SysCntx interrupt */ -+ -+#define INT_Inputters (INT_IProcCh0SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh1SysCntx | INT_IProcCh1NonSysCntx) -+#define INT_Discarding (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx) -+#define INT_Halted (INT_DProcHalted | INT_TProcHalted) -+#define INT_ErrorInterrupts (INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_PllDelValue 0x9 -+#define LRS_ClockEven 0xA -+#define LRS_ClockOdd 0xB -+#define LRS_ErrorLSW 0xC -+#define LRS_ErrorMSW 0xD -+#define LRS_FinCoarseDeskew 0xE -+#define LRS_LinkInValue 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+union Sched_Status -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StopNonSysCntxs:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 :3; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkBoundaryScan:1; -+#else -+ E3_uint32 LinkBoundaryScan:1; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 :3; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 StopNonSysCntxs:1; -+#endif -+ } s; -+}; -+ -+#define LinkBoundaryScan ((E3_uint32) 1<<31) /* Clears the link error interrupt */ -+#define FixLinkDelays ((E3_uint32) 1<<30) /* Clears the link error interrupt */ -+#define LinkSetValue(Val, OldVal) ((E3_uint32) (((Val) & 0x3ff) << 20) | ((OldVal) & ((~0x3ff) << 20))) -+ -+#define ClearLinkErrorInt ((E3_uint32) 1<<16) /* Clears the link error interrupt */ -+#define RestartCProc ((E3_uint32) 1<<15) /* Clears command proc interrupt */ -+#define RestartTProc ((E3_uint32) 1<<14) /* Clears thread interrupt */ -+#define RestartDProc ((E3_uint32) 1<<13) /* Clears dma0 interrupt */ -+#define RestartCh1NonSysCntx ((E3_uint32) 1<<12) /* Clears interrupt */ -+#define RestartCh1SysCntx ((E3_uint32) 1<<11) /* Clears interrupt */ -+#define RestartCh0NonSysCntx ((E3_uint32) 1<<10) /* Clears interrupt */ -+#define RestartCh0SysCntx ((E3_uint32) 1<<9) /* Clears interrupt */ -+#define CProcStopped ((E3_uint32) 1<<9) /* Read value only */ -+ -+#define TraceSetEvents ((E3_uint32) 1<<8) -+#define DiscardNonSysCntxIn ((E3_uint32) 1<<7) -+#define DiscardSysCntxIn ((E3_uint32) 1<<6) -+#define CProcStop ((E3_uint32) 1<<5) /* Will empty all the command port queues. */ -+#define HaltThread ((E3_uint32) 1<<4) /* Will stop the thread proc and clear the tproc command queue */ -+#define HaltDmaDequeue ((E3_uint32) 1<<3) /* Will stop the dmaers starting new dma's. */ -+#define HaltDmas ((E3_uint32) 1<<2) /* Will stop the dmaers and clear the dma command queues */ -+#define FlushCommandQueues ((E3_uint32) 1<<1) /* Causes the command ports to be flushed. */ -+#define StopNonSysCntxs ((E3_uint32) 1<<0) /* Prevents a non-SysCntx from starting. */ -+ -+/* Initial value of schedule status register */ -+#define LinkResetToken 0x00F -+ -+#define Sched_Initial_Value (LinkBoundaryScan | (LinkResetToken << 20) | \ -+ DiscardSysCntxIn | DiscardNonSysCntxIn | HaltThread | HaltDmas) -+ -+#define StopDmaQueues (HaltDmaDequeue | HaltDmas | \ -+ DiscardNonSysCntxIn | DiscardSysCntxIn) -+#define CheckDmaQueueStopped (INT_DiscardingNonSysCntx | INT_DiscardingSysCntx | INT_DProcHalted) -+ -+#define HaltStopAndExtTestMask 0xfff001ff -+#define HaltAndStopMask 0x000001ff -+ -+ -+#define DmaComQueueNotEmpty (1<<0) -+#define ThreadComQueueNotEmpty (1<<1) -+#define EventComQueueNotEmpty (1<<2) -+#define DmaComQueueHalfFull (1<<3) -+#define ThreadComQueueHalfFull (1<<4) -+#define EventComQueueHalfFull (1<<5) -+#define DmaComQueueError (1<<6) -+#define ThreadComQueueError (1<<7) -+#define EventComQueueError (1<<8) -+ -+#define ComQueueNotEmpty (DmaComQueueNotEmpty | ThreadComQueueNotEmpty | EventComQueueNotEmpty) -+#define ComQueueError (DmaComQueueError | ThreadComQueueError | EventComQueueError) -+ -+typedef union _E3_DmaInfo -+{ -+ E3_uint32 Value; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+ E3_uint32 :7; -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 :16; /* read as Zero */ -+#else -+ E3_uint32 :16; /* read as Zero */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 :7; -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+#endif -+ } s; -+} E3_DmaInfo; -+ -+typedef volatile struct _E3_DmaRds -+{ -+ E3_uint32 DMA_Source4to0AndTwoReads; -+ E3_uint32 pad13; -+ E3_uint32 DMA_BytesToRead; -+ E3_uint32 pad14; -+ E3_uint32 DMA_MinusPacketSize; -+ E3_uint32 pad15; -+ E3_uint32 DMA_MaxMinusPacketSize; -+ E3_uint32 pad16; -+ E3_uint32 DMA_DmaOutputOpen; -+ E3_uint32 pad16a; -+ E3_DmaInfo DMA_PacketInfo; -+ E3_uint32 pad17[7]; -+ E3_uint32 IProcTrapBase; -+ E3_uint32 pad18; -+ E3_uint32 IProcBlockTrapBase; -+ E3_uint32 pad19[11]; -+} E3_DmaRds; -+ -+typedef volatile struct _E3_DmaWrs -+{ -+ E3_uint64 pad0; -+ E3_uint64 LdAlignment; -+ E3_uint64 ResetAckNLdBytesToWr; -+ E3_uint64 SetAckNLdBytesToWr; -+ E3_uint64 LdBytesToRd; -+ E3_uint64 LdDmaType; -+ E3_uint64 SendRoutes; -+ E3_uint64 SendEop; -+ E3_uint64 pad1[8]; -+} E3_DmaWrs; -+ -+typedef volatile struct _E3_Exts -+{ -+ E3_uint32 CurrContext; /* 0x12a00 */ -+ E3_uint32 pad0; -+ E3_Status_Reg DProcStatus; /* 0x12a08 */ -+ E3_uint32 pad1; -+ E3_Status_Reg CProcStatus; /* 0x12a10 */ -+ E3_uint32 pad2; -+ E3_Status_Reg TProcStatus; /* 0x12a18 */ -+ E3_uint32 pad3; -+ E3_IProcStatus_Reg IProcStatus; /* 0x12a20 */ -+ E3_uint32 pad4[3]; -+ -+ E3_uint32 IProcTypeContext; /* 0x12a30 */ -+ E3_uint32 pad5; -+ E3_uint32 IProcTransAddr; /* 0x12a38 */ -+ E3_uint32 pad6; -+ E3_uint32 IProcCurrTransData0; /* 0x12a40 */ -+ E3_uint32 pad7; -+ E3_uint32 IProcCurrTransData1; /* 0x12a48 */ -+ E3_uint32 pad8; -+ -+ E3_uint32 SchCntReg; /* 0x12a50 */ -+ E3_uint32 pad9; -+ E3_uint32 InterruptReg; /* 0x12a58 */ -+ E3_uint32 pad10; -+ E3_uint32 InterruptMask; /* 0x12a60 */ -+ E3_uint32 pad11; -+ E3_uint32 LinkErrorTypes; /* 0x12a68 */ -+ E3_uint32 pad12[3]; -+ E3_uint32 LinkState; /* a read here returens the DataDel value for the */ -+ /* link that has just been defined by a write to */ -+ /* Regs.Exts.SchCntReg.LinkSetValue */ -+ E3_uint32 pad13; -+ -+ union /* 0x12a80 */ -+ { -+ E3_DmaWrs DmaWrs; -+ E3_DmaRds DmaRds; -+ } Dmas; -+} E3_Exts; -+ -+typedef union com_port_entry -+{ -+ E3_uint64 type; -+ struct -+ { -+ E3_uint32 Address; /* Command VAddr */ -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :4; /* read as Zero */ -+#else -+ E3_uint32 :4; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+#endif -+ } s; -+} E3_ComPortEntry; -+ -+/* control reg bits */ -+#define CONT_MMU_ENABLE (1 << 0) /* bit 0 enables mmu */ -+#define CONT_ENABLE_8K_PAGES (1 << 1) /* When set smallest page is 8k instead of 4k. */ -+#define CONT_EN_ALL_SETS (1 << 2) /* enable cache */ -+#define CONT_CACHE_LEVEL0 (1 << 3) /* cache context table */ -+#define CONT_CACHE_LEVEL1 (1 << 4) /* cache up level 1 PTD/PTE */ -+#define CONT_CACHE_LEVEL2 (1 << 5) /* cache up level 2 PTD/PTE */ -+#define CONT_CACHE_LEVEL3 (1 << 6) /* cache up level 3 PTD/PTE */ -+#define CONT_CACHE_TRAPS (1 << 7) /* cache up traps */ -+#define CONT_CACHE_LEV0_ROUTES (1 << 8) /* cache up small routes */ -+#define CONT_CACHE_LEV1_ROUTES (1 << 9) /* cache up large routes */ -+#define CONT_CACHE_ALL (CONT_CACHE_LEVEL0 | CONT_CACHE_LEVEL1 | CONT_CACHE_LEVEL2 | \ -+ CONT_CACHE_LEVEL3 | CONT_CACHE_TRAPS | \ -+ CONT_CACHE_LEV0_ROUTES | CONT_CACHE_LEV1_ROUTES) -+ -+#define CONT_SYNCHRONOUS (1 << 10) /* PCI running sync */ -+#define CONT_SER (1 << 11) /* Single bit output (Elan1 SER bit) */ -+#define CONT_SIR (1 << 12) /* Writing 1 resets elan. */ -+ -+#define CONT_PSYCHO_MODE (1 << 13) /* Enables all the perversion required by psycho */ -+#define CONT_ENABLE_ECC (1 << 14) /* Enables error detecting on the ECC */ -+#define CONT_SDRAM_TESTING (1 << 15) /* Switches to test mode for checking EEC data bits */ -+ -+/* defines SDRam CasLatency. Once set will not change again unless reset is reasserted. */ -+/* 1 = Cas Latency is 3, 0 = Cas Latency is 2 */ -+#define CAS_LATENCY_2 (0 << 16) -+#define CAS_LATENCY_3 (1 << 16) -+#define REFRESH_RATE_2US (0 << 17) /* defines 2us SDRam Refresh rate. */ -+#define REFRESH_RATE_4US (1 << 17) /* defines 4us SDRam Refresh rate. */ -+#define REFRESH_RATE_8US (2 << 17) /* defines 8us SDRam Refresh rate. */ -+#define REFRESH_RATE_16US (3 << 17) /* defines 16us SDRam Refresh rate. */ -+ -+#define CONT_PCI_ERR (1 << 19) /* Read 1 if PCI Error */ -+#define CONT_CLEAR_PCI_ERROR (1 << 19) /* Clears an PCI error. */ -+ -+/* Will cause the PCI error bit to become set. This is used to force the threads proc -+ and the uProc to start to stall. */ -+#define CONT_SET_PCI_ERROR (1 << 20) -+ -+/* Writes SDram control reg when set. Also starts SDram memory system refreshing. */ -+#define SETUP_SDRAM (1 << 21) -+ -+/* Flushes the tlb */ -+#define MMU_FLUSH (1 << 22) -+/* and read back when it's finished */ -+#define MMU_FLUSHED (1 << 0) -+ -+/* Clears any ECC error detected by SDRam interface */ -+#define CLEAR_SDRAM_ERROR (1 << 23) -+ -+#define ECC_ADDR_MASK 0x0ffffff8 -+#define ECC_UE_MASK 0x1 -+#define ECC_CE_MASK 0x2 -+#define ECC_ME_MASK 0x4 -+#define ECC_SYN_MASK 0xff -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E3_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+typedef union _E3_CacheContReg -+{ -+ E3_uint32 ContReg; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 :11; -+#else -+ E3_uint32 :11; -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+#endif -+ } s; -+} E3_CacheContReg; -+ -+typedef union _E3_TrapBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 :17; -+#else -+ E3_uint32 :17; -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+#endif -+ } s; -+} E3_TrapBits; -+ -+typedef union _E3_DirtyBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 GlobalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 InsDirty:8; -+#else -+ E3_uint32 InsDirty:8; -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 GlobalsDirty:8; -+#endif -+ } s; -+} E3_DirtyBits; -+ -+#define E3_TProcDescheduleMask 0x6000 -+#define E3_TProcDescheduleWait 0x2000 -+#define E3_TProcDescheduleSuspend 0x4000 -+#define E3_TProcDescheduleBreak 0x6000 -+ -+#define E3_TrapBitsMask 0x7fff -+ -+#define ThreadRestartFromTrapBit 1 -+#define ThreadReloadAllRegs 2 -+ -+#define E3_PAckOk 0 -+#define E3_PAckTestFail 1 -+#define E3_PAckDiscard 2 -+#define E3_PAckError 3 -+ -+typedef volatile struct _E3_DataBusMap -+{ -+ E3_uint64 Dma_Alignment_Port[8]; /* 0x00002800 */ -+ E3_uint32 pad0[0x30]; /* 0x00002840 */ -+ -+ E3_uint32 Input_Trans0_Data[0x10]; /* 0x00002900 */ -+ E3_uint32 Input_Trans1_Data[0x10]; -+ E3_uint32 Input_Trans2_Data[0x10]; -+ E3_uint32 Input_Trans3_Data[0x10]; -+ -+/* this is the start of the exts directly addressable from the ucode. */ -+ E3_Exts Exts; /* 0x00002a00 */ -+ -+/* this is the start of the registers directly addressable from the ucode. */ -+ E3_DMA Dma_Desc; /* 0x00002b00 */ -+ -+ E3_uint32 Dma_Last_Packet_Size; /* 0x00002b20 */ -+ E3_uint32 Dma_This_Packet_Size; /* 0x00002b24 */ -+ E3_uint32 Dma_Tmp_Source; /* 0x00002b28 */ -+ E3_uint32 Dma_Tmp_Dest; /* 0x00002b2c */ -+ -+ E3_Addr Thread_SP_Save_Ptr; /* points to the thread desched save word. */ -+ E3_uint32 Dma_Desc_Size_InProg; /* 0x00002b34 */ -+ -+ E3_uint32 Thread_Desc_SP; /* 0x00002b38 */ -+ E3_uint32 Thread_Desc_Context; /* 0x00002b3c */ -+ -+ E3_uint32 uCode_TMP[0x10]; /* 0x00002b40 */ -+ -+ E3_uint32 TProc_NonSysCntx_FPtr; /* 0x00002b80 */ -+ E3_uint32 TProc_NonSysCntx_BPtr; /* 0x00002b84 */ -+ E3_uint32 TProc_SysCntx_FPtr; /* 0x00002b88 */ -+ E3_uint32 TProc_SysCntx_BPtr; /* 0x00002b8c */ -+ E3_uint32 DProc_NonSysCntx_FPtr; /* 0x00002b90 */ -+ E3_uint32 DProc_NonSysCntx_BPtr; /* 0x00002b94 */ -+ E3_uint32 DProc_SysCntx_FPtr; /* 0x00002b98 */ -+ E3_uint32 DProc_SysCntx_BPtr; /* 0x00002b9c */ -+ -+ E3_uint32 Input_Trap_Base; /* 0x00002ba0 */ -+ E3_uint32 Input_Queue_Offset; /* 0x00002ba4 */ -+ E3_uint32 CProc_TrapSave_Addr; /* 0x00002ba8 */ -+ E3_uint32 Input_Queue_Addr; /* 0x00002bac */ -+ E3_uint32 uCode_TMP10; /* 0x00002bb0 */ -+ E3_uint32 uCode_TMP11; /* 0x00002bb4 */ -+ E3_uint32 Event_Trace_Ptr; /* 0x00002bb8 */ -+ E3_uint32 Event_Trace_Mask; /* 0x00002bbc */ -+ -+ E3_ComPortEntry DmaComQueue[3]; /* 0x00002bc0 */ -+ -+ E3_uint32 Event_Int_Queue_FPtr; /* 0x00002bd8 */ -+ E3_uint32 Event_Int_Queue_BPtr; /* 0x00002bdc */ -+ -+ E3_ComPortEntry ThreadComQueue[2]; /* 0x00002be0 */ -+ E3_ComPortEntry SetEventComQueue[2]; /* 0x00002bf0 */ -+ -+ E3_uint32 pad1[96]; /* 0x00002c00 */ -+ E3_uint32 ComQueueStatus; /* 0x00002d80 */ -+ E3_uint32 pad2[31]; /* 0x00002d84 */ -+ -+/* These are the internal registers of the threads proc. */ -+ E3_uint32 Globals[8]; /* 0x00002e00 */ -+ E3_uint32 Outs[8]; -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ -+ E3_uint32 pad3[16]; -+ -+ E3_uint32 IBufferReg[4]; -+ -+ E3_uint32 ExecuteNPC; -+ E3_uint32 ExecutePC; -+ -+ E3_uint32 StartPC; -+ E3_uint32 pad4; -+ -+ E3_uint32 StartnPC; -+ E3_uint32 pad5; -+ -+ E3_TrapBits TrapBits; -+ E3_DirtyBits DirtyBits; -+ E3_uint64 LoadDataReg; -+ E3_uint64 StoreDataReg; -+ -+ E3_uint32 ECC_STATUS0; -+ E3_uint32 ECC_STATUS1; -+ E3_uint32 pad6[0xe]; -+ -+/* Pci slave port regs */ -+ E3_uint32 PciSlaveReadCache[0x10]; -+ -+ E3_uint32 Fault_Base_Ptr; -+ E3_uint32 pad7; -+ E3_uint32 Context_Ptr; -+ E3_uint32 pad8; -+ E3_uint32 Input_Context_Filter; /* write only, No data */ -+ E3_uint32 Input_Context_Fil_Flush; /* write only, No data */ -+ E3_CacheContReg Cache_Control_Reg; -+ E3_uint32 pad9; -+ -+ E3_uint64 Tlb_Line_Value; -+ -+ E3_uint32 Walk_Datareg1; -+ E3_uint32 Walk_VAddr_Tab_Base; -+ E3_uint32 Walk_Datareg; -+ E3_uint32 Walk_ContextReg; -+ E3_uint32 Walk_FaultAddr; -+ E3_uint32 Walk_EventAddr; -+ -+/* outputers output cont ext registers. */ -+ E3_uint64 Dma_Route_012345_Context; -+ E3_uint64 pad10; -+ E3_uint64 Dma_Route_01234567; -+ E3_uint64 Dma_Route_89ABCDEF; -+ -+ E3_uint64 Thread_Route_012345_Context; -+ E3_uint64 pad11; -+ E3_uint64 Thread_Route_01234567; -+ E3_uint64 Thread_Route_89ABCDEF; -+} E3_DataBusMap; -+ -+typedef volatile struct _E3_Regs -+{ -+ E3_CacheSets Sets; /* 0x00000000 */ -+ E3_CacheTags Tags; /* 0x00002000 */ -+ E3_DataBusMap Regs; /* 0x00002800 */ -+ E3_uint32 pad1[0x400]; -+ E3_User_Regs URegs; -+} E3_Regs; -+ -+#define MAX_TRAPPED_TRANS 16 -+#define TRANS_DATA_WORDS 16 -+#define TRANS_DATA_BYTES 64 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E3_EventInt -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_uint32 IntCookie; -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E3_EventInt; -+ -+#define GET_EVENT_CONTEXT(Ptr) ((Ptr->s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK) -+ -+typedef volatile union _E3_ThreadQueue -+{ -+ E3_uint64 ForceAlign; -+ struct -+ { -+ E3_Addr Thread; -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+#endif -+ } s; -+} E3_ThreadQueue; -+ -+typedef volatile union _E3_FaultStatusReg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 :10; /* Bits 22 to 31 */ -+#else -+ E3_uint32 :10; /* Bits 22 to 31 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+#endif -+ } s; -+} E3_FaultStatusReg; -+ -+typedef union _E3_FaultSave -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 FaultContext; -+ volatile E3_uint32 FaultAddress; -+ volatile E3_uint32 EventAddress; -+ } s; -+} E3_FaultSave; -+ -+/* MMU fault status reg bit positions. */ -+#define FSR_WritePermBit 0 /* 1=Write access perm, 0=Read access perm */ -+#define FSR_RemotePermBit 1 /* 1=Remote access perm, 0=local access perm */ -+#define FSR_EventPermBit 2 /* 1=Event access perm, 0=data access perm */ -+#define FSR_Size0Bit 3 -+#define FSR_Size1Bit 4 -+#define FSR_Size2Bit 5 -+#define FSR_Size3Bit 6 -+#define FSR_WriteAccBit 7 /* 1=Write access, 0=Read access. */ -+#define FSR_NonAllocBit 8 /* 1=Do not fill cache with this data */ -+#define FSR_BlkDataTy0Bit 9 -+#define FSR_BlkDataTy1Bit 10 -+#define FSR_ReadLineBit 11 -+#define FSR_ReadMultipleBit 12 -+ -+#define FSR_PermMask (0xf << FSR_WritePermBit) -+#define FSR_SizeMask (0xf << FSR_Size0Bit) -+#define FSR_AccTypeMask (3 << FSR_WriteAccBit) -+#define FSR_BlkDataTyMask (3 << FSR_BlkDataTy0Bit) -+#define FSR_PciAccTyMask (3 << FSR_ReadLineBit) -+#define FSR_Walking (0x1 << 13) -+#define FSR_Level_Mask (0x3 << 14) -+#define FSR_ProtFault (0x1 << 16) -+#define FSR_FaultPTEType (0x2 << 17) -+#define FSR_AddrSizeError (0x1 << 19) -+#define FSR_VProcSizeError (0x1 << 20) -+#define FSR_WalkBadData (0x1 << 21) -+ -+#define FSR_PermRead 0 -+#define FSR_PermWrite 1 -+#define FSR_PermRemoteRead 2 -+#define FSR_PermRemoteWrite 3 -+#define FSR_PermEventRd 4 -+#define FSR_PermEventWr 5 -+#define FSR_PermRemoteEventRd 6 -+#define FSR_PermRemoteEventWr 7 -+ -+/* AT size values for each access type */ -+#define FSR_Word (0x0 << FSR_Size0Bit) -+#define FSR_DWord (0x1 << FSR_Size0Bit) -+#define FSR_QWord (0x2 << FSR_Size0Bit) -+#define FSR_Block32 (0x3 << FSR_Size0Bit) -+#define FSR_ReservedBlock (0x6 << FSR_Size0Bit) -+#define FSR_Block64 (0x7 << FSR_Size0Bit) -+#define FSR_GetCntxFilter (0x8 << FSR_Size0Bit) -+#define FSR_QueueDWord (0x9 << FSR_Size0Bit) -+#define FSR_RouteFetch (0xa << FSR_Size0Bit) -+#define FSR_QueueBlock (0xb << FSR_Size0Bit) -+#define FSR_Block32PartWrite (0xe << FSR_Size0Bit) -+#define FSR_Block64PartWrite (0xf << FSR_Size0Bit) -+ -+#define FSR_AllocRead (0 << FSR_WriteAccBit) -+#define FSR_AllocWrite (1 << FSR_WriteAccBit) -+#define FSR_NonAllocRd (2 << FSR_WriteAccBit) -+#define FSR_NonAllocWr (3 << FSR_WriteAccBit) -+ -+#define FSR_TypeByte (0 << FSR_BlkDataTy0Bit) -+#define FSR_TypeHWord (1 << FSR_BlkDataTy0Bit) -+#define FSR_TypeWord (2 << FSR_BlkDataTy0Bit) -+#define FSR_TypeDWord (3 << FSR_BlkDataTy0Bit) -+ -+typedef union E3_TrTypeCntx -+{ -+ E3_uint32 TypeContext; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Type:16; /* Transaction type field */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E3_TrTypeCntx; -+ -+#define GET_TRAP_TYPE(Ptr) (Ptr.TypeContext & 0xfff) -+#define GET_TRAP_CONTEXT(Ptr) ((Ptr.TypeContext >> 16) & 0x1fff) -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef union _E3_IprocTrapHeader -+{ -+ E3_uint64 forceAlign; -+ -+ struct -+ { -+ E3_TrTypeCntx TrTypeCntx; -+ E3_uint32 TrAddr; -+ E3_uint32 TrData0; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ } s; -+} E3_IprocTrapHeader; -+ -+#define IProcTrapStatus ipsotd.u_IProcStatus -+#define TrData1 ipsotd.u_TrData1 -+ -+typedef struct E3_IprocTrapData -+{ -+ E3_uint32 TrData[TRANS_DATA_WORDS]; -+} E3_IprocTrapData; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E3_NonSysCntxQueueSize 0x400 -+#define E3_SysCntxQueueSize 0x100 -+ -+typedef struct _E3_TrapAndQueue -+{ -+ E3_DMA NonSysCntxDmaQueue[E3_NonSysCntxQueueSize]; /* 0x000000 */ -+ E3_DMA SysCntxDmaQueue[E3_SysCntxQueueSize]; /* 0x008000 */ -+ E3_EventInt EventIntQueue[E3_NonSysCntxQueueSize]; /* 0x00A000 */ -+ E3_ThreadQueue NonSysCntxThreadQueue[E3_NonSysCntxQueueSize]; /* 0x00C000 */ -+ E3_ThreadQueue SysCntxThreadQueue[E3_SysCntxQueueSize]; /* 0x00E000 */ -+ E3_FaultSave IProcSysCntx; /* 0x00E800 */ -+ E3_Addr Thread_SP_Save; /* 0x00E810 */ -+ E3_uint32 dummy0[3]; /* 0x00E814 */ -+ E3_FaultSave ThreadProcData; /* 0x00E820 */ -+ E3_FaultSave ThreadProcInst; /* 0x00E830 */ -+ E3_FaultSave dummy1[2]; /* 0x00E840 */ -+ E3_FaultSave ThreadProcOpen; /* 0x00E860 */ -+ E3_FaultSave dummy2; /* 0x00E870 */ -+ E3_FaultSave IProcNonSysCntx; /* 0x00E880 */ -+ E3_FaultSave DProc; /* 0x00E890 */ -+ E3_FaultSave CProc; /* 0x00E8A0 */ -+ E3_FaultSave TProc; /* 0x00E8B0 */ -+ E3_FaultSave DProcData0; /* 0x00E8C0 */ -+ E3_FaultSave DProcData1; /* 0x00E8D0 */ -+ E3_FaultSave DProcData2; /* 0x00E8E0 */ -+ E3_FaultSave DProcData3; /* 0x00E8F0 */ -+ E3_uint32 dummy3[0xc0]; /* 0x00E900 */ -+ E3_IprocTrapHeader VCh0_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh0_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_uint64 DmaOverflowQueueSpace[0x1000]; -+ E3_uint64 ThreadOverflowQueueSpace[0x800]; -+ E3_uint64 EventOverflowQueueSpace[0x800]; -+} E3_TrapAndQueue; -+ -+ -+typedef struct _E3_ContextControlBlock -+{ -+ E3_uint32 rootPTP; -+ E3_uint32 filter; -+ E3_uint32 VPT_ptr; -+ E3_uint32 VPT_mask; -+} E3_ContextControlBlock; -+ -+#define E3_CCB_CNTX0 (0x20000000) -+#define E3_CCB_DISCARD_ALL (0x40000000) -+#define E3_CCB_ACKOK_ALL (0x80000000) -+#define E3_CCB_MASK (0xc0000000) -+ -+#define E3_NUM_CONTEXT_0 (0x20) -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E3_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E3_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+ -+#endif /* notdef _ELAN3_ELANREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elansyscall.h linux-2.6.9/include/elan3/elansyscall.h ---- clean/include/elan3/elansyscall.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elansyscall.h 2004-06-07 09:50:06.000000000 -0400 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANSYSCALL_H -+#define __ELAN3_ELANSYSCALL_H -+ -+#ident "$Id: elansyscall.h,v 1.34 2004/06/07 13:50:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elansyscall.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifndef _ASM -+ -+typedef struct sys_word_item -+{ -+ struct sys_word_item *Next; -+ E3_uint32 Value; -+} SYS_WORD_ITEM; -+ -+typedef struct sys_block_item -+{ -+ struct sys_block_item *Next; -+ E3_uint32 *Pointer; -+} SYS_BLOCK_ITEM; -+ -+typedef struct sys_swap_space -+{ -+ int Magic; -+ void *ItemListsHead[MAX_LISTS]; -+ void **ItemListsTailp[MAX_LISTS]; -+} SYS_SWAP_SPACE; -+ -+typedef struct sys_exception -+{ -+ int Type; -+ int Proc; -+ u_long Res; -+ u_long Value; -+ E3_FaultSave_BE FaultArea; -+ -+ union -+ { -+ DMA_TRAP Dma; -+ THREAD_TRAP Thread; -+ COMMAND_TRAP Command; -+ INPUT_TRAP Input; -+ } Union; -+} SYS_EXCEPTION; -+ -+typedef struct sys_exception_space -+{ -+ struct sys_exception_space *Next; -+ int Magic; -+ int Front; -+ int Back; -+ int Count; -+ int Overflow; -+ SYS_EXCEPTION Exceptions[1]; -+} SYS_EXCEPTION_SPACE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct sys_ctxt -+{ -+ SYS_SWAP_SPACE *Swap; -+ SYS_EXCEPTION_SPACE *Exceptions; -+ kmutex_t Lock; -+ -+ spinlock_t WaitLock; -+ kcondvar_t NetworkErrorWait; -+ -+ int Armed; -+ int Backoff; -+ long Time; -+ -+ u_long Flags; -+ int signal; -+ -+ EVENT_COOKIE_TABLE *Table; -+} SYS_CTXT; -+ -+extern SYS_CTXT *sys_init (ELAN3_CTXT *ctxt); -+extern int sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event); -+extern void sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t ptr, int size, -+ E3_FaultSave_BE *, u_long res, u_long value); -+extern int sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex); -+ -+/* returns -ve error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+extern int elan3_validate_cap (ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM */ -+ -+/* values for "Flags" */ -+#define ELAN3_SYS_FLAG_DMA_BADVP 1 -+#define ELAN3_SYS_FLAG_THREAD_BADVP 2 -+#define ELAN3_SYS_FLAG_DMAFAIL 4 -+#define ELAN3_SYS_FLAG_NETERR 8 -+ -+#define SYS_SWAP_MAGIC 0xB23C52DF -+#define SYS_EXCEPTION_MAGIC 0xC34D63E0 -+ -+#define EXCEPTION_GLOBAL_STRING "elan3_exceptions" -+#define EXCEPTION_ABORT_STRING "elan3_abortstring" -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANSYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elanuregs.h linux-2.6.9/include/elan3/elanuregs.h ---- clean/include/elan3/elanuregs.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elanuregs.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANUREGS_H -+#define __ELAN3_ELANUREGS_H -+ -+#ident "$Id: elanuregs.h,v 1.10 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanuregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+ -+/* Count reg 0 */ -+#define STC_INPUT_TRANSACTIONS 0 -+#define STP_DMA_EOP_WAIT_ACK 1 -+#define STP_THREAD_RUNNING 2 -+#define STP_UCODE_WAIT_MEM 3 -+#define STC_CACHE_WRITE_BACKS 4 -+#define STC_PCI_SLAVE_READS 5 -+#define STC_REG0_UNUSED6 6 -+#define STP_REG0_UNUSED7 7 -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_TRANSACTIONS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_THREAD_RUNNING", \ -+ "STP_UCODE_WAIT_MEM", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STC_REG0_UNUSED6", \ -+ "STP_REG0_UNUSED7" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0 << 4) -+#define STP_DMA_DATA_TRANSMITTING (1 << 4) -+#define STP_THEAD_WAITING_INST (2 << 4) -+#define STC_REG1_UNUSED3 (3 << 4) -+#define STP_FETCHING_ROUTES (4 << 4) -+#define STC_REG1_UNUSED5 (5 << 4) -+#define STC_PCI_SLAVE_WRITES (6 << 4) -+#define STP_PCI_SLAVE_READ_WAITING (7 << 4) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STP_THEAD_WAITING_INST", \ -+ "STC_REG1_UNUSED3", \ -+ "STP_FETCHING_ROUTES", \ -+ "STC_REG1_UNUSED5", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STP_PCI_SLAVE_READ_WAITING" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0 << 8) -+#define STP_DMA_WAITING_MEM (1 << 8) -+#define STP_THREAD_WAIT_OPEN_PKT (2 << 8) -+#define STC_REG2_UNUSED3 (3 << 8) -+#define STC_ROUTE_FETCHES (4 << 8) -+#define STC_CACHE_NON_ALLOC_MISSES (5 << 8) -+#define STC_REG2_UNUSED6 (6 << 8) -+#define STP_PCI_SLAVE_WRITE_WAITING (7 << 8) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STP_THREAD_WAIT_OPEN_PKT", \ -+ "STC_REG2_UNUSED3", \ -+ "STC_ROUTE_FETCHES", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STC_REG2_UNUSED6", \ -+ "STP_PCI_SLAVE_WRITE_WAITING" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0 << 12) -+#define STP_DMA_WAIT_NETWORK_BUSY (1 << 12) -+#define STP_THREAD_WAIT_PACK (2 << 12) -+#define STP_UCODE_BLOCKED_UCODE (3 << 12) -+#define STC_TLB_HITS (4 << 12) -+#define STC_REG3_UNUSED5 (5 << 12) -+#define STC_PCI_MASTER_READS (6 << 12) -+#define STP_PCI_MASTER_WRITE_WAITING (7 << 12) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STP_THREAD_WAIT_PACK", \ -+ "STP_UCODE_BLOCKED_UCODE", \ -+ "STC_TLB_HITS", \ -+ "STC_REG3_UNUSED5", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_MASTER_WRITE_WAITING"\ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0 << 16) -+#define STC_DMA_NON_CTX0_PKTS (1 << 16) -+#define STP_THREAD_EOP_WAIT_ACK (2 << 16) -+#define STP_UCODE_DPROC_RUNNING (3 << 16) -+#define STC_TLB_MEM_WALKS (4 << 16) -+#define STC_REG4_UNUSED5 (5 << 16) -+#define STC_PCI_MASTER_WRITES (6 << 16) -+#define STP_PCI_MASTER_READ_WAITING (7 << 16) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_NON_CTX0_PKTS", \ -+ "STP_THREAD_EOP_WAIT_ACK", \ -+ "STP_UCODE_DPROC_RUNNING", \ -+ "STC_TLB_MEM_WALKS", \ -+ "STC_REG4_UNUSED5", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_READ_WAITING" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0 << 20) -+#define STC_DMA_NON_CTX0_PKTS_REJECTED (1 << 20) -+#define STP_THREAD_WAITING_DATA (2 << 20) -+#define STP_UCODE_CPROC_RUNNING (3 << 20) -+#define STP_THREAD_TRANSMITTING_DATA (4 << 20) -+#define STP_PCI_WAITING_MAIN (5 << 20) -+#define STC_REG5_UNUSED6 (6 << 20) -+#define STC_REG5_UNUSED7 (7 << 20) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_NON_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAITING_DATA", \ -+ "STP_UCODE_CPROC_RUNNING", \ -+ "STP_THREAD_TRANSMITTING_DATA", \ -+ "STP_PCI_WAITING_MAIN", \ -+ "STC_REG5_UNUSED6", \ -+ "STC_REG5_UNUSED7" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_WAITING_MEMORY (0 << 24) -+#define STC_DMA_CTX0_PKTS (1 << 24) -+#define STP_THREAD_WAITING_MEMORY (2 << 24) -+#define STP_UCODE_TPROC_RUNNING (3 << 24) -+#define STC_CACHE_HITS (4 << 24) -+#define STP_PCI_WAITING_ELAN (5 << 24) -+#define STC_REG6_UNUSED4 (6 << 24) -+#define STC_REG6_UNUSED7 (7 << 24) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_WAITING_MEMORY", \ -+ "STC_DMA_CTX0_PKTS", \ -+ "STP_THREAD_WAITING_MEMORY", \ -+ "STP_UCODE_TPROC_RUNNING", \ -+ "STC_CACHE_HITS", \ -+ "STP_PCI_WAITING_ELAN", \ -+ "STC_REG6_UNUSED4", \ -+ "STC_REG6_UNUSED7" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0 << 28) -+#define STC_DMA_CTX0_PKTS_REJECTED (1 << 28) -+#define STP_THREAD_WAIT_NETWORK_BUSY (2 << 28) -+#define STP_UCODE_IPROC_RUNNING (3 << 28) -+#define STP_TLB_MEM_WALKING (4 << 28) -+#define STC_CACHE_ALLOC_MISSES (5 << 28) -+#define STP_PCI_DATA_TRANSFER (6 << 28) -+#define STC_REG7_UNUSED7 (7 << 28) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STC_DMA_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAIT_NETWORK_BUSY",\ -+ "STP_UCODE_IPROC_RUNNING", \ -+ "STP_TLB_MEM_WALKING", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_DATA_TRANSFER", \ -+ "STC_REG7_UNUSED7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+extern const char *elan3_stats_names[8][8]; -+ -+#define ELAN3_STATS_NAME(COUNT, CONTROL) (elan3_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+typedef volatile union e3_StatsControl -+{ -+ E3_uint32 StatsControl; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StatCont0:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont7:4; -+#else -+ E3_uint32 StatCont7:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont0:4; -+#endif -+ } s; -+} E3_StatsControl; -+ -+typedef volatile union e3_StatsCount -+{ -+ E3_uint64 ClockStat; -+ struct -+ { -+ E3_uint32 ClockLSW; /* read only */ -+ E3_uint32 StatsCount; -+ } s; -+} E3_StatsCount; -+ -+typedef volatile union e3_clock -+{ -+ E3_uint64 NanoSecClock; -+ struct -+ { -+ E3_uint32 ClockLSW; -+ E3_uint32 ClockMSW; -+ } s; -+} E3_Clock; -+#define E3_TIME( X ) ((X).NanoSecClock) -+ -+typedef volatile struct _E3_User_Regs -+{ -+ E3_StatsCount StatCounts[8]; -+ E3_StatsCount InstCount; -+ E3_uint32 pad0; -+ E3_StatsControl StatCont; -+ E3_Clock Clock; -+ E3_uint32 pad1[0x7ea]; -+} E3_User_Regs; -+ -+typedef volatile struct _E3_CommandPort -+{ -+ E3_Addr PutDma; /* 0x000 */ -+ E3_uint32 Pad1; -+ E3_Addr GetDma; /* 0x008 */ -+ E3_uint32 Pad2; -+ E3_Addr RunThread; /* 0x010 */ -+ E3_uint32 Pad3[3]; -+ E3_Addr WaitEvent0; /* 0x020 */ -+ E3_uint32 Pad4; -+ E3_Addr WaitEvent1; /* 0x028 */ -+ E3_uint32 Pad5; -+ E3_Addr SetEvent; /* 0x030 */ -+ E3_uint32 Pad6[3]; -+ E3_uint32 Pad7[0x7f0]; /* Fill out to an 8K page */ -+} E3_CommandPort; -+/* Should have the new structures for the top four pages of the elan3 space */ -+ -+#define E3_COMMANDPORT_SIZE (sizeof (E3_CommandPort)) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANUREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/elanvp.h linux-2.6.9/include/elan3/elanvp.h ---- clean/include/elan3/elanvp.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/elanvp.h 2004-06-18 05:28:06.000000000 -0400 -@@ -0,0 +1,165 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANVP_H -+#define _ELAN3_ELANVP_H -+ -+#ident "$Id: elanvp.h,v 1.45 2004/06/18 09:28:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanvp.h,v $ */ -+ -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Context number allocation. -+ * [0-31] system contexts -+ * [32-63] hardware test -+ * [64-1023] available -+ * [1024-2047] RMS allocatable -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN3_KCOMM_CONTEXT_NUM 0x001 /* old kernel comms context (system) */ -+#define ELAN3_CM_CONTEXT_NUM 0x002 /* new cluster member ship comms context (system) */ -+#define ELAN3_MRF_CONTEXT_NUM 0x003 /* multi-rail kernel comms context */ -+#define ELAN3_DMARING_BASE_CONTEXT_NUM 0x010 /* 16 contexts for dma ring issue (system) */ -+#define ELAN3_DMARING_TOP_CONTEXT_NUM 0x01f -+ -+#define ELAN3_HWTEST_BASE_CONTEXT_NUM 0x020 /* reserved for hardware test */ -+#define ELAN3_HWTEST_TOP_CONTEXT_NUM 0x03f -+ -+#define ELAN3_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN3_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN3_HWTEST_CONTEXT(ctx) ((ctx) >= ELAN3_HWTEST_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN3_HWTEST_TOP_CONTEXT_NUM) -+ -+#define ELAN3_SYSTEM_CONTEXT(ctx) (((ctx) & SYS_CONTEXT_BIT) != 0 || \ -+ (ctx) < E3_NUM_CONTEXT_0 || \ -+ (ctx) >= ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* Maximum number of virtual processes */ -+#define ELAN3_MAX_VPS (16384) -+ -+#define ELAN3_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN3_INVALID_NODE (0xFFFF) -+#define ELAN3_INVALID_CONTEXT (0xFFFF) -+ -+ -+ -+#if defined(__KERNEL__) && !defined(__ELAN3__) -+ -+/* -+ * Contexts are accessible via Elan capabilities, -+ * for each context that can be "attached" to there -+ * is a ELAN3_CTXT_INFO structure created by its -+ * "owner". This also "remembers" all remote -+ * segments that have "blazed" a trail to it. -+ * -+ * If the "owner" goes away the soft info is -+ * destroyed when it is no longer "attached" or -+ * "referenced" by a remote segment. -+ * -+ * If the owner changes the capability, then -+ * the soft info must be not "referenced" or -+ * "attached" before a new process can "attach" -+ * to it. -+ */ -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_info::Next elan3_info::Prev elan3_info::Device elan3_info::Owner -+ elan3_info::Capability elan3_info::AttachedCapability elan3_info::Context)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_info::Nacking elan3_info::Disabled)) -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_info::Context elan3_info::Device elan3_info::Capability)) -+ -+#endif /* __KERNEL__ */ -+ -+#define LOW_ROUTE_PRIORITY 0 -+#define HIGH_ROUTE_PRIORITY 1 -+ -+#define DEFAULT_ROUTE_TIMEOUT 3 -+#define DEFAULT_ROUTE_PRIORITY LOW_ROUTE_PRIORITY -+ -+ -+/* a small route is 4 flits (8 bytes), a big route */ -+/* is 8 flits (16 bytes) - each packed route is 4 bits */ -+/* so giving us a maximum of 28 as flit0 does not contain */ -+/* packed routes */ -+#define MAX_FLITS 8 -+#define MAX_PACKED 28 -+ -+/* bit definitions for 64 bit route pointer */ -+#define ROUTE_VALID (1ULL << 63) -+#define ROUTE_PTR (1ULL << 62) -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_PTR_MASK ((1ull << ROUTE_CTXT_SHIFT)-1) -+#define ROUTE_GET_CTXT ((VAL >> ROUTE_CTXT_SHIFT) & 0x3fff ) -+ -+#define SMALL_ROUTE(flits, context) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (context) << ROUTE_CTXT_SHIFT) | \ -+ ROUTE_VALID) -+ -+#define BIG_ROUTE_PTR(paddr, context) ((E3_uint64) (paddr) | ((E3_uint64) context << ROUTE_CTXT_SHIFT) | ROUTE_VALID | ROUTE_PTR) -+ -+#define BIG_ROUTE0(flits) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (flits)[3] << 48)) -+#define BIG_ROUTE1(flits) (((E3_uint64) (flits)[4] << 0) | ((E3_uint64) (flits)[5] << 16) | \ -+ ((E3_uint64) (flits)[6] << 32) | ((E3_uint64) (flits)[7] << 48)) -+ -+ -+/* defines for first flit of a route */ -+#define FIRST_HIGH_PRI (1 << 15) -+#define FIRST_AGE(Val) ((Val) << 11) -+#define FIRST_TIMEOUT(Val) ((Val) << 9) -+#define FIRST_PACKED(X) ((X) << 7) -+#define FIRST_ROUTE(Val) (Val) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+/* ---------------------------------------------------------- -+ * elan3_route functions -+ * return ELAN3_ROUTE_xxx codes -+ * ---------------------------------------------------------- */ -+ -+#define ELAN3_ROUTE_SUCCESS (0x00) -+#define ELAN3_ROUTE_SYSCALL_FAILED (0x01) -+#define ELAN3_ROUTE_INVALID (0x02) -+#define ELAN3_ROUTE_TOO_LONG (0x04) -+#define ELAN3_ROUTE_LOAD_FAILED (0x08) -+#define ELAN3_ROUTE_PROC_RANGE (0x0f) -+#define ELAN3_ROUTE_INVALID_LEVEL (0x10) -+#define ELAN3_ROUTE_OCILATES (0x20) -+#define ELAN3_ROUTE_WRONG_DEST (0x40) -+#define ELAN3_ROUTE_TURN_LEVEL (0x80) -+#define ELAN3_ROUTE_NODEID_UNKNOWN (0xf0) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANVP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/events.h linux-2.6.9/include/elan3/events.h ---- clean/include/elan3/events.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/events.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,183 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EVENTS_H -+#define _ELAN3_EVENTS_H -+ -+#ident "$Id: events.h,v 1.45 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/events.h,v $*/ -+ -+/* -+ * Alignments for events, event queues and blockcopy blocks. -+ */ -+#define E3_EVENT_ALIGN (8) -+#define E3_QUEUE_ALIGN (32) -+#define E3_BLK_ALIGN (64) -+#define E3_BLK_SIZE (64) -+#define E3_BLK_PATTERN (0xfeedface) -+ -+#define E3_EVENT_FREE ((0 << 4) | EV_WCOPY) -+#define E3_EVENT_PENDING ((1 << 4) | EV_WCOPY) -+#define E3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+#define E3_EVENT_FIRED ((3 << 4) | EV_WCOPY) -+#define E3_EVENT_FAILED ((4 << 4) | EV_WCOPY) -+#define E3_EVENT_DONE ((5 << 4) | EV_WCOPY) -+#define E3_EVENT_PRIVATE ((6 << 4) | EV_WCOPY) -+ -+/* -+ * Event values and masks -+ * -+ * Block Copy event xxxxxxxxxxxxxxxx1 -+ * Chained event 30 bit ptr ....0x -+ * Event interrupt 29 bit cookie 01x -+ * Dma event 28 bit ptr 011x -+ * thread event 28 bit ptr 111x -+ */ -+#define EV_CLEAR (0x00000000) -+#define EV_TYPE_BCOPY (0x00000001) -+#define EV_TYPE_CHAIN (0x00000000) -+#define EV_TYPE_EVIRQ (0x00000002) -+#define EV_TYPE_DMA (0x00000006) -+#define EV_TYPE_THREAD (0x0000000e) -+ -+#define EV_TYPE_BCOPY_BYTE (0) -+#define EV_TYPE_BCOPY_HWORD (1) -+#define EV_TYPE_BCOPY_WORD (2) -+#define EV_TYPE_BCOPY_DWORD (3) -+ -+/* -+ * Data type is in the lowest two bits of the Dest pointer. -+ */ -+#define EV_BCOPY_DTYPE_MASK (3) -+#define EV_WCOPY (1) /* [DestWord] = Source */ -+#define EV_BCOPY (0) /* [DestBlock] = [SourceBlock] */ -+ -+#define EV_TYPE_MASK (0x0000000e) -+#define EV_TYPE_MASK_BCOPY (0x00000001) -+#define EV_TYPE_MASK_CHAIN (0x00000002) -+#define EV_TYPE_MASK_EVIRQ (0x00000006) -+#define EV_TYPE_MASK_DMA (0x0000000e) -+#define EV_TYPE_MASK_THREAD (0x0000000e) -+#define EV_TYPE_MASK2 (0x0000000f) -+ -+/* -+ * Min/Max size for Elan queue entries -+ */ -+#define E3_QUEUE_MIN E3_BLK_SIZE -+#define E3_QUEUE_MAX (E3_BLK_SIZE * 5) -+ -+/* -+ * Elan queue state bits -+ */ -+#define E3_QUEUE_FULL (1<<0) -+#define E3_QUEUE_LOCKED (1<<8) -+ -+#ifndef _ASM -+ -+typedef union _E3_Event -+{ -+ E3_uint64 ev_Int64; -+ struct { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ } ev_u; -+} E3_Event; -+ -+typedef union _E3_BlockCopyEvent -+{ -+ E3_uint64 ev_ForceAlign; -+ struct E3_BlockCopyEvent_u { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ E3_Addr u_Source; -+ E3_Addr u_Dest; /* lowest bits are the data type for endian conversion */ -+ } ev_u; -+} E3_BlockCopyEvent; -+ -+#define ev_Type ev_u.u_Type -+#define ev_Count ev_u.u_Count -+#define ev_Source ev_u.u_Source -+#define ev_Dest ev_u.u_Dest -+ -+typedef union _E3_WaitEvent0 -+{ -+ E3_uint64 we_ForceAlign; -+ struct { -+ E3_Addr u_EventLoc; -+ E3_int32 u_WaitCount; -+ } we_u; -+} E3_WaitEvent0; -+#define we_EventLoc we_u.u_EventLoc -+#define we_WaitCount we_u.u_WaitCount -+ -+typedef union _E3_Event_Blk -+{ -+ E3_uint8 eb_Bytes[E3_BLK_SIZE]; -+ E3_uint32 eb_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+ E3_uint64 eb_Int64[E3_BLK_SIZE/sizeof (E3_uint64)]; -+} E3_Event_Blk; -+ -+/* We make eb_done the last word of the blk -+ * so that we can guarantee the rest of the blk is -+ * correct when this value is set. -+ * However, when the TPORT code copies the envelope -+ * info into the blk, it uses a dword endian type. -+ * Thus we must correct for this when initialising -+ * the pattern in the Elan SDRAM blk (eeb_done) -+ */ -+#define eb_done eb_Int32[15] -+#define eeb_done eb_Int32[15^WordEndianFlip] -+ -+#define EVENT_WORD_READY(WORD) (*((volatile E3_uint32 *) WORD) != 0) -+#define EVENT_BLK_READY(BLK) (((volatile E3_Event_Blk *) (BLK))->eb_done != 0) -+#define EVENT_READY(EVENT) (((volatile E3_Event *) (EVENT))->ev_Count <= 0) -+ -+#define ELAN3_WAIT_EVENT (0) -+#define ELAN3_POLL_EVENT (-1) -+ -+#define SETUP_EVENT_TYPE(ptr,typeval) (((unsigned long)(ptr)) | (typeval)) -+ -+#define E3_RESET_BCOPY_BLOCK(BLK) \ -+ do { \ -+ (BLK)->eb_done = 0; \ -+ } while (0) -+ -+typedef struct e3_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_Event q_event; /* queue event */ -+} E3_Queue; -+ -+typedef struct e3_blockcopy_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue event */ -+ E3_uint32 q_pad[6]; -+} E3_BlockCopyQueue; -+ -+#define E3_QUEUE_EVENT_OFFSET 24 -+#define QUEUE_FULL(Q) ((Q)->q_state & E3_QUEUE_FULL) -+ -+#endif /* ! _ASM */ -+ -+#endif /* _ELAN3_EVENTS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/intrinsics.h linux-2.6.9/include/elan3/intrinsics.h ---- clean/include/elan3/intrinsics.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/intrinsics.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_INTRINSICS_H -+#define _ELAN3_INTRINSICS_H -+ -+#ident "$Id: intrinsics.h,v 1.35 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/intrinsics.h,v $ */ -+ -+#include -+#include -+ -+/* -+ * This file contains definitions of the macros for accessing the QSW -+ * specific instructions, as if they were functions. -+ * The results from the function -+ */ -+ -+#define C_ACK_OK 0 /* return from c_close() */ -+#define C_ACK_TESTFAIL 1 /* return from c_close() */ -+#define C_ACK_DISCARD 2 /* return from c_close() */ -+#define C_ACK_ERROR 3 /* return from c_close() */ -+ -+/* -+ * Elan asi's for tproc block accesses -+ */ -+#define EASI_BYTE 0 -+#define EASI_HALF 1 -+#define EASI_WORD 2 -+#define EASI_DOUBLE 3 -+ -+#if defined(__ELAN3__) && !defined (_ASM) -+ -+extern inline void c_abort(void) -+{ -+ asm volatile (".word 0x0000 ! die you thread you " : : ); -+} -+ -+extern inline void c_suspend(void) -+{ -+ asm volatile ( -+ "set 1f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 1f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" -+ "1: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" : : ); -+} -+ -+extern inline int c_close(void) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0" : "=r" (rc) : ); -+ -+ return (rc); -+} -+ -+extern inline int c_close_cookie(volatile E3_uint32 *cookiep, E3_uint32 next) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0 ! close the packet\n" -+ "bz,a 1f ! ack received\n" -+ "st %1, [%2] ! update cookie on ack\n" -+ "1: ! label for not-ack\n" -+ : "=r" (rc) : "r" (next), "r" (cookiep)); -+ -+ return (rc); -+} -+ -+extern inline void c_break_busywait(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bpos 1f ! no other thread ready\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_break(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bne 1f ! haven't exceeded our inst count yet\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_open( const int arg ) -+{ -+ asm volatile ("open %0" : : "r" (arg) ); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+} -+ -+extern inline void c_waitevent( volatile E3_Event *const ptr, -+ const int count) -+{ -+ register volatile E3_Event *a_unlikely asm("o0") = ptr; -+ register int a_very_unlikely asm("o1") = count; -+ -+ asm volatile ( -+ "sub %%sp,1*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%i0,[%%sp+0] ! save the ins\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+0],%%i0 ! RevB bug fix. restore ins in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "ldblock [%%sp+0],%%i0 ! restore ins\n" -+ "4: add %%sp,1*8*4,%%sp ! restore stack pointer\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (a_unlikely), "r" (a_very_unlikely) -+ : /* clobbered */ "g0", "g1", "g2", "g3", "g4", "g5", "g6", "g7", -+ "l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7" ); -+ -+} -+ -+#define c_sendtrans0(type,dest) \ -+ asm volatile ("sendtrans %0, %%g0, %1" : : "i" (type), "r" (dest)) -+ -+#define c_sendtrans1(type,dest,arg) \ -+ asm volatile ("sendtrans %0, %2, %1" : : "i" (type), "r" (dest), "r" (arg)) -+ -+#define c_sendtrans2(type,dest,arg1,arg2) \ -+ do { \ -+ register const unsigned long a_unlikely_1 asm("o4") = arg1; \ -+ register const unsigned long a_unlikely_2 asm("o5") = arg2; \ -+ asm volatile ("sendtrans %0, %2, %1" \ -+ : : "i" (type), "r" (dest), "r" (a_unlikely_1), "r" (a_unlikely_2)); \ -+ } while(0) -+ -+#define c_sendmem(type,dest,ptr) \ -+ asm volatile ("sendtrans %0, [%2], %1" : : "i" (type), "r" (dest), "r" (ptr)) -+ -+/* Copy a single 64-byte block (src blk is read using a BYTE endian type) */ -+extern inline void elan3_copy64b(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_BYTE) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Copy a single 64-byte block (src blk is read using a WORD endian type) */ -+extern inline void elan3_copy64w(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_WORD) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Read a 64-bit value with a WORD (32-bit) endian type */ -+extern inline E3_uint64 elan3_read64w( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_WORD) ); -+ -+ return( result ); -+} -+ -+/* Read a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline E3_uint64 elan3_read64dw( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_DOUBLE) ); -+ -+ return( result ); -+} -+ -+/* Write a 32-bit value with a WORD (32-bit) endian type */ -+extern inline void elan3_write64w( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_WORD) ); -+} -+ -+/* Write a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline void elan3_write64dw( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_DOUBLE) ); -+} -+ -+extern inline E3_uint32 c_swap(volatile E3_uint32 *source, E3_uint32 result) -+{ -+ asm volatile("swap [%1],%0\n" -+ : "=r" (result) -+ : "r" (source) ,"0" (result) -+ : "memory"); -+ return result; -+} -+ -+extern inline E3_uint32 c_swap_save(volatile E3_uint32 *source, const E3_uint32 result) -+{ -+ register E3_uint32 a_unlikely; -+ asm volatile("" : "=r" (a_unlikely) : ); -+ -+ asm volatile("mov %2,%0; swap [%1],%0\n" -+ : "=r" (a_unlikely) -+ : "r" (source) ,"r" (result), "0" (a_unlikely) -+ : "memory"); -+ return a_unlikely; -+} -+#endif /* (__ELAN3__) && !(_ASM) */ -+ -+#endif /* _ELAN3_INTRINSICS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/minames.h linux-2.6.9/include/elan3/minames.h ---- clean/include/elan3/minames.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/minames.h 2005-09-07 10:39:37.000000000 -0400 -@@ -0,0 +1,256 @@ -+{MI_WaitForRemoteDescRead, "MI_WaitForRemoteDescRead"}, -+{MI_WaitForRemoteDescRead2, "MI_WaitForRemoteDescRead2"}, -+{MI_WaitForRemoteDescRead2_seq1, "MI_WaitForRemoteDescRead2_seq1"}, -+{MI_SendRemoteDmaRoutes, "MI_SendRemoteDmaRoutes"}, -+{MI_IProcTrapped, "MI_IProcTrapped"}, -+{MI_DProcTrapped, "MI_DProcTrapped"}, -+{MI_CProcTrapped, "MI_CProcTrapped"}, -+{MI_TProcTrapped, "MI_TProcTrapped"}, -+{MI_TestWhichDmaQueue, "MI_TestWhichDmaQueue"}, -+{MI_TestWhichDmaQueue_seq1, "MI_TestWhichDmaQueue_seq1"}, -+{MI_InputRemoteDmaUpdateBPtr, "MI_InputRemoteDmaUpdateBPtr"}, -+{MI_FixupQueueContextAndRemoteBit, "MI_FixupQueueContextAndRemoteBit"}, -+{MI_FixupQueueContextAndRemoteBit_seq1, "MI_FixupQueueContextAndRemoteBit_seq1"}, -+{MI_FixupQueueContextAndRemoteBit_seq2, "MI_FixupQueueContextAndRemoteBit_seq2"}, -+{MI_FixupQueueContextAndRemoteBit_seq3, "MI_FixupQueueContextAndRemoteBit_seq3"}, -+{MI_FixupQueueContextAndRemoteBit_seq4, "MI_FixupQueueContextAndRemoteBit_seq4"}, -+{MI_RunDmaCommand, "MI_RunDmaCommand"}, -+{MI_DoSendRemoteDmaDesc, "MI_DoSendRemoteDmaDesc"}, -+{MI_DequeueNonSysCntxDma, "MI_DequeueNonSysCntxDma"}, -+{MI_WaitForRemoteDescRead1, "MI_WaitForRemoteDescRead1"}, -+{MI_RemoteDmaCommand, "MI_RemoteDmaCommand"}, -+{MI_WaitForRemoteRoutes, "MI_WaitForRemoteRoutes"}, -+{MI_DequeueSysCntxDma, "MI_DequeueSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForQueue, "MI_ExecuteDmaDescriptorForQueue"}, -+{MI_ExecuteDmaDescriptor1, "MI_ExecuteDmaDescriptor1"}, -+{MI_ExecuteDmaDescriptor1_seq1, "MI_ExecuteDmaDescriptor1_seq1"}, -+{MI_ExecuteDmaDescriptor1_seq2, "MI_ExecuteDmaDescriptor1_seq2"}, -+{MI_ExecuteDmaDescriptor1_seq3, "MI_ExecuteDmaDescriptor1_seq3"}, -+{MI_GetNewSizeInProg, "MI_GetNewSizeInProg"}, -+{MI_GetNewSizeInProg_seq1, "MI_GetNewSizeInProg_seq1"}, -+{MI_FirstBlockRead, "MI_FirstBlockRead"}, -+{MI_ExtraFirstBlockRead, "MI_ExtraFirstBlockRead"}, -+{MI_UnimplementedError, "MI_UnimplementedError"}, -+{MI_UpdateDescriptor, "MI_UpdateDescriptor"}, -+{MI_UpdateDescriptor_seq1, "MI_UpdateDescriptor_seq1"}, -+{MI_UpdateDescriptor_seq2, "MI_UpdateDescriptor_seq2"}, -+{MI_UpdateDescriptor_seq3, "MI_UpdateDescriptor_seq3"}, -+{MI_UpdateDescriptor_seq4, "MI_UpdateDescriptor_seq4"}, -+{MI_UpdateDescriptor_seq5, "MI_UpdateDescriptor_seq5"}, -+{MI_GetNextSizeInProg, "MI_GetNextSizeInProg"}, -+{MI_DoStopThisDma, "MI_DoStopThisDma"}, -+{MI_DoStopThisDma_seq1, "MI_DoStopThisDma_seq1"}, -+{MI_GenNewBytesToRead, "MI_GenNewBytesToRead"}, -+{MI_WaitForEventReadTy1, "MI_WaitForEventReadTy1"}, -+{MI_WaitUpdateEvent, "MI_WaitUpdateEvent"}, -+{MI_WaitUpdateEvent_seq1, "MI_WaitUpdateEvent_seq1"}, -+{MI_DoSleepOneTickThenRunable, "MI_DoSleepOneTickThenRunable"}, -+{MI_RunEvent, "MI_RunEvent"}, -+{MI_EnqueueThread, "MI_EnqueueThread"}, -+{MI_CheckContext0, "MI_CheckContext0"}, -+{MI_EnqueueDma, "MI_EnqueueDma"}, -+{MI_CprocTrapping, "MI_CprocTrapping"}, -+{MI_CprocTrapping_seq1, "MI_CprocTrapping_seq1"}, -+{MI_WaitForRemoteRoutes1, "MI_WaitForRemoteRoutes1"}, -+{MI_SetEventCommand, "MI_SetEventCommand"}, -+{MI_DoSetEvent, "MI_DoSetEvent"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma, "MI_DoRemoteSetEventNowOrTrapQueueingDma"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1, "MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1"}, -+{MI_SendRemoteDmaRoutes2, "MI_SendRemoteDmaRoutes2"}, -+{MI_WaitForRemoteRoutes2, "MI_WaitForRemoteRoutes2"}, -+{MI_WaitEventCommandTy0, "MI_WaitEventCommandTy0"}, -+{MI_DequeueNonSysCntxDma2, "MI_DequeueNonSysCntxDma2"}, -+{MI_WaitEventCommandTy1, "MI_WaitEventCommandTy1"}, -+{MI_WaitEventCommandTy1_seq1, "MI_WaitEventCommandTy1_seq1"}, -+{MI_DequeueNonSysCntxThread, "MI_DequeueNonSysCntxThread"}, -+{MI_DequeueSysCntxDma1, "MI_DequeueSysCntxDma1"}, -+{MI_DequeueSysCntxThread, "MI_DequeueSysCntxThread"}, -+{MI_TestNonSysCntxDmaQueueEmpty, "MI_TestNonSysCntxDmaQueueEmpty"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq1, "MI_TestNonSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq2, "MI_TestNonSysCntxDmaQueueEmpty_seq2"}, -+{MI_RunThreadCommand, "MI_RunThreadCommand"}, -+{MI_SetEventWaitForLastAcess, "MI_SetEventWaitForLastAcess"}, -+{MI_SetEventReadWait, "MI_SetEventReadWait"}, -+{MI_SetEventReadWait_seq1, "MI_SetEventReadWait_seq1"}, -+{MI_TestEventType, "MI_TestEventType"}, -+{MI_TestEventType_seq1, "MI_TestEventType_seq1"}, -+{MI_TestEventBit2, "MI_TestEventBit2"}, -+{MI_DmaDescOrBlockCopyOrChainedEvent, "MI_DmaDescOrBlockCopyOrChainedEvent"}, -+{MI_RunThread, "MI_RunThread"}, -+{MI_RunThread1, "MI_RunThread1"}, -+{MI_RunThread1_seq1, "MI_RunThread1_seq1"}, -+{MI_IncDmaSysCntxBPtr, "MI_IncDmaSysCntxBPtr"}, -+{MI_IncDmaSysCntxBPtr_seq1, "MI_IncDmaSysCntxBPtr_seq1"}, -+{MI_IncDmaSysCntxBPtr_seq2, "MI_IncDmaSysCntxBPtr_seq2"}, -+{MI_WaitForCntxDmaDescRead, "MI_WaitForCntxDmaDescRead"}, -+{MI_FillInContext, "MI_FillInContext"}, -+{MI_FillInContext_seq1, "MI_FillInContext_seq1"}, -+{MI_WriteNewDescToQueue, "MI_WriteNewDescToQueue"}, -+{MI_WriteNewDescToQueue_seq1, "MI_WriteNewDescToQueue_seq1"}, -+{MI_TestForQueueWrap, "MI_TestForQueueWrap"}, -+{MI_TestForQueueWrap_seq1, "MI_TestForQueueWrap_seq1"}, -+{MI_TestQueueIsFull, "MI_TestQueueIsFull"}, -+{MI_TestQueueIsFull_seq1, "MI_TestQueueIsFull_seq1"}, -+{MI_TestQueueIsFull_seq2, "MI_TestQueueIsFull_seq2"}, -+{MI_CheckPsychoShitFixup, "MI_CheckPsychoShitFixup"}, -+{MI_PsychoShitFixupForcedRead, "MI_PsychoShitFixupForcedRead"}, -+{MI_PrepareDMATimeSlice, "MI_PrepareDMATimeSlice"}, -+{MI_PrepareDMATimeSlice_seq1, "MI_PrepareDMATimeSlice_seq1"}, -+{MI_TProcRestartFromTrapOrTestEventBit2, "MI_TProcRestartFromTrapOrTestEventBit2"}, -+{MI_TProcRestartFromTrapOrTestEventBit2_seq1, "MI_TProcRestartFromTrapOrTestEventBit2_seq1"}, -+{MI_WaitForGlobalsRead, "MI_WaitForGlobalsRead"}, -+{MI_WaitForNPCRead, "MI_WaitForNPCRead"}, -+{MI_EventInterrupt, "MI_EventInterrupt"}, -+{MI_EventInterrupt_seq1, "MI_EventInterrupt_seq1"}, -+{MI_EventInterrupt_seq2, "MI_EventInterrupt_seq2"}, -+{MI_EventInterrupt_seq3, "MI_EventInterrupt_seq3"}, -+{MI_TestSysCntxDmaQueueEmpty, "MI_TestSysCntxDmaQueueEmpty"}, -+{MI_TestSysCntxDmaQueueEmpty_seq1, "MI_TestSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestIfRemoteDesc, "MI_TestIfRemoteDesc"}, -+{MI_DoDmaLocalSetEvent, "MI_DoDmaLocalSetEvent"}, -+{MI_DoDmaLocalSetEvent_seq1, "MI_DoDmaLocalSetEvent_seq1"}, -+{MI_DoDmaLocalSetEvent_seq2, "MI_DoDmaLocalSetEvent_seq2"}, -+{MI_DmaLoop1, "MI_DmaLoop1"}, -+{MI_ExitDmaLoop, "MI_ExitDmaLoop"}, -+{MI_ExitDmaLoop_seq1, "MI_ExitDmaLoop_seq1"}, -+{MI_RemoteDmaTestPAckType, "MI_RemoteDmaTestPAckType"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0, "MI_PacketDiscardOrTestFailRecIfCCis0"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0_seq1, "MI_PacketDiscardOrTestFailRecIfCCis0_seq1"}, -+{MI_TestNackFailIsZero2, "MI_TestNackFailIsZero2"}, -+{MI_TestNackFailIsZero3, "MI_TestNackFailIsZero3"}, -+{MI_DmaFailCountError, "MI_DmaFailCountError"}, -+{MI_TestDmaForSysCntx, "MI_TestDmaForSysCntx"}, -+{MI_TestDmaForSysCntx_seq1, "MI_TestDmaForSysCntx_seq1"}, -+{MI_TestDmaForSysCntx_seq2, "MI_TestDmaForSysCntx_seq2"}, -+{MI_TestAeqB2, "MI_TestAeqB2"}, -+{MI_TestAeqB2_seq1, "MI_TestAeqB2_seq1"}, -+{MI_GetNextDmaDescriptor, "MI_GetNextDmaDescriptor"}, -+{MI_DequeueSysCntxDma2, "MI_DequeueSysCntxDma2"}, -+{MI_InputSetEvent, "MI_InputSetEvent"}, -+{MI_PutBackSysCntxDma, "MI_PutBackSysCntxDma"}, -+{MI_PutBackSysCntxDma_seq1, "MI_PutBackSysCntxDma_seq1"}, -+{MI_PutBackSysCntxDma_seq2, "MI_PutBackSysCntxDma_seq2"}, -+{MI_InputRemoteDma, "MI_InputRemoteDma"}, -+{MI_InputRemoteDma_seq1, "MI_InputRemoteDma_seq1"}, -+{MI_WaitOneTickForWakeup1, "MI_WaitOneTickForWakeup1"}, -+{MI_SendRemoteDmaDesc, "MI_SendRemoteDmaDesc"}, -+{MI_InputLockQueue, "MI_InputLockQueue"}, -+{MI_CloseTheTrappedPacketIfCCis1, "MI_CloseTheTrappedPacketIfCCis1"}, -+{MI_CloseTheTrappedPacketIfCCis1_seq1, "MI_CloseTheTrappedPacketIfCCis1_seq1"}, -+{MI_PostDmaInterrupt, "MI_PostDmaInterrupt"}, -+{MI_InputUnLockQueue, "MI_InputUnLockQueue"}, -+{MI_WaitForUnLockDescRead, "MI_WaitForUnLockDescRead"}, -+{MI_SendEOPforRemoteDma, "MI_SendEOPforRemoteDma"}, -+{MI_LookAtRemoteAck, "MI_LookAtRemoteAck"}, -+{MI_InputWriteBlockQueue, "MI_InputWriteBlockQueue"}, -+{MI_WaitForSpStore, "MI_WaitForSpStore"}, -+{MI_TProcNext, "MI_TProcNext"}, -+{MI_TProcStoppedRunning, "MI_TProcStoppedRunning"}, -+{MI_InputWriteBlock, "MI_InputWriteBlock"}, -+{MI_RunDmaOrDeqNonSysCntxDma, "MI_RunDmaOrDeqNonSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForRun, "MI_ExecuteDmaDescriptorForRun"}, -+{MI_ConfirmQueueLock, "MI_ConfirmQueueLock"}, -+{MI_DmaInputIdentify, "MI_DmaInputIdentify"}, -+{MI_TProcStoppedRunning2, "MI_TProcStoppedRunning2"}, -+{MI_TProcStoppedRunning2_seq1, "MI_TProcStoppedRunning2_seq1"}, -+{MI_TProcStoppedRunning2_seq2, "MI_TProcStoppedRunning2_seq2"}, -+{MI_ThreadInputIdentify, "MI_ThreadInputIdentify"}, -+{MI_InputIdWriteAddrAndType3, "MI_InputIdWriteAddrAndType3"}, -+{MI_IProcTrappedWriteStatus, "MI_IProcTrappedWriteStatus"}, -+{MI_FinishTrappingEop, "MI_FinishTrappingEop"}, -+{MI_InputTestTrans, "MI_InputTestTrans"}, -+{MI_TestAeqB3, "MI_TestAeqB3"}, -+{MI_ThreadUpdateNonSysCntxBack, "MI_ThreadUpdateNonSysCntxBack"}, -+{MI_ThreadQueueOverflow, "MI_ThreadQueueOverflow"}, -+{MI_RunContext0Thread, "MI_RunContext0Thread"}, -+{MI_RunContext0Thread_seq1, "MI_RunContext0Thread_seq1"}, -+{MI_RunContext0Thread_seq2, "MI_RunContext0Thread_seq2"}, -+{MI_RunDmaDesc, "MI_RunDmaDesc"}, -+{MI_RunDmaDesc_seq1, "MI_RunDmaDesc_seq1"}, -+{MI_RunDmaDesc_seq2, "MI_RunDmaDesc_seq2"}, -+{MI_TestAeqB, "MI_TestAeqB"}, -+{MI_WaitForNonCntxDmaDescRead, "MI_WaitForNonCntxDmaDescRead"}, -+{MI_DmaQueueOverflow, "MI_DmaQueueOverflow"}, -+{MI_BlockCopyEvent, "MI_BlockCopyEvent"}, -+{MI_BlockCopyEventReadBlock, "MI_BlockCopyEventReadBlock"}, -+{MI_BlockCopyWaitForReadData, "MI_BlockCopyWaitForReadData"}, -+{MI_InputWriteWord, "MI_InputWriteWord"}, -+{MI_TraceSetEvents, "MI_TraceSetEvents"}, -+{MI_TraceSetEvents_seq1, "MI_TraceSetEvents_seq1"}, -+{MI_TraceSetEvents_seq2, "MI_TraceSetEvents_seq2"}, -+{MI_InputWriteDoubleWd, "MI_InputWriteDoubleWd"}, -+{MI_SendLockTransIfCCis1, "MI_SendLockTransIfCCis1"}, -+{MI_WaitForDmaRoutes1, "MI_WaitForDmaRoutes1"}, -+{MI_LoadDmaContext, "MI_LoadDmaContext"}, -+{MI_InputTestAndSetWord, "MI_InputTestAndSetWord"}, -+{MI_InputTestAndSetWord_seq1, "MI_InputTestAndSetWord_seq1"}, -+{MI_GetDestEventValue, "MI_GetDestEventValue"}, -+{MI_SendDmaIdentify, "MI_SendDmaIdentify"}, -+{MI_InputAtomicAddWord, "MI_InputAtomicAddWord"}, -+{MI_LoadBFromTransD0, "MI_LoadBFromTransD0"}, -+{MI_ConditionalWriteBackCCTrue, "MI_ConditionalWriteBackCCTrue"}, -+{MI_WaitOneTickForWakeup, "MI_WaitOneTickForWakeup"}, -+{MI_SendFinalUnlockTrans, "MI_SendFinalUnlockTrans"}, -+{MI_SendDmaEOP, "MI_SendDmaEOP"}, -+{MI_GenLastAddrForPsycho, "MI_GenLastAddrForPsycho"}, -+{MI_FailedAckIfCCis0, "MI_FailedAckIfCCis0"}, -+{MI_FailedAckIfCCis0_seq1, "MI_FailedAckIfCCis0_seq1"}, -+{MI_WriteDmaSysCntxDesc, "MI_WriteDmaSysCntxDesc"}, -+{MI_TimesliceDmaQueueOverflow, "MI_TimesliceDmaQueueOverflow"}, -+{MI_DequeueNonSysCntxThread1, "MI_DequeueNonSysCntxThread1"}, -+{MI_DequeueNonSysCntxThread1_seq1, "MI_DequeueNonSysCntxThread1_seq1"}, -+{MI_TestThreadQueueEmpty, "MI_TestThreadQueueEmpty"}, -+{MI_ClearThreadQueueIfCC, "MI_ClearThreadQueueIfCC"}, -+{MI_DequeueSysCntxThread1, "MI_DequeueSysCntxThread1"}, -+{MI_DequeueSysCntxThread1_seq1, "MI_DequeueSysCntxThread1_seq1"}, -+{MI_TProcStartUpGeneric, "MI_TProcStartUpGeneric"}, -+{MI_WaitForPCload2, "MI_WaitForPCload2"}, -+{MI_WaitForNPCWrite, "MI_WaitForNPCWrite"}, -+{MI_WaitForEventWaitAddr, "MI_WaitForEventWaitAddr"}, -+{MI_WaitForWaitEventAccess, "MI_WaitForWaitEventAccess"}, -+{MI_WaitForWaitEventAccess_seq1, "MI_WaitForWaitEventAccess_seq1"}, -+{MI_WaitForWaitEventDesc, "MI_WaitForWaitEventDesc"}, -+{MI_WaitForEventReadTy0, "MI_WaitForEventReadTy0"}, -+{MI_SendCondTestFail, "MI_SendCondTestFail"}, -+{MI_InputMoveToNextTrans, "MI_InputMoveToNextTrans"}, -+{MI_ThreadUpdateSysCntxBack, "MI_ThreadUpdateSysCntxBack"}, -+{MI_FinishedSetEvent, "MI_FinishedSetEvent"}, -+{MI_EventIntUpdateBPtr, "MI_EventIntUpdateBPtr"}, -+{MI_EventQueueOverflow, "MI_EventQueueOverflow"}, -+{MI_MaskLowerSource, "MI_MaskLowerSource"}, -+{MI_DmaLoop, "MI_DmaLoop"}, -+{MI_SendNullSetEvent, "MI_SendNullSetEvent"}, -+{MI_SendFinalSetEvent, "MI_SendFinalSetEvent"}, -+{MI_TestNackFailIsZero1, "MI_TestNackFailIsZero1"}, -+{MI_DmaPacketTimedOutOrPacketError, "MI_DmaPacketTimedOutOrPacketError"}, -+{MI_NextPacketIsLast, "MI_NextPacketIsLast"}, -+{MI_TestForZeroLengthDma, "MI_TestForZeroLengthDma"}, -+{MI_WaitForPCload, "MI_WaitForPCload"}, -+{MI_ReadInIns, "MI_ReadInIns"}, -+{MI_WaitForInsRead, "MI_WaitForInsRead"}, -+{MI_WaitForLocals, "MI_WaitForLocals"}, -+{MI_WaitForOutsWrite, "MI_WaitForOutsWrite"}, -+{MI_WaitForWaitEvWrBack, "MI_WaitForWaitEvWrBack"}, -+{MI_WaitForLockRead, "MI_WaitForLockRead"}, -+{MI_TestQueueLock, "MI_TestQueueLock"}, -+{MI_InputIdWriteAddrAndType, "MI_InputIdWriteAddrAndType"}, -+{MI_InputIdWriteAddrAndType2, "MI_InputIdWriteAddrAndType2"}, -+{MI_ThreadInputIdentify2, "MI_ThreadInputIdentify2"}, -+{MI_WriteIntoTrapArea0, "MI_WriteIntoTrapArea0"}, -+{MI_GenQueueBlockWrAddr, "MI_GenQueueBlockWrAddr"}, -+{MI_InputDiscardFreeLock, "MI_InputDiscardFreeLock"}, -+{MI_WriteIntoTrapArea1, "MI_WriteIntoTrapArea1"}, -+{MI_WriteIntoTrapArea2, "MI_WriteIntoTrapArea2"}, -+{MI_ResetBPtrToBase, "MI_ResetBPtrToBase"}, -+{MI_InputDoTrap, "MI_InputDoTrap"}, -+{MI_RemoteDmaCntxt0Update, "MI_RemoteDmaCntxt0Update"}, -+{MI_ClearQueueLock, "MI_ClearQueueLock"}, -+{MI_IProcTrappedBlockWriteData, "MI_IProcTrappedBlockWriteData"}, -+{MI_FillContextFilter, "MI_FillContextFilter"}, -+{MI_IProcTrapped4, "MI_IProcTrapped4"}, -+{MI_RunSysCntxDma, "MI_RunSysCntxDma"}, -+{MI_ChainedEventError, "MI_ChainedEventError"}, -+{MI_InputTrappingEOP, "MI_InputTrappingEOP"}, -+{MI_CheckForRunIfZero, "MI_CheckForRunIfZero"}, -+{MI_TestForBreakOrSuspend, "MI_TestForBreakOrSuspend"}, -+{MI_SwapForRunable, "MI_SwapForRunable"}, -diff -urN clean/include/elan3/neterr_rpc.h linux-2.6.9/include/elan3/neterr_rpc.h ---- clean/include/elan3/neterr_rpc.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/neterr_rpc.h 2003-06-26 12:05:22.000000000 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NETERR_RPC_H -+#define __ELAN3_NETERR_RPC_H -+ -+#ident "$Id: neterr_rpc.h,v 1.20 2003/06/26 16:05:22 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/neterr_rpc.h,v $*/ -+ -+#define NETERR_SERVICE "neterr-srv" -+#define NETERR_PROGRAM ((u_long) 170002) -+#define NETERR_VERSION ((u_long) 1) -+ -+#define NETERR_NULL_RPC 0 -+#define NETERR_FIXUP_RPC 1 -+ -+/* network error rpc timeout */ -+#define NETERR_RPC_TIMEOUT 5 -+ -+/* -+ * XDR functions for Tru64 and Linux in userspace. -+ * NB Linux kernelspace xdr routines are in network_error. -+ * and *must* be kept consistent. -+ */ -+#if defined(DIGITAL_UNIX) || !defined(__KERNEL__) -+bool_t -+xdr_capability (XDR *xdrs, void *arg) -+{ -+ ELAN_CAPABILITY *cap = (ELAN_CAPABILITY *) arg; -+ -+ return (xdr_opaque (xdrs, (caddr_t) &cap->cap_userkey, sizeof (cap->cap_userkey)) && -+ xdr_int (xdrs, &cap->cap_version) && -+ xdr_u_short (xdrs, &cap->cap_type) && -+ xdr_int (xdrs, &cap->cap_lowcontext) && -+ xdr_int (xdrs, &cap->cap_highcontext) && -+ xdr_int (xdrs, &cap->cap_mycontext) && -+ xdr_int (xdrs, &cap->cap_lownode) && -+ xdr_int (xdrs, &cap->cap_highnode) && -+ xdr_u_int (xdrs, &cap->cap_railmask) && -+ xdr_opaque (xdrs, (caddr_t) &cap->cap_bitmap[0], sizeof (cap->cap_bitmap))); -+} -+ -+bool_t -+xdr_neterr_msg (XDR *xdrs, void *req) -+{ -+ NETERR_MSG *msg = (NETERR_MSG *) req; -+ -+ return (xdr_u_int (xdrs, &msg->Rail) && -+ xdr_capability (xdrs, &msg->SrcCapability) && -+ xdr_capability (xdrs, &msg->DstCapability) && -+ xdr_u_int (xdrs, &msg->DstProcess) && -+ xdr_u_int (xdrs, &msg->CookieAddr) && -+ xdr_u_int (xdrs, &msg->CookieVProc) && -+ xdr_u_int (xdrs, &msg->NextCookie) && -+ xdr_u_int (xdrs, &msg->WaitForEop)); -+} -+#endif /* INCLUDE_XDR_INLINE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN3_NETERR_RPC_H */ -diff -urN clean/include/elan3/perm.h linux-2.6.9/include/elan3/perm.h ---- clean/include/elan3/perm.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/perm.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PERM_H -+#define __ELAN3_PERM_H -+ -+#ident "$Id: perm.h,v 1.7 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/perm.h,v $*/ -+ -+#define ELAN3_PERM_NULL 0x00 -+#define ELAN3_PERM_LOCAL_READ 0x04 -+#define ELAN3_PERM_READ 0x08 -+#define ELAN3_PERM_NOREMOTE 0x0c -+#define ELAN3_PERM_REMOTEREAD 0x10 -+#define ELAN3_PERM_REMOTEWRITE 0x14 -+#define ELAN3_PERM_REMOTEEVENT 0x18 -+#define ELAN3_PERM_REMOTEALL 0x1c -+ -+#endif /* __ELAN3_PERM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/pte.h linux-2.6.9/include/elan3/pte.h ---- clean/include/elan3/pte.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/pte.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,139 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PTE_H -+#define __ELAN3_PTE_H -+ -+#ident "$Id: pte.h,v 1.26 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/pte.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" -+{ -+#endif -+ -+#include -+#include -+ -+typedef E3_uint64 ELAN3_PTE; -+typedef E3_uint32 ELAN3_PTP; -+ -+#define ELAN3_PTE_SIZE (8) -+#define ELAN3_PTP_SIZE (4) -+ -+#define ELAN3_PTE_REF ((E3_uint64) 1 << 63) /* 63 - referenced bit */ -+#define ELAN3_PTE_MOD ((E3_uint64) 1 << 55) /* 55 - modified bit */ -+#define ELAN3_RM_MASK (ELAN3_PTE_REF | ELAN3_PTE_MOD) -+ -+#define ELAN3_PTE_PFN_MASK 0x0000fffffffff000ull /* [12:48] - Physical address */ -+ -+#define ELAN3_PTE_BIG_ENDIAN 0x80 /* 7 - big endian */ -+#define ELAN3_PTE_64_BIT 0x40 /* 6 - 64 bit pci address */ -+#define ELAN3_PTE_LOCAL 0x20 /* 5 - local sdram */ -+ -+#define ELAN3_PTE_PERM_MASK 0x1c /* [2:4] - Permissions */ -+#define ELAN3_PTE_PERM_SHIFT 2 -+ -+#define ELAN3_ET_MASK 0x3 -+#define ELAN3_ET_INVALID 0x0 /* [0:1] */ -+#define ELAN3_ET_PTP 0x1 -+#define ELAN3_ET_PTE 0x2 -+ -+#define ELAN3_INVALID_PTP ((ELAN3_PTP) 0) -+#define ELAN3_INVALID_PTE ((ELAN3_PTE) 0) -+ -+#define ELAN3_PTP_TYPE(ptp) ((ptp) & ELAN3_ET_MASK) -+#define ELAN3_PTE_TYPE(pte) ((pte) & ELAN3_ET_MASK) -+#define ELAN3_PTE_PERM(pte) ((pte) & ELAN3_PTE_PERM_MASK) -+#define ELAN3_PTE_VALID(pte) (((pte) & ELAN3_ET_MASK) == ELAN3_ET_PTE) -+#define ELAN3_PTE_ISREF(pte) ((pte) & ELAN3_PTE_REF) -+#define ELAN3_PTE_ISMOD(pte) ((pte) & ELAN3_PTE_MOD) -+#define ELAN3_PTE_WRITEABLE(pte) (ELAN3_PERM_WRITEABLE(ELAN3_PTE_PERM(pte))) -+ -+#define ELAN3_PERM_WRITEABLE(perm) ((perm) == ELAN3_PERM_NOREMOTE || (perm) > ELAN3_PERM_REMOTEREAD) -+#define ELAN3_PERM_REMOTE(perm) ((perm) > ELAN3_PERM_NOREMOTE) -+ -+#define ELAN3_PERM_READONLY(perm) ((perm) == ELAN3_PERM_NOREMOTE ? ELAN3_PERM_LOCAL_READ : \ -+ (perm) > ELAN3_PERM_REMOTEREAD ? ELAN3_PERM_READ : (perm)) -+#if PAGE_SHIFT == 12 -+# define ELAN3_PAGE_SHIFT 12 -+#else -+# define ELAN3_PAGE_SHIFT 13 -+#endif -+ -+#define ELAN3_PAGE_SIZE (1 << ELAN3_PAGE_SHIFT) -+#define ELAN3_PAGE_OFFSET (ELAN3_PAGE_SIZE-1) -+#define ELAN3_PAGE_MASK (~ELAN3_PAGE_OFFSET) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define ELAN3_L3_SHIFT 5 -+#else -+# define ELAN3_L3_SHIFT 6 -+#endif -+#define ELAN3_L2_SHIFT 6 -+#define ELAN3_L1_SHIFT 8 -+ -+/* Number of entries in a given level ptbl */ -+#define ELAN3_L3_ENTRIES (1 << ELAN3_L3_SHIFT) -+#define ELAN3_L2_ENTRIES (1 << ELAN3_L2_SHIFT) -+#define ELAN3_L1_ENTRIES (1 << ELAN3_L1_SHIFT) -+ -+/* Virtual address spanned by each entry */ -+#define ELAN3_L3_SIZE (1 << (ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L1_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_L2_SHIFT+ELAN3_PAGE_SHIFT)) -+ -+/* Virtual address size of page table */ -+#define ELAN3_L1_PTSIZE (ELAN3_L1_ENTRIES * ELAN3_L1_SIZE) -+#define ELAN3_L3_PTSIZE (ELAN3_L3_ENTRIES * ELAN3_L3_SIZE) -+#define ELAN3_L2_PTSIZE (ELAN3_L2_ENTRIES * ELAN3_L2_SIZE) -+ -+/* Mask for offset into page table */ -+#define ELAN3_L1_PTOFFSET ((ELAN3_L1_SIZE*ELAN3_L1_ENTRIES)-1) -+#define ELAN3_L3_PTOFFSET ((ELAN3_L3_SIZE*ELAN3_L3_ENTRIES)-1) -+#define ELAN3_L2_PTOFFSET ((ELAN3_L2_SIZE*ELAN3_L2_ENTRIES)-1) -+ -+#define ELAN3_L1_INDEX(addr) (((E3_Addr) (addr) & 0xFF000000) >> (ELAN3_L2_SHIFT+ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_INDEX(addr) (((E3_Addr) (addr) & 0x00FD0000) >> (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L3_INDEX(addr) (((E3_Addr) (addr) & 0x0003F000) >> ELAN3_PAGE_SHIFT) -+ -+#define ELAN3_L1_BASE(addr) (((E3_Addr)(addr)) & 0x00000000) -+#define ELAN3_L2_BASE(addr) (((E3_Addr)(addr)) & 0xFF000000) -+#define ELAN3_L3_BASE(addr) (((E3_Addr)(addr)) & 0xFFFC0000) -+ -+/* Convert a page table pointer entry to the PT */ -+#define PTP_TO_PT_PADDR(ptp) ((E3_Addr)(ptp & 0xFFFFFFFC)) -+ -+#ifdef __KERNEL__ -+/* -+ * incompatible access for permission macro. -+ */ -+extern u_char elan3mmu_permissionTable[8]; -+#define ELAN3_INCOMPAT_ACCESS(perm,access) (! (elan3mmu_permissionTable[(perm)>>ELAN3_PTE_PERM_SHIFT] & (1 << (access)))) -+ -+#define elan3_readptp(dev, ptp) (elan3_sdram_readl (dev, ptp)) -+#define elan3_writeptp(dev, ptp, value) (elan3_sdram_writel (dev, ptp, value)) -+#define elan3_readpte(dev, pte) (elan3_sdram_readq (dev, pte)) -+#define elan3_writepte(dev,pte, value) (elan3_sdram_writeq (dev, pte, value)) -+ -+#define elan3_invalidatepte(dev, pte) (elan3_sdram_writel (dev, pte, 0)) -+#define elan3_modifypte(dev,pte,new) (elan3_sdram_writel (dev, pte, (int) (new))) -+#define elan3_clrref(dev,pte) (elan3_sdram_writeb (dev, pte + 7) -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_PTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/spinlock.h linux-2.6.9/include/elan3/spinlock.h ---- clean/include/elan3/spinlock.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/spinlock.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_SPINLOCK_ -+#define _ELAN3_SPINLOCK_ -+ -+#ident "$Id: spinlock.h,v 1.31 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/spinlock.h,v $*/ -+ -+/* -+ * This spinlock is designed for main/elan processor interactions. -+ * The lock is split over Elan/Main memory in such a way that -+ * we don't end up busy-polling over the PCI. -+ * In the Elan memory we have two words; one is a sequence number -+ * and the other is a lock word for main. -+ * In main memory we have a copy of the sequence number which main polls when it is -+ * waiting for the Elan to drop the lock. Main polls this word until it becomes -+ * equal to the sequence number it sampled. -+ * The Elan drops the lock by writing the current sequence number to main memory. -+ * It is coded to always give priority to the Elan thread, and so when both go for the -+ * lock, main will back off first. -+ * -+ * 18/3/98 -+ * This has been extended to avoid a starvation case where both the main and thread claim the -+ * lock and so both backoff (thread does a break). So now, main attempts to claim the -+ * lock by writing 'mainLock' then samples the 'sl_seq' and if it has the lock -+ * it sets 'mainGotLock'. The thread will now see the 'sl_mainLock' set, but will only -+ * backoff with a c_break_busywait() if 'mainGotLock' is set too. -+ */ -+typedef struct elan3_spinlock_elan { -+ union { -+ volatile E3_uint64 mainLocks; /* main writes this dble word */ -+ struct { -+ volatile E3_uint32 mainLock; /* main wants a lock */ -+ volatile E3_uint32 mainGotLock; /* main has the lock */ -+ } s; -+ } sl_u; -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ volatile E3_uint32 sl_mainWait; /* performance counter */ -+ volatile E3_uint32 sl_elanWait; /* performance counter */ -+ volatile E3_uint32 sl_elanBusyWait; /* performance counter */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[5]; /* pad to 64-bytes */ -+} ELAN3_SPINLOCK_ELAN; -+ -+#define sl_mainLocks sl_u.mainLocks -+#define sl_mainLock sl_u.s.mainLock -+#define sl_mainGotLock sl_u.s.mainGotLock -+ -+#define SL_MAIN_RECESSIVE 1 -+#define SL_MAIN_DOMINANT 2 -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef union elan3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+} ELAN3_SPINLOCK_MAIN; -+ -+/* Main/Main or Elan/Elan lock word */ -+typedef volatile int ELAN3_SPINLOCK; -+ -+#ifdef __ELAN3__ -+ -+/* Main/Elan interlock */ -+ -+#define ELAN3_ME_SPINENTER(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_mainLock) \ -+ elan3_me_spinblock(SLE, SL);\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+#define ELAN3_ME_SPINEXIT(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+ -+ -+/* Elan/Elan interlock */ -+#define ELAN3_SPINENTER(L) do {\ -+ asm volatile ("! store barrier");\ -+ if (c_swap ((L), 1)) elan3_spinenter(L);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ asm volatile ("! store barrier");\ -+ c_swap((L), 0);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+ -+extern void elan3_me_spinblock (ELAN3_SPINLOCK_ELAN *sle, ELAN3_SPINLOCK_MAIN *sl); -+extern void elan3_spinenter (ELAN3_SPINLOCK *l); -+ -+#else -+ -+/* Main/Elan interlock */ -+#ifdef DEBUG -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_int32 maxLoops = 0x7fffffff; \ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1) && maxLoops--) ; \ -+ if (maxLoops < 0) { \ -+ printf("Failed to get ME lock %lx/%lx seq %d sle_seq %d sl_seq %d\n", \ -+ SL, SLE, seq, \ -+ elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)), \ -+ (SL)->sl_seq); \ -+ } \ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#else -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1)) ; \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#endif -+#define ELAN3_ME_FORCEENTER(SDRAM,SLE,SL) do { \ -+ register E3_uint32 seq; \ -+ MEMBAR_STORELOAD(); \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_DOMINANT); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) \ -+ { \ -+ /* NOTE: we MUST call elan3_usecspin here for kernel comms */\ -+ while ((SL)->sl_seq == (seq)-1) \ -+ elan3_usecspin (1); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ } \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD(); \ -+} while (0) -+ -+#define ELAN3_ME_TRYENTER(SDRAM,SLE,SL,SEQ) do { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ SEQ = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+} while (0) -+ -+#define ELAN3_ME_CHECKENTER(SDRAM,SLE,SL,SEQ) do { \ -+ if ((SEQ) == ((SL)->sl_seq)) { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } \ -+ else ELAN3_ME_SPINENTER(SLE,SL); \ -+} while (0) -+ -+#define ELAN3_ME_SPINEXIT(SDRAM,SLE,SL) do {\ -+ MEMBAR_STORESTORE(); \ -+ elan3_write64_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLocks), 0); \ -+ MEMBAR_STORESTORE(); \ -+ } while (0) -+ -+ -+/* Main/Main */ -+#define ELAN3_SPINENTER(L) do {\ -+ while (c_swap ((L), 1)) ; \ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ c_swap((L), 0);\ -+ } while (0) -+#endif /* _ELAN3_ */ -+ -+#endif /* _ELAN3_SPINLOCK_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/thread.h linux-2.6.9/include/elan3/thread.h ---- clean/include/elan3/thread.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/thread.h 2002-08-09 07:23:34.000000000 -0400 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_THREAD_H -+#define _ELAN3_THREAD_H -+ -+#ident "$Id: thread.h,v 1.17 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/thread.h,v $*/ -+ -+/* Alignment for a stack frame */ -+#define E3_STACK_ALIGN (64) -+ -+typedef struct _E3_Frame { -+ E3_uint32 fr_local[8]; /* saved locals (not used) */ -+ E3_uint32 fr_arg[6]; /* saved arguements o0 -> o5 */ -+ E3_Addr fr_savefp; /* saved frame pointer o6 */ -+ E3_Addr fr_savepc; /* saved program counter o7 */ -+ E3_Addr fr_stret; /* stuct return addr */ -+ E3_uint32 fr_argd[6]; /* arg dump area */ -+ E3_uint32 fr_argx[1]; /* array of args past the sixth */ -+} E3_Frame; -+ -+typedef struct _E3_Stack { -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ E3_uint32 Globals[8]; -+ E3_uint32 Outs[8]; -+} E3_Stack; -+ -+typedef struct _E3_OutsRegs { -+ E3_uint32 o[8]; /* o6 == pc, o7 == fptr */ -+} E3_OutsRegs; -+ -+/* -+ * "Magic" value for stack pointer to be ignored. -+ */ -+#define VanishingStackPointer 0x42 -+ -+ -+/* -+ * When the Elan traps the N & Z CC bits are held in the NPC -+ * and the V & C bits are in the PC -+ */ -+#define PSR_C_BIT (1) -+#define PSR_V_BIT (2) -+#define PSR_Z_BIT (1) -+#define PSR_N_BIT (2) -+#define CC_MASK (3) -+#define PC_MASK (~3) -+#define SP_MASK (~3) -+ -+/* -+ * Threads processor Opcodes. -+ */ -+#define OPCODE_MASK (0xC1F80000) -+#define OPCODE_IMM (1 << 13) -+ -+#define OPCODE_CLASS(instr) ((instr) & 0xC0000000) -+#define OPCODE_CLASS_0 0x00000000 -+#define OPCODE_CLASS_1 0x40000000 -+#define OPCODE_CLASS_2 0x80000000 -+#define OPCODE_CLASS_3 0xC0000000 -+ -+#define OPCODE_CPOP 0x81B00000 -+#define OPCODE_Ticc 0x81D00000 -+ -+#define OPCODE_FCODE_SHIFT 19 -+#define OPCODE_FCODE_MASK 0x1f -+#define OPCODE_NOT_ALUOP 0x01000000 -+ -+#define OPCODE_SLL 0x81280000 -+#define OPCODE_SRL 0x81300000 -+#define OPCODE_SRA 0x81380000 -+ -+#define OPCODE_OPEN 0x81600000 -+#define OPCODE_CLOSE 0x81680000 -+#define OPCODE_BREAKTEST 0x81700000 -+ -+#define OPCODE_BREAK 0x81a00000 -+#define OPCODE_SUSPEND 0x81a80000 -+#define OPCODE_WAIT 0x81b00000 -+ -+#define OPCODE_JMPL 0x81c00000 -+ -+#define OPCODE_LD 0xC0000000 -+#define OPCODE_LDD 0xC0180000 -+ -+#define OPCODE_LDBLOCK16 0xC0900000 -+#define OPCODE_LDBLOCK32 0xC0800000 -+#define OPCODE_LDBLOCK64 0xC0980000 -+ -+#define OPCODE_ST 0xC0200000 -+#define OPCODE_STD 0xC0380000 -+ -+#define OPCODE_SWAP 0xC0780000 -+ -+#define OPCODE_STBLOCK16 0xC0b00000 -+#define OPCODE_STBLOCK32 0xC0a00000 -+#define OPCODE_STBLOCK64 0xC0b80000 -+ -+#define OPCODE_CLASS0_MASK 0xC1C00000 -+#define OPCODE_SETHI 0x01000000 -+#define OPCODE_BICC 0x00800000 -+#define OPCODE_SENDREG 0x01800000 -+#define OPCODE_SENDMEM 0x01c00000 -+ -+#define OPCODE_BICC_BN 0x00000000 -+#define OPCODE_BICC_BE 0x02000000 -+#define OPCODE_BICC_BLE 0x04000000 -+#define OPCODE_BICC_BL 0x06000000 -+#define OPCODE_BICC_BLEU 0x08000000 -+#define OPCODE_BICC_BCS 0x0A000000 -+#define OPCODE_BICC_BNEG 0x0C000000 -+#define OPCODE_BICC_BVS 0x0E000000 -+ -+#define OPCODE_BICC_MASK 0x0E000000 -+#define OPCODE_BICC_ANNUL 0x20000000 -+ -+#define INSTR_RS2(instr) (((instr) >> 0) & 0x1F) -+#define INSTR_RS1(instr) (((instr) >> 14) & 0x1F) -+#define INSTR_RD(instr) (((instr) >> 25) & 0x1F) -+#define INSTR_IMM(instr) (((instr) & 0x1000) ? ((instr) & 0xFFF) | 0xFFFFF000 : (instr) & 0xFFF) -+ -+#define Ticc_COND(instr) INSTR_RD(instr) -+#define Ticc_TA 8 -+ -+#endif /* _ELAN3_THREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/threadlinkage.h linux-2.6.9/include/elan3/threadlinkage.h ---- clean/include/elan3/threadlinkage.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/threadlinkage.h 2002-08-09 07:23:34.000000000 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_THREADLINKAGE_H -+#define __ELAN3_THREADLINKAGE_H -+ -+#ident "$Id: threadlinkage.h,v 1.6 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadlinkage.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(_ASM) || defined(__LANGUAGE_ASSEMBLY__) -+ -+/* -+ * Macro to define weak symbol aliases. These are similar to the ANSI-C -+ * #pragma weak name = _name -+ * except a compiler can determine type. The assembler must be told. Hence, -+ * the second parameter must be the type of the symbol (i.e.: function,...) -+ */ -+#define ANSI_PRAGMA_WEAK(sym, stype) \ -+ .weak sym; \ -+ .type sym, #stype; \ -+/* CSTYLED */ \ -+sym = _/**/sym -+ -+/* -+ * ENTRY provides the standard procedure entry code -+ */ -+#define ENTRY(x) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x; \ -+x: -+ -+/* -+ * ENTRY2 is identical to ENTRY but provides two labels for the entry point. -+ */ -+#define ENTRY2(x, y) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x, y; \ -+/* CSTYLED */ \ -+x: ; \ -+y: -+ -+ -+/* -+ * ALTENTRY provides for additional entry points. -+ */ -+#define ALTENTRY(x) \ -+ .global x; \ -+x: -+ -+/* -+ * DGDEF and DGDEF2 provide global data declarations. -+ * -+ * DGDEF provides a word aligned word of storage. -+ * -+ * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This -+ * implies this macro is best used for byte arrays. -+ * -+ * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. -+ */ -+#define DGDEF2(name, sz) \ -+ .section ".data"; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF3(name, sz, algn) \ -+ .section ".data"; \ -+ .align algn; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF(name) DGDEF3(name, 4, 4) -+ -+/* -+ * SET_SIZE trails a function and set the size for the ELF symbol table. -+ */ -+#define SET_SIZE(x) \ -+ .size x, (.-x) -+ -+#endif /* _ASM || __LANGUAGE_ASSEMBLY__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_THREADLINKAGE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/threadsyscall.h linux-2.6.9/include/elan3/threadsyscall.h ---- clean/include/elan3/threadsyscall.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/threadsyscall.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_SYSCALL_H -+#define __ELAN3_SYSCALL_H -+ -+#ident "$Id: threadsyscall.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadsyscall.h,v $*/ -+ -+/* -+ * This file contains the system calls supported from the Elan. -+ */ -+#define ELAN3_DEBUG_TRAPNUM 5 /* thread debugging trap */ -+#define ELAN3_ABORT_TRAPNUM 6 /* bad abort trap */ -+#define ELAN3_ELANCALL_TRAPNUM 7 /* elansyscall trap */ -+#define ELAN3_SYSCALL_TRAPNUM 8 /* new syscall trap */ -+ -+#define ELAN3_T_SYSCALL_CODE 0 /* offsets in struct elan3_t_syscall */ -+#define ELAN3_T_SYSCALL_ERRNO 4 -+ -+#define ELAN3_SYS_open 1 -+#define ELAN3_SYS_close 2 -+#define ELAN3_SYS_write 3 -+#define ELAN3_SYS_read 4 -+#define ELAN3_SYS_poll 5 -+#define ELAN3_SYS_ioctl 6 -+#define ELAN3_SYS_lseek 7 -+#define ELAN3_SYS_mmap 8 -+#define ELAN3_SYS_munmap 9 -+#define ELAN3_SYS_kill 10 -+#define ELAN3_SYS_getpid 11 -+ -+#if !defined(SYS_getpid) && defined(__NR_getxpid) -+#define SYS_getpid __NR_getxpid /* for linux */ -+#endif -+ -+#if !defined(_ASM) && !defined(__LANGUAGE_ASSEMBLY__) -+ -+extern int elan3_t_open (const char *, int, ...); -+extern ssize_t elan3_t_write (int, const void *, unsigned); -+extern ssize_t elan3_t_read(int, void *, unsigned); -+extern int elan3_t_ioctl(int, int, ...); -+extern int elan3_t_close(int); -+extern off_t elan3_t_lseek(int filedes, off_t offset, int whence); -+ -+extern caddr_t elan3_t_mmap(caddr_t, size_t, int, int, int, off_t); -+extern int elan3_t_munmap(caddr_t, size_t); -+ -+extern int elan3_t_getpid(void); -+extern void elan3_t_abort(char *str); -+ -+#endif /* !_ASM && ! __LANGUAGE_ASSEMBLY__ */ -+ -+#endif /* __ELAN3_SYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/trtype.h linux-2.6.9/include/elan3/trtype.h ---- clean/include/elan3/trtype.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/trtype.h 2002-08-09 07:23:34.000000000 -0400 -@@ -0,0 +1,116 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_TRTYPE_H -+#define _ELAN3_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.13 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/trtype.h,v $ */ -+ -+/*<15> ackNow */ -+#define TR_SENDACK (1 << 15) -+ -+#define TR_SIZE_SHIFT 12 -+#define TR_SIZE_MASK 7 -+ -+/*<14:12> Size 0, 1, 2, 4, 8, 16, 32, 64 Double Words -+ Bit 14 is forced to zero currently so that only size 0, 1, 2, 4 are -+ allowed */ -+ -+#define TR_SIZE0 (0 << TR_SIZE_SHIFT) -+#define TR_SIZE1 (1 << TR_SIZE_SHIFT) -+#define TR_SIZE2 (2 << TR_SIZE_SHIFT) -+#define TR_SIZE4 (3 << TR_SIZE_SHIFT) -+#define TR_SIZE8 (4 << TR_SIZE_SHIFT) -+ -+#define TR_64_BIT_ADDR (1 << 11) -+#define TR_LAST_TRANS (1 << 10) -+ -+#define TR_WRITEBLOCK_BIT (1 << 9) -+#define TR_WRITEBLOCK (TR_WRITEBLOCK_BIT | TR_SIZE8) -+ -+ -+#define TR_WRITEBLOCK_SIZE 64 -+ -+/* -+ * write-block -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_TYPE_SHIFT 7 -+#define TR_TYPE_MASK ((1 << 2) - 1) -+ -+#define TR_TYPE_BYTE 0 -+#define TR_TYPE_SHORT 1 -+#define TR_TYPE_WORD 2 -+#define TR_TYPE_DWORD 3 -+ -+#define TR_PARTSIZE_MASK ((1 << 7) -1) -+ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * trace-route format -+ */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision ID */ -+#define TR_TRACEROUTE0_BCAST_TOP_PIN(val) (((val) >> 7) & 1) /* 7 Broadcast Top Pin (REV B) */ -+#define TR_TRACEROUTE0_LNR(val) ((val) >> 8) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_PRIO(val) ((val & 0xF)) /* 0:3 Arrival Priority (REV A) */ -+#define TR_TRACEROUTE1_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Priority Held(Age) (REV A) */ -+#define TR_TRACEROUTE1_ROUTE_SELECTED(val) ((val) & 0xFF) /* 0:7 Arrival age (REV B) */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_ADAPT(val) (((val) >> 12) & 3) /* 12:13 This Adaptive Value (REV A) */ -+#define TR_TRACEROUTE1_BCAST_BOT(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom (REV B) */ -+ -+#define TR_TRACEROUTE2_ARRIVAL_AGE(val) ((val) & 0xF) /* 0:3 Arrival Age (REV B) */ -+#define TR_TRACEROUTE2_CURR_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Current Age (REV B) */ -+#define TR_TRACEROUTE2_BUSY(val) (((val) >> 8) & 0xFF) /* 8:15 Busy (REV B) */ -+ -+#define TR_TRACEROUTE_SIZE 32 -+#define TR_TRACEROUTE_ENTRIES (TR_TRACEROUTE_SIZE/2) -+ -+/* -+ * non-write block -+ */ -+#define TR_OPCODE_MASK (((1 << 8) - 1) | \ -+ (TR_SIZE_MASK << TR_SIZE_SHIFT) | \ -+ TR_WRITEBLOCK_BIT) -+ -+#define TR_NOP_TRANS (0x0 | TR_SIZE0) -+#define TR_SETEVENT (0x0 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_REMOTEDMA (0x1 | TR_SIZE4 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_LOCKQUEUE (0x2 | TR_SIZE0) -+#define TR_UNLOCKQUEUE (0x3 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+ -+#define TR_SENDDISCARD (0x4 | TR_SIZE0) -+#define TR_TRACEROUTE (0x5 | TR_SIZE4) -+ -+#define TR_DMAIDENTIFY (0x6 | TR_SIZE0) -+#define TR_THREADIDENTIFY (0x7 | TR_SIZE1) -+ -+#define TR_GTE (0x8 | TR_SIZE1) -+#define TR_LT (0x9 | TR_SIZE1) -+#define TR_EQ (0xA | TR_SIZE1) -+#define TR_NEQ (0xB | TR_SIZE1) -+ -+#define TR_WRITEWORD (0xC | TR_SIZE1) -+#define TR_WRITEDOUBLEWORD (0xD | TR_SIZE1) -+#define TR_TESTANDWRITE (0xE | TR_SIZE1) -+#define TR_ATOMICADDWORD (0xF | TR_SIZE1 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_OPCODE_TYPE_MASK 0xff -+ -+ -+#endif /* notdef _ELAN3_TRTYPE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/urom_addrs.h linux-2.6.9/include/elan3/urom_addrs.h ---- clean/include/elan3/urom_addrs.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/urom_addrs.h 2002-07-12 10:28:21.000000000 -0400 -@@ -0,0 +1,262 @@ -+#define MI_WaitForRemoteDescRead 0x0 -+#define MI_WaitForRemoteDescRead2 0x1 -+#define MI_WaitForRemoteDescRead2_seq1 0x2 -+#define MI_SendRemoteDmaRoutes 0x3 -+#define MI_IProcTrapped 0x4 -+#define MI_DProcTrapped 0x5 -+#define MI_CProcTrapped 0x6 -+#define MI_TProcTrapped 0x7 -+#define MI_TestWhichDmaQueue 0x8 -+#define MI_TestWhichDmaQueue_seq1 0x9 -+#define MI_InputRemoteDmaUpdateBPtr 0xa -+#define MI_FixupQueueContextAndRemoteBit 0xb -+#define MI_FixupQueueContextAndRemoteBit_seq1 0xc -+#define MI_FixupQueueContextAndRemoteBit_seq2 0xd -+#define MI_FixupQueueContextAndRemoteBit_seq3 0xe -+#define MI_FixupQueueContextAndRemoteBit_seq4 0xf -+#define MI_RunDmaCommand 0x10 -+#define MI_DoSendRemoteDmaDesc 0x11 -+#define MI_DequeueNonSysCntxDma 0x12 -+#define MI_WaitForRemoteDescRead1 0x13 -+#define MI_RemoteDmaCommand 0x14 -+#define MI_WaitForRemoteRoutes 0x15 -+#define MI_DequeueSysCntxDma 0x16 -+#define MI_ExecuteDmaDescriptorForQueue 0x17 -+#define MI_ExecuteDmaDescriptor1 0x18 -+#define MI_ExecuteDmaDescriptor1_seq1 0x19 -+#define MI_ExecuteDmaDescriptor1_seq2 0x1a -+#define MI_ExecuteDmaDescriptor1_seq3 0x1b -+#define MI_GetNewSizeInProg 0x1c -+#define MI_GetNewSizeInProg_seq1 0x1d -+#define MI_FirstBlockRead 0x1e -+#define MI_ExtraFirstBlockRead 0x1f -+#define MI_UnimplementedError 0x20 -+#define MI_UpdateDescriptor 0x21 -+#define MI_UpdateDescriptor_seq1 0x22 -+#define MI_UpdateDescriptor_seq2 0x23 -+#define MI_UpdateDescriptor_seq3 0x24 -+#define MI_UpdateDescriptor_seq4 0x25 -+#define MI_UpdateDescriptor_seq5 0x26 -+#define MI_GetNextSizeInProg 0x27 -+#define MI_DoStopThisDma 0x28 -+#define MI_DoStopThisDma_seq1 0x29 -+#define MI_GenNewBytesToRead 0x2a -+#define MI_WaitForEventReadTy1 0x2b -+#define MI_WaitUpdateEvent 0x2c -+#define MI_WaitUpdateEvent_seq1 0x2d -+#define MI_DoSleepOneTickThenRunable 0x2e -+#define MI_RunEvent 0x2f -+#define MI_EnqueueThread 0x30 -+#define MI_CheckContext0 0x31 -+#define MI_EnqueueDma 0x32 -+#define MI_CprocTrapping 0x33 -+#define MI_CprocTrapping_seq1 0x34 -+#define MI_WaitForRemoteRoutes1 0x35 -+#define MI_SetEventCommand 0x36 -+#define MI_DoSetEvent 0x37 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma 0x38 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1 0x39 -+#define MI_SendRemoteDmaRoutes2 0x3a -+#define MI_WaitForRemoteRoutes2 0x3b -+#define MI_WaitEventCommandTy0 0x3c -+#define MI_DequeueNonSysCntxDma2 0x3d -+#define MI_WaitEventCommandTy1 0x3e -+#define MI_WaitEventCommandTy1_seq1 0x3f -+#define MI_DequeueNonSysCntxThread 0x40 -+#define MI_DequeueSysCntxDma1 0x41 -+#define MI_DequeueSysCntxThread 0x42 -+#define MI_TestNonSysCntxDmaQueueEmpty 0x43 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq1 0x44 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq2 0x45 -+#define MI_RunThreadCommand 0x46 -+#define MI_SetEventWaitForLastAcess 0x47 -+#define MI_SetEventReadWait 0x48 -+#define MI_SetEventReadWait_seq1 0x49 -+#define MI_TestEventType 0x4a -+#define MI_TestEventType_seq1 0x4b -+#define MI_TestEventBit2 0x4c -+#define MI_DmaDescOrBlockCopyOrChainedEvent 0x4d -+#define MI_RunThread 0x4e -+#define MI_RunThread1 0x4f -+#define MI_RunThread1_seq1 0x50 -+#define MI_IncDmaSysCntxBPtr 0x51 -+#define MI_IncDmaSysCntxBPtr_seq1 0x52 -+#define MI_IncDmaSysCntxBPtr_seq2 0x53 -+#define MI_WaitForCntxDmaDescRead 0x54 -+#define MI_FillInContext 0x55 -+#define MI_FillInContext_seq1 0x56 -+#define MI_WriteNewDescToQueue 0x57 -+#define MI_WriteNewDescToQueue_seq1 0x58 -+#define MI_TestForQueueWrap 0x59 -+#define MI_TestForQueueWrap_seq1 0x5a -+#define MI_TestQueueIsFull 0x5b -+#define MI_TestQueueIsFull_seq1 0x5c -+#define MI_TestQueueIsFull_seq2 0x5d -+#define MI_CheckPsychoShitFixup 0x5e -+#define MI_PsychoShitFixupForcedRead 0x5f -+#define MI_PrepareDMATimeSlice 0x60 -+#define MI_PrepareDMATimeSlice_seq1 0x61 -+#define MI_TProcRestartFromTrapOrTestEventBit2 0x62 -+#define MI_TProcRestartFromTrapOrTestEventBit2_seq1 0x63 -+#define MI_WaitForGlobalsRead 0x64 -+#define MI_WaitForNPCRead 0x65 -+#define MI_EventInterrupt 0x66 -+#define MI_EventInterrupt_seq1 0x67 -+#define MI_EventInterrupt_seq2 0x68 -+#define MI_EventInterrupt_seq3 0x69 -+#define MI_TestSysCntxDmaQueueEmpty 0x6a -+#define MI_TestSysCntxDmaQueueEmpty_seq1 0x6b -+#define MI_TestIfRemoteDesc 0x6c -+#define MI_DoDmaLocalSetEvent 0x6d -+#define MI_DoDmaLocalSetEvent_seq1 0x6e -+#define MI_DoDmaLocalSetEvent_seq2 0x6f -+#define MI_DmaLoop1 0x70 -+#define MI_ExitDmaLoop 0x71 -+#define MI_ExitDmaLoop_seq1 0x72 -+#define MI_RemoteDmaTestPAckType 0x73 -+#define MI_PacketDiscardOrTestFailRecIfCCis0 0x74 -+#define MI_PacketDiscardOrTestFailRecIfCCis0_seq1 0x75 -+#define MI_TestNackFailIsZero2 0x76 -+#define MI_TestNackFailIsZero3 0x77 -+#define MI_DmaFailCountError 0x78 -+#define MI_TestDmaForSysCntx 0x79 -+#define MI_TestDmaForSysCntx_seq1 0x7a -+#define MI_TestDmaForSysCntx_seq2 0x7b -+#define MI_TestAeqB2 0x7c -+#define MI_TestAeqB2_seq1 0x7d -+#define MI_GetNextDmaDescriptor 0x7e -+#define MI_DequeueSysCntxDma2 0x7f -+#define MI_InputSetEvent 0x80 -+#define MI_PutBackSysCntxDma 0x81 -+#define MI_PutBackSysCntxDma_seq1 0x82 -+#define MI_PutBackSysCntxDma_seq2 0x83 -+#define MI_InputRemoteDma 0x84 -+#define MI_InputRemoteDma_seq1 0x85 -+#define MI_WaitOneTickForWakeup1 0x86 -+#define MI_SendRemoteDmaDesc 0x87 -+#define MI_InputLockQueue 0x88 -+#define MI_CloseTheTrappedPacketIfCCis1 0x89 -+#define MI_CloseTheTrappedPacketIfCCis1_seq1 0x8a -+#define MI_PostDmaInterrupt 0x8b -+#define MI_InputUnLockQueue 0x8c -+#define MI_WaitForUnLockDescRead 0x8d -+#define MI_SendEOPforRemoteDma 0x8e -+#define MI_LookAtRemoteAck 0x8f -+#define MI_InputWriteBlockQueue 0x90 -+#define MI_WaitForSpStore 0x91 -+#define MI_TProcNext 0x92 -+#define MI_TProcStoppedRunning 0x93 -+#define MI_InputWriteBlock 0x94 -+#define MI_RunDmaOrDeqNonSysCntxDma 0x95 -+#define MI_ExecuteDmaDescriptorForRun 0x96 -+#define MI_ConfirmQueueLock 0x97 -+#define MI_DmaInputIdentify 0x98 -+#define MI_TProcStoppedRunning2 0x99 -+#define MI_TProcStoppedRunning2_seq1 0x9a -+#define MI_TProcStoppedRunning2_seq2 0x9b -+#define MI_ThreadInputIdentify 0x9c -+#define MI_InputIdWriteAddrAndType3 0x9d -+#define MI_IProcTrappedWriteStatus 0x9e -+#define MI_FinishTrappingEop 0x9f -+#define MI_InputTestTrans 0xa0 -+#define MI_TestAeqB3 0xa1 -+#define MI_ThreadUpdateNonSysCntxBack 0xa2 -+#define MI_ThreadQueueOverflow 0xa3 -+#define MI_RunContext0Thread 0xa4 -+#define MI_RunContext0Thread_seq1 0xa5 -+#define MI_RunContext0Thread_seq2 0xa6 -+#define MI_RunDmaDesc 0xa7 -+#define MI_RunDmaDesc_seq1 0xa8 -+#define MI_RunDmaDesc_seq2 0xa9 -+#define MI_TestAeqB 0xaa -+#define MI_WaitForNonCntxDmaDescRead 0xab -+#define MI_DmaQueueOverflow 0xac -+#define MI_BlockCopyEvent 0xad -+#define MI_BlockCopyEventReadBlock 0xae -+#define MI_BlockCopyWaitForReadData 0xaf -+#define MI_InputWriteWord 0xb0 -+#define MI_TraceSetEvents 0xb1 -+#define MI_TraceSetEvents_seq1 0xb2 -+#define MI_TraceSetEvents_seq2 0xb3 -+#define MI_InputWriteDoubleWd 0xb4 -+#define MI_SendLockTransIfCCis1 0xb5 -+#define MI_WaitForDmaRoutes1 0xb6 -+#define MI_LoadDmaContext 0xb7 -+#define MI_InputTestAndSetWord 0xb8 -+#define MI_InputTestAndSetWord_seq1 0xb9 -+#define MI_GetDestEventValue 0xba -+#define MI_SendDmaIdentify 0xbb -+#define MI_InputAtomicAddWord 0xbc -+#define MI_LoadBFromTransD0 0xbd -+#define MI_ConditionalWriteBackCCTrue 0xbe -+#define MI_WaitOneTickForWakeup 0xbf -+#define MI_SendFinalUnlockTrans 0xc0 -+#define MI_SendDmaEOP 0xc1 -+#define MI_GenLastAddrForPsycho 0xc2 -+#define MI_FailedAckIfCCis0 0xc3 -+#define MI_FailedAckIfCCis0_seq1 0xc4 -+#define MI_WriteDmaSysCntxDesc 0xc5 -+#define MI_TimesliceDmaQueueOverflow 0xc6 -+#define MI_DequeueNonSysCntxThread1 0xc7 -+#define MI_DequeueNonSysCntxThread1_seq1 0xc8 -+#define MI_TestThreadQueueEmpty 0xc9 -+#define MI_ClearThreadQueueIfCC 0xca -+#define MI_DequeueSysCntxThread1 0xcb -+#define MI_DequeueSysCntxThread1_seq1 0xcc -+#define MI_TProcStartUpGeneric 0xcd -+#define MI_WaitForPCload2 0xce -+#define MI_WaitForNPCWrite 0xcf -+#define MI_WaitForEventWaitAddr 0xd0 -+#define MI_WaitForWaitEventAccess 0xd1 -+#define MI_WaitForWaitEventAccess_seq1 0xd2 -+#define MI_WaitForWaitEventDesc 0xd3 -+#define MI_WaitForEventReadTy0 0xd4 -+#define MI_SendCondTestFail 0xd5 -+#define MI_InputMoveToNextTrans 0xd6 -+#define MI_ThreadUpdateSysCntxBack 0xd7 -+#define MI_FinishedSetEvent 0xd8 -+#define MI_EventIntUpdateBPtr 0xd9 -+#define MI_EventQueueOverflow 0xda -+#define MI_MaskLowerSource 0xdb -+#define MI_DmaLoop 0xdc -+#define MI_SendNullSetEvent 0xdd -+#define MI_SendFinalSetEvent 0xde -+#define MI_TestNackFailIsZero1 0xdf -+#define MI_DmaPacketTimedOutOrPacketError 0xe0 -+#define MI_NextPacketIsLast 0xe1 -+#define MI_TestForZeroLengthDma 0xe2 -+#define MI_WaitForPCload 0xe3 -+#define MI_ReadInIns 0xe4 -+#define MI_WaitForInsRead 0xe5 -+#define MI_WaitForLocals 0xe6 -+#define MI_WaitForOutsWrite 0xe7 -+#define MI_WaitForWaitEvWrBack 0xe8 -+#define MI_WaitForLockRead 0xe9 -+#define MI_TestQueueLock 0xea -+#define MI_InputIdWriteAddrAndType 0xeb -+#define MI_InputIdWriteAddrAndType2 0xec -+#define MI_ThreadInputIdentify2 0xed -+#define MI_WriteIntoTrapArea0 0xee -+#define MI_GenQueueBlockWrAddr 0xef -+#define MI_InputDiscardFreeLock 0xf0 -+#define MI_WriteIntoTrapArea1 0xf1 -+#define MI_WriteIntoTrapArea2 0xf2 -+#define MI_ResetBPtrToBase 0xf3 -+#define MI_InputDoTrap 0xf4 -+#define MI_RemoteDmaCntxt0Update 0xf5 -+#define MI_ClearQueueLock 0xf6 -+#define MI_IProcTrappedBlockWriteData 0xf7 -+#define MI_FillContextFilter 0xf8 -+#define MI_IProcTrapped4 0xf9 -+#define MI_RunSysCntxDma 0xfa -+#define MI_ChainedEventError 0xfb -+#define MI_InputTrappingEOP 0xfc -+#define MI_CheckForRunIfZero 0xfd -+#define MI_TestForBreakOrSuspend 0xfe -+#define MI_SwapForRunable 0xff -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/vmseg.h linux-2.6.9/include/elan3/vmseg.h ---- clean/include/elan3/vmseg.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/vmseg.h 2003-09-24 09:57:24.000000000 -0400 -@@ -0,0 +1,75 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _VM_SEG_ELAN3_H -+#define _VM_SEG_ELAN3_H -+ -+#ident "$Id: vmseg.h,v 1.20 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vmseg.h,v $*/ -+ -+#include -+ -+/* -+ * This segment maps Elan registers, it is fixed size and has 8K -+ * pages split up as follows -+ * -+ * ---------------------------------------- -+ * | Performance Counters (read-only) | -+ * ---------------------------------------- -+ * | Flag Page (read-only) | -+ * ---------------------------------------- -+ * | Command Port | -+ * ---------------------------------------- -+ */ -+typedef volatile struct elan3_flagstats -+{ -+ u_int CommandFlag; -+ u_int PageFaults; -+ u_int CProcTraps; -+ u_int DProcTraps; -+ u_int TProcTraps; -+ u_int IProcTraps; -+ u_int EopBadAcks; -+ u_int EopResets; -+ u_int DmaNetworkErrors; -+ u_int DmaIdentifyNetworkErrors; -+ u_int ThreadIdentifyNetworkErrors; -+ u_int DmaRetries; -+ u_int ThreadSystemCalls; -+ u_int ThreadElanCalls; -+ u_int LoadVirtualProcess; -+} ELAN3_FLAGSTATS; -+ -+#ifdef DIGITAL_UNIX -+typedef volatile union elan3_flagpage -+{ -+ u_char Padding[8192]; -+ ELAN3_FLAGSTATS Stats; -+} ELAN3_FLAGPAGE; -+ -+typedef volatile struct elan3_vmseg -+{ -+ E3_CommandPort CommandPort; -+ ELAN3_FLAGPAGE FlagPage; -+ E3_User_Regs UserRegs; -+} ELAN3_VMSEG; -+ -+#define SEGELAN3_SIZE (sizeof (ELAN3_VMSEG)) -+ -+#define SEGELAN3_COMMAND_PORT 0 -+#define SEGELAN3_FLAG_PAGE 1 -+#define SEGELAN3_PERF_COUNTERS 2 -+ -+#endif /* DIGITAL_UNIX */ -+ -+#endif /* _VM_SEG_ELAN3_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan3/vpd.h linux-2.6.9/include/elan3/vpd.h ---- clean/include/elan3/vpd.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan3/vpd.h 2002-08-09 07:23:34.000000000 -0400 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: vpd.h,v 1.5 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vpd.h,v $*/ -+ -+#ifndef __ELAN3_VPD_H -+#define __ELAN3_VPD_H -+ -+#define LARGE_RESOURCE_BIT 0x80 -+ -+#define SMALL_RESOURCE_COMPATIBLE_DEVICE_ID 0x3 -+#define SMALL_RESOURCE_VENDOR_DEFINED 0xE -+#define SMALL_RESOURCE_END_TAG 0xF -+ -+#define LARGE_RESOURCE_STRING 0x2 -+#define LARGE_RESOURCE_VENDOR_DEFINED 0x4 -+#define LARGE_RESOURCE_VITAL_PRODUCT_DATA 0x10 -+ -+#define VPD_PART_NUMBER "PN" -+#define VPD_FRU_PART_NUMBER "FN" -+#define VPD_EC_LEVEL "EC" -+#define VPD_MANUFACTURE_ID "MN" -+#define VPD_SERIAL_NUMBER "SN" -+ -+#define VPD_LOAD_ID "LI" -+#define VPD_ROM_LEVEL "RL" -+#define VPD_ALTERABLE_ROM_LEVEL "RM" -+#define VPD_NETWORK_ADDRESS "NA" -+#define VPD_DEVICE_DRIVER_LEVEL "DD" -+#define VPD_DIAGNOSTIC_LEVEL "DG" -+#define VPD_LOADABLE_MICROCODE_LEVEL "LL" -+#define VPD_VENDOR_ID "VI" -+#define VPD_FUNCTION_NUMBER "FU" -+#define VPD_SUBSYSTEM_VENDOR_ID "SI" -+ -+#endif /* __ELAN3_VPD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/elan4/commands.h linux-2.6.9/include/elan4/commands.h ---- clean/include/elan4/commands.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/commands.h 2004-06-16 11:45:02.000000000 -0400 -@@ -0,0 +1,247 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_COMMANDS_H -+#define __ELAN4_COMMANDS_H -+ -+#ident "$Id: commands.h,v 1.29 2004/06/16 15:45:02 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/commands.h,v $*/ -+ -+/* -+ * This header file describes the command format for the Elan 4 -+ * See CommandFormat.doc -+ */ -+ -+/* -+ * Number of channels in traced elanlib_trace.c -+ */ -+#define TRACE_MAX_CHANNELS 2 -+ -+/* -+ * Define encoding for the commands issued into the command queues -+ */ -+#define RUN_THREAD_CMD 0x00 -+#define OPEN_STEN_PKT_CMD 0x01 -+#define WRITE_DWORD_CMD 0x02 -+#define ADD_DWORD_CMD 0x03 -+#define COPY64_CMD 0x05 -+#define GUARD_CMD 0x06 -+#define SET_EVENT_CMD 0x07 -+#define SEND_TRANS_CMD 0x09 -+#define INTERRUPT_CMD 0x0d -+#define RUN_DMA_CMD 0x0e -+#define SET_EVENTN_CMD 0x0f -+#define NOP_CMD 0x17 -+#define MAKE_EXT_CLEAN_CMD 0x37 -+#define WAIT_EVENT_CMD 0x1f -+ -+/* -+ * Define the portion of the data word the user is NOT -+ * allowed to use. This varies with Commmand type -+ */ -+#define RUN_THREAD_CMD_MASK 0x03 -+#define OPEN_STEN_PKT_CMD_MASK 0x0f -+#define WRITE_DWORD_CMD_MASK 0x07 -+#define ADD_DWORD_CMD_MASK 0x07 -+#define COPY64_CMD_MASK 0x0f -+#define GUARD_CMD_MASK 0x0f -+#define SET_EVENT_CMD_MASK 0x1f -+#define SEND_TRANS_CMD_MASK 0x1f -+#define INTERRUPT_CMD_MASK 0x0f -+#define RUN_DMA_CMD_MASK 0x0f -+#define SET_EVENTN_CMD_MASK 0x1f -+#define NOP_CMD_MASK 0x3f -+#define MAKE_EXT_CLEAN_MASK 0x3f -+#define WAIT_EVENT_CMD_MASK 0x1f -+ -+#define COPY64_DATA_TYPE_SHIFT 0x4 -+#define COPY64_DTYPE_BYTE (0 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_SHORT (1 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_WORD (2 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_LONG (3 << COPY64_DATA_TYPE_SHIFT) -+ -+/* -+ * SET_EVENTN - word 1 has following form -+ * [63:5] Event Address -+ * [4:0] Part Set Value. -+ */ -+#define SET_EVENT_PART_SET_MASK 0x1f -+ -+/* OPEN_STEN_PKT_CMD -+ * [63:32] Vproc -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] Acceptable PAck code -+ * [8:4] Ack Channel Number (1 bit on Elan4) -+ * [3:0] Command type -+ */ -+/* Acceptable PAck code */ -+#define PACK_OK (1 << 0) -+#define PACK_TESTFAIL (1 << 1) -+#define PACK_DISCARD (1 << 2) -+#define RESTART_COUNT_ZERO (1 << 3) -+#define PACK_ERROR (1 << 7) -+#define PACK_TIMEOUT (1 << 8) -+ -+/* -+ *#ifndef USE_DIRTY_COMMANDS -+ *#define USE_DIRTY_COMMANDS -+ *#endif -+ */ -+#ifdef USE_DIRTY_COMMANDS -+#define OPEN_PACKET_USED_MASK 0x00000000780f00e0ULL -+#define SEND_TRANS_USED_MASK 0xffffffff0000fff0ULL -+#define COPY64_WRITE_USED_MASK 0x000000000000000fULL -+#define MAIN_INT_USED_MASK 0x0000000000003ff0ULL -+#define GUARD_USED_MASK 0xfffffe007000fde0ULL -+#define DMA_TYPESIZE_USED_MASK 0x000000000000fff0ULL -+#define SETEVENTN_USED_MASK 0xffffffffffffffe0ULL -+#define NOP_USED_MASK 0xffffffffffffffc0ULL -+#define EXT_CLEAN_USED_MASK 0xffffffffffffffc0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x00000000fffff800ULL -+#else -+#define OPEN_PACKET_USED_MASK 0x0ULL -+#define SEND_TRANS_USED_MASK 0x0ULL -+#define COPY64_WRITE_USED_MASK 0x0ULL -+#define MAIN_INT_USED_MASK 0x0ULL -+#define GUARD_USED_MASK 0x0ULL -+#define DMA_TYPESIZE_USED_MASK 0x0ULL -+#define SETEVENTN_USED_MASK 0x0ULL -+#define NOP_USED_MASK 0x0ULL -+#define EXT_CLEAN_USED_MASK 0x0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x0ULL -+#endif -+ -+#define OPEN_PACKET(chan, code, vproc) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | OPEN_STEN_PKT_CMD) -+ -+#define OPEN_PACKET_TEST(chan, code, vproc, tchan, tcode) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | \ -+ (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | (((E4_uint64) 1) << 31) | OPEN_STEN_PKT_CMD) -+ -+/* -+ * GUARD_CMD -+ * [63:41] unused -+ * [40] Reset Restart Fail Count // only performed if the Guard executes the next command. -+ * [39:32] New Restart Fail Count value -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] unused -+ * [8:4] Ack Channel Number -+ * [3:0] Command type -+ */ -+/* GUARD_CHANNEL(chan) -+ */ -+#define GUARD_ALL_CHANNELS ((1 << 9) | GUARD_CMD) -+#define GUARD_CHANNEL(chan) ((((chan) & 1) << 4) | GUARD_CMD) -+#define GUARD_TEST(chan,code) ((1ull << 31) | (((code) & 0x7f) << 21) | (((chan) & 1) << 16)) -+#define GUARD_RESET(count) ((1ull << 40) | ((((E4_uint64) count) & 0xff) << 32)) -+ -+#define GUARD_CHANNEL_TEST(chan,tchan,tcode) \ -+ ((((chan) & 1) << 4) | (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | \ -+ (((E4_uint64) 1) << 31) | GUARD_CMD) -+ -+/* -+ * SEND_TRANS_CMD -+ * [63:32] unused -+ * [31:16] transaction type -+ * [15:4] unused -+ * [3:0] Command type -+ */ -+#define SEND_TRANS(TransType) (((TransType) << 16) | SEND_TRANS_CMD) -+ -+/* -+ * Command port trace debug levels -+ */ -+#define TRACE_CMD_BUFFER 0x01 -+#define TRACE_CMD_TYPE 0x02 -+#define TRACE_CHANNEL_OPENS 0x04 -+#define TRACE_GUARDED_ATOMICS 0x08 -+#define TRACE_CMD_TIMEOUT 0x10 -+ -+/* -+ * Commands that should be preceeded by a GUARD_CMD. -+ */ -+#define IS_ATOMIC_CMD(cmd) \ -+ ((cmd) == RUN_THREAD_CMD || (cmd) == ADD_DWORD_CMD || (cmd) == INTERRUPT_CMD || \ -+ (cmd) == RUN_DMA_CMD || (cmd) == SET_EVENT_CMD || (cmd) == SET_EVENTN_CMD || \ -+ (cmd) == WAIT_EVENT_CMD) -+ -+#ifndef _ASM -+ -+/* -+ * These structures are used to build event copy command streams. They are intended to be included -+ * in a larger structure to form a self documenting command sequence that can be easily coped and manipulated. -+ */ -+ -+typedef struct e4_runthreadcmd -+{ -+ E4_Addr PC; -+ E4_uint64 r[6]; -+} E4_RunThreadCmd; -+ -+typedef E4_uint64 E4_OpenCmd; -+ -+typedef struct e4_writecmd -+{ -+ E4_Addr WriteAddr; -+ E4_uint64 WriteValue; -+} E4_WriteCmd; -+ -+typedef struct e4_addcmd -+{ -+ E4_Addr AddAddr; -+ E4_uint64 AddValue; -+} E4_AddCmd; -+ -+typedef struct e4_copycmd -+{ -+ E4_Addr SrcAddr; -+ E4_Addr DstAddr; -+} E4_CopyCmd; -+ -+typedef E4_uint64 E4_GaurdCmd; -+typedef E4_uint64 E4_SetEventCmd; -+ -+/* -+ * The data to this command must be declared as a vector after the use of this. -+ */ -+typedef struct e4_sendtranscmd -+{ -+ E4_Addr Type; -+ E4_Addr Addr; -+} E4_SendTransCmd; -+ -+typedef E4_uint64 E4_IntCmd; -+ -+/* The normal Dma struc can be used here. */ -+ -+typedef struct e4_seteventncmd -+{ -+ E4_Addr Event; -+ E4_Addr SetCount; -+} E4_SetEventNCmd; -+ -+typedef E4_uint64 E4_NopCmd; -+typedef E4_uint64 E4_MakeExtCleanCmd; -+ -+typedef struct e4_waitcmd -+{ -+ E4_Addr ev_Event; -+ E4_Addr ev_CountType; -+ E4_Addr ev_Params[2]; -+} E4_WaitCmd; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_COMMANDS_H */ -+ -diff -urN clean/include/elan4/debug.h linux-2.6.9/include/elan4/debug.h ---- clean/include/elan4/debug.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/debug.h 2005-03-23 06:06:15.000000000 -0500 -@@ -0,0 +1,112 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_DEBUG_H -+#define _ELAN4_DEBUG_H -+ -+#ident "$Id: debug.h,v 1.21 2005/03/23 11:06:15 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.h,v $ */ -+ -+/* values for "type" field - note a "ctxt" is permissible */ -+/* and BUFFER/CONSOLE are for explict calls to elan4_debugf() */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_USER ((void *) 1) -+ -+#define DBG_BUFFER ((void *) 62) -+#define DBG_CONSOLE ((void *) 63) -+#define DBG_NTYPES 64 -+ -+/* values for "mode" field */ -+#define DBG_CONFIG 0x00000001 -+#define DBG_INTR 0x00000002 -+#define DBG_MAININT 0x00000004 -+#define DBG_SDRAM 0x00000008 -+#define DBG_MMU 0x00000010 -+#define DBG_REGISTER 0x00000020 -+#define DBG_CQ 0x00000040 -+#define DBG_NETWORK_CTX 0x00000080 -+ -+#define DBG_FLUSH 0x00000100 -+#define DBG_FILE 0x00000200 -+#define DBG_CONTROL 0x00000400 -+#define DBG_MEM 0x00000800 -+ -+#define DBG_PERM 0x00001000 -+#define DBG_FAULT 0x00002000 -+#define DBG_SWAP 0x00004000 -+#define DBG_TRAP 0x00008000 -+#define DBG_DDCQ 0x00010000 -+#define DBG_VP 0x00020000 -+#define DBG_RESTART 0x00040000 -+#define DBG_RESUME 0x00080000 -+#define DBG_CPROC 0x00100000 -+#define DBG_DPROC 0x00200000 -+#define DBG_EPROC 0x00400000 -+#define DBG_IPROC 0x00800000 -+#define DBG_TPROC 0x01000000 -+#define DBG_IOPROC 0x02000000 -+#define DBG_ROUTE 0x04000000 -+#define DBG_NETERR 0x08000000 -+ -+#define DBG_ALL 0x7FFFFFFF -+ -+ -+#ifdef DEBUG_PRINTF -+ -+# define PRINTF0(type,m,fmt) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt) : (void)0) -+# define PRINTF1(type,m,fmt,a) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a) : (void)0) -+# define PRINTF2(type,m,fmt,a,b) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b) : (void)0) -+# define PRINTF3(type,m,fmt,a,b,c) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(type,m,fmt,a,b,c,d) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f) : (void)0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h,i): (void)0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m, ##args) : (void)0) -+#endif -+# define DBGCMD(type,m,cmd) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? (void) (cmd) : (void) 0) -+ -+#else -+ -+# define PRINTF0(type,m,fmt) (0) -+# define PRINTF1(type,m,fmt,a) (0) -+# define PRINTF2(type,m,fmt,a,b) (0) -+# define PRINTF3(type,m,fmt,a,b,c) (0) -+# define PRINTF4(type,m,fmt,a,b,c,d) (0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) (0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) (0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) (0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) (0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) (0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) -+#endif -+# define DBGCMD(type,m,cmd) ((void) 0) -+ -+#endif /* DEBUG_PRINTF */ -+ -+extern unsigned elan4_debug; -+extern unsigned elan4_debug_toconsole; -+extern unsigned elan4_debug_tobuffer; -+extern unsigned elan4_debug_display_ctxt; -+extern unsigned elan4_debug_ignore_ctxt; -+extern unsigned elan4_debug_ignore_type; -+ -+extern void elan4_debug_init(void); -+extern void elan4_debug_fini(void); -+extern void elan4_debugf (void *type, int mode, char *fmt, ...); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_DEBUG_H */ -diff -urN clean/include/elan4/device.h linux-2.6.9/include/elan4/device.h ---- clean/include/elan4/device.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/device.h 2005-08-09 05:57:04.000000000 -0400 -@@ -0,0 +1,888 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_H -+#define __ELAN4_ELANDEV_H -+ -+#ident "$Id: device.h,v 1.92.2.2 2005/08/09 09:57:04 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.h,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#elif defined(TRU64UNIX) -+#include -+#elif defined(SOLARIS) -+#include -+#endif -+ -+/* -+ * Network context number allocation. -+ * [0] neterr fixup system context -+ * [1] kernel comms system context -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN4_NETERR_CONTEXT_NUM 0x00 /* network error fixup context number */ -+#define ELAN4_KCOMM_CONTEXT_NUM 0x01 /* kernel comms context number */ -+#define ELAN4_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN4_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN4_SYSTEM_CONTEXT(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+typedef void (ELAN4_HALTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_haltop -+{ -+ struct list_head op_link; /* chain on a list */ -+ E4_uint32 op_mask; /* Interrupt mask to see before calling function */ -+ -+ ELAN4_HALTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_HALTOP; -+ -+typedef void (ELAN4_DMA_FLUSHFN)(struct elan4_dev *dev, void *arg, int qfull); -+ -+typedef struct elan4_dma_flushop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_DMA_FLUSHFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_DMA_FLUSHOP; -+ -+typedef void (ELAN4_INTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_intop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_INTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+ E4_uint64 op_cookie; /* and main interrupt cookie */ -+} ELAN4_INTOP; -+ -+typedef struct elan4_eccerrs -+{ -+ E4_uint64 EccStatus; -+ E4_uint64 ConfigReg; -+ E4_uint32 ErrorCount; -+} ELAN4_ECCERRS; -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 19 /* allows max 256 Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+ -+#if PAGE_SHIFT < 13 -+#define SDRAM_PAGE_SIZE 8192 -+#define SDRAM_PGOFF_OFFSET 1 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#else -+#define SDRAM_PAGE_SIZE PAGE_SIZE -+#define SDRAM_PGOFF_OFFSET 0 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#endif -+ -+typedef struct elan4_sdram -+{ -+ sdramaddr_t b_base; /* offset in sdram bar */ -+ unsigned b_size; /* size of bank */ -+ ioaddr_t b_ioaddr; /* ioaddr where mapped into the kernel */ -+ ELAN4_MAP_HANDLE b_handle; /* and mapping handle */ -+ bitmap_t *b_bitmaps[SDRAM_NUM_FREE_LISTS]; /* buddy allocator bitmaps */ -+} ELAN4_SDRAM_BANK; -+ -+/* command queue */ -+typedef struct elan4_cq -+{ -+ struct elan4_cqa *cq_cqa; /* command queue allocator this belongs to */ -+ unsigned cq_idx; /* and which command queue this is */ -+ -+ sdramaddr_t cq_space; /* sdram backing up command queue */ -+ unsigned cq_size; /* size value */ -+ unsigned cq_perm; /* permissions */ -+ ioaddr_t cq_mapping; /* mapping of command queue page */ -+ ELAN4_MAP_HANDLE cq_handle; /* and mapping handle */ -+} ELAN4_CQ; -+ -+/* cqtype flags to elan4_alloccq() */ -+#define CQ_Priority (1 << 0) -+#define CQ_Reorder (1 << 1) -+ -+/* command queues are allocated in chunks,so that all the -+ * command ports are in a single system page */ -+#define ELAN4_CQ_PER_CQA MAX(1, (PAGESIZE/CQ_CommandMappingSize)) -+ -+/* maximum number of command queues per context */ -+#define ELAN4_MAX_CQA (256 / ELAN4_CQ_PER_CQA) -+ -+typedef struct elan4_cqa -+{ -+ struct list_head cqa_link; /* linked together */ -+ bitmap_t cqa_bitmap[BT_BITOUL(ELAN4_CQ_PER_CQA)]; /* bitmap of which are free */ -+ unsigned int cqa_type; /* allocation type */ -+ unsigned int cqa_cqnum; /* base cq number */ -+ unsigned int cqa_ref; /* "mappings" to a queue */ -+ unsigned int cqa_idx; /* index number */ -+ ELAN4_CQ cqa_cq[ELAN4_CQ_PER_CQA]; /* command queue entries */ -+} ELAN4_CQA; -+ -+#define elan4_cq2num(cq) ((cq)->cq_cqa->cqa_cqnum + (cq)->cq_idx) -+#define elan4_cq2idx(cq) ((cq)->cq_cqa->cqa_idx * ELAN4_CQ_PER_CQA + (cq)->cq_idx) -+ -+#ifdef ELAN4_LARGE_PAGE_SUPPORT -+#define NUM_HASH_TABLES 2 -+#else -+#define NUM_HASH_TABLES 1 -+#endif -+ -+typedef struct elan4_ctxt_trans_index -+{ -+ int tbl; -+ int index; -+} ELAN4_CTXT_TRANS_INDEX; -+ -+#define ELAN4_CTXT_MAX_SHUFFLE (10) -+ -+typedef struct elan4_ctxt -+{ -+ struct elan4_dev *ctxt_dev; /* device we're associated with */ -+ struct list_head ctxt_link; /* chained on device */ -+ -+ struct elan4_trap_ops *ctxt_ops; /* client specific operations */ -+ -+ unsigned int ctxt_features; /* device features this context is using */ -+ signed int ctxt_num; /* local context number */ -+ -+ struct list_head ctxt_cqalist; /* link list of command queue allocators */ -+ bitmap_t ctxt_cqamap[BT_BITOUL(ELAN4_MAX_CQA)]; /* bitmap for allocating cqa_idx */ -+ -+ ELAN4_HASH_ENTRY **ctxt_mmuhash[NUM_HASH_TABLES]; /* software hash tables */ -+ spinlock_t ctxt_mmulock; /* and spinlock. */ -+ -+ struct proc_dir_entry *procdir; -+ ELAN4_CTXT_TRANS_INDEX trans_index[NUM_HASH_TABLES]; /* place to stash info for /proc/qsnet/elan4/deviceN/ctxt/N/translations_N */ -+ -+ int shuffle_needed[NUM_HASH_TABLES]; /* true when there are entries in shuffle array */ -+ int shuffle[NUM_HASH_TABLES][ELAN4_CTXT_MAX_SHUFFLE]; /* hashidx's that need shuffling or -1 = none. if all set then shuffle ALL hashidx's */ -+} ELAN4_CTXT; -+ -+typedef struct elan4_trap_ops -+{ -+ void (*op_eproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_cproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum); -+ void (*op_dproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_tproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_iproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_interrupt) (ELAN4_CTXT *ctxt, E4_uint64 cookie); -+ void (*op_neterrmsg) (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg); -+ void (*op_need_shuffle) (ELAN4_CTXT *ctxt, int tbl, int index); -+} ELAN4_TRAP_OPS; -+ -+typedef struct elan4_route_table -+{ -+ spinlock_t tbl_lock; -+ unsigned tbl_size; -+ sdramaddr_t tbl_entries; -+} ELAN4_ROUTE_TABLE; -+ -+#define DEV_STASH_ROUTE_COUNT 20 -+ -+typedef struct elan4_route_ringbuf { -+ int start; -+ int end; -+ E4_VirtualProcessEntry routes[DEV_STASH_ROUTE_COUNT]; -+} ELAN4_ROUTE_RINGBUF; -+ -+#define elan4_ringbuf_init(ringbuf) memset(&ringbuf, 0, sizeof(ELAN4_ROUTE_RINGBUF)); -+ -+typedef struct elan4_trans_index -+{ -+ int tbl; -+} ELAN4_TRANS_INDEX; -+ -+#define ELAN4_TRANS_STATS_NUM_BUCKETS 7 -+typedef struct elan4_trans_stats -+{ -+ int tbl; -+ int buckets[ELAN4_TRANS_STATS_NUM_BUCKETS]; -+} ELAN4_TRANS_STATS; -+ -+typedef struct elan4_dev -+{ -+ ELAN4_CTXT dev_ctxt; /* context for device operations */ -+ -+ ELAN4_DEV_OSDEP dev_osdep; /* OS specific entries */ -+ -+ int dev_instance; /* device number */ -+ ELAN_DEVINFO dev_devinfo; /* device information (revision etc */ -+ ELAN_POSITION dev_position; /* position connected to switch */ -+ ELAN_DEV_IDX dev_idx; /* device idx registered with elanmod */ -+ -+ kmutex_t dev_lock; /* lock for device state/references */ -+ unsigned dev_state; /* device state */ -+ unsigned dev_references; /* # references */ -+ -+ ioaddr_t dev_regs; /* Mapping of device registers */ -+ ELAN4_MAP_HANDLE dev_regs_handle; -+ ioaddr_t dev_rom; /* Mapping of rom */ -+ ELAN4_MAP_HANDLE dev_rom_handle; -+ ioaddr_t dev_i2c; /* Mapping of I2C registers */ -+ ELAN4_MAP_HANDLE dev_i2c_handle; -+ -+ E4_uint64 dev_sdram_cfg; /* SDRAM config value (from ROM) */ -+ E4_uint64 dev_sdram_initial_ecc_val; /* power on ECC register value */ -+ int dev_sdram_numbanks; /* # banks of sdram */ -+ ELAN4_SDRAM_BANK dev_sdram_banks[SDRAM_MAX_BANKS]; /* Mapping of sdram banks */ -+ spinlock_t dev_sdram_lock; /* spinlock for buddy allocator */ -+ sdramaddr_t dev_sdram_freelists[SDRAM_NUM_FREE_LISTS]; -+ unsigned dev_sdram_freecounts[SDRAM_NUM_FREE_LISTS]; -+ -+ physaddr_t dev_regs_phys; /* physaddr of registers */ -+ physaddr_t dev_sdram_phys; /* and of sdram */ -+ -+ sdramaddr_t dev_cacheflush_space; /* sdram reserved for cache flush operation */ -+ -+ sdramaddr_t dev_faultarea; /* fault areas for each unit */ -+ sdramaddr_t dev_inputtraparea; /* trap area for trapped transactions */ -+ sdramaddr_t dev_ctxtable; /* context table (E4_ContextControlBlock) */ -+ int dev_ctxtableshift; /* and size (in bits) */ -+ -+ E4_uint32 dev_syscontrol; /* copy of system control register */ -+ spinlock_t dev_syscontrol_lock; /* spinlock to sequentialise modifications */ -+ unsigned dev_direct_map_pci_writes; /* # counts for CONT_DIRECT_MAP_PCI_WRITES */ -+ -+ volatile E4_uint32 dev_intmask; /* copy of interrupt mask register */ -+ spinlock_t dev_intmask_lock; /* spinlock to sequentialise modifications */ -+ -+ /* i2c section */ -+ spinlock_t dev_i2c_lock; /* spinlock for i2c operations */ -+ unsigned int dev_i2c_led_disabled; /* count of reasons led auto update disabled */ -+ -+ /* mmu section */ -+ unsigned dev_pagesizeval[NUM_HASH_TABLES]; /* page size value */ -+ unsigned dev_pageshift[NUM_HASH_TABLES]; /* pageshift in bits. */ -+ unsigned dev_hashsize[NUM_HASH_TABLES]; /* # entries in mmu hash table */ -+ sdramaddr_t dev_hashtable[NUM_HASH_TABLES]; /* mmu hash table */ -+ ELAN4_HASH_ENTRY *dev_mmuhash[NUM_HASH_TABLES]; /* and software shadow */ -+ ELAN4_HASH_ENTRY *dev_mmufree_count; /* space holder - will use to indicate if there is a free slot in chain */ -+ ELAN4_HASH_ENTRY *dev_mmufreelist; /* and free blocks */ -+ spinlock_t dev_mmulock; -+ E4_uint16 dev_topaddr[4]; /* top address values */ -+ unsigned char dev_topaddrvalid; -+ unsigned char dev_topaddrmode; -+ unsigned char dev_pteval; /* allow setting of relaxed order/dont snoop attributes */ -+ -+ unsigned dev_rsvd_hashmask[NUM_HASH_TABLES]; -+ unsigned dev_rsvd_hashval[NUM_HASH_TABLES]; -+ -+ ELAN4_TRANS_INDEX trans_index[NUM_HASH_TABLES]; /* place to stash info for /proc/qsnet/elan4/deviceN/translations_N */ -+ ELAN4_TRANS_STATS trans_stats[NUM_HASH_TABLES]; /* place to stash info for /proc/qsnet/elan4/deviceN/stats/translations_N */ -+ /* run queues */ -+ sdramaddr_t dev_comqlowpri; /* CProc low & high pri run queues */ -+ sdramaddr_t dev_comqhighpri; -+ -+ sdramaddr_t dev_dmaqlowpri; /* DProc,TProc,Interrupt queues */ -+ sdramaddr_t dev_dmaqhighpri; -+ sdramaddr_t dev_threadqlowpri; -+ sdramaddr_t dev_threadqhighpri; -+ sdramaddr_t dev_interruptq; -+ -+ E4_uint32 dev_interruptq_nfptr; /* cache next main interrupt fptr */ -+ struct list_head dev_interruptq_list; /* list of operations to call when space in interruptq*/ -+ -+ /* command queue section */ -+ sdramaddr_t dev_cqaddr; /* SDRAM address of command queues */ -+ unsigned dev_cqoffset; /* offset for command queue alignment constraints */ -+ unsigned dev_cqcount; /* number of command queue descriptors */ -+ bitmap_t *dev_cqamap; /* bitmap for allocation */ -+ spinlock_t dev_cqlock; /* spinlock to protect bitmap */ -+ unsigned dev_cqreorder; /* offset for first re-ordering queue with mtrr */ -+ -+ /* halt operation section */ -+ struct list_head dev_haltop_list; /* list of operations to call when units halted */ -+ E4_uint32 dev_haltop_mask; /* mask of which ones to halt */ -+ E4_uint32 dev_haltop_active; /* mask of which haltops are executing */ -+ spinlock_t dev_haltop_lock; /* and their spinlock */ -+ struct timer_list dev_haltop_timer; /* timer looking for haltop locked in list */ -+ -+ struct { -+ struct list_head list; /* list of halt operations for DMAs */ -+ ELAN4_CQ *cq; /* and command queue's */ -+ ELAN4_INTOP intop; /* and main interrupt op */ -+ E4_uint64 status; /* status register (when waiting for intop)*/ -+ } dev_dma_flushop[2]; -+ -+ unsigned dev_halt_all_count; /* count of reasons to halt all units */ -+ unsigned dev_halt_lowpri_count; /* count of reasons to halt lowpri queues */ -+ unsigned dev_halt_cproc_count; /* count of reasons to halt command processor */ -+ unsigned dev_halt_dproc_count; /* count of reasons to halt dma processor */ -+ unsigned dev_halt_tproc_count; /* count of reasons to halt thread processor */ -+ unsigned dev_discard_all_count; /* count of reasons to discard all packets */ -+ unsigned dev_discard_lowpri_count; /* count of reasons to discard non-system packets */ -+ unsigned dev_discard_highpri_count; /* count of reasons to discard system packets */ -+ -+ E4_uint32 dev_schedstatus; /* copy of schedule status register */ -+ -+ /* local context allocation section */ -+ spinlock_t dev_ctxlock; /* spinlock to protect bitmap */ -+ bitmap_t *dev_ctxmap; /* bitmap for local context allocation */ -+ -+ spinlock_t dev_ctxt_lock; /* spinlock to protect context list */ -+ struct list_head dev_ctxt_list; /* linked list of contexts */ -+ -+ /* locks to sequentialise interrupt handling */ -+ spinlock_t dev_trap_lock; /* spinlock while handling a trap */ -+ spinlock_t dev_requeue_lock; /* spinlock sequentialising cproc requeue */ -+ -+ /* error rate interrupt section */ -+ long dev_error_time; /* lbolt at start of sampling period */ -+ unsigned dev_errors_per_period; /* errors so far this sampling period */ -+ timer_fn_t dev_error_timeoutid; /* timeout to re-enable error interrupts */ -+ timer_fn_t dev_linkerr_timeoutid; /* timeout to clear link error led */ -+ -+ /* kernel threads */ -+ unsigned dev_stop_threads:1; /* kernel threads should exit */ -+ -+ /* main interrupt thread */ -+ kcondvar_t dev_mainint_wait; /* place for mainevent interrupt thread to sleep */ -+ spinlock_t dev_mainint_lock; /* and it's spinlock */ -+ unsigned dev_mainint_started:1; -+ unsigned dev_mainint_stopped:1; -+ -+ /* device context - this is used to flush insert cache/instruction cache/dmas & threads */ -+ ELAN4_CPROC_TRAP dev_cproc_trap; /* space to extract cproc trap into */ -+ -+ struct list_head dev_intop_list; /* list of main interrupt operations */ -+ spinlock_t dev_intop_lock; /* and spinlock */ -+ E4_uint64 dev_intop_cookie; /* and next cookie to use */ -+ -+ spinlock_t dev_flush_lock; /* spinlock for flushing */ -+ kcondvar_t dev_flush_wait; /* and place to sleep */ -+ -+ ELAN4_CQ *dev_flush_cq[COMMAND_INSERTER_CACHE_ENTRIES]; /* command queues to flush the insert cache */ -+ ELAN4_INTOP dev_flush_op[COMMAND_INSERTER_CACHE_ENTRIES]; /* and a main interrupt operation for each one */ -+ unsigned dev_flush_finished; /* flush command finished */ -+ -+ ELAN4_HALTOP dev_iflush_haltop; /* halt operation for icache flush */ -+ unsigned dev_iflush_queued:1; /* icache haltop queued */ -+ -+ ELAN4_ROUTE_TABLE *dev_routetable; /* virtual process table (for dma queue flush)*/ -+ sdramaddr_t dev_sdrampages[2]; /* pages of sdram to hold suspend code sequence */ -+ E4_Addr dev_tproc_suspend; /* st8suspend instruction */ -+ E4_Addr dev_tproc_space; /* and target memory */ -+ -+ sdramaddr_t dev_neterr_inputq; /* network error input queue descriptor & event */ -+ sdramaddr_t dev_neterr_slots; /* network error message slots */ -+ ELAN4_CQ *dev_neterr_msgcq; /* command queue for sending messages */ -+ ELAN4_CQ *dev_neterr_intcq; /* command queue for message received interrupt */ -+ ELAN4_INTOP dev_neterr_intop; /* and it's main interrupt operation */ -+ E4_uint64 dev_neterr_queued; /* # message queued in msgcq */ -+ spinlock_t dev_neterr_lock; /* and spinlock .... */ -+ -+ ELAN4_DEV_STATS dev_stats; /* device statistics */ -+ ELAN4_ECCERRS dev_sdramerrs[30]; /* last few sdram errors for procfs */ -+ -+ unsigned int *dev_ack_errors; /* Map of source of dproc ack errors */ -+ ELAN4_ROUTE_RINGBUF dev_ack_error_routes; -+ unsigned int *dev_dproc_timeout; /* Ditto dproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_dproc_timeout_routes; -+ unsigned int *dev_cproc_timeout; /* Ditto cproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_cproc_timeout_routes; -+ -+ unsigned dev_linkerr_signalled; /* linkerror signalled to switch controller */ -+ -+ struct list_head dev_hc_list; /* list of the allocated hash_chunks */ -+ -+ ELAN4_IPROC_TRAP dev_iproc_trap; /* space for iproc trap */ -+} ELAN4_DEV; -+ -+/* values for dev_state */ -+#define ELAN4_STATE_STOPPED (1 << 0) /* device initialised but not started */ -+#define ELAN4_STATE_STARTING (1 << 1) /* device in process of starting */ -+#define ELAN4_STATE_STARTED (1 << 2) /* device started */ -+#define ELAN4_STATE_STOPPING (1 << 3) /* device in process of stopping */ -+ -+extern __inline__ unsigned long long -+__elan4_readq (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+#if defined(__i386) -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t save[2]; -+ uint64_t rval; -+ unsigned long flags, cr0; -+ -+ local_irq_save (flags); -+ -+ /* Save FPU state */ -+ asm volatile("mov %%cr0,%0 ; clts\n" : "=r" (cr0)); -+ -+ /* GNAT 7726: Save 128-bit xmm0 register value */ -+ asm volatile ("movupd %%xmm0,%0\n" : "=m" (save[0])); -+ -+ /* Do a 64-bit PCI read */ -+ asm volatile ("sfence\n" -+ "movq (%1), %%xmm0\n" -+ "movq %%xmm0, %0\n" -+ "sfence\n" -+ : "=m" (rval) : "r" (addr) : "memory"); -+ -+ /* GNAT 7726: Restore 128-bit xmm0 register value */ -+ asm volatile("movupd %0,%%xmm0\n" : : "m" (save[0])); -+ -+ /* Restore FPU state */ -+ asm volatile("mov %0,%%cr0\n" : : "r" (cr0)); -+ -+ local_irq_restore(flags); -+ -+ return rval; -+ } -+#endif -+ return readq ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readl (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffffffff); -+ } -+ return readl ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readw (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffff); -+ } -+ return readw ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readb (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xff); -+ } -+ return readb ((void *)addr); -+} -+ -+/* macros for accessing dev->dev_regs.Tags. */ -+#define write_tag(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Tags.what))) -+#define read_tag(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, Tags.what)) -+ -+/* macros for accessing dev->dev_regs.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Regs.what))) -+#define write_reg32(dev,what,val) writel (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Regs.what))) -+#define read_reg64(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+#define read_reg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+ -+/* macros for accessing dev->dev_regs.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, uRegs.what))) -+#define write_ureg32(dev,what,val) writel (val, (void *) (dev->dev_regs + offsetof (E4_Registers, uRegs.what))) -+#define read_ureg64(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+#define read_ureg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+ -+/* macros for accessing dev->dev_i2c */ -+#define write_i2c(dev,what,val) writeb (val, (void *) (dev->dev_i2c + offsetof (E4_I2C, what))) -+#define read_i2c(dev,what) __elan4_readb (dev, dev->dev_i2c + offsetof (E4_I2C, what)) -+ -+/* macros for accessing dev->dev_rom */ -+#define read_ebus_rom(dev,off) __elan4_readb (dev, dev->dev_rom + off) -+ -+/* PIO flush operations - ensure writes to registers/sdram are ordered */ -+#ifdef CONFIG_IA64_SGI_SN2 -+#define pioflush_reg(dev) read_reg32(dev,InterruptReg) -+#define pioflush_sdram(dev) elan4_sdram_readl(dev, 0) -+#else -+#define pioflush_reg(dev) mb() -+#define pioflush_sdram(dev) mb() -+#endif -+ -+/* macros for manipulating the interrupt mask register */ -+#define SET_INT_MASK(dev,value) \ -+do { \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask = (value)); \ -+ pioflush_reg(dev);\ -+} while (0) -+ -+#define CHANGE_INT_MASK(dev, value) \ -+do { \ -+ if ((dev)->dev_intmask != (value)) \ -+ {\ -+ write_reg32 (dev, InterruptMask, (dev)->dev_intmask = (value));\ -+ pioflush_reg(dev);\ -+ }\ -+} while (0) -+ -+#define ENABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define DISABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask &= ~(value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define SET_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if ((dev)->what++ == 0) \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CLEAR_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if (--(dev)->what == 0)\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol &= ~(value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define PULSE_SYSCONTROL(dev,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol | (value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CHANGE_SYSCONTROL(dev,add,sub) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ dev->dev_syscontrol |= (add);\ -+ dev->dev_syscontrol &= ~(sub);\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol);\ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define SET_SCHED_STATUS(dev, value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+#define CHANGE_SCHED_STATUS(dev, value)\ -+do {\ -+ if ((dev)->dev_schedstatus != (value))\ -+ {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+ }\ -+} while (0) -+ -+#define PULSE_SCHED_RESTART(dev,value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Restart, value);\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+/* device context elan address space */ -+#define DEVICE_TPROC_SUSPEND_ADDR (0x1000000000000000ull) -+#define DEVICE_TPROC_SPACE_ADDR (0x1000000000000000ull + SDRAM_PAGE_SIZE) -+#if defined(__LITTLE_ENDIAN__) -+# define DEVICE_TPROC_SUSPEND_INSTR 0xd3f040c0 /* st64suspend %r16, [%r1] */ -+#else -+# define DEVICE_TPROC_SUSPEND_INSTR 0xc040f0d3 /* st64suspend %r16, [%r1] */ -+#endif -+ -+#define DEVICE_NETERR_INPUTQ_ADDR (0x2000000000000000ull) -+#define DEVICE_NETERR_INTCQ_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE) -+#define DEVICE_NETERR_SLOTS_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE*2) -+ -+/* -+ * Interrupt operation cookie space -+ * [50:48] type -+ * [47:0] value -+ */ -+#define INTOP_PERSISTENT (0x1000000000000ull) -+#define INTOP_ONESHOT (0x2000000000000ull) -+#define INTOP_TYPE_MASK (0x3000000000000ull) -+#define INTOP_VALUE_MASK (0x0ffffffffffffull) -+ -+/* functions for accessing sdram - sdram.c */ -+extern unsigned char elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern void elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+/* device.c - configuration */ -+extern unsigned int elan4_hash_0_size_val; -+extern unsigned int elan4_hash_1_size_val; -+extern unsigned int elan4_ctxt_table_shift; -+extern unsigned int elan4_ln2_max_cqs; -+extern unsigned int elan4_dmaq_highpri_size; -+extern unsigned int elan4_threadq_highpri_size; -+extern unsigned int elan4_dmaq_lowpri_size; -+extern unsigned int elan4_threadq_lowpri_size; -+extern unsigned int elan4_interruptq_size; -+extern unsigned int elan4_mainint_punt_loops; -+extern unsigned int elan4_mainint_resched_ticks; -+extern unsigned int elan4_linkport_lock; -+extern unsigned int elan4_eccerr_recheck; -+ -+/* device.c */ -+extern void elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg); -+extern void elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op); -+extern void elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op); -+extern void elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri); -+extern void elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+ -+extern int elan4_1msi0 (ELAN4_DEV *dev); -+ -+extern int elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops); -+extern void elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt); -+extern ELAN4_CTXT *elan4_localctxt (ELAN4_DEV *dev, unsigned num); -+extern ELAN4_CTXT *elan4_networkctxt (ELAN4_DEV *dev, unsigned num); -+ -+extern int elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state); -+extern void elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl); -+ -+extern ELAN4_CQA * elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern void elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern ELAN4_CQ *elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned cqperm, unsigned cqtype); -+extern void elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq); -+extern void elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart); -+ -+extern void elan4_flush_icache (ELAN4_CTXT *ctxt); -+extern void elan4_flush_icache_halted (ELAN4_CTXT *ctxt); -+ -+extern int elan4_initialise_device (ELAN4_DEV *dev); -+extern void elan4_finalise_device (ELAN4_DEV *dev); -+extern int elan4_start_device (ELAN4_DEV *dev); -+extern void elan4_stop_device (ELAN4_DEV *dev); -+ -+extern int elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned aritiyval); -+extern int elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern int elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern void elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask); -+extern void elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask); -+ -+ -+extern int elan4_read_vpd(ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) ; -+ -+extern void proc_insertctxt(ELAN4_DEV *dev,ELAN4_CTXT *ctxt); -+extern void proc_removectxt(ELAN4_DEV *dev,ELAN4_CTXT *ctxt); -+ -+extern int elan4_route2str (E4_VirtualProcessEntry *route, char *routeStr); -+extern void elan4_hardware_lock_check(ELAN4_DEV *dev, char *from); -+ -+/* device_osdep.c */ -+extern unsigned int elan4_pll_cfg; -+extern int elan4_pll_div; -+extern int elan4_mod45disable; -+extern int assfail_mode; -+ -+extern int elan4_pciinit (ELAN4_DEV *dev); -+extern void elan4_pcifini (ELAN4_DEV *dev); -+extern void elan4_updatepll (ELAN4_DEV *dev, unsigned int val); -+extern void elan4_pcierror (ELAN4_DEV *dev); -+ -+extern ELAN4_DEV *elan4_reference_device (int instance, int state); -+extern void elan4_dereference_device (ELAN4_DEV *dev); -+ -+extern ioaddr_t elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern void elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern unsigned long elan4_resource_len (ELAN4_DEV *dev, unsigned bar); -+ -+extern void elan4_configure_writecombining (ELAN4_DEV *dev); -+extern void elan4_unconfigure_writecombining (ELAN4_DEV *dev); -+ -+/* i2c.c */ -+extern int i2c_disable_auto_led_update (ELAN4_DEV *dev); -+extern void i2c_enable_auto_led_update (ELAN4_DEV *dev); -+extern int i2c_write (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_read (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_writereg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_readreg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+ -+#if defined(__linux__) -+/* procfs_Linux.c */ -+extern void elan4_procfs_device_init (ELAN4_DEV *dev); -+extern void elan4_procfs_device_fini (ELAN4_DEV *dev); -+extern void elan4_procfs_init(void); -+extern void elan4_procfs_fini(void); -+ -+extern struct proc_dir_entry *elan4_procfs_root; -+extern struct proc_dir_entry *elan4_config_root; -+#endif -+ -+/* sdram.c */ -+extern void elan4_sdram_init (ELAN4_DEV *dev); -+extern void elan4_sdram_fini (ELAN4_DEV *dev); -+extern void elan4_sdram_setup_delay_lines (ELAN4_DEV *dev, int factor); -+extern int elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern sdramaddr_t elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes); -+extern void elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t base, int nbytes); -+extern char *elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, E4_uint64 ConfigReg, char *str); -+ -+/* traps.c */ -+extern void elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap); -+extern void elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap); -+extern void elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap); -+extern void elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap); -+extern void elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap); -+ -+ -+extern void elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent); -+extern void elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum); -+extern void elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit); -+extern void elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap); -+extern void elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit); -+extern void elan4_ringbuf_store(ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev); -+extern int cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq, int chan); -+ -+extern void elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap); -+extern E4_uint64 elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq); -+ -+/* mmu.c */ -+extern void elan4mmu_flush_tlb (ELAN4_DEV *dev); -+extern ELAN4_HASH_ENTRY *elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp); -+extern int elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, int type, E4_uint64 pte); -+extern int elan4mmu_pteload_page (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, struct page *page, int perm); -+extern void elan4mmu_pteunload (ELAN4_CTXT *ctxt, ELAN4_HASH_ENTRY *he, unsigned int tagidx, unsigned int pteidx); -+extern void elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len); -+extern void elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt); -+ -+extern ELAN4_HASH_CACHE *elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep); -+extern void elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc); -+extern void elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte); -+extern E4_uint64 elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+extern void elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+ -+extern int elan4mmu_display_mmuhash(ELAN4_DEV *dev, int tlb, int *index, char *page, int count); -+extern int elan4mmu_display_ctxt_mmuhash(ELAN4_CTXT *ctxt, int tlb, int *index, char *page, int count); -+extern int elan4mmu_display_bucket_mmuhash(ELAN4_DEV *dev, int tlb, int *buckets, int nBuckets, char *page, int count); -+extern void elan4mmu_do_shuffle(ELAN4_CTXT *ctxt, int tbl); -+extern void elan4mmu_set_shuffle(ELAN4_CTXT *ctxt, int tbl, int hashidx); -+ -+/* mmu_osdep.c */ -+extern int elan4mmu_sdram_aliascheck (ELAN4_CTXT *ctxt, E4_Addr addr, sdramaddr_t phys); -+extern int elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type); -+extern E4_uint64 elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t phys, unsigned perm); -+extern physaddr_t elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte); -+ -+/* neterr.c */ -+extern int elan4_neterr_init (ELAN4_DEV *dev); -+extern void elan4_neterr_destroy (ELAN4_DEV *dev); -+extern int elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap); -+ -+/* routetable.c */ -+extern ELAN4_ROUTE_TABLE *elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size); -+extern void elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl); -+extern void elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp); -+extern int elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctxnum, -+ unsigned lowid, unsigned highid, unsigned options); -+extern int elan4_check_route (ELAN_POSITION *pos, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags); -+ -+/* user.c */ -+extern int __categorise_command (E4_uint64 command, int *cmdSize); -+extern int __whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize); -+ -+/* debug.c */ -+extern int elan4_assfail (ELAN4_CTXT *ctxt, const char *ex, const char *func, const char *file, const int line); -+extern int elan4_debug_trigger (ELAN4_CTXT *ctxt, const char *func, const char *file, const int line, const char *fmt, ...); -+ -+#if defined(DEBUG_ASSERT) -+#define ELAN4_ASSERT(ctxt,EXPR) do { \ -+ if (!(EX) && elan4_assfail (ctxt, #EXPR, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#else -+#define ELAN4_ASSERT(ctxt,EXPR) ((void) 0) -+#endif -+ -+#define ELAN4_DEBUG_TRIGGER(ctxt,fmt, args...) do {\ -+ if (elan4_debug_trigger (ctxt, __FUNCTION__, __FILE__, __LINE__, fmt, ##args)) \ -+ BUG();\ -+} while (0) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_H */ -diff -urN clean/include/elan4/device_Linux.h linux-2.6.9/include/elan4/device_Linux.h ---- clean/include/elan4/device_Linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/device_Linux.h 2005-04-05 11:29:28.000000000 -0400 -@@ -0,0 +1,118 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_LINUX_H -+#define __ELAN4_ELANDEV_LINUX_H -+ -+#ident "$Id: device_Linux.h,v 1.26 2005/04/05 15:29:28 robin Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.h,v $*/ -+ -+#include -+ -+#if !defined(NO_COPROC) /* The older coproc kernel patch is applied */ -+#include -+ -+#define ioproc_ops coproc_ops_struct -+#define ioproc_register_ops register_coproc_ops -+#define ioproc_unregister_ops unregister_coproc_ops -+ -+#define IOPROC_MM_STRUCT_ARG 1 -+#define IOPROC_PATCH_APPLIED 1 -+ -+#elif !defined(NO_IOPROC) /* The new ioproc kernel patch is applied */ -+#include -+ -+#define IOPROC_PATCH_APPLIED 1 -+#endif -+ -+ -+#if defined(MPSAS) -+#include -+#endif -+ -+#if defined(CONFIG_DEVFS_FS) -+#include -+#endif -+ -+#define ELAN4_MAJOR 61 -+#define ELAN4_NAME "elan4" -+#define ELAN4_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+/* OS dependant component of ELAN4_DEV struct */ -+typedef struct elan4_dev_osdep -+{ -+ struct pci_dev *pdev; /* PCI config data */ -+ -+ struct proc_dir_entry *procdir; -+ struct proc_dir_entry *configdir; -+ struct proc_dir_entry *statsdir; -+ struct proc_dir_entry *ctxtdir; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle_t devfs_control; -+ devfs_handle_t devfs_sdram; -+ devfs_handle_t devfs_user; -+#endif -+ -+#if defined(CONFIG_MTRR) -+ int sdram_mtrr; -+ int regs_mtrr; -+#endif -+} ELAN4_DEV_OSDEP; -+ -+/* /dev/elan/rmsX */ -+ -+/* /dev/elan4/controlX */ -+typedef struct control_private -+{ -+ struct elan4_dev *pr_dev; -+ unsigned pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+/* /dev/elan4/sdramX */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ unsigned long pg_pgoff; -+ unsigned pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ struct elan4_dev *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+/* /dev/elan4/userX */ -+typedef struct user_private -+{ -+ atomic_t pr_ref; -+ struct user_ctxt *pr_uctx; -+ struct mm_struct *pr_mm; -+ -+#if defined(IOPROC_PATCH_APPLIED) -+ struct ioproc_ops pr_ioproc; -+#endif -+} USER_PRIVATE; -+ -+/* No mapping handles on linux */ -+typedef void *ELAN4_MAP_HANDLE; -+ -+#define ELAN4_TASK_HANDLE() ((unsigned long) current->mm) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_LINUX_H */ -diff -urN clean/include/elan4/dma.h linux-2.6.9/include/elan4/dma.h ---- clean/include/elan4/dma.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/dma.h 2003-09-04 08:39:17.000000000 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_DMA_H -+#define __ELAN4_DMA_H -+ -+#ident "$Id: dma.h,v 1.16 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/dma.h,v $*/ -+ -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E4_DMA_ALIGN (64) -+ -+/* Maximum size of a single DMA ((1 << 31)-1) */ -+#define E4_MAX_DMA_SIZE (0x7fffffff) -+ -+/* -+ * dma_typeSize -+ * -+ * [63:32] Size -+ * [31] unused -+ * [30] IsRemote -+ * [29] QueueWrite -+ * [28] ShmemWrite -+ * [27:26] DataType -+ * [25] Broadcast -+ * [24] AlignPackets -+ * [23:16] FailCount -+ * [15:14] unused -+ * [13:0] Context -+ */ -+ -+#define DMA_FailCount(val) (((val) & 0xff) << 16) -+#define DMA_AlignPackets (1 << 24) -+#define DMA_Broadcast (1 << 25) -+#define DMA_ShMemWrite (1 << 28) -+#define DMA_QueueWrite (1 << 29) -+#define DMA_IsRemote (1 << 30) -+#define DMA_Context(val) ((unsigned) (val) & 0x3ff) -+#define DMA_ContextMask 0x3fffull -+#define Dma_TypeSizeMask 0xfffffffffff00000ull -+ -+#define DMA_DataTypeByte (E4_DATATYPE_BYTE << 26) -+#define DMA_DataTypeShort (E4_DATATYPE_SHORT << 26) -+#define DMA_DataTypeWord (E4_DATATYPE_WORD << 26) -+#define DMA_DataTypeLong (E4_DATATYPE_DWORD << 26) -+ -+#define E4_DMA_TYPE_SIZE(size, dataType, flags, failCount) \ -+ ((((E4_uint64)(size)) << 32) | ((dataType) & DMA_DataTypeLong) | \ -+ (flags) | DMA_FailCount(failCount)) -+ -+typedef volatile struct e4_dma -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+} E4_DMA; -+ -+/* Same as above but padded to 64-bytes */ -+typedef volatile struct e4_dma64 -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+ E4_Addr dma_pad; -+} E4_DMA64; -+ -+#endif /* __ELAN4_DMA_H */ -diff -urN clean/include/elan4/events.h linux-2.6.9/include/elan4/events.h ---- clean/include/elan4/events.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/events.h 2004-06-23 07:07:18.000000000 -0400 -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_EVENTS_H -+#define __ELAN4_EVENTS_H -+ -+#ident "$Id: events.h,v 1.22 2004/06/23 11:07:18 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/events.h,v $*/ -+ -+#define E4_EVENT_ALIGN 32 -+#define E4_EVENTBLOCK_SIZE 64 -+ -+#ifndef _ASM -+/* -+ * Event locations must be aligned to a 32 byte boundary. It is very much more efficent to place -+ * them in elan local memory but is not essential. -+ */ -+typedef struct _E4_Event -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+} E4_Event; -+ -+/* Same as above but padded to correct Event alignment */ -+typedef struct _E4_Event32 -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+ E4_uint64 ev_pad; -+} E4_Event32; -+ -+/* -+ * An E4_EVENTBLOCK_SIZE aligned block of Main or Elan memory -+ */ -+typedef union _E4_Event_Blk -+{ -+ /* Padded to 64-bytes in case a cache-line write is more efficient */ -+ volatile E4_uint8 eb_unit8[E4_EVENTBLOCK_SIZE]; -+ volatile E4_uint32 eb_uint32[E4_EVENTBLOCK_SIZE/sizeof(E4_uint32)]; -+ volatile E4_uint64 eb_uint64[E4_EVENTBLOCK_SIZE/sizeof(E4_uint64)]; -+} E4_Event_Blk; -+#define eb_done eb_uint32[14] -+#define eb_done_dword eb_uint64[7] -+ -+#endif /* ! _ASM */ -+ -+/* -+ * ev_CountAndType -+ * [63:31] Count -+ * [10] CopyType -+ * [9:8] DataType -+ * [7:0] CopySize -+ */ -+#define E4_EVENT_TYPE_MASK 0x00000000ffffffffull -+#define E4_EVENT_COUNT_MASK 0xffffffff00000000ull -+#define E4_EVENT_COUNT_SHIFT 32 -+#define E4_EVENT_COPY_TYPE_MASK (1 << 10) -+#define E4_EVENT_DATA_TYPE_MASK (3 << 8) -+#define E4_EVENT_COPY_SIZE_MASK (0xff) -+ -+/* CopyType */ -+#define E4_EVENT_COPY (0 << 10) -+#define E4_EVENT_WRITE (1 << 10) -+ -+/* DataType */ -+#define E4_EVENT_DTYPE_BYTE (0 << 8) -+#define E4_EVENT_DTYPE_SHORT (1 << 8) -+#define E4_EVENT_DTYPE_WORD (2 << 8) -+#define E4_EVENT_DTYPE_LONG (3 << 8) -+ -+#define EVENT_COUNT(EventPtr) ((E4_int32)(elan4_load64 (&(EventPtr)->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT)) -+#define EVENT_TYPE(EventPtr) ((E4_uint32)(elan4_load64 (&(EventPtr)->ev_CountAndType) & E4_EVENT_TYPE_MASK)) -+ -+#define E4_WAITEVENT_COUNT_TYPE_VALUE(Count, EventType, DataType, CopySize) \ -+ (((E4_uint64)(Count) << E4_EVENT_COUNT_SHIFT) | (EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize) \ -+ ((EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_INIT_VALUE(InitialCount, EventType, DataType, CopySize) \ -+ (((E4_uint64)(InitialCount) << E4_EVENT_COUNT_SHIFT) | E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize)) -+ -+#define ev_CopySource ev_Params[0] -+#define ev_CopyDest ev_Params[1] -+#define ev_WritePtr ev_Params[0] -+#define ev_WriteValue ev_Params[1] -+ -+#define EVENT_BLK_READY(BLK) ((BLK)->eb_done != 0) -+#define EVENT_READY(EVENT) ((E4_uint32)((((volatile E4_Event *) (EVENT))->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT) >= 0) -+ -+#define ELAN_WAIT_EVENT (0) -+#define ELAN_POLL_EVENT (-1) -+ -+#define E4_BLK_PATTERN ((E4_uint32)0xfeedface) -+ -+#define E4_INIT_COPY_EVENT(EVENT, BLK_ELAN, BLK, SIZE) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, SIZE), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((BLK_ELAN), &(EVENT)->ev_CopySource); \ -+ elan4_store64 ((BLK), &(EVENT)->ev_CopyDest); \ -+ } while (0) -+ -+#define E4_INIT_WRITE_EVENT(EVENT, DWORD) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((DWORD), &(EVENT)->ev_WritePtr); \ -+ elan4_store64 ((E4_Addr) (E4_BLK_PATTERN), &(EVENT)->ev_WriteValue); \ -+ } while (0) -+ -+#define E4_RESET_BLK_EVENT(BLK) \ -+ do { \ -+ (BLK)->eb_done = (0); \ -+ } while (0) -+ -+#define E4_PRIME_BLK_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_COPY_EVENT(EVENT, SIZE, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, (SIZE >> 3)), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_WRITE_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#ifndef _ASM -+ -+#define E4_INPUTQ_ALIGN 32 /* Descriptor must be 32-byte aligned */ -+ -+typedef struct _E4_InputQueue -+{ -+ volatile E4_Addr q_bptr; /* 64 bit aligned ptr to current back item */ -+ E4_Addr q_fptr; /* 64 bit aligned ptr to current front item */ -+ E4_uint64 q_control; /* this defines the last item, item size, and offset back to the first item. */ -+ E4_Addr q_event; /* queue event */ -+} E4_InputQueue; -+ -+#define E4_INPUTQ_LASTITEM_MASK 0x00000000ffffffffULL -+#define E4_INPUTQ_ITEMSIZE_MASK 0x000000ff00000000ULL -+#define E4_INPUTQ_LASTITEM_OFFSET_MASK 0xffffff0000000000ULL -+#define E4_INPUTQ_LASTITEM_SHIFT 0 -+#define E4_INPUTQ_ITEMSIZE_SHIFT 32 -+#define E4_INPUTQ_LASTITEM_OFFSET_SHIFT 40 -+ -+/* -+ * Macro to initialise the InputQueue control word given the FirstItem, LastItem & ItemSize -+ * FirstItem and LastItem are 64 bit double word aligned elan addresses. -+ */ -+#define E4_InputQueueControl(FirstItem, LastItem, ItemSizeInBytes)\ -+ (((((E4_uint64)(LastItem))) & E4_INPUTQ_LASTITEM_MASK) |\ -+ ((((E4_uint64)(ItemSizeInBytes)) << (E4_INPUTQ_ITEMSIZE_SHIFT-3)) & E4_INPUTQ_ITEMSIZE_MASK) |\ -+ ((((E4_uint64)((FirstItem)-(LastItem))) << (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)) & E4_INPUTQ_LASTITEM_OFFSET_MASK)) -+ -+/* -+ * LastItemOffset is a sign extended -ve quantity with LastItemOffset[26:3] == q_control[63:40] -+ * we sign extend this by setting LastItemOffset[63:27] to be #one. -+ */ -+#define E4_InputQueueLastItemOffset(control) ((((E4_int64) -1) << (64 - (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3))) | \ -+ ((E4_int64) (((control) & E4_INPUTQ_LASTITEM_OFFSET_MASK) >> (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)))) -+#define E4_InputQueueItemSize(control) (((control) & E4_INPUTQ_ITEMSIZE_MASK) >> (E4_INPUTQ_ITEMSIZE_SHIFT-3)) -+ -+/* -+ * Macro to increment the InputQ front pointer taking into account wrap -+ */ -+#define E4_InputQueueFptrIncrement(Q, FirstItem, LastItem, ItemSizeInBytes) \ -+ ((Q)->q_fptr = ( ((Q)->q_fptr == (LastItem)) ? (FirstItem) : ((Q)->q_fptr + (ItemSizeInBytes))) ) -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_EVENTS_H */ -diff -urN clean/include/elan4/i2c.h linux-2.6.9/include/elan4/i2c.h ---- clean/include/elan4/i2c.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/i2c.h 2003-12-02 11:11:22.000000000 -0500 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_I2C_H -+#define _ELAN4_I2C_H -+ -+#ident "@(#)$Id: i2c.h,v 1.10 2003/12/02 16:11:22 lee Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/i2c.h,v $*/ -+ -+/* I2C address space - bits[7:1] */ -+#define I2C_LED_I2C_ADDR 0x20 -+#define I2C_TEMP_ADDR 0x48 -+#define I2C_EEPROM_ADDR 0x50 -+ -+#define I2C_WRITE_ADDR(addr) ((addr) << 1 | 0) -+#define I2C_READ_ADDR(addr) ((addr) << 1 | 1) -+ -+/* I2C EEPROM appears as 8 I2C 256 byte devices */ -+#define I2C_24LC16B_BLOCKSIZE (256) -+#define I2C_24LC16B_BLOCKADDR(addr) ((addr) >> 8) -+#define I2C_24LC16B_BLOCKOFFSET(addr) ((addr) & 0xff) -+ -+#define I2C_ELAN_EEPROM_PCI_BASEADDR 0 /* PCI config starts at addr 0 in the EEPROM */ -+#define I2C_ELAN_EEPROM_VPD_BASEADDR 256 /* VPD data start */ -+#define I2C_ELAN_EEPROM_PCI_SIZE 256 /* PCI data max size */ -+#define I2C_ELAN_EEPROM_VPD_SIZE 256 /* VPD data max size */ -+ -+#define I2C_ELAN_EEPROM_SIZE 2048 -+ -+#define I2C_ELAN_EEPROM_DEVICE_ID 0xA0 -+#define I2C_ELAN_EEPROM_FAIL_LIMIT 8 -+ -+#define I2C_ELAN_EEPROM_ADDR_BLOCKSIZE_SHIFT 0x8 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_MASK 0x7 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_SHIFT 0x1 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_I2C_H */ -diff -urN clean/include/elan4/intcookie.h linux-2.6.9/include/elan4/intcookie.h ---- clean/include/elan4/intcookie.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/intcookie.h 2004-08-09 10:02:37.000000000 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.h,v 1.10 2004/08/09 14:02:37 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.h,v $*/ -+ -+#ifndef __ELAN4_INTCOOKIE_H -+#define __ELAN4_INTCOOKIE_H -+ -+typedef E4_uint64 ELAN4_INTCOOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct intcookie_entry -+{ -+ struct intcookie_entry *ent_next; -+ struct intcookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ ELAN4_INTCOOKIE ent_cookie; -+ ELAN4_INTCOOKIE ent_fired; -+ kcondvar_t ent_wait; -+} INTCOOKIE_ENTRY; -+ -+typedef struct intcookie_table -+{ -+ struct intcookie_table *tbl_next; -+ struct intcookie_table *tbl_prev; -+ -+ ELAN_CAPABILITY *tbl_cap; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ INTCOOKIE_ENTRY *tbl_entries; -+} INTCOOKIE_TABLE; -+ -+extern void intcookie_init(void); -+extern void intcookie_fini(void); -+extern INTCOOKIE_TABLE *intcookie_alloc_table (ELAN_CAPABILITY *cap); -+extern void intcookie_free_table (INTCOOKIE_TABLE *tbl); -+extern int intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie); -+extern int intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+ -+#endif /* __KERNEL */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_INTCOOKIE_H */ -diff -urN clean/include/elan4/ioctl.h linux-2.6.9/include/elan4/ioctl.h ---- clean/include/elan4/ioctl.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/ioctl.h 2005-01-10 12:45:50.000000000 -0500 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_IOCTL_H -+#define __ELAN4_IOCTL_H -+ -+#ident "@(#)$Id: ioctl.h,v 1.29 2005/01/10 17:45:50 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/ioctl.h,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define ELAN4IO_CONTROL_PATHNAME "/dev/elan4/control%d" -+#define ELAN4IO_USER_PATHNAME "/dev/elan4/user%d" -+#define ELAN4IO_SDRAM_PATHNAME "/dev/elan4/sdram%d" -+#define ELAN4IO_MAX_PATHNAMELEN 32 -+ -+/* -+ * NOTE - ioctl values 0->0x1f are defined for -+ * generic/control usage. -+ */ -+ -+/* Macro to generate 'offset' to mmap "control" device */ -+#define OFF_TO_BAR(off) (((off) >> 28) & 0xF) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(bar,off) (((bar) << 28) | ((off) & 0x0FFFFFFF)) -+ -+/* Definiations for generic ioctls */ -+#define ELAN4IO_GENERIC_BASE 0x00 -+ -+typedef struct elan4io_stats_struct -+{ -+ int which; -+ unsigned long long ptr; /* always pass pointer as 64 bit */ -+} ELAN4IO_STATS_STRUCT; -+ -+#define ELAN4IO_STATS _IOR ('e', ELAN4IO_GENERIC_BASE + 0, ELAN4IO_STATS_STRUCT) -+#define ELAN4IO_DEVINFO _IOR ('e', ELAN4IO_GENERIC_BASE + 1, ELAN_DEVINFO) -+#define ELAN4IO_POSITION _IOR ('e', ELAN4IO_GENERIC_BASE + 2, ELAN_POSITION) -+ -+ -+/* -+ * Definitions for /dev/elan4/controlX -+ */ -+#define ELAN4IO_CONTROL_BASE 0x20 -+ -+#define ELAN4IO_GET_POSITION _IOR ('e', ELAN4IO_CONTROL_BASE + 0, ELAN_POSITION) -+#define ELAN4IO_SET_POSITION _IOW ('e', ELAN4IO_CONTROL_BASE + 1, ELAN_POSITION) -+#define ELAN4IO_DEBUG_SNAPSHOT _IOW ('e', ELAN4IO_CONTROL_BASE + 2, ) -+ -+typedef struct elan4io_params_mask_struct -+{ -+ unsigned short p_mask; -+ ELAN_PARAMS p_params; -+} ELAN4IO_PARAMS_STRUCT; -+#define ELAN4IO_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN4IO_PARAMS_STRUCT) -+#define ELAN4IO_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN4IO_PARAMS_STRUCT) -+ -+/* old versions - implicit p_mask == 3 */ -+#define ELAN4IO_OLD_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN_PARAMS) -+#define ELAN4IO_OLD_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN_PARAMS) -+ -+/* -+ * Definitions for /dev/elan4/userX -+ */ -+#define ELAN4IO_USER_BASE 0x40 -+ -+#define ELAN4IO_FREE _IO ('e', ELAN4IO_USER_BASE + 0) -+#define ELAN4IO_ATTACH _IOWR ('e', ELAN4IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN4IO_DETACH _IOWR ('e', ELAN4IO_USER_BASE + 2, ELAN_CAPABILITY) -+#define ELAN4IO_BLOCK_INPUTTER _IO ('e', ELAN4IO_USER_BASE + 3) -+ -+typedef struct elan4io_add_p2pvp_struct -+{ -+ unsigned vp_process; -+ ELAN_CAPABILITY vp_capability; -+} ELAN4IO_ADD_P2PVP_STRUCT; -+ -+#define ELAN4IO_ADD_P2PVP _IOW ('e', ELAN4IO_USER_BASE + 4, ELAN4IO_ADD_P2PVP_STRUCT) -+ -+typedef struct elan4io_add_bcastvp_struct -+{ -+ unsigned int vp_process; -+ unsigned int vp_lowvp; -+ unsigned int vp_highvp; -+} ELAN4IO_ADD_BCASTVP_STRUCT; -+ -+#define ELAN4IO_ADD_BCASTVP _IOW ('e', ELAN4IO_USER_BASE + 5, ELAN4IO_ADD_BCASTVP_STRUCT) -+ -+#define ELAN4IO_REMOVEVP _IO ('e', ELAN4IO_USER_BASE + 6) -+ -+typedef struct elan4io_route_struct -+{ -+ unsigned int rt_process; -+ unsigned int rt_error; -+ E4_VirtualProcessEntry rt_route; -+} ELAN4IO_ROUTE_STRUCT; -+ -+#define ELAN4IO_SET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 7, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_RESET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 9, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_GET_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 8, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_CHECK_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 10, ELAN4IO_ROUTE_STRUCT) -+ -+typedef struct elan4io_alloc_cq_struct -+{ -+ unsigned int cq_size; /* input: size of queue */ -+ unsigned int cq_perm; /* input: requested permissions */ -+ unsigned int cq_type; /* input: queue type */ -+ unsigned int cq_indx; /* output: queue number */ -+} ELAN4IO_ALLOCCQ_STRUCT; -+ -+#define ELAN4IO_ALLOCCQ _IOWR ('e', ELAN4IO_USER_BASE + 11, ELAN4IO_ALLOCCQ_STRUCT) -+#define ELAN4IO_FREECQ _IOWR ('e', ELAN4IO_USER_BASE + 12, unsigned) -+ -+#define ELAN4IO_CQ_TYPE_REORDER 1 /* revb reordering command queue */ -+ -+typedef struct elan4io_perm_struct -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned long ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT; -+ -+typedef struct elan4io_perm_struct32 -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned int ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT32; -+ -+#define ELAN4IO_SETPERM _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_SETPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT32) -+#define ELAN4IO_CLRPERM _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_CLRPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT32) -+ -+typedef struct elan4io_trapsig_struct -+{ -+ int ts_signo; -+} ELAN4IO_TRAPSIG_STRUCT; -+#define ELAN4IO_TRAPSIG _IOW ('e', ELAN4IO_USER_BASE + 15, ELAN4IO_TRAPSIG_STRUCT) -+ -+typedef struct elan4io_traphandler_struct -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned long th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT; -+ -+typedef struct elan4io_traphandler_struct32 -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned int th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT32; -+ -+#define ELAN4IO_TRAPHANDLER _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT) -+#define ELAN4IO_TRAPHANDLER32 _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT32) -+ -+typedef struct elan4io_required_mappings_struct -+{ -+ E4_Addr rm_upage_addr; /* elan address of user page */ -+ E4_Addr rm_trestart_addr; /* elan address of tproc restart trampoline */ -+} ELAN4IO_REQUIRED_MAPPINGS_STRUCT; -+#define ELAN4IO_REQUIRED_MAPPINGS _IOW ('e', ELAN4IO_USER_BASE + 17, ELAN4IO_REQUIRED_MAPPINGS_STRUCT) -+ -+typedef struct elan4io_resume_eproc_trap_struct -+{ -+ E4_Addr rs_addr; -+} ELAN4IO_RESUME_EPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_EPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 18, ELAN4IO_RESUME_EPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_cproc_trap_struct -+{ -+ unsigned int rs_indx; -+} ELAN4IO_RESUME_CPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_CPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 19, ELAN4IO_RESUME_CPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_dproc_trap_struct -+{ -+ E4_DMA rs_desc; -+} ELAN4IO_RESUME_DPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_DPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 20, ELAN4IO_RESUME_DPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_tproc_trap_struct -+{ -+ E4_ThreadRegs rs_regs; -+} ELAN4IO_RESUME_TPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_TPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 21, ELAN4IO_RESUME_TPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_iproc_trap_struct -+{ -+ unsigned int rs_channel; -+ unsigned int rs_trans; -+ E4_IprocTrapHeader rs_header; -+ E4_IprocTrapData rs_data; -+} ELAN4IO_RESUME_IPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_IPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 22, ELAN4IO_RESUME_IPROC_TRAP_STRUCT) -+ -+#define ELAN4IO_FLUSH_ICACHE _IO ('e', ELAN4IO_USER_BASE + 23) -+#define ELAN4IO_STOP_CTXT _IO ('e', ELAN4IO_USER_BASE + 24) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 25, ELAN4_INTCOOKIE) -+#define ELAN4IO_FREE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 26, ELAN4_INTCOOKIE) -+#define ELAN4IO_ARM_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 27, ELAN4_INTCOOKIE) -+#define ELAN4IO_WAIT_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 28, ELAN4_INTCOOKIE) -+ -+typedef struct elan4io_alloc_trap_queues_struct -+{ -+ unsigned int tq_ndproc_traps; -+ unsigned int tq_neproc_traps; -+ unsigned int tq_ntproc_traps; -+ unsigned int tq_nthreads; -+ unsigned int tq_ndmas; -+} ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT; -+#define ELAN4IO_ALLOC_TRAP_QUEUES _IOW ('e', ELAN4IO_USER_BASE + 29, ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT) -+ -+typedef struct elan4io_neterr_msg_struct -+{ -+ unsigned int nm_vp; -+ unsigned int nm_nctx; -+ unsigned int nm_retries; -+ unsigned int nm_pad; -+ ELAN4_NETERR_MSG nm_msg; -+} ELAN4IO_NETERR_MSG_STRUCT; -+#define ELAN4IO_NETERR_MSG _IOW ('e', ELAN4IO_USER_BASE + 30, ELAN4IO_NETERR_MSG_STRUCT) -+ -+typedef struct elan4io_neterr_timer_struct -+{ -+ unsigned int nt_usecs; -+} ELAN4IO_NETERR_TIMER_STUCT; -+ -+#define ELAN4IO_NETERR_TIMER _IO ('e', ELAN4IO_USER_BASE + 31) -+ -+typedef struct elan4io_neterr_fixup_struct -+{ -+ E4_uint64 nf_cookie; -+ unsigned int nf_waitforeop; -+ unsigned int nf_sten; -+ unsigned int nf_vp; -+ unsigned int nf_pad; -+} ELAN4IO_NETERR_FIXUP_STRUCT; -+ -+#define ELAN4IO_NETERR_FIXUP _IOW ('e', ELAN4IO_USER_BASE + 32, ELAN4IO_NETERR_FIXUP_STRUCT) -+ -+typedef struct elan4io_firecap_struct -+{ -+ ELAN_CAPABILITY fc_capability; -+ ELAN4_INTCOOKIE fc_cookie; -+} ELAN4IO_FIRECAP_STRUCT; -+ -+#define ELAN4IO_FIRE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 33, ELAN4IO_FIRECAP_STRUCT) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE_TABLE _IOW ('e', ELAN4IO_USER_BASE + 34, ELAN_CAPABILITY) -+#define ELAN4IO_FREE_INTCOOKIE_TABLE _IO ('e', ELAN4IO_USER_BASE + 35) -+ -+typedef struct elan4io_translation -+{ -+ E4_Addr tr_addr; -+ unsigned long tr_len; -+ unsigned int tr_access; -+} ELAN4IO_TRANSLATION_STRUCT; -+ -+#define ELAN4IO_LOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 36, ELAN4IO_TRANSLATION_STRUCT) -+#define ELAN4IO_UNLOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 37, ELAN4IO_TRANSLATION_STRUCT) -+ -+typedef struct elan4io_dumpcq_struct32 -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned int buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT32; -+ -+typedef struct elan4io_dumpcq_struct -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned long buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT; -+ -+#define ELAN4IO_DUMPCQ _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT) -+#define ELAN4IO_DUMPCQ32 _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT32) -+ -+/* mmap offsets - - we define the file offset space as follows: -+ * -+ * page 0 - 4095 - command queues -+ * page 4096 - device user registers -+ * page 4097 - flag page/user stats -+ * page 4098 - device stats -+ * page 4099 - tproc trampoline -+ */ -+ -+#define ELAN4_OFF_COMMAND_QUEUES 0 -+#define ELAN4_OFF_USER_REGS 4096 -+#define ELAN4_OFF_USER_PAGE 4097 -+#define ELAN4_OFF_DEVICE_STATS 4098 -+#define ELAN4_OFF_TPROC_TRAMPOLINE 4099 -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_IOCTL_H */ -diff -urN clean/include/elan4/mmu.h linux-2.6.9/include/elan4/mmu.h ---- clean/include/elan4/mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/mmu.h 2005-04-21 07:12:06.000000000 -0400 -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.h,v 1.14 2005/04/21 11:12:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.h,v $*/ -+ -+ -+#ifndef __ELAN4_MMU_H -+#define __ELAN4_MMU_H -+ -+#include -+ -+typedef union elan4_pte_page -+{ -+ struct { -+ struct page *page; -+ physaddr_t dma_addr; -+ } _page; -+#define pg_page _page.page -+#define pg_dma_addr _page.dma_addr -+ -+} ELAN4_PTE_PAGE; -+ -+typedef struct elan4_hash_entry -+{ -+ struct elan4_hash_entry *he_next; -+ struct elan4_hash_entry *he_prev; -+ -+ sdramaddr_t he_entry; -+ -+ struct elan4_hash_entry *he_chain[2]; -+ E4_uint64 he_tag[2]; -+ E4_uint32 he_pte[2]; -+ -+ ELAN4_PTE_PAGE he_pg[2][4]; -+} ELAN4_HASH_ENTRY; -+ -+#define ELAN4_HENT_CHUNKS 16 /* SDRAM_MIN_BLOCK_SIZE/sizeof (E4_HashTableEntry) */ -+ -+typedef struct elan4_hash_chunk -+{ -+ struct list_head hc_link; -+ ELAN4_HASH_ENTRY hc_hents[ELAN4_HENT_CHUNKS]; -+} ELAN4_HASH_CHUNK; -+ -+typedef struct elan4_hash_cache -+{ -+ E4_Addr hc_start; -+ E4_Addr hc_end; -+ int hc_tbl; -+ -+ ELAN4_HASH_ENTRY *hc_hes[1]; -+} ELAN4_HASH_CACHE; -+ -+/* -+ * he_pte is really 4 bytes of pte "type" one for each pte -+ * entry - however we declare it as an "int" so we can -+ * easily determine that all 4 entries are invalid -+ */ -+#define HE_SET_PTE(he,tagidx,pteidx,val) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx] = (val)) -+#define HE_GET_PTE(he,tagidx,pteidx) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx]) -+ -+#define HE_TYPE_INVALID 0 -+#define HE_TYPE_SDRAM 1 -+#define HE_TYPE_COMMAND 2 -+#define HE_TYPE_REGS 3 -+#define HE_TYPE_PAGE 4 -+#define HE_TYPE_OTHER 5 -+#define HE_TYPE_RESERVED 6 -+ -+/* -+ * he_tag has the following form : -+ * [63:27] tag -+ * [20:17] pte valid -+ * [16] locked -+ * [15] copy -+ * [14] valid -+ * [13:0] context -+ */ -+ -+#define HE_TAG_VALID (1 << 14) -+#define HE_TAG_COPY (1 << 15) -+#define HE_TAG_LOCKED (1 << 16) -+ -+#define INVALID_CONTEXT 0 -+ -+extern u_char elan4_permtable[]; -+#define ELAN4_INCOMPAT_ACCESS(perm,access) ((elan4_permtable[(perm)] & (1 << (access))) == 0) -+extern u_char elan4_permreadonly[]; -+#define ELAN4_PERM_READONLY(perm) (elan4_permreadonly[(perm)]) -+ -+extern int elan4_debug_mmu; -+ -+extern int elan4_mmuhash_chain_reduction; -+extern int elan4_mmuhash_chain_end_reduce; -+extern int elan4_mmuhash_chain_middle_reduce; -+extern int elan4_mmuhash_chain_middle_fail; -+extern int elan4_mmuhash_shuffle_attempts; -+extern int elan4_mmuhash_shuffle_done; -+ -+#ifdef DEBUG_PRINTF -+# define MPRINTF(ctxt,lvl,args...) (elan4_debug_mmu > (lvl) ? elan4_debugf(ctxt,DBG_MMU, ##args) : (void)0) -+#else -+# define MPRINTF(ctxt,lvl,args...) ((void) 0) -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_MMU_H */ -diff -urN clean/include/elan4/neterr.h linux-2.6.9/include/elan4/neterr.h ---- clean/include/elan4/neterr.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/neterr.h 2004-01-19 09:38:34.000000000 -0500 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_NETERR_H -+#define __ELAN4_NETERR_H -+ -+#ident "@(#)$Id: neterr.h,v 1.1 2004/01/19 14:38:34 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.h,v $*/ -+ -+typedef struct elan4_neterr_msg -+{ -+ E4_uint8 msg_type; -+ E4_uint8 msg_waitforeop; -+ E4_uint16 msg_context; /* network context # message sent to */ -+ E4_int16 msg_found; /* # cookie found (response) */ -+ -+ ELAN_LOCATION msg_sender; /* nodeid/context # message sent from */ -+ E4_uint32 msg_pad; -+ -+ E4_uint64 msg_cookies[6]; /* 64 bit cookies from identify packets */ -+} ELAN4_NETERR_MSG; -+ -+#define ELAN4_NETERR_MSG_SIZE sizeof (ELAN4_NETERR_MSG) -+#define ELAN4_NETERR_MSG_REQUEST 1 -+#define ELAN4_NETERR_MSG_RESPONSE 2 -+ -+#define ELAN4_NETERR_MAX_COOKIES (sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies) / \ -+ sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies[0])) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_NETERR_H */ -diff -urN clean/include/elan4/pci.h linux-2.6.9/include/elan4/pci.h ---- clean/include/elan4/pci.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/pci.h 2003-09-04 08:39:17.000000000 -0400 -@@ -0,0 +1,227 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_PCI_H -+#define __ELAN4_PCI_H -+ -+#ident "$Id: pci.h,v 1.32 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/pci.h,v $*/ -+ -+/* Elan has 2 64 bit bars */ -+#define ELAN4_BAR_SDRAM 0 -+#define ELAN4_BAR_REGISTERS 2 -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+/* support standard pseudo bars */ -+#define ELAN4_PSEUDO_BAR_ROM 8 -+ -+/* Elan PCI control -+ configuration space register. ElanControlRegister */ -+#define PCI_ELAN_PARITY_ADDR_LO 0x40 -+#define PCI_ELAN_PARITY_ADDR_HI 0x44 -+#define PCI_ELAN_PARITY_TYPE 0x48 -+#define PCI_ELAN_CONTROL 0x4c -+#define PCI_ELAN_PLL_CONTROL 0x50 -+#define PCI_ELAN_SPLIT_MESSAGE_ATTR 0x54 -+#define PCI_ELAN_SPLIT_MESSAGE_VALUE 0x54 -+#define PCI_ELAN_RAMBIST_FAILED 0x54 -+#define PCI_ELAN_TOPPHYSADDR(i) (0x58 + ((i)<<1)) -+ -+/* -+ * [31] PciM66EN This is set it the bus is running in PCI2.3 - 66MHz mode. -+ * [30:28] InitPattern This gives the PCI-X startup mode. See "Pci intialisation patterns" below. -+ * [27] notBusIs64Bits If set the bus is running 32 bits wide. If Clear it is a 64 bit bus. -+ * [26:24] RamBistCntl Used to control the Elan4 RAM BIST. Not acitive it zero. -+ * [23] RamBistFinished Only used when performing the RAM BIST test. -+ * [22] SelectSplitMessAttr See ECTRL_SELECT_SPLIT_MESS_ATTR below. -+ * [21] ReceivedSplitCompError See ECTRL_REC_SPLIT_COMP_MESSAGE below -+ * [20:16] WriteHighPriTime Used with ReadHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of WriteHighPriTime the longer -+ * the PCI write bursts will be allowed without interruption from a read transfer. -+ * [15] DisableCouplingTest This is only used as part of the RAM BIST test. It effects the testing of the main -+ * cache tag RAMS. -+ * [14:13] Not used Will read as zero. -+ * [12:8] ReadHighPriTime Used with WriteHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of ReadHighPriTime the longer -+ * the PCI read bursts will be allowed without interruption from a write transfer. -+ * [7] EnableLatencyCountReset This bit effect the behaviour of disconnects due to the removal of GNT# after the latency -+ * counter has expired. If set it will allow the latency counter to be reset each time the -+ * GNT# is reasserted. If asserted it should provided improved bandwidth on the PCI bus -+ * without increasing the maximum latency another device would have for access to the bus. -+ * It will increase the average latency of other devices. -+ * [6] ExtraMasterAddrBits This bit used to control the physical PCI addresses generated by the MMU. -+ * [5] ReducedPciDecode If set the PCI local memory BAR will decode 256Mbytes of PCI address space. If clear it -+ * will decode 2Gbyte of PCI address space. -+ * [4] ConfigInEBusRom If set the constant values of the Elan4 PCI configuration space will be taken from the -+ * EEPROM. If clear the internal values will be used. -+ * [3] EnableRd2_2Bursts This bit only effects the behaviour of burst reads when the PCI bus is operating in -+ * PCI-2.2 mode. It allows adjacent reads to be merged into longer bursts for higher -+ * performance. -+ * [2] SoftIntReset If set this bit will cause the Elan4 to reset itself with the exception of the PCI -+ * configuation space. All internal state machines will be put into the reset state. -+ * [1] EnableWrBursts This bit allows much longer PCI-X write bursts. If set it will stop the Elan4 from -+ * being completely PCI-X compliant as the Elan4 may request a long PCI-X write burst that -+ * it does not complete. However it should significantly increase the maximum PCI-X write -+ * bandwidth and is unlikely to cause problems with many PCI-X bridge chips. -+ * [0] InvertMSIPriority This bit effect the way MSI interrupts are generated. It provides flexiblity to generate -+ * the MSI interrupts in a different way to allow for different implimentations of MSI -+ * logic and still give the correct priority of Elan4 interrupts. -+ * -+ * {PciM66EN, InitPattern, notBusIs64Bits, RamBistCntl, RamBistFinished, -+ * SelectSplitMessAttr, ReceivedSplitCompError, WriteHighPriTime, -+ * DisableCouplingTest, 2'h0, ReadHighPriTime, -+ * EnableLatencyCountReset, ExtraMasterAddrBits, ReducedPciDecode, ConfigInEBusRom, -+ * EnableRd2_2Bursts, SoftIntReset, EnableWrBursts, InvertMSIPriority} -+ */ -+ -+#define ECTRL_INVERT_MSI_PRIO (1 << 0) -+#define ECTRL_ENABLE_WRITEBURSTS (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_ENABLE_2_2READBURSTS (1 << 3) -+#define ECTRL_CONFIG_IN_EBUS_ROM (1 << 4) -+#define ECTRL_28_NOT_30_BIT_LOCAL_BAR (1 << 5) -+#define ECTRL_ExtraMasterAddrBits (1 << 6) -+#define ECTRL_ENABLE_LATENCY_RESET (1 << 7) -+#define ECTRL_DISABLE_COUPLING_TEST (1 << 15) -+ -+/* -+ * Ratio of the following two registers set the relative bandwidth given to intputer data -+ * versus other PCI pci traffic when scheduling new PCI master accesses. -+ */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_SHIFT (8) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_MASK (0x1f) -+ -+ -+#define ECTRL_IPROC_HIGH_PRI_TIME_SHIFT (16) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_IPROC_HIGH_PRI_TIME_MASK (0x1f) -+ -+/* -+ * This is set if a split completion message is received. -+ * This will cause a PCI error interrupt. -+ * This error is cleared by writting a 1 to this bit. -+ */ -+#define ECTRL_REC_SPLIT_COMP_MESSAGE (1 << 21) -+/* -+ * This bit is used to select reading of either the Split message attribute value when -+ * set or the split completion message data value from 0x54 in the config space -+ * if the ECTRL_REC_SPLIT_COMP_MESSAGE bit is set. 0x54 returns the the BistFailed flags -+ * if any of the BIST control bits are set (bits 26 to 24) -+ */ -+#define ECTRL_SELECT_SPLIT_MESS_ATTR (1 << 22) -+ -+// Internal RAM bist control bits. -+// Three bits of state control the RAM BIST (Built in self test). -+// -+// These bits must not be set unless the ECTRL_SOFTWARE_INTERNAL_RESET bit has also been set! -+// -+// For a normal fast ram test assert ECTRL_BIST_FAST_TEST. -+// For a data retention test first write ECTRL_START_RETENTION_TEST then wait the retention period of -+// at least 1ms and preferably much longer then write ECTRL_CONTINUE_RETENTION_TEST then wait -+// again and finallly write ECTRL_FINISH_RETENTION_TEST. -+// -+// The read only bit ECTRL_BIST_FINISHED_TEST can be polled to check that the test has compleated. -+#define ECTRL_BIST_CTRL_SHIFT (24) -+#define ECTRL_BIST_CTRL_MASK (7 << 24) -+ -+#define ECTRL_BIST_FAST_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // old scheme -+#define ECTRL_START_RETENTION_TEST ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_CONTINUE_RETENTION_TEST ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_FINISH_RETENTION_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_BIST_KICK_OFF ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // new scheme -+#define ECTRL_BIST_MOVE_ON_ODD ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_MOVE_ON_EVEN ((5 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_SCREAM_THROUGH ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_CLEAR_BIST_TEST (0 << 24) -+#define ECTRL_BIST_FINISHED_TEST (1 << 23) -+ -+// Read only current PCI bus type. -+#define ECTRL_RUNNING_32BIT_MODE (1 << 27) -+#define ECTRL_INITIALISATION_MODE (7 << 28) -+#define ECTRL_RUNNING_M66EN_MODE (1 << 31) -+ -+#define ECTRL_INIT_PATTERN_SHIFT (28) -+#define ECTRL_INIT_PATTERN_MASK (0x7) -+ -+// Pci intialisation patterns -+#define Pci2_2 (0 << 28) -+#define PciX50To66MHz (1 << 28) -+#define PciX66to100MHz (2 << 28) -+#define PciX100to133MHz (3 << 28) -+#define PciXReserved1 (4 << 28) -+#define PciXReserved2 (5 << 28) -+#define PciXReserved3 (6 << 28) -+#define PciXReserved4 (7 << 28) -+ -+/* Elan PCI pll and pad control configuration space register. ElanPllControlReg */ -+// This overrides the default PCI pll control settings. -+#define PciPll_FeedForwardISel0 (1 << 0) // Lsi name Z0 -+#define PciPll_FeedForwardISel1 (1 << 1) // Lsi name Z1 -+#define PciPll_ChargePumpISel0 (1 << 2) // Lsi name P0 -+#define PciPll_ChargePumpISel1 (1 << 3) // Lsi name P1 -+#define PciPll_EnableAutoReset (1 << 4) // Lsi name ENARST -+#define PciPll_RSEL200500 (1 << 5) // Lsi name Range Select, 0: 100 - 250MHz, 1: 200 - 500MHz -+#define PciPll_DivideFeedback (1 << 6) // Just used for test - This divides the shortcut feedback to the PCI PLL so that it can lock to the tester clock. -+#define PciPll_CutFeedback (1 << 7) // Just used for test - This disables the shortcut feedback. -+ -+// This overrides the default PCI BZ controler settings. -+#define PciBZ_UPDI (0xf << 8) -+#define PciBZ_WAIT_INT (0xf << 12) -+ -+// This overrides the default Sys and SDRam pll control settings. -+#define SysPll_FeedForwardISel0 (1 << 16) // Lsi name P0 -+#define SysPll_FeedForwardISel1 (1 << 17) // Lsi name P1 -+#define SysPll_ChargePumpISel0 (1 << 18) // Lsi name Z0 -+#define SysPll_ChargePumpISel1 (1 << 19) // Lsi name Z1 -+#define SysPll_EnableAutoReset (1 << 20) // Lsi name ENARST -+#define SysPll_DivPhaseCompInBy2 (1 << 21) // Lsi name NODIV (Should be DIV) -+#define SysPll_PllTestClkSel (1 << 22) // If asserted the master clock source is not taken from the pll. -+ -+#define Pll_ForceEBusADTristate (1 << 23) // Required to enable the testing of EnableAutoReset. Enables use of EBusAD[7] (rev A) -+#define Pll_LinkErrDirectToSDA (1 << 23) // Access to link error flag for triggering (rev B) -+ -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+// Config: with 800MHz Speeds are 266 200 160 133. -+// 0 = 133/133 (1:1) 6:6 1 -+// 1 = 160/133 (6:5) 5:6 1.2 -+// 2 = 200/133 (3:2) 4:6 1.5 -+// 3 = 266/133 (2:1) 3:6 2 -+// 4 = 200/200 (1:1) 4:4 1 -+// 5 = 266/200 (4:3) 3:4 1.33 -+ -+// Config: with 600MHz Speeds are 200 150 120 100 -+// 0 = 100/100 (1:1) 6:6 1 -+// 1 = 120/100 (6:5) 5:6 1.2 -+// 2 = 150/100 (3:2) 4:6 1.5 -+// 3 = 200/100 (2:1) 3:6 2 -+// 4 = 150/150 (1:1) 4:4 1 -+// 5 = 200/150 (4:3) 3:4 1.33 -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Slow (0 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_6_5 (1 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_3_2 (2 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_2_1 (3 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Fast (4 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_4_3 (5 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_MAX_NORMAL (6) /* used to generate a valid random value */ -+#define GET_RANDOM_CLOCK_RATIO (Random(ECTRL_SYS_CLOCK_MAX_NORMAL) << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_PLL_TEST (6 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_TEST (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_MASK (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+ -+#endif /* __ELAN4_PCI_H */ -diff -urN clean/include/elan4/registers.h linux-2.6.9/include/elan4/registers.h ---- clean/include/elan4/registers.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/registers.h 2005-03-03 11:28:50.000000000 -0500 -@@ -0,0 +1,1587 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_REGISTERS_H -+#define _ELAN4_REGISTERS_H -+ -+#ident "$Id: registers.h,v 1.120 2005/03/03 16:28:50 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/registers.h,v $*/ -+ -+/* -+ * Header file for internal slave mapping of the ELAN4 registers -+ */ -+ -+#define E4_CACHELINE_SIZE (64) -+#define E4_STACK_ALIGN (64) -+ -+#ifndef _ASM -+ -+#include -+#include -+#include -+ -+typedef volatile struct _E4_CacheSets -+{ -+ E4_uint64 Set0[1024]; /* 8k bytes per set */ -+ E4_uint64 Set1[1024]; /* 8k bytes per set */ -+ E4_uint64 Set2[1024]; /* 8k bytes per set */ -+ E4_uint64 Set3[1024]; /* 8k bytes per set */ -+} E4_CacheSets; -+ -+typedef union e4_cache_tag -+{ -+ struct { -+ E4_uint32 pad0; /* Undefined value when read */ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 :10; /* 0-9 - reserved */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 :1; /* 31 - */ -+#else -+ E4_uint32 :1; /* 31 - */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 :10; /* 0-9 - reserved */ -+#endif -+ } s; -+ E4_uint64 Value; -+} E4_CacheTag; -+ -+typedef volatile struct _E4_CacheTags -+{ -+ E4_CacheTag Tags[4][128]; /* 8k bytes per set, 64 byte cache line */ -+} E4_CacheTags; -+ -+#define E4_NumCacheSets 4 -+#define E4_NumCacheLines 128 -+#define E4_CacheLineSize 64 -+#define E4_CacheSize (E4_NumCacheSets * E4_NumCacheLines * E4_CacheLineSize) -+#define E4_CacheSetSize (E4_NumCacheLines * E4_CacheLineSize) -+ -+/* -+ * Run Queue pointers -+ * -+ * [62:35] FrontPointer[30:3] -+ * [33:32] Size Value -+ * [30:3] BackPointer[30:3] -+ */ -+#define E4_QueuePtrMask (0x7ffffff8ULL) -+#define E4_QueueSizeMask 3 -+#define E4_QueueEntrySize sizeof (E4_uint64) -+ -+#define E4_Queue8KBytes 0 -+#define E4_Queue64KBytes 1 -+#define E4_Queue512KBytes 2 -+#define E4_Queue4MBytes 3 -+ -+#define E4_QueueFrontValue(val,size) ((val) | (size)) -+#define E4_QueueValue(queue,size) (((E4_uint64) E4_QueueFrontValue(queue,size)) << 32 | ((E4_uint64) (queue))) -+ -+#define E4_QueueFrontPointer(val) /* extract queue front pointer from register */\ -+ (((val) >> 32) & E4_QueuePtrMask) -+#define E4_QueueBackPointer(val) /* extract queue back pointer from register */ \ -+ ((val) & E4_QueuePtrMask) -+#define E4_QueueSizeValue(val) /* extract queue size value from register */ \ -+ (((val) >> 32) & E4_QueueSizeMask) -+#define E4_QueueSize(value) /* queue size in bytes from size value */ \ -+ (1 << (((value)*3) + 13)) -+#define E4_QueueOffsetMask(fptr)\ -+ ((8192 << (((fptr) & E4_QueueSizeMask) << 3)) - 1) -+#define E4_QueueOffset(fptr)\ -+ ((fptr) & E4_QueueOffsetMask(fptr)) -+#define E4_QueueFrontPointerInc(fptr) \ -+ ( ((fptr) & ~E4_QueueOffsetMask(fptr)) | ((E4_QueueOffset(fptr) + 8) & E4_QueueOffsetMask(fptr)) ) -+ -+typedef union _E4_QueuePtr -+{ -+ E4_uint64 Value; -+ struct { -+ E4_uint32 Back; -+ E4_uint32 Front; -+ } s; -+} E4_QueuePtr; -+ -+/* -+ * DMA processor status register. -+ * -+ * [48] FirstSendTrans Set for the first packet of a dma. -+ * [47:46] TimeSliceCount Time left to timeslice. -+ * [45] DmaLastPacket Set for the last packet of a dma. -+ * [44] CurrPrefetchDma Dma descriptor the prefetcher is valid for. -+ * [43:39] PrefetcherState Dma prefetcher's state machines value. -+ * [38:33] PacketAssemblyState Packet assembler's state machines value. -+ * [32:31] PrefetcherWakeupFnt Dma prefetcher's wakeup function. -+ * [30:28] PacketAssWakeupFnt Packet assembler's wakeup function. -+ * [27] AckBufferValid Packet ack is valid. -+ * [26] PrefetchedDataProblem Had either a data read fault or data error. Valid if AckBufferValid. -+ * [25] PrefetcherHalting Prefetch data about to stop for halt. Valid if AckBufferValid. -+ * [24] PacketTimeout Packet timeout. Sent an EopError. Valid if AckBufferValid set. -+ * [23:22] PacketAckValue Packet ack type. Valid if AckBufferValid set. -+ * [21:20] FaultUnitNo Set if the dma prefetcher has faulted. -+ * [19:17] TrapType Packet assembler's trap type. -+ * [16] PrefetcherFault Set if the dma prefetcher has faulted for this DMA unit. -+ * [15] Remote The Dma had been issued remotly -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define DPROC_FirstSendTrans(s) ((unsigned)((s) >> 48) & 1) -+#define DPROC_TimeSliceCount(s) ((unsigned)(((s) >> 46) & 3) -+#define DPROC_DmaLastPacket(s) ((unsigned)((s) >> 45) & 1) -+#define DPROC_CurrPrefetchDma(s) ((unsigned)((s) >> 44) & 1) -+#define DPROC_PrefetcerState(s) ((unsigned)((s) >> 39) & 0x1f) -+#define DPROC_PacketAssemblerState(s) ((unsigned)((s) >> 33) & 0x1f) -+#define DPROC_PrefetcherWakeupFn(s) ((unsigned)((s) >> 31) & 3) -+#define DPROC_PacketAssemblerWakeupFn(s)((unsigned)((s) >> 28) & 3) -+#define DPROC_AckBufferValid(s) ((unsigned)((s) >> 27) & 1) -+#define DPROC_PrefetcherDataProblem(s) ((unsigned)((s) >> 26) & 1) -+#define DPROC_PrefetcherHalting(s) ((unsigned)((s) >> 25) & 1) -+#define DPROC_PacketTimeout(s) ((unsigned)((s) >> 24) & 1) -+#define DPROC_PacketAckValue(s) ((unsigned)((s) >> 22) & 3) -+#define DPROC_FaultUnitNo(s) ((unsigned)((s) >> 20) & 3) -+#define DPROC_TrapType(s) ((unsigned)((s) >> 17) & 7) -+#define DPROC_PrefetcherFault(s) ((unsigned)((s) >> 16) & 1) -+#define DPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define DPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define DPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Command processor status register. -+ * -+ * [26:21] CPState procs current state. -+ * [20] WakeupFnt procs wakeup function. -+ * [19:16] TrapValue procs trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define CPROC_TrapType(s) ((unsigned)((s) >> 16) & 0xf) -+#define CPROC_Remote(s) ((unsigned)((s) >> 15) & 0x1) -+#define CPROC_Priority(s) ((unsigned)((s) >> 14) & 0x1) -+#define CPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Event processor status register. -+ * -+ * [34:30] CPState event procs current state. -+ * [29:28] WakeupFnt event procs wakeup function. -+ * [27:20] EventCopySize This is the number of DWords to still be copied on a copy dword event. -+ * [19] EProcPort1Fault CUN_EventProc1 has taken a translation fault. -+ * [18] EProcPort0Fault CUN_EventProc0 has taken a translation fault. -+ * [17:16] TrapValue event proc's trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define EPROC_CPState(s) ((unsigned)((s) >> 30) & 0x1f) -+#define EPROC_WakeupFunction(s) ((unsigned)((s) >> 28) & 3) -+#define EPROC_CopySize(s) ((unsigned)((s) >> 20) & 0xFF) -+#define EPROC_Port1Fault(s) ((unsigned)((s) >> 19) & 1) -+#define EPROC_Port0Fault(s) ((unsigned)((s) >> 18) & 1) -+#define EPROC_TrapType(s) ((unsigned)((s) >> 16) & 3) -+#define EPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define EPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define EPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Thread processor status register. -+ * -+ * [39:24] MemPortBusy 16 bits of port busy flags for all FFU memory ports. -+ * [23:21] Reads as zero -+ * [20:18] TQState State vector for thread queuing proc. -+ * [17] HighRunQueueFull High priority run queue is full -+ * [16] LowRunQueueFull Low priority run queue is full -+ * [15] ReadyHigh More runable threads at high priority -+ * [14] ReadyLow More runable threads at low priority -+ * [13:0] Context procs current context. -+ */ -+#define TPROC_HighRunQueueFull(s) ((unsigned)((s) >> 17) & 1) -+#define TPROC_LowRunQueueFull(s) ((unsigned)((s) >> 16) & 1) -+#define TPROC_ReadyHigh(s) ((unsigned)((s) >> 15) & 1) -+#define TPROC_ReadyLow(s) ((unsigned)((s) >> 14) & 1) -+#define TPROC_Context(s) ((unsigned)((s) & 0x3fff)) -+ -+/* -+ * Input processor status register -+ * -+ * [55] Last Trans (~EOP) -+ * [54] First Trans (~EOP) -+ * [53] Channel (~EOP) -+ * [52] Bad Length (~EOP) -+ * [51:50] Trans CRC Status (~EOP) -+ * [49:48] EOP type -+ * [47] EOP trap -+ * [46] Trapping priority -+ * [45] Trapping Channel -+ * [44:43] Bad ack sent -+ * [42:41] Good ack sent -+ * [40] Queueing Packet (~EOP) -+ * [39:36] Channel trapped bits -+ * [35:32] IProc Trap Value -+ * [31:16] Network Context (~EOP) -+ * [15:0] Transaction Type (~EOP) -+ */ -+#define IPROC_LastTrans(s) ((unsigned)((s) >> 55) & 0x1) -+#define IPROC_FirstTrans(s) ((unsigned)((s) >> 54) & 0x1) -+#define IPROC_Channel(s) ((unsigned)((s) >> 53) & 0x1) -+#define IPROC_BadLength(s) ((unsigned)((s) >> 52) & 0x1) -+#define IPROC_TransCRCStatus(s) ((unsigned)((s) >> 50) & 0x3) -+#define IPROC_EOPType(s) ((unsigned)((s) >> 48) & 0x3) -+#define IPROC_EOPTrap(s) ((unsigned)((s) >> 47) & 0x1) -+#define IPROC_InputterPri(s) ((unsigned)((s) >> 46) & 0x1) -+#define IPROC_InputterChan(s) ((unsigned)((s) >> 45) & 0x1) -+#define IPROC_BadAckSent(s) ((unsigned)((s) >> 43) & 0x3) -+#define IPROC_GoodAckSent(s) ((unsigned)((s) >> 41) & 0x3) -+#define IPROC_QueueingPacket(s) ((unsigned)((s) >> 40) & 0x1) -+#define IPROC_ChannelTrapped(s) ((unsigned)((s) >> 36) & 0xF) -+#define IPROC_TrapValue(s) ((unsigned)((s) >> 32) & 0xF) -+#define IPROC_NetworkContext(s) ((unsigned)((s) >> 16) & 0xFFFF) -+#define IPROC_TransactionType(s) ((unsigned)(s) & 0xFFFF) -+ -+/* values for IPROC_TransCRCStatus */ -+#define CRC_STATUS_GOOD (0) -+#define CRC_STATUS_DISCARD (1) -+#define CRC_STATUS_ERROR (2) -+#define CRC_STATUS_BAD (3) -+ -+/* values for IPROC_EOPType */ -+#define EOP_GOOD (1) -+#define EOP_BADACK (2) -+#define EOP_ERROR_RESET (3) -+ -+/* -+ * Interrupt register bits -+ * -+ * There are up to four sources of interrupt for the MSI port. -+ * The Elan will request 4 ports but may only get either 2 or 1 port. The Interrupts are assigned -+ * as shown below: -+ * No Of MSI ints Low Prioity High Prioity -+ * 4 Event Ints OtherInts Inputer Ints Hard Error ints. -+ * i.e. Dproc, Tproc, Sten. HighPri and LowPri Link errs, ECC errs, -+ * -+ * 2 Event Ints All other interrupts. -+ * 1 All together. -+ * -+ * It is not safe to change the number of sources of interrupt while there may be outstanding, -+ * unserviced interrupts pending. -+ * There two forms of encoding. This has been provided in case an MSI implimentation assumes either -+ * a high value to have a high priority or a low value to have a high priority. This is controled -+ * by a bit in the Elan Pci Control register. -+ */ -+#define INT_LinkPortKeyFail (1<<18) -+#define INT_PciMemErr (1<<17) -+#define INT_SDRamInt (1<<16) -+#define INT_LinkError (1<<15) -+#define INT_IProcCh1HighPri (1<<14) -+#define INT_IProcCh0HighPri (1<<13) -+#define INT_IProcCh1LowPri (1<<12) -+#define INT_IProcCh0LowPri (1<<11) -+#define INT_DiscardingHighPri (1<<10) -+#define INT_DiscardingLowPri (1<<9) -+#define INT_CProcHalted (1<<8) -+#define INT_TProcHalted (1<<7) -+#define INT_DProcHalted (1<<6) -+#define INT_EProc (1<<5) -+#define INT_TProc (1<<4) -+#define INT_CProc (1<<3) -+#define INT_Dma1Proc (1<<2) -+#define INT_Dma0Proc (1<<1) -+#define INT_MainInterrupt (1<<0) -+ -+#define INT_Units (INT_EProc | INT_TProc | INT_CProc | INT_Dma1Proc | INT_Dma0Proc) -+#define INT_Inputters (INT_IProcCh1HighPri | INT_IProcCh0HighPri | INT_IProcCh1LowPri | INT_IProcCh0LowPri) -+#define INT_Discarding (INT_DiscardingHighPri | INT_DiscardingLowPri) -+#define INT_Halted (INT_CProcHalted | INT_TProcHalted | INT_DProcHalted) -+#define INT_ErrorInterrupts (INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+#define INT_MSI0 INT_MainInterrupt -+#define INT_MSI1 (INT_Units | INT_Discarding | INT_Halted) -+#define INT_MSI2 (INT_Inputters) -+#define INT_MSI3 (INT_ErrorInterrupts) -+ -+#define E4_INTERRUPT_REG_SHIFT 32 -+#define E4_INTERRUPT_MASK_MASK (0xffffffffULL) -+ -+/* -+ * Trap type values - see trapvalues.v -+ */ -+ -+#define CommandProcInserterError 0x1 -+#define CommandProcPermissionTrap 0x2 -+#define CommandProcSendTransInvalid 0x3 -+#define CommandProcSendTransExpected 0x4 -+#define CommandProcDmaQueueOverflow 0x5 -+#define CommandProcInterruptQueueOverflow 0x6 -+#define CommandProcMemoryFault 0x7 -+#define CommandProcRouteFetchFault 0x8 -+#define CommandProcFailCountZero 0x9 -+#define CommandProcAddressAlignment 0xa -+#define CommandProcWaitTrap 0xb -+#define CommandProcMultipleGuards 0xc -+#define CommandProcOpenOnGuardedChan 0xd -+#define CommandProcThreadQueueOverflow 0xe -+#define CommandProcBadData 0xf -+ -+#define DmaProcNoFault 0x0 -+#define DmaProcRouteFetchFault 0x1 -+#define DmaProcFailCountError 0x2 -+#define DmaProcPacketAckError 0x3 -+#define DmaProcRunQueueReadFault 0x4 -+#define DmaProcQueueOverflow 0x5 -+ -+#define EventProcNoFault 0x0 -+#define EventProcAddressAlignment 0x1 -+#define EventProcMemoryFault 0x2 -+#define EventProcCountWrapError 0x3 -+ -+#define InputNoFault 0x0 -+#define InputAddressAlignment 0x1 -+#define InputMemoryFault 0x2 -+#define InputInvalidTransType 0x3 -+#define InputDmaQueueOverflow 0x4 -+#define InputEventEngineTrapped 0x5 -+#define InputCrcErrorAfterPAckOk 0x6 -+#define InputEopErrorOnWaitForEop 0x7 -+#define InputEopErrorTrap 0x8 -+#define InputDiscardAfterAckOk 0x9 -+ -+typedef struct _E4_Sched_Status -+{ -+ E4_uint32 Status; -+ E4_uint32 Restart; -+} E4_Sched_Status; -+ -+typedef struct _E4_Input_Ptrs -+{ -+ E4_uint32 ContextFilterTable; -+ E4_uint32 TrapBasePtr; -+} E4_Input_Ptrs; -+ -+#define SCH_StopLowPriQueues (1 << 0) -+#define SCH_DProcHalt (1 << 1) -+#define SCH_TProcHalt (1 << 2) -+#define SCH_CProcHalt (1 << 3) -+ -+#define SCH_CProcTimeout600ns (1 << 4) -+#define SCH_CProcTimeout1p4us (2 << 4) -+#define SCH_CProcTimeout3p0us (3 << 4) -+#define SCH_CProcTimeout6p2us (4 << 4) -+#define SCH_CProcTimeout12p6us (5 << 4) -+#define SCH_CProcTimeout25p4us (6 << 4) -+#define SCH_CProcTimeout51p0us (7 << 4) -+#define SCH_DiscardLowPriInput (1 << 7) -+#define SCH_DiscardHighPriInput (1 << 8) -+ -+#define SCH_DProcTimeslice64us (0 << 9) -+#define SCH_DProcTimeslice128us (1 << 9) -+#define SCH_DProcTimeslice256us (2 << 9) -+#define SCH_DProcTimeslice512us (3 << 9) -+ -+#define SCH_Halt (SCH_StopLowPriQueues | SCH_DProcHalt | SCH_TProcHalt | SCH_CProcHalt) -+#define SCH_Discard (SCH_DiscardLowPriInput | SCH_DiscardHighPriInput) -+ -+#define SCH_RestartCProc (1 << 0) -+#define SCH_RestartTProc (1 << 1) -+#define SCH_RestartEProc (1 << 2) -+#define SCH_RestartDma0Proc (1 << 3) -+#define SCH_RestartDma1Proc (1 << 4) -+#define SCH_RestartDmaPrefetchProc (1 << 5) -+#define SCH_RestartCh0LowPriInput (1 << 6) -+#define SCH_RestartCh1LowPriInput (1 << 7) -+#define SCH_RestartCh0HighPriInput (1 << 8) -+#define SCH_RestartCh1HighPriInput (1 << 9) -+#define SCH_ClearLinkErrorInt (1 << 10) -+#define SCH_ContextFilterFlush (1 << 11) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+#define LS_Mod45Changed (1 << 8) /* Mod45 bit has changed. Error setr to force reset. */ -+#define LS_PAckNotSeenError (1 << 9) /* PAck value not returned for this packet. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_LinkInValue 0x9 -+#define LRS_PllDelValue 0xA -+#define LRS_ClockEven 0xB -+#define LRS_ErrorVal8to0 0xC -+#define LRS_ErrorVal17to9 0xD -+#define LRS_ErrorVal26to18 0xE -+#define LRS_ErrorVal35to27 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+typedef struct _E4_CommandControl -+{ -+ volatile E4_uint32 CommandQueueDescsBase; -+ volatile E4_uint32 CommandRequeuePtr; -+} E4_CommandControl; -+ -+#define E4_CommandRequeueBusy 0x80000000 /* Test against read value of CommandRequeuePtr */ -+#define E4_CommandRequeueHighPri 0x1 /* Will requeue onto the high pri queue */ -+#define E4_QueueDescPtrMask 0x7fffffe0 -+ -+typedef struct _E4_CommandQueueDesc -+{ -+ E4_uint64 CQ_QueuePtrs; -+ E4_uint64 CQ_HoldingValue; /* 32 bit value for 32 bit accesses or OutOfOrderMask*/ -+ E4_uint64 CQ_AckBuffers; /* Space for 32 4 bit ack buffer values. */ -+ E4_uint64 CQ_Control; -+} E4_CommandQueueDesc; -+ -+/* -+ * Rev A - CQ_QueuePtrs -+ * [63] Unused Should be set to zero. -+ * [62:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] TimedOut Will be set if the queue timedout executing a command. -+ * [1] Priority When set the queue runs at high priority. -+ * [0] Error If this becomes set all new data written to the queue is * discarded. -+ * -+ * Rev B - CQ_QueuePtrs -+ * [63] TimedOut Will be set if the queue timedout executing a command. -+ * [62] Priority When set the queue runs at high priority. -+ * [61] QueueType 1=will accept unordered 64 bit PCI writes. 0=will accept ordered 32 or 64 bit PCI writes. -+ * [60:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] OrderControl Holds bit 8 of last PCI accesses. Used by a reordering queue. -+ * [1:0] ErrorType This field has the current error status of the queue. -+ */ -+ -+/* Common between revA and RevB */ -+#define CQ_PtrMask (0x7ffffff8) /* 31 bit sdram address */ -+#define CQ_PtrOffsetMask (0x7fff8) -+#define CQ_PtrBaseMask (0x7ff80000) -+ -+#define CQ_InsertPtrShift (3 - 3) /* InsertPtr is 64 bit aligned */ -+#define CQ_SizeShift (32) -+# define CQ_Size1K 0 -+# define CQ_Size8K 1 -+# define CQ_Size64K 2 -+# define CQ_Size512K 3 -+# define CQ_SizeMask 3 -+ -+#define CQ_CompletedPtrShift (35 - 3) /* CompletedPtr is 64 but aligned */ -+ -+#define CQ_Used (1ull << 31) -+#define CQ_Trapped (1ull << 34) -+ -+#define CQ_QueuePtrsValue(Size,Inserter,Completer) \ -+ (((E4_uint64) (Size) << CQ_SizeShift) | \ -+ ((E4_uint64) (Inserter) << CQ_InsertPtrShift) | \ -+ ((E4_uint64) (Completer) << CQ_CompletedPtrShift)) -+ -+#define CQ_InsertPtr(QueuePtrs) \ -+ (((E4_uint64) QueuePtrs) & CQ_PtrMask) -+ -+#define CQ_CompletedPtr(QueuePtrs) \ -+ (((E4_uint32)((QueuePtrs) >> CQ_CompletedPtrShift) & CQ_PtrOffsetMask) | \ -+ (CQ_InsertPtr(QueuePtrs) & CQ_PtrBaseMask)) -+ -+#define CQ_Size(SizeVal) (1024 * (1 << ((SizeVal)*3))) -+ -+/* Rev A specific */ -+#define CQ_RevA_Error (1 << 0) -+#define CQ_RevA_Priority (1 << 1) -+#define CQ_RevA_TimedOut (1 << 2) -+ -+/* Rev B specific */ -+#define CQ_RevB_ErrorType(QueuePtr) ((QueuePtr) & (3 << 0)) -+# define CQ_RevB_NoError (0ull << 0) -+# define CQ_RevB_Overflowed (1ull << 0) -+# define CQ_RevB_InvalidWriteSize (2ull << 0) -+# define CQ_RevB_InvalidWriteOrder (3ull << 0) -+#define CQ_RevB_OrderControl (1ull << 2) -+ -+#define CQ_RevB_QueueType(QueuePtr) ((QueuePtr) & (1ull << 61)) -+# define CQ_RevB_ReorderingQueue (1ull << 61) -+# define CQ_RevB_32bitWriteQueue (0ull << 61) -+ -+#define CQ_RevB_Priority (1ull << 62) -+#define CQ_RevB_TimedOut (1ull << 62) -+ -+/* -+ * CQ_AckBuffers - Packet Ack Values -+ */ -+#define PackOk (0x0) -+#define PackTestFail (0x1) -+#define PackDiscard (0x2) -+#define PackError (0x7) -+#define PackTimeout (0x8) -+#define PackWaiting (0xF) -+#define PackValue(val,chan) (((val) >> ((chan) * 4)) & 0xf) -+ -+/* -+ * CQ_Control -+ * [63:35] ExtractPtr -+ * [34] Unused -+ * [33:32] ChannelNotCompleted -+ * [31:24] Permissions -+ * [23:16] RestartCount Decremented after each restart. Will trap when zero -+ * [15:14] Unused Should be set to zero -+ * [13:0] Context -+ */ -+#define CQ_Context(Control) ((E4_uint32) ((Control) >> 0) & 0x3fff) -+#define CQ_RestartCount(Control) ((E4_uint32) ((Control) >> 16) & 0x7f) -+#define CQ_ChannelNotCompleted(Control) ((E4_uint32) ((Control) >> 32) & 3) -+#define CQ_ExtractPtr(Control) ((E4_uint32) ((Control) >> 32) & 0xFFFFFFF8) -+ -+#define CQ_RestartCountShift 16 -+ -+#define CQ_SetEventEnableBit (1 << 24) -+#define CQ_WaitEventEnableBit (1 << 25) -+#define CQ_ModifyEnableBit (1 << 26) -+#define CQ_WriteEnableBit (1 << 27) -+#define CQ_ThreadStartEnableBit (1 << 28) -+#define CQ_DmaStartEnableBit (1 << 29) -+#define CQ_STENEnableBit (1 << 30) -+#define CQ_InterruptEnableBit (1 << 31) -+#define CQ_EnableAllBits (0xFF000000) -+#define CQ_PermissionMask (0xFF000000) -+ -+#define CQ_ControlValue(Cntx, RestartCount, Permissions) \ -+ (((Cntx) & 0x3fff) | (((RestartCount) & 0xff) << 16) | ((Permissions) & CQ_PermissionMask)) -+ -+/* -+ * This file describes the slave address map of Elan4. -+ * -+ * Elan4 has two PCI 64 bit base address registers. One is setup for elan -+ * local memory and the other is for the command port, elan registers and ebus. -+ * -+ * This file describes the command port, elan registers and ebus BAR. This is a -+ * 26 bit base address register and is split up as follows: -+ * 1 The ebus requires 21 bits of address. 26'h3e00000 to 26'h3ffffff -+ * 2 The control regsiters requires 16 bits of address. 26'h3df0000 to 26'h3dfffff -+ * 3 The command port has the rest. This give just under 8k command ports or about 123 per -+ * processor of a 64 node SMP. -+ */ -+ -+/* BAR1 contains the command queues followed by the registers and the Ebus - and is 26 bits */ -+/* each command queue has an 8K page associated with it */ -+#define CQ_CommandMappingSize (1 << 13) -+#define CQ_NumCommandDescs ((1 << (26 - 13))) -+#define CQ_CommandDescsAlignment ((1 << (26 - 13)) * sizeof (E4_CommandQueueDesc)) -+ -+/* control reg bits i.e. E4_DataBusMap.SysControlReg */ -+#define CONT_EN_ALL_SETS (1ULL << 0) /* enable cache */ -+#define CONT_MMU_ENABLE (1ULL << 1) /* bit 0 enables mmu */ -+#define CONT_CACHE_HASH_TABLE (1ULL << 2) /* cache up hash table entries */ -+#define CONT_CACHE_CHAINS (1ULL << 3) /* cache up chain entries */ -+#define CONT_CACHE_ROOT_CNTX (1ULL << 4) /* cache root context table for routes and filters. */ -+#define CONT_CACHE_STEN_ROUTES (1ULL << 5) /* cache up sten packet routes */ -+#define CONT_CACHE_DMA_ROUTES (1ULL << 6) /* cache up dma packet routes */ -+ -+#define CONT_CACHE_NONE 0ULL -+#define CONT_CACHE_ALL (CONT_CACHE_HASH_TABLE | CONT_CACHE_CHAINS | CONT_CACHE_ROOT_CNTX | \ -+ CONT_CACHE_STEN_ROUTES | CONT_CACHE_DMA_ROUTES) -+ -+/* This controls the format size and position of the MMU hash tables. */ -+#define CONT_INHIBIT_MAX_CHAIN_ITEMS (1ULL << 7) /* Prevents the MaxChainItems value of 1024 from forcing a translation miss */ -+#define CONT_TABLE0_MASK_SIZE_SHIFT 8 /* Defines the size of hash table 0 */ -+#define CONT_TABLE0_PAGE_SIZE_SHIFT 13 /* Set the page size for hash table 0 */ -+#define CONT_TABLE1_MASK_SIZE_SHIFT 16 /* Defines the size of hash table 1 */ -+#define CONT_TABLE1_PAGE_SIZE_SHIFT 21 /* Set the page size for hash table 1 */ -+#define CONT_TWO_HASH_TABLES (1ULL << 24) /* Sets the MMU to use two hash tables. If not set only 0 used. */ -+#define CONT_2K_NOT_1K_DMA_PACKETS (1ULL << 25) /* Used to select the default DMA packet size. */ -+#define CONT_ALIGN_ALL_DMA_PACKETS (1ULL << 26) /* Will force all dma packets to be aligned to a page.*/ -+#define CONT_DIRECT_MAP_PCI_WRITES (1ULL << 27) /* Will force pci writes to write and flush the dcache.*/ -+#define CONT_TLB_FLUSH (1ULL << 28) /* Invalidates the TLB and indicates when flushed */ -+#define CONT_CLEAR_WALK_WROTE_TABLES (1ULL << 29) /* Used to guarantee that the elan is using new PTE values. */ -+#define CONT_ROUTE_FLUSH (1ULL << 30) /* Invalidates all route cache entries. */ -+#define CONT_CLEAR_LINKPORT_INT (1ULL << 31) /* Clears the Linkport key fail interrupt. Reads as 0. */ -+#define CONT_CLEAR_SDRAM_ERROR (1ULL << 32) /* Clears an EEC error interrupt. Reads as 0. */ -+ -+/* -+ * These are extra control bits used for testing the DLLs of the SDRAM interface. Most of the Sdram -+ * control bits are defined in xsdram.h -+ */ -+#define SDRAM_FIXED_DLL_DELAY_SHIFT 47 -+#define SDRAM_FIXED_DLL_DELAY_BITS 5 -+#define SDRAM_FIXED_DLL_DELAY_MASK ((1ULL << SDRAM_FIXED_DLL_DELAY_BITS) - 1ULL) -+#define SDRAM_FIXED_DLL_DELAY(Value) ((SDRAM_FIXED_DLL_DELAY_MASK & (Value)) << SDRAM_FIXED_DLL_DELAY_SHIFT) -+#define SDRAM_FIXED_DELAY_ENABLE (1ULL << 52) -+#define SDRAM_GET_DLL_DELAY(Value) (((Value) >> SDRAM_FIXED_DLL_DELAY_SHIFT) & SDRAM_FIXED_DLL_DELAY_MASK) -+ -+#define SDRAM_166_DLL_CORRECTION_FACTOR 3 /* This is to allow for SSO and ringing on the DQ lines */ -+#define SDRAM_150_DLL_CORRECTION_FACTOR 2 /* This is to allow for SSO and ringing on the DQ lines */ -+ -+#define PAGE_SIZE_4K 0x0 -+#define PAGE_SIZE_8K 0x1 -+#define PAGE_SIZE_64K 0x2 -+#define PAGE_SIZE_512K 0x3 -+#define PAGE_SIZE_2M 0x4 -+#define PAGE_SIZE_4M 0x5 -+#define PAGE_SIZE_64M 0x6 -+#define PAGE_SIZE_512M 0x7 -+ -+#define PAGE_SIZE_MASK 0x7 -+#define PAGE_MASK_MASK 0x1f -+ -+/* control reg bits i.e. E4_DataBusMap.LinkControlReg */ -+#define LCONT_REVA_GREEN_LED (1 << 0) -+#define LCONT_REVA_YELLOW_LED (1 << 1) -+#define LCONT_REVA_RED_LED (1 << 2) -+#define LCONT_REVA_ENABLE_LED_DRIVE (1 << 3) /* Enable manual setting of the Leds to the bits set above. */ -+ -+#define LCONT_REVB_DISABLE_TLB_PREFETCH (1 << 0) -+#define LCONT_REVB_DISABLE_CRC_ERROR_CHECKING (1 << 1) -+ -+ -+#define LCONT_EN_SYS_WRITES (1 << 4) /* Enable linkport writes to sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_SYS_READS (1 << 5) /* Enable linkport reads from sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_USER_WRITES (1 << 6) /* Enable linkport writes to user registers. i.e. all of E4_User_Regs. */ -+#define LCONT_EN_USER_READS (1 << 7) /* Enable linkport reads from user registers. i.e. all of E4_User_Regs. */ -+ -+#define LCONT_TEST_VALUE_MASK 0x3ff /* Value used for test writes and link boundary scan. */ -+#define LCONT_TEST_VALUE_SHIFT 8 -+#define LCONT_TEST_VALUE(Value) ((LCONT_LINK_STATE_MASK & (Value)) << LCONT_TEST_VALUE_SHIFT) -+ -+/* -+ * State read from LINK_STATE when TEST_VALUE is set to the following values. -+ * TEST_VALUE LINK_STATE read TEST_VALUE LINK_STATE read -+ * 000 - Data delay count 0 008 - Data delay count 8 -+ * 001 - Data delay count 1 009 - Link in value -+ * 002 - Data delay count 2 00a - PLL delay -+ * 003 - Data delay count 3 00b - Clock Delay -+ * 004 - Data delay count 4 00c ? ErrorVal8to0 -+ * 005 - Data delay count 5 00d ? ErrorVal17to9 -+ * 006 - Data delay count 6 00e ? ErrorVal26to18 -+ * 007 - Data delay count 7 00f ? ErrorVal35to27 -+ */ -+ -+#define LCONT_TEST_CONTROL_MASK 0x3 /* Selects and controls the action of the LINK_STATE value. */ -+#define LCONT_TEST_CONTROL_SHIFT 18 -+ -+#define LCONT_READ_ERRORS 0 /* {Mod45RequestChanged, FifoOverflowError, DataError, PhaseError, -+ * DeskewError, LockError, Locked, LinkNotReady} */ -+#define LCONT_READ_STATE 1 /* Read valus addressed by TEST_CONTROL value */ -+#define LCONT_FIX_LINK_DELAYS 2 /* Sets delays to TEST_CONTROL value */ -+#define LCONT_BOUNDARY_SCAN 3 /* Puts link into boundary scan. Outputs TEST_CONTROL value to link, -+ * reads LINK_STATE from link. */ -+ -+#define LCONT_LINK_STATE_MASK 0x3ff /* Read only */ -+#define LCONT_LINK_STATE_SHIFT 20 /* Read only */ -+#define LCONT_LINK_STATE(ControlRegValue) (LCONT_LINK_STATE_MASK & ((ControlRegValue) >> LCONT_LINK_STATE_SHIFT)) -+ -+/* control reg bits i.e. E4_DataBusMap.LinkContSettings */ -+#define LCONT_MOD45_DISABLE (1 << 0) /* is set the link will try to run in TNB mode. */ -+#define LCONT_CONFIG_PHASE_MASK 0x7 /* This set the delay through the phase alignment buffer. */ -+#define LCONT_CONFIG_PHASE_SHIFT 1 -+ -+#define LCONT_PLL_REF_VAL_BITS_MASK 0x7f /* This is the divide value on the LinkIn clock to form the comms PLL */ -+#define LCONT_PLL_REF_VAL_BITS_SHIFT 4 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_FORCE_COMMSCLK_LOCAL (1 << 11) /* This must be set at one end of a back to back Elan configuration. */ -+#define LCONT_LVDS_VOLTAGE_BITS_MASK 0x3 /* This is used to set the voltage swing on the LVDS link output pads. */ -+#define LCONT_LVDS_VOLTAGE_BITS_SHIFT 12 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_VOD_170 0 /* Approximate differential voltage swing in mV of link outputs into */ -+#define LCONT_VOD_360 1 /* a 100 ohm diferential load. */ -+#define LCONT_VOD_460 2 -+#define LCONT_VOD_550 3 -+ -+#define LCONT_LVDS_TERMINATION_MASK 0x3 /* This set the resistor values of the internal single ended termation */ -+#define LCONT_LVDS_TERMINATION_SHIFT 14 /* resistors of the link input and comms input clcok. */ -+ -+#define LCONT_TERM_55_OHM 0 /* Resistor values for internal termination of LVDS pads. */ -+#define LCONT_TERM_50_OHM 1 -+#define LCONT_TERM_AUTO_OHM 2 /* Should normally be set to auto. */ -+#define LCONT_TERM_45_OHM 3 -+ -+#define LCONT_LVDS_EN_TERM_UPDATE (1 << 47) /* This should be asserted and deasserted if LCONT_LVDS_TERMINATION is changed. */ -+ -+/* Macros used to access and construct MMU hash table and chain entries. */ -+/* -+ * Each hash entry is made up of a 64 byte block. Each entry hash two tags where each -+ * tag has 4 PTE's. PTE's 0 to 2 use the bottom 48 bits of a 64 bit word and PTE 3 -+ * uses the top 16 bits of 3 64 bit words. -+ * -+ * These macros can be used to build a single PTE. PTE3 needs to be built into a 48 bit -+ * object before they can be used. -+ */ -+#define PTE_ENTRY_MASK 0x0000ffffffffffffULL -+#define PTE_TYPE_MASK 0x000000000000000fULL -+#define PTE_PERM_MASK 0x00000000000000f0ULL -+#define PTE_PERM_TYPE_MASK 0x00000000000000ffULL -+#define PTE_REF_MASK 0x0000000000000100ULL -+#define PTE_PPN_MASK 0x00007ffffffffe00ULL -+#define PTE_MOD_MASK 0x0000800000000000ULL -+#define PTE_TOPADDR_MASK 0x0000600000000000ULL -+ -+#define PTE_MOD_SHIFT 47 -+#define PTE_PPN_SHIFT 9 -+#define PTE_REF_SHIFT 8 -+#define PTE_PERM_SHIFT 4 -+#define PTE_TYPE_SHIFT 0 -+ -+#define PTE_PADDR_SHIFT (12 - 9) /* Physical addresses are shifted down 3 this to go into the PTE */ -+ -+ -+/* Values required for tag 3 */ -+#define PTE_REF_3 0x0100000000000000ULL -+#define PTE_MOD_3 0x8000000000000000ULL -+#define PTE_ENTRY_MASK_3 0xffff000000000000ULL -+#define PTE_PERM_TYPE_MASK_3 0x00ff000000000000ULL -+#define PTE_ENTRY_3_FOR_0(NewPte) ((NewPte << (48)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_1(NewPte) ((NewPte << (32)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_2(NewPte) ((NewPte << (16)) & PTE_ENTRY_MASK_3) -+ -+/* Values required for the tags */ -+#define TAG_CONTEXT_MASK 0x0000000000003fffULL -+#define TAG_ADDRESS_MASK 0xfffffffff8000000ULL -+#define TAG_CHAINPTR_18TO6_MASK 0x0000000007ffc000ULL -+#define TAG_CHAINPTR_LOW_SHIFT (14 - 6) -+#define TAG_CHAINPTR_30TO19_MASK 0x0000000003ffc000ULL -+#define TAG_CHAINPTR_HIGH_SHIFT (19 - 14) -+#define TAG_COPY_BIT 0x0000000004000000ULL -+ -+/* -+ * This takes number loaded into the control register and returns the page size as a power of two. -+ */ -+ -+#define E4_PAGE_SIZE_TABLE E4_uint32 const PageSizeTable[] = {12, 13, 16, 19, 21, 22, 26, 29} -+#define E4_PAGE_SIZE_TABLE_SIZE (sizeof(PageSizeTable)/sizeof(PageSizeTable[0])) -+ -+/* -+ * This macro generates a hash block index. -+ * -+ * Cntx This is the 14 bit context. It should not be larger than 14 bits. -+ * VAddr This is the 64 bit virtual address. It does not require any masking and can be a byte address. -+ * PageSize This is the value loaded into the control register for this hash table. -+ * HashTableMask This should be set mask out upper bits past the end of the hash table. -+ */ -+#define E4MMU_SHIFT_ADDR(VAddr, Shift) \ -+ ((((E4_uint32)(VAddr)) >> (Shift)) | (((E4_uint32)((VAddr) >> 32)) << (32 - (Shift)))) -+ -+#define E4MMU_CONTEXT_SCRAMBLE(Cntx) \ -+ ((((Cntx) << 8) | ((Cntx) >> 6)) ^ (((Cntx) << 15) | ((Cntx) << 1))) -+ -+#define E4MMU_HASH_INDEX(Cntx, VAddr, PageShift, HashTableMask) \ -+ ((E4MMU_SHIFT_ADDR(VAddr, (PageShift) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(Cntx)) & (HashTableMask)) -+ -+#define E4MMU_TAG(vaddr,ctx) (((vaddr) & TAG_ADDRESS_MASK) | ((ctx) & TAG_CONTEXT_MASK)) -+ -+#define E4MMU_TAG2VADDR(tag,hashidx,PageShift,HashTableMask) \ -+ (((tag) & TAG_ADDRESS_MASK) | ((((hashidx) ^ E4MMU_CONTEXT_SCRAMBLE((tag) & TAG_CONTEXT_MASK)) & (HashTableMask)) << ((PageShift + 2)))) -+ -+/* -+ * Detailed bit descriptions for the tags and PTE's are better done with the macros -+ * defined above. -+ */ -+typedef struct _E4_HashTableEntry -+{ -+ E4_uint64 Tag[2]; -+ E4_uint64 TagPTE[2][3]; -+} E4_HashTableEntry; -+ -+#define E4MMU_TAG_OFFSET(tag) ((tag) << 3) -+#define E4MMU_PTE_LOW_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3)) -+#define E4MMU_PTE_HIGH_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3) + 4) -+#define E4MMU_PTE3_WORD0_OFFSET(tag) ((((tag)*3 + 2) << 3) + 6) -+#define E4MMU_PTE3_WORD1_OFFSET(tag) ((((tag)*3 + 3) << 3) + 6) -+#define E4MMU_PTE3_WORD2_OFFSET(tag) ((((tag)*3 + 4) << 3) + 6) -+ -+ -+/* -+ * Hash0AddrBits is the size of the hash table in bytes as a power of 2. -+ * e.g. 11 would give 32 hash entries where each entry is 64 bytes. -+ */ -+#define SETUP_HASH_TABLES(Hash0PageSize, Hash0AddrBits, Hash1PageSize, Hash1AddrBits) \ -+ (((Hash0PageSize) << CONT_TABLE0_PAGE_SIZE_SHIFT) | \ -+ ((Hash0AddrBits) << CONT_TABLE0_MASK_SIZE_SHIFT) | \ -+ ((Hash1PageSize) << CONT_TABLE1_PAGE_SIZE_SHIFT) | \ -+ ((Hash1AddrBits) << CONT_TABLE1_MASK_SIZE_SHIFT)) -+ -+/* ECC status register */ -+#define ECC_Addr(s) ((s) & 0x7ffffff8ULL) -+#define ECC_Syndrome(s) (((s) >> 32) & 0xffffULL) -+#define ECC_RisingDQSSyndrome(s) (((s) >> 32) & 0xffULL) -+#define ECC_FallingDQSSyndrome(s) (((s) >> 40) & 0xffULL) -+#define ECC_UncorrectableErr(s) (((s) >> 48) & 1ULL) -+#define ECC_MultUncorrectErrs(s) (((s) >> 49) & 1ULL) -+#define ECC_CorrectableErr(s) (((s) >> 50) & 1ULL) -+#define ECC_MultCorrectErrs(s) (((s) >> 51) & 1ULL) -+ -+/* Permission type saved in a PTE. This is a four bit field */ -+#define PERM_Disabled 0x0 -+#define PERM_Unused 0x1 -+#define PERM_LocDataRead 0x2 -+#define PERM_LocDataWrite 0x3 -+#define PERM_LocRead 0x4 -+#define PERM_LocExecute 0x5 -+#define PERM_ReadOnly 0x6 -+#define PERM_LocWrite 0x7 -+#define PERM_LocEventOnly 0x8 -+#define PERM_LocEventWrite 0x9 -+#define PERM_RemoteEvent 0xa -+#define PERM_RemoteAll 0xb -+#define PERM_RemoteReadOnly 0xc -+#define PERM_RemoteWriteLocRead 0xd -+#define PERM_DataReadWrite 0xe -+#define PERM_NoFault 0xf -+ -+#define PERM_Mask 0xf -+ -+/* Permission type hints to device driver */ -+#define PERM_Preload 0x10 -+ -+#define PTE_SetPerm(Perm) (((Perm) & PERM_Mask) << 4) -+ -+/* Control info saved in the lookup field of the TLB */ -+#define PTE_PciNotLocal (1ULL << 0) /* Directs the access to the PCI interface */ -+#define PTE_BigEndian (1ULL << 1) /* Valid for PCI entries only */ -+#define PTE_RelaxedOrder (1ULL << 2) /* Valid for PCI entries only */ -+#define PTE_DontSnoop (1ULL << 3) /* Valid for PCI entries only */ -+ -+#define PTE_UseFixedSet (1ULL << 1) /* Value for non PCI entries only */ -+#define PTE_CommandQueue (1ULL << 2) /* Value for non PCI entries only */ -+#define PTE_SetFixedSetNo(Set) ((((Set) & 3) << 2) | PTE_UseFixedSet) -+ -+#define PTE_TypeBitsMask (0xfULL) -+#define PTE_PermissionTypeMask (0xfULL << 4) -+#define PTE_Referenced (1ULL << 8) -+#define PTE_PhysicalPageNoMask (0x7ffffffffe00ULL) -+#define PTE_Modified (1ULL << 47) -+ -+#define PTE_PhysicalAddrShiftIntoPTE (12 - 9) -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E4_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+#define TLB_NUM_ENTRIES 16 -+/* -+ * The following macros are used with the test access port (TlbLineValue) for the TLBs. -+ */ -+#define TLV_DoPciAccess (1ULL << 0) -+#define TLV_CommandAccess (1ULL << 1) -+#define TLV_DoCacheAccess (1ULL << 2) -+#define TLV_notStartTLBWalk (1ULL << 3) -+#define TLV_UseFixedSet (1ULL << 4) -+#define TLV_BigEndian (1ULL << 4) -+#define TLV_RelaxedOrder (1ULL << 5) -+#define TLV_DontSnoop (1ULL << 6) -+#define TLV_FixedSetNo_MASK (3ULL << 5) -+#define TLV_PciTypeBits_MASK (7ULL << 4) -+#define TLV_LookupBits_MASK (0x7fULL) -+#define TLV_MissErr (1ULL << 7) -+#define TLV_TypeBits (0xffULL) -+ -+#define TLV_PhysicalAddr_MASK (0x3fffffffff000ULL) -+ -+#define TLV_TlbTesting (1ULL << 51) -+#define TLV_SelectUnitsTlbRead (1ULL << 52) -+#define TLV_SelectTProcTlbRead (1ULL << 53) -+ -+#define TLV_TlbLineSelect_MASK (0xf) -+#define TLV_UnitsTlbLineSelect_SHIFT (54) -+#define TLV_TProcTlbLineSelect_SHIFT (59) -+#define TLV_EnableUnitsTlbRead (1ULL << 58) -+#define TLV_EnableTProcTlbRead (1ULL << 63) -+ -+/* -+ * Use this macro to enable direct testing of the Units TLB. -+ * When Line is in the range 0 to 15 a TLB line is selected for reading or writing. -+ * When Line is set to -1 the tlb will be activated to perform a match. -+ */ -+#define TLV_UnitsTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableUnitsTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_UnitsTlbLineSelect_SHIFT))) -+#define TLV_TProcTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableTProcTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_TProcTlbLineSelect_SHIFT))) -+ -+/* -+ * Thread_Trap_State -+ * see f_RegFileControl.v TProcStatus -+ */ -+#define TS_HaltThread (1 << 0) -+#define TS_TrapForTooManyInstructions (1 << 1) -+#define TS_InstAccessException (1 << 2) -+#define TS_Unimplemented (1 << 3) -+#define TS_DataAccessException (1 << 4) -+#define TS_DataAlignmentError (1 << 5) -+#define TS_TrapForUsingBadData (1 << 6) -+#define TS_TrapTypeMask (0x7f) -+#define TS_DataPortNo(ts) (((ts) >> 7) & 7) -+#define TS_TrappedFlag (1 << 10) -+#define TS_MemLock (1 << 11) -+#define TS_XCCshift 12 -+#define TS_XCCmask 0xff -+#define TS_ICC(ts) (((ts) >> 12) & 15) -+#define TS_XCC(ts) (((ts) >> 16) & 15) -+#define TS_InstValid_F (1 << 20) -+#define TS_InstValid_R (1 << 21) -+#define TS_InstValid_E (1 << 22) -+#define TS_InstValid_W (1 << 23) -+#define TS_HighPriority (1 << 24) -+#define TS_RemoteThread (1 << 25) -+#define TS_TProcTranslationInProgress (1 << 26) -+#define TS_MemLock_E (1 << 27) -+ -+/* Thread run queue entries */ -+typedef struct E4_ThreadRegs -+{ -+ E4_uint64 Registers[7]; -+} E4_ThreadRegs; -+ -+typedef struct E4_TProcQueueEntry -+{ -+ E4_ThreadRegs Regs; /* XXXX: jon check this */ -+ E4_uint64 Context; /* XXXX: jon check this */ -+} E4_TProcQueueEntry; -+ -+typedef struct E4_DProcQueueEntry -+{ -+ E4_DMA Desc; -+ E4_uint64 Pad; -+} E4_DProcQueueEntry; -+ -+/* -+ * Packet acknowledge values. -+ */ -+#define E4_PAckOk 0 -+#define E4_PAckTestFail 1 -+#define E4_PAckDiscard 2 -+#define E4_PAckError 3 -+ -+/* -+ * return values from breaktest instruction. -+ */ -+#define ICC_CARRY_BIT (0x1ULL << 0) /* Breaktest: Load pending */ -+#define ICC_ZERO_BIT (0x1ULL << 1) /* Breaktest: Time to break */ -+#define ICC_SIGNED_BIT (0x1ULL << 2) /* Breaktest: Another thread ready */ -+#define ICC_TPROC_RDY_LOW_PRI (0x1ULL << 3) -+#define ICC_TPROC_RDY_HIGH_PRI (0x1ULL << 4) -+#define ICC_RUNNING_HIGH_PRI (0x1ULL << 5) -+#define ICC_RUNNING_AS_REMOTE (0x1ULL << 6) -+#define ICC_TIME_TO_BREAK (0x1ULL << 7) -+#define ICC_RS1LOAD_PENDING (0x1ULL << 8) -+#define ICC_TPROC_HALT (0x1ULL << 9) -+ -+/* -+ * Main Interrupt cookies -+ * [63:14] user cookie -+ * [13:0] context -+ */ -+#define E4_MAIN_INT_SHIFT 14 -+#define E4_MAIN_INT_COOKIE(cookie) ((cookie) >> E4_MAIN_INT_SHIFT) -+#define E4_MAIN_INT_CTX(cookie) ((cookie) & 0x3FFF) -+ -+typedef E4_uint64 E4_MainIntEntry; -+ -+#define E4_MainIntEntrySize sizeof (E4_MainIntEntry) -+ -+/* -+ * The internal databus is 64 bits wide. -+ * All writes to the internal registers MUST be made with 64 bit write operations. -+ * These can be made up of pairs 32 bit writes on the PCI bus. The writes will be -+ * treated as nops if they are performed with two separate 32 bit writes. -+ */ -+typedef volatile struct _E4_DataBusMap -+{ -+ E4_uint64 InputTrans[4][16]; /* 0x000 */ -+ -+ E4_uint64 Dma0TransAddr; /* 0x200 */ -+ E4_DMA Dma0Desc; /* Current Dma0 registers */ /* 0x208 */ -+ -+ E4_uint64 Dma1TransAddr; /* 0x240 */ -+ E4_DMA Dma1Desc; /* Current Dma1 registers */ /* 0x248 */ -+ -+ E4_uint64 Dma0LastPacketSize; /* 0x280 */ -+ E4_uint64 Dma0ThisPacketSize; /* 0x288 */ -+ E4_uint64 Dma0DescSizeInProg; /* 0x290 */ -+ E4_uint64 Dma0BytesToPrefetch; /* 0x298 */ -+ E4_uint64 Dma0PrefetchAddr; /* 0x2a0 */ -+ E4_uint64 EventCountAndType; /* 0x2a8 */ -+ E4_uint64 EventParameters[2]; /* 0x2b0 */ -+ -+ E4_uint64 Dma1LastPacketSize; /* 0x2c0 */ -+ E4_uint64 Dma1ThisPacketSize; /* 0x2c8 */ -+ E4_uint64 Dma1DescSizeInProg; /* 0x2d0 */ -+ E4_uint64 Dma1BytesToPrefetch; /* 0x2d8 */ -+ E4_uint64 Dma1PrefetchAddr; /* 0x2e0 */ -+ E4_Input_Ptrs InputTrapAndFilter; /* 0x2e8 */ -+ E4_uint64 EventAddress; /* 0x2f0 */ -+ E4_QueuePtr MainIntQueuePtrs; /* 0x2f8 */ -+ -+ E4_uint64 Event_Copy[16]; /* 0x300 */ -+ -+ E4_uint64 CommandCopy[7]; /* 0x380 */ -+ E4_uint64 CommandHold; /* 0x3b8 */ -+ -+ E4_uint64 InputQueueDesc[4]; /* 0x3c0 */ -+ -+ /* Run queue Pointers */ -+ E4_uint64 DProcLowPriPtrs; /* 0x3e0 */ -+ E4_uint64 DProcHighPriPtrs; /* 0x3e8 */ -+ E4_uint64 TProcLowPriPtrs; /* 0x3f0 */ -+ E4_uint64 TProcHighPriPtrs; /* 0x3f8 */ -+ -+ E4_uint64 CProcStatus; /* 0x400 */ -+ E4_uint64 TProcStatus; /* 0x408 */ -+ E4_uint64 IProcStatus; /* 0x410 */ -+ E4_uint64 EProcStatus; /* 0x418 */ -+ E4_uint64 DProc0Status; /* 0x420 */ -+ E4_uint64 DProc1Status; /* 0x428 */ -+ E4_Sched_Status SchedStatus; /* 0x430 */ -+ -+ E4_uint64 LoadIProcCntxFilter; /* Will load one of 4 cntx filter regs. Write only */ /* 0x438 */ -+ -+ E4_CommandControl CommandControl; /* 0x440 */ -+ E4_uint64 CommandCacheTestPort; /* 0x448 */ -+ E4_uint64 CommandLowPriRunPtrs; /* 0x450 */ -+ E4_uint64 CommandHighPriRunPtrs; /* 0x458 */ -+ E4_uint64 CommandSchedDataPort[4]; /* 0x460 */ -+ -+ E4_uint64 DmaRouteBuffer[2][2]; /* Write only. Should not be written to. */ /* 0x480 */ -+ E4_uint64 StenRouteBuffer[2]; /* Write only. Should not be written to. */ /* 0x4a0 */ -+ E4_uint64 pad4[0x098 - 0x096]; /* 0x4b0 */ -+ -+ E4_uint64 DmaAlignmentPort[8]; /* Write only. Should only be written to clear the prev reg. */ /* 0x4c0 */ -+ -+ E4_uint64 MmuBlockEntry[8]; /* Used for hash table and chain fetches */ /* 0x500 */ -+ E4_uint64 WriteUnitsTlbLine[3]; /* 0x550 */ -+ E4_uint64 pad5; /* 0x540 */ -+ E4_uint64 WriteTProcTlbLine[3]; /* 0x568 */ -+ E4_uint64 pad6; /* 0x540 */ -+ -+ E4_uint64 MmuTableBasePtrs; /* Both tables packed into a single 64 bit value */ /* 0x580 */ -+ E4_uint64 MmuFaultAndRootCntxPtr; /* Both packed into a single 64 bit value */ /* 0x588 */ -+ E4_uint64 UnitsVAddr; /* 0x590 */ -+ E4_uint64 TProcVAddr; /* 0x598 */ -+ E4_uint64 UnitsCntx; /* 0x5a0 */ -+ E4_uint64 TProcCntx; /* Read only. Writes access VProcCacheWritePort */ /* 0x5a8 */ -+ E4_uint64 FaultAddrReg; /* 0x5b0 */ -+ E4_uint64 FaultTypeAndContextReg; /* 0x5b8 */ -+ -+ E4_uint32 SysControlReg; /* 0x5c0 */ -+ E4_uint32 CacheTagValue; /* 0x5c4 */ -+ E4_uint64 TlbLineValue; /* 0x5c8 */ -+ E4_uint64 SDRamConfigReg; /* 0x5d0 */ -+ E4_uint32 InterruptMask; /* 0x5d8 */ -+ E4_uint32 InterruptReg; /* 0x5dc */ -+ E4_uint64 SDRamECCStatus; /* 0x5e0 */ -+ E4_uint32 LinkControlReg; /* 0x5e8 */ -+ E4_uint32 LinkContSettings; /* 0x5ec */ -+ E4_uint64 LinkPortKey; /* 0x5f0 */ -+ E4_uint64 LinkPortLock; /* 0x5f8 */ -+ -+ E4_uint64 SDRamWriteBuffer[4][8]; /* 0x600 */ -+ E4_uint64 SDRamReadBuffer[4][8]; /* 0x700 */ -+ -+ E4_uint64 TProcRegs[64]; /* 0x800 */ -+ E4_uint64 TProcStartUp[8]; /* Not to be used except by the elan itself */ /* 0xa00 */ -+ -+ E4_uint64 LoadPending; /* 0xa40 */ -+ E4_uint64 StortPending; /* 0xa48 */ -+ E4_uint64 DirtyBits; /* 0xa50 */ -+ E4_uint64 BadBits; /* 0xa58 */ -+ -+ E4_uint64 ICachePort_Cntl_Addr; /* 0xa60 */ -+ E4_uint64 Thread_Trap_State; /* 0xa68 */ -+ -+/* Instruction buffer (4 * 32 bit words) */ -+ E4_uint64 nPC_W; /* 0xa70 */ -+ E4_uint64 PC_W; /* 0xa78 */ -+ -+ E4_uint64 ICacheFillData[8]; /* 0xa80 */ -+ E4_uint64 ICachePort[8]; /* 0xac0 */ -+ -+ E4_uint64 PciDataBufs[4][8]; /* 0xb00 */ -+ -+ E4_uint64 CommandQueueBuffer[128]; /* 0xc00 */ -+} E4_DataBusMap; -+ -+/* -+ * These macros are used to setup the thread pcoessors ICache. -+ */ -+#define E4_ICacheTagAddrShift 6 -+#define E4_AccessICacheRams 1 -+#define E4_InvalidTagValue 0xffffffffffffffffULL -+#define E4_ICacheSizeInBytes (1024*16) -+#define E4_ICacheLineSizeInBytes (64) -+#define E4_ICacheLines (E4_ICacheSizeInBytes/E4_ICacheLineSizeInBytes) -+#define E4_ICachePortSize ( (sizeof((E4_DataBusMap *) 0)->ICachePort) / \ -+ (sizeof((E4_DataBusMap *) 0)->ICachePort[0])) -+ -+#define E4_ICacheFixupInsn 0xc0b02f95ull /* st1 [%r0 + 0xf95] */ -+#define E4_ICacheFixupAddr 0xf95ull -+#define E4_ICacheFixupOffset 0xfc0 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E4_EventInt -+{ -+ E4_uint64 ForceAlign; -+ struct { -+ E4_uint32 IntCookie; -+ E4_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E4_EventInt; -+ -+/* -+ * The following are used to interpret a fault status register. -+ */ -+ -+/* -+ * FSR[14:0] - AccessType -+ * -+ * T = Type bit -+ * S = size bit. Size is in units of 64 bits or 8 bytes. -+ * E = Byte end pointer. Used to define the last written byte of the last 64 bits written. -+ * D = Data type bit. Used for endian conversion in the PCI interface. -+ * C = Used by the cache to decide if this access should allocate a cache line. -+ * d = Set if dma read or write data data. This is used to guarantee order at the PCI interface. -+ * A = Access type used to check permissions by the MMU in a virtual access. -+ * P = Part Write. If set some byte enables may be used. Effects the action of a cache miss. -+ */ -+ -+/* FSR[7:0] */ -+/* bit 7 => virtual write */ -+#define AT_VirtualWriteAccBit (1 << 7) /* AAADDdC1EEESSSS = Virtual Write */ -+#define AT_VirtualWriteSizeMask 0xf /* size of write access (0 => 128 bytes) */ -+#define AT_VirtualWriteEndPtrShift 4 /* end byte pointer for part write block */ -+#define AT_VirtualWriteEndPtrMask 0x7 -+ -+/* else bit 6 => virtual read */ -+#define AT_VirtualReadAccBit (1 << 6) /* AAADDdC01SSSSSS = Virtual Read */ -+#define AT_VirtualReadSizeMask 0x3f /* size of read access (0 => 512 bytes) */ -+ -+/* else => special access */ -+#define AT_SelBitsMask 0xf /* Bits to select the type of acces from */ -+#define AT_SelBitsShift 0x4 -+#define AT_SpecialRd (0x0 << 4) /* AAADDdC0000TTTT = Special read Access */ -+#define AT_SpecialWr (0x1 << 4) /* AAADDdC0001TTTT = Special write Access */ -+#define AT_PhysicalRd (0x2 << 4) /* AAADDdC00100SSS = Physical Read */ -+#define AT_PhysicalWr (0x3 << 4) /* AAADDdC0011PSSS = Physical write */ -+ -+#define AT_OtherSizeMask 0xf /* Size bits used by all other accesses. 0=128 bytes */ -+#define AT_SpecialBitsMask 0xf /* Bits used to define the special access types */ -+#define AT_CacheSizeBitsMask 0x7 /* Size bits used for local accesses. 0=64 */ -+#define AT_CachePhysPartWriteBit 0x8 /* This bit is set if the access is a part write to the cache */ -+ -+/* Special memory access operations */ -+#define AT_RegAccess 0x0 -+#define AT_GetCntxFilter 0xe /* Only used by special reads */ -+#define AT_RouteFetch 0xf /* Only used by special reads */ -+ -+/* FSR[9:8] */ -+#define AT_NonAlloc (1 << 8) /* 1=Do not fill cache with this data */ -+#define AT_DmaData (1 << 9) /* This is a DMA read access. Required to guarantee dma read order. */ -+ -+/* FSR[11:10] - Data Type - defines data type for endian conversion in PCI interface*/ -+#define AT_BlkDataTyMask 0x3 -+#define AT_BlkDataTyShift 10 -+ -+#define AT_BlkDataType(FSR) (((FSR) >> AT_BlkDataTyShift) & AT_BlkDataTyMask) -+#define AT_TypeByte 0x0 -+#define AT_TypeHWord 0x1 -+#define AT_TypeWord 0x2 -+#define AT_TypeDWord 0x3 -+ -+/* FSR[14:12] - Access Permissions */ -+#define AT_PermBitsMask 0x7 -+#define AT_PermBitsShift 12 -+ -+#define AT_Perm(FSR) (((FSR) >> AT_PermBitsShift) & AT_PermBitsMask) -+#define AT_PermLocalDataRead 0x0 -+#define AT_PermLocalDataWrite 0x1 -+#define AT_PermRemoteRead 0x2 -+#define AT_PermRemoteWrite 0x3 -+#define AT_PermExecute 0x4 -+#define AT_PermLocalEvent 0x5 -+#define AT_PermRemoteEvent 0x7 -+ -+/* FSR[22:15] - reason for fault */ -+ -+#define FSR_WalkForThread (1 << 15) /* The thread processor caused the fault */ -+#define FSR_Walking (1 << 16) /* The fault was caused during a hash table access */ -+#define FSR_NoTranslationsFound (1 << 17) /* The hash table did not contain a matching tag */ -+#define FSR_WalkingProtectionFault (1 << 18) /* A protection fault was detected while walking */ -+#define FSR_HashTable1 (1 << 19) /* Was accessing hash table 1 not 0 */ -+#define FSR_RouteVProcErr (1 << 20) /* This is an invalid vproc for a route fetch */ -+#define FSR_FaultForBadData (1 << 21) /* Bad data (double bit ECC error) while performing a walk access */ -+#define FSR_FaultForMaxChainCount (1 << 22) /* The Elan4 has walked a chain of 1024 items. */ -+ -+typedef volatile struct _E4_FaultSave -+{ -+ E4_uint64 FSRAndFaultContext; /* Bits 0-31 : FaultContext. Bits 32-63 : FaultStatus Register */ -+ E4_uint64 FaultAddress; -+} E4_FaultSave; -+ -+#define FaultSaveContext(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) & 0xFFFFFFFF)) -+#define FaultSaveFSR(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) >> 32)) -+ -+typedef union E4_TrTypeCntx -+{ -+ E4_uint32 TypeContext; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 Type:16; /* Transaction type field */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E4_TrTypeCntx; -+ -+#define MAX_TRAPPED_TRANS 28 -+#define TRANS_DATA_DWORDS 16 -+#define TRANS_DATA_BYTES 128 -+#define NO_OF_INPUT_CHANNELS 4 -+ -+#define CH0_LOW_PRI_CHAN 0 -+#define CH1_LOW_PRI_CHAN 1 -+#define CH0_HIGH_PRI_CHAN 2 -+#define CH1_HIGH_PRI_CHAN 3 -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef struct _E4_IprocTrapHeader -+{ -+ E4_uint64 TrAddr; -+ E4_uint64 IProcStatusCntxAndTrType; -+} E4_IprocTrapHeader; -+ -+typedef struct _E4_IprocTrapData -+{ -+ E4_uint64 Data[TRANS_DATA_DWORDS]; -+} E4_IprocTrapData; -+ -+/* -+ * This struct defines the trap state for the inputers. It requires a contiguous 16K byte block of local memory. -+ * The channel bits have been grouped to the low end of the address to force all Identify cookies to use the -+ * same cache line. -+ */ -+typedef struct _E4_IprocTrapState -+{ -+ E4_IprocTrapData TrData[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_IprocTrapHeader TrHeader[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_uint64 pad[8*NO_OF_INPUT_CHANNELS]; -+} E4_IprocTrapState; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E4_LowPriQueueSize 0x400 -+#define E4_HighPriQueueSize 0x100 -+ -+typedef struct _E4_FaultSaveArea -+{ -+ E4_FaultSave TProcData[8]; -+ E4_FaultSave TProcInst; -+ E4_FaultSave Dummy[7]; -+ E4_FaultSave SchedProc; -+ E4_FaultSave DProc; -+ E4_FaultSave EventProc; -+ E4_FaultSave IProc; -+ E4_FaultSave DProcData[4]; -+ E4_FaultSave QReadData[8]; -+} E4_FaultSaveArea; -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E4_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E4_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+typedef struct _E4_CommandPort -+{ -+ volatile E4_uint64 Command[1024]; /* a whole 8k page */ -+} E4_CommandPort; -+ -+/* -+ * This is the allocation of unit numbers within the ELAN. It is used to extract the fault address -+ * and fault type after a unit has trapped on a memory fetch. Only units that can generate traps -+ * have been included. -+ */ -+#define CUN_TProcData0 0x00 -+#define CUN_TProcData1 0x01 -+#define CUN_TProcData2 0x02 -+#define CUN_TProcData3 0x03 -+#define CUN_TProcData4 0x04 -+#define CUN_TProcData5 0x05 -+#define CUN_TProcData6 0x06 -+#define CUN_TProcData7 0x07 -+#define CUN_TProcInst 0x08 -+ -+/* memory current unit numbers -+ * TProc data bus */ -+#define CUN_DProcPA0 0x10 -+#define CUN_DProcPA1 0x11 -+#define CUN_DProcPrefetch 0x12 -+#define CUN_CommandProc 0x13 -+#define CUN_DProcData0 0x14 /* Dma prefetch reads. */ -+#define CUN_DProcData1 0x15 /* Dma prefetch reads. */ -+#define CUN_DProcData2 0x16 /* Dma prefetch reads. */ -+#define CUN_DProcData3 0x17 /* Dma prefetch reads. */ -+ -+#define CUN_IProcLowPri 0x18 -+#define CUN_IProcHighPri 0x19 -+#define CUN_Spare0 0x1A -+#define CUN_Spare1 0x1B -+#define CUN_Spare2 0x1C -+#define CUN_ThreadQueue 0x1D -+#define CUN_EventProc0 0x1e -+#define CUN_EventProc1 0x1f -+ -+#define CUN_Entries 0x20 -+ -+typedef struct E4_Registers -+{ -+ E4_CacheTags Tags; /* 4k bytes c000 -> cfff */ -+ E4_DataBusMap Regs; /* 4k bytes d000 -> dfff */ -+ E4_User_Regs uRegs; /* 8k bytes e000 -> ffff */ -+} E4_Registers; -+ -+#define I2cCntl_I2cPortWrite (0 << 0) -+#define I2cCntl_I2cPortRead (1 << 0) -+#define I2cCntl_I2cPortGenStopBit (1 << 1) -+#define I2cCntl_I2cPortGenRestartBit (1 << 2) -+#define I2cCntl_I2cPortAccFailed (1 << 3) -+#define I2cCntl_I2cStopped (1 << 4) -+#define I2cCntl_I2cWakeupFailed (1 << 5) -+#define I2cCntl_I2cFastMode (1 << 6) -+#define I2cCntl_I2cPortBusy (1 << 7) -+ -+#define I2cCntl_LedI2cRegBase_Mask 0x7f -+#define I2cCntl_I2cUpdatingLedReg (1 << 7) -+ -+#define I2cCntl_InvertLedValues (1 << 0) /* read/write */ -+#define I2cCntl_LedRegWriteFailed (1 << 1) /* read only */ -+#define I2cCntl_EEPromLoadFailed (1 << 2) /* read only */ -+#define I2cCntl_InhibitI2CRom (1 << 3) /* read only */ -+#define I2cCntl_BadRomCrc (1 << 4) /* read only */ -+#define I2cCntl_MapInI2cConfigData (1 << 5) /* read/write */ -+#define I2cCntl_SampleNewLedValues (1 << 6) /* read/write */ -+#define I2cCntl_ClearLinkError (1 << 7) /* write only */ -+ -+typedef struct E4_I2C -+{ -+ volatile E4_uint8 I2cWrData; -+ volatile E4_uint8 I2cRdData; -+ volatile E4_uint8 I2cPortControl; -+ volatile E4_uint8 I2cLedBase; -+ volatile E4_uint8 I2cStatus; -+ volatile E4_uint8 I2cLedsValue; -+ volatile E4_uint16 I2cPad; -+ -+ E4_uint8 pad[256 - sizeof(E4_uint64)]; -+ -+ E4_uint8 UnchangedElan4ConfigRegs[256]; -+ E4_uint8 I2cRomConfigShadowValues[256]; -+ E4_uint8 ChangedElan4ConfigRegs[256]; -+} E4_I2C; -+ -+typedef struct _E4_ContextControlBlock -+{ -+ E4_uint32 Filter; /* Use a Network context to index for this value */ -+ E4_uint32 VirtualProcessTable; /* Use a local context to index for this value */ -+} E4_ContextControlBlock; -+ -+/* -+ * Filter -+ * [13:0] Context -+ * [14] DiscardAll -+ * [15] AckAll -+ * [16] HighPri -+ * [17] CountStats -+ * [31:18] Unused -+ */ -+#define E4_FILTER_STATS (1 << 17) -+#define E4_FILTER_HIGH_PRI (1 << 16) -+#define E4_FILTER_ACKOK_ALL (1 << 15) -+#define E4_FILTER_DISCARD_ALL (1 << 14) -+#define E4_FILTER_CONTEXT_MASK (0x3FFF) -+ -+/* -+ * VirtualProcessTable -+ * [8:0] Unused -+ * [12:9] Size num vp entries = 512 << Size -+ * [30:13] Pointer -+ * [31] Valid -+ */ -+#define E4_VPT_MIN_ENTRIES 512 -+#define E4_VPT_VALID ((unsigned)1 << 31) -+#define E4_VPT_PTR_SHIFT 0 -+#define E4_VPT_SIZE_SHIFT 9 -+#define E4_VPT_SIZE_MASK 0xf -+#define E4_VPT_NUM_VP(vpt_val) (E4_VPT_MIN_ENTRIES << (((vpt_val) >> E4_VPT_SIZE_SHIFT) & E4_VPT_SIZE_MASK)) -+#define E4_VPT_VALUE(ptr,size) (((ptr) << E4_VPT_PTR_SHIFT) | ((size) << E4_VPT_SIZE_SHIFT)) -+ -+ -+/* Virtual Process Table */ -+typedef struct _E4_VirtualProcessEntry -+{ -+ E4_uint64 Values[2]; -+} E4_VirtualProcessEntry; -+ -+/* -+ * Entries have the following format - rtX is a packed route -+ * -+ * |rt11|rt10|rt9 |rt8 |rt7 |rt6 |rt5 |rt4 |rt3 |rt2 |rt2 |rt0 |PAAADD RRRRRR| -+ * |output context |rt23|rt22|rt21|rt20|rt19|rt18|rt17|rt16|rt15|rt14|rt13|rt12| -+ */ -+ -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_CTXT_MASK (~((1ull << ROUTE_CTXT_SHIFT)-1)) -+#define ROUTE_CTXT_VALUE(ctx) (((E4_uint64) ctx) << ROUTE_CTXT_SHIFT) -+ -+#define ROUTE_PACKED_OFFSET 16 -+#define ROUTE_NUM_PACKED 24 -+ -+/* defines for first flit of a route */ -+#define FIRST_TIMEOUT(Val) ((Val) << 14) /* [15:14] */ -+#define FIRST_SYSTEM_PACKET (1 << 13) /* [13] */ -+#define FIRST_FLOOD_PACKET (1 << 12) /* [12] */ -+#define FIRST_HIGH_PRI (1 << 11) /* [11] */ -+#define FIRST_AGE(Val) ((Val) << 7) /* [10:7] */ -+#define FIRST_OPTIONS_MASK (0xFF80) -+ -+/* [6:0] unpacked 1st route value */ -+#define FIRST_INVALID (0) -+#define FIRST_ROUTE(Val) (0x08 | (Val)) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_INVALID (0) -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+#endif /* _ASM */ -+/* The MMU root context pointer has a mask to bounds check -+ * it - this is computed as follows. -+ */ -+#define E4_CONTEXT_MASK(num) (((num) >= 0x2000) ? 0x00 : \ -+ ((num) >= 0x1000) ? 0x80 : \ -+ ((num) >= 0x0800) ? 0xc0 : \ -+ ((num) >= 0x0400) ? 0xe0 : \ -+ ((num) >= 0x0200) ? 0xf0 : \ -+ ((num) >= 0x0100) ? 0xf8 : \ -+ ((num) >= 0x0080) ? 0xfc : \ -+ ((num) >= 0x0040) ? 0xfe : 0xff) -+/* -+ * This generates the size field for a virtual process table. -+ * Size defined as 2^n no of 8K pages. -+ * Single cycle route fetches are possible if the minimum vproc table size is 8k. -+ */ -+#define E4_GEN_VPT_SIZE(Size) (((Size) & E4_VPT_SIZE_MASK) << E4_VPT_SIZE_SHIFT) -+ -+#define COMMAND_RUN_QUEUE_BITS (13 + 2) /* 8K entries of 4 bytes. This is fixed in hardware. */ -+#define COMMAND_DESCS_SPACE_BITS (13 + 5) /* 8K entries of 32 bytes. This is fixed in hardware. */ -+#define COMMAND_INSERTER_CACHE_ENTRIES 16 -+ -+#define COM_TEST_PORT_ADDR_MASK 0xfULL -+#define COM_TEST_PORT_ADDR_SH 0 -+ -+/* -+ * The flush register is accessed through the CommandControl register. -+ * The address is naturally alligned. It also positions the command descriptors in memory. -+ * When no command queues need flushing it should be or with COM_FLUSH_INVALID. This sets -+ * it to the top command queue descriptor. This cannot be accessed from the PCI. -+ */ -+#define COM_ENABLE_DEQUEUE (1 << 4) -+#define COM_FLUSH_DESCRIPTOR_MASK 0x7fffffe0ULL -+#define COM_FLUSH_INVALID 0x0003ffe0ULL -+ -+ -+/* -+ * Elan4 BAR1 is split up as follows : -+ * -+ * RevA -+ * 0x3f00000 EBUS other -+ * 0x3e00000 EBUS ROM -+ * 0x3dfc000 registers -+ * 0x0000000 command ports -+ * -+ * RevB -+ * 0x3ffc000 registers -+ * 0x3ff8000 padding -+ * 0x3ff6000 i2c registers -+ * 0x0000000 command ports -+ */ -+#define ELAN4_BAR1_SIZE (1 << 26) /* 64M */ -+#define ELAN4_REG_SIZE (1 << 14) /* 16K */ -+ -+#define ELAN4_REVA_EBUS_SIZE (1 << 21) /* 2M */ -+#define ELAN4_REVA_EBUS_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REVA_EBUS_SIZE) -+#define ELAN4_REVA_REG_OFFSET (ELAN4_REVA_EBUS_OFFSET - ELAN4_REG_SIZE) -+#define ELAN4_REVA_NUM_COMMAND_QUEUES (ELAN4_REVA_REG_OFFSET >> 13) -+ -+#define ELAN4_REVA_EBUS_ROM_SIZE (1 << 20) /* 1M */ -+#define ELAN4_REVA_EBUS_ROM_OFFSET 0 -+ -+#define ELAN4_REVB_I2C_PADDING (1 << 14) /* 16K */ -+#define ELAN4_REVB_I2C_SIZE (1 << 13) /* 8k */ -+#define ELAN4_REVB_REG_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REG_SIZE) -+#define ELAN4_REVB_I2C_OFFSET (ELAN4_REVB_REG_OFFSET - ELAN4_REVB_I2C_PADDING - ELAN4_REVB_I2C_SIZE) -+#define ELAN4_REVB_NUM_COMMAND_QUEUES (ELAN4_REVB_I2C_OFFSET >> 13) -+ -+#endif /* notdef _ELAN4_REGISTERS_H */ -diff -urN clean/include/elan4/sdram.h linux-2.6.9/include/elan4/sdram.h ---- clean/include/elan4/sdram.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/sdram.h 2003-09-24 09:55:55.000000000 -0400 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_SDRAM_H -+#define __ELAN4_SDRAM_H -+ -+#ident "$Id: sdram.h,v 1.8 2003/09/24 13:55:55 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/sdram.h,v $*/ -+ -+/* Include header file generated by sdram configuration program */ -+#include -+ -+/* SDRAM bank shift definitions */ -+#define SDRAM_0_CS_SHIFT 25 -+#define SDRAM_1_CS_SHIFT 27 -+#define SDRAM_2_CS_SHIFT 28 -+#define SDRAM_3_CS_SHIFT 29 -+ -+#define SDRAM_BANK_SHIFT(cfg) \ -+ (((cfg >> SDRAM_RamSize_SH) & 3) == 0 ? SDRAM_0_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 1 ? SDRAM_1_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 2 ? SDRAM_2_CS_SHIFT : SDRAM_3_CS_SHIFT) -+ -+#define SDRAM_BANK_SIZE(cfg) (1ULL << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_BANK_OFFSET(cfg,bank) ((unsigned long long)(bank) << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_NUM_BANKS(cfg) (4) -+#define SDRAM_MAX_BANKS 4 -+ -+/* When the elan access sdram it passes eaddr[12] as sdramaddr[12] when -+ * running with a 4k page size, however PCI accesses pass paddr[12], so -+ * we must ensure that sdram pages are allocated such that eaddr[12] is the -+ * same as paddr[12] - the easiest way is to allocate sdram in 8k chunks and -+ * ensure that maddr[12] == eaddr[12] == pgoff[0] */ -+#define SDRAM_MIN_PAGE_SIZE (8192) -+ -+#endif /* __ELAN4_SDRAM_H */ -diff -urN clean/include/elan4/stats.h linux-2.6.9/include/elan4/stats.h ---- clean/include/elan4/stats.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/stats.h 2005-04-19 12:14:52.000000000 -0400 -@@ -0,0 +1,83 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.12 2005/04/19 16:14:52 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/stats.h,v $*/ -+ -+#ifndef __ELAN4_STATS_H -+#define __ELAN4_STATS_H -+ -+#define ELAN4_DEV_STATS_BUCKETS 8 -+ -+ -+typedef struct elan4_dev_stats -+{ -+ unsigned long s_interrupts; -+ -+ unsigned long s_mainints[ELAN4_DEV_STATS_BUCKETS]; -+ unsigned long s_mainint_punts; -+ unsigned long s_mainint_rescheds; -+ -+ unsigned long s_haltints; -+ -+ unsigned long s_cproc_traps; -+ unsigned long s_dproc_traps; -+ unsigned long s_eproc_traps; -+ unsigned long s_iproc_traps; -+ unsigned long s_tproc_traps; -+ -+ unsigned long s_cproc_trap_types[0x10]; -+ unsigned long s_dproc_trap_types[7]; -+ unsigned long s_eproc_trap_types[4]; -+ unsigned long s_iproc_trap_types[0xa]; -+ unsigned long s_tproc_trap_types[7]; -+ -+ unsigned long s_correctable_errors; -+ unsigned long s_multiple_errors; -+ -+ unsigned long s_link_errors; -+ unsigned long s_lock_errors; -+ unsigned long s_deskew_errors; -+ unsigned long s_phase_errors; -+ unsigned long s_data_errors; -+ unsigned long s_fifo_overflow0; -+ unsigned long s_fifo_overflow1; -+ unsigned long s_mod45changed; -+ unsigned long s_pack_not_seen; -+ unsigned long s_linkport_keyfail; -+ -+ unsigned long s_eop_reset; -+ unsigned long s_bad_length; -+ unsigned long s_crc_bad; -+ unsigned long s_crc_error; -+ -+ unsigned long s_cproc_timeout; -+ unsigned long s_dproc_timeout; -+ -+ unsigned long s_sdram_bytes_free; -+} ELAN4_DEV_STATS; -+ -+#define MainIntBuckets ((int[ELAN4_DEV_STATS_BUCKETS-1]) {1, 2, 3, 4, 8, 16, 32}) -+ -+#define BumpDevStat(dev,stat) ((dev)->dev_stats.stat++) -+#define BucketDevStat(dev,stat,n,bucket) ((n) <= (bucket)[0] ? (dev)->dev_stats.stat[0]++ : \ -+ (n) <= (bucket)[1] ? (dev)->dev_stats.stat[1]++ : \ -+ (n) <= (bucket)[2] ? (dev)->dev_stats.stat[2]++ : \ -+ (n) <= (bucket)[3] ? (dev)->dev_stats.stat[3]++ : \ -+ (n) <= (bucket)[4] ? (dev)->dev_stats.stat[4]++ : \ -+ (n) <= (bucket)[5] ? (dev)->dev_stats.stat[5]++ : \ -+ (n) <= (bucket)[6] ? (dev)->dev_stats.stat[6]++ : \ -+ (dev)->dev_stats.stat[7]++) -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /*__ELAN4_STATS_H */ -diff -urN clean/include/elan4/tprintf.h linux-2.6.9/include/elan4/tprintf.h ---- clean/include/elan4/tprintf.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/tprintf.h 2003-09-04 08:39:17.000000000 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TPRINTF_H -+#define __ELAN4_TPRINTF_H -+ -+#ident "$Id: tprintf.h,v 1.6 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/tprintf.h,v $*/ -+ -+ -+#ifdef _ASM -+#define TPRINTF0(string) add %r0, __LINE__, %r0 -+#define TPRINTF1(string,reg) add reg, __LINE__, %r0 -+#else -+#define TPRINTF0(string) asm volatile ("add %%r0, %0, %%r0" : : "i" (__LINE__)) -+#define TPRINTF1(string, value) asm volatile ("add %0, %1, %%r0" : : "r" (value), "i" (__LINE__)) -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TPRINTF_H */ -diff -urN clean/include/elan4/trap.h linux-2.6.9/include/elan4/trap.h ---- clean/include/elan4/trap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/trap.h 2003-10-07 08:11:10.000000000 -0400 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.h,v 1.10 2003/10/07 12:11:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.h,v $*/ -+ -+#ifndef __ELAN4_TRAP_H -+#define __ELAN4_TRAP_H -+ -+/* -+ * If the EProc Faults whilst performing an action (e.g. Read/Write on the data src or dest Addr) -+ * the Eproc increments the Addr(s) by a block size (64 bytes): -+ * 1: Fault on Read: -+ * Src EventAddr = Read Addr + block -+ * 2: Fault on Write: -+ * Src EventAddr = Read Addr + block -+ * Dst EventAddr = Read Addr + block -+ * Size = Size - block ndwords -+ * We must rewind the addr correctly to completely the transfer successfully -+ */ -+#define EVENT_COPY_NDWORDS 0x8 -+#define EVENT_COPY_BLOCK_SIZE 0x40 -+ -+typedef struct elan4_eproc_trap -+{ -+ E4_uint64 tr_status; -+ E4_FaultSave tr_faultarea; -+ E4_Event tr_event; -+ E4_Addr tr_eventaddr; -+} ELAN4_EPROC_TRAP; -+ -+typedef struct elan4_cproc_trap -+{ -+ E4_uint64 tr_status; /* cproc status register */ -+ E4_uint64 tr_command; /* cproc command */ -+ E4_CommandQueueDesc tr_qdesc; /* copy of command queue descriptor */ -+ E4_FaultSave tr_faultarea; /* fault area for mmu traps */ -+ ELAN4_EPROC_TRAP tr_eventtrap; /* associated event trap (waitevent) */ -+} ELAN4_CPROC_TRAP; -+ -+typedef struct elan4_dproc_trap -+{ -+ E4_DMA tr_desc; -+ E4_FaultSave tr_packAssemFault; -+ E4_FaultSave tr_prefetchFault; -+ E4_uint64 tr_status; -+} ELAN4_DPROC_TRAP; -+ -+typedef struct elan4_tproc_trap -+{ -+ E4_uint64 tr_regs[64]; -+ E4_FaultSave tr_dataFault; -+ E4_FaultSave tr_instFault; -+ E4_uint64 tr_status; -+ E4_uint64 tr_state; -+ E4_Addr tr_pc; -+ E4_Addr tr_npc; -+ E4_uint64 tr_dirty; -+ E4_uint64 tr_bad; -+} ELAN4_TPROC_TRAP; -+ -+typedef struct elan4_iproc_trap -+{ -+ E4_uint32 tr_numTransactions; -+ E4_uint32 tr_flags; -+ E4_uint32 tr_trappedTrans; -+ E4_uint32 tr_waitForEopTrans; -+ E4_uint32 tr_identifyTrans; -+ E4_uint32 tr_pad; -+ -+ E4_FaultSave tr_faultarea; -+ E4_IprocTrapHeader tr_transactions[MAX_TRAPPED_TRANS]; -+ E4_IprocTrapData tr_dataBuffers[MAX_TRAPPED_TRANS]; -+} ELAN4_IPROC_TRAP; -+ -+#define TR_FLAG_ACK_SENT (1 << 0) -+#define TR_FLAG_EOP_ERROR (1 << 1) -+#define TR_FLAG_BAD_TRANS (1 << 2) -+#define TR_FLAG_DMA_PACKET (1 << 3) -+#define TR_FLAG_EOP_BAD (1 << 4) -+#define TR_FLAG_TOOMANY_TRANS (1 << 5) -+ -+#define TR_TRANS_INVALID (0xffffffff) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_TRAP_H */ -diff -urN clean/include/elan4/trtype.h linux-2.6.9/include/elan4/trtype.h ---- clean/include/elan4/trtype.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/trtype.h 2004-02-06 05:38:21.000000000 -0500 -@@ -0,0 +1,112 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_TRTYPE_H -+#define _ELAN4_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.20 2004/02/06 10:38:21 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/trtype.h,v $*/ -+ -+/*<15:11> Size field is used to give the number of additional 64 bit data values. -+ A value from 0 to 16 inclusive is valid. */ -+ -+#include -+ -+#define TR_SIZE_SHIFT (11) -+#define TR_SIZE_MASK (0x1f << TR_SIZE_SHIFT) -+#define SET_TR_SIZE(Size) (((Size) << TR_SIZE_SHIFT) & TR_SIZE_MASK) -+ -+/* <10:9> Last Transaction and AckNow bits, marks the last transaction and -+ enables a PACK_OK to be sent. */ -+#define TR_LAST_AND_SEND_ACK (3 << 9) -+ -+ -+/* <8> Only valid on the last transaction. Delays execution until an EOP_GOOD is received. -+ * Any other EOP type will abort execution of this transaction. */ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * Data type. This is used by transactions of variable data type. It controls any endian -+ * converion required if the destiantion host processor has a big endian memory format. -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_DATATYPE_SHIFT (6) -+#define TR_DATATYPE_MASK ((1 << 2) - 1) -+ -+#define TR_DATATYPE_BYTE E4_DATATYPE_BYTE -+#define TR_DATATYPE_SHORT E4_DATATYPE_SHORT -+#define TR_DATATYPE_WORD E4_DATATYPE_WORD -+#define TR_DATATYPE_DWORD E4_DATATYPE_DWORD -+ -+/* <5:0> Transaction Type -+ * For Writeblock <5:3> 000 => Write, 0001 => Read -+ * <2:0> End Byte Addr */ -+#define TR_OPCODE_MASK 0x3F -+#define TR_BLOCK_OPCODE_MASK 0x38 -+ -+#define TR_WRITEBLOCK 0x0 -+#define TR_ENDBYTE_MASK 0x7 -+#define TR_WRITE(Size, EndByte, DataType) \ -+ (0x0 | SET_TR_SIZE(Size) | ((EndByte) & TR_ENDBYTE_MASK) | \ -+ (((DataType) & TR_DATATYPE_MASK) << TR_DATATYPE_SHIFT)) -+ -+#define TR_NOP_TRANS (0x10 | SET_TR_SIZE(0)) -+#define TR_SETEVENT 0x10 -+#define TR_SETEVENT_NOIDENT (TR_SETEVENT | SET_TR_SIZE(0) | TR_LAST_AND_SEND_ACK) -+#define TR_SETEVENT_IDENTIFY (TR_SETEVENT | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_REMOTEDMA (0x11 | SET_TR_SIZE(7) | TR_LAST_AND_SEND_ACK) -+#define TR_SENDDISCARD (0x12 | SET_TR_SIZE(0)) -+ -+/* -+ * Conditional transactions that might return PAckTestFail. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where a definite TRUE/FALSE answer is required. -+ */ -+#define TR_GTE (0x14 | SET_TR_SIZE(1)) -+#define TR_LT (0x15 | SET_TR_SIZE(1)) -+#define TR_EQ (0x16 | SET_TR_SIZE(1)) -+#define TR_NEQ (0x17 | SET_TR_SIZE(1)) -+ -+/* -+ * Conditional transactions that might return PAckDiscard. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where eventually a TRUE answer is expected but the node might not be ready yet. -+ * These can be mixed with the normal conditionals to allow a single packet to test for readyness and -+ * a TRUE/FALSE answer. -+ */ -+#define TR_GTE_DISCARD (0x34 | SET_TR_SIZE(1)) -+#define TR_LT_DISCARD (0x35 | SET_TR_SIZE(1)) -+#define TR_EQ_DISCARD (0x36 | SET_TR_SIZE(1)) -+#define TR_NEQ_DISCARD (0x37 | SET_TR_SIZE(1)) -+ -+#define TR_TRACEROUTE_TRANS 0x18 -+#define TR_TRACEROUTE(Size) (TR_TRACEROUTE_TRANS | (TR_DATATYPE_WORD << TR_DATATYPE_SHIFT) |SET_TR_SIZE(Size)) -+#define TR_IDENTIFY (0x19 | SET_TR_SIZE(0)) -+ -+#define TR_ADDWORD (0x1c | SET_TR_SIZE(2) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_COMMIT (0x1d | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_TESTANDWRITE (0x1e | SET_TR_SIZE(3) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_GETINDEX (0x1f | SET_TR_SIZE(0)) -+ -+ -+ -+/* TraceRoute formate */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision Id */ -+#define TR_TRACEROUTE0_BCAST_PIN(val) (((val) >> 7) & 1) /* 7 Bcast Top Pin */ -+#define TR_TRACEROUTE0_LNR(val) (((val) >> 8) & 0xFF) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_ROUTES_SELECTED(val) ((val & 0xFF)) /* 0:7 Routes Selected */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_BCAST_BOTTOM(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom */ -+ -+#endif /* _ELAN4_TRANSACTIONTYPE_H */ -diff -urN clean/include/elan4/types.h linux-2.6.9/include/elan4/types.h ---- clean/include/elan4/types.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/types.h 2003-09-04 08:39:17.000000000 -0400 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TYPES_H -+#define __ELAN4_TYPES_H -+ -+#ident "@(#)$Id: types.h,v 1.9 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/types.h,v $*/ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E4_int; -+typedef unsigned int E4_uint; -+ -+typedef signed char E4_int8; -+typedef unsigned char E4_uint8; -+ -+typedef signed short E4_int16; -+typedef unsigned short E4_uint16; -+ -+typedef signed int E4_int32; -+typedef unsigned int E4_uint32; -+ -+#ifdef _LP64 -+typedef signed long E4_int64; -+typedef unsigned long E4_uint64; -+#else -+typedef signed long long E4_int64; -+typedef unsigned long long E4_uint64; -+#endif -+ -+/* 64-bit Elan4 */ -+typedef E4_uint64 E4_Addr; -+typedef E4_uint32 E4_LocPhysAddr; /* Really 31 bits */ -+ -+#define OneK (1024) -+#define EightK (8*OneK) -+ -+#define E4_DATATYPE_BYTE 0 -+#define E4_DATATYPE_SHORT 1 -+#define E4_DATATYPE_WORD 2 -+#define E4_DATATYPE_DWORD 3 -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TYPES_H */ -+ -diff -urN clean/include/elan4/user.h linux-2.6.9/include/elan4/user.h ---- clean/include/elan4/user.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/user.h 2005-04-21 07:12:06.000000000 -0400 -@@ -0,0 +1,347 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.h,v 1.45 2005/04/21 11:12:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.h,v $*/ -+ -+#ifndef __ELAN4_USER_H -+#define __ELAN4_USER_H -+ -+#include -+#include -+#include -+ -+typedef struct trap_queue -+{ -+ unsigned q_back; /* Next free space */ -+ unsigned q_front; /* First object to remove */ -+ unsigned q_size; /* Size of queue */ -+ unsigned q_count; /* Current number of entries */ -+ unsigned q_slop; /* FULL <=> (count+slop) == size */ -+} RING_QUEUE; -+ -+#define RING_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop), (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define RING_QUEUE_FULL(q) ((q).q_count >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size) -+#define RING_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define RING_QUEUE_NEXT(q,indx) ((indx) = (((indx)+1) % (q).q_size)) -+#define RING_QUEUE_PREV(q,indx) ((indx) = (((indx)+(q).q_size-1) % (q).q_size)) -+#define RING_QUEUE_ADD(q) (RING_QUEUE_NEXT(q ,(q).q_back), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REMOVE(q) (RING_QUEUE_NEXT(q, (q).q_front), (--(q).q_count) == 0) -+#define RING_QUEUE_ADD_FRONT(q) (RING_QUEUE_PREV(q, (q).q_front), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_ENTRY(qArea,indx) (&(qArea)[(indx)]) -+#define RING_QUEUE_FRONT(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_front) -+#define RING_QUEUE_BACK(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_back) -+#define RING_QUEUE_ITERATE(q,idx) for (idx = (q).q_front; idx != (q).q_back; idx = (((idx) + 1) % (q).q_size)) -+ -+typedef struct user_rgn -+{ -+ struct user_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_mprev; /* sorted on main address */ -+ virtaddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct user_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_eprev; /* sorted on elan address */ -+ E4_Addr rgn_ebase; /* elan address of base of region */ -+ -+ unsigned long rgn_len; /* length of region */ -+ unsigned rgn_perm; /* elan access permission */ -+} USER_RGN; -+ -+typedef struct user_vpseg -+{ -+ struct list_head vps_link; -+ -+ unsigned short vps_process; /* virtual process number */ -+ unsigned short vps_entries; /* and # virtual processes */ -+ -+ unsigned vps_type; -+ union -+ { -+ struct { -+ ELAN_CAPABILITY *cap; -+ E4_VirtualProcessEntry *routes; -+ } p2p; -+#define vps_p2p_cap vps_u.p2p.cap -+#define vps_p2p_routes vps_u.p2p.routes -+ -+ struct { -+ unsigned short lowvp; -+ unsigned short highvp; -+ } bcast; -+#define vps_bcast_lowvp vps_u.bcast.lowvp -+#define vps_bcast_highvp vps_u.bcast.highvp -+ } vps_u; -+} USER_VPSEG; -+ -+/* values for vps_type */ -+#define USER_VPSEG_P2P 0 -+#define USER_VPSEG_BCAST 1 -+ -+typedef struct user_cq -+{ -+ struct list_head ucq_link; -+ -+ ELAN4_CQ *ucq_cq; /* the real command queue */ -+ -+ unsigned char ucq_state; /* command queue state */ -+ unsigned char ucq_errored; /* command queue has errored */ -+ unsigned char ucq_flags; /* flags */ -+ ELAN4_CPROC_TRAP ucq_trap; /* trap state */ -+ -+ atomic_t ucq_ref; /* # references to this cq (mmaps) */ -+} USER_CQ; -+ -+/* values for ucq_state */ -+#define UCQ_RUNNING 0 /* command queue is running */ -+#define UCQ_TRAPPED 1 /* command queue has trapped */ -+#define UCQ_NEEDS_RESTART 2 /* command queue has trapped, and needs restarting */ -+#define UCQ_STOPPED 3 /* command queue has trapped, and delivered to user */ -+ -+/* values for ucq_flags */ -+#define UCQ_SYSTEM (1 << 0) -+#define UCQ_REORDER (1 << 1) -+ -+extern int num_fault_save; -+extern int min_fault_pages; -+extern int max_fault_pages; -+ -+typedef struct fault_save -+{ -+ struct fault_save *next; -+ E4_Addr addr; -+ E4_uint32 count; -+} FAULT_SAVE; -+ -+typedef struct user_iproc_trap -+{ -+ unsigned char ut_state; -+ ELAN4_IPROC_TRAP ut_trap; -+} USER_IPROC_TRAP; -+ -+/* values for ut_state */ -+#define UTS_IPROC_RUNNING 0 -+#define UTS_IPROC_TRAPPED 1 -+#define UTS_IPROC_RESOLVING 2 -+#define UTS_IPROC_EXECUTE_PACKET 3 -+#define UTS_IPROC_EXECUTING 4 -+#define UTS_IPROC_NETWORK_ERROR 5 -+#define UTS_IPROC_STOPPED 6 -+ -+typedef struct user_ctxt_entry -+{ -+ struct list_head cent_link; /* entry chained on context */ -+ ELAN_CAPABILITY *cent_cap; /* capability we attached with */ -+} USER_CTXT_ENTRY; -+ -+typedef struct user_ctxt -+{ -+ ELAN4_CTXT uctx_ctxt; /* is also an elan context */ -+ -+ spinlock_t uctx_spinlock; /* spinlock for items used with interrupt handler */ -+ kcondvar_t uctx_wait; /* place to sleep (traphandler/swapout/swapin/neterr fixup) */ -+ -+ unsigned uctx_status; /* status (uctx_spinlock) */ -+ -+ pid_t uctx_trap_pid; /* pid to deliver signals to on trap */ -+ int uctx_trap_signo; /* signal number to deliver */ -+ unsigned uctx_trap_state; /* state of trap handling code */ -+ unsigned uctx_trap_count; /* count of "thread" in user_trap_handler() */ -+ -+ unsigned uctx_int_count; /* # interrupts since last zeroed */ -+ unsigned long uctx_int_start; /* tick when int_count last zeroed */ -+ unsigned long uctx_int_delay; /* # ticks to delay next wakeup */ -+ struct timer_list uctx_int_timer; /* and timer to use to delay signal */ -+ struct timer_list uctx_shuffle_timer; /* and timer to use to delay shuffle signal */ -+ -+ struct timer_list uctx_neterr_timer; /* network error timer */ -+ -+ struct list_head uctx_vpseg_list; /* list of vp segments we've got */ -+ kmutex_t uctx_vpseg_lock; /* and lock to protect it. */ -+ ELAN4_ROUTE_TABLE *uctx_routetable; /* our virtual process table */ -+ ELAN_POSITION uctx_position; /* position in network */ -+ -+ struct list_head uctx_cent_list; /* list of attached network contexts */ -+ -+ USER_CQ *uctx_ddcq; /* command queue for re-issueing traps */ -+ E4_uint64 uctx_ddcq_insertcnt; /* # dwords inserted into command queue */ -+ E4_uint64 uctx_ddcq_completed; /* last "completed" write was here */ -+ int uctx_ddcq_intr; /* count of outstanding ddcq interrupts */ -+ -+ ELAN4_HALTOP uctx_haltop; /* halt operation for flushing */ -+ ELAN4_DMA_FLUSHOP uctx_dma_flushop; /* flush operation for flushing dma runqueue */ -+ -+ INTCOOKIE_TABLE *uctx_intcookie_table; /* table of interrupt cookies (shared with other uctxs for this task) */ -+ -+ kmutex_t uctx_cqlock; /* lock for create/destory cqs */ -+ struct list_head uctx_cqlist; /* list of command queues (uctx_cqlock,uctx_spinlock) */ -+ -+ ELAN4_DPROC_TRAP *uctx_dprocTraps; /* queue of dproc traps to resolve/reissue */ -+ RING_QUEUE uctx_dprocTrapQ; -+ -+ ELAN4_TPROC_TRAP *uctx_tprocTraps; /* queue of tproc traps to resolve/reissue */ -+ RING_QUEUE uctx_tprocTrapQ; -+ -+ ELAN4_EPROC_TRAP *uctx_eprocTraps; /* queue of eproc traps to resolve */ -+ RING_QUEUE uctx_eprocTrapQ; -+ -+ USER_IPROC_TRAP uctx_iprocTrap[2]; /* input trap state, 1 per virtual channel */ -+ -+ E4_DMA *uctx_dmas; /* queue of dmas to restart */ -+ RING_QUEUE uctx_dmaQ; -+ -+ E4_ThreadRegs *uctx_threads; /* queue of threads to restart */ -+ RING_QUEUE uctx_threadQ; -+ -+ ELAN4_NETERR_MSG *uctx_msgs; /* queue of neterr messages */ -+ RING_QUEUE uctx_msgQ; -+ kmutex_t uctx_rgnmutex; /* lock for create/destroy regions */ -+ spinlock_t uctx_rgnlock; /* spinlock to protect linked lists */ -+ USER_RGN *uctx_mrgns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_mtail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_mrgnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ USER_RGN *uctx_ergns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_etail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_ergnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ ELAN4_USER_PAGE *uctx_upage; /* kernel page shared with user */ -+ sdramaddr_t uctx_trampoline; /* sdram page for tproc trampoline */ -+ -+ E4_Addr uctx_upage_addr; /* elan addr page mapped into */ -+ E4_Addr uctx_trestart_addr; /* address of thread restart code */ -+ FAULT_SAVE *uctx_faults; -+ FAULT_SAVE *uctx_fault_list; -+ int uctx_num_fault_save; -+ spinlock_t uctx_fault_lock; -+} USER_CTXT; -+ -+/* bit values for uctx_status */ -+#define UCTX_EXITING (1 << 0) /* context is exiting. */ -+#define UCTX_USER_FILTERING (1 << 1) /* user requested context filter */ -+#define UCTX_USER_STOPPED (1 << 2) /* user requested stop */ -+ -+#define UCTX_SWAPPING (1 << 3) /* context is swapping out */ -+#define UCTX_SWAPPED (1 << 4) /* context is swapped out */ -+ -+#define UCTX_STOPPING (1 << 5) /* stopping elan from running this context */ -+#define UCTX_STOPPED (1 << 6) /* elan no longer running this context */ -+ -+#define UCTX_EPROC_QUEUE_FULL (1 << 7) /* reasons for stopping running */ -+#define UCTX_DPROC_QUEUE_FULL (1 << 8) -+#define UCTX_TPROC_QUEUE_FULL (1 << 9) -+#define UCTX_IPROC_CH0_TRAPPED (1 << 10) -+#define UCTX_IPROC_CH1_TRAPPED (1 << 11) -+ -+#define UCTX_NETERR_TIMER (1 << 12) -+#define UCTX_NETERR_FIXUP (1 << 13) -+ -+#define UCTX_EPROC_QUEUE_OVERFLOW (1 << 14) -+#define UCTX_DPROC_QUEUE_OVERFLOW (1 << 15) -+#define UCTX_TPROC_QUEUE_OVERFLOW (1 << 16) -+ -+#define UCTX_EPROC_QUEUE_ERROR (1 << 17) -+#define UCTX_DPROC_QUEUE_ERROR (1 << 18) -+#define UCTX_TPROC_QUEUE_ERROR (1 << 19) -+ -+#define UCTX_STOPPED_REASONS (UCTX_EPROC_QUEUE_FULL | UCTX_DPROC_QUEUE_FULL | UCTX_TPROC_QUEUE_FULL) -+#define UCTX_SWAPPED_REASONS (UCTX_EXITING | UCTX_USER_STOPPED | UCTX_NETERR_FIXUP) -+#define UCTX_NACKING_REASONS (UCTX_USER_FILTERING | UCTX_IPROC_CH0_TRAPPED | UCTX_IPROC_CH1_TRAPPED) -+ -+#define UCTX_OVERFLOW_REASONS (UCTX_EPROC_QUEUE_OVERFLOW | UCTX_DPROC_QUEUE_OVERFLOW | UCTX_TPROC_QUEUE_OVERFLOW) -+#define UCTX_ERROR_REASONS (UCTX_EPROC_QUEUE_ERROR | UCTX_DPROC_QUEUE_ERROR | UCTX_TPROC_QUEUE_ERROR) -+ -+#define UCTX_RUNNABLE(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS)) == 0) -+#define UCTX_NACKING(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS | UCTX_NACKING_REASONS)) != 0) -+ -+/* values for uctx_trap_signalled */ -+#define UCTX_TRAP_IDLE 0 -+#define UCTX_TRAP_SLEEPING 1 -+#define UCTX_TRAP_SIGNALLED 2 -+#define UCTX_TRAP_ACTIVE 3 -+ -+extern int user_p2p_route_options; -+extern int user_bcast_route_options; -+extern int user_dproc_retry_count; -+extern int user_cproc_retry_count; -+extern int user_ioproc_enabled; -+extern int user_pagefault_enabled; -+ -+extern USER_CTXT *user_alloc (ELAN4_DEV *dev); -+extern void user_free (USER_CTXT *uctx); -+extern void user_swapout (USER_CTXT *uctx, unsigned reason); -+extern void user_swapin (USER_CTXT *uctx, unsigned reason); -+extern int user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_block_inputter (USER_CTXT *uctx, unsigned blocked); -+extern int user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas); -+ -+extern int user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap); -+extern int user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp); -+extern int user_removevp (USER_CTXT *uctx, unsigned process); -+ -+extern int user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_reset_route (USER_CTXT *uctx, unsigned process); -+extern int user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error); -+extern int user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+extern int user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+ -+extern int user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr); -+extern int user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx); -+extern int user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma); -+extern int user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern int user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap); -+ -+extern int user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks); -+extern USER_CQ *user_findcq (USER_CTXT *uctx, unsigned num); -+extern USER_CQ *user_alloccq (USER_CTXT *uctx, unsigned size, unsigned perm, unsigned flags); -+extern void user_freecq (USER_CTXT *uctx, USER_CQ *cq); -+extern void user_dropcq (USER_CTXT *uctx, USER_CQ *cq); -+ -+/* user_osdep.c */ -+extern int user_load_range (USER_CTXT *uctx, E4_Addr addr, unsigned long nbytes, E4_uint32 fsr); -+extern void user_update_main (USER_CTXT *uctx, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long len); -+extern void user_unload_main (USER_CTXT *uctx, unsigned long start, unsigned long len); -+ -+ -+/* regions.c */ -+extern USER_RGN *user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail); -+extern USER_RGN *user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail); -+extern USER_RGN *user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr); -+extern USER_RGN *user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr); -+extern int user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm); -+extern void user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len); -+extern int user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access); -+extern virtaddr_t user_elan2main (USER_CTXT *uctx, E4_Addr addr); -+extern E4_Addr user_main2elan (USER_CTXT *uctx, virtaddr_t addr); -+extern void user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len); -+extern void user_freergns (USER_CTXT *uctx); -+ -+/* user_ddcq.c */ -+extern int user_ddcq_check (USER_CTXT *uctx, unsigned num); -+extern int user_ddcq_flush (USER_CTXT *uctx); -+extern void user_ddcq_intr (USER_CTXT *uctx); -+extern void user_ddcq_write_dword (USER_CTXT *uctx, E4_Addr addr, E4_uint64 value); -+extern void user_ddcq_interrupt (USER_CTXT *uctx, E4_uint64 cookie); -+extern void user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma); -+extern void user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern void user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr); -+extern void user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count); -+extern void user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USER_H */ -diff -urN clean/include/elan4/userregs.h linux-2.6.9/include/elan4/userregs.h ---- clean/include/elan4/userregs.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/userregs.h 2004-10-06 10:50:38.000000000 -0400 -@@ -0,0 +1,383 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_USERREGS_H -+#define __ELAN4_USERREGS_H -+ -+#ident "$Id: userregs.h,v 1.15 2004/10/06 14:50:38 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/userregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+#define COUNT_REG0_SHIFT 32ull -+#define COUNT_REG1_SHIFT 36ull -+#define COUNT_REG2_SHIFT 40ull -+#define COUNT_REG3_SHIFT 44ull -+#define COUNT_REG4_SHIFT 48ull -+#define COUNT_REG5_SHIFT 52ull -+#define COUNT_REG6_SHIFT 56ull -+#define COUNT_REG7_SHIFT 60ull -+ -+ -+/* Count reg 0 */ -+#define STC_INPUT_NON_WRITE_BLOCKS (0x0ull << COUNT_REG0_SHIFT) -+#define STP_DMA_EOP_WAIT_ACK (0x1ull << COUNT_REG0_SHIFT) -+#define STP_TPROC_RUNNING (0x2ull << COUNT_REG0_SHIFT) -+#define STC_STEN_PKTS_OPEN (0x3ull << COUNT_REG0_SHIFT) -+#define STP_CPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG0_SHIFT) -+#define STC_TLB_TABLE_WALKS (0x5ull << COUNT_REG0_SHIFT) -+#define STC_CACHE_HITS (0x6ull << COUNT_REG0_SHIFT) -+#define STC_PCI_SLAVE_READS (0x7ull << COUNT_REG0_SHIFT) -+#define STP_PCI_WAITING_FOR_GNT (0x8ull << COUNT_REG0_SHIFT) -+#define STP_SYS_CLOCK_RATE0 (0xfull << COUNT_REG0_SHIFT) -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_NON_WRITE_BLOCKS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_TPROC_RUNNING", \ -+ "STC_STEN_PKTS_OPEN", \ -+ "STP_CPROC_HOLDS_FFU_DP", \ -+ "STC_TLB_TABLE_WALKS", \ -+ "STC_CACHE_HITS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STP_PCI_WAITING_FOR_GNT", \ -+ "STP_SYS_CLOCK_RATE0" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0x0ull << COUNT_REG1_SHIFT) -+#define STP_DMA_DATA_TRANSMITTING (0x1ull << COUNT_REG1_SHIFT) -+#define STC_CPROC_VALUES_EXE (0x2ull << COUNT_REG1_SHIFT) -+#define STC_STEN_TRANS_SENT (0x3ull << COUNT_REG1_SHIFT) -+#define STP_TPROC_DQ_HOLDS_FFU_DP (0x4ull << COUNT_REG1_SHIFT) -+#define STC_TPROC_TLB_HITS (0x5ull << COUNT_REG1_SHIFT) -+#define STC_CACHE_ALLOC_MISSES (0x6ull << COUNT_REG1_SHIFT) -+#define STP_PCI_MASTER_READ_WAITING (0x7ull << COUNT_REG1_SHIFT) -+#define STP_PCI_WAITING_FOR_DEVSEL (0x8ull << COUNT_REG1_SHIFT) -+#define STP_SYS_CLOCK_RATE1 (0xfull << COUNT_REG1_SHIFT) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STC_CPROC_VALUES_EXE", \ -+ "STC_STEN_TRANS_SENT", \ -+ "STP_TPROC_DQ_HOLDS_FFU_DP", \ -+ "STC_TPROC_TLB_HITS", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_READ_WAITING", \ -+ "STP_PCI_WAITING_FOR_DEVSEL", \ -+ "STP_SYS_CLOCK_RATE1" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0x0ull << COUNT_REG2_SHIFT) -+#define STP_DMA_WAITING_MEM (0x1ull << COUNT_REG2_SHIFT) -+#define STC_CPROC_TRANSFERS (0x2ull << COUNT_REG2_SHIFT) -+#define STP_STEN_WAIT_NETWORK_BUSY (0x3ull << COUNT_REG2_SHIFT) -+#define STP_IPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG2_SHIFT) -+#define STC_UNITS_TLB_HITS (0x5ull << COUNT_REG2_SHIFT) -+#define STC_CACHE_NON_ALLOC_MISSES (0x6ull << COUNT_REG2_SHIFT) -+#define STP_PCI_MASTER_WRITE_WAITING (0x7ull << COUNT_REG2_SHIFT) -+#define STC_PCI_OUT_OF_ORDER_SPLIT_COMP (0x8ull << COUNT_REG2_SHIFT) -+#define STP_SYS_CLOCK_RATE2 (0xfull << COUNT_REG2_SHIFT) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STC_CPROC_TRANSFERS", \ -+ "STP_STEN_WAIT_NETWORK_BUSY", \ -+ "STP_IPROC_HOLDS_FFU_DP", \ -+ "STC_UNITS_TLB_HITS", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_WRITE_WAITING", \ -+ "STC_PCI_OUT_OF_ORDER_SPLIT_COMP", \ -+ "STP_SYS_CLOCK_RATE2" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0x0ull << COUNT_REG3_SHIFT) -+#define STP_DMA_WAIT_NETWORK_BUSY (0x1ull << COUNT_REG3_SHIFT) -+#define STC_CPROC_PREFETCH_SDRAM (0x2ull << COUNT_REG3_SHIFT) -+#define STP_STEN_BLOCKED_ACKS_OR_VC (0x3ull << COUNT_REG3_SHIFT) -+#define STP_EPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG3_SHIFT) -+#define STP_TPROC_BLOCKED_MEMSYS (0x5ull << COUNT_REG3_SHIFT) -+#define STC_CACHE_WRITE_BACKS (0x6ull << COUNT_REG3_SHIFT) -+#define STP_PCI_SLAVE_READ_WAITING (0x7ull << COUNT_REG3_SHIFT) -+#define STP_PCI_IDLE_CYCLES (0x8ull << COUNT_REG3_SHIFT) -+#define STP_SYS_CLOCK_RATE3 (0xfull << COUNT_REG3_SHIFT) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STC_CPROC_PREFETCH_SDRAM", \ -+ "STP_STEN_BLOCKED_ACKS_OR_VC", \ -+ "STP_EPROC_HOLDS_FFU_DP", \ -+ "STP_TPROC_BLOCKED_MEMSYS", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STP_PCI_SLAVE_READ_WAITING", \ -+ "STP_PCI_IDLE_CYCLES", \ -+ "STP_SYS_CLOCK_RATE3" \ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0x0ull << COUNT_REG4_SHIFT) -+#define STC_DMA_PKTS_ACCEPTED (0x1ull << COUNT_REG4_SHIFT) -+#define STC_CPROC_FLUSH_REQ_SDRAM (0x2ull << COUNT_REG4_SHIFT) -+#define STP_STEN_EOP_WAIT_ACK (0x3ull << COUNT_REG4_SHIFT) -+#define STP_DMA_HOLDS_FFU_DP (0x4ull << COUNT_REG4_SHIFT) -+#define STP_UNIT_BLOCKED_MEMSYS (0x5ull << COUNT_REG4_SHIFT) -+#define STC_PCI_MASTER_READS (0x6ull << COUNT_REG4_SHIFT) -+#define STP_PCI_SLAVE_WRITE_WAITING (0x7ull << COUNT_REG4_SHIFT) -+#define STC_INPUT_PACKETS_DISCARDED (0x8ull << COUNT_REG4_SHIFT) -+#define STP_SYS_CLOCK_RATE4 (0xfull << COUNT_REG4_SHIFT) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_PKTS_ACCEPTED", \ -+ "STC_CPROC_FLUSH_REQ_SDRAM", \ -+ "STP_STEN_EOP_WAIT_ACK", \ -+ "STP_DMA_HOLDS_FFU_DP", \ -+ "STP_UNIT_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_SLAVE_WRITE_WAITING", \ -+ "STC_INPUT_PACKETS_DISCARDED", \ -+ "STP_SYS_CLOCK_RATE4" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0x0ull << COUNT_REG5_SHIFT) -+#define STC_DMA_PKTS_REJECTED (0x1ull << COUNT_REG5_SHIFT) -+#define STC_CPROC_INSERT_CACHE_MISSES (0x2ull << COUNT_REG5_SHIFT) -+#define STP_STEN_TRANSMITTING_DATA (0x3ull << COUNT_REG5_SHIFT) -+#define FFU_BLOCKED_DIFF_FFU_PROC (0x4ull << COUNT_REG5_SHIFT) -+#define STP_TABLE_WALKS_BLOCKED_MEMSYS (0x5ull << COUNT_REG5_SHIFT) -+#define STC_PCI_MASTER_WRITES (0x6ull << COUNT_REG5_SHIFT) -+#define STP_PCI_MASTER_HOLDS_BUS (0x7ull << COUNT_REG5_SHIFT) -+#define STC_PCI_NO_SPLIT_COMPS (0x8ull << COUNT_REG5_SHIFT) -+#define STP_SYS_CLOCK_RATE5 (0xfull << COUNT_REG5_SHIFT) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_PKTS_REJECTED", \ -+ "STC_CPROC_INSERT_CACHE_MISSES", \ -+ "STP_STEN_TRANSMITTING_DATA", \ -+ "FFU_BLOCKED_DIFF_FFU_PROC", \ -+ "STP_TABLE_WALKS_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_HOLDS_BUS", \ -+ "STC_PCI_NO_SPLIT_COMPS", \ -+ "STP_SYS_CLOCK_RATE5" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_BLOCKED_WAITING_TRANS (0x0ull << COUNT_REG6_SHIFT) -+#define STP_TPROC_INST_STALL (0x1ull << COUNT_REG6_SHIFT) -+#define STP_CPROC_WAITING_DESCHED (0x2ull << COUNT_REG6_SHIFT) -+#define STP_STEN_PKT_OPEN_WAITING_DATA (0x3ull << COUNT_REG6_SHIFT) -+#define STP_TLB_HASH_TABLE_ACCESSES (0x4ull << COUNT_REG6_SHIFT) -+#define STP_PCI_SLAVE_BLOCKED_MEMSYS (0x5ull << COUNT_REG6_SHIFT) -+#define STP_PCI_TRANSFERRING_DATA (0x6ull << COUNT_REG6_SHIFT) -+#define STP_PCI_MASTER_WAITING_BUS (0x7ull << COUNT_REG6_SHIFT) -+#define STP_PCI_READ_LATENCY (0x8ull << COUNT_REG6_SHIFT) -+#define STP_SYS_CLOCK_RATE6 (0xfull << COUNT_REG6_SHIFT) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_BLOCKED_WAITING_TRANS", \ -+ "STP_TPROC_INST_STALL", \ -+ "STP_CPROC_WAITING_DESCHED", \ -+ "STP_STEN_PKT_OPEN_WAITING_DATA", \ -+ "STP_TLB_HASH_TABLE_ACCESSES", \ -+ "STP_PCI_SLAVE_BLOCKED_MEMSYS", \ -+ "STP_PCI_TRANSFERRING_DATA", \ -+ "STP_PCI_MASTER_WAITING_BUS", \ -+ "STP_PCI_READ_LATENCY", \ -+ "STP_SYS_CLOCK_RATE6" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0x0ull << COUNT_REG7_SHIFT) -+#define STP_TPROC_LOAD_STORE_STALL (0x1ull << COUNT_REG7_SHIFT) -+#define STC_CPROC_TIMEOUTS (0x2ull << COUNT_REG7_SHIFT) -+#define STP_STEN_BLOCKED_NETWORK (0x3ull << COUNT_REG7_SHIFT) -+#define STP_TLB_CHAIN_ACCESSES (0x4ull << COUNT_REG7_SHIFT) -+#define STP_CPROC_SCHED_BLOCKED_MEMSYS (0x5ull << COUNT_REG7_SHIFT) -+#define STC_PCI_SLAVE_WRITES (0x6ull << COUNT_REG7_SHIFT) -+#define STC_PCI_DISCONNECTS_RETRIES (0x7ull << COUNT_REG7_SHIFT) -+#define STC_RING_OSCILLATOR (0x8ull << COUNT_REG7_SHIFT) -+#define STP_SYS_CLOCK_RATE7 (0xfull << COUNT_REG7_SHIFT) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STP_TPROC_LOAD_STORE_STALL", \ -+ "STC_CPROC_TIMEOUTS", \ -+ "STP_STEN_BLOCKED_NETWORK", \ -+ "STP_TLB_CHAIN_ACCESSES", \ -+ "STP_CPROC_SCHED_BLOCKED_MEMSYS", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STC_PCI_DISCONNECTS_RETRIES", \ -+ "STC_RING_OSCILLATOR", \ -+ "STP_SYS_CLOCK_RATE7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+ -+#define INPUT_PERF_STATS (STC_INPUT_NON_WRITE_BLOCKS | STC_INPUT_WRITE_BLOCKS | \ -+ STC_INPUT_PKTS | STC_INPUT_PKTS_REJECTED | \ -+ STC_INPUT_CTX_FILTER_FILL | STP_INPUT_DATA_TRANSMITTING | \ -+ STP_INPUT_WAITING_NETWORK_DATA | STP_INPUT_BLOCKED_WAITING_TRANS | STC_INPUT_PACKETS_DISCARDED) -+ -+#define DMA_PERF_STATS (STC_DMA_PKTS_ACCEPTED | STC_DMA_PKTS_REJECTED | \ -+ STP_DMA_EOP_WAIT_ACK | STP_DMA_DATA_TRANSMITTING | \ -+ STP_DMA_WAITING_MEM | STP_DMA_WAIT_NETWORK_BUSY) -+ -+ -+#define TPROC_PERF_STATS (STP_TPROC_RUNNING | STP_TPROC_INST_STALL | \ -+ STP_TPROC_LOAD_STORE_STALL) -+ -+#define CPROC_PERF_STATS (STC_CPROC_VALUES_EXE | STC_CPROC_TRANSFERS | \ -+ STC_CPROC_PREFETCH_SDRAM | STC_CPROC_FLUSH_REQ_SDRAM | \ -+ STC_CPROC_INSERT_CACHE_MISSES | STP_CPROC_WAITING_DESCHED | \ -+ STC_CPROC_TIMEOUTS) -+ -+#define STEN_PERF_STATS (STC_STEN_PKTS_OPEN | STC_STEN_TRANS_SENT | \ -+ STP_STEN_WAIT_NETWORK_BUSY | STP_STEN_BLOCKED_ACKS_OR_VC | \ -+ STP_STEN_EOP_WAIT_ACK | STP_STEN_TRANSMITTING_DATA | \ -+ STP_STEN_PKT_OPEN_WAITING_DATA | STP_STEN_BLOCKED_NETWORK) -+ -+#define FFU_PREF_STATS (STP_CPROC_HOLDS_FFU_DP | STP_TPROC_DQ_HOLDS_FFU_DP | \ -+ STP_IPROC_HOLDS_FFU_DP | STP_EPROC_HOLDS_FFU_DP | \ -+ STP_DMA_HOLDS_FFU_DP | FFU_BLOCKED_DIFF_FFU_PROC) -+ -+#define TABLE_WALK_PERF_STATS (STC_TPROC_TLB_HITS | STC_UNITS_TLB_HITS | \ -+ STP_TLB_HASH_TABLE_ACCESSES | STP_TLB_CHAIN_ACCESSES | \ -+ STC_TLB_TABLE_WALKS) -+ -+#define ADDRESS_ARB_PERF_STATS (STP_UNIT_BLOCKED_MEMSYS | STP_TPROC_BLOCKED_MEMSYS | \ -+ STP_TABLE_WALKS_BLOCKED_MEMSYS | STP_CPROC_SCHED_BLOCKED_MEMSYS | \ -+ STP_PCI_SLAVE_BLOCKED_MEMSYS) -+ -+#define CACHE_PERF_STATS (STC_CACHE_HITS | STC_CACHE_ALLOC_MISSES | \ -+ STC_CACHE_NON_ALLOC_MISSES | STC_CACHE_WRITE_BACKS) -+ -+ -+#define PCI_PERF_STATS (STC_PCI_SLAVE_READS | STP_PCI_MASTER_READ_WAITING | \ -+ STP_PCI_MASTER_WRITE_WAITING | STP_PCI_SLAVE_READ_WAITING | \ -+ STP_PCI_SLAVE_WRITE_WAITING | STC_PCI_MASTER_WRITES | \ -+ STP_PCI_TRANSFERRING_DATA | STC_PCI_SLAVE_WRITES) -+ -+#define PCIBUS_PERF_STATS (STP_PCI_WAITING_FOR_GNT | STP_PCI_WAITING_FOR_DEVSEL | \ -+ STC_PCI_OUT_OF_ORDER_SPLIT_COMP | STP_PCI_IDLE_CYCLES | \ -+ STC_PCI_MASTER_READS | STP_PCI_MASTER_HOLDS_BUS | \ -+ STP_PCI_MASTER_WAITING_BUS | STC_PCI_DISCONNECTS_RETRIES) -+ -+ -+ extern const char *elan_stats_names[8][10]; -+ -+#define ELAN_STATS_NAME(COUNT, CONTROL) (elan_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+ typedef volatile union e4_StatsControl -+ { -+ E4_uint64 StatsControl; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 StatCont0:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont5:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont7:4; -+#else -+ E4_uint32 StatCont7:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont5:4; -+ -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont0:4; -+#endif -+ E4_uint32 pad; -+ } s; -+ } E4_StatsControl; -+ -+typedef volatile union e4_StatsCount -+{ -+ E4_uint64 ClockStat; -+ struct -+ { -+ E4_uint32 ClockLSW; /* read only */ -+ E4_uint32 StatsCount; -+ } s; -+} E4_StatsCount; -+ -+typedef volatile union e4_clock -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 ClockLSW; -+ E4_uint32 ClockMSW; -+ } s; -+} E4_Clock; -+#define E4_TIME( X ) ((X).NanoSecClock) -+ -+#define ELAN4_COMMS_CLOCK_FREQUENCY 660 /* In Mhz. This is half the bit rate. */ -+#define ELAN4_CLOCK_ADD_VALUE 200 /* For 200ns increment rate */ -+#define ELAN4_CLOCK_COMMS_DIV_VALUE (((ELAN4_COMMS_CLOCK_FREQUENCY * ELAN4_CLOCK_ADD_VALUE) / (1000 * 4)) - 1) -+#define ELAN4_CLOCK_TICK_RATE ((ELAN4_CLOCK_ADD_VALUE << 8) + ELAN4_CLOCK_COMMS_DIV_VALUE) -+ -+typedef volatile union e4_clocktickrate -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 pad1; -+ E4_uint32 TickRates; -+ } s; -+} E4_ClockTickRate; -+ -+/* -+ * This is made into an 8k byte object. -+ */ -+typedef volatile struct _E4_User_Regs -+{ -+ E4_StatsCount StatCounts[8]; -+ E4_StatsCount InstCount; -+ E4_Clock Clock; -+ E4_StatsControl StatCont; -+ E4_ClockTickRate ClockTickRate; -+ E4_uint8 pad1[EightK - ((sizeof(E4_StatsCount)*9)+sizeof(E4_StatsControl)+ -+ sizeof(E4_Clock)+sizeof(E4_ClockTickRate))]; -+} E4_User_Regs; -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN4_USERREGS_H */ -diff -urN clean/include/elan4/usertrap.h linux-2.6.9/include/elan4/usertrap.h ---- clean/include/elan4/usertrap.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/usertrap.h 2004-05-05 05:08:35.000000000 -0400 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: usertrap.h,v 1.17 2004/05/05 09:08:35 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/usertrap.h,v $*/ -+ -+#ifndef __ELAN4_USERTRAP_H -+#define __ELAN4_USERTRAP_H -+ -+#ifndef _ASM -+typedef struct elan4_user_page -+{ -+ E4_uint64 upage_ddcq_completed; -+} ELAN4_USER_PAGE; -+ -+typedef struct elan4_user_trap -+{ -+ int ut_type; -+ unsigned ut_proc; -+ unsigned ut_args[4]; -+ -+ union { -+ ELAN4_EPROC_TRAP eproc; -+ ELAN4_CPROC_TRAP cproc; -+ ELAN4_DPROC_TRAP dproc; -+ ELAN4_IPROC_TRAP iproc; -+ ELAN4_TPROC_TRAP tproc; -+ ELAN4_NETERR_MSG msg; -+ } ut_trap; -+} ELAN4_USER_TRAP; -+ -+#endif /* _ASM */ -+ -+ -+/* value for ut_type */ -+#define UTS_FINISHED 0 /* all pending traps have been handled */ -+#define UTS_RESCHEDULE 1 /* must return to user mode and re-enter */ -+#define UTS_UNIMP_INSTR 2 /* unimplemented thread instruction */ -+#define UTS_EXECUTE_PACKET 3 /* iproc trap needs packet executing */ -+#define UTS_NETWORK_ERROR_TRAP 4 /* network error on this trap */ -+#define UTS_NETWORK_ERROR_MSG 5 /* network error message */ -+#define UTS_NETWORK_ERROR_TIMER 6 /* network error timer expired */ -+ -+#define UTS_EFAULT -1 /* failed to copyout trap */ -+#define UTS_INVALID_ADDR -2 /* all -ve codes mean trap could not be resolved. */ -+#define UTS_INVALID_VPROC -3 -+#define UTS_INVALID_COMMAND -4 -+#define UTS_BAD_TRAP -5 -+#define UTS_ALIGNMENT_ERROR -6 -+#define UTS_QUEUE_OVERFLOW -7 -+#define UTS_QUEUE_ERROR -8 -+#define UTS_INVALID_TRANS -9 -+#define UTS_PERMISSION_DENIED -10 -+#define UTS_CPROC_ERROR -11 -+#define UTS_INVALID_COOKIE -12 -+#define UTS_NETERR_ERROR -13 -+ -+/* "special" values for registering handlers */ -+#define UTS_ALL_TRAPS -9999 -+ -+/* value for ut_proc */ -+#define UTS_NOPROC 0 -+#define UTS_EPROC 1 -+#define UTS_CPROC 2 -+#define UTS_DPROC 3 -+#define UTS_TPROC 4 -+#define UTS_IPROC 5 -+#define UTS_NETERR_MSG 6 -+ -+/* unimplemented trap numbers for thread processor */ -+#define ELAN4_T_TRAP_INSTR(t) (0x80202000 | ((t) & 0xFF)) -+ -+#define ELAN4_T_SYSCALL_TRAP 1 -+# define ELAN4_T_OPEN 0 -+# define ELAN4_T_WRITE 1 -+# define ELAN4_T_READ 2 -+# define ELAN4_T_IOCTL 3 -+# define ELAN4_T_LSEEK 4 -+# define ELAN4_T_POLL 5 -+# define ELAN4_T_CLOSE 6 -+# define ELAN4_T_KILL 7 -+# define ELAN4_T_MMAP 8 -+# define ELAN4_T_MUNMAP 9 -+# define ELAN4_T_ABORT 100 -+# define ELAN4_T_DEBUG 101 -+# define ELAN4_T_REGDUMP 102 -+ -+#define ELAN4_T_REGDUMP_TRAP 2 -+ -+#define ELAN4_T_LIBELAN_TRAP 3 -+# define ELAN4_T_TPORT_NEWBUF 0 -+# define ELAN4_T_TPORT_GC 1 -+# define ELAN4_T_TPORT_DEBUG 2 -+ -+#define ELAN4_T_ALLOC_TRAP 4 -+# define ELAN4_T_ALLOC_ELAN 0 -+# define ELAN4_T_ALLOC_MAIN 1 -+# define ELAN4_T_FREE_ELAN 2 -+# define ELAN4_T_FREE_MAIN 3 -+ -+/* reserved main interrupt cookies */ -+#define ELAN4_INT_COOKIE_DDCQ 0 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USERTRAP_H */ -diff -urN clean/include/elan4/xsdram.h linux-2.6.9/include/elan4/xsdram.h ---- clean/include/elan4/xsdram.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/elan4/xsdram.h 2004-03-05 07:32:04.000000000 -0500 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_XSDRAM_H -+#define __ELAN4_XSDRAM_H -+ -+#ident "@(#)$Id: xsdram.h,v 1.13 2004/03/05 12:32:04 jon Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/xsdram.h,v $*/ -+ -+/* SAMSUNG K4H281638D-TCB3 */ -+ -+#define SDRAM_tRCF_1_SH 0 -+#define SDRAM_tRP_1_SH 4 -+#define SDRAM_tRCD_SH 8 -+#define SDRAM_tRRD_SH 12 -+#define SDRAM_tEndWr_SH 16 -+#define SDRAM_tEndRd_SH 20 -+#define SDRAM_Burst_SH 24 -+#define SDRAM_CL_SH 28 -+#define SDRAM_DsblBypass (1ULL << 31) -+#define SDRAM_RefreshRate_SH 32 -+#define SDRAM_RamSize_SH 34 -+#define SDRAM_ReadLtncy_1_SH 36 -+#define SDRAM_RdOffset_SH 40 -+#define SDRAM_FlightDelay_SH 42 -+ -+#define SDRAM_ENABLE_ECC (1ULL << 44) // Enables error detecting on the ECC. -+#define SDRAM_SDRAM_TESTING (1ULL << 45) // Switches to test mode for checking EEC data bits -+#define SDRAM_SETUP (1ULL << 46) // Writes SDram control reg when set. Also starts -+ -+#define SDRAM_CS_MODE0 0ULL // 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) -+#define SDRAM_CS_MODE1 1ULL // 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) -+#define SDRAM_CS_MODE2 2ULL // 2Gbit (16-bit output) or 1Gbit (8-bit output) -+#define SDRAM_CS_MODE3 3ULL // 4Gbit (16-bit output) or 2Gbit (8-bit output) -+ -+#if defined(LINUX) && !defined(CONFIG_MPSAS) -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x2ULL << SDRAM_RefreshRate_SH) | (0x3ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH)) -+#else -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x0ULL << SDRAM_RefreshRate_SH) | (0x0ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH) | SDRAM_ENABLE_ECC | SDRAM_SETUP) -+#endif -+ -+#endif /* __ELAN4_XSDRAM_H */ -diff -urN clean/include/jtag/jtagio.h linux-2.6.9/include/jtag/jtagio.h ---- clean/include/jtag/jtagio.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/jtag/jtagio.h 2004-12-16 05:39:27.000000000 -0500 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: jtagio.h,v 1.8 2004/12/16 10:39:27 lee Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagio.h,v $*/ -+ -+ -+#ifndef __SYS_JTAGMOD_H -+#define __SYS_JTAGMOD_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define JTAG_MAX_CHIPS 8 -+#define JTAG_MAX_INSTR_LEN 8 -+#define JTAG_MAX_BITS (JTAG_MAX_CHIPS * JTAG_MAX_INSTR_LEN) -+#define JTAG_MAX_DATA_LEN 1024 -+ -+#define JTAG_BYPASS 0xFF -+ -+#define I2C_ADDR_LEN 7 /* 7 bits of address */ -+#define I2C_DATA_LEN 8 /* 8 bits of data */ -+#define I2C_MAX_DATA_LEN 9 /* and upto 9 bytes worth */ -+ -+#define BITS_PER_BYTE 8 -+#define JTAG_NBYTES(nbits) (((nbits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) -+#define JTAG_BIT(v, num) (((v)[(num) / BITS_PER_BYTE] >> ((num) % BITS_PER_BYTE)) & 1) -+#define JTAG_SET_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] |= (1 << ((num) % BITS_PER_BYTE))) -+#define JTAG_CLR_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] &= ~(1 << ((num) % BITS_PER_BYTE))) -+ -+#define RING_CLOCK_CARD (0x3D) -+#define RING_CLOCK_SHIFT (0x3E) -+#define RING_JTAG_LOOPBACK (0x3F) -+#define RING_MAX (0x40) -+ -+#define RING_QUAD_BIT (0x40) -+#define RING_I2C_BIT (0x80) -+ -+#define VALID_JTAG_RING(ring) ((ring) < 0x20 || (ring) == RING_JTAG_LOOPBACK) -+#define VALID_I2C_RING(ring) ((ring) < 0x20 || (ring) == RING_CLOCK_CARD) -+ -+ -+typedef struct jtag_value -+{ -+ u_char bytes[JTAG_NBYTES(JTAG_MAX_DATA_LEN)]; -+} JTAG_VALUE; -+ -+/* arguements to JTAG_SHIFT_IR/JTAG_SHIFT_DR */ -+typedef struct jtag_reset_args -+{ -+ u_int ring; -+} JTAG_RESET_ARGS; -+ -+typedef struct jtag_shift_args -+{ -+ u_int ring; -+ u_int nbits; -+ u_char *value; -+} JTAG_SHIFT_ARGS; -+ -+typedef struct i2c_args -+{ -+ u_int ring; -+ u_int device; -+ u_int reg; -+ u_int count; -+ u_int ok; -+ u_char data[I2C_MAX_DATA_LEN]; -+} I2C_ARGS; -+ -+/* values for 'ok' - the return value from i2c_xx functions */ -+#define I2C_OP_SUCCESS 0 -+#define I2C_OP_ERROR 1 -+#define I2C_OP_NOT_IDLE 2 -+#define I2C_OP_NO_DEVICE 3 -+#define I2C_OP_WRITE_TO_BIG 4 -+#define I2C_OP_BAD_RESOURCE 5 -+ -+typedef struct i2c_clock_shift_args -+{ -+ u_int t; -+ u_int n; -+ u_int m; -+} I2C_CLOCK_SHIFT_ARGS; -+ -+#define JTAG_RESET _IOWR('j', '0', JTAG_RESET_ARGS) -+#define JTAG_SHIFT_IR _IOWR('j', '1', JTAG_SHIFT_ARGS) -+#define JTAG_SHIFT_DR _IOWR('j', '2', JTAG_SHIFT_ARGS) -+ -+#define I2C_CLOCK_SHIFT _IOWR('j', '4', I2C_CLOCK_SHIFT_ARGS) -+#define I2C_WRITE _IOWR('j', '5', I2C_ARGS) -+#define I2C_READ _IOWR('j', '6', I2C_ARGS) -+#define I2C_WRITEREG _IOWR('j', '7', I2C_ARGS) -+#define I2C_READREG _IOWR('j', '8', I2C_ARGS) -+ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __SYS_JTAGMOD_H */ -diff -urN clean/include/linux/init_task.h linux-2.6.9/include/linux/init_task.h ---- clean/include/linux/init_task.h 2004-10-18 17:53:13.000000000 -0400 -+++ linux-2.6.9/include/linux/init_task.h 2005-10-10 17:47:17.000000000 -0400 -@@ -2,6 +2,7 @@ - #define _LINUX__INIT_TASK_H - - #include -+#include - - #define INIT_FILES \ - { \ -@@ -112,6 +113,7 @@ - .proc_lock = SPIN_LOCK_UNLOCKED, \ - .switch_lock = SPIN_LOCK_UNLOCKED, \ - .journal_info = NULL, \ -+ INIT_TASK_PTRACK(tsk) \ - } - - -diff -urN clean/include/linux/ioproc.h linux-2.6.9/include/linux/ioproc.h ---- clean/include/linux/ioproc.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/linux/ioproc.h 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,271 @@ -+/* -*- linux-c -*- -+ * -+ * Copyright (C) 2002-2004 Quadrics Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * -+ */ -+ -+/* -+ * Callbacks for IO processor page table updates. -+ */ -+ -+#ifndef __LINUX_IOPROC_H__ -+#define __LINUX_IOPROC_H__ -+ -+#include -+#include -+ -+typedef struct ioproc_ops { -+ struct ioproc_ops *next; -+ void *arg; -+ -+ void (*release)(void *arg, struct mm_struct *mm); -+ void (*sync_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ void (*invalidate_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ void (*update_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ -+ void (*change_protection)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot); -+ -+ void (*sync_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*invalidate_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*update_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ -+} ioproc_ops_t; -+ -+/* IOPROC Registration -+ * -+ * Called by the IOPROC device driver to register its interest in page table -+ * changes for the process associated with the supplied mm_struct -+ * -+ * The caller should first allocate and fill out an ioproc_ops structure with -+ * the function pointers initialised to the device driver specific code for -+ * each callback. If the device driver doesn't have code for a particular -+ * callback then it should set the function pointer to be NULL. -+ * The ioproc_ops arg parameter will be passed unchanged as the first argument -+ * to each callback function invocation. -+ * -+ * The ioproc registration is not inherited across fork() and should be called -+ * once for each process that the IOPROC device driver is interested in. -+ * -+ * Must be called holding the mm->page_table_lock -+ */ -+extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); -+ -+ -+/* IOPROC De-registration -+ * -+ * Called by the IOPROC device driver when it is no longer interested in page -+ * table changes for the process associated with the supplied mm_struct -+ * -+ * Normally this is not needed to be called as the ioproc_release() code will -+ * automatically unlink the ioproc_ops struct from the mm_struct as the -+ * process exits -+ * -+ * Must be called holding the mm->page_table_lock -+ */ -+extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); -+ -+#ifdef CONFIG_IOPROC -+ -+/* IOPROC Release -+ * -+ * Called during exit_mmap() as all vmas are torn down and unmapped. -+ * -+ * Also unlinks the ioproc_ops structure from the mm list as it goes. -+ * -+ * No need for locks as the mm can no longer be accessed at this point -+ * -+ */ -+static inline void -+ioproc_release(struct mm_struct *mm) -+{ -+ struct ioproc_ops *cp; -+ -+ while ((cp = mm->ioproc_ops) != NULL) { -+ mm->ioproc_ops = cp->next; -+ -+ if (cp->release) -+ cp->release(cp->arg, mm); -+ } -+} -+ -+/* IOPROC SYNC RANGE -+ * -+ * Called when a memory map is synchronised with its disk image i.e. when the -+ * msync() syscall is invoked. Any future read or write to the associated -+ * pages by the IOPROC should cause the page to be marked as referenced or -+ * modified. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_sync_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->sync_range) -+ cp->sync_range(cp->arg, vma, start, end); -+} -+ -+/* IOPROC INVALIDATE RANGE -+ * -+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the -+ * user or paged out by the kernel. -+ * -+ * After this call the IOPROC must not access the physical memory again unless -+ * a new translation is loaded. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_invalidate_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->invalidate_range) -+ cp->invalidate_range(cp->arg, vma, start, end); -+} -+ -+/* IOPROC UPDATE RANGE -+ * -+ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk -+ * up, when breaking COW or faulting in an anonymous page of memory. -+ * -+ * These give the IOPROC device driver the opportunity to load translations -+ * speculatively, which can improve performance by avoiding device translation -+ * faults. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_update_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->update_range) -+ cp->update_range(cp->arg, vma, start, end); -+} -+ -+ -+/* IOPROC CHANGE PROTECTION -+ * -+ * Called when the protection on a region of memory is changed i.e. when the -+ * mprotect() syscall is invoked. -+ * -+ * The IOPROC must not be able to write to a read-only page, so if the -+ * permissions are downgraded then it must honour them. If they are upgraded -+ * it can treat this in the same way as the ioproc_update_[range|sync]() calls -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->change_protection) -+ cp->change_protection(cp->arg, vma, start, end, newprot); -+} -+ -+/* IOPROC SYNC PAGE -+ * -+ * Called when a memory map is synchronised with its disk image i.e. when the -+ * msync() syscall is invoked. Any future read or write to the associated page -+ * by the IOPROC should cause the page to be marked as referenced or modified. -+ * -+ * Not currently called as msync() calls ioproc_sync_range() instead -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_sync_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->sync_page) -+ cp->sync_page(cp->arg, vma, addr); -+} -+ -+/* IOPROC INVALIDATE PAGE -+ * -+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the -+ * user or paged out by the kernel. -+ * -+ * After this call the IOPROC must not access the physical memory again unless -+ * a new translation is loaded. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->invalidate_page) -+ cp->invalidate_page(cp->arg, vma, addr); -+} -+ -+/* IOPROC UPDATE PAGE -+ * -+ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk -+ * up, when breaking COW or faulting in an anoymous page of memory. -+ * -+ * These give the IOPROC device the opportunity to load translations -+ * speculatively, which can improve performance by avoiding device translation -+ * faults. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_update_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->update_page) -+ cp->update_page(cp->arg, vma, addr); -+} -+ -+#else -+ -+/* ! CONFIG_IOPROC so make all hooks empty */ -+ -+#define ioproc_release(mm) do { } while (0) -+ -+#define ioproc_sync_range(vma,start,end) do { } while (0) -+ -+#define ioproc_invalidate_range(vma, start,end) do { } while (0) -+ -+#define ioproc_update_range(vma, start, end) do { } while (0) -+ -+#define ioproc_change_protection(vma, start, end, prot) do { } while (0) -+ -+#define ioproc_sync_page(vma, addr) do { } while (0) -+ -+#define ioproc_invalidate_page(vma, addr) do { } while (0) -+ -+#define ioproc_update_page(vma, addr) do { } while (0) -+ -+#endif /* CONFIG_IOPROC */ -+ -+#endif /* __LINUX_IOPROC_H__ */ -diff -urN clean/include/linux/ptrack.h linux-2.6.9/include/linux/ptrack.h ---- clean/include/linux/ptrack.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/linux/ptrack.h 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Derived from exit_actn.c by -+ * Copyright (C) 2003 Quadrics Ltd. -+ * -+ */ -+#ifndef __LINUX_PTRACK_H -+#define __LINUX_PTRACK_H -+ -+/* -+ * Process tracking - this allows a module to keep track of processes -+ * in order that it can manage all tasks derived from a single process. -+ */ -+ -+#define PTRACK_PHASE_CLONE 1 -+#define PTRACK_PHASE_CLONE_FAIL 2 -+#define PTRACK_PHASE_EXEC 3 -+#define PTRACK_PHASE_EXIT 4 -+ -+#define PTRACK_FINISHED 0 -+#define PTRACK_INNHERIT 1 -+#define PTRACK_DENIED 2 -+ -+#ifdef CONFIG_PTRACK -+ -+typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child); -+ -+struct ptrack_desc { -+ struct list_head link; -+ ptrack_callback_t callback; -+ void *arg; -+}; -+ -+extern int ptrack_register (ptrack_callback_t callback, void *arg); -+extern void ptrack_deregister (ptrack_callback_t callback, void *arg); -+extern int ptrack_registered (ptrack_callback_t callback, void *arg); -+ -+extern int ptrack_call_callbacks (int phase, struct task_struct *child); -+ -+#define INIT_TASK_PTRACK(tsk) \ -+ .ptrack_list = LIST_HEAD_INIT(tsk.ptrack_list) -+ -+#else -+#define ptrack_call_callbacks (phase, child) (0) -+ -+#define INIT_TASK_PTRACK(tsk) -+ -+#endif -+ -+#endif /* __LINUX_PTRACK_H */ -diff -urN clean/include/linux/sched.h linux-2.6.9/include/linux/sched.h ---- clean/include/linux/sched.h 2005-05-13 13:39:11.000000000 -0400 -+++ linux-2.6.9/include/linux/sched.h 2005-10-10 17:47:17.000000000 -0400 -@@ -184,6 +184,9 @@ - asmlinkage void schedule(void); - - struct namespace; -+#ifdef CONFIG_IOPROC -+struct ioproc_ops; -+#endif - - /* Maximum number of active map areas.. This is a random (large) number */ - #define DEFAULT_MAX_MAP_COUNT 65536 -@@ -259,6 +262,11 @@ - struct kioctx *ioctx_list; - - struct kioctx default_kioctx; -+ -+#ifdef CONFIG_IOPROC -+ /* hooks for io devices with advanced RDMA capabilities */ -+ struct ioproc_ops *ioproc_ops; -+#endif - }; - - extern int mmlist_nr; -@@ -600,6 +608,10 @@ - struct mempolicy *mempolicy; - short il_next; /* could be shared with used_math */ - #endif -+#ifdef CONFIG_PTRACK -+/* process tracking callback */ -+ struct list_head ptrack_list; -+#endif - }; - - static inline pid_t process_group(struct task_struct *tsk) -diff -urN clean/include/qsnet/autoconf.h linux-2.6.9/include/qsnet/autoconf.h ---- clean/include/qsnet/autoconf.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/autoconf.h 2005-10-10 17:47:30.000000000 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 2005 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * NOTE: This file has been automatically generated: -+ * node : lester0.hp.com -+ * kernel : ../linux-2.6.9 -+ * date : Mon Oct 10 17:47:29 EDT 2005 -+ * -+ */ -+ -+#ifndef __QSNET_AUTOCONF_H -+#define __QSNET_AUTOCONF_H -+ -+#include -+#undef NO_RMAP -+#undef AC -+#undef NO_O1_SCHED -+#undef NO_NPTL -+#undef NO_ABI -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+#define PROCESS_ACCT -+#endif -+#undef RSS_ATOMIC -+#define NO_COPROC -+#undef NO_IOPROC -+#undef NO_PTRACK -+#define NO_PANIC_NOTIFIER -+#undef NO_SHM_CLEANUP -+#undef NO_PDE -+ -+ -+#define CONFIG_EIP -+#define CONFIG_ELAN -+#define CONFIG_ELAN3 -+#define CONFIG_ELAN4 -+#define CONFIG_EP -+#define CONFIG_JTAG -+#define CONFIG_QSNET -+#define CONFIG_RMS -+ -+#endif /* __QSNET_AUTOCONF_H */ -+ -diff -urN clean/include/qsnet/condvar.h linux-2.6.9/include/qsnet/condvar.h ---- clean/include/qsnet/condvar.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/condvar.h 2003-06-07 11:43:33.000000000 -0400 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_CONDVAR_H) -+#define _LINUX_CONDVAR_H -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+ -+#define CV_RET_SIGPENDING 0 -+#define CV_RET_TIMEOUT (-1) -+#define CV_RET_NORMAL 1 -+ -+struct kcondvar_task { -+ struct task_struct *task; /* need to wrap task in this */ -+ struct list_head list; /* to thread as a list */ -+ int blocked; -+}; -+ -+typedef struct { -+ struct list_head task_list; /* list of kcondvar_task's */ -+} kcondvar_t; -+ -+#define kcondvar_wait(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_UNINTERRUPTIBLE) -+#define kcondvar_waitsig(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_INTERRUPTIBLE) -+#define kcondvar_timedwait(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_UNINTERRUPTIBLE) -+#define kcondvar_timedwaitsig(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_INTERRUPTIBLE) -+#define kcondvar_wakeupone(c,l) kcondvar_wakeup(c, l, 0) -+#define kcondvar_wakeupall(c,l) kcondvar_wakeup(c, l, 1) -+ -+extern __inline__ void -+kcondvar_init(kcondvar_t *c) -+{ -+ INIT_LIST_HEAD(&c->task_list); -+} -+ -+extern __inline__ void -+kcondvar_destroy(kcondvar_t *c) -+{ -+ ASSERT(list_empty(&c->task_list)); -+} -+ -+/* -+ * We thread a struct kcondvar_task, allocated on the stack, onto the kcondvar_t's -+ * task_list, and take it off again when we wake up. -+ */ -+extern __inline__ int -+debug_kcondvar_wait(kcondvar_t *c, spinlock_t *l, unsigned long *fl, long tmo, int state) -+{ -+ struct kcondvar_task cvt; -+ int ret = CV_RET_NORMAL; -+ -+ ASSERT(!in_interrupt()); /* we can block */ -+ ASSERT(SPINLOCK_HELD(l)); /* enter holding lock */ -+ -+ cvt.task = current; -+ cvt.blocked = 1; -+ list_add(&cvt.list, &c->task_list); -+ do { -+ /* Note: we avoid using TASK_UNINTERRUPTIBLE here because avenrun() -+ * (linux/kernel/timer.c:calc_load()) -+ * computation treats it like TASK_RUNNABLE hence creates false high -+ * load averages when we create kernel threads. -+ * The cvt.blocked flag distinguishes a signal wakeup from a kcondvar_wakeup. -+ * -+ * However, if we do take a signal we could end up busily spinning here, if -+ * we ignore it (state == TASK_UNINTERRUPTIBLE) so once we see a signal -+ * pending we do sleep TASK_UNINTERRUPTIBLE to stop a busy spin. -+ * I have now blocked all signals for kernel threads to prevent this -+ * happening but other users of kcondvar_wait may still hit this spin. -+ */ -+ set_current_state (signal_pending(current) ? state : TASK_INTERRUPTIBLE); -+ -+ if (fl) -+ spin_unlock_irqrestore(l, *fl); -+ else -+ spin_unlock(l); -+ if (tmo) { -+ if (tmo <= jiffies || !schedule_timeout(tmo - jiffies)) -+ ret = CV_RET_TIMEOUT; -+ } else -+ schedule(); -+ if (fl) -+ spin_lock_irqsave (l, *fl); -+ else -+ spin_lock(l); -+ -+ /* signal_pending - Only exit the loop if the user was waiting TASK_INTERRUPTIBLE */ -+ if ((state == TASK_INTERRUPTIBLE) && signal_pending(current)) -+ ret = CV_RET_SIGPENDING; -+ -+ } while (cvt.blocked && ret == CV_RET_NORMAL); -+ list_del(&cvt.list); -+ -+ /* Reset task state in case we didn't sleep above */ -+ set_current_state (TASK_RUNNING); -+ -+ return ret; /* return holding lock */ -+} -+ -+extern __inline__ void -+kcondvar_wakeup(kcondvar_t *c, spinlock_t *l, int wakeall) -+{ -+ struct list_head *lp; -+ struct kcondvar_task *cvtp; -+ -+ ASSERT(SPINLOCK_HELD(l)); /* already holding lock */ -+ for (lp = c->task_list.next; lp != &c->task_list; lp = lp->next) { -+ cvtp = list_entry(lp, struct kcondvar_task, list); -+ if (cvtp->blocked) { -+ cvtp->blocked = 0; -+ /* wake_up_process added to kernel/ksyms.c */ -+ wake_up_process(cvtp->task); -+ if (!wakeall) -+ break; -+ } -+ } -+} /* return still holding lock */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_CONDVAR_H */ -diff -urN clean/include/qsnet/config.h linux-2.6.9/include/qsnet/config.h ---- clean/include/qsnet/config.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/config.h 2005-04-28 18:59:31.000000000 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _QSNET_CONFIG_H -+#define _QSNET_CONFIG_H -+ -+#ident "$Id: config.h,v 1.24 2005/04/28 22:59:31 robin Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/config.h,v $*/ -+ -+ -+/* -+ * QSNET standard defines : -+ * -+ * Target operating system defines -+ * SOLARIS -+ * TRU64UNIX/DIGITAL_UNIX -+ * LINUX -+ * -+ * Target processor defines -+ * SPARC -+ * ALPHA -+ * I386 -+ * IA64 -+ * X86_64 -+ * -+ * Byte order defines -+ * __LITTLE_ENDIAN__ -+ * __BIG_ENDIAN__ -+ * -+ * Data size defines -+ * _LP64 - LP64 - long/pointer is 64 bits -+ * _ILP32 - LP32 - long/pointer is 32 bits -+ * -+ * Elan defines for main processor -+ * __MAIN_LITTLE_ENDIAN__ - main byte order (for thread code) -+ * __MAIN_BIG_ENDIAN__ -+ * _MAIN_LP64 - main long size (for thread code) -+ * _MAIN_ILP32 -+ * -+ * Compiling for kernel (defined in makefile) -+ * _KERNEL -+ * -+ */ -+ -+#if defined(__LP64__) && !defined(_LP64) -+# define _LP64 -+#endif -+ -+#if defined(__arch64__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if defined(__alpha__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if !defined(__arch64__) && !defined(_ILP32) && !defined(_LP64) -+# define _ILP32 -+#endif -+ -+#if defined(__ELAN__) || defined(__ELAN3__) -+ -+#define __LITTLE_ENDIAN__ -+ -+#if defined(__host_solaris) && defined(__host_sparc) -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define _MAIN_ILP32 -+#define __MAIN_BIG_ENDIAN__ -+ -+#elif defined(__host_osf) -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_alpha) -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_sparc) -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __MAIN_BIG_ENDIAN__ -+#ifdef __KERNEL__ -+# define _MAIN_LP64 -+#else -+# define _MAIN_ILP32 -+#endif -+ -+#elif defined(__host_linux) && defined(__host_i386) -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define _MAIN_ILP32 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_ia64) -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_x86_64) -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#else /* !defined(__ELAN3__) */ -+ -+#if (defined(sun) || defined(__sun)) && defined(sparc) && !defined(__sparcv9) /* Sun Solaris 5.6 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#ifndef __BIG_ENDIAN__ -+#define __BIG_ENDIAN__ -+#endif -+ -+#elif (defined(sun) || defined(__sun)) && defined(sparc) && defined(__sparcv9) /* Sun Solaris 5.7 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif defined(__osf__) && defined(__alpha) /* Digital Unix */ -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__alpha) /* Linux Alpha */ -+ -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__sparc) /* Linux Sparc */ -+ -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__i386) /* Linux i386 */ -+ -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__ia64) /* Linux ia64 */ -+ -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && (defined(__x86_64) || defined(__x86_64__)) /* Linux x86_64 */ -+ -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define __LITTLE_ENDIAN__ -+ -+#elif defined(__QNXNTO__) -+#define QNX -+#define I386 -+#define __LITTLE_ENDIAN__ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#endif -+ -+#include -+ -+#endif /* _QSNET_CONFIG_H */ -diff -urN clean/include/qsnet/crwlock.h linux-2.6.9/include/qsnet/crwlock.h ---- clean/include/qsnet/crwlock.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/crwlock.h 2003-09-24 10:07:02.000000000 -0400 -@@ -0,0 +1,207 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+/* -+ * Complex - Reader/Writer locks -+ * Ref: "UNIX Systems for Modern Architectures", by Curt Schimmel, -+ * sec 11.6.3. -+ * -+ * This implementation is based on semaphores and may not be called from -+ * interrupt handlers. -+ * -+ */ -+ -+#if !defined(_LINUX_RWLOCK_H) -+#define _LINUX_RWLOCK_H -+ -+#if defined(__KERNEL__) -+ -+typedef enum { RD, WRT, ANY } crwlock_type_t; -+ -+#define crwlock_write_held(l) debug_crwlock_held(l, WRT, __BASE_FILE__,__LINE__) -+#define crwlock_read_held(l) debug_crwlock_held(l, RD, __BASE_FILE__, __LINE__) -+#define crwlock_held(l) debug_crwlock_held(l, ANY, __BASE_FILE__, __LINE__) -+ -+#define crwlock_read(l) debug_crwlock_read(l, __BASE_FILE__, __LINE__) -+#define crwlock_write(l) debug_crwlock_write(l, __BASE_FILE__, __LINE__) -+#define crwlock_done(l) debug_crwlock_done(l, __BASE_FILE__, __LINE__) -+ -+#if defined(DEBUG_RWLOCK) && defined(__alpha__) && !defined(DEBUG_SPINLOCK) -+#define DEBUG_SPINLOCK -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if !defined(DEBUG_SPINLOCK) -+#define debug_spin_lock(lock, file, line) spin_lock(lock) -+#endif -+ -+typedef struct { -+ spinlock_t m_lock; /* protects cnt fields below */ -+ int m_rdcnt; /* # of rdrs in crit section */ -+ int m_wrcnt; /* # of wrtrs in crit section */ -+ int m_rdwcnt; /* # of waiting readers */ -+ int m_wrwcnt; /* # of waiting writers */ -+ struct semaphore m_rdwait; /* sema where readers wait */ -+ struct semaphore m_wrwait; /* sema where writers wait */ -+ pid_t m_wrholder; /* task holding write lock */ -+} crwlock_t; -+ -+extern __inline__ void -+crwlock_init(crwlock_t *l) -+{ -+ l->m_lock = SPIN_LOCK_UNLOCKED; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->m_rdwait = MUTEX_LOCKED; -+ l->m_wrwait = MUTEX_LOCKED; -+#else -+ sema_init(&l->m_rdwait,0); -+ sema_init(&l->m_wrwait,0); -+#endif -+ l->m_rdcnt = l->m_wrcnt = l->m_rdwcnt = l->m_wrwcnt = 0; -+ l->m_wrholder = PID_NONE; -+} -+ -+extern __inline__ void -+crwlock_destroy(crwlock_t *l) -+{ -+ ASSERT(l->m_rdcnt == 0 && l->m_wrcnt == 0); -+} -+ -+/* -+ * If a writer has the lock presently or there are writers waiting, -+ * then we have to wait. -+ */ -+extern __inline__ void -+debug_crwlock_read(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_wrwcnt) { -+ l->m_rdwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_rdwait); /* P */ -+ } else { -+ l->m_rdcnt++; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+/* -+ * If we're the last reader, and a writer is waiting, -+ * then let the writer go now. -+ */ -+/* private */ -+extern __inline__ void -+debug_crwlock_read_done(crwlock_t *l, char *file, int line) -+{ -+ spin_lock(&l->m_lock); -+ l->m_rdcnt--; -+ if (l->m_wrwcnt && l->m_rdcnt == 0) { -+ l->m_wrcnt = 1; -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ return; -+ } -+ spin_unlock(&l->m_lock); -+} -+ -+extern __inline__ void -+debug_crwlock_write(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_rdcnt) { /* block if lock is in use */ -+ l->m_wrwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_wrwait); /* P */ -+ } else { /* lock is not in use */ -+ l->m_wrcnt = 1; -+ spin_unlock(&l->m_lock); -+ } -+ l->m_wrholder = current->pid; -+} -+ -+/* private */ -+extern __inline__ void -+debug_crwlock_write_done(crwlock_t *l, char *file, int line) -+{ -+ int rdrs; -+ -+ spin_lock(&l->m_lock); -+ l->m_wrholder = PID_NONE; -+ if (l->m_rdwcnt) { /* let any readers go first */ -+ l->m_wrcnt = 0; -+ rdrs = l->m_rdwcnt; -+ l->m_rdcnt = rdrs; -+ l->m_rdwcnt = 0; -+ spin_unlock(&l->m_lock); -+ while (rdrs--) -+ up(&l->m_rdwait); /* V */ -+ } else if (l->m_wrwcnt) { /* or let any writer go */ -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ } else { /* nobody waiting, unlock */ -+ l->m_wrcnt = 0; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+extern __inline__ void -+debug_crwlock_done(crwlock_t *l, char *file, int line) -+{ -+ if (l->m_wrholder == current->pid) -+ debug_crwlock_write_done(l, file, line); -+ else -+ debug_crwlock_read_done(l, file, line); -+} -+ -+/* -+ * Return nonzero if lock is held -+ */ -+extern __inline__ int -+debug_crwlock_held(crwlock_t *l, crwlock_type_t t, char *file, int line) -+{ -+ int res; -+ -+ spin_lock(&l->m_lock); -+ switch(t) { -+ case RD: -+ res = l->m_rdcnt; -+ break; -+ case WRT: -+ res = l->m_wrcnt; -+ break; -+ case ANY: -+ res = l->m_wrcnt + l->m_rdcnt; -+ break; -+ } -+ spin_unlock(&l->m_lock); -+ -+ return res; -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_RWLOCK_H */ -diff -urN clean/include/qsnet/ctrl_linux.h linux-2.6.9/include/qsnet/ctrl_linux.h ---- clean/include/qsnet/ctrl_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/ctrl_linux.h 2003-03-26 04:32:03.000000000 -0500 -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_CTRL_LINUX_H -+#define __QSNET_CTRL_LINUX_H -+ -+#ident "$Id: ctrl_linux.h,v 1.3 2003/03/26 09:32:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/ctrl_linux.h,v $*/ -+ -+#define QSNETIO_USER_BASE 0x40 -+ -+#define QSNETIO_DEBUG_DUMP _IO ('e', QSNETIO_USER_BASE + 0) -+ -+typedef struct qsnetio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} QSNETIO_DEBUG_BUFFER_STRUCT; -+#define QSNETIO_DEBUG_BUFFER _IOWR ('e', QSNETIO_USER_BASE + 1, QSNETIO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct qsnetio_debug_kmem_struct -+{ -+ void *handle; -+} QSNETIO_DEBUG_KMEM_STRUCT; -+#define QSNETIO_DEBUG_KMEM _IOWR ('e', QSNETIO_USER_BASE + 2, QSNETIO_DEBUG_KMEM_STRUCT) -+ -+#endif /* __QSNET_CTRL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/qsnet/debug.h linux-2.6.9/include/qsnet/debug.h ---- clean/include/qsnet/debug.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/debug.h 2005-03-23 06:04:54.000000000 -0500 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+#ifndef _QSNET_DEBUG_H -+#define _QSNET_DEBUG_H -+ -+#if defined(DIGITAL_UNIX) -+#include -+#elif defined(LINUX) -+extern int qsnet_assfail (char *ex, const char *func, char *file, int line); -+ -+#define ASSERT(EX) do { \ -+ if (!(EX) && qsnet_assfail (#EX, __FUNCTION__, __BASE_FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#endif /* DIGITAL_UNIX */ -+ -+/* debug.c */ -+extern void qsnet_debug_init(void); -+extern void qsnet_debug_fini(void); -+extern void qsnet_debug_disable(int); -+extern void qsnet_debug_alloc(void); -+ -+#define QSNET_DEBUG_BUFFER ((unsigned int)(0x01)) -+#define QSNET_DEBUG_CONSOLE ((unsigned int)(0x02)) -+#define QSNET_DEBUG_BUF_CON ( QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE ) -+ -+#ifdef __GNUC__ -+extern void qsnet_debugf (unsigned int mode, const char *fmt, ...) -+ __attribute__ ((format (printf,2,3))); -+extern void kqsnet_debugf (char *fmt, ...) -+ __attribute__ ((format (printf,1,2))); -+#else -+extern void qsnet_debugf (unsigned int mode, const char *fmt, ...); -+extern void kqsnet_debugf (char *fmt, ...); -+#endif -+extern void qsnet_vdebugf (unsigned int mode, const char *prefix, const char *fmt, va_list ap); -+extern int qsnet_debug_buffer(caddr_t ubuffer, int len); -+extern int qsnet_debug_dump (void); -+extern int qsnet_debug_kmem (void *handle); -+ -+extern void qsnet_debug_buffer_on(void); -+extern void qsnet_debug_buffer_clear(void); -+extern void qsnet_debug_buffer_mark(char *str); -+ -+#endif /* _QSNET_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/qsnet/kcompat.h linux-2.6.9/include/qsnet/kcompat.h ---- clean/include/qsnet/kcompat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/kcompat.h 2005-07-05 11:09:03.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KCOMPAT_H -+#define __QSNET_KCOMPAT_H -+ -+#ident "$Id: kcompat.h,v 1.1.2.1 2005/07/05 15:09:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kcompat.h,v $*/ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ -+#define module_param(VAR,TYPE,PERM) MODULE_PARM(VAR,"i") -+ -+#endif /* KERNEL_VERSION(2,6,0) */ -+ -+#endif /* __QSNET_KCOMPAT_H */ -+ -+ -+ -+ -+ -+ -+ -diff -urN clean/include/qsnet/kernel.h linux-2.6.9/include/qsnet/kernel.h ---- clean/include/qsnet/kernel.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/kernel.h 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_H -+#define __QSNET_KERNEL_H -+ -+#ident "$Id: kernel.h,v 1.8.18.1 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel.h,v $*/ -+ -+#include -+#include -+ -+#if defined(SOLARIS) -+#include -+#endif -+ -+#if defined(DIGITAL_UNIX) -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#endif -+ -+#include -+#include -+ -+#endif /* __QSNET_KERNEL_H */ -+ -+ -+ -+ -+ -+ -+ -diff -urN clean/include/qsnet/kernel_linux.h linux-2.6.9/include/qsnet/kernel_linux.h ---- clean/include/qsnet/kernel_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/kernel_linux.h 2005-09-07 10:35:03.000000000 -0400 -@@ -0,0 +1,374 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_LINUX_H -+#define __QSNET_KERNEL_LINUX_H -+ -+#ident "$Id: kernel_linux.h,v 1.69.2.3 2005/09/07 14:35:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.h,v $*/ -+ -+#include -+#if defined(MODVERSIONS) -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) -+#include -+#else -+#include -+#endif -+#endif -+ -+#include -+#include -+ -+/* ASSERT(spin_is_locked(l)) would always fail on UP kernels */ -+#if defined(CONFIG_SMP) -+#define SPINLOCK_HELD(l) spin_is_locked(l) -+#else -+#define SPINLOCK_HELD(l) (1) -+#endif -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+#if defined(LINUX_ALPHA) -+# include /* for TSUNAMI_MEM */ -+#endif -+ -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) -+# undef MOD_INC_USE_COUNT -+# undef MOD_DEC_USE_COUNT -+# define MOD_INC_USE_COUNT -+# define MOD_DEC_USE_COUNT -+#endif -+ -+#define MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define MAX(a,b) ((a) > (b) ? (a) : (b)) -+ -+/* stray types */ -+typedef u64 u_longlong_t; -+typedef unsigned long uintptr_t; -+typedef int bool_t; -+ -+typedef unsigned long virtaddr_t; /* virtual address */ -+typedef unsigned long ioaddr_t; /* io address */ -+typedef unsigned long sdramaddr_t; /* elan sdram offset */ -+ -+/* 386 kernel can be compiled with PAE enabled to use a 44 bit physical address */ -+#if defined(CONFIG_X86_PAE) -+typedef unsigned long long physaddr_t; -+#else -+typedef unsigned long physaddr_t; -+#endif -+ -+/* ticks since reboot, and tick freq */ -+#define lbolt jiffies -+#define hz HZ -+ -+/* System page size and friends */ -+#define PAGESIZE PAGE_SIZE -+#define PAGESHIFT PAGE_SHIFT -+#define PAGEOFFSET (PAGE_SIZE - 1) -+#define PAGEMASK PAGE_MASK -+ -+#define PAGE_ALIGNED(a) (((a) & PAGE_MASK) == a) -+ -+/* convert between bytes and pages */ -+#define btop(b) ((unsigned long)(b) >> PAGE_SHIFT) /* rnd down */ -+#define btopr(b) btop(PAGE_ALIGN((unsigned long) b)) /* rnd up */ -+#define ptob(p) ((unsigned long)(p) << PAGE_SHIFT) -+ -+/* round up sz to the nearest multiple of blk */ -+#define roundup(sz,blk) ((blk) * ((sz) / (blk) + ((sz) % (blk) ? 1 : 0))) -+ -+/* send a signal to a process */ -+#define psignal(pr,sig) send_sig(sig,pr,0) -+ -+/* microsecond delay */ -+#define DELAY(us) udelay(us) -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+ -+/* D-Unix compatable errno values */ -+#define ESUCCESS 0 -+#define EFAIL 255 -+ -+/* ASSERT(NO_LOCKS_HELD) will be a no-op */ -+#define NO_LOCKS_HELD 1 -+ -+/* misc */ -+typedef int label_t; -+#define on_fault(ljp) ((ljp) == NULL) -+#define _NOTE(X) -+#define no_fault() ((void) 0) -+#define panicstr 0 -+ -+/* return from system call is -EXXX on linux */ -+#define set_errno(e) (-(e)) -+ -+/* -+ * BSD-style byte ops -+ */ -+ -+#define bcmp(src1,src2,len) memcmp(src1,src2,len) -+#define bzero(dst,len) memset(dst,0,len) -+#define bcopy(src,dst,len) memcpy(dst,src,len) -+ -+#define preemptable_start do { long must_yield_at = lbolt + (hz/10); -+#define preemptable_end } while (0) -+#define preemptable_check() do {\ -+ if ((lbolt - must_yield_at) > 0)\ -+ {\ -+ preemptable_yield() ; \ -+ must_yield_at = lbolt + (hz/10);\ -+ }\ -+ } while (0) -+ -+#define preemptable_yield() schedule() -+ -+#define CURPROC() current -+#define CURTHREAD() current -+#define SUSER() suser() -+ -+/* 64 bit IO operations on 32 bit intel cpus using MMX */ -+#if defined(LINUX_I386) -+extern u64 qsnet_readq (volatile u64 *ptr); -+extern void qsnet_writeq (u64 value, volatile u64 *ptr); -+ -+#define readq(ptr) qsnet_readq((void *) ptr) -+#define writeq(val,ptr) qsnet_writeq(val, (void *)ptr) -+#endif -+ -+/* -+ * Memory barriers -+ */ -+#ifndef mmiob -+# define mmiob() mb() -+#endif -+ -+/* -+ * Exit handlers -+ */ -+#define HANDLER_REGISTER(func,arg,flags) xa_handler_register(func,arg,flags) -+#define HANDLER_UNREGISTER(func,arg,flags) xa_handler_unregister(func,arg,flags) -+ -+/* -+ * KMEM_GETPAGES and KMEM_ALLOC both call kmem_alloc, which -+ * translates the call to kmalloc if < PAGE_SIZE, or vmalloc -+ * if >= PAGE_SIZE. vmalloc will always return a page-aligned -+ * region rounded up to the nearest page, while kmalloc will -+ * return bits and pieces of a page. -+ */ -+ -+#ifdef KMEM_DEBUG -+extern void *qsnet_kmem_alloc_debug(int len, int sleep, int zerofill, char *file, int line); -+extern void qsnet_kmem_free_debug(void *ptr, int len, char *file, int line); -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,0,__FILE__,__LINE__); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,1,__FILE__,__LINE__); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free_debug((void *)ptr,len,__FILE__,__LINE__) -+ -+#else -+ -+extern void *qsnet_kmem_alloc(int len, int sleep, int zerofill); -+extern void qsnet_kmem_free(void *ptr, int len); -+ -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,0); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,1); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free((void *)ptr,len) -+ -+#endif -+extern void qsnet_kmem_display(void *handle); -+extern physaddr_t kmem_to_phys(void *ptr); -+ -+#define KMEM_ASSERT(sleep) ASSERT(!(in_interrupt() && sleep)) -+ -+ -+#define KMEM_GETPAGES(ptr,type,pgs,sleep) KMEM_ZALLOC(ptr,type,ptob(pgs),sleep) -+#define KMEM_FREEPAGES(ptr,pgs) KMEM_FREE(ptr,ptob(pgs)); -+ -+/* -+ * Copying from user space -> kernel space (perms checked) -+ */ -+#define copyin(up,kp,size) copy_from_user(kp,up,size) -+#define copyin_noerr(up,kp,size) copy_from_user(kp,up,size) -+ -+/* get_user() gets xfer width right */ -+#define fulinux(ret, up) (get_user(ret, (up)) == 0 ? ret : -1) -+#define fulinuxp(ret, up) (get_user(ret, (up)) == 0 ? ret : NULL) -+ -+extern __inline__ int fubyte (u8 *up) { u8 ret; return fulinux(ret, up);} -+extern __inline__ int fusword (u16 *up) { u16 ret; return fulinux(ret, up);} -+extern __inline__ int fuword (u32 *up) { u32 ret; return fulinux(ret, up);} -+#if BITS_PER_LONG > 32 -+extern __inline__ u64 fulonglong(u64 *up) { u64 ret; return fulinux(ret, up);} -+#else -+extern __inline__ u64 fulonglong(u64 *up) { return ((u64) fuword((u32 *)up) | (((u64) fuword(((u32 *)up)+1))<<32)); } -+#endif -+extern __inline__ void *fuptr (void **up) { void *ret; return fulinuxp(ret,up);} -+ -+#define fubyte_noerr(up) fubyte(up) -+#define fusword_noerr(up) fusword(up) -+#define fuword_noerr(up) fuword(up) -+#define fulonglong_noerr(up) fulonglong(up) -+#define fuptr_noerr(up) fuptr(up) -+ -+extern __inline__ int copyinstr(char *up, char *kp, int max, int *size) -+{ -+ for (*size = 1; *size <= max; (*size)++) { -+ if (get_user(*kp, up++) != 0) -+ return EFAULT; /* bad user space addr */ -+ if (*kp++ == '\0') -+ return 0; /* success */ -+ } -+ *size = max; -+ return ENAMETOOLONG; /* runaway string */ -+} -+ -+/* -+ * Copying from kernel space -> user space (perms checked) -+ */ -+ -+#define copyout(kp,up,size) copy_to_user(up,kp,size) -+#define copyout_noerr(kp,up,size) copy_to_user(up,kp,size) -+ -+/* put_user() gets xfer width right */ -+#define sulinux(val, up) (put_user(val, (up)) == 0 ? 0 : -1) -+ -+extern __inline__ int subyte (u8 *up, u8 val) { return sulinux(val, up); } -+extern __inline__ int susword (u16 *up, u16 val) { return sulinux(val, up); } -+extern __inline__ int suword (u32 *up, u32 val) { return sulinux(val, up); } -+#if BITS_PER_LONG > 32 -+extern __inline__ int sulonglong(u64 *up, u64 val) { return sulinux(val, up); } -+#else -+extern __inline__ int sulonglong(u64 *up, u64 val) { return (suword((u32 *) up, (u32) val) == 0 ? -+ suword(((u32 *) up)+1, (u32) (val >> 32)) : -1); } -+#endif -+extern __inline__ int suptr (void **up,void *val){ return sulinux(val, up); } -+ -+#define subyte_noerr(up,val) subyte(up,val) -+#define susword_noerr(up,val) susword(up,val) -+#define suword_noerr(up,val) suword(up,val) -+#define sulonglong_noerr(up,val) sulonglong(up,val) -+#define suptr_noerr(up,val) suptr(up,val) -+ -+/* -+ * /proc/qsnet interface -+ */ -+extern inline int -+str_append(char *buf, char *add, int size) -+{ -+#define TRUNC_MSG "[Output truncated]\n" -+ int full = 0; -+ int max = size - strlen(TRUNC_MSG) - strlen(add) - 1; -+ -+ if (strlen(buf) > max) { -+ strcat(buf, TRUNC_MSG); -+ full = 1; -+ } else -+ strcat(buf, add); -+ return full; -+} -+ -+/* Spinlocks */ -+#define spin_lock_destroy(l) ((void) 0) -+ -+/* Complex - Reader/Writer locks - we added */ -+typedef crwlock_t krwlock_t; -+#define krwlock_init(l) crwlock_init(l) -+#define krwlock_destroy(l) crwlock_destroy(l) -+#define krwlock_write(l) crwlock_write(l) -+#define krwlock_read(l) crwlock_read(l) -+#define krwlock_done(l) crwlock_done(l) -+#define krwlock_is_locked(l) crwlock_held(l) -+#define krwlock_is_write_locked(l) crwlock_write_held(l) -+#define krwlock_is_read_locked(l) crwlock_read_held(l) -+ -+/* -+ * Timeouts - Solaris style. -+ */ -+typedef struct timer_list timer_fn_t; -+ -+extern inline void -+schedule_timer_fn(timer_fn_t *timer, void (*fun)(void *), void *arg, long hz_delay) -+{ -+ init_timer(timer); -+ -+ timer->function = (void (*)(unsigned long)) fun; -+ timer->data = (unsigned long) arg; -+ timer->expires = jiffies + hz_delay; -+ -+ add_timer(timer); -+} -+ -+/* returns 1 if timer_fn was cancelled */ -+extern inline int -+cancel_timer_fn(timer_fn_t *timer) -+{ -+ return (del_timer_sync(timer)); -+} -+ -+extern inline int -+timer_fn_queued(timer_fn_t *timer) -+{ -+ return (timer_pending (timer)); -+} -+/* -+ * Hold/release CPU's. -+ */ -+ -+extern void cpu_hold_all(void); -+extern void cpu_release_all(void); -+#define CAPTURE_CPUS() cpu_hold_all() -+#define RELEASE_CPUS() cpu_release_all() -+ -+#define IASSERT ASSERT -+ -+/* code to support multipage procfs entries */ -+ -+typedef struct display_info { -+ void (*func)(long, char *, ...); -+ long arg; -+} DisplayInfo; -+ -+typedef struct qsnet_proc_private -+{ -+ struct nodeset_private *pr_next; -+ void *pr_user_data; -+ char *pr_data; -+ int pr_data_len; -+ unsigned pr_off; -+ unsigned pr_len; -+ DisplayInfo pr_di; -+} QSNET_PROC_PRIVATE; -+ -+#endif /* __QSNET_KERNEL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/qsnet/kpte.h linux-2.6.9/include/qsnet/kpte.h ---- clean/include/qsnet/kpte.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/kpte.h 2005-03-18 08:56:40.000000000 -0500 -@@ -0,0 +1,132 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KPTE_H -+#define __QSNET_KPTE_H -+ -+#ident "@(#)$Id: kpte.h,v 1.5 2005/03/18 13:56:40 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/qsnet/kpte.h,v $*/ -+ -+#include -+ -+#ifdef NO_RMAP -+# define pte_offset_kernel pte_offset -+# define pte_offset_map pte_offset -+# define pte_unmap(A) do { ; } while (0) -+#endif -+ -+/* -+ * Pte stuff -+ */ -+static __inline__ struct mm_struct * -+get_kern_mm(void) -+{ -+ return &init_mm; -+} -+ -+static __inline__ pte_t * -+find_pte_map(struct mm_struct *mm, unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *ptep; -+ -+/* XXXX - need to handle huge tlb code */ -+ pgd = pgd_offset(mm, vaddr); -+ if (pgd_none(*pgd) || pgd_bad(*pgd)) -+ goto out; -+ -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10) -+ { -+ pud_t *pud = pud_offset(pgd, vaddr); -+ if (pud_none(*pud) || pud_bad(*pud)) -+ goto out; -+ -+ pmd = pmd_offset(pud, vaddr); -+ } -+#else -+ pmd = pmd_offset(pgd, vaddr); -+#endif -+ if (pmd_none(*pmd) || pmd_bad (*pmd)) -+ goto out; -+ -+ ptep = pte_offset_map (pmd, vaddr); -+ if (! ptep) -+ goto out; -+ -+ if (pte_present (*ptep)) -+ return ptep; -+ -+ pte_unmap (ptep); -+out: -+ return NULL; -+} -+ -+static __inline__ pte_t * -+find_pte_kernel(unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = pgd_offset_k(vaddr); -+ if (pgd && !pgd_none(*pgd)) { -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10) -+ pud_t *pud = pud_offset(pgd, vaddr); -+ if (pud && !pud_none(*pud)) { -+ pmd = pmd_offset(pud, vaddr); -+#else -+ pmd = pmd_offset(pgd, vaddr); -+#endif -+ if (pmd && pmd_present(*pmd)) { -+ pte = pte_offset_kernel(pmd, vaddr); -+ if (pte && pte_present(*pte)) -+ return (pte); -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10) -+ } -+#endif -+ } -+ } -+ return (NULL); -+} -+ -+static __inline__ physaddr_t -+pte_phys(pte_t pte) -+{ -+#if defined(LINUX_ALPHA) -+ /* RedHat 7.1 2.4.3-12 -+ * They have now enabled Monster windows on Tsunami -+ * and so can use the Main's phys pte value -+ */ -+ return (pte_val(pte) >> (32-PAGE_SHIFT)); -+#elif defined(LINUX_I386) || defined(LINUX_X86_64) -+#if defined(_PAGE_NX) -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1) & ~_PAGE_NX); -+#else -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1)); -+#endif -+#elif defined(LINUX_SPARC) -+ return (pte_val(pte) & _PAGE_PADDR); -+#elif defined(LINUX_IA64) -+ return (pte_val(pte) & _PFN_MASK); -+#else -+#error Unknown architecture -+#endif -+} -+ -+#ifndef page_to_pfn -+#define page_to_pfn(page) (pte_phys(mk_pte(page, __pgprot(0))) >> PAGE_SHIFT) -+#endif -+ -+#endif /* __QSNET_KPTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -diff -urN clean/include/qsnet/kthread.h linux-2.6.9/include/qsnet/kthread.h ---- clean/include/qsnet/kthread.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/kthread.h 2004-10-28 07:50:29.000000000 -0400 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KTHREAD_H -+#define __QSNET_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.1 2004/10/28 11:50:29 david Exp $ $Name: QSNETMODULES-5-11-3_20050907 $" -+/* $Source: /cvs/master/quadrics/qsnet/kthread.h,v $*/ -+ -+#include -+ -+/* -+ * kernel threads -+ */ -+extern __inline__ void -+kernel_thread_init(char *comm) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#ifndef NO_NPTL -+# define sigmask_lock sighand->siglock -+#endif -+ lock_kernel(); -+ daemonize(); -+ reparent_to_init(); -+ -+ /* avoid getting signals */ -+ spin_lock_irq(¤t->sigmask_lock); -+ flush_signals(current); -+ sigfillset(¤t->blocked); -+ -+#ifdef NO_NPTL -+ recalc_sigpending(current); -+#else -+ recalc_sigpending(); -+#endif -+ -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ /* set our name for identification purposes */ -+ strncpy(current->comm, comm, sizeof(current->comm)); -+ -+ unlock_kernel(); -+#else -+ daemonize(comm); -+#endif -+} -+ -+extern __inline__ void * -+kernel_thread_wrap(caddr_t stk, int stksize, void (*proc)(void *), void *arg) -+{ -+ ASSERT(stk == NULL && stksize == 0); -+ kernel_thread((int (*)(void *))proc, arg, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ return (void *)1; /* non-null value */ -+} -+ -+#define kernel_thread_create(proc,arg) kernel_thread_wrap(NULL,0,(void (*)(void *))proc,arg) -+#define kernel_thread_exit() ((void) 0) -+#define kernel_thread_become_highpri() ((void) 0) -+ -+#endif /* __QSNET_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/qsnet/list.h linux-2.6.9/include/qsnet/list.h ---- clean/include/qsnet/list.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/list.h 2003-10-27 08:55:33.000000000 -0500 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: list.h,v 1.5 2003/10/27 13:55:33 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/list.h,v $*/ -+ -+#ifndef __QSNET_LIST_H -+#define __QSNET_LIST_H -+ -+/* Implementation of doubly linked lists - compatible with linux */ -+struct list_head -+{ -+ struct list_head *next; -+ struct list_head *prev; -+}; -+ -+#if !defined(LINUX) -+#if ! defined( offsetof ) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+#endif -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+#endif -+ -+#define list_entry(ptr, type, off) \ -+ ((type *) ((unsigned long)(ptr) - offsetof (type,off))) -+ -+#define INIT_LIST_HEAD(list) \ -+MACRO_BEGIN \ -+ (list)->next = (list)->prev = (list); \ -+MACRO_END -+ -+#define list_add(new, list) \ -+MACRO_BEGIN \ -+ (list)->next->prev = (new); \ -+ (new)->next = (list)->next; \ -+ (new)->prev = (list); \ -+ (list)->next = (new); \ -+MACRO_END -+ -+#define list_add_tail(new, list) \ -+MACRO_BEGIN \ -+ (list)->prev->next = new; \ -+ (new)->prev = (list)->prev; \ -+ (new)->next = (list); \ -+ (list)->prev = (new); \ -+MACRO_END -+ -+#define list_del(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+MACRO_END -+ -+#define list_del_init(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+ (entry)->next = (entry)->prev = (entry); \ -+MACRO_END -+ -+#define list_empty(list) \ -+ ((list)->next == (list)) -+ -+#define list_for_each(pos,list) \ -+ for (pos = (list)->next; pos != (list); \ -+ pos = (pos)->next) -+ -+#define list_for_each_safe(pos,n,list) \ -+ for (pos = (list)->next, n = (pos)->next; pos != (list); \ -+ pos = n, n = (pos)->next) -+ -+#endif /* __QSNET_LIST_H */ -diff -urN clean/include/qsnet/module.h linux-2.6.9/include/qsnet/module.h ---- clean/include/qsnet/module.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/module.h 2005-09-07 10:35:04.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_MODULE_H -+#define __QSNET_MODULE_H -+ -+#ident "$Id: module.h,v 1.1.2.1 2005/09/07 14:35:04 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/module.h,v $*/ -+ -+#include -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) -+#include -+#endif -+ -+#endif /* __QSNET_MODULE_H */ -+ -+ -+ -+ -+ -+ -+ -diff -urN clean/include/qsnet/mutex.h linux-2.6.9/include/qsnet/mutex.h ---- clean/include/qsnet/mutex.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/mutex.h 2003-06-26 12:05:45.000000000 -0400 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_MUTEX_H) -+#define _LINUX_MUTEX_H -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PID_NONE 0 -+ -+typedef struct -+{ -+ struct semaphore sem; -+ pid_t holder; -+} kmutex_t; -+ -+extern __inline__ void -+kmutex_init (kmutex_t *l) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->sem = MUTEX; -+#else -+ init_MUTEX(&l->sem); -+#endif -+ l->holder = PID_NONE; -+} -+ -+extern __inline__ void -+kmutex_destroy (kmutex_t *l) -+{ -+ ASSERT (l->holder == PID_NONE); -+} -+ -+extern __inline__ void -+kmutex_lock (kmutex_t *l) -+{ -+ ASSERT(l->holder != current->pid); -+ down (&l->sem); -+ l->holder = current->pid; -+} -+ -+extern __inline__ void -+kmutex_unlock (kmutex_t *l) -+{ -+ ASSERT(l->holder == current->pid); -+ -+ l->holder = PID_NONE; -+ up (&l->sem); -+} -+ -+extern __inline__ int -+kmutex_trylock (kmutex_t *l) -+{ -+ if (down_trylock (&l->sem) == 0) -+ { -+ l->holder = current->pid; -+ return (1); -+ } -+ return (0); -+} -+ -+extern __inline__ int -+kmutex_is_locked (kmutex_t *l) -+{ -+ return (l->holder == current->pid); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_MUTEX_H */ -diff -urN clean/include/qsnet/procfs_linux.h linux-2.6.9/include/qsnet/procfs_linux.h ---- clean/include/qsnet/procfs_linux.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/procfs_linux.h 2005-07-20 07:35:37.000000000 -0400 -@@ -0,0 +1,263 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __PROCFS_LINUX_H -+#define __PROCFS_LINUX_H -+ -+#ident "$Id: procfs_linux.h,v 1.13.2.2 2005/07/20 11:35:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/procfs_linux.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+ -+extern gid_t qsnet_procfs_gid; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static inline int -+qsnet_proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static inline int -+qsnet_proc_write_int(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_int(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "%d\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_int(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_int; -+ p->read_proc = qsnet_proc_read_int; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+static inline int -+qsnet_proc_write_hex(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_hex(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "0x%x\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_hex(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_hex; -+ p->read_proc = qsnet_proc_read_hex; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+#define QSNET_PROC_STR_LEN_MAX ((int)256) -+ -+static inline int -+qsnet_proc_write_str(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int res = count; -+ -+ if (count > (QSNET_PROC_STR_LEN_MAX - 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user((char *)data, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ ((char *)data)[count] = '\0'; -+ /* remove linefeed */ -+ if ( (count) && (((char *)data)[count -1] == '\n')) -+ ((char *)data)[count -1] = '\0'; -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_str(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ if ( strlen(data) > (count + 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ /* cant output too much */ -+ if ( strlen(data) > (count + 1)) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ -+ len = sprintf(page, "%s\n", (char *)data); -+ if (len > count) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_str(struct proc_dir_entry *dir, char *path, char *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_str; -+ p->read_proc = qsnet_proc_read_str; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+extern struct proc_dir_entry *qsnet_procfs_root; -+extern struct proc_dir_entry *qsnet_procfs_config; -+ -+/* code for procfs handling multipage requests */ -+ -+void qsnet_proc_character_fill (long mode, char *fmt, ...); -+int qsnet_proc_release (struct inode *inode, struct file *file); -+ -+static inline ssize_t -+qsnet_proc_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ QSNET_PROC_PRIVATE *pr = (QSNET_PROC_PRIVATE *) file->private_data; -+ int error; -+ -+ if (pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_data + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ return (count); -+} -+ -+ -+#ifdef NO_PDE -+static inline struct proc_dir_entry *PDE(const struct inode *inode) -+{ -+ return inode->u.generic_ip; -+} -+#endif -+#endif /* __KERNEL__ */ -+ -+#define QSNET_PROCFS_IOCTL "/proc/qsnet/ioctl" -+#define QSNET_PROCFS_KMEM_DEBUG "/proc/qsnet/kmem_debug" -+#define QSNET_PROCFS_VERSION "/proc/qsnet/version" -+ -+#endif /* __PROCFS_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -urN clean/include/qsnet/types.h linux-2.6.9/include/qsnet/types.h ---- clean/include/qsnet/types.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/types.h 2003-08-01 12:21:38.000000000 -0400 -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_TYPES_H -+#define __QSNET_TYPES_H -+ -+#ident "$Id: types.h,v 1.16 2003/08/01 16:21:38 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/types.h,v $*/ -+ -+/* -+ * Include typedefs for ISO/IEC 9899:1990 standard types -+ * -+ * -+ * The following integer typedefs are used: -+ * -+ * int8_t, int16_t, int32_t, int64_t, intptr_t -+ * uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t -+ * uchar_t, ushort_t, uint_t, ulong_t -+ * -+ * also defines the following: -+ * u_char, u_short, u_int, u_long, caddr_t -+ */ -+ -+#include -+ -+#if defined(SOLARIS) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(SOLARIS) && !defined(__KERNEL__) -+# include -+# include -+#endif -+ -+#if defined(DIGITAL_UNIX) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(DIGITAL_UNIX) && !defined(__KERNEL__) -+# include -+# include -+#endif -+ -+#if defined(LINUX) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(LINUX) && !defined(__KERNEL__) -+# include -+# include -+# include -+ -+typedef unsigned char uchar_t; -+typedef unsigned short ushort_t; -+typedef unsigned int uint_t; -+typedef unsigned long ulong_t; -+#endif -+ -+#if defined(QNX) -+# include -+# include -+#endif -+ -+/* Define a type that will represent a Main CPU pointer -+ * on both the Main and the Elan -+ */ -+#ifdef __ELAN__ -+ -+#if defined(_MAIN_LP64) -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif -+ -+#else -+ -+#ifdef _LP64 -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif -+ -+#endif -+ -+ -+#endif /* __QSNET_TYPES_H */ -diff -urN clean/include/qsnet/workarounds.h linux-2.6.9/include/qsnet/workarounds.h ---- clean/include/qsnet/workarounds.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/qsnet/workarounds.h 2002-08-09 07:15:55.000000000 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _QSNET_WORKAROUNDS_H -+#define _QSNET_WORKAROUNDS_H -+ -+#ident "$Id: workarounds.h,v 1.11 2002/08/09 11:15:55 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/workarounds.h,v $ */ -+ -+/* Elan workarounds */ -+#undef ELAN_REVA_SUPPORTED /* rev a elans no longer supported. */ -+#undef ELITE_REVA_SUPPORTED /* removed since RMS disables broadcast on rev A elites. */ -+#define ELAN_REVB_BUG_1 -+/* WORKAROUND for GNAT hw-elan3/3263 */ -+#define ELAN_REVB_BUG_2 -+ -+/* WORKAROUND for GNATs ic-elan3/3637 & ic-elan3/3550 */ -+#define ELAN_REVB_BUG_3 -+ -+#endif /* _QSNET_WORKAROUNDS_H */ -diff -urN clean/include/rms/rmscall.h linux-2.6.9/include/rms/rmscall.h ---- clean/include/rms/rmscall.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/rms/rmscall.h 2005-07-28 06:49:09.000000000 -0400 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * rmscall.h: user interface to rms kernel module -+ * -+ * $Id: rmscall.h,v 1.27.2.1 2005/07/28 10:49:09 robin Exp $ -+ * $Source: /cvs/master/quadrics/rmsmod/rmscall.h,v $ -+ * -+ */ -+ -+#ifndef RMSCALL_H_INCLUDED -+#define RMSCALL_H_INCLUDED 1 -+ -+#ident "$Id: rmscall.h,v 1.27.2.1 2005/07/28 10:49:09 robin Exp $" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * flags for rms_fork_register -+ * -+ * RMS_IOF is not in a public header file -+ */ -+#define RMS_IOF 1 /* inherit on fork */ -+ -+#ifndef __KERNEL__ -+#include -+#endif -+ -+#include -+#include -+ -+#define MAXCOREPATHLEN 32 -+ -+#if defined(SOLARIS) -+typedef long long rmstime_t; -+#else /* DIGITAL_UNIX */ -+typedef long rmstime_t; -+#endif -+ -+typedef enum { -+ -+ PRG_RUNNING = 0x01, /* program is running */ -+ PRG_ZOMBIE = 0x02, /* last process on a node has exited */ -+ PRG_NODE = 0x04, /* stats are complete for this node */ -+ PRG_KILLED = 0x08, /* program was killed */ -+ PRG_SUSPEND = 0x10, /* program is suspended */ -+ PRG_ERROR = 0x80 /* error collecting stats */ -+ -+} PRGSTATUS_FLAGS; -+ -+/* -+ * program time statistics extended in version 5 of the kernel module -+ */ -+typedef struct { -+ rmstime_t etime; /* elapsed cpu time (milli-secs) */ -+ rmstime_t atime; /* allocated cpu time (cpu milli-secs) */ -+ rmstime_t utime; /* user cpu time (cpu milli-secs) */ -+ rmstime_t stime; /* system cpu time (cpu milli-secs) */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int pageflts; /* number of page faults */ -+ rmstime_t memint; /* memory integral */ -+} prgstats_old_t; -+ -+typedef struct { -+ uint64_t etime; /* elapsed cpu time (milli-secs) */ -+ uint64_t atime; /* allocated cpu time (cpu milli-secs) */ -+ uint64_t utime; /* user cpu time (cpu milli-secs) */ -+ uint64_t stime; /* system cpu time (cpu milli-secs) */ -+ uint64_t pageflts; /* number of page faults */ -+ uint64_t memint; /* memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ uint64_t spare64[4]; /* expansion space */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int spare32[5]; /* expansion space */ -+} prgstats_t; -+ -+int rmsmod_init(void); -+void rmsmod_fini(void); -+ -+int rms_setcorepath(caddr_t path); -+int rms_getcorepath(pid_t pid, caddr_t path, int maxlen); -+int rms_prgcreate(int id, uid_t uid, int cpus); -+int rms_prgdestroy(int id); -+int rms_prgids(int maxids, int *prgids, int *nprgs); -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs); -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap); -+ -+int rms_prgsuspend(int id); -+int rms_prgresume(int id); -+int rms_prgsignal(int id, int signo); -+ -+int rms_getprgid(pid_t pid, int *id); -+int rms_ncaps(int *ncaps); -+int rms_getcap(int index, ELAN_CAPABILITY *cap); -+int rms_mycap(int *index); -+int rms_setcap(int index, int ctx); -+int rms_prefcap(int nprocess, int *index); -+ -+int rms_prggetstats(int id, prgstats_t *stats); -+void rms_accumulatestats(prgstats_t *total, prgstats_t *stats); -+char *rms_statsreport(prgstats_t *stats, char *buf); -+ -+int rms_elaninitdone(int vp); -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids); -+int rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers); -+ -+int rms_setpset(int psid); -+int rms_getpset(int id, int *psid); -+int rms_modversion(void); -+ -+int rms_addproc(int id, pid_t pid); -+int rms_removeproc(int id, pid_t pid); -+int rms_ptrack_enabled(void); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+ -+#if defined(__KERNEL__) -+ -+int rms_init(void); -+int rms_fini(void); -+int rms_reconfigure(void); -+ -+extern int rms_debug; -+ -+#if 1 -+#define DBG(x) do if (rms_debug) x ; while (0) -+#else -+#define DBG(x) -+#endif -+ -+#endif -+ -+#endif /* RMSCALL_H_INCLUDED */ -+ -+ -+ -+ -diff -urN clean/include/rms/rmsio.h linux-2.6.9/include/rms/rmsio.h ---- clean/include/rms/rmsio.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/include/rms/rmsio.h 2004-08-26 07:49:30.000000000 -0400 -@@ -0,0 +1,194 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmsio.h,v 1.7 2004/08/26 11:49:30 duncan Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rmsio.h,v $*/ -+ -+ -+#ifndef __RMSMOD_RMSIO_H -+#define __RMSMOD_RMSIO_H -+ -+/* arg is corepath string */ -+#define RMSIO_SETCOREPATH _IOW ('r', 1, char) -+ -+typedef struct rmsio_getcorepath_struct -+{ -+ pid_t pid; -+ char *corepath; -+ int maxlen; -+} RMSIO_GETCOREPATH_STRUCT; -+#define RMSIO_GETCOREPATH _IOW ('r', 2, RMSIO_GETCOREPATH_STRUCT) -+ -+typedef struct rmsio_prgcreate_struct -+{ -+ int id; -+ uid_t uid; -+ int cpus; -+} RMSIO_PRGCREATE_STRUCT; -+#define RMSIO_PRGCREATE _IOW ('r', 3, RMSIO_PRGCREATE_STRUCT) -+ -+typedef struct rmsio_prginfo_struct -+{ -+ int id; -+ int maxpids; -+ pid_t *pids; -+ int *nprocs; -+} RMSIO_PRGINFO_STRUCT; -+#define RMSIO_PRGINFO _IOW ('r', 4, RMSIO_PRGINFO_STRUCT) -+ -+typedef struct rmsio_prgsignal_struct -+{ -+ int id; -+ int signo; -+} RMSIO_PRGSIGNAL_STRUCT; -+#define RMSIO_PRGSIGNAL _IOW ('r', 5, RMSIO_PRGSIGNAL_STRUCT) -+ -+typedef struct rmsio_prgaddcap_struct -+{ -+ int id; -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_PRGADDCAP_STRUCT; -+#define RMSIO_PRGADDCAP _IOW ('r', 6, RMSIO_PRGADDCAP_STRUCT) -+typedef struct rmsio_setcap_struct -+{ -+ int index; -+ int ctx; -+} RMSIO_SETCAP_STRUCT; -+#define RMSIO_SETCAP _IOW ('r', 7, RMSIO_SETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct -+{ -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_GETCAP_STRUCT; -+#define RMSIO_GETCAP _IOW ('r', 8, RMSIO_GETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct32 -+{ -+ int index; -+ unsigned int capptr; -+} RMSIO_GETCAP_STRUCT32; -+#define RMSIO_GETCAP32 _IOW ('r', 8, RMSIO_GETCAP_STRUCT32) -+ -+/* arg is pointer to ncaps */ -+#define RMSIO_NCAPS _IOW ('r', 9, int) -+ -+typedef struct rmsio_prggetstats_struct -+{ -+ int id; -+ prgstats_old_t *stats; -+} RMSIO_PRGGETSTATS_STRUCT; -+#define RMSIO_PRGGETSTATS _IOW ('r', 10, RMSIO_PRGGETSTATS_STRUCT) -+ -+/* arg is program id */ -+#define RMSIO_PRGSUSPEND _IOW ('r', 11, int) -+#define RMSIO_PRGRESUME _IOW ('r', 12, int) -+#define RMSIO_PRGDESTROY _IOW ('r', 13, int) -+ -+typedef struct rmsio_getprgid_struct -+{ -+ pid_t pid; -+ int *id; -+} RMSIO_GETPRGID_STRUCT; -+#define RMSIO_GETPRGID _IOW ('r', 14, RMSIO_GETPRGID_STRUCT) -+ -+typedef struct rmsio_getprgid_struct32 -+{ -+ pid_t pid; -+ unsigned int idptr; -+} RMSIO_GETPRGID_STRUCT32; -+#define RMSIO_GETPRGID32 _IOW ('r', 14, RMSIO_GETPRGID_STRUCT32) -+ -+/* arg is pointer to index */ -+#define RMSIO_GETMYCAP _IOW ('r', 15, int) -+ -+typedef struct rmsio_prgids_struct -+{ -+ int maxids; -+ int *prgids; -+ int *nprgs; -+} RMSIO_PRGIDS_STRUCT; -+#define RMSIO_PRGIDS _IOW ('r', 16, RMSIO_PRGIDS_STRUCT) -+ -+/* arg is pointer to vp */ -+#define RMSIO_ELANINITDONE _IOW ('r', 17, int) -+ -+typedef struct rmsio_prgelanpids_struct -+{ -+ int id; -+ int maxpids; -+ int *vps; -+ int *pids; -+ int *npids; -+} RMSIO_PRGELANPIDS_STRUCT; -+#define RMSIO_PRGELANPIDS _IOW ('r', 18, RMSIO_PRGELANPIDS_STRUCT) -+ -+typedef struct rmsio_setpset_struct -+{ -+ int id; -+ int psid; -+} RMSIO_SETPSET_STRUCT; -+#define RMSIO_SETPSET _IOW ('r', 19, RMSIO_SETPSET_STRUCT) -+ -+typedef struct rmsio_getpset_struct -+{ -+ int id; -+ int *psid; -+} RMSIO_GETPSET_STRUCT; -+#define RMSIO_GETPSET _IOW ('r', 20, RMSIO_GETPSET_STRUCT) -+ -+/* -+ * have to pass a pointer to the stats, the switch -+ * statement goes wrong in the module of the size -+ * is too large -+ */ -+typedef struct { -+ uint64_t ebytes; -+ uint64_t exfers; -+} elanstats_t; -+ -+typedef struct rmsio_setelanstats_struct -+{ -+ int id; -+ elanstats_t *estats; -+} RMSIO_SETELANSTATS_STRUCT; -+#define RMSIO_SETELANSTATS _IOW ('r', 21, RMSIO_SETELANSTATS_STRUCT) -+ -+typedef struct rmsio_prggetstats2_struct -+{ -+ int id; -+ prgstats_t *stats; -+} RMSIO_PRGGETSTATS2_STRUCT; -+#define RMSIO_PRGGETSTATS2 _IOW ('r', 22, RMSIO_PRGGETSTATS2_STRUCT) -+ -+typedef struct rmsio_modversion_struct -+{ -+ int *version; -+} RMSIO_MODVERSION_STRUCT; -+#define RMSIO_MODVERSION _IOW ('r', 23, RMSIO_MODVERSION_STRUCT) -+ -+typedef struct rmsio_proc_struct -+{ -+ int id; -+ pid_t pid; -+} RMSIO_PROC_STRUCT; -+#define RMSIO_ADDPROC _IOW ('r', 24, RMSIO_PROC_STRUCT) -+#define RMSIO_REMOVEPROC _IOW ('r', 25, RMSIO_PROC_STRUCT) -+ -+ -+ -+#endif /* __RMSMOD_RMSIO_H */ -+ -+ -+ -+ -+ -+ -+ + ++EXPORT_SYMBOL_GPL(ioproc_unregister_ops); +Index: linux-269-5502/mm/hugetlb.c +=================================================================== +--- linux-269-5502.orig/mm/hugetlb.c ++++ linux-269-5502/mm/hugetlb.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; + static unsigned long nr_huge_pages, free_huge_pages; +@@ -260,6 +261,7 @@ void zap_hugepage_range(struct vm_area_s + struct mm_struct *mm = vma->vm_mm; + + spin_lock(&mm->page_table_lock); ++ ioproc_invalidate_range(vma, start, start + length); + unmap_hugepage_range(vma, start, start + length); + spin_unlock(&mm->page_table_lock); + } +Index: linux-269-5502/mm/Kconfig +=================================================================== +--- /dev/null ++++ linux-269-5502/mm/Kconfig +@@ -0,0 +1,15 @@ ++# ++# VM subsystem specific config ++# + -diff -urN clean/ipc/shm.c linux-2.6.9/ipc/shm.c ---- clean/ipc/shm.c 2005-05-13 13:39:10.000000000 -0400 -+++ linux-2.6.9/ipc/shm.c 2005-10-10 17:47:17.000000000 -0400 ++# Support for IO processors which have advanced RDMA capabilities ++# ++config IOPROC ++ bool "Enable IOPROC VM hooks" ++ depends on MMU ++ default y ++ help ++ This option enables hooks in the VM subsystem so that IO devices which ++ incorporate advanced RDMA capabilities can be kept in sync with CPU ++ page table changes. ++ See Documentation/vm/ioproc.txt for more details. +Index: linux-269-5502/mm/Makefile +=================================================================== +--- linux-269-5502.orig/mm/Makefile ++++ linux-269-5502/mm/Makefile +@@ -16,6 +16,7 @@ obj-$(CONFIG_SWAP) += page_io.o swap_sta + obj-$(CONFIG_X86_4G) += usercopy.o + obj-$(CONFIG_HUGETLBFS) += hugetlb.o + obj-$(CONFIG_NUMA) += mempolicy.o ++obj-$(CONFIG_IOPROC) += ioproc.o + obj-$(CONFIG_SHMEM) += shmem.o + obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o + +Index: linux-269-5502/mm/mprotect.c +=================================================================== +--- linux-269-5502.orig/mm/mprotect.c ++++ linux-269-5502/mm/mprotect.c +@@ -10,6 +10,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -100,6 +101,7 @@ change_protection(struct vm_area_struct + if (start >= end) + BUG(); + spin_lock(¤t->mm->page_table_lock); ++ ioproc_change_protection(vma, start, end, newprot); + do { + change_pmd_range(dir, start, end - start, newprot); + start = (start + PGDIR_SIZE) & PGDIR_MASK; +Index: linux-269-5502/mm/msync.c +=================================================================== +--- linux-269-5502.orig/mm/msync.c ++++ linux-269-5502/mm/msync.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -115,6 +116,7 @@ static int filemap_sync(struct vm_area_s + + if (address >= end) + BUG(); ++ ioproc_sync_range(vma, address, end); + do { + error |= filemap_sync_pmd_range(dir, address, end, vma, flags); + address = (address + PGDIR_SIZE) & PGDIR_MASK; +Index: linux-269-5502/mm/mremap.c +=================================================================== +--- linux-269-5502.orig/mm/mremap.c ++++ linux-269-5502/mm/mremap.c +@@ -9,6 +9,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -148,6 +149,8 @@ static unsigned long move_page_tables(st + { + unsigned long offset; + ++ ioproc_invalidate_range(vma, old_addr, old_addr + len); ++ ioproc_invalidate_range(vma, new_addr, new_addr + len); + flush_cache_range(vma, old_addr, old_addr + len); + + /* +Index: linux-269-5502/mm/fremap.c +=================================================================== +--- linux-269-5502.orig/mm/fremap.c ++++ linux-269-5502/mm/fremap.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -29,6 +30,7 @@ static inline void zap_pte(struct mm_str + if (pte_present(pte)) { + unsigned long pfn = pte_pfn(pte); + ++ ioproc_invalidate_page(vma, addr); + flush_cache_page(vma, addr); + pte = ptep_clear_flush(vma, addr, ptep); + if (pfn_valid(pfn)) { +@@ -93,6 +95,7 @@ int install_page(struct mm_struct *mm, s + pte_val = *pte; + pte_unmap(pte); + update_mmu_cache(vma, addr, pte_val); ++ ioproc_update_page(vma, addr); + + err = 0; + err_unlock: +@@ -132,6 +135,7 @@ int install_file_pte(struct mm_struct *m + pte_val = *pte; + pte_unmap(pte); + update_mmu_cache(vma, addr, pte_val); ++ ioproc_update_page(vma, addr); + spin_unlock(&mm->page_table_lock); + return 0; + +Index: linux-269-5502/mm/rmap.c +=================================================================== +--- linux-269-5502.orig/mm/rmap.c ++++ linux-269-5502/mm/rmap.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -566,6 +567,7 @@ static int try_to_unmap_one(struct page + } + + /* Nuke the page table entry. */ ++ ioproc_invalidate_page(vma, address); + flush_cache_page(vma, address); + pteval = ptep_clear_flush(vma, address, pte); + +@@ -673,6 +675,7 @@ static void try_to_unmap_cluster(unsigne + continue; + + /* Nuke the page table entry. */ ++ ioproc_invalidate_page(vma, address); + flush_cache_page(vma, address); + pteval = ptep_clear_flush(vma, address, pte); + +Index: linux-269-5502/mm/memory.c +=================================================================== +--- linux-269-5502.orig/mm/memory.c ++++ linux-269-5502/mm/memory.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -630,6 +631,7 @@ void zap_page_range(struct vm_area_struc + + lru_add_drain(); + spin_lock(&mm->page_table_lock); ++ ioproc_invalidate_range(vma, address, end); + tlb = tlb_gather_mmu(mm, 0); + unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); + tlb_finish_mmu(tlb, address, end); +@@ -998,6 +1000,7 @@ int zeromap_page_range(struct vm_area_st + BUG(); + + spin_lock(&mm->page_table_lock); ++ ioproc_invalidate_range(vma, beg, end); + do { + pmd_t *pmd = pmd_alloc(mm, dir, address); + error = -ENOMEM; +@@ -1012,6 +1015,7 @@ int zeromap_page_range(struct vm_area_st + /* + * Why flush? zeromap_pte_range has a BUG_ON for !pte_none() + */ ++ ioproc_update_range(vma, beg, end); + flush_tlb_range(vma, beg, end); + spin_unlock(&mm->page_table_lock); + return error; +@@ -1092,6 +1096,7 @@ int remap_page_range(struct vm_area_stru + vma->vm_flags |= VM_IO | VM_RESERVED; + + spin_lock(&mm->page_table_lock); ++ ioproc_invalidate_range(vma, beg, end); + do { + pmd_t *pmd = pmd_alloc(mm, dir, from); + error = -ENOMEM; +@@ -1106,6 +1111,7 @@ int remap_page_range(struct vm_area_stru + /* + * Why flush? remap_pte_range has a BUG_ON for !pte_none() + */ ++ ioproc_update_range(vma, beg, end); + flush_tlb_range(vma, beg, end); + spin_unlock(&mm->page_table_lock); + return error; +@@ -1194,6 +1200,7 @@ static int do_wp_page(struct mm_struct * + update_mmu_cache(vma, address, entry); + lazy_mmu_prot_update(entry); + pte_unmap(page_table); ++ ioproc_update_page(vma, address); + spin_unlock(&mm->page_table_lock); + return VM_FAULT_MINOR; + } +@@ -1226,6 +1233,7 @@ static int do_wp_page(struct mm_struct * + ++mm->rss; + else + page_remove_rmap(old_page); ++ ioproc_invalidate_page(vma, address); + break_cow(vma, new_page, address, page_table); + lru_cache_add_active(new_page); + page_add_anon_rmap(new_page, vma, address); +@@ -1234,6 +1242,7 @@ static int do_wp_page(struct mm_struct * + new_page = old_page; + } + pte_unmap(page_table); ++ ioproc_update_page(vma, address); + page_cache_release(new_page); + page_cache_release(old_page); + spin_unlock(&mm->page_table_lock); +@@ -1630,6 +1639,7 @@ static int do_swap_page(struct mm_struct + update_mmu_cache(vma, address, pte); + lazy_mmu_prot_update(pte); + pte_unmap(page_table); ++ ioproc_update_page(vma, address); + spin_unlock(&mm->page_table_lock); + out: + return ret; +@@ -1695,6 +1705,7 @@ do_anonymous_page(struct mm_struct *mm, + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, addr, entry); + lazy_mmu_prot_update(entry); ++ ioproc_update_page(vma, addr); + spin_unlock(&mm->page_table_lock); + out: + return VM_FAULT_MINOR; +@@ -1813,6 +1824,7 @@ retry: + /* no need to invalidate: a not-present page shouldn't be cached */ + update_mmu_cache(vma, address, entry); + lazy_mmu_prot_update(entry); ++ ioproc_update_page(vma, address); + spin_unlock(&mm->page_table_lock); + out: + return ret; +@@ -1998,6 +2010,7 @@ int make_pages_present(unsigned long add + return ret; + return ret == len ? 0 : -1; + } ++EXPORT_SYMBOL(make_pages_present); + + /* + * Map a vmalloc()-space virtual address to the physical page. +Index: linux-269-5502/mm/mmap.c +=================================================================== +--- linux-269-5502.orig/mm/mmap.c ++++ linux-269-5502/mm/mmap.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1703,6 +1704,7 @@ static void unmap_region(struct mm_struc + unsigned long nr_accounted = 0; + + lru_add_drain(); ++ ioproc_invalidate_range(vma, start, end); + tlb = tlb_gather_mmu(mm, 0); + unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL); + vm_unacct_memory(nr_accounted); +@@ -1995,6 +1997,7 @@ void exit_mmap(struct mm_struct *mm) + + spin_lock(&mm->page_table_lock); + ++ ioproc_release(mm); + tlb = tlb_gather_mmu(mm, 1); + flush_cache_mm(mm); + /* Use ~0UL here to ensure all VMAs in the mm are unmapped */ +Index: linux-269-5502/ipc/shm.c +=================================================================== +--- linux-269-5502.orig/ipc/shm.c ++++ linux-269-5502/ipc/shm.c @@ -26,6 +26,7 @@ #include #include @@ -96985,7 +1253,7 @@ diff -urN clean/ipc/shm.c linux-2.6.9/ipc/shm.c #include #include -@@ -850,6 +851,44 @@ +@@ -856,6 +857,44 @@ asmlinkage long sys_shmdt(char __user *s return retval; } @@ -97030,95 +1298,33 @@ diff -urN clean/ipc/shm.c linux-2.6.9/ipc/shm.c #ifdef CONFIG_PROC_FS static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { -diff -urN clean/kernel/exit.c linux-2.6.9/kernel/exit.c ---- clean/kernel/exit.c 2005-10-10 17:43:57.000000000 -0400 -+++ linux-2.6.9/kernel/exit.c 2005-10-10 17:47:17.000000000 -0400 -@@ -30,6 +30,8 @@ - #include - #include - -+#include -+ - extern void sem_exit (void); - extern struct task_struct *child_reaper; - -@@ -822,6 +824,8 @@ - #endif - current->tux_exit(); - } -+ /* Notify any ptrack callbacks of the process exit */ -+ ptrack_call_callbacks (PTRACK_PHASE_EXIT, NULL); - __exit_mm(tsk); +Index: linux-269-5502/include/linux/init_task.h +=================================================================== +--- linux-269-5502.orig/include/linux/init_task.h ++++ linux-269-5502/include/linux/init_task.h +@@ -2,6 +2,7 @@ + #define _LINUX__INIT_TASK_H - exit_sem(tsk); -diff -urN clean/kernel/fork.c linux-2.6.9/kernel/fork.c ---- clean/kernel/fork.c 2005-05-13 13:39:08.000000000 -0400 -+++ linux-2.6.9/kernel/fork.c 2005-10-10 17:47:17.000000000 -0400 -@@ -14,6 +14,7 @@ - #include - #include - #include + #include +#include - #include - #include - #include -@@ -430,6 +431,9 @@ - mm->page_table_lock = SPIN_LOCK_UNLOCKED; - mm->ioctx_list_lock = RW_LOCK_UNLOCKED; - mm->ioctx_list = NULL; -+#ifdef CONFIG_IOPROC -+ mm->ioproc_ops = NULL; -+#endif - mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); - mm->free_area_cache = TASK_UNMAPPED_BASE; -@@ -1264,6 +1268,11 @@ - set_tsk_thread_flag(p, TIF_SIGPENDING); - } + #define INIT_FILES \ + { \ +@@ -112,6 +113,7 @@ extern struct group_info init_groups; + .proc_lock = SPIN_LOCK_UNLOCKED, \ + .switch_lock = SPIN_LOCK_UNLOCKED, \ + .journal_info = NULL, \ ++ INIT_TASK_PTRACK(tsk) \ + } -+ if (ptrack_call_callbacks(PTRACK_PHASE_CLONE, p)) { -+ sigaddset(&p->pending.signal, SIGKILL); -+ set_tsk_thread_flag(p, TIF_SIGPENDING); -+ } -+ - if (!(clone_flags & CLONE_STOPPED)) - wake_up_new_task(p, clone_flags); - else -diff -urN clean/kernel/Kconfig linux-2.6.9/kernel/Kconfig ---- clean/kernel/Kconfig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/kernel/Kconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,14 @@ -+# -+# Kernel subsystem specific config -+# -+ -+# Support for Process Tracking callbacks -+# -+config PTRACK -+ bool "Enable PTRACK process tracking hooks" -+ default y -+ help -+ This option enables hooks to be called when processes are -+ created and destoryed in order for a resource management -+ system to know which processes are a member of a "job" and -+ to be able to clean up when the job is terminated. -diff -urN clean/kernel/Makefile linux-2.6.9/kernel/Makefile ---- clean/kernel/Makefile 2005-05-13 13:39:07.000000000 -0400 -+++ linux-2.6.9/kernel/Makefile 2005-10-10 17:47:17.000000000 -0400 -@@ -26,6 +26,7 @@ - obj-$(CONFIG_AUDIT) += audit.o - obj-$(CONFIG_AUDITSYSCALL) += auditsc.o - obj-$(CONFIG_KPROBES) += kprobes.o -+obj-$(CONFIG_PTRACK) += ptrack.o - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -diff -urN clean/kernel/ptrack.c linux-2.6.9/kernel/ptrack.c ---- clean/kernel/ptrack.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/kernel/ptrack.c 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,145 @@ +Index: linux-269-5502/include/linux/ioproc.h +=================================================================== +--- /dev/null ++++ linux-269-5502/include/linux/ioproc.h +@@ -0,0 +1,270 @@ +/* -+ * Copyright (C) 2000 Regents of the University of California ++ * Copyright (C) 2006 Quadrics Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by @@ -97133,219 +1339,304 @@ diff -urN clean/kernel/ptrack.c linux-2.6.9/kernel/ptrack.c + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Derived from exit_actn.c by -+ * Copyright (C) 2003 Quadrics Ltd. + */ + ++/* ++ * Callbacks for IO processor page table updates. ++ */ ++ ++#ifndef __LINUX_IOPROC_H__ ++#define __LINUX_IOPROC_H__ + -+#include -+#include +#include -+#include -+#include -+#include ++#include + -+#include ++typedef struct ioproc_ops { ++ struct ioproc_ops *next; ++ void *arg; + -+int -+ptrack_register (ptrack_callback_t callback, void *arg) ++ void (*release) (void *arg, struct mm_struct * mm); ++ void (*sync_range) (void *arg, struct vm_area_struct * vma, ++ unsigned long start, unsigned long end); ++ void (*invalidate_range) (void *arg, struct vm_area_struct * vma, ++ unsigned long start, unsigned long end); ++ void (*update_range) (void *arg, struct vm_area_struct * vma, ++ unsigned long start, unsigned long end); ++ ++ void (*change_protection) (void *arg, struct vm_area_struct * vma, ++ unsigned long start, unsigned long end, ++ pgprot_t newprot); ++ ++ void (*sync_page) (void *arg, struct vm_area_struct * vma, ++ unsigned long address); ++ void (*invalidate_page) (void *arg, struct vm_area_struct * vma, ++ unsigned long address); ++ void (*update_page) (void *arg, struct vm_area_struct * vma, ++ unsigned long address); ++ ++} ioproc_ops_t; ++ ++/* IOPROC Registration ++ * ++ * Called by the IOPROC device driver to register its interest in page table ++ * changes for the process associated with the supplied mm_struct ++ * ++ * The caller should first allocate and fill out an ioproc_ops structure with ++ * the function pointers initialised to the device driver specific code for ++ * each callback. If the device driver doesn't have code for a particular ++ * callback then it should set the function pointer to be NULL. ++ * The ioproc_ops arg parameter will be passed unchanged as the first argument ++ * to each callback function invocation. ++ * ++ * The ioproc registration is not inherited across fork() and should be called ++ * once for each process that the IOPROC device driver is interested in. ++ * ++ * Must be called holding the mm->page_table_lock ++ */ ++extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); ++ ++/* IOPROC De-registration ++ * ++ * Called by the IOPROC device driver when it is no longer interested in page ++ * table changes for the process associated with the supplied mm_struct ++ * ++ * Normally this is not needed to be called as the ioproc_release() code will ++ * automatically unlink the ioproc_ops struct from the mm_struct as the ++ * process exits ++ * ++ * Must be called holding the mm->page_table_lock ++ */ ++extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); ++ ++#ifdef CONFIG_IOPROC ++ ++/* IOPROC Release ++ * ++ * Called during exit_mmap() as all vmas are torn down and unmapped. ++ * ++ * Also unlinks the ioproc_ops structure from the mm list as it goes. ++ * ++ * No need for locks as the mm can no longer be accessed at this point ++ * ++ */ ++static inline void ioproc_release(struct mm_struct *mm) +{ -+ struct ptrack_desc *desc = kmalloc (sizeof (struct ptrack_desc), GFP_KERNEL); -+ -+ if (desc == NULL) -+ return -ENOMEM; ++ struct ioproc_ops *cp; ++ ++ while ((cp = mm->ioproc_ops) != NULL) { ++ mm->ioproc_ops = cp->next; ++ ++ if (cp->release) ++ cp->release(cp->arg, mm); ++ } ++} ++ ++/* IOPROC SYNC RANGE ++ * ++ * Called when a memory map is synchronised with its disk image i.e. when the ++ * msync() syscall is invoked. Any future read or write to the associated ++ * pages by the IOPROC should cause the page to be marked as referenced or ++ * modified. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_sync_range(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end) ++{ ++ struct ioproc_ops *cp; ++ ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->sync_range) ++ cp->sync_range(cp->arg, vma, start, end); ++} ++ ++/* IOPROC INVALIDATE RANGE ++ * ++ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the ++ * user or paged out by the kernel. ++ * ++ * After this call the IOPROC must not access the physical memory again unless ++ * a new translation is loaded. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_invalidate_range(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end) ++{ ++ struct ioproc_ops *cp; ++ ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->invalidate_range) ++ cp->invalidate_range(cp->arg, vma, start, end); ++} ++ ++/* IOPROC UPDATE RANGE ++ * ++ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk ++ * up, when breaking COW or faulting in an anonymous page of memory. ++ * ++ * These give the IOPROC device driver the opportunity to load translations ++ * speculatively, which can improve performance by avoiding device translation ++ * faults. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_update_range(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end) ++{ ++ struct ioproc_ops *cp; + -+ desc->callback = callback; -+ desc->arg = arg; -+ -+ list_add_tail (&desc->link, ¤t->ptrack_list); -+ -+ return 0; ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->update_range) ++ cp->update_range(cp->arg, vma, start, end); +} + -+void -+ptrack_deregister (ptrack_callback_t callback, void *arg) -+{ -+ struct list_head *el, *nel; -+ -+ list_for_each_safe (el, nel, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ if (desc->callback == callback && desc->arg == arg) { -+ list_del (&desc->link); -+ kfree (desc); -+ } -+ } ++/* IOPROC CHANGE PROTECTION ++ * ++ * Called when the protection on a region of memory is changed i.e. when the ++ * mprotect() syscall is invoked. ++ * ++ * The IOPROC must not be able to write to a read-only page, so if the ++ * permissions are downgraded then it must honour them. If they are upgraded ++ * it can treat this in the same way as the ioproc_update_[range|sync]() calls ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_change_protection(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, pgprot_t newprot) ++{ ++ struct ioproc_ops *cp; ++ ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->change_protection) ++ cp->change_protection(cp->arg, vma, start, end, ++ newprot); +} + -+int -+ptrack_registered (ptrack_callback_t callback, void *arg) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ if (desc->callback == callback && desc->arg == arg) -+ return 1; -+ } -+ return 0; -+} -+ -+int -+ptrack_call_callbacks (int phase, struct task_struct *child) ++/* IOPROC SYNC PAGE ++ * ++ * Called when a memory map is synchronised with its disk image i.e. when the ++ * msync() syscall is invoked. Any future read or write to the associated page ++ * by the IOPROC should cause the page to be marked as referenced or modified. ++ * ++ * Not currently called as msync() calls ioproc_sync_range() instead ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_sync_page(struct vm_area_struct *vma, unsigned long addr) +{ -+ struct list_head *el, *nel; -+ struct ptrack_desc *new; -+ int res; ++ struct ioproc_ops *cp; + -+ if (phase == PTRACK_PHASE_CLONE) -+ INIT_LIST_HEAD (&child->ptrack_list); ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->sync_page) ++ cp->sync_page(cp->arg, vma, addr); ++} + -+ list_for_each_safe (el, nel, ¤t->ptrack_list) { -+ struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link); -+ -+ res = desc->callback (desc->arg, phase, child); -+ -+ switch (phase) -+ { -+ case PTRACK_PHASE_EXIT: -+ list_del (&desc->link); -+ kfree (desc); -+ break; -+ -+ case PTRACK_PHASE_CLONE: -+ switch (res) -+ { -+ case PTRACK_FINISHED: -+ break; ++/* IOPROC INVALIDATE PAGE ++ * ++ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the ++ * user or paged out by the kernel. ++ * ++ * After this call the IOPROC must not access the physical memory again unless ++ * a new translation is loaded. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct ioproc_ops *cp; + -+ case PTRACK_INNHERIT: -+ if ((new = kmalloc (sizeof (struct ptrack_desc), GFP_ATOMIC)) == NULL) -+ { -+ /* allocation failed - notify that this process is not going -+ * to be started by signalling clone failure. -+ */ -+ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); -+ -+ goto failed; -+ } ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->invalidate_page) ++ cp->invalidate_page(cp->arg, vma, addr); ++} + -+ new->callback = desc->callback; -+ new->arg = desc->arg; -+ -+ list_add_tail (&new->link, &child->ptrack_list); -+ break; ++/* IOPROC UPDATE PAGE ++ * ++ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk ++ * up, when breaking COW or faulting in an anoymous page of memory. ++ * ++ * These give the IOPROC device the opportunity to load translations ++ * speculatively, which can improve performance by avoiding device translation ++ * faults. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_update_page(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct ioproc_ops *cp; + -+ case PTRACK_DENIED: -+ goto failed; -+ } -+ break; -+ } -+ } ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->update_page) ++ cp->update_page(cp->arg, vma, addr); ++} + -+ return 0; ++#else + -+ failed: -+ while (! list_empty (&child->ptrack_list)) -+ { -+ struct ptrack_desc *desc = list_entry (child->ptrack_list.next, struct ptrack_desc, link); -+ -+ desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child); ++/* ! CONFIG_IOPROC so make all hooks empty */ + -+ list_del (&desc->link); -+ kfree (desc); -+ } -+ return 1; -+} -+EXPORT_SYMBOL(ptrack_register); -+EXPORT_SYMBOL(ptrack_deregister); -+EXPORT_SYMBOL(ptrack_registered); -diff -urN clean/kernel/signal.c linux-2.6.9/kernel/signal.c ---- clean/kernel/signal.c 2005-05-13 13:39:11.000000000 -0400 -+++ linux-2.6.9/kernel/signal.c 2005-10-10 17:47:17.000000000 -0400 -@@ -2266,6 +2266,7 @@ - - return kill_something_info(sig, &info, pid); - } -+EXPORT_SYMBOL_GPL(sys_kill); - - /** - * sys_tgkill - send signal to one specific thread -diff -urN clean/Makefile linux-2.6.9/Makefile ---- clean/Makefile 2005-05-13 13:39:19.000000000 -0400 -+++ linux-2.6.9/Makefile 2005-10-10 17:47:31.000000000 -0400 -@@ -1,7 +1,7 @@ - VERSION = 2 - PATCHLEVEL = 6 - SUBLEVEL = 9 --EXTRAVERSION = -prep -+EXTRAVERSION = -prep.qp2.2.5.11.3qsnet - NAME=AC 1 - - # *DOCUMENTATION* -diff -urN clean/mm/fremap.c linux-2.6.9/mm/fremap.c ---- clean/mm/fremap.c 2004-10-18 17:53:06.000000000 -0400 -+++ linux-2.6.9/mm/fremap.c 2005-10-10 17:47:17.000000000 -0400 -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -29,6 +30,7 @@ - if (pte_present(pte)) { - unsigned long pfn = pte_pfn(pte); ++#define ioproc_release(mm) do { } while (0) ++#define ioproc_sync_range(vma, start, end) do { } while (0) ++#define ioproc_invalidate_range(vma, start, end) do { } while (0) ++#define ioproc_update_range(vma, start, end) do { } while (0) ++#define ioproc_change_protection(vma, start, end, prot) do { } while (0) ++#define ioproc_sync_page(vma, addr) do { } while (0) ++#define ioproc_invalidate_page(vma, addr) do { } while (0) ++#define ioproc_update_page(vma, addr) do { } while (0) ++ ++#endif /* CONFIG_IOPROC */ ++#endif /* __LINUX_IOPROC_H__ */ +Index: linux-269-5502/include/linux/sched.h +=================================================================== +--- linux-269-5502.orig/include/linux/sched.h ++++ linux-269-5502/include/linux/sched.h +@@ -185,6 +185,9 @@ extern signed long schedule_timeout_unin + asmlinkage void schedule(void); -+ ioproc_invalidate_page(vma, addr); - flush_cache_page(vma, addr); - pte = ptep_clear_flush(vma, addr, ptep); - if (pfn_valid(pfn)) { -@@ -93,6 +95,7 @@ - pte_val = *pte; - pte_unmap(pte); - update_mmu_cache(vma, addr, pte_val); -+ ioproc_update_page(vma, addr); + struct namespace; ++#ifdef CONFIG_IOPROC ++struct ioproc_ops; ++#endif - err = 0; - err_unlock: -@@ -132,6 +135,7 @@ - pte_val = *pte; - pte_unmap(pte); - update_mmu_cache(vma, addr, pte_val); -+ ioproc_update_page(vma, addr); - spin_unlock(&mm->page_table_lock); - return 0; + /* Maximum number of active map areas.. This is a random (large) number */ + #define DEFAULT_MAX_MAP_COUNT 65536 +@@ -260,6 +263,11 @@ struct mm_struct { + struct kioctx *ioctx_list; -diff -urN clean/mm/hugetlb.c linux-2.6.9/mm/hugetlb.c ---- clean/mm/hugetlb.c 2004-10-18 17:54:37.000000000 -0400 -+++ linux-2.6.9/mm/hugetlb.c 2005-10-10 17:47:17.000000000 -0400 -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include + struct kioctx default_kioctx; ++ ++#ifdef CONFIG_IOPROC ++ /* hooks for io devices with advanced RDMA capabilities */ ++ struct ioproc_ops *ioproc_ops; ++#endif + }; - const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; - static unsigned long nr_huge_pages, free_huge_pages; -@@ -254,6 +255,7 @@ - struct mm_struct *mm = vma->vm_mm; + extern int mmlist_nr; +@@ -635,6 +643,10 @@ struct task_struct { + struct mempolicy *mempolicy; + short il_next; /* could be shared with used_math */ + #endif ++#ifdef CONFIG_PTRACK ++/* process tracking callback */ ++ struct list_head ptrack_list; ++#endif + }; - spin_lock(&mm->page_table_lock); -+ ioproc_invalidate_range(vma, start, start + length); - unmap_hugepage_range(vma, start, start + length); - spin_unlock(&mm->page_table_lock); - } -diff -urN clean/mm/ioproc.c linux-2.6.9/mm/ioproc.c ---- clean/mm/ioproc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/mm/ioproc.c 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,58 @@ -+/* -*- linux-c -*- -+ * -+ * Copyright (C) 2002-2004 Quadrics Ltd. + static inline pid_t process_group(struct task_struct *tsk) +Index: linux-269-5502/include/linux/ptrack.h +=================================================================== +--- /dev/null ++++ linux-269-5502/include/linux/ptrack.h +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2000 Regents of the University of California + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by @@ -97361,292 +1652,90 @@ diff -urN clean/mm/ioproc.c linux-2.6.9/mm/ioproc.c + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ++ * Derived from exit_actn.c by ++ * Copyright (C) 2003 Quadrics Ltd. + * + */ ++#ifndef __LINUX_PTRACK_H ++#define __LINUX_PTRACK_H + -+/* -+ * Registration for IO processor page table updates. ++/* ++ * Process tracking - this allows a module to keep track of processes ++ * in order that it can manage all tasks derived from a single process. + */ + -+#include -+#include ++#define PTRACK_PHASE_CLONE 1 ++#define PTRACK_PHASE_CLONE_FAIL 2 ++#define PTRACK_PHASE_EXEC 3 ++#define PTRACK_PHASE_EXIT 4 + -+#include -+#include ++#define PTRACK_FINISHED 0 ++#define PTRACK_INNHERIT 1 ++#define PTRACK_DENIED 2 + -+int -+ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip) -+{ -+ ip->next = mm->ioproc_ops; -+ mm->ioproc_ops = ip; ++#ifdef CONFIG_PTRACK + -+ return 0; -+} ++typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child); + -+EXPORT_SYMBOL_GPL(ioproc_register_ops); ++struct ptrack_desc { ++ struct list_head link; ++ ptrack_callback_t callback; ++ void *arg; ++}; + -+int -+ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip) -+{ -+ struct ioproc_ops **tmp; ++extern int ptrack_register (ptrack_callback_t callback, void *arg); ++extern void ptrack_deregister (ptrack_callback_t callback, void *arg); ++extern int ptrack_registered (ptrack_callback_t callback, void *arg); + -+ for (tmp = &mm->ioproc_ops; *tmp && *tmp != ip; tmp= &(*tmp)->next) -+ ; -+ if (*tmp) { -+ *tmp = ip->next; -+ return 0; -+ } ++extern int ptrack_call_callbacks (int phase, struct task_struct *child); + -+ return -EINVAL; -+} ++#define INIT_TASK_PTRACK(tsk) \ ++ .ptrack_list = LIST_HEAD_INIT(tsk.ptrack_list) + -+EXPORT_SYMBOL_GPL(ioproc_unregister_ops); -diff -urN clean/mm/Kconfig linux-2.6.9/mm/Kconfig ---- clean/mm/Kconfig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.9/mm/Kconfig 2005-10-10 17:47:17.000000000 -0400 -@@ -0,0 +1,15 @@ -+# -+# VM subsystem specific config -+# ++#else ++#define ptrack_call_callbacks (phase, child) (0) + -+# Support for IO processors which have advanced RDMA capabilities -+# -+config IOPROC -+ bool "Enable IOPROC VM hooks" -+ depends on MMU -+ default y -+ help -+ This option enables hooks in the VM subsystem so that IO devices which -+ incorporate advanced RDMA capabilities can be kept in sync with CPU -+ page table changes. -+ See Documentation/vm/ioproc.txt for more details. -diff -urN clean/mm/Makefile linux-2.6.9/mm/Makefile ---- clean/mm/Makefile 2005-05-13 13:39:02.000000000 -0400 -+++ linux-2.6.9/mm/Makefile 2005-10-10 17:47:17.000000000 -0400 -@@ -16,6 +16,7 @@ - obj-$(CONFIG_X86_4G) += usercopy.o - obj-$(CONFIG_HUGETLBFS) += hugetlb.o - obj-$(CONFIG_NUMA) += mempolicy.o -+obj-$(CONFIG_IOPROC) += ioproc.o - obj-$(CONFIG_SHMEM) += shmem.o - obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o - -diff -urN clean/mm/memory.c linux-2.6.9/mm/memory.c ---- clean/mm/memory.c 2005-05-13 13:39:08.000000000 -0400 -+++ linux-2.6.9/mm/memory.c 2005-10-10 17:47:17.000000000 -0400 -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -619,6 +620,7 @@ - - lru_add_drain(); - spin_lock(&mm->page_table_lock); -+ ioproc_invalidate_range(vma, address, end); - tlb = tlb_gather_mmu(mm, 0); - unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); - tlb_finish_mmu(tlb, address, end); -@@ -968,6 +970,7 @@ - BUG(); - - spin_lock(&mm->page_table_lock); -+ ioproc_invalidate_range(vma, beg, end); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - error = -ENOMEM; -@@ -982,6 +985,7 @@ - /* - * Why flush? zeromap_pte_range has a BUG_ON for !pte_none() - */ -+ ioproc_update_range(vma, beg, end); - flush_tlb_range(vma, beg, end); - spin_unlock(&mm->page_table_lock); - return error; -@@ -1062,6 +1066,7 @@ - vma->vm_flags |= VM_IO | VM_RESERVED; - - spin_lock(&mm->page_table_lock); -+ ioproc_invalidate_range(vma, beg, end); - do { - pmd_t *pmd = pmd_alloc(mm, dir, from); - error = -ENOMEM; -@@ -1076,6 +1081,7 @@ - /* - * Why flush? remap_pte_range has a BUG_ON for !pte_none() - */ -+ ioproc_update_range(vma, beg, end); - flush_tlb_range(vma, beg, end); - spin_unlock(&mm->page_table_lock); - return error; -@@ -1162,6 +1168,7 @@ - ptep_set_access_flags(vma, address, page_table, entry, 1); - update_mmu_cache(vma, address, entry); - pte_unmap(page_table); -+ ioproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; - } -@@ -1194,6 +1201,7 @@ - ++mm->rss; - else - page_remove_rmap(old_page); -+ ioproc_invalidate_page(vma, address); - break_cow(vma, new_page, address, page_table); - lru_cache_add_active(new_page); - page_add_anon_rmap(new_page, vma, address); -@@ -1202,6 +1210,7 @@ - new_page = old_page; - } - pte_unmap(page_table); -+ ioproc_update_page(vma, address); - page_cache_release(new_page); - page_cache_release(old_page); - spin_unlock(&mm->page_table_lock); -@@ -1495,6 +1504,7 @@ - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, pte); - pte_unmap(page_table); -+ ioproc_update_page(vma, address); - spin_unlock(&mm->page_table_lock); - out: - return ret; -@@ -1555,6 +1565,7 @@ - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, entry); -+ ioproc_update_page(vma, addr); - lazy_mmu_prot_update(entry); - spin_unlock(&mm->page_table_lock); - out: -@@ -1673,6 +1684,7 @@ - - /* no need to invalidate: a not-present page shouldn't be cached */ - update_mmu_cache(vma, address, entry); -+ ioproc_update_page(vma, address); - lazy_mmu_prot_update(entry); - spin_unlock(&mm->page_table_lock); - out: -@@ -1853,6 +1865,7 @@ - return ret; - return ret == len ? 0 : -1; - } -+EXPORT_SYMBOL(make_pages_present); - - /* - * Map a vmalloc()-space virtual address to the physical page. -diff -urN clean/mm/mmap.c linux-2.6.9/mm/mmap.c ---- clean/mm/mmap.c 2005-05-13 13:39:10.000000000 -0400 -+++ linux-2.6.9/mm/mmap.c 2005-10-10 17:47:17.000000000 -0400 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1680,6 +1681,7 @@ - unsigned long nr_accounted = 0; - - lru_add_drain(); -+ ioproc_invalidate_range(vma, start, end); - tlb = tlb_gather_mmu(mm, 0); - unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL); - vm_unacct_memory(nr_accounted); -@@ -1965,6 +1967,7 @@ - - spin_lock(&mm->page_table_lock); - -+ ioproc_release(mm); - tlb = tlb_gather_mmu(mm, 1); - flush_cache_mm(mm); - /* Use ~0UL here to ensure all VMAs in the mm are unmapped */ -diff -urN clean/mm/mprotect.c linux-2.6.9/mm/mprotect.c ---- clean/mm/mprotect.c 2005-05-13 13:39:02.000000000 -0400 -+++ linux-2.6.9/mm/mprotect.c 2005-10-10 17:47:17.000000000 -0400 -@@ -10,6 +10,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -99,6 +100,7 @@ - if (start >= end) - BUG(); - spin_lock(¤t->mm->page_table_lock); -+ ioproc_change_protection(vma, start, end, newprot); - do { - change_pmd_range(dir, start, end - start, newprot); - start = (start + PGDIR_SIZE) & PGDIR_MASK; -diff -urN clean/mm/mremap.c linux-2.6.9/mm/mremap.c ---- clean/mm/mremap.c 2005-05-13 13:39:02.000000000 -0400 -+++ linux-2.6.9/mm/mremap.c 2005-10-10 17:47:17.000000000 -0400 -@@ -9,6 +9,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -148,6 +149,8 @@ - { - unsigned long offset; - -+ ioproc_invalidate_range(vma, old_addr, old_addr + len); -+ ioproc_invalidate_range(vma, new_addr, new_addr + len); - flush_cache_range(vma, old_addr, old_addr + len); - - /* -diff -urN clean/mm/msync.c linux-2.6.9/mm/msync.c ---- clean/mm/msync.c 2004-10-18 17:53:51.000000000 -0400 -+++ linux-2.6.9/mm/msync.c 2005-10-10 17:47:17.000000000 -0400 -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -115,6 +116,7 @@ - - if (address >= end) - BUG(); -+ ioproc_sync_range(vma, address, end); - do { - error |= filemap_sync_pmd_range(dir, address, end, vma, flags); - address = (address + PGDIR_SIZE) & PGDIR_MASK; -diff -urN clean/mm/rmap.c linux-2.6.9/mm/rmap.c ---- clean/mm/rmap.c 2005-05-13 13:39:08.000000000 -0400 -+++ linux-2.6.9/mm/rmap.c 2005-10-10 17:47:17.000000000 -0400 -@@ -51,6 +51,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -566,6 +567,7 @@ - } - - /* Nuke the page table entry. */ -+ ioproc_invalidate_page(vma, address); - flush_cache_page(vma, address); - pteval = ptep_clear_flush(vma, address, pte); - -@@ -673,6 +675,7 @@ - continue; ++#define INIT_TASK_PTRACK(tsk) ++ ++#endif ++ ++#endif /* __LINUX_PTRACK_H */ +Index: linux-269-5502/include/asm-ia64/param.h +=================================================================== +--- linux-269-5502.orig/include/asm-ia64/param.h ++++ linux-269-5502/include/asm-ia64/param.h +@@ -27,7 +27,7 @@ + */ + # define HZ 32 + # else +-# define HZ 1024 ++# define HZ 100 + # endif + # define USER_HZ HZ + # define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +Index: linux-269-5502/include/asm-i386/param.h +=================================================================== +--- linux-269-5502.orig/include/asm-i386/param.h ++++ linux-269-5502/include/asm-i386/param.h +@@ -2,7 +2,7 @@ + #define _ASMi386_PARAM_H - /* Nuke the page table entry. */ -+ ioproc_invalidate_page(vma, address); - flush_cache_page(vma, address); - pteval = ptep_clear_flush(vma, address, pte); + #ifdef __KERNEL__ +-# define HZ 1000 /* Internal kernel timer frequency */ ++# define HZ 100 /* Internal kernel timer frequency */ + # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ + # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ + #endif +Index: linux-269-5502/include/asm-x86_64/param.h +=================================================================== +--- linux-269-5502.orig/include/asm-x86_64/param.h ++++ linux-269-5502/include/asm-x86_64/param.h +@@ -2,7 +2,7 @@ + #define _ASMx86_64_PARAM_H + #ifdef __KERNEL__ +-# define HZ 1000 /* Internal kernel timer frequency */ ++# define HZ 100 /* Internal kernel timer frequency */ + # define USER_HZ 100 /* .. some user interfaces are in "ticks */ + #define CLOCKS_PER_SEC (USER_HZ) /* like times() */ + #endif diff --git a/lustre/kernel_patches/patches/qsnet-suse-2.6.patch b/lustre/kernel_patches/patches/qsnet-suse-2.6.patch index 14c227d..6a5576b 100644 --- a/lustre/kernel_patches/patches/qsnet-suse-2.6.patch +++ b/lustre/kernel_patches/patches/qsnet-suse-2.6.patch @@ -1,21 +1,18 @@ -Index: linux-2.6.5-7.191/arch/i386/defconfig -=================================================================== ---- linux-2.6.5-7.191.orig/arch/i386/defconfig 2005-06-28 12:24:26.000000000 -0400 -+++ linux-2.6.5-7.191/arch/i386/defconfig 2005-07-28 14:52:52.764691504 -0400 -@@ -139,6 +139,8 @@ - CONFIG_EFI=y - CONFIG_BOOT_IOREMAP=y - CONFIG_REGPARM=y +Index: LINUX-SRC-TREE/arch/i386/defconfig +=================================================================== +--- LINUX-SRC-TREE.orig/arch/i386/defconfig ++++ LINUX-SRC-TREE/arch/i386/defconfig +@@ -2932,3 +2932,5 @@ CONFIG_CFGNAME="default" + CONFIG_RELEASE="7.283" + CONFIG_X86_BIOS_REBOOT=y + CONFIG_PC=y +CONFIG_IOPROC=y +CONFIG_PTRACK=y - - # - # Special options -Index: linux-2.6.5-7.191/arch/i386/Kconfig +Index: LINUX-SRC-TREE/arch/i386/Kconfig =================================================================== ---- linux-2.6.5-7.191.orig/arch/i386/Kconfig 2005-06-28 12:24:15.000000000 -0400 -+++ linux-2.6.5-7.191/arch/i386/Kconfig 2005-07-28 14:52:52.765691352 -0400 -@@ -1030,6 +1030,9 @@ +--- LINUX-SRC-TREE.orig/arch/i386/Kconfig ++++ LINUX-SRC-TREE/arch/i386/Kconfig +@@ -1022,6 +1022,9 @@ config APM_REAL_MODE_POWER_OFF a work-around for a number of buggy BIOSes. Switch this option on if your computer crashes instead of powering off properly. @@ -25,10 +22,10 @@ Index: linux-2.6.5-7.191/arch/i386/Kconfig endmenu source "arch/i386/kernel/cpu/cpufreq/Kconfig" -Index: linux-2.6.5-7.191/arch/i386/mm/hugetlbpage.c +Index: LINUX-SRC-TREE/arch/i386/mm/hugetlbpage.c =================================================================== ---- linux-2.6.5-7.191.orig/arch/i386/mm/hugetlbpage.c 2005-06-28 12:24:09.000000000 -0400 -+++ linux-2.6.5-7.191/arch/i386/mm/hugetlbpage.c 2005-07-28 14:52:52.766691200 -0400 +--- LINUX-SRC-TREE.orig/arch/i386/mm/hugetlbpage.c ++++ LINUX-SRC-TREE/arch/i386/mm/hugetlbpage.c @@ -16,6 +16,7 @@ #include #include @@ -37,7 +34,7 @@ Index: linux-2.6.5-7.191/arch/i386/mm/hugetlbpage.c #include #include #include -@@ -393,6 +394,7 @@ +@@ -393,6 +394,7 @@ zap_hugepage_range(struct vm_area_struct { struct mm_struct *mm = vma->vm_mm; spin_lock(&mm->page_table_lock); @@ -45,11 +42,11 @@ Index: linux-2.6.5-7.191/arch/i386/mm/hugetlbpage.c unmap_hugepage_range(vma, start, start + length); spin_unlock(&mm->page_table_lock); } -Index: linux-2.6.5-7.191/arch/ia64/defconfig +Index: LINUX-SRC-TREE/arch/ia64/defconfig =================================================================== ---- linux-2.6.5-7.191.orig/arch/ia64/defconfig 2005-06-28 12:24:26.000000000 -0400 -+++ linux-2.6.5-7.191/arch/ia64/defconfig 2005-07-28 14:52:52.767691048 -0400 -@@ -103,6 +103,8 @@ +--- LINUX-SRC-TREE.orig/arch/ia64/defconfig ++++ LINUX-SRC-TREE/arch/ia64/defconfig +@@ -104,6 +104,8 @@ CONFIG_IA64_PALINFO=y CONFIG_EFI_VARS=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m @@ -58,11 +55,11 @@ Index: linux-2.6.5-7.191/arch/ia64/defconfig # # Power management and ACPI -Index: linux-2.6.5-7.191/arch/ia64/Kconfig +Index: LINUX-SRC-TREE/arch/ia64/Kconfig =================================================================== ---- linux-2.6.5-7.191.orig/arch/ia64/Kconfig 2005-06-28 12:24:20.000000000 -0400 -+++ linux-2.6.5-7.191/arch/ia64/Kconfig 2005-07-28 14:52:52.768690896 -0400 -@@ -323,6 +323,8 @@ +--- LINUX-SRC-TREE.orig/arch/ia64/Kconfig ++++ LINUX-SRC-TREE/arch/ia64/Kconfig +@@ -334,6 +334,8 @@ config EFI_VARS To use this option, you have to check that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. @@ -71,10 +68,10 @@ Index: linux-2.6.5-7.191/arch/ia64/Kconfig source "fs/Kconfig.binfmt" endmenu -Index: linux-2.6.5-7.191/arch/ia64/mm/hugetlbpage.c +Index: LINUX-SRC-TREE/arch/ia64/mm/hugetlbpage.c =================================================================== ---- linux-2.6.5-7.191.orig/arch/ia64/mm/hugetlbpage.c 2005-06-28 12:24:04.000000000 -0400 -+++ linux-2.6.5-7.191/arch/ia64/mm/hugetlbpage.c 2005-07-28 14:52:52.768690896 -0400 +--- LINUX-SRC-TREE.orig/arch/ia64/mm/hugetlbpage.c ++++ LINUX-SRC-TREE/arch/ia64/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include @@ -83,7 +80,7 @@ Index: linux-2.6.5-7.191/arch/ia64/mm/hugetlbpage.c #include #include #include -@@ -378,6 +379,7 @@ +@@ -378,6 +379,7 @@ void zap_hugepage_range(struct vm_area_s { struct mm_struct *mm = vma->vm_mm; spin_lock(&mm->page_table_lock); @@ -91,11 +88,11 @@ Index: linux-2.6.5-7.191/arch/ia64/mm/hugetlbpage.c unmap_hugepage_range(vma, start, start + length); spin_unlock(&mm->page_table_lock); } -Index: linux-2.6.5-7.191/arch/x86_64/defconfig +Index: LINUX-SRC-TREE/arch/x86_64/defconfig =================================================================== ---- linux-2.6.5-7.191.orig/arch/x86_64/defconfig 2005-06-28 12:24:26.000000000 -0400 -+++ linux-2.6.5-7.191/arch/x86_64/defconfig 2005-07-28 14:52:52.770690592 -0400 -@@ -91,6 +91,8 @@ +--- LINUX-SRC-TREE.orig/arch/x86_64/defconfig ++++ LINUX-SRC-TREE/arch/x86_64/defconfig +@@ -98,6 +98,8 @@ CONFIG_MTRR=y CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y @@ -104,11 +101,11 @@ Index: linux-2.6.5-7.191/arch/x86_64/defconfig # # Power management options -Index: linux-2.6.5-7.191/arch/x86_64/Kconfig +Index: LINUX-SRC-TREE/arch/x86_64/Kconfig =================================================================== ---- linux-2.6.5-7.191.orig/arch/x86_64/Kconfig 2005-06-28 12:24:23.000000000 -0400 -+++ linux-2.6.5-7.191/arch/x86_64/Kconfig 2005-07-28 14:52:52.770690592 -0400 -@@ -341,6 +341,9 @@ +--- LINUX-SRC-TREE.orig/arch/x86_64/Kconfig ++++ LINUX-SRC-TREE/arch/x86_64/Kconfig +@@ -343,6 +343,9 @@ source "drivers/acpi/Kconfig" source "arch/x86_64/kernel/cpufreq/Kconfig" @@ -118,10 +115,10 @@ Index: linux-2.6.5-7.191/arch/x86_64/Kconfig endmenu menu "Bus options (PCI etc.)" -Index: linux-2.6.5-7.191/Documentation/vm/ioproc.txt +Index: LINUX-SRC-TREE/Documentation/vm/ioproc.txt =================================================================== ---- linux-2.6.5-7.191.orig/Documentation/vm/ioproc.txt 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/Documentation/vm/ioproc.txt 2005-07-28 14:52:52.771690440 -0400 +--- /dev/null ++++ LINUX-SRC-TREE/Documentation/vm/ioproc.txt @@ -0,0 +1,468 @@ +Linux IOPROC patch overview +=========================== @@ -591,93585 +588,457 @@ Index: linux-2.6.5-7.191/Documentation/vm/ioproc.txt + + +-- Last update DavidAddison - 17 Aug 2004 -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_linux.c +Index: LINUX-SRC-TREE/fs/exec.c =================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/eip_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_linux.c 2005-07-28 14:52:52.774689984 -0400 -@@ -0,0 +1,1576 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: eip_linux.c,v 1.89.2.4 2005/02/04 14:30:35 mike Exp $" -+ -+#include -+#include +--- LINUX-SRC-TREE.orig/fs/exec.c ++++ LINUX-SRC-TREE/fs/exec.c +@@ -65,6 +65,8 @@ EXPORT_SYMBOL(coredump_notifier_list); + #include + #endif + ++#include + -+#include + int core_uses_pid; + char core_pattern[65] = "core"; + int suid_dumpable = 0; +@@ -1213,6 +1215,9 @@ int do_execve(char * filename, + if (retval < 0) + goto out; + ++ /* notify any ptrack callbacks of the process exec */ ++ ptrack_call_callbacks(PTRACK_PHASE_EXEC, NULL); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include + retval = search_binary_handler(&bprm,regs); + if (retval >= 0) { + TRIG_EVENT(exec_hook, file->f_dentry->d_name.len, +Index: LINUX-SRC-TREE/fs/read_write.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/read_write.c ++++ LINUX-SRC-TREE/fs/read_write.c +@@ -339,6 +339,7 @@ asmlinkage ssize_t sys_write(unsigned in + + return ret; + } ++EXPORT_SYMBOL_GPL(sys_write); + + asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, + size_t count, loff_t pos) +Index: LINUX-SRC-TREE/fs/select.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/select.c ++++ LINUX-SRC-TREE/fs/select.c +@@ -649,3 +649,4 @@ ssize_t generic_aio_poll(struct kiocb *i + } + return -EIOCBRETRY; + } ++EXPORT_SYMBOL_GPL(sys_poll); +Index: LINUX-SRC-TREE/include/linux/init_task.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/init_task.h ++++ LINUX-SRC-TREE/include/linux/init_task.h +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + #define INIT_FILES \ + { \ +@@ -116,6 +117,7 @@ extern struct group_info init_groups; + .map_base = __TASK_UNMAPPED_BASE, \ + .io_wait = NULL, \ + INIT_TASK_PAGG(tsk) \ ++ INIT_TASK_PTRACK(tsk) \ + } + + +Index: LINUX-SRC-TREE/include/linux/ioproc.h +=================================================================== +--- /dev/null ++++ LINUX-SRC-TREE/include/linux/ioproc.h +@@ -0,0 +1,271 @@ ++/* -*- linux-c -*- ++ * ++ * Copyright (C) 2002-2004 Quadrics Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * ++ */ + -+#include -+#include ++/* ++ * Callbacks for IO processor page table updates. ++ */ + -+#undef ASSERT -+#include -+#include ++#ifndef __LINUX_IOPROC_H__ ++#define __LINUX_IOPROC_H__ + ++#include ++#include + ++typedef struct ioproc_ops { ++ struct ioproc_ops *next; ++ void *arg; + -+#include -+#include ++ void (*release)(void *arg, struct mm_struct *mm); ++ void (*sync_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); ++ void (*invalidate_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); ++ void (*update_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); + -+#include "eip_linux.h" -+#include "eip_stats.h" ++ void (*change_protection)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot); + -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *); -+#endif -+static void eip_iph_display(struct iphdr *); -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER *); -+static void eip_packet_display(unsigned char *); -+#endif -+static void eip_tmd_display(EIP_TMD *); -+static void eip_tmd_head_display(EIP_TMD_HEAD *); -+static void eip_rmd_display(EIP_RMD *); -+static void eip_rmd_head_display(EIP_RMD_HEAD *); ++ void (*sync_page)(void *arg, struct vm_area_struct *vma, unsigned long address); ++ void (*invalidate_page)(void *arg, struct vm_area_struct *vma, unsigned long address); ++ void (*update_page)(void *arg, struct vm_area_struct *vma, unsigned long address); + -+static void eip_rmd_reclaim(EIP_RMD *); ++} ioproc_ops_t; + -+static inline EP_NMH *eip_dma_reserve(int, int); -+static inline void __eip_tmd_load(EIP_TMD *, EP_RAILMASK *); -+static inline void __eip_tmd_unload(EIP_TMD *); -+static inline unsigned long eip_buff_alloc(int, int); -+static inline void eip_buff_free(unsigned long, int); -+static struct iphdr *eip_ipfrag_get(char *); -+static inline void eip_rmd_free(EIP_RMD *); -+static inline void eip_skb_load(EIP_RMD *); -+static inline void eip_skb_unload(EIP_RMD *); -+static inline void eip_rmd_requeue(EIP_RMD *); -+static EIP_RMD *eip_rmd_alloc(int, int); -+static int eip_rmd_alloc_replace(EIP_RMD *, int, int); -+static int eip_rmd_alloc_queue(int, int, int, int); -+static int eip_rmds_alloc(void); -+static void eip_rxhandler(EP_RXD *); -+static void eip_rx_tasklet(unsigned long); -+static inline void eip_tmd_init(EIP_TMD *, unsigned long, EIP_TMD_HEAD *, unsigned long, int); -+static inline EIP_TMD *eip_tmd_get(int); -+static inline void eip_tmd_put(EIP_TMD *); -+static inline void eip_tmd_load(EIP_TMD *); -+static inline void eip_tmd_unload(EIP_TMD *); -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD *, EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD *, int); -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD *, int); -+static int eip_tmds_alloc(void); -+int eip_hard_start_xmit(struct sk_buff *, struct net_device *); -+static inline int eip_do_xmit(EIP_TMD *, EP_NMD *i, EP_PAYLOAD *); -+static void eip_txhandler(EP_TXD *, void *, EP_STATUS); -+static void eip_tx_tasklet(unsigned long); -+void eip_stop_queue(void); -+void eip_start_queue(void); -+static int eip_open(struct net_device *); -+static int eip_close(struct net_device *); -+static struct net_device_stats *eip_get_stats(struct net_device *); -+static int eip_change_mtu(struct net_device *, int); ++/* IOPROC Registration ++ * ++ * Called by the IOPROC device driver to register its interest in page table ++ * changes for the process associated with the supplied mm_struct ++ * ++ * The caller should first allocate and fill out an ioproc_ops structure with ++ * the function pointers initialised to the device driver specific code for ++ * each callback. If the device driver doesn't have code for a particular ++ * callback then it should set the function pointer to be NULL. ++ * The ioproc_ops arg parameter will be passed unchanged as the first argument ++ * to each callback function invocation. ++ * ++ * The ioproc registration is not inherited across fork() and should be called ++ * once for each process that the IOPROC device driver is interested in. ++ * ++ * Must be called holding the mm->page_table_lock ++ */ ++extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); + -+static int eip_rx_dropping = 0; -+static int eip_rx_tasklet_locked = 1; + -+/* Global */ -+struct timer_list eip_rx_tasklet_timer; -+ -+EIP_RX *eip_rx = NULL; -+EIP_TX *eip_tx = NULL; -+int eip_checksum_state=CHECKSUM_NONE; ++/* IOPROC De-registration ++ * ++ * Called by the IOPROC device driver when it is no longer interested in page ++ * table changes for the process associated with the supplied mm_struct ++ * ++ * Normally this is not needed to be called as the ioproc_release() code will ++ * automatically unlink the ioproc_ops struct from the mm_struct as the ++ * process exits ++ * ++ * Must be called holding the mm->page_table_lock ++ */ ++extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); + -+int tmd_max = EIP_TMD_MAX_NR; -+int rmd_max = EIP_RMD_MAX_NR; -+int rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+int rx_granularity = EIP_RX_GRANULARITY; -+int tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+EP_RAILMASK tx_railmask = EP_RAILMASK_ALL; -+int eipdebug = 0; ++#ifdef CONFIG_IOPROC + -+#ifdef UNUSED -+static void eip_skb_display(struct sk_buff *skb) -+{ -+ if (skb) { -+ __EIP_DBG_PRINTF("SKB [%p] : len %d truesize %d proto %x pkt type %x cloned %d users %d summed %d\n", -+ skb, skb->len, skb->truesize, skb->protocol, skb->pkt_type, skb->cloned, atomic_read(&skb->users), skb->ip_summed); -+ __EIP_DBG_PRINTF("SKB [%p] : skb_shinfo dataref %d nr_frags %d frag_list[%p] (device %p)\n", skb, -+ atomic_read(&skb_shinfo(skb)->dataref), skb_shinfo(skb)->nr_frags, skb_shinfo(skb)->frag_list, skb->dev); -+ __EIP_DBG_PRINTF("SKB [%p] : head[%p] data[%p] tail [%p] end [%p] data_len [%d]\n", skb, skb->head, skb->data, -+ skb->tail, skb->end, skb->data_len); -+ __EIP_DBG_PRINTF("SKB [%p] : Transport Layer h.(th, uh, icmph, raw)[%p]\n", skb, skb->h.th); -+ __EIP_DBG_PRINTF("SKB [%p] : Network Layer nh.(iph, arph, raw)[%p]\n", skb, skb->nh.iph); -+ __EIP_DBG_PRINTF("SKB [%p] : Link Layer mac.(ethernet, raw)[%p]\n", skb, skb->mac.ethernet); -+ return; -+ } -+ EIP_ERR_PRINTF("SKB IS NULL - NO SKB TO DISPLAY\n"); -+} -+#endif -+static void eip_iph_display(struct iphdr *iph) -+{ -+ if (iph) { -+ __EIP_DBG_PRINTF("IPH [%p] : version %d header len %d TOS 0x%x Total len %d\n", -+ iph, iph->version, iph->ihl, htons(iph->tos), htons(iph->tot_len)); -+ __EIP_DBG_PRINTF("IPH [%p] : id %d frag flags 0x%x offset %d\n", -+ iph, htons(iph->id), (iph->frag_off & htons(IP_CE | IP_DF | IP_MF)) >> 4, -+ (htons(iph->frag_off) << 3) & IP_OFFSET); -+ __EIP_DBG_PRINTF("IPH [%p] : TTL %d proto %d header checksum 0x%x\n", iph, iph->ttl, iph->protocol, iph->check); -+ __EIP_DBG_PRINTF("IPH [%p] : IP src %u.%u.%u.%u dest %u.%u.%u.%u\n", iph, -+ ((unsigned char *)&(iph->saddr))[0],((unsigned char *)&(iph->saddr))[1], ((unsigned char *)&(iph->saddr))[2],((unsigned char *)&(iph->saddr))[3], -+ ((unsigned char *)&(iph->daddr))[0],((unsigned char *)&(iph->daddr))[1], ((unsigned char *)&(iph->daddr))[2],((unsigned char *)&(iph->daddr))[3]); -+ return; -+ } -+ EIP_ERR_PRINTF("IPH IS NULL - NO IPH TO DISPLAY\n"); -+} -+#ifdef UNUSED -+static void eip_eiph_display(EIP_HEADER * eiph) -+{ -+ if (eiph) { -+ __EIP_DBG_PRINTF("EIPH [%p] : dhost %04x.%04x.%04x sap %x\n", eiph, eiph->h_dhost.ip_bcast, eiph->h_dhost.ip_inst, -+ eiph->h_dhost.ip_addr, eiph->h_sap); -+ __EIP_DBG_PRINTF("EIPH [%p] : shost %04x.%04x.%04x \n", eiph, eiph->h_shost.ip_bcast, eiph->h_shost.ip_inst, -+ eiph->h_shost.ip_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("EIPH IS NULL - NO EIPH TO DISPLAY\n"); -+} -+static void eip_packet_display(unsigned char *data) -+{ -+ eip_eiph_display((EIP_HEADER *) data); -+ eip_iph_display((struct iphdr *) (data + EIP_HEADER_PAD + ETH_HLEN)); -+} -+#endif -+static void eip_tmd_display(EIP_TMD * tmd) -+{ -+ if (tmd) { -+ __EIP_DBG_PRINTF("\t\tTMD [%p] : next[%p] skb[%p] DVMA[%d]\n", tmd, tmd->chain.next, tmd->skb, tmd->dvma_idx); -+ if (tmd->dma_base) -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] *data 0x%lx\n", tmd, tmd->head, *((unsigned long *) tmd->dma_base)); -+ else -+ __EIP_DBG_PRINTF("TMD [%p] : head[%p] NO DATA !!!\n", tmd, tmd->head); -+ __EIP_DBG_PRINTF("TMD [%p] : DMA(%lx,%d,%d) ebase[%x]\n",tmd, tmd->dma_base, tmd->dma_len, tmd->nmd.nmd_len, -+ tmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD IS NULL - NO TMD TO DISPLAY\n"); -+ -+} -+static void eip_ipf_display(EIP_IPFRAG * ipf) ++/* IOPROC Release ++ * ++ * Called during exit_mmap() as all vmas are torn down and unmapped. ++ * ++ * Also unlinks the ioproc_ops structure from the mm list as it goes. ++ * ++ * No need for locks as the mm can no longer be accessed at this point ++ * ++ */ ++static inline void ++ioproc_release(struct mm_struct *mm) +{ -+ if (ipf) { -+ __EIP_DBG_PRINTF("IPF[%p] : datagram len %d dma correction %d uts %lx frag_nr %d\n", ipf, ipf->datagram_len, -+ ipf->dma_correction, ipf->timestamp.tv_usec, ipf->frag_nr); -+ eip_tmd_display((EIP_TMD *) ipf); -+ return; -+ } -+ EIP_ERR_PRINTF("IPF IS NULL - NO IPF TO DISPLAY\n"); -+} ++ struct ioproc_ops *cp; + -+static void eip_tmd_head_display(EIP_TMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("TMD HEAD [%p] : handle[%p] tmds[%p] %3.3d/%3.3d/%3.3d\n", head, head->handle, head->tmd, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), -+ eip_tx->tmd_max_nr); -+ return; -+ } -+ EIP_ERR_PRINTF("TMD HEAD IS NULL - NO TMD HEAD TO DISPLAY\n"); -+} -+static void eip_rmd_display(EIP_RMD * rmd) -+{ -+ if (rmd) { -+ __EIP_DBG_PRINTF("RMD [%p] : next[%p] rxd[%p] DVMA[%d]\n", rmd, rmd->chain.next, rmd->rxd, rmd->dvma_idx); -+ __EIP_DBG_PRINTF("RMD [%p] : head[%p]\n", rmd, rmd->head); -+ __EIP_DBG_PRINTF("RMD [%p] : ebase[%x]\n", rmd, rmd->nmd.nmd_addr); -+ return; -+ } -+ EIP_ERR_PRINTF("RMD IS NULL - NO RMD TO DISPLAY\n"); -+} -+static void eip_rmd_head_display(EIP_RMD_HEAD * head) -+{ -+ if (head) { -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : rcvr[%p] handle[%p] busy list[%p]\n", head, head->rcvr, head->handle, head->busy_list); -+ __EIP_DBG_PRINTF("RMD HEAD [%p] : %3.3d/%3.3d/%3.3d\n", head, -+ EIP_STAT_QUEUED_GET(&head->stats), EIP_STAT_ALLOC_GET(&head->stats), eip_rx->rmd_max_nr); -+ return; ++ while ((cp = mm->ioproc_ops) != NULL) { ++ mm->ioproc_ops = cp->next; ++ ++ if (cp->release) ++ cp->release(cp->arg, mm); + } -+ EIP_ERR_PRINTF("RMD HEAD IS NULL - NO RMD HEAD TO DISPLAY\n"); +} + -+/* END - DISPLAY FUNCTIONS */ -+static inline EP_NMH *eip_dma_reserve(int pages_nr, int perm) ++/* IOPROC SYNC RANGE ++ * ++ * Called when a memory map is synchronised with its disk image i.e. when the ++ * msync() syscall is invoked. Any future read or write to the associated ++ * pages by the IOPROC should cause the page to be marked as referenced or ++ * modified. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_sync_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ -+ EP_NMH *handle = ep_dvma_reserve(eip_tx->ep_system, pages_nr, perm); -+ -+ if (handle) -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HANDLE [%p] %d pages of elan address space reserved\n", -+ handle, pages_nr); -+ else -+ EIP_ERR_PRINTF("cannot reserve %d page(s) of elan address space\n", pages_nr); ++ struct ioproc_ops *cp; + -+ return handle; ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->sync_range) ++ cp->sync_range(cp->arg, vma, start, end); +} + -+static inline void __eip_tmd_load(EIP_TMD * tmd, EP_RAILMASK *rmask) ++/* IOPROC INVALIDATE RANGE ++ * ++ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the ++ * user or paged out by the kernel. ++ * ++ * After this call the IOPROC must not access the physical memory again unless ++ * a new translation is loaded. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_invalidate_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ -+ EIP_ASSERT(tmd->nmd.nmd_len > 0); ++ struct ioproc_ops *cp; + -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) tmd->dma_base, tmd->nmd.nmd_len, tmd->head->handle, -+ tmd->dvma_idx, rmask, &tmd->nmd); ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->invalidate_range) ++ cp->invalidate_range(cp->arg, vma, start, end); +} + -+static inline void __eip_tmd_unload(EIP_TMD * tmd) -+{ -+ EIP_ASSERT(tmd->nmd.nmd_addr && tmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, tmd->head->handle, &tmd->nmd); -+ tmd->nmd.nmd_addr = 0; -+} -+static inline unsigned long eip_buff_alloc(int buff_len, int gfp) ++/* IOPROC UPDATE RANGE ++ * ++ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk ++ * up, when breaking COW or faulting in an anonymous page of memory. ++ * ++ * These give the IOPROC device driver the opportunity to load translations ++ * speculatively, which can improve performance by avoiding device translation ++ * faults. ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_update_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ -+ unsigned long buff_base = (buff_len < PAGE_SIZE) ? -+ (unsigned long) kmalloc(buff_len, gfp) : -+ __get_dma_pages(gfp, get_order(buff_len)); -+ -+ if (likely(buff_base)) -+ return buff_base; ++ struct ioproc_ops *cp; + -+ EIP_ERR_PRINTF("cannot allocate %db of memory\n", buff_len); -+ return 0; -+} -+static inline void eip_buff_free(unsigned long buff_base, int buff_len) -+{ -+ (buff_len < PAGE_SIZE) ? kfree((void *) buff_base) : -+ free_pages(buff_base, get_order(buff_len)); ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->update_range) ++ cp->update_range(cp->arg, vma, start, end); +} -+static struct iphdr *eip_ipfrag_get(char *data) -+{ -+ struct ethhdr *eh = (struct ethhdr *) (data); -+ struct iphdr *iph; -+ -+ if (eh->h_proto == htons(ETH_P_IP)) { -+ iph = (struct iphdr *) ((char *) eh + ETH_HLEN); -+ -+ /* EIP_DBG(eip_iph_display(iph)); */ + -+ if ((iph->frag_off & htons(IP_MF | IP_OFFSET))) -+ return iph; -+ } -+ return NULL; -+} + -+static inline void eip_rmd_free(EIP_RMD * rmd) ++/* IOPROC CHANGE PROTECTION ++ * ++ * Called when the protection on a region of memory is changed i.e. when the ++ * mprotect() syscall is invoked. ++ * ++ * The IOPROC must not be able to write to a read-only page, so if the ++ * permissions are downgraded then it must honour them. If they are upgraded ++ * it can treat this in the same way as the ioproc_update_[range|sync]() calls ++ * ++ * Called holding the mm->page_table_lock ++ */ ++static inline void ++ioproc_change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) +{ -+ EIP_ASSERT2(rmd->nmd.nmd_addr == 0, eip_rmd_display, rmd); -+ -+ if ( rmd->skb != NULL) -+ kfree_skb (rmd->skb); -+ -+ kfree(rmd); ++ struct ioproc_ops *cp; + -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "RMD [%p] : FREED\n", rmd); ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->change_protection) ++ cp->change_protection(cp->arg, vma, start, end, newprot); +} -+static inline void eip_skb_load(EIP_RMD * rmd) -+{ -+ EP_RAILMASK rmask = rmd->rxd ? ep_rxd_railmask (rmd->rxd) : 0; + -+ EIP_ASSERT(skb_tailroom(rmd->skb) > 0); -+ -+ ep_dvma_load(eip_tx->ep_system, NULL, (caddr_t) rmd->skb->data, skb_tailroom(rmd->skb), rmd->head->handle, -+ rmd->dvma_idx, &rmask, &rmd->nmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : LOADED\n", rmd); -+} -+static inline void eip_skb_unload(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->nmd.nmd_addr && rmd->head->handle); -+ -+ ep_dvma_unload(eip_tx->ep_system, rmd->head->handle, &rmd->nmd); -+ rmd->nmd.nmd_addr = 0; -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_EP_DVMA, "RMD [%p] : UNLOADED\n", rmd); -+} -+static inline void eip_rmd_requeue(EIP_RMD * rmd) -+{ -+ EIP_ASSERT(rmd->rxd); -+ -+ rmd->chain.next = NULL; -+ -+ ep_requeue_receive(rmd->rxd, eip_rxhandler, rmd, &rmd->nmd, EP_NO_ALLOC|EP_NO_SLEEP ); -+ -+ atomic_inc(&rmd->head->stats); -+ -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : REQUEUED\n", rmd); -+} -+static EIP_RMD * eip_rmd_alloc(int svc, int gfp) -+{ -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ EIP_RMD *rmd; -+ struct sk_buff *skb; -+ -+ if (!(skb = alloc_skb((buff_len - EIP_EXTRA), gfp))) -+ return NULL; -+ -+ skb_reserve(skb, 2); -+ -+ if (!(rmd = (EIP_RMD *) kmalloc(buff_len, gfp))) { -+ kfree_skb(skb); -+ return NULL; -+ } -+ -+ rmd->skb = skb; -+ -+ rmd->chain.next = NULL; -+ rmd->rxd = NULL; -+ rmd->head = &eip_rx->head[svc]; -+ -+ return rmd; -+} -+ -+static int eip_rmd_alloc_replace(EIP_RMD *rmd, int svc, int gfp) -+{ -+ struct sk_buff *skb,*old; -+ int buff_len = EIP_SVC_SMALLEST_LEN << svc; -+ -+ if (!(skb = alloc_skb(buff_len, gfp))) -+ return 1; -+ -+ skb_reserve(skb, 2); -+ -+ eip_skb_unload(rmd); -+ -+ old = rmd->skb; -+ rmd->skb = skb; -+ -+ eip_skb_load(rmd); -+ -+ eip_rmd_requeue(rmd); -+ -+ kfree_skb(old); -+ -+ return 0; -+} -+ -+static int eip_rmd_alloc_queue(int svc, int dvma_idx, int gfp, int attr) -+{ -+ EIP_RMD * rmd = eip_rmd_alloc(svc, gfp); -+ -+ if (!rmd) -+ return 1; -+ -+ EIP_STAT_ALLOC_ADD(&rmd->head->stats, 1); -+ -+ rmd->dvma_idx = dvma_idx; -+ eip_skb_load(rmd); -+ -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "RMD [%p] : ALLOCATED for SVC 0x%x\n", rmd, svc); -+ -+ if (ep_queue_receive(rmd->head->rcvr, eip_rxhandler, (void *) rmd, &rmd->nmd, attr) == ESUCCESS) { -+ atomic_inc(&rmd->head->stats); -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "RMD [%p] : QUEUED on SVC 0x%x\n", rmd, svc); -+ return 0; -+ } -+ -+ EIP_ERR_PRINTF("RMD [%p] : couldn't be QUEUED on SVC 0x%x\n", rmd, svc); -+ -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ -+ eip_skb_unload(rmd); -+ eip_rmd_free(rmd); -+ -+ return 1; -+} -+ -+static int eip_rmds_alloc(void) -+{ -+ int idx, svc; -+ -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ eip_rx->head[svc].rcvr = ep_alloc_rcvr(eip_tx->ep_system, EIP_SVC_EP(svc), rx_envelope_nr); -+ if (!eip_rx->head[svc].rcvr) { -+ EIP_ERR_PRINTF("Cannot install receiver for SVC 0x%x - maybe cable is disconnected\n", svc); -+ return -EAGAIN; -+ } -+ -+ eip_rx->head[svc].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)) * eip_rx->rmd_max_nr, -+ EP_PERM_WRITE); -+ if (!eip_rx->head[svc].handle) -+ return -ENOMEM; -+ -+ EIP_DBG(EIP_DBG_RMD_HEAD, eip_rmd_head_display, &eip_rx->head[svc]); -+ -+ for (idx = 0; idx < EIP_RMD_NR; idx++) { -+ if (eip_rmd_alloc_queue(svc, idx * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_KERNEL, EP_NO_SLEEP)) -+ return -ENOMEM; -+ } -+ } -+ return 0; -+} -+static void eip_rmds_free(void) -+{ -+ unsigned long flags; -+ EIP_RMD *rmd; -+ int svc; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ for (svc = 0; svc < EIP_SVC_NR ; svc++) { -+ -+ while ((rmd = eip_rx->head[svc].busy_list)) { -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ if (eip_rx->head[svc].busy_list) { -+ EIP_DBG_PRINTF(EIP_DBG_RMD_QUEUE, "Still RMD [%p] on BUSY list SVC 0x%d - Scheduling\n", rmd, svc); -+ schedule(); -+ } -+ } -+ -+ EIP_ASSERT(EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) == EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats)); -+ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "HEAD[%p] : FREEING RCVR [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].rcvr); -+ -+ ep_free_rcvr(eip_rx->head[svc].rcvr); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "HEAD[%p] : RELEASING DVMA [%p]\n", &eip_rx->head[svc], -+ eip_rx->head[svc].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_rx->head[svc].handle); -+ } -+ -+} -+static int eip_rx_queues_low (void) { -+ int svc; -+ for (svc = 0; svc < EIP_SVC_NR; svc++) -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats) < EIP_RMD_ALLOC_THRESH) -+ return (1); -+ return (0); -+} -+static void eip_rxhandler(EP_RXD * rxd) -+{ -+ EIP_RMD *rmd = (EIP_RMD *) ep_rxd_arg(rxd); -+ EP_STATUS ret = ep_rxd_status(rxd); -+ EP_PAYLOAD * payload = ep_rxd_payload(rxd); -+ unsigned long data = (unsigned long) rmd->skb->data; -+ int frag_nr = 0; -+ int len; -+ -+ struct sk_buff *skb; -+ static char count = 0; -+ -+ atomic_dec(&rmd->head->stats); -+ rmd->rxd = rxd; -+ -+ if (likely(ret == EP_SUCCESS)) { -+ -+ rmd->head->dma++; -+ -+ if ( eip_rx_dropping) { -+ eip_rmd_requeue(rmd); -+ return; -+ } -+ -+ len = (payload) ? payload->Data[frag_nr++] : ep_rxd_len(rxd); -+ -+ EIP_DBG(EIP_DBG_RMD, eip_rmd_display, rmd); -+ -+again: -+ if ( (skb = skb_clone(rmd->skb, GFP_ATOMIC)) ) { -+ unsigned int off = (data - (unsigned long) rmd->skb->data); -+ -+ /* have to set the length before calling -+ * skb pull as it will not allow you to -+ * pull past the end */ -+ -+ skb_put (skb, off + len); -+ skb_pull (skb, off); -+ -+ skb->protocol = eth_type_trans(skb, eip_rx->net_device); -+ skb->ip_summed = eip_checksum_state; -+ skb->dev = eip_rx->net_device; -+ -+ /* Fabien/David/Mike this is a hack/fix to allow aggrigation of packets to work. -+ * The problem is ip_frag looks at the truesize to see if it is caching too much space. -+ * As we are reusing a large skb (cloned) for a number of small fragments, they appear to take up alot of space. -+ * so ip_frag dropped them after 4 frags (not good). So we lie and set the truesize to just bigger than the data. -+ */ -+ if (payload) -+ skb->truesize = SKB_DATA_ALIGN(skb->len + EIP_HEADER_PAD) +sizeof(struct sk_buff); -+ -+ } -+ if ( (skb) && -+ (netif_rx(skb) != NET_RX_DROP)){ -+ -+ eip_rx->bytes += len; -+ -+ if (payload && payload->Data[frag_nr] ) { -+ data += EIP_IP_ALIGN(len); -+ len = payload->Data[frag_nr++]; -+ goto again; -+ } -+ eip_rx->packets += ++frag_nr; -+ } else if ( (eip_rx->dropped++ % 20) == 0) -+ __EIP_DBG_PRINTK("Packet dropped by the TCP/IP stack - increase /proc/sys/net/core/netdev_max_backlog\n"); -+ } else if (ret == EP_SHUTDOWN ) { -+ EIP_DBG2(EIP_DBG_RMD, eip_rmd_display, rmd, "ABORTING\n"); -+ ep_complete_receive(rxd); -+ eip_skb_unload(rmd); -+ EIP_STAT_ALLOC_SUB(&rmd->head->stats, 1); -+ eip_rmd_free(rmd); -+ return; -+ } else { -+ EP_ENVELOPE *env = ep_rxd_envelope(rxd); -+ EP_NMD *nmd ; -+ -+ EIP_ERR_PRINTF("RMD[%p] : RECEIVE ret = %d\n", rmd, ret); -+ -+ for (len = 0 ; len < env->nFrags ; len++) { -+ nmd = &env->Frags[len]; -+ EIP_ERR_PRINTF("RMD[%p] : ep_frag #%d nmd_addr [%x] nmd_len %d\n", rmd, len, -+ (unsigned int) nmd->nmd_addr, nmd->nmd_len); -+ } -+ eip_rx->errors++; -+ EIP_ASSERT2(atomic_read(&skb_shinfo(rmd->skb)->dataref) == 1, eip_rmd_display, rmd); -+ } -+ -+ /* data is used to store the irq flags */ -+ spin_lock_irqsave(&eip_rx->lock, data); -+ rmd->chain.next = eip_rx->irq_list; -+ eip_rx->irq_list = rmd; -+ eip_rx->irq_list_nr++; -+ spin_unlock_irqrestore(&eip_rx->lock, data); -+ -+ if (((count++ % eip_rx->sysctl_granularity) == 0) /* and either we have passed up a number of them */ -+ || eip_rx_queues_low()) /* or we are low */ -+ tasklet_schedule(&eip_rx->tasklet); -+ else -+ { -+ if ( !timer_pending (&eip_rx_tasklet_timer) ) /* the timer not already set */ -+ mod_timer (&eip_rx_tasklet_timer, lbolt); -+ } -+} -+ -+/* dest ; if the buffer still reference on it mocve the rmd to the dest list */ -+static void eip_rmd_reclaim(EIP_RMD *rmd) -+{ -+ EIP_RMD *rmd_next = rmd; -+ int dataref; -+ -+ while (rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ dataref = atomic_read(&skb_shinfo(rmd->skb)->dataref); -+ EIP_ASSERT(dataref > 0); -+ -+ if (dataref == 1) { -+ eip_rmd_requeue(rmd); -+ } else { -+ rmd->chain.next = rmd->head->busy_list; -+ rmd->head->busy_list = rmd; -+ } -+ } -+} -+static void eip_rx_tasklet(unsigned long arg) -+{ -+ EIP_RMD *rmd, *rmd_next; -+ unsigned long flags; -+ short svc, queued; -+ int needs_reschedule; -+ -+ if (eip_rx_tasklet_locked) /* we dont want the tasklet to do anything when we are finishing */ -+ return; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ rmd = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ eip_rmd_reclaim(rmd); -+ } -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ rmd = eip_rx->irq_list; -+ eip_rx->irq_list = NULL; -+ eip_rx->irq_list_nr = 0; -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ eip_rmd_reclaim(rmd); -+ -+ needs_reschedule = 0; -+ -+ for (svc = 0; svc < EIP_SVC_NR; svc++) { -+ /* the plan is : allocate some more if possible or steall some dvma space from those on the EIP_BUSY_LIST */ -+ queued = EIP_STAT_QUEUED_GET(&eip_rx->head[svc].stats); -+ -+ EIP_ASSERT(queued >= 0 && queued <= EIP_RMD_MAX_NR); -+ -+ if (queued < EIP_RMD_ALLOC_THRESH) { -+ short allocated = EIP_STAT_ALLOC_GET(&eip_rx->head[svc].stats); -+ short how_many; -+ -+ EIP_ASSERT(allocated >= 0 && allocated <= EIP_RMD_MAX_NR); -+ -+ if (likely(allocated < eip_rx->rmd_max_nr)) { -+ -+ how_many = (((allocated / EIP_RMD_ALLOC_STEP) + 1) * EIP_RMD_ALLOC_STEP); -+ if (how_many > eip_rx->rmd_max_nr) -+ how_many = eip_rx->rmd_max_nr; -+ -+ for (; allocated < how_many && -+ (eip_rmd_alloc_queue(svc, allocated * EIP_DVMA_PAGES((EIP_SVC_SMALLEST_LEN << svc)), -+ GFP_ATOMIC, EP_NO_ALLOC|EP_NO_SLEEP) == 0) ; allocated++); -+ if ( allocated != how_many ) { -+ eip_rx->reschedule++; -+ needs_reschedule = 1; -+ } -+ } else { -+ /* steal how_many rmds and put them on the aside list */ -+ how_many = EIP_RMD_ALLOC_THRESH - queued; -+ -+ EIP_ASSERT(how_many >= 0 && how_many <= EIP_RMD_ALLOC_THRESH); -+ -+ rmd_next = eip_rx->head[svc].busy_list; -+ eip_rx->head[svc].busy_list = NULL; -+ -+ while (how_many-- && rmd_next) { -+ rmd = rmd_next; -+ rmd_next = rmd_next->chain.next; -+ -+ if (eip_rmd_alloc_replace(rmd, svc, GFP_ATOMIC)) { -+ rmd_next = rmd; -+ break; -+ } -+ } -+ eip_rx->head[svc].busy_list = rmd_next; -+ if ( how_many ) -+ needs_reschedule = 1; -+ } -+ } -+ } -+ -+ if (needs_reschedule) -+ { -+ if ( !timer_pending (&eip_rx_tasklet_timer)) -+ mod_timer (&eip_rx_tasklet_timer, lbolt); -+ } -+} -+static void eip_rx_tasklet_resched(unsigned long arg) -+{ -+ tasklet_schedule(&eip_rx->tasklet); -+} -+ -+static inline void eip_tmd_init(EIP_TMD * tmd, unsigned long buff_base, EIP_TMD_HEAD * head, unsigned long buff_len, -+ int dvma_idx) -+{ -+ tmd->dvma_idx = dvma_idx; -+ tmd->dma_base = buff_base; -+ tmd->dma_len = -1; -+ tmd->skb = NULL; -+ tmd->head = head; -+ tmd->chain.next = NULL; -+ -+ if (tmd->head != &eip_tx->head[EIP_TMD_STD]) { -+ tmd->nmd.nmd_len = buff_len; -+ eip_tmd_load(tmd); -+ } else { -+ tmd->nmd.nmd_len = -1; -+ tmd->nmd.nmd_addr = 0; -+ } -+} -+ -+static inline EIP_TMD *eip_tmd_get(int id) -+{ -+ unsigned long flags; -+ EIP_TMD *tmd = NULL; -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ while ((tmd = eip_tx->head[id].tmd) == NULL) { -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ if (ep_enable_txcallbacks(eip_tx->xmtr) == 0) { -+ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ if (eip_tx->head[id].tmd == NULL) { -+ __EIP_DBG_PRINTF("Cannot get a TMD on head %d ... stopping queue\n", id); -+ -+ eip_stop_queue (); -+ -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ return NULL; -+ } -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ } -+ -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ } -+ eip_tx->head[id].tmd = tmd->chain.next; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_dec(&tmd->head->stats); -+ return tmd; -+} -+ -+static inline void eip_tmd_put(EIP_TMD * tmd) -+{ -+ unsigned long flags; -+ -+ tmd->skb = NULL; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ tmd->chain.next = tmd->head->tmd; -+ tmd->head->tmd = tmd; -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ atomic_inc(&tmd->head->stats); -+ -+ eip_start_queue(); -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_QUEUE, "TMD [%p] : REQUEUED\n", tmd); -+} -+static inline void eip_tmd_load(EIP_TMD * tmd) -+{ -+ EP_RAILMASK rmask = tx_railmask; -+ -+ __eip_tmd_load(tmd, &rmask); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : LOADED\n", tmd); -+} -+static inline void eip_tmd_unload(EIP_TMD * tmd) -+{ -+ __eip_tmd_unload(tmd); -+ -+ EIP_DBG_PRINTF(EIP_DBG_EP_DVMA, "TMD [%p] : UNLOADED\n", tmd); -+} -+static inline void eip_tmd_free(EIP_TMD * tmd) -+{ -+ eip_buff_free(tmd->dma_base, tmd->nmd.nmd_len); -+ -+ EIP_DBG_PRINTF(EIP_DBG_MEMFREE, "TMD [%p] : FREED\n", tmd); -+ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, 1); -+} -+ -+/* tmd on a separate block */ -+static inline EIP_TMD *eip_tmd_alloc_queue(EIP_TMD * tmd, EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ eip_tmd_init(tmd, 0, head, -1, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+/* tmd on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_copybreak(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(tx_copybreak_max + sizeof(EIP_TMD), GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + tx_copybreak_max); -+ eip_tmd_init(tmd, buff_base, head, tx_copybreak_max, dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+/* ipf are on the buffer */ -+static inline EIP_TMD *eip_tmd_alloc_queue_aggreg(EIP_TMD_HEAD * head, int dvma_idx) -+{ -+ EIP_TMD *tmd; -+ unsigned long buff_base; -+ -+ if (!(buff_base = eip_buff_alloc(EIP_SVC_BIGGEST_LEN, GFP_KERNEL))) -+ return NULL; -+ -+ tmd = (EIP_TMD *) (buff_base + EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG)); -+ eip_tmd_init(tmd, buff_base, head, EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG), dvma_idx); -+ -+ eip_tmd_put(tmd); -+ EIP_STAT_ALLOC_ADD(&tmd->head->stats, 1); -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ return tmd; -+} -+ -+static int eip_tmds_alloc() -+{ -+ int i; -+ int page_nr; -+ EIP_TMD *tmd; -+ -+ page_nr = EIP_DVMA_PAGES(tx_copybreak_max); -+ -+ eip_tx->head[EIP_TMD_COPYBREAK].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_COPYBREAK]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_copybreak(&eip_tx->head[EIP_TMD_COPYBREAK], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ eip_tx->head[EIP_TMD_STD].handle = -+ eip_dma_reserve(EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN) * eip_tx->tmd_max_nr, EP_PERM_READ); -+ -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_STD]); -+ -+ tmd = kmalloc(sizeof(EIP_TMD) * EIP_TMD_NR, GFP_KERNEL); -+ if (!tmd) { -+ EIP_ERR_PRINTF("Cannot ALLOCATE %d of tmds\n", (int) sizeof(EIP_TMD) * EIP_TMD_NR); -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ for (i = 0; i < EIP_TMD_NR; i++, tmd++) { -+ if (!eip_tmd_alloc_queue(tmd, &eip_tx->head[EIP_TMD_STD], i * page_nr)) -+ return -ENOMEM; -+ } -+ -+ page_nr = EIP_DVMA_PAGES(EIP_SVC_BIGGEST_LEN); -+ -+ eip_tx->head[EIP_TMD_AGGREG].handle = eip_dma_reserve(page_nr * eip_tx->tmd_max_nr, EP_PERM_READ); -+ EIP_DBG(EIP_DBG_TMD_HEAD, eip_tmd_head_display, &eip_tx->head[EIP_TMD_AGGREG]); -+ -+ for (i = 0; i < EIP_TMD_NR; i++) { -+ if (!eip_tmd_alloc_queue_aggreg(&eip_tx->head[EIP_TMD_AGGREG], i * page_nr)) -+ return -ENOMEM; -+ } -+ return 0; -+} -+ -+static void eip_tmds_free(void) -+{ -+ EIP_TMD *tmd; -+ EIP_TMD *tmd_next; -+ int i; -+ -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ for (i = 0 ; i < 3 ; i++) { -+again: -+ if (EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats) < EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats)) { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "Polling XMTR [%p]\n", eip_tx->xmtr); -+ ep_poll_transmits(eip_tx->xmtr); -+ goto again; -+ } -+ } -+ /* everything should be queued */ -+ if ((tmd = eip_tx->head[EIP_TMD_COPYBREAK].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ eip_tmd_unload(tmd); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_COPYBREAK] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_COPYBREAK].handle); -+ -+ /* these ones have been allocated as a block */ -+ if ((tmd = eip_tx->head[EIP_TMD_STD].tmd)) { -+ do { -+ if (tmd->dvma_idx == 0 ) { -+ kfree(tmd); -+ /* eip_tmd_free(tmd); */ -+ EIP_STAT_ALLOC_SUB(&tmd->head->stats, EIP_TMD_NR); -+ tmd_next = NULL; -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_STD] BLOCK FREED\n", tmd); -+ } else -+ tmd_next = tmd->chain.next; -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "HEAD[EIP_TMD_STD] release DVMA [%p]\n", -+ eip_tx->head[EIP_TMD_STD].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_STD].handle); -+ -+ if ((tmd = eip_tx->head[EIP_TMD_AGGREG].tmd)) { -+ do { -+ tmd_next = tmd->chain.next; -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ -+ eip_tmd_unload(tmd); -+ eip_tmd_free(tmd); -+ } while (tmd_next && (tmd = tmd_next)); -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD_EP_DVMA, "TMD HEAD[%p] : [EIP_TMD_AGGREG] release DVMA\n", -+ eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_dvma_release(eip_tx->ep_system, eip_tx->head[EIP_TMD_AGGREG].handle); -+ -+ ep_free_xmtr(eip_tx->xmtr); -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "XMTR[%p] : FREED\n", eip_tx->xmtr); -+} -+ -+static inline void eip_ipf_skb_add(EIP_IPFRAG * ipf, struct sk_buff *skb) -+{ -+ int align = EIP_IP_ALIGN(skb->len); -+ -+ -+ if (ipf->dma_len == -1) { /* like a virgin; touched for the very first time */ -+ do_gettimeofday(&ipf->timestamp); -+ /* FIXE ME put that in release tmd code */ -+ ipf->frag_nr = 0; -+ ipf->dma_len = 0; -+ ipf->datagram_len = -1; -+ ipf->dma_correction = 0; -+ } -+ -+ memcpy((void *) (ipf->dma_base + ipf->dma_len), skb->data, skb->len); -+ -+ if (ipf->datagram_len == -1) { -+ struct iphdr * iph = skb->nh.iph; -+ int offset = ntohs(iph->frag_off); -+ -+ /* last one ? ; offset & ~IP_OFFSET = IP fragment flags */ -+ if (((offset & ~IP_OFFSET) & IP_MF) == 0) { -+ offset &= IP_OFFSET; -+ offset <<= 3; -+ ipf->datagram_len = offset + htons(iph->tot_len) - sizeof(struct iphdr); -+ } -+ } -+ -+ skb->next = ipf->skb; -+ ipf->skb = skb; -+ ipf->payload.Data[ipf->frag_nr] = skb->len; -+ ipf->dma_len += align; -+ ipf->dma_correction += align - skb->len + ETH_HLEN + sizeof(struct iphdr); -+ /* FIXME ; Count got wrong if ip header has options */ -+ -+ ipf->frag_nr++; -+ -+ EIP_DBG2(EIP_DBG_TMD, eip_ipf_display, ipf, "ADDED skb[%p] len %db ALIGNED(%db)\n", skb, skb->len, EIP_IP_ALIGN(skb->len)); -+} -+ -+#define eip_ipf_hasroom(ipf, skb) ((ipf->dma_len + EIP_IP_ALIGN(skb->len) < eip_tx->sysctl_ipfrag_copybreak)) -+int eip_hard_start_xmit(struct sk_buff *skb, struct net_device *devnet) -+{ -+ -+ EIP_TMD *tmd; -+ EP_NMD nmd; -+ struct iphdr *iph; -+ int j; -+ -+ if (skb->destructor){ -+ atomic_inc(&eip_tx->destructor); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ -+ if (!(iph = eip_ipfrag_get(skb->data)) || (eip_tx->sysctl_aggregation == 0)) { /* not ip fragment */ -+no_aggreg: -+ j = (skb->len < eip_tx->sysctl_copybreak) ? EIP_TMD_COPYBREAK : EIP_TMD_STD; /* j = head id */ -+ -+ if (!(tmd = eip_tmd_get(j))) { -+ if (skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ return 1; -+ } -+ -+ tmd->dma_len = skb->len; -+ tmd->skb = skb; -+ tmd->skb->next = NULL; -+ tmd->chain.next = NULL; -+ -+ if (j == EIP_TMD_COPYBREAK) { -+ memcpy((void *) tmd->dma_base, skb->data, skb->len); -+ -+ ep_nmd_subset(&nmd, &tmd->nmd, 0, skb->len); -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak++; -+#endif -+ return eip_do_xmit(tmd, &nmd, NULL); -+ } -+ tmd->dma_base = (unsigned long) skb->data; -+ tmd->nmd.nmd_len = skb->len; -+ eip_tmd_load(tmd); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_std++; -+#endif -+ return eip_do_xmit(tmd, &tmd->nmd, NULL); -+ } else if ( skb->len > EIP_SVC_BIGGEST_LEN/2 ) { -+ /* don't aggregate when we have a full mtu of data */ -+ /* or more than 32k ; in this case it is cheaper */ -+ /* to just map the buffer and send it */ -+ goto no_aggreg; -+ } else { -+ EIP_IPFRAG *ipf = NULL; -+ unsigned long flags; -+ struct list_head *l; -+ struct iphdr *iph2; -+ int i; -+ __u16 id = iph->id; -+ __u32 saddr = iph->saddr; -+ __u32 daddr = iph->daddr; -+ __u8 protocol = iph->protocol; -+ -+ EIP_DBG(EIP_DBG_IPH, eip_iph_display, iph); -+ -+ j = 0; -+ -+ /* here we can't have full mtu size aggregated packet */ -+ EIP_ASSERT_RET(skb->len < eip_tx->sysctl_ipfrag_copybreak, 0); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_for_each(l, &eip_tx->ipfrag) { -+ ipf = list_entry(l, EIP_IPFRAG, list); -+ iph2 = eip_ipfrag_get((char *) ipf->dma_base); -+ -+ EIP_ASSERT(iph2); -+ -+ if ((iph2->id == id) && -+ (get_unaligned(&iph2->saddr) == saddr) && -+ (get_unaligned(&iph2->daddr) == daddr) && -+ (iph2->protocol == protocol)) { -+ /* || timeout */ -+ if (eip_ipf_hasroom(ipf, skb)) { -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ if ((ipf->datagram_len != -1) && -+ (ipf->dma_len == (ipf->datagram_len + ipf->dma_correction) || -+ ipf->frag_nr == (128 / sizeof(uint32_t)))) { -+send_aggreg: -+ ipf->payload.Data[ipf->frag_nr] = 0; -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ ep_nmd_subset(&nmd, &ipf->nmd, 0, ipf->dma_len); -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ if ((i = eip_do_xmit((EIP_TMD *) ipf, &nmd, &ipf->payload)) != EP_SUCCESS) -+ return i; -+ if (j) -+ goto new; -+ return 0; -+ } -+ -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ return 0; -+ } else { -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF[%p] : FULL %db full - sending it\n", ipf, ipf->dma_len); -+ j = 1; -+ goto send_aggreg; -+ } -+ } -+ } -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+new: -+ if (!(ipf = (EIP_IPFRAG *) eip_tmd_get(EIP_TMD_AGGREG))) -+ goto no_aggreg; -+ -+ eip_ipf_skb_add(ipf, skb); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ list_add_tail(&ipf->list, &eip_tx->ipfrag); -+ eip_tx->ipfrag_count++; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ tasklet_schedule(&eip_tx->tasklet); -+ } -+ return 0; -+} -+static int eip_do_xmit(EIP_TMD * tmd, EP_NMD *nmd, EP_PAYLOAD *payload) -+{ -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ int attr = EP_SET_DATA((EP_NO_SLEEP | EP_NO_INTERRUPT | EP_NO_FAILOVER), EP_TYPE_SVC_INDICATOR, EP_SVC_EIP); -+ unsigned long flags; -+ int svc, rnum; -+ -+ SIZE_TO_SVC(nmd->nmd_len, svc); -+ -+ EIP_DBG(EIP_DBG_TMD, eip_tmd_display, tmd); -+ /* EIP_DBG(eip_eiph_display(eiph)); */ -+ -+ if (unlikely (eiph->h_dhost.ip_bcast)) -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_availrails(eip_tx->xmtr)); -+ else -+ rnum = ep_pickRail (EP_NMD_RAILMASK (nmd) & tx_railmask & ep_xmtr_noderails(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr))); -+ -+ if (rnum >= 0) -+ attr = EP_SET_PREFRAIL(attr, rnum); -+ -+ /* add to inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_add_tail (&tmd->chain.link, &eip_tx->inuse); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ /* ENOMEM EINVAL ECONNREFUSED ESUCCESS */ -+ svc = (unlikely(eiph->h_dhost.ip_bcast)) ? -+ ep_multicast_message(eip_tx->xmtr, -1, -1, NULL, EIP_SVC_EP(svc), attr | EP_NOT_MYSELF, eip_txhandler, tmd, payload, nmd, 1) : -+ -+ ep_transmit_message(eip_tx->xmtr, ntohs(eiph->h_dhost.ip_addr), EIP_SVC_EP(svc), attr, eip_txhandler, tmd, payload, nmd, 1); -+ -+ if (likely(svc == EP_SUCCESS)) -+ return 0; -+ else if (svc == ENOMEM) { -+ EIP_ERR_PRINTF("%s", "Memory allocation error ...\n"); -+ eip_tx->errors++; -+ } -+ else -+ { -+ /* EP_EINVAL occurs when the svc has a bad value or the iovec has too many frag; */ -+ /* we don't use the latter option here */ -+ __EIP_DBG_PRINTF("TMD [%p] : DROPPED skb[%p] status = %d from ep_?_message\n", tmd, tmd->skb, svc); -+ -+ eip_tx->dropped++; -+ } -+ -+ eip_txhandler(NULL, tmd, -99); -+ -+ /* Quadrics GNAT sw-elan/4397 - since we will "never" be able to send this packet to the */ -+ /* destination node, we drop it and feign success - this has the same behaviour as an */ -+ /* ethernet where it sticks the packet on the wire, but no-one receives it. */ -+ return 0; -+} -+ -+static void eip_txhandler(EP_TXD * txd, void *arg, EP_STATUS status) -+{ -+ EIP_TMD *tmd = (EIP_TMD *) arg; -+ struct sk_buff *skb_next; -+ unsigned long flags; -+ int svc = 0; -+ -+ if (likely(status == EP_SUCCESS)) { -+ SIZE_TO_SVC(tmd->dma_len, svc); -+ eip_tx->dma[svc]++; -+ eip_tx->bytes += tmd->dma_len; -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->packets += ipf->frag_nr; -+ } else -+ eip_tx->packets++; -+ } else { -+ if (tmd->head == &eip_tx->head[EIP_TMD_AGGREG]) { -+ EIP_IPFRAG *ipf = (EIP_IPFRAG *) tmd; -+ eip_tx->dropped += ipf->frag_nr; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler aggreg packet dropped status = %d\n", status); -+ } else { -+ eip_tx->dropped++; -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "txhandler packet dropped status = %d\n", status); -+ } -+ } -+ -+ if (tmd->head == &eip_tx->head[EIP_TMD_STD]) { -+ eip_tmd_unload(tmd); -+ tmd->dma_base = 0; -+ tmd->nmd.nmd_len = -1; -+ } -+ -+ tmd->dma_len = -1; -+ -+ svc = 0; -+ while (tmd->skb) { -+ svc++; -+ -+ if (tmd->skb->destructor) -+ atomic_dec(&eip_tx->destructor); -+ -+ skb_next = tmd->skb->next; -+ dev_kfree_skb_any(tmd->skb); -+ tmd->skb = skb_next; -+ } -+ EIP_DBG_PRINTF(EIP_DBG_TMD, "IPF/TMD [%p] : %d skb RELEASE/FREED\n", tmd, svc); -+ -+ /* remove from inuse list */ -+ spin_lock_irqsave (&eip_tx->lock, flags); -+ list_del (&tmd->chain.link); -+ spin_unlock_irqrestore (&eip_tx->lock, flags); -+ -+ eip_tmd_put(tmd); -+} -+ -+static void eip_tx_tasklet(unsigned long arg) -+{ -+ struct timeval now; -+ unsigned long flags; -+ EIP_IPFRAG *ipf, *ipfq = NULL; -+ EP_NMD nmd; -+ struct list_head *list; -+ struct list_head *tmp; -+ char resched = 0; -+ char poll = 1; -+ -+ do_gettimeofday(&now); -+ -+ spin_lock_irqsave(&eip_tx->ipfraglock, flags); -+ if (eip_tx->ipfrag_count) { -+ list_for_each_safe(list, tmp, &eip_tx->ipfrag) { -+ ipf = list_entry(list, EIP_IPFRAG, list); -+ /* delta = (((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - ipf->timestamp.tv_usec; */ -+ if (((((now.tv_sec - ipf->timestamp.tv_sec) * 1000000UL) + now.tv_usec) - -+ ipf->timestamp.tv_usec) >= (1000UL * eip_tx->sysctl_ipfrag_to)) { -+ list_del(&ipf->list); -+ eip_tx->ipfrag_count--; -+ ipf->chain.next = (EIP_TMD *) ipfq; -+ ipfq = ipf; -+ } -+ } -+ } -+ if (eip_tx->ipfrag_count) -+ resched = 1; -+ spin_unlock_irqrestore(&eip_tx->ipfraglock, flags); -+ -+ while (ipfq) { -+ poll = 0; -+ -+ ep_nmd_subset(&nmd, &ipfq->nmd, 0, ipfq->dma_len); -+ -+ ipfq->payload.Data[ipfq->frag_nr] = 0; -+ -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_aggreg++; -+#endif -+ ipf = (EIP_IPFRAG *) ipfq->chain.next; -+ eip_do_xmit((EIP_TMD *) ipfq, &nmd, &ipfq->payload); -+ ipfq = ipf; -+ } -+ -+ if (poll) -+ ep_poll_transmits(eip_tx->xmtr); -+ -+ if (atomic_read(&eip_tx->destructor) || resched ) -+ tasklet_schedule(&eip_tx->tasklet); -+} -+void eip_start_queue() -+{ -+ if (netif_queue_stopped(eip_tx->net_device)) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Waking up %s queue\n", eip_tx->net_device->name); -+ netif_wake_queue(eip_tx->net_device); -+ } -+} -+void eip_stop_queue() -+{ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Stopping %s queue\n", eip_tx->net_device->name); -+ netif_stop_queue(eip_tx->net_device); -+} -+ -+static int eip_open(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s entering in promiscuous mode\n", devnet->name); -+ -+ netif_start_queue(devnet); -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x up\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static int eip_close(struct net_device *devnet) -+{ -+ if (devnet->flags & IFF_PROMISC) -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "%s leaving promiscuous mode\n", devnet->name); -+ -+ netif_stop_queue(devnet); -+ -+ eip_rx_tasklet(0); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x down\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ return 0; -+} -+ -+static struct net_device_stats *eip_get_stats(struct net_device *devnet) -+{ -+ static struct net_device_stats stats; -+ -+ stats.rx_packets = eip_rx->packets; -+ stats.rx_bytes = eip_rx->bytes; -+ stats.rx_errors = eip_rx->errors; -+ stats.rx_dropped = eip_rx->dropped; -+ -+ stats.tx_packets = eip_tx->packets; -+ stats.tx_bytes = eip_tx->bytes; -+ stats.tx_errors = eip_tx->errors; -+ stats.tx_dropped = eip_tx->dropped; -+ return &stats; -+} -+ -+static int eip_change_mtu(struct net_device *devnet, int mtu) -+{ -+ if (mtu <= EIP_MTU_MAX) { -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "MTU size changed from %d to %d\n", devnet->mtu, mtu); -+ devnet->mtu = mtu; -+ } -+ return 0; -+} -+ -+#ifdef MODULE -+int eip_init(void) -+{ -+ struct net_device *devnet; -+ int errno = 0; -+ -+ eip_rx_dropping = 0; -+ eip_rx_tasklet_locked = 1; -+ -+ /* timer up but not started */ -+ init_timer (&eip_rx_tasklet_timer); -+ eip_rx_tasklet_timer.function = eip_rx_tasklet_resched; -+ eip_rx_tasklet_timer.data = (unsigned long) 0; -+ eip_rx_tasklet_timer.expires = lbolt + hz; -+ -+ devnet = alloc_etherdev(sizeof(EIP_RX) + sizeof(EIP_TX)); -+ if (!devnet) { -+ EIP_ERR_PRINTF("Unable to ALLOCATE etherdev structure\n"); -+ return -ENOMEM; -+ } -+ strcpy (devnet->name, "eip0"); -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Enabling aggregation code\n"); -+ devnet->change_mtu = eip_change_mtu; -+ devnet->mtu = EIP_MTU_MAX; -+ devnet->open = eip_open; -+ devnet->stop = eip_close; -+ devnet->hard_start_xmit = eip_hard_start_xmit; -+ devnet->get_stats = eip_get_stats; -+ -+ /* devnet->features |= (NETIF_F_DYNALLOC); */ -+ /* devnet->features = (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA); */ -+ /* devnet->features |= (NETIF_F_SG|NETIF_F_FRAGLIST|NETIF_F_HIGHDMA|NETIF_F_HW_CSUM); */ -+ -+ eip_rx = (EIP_RX *) devnet->priv; -+ eip_tx = (EIP_TX *) (eip_rx + 1); -+ -+ /* instance 0 */ -+ eip_tx->ep_system = ep_system(); -+ if (eip_tx->ep_system == NULL) { -+ EIP_ERR_PRINTF("kernel comms for iface %s does not exist\n", devnet->name); -+ errno = -ENXIO; -+ goto out; -+ } -+ if (ep_waitfor_nodeid(eip_tx->ep_system) == ELAN_INVALID_NODE) { -+ EIP_ERR_PRINTF("network position not found\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ eip_tx->xmtr = ep_alloc_xmtr(eip_tx->ep_system); -+ if (!eip_tx->xmtr) { -+ EIP_ERR_PRINTF("Cannot create allocated transmitter - maybe cable is disconnected\n"); -+ errno = -EAGAIN; -+ goto out; -+ } -+ /* assign MAC address */ -+ *((int *) &devnet->dev_addr[4]) = htons(ep_nodeid(eip_tx->ep_system)); -+ eip_rx->net_device = devnet; -+ eip_tx->net_device = devnet; -+ -+ atomic_set(&eip_tx->destructor, 0); -+ -+ if ((tmd_max >= EIP_TMD_MIN_NR) && (tmd_max <= EIP_TMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tmd_max_nr to %d\n", tmd_max); -+ eip_tx->tmd_max_nr = tmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= tmd_max(%d) <= %d using default %d\n", -+ EIP_TMD_MIN_NR, tmd_max, EIP_TMD_MAX_NR, EIP_TMD_MAX_NR); -+ eip_tx->tmd_max_nr = EIP_TMD_MAX_NR; -+ } -+ -+ if ((rmd_max >= EIP_RMD_MIN_NR) && (rmd_max <= EIP_RMD_MAX_NR)) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting rmd_max_nr to %d\n", rmd_max); -+ eip_rx->rmd_max_nr = rmd_max; -+ } else { -+ EIP_ERR_PRINTF("parameter error : %d <= rmd_max(%d) <= %d using default %d\n", EIP_RMD_MIN_NR, -+ rmd_max, EIP_RMD_MAX_NR, EIP_RMD_MAX_NR); -+ eip_rx->rmd_max_nr = EIP_RMD_MAX_NR; -+ } -+ -+ if ((rx_envelope_nr > 0) && (rx_envelope_nr <= 1024)) { /* > 1024 don't be silly */ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting rx_envelope_nr to %d\n", rx_envelope_nr); -+ } else { -+ EIP_ERR_PRINTF("parameter error : 0 < rx_envelope_nr(%d) <= 1024 using default %d\n", -+ rx_envelope_nr, EIP_RX_ENVELOPE_NR); -+ rx_envelope_nr = EIP_RX_ENVELOPE_NR; -+ } -+ -+ if (tx_copybreak_max <= EIP_TX_COPYBREAK_MAX) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Setting tx_copybreak_max to %d\n", tx_copybreak_max); -+ } else { -+ EIP_ERR_PRINTF("parameter error : tx_copybreak_max > %d using default %d\n", -+ EIP_TX_COPYBREAK_MAX, EIP_TX_COPYBREAK_MAX); -+ tx_copybreak_max = EIP_TX_COPYBREAK_MAX; -+ } -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ -+ eip_tx->ipfrag_count = 0; -+ eip_aggregation_set(1); -+ eip_rx_granularity_set(rx_granularity); -+ eip_tx_copybreak_set(EIP_TX_COPYBREAK); -+ eip_ipfrag_to_set(EIP_IPFRAG_TO); -+ eip_ipfrag_copybreak_set(EIP_IPFRAG_COPYBREAK); -+ -+ spin_lock_init(&eip_tx->lock); -+ spin_lock_init(&eip_tx->ipfraglock); -+ spin_lock_init(&eip_rx->lock); -+ tasklet_init(&eip_rx->tasklet, eip_rx_tasklet, 0); -+ tasklet_init(&eip_tx->tasklet, eip_tx_tasklet, 0); -+ INIT_LIST_HEAD(&eip_tx->ipfrag); -+ INIT_LIST_HEAD(&eip_tx->inuse); -+ -+ /* if we fail here cannot do much yet; waiting for rcvr remove code in ep. */ -+ errno = eip_tmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_rmds_alloc(); -+ if (errno) -+ goto out; -+ -+ errno = eip_stats_init(); -+ if (errno) -+ goto out; -+ -+ if (ep_svc_indicator_set(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot set the service indicator\n"); -+ errno = -EINVAL; -+ goto out; -+ } -+ -+ eip_rx_tasklet_locked = 0; -+ tasklet_schedule(&eip_rx->tasklet); -+ -+ SET_MODULE_OWNER(eip_tx->net_device); -+ -+ if (register_netdev(devnet)) { -+ printk("eip: failed to register netdev\n"); -+ goto out; -+ } -+ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "iface %s MAC %02x:%02x:%02x:%02x:%02x:%02x ready\n", -+ devnet->name, (devnet->dev_addr[0]) & 0xff, -+ (devnet->dev_addr[1]) & 0xff, (devnet->dev_addr[2]) & 0xff, (devnet->dev_addr[3]) & 0xff, -+ (devnet->dev_addr[4]) & 0xff, (devnet->dev_addr[5]) & 0xff); -+ -+ return 0; -+ out: -+ unregister_netdev(devnet); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 25) -+ kfree(devnet); -+#else -+ free_netdev(devnet); -+#endif -+ -+ return errno; -+} -+void eip_exit(void) -+{ -+ int i; -+ -+ eip_rx_dropping = 1; /* means that new messages wont be sent to tcp stack */ -+ eip_rx_tasklet_locked = 1; -+ -+ netif_stop_queue(eip_tx->net_device); -+ -+ if (ep_svc_indicator_clear(eip_tx->ep_system, EP_SVC_EIP) != EP_SUCCESS) { -+ EIP_ERR_PRINTF("Cannot unset the service indicator\n"); -+ } -+ -+ schedule_timeout(10); -+ -+ del_timer_sync (&eip_rx_tasklet_timer); -+ -+ tasklet_disable(&eip_rx->tasklet); -+ tasklet_disable(&eip_tx->tasklet); -+ -+ tasklet_kill(&eip_tx->tasklet); -+ tasklet_kill(&eip_rx->tasklet); -+ -+ eip_rmds_free(); -+ eip_tmds_free(); -+ -+ /* that things freed */ -+ for (i = 0 ; i < EIP_SVC_NR ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d RMDs not FREED on SVC[%d]\n", EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), i); -+ } -+ for (i = 0 ; i < 3 ; i++) { -+ if ( EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats) != 0 ) -+ EIP_ERR_PRINTF("%d TMDs not freed on TX HEAD[%d]\n", EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), i); -+ -+ } -+ unregister_netdev(eip_tx->net_device); -+ kfree(eip_tx->net_device); -+ -+ eip_stats_cleanup(); -+} -+ -+module_init(eip_init); -+module_exit(eip_exit); -+ -+MODULE_PARM(eipdebug, "i"); -+MODULE_PARM_DESC(eipdebug, "Set debug flags"); -+ -+MODULE_PARM(rx_envelope_nr, "i"); -+MODULE_PARM_DESC(rx_enveloppe_nr, "Number of allocated enveloppe on the rx side"); -+ -+MODULE_PARM(tx_copybreak_max, "i"); -+MODULE_PARM_DESC(tx_copybreak_max, "Maximum size of the tx copybreak limit (default 512)"); -+ -+MODULE_PARM(tmd_max, "i"); -+MODULE_PARM(rmd_max, "i"); -+MODULE_PARM_DESC(tmd_max, "Maximun number of transmit buffers (default 64)"); -+MODULE_PARM_DESC(rmd_max, "Maximun number of receive buffers (default 64)"); -+ -+MODULE_PARM(tx_railmask, "i"); -+MODULE_PARM_DESC(tx_railmask, "Mask of which rails transmits can be queued on"); -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan IP driver"); -+MODULE_LICENSE("GPL"); -+#endif /* MODULE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/eip_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_linux.h 2005-07-28 14:52:52.775689832 -0400 -@@ -0,0 +1,399 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_linux.h,v 1.46.2.1 2004/10/01 10:49:38 mike Exp $" -+ -+#ifndef __EIP_LINUX_H -+#define __EIP_LINUX_H -+ -+#define EIP_WATERMARK (0xfab1e) -+ -+#define EIP_PAGES(s) (((s - 1) >> PAGE_SHIFT) + 1) -+#define EIP_DVMA_PAGES(s) ((s < PAGE_SIZE) ? EIP_PAGES(s) + 1 : EIP_PAGES(s)) -+ -+#define EIP_SVC_SMALLEST_LEN (1 << 9) /* 512 */ -+#define EIP_SVC_BIGGEST_LEN (1 << 16) /* 64k */ -+ -+#define EIP_SVC_SMALLEST (0) -+#define EIP_SVC_BIGGEST (7) -+ -+#define EIP_SVC_NR (8) -+#define EIP_SVC_EP(s) (s + EP_MSG_SVC_EIP512) -+ -+#define EIP_STAT_ALLOC_SHIFT (8) -+#define EIP_STAT_ALLOC_GET(atomicp) ((int) atomic_read(atomicp) >> EIP_STAT_ALLOC_SHIFT) -+#define EIP_STAT_ALLOC_ADD(atomicp, v) (atomic_add((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+#define EIP_STAT_ALLOC_SUB(atomicp, v) (atomic_sub((v << EIP_STAT_ALLOC_SHIFT), atomicp)) -+ -+#define EIP_STAT_QUEUED_MASK (0xff) -+#define EIP_STAT_QUEUED_GET(atomicp) ((int) atomic_read(atomicp) & EIP_STAT_QUEUED_MASK) -+ -+#define EIP_RMD_NR (8) -+#define EIP_RMD_MIN_NR (8) -+#define EIP_RMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_RMD_ALLOC_STEP (8) -+#define EIP_RMD_ALLOC_THRESH (16) -+ -+#define EIP_RMD_ALLOC (1) -+#define EIP_RMD_REPLACE (0) -+ -+#define EIP_TMD_NR (64) -+#define EIP_TMD_MIN_NR (16) -+#define EIP_TMD_MAX_NR (64) /* should be < than (1 << EIP_STAT_ALLOC_SHIFT) */ -+ -+#define EIP_TMD_TYPE_NR (3) -+#define EIP_TMD_COPYBREAK (0x0) -+#define EIP_TMD_STD (0x1) -+#define EIP_TMD_AGGREG (0x2) -+ -+#define EIP_TX_COPYBREAK (512) -+#define EIP_TX_COPYBREAK_MAX (1024) -+ -+#define EIP_IPFRAG_TO (50) /* time out before a frag is sent in msec */ -+#define EIP_IPFRAG_COPYBREAK (EIP_SVC_BIGGEST_LEN - sizeof(EIP_IPFRAG) - EIP_HEADER_PAD) -+ -+#define EIP_RX_ENVELOPE_NR ((EIP_RMD_MAX_NR*EIP_SVC_NR)/2) -+#define EIP_RX_GRANULARITY (1) -+ -+#define EIP_IP_ALIGN(X) (((X) + (15)) & ~(15)) -+#define EIP_EXTRA roundup (sizeof(EIP_RMD), 256) -+#define EIP_RCV_DMA_LEN(s) (s - EIP_EXTRA - EIP_HEADER_PAD) -+#define EIP_MTU_MAX (EIP_RCV_DMA_LEN(EIP_SVC_BIGGEST_LEN) - (ETH_HLEN)) -+ -+#define SIZE_TO_SVC(s, svc) \ -+ do { \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 9))) {svc = 0;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 10))) {svc = 1;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 11))) {svc = 2;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 12))) {svc = 3;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 13))) {svc = 4;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 14))) {svc = 5;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 15))) {svc = 6;break;} \ -+ if (s <= EIP_RCV_DMA_LEN((1 << 16))) {svc = 7;break;} \ -+ svc = -666; \ -+ EIP_ASSERT(1 == 0); \ -+ } while (0) -+ -+extern int eipdebug; -+#define EIP_ASSERT_ON -+/* #define NO_DEBUG */ -+ -+ -+/* ######################## */ -+#ifdef NO_DEBUG -+#define __EIP_DBG_PRINTF(fmt, args...) -+#define EIP_DBG_PRINTF(flag, fmt, args...) -+#else -+ -+#define EIP_DBG_RMD 0x1 -+#define EIP_DBG_TMD 0x2 -+#define EIP_DBG_RMD_HEAD 0x4 -+#define EIP_DBG_TMD_HEAD 0x8 -+#define EIP_DBG_EIPH 0x10 -+#define EIP_DBG_IPH 0x20 -+#define EIP_DBG_RMD_EP_DVMA 0x40 -+#define EIP_DBG_TMD_EP_DVMA 0x80 -+#define EIP_DBG_EP_DVMA (EIP_DBG_RMD_EP_DVMA|EIP_DBG_TMD_EP_DVMA) -+#define EIP_DBG_MEMALLOC 0x100 -+#define EIP_DBG_MEMFREE 0x200 -+#define EIP_DBG_RMD_QUEUE 0x400 -+#define EIP_DBG_TMD_QUEUE 0x800 -+#define EIP_DBG_GEN 0x1000 -+#define EIP_DBG_DEBUG 0x2000 -+ -+#define __EIP_DBG_PRINTF(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUFFER, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTF(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define __EIP_DBG_PRINTK(fmt, args...) (qsnet_debugf (QSNET_DEBUG_BUF_CON, " CPU #%d %s: " fmt, smp_processor_id(), __func__, ## args)) -+#define EIP_DBG_PRINTK(flag, fmt, args...) (unlikely(eipdebug & flag) ? __EIP_DBG_PRINTF(fmt, ## args):(void)0) -+ -+#define EIP_ERR_PRINTF(fmt, args...) __EIP_DBG_PRINTK("!!! ERROR !!! - " fmt, ## args) -+ -+ -+#define EIP_DBG2(flag, fn, fn_arg, fmt, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ (void)(fn)(fn_arg); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s: " fmt, smp_processor_id(), __func__, ##args); \ -+ } -+ -+ -+#define EIP_DBG(flag, fn, args...) \ -+ if (unlikely(eipdebug & flag)) { \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "+CPU #%d %s\n", smp_processor_id(), __func__); \ -+ (void)(fn)(args); \ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "-CPU #%d %s :\n", smp_processor_id(), __func__); \ -+ } -+#endif /* NO_DEBUG */ -+ -+ -+#ifdef EIP_ASSERT_ON -+ -+#define __EIP_ASSERT_PRINT(exp) \ -+ eipdebug = 0xffff; \ -+ EIP_ERR_PRINTF("ASSERT : %s, %s::%d\n", \ -+ #exp, __BASE_FILE__, __LINE__); -+ -+#define EIP_ASSERT(exp) \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ netif_stop_queue(eip_tx->net_device); \ -+ } -+ -+#define EIP_ASSERT2(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_BUG(exp) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ BUG(); \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_GOTO(exp, label, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ goto label; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RET(exp, ret) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ return ret; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETURN(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return; \ -+ } \ -+ } while (0) -+ -+#define EIP_ASSERT_RETNULL(exp, f, arg) \ -+ do { \ -+ if (!(exp)) { \ -+ __EIP_ASSERT_PRINT(exp); \ -+ f(arg); \ -+ return NULL; \ -+ } \ -+ } while (0) -+ -+#else -+ -+#define EIP_ASSERT(exp) do {} while(0) -+#define EIP_ASSERT_OUT(exp) do {} while(0) -+#define EIP_ASSERT_RETURN(exp) do {} while(0) -+#define EIP_ASSERT_RETNULL(exp) do {} while(0) -+#define EIP_ASSERT_BUG(exp) do {} while(0) -+ -+#endif /* EIP_ASSERT */ -+ -+ -+ -+typedef struct { -+ u_short ip_bcast; -+ u_short ip_inst; -+ u_short ip_addr; -+} EIP_ADDRESS; -+ -+typedef struct { -+ EIP_ADDRESS h_dhost; -+ EIP_ADDRESS h_shost; -+ u_short h_sap; -+} EIP_HEADER; -+#define EIP_HEADER_PAD (2) -+ -+typedef struct eip_proc_fs { -+ const char *name; -+ struct proc_dir_entry **parent; -+ read_proc_t *read; -+ write_proc_t *write; -+ unsigned char allocated; -+ struct proc_dir_entry *entry; -+} EIP_PROC_FS; -+ -+#define EIP_PROC_ROOT_DIR "eip" -+ -+#define EIP_PROC_DEBUG_DIR "debug" -+#define EIP_PROC_DEBUG_RX_FLUSH "rx_flush" -+#define EIP_PROC_DEBUG_TX_FLUSH "tx_flush" -+ -+#define EIP_PROC_AGGREG_DIR "aggregation" -+#define EIP_PROC_AGGREG_ONOFF "enable" -+#define EIP_PROC_AGGREG_TO "timeout" -+#define EIP_PROC_AGGREG_COPYBREAK "copybreak" -+ -+#define EIP_PROC_TX_COPYBREAK "tx_copybreak" -+#define EIP_PROC_STATS "stats" -+#define EIP_PROC_RX_GRAN "rx_granularity" -+#define EIP_PROC_TX_RAILMASK "tx_railmask" -+#define EIP_PROC_TMD_INUSE "tmd_inuse" -+#define EIP_PROC_EIPDEBUG "eipdebug" -+#define EIP_PROC_CHECKSUM "checksum" -+ -+/* RX */ -+/* dma_len is used to keep the len of a received packet */ -+/* nmd.nmd_len is the max dma that can be received */ -+/* */ -+struct eip_rmd { -+ struct sk_buff *skb; -+ -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ EP_RXD *rxd; -+ struct eip_rmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_rmd *next; /* all other lists */ -+ } chain; -+}; -+typedef struct eip_rmd EIP_RMD; -+struct eip_rmd_head { -+ EP_NMH *handle; -+ -+ EP_RCVR *rcvr; -+ EIP_RMD *busy_list; -+ -+ /* stats */ -+ atomic_t stats; -+ unsigned long dma; -+}; -+ -+typedef struct eip_rmd_head EIP_RMD_HEAD; -+typedef struct eip_rx { -+ struct eip_rmd_head head[EIP_SVC_NR]; -+ -+ EIP_RMD *irq_list; -+ short irq_list_nr; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long reschedule; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+ unsigned char rmd_max_nr; -+ unsigned char sysctl_granularity; -+ struct net_device *net_device; -+} EIP_RX; -+ -+/* TX */ -+/* dma_len_max is the maximum len for a given DMA */ -+/* where mnd.nmd_len is the len of the packet to send ~> than skb->len */ -+typedef struct eip_ipfrag_handle { -+ /* common with tmd */ -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+ -+ /* private */ -+ struct list_head list; -+ struct timeval timestamp; -+ unsigned int frag_nr; -+ int datagram_len; /* Ip data */ -+ int dma_correction; -+ EP_PAYLOAD payload; -+} EIP_IPFRAG; -+ -+struct eip_tmd { -+ unsigned long dma_base; -+ int dma_len; -+ EP_NMD nmd; -+ u16 dvma_idx; -+ -+ struct sk_buff *skb; -+ struct eip_tmd_head *head; -+ union { -+ struct list_head link; /* when on "busy" list */ -+ struct eip_tmd *next; /* all other lists */ -+ } chain; -+}; -+ -+struct eip_tmd_head { -+ EP_NMH *handle; -+ -+ struct eip_tmd *tmd; -+ atomic_t stats; -+}; -+ -+typedef struct eip_tmd EIP_TMD; -+typedef struct eip_tmd_head EIP_TMD_HEAD; -+ -+/* #define EIP_MORE_STATS */ -+ -+typedef struct eip_tx { -+ struct net_device *net_device; -+ EP_XMTR *xmtr; -+ EP_SYS *ep_system; -+ -+ struct eip_tmd_head head[EIP_TMD_TYPE_NR]; -+ struct list_head inuse; -+ atomic_t destructor; -+ -+ /* stats */ -+ unsigned long packets; -+ unsigned long bytes; -+ unsigned long errors; -+ unsigned long dropped; -+ unsigned long dma[EIP_SVC_NR]; -+ -+#ifdef EIP_MORE_STATS -+ unsigned long sent_copybreak; -+ unsigned long sent_std; -+ unsigned long sent_aggreg; -+#endif -+ -+ unsigned char tmd_max_nr; -+ -+ unsigned short sysctl_copybreak; -+ unsigned short sysctl_ipfrag_to; -+ unsigned short sysctl_ipfrag_copybreak; -+ unsigned short sysctl_aggregation; -+ -+ unsigned short ipfrag_count; -+ struct list_head ipfrag; -+ spinlock_t ipfraglock; -+ -+ spinlock_t lock; -+ struct tasklet_struct tasklet; -+} EIP_TX; -+ -+/* =============================================== */ -+ /* unsigned long multicast; */ -+#endif /* __EIP_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_stats.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/eip_stats.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_stats.c 2005-07-28 14:52:52.776689680 -0400 -@@ -0,0 +1,375 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: eip_stats.c,v 1.34.2.2 2005/03/20 12:01:22 david Exp $ -+ * $Source: /cvs/master/quadrics/eipmod/eip_stats.c,v $ -+ */ -+ -+#include -+#include -+ -+#include -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+#include -+ -+#include "eip_linux.h" -+#include "eip_stats.h" -+ -+extern EIP_RX *eip_rx; -+extern EIP_TX *eip_tx; -+extern int tx_copybreak_max; -+extern EP_RAILMASK tx_railmask; -+extern int eip_checksum_state; -+extern void eip_stop_queue(void); -+extern void eip_start_queue(void); -+ -+static int eip_stats_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int i, outlen = 0; -+ -+ *buf = '\0'; -+ strcat(buf, "\n"); -+ strcat(buf, "--------------------------------------------+------------+-----------------+\n"); -+ strcat(buf, " SKB/DMA | | Rx | Tx | TMD TYPE |\n"); -+ strcat(buf, "--------------------------------------------+------------|-----------------+\n"); -+ -+ i = 0; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #1[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #2[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld | #3[%3.3d/%3.3d/%3.3d] |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i], -+ EIP_STAT_QUEUED_GET(&eip_tx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_tx->head[i].stats), -+ eip_tx->tmd_max_nr); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld +-----------------+\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ i++; -+ sprintf(buf + strlen(buf), " [%5d/%5d] | [%3.3d/%3.3d/%3.3d] | %10ld | %10ld |\n", -+ EIP_SVC_SMALLEST_LEN << i, (int) EIP_RCV_DMA_LEN((EIP_SVC_SMALLEST_LEN << i)), -+ EIP_STAT_QUEUED_GET(&eip_rx->head[i].stats), EIP_STAT_ALLOC_GET(&eip_rx->head[i].stats), -+ eip_rx->rmd_max_nr, eip_rx->head[i].dma, eip_tx->dma[i]); -+ -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ sprintf(buf + strlen(buf), " RMD IRQ %4.4d %10lu | %10lu |\n", -+ eip_rx->irq_list_nr, -+ eip_rx->packets, eip_tx->packets); -+ strcat(buf, "--------------------------------------------+------------+\n"); -+ -+#ifdef EIP_MORE_STATS -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), " Copybreak %10ld Std %10ld Aggreg %10ld\n", -+ eip_tx->sent_copybreak, eip_tx->sent_std, eip_tx->sent_aggreg); -+#endif -+ -+ -+ strcat(buf, "\n"); -+ sprintf(buf + strlen(buf), "Rx bytes: %lu (%lu Mb) errors: %lu dropped: %lu reschedule: %lu\n", -+ eip_rx->bytes, eip_rx->bytes / (1024 * 1024), eip_rx->errors, eip_rx->dropped, eip_rx->reschedule); -+ sprintf(buf + strlen(buf), "Tx bytes: %lu (%lu Mb) errors: %lu dropped: %lu\n", -+ eip_tx->bytes, eip_tx->bytes / (1024 * 1024), eip_tx->errors, eip_tx->dropped); -+ strcat(buf, "\n"); -+ -+ outlen = strlen(buf); -+ ASSERT(outlen < PAGE_SIZE); -+ *eof = 1; -+ return outlen; -+} -+ -+void eip_stats_dump(void) -+{ -+ int eof; -+ -+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ -+ if (buf == NULL) -+ { -+ printk("no memory to produce eip_stats\n"); -+ return; -+ } -+ -+ eip_stats_read(buf, NULL, 0, 0, &eof, NULL); -+ -+ printk(buf); -+ -+ kfree(buf); -+} -+ -+static int eip_stats_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&eip_rx->lock, flags); -+ eip_rx->packets = 0; -+ eip_rx->bytes = 0; -+ eip_rx->errors = 0; -+ eip_rx->dropped = 0; -+ eip_rx->reschedule = 0; -+ for (i = 0; i < EIP_SVC_NR; eip_rx->head[i].dma = 0, i++); -+ spin_unlock_irqrestore(&eip_rx->lock, flags); -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ eip_tx->packets = 0; -+ eip_tx->bytes = 0; -+ eip_tx->errors = 0; -+ eip_tx->dropped = 0; -+#ifdef EIP_MORE_STATS -+ eip_tx->sent_copybreak = 0; -+ eip_tx->sent_std = 0; -+ eip_tx->sent_aggreg = 0; -+#endif -+ for (i = 0; i < EIP_SVC_NR; eip_tx->dma[i] = 0, i++); -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return count; -+} -+ -+#define eip_stats_var_write(name) \ -+static int eip_stats_##name##_write(struct file *file, const char *buf, unsigned long count, void *data) \ -+{ \ -+ char * b = (char *) buf; \ -+ *(b + count) = '\0'; \ -+ eip_##name##_set((int) simple_strtoul(b, NULL, 10)); \ -+ return count; \ -+} -+ -+#define eip_stats_var_read(name, var) \ -+static int eip_stats_##name##_read(char *buf, char **start, off_t off, int count, int *eof, void *data) \ -+{ \ -+ sprintf(buf, "%d\n", var); \ -+ *eof = 1; \ -+ return strlen(buf); \ -+} -+ -+ -+#define eip_stats_var_set(name, min, max, default, var) \ -+void eip_##name##_set(int i) \ -+{ \ -+ if ( (i >= min) && (i <= max)) { \ -+ EIP_DBG_PRINTK(EIP_DBG_GEN, "Setting " #name " to %d\n", i); \ -+ var =(unsigned short) i; \ -+ } \ -+ else { \ -+ EIP_ERR_PRINTF("parameter error : %d <= " #name "(%d) <= %d using default %d\n", min, i, (int) max, (int) default); \ -+ } \ -+} -+ -+eip_stats_var_set(tx_copybreak, 0, tx_copybreak_max, EIP_TX_COPYBREAK, eip_tx->sysctl_copybreak); -+eip_stats_var_set(rx_granularity, 1, EIP_RMD_MIN_NR, EIP_RX_GRANULARITY, eip_rx->sysctl_granularity); -+eip_stats_var_set(tx_railmask, 0, EP_RAILMASK_ALL, EP_RAILMASK_ALL, tx_railmask); -+eip_stats_var_set(ipfrag_to, 0, (1 << 16), EIP_IPFRAG_TO, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_set(aggregation, 0, 1, 1, eip_tx->sysctl_aggregation); -+eip_stats_var_set(ipfrag_copybreak, 0, EIP_IPFRAG_COPYBREAK, EIP_IPFRAG_COPYBREAK, eip_tx->sysctl_ipfrag_copybreak); -+/* eip_stats_var_set(eipdebug, 0, , 0, eipdebug); */ -+ -+eip_stats_var_read(aggregation, eip_tx->sysctl_aggregation); -+eip_stats_var_read(ipfrag_count, eip_tx->ipfrag_count); -+eip_stats_var_read(ipfrag_to, eip_tx->sysctl_ipfrag_to); -+eip_stats_var_read(ipfrag_copybreak, eip_tx->sysctl_ipfrag_copybreak); -+eip_stats_var_read(tx_copybreak, eip_tx->sysctl_copybreak); -+eip_stats_var_read(rx_granularity, eip_rx->sysctl_granularity); -+eip_stats_var_read(tx_railmask, tx_railmask); -+ -+eip_stats_var_write(aggregation); -+eip_stats_var_write(ipfrag_to); -+eip_stats_var_write(ipfrag_copybreak); -+eip_stats_var_write(tx_copybreak); -+eip_stats_var_write(rx_granularity); -+eip_stats_var_write(tx_railmask); -+ -+ -+static int eip_checksum_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * b = (char *) buf; -+ int value; -+ -+ *(b + count) = '\0'; -+ -+ value = (int) simple_strtoul(b, NULL, 10); -+ if ((value >= CHECKSUM_NONE) && (value <= CHECKSUM_UNNECESSARY)) -+ eip_checksum_state = value; -+ else -+ EIP_ERR_PRINTF("%d <= checksum(%d) <= %d using old value %d\n", CHECKSUM_NONE, value, CHECKSUM_UNNECESSARY, eip_checksum_state); -+ -+ return count; -+} -+ -+static int eip_checksum_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ switch ( eip_checksum_state ) -+ { -+ case 0 : sprintf(buf, "0 CHECKSUM_NONE\n"); break; -+ case 1 : sprintf(buf, "1 CHECKSUM_HW\n"); break; -+ case 2 : sprintf(buf, "2 CHECKSUM_UNNECESSARY\n"); break; -+ default : sprintf(buf, "%d INVALID VALUE\n", eip_checksum_state); break; -+ } -+ *eof = 1; -+ return strlen(buf); -+} -+ -+static int eip_stats_eipdebug_read(char *buf, char **start, off_t off, int count, int *eof, void *data) -+{ -+ *buf = '\0'; -+ sprintf(buf + strlen(buf), "0x%x\n", eipdebug); -+ *eof = 1; -+ return strlen(buf); -+} -+static int eip_stats_eipdebug_write(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char * p = (char *) buf; -+ *(p + count - 1) = '\0'; -+ eipdebug = simple_strtoul(p, NULL, 0); -+ __EIP_DBG_PRINTK("Setting eipdebug to 0x%x\n", eipdebug); -+ return count; -+} -+ -+static int eip_stats_tmd_inuse_read(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ struct list_head *lp; -+ unsigned long flags; -+ unsigned int len = 0; -+ -+ spin_lock_irqsave(&eip_tx->lock, flags); -+ list_for_each (lp, &eip_tx->inuse) { -+ EIP_TMD *tmd = list_entry (lp, EIP_TMD, chain.link); -+ EIP_HEADER *eiph = (EIP_HEADER *) tmd->dma_base; -+ -+ len += sprintf(page+len, "tmd=%p id=%d len=%d\n", -+ tmd, eiph ? ntohs(eiph->h_dhost.ip_addr) : -1, -+ tmd->dma_len); -+ -+ if (len + 40 >= count) -+ break; -+ } -+ spin_unlock_irqrestore(&eip_tx->lock, flags); -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, len); -+} -+ -+static int eip_stats_debug_rx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing rx ...\n"); -+ tasklet_schedule(&eip_rx->tasklet); -+ return count; -+} -+static int eip_stats_debug_tx_flush(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Flushing tx ... %d tmds reclaimed\n", ep_enable_txcallbacks(eip_tx->xmtr)); -+ ep_disable_txcallbacks(eip_tx->xmtr); -+ tasklet_schedule(&eip_tx->tasklet); -+ return count; -+} -+ -+#define EIP_PROC_PARENT_NR (3) -+/* NOTE : the parents should be declared b4 the children */ -+static EIP_PROC_FS eip_procs[] = { -+ /* {name, parent, read fn, write fn, allocated, entry}, */ -+ {EIP_PROC_ROOT_DIR, &qsnet_procfs_root, NULL, NULL, 0, NULL}, -+ {EIP_PROC_DEBUG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_DIR, &eip_procs[0].entry, NULL, NULL, 0, NULL}, /* end of parents */ -+ {EIP_PROC_STATS, &eip_procs[0].entry, eip_stats_read, eip_stats_write, 0, NULL}, -+ {EIP_PROC_TX_COPYBREAK, &eip_procs[0].entry, eip_stats_tx_copybreak_read, eip_stats_tx_copybreak_write, 0, NULL}, -+ {EIP_PROC_RX_GRAN, &eip_procs[0].entry, eip_stats_rx_granularity_read, eip_stats_rx_granularity_write, 0, NULL}, -+ {EIP_PROC_TX_RAILMASK, &eip_procs[0].entry, eip_stats_tx_railmask_read, eip_stats_tx_railmask_write, 0, NULL}, -+ {EIP_PROC_TMD_INUSE, &eip_procs[0].entry, eip_stats_tmd_inuse_read, NULL, 0, NULL}, -+ {EIP_PROC_EIPDEBUG, &eip_procs[0].entry, eip_stats_eipdebug_read, eip_stats_eipdebug_write, 0, NULL}, -+ {EIP_PROC_CHECKSUM, &eip_procs[0].entry, eip_checksum_read, eip_checksum_write, 0, NULL}, -+ {EIP_PROC_DEBUG_RX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_rx_flush, 0, NULL}, -+ {EIP_PROC_DEBUG_TX_FLUSH, &eip_procs[1].entry, NULL, eip_stats_debug_tx_flush, 0, NULL}, -+ {"ipfrag_count", &eip_procs[2].entry, eip_stats_ipfrag_count_read, NULL, 0, NULL}, -+ {EIP_PROC_AGGREG_TO, &eip_procs[2].entry, eip_stats_ipfrag_to_read, eip_stats_ipfrag_to_write, 0, NULL}, -+ {EIP_PROC_AGGREG_ONOFF, &eip_procs[2].entry, eip_stats_aggregation_read, eip_stats_aggregation_write, 0, NULL}, -+ {EIP_PROC_AGGREG_COPYBREAK, &eip_procs[2].entry, eip_stats_ipfrag_copybreak_read, eip_stats_ipfrag_copybreak_write, 0, NULL}, -+ {NULL, NULL, NULL, NULL, 1, NULL}, -+}; -+ -+int eip_stats_init(void) -+{ -+ int p; -+ -+ for (p = 0; !eip_procs[p].allocated; p++) { -+ if (p < EIP_PROC_PARENT_NR) -+ eip_procs[p].entry = proc_mkdir(eip_procs[p].name, *eip_procs[p].parent); -+ else -+ eip_procs[p].entry = create_proc_entry(eip_procs[p].name, 0, *eip_procs[p].parent); -+ -+ if (!eip_procs[p].entry) { -+ EIP_ERR_PRINTF("%s\n", "Cannot allocate proc entry"); -+ eip_stats_cleanup(); -+ return -ENOMEM; -+ } -+ -+ eip_procs[p].entry->owner = THIS_MODULE; -+ eip_procs[p].entry->write_proc = eip_procs[p].write; -+ eip_procs[p].entry->read_proc = eip_procs[p].read; -+ eip_procs[p].allocated = 1; -+ } -+ eip_procs[p].allocated = 0; -+ return 0; -+} -+ -+void eip_stats_cleanup(void) -+{ -+ int p; -+ for (p = (sizeof (eip_procs)/sizeof (eip_procs[0]))-1; p >= 0; p--) -+ if (eip_procs[p].allocated) { -+ EIP_DBG_PRINTF(EIP_DBG_GEN, "Removing %s from proc\n", eip_procs[p].name); -+ remove_proc_entry(eip_procs[p].name, *eip_procs[p].parent); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_stats.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/eip_stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/eip_stats.h 2005-07-28 14:52:52.776689680 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: eip_stats.h,v 1.14 2004/05/10 14:47:47 daniel Exp $" -+ -+#ifndef __EIP_STATS_H -+#define __EIP_STATS_H -+ -+int eip_stats_init(void); -+void eip_stats_cleanup(void); -+void eip_rx_granularity_set(int); -+void eip_tx_copybreak_set(int); -+void eip_ipfrag_to_set(int); -+void eip_aggregation_set(int); -+void eip_ipfrag_copybreak_set(int); -+void eip_stats_dump(void); -+ -+#endif /* __EIP_STATS_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/Makefile 2005-07-28 14:52:52.776689680 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/eip/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_EIP) += eip.o -+eip-objs := eip_linux.o eip_stats.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/Makefile.conf 2005-07-28 14:52:52.777689528 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = eip.o -+MODULENAME = eip -+KOBJFILES = eip_linux.o eip_stats.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_EIP -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/eip/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/eip/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/eip/quadrics_version.h 2005-07-28 14:52:52.777689528 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/bitmap.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/bitmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/bitmap.c 2005-07-28 14:52:52.777689528 -0400 -@@ -0,0 +1,287 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: bitmap.c,v 1.5 2004/01/20 17:32:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/bitmap.c,v $*/ -+ -+#if defined(__KERNEL__) -+#include -+#endif -+#include -+#include -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_freebit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit off */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != ~((bitmap_t) 0)) -+ break; -+ -+ if (i <= last) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == 0) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+ -+} -+ -+/* -+ * bt_lowbit: -+ * Return the index of the lowest set bit in the -+ * bitmap, or -1 for failure. -+ */ -+int -+bt_lowbit (bitmap_t *bitmap, int nbits) -+{ -+ int last = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for a word with a bit on */ -+ for (i = 0; i <= last; i++) -+ if (bitmap[i] != 0) -+ break; -+ if (i <= last) -+ { -+ /* found a word bit a bit on, now see which bit it is */ -+ maxbit = (i == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if (bitmap[i] & (1 << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ -+ return (-1); -+} -+ -+/* -+ * Return the index of the first available bit in the -+ * bitmap , or -1 for failure -+ */ -+int -+bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset) -+{ -+ int first = ((last+1) + BT_NBIPUL-1) >> BT_ULSHIFT; -+ int end = (--nbits) >> BT_ULSHIFT; -+ int maxbit; -+ int i, j; -+ -+ /* look for bits before the first whole word */ -+ if (((last+1) & BT_ULMASK) != 0) -+ { -+ maxbit = ((first-1) == last) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = ((last+1) & BT_ULMASK); j <= maxbit; j++) -+ if ((bitmap[first-1] & (1 << j)) == (isset << j)) -+ return (((first-1) << BT_ULSHIFT) | j); -+ } -+ -+ /* look for a word with a bit off */ -+ for (i = first; i <= end; i++) -+ if (bitmap[i] != (isset ? 0 : ~((bitmap_t) 0))) -+ break; -+ -+ if (i <= end) -+ { -+ /* found an word with a bit off, now see which bit it is */ -+ maxbit = (i == end) ? (nbits & BT_ULMASK) : (BT_NBIPUL-1); -+ for (j = 0; j <= maxbit; j++) -+ if ((bitmap[i] & (1 << j)) == (isset << j)) -+ return ((i << BT_ULSHIFT) | j); -+ } -+ return (-1); -+} -+ -+void -+bt_copy (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ b[i] = a[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(a, i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR(b,i); -+} -+ -+void -+bt_zero (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = 0; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_CLEAR(bitmap,i); -+} -+ -+void -+bt_fill (bitmap_t *bitmap, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ bitmap[i] = ~((bitmap_t) 0); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ BT_SET(bitmap,i); -+} -+ -+int -+bt_cmp (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ if (a[i] != b[i]) -+ return (1); -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) != BT_TEST(b, i)) -+ return (1); -+ return (0); -+} -+ -+void -+bt_intersect (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (a, i) && BT_TEST (b, i)) -+ BT_SET (a, i); -+ else -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_remove (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] &= ~b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST (b, i)) -+ BT_CLEAR (a, i); -+} -+ -+void -+bt_add (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < (nbits>>BT_ULSHIFT); i++) -+ a[i] |= b[i]; -+ -+ for (i <<= BT_ULSHIFT; i < nbits; i++) -+ if (BT_TEST(b, i)) -+ BT_SET (a, i); -+} -+ -+/* -+ * bt_spans : partition a spans partition b -+ * == all bits set in 'b' are set in 'a' -+ */ -+int -+bt_spans (bitmap_t *a, bitmap_t *b, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ if (BT_TEST (b, i) && !BT_TEST (a, i)) -+ return (0); -+ return (1); -+} -+ -+/* -+ * bt_subset: copy [base,base+nbits-1] from 'a' to 'b' -+ */ -+void -+bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, base+i)) -+ BT_SET(b,i); -+ else -+ BT_CLEAR (b,i); -+ } -+} -+ -+void -+bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (!BT_TEST (a, i) && BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+void -+bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits) -+{ -+ int i; -+ -+ for (i = 0; i < nbits; i++) -+ { -+ if (BT_TEST (a, i) && !BT_TEST (b, i)) -+ { -+ BT_SET (c, i); -+ } -+ else -+ { -+ BT_CLEAR (c, i); -+ } -+ } -+} -+ -+int -+bt_nbits (bitmap_t *a, int nbits) -+{ -+ int i, c; -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (a, i)) -+ c++; -+ return (c); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/capability.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/capability.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/capability.c 2005-07-28 14:52:52.779689224 -0400 -@@ -0,0 +1,628 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.c,v 1.13 2004/07/20 10:15:33 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.c,v $ */ -+ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_cap_list); -+ -+typedef struct elan_vp_struct -+{ -+ struct list_head list; -+ ELAN_CAPABILITY vp; -+} ELAN_VP_NODE_STRUCT; -+ -+ -+typedef struct elan_attached_struct -+{ -+ void *cb_args; -+ ELAN_DESTROY_CB cb_func; -+} ELAN_ATTACHED_STRUCT; -+ -+typedef struct elan_cap_node_struct -+{ -+ struct list_head list; -+ ELAN_CAP_STRUCT node; -+ ELAN_ATTACHED_STRUCT *attached[ELAN_MAX_RAILS]; -+ struct list_head vp_list; -+} ELAN_CAP_NODE_STRUCT; -+ -+ -+ELAN_CAP_NODE_STRUCT * -+find_cap_node(ELAN_CAPABILITY *cap) -+{ -+ struct list_head *tmp; -+ ELAN_CAP_NODE_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)) { -+ return ptr; -+ } -+ } -+ return ptr; -+}; -+ -+ELAN_VP_NODE_STRUCT * -+find_vp_node( ELAN_CAP_NODE_STRUCT *cap_node,ELAN_CAPABILITY *map) -+{ -+ struct list_head * tmp; -+ ELAN_VP_NODE_STRUCT * ptr = NULL; -+ -+ list_for_each(tmp, &cap_node->vp_list) { -+ ptr = list_entry(tmp, ELAN_VP_NODE_STRUCT , list); -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->vp,map) -+ && ELAN_CAP_GEOM_MATCH(&ptr->vp,map)){ -+ return ptr; -+ } -+ } -+ return ptr; -+} -+ -+int -+elan_validate_cap(ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* check versions */ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (EINVAL); -+ } -+ -+ /* check its not HWTEST */ -+ if ( cap->cap_type & ELAN_CAP_TYPE_HWTEST ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_HWTEST \n"); -+ return (EINVAL); -+ } -+ -+ /* check its type */ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_KERNEL : -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_cap: failed type = ELAN_CAP_TYPE_KERNEL \n"); -+ return (EINVAL); -+ -+ /* check it has a valid type */ -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ break; -+ -+ /* all others are failed as well */ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed unknown type = %x \n", (cap->cap_type & ELAN_CAP_TYPE_MASK)); -+ return (EINVAL); -+ } -+ -+ if ((cap->cap_lowcontext == ELAN_CAP_UNINITIALISED) || (cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ || (cap->cap_lownode == ELAN_CAP_UNINITIALISED) || (cap->cap_highnode == ELAN_CAP_UNINITIALISED)) -+ { -+ -+ ELAN_DEBUG4 (ELAN_DBG_VP,"elan_validate_cap: ELAN_CAP_UNINITIALISED LowNode %d HighNode %d LowContext %d highContext %d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP,"elan_validate_cap: (cap->cap_lownode > cap->cap_highnode) %d %d\n",cap->cap_lownode, cap->cap_highnode); -+ return (EINVAL); -+ } -+ -+ if (cap->cap_mycontext != ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_cap: failed cap->cap_mycontext is set %d \n", cap->cap_mycontext); -+ return (EINVAL); -+ } -+ -+ -+ if ((ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)) > ELAN_MAX_VPS) -+ { -+ ELAN_DEBUG6 (ELAN_DBG_VP,"elan_validate_cap: too many vps LowNode %d HighNode %d LowContext %d highContext %d, %d >% d\n", -+ cap->cap_lownode , cap->cap_highnode, -+ cap->cap_lowcontext , cap->cap_highcontext, -+ (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap)), -+ ELAN_MAX_VPS); -+ -+ return (EINVAL); -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+elan_validate_map(ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vptr = NULL; -+ char space[256]; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map \n"); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map cap = %s \n",elan_capability_string(cap,space)); -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_validate_map map = %s \n",elan_capability_string(map,space)); -+ -+ /* does cap exist */ -+ ptr = find_cap_node(cap); -+ if ( ptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not found \n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ /* is it active */ -+ if ( ! ptr->node.active ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap not active \n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* are they the same */ -+ if ( ELAN_CAP_TYPE_MATCH(cap,map) -+ && ELAN_CAP_GEOM_MATCH(cap,map)) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: cap == map passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ -+ /* is map in map list */ -+ vptr = find_vp_node(ptr, map); -+ if ( vptr == NULL ) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map not found\n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP,"elan_validate_map: map passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_create_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1 (ELAN_DBG_VP,"elan_create_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* need to check that the cap does not over lap another one -+ or is an exact match with only the userkey changing */ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if ( ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) -+ && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) -+ && (&ptr->node.owner == owner)) { -+ if ( ptr->node.active ) { -+ /* dont inc attached count as its like a create */ -+ ptr->node.cap.cap_userkey = cap->cap_userkey; -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ else -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ } -+ -+ /* does it overlap, even with ones being destroyed */ -+ if (elan_cap_overlap(&ptr->node.cap,cap)) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EACCES; -+ } -+ } -+ -+ /* create it */ -+ KMEM_ALLOC(ptr, ELAN_CAP_NODE_STRUCT *, sizeof(ELAN_CAP_NODE_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ /* create space for the attached array */ -+ for(rail=0;railattached[rail]=NULL; -+ if ( ELAN_CAP_IS_RAIL_SET(cap,rail) ) -+ { -+ KMEM_ALLOC(ptr->attached[rail], ELAN_ATTACHED_STRUCT *, sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap), 1); -+ if (ptr->attached[rail] == NULL) -+ { -+ for(;rail>=0;rail--) -+ if ( ptr->attached[rail] ) -+ KMEM_FREE(ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(cap)); -+ -+ KMEM_FREE(ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ /* blank the attached array */ -+ for(i=0;iattached[rail][i].cb_func = NULL; -+ } -+ } -+ -+ ptr->node.owner = owner; -+ ptr->node.cap = *cap; -+ ptr->node.attached = 1; /* creator counts as attached */ -+ ptr->node.active = 1; -+ ptr->vp_list.next = &(ptr->vp_list); -+ ptr->vp_list.prev = &(ptr->vp_list); -+ -+ list_add_tail(&ptr->list, &elan_cap_list); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+void -+elan_destroy_cap_test(ELAN_CAP_NODE_STRUCT *cap_ptr) -+{ -+ /* called by someone holding the mutex */ -+ struct list_head * vp_tmp; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int rail; -+ -+ /* check to see if it can be deleted now */ -+ if ( cap_ptr->node.attached == 0 ) { -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_cap_test: attached == 0\n"); -+ -+ /* delete the vp list */ -+ list_for_each(vp_tmp, &(cap_ptr->vp_list)) { -+ vp_ptr = list_entry(vp_tmp, ELAN_VP_NODE_STRUCT , list); -+ list_del(&vp_ptr->list); -+ KMEM_FREE( vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ } -+ -+ list_del(&cap_ptr->list); -+ -+ /* delete space for the attached array */ -+ for(rail=0;railattached[rail]) -+ KMEM_FREE(cap_ptr->attached[rail], sizeof(ELAN_ATTACHED_STRUCT) * ELAN_CAP_NUM_CONTEXTS(&(cap_ptr->node.cap))); -+ -+ KMEM_FREE(cap_ptr, sizeof(ELAN_CAP_NODE_STRUCT)); -+ } -+} -+ -+int -+elan_destroy_cap(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap) -+{ -+ char space[127]; -+ struct list_head * el; -+ struct list_head * nel; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ int i, rail; -+ int found = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_destroy_cap %s\n",elan_capability_string(cap,space)); -+ -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if ( (ptr->node.owner == owner ) -+ && ( (cap == NULL) -+ || (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap)))) { -+ -+ if ( ptr->node.active ) { -+ -+ /* mark as in active and dec attached count */ -+ ptr->node.active = 0; -+ ptr->node.attached--; -+ ptr->node.owner = 0; /* no one own's it now */ -+ -+ /* need to tell any one who was attached that this has been destroy'd */ -+ for(rail=0;railnode.cap), rail)) { -+ for(i=0;i< ELAN_CAP_NUM_CONTEXTS(&(ptr->node.cap));i++) -+ if ( ptr->attached[rail][i].cb_func != NULL) -+ ptr->attached[rail][i].cb_func(ptr->attached[rail][i].cb_args, cap, NULL); -+ } -+ -+ /* now try to destroy it */ -+ elan_destroy_cap_test(ptr); -+ -+ /* found it */ -+ found = 1; -+ } -+ } -+ } -+ -+ if ( found ) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ -+ /* failed */ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_cap: didnt find it \n"); -+ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_get_caps(uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps) -+{ -+ uint results = 0; -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_get_caps\n"); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ copyout(&ptr->node, &caps[results], sizeof (ELAN_CAP_STRUCT)); -+ -+ results++; -+ -+ if ( results >= array_size ) -+ { -+ copyout(&results, number_of_results, sizeof(uint)); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ copyout(&results, number_of_results, sizeof(uint)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_create_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_create_vp\n"); -+ -+ /* the railmasks must match */ -+ if ( cap->cap_railmask != map->cap_railmask) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* does the cap exist */ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr == NULL) || ( cap_ptr->node.owner != owner ) || (! cap_ptr->node.active) ) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* is there already a mapping */ -+ vp_ptr = find_vp_node(cap_ptr,map); -+ if ( vp_ptr != NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* create space for mapping */ -+ KMEM_ALLOC(vp_ptr, ELAN_VP_NODE_STRUCT *, sizeof(ELAN_VP_NODE_STRUCT), 1); -+ if (vp_ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ /* copy map */ -+ vp_ptr->vp = *map; -+ list_add_tail(&vp_ptr->list, &(cap_ptr->vp_list)); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_destroy_vp(ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ ELAN_CAP_NODE_STRUCT * cap_ptr = NULL; -+ ELAN_VP_NODE_STRUCT * vp_ptr = NULL; -+ int i, rail; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_destroy_vp\n"); -+ -+ cap_ptr = find_cap_node(cap); -+ if ((cap_ptr!=NULL) && (cap_ptr->node.owner == owner) && ( cap_ptr->node.active)) -+ { -+ vp_ptr = find_vp_node( cap_ptr, map ); -+ if ( vp_ptr != NULL ) -+ { -+ list_del(&vp_ptr->list); -+ KMEM_FREE(vp_ptr, sizeof(ELAN_VP_NODE_STRUCT)); -+ -+ /* need to tell those who are attached that map is nolonger in use */ -+ for(rail=0;railnode.cap));i++) -+ if ( cap_ptr->attached[rail][i].cb_func != NULL) -+ cap_ptr->attached[rail][i].cb_func( cap_ptr->attached[rail][i].cb_args, cap, map); -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ /* didnt find it */ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_attach_cap(ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB func) -+{ -+ char space[127]; -+ struct list_head *el; -+ -+ ELAN_DEBUG1 (ELAN_DBG_CAP,"elan_attach_cap %s\n",elan_capability_string(cap,space)); -+ -+ /* currently must provide a call back, as null mean something */ -+ if ( func == NULL) -+ return (EINVAL); -+ -+ /* mycontext must be set and correct */ -+ if ( ! ELAN_CAP_VALID_MYCONTEXT(cap)) -+ return (EINVAL); -+ -+ /* rail must be one of the rails in railmask */ -+ if (((1 << rail) & cap->cap_railmask) == 0) -+ return (EINVAL); -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(el, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *cap_ptr = list_entry(el, ELAN_CAP_NODE_STRUCT , list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_MATCH(&cap_ptr->node.cap,cap) && cap_ptr->node.active) { -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( cap_ptr->attached[rail][attached_index].cb_func != NULL ) /* only one per ctx per rail */ -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* keep track of who attached as we might need to tell them when */ -+ /* cap or maps get destroyed */ -+ cap_ptr->attached[rail][ attached_index ].cb_func = func; -+ cap_ptr->attached[rail][ attached_index ].cb_args = args; -+ cap_ptr->node.attached++; -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: passed\n"); -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_attach_cap: failed to find \n"); -+ -+ /* didnt find one */ -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_detach_cap(ELAN_CAPABILITY *cap, unsigned int rail) -+{ -+ struct list_head *el, *nel; -+ char space[256]; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elan_detach_cap %s\n",elan_capability_string(cap,space)); -+ list_for_each_safe (el, nel, &elan_cap_list) { -+ ELAN_CAP_NODE_STRUCT *ptr = list_entry (el, ELAN_CAP_NODE_STRUCT, list); -+ -+ /* is it an exact match */ -+ if (ELAN_CAP_TYPE_MATCH(&ptr->node.cap,cap) && -+ ELAN_CAP_GEOM_MATCH(&ptr->node.cap,cap) && -+ (ptr->node.cap.cap_railmask & cap->cap_railmask) == cap->cap_railmask) { -+ -+ unsigned int attached_index = cap->cap_mycontext - cap->cap_lowcontext; -+ -+ if ( ptr->attached[rail][ attached_index ].cb_func == NULL ) -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elanmod_detach_cap already removed \n"); -+ -+ ptr->attached[rail][ attached_index ].cb_func = NULL; -+ ptr->attached[rail][ attached_index ].cb_args = (void *)0; -+ -+ ptr->node.attached--; -+ -+ ELAN_DEBUG1(ELAN_DBG_CAP,"elanmod_detach_cap new attach count%d \n", ptr->node.attached); -+ -+ elan_destroy_cap_test(ptr); -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: success\n"); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+ } -+ } -+ -+ ELAN_DEBUG0(ELAN_DBG_CAP,"elan_detach_cap: failed to find\n"); -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+} -+ -+int -+elan_cap_dump() -+{ -+ struct list_head * tmp; -+ ELAN_CAP_NODE_STRUCT * ptr = NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(tmp, &elan_cap_list) { -+ ptr = list_entry(tmp, ELAN_CAP_NODE_STRUCT , list); -+ -+ ELAN_DEBUG2 (ELAN_DBG_ALL, "cap dump: owner %p type %x\n", ptr->node.owner, ptr->node.cap.cap_type); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL, "cap dump: LowNode %d HighNode %d LowContext %d mycontext %d highContext %d\n", -+ ptr->node.cap.cap_lownode , ptr->node.cap.cap_highnode, -+ ptr->node.cap.cap_lowcontext , ptr->node.cap.cap_mycontext, ptr->node.cap.cap_highcontext); -+ -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/capability_general.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/capability_general.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/capability_general.c 2005-07-28 14:52:52.779689224 -0400 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability_general.c,v 1.10 2004/02/25 13:47:59 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/shared/capability_general.c,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+#else -+ -+#include -+#include -+#include -+ -+#endif -+ -+#include -+ -+ -+void -+elan_nullcap (ELAN_CAPABILITY *cap) -+{ -+ register int i; -+ -+ for (i = 0; i < sizeof (cap->cap_userkey)/sizeof(cap->cap_userkey.key_values[0]); i++) -+ cap->cap_userkey.key_values[i] = ELAN_CAP_UNINITIALISED; -+ -+ cap->cap_lowcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_highcontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_mycontext = ELAN_CAP_UNINITIALISED; -+ cap->cap_lownode = ELAN_CAP_UNINITIALISED; -+ cap->cap_highnode = ELAN_CAP_UNINITIALISED; -+ cap->cap_railmask = ELAN_CAP_UNINITIALISED; -+ cap->cap_type = ELAN_CAP_UNINITIALISED; -+ cap->cap_spare = 0; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ -+ for (i = 0; i < sizeof (cap->cap_bitmap)/sizeof (cap->cap_bitmap[0]); i++) -+ cap->cap_bitmap[i] = 0; -+} -+ -+char * -+elan_capability_string (ELAN_CAPABILITY *cap, char *str) -+{ -+ if (cap == NULL) -+ sprintf (str, "[-.-.-.-] cap = NULL\n"); -+ else -+ sprintf (str, "[%x.%x.%x.%x] Version %x Type %x \n" -+ "Context %x.%x.%x Node %x.%x\n", -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3], -+ cap->cap_version, cap->cap_type, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, -+ cap->cap_lownode, cap->cap_highnode); -+ -+ return (str); -+} -+ -+ELAN_LOCATION -+elan_vp2location (u_int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int i, vp, node, context, nnodes, nctxs; -+ -+ vp = 0; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0, i = 0; node < nnodes; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if (vp == process) -+ { -+ /* Return relative indices within the capability box */ -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0, i = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if (vp == process) -+ { -+ location.loc_node = node; -+ location.loc_context = context; -+ -+ return (location); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ return( location ); -+} -+ -+int -+elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap) -+{ -+ int vp, node, context, nnodes, nctxs; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ vp = 0; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (node = 0 ; node < nnodes ; node++) -+ { -+ for (context = 0; context < nctxs; context++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, context + (node * nctxs))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (context = 0; context < nctxs; context++) -+ { -+ for (node = 0; node < nnodes; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (context * nnodes))) -+ { -+ if ((location.loc_node == node) && (location.loc_context == context)) -+ { -+ /* Found it ! */ -+ return( vp ); -+ } -+ -+ vp++; -+ } -+ } -+ } -+ break; -+ } -+ -+ /* Failed to find it */ -+ return( -1 ); -+} -+ -+/* Return the number of processes as described by a capability */ -+int -+elan_nvps (ELAN_CAPABILITY *cap) -+{ -+ int i, c, nbits = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) -+ return (nbits); -+ -+ for (i = 0, c = 0; i < nbits; i++) -+ if (BT_TEST (cap->cap_bitmap, i)) -+ c++; -+ -+ return (c); -+} -+ -+/* Return the number of local processes on a given node as described by a capability */ -+int -+elan_nlocal (int node, ELAN_CAPABILITY *cap) -+{ -+ int vp; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ for (vp = 0; vp < elan_nvps(cap); vp++) -+ { -+ loc = elan_vp2location(vp, cap); -+ if (loc.loc_node == node) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the maximum number of local processes on any node as described by a capability */ -+int -+elan_maxlocal (ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext - cap->cap_lowcontext + 1); -+} -+ -+/* Return the vps of the local processes on a given node as described by a capability */ -+int -+elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size) -+{ -+ int context; -+ ELAN_LOCATION loc; -+ int nLocal = 0; -+ -+ loc.loc_node = node; -+ -+ for (context = 0; context < MIN(size, elan_maxlocal(cap)); context++) -+ { -+ loc.loc_context = context; -+ -+ /* Should return -1 if none found */ -+ if ( (vps[context] = elan_location2vp( loc, cap )) != -1) -+ nLocal++; -+ } -+ -+ return (nLocal); -+} -+ -+/* Return the number of rails that this capability utilises */ -+int -+elan_nrails (ELAN_CAPABILITY *cap) -+{ -+ int nrails = 0; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ while (railmask) -+ { -+ if (railmask & 1) -+ nrails++; -+ -+ railmask >>= 1; -+ } -+ } -+ else -+ /* Default to just one rail */ -+ nrails = 1; -+ -+ return (nrails); -+} -+ -+/* Fill out an array giving the physical rail numbers utilised by a capability */ -+int -+elan_rails (ELAN_CAPABILITY *cap, int *rails) -+{ -+ int nrails, rail; -+ unsigned int railmask; -+ -+ /* Test for a multi-rail capability */ -+ if (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL) -+ { -+ /* Grab rail bitmask from capability */ -+ railmask = cap->cap_railmask; -+ -+ nrails = rail = 0; -+ while (railmask) -+ { -+ if (railmask & 1) -+ rails[nrails++] = rail; -+ -+ rail++; -+ railmask >>= 1; -+ } -+ } -+ else -+ { -+ /* Default to just one rail */ -+ rails[0] = 0; -+ nrails = 1; -+ } -+ -+ return( nrails ); -+} -+ -+int -+elan_cap_overlap(ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2) -+{ -+ /* by context */ -+ if ( cap1->cap_highcontext < cap2->cap_lowcontext ) return (0); -+ if ( cap1->cap_lowcontext > cap2->cap_highcontext) return (0); -+ -+ /* by node */ -+ if ( cap1->cap_highnode < cap2->cap_lownode ) return (0); -+ if ( cap1->cap_lownode > cap2->cap_highnode) return (0); -+ -+ /* by rail */ -+ /* they overlap if they have a rail in common */ -+ return (cap1->cap_railmask & cap2->cap_railmask); -+} -+ -+#if !defined(__KERNEL__) -+ -+/* Fill out an array that hints at the best use of the rails on a -+ * per process basis. The library user can then decide whether or not -+ * to take this into account (e.g. TPORTs) -+ * All processes calling this fn will be returned the same information. -+ */ -+int -+elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp) -+{ -+ int i; -+ int nrails = elan_nrails(cap); -+ int maxlocal = elan_maxlocal(cap); -+ -+ /* Test for a multi-rail capability */ -+ if (! (cap->cap_type & ELAN_CAP_TYPE_MULTI_RAIL)) -+ { -+ /* Default to just one rail */ -+ for (i = 0; i < nvp; i++) -+ pref[i] = 0; -+ -+ return( 0 ); -+ } -+ -+ /* -+ * We allocate rails on a per node basis sharing our the rails -+ * equally amongst the local processes. However, if there is only -+ * one process per node and multiple rails, then we use a different -+ * algorithm where rails are allocated across all the processes in -+ * a round-robin fashion -+ */ -+ -+ if (maxlocal == 1) -+ { -+ /* Allocate rails in a round-robin manner */ -+ for (i = 0; i < nvp; i++) -+ *pref++ = i % nrails; -+ } -+ else -+ { -+ int node; -+ int *vps; -+ int nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ -+ vps = (int *) malloc(sizeof(int)*maxlocal); -+ -+ /* Grab the local process info for each node and allocate -+ * rails to those vps on an equal basis -+ */ -+ for (node = 0; node < nnodes; node++) -+ { -+ int nlocal; -+ int pprail; -+ -+ /* Grab an array of local vps */ -+ nlocal = elan_localvps(node, cap, vps, maxlocal); -+ -+ /* Calculate the number processes per rail */ -+ if ((pprail = nlocal/nrails) == 0) -+ pprail = 1; -+ -+ /* Allocate processes to rails */ -+ for (i = 0; i < nlocal; i++) -+ { -+ pref[vps[i]] = (i / pprail) % nrails; -+ } -+ } -+ -+ free(vps); -+ } -+ -+ return( 0 ); -+} -+ -+void -+elan_get_random_key(ELAN_USERKEY *key) -+{ -+ int i; -+ for (i = 0; i < sizeof(key->key_values) / sizeof(key->key_values[0]); i++) -+ key->key_values[i] = lrand48(); -+} -+ -+int elan_lowcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lowcontext); -+} -+ -+int elan_mycontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_mycontext); -+} -+ -+int elan_highcontext(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highcontext); -+} -+ -+int elan_lownode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_lownode); -+} -+ -+int elan_highnode(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_highnode); -+} -+ -+int elan_captype(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_type); -+} -+ -+int elan_railmask(ELAN_CAPABILITY *cap) -+{ -+ return(cap->cap_railmask); -+} -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/device.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/device.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/device.c 2005-07-28 14:52:52.780689072 -0400 -@@ -0,0 +1,147 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_dev_list); -+ -+ELAN_DEV_STRUCT * -+elan_dev_find (ELAN_DEV_IDX devidx) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ if (ptr->devidx == devidx) -+ return ptr; -+ if (ptr->devidx > devidx) -+ return ERR_PTR(-ENXIO); -+ } -+ -+ return ERR_PTR(-EINVAL); -+} -+ -+ELAN_DEV_STRUCT * -+elan_dev_find_byrail (unsigned short deviceid, unsigned rail) -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"elan_dev_find_byrail devidx %d - %04x %04x, %d %d \n", ptr->devidx, -+ ptr->devinfo->dev_device_id, deviceid, ptr->devinfo->dev_rail, rail); -+ -+ if (ptr->devinfo->dev_device_id == deviceid && ptr->devinfo->dev_rail == rail) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+ELAN_DEV_IDX -+elan_dev_register (ELAN_DEVINFO *devinfo, ELAN_DEV_OPS *ops, void * user_data) -+{ -+ ELAN_DEV_STRUCT *ptr; -+ ELAN_DEV_IDX devidx = 0; -+ struct list_head *tmp; -+ -+ kmutex_lock(&elan_mutex); -+ -+ /* is it already registered */ -+ if ((ptr = elan_dev_find_byrail(devinfo->dev_device_id, devinfo->dev_rail)) != NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ /* find a free device idx */ -+ list_for_each (tmp, &elan_dev_list) { -+ if (list_entry (tmp, ELAN_DEV_STRUCT, node)->devidx != devidx) -+ break; -+ devidx++; -+ } -+ -+ /* create it and add */ -+ KMEM_ALLOC(ptr, ELAN_DEV_STRUCT *, sizeof(ELAN_DEV_STRUCT), 1); -+ if (ptr == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ ptr->devidx = devidx; -+ ptr->ops = ops; -+ ptr->devinfo = devinfo; -+ ptr->user_data = user_data; -+ -+ /* insert this entry *before* the last entry we've found */ -+ list_add_tail(&ptr->node, tmp); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_deregister (ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_dev_find_byrail (devinfo->dev_device_id, devinfo->dev_rail)) == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return EINVAL; -+ } -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_DEV_STRUCT)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_dev_dump () -+{ -+ struct list_head *tmp; -+ ELAN_DEV_STRUCT *ptr=NULL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ list_for_each(tmp, &elan_dev_list) { -+ ptr = list_entry(tmp, ELAN_DEV_STRUCT , node); -+ -+ ELAN_DEBUG3 (ELAN_DBG_ALL,"dev dump: index %u rail %u elan%c\n", -+ ptr->devidx, ptr->devinfo->dev_rail, '3' + ptr->devinfo->dev_device_id); -+ ELAN_DEBUG5 (ELAN_DBG_ALL,"dev dump: Vid %x Did %x Rid %x DR %d DVal %x\n", -+ ptr->devinfo->dev_vendor_id, -+ ptr->devinfo->dev_device_id, -+ ptr->devinfo->dev_revision_id, -+ ptr->devinfo->dev_driver_version, -+ ptr->devinfo->dev_num_down_links_value); -+ -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/devinfo.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/devinfo.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/devinfo.c 2005-07-28 14:52:52.780689072 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.c,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.c,v $*/ -+ -+#include -+#include -+ -+int -+elan_get_devinfo(ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find (devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ { -+ copyout(target->devinfo, devinfo, sizeof(ELAN_DEVINFO)); -+ res = ESUCCESS; -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_get_position(ELAN_DEV_IDX devidx, ELAN_POSITION *position) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR(target); -+ else -+ res = target->ops->get_position(target->user_data, position); -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_set_position(ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN_DEV_STRUCT *target; -+ int res; -+ -+ kmutex_lock(&elan_mutex); -+ -+ target = elan_dev_find(devidx); -+ -+ if (IS_ERR (target)) -+ res = PTR_ERR (target); -+ else -+ res = target->ops->set_position(target->user_data, nodeId, numNodes); -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/elanmod.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/elanmod.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/elanmod.c 2005-07-28 14:52:52.781688920 -0400 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: elanmod.c,v 1.11 2004/06/18 09:28:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.c,v $*/ -+ -+#include -+#include -+ -+kmutex_t elan_mutex; -+ -+int -+elan_init() -+{ -+ kmutex_init(&elan_mutex); -+ return (ESUCCESS); -+} -+ -+int -+elan_fini() -+{ -+ kmutex_destroy(&elan_mutex); -+ return (ESUCCESS); -+} -+ -+int -+elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ if (cap->cap_version != ELAN_CAP_VERSION_NUMBER) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->Version != ELAN_CAP_VERSION) %d %d\n", cap->cap_version, ELAN_CAP_VERSION_NUMBER); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext == ELAN_CAP_UNINITIALISED || cap->cap_highcontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: LowContext %d HighContext %d MyContext %d\n", -+ cap->cap_lowcontext , cap->cap_highcontext, cap->cap_mycontext); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext > cap->cap_highcontext) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: (cap->cap_lowcontext > cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if (position->pos_mode == ELAN_POS_UNKNOWN) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: Position Unknown \n"); -+ return (-EAGAIN); -+ } -+ -+ if ( ! ( ELAN_USER_CONTEXT(cap->cap_lowcontext) && ELAN_USER_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG4 (ELAN_DBG_VP, "elanmod_classify_cap: USER_BASE_CONTEXT %d %d %d %d \n" , ELAN_USER_BASE_CONTEXT_NUM,cap->cap_lowcontext, cap->cap_highcontext ,ELAN_USER_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ if (cap->cap_lownode == ELAN_CAP_UNINITIALISED) -+ cap->cap_lownode = position->pos_nodeid; -+ if (cap->cap_highnode == ELAN_CAP_UNINITIALISED) -+ cap->cap_highnode = position->pos_nodeid; -+ -+ if (cap->cap_lownode < 0 || cap->cap_highnode >= position->pos_nodes || cap->cap_lownode > cap->cap_highnode) -+ { -+ ELAN_DEBUG3 ( ELAN_DBG_VP,"elanmod_classify_cap: low %d high %d pos %d \n" , cap->cap_lownode ,cap->cap_highnode, position->pos_nodes); -+ -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_highnode < position->pos_nodeid) || (cap->cap_lownode > position->pos_nodeid)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: node not i range low %d high %d this %d\n", -+ cap->cap_lownode, cap->cap_highnode, position->pos_nodeid); -+ return (-EINVAL); -+ } -+ -+ break; -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: cant decode type %x \n", cap->cap_type & ELAN_CAP_TYPE_MASK); -+ return (-EINVAL); -+ -+ } -+ -+ switch (use) -+ { -+ case ELAN_USER_ATTACH: -+ case ELAN_USER_DETACH: -+ if (cap->cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext == ELAN_CAP_UNINITIALISED"); -+ return (-EINVAL); -+ } -+ -+ if ((cap->cap_mycontext != ELAN_CAP_UNINITIALISED) && -+ (cap->cap_mycontext < cap->cap_lowcontext || cap->cap_mycontext > cap->cap_highcontext)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_VP, "elanmod_classify_cap: cap->cap_mycontext out of range %d %d %d \n", cap->cap_lowcontext,cap->cap_mycontext,cap->cap_highcontext); -+ return (-EINVAL); -+ } -+ break; -+ -+ case ELAN_USER_P2P: -+ break; -+ -+ case ELAN_USER_BROADCAST: -+ if (! (cap->cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) { -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: use ELAN_USER_BROADCAST but cap not ELAN_CAP_TYPE_BROADCASTABLE\n"); -+ return (-EINVAL); -+ } -+ break; -+ -+ default: -+ ELAN_DEBUG1 (ELAN_DBG_VP, "elanmod_classify_cap: unknown use (%d)\n",use); -+ return (-EINVAL); -+ } -+ -+ -+ -+ /* is any ctxt an rms one ?? */ -+ if (ELAN_RMS_CONTEXT(cap->cap_lowcontext) || ELAN_RMS_CONTEXT(cap->cap_highcontext)) -+ { -+ /* so both low and high must be */ -+ if (!(ELAN_RMS_CONTEXT(cap->cap_lowcontext) && ELAN_RMS_CONTEXT(cap->cap_highcontext))) -+ { -+ ELAN_DEBUG2 (ELAN_DBG_VP, "elanmod_classify_cap: not rms ctxt %x %x\n",cap->cap_lowcontext,cap->cap_highcontext ); -+ return (-EINVAL); -+ } -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_RMS\n"); -+ return (ELAN_CAP_RMS); -+ } -+ -+ ELAN_DEBUG0 (ELAN_DBG_VP, "elanmod_classify_cap: returning ELAN_CAP_OK\n"); -+ return (ELAN_CAP_OK); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/elanmod_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/elanmod_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/elanmod_linux.c 2005-07-28 14:52:52.781688920 -0400 -@@ -0,0 +1,410 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.c,v 1.16 2004/06/14 15:45:37 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan support module"); -+ -+MODULE_LICENSE("GPL"); -+ -+/* elanmod.c */ -+EXPORT_SYMBOL(elanmod_classify_cap); -+ -+/* bitmap.c */ -+#include -+ -+EXPORT_SYMBOL(bt_freebit); -+EXPORT_SYMBOL(bt_lowbit); -+EXPORT_SYMBOL(bt_nextbit); -+EXPORT_SYMBOL(bt_copy); -+EXPORT_SYMBOL(bt_zero); -+EXPORT_SYMBOL(bt_fill); -+EXPORT_SYMBOL(bt_cmp); -+EXPORT_SYMBOL(bt_intersect); -+EXPORT_SYMBOL(bt_remove); -+EXPORT_SYMBOL(bt_add); -+EXPORT_SYMBOL(bt_spans); -+EXPORT_SYMBOL(bt_subset); -+EXPORT_SYMBOL(bt_up); -+EXPORT_SYMBOL(bt_down); -+EXPORT_SYMBOL(bt_nbits); -+ -+/* capability.c */ -+EXPORT_SYMBOL(elan_nullcap); -+EXPORT_SYMBOL(elan_detach_cap); -+EXPORT_SYMBOL(elan_attach_cap); -+EXPORT_SYMBOL(elan_validate_map); -+ -+/* stats.c */ -+EXPORT_SYMBOL(elan_stats_register); -+EXPORT_SYMBOL(elan_stats_deregister); -+ -+/* device.c */ -+EXPORT_SYMBOL(elan_dev_deregister); -+EXPORT_SYMBOL(elan_dev_register); -+ -+/* debug */ -+int elan_debug_mode = QSNET_DEBUG_BUFFER; -+int elan_debug_mask; -+ -+static struct proc_dir_entry *elan_procfs_root; -+ -+extern void elan_procfs_init(void); -+extern void elan_procfs_fini(void); -+ -+static int elan_open (struct inode *ino, struct file *fp); -+static int elan_release (struct inode *ino, struct file *fp); -+static int elan_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations elan_fops = -+{ -+ ioctl: elan_ioctl, -+ open: elan_open, -+ release: elan_release, -+}; -+ -+static int __init elan_start(void) -+{ -+ int res; -+ -+ elan_procfs_init(); -+ -+ if ((res = elan_init()) != ESUCCESS) -+ { -+ elan_procfs_fini(); -+ return (-res); -+ } -+ -+ return (0); -+} -+ -+static void __exit elan_exit(void) -+{ -+ elan_fini(); -+ elan_procfs_fini(); -+} -+ -+ -+/* Declare the module init and exit functions */ -+void -+elan_procfs_init() -+{ -+ struct proc_dir_entry *p; -+ -+ elan_procfs_root = proc_mkdir("elan", qsnet_procfs_root); -+ -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mask", &elan_debug_mask, 0); -+ qsnet_proc_register_hex(elan_procfs_root, "debug_mode", &elan_debug_mode, 0); -+ -+ if ((p = create_proc_entry ("ioctl", 0, elan_procfs_root)) != NULL) -+ { -+ p->proc_fops = &elan_fops; -+ p->data = 0; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+elan_procfs_fini() -+{ -+ remove_proc_entry ("debug_mask", elan_procfs_root); -+ remove_proc_entry ("debug_mode", elan_procfs_root); -+ -+ remove_proc_entry ("ioctl", elan_procfs_root); -+ remove_proc_entry ("version", elan_procfs_root); -+ -+ remove_proc_entry ("elan", qsnet_procfs_root); -+} -+ -+module_init(elan_start); -+module_exit(elan_exit); -+ -+static int -+elan_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+elan_release (struct inode *inode, struct file *fp) -+{ -+ /* mark all caps owned by fp to be destroyed */ -+ elan_destroy_cap(fp,NULL); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+elan_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int rep = 0; -+ -+ switch (cmd) -+ { -+ case ELANCTRL_STATS_GET_NEXT : -+ { -+ ELANCTRL_STATS_GET_NEXT_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_NEXT_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_next_index(args.statidx, args.next_statidx) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_FIND_INDEX : -+ { -+ ELANCTRL_STATS_FIND_INDEX_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_FIND_INDEX_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_find_index(args.block_name, args.statidx, args.num_entries) != 0 ) -+ return (-EINVAL); -+ -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK_INFO : -+ { -+ ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block_info(args.statidx, args.block_name, args.num_entries) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_INDEX_NAME : -+ { -+ ELANCTRL_STATS_GET_INDEX_NAME_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_INDEX_NAME_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_index_name(args.statidx, args.index, args.name) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_CLEAR_BLOCK : -+ { -+ ELANCTRL_STATS_CLEAR_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_CLEAR_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* statidx is not a pointer */ -+ if (elan_stats_clear_block(args.statidx) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_STATS_GET_BLOCK : -+ { -+ ELANCTRL_STATS_GET_BLOCK_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_STATS_GET_BLOCK_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_stats_get_block(args.statidx, args.entries, args.values) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_DEVINFO : -+ { -+ ELANCTRL_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_devinfo(args.devidx, args.devinfo) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_GET_POSITION : -+ { -+ ELANCTRL_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_position(args.devidx, args.position) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_SET_POSITION : -+ { -+ ELANCTRL_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_set_position(args.devidx, args.nodeId, args.numNodes) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_CREATE_CAP : -+ { -+ ELANCTRL_CREATE_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_CAP_STRUCT *, sizeof(ELANCTRL_CREATE_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_CAP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if ((elan_validate_cap(&args->cap) != 0) || (elan_create_cap(fp,&args->cap) != 0 )) -+ rep = (-EINVAL); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_CAP : -+ { -+ ELANCTRL_DESTROY_CAP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_CAP_STRUCT *, sizeof(ELANCTRL_DESTROY_CAP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_CAP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_cap(fp, &args->cap) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ /* free the space */ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_CAP_STRUCT)); -+ -+ break; -+ } -+ case ELANCTRL_CREATE_VP : -+ { -+ ELANCTRL_CREATE_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_CREATE_VP_STRUCT *, sizeof(ELANCTRL_CREATE_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_CREATE_VP_STRUCT))) -+ return (-EFAULT); -+ else -+ { -+ if ((elan_validate_cap( &args->map) != 0) || (elan_create_vp(fp, &args->cap, &args->map) != 0 )) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_CREATE_VP_STRUCT )); -+ -+ break; -+ } -+ case ELANCTRL_DESTROY_VP : -+ { -+ ELANCTRL_DESTROY_VP_STRUCT *args; -+ -+ /* get space for args */ -+ KMEM_ALLOC(args, ELANCTRL_DESTROY_VP_STRUCT *, sizeof(ELANCTRL_DESTROY_VP_STRUCT), 1); -+ if (args == NULL) -+ return(-ENOMEM); -+ -+ /* copy them */ -+ if (copy_from_user (args, (void *) arg, sizeof (ELANCTRL_DESTROY_VP_STRUCT))) -+ rep = (-EFAULT); -+ else -+ { -+ if (elan_destroy_vp(fp, &args->cap, &args->map) != 0 ) -+ rep = (-EINVAL); -+ } -+ -+ KMEM_FREE(args, sizeof(ELANCTRL_DESTROY_VP_STRUCT )); -+ -+ break; -+ } -+ -+ case ELANCTRL_GET_CAPS : -+ { -+ ELANCTRL_GET_CAPS_STRUCT args; -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_GET_CAPS_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if (elan_get_caps(args.number_of_results, args.array_size, args.caps) != 0 ) -+ return (-EINVAL); -+ break; -+ } -+ case ELANCTRL_DEBUG_DUMP : -+ { -+ elan_cap_dump(); -+ elan_dev_dump(); -+ -+ break; -+ } -+ case ELANCTRL_DEBUG_BUFFER : -+ { -+ ELANCTRL_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* uses copyin/copyout */ -+ if ((args.size = qsnet_debug_buffer (args.buffer, args.size)) != -1 && -+ copy_to_user ((void *) arg, &args, sizeof (ELANCTRL_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ break; -+ } -+ default: -+ return (-EINVAL); -+ break; -+ } -+ -+ return (rep); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/Makefile 2005-07-28 14:52:52.782688768 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_QSNET) += elan.o -+elan-objs := elanmod.o device.o stats.o devinfo.o capability.o elanmod_linux.o capability_general.o bitmap.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/Makefile.conf 2005-07-28 14:52:52.782688768 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan.o -+MODULENAME = elan -+KOBJFILES = elanmod.o device.o stats.o devinfo.o capability.o elanmod_linux.o capability_general.o bitmap.o -+EXPORT_KOBJS = elanmod_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/quadrics_version.h 2005-07-28 14:52:52.782688768 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan/stats.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan/stats.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan/stats.c 2005-07-28 14:52:52.783688616 -0400 -@@ -0,0 +1,277 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.c,v 1.6 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.c,v $*/ -+ -+#include -+#include -+ -+static LIST_HEAD(elan_stats_list); -+static ELAN_STATS_IDX elan_next_statidx=0; -+ -+ELAN_STATS_STRUCT * -+elan_stats_find(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if ( ptr->statidx == statidx ) -+ return ptr; -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find failed %d\n", statidx); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_by_name(caddr_t block_name) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ if (!strcmp(ptr->block_name, block_name)) -+ { -+ ELAN_DEBUG3 (ELAN_DBG_CTRL, "elan_stats_find_by_name found %s (%d,%d)\n", block_name, ptr->statidx, ptr->num_entries); -+ return ptr; -+ } -+ } -+ -+ ELAN_DEBUG1 (ELAN_DBG_CTRL, "elan_stats_find_by_name failed %s\n", block_name); -+ return NULL; -+} -+ -+ELAN_STATS_STRUCT * -+elan_stats_find_next(ELAN_STATS_IDX statidx) -+{ -+ struct list_head *tmp; -+ ELAN_STATS_STRUCT *ptr=NULL; -+ -+ list_for_each(tmp, &elan_stats_list) { -+ ptr = list_entry(tmp, ELAN_STATS_STRUCT , node); -+ -+ if ( ptr->statidx > statidx ) -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+int -+elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_block) -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX next = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_stats_find_next(statidx)) != NULL) -+ next = target->statidx; -+ -+ copyout(&next, next_block, sizeof(ELAN_STATS_IDX) ); -+ -+ kmutex_unlock(&elan_mutex); -+ return 0; -+} -+ -+int -+elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries) -+ -+{ -+ ELAN_STATS_STRUCT *target; -+ ELAN_STATS_IDX index = 0; -+ uint entries = 0; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_find_index %s \n", block_name); -+ -+ if ((target = elan_stats_find_by_name(block_name)) != NULL) -+ { -+ index = target->statidx; -+ entries = target->num_entries; -+ } -+ -+ ELAN_DEBUG3(ELAN_DBG_CTRL, "elan_stats_find_index found %d %d (target=%p)\n", index, entries, target); -+ -+ copyout(&index, statidx, sizeof(ELAN_STATS_IDX)); -+ copyout(&entries, num_entries, sizeof(uint)); -+ -+ kmutex_unlock(&elan_mutex); -+ return ESUCCESS; -+} -+ -+int -+elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_block_info statidx %d\n",statidx); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_block_info name %s entries %d\n",block_name, *num_entries); -+ -+ copyout( target->block_name, block_name, ELAN_STATS_NAME_MAX_LEN); -+ copyout(&target->num_entries, num_entries, sizeof(uint)); -+ -+ res = ESUCCESS; -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ ELAN_DEBUG2(ELAN_DBG_CTRL, "elan_stats_get_index_name statidx %d index %d\n",statidx, index); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_name== NULL) -+ { -+ ELAN_DEBUG0(ELAN_DBG_CTRL, "elan_stats_get_index_name no callback\n"); -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ if ((res = target->ops->elan_stats_get_name(target->arg, index, name)) == 0) -+ ELAN_DEBUG1(ELAN_DBG_CTRL, "elan_stats_get_index_name name %s\n",name); -+ -+ } -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_get_block == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_get_block(target->arg, entries, values); -+ } -+ -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+int -+elan_stats_clear_block (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ int res=EINVAL; -+ -+ kmutex_lock(&elan_mutex); -+ -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ if ( target->ops->elan_stats_clear_block == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return res; -+ } -+ -+ res = target->ops->elan_stats_clear_block(target->arg); -+ } -+ kmutex_unlock(&elan_mutex); -+ return res; -+} -+ -+void -+elan_stats_next_statidx(void) -+{ -+ /* XXXXX need to put not in use check here incase we loop MRH */ -+ /* tho its a bigish loop :) */ -+ elan_next_statidx++; -+ if (!elan_next_statidx) -+ elan_next_statidx++; -+} -+ -+int -+elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ -+ /* create it and add */ -+ KMEM_ALLOC(target, ELAN_STATS_STRUCT *, sizeof(ELAN_STATS_STRUCT), 1); -+ if (target == NULL) -+ { -+ kmutex_unlock(&elan_mutex); -+ return ENOMEM; -+ } -+ -+ elan_stats_next_statidx(); -+ -+ *statidx = elan_next_statidx; -+ -+ target->statidx = elan_next_statidx; -+ target->num_entries = num_entries; -+ target->ops = ops; -+ target->arg = arg; -+ strcpy(target->block_name, block_name); -+ -+ list_add_tail(&target->node, &elan_stats_list); -+ -+ kmutex_unlock(&elan_mutex); -+ return 0; -+} -+ -+int -+elan_stats_deregister (ELAN_STATS_IDX statidx) -+{ -+ ELAN_STATS_STRUCT *target; -+ -+ kmutex_lock(&elan_mutex); -+ if ((target = elan_stats_find(statidx)) != NULL) -+ { -+ -+ list_del(&target->node); -+ -+ /* delete target entry */ -+ KMEM_FREE(target, sizeof(ELAN_STATS_STRUCT)); -+ } -+ kmutex_unlock(&elan_mutex); -+ -+ return target == NULL ? EINVAL : 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/context.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/context.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/context.c 2005-07-28 14:52:52.786688160 -0400 -@@ -0,0 +1,2101 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context.c,v 1.116.2.1 2004/11/12 14:24:18 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context.c,v $ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+/* -+ * Global variables configurable from /etc/system file -+ * (OR /etc/sysconfigtab on Digital UNIX) -+ */ -+int ntrapped_threads = 64; -+int ntrapped_dmas = 64; -+int ntrapped_events = E3_NonSysCntxQueueSize + 128; -+int ntrapped_commands = 64; -+int noverflow_commands = 1024; -+int nswapped_threads = 64; -+int nswapped_dmas = 64; -+ -+#define NUM_HALTOPS 8 -+ -+void *SwapListsLockInfo; -+void *CmdLockInfo; -+ -+static void HaltSwapContext (ELAN3_DEV *dev, void *arg); -+ -+static char *OthersStateStrings[] = {"others_running", "others_halting", "others_swapping", -+ "others_halting_more", "others_swapping_more", "others_swapped"}; -+ -+ELAN3_CTXT * -+elan3_alloc (ELAN3_DEV *dev, int kernel) -+{ -+ ELAN3_CTXT *ctxt; -+ int i; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_FN, "elan3_alloc: %s\n", kernel ? "kernel" : "user"); -+ -+ KMEM_ZALLOC (ctxt, ELAN3_CTXT *, sizeof (ELAN3_CTXT), TRUE); -+ -+ if (ctxt == NULL) -+ return (NULL); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+ ctxt->Device = dev; -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ ctxt->RefCnt = 1; -+ ctxt->Position = dev->Position; -+ -+ if (kernel) -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_KERNEL; -+ else -+ ctxt->Status = CTXT_DETACHED | CTXT_SWAPPED_OUT | CTXT_NO_LWPS; -+ -+ ctxt->Elan3mmu = elan3mmu_alloc (ctxt); -+ -+ kcondvar_init (&ctxt->Wait); -+ kcondvar_init (&ctxt->CommandPortWait); -+ kcondvar_init (&ctxt->LwpWait); -+ kcondvar_init (&ctxt->HaltWait); -+ -+ spin_lock_init (&ctxt->InputFaultLock); -+ -+ kmutex_init (&ctxt->SwapListsLock); -+ kmutex_init (&ctxt->CmdPortLock); -+ kmutex_init (&ctxt->NetworkErrorLock); -+ kmutex_init (&ctxt->CmdLock); -+ -+ krwlock_init (&ctxt->VpLock); -+ -+ KMEM_GETPAGES (ctxt->FlagPage, ELAN3_FLAGSTATS *, 1, TRUE); -+ if (!ctxt->FlagPage) -+ goto error; -+ bzero ((char *) ctxt->FlagPage, PAGESIZE); -+ -+ KMEM_ZALLOC (ctxt->CommandTraps, COMMAND_TRAP *, sizeof (COMMAND_TRAP) * ntrapped_commands, TRUE); -+ if (!ctxt->CommandTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->ThreadTraps, THREAD_TRAP *, sizeof (THREAD_TRAP) * ntrapped_threads, TRUE); -+ if (!ctxt->ThreadTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->DmaTraps, DMA_TRAP *, sizeof (DMA_TRAP) * ntrapped_dmas, TRUE); -+ if (!ctxt->DmaTraps) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->EventCookies, EVENT_COOKIE *, sizeof (EVENT_COOKIE) * ntrapped_events, TRUE); -+ if (!ctxt->EventCookies) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->Commands, CProcTrapBuf_BE *, sizeof (CProcTrapBuf_BE) * noverflow_commands,TRUE); -+ if (!ctxt->Commands) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapThreads, E3_Addr *, sizeof (E3_Addr) * nswapped_threads, TRUE); -+ if (!ctxt->SwapThreads) -+ goto error; -+ -+ KMEM_ZALLOC (ctxt->SwapDmas, E3_DMA_BE *, sizeof (E3_DMA_BE) * nswapped_dmas, TRUE); -+ if (!ctxt->SwapDmas) -+ goto error; -+ -+ /* -+ * "slop" is defined as follows : -+ * number of entries REQUIRED to be left spare to consume all other traps -+ * up until the time that the context can be swapped out. -+ * -+ * CommandTrapQ : 1 command issued by main + 1 issued by the thread processor per elan -+ * ThreadTrapQ : 2 from command + 2 input -+ * DmaTrapQ : 2 from command + 2 input -+ * EventTrapQ : 2 from command + 1 thread + 1 dma + 2 input + E3_NonSysCntxQueueSize -+ */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ELAN3_QUEUE_INIT (ctxt->CommandTrapQ, ntrapped_commands, 2); -+ ELAN3_QUEUE_INIT (ctxt->ThreadTrapQ, ntrapped_threads, 4); -+ ELAN3_QUEUE_INIT (ctxt->DmaTrapQ, ntrapped_dmas, 4); -+ ELAN3_QUEUE_INIT (ctxt->EventCookieQ, ntrapped_events, MIN(E3_NonSysCntxQueueSize + 6, ntrapped_events - 6)); -+ ELAN3_QUEUE_INIT (ctxt->CommandQ, noverflow_commands, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapThreadQ, nswapped_threads, 0); -+ ELAN3_QUEUE_INIT (ctxt->SwapDmaQ, nswapped_dmas, 0); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+#if defined(DIGITAL_UNIX) -+ /* Allocate the segelan for the command port */ -+ if (! kernel && elan3_segelan3_create (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+#endif -+ -+ /* -+ * Initialise the Input Fault list -+ */ -+ spin_lock (&ctxt->InputFaultLock); -+ for (i = 0; i < NUM_INPUT_FAULT_SAVE; i++) -+ ctxt->InputFaults[i].Next = (i == (NUM_INPUT_FAULT_SAVE-1)) ? NULL : &ctxt->InputFaults[i+1]; -+ ctxt->InputFaultList = &ctxt->InputFaults[0]; -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ ReserveHaltOperations (dev, NUM_HALTOPS, TRUE); -+ -+ if ((ctxt->RouteTable = AllocateRouteTable (ctxt->Device, ELAN3_MAX_VPS)) == NULL) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_FN, "elan3_alloc: cannot map route table\n"); -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ return ((ELAN3_CTXT *) NULL); -+ } -+ -+ return (ctxt); -+ -+ -+ error: -+ -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ if (ctxt->FlagPage) -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ if (ctxt->CommandTraps) -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ if (ctxt->ThreadTraps) -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ if (ctxt->DmaTraps) -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ if (ctxt->EventCookies) -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ if (ctxt->Commands) -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ if (ctxt->SwapThreads) -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ if (ctxt->SwapDmas) -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+ -+ return (NULL); -+} -+ -+void -+elan3_free (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_free: %p \n", ctxt); -+ -+ elan3_removevp (ctxt, ELAN3_INVALID_PROCESS); /* Remove any virtual process mappings */ -+ -+#if defined(DIGITAL_UNIX) -+ WaitForContext (ctxt); /* wait for all references to this context to go away */ -+#endif -+ -+ if (ctxt->RouteTable) -+ FreeRouteTable (dev, ctxt->RouteTable); -+ ctxt->RouteTable = NULL; -+ -+ elan3mmu_free (ctxt->Elan3mmu); /* free of our Elan3mmu */ -+ -+ if (ctxt->Private) /* Call back to "user" to free off */ -+ ELAN3_OP_FREE_PRIVATE (ctxt); /* private data */ -+ -+#if defined(DIGITAL_UNIX) -+ if (! CTXT_IS_KERNEL(ctxt)) -+ elan3_segelan3_destroy (ctxt); /* Unmap the command port from the users address space. */ -+#endif -+ -+ ReleaseHaltOperations (dev, NUM_HALTOPS); -+ -+ if (ctxt->Input0Resolver) -+ CancelNetworkErrorResolver (ctxt->Input0Resolver); -+ -+ if (ctxt->Input1Resolver) -+ CancelNetworkErrorResolver (ctxt->Input1Resolver); -+ -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ -+ CompleteNetworkErrorFixup (ctxt, nef, ESRCH); -+ } -+ -+ KMEM_FREEPAGES ((void *) ctxt->FlagPage, 1); -+ -+ KMEM_FREE ((void *) ctxt->CommandTraps, sizeof (COMMAND_TRAP) * ntrapped_commands); -+ KMEM_FREE ((void *) ctxt->ThreadTraps, sizeof (THREAD_TRAP) * ntrapped_threads); -+ KMEM_FREE ((void *) ctxt->DmaTraps, sizeof (DMA_TRAP) * ntrapped_dmas); -+ KMEM_FREE ((void *) ctxt->EventCookies, sizeof (EVENT_COOKIE) * ntrapped_events); -+ KMEM_FREE ((void *) ctxt->Commands, sizeof (CProcTrapBuf_BE) * noverflow_commands); -+ KMEM_FREE ((void *) ctxt->SwapThreads, sizeof (E3_Addr) * nswapped_threads); -+ KMEM_FREE ((void *) ctxt->SwapDmas, sizeof (E3_DMA_BE) * nswapped_dmas); -+ -+ kcondvar_destroy (&ctxt->Wait); -+ kcondvar_destroy (&ctxt->CommandPortWait); -+ kcondvar_destroy (&ctxt->LwpWait); -+ kcondvar_destroy (&ctxt->HaltWait); -+ -+ kmutex_destroy (&ctxt->SwapListsLock); -+ kmutex_destroy (&ctxt->CmdLock); -+ kmutex_destroy (&ctxt->NetworkErrorLock); -+ spin_lock_destroy (&ctxt->InputFaultLock); -+ -+ krwlock_destroy (&ctxt->VpLock); -+ -+ KMEM_FREE (ctxt, sizeof (ELAN3_CTXT)); -+} -+ -+int -+elan3_doattach(ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ unsigned long pgnum = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = ((cap->cap_mycontext & MAX_ROOT_CONTEXT_MASK) * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ /* Map in the command port for this context */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_COMMAND_PORT, &ctxt->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ctxt->CommandPageHandle) != ESUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_doattach: MapDeviceRegister failed"); -+ return (EINVAL); -+ } -+ -+ ctxt->CommandPort = ctxt->CommandPage + pgoff; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = 0; -+ if (ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) != NULL) -+ res = EBUSY; -+ else -+ { -+ if ((res = elan3mmu_attach (ctxt->Device, cap->cap_mycontext, ctxt->Elan3mmu, -+ ctxt->RouteTable->Table, ctxt->RouteTable->Size-1)) == 0) -+ { -+ ELAN3_DEV_CTX_TABLE(dev,cap->cap_mycontext) = ctxt; -+ ctxt->Capability = *cap; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (res == ESUCCESS) -+ elan3_swapin (ctxt, CTXT_DETACHED); -+ else -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ ctxt->CommandPort = (ioaddr_t) 0; -+ } -+ -+ return (res); -+} -+ -+void -+elan3_destroy_callback( void * args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ if (map == NULL) -+ { -+ /* the cap is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the cap is being destroyed \n"); -+ } -+ else -+ { -+ /* the map is being destroyed */ -+ PRINTF0 (NULL, DBG_VP, "elan3_destroy_callback: the map is being destroyed \n"); -+ } -+} -+ -+int -+elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int type; -+ int res; -+ -+ switch (type = elan3_validate_cap (dev, cap, ELAN_USER_ATTACH)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ((res = elan_attach_cap(cap, dev->Devinfo.dev_rail, ctxt, elan3_destroy_callback)) != 0) -+ return res; -+ break; -+ -+ default: -+ return (EINVAL); -+ } -+ -+ if (((res = elan3_doattach(ctxt,cap)) != ESUCCESS) && (type == ELAN_CAP_RMS)) -+ elan_detach_cap(cap, dev->Devinfo.dev_rail); -+ -+ return res; -+} -+ -+void -+elan3_detach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int need_to_call_elanmod_detach = 0; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_detach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_detach: context not attached \n"); -+ return ; -+ } -+ -+ /* must you be in the ctx_table ?? */ -+ -+ switch (ctxt->Capability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ if (ELAN3_SYSTEM_CONTEXT (ctxt->Capability.cap_mycontext)) -+ return ; -+ -+ if (! (ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ need_to_call_elanmod_detach = 1; -+ -+ break; -+ } -+ default: -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ if (need_to_call_elanmod_detach) -+ elan_detach_cap(&ctxt->Capability, dev->Devinfo.dev_rail); -+ -+ elan_nullcap (&ctxt->Capability); -+ -+} -+ -+void -+elan3_dodetach ( ELAN3_CTXT *ctxt ) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_FN, "elan3_dodetach: %p \n", ctxt ); -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (ctxt, DBG_FN, "elan3_dodetach: context not attached \n"); -+ return ; -+ } -+ -+ elan3_swapout (ctxt, CTXT_DETACHED); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3mmu_detach (dev, ctxt->Capability.cap_mycontext); -+ ELAN3_DEV_CTX_TABLE(dev,ctxt->Capability.cap_mycontext) = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (ctxt->CommandPage) -+ { -+ UnmapDeviceRegister (dev, &ctxt->CommandPageHandle); -+ ctxt->CommandPage = (ioaddr_t) 0; -+ } -+ -+ elan_nullcap (&ctxt->Capability); -+} -+ -+void -+elan3_swapin (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->Status & CTXT_SWAPPED_REASONS); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapin: status %x State %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* In transition */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); -+ -+ if (reason == CTXT_NO_LWPS && ctxt->LwpCount++ != 0) /* Added another LWP */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ if ((ctxt->Status & ~reason) & CTXT_SWAPPED_REASONS) -+ ctxt->Status &= ~reason; -+ else -+ { -+ ASSERT (ctxt->Status & CTXT_SWAPPED_OUT); -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_SWAPPED); -+ -+ /* -+ * Will not be swapped out anymore, so ask the "user" to perform -+ * any swapping in he needs before letting the context run again. -+ */ -+ -+ ctxt->Status &= ~(CTXT_SWAPPED_OUT | CTXT_QUEUES_EMPTY | reason); -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_OK && ctxt->Input1Trap.State == CTXT_STATE_OK) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapin: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+void -+elan3_swapout (ELAN3_CTXT *ctxt, int reason) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int cansleep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ PRINTF3 (ctxt, DBG_SWAP, "elan3_swapout: status %x state %s reason %x\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState], reason); -+ -+ if (reason == CTXT_NO_LWPS) -+ { -+ if (--ctxt->LwpCount != 0) /* Still other LWPs running */ -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); /* Wakeup anyone waiting on LwpCount */ -+ } -+ -+ ctxt->Status |= reason; -+ -+ while (ctxt->Status & CTXT_SWAPPING_OUT) /* wait for someone else to finish swapping */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* out */ -+ -+ if (ctxt->Status & CTXT_SWAPPED_OUT) -+ { -+ if (reason == CTXT_NO_LWPS) /* Wakeup other thread waiting on LWP exit */ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * mark the context as swapping out. -+ */ -+ ctxt->Status |= CTXT_SWAPPING_OUT; -+ -+ if (reason != CTXT_FIXUP_NETERR) -+ { -+ /* -+ * Stop all of the lwps. -+ */ -+ while (ctxt->LwpCount) -+ { -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); /* Wake up any lwps */ -+ kcondvar_wait (&ctxt->LwpWait, &dev->IntrLock, &flags); /* then wait for them to enter elan3_swapout */ -+ } -+ } -+ -+ StartSwapoutContext (ctxt, 0, NULL); -+ for (;;) -+ { -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: HandleExceptions\n"); -+ -+ cansleep = (HandleExceptions(ctxt, &flags) == ESUCCESS); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: OthersState=%d cansleep=%d\n", ctxt->OthersState, cansleep); -+ -+ if (ctxt->OthersState == CTXT_OTHERS_SWAPPED) -+ break; -+ -+ if (cansleep) -+ kcondvar_wait (&ctxt->Wait, &dev->IntrLock, &flags); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "elan3_swapout: swapped out\n"); -+ -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)); -+ ASSERT (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)); -+ -+ ctxt->Status |= CTXT_SWAPPED_OUT; -+ ctxt->Status &= ~CTXT_SWAPPING_OUT; -+ -+ kcondvar_wakeupall (&ctxt->LwpWait, &dev->IntrLock); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "elan3_swapout: all done - status %x state %s\n", -+ ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages) -+{ -+ E3_Addr elanAddr = FaultSave->s.FaultAddress; -+ int writeable; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_FAULT, "elan3_pagefault: elanAddr %08x FSR %08x : %s\n", elanAddr, FaultSave->s.FSR.Status, -+ FaultSave->s.FSR.s.ProtFault ? "protection fault" : "pte invalid"); -+ -+ /* Look at the FSR to determine the fault type etc */ -+ -+ if (FaultSave->s.FSR.Status == 0) /* this is a target abort/parity error, so look */ -+ { /* at the PCI config space registers to determine */ -+ ElanBusError (ctxt->Device); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.AlignmentErr) /* Alignment errors are always fatal. */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Alignment error\n"); -+ return (EFAULT); -+ } -+ -+ if (FaultSave->s.FSR.s.WalkBadData) /* Memory ECC error during a walk */ -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: Memory ECC error during walk\n"); -+ return (EFAULT); -+ } -+ -+ if (!FaultSave->s.FSR.s.ProtFault && /* DMA memory type changed */ -+ !FaultSave->s.FSR.s.Walking) -+ { -+ PRINTF0 (ctxt, DBG_FAULT, "elan3_pagefault: DMA memory type changed\n"); -+ return (EFAULT); -+ } -+ -+ ASSERT (FaultSave->s.FSR.s.ProtFault ? /* protection errors, should always have a valid pte */ -+ (!FaultSave->s.FSR.s.Walking || !(FaultSave->s.FSR.s.Level==3) || FaultSave->s.FSR.s.FaultPte == ELAN3_ET_PTE) : -+ FaultSave->s.FSR.s.FaultPte == ELAN3_ET_INVALID); /* otherwise it must be an invalid pte */ -+ -+ /* -+ * Determine whether to fault for a 'write' from the access permissions we need, and not -+ * from the access type (WrAcc). -+ */ -+ writeable = (FaultSave->s.FSR.s.AccTypePerm & (1 << FSR_WritePermBit)); -+ -+ /* Check that we have the right permissions for this access type. */ -+ if ((res = elan3mmu_checkperm (ctxt->Elan3mmu, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.AccTypePerm)) != 0) -+ { -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: %s\n", (res == ENOMEM) ? "no protection mapping" : "protection error"); -+ -+ return (res); -+ } -+ -+ res = LoadElanTranslation (ctxt, (elanAddr&PAGEMASK), npages*PAGESIZE, FaultSave->s.FSR.s.ProtFault, writeable); -+ -+ if (res == ESUCCESS) -+ { -+ BumpStat (ctxt->Device, PageFaults); -+ BumpUserStat (ctxt, PageFaults); -+ } -+ -+ PRINTF1 (ctxt, DBG_FAULT, "elan3_pagefault: -> %d\n", res); -+ -+ return (res); -+} -+ -+void -+elan3_block_inputter (ELAN3_CTXT *ctxt, int block) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (block) -+ ctxt->Status |= CTXT_USER_FILTERING; -+ else -+ ctxt->Status &= ~CTXT_USER_FILTERING; -+ -+ if (ctxt->Capability.cap_mycontext != ELAN_CAP_UNINITIALISED) -+ SetInputterStateForContext (ctxt, 0, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+FixupNetworkErrors (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ NETERR_FIXUP *nef; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ctxt->NetworkErrorFixups == NULL) -+ return (ESUCCESS); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ kmutex_lock (&ctxt->NetworkErrorLock); /* single thread while fixing up errors */ -+ elan3_swapout (ctxt, CTXT_FIXUP_NETERR); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ while ((nef = ctxt->NetworkErrorFixups) != NULL) -+ { -+ ctxt->NetworkErrorFixups = nef->Next; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ if (ELAN3_OP_FIXUP_NETWORK_ERROR (ctxt, nef) == OP_FAILED) -+ CompleteNetworkErrorFixup (ctxt, nef, EINVAL); -+ -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ elan3_swapin (ctxt, CTXT_FIXUP_NETERR); -+ -+ kmutex_unlock (&ctxt->NetworkErrorLock); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+} -+ -+int -+CompleteNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER *rvp) -+{ -+ int state; -+ -+ switch (rvp->Status) -+ { -+ case ESUCCESS: -+ /* -+ * the item still existed at the source - if it's a wait for EOP transaction -+ * then the source will retry - otherwise the remote event will have been -+ * cleared and we should execute it -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESUCCESS zero WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ -+ state = trap->WaitForEopTransaction ? CTXT_STATE_OK : CTXT_STATE_NEEDS_RESTART; -+ -+ break; -+ -+ case ESRCH: -+ /* -+ * the item was not found at the source - we should always execute the transaction -+ * since it will never be resent -+ */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: ESRCH execute WaitForEopTransaction %p\n", trap->WaitForEopTransaction); -+ state = CTXT_STATE_NEEDS_RESTART; -+ break; -+ -+ default: /* other errors */ -+ PRINTF1 (ctxt, DBG_NETERR, "CompleteNetworkErrorResolver: %d\n", rvp->Status); -+ if (ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, &rvp) == OP_HANDLED) -+ state = CTXT_STATE_NEEDS_RESTART; -+ else -+ state = CTXT_STATE_OK; -+ break; -+ } -+ -+ FreeNetworkErrorResolver (rvp); -+ -+ return (state); -+} -+ -+int -+HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ THREAD_TRAP tproc; -+ DMA_TRAP dproc; -+ NETERR_RESOLVER *rvp; -+ int state; -+ -+ if (ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) -+ { -+ ctxt->Status &= ~CTXT_COMMAND_OVERFLOW_ERROR; -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ElanException (ctxt, EXCEPTION_COMMAND_OVERFLOW, COMMAND_PROC, NULL); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ /* XXXX: unmap translations to the command port */ -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input0Trap, &ctxt->Input0Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_TRAPPED) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_RESOLVING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveIProcTrap (ctxt, &ctxt->Input1Trap, &ctxt->Input1Resolver); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input0Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input0Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input0Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt, &ctxt->Input0Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input0Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if ((rvp = ctxt->Input1Resolver) != NULL && rvp->Completed) -+ { -+ ASSERT (ctxt->Input1Trap.State == CTXT_STATE_NETWORK_ERROR); -+ -+ ctxt->Input1Resolver = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ state = CompleteNetworkErrorResolver (ctxt,&ctxt->Input1Trap, rvp); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ ctxt->Input1Trap.State = state; -+ return (EAGAIN); -+ } -+ -+ if (NextTProcTrap (ctxt, &tproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveTProcTrap (ctxt, &tproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_THREAD_QUEUE_FULL; -+ -+ if (NextDProcTrap (ctxt, &dproc)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ResolveDProcTrap (ctxt, &dproc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_DMA_QUEUE_FULL; -+ -+ /* Handle all event interrupts. */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->EventCookieQ)) -+ { -+ E3_uint32 cookie = *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->EventCookieQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_LWP) != OP_DEFER) -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ else -+ { -+ spin_lock_irqsave (&dev->IntrLock, *flags); /* place the cookie back on the queue. */ -+ /* note we place it on the front to ensure */ -+ ELAN3_QUEUE_ADD_FRONT (ctxt->EventCookieQ); /* event ordering. */ -+ *ELAN3_QUEUE_FRONT (ctxt->EventCookieQ, ctxt->EventCookies) = cookie; -+ } -+ } -+ return (EAGAIN); -+ } -+ ctxt->Status &= ~CTXT_EVENT_QUEUE_FULL; -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapDmaQ)) -+ { -+ E3_DMA_BE DmaDesc = *ELAN3_QUEUE_FRONT (ctxt->SwapDmaQ, ctxt->SwapDmas); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapDmaQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDmaDesc (ctxt, &DmaDesc); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ while (! ELAN3_QUEUE_EMPTY (ctxt->SwapThreadQ)) -+ { -+ E3_Addr StackPointer = *ELAN3_QUEUE_FRONT (ctxt->SwapThreadQ, ctxt->SwapThreads); -+ -+ ELAN3_QUEUE_REMOVE (ctxt->SwapThreadQ); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ ReissueStackPointer (ctxt, StackPointer); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ return (EAGAIN); -+ } -+ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_SWAPPING: -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ else -+ ctxt->OthersState = CTXT_OTHERS_SWAPPED; -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ -+ break; -+ -+ case CTXT_OTHERS_SWAPPING_MORE: -+ ctxt->OthersState = CTXT_OTHERS_HALTING_MORE; -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ -+ PRINTF1 (ctxt, DBG_LWP, "HandleExceptions: OthersState : swapping_more -> %s\n", OthersStateStrings[ctxt->OthersState]); -+ break; -+ } -+ return (ESUCCESS); -+} -+ -+int -+RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: status %x\n", ctxt->Status); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ) || ! ELAN3_QUEUE_EMPTY(ctxt->CommandQ)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartCProcTrap (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input0Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input0Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input0Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input0Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input0Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (ctxt->Input1Trap.State == CTXT_STATE_NEEDS_RESTART) -+ { -+ ctxt->Input1Trap.State = CTXT_STATE_EXECUTING; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ res = RestartIProcTrap (ctxt, &ctxt->Input1Trap); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ -+ if (res == ESUCCESS) -+ ctxt->Input1Trap.State = CTXT_STATE_OK; -+ else -+ ctxt->Input1Trap.State = CTXT_STATE_NEEDS_RESTART; -+ return (EAGAIN); -+ } -+ -+ if (SetEventsNeedRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartSetEvents (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ SetInputterStateForContext (ctxt, 0, NULL); -+ -+ if (TProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ -+ LoadCommandPortTranslation (ctxt); -+ RestartTProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (DProcNeedsRestart (ctxt)) -+ { -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ RestartDProcItems (ctxt); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ return (EAGAIN); -+ } -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "RestartContext: setting Command Flag at %p to 0\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 0; -+ -+ if (ctxt->Status & CTXT_WAITING_COMMAND) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "RestartContext: waking up threads waiting for commandport\n"); -+ -+ ctxt->Status &= ~CTXT_WAITING_COMMAND; -+ -+ kcondvar_wakeupall (&ctxt->CommandPortWait, &dev->IntrLock); -+ } -+ } -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+HaltSwapContext (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ E3_ThreadQueue_BE thread; -+ E3_DMA_BE dma; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ u_int *runCount; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (ctxt->OthersState == CTXT_OTHERS_HALTING || ctxt->OthersState == CTXT_OTHERS_HALTING_MORE); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "HaltSwapContext: status %x state %s\n", ctxt->Status, OthersStateStrings[ctxt->OthersState]); -+ -+ if (! (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ ctxt->OthersState = CTXT_OTHERS_RUNNING; -+ -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: no more reason to swap -> others_running\n"); -+ -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ -+ /* -+ * Capture all other processors since we're not being responsive to -+ * the command processor interrupt. -+ */ -+ CAPTURE_CPUS(); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, TProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, TProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, TProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, (void *) &thread, sizeof (E3_ThreadQueue_BE)); -+ -+ if (thread.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK(ctxt->SwapThreadQ, ctxt->SwapThreads) = thread.s.Thread; -+ ELAN3_QUEUE_ADD (ctxt->SwapThreadQ); -+ -+ /* -+ * Remove this entry from the queue by replacing it with -+ * the "magic" thread value. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ thread.s.Context = SysCntx ? SYS_CONTEXT_BIT : 0; -+ thread.s.Thread = VanishingStackPointer; -+ -+ elan3_sdram_copyq_to_sdram (dev, (void *) &thread, FPtr, sizeof (E3_ThreadQueue_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_ThreadQueue); -+ } -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ if (SysCntx) -+ { -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ } -+ else -+ { -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ } -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (dma.s.dma_u.s.Context == ctxt->Capability.cap_mycontext) -+ { -+ if (ELAN3_QUEUE_FULL (ctxt->SwapDmaQ)) -+ break; -+ -+ *ELAN3_QUEUE_BACK (ctxt->SwapDmaQ, ctxt->SwapDmas) = dma; -+ ELAN3_QUEUE_ADD (ctxt->SwapDmaQ); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = ((SysCntx ? SYS_CONTEXT_BIT : 0) << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = (E3_Addr) 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* -+ * Release the other processors now before signalling the LWP. -+ */ -+ RELEASE_CPUS(); -+ -+ if (! ELAN3_QUEUE_FULL (ctxt->SwapDmaQ) && !ELAN3_QUEUE_FULL (ctxt->SwapThreadQ)) -+ { -+ /* -+ * We've compleletly emptied the elan queues of items in this -+ * context, so we now mark it as fully swapped out. -+ */ -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING_MORE) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if (--(*runCount) == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues emptied -> others_swapping\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING; -+ kcondvar_wakeupall (&ctxt->Wait, &dev->IntrLock); -+ } -+ else -+ { -+ if (ctxt->OthersState == CTXT_OTHERS_HALTING) -+ { -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ PRINTF0 (ctxt, DBG_SWAP, "HaltSwapContext: queues not emptied -> others_swapping_more\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+UnloadCommandPageMapping (ELAN3_CTXT *ctxt) -+{ -+ /* -+ * Unload the Elan translations, and flag the main processor to stall after -+ * issueing its next command. -+ */ -+ if (ctxt->CommandPageMapping != NULL && (ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_rgnat_main (ctxt->Elan3mmu, ctxt->CommandPageMapping); -+ -+ if (rgn != NULL) -+ { -+ E3_Addr eaddr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ -+ PRINTF1 (ctxt, DBG_INTR, "UnloadCommandPageMapping: unmapping command port at addr %08x\n", eaddr); -+ -+ elan3mmu_unload (ctxt->Elan3mmu, eaddr, PAGESIZE, PTE_UNLOAD); -+ } -+ -+ ctxt->Status &= ~CTXT_COMMAND_MAPPED_ELAN; -+ } -+} -+ -+void -+StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int SysCntx = (ctxt->Capability.cap_mycontext & SYS_CONTEXT_BIT); -+ u_int *runCount; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ PRINTF2 (ctxt, DBG_SWAP, "StartSwapoutContext: Status %x OthersState %s\n", -+ ctxt->Status, OthersStateStrings [ctxt->OthersState]); -+ /* -+ * Disable the inputters, we should already have a reason for it. -+ */ -+ SetInputterStateForContext (ctxt, Pend, Maskp); -+ -+ UnloadCommandPageMapping (ctxt); -+ -+ /* -+ * Flag main processor to stall after issueing next command -+ */ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ -+ /* -+ * And queue a haltop to stop the queues and clear it out. -+ */ -+ switch (ctxt->OthersState) -+ { -+ case CTXT_OTHERS_RUNNING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_halting\n"); -+ -+ ctxt->OthersState = CTXT_OTHERS_HALTING; -+ -+ QueueHaltOperation (dev, Pend, Maskp, INT_DProcHalted | INT_TProcHalted, HaltSwapContext, ctxt); -+ break; -+ -+ case CTXT_OTHERS_SWAPPING: -+ PRINTF0 (ctxt, DBG_SWAP, "StartSwapoutContext: -> others_swapping_more\n"); -+ ctxt->OthersState = CTXT_OTHERS_SWAPPING_MORE; -+ -+ runCount = SysCntx ? &dev->HaltAllCount : &dev->HaltNonContext0Count; -+ -+ if ((*runCount)++ == 0) -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ break; -+ default: -+ PRINTF1 (ctxt, DBG_SWAP, "StartSwapoutContext: OthersState=%d\n", ctxt->OthersState); -+ break; -+ } -+} -+ -+#if defined(DIGITAL_UNIX) -+/* temporary tweaks to priority bump */ -+int lwp_do_prio = 1; -+int lwp_do_nxm = 1; -+int lwp_prio = BASEPRI_USER-1; -+#elif defined(LINUX) -+/* This is the default nice level for the helper LWP */ -+int LwpNice = -1; -+#endif -+ -+int -+elan3_lwp (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: started, context 0x%x\n", ctxt->Capability.cap_mycontext); -+ -+#if defined(DIGITAL_UNIX) -+ { -+ thread_t mythread = current_thread(); -+ if (lwp_do_prio && (lwp_do_nxm || !IS_NXM_TASK(mythread->task))) -+ { -+ mythread->priority = mythread->sched_pri = lwp_prio; -+ mythread->max_priority = BASEPRI_HIGHEST; -+ (void) thread_priority(mythread, lwp_prio, 0, 1); -+ } -+ } -+#elif defined(LINUX) -+ { -+ /* Do the priority trick for the helper LWP so that it -+ * runs in preferance to the user threads which may be -+ * burning CPU waiting for a trap to be fixed up -+ */ -+#ifdef NO_O1_SCHED -+ if (LwpNice >= -20 && LwpNice < 20) -+ current->nice = LwpNice; -+#else -+ set_user_nice(current, LwpNice); -+#endif -+ } -+#endif -+ -+ elan3_swapin (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ /* If we're swapped out, and not detached (or exiting) then wait until we're swapped back in */ -+ /* since otherwise we could "spin" forever continually calling elan3_lwp() */ -+ if ((ctxt->Status & CTXT_SWAPPED_REASONS) && ! (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING))) -+ kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags); -+ -+ for (;;) -+ { -+#if defined(DIGITAL_UNIX) -+ if (thread_should_halt(current_thread()) || -+ CURSIG_CHECK(task_to_proc(current_thread()->task), u.np_uthread)) -+ { -+ PRINTF1 (ctxt, DBG_LWP, "elan3_lwp: exiting on %s\n", -+ thread_should_halt(current_thread()) ? "halt" : "signal"); -+ break; -+ } -+#endif -+ -+ if (ctxt->Status & CTXT_SWAPPED_REASONS) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting on swapped reasons\n"); -+ break; -+ } -+ -+ if (! (ctxt->inhibit)) -+ { -+ if (FixupNetworkErrors (ctxt, &flags) == ESUCCESS && -+ HandleExceptions (ctxt, &flags) == ESUCCESS && -+ RestartContext (ctxt, &flags) == ESUCCESS) -+ { -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ } -+ else -+ { -+ printk("elan3_lwp :: skipping as inhibited\n"); -+ if (kcondvar_waitsig (&ctxt->Wait, &dev->IntrLock, &flags) == 0) -+ { -+ PRINTF0 (ctxt, DBG_LWP, "elan3_lwp: exiting by kcondvar_wait_sig()\n"); -+ break; -+ } -+ } -+ -+ } -+ -+ /* Return EINVAL to elan3_syscall_lwp() when we want it to exit */ -+ res = (ctxt->Status & (CTXT_DETACHED|CTXT_EXITING)) ? EINVAL : 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_swapout (ctxt, CTXT_NO_LWPS); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ FixupNetworkErrors (ctxt, &flags); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ ELAN3_DEV *dev = NULL; -+ int new_disabled = 0; -+ int ctxnum; -+ -+ ASSERT (ctxt != NULL); -+ dev = ctxt->Device; -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ new_disabled = (ctxt->Input0Trap.State != CTXT_STATE_OK || -+ ctxt->Input1Trap.State != CTXT_STATE_OK || -+ (ctxt->Status & CTXT_INPUTTER_REASONS) != 0); -+ -+ -+ ctxnum = ctxt->Capability.cap_mycontext; -+ -+#ifndef __lock_lint -+ PRINTF2 (ctxt , DBG_IPROC, "SetInputterState: ctxnum %x %s attached\n", ctxnum, ctxt->Disabled ? "disabled " : ""); -+#endif /* __lock_lint */ -+ -+ if (ctxt->Disabled != new_disabled) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "SetInputterState: ctxnum %x change %s\n", ctxnum, new_disabled ? "enabled to disabled" : "disabled to enabled"); -+ -+ ctxt->Disabled = new_disabled; -+ -+ /* synchronize the context filter for this context */ -+ elan3mmu_set_context_filter (dev, ctxnum, new_disabled, Pend, Maskp); -+ } -+} -+ -+int -+CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int delay = 1; -+ int i, SeenComQueueEmpty; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT (cflags != DmaComQueueNotEmpty || dev->HaltDmaDequeueCount != 0); -+ -+ /* -+ * Flush the command processor queues and poll the queue to see it it empties. -+ */ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * Ensure previous writes have been flushed through the write buffers -+ */ -+ wmb(); mmiob(); -+ -+ /* -+ * If the command processor traps, or it's taking too long to observe -+ * the queue as emtpy, then we need to force the interrupt handler to -+ * run for us. So queue a halt operation for the dma processor. -+ */ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ for (i = 20; i > 0 || (how & ISSUE_COMMAND_CANT_WAIT); i--) -+ { -+ if (SeenComQueueEmpty || (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ break; -+ -+ mb(); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ -+ SeenComQueueEmpty = !(read_reg32 (dev, ComQueueStatus) & cflags); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ /* -+ * If we've seen the command queue that we're interested in with nothing in it -+ * and the command processor has not trapped then the commands we've -+ * issued have been successfully processed. -+ */ -+ if (SeenComQueueEmpty && ! (read_reg32 (dev, Exts.InterruptReg) & (INT_CProc | INT_ComQueue))) -+ { -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: observed dma queue empty and command proc not trapped\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ISSUE_COMMAND_OK); -+ } -+ -+ if ((how & ISSUE_COMMAND_CANT_WAIT) != 0) -+ return (ISSUE_COMMAND_WAIT); -+ -+ /* -+ * Halt the dma processor and wait for it to halt, if the command we've issued has -+ * trapped then the interrupt handler will have moved it to the context structure. -+ */ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for dproc to halt\n"); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, WakeupLwp, ctxt); -+ while (! ctxt->Halted) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: waiting for Halted - %d\n", ctxt->Halted); -+ -+ kcondvar_wait (&ctxt->HaltWait, &dev->IntrLock, flags); -+ -+ PRINTF1 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: woken for Halted - %d\n", ctxt->Halted); -+ } -+ ctxt->Halted = 0; -+ -+ PRINTF0 (ctxt, DBG_CMD, "CheckCommandQueueFlushed: dproc halted, checking for trap\n"); -+ -+ if (cflags == DmaComQueueNotEmpty && --dev->HaltDmaDequeueCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ return (ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ) ? ISSUE_COMMAND_OK : ISSUE_COMMAND_TRAPPED); -+} -+ -+int -+WaitForCommandPort (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (ctxt->Status & CTXT_DETACHED) -+ res = EINVAL; -+ else -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ ctxt->Status |= CTXT_WAITING_COMMAND; -+ if (CTXT_IS_KERNEL(ctxt)) -+ kcondvar_wait (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ else -+ kcondvar_waitsig (&ctxt->CommandPortWait, &dev->IntrLock, &flags); -+ } -+ -+ res = (!ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) ? EAGAIN : 0; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static char * -+CommandName (int offset) -+{ -+ switch (offset) -+ { -+ case offsetof (E3_CommandPort, PutDma): return ("PutDma"); -+ case offsetof (E3_CommandPort, GetDma): return ("GetDma"); -+ case offsetof (E3_CommandPort, RunThread): return ("RunThread"); -+ case offsetof (E3_CommandPort, WaitEvent0): return ("WaitEvent0"); -+ case offsetof (E3_CommandPort, WaitEvent1): return ("WaitEvent1"); -+ case offsetof (E3_CommandPort, SetEvent): return ("SetEvent"); -+ default: return ("Bad Command"); -+ } -+} -+ -+int -+IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int cflags) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (cflags & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_RETRY\n", -+ CommandName (cmdoff), value); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueCommand: %s %08x -> ISSUE_COMMAND_OK\n", -+ CommandName (cmdoff), value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, ctxt->CommandPort + cmdoff); /* issue command */ -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ /* -+ * Since we may be issuing a command that could trap, and we're interested in -+ * the outcome, the command port trap resolving code must be locked out. -+ */ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((! (how & ISSUE_COMMAND_FOR_CPROC) && !ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ)) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_RETRY\n", value, item); -+ -+ /* -+ * Cannot issue commands for non-cproc traps if command port is trapped, -+ * nor if the dma/thread trap queues are full, or we're swapping out -+ */ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ -+ ASSERT (item == NULL || ctxt->CommandPortItem == NULL); -+ -+ /* -+ * Stop the DMA processor from removing entries from the -+ * command port, and force the command processor to do this. -+ * This means that if a trap occurs then it will be the command -+ * processor that traps. -+ */ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p]\n", value, item); -+ -+ /* -+ * Always issue the DMA to the 'write' command, since we've asserted HaltDmaDequeue -+ * the command processor will read the descriptor and transfer it to the run queue. -+ * The command processor looks at the dma_direction field to determine whether it is -+ * a read or a write and whether to alter the dma_souce of the descriptr on the run -+ * queue -+ */ -+ mb(); /* ensure writes to main memory ccompleted */ -+ writel (value, ctxt->CommandPort + offsetof (E3_CommandPort, PutDma)); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED) -+ { -+ PRINTF2 (ctxt, DBG_CMD, "IssueDmaCommand: PutDma %08x [%p] -> ISSUE_COMMAND_TRAPPED\n", value, item); -+ /* -+ * Remember the item we're issueing so that if the command port traps the item will not -+ * get freed off until the descriptor has been read after the command trap has been fixed -+ * up. -+ */ -+ if (item != NULL) -+ ctxt->CommandPortItem = item; -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+ -+ return (res); -+} -+ -+int -+WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int how) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, how, &flags); -+ -+ if (res == ISSUE_COMMAND_TRAPPED && item != NULL) -+ ctxt->CommandPortItem = item; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, E3_FaultSave_BE *FaultSaveArea, int flags) -+{ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ /* -+ * This code re-issues the part of the set event that trapped. -+ */ -+ switch (TrapType) -+ { -+ case MI_ChainedEventError: -+ ElanException (ctxt, EXCEPTION_CHAINED_EVENT, proc, trap, FaultSaveArea->s.EventAddress); -+ break; -+ -+ -+ case MI_SetEventReadWait: -+ /* -+ * Fault occured on the read for the event location. Just re-issue -+ * setevent using EventAddress in E3_FaultSave -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_SetEventReadWait: re-issuing setevent %08x\n", -+ FaultSaveArea->s.EventAddress); -+ -+ ReissueEvent (ctxt, (E3_Addr) FaultSaveArea->s.EventAddress, flags); -+ break; -+ -+ case MI_DoSetEvent: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * perform the set. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check that the event has the block copy bit -+ * set in it, since we couldn't trap here if it -+ * didn't -+ */ -+ if ((EventType & EV_TYPE_BCOPY) != EV_TYPE_BCOPY) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_DoSetEvent: RunEventType %x\n", EventType); -+ -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ -+ break; -+ } -+ -+ case MI_ThreadUpdateNonSysCntxBack: -+ case MI_ThreadUpdateSysCntxBack: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the thread. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_THREAD)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: Unexpected type=%x for setevent trap. Should be thread\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_ThreadUpdateCntx0Back: RunEventType %x\n", EventType); -+ if (RunEventType (ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventIntUpdateBPtr: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_EVIRQ)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_EventIntUpdateBPtr: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_RunDmaDesc: -+ { -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Must grab the event type, source and dest then simulate the block copy and then -+ * run the dma. Once the block copy is started the event location cannot be read -+ * again. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ /* -+ * Check for the correct EventPtr type -+ */ -+ if ((EventType & (EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)) != (EV_TYPE_BCOPY | EV_TYPE_DMA)) -+ { -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: Unexpected type=%x\n", EventType); -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_RunDmaDesc: RunEventType %x\n", EventType); -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_EVENT, "FixupEventTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", FaultSaveArea->s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, FaultSaveArea->s.FaultAddress); -+ break; -+ -+ case MI_FinishedSetEvent: -+ /* -+ * Fault occured because the block write of a block copy event trapped. -+ * Simulate the block copy. -+ */ -+ if (SimulateBlockCopy (ctxt, FaultSaveArea->s.EventAddress)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ case MI_BlockCopyEvent: -+ case MI_BlockCopyWaitForReadData: -+ { -+ /* -+ * Fault occured on the read or write of the data for a block copy -+ * event. Simulate the block copy using EventAddress in E3_FaultSave. Must also sample -+ * the event type and then perform a run. -+ */ -+ E3_Event *EventPtr = (E3_Event *) elan3mmu_mainaddr (ctxt->Elan3mmu, FaultSaveArea->s.EventAddress); -+ E3_uint32 EventType = fuword (&EventPtr->ev_Type); -+ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: MI_BlockCopyWaitForReadData: BCopy read fault in BCopy event. Simulating BCopy.\n"); -+ -+ if (RunEventType(ctxt, FaultSaveArea, EventType)) -+ ElanException (ctxt, EXCEPTION_BAD_EVENT, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ /* XXXX: should handle queue overflow */ -+ PRINTF0 (ctxt, DBG_EVENT, "FixupEventTrap: Queue overflow\n"); -+ -+ ElanException (ctxt, EXCEPTION_QUEUE_OVERFLOW, proc, trap, FaultSaveArea, TrapType); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BUS_ERROR, proc, trap, FaultSaveArea, TrapType); -+ break; -+ } -+} -+ -+int -+SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress) -+{ -+ E3_Addr SourcePtrElan; -+ E3_Addr DestPtrElan; -+ unsigned DataType; -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_FAULTED, EVENT_PROC, NULL, EventAddress); -+ return (TRUE); -+ } -+ -+ SourcePtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Source)); -+ DestPtrElan = ELAN3_OP_LOAD32 (ctxt, EventAddress + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ DataType = DestPtrElan & EV_BCOPY_DTYPE_MASK; -+ DestPtrElan &= ~EV_BCOPY_DTYPE_MASK; -+ -+ -+ PRINTF3 (ctxt, DBG_EVENT, "SimulateBlockCopy: Event %08x SourcePtr %08x DestPtr %08x\n", -+ EventAddress, SourcePtrElan, DestPtrElan); -+ -+ if (SourcePtrElan & EV_WCOPY) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan, SourcePtrElan); -+ else -+ { -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+ /* -+ * For little endian cpu's we don't need to worry about the data type. -+ */ -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+#else -+ switch (DataType) -+ { -+ case EV_TYPE_BCOPY_BYTE: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD8 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_HWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD16 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_WORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD32 (ctxt, SourcePtrElan + i)); -+ break; -+ -+ case EV_TYPE_BCOPY_DWORD: -+ for (i = E3_BLK_SIZE-(2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ -+ i = E3_BLK_SIZE - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, DestPtrElan + i, ELAN3_OP_LOAD64 (ctxt, SourcePtrElan + i)); -+ break; -+ } -+#endif -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (FALSE); -+} -+ -+void -+ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr, int flags) -+{ -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent : Event=%08x\n", addr); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), addr, flags) == ISSUE_COMMAND_RETRY) -+ { -+ PRINTF1 (ctxt, DBG_CMD, "ReissueEvent: queue event %08x\n", addr); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_SETEVENT]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_SETEVENT, addr); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+} -+ -+int -+SetEventsNeedRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_SETEVENT] != 0); -+} -+ -+void -+RestartSetEvents (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 EventPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_SETEVENT]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_SETEVENT, &item, &EventPointer)) -+ ctxt->ItemCount[LIST_SETEVENT] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), EventPointer, FALSE) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_SETEVENT, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_SETEVENT]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+int -+RunEventType(ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType) -+{ -+ int failed = FALSE; -+ -+ if ((EventType & EV_TYPE_BCOPY) != 0) -+ failed = SimulateBlockCopy(ctxt, FaultSaveArea->s.EventAddress); -+ -+ if ((EventType & EV_TYPE_MASK) == EV_TYPE_THREAD) -+ ReissueStackPointer (ctxt, EventType & ~(EV_TYPE_MASK_THREAD|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_MASK) == EV_TYPE_DMA) -+ RestartDmaPtr (ctxt, EventType & ~(EV_TYPE_MASK_DMA|EV_TYPE_MASK_BCOPY)); -+ else if ((EventType & EV_TYPE_EVIRQ) != 0) -+ QueueEventInterrupt (ctxt, EventType & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)); -+ else /* Chained event */ -+ { -+ if ((EventType & ~EV_TYPE_BCOPY) != 0) /* not null setevent */ -+ ReissueEvent (ctxt, EventType & ~(EV_TYPE_MASK_CHAIN|EV_TYPE_MASK_BCOPY), FALSE); -+ } -+ -+ return (failed); -+} -+ -+void -+WakeupLwp (ELAN3_DEV *dev, void *arg) -+{ -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_INTR, "WakeupLwp: %d\n", SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ctxt->Halted = 1; -+ kcondvar_wakeupone (&ctxt->HaltWait, &dev->IntrLock); -+ -+ PRINTF0 (ctxt, DBG_INTR, "WakeupLwp: woken up context\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF1 (ctxt, DBG_EVENT, "QueueEventInterrupt: cookie %08x\n", cookie); -+ -+ if (ELAN3_OP_EVENT (ctxt, cookie, OP_INTR) == OP_DEFER) -+ { -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ else -+ { -+ *(ELAN3_QUEUE_BACK (ctxt->EventCookieQ, ctxt->EventCookies)) = cookie; -+ -+ ELAN3_QUEUE_ADD (ctxt->EventCookieQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ if (ELAN3_QUEUE_FULL (ctxt->EventCookieQ)) -+ { -+ ctxt->Status |= CTXT_EVENT_QUEUE_FULL; -+ StartSwapoutContext (ctxt, 0, NULL); -+ } -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+} -+ -+int -+ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...) -+{ -+ int res; -+ va_list ap; -+ -+ va_start (ap, trap); -+ -+ PRINTF2 (ctxt, DBG_FN, "ElanException: proc %d type %d\n", proc, type); -+ -+ res = ELAN3_OP_EXCEPTION (ctxt, type, proc, trap, ap); -+ -+ va_end (ap); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/context_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/context_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/context_linux.c 2005-07-28 14:52:52.787688008 -0400 -@@ -0,0 +1,229 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: context_linux.c,v 1.28.2.3 2005/03/02 13:45:27 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/context_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr addr, int len, int protFault, int writeable) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ caddr_t mainAddr; -+ int perm; -+ unsigned int off; -+ unsigned long flags; -+ -+ ASSERT (PAGE_ALIGNED (addr) && PAGE_ALIGNED (len)); -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr %08x len %08x%s%s\n", -+ addr, len, protFault ? " prot fault" : "", writeable ? " writeable" : ""); -+ -+ /* Ensure there's enough elan mmu tables for us to use */ -+ elan3mmu_expand (elan3mmu, addr, len, PTBL_LEVEL_3, 0); -+ -+ while (len > 0) -+ { -+ /* -+ * Retrieve permission region and calculate main address -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_elan (elan3mmu, addr); -+ if (rgn == NULL) { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: no permission region at %lx %p\n", -+ (u_long) addr, rgn); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EFAULT); -+ } -+ mainAddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ ASSERT (PAGE_ALIGNED ((unsigned long)mainAddr)); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ /* -+ * If we're tying to load a translation to the elan command port, -+ * then don't do it now, but mark the context to have it reloaded -+ * just before we restart any threads. We do this because we don't -+ * want to call into the segment driver since we could then block -+ * waiting for the command port to become available. -+ */ -+ if (mainAddr == ctxt->CommandPageMapping) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: addr=%08x maps command port\n", addr); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ UnloadCommandPageMapping (ctxt); -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ struct vm_area_struct *area; -+ struct mm_struct *mm = current->mm; -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((area = find_vma_intersection(mm, (unsigned long)mainAddr, (unsigned long)mainAddr + PAGESIZE)) == NULL) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p no vma\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable && !(area->vm_flags & VM_WRITE)) -+ { -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p not writeable\n", mainAddr); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadElanTranslation: %p %s %s\n", -+ mainAddr, writeable ? "writeable" : "readonly", -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ writeable && !pte_write(ptep_value) ? "COW" : "OK"); -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ -+ get_user_pages (current, current->mm, (unsigned long) mainAddr, PAGE_SIZE, -+ (area->vm_flags & VM_WRITE), 0, NULL, NULL); -+ -+ spin_lock (&mm->page_table_lock); -+ -+ /* dont deference the pointer after the unmap */ -+ ptep_ptr = find_pte_map (mm, (unsigned long)mainAddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ if (!ptep_ptr || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value))) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ } -+ -+ /* don't allow user write access to kernel pages if not kernel */ -+ if (!pte_read(ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ return EFAULT; -+ } -+ -+ if (writeable) -+ pte_mkdirty(ptep_value); -+ pte_mkyoung (ptep_value); -+ -+ /* now load the elan pte */ -+ if (writeable) -+ perm = rgn->rgn_perm; -+ else -+ perm = ELAN3_PERM_READONLY(rgn->rgn_perm & ELAN3_PTE_PERM_MASK) | (rgn->rgn_perm & ~ELAN3_PTE_PERM_MASK); -+ -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, pte_phys(ptep_value) + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ -+ spin_unlock (&mm->page_table_lock); -+ up_read (¤t->mm->mmap_sem); -+ } -+ -+ len -= PAGESIZE; -+ addr += PAGESIZE; -+ } -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * LoadCommandPortTranslation: -+ * explicitly load an elan translation to the command port. -+ * but only do it if the command port is accessible. -+ * -+ * we call this function just after we have restarted -+ * and trapped commands, since when a command traps -+ * the elan translation to the command port is unloaded. -+ */ -+void -+LoadCommandPortTranslation (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3MMU_RGN *rgn; -+ E3_Addr addr; -+ int perm; -+ physaddr_t phys; -+ unsigned int off; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FAULT, "LoadCommandPortTranslation: SegAddr=%p Status=%x\n", ctxt->CommandPageMapping, ctxt->Status); -+ -+ if (ctxt->CommandPageMapping != NULL && !(ctxt->Status & CTXT_COMMAND_MAPPED_ELAN)) -+ { -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ rgn = elan3mmu_rgnat_main (elan3mmu, ctxt->CommandPageMapping); -+ if (rgn == (ELAN3MMU_RGN *) NULL) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: no permission for command port\n"); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return; -+ } -+ -+ addr = rgn->rgn_ebase + (ctxt->CommandPageMapping - rgn->rgn_mbase); -+ perm = rgn->rgn_perm; -+ phys = kmem_to_phys((caddr_t) ctxt->CommandPage); -+ -+ spin_lock_irqsave (&ctxt->Device->IntrLock, flags); -+ if (ELAN3_QUEUE_EMPTY(ctxt->CommandTrapQ) && !(ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF(ctxt, DBG_FAULT, "LoadCommandPortTranslation: load xlation addr=%08x phys=%llx perm=%d\n", -+ addr, (unsigned long long)phys, perm); -+ -+ ctxt->Status |= CTXT_COMMAND_MAPPED_ELAN; -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, addr + off, phys + off, perm, PTE_LOAD | PTE_NO_SLEEP); -+ } -+ spin_unlock_irqrestore (&ctxt->Device->IntrLock, flags); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/cproc.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/cproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/cproc.c 2005-07-28 14:52:52.788687856 -0400 -@@ -0,0 +1,539 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cproc.c,v 1.46 2004/02/10 15:05:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/cproc.c,v $ */ -+ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+void -+HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ E3_FaultSave_BE FaultSave; -+ CProcTrapBuf_BE TrapBuf; -+ COMMAND_TRAP *trap; -+ ELAN3_CTXT *ctxt; -+ sdramaddr_t CurrTrap; -+ sdramaddr_t LastTrapAddr; -+ int NTrapEntries; -+ int NewPend; -+ unsigned long flags; -+ -+ /* -+ * Temporarily mask out the command processor interrupt, since -+ * we may cause it be re-asserted when we re-issue the commands -+ * from the overflow queue area. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ -+ do { -+ if (NewPend & INT_ComQueue) -+ { -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueError) != 0) -+ { -+ printk ("elan%d: InterruptReg=%x ComQueueStatus=%x\n", dev->Instance, -+ read_reg32 (dev, Exts.InterruptReg), read_reg32 (dev, ComQueueStatus)); -+ panic ("elan: command queue has overflowed !!"); -+ /* NOTREACHED */ -+ } -+ -+ BumpStat (dev, ComQueueHalfFull); -+ -+ /* -+ * Capture the other cpus and stop the threads processor then -+ * allow the command processor to eagerly flush the command queue. -+ */ -+ dev->FlushCommandCount++; dev->HaltThreadCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ /* -+ * Let the threads processor run again, and release the cross call. -+ */ -+ RELEASE_CPUS(); -+ -+ dev->FlushCommandCount--; dev->HaltThreadCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * Re-sample the interrupt register to see if the command processor -+ * has trapped while flushing the queue. Preserve the INT_ComQueue -+ * bit, so we can clear the ComQueueStatus register later. -+ */ -+ NewPend = (read_reg32 (dev, Exts.InterruptReg) | INT_ComQueue); -+ } -+ -+ CurrTrap = dev->CommandPortTraps[dev->CurrentCommandPortTrap]; -+ -+ if (NewPend & INT_CProc) -+ { -+ BumpStat (dev, CProcTraps); -+ -+ /* -+ * Copy the MMU Fault Save area and zero it out for future traps. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), &FaultSave, sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), sizeof (E3_FaultSave)); -+ -+ /* -+ * First entry in the cproc trap save area is the value of Areg and Breg for the -+ * uWord before the address fault. -+ */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf.Align64); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.r.Breg >> 16)); -+ if (ctxt == NULL) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context invalid [%08x.%08x]\n", TrapBuf.r.Areg, TrapBuf.r.Breg); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ { -+ if ((ctxt->Status & CTXT_COMMAND_OVERFLOW_ERROR) == 0) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->CommandTrapQ, ctxt->CommandTraps); -+ -+ trap->FaultSave = FaultSave; -+ trap->Status.Status = read_reg32 (dev, Exts.CProcStatus.Status); -+ trap->TrapBuf = TrapBuf; -+ -+ /* -+ * The command processor does not stop after it has trapped. It will continue -+ * to save commands for other contexts into the commands port save area. -+ * The valid context for the trap is held in FaultSave. As some of this -+ * trap code uses the context in the status register the local copy must be -+ * updated with the trap context. -+ */ -+ trap->Status.s.Context = (TrapBuf.r.Breg >> 16); -+ -+ PRINTF4 (ctxt, DBG_INTR, "HandleCProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName(trap->Status.s.TrapType)); -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: Areg=%08x Breg=%08x\n", -+ trap->TrapBuf.r.Areg, trap->TrapBuf.r.Breg); -+ -+ if (ELAN3_OP_CPROC_TRAP (ctxt, trap) == OP_DEFER) -+ { -+ ELAN3_QUEUE_ADD (ctxt->CommandTrapQ); -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleCProcTrap: setting Command Flag at %p to 1\n", &ctxt->FlagPage->CommandFlag); -+ -+ ctxt->FlagPage->CommandFlag = 1; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ } -+ } -+ -+ UnloadCommandPageMapping (ctxt); -+ } -+ } -+ -+ /* -+ * Now change the CommandPortTrap queue. -+ * Must stop the command processor, wait for it to stop, find the final -+ * entry in the current cproc trap save area, reset the comm port -+ * trap save address to the other queue, clear the command port interrupt and -+ * set it running normally again, and then let it go again. This is not very -+ * time critical but it would be a good idea to prevent a higher priority -+ * interrupt from slowing down the process to prevent to fifos filling. -+ */ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ SET_SCHED_STATUS (dev, CProcStop); -+ -+ while ((read_reg32 (dev, Exts.SchCntReg) & CProcStopped) == 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for command processor to stop\n"); -+ mb(); -+ } -+ -+ /* -+ * Remember how many entries are in the saved command queue, and -+ * re-initialise it, before restarting the command processor. -+ */ -+ NTrapEntries = (read_reg32 (dev, CProc_TrapSave_Addr) - dev->CommandPortTraps[dev->CurrentCommandPortTrap])/sizeof (E3_uint64); -+ LastTrapAddr = dev->CommandPortTraps[dev->CurrentCommandPortTrap] + NTrapEntries*sizeof (TrapBuf); -+ -+ dev->CurrentCommandPortTrap ^= 1; -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: command trap queue has %d entries\n", NTrapEntries); -+ -+ if (NTrapEntries > ELAN3_COMMAND_TRAP_SIZE/sizeof (E3_uint64)) -+ panic ("HandleCProcTrap: command trap queue has overflowed\n"); -+ -+ if (NewPend & INT_CProc) -+ { -+ /* -+ * Clear the CProc interrupt and set it running normally again. Nothing should -+ * be running now that could issue commands apart from this trap handler. -+ */ -+ PULSE_SCHED_STATUS (dev, RestartCProc); -+ } -+ -+ if (NewPend & INT_ComQueue) -+ { -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. This will also remove the overflow interrupt. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ } -+ -+ /* -+ * And let the command processor start again -+ */ -+ CLEAR_SCHED_STATUS (dev, CProcStop); -+ -+ /* -+ * Now re-issue all the commands that were issued after the command port trapped. -+ * Should halt the dma processor and force command sto be put onto the run queues -+ * to ensure that a remote re-issued command is handled correctly. NOTE it is -+ * not necessary to wait for the dma processor to stop and this will reduce the -+ * performance impact. As CProcHalt is asserted all commands will be flushed -+ * to the queues. -+ */ -+ dev->HaltDmaDequeueCount++; dev->FlushCommandCount++; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ /* -+ * XXXX: should we do a capture/release if the trap overflow -+ * area has a "large" number of commands in it, since -+ * we will just stuff them all back in, together with -+ * all those issued by the other cpus/thread processors. -+ */ -+ while (CurrTrap != LastTrapAddr) -+ { -+ /* Read the next saved (but not trapped) command */ -+ TrapBuf.Align64 = elan3_sdram_readq (dev, CurrTrap); CurrTrap += sizeof (TrapBuf); -+ -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, (TrapBuf.s.ContextType >> 16)); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: context %x invalid\n", TrapBuf.s.ContextType >> 16); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (!ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF3 (ctxt, DBG_INTR, "HandleCProcTrap: save command %x context %x - %08x\n", -+ (TrapBuf.s.ContextType>>3) & 0x3ff, TrapBuf.s.ContextType >> 17, TrapBuf.s.Addr); -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, Maskp); -+ } -+ else -+ { -+ *ELAN3_QUEUE_BACK(ctxt->CommandQ, ctxt->Commands) = TrapBuf; -+ -+ ELAN3_QUEUE_ADD (ctxt->CommandQ); -+ } -+ continue; -+ } -+ -+ /* Reissue the command to the command port for this context */ -+ PRINTF2 (ctxt, DBG_INTR, "HandleCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf.s.ContextType>>5) & 0xff, TrapBuf.s.Addr); -+ -+ mb(); -+ if (ELAN3_OP_CPROC_REISSUE(ctxt, &TrapBuf) != OP_HANDLED) -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf.s.ContextType>>5) & 0xff] = TrapBuf.s.Addr; -+ mmiob(); -+ } -+ } -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandleCProcTrap: waiting for queues to empty after reissueing commands\n"); -+ mb(); -+ } -+ -+ dev->HaltDmaDequeueCount--; dev->FlushCommandCount--; -+ SetSchedStatusRegister (dev, Pend, Maskp); -+ -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ /* -+ * Re-read the interrupt register and see if we've got another command -+ * port interrupt -+ */ -+ NewPend = read_reg32 (dev, Exts.InterruptReg); -+ } while ((NewPend & (INT_CProc | INT_ComQueue)) != 0); -+ -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+} -+ -+void -+ResolveCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP *trap; -+ int res; -+ unsigned long flags; -+ -+ kmutex_lock (&ctxt->CmdLock); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_BACK_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = ELAN3_QUEUE_MIDDLE(ctxt->CommandTrapQ, ctxt->CommandTraps); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_EventIntUpdateBPtr: -+ case MI_ChainedEventError: -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: %s\n", MiToName (trap->Status.s.TrapType)); -+ break; -+ -+ default: -+ /* All other traps are MMU related, we should have a fault address and FSR */ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_CPROC, "ResolveCProcTrap: elan3_pagefault failed for address %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, COMMAND_PROC, trap, &trap->FaultSave, res); -+ -+ /* Set the trap type to 0 so the command does not get re-issued */ -+ trap->Status.s.TrapType = 0; -+ } -+ break; -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ELAN3_QUEUE_CONSUME (ctxt->CommandTrapQ); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&ctxt->CmdLock); -+} -+ -+int -+RestartCProcTrap (ELAN3_CTXT *ctxt) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ COMMAND_TRAP trap; -+ void *item; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ while (! ELAN3_QUEUE_FRONT_EMPTY (ctxt->CommandTrapQ)) -+ { -+ trap = (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)); -+ ELAN3_QUEUE_REMOVE (ctxt->CommandTrapQ); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ BumpUserStat (ctxt, CProcTraps); -+ -+ switch (trap.Status.s.TrapType) -+ { -+ case 0: -+ res = ISSUE_COMMAND_OK; -+ break; -+ -+ case MI_WaitForWaitEventDesc: -+ /* -+ * Fault occured on the read of wait event descriptor for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 desc read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy0: -+ /* -+ * Fault occured on the read of event location for wait event type 0. -+ * Fault already fixed. Just re-issue the wait command. Wait event descriptor addr -+ * is in the Areg save value. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type0 event loc fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent0), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForEventReadTy1: -+ /* -+ * Fault occured on the read of the event location for wait event type 1. -+ * Areg has the original ptr and count. -+ * Fault already fixed. Just re-issue the wait command using Areg and context. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: WaitEvent type1 event location read fault %08x\n", -+ trap.TrapBuf.r.Areg); -+ res = IssueCommand (ctxt, offsetof (E3_CommandPort, WaitEvent1), trap.TrapBuf.r.Areg, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ case MI_WaitForCntxDmaDescRead: -+ case MI_WaitForNonCntxDmaDescRead: -+ /* -+ * Fault occured on the read of the dma descriptor. Run dma using the -+ * Fault Address in FaultSave. -+ */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: MI_WaitForCntxDmaDescRead: re-issue dma at %08x\n", -+ trap.FaultSave.s.FaultAddress); -+ -+ res = IssueDmaCommand (ctxt, trap.FaultSave.s.FaultAddress, NULL, ISSUE_COMMAND_FOR_CPROC); -+ break; -+ -+ default: -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ FixupEventTrap (ctxt, COMMAND_PROC, &trap, trap.Status.s.TrapType, &trap.FaultSave, ISSUE_COMMAND_FOR_CPROC); -+ -+ res = ISSUE_COMMAND_OK; -+ break; -+ } -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: /* command re-issued ok*/ -+ break; -+ -+ case ISSUE_COMMAND_TRAPPED: /* command trapped, it will have been copied */ -+ return (EAGAIN); /* to the back of the trap queue */ -+ -+ case ISSUE_COMMAND_RETRY: /* didn't issue command, so place back at front for */ -+ spin_lock_irqsave (&dev->IntrLock, flags); /* later (after resolving other traps */ -+ -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->CommandTrapQ)) -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ else -+ { -+ ELAN3_QUEUE_ADD_FRONT(ctxt->CommandTrapQ); -+ (*ELAN3_QUEUE_FRONT (ctxt->CommandTrapQ, ctxt->CommandTraps)) = trap; -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ -+ default: -+ return (EINVAL); -+ } -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ } -+ -+ /* -+ * GNAT 5409 - if CommandPortItem was not NULL, but other reasons were set, -+ * then we'd not free the CommandPortItem even though we'd re- -+ * issued all trapped and overflowed commands. Hence only return -+ * without clearing CommandPortItem if we will be called again as -+ * either CommandTrapQ or CommandQ is not empty. -+ */ -+ -+ /* Now run the overflowed commands for this context */ -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ if (! ELAN3_QUEUE_EMPTY (ctxt->CommandTrapQ) || (ctxt->Status & CTXT_OTHERS_REASONS)) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: cannot issue overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ -+ /* -+ * Just re-issue the commands, if one traps then the remainder will -+ * just get placed in the overflow queue again and the interrupt handler -+ * will copy them back in here. -+ * -+ * Stop the dma processor from taking commands, since one of the commands -+ * could be a re-issued remote dma, which must be processed by the command -+ * processor. -+ */ -+ -+ if (dev->HaltDmaDequeueCount++ == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ -+ while (! ELAN3_QUEUE_EMPTY (ctxt->CommandQ)) -+ { -+ CProcTrapBuf_BE *TrapBuf = ELAN3_QUEUE_FRONT (ctxt->CommandQ, ctxt->Commands); -+ -+ PRINTF2 (ctxt, DBG_CPROC, "RestartCProcTrap: re-issue command %x - %08x\n", -+ (TrapBuf->s.ContextType>>5) & 0xff, TrapBuf->s.Addr); -+ mb(); /* ensure writes to main memory completed */ -+ ((E3_uint32 *) ctxt->CommandPort)[(TrapBuf->s.ContextType>>5) & 0xff] = TrapBuf->s.Addr; -+ mmiob(); /* and flush through IO writes */ -+ -+ ELAN3_QUEUE_REMOVE (ctxt->CommandQ); -+ } -+ -+ /* observe the command processor having halted */ -+ res = CheckCommandQueueFlushed (ctxt, DmaComQueueNotEmpty, 0, &flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ { -+ PRINTF0 (ctxt, DBG_CPROC, "RestartCProcTrap: trapped after issueing overflowed commands\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EAGAIN); -+ } -+ } -+ -+ /* remove the command port item, while holding the lock */ -+ item = ctxt->CommandPortItem; -+ ctxt->CommandPortItem = NULL; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (item != NULL) /* Free of any item that may have been stored */ -+ { /* because of the commandport trap */ -+ PRINTF1 (ctxt, DBG_CPROC, "RestartCProcTrap: commandPortItem %p\n", item); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ } -+ -+ return (ESUCCESS); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/dproc.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/dproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/dproc.c 2005-07-28 14:52:52.789687704 -0400 -@@ -0,0 +1,553 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: dproc.c,v 1.52 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/dproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DMA_RETRY_FAIL_COUNT 8 -+ -+static void PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr); -+ -+int -+HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ /* Scoop out the trap information, before restarting the Elan */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ ASSERT(trap->Status.s.WakeupFunction == WakeupNever); -+ -+ /* copy the normal dma access fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), &trap->FaultSave, sizeof (E3_FaultSave_BE)); -+ -+ /* copy all 4 of the dma data fault type */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), &trap->Data0, 4*sizeof (E3_FaultSave_BE)); -+ -+ /* Copy the DMA descriptor */ -+ copy_dma_regs (dev, &trap->Desc); -+ -+ /* Copy the packet info */ -+ trap->PacketInfo.Value = read_reg32 (dev, Exts.Dmas.DmaRds.DMA_PacketInfo.Value); -+ -+ /* update device statistics */ -+ BumpStat (dev, DProcTraps); -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->PacketInfo.s.PacketTimeout) -+ BumpStat (dev, DmaOutputTimeouts); -+ else if (trap->PacketInfo.s.PacketAckValue == C_ACK_ERROR) -+ BumpStat (dev, DmaPacketAckErrors); -+ break; -+ -+ case MI_DmaFailCountError: -+ BumpStat (dev, DmaRetries); -+ break; -+ } -+ -+ /* Must now zero all the FSRs so that a subsequent fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), sizeof (E3_FaultSave)); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 4*sizeof (E3_FaultSave)); -+ -+ *RestartBits |= RestartDProc; -+ return (TRUE); -+} -+ -+void -+DeliverDProcTrap (ELAN3_DEV *dev, DMA_TRAP *dmaTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ E3_FaultSave_BE *FaultArea; -+ DMA_TRAP *trap; -+ register int i; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, dmaTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverDProcTrap: context %x invalid\n", dmaTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_DPROC_TRAP (ctxt, dmaTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->DmaTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ -+ bcopy (dmaTrap, trap, sizeof (DMA_TRAP)); -+ -+ PRINTF5 (ctxt, DBG_INTR, "DeliverDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x PacketInfo=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, trap->PacketInfo.Value, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_INTR, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_INTR, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_INTR, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_INTR, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ ELAN3_QUEUE_ADD (ctxt->DmaTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->DmaTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverDProcTrap: dma queue full, must swap out\n"); -+ ctxt->Status |= CTXT_DMA_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->DmaTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->DmaTrapQ, ctxt->DmaTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->DmaTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ E3_FaultSave_BE *FaultArea; -+ int FaultHandled = 0; -+ int res; -+ register int i; -+ -+ PRINTF4 (ctxt, DBG_DPROC, "ResolveDProcTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ PRINTF3 (ctxt, DBG_DPROC, " FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ PRINTF4 (ctxt, DBG_DPROC, " %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_DPROC, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ PRINTF2 (ctxt, DBG_DPROC, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ PRINTF2 (ctxt, DBG_DPROC, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ BumpUserStat (ctxt, DProcTraps); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ /* -+ * Faulted due to packet timeout or a PAckError. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: got a PAckError or the output timed out. Rescheduling dma.\n"); -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_DmaFailCountError: -+ /* -+ * Faulted due to dma fail count. -+ * Reset fail count and reissue the same desc. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: Reset dma fail count to %d\n", DMA_RETRY_FAIL_COUNT); -+ -+ if (ElanException (ctxt, EXCEPTION_DMA_RETRY_FAIL, DMA_PROC, trap) == OP_IGNORE) -+ { -+ BumpUserStat (ctxt, DmaRetries); -+ -+ trap->Desc.s.dma_failCount = DMA_RETRY_FAIL_COUNT; -+ -+ RestartDmaTrap (ctxt, trap); -+ } -+ return; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: dma timeslice queue overflow\n"); -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_UnimplementedError: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: unimplemented dma trap\n"); -+ if (ElanException (ctxt, EXCEPTION_UNIMPLEMENTED, DMA_PROC, trap) == OP_IGNORE) -+ RestartDmaTrap (ctxt, trap); -+ return; -+ -+ case MI_EventQueueOverflow: -+ case MI_ThreadQueueOverflow: -+ case MI_DmaQueueOverflow: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ return; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return; -+ -+ case MI_WaitForRemoteDescRead2: -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * The DMA processor has trapped while fetching the dma descriptor, so -+ * zero it out to not confuse the user on an error -+ */ -+ bzero (&trap->Desc, sizeof (trap->Desc)); -+ break; -+ } -+ -+ /* -+ * All other uWords will have updated one of the fault areas, so fix -+ * any faults found in them. If there were no faults found then it -+ * must have been a bus error -+ */ -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT ((FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block64 || -+ (FaultArea->s.FSR.Status & FSR_SizeMask) == FSR_Block32); -+ -+ ASSERT (FaultArea->s.FaultContext == trap->Status.s.Context); -+ -+ if (((trap->Desc.s.dma_source & PAGEOFFSET) >= (PAGESIZE-E3_BLK_SIZE)) && -+ ((trap->Desc.s.dma_source & PAGEMASK) != ((trap->Desc.s.dma_source + trap->Desc.s.dma_size-1) & PAGEMASK))) -+ { -+ /* XXXX: dma started within last 64 bytes of the page -+ * terminate the process if it has pagefaulted */ -+ if (FaultArea->s.FaultAddress == (trap->Desc.s.dma_source & ~(E3_BLK_SIZE-1))) -+ { -+ printk ("elan%d: invalid dma - context=%x source=%x\n", ctxt->Device->Instance, -+ ctxt->Capability.cap_mycontext, trap->Desc.s.dma_source); -+ -+ if (ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0) != OP_IGNORE) -+ return; -+ } -+ } -+ -+ if (trap->Desc.s.dma_size != 0 && (res = elan3_pagefault (ctxt, FaultArea, 1)) != ESUCCESS) -+ { -+ /* XXXX: Rev B Elans can prefetch data passed the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress < (trap->Desc.s.dma_source+trap->Desc.s.dma_size) || -+ FaultArea->s.FaultAddress > (trap->Desc.s.dma_source+trap->Desc.s.dma_size+E3_BLK_SIZE*2)) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ FaultArea->s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, FaultArea, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ } -+ -+ if (trap->FaultSave.s.FSR.Status != 0) -+ { -+ FaultHandled++; -+ -+ ASSERT (trap->FaultSave.s.FaultContext == trap->Status.s.Context); -+ -+ if ((trap->FaultSave.s.FSR.Status & FSR_SizeMask) == FSR_RouteFetch) -+ { -+ res = ResolveVirtualProcess (ctxt, trap->FaultSave.s.FaultAddress & 0xffff); /* mask out cookie */ -+ -+ switch (res) -+ { -+ default: -+ if (ElanException (ctxt, EXCEPTION_INVALID_PROCESS, DMA_PROC, trap, trap->FaultSave.s.FaultAddress, res) != OP_IGNORE) -+ return; -+ -+ case EAGAIN: -+ /* XXXX; wait on trail blazing code */ -+ -+ case 0: -+ break; -+ } -+ } -+ else -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: elan3_pagefault failed for address %x\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, res) != OP_IGNORE) -+ return; -+ } -+ } -+ } -+ -+ if (! FaultHandled) -+ { -+ ElanBusError (ctxt->Device); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, DMA_PROC, trap, &trap->FaultSave, EFAULT) != OP_IGNORE) -+ return; -+ } -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_WaitForRemoteDescRead2: -+ /* -+ * Faulted while trying to read the dma descriptor for a read dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a remote dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_ExecuteDmaDescriptorForRun: -+ /* -+ * Faulted while trying to read the dma descriptor for a write dma. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ PRINTF1 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a write dma descriptor at %x.\n", -+ trap->FaultSave.s.FaultAddress); -+ -+ RestartDmaPtr (ctxt, trap->FaultSave.s.FaultAddress); -+ break; -+ -+ case MI_WaitForRemoteRoutes1: -+ case MI_WaitForRemoteRoutes2: -+ case MI_SendRemoteDmaDesc: -+ case MI_SendDmaIdentify: -+ case MI_SendRemoteDmaRoutes2: -+ case MI_WaitForDmaRoutes1: -+ case MI_DmaLoop: -+ case MI_ExitDmaLoop: -+ case MI_GetDestEventValue: -+ case MI_SendFinalUnlockTrans: -+ case MI_SendNullSetEvent: -+ case MI_SendFinalSetEvent: -+ case MI_SendDmaEOP: -+ /* -+ * Faulted either fetching routes or fetching dma data. -+ * Fix fault and re-issue using FaultAddress. -+ */ -+ -+ case MI_SendEOPforRemoteDma: -+ case MI_LookAtRemoteAck: -+ case MI_FailedAckIfCCis0: -+ /* -+ * Possible fault when reading the remote desc into the dma data buffers -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped reading a dma data or fetching a route\n"); -+ RestartDmaTrap (ctxt, trap); -+ break; -+ -+ case MI_DequeueSysCntxDma: -+ case MI_DequeueNonSysCntxDma: -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ /* -+ * It is possible that a dma can get back onto the queue while outstanding dma -+ * have not finished trapping. In this case the trap can be ignored as the dma -+ * state has been saved. It might trap again the next time it comes to the front -+ * of the queue and be fixed then. -+ */ -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trap after dma has finished. ignored\n"); -+ break; -+ -+ default: -+ PRINTF0 (ctxt, DBG_DPROC, "ResolveDProcTrap: trapped on a write set event.\n"); -+ FixupEventTrap (ctxt, DMA_PROC, trap, trap->Status.s.TrapType, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+DProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_DMA_PTR] != 0 || -+ ctxt->ItemCount[LIST_DMA_DESC] != 0); -+} -+ -+void -+RestartDProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_Addr value; -+ int res; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ while (ctxt->ItemCount[LIST_DMA_PTR]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_DMA_PTR, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_PTR] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue write dma at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, NULL, 0); -+ -+ if (res == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_PTR, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_DMA_PTR]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ -+ while (ctxt->ItemCount[LIST_DMA_DESC]) -+ { -+ if (! ELAN3_OP_GET_BLOCK_ITEM (ctxt, LIST_DMA_DESC, &item, &value)) -+ ctxt->ItemCount[LIST_DMA_DESC] = 0; -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "RestartDProc: issue dma desc at %x\n", value); -+ PrintUserDma (ctxt, value); -+ -+ res = IssueDmaCommand (ctxt, value, item, 0); -+ -+ switch (res) -+ { -+ case ISSUE_COMMAND_OK: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ ELAN3_OP_FREE_BLOCK_ITEM (ctxt, item); -+ break; -+ -+ case ISSUE_COMMAND_RETRY: -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_DMA_DESC, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ -+ case ISSUE_COMMAND_TRAPPED: -+ ctxt->ItemCount[LIST_DMA_DESC]--; -+ /* The item will be freed off when the command port trap */ -+ /* fixed up and the command successfully re-issued */ -+ break; -+ } -+ } -+ } -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaDesc(ELAN3_CTXT *ctxt, E3_DMA_BE *desc) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ if (desc->s.dma_direction != DMA_WRITE) -+ desc->s.dma_direction = (desc->s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ ELAN3_OP_PUT_BLOCK_ITEM (ctxt, LIST_DMA_DESC, (E3_uint32 *) desc); -+ ctxt->ItemCount[LIST_DMA_DESC]++; -+ -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+void -+RestartDmaTrap(ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ /* Negative length DMAs are illegal, since they hangup the dma processor, -+ * if they got generated then they will have been spotted by PollForDmahungup, -+ * and delivered to us with a Dequeue suspend address, -+ * -+ * GNAT sw-elan3/3908: Moved this check into this new function to avoid -+ * it sampling old or invalid register state -+ */ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ ElanException (ctxt, EXCEPTION_BAD_DMA, DMA_PROC, trap, NULL, 0); -+ else -+ RestartDmaDesc (ctxt, &trap->Desc); -+} -+ -+void -+RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr) -+{ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_DMA_PTR, ptr); -+ ctxt->ItemCount[LIST_DMA_PTR]++; -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+static void -+PrintUserDma (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_DMA *dma; -+ -+ /* Dont call a function which takes locks unless we need to */ -+ if (!(elan3_debug & DBG_DPROC)) -+ return; -+ -+ dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: type %08x size %08x source %08x dest %08x\n", -+ fuword ((int *) &dma->dma_type), fuword ((int *) &dma->dma_size), -+ fuword ((int *) &dma->dma_source), fuword ((int *) &dma->dma_dest)); -+ PRINTF4 (ctxt, DBG_DPROC, "DMA: Dest %08x %08x Local %08x %08x\n", -+ fuword ((int *) &dma->dma_destEvent), fuword ((int *) &dma->dma_destCookieProc), -+ fuword ((int *) &dma->dma_srcEvent), fuword ((int *) &dma->dma_srcCookieProc)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3mmu_generic.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elan3mmu_generic.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3mmu_generic.c 2005-07-28 14:52:52.795686792 -0400 -@@ -0,0 +1,3255 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_generic.c,v 1.75.2.1 2004/12/14 10:19:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_generic.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+# define zero_all_ptbls -+#endif -+ -+/* -+ * Debugging -+ */ -+int elan3mmu_debug = 0; -+ -+#define N_L3PTBL_MTX (0x20) -+#define N_L2PTBL_MTX (0x40) -+#define N_L1PTBL_MTX (0x20) -+ -+#define L3PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L3PTBL_MTX - 1)) -+static spinlock_t l3ptbl_lock[N_L3PTBL_MTX]; -+ -+#define L2PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L2PTBL_MTX - 1)) -+static spinlock_t l2ptbl_lock[N_L2PTBL_MTX]; -+ -+#define L1PTBL_MTX_HASH(p) \ -+ ((((uintptr_t)(p) >> 12) ^ ((uintptr_t)(p) >> 2)) & (N_L1PTBL_MTX - 1)) -+static spinlock_t l1ptbl_lock[N_L1PTBL_MTX]; -+ -+ -+#define BASE2VA(p) ((E3_Addr)((p)->ptbl_base << 16)) -+#define VA2BASE(v) ((u_short)(((uintptr_t)(v)) >> 16)) -+ -+ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+static void elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *); -+static void elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags); -+ -+static ELAN3_PTBL *elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep); -+static ELAN3_PTBL *elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp); -+ -+static ELAN3_PTBL *elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx); -+void elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl); -+ -+void elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx); -+ -+static ELAN3_PTBL *elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu); -+static ELAN3_PTBL *elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+static ELAN3_PTBL *elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, -+ E3_Addr base, spinlock_t **plock, unsigned long *flags); -+ -+static int elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl); -+static ELAN3_PTBL *elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr); -+ -+static spinlock_t *elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl); -+ -+/* -+ * Encoding of MMU permissions against access type, -+ * to allow quick permission checking against access -+ * type. -+ */ -+u_char elan3mmu_permissionTable[] = -+{ -+ 0xcc, /* 11001100 ELAN3_PERM_NULL */ -+ 0x01, /* 00000001 ELAN3_PERM_LOCALREAD */ -+ 0x05, /* 00000101 ELAN3_PERM_READ */ -+ 0x33, /* 00110011 ELAN3_PERM_NOREMOTE */ -+ 0x37, /* 00110111 ELAN3_PERM_REMOTEREAD */ -+ 0x3f, /* 00111111 ELAN3_PERM_REMOTEWRITE */ -+ 0xf7, /* 11110111 ELAN3_PERM_REMOTEEVENT */ -+ 0xff, /* 11111111 ELAN3_PERM_REMOTEALL */ -+} ; -+ -+void -+elan3mmu_init() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_init: initialising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_init (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_init (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_init (&l3ptbl_lock[i]); -+ -+ elan3mmu_global_stats.version = ELAN3MMU_STATS_VERSION; -+ -+ elan3mmu_init_osdep(); -+} -+ -+void -+elan3mmu_fini() -+{ -+ register int i; -+ -+ HAT_PRINTF0 (1, "elan3mmu_fini: finalising elan mmu\n"); -+ -+ for (i = 0; i < N_L1PTBL_MTX; i++) -+ spin_lock_destroy (&l1ptbl_lock[i]); -+ -+ for (i = 0; i < N_L2PTBL_MTX; i++) -+ spin_lock_destroy (&l2ptbl_lock[i]); -+ -+ for (i = 0; i < N_L3PTBL_MTX; i++) -+ spin_lock_destroy (&l3ptbl_lock[i]); -+ -+ elan3mmu_fini_osdep(); -+} -+ -+ELAN3MMU * -+elan3mmu_alloc (ELAN3_CTXT *ctxt) -+{ -+ ELAN3MMU *elan3mmu; -+ ELAN3_PTBL *l1ptbl; -+ -+ ALLOC_ELAN3MMU (elan3mmu, TRUE); -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lock); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ elan3mmu->elan3mmu_ergns = NULL; -+ elan3mmu->elan3mmu_etail = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ elan3mmu->elan3mmu_mrgns = NULL; -+ elan3mmu->elan3mmu_mtail = NULL; -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ctxt = ctxt; -+ -+ spin_lock_init (&elan3mmu->elan3mmu_lXptbl_lock); -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ l1ptbl = elan3mmu_alloc_l1ptbl(ctxt->Device, 0, elan3mmu); -+ -+ elan3mmu->elan3mmu_ctp = (sdramaddr_t) 0; -+ elan3mmu->elan3mmu_dev = ctxt->Device; -+ elan3mmu->elan3mmu_l1ptbl = l1ptbl; -+ -+ /* Ensure that there are at least some level 3 page tables, since if a level 2 and */ -+ /* a level 3 table are allocated together, then the level 3 is allocated with the NO_ALLOC */ -+ /* flag, thus there MUST be at least one that can be stolen or on the free list */ -+ if (elan3mmu->elan3mmu_dev->Level[PTBL_LEVEL_3].PtblFreeList == NULL) -+ elan3mmu_create_ptbls (elan3mmu->elan3mmu_dev, PTBL_LEVEL_3, 0, 0); -+ -+ HAT_PRINTF1 (1, "elan3mmu_alloc: elan3mmu %p\n", elan3mmu); -+ -+ elan3mmu_alloc_osdep (elan3mmu); -+ -+ return (elan3mmu); -+} -+ -+void -+elan3mmu_free (ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ ELAN3_PTBL *l1ptbl; -+ spinlock_t *l1lock; -+ unsigned long l1flags; -+ unsigned long flags; -+ -+ HAT_PRINTF1 (1, "elan3mmu_free : elan3mmu %p\n", elan3mmu); -+ -+ /* -+ * Invalidate the level1 page table, since it's already removed -+ * from the context table, there is no need to flush the tlb. -+ */ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ elan3mmu->elan3mmu_l1ptbl = NULL; -+ -+ if (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, PTBL_LEVEL_1, &l1lock, &l1flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval (elan3mmu, l1ptbl, PTE_UNLOAD_NOFLUSH); -+ elan3mmu_free_l1ptbl (elan3mmu->elan3mmu_dev, l1ptbl, l1lock, l1flags); -+ } -+ -+ /* -+ * Free of any permission regions. -+ */ -+ spin_lock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ while ((rgn = elan3mmu->elan3mmu_mrgns) != NULL) -+ { -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); /* lock_lint */ -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ elan3mmu->elan3mmu_mrgnlast = NULL; -+ elan3mmu->elan3mmu_ergnlast = NULL; -+ -+ /* -+ * Free the lXptbl list -+ */ -+ ASSERT (elan3mmu->elan3mmu_lXptbl == NULL); /* XXXX MRH need to add list removal */ -+ -+ elan3mmu->elan3mmu_lXptbl = NULL; -+ spin_lock_destroy (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); /* lock_lint */ -+ -+ spin_lock_destroy (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU (elan3mmu); -+} -+ -+/*================================================================================*/ -+/* Interface routines to device driver */ -+static void -+elan3mmu_flush_context_filter (ELAN3_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ ASSERT ((read_reg32 (dev, Exts.InterruptReg) & (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)) == -+ (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx)); -+ -+ dev->FilterHaltQueued = 0; -+ -+ write_reg32 (dev, Input_Context_Fil_Flush, 0); -+ -+ HAT_PRINTF0 (1, "elan3mmu_flush_context_filter completed\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp) -+{ -+ int mctx = ctx & MAX_ROOT_CONTEXT_MASK; -+ sdramaddr_t ctp = dev->ContextTable + mctx * sizeof (E3_ContextControlBlock); -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ASSERT ((mctx < 32 || mctx >= ELAN3_KCOMM_BASE_CONTEXT_NUM) ? (ctx & SYS_CONTEXT_BIT) : ! (ctx & SYS_CONTEXT_BIT)); -+ -+ elan3_sdram_writel (dev, ctp + offsetof (E3_ContextControlBlock, filter), -+ ((ctx & SYS_CONTEXT_BIT) ? E3_CCB_CNTX0 : 0) | (disabled ? E3_CCB_DISCARD_ALL : 0)); -+ -+ HAT_PRINTF4 (1, "elan3mmu_set_context_filter: ctx %x [%lx] -> %s (%x)\n", ctx, ctp, -+ disabled ? "up" : "down", elan3_sdram_readl (dev, ctp + offsetof (E3_ContextControlBlock, filter))); -+ -+ /* queue a halt operation to flush the context filter while the inputter is halted */ -+ if (dev->FilterHaltQueued == 0) -+ { -+ dev->FilterHaltQueued = 1; -+ QueueHaltOperation (dev, Pend, Maskp, INT_DiscardingSysCntx | INT_DiscardingNonSysCntx, -+ elan3mmu_flush_context_filter, NULL); -+ } -+} -+ -+int -+elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask) -+{ -+ sdramaddr_t ctp; -+ ELAN3_PTP trootptp; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3mmu->elan3mmu_ctp = ctp; -+ -+ trootptp = PTBL_TO_PTADDR (elan3mmu->elan3mmu_l1ptbl) | ELAN3_ET_PTP; -+ -+ HAT_PRINTF4 (1, "elan3mmu_attach: ctp at %08lx : trootptp=%08x VPT_ptr=%08lx VPT_mask=%08x\n", -+ ctp, trootptp, routeTable, routeMask); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), trootptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), routeTable); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), routeMask); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_detach (ELAN3_DEV *dev, int ctx) -+{ -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t ctp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return; -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ HAT_PRINTF1 (1, "elan3mmu_detach: clearing ptp at %lx\n", ctp); -+ -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP), invalidptp); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_mask), 0); -+ elan3_writeptp (dev, ctp + offsetof (E3_ContextControlBlock, VPT_ptr), 0); -+ -+ ElanFlushTlb (dev); -+} -+ -+int -+elan3mmu_reference (ELAN3MMU *elan3mmu, int ctx) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t ctp; -+ E3_ContextControlBlock ccb; -+ ELAN3_PTP trootptp; -+ -+ ctx &= MAX_ROOT_CONTEXT_MASK; /* Mask out all high bits in context */ -+ -+ if (ctx < 0 || ctx >= dev->ContextTableSize) -+ return (EINVAL); -+ -+ ctp = dev->ContextTable + ctx * sizeof (E3_ContextControlBlock); -+ -+ trootptp = elan3_readptp (dev, ctp + offsetof (E3_ContextControlBlock, rootPTP)); -+ -+ if (ELAN3_PTP_TYPE(trootptp) != ELAN3_ET_INVALID) -+ return (EBUSY); -+ -+ elan3_sdram_copyl_from_sdram (dev, elan3mmu->elan3mmu_ctp, &ccb, sizeof (E3_ContextControlBlock)); -+ elan3_sdram_copyl_to_sdram (dev, &ccb, ctp, sizeof (E3_ContextControlBlock)); -+ -+ return (ESUCCESS); -+ -+} -+/*================================================================================*/ -+/* Elan permission regions. */ -+ -+/* elan address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, -+ E3_Addr addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ E3_Addr base; -+ E3_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_ergns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_ergnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_ergns; -+ -+ forward = 0; -+ if ((u_long) (base = rgn->rgn_ebase) < (u_long)addr) -+ { -+ if ((u_long)addr <= ((u_long) base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_etail; -+ -+ if ((u_long) (lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < (u_long) addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((u_long) (addr - base) > (u_long) (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_ergns; -+ -+ if ((u_long)lorgn->rgn_ebase > (u_long) addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((u_long)(addr - lorgn->rgn_ebase) < (u_long) (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((u_long)(rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ rgn = rgn->rgn_enext; -+ -+ if ((u_long)rgn->rgn_ebase <= (u_long)addr) -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while ((u_long)rgn->rgn_ebase > (u_long)addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_eprev; -+ } -+ -+ if ((u_long) (rgn->rgn_ebase + rgn->rgn_len - 1) < (u_long)addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, nrgn->rgn_ebase, 1); -+ E3_Addr nbase = nrgn->rgn_ebase; -+ E3_Addr ntop = nbase + nrgn->rgn_len - 1; /* avoid wrap */ -+ E3_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_ergns = elan3mmu->elan3mmu_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((u_long)(base + rgn->rgn_len - 1) < (u_long)nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = elan3mmu->elan3mmu_etail = nrgn; -+ } -+ else -+ { -+ if ((u_long)nbase >= (u_long)base || (u_long)ntop >= (u_long)base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (elan3mmu->elan3mmu_ergns == rgn) -+ elan3mmu->elan3mmu_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_ebase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_ergnlast = rgn->rgn_enext; -+ if (rgn == elan3mmu->elan3mmu_etail) -+ elan3mmu->elan3mmu_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == elan3mmu->elan3mmu_ergns) -+ elan3mmu->elan3mmu_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); -+ E3_Addr base; -+ -+ if (rgn != NULL && (u_long)(base = rgn->rgn_ebase) <= (u_long)addr && (u_long)addr <= (u_long)(base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+/* main address region management */ -+ELAN3MMU_RGN * -+elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, -+ caddr_t addr, int tail) -+{ -+ ELAN3MMU_RGN *next = NULL; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *hirgn; -+ ELAN3MMU_RGN *lorgn; -+ caddr_t lastaddr; -+ caddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) || SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (elan3mmu->elan3mmu_mrgns == NULL) -+ return (NULL); -+ -+ rgn = elan3mmu->elan3mmu_mrgnlast; -+ if (rgn == NULL) -+ rgn = elan3mmu->elan3mmu_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = elan3mmu->elan3mmu_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = elan3mmu->elan3mmu_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ { -+ next = rgn; -+ rgn = rgn->rgn_mprev; -+ } -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (next); -+ else -+ { -+ elan3mmu->elan3mmu_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+int -+elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, nrgn->rgn_mbase, 1); -+ caddr_t nbase = nrgn->rgn_mbase; -+ caddr_t ntop = nbase + nrgn->rgn_len - 1; -+ caddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL) -+ { -+ elan3mmu->elan3mmu_mrgns = elan3mmu->elan3mmu_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = elan3mmu->elan3mmu_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) -+ return (-1); /* overlapping region */ -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (elan3mmu->elan3mmu_mrgns == rgn) -+ elan3mmu->elan3mmu_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ elan3mmu->elan3mmu_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ ASSERT (SPINLOCK_HELD (&elan3mmu->elan3mmu_dev->IntrLock) && SPINLOCK_HELD (&elan3mmu->elan3mmu_lock)); -+ -+ if (rgn == NULL || rgn->rgn_mbase != addr) -+ return (NULL); -+ -+ elan3mmu->elan3mmu_mrgnlast = rgn->rgn_mnext; -+ if (rgn == elan3mmu->elan3mmu_mtail) -+ elan3mmu->elan3mmu_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == elan3mmu->elan3mmu_mrgns) -+ elan3mmu->elan3mmu_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+ELAN3MMU_RGN * -+elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ caddr_t base; -+ -+ if (rgn != NULL && (base = rgn->rgn_mbase) <= addr && addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+elan3mmu_setperm (ELAN3MMU *elan3mmu, -+ caddr_t maddr, -+ E3_Addr eaddr, -+ u_int len, -+ u_int perm) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3MMU_RGN *nrgn; -+ unsigned long flags; -+ -+ HAT_PRINTF4 (1, "elan3mmu_setperm: user %p elan %08x len %x perm %x\n", maddr, eaddr, len, perm); -+ -+ if ((((uintptr_t) maddr) & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: alignment failure\n"); -+ return (EINVAL); -+ } -+ -+ if (((uintptr_t) maddr + len - 1) < (uintptr_t) maddr || ((u_long)eaddr + len - 1) < (u_long)eaddr) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: range failure\n"); -+ return (EINVAL); -+ } -+ -+ ALLOC_ELAN3MMU_RGN(nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if (elan3mmu_addrgn_elan (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: elan address exists\n"); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ -+ if (elan3mmu_addrgn_main (elan3mmu, nrgn) < 0) -+ { -+ HAT_PRINTF0 (1, "elan3mmu_setperm: main address exists\n"); -+ elan3mmu_removergn_elan (elan3mmu, eaddr); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ FREE_ELAN3MMU_RGN (nrgn); -+ return (EINVAL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+void -+elan3mmu_clrperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len) -+{ -+ E3_Addr raddr; -+ E3_Addr rtop; -+ ELAN3MMU_RGN *nrgn; -+ ELAN3MMU_RGN *rgn; -+ ELAN3MMU_RGN *rgn_next; -+ u_int ssize; -+ unsigned long flags; -+ int res; -+ -+ HAT_PRINTF2 (1, "elan3mmu_clrperm: elan %08x len %x\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ -+ ALLOC_ELAN3MMU_RGN (nrgn, TRUE); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (rgn = elan3mmu_findrgn_elan (elan3mmu, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, rgn->rgn_len, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ elan3mmu_removergn_elan (elan3mmu, rgn->rgn_ebase); -+ elan3mmu_removergn_main (elan3mmu, rgn->rgn_mbase); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ FREE_ELAN3MMU_RGN (rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ elan3mmu_unload (elan3mmu, rgn->rgn_ebase, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ -+ elan3mmu_unload (elan3mmu, raddr, ssize, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ rgn->rgn_len -= ssize; -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ elan3mmu_unload (elan3mmu, raddr, rtop - raddr + 1, PTE_UNLOAD); -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ ASSERT (nrgn != NULL); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1);; -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = ((rgn->rgn_ebase + rgn->rgn_len - 1) - rtop); -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ res = elan3mmu_addrgn_elan (elan3mmu, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = elan3mmu_addrgn_main (elan3mmu, nrgn); -+ ASSERT (res == 0); -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_dev->IntrLock, flags); -+ -+ nrgn = NULL; -+ } -+ } -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ if (nrgn != NULL) -+ FREE_ELAN3MMU_RGN (nrgn); -+} -+ -+int -+elan3mmu_checkperm (ELAN3MMU *elan3mmu, -+ E3_Addr addr, -+ u_int len, -+ u_int access) -+{ -+ E3_Addr raddr = (((E3_Addr) addr) & PAGEMASK); -+ u_int rtop = ((addr + len - 1) & PAGEMASK) + PAGEOFFSET; -+ u_int rsize = rtop - raddr + 1; -+ ELAN3MMU_RGN *rgn; -+ -+ HAT_PRINTF3 (1, "elan3mmu_checkperm: user %08x len %x access %x\n", addr, len, access); -+ -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (ENOMEM); -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, raddr)) == (ELAN3MMU_RGN *) NULL) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ HAT_PRINTF4 (1, "elan3mmu_checkperm : rgn %x -> %x perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN3_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ return (EACCES); -+ } -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (ESUCCESS); -+} -+ -+caddr_t -+elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ caddr_t raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_elan (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = NULL; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+E3_Addr -+elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr) -+{ -+ ELAN3MMU_RGN *rgn; -+ E3_Addr raddr; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ if ((rgn = elan3mmu_rgnat_main (elan3mmu, addr)) == (ELAN3MMU_RGN *) NULL) -+ raddr = (E3_Addr) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+ -+ return (raddr); -+} -+ -+void -+elan3mmu_displayrgns(ELAN3MMU *elan3mmu) -+{ -+ ELAN3MMU_RGN *rgn; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: main regions\n"); -+ for (rgn = elan3mmu->elan3mmu_mrgns; rgn; rgn = (rgn->rgn_mnext == elan3mmu->elan3mmu_mrgns) ? NULL : rgn->rgn_mnext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ HAT_PRINTF0 (1, "elan3mmu_displayrgns: elan regions\n"); -+ for (rgn = elan3mmu->elan3mmu_ergns; rgn; rgn = (rgn->rgn_enext == elan3mmu->elan3mmu_ergns) ? NULL : rgn->rgn_enext) -+ HAT_PRINTF5 (1, " RGN %p ebase %08x mbase %p len %08x perm %08x\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len, rgn->rgn_perm); -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/*============================================================================*/ -+/* Private functions */ -+#define ELAN3_PTE_IS_VALID(ptbl, pte) \ -+ ((ptbl->ptbl_flags & PTBL_KERNEL) ? \ -+ (pte&(~ELAN3_PTE_REF)) != elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu) : \ -+ ELAN3_PTE_VALID(pte)) -+ -+void -+elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_expand: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + len); -+ -+ for ( ; len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ break; -+ case PTBL_LEVEL_2: -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ break; -+ default: -+ span = len; -+ break; -+ } -+ -+ if (pte != (sdramaddr_t) 0) -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_reserve: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptealloc (elan3mmu, addr, 3, &ptbl, &lock, 0, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptes != NULL) -+ { -+ for (i = 0; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ *ptes++ = pte; -+ ptbl->ptbl_valid += (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF4 (2, "elan3mmu_reserve: inc valid for level %d ptbl %p to %d (%d)\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid, (span >> ELAN3_PAGE_SHIFT)); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+} -+ -+void -+elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *ptes) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ spinlock_t *lock; -+ u_int span; -+ int len; -+ int i; -+ int level; -+ unsigned long flags; -+ -+ HAT_PRINTF3 (1, "elan3mmu_release: elan3mmu %p %08x to %08x\n", elan3mmu, -+ addr, addr + (npages << ELAN3_PAGE_SHIFT)); -+ -+ if (ptes == NULL) -+ return; -+ -+ tpte = elan3mmu_kernel_invalid_pte (elan3mmu); -+ -+ for (len = (npages << ELAN3_PAGE_SHIFT); len != 0; addr += span, len -= span) -+ { -+ /* as we asked for level 3 we know its a pte */ -+ pte = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ ASSERT (level == PTBL_LEVEL_3); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ -+ for (i = 0 ; i < span; i += ELAN3_PAGE_SIZE, pte += ELAN3_PTE_SIZE) -+ elan3_writepte (dev, pte, tpte); -+ ptbl->ptbl_valid -= (span >> ELAN3_PAGE_SHIFT); -+ -+ HAT_PRINTF3 (2, "elan3mmu_release: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+} -+ -+void -+elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr) -+ -+{ -+ ELAN3_DEV *dev; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long flags; -+ ELAN3_PTE newpte; -+ ELAN3_PTE oldpte; -+ sdramaddr_t pte; -+ -+ ASSERT((level == PTBL_LEVEL_2) || (level == PTBL_LEVEL_3)); -+ -+ /* Generate the new pte which we're going to load */ -+ dev = elan3mmu->elan3mmu_dev; -+ -+ newpte = elan3mmu_phys_to_pte (dev, paddr, perm); -+ -+ if (attr & PTE_LOAD_BIG_ENDIAN) -+ newpte |= ELAN3_PTE_BIG_ENDIAN; -+ -+ HAT_PRINTF4 (1, "elan3mmu_pteload: elan3mmu %p level %d addr %x pte %llx\n", elan3mmu, level, addr, (long long) newpte); -+ HAT_PRINTF5 (1, "elan3mmu_pteload:%s%s%s perm=%d phys=%llx\n", -+ (newpte & ELAN3_PTE_LOCAL) ? " local" : "", -+ (newpte & ELAN3_PTE_64_BIT) ? " 64 bit" : "", -+ (newpte & ELAN3_PTE_BIG_ENDIAN) ? " big-endian" : " little-endian", -+ (u_int) (newpte & ELAN3_PTE_PERM_MASK) >> ELAN3_PTE_PERM_SHIFT, -+ (unsigned long long) (newpte & ELAN3_PTE_PFN_MASK)); -+ -+ if (level == PTBL_LEVEL_3) -+ pte = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ else -+ { -+ sdramaddr_t ptp = elan3mmu_ptealloc (elan3mmu, addr, level, &ptbl, &lock, attr, &flags); -+ -+ pte = elan3mmu_ptp2pte (elan3mmu, ptp, level); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: level %d ptp at %lx => pte at %lx\n", level, ptp, pte); -+ } -+ -+ if (pte == (sdramaddr_t) 0) -+ { -+ ASSERT (level == PTBL_LEVEL_3 && (attr & (PTE_NO_SLEEP | PTE_NO_STEAL)) == (PTE_NO_SLEEP | PTE_NO_STEAL)); -+ return; -+ } -+ -+ ASSERT (ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == level); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ oldpte = elan3_readpte (dev, pte); -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: modify pte at %lx from %llx to %llx\n", pte, (long long) oldpte, (long long) newpte); -+ -+ if (ELAN3_PTE_IS_VALID(ptbl, oldpte)) -+ { -+ ELAN3MMU_STAT(ptereload); -+ -+ ASSERT ((newpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK)) == (oldpte & ~((E3_uint64)ELAN3_PTE_PERM_MASK | ELAN3_RM_MASK))); -+ -+ if ((newpte & ~ELAN3_RM_MASK) != (oldpte & ~ELAN3_RM_MASK)) -+ { -+ /* We're modifying a valid translation, it must be mapping the same page */ -+ /* so we use elan3_modifypte to not affect the referenced and modified bits */ -+ elan3_modifypte (dev, pte, newpte); -+ -+ -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ } -+ } -+ else -+ { -+ ELAN3MMU_STAT(pteload); -+ -+ ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteload: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_pteload: write pte %lx to %llx\n", pte, (long long) newpte); -+ -+ elan3_writepte (dev, pte, newpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ ElanFlushTlb (elan3mmu->elan3mmu_dev); -+ -+ } -+ -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+} -+ -+void -+elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int attr) -+{ -+ ELAN3_PTBL *ptbl; -+ sdramaddr_t ptp; -+ spinlock_t *lock; -+ int level; -+ u_int span; -+ unsigned long flags; -+ -+ HAT_PRINTF3(1, "elan3mmu_unload (elan3mmu %p addr %x -> %x)\n", elan3mmu, addr, addr+len-1); -+ -+ for (; len != 0; addr += span, len -= span) -+ { -+ ptp = elan3mmu_ptefind(elan3mmu, addr, &level, &ptbl, &lock, &flags); -+ -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ if (ptp != (sdramaddr_t) 0) -+ { -+ HAT_PRINTF2 (2, "elan3mmu_unload: unload [%x,%x]\n", addr, addr + span); -+ -+ if ( level == PTBL_LEVEL_3 ) -+ elan3mmu_unload_loop (elan3mmu, ptbl, ptp - PTBL_TO_PTADDR(ptbl), span >> ELAN3_PAGE_SHIFT, attr); -+ else -+ { -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *lXptbl; -+ ELAN3_PTP tptp; -+ int idx; -+ -+ tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tptp); -+ idx = (PTP_TO_PT_PADDR(tptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ if ( level == PTBL_LEVEL_1) -+ span = MIN(len, ELAN3_L2_PTSIZE - ((E3_Addr) addr & ELAN3_L2_PTOFFSET)); -+ else -+ span = MIN(len, ELAN3_L3_PTSIZE - ((E3_Addr) addr & ELAN3_L3_PTOFFSET)); -+ -+ /* invalidate the ptp. */ -+ elan3_writeptp (dev, ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_unload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, flags); -+ } -+ } -+} -+ -+static void -+elan3mmu_unload_loop (ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl, int first_valid, int nptes, int flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte; -+ ELAN3_PTE tpte; -+ int last_valid = first_valid + nptes; -+ int i; -+ -+ HAT_PRINTF3 (1, "elan3mmu_unloadloop: ptbl %p entries [%d->%d]\n", ptbl, first_valid, last_valid); -+ -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ pte = PTBL_TO_PTADDR(ptbl) + first_valid; -+ -+ for (i = first_valid; i < last_valid; i++, pte += ELAN3_PTE_SIZE) -+ { -+ if (ptbl->ptbl_valid == 0) -+ break; -+ -+ tpte = elan3_readpte (dev, pte); -+ if (! ELAN3_PTE_IS_VALID(ptbl, tpte)) -+ continue; -+ -+ elan3mmu_pteunload (ptbl, pte, flags, NO_MLIST_LOCK); -+ } -+} -+ -+void -+elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ ELAN3_PTE tpte; -+ -+ ASSERT (PTBL_LEVEL (ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (PTBL_IS_LOCKED (ptbl->ptbl_flags)); -+ -+ HAT_PRINTF2 (1, "elan3mmu_pteunload: ptbl %p pte %lx\n", ptbl, pte); -+ -+ ELAN3MMU_STAT (pteunload); -+ -+ elan3_invalidatepte (dev, pte); -+ -+ if (! (flags & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ tpte = ELAN3_INVALID_PTE; -+ elan3_writepte (dev, pte, tpte); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) -+ { -+ tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ -+ elan3_writepte (dev, pte, tpte); -+ } -+ -+ ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_pteunload: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl->ptbl_flags), ptbl, ptbl->ptbl_valid); -+ -+} -+ -+void -+elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock) -+{ -+ -+} -+ -+/* -+ * Create more page tables at a given level for this Elan. -+ */ -+static ELAN3_PTBL * -+elan3mmu_create_ptbls (ELAN3_DEV *dev, int level, int attr, int keep) -+{ -+ sdramaddr_t pts; -+ ELAN3_PTBL *ptbl; -+ ELAN3_PTBL *first; -+ ELAN3_PTBL *last; -+ ELAN3_PTBL_GR *ptg; -+ register int i; -+ register int inc; -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: create level %d ptbls\n", level); -+ -+ pts = elan3_sdram_alloc (dev, PTBL_GROUP_SIZE); -+ if (pts == (sdramaddr_t) 0) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot map elan pages\n"); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: pts at %lx\n", pts); -+ -+ ALLOC_PTBL_GR (ptg, !(attr & PTE_NO_SLEEP)); /* Allocate the group of page tables */ -+ if (ptg == NULL) /* for this page */ -+ { -+ HAT_PRINTF0 (2, "elan3mmu_create_ptbls: cannot allocate page table group\n"); -+ -+ elan3_sdram_free (dev, pts, PTBL_GROUP_SIZE); -+ -+ ELAN3MMU_STAT (create_ptbl_failed); -+ return (NULL); -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: ptg is %p\n", ptg); -+ -+ ElanSetPtblGr (dev, pts, ptg); -+ -+ HAT_PRINTF4 (2, "elan3mmu_create_ptbls: zeroing %d bytes at %lx, %d bytes at %p\n", -+ PTBL_GROUP_SIZE, pts, (int) sizeof (ELAN3_PTBL_GR), ptg); -+ -+#ifndef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, pts, PTBL_GROUP_SIZE); /* Ensure that all PTEs/PTPs are invalid */ -+#endif -+ bzero ((caddr_t) ptg, sizeof (ELAN3_PTBL_GR)); -+ -+ ptg->pg_addr = pts; -+ ptg->pg_level = level; -+ -+ ptbl = ptg->pg_ptbls; /* Initialise the index in all page tables */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i++) -+ { -+ ptbl->ptbl_index = (u_char) i; -+ ptbl->ptbl_next = (ELAN3_PTBL *) 0xdeaddead; -+ ptbl++; -+ } -+ -+ switch (level) /* Determine the number of ptbls we can */ -+ { /* allocate from this page, by jumping */ -+ case PTBL_LEVEL_X: inc = PTBLS_PER_PTBL_LX; break; /* multiples of the smallest. */ -+ case PTBL_LEVEL_1: inc = PTBLS_PER_PTBL_L1; break; -+ case PTBL_LEVEL_2: inc = PTBLS_PER_PTBL_L2; break; -+ case PTBL_LEVEL_3: inc = PTBLS_PER_PTBL_L3; break; -+ default: inc = PTBLS_PER_PTBL_L3; break; -+ } -+ -+ ptbl = ptg->pg_ptbls; /* Chain them together */ -+ for (i = 0; i < PTBLS_PER_GROUP_MAX; i += inc, ptbl += inc) -+ ptbl->ptbl_next = ptbl + inc; -+ -+ first = ptg->pg_ptbls; /* Determine list of */ -+ last = first + PTBLS_PER_GROUP_MAX - inc; /* ptbls to add to free list */ -+ if (! keep) -+ ptbl = NULL; -+ else -+ { -+ ptbl = first; -+ first = first->ptbl_next; -+ } -+ -+ spin_lock (&dev->Level[level].PtblLock); -+ dev->Level[level].PtblTotal += PTBLS_PER_GROUP_MAX/inc; /* Increment the counts */ -+ dev->Level[level].PtblFreeCount += PTBLS_PER_GROUP_MAX/inc; -+ -+ ELAN3MMU_SET_STAT (num_ptbl_level[level], dev->Level[level].PtblTotal); -+ -+ if (keep) -+ dev->Level[level].PtblFreeCount--; -+ -+ last->ptbl_next = dev->Level[level].PtblFreeList; /* And add to free list */ -+ dev->Level[level].PtblFreeList = first; -+ spin_unlock (&dev->Level[level].PtblLock); -+ -+ spin_lock (&dev->PtblGroupLock); -+ ptg->pg_next = dev->Level[level].PtblGroupList; -+ dev->Level[level].PtblGroupList = ptg; -+ spin_unlock (&dev->PtblGroupLock); -+ -+ HAT_PRINTF1 (2, "elan3mmu_create_ptbls: returning ptbl %p\n", ptbl); -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_ta_to_ptbl (ELAN3MMU *elan3mmu, ELAN3_PTP *ptp) -+{ -+ E3_Addr ptpa = PTP_TO_PT_PADDR(*ptp); -+ ELAN3_PTBL_GR *pg = ElanGetPtblGr (elan3mmu->elan3mmu_dev, (sdramaddr_t)ptpa & ~(PTBL_GROUP_SIZE-1)); -+ -+ return (pg->pg_ptbls + ((ptpa - pg->pg_addr) >> ELAN3_PT_SHIFT)); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_lXptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ if (dev->Level[PTBL_LEVEL_X].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_X, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: created level X ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_lXptbl: not allowed to steal ptbl for use at level 2\n"); -+ return NULL; -+ } -+ -+ ELAN3MMU_STAT(lX_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_lXptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ ptbl->ptbl_elan3mmu = elan3mmu; -+ ptbl->ptbl_base = 0; -+ ptbl->ptbl_parent = 0; -+ ptbl->ptbl_flags = PTBL_LEVEL_X | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_lXptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_LX_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, int *idx) -+{ -+ ELAN3_PTBL * ptbl_ptr; -+ int index; -+ -+ /* lock whilst looking for space */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* walk the lXptbl list */ -+ ptbl_ptr = elan3mmu->elan3mmu_lXptbl; -+ while ( ptbl_ptr != NULL ) -+ { -+ /* does this ptlb have any free ones */ -+ if ( (index = ptbl_ptr->ptbl_valid) < ELAN3_LX_ENTRIES) -+ { -+ /* better to search from valid count as its likly to be free */ -+ index = ptbl_ptr->ptbl_valid; -+ do { -+ if ((ptbl_ptr->ptbl_base & (1 << index)) == 0) -+ goto found; -+ -+ /* move index on and wrap back to start if needed */ -+ if ((++index) == ELAN3_LX_ENTRIES) -+ index = 0; -+ } while (index != ptbl_ptr->ptbl_valid); -+ -+ panic ("elan3mmu_alloc_pte: has ptbl valid < 32 when but no free pte's"); -+ } -+ ptbl_ptr = ptbl_ptr->ptbl_parent; -+ } -+ -+ /* unlock so we can create space */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* if create some more */ -+ ptbl_ptr = elan3mmu_alloc_lXptbl(dev, 0, elan3mmu); -+ -+ /* get the lock again */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ /* add to front of list as its obviously got free ones on it */ -+ ptbl_ptr->ptbl_parent = elan3mmu->elan3mmu_lXptbl; -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr; -+ -+ /* grap the first one */ -+ index = 0; -+ -+ found: -+ ptbl_ptr->ptbl_base |= (1 << index); -+ ptbl_ptr->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_pte: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* release the loc and return it */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ *idx = index; -+ return (ptbl_ptr); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l1ptbl (ELAN3_DEV *dev, int attr, ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i,j; -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ if (dev->Level[PTBL_LEVEL_1].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ if (ptbl == NULL) -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_1, attr, 1); -+ -+ if (ptbl == NULL) -+ panic ("elan3mmu_alloc_l1ptbl: cannot alloc ptbl"); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L1; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (j); -+ p->ptbl_flags = PTBL_LEVEL_1 | PTBL_GROUPED; -+ p->ptbl_parent = NULL; -+ -+ j += L1_VA_PER_PTBL; -+ } -+ -+ /* Now mark the real page table as allocated */ -+ /* level 1 ptbls are returned unlocked */ -+ ptbl->ptbl_flags = PTBL_LEVEL_1 | PTBL_ALLOCED; -+ -+ HAT_PRINTF2 (2, "elan3mmu_alloc_l1ptbl: ptbl %p dev %p\n", ptbl, dev); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L1_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l2ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_2].PtblFreeList) -+ { -+ ptbl = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: found ptbl %p on free list\n", ptbl); -+ -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_2].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_2, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: created level 2 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l2ptbl: not allowted to steal ptbl for use at level 2\n"); -+ return (NULL); -+ } -+ -+ ELAN3MMU_STAT(l2_alloc_l3); -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l2ptbl: stolen level3 ptbl %p used as level 2\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_2, ptbl); -+ spin_lock_irqsave (*plock, *flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L2; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_2 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L2_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_2 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l2ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zero_sdarm (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L2_ENTRIES*ELAN3_PTP_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_alloc_l3ptbl (ELAN3_DEV *dev, int attr, ELAN3_PTBL *parent, ELAN3MMU *elan3mmu, E3_Addr base, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_PTBL *ptbl = NULL; -+ ELAN3_PTBL *p; -+ int i; -+ int j; -+ unsigned long ptbl_flags; -+ -+ spin_lock_irqsave (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: found ptbl %p on free list\n", ptbl); -+ -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock_irqrestore (&dev->Level[PTBL_LEVEL_3].PtblLock, ptbl_flags); -+ -+ if (ptbl == NULL) -+ { -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: created level 3 ptbl %p\n", ptbl); -+ } -+ -+ if (ptbl == NULL) -+ { -+ if ((attr & PTE_NO_STEAL)) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_alloc_l3ptbl: not allowed to steal ptbl for use at level 3\n"); -+ return (NULL); -+ } -+ -+ ptbl = elan3mmu_steal_l3ptbl (dev, attr); -+ -+ HAT_PRINTF1 (2, "elan3mmu_alloc_l3ptbl: stolen level3 ptbl %p\n", ptbl); -+ } -+ -+ *plock = elan3mmu_ptbl_to_lock (PTBL_LEVEL_3, ptbl); -+ spin_lock_irqsave (*plock,*flags); -+ -+ for (p = ptbl, j = i = 0; i < PTBLS_PER_PTBL_L3; i++, p++) -+ { -+ p->ptbl_elan3mmu = elan3mmu; -+ p->ptbl_base = VA2BASE (base + j); -+ p->ptbl_flags = PTBL_LEVEL_3 | PTBL_GROUPED; -+ p->ptbl_parent = parent; -+ -+ j += L3_VA_PER_PTBL; -+ } -+ -+ ptbl->ptbl_flags = PTBL_LEVEL_3 | PTBL_ALLOCED | PTBL_LOCKED; -+ -+ HAT_PRINTF3 (2, "elan3mmu_alloc_l3ptbl: ptbl %p dev %p base %x\n", ptbl, dev, base); -+ -+#ifdef zero_all_ptbls -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+#endif -+ -+ return (ptbl); -+} -+ -+void -+elan3mmu_free_pte (ELAN3_DEV *dev, ELAN3MMU *elan3mmu, ELAN3_PTBL *ptbl_ptr, int idx) -+{ -+ sdramaddr_t pte = PTBL_TO_PTADDR (ptbl_ptr) | (idx * sizeof (ELAN3_PTE)); -+ ELAN3_PTE tpte = ELAN3_INVALID_PTE; -+ ELAN3_PTBL *prev; -+ -+ /* ensure that the pte is invalid when free */ -+ elan3_writepte (dev, pte, tpte); -+ -+ /* lock whilst removing */ -+ spin_lock (&elan3mmu->elan3mmu_lXptbl_lock); -+ -+ HAT_PRINTF4 (2, "elan3mmu_free_pte idx %d ptbl_ptr %p ptbl_base %x ptbl_ptr->ptbl_valid %d \n", -+ idx, ptbl_ptr, ptbl_ptr->ptbl_base, ptbl_ptr->ptbl_valid); -+ /* make sure it was set */ -+ ASSERT ( ptbl_ptr->ptbl_base & (1 << idx) ); -+ ASSERT ( ptbl_ptr->ptbl_valid > 0 ); -+ -+ ptbl_ptr->ptbl_base &= ~(1 << idx); -+ ptbl_ptr->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_free_pte: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(ptbl_ptr->ptbl_flags), ptbl_ptr, ptbl_ptr->ptbl_valid); -+ -+ /* was that the last one on this page */ -+ if ( ! ptbl_ptr->ptbl_valid ) -+ { -+ /* so no bits should be set then */ -+ ASSERT ( ptbl_ptr->ptbl_base == 0 ); -+ -+ /* is this the first page ?? */ -+ if ( elan3mmu->elan3mmu_lXptbl == ptbl_ptr ) -+ { -+ /* make the list start at the second element */ -+ elan3mmu->elan3mmu_lXptbl = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ -+ /* scan thro list looking for this page */ -+ prev = elan3mmu->elan3mmu_lXptbl; -+ while ( prev->ptbl_parent != NULL ) -+ { -+ if ( prev->ptbl_parent == ptbl_ptr ) /* its the next one */ -+ { -+ /* remove element from chain */ -+ prev->ptbl_parent = ptbl_ptr->ptbl_parent; -+ -+ /* put ptbl back on free list */ -+ elan3mmu_free_lXptbl(dev, ptbl_ptr); -+ -+ /* unlock and return */ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+ return ; -+ } -+ prev = prev->ptbl_parent; -+ } -+ -+ panic ("elan3mmu_free_pte: failed to find ptbl in chain"); -+ /* NOTREACHED */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lXptbl_lock); -+} -+ -+void -+elan3mmu_free_lXptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_lXptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_X); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(lX_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_lXptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_X].PtblFreeList; -+ dev->Level[PTBL_LEVEL_X].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_X].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_X].PtblLock); -+ } -+} -+ -+void -+elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ HAT_PRINTF3 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p ptbl->ptbl_valid %x \n", dev, ptbl, ptbl->ptbl_valid); -+ -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_1); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l1ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_1].PtblFreeList; -+ dev->Level[PTBL_LEVEL_1].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_1].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_1].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ELAN3_PTBL_GR *ptg; -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l2ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ ptg = PTBL_TO_GR(ptbl); -+ -+ if (ptg->pg_level == PTBL_LEVEL_3) -+ { -+ ELAN3MMU_STAT(l2_freed_l3); -+ -+ HAT_PRINTF1 (2, "elan3mmu_free_l2ptbl: freeing stolen level 3 ptbl %p\n", ptbl); -+ -+ /* this was really a level 3 ptbl which we had to steal */ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ } -+ else -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_2].PtblFreeList; -+ dev->Level[PTBL_LEVEL_2].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_2].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_2].PtblLock); -+ } -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ASSERT (PTBL_IS_LOCKED(ptbl->ptbl_flags)); -+ ASSERT (ptbl->ptbl_flags & PTBL_ALLOCED); -+ ASSERT ((ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ ASSERT (PTBL_LEVEL(ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ ASSERT (ptbl->ptbl_valid == 0); -+ -+ HAT_PRINTF2 (2, "elan3mmu_free_l3ptbl: dev %p ptbl %p\n", dev, ptbl); -+ -+ if (ptbl->ptbl_flags & PTBL_KERNEL) /* if the ptbl has been used by the kernel */ -+ { /* then zero all the pte's, since they will */ -+ elan3_sdram_zeroq_sdram (dev, PTBL_TO_PTADDR(ptbl), ELAN3_L3_ENTRIES*ELAN3_PTE_SIZE); -+ } -+ -+ ptbl->ptbl_flags = 0; -+ spin_unlock (lock); -+ -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl->ptbl_next = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount++; -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ local_irq_restore (flags); -+} -+ -+void -+elan3mmu_kernel_l3ptbl (ELAN3_PTBL *ptbl) -+{ -+ ELAN3_DEV *dev = ptbl->ptbl_elan3mmu->elan3mmu_dev; -+ sdramaddr_t pte = PTBL_TO_PTADDR(ptbl); -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte(ptbl->ptbl_elan3mmu); -+ int i; -+ -+ ptbl->ptbl_flags |= PTBL_KERNEL; -+ for (i = 0; i < ELAN3_L3_ENTRIES; i++, pte += ELAN3_PTE_SIZE) -+ { -+ elan3_writepte (dev, pte, tpte); -+ } -+} -+ -+#define PTBL_CAN_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+#define PTBL_MAY_STEAL(flag) (((flag) & (PTBL_KERNEL|PTBL_KEEP|PTBL_LOCKED)) == 0 && (((flag) & PTBL_ALLOCED) && PTBL_LEVEL(flag) == PTBL_LEVEL_3)) -+ -+static int -+elan3mmu_steal_this_ptbl (ELAN3_DEV *dev, ELAN3_PTBL *l3ptbl) -+{ -+ ELAN3_PTBL *l2ptbl = l3ptbl->ptbl_parent; -+ E3_Addr l2addr = BASE2VA(l2ptbl); -+ E3_Addr l3addr = BASE2VA(l3ptbl); -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ sdramaddr_t l2ptp; -+ spinlock_t *l2lock; -+ unsigned long l2flags; -+ -+ HAT_PRINTF5 (1, "elan3mmu_steal_this_ptbl: l3ptbl %p (%x) l2ptbl %p (%x) l2addr %x\n", -+ l3ptbl, l3ptbl->ptbl_flags, l2ptbl, l2ptbl->ptbl_flags, l2addr); -+ -+ if (PTBL_CAN_STEAL (l3ptbl->ptbl_flags) && -+ elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_NOWAIT, l3ptbl->ptbl_elan3mmu, l2addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_OK) -+ { -+ ELAN3MMU_STAT(stolen_ptbls); -+ -+ /* Locked both L3 and L2 page tables. */ -+ l2ptp = PTBL_TO_PTADDR (l2ptbl) + ELAN3_L2_INDEX(l3addr)*ELAN3_PTP_SIZE; -+ -+ /* detach the level 3 page table */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_steal_this_ptbl: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_unload_loop (l3ptbl->ptbl_elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, PTE_UNLOAD_NOFLUSH); -+ -+ ASSERT (l3ptbl->ptbl_valid == 0); -+ -+ l3ptbl->ptbl_flags = 0; -+ return (1); -+ } -+ return (0); -+} -+ -+static ELAN3_PTBL * -+elan3mmu_steal_l3ptbl (ELAN3_DEV *dev, int attr) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_PTBL *ptbl; -+ spinlock_t *lock; -+ unsigned long group_flags; -+ unsigned long ptbl_flags; -+ register int i; -+ -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: attr %x\n", attr); -+ -+ spin_lock_irqsave (&dev->PtblGroupLock, group_flags); -+ -+ ptg = dev->Level3PtblGroupHand; -+ -+ if (ptg == NULL) -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ -+ for (;;) -+ { -+ while (ptg) -+ { -+ for (i = 0, ptbl = ptg->pg_ptbls; i < PTBLS_PER_GROUP_MAX; i++, ptbl++) -+ { -+ if (PTBL_MAY_STEAL (ptbl->ptbl_flags) && -+ elan3mmu_lock_this_ptbl (ptbl, LK_PTBL_NOWAIT, &lock, &ptbl_flags) == LK_PTBL_OK) -+ { -+ if (elan3mmu_steal_this_ptbl (dev, ptbl )) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: stolen ptbl %p\n", ptbl); -+ -+ elan3mmu_unlock_ptbl (ptbl, lock,ptbl_flags); -+ -+ dev->Level3PtblGroupHand = ptg->pg_next; -+ -+ spin_unlock_irqrestore (&dev->PtblGroupLock, group_flags); -+ -+ return (ptbl); -+ } -+ elan3mmu_unlock_ptbl (ptbl, lock, ptbl_flags); -+ } -+ } -+ ptg = ptg->pg_next; -+ } -+ -+ if (dev->Level[PTBL_LEVEL_3].PtblFreeList) -+ { -+ spin_lock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ ptbl = dev->Level[PTBL_LEVEL_3].PtblFreeList; -+ if (ptbl != NULL) -+ { -+ dev->Level[PTBL_LEVEL_3].PtblFreeList = ptbl->ptbl_next; -+ dev->Level[PTBL_LEVEL_3].PtblFreeCount--; -+ } -+ spin_unlock (&dev->Level[PTBL_LEVEL_3].PtblLock); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: found ptbl %p on free list\n", ptbl); -+ break; -+ } -+ } -+ -+ ptbl = elan3mmu_create_ptbls (dev, PTBL_LEVEL_3, attr, 1); -+ -+ if (ptbl != NULL) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_steal_l3ptbl: created new ptbl %p\n", ptbl); -+ break; -+ } -+ -+ HAT_PRINTF0 (1, "elan3mmu_steal_l3ptbl: cannot find a ptbl, retrying\n"); -+ ptg = dev->Level[PTBL_LEVEL_3].PtblGroupList; -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ return (ptbl); -+} -+ -+sdramaddr_t -+elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr addr, int *level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ spinlock_t *l1lock; -+ spinlock_t *l2lock; -+ spinlock_t *l3lock; -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ *level = 0; -+ -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) NULL); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *level = 1; -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return with l1lock */ -+ return (l1ptp); -+ -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptefind: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *level = 2; -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return with l2lock */ -+ return (l2ptp); -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ case ELAN3_ET_INVALID: -+ return ((sdramaddr_t) 0); -+ -+ case ELAN3_ET_PTP: -+ break; -+ default: -+ panic ("elan3mmu_ptefind: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptefind: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptefind: l3ptbl %p l3pte %lx\n", l3ptbl, l3pte); -+ -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags)) -+ { -+ case LK_PTBL_OK: -+ *level = 3; -+ *plock = l3lock; -+ *pptbl = l3ptbl; -+ *flags = l3flags; -+ -+ return (l3pte); -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptefind: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptefind: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptefind: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ /* NOTREACHED */ -+ return ((sdramaddr_t) 0); -+} -+ -+sdramaddr_t -+elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level) -+{ -+ ELAN3_PTP tptp = elan3_readptp (elan3mmu->elan3mmu_dev, ptp); -+ -+ ASSERT (level != 3 && ELAN3_PTP_TYPE(tptp) == ELAN3_ET_PTE); -+ -+ return PTP_TO_PT_PADDR(tptp); -+} -+ -+sdramaddr_t -+elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr addr, int level, -+ ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ sdramaddr_t l1ptp; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ spinlock_t *l1lock; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ E3_Addr l3base; -+ spinlock_t *l3lock; -+ -+ unsigned long l1flags; -+ unsigned long l2flags; -+ unsigned long l3flags; -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ if (l1ptbl == NULL) -+ return ((sdramaddr_t) 0); -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+retryl1: -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l1ptbl %p 1ptp %lx l1base %x (%x) : tl1ptp %x\n", -+ l1ptbl, l1ptp, l1base, l1ptbl->ptbl_base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_1) -+ { -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ panic ("elan3mmu_ptealloc: found pte in level 1 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ if (level == PTBL_LEVEL_1) -+ panic ("elan3mmu_ptealloc: found PTP when loading a level 1 PTE\n"); -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_1) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ /* raced with someone else, whose got there first */ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ /* drop the l1lock and retry */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ goto retryl1; -+ } -+ -+ tl1ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ *pptbl = l1ptbl; -+ *plock = l1lock; -+ *flags = l1flags; -+ -+ /* return holding l1lock */ -+ return (l1ptp); -+ } -+ -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ { -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ return ((sdramaddr_t) 0); -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* Now need to lock the l1 ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2 ptbl/lx pte\n"); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ goto retryl1; -+ } -+ -+ /* Now have L1 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l2ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l2flags); /* need to unlock with the l2flags to keep irq order correct */ -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l1flags; /* return the l1flags here as we have released the l2flags already to keep order */ -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocating level 2 and level 3 page tables\n"); -+ -+ /* Allocate a level 2 and level 3 page table and link them together */ -+ if ((l2ptbl = elan3mmu_alloc_l2ptbl (dev, attr, l1ptbl, elan3mmu, ELAN3_L2_BASE(addr), &l2lock, &l2flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr | PTE_NO_SLEEP, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: l2ptbl %p (%x,%x) l3ptbl %p (%x,%x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_base, -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_base); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ { -+ l2ptbl->ptbl_flags |= PTBL_KERNEL; -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ } -+ -+ /* -+ * Connect L3 ptbl to the new L2 ptbl. -+ */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr) * ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ -+ l2ptbl->ptbl_valid = 1; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ /* -+ * Now need to lock the l1 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ elan3mmu_lock_ptbl (l1ptbl, 0, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &l1flags); -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have l1 and l3 ptbls locked, so install the new l2 ptbl into the l1. */ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l1ptp %lx is %x\n", l1ptp, tl1ptp); -+ -+ if (ELAN3_PTP_TYPE(tl1ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l2/l3 ptbls\n"); -+ -+ /* free off the level 3 page table */ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l3 ptbl %p (%x)\n", l3ptbl, l3ptbl->ptbl_flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* and unlock the level 1 ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l1flags); -+ -+ /* lock the level 2 page table, and clear out the PTP, then free it */ -+ (void) elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: locked l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ tl2ptp = ELAN3_INVALID_PTP; -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ l2ptbl->ptbl_valid = 0; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: set to 0 valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: freeing l2 ptbl %p (%x)\n", l2ptbl, l2ptbl->ptbl_flags); -+ -+ elan3mmu_free_l2ptbl (dev, l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l1ptbl is %p (%x), l3ptbl is %p (%x)\n", -+ l1ptbl, l1ptbl->ptbl_flags, l3ptbl, l3ptbl->ptbl_flags); -+ -+ /* Now have L1 and L3 locked, so install the L2 ptbl */ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl1ptp = PTBL_TO_PTADDR(l2ptbl) | ELAN3_ET_PTP; -+ l1ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l1ptp %lx to %x\n", l1ptp, tl1ptp); -+ -+ /* unordered unlock - lock l1ptbl, lock l3ptbl, unlock l1ptbl */ -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, l3flags); /* free using l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR (l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l1flags; /* return l1flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF5 (2, "elan3mmu_ptealloc: l2ptbl %p l2ptp %lx l2base %x (%x) : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, l2ptbl->ptbl_base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ if (level == PTBL_LEVEL_2) { -+ /* this is a pointer to a pte, we should just return it */ -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_PTE) -+ { -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holdind l2lock */ -+ return (l2ptp); -+ } -+ panic ("elan3mmu: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ case ELAN3_ET_INVALID: -+ if (level == PTBL_LEVEL_2) -+ { -+ if ((lXptbl = elan3mmu_alloc_pte (dev, elan3mmu, &idx)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, 0, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags)) -+ { -+ case LK_PTBL_OK: -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_ptealloc: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ HAT_PRINTF6 (2, "elan3mmu_ptealloc: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x)\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr); -+ -+ /* -+ * We've trogged down to this ptbl, but someone has just -+ * stolen it, so try all over again. -+ */ -+ goto retryl1; -+ -+ default: -+ panic ("elan3mmu_ptealloc: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free lx pte\n"); -+ -+ elan3mmu_free_pte (dev, elan3mmu, lXptbl, idx); -+ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ goto retryl1; -+ } -+ -+ /* Connect l2ptbl to the new LX pte */ -+ tl2ptp = PTBL_TO_PTADDR(lXptbl) | (idx * ELAN3_PTE_SIZE) | ELAN3_ET_PTE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write l2ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ *pptbl = l2ptbl; -+ *plock = l2lock; -+ *flags = l2flags; -+ -+ /* return holding l2lock */ -+ return (l2ptp); -+ } -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: allocate level 3 page table\n"); -+ -+ if ((l3ptbl = elan3mmu_alloc_l3ptbl (dev, attr, l2ptbl, elan3mmu, ELAN3_L3_BASE(addr), &l3lock, &l3flags)) == NULL) -+ return ((sdramaddr_t) 0); -+ -+ if (CTXT_IS_KERNEL (elan3mmu->elan3mmu_ctxt)) -+ elan3mmu_kernel_l3ptbl (l3ptbl); -+ -+ /* -+ * Now need to lock the l2 ptbl - to maintain lock ordering -+ * we set the PTBL_KEEP bit to stop the l3 ptbl from being -+ * stolen and drop the locks in the order we aquired them -+ */ -+ l3ptbl->ptbl_flags |= PTBL_KEEP; -+ -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ -+ if (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &l2flags) == LK_PTBL_MISMATCH) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: l2ptbl freed, free l3 ptbl and try again\n"); -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ goto retryl1; -+ } -+ -+ elan3mmu_lock_ptbl (l3ptbl, 0, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags); -+ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ -+ /* Now have L2 and L3 ptbls locked, see if someone has beaten us to it. */ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: l2ptp at %lx is %x\n", l2ptp, tl2ptp); -+ -+ if (ELAN3_PTP_TYPE(tl2ptp) != ELAN3_ET_INVALID) -+ { -+ HAT_PRINTF0 (2, "elan3mmu_ptealloc: beaten to it, free l3 ptbl and try again\n"); -+ -+ /* free off the level 3 page table, and try again */ -+ l3ptbl->ptbl_flags &= ~PTBL_KEEP; -+ elan3mmu_free_l3ptbl (dev, l3ptbl, l3lock, l3flags); -+ -+ /* Someone has allocated the ptbl before us */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l2flags); -+ -+ goto retryl1; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED (l2ptbl->ptbl_flags)); -+ -+ /* Install the L3 ptbl into the L2 one */ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ tl2ptp = PTBL_TO_PTADDR(l3ptbl) | ELAN3_ET_PTP; -+ l2ptbl->ptbl_valid++; -+ -+ HAT_PRINTF3 (2, "elan3mmu_ptealloc: inc valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ -+ HAT_PRINTF2 (2, "elan3mmu_ptealloc: write level 2 ptp %lx to %x\n", l2ptp, tl2ptp); -+ -+ /* unordered unlock - lock l2ptbl, lock l3ptbl, unlock l2ptbl */ -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, l3flags); /* free with the l3flags to keep irq ordering */ -+ -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ /* Level 3 ptbl is already locked, so just return the pte */ -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l2flags; /* return l2flags to keep irq ordering */ -+ -+ return (l3pte); -+ -+ default: -+ panic ("elan3mmu_ptealloc: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ HAT_PRINTF1 (2, "elan3mmu_ptealloc: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ l3base = ELAN3_L3_BASE(addr); -+ -+ HAT_PRINTF4 (2, "elan3mmu_ptealloc: l3ptbl %p 3pte %lx l3base %x (%x)\n", -+ l3ptbl, l3pte, l3base, l3ptbl->ptbl_base); -+ -+ if (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &l3flags) == LK_PTBL_OK) -+ { -+ *pptbl = l3ptbl; -+ *plock = l3lock; -+ *flags = l3flags; -+ -+ return (l3pte); -+ } -+ -+ /* got all the way down here, but its been nicked before we could lock it */ -+ /* so try all over again */ -+ goto retryl1; -+} -+ -+void -+elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int attr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl1ptp; -+ sdramaddr_t l1ptp; -+ E3_Addr addr; -+ spinlock_t *l2lock; -+ ELAN3_PTBL *l2ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ unsigned long flags; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl); -+ -+ HAT_PRINTF2 (1, "elan3mmu_l1inval: l1ptbl %p l1ptp %lx\n", l1ptbl, l1ptp); -+ -+ for (i = 0, addr = 0; i < ELAN3_L1_ENTRIES; i++, l1ptp += ELAN3_PTP_SIZE) -+ { -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: l1ptbl %p : lXptbl %p idx %d\n", -+ l1ptbl, lXptbl, idx); -+ -+ /* invalidate the L1 pte. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l1inval: l1ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l1ptbl, l1ptp, tl1ptp, addr, i); -+ -+ /* invalidate the L1 ptp. */ -+ elan3_writeptp (dev, l1ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* invalidate the level 2 page table */ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ ret = elan3mmu_l2inval (elan3mmu, l2ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l2lock, &flags); -+ -+ ASSERT ((l2ptbl->ptbl_flags & PTBL_KEEP) == 0); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if (((l2ptbl->ptbl_flags & PTBL_KEEP) == 0) && l2ptbl->ptbl_valid == 0) -+ { -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: free l2ptbl %p\n", l2ptbl); -+ -+ l1ptbl->ptbl_valid--; -+ elan3mmu_free_l2ptbl (elan3mmu->elan3mmu_dev, l2ptbl, l2lock, flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l1inval: keep l2ptbl %p\n", l2ptbl); -+ -+ elan3_writeptp (dev, l1ptp, tl1ptp); -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ } -+ } -+ else -+ { -+ l1ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l1inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l1ptbl->ptbl_flags), l1ptbl, l1ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l1inval: found invalid entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l1ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L1_SIZE; -+ } -+} -+ -+int -+elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int attr, E3_Addr addr, spinlock_t **pl2lock, unsigned long *flags) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTP invalidptp = ELAN3_INVALID_PTP; -+ ELAN3_PTP tl2ptp; -+ sdramaddr_t l2ptp; -+ spinlock_t *l3lock; -+ unsigned long l3flags; -+ ELAN3_PTBL *l3ptbl; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ int i; -+ int ret; -+ -+ HAT_PRINTF2 (1, "elan3mmu_l2inval: l2ptbl %p addr %x\n", l2ptbl, addr); -+ -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (PTBL_LEVEL (l2ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_1); -+ -+ ret = elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, pl2lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (l2ptbl->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l2ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ -+ l2ptp = PTBL_TO_PTADDR(l2ptbl); -+ -+ for (i = 0; i < ELAN3_L2_ENTRIES; i++, l2ptp += ELAN3_PTP_SIZE) -+ { -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: l2ptbl %p : lXptbl %p idx %d\n", -+ l2ptbl, lXptbl, idx); -+ -+ /* invalidate the L2 pte. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_pte ( dev, elan3mmu, lXptbl, idx); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ -+ break; -+ -+ case ELAN3_ET_PTP: -+ HAT_PRINTF5 (2, "elan3mmu_l2inval: l2ptbl %p : ptp %lx (%x) addr %x (%d)\n", -+ l2ptbl, l2ptp, tl2ptp, addr, i); -+ -+ /* invalidate the L2 ptp. */ -+ elan3_writeptp (dev, l2ptp, invalidptp); -+ if (! (attr & PTE_UNLOAD_NOFLUSH)) -+ ElanFlushTlb (dev); -+ -+ /* unload the level 3 page table */ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ ret = elan3mmu_l3inval (elan3mmu, l3ptbl, attr | PTE_UNLOAD_NOFLUSH, addr, &l3lock, &l3flags); -+ -+ if (ret == LK_PTBL_OK) -+ { -+ if ((l3ptbl->ptbl_flags & PTBL_KEEP) == 0 && l3ptbl->ptbl_valid == 0) -+ { -+ /* decrement the valid count of the level 2 page table, and */ -+ /* free off the level 3 page table */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: free l3ptbl %p\n", l3ptbl); -+ -+ l2ptbl->ptbl_valid--; -+ elan3mmu_free_l3ptbl (elan3mmu->elan3mmu_dev, l3ptbl, l3lock, l3flags); -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ else -+ { -+ /* need to keep this page table, so even though its now empty, */ -+ /* chain it back in */ -+ HAT_PRINTF1 (2, "elan3mmu_l2inval: keep l3ptbl %p\n", l3ptbl); -+ -+ elan3_writeptp (dev, l2ptp, tl2ptp); -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, l3flags); -+ } -+ } -+ else -+ { -+ l2ptbl->ptbl_valid--; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l2inval: dec valid for level %d ptbl %p to %d\n", -+ PTBL_LEVEL(l2ptbl->ptbl_flags), l2ptbl, l2ptbl->ptbl_valid); -+ } -+ break; -+ -+ case ELAN3_ET_INVALID: -+ break; -+ -+ default: -+ panic ("elan3mmu_l2inval: found pte in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ if (l2ptbl->ptbl_valid == 0) -+ break; -+ -+ addr += ELAN3_L2_SIZE; -+ } -+ -+ ASSERT (PTBL_IS_LOCKED(l2ptbl->ptbl_flags)); -+ -+ return (ret); -+} -+ -+int -+elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int attr, E3_Addr addr, spinlock_t **pl3lock, unsigned long *flags) -+{ -+ int ret; -+ -+ HAT_PRINTF3 (2, "elan3mmu_l3inval: l3ptbl %p parent %p addr %x\n", l3ptbl, l3ptbl->ptbl_parent, addr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_parent->ptbl_flags)); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_parent->ptbl_flags) == PTBL_LEVEL_2); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_elan3mmu == elan3mmu); -+ ASSERT (l3ptbl->ptbl_parent->ptbl_base == VA2BASE (ELAN3_L2_BASE(addr))); -+ -+ ret = elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, pl3lock, flags); -+ -+ ASSERT (ret == LK_PTBL_OK); -+ ASSERT (PTBL_LEVEL (l3ptbl->ptbl_flags) == PTBL_LEVEL_3); -+ -+ elan3mmu_unload_loop (elan3mmu, l3ptbl, 0, ELAN3_L3_ENTRIES, attr); -+ -+ ASSERT (PTBL_IS_LOCKED (l3ptbl->ptbl_flags)); -+ -+ return (ret); -+ } -+ -+int -+elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags) -+{ -+ int level = PTBL_LEVEL (ptbl->ptbl_flags); -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (! spin_trylock (lock)) { -+ local_irq_restore (*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (level != PTBL_LEVEL (ptbl->ptbl_flags)) -+ { -+ spin_unlock (lock); -+ local_irq_restore (*flags); -+ return (LK_PTBL_MISMATCH); -+ } -+ -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ *plock = lock; -+ return (LK_PTBL_OK); -+} -+ -+int -+elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags) -+{ -+ spinlock_t *lock = elan3mmu_ptbl_to_lock (level, ptbl); -+ int res = LK_PTBL_MISMATCH; -+ -+ local_irq_save (*flags); -+ -+ if ((flag & LK_PTBL_NOWAIT) == 0) -+ spin_lock (lock); -+ else if (spin_trylock (lock) == 0) { -+ local_irq_restore(*flags); -+ return (LK_PTBL_FAILED); -+ } -+ -+ if (PTBL_LEVEL (ptbl->ptbl_flags) != level) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ /* We have the right mutex, so check that its the ptbl we want. */ -+ switch (level) -+ { -+ case PTBL_LEVEL_1: va = ELAN3_L1_BASE(va); break; -+ case PTBL_LEVEL_2: va = ELAN3_L2_BASE(va); break; -+ case PTBL_LEVEL_3: va = ELAN3_L3_BASE(va); break; -+ } -+ -+ if (ptbl->ptbl_elan3mmu != elan3mmu || ptbl->ptbl_base != VA2BASE(va)) -+ { -+ res = LK_PTBL_MISMATCH; -+ goto mismatch; -+ } -+ -+ ASSERT ((ptbl->ptbl_flags & PTBL_LOCKED) == 0); -+ ptbl->ptbl_flags |= PTBL_LOCKED; -+ -+ *plock = lock; -+ return (LK_PTBL_OK); -+ -+mismatch: -+ if (! (flag & LK_PTBL_FAILOK)) -+ panic ("elan3mmu: failed to lock ptbl\n"); -+ -+ spin_unlock (lock); -+ local_irq_restore(*flags); -+ return (res); -+} -+ -+void -+elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags) -+{ -+ ptbl->ptbl_flags &= ~PTBL_LOCKED; -+ spin_unlock_irqrestore (lock,flags); -+} -+ -+static spinlock_t * -+elan3mmu_ptbl_to_lock (int level, ELAN3_PTBL *ptbl) -+{ -+ switch (level) -+ { -+ case PTBL_LEVEL_3: return (&l3ptbl_lock[L3PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_2: return (&l2ptbl_lock[L2PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_1: return (&l1ptbl_lock[L1PTBL_MTX_HASH(ptbl)]); -+ case PTBL_LEVEL_X: -+ panic ("elan3mmu: ptbl_to_lock, bad level X"); -+ default: -+ panic ("elan3mmu: ptbl_to_lock, bad level"); -+ /* NOTREACHED */ -+ } -+ return (NULL); -+} -+ -+void -+elan3mmu_display (ELAN3MMU *elan3mmu, E3_Addr addr) -+{ -+ ELAN3_DEV *dev = elan3mmu->elan3mmu_dev; -+ ELAN3_PTBL *l1ptbl; -+ sdramaddr_t l1ptp; -+ spinlock_t *l1lock; -+ ELAN3_PTE tl1pte; -+ ELAN3_PTP tl1ptp; -+ E3_Addr l1base; -+ ELAN3_PTBL *l2ptbl; -+ sdramaddr_t l2ptp; -+ ELAN3_PTE tl2pte; -+ spinlock_t *l2lock; -+ ELAN3_PTP tl2ptp; -+ E3_Addr l2base; -+ ELAN3_PTBL *l3ptbl; -+ sdramaddr_t l3pte; -+ ELAN3_PTE tl3pte; -+ spinlock_t *l3lock; -+ ELAN3_PTBL *lXptbl; -+ int idx; -+ unsigned long flags; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: elan3mmu %p addr %x\n", elan3mmu, addr); -+ -+ l1ptbl = elan3mmu->elan3mmu_l1ptbl; -+ -+ if (l1ptbl == NULL) -+ return; -+ -+ l1ptp = PTBL_TO_PTADDR(l1ptbl) + ELAN3_L1_INDEX(addr)*ELAN3_PTP_SIZE; -+ l1base = ELAN3_L1_BASE(addr); -+ -+ tl1ptp = elan3_readptp (dev, l1ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l1ptbl %p l1ptp %lx l1base %x : tl1ptp %x\n", l1ptbl, l1ptp, l1base, tl1ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl1ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 1 page table for pte %x\n", tl1ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ idx = (PTP_TO_PT_PADDR(tl1ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl1pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_1, &l1lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l1ptbl, l1lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 1 l1pte matches value %llx\n", (long long) tl1pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l1 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 1 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l1ptbl, l1ptbl->ptbl_flags, l1ptbl->ptbl_elan3mmu, l1ptbl->ptbl_base, elan3mmu, addr, (long long)tl1pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 1 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 1 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 2 ptbl from ptp %x\n", tl1ptp); -+ -+ l2ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl1ptp); -+ l2ptp = PTBL_TO_PTADDR(l2ptbl) + ELAN3_L2_INDEX(addr)*ELAN3_PTP_SIZE; -+ l2base = ELAN3_L2_BASE(addr); -+ -+ tl2ptp = elan3_readptp (dev, l2ptp); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l2ptbl %p l2ptp %lx l2base %x : tl2ptp %x\n", -+ l2ptbl, l2ptp, l2base, tl2ptp); -+ -+ switch (ELAN3_PTP_TYPE(tl2ptp)) -+ { -+ case ELAN3_ET_PTE: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: level 2 page table for pte %x\n", tl2ptp); -+ -+ lXptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ idx = (PTP_TO_PT_PADDR(tl2ptp) - PTBL_TO_PTADDR(lXptbl))/ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lXptbl %p idx %d\n",lXptbl, idx); -+ -+ tl2pte = elan3_readpte (dev,(PTBL_TO_PTADDR (lXptbl) + idx * ELAN3_PTE_SIZE)); -+ -+ switch (elan3mmu_lock_ptbl (l2ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_2, &l2lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l2ptbl, l2lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: lvl 2 l1pte matches value %llx\n", (long long)tl2pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l2 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : lvl 2 ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l2ptbl, l2ptbl->ptbl_flags, l2ptbl->ptbl_elan3mmu, l2ptbl->ptbl_base, elan3mmu, addr, (long long) tl2pte); -+ -+ break; -+ default: -+ panic ("elan3mmu_display: lvl 2 elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+ return; -+ -+ case ELAN3_ET_INVALID: -+ return; -+ -+ case ELAN3_ET_PTP: -+ break; -+ -+ default: -+ panic ("elan3mmu_display: found bad entry in level 2 page table"); -+ /* NOTREACHED */ -+ } -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: chain to level 3 page table from ptp %x\n", tl2ptp); -+ -+ l3ptbl = elan3mmu_ta_to_ptbl (elan3mmu, &tl2ptp); -+ l3pte = PTBL_TO_PTADDR(l3ptbl) + ELAN3_L3_INDEX(addr)*ELAN3_PTE_SIZE; -+ -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3ptbl %p l3pte %lx\n",l3ptbl, l3pte); -+ -+ tl3pte = elan3_readpte (dev, l3pte); -+ switch (elan3mmu_lock_ptbl (l3ptbl, LK_PTBL_FAILOK, elan3mmu, addr, PTBL_LEVEL_3, &l3lock, &flags)) -+ { -+ case LK_PTBL_OK: -+ elan3mmu_unlock_ptbl (l3ptbl, l3lock, flags); -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: l3pte matches value %llx\n", (long long) tl3pte); -+ break; -+ -+ case LK_PTBL_FAILED: -+ panic ("elan3mmu_display: l3 lock failed"); -+ /* NOTREACHED */ -+ -+ case LK_PTBL_MISMATCH: -+ elan3_debugf (NULL, DBG_HAT, "elan3mmu_display: PTBL_MISMATCH : ptbl %p flags %x elan3mmu %p base %x (%p %x) %llx\n", -+ l3ptbl, l3ptbl->ptbl_flags, l3ptbl->ptbl_elan3mmu, l3ptbl->ptbl_base, elan3mmu, addr, (long long) tl3pte); -+ -+ break; -+ -+ default: -+ panic ("elan3mmu_display: elan3mmu_lock_ptbl returned bad value"); -+ /* NOTREACHED */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3mmu_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elan3mmu_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3mmu_linux.c 2005-07-28 14:52:52.795686792 -0400 -@@ -0,0 +1,284 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3mmu_linux.c,v 1.50.2.3 2004/12/14 10:19:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/vm/elan3mmu_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Strategy for syncing main <-> elan pte's: -+ * -+ * Install callbacks for linux flush_tlb_page(), flush_tlb_range(), -+ * flush_tlb_all(), and flush_tlb_mm() so when a main PTE changes, -+ * the elan translations, if any, are invalidated. They can then be -+ * faulted in again with the correct physical page, perms, etc., on demand. -+ * -+ * Callbacks are stacked on the mm_struct, one per context. We also stack -+ * a ctxt pointer so we don't have to do lookups on every call. -+ * -+ * Sanity check -- we clearly want to flush the elan PTEs in these -+ * situations, all of which are covered by tlb_flush_{page,range}() -+ * -+ * 1) kernel/vmscan.c::try_to_swap_out() swaps out a page -+ * -+ * 2) kernel/mremap.c::copy_one_pte() moves a page as a result of the -+ * mremap system call -+ * -+ * 3) kernel/mprotect.c::change_pte_range() changes the permissions of a -+ * page as the result of the mprotect system call -+ * -+ * Other Notes: -+ * -+ * Dirty a page in the mains page tables when it is faulted into the elan. -+ * This way it will not be thrown away by the swapper. -+ * -+ * Pages write protected for COW are copied by elan3mmu_main_pagefault() -+ * when a writeable translation is loaded into the elan. -+ */ -+ -+caddr_t elan3mmu_kernel_invalid_space; -+ELAN3_PTE elan3mmu_kernel_invalid_pte_val; -+ -+void -+elan3mmu_init_osdep (void) -+{ -+ pte_t *pte; -+ -+ KMEM_GETPAGES (elan3mmu_kernel_invalid_space, caddr_t, 1, TRUE); -+ -+ ASSERT(elan3mmu_kernel_invalid_space != NULL); -+ -+ pte = find_pte_kernel ((unsigned long) elan3mmu_kernel_invalid_space); -+ -+ elan3mmu_kernel_invalid_pte_val = ELAN3_PTE_64_BIT | (pte_phys(*pte) & ELAN3_PTE_PFN_MASK) | ELAN3_PERM_REMOTEREAD | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ elan3mmu_kernel_invalid_pte_val |= (1ull << 48); -+ else -+ elan3mmu_kernel_invalid_pte_val |= alpha_mv.pci_dac_offset; -+#endif -+ -+ HAT_PRINTF(0x10, "elan3mmu_invalid_space at %p phys=%llx pte=%llx\n", elan3mmu_kernel_invalid_space, -+ (unsigned long long) pte_phys(*pte), (unsigned long long) elan3mmu_kernel_invalid_pte_val); -+} -+ -+void -+elan3mmu_fini_osdep() -+{ -+ KMEM_FREEPAGES (elan3mmu_kernel_invalid_space, 1); -+} -+ -+void -+elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu) -+{ -+ elan3mmu->elan3mmu_coproc_mm = current->mm; -+} -+ -+/* -+ * Convert physical page frame number to elan pte. -+ */ -+ELAN3_PTE -+elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm) -+{ -+ ELAN3_PTE newpte; -+ -+ ASSERT (paddr != 0); -+ -+ if ((paddr & dev->SdramPhysMask) == dev->SdramPhysBase) /* SDRAM, turn on PTE_LOCAL bit */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx SDRAM\n", (unsigned long long) paddr); -+ -+ newpte = ELAN3_PTE_LOCAL | (paddr & ELAN3_PTE_PFN_MASK & ~dev->SdramPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#if defined(LINUX_ALPHA) -+ else if ((paddr & dev->PciPhysMask) == dev->PciPhysBase) -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx PCI\n", (unsigned long long) paddr); -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK & ~dev->PciPhysMask) | perm | ELAN3_ET_PTE; -+ } -+#endif -+ else /* main memory, must convert to PCI view */ -+ { -+ PRINTF(NULL, DBG_HAT, "elan3mmu_phys_to_pte: phys %llx is main memory\n", (unsigned long long) paddr); -+ -+ /* main memory, just set the architecture specific PTE_BYPASS bit */ -+ /* This requires the Tsunami chipset being programmed to support -+ * the monster window option. This is in linux-2.4.5 and later kernels -+ * and is also patched into the RH 7.1/2.4.3-12 Alpha kernel -+ */ -+ newpte = ELAN3_PTE_64_BIT | (paddr & ELAN3_PTE_PFN_MASK) | perm | ELAN3_ET_PTE; -+ -+#ifdef __alpha -+ /* -+ * NOTE: Elan sign-extends bit 48 of the physical address, so if we need to -+ * set any of bits 63:48, then we will set them all by setting bit 48/ -+ */ -+ if (alpha_mv.pci_dac_offset & 0xFFFF000000000000ull) -+ newpte |= (1ull << 48); -+ else -+ newpte |= alpha_mv.pci_dac_offset; -+#endif -+ } -+ -+ if ( ELAN3_PERM_WRITEABLE( perm )) -+ newpte |= ( ELAN3_PTE_MOD | ELAN3_PTE_REF ); -+ else -+ newpte |= ( ELAN3_PTE_REF ) ; -+ -+ return (newpte); -+} -+ -+ELAN3_PTE -+elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu) -+{ -+ if (elan3mmu->elan3mmu_dev->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVB) -+ return (elan3mmu_kernel_invalid_pte_val); -+ return (ELAN3_INVALID_PTE); -+} -+ -+/* -+ * Invalidate a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, addr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " unloading eaddr %x main %p (%ld pages)\n", -+ eaddr, addr, btopr(span)); -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } /* takes care of elan tlb flush also */ -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * -+ */ -+void -+elan3mmu_update_range (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t vaddr, E3_Addr eaddr, u_int len, u_int perm) -+{ -+ u_int roperm = ELAN3_PERM_READONLY(perm & ELAN3_PTE_PERM_MASK) | (perm & ~ELAN3_PTE_PERM_MASK); -+ u_int off; -+ -+ HAT_PRINTF3(1, "elan3mmu_update_range (elan3mmu %p addr %p -> %p)\n", elan3mmu, vaddr, vaddr+len-1); -+ -+ while (len > 0) -+ { -+ pte_t *pte_ptr; -+ pte_t pte_value; -+ -+ pte_ptr = find_pte_map(mm, (unsigned long)vaddr); -+ if (pte_ptr) { -+ pte_value = *pte_ptr; -+ pte_unmap(pte_ptr); -+ } -+ -+ HAT_PRINTF(0x10, " elan3mmu_update_range %x (%p) %s\n", eaddr, vaddr, -+ !pte_ptr ? "invalid" : pte_none(pte_value) ? "none " : !pte_present(pte_value) ? "swapped " : -+ !pte_write(pte_value) ? "RO/COW" : "OK"); -+ -+ if (pte_ptr && !pte_none(pte_value) && pte_present(pte_value)) -+ for (off = 0; off < PAGE_SIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (elan3mmu, PTBL_LEVEL_3, eaddr + off, pte_phys(pte_value) + off, pte_write(pte_value) ? perm : roperm, PTE_LOAD|PTE_NO_SLEEP|PTE_NO_STEAL); -+ vaddr += PAGESIZE; -+ eaddr += PAGESIZE; -+ len -= PAGESIZE; -+ } -+} -+ -+/* -+ * Update a range of addresses for specified context. -+ */ -+void -+elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm,caddr_t vaddr, unsigned long len) -+{ -+ E3_Addr eaddr; -+ ELAN3MMU_RGN *rgn; -+ unsigned long span; -+ -+ spin_lock (&elan3mmu->elan3mmu_lock); -+ -+ for (; len; len -= span, vaddr += span) -+ { -+ rgn = elan3mmu_findrgn_main (elan3mmu, vaddr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < vaddr) -+ span = len; -+ else if (rgn->rgn_mbase > vaddr) -+ span = MIN(len, rgn->rgn_mbase - vaddr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - vaddr); -+ eaddr = rgn->rgn_ebase + (vaddr - rgn->rgn_mbase); -+ -+ HAT_PRINTF(0x10, " updating eaddr %u main %p (%ld pages)\n", -+ eaddr, vaddr, btopr(span)); -+ -+ elan3mmu_update_range(elan3mmu, mm, vaddr, eaddr, span, rgn->rgn_perm); -+ } -+ } -+ -+ spin_unlock (&elan3mmu->elan3mmu_lock); -+} -+ -+/* -+ * Invalidate all ptes for the given context. -+ */ -+void -+elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu) -+{ -+ ELAN3_PTBL *l1ptbl = (elan3mmu ? elan3mmu->elan3mmu_l1ptbl : NULL); -+ spinlock_t *l1mtx; -+ unsigned long flags; -+ -+ if (l1ptbl && elan3mmu_lock_ptbl (l1ptbl, LK_PTBL_FAILOK, elan3mmu, (E3_Addr) 0, 1, &l1mtx, &flags) == LK_PTBL_OK) -+ { -+ elan3mmu_l1inval(elan3mmu, elan3mmu->elan3mmu_l1ptbl, 0); -+ elan3mmu_unlock_ptbl (l1ptbl, l1mtx, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3ops.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elan3ops.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elan3ops.c 2005-07-28 14:52:52.796686640 -0400 -@@ -0,0 +1,170 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elan3ops.c,v 1.4 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elan3ops.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+extern ELAN_STATS_OPS elan3_device_stats_ops; -+ -+ELAN_DEV_OPS elan3_dev_ops = { -+ -+ get_position, -+ set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+ELAN_STATS_OPS elan3_device_stats_ops = { -+ ELAN_STATS_OPS_VERSION, -+ -+ stats_get_index_name, -+ stats_get_block, -+ stats_clear_block -+}; -+ -+static char *elan3_device_stats_names[ELAN3_NUM_STATS] = -+{ -+ "version field", /* not cleared */ -+ "elan interrupts", -+ "tlb flushes", -+ "traps with invalid context", -+ "interrupts com queue half full", -+ "cproc traps", -+ "dproc traps", -+ "tproc traps", -+ "iproc traps", -+ "event interrupts", -+ "elan page faults", -+ "EopBadAcks", -+ "EopResets", -+ "InputterBadLength", -+ "InputterCRCDiscards", -+ "InputterCRCErrors", -+ "InputterCRCBad", -+ "errors in dma data", -+ "errors after dma identify", -+ "errors after thread identify", -+ "dma retries", -+ "dma output timeouts", -+ "dma packet ack errors", -+ "forced tproc traps", -+ "too many instruction traps", -+ "output timeouts", -+ "packet ack errors", -+ "LockError", -+ "DeskewError", -+ "PhaseError", -+ "DataError", -+ "FifoOvFlow0", -+ "FifoOvFlow1", -+ "link error value on data error", -+ "correctable ecc errors", -+ "uncorrectable ecc errors", -+ "multiple ecc errors", -+ "sdram bytes free", /* not cleared */ -+ "longest interrupt in ticks", -+ "punts of event int's to thread", -+ "reschedules of event int's thread" -+}; -+ -+int -+stats_get_index_name (void *arg, uint index, caddr_t name) -+{ -+ copyout (elan3_device_stats_names[index], name, strlen (elan3_device_stats_names[index]) + 1 /* with \0 */); -+ -+ return (0); -+} -+ -+int -+stats_get_block (void *arg, uint entries, ulong *value) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ -+ if ( entries > ELAN3_NUM_STATS ) /* if space too big only send valid portion */ -+ entries = ELAN3_NUM_STATS; -+ -+ copyout(&dev->Stats, value, sizeof(ulong) * entries); -+ -+ return (0); -+} -+ -+int -+stats_clear_block (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ u_long *ptr = (u_long *) &dev->Stats; -+ int n; -+ -+ for (n = 0; n < ELAN3_NUM_STATS; n++) -+ { -+ switch (n) -+ { -+ case offsetof (ELAN3_STATS, Version)/sizeof(u_long): -+ case offsetof (ELAN3_STATS, SdramBytesFree)/sizeof(u_long): -+ break; -+ default: -+ ptr[n] = (ulong)0; -+ } -+ } -+ return (0); -+} -+ -+int -+get_position (void *user_data, ELAN_POSITION *position) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ copyout(&dev->Position, position, sizeof(ELAN_POSITION)); -+ -+ return (0); -+} -+ -+int -+set_position (void *user_data, unsigned short nodeId, unsigned short numNodes) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *)user_data; -+ -+ if (ComputePosition (&dev->Position, nodeId, numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (EINVAL); -+ -+ return (0); -+} -+ -+int -+elan3_register_dev_stats(ELAN3_DEV * dev) -+{ -+ char name[ELAN_STATS_NAME_MAX_LEN+1]; -+ -+ sprintf (name, ELAN3_STATS_DEV_FMT, dev->Instance); -+ -+ elan_stats_register(&dev->StatsIndex, -+ name, -+ sizeof (elan3_device_stats_names)/sizeof (elan3_device_stats_names[0]), -+ &elan3_device_stats_ops, -+ (void *)dev); -+ -+ return (0); -+} -+ -+void -+elan3_deregister_dev_stats(ELAN3_DEV * dev) -+{ -+ elan_stats_deregister(dev->StatsIndex); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandebug.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elandebug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandebug.c 2005-07-28 14:52:52.796686640 -0400 -@@ -0,0 +1,151 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandebug.c,v 1.25 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandebug.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+void -+elan3_debugf (void *p, unsigned int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ -+#if defined (DIGITAL_UNIX) -+#define PREFIX_FMT "[%lx.%08x]" -+#define PREFIX_VAL (int)CURTHREAD() -+#else -+#define PREFIX_FMT "[%lx.%04d]" -+#define PREFIX_VAL (current->pid) -+#endif -+ -+ if ((unsigned long) p > DBG_NTYPES) -+ { -+ ELAN3_CTXT *ctxt = (ELAN3_CTXT *) p; -+ -+ if (elan3_debug_display_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) != elan3_debug_display_ctxt) -+ return; -+ if (elan3_debug_ignore_ctxt && (ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK) == elan3_debug_ignore_ctxt) -+ return; -+ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ sprintf (prefix, PREFIX_FMT " (XXX) ", lbolt, PREFIX_VAL); -+ else -+ sprintf (prefix, PREFIX_FMT " (%03x) ", lbolt, PREFIX_VAL, -+ ctxt->Capability.cap_mycontext & MAX_ROOT_CONTEXT_MASK); -+ } -+ else -+ { -+ char *what; -+ -+ if (elan3_debug_ignore_dev & (1 << ((unsigned long) p))) -+ return; -+ -+ switch ((unsigned long) p) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_KCOMM: what = "kcm"; break; -+ case (int) DBG_ICS: what = "ics"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, PREFIX_FMT " [%s] ", lbolt, PREFIX_VAL, what); -+ else -+ sprintf (prefix, PREFIX_FMT " [%3d] ", lbolt, PREFIX_VAL, (int)(long)what); -+ } -+ -+ { -+ va_list ap; -+ -+ va_start (ap, fmt); -+ qsnet_vdebugf ((((mode & elan3_debug_buffer)?QSNET_DEBUG_BUFFER:0)|((mode & elan3_debug_console)?QSNET_DEBUG_CONSOLE:0)) , prefix, fmt, ap); -+ va_end (ap); -+ } -+} -+ -+ -+void -+elan3_alloc_panicstate (ELAN3_DEV *dev, int allocsdram) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr == NULL) -+ KMEM_ZALLOC (dev->PanicState.RegPtr, E3_Regs *, sizeof (E3_Regs), 1); -+ -+ if (allocsdram) -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] == NULL && dev->SdramBanks[bank].Size) -+ KMEM_ZALLOC (dev->PanicState.Sdram[bank], char *, dev->SdramBanks[bank].Size, 1); -+} -+ -+void -+elan3_free_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr != NULL) -+ KMEM_FREE (dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->PanicState.Sdram[bank] != NULL && dev->SdramBanks[bank].Size) -+ KMEM_FREE (dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ bzero (&dev->PanicState, sizeof (dev->PanicState)); -+} -+ -+void -+elan3_save_panicstate (ELAN3_DEV *dev) -+{ -+ register int bank; -+ -+ if (dev->PanicState.RegPtr) -+ { -+ printk ("elan%d: saving state on panic .....\n", dev->Devinfo.dev_instance); -+ -+ bcopy ((void *) dev->RegPtr, (void *) dev->PanicState.RegPtr, sizeof (E3_Regs)); -+ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ if (dev->SdramBanks[bank].Size && dev->PanicState.Sdram[bank]) -+ elan3_sdram_copyq_from_sdram (dev, (bank << ELAN3_SDRAM_BANK_SHIFT), dev->PanicState.Sdram[bank], dev->SdramBanks[bank].Size); -+ -+ } -+} -+ -+int -+elan3_assfail (ELAN3_DEV *dev, char *string, char *file, int line) -+{ -+ if (panicstr) -+ return (0); -+ -+ printk ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+ -+#if defined(LINUX) -+ elan3_save_panicstate (dev); -+ -+ panic ("elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#else -+ cmn_err (CE_PANIC, "elan: assertion failed '%s' File '%s' Line %d\n", string, file, line); -+#endif -+ /*NOTREACHED*/ -+ return (0); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandev_generic.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elandev_generic.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandev_generic.c 2005-07-28 14:52:52.799686184 -0400 -@@ -0,0 +1,1862 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elandev_generic.c,v 1.111.2.3 2004/11/15 11:12:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_generic.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * Module globals, configurable from system file. -+ */ -+u_int elan3_debug = 0; -+u_int elan3_debug_console = 0; -+u_int elan3_debug_buffer = -1; -+u_int elan3_debug_ignore_dev = 0; -+u_int elan3_debug_ignore_kcomm = 0; -+u_int elan3_debug_ignore_ctxt = 0; -+u_int elan3_debug_display_ctxt = 0; -+ -+int eventint_punt_loops; -+int eventint_punt_ticks; -+int eventint_resched_ticks; -+ -+static void InitialiseDmaBuffers (ELAN3_DEV *dev, ioaddr_t CmdPort); -+static int ProbeSdram (ELAN3_DEV *dev); -+static void InitialiseSdram (ELAN3_DEV *dev); -+static void ReEnableErrorInterrupts (void *arg); -+void PollForDmaHungup (void *arg); -+static void elan3_event_interrupt (ELAN3_DEV *dev); -+ -+/* -+ * BaseAddr is ptr to the start of a table aligned on a power of two byte address. -+ * SizePower must be in the range of 6 to 12. It defines the number of valid contexts as -+ * shown below. -+ * -+ * SizePower Valid Contexts Table size in bytes. -+ * 6 64 1k -+ * 7 128 2k -+ * 8 256 4K -+ * 9 512 8k -+ * 10 1024 16k -+ * 11 2048 32k -+ * 12 4096 64k -+ */ -+#define GEN_CONTEXT_PTR(BaseAddr, SizePower) (((E3_uint32) BaseAddr) | \ -+ (~((1 << ((SizePower) - 6)) - 1) & 0x3f)) -+ -+int -+InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ E3_IprocTrapHeader_BE TrapCleanup[4]; -+ E3_ContextControlBlock ContextControlBlock; -+ sdramaddr_t ptr; -+ int res; -+ int i; -+ -+ eventint_punt_loops = 100; -+ eventint_punt_ticks = (hz/100); -+ eventint_resched_ticks = (hz/4); -+ -+ dev->Stats.Version = ELAN3_STATS_VERSION; -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* -+ * The elan should have already been reset, so the interrupt mask -+ * should be 0 and the schedule status register should be set to -+ * its initial state -+ */ -+ ASSERT (dev->InterruptMask == 0); -+ ASSERT ((read_reg32 (dev, Exts.SchCntReg) & HaltStopAndExtTestMask) == Sched_Initial_Value); -+ -+ /* -+ * Write any value here to clear out the half full and error bits of the command -+ * overflow queues. -+ */ -+ write_reg32 (dev, ComQueueStatus, 0); -+ -+ /* Initialise the cache tags before touching the SDRAM */ -+ /* we initialise them to "map" the bottom of SDRAM */ -+ for (i = 0; i < E3_NumCacheLines; i++) -+ { -+ write_cache_tag (dev, Tags[i][0].Value, 0x0000000000000000ULL); -+ write_cache_tag (dev, Tags[i][1].Value, 0x0000080000000000ULL); -+ write_cache_tag (dev, Tags[i][2].Value, 0x0000100000000000ULL); -+ write_cache_tag (dev, Tags[i][3].Value, 0x0000180000000000ULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ for (i = 0; i < E3_NumCacheLines*(E3_CACHELINE_SIZE/sizeof(E3_uint64)); i++) -+ { -+ write_cache_set (dev, Set0[i], 0xcac1ecac1ecac1e0ULL); -+ write_cache_set (dev, Set1[i], 0xcac1ecac1ecac1e1ULL); -+ write_cache_set (dev, Set2[i], 0xcac1ecac1ecac1e2ULL); -+ write_cache_set (dev, Set3[i], 0xcac1ecac1ecac1e3ULL); -+ } -+#endif -+ -+ if ((res = ProbeSdram(dev)) != ESUCCESS) -+ return (res); -+ -+ /* Enable all cache sets before initialising the sdram allocators */ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg |= CONT_EN_ALL_SETS)); -+ -+ InitialiseSdram (dev); -+ -+ dev->TAndQBase = elan3_sdram_alloc (dev, ELAN3_TANDQ_SIZE); -+ dev->ContextTable = elan3_sdram_alloc (dev, ELAN3_CONTEXT_SIZE); -+ dev->ContextTableSize = ELAN3_NUM_CONTEXTS; -+ dev->CommandPortTraps[0] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CommandPortTraps[1] = elan3_sdram_alloc (dev, ELAN3_COMMAND_TRAP_SIZE); -+ dev->CurrentCommandPortTrap = 0; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "InitialiseElan: ContextTable %08lx TAndQ %08lx CommandPortTrap %08lx\n", -+ dev->ContextTable, dev->TAndQBase, dev->CommandPortTraps[0]); -+ -+ /* Allocate the thread amd dma trap areas */ -+ KMEM_ZALLOC (dev->ThreadTrap, THREAD_TRAP *, sizeof (THREAD_TRAP), TRUE); -+ KMEM_ZALLOC (dev->DmaTrap, DMA_TRAP *, sizeof (DMA_TRAP), TRUE); -+ -+ /* Allocate the ctxt table */ -+ KMEM_ZALLOC (dev->CtxtTable, ELAN3_CTXT **, dev->ContextTableSize * sizeof ( ELAN3_CTXT *), TRUE); -+ -+ /* Initialise halt queue list */ -+ dev->HaltOperationsTailpp = &dev->HaltOperations; -+ -+ /* From elan3/code/harness/elanstuff.c */ -+ /* Init the clock. */ -+ write_ureg64 (dev, Clock.NanoSecClock, 0); -+ -+ /* Init the instruction count reg. */ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Init the stats control reg. Must be done before the count regs.*/ -+ write_ureg32 (dev, StatCont.StatsControl, 0); -+ -+ /* Init the stats count regs. */ -+ write_ureg32 (dev, StatCounts[0].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[1].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[2].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[3].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[4].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[5].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[6].s.StatsCount, 0); -+ write_ureg32 (dev, StatCounts[7].s.StatsCount, 0); -+ -+ /* -+ * Initialise the Context_Ptr and Fault_Base_Ptr -+ */ -+ write_reg32 (dev, Fault_Base_Ptr, dev->TAndQBase + offsetof(E3_TrapAndQueue, IProcSysCntx)); -+ write_reg32 (dev, Context_Ptr, GEN_CONTEXT_PTR (dev->ContextTable, ELAN3_LN2_NUM_CONTEXTS)); -+ -+ /* scrub the TProc Registers */ -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Globals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Outs[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Locals[i], 0xdeadbabe); -+ for (i = 0; i < 8; i++) -+ write_reg32 (dev, Ins[i], 0xdeadbabe); -+ -+ /* -+ * Initialise the Queue pointers. Arrange them so that the starting positions are -+ * farthest apart in one set of the cache. Thus 512 bytes apart, but with cntx0 -+ * thread the same as the interrupt queue. -+ */ -+ write_reg32 (dev, TProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxThreadQueue[0xc0])); -+ write_reg32 (dev, TProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ write_reg32 (dev, TProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0x80])); -+ -+ write_reg32 (dev, DProc_NonSysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_NonSysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0])); -+ write_reg32 (dev, DProc_SysCntx_FPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ write_reg32 (dev, DProc_SysCntx_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0x10])); -+ -+ dev->Event_Int_Queue_FPtr = dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80]); -+ write_reg32 (dev, Event_Int_Queue_FPtr, dev->Event_Int_Queue_FPtr); -+ write_reg32 (dev, Event_Int_Queue_BPtr, dev->TAndQBase + offsetof (E3_TrapAndQueue, EventIntQueue[0x80])); -+ -+ -+ /* Initialise Input_Trap_Base to last 8 Kbytes of trap area, uCode adds the right offset */ -+ write_reg32 (dev, Input_Trap_Base, dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxThreadQueue[0])); -+ -+ /* Ptr to word used to save the SP to when a thread deschedules */ -+ write_reg32 (dev, Thread_SP_Save_Ptr, dev->TAndQBase + offsetof (E3_TrapAndQueue, Thread_SP_Save)); -+ -+ /* Initialise the command trap base */ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[0]); -+ -+ /* Initialise the set event tracing registers */ -+ write_reg32 (dev, Event_Trace_Ptr, 0); -+ write_reg32 (dev, Event_Trace_Mask, 0); -+ -+ /* Initialise Tlb_Line_Value to zero. The TLB cannot be read while either the */ -+ /* uCode or thread proc might be running. Must be set to 0. */ -+ write_reg64 (dev, Tlb_Line_Value, 0); -+ -+ /* Control register. Cache everything, Enable MMU, RefreshRate=3, CasLatency=1, StartSDR */ -+ dev->Cache_Control_Reg |= CONT_MMU_ENABLE | CONT_EN_ALL_SETS | CONT_CACHE_ALL | CONT_ENABLE_ECC; -+ -+#if ELAN3_PAGE_SHIFT == 13 -+ dev->Cache_Control_Reg |= CONT_ENABLE_8K_PAGES; -+#endif -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg); -+ -+ /* -+ * Initialise the context table to be discard for all contexts -+ */ -+ ContextControlBlock.rootPTP = 0; -+ ContextControlBlock.filter = E3_CCB_DISCARD_ALL; -+ ContextControlBlock.VPT_mask = 0; -+ ContextControlBlock.VPT_ptr = 0; -+ -+ for (i = 0, ptr = dev->ContextTable; i < ELAN3_NUM_CONTEXTS; i++, ptr += sizeof (E3_ContextControlBlock)) -+ elan3_sdram_copyl_to_sdram (dev, &ContextControlBlock, ptr, sizeof (E3_ContextControlBlock)); -+ -+ /* From elan3/code/trap_handler/init.c */ -+ /* -+ * Initialise the Trap And Queue area in Elan SDRAM. -+ */ -+ TrapCleanup[0].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[0].s.TrAddr = 0; -+ TrapCleanup[0].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[0].s.TrData0 = 0; -+ TrapCleanup[1].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[1].s.TrAddr = 0; -+ TrapCleanup[1].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[1].s.TrData0 = 0; -+ TrapCleanup[2].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[2].s.TrAddr = 0; -+ TrapCleanup[2].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[2].s.TrData0 = 0; -+ TrapCleanup[3].s.TrTypeCntx.TypeContext = 0; -+ TrapCleanup[3].s.TrAddr = 0; -+ TrapCleanup[3].s.IProcTrapStatus.Status = CRC_STATUS_GOOD; -+ TrapCleanup[3].s.TrData0 = 0; -+ -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx.s.FSR.Status), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FaultContext), 0); -+ elan3_sdram_writel (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx.s.FSR.Status), 0); -+ -+ /* Must now zero all the FSRs so that a subsequent Fault can be seen */ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, CProc), 16); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0), 64); -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), 64); -+ -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), 64); -+ elan3_sdram_copyq_to_sdram (dev, TrapCleanup, dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), 64); -+ -+ InitialiseDmaBuffers(dev, CmdPort); -+ -+ /* reserve a halt operation for flushing the context filter */ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ /* Allow the Thread/Dma to run */ -+ CLEAR_SCHED_STATUS (dev, HaltThread | HaltDmas); -+ -+ /* Enable All Interrrupts */ -+ SET_INT_MASK (dev, (INT_PciMemErr | INT_SDRamInt | INT_EventInterrupt | INT_LinkError | INT_ComQueue | -+ INT_TProc | INT_CProc | INT_DProc | INT_IProcCh1NonSysCntx | -+ INT_IProcCh1SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh0SysCntx)); -+ -+ /* Take the link out of boundary scan */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* And clear any link errors */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* XXXX: clear discard context 0, AFTER setting up the kernel comms */ -+ CLEAR_SCHED_STATUS (dev, DiscardSysCntxIn | DiscardNonSysCntxIn); -+ -+ /* Start a thread to handle excessive Event Interrrupts */ -+ if (kernel_thread_create (elan3_event_interrupt, (caddr_t) dev) == NULL) -+ { -+ panic ("InitialiseElan: cannot start elan3_event_interrupt\n"); -+ return (EFAIL); -+ } -+ dev->EventInterruptThreadStarted = 1; -+ -+ ReserveHaltOperations (dev, 1, TRUE); -+ -+ PollForDmaHungup (dev); -+ -+ /* register the device and stats with elanmod for RMS */ -+ dev->DeviceIdx = elan_dev_register(&dev->Devinfo, &elan3_dev_ops, (void *) dev); -+ -+ elan3_register_dev_stats(dev); -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseDmaBuffers(ELAN3_DEV *dev, ioaddr_t CmdPort) -+{ -+ register int i; -+ -+ /* GNAT sw-elan3/3908: -+ * Clear down the power on state of the Dma_Desc registers to make sure we don't -+ * try and interpret them when a trap happens. -+ */ -+ write_reg32 (dev, Dma_Desc.dma_type, 0); -+ write_reg32 (dev, Dma_Desc.dma_size, 0); -+ write_reg32 (dev, Dma_Desc.dma_source, 0); -+ write_reg32 (dev, Dma_Desc.dma_dest, 0); -+ write_reg32 (dev, Dma_Desc.dma_destEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_destCookieVProc, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcEvent, 0); -+ write_reg32 (dev, Dma_Desc.dma_srcCookieVProc, 0); -+ -+ /* -+ * The following is a sequence of writes to remove X's from the dma buffers and -+ * registers. It is only safe to write these registers after reset and before any -+ * dma's have been issued. The chip will NOT function corectly if they are written at -+ * any other time or in a different order. -+ */ -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.ResetAckNLdBytesToWr, ((u_longlong_t)0x1000) << 32); -+ write_reg64 (dev, Exts.Dmas.DmaWrs.LdBytesToRd, ((u_longlong_t)0x100) << 32); -+ -+ for (i=0;i<(4*8);i++) -+ write_reg64 (dev, Dma_Alignment_Port[0], 0); -+ -+ /* -+ * This is used to clear out X's from some of the trap registers. This is required to -+ * prevent the first traps from possibly writting X's into the SDram and upsetting the -+ * ECC value. It requires that the trap save area registers have been set up but does -+ * not require any translations to be ready. -+ */ -+ writel (-1, CmdPort + offsetof (E3_CommandPort, SetEvent)); -+ while ((read_reg32 (dev, Exts.InterruptReg) & INT_CProc) == 0) -+ { -+ mb(); -+ DELAY (1); -+ } -+ -+ write_reg32 (dev, CProc_TrapSave_Addr, dev->CommandPortTraps[dev->CurrentCommandPortTrap]); -+ -+ PULSE_SCHED_STATUS(dev, RestartCProc); -+} -+ -+void -+FinaliseElan (ELAN3_DEV *dev) -+{ -+ ELAN3_PTBL_GR *ptg; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP *chain = NULL; -+ int bank; -+ int indx; -+ int size; -+ unsigned long flags; -+ int level; -+ -+ elan_stats_deregister (dev->StatsIndex); -+ elan_dev_deregister(&dev->Devinfo); -+ -+ /* Cancel the dma poller */ -+ cancel_timer_fn (&dev->DmaPollTimeoutId); -+ -+ /* release it's halt operation */ -+ ReleaseHaltOperations (dev, 1); -+ -+ /* stop all kernel threads */ -+ dev->ThreadsShouldStop = 1; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ while (dev->EventInterruptThreadStarted && !dev->EventInterruptThreadStopped) -+ { -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Set the interrupt mask to 0 and the schedule control register to run nothing */ -+ SET_INT_MASK (dev, 0); -+ SET_SCHED_STATUS (dev, DiscardNonSysCntxIn | DiscardSysCntxIn | HaltThread | HaltDmas); -+ -+ /* Cancel any link error timeout */ -+ if (timer_fn_queued(&dev->ErrorTimeoutId)) -+ cancel_timer_fn (&dev->ErrorTimeoutId); -+ -+ /* Free of and page tables that have been allocated */ -+ spin_lock (&dev->PtblGroupLock); -+ for(level=0; level<4; level++) -+ { -+ while ((ptg = dev->Level[level].PtblGroupList) != NULL) -+ { -+ dev->Level[level].PtblGroupList = ptg->pg_next; -+ -+ elan3_sdram_free (dev, ptg->pg_addr, PTBL_GROUP_SIZE); -+ FREE_PTBL_GR(ptg); -+ } -+ } -+ -+ spin_unlock (&dev->PtblGroupLock); -+ -+ /* Free of all halt operations */ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((op = dev->FreeHaltOperations) != NULL) -+ { -+ dev->FreeHaltOperations = op->Next; -+ -+ /* Keep a list of 'freed' ops for later KMEM_FREE call */ -+ op->Next = chain; -+ chain = op; -+ } -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ /* Have now dropped the spinlock - can call KMEM_FREE */ -+ while ((op = chain) != NULL) -+ { -+ chain = op->Next; -+ -+ KMEM_FREE (op, sizeof (ELAN3_HALTOP)); -+ } -+ -+ /* Free of the ctxt table */ -+ KMEM_FREE (dev->CtxtTable, dev->ContextTableSize * sizeof (ELAN3_CTXT *)); -+ -+ /* Free of the thread and dma atrap areas */ -+ KMEM_FREE (dev->ThreadTrap, sizeof (THREAD_TRAP)); -+ KMEM_FREE (dev->DmaTrap, sizeof (DMA_TRAP)); -+ -+ /* Free of the memsegs and pages */ -+ for (bank = 0; bank < ELAN3_SDRAM_NUM_BANKS; bank++) -+ { -+ if (dev->SdramBanks[bank].Size) -+ { -+ UnmapDeviceRegister (dev, &dev->SdramBanks[bank].Handle); -+ -+ KMEM_FREE (dev->SdramBanks[bank].PtblGroups, sizeof (ELAN3_PTBL_GR *) * (dev->SdramBanks[bank].Size / PTBL_GROUP_SIZE)); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= dev->SdramBanks[bank].Size; indx++, size <<= 1) -+ KMEM_FREE (dev->SdramBanks[bank].Bitmaps[indx], sizeof (bitmap_t)*BT_BITOUL(dev->SdramBanks[bank].Size/size)); -+ } -+ } -+ elan3_sdram_fini (dev); -+} -+ -+#define INIT_PATTERN(offset) (0xBEEC000000000011ull | ((u_longlong_t)(offset)) << 16) -+#define FREE_PATTERN(offset) (0xBEEC000000000022ull | ((u_longlong_t)(offset)) << 16) -+ -+static int -+ProbeSdram (ELAN3_DEV *dev) -+{ -+ int Instance; -+ u_int Bank; -+ int MemSpaceSize; -+ int BankMaxSize; -+ int BankOffset; -+ int BankSize; -+ ioaddr_t BankBase; -+ ioaddr_t PageBase; -+ ioaddr_t PageBase1; -+ ioaddr_t PageBase2; -+ DeviceMappingHandle BankHandle; -+ DeviceMappingHandle PageHandle; -+ DeviceMappingHandle PageHandle1; -+ DeviceMappingHandle PageHandle2; -+ register int i; -+ u_longlong_t value; -+ extern int sdram_bank_limit; -+ -+ /* NOTE: The Cache control register is set to only enable cache set 0 */ -+ /* and has ECC disabled */ -+ Instance = dev->Instance; -+ -+ /* Determine the size of the SDRAM from the BAR register */ -+ if (DeviceRegisterSize (dev, ELAN3_BAR_SDRAM, &MemSpaceSize) != ESUCCESS) -+ { -+ printk ("elan%d: cannot determine SDRAM size\n", Instance); -+ return (EFAIL); -+ } -+ -+ elan3_sdram_init (dev); -+ -+ BankMaxSize = MemSpaceSize / ELAN3_SDRAM_NUM_BANKS; -+ -+ for (Bank = 0; Bank < ELAN3_SDRAM_NUM_BANKS; Bank++) -+ { -+ BankOffset = Bank * BankMaxSize; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: Probing RAM Bank %d (max size %08x)\n", Instance, Bank, BankMaxSize); -+ -+ /* Probe the memory bank by mapping two pages that are the size of the cache apart */ -+ /* this guarantees that when we store the second pattern we displace the first pattern */ -+ /* from the cache, also store the second pattern again the size of the cache up again */ -+ /* to ensure that the SDRAM wires don't stay floating at pattern1 */ -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, PAGESIZE, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase1, BankOffset + ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle1) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ continue; -+ } -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase2, BankOffset + 2*ELAN3_MAX_CACHE_SIZE, PAGESIZE, &PageHandle2) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot probe memory bank %d\n", Instance, Bank); -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ continue; -+ } -+ -+#define PATTERN0 (0x5555555555555555L) -+#define PATTERN1 (0xAAAAAAAAAAAAAAAAL) -+ writeq (PATTERN0, (u_longlong_t *) BankBase); -+ writeq (PATTERN1, (u_longlong_t *) PageBase1); -+ writeq (PATTERN1, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ -+ if (value != PATTERN0) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ -+ writeq (PATTERN1, (u_longlong_t *) BankBase); -+ writeq (PATTERN0, (u_longlong_t *) PageBase1); -+ writeq (PATTERN0, (u_longlong_t *) PageBase2); -+ -+ mmiob(); -+ -+ value = readq ((u_longlong_t *) BankBase); -+ if (value != PATTERN1) -+ { -+ UnmapDeviceRegister (dev, &BankHandle); -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ continue; -+ } -+ UnmapDeviceRegister (dev, &PageHandle1); -+ UnmapDeviceRegister (dev, &PageHandle2); -+ -+ /* Bank is present, so work out its size, we store tha maximum size at the base */ -+ /* and then store the address at each address on every power of two address until */ -+ /* we reach the minimum mappable size (PAGESIZE), we then read back the value at the */ -+ /* base to determine the bank size */ -+ writeq ((u_longlong_t) BankMaxSize, (u_longlong_t *) BankBase); -+ -+ for (BankSize = (BankMaxSize>>1); BankSize > PAGESIZE; BankSize >>= 1) -+ { -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &PageBase, BankOffset + BankSize, PAGESIZE, &PageHandle) == ESUCCESS) -+ { -+ writeq (BankSize, (u_longlong_t *) PageBase); -+ UnmapDeviceRegister (dev, &PageHandle); -+ } -+ } -+ mmiob(); -+ -+ BankSize = (u_long) readq ((u_longlong_t *) BankBase); -+ -+ if (sdram_bank_limit == 0 || BankSize <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %dK\n", Instance, Bank, BankSize / 1024); -+ else -+ { -+ BankSize = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit memory bank %d to %dK\n", Instance, Bank, BankSize / 1024); -+ } -+ -+ UnmapDeviceRegister (dev, &BankHandle); -+ -+ /* Now map all of this bank into the kernel */ -+ if (MapDeviceRegister (dev, ELAN3_BAR_SDRAM, &BankBase, BankOffset, BankSize, &BankHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot initialise memory bank %d\n", Instance, Bank); -+ continue; -+ } -+ -+ dev->SdramBanks[Bank].Size = BankSize; -+ dev->SdramBanks[Bank].Mapping = BankBase; -+ dev->SdramBanks[Bank].Handle = BankHandle; -+ -+#ifndef CONFIG_MPSAS -+ /* Initialise it for ECC */ -+ preemptable_start { -+ for (i = 0; i < BankSize; i += 8) -+ { -+ elan3_sdram_writeq (dev, (Bank << ELAN3_SDRAM_BANK_SHIFT) | i, INIT_PATTERN(BankOffset+i)); -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+#endif -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+InitialiseSdram (ELAN3_DEV *dev) -+{ -+ int indx, size, b; -+ -+ for (b = 0; b < ELAN3_SDRAM_NUM_BANKS; b++) -+ { -+ ELAN3_SDRAM_BANK *bank = &dev->SdramBanks[b]; -+ -+ if (bank->Size == 0) -+ continue; -+ -+ /* allocate a ptbl group pointer for each possible ptbl group in this bank */ -+ KMEM_ZALLOC (bank->PtblGroups, ELAN3_PTBL_GR **, sizeof (ELAN3_PTBL_GR *) * bank->Size/PTBL_GROUP_SIZE, TRUE); -+ -+ /* allocate the buddy allocator bitmaps */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->Size; indx++, size <<= 1) -+ KMEM_ZALLOC (bank->Bitmaps[indx], bitmap_t *, sizeof (bitmap_t)*BT_BITOUL(bank->Size/size), TRUE); -+ -+ /* and add it to the sdram buddy allocator */ -+ elan3_sdram_add (dev, (b << ELAN3_SDRAM_BANK_SHIFT), (b << ELAN3_SDRAM_BANK_SHIFT) + bank->Size); -+ } -+} -+ -+#include -+ -+int -+ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency) -+{ -+ DeviceMappingHandle RomHandle; -+ unsigned char *RomBase; -+ unsigned char *PCIDataPtr; -+ unsigned char *VPDPtr; -+ unsigned char *lim; -+ int type; -+ int i, len, len2; -+ char name[3] = "XX"; -+ char value[256]; -+ int finished = 0; -+ -+ -+ /* default valud for CAS latency is 3 */ -+ (*CasLatency) = CAS_LATENCY_3; -+ -+ if (MapDeviceRegister (dev, ELAN3_BAR_EBUS, (ioaddr_t *) &RomBase, ELAN3_EBUS_ROM_OFFSET, ELAN3_EBUS_ROM_SIZE, &RomHandle) != ESUCCESS) -+ { -+ printk ("elan%d: Cannot map ROM\n", dev->Instance); -+ return (EFAIL); -+ } -+ -+ /* Check the ROM signature */ -+ if (RomBase[0] != 0x55 || RomBase[1] != 0xAA) -+ { -+ printk ("elan%d: Invalid ROM signature %02x %02x\n", dev->Instance, RomBase[0], RomBase[1]); -+ return (ESUCCESS); -+ } -+ -+ PCIDataPtr = RomBase + ((RomBase[0x19] << 8) | RomBase[0x18]); -+ -+ /* check the pci data structure */ -+ if (PCIDataPtr[0] != 'P' || PCIDataPtr[1] != 'C' || PCIDataPtr[2] != 'I' || PCIDataPtr[3] != 'R') -+ { -+ printk ("elan%d: Invalid PCI Data structure\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ /* Extract the VPD pointer */ -+ VPDPtr = RomBase + ((PCIDataPtr[9] << 8) | PCIDataPtr[8]); -+ -+ if (VPDPtr == RomBase) -+ { -+ printk ("elan%d: No Vital Product Data\n", dev->Instance); -+ return (ESUCCESS); -+ } -+ -+ while (! finished) -+ { -+ type = *VPDPtr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(VPDPtr++); -+ len += *(VPDPtr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ printk ("elan%d: ", dev->Instance); -+ for (i = 0; i < len; i++) -+ printk ("%c", *VPDPtr++); -+ printk ("\n"); -+ break; -+ -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = VPDPtr + len; VPDPtr < lim; ) -+ { -+ name[0] = *VPDPtr++; -+ name[1] = *VPDPtr++; -+ len2 = *VPDPtr++; -+ -+ for (i = 0; i < len2 && VPDPtr < lim; i++) -+ value[i] = *VPDPtr++; -+ value[i] = '\0'; -+ -+ if (! strcmp (name, "SN")) -+ printk ("elan%d: Serial Number - %s\n", dev->Instance, value); -+ -+ if (! strcmp (name, "Z0")) -+ (*CasLatency) = (strcmp (value, "CAS_LATENCY_2") ? CAS_LATENCY_3 : CAS_LATENCY_2); -+ } -+ break; -+ -+ default: -+ printk ("elan%d: unknown large resource %x\n", dev->Instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ VPDPtr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ printk ("elan%d: unknown small resource %x\n", dev->Instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ UnmapDeviceRegister (dev, &RomHandle); -+ return (ESUCCESS); -+} -+ -+void -+ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, ELAN3_PTBL_GR *ptg) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE] = ptg; -+} -+ -+ELAN3_PTBL_GR * -+ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset) -+{ -+ int bank = offset >> ELAN3_SDRAM_BANK_SHIFT; -+ -+ return (dev->SdramBanks[bank].PtblGroups[(offset & (ELAN3_SDRAM_BANK_SIZE-1)) / PTBL_GROUP_SIZE]); -+} -+ -+void -+ElanFlushTlb (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ BumpStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+KillNegativeDma (ELAN3_DEV *dev, void *arg) -+{ -+ DMA_TRAP *trap = dev->DmaTrap; -+ E3_Status_Reg status; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (read_reg32 (dev, Exts.InterruptReg) & INT_DProcHalted); -+ -+ /* Initialise the trap to deliver to the offending user process */ -+ trap->Status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ trap->PacketInfo.Value = 0; -+ -+ bzero (&trap->FaultSave, sizeof (trap->FaultSave)); -+ bzero (&trap->Data0, sizeof (trap->Data0)); -+ bzero (&trap->Data1, sizeof (trap->Data1)); -+ bzero (&trap->Data2, sizeof (trap->Data2)); -+ bzero (&trap->Data3, sizeof (trap->Data3)); -+ -+ /* run down the kernel dma run queue and panic on a -ve length dma */ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ panic ("KillNegativeDma: -ve sized kernel dma\n"); -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ /* run down the user dma run queue and "remove" and -ve length dma's */ -+ FPtr = read_reg32 (dev, DProc_NonSysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_NonSysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, NonSysCntxDmaQueue[E3_NonSysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &trap->Desc, sizeof (E3_DMA_BE)); -+ -+ if (trap->Desc.s.dma_size > E3_MAX_DMA_SIZE) -+ { -+ PRINTF3 (NULL, DBG_INTR, "KillNegativeDma: remove dma - context %d size %d SuspendAddr %x\n", -+ trap->Desc.s.dma_u.s.Context, trap->Desc.s.dma_size, trap->Status.s.SuspendAddr); -+ -+ trap->Status.s.TrapType = trap->Status.s.SuspendAddr; -+ trap->Status.s.Context = trap->Desc.s.dma_u.s.Context; -+ -+ DeliverDProcTrap (dev, trap, 0); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ trap->Desc.s.dma_type = 0; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = (E3_Addr) 0; -+ trap->Desc.s.dma_srcEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_srcCookieVProc = (E3_Addr) 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &trap->Desc, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ status.Status = read_reg32 (dev, Exts.DProcStatus.Status); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: unlock dma processor\n"); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ -+ DELAY (10); -+ -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdAlignment, 0); -+ write_reg32 (dev, Exts.Dmas.DmaWrs.LdDmaType, 0); -+ mmiob(); -+ } -+ -+ PRINTF0 (NULL, DBG_INTR, "KillNegativeDma: dma processor restarted\n"); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+ForceTProcTrap (ELAN3_DEV *dev, void *arg) -+{ -+ printk ("elan%d: forced tproc trap .....\n", dev->Instance); -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 1); -+} -+ -+void -+PollForDmaHungup (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ E3_Status_Reg status; -+ E3_uint32 insn1, insn3; -+ register int i; -+ -+ if (read_reg32 (dev, Dma_Desc.dma_size) > E3_MAX_DMA_SIZE) -+ { -+ status.Status = read_reg32 (dev, Exts.DProcStatus); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: size %x SuspendAddr %x\n", read_reg32 (dev, Dma_Desc.dma_size), status.s.SuspendAddr); -+ -+ if (status.s.SuspendAddr == MI_DequeueNonSysCntxDma || -+ status.s.SuspendAddr == MI_DequeueSysCntxDma || -+ status.s.SuspendAddr == MI_DmaLoop) -+ { -+ printk ("elan%d: PollForDmaHungup: size %x context %d SuspendAddr %x\n", -+ dev->Instance, read_reg32 (dev, Dma_Desc.dma_size), -+ status.s.Context, status.s.SuspendAddr); -+ -+ PRINTF2 (NULL, DBG_INTR, "PollForDmaHungup: dma_size %x status %x\n", -+ read_reg32 (dev, Dma_Desc.dma_size), status.Status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted, KillNegativeDma, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return; -+ } -+ } -+ -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ if (status.s.WakeupFunction == WakeupStopped) -+ { -+ E3_uint32 PC = read_reg32 (dev, ExecutePC); -+ -+ /* See if it's likely that the thread is really "stuck" on a waitevent/break -+ * instruction ......... */ -+ for (i = 0; i < 10; i++) -+ { -+ status.Status = read_reg32 (dev, Exts.TProcStatus); -+ insn1 = read_reg32 (dev, IBufferReg[1]); -+ insn3 = read_reg32 (dev, IBufferReg[3]); -+ -+ if (! (status.s.WakeupFunction == WakeupStopped && read_reg32 (dev, ExecutePC) == PC && /* stopping and it could be a break/waitevent */ -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000 || /* break instruction */ -+ insn1 == 0x81b00000 || insn3 == 0x81b00000))) /* waitevent instruction */ -+ break; -+ } -+ -+ if (i == 10) -+ { -+ printk ("elan%d: forcing tproc trap from %s instruction at pc %x\n", dev->Instance, -+ (insn1 == 0x81a00000 || insn3 == 0x81a00000) ? "break" : "waitevent", PC); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_TProcHalted, ForceTProcTrap, NULL); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return; -+ } -+ } -+ -+ schedule_timer_fn (&dev->DmaPollTimeoutId, PollForDmaHungup, (void *) dev, 10); -+} -+ -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+ReEnableErrorInterrupts (void *arg) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ReEnableErrorInterrupts: IntMask=%x\n", read_reg32 (dev, Exts.InterruptMask)); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+CheckForExcessiveErrorRate (ELAN3_DEV *dev) -+{ -+ if (dev->ErrorTime == (lbolt/hz)) -+ { -+ if (dev->ErrorsPerTick++ > 100) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "CheckForExcessiveErrorRate: too many links errors, disabling interrupt\n"); -+ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->ErrorTimeoutId, ReEnableErrorInterrupts, (void *) dev, hz); -+ } -+ } -+ else -+ { -+ dev->ErrorTime = (lbolt/hz); -+ dev->ErrorsPerTick = 0; -+ } -+} -+/*=======================================================================================*/ -+/* -+ * Interrupt handler. -+ */ -+static void -+HandlePciMemErr (ELAN3_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "HandlePciMemErr : masking out interrupt\n"); -+ -+ ElanBusError (dev); -+ panic ("elan pci memory error\n"); -+} -+ -+static void -+HandleSDRamInterrupt (ELAN3_DEV *dev) -+{ -+ E3_uint32 EccStatus0 = read_reg32 (dev, ECC_STATUS0); -+ E3_uint32 EccStatus1 = read_reg32 (dev, ECC_STATUS1); -+ unsigned long flags; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "elan: ECC error - Addr=%x UE=%x CE=%x ME=%x Syn=%x\n", -+ EccStatus0 & ECC_ADDR_MASK, EccStatus0 & ECC_UE_MASK, -+ EccStatus0 & ECC_CE_MASK, EccStatus0 & ECC_ME_MASK, -+ EccStatus1 & ECC_SYN_MASK); -+ -+ if (EccStatus0 & (ECC_UE_MASK|ECC_CE_MASK)) -+ { -+ printk ("elan%d: ECC memory error (Address=%08x Syndrome=%02x %s%s%s)\n", -+ dev->Instance, -+ (EccStatus0 & ECC_ADDR_MASK), (EccStatus1 & ECC_SYN_MASK), -+ (EccStatus0 & ECC_UE_MASK) ? "Uncorrectable " : "", -+ (EccStatus0 & ECC_CE_MASK) ? "Correctable " : "", -+ (EccStatus0 & ECC_ME_MASK) ? "Multiple Errors " : ""); -+ } -+ -+ if (EccStatus0 & ECC_UE_MASK) -+ panic ("elan: Uncorrectable ECC memory error"); -+ if (EccStatus0 & ECC_CE_MASK) -+ BumpStat (dev, CorrectableErrors); -+ if (EccStatus0 & ECC_ME_MASK) -+ BumpStat (dev, MultipleErrors); -+ -+ /* -+ * Clear the interrupt and reset the error flags. -+ * Note. Might loose an UE or CE if it occurs between reading the status and -+ * clearing the interrupt. I don't think this matters very much as the -+ * status reg will only be used to identify a bad simm. -+ */ -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | CLEAR_SDRAM_ERROR); -+ mmiob(); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static int -+HandleEventInterrupt (ELAN3_DEV *dev, int nticks, unsigned long *flags) -+{ -+ E3_uint32 Fptr = dev->Event_Int_Queue_FPtr; -+ E3_uint32 Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ long tlim = lbolt + nticks; -+ long count = 0; -+ ELAN3_CTXT *ctxt; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ ASSERT ((dev->InterruptMask & INT_EventInterrupt) == 0); -+ -+ while (Fptr != Bptr) -+ { -+ while (Fptr != Bptr) -+ { -+ E3_EventInt_BE EvInt; -+ E3_uint32 Context; -+ -+ /* If we're running in the interrupt handler and have seen a high -+ * rate of event interrupts then punt to the thread - however on -+ * Linux the elan interrupt handler can block the timer interrupt, -+ * and so lbolt (jiffies) is not incremented, hence we punt after -+ a number of loops instead */ -+#if defined(LINUX) -+ if (in_interrupt() && ++count > eventint_punt_loops) -+ return (EAGAIN); -+#endif -+ -+ if (nticks && ((int) (lbolt - tlim)) > 0) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x punting to thread\n", Fptr, Bptr); -+ return (EAGAIN); -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, Fptr, (void *) &EvInt, 8); /* PCI read */ -+ -+ /* The context number is held in the top 16 bits of the EventContext */ -+ Context = (EvInt.s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Context %d : Cookie %x\n", Context, EvInt.s.IntCookie); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Context); -+ -+ /* Work out new fptr, and store it in the device, since we'll be dropping the IntrLock */ -+ Fptr = E3_EVENT_INTQ_NEXT(Fptr); -+ dev->Event_Int_Queue_FPtr = Fptr; -+ -+ if (ctxt == NULL) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "HandleEventInterrupt: Fptr %x Bptr %x context %d invalid\n", -+ Fptr, Bptr, Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ BumpStat (dev, EventInterrupts); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, *flags); -+ QueueEventInterrupt (ctxt, EvInt.s.IntCookie); -+ spin_lock_irqsave (&dev->IntrLock, *flags); -+ } -+ -+ /* Re-read the FPtr, since we've dropped the IntrLock */ -+ Fptr = dev->Event_Int_Queue_FPtr; -+ -+ /* Store the new FPtr to the elan, this also clears the interrupt. */ -+ write_reg32 (dev, Event_Int_Queue_FPtr, Fptr); /* PCI write */ -+ -+ mmiob(); -+ } -+ -+ mb(); -+ Bptr = read_reg32 (dev, Event_Int_Queue_BPtr); /* PCI read */ -+ } -+ -+ return (ESUCCESS); -+} -+ -+int -+SetLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ int res = ESUCCESS; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ res = EAGAIN; -+ else -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "SetLinkBoundaryScan: setting link into boundary scan mode\n"); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+ClearLinkBoundaryScan (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_BSCAN, "ClearLinkBoundaryScan: taking link out of boundary scan mode\n"); -+ -+ /* -+ * Take the link out of boundary scan -+ */ -+ SET_SCHED_LINK_VALUE (dev, 0, 0); -+ -+ /* -+ * Clear any link errors. -+ */ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ /* -+ * Re-enable the error interrupts. -+ */ -+ if (! timer_fn_queued(&dev->ErrorTimeoutId)) -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And stop the inputter from discarding all packets. -+ */ -+ if (--dev->DiscardAllCount == 0) -+ SetSchedStatusRegister (dev, 0, NULL); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+int -+WriteBoundaryScanValue (ELAN3_DEV *dev, int value) -+{ -+ int res = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) != 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: driving value 0x%x onto link\n", value); -+ SET_SCHED_LINK_VALUE (dev, 1, value); -+ -+ res = read_reg32 (dev, Exts.LinkState); -+ -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "WriteBoundaryScanValue: return 0x%x\n", res); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+int -+ReadBoundaryScanValue(ELAN3_DEV *dev, int link) -+{ -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: set linkval 0x%x\n", link); -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ } -+ res = read_reg32 (dev, Exts.LinkState); -+ PRINTF1 (DBG_DEVICE, DBG_BSCAN, "ReadBoundaryScanValue: return 0x%x\n", res); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+static int -+ReadLinkVal (ELAN3_DEV *dev, int link) -+{ -+ if ((dev->SchCntReg & LinkBoundaryScan) == 0) -+ SET_SCHED_LINK_VALUE (dev, 0, link); -+ -+ return (read_reg32 (dev, Exts.LinkState)); -+} -+ -+static void -+HandleLinkError (ELAN3_DEV *dev) -+{ -+ E3_uint32 value = read_reg32 (dev, Exts.LinkErrorTypes); -+ -+ PRINTF1 (DBG_DEVICE, DBG_LINKERR, "HandleLinkError: LinkErrorTypes %08x - clearing\n", value); -+ -+ if (value & LS_LockError) BumpStat (dev, LockError); -+ if (value & LS_DeskewError) BumpStat (dev, DeskewError); -+ if (value & LS_PhaseError) BumpStat (dev, PhaseError); -+ if (value & LS_DataError) BumpStat (dev, DataError); -+ if (value & LS_FifoOvFlow0) BumpStat (dev, FifoOvFlow0); -+ if (value & LS_FifoOvFlow1) BumpStat (dev, FifoOvFlow1); -+ -+ if (value & LS_DataError) -+ dev->Stats.LinkErrorValue = ReadLinkVal (dev, 12) | (ReadLinkVal (dev, 13) << 9); -+ -+ PULSE_SCHED_STATUS (dev, ClearLinkErrorInt); -+ -+ CheckForExcessiveErrorRate (dev); -+} -+ -+static void -+HandleErrorInterrupt (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ if (Pend & INT_PciMemErr) -+ HandlePciMemErr (dev); -+ -+ if (Pend & INT_SDRamInt) -+ HandleSDRamInterrupt (dev); -+ -+ if (Pend & INT_LinkError) -+ HandleLinkError (dev); -+} -+ -+static void -+HandleAnyIProcTraps (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 RestartBits = 0; -+ -+ if (Pend & INT_IProcCh0SysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_C0_TrData[0])); -+ -+ RestartBits |= RestartCh0SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh1SysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_C0_TrData[0])); -+ -+ RestartBits |= RestartCh1SysCntx; -+ } -+ -+ if (Pend & INT_IProcCh0NonSysCntx) -+ { -+ HandleIProcTrap (dev, 0, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh0_NonC0_TrData[0])); -+ -+ RestartBits |= RestartCh0NonSysCntx; -+ } -+ -+ -+ if (Pend & INT_IProcCh1NonSysCntx) -+ { -+ HandleIProcTrap (dev, 1, Pend, -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, IProcNonSysCntx), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrHead[0]), -+ dev->TAndQBase + offsetof (E3_TrapAndQueue, VCh1_NonC0_TrData[0])); -+ RestartBits |= RestartCh1NonSysCntx; -+ } -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+} -+ -+static void -+elan3_event_interrupt (ELAN3_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_event_int"); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ for (;;) -+ { -+ /* Make sure we never sleep with the EventInterrupt disabled */ -+ if (! (dev->InterruptMask & INT_EventInterrupt)) -+ { -+ if (HandleEventInterrupt (dev, eventint_resched_ticks, &flags) != ESUCCESS) -+ BumpStat (dev, EventRescheds); -+ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ } -+ -+ if (dev->ThreadsShouldStop) -+ break; -+ -+ kcondvar_wait (&dev->IntrWait, &dev->IntrLock, &flags); -+ } -+ -+ dev->EventInterruptThreadStopped = 1; -+ kcondvar_wakeupall (&dev->IntrWait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ kernel_thread_exit (); -+} -+ -+int -+InterruptHandler (ELAN3_DEV *dev) -+{ -+ E3_uint32 Mask; -+ E3_uint32 Pend; -+ E3_uint32 RestartBits; -+ int deliverDProcTrap; -+ int deliverTProcTrap; -+ static long lboltsave; -+ int loop_count = 0; -+ unsigned long flags; -+ int tproc_delivered; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ BumpStat (dev, Interrupts); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ /* Save the lbolt so we know how long in do loop or in event handling */ -+ lboltsave = lbolt; -+ -+ if ((Pend & Mask) == INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (ESUCCESS); -+ } -+ -+ if ((Pend & Mask) == 0) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Spurious Pend %x Mask %x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ return (EFAIL); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ -+ do { -+ loop_count++; -+ RestartBits = 0; -+ -+ if (Pend & Mask & (INT_CProc | INT_ComQueue)) -+ HandleCProcTrap (dev, Pend, &Mask); -+ -+ tproc_delivered = 0; -+ -+ if (Pend & Mask & INT_TProc) { -+ ELAN_REG_REC(Pend); -+ tproc_delivered = 1; -+ deliverTProcTrap = HandleTProcTrap (dev, &RestartBits); -+ } -+ else -+ deliverTProcTrap = 0; -+ -+ if (Pend & Mask & INT_DProc) -+ deliverDProcTrap = HandleDProcTrap (dev, &RestartBits); -+ else -+ deliverDProcTrap = 0; -+ -+ ASSERT ((RestartBits & RestartDProc) == 0 || (read_reg32 (dev, Exts.DProcStatus.Status) >> 29) == 4); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT ((RestartBits & RestartDProc) == 0 || elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); /* Restart any processors which had trapped. */ -+ SET_INT_MASK (dev, Mask); /* And install the new interrupt mask */ -+ -+ if ((Pend & Mask & INT_TProc) && deliverTProcTrap) -+ DeliverTProcTrap (dev, dev->ThreadTrap, Pend); -+ -+ if ((Pend & Mask & INT_DProc) && deliverDProcTrap) -+ DeliverDProcTrap (dev, dev->DmaTrap, Pend); -+ -+ if (Pend & Mask & INT_Inputters) -+ HandleAnyIProcTraps (dev, Pend); -+ -+ if (Pend & Mask & INT_EventInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_EventInterrupt); -+ -+ if (loop_count == 1 && HandleEventInterrupt (dev, eventint_punt_ticks, &flags) == ESUCCESS) /* always punt to the thread if we've */ -+ ENABLE_INT_MASK (dev, INT_EventInterrupt); /* been round the loop once */ -+ else -+ { -+ BumpStat (dev, EventPunts); -+ -+ kcondvar_wakeupone (&dev->IntrWait, &dev->IntrLock); -+ } -+ } -+ -+ if (Pend & (INT_Halted | INT_Discarding)) -+ ProcessHaltOperations (dev, Pend); -+ -+ if (Pend & Mask & INT_ErrorInterrupts) -+ HandleErrorInterrupt (dev, Pend); -+ -+ Mask = dev->InterruptMask; -+ Pend = read_reg32 (dev, Exts.InterruptReg); /* PCI read */ -+ -+ if (tproc_delivered) -+ ELAN_REG_REC(Pend); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "InterruptHandler: Pend %x Mask %08x SchedStatus %x\n", -+ Pend, Mask, read_reg32 (dev, Exts.SchCntReg)); -+ } while ((Pend & Mask) != 0); -+ -+ if ((lbolt - lboltsave) > dev->Stats.LongestInterrupt) -+ dev->Stats.LongestInterrupt = (lbolt - lboltsave); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "InterruptHandler: lbolt is %lx; start lbolt is %lx\n", -+ lbolt, lboltsave); -+ -+ return (ESUCCESS); -+} -+ -+void -+SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp) -+{ -+ E3_uint32 HaltMask = dev->HaltOperationsMask; -+ E3_uint32 Mask = Maskp ? *Maskp : dev->InterruptMask; -+ E3_uint32 ClearBits = 0; -+ E3_uint32 SetBits = 0; -+ -+ PRINTF5 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: HaltOperationsMask=%x HaltAll=%d HaltDmaDequeue=%d HaltThread=%d DiscardAll=%d\n", -+ HaltMask, dev->HaltAllCount, dev->HaltDmaDequeueCount, dev->HaltThreadCount, dev->DiscardAllCount); -+ -+ if (dev->FlushCommandCount) -+ SetBits |= FlushCommandQueues; -+ -+ if ((HaltMask & INT_DProcHalted) || dev->HaltAllCount) -+ { -+ SetBits |= HaltDmas | HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if (dev->HaltDmaDequeueCount) -+ { -+ SetBits |= HaltDmaDequeue; -+ if (Pend & INT_DProcHalted) -+ Mask &= ~INT_DProcHalted; -+ else -+ Mask |= INT_DProcHalted; -+ } -+ -+ if ((HaltMask & INT_TProcHalted) || dev->HaltAllCount || dev->HaltThreadCount) -+ { -+ SetBits |= HaltThread; -+ if (Pend & INT_TProcHalted) -+ Mask &= ~INT_TProcHalted; -+ else -+ Mask |= INT_TProcHalted; -+ } -+ -+ if ((HaltMask & INT_DiscardingSysCntx) || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardSysCntxIn; -+ if (Pend & INT_DiscardingSysCntx) -+ Mask &= ~INT_DiscardingSysCntx; -+ else -+ Mask |= INT_DiscardingSysCntx; -+ } -+ -+ if ((HaltMask & INT_DiscardingNonSysCntx) || dev->DiscardNonContext0Count || dev->DiscardAllCount) -+ { -+ SetBits |= DiscardNonSysCntxIn; -+ if (Pend & INT_DiscardingNonSysCntx) -+ Mask &= ~INT_DiscardingNonSysCntx; -+ else -+ Mask |= INT_DiscardingNonSysCntx; -+ } -+ -+ if (dev->HaltNonContext0Count) -+ SetBits |= StopNonSysCntxs; -+ -+ ClearBits = SetBits ^ (FlushCommandQueues | HaltDmas | HaltDmaDequeue | HaltThread | -+ DiscardSysCntxIn | DiscardNonSysCntxIn | StopNonSysCntxs); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "SetSchedStatusRegister: SetBits=%x InterruptMask=%x InterruptReg=%x Mask=%x\n", -+ SetBits, dev->InterruptMask, read_reg32 (dev, Exts.InterruptReg), Mask); -+ -+ MODIFY_SCHED_STATUS (dev, SetBits, ClearBits); -+ -+ if (Maskp) -+ *Maskp = Mask; /* copyback new interrupt mask */ -+ else -+ SET_INT_MASK(dev, Mask); -+} -+ -+void -+FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+int -+ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep) -+{ -+ ELAN3_HALTOP *op; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ while ((dev->NumHaltOperations - dev->ReservedHaltOperations) < count) -+ { -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ KMEM_ZALLOC (op, ELAN3_HALTOP *, sizeof (ELAN3_HALTOP), cansleep); -+ -+ if (op == NULL) -+ return (FALSE); -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ -+ dev->NumHaltOperations++; -+ -+ op->Next = dev->FreeHaltOperations; -+ dev->FreeHaltOperations = op; -+ } -+ -+ dev->ReservedHaltOperations += count; -+ -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+ -+ return (TRUE); -+} -+ -+void -+ReleaseHaltOperations (ELAN3_DEV *dev, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->FreeHaltLock, flags); -+ dev->ReservedHaltOperations -= count; -+ spin_unlock_irqrestore (&dev->FreeHaltLock, flags); -+} -+ -+void -+QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement) -+{ -+ ELAN3_HALTOP *op; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ spin_lock (&dev->FreeHaltLock); -+ op = dev->FreeHaltOperations; -+ -+ ASSERT (op != NULL); -+ -+ dev->FreeHaltOperations = op->Next; -+ spin_unlock (&dev->FreeHaltLock); -+ -+ op->Mask = ReqMask; -+ op->Function = (void (*)(void *, void *))Function; -+ op->Arguement = Arguement; -+ -+ dev->HaltOperationsMask |= ReqMask; /* Add our bits to the global bits needed. */ -+ SetSchedStatusRegister (dev, Pend, Maskp); /* Set the control register and the interrupt mask */ -+ -+ /* -+ * If the condition is already satisfied, then SetSchedStatusRegister will -+ * have masked out the interrupt, so re-enable it now to take it straight -+ * away -+ */ -+ if (Maskp == NULL) -+ { -+ if ((read_reg32 (dev, Exts.InterruptReg) & ReqMask) == ReqMask) -+ ENABLE_INT_MASK (dev, ReqMask); -+ } -+ else -+ { -+ if ((Pend & ReqMask) == ReqMask) -+ *Maskp |= ReqMask; -+ } -+ -+ *dev->HaltOperationsTailpp = op; /* Queue at end of list, since ProcessHaltOperations */ -+ dev->HaltOperationsTailpp = &op->Next; /* drops the IntrLock while running down the list */ -+ op->Next = NULL; -+} -+ -+void -+ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend) -+{ -+ E3_uint32 Mask; -+ ELAN3_HALTOP *op; -+ ELAN3_HALTOP **prevp; -+ E3_uint32 haltMask; -+ ELAN3_HALTOP *next; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: Pend %x\n", Pend); -+ -+ for (;;) -+ { -+ ELAN3_HALTOP *head = NULL; -+ ELAN3_HALTOP **tailp = &head; -+ -+ /* -+ * Generate a list of halt operations which can be called now. -+ */ -+ for (haltMask = 0, prevp = &dev->HaltOperations; (op = *prevp) != NULL; ) -+ { -+ if ((Pend & op->Mask) != op->Mask) -+ { -+ haltMask |= op->Mask; -+ prevp = &op->Next; -+ } -+ else -+ { -+ *prevp = op->Next; /* remove from list */ -+ if (op->Next == NULL) -+ dev->HaltOperationsTailpp = prevp; -+ -+ *tailp = op; /* add to local list */ -+ op->Next = NULL; -+ tailp = &op->Next; -+ } -+ } -+ -+ if (head == NULL) /* nothing to do, so update */ -+ { /* the schedule status register */ -+ dev->HaltOperationsMask = haltMask; /* and the interrupt mask */ -+ SetSchedStatusRegister (dev, Pend, NULL); -+ return; -+ } -+ -+ /* -+ * flush the command queues, before calling any operations -+ */ -+ Mask = dev->InterruptMask; -+ -+ if (dev->FlushCommandCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ if ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ { -+ if (dev->HaltThreadCount++ == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ CAPTURE_CPUS(); -+ -+ while ((read_reg32 (dev, ComQueueStatus) & ComQueueNotEmpty) != 0) -+ mb(); -+ -+ RELEASE_CPUS(); -+ -+ if (--dev->HaltThreadCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ } -+ -+ if (read_reg32 (dev, Exts.InterruptReg) & INT_CProc) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: command processor has trapped\n"); -+ HandleCProcTrap (dev, Pend, &Mask); -+ } -+ -+ if (--dev->FlushCommandCount == 0) -+ SetSchedStatusRegister (dev, Pend, &Mask); -+ -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "ProcessHaltOperations: interrupt mask %08x -> %08x\n", -+ dev->InterruptMask, Mask); -+ -+ SET_INT_MASK (dev, Mask); -+ spin_unlock (&dev->IntrLock); -+ -+ /* -+ * now process the list of operations -+ * we have -+ */ -+ for (op = head; op != NULL; op = next) -+ { -+ next = op->Next; -+ -+ op->Function (dev, op->Arguement); -+ -+ FreeHaltOperation (dev, op); -+ } -+ -+ spin_lock (&dev->IntrLock); -+ } -+} -+ -+int -+ComputePosition (ELAN_POSITION *pos, unsigned nodeId, unsigned numNodes, unsigned numDownLinksVal) -+{ -+ int i, lvl, n; -+ char numDownLinks[ELAN_MAX_LEVELS]; -+ -+ if (nodeId >= numNodes) -+ return (EINVAL); -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, numDownLinksVal >>= 4) -+ numDownLinks[i] = numDownLinksVal & 7; -+ -+ for (lvl = 0, n = numNodes; n > ((lvl % 3) == 2 ? 8 : 4) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = 4; -+ -+ if ((n % numDownLinks[lvl]) != 0) -+ return (EINVAL); -+ -+ n /= numDownLinks[lvl]; -+ } -+ -+ if (numDownLinks[lvl] == 0) -+ numDownLinks[lvl] = n; -+ -+ if (numDownLinks[lvl] != n) -+ return (EINVAL); -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = numDownLinks[lvl - i]; -+ -+ pos->pos_nodes = numNodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeId; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandev_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elandev_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elandev_linux.c 2005-07-28 14:52:52.803685576 -0400 -@@ -0,0 +1,2358 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: elandev_linux.c,v 1.102.2.5 2005/03/07 16:27:44 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elandev_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,2,0) -+#error please use a 2.2 series kernel or newer -+#endif -+ -+/* Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN3_DEVICE_MASK 0x3F -+ -+#define ELAN3_MINOR_CONTROL 0 -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+#define ELAN3_MINOR_SHIFT 6 -+ -+#define ELAN3_DEVICE(inode) (MINOR(inode->i_rdev) & ELAN3_DEVICE_MASK) -+#define ELAN3_MINOR(inode) (MINOR(inode->i_rdev) >> ELAN3_MINOR_SHIFT) -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -+# define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+# define IRQ_RETVAL(x) -+#endif -+ -+ -+/* -+ * Function prototypes. -+ */ -+static int elanattach(int instance, struct pci_dev *pcidev); -+static int elandetach(int instance); -+ -+static int elan3_open (struct inode *inode, struct file *file); -+static int elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan3_mmap (struct file *file, struct vm_area_struct *vm_area); -+static int elan3_release (struct inode *inode, struct file *file); -+ -+static int elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer); -+static int elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static irqreturn_t InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs); -+ -+static int ConfigurePci(ELAN3_DEV *dev); -+static int ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr); -+ -+static void elan3_shutdown_devices(int panicing); -+ -+/* -+ * Globals. -+ */ -+static ELAN3_DEV *elan3_devices[ELAN3_MAX_CONTROLLER]; -+static int NodeId = ELAN3_INVALID_NODE; -+static int NumNodes; -+static int DownLinks; -+static int RandomRoutingDisabled; -+int BackToBackMaster; -+int BackToBackSlave; -+int enable_sdram_writecombining; -+int sdram_bank_limit; -+extern int LwpNice; -+ -+char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+int elan_reg_rec_index; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan3 Device Driver"); -+ -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(NodeId,"i"); -+MODULE_PARM(NumNodes,"i"); -+MODULE_PARM(RandomRoutingDisabled,"i"); -+MODULE_PARM(DownLinks,"i"); -+MODULE_PARM(BackToBackMaster,"i"); -+MODULE_PARM(BackToBackSlave,"i"); -+MODULE_PARM(LwpNice, "i"); -+MODULE_PARM(elan3_debug, "i"); -+MODULE_PARM(elan3_debug_console, "i"); -+MODULE_PARM(elan3_debug_buffer, "i"); -+MODULE_PARM(elan3mmu_debug, "i"); -+MODULE_PARM(sdram_bank_limit, "i"); -+ -+/* elan3/os/context.c */ -+EXPORT_SYMBOL(elan3_alloc); -+EXPORT_SYMBOL(elan3_attach); -+EXPORT_SYMBOL(elan3_doattach); -+EXPORT_SYMBOL(elan3_free); -+EXPORT_SYMBOL(elan3_detach); -+EXPORT_SYMBOL(elan3_dodetach); -+EXPORT_SYMBOL(elan3_block_inputter); -+EXPORT_SYMBOL(CheckCommandQueueFlushed); -+ -+/* elan3/os/sdram.c */ -+EXPORT_SYMBOL(elan3_sdram_alloc); -+EXPORT_SYMBOL(elan3_sdram_free); -+EXPORT_SYMBOL(elan3_sdram_to_phys); -+EXPORT_SYMBOL(elan3_sdram_writeb); -+EXPORT_SYMBOL(elan3_sdram_writew); -+EXPORT_SYMBOL(elan3_sdram_writel); -+EXPORT_SYMBOL(elan3_sdram_writeq); -+EXPORT_SYMBOL(elan3_sdram_readb); -+EXPORT_SYMBOL(elan3_sdram_readw); -+EXPORT_SYMBOL(elan3_sdram_readl); -+EXPORT_SYMBOL(elan3_sdram_readq); -+EXPORT_SYMBOL(elan3_sdram_zerob_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerow_sdram); -+EXPORT_SYMBOL(elan3_sdram_zerol_sdram); -+EXPORT_SYMBOL(elan3_sdram_zeroq_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_to_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyb_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyw_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyl_from_sdram); -+EXPORT_SYMBOL(elan3_sdram_copyq_from_sdram); -+ -+/* elan3/os/tproc.c */ -+EXPORT_SYMBOL(DeliverTProcTrap); -+EXPORT_SYMBOL(HandleTProcTrap); -+EXPORT_SYMBOL(SaveThreadToStack); -+ -+/* elan3/os/tprocinsts.c */ -+EXPORT_SYMBOL(RollThreadToClose); -+ -+/* elan3/os/iproc.c */ -+EXPORT_SYMBOL(InspectIProcTrap); -+EXPORT_SYMBOL(IProcTrapString); -+EXPORT_SYMBOL(SimulateUnlockQueue); -+ -+/* elan3/os/cproc.c */ -+EXPORT_SYMBOL(HandleCProcTrap); -+ -+/* elan3/os/route_table.c */ -+EXPORT_SYMBOL(GenerateRoute); -+EXPORT_SYMBOL(LoadRoute); -+EXPORT_SYMBOL(InvalidateRoute); -+EXPORT_SYMBOL(ValidateRoute); -+EXPORT_SYMBOL(ClearRoute); -+EXPORT_SYMBOL(GenerateProbeRoute); -+EXPORT_SYMBOL(GenerateCheckRoute); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(elan3_debug); -+EXPORT_SYMBOL(QueueHaltOperation); -+EXPORT_SYMBOL(ReleaseHaltOperations); -+EXPORT_SYMBOL(ReserveHaltOperations); -+ -+/* elan3/vm/elan3mmu_generic.c */ -+EXPORT_SYMBOL(elan3mmu_pteload); -+EXPORT_SYMBOL(elan3mmu_unload); -+EXPORT_SYMBOL(elan3mmu_set_context_filter); -+EXPORT_SYMBOL(elan3mmu_reserve); -+EXPORT_SYMBOL(elan3mmu_attach); -+EXPORT_SYMBOL(elan3mmu_detach); -+EXPORT_SYMBOL(elan3mmu_release); -+/* elan3/vm/elan3mmu_linux.c */ -+EXPORT_SYMBOL(elan3mmu_phys_to_pte); -+EXPORT_SYMBOL(elan3mmu_kernel_invalid_pte); -+ -+/* elan3/os/elan3_debug.c */ -+EXPORT_SYMBOL(elan3_debugf); -+ -+/* elan3/os/minames.c */ -+EXPORT_SYMBOL(MiToName); -+ -+/* elan3/os/elandev_generic.c */ -+EXPORT_SYMBOL(MapDeviceRegister); -+EXPORT_SYMBOL(UnmapDeviceRegister); -+ -+EXPORT_SYMBOL(elan_reg_rec_lbolt); -+EXPORT_SYMBOL(elan_reg_rec_file); -+EXPORT_SYMBOL(elan_reg_rec_index); -+EXPORT_SYMBOL(elan_reg_rec_cpu); -+EXPORT_SYMBOL(elan_reg_rec_reg); -+EXPORT_SYMBOL(elan_reg_rec_line); -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer); -+ -+static struct notifier_block elan3_dump_notifier = -+{ -+ notifier_call: elan3_dump_event, -+ priority: 0, -+}; -+ -+static int -+elan3_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ( event == DUMP_BEGIN ) -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#endif -+ -+static struct file_operations elan3_fops = { -+ ioctl: elan3_ioctl, /* ioctl */ -+ mmap: elan3_mmap, /* mmap */ -+ open: elan3_open, /* open */ -+ release: elan3_release, /* release */ -+}; -+ -+static struct notifier_block elan3_reboot_notifier = -+{ -+ notifier_call: elan3_reboot_event, -+ priority: 0, -+}; -+ -+static struct notifier_block elan3_panic_notifier = -+{ -+ notifier_call: elan3_panic_event, -+ priority: 0, -+}; -+ -+ELAN3_DEV * -+elan3_device (int instance) -+{ -+ if (instance < 0 || instance >= ELAN3_MAX_CONTROLLER) -+ return ((ELAN3_DEV *) NULL); -+ return elan3_devices[instance]; -+} -+EXPORT_SYMBOL(elan3_device); -+ -+/* -+ * Called at rmmod time. elandetach() for each card + general cleanup. -+ */ -+#ifdef MODULE -+static void __exit elan3_exit(void) -+{ -+ int i; -+ -+ printk("elan: preparing to remove module\n"); -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan3_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan3_reboot_notifier); -+ notifier_chain_unregister (&panic_notifier_list, &elan3_panic_notifier); -+ -+ /* call elandetach() for each device configured. */ -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ if (elan3_devices[i] != NULL) -+ elandetach(i); -+ -+ FinaliseNetworkErrorResolver(); -+ elan3mmu_fini(); -+ -+ cookie_fini(); -+ unregister_chrdev(ELAN3_MAJOR, ELAN3_NAME); -+ -+ elan3_procfs_fini(); -+ -+ printk("elan: module removed\n"); -+} -+ -+/* -+ * Called at insmod time. First we perform general driver initialization, -+ * then call elanattach() for each card. -+ */ -+#ifdef MODULE -+static int __init elan3_init(void) -+#else -+__initfunc(int elan3_init(void)) -+#endif -+{ -+ int e; -+ int boards; -+ struct pci_dev *dev; -+ char revid; -+ -+ elan_reg_rec_index=0; -+ { -+ int i; -+ for(i=0;islot_name); -+ else -+ { -+ if (boards < ELAN3_MAX_CONTROLLER) -+ /* Count successfully attached devices */ -+ boards += ((elanattach(boards, dev) == 0) ? 1 : 0); -+ else -+ { -+ printk ("elan: max controllers = %d\n", ELAN3_MAX_CONTROLLER); -+ break; -+ } -+ } -+ } -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan3_dump_notifier); -+#endif -+ register_reboot_notifier (&elan3_reboot_notifier); -+ notifier_chain_register (&panic_notifier_list, &elan3_panic_notifier); -+ -+ return 0; -+} -+ -+/* Declare the module init and exit functions */ -+module_init(elan3_init); -+module_exit(elan3_exit); -+ -+#endif -+ -+static void -+elan3_shutdown_devices(int panicing) -+{ -+ ELAN3_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN3_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan3_devices[i]) != NULL) -+ { -+ if (! panicing) spin_lock (&dev->IntrLock); -+ -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->Instance); -+ -+ /* -+ * We're going to set the link into boundary scan mode, so firstly -+ * set the inputters to discard everything. -+ */ -+ if (dev->DiscardAllCount++ == 0) -+ SetSchedStatusRegister (dev, read_reg32 (dev, Exts.InterruptReg), NULL); -+ -+ dev->LinkShutdown = 1; -+ -+ /* -+ * Now disable the error interrupts -+ */ -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ /* -+ * And set the link into boundary scan mode, and drive -+ * a reset token onto the link. -+ */ -+ SET_SCHED_LINK_VALUE (dev, 1, LinkResetToken); -+ -+ if (! panicing) spin_unlock (&dev->IntrLock); -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+static int -+elan3_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (! (event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ return (NOTIFY_DONE); -+ -+ elan3_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+ -+static int -+elan3_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan3_shutdown_devices (TRUE); -+ -+ return (NOTIFY_DONE); -+} -+ -+#include -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elanattach(int instance, struct pci_dev *pcidev) -+{ -+ ELAN3_DEV *dev; -+ int ramSize; -+ int level; -+ ioaddr_t sdramAddr, cmdPortAddr, intPalAddr; -+ DeviceMappingHandle handle; -+ -+ printk("elan%d: attach, irq=%d\n", instance, pcidev->irq); -+ -+ /* -+ * Allocate the ELAN3_DEV structure. -+ */ -+ KMEM_ZALLOC(dev, ELAN3_DEV *, sizeof(ELAN3_DEV), TRUE); -+ if (dev == NULL) { -+ printk ("elan%d: KMEM_ALLOC failed\n", instance); -+ return (-ENOMEM); -+ } -+ elan3_devices[instance] = dev; -+ dev->Osdep.pci = pcidev; -+ -+ dev->Instance = instance; -+ -+ /* Initialise the device information */ -+ pci_read_config_word (pcidev, PCI_VENDOR_ID, &dev->Devinfo.dev_vendor_id); -+ pci_read_config_word (pcidev, PCI_DEVICE_ID, &dev->Devinfo.dev_device_id); -+ pci_read_config_byte (pcidev, PCI_REVISION_ID, &dev->Devinfo.dev_revision_id); -+ -+ dev->Devinfo.dev_instance = instance; -+ dev->Devinfo.dev_rail = instance; -+ dev->Devinfo.dev_driver_version = 0; -+ dev->Devinfo.dev_num_down_links_value = DownLinks; -+ -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_random_disabled = RandomRoutingDisabled; -+ -+ /* -+ * Set up PCI config regs. -+ */ -+ if (ConfigurePci(dev) != ESUCCESS) -+ goto fail0; -+ -+ /* -+ * Determine the PFnums of the SDRAM and command port -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_SDRAM, &sdramAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail1; -+ -+ DeviceRegisterSize(dev, ELAN3_BAR_SDRAM, &ramSize); -+ -+ dev->SdramPhysMask = ~((physaddr_t) ramSize - 1); -+ dev->SdramPhysBase = kmem_to_phys((void *) sdramAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+#if defined(LINUX_ALPHA) -+ /* -+ * consider a physical address to be on the same pci bus -+ * as us if it's physical address is "close" to our sdram -+ * physical address. -+ * this is almost certainly incorrect for large memory (> 2Gb) -+ * i386 machines - and is only correct for alpha for 32 bit -+ * base address registers. -+ * -+ * Modified this to match the Tru64 driver value; -+ * i.e. PciPhysMask = 0xfffffffffffc0000 -+ */ -+# define PCI_ADDR_MASK (0x7FFFFFFFl) -+ -+ dev->PciPhysMask = ~PCI_ADDR_MASK; -+ dev->PciPhysBase = dev->SdramPhysBase & dev->PciPhysMask; -+#endif -+ /* -+ * Now reset the elan chip. -+ */ -+ if (MapDeviceRegister(dev, ELAN3_BAR_REGISTERS, &dev->RegPtr, 0, 0, &dev->RegHandle) != ESUCCESS) -+ goto fail1; -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_EBUS, &intPalAddr, ELAN3_EBUS_INTPAL_OFFSET, PAGESIZE, -+ &handle) != ESUCCESS) -+ goto fail2; -+ -+ ResetElan(dev, intPalAddr); -+ -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* -+ * Initialise the device mutex's which must be accessible from the -+ * interrupt handler. -+ */ -+ kcondvar_init (&dev->IntrWait); -+ spin_lock_init (&dev->IntrLock); -+ spin_lock_init (&dev->TlbLock); -+ spin_lock_init (&dev->CProcLock); -+ spin_lock_init (&dev->FreeHaltLock); -+ for(level=0; level<4; level++) -+ spin_lock_init (&dev->Level[level].PtblLock); -+ spin_lock_init (&dev->PtblGroupLock); -+ -+ /* -+ * Add the interrupt handler, -+ */ -+ if (request_irq(dev->Osdep.pci->irq, InterruptHandlerWrapper, -+ SA_SHIRQ, "elan3", dev) != 0) { -+ printk ("elan%d: request_irq failed\n", instance); -+ goto fail3; -+ } -+ -+ if (MapDeviceRegister(dev, ELAN3_BAR_COMMAND_PORT, &cmdPortAddr, 0, PAGESIZE, &handle) != ESUCCESS) -+ goto fail4; -+ -+ if (InitialiseElan(dev, cmdPortAddr) == EFAIL) { -+ printk ("elan%d: InitialiseElan failed\n", instance); -+ UnmapDeviceRegister (dev, &handle); -+ goto fail4; -+ } -+ UnmapDeviceRegister (dev, &handle); -+ -+ /* If our nodeid is defined, then set it now */ -+ if (NodeId != ELAN3_INVALID_NODE && ComputePosition (&dev->Position, NodeId, NumNodes, DownLinks) == 0) -+ { -+ if (RandomRoutingDisabled & ((1 << (dev->Position.pos_levels-1))-1)) -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing disabled 0x%x)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes, RandomRoutingDisabled); -+ else -+ printk ("elan%d: NodeId=%d NodeLevel=%d NumNodes=%d (random routing ok)\n", -+ dev->Instance, dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ } -+ -+ if (BackToBackMaster || BackToBackSlave) -+ { -+ dev->Position.pos_mode = ELAN_POS_MODE_BACKTOBACK; -+ dev->Position.pos_nodeid = (BackToBackMaster == 0); -+ dev->Position.pos_nodes = 2; -+ dev->Position.pos_levels = 1; -+ dev->Position.pos_arity[0] = 2; -+ -+ printk ("elan%d: back-to-back %s - elan node %d\n", dev->Instance, -+ BackToBackMaster ? "master" : "slave", dev->Position.pos_nodeid); -+ } -+ -+ elan3_procfs_device_init (dev); -+ -+ /* Success */ -+ return (0); -+ -+fail4: -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+fail3: -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+fail2: -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+fail1: -+ pci_disable_device (dev->Osdep.pci); -+fail0: -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ -+ elan3_devices[instance] = NULL; -+ -+ /* Failure */ -+ return (-ENODEV); -+} -+ -+/* -+ * Called by elan3_exit() for each board found on PCI. -+ */ -+static int -+elandetach(int instance) -+{ -+ ELAN3_DEV *dev = elan3_devices[instance]; -+ -+ printk("elan%d: detach\n", instance); -+ -+ elan3_procfs_device_fini (dev); -+ -+ FinaliseElan (dev); -+ -+ UnmapDeviceRegister (dev, &dev->RegHandle); -+ -+ free_irq(dev->Osdep.pci->irq, dev); -+ -+ pci_disable_device(dev->Osdep.pci); -+ -+ kcondvar_destroy (&dev->IntrWait); -+ spin_lock_destroy (&dev->IntrLock); -+ spin_lock_destroy (&dev->InfoLock); -+ spin_lock_destroy (&dev->TlbLock); -+ spin_lock_destroy (&dev->CProcLock); -+ spin_lock_destroy (&dev->FreeHaltLock); -+ spin_lock_destroy (&dev->Level1PtblLock); -+ spin_lock_destroy (&dev->Level2PtblLock); -+ spin_lock_destroy (&dev->Level3PtblLock); -+ spin_lock_destroy (&dev->PtblGroupLock); -+ -+ KMEM_FREE(dev, sizeof(ELAN3_DEV)); -+ elan3_devices[instance] = NULL; -+ -+ return 0; -+} -+ -+/* -+ * generic ioctls - available on control and user devices. -+ */ -+ -+static int -+device_stats_ioctl (ELAN3_DEV *dev, unsigned long arg) -+{ -+ ELAN3IO_STATS_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_STATS_STRUCT *, sizeof(ELAN3IO_STATS_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_STATS_STRUCT))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ switch (args->which) -+ { -+ case ELAN3_SYS_STATS_DEVICE: -+ if (copy_to_user (args->ptr, &dev->Stats, sizeof (ELAN3_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ case ELAN3_SYS_STATS_MMU: -+ if (copy_to_user (args->ptr, &elan3mmu_global_stats, sizeof (ELAN3MMU_GLOBAL_STATS))) -+ { -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EFAULT); -+ } -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (0); -+ -+ default: -+ KMEM_FREE(args, sizeof(ELAN3IO_STATS_STRUCT)); -+ return (-EINVAL); -+ } -+} -+ -+/* -+ * /dev/elan3/controlX - control device -+ * -+ */ -+ -+typedef struct control_private -+{ -+ u_int pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr; -+ -+ KMEM_ALLOC(pr, CONTROL_PRIVATE *, sizeof (CONTROL_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ if (pr->pr_boundary_scan) -+ ClearLinkBoundaryScan(dev); -+ -+ KMEM_FREE (pr, sizeof(CONTROL_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ int res; -+ -+ switch (cmd) -+ { -+ case ELAN3IO_SET_BOUNDARY_SCAN: -+ if (SetLinkBoundaryScan (dev) == 0) -+ pr->pr_boundary_scan = 1; -+ return (0); -+ -+ case ELAN3IO_CLEAR_BOUNDARY_SCAN: -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ pr->pr_boundary_scan = 0; -+ -+ ClearLinkBoundaryScan (dev); -+ return (0); -+ -+ case ELAN3IO_READ_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = ReadBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_WRITE_LINKVAL: -+ { -+ E3_uint32 val; -+ -+ if (pr->pr_boundary_scan == 0) -+ return (-EINVAL); -+ -+ if (copy_from_user(&val, (E3_uint32 *)arg, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ val = WriteBoundaryScanValue (dev, val); -+ -+ if (copy_to_user((E3_uint32 *)arg, &val, sizeof(E3_uint32))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_POSITION: -+ { -+ ELAN3IO_SET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (ComputePosition (&dev->Position, args.nodeId, args.numNodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ return (-EINVAL); -+ -+ return (0); -+ } -+ -+ case ELAN3IO_SET_DEBUG: -+ { -+ ELAN3IO_SET_DEBUG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SET_DEBUG_STRUCT))) -+ return (-EFAULT); -+ -+ if (! strcmp (args.what, "elan3_debug")) -+ elan3_debug = args.value; -+ else if (! strcmp (args.what, "elan3_debug_console")) -+ elan3_debug_console = args.value; -+ else if (! strcmp (args.what, "elan3_debug_buffer")) -+ elan3_debug_buffer = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_dev")) -+ elan3_debug_ignore_dev = args.value; -+ else if (! strcmp (args.what, "elan3_debug_ignore_ctxt")) -+ elan3_debug_ignore_ctxt = args.value; -+ else if (! strcmp (args.what, "elan3mmu_debug")) -+ elan3mmu_debug = args.value; -+ -+ return (0); -+ } -+ -+ case ELAN3IO_NETERR_SERVER: -+ { -+ ELAN3IO_NETERR_SERVER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_NETERR_SERVER_STRUCT))) -+ return (-EFAULT); -+ -+ res = AddNeterrServerSyscall (args.elanid, args.addr, args.name, NULL); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_NETERR_FIXUP: -+ { -+ NETERR_MSG *msg; -+ -+ KMEM_ALLOC(msg, NETERR_MSG *, sizeof (NETERR_MSG), TRUE); -+ -+ if (msg == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (msg, (void *) arg, sizeof (NETERR_MSG))) -+ res = EFAULT; -+ else -+ res = ExecuteNetworkErrorFixup (msg); -+ -+ KMEM_FREE (msg, sizeof (NETERR_MSG)); -+ return (set_errno (res)); -+ } -+ -+ case ELAN3IO_STATS: -+ return (device_stats_ioctl (dev, arg)); -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ if (copy_to_user ((void *) arg, &dev->Devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ if (copy_to_user ((void *) arg, &dev->Position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ default: -+ return (-EINVAL); -+ } -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(file->f_dentry->d_inode)]; -+ int space = OFF_TO_SPACE(vma->vm_pgoff << PAGE_SHIFT); -+ int off = OFF_TO_OFFSET(vma->vm_pgoff << PAGE_SHIFT); -+ int size; -+ ioaddr_t addr; -+ DeviceMappingHandle handle; -+ physaddr_t phys; -+ -+ if (space < ELAN3_BAR_SDRAM || space > ELAN3_BAR_EBUS) -+ return (-EINVAL); -+ -+ if (off < 0 || DeviceRegisterSize (dev, space, &size) != ESUCCESS || off > size) -+ return (-EINVAL); -+ -+ if (MapDeviceRegister(dev, space, &addr, off, PAGESIZE, &handle) != ESUCCESS) -+ return (-EINVAL); -+ -+ phys = kmem_to_phys((caddr_t) addr); -+ UnmapDeviceRegister(dev, &handle); -+ -+#ifdef NO_RMAP -+ if (remap_page_range(vma->vm_start, phys, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#else -+ if (remap_page_range(vma, vma->vm_start, phys, vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#endif -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan3/sdramX - sdram access device -+ */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ u_long pg_pgoff; -+ u_int pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ ELAN3_DEV *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan3_sdram_free (pr->pr_dev, pg->pg_addr, PAGE_SIZE); -+ KMEM_FREE (pg, sizeof(MEM_PAGE)); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, u_long pgoff, virtaddr_t addr) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx\n", pr, pgoff, addr); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_getpage: pr=%p pgoff=%lx addr=%lx -> found %p addr=%lx\n", pr, pgoff, addr, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ KMEM_ALLOC(npg, MEM_PAGE *, sizeof (MEM_PAGE), TRUE); -+ -+ if (npg == NULL) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan3_sdram_alloc (pr->pr_dev, PAGE_SIZE)) == 0) -+ { -+ KMEM_FREE (npg, sizeof (MEM_PAGE)); -+ return (NULL); -+ } -+ -+ /* zero the page before returning it to the user */ -+ elan3_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, PAGE_SIZE); -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, u_long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ KMEM_ALLOC(pr, MEM_PRIVATE *, sizeof (MEM_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ KMEM_FREE (pr, sizeof (MEM_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void mem_vma_open(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) { -+ mem_getpage (pr, pgoff, addr); -+ preemptable_check(); -+ } -+ } preemptable_end; -+} -+ -+static void mem_vma_close(struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occured -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff, 0); -+} -+ -+static struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx prot=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_page_prot.pgprot , file); -+ -+ preemptable_start { -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff, addr)) == NULL) -+ goto failed; -+ -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+#if defined(__ia64__) -+ if (enable_sdram_writecombining) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+#endif -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: addr %lx -> pg=%p addr=%lx phys=%llx flags=%lx prot=%lx\n", -+ addr, pg, pg->pg_addr, (long long) elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), vma->vm_flags, vma->vm_page_prot.pgprot); -+ -+#ifdef NO_RMAP -+ if (remap_page_range (addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), PAGE_SIZE, vma->vm_page_prot)) -+#else -+ if (remap_page_range (vma, addr, elan3_sdram_to_phys (pr->pr_dev, pg->pg_addr), PAGE_SIZE, vma->vm_page_prot)) -+#endif -+ { -+ mem_droppage (pr, pgoff, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+ preemptable_check(); -+ } -+ } preemptable_end; -+ -+ /* Don't try to swap out Elan SDRAM pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump SDRAM pages to a core file -+ * (Pity I would really like to do this but it crashes in elf_core_dump() as -+ * it can only handle pages that are in the mem_map area (addy 11/01/2002)) -+ */ -+ vma->vm_flags |= VM_IO; -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+ -+ failed: -+ PRINTF (DBG_DEVICE, DBG_SEG, "mem_mmap: failed\n"); -+ -+ /* free of any pages we've already allocated/referenced */ -+ while ((--pgoff) >= vma->vm_pgoff) -+ mem_droppage (pr, pgoff, 0); -+ -+ return (-ENOMEM); -+} -+ -+/* -+ * /dev/elan3/userX - control device -+ * -+ * "user_private" can be referenced from a number of places -+ * 1) the "file" structure. -+ * 2) the "mm" ioproc ops -+ * 3) the "mmap" of the command port. -+ * -+ */ -+typedef struct user_private -+{ -+ spinlock_t pr_lock; -+ atomic_t pr_mappings; -+ atomic_t pr_ref; -+ ELAN3_CTXT *pr_ctxt; -+ struct mm_struct *pr_mm; -+ struct ioproc_ops pr_ioproc; -+} USER_PRIVATE; -+ -+static void -+user_free (USER_PRIVATE *pr) -+{ -+ /* Have to unreserve the FlagPage or else we leak memory like a sieve! */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) pr->pr_ctxt->FlagPage))); -+ -+ elan3_detach(pr->pr_ctxt); -+ elan3_free (pr->pr_ctxt); -+ -+ KMEM_FREE (pr, sizeof(USER_PRIVATE)); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+static void -+user_ioproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_SEG, "user_ioproc_release: ctxt=%p pr=%p ref=%d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref)); -+ -+ elan3mmu_pte_ctxt_unload (pr->pr_ctxt->Elan3mmu); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+} -+ -+/* -+ * On 2.4 kernels we get passed a mm_struct, whereas on 2.6 kernels -+ * we get the vma which is more usefull -+ */ -+#if defined(IOPROC_MM_STRUCT_ARG) -+static void -+user_ioproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ ASSERT(start <= end && ((start & PAGEOFFSET) == 0) && ((end & PAGEOFFSET) == 0)); -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu, mm,(caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, mm, (caddr_t) start, end-start); -+} -+ -+#else -+ -+static void -+user_ioproc_sync_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ ASSERT(start <= end && ((start & PAGEOFFSET) == 0) && ((end & PAGEOFFSET) == 0)); -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_range: start=%lx end=%lx\n", start, end); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_SEG, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ ASSERT(start <= end); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) start, end-start); -+} -+#endif /* defined(IOPROC_NO_VMA_RANGE) */ -+ -+static void -+user_ioproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_sync_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_ioproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_invalidate_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_unload(pr->pr_ctxt->Elan3mmu, vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+static void -+user_ioproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF1 (pr->pr_ctxt, DBG_SEG, "user_ioproc_update_page: addr=%lx\n", addr); -+ -+ elan3mmu_pte_range_update (pr->pr_ctxt->Elan3mmu,vma->vm_mm, (caddr_t) (addr & PAGE_MASK), PAGE_SIZE); -+} -+ -+int -+user_ptrack_handler (void *arg, int phase, struct task_struct *child) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ -+ PRINTF5 (pr->pr_ctxt, DBG_FN, "user_ptrack_handler: ctxt=%p pr=%p ref=%d phase %d mm->ref %d\n", -+ pr->pr_ctxt, pr, atomic_read (&pr->pr_ref), phase, atomic_read (¤t->mm->mm_count)); -+ -+ if (phase == PTRACK_PHASE_EXIT) -+ { -+ /* this will force the helper thread to exit */ -+ elan3_swapout (ctxt, CTXT_EXITING); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ } -+ return PTRACK_FINISHED; -+} -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN3_DEV *dev = elan3_devices[ELAN3_DEVICE(inode)]; -+ USER_PRIVATE *pr; -+ ELAN3_CTXT *ctxt; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC(pr, USER_PRIVATE *, sizeof (USER_PRIVATE), TRUE); -+ -+ if (pr == NULL) -+ return (-ENOMEM); -+ -+ if ((ctxt = elan3_alloc (dev, 0)) == NULL) -+ { -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ if (sys_init (ctxt) == NULL) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* initialise refcnt to 3 - one for "file", one for XA handler, one for the ioproc ops */ -+ atomic_set (&pr->pr_ref, 3); -+ -+ atomic_set (&pr->pr_mappings, 0); -+ spin_lock_init (&pr->pr_lock); -+ -+ pr->pr_ctxt = ctxt; -+ pr->pr_mm = current->mm; -+ -+ /* register an ptrack handler to force the helper thread to exit when we do */ -+ if (ptrack_register (user_ptrack_handler, pr) < 0) -+ { -+ elan3_detach(ctxt); -+ elan3_free (ctxt); -+ KMEM_FREE (pr, sizeof (USER_PRIVATE)); -+ return (-ENOMEM); -+ } -+ -+ /* register a ioproc callback to notify us of translation changes */ -+ -+ pr->pr_ioproc.arg = (void *) pr; -+ pr->pr_ioproc.release = user_ioproc_release; -+ pr->pr_ioproc.sync_range = user_ioproc_sync_range; -+ pr->pr_ioproc.invalidate_range = user_ioproc_invalidate_range; -+ pr->pr_ioproc.update_range = user_ioproc_update_range; -+ pr->pr_ioproc.change_protection = user_ioproc_change_protection; -+ pr->pr_ioproc.sync_page = user_ioproc_sync_page; -+ pr->pr_ioproc.invalidate_page = user_ioproc_invalidate_page; -+ pr->pr_ioproc.update_page = user_ioproc_update_page; -+ -+ spin_lock (¤t->mm->page_table_lock); -+ ioproc_register_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ file->private_data = (void *) pr; -+ -+ PRINTF2 (pr->pr_ctxt, DBG_FN, "user_open: done ctxt=%p pr=%p\n", ctxt, pr); -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF3 (pr->pr_ctxt, DBG_FN, "user_release: ctxt=%p pr=%p ref=%d\n", pr->pr_ctxt, pr, -+ atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res = 0; -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ PRINTF4 (ctxt, DBG_FN, "user_ioctl: ctxt=%p cmd=%x(%d) arg=%lx\n", ctxt, cmd, _IOC_NR(cmd), arg); -+ -+ switch (cmd) -+ { -+ case ELAN3IO_FREE: -+ if (atomic_read (&pr->pr_mappings) > 0) -+ return (-EINVAL); -+ -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ ioproc_unregister_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_ioproc_release (pr, current->mm); -+ } -+ -+ if (ptrack_registered (user_ptrack_handler, pr)) -+ { -+ ptrack_deregister (user_ptrack_handler, pr); -+ user_ptrack_handler (pr, PTRACK_PHASE_EXIT, NULL); -+ } -+ break; -+ -+ case ELAN3IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC(cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ if (cap == NULL) -+ return (set_errno (EFAULT)); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = EFAULT; -+ else -+ { -+ if ((res = elan3_attach (ctxt, cap)) == 0) -+ { -+ if (copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ elan3_detach (ctxt); -+ res = EFAULT; -+ } -+ } -+ } -+ KMEM_FREE (cap, sizeof(ELAN_CAPABILITY)); -+ break; -+ } -+ -+ case ELAN3IO_DETACH: -+ spin_lock (&pr->pr_lock); -+ if (atomic_read (&pr->pr_mappings) > 0) -+ res = EINVAL; -+ else -+ elan3_detach (ctxt); -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_ADDVP: -+ { -+ ELAN3IO_ADDVP_STRUCT *args; -+ -+ KMEM_ALLOC(args, ELAN3IO_ADDVP_STRUCT *, sizeof (ELAN3IO_ADDVP_STRUCT), TRUE); -+ -+ if (args == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN3IO_ADDVP_STRUCT))) -+ res = EFAULT; -+ else -+ { -+ if ( (res=elan3_addvp (ctxt, args->process, &args->capability)) != 0) -+ PRINTF0 (ctxt, DBG_FN, "ELAN3IO_ADDVP elan3_addvp failed \n"); -+ } -+ -+ KMEM_FREE (args, sizeof (ELAN3IO_ADDVP_STRUCT)); -+ break; -+ } -+ -+ case ELAN3IO_REMOVEVP: -+ res = elan3_removevp (ctxt, arg); -+ break; -+ -+ case ELAN3IO_BCASTVP: -+ { -+ ELAN3IO_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_addbcastvp (ctxt, args.process, args.lowvp, args.highvp); -+ break; -+ } -+ -+ case ELAN3IO_LOAD_ROUTE: -+ { -+ ELAN3IO_LOAD_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_load_route (ctxt, args.process, args.flits); -+ break; -+ } -+ -+ case ELAN3IO_CHECK_ROUTE: -+ { -+ ELAN3IO_CHECK_ROUTE_STRUCT args; -+ -+ args.routeError = 0; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_check_route (ctxt, args.process, args.flits, & args.routeError)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_LOAD_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_PROCESS_2_LOCATION: -+ { -+ ELAN3IO_PROCESS_2_LOCATION_STRUCT args; -+ ELAN_LOCATION loc; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ krwlock_write (&ctxt->VpLock); -+ loc = ProcessToLocation (ctxt, NULL, args.process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ args.loc = loc; -+ -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_PROCESS_2_LOCATION_STRUCT))) -+ return (-EFAULT); -+ -+ break; -+ } -+ -+ case ELAN3IO_GET_ROUTE: -+ { -+ ELAN3IO_GET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = elan3_get_route (ctxt, args.process, args.flits)) == ESUCCESS) -+ { -+ if (copy_to_user ( (void *) arg, &args,sizeof (ELAN3IO_GET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ } -+ break; -+ } -+ -+ case ELAN3IO_RESET_ROUTE: -+ { -+ ELAN3IO_RESET_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_RESET_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3_reset_route (ctxt, args.process); -+ break; -+ } -+ -+ case ELAN3IO_VP2NODEID: -+ { -+ ELAN3IO_VP2NODEID_STRUCT *vp2nodeId; -+ ELAN_LOCATION location; -+ -+ KMEM_ALLOC (vp2nodeId, ELAN3IO_VP2NODEID_STRUCT *, sizeof(ELAN3IO_VP2NODEID_STRUCT), TRUE); -+ if (vp2nodeId == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if (copy_from_user (vp2nodeId, (void *) arg, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ krwlock_write (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, vp2nodeId->process , NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ vp2nodeId->nodeId = location.loc_node; -+ if (copy_to_user ( (void *) arg, vp2nodeId, sizeof (ELAN3IO_VP2NODEID_STRUCT))) { -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ return (-EFAULT); -+ } -+ -+ KMEM_FREE (vp2nodeId, sizeof(ELAN3IO_VP2NODEID_STRUCT)); -+ -+ break; -+ } -+ -+ case ELAN3IO_PROCESS: -+ return (elan3_process (ctxt)); -+ -+ case ELAN3IO_SETPERM: -+ { -+ ELAN3IO_SETPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_SETPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = elan3mmu_setperm (ctxt->Elan3mmu, args.maddr, args.eaddr, args.len, args.perm); -+ break; -+ } -+ -+ case ELAN3IO_CLEARPERM: -+ { -+ ELAN3IO_CLEARPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CLEARPERM_STRUCT))) -+ return (-EFAULT); -+ -+ elan3mmu_clrperm (ctxt->Elan3mmu, args.eaddr, args.len); -+ break; -+ } -+ -+ case ELAN3IO_CHANGEPERM: -+ { -+ ELAN3IO_CHANGEPERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_CHANGEPERM_STRUCT))) -+ return (-EFAULT); -+ -+ res = EINVAL; -+ break; -+ } -+ -+ case ELAN3IO_HELPER_THREAD: -+ res = elan3_lwp (ctxt); -+ break; -+ -+ case ELAN3IO_WAITCOMMAND: -+ res = WaitForCommandPort (ctxt); -+ break; -+ -+ case ELAN3IO_BLOCK_INPUTTER: -+ elan3_block_inputter (ctxt, arg); -+ break; -+ -+ case ELAN3IO_SET_FLAGS: -+ sctx->Flags = arg; -+ break; -+ -+ case ELAN3IO_SET_SIGNAL: -+ sctx->signal = arg; -+ break; -+ -+ case ELAN3IO_WAITEVENT: -+ res = sys_waitevent (ctxt, (E3_Event *) arg); -+ break; -+ -+ case ELAN3IO_ALLOC_EVENTCOOKIE: -+ res = cookie_alloc_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_FREE_EVENTCOOKIE: -+ res = cookie_free_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_ARM_EVENTCOOKIE: -+ res = cookie_arm_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_WAIT_EVENTCOOKIE: -+ res = cookie_wait_cookie (sctx->Table, arg); -+ break; -+ -+ case ELAN3IO_SWAPSPACE: -+ if (fuword (&((SYS_SWAP_SPACE *) arg)->Magic) != SYS_SWAP_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Swap = (SYS_SWAP_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_EXCEPTION_SPACE: -+ if (fuword (&((SYS_EXCEPTION_SPACE *) arg)->Magic) != SYS_EXCEPTION_MAGIC) -+ return (set_errno (EINVAL)); -+ -+ ((SYS_CTXT *) ctxt->Private)->Exceptions = (SYS_EXCEPTION_SPACE *) arg; -+ break; -+ -+ case ELAN3IO_GET_EXCEPTION: -+ { -+ SYS_EXCEPTION *exception; -+ -+ if (((SYS_CTXT *) ctxt->Private)->Exceptions == NULL) -+ return (set_errno (EINVAL)); -+ -+ KMEM_ALLOC(exception, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (exception == NULL) -+ return (set_errno (ENOMEM)); -+ -+ if ((res = sys_getException (((SYS_CTXT *) ctxt->Private), exception)) == 0 && -+ copy_to_user ((void *) arg, exception, sizeof (SYS_EXCEPTION))) -+ res = EFAULT; -+ -+ KMEM_FREE (exception, sizeof (SYS_EXCEPTION)); -+ break; -+ } -+ -+ case ELAN3IO_UNLOAD: -+ { -+ ELAN3MMU *elan3mmu = ctxt->Elan3mmu; -+ ELAN3IO_UNLOAD_STRUCT args; -+ int span; -+ unsigned long flags; -+ E3_Addr eaddr; -+ caddr_t addr; -+ size_t len; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_UNLOAD_STRUCT))) -+ return (-EFAULT); -+ -+ addr = (caddr_t) args.addr; -+ len = args.len; -+ -+ if (((unsigned long) addr & PAGEMASK) || (len & PAGEMASK) || (len < 0)) -+ return -EINVAL; -+ -+ spin_lock_irqsave (&elan3mmu->elan3mmu_lock, flags); -+ for (; len; len -= span, addr += span) -+ { -+ ELAN3MMU_RGN *rgn = elan3mmu_findrgn_main (elan3mmu, addr, 0); -+ -+ if (rgn == NULL || (rgn->rgn_mbase + rgn->rgn_len) < addr) -+ span = len; -+ else if (rgn->rgn_mbase > addr) -+ span = MIN(len, rgn->rgn_mbase - addr); -+ else -+ { -+ span = MIN(len, (rgn->rgn_mbase + rgn->rgn_len) - addr); -+ eaddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ elan3mmu_unload (elan3mmu, eaddr, span, PTE_UNLOAD); -+ } -+ } -+ spin_unlock_irqrestore (&elan3mmu->elan3mmu_lock, flags); -+ -+ return 0; -+ } -+ -+ case ELAN3IO_GET_DEVINFO: -+ { -+ ELAN3IO_GET_DEVINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_DEVINFO_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.devinfo, &ctxt->Device->Devinfo, sizeof (ELAN_DEVINFO))) -+ res = EFAULT; -+ break; -+ } -+ -+ case ELAN3IO_GET_POSITION: -+ { -+ ELAN3IO_GET_POSITION_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN3IO_GET_POSITION_STRUCT))) -+ return (-EFAULT); -+ -+ if (copy_to_user ((void *) args.position, &ctxt->Device->Position, sizeof (ELAN_POSITION))) -+ res = EFAULT; -+ break; -+ } -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ return (res ? set_errno (res) : 0); -+} -+ -+static void user_vma_open(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ if (atomic_dec_and_test (&pr->pr_mappings)) -+ pr->pr_ctxt->CommandPageMapping = NULL; -+} -+ -+static void user_vma_close(struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ if (vma->vm_pgoff == ELAN3IO_OFF_COMMAND_PAGE) -+ atomic_inc (&pr->pr_mappings); -+} -+ -+static struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ ELAN3_CTXT *ctxt = pr->pr_ctxt; -+ ioaddr_t ioaddr; -+ -+ /* -+ * NOTE - since we need to maintain the reference count on -+ * the user_private we only permit single page -+ * mmaps - this means that we will certainly see -+ * the correct number of closes to maintain the -+ * the reference count correctly. -+ */ -+ -+ if ((vma->vm_end - vma->vm_start) != PAGE_SIZE) -+ return (-EINVAL); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: vm_mm=%p start=%lx end=%lx pgoff=%lx flags=%lx prot=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, vma->vm_page_prot.pgprot, vma->vm_file); -+ -+ switch (vma->vm_pgoff) -+ { -+ default: -+ return (-EINVAL); -+ -+ case ELAN3IO_OFF_COMMAND_PAGE: -+ spin_lock (&pr->pr_lock); -+ if (ctxt->CommandPage == (ioaddr_t) 0 || atomic_read (&pr->pr_mappings) != 0) -+ { -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: command port - %s\n", ctxt->CommandPort ? "already mapped" : "not attached"); -+ spin_unlock (&pr->pr_lock); -+ return (-EINVAL); -+ } -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: commandport at %lx phys %llx prot %lx\n", -+ vma->vm_start, (unsigned long long) kmem_to_phys ((void *) ctxt->CommandPort), vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ctxt->CommandPage), vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ctxt->CommandPage), vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+#endif -+ { -+ spin_unlock (&pr->pr_lock); -+ return (-EAGAIN); -+ } -+ ctxt->CommandPageMapping = (void *) vma->vm_start; -+ -+ atomic_inc (&pr->pr_mappings); -+ -+ spin_unlock (&pr->pr_lock); -+ break; -+ -+ case ELAN3IO_OFF_UREG_PAGE: -+#ifdef LINUX_SPARC -+ pgprot_val(vma->vm_page_prot) &= ~(_PAGE_CACHE); -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IE; -+#elif defined(pgprot_noncached) -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+#endif -+ ioaddr = ctxt->Device->RegPtr + (offsetof (E3_Regs, URegs) & PAGEMASK); -+ -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: user_regs at %lx phys %llx prot %lx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ioaddr), vma->vm_page_prot.pgprot); -+ -+ /* Don't try to swap out physical pages.. */ -+ vma->vm_flags |= VM_RESERVED; -+ -+ /* -+ * Don't dump addresses that are not real memory to a core file. -+ */ -+ vma->vm_flags |= VM_IO; -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ioaddr), -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ioaddr), -+#endif -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ -+ case ELAN3IO_OFF_FLAG_PAGE: -+ PRINTF (DBG_DEVICE, DBG_SEG, "user_mmap: flag page at %lx phys %llx\n", vma->vm_start, -+ (unsigned long long) kmem_to_phys ((void *) ctxt->FlagPage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) ctxt->FlagPage))); -+ -+#ifdef NO_RMAP -+ if (remap_page_range (vma->vm_start, kmem_to_phys ((void *) ctxt->FlagPage), -+#else -+ if (remap_page_range (vma, vma->vm_start, kmem_to_phys ((void *) ctxt->FlagPage), -+#endif -+ vma->vm_end - vma->vm_start, vma->vm_page_prot)) -+ return (-EAGAIN); -+ break; -+ } -+ -+ ASSERT (vma->vm_ops == NULL); -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ return (0); -+} -+ -+/* driver entry points */ -+static int -+elan3_open (struct inode *inode, struct file *file) -+{ -+ if (elan3_devices[ELAN3_DEVICE(inode)] == NULL) -+ return (-ENXIO); -+ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_open: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_DEVICE, DBG_FN, "elan3_release: device %d minor %d file=%p\n", ELAN3_DEVICE(inode), ELAN3_MINOR(inode), file); -+ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN3_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN3_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan3_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch (ELAN3_MINOR (inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN3_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+ -+static int -+elan3_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_DEVICE, DBG_SEG, "elan3_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx flags=%lx prot=%lx\n", -+ ELAN3_DEVICE (file->f_dentry->d_inode), ELAN3_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags, vma->vm_page_prot.pgprot); -+ -+ switch (ELAN3_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN3_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN3_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN3_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static irqreturn_t -+InterruptHandlerWrapper(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ if (InterruptHandler ((ELAN3_DEV *)dev_id) == 0) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ -+/* -+ * Elan specific PCI configuration registers. -+ */ -+ -+#define PCI_CONF_PARITY_PHYS_LO 0x40 -+#define PCI_CONF_PARITY_PHYS_HI 0x44 -+#define PCI_CONF_PARITY_PHASE_ADDR 0x46 -+#define PCI_CONF_PARITY_MASTER_TYPE 0x47 -+#define PCI_CONF_ELAN3_CTRL 0x48 -+ -+#define ECTRL_EXTEND_LATENCY (1 << 0) -+#define ECTRL_ENABLE_PREFETCH (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_REDUCED_RETRY_RATE (1 << 3) -+#define ECTRL_CLOCK_DIVIDE_RATE_SHIFT 4 -+#define ECTRL_COMMS_DIVIDE_RATE_SHIFT 10 -+#define ECTRL_FORCE_COMMSCLK_LOCAL (1 << 14) -+ -+/* -+ * Configure PCI. -+ */ -+static int -+ConfigurePci(ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u32 rom_address; -+ -+ if (pci_enable_device(pci)) -+ return (ENXIO); -+ -+ /* disable ROM */ -+ pci_read_config_dword(pci, PCI_ROM_ADDRESS, &rom_address); -+ rom_address &= ~PCI_ROM_ADDRESS_ENABLE; -+ pci_write_config_dword(pci, PCI_ROM_ADDRESS, rom_address); -+ mb(); -+ -+ /* this is in 32-bit WORDS */ -+ pci_write_config_byte(pci, PCI_CACHE_LINE_SIZE, (64 >> 2)); -+ mb(); -+ -+ /* allow 40 ticks to respond, 16 data phases */ -+ pci_write_config_byte(pci, PCI_LATENCY_TIMER, 255); -+ mb(); -+ -+ /* don't enable PCI_COMMAND_SERR--see note in elandev_dunix.c */ -+ pci_write_config_word(pci, PCI_COMMAND, PCI_COMMAND_MEMORY -+ | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY); -+ mb(); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Reset chip to a known state. -+ */ -+static int -+ResetElan(ELAN3_DEV *dev, ioaddr_t intPalAddr) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ int instance = dev->Instance; -+ u32 val; -+ u8 revid; -+ int CasLatency; -+ int res; -+ -+ /* determine rev of board */ -+ pci_read_config_byte(pci, PCI_REVISION_ID, &revid); -+ -+ /* GNAT 2328 - don't set ECTRL_ENABLE_PREFETCH on Elan rev A */ -+ val = ECTRL_EXTEND_LATENCY | (39 << ECTRL_CLOCK_DIVIDE_RATE_SHIFT) -+ | (6 << ECTRL_COMMS_DIVIDE_RATE_SHIFT); -+ switch (revid) -+ { -+ case PCI_REVISION_ID_ELAN3_REVA: -+ printk("elan%d: is an elan3 (revision a) - not supported\n", instance); -+ return (EFAIL); -+ -+ case PCI_REVISION_ID_ELAN3_REVB: -+ val |= ECTRL_ENABLE_PREFETCH; -+ if (BackToBackMaster) -+ val |= ECTRL_FORCE_COMMSCLK_LOCAL; -+ printk("elan%d: is an elan3 (revision b)\n", instance); -+ break; -+ default: -+ printk("elan%d: unsupported elan3 revision %d\n", -+ instance, revid); -+ return EFAIL; -+ } -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * GNAT: 2474 -+ * Hit reset on the Elan, then we MUST initialise the schedule status -+ * register to drive reset on the link before the link can come out -+ * of reset (15 uS). We need to keep it like this until we've -+ * initialised SDRAM -+ */ -+ pci_read_config_dword(pci, PCI_CONF_ELAN3_CTRL, &val); -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, -+ val | ECTRL_SOFTWARE_INTERNAL_RESET); -+ mb(); -+ -+ /* Read the Vital Product Data to determine the cas latency */ -+ if ((res = ReadVitalProductData (dev, &CasLatency)) != ESUCCESS) -+ return (res); -+ -+ /* -+ * Now clear the Software internal reset bit, and start the sdram -+ */ -+ pci_write_config_dword(pci, PCI_CONF_ELAN3_CTRL, val); -+ mb(); -+ -+ /* -+ * Enable SDRAM before sizing and initalising it for ECC. -+ * NOTE: don't enable all sets of the cache (yet), nor ECC -+ */ -+ dev->Cache_Control_Reg = (CasLatency | REFRESH_RATE_16US); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, (dev->Cache_Control_Reg | SETUP_SDRAM)); -+ mb(); -+ -+ INIT_SCHED_STATUS(dev, Sched_Initial_Value); -+ -+ /* -+ * Set the interrupt mask to 0 and enable the interrupt PAL -+ * by writing any value to it. -+ */ -+ SET_INT_MASK (dev, 0); -+ writeb (0, intPalAddr); -+ -+ return ESUCCESS; -+} -+ -+/* -+ * Determine the size of elan PCI address spaces. EFAIL is returned if -+ * unused or invalid BAR is specified, or if board reports I/O mapped space. -+ */ -+int -+DeviceRegisterSize(ELAN3_DEV *dev, int rnumber, int *sizep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ *sizep = pci_resource_size(pdev, rnumber); -+#else -+ *sizep = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+#endif -+ return ESUCCESS; -+} -+ -+/* -+ * Map PCI memory into kernel virtual address space. On the alpha, -+ * we just return appropriate kseg address, and Unmap is a no-op. -+ */ -+int -+MapDeviceRegister(ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, -+ int off, int len, DeviceMappingHandle *handlep) -+{ -+ struct pci_dev *pdev = dev->Osdep.pci; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ u64 base = pci_get_base_address(pdev, rnumber); -+ *addrp = (ioaddr_t) pci_base_to_kseg(base + off, pdev->bus->number); -+ -+#else -+ if (len == 0) -+ len = pci_resource_end(pdev, rnumber) - pci_resource_start(pdev, rnumber) + 1; -+ -+ if (len == 0) -+ return (EINVAL); -+ -+ *addrp = (ioaddr_t) ioremap_nocache (pci_resource_start(pdev, rnumber) + off, len); -+#endif -+ -+ *handlep = (void *) *addrp; -+ -+ return (*addrp ? ESUCCESS : ENOMEM); -+} -+void -+UnmapDeviceRegister(ELAN3_DEV *dev, DeviceMappingHandle *handlep) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+ iounmap (*handlep); -+#endif -+} -+ -+void -+ElanBusError (ELAN3_DEV *dev) -+{ -+ struct pci_dev *pci = dev->Osdep.pci; -+ u8 phaseaddr, type; -+ u16 status, cmd, physhi; -+ u32 physlo; -+ -+ printk("elan%d: bus error occured\n", dev->Instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword(pci, PCI_CONF_PARITY_PHYS_LO, &physlo); -+ pci_read_config_word (pci, PCI_CONF_PARITY_PHYS_HI, &physhi); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_PHASE_ADDR, &phaseaddr); -+ pci_read_config_byte (pci, PCI_CONF_PARITY_MASTER_TYPE, &type); -+ -+#define PCI_CONF_STAT_FORMAT "\20" \ -+ "\6SIXTY_SIX_MHZ\7UDF\10FAST_BACK\11PARITY" \ -+ "\14SIG_TARGET_ABORT\15REC_TARGET_ABORT\16REC_MASTER_ABORT" \ -+ "\17SIG_SYSTEM_ERROR\20DETECTED_PARITY" -+ -+ printk ("elan%d: status %x cmd %4x physaddr %04x%08x phase %x type %x\n", -+ dev->Instance, status, cmd, physhi, physlo, phaseaddr, type); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/elansyscall.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/elansyscall.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/elansyscall.c 2005-07-28 14:52:52.805685272 -0400 -@@ -0,0 +1,1230 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elansyscall.c,v 1.99.2.1 2004/10/28 17:08:56 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/elansyscall.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static int sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+static int sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+static int sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+static void sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_uint32 value); -+static void sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+static void sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *item); -+static void sys_freeWordItem (ELAN3_CTXT *ctxt, void *item); -+static void sys_freeBlockItem (ELAN3_CTXT *ctxt, void *item); -+static int sys_countItems (ELAN3_CTXT *ctxt, int list); -+static int sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static void sys_swapin (ELAN3_CTXT *ctxt); -+static void sys_swapout (ELAN3_CTXT *ctxt); -+static void sys_freePrivate (ELAN3_CTXT *ctxt); -+static int sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+static int sys_startFaultCheck (ELAN3_CTXT *ctxt); -+static void sys_endFaultCheck (ELAN3_CTXT *ctxt); -+static E3_uint8 sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+static ELAN3_OPS elan3_sys_ops = { -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ sys_exception, /* Exception */ -+ sys_getWordItem, /* GetWordItem */ -+ sys_getBlockItem, /* GetBlockItem */ -+ sys_putWordItem, /* PutWordItem */ -+ sys_putBlockItem, /* PutBlockItem */ -+ sys_putbackItem, /* PutbackItem */ -+ sys_freeWordItem, /* FreeWordItem */ -+ sys_freeBlockItem, /* FreeBlockItem */ -+ sys_countItems, /* CountItems */ -+ sys_event, /* Event */ -+ sys_swapin, /* Swapin */ -+ sys_swapout, /* Swapout */ -+ sys_freePrivate, /* FreePrivate */ -+ sys_fixupNetworkError, /* FixupNetworkError */ -+ NULL, /* DProcTrap */ -+ NULL, /* TProcTrap */ -+ NULL, /* IProcTrap */ -+ NULL, /* CProcTrap */ -+ NULL, /* CProcReissue */ -+ sys_startFaultCheck, /* StartFaultCheck */ -+ sys_endFaultCheck, /* EndFaultCheck */ -+ sys_load8, /* Load8 */ -+ sys_store8, /* Store8 */ -+ sys_load16, /* Load16 */ -+ sys_store16, /* Store16 */ -+ sys_load32, /* Load32 */ -+ sys_store32, /* Store32 */ -+ sys_load64, /* Load64 */ -+ sys_store64 /* Store64 */ -+}; -+ -+va_list null_valist; -+ -+SYS_CTXT * -+sys_init (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (sctx, SYS_CTXT *, sizeof (SYS_CTXT), TRUE); -+ -+ if (sctx == NULL) -+ return ((SYS_CTXT *) NULL); -+ -+ sctx->Swap = NULL; -+ sctx->Armed = 0; -+ sctx->Backoff = 1; -+ sctx->Table = cookie_alloc_table ((unsigned long) ELAN3_MY_TASK_HANDLE(), 0); -+ sctx->signal = SIGSEGV; -+ -+ if (sctx->Table == NULL) -+ { -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ return ((SYS_CTXT *) NULL); -+ } -+ -+ kmutex_init (&sctx->Lock); -+ spin_lock_init (&sctx->WaitLock); -+ kcondvar_init (&sctx->NetworkErrorWait); -+ -+ /* Install my context operations and private data */ -+ ctxt->Operations = &elan3_sys_ops; -+ ctxt->Private = (void *) sctx; -+ -+ return (sctx); -+} -+ -+/* returns -ve on error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+int -+elan3_validate_cap(ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN3_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN3_SYSTEM_CONTEXT (cap->cap_highcontext) -+ || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM || cap->cap_highcontext <= ELAN_USER_BASE_CONTEXT_NUM) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: lctx %x hctx %x \n",cap->cap_lowcontext, cap->cap_highcontext); -+ PRINTF3 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: bit %x low %x high %x\n", ((cap->cap_lowcontext) & SYS_CONTEXT_BIT), -+ E3_NUM_CONTEXT_0, ELAN3_KCOMM_BASE_CONTEXT_NUM); -+ -+ -+ PRINTF0 (DBG_DEVICE, DBG_VP,"elan3_validate_cap: user process cant attach to system cap\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_type & ELAN_CAP_TYPE_HWTEST) -+ { -+ if (!(cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP)) /* cant have a bit map */ -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST must have ELAN_CAP_TYPE_NO_BITMAP\n"); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lowcontext != cap->cap_highcontext) -+ { -+ PRINTF2 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST (cap->cap_lowcontext != cap->cap_highcontext) %d %d\n",cap->cap_lowcontext , cap->cap_highcontext) ; -+ return (-EINVAL); -+ } -+ -+ if ( ! (ELAN3_HWTEST_CONTEXT(cap->cap_lowcontext) && ELAN3_HWTEST_CONTEXT(cap->cap_highcontext))) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST HWTEST_BASE_CONTEXT %d %d %d \n" , ELAN3_HWTEST_BASE_CONTEXT_NUM,cap->cap_lowcontext ,ELAN3_HWTEST_TOP_CONTEXT_NUM); -+ return (-EINVAL); -+ } -+ -+ if (cap->cap_lownode != ELAN_CAP_UNINITIALISED || cap->cap_highnode != ELAN_CAP_UNINITIALISED) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_VP, "elanmod_classify_cap: ELAN_CAP_TYPE_HWTEST nodes != ELAN_CAP_UNINITIALISED\n"); -+ return (-EINVAL); -+ } -+ -+ return ELAN_CAP_OK; -+ } -+ -+ return elanmod_classify_cap(&dev->Position, cap, use); -+} -+ -+int -+sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ EVENT_COOKIE cookie; -+ -+ if (ctxt->Device->Devinfo.dev_revision_id == PCI_REVISION_ID_ELAN3_REVA) -+ return (EINVAL); -+ -+ cookie = fuword ((int *) &event->ev_Type) & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY); -+ -+ if (cookie_alloc_cookie (sctx->Table, cookie) != ESUCCESS) -+ return (EINVAL); -+ -+ cookie_arm_cookie (sctx->Table, cookie); -+ -+ if (fuword ((int *) &event->ev_Count) > 0) -+ cookie_wait_cookie (sctx->Table, cookie); -+ -+ cookie_free_cookie (sctx->Table, cookie); -+ -+ return (ESUCCESS); -+} -+ -+static void * -+sys_getItem (SYS_SWAP_SPACE *sp, int list) -+{ -+ void *itemp = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ void *next; -+ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_getItem: sp=%p list=%d head=%p itemp=%p\n", -+ sp, list, &sp->ItemListsHead[list], itemp); -+ -+ if (itemp == NULL) -+ return (NULL); -+ -+ next = (void *) fuptr_noerr ((void *) itemp); -+ -+ suptr_noerr ((void *) &sp->ItemListsHead[list], (void *) next); -+ if (next == NULL) -+ suptr_noerr ((void *) &sp->ItemListsTailp[list], (void *)&sp->ItemListsHead[list]); -+ return (itemp); -+} -+ -+static void -+sys_putItemBack (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemBack: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, NULL); /* item->Next = NULL */ -+ suptr_noerr ((void **) fuptr_noerr ((void **) &sp->ItemListsTailp[list]), (void *)itemp); /* *Tailp = item */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &item->Next */ -+} -+ -+static void -+sys_putItemFront (SYS_SWAP_SPACE *sp, int list, void *itemp) -+{ -+ PRINTF4 (DBG_DEVICE, DBG_SYSCALL, "sys_putItemFront: sp=%p list=%d itemp=%p value=%08x\n", -+ sp, list, itemp, fuword_noerr ((int *) &((SYS_WORD_ITEM *) itemp)->Value)); -+ -+ suptr_noerr ((void **) itemp, fuptr_noerr ((void **) &sp->ItemListsHead[list])); /* item->Next = Head */ -+ suptr_noerr ((void **) &sp->ItemListsHead[list], (void *) itemp); /* Head = item */ -+ -+ if (fuptr_noerr ((void **) &sp->ItemListsTailp[list]) == (void *) &sp->ItemListsHead[list]) /* if (Tailp == &Head) */ -+ suptr_noerr ((void **) &sp->ItemListsTailp[list], (void *) itemp); /* Tailp = &Item->Next */ -+} -+ -+ -+static int -+sys_getWordItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = (SYS_WORD_ITEM *) sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ if (list == LIST_DMA_PTR) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = (E3_Addr) fuword_noerr ((E3_int32 *) &item->Value); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getWordItem: list=%d -> item=%p value=%08x\n", list, *itemp, *valuep); -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static int -+sys_getBlockItem (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ int res; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ item = sys_getItem (sp, list); -+ -+ if (item == NULL) -+ res = 0; -+ else -+ { -+ E3_uint32 *dest = fuptr_noerr ((void **) &item->Pointer); -+ -+ if (list == LIST_DMA_DESC) -+ sctx->Armed = TRUE; -+ -+ *itemp = (void *) item; -+ *valuep = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t) dest); -+ -+ PRINTF3 (ctxt, DBG_SYSCALL, "sys_getBlockItem: list=%d -> item=%p addr=%08x\n", list, *itemp, *valuep); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+ -+ -+ res = 1; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+static void -+sys_putWordItem (ELAN3_CTXT *ctxt, int list, E3_Addr value) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_WORD_ITEM *item; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ PRINTF2 (ctxt,DBG_SYSCALL, "sys_putWordItem: list=%x value=%x\n", list, value); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_WORD); -+ -+ PRINTF1 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p\n", item); -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: storing value=%08x at %p\n", value, &item->Value); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putWordItem: item=%p value=%08x\n", item, value); -+ -+ suword_noerr ((E3_int32 *) &item->Value, value); /* write "value" into item */ -+ -+ sys_putItemBack (sp, list, item); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putBlockItem (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item; -+ label_t ljp; -+ E3_uint32 *source; -+ E3_uint32 *dest; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: list=%x ptr=%p\n", list, ptr); -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ item = sys_getItem (sp, LIST_FREE_BLOCK); /* get an item from the freelist. */ -+ -+ if (item == NULL) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAILED, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ /* -+ * The block will have been read using 64 bit reads, since we have -+ * to write it to user memory using 32 bit writes, we need to perform -+ * an endian swap on the Ultrasparc. -+ */ -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ source = (E3_uint32 *) ptr; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_putBlockItem: item=%p dest=%p\n",item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[0^WordEndianFlip], source[1^WordEndianFlip], source[2^WordEndianFlip], source[3^WordEndianFlip]); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ source[4^WordEndianFlip], source[5^WordEndianFlip], source[6^WordEndianFlip], source[7^WordEndianFlip]); -+ -+ suword_noerr ((E3_int32 *) &dest[7], (E3_int32) source[7^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[6], (E3_int32) source[6^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[5], (E3_int32) source[5^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[4], (E3_int32) source[4^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[3], (E3_int32) source[3^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[2], (E3_int32) source[2^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[1], (E3_int32) source[1^WordEndianFlip]); -+ suword_noerr ((E3_int32 *) &dest[0], (E3_int32) source[0^WordEndianFlip]); -+ -+ sys_putItemBack (sp, list, item); /* chain onto list of items. */ -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeWordItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_WORD, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemBack (sp, LIST_FREE_WORD, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_freeBlockItem (ELAN3_CTXT *ctxt, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ SYS_BLOCK_ITEM *item = (SYS_BLOCK_ITEM *)itemp; -+ E3_uint32 *dest; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, LIST_FREE_BLOCK, (void *) NULL, null_valist); -+ return; -+ } -+#ifdef DEBUG_PRINTF -+ dest = (E3_uint32 *) fuptr_noerr ((void **) &item->Pointer); -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_freeBlockItem: item=%p dest=%p\n", item, dest); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[0]), fuword_noerr ((int *) &dest[1]), -+ fuword_noerr ((int *) &dest[2]), fuword_noerr ((int *) &dest[3])); -+ PRINTF4 (ctxt, DBG_SYSCALL, " %08x %08x %08x %08x\n", -+ fuword_noerr ((int *) &dest[4]), fuword_noerr ((int *) &dest[5]), -+ fuword_noerr ((int *) &dest[6]), fuword_noerr ((int *) &dest[7])); -+#endif -+ -+ sys_putItemBack (sp, LIST_FREE_BLOCK, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static void -+sys_putbackItem (ELAN3_CTXT *ctxt, int list, void *itemp) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ label_t ljp; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return; -+ } -+ -+ sys_putItemFront (sp, list, itemp); -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+} -+ -+static int -+sys_countItems (ELAN3_CTXT *ctxt, int list) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int count = 0; -+ void *item; -+ label_t ljb; -+ -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ sys_exception (ctxt, EXCEPTION_SWAP_FAULT, list, (void *) NULL, null_valist); -+ return (0); -+ } -+ -+ for (item = (void *) fuptr_noerr ((void **) &sp->ItemListsHead[list]); -+ item != NULL; -+ item = (void *) fuptr_noerr ((void **) item)) -+ { -+ count++; -+ } -+ -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (count); -+} -+ -+ -+long sys_longTime; -+long sys_shortTime; -+int sys_waitTicks; -+int sys_maxBackoff; -+ -+#define SYS_LONG_TIME MAX((hz * 5) / 1000, 1) /* 5 ms */ -+#define SYS_SHORT_TIME MAX((hz * 2) / 1000, 1) /* 2 ms */ -+#define SYS_WAIT_TICKS MAX((hz * 1) / 1000, 1) /* 1 ms - backoff granularity */ -+#define SYS_MAX_BACKOFF MAX((hz * 5) / 1000, 1) /* 5 ms - max backoff for "nacked" packets*/ -+#define SYS_TIMEOUT_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - backoff for output timeout (point to point) */ -+#define SYS_BCAST_BACKOFF MAX((hz * 50) / 1000, 1) /* 50 ms - backoff for output timeout (broadcast) */ -+#define SYS_NETERR_BACKOFF MAX((hz * 10) / 1000, 1) /* 10 ms - delay for network error in dma data */ -+ -+static void -+sys_backoffWait (ELAN3_CTXT *ctxt, int ticks) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ long t; -+ -+ spin_lock (&sctx->WaitLock); -+ -+ t = lbolt - sctx->Time; -+ -+ if (sys_longTime == 0) sys_longTime = SYS_LONG_TIME; -+ if (sys_shortTime == 0) sys_shortTime = SYS_SHORT_TIME; -+ if (sys_waitTicks == 0) sys_waitTicks = SYS_WAIT_TICKS; -+ if (sys_maxBackoff == 0) sys_maxBackoff = SYS_MAX_BACKOFF; -+ -+ if (t > sys_longTime) /* It's a long time since the last trap */ -+ sctx->Backoff = 0; /* so set the backoff back down to 0 */ -+ -+ if (ticks) -+ { -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d ticks [%lx]\n", ticks, t); -+ kcondvar_timedwait (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + ticks); -+ } -+ else if (sctx->Armed) -+ { -+ if (t < sys_shortTime) /* It's been a short time since the last */ -+ { /* trap, so increase the backoff */ -+ sctx->Backoff++; -+ -+ if (sctx->Backoff > sys_maxBackoff) -+ sctx->Backoff = sys_maxBackoff; -+ } -+ -+ PRINTF2 (ctxt, DBG_DPROC, "sys_backoffWait : Waiting - %d [%lx]\n", sctx->Backoff, t); -+ -+ if (sctx->Backoff) -+ kcondvar_timedwaitsig (&sctx->NetworkErrorWait, &sctx->WaitLock, NULL, lbolt + sctx->Backoff * sys_waitTicks); -+ -+ sctx->Armed = 0; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_DPROC, "sys_backoffWait : Not Waiting - %d\n", sctx->Backoff); -+ -+ } -+ sctx->Time = lbolt; -+ -+ spin_unlock (&sctx->WaitLock); -+} -+ -+static int -+trapSize (int proc) -+{ -+ switch (proc) -+ { -+ case DMA_PROC: return (sizeof (DMA_TRAP)); -+ case THREAD_PROC: return (sizeof (THREAD_TRAP)); -+ case COMMAND_PROC: return (sizeof (COMMAND_TRAP)); -+ case INPUT_PROC: return (sizeof (INPUT_TRAP)); -+ default: return (0); -+ } -+} -+ -+static int -+sys_exception (ELAN3_CTXT *ctxt, int type, int proc, void *trapp, va_list ap) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ int res; -+ -+ PRINTF2 (ctxt, DBG_SYSCALL, "sys_exception: type %d proc %d\n", type, proc); -+ -+ switch (type) -+ { -+ case EXCEPTION_INVALID_ADDR: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int res = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, res, 0); -+ break; -+ } -+ -+ case EXCEPTION_UNIMP_INSTR: -+ { -+ E3_uint32 instr = va_arg (ap, E3_uint32); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, instr); -+ break; -+ } -+ -+ case EXCEPTION_INVALID_PROCESS: -+ { -+ E3_uint32 vproc = va_arg (ap, E3_uint32); -+ int res = va_arg (ap, int); -+ -+ switch (proc) -+ { -+ case DMA_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMA_BADVP) -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ -+ case THREAD_PROC: -+ if (sctx->Flags & ELAN3_SYS_FLAG_THREAD_BADVP) -+ { -+ THREAD_TRAP *trap = (THREAD_TRAP *) trapp; -+ -+ trap->TrapBits.s.PacketAckValue = E3_PAckError; -+ -+ return (OP_IGNORE); -+ } -+ break; -+ } -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, vproc); -+ break; -+ } -+ -+ case EXCEPTION_FAULTED: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_QUEUE_OVERFLOW: -+ { -+ E3_FaultSave_BE *faultSave = va_arg (ap, E3_FaultSave_BE *); -+ int trapType = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), faultSave, 0, trapType); -+ break; -+ } -+ -+ case EXCEPTION_COMMAND_OVERFLOW: -+ { -+ int count = va_arg (ap, int); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, count); -+ break; -+ } -+ -+ case EXCEPTION_CHAINED_EVENT: -+ { -+ E3_Addr addr = va_arg (ap, E3_Addr); -+ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, addr); -+ break; -+ } -+ -+ case EXCEPTION_DMA_RETRY_FAIL: -+ case EXCEPTION_PACKET_TIMEOUT: -+ if (proc != DMA_PROC) -+ sys_backoffWait (ctxt, SYS_TIMEOUT_BACKOFF); -+ else -+ { -+ DMA_TRAP *trap = (DMA_TRAP *) trapp; -+ -+ if (sctx->Flags & ELAN3_SYS_FLAG_DMAFAIL) -+ { -+ E3_BlockCopyEvent *event; -+ -+ if (trap->Desc.s.dma_direction != DMA_WRITE) -+ trap->Desc.s.dma_srcEvent = trap->Desc.s.dma_destEvent; -+ -+ /* change the source word to be E3_EVENT_FAILED */ -+ if ((event = (E3_BlockCopyEvent *) elan3mmu_mainaddr (ctxt->Elan3mmu, trap->Desc.s.dma_srcEvent)) == NULL) -+ { -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ suword (&event->ev_Source, E3_EVENT_FAILED); -+ wmb(); mmiob(); -+ -+ trap->Desc.s.dma_direction = DMA_WRITE; -+ trap->Desc.s.dma_size = 0; -+ trap->Desc.s.dma_source = (E3_Addr) 0; -+ trap->Desc.s.dma_dest = (E3_Addr) 0; -+ trap->Desc.s.dma_destEvent = (E3_Addr) 0; -+ trap->Desc.s.dma_destCookieVProc = 0; -+ trap->Desc.s.dma_srcCookieVProc = 0; -+ -+ return (OP_IGNORE); -+ } -+ -+ if (type == EXCEPTION_DMA_RETRY_FAIL) -+ sys_backoffWait (ctxt, 0); -+ else -+ { -+ ELAN_LOCATION location; -+ -+ krwlock_read (&ctxt->VpLock); -+ location = ProcessToLocation (ctxt, NULL, trap->Desc.s.dma_direction == DMA_WRITE ? -+ trap->Desc.s.dma_destVProc : trap->Desc.s.dma_srcVProc, NULL); -+ krwlock_done (&ctxt->VpLock); -+ -+ sys_backoffWait (ctxt, location.loc_node == ELAN3_INVALID_NODE ? SYS_BCAST_BACKOFF : SYS_TIMEOUT_BACKOFF); -+ } -+ } -+ return (OP_IGNORE); -+ -+ case EXCEPTION_NETWORK_ERROR: -+ { -+ INPUT_TRAP *trap = (INPUT_TRAP *) trapp; -+ NETERR_RESOLVER **rvpp = va_arg (ap, NETERR_RESOLVER **); -+ -+ ASSERT (trap->State == CTXT_STATE_NETWORK_ERROR); -+ -+ if (! (sctx->Flags & ELAN3_SYS_FLAG_NETERR) && (trap->DmaIdentifyTransaction || trap->ThreadIdentifyTransaction)) -+ { -+ if ((*rvpp) != (NETERR_RESOLVER *) NULL) -+ res = (*rvpp)->Status; -+ else if ((res = QueueNetworkErrorResolver (ctxt, trap, rvpp)) == ESUCCESS) -+ { -+ /* Successfully queued the network error resolver */ -+ return (OP_HANDLED); -+ } -+ -+ /* network error resolution has failed - either a bad cookie or */ -+ /* an rpc error has occured */ -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, res, 0); -+ } -+ else -+ { -+ /* Must be an overlaped dma packet. Must wait long enough to -+ * ensure that the sending dma'er has tried to send the next -+ * packet and had it discarded. In the real world this should -+ * be greater than an output timeout. (About 8mSec) */ -+ -+ sys_backoffWait (ctxt, SYS_NETERR_BACKOFF); -+ -+ /* set this inputter state to be ok, since we've been called -+ * by the lwp it will lower the context filter for us, so -+ * re-enabling the inputter, note we don't need to execute -+ * any of the packet since the dma process will re-transmit -+ * it after receiving a nack for the next packet */ -+ trap->State = CTXT_STATE_OK; -+ -+ return (OP_HANDLED); -+ } -+ break; -+ } -+ -+ default: -+ sys_addException (sctx, type, proc, trapp, trapSize(proc), NULL, 0, 0); -+ break; -+ } -+ -+ if (type != EXCEPTION_DEBUG) -+#ifdef LINUX -+#ifdef NO_NPTL -+ psignal (CURPROC()->p_opptr, sctx->signal); -+#else -+ psignal (CURPROC()->parent, sctx->signal); -+#endif -+#else -+ psignal (CURPROC(), sctx->signal); -+#endif -+ return (OP_HANDLED); -+} -+ -+static int -+sys_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_fire_cookie (sctx->Table, cookie); -+ -+ return (OP_HANDLED); -+} -+ -+static void -+sys_swapin (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapin\n"); -+} -+ -+static void -+sys_swapout (ELAN3_CTXT *ctxt) -+{ -+ PRINTF0 (ctxt, DBG_SYSCALL, "sys_swapout\n"); -+} -+ -+static void -+sys_freePrivate (ELAN3_CTXT *ctxt) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ -+ cookie_free_table (sctx->Table); -+ -+ kmutex_destroy (&sctx->Lock); -+ spin_lock_destroy (&sctx->WaitLock); -+ kcondvar_destroy (&sctx->NetworkErrorWait); -+ -+ KMEM_FREE (sctx, sizeof (SYS_CTXT)); -+ ctxt->Private = NULL; -+} -+ -+static int -+sys_checkThisDma (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, E3_DMA *dma) -+{ -+ E3_DmaType type; -+ E3_uint32 cookie; -+ E3_uint32 cvproc; -+ int ignore; -+ int match; -+ -+ type.type = fuword_noerr ((int *) &dma->dma_type); -+ -+ if (type.s.direction == DMA_WRITE) -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ } -+ else -+ { -+ cookie = fuword_noerr ((int *) &dma->dma_destCookieVProc); -+ cvproc = fuword_noerr ((int *) &dma->dma_srcCookieVProc); -+ } -+ -+ PRINTF5 (ctxt, DBG_NETERR, "sys_checkThisDma: dir = %d cookie = %08x cvproc = %08x CookieVProc %08x DstProcess %04x\n", -+ type.s.direction, cookie, cvproc, nef->Message.CookieVProc, nef->Message.DstProcess); -+ -+ /* A DMA matches a network errror fixup if it's going to the right place (or is a broadcast) -+ * and the approriate cookie matches, except that we ignore DMA's which don't have a destEvent -+ * since they don't have any atomic behaviour (though they still send the identify) */ -+ -+ ignore = (type.s.direction == DMA_WRITE && cookie == 0 && -+ fuword_noerr ((int *) &dma->dma_destEvent) == 0); -+ match = (nef->Message.CookieVProc == cookie && -+ (nef->Message.DstProcess == (cvproc & DMA_PROCESS_MASK) || nef->Message.WaitForEop)); -+ -+ PRINTF2 (ctxt, DBG_NETERR, " -> %s %s\n", ignore ? "ignore" : match ? "matched" : "not-matched", nef->Message.WaitForEop ? "wait for eop" : ""); -+ -+ if (match && !ignore && !nef->Message.WaitForEop) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "sys_checkThisDma: nuking the dma\n"); -+ -+ /* NOTE - we access the dma descriptor backwards since it could exist in sdram */ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_srcEvent, 0); -+ -+ suword_noerr ((int *) &dma->dma_destEvent, 0); -+ suword_noerr ((int *) &dma->dma_dest, 0); -+ suword_noerr ((int *) &dma->dma_source, 0); -+ suword_noerr ((int *) &dma->dma_size, 0); -+ -+ if (type.s.direction != DMA_WRITE) -+ suword_noerr ((int *) &dma->dma_type, fuword_noerr ((int *) &dma->dma_type) & E3_DMA_CONTEXT_MASK); -+ -+ wmb(); mmiob(); -+ } -+ -+ return (match && !ignore); -+} -+ -+static int -+sys_fixupNetworkError (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef) -+{ -+ SYS_CTXT *sctx = (SYS_CTXT *) ctxt->Private; -+ SYS_SWAP_SPACE *sp = sctx->Swap; -+ int matched = 0; -+ SYS_WORD_ITEM *wordp; -+ SYS_BLOCK_ITEM *blockp; -+ label_t ljb; -+ int res; -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError %08x %08x %08x\n", -+ nef->Message.CookieAddr, nef->Message.CookieVProc, nef->Message.NextCookie); -+ -+ if (nef->Message.CookieAddr == (E3_Addr) 0) /* It's a DMA which requires fixing up */ -+ { -+ kmutex_lock (&sctx->Lock); -+ -+ if (on_fault (&ljb)) -+ res = EFAULT; -+ else -+ { -+ /* scan the dma ptr list */ -+ for (wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_PTR]); -+ wordp != NULL; -+ wordp = (SYS_WORD_ITEM *) fuptr_noerr ((void **) &wordp->Next)) -+ { -+ E3_uint32 value = fuword_noerr ((int *) &wordp->Value); -+ E3_DMA *dma = (E3_DMA *) elan3mmu_mainaddr (ctxt->Elan3mmu, value); -+ -+ PRINTF3 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Value %08x dma %p\n", wordp, value, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* scan the dma desc list */ -+ for (blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &sp->ItemListsHead[LIST_DMA_DESC]); -+ blockp != NULL; -+ blockp = (SYS_BLOCK_ITEM *) fuptr_noerr ((void **) &blockp->Next)) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &blockp->Pointer); -+ -+ PRINTF2 (ctxt, DBG_NETERR, "sys_fixupnetworkError: check block item %p Pointer %p\n", blockp, dma); -+ -+ matched += sys_checkThisDma (ctxt, nef, dma); -+ } -+ -+ /* If we've still not found it, then check the command port item */ -+ /* it MUST be present as a command waiting to be executed, as */ -+ /* otherwise it could have already happened and we will claim to */ -+ /* have found it, but not realy */ -+ if (ctxt->CommandPortItem != NULL) -+ { -+ E3_DMA *dma = (E3_DMA *) fuptr_noerr ((void *) &((SYS_BLOCK_ITEM *) ctxt->CommandPortItem)->Pointer); -+ -+ if (sys_checkThisDma (ctxt, nef, dma)) -+ { -+ printk ("!!! it's the command port item - need to ensure that the command exists\n"); -+ matched++; -+ } -+ } -+ -+ res = matched ? ESUCCESS : ESRCH; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (matched > 1) -+ ElanException (ctxt, EXCEPTION_COOKIE_ERROR, DMA_PROC, NULL, NULL, nef->Message.CookieVProc); -+ } -+ else /* It's a thread which requires fixing up */ -+ { -+ E3_int32 *cookiePtr = (E3_int32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, nef->Message.CookieAddr); -+ E3_uint32 curval = fuword_noerr (cookiePtr); -+ -+ if (curval == nef->Message.CookieVProc) /* thread doesn't think it's been done */ -+ { -+ if (! nef->Message.WaitForEop) -+ { -+ suword_noerr (cookiePtr, nef->Message.NextCookie); -+ mb(); mmiob(); -+ } -+ -+ res = ESUCCESS; -+ } -+ else /* thread thinks that it's been executed */ -+ { -+ res = ESRCH; -+ } -+ } -+ -+ CompleteNetworkErrorFixup (ctxt, nef, res); -+ -+ return (OP_HANDLED); -+} -+ -+ -+static int -+sys_startFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ return (0); -+} -+ -+static void -+sys_endFaultCheck (ELAN3_CTXT *ctxt) -+{ -+ wmb(); -+} -+ -+static E3_uint8 -+sys_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fubyte_noerr (maddr)); -+} -+ -+static void -+sys_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ E3_uint8 *maddr = (E3_uint8 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ subyte_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint16 -+sys_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fusword_noerr (maddr)); -+} -+ -+static void -+sys_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ E3_uint16 *maddr = (E3_uint16 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ susword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint32 -+sys_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fuword_noerr (maddr)); -+} -+ -+static void -+sys_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ E3_uint32 *maddr = (E3_uint32 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ suword_noerr (maddr, val); -+ wmb(); mmiob(); -+} -+ -+static E3_uint64 -+sys_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ return (fulonglong_noerr ((long long *) maddr)); -+} -+ -+static void -+sys_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ E3_uint64 *maddr = (E3_uint64 *) elan3mmu_mainaddr (ctxt->Elan3mmu, addr); -+ -+ sulonglong_noerr ((long long *) maddr, val); -+ wmb(); mmiob(); -+} -+ -+ -+void -+sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t trapp, int size, -+ E3_FaultSave_BE *faultSave, u_long res, u_long value) -+{ -+ SYS_EXCEPTION *ex_ptr; -+ int front; -+ int back; -+ int count; -+ label_t ljp; -+ -+ PRINTF4 (DBG_DEVICE, DBG_FN, "sys_addException: type %d proc %d res %ld value %ld\n", -+ type, proc, res, value); -+ -+ KMEM_ZALLOC (ex_ptr, SYS_EXCEPTION *, sizeof (SYS_EXCEPTION), TRUE); -+ -+ if (ex_ptr != NULL) -+ { -+ bzero ((caddr_t) ex_ptr, sizeof (SYS_EXCEPTION)); -+ -+ ex_ptr->Type = type; -+ ex_ptr->Proc = proc; -+ ex_ptr->Res = res; -+ ex_ptr->Value = value; -+ -+ if (trapp && size) -+ bcopy (trapp, (caddr_t) &ex_ptr->Union, size); -+ if (faultSave) -+ bcopy ((caddr_t) faultSave, (caddr_t) &ex_ptr->FaultArea, sizeof (E3_FaultSave_BE)); -+ } -+ -+ kmutex_lock (&sctx->Lock); -+ if (! on_fault (&ljp)) -+ { -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else if (((front+1) % count ) == back) -+ suword_noerr (&sctx->Exceptions->Overflow, fuword_noerr (&sctx->Exceptions->Overflow) + 1); -+ else -+ { -+ if (ex_ptr != NULL) -+ copyout_noerr ((caddr_t) ex_ptr, (caddr_t) &sctx->Exceptions->Exceptions[front], sizeof (SYS_EXCEPTION)); -+ else -+ { -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Type, EXCEPTION_ENOMEM); -+ suword_noerr (&sctx->Exceptions->Exceptions[front].Proc, 0); -+ } -+ suword_noerr (&sctx->Exceptions->Front, (front + 1) % count); -+ } -+ -+ /* always reset the magic number in case it's been overwritten */ -+ /* so that 'edb' can find the exception page in the core file */ -+ suword_noerr (&sctx->Exceptions->Magic, SYS_EXCEPTION_MAGIC); -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ if (ex_ptr != NULL) -+ KMEM_FREE (ex_ptr, sizeof (SYS_EXCEPTION)); -+} -+ -+int -+sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex) -+{ -+ int front; -+ int back; -+ int count; -+ int res; -+ label_t ljp; -+ -+ if (sctx->Exceptions == NULL) -+ return (EINVAL); -+ -+ kmutex_lock (&sctx->Lock); -+ if (on_fault (&ljp)) -+ { -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ return (EFAULT); -+ } -+ -+ front = fuword_noerr (&sctx->Exceptions->Front); -+ back = fuword_noerr (&sctx->Exceptions->Back); -+ count = fuword_noerr (&sctx->Exceptions->Count); -+ -+ if (count <= 0 || front < 0 || back < 0 || front >= count || back >= count || back == front) -+ res = EINVAL; -+ else -+ { -+ copyin_noerr ((caddr_t) &sctx->Exceptions->Exceptions[back], (caddr_t) ex, sizeof (SYS_EXCEPTION)); -+ suword_noerr (&sctx->Exceptions->Back, (back+1) % count); -+ -+ res = ESUCCESS; -+ } -+ no_fault(); -+ kmutex_unlock (&sctx->Lock); -+ -+ return (res); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/eventcookie.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/eventcookie.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/eventcookie.c 2005-07-28 14:52:52.806685120 -0400 -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: eventcookie.c,v 1.7 2003/08/13 10:03:03 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/eventcookie.c,v $*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static EVENT_COOKIE_TABLE *cookie_tables; -+static spinlock_t cookie_table_lock; -+ -+/* -+ * cookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+cookie_drop_entry (EVENT_COOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+} -+ -+void -+cookie_init() -+{ -+ spin_lock_init (&cookie_table_lock); -+} -+ -+void -+cookie_fini() -+{ -+ spin_lock_destroy (&cookie_table_lock); -+} -+ -+EVENT_COOKIE_TABLE * -+cookie_alloc_table (unsigned long task, unsigned long handle) -+{ -+ EVENT_COOKIE_TABLE *tbl, *ntbl; -+ -+ KMEM_ZALLOC (ntbl, EVENT_COOKIE_TABLE *, sizeof (EVENT_COOKIE_TABLE), TRUE); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ spin_lock (&cookie_table_lock); -+ -+ for (tbl = cookie_tables; tbl; tbl = tbl->tbl_next) -+ if (tbl->tbl_task == task && tbl->tbl_handle == handle) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_task = task; -+ ntbl->tbl_handle = handle; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ if ((ntbl->tbl_next = cookie_tables) != NULL) -+ cookie_tables->tbl_prev = ntbl; -+ cookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&cookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (EVENT_COOKIE_TABLE)); -+ return (tbl); -+ } -+} -+ -+void -+cookie_free_table (EVENT_COOKIE_TABLE *tbl) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ -+ spin_lock (&cookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&cookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ cookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&cookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ cookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (EVENT_COOKIE_TABLE)); -+} -+ -+int -+cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, EVENT_COOKIE_ENTRY *, sizeof (EVENT_COOKIE_ENTRY), TRUE); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (ESUCCESS); -+ else -+ { -+ KMEM_FREE (nent, sizeof (EVENT_COOKIE_ENTRY)); -+ return (EINVAL); -+ } -+} -+ -+int -+cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ cookie_drop_entry (ent); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * cookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (ESUCCESS); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (EVENT_COOKIE_ENTRY)); -+ } -+ return (ESUCCESS); -+} -+ -+int -+cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie) -+{ -+ EVENT_COOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/iproc.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/iproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/iproc.c 2005-07-28 14:52:52.808684816 -0400 -@@ -0,0 +1,925 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: iproc.c,v 1.47 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/iproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+static int TrSizeTable[] = {0, 8, 16, 32, 64}; -+ -+static void ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr); -+static void SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap); -+static void BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp); -+ -+void -+HandleIProcTrap (ELAN3_DEV *dev, -+ int Channel, -+ E3_uint32 Pend, -+ sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, -+ sdramaddr_t DataOff) -+{ -+ E3_IprocTrapHeader_BE Transaction0; -+ ELAN3_CTXT *ctxt; -+ INPUT_TRAP *trap; -+ register int i; -+ -+ /* -+ * Read the 1st set of transactions, so we can determine the -+ * context for the trap -+ */ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff, (void *) &Transaction0, 16); -+ -+ BumpStat (dev, IProcTraps); -+ BumpInputterStats (dev, &Transaction0); -+ -+ if (Transaction0.s.TrTypeCntx.s.TypeCntxInvalid) -+ { -+ /* -+ * The context is not valid. This will occur if the packet -+ * trapped for an EopError with no IdentTrans or an error corrupted the context -+ * giving a CRC error on the first transaction and the Ack had not been returned. -+ */ -+ if (Transaction0.s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: Error on EOP without a good context, ignoring trap\n"); -+ } -+ else -+ { -+ /* Check that only crap has been received. If not then die. */ -+ if (! Transaction0.s.IProcTrapStatus.s.BadLength && -+ (Transaction0.s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_GOOD) -+ { -+ printk ("iproc: Did not have a valid context for the trap area.\n"); -+ printk ("iproc: TrTypeCntx=%x TrAddr=%x TrData0=%x IProcTrapStatus=%x\n", -+ Transaction0.s.TrTypeCntx.TypeContext, Transaction0.s.TrAddr, -+ Transaction0.s.TrData0, Transaction0.s.IProcTrapStatus.Status); -+ panic ("elan3: iproc did not have a valid context"); -+ /* NOTREACHED */ -+ } -+ PRINTF0 (DBG_DEVICE, DBG_IPROC, "iproc: First transaction is bad, ignoring trap\n"); -+ } -+ } -+ else -+ { -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, Transaction0.s.TrTypeCntx.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "HandleIProcTrap: context %x invalid\n", -+ Transaction0.s.TrTypeCntx.s.Context); -+ -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ trap = (Channel == 0) ? &ctxt->Input0Trap : &ctxt->Input1Trap; -+ -+ ASSERT (trap->State == CTXT_STATE_OK); -+ -+ trap->Transactions[0] = Transaction0; -+ -+ PRINTF1 (ctxt, DBG_INTR, "HandleIProcTrap: %s\n", IProcTrapString (&trap->Transactions[0], NULL)); -+ /* -+ * Copy the rest of the transactions into the trap area. -+ */ -+ for (i = 0; !(trap->Transactions[i].s.TrTypeCntx.s.LastTrappedTrans);) -+ { -+ if (++i >= MAX_TRAPPED_TRANS) -+ { -+ trap->Overflow = 1; -+ break; -+ } -+ -+ elan3_sdram_copyq_from_sdram (dev, TransactionsOff + i*sizeof (E3_IprocTrapHeader), (void *) &trap->Transactions[i], 16); -+ -+ PRINTF1 (ctxt, DBG_INTR, " %s\n", IProcTrapString (&trap->Transactions[i], NULL)); -+ -+ BumpInputterStats (dev, &trap->Transactions[i]); -+ } -+ -+ /* -+ * Remember the number of transactions we've copied. -+ */ -+ trap->NumTransactions = i+1; -+ -+ PRINTF1 (ctxt, DBG_INTR, " NumTransactions = %d\n", trap->NumTransactions); -+ -+ /* -+ * Copy all the data blocks in one go to let the Elan prefetcher work -+ */ -+ elan3_sdram_copyq_from_sdram (dev, DataOff, trap->DataBuffers, trap->NumTransactions*sizeof (E3_IprocTrapData)); -+ -+ /* -+ * Copy fault save area and clear out for next time round. -+ */ -+ elan3_sdram_copyq_from_sdram (dev, FaultSaveOff, (void *) &trap->FaultSave, 16); -+ elan3_sdram_zeroq_sdram (dev, FaultSaveOff, 16); -+ -+ if (ELAN3_OP_IPROC_TRAP (ctxt, trap, Channel) == OP_DEFER) -+ { -+ /* -+ * Mark the trap as valid and set the inputter state to -+ * raise the context filter. -+ */ -+ trap->State = CTXT_STATE_TRAPPED; -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ SetInputterStateForContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+} -+ -+void -+InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ int i; -+ int StatusValid; -+ -+ trap->AckSent = 0; -+ trap->BadTransaction = 0; -+ -+ trap->TrappedTransaction = NULL; -+ trap->TrappedDataBuffer = NULL; -+ trap->WaitForEopTransaction = NULL; -+ trap->WaitForEopDataBuffer = NULL; -+ trap->DmaIdentifyTransaction = NULL; -+ trap->ThreadIdentifyTransaction = NULL; -+ trap->LockQueuePointer = (E3_Addr) 0; -+ trap->UnlockQueuePointer = (E3_Addr) 0; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < trap->NumTransactions ; i++) -+ { -+ E3_IprocTrapHeader_BE *hdrp = &trap->Transactions[i]; -+ E3_IprocTrapData_BE *datap = &trap->DataBuffers[i]; -+ -+ StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid != 0; -+ -+ if (StatusValid && hdrp->s.IProcTrapStatus.s.AckSent) /* Remember if we've sent the ack back */ -+ trap->AckSent = 1; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* Check for EOP */ -+ { -+ ASSERT (i == trap->NumTransactions - 1); -+ -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ BumpUserStat (ctxt, EopBadAcks); -+ -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. We can clear tinfo.AckSent. */ -+ if (trap->AckSent == 1) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: Network error destroyed PAckOk\n"); -+ trap->AckSent = 0; -+ } -+ break; -+ -+ case EOP_ERROR_RESET: -+ BumpUserStat (ctxt, EopResets); -+ -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->BadTransaction = 1; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid EOP type in status register\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ PRINTF2 (ctxt, DBG_IPROC, "InspectIProcTrap: %2d: %s\n", i, IProcTrapString (hdrp, datap)); -+ -+ if (! StatusValid) /* We're looking at transactions stored before the trap */ -+ { /* these should only be identifies and lock transactions */ -+ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ panic ("InspectIProcTrap: writeblock transaction found in input trap header before trap occured\n"); -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->LockQueuePointer) /* Already seen a LOCKQUEUE transaction in this packet, */ -+ { /* the user program should not have done this !! */ -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ -+ trap->LockQueuePointer = (E3_Addr) hdrp->s.TrAddr; /* Remember the queue pointer in case we need to unlock it */ -+ break; -+ -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->DmaIdentifyTransaction = hdrp; -+ break; -+ -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: -+ if (trap->DmaIdentifyTransaction || /* Already seen an identify transaction in this packet */ -+ trap->ThreadIdentifyTransaction) /* the user program should not have done this */ -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->ThreadIdentifyTransaction = hdrp; -+ break; -+ -+ default: -+ panic ("InspectIProcTrap: invalid transaction found in input trap header before trap occured\n"); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ -+ if (StatusValid && trap->TrappedTransaction == NULL) /* Remember the transaction which caused the */ -+ { /* trap */ -+ trap->TrappedTransaction = hdrp; -+ trap->TrappedDataBuffer = datap; -+ } -+ -+ if(hdrp->s.IProcTrapStatus.s.BadLength || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) || -+ ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD)) -+ { -+ int j; -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: transaction has a bad crc\n"); -+ for (j=0; jTrData[j], datap->TrData[j+1], datap->TrData[j+2], datap->TrData[j+3]); -+ trap->BadTransaction = 1; -+ continue; -+ } -+ -+ /* No more to do if it's a writeblock transaction */ -+ if (hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) -+ continue; -+ -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap && -+ (hdrp->s.TrTypeCntx.s.Type & TR_WAIT_FOR_EOP) != 0) -+ { -+ /* -+ * This is a wait for eop transaction that has trapped because the inputer -+ * then received a EopError. The next transaction saved should always be an -+ * EopError. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "InspectIProcTrap: got a trapped WaitForEop transaction due to EopError\n"); -+ -+ trap->WaitForEopTransaction = hdrp; -+ trap->WaitForEopDataBuffer = datap; -+ continue; -+ } -+ -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (trap->UnlockQueuePointer) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ return; -+ } -+ trap->UnlockQueuePointer = (E3_Addr) hdrp->s.TrAddr; -+ break; -+ } -+ } -+} -+ -+void -+ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ int res; -+ unsigned long flags; -+ -+ ASSERT (! CTXT_IS_KERNEL (ctxt)); -+ -+ BumpUserStat (ctxt, IProcTraps); -+ -+ InspectIProcTrap (ctxt, trap); -+ -+ /* -+ * fixup page fault if we've trapped because of one. -+ */ -+ if (trap->FaultSave.s.FaultContext != 0) -+ { -+ /* -+ * If it's a WRITEBLOCK transaction, then see if we remember faulting -+ * before it, and try and prefault in a sensible amount past it. -+ */ -+ int fixedFault = FALSE; -+ INPUT_FAULT_SAVE *entry; -+ INPUT_FAULT_SAVE **predp; -+ int npages; -+ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0 && /* a DMA packet */ -+ trap->LockQueuePointer == (E3_Addr) 0 && /* but not a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr != 0) /* and not a DMA to 0 */ -+ { -+ spin_lock (&ctxt->InputFaultLock); -+ -+ for (predp = &ctxt->InputFaultList; (entry = *predp)->Next != NULL ; predp = &entry->Next) -+ { -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ break; -+ } -+ -+ *predp = entry->Next; -+ entry->Next = ctxt->InputFaultList; -+ ctxt->InputFaultList = entry; -+ -+ if (entry->Addr == trap->TrappedTransaction->s.TrAddr) -+ { -+ if ((entry->Count <<= 1) > MAX_INPUT_FAULT_PAGES) -+ entry->Count = MAX_INPUT_FAULT_PAGES; -+ } -+ else -+ { -+ entry->Count = MIN_INPUT_FAULT_PAGES; -+ } -+ -+ entry->Addr = trap->TrappedTransaction->s.TrAddr + (entry->Count * PAGESIZE); -+ npages = entry->Count; -+ -+ spin_unlock (&ctxt->InputFaultLock); -+ -+ if (elan3_pagefault (ctxt, &trap->FaultSave, npages) != ESUCCESS) -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - failed\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ } -+ else -+ { -+ PRINTF2 (ctxt, DBG_IPROC, "ResolveIProcTrap: pagefaulting %d pages at %08x - succeeded\n", -+ npages, trap->TrappedTransaction->s.TrAddr); -+ -+ fixedFault = TRUE; -+ } -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ /* the packet will have been nacked */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ fixedFault = TRUE; -+ } -+ -+ if (! fixedFault) -+ { -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "ResolveIProcTrap: elan3_pagefault failed at %x\n", -+ trap->FaultSave.s.FaultAddress); -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, INPUT_PROC, trap, &trap->FaultSave, res); -+ return; -+ } -+ } -+ } -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Queued DMA */ -+ { /* The ack was not sent, so the queue will be locked. */ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); /* We must unlock it. */ -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ if (trap->DmaIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Dma identify needs network resultion\n"); -+ -+ BumpStat (dev, DmaIdentifyNetworkErrors); -+ BumpUserStat (ctxt, DmaIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else if (trap->ThreadIdentifyTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: Thread identify needs network resolution\n"); -+ -+ BumpStat (dev, ThreadIdentifyNetworkErrors); -+ BumpUserStat (ctxt, ThreadIdentifyNetworkErrors); -+ -+ if (trap->WaitForEopTransaction) -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: have delayed wait for eop transaction\n"); -+ } -+ else -+ { -+ BumpStat (dev, DmaNetworkErrors); -+ BumpUserStat (ctxt, DmaNetworkErrors); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ if (! trap->AckSent) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack not sent, lowering context filter\n"); -+ -+ trap->State = CTXT_STATE_OK; -+ } -+ else -+ { -+ if (trap->BadTransaction) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on bad transaction\n"); -+ trap->State = CTXT_STATE_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "ResolveIProcTrap: ack sent, waiting on packet to be re-executed\n"); -+ trap->State = CTXT_STATE_NEEDS_RESTART; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ if (trap->AckSent && trap->BadTransaction) -+ ElanException (ctxt, EXCEPTION_NETWORK_ERROR, INPUT_PROC, trap, rvpp); -+} -+ -+int -+RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap) -+{ -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: %d transactions\n", trap->NumTransactions); -+ -+ if (trap->TrappedTransaction == NULL) /* No transaction trapped - probably a network */ -+ return (ESUCCESS); /* error */ -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ PRINTF2 (ctxt, DBG_IPROC, "RestartIProc: TrType=0x%x Status=0x%x\n", -+ hdrp->s.TrTypeCntx.TypeContext, hdrp->s.IProcTrapStatus.Status); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+ SimulateBlockWrite (ctxt, hdrp, datap); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ PRINTF1 (ctxt, DBG_IPROC, "RestartIProc: SETEVENT : %x\n", hdrp->s.TrAddr); -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_InputDoTrap) -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus), &trap->FaultSave, FALSE); -+ else if (hdrp->s.TrAddr) -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), hdrp->s.TrAddr, FALSE) != ISSUE_COMMAND_OK) -+ return (EAGAIN); -+ } -+ break; -+ -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: -+ SimulateWriteDWord (ctxt, hdrp, datap); -+ break; -+ -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) == MI_InputDoTrap) -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ else -+ { -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_WaitForUnLockDescRead: -+ /* -+ * Fault occured on the read of the queue descriptor - since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TR_UNLOCKQUEUE : desc read fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EAGAIN); -+ } -+ break; -+ -+ case MI_DoSetEvent: -+ /* -+ * Fault occured on either the write to unlock the queue or during -+ * processing of the event. Test the fault address against the -+ * queue address to find out which - in this case, since the ack -+ * has been sent we need to move the queue on one slot. -+ */ -+ if (trap->FaultSave.s.FaultAddress == trap->LockQueuePointer) -+ { -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: fixed unlock queue write to unlock fault\n"); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, TRUE); -+ -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, SetEvent), -+ hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue setevent to complete queue unlock, since we've already unlocked */ -+ /* the queue, we should "convert" this transaction into a setevent transaction that */ -+ /* hasn't trapped */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: could not issue setevent for SimulateUnlockQueue\n"); -+ -+ ConvertTransactionToSetEvent (ctxt, hdrp, hdrp->s.TrAddr + E3_QUEUE_EVENT_OFFSET); -+ return (EFAIL); -+ } -+ break; -+ } -+ /*DROPTHROUGH*/ -+ -+ default: -+ FixupEventTrap (ctxt, INPUT_PROC, trap, GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus), -+ &trap->FaultSave, FALSE); -+ break; -+ } -+ trap->LockQueuePointer = trap->UnlockQueuePointer = 0; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: ignore SENDDISCARD\n"); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: REMOTEDMA\n"); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ ((E3_DMA_BE *) datap)->s.dma_type &= ~(DMA_TYPE_READ | E3_DMA_CONTEXT_MASK); -+ ((E3_DMA_BE *) datap)->s.dma_type |= DMA_TYPE_ISREMOTE; -+ -+ RestartDmaDesc (ctxt, (E3_DMA_BE *) datap); -+ break; -+ -+ case TR_TRACEROUTE & TR_OPCODE_TYPE_MASK: -+ PRINTF0 (ctxt, DBG_IPROC, "RestartIProc: TRACEROUTE\n"); -+ SimulateTraceRoute (ctxt, hdrp, datap); -+ break; -+ -+ default: -+ ElanException (ctxt, EXCEPTION_BAD_PACKET, INPUT_PROC, trap); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (ESUCCESS); -+} -+ -+static void -+ConvertTransactionToSetEvent (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_Addr Addr) -+{ -+ hdrp->s.TrTypeCntx.s.Type = TR_SETEVENT; -+ hdrp->s.TrTypeCntx.s.StatusRegValid = 0; -+ hdrp->s.TrAddr = Addr; -+} -+ -+void -+SimulateBlockWrite (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ void *saddr = (void *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nbytes = (hdrp->s.TrTypeCntx.s.Type) & TR_PARTSIZE_MASK; -+ int i; -+ -+ if (nbytes == 0) -+ nbytes = sizeof (E3_IprocTrapData_BE); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateBlockWrite: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ /* -+ * NOTE: since the block copy could be to sdram, we issue the writes backwards, -+ * except we MUST ensure that the last item in the block is written last. -+ */ -+ switch (((hdrp->s.TrTypeCntx.s.Type) >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: /* 8 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint8)); i >= 0; i -= sizeof (E3_uint8)) -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint8); -+ ELAN3_OP_STORE8 (ctxt, hdrp->s.TrAddr + i, ((E3_uint8 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_SHORT: /* 16 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint16)); i >= 0; i -= sizeof (E3_uint16)) -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint16); -+ ELAN3_OP_STORE16 (ctxt, hdrp->s.TrAddr + i, ((E3_uint16 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_WORD: /* 32 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint32)); i >= 0; i -= sizeof (E3_uint32)) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint32); -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + i, ((E3_uint32 *) saddr)[i]); -+ break; -+ -+ case TR_TYPE_DWORD: /* 64 bit */ -+ for (i = nbytes - (2*sizeof (E3_uint64)); i >= 0; i -= sizeof (E3_uint64)) -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ i = nbytes - sizeof (E3_uint64); -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr + i, ((E3_uint64 *) saddr)[i]); -+ break; -+ } -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr, ((E3_uint32 *) datap)[WordEndianFlip]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateWriteDWord (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateWriteDWord: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ ELAN3_OP_STORE64 (ctxt, hdrp->s.TrAddr, ((E3_uint64 *) datap)[0]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateTraceRoute (ELAN3_CTXT *ctxt, E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ E3_uint32 *saddr = (E3_uint32 *) ((unsigned long) datap + (hdrp->s.TrAddr & 0x3f)); -+ unsigned nwords = TrSizeTable[(hdrp->s.TrTypeCntx.s.Type >> TR_SIZE_SHIFT) & TR_SIZE_MASK] / sizeof (E3_uint32); -+ int i; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "SimulateTraceRoute: faulted at %x\n", hdrp->s.TrAddr); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, hdrp->s.TrAddr); -+ return; -+ } -+ -+ for (i = nwords-2; i >= 0; i--) -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ i = nwords-1; -+ ELAN3_OP_STORE32 (ctxt, hdrp->s.TrAddr + (i * sizeof (E3_uint32)), saddr[i ^ WordEndianFlip]); -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+} -+ -+void -+SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck) -+{ -+ E3_uint32 QueueLock; -+ E3_Addr QueueBPTR; -+ E3_Addr QueueFPTR; -+ E3_uint64 QueueStateAndBPTR; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_IPROC, "UnlockQueue: faulted with QueuePointer %x\n", QueuePointer); -+ ElanException (ctxt, EXCEPTION_FAULTED, INPUT_PROC, NULL, QueuePointer); -+ return; -+ } -+ -+ if (SentAck) -+ { -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_bptr)); -+ QueueFPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_fptr)); -+ -+ if (QueueBPTR == ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_top))) /* move on back pointer */ -+ QueueBPTR = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_base)); -+ else -+ QueueBPTR += ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_size)); -+ -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ if (QueueBPTR == QueueFPTR) /* and set full bit if fptr == bptr */ -+ QueueLock |= E3_QUEUE_FULL; -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ QueueStateAndBPTR = (E3_uint64)QueueLock << 32 | QueueBPTR; -+ -+ ELAN3_OP_STORE64 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueStateAndBPTR); -+ } -+ else -+ { -+ QueueLock = ELAN3_OP_LOAD32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state)); -+ -+ QueueLock &= ~E3_QUEUE_LOCKED; -+ -+ ELAN3_OP_STORE32 (ctxt, QueuePointer + offsetof (E3_Queue, q_state), QueueLock); -+ } -+ -+ no_fault(); -+} -+ -+static void -+BumpInputterStats (ELAN3_DEV *dev, E3_IprocTrapHeader_BE *hdrp) -+{ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) /* EOP */ -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_BADACK: -+ BumpStat (dev, EopBadAcks); -+ break; -+ case EOP_ERROR_RESET: -+ BumpStat (dev, EopResets); -+ break; -+ } -+ } -+ else if (hdrp->s.TrTypeCntx.s.StatusRegValid) -+ { -+ /* -+ * Errors are tested in order of badness. i.e. badlength will prevent a BadCrc and so on... -+ */ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ BumpStat (dev, InputterBadLength); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_BAD) -+ BumpStat (dev, InputterCRCBad); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_ERROR) -+ BumpStat (dev, InputterCRCErrors); -+ else if ((hdrp->s.IProcTrapStatus.Status & CRC_MASK) == CRC_STATUS_DISCARD) -+ BumpStat (dev, InputterCRCDiscards); -+ } -+} -+ -+char * -+IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData_BE *datap) -+{ -+ static char buffer[256]; -+ static char typeString[256]; -+ static char statusString[256]; -+ char *ptr; -+ E3_Addr Addr = hdrp->s.TrAddr; -+ E3_uint32 Type = hdrp->s.TrTypeCntx.s.Type; -+ E3_uint32 Context = hdrp->s.TrTypeCntx.s.Context; -+ E3_uint32 StatusValid = hdrp->s.TrTypeCntx.s.StatusRegValid; -+ -+ if (hdrp->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ switch (hdrp->s.IProcTrapStatus.Status & E3_IPS_EopType) -+ { -+ case EOP_GOOD: sprintf (typeString, "EOP GOOD"); break; -+ case EOP_BADACK: sprintf (typeString, "EOP BADACK"); break; -+ case EOP_ERROR_RESET: sprintf (typeString, "EOP ERROR RESET"); break; -+ default: sprintf (typeString, "EOP - bad status"); break; -+ } -+ sprintf (buffer, "%15s Cntx=%08x", typeString, Context); -+ } -+ else -+ { -+ if (Type & TR_WRITEBLOCK_BIT) -+ { -+ switch ((Type >> TR_TYPE_SHIFT) & TR_TYPE_MASK) -+ { -+ case TR_TYPE_BYTE: ptr = "Byte"; break; -+ case TR_TYPE_SHORT: ptr = "Short"; break; -+ case TR_TYPE_WORD: ptr = "Word"; break; -+ case TR_TYPE_DWORD: ptr = "Double"; break; -+ default: ptr = "Unknown"; break; -+ } -+ -+ sprintf (typeString, "WriteBlock Type=%s Size=%2d", ptr, Type & TR_PARTSIZE_MASK); -+ } -+ else -+ { -+ switch (Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Setevent"); break; -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Remote DMA"); break; -+ case TR_LOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Lock Queue"); break; -+ case TR_UNLOCKQUEUE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Unlock Queue"); break; -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Send Discard"); break; -+ case TR_DMAIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "DMA Identify"); break; -+ case TR_THREADIDENTIFY & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Thread Identify"); break; -+ case TR_GTE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "GTE"); break; -+ case TR_LT & TR_OPCODE_TYPE_MASK: sprintf (typeString, "LT"); break; -+ case TR_EQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "EQ"); break; -+ case TR_NEQ & TR_OPCODE_TYPE_MASK: sprintf (typeString, "NEQ"); break; -+ case TR_WRITEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Word"); break; -+ case TR_WRITEDOUBLEWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Write Double"); break; -+ case TR_ATOMICADDWORD & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Atomic Add"); break; -+ case TR_TESTANDWRITE & TR_OPCODE_TYPE_MASK: sprintf (typeString, "Test and Write"); break; -+ default: sprintf (typeString, "Type=%d", Type & TR_OPCODE_TYPE_MASK); break; -+ } -+ } -+ sprintf (buffer, "%15s Addr=%08x Cntx=%08x", typeString, Addr, Context); -+ /*(Type & TR_SENDACK) ? " Sendack" : "", */ -+ /*(Type & TR_LAST_TRANS) ? " LastTrans" : "", */ -+ /*(Type & TR_WAIT_FOR_EOP) ? " WaitForEop" : ""); */ -+ } -+ -+ if (StatusValid) -+ { -+ sprintf (statusString, " Type=%s %x", MiToName (hdrp->s.IProcTrapStatus.s.TrapType), hdrp->s.IProcTrapStatus.Status); -+ strcat (buffer, statusString); -+ -+ if (hdrp->s.IProcTrapStatus.s.BadLength) -+ strcat (buffer, " BadLength"); -+ switch (hdrp->s.IProcTrapStatus.Status & CRC_MASK) -+ { -+ case CRC_STATUS_DISCARD: -+ strcat (buffer, " CRC Discard"); -+ break; -+ case CRC_STATUS_ERROR: -+ strcat (buffer, " CRC Error"); -+ break; -+ -+ case CRC_STATUS_BAD: -+ strcat (buffer, " CRC Bad"); -+ break; -+ } -+ } -+ -+ return (buffer); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/Makefile 2005-07-28 14:52:52.808684816 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan3/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_ELAN3) += elan3.o -+elan3-objs := context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/Makefile.conf 2005-07-28 14:52:52.808684816 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan3.o -+MODULENAME = elan3 -+KOBJFILES = context.o cproc.o dproc.o elandebug.o elandev_generic.o elansyscall.o eventcookie.o iproc.o sdram.o minames.o network_error.o route_table.o tproc.o tprocinsts.o routecheck.o virtual_process.o elan3ops.o context_linux.o elandev_linux.o procfs_linux.o tproc_linux.o elan3mmu_generic.o elan3mmu_linux.o -+EXPORT_KOBJS = elandev_linux.o procfs_linux.o -+CONFIG_NAME = CONFIG_ELAN3 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/minames.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/minames.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/minames.c 2005-07-28 14:52:52.809684664 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: minames.c,v 1.12 2003/06/07 15:57:49 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/minames.c,v $*/ -+ -+#include -+#include -+ -+caddr_t -+MiToName (int mi) -+{ -+ static char space[32]; -+ static struct { -+ int mi; -+ char *name; -+ } info[] = { -+#include -+ }; -+ register int i; -+ -+ -+ for (i = 0; i < sizeof(info)/sizeof(info[0]); i++) -+ if (info[i].mi == mi) -+ return (info[i].name); -+ sprintf (space, "MI %x", mi); -+ return (space); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/network_error.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/network_error.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/network_error.c 2005-07-28 14:52:52.810684512 -0400 -@@ -0,0 +1,777 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: network_error.c,v 1.32.2.1 2004/10/28 11:54:57 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/network_error.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef DIGITAL_UNIX -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+typedef xdrproc_t kxdrproc_t; -+#endif -+ -+#ifdef LINUX -+#include -+#include -+#include -+#include -+ -+#include -+#define SYS_NMLN __NEW_UTS_LEN -+#endif -+ -+#include -+ -+spinlock_t ResolveRequestLock; -+kcondvar_t ResolveRequestWait; -+ -+NETERR_RESOLVER *ResolveRequestHead; -+NETERR_RESOLVER **ResolveRequestTailp = &ResolveRequestHead; -+int ResolveRequestCount; -+int ResolveRequestThreads; -+int ResolveRequestMaxThreads = 4; -+int ResolveRequestTimeout = 60; -+ -+typedef struct neterr_server -+{ -+ struct neterr_server *Next; -+ struct neterr_server *Prev; -+ unsigned ElanId; -+ -+ char *Name; -+ int RefCount; -+ struct sockaddr_in Addr; -+} NETERR_SERVER; -+ -+#define NETERR_HASH_ENTRIES 64 -+#define NETERR_HASH(elanid) (((unsigned) elanid) % NETERR_HASH_ENTRIES) -+NETERR_SERVER *NeterrServerHash[NETERR_HASH_ENTRIES]; -+kmutex_t NeterrServerLock; -+ -+static NETERR_SERVER *FindNeterrServer (int elanId); -+static void DereferenceNeterrServer (NETERR_SERVER *server); -+static int CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg); -+ -+void -+InitialiseNetworkErrorResolver () -+{ -+ spin_lock_init (&ResolveRequestLock); -+ kcondvar_init (&ResolveRequestWait); -+ -+ ResolveRequestHead = NULL; -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ kmutex_init (&NeterrServerLock); -+} -+ -+void -+FinaliseNetworkErrorResolver () -+{ -+ spin_lock_destroy (&ResolveRequestLock); -+ kcondvar_destroy (&ResolveRequestWait); -+ -+ kmutex_destroy (&NeterrServerLock); -+} -+ -+static NETERR_RESOLVER * -+AllocateNetworkErrorResolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ -+ KMEM_ZALLOC (rvp, NETERR_RESOLVER *, sizeof (NETERR_RESOLVER), TRUE); -+ spin_lock_init (&rvp->Lock); -+ -+ return (rvp); -+} -+ -+void -+FreeNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock_destroy (&rvp->Lock); -+ KMEM_FREE (rvp, sizeof (NETERR_RESOLVER)); -+} -+ -+static void -+elan3_neterr_resolver (void) -+{ -+ NETERR_RESOLVER *rvp; -+ NETERR_SERVER *server; -+ int status; -+ unsigned long flags; -+ -+ kernel_thread_init("elan3_neterr_resolver"); -+ spin_lock (&ResolveRequestLock); -+ -+ while ((rvp = ResolveRequestHead) != NULL) -+ { -+ if ((ResolveRequestHead = rvp->Next) == NULL) -+ ResolveRequestTailp = &ResolveRequestHead; -+ -+ spin_unlock (&ResolveRequestLock); -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: rvp = %p\n", rvp); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", rvp->Message.Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&rvp->Message.SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&rvp->Message.DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", rvp->Message.CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", rvp->Message.CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", rvp->Message.NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", rvp->Message.WaitForEop); -+ -+ if ((server = FindNeterrServer (rvp->Location.loc_node)) == NULL) -+ status = ECONNREFUSED; -+ else if (ResolveRequestTimeout && ((int)(lbolt - rvp->Timestamp)) > (ResolveRequestTimeout*HZ)) -+ { -+ printk ("elan_neterr: rpc to '%s' timedout - context %d killed\n", server->Name, rvp->Message.SrcCapability.cap_mycontext); -+ status = ECONNABORTED; -+ } -+ else -+ { -+ status = CallNeterrServer (server, &rvp->Message); -+ -+ DereferenceNeterrServer (server); -+ } -+ -+ if ((status == EINTR || status == ETIMEDOUT) && rvp->Ctxt != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: retry rvp=%p\n", rvp); -+ spin_lock (&ResolveRequestLock); -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ } -+ else -+ { -+ rvp->Status = status; -+ -+ spin_lock (&rvp->Lock); -+ -+ if (rvp->Ctxt != NULL) -+ { -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for ctxt %p\n", rvp, rvp->Ctxt); -+ spin_lock_irqsave (&rvp->Ctxt->Device->IntrLock, flags); -+ -+ rvp->Completed = TRUE; -+ -+ kcondvar_wakeupall (&rvp->Ctxt->Wait, &rvp->Ctxt->Device->IntrLock); -+ -+ /* -+ * drop the locks out of order since the rvp can get freeed -+ * as soon as we drop the IntrLock - so cannot reference the -+ * rvp after this. -+ */ -+ -+ spin_unlock (&rvp->Lock); -+ spin_unlock_irqrestore (&rvp->Ctxt->Device->IntrLock, flags); -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "elan3_neterr_resolver: completing rvp %p for deceased ctxt %p\n", rvp, rvp->Ctxt); -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestCount--; -+ } -+ } -+ -+ ResolveRequestThreads--; -+ -+ spin_unlock (&ResolveRequestLock); -+ kernel_thread_exit(); -+} -+ -+int -+QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp) -+{ -+ int isdma = trap->DmaIdentifyTransaction != NULL; -+ E3_IprocTrapHeader_BE *hdrp = isdma ? trap->DmaIdentifyTransaction : trap->ThreadIdentifyTransaction; -+ E3_uint32 process = isdma ? (hdrp->s.TrAddr & 0xFFFF) : (hdrp->s.TrData0 & 0xFFFF); -+ NETERR_RESOLVER *rvp; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: process = %d %s\n", process, isdma ? "(dma)" : "(thread)"); -+ -+ if ((rvp = AllocateNetworkErrorResolver()) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot allocate resolver\n"); -+ return (ENOMEM); -+ } -+ -+ rvp->Message.Rail = ctxt->Device->Devinfo.dev_rail; -+ -+ krwlock_read (&ctxt->VpLock); -+ rvp->Location = ProcessToLocation (ctxt, NULL, process, &rvp->Message.SrcCapability); -+ krwlock_done (&ctxt->VpLock); -+ -+ if (rvp->Location.loc_node == ELAN3_INVALID_NODE) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: invalid elan id\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (EINVAL); -+ } -+ -+ rvp->Message.DstCapability = ctxt->Capability; -+ rvp->Message.DstProcess = elan3_process (ctxt); -+ rvp->Message.WaitForEop = (trap->WaitForEopTransaction != NULL); -+ -+ if (isdma) -+ { -+ rvp->Message.CookieAddr = 0; -+ rvp->Message.CookieVProc = hdrp->s.TrAddr; -+ rvp->Message.NextCookie = 0; -+ } -+ else -+ { -+ rvp->Message.CookieAddr = hdrp->s.TrAddr; -+ rvp->Message.CookieVProc = hdrp->s.TrData0; -+ rvp->Message.NextCookie = hdrp->s.TrData1; -+ } -+ -+ rvp->Completed = FALSE; -+ rvp->Ctxt = ctxt; -+ rvp->Timestamp = lbolt; -+ -+ spin_lock (&ResolveRequestLock); -+ -+ rvp->Next = NULL; -+ *ResolveRequestTailp = rvp; -+ ResolveRequestTailp = &rvp->Next; -+ ResolveRequestCount++; -+ -+ kcondvar_wakeupone (&ResolveRequestWait, &ResolveRequestLock); -+ -+ if (ResolveRequestCount < ResolveRequestThreads || ResolveRequestThreads >= ResolveRequestMaxThreads) -+ spin_unlock (&ResolveRequestLock); -+ else -+ { -+ ResolveRequestThreads++; -+ -+ spin_unlock (&ResolveRequestLock); -+ if (kernel_thread_create (elan3_neterr_resolver, NULL) == NULL) -+ { -+ spin_lock (&ResolveRequestLock); -+ ResolveRequestThreads--; -+ spin_unlock (&ResolveRequestLock); -+ -+ if (ResolveRequestThreads == 0) -+ { -+ PRINTF0 (ctxt, DBG_NETERR, "QueueNetworkErrorResolver: cannot thread pool\n"); -+ -+ FreeNetworkErrorResolver (rvp); -+ return (ENOMEM); -+ } -+ } -+ } -+ -+ *rvpp = rvp; -+ return (ESUCCESS); -+} -+ -+void -+CancelNetworkErrorResolver (NETERR_RESOLVER *rvp) -+{ -+ spin_lock (&rvp->Lock); -+ -+ PRINTF2 (rvp->Ctxt, DBG_NETERR, "CancelNetworkErrorResolver: rvp=%p %s\n", rvp, rvp->Completed ? "Completed" : "Pending"); -+ -+ if (rvp->Completed) -+ { -+ spin_unlock (&rvp->Lock); -+ FreeNetworkErrorResolver (rvp); -+ } -+ else -+ { -+ rvp->Ctxt = NULL; -+ spin_unlock (&rvp->Lock); -+ } -+} -+ -+static NETERR_FIXUP * -+AllocateNetworkErrorFixup (void) -+{ -+ NETERR_FIXUP *nef; -+ -+ KMEM_ZALLOC (nef, NETERR_FIXUP *, sizeof (NETERR_FIXUP), TRUE); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (NULL); -+ -+ kcondvar_init (&nef->Wait); -+ -+ return (nef); -+} -+ -+static void -+FreeNetworkErrorFixup (NETERR_FIXUP *nef) -+{ -+ kcondvar_destroy (&nef->Wait); -+ KMEM_FREE (nef, sizeof (NETERR_FIXUP)); -+} -+ -+int -+ExecuteNetworkErrorFixup (NETERR_MSG *msg) -+{ -+ ELAN3_DEV *dev; -+ ELAN3_CTXT *ctxt; -+ NETERR_FIXUP *nef; -+ NETERR_FIXUP **predp; -+ int rc; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, "ExecuteNetworkErrorFixup: msg = %p\n", msg); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " Rail %d\n", msg->Rail); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " SrcCapability %s\n", CapabilityString (&msg->SrcCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " DstCapability %s\n", CapabilityString (&msg->DstCapability)); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieAddr %08x\n", msg->CookieAddr); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " CookieVProc %08x\n", msg->CookieVProc); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " NextCookie %08x\n", msg->NextCookie); -+ PRINTF1 (DBG_DEVICE, DBG_NETERR, " WaitForEop %08x\n", msg->WaitForEop); -+ -+ if ((dev = elan3_device (msg->Rail)) == NULL) -+ return (ESRCH); -+ -+ if ((nef = AllocateNetworkErrorFixup()) == NULL) -+ return (ENOMEM); -+ -+ if (nef == (NETERR_FIXUP *) NULL) -+ return (ENOMEM); -+ -+ bcopy (msg, &nef->Message, sizeof (NETERR_MSG)); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, msg->SrcCapability.cap_mycontext); -+ -+ if (ctxt == NULL) -+ rc = ESRCH; -+ else if (!ELAN_CAP_MATCH (&msg->SrcCapability, &ctxt->Capability)) -+ rc = EPERM; -+ else -+ { -+ if (ctxt->Status & CTXT_NO_LWPS) -+ rc = EAGAIN; -+ else -+ { -+ for (predp = &ctxt->NetworkErrorFixups; *predp != NULL; predp = &(*predp)->Next) -+ ; -+ nef->Next = NULL; -+ *predp = nef; -+ -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ while (! nef->Completed) -+ kcondvar_wait (&nef->Wait, &dev->IntrLock, &flags); -+ -+ rc = nef->Status; -+ } -+ } -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ FreeNetworkErrorFixup (nef); -+ -+ return (rc); -+} -+ -+void -+CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ unsigned long flags; -+ -+ PRINTF2 (ctxt, DBG_NETERR, "CompleteNetworkErrorFixup: %p %d\n", nef, status); -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ nef->Status = status; -+ nef->Completed = TRUE; -+ kcondvar_wakeupone (&nef->Wait, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+ -+static NETERR_SERVER * -+NewNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ -+ KMEM_ZALLOC (server, NETERR_SERVER *, sizeof (NETERR_SERVER), TRUE); -+ KMEM_ALLOC (server->Name, char *, strlen (name)+1, TRUE); -+ -+ bcopy (addr, &server->Addr, sizeof (struct sockaddr_in)); -+ bcopy (name, server->Name, strlen (name)+1); -+ -+ server->ElanId = elanId; -+ server->RefCount = 1; -+ -+ return (server); -+} -+ -+static void -+DeleteNeterrServer (NETERR_SERVER *server) -+{ -+ KMEM_FREE (server->Name, strlen(server->Name)+1); -+ KMEM_FREE (server, sizeof (NETERR_SERVER)); -+} -+ -+static NETERR_SERVER * -+FindNeterrServer (int elanId) -+{ -+ NETERR_SERVER *server; -+ -+ kmutex_lock (&NeterrServerLock); -+ -+ for (server = NeterrServerHash[NETERR_HASH(elanId)]; server != NULL; server = server->Next) -+ if (server->ElanId == elanId) -+ break; -+ -+ if (server != NULL) -+ server->RefCount++; -+ kmutex_unlock (&NeterrServerLock); -+ -+ return (server); -+} -+ -+static void -+DereferenceNeterrServer (NETERR_SERVER *server) -+{ -+ kmutex_lock (&NeterrServerLock); -+ if ((--server->RefCount) == 0) -+ DeleteNeterrServer (server); -+ kmutex_unlock (&NeterrServerLock); -+} -+ -+int -+AddNeterrServer (int elanId, struct sockaddr_in *addr, char *name) -+{ -+ NETERR_SERVER *server; -+ NETERR_SERVER *old; -+ int hashval = NETERR_HASH(elanId); -+ -+ server = NewNeterrServer (elanId, addr, name); -+ -+ if (server == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&NeterrServerLock); -+ for (old = NeterrServerHash[hashval]; old != NULL; old = old->Next) -+ if (old->ElanId == elanId) -+ break; -+ -+ /* remove "old" server from hash table */ -+ if (old != NULL) -+ { -+ if (old->Prev) -+ old->Prev->Next = old->Next; -+ else -+ NeterrServerHash[hashval] = old->Next; -+ if (old->Next) -+ old->Next->Prev = old->Prev; -+ } -+ -+ /* insert "new" server into hash table */ -+ if ((server->Next = NeterrServerHash[hashval]) != NULL) -+ server->Next->Prev = server; -+ server->Prev = NULL; -+ NeterrServerHash[hashval] = server; -+ -+ kmutex_unlock (&NeterrServerLock); -+ -+ if (old != NULL) -+ DereferenceNeterrServer (old); -+ -+ return (ESUCCESS); -+} -+ -+int -+AddNeterrServerSyscall (int elanId, void *addrp, void *namep, char *unused) -+{ -+ struct sockaddr_in addr; -+ char *name; -+ int error; -+ int nob; -+ -+ /* Sanity check the supplied elanId argument */ -+ if (elanId < 0) -+ return ( set_errno(EINVAL) ); -+ -+ KMEM_ALLOC (name, caddr_t, SYS_NMLN, TRUE); -+ -+ if (copyin ((caddr_t) addrp, (caddr_t) &addr, sizeof (addr)) || -+ copyinstr ((caddr_t) namep, name, SYS_NMLN, &nob)) -+ { -+ error = EFAULT; -+ } -+ else -+ { -+ PRINTF2 (DBG_DEVICE, DBG_NETERR, "AddNeterrServer: '%s' at elanid %d\n", name, elanId); -+ -+ error = AddNeterrServer (elanId, &addr, name); -+ } -+ KMEM_FREE (name, SYS_NMLN); -+ -+ return (error ? set_errno(error) : ESUCCESS); -+} -+ -+ -+#if defined(DIGITAL_UNIX) -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ cred_t *cr = crget(); -+ struct rpc_err rpcerr; -+ extern cred_t *kcred; -+ struct timeval wait; -+ enum clnt_stat rc; -+ int status; -+ CLIENT *clnt; -+ int error; -+ -+ PRINTF4 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) - family=%d port=%d addr=%08x\n", server->Name, -+ server->Addr.sin_family, server->Addr.sin_port, server->Addr.sin_addr.s_addr); -+ -+ if ((clnt = clntkudp_create (&server->Addr, (struct sockaddr_in *)0, NETERR_PROGRAM, NETERR_VERSION, 1, cr)) == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): clntkudp_create error\n", server->Name); -+ -+ return (ENOMEM); -+ } -+ -+ wait.tv_sec = NETERR_RPC_TIMEOUT; -+ wait.tv_usec = 0; -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL timeout = %d\n", server->Name, NETERR_RPC_TIMEOUT); -+ -+ rc = CLNT_CALL(clnt, NETERR_FIXUP_RPC, xdr_neterr_msg, (void *)msg, xdr_int, (void *) &status, wait); -+ -+ PRINTF3 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): CLNT_CALL -> %d (%s)\n", server->Name, rc, clnt_sperrno(rc));; -+ -+ switch (rc) -+ { -+ case RPC_SUCCESS: -+ break; -+ -+ case RPC_INTR: -+ status = EINTR; -+ break; -+ -+ case RPC_TIMEDOUT: -+ status = ETIMEDOUT; -+ break; -+ -+ default: -+ printf ("CallNeterrServer(%s): %s\n", server->Name, clnt_sperrno(status)); -+ status = ENOENT; -+ break; -+ } -+ -+ CLNT_DESTROY(clnt); -+ -+ crfree(cr); -+ -+ ASSERT(rc == RPC_SUCCESS || status != 0); -+ -+ PRINTF2 (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): status=%d\n", server->Name, status); -+ -+ return (status); -+} -+#endif -+ -+#if defined(LINUX) -+ -+#define xdrsize(type) ((sizeof(type) + 3) >> 2) -+ -+static int -+xdr_error(struct rpc_rqst *req, u32 *p, void *dummy) -+{ -+ return -EIO; -+} -+ -+static int -+xdr_decode_int(struct rpc_rqst *req, u32 *p, int *res) -+{ -+ *res = ntohl(*p++); -+ return 0; -+} -+ -+#define XDR_capability_sz ((12 + BT_BITOUL(ELAN3_MAX_VPS)) * sizeof (u32)) -+ -+static int -+xdr_encode_capability(u32 *p, ELAN_CAPABILITY *cap) -+{ -+ u32 *pp = p; -+ -+ /* basic xdr unit is u32 - for opaque types we must round up to that */ -+ memcpy(p, &cap->cap_userkey, sizeof(cap->cap_userkey)); -+ p += xdrsize(cap->cap_userkey); -+ -+ *p++ = htonl(cap->cap_version); -+ ((u16 *) (p++))[1] = htons(cap->cap_type); -+ *p++ = htonl(cap->cap_lowcontext); -+ *p++ = htonl(cap->cap_highcontext); -+ *p++ = htonl(cap->cap_mycontext); -+ *p++ = htonl(cap->cap_lownode); -+ *p++ = htonl(cap->cap_highnode); -+ *p++ = htonl(cap->cap_railmask); -+ -+ memcpy(p, &cap->cap_bitmap[0], sizeof(cap->cap_bitmap)); -+ p += xdrsize(cap->cap_bitmap); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_capability_sz); -+ -+ return (p - pp); -+} -+ -+ -+#define XDR_neterr_sz (((1 + 5) * sizeof (u32)) + (2*XDR_capability_sz)) -+ -+static int -+xdr_encode_neterr_msg(struct rpc_rqst *req, u32 *p, NETERR_MSG *msg) -+{ -+ u32 *pp = p; -+ -+ *p++ = htonl(msg->Rail); -+ -+ p += xdr_encode_capability(p, &msg->SrcCapability); -+ p += xdr_encode_capability(p, &msg->DstCapability); -+ -+ *p++ = htonl(msg->DstProcess); -+ *p++ = htonl(msg->CookieAddr); -+ *p++ = htonl(msg->CookieVProc); -+ *p++ = htonl(msg->NextCookie); -+ *p++ = htonl(msg->WaitForEop); -+ -+ ASSERT (((unsigned long) p - (unsigned long) pp) == XDR_neterr_sz); -+ -+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); -+ -+ return 0; -+} -+ -+static struct rpc_procinfo neterr_procedures[2] = -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+# define RPC_ID_NULL "neterr_null" -+# define RPC_ID_FIXUP_RPC "neterr_fixup_rpc" -+#else -+# define RPC_ID_NULL NETERR_NULL_RPC -+# define RPC_ID_FIXUP_RPC NETERR_FIXUP_RPC -+#endif -+ { -+ RPC_ID_NULL, /* procedure name or number*/ -+ (kxdrproc_t) xdr_error, /* xdr encode fun */ -+ (kxdrproc_t) xdr_error, /* xdr decode fun */ -+ 0, /* req buffer size */ -+ 0, /* call count */ -+ }, -+ { -+ RPC_ID_FIXUP_RPC, -+ (kxdrproc_t) xdr_encode_neterr_msg, -+ (kxdrproc_t) xdr_decode_int, -+ XDR_neterr_sz, -+ 0, -+ }, -+}; -+ -+static struct rpc_version neterr_version1 = -+{ -+ 1, /* version */ -+ 2, /* number of procedures */ -+ neterr_procedures /* procedures */ -+}; -+ -+static struct rpc_version *neterr_version[] = -+{ -+ NULL, -+ &neterr_version1, -+}; -+ -+static struct rpc_stat neterr_stats; -+ -+static struct rpc_program neterr_program = -+{ -+ NETERR_SERVICE, -+ NETERR_PROGRAM, -+ sizeof(neterr_version)/sizeof(neterr_version[0]), -+ neterr_version, -+ &neterr_stats, -+}; -+ -+static int -+CallNeterrServer (NETERR_SERVER *server, NETERR_MSG *msg) -+{ -+ struct rpc_xprt *xprt; -+ struct rpc_clnt *clnt; -+ struct rpc_timeout to; -+ int rc, status; -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s)\n", server->Name); -+ -+ xprt_set_timeout(&to, 1, NETERR_RPC_TIMEOUT * HZ); -+ -+ if ((xprt = xprt_create_proto(IPPROTO_UDP, &server->Addr, &to)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) xprt_create_proto failed\n", server->Name); -+ return EFAIL; -+ } -+ -+ if ((clnt = rpc_create_client(xprt, server->Name, &neterr_program, NETERR_VERSION, RPC_AUTH_NULL)) == NULL) -+ { -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s) rpc_create_client failed\n", server->Name); -+ xprt_destroy (xprt); -+ -+ return EFAIL; -+ } -+ -+ clnt->cl_softrtry = 1; -+ clnt->cl_chatty = 0; -+ clnt->cl_oneshot = 1; -+ clnt->cl_intr = 0; -+ -+ if ((rc = rpc_call(clnt, NETERR_FIXUP_RPC, msg, &status, 0)) < 0) -+ { -+ /* RPC error has occured - determine whether we should retry */ -+ -+ status = ETIMEDOUT; -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_NETRPC, "CallNeterrServer(%s): -> %d\n", server->Name, status); -+ -+ return (status); -+} -+ -+#endif /* defined(LINUX) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/procfs_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/procfs_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/procfs_linux.c 2005-07-28 14:52:52.810684512 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.21 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/procfs_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *elan3_procfs_root; -+struct proc_dir_entry *elan3_config_root; -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ int len; -+ -+ if (dev->Position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->Position.pos_nodeid, dev->Position.pos_levels, dev->Position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN3_DEV *dev = (ELAN3_DEV *) data; -+ unsigned nodeid = ELAN3_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ dev->Position.pos_mode = ELAN_POS_UNKNOWN; -+ dev->Position.pos_nodeid = ELAN3_INVALID_NODE; -+ dev->Position.pos_nodes = 0; -+ dev->Position.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (ComputePosition (&dev->Position, nodeid, numnodes, dev->Devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->Instance, nodeid, numnodes); -+ else -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->Instance, dev->Position.pos_nodeid, -+ dev->Position.pos_nodes, dev->Position.pos_levels); -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+ -+void -+elan3_procfs_device_init (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir, *p; -+ char name[NAME_MAX]; -+ -+ sprintf (name, "device%d", dev->Instance); -+ dir = dev->Osdep.procdir = proc_mkdir (name, elan3_procfs_root); -+ -+ if ((p = create_proc_entry ("position", 0, dir)) != NULL) -+ { -+ p->read_proc = proc_read_position; -+ p->write_proc = proc_write_position; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ -+} -+ -+void -+elan3_procfs_device_fini (ELAN3_DEV *dev) -+{ -+ struct proc_dir_entry *dir = dev->Osdep.procdir; -+ char name[NAME_MAX]; -+ -+ remove_proc_entry ("position", dir); -+ -+ sprintf (name, "device%d", dev->Instance); -+ remove_proc_entry (name, elan3_procfs_root); -+} -+ -+void -+elan3_procfs_init() -+{ -+ extern int eventint_punt_loops; -+ extern int ResolveRequestTimeout; -+ -+ elan3_procfs_root = proc_mkdir("elan3", qsnet_procfs_root); -+ -+ elan3_config_root = proc_mkdir("config", elan3_procfs_root); -+ -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug", &elan3_debug, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_console", &elan3_debug_console, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3_debug_buffer", &elan3_debug_buffer, 0); -+ qsnet_proc_register_hex (elan3_config_root, "elan3mmu_debug", &elan3mmu_debug, 0); -+ qsnet_proc_register_int (elan3_config_root, "eventint_punt_loops", &eventint_punt_loops, 0); -+ qsnet_proc_register_int (elan3_config_root, "neterr_timeout", &ResolveRequestTimeout, 0); -+ -+#if defined(__ia64__) -+ { -+ extern int enable_sdram_writecombining; -+ qsnet_proc_register_int (elan3_config_root, "enable_sdram_writecombining", &enable_sdram_writecombining, 0); -+ } -+#endif -+} -+ -+void -+elan3_procfs_fini() -+{ -+#if defined(__ia64__) -+ remove_proc_entry ("enable_sdram_writecombining", elan3_config_root); -+#endif -+ remove_proc_entry ("neterr_timeout", elan3_config_root); -+ remove_proc_entry ("eventint_punt_loops", elan3_config_root); -+ remove_proc_entry ("elan3mmu_debug", elan3_config_root); -+ remove_proc_entry ("elan3_debug_buffer", elan3_config_root); -+ remove_proc_entry ("elan3_debug_console", elan3_config_root); -+ remove_proc_entry ("elan3_debug", elan3_config_root); -+ -+ remove_proc_entry ("config", elan3_procfs_root); -+ remove_proc_entry ("version", elan3_procfs_root); -+ -+ remove_proc_entry ("elan3", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan3_procfs_root); -+EXPORT_SYMBOL(elan3_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/quadrics_version.h 2005-07-28 14:52:52.811684360 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/routecheck.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/routecheck.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/routecheck.c 2005-07-28 14:52:52.811684360 -0400 -@@ -0,0 +1,313 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* ------------------------------------------------------------- */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* ---------------------------------------------------------------------- */ -+typedef struct elan3_net_location { -+ int netid; -+ int plane; -+ int level; -+} ELAN3_NET_LOCATION; -+/* ---------------------------------------------------------------------- */ -+#define FLIT_LINK_ARRAY_MAX (ELAN3_MAX_LEVELS*2) -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_follow_link( ELAN3_CTXT *ctxt, ELAN3_NET_LOCATION *loc, int link) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ if ((link<0) || (link>7)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_follow_link: link (%d) out of range \n",link); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* going up or down ? */ -+ if ( link >= pos->pos_arity[loc->level] ) -+ { -+ /* Up */ -+ if (loc->level >= pos->pos_levels) -+ loc->plane = 0; -+ else -+ { -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (16 * ( loc->plane / 8 )) + (4 * ( loc->plane % 4)) -+ +(link - pos->pos_arity[loc->level]); -+ else -+ loc->plane = (loc->plane * (8 - pos->pos_arity[loc->level])) -+ +(link - pos->pos_arity[loc->level]); -+ } -+ loc->level--; -+ if ( loc->level < 0 ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the top\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ loc->netid = loc->netid / pos->pos_arity[loc->level]; -+ } -+ else -+ { -+ /* going down */ -+ if ((loc->level == 0) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->netid = link % 2; -+ else -+ loc->netid =(loc->netid * pos->pos_arity[loc->level])+link; -+ -+ loc->level++; -+ if (loc->level > pos->pos_levels) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_follow_link: link goes off the bottom\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( loc->level >= (pos->pos_levels-1)) -+ loc->plane = 0; -+ else -+ if ((loc->level == 1) && (pos->pos_arity[0] == 8)) /* oddness in some machines ie 512 */ -+ loc->plane = (((loc->plane)>>2)*2) - ( ((loc->plane)>>2) & 3 ) + ((link<2)?0:4); /* ((p/4) % 4) */ -+ else -+ loc->plane = loc->plane/(8-pos->pos_arity[loc->level]); -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int /* assumes they are connected, really only used for finding the MyLink */ -+elan3_route_get_mylink (ELAN_POSITION *pos, ELAN3_NET_LOCATION *locA, ELAN3_NET_LOCATION *locB) -+{ -+ /* whats the My Link for locA to LocB */ -+ if ( locA->level > locB->level ) -+ return locB->plane - (locA->plane * (8 - pos->pos_arity[locA->level])) + pos->pos_arity[locA->level]; -+ -+ return locB->netid - (locA->netid * pos->pos_arity[locA->level]); -+} -+/* ---------------------------------------------------------------------- */ -+#define FIRST_GET_HIGH_PRI(FLIT) (FLIT & FIRST_HIGH_PRI) -+#define FIRST_GET_AGE(FLIT) ((FLIT & FIRST_AGE(15))>>11) -+#define FIRST_GET_TIMEOUT(FLIT) ((FLIT & FIRST_TIMEOUT(3))>>9) -+#define FIRST_GET_NEXT(FLIT) ((FLIT & FIRST_PACKED(3))>>7) -+#define FIRST_GET_ROUTE(FLIT) (FLIT & 0x7f) -+#define FIRST_GET_BCAST(FLIT) (FLIT & 0x40) -+#define FIRST_GET_IS_INVALID(FLIT) ((FLIT & 0x78) == 0x08) -+#define FIRST_GET_TYPE(FLIT) ((FLIT & 0x30)>>4) -+#define PRF_GET_ROUTE(FLIT,N) ((FLIT >> (N*4)) & 0x0F) -+#define PRF_GET_IS_MYLINK(ROUTE) (ROUTE == PACKED_MYLINK) -+#define PRF_GET_IS_NORMAL(ROUTE) (ROUTE & 0x8) -+#define PRF_GET_NORMAL_LINK(ROUTE) (ROUTE & 0x7) -+#define PRF_MOVE_ON(INDEX,NEXT) do { if (NEXT==3) {NEXT=0;INDEX++;} else {NEXT++; }} while (0); -+/* ---------------------------------------------------------------------- */ -+int /* turn level needed or -1 if not possible */ -+elan3_route_get_min_turn_level( ELAN_POSITION *pos, int nodeId) -+{ -+ int l,range = 1; -+ -+ for(l=pos->pos_levels-1;l>=0;l--) -+ { -+ range = range * pos->pos_arity[l]; -+ -+ if ( ((pos->pos_nodeid - (pos->pos_nodeid % range)) <= nodeId ) -+ && (nodeId <= (pos->pos_nodeid - (pos->pos_nodeid % range)+range -1))) -+ return l; -+ } -+ return -1; -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_check(ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNodeId) -+{ -+ ELAN3_NET_LOCATION lastLoc,currLoc; -+ int err; -+ int turnLevel; -+ int goingDown; -+ int lnk,index,next,val; -+ ELAN_POSITION *pos = &ctxt->Position; -+ -+ /* is the dest possible */ -+ if ( (destNodeId <0 ) || (destNodeId >= pos->pos_nodes)) -+ return (ELAN3_ROUTE_PROC_RANGE); -+ -+ /* -+ * walk the route, -+ * - to see if we get there -+ * - checking we dont turn around -+ */ -+ currLoc.netid = pos->pos_nodeid; /* the elan */ -+ currLoc.plane = 0; -+ currLoc.level = pos->pos_levels; -+ -+ turnLevel = currLoc.level; /* track the how far the route goes in */ -+ goingDown = 0; /* once set we cant go up again ie only one change of direction */ -+ -+ /* move onto the network from the elan */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,4)) != ELAN3_ROUTE_SUCCESS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: initial elan3_route_follow_link failed\n"); -+ return err; -+ } -+ /* do the first part of flit */ -+ switch ( FIRST_GET_TYPE(flits[0]) ) -+ { -+ case 0 /* sent */ : { lnk = (flits[0] & 0x7); break; } -+ case PACKED_MYLINK : { lnk = pos->pos_nodeid % pos->pos_arity[pos->pos_levels-1]; break; } -+ case PACKED_ADAPTIVE : { lnk = 7; /* all routes are the same just check one */ break; } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected first flit (%d)\n",flits[0]); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS ) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: elan3_route_follow_link failed\n"); -+ return err; -+ } -+ if ((currLoc.level > pos->pos_levels) || (currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ if ( lastLoc.level < currLoc.level ) -+ { -+ turnLevel = lastLoc.level; -+ goingDown = 1; -+ } -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* loop on doing the remaining flits */ -+ index = 1; -+ next = FIRST_GET_NEXT(flits[0]); -+ val = PRF_GET_ROUTE(flits[index],next); -+ while(val) -+ { -+ if (PRF_GET_IS_NORMAL(val) ) -+ lnk = PRF_GET_NORMAL_LINK(val); -+ else -+ { -+ switch ( val ) -+ { -+ case PACKED_MYLINK : -+ { -+ lnk = elan3_route_get_mylink(pos, &currLoc,&lastLoc); -+ break; -+ } -+ default : -+ PRINTF1 (ctxt, DBG_VP, "elan3_route_check: unexpected packed flit (%d)\n",val); -+ return (ELAN3_ROUTE_INVALID); -+ } -+ } -+ -+ /* move along this link and check new location */ -+ memcpy(&lastLoc,&currLoc,sizeof(ELAN3_NET_LOCATION)); /* keep track of last loc */ -+ if ((err=elan3_route_follow_link(ctxt,&currLoc,lnk)) != ELAN3_ROUTE_SUCCESS) -+ return err; -+ -+ if ((currLoc.level > pos->pos_levels ) || ( currLoc.level < 0 )) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route leaves machine\n"); -+ return (ELAN3_ROUTE_INVALID_LEVEL); -+ } -+ -+ if ( lastLoc.level < currLoc.level ) -+ goingDown = 1; -+ else -+ { -+ if (turnLevel > currLoc.level) -+ turnLevel = currLoc.level; -+ if (goingDown) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route ocilated\n"); -+ return (ELAN3_ROUTE_OCILATES); -+ } -+ } -+ -+ /* move to next part of flit */ -+ PRF_MOVE_ON(index,next); -+ if ( index >= MAX_FLITS) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: route too long\n"); -+ return (ELAN3_ROUTE_TOO_LONG); -+ } -+ /* extract the new value */ -+ val = PRF_GET_ROUTE(flits[index],next); -+ } -+ -+ /* have we got to where we want ? */ -+ if ((currLoc.level != pos->pos_levels) || (currLoc.netid != destNodeId)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: goes to %d instead of %d\n",currLoc.netid , destNodeId ); -+ return (ELAN3_ROUTE_WRONG_DEST); -+ } -+ -+ /* -+ * there is the case of src == dest -+ * getTurnLevel returns pos->pos_levels, and turnLevel is (pos->pos_levels -1) -+ * then we assume they really want to go onto the network. -+ * otherwise we check that the turn at the appriate level -+ */ -+ if ( (pos->pos_nodeid != destNodeId) || ( turnLevel != (pos->pos_levels -1)) ) -+ { -+ int lev; -+ if ((lev = elan3_route_get_min_turn_level(pos,destNodeId)) == -1) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_route_check: cant calculate turn level\n"); -+ return (ELAN3_ROUTE_INVALID); /* not sure this can happen here as checks above should protect me */ -+ } -+ if (turnLevel != lev) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_route_check: turn level should be %d but is %d \n", lev, turnLevel); -+ return (ELAN3_ROUTE_TURN_LEVEL); -+ } -+ } -+ return (ELAN3_ROUTE_SUCCESS); -+} -+/* ---------------------------------------------------------------------- */ -+int -+elan3_route_broadcast_check(ELAN3_CTXT *ctxt , E3_uint16 *flits, int lowNode, int highNode ) -+{ -+ E3_uint16 flitsTmp[MAX_FLITS]; -+ int nflits,i; -+ -+ nflits = GenerateRoute (&ctxt->Position, flitsTmp, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ for(i=0;i -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static sdramaddr_t -+AllocateLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int ctxnum, E3_uint64 *smallRoute) -+{ -+ int bit = -1; -+ ELAN3_ROUTES *rent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if ((bit = bt_freebit (rent->Bitmap, NROUTES_PER_BLOCK)) != -1) -+ break; -+ } -+ -+ if (bit == -1) /* No spare entries in large routes */ -+ { /* so allocate a new page */ -+ PRINTF0 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: allocate route entries\n"); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ KMEM_ZALLOC(rent, ELAN3_ROUTES *, sizeof (ELAN3_ROUTES), TRUE); -+ -+ if (rent == (ELAN3_ROUTES *) NULL) -+ return ((sdramaddr_t) 0); -+ -+ rent->Routes = elan3_sdram_alloc (dev, PAGESIZE); -+ if (rent->Routes == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (rent, sizeof (ELAN3_ROUTES)); -+ return ((sdramaddr_t) 0); -+ } -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* Add to list of large routes */ -+ rent->Next = tbl->LargeRoutes; -+ tbl->LargeRoutes = rent; -+ -+ /* and use entry 0 */ -+ bit = 0; -+ } -+ -+ /* Set the bit in the bitmap to mark this route as allocated */ -+ BT_SET (rent->Bitmap, bit); -+ -+ /* And generate the small route pointer and the pointer to the large routes */ -+ (*smallRoute) = BIG_ROUTE_PTR(rent->Routes + (bit*NBYTES_PER_LARGE_ROUTE), ctxnum); -+ -+ PRINTF4 (DBG_DEVICE, DBG_VP, "AllocateLargeRoute: rent %p using entry %d at %lx with route pointer %llx\n", -+ rent, bit, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), (long long) (*smallRoute)); -+ -+ /* Invalidate the large route */ -+ elan3_sdram_zeroq_sdram (dev, rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE), NBYTES_PER_LARGE_ROUTE); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ -+ return (rent->Routes + (bit * NBYTES_PER_LARGE_ROUTE)); -+} -+ -+static void -+FreeLargeRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, E3_uint64 smallRoute) -+{ -+ E3_Addr addr = (E3_Addr) (smallRoute & ((1ULL << ROUTE_CTXT_SHIFT)-1)); -+ ELAN3_ROUTES *rent; -+ -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: free route %llx\n", (long long) smallRoute); -+ -+ ASSERT (SPINLOCK_HELD (&tbl->Lock)); -+ -+ for (rent = tbl->LargeRoutes; rent; rent = rent->Next) -+ { -+ if (rent->Routes <= addr && (rent->Routes + ROUTE_BLOCK_SIZE) > addr) -+ { -+ int indx = (addr - rent->Routes)/NBYTES_PER_LARGE_ROUTE; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "FreeLargeRoute: rent=%p indx=%d\n", rent, indx); -+ -+ BT_CLEAR(rent->Bitmap, indx); -+ return; -+ } -+ } -+ -+ panic ("elan: FreeLargeRoute - route not found in large route tables"); -+} -+ -+static void -+FreeLargeRoutes (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ ELAN3_ROUTES *rent; -+ -+ while ((rent = tbl->LargeRoutes) != NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_VP, "FreeLargeRoutes: free rent %p\n", rent); -+ -+ tbl->LargeRoutes = rent->Next; -+ -+ elan3_sdram_free (dev, rent->Routes, PAGESIZE); -+ -+ KMEM_FREE (rent, sizeof(ELAN3_ROUTES)); -+ } -+} -+ -+int -+GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ sdramaddr_t largeRouteOff; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if (routeValue & ROUTE_PTR) -+ { -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 0); -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ flits[3] = (routeValue >> 48) & 0xffff; -+ -+ routeValue = elan3_sdram_readq (dev, largeRouteOff + 8); -+ flits[4] = routeValue & 0xffff; -+ flits[5] = (routeValue >> 16) & 0xffff; -+ flits[6] = (routeValue >> 32) & 0xffff; -+ flits[6] = (routeValue >> 48) & 0xffff; -+ } -+ else -+ { -+ flits[0] = routeValue & 0xffff; -+ flits[1] = (routeValue >> 16) & 0xffff; -+ flits[2] = (routeValue >> 32) & 0xffff; -+ } -+ -+ return (ESUCCESS); -+} -+ -+ELAN3_ROUTE_TABLE * -+AllocateRouteTable (ELAN3_DEV *dev, int size) -+{ -+ ELAN3_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN3_ROUTE_TABLE *, sizeof (ELAN3_ROUTE_TABLE), TRUE); -+ -+ if (tbl == (ELAN3_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->Size = size; -+ tbl->Table = elan3_sdram_alloc (dev, size*NBYTES_PER_SMALL_ROUTE); -+ -+ if (tbl->Table == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+ return (NULL); -+ } -+ spin_lock_init (&tbl->Lock); -+ -+ /* zero the route table */ -+ elan3_sdram_zeroq_sdram (dev, tbl->Table, size*NBYTES_PER_SMALL_ROUTE); -+ -+ return (tbl); -+} -+ -+void -+FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl) -+{ -+ elan3_sdram_free (dev, tbl->Table, tbl->Size*NBYTES_PER_SMALL_ROUTE); -+ -+ FreeLargeRoutes (dev, tbl); -+ -+ spin_lock_destroy (&tbl->Lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN3_ROUTE_TABLE)); -+} -+ -+int -+LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, int ctxnum, int nflits, E3_uint16 *flits) -+{ -+ E3_uint64 routeValue; -+ E3_uint64 largeRouteValue; -+ sdramaddr_t largeRouteOff; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return (EINVAL); -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: table %lx process %d ctxnum %x\n", tbl->Table ,process, ctxnum); -+ -+ if (nflits < 4) -+ { -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* See if we're replacing a "large" route */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ routeValue = SMALL_ROUTE(flits, ctxnum); -+ -+ if ( routeValue & ROUTE_PTR) -+ PRINTF0 (DBG_DEVICE, DBG_VP, "SHOULD BE A SMALL ROUTE !!!!!!!\n"); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "LoadRoute: loading small route %d %llx\n", process, (long long) routeValue); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, routeValue); -+ } -+ else -+ { -+ E3_uint64 value0 = BIG_ROUTE0(flits); -+ E3_uint64 value1 = BIG_ROUTE1(flits); -+ -+ if ((largeRouteOff = AllocateLargeRoute (dev, tbl, ctxnum, &largeRouteValue)) == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ if ((routeValue & ROUTE_PTR) == 0) -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, largeRouteValue); -+ else -+ { -+ FreeLargeRoute (dev, tbl, largeRouteValue); -+ -+ largeRouteOff = (routeValue & ROUTE_PTR_MASK); -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_VP, "LoadRoute: loading large route %d - %llx %llx\n", process, -+ (long long) value0, (long long) value1); -+ -+ elan3_sdram_writeq (dev, largeRouteOff + 0, value0); -+ elan3_sdram_writeq (dev, largeRouteOff + 8, value1); -+ } -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+ return (ESUCCESS); -+} -+void -+InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* unset ROUTE_VALID -+ * does not matter if its short or long, will check when we re-use it -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue & (~ROUTE_VALID))); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ValidateRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ /* set ROUTE_VALID -+ */ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, (routeValue | ROUTE_VALID)); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+void -+ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process) -+{ -+ E3_uint64 routeValue; -+ unsigned long flags; -+ -+ if (process < 0 || process >= tbl->Size) -+ return; -+ -+ spin_lock_irqsave (&tbl->Lock, flags); -+ -+ PRINTF2 (DBG_DEVICE, DBG_VP, "ClearRoute: table %ld process %d \n", tbl->Table ,process); -+ -+ routeValue = elan3_sdram_readq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE); -+ -+ elan3_sdram_writeq (dev, tbl->Table + process * NBYTES_PER_SMALL_ROUTE, 0); -+ -+ if (routeValue & ROUTE_PTR) -+ FreeLargeRoute (dev, tbl, routeValue); -+ -+ spin_unlock_irqrestore (&tbl->Lock, flags); -+} -+ -+static int -+ElanIdEqual (ELAN_POSITION *pos, int level, int ida, int idb) -+{ -+ int l; -+ -+ for (l = pos->pos_levels-1; l >= level; l--) -+ { -+ ida /= pos->pos_arity[l]; -+ idb /= pos->pos_arity[l]; -+ } -+ -+ return (ida == idb); -+} -+ -+static int -+RouteDown (ELAN_POSITION *pos, int level, int elanid) -+{ -+ int l; -+ -+ for (l = (pos->pos_levels - 1); level < pos->pos_levels - 1; level++, l--) -+ { -+ if ( pos->pos_arity[l] ) -+ elanid /= pos->pos_arity[l]; -+ } -+ elanid %= pos->pos_arity[l]; -+ -+ return elanid; -+} -+ -+static int -+InitPackedAndFlits (u_char *packed, E3_uint16 *flits) -+{ -+ int rb = 0; -+ -+ bzero ((caddr_t) packed, MAX_PACKED+4); -+ bzero ((caddr_t) flits, MAX_FLITS * sizeof (E3_uint16)); -+ -+ /* Initialise 4 bytes of packed, so that the "padding" */ -+ /* NEVER terminates with 00, as this is recognised as */ -+ /* as CRC flit */ -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ packed[rb++] = 0xF; -+ -+ return (rb); -+} -+ -+static int -+PackThemRoutesUp (E3_uint16 *flits, u_char *packed, int rb, int timeout, int highPri) -+{ -+ int i, nflits; -+ -+ flits[0] |= FIRST_TIMEOUT(timeout); -+ if (highPri) -+ flits[0] |= FIRST_HIGH_PRI; -+ -+ /* round up the number of route bytes to flits */ -+ /* and subtract the 4 extra we've padded out with */ -+ nflits = (rb-1)/4; -+ -+ for (i = nflits; i > 0; i--) -+ { -+ flits[i] = (packed[rb-1] << 12 | -+ packed[rb-2] << 8 | -+ packed[rb-3] << 4 | -+ packed[rb-4] << 0); -+ rb -= 4; -+ } -+ -+ /* Now set the position of the first packed route */ -+ /* byte in the 2nd 16 bit flit, taking account of the */ -+ /* 4 byte padding */ -+ flits[0] |= FIRST_PACKED (4-rb); -+ -+ return (nflits+1); -+} -+ -+int -+GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri) -+{ -+ int broadcast = (lowid != highid); -+ int rb = 0; -+ int first = 1; -+ int noRandom = 0; -+ int level; -+ u_char packed[MAX_PACKED+4]; -+ int numDownLinks; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ noRandom |= pos->pos_random_disabled & (1 << (pos->pos_levels-1-level)); -+ } -+ -+ for (level = pos->pos_levels-1; /* Move up out of the elan */ -+ level > 0 && ! (ElanIdEqual (pos, level, pos->pos_nodeid, lowid) && -+ ElanIdEqual (pos, level, pos->pos_nodeid, highid)); level--) -+ { -+ numDownLinks = pos->pos_arity [level]; -+ if (first) -+ { -+ if (broadcast || noRandom) -+ flits[0] = FIRST_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ flits[0] = FIRST_ADAPTIVE; -+ else -+ flits[0] = FIRST_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ first = 0; -+ } -+ else -+ { -+ if (broadcast || noRandom) -+ packed[rb++] = PACKED_BCAST_TREE; -+ else -+ { -+ if (numDownLinks == 4) -+ packed[rb++] = PACKED_ADAPTIVE; -+ else -+ packed[rb++] = PACKED_ROUTE( numDownLinks + ( lowid % (8-numDownLinks) )); -+ } -+ } -+ } -+ -+ while (level < pos->pos_levels) -+ { -+ int lowRoute = RouteDown (pos, level, lowid); -+ int highRoute = RouteDown (pos, level, highid); -+ -+ if (first) -+ { -+ if (broadcast) -+ flits[0] = FIRST_BCAST(highRoute, lowRoute); -+ else -+ flits[0] = FIRST_ROUTE(lowRoute); -+ -+ first = 0; -+ } -+ else -+ { -+ if (broadcast) -+ { -+ packed[rb++] = PACKED_BCAST0(highRoute, lowRoute); -+ packed[rb++] = PACKED_BCAST1(highRoute, lowRoute); -+ } -+ else -+ packed[rb++] = PACKED_ROUTE(lowRoute); -+ } -+ -+ level++; -+ } -+ -+#ifdef ELITE_REVA_SUPPORTED -+ if (broadcast && (pos->pos_levels == 3)) -+ { -+ packed[rb++] = PACKED_BCAST0(0, 0); -+ packed[rb++] = PACKED_BCAST1(0, 0); -+ } -+#endif -+ -+ return (PackThemRoutesUp (flits, packed, rb, timeout, highPri)); -+} -+ -+int -+GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive) -+{ -+ int notfirst = 0; -+ int l, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ for (l = pos->pos_levels-1; l > level; l--) -+ if (! notfirst++) -+ flits[0] = adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ -+ if (! notfirst++ ) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (l++ /* consume mylink */; l < pos->pos_levels; l++) -+ if (! notfirst++) -+ flits[0] = FIRST_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ else -+ packed[rb++] = PACKED_ROUTE (RouteDown (pos, l, pos->pos_nodeid)); -+ -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * In this case "level" is the number of levels counted from the bottom. -+ */ -+int -+GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive ) -+{ -+ int first = 1; -+ int i, rb; -+ u_char packed[MAX_PACKED+4]; -+ -+ rb = InitPackedAndFlits (packed, flits); -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ { -+ if (first) -+ flits[0] = linkup ? FIRST_ROUTE(linkup[i]) : adaptive ? FIRST_ADAPTIVE : FIRST_BCAST_TREE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : adaptive ? PACKED_ADAPTIVE : PACKED_BCAST_TREE; -+ first = 0; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first) -+ flits[0] = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = PACKED_ROUTE(linkdown[i]); -+ -+ return (PackThemRoutesUp (flits, packed, rb, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/sdram.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/sdram.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/sdram.c 2005-07-28 14:52:52.814683904 -0400 -@@ -0,0 +1,807 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.17 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/sdram.c,v $*/ -+ -+ -+#include -+ -+#include -+#include -+#include -+ -+/* sdram access functions */ -+#define sdram_off_to_bank(dev,off) (&dev->SdramBanks[(off) >> ELAN3_SDRAM_BANK_SHIFT]) -+#define sdram_off_to_offset(dev,off) ((off) & (ELAN3_SDRAM_BANK_SIZE-1)) -+#define sdram_off_to_bit(dev,indx,off) (sdram_off_to_offset(dev,off) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))) -+ -+#define sdram_off_to_mapping(dev,off) (sdram_off_to_bank(dev,off)->Mapping + sdram_off_to_offset(dev,off)) -+ -+unsigned char -+elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readb ((unsigned char *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned short -+elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readw ((unsigned short *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned int -+elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readl ((unsigned int *) sdram_off_to_mapping(dev, off))); -+} -+ -+unsigned long long -+elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+ return (readq ((unsigned long long *) sdram_off_to_mapping(dev, off))); -+} -+ -+void -+elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, (unsigned char *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, (unsigned short *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, (unsigned int *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, (unsigned long long *) sdram_off_to_mapping(dev, off)); -+ wmb(); -+} -+ -+void -+elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+} -+ -+void -+elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+void -+elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+#ifdef __LITTLE_ENDIAN__ -+ bcopy ((void *)sdram_off_to_mapping(dev, from), to, nbytes); -+#else -+#error incorrect for big endian -+#endif -+} -+ -+#define E3_WRITEBUFFER_SIZE 16 -+#define E3_WRITEBUFFER_OFFSET(x) (((unsigned long) x) & (E3_WRITEBUFFER_SIZE-1)) -+#define E3_WRITEBUFFER_BASE(x) (((unsigned long) x) & ~((unsigned long) (E3_WRITEBUFFER_SIZE-1))) -+ -+void -+elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ for (i = 0; i < E3_WRITEBUFFER_SIZE/sizeof (uint8_t); i++) -+ writeb (((uint8_t *) slim)[i], &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (((uint8_t *) from)[i], &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint8_t)) + sizeof (uint8_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint8_t); i++) -+ writeb (0, &((uint8_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (((uint16_t *) slim)[i], &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writew (((uint16_t *) slim)[0], &((uint16_t *) dlim)[0]); -+ writew (((uint16_t *) slim)[1], &((uint16_t *) dlim)[1]); -+ writew (((uint16_t *) slim)[2], &((uint16_t *) dlim)[2]); -+ writew (((uint16_t *) slim)[3], &((uint16_t *) dlim)[3]); -+ writew (((uint16_t *) slim)[4], &((uint16_t *) dlim)[4]); -+ writew (((uint16_t *) slim)[5], &((uint16_t *) dlim)[5]); -+ writew (((uint16_t *) slim)[6], &((uint16_t *) dlim)[6]); -+ writew (((uint16_t *) slim)[7], &((uint16_t *) dlim)[7]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (((uint16_t *) from)[i], &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint16_t)) + sizeof (uint16_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint16_t); i++) -+ writew (0, &((uint16_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (((uint32_t *) slim)[i], &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writel (((uint32_t *) slim)[0], &((uint32_t *) dlim)[0]); -+ writel (((uint32_t *) slim)[1], &((uint32_t *) dlim)[1]); -+ writel (((uint32_t *) slim)[2], &((uint32_t *) dlim)[2]); -+ writel (((uint32_t *) slim)[3], &((uint32_t *) dlim)[3]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (((uint32_t *) from)[i], &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint32_t)) + sizeof (uint32_t); -+ int i; -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ for (i = 0; i < nbytes/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ for (i = 0; i < ntop/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dlim)[i]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ for (i = 0; i < nbase/sizeof(uint32_t); i++) -+ writel (0, &((uint32_t *) dbase)[i]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ virtaddr_t slim = (virtaddr_t) from + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ slim -= ntop; -+ dlim -= ntop; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ slim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (((uint64_t *) slim)[0], &((uint64_t *) dlim)[0]); -+ writeq (((uint64_t *) slim)[1], &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (((uint64_t *) from)[0], &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+void -+elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ virtaddr_t dbase = (virtaddr_t) sdram_off_to_mapping (dev, to); -+ virtaddr_t dlim = (virtaddr_t) dbase + nbytes; -+ unsigned nbase = E3_WRITEBUFFER_SIZE - E3_WRITEBUFFER_OFFSET (dbase); -+ unsigned ntop = E3_WRITEBUFFER_OFFSET (dlim - sizeof (uint64_t)) + sizeof (uint64_t); -+ -+ if (E3_WRITEBUFFER_BASE(dbase) == E3_WRITEBUFFER_BASE(dlim)) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ else -+ { -+ if (ntop < E3_WRITEBUFFER_SIZE) -+ { -+ dlim -= ntop; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ wmb(); -+ } -+ -+ while (dlim >= (dbase + E3_WRITEBUFFER_SIZE)) -+ { -+ dlim -= E3_WRITEBUFFER_SIZE; -+ -+ writeq (0, &((uint64_t *) dlim)[0]); -+ writeq (0, &((uint64_t *) dlim)[1]); -+ wmb(); -+ } -+ -+ if (nbase < E3_WRITEBUFFER_SIZE) -+ { -+ writeq (0, &((uint64_t *) dbase)[0]); -+ wmb(); -+ } -+ } -+} -+ -+physaddr_t -+elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t off) -+{ -+#if defined(DIGITAL_UNIX) -+ return (KSEG_TO_PHYS (sdram_off_to_mapping (dev, off))); -+#elif defined(LINUX) -+ return (kmem_to_phys ((void *) sdram_off_to_mapping (dev, off))); -+#endif -+} -+ -+/* sdram buddy allocator */ -+#define read_next(dev, block) elan3_sdram_readl(dev, block + 0) -+#define read_prev(dev, block) elan3_sdram_readl(dev, block + 4) -+#define write_next(dev, block, val) (elan3_sdram_writel(dev, block + 0, val), val) -+#define write_prev(dev, block, val) (elan3_sdram_writel(dev, block + 4, val), val) -+ -+#define freelist_insert(dev,idx,block)\ -+do {\ -+ sdramaddr_t next = dev->SdramFreeLists[(idx)];\ -+\ -+ /*\ -+ * block->prev = NULL;\ -+ * block->next = next;\ -+ * if (next != NULL)\ -+ * next->prev = block;\ -+ * freelist = block;\ -+ */\ -+ write_prev (dev, block, (sdramaddr_t) 0);\ -+ write_next (dev, block, next);\ -+ if (next != (sdramaddr_t) 0)\ -+ write_prev (dev, next, block);\ -+ dev->SdramFreeLists[idx] = block;\ -+\ -+ dev->SdramFreeCounts[idx]++;\ -+ dev->Stats.SdramBytesFree += (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_remove(dev,idx,block)\ -+do {\ -+ /*\ -+ * if (block->prev)\ -+ * block->prev->next = block->next;\ -+ * else\ -+ * dev->SdramFreeLists[idx] = block->next;\ -+ * if (block->next)\ -+ * block->next->prev = block->prev;\ -+ */\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+ sdramaddr_t blockprev = read_prev (dev, block);\ -+\ -+ if (blockprev)\ -+ write_next (dev, blockprev, blocknext);\ -+ else\ -+ dev->SdramFreeLists[idx] = blocknext;\ -+ if (blocknext)\ -+ write_prev (dev, blocknext, blockprev);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#define freelist_removehead(dev,idx,block)\ -+do {\ -+ sdramaddr_t blocknext = read_next (dev, block);\ -+\ -+ if ((dev->SdramFreeLists[idx] = blocknext) != 0)\ -+ write_prev (dev, blocknext, 0);\ -+\ -+ dev->SdramFreeCounts[idx]--;\ -+ dev->Stats.SdramBytesFree -= (SDRAM_MIN_BLOCK_SIZE << idx);\ -+} while (0) -+ -+#if defined(DEBUG) -+static int -+display_blocks (ELAN3_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ printk ("%s - indx %d\n", string, indx); -+ for (block = dev->SdramFreeLists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ printk (" %lx", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ printk ("\n"); -+ -+ return (nbytes); -+} -+ -+ -+void -+elan3_sdram_display (ELAN3_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ printk ("elan3_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->SdramFreeLists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ printk ("\n%d bytes free\n", nbytes); -+} -+ -+void -+elan3_sdram_verify (ELAN3_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->SdramFreeLists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned off = sdram_off_to_offset (dev, block); -+ int bit = sdram_off_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->Size) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->Bitmaps[indx], bit) == 0) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdram_off_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->Bitmaps[i], bit + b)) -+ printk ("elan3_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->SdramFreeCounts[indx] != count) -+ printk ("elan3_sdram_verify: indx=%x expected %d got %d\n", indx, dev->SdramFreeCounts[indx], count); -+ } -+} -+ -+#endif /* defined(DEBUG) */ -+ -+static void -+free_block (ELAN3_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN3_SDRAM_BANK *bank = sdram_off_to_bank (dev, block); -+ unsigned bit = sdram_off_to_bit(dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->Bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->Bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%lx buddy=%lx indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->Bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%lx indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->Bitmaps[indx], bit); -+} -+ -+void -+elan3_sdram_init (ELAN3_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->SdramLock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->SdramFreeLists[indx] = (sdramaddr_t) 0; -+ dev->SdramFreeCounts[indx] = 0; -+ } -+} -+ -+void -+elan3_sdram_fini (ELAN3_DEV *dev) -+{ -+ spin_lock_destroy (&dev->SdramLock); -+} -+ -+void -+elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top) -+{ -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = E3_CACHE_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* find the smallest block which is big enough for this allocation */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->SdramFreeLists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: use block=%lx indx=%d\n", dev->SdramFreeLists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->SdramFreeLists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdram_off_to_bank (dev, block)->Bitmaps[i], sdram_off_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ PRINTF1 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_alloc: return block=%lx\n", block); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->SdramLock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan3_sdram_free: indx=%d block=%lx\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->SdramLock, flags); -+} -+ -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/tproc.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/tproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/tproc.c 2005-07-28 14:52:52.815683752 -0400 -@@ -0,0 +1,778 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tproc.c,v 1.51.2.1 2004/11/15 11:12:36 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+int -+HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits) -+{ -+ THREAD_TRAP *trap = dev->ThreadTrap; -+ int delay = 1; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ trap->Status.Status = read_reg32 (dev, Exts.TProcStatus); -+ trap->sp = read_reg32 (dev, Thread_Desc_SP); -+ trap->pc = read_reg32 (dev, ExecutePC); -+ trap->npc = read_reg32 (dev, ExecuteNPC); -+ trap->StartPC = read_reg32 (dev, StartPC); -+ trap->mi = GET_STATUS_TRAPTYPE(trap->Status); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ trap->DirtyBits.Bits = read_reg32 (dev, DirtyBits.Bits); -+ -+ if ( ! (trap->Status.s.WakeupFunction == SleepOneTick) ) { -+ int p,i; -+ E3_uint32 reg = read_reg32 (dev, Exts.InterruptReg); -+ -+ ELAN_REG_REC(reg); -+ p = elan_reg_rec_index; -+ for(i=0;iStatus.s.WakeupFunction == SleepOneTick); -+ -+ /* copy the four access fault areas */ -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), (void *) &trap->FaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), (void *) &trap->DataFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), (void *) &trap->InstFaultSave, 16); -+ elan3_sdram_copyq_from_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), (void *) &trap->OpenFaultSave, 16); -+ -+ /* copy the registers, note the endian swap flips the odd registers into the even registers -+ and visa versa. */ -+ copy_thread_regs (dev, trap->Registers); -+ -+ /* -+ * If the output was open then the ack may not have returned yet. Must wait for the -+ * ack to become valid and update trap_dirty with the new value. Will simulate the -+ * instructions later. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen) -+ { -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ while (! trap->TrapBits.s.AckBufferValid) -+ { -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "tproc: waiting for ack to become valid\n"); -+ trap->TrapBits.Bits = read_reg32 (dev, TrapBits.Bits); -+ DELAY (delay); -+ -+ if ((delay <<= 1) == 0) delay = 1; -+ } -+ } -+ -+ /* update device statistics */ -+ BumpStat (dev, TProcTraps); -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ BumpStat (dev, ForcedTProcTraps); -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (trap->TrapBits.s.PacketTimeout) -+ BumpStat (dev, ThreadOutputTimeouts); -+ else if (trap->TrapBits.s.PacketAckValue == E3_PAckError) -+ BumpStat (dev, ThreadPacketAckErrors); -+ } -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ BumpStat (dev, TrapForTooManyInsts); -+ break; -+ } -+ -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, TProc), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcData), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcInst), 16); -+ elan3_sdram_zeroq_sdram (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, ThreadProcOpen), 16); -+ -+ *RestartBits |= RestartTProc; -+ -+ return (TRUE); -+} -+ -+void -+DeliverTProcTrap (ELAN3_DEV *dev, THREAD_TRAP *threadTrap, E3_uint32 Pend) -+{ -+ ELAN3_CTXT *ctxt; -+ THREAD_TRAP *trap; -+ -+ ASSERT(SPINLOCK_HELD (&dev->IntrLock)); -+ -+ ctxt = ELAN3_DEV_CTX_TABLE(dev, threadTrap->Status.s.Context); -+ -+ if (ctxt == NULL) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "DeliverTProcTrap: context %x invalid\n", threadTrap->Status.s.Context); -+ BumpStat (dev, InvalidContext); -+ } -+ else -+ { -+ if (ELAN3_OP_TPROC_TRAP (ctxt, threadTrap) == OP_DEFER) -+ { -+ if (ELAN3_QUEUE_REALLY_FULL (ctxt->ThreadTrapQ)) -+ { -+ ctxt->Status |= CTXT_COMMAND_OVERFLOW_ERROR; -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ else -+ { -+ trap = ELAN3_QUEUE_BACK (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ -+ bcopy (threadTrap, trap, sizeof (THREAD_TRAP)); -+ -+ PRINTF4 (ctxt, DBG_INTR, "DeliverTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_INTR, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_INTR, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_INTR, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_INTR, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_INTR, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ELAN3_QUEUE_ADD (ctxt->ThreadTrapQ); -+ kcondvar_wakeupone (&ctxt->Wait, &dev->IntrLock); -+ -+ if (ELAN3_QUEUE_FULL (ctxt->ThreadTrapQ)) -+ { -+ PRINTF0 (ctxt, DBG_INTR, "DeliverTProcTrap: thread queue full, must swap out\n"); -+ ctxt->Status |= CTXT_THREAD_QUEUE_FULL; -+ -+ StartSwapoutContext (ctxt, Pend, NULL); -+ } -+ } -+ } -+ } -+} -+ -+int -+NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ -+ ASSERT (SPINLOCK_HELD (&dev->IntrLock)); -+ -+ if (ELAN3_QUEUE_EMPTY (ctxt->ThreadTrapQ)) -+ return (0); -+ -+ *trap = *ELAN3_QUEUE_FRONT (ctxt->ThreadTrapQ, ctxt->ThreadTraps); -+ ELAN3_QUEUE_REMOVE (ctxt->ThreadTrapQ); -+ -+ return (1); -+} -+ -+void -+ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ int i; -+ int res; -+ E3_Addr StackPointer; -+ -+ PRINTF4 (ctxt, DBG_TPROC, "ResolveTProcTrap: SP=%08x PC=%08x NPC=%08x StartPC %08x\n", -+ trap->sp, trap->pc, trap->npc, trap->StartPC); -+ PRINTF3 (ctxt, DBG_TPROC, " mi=%s trap=%08x dirty=%08x\n", -+ MiToName (trap->mi), trap->TrapBits.Bits, trap->DirtyBits.Bits); -+ PRINTF3 (ctxt, DBG_TPROC, " FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ PRINTF3 (ctxt, DBG_TPROC, " OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ -+ BumpUserStat (ctxt, TProcTraps); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ { -+ /* -+ * This occurs if the threads processor trapped. All other cases will be for the ucode -+ * thread trapping. -+ */ -+ int restart = 1; -+ int skip = 0; -+ -+ PRINTF1 (ctxt, DBG_TPROC, "TProc: Mi=Unimp. Using trap->TrapBits=%x\n", trap->TrapBits.Bits); -+ -+ /* -+ * Data Access Exception. -+ */ -+ if (trap->TrapBits.s.DataAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL(ctxt) || trap->DataFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->DataFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: DataAccessException %08x\n", trap->DataFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->DataFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for data %08x\n", -+ trap->DataFaultSave.s.FaultAddress); -+ -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Instruction Access Exception. -+ */ -+ if (trap->TrapBits.s.InstAccessException) -+ { -+ ASSERT (CTXT_IS_KERNEL (ctxt) || trap->InstFaultSave.s.FSR.Status == 0 || -+ ctxt->Capability.cap_mycontext == trap->InstFaultSave.s.FaultContext); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: InstAccessException %08x\n", trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->InstFaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed for inst %08x\n", -+ trap->InstFaultSave.s.FaultAddress); -+ -+ ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->InstFaultSave, res); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Forced TProc trap/Unimplemented instruction -+ * -+ * If there is a force tproc trap then don't look at -+ * the unimplemented instruction bit - since it can -+ * be set in obscure circumstances. -+ */ -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: forced tproc trap, restarting\n"); -+ else if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case ELAN3_ELANCALL_TRAPNUM: -+ /* -+ * Since the thread cannot easily access the global variable which holds -+ * the elan system call number, we provide a different trap for the elan -+ * system call, and copy the system call number into %g1 before calling -+ * ThreadSyscall(). -+ */ -+ BumpUserStat (ctxt, ThreadElanCalls); -+ -+ if (ThreadElancall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_SYSCALL_TRAPNUM: -+ BumpUserStat (ctxt, ThreadSystemCalls); -+ -+ if (ThreadSyscall (ctxt, trap, &skip) != ESUCCESS) -+ { -+ ElanException (ctxt, EXCEPTION_BAD_SYSCALL, THREAD_PROC, trap); -+ restart = 0; -+ } -+ break; -+ -+ case ELAN3_DEBUG_TRAPNUM: -+ ElanException (ctxt, EXCEPTION_DEBUG, THREAD_PROC, trap); -+ skip = 1; -+ break; -+ -+ case ELAN3_ABORT_TRAPNUM: -+ default: -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ break; -+ } -+ -+ } -+ else -+ { -+ ElanException (ctxt, EXCEPTION_UNIMP_INSTR, THREAD_PROC, trap, instr); -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Faulted fetching routes. -+ */ -+ if (trap->TrapBits.s.OpenRouteFetch) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: OpenRouteFetch %08x\n", trap->OpenFaultSave.s.FaultAddress); -+ -+ if ((res = ResolveVirtualProcess (ctxt, trap->OpenFaultSave.s.FaultAddress)) != ESUCCESS && -+ ElanException (ctxt, EXCEPTION_INVALID_PROCESS, THREAD_PROC, trap, trap->DataFaultSave.s.FaultAddress, res) != OP_IGNORE) -+ { -+ restart = 0; -+ } -+ else if (RollThreadToClose (ctxt, trap, E3_PAckDiscard) != ESUCCESS) /* Force a discard */ -+ { -+ restart = 0; -+ } -+ } -+ -+ /* -+ * Thread Timeout -+ */ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ if (ElanException (ctxt, EXCEPTION_PACKET_TIMEOUT, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ else -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: timeout or PAckError!\n"); -+ -+ /* Might deschedule the thread for a while or mark the link error here. */ -+ if (! trap->TrapBits.s.OutputWasOpen && RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ restart = 0; -+ } -+ } -+ } -+ -+ /* -+ * Open exception -+ */ -+ if (trap->TrapBits.s.OpenException) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: open exception\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ /* -+ * Too many instructions. -+ */ -+ if (trap->TrapBits.s.TrapForTooManyInsts) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: too many instructions\n"); -+ if (ElanException (ctxt, EXCEPTION_THREAD_KILLED, THREAD_PROC, trap) != OP_IGNORE) -+ restart = 0; -+ } -+ -+ if (restart) -+ { -+ /* -+ * If the output was open when the trap was taken then the trap code must move -+ * the PC on past the close instruction and simulate the effect of all the instructions -+ * that do not output onto the link. The value of the ack received is then used to -+ * simulate the close instruction. -+ */ -+ if (trap->TrapBits.s.OutputWasOpen && RollThreadToClose(ctxt, trap, trap->TrapBits.s.PacketAckValue) != ESUCCESS) -+ { -+ /* -+ * Don't restart if we couldn't roll it forweards -+ * to a close instruction. -+ */ -+ break; -+ } -+ -+ /* -+ * We must check back 3 instructions from the PC, and if we see the -+ * c_close_cookie() sequence then we must execute the instructions to -+ * the end of it. -+ */ -+ /* XXXX: code to be written */ -+ -+ StackPointer = SaveThreadToStack (ctxt, trap, skip); -+ -+ ReissueStackPointer (ctxt, StackPointer); -+ } -+ -+ break; -+ } -+ -+ /* -+ * This case is different from the others as %o6 has been overwritten with -+ * the SP. The real PC can be read from StartPC and written back -+ * into %o6 on the stack. -+ */ -+ case MI_TProcNext: /* Reading the outs block */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing StartPc to o6\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->StartPC & PC_MASK); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ /* DROPTHROUGH */ -+ } -+ /* -+ * all of these will be generated when starting up a thread. -+ * Just re-issue the command after fixing the trap. The ucode keeps the startup -+ * from trap information in Thread_Desc_SP while it is still loading the regs. -+ */ -+ case MI_WaitForGlobalsRead: /* Reading the globals block (trap restart) */ -+ case MI_WaitForNPCRead: /* Reading the nPC, V and C (trap restart) */ -+ case MI_WaitForPCload: /* Reading the PC, N and Z (trap restart) */ -+ case MI_WaitForInsRead: /* Reading the ins block (trap restart) */ -+ case MI_WaitForLocals: /* Reading the ins block (trap restart) */ -+ case MI_WaitForPCload2: /* Reading the PC (normal thread start) */ -+ case MI_WaitForSpStore: /* Writing the SP to the outs block */ -+ PRINTF2 (ctxt, DBG_TPROC, "ResolveTProcTrap: %s %08x\n", MiToName (trap->mi), trap->InstFaultSave.s.FaultAddress); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, &trap->FaultSave, trap, res) != OP_IGNORE) -+ break; -+ } -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ -+ /* -+ * These traps could occur after the threads proc has stopped (either for a wait, -+ * break, or suspend, but not a trap). Must simulate the uCode's job. -+ */ -+ case MI_WaitForOutsWrite: /* Writing the outs block */ -+ case MI_WaitForNPCWrite: /* Writing the nPC block */ -+ { -+ E3_uint32 DeschedBits = (trap->TrapBits.Bits & E3_TProcDescheduleMask); -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: trapped on %s while stopping a thread\n", MiToName(trap->mi)); -+ -+ /* -+ * Copy npc into o6. -+ */ -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing outs to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ /* -+ * Now write the outs back to the stack. NOTE then endian flip is undone. -+ */ -+ for (i = 0; i < 8; i++) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ /* -+ * thread has been saved. Now find out why the thread proc stopped. -+ */ -+ if (DeschedBits == E3_TProcDescheduleSuspend) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: suspend instruction executed\n"); -+ break; -+ } -+ -+ /* -+ * Break. Just reissue the command. -+ */ -+ if (DeschedBits == E3_TProcDescheduleBreak) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: break instruction, reissue sp %08x\n", trap->sp); -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ ASSERT (DeschedBits == E3_TProcDescheduleWait); -+ -+ /* DROPTHROUGH to fix up a wait event */ -+ } -+ -+ /* -+ * Trapped here trying to execute a wait instruction. All the thread state has already -+ * been saved and the trap has been fixed so simplest thing to do is to start the -+ * thread up at the wait instruction again. -+ */ -+ case MI_WaitForEventWaitAddr: /* Reading back the %o0,%o1 pair for a -+ wait event instr. */ -+ case MI_WaitForWaitEventAccess: /* Locked dword read of the event location. -+ Note that this read is done with write -+ permissions so we never get a trap on the write */ -+ { -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ -+ if ((res = elan3_pagefault (ctxt, &trap->FaultSave, 1)) != ESUCCESS) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "ResolveTProcTrap: elan3_pagefault failed at %08x\n", -+ trap->FaultSave.s.FaultAddress); -+ if (ElanException (ctxt, EXCEPTION_INVALID_ADDR, THREAD_PROC, trap, &trap->DataFaultSave, res) != OP_IGNORE) -+ break; -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF0 (ctxt, DBG_TPROC, "ResolveTProcTrap: faulted writing pc to stack\n"); -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ break; -+ } -+ -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[6]), trap->pc); -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ReissueStackPointer (ctxt, trap->sp); -+ break; -+ } -+ -+ /* -+ * Assume the fault will be fixed by FixupEventTrap. -+ */ -+ default: -+ FixupEventTrap (ctxt, THREAD_PROC, trap, trap->mi, &trap->FaultSave, 0); -+ break; -+ } -+} -+ -+int -+TProcNeedsRestart (ELAN3_CTXT *ctxt) -+{ -+ return (ctxt->ItemCount[LIST_THREAD] != 0); -+} -+ -+void -+RestartTProcItems (ELAN3_CTXT *ctxt) -+{ -+ void *item; -+ E3_uint32 StackPointer; -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ -+ while (ctxt->ItemCount[LIST_THREAD]) -+ { -+ if (! ELAN3_OP_GET_WORD_ITEM (ctxt, LIST_THREAD, &item, &StackPointer)) -+ ctxt->ItemCount[LIST_THREAD] = 0; -+ else -+ { -+ if (IssueCommand (ctxt, offsetof (E3_CommandPort, RunThread), StackPointer, 0) == ISSUE_COMMAND_RETRY) -+ { -+ ELAN3_OP_PUTBACK_ITEM (ctxt, LIST_THREAD, item); -+ kmutex_unlock (&ctxt->SwapListsLock); -+ return; -+ } -+ -+ ctxt->ItemCount[LIST_THREAD]--; -+ ELAN3_OP_FREE_WORD_ITEM (ctxt, item); -+ } -+ } -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+E3_Addr -+SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction) -+{ -+ E3_Addr stack = (trap->sp & SP_MASK) - sizeof (E3_Stack); -+ E3_Addr orflag; -+ register int i; -+ -+ /* -+ * When the thread deschedules normally, the N & Z flags are written -+ * to the stack in o6, and the V & C flags are lost. -+ * Since the Elan will store the NPC into o6 (to skip the instruction), -+ * the CC flags are visible to the trap handler in the trapped PC and NPC. -+ * If the instruction needs to be re-executed then the CC flags need to be -+ * kept in the right place to be read in when the thread re-starts. -+ * -+ * PC has N & Z from trapped NPC. -+ * NPC has V & C from trapped PC. -+ */ -+ if (SkipInstruction) -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = trap->npc; -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = ((trap->npc & PC_MASK) + 4) | (trap->pc & CC_MASK); -+ } -+ else -+ { -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)] = (trap->pc & PC_MASK) | (trap->npc & CC_MASK); -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)] = (trap->npc & PC_MASK) | (trap->pc & CC_MASK); -+ } -+ -+ if (ELAN3_OP_START_FAULT_CHECK(ctxt)) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "RestartThread: faulted writing out thread\n"); -+ ELAN3_OP_END_FAULT_CHECK(ctxt); -+ -+ ElanException (ctxt, EXCEPTION_CANNOT_SAVE_THREAD, THREAD_PROC, NULL); -+ return ((E3_Addr) 0); -+ } -+ -+ -+#ifdef DEBUG_PRINTF -+ PRINTF4 (ctxt, DBG_TPROC, "SaveThreadToStack: SP=%08x PC=%08x NPC=%08x DIRTY=%08x\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits); -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ PRINTF4 (ctxt, DBG_TPROC, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ PRINTF4 (ctxt, DBG_TPROC, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+#endif -+ -+ PRINTF1 (ctxt, DBG_TPROC, "flushing registers to stack %08x\n", stack); -+ -+ /* -+ * NOTE - store the register to the stack in reverse order, since the stack -+ * will be allocated in sdram, and we cannot use the sdram accessing functions -+ * here, as it is "mapped" in user-space. -+ */ -+ for (i = 0; i < 8; i++) -+ { -+ if (trap->DirtyBits.s.GlobalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Globals[i]), trap->Registers[REG_GLOBALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.OutsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Outs[i]), trap->Registers[REG_OUTS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.LocalsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Locals[i]), trap->Registers[REG_LOCALS+(i^WordEndianFlip)]); -+ if (trap->DirtyBits.s.InsDirty & (1 << i)) -+ ELAN3_OP_STORE32 (ctxt, stack + offsetof (E3_Stack, Ins[i]), trap->Registers[REG_INS+(i^WordEndianFlip)]); -+ } -+ -+ /* always restore all registers */ -+ orflag = ThreadRestartFromTrapBit | ThreadReloadAllRegs; -+ -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ return (trap->sp | orflag); -+} -+ -+void -+ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer) -+{ -+ PRINTF1 (ctxt, DBG_TPROC, "ReissueStackPointer : Queue SP %08x\n", StackPointer); -+ -+ kmutex_lock (&ctxt->SwapListsLock); -+ ctxt->ItemCount[LIST_THREAD]++; -+ ELAN3_OP_PUT_WORD_ITEM (ctxt, LIST_THREAD, StackPointer); -+ kmutex_unlock (&ctxt->SwapListsLock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/tprocinsts.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/tprocinsts.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/tprocinsts.c 2005-07-28 14:52:52.816683600 -0400 -@@ -0,0 +1,401 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: tprocinsts.c,v 1.20 2003/09/24 13:57:25 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tprocinsts.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MAXINSTR 256 /* # Instructions to look at while looking for close */ -+ -+static E3_uint32 ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V); -+ -+char *OpcodeNames[] = -+{ -+ "ADD ", -+ "AND ", -+ "OR ", -+ "XOR ", -+ "SUB ", -+ "ANDN ", -+ "ORN ", -+ "XNOR ", -+ "ADDX ", -+ "UNIP ", -+ "UMUL ", -+ "SMUL ", -+ "SUBX ", -+ "UNIP ", -+ "UDIV ", -+ "SDIV ", -+ "ADDcc ", -+ "ANDcc ", -+ "ORcc ", -+ "XORcc ", -+ "SUBcc ", -+ "ANDNcc", -+ "ORNcc ", -+ "XNORcc", -+ "ADDXcc", -+ "UNIPcc", -+ "UMULcc", -+ "SMULcc", -+ "SUBXcc", -+ "UNIPcc", -+ "UDIVcc", -+ "SDIVcc" -+}; -+ -+#define REGISTER_VALUE(trap, rN) (((rN) == 0) ? 0 : (trap)->Registers[(rN)^WordEndianFlip]) -+#define ASSIGN_REGISTER(trap, rN, value) ((rN) != 0 ? trap->Registers[(rN)^WordEndianFlip] = (value) : 0) -+ -+int -+RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal) -+{ -+ E3_Addr pc = (trap->pc & PC_MASK); -+ E3_Addr npc = (trap->npc & PC_MASK); -+ E3_uint32 Z = (trap->npc & PSR_Z_BIT) ? 1 : 0; -+ E3_uint32 N = (trap->npc & PSR_N_BIT) ? 1 : 0; -+ E3_uint32 C = (trap->pc & PSR_C_BIT) ? 1 : 0; -+ E3_uint32 V = (trap->pc & PSR_V_BIT) ? 1 : 0; -+ E3_uint32 instr; -+ E3_Addr addr; -+ -+ if (ELAN3_OP_START_FAULT_CHECK (ctxt)) -+ { -+ failed: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ ElanException (ctxt, EXCEPTION_SIMULATION_FAILED, THREAD_PROC, trap); -+ return (EFAULT); -+ } -+ -+ /* -+ * Thread trapped with output open, or while closing, -+ * so roll the PC forwards to the instruction after the -+ * next c_close, and execute that with the register -+ * specified in c_close set to the trap which occured. -+ * (This is not 1 which means an ACK) -+ */ -+ PRINTF1 (ctxt, DBG_TPROC, "RollThreadToClose: roll pc %x to c_close\n", pc); -+ -+ for (;;) -+ { -+ instr = ELAN3_OP_LOAD32 (ctxt, pc); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "RollThreadToClose: PC=%x INSTR=%x\n", pc, instr); -+ -+ switch (OPCODE_CLASS(instr)) -+ { -+ case OPCODE_CLASS_0: -+ switch ((instr) & OPCODE_CLASS0_MASK) -+ { -+ case OPCODE_SETHI: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : sethi r%d = %x\n", pc, INSTR_RD(instr), instr << 10); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), instr << 10); -+ break; -+ -+ case OPCODE_SENDREG: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendreg\n", pc); -+ break; -+ -+ case OPCODE_SENDMEM: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : sendmem\n", pc); -+ break; -+ -+ case OPCODE_BICC: -+ { -+ int DoBranch = (instr >> 28) & 1; -+ int CondBranch = 1; -+ E3_Addr OldnPC = npc; -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : Bicc Z=%x N=%x C=%x V=%x ", pc, Z, N, C, V); -+ switch (instr & OPCODE_BICC_MASK) -+ { -+ case OPCODE_BICC_BN: CondBranch = 0; break; -+ case OPCODE_BICC_BE: DoBranch ^= Z; break; -+ case OPCODE_BICC_BLE: DoBranch ^= Z | (N ^ V); break; -+ case OPCODE_BICC_BL: DoBranch ^= N ^ V; break; -+ case OPCODE_BICC_BLEU: DoBranch ^= C | Z; break; -+ case OPCODE_BICC_BCS: DoBranch ^= C; break; -+ case OPCODE_BICC_BNEG: DoBranch ^= N; break; -+ case OPCODE_BICC_BVS: DoBranch ^= V; break; -+ } -+ -+ /* Do the branch */ -+ if (DoBranch != 0) -+ { -+ npc = pc + (((instr & 0x3fffff) << 2) | -+ (((instr & 0x200000) != 0) ? 0xff000000 : 0)); -+ -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : branch taken to %x\n", pc, npc); -+ } -+ else -+ { -+ npc = npc + 4; -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch not taken\n", pc); -+ } -+ pc = OldnPC; -+ -+ /* Test if the next is annuled */ -+ if (((instr & OPCODE_BICC_ANNUL) != 0) & -+ ((DoBranch == 0) | (CondBranch == 0))) -+ { -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : branch annulled\n", pc); -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+ /* -+ * we've already consumed the instruction - so continue rather -+ * than break; -+ */ -+ continue; -+ } -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 0 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ -+ case OPCODE_CLASS_1: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 1 instr %x\n", pc, instr); -+ goto failed; -+ -+ case OPCODE_CLASS_2: -+ { -+ E3_uint32 X = REGISTER_VALUE (trap, INSTR_RS1(instr)); -+ E3_uint32 Y = (instr & OPCODE_IMM) ? INSTR_IMM(instr) : REGISTER_VALUE (trap, INSTR_RS2(instr)); -+ -+ if ((instr & OPCODE_NOT_ALUOP) == 0) -+ { -+ E3_uint32 fcode = (instr >> OPCODE_FCODE_SHIFT) & OPCODE_FCODE_MASK; -+ E3_uint32 result = ALU (ctxt, fcode, X, Y, &Z, &N, &C, &V); -+ -+ PRINTF5 (ctxt, DBG_TPROC, "PC %x : %s %x %x -> %x", pc, OpcodeNames[fcode], X, Y, result); -+ PRINTF4 (ctxt, DBG_TPROC, " Z=%x N=%x C=%x V=%x\n", Z, N, C, V); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), result); -+ } -+ else -+ { -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_OPEN: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_open\n", pc); -+ break; -+ -+ case OPCODE_CLOSE: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ goto found_close; -+ -+ case OPCODE_SLL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SLL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X << Y); -+ break; -+ -+ case OPCODE_SRL: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRL\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_SRA: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SRA\n", pc); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), X >> Y); -+ break; -+ -+ case OPCODE_BREAKTEST: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAKTEST not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_BREAK: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : BREAK not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_SUSPEND: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : SUSPEND not allowed while open\n", pc); -+ goto failed; -+ -+ case OPCODE_WAIT: -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : WAIT not allowed while open\n", pc); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 2 instr %x\n", pc, instr); -+ goto failed; -+ } -+ } -+ break; -+ } -+ -+ case OPCODE_CLASS_3: -+ { -+ if ((instr & OPCODE_IMM) != 0) -+ addr = REGISTER_VALUE (trap, INSTR_RS1(instr)) + INSTR_IMM(instr); -+ else -+ addr = (REGISTER_VALUE (trap, INSTR_RS1(instr)) + -+ REGISTER_VALUE (trap, INSTR_RS2(instr))); -+ -+ switch (instr & OPCODE_MASK) -+ { -+ case OPCODE_LD: -+ PRINTF3 (ctxt, DBG_TPROC, "PC %x : LD [%x], r%d\n", pc, addr, INSTR_RD(instr)); -+ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), ELAN3_OP_LOAD32 (ctxt, addr)); -+ break; -+ -+ case OPCODE_LDD: -+ case OPCODE_LDBLOCK16: -+ case OPCODE_LDBLOCK32: -+ case OPCODE_LDBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : LDBLOCKx @ %x is not possible while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_ST: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : ST @ %x\n", pc, addr); -+ -+ ELAN3_OP_STORE32 (ctxt, addr, REGISTER_VALUE (trap, INSTR_RD(instr))); -+ break; -+ -+ case OPCODE_STD: -+ case OPCODE_STBLOCK16: -+ case OPCODE_STBLOCK32: -+ case OPCODE_STBLOCK64: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : STD @ %x is not posisble while output open\n", pc, addr); -+ goto failed; -+ -+ case OPCODE_SWAP: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : SWAP @ %x is not posible while output open\n", pc, addr); -+ goto failed; -+ -+ default: -+ PRINTF2 (ctxt, DBG_TPROC, "PC %x : unknown class 3 instr %x\n", pc, instr); -+ goto failed; -+ } -+ break; -+ }} -+ -+ pc = npc; -+ npc += 4; -+ } -+ -+found_close: -+ ELAN3_OP_END_FAULT_CHECK (ctxt); -+ -+ PRINTF1 (ctxt, DBG_TPROC, "PC %x : c_close\n", pc); -+ -+ /* -+ * Found the new pc, and have the close instruction in *instr -+ */ -+ ASSIGN_REGISTER (trap, INSTR_RD(instr), PAckVal); -+ -+ /* -+ * Move to instruction after close. -+ */ -+ trap->pc = npc; -+ -+ /* Insert the value of Z and N from the close inst */ -+ trap->npc = (npc + 4) | ((PAckVal == E3_PAckOk) ? 1 : -+ (PAckVal == E3_PAckTestFail) ? 2 : 0); -+ -+ return (ESUCCESS); -+} -+ -+E3_uint32 -+ALU (ELAN3_CTXT *ctxt, -+ E3_uint32 fcode, E3_uint32 X, E3_uint32 Y, -+ E3_uint32 *Z, E3_uint32 *N, E3_uint32 *C, E3_uint32 *V) -+{ -+ E3_uint32 XMSB, YMSB, ZMSB, Cprime; -+ E3_uint32 Yprime; -+ E3_uint32 Result=0; -+ -+ Yprime = ((fcode >> 2) & 1) ? ~Y : Y; -+ Cprime = ((fcode >> 2) & 1) ^ (*C & ((fcode >> 3) & 1)); -+ XMSB = (X >> 31) & 1; -+ YMSB = (Yprime >> 31) & 1; -+ /* mul or div */ -+ if ((fcode & 0xa) == 0xa) -+ { -+ PRINTF0 (ctxt, DBG_TPROC, "ALU: tried a multiply or a divide\n"); -+ return (0); -+ } -+ -+ switch (fcode & 3) -+ { -+ /*ADD */ -+ case 0: -+ Result = X + Yprime + Cprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ ZMSB = Result >> 31; -+ *V = ((XMSB & YMSB & ~ZMSB) | (~XMSB &~YMSB & ZMSB)); -+ *C = ((fcode >> 2) & 1) ^ ( (XMSB & YMSB) | (~ZMSB & (XMSB | YMSB))); -+ break; -+ -+ /*AND */ -+ case 1: -+ Result = X & Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*OR */ -+ case 2: -+ Result = X | Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ -+ /*XOR */ -+ case 3: -+ Result = X ^ Yprime ; -+ if ((fcode & 0x10) == 0) -+ return (Result); -+ -+ *V = 0; -+ *C = 0; -+ break; -+ } -+ -+ *Z = (Result == 0) ? 1 : 0; -+ *N = (Result >> 31) & 1; -+ -+ return (Result); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/tproc_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/tproc_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/tproc_linux.c 2005-07-28 14:52:52.817683448 -0400 -@@ -0,0 +1,215 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: tproc_linux.c,v 1.19.2.1 2004/10/28 17:08:56 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/tproc_linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+#ifdef NO_ABI -+#include -+extern asmlinkage long sys_open(const char *, int, int); -+extern asmlinkage ssize_t sys_write(unsigned int, const char *, size_t); -+extern asmlinkage ssize_t sys_read(unsigned int, char *, size_t); -+extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int); -+extern asmlinkage long sys_poll(struct pollfd *, unsigned int, long); -+extern asmlinkage long sys_kill(int, int); -+#else -+# include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * NOTE: system calls from kernel on Linux are different on alpha and i386 -+ * on alpha they return -errno on failure -+ * on i386 they return -1 on failure and set errno -+ */ -+ -+static void -+ReturnSyscall (THREAD_TRAP *trap, unsigned long rc, int *skip) -+{ -+ if (rc >= (unsigned long) (-130)) -+ { -+ trap->pc |= PSR_C_BIT; /* clear carry to indicate failure */ -+ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = -rc; -+ } -+ else -+ { -+ trap->pc &= ~PSR_C_BIT; /* set carry to indicate success */ -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)] = rc; -+ } -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)] = 0; -+ *skip = 1; -+} -+ -+static void -+dump_regs(ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ PRINTF (ctxt, DBG_TPROC, " OUTS %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ PRINTF (ctxt, DBG_TPROC, " %08x %08x %08x %08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+} -+ -+int -+ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int code; -+ caddr_t maddr; -+ struct file *file; -+ unsigned long rc; -+ int i; -+ uintptr_t av[6]; -+ uintptr_t ptr; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadSyscall: PC %08x G1 %08x\n", -+ trap->pc, trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]); -+ dump_regs(ctxt, trap); -+ -+ code = trap->Registers[REG_GLOBALS+(1^WordEndianFlip)]; -+ -+ /* Copy the system call arguments from %o0-%o5 */ -+ for (i = 0; i < 6; i++) -+ av[i] = trap->Registers[REG_OUTS+(i^WordEndianFlip)]; -+ -+ rc = (unsigned long) -EINVAL; -+ -+ switch (code) { -+ case ELAN3_SYS_open: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_open((const char *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_close: -+ rc = sys_close(av[0]); -+ break; -+ -+ case ELAN3_SYS_write: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_write(av[0], (const char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_read: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[1]); -+ if (maddr != NULL) -+ rc = sys_read(av[0], (char *)maddr, av[2]); -+ break; -+ -+ case ELAN3_SYS_poll: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ if (maddr != NULL) -+ rc = sys_poll((struct pollfd *)maddr, av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_lseek: -+ rc = sys_lseek(av[0], av[1], av[2]); -+ break; -+ -+ case ELAN3_SYS_mmap: -+ if ((E3_Addr) av[0] == (E3_Addr) 0) -+ maddr = NULL; -+ else if ((maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0])) == NULL) -+ break; -+ -+ file = NULL; -+ /* GNAT 5515: If *not* anonymous memory need to do fget */ -+ if ((av[3] & MAP_ANONYMOUS) == 0 && (file = fget (av[4])) == NULL) -+ { -+ rc = -EBADF; -+ break; -+ } -+ -+ down_write (¤t->mm->mmap_sem); -+ ptr = do_mmap_pgoff (file, (unsigned long) maddr, av[1], av[2], av[3], av[5] >>PAGE_SHIFT); -+ up_write (¤t->mm->mmap_sem); -+ -+ if (file) -+ fput (file); -+ -+ if (IS_ERR((void *) ptr)) -+ rc = PTR_ERR((void *) ptr); -+ else -+ rc = elan3mmu_elanaddr (ctxt->Elan3mmu, (caddr_t)ptr); -+ -+ break; -+ -+ case ELAN3_SYS_munmap: -+ maddr = elan3mmu_mainaddr (ctxt->Elan3mmu, (E3_Addr) av[0]); -+ -+#ifdef AC -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1], 1); -+#else -+ if (maddr != NULL) -+ rc = do_munmap(current->mm, (unsigned long) maddr, av[1]); -+#endif -+ break; -+ -+ case ELAN3_SYS_kill: -+ rc = sys_kill(av[0], av[1]); -+ break; -+ -+ case ELAN3_SYS_getpid: -+ rc = current->pid; -+ break; -+ -+ default: -+ return EINVAL; -+ } -+ ReturnSyscall(trap, rc, skip); -+ return ESUCCESS; -+} -+ -+ -+int -+ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip) -+{ -+ int ret = ESUCCESS; -+ -+ PRINTF (ctxt, DBG_TPROC, "ThreadElancall: PC %08x\n", trap->pc); -+ dump_regs(ctxt, trap); -+ -+ /* -+ * Elan system call 'type' is passed in o0 -+ */ -+ switch (trap->Registers[REG_OUTS+(0^WordEndianFlip)]) -+ { -+ default: -+ ret = EINVAL; -+ break; -+ } -+ return ret; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan3/virtual_process.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan3/virtual_process.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan3/virtual_process.c 2005-07-28 14:52:52.818683296 -0400 -@@ -0,0 +1,884 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: virtual_process.c,v 1.68 2004/06/07 13:50:10 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/os/virtual_process.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static ELAN3_VPSEG * -+InstallSegment (ELAN3_CTXT *ctxt, int process, int entries) -+{ -+ ELAN3_VPSEG **prevSeg, *seg; -+ int lastTop = -1; -+ int top = process + entries-1; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (seg = (*prevSeg)) != NULL; prevSeg = &seg->Next) -+ { -+ int thisTop = seg->Process + seg->Entries - 1; -+ -+ if (process < seg->Process && (process <= lastTop || top >= seg->Process)) -+ { -+ /* -+ * Overlaps with last segment, or this one -+ */ -+ return (NULL); -+ } -+ if (seg->Process > process) -+ break; -+ -+ lastTop = thisTop; -+ } -+ -+ KMEM_ZALLOC (seg, ELAN3_VPSEG *, sizeof (ELAN3_VPSEG), TRUE); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ return (NULL); -+ -+ seg->Process = process; -+ seg->Entries = entries; -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "InstallSegment: add seg %p before %p\n", seg, *prevSeg); -+ -+ seg->Next = *prevSeg; -+ *prevSeg = seg; -+ -+ return (seg); -+} -+ -+static int -+RemoveSegment (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg) -+{ -+ ELAN3_VPSEG **prevSeg, *thisSeg; -+ -+ ASSERT (krwlock_is_write_locked (&ctxt->VpLock)); -+ -+ for (prevSeg = &ctxt->VpSegs; (thisSeg = (*prevSeg)) != NULL; prevSeg = &thisSeg->Next) -+ { -+ if (thisSeg == seg) -+ break; -+ } -+ -+ if (thisSeg == (ELAN3_VPSEG *) NULL) -+ return (EINVAL); -+ -+ -+ PRINTF2 (ctxt, DBG_VP, "RemoveSegment: remove seg %p next %p\n", thisSeg, thisSeg->Next); -+ -+ *prevSeg = thisSeg->Next; -+ -+ KMEM_FREE ((caddr_t) seg, sizeof (ELAN3_VPSEG)); -+ -+ return (ESUCCESS); -+} -+ -+static ELAN3_VPSEG * -+FindSegment (ELAN3_CTXT *ctxt, int low, int high) -+{ -+ ELAN3_VPSEG *seg; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ if (seg->Process <= low && (seg->Process + seg->Entries) > high) -+ return (seg); -+ } -+ -+ return ((ELAN3_VPSEG *) NULL); -+} -+ -+ELAN_LOCATION -+ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN_LOCATION location; -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ ASSERT(krwlock_is_locked (&ctxt->VpLock)); -+ -+ location.loc_node = ELAN3_INVALID_NODE; -+ location.loc_context = -1; -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d seg %p cap %p\n", process, seg, cap); -+ -+ if (seg == NULL) -+ seg = FindSegment (ctxt, process, process); -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return (location); -+ -+ cap = &seg->SegCapability; -+ nnodes = ELAN_CAP_NUM_NODES (cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if (( seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, ctx + (node * nctxs))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((seg->SegCapability.cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->SegCapability.cap_bitmap, node + (ctx * nnodes))) -+ { -+ if (i++ == (process - seg->Process)) -+ { -+ location.loc_node = seg->SegCapability.cap_lownode + node; -+ location.loc_context = seg->SegCapability.cap_lowcontext + ctx; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ found: -+ -+ PRINTF3 (ctxt, DBG_VP, "ProcessToLocation: process %d -> Node %d Context %d\n", process, location.loc_node, location.loc_context); -+ -+ if (cap != NULL) -+ { -+ bcopy ((caddr_t) &seg->SegCapability, (caddr_t) cap, sizeof (ELAN_CAPABILITY)); -+ cap->cap_mycontext = location.loc_context; -+ } -+ -+ return (location); -+} -+ -+int -+LocationToProcess (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, ELAN_LOCATION loc, ELAN_CAPABILITY *cap) -+{ -+ int nnodes,nctxs; -+ int node,ctx,i; -+ -+ if (seg == NULL) -+ return ELAN3_INVALID_PROCESS; -+ -+ if (!seg || (seg->Type != ELAN3_VPSEG_P2P)) -+ return ELAN3_INVALID_PROCESS; -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (node = 0, i = 0; node < nnodes && i < entries; node++) -+ { -+ for (ctx = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctx + (node * nctxs))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ } -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ int entries = ELAN_CAP_ENTRIES(cap); -+ -+ for (ctx = 0, i = 0; ctx < nctxs && i < entries; ctx++) -+ { -+ for (node = 0; node < nnodes && i < entries; node++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, node + (ctx * nnodes))) -+ { -+ if ((loc.loc_node == (cap->cap_lownode + node) ) -+ && (loc.loc_context == (cap->cap_lowcontext + ctx) )) -+ { -+ return (i + seg->Process); -+ } -+ i++; -+ -+ } -+ } -+ } -+ break; -+ } -+ default: -+ break; -+ } -+ -+ return ELAN3_INVALID_PROCESS; -+} -+ -+int -+elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ int i; -+ int nodeOff; -+ int ctxOff; -+ int nnodes; -+ int nctxs; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ int entries; -+ -+ PRINTF2 (ctxt, DBG_VP, "elan3_addvp: %d -> %s\n", process, CapabilityString (cap)); -+ -+ entries = ELAN_CAP_ENTRIES(cap); -+ if (entries <= 0 || (process + entries) > ELAN3_MAX_VPS) -+ return (EINVAL); -+ -+ /* -+ * Scan the virtual process segment list, to add this entry, and ensure that -+ * the ranges don't overlap. -+ */ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, cap, ELAN_USER_P2P)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(cap, cap) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, entries)) == NULL) -+ { -+ PRINTF0 (ctxt, DBG_VP, "elan3_addvp: failed to find a seg\n"); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_P2P; -+ seg->SegCapability = *cap; -+ seg->SegCapability.cap_mycontext = ELAN_CAP_UNINITIALISED; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: segment type %x %d %d\n", -+ seg->SegCapability.cap_type, seg->Process, entries); -+ -+ -+ nnodes = cap->cap_highnode - cap->cap_lownode + 1; -+ nctxs = cap->cap_highcontext - cap->cap_lowcontext + 1; -+ -+ /* position not determined, so cannot load any routes, the hwtest -+ * process must explicitly set it's own routes */ -+ -+ if (!(cap->cap_type & ELAN_CAP_TYPE_HWTEST) && (pos->pos_mode != ELAN_POS_UNKNOWN)) -+ { -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, i = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode +nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext + ctxOff, nflits, flits); -+ } -+ -+ i++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, i = 0; ctxOff < nctxs && i < entries; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes && i < entries; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ /* Don't load a route if there's no switch and trying to talk to myself */ -+ if (pos->pos_mode == ELAN_POS_MODE_SWITCHED || -+ (pos->pos_mode == ELAN_POS_MODE_LOOPBACK && cap->cap_lownode + nodeOff == pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && cap->cap_lownode + nodeOff != pos->pos_nodeid)) -+ { -+ PRINTF3 (ctxt, DBG_VP, "elan3_addvp: virtual process %d -> node %d context %d\n", -+ seg->Process + i, cap->cap_lownode + nodeOff, cap->cap_lowcontext +ctxOff); -+ -+ nflits = GenerateRoute (pos, flits, cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, -+ DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ -+ LoadRoute (dev, ctxt->RouteTable, seg->Process+i, cap->cap_lowcontext +ctxOff, nflits, flits); -+ } -+ i++; -+ } -+ } -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_removevp (ELAN3_CTXT *ctxt, int process) -+{ -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *next; -+ int i; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_removevp: remove process %d\n", process); -+ -+ if (process == ELAN3_INVALID_PROCESS) -+ seg = ctxt->VpSegs; -+ else -+ seg = FindSegment (ctxt, process, process); -+ -+ if (seg == (ELAN3_VPSEG *) NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ do { -+ PRINTF3 (ctxt, DBG_VP, "elan3_removevp: segment is %p [%x,%x]\n", -+ seg, seg->Process, seg->Process+seg->Entries); -+ -+ for (i = 0; i < seg->Entries; i++) -+ ClearRoute (ctxt->Device, ctxt->RouteTable, seg->Process+i); -+ -+ /* get Next pointer value before structure is free'd */ -+ next = seg->Next; -+ RemoveSegment (ctxt, seg); -+ -+ } while (process == ELAN3_INVALID_PROCESS && (seg = next) != NULL); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_addbcastvp (ELAN3_CTXT *ctxt, int process, int lowProc, int highProc) -+{ -+ ELAN_POSITION *pos = &ctxt->Position; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ int virtualProcess; -+ E3_uint64 routeValue; -+ -+ PRINTF3 (ctxt, DBG_VP, "elan3_addbcastvp: process %d [%d,%d]\n", process, lowProc, highProc); -+ -+ if (lowProc > highProc || pos->pos_mode != ELAN_POS_MODE_SWITCHED) -+ return (EINVAL); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if ((aseg = FindSegment (ctxt, lowProc, highProc)) == NULL || (aseg->Type != ELAN3_VPSEG_P2P)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to p2p segment\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check aseg->SegCapability */ -+ switch (elan3_validate_cap (ctxt->Device, &aseg->SegCapability, ELAN_USER_BROADCAST)) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &aseg->SegCapability) != ESUCCESS ) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ( ProcessToLocation (ctxt, aseg, lowProc, NULL).loc_context != -+ ProcessToLocation (ctxt, aseg, highProc, NULL).loc_context) -+ { -+ PRINTF2 (ctxt, DBG_VP, "elan3_addbcastvp: process [%d,%d] does not map to single context\n", lowProc, highProc); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = InstallSegment (ctxt, process, 1)) == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ seg->Type = ELAN3_VPSEG_BROADCAST; -+ seg->SegLowProc = lowProc; -+ seg->SegHighProc = highProc; -+ -+ PRINTF4 (ctxt, DBG_VP, "elan3_addbcastvp: installed seg %p Type %d LowProc %d HighProc %d\n", -+ seg, seg->Type, seg->SegLowProc, seg->SegHighProc); -+ -+ for (virtualProcess = lowProc; virtualProcess <= highProc; virtualProcess++) -+ { -+ if (virtualProcess < 0 || virtualProcess >= ctxt->RouteTable->Size) -+ routeValue = 0; -+ else -+ routeValue = elan3_sdram_readq ( ctxt->Device, ctxt->RouteTable->Table + virtualProcess * NBYTES_PER_SMALL_ROUTE); -+ -+ if (! (routeValue & ROUTE_VALID)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "loadvp[%x]: broadcast %x not valid\n", -+ ctxt->Capability.cap_mycontext, virtualProcess); -+ break; -+ } -+ } -+ -+ if (virtualProcess > highProc) /* All vps now present */ -+ { /* so load up broadcast route */ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN_LOCATION low = ProcessToLocation (ctxt, aseg, lowProc, NULL); -+ ELAN_LOCATION high = ProcessToLocation (ctxt, aseg, highProc, NULL); -+ int nflits = GenerateRoute (pos, flits, low.loc_node, high.loc_node, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ PRINTF6 (ctxt, DBG_VP, "loadvp[%x]: broadcast %d -> %x.%x [%x.%x]\n", ctxt->Capability.cap_mycontext, -+ seg->Process, low.loc_node, high.loc_node, -+ low.loc_context, high.loc_context); -+ -+ LoadRoute ( ctxt->Device, ctxt->RouteTable, seg->Process, low.loc_context, nflits, flits); -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); -+} -+ -+int -+elan3_process (ELAN3_CTXT *ctxt) -+{ -+ int res = ELAN3_INVALID_PROCESS; -+ ELAN3_VPSEG *seg; -+ ELAN_LOCATION loc; -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ loc.loc_node = ctxt->Position.pos_nodeid; -+ loc.loc_context = ctxt->Capability.cap_mycontext; -+ -+ for (seg = ctxt->VpSegs ; seg; seg = seg->Next) -+ { -+ if (seg->Type == ELAN3_VPSEG_P2P && -+ seg->SegCapability.cap_lowcontext <= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_highcontext >= ctxt->Capability.cap_mycontext && -+ seg->SegCapability.cap_lownode <= ctxt->Position.pos_nodeid && -+ seg->SegCapability.cap_highnode >= ctxt->Position.pos_nodeid) -+ { -+ if ((res=LocationToProcess (ctxt,seg,loc,&ctxt->Capability)) != ELAN3_INVALID_PROCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return res; -+ } -+ } -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_check_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError) -+{ -+ PRINTF5 (ctxt, DBG_VP, "elan3_check_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_read (&ctxt->VpLock); -+ *routeError=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node); -+ krwlock_done (&ctxt->VpLock); -+ -+ return (ESUCCESS); /* the call is a success tho the errorcode may be set */ -+} -+ -+int -+elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ int nflits; -+ int err; -+ -+ PRINTF5 (ctxt, DBG_VP, "elan3_load_route: vp=%d flits=%04x %04x %04x %04x\n", -+ process, flits[0], flits[1], flits[2], flits[3]); -+ PRINTF4 (ctxt, DBG_VP, " %04x %04x %04x %04x\n", -+ flits[4], flits[5], flits[6], flits[7]); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ /* check the route is valid */ -+ if (!(ctxt->Capability.cap_type & ELAN_CAP_TYPE_HWTEST)) -+ { -+ /* must have already attached to define my context number */ -+ if (ctxt->Capability.cap_mycontext == ELAN_CAP_UNINITIALISED) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((err=elan3_route_check(ctxt,flits,ProcessToLocation (ctxt, NULL, process, NULL).loc_node)) != ELAN3_ROUTE_SUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) == NULL || seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* Calculate number of flits in this route */ -+ for (nflits = 0; nflits < MAX_FLITS && flits[nflits]; nflits++) -+ ; -+ -+ res = LoadRoute (ctxt->Device, ctxt->RouteTable, process, ProcessToLocation (ctxt, seg, process, NULL).loc_context, nflits, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits) -+{ -+ ELAN3_VPSEG *seg; -+ int res = 0; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_get_route: vp=%d \n", process); -+ -+ krwlock_write (&ctxt->VpLock); -+ -+ if (ctxt->RouteTable == NULL) /* is there a route table */ -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if ((seg = FindSegment (ctxt, process, process)) != NULL && seg->Type != ELAN3_VPSEG_P2P) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (seg == NULL) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ res = GetRoute (ctxt->Device, ctxt->RouteTable, process, flits); -+ -+ krwlock_done (&ctxt->VpLock); -+ -+ return (res); -+} -+ -+int -+elan3_reset_route (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ -+ PRINTF1 (ctxt, DBG_VP, "elan3_reset_route: vp=%d \n", process); -+ -+ GenerateRoute (&ctxt->Position, flits, process, process, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ return elan3_load_route(ctxt,process,flits); -+} -+ -+int -+ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ ELAN3_DEV *dev = ctxt->Device; -+ int res = ESUCCESS; -+ ELAN3_VPSEG *seg; -+ ELAN3_VPSEG *aseg; -+ E3_uint64 routeValue; -+ -+ krwlock_read (&ctxt->VpLock); -+ -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: vp=%d \n", process); -+ -+ if (ctxt->RouteTable == NULL || process < 0 || process >= ctxt->RouteTable->Size) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ if (! (seg = FindSegment (ctxt, process, process))) -+ { -+ PRINTF1 (ctxt, DBG_VP, "ResolveVirtualProcess: cannot find segment for virtual process %d\n", process); -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ /* check cap. */ -+ switch (elan3_validate_cap (ctxt->Device, &seg->SegCapability, ((seg->Type == ELAN3_VPSEG_P2P) ? ELAN_USER_P2P : ELAN_USER_BROADCAST))) -+ { -+ case ELAN_CAP_OK: -+ /* nothing */ -+ break; -+ -+ case ELAN_CAP_RMS: -+ if ( elan_validate_map(&ctxt->Capability, &seg->SegCapability) != ESUCCESS) -+ { -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ break; -+ -+ default: -+ krwlock_done (&ctxt->VpLock); -+ return (EINVAL); -+ } -+ -+ BumpUserStat (ctxt, LoadVirtualProcess); -+ -+ routeValue = elan3_sdram_readq (dev, ctxt->RouteTable->Table + process * NBYTES_PER_SMALL_ROUTE); -+ if (routeValue & ROUTE_VALID) /* Virtual process already */ -+ { /* loaded */ -+ krwlock_done (&ctxt->VpLock); -+ return (ESUCCESS); -+ } -+ -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ switch (seg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ if ((res = elan_validate_map (&ctxt->Capability,&seg->SegCapability)) == ESUCCESS && -+ (res = GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_check(ctxt, flits, ProcessToLocation (ctxt, seg, process, NULL).loc_node)) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ default: -+ res = EINVAL; -+ break; -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ /* Find the segment that this broadcast range spans. */ -+ aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg == NULL || (aseg->Type != ELAN3_VPSEG_P2P) || !(aseg->SegCapability.cap_type & ELAN_CAP_TYPE_BROADCASTABLE)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "resolveVirtualProcess: %d -> EINVAL (%s)\n", process, -+ (aseg == NULL ? "no segment" : ((seg->Type != ELAN3_VPSEG_P2P) ? "not point to point" : -+ "not broadcastable"))); -+ res = EINVAL; -+ break; -+ } -+ -+ switch (aseg->SegCapability.cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ case ELAN_CAP_TYPE_CYCLIC: -+ { -+ ELAN_LOCATION lowNode = ProcessToLocation (ctxt,aseg,seg->SegLowProc , NULL); -+ ELAN_LOCATION highNode = ProcessToLocation (ctxt,aseg,seg->SegHighProc , NULL); -+ -+ -+ if ((res = elan_validate_map (&ctxt->Capability,&aseg->SegCapability)) == ESUCCESS && -+ (res=GetRoute(dev, ctxt->RouteTable ,process, flits)) == ESUCCESS) -+ { -+ if (elan3_route_broadcast_check(ctxt,flits, lowNode.loc_node , highNode.loc_node ) != ELAN3_ROUTE_SUCCESS ) -+ res = EINVAL; -+ else -+ ValidateRoute(dev, ctxt->RouteTable, process); -+ } -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ krwlock_done (&ctxt->VpLock); -+ return (res); -+} -+ -+void -+UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap) -+{ -+ ELAN3_DEV *dev = ctxt->Device; -+ ELAN3_VPSEG *seg; -+ ELAN_CAPABILITY *scap; -+ int i; -+ -+ for (seg = ctxt->VpSegs; seg; seg = seg->Next) -+ { -+ switch (seg->Type) -+ { -+ case ELAN3_VPSEG_P2P: -+ scap = &seg->SegCapability; -+ -+ if (cap == NULL || ELAN_CAP_MATCH (scap, cap)) -+ { -+ PRINTF2 (ctxt, DBG_VP, "unloadvp: segment [%x.%x]\n", -+ seg->Process, seg->Process + seg->Entries-1); -+ -+ for (i = 0; i < seg->Entries; i++) -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ break; -+ -+ case ELAN3_VPSEG_BROADCAST: -+ for (i = 0; i < seg->Entries; i++) -+ { -+ ELAN3_VPSEG *aseg = FindSegment (ctxt, seg->SegLowProc, seg->SegHighProc); -+ -+ if (aseg != NULL && ELAN_CAP_MATCH(&aseg->SegCapability, cap)) -+ { -+ PRINTF1 (ctxt, DBG_VP, "unloadvp: broadcast vp %d\n", seg->Process); -+ -+ InvalidateRoute (dev, ctxt->RouteTable, seg->Process+i); -+ } -+ } -+ } -+ } -+} -+ -+caddr_t -+CapabilityString (ELAN_CAPABILITY *cap) -+{ -+#define CAPSTR_LEN 200 -+#define NCAPSTRS 4 -+ static char space[CAPSTR_LEN*NCAPSTRS]; -+ static int bufnum; -+ static spinlock_t lock; -+ static int lockinitialised; -+ int num; -+ unsigned long flags; -+ -+ if (! lockinitialised) -+ { -+ spin_lock_init (&lock); -+ lockinitialised = 1; -+ } -+ -+ spin_lock_irqsave (&lock, flags); -+ -+ if ((num = ++bufnum) == NCAPSTRS) -+ num = bufnum = 0; -+ spin_unlock_irqrestore (&lock, flags); -+ -+ sprintf (space + (num * CAPSTR_LEN), "%4x %4x %4x %4x %4x %4x %4x [%x.%x.%x.%x]", cap->cap_type, -+ cap->cap_lownode, cap->cap_highnode, -+ cap->cap_lowcontext, cap->cap_mycontext, cap->cap_highcontext, ELAN_CAP_ENTRIES(cap), -+ cap->cap_userkey.key_values[0], cap->cap_userkey.key_values[1], -+ cap->cap_userkey.key_values[2], cap->cap_userkey.key_values[3]); -+ -+ return (space + (num * CAPSTR_LEN)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/debug.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/debug.c 2005-07-28 14:52:52.819683144 -0400 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.16 2004/07/07 11:22:33 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+unsigned elan4_debug = 0; -+unsigned elan4_debug_toconsole = 0; -+unsigned elan4_debug_tobuffer = DBG_ALL; -+ -+unsigned elan4_debug_display_ctxt; -+unsigned elan4_debug_ignore_ctxt; -+unsigned elan4_debug_ignore_type; -+ -+void -+elan4_debug_init() -+{ -+ if ((elan4_debug & elan4_debug_tobuffer) != 0) -+ qsnet_debug_alloc(); -+} -+ -+void -+elan4_debug_fini() -+{ -+} -+ -+void -+elan4_debugf (void *type, int mode, char *fmt,...) -+{ -+ char prefix[128]; -+ int where = 0; -+ va_list ap; -+ -+ if ((mode & elan4_debug_tobuffer) != 0 || type == DBG_BUFFER) -+ where |= QSNET_DEBUG_BUFFER; -+ if ((mode & elan4_debug_toconsole) != 0 || type == DBG_CONSOLE) -+ where |= QSNET_DEBUG_CONSOLE; -+ -+ if (where == 0) -+ return; -+ -+ if ((unsigned long) type > DBG_NTYPES) -+ { -+ ELAN4_CTXT *ctxt = (ELAN4_CTXT *) type; -+ -+ if (elan4_debug_display_ctxt && ctxt->ctxt_num != elan4_debug_display_ctxt) -+ return; -+ if (elan4_debug_ignore_ctxt && ctxt->ctxt_num == elan4_debug_ignore_ctxt) -+ return; -+ -+ sprintf (prefix, "[%08ld.%04d] elan4 (%03x) ", lbolt, current->pid, ctxt->ctxt_num); -+ } -+ else if ((unsigned long) type == (int) DBG_CONSOLE) -+ prefix[0] = '\0'; -+ else -+ { -+ char *what; -+ -+ if (elan4_debug_ignore_type & (1 << ((unsigned long) type))) -+ return; -+ -+ switch ((unsigned long) type) -+ { -+ case (int) DBG_DEVICE: what = "dev"; break; -+ case (int) DBG_USER: what = "usr"; break; -+ default: what = NULL; break; -+ } -+ -+ if (what) -+ sprintf (prefix, "[%08ld.%04d] elan4 [%s] ", lbolt, current->pid, what); -+ else -+ sprintf (prefix, "[%08ld.%04d] elan4 [%3d] ", lbolt, current->pid, (int)(long)type); -+ } -+ -+ va_start(ap,fmt); -+ qsnet_vdebugf (where, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/device.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/device.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/device.c 2005-07-28 14:52:52.824682384 -0400 -@@ -0,0 +1,2916 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.c,v 1.87.6.11 2005/03/18 13:48:53 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define ELAN_DEV_OPS ELANMOD_DEV_OPS -+#define ELAN_DEV_OPS_VERSION ELANMOD_DEV_OPS_VERSION -+#define elan_dev_register elanmod_dev_register -+#define elan_dev_deregister elanmod_dev_deregister -+#endif -+ -+/* XXXX configurational defines */ -+ -+#if defined (CONFIG_MPSAS) -+#define HASH_0_SIZE_VAL (12 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 8 -+#define LN2_MAX_CQS 8 /* 256 */ -+#else -+#define HASH_0_SIZE_VAL (13 + 6) -+#define HASH_1_SIZE_VAL (2 + 6) -+#define CTXT_TABLE_SHIFT 12 -+#define LN2_MAX_CQS 10 /* 1024 */ -+#endif -+ -+unsigned int elan4_hash_0_size_val = HASH_0_SIZE_VAL; -+unsigned int elan4_hash_1_size_val = HASH_1_SIZE_VAL; -+unsigned int elan4_ctxt_table_shift = CTXT_TABLE_SHIFT; -+unsigned int elan4_ln2_max_cqs = LN2_MAX_CQS; -+unsigned int elan4_dmaq_highpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_highpri_size = 1; /* 1024 entries */ -+unsigned int elan4_dmaq_lowpri_size = 2; /* 8192 entries */ -+unsigned int elan4_threadq_lowpri_size = 1; /* 1024 entries */ -+unsigned int elan4_interruptq_size = 0; /* 1024 entries */ -+unsigned int elan4_mainint_punt_loops = 1; -+unsigned int elan4_mainint_resched_ticks = 0; -+unsigned int elan4_linkport_lock = 0xbe0fcafe; /* default link port lock */ -+unsigned int elan4_eccerr_recheck = 1; -+ -+static int -+elan4_op_get_position (void *arg, ELAN_POSITION *ptr) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *)arg; -+ ELAN_POSITION pos; -+ -+ elan4_get_position (dev, &pos); -+ -+ return copyout (&pos, ptr, sizeof (ELAN_POSITION)); -+} -+ -+static int -+elan4_op_set_position (void *arg, unsigned short nodeid, unsigned short numnodes) -+{ -+ /* XXXXX -+ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ compute_position (&pos, nodeid, numnode, num_down_links_value); -+ -+ return elan4_set_position (dev, pos); -+ */ -+ return EINVAL; -+} -+ -+ELAN_DEV_OPS elan4_dev_ops = -+{ -+ elan4_op_get_position, -+ elan4_op_set_position, -+ -+ ELAN_DEV_OPS_VERSION -+}; -+ -+static E4_uint32 -+elan4_read_filter (ELAN4_DEV *dev, unsigned networkctx) -+{ -+ return (elan4_sdram_readl (dev, dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter))); -+} -+ -+static void -+elan4_write_filter (ELAN4_DEV *dev, unsigned networkctx, E4_uint32 value) -+{ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (networkctx * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, Filter)), value); -+ pioflush_sdram(dev); -+} -+ -+void -+elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ E4_uint32 setbits = 0; -+ E4_uint32 intmask = 0; -+ E4_uint32 haltmask; -+ E4_uint32 next_sched; -+ E4_uint32 next_intmask; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intmask_lock, flags); -+ -+ haltmask = (dev->dev_haltop_mask | dev->dev_haltop_active); -+ -+ if ((haltmask & INT_DProcHalted) || dev->dev_halt_all_count || dev->dev_halt_dproc_count) -+ setbits |= SCH_DProcHalt; -+ -+ if ((haltmask & INT_TProcHalted) || dev->dev_halt_all_count || dev->dev_halt_tproc_count) -+ setbits |= SCH_TProcHalt; -+ -+ if ((haltmask & INT_CProcHalted) || dev->dev_halt_all_count || dev->dev_halt_cproc_count) -+ setbits |= SCH_CProcHalt; -+ -+ if ((haltmask & INT_DiscardingLowPri) || dev->dev_discard_all_count || dev->dev_discard_lowpri_count) -+ setbits |= SCH_DiscardLowPriInput; -+ -+ if ((haltmask & INT_DiscardingHighPri) || dev->dev_discard_all_count || dev->dev_discard_highpri_count) -+ setbits |= SCH_DiscardHighPriInput; -+ -+ if (dev->dev_halt_lowpri_count) -+ setbits |= SCH_StopLowPriQueues; -+ -+ if (haltmask & INT_DProcHalted) intmask |= INT_DProcHalted; -+ if (haltmask & INT_TProcHalted) intmask |= INT_TProcHalted; -+ if (haltmask & INT_CProcHalted) intmask |= INT_CProcHalted; -+ if (haltmask & INT_DiscardingLowPri) intmask |= INT_DiscardingLowPri; -+ if (haltmask & INT_DiscardingHighPri) intmask |= INT_DiscardingHighPri; -+ -+ next_intmask = (dev->dev_intmask & ~(INT_Halted | INT_Discarding)) | (intmask & ~intreg); -+ next_sched = (dev->dev_schedstatus & ~(SCH_Halt | SCH_Discard)) | setbits; -+ -+ PRINTF5 (DBG_DEVICE, DBG_REGISTER, "elan4_set_schedstatus: haltmask=%x setbits=%x intmask=%x next_sched=%x next_intmask=%x\n", -+ haltmask, setbits, intmask, next_sched, next_intmask); -+ -+ CHANGE_INT_MASK (dev, next_intmask); -+ CHANGE_SCHED_STATUS (dev, next_sched); -+ -+ spin_unlock_irqrestore (&dev->dev_intmask_lock, flags); -+} -+ -+void -+elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ /* add to the end of the halt operations list */ -+ list_add_tail (&op->op_link, &dev->dev_haltop_list); -+ -+ if ((dev->dev_haltop_mask & op->op_mask) != op->op_mask) -+ { -+ dev->dev_haltop_mask |= op->op_mask; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+void -+elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_ONESHOT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ writeq ((op->op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, cq->cq_mapping); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ -+ op->op_cookie = INTOP_PERSISTENT | ((dev->dev_intop_cookie++) & INTOP_VALUE_MASK); -+ -+ list_add_tail (&op->op_link, &dev->dev_intop_list); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+void -+elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_del (&op->op_link); -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+} -+ -+static __inline__ void -+__issue_dma_flushop_cmd (ELAN4_CQ *cq) -+{ -+ writeq (DMA_ShMemWrite | RUN_DMA_CMD, cq->cq_mapping); -+ writeq (0 /* cookie */, cq->cq_mapping); -+ writeq (0 /* vproc */, cq->cq_mapping); -+ writeq (0 /* srcAddr */, cq->cq_mapping); -+ writeq (0 /* dstAddr */, cq->cq_mapping); -+ writeq (0 /* srcEvent */, cq->cq_mapping); -+ writeq (0 /* dstEvent */, cq->cq_mapping); -+ writeq (SET_EVENT_CMD, cq->cq_mapping); -+} -+ -+static void -+handle_dma_flushops_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned int hipri = ((unsigned long) arg & 1); -+ E4_uint64 status = dev->dev_dma_flushop[hipri].status; -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ E4_uint32 completedPtr = CQ_CompletedPtr(queuePtrs); -+ E4_uint32 size = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ unsigned long flags; -+ -+ /* -+ * Since we're called from a main interrupt which was issued through the approriate -+ * flushcq the command queue descriptor for dma flushing can no longer be in the -+ * insert cache, nor can it be in the extractor (as it's trapped), hence it is -+ * safe to modify the completed pointer -+ */ -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (status != 0); -+ -+ /* skip over either the DMA/SETEVENT or just the SETEVENT depending on the trap type */ -+ if (CPROC_TrapType (status) == CommandProcDmaQueueOverflow) -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 64) & (size - 1)); -+ else -+ completedPtr = (completedPtr & ~(size-1)) | ((completedPtr + 8) & (size - 1)); -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs) + 4, -+ ((queuePtrs >> 32) & ~CQ_PtrOffsetMask) | (completedPtr & CQ_PtrOffsetMask)); -+ -+ elan4_restartcq (dev, dev->dev_dma_flushop[hipri].cq); -+ -+ if (! list_empty (&dev->dev_dma_flushop[hipri].list)) -+ __issue_dma_flushop_cmd (dev->dev_dma_flushop[hipri].cq); -+ -+ dev->dev_dma_flushop[hipri].status = 0; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+} -+ -+static void -+handle_dma_flushops (ELAN4_DEV *dev, E4_uint64 status, int cqnum) -+{ -+ unsigned int hipri = (cqnum == elan4_cq2num(dev->dev_dma_flushop[1].cq) ? 1 : 0); -+ ELAN4_CQ *cq = dev->dev_dma_flushop[hipri].cq; -+ ELAN4_CQ *flushq = dev->dev_flush_cq[elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1)]; -+ struct list_head *ops; -+ unsigned long flags; -+ int qfull,count; -+ E4_uint64 queuePtrs; -+ LIST_HEAD(list); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ ASSERT (cqnum == elan4_cq2num (dev->dev_dma_flushop[hipri].cq)); -+ ASSERT (! list_empty (&dev->dev_dma_flushop[hipri].list)); -+ ASSERT (dev->dev_dma_flushop[hipri].status == 0); -+ -+ /* remove the whole list */ -+ ops = dev->dev_dma_flushop[hipri].list.next; -+ -+ list_del_init (&dev->dev_dma_flushop[hipri].list); -+ -+ /* and add it to our local list */ -+ list_add_tail (&list, ops); -+ -+ /* now determine whether the queue was full - since it cannot be empty -+ * then if the front and back pointers are the same then it is full */ -+ queuePtrs = hipri ? read_reg64 (dev, DProcHighPriPtrs) : read_reg64 (dev, DProcLowPriPtrs); -+ qfull = (E4_QueueFrontPointer (queuePtrs) == E4_QueueBackPointer (queuePtrs)); -+ -+ if (CPROC_TrapType(status) == CommandProcDmaQueueOverflow && !qfull) -+ printk (" ******* queue overflow trap - but queue not full\n"); -+ -+ if (qfull && CPROC_TrapType(status) != CommandProcDmaQueueOverflow) -+ printk (" ****** queue full - but not overflow trap : %llx %llx %x\n", -+ read_reg64 (dev, DProcLowPriPtrs), read_reg64 (dev, DProcHighPriPtrs), CPROC_TrapType(status)); -+ -+ /* Store the status register, this also indicates that the intop is pending */ -+ dev->dev_dma_flushop[hipri].status = status; -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ /* Issue a main interrupt command to the approriate flush command queue, -+ * which will then safely update the completed pointer to skip over the -+ * command which has trapped, also prevent any new commands to be issued -+ * to the command queue. -+ */ -+ dev->dev_dma_flushop[hipri].intop.op_function = handle_dma_flushops_intop; -+ dev->dev_dma_flushop[hipri].intop.op_arg = (void *) (unsigned long) hipri; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_dma_flushop[hipri].intop); -+ -+ /* now execute all operations */ -+ for (count = 0; ! list_empty (&list); count++) -+ { -+ ELAN4_DMA_FLUSHOP *op = list_entry (list.next, ELAN4_DMA_FLUSHOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg, qfull); -+ } -+ -+ /* finally release the "reasons" for halting */ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ if ((dev->dev_halt_dproc_count -= count) == 0) -+ elan4_set_schedstatus (dev, 0); -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ return; -+} -+ -+void -+elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ if (dev->dev_halt_dproc_count++ == 0) /* ensure that the DMA processor cannot */ -+ elan4_set_schedstatus (dev, 0); /* execute the DMA we issue. */ -+ -+ if (list_empty (&dev->dev_dma_flushop[hipri].list) && dev->dev_dma_flushop[hipri].status == 0) -+ __issue_dma_flushop_cmd (dev->dev_dma_flushop[hipri].cq); -+ -+ list_add_tail (&op->op_link, &dev->dev_dma_flushop[hipri].list); -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static void -+enable_elan_errors (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ ENABLE_INT_MASK (dev, INT_ErrorInterrupts); -+} -+ -+#define ERROR_DISABLE_PERIOD (hz/2) -+#define ERROR_SAMPLE_PERIOD (hz/10) -+#define ERROR_LIMIT (100) -+ -+static __inline__ void -+check_error_rate (ELAN4_DEV *dev) -+{ -+ if (dev->dev_error_time == (lbolt/ERROR_SAMPLE_PERIOD)) -+ { -+ if (++dev->dev_errors_per_period >= ERROR_LIMIT && (dev->dev_intmask & INT_ErrorInterrupts)) -+ { -+ DISABLE_INT_MASK (dev, INT_ErrorInterrupts); -+ -+ schedule_timer_fn (&dev->dev_error_timeoutid, enable_elan_errors, (void *) dev, ERROR_DISABLE_PERIOD); -+ } -+ } -+ else -+ { -+ dev->dev_error_time = (lbolt/ERROR_SAMPLE_PERIOD); -+ dev->dev_errors_per_period = 0; -+ } -+} -+ -+static __inline__ int -+handle_mainints (ELAN4_DEV *dev, int nticks, int nintr) -+{ -+ E4_uint32 nfptr = dev->dev_interruptq_nfptr; -+ E4_uint32 bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ E4_uint32 qsize = E4_QueueSize(elan4_interruptq_size); -+ E4_uint32 qmask = qsize - 1; -+ long tlim = lbolt + nticks; -+ int done = 0; -+ unsigned long flags; -+ -+ do { -+ int todo = ((bptr - nfptr) & qmask) / E4_MainIntEntrySize; -+ -+ ASSERT (todo > 0); -+ -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: fptr %x nfptr %x bptr %x : %d todo\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr, todo); -+ -+ if (nintr >= 0 && (done + todo) > nintr) /* punt because too may to do in interrupt */ -+ { -+ PRINTF4 (DBG_DEVICE, DBG_MAININT, "handle_mainints: punting (done %d todo %d) (bptr %x fptr %x)\n", -+ done, todo, bptr, read_reg32 (dev, MainIntQueuePtrs.s.Front)); -+ -+ return 1; -+ } -+ -+ BucketDevStat (dev, s_mainints, todo, MainIntBuckets); -+ -+ /* consume all the entries in the queue which we think are there */ -+ do { -+ E4_uint64 value = elan4_sdram_readq (dev, nfptr); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, E4_MAIN_INT_CTX (value)); -+ E4_uint32 fptr = nfptr; -+ -+ PRINTF2 (DBG_DEVICE, DBG_MAININT, "handle_mainints: process cookie %llx - write fptr=%x\n", value, nfptr); -+ -+ if (ctxt == NULL) -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "handle_mainints: context %d invalid\n", E4_MAIN_INT_CTX (value)); -+ else -+ ctxt->ctxt_ops->op_interrupt (ctxt, E4_MAIN_INT_COOKIE(value)); -+ -+ /* compute the next queue front pointer, before updating the front pointer -+ * since we need to ensure that elan4_queue_mainintop doesn't see the queue -+ * as being empty if an extra interrupt is queued in between */ -+ dev->dev_interruptq_nfptr = nfptr = (nfptr & ~qmask) | ((nfptr + sizeof (E4_uint64)) & qmask); -+ -+ /* update the queue front pointer, doing this will clear the -+ * interrupt for *all* interrupt cookies which have previously -+ * been added to the queue */ -+ write_reg32 (dev, MainIntQueuePtrs.s.Front, E4_QueueFrontValue (fptr, elan4_interruptq_size)); -+ pioflush_reg (dev); -+ } while (bptr != nfptr); -+ -+ /* re-sample the back pointer and if it's different from the previous -+ * queue front pointer, then the queue has something on it again */ -+ done += todo; -+ -+ if ((nticks > 0 && ((int) (lbolt - tlim)) > 0)) /* been executing for too long in thread */ -+ return 1; -+ -+ bptr = read_reg32 (dev, MainIntQueuePtrs.s.Back); -+ -+ PRINTF3 (DBG_DEVICE, DBG_MAININT, "handle_mainints: resample : fptr %x nfptr %x bptr %x\n", -+ read_reg32 (dev, MainIntQueuePtrs.s.Front), nfptr, bptr); -+ -+ /* at this point we've made some space in the interrupt queue, -+ * so check to see if we've got anything to restart */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ while (! list_empty (&dev->dev_interruptq_list)) -+ { -+ ELAN4_INTOP *op = list_entry (dev->dev_interruptq_list.next, ELAN4_INTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ op->op_function (dev, op->op_arg); -+ } -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ } while (bptr != nfptr); -+ -+ return 0; -+} -+ -+static void -+elan4_mainint_thread (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ -+ kernel_thread_init ("elan4_mainint"); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ for (;;) -+ { -+ if (dev->dev_stop_threads) -+ break; -+ -+ if (! (dev->dev_intmask & INT_MainInterrupt)) -+ { -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ if (handle_mainints (dev, elan4_mainint_resched_ticks, -1)) -+ BumpDevStat (dev, s_mainint_rescheds); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ } -+ -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ -+ dev->dev_mainint_stopped = 1; -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ if (dev->dev_interruptq_nfptr == read_reg32 (dev, MainIntQueuePtrs.s.Back)) -+ op->op_function (dev, op->op_arg); -+ else -+ list_add_tail (&op->op_link, &dev->dev_interruptq_list); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_cproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint32 cqptr = read_reg32 (dev, CommandControl.CommandQueueDescsBase) & E4_QueueDescPtrMask; -+ unsigned cqnum = ((cqptr - dev->dev_cqaddr) / sizeof (E4_CommandQueueDesc)); -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint64 status = read_reg64 (dev, CProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, CQ_Context (control)); -+ -+ PRINTF4 (DBG_DEVICE, DBG_INTR, "handle_cproc_trap: cqnum=%d status=%016llx control=%016llx TrapType\n", -+ cqnum, status, control, CPROC_TrapType (status)); -+ PRINTF4 (DBG_DEVICE, DBG_INTR, " %016llx %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)), -+ elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control))); -+ -+ BumpDevStat (dev, s_cproc_traps); -+ -+ ctxt->ctxt_ops->op_cproc_trap (ctxt, status, cqnum); -+ -+ return (CPROC_TrapType (status) == CommandProcWaitTrap ? SCH_RestartCProc | SCH_RestartEProc : SCH_RestartCProc); -+} -+ -+static __inline__ E4_uint32 -+handle_dproc_trap (ELAN4_DEV *dev, int unit) -+{ -+ E4_uint64 status = (unit == 0) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ E4_uint32 restart = (unit == 0) ? SCH_RestartDma0Proc : SCH_RestartDma1Proc; -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, DPROC_Context (status)); -+ -+ PRINTF3 (DBG_DEVICE, DBG_INTR, "handle_dproc_trap: unit %d context %d%s\n", unit, DPROC_Context(status), -+ DPROC_PrefetcherFault(status) ? " (prefetcher)" : ""); -+ -+ if (DPROC_PrefetcherFault (status)) -+ restart |= SCH_RestartDmaPrefetchProc; -+ -+ BumpDevStat (dev, s_dproc_traps); -+ -+ ctxt->ctxt_ops->op_dproc_trap (ctxt, status, unit); -+ -+ return (restart); -+} -+ -+static __inline__ E4_uint32 -+handle_eproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, EProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, EPROC_Context (status)); -+ -+ BumpDevStat (dev, s_eproc_traps); -+ -+ ctxt->ctxt_ops->op_eproc_trap (ctxt, status); -+ -+ return (SCH_RestartEProc); -+} -+ -+static __inline__ E4_uint32 -+handle_tproc_trap (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, TProcStatus); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, TPROC_Context (status)); -+ -+ BumpDevStat (dev, s_tproc_traps); -+ -+ ctxt->ctxt_ops->op_tproc_trap (ctxt, status); -+ -+ return (SCH_RestartTProc); -+} -+ -+static __inline__ void -+handle_haltints (ELAN4_DEV *dev, E4_uint32 intreg) -+{ -+ struct list_head list = LIST_HEAD_INIT(list); -+ E4_uint32 mask = 0; -+ E4_uint32 active = 0; -+ struct list_head *entry; -+ struct list_head *next; -+ unsigned long flags; -+ -+ BumpDevStat (dev, s_haltints); -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ -+ list_for_each_safe (entry, next, &dev->dev_haltop_list) { -+ ELAN4_HALTOP *op = list_entry (entry, ELAN4_HALTOP, op_link); -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "handle_haltints: op=%p op_mask=%x intreg=%x\n", op, op->op_mask, intreg); -+ -+ if ((op->op_mask & intreg) != op->op_mask) -+ mask |= op->op_mask; -+ else -+ { -+ list_del (&op->op_link); /* remove from list */ -+ list_add_tail (&op->op_link, &list); /* add to local list */ -+ -+ active |= op->op_mask; -+ } -+ } -+ -+ ASSERT (dev->dev_haltop_mask == (mask | active)); -+ -+ dev->dev_haltop_mask = mask; -+ -+ if (list_empty (&list)) -+ elan4_set_schedstatus (dev, intreg); -+ else -+ { -+ dev->dev_haltop_active = active; -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ ELAN4_HALTOP *op = list_entry (list.next, ELAN4_HALTOP, op_link); -+ -+ list_del (&op->op_link); -+ -+ (*op->op_function) (dev, op->op_arg); -+ } -+ -+ spin_lock_irqsave (&dev->dev_haltop_lock, flags); -+ dev->dev_haltop_active = 0; -+ -+ elan4_set_schedstatus (dev, 0); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_haltop_lock, flags); -+} -+ -+static __inline__ E4_uint32 -+handle_iproc_trap (ELAN4_DEV *dev, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ E4_uint64 status = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, IProcStatusCntxAndTrType)); -+ E4_uint32 filter = elan4_read_filter (dev, IPROC_NetworkContext (status)); -+ ELAN4_CTXT *ctxt = elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+ -+ /* -+ * The context is not valid in the following case : -+ * ack not been sent AND bad CRC/bad length. -+ * -+ * NOTE TransCRCStatus and BadLength only valid if NOT an EopTrap. -+ */ -+ ASSERT ((IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) || IPROC_EOPTrap (status) || -+ (IPROC_TransCRCStatus (status) == CRC_STATUS_GOOD && !IPROC_BadLength (status))); -+ -+ BumpDevStat (dev, s_iproc_traps); -+ -+ ctxt->ctxt_ops->op_iproc_trap (ctxt, status, unit); -+ -+ return (SCH_RestartCh0LowPriInput << unit); -+} -+ -+void -+handle_pcimemerr (ELAN4_DEV *dev) -+{ -+ elan4_pcierror (dev); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_sdramint (ELAN4_DEV *dev) -+{ -+ E4_uint64 status = read_reg64 (dev, SDRamECCStatus); -+ E4_uint64 ConfigRegValue = read_reg64 (dev, SDRamConfigReg); -+ char errstr[200]; -+ int i; -+ int Found = 0; -+ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_sdramint\n"); -+ -+ printk ("elan%d: ECC Error %s status=%llx\n", -+ dev->dev_instance, elan4_sdramerr2str (dev, status, ConfigRegValue, errstr), status); -+ -+ if (!ECC_UncorrectableErr(status) && !ECC_MultUncorrectErrs(status)) -+ printk ("elan%d: ECC error data=%016llx\n", dev->dev_instance, elan4_sdram_readq (dev, ECC_Addr(status))); -+ -+ if (ECC_CorrectableErr (status)) -+ BumpDevStat (dev, s_correctable_errors); -+ if (ECC_MultCorrectErrs (status)) -+ BumpDevStat (dev, s_multiple_errors); -+ -+ if (ECC_UncorrectableErr(status)) -+ panic ("elan%d: uncorrectable ECC error\n", dev->dev_instance); -+ if (ECC_MultUncorrectErrs(status)) -+ panic ("elan%d: muliple uncorrectable ECC error\n", dev->dev_instance); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_SDRAM_ERROR); -+ -+ /* -+ * Now try to test for a read/write error type. -+ * This can only be done if it was a correctable error as an uncorrectable error might lockup the node. -+ * It should not be attempted if the data is in the dcache because fetching again would not generate an -+ * error even if the problem was a read, and flushing the cache line would fix a write probelm. -+ * Reading the same location again should cause a new error if the problem was caused by a bad write. -+ */ -+ if (elan4_eccerr_recheck && -+ (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) && -+ ECC_CorrectableErr(status) && !ECC_UncorrectableErr(status)) -+ { -+ E4_uint64 status2; -+ E4_uint64 Addr = ECC_Addr(status) & ~(E4_CACHELINE_SIZE-1); -+ E4_uint32 SetIndex = (Addr >> 6) & ~(E4_NumCacheLines-1); -+ int InCache = 0; -+ -+ /* check the cache tags to see if the data has been read into a cache line. */ -+ for (i=0; idev_regs + offsetof(E4_Registers, Tags.Tags[i][SetIndex].Value)) & 0x7fffe000) == (Addr & 0x7fffe000)) -+ { -+ InCache = 1; -+ break; -+ } -+ -+ if (InCache == 0) -+ { -+ printk ("elan%d: checking if ECC error was read or write\n", dev->dev_instance); -+ -+ /* Now read and throw away the answer. A read of a word will schedule a block read of sdram */ -+ elan4_sdram_readq (dev, Addr); -+ status2 = read_reg64 (dev, SDRamECCStatus); -+ if ((Addr == (ECC_Addr(status2) & ~(E4_CACHELINE_SIZE-1))) && ECC_CorrectableErr(status2)) // Write error. -+ { -+ status = (status & ~0x0030000000000000ULL) | 0x0010000000000000ULL; -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_SDRAM_ERROR); -+ } -+ else -+ status = (status & ~0x0030000000000000ULL) | 0x0020000000000000ULL; -+ } -+ else -+ status = status | 0x0030000000000000ULL; -+ } -+ else -+ status &= ~0x0030000000000000ULL; -+ -+ /* search for this error already being logged */ -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i >= 0; i--) -+ if ((dev->dev_sdramerrs[i].EccStatus == status) && (dev->dev_sdramerrs[i].ConfigReg == ConfigRegValue)) -+ { -+ Found = 1; -+ dev->dev_sdramerrs[i].ErrorCount += 1; // Keep a count. -+ break; -+ } -+ -+ /* stash the status for /proc */ -+ if (!Found) -+ { -+ for (i = sizeof (dev->dev_sdramerrs)/sizeof (dev->dev_sdramerrs[0]) - 1; i > 0; i--) -+ dev->dev_sdramerrs[i] = dev->dev_sdramerrs[i-1]; -+ dev->dev_sdramerrs[0].EccStatus = status; -+ dev->dev_sdramerrs[0].ConfigReg = ConfigRegValue; -+ dev->dev_sdramerrs[0].ErrorCount = 1; // First error -+ } -+ -+ check_error_rate (dev); -+} -+ -+static void -+clear_linkerr_led (void *arg) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) arg; -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+} -+ -+void -+handle_linkerror (ELAN4_DEV *dev) -+{ -+ E4_uint32 LinkState; -+ E4_uint32 CurrState = read_reg32 (dev, LinkControlReg); -+ -+ /* Set for reading errors. */ -+ write_reg32 (dev, LinkControlReg, -+ (CurrState = CurrState & ~((LCONT_TEST_CONTROL_MASK << LCONT_TEST_CONTROL_SHIFT) | -+ (LCONT_TEST_VALUE_MASK << LCONT_TEST_VALUE_SHIFT)))); -+ LinkState = LCONT_LINK_STATE(CurrState = read_reg32 (dev, LinkControlReg)); -+ -+#ifdef DEBUG -+ { -+ E4_uint8 ErrorMsg[256], DataErrorVal[64]; -+ -+ strcpy (ErrorMsg, "handle_linkerror:"); -+ if (LinkState & LS_LockError) strcat (ErrorMsg, " LockError"); -+ if (LinkState & LS_DeskewError) strcat (ErrorMsg, " DeskewError"); -+ if (LinkState & LS_PhaseError) strcat (ErrorMsg, " PhaseError"); -+ if (LinkState & LS_DataError) -+ { -+ E4_uint32 error[4]; -+ E4_uint32 i; -+ strcat (ErrorMsg, " DataError"); -+ /* Errors */ -+ for(i = LRS_ErrorVal8to0; i <= LRS_ErrorVal35to27; i++) -+ { -+ write_reg32 (dev, LinkControlReg, -+ CurrState | LCONT_TEST_VALUE(i) | (LCONT_READ_STATE << LCONT_TEST_CONTROL_SHIFT)); -+ error[i - LRS_ErrorVal8to0] = LCONT_LINK_STATE(read_reg32 (dev, LinkControlReg)); -+ } -+ sprintf (DataErrorVal, " Link State Error Val: %09llx %03x %03x %03x %03x", -+ (unsigned long long) ((error[0] & 0x1ffUL) | ((error[1] & 0x1ffUL) << 9) | -+ ((error[2] & 0x1ffUL) << 18) | ((error[3] & 0x1ffUL) << 27)), -+ error[3], error[2], error[1], error[0]); -+ strcat (ErrorMsg, DataErrorVal); -+ } -+ if (LinkState & LS_FifoOvFlow0) strcat (ErrorMsg, " FifoOvFlow0"); -+ if (LinkState & LS_FifoOvFlow1) strcat (ErrorMsg, " FifoOvFlow1"); -+ if (LinkState & LS_Mod45Changed) strcat (ErrorMsg, " Mod45Changed"); -+ if (LinkState & LS_PAckNotSeenError) strcat (ErrorMsg, " PAckNotSeenError"); -+ strcat (ErrorMsg, "\n"); -+ PRINTF0 (DBG_DEVICE, DBG_INTR, ErrorMsg); -+ } -+#endif -+ -+ BumpDevStat (dev, s_link_errors); -+ -+ if (LinkState & LS_LockError) BumpDevStat (dev, s_lock_errors); -+ if (LinkState & LS_DeskewError) BumpDevStat (dev, s_deskew_errors); -+ if (LinkState & LS_PhaseError) BumpDevStat (dev, s_phase_errors); -+ if (LinkState & LS_DataError) BumpDevStat (dev, s_data_errors); -+ if (LinkState & LS_FifoOvFlow0) BumpDevStat (dev, s_fifo_overflow0); -+ if (LinkState & LS_FifoOvFlow1) BumpDevStat (dev, s_fifo_overflow1); -+ if (LinkState & LS_Mod45Changed) BumpDevStat (dev, s_mod45changed); -+ if (LinkState & LS_PAckNotSeenError) BumpDevStat (dev, s_pack_not_seen); -+ -+ PULSE_SCHED_RESTART (dev, SCH_ClearLinkErrorInt); -+ -+ /* schedule a timer to clear the link error LED, so that it stays on -+ * for a second for every link error that occurs */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && !timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ schedule_timer_fn (&dev->dev_linkerr_timeoutid, clear_linkerr_led, (void *) dev, HZ); -+ -+ /* -+ * Signal the link error to the switch by -+ * enabling the INT_LinkPortKeyFail bit. -+ * Always clear the error bit as the switch -+ * might have produced a spurious "ack" ... -+ */ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_LINKPORT_INT); -+ -+ if (dev->dev_linkerr_signalled == 0) -+ dev->dev_linkerr_signalled = 1; -+ else -+ dev->dev_linkerr_signalled = 2; -+ -+ ENABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ -+ check_error_rate (dev); -+} -+ -+void -+handle_linkportkeyfail (ELAN4_DEV *dev) -+{ -+ PRINTF0 (DBG_DEVICE, DBG_INTR, "handle_linkportkeyfail\n"); -+ -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_LINKPORT_INT); -+ -+ if (! dev->dev_linkerr_signalled) -+ { -+ /* Hmmm - they're not playing ball */ -+ BumpDevStat (dev, s_linkport_keyfail); -+ -+ DISABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ } -+ else -+ { -+ /* If more link errors have occured since we -+ * signalled the error, then leave it signalled. */ -+ if (--dev->dev_linkerr_signalled == 0) -+ DISABLE_INT_MASK (dev, INT_LinkPortKeyFail); -+ } -+} -+ -+ -+static __inline__ void -+__elan4_4msi0 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ unsigned long flags; -+ -+ if (intreg & intmask & INT_MainInterrupt) -+ { -+ DISABLE_INT_MASK (dev, INT_MainInterrupt); -+ -+ if (handle_mainints (dev, -1, elan4_mainint_punt_loops) == 0) -+ ENABLE_INT_MASK (dev, INT_MainInterrupt); -+ else -+ { -+ BumpDevStat (dev, s_mainint_punts); -+ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ kcondvar_wakeupone (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ } -+ } -+} -+ -+static __inline__ void -+__elan4_4msi1 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi1: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ -+ if (intreg & intmask & INT_CProc) -+ restart |= handle_cproc_trap (dev); -+ if (intreg & intmask & INT_EProc) -+ restart |= handle_eproc_trap (dev); -+ if (intreg & intmask & INT_Dma0Proc) -+ restart |= handle_dproc_trap (dev, 0); -+ if (intreg & intmask & INT_Dma1Proc) -+ restart |= handle_dproc_trap (dev, 1); -+ if (intreg & intmask & INT_TProc) -+ restart |= handle_tproc_trap (dev); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+ -+ if (intreg & (INT_Halted|INT_Discarding)) -+ handle_haltints (dev, intreg); -+} -+ -+static __inline__ void -+__elan4_4msi2 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ E4_uint32 restart = 0; -+ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi2: %x\n", intreg); -+ -+ spin_lock (&dev->dev_trap_lock); -+ if (intreg & intmask & INT_IProcCh0LowPri) -+ restart |= handle_iproc_trap (dev, 0); -+ -+ if (intreg & intmask & INT_IProcCh1LowPri) -+ restart |= handle_iproc_trap (dev, 1); -+ -+ if (intreg & intmask & INT_IProcCh0HighPri) -+ restart |= handle_iproc_trap (dev, 2); -+ -+ if (intreg & intmask & INT_IProcCh1HighPri) -+ restart |= handle_iproc_trap (dev, 3); -+ -+ PULSE_SCHED_RESTART (dev, restart); -+ -+ spin_unlock (&dev->dev_trap_lock); -+} -+ -+static __inline__ void -+__elan4_4msi3 (ELAN4_DEV *dev, E4_uint32 intreg, E4_uint32 intmask) -+{ -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "__elan4_4msi3: %x\n", intreg); -+ -+ if (intreg & intmask & INT_PciMemErr) -+ handle_pcimemerr (dev); -+ -+ if (intreg & intmask & INT_SDRamInt) -+ handle_sdramint (dev); -+ -+ if (intreg & intmask & INT_LinkError) -+ handle_linkerror (dev); -+ -+ if (intreg & intmask & INT_LinkPortKeyFail) -+ handle_linkportkeyfail (dev); -+} -+ -+int -+elan4_1msi0 (ELAN4_DEV *dev) -+{ -+ E4_uint32 intmask = dev->dev_intmask; -+ E4_uint32 intreg; -+ -+ if (intmask == 0 || ((intreg = read_reg32 (dev, InterruptReg)) & intmask) == 0) -+ return (0); -+ -+ BumpDevStat (dev, s_interrupts); -+ -+ do { -+ PRINTF1 (DBG_DEVICE, DBG_INTR, "elan4_1msi0: %x\n", intreg); -+ -+ if (intreg & intmask & INT_MSI0) -+ __elan4_4msi0(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI1) -+ __elan4_4msi1(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI2) -+ __elan4_4msi2(dev, intreg, intmask); -+ if (intreg & intmask & INT_MSI3) -+ __elan4_4msi3(dev, intreg, intmask); -+ -+ if (intreg & INT_LinkPortKeyFail) -+ handle_linkportkeyfail (dev); -+ -+ /* must ensure that the read of the interrupt mask -+ * completes before the read of the interrupt register -+ * since the main interrupt thread clears it's interrupt -+ * and then re-enables it in the interrupt mask. */ -+ intmask = dev->dev_intmask; -+ mb(); -+ intreg = read_reg32 (dev, InterruptReg); -+ -+ } while ((intreg & intmask) != 0); -+ -+ return (1); -+} -+ -+/* local context management */ -+int -+elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops) -+{ -+ unsigned long flags; -+ int tbl; -+ -+ ctxt->ctxt_dev = dev; -+ ctxt->ctxt_ops = ops; -+ -+ INIT_LIST_HEAD (&ctxt->ctxt_cqalist); -+ spin_lock_init (&ctxt->ctxt_mmulock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ KMEM_ZALLOC (ctxt->ctxt_mmuhash[tbl], ELAN4_HASH_ENTRY **, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *), 1); -+ -+ if (ctxt->ctxt_mmuhash[tbl] == NULL) -+ { -+ if (tbl != 0) -+ KMEM_FREE (ctxt->ctxt_mmuhash[0], dev->dev_hashsize[0] * sizeof (ELAN4_HASH_ENTRY *)); -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+ return (-ENOMEM); -+ } -+ } -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ if ((ctxt->ctxt_num = bt_freebit (dev->dev_ctxmap, (1 << dev->dev_ctxtableshift))) >= 0) -+ { -+ /* chain onto the lists of all contexts */ -+ list_add (&ctxt->ctxt_link, &dev->dev_ctxt_list); -+ -+ BT_SET (dev->dev_ctxmap, ctxt->ctxt_num); -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return (ctxt->ctxt_num < 0 ? -ENOMEM : 0); -+} -+ -+void -+elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt) -+{ -+ unsigned long flags; -+ int tbl; -+ -+ /* remove from list of contexts */ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_del (&ctxt->ctxt_link); -+ -+ BT_CLEAR (dev->dev_ctxmap, ctxt->ctxt_num); -+ -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ spin_lock_destroy (&ctxt->ctxt_info_lock); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ KMEM_FREE (ctxt->ctxt_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *)); -+ -+ spin_lock_destroy (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_CTXT * -+elan4_localctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ list_for_each (entry, &dev->dev_ctxt_list) { -+ ELAN4_CTXT *ctxt = list_entry (entry, ELAN4_CTXT, ctxt_link); -+ -+ if (ctxt->ctxt_num == num) -+ { -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ return (ctxt); -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return ((ELAN4_CTXT *) NULL); -+} -+ -+ELAN4_CTXT * -+elan4_networkctxt (ELAN4_DEV *dev, unsigned num) -+{ -+ E4_uint32 filter = elan4_read_filter (dev, num); -+ -+ if ((filter & E4_FILTER_CONTEXT_MASK) == INVALID_CONTEXT) -+ return NULL; -+ else -+ return elan4_localctxt (dev, filter & E4_FILTER_CONTEXT_MASK); -+} -+ -+/* network context management */ -+int -+elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int res = 0; -+ E4_uint32 filter; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_ctxt_lock, flags); -+ -+ filter = elan4_read_filter (dev, ctxnum); -+ if ((filter & E4_FILTER_CONTEXT_MASK) != INVALID_CONTEXT) -+ { -+ PRINTF2 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d filter=%x -> EBUSY\n", ctxnum, filter); -+ res = -EBUSY; -+ } -+ else -+ { -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_attach_filter: ctx=%d - SUCCESS\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+ } -+ spin_unlock_irqrestore (&dev->dev_ctxt_lock, flags); -+ -+ return (res); -+} -+ -+void -+elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF1 (ctxt, DBG_NETWORK_CTX, "elan4_detach_filter: detach from network context %d\n", ctxnum); -+ -+ elan4_write_filter (dev, ctxnum, INVALID_CONTEXT | E4_FILTER_DISCARD_ALL); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ PRINTF6 (ctxt, DBG_NETWORK_CTX, "elan4_set_filter: set filter state %x for network context %d <%s%s%s%s>\n", state, ctxnum, -+ (state & E4_FILTER_DISCARD_ALL) ? "discard," : "", -+ (state & E4_FILTER_ACKOK_ALL) ? "ack-ok," : "", -+ (state & E4_FILTER_HIGH_PRI) ? "high-pri," : "", -+ (state & E4_FILTER_STATS) ? "stats," : ""); -+ -+ elan4_write_filter (dev, ctxnum, ctxt->ctxt_num | state); -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush); -+} -+ -+void -+elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_uint32 value = tbl ? (E4_VPT_VALID | E4_VPT_VALUE(tbl->tbl_entries, tbl->tbl_size)) : 0; -+ -+ /* and insert into the vp table */ -+ elan4_sdram_writel (dev, (dev->dev_ctxtable + (ctxt->ctxt_num * sizeof (E4_ContextControlBlock)) + -+ offsetof (E4_ContextControlBlock, VirtualProcessTable)), value); -+ pioflush_sdram(dev); -+ -+ PULSE_SYSCONTROL (dev, CONT_ROUTE_FLUSH); -+} -+ -+/* command queue management */ -+ELAN4_CQA * -+elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ cqa->cqa_ref++; -+ -+ spin_unlock (&dev->dev_cqlock); -+ return cqa; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ return NULL; -+} -+ -+void -+elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ struct list_head *el, *nel; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each_safe (el, nel, &ctxt->ctxt_cqalist) { -+ ELAN4_CQA *cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_idx == idx) -+ { -+ if (--cqa->cqa_ref || bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+ return; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ printk ("elan4_putcqa: idx %d not found\n", idx); -+ BUG(); -+} -+ -+static ELAN4_CQ * -+elan4_getcq (ELAN4_CTXT *ctxt, unsigned int type) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa; -+ struct list_head *el; -+ int cidx, didx; -+ -+ spin_lock (&dev->dev_cqlock); -+ list_for_each (el, &ctxt->ctxt_cqalist) { -+ cqa = list_entry (el, ELAN4_CQA, cqa_link); -+ -+ if (cqa->cqa_type == type && (cidx = bt_freebit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA)) >=0) -+ { -+ BT_SET (cqa->cqa_bitmap, cidx); -+ -+ spin_unlock (&dev->dev_cqlock); -+ return &cqa->cqa_cq[cidx]; -+ } -+ } -+ spin_unlock (&dev->dev_cqlock); -+ -+ /* allocate a new cqa and it's chunk of command queue descriptors */ -+ KMEM_ZALLOC (cqa, ELAN4_CQA *, sizeof (ELAN4_CQA), 1); -+ if (cqa == NULL) -+ return NULL; -+ -+ spin_lock (&dev->dev_cqlock); -+ cidx = bt_freebit (ctxt->ctxt_cqamap, ELAN4_MAX_CQA); -+ -+ /* On architectures which have MTRR registers for write-combinig -+ * the top command queues from dev->dev_cqreorder upwards are -+ * used for reordered queues. Without MTRR registers any page -+ * sized group can use write combinig through the ptes. */ -+ if (dev->dev_cqreorder == 0) -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqcount/ELAN4_CQ_PER_CQA); -+ else -+ { -+ if ((type & CQ_Reorder) != 0) -+ didx = bt_nextbit (dev->dev_cqamap, dev->dev_cqcount/ELAN4_CQ_PER_CQA, (dev->dev_cqreorder/ELAN4_CQ_PER_CQA) - 1, 0); -+ else -+ didx = bt_freebit (dev->dev_cqamap, dev->dev_cqreorder/ELAN4_CQ_PER_CQA); -+ } -+ -+ if (cidx < 0 || didx < 0) -+ { -+ spin_unlock (&dev->dev_cqlock); -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ return NULL; -+ } -+ -+ BT_SET (ctxt->ctxt_cqamap, cidx); -+ BT_SET (dev->dev_cqamap, didx); -+ -+ cqa->cqa_idx = cidx; -+ cqa->cqa_type = type; -+ cqa->cqa_cqnum = (didx * ELAN4_CQ_PER_CQA); -+ -+ list_add_tail (&cqa->cqa_link, &ctxt->ctxt_cqalist); -+ -+ /* initialise the cqa struct */ -+ for (cidx = 0; cidx < ELAN4_CQ_PER_CQA; cidx++) -+ { -+ cqa->cqa_cq[cidx].cq_idx = cidx; -+ cqa->cqa_cq[cidx].cq_cqa = cqa; -+ } -+ -+ /* no mappings yet */ -+ cqa->cqa_ref = 0; -+ -+ /* we're going to return entry zero */ -+ BT_SET (cqa->cqa_bitmap, 0); -+ spin_unlock (&dev->dev_cqlock); -+ -+ return &cqa->cqa_cq[0]; -+} -+ -+static void -+elan4_putcq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQA *cqa = cq->cq_cqa; -+ -+ spin_lock (&dev->dev_cqlock); -+ -+ BT_CLEAR (cqa->cqa_bitmap, cq->cq_idx); -+ -+ if (bt_lowbit (cqa->cqa_bitmap, ELAN4_CQ_PER_CQA) != -1 || cqa->cqa_ref) -+ spin_unlock (&dev->dev_cqlock); -+ else -+ { -+ list_del (&cqa->cqa_link); -+ -+ BT_CLEAR (ctxt->ctxt_cqamap, cqa->cqa_idx); -+ BT_CLEAR (dev->dev_cqamap, cqa->cqa_cqnum/ELAN4_CQ_PER_CQA); -+ spin_unlock (&dev->dev_cqlock); -+ -+ KMEM_FREE (cqa, sizeof (ELAN4_CQA)); -+ } -+} -+ -+ELAN4_CQ * -+elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned perm, unsigned cqtype) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CQ *cq; -+ int cqnum; -+ sdramaddr_t cqdesc; -+ unsigned offset; -+ E4_uint64 value; -+ -+ if ((cq = elan4_getcq (ctxt, cqtype)) == NULL) -+ return NULL; -+ -+ cqnum = elan4_cq2num(cq); -+ -+ cq->cq_space = elan4_sdram_alloc (dev, CQ_Size(cqsize)); -+ if (cq->cq_space == (virtaddr_t) 0) -+ { -+ elan4_putcq (ctxt, cq); -+ return (NULL); -+ } -+ -+ cq->cq_size = cqsize; -+ cq->cq_perm = perm; -+ -+ /* and finally initialise the command queue descriptor */ -+ cqdesc = dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)); -+ -+ value = CQ_QueuePtrsValue (cqsize, cq->cq_space, cq->cq_space); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ value |= ((cqtype & CQ_Priority) ? CQ_RevA_Priority : 0); -+ else -+ value |= (((cqtype & CQ_Priority) ? CQ_RevB_Priority : 0) | -+ ((cqtype & CQ_Reorder) ? CQ_RevB_ReorderingQueue : CQ_RevB_32bitWriteQueue)); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs), value); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), 0); -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), CQ_ControlValue (ctxt->ctxt_num, 2, perm)); -+ pioflush_sdram (dev); -+ -+ offset = (cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ cq->cq_mapping = elan4_map_device (dev, ELAN4_BAR_REGISTERS, (offset & ~(PAGE_SIZE-1)), -+ PAGE_SIZE, &cq->cq_handle) + (offset & (PAGE_SIZE-1)); -+#ifdef CONFIG_MPSAS -+ if (ctxt == &dev->dev_ctxt) -+ return (cq); -+#endif -+ -+ elan4_sdram_flushcache (dev, cq->cq_space, CQ_Size(cqsize)); -+ -+ return (cq); -+} -+ -+void -+elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned offset = (elan4_cq2num(cq) + dev->dev_cqoffset) * CQ_CommandMappingSize; -+ -+ elan4_flushcq (dev, cq); -+ -+ elan4_unmap_device (dev, cq->cq_mapping - (offset & (PAGE_SIZE-1)), PAGE_SIZE, &cq->cq_handle); -+ elan4_sdram_free (dev, cq->cq_space, CQ_Size (cq->cq_size)); -+ -+ elan4_putcq (ctxt, cq); -+} -+ -+void -+elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ int hipri; -+ unsigned long flags; -+ -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restarting cq %p\n", cq); -+ -+ spin_lock_irqsave (&dev->dev_requeue_lock, flags); -+ -+ while (read_reg32 (dev, CommandControl.CommandRequeuePtr) & E4_CommandRequeueBusy) -+ ; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevA_Priority) != 0; -+ else -+ hipri = (elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)) & CQ_RevB_Priority) != 0; -+ -+ if (hipri) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as high pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc | E4_CommandRequeueHighPri); -+ } -+ else -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CPROC, "restartcq: restart cq %d as low pri\n", elan4_cq2num(cq)); -+ write_reg32 (dev, CommandControl.CommandRequeuePtr, cqdesc); -+ } -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_requeue_lock, flags); -+} -+ -+static void -+flushcq_intop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ dev->dev_flush_finished |= (1 << (unsigned long) arg); -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+void -+elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ int flushqnum = elan4_cq2num(cq) & (COMMAND_INSERTER_CACHE_ENTRIES-1); -+ ELAN4_CQ *flushq = dev->dev_flush_cq[flushqnum]; -+ unsigned long flags; -+ -+ PRINTF (DBG_DEVICE, DBG_FLUSH, "elan4_flushcq: cqnum=%d\n", elan4_cq2num(cq)); -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ dev->dev_flush_finished &= ~(1 << flushqnum); -+ -+ dev->dev_flush_op[flushqnum].op_function = flushcq_intop; -+ dev->dev_flush_op[flushqnum].op_arg = (void *) (unsigned long) flushqnum; -+ -+ elan4_queue_intop (dev, flushq, &dev->dev_flush_op[flushqnum]); -+ -+ while (! (dev->dev_flush_finished & (1 << flushqnum))) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint32 control = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ /* Write the command queues control word, but ensure that the ChannelNotCompleted fields -+ * are not modified. We use this to just alter the RestartCount/Permissions fields */ -+ -+ elan4_sdram_writel (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control), -+ CQ_ControlValue (CQ_Context (control), restart ? restart : CQ_RestartCount (control), perm)); -+} -+ -+/* instruction cache flush */ -+static __inline__ void -+elan4_flush_icache_locked (ELAN4_DEV *dev) -+{ -+ int i, j; -+ -+ PRINTF0 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache_locked: flushing icache\n"); -+ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ -+ /* Errata 24: must ensure that the DCache is flushed after loading -+ * code for the thread processor. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ pioflush_reg (dev); -+} -+ -+static void -+device_iflush_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ dev->dev_iflush_queued = 0; -+ -+ kcondvar_wakeupall (&dev->dev_flush_wait, &dev->dev_flush_lock); -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache_halted (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ elan4_flush_icache_locked (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+void -+elan4_flush_icache (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_flush_lock, flags); -+ -+ PRINTF1 (DBG_DEVICE, DBG_FLUSH, "elan4_flush_icache: queued=%d\n", dev->dev_iflush_queued); -+ -+ if (! dev->dev_iflush_queued) -+ { -+ dev->dev_iflush_queued = 1; -+ -+ elan4_queue_haltop (dev, &dev->dev_iflush_haltop); -+ } -+ -+ while (dev->dev_iflush_queued) -+ kcondvar_wait (&dev->dev_flush_wait, &dev->dev_flush_lock, &flags); -+ -+ spin_unlock_irqrestore (&dev->dev_flush_lock, flags); -+} -+ -+/* device context operations */ -+static void -+device_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &dev->dev_cproc_trap; -+ -+ elan4_extract_cproc_trap (dev, status, trap, cqnum); -+ -+ DBGCMD (DBG_DEVICE, DBG_FLUSH, elan4_display_cproc_trap (DBG_DEVICE, DBG_FLUSH, "device_cproc_trap", trap)); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (ctxt, DBG_FLUSH, "device_cproc_trap: cqnum=%d\n", cqnum); -+ -+ /* XXXX: we could either just hit restart (and hope) - or we could extract -+ * the event interrupt cookie out and "complete" the command before -+ * restarting it */ -+ elan4_restartcq (dev, dev->dev_flush_cq[cqnum]); -+ return; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcPermissionTrap: -+ handle_dma_flushops (dev, status, cqnum); -+ return; -+ -+ default: -+ printk ("device_cproc_trap: status=%llx control=%llx TrapType=%x cqnum=%d\n", (long long) trap->tr_status, -+ elan4_sdram_readq (dev, dev->dev_cqaddr + cqnum * sizeof (E4_CommandQueueDesc) + -+ offsetof (E4_CommandQueueDesc, CQ_Control)), -+ (int) CPROC_TrapType(trap->tr_status), cqnum); -+ panic ("device_cproc_trap"); -+ } -+} -+ -+static void -+device_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ ELAN4_TPROC_TRAP trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, &trap); -+ -+ elan4_display_tproc_trap (DBG_CONSOLE, DBG_TRAP, "device_tproc_trap", &trap); -+ panic ("device_tproc_trap"); -+} -+ -+static void -+device_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ elan4_display_dproc_trap (DBG_CONSOLE, DBG_TRAP, "device_dproc_trap", &trap); -+ panic ("device_dproc_trap"); -+} -+ -+static void -+device_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) ctxt; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ PRINTF (ctxt, DBG_FLUSH, "device_interrupt: cookie=%llx\n", cookie); -+ -+ spin_lock_irqsave (&dev->dev_intop_lock, flags); -+ list_for_each_safe (el, nel, &dev->dev_intop_list) { -+ ELAN4_INTOP *op = list_entry (el, ELAN4_INTOP, op_link); -+ -+ if (op->op_cookie == cookie) -+ { -+ if ((op->op_cookie & INTOP_TYPE_MASK) == INTOP_ONESHOT) -+ list_del (&op->op_link); -+ -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ (*op->op_function)(dev, op->op_arg); -+ return; -+ } -+ } -+ spin_unlock_irqrestore (&dev->dev_intop_lock, flags); -+ -+ panic ("device_interrupt: interrupt cookie %llx not found\n", cookie); -+} -+ -+static void -+device_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_IPROC_TRAP *trap = &dev->dev_iproc_trap; -+ -+ elan4_extract_iproc_trap (dev, status, trap, unit); -+ elan4_inspect_iproc_trap (trap); -+ -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "device_iproc_trap", trap)); -+ -+ if (elan4_neterr_iproc_trap (dev, trap)) -+ return; -+ -+ elan4_display_iproc_trap (DBG_CONSOLE, DBG_TRAP, "device_iproc_trap", trap); -+ panic ("device_iproc_trap: unexpected trap\n"); -+} -+ -+ELAN4_TRAP_OPS device_trap_ops = -+{ -+ NULL, -+ device_cproc_trap, -+ device_dproc_trap, -+ device_tproc_trap, -+ device_iproc_trap, -+ device_interrupt, -+}; -+ -+/* -+ * elan4_initialise_device -+ * initialise the ELAN4_DEV struct - spinlocks,cvs etc. -+ * map the registers, sdram etc -+ */ -+int -+elan4_initialise_device (ELAN4_DEV *dev) -+{ -+ int i, bit; -+ -+ if (elan4_mainint_resched_ticks == 0) -+ elan4_mainint_resched_ticks = (hz/4); -+ -+ /* map the registers */ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ -+ dev->dev_rom = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVA_EBUS_OFFSET + ELAN4_REVA_EBUS_ROM_OFFSET, -+ ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ dev->dev_regs = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_REG_OFFSET, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ dev->dev_rom = (ioaddr_t) 0; -+ dev->dev_i2c = elan4_map_device (dev, ELAN4_BAR_REGISTERS, ELAN4_REVB_I2C_OFFSET, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ /* XXXX: parse the ebus rom to determine the sdram configuration */ -+ { -+ extern long long sdram_cfg; -+ -+ if (sdram_cfg == 0) -+ dev->dev_sdram_cfg = SDRAM_STARTUP_VALUE; -+ else -+ dev->dev_sdram_cfg = sdram_cfg; -+ } -+ -+ for (bit = 0; ((1 << bit) & elan4_resource_len (dev, ELAN4_BAR_SDRAM)) == 0; bit++) -+ ; -+ -+ switch ((dev->dev_sdram_cfg >> SDRAM_RamSize_SH) & 3) -+ { -+ case 0: /* 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 1: /* 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 4; bit -= 2; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = ((i & 2) << (bit)) | ((i & 1) << (bit-1)); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 2: /* 2Gbit (16-bit output) or 1Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 2; bit--; -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ dev->dev_sdram_banks[i].b_base = (i << bit); -+ dev->dev_sdram_banks[i].b_size = (1 << bit); -+ } -+ break; -+ -+ case 3: /* 4Gbit (16-bit output) or 2Gbit (8-bit output) */ -+ dev->dev_sdram_numbanks = 1; -+ dev->dev_sdram_banks[0].b_base = 0; -+ dev->dev_sdram_banks[0].b_size = (1 << bit); -+ break; -+ } -+ -+ elan4_sdram_init (dev); -+ -+ /* initialise locks for classes of interrupts */ -+ spin_lock_init (&dev->dev_trap_lock); -+ spin_lock_init (&dev->dev_intop_lock); -+ spin_lock_init (&dev->dev_haltop_lock); -+ spin_lock_init (&dev->dev_mainint_lock); -+ -+ /* initialise other locks */ -+ spin_lock_init (&dev->dev_i2c_lock); -+ -+ spin_lock_init (&dev->dev_mmulock); -+ spin_lock_init (&dev->dev_cqlock); -+ spin_lock_init (&dev->dev_ctxlock); -+ -+ spin_lock_init (&dev->dev_intmask_lock); -+ spin_lock_init (&dev->dev_syscontrol_lock); -+ -+ spin_lock_init (&dev->dev_ctxt_lock); -+ spin_lock_init (&dev->dev_flush_lock); -+ spin_lock_init (&dev->dev_requeue_lock); -+ -+ kmutex_init (&dev->dev_lock); -+ -+ kcondvar_init (&dev->dev_mainint_wait); -+ kcondvar_init (&dev->dev_flush_wait); -+ -+ /* initialsie lists */ -+ INIT_LIST_HEAD (&dev->dev_ctxt_list); -+ INIT_LIST_HEAD (&dev->dev_intop_list); -+ INIT_LIST_HEAD (&dev->dev_interruptq_list); -+ INIT_LIST_HEAD (&dev->dev_hc_list); -+ INIT_LIST_HEAD (&dev->dev_haltop_list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[0].list); -+ INIT_LIST_HEAD (&dev->dev_dma_flushop[1].list); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ return (0); -+} -+ -+void -+elan4_finalise_device (ELAN4_DEV *dev) -+{ -+ kcondvar_destroy (&dev->dev_flush_wait); -+ kcondvar_destroy (&dev->dev_mainint_wait); -+ -+ kmutex_destroy (&dev->dev_lock); -+ -+ spin_lock_destroy (&dev->dev_requeue_lock); -+ spin_lock_destroy (&dev->dev_flush_lock); -+ spin_lock_destroy (&dev->dev_ctxt_lock); -+ -+ spin_lock_destroy (&dev->dev_syscontrol_lock); -+ spin_lock_destroy (&dev->dev_intmask_lock); -+ -+ spin_lock_destroy (&dev->dev_ctxlock); -+ spin_lock_destroy (&dev->dev_cqlock); -+ spin_lock_destroy (&dev->dev_mmulock); -+ -+ spin_lock_destroy (&dev->dev_i2c_lock); -+ -+ spin_lock_destroy (&dev->dev_mainint_lock); -+ spin_lock_destroy (&dev->dev_haltop_lock); -+ spin_lock_destroy (&dev->dev_intop_lock); -+ spin_lock_destroy (&dev->dev_trap_lock); -+ -+ while (! list_empty (&dev->dev_hc_list)) -+ { -+ ELAN4_HASH_CHUNK *hc = list_entry (dev->dev_hc_list.next, ELAN4_HASH_CHUNK, hc_link); -+ -+ list_del (&hc->hc_link); -+ -+ KMEM_FREE(hc, sizeof (ELAN4_HASH_CHUNK)); -+ } -+ -+ elan4_sdram_fini (dev); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ elan4_unmap_device (dev, dev->dev_rom, ELAN4_REVA_EBUS_ROM_SIZE, &dev->dev_rom_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ case PCI_REVISION_ID_ELAN4_REVB: -+ elan4_unmap_device (dev, dev->dev_i2c, ELAN4_REVB_I2C_SIZE, &dev->dev_i2c_handle); -+ elan4_unmap_device (dev, dev->dev_regs, ELAN4_REG_SIZE, &dev->dev_regs_handle); -+ break; -+ } -+} -+ -+static int -+measure_sysclk (ELAN4_DEV *dev) -+{ -+ E4_uint64 val0, val1; -+ E4_uint32 ticks, ns; -+ -+ write_ureg64 (dev, StatCont, STP_SYS_CLOCK_RATE0); -+ -+ val0 = read_ureg64 (dev, StatCounts[0]); -+ udelay (1000); -+ val1 = read_ureg64 (dev, StatCounts[0]); -+ -+ -+ ticks = ((val1 >> 32) - (val0 >> 32)); -+ ns = ((val1 & 0xffffffff) - (val0 & 0xffffffff)); -+ -+ return (ticks / (ns / 1000)); -+} -+ -+static void -+initialise_cache (ELAN4_DEV *dev) -+{ -+ register int set, line; -+ -+ /* Initialise the cache to "map" the bottom of sdram - we will use -+ * this space for cache flushing, so require the cache to be set -+ * up so that cachelines for this are in the correct set. -+ * -+ * XXXX: for MPSAS we set bit 28, to ensure that any access to -+ * sdram causes the line to be filled first to expunge any -+ * Xs. */ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], (((E4_uint64) set) << 29) | (1 << 28) | (line << 16)); -+} -+ -+#ifndef CONFIG_MPSAS -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+} -+ -+static void -+initialise_ecc (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ register int i, addr; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ initialise_cache_tags (dev, E4_CacheSize); -+ for (addr = 0; addr < bank->b_size; addr += E4_CacheSize) -+ { -+ for (i = 0; i < E4_CacheSize; i += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr | i, bank->b_ioaddr + addr + i); -+ initialise_cache_tags (dev, addr); -+ } -+ } -+ else -+ { -+ /* Write the whole of this bank of sdram. */ -+ for (addr = 0; addr < bank->b_size; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, bank->b_ioaddr + addr); -+ -+ /* Now flush out the top out of the cache */ -+ for (addr = 0; addr < E4_CacheSize; addr += sizeof (E4_uint64)) -+ writeq (0xbeec000000000000ull | addr, bank->b_ioaddr + addr); -+ -+ /* Now read the top value of sdram to guarantee the write has occured before the ecc is enabled */ -+ __elan4_readq (dev, bank->b_ioaddr + bank->b_size - sizeof (E4_uint64)); -+ } -+} -+#endif -+ -+#ifdef CONFIG_MPSAS -+static void -+do_initdma (ELAN4_DEV *dev) -+{ -+#define VIRTUAL_ADDRESS 0x10000000ull -+ ELAN4_CQ *cq = dev->dev_flush_cq[0]; -+ E4_uint64 value; -+ E4_uint32 intreg; -+ E4_uint64 status; -+ -+ PRINTF (DBG_DEVICE, DBG_CONFIG, "elan: performing initialising dma\n"); -+ -+ DISABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+ -+ /* initialise the context filter */ -+ elan4_attach_filter (&dev->dev_ctxt, 0); -+ -+ /* now issue a DMA - we expect this to trap */ -+ writeq (E4_DMA_TYPE_SIZE (128*4, DMA_DataTypeByte, 0, 0) | RUN_DMA_CMD, cq->cq_mapping + (0 << 3)); -+ writeq (0, cq->cq_mapping + (1 << 3)); -+ writeq (0, cq->cq_mapping + (2 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (3 << 3)); -+ writeq (dev->dev_tproc_space, cq->cq_mapping + (4 << 3)); -+ writeq (0, cq->cq_mapping + (5 << 3)); -+ writeq (0, cq->cq_mapping + (6 << 3)); -+ -+ /* spin waiting for it to trap - then restart the dma processor */ -+ do { -+ value = read_reg64 (dev, IntAndMaskReg); -+ intreg = (value >> E4_INTERRUPT_REG_SHIFT); -+ } while ((intreg & (INT_Dma0Proc | INT_Dma1Proc)) == 0); -+ -+ /* check it trapped for the right reason */ -+ status = (intreg & INT_Dma0Proc) ? read_reg64 (dev, DProc0Status) : read_reg64 (dev, DProc1Status); -+ -+ if (DPROC_PrefetcherFault (status) || (DPROC_TrapType(status) != DmaProcFailCountError && DPROC_TrapType(status) != DmaProcPacketAckError)) -+ { -+ printk ("elan: bad dma trap, status = %lx\n", (long)status); -+ panic ("elan: bad dma trap\n"); -+ } -+ -+ PULSE_SCHED_RESTART (dev, SCH_RestartDma0Proc | SCH_RestartDma1Proc | SCH_RestartDmaPrefetchProc); -+ -+ elan4_detach _filter (&dev->dev_ctxt, 0); -+ -+ ENABLE_INT_MASK (dev, INT_Dma0Proc | INT_Dma1Proc); -+#undef VIRTUAL_ADDRESS -+} -+#endif -+ -+static int -+ebus_read_vpd (ELAN4_DEV *dev, unsigned char *data, unsigned int nob) -+{ -+ unsigned int pci_data_ptr; -+ unsigned int vpd_ptr; -+ register int i; -+ -+ if (read_ebus_rom (dev, 0) != 0x55 || read_ebus_rom (dev, 1) != 0xaa) -+ { -+ printk ("elan%d: invalid rom signature in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ pci_data_ptr = (read_ebus_rom (dev, 0x19) << 8) | read_ebus_rom (dev, 0x18); -+ -+ /* check the pci data structure */ -+ if (read_ebus_rom (dev, pci_data_ptr + 0) != 'P' || -+ read_ebus_rom (dev, pci_data_ptr + 1) != 'C' || -+ read_ebus_rom (dev, pci_data_ptr + 2) != 'I' || -+ read_ebus_rom (dev, pci_data_ptr + 3) != 'R') -+ { -+ printk ("elan%d: invalid pci data structure in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* extract the VPD pointer */ -+ vpd_ptr = (read_ebus_rom (dev, pci_data_ptr + 9) << 8) | read_ebus_rom (dev, pci_data_ptr + 8); -+ -+ if (vpd_ptr == 0) -+ { -+ printk ("elan%d: no vital product data in ebus rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ /* read the vpd data */ -+ for (i = 0; i < nob; i++) -+ data[i] = read_ebus_rom (dev, vpd_ptr + i); -+ -+ return 0; -+} -+ -+int -+elan4_read_vpd (ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) -+{ -+ unsigned char vpd[I2C_ELAN_EEPROM_VPD_SIZE]; -+ unsigned char *ptr = vpd; -+ unsigned int finished = 0; -+ unsigned char *lim; -+ unsigned char name[3]; -+ unsigned char value[256]; -+ unsigned char type; -+ unsigned int len, len2; -+ register int i; -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ { -+ if (ebus_read_vpd (dev, vpd, I2C_ELAN_EEPROM_VPD_SIZE) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from EBUS rom\n", dev->dev_instance); -+ return -EINVAL ; -+ } -+ } -+ else -+ { -+ if (i2c_read_rom (dev, I2C_ELAN_EEPROM_VPD_BASEADDR, I2C_ELAN_EEPROM_VPD_SIZE, vpd) < 0) -+ { -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unable to read serial number from I2C rom\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ } -+ -+ result[0] = 0; -+ while (! finished) -+ { -+ type = *ptr++; -+ -+ if (type & LARGE_RESOURCE_BIT) -+ { -+ len = *(ptr++); -+ len += *(ptr++) << 8; -+ -+ switch (type & ~LARGE_RESOURCE_BIT) -+ { -+ case LARGE_RESOURCE_STRING: -+ case LARGE_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case LARGE_RESOURCE_VITAL_PRODUCT_DATA: -+ for (lim = ptr + len; ptr < lim; ) -+ { -+ name[0] = *ptr++; -+ name[1] = *ptr++; -+ name[2] = '\0'; -+ len2 = *ptr++; -+ -+ for (i = 0; i < len2 && ptr < lim; i++) -+ value[i] = *ptr++; -+ value[i] = '\0'; -+ -+ PRINTF3 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, %s: $s\n", dev->dev_instance, name, value); -+ -+ if (tag != NULL) -+ { /* looking for just one tag */ -+ if (!strcmp (name, tag)) -+ strcpy(result, value); -+ } -+ else -+ { /* get all tags */ -+ strcat(result,name); -+ strcat(result,": "); -+ strcat(result,value); -+ strcat(result,"\n"); -+ } -+ } -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown large resource %x\n", dev->dev_instance, type); -+ finished = 1; -+ break; -+ } -+ } -+ else -+ { -+ len = type & 0x7; -+ -+ switch (type >> 3) -+ { -+ case SMALL_RESOURCE_COMPATIBLE_DEVICE_ID: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_VENDOR_DEFINED: -+ ptr += len; -+ break; -+ -+ case SMALL_RESOURCE_END_TAG: -+ finished = 1; -+ break; -+ -+ default: -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, unknown small resource %x\n", dev->dev_instance, type >> 3); -+ finished = 1; -+ break; -+ } -+ } -+ } -+ -+ if ( result[0] == 0 ) { -+ if ( tag != 0 ) -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find tag %s\n", dev->dev_instance, tag); -+ else -+ PRINTF1 (DBG_DEVICE, DBG_CONFIG, "elan%d: elan4_read_vpd, failed to find any tags\n", dev->dev_instance); -+ return -EINVAL; -+ } -+ -+ return (0); -+} -+ -+int -+elan4_start_device (ELAN4_DEV *dev) -+{ -+ E4_VirtualProcessEntry entry; -+ unsigned pagesizeval[2]; -+ unsigned hashsizeval[2]; -+ register int i, j, tbl, res; -+ unsigned attempts = 0; -+ E4_PAGE_SIZE_TABLE; -+ unsigned char serial[256]; -+ unsigned int sdram_factor = SDRAM_166_DLL_CORRECTION_FACTOR; -+ -+ PRINTF (DBG_DEVICE, DBG_ALL, "elan4_start_device: entered\n"); -+ -+ dev->dev_state = ELAN4_STATE_STARTING; -+ -+ tryagain: -+ /* Initialise the pci config space */ -+ if ((res = elan4_pciinit (dev)) < 0) -+ return (res); -+ -+ /* Display the serial number */ -+ if (elan4_read_vpd (dev, "SN", serial)) -+ printk("elan%d: SN: failed to read\n", dev->dev_instance); -+ else -+ printk("elan%d: SN: %s\n", dev->dev_instance, serial); -+ -+ /* initialise the interrupt mask to zero */ -+ SET_INT_MASK (dev, 0); -+ -+ /* Initialise the device registers */ -+ write_reg64 (dev, TlbLineValue, 0); -+ write_reg64 (dev, SysControlReg, 0); -+ -+ /* Initialise the SDRAM using the configuration value from the ROM */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_SETUP); -+ -+ /* Setup the linkport registers */ -+ write_reg64 (dev, LinkPortLock, elan4_linkport_lock); -+ -+ /* Setup the tick rates, start the clock, and init the stats registers */ -+ write_ureg32 (dev, ClockTickRate.s.TickRates, ELAN4_CLOCK_TICK_RATE); -+ write_ureg64 (dev, Clock, 0); -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ for (i = 0; i < 8; i++) -+ write_ureg32 (dev, StatCounts[i].s.StatsCount, 0); -+ -+ /* Initialise the Link Control register - disable the TLB prefetcher on RevB -+ * as it can cause very occasional data corruption. */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ write_reg32 (dev, LinkControlReg, LCONT_EN_SYS_READS | LCONT_REVB_DISABLE_TLB_PREFETCH); -+ else -+ write_reg32 (dev, LinkControlReg, LCONT_EN_SYS_READS); -+ -+ /* Initialise the Link Control Settings to set the PLL Reference Value */ -+ write_reg32 (dev, LinkContSettings, -+ (elan4_mod45disable ? LCONT_MOD45_DISABLE : 0) | -+ (3 << LCONT_CONFIG_PHASE_SHIFT) | -+ ((elan4_pll_div & LCONT_PLL_REF_VAL_BITS_MASK) << LCONT_PLL_REF_VAL_BITS_SHIFT) | -+ (LCONT_VOD_360 << LCONT_LVDS_VOLTAGE_BITS_SHIFT) | -+ (LCONT_TERM_AUTO_OHM << LCONT_LVDS_TERMINATION_SHIFT)); -+ -+ /* Clear the link error LED on RevB and above */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_ClearLinkError); -+ -+ /* Compute the SysClk frequency and update the PLL if necessary */ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA) -+ { -+ int mhz = measure_sysclk (dev); -+ -+ if (elan4_pll_cfg != 0 || mhz > 190 || mhz < 170) -+ printk ("elan%d: SysClk running at %d Mhz\n", dev->dev_instance, measure_sysclk (dev)); -+ else -+ { -+ sdram_factor = SDRAM_150_DLL_CORRECTION_FACTOR; -+ -+ elan4_updatepll (dev, ECTRL_SYS_CLOCK_RATIO_4_3); -+ -+ printk ("elan%d: SysClk now running at %d Mhz\n", dev->dev_instance, measure_sysclk (dev)); -+ } -+ } -+ -+ initialise_cache (dev); -+ -+ /* Initialise the MMU hash table parameters */ -+ /* Select the largest elan pagesize which is spanned by the -+ * system pagesize for mmu table 0*/ -+ for (i = 0; i < E4_PAGE_SIZE_TABLE_SIZE; i++) -+ if (PageSizeTable[i] > PAGE_SHIFT) -+ break; -+ -+ pagesizeval[0] = i - 1; -+ hashsizeval[0] = elan4_hash_0_size_val; -+ -+ /* Select a suitable elan pagesize to match any "large" page -+ * support that the OS provides. */ -+ pagesizeval[1] = PAGE_SIZE_4M; -+ hashsizeval[1] = elan4_hash_1_size_val; -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_pagesizeval[tbl] = pagesizeval[tbl]; -+ dev->dev_pageshift[tbl] = PageSizeTable[pagesizeval[tbl]]; -+ dev->dev_hashsize[tbl] = (1 << hashsizeval[tbl])/sizeof (E4_HashTableEntry); -+ dev->dev_rsvd_hashmask[tbl] = ((1 << (27 - dev->dev_pageshift[tbl]))-1) & ~((1 << hashsizeval[tbl])-1); -+ dev->dev_rsvd_hashval[tbl] = 0xFFFFFFFF; -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: pageshifts %d,%d\n", dev->dev_pageshift[0], -+ NUM_HASH_TABLES == 2 ? dev->dev_pageshift[1] : 0); -+ -+ /* Initialise the control register to the desired value */ -+ dev->dev_syscontrol = (CONT_EN_ALL_SETS | CONT_MMU_ENABLE | CONT_CACHE_ALL | CONT_2K_NOT_1K_DMA_PACKETS | -+ (pagesizeval[0] << CONT_TABLE0_PAGE_SIZE_SHIFT) | (hashsizeval[0] << CONT_TABLE0_MASK_SIZE_SHIFT)); -+ -+ if (NUM_HASH_TABLES == 2) -+ dev->dev_syscontrol |= CONT_TWO_HASH_TABLES | (pagesizeval[1] << CONT_TABLE1_PAGE_SIZE_SHIFT) | (hashsizeval[1] << CONT_TABLE1_MASK_SIZE_SHIFT); -+ -+ write_reg64 (dev, SysControlReg, dev->dev_syscontrol); -+ -+ /* use direct mapped pci writes during sdram initialisation, since for -+ * cache flushing to work, we need to ensure that the cacheflush page -+ * never gets lines into the incorrect cache set. */ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ elan4_sdram_setup_delay_lines(dev, sdram_factor); -+ -+ for (i = res = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_size) -+ res |= elan4_sdram_init_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ if (! res) -+ { -+ if (dev->dev_devinfo.dev_device_id == PCI_REVISION_ID_ELAN4_REVB && ++attempts < 5) -+ { -+ printk ("elan%d: sdram not working, resetting\n", dev->dev_instance); -+ goto tryagain; -+ } -+ -+ printk ("elan%d: could not find any sdram banks\n", dev->dev_instance); -+ goto failed; -+ } -+ -+#ifndef CONFIG_MPSAS -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialising for ECC\n"); -+ -+ for (i = 0 ; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ initialise_ecc (dev, &dev->dev_sdram_banks[i]); -+#endif -+ -+ dev->dev_sdram_initial_ecc_val = read_reg64 (dev, SDRamECCStatus); -+ -+ /* Now enable ECC after we've scrubbed the memory */ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg | SDRAM_ENABLE_ECC); -+ -+ /* clear any errors, and flush the tlb/route cache */ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH | CONT_ROUTE_FLUSH | CONT_CLEAR_LINKPORT_INT | CONT_CLEAR_SDRAM_ERROR); -+ -+ write_ureg32 (dev, InstCount.s.StatsCount, 0); -+ -+ /* Initialise the thread processor's register file */ -+ for (i = 0; i < 64; i++) -+ write_reg64 (dev, TProcRegs[i], 0); -+ -+ /* Initialise the thread processor's ICache tags */ -+ for (i = 0; i < (E4_ICacheLines/E4_ICachePortSize); i++) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, i << E4_ICacheTagAddrShift); -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], E4_InvalidTagValue); -+ } -+ -+ /* -+ * Initialise the ICache with a sethi %hi(addr << 7), %r0 -+ * writing 8 64 bit values per loop of sethi %g0 values ending in 77 for something different?? -+ */ -+ for (i = 0; i < E4_ICacheSizeInBytes; i += (E4_ICachePortSize << 3)) -+ { -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_AccessICacheRams | (i >> 3)); -+ -+ for (j = 0; j < E4_ICachePortSize; j++) -+ write_reg64 (dev, ICachePort[j], -+ (E4_uint64) (((E4_uint64)i << (4+7)) + ((E4_uint64)j << (1+7)) + (0x077)) | -+ (E4_uint64) (((E4_uint64)i << (4+7+32)) + ((E4_uint64)j << (1+7+32)) + (0x0e7)) << 32); -+ } -+ -+ /* -+ * Initialise the top of the ICache Set0 with a instruction which will -+ * cause a know trap fingerprint so that the application can identify it -+ * and ignore the trap. -+ */ -+ write_reg64 (dev, ICachePort_Cntl_Addr, E4_ICacheFixupOffset | E4_AccessICacheRams); -+ for (i = 0; i < E4_ICachePortSize; i++) -+ write_reg64 (dev, ICachePort[i], E4_ICacheFixupInsn | (E4_ICacheFixupInsn << 32)); -+ -+ /* create the buddy allocator for SDRAM */ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_add_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ dev->dev_ctxtableshift = elan4_ctxt_table_shift; -+ dev->dev_cqcount = (1 << elan4_ln2_max_cqs); -+ dev->dev_cqreorder = 0; -+ -+ /* allocate the sdram for cache flushing whilst still in direct mapped mode */ -+ dev->dev_cacheflush_space = elan4_sdram_alloc (dev, E4_CacheSize); -+ -+ /* and longer need direct mapped pci writes */ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* allocate the hash tables, command queues, context tables etc */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: allocating hash tables, command queueus, context tables\n"); -+ -+ dev->dev_comqlowpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_comqhighpri = elan4_sdram_alloc (dev, (1 << COMMAND_RUN_QUEUE_BITS)); -+ dev->dev_cqaddr = elan4_sdram_alloc (dev, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ dev->dev_dmaqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_highpri_size)); -+ dev->dev_dmaqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ dev->dev_threadqhighpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_highpri_size)); -+ dev->dev_threadqlowpri = elan4_sdram_alloc (dev, E4_QueueSize(elan4_threadq_lowpri_size)); -+ dev->dev_interruptq = elan4_sdram_alloc (dev, E4_QueueSize(elan4_interruptq_size)); -+ -+ dev->dev_ctxtable = elan4_sdram_alloc (dev, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ dev->dev_faultarea = elan4_sdram_alloc (dev, CUN_Entries * sizeof (E4_FaultSave)); -+ dev->dev_inputtraparea = elan4_sdram_alloc (dev, sizeof (E4_IprocTrapState)); -+ -+ dev->dev_sdrampages[0] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ dev->dev_sdrampages[1] = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ dev->dev_hashtable[tbl] = elan4_sdram_alloc (dev, dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#ifndef CONFIG_MPSAS -+ /* Initialise hash tables to invalid (zero) */ -+ elan4_sdram_zeroq_sdram (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+#endif -+ } -+ -+ /* Initialise all context filters to discard */ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, dev->dev_ctxtable, -+ E4_FILTER_DISCARD_ALL, (1 << (dev->dev_ctxtableshift-1))) < 0) -+ { -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+ } -+#else -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i++) -+ elan4_write_filter (dev, i, E4_FILTER_DISCARD_ALL); -+#endif -+ -+ PRINTF4 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: hashtables %x,%x, %x,%x\n", dev->dev_hashtable[0], -+ dev->dev_hashsize[0], dev->dev_hashtable[1], dev->dev_hashsize[1]); -+ -+ /* install the hash table pointers */ -+ PRINTF0 (DBG_DEVICE, DBG_CONFIG, "elan4_start_device: initialise registers with table addresses\n"); -+ write_reg64 (dev, MmuTableBasePtrs, (((E4_uint64) dev->dev_hashtable[0]) | ((E4_uint64) dev->dev_hashtable[1]) << 32)); -+ write_reg64 (dev, MmuFaultAndRootCntxPtr, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_faultarea) << 32)); -+ write_reg64 (dev, InputTrapAndFilter, (((E4_uint64) dev->dev_ctxtableshift) | -+ ((E4_uint64) dev->dev_ctxtable) | -+ ((E4_uint64) dev->dev_inputtraparea) << 32)); -+ /* -+ * The run ptrs have this format: (Front << 32) | Back -+ * The base for both the front and back is uses the high bits of the back pointer. -+ * So writting just the base value is good enough. -+ */ -+ write_reg64 (dev, CommandLowPriRunPtrs, dev->dev_comqlowpri); -+ write_reg64 (dev, CommandHighPriRunPtrs, dev->dev_comqhighpri); -+ -+ /* Initialise the run queues */ -+ write_reg64 (dev, DProcHighPriPtrs, E4_QueueValue (dev->dev_dmaqhighpri, elan4_dmaq_highpri_size)); -+ write_reg64 (dev, DProcLowPriPtrs, E4_QueueValue (dev->dev_dmaqlowpri, elan4_dmaq_lowpri_size)); -+ write_reg64 (dev, TProcHighPriPtrs, E4_QueueValue (dev->dev_threadqhighpri, elan4_threadq_highpri_size)); -+ write_reg64 (dev, TProcLowPriPtrs, E4_QueueValue (dev->dev_threadqlowpri, elan4_threadq_lowpri_size)); -+ -+ /* Initialise the interrupt queue as "empty" - this is actually with one entry on it */ -+ write_reg64 (dev, MainIntQueuePtrs.Value, (((E4_uint64) E4_QueueFrontValue (dev->dev_interruptq, elan4_interruptq_size) << 32) | -+ ((E4_uint64) E4_QueueBackPointer(dev->dev_interruptq + E4_MainIntEntrySize)))); -+ -+ dev->dev_interruptq_nfptr = dev->dev_interruptq + E4_MainIntEntrySize; -+ -+ /* -+ * Flush the context filter before dropping the Discard all bits in the schedule status register. -+ * Also hit the SCH_RestartTProc to clear out X's from the trap state and -+ * hit the SCH_RestartDmaPrefetchProc to clear out X's from the prev register. -+ */ -+ PULSE_SCHED_RESTART (dev, SCH_ContextFilterFlush | SCH_RestartTProc | SCH_RestartDmaPrefetchProc); -+ -+ /* setup the schedule status register. */ -+ SET_SCHED_STATUS (dev, SCH_CProcTimeout6p2us | SCH_DProcTimeslice512us); -+ -+ /* -+ * Now initialise the inserter cache.s -+ * Bit 31 of the first word of the descriptor is a valid bit. This must be cleared. -+ * Bit 31 becomes a used bit in the descriptors in memory. -+ */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, i); /* select a cache line */ -+ write_reg64 (dev, CommandCacheTestPort, 0); /* Mark it invalid */ -+ } -+ -+ /* Setup the pointer to the command descriptors */ -+ /* the table must be aligned on a CQ_CommandDescsAlignement boundary */ -+ /* since we've allocated a small table - we work out the offset of the */ -+ /* first entry in our table for mapping in the command ports later */ -+ dev->dev_cqoffset = (dev->dev_cqaddr & (CQ_CommandDescsAlignment-1)) / sizeof (E4_CommandQueueDesc); -+ -+ write_reg32 (dev, CommandControl.CommandQueueDescsBase, (dev->dev_cqaddr & ~(CQ_CommandDescsAlignment-1)) | COM_ENABLE_DEQUEUE); -+ -+ /* allocate the bitmaps for cq,ctxt allocation */ -+ KMEM_ZALLOC (dev->dev_cqamap, bitmap_t *, BT_BITOUL(dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t), 1); -+ KMEM_ZALLOC (dev->dev_ctxmap, bitmap_t *, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t), 1); -+ -+ if (dev->dev_cqamap == NULL || dev->dev_ctxmap == NULL) -+ goto failed; -+ -+ /* Make every fourth context be invalid for ICache fixup. -+ * context 0 is also invalid - since it is used to indicate -+ * an invalid tag. */ -+ for (i = 0; i < (1 << dev->dev_ctxtableshift); i += 4) -+ BT_SET (dev->dev_ctxmap, i); -+ -+ /* initialise the halt operations */ -+ dev->dev_haltop_mask = 0; -+ dev->dev_haltop_active = 0; -+ -+ /* allocate the hash table shadow structures - and place all blocks on the free lists */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ KMEM_ZALLOC (dev->dev_mmuhash[tbl], ELAN4_HASH_ENTRY *, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY), 1); -+ KMEM_ZALLOC (dev->dev_mmufree[tbl], ELAN4_HASH_ENTRY **, dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *), 1); -+ -+ if (dev->dev_mmuhash[tbl] == NULL || dev->dev_mmufree[tbl] == NULL) -+ goto failed; -+ -+ for (i = 0; i < dev->dev_hashsize[tbl]; i++) -+ { -+ dev->dev_mmuhash[tbl][i].he_entry = dev->dev_hashtable[tbl] + (i * sizeof (E4_HashTableEntry)); -+ dev->dev_mmufree[tbl][i] = &dev->dev_mmuhash[tbl][i]; -+ } -+ } -+ -+ /* setup the interrupt mask register */ -+ SET_INT_MASK (dev, (INT_MSI0 | INT_MSI1 | INT_MSI2 | INT_MSI3) & ~(INT_Discarding | INT_Halted | INT_LinkPortKeyFail)); -+ -+ /* start a thread to handle excessive main interrupts */ -+ if (kernel_thread_create (elan4_mainint_thread, (caddr_t) dev) == NULL) -+ goto failed; -+ dev->dev_mainint_started = 1; -+ -+ /* install the device context - and allocate the first 16 command queues */ -+ if (elan4_insertctxt (dev, &dev->dev_ctxt, &device_trap_ops) != 0) -+ goto failed; -+ -+ /* Allocate command queues, one for each entry in the inserter cache, -+ * we'll use these queues to flush the insert cache */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ { -+ if ((dev->dev_flush_cq[i] = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit | CQ_InterruptEnableBit, -+ CQ_Priority)) == NULL) -+ goto failed; -+ -+ ASSERT (elan4_cq2num(dev->dev_flush_cq[i]) == i); -+ -+ dev->dev_flush_finished |= (1 << i); -+ } -+ -+ /* Allocate command queues for dma halt operations */ -+ if ((dev->dev_dma_flushop[0].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit, 0)) == NULL || -+ (dev->dev_dma_flushop[1].cq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_DmaStartEnableBit, CQ_Priority)) == NULL) -+ goto failed; -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+#endif -+ -+ /* initialise halt operation for flushing the icache */ -+ dev->dev_iflush_haltop.op_function = device_iflush_haltop; -+ dev->dev_iflush_haltop.op_arg = dev; -+ dev->dev_iflush_haltop.op_mask = INT_TProcHalted; -+ -+ /* Allocate a route table, and create a valid route for vp==0, this is used -+ * when a DMA is removed from the dma run queue */ -+ if ((dev->dev_routetable = elan4_alloc_routetable (dev, 0)) == NULL) -+ goto failed; -+ -+ elan4_set_routetable (&dev->dev_ctxt, dev->dev_routetable); -+ -+ entry.Values[0] = FIRST_MYLINK; -+ entry.Values[1] = 0; -+ -+ elan4_write_route (dev, dev->dev_routetable, 0, &entry); -+ -+ /* map the sdram pages into the elan */ -+ dev->dev_tproc_suspend = DEVICE_TPROC_SUSPEND_ADDR; -+ dev->dev_tproc_space = DEVICE_TPROC_SPACE_ADDR; -+ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, (dev->dev_sdrampages[0] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocExecute)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, dev->dev_tproc_space, (dev->dev_sdrampages[1] >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocDataWrite)); -+ -+ /* and store the thread suspend sequence in it for use when a thread is removed from the run queue */ -+ elan4_sdram_writel (dev, dev->dev_sdrampages[0], DEVICE_TPROC_SUSPEND_INSTR); -+ -+#ifdef CONFIG_MPSAS -+ do_initdma (dev); -+#endif -+ -+ if (!elan4_neterr_init (dev)) -+ goto failed; -+ -+ elan4_configure_writecombining (dev); -+ -+ /* finally register the device with elanmod for rms */ -+ dev->dev_idx = elan_dev_register (&dev->dev_devinfo, &elan4_dev_ops, (void *) dev); -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ -+ return (0); -+ -+ failed: -+ printk ("elan%d: failed to start elan4 device - stopping\n", dev->dev_instance); -+ -+ elan4_stop_device (dev); -+ return (-ENOMEM); -+} -+ -+void -+elan4_stop_device (ELAN4_DEV *dev) -+{ -+ unsigned long flags; -+ int i, tbl; -+ -+ dev->dev_state = ELAN4_STATE_STOPPING; -+ -+ elan_dev_deregister (&dev->dev_devinfo); -+ -+ elan4_unconfigure_writecombining (dev); -+ -+ elan4_neterr_destroy (dev); -+ -+ if (dev->dev_tproc_suspend) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_suspend, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_tproc_space) -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, dev->dev_tproc_space, 1 << dev->dev_pageshift[0]); -+ -+ if (dev->dev_routetable) -+ { -+ elan4_set_routetable (&dev->dev_ctxt, NULL); -+ elan4_free_routetable (dev, dev->dev_routetable); -+ } -+ -+ for (i = 0; i < 2; i++) -+ if (dev->dev_dma_flushop[i].cq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_dma_flushop[i].cq); -+ -+ /* free of the device context - and insert cache flushing command queues */ -+ for (i = 0; i < COMMAND_INSERTER_CACHE_ENTRIES; i++) -+ if (dev->dev_flush_cq[i]) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_flush_cq[i]); -+ -+ if (dev->dev_ctxt.ctxt_dev) -+ elan4_removectxt (dev, &dev->dev_ctxt); -+ -+ /* stop the mainint thread */ -+ spin_lock_irqsave (&dev->dev_mainint_lock, flags); -+ dev->dev_stop_threads = 1; -+ -+ while (dev->dev_mainint_started && !dev->dev_mainint_stopped) -+ { -+ kcondvar_wakeupall (&dev->dev_mainint_wait, &dev->dev_mainint_lock); -+ kcondvar_wait (&dev->dev_mainint_wait, &dev->dev_mainint_lock, &flags); -+ } -+ dev->dev_mainint_started = dev->dev_mainint_stopped = 0; -+ spin_unlock_irqrestore (&dev->dev_mainint_lock, flags); -+ -+ /* cancel any error interrupt timeouts */ -+ if (timer_fn_queued (&dev->dev_error_timeoutid)) -+ cancel_timer_fn (&dev->dev_error_timeoutid); -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && timer_fn_queued (&dev->dev_linkerr_timeoutid)) -+ cancel_timer_fn (&dev->dev_linkerr_timeoutid); -+ -+ /* reset the interrupt mask register to zero */ -+ if (dev->dev_regs) -+ SET_INT_MASK (dev, 0); -+ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ { -+ if (dev->dev_mmuhash[tbl]) -+ KMEM_FREE (dev->dev_mmuhash[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY)); -+ if (dev->dev_mmufree[tbl]) -+ KMEM_FREE (dev->dev_mmufree[tbl], dev->dev_hashsize[tbl] * sizeof (ELAN4_HASH_ENTRY *)); -+ if (dev->dev_hashtable[tbl]) -+ elan4_sdram_free (dev, dev->dev_hashtable[tbl], dev->dev_hashsize[tbl] * sizeof (E4_HashTableEntry)); -+ } -+ -+ if (dev->dev_cqamap) -+ KMEM_FREE (dev->dev_cqamap, BT_BITOUL (dev->dev_cqcount/ELAN4_CQ_PER_CQA) * sizeof (bitmap_t)); -+ if (dev->dev_ctxmap) -+ KMEM_FREE (dev->dev_ctxmap, BT_BITOUL(1 << dev->dev_ctxtableshift) * sizeof (bitmap_t)); -+ -+ if (dev->dev_comqlowpri) -+ elan4_sdram_free (dev, dev->dev_comqlowpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_comqhighpri) -+ elan4_sdram_free (dev, dev->dev_comqhighpri, (1 << COMMAND_RUN_QUEUE_BITS)); -+ if (dev->dev_cqaddr) -+ elan4_sdram_free (dev, dev->dev_cqaddr, sizeof (E4_CommandQueueDesc) * dev->dev_cqcount); -+ if (dev->dev_dmaqhighpri) -+ elan4_sdram_free (dev, dev->dev_dmaqhighpri, E4_QueueSize(elan4_dmaq_highpri_size)); -+ if (dev->dev_dmaqlowpri) -+ elan4_sdram_free (dev, dev->dev_dmaqlowpri, E4_QueueSize(elan4_dmaq_lowpri_size)); -+ if (dev->dev_threadqhighpri) -+ elan4_sdram_free (dev, dev->dev_threadqhighpri, E4_QueueSize(elan4_threadq_highpri_size)); -+ if (dev->dev_threadqlowpri) -+ elan4_sdram_free (dev, dev->dev_threadqlowpri, E4_QueueSize(elan4_threadq_lowpri_size)); -+ if (dev->dev_interruptq) -+ elan4_sdram_free (dev, dev->dev_interruptq, E4_QueueSize(elan4_interruptq_size)); -+ -+ if (dev->dev_ctxtable) -+ elan4_sdram_free (dev, dev->dev_ctxtable, (1 << dev->dev_ctxtableshift) * sizeof (E4_ContextControlBlock)); -+ if (dev->dev_faultarea) -+ elan4_sdram_free (dev, dev->dev_faultarea, CUN_Entries * sizeof (E4_FaultSave)); -+ if (dev->dev_inputtraparea) -+ elan4_sdram_free (dev, dev->dev_inputtraparea, sizeof (E4_IprocTrapState)); -+ -+ if (dev->dev_sdrampages[0]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[0], SDRAM_PAGE_SIZE); -+ if (dev->dev_sdrampages[1]) -+ elan4_sdram_free (dev, dev->dev_sdrampages[1], SDRAM_PAGE_SIZE); -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ if (dev->dev_sdram_banks[i].b_ioaddr) -+ elan4_sdram_fini_bank (dev, &dev->dev_sdram_banks[i]); -+ -+ elan4_pcifini (dev); -+ -+ dev->dev_state = ELAN4_STATE_STOPPED; -+ -+ if (dev->dev_ack_errors) -+ kfree(dev->dev_ack_errors); -+ if (dev->dev_dproc_timeout) -+ kfree(dev->dev_dproc_timeout); -+ if (dev->dev_cproc_timeout) -+ kfree(dev->dev_cproc_timeout); -+} -+ -+static __inline__ int -+compute_arity (int lvl, unsigned n, char *arity) -+{ -+ if (arity[lvl] == 0) -+ { -+ if (n <= 8) -+ arity[lvl] = n; -+ else -+ arity[lvl] = 4; -+ } -+ -+ return (arity[lvl]); -+} -+ -+int -+elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned arityval) -+{ -+ int i, lvl, n; -+ char arity[ELAN_MAX_LEVELS]; -+ -+ if (nodeid >= numnodes) -+ return -EINVAL; -+ -+ for (i = 0; i < ELAN_MAX_LEVELS; i++, arityval >>= 4) -+ arity[i] = arityval & 7; -+ -+ for (lvl = 0, n = numnodes; n > compute_arity(lvl, n, arity) && lvl < ELAN_MAX_LEVELS; lvl++) -+ { -+ if ((n % arity[lvl]) != 0) -+ return -EINVAL; -+ -+ n /= arity[lvl]; -+ } -+ -+ if (arity[lvl] != n) -+ return -EINVAL; -+ -+ for (i = 0; i <= lvl; i++) -+ pos->pos_arity[i] = arity[lvl - i]; -+ -+ pos->pos_nodes = numnodes; -+ pos->pos_levels = lvl + 1; -+ pos->pos_nodeid = nodeid; -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ -+ return 0; -+} -+ -+int -+elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ kmutex_lock (&dev->dev_lock); -+ *pos = dev->dev_position; -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (pos->pos_mode); -+} -+ -+int -+elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos) -+{ -+ int forceLocal = 0; -+ int nnodes, i; -+ unsigned int *ack_errors; -+ unsigned int *dproc_timeout; -+ unsigned int *cproc_timeout; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_UNKNOWN: -+ break; -+ -+ case ELAN_POS_MODE_SWITCHED: -+ if (pos->pos_levels > ELAN_MAX_LEVELS) -+ return (-EINVAL); -+ -+ for (i = 0, nnodes = 1; i < pos->pos_levels; i++) -+ { -+ -+ if (pos->pos_arity[i] <= 0 || (i == 0 ? pos->pos_arity[i] > 8 : pos->pos_arity[i] >= 8)) /* allow an 8 way top-switch */ -+ return (-EINVAL); -+ -+ nnodes *= pos->pos_arity[i]; -+ } -+ -+ if (pos->pos_nodes > nnodes || pos->pos_nodeid >= pos->pos_nodes) -+ return (-EINVAL); -+ break; -+ -+ case ELAN_POS_MODE_LOOPBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 1 || pos->pos_nodeid != 0 || pos->pos_arity[0] != 1) -+ return (-EINVAL); -+ -+ forceLocal = 1; -+ break; -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (pos->pos_levels != 1 || pos->pos_nodes != 2 || pos->pos_nodeid >= 2 || pos->pos_arity[0] != 2) -+ return (-EINVAL); -+ -+ forceLocal = (pos->pos_nodeid == 0); -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ ack_errors = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!ack_errors) -+ return (-EINVAL); -+ memset(ack_errors, 0, pos->pos_nodes * sizeof(unsigned int)); -+ dproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!dproc_timeout) -+ { -+ kfree(ack_errors); -+ return (-EINVAL); -+ } -+ memset(dproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ cproc_timeout = kmalloc(pos->pos_nodes * sizeof(unsigned int), GFP_KERNEL); -+ if (!cproc_timeout) -+ { -+ kfree(ack_errors); -+ kfree(dproc_timeout); -+ return (-EINVAL); -+ } -+ memset(cproc_timeout, 0, pos->pos_nodes * sizeof(unsigned int)); -+ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_position = *pos; -+ dev->dev_ack_errors = ack_errors; -+ dev->dev_dproc_timeout = dproc_timeout; -+ dev->dev_cproc_timeout = cproc_timeout; -+ -+ if (forceLocal) -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) | LCONT_FORCE_COMMSCLK_LOCAL); -+ else -+ write_reg32 (dev, LinkContSettings, read_reg32 (dev, LinkContSettings) & ~LCONT_FORCE_COMMSCLK_LOCAL); -+ -+ pioflush_reg (dev); -+ kmutex_unlock (&dev->dev_lock); -+ -+ return (0); -+} -+ -+void -+elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask) -+{ -+ kmutex_lock (&dev->dev_lock); -+ -+ *mask = dev->dev_devinfo.dev_params_mask; -+ memcpy (params, &dev->dev_devinfo.dev_params, sizeof (ELAN_PARAMS)); -+ -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+void -+elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask) -+{ -+ int i; -+ -+ kmutex_lock (&dev->dev_lock); -+ for (i = 0; i < ELAN4_PARAM_COUNT; i++) -+ if (mask & (1 << i)) -+ dev->dev_devinfo.dev_params.values[i] = params->values[i]; -+ -+ dev->dev_devinfo.dev_params_mask |= mask; -+ kmutex_unlock (&dev->dev_lock); -+} -+ -+ -+EXPORT_SYMBOL(elan4_get_position); -+EXPORT_SYMBOL(elan4_set_position); -+ -+EXPORT_SYMBOL(elan4_queue_haltop); -+EXPORT_SYMBOL(elan4_queue_dma_flushop); -+EXPORT_SYMBOL(elan4_queue_mainintop); -+ -+EXPORT_SYMBOL(elan4_insertctxt); -+EXPORT_SYMBOL(elan4_removectxt); -+ -+EXPORT_SYMBOL(elan4_attach_filter); -+EXPORT_SYMBOL(elan4_detach_filter); -+EXPORT_SYMBOL(elan4_set_filter); -+EXPORT_SYMBOL(elan4_set_routetable); -+ -+EXPORT_SYMBOL(elan4_alloccq); -+EXPORT_SYMBOL(elan4_freecq); -+EXPORT_SYMBOL(elan4_restartcq); -+ -+EXPORT_SYMBOL(elan4_flush_icache); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/device_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/device_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/device_Linux.c 2005-07-28 14:52:52.828681776 -0400 -@@ -0,0 +1,2760 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device_Linux.c,v 1.74.6.20 2005/03/10 11:30:01 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#ifdef CONFIG_MTRR -+#include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+typedef void irqreturn_t; -+#endif -+# define IRQ_NONE -+# define IRQ_HANDLED -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+#error please use a 2.4.0 series kernel or newer -+#endif -+ -+ -+#if defined(LINUX_SPARC) || defined(LINUX_PPC64) -+#define __io_remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#elif defined(NO_RMAP) -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(from,offset,size,prot) -+#else -+#define __io_remap_page_range(from,offset,size,prot) io_remap_page_range(vma,from,offset,size,prot) -+#define __remap_page_range(from,offset,size,prot) remap_page_range(vma,from,offset,size,prot) -+#endif -+ -+static unsigned int pat_pteval = -1; -+ -+#ifndef pgprot_noncached -+static inline pgprot_t pgprot_noncached(pgprot_t _prot) -+{ -+ unsigned long prot = pgprot_val(_prot); -+#if defined(__powerpc__) -+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; -+#elif defined(__sparc__) -+ prot &= ~(_PAGE_CACHE); -+ prot |= _PAGE_IE; -+#endif -+ -+ return __pgprot(prot); -+} -+#endif -+ -+#ifndef pgprot_writecombine -+static inline pgprot_t pgprot_writecombine (pgprot_t _prot) -+{ -+ unsigned long prot = pgprot_val(_prot); -+ -+ if (pat_pteval != -1) -+ prot = (prot & ~(_PAGE_PCD | _PAGE_PWT | _PAGE_PSE)) | pat_pteval; -+ -+ return __pgprot (prot); -+} -+#endif -+ -+#define ELAN4_DRIVER_VERSION 0x103 /* 16 bit value */ -+ -+/* -+ * Function prototypes. -+ */ -+static int elan4_attach_device (int instance, struct pci_dev *pdev); -+static void elan4_detach_device (ELAN4_DEV *dev); -+ -+static int elan4_open (struct inode *inode, struct file *file); -+static int elan4_release(struct inode *inode, struct file *file); -+static int elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg); -+static int elan4_mmap (struct file *file, struct vm_area_struct *vm_area); -+ -+static irqreturn_t elan4_irq (int irq, void *arg, struct pt_regs *regs); -+ -+static void elan4_shutdown_devices(int panicing); -+ -+static int disabled; /* bitmask of which devices not to start */ -+unsigned int elan4_pll_cfg = 0; -+int elan4_pll_div = 31; /* RevC PCB */ -+int elan4_mod45disable = 0; -+static int optimise_pci_bus = 1; /* 0 => don't, 1 => if ok, 2 => always */ -+static int default_features = 0; /* default values for dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] */ -+ -+long long sdram_cfg = SDRAM_STARTUP_VALUE; -+static int sdram_cfg_lo; -+static int sdram_cfg_hi; -+int sdram_bank_limit; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("Elan 4 Device Driver"); -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(elan4_debug, "i"); -+MODULE_PARM(elan4_debug_toconsole, "i"); -+MODULE_PARM(elan4_debug_tobuffer, "i"); -+MODULE_PARM(elan4_debug_mmu, "i"); -+MODULE_PARM(elan4_pll_cfg, "i"); -+MODULE_PARM(elan4_pll_div, "i"); -+MODULE_PARM(elan4_mod45disable, "i"); -+MODULE_PARM(optimise_pci_bus, "i"); -+MODULE_PARM(default_features, "i"); -+ -+MODULE_PARM(disabled, "i"); -+MODULE_PARM(sdram_cfg_lo, "i"); -+MODULE_PARM(sdram_cfg_hi, "i"); -+MODULE_PARM(sdram_bank_limit, "i"); -+ -+MODULE_PARM(elan4_hash_0_size_val, "i"); -+MODULE_PARM(elan4_hash_1_size_val, "i"); -+MODULE_PARM(elan4_ctxt_table_shift, "i"); -+MODULE_PARM(elan4_ln2_max_cqs, "i"); -+MODULE_PARM(elan4_dmaq_highpri_size, "i"); -+MODULE_PARM(elan4_threadq_highpri_size, "i"); -+MODULE_PARM(elan4_dmaq_lowpri_size, "i"); -+MODULE_PARM(elan4_threadq_lowpri_size, "i"); -+MODULE_PARM(elan4_interruptq_size, "i"); -+ -+MODULE_PARM(elan4_mainint_punt_loops, "i"); -+MODULE_PARM(elan4_mainint_resched_ticks, "i"); -+MODULE_PARM(elan4_linkport_lock, "i"); -+MODULE_PARM(elan4_eccerr_recheck, "i"); -+ -+MODULE_PARM(user_p2p_route_options, "i"); -+MODULE_PARM(user_bcast_route_options, "i"); -+MODULE_PARM(user_dproc_retry_count, "i"); -+MODULE_PARM(user_cproc_retry_count, "i"); -+ -+/* -+ * Standard device entry points. -+ */ -+static struct file_operations elan4_fops = { -+ ioctl: elan4_ioctl, -+ mmap: elan4_mmap, -+ open: elan4_open, -+ release: elan4_release, -+}; -+ -+ELAN4_DEV *elan4_devices[ELAN4_MAX_CONTROLLER]; -+ -+#if defined(CONFIG_DEVFS_FS) -+static devfs_handle_t devfs_handle; -+#endif -+ -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32_cmds[] = -+{ /* /dev/elan/control */ -+ ELAN4IO_DEVINFO, -+ ELAN4IO_GET_POSITION, -+ ELAN4IO_SET_POSITION, -+ ELAN4IO_GET_PARAMS, -+ ELAN4IO_SET_PARAMS, -+ -+ /* /dev/elan4/user */ -+ ELAN4IO_POSITION, -+ ELAN4IO_FREE, -+ ELAN4IO_ATTACH, -+ ELAN4IO_DETACH, -+ ELAN4IO_BLOCK_INPUTTER, -+ -+ ELAN4IO_ADD_P2PVP, -+ ELAN4IO_ADD_BCASTVP, -+ ELAN4IO_REMOVEVP, -+ ELAN4IO_SET_ROUTE, -+ ELAN4IO_RESET_ROUTE, -+ ELAN4IO_GET_ROUTE, -+ ELAN4IO_CHECK_ROUTE, -+ -+ ELAN4IO_ALLOCCQ, -+ ELAN4IO_FREECQ, -+ ELAN4IO_SETPERM32, -+ ELAN4IO_CLRPERM32, -+ ELAN4IO_TRAPSIG, -+ ELAN4IO_TRAPHANDLER32, -+ ELAN4IO_REQUIRED_MAPPINGS, -+ -+ ELAN4IO_RESUME_EPROC_TRAP, -+ ELAN4IO_RESUME_CPROC_TRAP, -+ ELAN4IO_RESUME_DPROC_TRAP, -+ ELAN4IO_RESUME_TPROC_TRAP, -+ ELAN4IO_RESUME_IPROC_TRAP, -+ -+ ELAN4IO_FLUSH_ICACHE, -+ -+ ELAN4IO_STOP_CTXT, -+ -+ ELAN4IO_ALLOC_INTCOOKIE, -+ ELAN4IO_FREE_INTCOOKIE, -+ ELAN4IO_ARM_INTCOOKIE, -+ ELAN4IO_WAIT_INTCOOKIE, -+ -+ ELAN4IO_ALLOC_TRAP_QUEUES, -+ ELAN4IO_NETERR_MSG, -+ ELAN4IO_NETERR_TIMER, -+ ELAN4IO_NETERR_FIXUP, -+ -+ ELAN4IO_DUMPCQ32, -+}; -+ -+static int elan4_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+/* -+ * Standard device entry points. -+ */ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+elan4_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ elan4_shutdown_devices (FALSE); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block elan4_dump_notifier = -+{ -+ notifier_call: elan4_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+elan4_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ elan4_shutdown_devices (0); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_reboot_notifier = -+{ -+ notifier_call: elan4_reboot_event, -+ priority: 0, -+}; -+ -+static int -+elan4_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ elan4_shutdown_devices (1); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block elan4_panic_notifier = -+{ -+ notifier_call: elan4_panic_event, -+ priority: 0, -+}; -+ -+static int __init -+elan4_init (void) -+{ -+ int err; -+ struct pci_dev *pdev; -+ int count; -+#if defined(__ia64) -+ int seenRevA = 0; -+#endif -+ -+ if ((err = register_chrdev (ELAN4_MAJOR, ELAN4_NAME, &elan4_fops)) < 0) -+ return (err); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle = devfs_mk_dir (NULL, "elan4", NULL); -+#endif -+ -+ intcookie_init(); -+ elan4_debug_init(); -+ elan4_procfs_init(); -+ -+#ifdef CONFIG_MPSAS -+ sas_init(); -+#endif -+ -+ if (sdram_cfg_lo != 0 && sdram_cfg_hi != 0) -+ sdram_cfg = (((unsigned long long) sdram_cfg_hi) << 32) | ((unsigned long long) sdram_cfg_lo); -+ -+ for (count = 0, pdev = NULL; (pdev = pci_find_device(PCI_VENDOR_ID_QUADRICS, PCI_DEVICE_ID_ELAN4, pdev)) != NULL ; count++) -+ { -+#if defined(__ia64) -+ unsigned char revid; -+ -+ pci_read_config_byte (pdev, PCI_REVISION_ID, &revid); -+ -+ if (revid == PCI_REVISION_ID_ELAN4_REVA && seenRevA++ != 0 && pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL)) -+ { -+ printk ("elan: only a single elan4a supported on rx2600\n"); -+ continue; -+ } -+#endif -+ -+ if (count < ELAN4_MAX_CONTROLLER) -+ elan4_attach_device (count, pdev); -+ } -+ -+ if (count >= ELAN4_MAX_CONTROLLER) -+ printk ("elan: found %d elan4 devices - only support %d\n", count, ELAN4_MAX_CONTROLLER); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (elan4_ioctl32_cmds[i], elan4_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&elan4_dump_notifier); -+#endif -+ register_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+#ifdef MODULE -+static void __exit -+elan4_exit (void) -+{ -+ int i; -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ -+ for (i = 0; i < sizeof (elan4_ioctl32_cmds)/sizeof(elan4_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (elan4_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&elan4_dump_notifier); -+#endif -+ unregister_reboot_notifier (&elan4_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &elan4_panic_notifier); -+#endif -+ -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ if (elan4_devices[i] != NULL) -+ elan4_detach_device (elan4_devices[i]); -+ -+ elan4_procfs_fini(); -+ elan4_debug_fini(); -+ intcookie_fini(); -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (devfs_handle); -+#endif -+ -+ unregister_chrdev(ELAN4_MAJOR, ELAN4_NAME); -+} -+ -+module_init (elan4_init); -+module_exit (elan4_exit); -+ -+#else -+__initcall (elan4_init); -+#endif -+ -+/* -+ * Minor numbers encoded as : -+ * [5:0] device number -+ * [15:6] function number -+ */ -+#define ELAN4_DEVICE_MASK 0x3F -+#define ELAN4_DEVICE(inode) (MINOR((inode)->i_rdev) & ELAN4_DEVICE_MASK) -+ -+#define ELAN4_MINOR_CONTROL 0 -+#define ELAN4_MINOR_MEM 1 -+#define ELAN4_MINOR_USER 2 -+ -+#define ELAN4_MINOR_SHIFT 6 -+#define ELAN4_MINOR(inode) (MINOR((inode)->i_rdev) >> ELAN4_MINOR_SHIFT) -+ -+/* -+ * Called by init_module() for each card discovered on PCI. -+ */ -+static int -+elan4_attach_device (int instance, struct pci_dev *pdev) -+{ -+ ELAN4_DEV *dev; -+ int res; -+ -+ KMEM_ALLOC (dev, ELAN4_DEV *, sizeof (ELAN4_DEV), 1); -+ if ((dev == NULL)) -+ return (-ENOMEM); -+ memset (dev, 0, sizeof (ELAN4_DEV)); -+ -+ /* setup os dependent section of ELAN4_DEV */ -+ dev->dev_instance = instance; -+ dev->dev_osdep.pdev = pdev; -+ -+ /* initialise the devinfo */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_VENDOR_ID, &dev->dev_devinfo.dev_vendor_id); -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_DEVICE_ID, &dev->dev_devinfo.dev_device_id); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_REVISION_ID, &dev->dev_devinfo.dev_revision_id); -+ -+ dev->dev_devinfo.dev_rail = instance; -+ dev->dev_devinfo.dev_driver_version = ELAN4_DRIVER_VERSION; -+ dev->dev_devinfo.dev_num_down_links_value = 0; -+ dev->dev_devinfo.dev_params_mask = (1 << ELAN4_PARAM_DRIVER_FEATURES); -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] = default_features; -+ -+ dev->dev_position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* initialise the data structures and map the device */ -+ if ((res = elan4_initialise_device (dev)) != 0) -+ { -+ kfree (dev); -+ return res; -+ } -+ -+ /* add the interrupt handler */ -+ if (request_irq (pdev->irq, elan4_irq, SA_SHIRQ, "elan4", dev) != 0) -+ { -+ elan4_finalise_device (dev); -+ KMEM_FREE (dev, sizeof(*dev)); -+ return -ENXIO; -+ } -+ -+ if (pci_request_regions(dev->dev_osdep.pdev, "elan4")) -+ { -+ free_irq (dev->dev_osdep.pdev->irq, dev); -+ KMEM_FREE (dev, sizeof(*dev)); -+ return -ENODEV; -+ } -+ -+#if defined(CONFIG_DEVFS_FS) -+ { -+ char name[16]; -+ -+ sprintf (name, "control%d", dev->dev_instance); -+ dev->dev_osdep.devfs_control = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_CONTROL << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR | S_IWUSR, -+ &elan4_fops, NULL); -+ sprintf (name, "sdram%d", dev->dev_instance); -+ dev->dev_osdep.devfs_sdram = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_MEM << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ sprintf (name, "user%d", dev->dev_instance); -+ dev->dev_osdep.devfs_user = devfs_register(devfs_handle, name, DEVFS_FL_NONE, ELAN4_MAJOR, -+ dev->dev_instance | (ELAN4_MINOR_USER << ELAN4_MINOR_SHIFT), S_IFCHR | S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH, -+ &elan4_fops, NULL); -+ } -+#endif -+ -+ /* add the procfs entry */ -+ elan4_procfs_device_init (dev); -+ -+ /* allow the device to be referenced now */ -+ elan4_devices[instance] = dev; -+ -+ if ((disabled & (1 << instance)) == 0) -+ { -+ if (elan4_start_device (dev) != 0) -+ { -+ printk ("elan%d: auto-start of device failed\n", dev->dev_instance); -+ -+ elan4_detach_device (dev); -+ return (-ENXIO); -+ } -+ -+ dev->dev_state = ELAN4_STATE_STARTED; -+ } -+ -+#if defined (__sparc) -+ printk ("elan%d: at pci %s (irq = %s)\n", instance, pdev->slot_name, __irq_itoa(pdev->irq)); -+#else -+ printk ("elan%d: at pci %s (irq = %d)\n", instance, pdev->slot_name, pdev->irq); -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Called by cleanup_module() for each board found on PCI. -+ */ -+static void -+elan4_detach_device (ELAN4_DEV *dev) -+{ -+ /* stop the chip and free of resources */ -+ if (dev->dev_state == ELAN4_STATE_STARTED) -+ elan4_stop_device (dev); -+ -+ elan4_devices[dev->dev_instance] = NULL; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_unregister (dev->dev_osdep.devfs_control); -+ devfs_unregister (dev->dev_osdep.devfs_sdram); -+ devfs_unregister (dev->dev_osdep.devfs_user); -+#endif -+ -+ /* release the address space */ -+ pci_release_regions (dev->dev_osdep.pdev); -+ -+ /* release the interrupt */ -+ free_irq (dev->dev_osdep.pdev->irq, dev); -+ -+ /* remove the procfs entry */ -+ elan4_procfs_device_fini (dev); -+ -+ /* unmap the device and finalise the data structures */ -+ elan4_finalise_device (dev); -+ -+ KMEM_FREE (dev, sizeof(*dev)); -+} -+ -+/* -+ * Maintain reference counts on the device -+ */ -+ELAN4_DEV * -+elan4_reference_device (int instance, int state) -+{ -+ ELAN4_DEV *dev = elan4_devices[instance]; -+ -+ if (dev == NULL) -+ return (NULL); -+ -+ kmutex_lock (&dev->dev_lock); -+ -+ if ((dev->dev_state & state) == 0) -+ { -+ kmutex_unlock (&dev->dev_lock); -+ return (NULL); -+ } -+ -+ dev->dev_references++; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MPSAS -+ sas_set_position(dev); -+#endif -+ -+ return (dev); -+} -+ -+void -+elan4_dereference_device (ELAN4_DEV *dev) -+{ -+ kmutex_lock (&dev->dev_lock); -+ dev->dev_references--; -+ kmutex_unlock (&dev->dev_lock); -+ -+#ifdef MODULE -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+static void -+elan4_shutdown_devices(int panicing) -+{ -+ ELAN4_DEV *dev; -+ unsigned long flags; -+ register int i; -+ -+ local_irq_save (flags); -+ for (i = 0; i < ELAN4_MAX_CONTROLLER; i++) -+ { -+ if ((dev = elan4_devices[i]) != NULL) -+ { -+ printk(KERN_INFO "elan%d: forcing link into reset\n", dev->dev_instance); -+ -+ /* set the inputters to discard everything */ -+ if (! panicing) spin_lock (&dev->dev_haltop_lock); -+ -+ if (dev->dev_discard_lowpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ if (dev->dev_discard_highpri_count++ == 0) -+ elan4_set_schedstatus (dev, 0); -+ -+ if (! panicing) spin_unlock (&dev->dev_haltop_lock); -+ -+ /* ideally we'd like to halt all the outputters too, -+ * however this will prevent the kernel comms flushing -+ * to work correctly ..... -+ */ -+ } -+ } -+ local_irq_restore (flags); -+} -+ -+/* -+ * /dev/elan4/controlX - control device -+ * -+ */ -+static int -+control_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STOPPED | ELAN4_STATE_STARTED); -+ CONTROL_PRIVATE *pr; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, CONTROL_PRIVATE *, sizeof (CONTROL_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ -+ return (-ENOMEM); -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_open: dev=%p pr=%p\n", dev, pr); -+ -+ pr->pr_dev = dev; -+ pr->pr_boundary_scan = 0; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+control_release (struct inode *inode, struct file *file) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ ELAN4_DEV *dev = pr->pr_dev; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_release: pr=%p\n", pr); -+ -+ //if (pr->pr_boundary_scan) -+ // elan4_clear_boundary_scan (dev, pr); -+ -+ elan4_dereference_device (dev); -+ -+ KMEM_FREE (pr, sizeof(*pr)); -+ -+ return (0); -+} -+ -+static int -+control_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ -+ PRINTF (DBG_DEVICE, DBG_FILE, "control_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ if (copy_to_user ((void *) arg, &pr->pr_dev->dev_devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ -+ case ELAN4IO_GET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ elan4_get_position (pr->pr_dev, &pos); -+ -+ if (copy_to_user ((void *) arg, &pos, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_POSITION: -+ { -+ ELAN_POSITION pos; -+ -+ if (copy_from_user (&pos, (void *) arg, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ -+ return (elan4_set_position (pr->pr_dev, &pos)); -+ } -+ -+ case ELAN4IO_OLD_GET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ unsigned short mask; -+ -+ elan4_get_params (pr->pr_dev, ¶ms, &mask); -+ -+ if (copy_to_user ((void *) arg, ¶ms, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_OLD_SET_PARAMS: -+ { -+ ELAN_PARAMS params; -+ -+ if (copy_from_user (¶ms, (void *) arg, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, ¶ms, 3); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PARAMS_STRUCT))) -+ return (-EFAULT); -+ -+ elan4_set_params (pr->pr_dev, &args.p_params, args.p_mask); -+ -+ return (0); -+ } -+ -+ case ELAN4IO_GET_PARAMS: -+ { -+ ELAN4IO_PARAMS_STRUCT args; -+ -+ elan4_get_params (pr->pr_dev, &args.p_params, &args.p_mask); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN_PARAMS))) -+ return (-EFAULT); -+ -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+static int -+control_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ CONTROL_PRIVATE *pr = (CONTROL_PRIVATE *) file->private_data; -+ unsigned bar = OFF_TO_BAR (vma->vm_pgoff << PAGE_SHIFT); -+ unsigned long off = OFF_TO_OFFSET (vma->vm_pgoff << PAGE_SHIFT); -+ long len = vma->vm_end - vma->vm_start; -+ -+ PRINTF (DBG_USER, DBG_FILE, "control_mmap: pr=%p bar=%x off=%x\n", pr, bar, off); -+ -+ /* check bar number and translate the standard psuedo bars */ -+ switch (bar) -+ { -+ case ELAN4_BAR_SDRAM: -+ case ELAN4_BAR_REGISTERS: -+ break; -+ -+ default: -+ return (-EINVAL); -+ } -+ -+ if (off < 0 || (off + len) > pci_resource_len (pr->pr_dev->dev_osdep.pdev, bar)) -+ return (-EINVAL); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (vma->vm_start, pci_resource_start (pr->pr_dev->dev_osdep.pdev, bar) + off, len, vma->vm_page_prot)) -+ return (-EAGAIN); -+ -+ return (0); -+} -+ -+/* -+ * /dev/elan4/sdramX - sdram access device -+ */ -+static void -+mem_freepage (MEM_PRIVATE *pr, MEM_PAGE *pg) -+{ -+ PRINTF (DBG_USER, DBG_MEM, "mem_freepage: pr=%p pgoff=%lx pg=%p ref=%d\n", pr, pg->pg_pgoff, pg, pg->pg_ref); -+ -+ elan4_sdram_free (pr->pr_dev, pg->pg_addr, SDRAM_PAGE_SIZE); -+ -+ KMEM_FREE(pg, sizeof(*pg)); -+} -+ -+static MEM_PAGE * -+mem_getpage (MEM_PRIVATE *pr, unsigned long pgoff) -+{ -+ int hashval = MEM_HASH (pgoff); -+ MEM_PAGE *npg = NULL; -+ MEM_PAGE *pg; -+ -+ ASSERT ((pgoff & SDRAM_PGOFF_OFFSET) == 0); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx\n", pr, pgoff); -+ -+ again: -+ spin_lock (&pr->pr_lock); -+ for (pg = pr->pr_pages[hashval]; pg; pg = pg->pg_next) -+ if (pg->pg_pgoff == pgoff) -+ break; -+ -+ if (pg != NULL) -+ { -+ PRINTF (DBG_USER, DBG_MEM, "mem_getpage: pr=%p pgoff=%lx -> found %p addr=%x\n", pr, pgoff, pg, pg->pg_addr); -+ -+ pg->pg_ref++; -+ spin_unlock (&pr->pr_lock); -+ -+ if (npg != NULL) /* we'd raced and someone else had created */ -+ mem_freepage (pr, npg); /* this page - so free of our new one*/ -+ return (pg); -+ } -+ -+ if (npg != NULL) /* didn't find the page, so inset the */ -+ { /* new one we've just created */ -+ npg->pg_next = pr->pr_pages[hashval]; -+ pr->pr_pages[hashval] = npg; -+ -+ spin_unlock (&pr->pr_lock); -+ return (npg); -+ } -+ -+ spin_unlock (&pr->pr_lock); /* drop spinlock before creating a new page */ -+ -+ KMEM_ALLOC (npg, MEM_PAGE *, sizeof (MEM_PAGE), 1); -+ if ((npg == NULL)) -+ return (NULL); -+ -+ if ((npg->pg_addr = elan4_sdram_alloc (pr->pr_dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ KMEM_FREE(npg, sizeof(*npg)); -+ return (NULL); -+ } -+ -+#ifndef CONFIG_MPSAS -+ /* zero the page before returning it to the user */ -+ elan4_sdram_zeroq_sdram (pr->pr_dev, npg->pg_addr, SDRAM_PAGE_SIZE); -+#endif -+ -+ npg->pg_pgoff = pgoff; -+ npg->pg_ref = 1; -+ -+ /* created a new page - so have to rescan before inserting it */ -+ goto again; -+} -+ -+static void -+mem_droppage (MEM_PRIVATE *pr, unsigned long pgoff, int dontfree) -+{ -+ MEM_PAGE **ppg; -+ MEM_PAGE *pg; -+ -+ spin_lock (&pr->pr_lock); -+ for (ppg = &pr->pr_pages[MEM_HASH(pgoff)]; *ppg; ppg = &(*ppg)->pg_next) -+ if ((*ppg)->pg_pgoff == pgoff) -+ break; -+ -+ pg = *ppg; -+ -+ ASSERT (*ppg != NULL); -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_droppage: pr=%p pgoff=%lx pg=%p ref=%d dontfree=%d\n", pr, pgoff, (*ppg), (*ppg)->pg_ref, dontfree); -+ -+ if (--pg->pg_ref == 0 && !dontfree) -+ { -+ *ppg = pg->pg_next; -+ -+ mem_freepage (pr, pg); -+ } -+ -+ spin_unlock (&pr->pr_lock); -+} -+ -+static int -+mem_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED); -+ MEM_PRIVATE *pr; -+ register int i; -+ -+ if (dev == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, MEM_PRIVATE *, sizeof (MEM_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ spin_lock_init (&pr->pr_lock); -+ pr->pr_dev = dev; -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ pr->pr_pages[i] = NULL; -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+mem_release (struct inode *node, struct file *file) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg, *next; -+ int i; -+ -+ /* free off any pages that we'd allocated */ -+ spin_lock (&pr->pr_lock); -+ for (i = 0; i < MEM_HASH_SIZE; i++) -+ { -+ for (pg = pr->pr_pages[i]; pg; pg = next) -+ { -+ next = pg->pg_next; -+ mem_freepage (pr, pg); -+ } -+ } -+ spin_unlock (&pr->pr_lock); -+ -+ elan4_dereference_device (pr->pr_dev); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ return (0); -+} -+ -+static int -+mem_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ return (-EINVAL); -+} -+ -+static void -+mem_vma_open (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK); -+} -+ -+static void -+mem_vma_close (struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) vma->vm_private_data; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the call to close may not have the same vm_start/vm_end values as -+ * were passed into mmap()/open() - since if an partial unmap had occurred -+ * then the vma could have been shrunk or even split. -+ * -+ * if a the vma is split then an vma_open() will be called for the top -+ * portion - thus causing the reference counts to become incorrect. -+ * -+ * We drop the reference to any pages we're notified about - so they get freed -+ * earlier than when the device is finally released. -+ */ -+ for (pgoff = vma->vm_pgoff, addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+} -+ -+struct vm_operations_struct mem_vm_ops = { -+ open: mem_vma_open, -+ close: mem_vma_close, -+}; -+ -+static int -+mem_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ MEM_PRIVATE *pr = (MEM_PRIVATE *) file->private_data; -+ MEM_PAGE *pg; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: vma=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, file); -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ vma->vm_ops = &mem_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ if ((pg = mem_getpage (pr, pgoff & SDRAM_PGOFF_MASK)) == NULL) -+ goto failed; -+ -+ PRINTF (DBG_USER, DBG_MEM, "mem_mmap: addr %lx -> pg=%p sdram=%x+%x bar=%lx\n", -+ addr, pg, pg->pg_addr, (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, -+ pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (pr->pr_dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (pr->pr_dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ pg->pg_addr + (pgoff & SDRAM_PGOFF_OFFSET) * PAGE_SIZE, PAGE_SIZE, vma->vm_page_prot)) -+ { -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); /* drop our reference to this page */ -+ goto failed; -+ } -+ -+#if defined(conditional_schedule) -+ conditional_schedule(); -+#endif -+ } -+ -+ return (0); -+ -+ failed: -+ /* free of any pages we've already allocated/referenced */ -+ while (pgoff-- > vma->vm_pgoff) -+ mem_droppage (pr, pgoff & SDRAM_PGOFF_MASK, 0); -+ -+ return (-ENOMEM); -+} -+ -+/* -+ * /dev/elan4/userX - control device -+ * -+ */ -+static inline void -+user_private_free (USER_PRIVATE *pr) -+{ -+ ELAN4_DEV *dev = pr->pr_uctx->uctx_ctxt.ctxt_dev; -+ -+ ASSERT (atomic_read (&pr->pr_ref) == 0); -+ -+ user_free (pr->pr_uctx); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ elan4_dereference_device (dev); -+} -+ -+#if defined(IOPROC_PATCH_APPLIED) -+static void -+user_ioproc_release (void *arg, struct mm_struct *mm) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ elan4mmu_invalidate_ctxt (&pr->pr_uctx->uctx_ctxt); -+ -+ pr->pr_mm = NULL; -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+} -+ -+/* -+ * On 2.4 kernels we get passed a mm_struct, whereas on 2.6 kernels -+ * we get the vma which is more usefull -+ */ -+#if defined(IOPROC_MM_STRUCT_ARG) -+static void -+user_ioproc_sync_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ /* XXXX: this is intended to sync the modified bit from our page tables, -+ * into the main cpu's modified bits - however since we do not -+ * syncronize our modified bit on a ioproc_invalidate_page() call, -+ * then it could get lost if we modify the page after the last -+ * modification and writepage has occurred. Hence we invalidate -+ * all translations and allow it to refault. -+ */ -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_range: mm=%p start=%lx end=%lx\n", mm, start, end); -+ -+#if defined(CONFIG_MPSAS) -+ if (((end - start) >> PAGE_SHIFT) > 16) -+ return; -+#endif -+ -+ user_update_main (pr->pr_uctx, mm, start, end - start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct mm_struct *mm, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+ -+#else -+ -+static void -+user_ioproc_sync_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_range: start=%lx end=%lx\n", start, end); -+ -+ /* XXXX: this is intended to sync the modified bit from our page tables, -+ * into the main cpu's modified bits - however since we do not -+ * syncronize our modified bit on a ioproc_invalidate_page() call, -+ * then it could get lost if we modify the page after the last -+ * modification and writepage has occurred. Hence we invalidate -+ * all translations and allow it to refault. -+ */ -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_invalidate_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_range: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+ -+static void -+user_ioproc_update_range (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_range: vma=%p start=%lx end=%lx\n", vma, start, end); -+ -+#if defined(CONFIG_MPSAS) -+ if (((end - start) >> PAGE_SHIFT) > 16) -+ return; -+#endif -+ -+ user_update_main (pr->pr_uctx, vma->vm_mm, start, end - start); -+} -+ -+static void -+user_ioproc_change_protection (void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_change_protection: start=%lx end=%lx\n", start, end); -+ -+ user_unload_main (pr->pr_uctx, start, end - start); -+} -+#endif /* defined(IOPROC_NO_VMA_RANGE) */ -+ -+static void -+user_ioproc_sync_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_sync_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_ioproc_invalidate_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_invalidate_page: addr=%lx\n", addr); -+ -+ user_unload_main (pr->pr_uctx, addr & PAGE_MASK, PAGE_SIZE); -+} -+ -+static void -+user_ioproc_update_page (void *arg, struct vm_area_struct *vma, unsigned long addr) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) arg; -+ -+ PRINTF (pr->pr_uctx, DBG_IOPROC, "user_ioproc_update_page: addr=%lx\n", addr); -+ -+ user_update_main (pr->pr_uctx, vma->vm_mm, addr & PAGE_MASK, PAGE_SIZE); -+} -+#endif /* defined(IOPROC_PATCH_APPLIED) */ -+ -+static int -+user_open (struct inode *inode, struct file *file) -+{ -+ ELAN4_DEV *dev; -+ USER_PRIVATE *pr; -+ USER_CTXT *uctx; -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_open: mm=%p users=%d count=%d\n", current->mm, -+ atomic_read (¤t->mm->mm_users), atomic_read (¤t->mm->mm_count)); -+ -+ if ((dev = elan4_reference_device (ELAN4_DEVICE(inode), ELAN4_STATE_STARTED)) == NULL) -+ return (-ENXIO); -+ -+ KMEM_ALLOC (pr, USER_PRIVATE *, sizeof (USER_PRIVATE), 1); -+ if ((pr == NULL)) -+ { -+ elan4_dereference_device (dev); -+ return (-ENOMEM); -+ } -+ -+ uctx = user_alloc (dev); -+ -+ if (IS_ERR(uctx)) -+ { -+ elan4_dereference_device (dev); -+ KMEM_FREE(pr, sizeof(*pr)); -+ -+ return PTR_ERR(uctx); -+ } -+ -+ /* initialise refcnt to 1 - one for "file" */ -+ atomic_set (&pr->pr_ref, 1); -+ -+ pr->pr_uctx = uctx; -+ pr->pr_mm = current->mm; -+ -+ { -+ /* register a ioproc callback to notify us of translation changes */ -+ pr->pr_ioproc.arg = (void *) pr; -+ pr->pr_ioproc.release = user_ioproc_release; -+ pr->pr_ioproc.sync_range = user_ioproc_sync_range; -+ pr->pr_ioproc.invalidate_range = user_ioproc_invalidate_range; -+ pr->pr_ioproc.update_range = user_ioproc_update_range; -+ pr->pr_ioproc.change_protection = user_ioproc_change_protection; -+ pr->pr_ioproc.sync_page = user_ioproc_sync_page; -+ pr->pr_ioproc.invalidate_page = user_ioproc_invalidate_page; -+ pr->pr_ioproc.update_page = user_ioproc_update_page; -+ -+ /* add an extra reference for the ioproc ops */ -+ atomic_inc (&pr->pr_ref); -+ -+ spin_lock (¤t->mm->page_table_lock); -+ ioproc_register_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ } -+ -+ file->private_data = (void *) pr; -+ -+ return (0); -+} -+ -+static int -+user_release (struct inode *inode, struct file *file) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ -+ PRINTF (pr->pr_uctx, DBG_FILE, "user_release: ref=%d\n", atomic_read (&pr->pr_ref)); -+ -+ if (atomic_dec_and_test (&pr->pr_ref)) -+ user_private_free (pr); -+ -+ return (0); -+} -+ -+static int -+user_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ int res = 0; -+ -+ PRINTF (uctx, DBG_FILE, "user_ioctl: cmd=%x arg=%lx\n", cmd, arg); -+ -+ if (current->mm != pr->pr_mm) -+ return (-EINVAL); -+ -+ switch (cmd) -+ { -+ case ELAN4IO_DEVINFO: -+ if (copy_to_user ((void *) arg, &uctx->uctx_ctxt.ctxt_dev->dev_devinfo, sizeof (ELAN_DEVINFO))) -+ return (-EFAULT); -+ return (0); -+ -+ case ELAN4IO_POSITION: -+ { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ if (copy_to_user ((void *) arg, &dev->dev_position, sizeof (ELAN_POSITION))) -+ return (-EFAULT); -+ return (0); -+ } -+ -+ case ELAN4IO_FREE: -+ { -+ spin_lock (¤t->mm->page_table_lock); -+ if (pr->pr_mm != current->mm) -+ spin_unlock (¤t->mm->page_table_lock); -+ else -+ { -+ ioproc_unregister_ops (current->mm, &pr->pr_ioproc); -+ spin_unlock (¤t->mm->page_table_lock); -+ -+ user_ioproc_release (pr, current->mm); -+ } -+ } -+ return (0); -+ -+ case ELAN4IO_ATTACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else if ((res = user_attach (uctx, cap)) == 0 && -+ copy_to_user ((void *) arg, cap, sizeof (ELAN_CAPABILITY))) -+ { -+ user_detach (uctx, cap); -+ res = -EFAULT; -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ return (res); -+ } -+ -+ case ELAN4IO_DETACH: -+ { -+ ELAN_CAPABILITY *cap; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ user_detach (uctx, cap); -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ return (res); -+ } -+ -+ case ELAN4IO_BLOCK_INPUTTER: -+ user_block_inputter (uctx, arg); -+ return (0); -+ -+ case ELAN4IO_ADD_P2PVP: -+ { -+ ELAN4IO_ADD_P2PVP_STRUCT *args; -+ -+ KMEM_ALLOC (args, ELAN4IO_ADD_P2PVP_STRUCT *, sizeof (ELAN4IO_ADD_P2PVP_STRUCT), 1); -+ if ((args == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_ADD_P2PVP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = user_add_p2pvp (uctx, args->vp_process, &args->vp_capability); -+ -+ KMEM_FREE(args, sizeof(*args)); -+ return (res); -+ } -+ -+ case ELAN4IO_ADD_BCASTVP: -+ { -+ ELAN4IO_ADD_BCASTVP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ADD_BCASTVP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_add_bcastvp (uctx, args.vp_process, args.vp_lowvp, args.vp_highvp)); -+ } -+ -+ case ELAN4IO_REMOVEVP: -+ return (user_removevp (uctx, arg)); -+ -+ case ELAN4IO_SET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_set_route (uctx, args.rt_process, &args.rt_route)); -+ } -+ -+ case ELAN4IO_RESET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_reset_route (uctx, args.rt_process)); -+ } -+ -+ case ELAN4IO_GET_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_get_route (uctx, args.rt_process, &args.rt_route)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_CHECK_ROUTE: -+ { -+ ELAN4IO_ROUTE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ return (-EFAULT); -+ -+ if ((res = user_check_route (uctx, args.rt_process, &args.rt_route, &args.rt_error)) == 0 && -+ copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ROUTE_STRUCT))) -+ res = -EFAULT; -+ -+ return (res); -+ } -+ -+ case ELAN4IO_ALLOCCQ: -+ { -+ ELAN4IO_ALLOCCQ_STRUCT args; -+ USER_CQ *ucq; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ return (-EFAULT); -+ -+ ucq = user_alloccq (uctx, args.cq_size & CQ_SizeMask, args.cq_perm & CQ_PermissionMask, -+ (args.cq_type & ELAN4IO_CQ_TYPE_REORDER) ? UCQ_REORDER : 0); -+ if (IS_ERR (ucq)) -+ return PTR_ERR (ucq); -+ -+ args.cq_indx = elan4_cq2idx (ucq->ucq_cq); -+ -+ if (copy_to_user ((void *) arg, &args, sizeof (ELAN4IO_ALLOCCQ_STRUCT))) -+ { -+ user_dropcq (uctx, ucq); -+ return (-EFAULT); -+ } -+ -+ /* don't drop the reference on the cq until the context is freed, -+ * or the caller explicitly frees the cq */ -+ return (0); -+ } -+ -+ case ELAN4IO_FREECQ: -+ { -+ USER_CQ *ucq; -+ unsigned indx; -+ -+ if (copy_from_user (&indx, (void *) arg, sizeof (unsigned))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, indx)) == NULL) /* can't free unallocated cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ if ((ucq->ucq_flags & UCQ_SYSTEM)) /* can't free device driver cq */ -+ return (-EINVAL); -+ -+ user_dropcq (uctx, ucq); /* and the one held from the alloccq call */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_DUMPCQ: -+ { -+ ELAN4IO_DUMPCQ_STRUCT args; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq; -+ void *buf; -+ int i; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ return (-EFAULT); -+ -+ if ((ucq = user_findcq (uctx, args.cq_indx)) == NULL) -+ return (-EINVAL); -+ -+ if (args.bufsize) -+ { -+ E4_uint32 usedBufSize = min(args.cq_size, args.bufsize); -+ -+ KMEM_ALLOC (buf, void *, usedBufSize, 1); -+ -+ if (buf == NULL) -+ return (-ENOMEM); -+ -+ for (i=0; iucq_cq->cq_space + i); -+ -+ if (copy_to_user((void *)args.buffer, buf, usedBufSize)) -+ { -+ KMEM_FREE(buf, args.bufsize); -+ return (-EFAULT); -+ } -+ KMEM_FREE(buf, usedBufSize); -+ args.bufsize = usedBufSize; -+ } -+ -+ args.cq_size = CQ_Size(ucq->ucq_cq->cq_size); -+ args.cq_space = ucq->ucq_cq->cq_space; -+ -+ -+ if (copy_to_user((void *)arg, &args, sizeof(ELAN4IO_DUMPCQ_STRUCT))) -+ { -+ return (-EFAULT); -+ } -+ -+ user_dropcq (uctx, ucq); /* drop the reference we've just taken */ -+ -+ return (0); -+ } -+ -+ case ELAN4IO_SETPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM: -+ { -+ ELAN4IO_PERM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT))) -+ return (-EFAULT); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPSIG: -+ { -+ ELAN4IO_TRAPSIG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPSIG_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_trap_pid = current->pid; -+ pr->pr_uctx->uctx_trap_signo = args.ts_signo; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)args.th_trapp, args.th_nticks)); -+ } -+ -+ case ELAN4IO_REQUIRED_MAPPINGS: -+ { -+ ELAN4IO_REQUIRED_MAPPINGS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_REQUIRED_MAPPINGS_STRUCT))) -+ return (-EFAULT); -+ -+ pr->pr_uctx->uctx_upage_addr = args.rm_upage_addr; -+ pr->pr_uctx->uctx_trestart_addr = args.rm_trestart_addr; -+ -+ return (0); -+ } -+ -+ case ELAN4IO_ALLOC_TRAP_QUEUES: -+ { -+ ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_alloc_trap_queues (uctx, args.tq_ndproc_traps, args.tq_neproc_traps, -+ args.tq_ntproc_traps, args.tq_nthreads, args.tq_ndmas)); -+ } -+ -+ case ELAN4IO_RESUME_EPROC_TRAP: -+ { -+ ELAN4IO_RESUME_EPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_EPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_eproc_trap (pr->pr_uctx, args.rs_addr)); -+ } -+ -+ case ELAN4IO_RESUME_CPROC_TRAP: -+ { -+ ELAN4IO_RESUME_CPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_CPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_cproc_trap (pr->pr_uctx, args.rs_indx)); -+ } -+ -+ case ELAN4IO_RESUME_DPROC_TRAP: -+ { -+ ELAN4IO_RESUME_DPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_DPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_dproc_trap (pr->pr_uctx, &args.rs_desc)); -+ } -+ -+ case ELAN4IO_RESUME_TPROC_TRAP: -+ { -+ ELAN4IO_RESUME_TPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_TPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_tproc_trap (pr->pr_uctx, &args.rs_regs)); -+ } -+ -+ case ELAN4IO_RESUME_IPROC_TRAP: -+ { -+ ELAN4IO_RESUME_IPROC_TRAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_RESUME_IPROC_TRAP_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_resume_iproc_trap (pr->pr_uctx, args.rs_channel, args.rs_trans, -+ &args.rs_header, &args.rs_data)); -+ } -+ -+ case ELAN4IO_FLUSH_ICACHE: -+ elan4_flush_icache (&uctx->uctx_ctxt); -+ return (0); -+ -+ case ELAN4IO_STOP_CTXT: -+ if (arg) -+ user_swapout (uctx, UCTX_USER_STOPPED); -+ else -+ user_swapin (uctx, UCTX_USER_STOPPED); -+ return (0); -+ -+ case ELAN4IO_ALLOC_INTCOOKIE_TABLE: -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (cap, (void *) arg, sizeof (ELAN_CAPABILITY))) -+ res = -EFAULT; -+ else -+ { -+ tbl = intcookie_alloc_table(cap); -+ -+ if (tbl == NULL) -+ res = -ENOMEM; -+ else -+ { -+ /* Install the intcookie table we've just created */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table != NULL) -+ res = -EBUSY; -+ else -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ /* drop the table we created if there already was one */ -+ if (res != 0) -+ intcookie_free_table (tbl); -+ } -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE_TABLE: -+ { -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&uctx->uctx_spinlock); -+ tbl = uctx->uctx_intcookie_table; -+ uctx->uctx_intcookie_table = NULL; -+ spin_unlock (&uctx->uctx_spinlock); -+ -+ if (tbl != NULL) -+ intcookie_free_table (tbl); -+ -+ return (tbl == NULL ? -EINVAL : 0); -+ } -+ -+ case ELAN4IO_ALLOC_INTCOOKIE: -+ { -+ /* For backwards compatibility with the old libs (pre 1.8.0) -+ * we allocate an intcookie table on the first cookie -+ * alloc if one hasn't be created already -+ */ -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ ELAN_CAPABILITY *cap; -+ INTCOOKIE_TABLE *tbl; -+ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), 1); -+ if ((cap == NULL)) -+ return (-ENOMEM); -+ -+ /* Create a dummy capability */ -+ elan_nullcap(cap); -+ -+ /* Must be unique for each process on a node */ -+ cap->cap_mycontext = (int) ELAN4_TASK_HANDLE(); -+ -+ /* Create a new intcookie table */ -+ tbl = intcookie_alloc_table(cap); -+ -+ /* Hang intcookie table off uctx */ -+ spin_lock (&uctx->uctx_spinlock); -+ if (uctx->uctx_intcookie_table == NULL) -+ { -+ uctx->uctx_intcookie_table = tbl; -+ spin_unlock (&uctx->uctx_spinlock); -+ } -+ else -+ { -+ spin_unlock (&uctx->uctx_spinlock); -+ intcookie_free_table(tbl); -+ } -+ -+ KMEM_FREE(cap, sizeof(*cap)); -+ } -+ -+ return (intcookie_alloc (uctx->uctx_intcookie_table, arg)); -+ } -+ -+ case ELAN4IO_FREE_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_free (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_ARM_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_arm (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_WAIT_INTCOOKIE: -+ if (uctx->uctx_intcookie_table == NULL) -+ return -EINVAL; -+ else -+ return (intcookie_wait (uctx->uctx_intcookie_table, arg)); -+ -+ case ELAN4IO_FIRE_INTCOOKIE: -+ { -+ ELAN4IO_FIRECAP_STRUCT *args; -+ -+ KMEM_ALLOC (args, ELAN4IO_FIRECAP_STRUCT *, sizeof (ELAN4IO_FIRECAP_STRUCT), 1); -+ if ((args == NULL)) -+ return (-ENOMEM); -+ -+ if (copy_from_user (args, (void *) arg, sizeof (ELAN4IO_FIRECAP_STRUCT))) -+ res = -EFAULT; -+ else -+ res = intcookie_fire_cap (&args->fc_capability, args->fc_cookie); -+ -+ KMEM_FREE(args, sizeof(*args)); -+ -+ return (res); -+ } -+ -+ case ELAN4IO_NETERR_MSG: -+ { -+ ELAN4IO_NETERR_MSG_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_MSG_STRUCT))) -+ return (-EFAULT); -+ -+ return (user_send_neterr_msg (uctx, args.nm_vp, args.nm_nctx, args.nm_retries, &args.nm_msg)); -+ } -+ -+ case ELAN4IO_NETERR_TIMER: -+ { -+ unsigned long ticks = ((unsigned long) arg * HZ) / 1000; -+ -+ PRINTF (uctx, DBG_NETERR, "elan4_neterr_timer: arg %ld inc %ld\n", arg, ticks); -+ -+ mod_timer (&uctx->uctx_neterr_timer, (jiffies + (ticks > 0 ? ticks : 1))); -+ return 0; -+ } -+ -+ case ELAN4IO_NETERR_FIXUP: -+ { -+ ELAN4IO_NETERR_FIXUP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_NETERR_FIXUP_STRUCT))) -+ return (-EFAULT); -+ -+ if (args.nf_sten) -+ return (user_neterr_sten (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ else -+ return (user_neterr_dma (uctx, args.nf_vp, args.nf_cookie, args.nf_waitforeop)); -+ } -+ default: -+ PRINTF (uctx, DBG_FILE, "user_ioctl: invalid ioctl %x\n", cmd); -+ return (-EINVAL); -+ } -+} -+ -+static void -+user_vma_open (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_open: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ elan4_getcqa (&uctx->uctx_ctxt, pgoff); -+} -+ -+static void -+user_vma_close (struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) vma->vm_private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ unsigned long addr; -+ unsigned long pgoff; -+ -+ PRINTF (uctx, DBG_FILE, "user_vma_close: vm_mm=%p start=%lx end=%lx pgoff=%lx file=%p\n", -+ vma->vm_mm, vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_file); -+ -+ /* NOTE: the same comments apply as mem_vma_close */ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ if (elan4_getcqa (&uctx->uctx_ctxt, pgoff) != NULL) -+ { -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* and the one held by the mmap */ -+ } -+} -+ -+struct vm_operations_struct user_vm_ops = { -+ open: user_vma_open, -+ close: user_vma_close, -+}; -+ -+static int -+user_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CQA *cqa; -+ unsigned long addr; -+ unsigned long pgoff; -+ int res; -+ ioaddr_t ioaddr; -+ -+ /* Don't allow these pages to be swapped out of dumped */ -+ vma->vm_flags |= (VM_RESERVED | VM_IO); -+ -+ vma->vm_ops = &user_vm_ops; -+ vma->vm_file = file; -+ vma->vm_private_data = (void *) pr; -+ -+ for (addr = vma->vm_start, pgoff = vma->vm_pgoff; addr < vma->vm_end; addr += PAGE_SIZE, pgoff++) -+ { -+ switch (pgoff) -+ { -+ default: -+ PRINTF (uctx, DBG_FILE, "user_mmap: command queue %ld mapping at %lx\n", pgoff, addr); -+ -+ if ((cqa = elan4_getcqa (&uctx->uctx_ctxt, pgoff)) == NULL) -+ { -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: cqa=%p idx=%d num=%d ref=%d\n", cqa, cqa->cqa_idx, cqa->cqa_cqnum, cqa->cqa_ref); -+ -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ if (! (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE) && (cqa->cqa_type & CQ_Reorder) != 0) -+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (%lx, %lx, %lx, %lx)\n", -+ addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, PAGE_SIZE, -+ vma->vm_page_prot); -+ -+ if (__io_remap_page_range (addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (cqa->cqa_cqnum + dev->dev_cqoffset) * CQ_CommandMappingSize, -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range failed\n"); -+ -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_USER_REGS: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVA_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ ioaddr = pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + ELAN4_REVB_REG_OFFSET + offsetof(E4_Registers, uRegs); -+ break; -+ -+ default: -+ res = -EINVAL; -+ goto failed; -+ } -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: user_regs at %lx ioaddr %lx prot %lx\n", -+ addr, ioaddr, vma->vm_page_prot.pgprot); -+ -+ if (__io_remap_page_range (addr, (ioaddr & PAGEMASK), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ res = -EAGAIN; -+ goto failed; -+ } -+ -+ break; -+ -+ case ELAN4_OFF_USER_PAGE: -+ PRINTF (uctx, DBG_FILE, "user_mmap: shared user page - kaddr=%lx uaddr=%lx phys=%lx\n", -+ uctx->uctx_upage, addr, kmem_to_phys (uctx->uctx_upage)); -+ -+ /* we do not want to have this area swapped out, lock it */ -+ vma->vm_flags |= VM_LOCKED; -+ -+ /* Mark the page as reserved or else the remap_page_range() doesn't remap it */ -+ SetPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ -+ if (__remap_page_range (addr, kmem_to_phys (uctx->uctx_upage), PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (user_page) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_TPROC_TRAMPOLINE: -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ PRINTF (uctx, DBG_FILE, "user_mmap: tproc trampoline - kaddr=%lx uaddr=%lx phys=%lx\n", uctx->uctx_trampoline, addr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))); -+ -+ if (__io_remap_page_range (addr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM) + -+ uctx->uctx_trampoline + (addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)), -+ PAGE_SIZE, vma->vm_page_prot)) -+ { -+ PRINTF (uctx, DBG_FILE, "user_mmap: remap_page_range (tproc_trampoline) failed\n"); -+ res = -ENOMEM; -+ goto failed; -+ } -+ break; -+ -+ case ELAN4_OFF_DEVICE_STATS: -+ printk ("user_mmap: device_stats\n"); -+ break; -+ } -+ -+ } -+ -+ return (0); -+ -+ failed: -+ for (addr -= PAGE_SIZE, pgoff--; addr >= vma->vm_start; addr -= PAGE_SIZE, pgoff--) -+ elan4_putcqa (&uctx->uctx_ctxt, pgoff); /* drop the reference we've just taken */ -+ return (res); -+} -+ -+/* driver entry points */ -+static int -+elan4_open (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_open: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_open (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_open (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_open (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_release (struct inode *inode, struct file *file) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_release: device %d minor %d file=%p\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), file); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_release (inode, file)); -+ case ELAN4_MINOR_MEM: -+ return (mem_release (inode, file)); -+ case ELAN4_MINOR_USER: -+ return (user_release (inode, file)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+static int -+elan4_ioctl (struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ switch (ELAN4_MINOR (inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_MEM: -+ return (mem_ioctl (inode, file, cmd, arg)); -+ case ELAN4_MINOR_USER: -+ return (user_ioctl (inode, file, cmd, arg)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+elan4_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ struct inode *inode = file->f_dentry->d_inode; -+ extern int sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg); -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: device %d minor %d cmd %x\n", ELAN4_DEVICE(inode), ELAN4_MINOR(inode), cmd); -+ -+ if (ELAN4_MINOR (inode) == ELAN4_MINOR_USER) -+ { -+ USER_PRIVATE *pr = (USER_PRIVATE *) file->private_data; -+ USER_CTXT *uctx = pr->pr_uctx; -+ -+ if (current->mm != pr->pr_mm) -+ return -EINVAL; -+ -+ switch (cmd) -+ { -+ case ELAN4IO_SETPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: setperm maddr=%x eaddr=%llx len=%llxx perm=%d\n", -+ args.ps_maddr, args.ps_eaddr,args.ps_len, args.ps_perm); -+ -+ return (user_setperm (uctx, args.ps_maddr, args.ps_eaddr, args.ps_len, args.ps_perm)); -+ } -+ -+ case ELAN4IO_CLRPERM32: -+ { -+ ELAN4IO_PERM_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_PERM_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: clrperm eaddr=%llx len=%ll\n", -+ args.ps_eaddr, args.ps_len); -+ -+ user_clrperm (uctx, args.ps_eaddr, args.ps_len); -+ return (0); -+ } -+ -+ case ELAN4IO_TRAPHANDLER32: -+ { -+ ELAN4IO_TRAPHANDLER_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (ELAN4IO_TRAPHANDLER_STRUCT32))) -+ return (-EFAULT); -+ -+ PRINTF (DBG_USER, DBG_FILE, "user_ioctl32: traphandler trapp=%x nticks=%d\n", -+ args.th_trapp, args.th_nticks); -+ -+ return (user_trap_handler (pr->pr_uctx, (ELAN4_USER_TRAP *)(unsigned long)args.th_trapp, args.th_nticks)); -+ } -+ } -+ } -+ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_ioctl32: fd=%d cmd=%x arg=%lx file=%p\n", fd, cmd, arg, file); -+ return (sys_ioctl (fd, cmd, arg)); -+} -+#endif -+ -+ -+ -+static int -+elan4_mmap (struct file *file, struct vm_area_struct *vma) -+{ -+ PRINTF (DBG_USER, DBG_FILE, "elan4_mmap: instance %d minor %d start=%lx end=%lx pgoff=%lx\n", -+ ELAN4_DEVICE (file->f_dentry->d_inode), ELAN4_MINOR (file->f_dentry->d_inode), -+ vma->vm_start, vma->vm_end, vma->vm_pgoff); -+ -+ switch (ELAN4_MINOR (file->f_dentry->d_inode)) -+ { -+ case ELAN4_MINOR_CONTROL: -+ return (control_mmap (file, vma)); -+ case ELAN4_MINOR_MEM: -+ return (mem_mmap (file, vma)); -+ case ELAN4_MINOR_USER: -+ return (user_mmap (file, vma)); -+ default: -+ return (-ENXIO); -+ } -+} -+ -+void -+elan4_update_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *bridge) -+{ -+ u16 cnf; -+ -+ pci_read_config_word (bridge, 0x40 /* CNF */, &cnf); -+ -+ /* We expect the CNF register to be configured as follows -+ * -+ * [8] == 1 PMODE PCI Mode -+ * [7:6] == 2/3 PFREQ PCI Frequency (100/133) -+ * [5] == 0 RSDIS Restreaming Disable -+ * [4:3] == 0x PP Prefetch Policy -+ * [2] == 0 DTD Delayed Transaction Depth -+ * [1:0] == 10 MDT MaximumDelaedTransactions -+ */ -+ -+ if ((cnf & (1 << 8)) == 0) -+ printk ("elan%d: strangeness - elan reports PCI-X but P64H2 reports PCI mode !\n", dev->dev_instance); -+ else if ((cnf & 0xb7) != 0x82 && (cnf & 0xb7) != 0x84 && optimise_pci_bus < 2) -+ printk ("elan%d: P64H2 CNF is not configured as expected : RSDIS=%d PP=%d DTD=%d MDT=%d\n", -+ dev->dev_instance, (cnf >> 5) & 1, (cnf >> 3) & 3, (cnf >> 2) & 1, cnf & 3); -+ else -+ { -+ switch ((cnf >> 6) & 3) -+ { -+ case 2: /* PCI-X 100 */ -+ pci_write_config_word (bridge, 0xfc /* PC100 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 100\n", dev->dev_instance); -+ -+ break; -+ -+ case 3: /* PCI-X 133 */ -+ pci_write_config_word (bridge, 0xfe /* PC133 */, 0x7777); -+ -+ printk ("elan%d: optimise P64H2 : setting MDT=0, DTD=1, PFC=777 for PCI-X 133\n", dev->dev_instance); -+ break; -+ } -+ -+ pci_write_config_word (bridge, 0x40 /* CNF */, (cnf & 0xfff8) | 0x4); /* DTD=1 MDT=0 */ -+ } -+} -+ -+int -+elan4_optimise_intel_p64h2 (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+ struct pci_bus *bus = pdev->bus; -+ struct pci_dev *bridge = bus->self; -+ unsigned int devcount = 0; -+ u8 revision; -+ u32 ectrl; -+ struct list_head *el; -+ -+ pci_read_config_dword (pdev, PCI_ELAN_CONTROL, &ectrl); -+ -+ /* We can only run in PCI-Xmode with a B1 stepping P64H2 because of P64H2 Errata 3 */ -+ pci_read_config_byte (bridge, PCI_REVISION_ID, &revision); -+ if (revision < 0x04) -+ { -+ if ((ectrl & ECTRL_INITIALISATION_MODE) != Pci2_2) -+ { -+ static const char *p64h2_stepping[4] = {"UNKNOWN", "UNKNOWN", "UNKNOWN", "B0"}; -+ -+ printk ("elan%d: unable to use device because of P64H2 Errata 3 on\n" -+ " %s stepping part and running in a PCI-X slot\n", -+ dev->dev_instance, p64h2_stepping[revision]); -+ return -EINVAL; -+ } -+ } -+ -+ /* We can only alter the bus configuration registers if the Elan is the only device -+ * on the bus ... */ -+ list_for_each (el, &bus->devices) { -+ struct pci_dev *pcip = list_entry (el, struct pci_dev, bus_list); -+ -+ if (pcip == pdev || (pcip->vendor == PCI_VENDOR_ID_INTEL && pcip->device == 0x1462 /* P64H2 HOTPLUG */)) -+ continue; -+ -+ devcount++; -+ } -+ -+ if (devcount > 0 || !list_empty (&bus->children)) -+ { -+ printk ("elan%d: unable to optimise P64H2 settings as %s%s\n", dev->dev_instance, -+ (devcount > 0) ? "more than one device on bus" : "", -+ ! list_empty (&bus->children) ? "has child buses" : ""); -+ return 0; -+ } -+ -+#ifdef __ia64 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ { -+ struct pci_dev *pcip; -+ unsigned int sioh_good = 0; -+ unsigned int sioh_downgrade = 0; -+ unsigned int snc_good = 0; -+ unsigned int snc_downgrade = 0; -+ -+ /* Search for the associated SIOH and SNC on ia64, -+ * if we have a C2 SIOH and a C0/C1 SNC, then we can -+ * reconfigure the P64H2 as follows: -+ * CNF:MDT = 0 -+ * CNF:DTD = 1 -+ * CNF:PC133 = 7777 -+ * -+ * if not, then issue a warning that down rev parts -+ * affect bandwidth. -+ */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x500, pcip)); ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ if (revision >= 0x21) -+ snc_good++; -+ else -+ { -+ printk ("elan%d: SNC revision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x00 ? "A0" : revision == 0x01 ? "A1" : -+ revision == 0x02 ? "A2" : revision == 0x03 ? "A3" : -+ revision == 0x10 ? "B0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : "UNKNOWN"); -+ -+ snc_downgrade++; -+ } -+ } -+ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_INTEL, 0x510, pcip)) != NULL; ) -+ { -+ pci_read_config_byte (pcip, PCI_REVISION_ID, &revision); -+ -+ -+ if (revision >= 0x22) -+ sioh_good++; -+ else -+ { -+ printk ("elan%d: SIOH revsision %x (%s)\n", dev->dev_instance, revision, -+ revision == 0x10 ? "C0" : revision == 0x20 ? "C0" : -+ revision == 0x21 ? "C1" : revision == 0x22 ? "C2" : "UNKNOWN"); -+ -+ sioh_downgrade++; -+ } -+ } -+ -+ if (optimise_pci_bus < 2 && (sioh_downgrade || snc_downgrade)) -+ printk ("elan%d: unable to optimise as SNC/SIOH below required C1/C2 steppings\n", dev->dev_instance); -+ else if (optimise_pci_bus < 2 && (sioh_good == 0 || snc_good == 0)) -+ printk ("elan%d: unable to optimise as cannot determine SNC/SIOH revision\n", dev->dev_instance); -+ else -+ elan4_update_intel_p64h2 (dev, bridge); -+ } -+#endif -+ -+#ifdef __i386 -+ if ((ectrl & ECTRL_INITIALISATION_MODE) == PciX100to133MHz) -+ elan4_update_intel_p64h2 (dev, bridge); -+#endif -+ return 0; -+} -+ -+int -+elan4_optimise_intel_pxh (ELAN4_DEV *dev, struct pci_dev *pdev) -+{ -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= ELAN4_FEATURE_64BIT_READ; -+ -+ return 0; -+} -+ -+void -+elan4_optimise_serverworks_ciobx2 (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ struct pci_dev *pcip; -+ unsigned char bus; -+ unsigned int dor; -+ -+ /* Find the CIOBX2 for our bus number */ -+ for (pcip = NULL; (pcip = pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, pcip)) != NULL;) -+ { -+ pci_read_config_byte (pcip, 0x44 /* BUSNUM */, &bus); -+ -+ if (pdev->bus->number == bus) -+ { -+ printk ("elan%d: optimise CIOBX2 : setting DOR to disable read pipe lining\n", dev->dev_instance); -+ -+ pci_read_config_dword (pcip, 0x78 /* DOR */, &dor); -+ pci_write_config_dword (pcip, 0x78 /* DOR */, dor | (1 << 16)); -+ -+ printk ("elan%d: disabling write-combining on ServerWorks chipset\n", dev->dev_instance); -+ dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] |= ELAN4_FEATURE_NO_WRITE_COMBINE; -+ } -+ } -+} -+ -+int -+elan4_optimise_bus (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pdev = dev->dev_osdep.pdev; -+ -+ if (pdev->bus && pdev->bus->self) -+ { -+ struct pci_dev *bridge = pdev->bus->self; -+ -+ if (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x1460 /* Intel P64H2 */) -+ return elan4_optimise_intel_p64h2 (dev, pdev); -+ -+ if ((bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0329) /* Intel 6700PXH Fn 0 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032a) /* Intel 6700PXH Fn 2 */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x032c) /* Intel 6702PXH */ || -+ (bridge->vendor == PCI_VENDOR_ID_INTEL && bridge->device == 0x0320) /* Intel PXH-D */) -+ return elan4_optimise_intel_pxh (dev, pdev); -+ } -+ -+ if (pci_find_device (PCI_VENDOR_ID_HP, 0x122e, NULL) != NULL) /* on HP ZX1 set the relaxed ordering */ -+ dev->dev_pteval = PTE_RelaxedOrder; /* bit to get better DMA bandwidth. */ -+ -+ if (pci_find_device (PCI_VENDOR_ID_SERVERWORKS, 0x0101, NULL) != NULL) /* ServerWorks CIOBX2 */ -+ elan4_optimise_serverworks_ciobx2 (dev); -+ -+ return 0; -+} -+ -+int -+elan4_pciinit (ELAN4_DEV *dev) -+{ -+ int res; -+ u32 value; -+ u16 command; -+ u8 cacheline; -+ unsigned long flags; -+ -+ if (optimise_pci_bus && (res = elan4_optimise_bus (dev)) <0) -+ return (res); -+ -+ if ((res = pci_enable_device (dev->dev_osdep.pdev)) < 0) -+ return (res); -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ if ((value & ECTRL_INITIALISATION_MODE) == Pci2_2) -+ printk ("elan%d: is an elan4%c (PCI-2.2)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ else -+ { -+ switch (value & ECTRL_INITIALISATION_MODE) -+ { -+ case PciX50To66MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 50-66)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX66to100MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 66-100)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ case PciX100to133MHz: -+ printk ("elan%d: is an elan4%c (PCI-X 100-133)\n", dev->dev_instance, 'a' + dev->dev_devinfo.dev_revision_id); -+ break; -+ -+ default: -+ printk ("elan%d: Invalid PCI-X mode\n", dev->dev_instance); -+ return (-EINVAL); -+ } -+ } -+ -+ /* initialise the elan pll control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, &value); -+ -+ if (elan4_pll_cfg) -+ { -+ printk ("elan%d: setting pll control to %08x\n", dev->dev_instance, elan4_pll_cfg); -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, elan4_pll_cfg); -+ } -+ else -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_4_3); -+ else -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | ECTRL_SYS_CLOCK_RATIO_6_5 | SysPll_FeedForwardISel0 | SysPll_FeedForwardISel1); -+ } -+ -+ /* initialise the elan control register */ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ -+ value = ((15 << ECTRL_IPROC_HIGH_PRI_TIME_SHIFT) | -+ (15 << ECTRL_OTHER_HIGH_PRI_TIME_SHIFT) | -+ (value & ECTRL_28_NOT_30_BIT_LOCAL_BAR) | -+ (dev->dev_topaddrmode ? ECTRL_ExtraMasterAddrBits : 0) | -+ ECTRL_ENABLE_LATENCY_RESET | -+ ECTRL_ENABLE_WRITEBURSTS | -+ ECTRL_ENABLE_2_2READBURSTS); -+ -+#ifdef LINUX_SPARC -+ value &= ~(ECTRL_ENABLE_LATENCY_RESET | ECTRL_ENABLE_WRITEBURSTS); -+#endif -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ -+ switch (dev->dev_devinfo.dev_revision_id) -+ { -+ case PCI_REVISION_ID_ELAN4_REVA: -+ /* Delay 10ms here if we've changed the sysclock ratio */ -+ /* to allow the PLL to stabalise before proceeding */ -+ udelay (10000); -+ break; -+ -+ case PCI_REVISION_ID_ELAN4_REVB: -+ { -+ unsigned char val = read_i2c (dev, I2cLedsValue); -+ -+ /* On RevB we have to explicitly reset the PLLs */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ -+ write_i2c (dev, I2cLedsValue, val | 0x80); -+ udelay (1000); -+ -+ /* Issue the PLL counter reset and immediately inhibit all pci interaction -+ * while the PLL is recovering. The write to the PCI_COMMAND register has -+ * to occur within 50uS of the write to the i2c registers */ -+ local_irq_save (flags); -+ write_i2c (dev, I2cLedsValue, val & ~0x80); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, (1 << 10) /* PCI_COMMAND_DISABLE_INT */); -+ local_irq_restore (flags); -+ -+ /* Wait for the write to occur and for the PLL to regain lock */ -+ udelay (20000); udelay (20000); -+ -+ /* Re-enable pci interaction and clear any spurious errors deteced */ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_STATUS, PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command); -+ break; -+ } -+ } -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ /* Enable master accesses */ -+ pci_set_master (dev->dev_osdep.pdev); -+ -+ /* Verify that the memWrInvalidate bit is set */ -+ pci_read_config_word (dev->dev_osdep.pdev, PCI_COMMAND, &command); -+ pci_read_config_byte (dev->dev_osdep.pdev, PCI_CACHE_LINE_SIZE, &cacheline); -+ -+ if ((command & PCI_COMMAND_INVALIDATE) == 0) -+ { -+ printk ("elan%d: enable MemWrInvalidate (cacheline %d)\n", -+ dev->dev_instance, cacheline * 4); -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_COMMAND, command | PCI_COMMAND_INVALIDATE); -+ } -+ -+ return (0); -+} -+ -+void -+elan4_updatepll (ELAN4_DEV *dev, unsigned int val) -+{ -+ u32 value; -+ -+ if (elan4_pll_cfg == 0) -+ { -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, &value); -+ -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_PLL_CONTROL, -+ (value & ~ECTRL_SYS_CLOCK_RATIO_MASK) | val); -+ -+ /* Delay 10ms here if we've changed the sysclock ratio */ -+ /* to allow the PLL to stabalise before proceeding */ -+ udelay (10000); -+ } -+} -+ -+void -+elan4_pcifini (ELAN4_DEV *dev) -+{ -+ u32 value; -+ -+ pci_read_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, &value); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value | ECTRL_SOFTWARE_INTERNAL_RESET); -+ pci_write_config_dword (dev->dev_osdep.pdev, PCI_ELAN_CONTROL, value); -+ -+ pci_disable_device (dev->dev_osdep.pdev); -+} -+ -+void -+elan4_pcierror (ELAN4_DEV *dev) -+{ -+ struct pci_dev *pci = dev->dev_osdep.pdev; -+ u8 type; -+ u16 status, cmd; -+ u32 physlo, physhi, control; -+ -+ printk("elan%d: pci error has occurred\n", dev->dev_instance); -+ -+ pci_read_config_word (pci, PCI_STATUS, &status); -+ pci_read_config_word (pci, PCI_COMMAND, &cmd); -+ pci_read_config_dword (pci, PCI_ELAN_CONTROL, &control); -+ -+ if (control & ECTRL_REC_SPLIT_COMP_MESSAGE) -+ { -+ u32 message, attr; -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control & ~ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &message); -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_SELECT_SPLIT_MESS_ATTR); -+ pci_read_config_dword (pci, PCI_ELAN_SPLIT_MESSAGE_VALUE, &attr); -+ -+ printk ("elan%d: pcierror - received split completion message - attr=%08x, message=%08x\n", -+ dev->dev_instance, attr, message); -+ -+ pci_write_config_dword (pci, PCI_ELAN_CONTROL, control | ECTRL_REC_SPLIT_COMP_MESSAGE); /* clear the error */ -+ } -+ else -+ { -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_LO, &physlo); -+ pci_read_config_dword (pci, PCI_ELAN_PARITY_ADDR_HI, &physhi); -+ pci_read_config_byte (pci, PCI_ELAN_PARITY_TYPE, &type); -+ -+ printk ("elan%d: pcierror - status %x cmd %4x physaddr %08x%08x type %x\n", -+ dev->dev_instance, status, cmd, physhi, physlo, type); -+ -+ if (status & PCI_STATUS_PARITY) -+ printk ("elan%d: parity error signalled (PERR)\n", dev->dev_instance); -+ if (status & PCI_STATUS_DETECTED_PARITY) -+ printk ("elan%d: detected parity error\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_MASTER_ABORT) -+ printk ("elan%d: received master abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_REC_TARGET_ABORT) -+ printk ("elan%d: received target abort\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_SYSTEM_ERROR) -+ printk ("elan%d: signalled SERR\n", dev->dev_instance); -+ if (status & PCI_STATUS_SIG_TARGET_ABORT) -+ printk ("elan%d: signalled target abort\n", dev->dev_instance); -+ -+ pci_write_config_word (pci, PCI_STATUS, status); /* clear the errors */ -+ } -+ -+ DISABLE_INT_MASK (dev, INT_PciMemErr); -+ -+#ifdef notdef -+ panic ("elan%d: pcierror\n", dev->dev_instance); /* better panic ! */ -+#endif -+} -+ -+static irqreturn_t -+elan4_irq (int irq, void *arg, struct pt_regs *regs) -+{ -+ if (elan4_1msi0 ((ELAN4_DEV *) arg)) -+ return IRQ_HANDLED; -+ else -+ return IRQ_NONE; -+} -+ -+ioaddr_t -+elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ return (ioaddr_t) ioremap_nocache (pci_resource_start (dev->dev_osdep.pdev, bar) + off, size); -+} -+ -+void -+elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handle) -+{ -+ iounmap ((void *) ptr); -+} -+ -+unsigned long -+elan4_resource_len (ELAN4_DEV *dev, unsigned bar) -+{ -+ return (pci_resource_len (dev->dev_osdep.pdev, bar)); -+} -+ -+void -+elan4_configure_writecombining (ELAN4_DEV *dev) -+{ -+ if ((dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ return; -+ -+#if (defined(__i386) || defined(__x86_64)) && defined (X86_FEATURE_PAT) -+ -+#ifndef boot_cpu_has -+# define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) -+#endif -+ -+ /* Try to utilise PAT entries which already exist */ -+ if (boot_cpu_has (X86_FEATURE_PAT)) -+ { -+ unsigned int val0, val1, i; -+ int slot = -1; -+ -+ /* Read the IA32CR_PAT MSR register and see if a slot is -+ * set for write-combinig. Note we assume that all CPUs -+ * are configured the same like they're supposed to. */ -+ rdmsr (0x277, val0, val1); -+ -+ /* Check for PAT write combining entry (value 0x01) */ -+ for (i = 0; i < 4; i++, val0 >>= 8) -+ if ((val0 & 0xff) == 0x01) -+ slot = i; -+ for (i = 4; i < 8; i++, val1 >>= 8) -+ if ((val1 & 0xff) == 0x01) -+ slot = i; -+ -+ if (slot >= 0) -+ { -+ printk ("elan%d: using PAT for write combining (slot %d)\n", dev->dev_instance, slot); -+ -+ pat_pteval = ((slot & 4) ? _PAGE_PSE : 0) | ((slot & 2) ? _PAGE_PCD : 0) | ((slot & 1) ? _PAGE_PWT : 0); -+ return; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_MTRR -+ /* try and initialise the MTRR registers to enable write-combining */ -+ dev->dev_osdep.sdram_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ MTRR_TYPE_WRCOMB, 1); -+ if (dev->dev_osdep.sdram_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for sdram\n", dev->dev_instance); -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVB) -+ { -+ unsigned int cqreorder = dev->dev_cqcount >> 1; -+ unsigned int cqcount = dev->dev_cqcount - cqreorder; -+ -+ dev->dev_osdep.regs_mtrr = mtrr_add (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * cqcount, -+ MTRR_TYPE_WRCOMB, 1); -+ -+ if (dev->dev_osdep.regs_mtrr < 0) -+ printk ("elan%d: cannot configure MTRR for command ports\n", dev->dev_instance); -+ else -+ dev->dev_cqreorder = cqreorder; -+ } -+#endif -+} -+ -+void -+elan4_unconfigure_writecombining (ELAN4_DEV *dev) -+{ -+ if ((dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_NO_WRITE_COMBINE)) -+ return; -+ -+#ifdef CONFIG_MTRR -+ if (pat_pteval == -1) -+ { -+ if (dev->dev_osdep.sdram_mtrr >=0 ) -+ mtrr_del (dev->dev_osdep.sdram_mtrr, pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM), -+ pci_resource_len (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ -+ if (dev->dev_cqreorder && dev->dev_osdep.regs_mtrr >= 0) -+ mtrr_del (dev->dev_osdep.regs_mtrr, -+ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS) + -+ (dev->dev_cqoffset + dev->dev_cqreorder) * CQ_CommandMappingSize, -+ CQ_CommandMappingSize * (dev->dev_cqcount >> 1)); -+ } -+#endif -+} -+ -+EXPORT_SYMBOL(elan4_reference_device); -+EXPORT_SYMBOL(elan4_dereference_device); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/i2c.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/i2c.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/i2c.c 2005-07-28 14:52:52.829681624 -0400 -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i2c.c,v 1.4 2004/01/07 13:37:45 jon Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/i2c.c,v $*/ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define I2C_POLL_LIMIT 8 -+ -+static int -+i2c_poll_busy (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ volatile unsigned char val; -+ -+ /* wait for any led I2C operation to finish */ -+ while (((val = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cPortBusy) && loop++ < I2C_POLL_LIMIT) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ } -+ if (loop >= I2C_POLL_LIMIT) -+ { -+ printk ("elan%d: I2c has timed out waiting for I2cPortBusy to clear!\n", dev->dev_instance); -+ printk ("elan%d: I2cPortControl=%x I2cLedBase=%x I2cStatus=%x\n", -+ dev->dev_instance, val, read_i2c (dev, I2cLedBase), read_i2c (dev, I2cStatus)); -+ } -+ -+ return val; -+} -+ -+static int -+i2c_poll_stopped (ELAN4_DEV *dev) -+{ -+ int t = 100; -+ int loop = 0; -+ unsigned char val=0, newval; -+ -+ /* wait for any led I2C operation to finish. Must see it stopped at least twice */ -+ while (!(((newval = read_i2c (dev, I2cPortControl)) & I2cCntl_I2cStopped) && -+ (val & I2cCntl_I2cStopped)) && -+ (loop++ < I2C_POLL_LIMIT)) -+ { -+ DELAY (t); -+ -+ if (t < 500000) -+ t <<= 1; -+ val = newval; -+ } -+ -+ return val; -+} -+ -+int -+i2c_disable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ -+ if (dev->dev_i2c_led_disabled++ == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) & ~I2cCntl_I2cUpdatingLedReg); -+ -+ if (! (i2c_poll_stopped (dev) & I2cCntl_I2cStopped)) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return -EAGAIN; -+ } -+ -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) & ~I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+ -+ return 0; -+} -+ -+void -+i2c_enable_auto_led_update (ELAN4_DEV *dev) -+{ -+ spin_lock (&dev->dev_i2c_lock); -+ if (--dev->dev_i2c_led_disabled == 0) -+ { -+ write_i2c (dev, I2cLedBase, read_i2c (dev, I2cLedBase) | I2cCntl_I2cUpdatingLedReg); -+ write_i2c (dev, I2cStatus, read_i2c (dev, I2cStatus) | I2cCntl_SampleNewLedValues); -+ } -+ -+ spin_unlock (&dev->dev_i2c_lock); -+} -+ -+int -+i2c_write (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | (i == (count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_read (ELAN4_DEV *dev, unsigned int address, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_READ_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, 0xff); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortRead | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ i2c_poll_busy (dev); -+ -+ data[i] = read_i2c (dev, I2cRdData); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_writereg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ int i; -+ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ for (i = 0; i < count; i++) -+ { -+ write_i2c (dev, I2cWrData, data[i]); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | ((i == count-1) ? I2cCntl_I2cPortGenStopBit : 0)); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ printk (" i2c_writereg: off %d failed\n", i); -+ } -+ -+ return 0; -+} -+ -+int -+i2c_readreg (ELAN4_DEV *dev, unsigned int address, unsigned int reg, unsigned int count, unsigned char *data) -+{ -+ if (! (i2c_poll_busy (dev) & I2cCntl_I2cStopped)) -+ return -EAGAIN; /* not idle */ -+ -+ write_i2c (dev, I2cWrData, I2C_WRITE_ADDR(address)); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ write_i2c (dev, I2cWrData, reg); -+ write_i2c (dev, I2cPortControl, I2cCntl_I2cPortWrite | I2cCntl_I2cPortGenStopBit); -+ -+ if (i2c_poll_busy (dev) & I2cCntl_I2cPortAccFailed) -+ return -ENXIO; -+ -+ return i2c_read (dev, address, count, data); -+} -+ -+int -+i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int len, unsigned char *data) -+{ -+ unsigned int top = addr + len; -+ int res; -+ -+ if ((res = i2c_disable_auto_led_update (dev)) == 0) -+ { -+ /* read the rom in chunks that don't span the block boundary */ -+ while (addr < top) -+ { -+ unsigned int thisnob = top - addr; -+ unsigned int blocknob = I2C_24LC16B_BLOCKSIZE - I2C_24LC16B_BLOCKOFFSET(addr); -+ -+ if (thisnob > blocknob) -+ thisnob = blocknob; -+ -+ if ((res = i2c_readreg (dev, I2C_EEPROM_ADDR + I2C_24LC16B_BLOCKADDR(addr), -+ I2C_24LC16B_BLOCKOFFSET(addr), thisnob, data)) < 0) -+ break; -+ -+ addr += thisnob; -+ data += thisnob; -+ } -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/intcookie.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/intcookie.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/intcookie.c 2005-07-28 14:52:52.829681624 -0400 -@@ -0,0 +1,371 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.c,v 1.14.2.1 2005/03/01 12:01:57 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+static INTCOOKIE_TABLE *intcookie_tables; -+static spinlock_t intcookie_table_lock; -+ -+/* -+ * intcookie_drop_entry: -+ * drop the reference to a cookie held -+ * by the cookie table -+ */ -+static void -+intcookie_drop_entry (INTCOOKIE_ENTRY *ent) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ent->ent_lock, flags); -+ if (--ent->ent_ref != 0) -+ { -+ ent->ent_fired = ent->ent_cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+} -+ -+void -+intcookie_init() -+{ -+ spin_lock_init (&intcookie_table_lock); -+} -+ -+void -+intcookie_fini() -+{ -+ spin_lock_destroy (&intcookie_table_lock); -+} -+ -+INTCOOKIE_TABLE * -+intcookie_alloc_table (ELAN_CAPABILITY *cap) -+{ -+ INTCOOKIE_TABLE *tbl, *ntbl; -+ ELAN_CAPABILITY *ncap; -+ -+ KMEM_ZALLOC (ntbl, INTCOOKIE_TABLE *, sizeof (INTCOOKIE_TABLE), 1); -+ -+ if (ntbl == NULL) -+ return (NULL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ -+ if (ncap == NULL) -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ return (NULL); -+ } -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ else -+ { -+ spin_lock_init (&ntbl->tbl_lock); -+ -+ ntbl->tbl_cap = ncap; -+ ntbl->tbl_ref = 1; -+ ntbl->tbl_entries = NULL; -+ -+ /* Save supplied cap */ -+ memcpy (ncap, cap, ELAN_CAP_SIZE(cap)); -+ -+ if ((ntbl->tbl_next = intcookie_tables) != NULL) -+ intcookie_tables->tbl_prev = ntbl; -+ intcookie_tables = ntbl; -+ ntbl->tbl_prev = NULL; -+ } -+ spin_unlock (&intcookie_table_lock); -+ -+ if (tbl == NULL) -+ return (ntbl); -+ else -+ { -+ KMEM_FREE (ntbl, sizeof (INTCOOKIE_TABLE)); -+ KMEM_FREE (ncap, ELAN_CAP_SIZE(cap)); -+ return (tbl); -+ } -+} -+ -+void -+intcookie_free_table (INTCOOKIE_TABLE *tbl) -+{ -+ INTCOOKIE_ENTRY *ent; -+ -+ spin_lock (&intcookie_table_lock); -+ if (tbl->tbl_ref > 1) -+ { -+ tbl->tbl_ref--; -+ spin_unlock (&intcookie_table_lock); -+ return; -+ } -+ -+ if (tbl->tbl_prev) -+ tbl->tbl_prev->tbl_next = tbl->tbl_next; -+ else -+ intcookie_tables = tbl->tbl_next; -+ if (tbl->tbl_next) -+ tbl->tbl_next->tbl_prev = tbl->tbl_prev; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* NOTE - table no longer visible to other threads -+ * no need to aquire tbl_lock */ -+ while ((ent = tbl->tbl_entries) != NULL) -+ { -+ if ((tbl->tbl_entries = ent->ent_next) != NULL) -+ ent->ent_next->ent_prev = NULL; -+ -+ intcookie_drop_entry (ent); -+ } -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl->tbl_cap, ELAN_CAP_SIZE(tbl->tbl_cap)); -+ KMEM_FREE (tbl, sizeof (INTCOOKIE_TABLE)); -+} -+ -+int -+intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent, *nent; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (nent, INTCOOKIE_ENTRY *, sizeof (INTCOOKIE_ENTRY), 1); -+ -+ if (nent == NULL) -+ return (-ENOMEM); -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ kcondvar_init (&nent->ent_wait); -+ spin_lock_init (&nent->ent_lock); -+ -+ nent->ent_ref = 1; -+ nent->ent_cookie = cookie; -+ -+ if ((nent->ent_next = tbl->tbl_entries) != NULL) -+ tbl->tbl_entries->ent_prev = nent; -+ tbl->tbl_entries = nent; -+ nent->ent_prev = NULL; -+ } -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ if (ent == NULL) -+ return (0); -+ else -+ { -+ KMEM_FREE (nent, sizeof (INTCOOKIE_ENTRY)); -+ return (-EINVAL); -+ } -+} -+ -+int -+intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ if (ent->ent_prev == NULL) -+ tbl->tbl_entries = ent->ent_next; -+ else -+ ent->ent_prev->ent_next = ent->ent_next; -+ -+ if (ent->ent_next != NULL) -+ ent->ent_next->ent_prev = ent->ent_prev; -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ intcookie_drop_entry (ent); -+ -+ return (0); -+} -+ -+/* -+ * intcookie_fire_cookie: -+ * fire the cookie - this is called from the event interrupt. -+ */ -+int -+intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = cookie; -+ kcondvar_wakeupall (&ent->ent_wait, &ent->ent_lock); -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+int -+intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie) -+{ -+ int res; -+ INTCOOKIE_TABLE *tbl; -+ -+ spin_lock (&intcookie_table_lock); -+ -+ for (tbl = intcookie_tables; tbl; tbl = tbl->tbl_next) -+ if (ELAN_CAP_MATCH (tbl->tbl_cap, cap) && tbl->tbl_cap->cap_mycontext == cap->cap_mycontext) -+ break; -+ -+ if (tbl != NULL) -+ tbl->tbl_ref++; -+ -+ spin_unlock (&intcookie_table_lock); -+ -+ /* No matching table found */ -+ if (tbl == NULL) -+ return (-EINVAL); -+ -+ /* Fire the correct cookie */ -+ res = intcookie_fire (tbl, cookie); -+ -+ /* Decrement reference count (and free if necessary) */ -+ intcookie_free_table (tbl); -+ -+ return (res); -+} -+ -+/* -+ * intcookie_wait_cookie: -+ * deschedule on a cookie if it has not already fired. -+ * note - if the cookie is removed from the table, then -+ * we free it off when we're woken up. -+ */ -+int -+intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ spin_unlock (&tbl->tbl_lock); -+ -+ if (ent->ent_fired != 0) -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ return (0); -+ } -+ -+ ent->ent_ref++; -+ kcondvar_waitsig (&ent->ent_wait, &ent->ent_lock, &flags); -+ -+ res = ent->ent_fired ? 0 : -EINTR; -+ -+ if (--ent->ent_ref > 0) -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ else -+ { -+ spin_unlock_irqrestore (&ent->ent_lock, flags); -+ -+ spin_lock_destroy (&ent->ent_lock); -+ kcondvar_destroy (&ent->ent_wait); -+ -+ KMEM_FREE (ent, sizeof (INTCOOKIE_ENTRY)); -+ } -+ -+ return (res); -+} -+ -+int -+intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie) -+{ -+ INTCOOKIE_ENTRY *ent; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&tbl->tbl_lock, flags); -+ for (ent = tbl->tbl_entries; ent; ent = ent->ent_next) -+ if (ent->ent_cookie == cookie) -+ break; -+ -+ if (ent == NULL) -+ { -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ return (-EINVAL); -+ } -+ -+ spin_lock (&ent->ent_lock); -+ ent->ent_fired = 0; -+ spin_unlock (&ent->ent_lock); -+ -+ spin_unlock_irqrestore (&tbl->tbl_lock, flags); -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/Makefile 2005-07-28 14:52:52.830681472 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/elan4/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_ELAN4) += elan4.o -+elan4-objs := device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/Makefile.conf 2005-07-28 14:52:52.830681472 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = elan4.o -+MODULENAME = elan4 -+KOBJFILES = device.o i2c.o mmu.o sdram.o debug.o routetable.o trap.o user.o user_ddcq.o regions.o intcookie.o neterr.o device_Linux.o user_Linux.o procfs_Linux.o mmu_Linux.o -+EXPORT_KOBJS = device.o device_Linux.o mmu.o mmu_Linux.o procfs_Linux.o routetable.o sdram.o trap.o -+CONFIG_NAME = CONFIG_ELAN4 -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/mmu.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/mmu.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/mmu.c 2005-07-28 14:52:52.832681168 -0400 -@@ -0,0 +1,862 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.c,v 1.29.6.3 2005/03/10 15:49:24 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+ -+int elan4_debug_mmu; -+ -+/* Permission table - see ELAN4 MMU documentation */ -+u_char elan4_permtable[] = -+{ -+ 0x00, /* 0x000000 - Disable */ -+ 0x00, /* 0x000000 - Unused */ -+ 0x01, /* 0x000001 - Local Data Read */ -+ 0x03, /* 0x000011 - Local Data Write */ -+ 0x11, /* 0x010001 - Local Read */ -+ 0x10, /* 0x010000 - Local Execute */ -+ 0x05, /* 0x000101 - Read Only */ -+ 0x13, /* 0x010011 - Local Write */ -+ 0x20, /* 0x100000 - Local Event Access */ -+ 0x23, /* 0x100011 - Local Event Write Ac */ -+ 0xa3, /* 1x100011 - Remote Ev Loc Write */ -+ 0xaf, /* 1x101111 - Remote All */ -+ 0x07, /* 0x000111 - Remote Read Only */ -+ 0x0d, /* 0x001101 - Remote Write Only */ -+ 0x0f, /* 0x001111 - Remote Read/Write */ -+ 0xbf, /* 1x111111 - No Fault */ -+}; -+ -+u_char elan4_permreadonly[] = -+{ -+ PERM_Disabled, /* PERM_Disabled */ -+ PERM_Disabled, /* PERM_Unused */ -+ PERM_LocDataRead, /* PERM_LocDataRead */ -+ PERM_LocDataRead, /* PERM_LocDataWrite */ -+ PERM_LocRead, /* PERM_LocRead */ -+ PERM_LocExecute, /* PERM_LocExecute */ -+ PERM_ReadOnly, /* PERM_ReadOnly */ -+ PERM_LocRead, /* PERM_LocWrite */ -+ PERM_LocEventOnly, /* PERM_LocEventOnly */ -+ PERM_LocDataRead, /* PERM_LocEventWrite */ -+ PERM_LocDataRead, /* PERM_RemoteEvent */ -+ PERM_ReadOnly, /* PERM_RemoteAll */ -+ PERM_RemoteReadOnly, /* PERM_RemoteReadOnly */ -+ PERM_ReadOnly, /* PERM_RemoteWriteLocRead */ -+ PERM_ReadOnly, /* PERM_DataReadWrite */ -+ PERM_ReadOnly, /* PERM_NoFault */ -+}; -+ -+static void -+elan4mmu_synctag (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ E4_uint64 value = (he->he_tag[tagidx] & HE_TAG_VALID) ? he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK) : INVALID_CONTEXT; -+ -+ if (he->he_next) -+ value |= ((tagidx == 0) ? -+ ((he->he_next->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK) : -+ ((he->he_next->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK)); -+ else if (tagidx == 0) -+ value |= TAG_CHAINPTR_30TO19_MASK; -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_synctag: he=%p tagidx=%d he->he_tag=%llx -> value=%llx\n", he, tagidx, he->he_tag[tagidx], value); -+ -+ elan4_sdram_writeq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx), value); -+} -+ -+static void -+elan4mmu_chain_hents (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *phe, ELAN4_HASH_ENTRY *he) -+{ -+ ASSERT ((elan4_sdram_readq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0)) & TAG_CHAINPTR_30TO19_MASK) == TAG_CHAINPTR_30TO19_MASK); -+ -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(1), -+ ((phe->he_tag[1] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry << TAG_CHAINPTR_LOW_SHIFT) & TAG_CHAINPTR_18TO6_MASK))); -+ elan4_sdram_writeq (dev, phe->he_entry + E4MMU_TAG_OFFSET(0), -+ ((phe->he_tag[0] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK)) | ((he->he_entry >> TAG_CHAINPTR_HIGH_SHIFT) & TAG_CHAINPTR_30TO19_MASK))); -+} -+ -+static void -+elan4mmu_writepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx, E4_uint64 value) -+{ -+ /* -+ * NOTE - we can only change a valid PTE if we're upgrading it's permissions, -+ * any other changes should have invalidated it first. */ -+ -+ MPRINTF (DBG_DEVICE, 4, "elan4mmu_writepte: he=%p tagidx=%d pteidx=%x value=%llx\n", he, tagidx, pteidx, (unsigned long long) value); -+ -+ if (pteidx == 3) -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx), (value >> 16) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), (value >> 0) & 0xFFFF); -+ } -+ else -+ { -+ elan4_sdram_writew (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx), (value >> 32) & 0xFFFF); -+ elan4_sdram_writel (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), value & 0xFFFFFFFF); -+ } -+} -+ -+static void -+elan4mmu_invalidatepte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx), PTE_SetPerm (PERM_Disabled)); -+ else -+ elan4_sdram_writeb (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx), PTE_SetPerm (PERM_Disabled)); -+} -+ -+static E4_uint64 -+elan4mmu_readpte (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int tagidx, int pteidx) -+{ -+ if (pteidx == 3) -+ return (((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD0_OFFSET(tagidx)) << 0) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD1_OFFSET(tagidx)) << 16) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE3_WORD2_OFFSET(tagidx)) << 32)); -+ else -+ return ((E4_uint64) elan4_sdram_readl (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, pteidx)) | -+ ((E4_uint64) elan4_sdram_readw (dev, he->he_entry + E4MMU_PTE_HIGH_OFFSET(tagidx, pteidx)) << 32)); -+} -+ -+ -+void -+elan4mmu_flush_tlb (ELAN4_DEV *dev) -+{ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while (read_reg64 (dev, SysControlReg) & CONT_TLB_FLUSH) -+ DELAY (1); -+} -+ -+/* -+ * elanmmu_flush_tlb_hash - this flushes the hash copy entries and the elan -+ * tlb. However after the write to the hash copy entry if the elan was -+ * in the process of walking, then it could write the hash copy with a valid -+ * entry which we had just invalidated. However once we've seen the tlb flushed -+ * then if the walk engine had done a write - then we need to invaldate the -+ * hash copy entries again and reflush the tlb. -+ * -+ * If we're invalidating a lot of hash blocks, then the chances are that the -+ * walk engine will perform a write - so we flush the tlb first, then invalidate -+ * the hash copy entries, then flush the tlb again. -+ */ -+static void -+elan4mmu_flush_tlb_hash (ELAN4_DEV *dev, int tbl, unsigned baseidx, unsigned topidx) -+{ -+ int notmany = (abs(topidx - baseidx) < 5) ? 1 : 0; -+ int hashidx; -+ E4_uint32 reg; -+ -+ if (notmany) -+ PULSE_SYSCONTROL (dev, CONT_CLEAR_WALK_WROTE_TABLES); -+ else -+ elan4mmu_flush_tlb(dev); -+ -+ do { -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ if (dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_COPY) -+ { -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[0] & HE_TAG_VALID) == 0); -+ ASSERT ((dev->dev_mmuhash[tbl][hashidx].he_tag[1] & HE_TAG_VALID) == 0); -+ -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 0); -+ elan4mmu_synctag (dev, &dev->dev_mmuhash[tbl][hashidx], 1); -+ } -+ -+ PULSE_SYSCONTROL (dev, CONT_TLB_FLUSH); -+ -+ while ((reg = read_reg64 (dev, SysControlReg)) & CONT_TLB_FLUSH) -+ DELAY (1); -+ -+ } while (notmany-- && (reg & CONT_CLEAR_WALK_WROTE_TABLES) != 0); -+} -+ -+void -+elan4mmu_display_hent (ELAN4_DEV *dev, ELAN4_HASH_ENTRY *he, int hashidx) -+{ -+ int tagidx; -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "elan4mmu_display_hent: hashidx=%d he=%p entry at %lx\n", hashidx, he, he->he_entry); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " next=%p prev=%p chain=%p,%p\n", he->he_next, he->he_prev, he->he_chain[0], he->he_chain[1]); -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ E4_uint64 tag = elan4_sdram_readq (dev, he->he_entry + E4MMU_TAG_OFFSET(tagidx)); -+ E4_uint64 pte0 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 0)); -+ E4_uint64 pte1 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 1)); -+ E4_uint64 pte2 = elan4_sdram_readq (dev, he->he_entry + E4MMU_PTE_LOW_OFFSET(tagidx, 2)); -+ E4_uint64 pte3 = ((pte0 >> 48) | (pte1 >> 32) | (pte2 >> 16)); -+ -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Tag %d (%llx,%08x) context=%04x vaddr=%llx\n", tagidx, he->he_tag[tagidx], he->he_pte[tagidx], (int) (tag & TAG_CONTEXT_MASK), (tag & TAG_ADDRESS_MASK)); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 0 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte0 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte0 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte0 & PTE_TYPE_MASK), (pte0 & PTE_MOD_MASK) ? " mod" : "", (pte0 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 1 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte1 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte1 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte1 & PTE_TYPE_MASK), (pte1 & PTE_MOD_MASK) ? " mod" : "", (pte1 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 2 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte2 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte2 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte2 & PTE_TYPE_MASK), (pte2 & PTE_MOD_MASK) ? " mod" : "", (pte2 & PTE_REF_MASK) ? " ref" : ""); -+ elan4_debugf (DBG_DEVICE, DBG_MMU, " Pte 3 - PPN=%llx PERM=%x TYPE=%x%s%s\n", (pte3 & PTE_PPN_MASK) >> PTE_PPN_SHIFT, -+ (int) (pte3 & PTE_PERM_MASK) >> PTE_PERM_SHIFT, (int)(pte3 & PTE_TYPE_MASK), (pte3 & PTE_MOD_MASK) ? " mod" : "", (pte3 & PTE_REF_MASK) ? " ref" : ""); -+ } -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_next (ELAN4_HASH_ENTRY *he, int ctxnum) -+{ -+ return ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum) ? he->he_chain[0] : he->he_chain[1]; -+} -+ -+static __inline__ ELAN4_HASH_ENTRY * -+he_ctxt_unlink (ELAN4_CTXT *ctxt, int tbl, int hashidx, ELAN4_HASH_ENTRY *prevhe, ELAN4_HASH_ENTRY *he, ELAN4_HASH_ENTRY *next) -+{ -+ /* Check whether either tag is in use by this context */ -+ if ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ return he; -+ -+ if (prevhe == NULL) -+ ctxt->ctxt_mmuhash[tbl][hashidx] = next; -+ else -+ { -+ /* previous he, ensure that both chain pointers are changed is this ctxt is using both tags */ -+ ASSERT ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num || (prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num); -+ -+ if ((prevhe->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[0] = next; -+ if ((prevhe->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num) -+ prevhe->he_chain[1] = next; -+ } -+ -+ return prevhe; -+} -+ -+void -+elan4mmu_display (ELAN4_CTXT *ctxt, int tbl, const char *tag) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ ELAN4_HASH_ENTRY *he; -+ int hashidx; -+ -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxt->ctxt_num)) -+ { -+ elan4_debugf (DBG_DEVICE, DBG_MMU, "%s: hashidx=%d he=%p tags <%llx,%llx>\n", tag, hashidx, he, -+ (he->he_tag[0] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0, -+ (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxt->ctxt_num ? E4MMU_TAG2VADDR (he->he_tag[1], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1) : 0); -+ elan4mmu_display_hent (dev, he, hashidx); -+ } -+} -+ -+static ELAN4_HASH_ENTRY * -+elan4mmu_alloc_hent (ELAN4_DEV *dev, int tbl, int hashidx, E4_uint64 newtag, int *tagidx) -+{ -+ ELAN4_HASH_ENTRY *he, *phe; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ /* 2nd see if there are any partial free blocks */ -+ if ((he = dev->dev_mmufree[tbl][hashidx]) != NULL) -+ { -+ *tagidx = ((he->he_tag[0] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) ? 0 : 1; -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_alloc_hent: allocate he=%p idx=%d%s\n", he, *tagidx, (he == &dev->dev_mmuhash[tbl][hashidx]) ? " hash-block" : ""); -+ -+ he->he_tag[*tagidx] = newtag | HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, *tagidx); -+ -+ if ((he->he_tag[(*tagidx) ^ 1] & TAG_CONTEXT_MASK) != INVALID_CONTEXT) -+ { -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_alloc_hent: block full - remove from freelist\n"); -+ dev->dev_mmufree[tbl][hashidx] = he->he_chain[*tagidx]; -+ } -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+ } -+ -+ if ((he = dev->dev_mmufreelist) != NULL) -+ dev->dev_mmufreelist = he->he_next; -+ else -+ { -+ ELAN4_HASH_CHUNK *hc; -+ sdramaddr_t entry; -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CHUNK *, sizeof (ELAN4_HASH_CHUNK), 0); -+ -+ if (hc == NULL) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ if ((entry = elan4_sdram_alloc (dev, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS)) == (sdramaddr_t) 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ -+ KMEM_FREE (hc, sizeof (ELAN4_HASH_CHUNK)); -+ return ((ELAN4_HASH_ENTRY *) NULL); -+ } -+ -+ list_add_tail (&hc->hc_link, &dev->dev_hc_list); -+ -+ elan4_sdram_zeroq_sdram (dev, entry, sizeof (E4_HashTableEntry) * ELAN4_HENT_CHUNKS); -+ -+ /* no initialise all chunks and chain all but the first onto the freelist */ -+ for (i = 0; i < ELAN4_HENT_CHUNKS; i++, entry += sizeof (E4_HashTableEntry)) -+ { -+ hc->hc_hents[i].he_entry = entry; -+ -+ if (i == 0) -+ he = &hc->hc_hents[0]; -+ else -+ { -+ hc->hc_hents[i].he_next = dev->dev_mmufreelist; -+ dev->dev_mmufreelist = &hc->hc_hents[i]; -+ } -+ } -+ } -+ -+ /* Initialise hash entry, using slot 0 */ -+ *tagidx = 0; -+ -+ he->he_next = NULL; -+ he->he_prev = NULL; -+ he->he_chain[0] = NULL; -+ he->he_chain[1] = NULL; -+ he->he_tag[0] = newtag | HE_TAG_VALID; -+ he->he_tag[1] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[0] = 0; -+ he->he_pte[1] = 0; -+ -+ elan4mmu_synctag (dev, he, 0); -+ -+ /* add slot 1 to freelist */ -+ he->he_chain[1] = dev->dev_mmufree[tbl][hashidx]; -+ dev->dev_mmufree[tbl][hashidx] = he; -+ -+ /* add to mmuhash lists */ -+ for (phe = &dev->dev_mmuhash[tbl][hashidx]; phe->he_next; phe = phe->he_next) -+ ; -+ phe->he_next = he; -+ he->he_prev = phe; -+ he->he_next = NULL; -+ -+ /* finally chain the hash block into the hash tables */ -+ elan4mmu_chain_hents (dev, phe, he); -+ -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+ return (he); -+} -+ -+static void -+elan4mmu_free_hent (ELAN4_DEV *dev, int tbl, int hashidx, ELAN4_HASH_ENTRY *he, int tagidx) -+{ -+ unsigned long flags; -+ int pteidx; -+ -+ /* Invalidate the tag, and zero all ptes */ -+ for (pteidx = 0; pteidx < 4; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+ -+ spin_lock_irqsave (&dev->dev_mmulock, flags); -+ -+ he->he_tag[tagidx] = E4MMU_TAG(0, INVALID_CONTEXT); -+ he->he_pte[tagidx] = 0; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ -+ if ((he->he_tag[tagidx^1] & TAG_CONTEXT_MASK) == INVALID_CONTEXT) /* Both tags are now free */ -+ { -+ if (he == &dev->dev_mmuhash[tbl][hashidx]) /* it's the hash block entry */ -+ { /* so as it's already on the freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; /* just copy it's chain pointers */ -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free but hashblk\n", tbl, hashidx, tagidx, he); -+ } -+ else -+ { -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => all free\n", tbl, hashidx, tagidx, he); -+ -+ /* XXXX - should remove it from the hash table, and -+ * place back on the anonymous freelist */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ } -+ } -+ else -+ { -+ /* Other tag still in use */ -+ he->he_chain[tagidx] = dev->dev_mmufree[tbl][hashidx]; -+ dev->dev_mmufree[tbl][hashidx] = he; -+ -+ MPRINTF (DBG_DEVICE, 3, "elan4mmu_free_hent: tbl=%d hashidx=%x tagidx=%d he=%p => other tag in use\n", tbl, hashidx, tagidx, he); -+ } -+ spin_unlock_irqrestore (&dev->dev_mmulock, flags); -+} -+ -+ELAN4_HASH_ENTRY * -+elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxnum, vaddr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_uint64 newtag = E4MMU_TAG(vaddr, ctxnum); -+ ELAN4_HASH_ENTRY *he = &dev->dev_mmuhash[tbl][hashidx]; -+ unsigned tagidx; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: tbl=%d ctxnum=%d vaddr=%llx -> hashidx %d\n", tbl, ctxnum, vaddr, hashidx); -+ -+ /* 1st) check whether we're reloading an existing entry */ -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ { -+ ASSERT ((he->he_tag[0] & TAG_CONTEXT_MASK) == ctxnum || (he->he_tag[1] & TAG_CONTEXT_MASK) == ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ { -+ if ((he->he_tag[tagidx] & (TAG_ADDRESS_MASK | TAG_CONTEXT_MASK | HE_TAG_VALID)) == (newtag | HE_TAG_VALID)) -+ { -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return old he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ return he; -+ } -+ } -+ } -+ -+ if ((he = elan4mmu_alloc_hent (dev, tbl, hashidx, newtag, &tagidx)) == NULL) -+ return NULL; -+ -+ /* chain onto context hash */ -+ if ((he->he_tag[tagidx ^ 1] & TAG_CONTEXT_MASK) == ctxnum) /* already chained using other link */ -+ { /* so ensure both slots are chained the same */ -+ he->he_chain[tagidx] = he->he_chain[tagidx^1]; -+ } -+ else -+ { -+ he->he_chain[tagidx] = ctxt->ctxt_mmuhash[tbl][hashidx]; -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he; -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_ptealloc: return new he %p tagidx %d\n", he, tagidx); -+ -+ *tagidxp = tagidx; -+ -+ return he; -+} -+ -+int -+elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(vaddr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx; -+ ELAN4_HASH_ENTRY *he; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_pteload: ctx=%d tbl=%d pteidx=%d vaddr=%llx pte=%llx\n", -+ ctxt->ctxt_num, tbl, pteidx, (unsigned long long)vaddr, newpte); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ if ((he = elan4mmu_ptealloc (ctxt, tbl, vaddr, &tagidx)) == NULL) -+ { -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return -ENOMEM; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_pteload: %s he=%p tagidx=%d pteidx=%d\n", HE_GET_PTE(he,0,pteidx) ? "reloading" : "loading", he, tagidx, pteidx); -+ -+ ASSERT (HE_GET_PTE(he,tagidx,pteidx) == 0 || /* invalid -> valid */ -+ (elan4mmu_readpte (dev, he, tagidx, pteidx) & PTE_PPN_MASK) == (newpte & PTE_PPN_MASK)); /* or same phys address */ -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+ -+ HE_SET_PTE(he, tagidx, pteidx, (newpte & PTE_PERM_TYPE_MASK)); -+ -+ spin_unlock (&ctxt->ctxt_mmulock); -+ return 0; -+} -+ -+void -+elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned ctxnum = ctxt->ctxt_num; -+ unsigned long tagspan = (1 << (dev->dev_pageshift[tbl] + 2)); -+ E4_Addr end = start + len - 1; -+ int needflush = 0; -+ unsigned baseidx, topidx; -+ unsigned hashidx, tagidx, pteidx; -+ ELAN4_HASH_ENTRY *he, *prevhe, *next; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_unload_range: tbl=%d start=%llx end=%llx len=%lx\n", tbl, start, end, len); -+ -+ /* determine how much of the hash table we've got to scan */ -+ -+ /* GNAT 6760: When we have a Main page size which maps onto multiple Elan pages -+ * we need to do something a bit more clever here or else it takes ms per page invalidate -+ * This change helps in the meantime -+ */ -+ /* if (len <= (1 << dev->dev_pageshift[tbl])) */ -+ if (len <= PAGE_SIZE) -+ { -+ baseidx = E4MMU_HASH_INDEX (ctxnum, start, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ topidx = E4MMU_HASH_INDEX (ctxnum, end, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ -+ if (baseidx != topidx) -+ { -+ /* GNAT 6760: Need to search whole of the hash table (slow!) */ -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ } -+ else -+ { -+ baseidx = 0; -+ topidx = dev->dev_hashsize[tbl] - 1; -+ } -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: baseidx=%d topidx=%d\n", baseidx, topidx); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st - invalidate the tag for all hash blocks which are completely invalidated, -+ * and remember the first/last hash blocks */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx hashidx=%d bidx=%d tidx=%d\n", he, base, top, hashidx, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ { -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+ needflush = 1; -+ } -+ } -+ else if (base >= start && top <= end) /* hash entry completely spanned */ -+ { /* so invalidate the tag */ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned\n", he, base, top); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ needflush = 1; -+ } -+ } -+ -+ if (needflush) -+ { -+ /* 2nd invalidate the first/last hash blocks if they are partially invalidated -+ * and flush the tlb/hash copy blocks */ -+ elan4mmu_flush_tlb_hash (dev, tbl, baseidx, topidx); -+ -+ /* 3rd free off the hash entries which are completely invalidated */ -+ for (hashidx = baseidx; hashidx <= topidx; hashidx++) -+ for (prevhe = NULL, he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = next) -+ { -+ next = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ { -+ E4_Addr base = E4MMU_TAG2VADDR (he->he_tag[tagidx], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ E4_Addr top = base + (tagspan -1); -+ -+ if (start < top && end > base) -+ { -+ unsigned bidx = (start <= base) ? 0 : (start & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ unsigned tidx = (end >= top) ? 3 : (end & (tagspan-1)) >> dev->dev_pageshift[tbl]; -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx bidx=%d tidx=%d\n", he, base, top, bidx, tidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ if (HE_GET_PTE(he, tagidx, pteidx)) -+ { -+ HE_SET_PTE(he, tagidx, pteidx, 0); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, 0); -+ } -+ } -+ -+ if ((base >= start && top <= end) || he->he_pte[tagidx] == 0) /* hash entry completely spanned or all pte's cleared */ -+ { /* so invalidate the pte's and free it */ -+ -+ MPRINTF (ctxt, 1, "elan4mmu_unload_range: he=%p base=%llx top=%llx spanned or empty\n", he, base, top); -+ -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ } -+ -+ prevhe = he_ctxt_unlink (ctxt, tbl, hashidx, prevhe, he, next); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ int ctxnum = ctxt->ctxt_num; -+ ELAN4_HASH_ENTRY *he; -+ int tbl, hashidx, tagidx; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_invalidate_ctxt: invalidating ctxnum=%d\n", ctxnum); -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ /* 1st invalidate all tags belonging to me */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ for (he = ctxt->ctxt_mmuhash[tbl][hashidx]; he != NULL; he = he_ctxt_next (he, ctxnum)) -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) /* own tag block */ -+ { -+ MPRINTF (ctxt, 1, "elan4mmu_invalidate_ctxt: he=%p addr=%llx hashidx=%d tagidx=%d\n", -+ he, he->he_tag[tagidx] & TAG_ADDRESS_MASK, hashidx, tagidx); -+ -+ he->he_tag[tagidx] &= ~HE_TAG_VALID; -+ -+ elan4mmu_synctag (dev, he, tagidx); -+ } -+ -+ /* 2nd flush the tlb & cached hash block */ -+ elan4mmu_flush_tlb (dev); -+ -+ /* 3rd invalidate all pte's and free off the hash entries */ -+ for (tbl = 0; tbl < NUM_HASH_TABLES; tbl++) -+ for (hashidx = 0; hashidx < dev->dev_hashsize[tbl]; hashidx++) -+ while ((he = ctxt->ctxt_mmuhash[tbl][hashidx]) != NULL) -+ { -+ ctxt->ctxt_mmuhash[tbl][hashidx] = he_ctxt_next (he, ctxnum); -+ -+ for (tagidx = 0; tagidx < 2; tagidx++) -+ if ((he->he_tag[tagidx] & TAG_CONTEXT_MASK) == ctxnum) -+ elan4mmu_free_hent (dev, tbl, hashidx, he, tagidx); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+ELAN4_HASH_CACHE * -+elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr end = start + (npages << dev->dev_pageshift[tbl]) - 1; -+ unsigned long tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_CACHE *hc; -+ unsigned int tagidx, pteidx; -+ E4_Addr addr; -+ int i; -+ -+ MPRINTF (ctxt, 0, "elan4mmu_reserve: start=%llx npages=%d\n", start, npages); -+ MPRINTF (ctxt, 0, " pageshift=%d tagspan=%lx base=%llx top=%llx end=%llx nhes=%d\n", -+ dev->dev_pageshift[tbl], tagspan, base, top, end, nhes); -+ -+ KMEM_ALLOC (hc, ELAN4_HASH_CACHE *, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes]), cansleep); -+ -+ if (hc == NULL) -+ return NULL; -+ -+ hc->hc_start = start; -+ hc->hc_end = end; -+ hc->hc_tbl = tbl; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ -+ -+ if ((hc->hc_hes[i] = elan4mmu_ptealloc (ctxt, tbl, addr & ~(tagspan-1), &tagidx)) == NULL) -+ goto failed; -+ -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: tbl=%d addr=%llx -> hashidx=%d tagidx=%d\n", tbl, addr & ~(tagspan-1), -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, (addr & ~(tagspan-1)), dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1), tagidx); -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ ASSERT (HE_GET_PTE (hc->hc_hes[i], tagidx, pteidx) == 0); -+ -+ MPRINTF (ctxt, 2, "elan4mmu_reserve: i=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", -+ i, addr, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE (hc->hc_hes[i], tagidx, pteidx, PTE_PERM_TYPE_MASK); -+ } -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ return hc; -+ -+ failed: -+ for (i--, addr -= tagspan; i >= 0; i--, addr -= tagspan) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[tbl], dev->dev_hashsize[tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, 0); -+ -+ if (hc->hc_hes[i]->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, tbl, hashidx, hc->hc_hes[i], tagidx); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+ -+ KMEM_FREE (hc, offsetof (ELAN4_HASH_CACHE, hc_hes[nhes])); -+ -+ return NULL; -+} -+ -+void -+elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ E4_Addr start = hc->hc_start; -+ E4_Addr end = hc->hc_end; -+ unsigned long tagshift = dev->dev_pageshift[hc->hc_tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr base = (start & ~(tagspan-1)); -+ E4_Addr top = (end & ~(tagspan-1)) + (tagspan-1); -+ unsigned int nhes = (top - base + 1) >> tagshift; -+ ELAN4_HASH_ENTRY *prevhe, *he, *next; -+ E4_Addr addr; -+ unsigned int pteidx; -+ int i; -+ -+ spin_lock (&ctxt->ctxt_mmulock); -+ -+ MPRINTF (ctxt, 0, "elan4mmu_release: base=%llx top=%llx\n", base, top); -+ -+ for (addr = base, i = 0; i < nhes; addr += tagspan, i++) -+ { -+ unsigned bidx = (i == 0) ? (start & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 0; -+ unsigned tidx = (i == (nhes-1)) ? (end & (tagspan-1)) >> dev->dev_pageshift[hc->hc_tbl] : 3; -+ unsigned hashidx = E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1); -+ unsigned tagidx = (addr == E4MMU_TAG2VADDR (hc->hc_hes[i]->he_tag[0], hashidx, dev->dev_pageshift[hc->hc_tbl], dev->dev_hashsize[hc->hc_tbl]-1)) ? 0 : 1; -+ -+ for (pteidx = bidx; pteidx <= tidx; pteidx++) -+ { -+ elan4mmu_invalidatepte (dev, hc->hc_hes[i], tagidx, pteidx); -+ -+ HE_SET_PTE(hc->hc_hes[i], tagidx, pteidx, 0); -+ } -+ -+ MPRINTF (ctxt, 2, "elan4mmu_release: i=%d addr=%llx he=%p (hashidx=%d tagidx=%d pteidx=%d) pte=%x\n", -+ i, addr, hc->hc_hes[i], hashidx, tagidx, pteidx, hc->hc_hes[i]->he_pte[tagidx]); -+ -+ /* remove from context hash */ -+ /* need to move to the hc->hc_hes[i] in the ctxt list and set prevhe, he, next */ -+ prevhe = NULL; -+ he = ctxt->ctxt_mmuhash[hc->hc_tbl][hashidx]; -+ next = he_ctxt_next (he, ctxt->ctxt_num); -+ -+ while(he != hc->hc_hes[i]) { -+ prevhe = he; -+ he = next; -+ next = he_ctxt_next (he, ctxt->ctxt_num); -+ } -+ -+ if (he->he_pte[tagidx] == 0) -+ elan4mmu_free_hent (dev, hc->hc_tbl, hashidx, he, tagidx); -+ -+ he_ctxt_unlink (ctxt, hc->hc_tbl, hashidx, prevhe, he, next); -+ } -+ spin_unlock (&ctxt->ctxt_mmulock); -+} -+ -+void -+elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_set_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d) newpte=%llx\n", idx, addr, he, tagidx, pteidx, newpte); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_writepte (dev, he, tagidx, pteidx, newpte); -+} -+ -+E4_uint64 -+elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ return elan4mmu_readpte (dev, he, tagidx, pteidx); -+} -+ -+void -+elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ unsigned int tbl = hc->hc_tbl; -+ unsigned int tagshift = dev->dev_pageshift[tbl] + 2; -+ E4_Addr tagspan = 1 << tagshift; -+ E4_Addr addr = hc->hc_start + (idx << dev->dev_pageshift[tbl]); -+ ELAN4_HASH_ENTRY *he = hc->hc_hes[(addr - (hc->hc_start & ~(tagspan-1))) >> tagshift]; -+ unsigned pteidx = E4MMU_SHIFT_ADDR(addr, dev->dev_pageshift[tbl]) & 3; -+ unsigned tagidx = he->he_tag[0] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID) ? 0 : 1; -+ -+ MPRINTF (ctxt, 2, "elan4mmu_clear_pte: idx=%d addr=%llx he=%p (tagidx=%d pteidx=%d)\n", idx, addr, he, tagidx, pteidx); -+ -+ ASSERT (he->he_tag[tagidx] == (E4MMU_TAG (addr, ctxt->ctxt_num) | HE_TAG_VALID)); -+ -+ elan4mmu_invalidatepte (dev, he, tagidx, pteidx); -+} -+ -+EXPORT_SYMBOL(elan4mmu_flush_tlb); -+EXPORT_SYMBOL(elan4mmu_pteload); -+EXPORT_SYMBOL(elan4mmu_unload_range); -+EXPORT_SYMBOL(elan4mmu_reserve); -+EXPORT_SYMBOL(elan4mmu_release); -+EXPORT_SYMBOL(elan4mmu_set_pte); -+EXPORT_SYMBOL(elan4mmu_get_pte); -+EXPORT_SYMBOL(elan4mmu_clear_pte); -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/mmu_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/mmu_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/mmu_Linux.c 2005-07-28 14:52:52.832681168 -0400 -@@ -0,0 +1,265 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu_Linux.c,v 1.8 2004/05/10 14:10:46 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+/* -+ * Convert a physical address into an pte. This should generate a "local" pte for -+ * physical addresses which are elan4 sdram or elan4 command queues. For elan4 -+ * registers and other addresses on the same bus, this should be the local pci -+ * bus address. All other addresses should access the physical address via the -+ * PCI bridge. -+ */ -+ -+#ifdef __alpha -+#define ioaddr2paddr(ioaddr) virt_to_phys((void *) __ioremap(ioaddr, PAGE_SIZE)) -+#elif defined(__ia64) -+#define ioaddr2paddr(ioaddr) ((ioaddr) & ~__IA64_UNCACHED_OFFSET) -+#else -+#define ioaddr2paddr(ioaddr) (ioaddr) -+#endif -+ -+int -+elan4mmu_categorise_paddr (ELAN4_DEV *dev, physaddr_t *physp) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t sdram_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t regs_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t phys = *physp; -+ int iscommand; -+ -+ if (phys >= sdram_base && phys <= sdram_top) -+ { -+ (*physp) = (phys ^ sdram_base); -+ return ELAN4MMU_PADDR_SDRAM; -+ } -+ -+ if (phys >= regs_base && phys < regs_top) -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ iscommand = (phys < (regs_base + ELAN4_REVA_REG_OFFSET)); -+ else -+ iscommand = (phys < (regs_base + ELAN4_REVB_I2C_OFFSET)); -+ -+ if (iscommand) -+ { -+ (*physp) = phys ^ regs_base; -+ -+ return ELAN4MMU_PADDR_COMMAND; -+ } -+ else -+ { -+ // XXXX (*physp) = phys2bus (phys); -+ -+ return ELAN4MMU_PADDR_LOCALPCI; -+ } -+ } -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ if (VALID_PAGE (virt_to_page (phys_to_virt (phys)))) -+#else -+ if (virt_addr_valid (phys_to_virt (phys))) -+#endif -+ return ELAN4MMU_PADDR_PAGE; -+ -+ return ELAN4MMU_PADDR_OTHER; -+} -+ -+int -+elan4mmu_sdram_aliascheck (ELAN4_CTXT *ctxt, E4_Addr addr, physaddr_t phys) -+{ -+ ELAN4_DEV *dev = ctxt->ctxt_dev; -+ -+ /* -+ * On MPSAS we don't allocate a large enough context table, so -+ * if we see an address/context pair which would "alias" because -+ * they differ in unchecked hash bits to a previous pteload, -+ * then we kill the application. -+ */ -+ unsigned hashval = (E4MMU_SHIFT_ADDR(addr, (dev->dev_pageshift[0]) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(ctxt->ctxt_num)); -+ -+ if (dev->dev_rsvd_hashval[0] == 0xFFFFFFFF) -+ dev->dev_rsvd_hashval[0] = hashval & dev->dev_rsvd_hashmask[0]; -+ -+ if ((hashval & dev->dev_rsvd_hashmask[0]) != dev->dev_rsvd_hashval[0]) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx ctxnum=%x -> [%x] overlaps %x - %x [hashidx=%x]\n", (unsigned long long) addr, -+ ctxt->ctxt_num, hashval, hashval & dev->dev_rsvd_hashmask[0], dev->dev_rsvd_hashval[0], -+ E4MMU_HASH_INDEX (ctxt->ctxt_num, addr, dev->dev_pageshift[0], dev->dev_hashsize[0]-1)); -+ -+ return 0; -+ } -+ -+ if (((addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (phys & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)))) -+ { -+ printk ("elan4mmu_sdram_aliascheck: vaddr=%016llx incorrectly alias sdram at %lx\n", (unsigned long long) addr, -+ phys ^ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+int -+elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type) -+{ -+#if defined(__i386) && !defined(CONFIG_X86_PAE) -+ if (dev->dev_topaddrvalid == 0) -+ { -+ dev->dev_topaddrvalid = 1; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(0), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(1), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(2), 0); -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(3), 0); -+ } -+ return (0); -+#else -+ register int i; -+ E4_uint16 match; -+ -+ if (dev->dev_topaddrmode) /* ExtraMasterAddrBits=1 => match {paddr[63:50],type[3:2]} */ -+ match = ((paddr >> 48) & ~3) | ((type >> 2) & 3); -+ else /* ExtraMasterAddrBits=0 => match {paddr[63:48]} */ -+ match = (paddr >> 48); -+ -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: mode=%d paddr=%lx type=%x match=%x [%x %x.%x.%x.%x]\n", -+ dev->dev_topaddrmode, paddr, type, match, dev->dev_topaddrvalid, -+ dev->dev_topaddr[0], dev->dev_topaddr[1], dev->dev_topaddr[2], dev->dev_topaddr[3]); -+ -+ for (i = 0; i < 4; i++) -+ if ((dev->dev_topaddrvalid & (1 << i)) && dev->dev_topaddr[i] == match) -+ return (i); -+ -+ for (i = 0; i < 4; i++) -+ { -+ if ((dev->dev_topaddrvalid & (1 << i)) == 0) -+ { -+ MPRINTF (DBG_DEVICE, 2, "elan4mmu_alloc_topaddr: allocate slot %d for %x\n", i, match); -+ -+ dev->dev_topaddrvalid |= (1 << i); -+ dev->dev_topaddr[i] = match; -+ -+ pci_write_config_word (dev->dev_osdep.pdev, PCI_ELAN_TOPPHYSADDR(i), match); -+ return (i); -+ } -+ } -+ -+ panic ("elan4mmu_alloc_topaddr: all topaddrs in use\n"); -+ return (0); -+#endif -+} -+ -+E4_uint64 -+elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t phys, unsigned perm) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t sdram_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t regs_top = ioaddr2paddr (pci_resource_end (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ int iscommand; -+ E4_uint64 pte; -+ unsigned type; -+ -+ if (phys >= sdram_base && phys <= sdram_top) -+ { -+ phys ^= sdram_base; -+ type = PTE_SetPerm (perm); -+ } -+ else if (phys >= regs_base && phys < regs_top) -+ { -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ iscommand = (phys < (regs_base + ELAN4_REVA_REG_OFFSET)); -+ else -+ iscommand = (phys < (regs_base + ELAN4_REVB_I2C_OFFSET)); -+ -+ if (iscommand) -+ { -+ phys ^= regs_base; -+ type = PTE_SetPerm (perm) | PTE_CommandQueue; -+ } -+ else -+ { -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal; -+ // phys = phys2bus (phys); -+ } -+ } -+ else -+ { -+ type = PTE_SetPerm (perm) | PTE_PciNotLocal | dev->dev_pteval; -+ -+#ifdef LINUX_SPARC -+ /* XXXX if not local pci bus, then or in the bypass bit */ -+ phys |= 0xfffe000000000000; -+ type |= PTE_BigEndian; -+#endif -+ -+ -+#if defined(__alpha) -+ phys |= alpha_mv.pci_dac_offset; -+#endif -+ } -+ -+ if ((type & PTE_PciNotLocal) == 0) -+ pte = (phys >> PTE_PADDR_SHIFT) | type; -+ else -+ { -+ unsigned topaddr = elan4mmu_alloc_topaddr (dev, phys, type); -+ -+ if (dev->dev_topaddrmode) -+ pte = (phys >> PTE_PADDR_SHIFT) | (type & ~0xc) | (topaddr << 2); -+ else -+ pte = ((phys >> PTE_PADDR_SHIFT) & ~PTE_TOPADDR_MASK) | (((E4_uint64) topaddr) << 45) | type; -+ } -+ -+ return pte; -+} -+ -+physaddr_t -+elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte) -+{ -+ physaddr_t sdram_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ physaddr_t regs_base = ioaddr2paddr (pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS)); -+ physaddr_t phys; -+ -+ if (pte & PTE_PciNotLocal) -+ { -+ if (dev->dev_topaddrmode) -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 2) & 3] & 0xfffc) << 48) | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT); -+ else -+ phys = ((physaddr_t)(dev->dev_topaddr[(pte >> 45) & 3] & 0xffff) << 48)| ((pte & PTE_PPN_MASK & ~PTE_TOPADDR_MASK) << PTE_PADDR_SHIFT); -+ -+#ifdef LINUX_SPARC /* XXXX if not local pci bus, then or in the bypass bit */ -+ phys ^= 0xfffe000000000000; -+#endif -+ -+#if defined(__alpha) -+ phys ^= alpha_mv.pci_dac_offset; -+#endif -+ return phys; -+ } -+ -+ if (pte & PTE_CommandQueue) -+ return (regs_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+ -+ /* sdram */ -+ return (sdram_base | ((pte & PTE_PPN_MASK) << PTE_PADDR_SHIFT)); -+} -+ -+EXPORT_SYMBOL(elan4mmu_phys2pte); -+EXPORT_SYMBOL(elan4mmu_pte2phys); -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/neterr.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/neterr.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/neterr.c 2005-07-28 14:52:52.833681016 -0400 -@@ -0,0 +1,270 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.4.6.3 2004/11/05 13:11:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct neterr_inputq -+{ -+ E4_InputQueue inputq; /* input queue */ -+ E4_Event32 qevent; /* input queue event */ -+ E4_uint64 sent; /* # messages sent (cq flow control)*/ -+} NETERR_INPUTQ; -+ -+#define NETERR_NSLOTS 64 /* single page of queue space (4Kb) */ -+ -+#define NETERR_RETRIES 16 -+#define NETERR_CQ_SIZE CQ_Size8K -+#define NETERR_CQ_MSGS (CQ_Size(NETERR_CQ_SIZE) / (21*8)) -+#define NETERR_VP_COUNT 64 /* this *must* be > NETERR_CQ_MSGS */ -+#define NETERR_VP_BASE 1 /* use vp 1 upwards */ -+ -+void -+elan4_neterr_interrupt (ELAN4_DEV *dev, void *arg) -+{ -+ E4_Addr qfptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr)); -+ E4_Addr qbptr = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr)); -+ E4_Addr qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ E4_Addr qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ ELAN4_CQ *cq = dev->dev_neterr_intcq; -+ int count = 0; -+ ELAN4_CTXT *ctxt; -+ ELAN4_NETERR_MSG msg; -+ -+ while (qfptr != qbptr) -+ { -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_neterr_slots + (qfptr - qfirst), &msg, ELAN4_NETERR_MSG_SIZE); -+ -+ ctxt = elan4_networkctxt (dev, msg.msg_context); -+ -+ if (ctxt != NULL && ctxt->ctxt_ops->op_neterrmsg) -+ ctxt->ctxt_ops->op_neterrmsg (ctxt, &msg); -+ else -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_interrupt: no process - sender %d.%d\n", msg.msg_sender.loc_node, msg.msg_sender.loc_context); -+ -+ count++; -+ -+ /* move on the from pointer */ -+ qfptr = (qfptr == qlast) ? qfirst : qfptr + ELAN4_NETERR_MSG_SIZE; -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfptr); -+ } -+ -+ if (count == 0) -+ { -+ printk ("elan4_neterr_interrupt: spurious\n"); -+ return; -+ } -+ -+ /* Issue the waitevent to the interrupt queue */ -+ writeq (WAIT_EVENT_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)), cq->cq_mapping); -+ writeq ( E4_EVENT_INIT_VALUE (-32 * count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INTCQ_ADDR, cq->cq_mapping); -+ writeq (INTERRUPT_CMD | (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT), cq->cq_mapping); -+ -+ pioflush_reg (dev); -+} -+ -+int -+elan4_neterr_init (ELAN4_DEV *dev) -+{ -+ unsigned int intqaddr; -+ E4_Addr qfirst, qlast; -+ -+ if ((dev->dev_neterr_inputq = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_slots = elan4_sdram_alloc (dev, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE))) == 0) -+ return 0; -+ -+ if ((dev->dev_neterr_msgcq = elan4_alloccq (&dev->dev_ctxt, NETERR_CQ_SIZE, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ if ((dev->dev_neterr_intcq = elan4_alloccq (&dev->dev_ctxt, CQ_Size1K, CQ_WaitEventEnableBit | CQ_InterruptEnableBit, CQ_Priority)) == NULL) -+ return 0; -+ -+ intqaddr = (dev->dev_cqoffset + elan4_cq2num (dev->dev_neterr_intcq)) * CQ_CommandMappingSize; -+ qfirst = DEVICE_NETERR_SLOTS_ADDR; -+ qlast = qfirst + (NETERR_NSLOTS-1) * ELAN4_NETERR_MSG_SIZE; -+ -+ spin_lock_init (&dev->dev_neterr_lock); -+ -+ /* Register an interrupt operation */ -+ dev->dev_neterr_intop.op_function = elan4_neterr_interrupt; -+ dev->dev_neterr_intop.op_arg = NULL; -+ -+ elan4_register_intop (dev, &dev->dev_neterr_intop); -+ -+ /* Initialise the inputq descriptor and event */ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_fptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_bptr), qfirst); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_control), E4_InputQueueControl (qfirst, qlast, ELAN4_NETERR_MSG_SIZE)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, inputq.q_event), DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_CountAndType), E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WritePtr), DEVICE_NETERR_INTCQ_ADDR); -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, qevent.ev_WriteValue), (dev->dev_neterr_intop.op_cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ elan4_sdram_writeq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent), 0); -+ -+ /* Map them all into the device context */ -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, (dev->dev_neterr_inputq >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_RemoteAll)); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, (intqaddr >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_LocDataWrite) | PTE_CommandQueue); -+ elan4mmu_pteload (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, (dev->dev_neterr_slots >> PTE_PADDR_SHIFT) | PTE_SetPerm(PERM_DataReadWrite)); -+ -+ /* finally attach to the neterr context */ -+ if (elan4_attach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM) != 0) -+ panic ("elan4_neterr_init: failed to attach to neterr context\n"); -+ -+ /* and drop the context filter */ -+ elan4_set_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 1; -+} -+ -+void -+elan4_neterr_destroy (ELAN4_DEV *dev) -+{ -+ if (dev->dev_neterr_intcq) -+ { -+ elan4_detach_filter (&dev->dev_ctxt, ELAN4_NETERR_CONTEXT_NUM); -+ -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_SLOTS_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INTCQ_ADDR, 1 << dev->dev_pageshift[0]); -+ elan4mmu_unload_range (&dev->dev_ctxt, 0, DEVICE_NETERR_INPUTQ_ADDR, 1 << dev->dev_pageshift[0]); -+ -+ spin_lock_destroy (&dev->dev_neterr_lock); -+ } -+ -+ if (dev->dev_neterr_intcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_intcq); -+ dev->dev_neterr_intcq = NULL; -+ -+ if (dev->dev_neterr_msgcq) -+ elan4_freecq (&dev->dev_ctxt, dev->dev_neterr_msgcq); -+ dev->dev_neterr_msgcq = NULL; -+ -+ if (dev->dev_neterr_slots) -+ elan4_sdram_free (dev, dev->dev_neterr_slots, roundup (NETERR_NSLOTS * ELAN4_NETERR_MSG_SIZE, SDRAM_PAGE_SIZE)); -+ dev->dev_neterr_slots = 0; -+ -+ if (dev->dev_neterr_inputq) -+ elan4_sdram_free (dev, dev->dev_neterr_inputq, SDRAM_PAGE_SIZE); -+ dev->dev_neterr_inputq = 0; -+} -+ -+int -+elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ ELAN4_CQ *cq = dev->dev_neterr_msgcq; -+ E4_uint64 sent; -+ E4_VirtualProcessEntry route; -+ unsigned int vp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ -+ sent = elan4_sdram_readq (dev, dev->dev_neterr_inputq + offsetof (NETERR_INPUTQ, sent)); -+ -+ PRINTF (DBG_DEVICE, DBG_NETERR, "elan4_neterr_sendmsg: nodeid=%d retries=%d cookie=%llx sender=%d,%d%s\n", -+ nodeid, retries, msg->msg_cookies[0], msg->msg_sender.loc_node, msg->msg_sender.loc_context, -+ (dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS ? " - no cq space" : ""); -+ -+ if ((dev->dev_neterr_queued - sent) >= NETERR_CQ_MSGS) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ vp = NETERR_VP_BASE + (dev->dev_neterr_queued % NETERR_VP_COUNT); -+ -+ if (elan4_generate_route (&dev->dev_position, &route, ELAN4_NETERR_CONTEXT_NUM, nodeid, nodeid, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI) < 0) -+ { -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 0; -+ } -+ -+ elan4_write_route (dev, dev->dev_routetable, vp, &route); -+ -+ writeq ((GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(retries)), cq->cq_mapping); -+ writeq (NOP_CMD, cq->cq_mapping); -+ -+ writeq (OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp), cq->cq_mapping); -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_GETINDEX << 16), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD) << 16), cq->cq_mapping); -+ writeq ( 0 /* address */, cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[0], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[1], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[2], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[3], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[4], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[5], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[6], cq->cq_mapping); -+ writeq ( ((E4_uint64 *) msg)[7], cq->cq_mapping); -+ -+ writeq (SEND_TRANS_CMD | (TR_INPUT_Q_COMMIT << 16), cq->cq_mapping); -+ writeq ( DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq), cq->cq_mapping); -+ writeq ( 0 /* cookie */, cq->cq_mapping); -+ -+ writeq (GUARD_CMD | GUARD_CHANNEL(0) | GUARD_RESET(NETERR_RETRIES), cq->cq_mapping); -+ writeq (WRITE_DWORD_CMD | (DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, sent)), cq->cq_mapping); -+ writeq ( ++dev->dev_neterr_queued, cq->cq_mapping); -+ -+ pioflush_reg (dev); -+ -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ -+ return 1; -+} -+ -+int -+elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap) -+{ -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ unsigned long flags; -+ -+ switch (IPROC_TrapValue (hdrp->IProcStatusCntxAndTrType)) -+ { -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ return 1; -+ -+ case InputEventEngineTrapped: -+ printk ("elan%d: device_iproc_trap: InputEventEngineTrapped - Trans=%x TrAddr=%llx\n", -+ dev->dev_instance, (int)IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType), (long long) hdrp->TrAddr); -+ -+ if ((IPROC_TransactionType (hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ hdrp->TrAddr == DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, inputq)) -+ { -+ spin_lock_irqsave (&dev->dev_neterr_lock, flags); -+ writeq ((DEVICE_NETERR_INPUTQ_ADDR + offsetof (NETERR_INPUTQ, qevent)) | SET_EVENT_CMD, dev->dev_neterr_msgcq->cq_mapping); -+ spin_unlock_irqrestore (&dev->dev_neterr_lock, flags); -+ return 1; -+ } -+ -+ default: -+ return 0; -+ } -+} -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/procfs_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/procfs_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/procfs_Linux.c 2005-07-28 14:52:52.835680712 -0400 -@@ -0,0 +1,1074 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_Linux.c,v 1.27.2.9 2005/03/09 12:00:08 addy Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elan4mod/procfs_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * -+ * procfs format for elan4: -+ * -+ * /proc/qsnet/elan4/config -+ * elan4_debug -+ * elan4_debug_toconsole -+ * elan4_debug_tobuffer -+ * elan4_debug_display_ctxt -+ * elan4_debug_ignore_ctxt -+ * elan4_debug_ignore_type -+ * elan4_debug_mmu -+ * elan4_mainint_punt_loops -+ * user_p2p_route_options -+ * user_bcast_route_options -+ * -+ * /proc/qsnet/elan4/deviceN -+ * stats -+ * position -+ * vpd -+ */ -+ -+struct proc_dir_entry *elan4_procfs_root; -+struct proc_dir_entry *elan4_config_root; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static int -+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static int -+proc_read_devinfo (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len = 0; -+ -+ if (! dev) -+ len = sprintf (page, "\n"); -+ else -+ { -+ len += sprintf (page + len, "dev_vendor_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_device_id 0x%x\n", dev->dev_devinfo.dev_vendor_id); -+ len += sprintf (page + len, "dev_revision_id 0x%x\n", dev->dev_devinfo.dev_revision_id); -+ len += sprintf (page + len, "dev_instance 0x%x\n", dev->dev_devinfo.dev_instance); -+ len += sprintf (page + len, "dev_rail 0x%x\n", dev->dev_devinfo.dev_rail); -+ len += sprintf (page + len, "dev_driver_version 0x%x\n", dev->dev_devinfo.dev_driver_version); -+ len += sprintf (page + len, "dev_params_mask 0x%x\n", dev->dev_devinfo.dev_params_mask); -+ len += sprintf (page + len, "dev_params: \n"); -+ len += sprintf (page + len, " 0 - PciCmdQPadFlag 0x%x\n", dev->dev_devinfo.dev_params.values[0]); -+ len += sprintf (page + len, " 1 - EventCopyWinPt 0x%x\n", dev->dev_devinfo.dev_params.values[1]); -+ len += sprintf (page + len, " 2 - PciWriteCombining 0x%x\n", dev->dev_devinfo.dev_params.values[2]); -+ len += sprintf (page + len, " 3 - 0x%x\n", dev->dev_devinfo.dev_params.values[3]); -+ len += sprintf (page + len, " 4 - 0x%x\n", dev->dev_devinfo.dev_params.values[4]); -+ len += sprintf (page + len, " 5 - 0x%x\n", dev->dev_devinfo.dev_params.values[5]); -+ len += sprintf (page + len, " 6 - 0x%x\n", dev->dev_devinfo.dev_params.values[6]); -+ len += sprintf (page + len, " 7 - 0x%x\n", dev->dev_devinfo.dev_params.values[7]); -+ len += sprintf (page + len, " 8 - 0x%x\n", dev->dev_devinfo.dev_params.values[8]); -+ len += sprintf (page + len, " 9 - 0x%x\n", dev->dev_devinfo.dev_params.values[9]); -+ len += sprintf (page + len, " 10 - 0x%x\n", dev->dev_devinfo.dev_params.values[10]); -+ len += sprintf (page + len, " 11 - features 0x%x\n", dev->dev_devinfo.dev_params.values[11]); -+ len += sprintf (page + len, "dev_num_down_links_value 0x%x\n", dev->dev_devinfo.dev_num_down_links_value); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_position (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if (dev->dev_position.pos_mode == ELAN_POS_UNKNOWN) -+ len = sprintf (page, "\n"); -+ else -+ len = sprintf (page, -+ "NodeId %d\n" -+ "NumLevels %d\n" -+ "NumNodes %d\n", -+ dev->dev_position.pos_nodeid, -+ dev->dev_position.pos_levels, -+ dev->dev_position.pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_position (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned nodeid = ELAN_INVALID_NODE; -+ unsigned numnodes = 0; -+ char *page, *p; -+ int res; -+ ELAN_POSITION pos; -+ -+ if (count == 0) -+ return (0); -+ -+ if (count >= PAGE_SIZE) -+ return (-EINVAL); -+ -+ if ((page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (page, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ page[count] = '\0'; -+ -+ if (page[count-1] == '\n') -+ page[count-1] = '\0'; -+ -+ if (! strcmp (page, "")) -+ { -+ pos.pos_mode = ELAN_POS_UNKNOWN; -+ pos.pos_nodeid = ELAN_INVALID_NODE; -+ pos.pos_nodes = 0; -+ pos.pos_levels = 0; -+ } -+ else -+ { -+ for (p = page; *p; ) -+ { -+ while (isspace (*p)) -+ p++; -+ -+ if (! strncmp (p, "NodeId=", strlen("NodeId="))) -+ nodeid = simple_strtoul (p + strlen ("NodeId="), NULL, 0); -+ if (! strncmp (p, "NumNodes=", strlen ("NumNodes="))) -+ numnodes = simple_strtoul (p + strlen ("NumNodes="), NULL, 0); -+ -+ while (*p && !isspace(*p)) -+ p++; -+ } -+ -+ if (elan4_compute_position (&pos, nodeid, numnodes, dev->dev_devinfo.dev_num_down_links_value) != 0) -+ printk ("elan%d: invalid values for NodeId=%d NumNodes=%d\n", dev->dev_instance, nodeid, numnodes); -+ else -+ { -+ printk ("elan%d: setting NodeId=%d NumNodes=%d NumLevels=%d\n", dev->dev_instance, pos.pos_nodeid, -+ pos.pos_nodes, pos.pos_levels); -+ -+ if (elan4_set_position (dev, &pos) < 0) -+ printk ("elan%d: failed to set device position\n", dev->dev_instance); -+ } -+ } -+ } -+ -+ MOD_DEC_USE_COUNT; -+ free_page ((unsigned long) page); -+ -+ return (count); -+} -+ -+static int -+proc_read_temp (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned char values[2]; -+ int len; -+ -+ if (i2c_disable_auto_led_update (dev) < 0) -+ len = sprintf (page, ""); -+ else -+ { -+ if (i2c_read (dev, I2C_TEMP_ADDR, 2, values) < 0) -+ len = sprintf (page, ""); -+ else -+ len = sprintf (page, "%s%d%s\n", (values[0] & 0x80) ? "-" : "", -+ (values[0] & 0x80) ? -((signed char)values[0]) - 1 : values[0], -+ (values[1] & 0x80) ? ".5" : ".0"); -+ -+ i2c_enable_auto_led_update (dev); -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_eccerr (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char errstr[200]; -+ register int i, len = 0; -+ -+ *page = '\0'; -+ -+ for (i = 0; i < sizeof (dev->dev_sdramerrs)/sizeof(dev->dev_sdramerrs[0]); i++) -+ if (dev->dev_sdramerrs[i].ErrorCount != 0) -+ len += sprintf (page + len, "%s occured %0d times\n", -+ elan4_sdramerr2str (dev, dev->dev_sdramerrs[i].EccStatus, dev->dev_sdramerrs[i].ConfigReg, errstr), -+ dev->dev_sdramerrs[i].ErrorCount); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_vpd (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ if ( elan4_read_vpd (dev, NULL, page) ) -+ len = sprintf (page, "no vpd tags found\n"); -+ else -+ len = strlen(page)+1; -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_read_linkportkey (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int len; -+ -+ len = sprintf (page, "%llx\n", read_reg64 (dev, LinkPortLock)); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_linkportkey (struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ int res = 0; -+ char tmpbuf[30]; -+ -+ if (count > sizeof (tmpbuf) - 1) -+ return -EINVAL; -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ write_reg64 (dev, LinkPortLock, simple_strtoull (tmpbuf, NULL, 16)); -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (count); -+} -+ -+static struct device_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+ unsigned minrev; -+} device_info[] = { -+ {"devinfo", proc_read_devinfo, NULL, 0}, -+ {"position", proc_read_position, proc_write_position, 0}, -+ {"temp", proc_read_temp, NULL, 1}, -+ {"eccerr", proc_read_eccerr, NULL, 0}, -+ {"vpd", proc_read_vpd, NULL, 0}, -+ {"linkportkey", proc_read_linkportkey, proc_write_linkportkey, 0}, -+}; -+ -+static int -+proc_read_link_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "link_errors", dev->dev_stats.s_link_errors); -+ p += sprintf (p, "%20s %ld\n", "lock_errors", dev->dev_stats.s_lock_errors); -+ p += sprintf (p, "%20s %ld\n", "deskew_errors", dev->dev_stats.s_deskew_errors); -+ p += sprintf (p, "%20s %ld\n", "phase_errors", dev->dev_stats.s_phase_errors); -+ -+ p += sprintf (p, "%20s %ld\n", "data_errors", dev->dev_stats.s_data_errors); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow0", dev->dev_stats.s_fifo_overflow0); -+ p += sprintf (p, "%20s %ld\n", "fifo_overflow1", dev->dev_stats.s_fifo_overflow1); -+ p += sprintf (p, "%20s %ld\n", "mod45changed", dev->dev_stats.s_mod45changed); -+ p += sprintf (p, "%20s %ld\n", "pack_not_seen", dev->dev_stats.s_pack_not_seen); -+ -+ p += sprintf (p, "%20s %ld\n", "linkport_keyfail", dev->dev_stats.s_linkport_keyfail); -+ p += sprintf (p, "%20s %ld\n", "eop_reset", dev->dev_stats.s_eop_reset); -+ p += sprintf (p, "%20s %ld\n", "bad_length", dev->dev_stats.s_bad_length); -+ p += sprintf (p, "%20s %ld\n", "crc_error", dev->dev_stats.s_crc_error); -+ p += sprintf (p, "%20s %ld\n", "crc_bad", dev->dev_stats.s_crc_bad); -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_timeout", dev->dev_stats.s_cproc_timeout); -+ p += sprintf (p, "%20s %ld\n", "dproc_timeout", dev->dev_stats.s_dproc_timeout); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static char * -+proc_sprintf_bucket_stat (char *p, char *name, unsigned long *stats, int *buckets) -+{ -+ int i; -+ -+ p += sprintf (p, "%20s ", name); -+ -+ for (i = 0; i < ELAN4_DEV_STATS_BUCKETS-1; i++) -+ p += sprintf (p, "%ld(<=%d) ", stats[i], buckets[i]); -+ p += sprintf (p, "%ld(>%d)\n", stats[i], buckets[i-1]); -+ -+ return p; -+} -+ -+static int -+proc_read_intr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "interrupts", dev->dev_stats.s_interrupts); -+ p += sprintf (p, "%20s %ld\n", "haltints", dev->dev_stats.s_haltints); -+ -+ p += sprintf (p, "%20s %ld\n", "mainint_punts", dev->dev_stats.s_mainint_punts); -+ p += sprintf (p, "%20s %ld\n", "mainint_rescheds", dev->dev_stats.s_mainint_rescheds); -+ -+ p = proc_sprintf_bucket_stat (p, "mainints", dev->dev_stats.s_mainints, MainIntBuckets); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "cproc_traps", dev->dev_stats.s_cproc_traps); -+ p += sprintf (p, "%20s %ld\n", "dproc_traps", dev->dev_stats.s_dproc_traps); -+ p += sprintf (p, "%20s %ld\n", "eproc_traps", dev->dev_stats.s_eproc_traps); -+ p += sprintf (p, "%20s %ld\n", "iproc_traps", dev->dev_stats.s_iproc_traps); -+ p += sprintf (p, "%20s %ld\n", "tproc_traps", dev->dev_stats.s_tproc_traps); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const CProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_cproc_trap_types)/sizeof(dev->dev_stats.s_cproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", CProcTrapNames[i], dev->dev_stats.s_cproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_dproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const DProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_dproc_trap_types)/sizeof(dev->dev_stats.s_dproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", DProcTrapNames[i], dev->dev_stats.s_dproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_eproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const EProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_eproc_trap_types)/sizeof(dev->dev_stats.s_eproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", EProcTrapNames[i], dev->dev_stats.s_eproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_iproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const IProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_iproc_trap_types)/sizeof(dev->dev_stats.s_iproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", IProcTrapNames[i], dev->dev_stats.s_iproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_tproc_trap_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ int i; -+ extern char *const TProcTrapNames[]; -+ -+ for (i = 0; i < sizeof (dev->dev_stats.s_tproc_trap_types)/sizeof(dev->dev_stats.s_tproc_trap_types[0]); i++) -+ p += sprintf (p, "%-40s %ld\n", TProcTrapNames[i], dev->dev_stats.s_tproc_trap_types[i]); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_sdram_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ -+ p += sprintf (p, "%20s %ld\n", "correctable_errors", dev->dev_stats.s_correctable_errors); -+ p += sprintf (p, "%20s %ld\n", "multiple_errors", dev->dev_stats.s_multiple_errors); -+ p += sprintf (p, "%20s %ldK\n", "sdram_bytes_free", dev->dev_stats.s_sdram_bytes_free/1024); -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+void -+elan4_ringbuf_store (ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev) -+{ -+ int newend; -+ -+ ASSERT (kmutex_is_locked (&dev->dev_lock)); -+ -+ memcpy(&ringbuf->routes[ringbuf->end], route, sizeof(E4_VirtualProcessEntry)); -+ newend = ringbuf->end + 1; -+ if (newend >= DEV_STASH_ROUTE_COUNT) -+ newend -= DEV_STASH_ROUTE_COUNT; -+ if (newend == ringbuf->start) -+ ringbuf->start += 1; -+ if (ringbuf->start >= DEV_STASH_ROUTE_COUNT) -+ ringbuf->start -= DEV_STASH_ROUTE_COUNT; -+ ringbuf->end = newend; -+} -+ -+static int -+proc_read_dproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *dproc_timeout; -+ -+ dproc_timeout = dev->dev_dproc_timeout; -+ -+ if (!dproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (dproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, dproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+elan4_route2str (E4_VirtualProcessEntry *route, char *routeStr) -+{ -+ int part = 0; -+ int shift; -+ int broadcast; -+ E4_uint64 value; -+ char *ptr = routeStr; -+ int b; -+ -+ /* unpack first */ -+ value = route->Values[part] & 0x7f; -+ if ( (value & 0x78) == 0) { -+ /* empty route */ -+ strcpy(routeStr,"Invalid lead route"); -+ return (-EINVAL); -+ } -+ -+ if ( value & 0x40 ) { -+ /* broad cast */ -+ strcpy(routeStr,"Broadcast"); -+ return (-EINVAL); -+ } else { -+ switch ((value & 0x30) >> 4) { -+ case 0: { *ptr++ = '0' + (value & 0x7); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ -+ shift = 16; -+ broadcast = 0; -+ while ( 1 ) { -+ b = (route->Values[part] >> shift) & 0xf; -+ -+ if ( broadcast ) { -+ /* about to pick up the second byte of a broadcast pair */ -+ broadcast = 0; -+ } else { -+ if ( b & 0x8) { -+ /* output link */ -+ *ptr++ = '0' + (b & 0x7); -+ } else { -+ if ( b & 0x4) { -+ /* broad cast */ -+ broadcast = 1; -+ } else { -+ switch ( b & 0x3 ) { -+ case 0: { *ptr++ = 0 ; return (0); break; } -+ case 1: { *ptr++ = 'M'; break; } -+ case 2: { *ptr++ = 'U'; break; } -+ case 3: { *ptr++ = 'A'; break; } -+ } -+ } -+ } -+ } -+ -+ shift += 4; -+ if ( part != 0 ) { -+ if ( shift > 36) { -+ /* too far, now in the crc value */ -+ strcpy(routeStr,"Invalid route length"); -+ return (-EINVAL); -+ } -+ } else { -+ if ( shift >= 64) { -+ /* move to the next 64 bits */ -+ part = 1; -+ shift = 2; -+ } -+ } -+ } -+ -+ /* never reached */ -+ return (-EINVAL); -+} -+ -+ -+static int -+proc_read_dproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_dproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+ -+static int -+proc_read_cproc_timeout_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *cproc_timeout; -+ -+ cproc_timeout = dev->dev_cproc_timeout; -+ -+ if (!cproc_timeout) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (cproc_timeout[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, cproc_timeout[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_cproc_timeout_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_cproc_timeout_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_traperr_stats (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ unsigned int *ack_errors; -+ -+ ack_errors = dev->dev_ack_errors; -+ -+ if (!ack_errors) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int i; -+ -+ for (i=0; idev_position.pos_nodes; i++) -+ if (ack_errors[i] != 0) -+ p += sprintf (p, "Node %d: %u errors\n", i, ack_errors[i]); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static int -+proc_read_ackerror_routes (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ char *p = page; -+ ELAN4_ROUTE_RINGBUF *ringbuf; -+ char routestr[33]; -+ -+ ringbuf = &dev->dev_ack_error_routes; -+ -+ if (!ringbuf) -+ p += sprintf (p, "No stats available\n"); -+ else -+ { -+ int start; -+ int end; -+ int i; -+ -+ memset(&routestr, 0, 33); -+ -+ kmutex_lock(&dev->dev_lock); -+ -+ start = ringbuf->start; -+ end = ringbuf->end; -+ -+ if (end < start) -+ end = DEV_STASH_ROUTE_COUNT; -+ -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ -+ if (ringbuf->end < start) -+ { -+ start = 0; -+ end = ringbuf->end; -+ for (i=start; iroutes[i], routestr); -+ p += sprintf (p, "Route %llx %llx->%s\n", ringbuf->routes[i].Values[0], ringbuf->routes[i].Values[1], routestr); -+ } -+ } -+ -+ kmutex_unlock(&dev->dev_lock); -+ } -+ -+ return (proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct stats_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} stats_info[] = { -+ {"link", proc_read_link_stats, NULL}, -+ {"intr", proc_read_intr_stats, NULL}, -+ {"trap", proc_read_trap_stats, NULL}, -+ {"cproc", proc_read_cproc_trap_stats, NULL}, -+ {"dproc", proc_read_dproc_trap_stats, NULL}, -+ {"eproc", proc_read_eproc_trap_stats, NULL}, -+ {"iproc", proc_read_iproc_trap_stats, NULL}, -+ {"tproc", proc_read_tproc_trap_stats, NULL}, -+ {"sdram", proc_read_sdram_stats, NULL}, -+ {"trapdmaerr", proc_read_traperr_stats, NULL}, -+ {"dproctimeout", proc_read_dproc_timeout_stats, NULL}, -+ {"cproctimeout", proc_read_cproc_timeout_stats, NULL}, -+ {"dproctimeoutroutes", proc_read_dproc_timeout_routes, NULL}, -+ {"cproctimeoutroutes", proc_read_cproc_timeout_routes, NULL}, -+ {"ackerrroutes", proc_read_ackerror_routes, NULL}, -+}; -+ -+static int -+proc_read_sysconfig (char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ E4_uint32 syscontrol = dev->dev_syscontrol; -+ int len = 0; -+ -+ *eof = 1; -+ if (off != 0) -+ return (0); -+ -+ if (syscontrol & CONT_EN_ALL_SETS) -+ len += sprintf (page + len, "%sEN_ALL_SETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_MMU_ENABLE) -+ len += sprintf (page + len, "%sMMU_ENABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_HASH_TABLE) -+ len += sprintf (page + len, "%sCACHE_HASH_TABLE", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_CHAINS) -+ len += sprintf (page + len, "%sCACHE_CHAINS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_ROOT_CNTX) -+ len += sprintf (page + len, "%sCACHE_ROOT_CNTX", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_STEN_ROUTES) -+ len += sprintf (page + len, "%sCACHE_STEN_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_CACHE_DMA_ROUTES) -+ len += sprintf (page + len, "%sCACHE_DMA_ROUTES", len == 0 ? "" : " "); -+ if (syscontrol & CONT_INHIBIT_MAX_CHAIN_ITEMS) -+ len += sprintf (page + len, "%sINHIBIT_MAX_CHAIN_ITEMS", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "%sTABLE0_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE0_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE0_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ len += sprintf (page + len, "%sTABLE1_MASK_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_MASK_SIZE_SHIFT) & PAGE_MASK_MASK); -+ len += sprintf (page + len, "%sTABLE1_PAGE_SIZE=%d", len == 0 ? "" : " ", (syscontrol >> CONT_TABLE1_PAGE_SIZE_SHIFT) & PAGE_SIZE_MASK); -+ -+ if (syscontrol & CONT_2K_NOT_1K_DMA_PACKETS) -+ len += sprintf (page + len, "%s2K_NOT_1K_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_ALIGN_ALL_DMA_PACKETS) -+ len += sprintf (page + len, "%sALIGN_ALL_DMA_PACKETS", len == 0 ? "" : " "); -+ if (syscontrol & CONT_DIRECT_MAP_PCI_WRITES) -+ len += sprintf (page + len, "%sDIRECT_MAP_PCI_WRITES", len == 0 ? "" : " "); -+ -+ len += sprintf (page + len, "\n"); -+ -+ *start = page; -+ return (len); -+} -+ -+static int -+proc_write_sysconfig (struct file *file, const char *ubuffer, unsigned long count, void *data) -+{ -+ ELAN4_DEV *dev = (ELAN4_DEV *) data; -+ unsigned long page = __get_free_page (GFP_KERNEL); -+ char *buffer = (char *)page; -+ int add = 0; -+ int sub = 0; -+ -+ count = MIN (count, PAGE_SIZE - 1); -+ if (copy_from_user (buffer, ubuffer, count)) -+ { -+ free_page (page); -+ return (-EFAULT); -+ } -+ -+ buffer[count] = 0; /* terminate string */ -+ -+ while (*buffer != 0) -+ { -+ char *ptr; -+ char *end; -+ int ch; -+ int val; -+ int op; -+ -+ ch = *buffer; -+ if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') -+ { -+ buffer++; -+ continue; -+ } -+ -+ op = *buffer; -+ if (op == '+' || op == '-') -+ buffer++; -+ -+ for (end = buffer; *end != 0; end++) -+ if (*end == ' ' || *end == '\t' || -+ *end == '\r' || *end == '\n') -+ break; -+ -+ if (end == buffer) -+ break; -+ -+ ch = *end; -+ *end = 0; -+ -+ for (ptr = buffer; *ptr != 0; ptr++) -+ if ('a' <= *ptr && *ptr <= 'z') -+ *ptr = *ptr + 'A' - 'a'; -+ -+ if (!strcmp (buffer, "EN_ALL_SETS")) -+ val = CONT_EN_ALL_SETS; -+ if (!strcmp (buffer, "CACHE_HASH_TABLE")) -+ val = CONT_CACHE_HASH_TABLE; -+ else if (!strcmp (buffer, "CACHE_CHAINS")) -+ val = CONT_CACHE_CHAINS; -+ else if (!strcmp (buffer, "CACHE_ROOT_CNTX")) -+ val = CONT_CACHE_ROOT_CNTX; -+ else if (!strcmp (buffer, "CACHE_STEN_ROUTES")) -+ val = CONT_CACHE_STEN_ROUTES; -+ else if (!strcmp (buffer, "CACHE_DMA_ROUTES")) -+ val = CONT_CACHE_DMA_ROUTES; -+ else if (!strcmp (buffer, "2K_NOT_1K_DMA_PACKETS")) -+ val = CONT_2K_NOT_1K_DMA_PACKETS; -+ else if (!strcmp (buffer, "ALIGN_ALL_DMA_PACKETS")) -+ val = CONT_ALIGN_ALL_DMA_PACKETS; -+ else -+ val = 0; -+ -+ if (op == '+') -+ add |= val; -+ else if (op == '-') -+ sub |= val; -+ -+ *end = ch; -+ buffer = end; -+ } -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ CHANGE_SYSCONTROL (dev, add, sub); -+ -+ if ((add | sub) & CONT_EN_ALL_SETS) -+ elan4_sdram_flushcache (dev, 0, E4_CacheSize); -+ -+ free_page (page); -+ return (count); -+} -+ -+static struct config_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} config_info[] = { -+ {"sysconfig", proc_read_sysconfig, proc_write_sysconfig}, -+}; -+ -+void -+elan4_procfs_device_init (ELAN4_DEV *dev) -+{ -+ struct proc_dir_entry *p; -+ char name[NAME_MAX]; -+ int i; -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ dev->dev_osdep.procdir = proc_mkdir (name, elan4_procfs_root); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ if ((p = create_proc_entry (device_info[i].name, 0, dev->dev_osdep.procdir)) != NULL) -+ { -+ p->read_proc = device_info[i].read_func; -+ p->write_proc = device_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.configdir = proc_mkdir ("config", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ { -+ if ((p = create_proc_entry (config_info[i].name, 0, dev->dev_osdep.configdir)) != NULL) -+ { -+ p->read_proc = config_info[i].read_func; -+ p->write_proc = config_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ dev->dev_osdep.statsdir = proc_mkdir ("stats", dev->dev_osdep.procdir); -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ { -+ if ((p = create_proc_entry (stats_info[i].name, 0, dev->dev_osdep.statsdir)) != NULL) -+ { -+ p->read_proc = stats_info[i].read_func; -+ p->write_proc = stats_info[i].write_func; -+ p->data = dev; -+ p->owner = THIS_MODULE; -+ } -+ } -+} -+ -+void -+elan4_procfs_device_fini (ELAN4_DEV *dev) -+{ -+ char name[NAME_MAX]; -+ int i; -+ -+ for (i = 0; i < sizeof (stats_info)/sizeof (stats_info[0]); i++) -+ remove_proc_entry (stats_info[i].name, dev->dev_osdep.statsdir); -+ remove_proc_entry ("stats", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (config_info)/sizeof (config_info[0]); i++) -+ remove_proc_entry (config_info[i].name, dev->dev_osdep.configdir); -+ remove_proc_entry ("config", dev->dev_osdep.procdir); -+ -+ for (i = 0; i < sizeof (device_info)/sizeof (device_info[0]); i++) -+ { -+ if (dev->dev_devinfo.dev_revision_id < device_info[i].minrev) -+ continue; -+ -+ remove_proc_entry (device_info[i].name, dev->dev_osdep.procdir); -+ } -+ -+ sprintf (name, "device%d", dev->dev_instance); -+ remove_proc_entry (name, elan4_procfs_root); -+} -+ -+void -+elan4_procfs_init(void) -+{ -+ elan4_procfs_root = proc_mkdir("elan4", qsnet_procfs_root); -+ elan4_config_root = proc_mkdir("config", elan4_procfs_root); -+ -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug", &elan4_debug, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_toconsole", &elan4_debug_toconsole, 0); -+ qsnet_proc_register_hex (elan4_config_root, "elan4_debug_tobuffer", &elan4_debug_tobuffer, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_debug_mmu", &elan4_debug_mmu, 0); -+ qsnet_proc_register_int (elan4_config_root, "elan4_mainint_punt_loops", &elan4_mainint_punt_loops, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_p2p_route_options", &user_p2p_route_options, 0); -+ qsnet_proc_register_hex (elan4_config_root, "user_bcast_route_options", &user_bcast_route_options, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_dproc_retry_count", &user_dproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "user_cproc_retry_count", &user_cproc_retry_count, 0); -+ qsnet_proc_register_int (elan4_config_root, "num_fault_save", &num_fault_save, 0); -+ qsnet_proc_register_int (elan4_config_root, "min_fault_pages", &min_fault_pages, 0); -+ qsnet_proc_register_int (elan4_config_root, "max_fault_pages", &max_fault_pages, 0); -+} -+ -+void -+elan4_procfs_fini(void) -+{ -+ remove_proc_entry ("max_fault_pages", elan4_config_root); -+ remove_proc_entry ("min_fault_pages", elan4_config_root); -+ remove_proc_entry ("num_fault_save", elan4_config_root); -+ remove_proc_entry ("user_cproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_dproc_retry_count", elan4_config_root); -+ remove_proc_entry ("user_bcast_route_options", elan4_config_root); -+ remove_proc_entry ("user_p2p_route_options", elan4_config_root); -+ remove_proc_entry ("elan4_mainint_punt_loops", elan4_config_root); -+ remove_proc_entry ("elan4_debug_mmu", elan4_config_root); -+ remove_proc_entry ("elan4_debug_tobuffer", elan4_config_root); -+ remove_proc_entry ("elan4_debug_toconsole", elan4_config_root); -+ remove_proc_entry ("elan4_debug", elan4_config_root); -+ -+ remove_proc_entry ("config", elan4_procfs_root); -+ remove_proc_entry ("elan4", qsnet_procfs_root); -+} -+ -+EXPORT_SYMBOL(elan4_procfs_root); -+EXPORT_SYMBOL(elan4_config_root); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/quadrics_version.h 2005-07-28 14:52:52.835680712 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/regions.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/regions.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/regions.c 2005-07-28 14:52:52.836680560 -0400 -@@ -0,0 +1,609 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: regions.c,v 1.18.2.1 2004/11/18 11:31:08 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/regions.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+/*================================================================================*/ -+/* elan address region management */ -+USER_RGN * -+user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ E4_Addr base; -+ E4_Addr lastaddr; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_ergns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_ergnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_ergns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_ebase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_etail; -+ -+ if ((lastaddr = (hirgn->rgn_ebase + hirgn->rgn_len - 1)) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_enext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_ergns; -+ -+ if (lorgn->rgn_ebase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_ebase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_enext; -+ -+ if (rgn->rgn_ebase <= addr) -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_ebase > addr) -+ rgn = rgn->rgn_eprev; -+ -+ if ((rgn->rgn_ebase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_enext); -+ else -+ { -+ uctx->uctx_ergnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_elan (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, nrgn->rgn_ebase, 1); -+ E4_Addr nbase = nrgn->rgn_ebase; -+ E4_Addr ntop = nbase + nrgn->rgn_len - 1; -+ E4_Addr base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_ergns = uctx->uctx_etail = nrgn; -+ nrgn->rgn_enext = nrgn->rgn_eprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_ebase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_eprev = rgn; /* of list */ -+ nrgn->rgn_enext = NULL; -+ rgn->rgn_enext = uctx->uctx_etail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_enext = rgn; /* insert before region */ -+ nrgn->rgn_eprev = rgn->rgn_eprev; -+ rgn->rgn_eprev = nrgn; -+ if (uctx->uctx_ergns == rgn) -+ uctx->uctx_ergns = nrgn; -+ else -+ nrgn->rgn_eprev->rgn_enext = nrgn; -+ } -+ } -+ uctx->uctx_ergnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_elan (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_ergnlast = rgn->rgn_enext; -+ if (rgn == uctx->uctx_etail) -+ uctx->uctx_etail = rgn->rgn_eprev; -+ else -+ rgn->rgn_enext->rgn_eprev = rgn->rgn_eprev; -+ -+ if (rgn == uctx->uctx_ergns) -+ uctx->uctx_ergns = rgn->rgn_enext; -+ else -+ rgn->rgn_eprev->rgn_enext = rgn->rgn_enext; -+ -+ return (rgn); -+} -+ -+USER_RGN * -+user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn = user_findrgn_elan (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_ebase <= addr && addr <= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ return (rgn); -+ -+ return (NULL); -+} -+ -+/* main address region management */ -+USER_RGN * -+user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail) -+{ -+ USER_RGN *rgn; -+ USER_RGN *hirgn; -+ USER_RGN *lorgn; -+ virtaddr_t lastaddr; -+ virtaddr_t base; -+ int forward; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) || kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (uctx->uctx_mrgns == NULL) -+ return (NULL); -+ -+ rgn = uctx->uctx_mrgnlast; -+ if (rgn == NULL) -+ rgn = uctx->uctx_mrgns; -+ -+ forward = 0; -+ if ((base = rgn->rgn_mbase) < addr) -+ { -+ if (addr <= (base + rgn->rgn_len - 1)) -+ return (rgn); /* ergnlast contained addr */ -+ -+ hirgn = uctx->uctx_mtail; -+ if ((lastaddr = hirgn->rgn_mbase + hirgn->rgn_len - 1) < addr) -+ return (tail ? hirgn : NULL); /* addr is out of range */ -+ -+ if ((addr - base) > (lastaddr - addr)) -+ rgn = hirgn; -+ else -+ { -+ rgn = rgn->rgn_mnext; -+ forward++; -+ } -+ } -+ else -+ { -+ lorgn = uctx->uctx_mrgns; -+ if (lorgn->rgn_mbase > addr) -+ return (lorgn); /* lowest regions is higher than addr */ -+ if ((addr - lorgn->rgn_mbase) < (base - addr)) -+ { -+ rgn = lorgn; /* search forward from head */ -+ forward++; -+ } -+ } -+ if (forward) -+ { -+ while ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ rgn = rgn->rgn_mnext; -+ -+ if (rgn->rgn_mbase <= addr) -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ else -+ { -+ while (rgn->rgn_mbase > addr) -+ rgn = rgn->rgn_mprev; -+ -+ if ((rgn->rgn_mbase + rgn->rgn_len - 1) < addr) -+ return (rgn->rgn_mnext); -+ else -+ { -+ uctx->uctx_mrgnlast = rgn; -+ return (rgn); -+ } -+ } -+} -+ -+static int -+user_addrgn_main (USER_CTXT *uctx, USER_RGN *nrgn) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, nrgn->rgn_mbase, 1); -+ virtaddr_t nbase = nrgn->rgn_mbase; -+ virtaddr_t ntop = nbase + nrgn->rgn_len - 1; -+ virtaddr_t base; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ if (rgn == NULL) -+ { -+ uctx->uctx_mrgns = uctx->uctx_mtail = nrgn; -+ nrgn->rgn_mnext = nrgn->rgn_mprev = NULL; -+ } -+ else -+ { -+ base = rgn->rgn_mbase; -+ -+ if ((base + rgn->rgn_len - 1) < nbase) /* top of region below requested address */ -+ { /* so insert after region (and hence at end */ -+ nrgn->rgn_mprev = rgn; /* of list */ -+ nrgn->rgn_mnext = NULL; -+ rgn->rgn_mnext = uctx->uctx_mtail = nrgn; -+ } -+ else -+ { -+ if (nbase >= base || ntop >= base) /* overlapping region */ -+ return (-1); -+ -+ nrgn->rgn_mnext = rgn; /* insert before region */ -+ nrgn->rgn_mprev = rgn->rgn_mprev; -+ rgn->rgn_mprev = nrgn; -+ if (uctx->uctx_mrgns == rgn) -+ uctx->uctx_mrgns = nrgn; -+ else -+ nrgn->rgn_mprev->rgn_mnext = nrgn; -+ } -+ } -+ uctx->uctx_mrgnlast = nrgn; -+ -+ return (0); -+} -+ -+static USER_RGN * -+user_removergn_main (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_rgnlock) && kmutex_is_locked (&uctx->uctx_rgnmutex)); -+ -+ uctx->uctx_mrgnlast = rgn->rgn_mnext; -+ if (rgn == uctx->uctx_mtail) -+ uctx->uctx_mtail = rgn->rgn_mprev; -+ else -+ rgn->rgn_mnext->rgn_mprev = rgn->rgn_mprev; -+ -+ if (rgn == uctx->uctx_mrgns) -+ uctx->uctx_mrgns = rgn->rgn_mnext; -+ else -+ rgn->rgn_mprev->rgn_mnext = rgn->rgn_mnext; -+ -+ return (rgn); -+} -+ -+/* Remove whole region from both lists */ -+static void -+user_removergn (USER_CTXT *uctx, USER_RGN *rgn) -+{ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ -+ user_removergn_elan (uctx, rgn); -+ user_removergn_main (uctx, rgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ KMEM_FREE (rgn, sizeof (USER_RGN)); -+} -+ -+/* Remove all allocated regions */ -+void -+user_freergns (USER_CTXT *uctx) -+{ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (uctx->uctx_mrgns) -+ user_removergn(uctx, uctx->uctx_mrgns); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ ASSERT (uctx->uctx_ergns == NULL); -+} -+ -+USER_RGN * -+user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn = user_findrgn_main (uctx, addr, 0); -+ -+ if (rgn != NULL && rgn->rgn_mbase <= addr && addr <= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ return (rgn); -+ return (NULL); -+} -+ -+int -+user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm) -+{ -+ USER_RGN *nrgn; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_setperm: user %lx elan %llx len %lx perm %x\n", maddr, (long long) eaddr, len, perm); -+ -+ if ((maddr & PAGEOFFSET) || (eaddr & PAGEOFFSET) || (len & PAGEOFFSET)) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: alignment failure\n"); -+ return (-EINVAL); -+ } -+ -+ if ((maddr + len - 1) <= maddr || (eaddr + len - 1) <= eaddr) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: range failure\n"); -+ return (-EINVAL); -+ } -+ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ if (nrgn == NULL) -+ return (-ENOMEM); -+ -+ nrgn->rgn_mbase = maddr; -+ nrgn->rgn_ebase = eaddr; -+ nrgn->rgn_len = len; -+ nrgn->rgn_perm = perm; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if (user_addrgn_elan (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: elan address exists\n"); -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ -+ if (user_addrgn_main (uctx, nrgn) < 0) -+ { -+ PRINTF0 (uctx, DBG_PERM, "user_setperm: main address exists\n"); -+ user_removergn_elan (uctx, nrgn); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ KMEM_FREE (nrgn, sizeof (USER_RGN)); -+ return (-EINVAL); -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ if ((perm & PERM_Preload)) -+ user_preload_main (uctx, maddr, len); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+void -+user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len) -+{ -+ E4_Addr raddr; -+ E4_Addr rtop; -+ USER_RGN *nrgn; -+ USER_RGN *rgn; -+ USER_RGN *rgn_next; -+ unsigned long ssize; -+ int res; -+ -+ PRINTF2 (uctx, DBG_PERM, "user_clrperm: elan %llx len %lx\n", addr, len); -+ -+ raddr = (addr & PAGEMASK); -+ rtop = ((addr + len - 1) & PAGEMASK) + (PAGESIZE-1); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ for (rgn = user_findrgn_elan (uctx, addr, 0); rgn != NULL; rgn = rgn_next) -+ { -+ if (rtop < rgn->rgn_ebase) /* rtop was in a gap */ -+ break; -+ -+ rgn_next = rgn->rgn_enext; /* Save next region pointer */ -+ -+ PRINTF (uctx, DBG_PERM, " elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ if (raddr <= rgn->rgn_ebase && rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* whole region is cleared */ -+ -+ PRINTF (uctx, DBG_PERM, " whole region\n"); -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1); -+ user_removergn (uctx, rgn); -+ } -+ else if (raddr <= rgn->rgn_ebase) -+ { -+ /* clearing at beginning, so shrink size and increment base ptrs */ -+ ssize = rtop - rgn->rgn_ebase + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at beginning %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", rgn->rgn_ebase, rgn->rgn_ebase + ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, rgn->rgn_ebase, ssize); -+ -+ rgn->rgn_mbase += ssize; -+ rgn->rgn_ebase += ssize; -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else if (rtop >= (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ /* clearing at end, so just shrink length of region */ -+ ssize = (rgn->rgn_ebase + rgn->rgn_len - 1) - raddr + 1; -+ -+ PRINTF (uctx, DBG_PERM, " clear at end %x\n", ssize); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, raddr+ssize-1); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, ssize); -+ -+ rgn->rgn_len -= ssize; -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ else -+ { -+ /* the section to go is in the middle, so need to */ -+ /* split it into two regions */ -+ KMEM_ALLOC (nrgn, USER_RGN *, sizeof (USER_RGN), 1); -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_PERM, " unload elan %llx->%llx\n", raddr, rtop); -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* XXXX tbl */, raddr, rtop - raddr + 1); -+ -+ nrgn->rgn_mbase = rgn->rgn_mbase + (rtop - rgn->rgn_ebase + 1); -+ nrgn->rgn_ebase = rtop + 1; -+ nrgn->rgn_len = (rgn->rgn_ebase + rgn->rgn_len - 1) - rtop; -+ nrgn->rgn_perm = rgn->rgn_perm; -+ -+ PRINTF (uctx, DBG_PERM, " new elan %llx->%llx main %p->%p\n", -+ nrgn->rgn_ebase, nrgn->rgn_ebase + nrgn->rgn_len-1, -+ nrgn->rgn_mbase, nrgn->rgn_mbase + nrgn->rgn_len-1); -+ -+ rgn->rgn_len = (raddr - rgn->rgn_ebase); /* shrink original region */ -+ -+ PRINTF (uctx, DBG_PERM, " old elan %llx->%llx main %p->%p\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + rgn->rgn_len-1, -+ rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len-1); -+ -+ res = user_addrgn_elan (uctx, nrgn); /* insert new region */ -+ ASSERT (res == 0); /* which cannot fail */ -+ -+ res = user_addrgn_main (uctx, nrgn); -+ ASSERT (res == 0); -+ -+ spin_unlock(&uctx->uctx_rgnlock); -+ } -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+} -+ -+int -+user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access) -+{ -+ USER_RGN *rgn; -+ -+ PRINTF3 (uctx, DBG_PERM, "user_checkperm: elan %lx len %lx access %x\n", raddr, rsize, access); -+ -+ if ((raddr + rsize - 1) < raddr) -+ return (-ENOMEM); -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ if ((rgn = user_rgnat_elan (uctx, raddr)) == (USER_RGN *) NULL) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ else -+ { -+ register int ssize; -+ -+ for (; rsize != 0; rsize -= ssize, raddr += ssize) -+ { -+ if (raddr > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ { -+ rgn = rgn->rgn_enext; -+ -+ if (rgn == NULL || raddr != rgn->rgn_ebase) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-ENOMEM); -+ } -+ } -+ if ((raddr + rsize - 1) > (rgn->rgn_ebase + rgn->rgn_len - 1)) -+ ssize = ((rgn->rgn_ebase + rgn->rgn_len - 1) - raddr) + 1; -+ else -+ ssize = rsize; -+ -+ PRINTF4 (uctx, DBG_PERM, "user_checkperm : rgn %lx -> %lx perm %x access %x\n", -+ rgn->rgn_ebase, rgn->rgn_ebase + (E4_Addr)rgn->rgn_len, rgn->rgn_perm, access); -+ -+ if (ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, access)) -+ { -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EACCES); -+ } -+ } -+ } -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ return (0); -+} -+ -+virtaddr_t -+user_elan2main (USER_CTXT *uctx, E4_Addr addr) -+{ -+ USER_RGN *rgn; -+ virtaddr_t raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_elan (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_mbase + (addr - rgn->rgn_ebase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+E4_Addr -+user_main2elan (USER_CTXT *uctx, virtaddr_t addr) -+{ -+ USER_RGN *rgn; -+ E4_Addr raddr; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ if ((rgn = user_rgnat_main (uctx, addr)) == (USER_RGN *) NULL) -+ raddr = (virtaddr_t) 0; -+ else -+ raddr = rgn->rgn_ebase + (addr - rgn->rgn_mbase); -+ -+ spin_unlock (&uctx->uctx_rgnlock); -+ -+ return (raddr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/routetable.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/routetable.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/routetable.c 2005-07-28 14:52:52.837680408 -0400 -@@ -0,0 +1,249 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: routetable.c,v 1.15 2004/07/20 09:29:40 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/routetable.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+ELAN4_ROUTE_TABLE * -+elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size) -+{ -+ ELAN4_ROUTE_TABLE *tbl; -+ -+ KMEM_ZALLOC (tbl, ELAN4_ROUTE_TABLE *, sizeof (ELAN4_ROUTE_TABLE), 1); -+ -+ if (tbl == (ELAN4_ROUTE_TABLE *) NULL) -+ return (NULL); -+ -+ tbl->tbl_size = (size & E4_VPT_SIZE_MASK); -+ tbl->tbl_entries = elan4_sdram_alloc (dev, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ if (tbl->tbl_entries == 0) -+ { -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+ return ((ELAN4_ROUTE_TABLE *) NULL); -+ } -+ -+ spin_lock_init (&tbl->tbl_lock); -+ -+ /* zero the route table */ -+ elan4_sdram_zeroq_sdram (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ return (tbl); -+} -+ -+void -+elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl) -+{ -+ elan4_sdram_free (dev, tbl->tbl_entries, (E4_VPT_MIN_ENTRIES << tbl->tbl_size) * sizeof (E4_VirtualProcessEntry)); -+ -+ spin_lock_destroy (&tbl->tbl_lock); -+ -+ KMEM_FREE (tbl, sizeof (ELAN4_ROUTE_TABLE)); -+} -+ -+void -+elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), entry->Values[1]); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), entry->Values[0]); -+ pioflush_sdram (dev); -+} -+ -+void -+elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ entry->Values[0] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0])); -+ entry->Values[1] = elan4_sdram_readq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1])); -+} -+ -+void -+elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp) -+{ -+ ASSERT (vp < (E4_VPT_MIN_ENTRIES << tbl->tbl_size)); -+ -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[0]), 0); -+ elan4_sdram_writeq (dev, tbl->tbl_entries + (vp * sizeof (E4_VirtualProcessEntry)) + offsetof (E4_VirtualProcessEntry, Values[1]), 0); -+ pioflush_sdram (dev); -+} -+ -+static void -+pack_them_routes (E4_VirtualProcessEntry *entry, E4_uint16 first, E4_uint8 *packed, unsigned ctx) -+{ -+ E4_uint64 value0 = first; -+ E4_uint64 value1 = ROUTE_CTXT_VALUE(ctx); -+ E4_uint32 ThirdRouteBCastVal; -+ register int i; -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ value0 |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ value1 |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ /* DMA fix for large broadcast route values that fall into the double issue of route value 3 bug. */ -+ /* NOTE - this is only required when the link is running in Mod45 mode, it could be automatically -+ * disabled when Mod44 is detected */ -+ -+ /* First seach for the alignment type. The bug is only sensitive to an odd bcast aligment on the 3rd word. */ -+ for (i=4;i<16;i++) -+ if (((value0 >> (i*4)) & 0xc) == 4) -+ i++; -+ -+ if (i == 17) -+ { -+ ThirdRouteBCastVal = value1 & 0xcccccccc; -+ if (((value1 & 0xfffff0000000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x04444444)) -+ value1 |= 0x140000000ULL; -+ else if (((value1 & 0xfffffff00000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00044444)) -+ value1 |= 0x1400000ULL; -+ else if (((value1 & 0xfffffffff000ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000444)) -+ value1 |= 0x14000ULL; -+ else if (((value1 & 0xfffffffffff0ULL) == 0ULL) && (ThirdRouteBCastVal == 0x00000004)) -+ value1 |= 0x140ULL; -+ } -+ -+ entry->Values[0] = value0; -+ entry->Values[1] = value1; -+} -+ -+int -+elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctx, unsigned lowid, unsigned highid, unsigned options) -+{ -+ unsigned int broadcast = (lowid != highid); -+ unsigned int noadaptive = 0; -+ int padbcast = 0; -+ E4_uint16 first; -+ int rb; -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ int level, llink, hlink; -+ -+ regenerate_routes: -+ first = 0; -+ rb = 0; -+ -+ switch (pos->pos_mode) -+ { -+ case ELAN_POS_MODE_LOOPBACK: -+ if (lowid != highid || lowid != pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_BACKTOBACK: -+ if (lowid != highid || lowid == pos->pos_nodeid) -+ return (-EINVAL); -+ -+ route->Values[0] = FIRST_MYLINK; -+ route->Values[1] = ROUTE_CTXT_VALUE (ctx); -+ return (0); -+ -+ case ELAN_POS_MODE_SWITCHED: -+ { -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ unsigned int broadcasting = 0; -+ -+ bzero (packed, sizeof (packed)); -+ -+ /* XXXX compute noadaptive ? */ -+ -+ for (level = 0; -+ level < pos->pos_levels && ! ((pos->pos_nodeid / spanned) == (lowid / spanned) && -+ (pos->pos_nodeid / spanned) == (highid / spanned)); -+ level++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = (broadcast || noadaptive) ? FIRST_BCAST_TREE : FIRST_ADAPTIVE; -+ else if (broadcast && padbcast) -+ { -+ padbcast = 0; -+ packed[rb++] = PACKED_BCAST0(4, 4); -+ packed[rb++] = PACKED_BCAST1(4, 4); -+ } -+ else -+ packed[rb++] = (broadcast || noadaptive) ? PACKED_BCAST_TREE : PACKED_ADAPTIVE; -+ } -+ -+ while (level >= 0) -+ { -+ spanned /= *arityp; -+ -+ llink = (lowid / spanned) % *arityp; -+ hlink = (highid / spanned) % *arityp; -+ -+ if (llink != hlink || broadcasting) -+ { -+ broadcasting = 1; -+ -+ if (first == 0) -+ first = FIRST_BCAST (hlink, llink); -+ else -+ { -+ packed[rb++] = PACKED_BCAST0(hlink, llink); -+ -+ if ((rb % 4) == 0 && PACKED_BCAST1(hlink, llink) == 0) -+ { -+ padbcast = 1; -+ goto regenerate_routes; -+ } -+ -+ packed[rb++] = PACKED_BCAST1(hlink, llink); -+ } -+ } -+ else -+ { -+ if (first == 0) -+ first = FIRST_ROUTE(llink); -+ else -+ packed[rb++] = PACKED_ROUTE(llink); -+ } -+ -+ level--; -+ arityp++; -+ } -+ -+ pack_them_routes (route, first | (options & FIRST_OPTIONS_MASK), packed, ctx); -+ return (0); -+ } -+ } -+ -+ return (-EINVAL); -+} -+ -+int -+elan4_check_route (ELAN_POSITION *postiion, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags) -+{ -+ /* XXXX - TBD */ -+ return (0); -+} -+ -+EXPORT_SYMBOL(elan4_alloc_routetable); -+EXPORT_SYMBOL(elan4_free_routetable); -+EXPORT_SYMBOL(elan4_write_route); -+EXPORT_SYMBOL(elan4_read_route); -+EXPORT_SYMBOL(elan4_invalidate_route); -+EXPORT_SYMBOL(elan4_generate_route); -+EXPORT_SYMBOL(elan4_check_route); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/sdram.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/sdram.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/sdram.c 2005-07-28 14:52:52.839680104 -0400 -@@ -0,0 +1,1039 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: sdram.c,v 1.29.6.4 2005/03/03 16:30:45 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/sdram.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+EXPORT_SYMBOL_GPL(elan4_sdram_readb); -+EXPORT_SYMBOL_GPL(elan4_sdram_readw); -+EXPORT_SYMBOL_GPL(elan4_sdram_readl); -+EXPORT_SYMBOL_GPL(elan4_sdram_readq); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeb); -+EXPORT_SYMBOL_GPL(elan4_sdram_writew); -+EXPORT_SYMBOL_GPL(elan4_sdram_writel); -+EXPORT_SYMBOL_GPL(elan4_sdram_writeq); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerob_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerow_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zerol_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_zeroq_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_from_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyb_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyw_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyl_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_copyq_to_sdram); -+EXPORT_SYMBOL_GPL(elan4_sdram_alloc); -+EXPORT_SYMBOL_GPL(elan4_sdram_free); -+EXPORT_SYMBOL_GPL(elan4_sdram_flushcache); -+ -+#define SDRAM_MIN_BANK_SIZE ((1 << 15) * 8) /* 256 Kbytes */ -+ -+static inline ELAN4_SDRAM_BANK * -+sdramaddr_to_bank (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ register int i; -+ -+ for (i = 0; i < dev->dev_sdram_numbanks; i++) -+ { -+ ELAN4_SDRAM_BANK *bank = &dev->dev_sdram_banks[i]; -+ -+ if (saddr >= bank->b_base && saddr < (bank->b_base + bank->b_size)) -+ return (bank); -+ } -+ printk ("sdramaddr_to_bank: sdram address %lx not in a sdram bank\n", saddr); -+ BUG(); -+ -+ return (NULL); /* NOTREACHED */ -+} -+ -+static inline int -+sdramaddr_to_bankoffset (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ return (saddr & (sdramaddr_to_bank (dev, saddr)->b_size-1)); -+} -+ -+static inline int -+sdramaddr_to_bit(ELAN4_DEV *dev, int indx, sdramaddr_t saddr) -+{ -+ return (sdramaddr_to_bankoffset(dev, saddr) >> (SDRAM_MIN_BLOCK_SHIFT+(indx))); -+} -+ -+static inline ioaddr_t -+sdramaddr_to_ioaddr (ELAN4_DEV *dev, sdramaddr_t saddr) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, saddr); -+ -+ return (bank->b_ioaddr + (saddr - bank->b_base)); -+} -+ -+unsigned char -+elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readb (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned short -+elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readw (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned int -+elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readl (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+unsigned long long -+elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t off) -+{ -+ return (__elan4_readq (dev, sdramaddr_to_ioaddr(dev, off))); -+} -+ -+void -+elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t off, unsigned char val) -+{ -+ writeb (val, (void *) sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t off, unsigned short val) -+{ -+ writew (val, (void *) sdramaddr_to_ioaddr(dev, off)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t off, unsigned int val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr(dev, off))); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t off, unsigned long long val) -+{ -+ writeq (val, (void *) (sdramaddr_to_ioaddr(dev, off))); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u32)) -+ writel (0, (void *) dest); -+} -+ -+void -+elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_memset_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, 0, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (0, (void *) dest); -+} -+ -+void -+elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u8 *dest = (u8 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u8)) -+ *dest++ = __elan4_readb (dev, src); -+} -+ -+void -+elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u16 *dest = (u16 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u16)) -+ *dest++ = __elan4_readw (dev, src); -+} -+ -+void -+elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u32 *dest = (u32 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+ for (; src < lim; src += sizeof (u32)) -+ *dest++ = __elan4_readl (dev, src); -+} -+ -+void -+elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes) -+{ -+ ioaddr_t src = sdramaddr_to_ioaddr (dev, from); -+ u64 *dest = (u64 *) to; -+ ioaddr_t lim = src + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, from, (unsigned long) to, nbytes) == 0) -+ return; -+#endif -+ -+ for (; src < lim; src += sizeof (u64)) -+ *dest++ = __elan4_readq (dev, src); -+} -+ -+void -+elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u8 *src = (u8 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u8)) -+ writeb (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u16 *src = (u16 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u32 *src = (u32 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+ for (; dest < lim; dest += sizeof (u16)) -+ writew (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+void -+elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes) -+{ -+ ioaddr_t dest = sdramaddr_to_ioaddr (dev, to); -+ u64 *src = (u64 *) from; -+ ioaddr_t lim = dest + nbytes; -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyto_dev (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM, to, (unsigned long) from, nbytes) == 0) -+ return; -+#endif -+ -+ for (; dest < lim; dest += sizeof (u64)) -+ writeq (*src++, (void *) (dest)); -+ -+ mb(); -+} -+ -+/* sdram buddy allocator */ -+typedef struct sdramblock -+{ -+ sdramaddr_t next; -+ sdramaddr_t prev; -+} sdramblock_t; -+ -+static inline sdramaddr_t -+read_next (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next))); -+} -+ -+static inline sdramaddr_t -+read_prev (ELAN4_DEV *dev, sdramaddr_t block) -+{ -+ return __elan4_readl (dev, sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev))); -+} -+ -+static inline void -+write_next (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, next)))); -+} -+ -+static inline void -+write_prev (ELAN4_DEV *dev, sdramaddr_t block, sdramaddr_t val) -+{ -+ writel (val, (void *) (sdramaddr_to_ioaddr (dev, block + offsetof (sdramblock_t, prev)))); -+} -+ -+static inline void -+freelist_insert (ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t next = dev->dev_sdram_freelists[(idx)]; -+ -+ /* -+ * block->prev = NULL; -+ * block->next = next; -+ * if (next != NULL) -+ * next->prev = block; -+ * freelist = block; -+ */ -+ write_prev (dev, block, (sdramaddr_t) 0); -+ write_next (dev, block, next); -+ if (next != (sdramaddr_t) 0) -+ write_prev (dev, next, block); -+ dev->dev_sdram_freelists[idx] = block; -+ -+ dev->dev_sdram_freecounts[idx]++; -+ dev->dev_stats.s_sdram_bytes_free += (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_remove (ELAN4_DEV *dev,int idx, sdramaddr_t block) -+{ -+ /* -+ * if (block->prev) -+ * block->prev->next = block->next; -+ * else -+ * dev->dev_sdram_freelists[idx] = block->next; -+ * if (block->next) -+ * block->next->prev = block->prev; -+ */ -+ sdramaddr_t blocknext = read_next (dev, block); -+ sdramaddr_t blockprev = read_prev (dev, block); -+ -+ if (blockprev) -+ write_next (dev, blockprev, blocknext); -+ else -+ dev->dev_sdram_freelists[idx] = blocknext; -+ if (blocknext) -+ write_prev (dev, blocknext, blockprev); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+static inline void -+freelist_removehead(ELAN4_DEV *dev, int idx, sdramaddr_t block) -+{ -+ sdramaddr_t blocknext = read_next (dev, block); -+ -+ if ((dev->dev_sdram_freelists[idx] = blocknext) != 0) -+ write_prev (dev, blocknext, 0); -+ -+ dev->dev_sdram_freecounts[idx]--; -+ dev->dev_stats.s_sdram_bytes_free -= (SDRAM_MIN_BLOCK_SIZE << idx); -+ -+ mb(); -+} -+ -+#ifdef DEBUG -+static int -+display_blocks (ELAN4_DEV *dev, int indx, char *string) -+{ -+ sdramaddr_t block; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "%s - indx %d\n", string, indx); -+ for (block = dev->dev_sdram_freelists[indx]; block != (sdramaddr_t) 0; block = read_next (dev, block)) -+ { -+ PRINTF (DBG_DEVICE, DBG_SDRAM, " %x\n", block); -+ nbytes += (SDRAM_MIN_BLOCK_SIZE << indx); -+ } -+ -+ return (nbytes); -+} -+ -+void -+elan4_sdram_display (ELAN4_DEV *dev, char *string) -+{ -+ int indx; -+ int nbytes = 0; -+ -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_display: dev=%p\n", dev); -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ if (dev->dev_sdram_freelists[indx] != (sdramaddr_t) 0) -+ nbytes += display_blocks (dev, indx, string); -+ PRINTF (DBG_DEVICE, DBG_SDRAM, "\n%d bytes free - %d pages free\n", nbytes, nbytes/SDRAM_PAGE_SIZE); -+} -+ -+void -+elan4_sdram_verify (ELAN4_DEV *dev) -+{ -+ int indx, size, nbits, i, b; -+ sdramaddr_t block; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ unsigned count = 0; -+ -+ for (block = dev->dev_sdram_freelists[indx]; block; block = read_next (dev, block), count++) -+ { -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned off = sdramaddr_to_bankoffset (dev, block); -+ int bit = sdramaddr_to_bit (dev, indx, block); -+ -+ if ((block & (size-1)) != 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - not aligned\n", block, indx); -+ -+ if (bank == NULL || off > bank->b_size) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - outside bank\n", block, indx); -+ else if (BT_TEST (bank->b_bitmaps[indx], bit) == 0) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - bit not set\n", block, indx); -+ else -+ { -+ for (i = indx-1, nbits = 2; i >= 0; i--, nbits <<= 1) -+ { -+ bit = sdramaddr_to_bit (dev, i, block); -+ -+ for (b = 0; b < nbits; b++) -+ if (BT_TEST(bank->b_bitmaps[i], bit + b)) -+ printk ("elan4_sdram_verify: block=%lx indx=%x - also free i=%d bit=%x\n", block, indx, i, bit+b); -+ } -+ } -+ } -+ -+ if (dev->dev_sdram_freecounts[indx] != count) -+ printk ("elan4_sdram_verify: indx=%x expected %d got %d\n", indx, dev->dev_sdram_freecounts[indx], count); -+ } -+} -+ -+#endif -+ -+static void -+free_block (ELAN4_DEV *dev, sdramaddr_t block, int indx) -+{ -+ ELAN4_SDRAM_BANK *bank = sdramaddr_to_bank (dev, block); -+ unsigned bit = sdramaddr_to_bit (dev, indx, block); -+ unsigned size = SDRAM_MIN_BLOCK_SIZE << indx; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ ASSERT ((block & (size-1)) == 0); -+ ASSERT (BT_TEST (bank->b_bitmaps[indx], bit) == 0); -+ -+ while (BT_TEST (bank->b_bitmaps[indx], bit ^ 1)) -+ { -+ sdramaddr_t buddy = block ^ size; -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: merge block=%x buddy=%x indx=%d\n", block, buddy, indx); -+ -+ BT_CLEAR (bank->b_bitmaps[indx], bit ^ 1); -+ -+ freelist_remove (dev, indx, buddy); -+ -+ block = (block < buddy) ? block : buddy; -+ indx++; -+ size <<= 1; -+ bit >>= 1; -+ } -+ -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "free_block: free block=%x indx=%d bit=%x\n", block, indx, bit); -+ -+ freelist_insert (dev, indx, block); -+ -+ BT_SET (bank->b_bitmaps[indx], bit); -+} -+ -+void -+elan4_sdram_init (ELAN4_DEV *dev) -+{ -+ int indx; -+ -+ spin_lock_init (&dev->dev_sdram_lock); -+ -+ for (indx = 0; indx < SDRAM_NUM_FREE_LISTS; indx++) -+ { -+ dev->dev_sdram_freelists[indx] = (sdramaddr_t) 0; -+ dev->dev_sdram_freecounts[indx] = 0; -+ } -+} -+ -+void -+elan4_sdram_fini (ELAN4_DEV *dev) -+{ -+ spin_lock_destroy (&dev->dev_sdram_lock); -+} -+ -+#ifdef CONFIG_MPSAS -+/* size of Elan SDRAM in simulation */ -+#define SDRAM_used_addr_bits (16) -+#define SDRAM_SIMULATION_BANK_SIZE ((1 << SDRAM_used_addr_bits) * 8) /* 128 kbytes */ -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ printk ("elan%d: memory bank %d is %d Kb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (SDRAM_SIMULATION_BANK_SIZE / 1024)); -+ -+ bank->b_size = SDRAM_SIMULATION_BANK_SIZE; -+ -+ return 1; -+} -+ -+#else -+ -+static void -+initialise_cache_tags (ELAN4_DEV *dev, unsigned addr) -+{ -+ register int set, line; -+ -+ mb(); -+ -+ /* Initialise the whole cache to hold sdram at "addr" as direct mapped */ -+ -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (line = 0; line < E4_NumCacheLines; line++) -+ write_tag (dev, Tags[set][line], addr | (set << 13) | (1 << 11)); -+ -+ read_tag (dev, Tags[set][line]); /* read it back to guarantee the memory system is quite again */ -+ mb(); -+} -+ -+static __inline__ int -+sdram_GreyToBinary(int GreyVal, int NoOfBits) -+{ -+ int Bit; -+ int BinaryVal=0; -+ for (Bit=(1 << (NoOfBits-1)); Bit != 0; Bit >>= 1) -+ BinaryVal ^= (GreyVal & Bit) ^ ((BinaryVal >> 1) & Bit); -+ return (BinaryVal); -+} -+ -+static __inline__ int -+sdram_BinaryToGrey(int BinaryVal) -+{ -+ return (BinaryVal ^ (BinaryVal >> 1)); -+} -+ -+void -+elan4_sdram_setup_delay_lines (ELAN4_DEV *dev, int factor) -+{ -+ /* This is used to fix the SDRAM delay line values */ -+ int i, AutoGenDelayValue=0; -+ int NewDelayValue; -+ -+ if (dev->dev_sdram_cfg & SDRAM_FIXED_DELAY_ENABLE) /* already setup. */ -+ return; -+ -+ /* now get an average of 10 dll values */ -+ for (i=0;i<10;i++) -+ AutoGenDelayValue += sdram_GreyToBinary(SDRAM_GET_DLL_DELAY(read_reg64 (dev, SDRamConfigReg)), -+ SDRAM_FIXED_DLL_DELAY_BITS); -+ -+ NewDelayValue = factor + (AutoGenDelayValue / 10); /* Mean of 10 values */ -+ -+ dev->dev_sdram_cfg = (dev->dev_sdram_cfg & ~(SDRAM_FIXED_DLL_DELAY_MASK << SDRAM_FIXED_DLL_DELAY_SHIFT)) | -+ SDRAM_FIXED_DELAY_ENABLE | SDRAM_FIXED_DLL_DELAY(sdram_BinaryToGrey(NewDelayValue)); -+ -+ write_reg64 (dev, SDRamConfigReg, dev->dev_sdram_cfg); /* Put back the new value */ -+ -+ pioflush_reg (dev); -+} -+ -+static int -+elan4_sdram_probe_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ unsigned long mappedsize = bank->b_size; -+ ioaddr_t ioaddr; -+ unsigned long long value, size; -+ register int i; -+ extern int sdram_bank_limit; -+ -+ if (mappedsize > SDRAM_MAX_BLOCK_SIZE) -+ mappedsize = SDRAM_MAX_BLOCK_SIZE; -+ -+ while ((ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, mappedsize, &bank->b_handle)) == 0) -+ { -+ if (mappedsize <= (64*1024*1024)) /* boards normally populated with 64mb, so winge if we can't see this much */ -+ printk ("elan%d: could not map bank %d size %dMb\n", dev->dev_instance, (int)(bank - dev->dev_sdram_banks), (int)mappedsize/(1024*1024)); -+ -+ if ((mappedsize >>= 1) < (1024*1024)) -+ return 0; -+ } -+ -+ /* first probe to see if the memory bank is present */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ for (i = 0; i < 64; i++) -+ { -+ unsigned long long pattern = (1ull << i); -+ -+ writeq (pattern, ioaddr); /* write pattern at base */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ writeq (~pattern, ioaddr + E4_CacheSize); /* write ~pattern at cachesize */ -+ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, E4_CacheSize); -+ -+ writeq (~pattern, ioaddr + 2*E4_CacheSize); /* write ~pattern at 2*cachesize */ -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 2*E4_CacheSize); -+ -+ value = __elan4_readq (dev, ioaddr); /* read pattern back at 0 */ -+ -+ if (value != pattern) -+ { -+ printk ("elan%d: sdram bank %d not present\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ } -+ -+ /* sdram bank is present, so work out it's size. We store the maximum size at the base -+ * and then store the address at each address on every power of two address until -+ * we reach the minimum mappable size (PAGESIZE), we then read back the value at the -+ * base to determine the bank size */ -+ writeq (mappedsize, ioaddr); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, 0); -+ -+ for (size = mappedsize >> 1; size > PAGE_SIZE; size >>= 1) -+ { -+ writeq (size, ioaddr + size); -+ if (dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) -+ initialise_cache_tags (dev, size); -+ } -+ -+ if ((size = __elan4_readq (dev, ioaddr)) < SDRAM_MIN_BANK_SIZE) -+ { -+ printk ("elan%d: memory bank %d dubious\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 0; -+ } -+ -+ if (sdram_bank_limit == 0 || size <= (sdram_bank_limit * 1024 * 1024)) -+ printk ("elan%d: memory bank %d is %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ else -+ { -+ size = (sdram_bank_limit * 1024 * 1024); -+ printk ("elan%d: limit bank %d to %d Mb\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks), (int) (size / (1024*1024))); -+ } -+ -+ bank->b_size = size; -+ -+ elan4_unmap_device (dev, ioaddr, mappedsize, &bank->b_handle); -+ return 1; -+} -+#endif -+ -+int -+elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ bank->b_ioaddr = 0; -+ -+ if (! elan4_sdram_probe_bank (dev, bank)) -+ return 0; -+ -+ if ((bank->b_ioaddr = elan4_map_device (dev, ELAN4_BAR_SDRAM, bank->b_base, bank->b_size, &bank->b_handle)) == (ioaddr_t) 0) -+ { -+ printk ("elan%d: could not map sdrambank %d\n", dev->dev_instance, (int) (bank - dev->dev_sdram_banks)); -+ return 0; -+ } -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) /* allocate the buddy allocator bitmaps */ -+ KMEM_ZALLOC (bank->b_bitmaps[indx], bitmap_t *, sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size), 1); -+ -+ return 1; -+} -+ -+void -+elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ int indx, size; -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size <= bank->b_size; indx++, size <<= 1) -+ KMEM_FREE (bank->b_bitmaps[indx], sizeof (bitmap_t) * BT_BITOUL(bank->b_size/size)); -+ -+ elan4_unmap_device (dev, bank->b_ioaddr, bank->b_size, &bank->b_handle); -+} -+ -+void -+elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank) -+{ -+ sdramaddr_t base = bank->b_base; -+ sdramaddr_t top = bank->b_base + bank->b_size; -+ register int indx; -+ register unsigned long size; -+ -+ /* align to the minimum block size */ -+ base = (base + SDRAM_MIN_BLOCK_SIZE - 1) & ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ top &= ~((sdramaddr_t) SDRAM_MIN_BLOCK_SIZE-1); -+ -+ /* don't allow 0 as a valid "base" */ -+ if (base == 0) -+ base = SDRAM_MIN_BLOCK_SIZE; -+ -+ /* carve the bottom to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((base & size) == 0) -+ continue; -+ -+ if ((base + size) > top) -+ break; -+ -+ free_block (dev, base, indx); -+ -+ base += size; -+ } -+ -+ /* carve the top down to the biggest boundary */ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; indx < SDRAM_NUM_FREE_LISTS; indx++, size <<= 1) -+ { -+ if ((top & size) == 0) -+ continue; -+ -+ if ((top - size) < base) -+ break; -+ -+ free_block (dev, (top - size), indx); -+ -+ top -= size; -+ } -+ -+ /* now free of the space in between */ -+ while (base < top) -+ { -+ free_block (dev, base, (SDRAM_NUM_FREE_LISTS-1)); -+ -+ base += SDRAM_MAX_BLOCK_SIZE; -+ } -+} -+ -+sdramaddr_t -+elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes) -+{ -+ sdramaddr_t block; -+ register int i, indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: nbytes=%d indx=%d\n", nbytes, indx); -+ -+ /* need to split a bigger block up */ -+ for (i = indx; i < SDRAM_NUM_FREE_LISTS; i++, size <<= 1) -+ if (dev->dev_sdram_freelists[i]) -+ break; -+ -+ if (i == SDRAM_NUM_FREE_LISTS) -+ { -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ printk ("elan4_sdram_alloc: %d bytes failed\n", nbytes); -+ return ((sdramaddr_t) 0); -+ } -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_alloc: use block=%x indx=%d\n", dev->dev_sdram_freelists[i], i); -+ -+ /* remove the block from the free list */ -+ freelist_removehead (dev, i, (block = dev->dev_sdram_freelists[i])); -+ -+ /* clear the approriate bit in the bitmap */ -+ BT_CLEAR (sdramaddr_to_bank (dev, block)->b_bitmaps[i], sdramaddr_to_bit (dev,i, block)); -+ -+ /* and split it up as required */ -+ while (i-- > indx) -+ free_block (dev, block + (size >>= 1), i); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+ -+ ASSERT ((block & ((SDRAM_MIN_BLOCK_SIZE << (indx))-1)) == 0); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, block, sizeof (sdramblock_t)); -+#endif -+ -+ return ((sdramaddr_t) block); -+} -+ -+void -+elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t block, int nbytes) -+{ -+ register int indx; -+ unsigned long size; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->dev_sdram_lock, flags); -+ -+ for (indx = 0, size = SDRAM_MIN_BLOCK_SIZE; size < nbytes; indx++, size <<= 1) -+ ; -+ -+ PRINTF2 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_free: indx=%d block=%x\n", indx, block); -+ -+ free_block (dev, block, indx); -+ -+ spin_unlock_irqrestore (&dev->dev_sdram_lock, flags); -+} -+ -+void -+elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t addr, int len) -+{ -+ int set, off; -+ -+ SET_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+ -+ /* -+ * if flushing more than a single set (8K), then you have to flush the whole cache. -+ * NOTE - in the real world we will probably want to generate a burst across -+ * the pci bus. -+ */ -+ if (len >= E4_CacheSetSize) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => whole cache\n", addr, len, addr + len); -+ -+#ifdef CONFIG_MPSAS -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space, E4_CacheSize); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = 0; off < E4_CacheSetSize; off += E4_CacheLineSize) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ else -+ { -+ unsigned base = addr & ~(E4_CACHELINE_SIZE-1); -+ unsigned top = (addr + len + (E4_CACHELINE_SIZE-1)) & ~(E4_CACHELINE_SIZE-1); -+ unsigned baseoff = base & (E4_CacheSetSize-1); -+ unsigned topoff = top & (E4_CacheSetSize-1); -+ -+ if ((base ^ top) & E4_CacheSetSize) /* wraps */ -+ { -+ PRINTF7 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => split cache (%x,%x %x,%x)\n", -+ addr, len, addr + len, 0, topoff, baseoff, E4_CacheSetSize); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize), topoff); -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, E4_CacheSetSize - baseoff); -+ } -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ { -+ for (off = 0; off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ -+ for (off = (base & (E4_CacheSetSize-1)); off < E4_CacheSetSize; off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+ } -+#endif -+ } -+ else -+ { -+ PRINTF5 (DBG_DEVICE, DBG_SDRAM, "elan4_sdram_flushcache: addr=%x len=%x (%x) => part cache (%x,%x)\n", -+ addr, len, addr + len, baseoff, topoff); -+ -+#ifdef CONFIG_MPSAS -+ for (set = 0; set < E4_NumCacheSets; set++) -+ elan4_sdram_zeroq_sdram (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + baseoff, topoff - baseoff); -+#else -+ for (set = 0; set < E4_NumCacheSets; set++) -+ for (off = (base & (E4_CacheSetSize-1)); off < (top & (E4_CacheSetSize-1)); off += E4_CACHELINE_SIZE) -+ elan4_sdram_writeq (dev, dev->dev_cacheflush_space + (set * E4_CacheSetSize) + off, 0); -+#endif -+ } -+ } -+ pioflush_sdram (dev); -+ -+ CLEAR_SYSCONTROL (dev, dev_direct_map_pci_writes, CONT_DIRECT_MAP_PCI_WRITES); -+} -+ -+static char * -+get_correctableErr_bitpos(uint SyndromeBits) -+{ -+ switch (SyndromeBits) -+ { -+ case 0x00: return ("NoErr"); -+ case 0x31: return ("00"); -+ case 0x32: return ("01"); -+ case 0xc4: return ("02"); -+ case 0xc8: return ("03"); -+ case 0x26: return ("04"); -+ case 0x91: return ("05"); -+ case 0x89: return ("06"); -+ case 0x64: return ("07"); -+ case 0xc1: return ("08"); -+ case 0xf2: return ("09"); -+ case 0x34: return ("10"); -+ case 0xf8: return ("11"); -+ case 0xf1: return ("12"); -+ case 0xc2: return ("13"); -+ case 0xf4: return ("14"); -+ case 0x38: return ("15"); -+ case 0xd6: return ("16"); -+ case 0xa1: return ("17"); -+ case 0x79: return ("18"); -+ case 0xa4: return ("19"); -+ case 0xd9: return ("20"); -+ case 0xa2: return ("21"); -+ case 0x76: return ("22"); -+ case 0xa8: return ("23"); -+ case 0xe6: return ("24"); -+ case 0x51: return ("25"); -+ case 0xb9: return ("26"); -+ case 0x54: return ("27"); -+ case 0xe9: return ("28"); -+ case 0x52: return ("29"); -+ case 0xb6: return ("30"); -+ case 0x58: return ("31"); -+ case 0x13: return ("32"); -+ case 0x23: return ("33"); -+ case 0x4c: return ("34"); -+ case 0x8c: return ("35"); -+ case 0x62: return ("36"); -+ case 0x19: return ("37"); -+ case 0x98: return ("38"); -+ case 0x46: return ("39"); -+ case 0x1c: return ("40"); -+ case 0x2f: return ("41"); -+ case 0x43: return ("42"); -+ case 0x8f: return ("43"); -+ case 0x1f: return ("44"); -+ case 0x2c: return ("45"); -+ case 0x4f: return ("46"); -+ case 0x83: return ("47"); -+ case 0x6d: return ("48"); -+ case 0x1a: return ("49"); -+ case 0x97: return ("50"); -+ case 0x4a: return ("51"); -+ case 0x9d: return ("52"); -+ case 0x2a: return ("53"); -+ case 0x67: return ("54"); -+ case 0x8a: return ("55"); -+ case 0x6e: return ("56"); -+ case 0x15: return ("57"); -+ case 0x9b: return ("58"); -+ case 0x45: return ("59"); -+ case 0x9e: return ("60"); -+ case 0x25: return ("61"); -+ case 0x6b: return ("62"); -+ case 0x85: return ("63"); -+ case 0x01: return ("C0"); -+ case 0x02: return ("C1"); -+ case 0x04: return ("C2"); -+ case 0x08: return ("C3"); -+ case 0x10: return ("C4"); -+ case 0x20: return ("C5"); -+ case 0x40: return ("C6"); -+ case 0x80: return ("C7"); -+ -+ case 0x07: case 0x0b: case 0x0d: case 0x0e: case 0x3d: case 0x3e: case 0x70: case 0x7c: // T -+ case 0xb0: case 0xbc: case 0xc7: case 0xcb: case 0xd0: case 0xd3: case 0xe0: case 0xe3: // T -+ return ("triple"); -+ -+ case 0x0f: case 0x55: case 0x5a: case 0xa5: case 0xaa: case 0xf0: case 0xff: // Q -+ return ("quadruple"); -+ -+ case 0x16: case 0x29: case 0x37: case 0x3b: case 0x49: case 0x57: case 0x5b: case 0x5d: case 0x5e: case 0x61: // M -+ case 0x68: case 0x73: case 0x75: case 0x7a: case 0x7f: case 0x86: case 0x92: case 0x94: case 0xa7: case 0xab: // M -+ case 0xad: case 0xae: case 0xb3: case 0xb5: case 0xba: case 0xbf: case 0xcd: case 0xce: case 0xd5: case 0xda: // M -+ case 0xdc: case 0xdf: case 0xe5: case 0xea: case 0xec: case 0xef: case 0xf7: case 0xfb: case 0xfd: case 0xfe: // M -+ return ("multiple"); -+ -+ default: // all other cases -+ return ("double"); -+ } -+} -+ -+char * -+elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, E4_uint64 ConfigReg, char *str) -+{ -+ E4_uint64 StartupSyndrome = dev->dev_sdram_initial_ecc_val; -+ int RisingDQSsyndrome = ((ECC_RisingDQSSyndrome(status) == ECC_RisingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_RisingDQSSyndrome(status)); -+ int FallingDQSsyndrome = ((ECC_FallingDQSSyndrome(status) == ECC_FallingDQSSyndrome(StartupSyndrome)) ? -+ 0 : ECC_FallingDQSSyndrome(status)); -+ E4_uint64 Addr = ECC_Addr(status); -+ int Bank = (Addr >> 6) & 3; -+ int Cas = ((Addr >> 3) & 7) | ((Addr >> (8 - 3)) & 0xf8) | ((Addr >> (25 - 8)) & 0x100) | -+ ((Addr >> (27 - 9)) & 0x200) | ((Addr >> (29 - 10)) & 0xc00); -+ int Ras = ((Addr >> 13) & 0xfff) | ((Addr >> (26 - 12)) & 0x1000) | ((Addr >> (28 - 13)) & 0x2000) | -+ ((Addr >> (30 - 14)) & 0x4000); -+ -+ sprintf (str, "Addr=%07llx Bank=%x Ras=%x Cas=%x Falling DQS=%s Rising DQS=%s Syndrome=%x%s%s%s%s Type=%s SDRamDelay=%s,%0d", /* 41 + 16 + 8 + 15 + 24 + 13 + 22 + 10 + 10 == 151 */ -+ (long long)Addr, Bank, Ras, Cas, -+ get_correctableErr_bitpos(FallingDQSsyndrome), -+ get_correctableErr_bitpos(RisingDQSsyndrome), -+ (int)ECC_Syndrome(status), -+ ECC_UncorrectableErr(status) ? " Uncorrectable" : "", -+ ECC_MultUncorrectErrs(status) ? " Multiple-Uncorrectable" : "", -+ ECC_CorrectableErr(status) ? " Correctable" : "", -+ ECC_MultCorrectErrs(status) ? " Multiple-Correctable" : "", -+ (status & 0x0010000000000000ull) ? "W" : -+ (status & 0x0020000000000000ull) ? "R" : -+ (status & 0x0030000000000000ull) ? "C" : "-", -+ (ConfigReg & SDRAM_FIXED_DELAY_ENABLE) ? "F" : "A", -+ sdram_GreyToBinary(SDRAM_GET_DLL_DELAY(ConfigReg), SDRAM_FIXED_DLL_DELAY_BITS)); -+ -+ return str; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/trap.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/trap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/trap.c 2005-07-28 14:52:52.840679952 -0400 -@@ -0,0 +1,777 @@ -+/* -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.c,v 1.19.10.3 2005/03/09 12:00:08 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.c,v $*/ -+ -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+char * const PermTypes[16] = -+{ -+ "Disabled", "Unused", "LocalDataRead", "LocalDataWrite", -+ "LocalRead", "LocalExecute", "ReadOnly", "LocalWrite", -+ "LocalEventOnly", "LocalEventWrite", "RemoteEvent", "RemoteAll", -+ "RemoteReadOnly", "RemoteWriteOnly", "DataReadWrite", "NoFault", -+}; -+ -+char * const AccTypes[] = -+{ -+ "LocalDataRead ", "LocalDataWrite", "RemoteRead ", "RemoteWrite ", -+ "Execute ", "LocalEvent ", "Unused ", "RemoteEvent " -+}; -+char * const DataTypes[] = {"Byte ", "HWord", "Word ", "DWord"}; -+char * const PhysTypes[] = {"Special Read", "Special Write", "Physical Read", "Physical Write"}; -+ -+char * const EProcTrapNames[] = { -+ "EventProcNoFault", -+ "EventProcAddressAlignment", -+ "EventProcMemoryFault", -+ "EventProcCountWrapError", -+}; -+ -+char * const CProcTrapNames[] = { -+ "CommandProcNoFault", -+ "CommandProcInserterError", -+ "CommandProcPermissionTrap", -+ "CommandProcSendTransInvalid", -+ "CommandProcSendTransExpected", -+ "CommandProcDmaQueueOverflow", -+ "CommandProcInterruptQueueOverflow", -+ "CommandProcMemoryFault", -+ "CommandProcRouteFetchFault", -+ "CommandProcFailCountZero", -+ "CommandProcAddressAlignment", -+ "CommandProcWaitTrap", -+ "CommandProcMultipleGuards", -+ "CommandProcOpenOnGuardedChan", -+ "CommandProcThreadQueueOverflow", -+ "CommandProcBadData", -+}; -+ -+char *const CProcInsertError[] = { -+ "No Error", -+ "Overflowed", -+ "Invalid Write Size", -+ "Invalid Write Order", -+}; -+ -+char * const DProcTrapNames[] = { -+ "DmaProcNoFault", -+ "DmaProcRouteFetchFault", -+ "DmaProcFailCountError", -+ "DmaProcPacketAckError", -+ "DmaProcRunQueueReadFault", -+ "DmaProcQueueOverFlow", -+}; -+ -+char *const IProcTrapNames[] = { -+ "InputNoFault", -+ "InputAddressAlignment", -+ "InputMemoryFault", -+ "InputInvalidTransType", -+ "InputDmaQueueOverflow", -+ "InputEventEngineTrapped", -+ "InputCrcErrorAfterPAckOk", -+ "InputEopErrorOnWaitForEop", -+ "InputEopErrorTrap", -+ "InputDiscardAfterAckOk", -+}; -+ -+char *const TProcTrapNames[] = { -+ "HaltThread", -+ "TrapForTooManyInstructions", -+ "InstAccessException", -+ "Unimplemented", -+ "DataAccessException", -+ "DataAlignmentError", -+ "TrapForUsingBadData", -+}; -+ -+#define declare_spaces(space, str) char space[64]; do { int i; for (i = 0; i < strlen(str); i++) spaces[i] = ' '; space[i] = '\0'; } while (0) -+#define declare_prefix(space, spaces, str) char space[64]; do { strcpy (space, spaces); strcat (space, str); } while (0) -+ -+void -+elan4_display_farea (void *type, int mode, char *str, E4_FaultSave *farea) -+{ -+ E4_uint32 FSR = FaultSaveFSR(farea->FSRAndFaultContext); -+ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Fault occurred at %016llx for context %4x\n", str, -+ farea->FaultAddress, FaultSaveContext(farea->FSRAndFaultContext)); -+ -+ if (FSR & AT_VirtualWriteAccBit) /* Virtual write access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Write. DWSize=0x%x EndP=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualWriteSizeMask, -+ (FSR >> AT_VirtualWriteEndPtrShift) & AT_VirtualWriteEndPtrMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else if (FSR & AT_VirtualReadAccBit) /* Virtual read access */ -+ elan4_debugf (type, mode, "%s FSR=%x: Virtual Read. DWSize=0x%x Access=%s DT=%s\n", -+ spaces, FSR, FSR & AT_VirtualReadSizeMask, -+ AccTypes[(FSR >> AT_PermBitsShift) & AT_PermBitsMask], -+ DataTypes[(FSR >> AT_BlkDataTyShift) & AT_BlkDataTyMask]); -+ else -+ elan4_debugf (type, mode, "%s FSR=%x: %s. Size=0x%x\n", spaces, -+ FSR, PhysTypes[(FSR >> AT_SelBitsShift) & AT_SelBitsMask], -+ FSR & AT_OtherSizeMask); -+ elan4_debugf (type, mode, "%s FSR: %s %s%s %sWalking\n", spaces, -+ (FSR & AT_NonAlloc) ? "NonAlloc" : "Alloc", -+ (FSR & AT_DmaData) ? "Dma " : "", -+ (FSR & FSR_WalkForThread) ? "ThreadAcc" : "UnitsAcc", -+ (FSR & FSR_Walking) ? "" : "Not"); -+ PRINTF (type, mode, "%s FSR: %s%sHashTable=%s\n", spaces, -+ (FSR & FSR_NoTranslationsFound) ? "NoTranslationsFound " : "", -+ (FSR & FSR_WalkingProtectionFault) ? "WalkingProtectionFault " : "", -+ (FSR & FSR_HashTable1) ? "1" : "0"); -+ if (FSR & (FSR_RouteVProcErr | FSR_FaultForBadData)) -+ elan4_debugf (type, mode, "%s FSR: %s%s\n", spaces, -+ (FSR & FSR_RouteVProcErr) ? "RouteVProcErr " : "", -+ (FSR & FSR_FaultForBadData) ? "FaultForBadData " : ""); -+} -+ -+void -+elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%016llx %s EventAddr=%016llx CountAndType=%016llx\n", str, -+ trap->tr_status, EProcTrapNames[EPROC_TrapType(trap->tr_status)], -+ trap->tr_eventaddr, trap->tr_event.ev_CountAndType); -+ elan4_debugf (type, mode, "%s Param=%016llx.%016llx\n", spaces, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ -+ elan4_display_farea (type, mode, strcat (spaces, EPROC_Port0Fault(trap->tr_status) ? " EPROC0" : " EPROC1"), &trap->tr_faultarea); -+} -+ -+void -+elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap) -+{ -+ declare_spaces(spaces, str); -+ -+ elan4_debugf (type, mode, "%s Status=%llx %s Command=%llx\n", str, trap->tr_status, -+ CProcTrapNames[CPROC_TrapType(trap->tr_status)], trap->tr_command); -+ elan4_debugf (type, mode, "%s Desc=%016llx %016llx %016llx %016llx\n", str, -+ trap->tr_qdesc.CQ_QueuePtrs, trap->tr_qdesc.CQ_HoldingValue, -+ trap->tr_qdesc.CQ_AckBuffers, trap->tr_qdesc.CQ_Control); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcInserterError: -+ elan4_debugf (type, mode, "%s %s\n", str, CProcInsertError[CQ_RevB_ErrorType(trap->tr_qdesc.CQ_QueuePtrs)]); -+ break; -+ -+ case CommandProcWaitTrap: -+ elan4_display_eproc_trap (type, mode, spaces, &trap->tr_eventtrap); -+ break; -+ -+ default: -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+ break; -+ } -+} -+ -+void -+elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap) -+{ -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s status %llx - %s\n", str, -+ trap->tr_status, DProcTrapNames[DPROC_TrapType(trap->tr_status)]); -+ -+ elan4_debugf (type, mode, "%s DESC %016llx %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_typeSize, -+ trap->tr_desc.dma_cookie, trap->tr_desc.dma_vproc, trap->tr_desc.dma_srcAddr); -+ elan4_debugf (type, mode, "%s %016llx %016llx %016llx\n", spaces, trap->tr_desc.dma_dstAddr, -+ trap->tr_desc.dma_srcEvent, trap->tr_desc.dma_dstEvent); -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ elan4_display_farea (type, mode, spaces, &trap->tr_prefetchFault); -+} -+ -+void -+elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ elan4_debugf (type, mode, "%s PC=%016llx nPC=%016llx State=%016llx Status=%016llx -%s%s%s%s\n", str, -+ trap->tr_pc, trap->tr_npc, trap->tr_state, trap->tr_status, -+ (trap->tr_state & TS_TrapForTooManyInstructions) ? " TrapForTooManyInstructions" : "", -+ (trap->tr_state & TS_Unimplemented) ? " Unimplemented" : "", -+ (trap->tr_state & TS_DataAlignmentError) ? " DataAlignmentError" : "", -+ (trap->tr_state & TS_InstAccessException) ? " InstAccessException" : "", -+ (trap->tr_state & TS_DataAccessException) ? " DataAlignmentError" : ""); -+ -+ for (i = 0; i < 64; i += 4) -+ elan4_debugf (type, mode, "%s r%d - %016llx %016llx %016llx %016llx\n", spaces, i, -+ trap->tr_regs[i], trap->tr_regs[i+1], trap->tr_regs[i+2], trap->tr_regs[i+3]); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ { -+ declare_prefix (prefix, spaces, "Inst"); -+ -+ elan4_display_farea (type, mode, prefix, &trap->tr_instFault); -+ } -+ -+ if (trap->tr_state & TS_DataAccessException) -+ { -+ declare_prefix (prefix, spaces, "Data"); -+ elan4_display_farea (type, mode, prefix, &trap->tr_dataFault); -+ } -+} -+ -+void -+elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap) -+{ -+ register int i; -+ declare_spaces (spaces, str); -+ -+ for (i = 0; i < trap->tr_numTransactions; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ E4_Addr addr = hdrp->TrAddr; -+ char *typeString; -+ char buffer[256]; -+ char *ptr = buffer; -+ -+ if (IPROC_EOPTrap(status)) -+ { -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: typeString = "EopGood"; break; -+ case EOP_BADACK: typeString = "EopBadAck"; break; -+ case EOP_ERROR_RESET: typeString = "EopReset"; break; -+ default: typeString = "EopBad"; break; -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d", typeString, IPROC_NetworkContext(status)); -+ } -+ else -+ { -+ if (IPROC_BadLength(status)) -+ typeString = "BadLength"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_DISCARD) -+ typeString = "DiscardCrc"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_ERROR) -+ typeString = "ErrorCrc Remote Network error"; -+ else if (IPROC_TransCRCStatus(status) == CRC_STATUS_BAD) -+ typeString = "BadCrc Cable error into this node."; -+ else -+ { -+ if ((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) -+ typeString = "WriteBlock"; -+ else -+ { -+ switch (IPROC_TransactionType(status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: typeString = "SetEvent"; break; -+ case TR_REMOTEDMA & TR_OPCODE_MASK: typeString = "RemoteDma"; break; -+ case TR_SENDDISCARD & TR_OPCODE_MASK: typeString = "SendDiscard"; break; -+ case TR_GTE & TR_OPCODE_MASK: typeString = "GTE"; break; -+ case TR_LT & TR_OPCODE_MASK: typeString = "LT"; break; -+ case TR_EQ & TR_OPCODE_MASK: typeString = "EQ"; break; -+ case TR_NEQ & TR_OPCODE_MASK: typeString = "NEQ"; break; -+ case TR_IDENTIFY & TR_OPCODE_MASK: typeString = "Idenfity"; break; -+ case TR_ADDWORD & TR_OPCODE_MASK: typeString = "AddWord"; break; -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: typeString = "InputQCommit"; break; -+ case TR_TESTANDWRITE & TR_OPCODE_MASK: typeString = "TestAndWrite"; break; -+ case TR_INPUT_Q_GETINDEX & TR_OPCODE_MASK: typeString = "InputQGetIndex"; break; -+ case TR_TRACEROUTE_TRANS & TR_OPCODE_MASK: typeString = "TraceRoute"; break; -+ default: typeString = "Unknown"; break; -+ } -+ } -+ } -+ -+ ptr += sprintf (ptr, "%15s Cntx=%-6d Addr=%016llx", typeString, IPROC_NetworkContext(status), (unsigned long long) addr); -+ } -+ -+ -+ if (IPROC_TrapValue(status) != InputNoFault) -+ { -+ ptr += sprintf (ptr, " TrType=%2d ChanTrapped=%x GoodAck=%x BadAck=%x InputterChan=%d", IPROC_TrapValue(status), -+ IPROC_ChannelTrapped(status), IPROC_GoodAckSent(status), IPROC_BadAckSent(status), -+ IPROC_InputterChan(status)); -+ if (IPROC_EOPTrap(status)) -+ ptr += sprintf (ptr, " EOPType=%d", IPROC_EOPType(status)); -+ else -+ ptr += sprintf (ptr, " %s%s%s%s", -+ IPROC_FirstTrans(status) ? " FirstTrans" : "", -+ IPROC_LastTrans(status) ? " LastTrans" : "", -+ (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP) ? " WaitForEop" : "", -+ (IPROC_GoodAckSent(status) & (1 << IPROC_Channel(status))) ? " AckSent" : ""); -+ } -+ -+ elan4_debugf (type, mode, "%s %s\n", str, buffer); -+ -+ str = spaces; -+ } -+ -+ elan4_display_farea (type, mode, spaces, &trap->tr_faultarea); -+} -+ -+#define elan4_sdram_copy_faultarea(dev, unit, farea) \ -+ elan4_sdram_copyq_from_sdram ((dev), (dev)->dev_faultarea + (unit) * sizeof (E4_FaultSave), (E4_uint64 *) farea, sizeof (E4_FaultSave)); -+ -+void -+elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent) -+{ -+ /* only one of the memory ports can fault at a time */ -+ ASSERT (EPROC_TrapType(status) != EventProcMemoryFault || (EPROC_Port0Fault(status) ^ EPROC_Port1Fault(status)) == 1); -+ -+ trap->tr_status = status; -+ -+ if (EPROC_Port0Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc0, &trap->tr_faultarea); -+ if (EPROC_Port1Fault(status)) -+ elan4_sdram_copy_faultarea (dev, CUN_EventProc1, &trap->tr_faultarea); -+ -+ if (iswaitevent) -+ { -+ /* -+ * for waitevents the Event address is always taken from the command processor -+ * -+ * if we trapped during the copy then we take the "Event" from the event processor -+ * since we need to complete the copy. Otherwise we'll be reissuing the original -+ * command again -+ */ -+ E4_uint32 fsr = FaultSaveFSR(trap->tr_faultarea.FSRAndFaultContext); -+ -+ trap->tr_eventaddr = read_reg64 (dev, CommandHold) ^ WAIT_EVENT_CMD; -+ -+ if (EPROC_TrapType(trap->tr_status) == EventProcMemoryFault && -+ (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite)) -+ { -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ else -+ { -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, CommandCopy[5]); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, CommandCopy[4]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, CommandCopy[6]); -+ -+ } -+ } -+ else -+ { -+ trap->tr_eventaddr = read_reg64 (dev, EventAddress); -+ trap->tr_event.ev_CountAndType = read_reg64 (dev, EventCountAndType); -+ trap->tr_event.ev_Params[0] = read_reg64 (dev, EventParameters[0]); -+ trap->tr_event.ev_Params[1] = read_reg64 (dev, EventParameters[1]); -+ } -+ -+ BumpDevStat (dev, s_eproc_trap_types[EPROC_TrapType(status)]); -+} -+ -+int -+cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq, int chan) -+{ -+ /* cq = ucq->ucq_cq */ -+ if ((cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: /* open */ -+ if (((chan << 4) == (command & (1<<4)))) -+ /* Matches supplied channel */ -+ return (command >> 32); -+ else -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 2: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ case 3: -+ printk ("cproc_open_extract_vp: invalid command %llx\n", command); -+ return -1; -+ } -+ } /* while */ -+ } -+ -+ return -1; -+} -+ -+void -+elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum) -+{ -+ /* extract the state from the device */ -+ elan4_sdram_copy_faultarea (dev, CUN_CommandProc, &trap->tr_faultarea); -+ -+ trap->tr_status = status; -+ trap->tr_command = read_reg64 (dev, CommandHold); -+ -+ elan4_sdram_copyq_from_sdram (dev, dev->dev_cqaddr + (cqnum * sizeof (E4_CommandQueueDesc)), &trap->tr_qdesc, sizeof (E4_CommandQueueDesc)); -+ -+ if (CPROC_TrapType (status) == CommandProcWaitTrap) -+ elan4_extract_eproc_trap (dev, read_reg64 (dev, EProcStatus), &trap->tr_eventtrap, 1); -+ -+ BumpDevStat (dev, s_cproc_trap_types[CPROC_TrapType(status)]); -+ -+ if (PackValue(trap->tr_qdesc.CQ_AckBuffers, 0) == PackTimeout || PackValue(trap->tr_qdesc.CQ_AckBuffers, 1) == PackTimeout) -+ BumpDevStat (dev, s_cproc_timeout); -+} -+ -+void -+elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit) -+{ -+ trap->tr_status = status; -+ -+ if (unit == 0) -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma0Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma0Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma0Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma0Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma0Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma0Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma0Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA0, &trap->tr_packAssemFault); -+ } -+ else -+ { -+ trap->tr_desc.dma_typeSize = read_reg64 (dev, Dma1Desc.dma_typeSize); -+ trap->tr_desc.dma_cookie = read_reg64 (dev, Dma1Desc.dma_cookie); -+ trap->tr_desc.dma_vproc = read_reg64 (dev, Dma1Desc.dma_vproc); -+ trap->tr_desc.dma_srcAddr = read_reg64 (dev, Dma1Desc.dma_srcAddr); -+ trap->tr_desc.dma_dstAddr = read_reg64 (dev, Dma1Desc.dma_dstAddr); -+ trap->tr_desc.dma_srcEvent = read_reg64 (dev, Dma1Desc.dma_srcEvent); -+ trap->tr_desc.dma_dstEvent = read_reg64 (dev, Dma1Desc.dma_dstEvent); -+ -+ elan4_sdram_copy_faultarea (dev, CUN_DProcPA1, &trap->tr_packAssemFault); -+ } -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ elan4_sdram_copy_faultarea (dev, (CUN_DProcData0 | DPROC_FaultUnitNo(trap->tr_status)), &trap->tr_prefetchFault); -+ -+ if (DPROC_PacketTimeout (trap->tr_status)) -+ BumpDevStat (dev, s_dproc_timeout); -+ -+ BumpDevStat (dev, s_dproc_trap_types[DPROC_TrapType(status)]); -+} -+ -+void -+elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_status = status; -+ trap->tr_state = read_reg64 (dev, Thread_Trap_State); -+ trap->tr_pc = read_reg64 (dev, PC_W); -+ trap->tr_npc = read_reg64 (dev, nPC_W); -+ trap->tr_dirty = read_reg64 (dev, DirtyBits); -+ trap->tr_bad = read_reg64 (dev, BadBits); -+ -+#ifdef CONFIG_MPSAS -+ if (sas_copyfrom_dev (dev->dev_osdep.pdev, ELAN4_BAR_REGISTERS, -+ ((dev->dev_devinfo.dev_revision_id == PCI_REVISION_ID_ELAN4_REVA) ? ELAN4_REVA_REG_OFFSET : ELAN4_REVB_REG_OFFSET) + -+ offsetof (E4_Registers, Regs.TProcRegs), (unsigned long) &trap->tr_regs, 64*sizeof (E4_uint64)) < 0) -+ { -+ for (i = 0; i < 64; i++) -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ } -+ -+ for (i = 0; i < 64; i++) -+ if (! (trap->tr_dirty & ((E4_uint64) 1 << i))) -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+#else -+ for (i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ trap->tr_regs[i] = read_reg64 (dev, TProcRegs[i]); -+ else -+ trap->tr_regs[i] = 0xdeadbabedeadbabeULL; -+ } -+#endif -+ -+ if (trap->tr_state & TS_DataAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcData0 | TS_DataPortNo (trap->tr_state), &trap->tr_dataFault); -+ -+ if (trap->tr_state & TS_InstAccessException) -+ elan4_sdram_copy_faultarea (dev, CUN_TProcInst, &trap->tr_instFault); -+ -+ for (i = 0; i < 7; i++) -+ if (trap->tr_state & (1 << i)) -+ BumpDevStat (dev, s_tproc_trap_types[i]); -+} -+ -+void -+elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit) -+{ -+ sdramaddr_t hdroff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrHeader[0][unit]); -+ sdramaddr_t dataoff = dev->dev_inputtraparea + offsetof (E4_IprocTrapState, TrData[0][unit]); -+ register int i, j; -+ int CurrUnitNo = (unit >= 2) ? CUN_IProcHighPri : CUN_IProcLowPri; -+ sdramaddr_t CurrFaultArea = dev->dev_faultarea + (CurrUnitNo * sizeof (E4_FaultSave)); -+ -+ /* Finally copy the fault area */ -+ elan4_sdram_copy_faultarea (dev, CurrUnitNo, &trap->tr_faultarea); -+ -+ /* -+ * Clear out the fault save area after reading to allow a fault on the write of the back pointer of -+ * an InputQCommit to be obsurved if a simultaneous event proc trap occurs. -+ */ -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FSRAndFaultContext), 0x0ULL); -+ elan4_sdram_writeq (dev, CurrFaultArea + offsetof(E4_FaultSave, FaultAddress), 0x0ULL); -+ -+ /* copy the transaction headers */ -+ trap->tr_transactions[0].IProcStatusCntxAndTrType = status; -+ trap->tr_transactions[0].TrAddr = elan4_sdram_readq (dev, hdroff + offsetof (E4_IprocTrapHeader, TrAddr)); -+ -+ for (i = 0; !IPROC_EOPTrap(trap->tr_transactions[i].IProcStatusCntxAndTrType);) -+ { -+ if (IPROC_BadLength (trap->tr_transactions[i].IProcStatusCntxAndTrType)) -+ BumpDevStat (dev, s_bad_length); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_BAD) -+ BumpDevStat (dev, s_crc_bad); -+ else if (IPROC_TransCRCStatus (trap->tr_transactions[i].IProcStatusCntxAndTrType) == CRC_STATUS_ERROR) -+ BumpDevStat (dev, s_crc_error); -+ -+ BumpDevStat (dev, s_iproc_trap_types[IPROC_TrapValue (trap->tr_transactions[i].IProcStatusCntxAndTrType)]); -+ -+ hdroff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapHeader); -+ -+ if (++i == MAX_TRAPPED_TRANS) -+ break; -+ -+ elan4_sdram_copyq_from_sdram (dev, hdroff, &trap->tr_transactions[i], sizeof (E4_IprocTrapHeader)); -+ } -+ -+ if (IPROC_EOPType (trap->tr_transactions[i].IProcStatusCntxAndTrType) == EOP_ERROR_RESET) -+ BumpDevStat (dev, s_eop_reset); -+ -+ /* Remember the number of transactions we've copied */ -+ trap->tr_numTransactions = i + 1; -+ -+ /* Copy all the data blocks in one go */ -+ for (i = 0; i < MIN (trap->tr_numTransactions, MAX_TRAPPED_TRANS); i++, dataoff += NO_OF_INPUT_CHANNELS*sizeof (E4_IprocTrapData)) -+ { -+ if (IPROC_BadLength(status) || IPROC_TransCRCStatus (status) != CRC_STATUS_GOOD) -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, TRANS_DATA_DWORDS*sizeof(E4_uint64)); -+ else -+ { -+ int trtype = IPROC_TransactionType(trap->tr_transactions[i].IProcStatusCntxAndTrType); -+ int ndwords = (trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT; -+ -+ elan4_sdram_copyq_from_sdram (dev, dataoff, trap->tr_dataBuffers[i].Data, ndwords*sizeof(E4_uint64)); -+ -+ for (j = ndwords; j < TRANS_DATA_DWORDS; j++) -+ trap->tr_dataBuffers[i].Data[j] = 0xbeec0f212345678ull; -+ } -+ } -+ -+} -+ -+void -+elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap) -+{ -+ int i; -+ -+ trap->tr_flags = 0; -+ trap->tr_trappedTrans = TR_TRANS_INVALID; -+ trap->tr_waitForEopTrans = TR_TRANS_INVALID; -+ trap->tr_identifyTrans = TR_TRANS_INVALID; -+ -+ if (trap->tr_numTransactions > MAX_TRAPPED_TRANS) -+ trap->tr_flags = TR_FLAG_TOOMANY_TRANS; -+ -+ /* -+ * Now scan all the transactions received -+ */ -+ for (i = 0; i < MIN(trap->tr_numTransactions, MAX_TRAPPED_TRANS) ; i++) -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[i]; -+ E4_uint64 status = hdrp->IProcStatusCntxAndTrType; -+ -+ if (trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ switch (IPROC_TransactionType (status) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ trap->tr_identifyTrans = i; -+ break; -+ } -+ } -+ -+ if (IPROC_TrapValue(status) == InputNoFault) /* We're looking at transactions stored before the trap */ -+ continue; /* these should only be identifies */ -+ -+ if (trap->tr_trappedTrans == TR_TRANS_INVALID) /* Remember the transaction which caused the */ -+ trap->tr_trappedTrans = i; /* trap */ -+ -+ if (IPROC_GoodAckSent (status) & (1 << IPROC_InputterChan (status))) -+ trap->tr_flags |= TR_FLAG_ACK_SENT; -+ -+ if (IPROC_EOPTrap(status)) /* Check for EOP */ -+ { -+ ASSERT (i == trap->tr_numTransactions - 1); -+ -+ switch (IPROC_EOPType(status)) -+ { -+ case EOP_GOOD: -+ /* if we get an EOP_GOOD then the outputer should have received a PAckOk. */ -+ /* unless it was a flood, in which case someone must have sent an ack */ -+ /* but not necessarily us */ -+ break; -+ -+ case EOP_BADACK: -+ /* if we get an EOP_BADACK then the outputer did not receive a PAckOk even if -+ * we sent a PAckOk. WFlag this to ignore the AckSent. */ -+ trap->tr_flags |= TR_FLAG_EOP_BAD; -+ break; -+ -+ case EOP_ERROR_RESET: -+ /* if we get an EOP_ERROR_RESET then the outputer may or may not have got a PAckOk. */ -+ trap->tr_flags |= TR_FLAG_EOP_ERROR; -+ break; -+ -+ default: -+ printk ("elan4_inspect_iproc_trap: unknown eop type %d", IPROC_EOPType(status)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ continue; -+ } -+ else -+ { -+ if (IPROC_BadLength(status) || (IPROC_TransCRCStatus (status) == CRC_STATUS_ERROR || -+ IPROC_TransCRCStatus (status) == CRC_STATUS_BAD)) -+ { -+ { -+ register int j; -+ if (IPROC_BadLength(status)) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped on bad length data. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ else -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: Trapped with bad CRC. status=%016llx Address=%016llx\n", -+ status, hdrp->TrAddr); -+ for (j = 0; j < TRANS_DATA_DWORDS; j++) -+ PRINTF2 (DBG_DEVICE, DBG_INTR, "LinkError: DataBuffers[%d] : %016llx\n", j, trap->tr_dataBuffers[i].Data[j]); -+ } -+ -+ trap->tr_flags |= TR_FLAG_BAD_TRANS; -+ continue; -+ } -+ -+ if (IPROC_TransCRCStatus (status) == CRC_STATUS_DISCARD) -+ continue; -+ -+ if ((((IPROC_TransactionType(status) & TR_BLOCK_OPCODE_MASK) == TR_WRITEBLOCK) || -+ (IPROC_TransactionType(status) == TR_TRACEROUTE_TRANS)) && -+ (trap->tr_flags & TR_FLAG_ACK_SENT) && trap->tr_identifyTrans == TR_TRANS_INVALID) -+ { -+ /* -+ * Writeblock after the ack is sent without an identify transaction - this is -+ * considered to be a DMA packet and requires the next packet to be nacked - since -+ * the DMA processor will send this in a deterministic time and there's an upper -+ * limit on the network latency (the output timeout) we just need to hold the context -+ * filter up for a while. -+ */ -+ trap->tr_flags |= TR_FLAG_DMA_PACKET; -+ } -+ -+ if (IPROC_LastTrans(status) && (IPROC_TransactionType(status) & TR_WAIT_FOR_EOP)) -+ { -+ /* -+ * WaitForEop transactions - if we have to do network error fixup -+ * then we may need to execute/ignore this transaction dependant -+ * on whether the source will be resending it. -+ */ -+ trap->tr_waitForEopTrans = i; -+ } -+ -+ /* -+ * This is a special case caused by a minor input processor bug. -+ * If simultaneous InputMemoryFault and InputEventEngineTrapped occur then the chip will probably return -+ * InputEventEngineTrapped even though the write of the back pointer has not occured and must be done by -+ * the trap handler. -+ * In this case the fault address will equal q->q_bptr. If there has been only EventEngineTrap then the -+ * the fault address should be zero as the trap handler now always zeros this after every input trap. -+ */ -+ if ((IPROC_TransactionType (status) & TR_OPCODE_MASK) == (TR_INPUT_Q_COMMIT & TR_OPCODE_MASK) && -+ trap->tr_faultarea.FaultAddress == hdrp->TrAddr + offsetof(E4_InputQueue, q_bptr) && -+ IPROC_TrapValue(status) == InputEventEngineTrapped) -+ { -+ hdrp->IProcStatusCntxAndTrType = (status & 0xFFFFFFF0FFFFFFFFull) | ((E4_uint64) InputMemoryFault << 32); -+ } -+ } -+ -+ PRINTF (DBG_DEVICE, DBG_INTR, "inspect[%d] status=%llx TrapValue=%d -> flags %x\n", i, status, IPROC_TrapValue(status), trap->tr_flags); -+ } -+} -+ -+E4_uint64 -+elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq) -+{ -+ sdramaddr_t cqdesc = dev->dev_cqaddr + elan4_cq2num(cq) * sizeof (E4_CommandQueueDesc); -+ E4_uint64 cqcontrol = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ E4_uint32 extractOff = CQ_ExtractPtr (cqcontrol) & (CQ_Size(cq->cq_size)-1); -+ -+ if (extractOff == 0) -+ extractOff = CQ_Size(cq->cq_size) - sizeof (E4_uint64); -+ else -+ extractOff -= sizeof (E4_uint64); -+ -+ return (elan4_sdram_readq (dev, cq->cq_space + extractOff)); -+} -+ -+EXPORT_SYMBOL(elan4_extract_eproc_trap); -+EXPORT_SYMBOL(elan4_display_eproc_trap); -+EXPORT_SYMBOL(elan4_extract_cproc_trap); -+EXPORT_SYMBOL(elan4_display_cproc_trap); -+EXPORT_SYMBOL(elan4_extract_dproc_trap); -+EXPORT_SYMBOL(elan4_display_dproc_trap); -+EXPORT_SYMBOL(elan4_extract_tproc_trap); -+EXPORT_SYMBOL(elan4_display_tproc_trap); -+EXPORT_SYMBOL(elan4_extract_iproc_trap); -+EXPORT_SYMBOL(elan4_inspect_iproc_trap); -+EXPORT_SYMBOL(elan4_display_iproc_trap); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/user.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/user.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/user.c 2005-07-28 14:52:52.846679040 -0400 -@@ -0,0 +1,3362 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.c,v 1.68.2.11 2005/03/09 12:00:09 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.c,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+ -+/* allow this code to compile against an Eagle elanmod */ -+#ifdef __ELANMOD_DEVICE_H -+#define elan_attach_cap(cap,rnum,args,func) elanmod_attach_cap(cap,args,func) -+#define elan_detach_cap(cap,rnum) elanmod_detach_cap(cap) -+#endif -+ -+#define NETERR_MSGS 16 -+ -+int user_p2p_route_options = FIRST_TIMEOUT(3); -+int user_bcast_route_options = FIRST_TIMEOUT(3); -+int user_dproc_retry_count = 15; -+int user_cproc_retry_count = 2; -+ -+int num_fault_save = 30; -+int min_fault_pages = 1; -+int max_fault_pages = 128; -+ -+static int -+user_validate_cap (USER_CTXT *uctx, ELAN_CAPABILITY *cap, unsigned use) -+{ -+ /* Don't allow a user process to attach to system context */ -+ if (ELAN4_SYSTEM_CONTEXT (cap->cap_lowcontext) || ELAN4_SYSTEM_CONTEXT (cap->cap_highcontext)) -+ { -+ PRINTF3 (DBG_DEVICE, DBG_VP,"user_validate_cap: lctx %x hctx %x high %x\n", cap->cap_lowcontext, cap->cap_highcontext, ELAN4_KCOMM_BASE_CONTEXT_NUM); -+ PRINTF0 (DBG_DEVICE, DBG_VP,"user_validate_cap: user process cant attach to system cap\n"); -+ return (EINVAL); -+ } -+ -+ return elanmod_classify_cap(&uctx->uctx_position, cap, use); -+} -+ -+static __inline__ void -+__user_signal_trap (USER_CTXT *uctx) -+{ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_IDLE: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deliver signal %d to pid %d\n", uctx->uctx_trap_signo, uctx->uctx_trap_pid); -+ -+ if (uctx->uctx_trap_signo) -+ kill_proc (uctx->uctx_trap_pid, uctx->uctx_trap_signo, 1); -+ break; -+ -+ case UCTX_TRAP_SLEEPING: -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: wakeup sleeping trap handler\n"); -+ -+ kcondvar_wakeupone (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ break; -+ } -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+} -+ -+static void -+user_signal_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_timer: state=%d pid=%d signal=%d (now %d start %d)\n", -+ uctx->uctx_trap_state, uctx->uctx_trap_pid, uctx->uctx_trap_signo, jiffies, -+ uctx->uctx_int_start); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ __user_signal_trap (uctx); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+#define MAX_INTS_PER_TICK 50 -+#define MIN_INTS_PER_TICK 20 -+ -+static void -+user_signal_trap (USER_CTXT *uctx) -+{ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: state=%d pid=%d signal=%d%s\n", uctx->uctx_trap_state, -+ uctx->uctx_trap_pid, uctx->uctx_trap_signo, timer_pending(&uctx->uctx_int_timer) ? " (timer-pending)" : ""); -+ -+ uctx->uctx_int_count++; -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ return; -+ -+ if (uctx->uctx_int_count > ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: deferring signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ ((int)(jiffies - uctx->uctx_int_start) * MAX_INTS_PER_TICK)); -+ -+ /* We're interrupting too fast, so defer this signal */ -+ uctx->uctx_int_timer.expires = jiffies + (++uctx->uctx_int_delay); -+ -+ add_timer (&uctx->uctx_int_timer); -+ } -+ else -+ { -+ __user_signal_trap (uctx); -+ -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: check signal for %d ticks (count %d ticks %d -> %d)\n", -+ uctx->uctx_int_delay + 1, uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start), -+ (int)(jiffies - uctx->uctx_int_start) * MIN_INTS_PER_TICK); -+ -+ if (uctx->uctx_int_count < ((int) (jiffies - uctx->uctx_int_start)) * MIN_INTS_PER_TICK) -+ { -+ PRINTF (uctx, DBG_TRAP, "user_signal_trap: reset interrupt throttle (count %d ticks %d)\n", -+ uctx->uctx_int_count, (int) (jiffies - uctx->uctx_int_start)); -+ -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ } -+ } -+} -+ -+static void -+user_neterr_timer (unsigned long arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_status |= UCTX_NETERR_TIMER; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush_dma_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, DProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (typeSize) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ PRINTF4 (uctx, DBG_SWAP, "user_flush_dma_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Desc.dma_typeSize, -+ qentry.Desc.dma_cookie, qentry.Desc.dma_vproc, qentry.Desc.dma_srcAddr); -+ PRINTF3 (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", qentry.Desc.dma_dstAddr, -+ qentry.Desc.dma_srcEvent, qentry.Desc.dma_dstEvent); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dmaQ)) -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = qentry.Desc; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_thread_runqueue (ELAN4_DEV *dev, USER_CTXT *uctx, int qfull) -+{ -+ E4_uint64 qptrs = read_reg64 (dev, TProcLowPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_TProcQueueEntry qentry; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 context = elan4_sdram_readq (dev, qfptr + offsetof (E4_TProcQueueEntry, Context)); -+ -+ if (TPROC_Context (context) == uctx->uctx_ctxt.ctxt_num) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_TProcQueueEntry)); -+ -+ PRINTF (uctx, DBG_SWAP, "user_flush_thread_runqueue: %016llx %016llx %016llx %016llx\n", qentry.Regs.Registers[0], -+ qentry.Regs.Registers[1], qentry.Regs.Registers[2], qentry.Regs.Registers[3]); -+ PRINTF (uctx, DBG_SWAP, " %016llx %016llx %016llx\n", -+ qentry.Regs.Registers[4], qentry.Regs.Registers[5], qentry.Regs.Registers[6]); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_threadQ)) -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = qentry.Regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ } -+ -+ /* change the thread to execute the suspend sequence */ -+ qentry.Regs.Registers[0] = dev->dev_tproc_suspend; -+ qentry.Regs.Registers[1] = dev->dev_tproc_space; -+ qentry.Context = dev->dev_ctxt.ctxt_num; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_TProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_TProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+} -+ -+static void -+user_flush_dmas (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & INT_DProcHalted) != 0); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ user_flush_dma_runqueue (dev, uctx, qfull); -+ -+ uctx->uctx_status = (uctx->uctx_status | UCTX_STOPPED) & ~UCTX_STOPPING; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush_dmas: statux %x - stopped\n", uctx->uctx_status); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_flush (ELAN4_DEV *dev, void *arg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ ASSERT ((read_reg32 (dev, InterruptReg) & (INT_Halted|INT_Discarding)) == (INT_Halted|INT_Discarding)); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if ((uctx->uctx_status & (UCTX_SWAPPED_REASONS|UCTX_STOPPED_REASONS)) == 0) -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - no more reasons\n", uctx->uctx_status); -+ -+ uctx->uctx_status &= ~UCTX_STOPPING; -+ -+ user_signal_trap (uctx); -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: status %x - flushing context\n", uctx->uctx_status); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_RUNNING) -+ { -+ /* NOTE: since the inserter can still be running we modify the permissions -+ * to zero then when the extractor starts up again it will trap */ -+ PRINTF1 (uctx, DBG_SWAP, "user_flush: stopping cq indx=%d\n", elan4_cq2idx(ucq->ucq_cq)); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, 0, 0); -+ } -+ } -+ -+ user_flush_thread_runqueue (dev, uctx, TPROC_LowRunQueueFull(read_reg64 (dev, TProcStatus))); -+ -+ /* since we can't determine whether the dma run queue is full or empty, we use a dma -+ * halt operation to do the flushing - as the reason for halting the dma processor -+ * will be released when we return, we keep it halted until the flush has completed */ -+ elan4_queue_dma_flushop (dev, &uctx->uctx_dma_flushop, 0); -+ -+ if (uctx->uctx_status & UCTX_EXITING) -+ elan4_flush_icache_halted (&uctx->uctx_ctxt); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_set_filter (USER_CTXT *uctx, E4_uint32 state) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ elan4_set_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext, state); -+ } -+} -+ -+static void -+user_start_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ if (UCTX_NACKING(uctx)) -+ uctx->uctx_status |= reason; -+ else -+ { -+ uctx->uctx_status |= reason; -+ -+ user_set_filter (uctx, E4_FILTER_STATS | E4_FILTER_DISCARD_ALL); -+ } -+} -+ -+static void -+user_stop_nacking (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_nacking: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ uctx->uctx_status &= ~reason; -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+} -+ -+static void -+user_start_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev =uctx->uctx_ctxt.ctxt_dev; -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_start_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ ASSERT (! (uctx->uctx_status & UCTX_STOPPED)); -+ -+ user_start_nacking (uctx, reason); -+ -+ if ((uctx->uctx_status & UCTX_STOPPING) != 0) -+ return; -+ -+ uctx->uctx_status |= UCTX_STOPPING; -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+} -+ -+static void -+user_stop_stopping (USER_CTXT *uctx, unsigned reason) -+{ -+ PRINTF2 (uctx, DBG_SWAP, "user_stop_stopping: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_stop_stopping: no more reasons => %x\n", uctx->uctx_status); -+ -+ user_signal_trap (uctx); -+ } -+} -+ -+void -+user_swapout (USER_CTXT *uctx, unsigned reason) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapout: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ user_start_nacking (uctx, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING) && /* wait for someone else to finish */ -+ uctx->uctx_trap_count > 0) /* and for trap handlers to notice */ -+ { /* and exit */ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: waiting for %d trap handlers to exit/previous swapout\n", uctx->uctx_trap_count); -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_SWAPPED) /* already swapped out */ -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return; -+ } -+ -+ uctx->uctx_status |= (UCTX_SWAPPING|UCTX_STOPPING); /* mark the context as swapping & stopping */ -+ -+ /* queue the halt operation to remove all threads/dmas/cqs from the run queues */ -+ /* and also flush through the context filter change */ -+ elan4_queue_haltop (dev, &uctx->uctx_haltop); -+ -+ while (! (uctx->uctx_status & UCTX_STOPPED)) -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); -+ -+ /* all state has been removed from the elan - we can now "tidy" it up */ -+ -+ PRINTF0 (uctx, DBG_SWAP, "user_swapout: swapped out\n"); -+ -+ uctx->uctx_status = (uctx->uctx_status & ~UCTX_SWAPPING) | UCTX_SWAPPED; -+ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapout: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_swapin (USER_CTXT *uctx, unsigned reason) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (uctx->uctx_status & UCTX_SWAPPED_REASONS); -+ -+ PRINTF2 (uctx, DBG_SWAP, "user_swapin: status %x reason %x\n", uctx->uctx_status, reason); -+ -+ while (uctx->uctx_status & (UCTX_SWAPPING|UCTX_STOPPING)) /* wait until other threads have */ -+ kcondvar_wait (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags); /* completed their swap operation */ -+ -+ ASSERT (uctx->uctx_status & (UCTX_SWAPPED | UCTX_STOPPED)); -+ -+ user_stop_nacking (uctx, reason); -+ -+ if (! (uctx->uctx_status & UCTX_SWAPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_SWAPPED; -+ -+ /* no longer swapped out - wakeup anyone sleeping waiting for swapin */ -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ if (! (uctx->uctx_status & UCTX_STOPPED_REASONS)) -+ { -+ uctx->uctx_status &= ~UCTX_STOPPED; -+ user_signal_trap (uctx); -+ } -+ } -+ -+ PRINTF1 (uctx, DBG_SWAP, "user_swapin: all done - status %x\n", uctx->uctx_status); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+void -+user_destroy_callback (void *arg, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) arg; -+ -+ PRINTF (uctx, DBG_VP, "user_destroy_callback: %s\n", map == NULL ? "cap destoyed" : "map destroyed"); -+} -+ -+int -+user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CTXT_ENTRY *cent; -+ unsigned long flags; -+ int ctype, res; -+ -+ if ((ctype = user_validate_cap (uctx, cap, ELAN_USER_ATTACH)) < 0) -+ return ctype; -+ -+ if ((ctype == ELAN_CAP_RMS) && (res = elan_attach_cap (cap, dev->dev_devinfo.dev_rail, uctx, user_destroy_callback)) != 0) -+ { -+ /* NOTE: elan_attach_cap returns +ve errnos */ -+ return -res; -+ } -+ -+ KMEM_ALLOC (cent, USER_CTXT_ENTRY *, sizeof (USER_CTXT_ENTRY), 1); -+ if (cent == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ return -ENOMEM; -+ } -+ -+ KMEM_ALLOC (cent->cent_cap, ELAN_CAPABILITY *, ELAN_CAP_SIZE(cap), 1); -+ if (cent->cent_cap == NULL) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ return -ENOMEM; -+ } -+ -+ memcpy (cent->cent_cap, cap, ELAN_CAP_SIZE(cap)); -+ -+ if ((res = elan4_attach_filter (&uctx->uctx_ctxt, cap->cap_mycontext)) != 0) -+ { -+ if (ctype == ELAN_CAP_RMS) -+ elan_detach_cap (cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ -+ return res; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_add_tail (¢->cent_link, &uctx->uctx_cent_list); -+ -+ if (! UCTX_NACKING (uctx)) -+ user_set_filter (uctx, E4_FILTER_STATS); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (0); -+ -+} -+ -+void -+user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct list_head *entry; -+ struct list_head *next; -+ struct list_head list; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF (uctx, DBG_NETWORK_CTX, cap ? "user_detach: network context %d\n" : "user_detach: all network contexts\n", cap ? cap->cap_mycontext : 0); -+ -+ list_for_each_safe (entry, next, &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (entry, USER_CTXT_ENTRY, cent_link); -+ -+ if (cap == NULL || ELAN_CAP_MATCH (cap, cent->cent_cap)) -+ { -+ PRINTF1 (uctx, DBG_NETWORK_CTX, "user_detach: detach from network context %d\n", cent->cent_cap->cap_mycontext); -+ -+ elan4_detach_filter (&uctx->uctx_ctxt, cent->cent_cap->cap_mycontext); -+ -+ list_del (¢->cent_link); -+ list_add_tail (¢->cent_link, &list); -+ } -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ USER_CTXT_ENTRY *cent = list_entry (list.next, USER_CTXT_ENTRY, cent_link); -+ -+ list_del (¢->cent_link); -+ -+ if (user_validate_cap (uctx, cent->cent_cap, ELAN_USER_DETACH) == ELAN_CAP_RMS) -+ elan_detach_cap (cent->cent_cap, dev->dev_devinfo.dev_rail); -+ -+ KMEM_FREE (cent->cent_cap, ELAN_CAP_SIZE (cent->cent_cap)); -+ KMEM_FREE (cent, sizeof (USER_CTXT_ENTRY)); -+ } -+} -+ -+void -+user_block_inputter (USER_CTXT *uctx, unsigned blocked) -+{ -+ unsigned long flags; -+ int isblocked; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ isblocked = (uctx->uctx_status & UCTX_USER_FILTERING); -+ -+ if (blocked && !isblocked) -+ user_start_nacking (uctx, UCTX_USER_FILTERING); -+ -+ if (!blocked && isblocked) -+ user_stop_nacking (uctx, UCTX_USER_FILTERING); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static USER_VPSEG * -+user_install_vpseg (USER_CTXT *uctx, unsigned process, unsigned entries) -+{ -+ struct list_head *entry; -+ USER_VPSEG *seg; -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (process <= (seg->vps_process + seg->vps_entries-1) && -+ (process + entries - 1) >= seg->vps_process) -+ return ((USER_VPSEG *) NULL); -+ } -+ -+ KMEM_ZALLOC (seg, USER_VPSEG *, sizeof (USER_VPSEG), 1); -+ -+ if (seg == (USER_VPSEG *) NULL) -+ return ((USER_VPSEG *) NULL); -+ -+ seg->vps_process = process; -+ seg->vps_entries = entries; -+ -+ list_add_tail (&seg->vps_link, &uctx->uctx_vpseg_list); -+ -+ return (seg); -+} -+ -+static void -+user_remove_vpseg (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_del (&seg->vps_link); -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ /* These pointers (union) are only valid for P2P segs */ -+ if (seg->vps_p2p_routes) -+ KMEM_FREE (seg->vps_p2p_routes, sizeof (E4_VirtualProcessEntry) * seg->vps_entries); -+ -+ if (seg->vps_p2p_cap) -+ KMEM_FREE (seg->vps_p2p_cap, ELAN_CAP_SIZE(seg->vps_p2p_cap)); -+ -+ break; -+ -+ case USER_VPSEG_BCAST: -+ ; -+ } -+ -+ KMEM_FREE (seg, sizeof (USER_VPSEG)); -+} -+ -+static USER_VPSEG * -+user_find_vpseg (USER_CTXT *uctx, unsigned low, unsigned high) -+{ -+ struct list_head *entry; -+ -+ ASSERT (kmutex_is_locked (&uctx->uctx_vpseg_lock)); -+ -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_process <= low && (seg->vps_process + seg->vps_entries) > high) -+ return (seg); -+ } -+ -+ return ((USER_VPSEG *) NULL); -+} -+ -+static ELAN_LOCATION -+user_process2location (USER_CTXT *uctx, USER_VPSEG *seg, unsigned process) -+{ -+ ELAN_LOCATION location; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ location.loc_node = ELAN_INVALID_NODE; -+ location.loc_context = -1; -+ -+ if (seg == NULL) -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL || (seg->vps_type != USER_VPSEG_P2P)) -+ return (location); -+ -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (vpOff++ == (process - seg->vps_process)) -+ { -+ location.loc_node = seg->vps_p2p_cap->cap_lownode + nodeOff; -+ location.loc_context = seg->vps_p2p_cap->cap_lowcontext + ctxOff; -+ goto found; -+ } -+ } -+ } -+ } -+ break; -+ } -+ -+ found: -+ return (location); -+} -+ -+static unsigned -+user_location2process (USER_CTXT *uctx, ELAN_LOCATION location) -+{ -+ unsigned int process = ELAN_INVALID_PROCESS; -+ struct list_head *entry; -+ int nnodes, nctxs; -+ int nodeOff, ctxOff, vpOff; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ list_for_each (entry, &uctx->uctx_vpseg_list) { -+ USER_VPSEG *seg = list_entry (entry, USER_VPSEG, vps_link); -+ -+ if (seg->vps_type != USER_VPSEG_P2P) -+ continue; -+ -+ if (location.loc_node >= seg->vps_p2p_cap->cap_lownode && location.loc_node <= seg->vps_p2p_cap->cap_highnode && -+ location.loc_context >= seg->vps_p2p_cap->cap_lowcontext && location.loc_context <= seg->vps_p2p_cap->cap_highcontext) -+ { -+ nnodes = ELAN_CAP_NUM_NODES (seg->vps_p2p_cap); -+ nctxs = ELAN_CAP_NUM_CONTEXTS (seg->vps_p2p_cap); -+ -+ switch (seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((seg->vps_p2p_cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (seg->vps_p2p_cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (location.loc_node == seg->vps_p2p_cap->cap_lownode + nodeOff && -+ location.loc_context == seg->vps_p2p_cap->cap_lowcontext + ctxOff) -+ { -+ process = seg->vps_process + vpOff; -+ goto found; -+ } -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+ } -+ } -+ found: -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (process); -+} -+ -+static void -+user_loadroute_vpseg (USER_CTXT *uctx, USER_VPSEG *seg, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_CAPABILITY *cap = seg->vps_p2p_cap; -+ unsigned nnodes = ELAN_CAP_NUM_NODES (cap); -+ unsigned nctxs = ELAN_CAP_NUM_CONTEXTS (cap); -+ E4_VirtualProcessEntry route; -+ unsigned nodeOff; -+ unsigned ctxOff; -+ unsigned vpOff; -+ -+ switch (cap->cap_type & ELAN_CAP_TYPE_MASK) -+ { -+ case ELAN_CAP_TYPE_BLOCK: -+ for (nodeOff = 0, vpOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ for (ctxOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, ctxOff + (nodeOff * nctxs))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ -+ case ELAN_CAP_TYPE_CYCLIC: -+ for (ctxOff = 0, vpOff = 0; ctxOff < nctxs; ctxOff++) -+ { -+ for (nodeOff = 0; nodeOff < nnodes; nodeOff++) -+ { -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) || BT_TEST (cap->cap_bitmap, nodeOff + (ctxOff * nnodes))) -+ { -+ if (seg->vps_p2p_routes != NULL) -+ route = seg->vps_p2p_routes[vpOff]; -+ else if (elan4_generate_route (&uctx->uctx_position, &route, cap->cap_lowcontext + ctxOff, -+ cap->cap_lownode + nodeOff, cap->cap_lownode + nodeOff, user_p2p_route_options) < 0) -+ { -+ vpOff++; -+ continue; -+ } -+ -+ PRINTF5 (uctx, DBG_VP, "user_loadroute_vpseg: virtual process %d -> node %d context %d [%016llx.%016llx]\n", -+ seg->vps_process + vpOff, cap->cap_lownode + nodeOff, cap->cap_lowcontext + ctxOff, -+ route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process + vpOff, &route); -+ -+ vpOff++; -+ } -+ } -+ } -+ break; -+ } -+} -+ -+static int -+user_loadroute_bcast (USER_CTXT *uctx, USER_VPSEG *seg) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN_POSITION *pos = &uctx->uctx_position; -+ E4_VirtualProcessEntry route; -+ USER_VPSEG *aseg; -+ int res; -+ ELAN_LOCATION low; -+ ELAN_LOCATION high; -+ -+ if ((aseg = user_find_vpseg (uctx, seg->vps_bcast_lowvp, seg->vps_bcast_highvp)) == NULL || aseg->vps_type != USER_VPSEG_P2P) -+ return (-EINVAL); -+ -+#ifdef use_elanmod -+ if ((res = user_validate_cap (dev, aseg->vps_p2p_cap, ELAN_USER_BROADCAST)) < 0) -+ return (res); -+#endif -+ -+ low = user_process2location (uctx, aseg, seg->vps_bcast_lowvp); -+ high = user_process2location (uctx, aseg, seg->vps_bcast_highvp); -+ -+ if (low.loc_context != high.loc_context) -+ return (-EINVAL); -+ -+ /* NOTE: if loopback can only broadcast to ourself - -+ * if back-to-back can only broadcast to other node */ -+ if ((pos->pos_mode == ELAN_POS_MODE_LOOPBACK && low.loc_node != high.loc_node && low.loc_node != pos->pos_nodeid) || -+ (pos->pos_mode == ELAN_POS_MODE_BACKTOBACK && low.loc_node != high.loc_node && low.loc_node == pos->pos_nodeid)) -+ { -+ return (-EINVAL); -+ } -+ -+ if ((res = elan4_generate_route (pos, &route, low.loc_context, low.loc_node, high.loc_node, user_bcast_route_options)) < 0) -+ return (res); -+ -+ PRINTF (uctx, DBG_VP, "user_loadroute_bcast: virtual process %d -> nodes %d.%d context %d [%016llx.%016llx]\n", -+ seg->vps_process, low.loc_node, high.loc_node, low.loc_context, route.Values[0], route.Values[1]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, seg->vps_process, &route); -+ return (0); -+} -+ -+int -+user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap) -+{ -+ USER_VPSEG *seg; -+ ELAN_CAPABILITY *ncap; -+ unsigned entries; -+ -+ if ((cap->cap_type & ELAN_CAP_TYPE_NO_BITMAP) == 0) -+ entries = bt_nbits (cap->cap_bitmap , ELAN_CAP_BITMAPSIZE(cap)); -+ else -+ entries = ELAN_CAP_BITMAPSIZE(cap); -+ -+ if ((process + entries) > (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ KMEM_ALLOC (ncap, ELAN_CAPABILITY *, ELAN_CAP_SIZE (cap), 1); -+ -+ if (ncap == NULL) -+ return (-ENOMEM); -+ -+ memcpy (ncap, cap, ELAN_CAP_SIZE (cap)); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, entries)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_P2P; -+ seg->vps_p2p_cap = ncap; -+ seg->vps_p2p_routes = NULL; -+ -+ user_loadroute_vpseg (uctx, seg, &uctx->uctx_position); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp) -+{ -+ USER_VPSEG *seg; -+ int res; -+ -+ if (lowvp > highvp || process >= (E4_VPT_MIN_ENTRIES << uctx->uctx_routetable->tbl_size)) -+ return (-EINVAL); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_install_vpseg (uctx, process, 1)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ seg->vps_type = USER_VPSEG_BCAST; -+ seg->vps_bcast_lowvp = lowvp; -+ seg->vps_bcast_highvp = highvp; -+ -+ if ((res = user_loadroute_bcast (uctx, seg)) < 0) -+ user_remove_vpseg (uctx, seg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+int -+user_removevp (USER_CTXT *uctx, unsigned process) -+{ -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if (process == ELAN_INVALID_PROCESS) -+ seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link); -+ else -+ seg = user_find_vpseg (uctx, process, process); -+ -+ if (seg == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ do { -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int i; -+ -+ for (i = 0; i < seg->vps_entries; i++) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, seg->vps_process + i); -+ -+ user_remove_vpseg (uctx, seg); -+ -+ } while (process == ELAN_INVALID_PROCESS && (seg = list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)) != NULL); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ /* check that the route supplied is valid and goes to the correct place */ -+ location = user_process2location (uctx, seg, process); -+ -+ if (elan4_check_route (&uctx->uctx_position, location, route, 0) != 0) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes == NULL) -+ KMEM_ZALLOC (seg->vps_p2p_routes, E4_VirtualProcessEntry *, sizeof (E4_VirtualProcessEntry) * seg->vps_entries, 1); -+ -+ if (seg->vps_p2p_routes == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-ENOMEM); -+ } -+ -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = route->Values[0]; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = ROUTE_CTXT_VALUE(location.loc_context) | (route->Values[1] & ~ROUTE_CTXT_MASK); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_set_route: vp=%d -> %016llx%016llx\n", process, -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1], seg->vps_p2p_routes[process - seg->vps_process].Values[0]); -+ -+ elan4_write_route (dev, uctx->uctx_routetable, process, &seg->vps_p2p_routes[process - seg->vps_process]); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_reset_route (USER_CTXT *uctx, unsigned process) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ ELAN_LOCATION location; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ if (seg->vps_p2p_routes != NULL) -+ { -+ seg->vps_p2p_routes[process - seg->vps_process].Values[0] = 0; -+ seg->vps_p2p_routes[process - seg->vps_process].Values[1] = 0; -+ } -+ -+ /* generate the default route to this location */ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_ROUTE, "user_reset_route: vp=%d\n", process); -+ -+ if (elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, 0) < 0) -+ elan4_invalidate_route (dev, uctx->uctx_routetable, process); -+ else -+ elan4_write_route (dev, uctx->uctx_routetable, process, &route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (0); -+} -+ -+int -+user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_VPSEG *seg; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL || seg->vps_type != USER_VPSEG_P2P) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ elan4_read_route (dev, uctx->uctx_routetable, process, route); -+ -+ *error = elan4_check_route (&uctx->uctx_position, user_process2location (uctx, seg, process), route, 0); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (0); -+} -+ -+int -+user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg) -+{ -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ unsigned long flags; -+ int res, found = 0; -+ struct list_head *el; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ /* determine the location of the virtual process */ -+ if ((seg = user_find_vpseg (uctx, vp, vp)) == NULL) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d has no vpseg\n", vp); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+ location = user_process2location (uctx, seg, vp); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: vp=%d is a bcast vp\n", vp); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return -EINVAL; -+ } -+ -+ /* check that we're attached to the network context */ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (el , &uctx->uctx_cent_list) { -+ USER_CTXT_ENTRY *cent = list_entry (el, USER_CTXT_ENTRY, cent_link); -+ -+ if (cent->cent_cap->cap_mycontext == nctx) -+ found++; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if (! found) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_send_neterr_msg: nctx=%d not attached\n", nctx); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return -EINVAL; -+ } -+ -+ /* Update the fields which the user might have "faked" */ -+ msg->msg_context = location.loc_context; -+ msg->msg_sender.loc_node = uctx->uctx_position.pos_nodeid; -+ msg->msg_sender.loc_context = nctx; -+ -+ res = elan4_neterr_sendmsg (uctx->uctx_ctxt.ctxt_dev, location.loc_node, retries, msg); -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ return (res); -+} -+ -+ -+static int -+user_resolvevp (USER_CTXT *uctx, unsigned process) -+{ -+ int res = 0; -+ USER_VPSEG *seg; -+ ELAN_LOCATION location; -+ E4_VirtualProcessEntry route; -+ -+ PRINTF1 (uctx, DBG_VP, "user_resolvevp: process=%d\n", process); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ -+ if ((seg = user_find_vpseg (uctx, process, process)) == NULL) -+ { -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (-EINVAL); -+ } -+ -+ switch (seg->vps_type) -+ { -+ case USER_VPSEG_P2P: -+#ifdef use_elanmod -+ if ((res = user_validate_cap (uctx, seg->vps_p2p_cap, ELAN_USER_P2P)) != 0) -+ break; -+#endif -+ -+ location = user_process2location (uctx, seg, process); -+ -+ PRINTF (uctx, DBG_VP, "user_resolvevp: vp=%d -> node=%d ctx=%d\n", process, location.loc_node, location.loc_context); -+ -+ if (seg->vps_p2p_routes != NULL && seg->vps_p2p_routes[process - seg->vps_process].Values[0] != 0) -+ route = seg->vps_p2p_routes[process - seg->vps_process]; -+ else if ((res = elan4_generate_route (&uctx->uctx_position, &route, location.loc_context, location.loc_node, location.loc_node, user_p2p_route_options)) < 0) -+ break;; -+ -+ elan4_write_route (uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, process, &route); -+ break; -+ -+ case USER_VPSEG_BCAST: -+ res = user_loadroute_bcast (uctx, seg); -+ break; -+ -+ default: -+ res = -EINVAL; -+ break; -+ } -+ -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ return (res); -+} -+ -+static void -+user_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_eprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_EPROC, "user_eproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps), 0); -+ -+ DBGCMD (ctxt, DBG_EPROC, elan4_display_eproc_trap (ctxt, DBG_EPROC, "user_eproc_trap", RING_QUEUE_BACK(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_eprocTrapQ)) -+ user_start_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_CQ *ucq = NULL; -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2num(ucq->ucq_cq) == cqnum) -+ break; -+ } -+ -+ ASSERT (ucq != NULL); -+ -+ if (ucq->ucq_state != UCQ_RUNNING && CPROC_TrapType (status) == CommandProcInserterError) -+ { -+ PRINTF (ctxt, DBG_TRAP, "user_cproc_trap CommandProcInserterError\n"); -+ ucq->ucq_errored++; -+ } -+ else -+ { -+ ASSERT (ucq->ucq_state == UCQ_RUNNING); -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &ucq->ucq_trap, cqnum); -+ -+ DBGCMD (ctxt, DBG_CPROC, elan4_display_cproc_trap (ctxt, DBG_CPROC, "user_cproc_trap", &ucq->ucq_trap)); -+ -+ ucq->ucq_state = UCQ_TRAPPED; -+ -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_dprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_DPROC, "user_dproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_DPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ ELAN4_DPROC_TRAP *trap = RING_QUEUE_BACK (uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ DBGCMD (ctxt, DBG_DPROC, elan4_display_dproc_trap (ctxt, DBG_DPROC, "user_dproc_trap", trap)); -+ -+ if (!DPROC_PrefetcherFault (status) && DPROC_TrapType(status) == DmaProcFailCountError && !RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = trap->tr_desc; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ } -+ else -+ { -+ if (RING_QUEUE_ADD (uctx->uctx_dprocTrapQ)) -+ user_start_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ } -+ } -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_REALLY_FULL (uctx->uctx_tprocTrapQ) || (uctx->uctx_status & UCTX_STOPPED)) -+ { -+ PRINTF (uctx, DBG_TPROC, "user_tproc_trap: %s\n", (uctx->uctx_status & UCTX_STOPPED) ? "context stopped" : "trap queue overflow"); -+ -+ uctx->uctx_status |= UCTX_TPROC_QUEUE_ERROR; -+ } -+ else -+ { -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps)); -+ -+ DBGCMD (ctxt, DBG_TPROC, elan4_display_tproc_trap (ctxt, DBG_TPROC, "user_tproc_trap", RING_QUEUE_BACK (uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps))); -+ -+ if (RING_QUEUE_ADD (uctx->uctx_tprocTrapQ)) -+ user_start_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ } -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[unit & 1]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ ASSERT (utrap->ut_state == UTS_IPROC_RUNNING); -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, &utrap->ut_trap, unit); -+ DBGCMD (ctxt, DBG_IPROC, elan4_display_iproc_trap (ctxt, DBG_IPROC, "user_iproc_trap", &utrap->ut_trap)); -+ -+ utrap->ut_state = UTS_IPROC_TRAPPED; -+ -+ user_start_nacking (uctx, unit ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx\n", cookie); -+ -+ switch (cookie) -+ { -+ case ELAN4_INT_COOKIE_DDCQ: -+ uctx->uctx_ddcq_intr--; -+ -+ user_signal_trap (uctx); -+ break; -+ -+ default: -+ if (uctx->uctx_intcookie_table == NULL || intcookie_fire (uctx->uctx_intcookie_table, cookie) != 0) -+ { -+ PRINTF2 (uctx, DBG_TRAP, "user_interrupt: cookie=%llx %s\n", cookie, uctx->uctx_intcookie_table ? "not found" : "no table"); -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_ERROR; -+ user_signal_trap (uctx); -+ } -+ break; -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+static void -+user_neterrmsg (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg) -+{ -+ USER_CTXT *uctx = (USER_CTXT *) ctxt; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (! RING_QUEUE_FULL (uctx->uctx_msgQ)) -+ { -+ memcpy (RING_QUEUE_BACK (uctx->uctx_msgQ, uctx->uctx_msgs), msg, sizeof (ELAN4_NETERR_MSG)); -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_msgQ); -+ -+ user_signal_trap (uctx); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+} -+ -+ELAN4_TRAP_OPS user_trap_ops = -+{ -+ user_eproc_trap, -+ user_cproc_trap, -+ user_dproc_trap, -+ user_tproc_trap, -+ user_iproc_trap, -+ user_interrupt, -+ user_neterrmsg, -+}; -+ -+static int -+deliver_trap (ELAN4_USER_TRAP *utrapp, int type, unsigned proc, void *trap, ...) -+{ -+ register int i, len; -+ va_list ap; -+ -+ PRINTF (NULL, DBG_TRAP, "deliver_trap: type=%d proc=%d\n", type, proc); -+ -+ switch (proc) -+ { -+ case UTS_CPROC: len = sizeof (ELAN4_CPROC_TRAP); break; -+ case UTS_DPROC: len = sizeof (ELAN4_DPROC_TRAP); break; -+ case UTS_EPROC: len = sizeof (ELAN4_EPROC_TRAP); break; -+ case UTS_IPROC: len = sizeof (ELAN4_IPROC_TRAP); break; -+ case UTS_TPROC: len = sizeof (ELAN4_TPROC_TRAP); break; -+ case UTS_NETERR_MSG: len = sizeof (ELAN4_NETERR_MSG); break; -+ default: len = 0; break; -+ } -+ -+ if (put_user (type, &utrapp->ut_type) || put_user (proc, &utrapp->ut_proc) || copy_to_user (&utrapp->ut_trap, trap, len)) -+ return (UTS_EFAULT); -+ -+ va_start (ap, trap); -+ for (i = 0; i < sizeof (utrapp->ut_args)/sizeof (utrapp->ut_args[0]); i++) -+ if (put_user (va_arg (ap, unsigned long), &utrapp->ut_args[i])) -+ return (UTS_EFAULT); -+ va_end (ap); -+ -+ return (type); -+} -+ -+static int -+user_pagefault (USER_CTXT *uctx, E4_FaultSave *farea) -+{ -+ E4_Addr addr = farea->FaultAddress; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ FAULT_SAVE *entry; -+ FAULT_SAVE **predp; -+ int count; -+ -+ PRINTF2 (uctx, DBG_FAULT, "user_pagefault: addr=%llx fsr %x\n", (unsigned long long) addr, fsr); -+ -+ if ((fsr & FSR_FaultForBadData) != 0) /* Memory ECC error during walk */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: ECC error during walk\n"); -+ return (-EFAULT); -+ } -+ -+ if ((fsr & FSR_FaultForMaxChainCount) != 0) /* Have walked a chain of 1024 items */ -+ { -+ PRINTF0 (uctx, DBG_FAULT, "user_pagefault: pte chain too long\n"); -+ return (-EFAULT); -+ } -+ -+ if (uctx->uctx_num_fault_save) -+ { -+ spin_lock (&uctx->uctx_fault_lock); -+ for( predp = &uctx->uctx_fault_list; (entry = *predp)->next != NULL; predp = &entry->next) -+ { -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ break; -+ } -+ -+ *predp = entry->next; -+ entry->next = uctx->uctx_fault_list; -+ uctx->uctx_fault_list = entry; -+ -+ if (entry->addr == (addr & ~((E4_Addr) PAGE_SIZE-1))) -+ { -+ if ((entry->count <<= 1) > max_fault_pages) -+ entry->count = max_fault_pages; -+ } -+ else -+ entry->count = min_fault_pages; -+ -+ entry->addr = (addr & ~((E4_Addr) PAGE_SIZE-1))+(entry->count * PAGE_SIZE); -+ count = entry->count; -+ spin_unlock (&uctx->uctx_fault_lock); -+ -+ if (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), count * PAGESIZE, fsr) == 0) -+ return 0; -+ -+ /* else pre-faulting has failed, try just this page */ -+ } -+ -+ return (user_load_range (uctx, addr & ~((E4_Addr) PAGE_SIZE-1), PAGE_SIZE, fsr)); -+ -+} -+ -+static int -+queue_dma_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_DMA *dma) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_DPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+queue_thread_for_retry (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ { -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, UCTX_TPROC_QUEUE_OVERFLOW)); -+ } -+ -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+fixup_eproc_trap (USER_CTXT *uctx, ELAN4_EPROC_TRAP *trap, int waitevent) -+{ -+ E4_FaultSave *farea = &trap->tr_faultarea; -+ E4_uint32 fsr = FaultSaveFSR(farea->FSRAndFaultContext); -+ E4_uint64 CountAndType; -+ E4_uint64 CopySource; -+ E4_uint64 CopyDest; -+ -+ /* -+ * Event processor can trap as follows : -+ * 1) Event location read (faddr == event location & Event Permission) -+ * 2) Event location write (faddr == event location & Event Permission) -+ * 3) Copy Source read Read Access -+ * 4) Copy/Write dest write other -+ * -+ * NOTE - it is possible to see both 3) and 4) together - but only with physical errors. -+ */ -+ if (AT_Perm(fsr) == AT_PermLocalDataRead || AT_Perm(fsr) == AT_PermLocalDataWrite) -+ { -+ /* -+ * We complete the copy/write by issuing a waitevent 0 of the approriate type. -+ * - NB mask off bottom bits of EventAddr in case of partial setevent -+ */ -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ -+ if (! user_ddcq_check (uctx, 4)) -+ return (0); -+ -+ if ((trap->tr_event.ev_CountAndType & E4_EVENT_COPY_TYPE_MASK) == E4_EVENT_WRITE) -+ { -+ /* case 4) faulted on write word to destination */ -+ -+ CountAndType = trap->tr_event.ev_CountAndType & E4_EVENT_TYPE_MASK; -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: write Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " WritePtr=%llx WriteValue=%llx\n", -+ trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, trap->tr_event.ev_WritePtr, trap->tr_event.ev_WriteValue); -+ } -+ else -+ { -+ /* case 3) or case 4) faulted on read/write of copy */ -+ if (AT_Perm (fsr) == AT_PermLocalDataRead) -+ { -+ CountAndType = (trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | EPROC_CopySize(trap->tr_status); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest; -+ } -+ else -+ { -+ CountAndType = ((trap->tr_event.ev_CountAndType & E4_EVENT_DATA_TYPE_MASK) | -+ ((EPROC_CopySize(trap->tr_status) + EVENT_COPY_NDWORDS) & E4_EVENT_COPY_SIZE_MASK)); -+ CopySource = trap->tr_event.ev_CopySource - EVENT_COPY_BLOCK_SIZE; -+ CopyDest = trap->tr_event.ev_CopyDest - EVENT_COPY_BLOCK_SIZE; -+ } -+ -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: copy Event=%llx CountAndType=%llx\n", EventAddr, CountAndType); -+ PRINTF (uctx, DBG_TRAP, " CopySource=%llx CopyDest=%llx\n", CopySource, CopyDest); -+ -+ user_ddcq_waitevent (uctx, EventAddr, CountAndType, CopySource, CopyDest); -+ } -+ } -+ else -+ { -+ E4_uint64 EventAddr = trap->tr_eventaddr & ~((E4_uint64) E4_EVENT_ALIGN-1); -+ E4_uint32 EventCount = trap->tr_eventaddr & (E4_EVENT_ALIGN-1); -+ -+ /* case 1) or 2) - just reissue the event */ -+ if (! waitevent) -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: setevent EventAddr=%llx EventCount=%x\n", EventAddr, EventCount); -+ else -+ { -+ PRINTF (uctx, DBG_TRAP, "fixup_eproc_trap: waitevent Event=%llx CountAndType=%llx\n", EventAddr, trap->tr_event.ev_CountAndType); -+ PRINTF (uctx, DBG_TRAP, " Param[0]=%llx Param[1]=%llx\n", -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ } -+ -+ if (! user_ddcq_check (uctx, waitevent ? 4 : 2)) -+ return (0); -+ -+ if (waitevent) -+ user_ddcq_waitevent (uctx, EventAddr, trap->tr_event.ev_CountAndType, -+ trap->tr_event.ev_Params[0], trap->tr_event.ev_Params[1]); -+ else -+ user_ddcq_seteventn (uctx, EventAddr, EventCount); -+ } -+ -+ return (1); -+} -+ -+ -+static int -+resolve_eproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_EPROC_TRAP *trap) -+{ -+ switch (EPROC_TrapType (trap->tr_status)) -+ { -+ case EventProcNoFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcNoFault\n"); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_EPROC, trap)); -+ -+ case EventProcMemoryFault: -+ PRINTF (uctx, DBG_TRAP, "resolve_eproc_trap: EventProcMemoryFault @ %llx\n", trap->tr_faultarea.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_EPROC, trap)); -+ -+ return (UTS_FINISHED); -+ -+ case EventProcCountWrapError: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_EPROC, trap)); -+ -+ default: -+ printk ("resolve_eproc_trap: bad trap type %d\n", EPROC_TrapType (trap->tr_status)); -+ BUG(); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_cproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, USER_CQ *ucq) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ ELAN4_CPROC_TRAP *trap = &ucq->ucq_trap; -+ E4_uint64 command; -+ int res; -+ int chan; -+ -+ ELAN_LOCATION location; -+ int vp, node; -+ -+ PRINTF2 (uctx, DBG_CPROC, "resolve_cproc_trap: cq %p is trapped - Status %lx\n", ucq, trap->tr_status); -+ -+ switch (CPROC_TrapType (trap->tr_status)) -+ { -+ case CommandProcDmaQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcDmaQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcInterruptQueueOverflow: -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcInterruptQueueOverflow\n"); -+ /* -+ * XXXX: should wait for the queue to become emptier if we're -+ * responsible for it being very full -+ */ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcWaitTrap: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcWaitTrap\n"); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap->tr_eventtrap)) != UTS_FINISHED) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (res); -+ } -+ -+ if (fixup_eproc_trap (uctx, &trap->tr_eventtrap, 1) == 0) -+ return UTS_RESCHEDULE; -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcMemoryFault: -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcMemoryFault at %llx\n", trap->tr_faultarea.FaultAddress); -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcRouteFetchFault: -+ command = elan4_trapped_open_command (dev, ucq->ucq_cq); -+ -+ PRINTF1 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcRouteFetchFault to vp %d\n", (int) (command >> 32)); -+ -+ if (user_resolvevp (uctx, (unsigned) (command >> 32)) != 0) -+ { -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq), (long) (command >> 32))); -+ } -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcFailCountZero: -+ PRINTF0 (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcFailCountZero - reset failcount\n"); -+ -+ /* Update CPROC timeout route statistics */ -+ for (chan = 0; chan <= 1; chan++) -+ { -+ /* Was there a timeout on this channel ? */ -+ if (PackValue(trap->tr_qdesc.CQ_AckBuffers, chan) == PackTimeout) -+ { -+ /* Find the last open command for that channel to extract the relevant vp */ -+ if ((vp = cproc_open_extract_vp(uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq, chan)) != -1) -+ { -+ E4_VirtualProcessEntry route; -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, vp); -+ elan4_read_route (uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, vp, &route); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ -+ if ((node >= 0) && (node < uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_cproc_timeout[node]++; -+ -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_cproc_timeout_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ } -+ } -+ -+ /* NOTE - we must not modify the ChannelNotCompleted bits - so modify */ -+ /* the restart count with a part-word store */ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, user_cproc_retry_count); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ -+ case CommandProcAddressAlignment: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ case CommandProcPermissionTrap: -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 control = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_Control)); -+ -+ PRINTF (uctx, DBG_CPROC, "resolve_cproc_trap: CommandProcPermissionTrap - %s\n", -+ (control & CQ_PermissionMask) != ucq->ucq_cq->cq_perm ? "resume from stop" : "permission denied"); -+ -+ if ((control & CQ_PermissionMask) == ucq->ucq_cq->cq_perm) -+ return (deliver_trap (utrapp, UTS_PERMISSION_DENIED, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ elan4_updatecq (dev, ucq->ucq_cq, ucq->ucq_cq->cq_perm, 0); -+ -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ break; -+ } -+ -+ case CommandProcBadData: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_COMMAND, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ -+ default: -+ ucq->ucq_state = UCQ_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_CPROC, trap, elan4_cq2idx(ucq->ucq_cq))); -+ } -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_DPROC_TRAP *trap) -+{ -+ ELAN_LOCATION location; -+ int node; -+ E4_VirtualProcessEntry route; -+ -+ if (DPROC_PrefetcherFault (trap->tr_status)) -+ { -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: PrefetcherFault at %llx\n", trap->tr_prefetchFault.FaultAddress); -+ -+ if (user_pagefault (uctx, &trap->tr_prefetchFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_DPROC, trap)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc)); -+ } -+ -+ switch (DPROC_TrapType (trap->tr_status)) -+ { -+ case DmaProcRouteFetchFault: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcRouteFetchFault vp %d\n", trap->tr_desc.dma_vproc); -+ -+ if (user_resolvevp (uctx, trap->tr_desc.dma_vproc) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_VPROC, UTS_DPROC, trap, trap->tr_desc.dma_vproc)); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* immediate */)); -+ -+ case DmaProcFailCountError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcFailCountError - vp %d cookie %llx\n", -+ trap->tr_desc.dma_vproc, trap->tr_desc.dma_cookie); -+ -+ trap->tr_desc.dma_typeSize |= DMA_FailCount (user_dproc_retry_count); -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcPacketAckError: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcPacketAckError - %d%s\n", DPROC_PacketAckValue (trap->tr_status), -+ DPROC_PacketTimeout (trap->tr_status) ? " timeout" : ""); -+ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ location = user_process2location(uctx, NULL, trap->tr_desc.dma_vproc); -+ elan4_read_route(uctx->uctx_ctxt.ctxt_dev, uctx->uctx_routetable, trap->tr_desc.dma_vproc, &route); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ node = location.loc_node; -+ -+ /* Update dproc route timeout statistics */ -+ if ((node >= 0) && (node < uctx->uctx_ctxt.ctxt_dev->dev_position.pos_nodes)) -+ { -+ kmutex_lock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ -+ if ((route.Values[0] != 0) || (route.Values[1] != 0)) -+ { -+ if (DPROC_PacketTimeout (trap->tr_status)) -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout[node]++; -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_dproc_timeout_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ else -+ { -+ uctx->uctx_ctxt.ctxt_dev->dev_ack_errors[node]++; -+ elan4_ringbuf_store(&uctx->uctx_ctxt.ctxt_dev->dev_ack_error_routes, -+ &route, uctx->uctx_ctxt.ctxt_dev); -+ } -+ } -+ -+ kmutex_unlock(&uctx->uctx_ctxt.ctxt_dev->dev_lock); -+ } -+ -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcQueueOverflow: -+ PRINTF (uctx, DBG_DPROC, "resolve_dproc_trap: DmaProcQueueOverflow\n"); -+ return (queue_dma_for_retry (uctx, utrapp, &trap->tr_desc /* XXXX - backoff for some time later */)); -+ -+ case DmaProcRunQueueReadFault: -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_DPROC, trap)); -+ -+ default: -+ printk ("resolve_dproc_trap: unknown trap type : %d\n", DPROC_TrapType(trap->tr_status)); -+ BUG(); -+ } -+ return UTS_FINISHED; -+} -+ -+int -+resolve_tproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, ELAN4_TPROC_TRAP *trap) -+{ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trap state = %lx\n", trap->tr_state); -+ -+ if (trap->tr_state & TS_TrapForTooManyInstructions) -+ return (deliver_trap (utrapp, UTS_BAD_TRAP, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_Unimplemented) -+ return (deliver_trap (utrapp, UTS_UNIMP_INSTR, UTS_TPROC, trap)); -+ -+ if (trap->tr_state & TS_DataAlignmentError) -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_InstAccessException) && user_pagefault (uctx, &trap->tr_instFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ if ((trap->tr_state & TS_DataAccessException) && user_pagefault (uctx, &trap->tr_dataFault) != 0) -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_TPROC, trap)); -+ -+ /* If we're restarting from trap - then just need to re-issue it */ -+ if (trap->tr_pc == uctx->uctx_trestart_addr || (trap->tr_state & TS_TrappedFlag)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: trapped in trap code PC=%llx SP=%llx\n", trap->tr_pc, trap->tr_regs[1]); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ } -+ else -+ { -+ E4_uint64 *sp = (E4_uint64 *) user_elan2main (uctx, trap->tr_regs[1]); -+ int i, reload; -+ -+ /* need to store the register on the stack see */ -+ /* lib_tproc_trampoline_elan4_thread.S for stack layout */ -+#define TS_STACK_OFF(REG) ((((REG)&7)) - (((REG)>>3)*8) - 8) -+ for (reload = 0, i = 0; i < 64; i++) -+ { -+ if (trap->tr_dirty & ((E4_uint64) 1 << i)) -+ { -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: %%r%d [%016llx] -> %p\n", i, trap->tr_regs[i], &sp[TS_STACK_OFF(i)]); -+ -+ sulonglong ((u64 *) &sp[TS_STACK_OFF(i)], trap->tr_regs[i]); -+ -+ reload |= (1 << (i >> 3)); -+ } -+ } -+#undef TS_STACK_OFF -+ -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: pc %llx npc %llx\n", trap->tr_pc, trap->tr_npc); -+ PRINTF (uctx, DBG_TPROC, "resolve_tproc_trap: CC %x reload %x\n", (int) (trap->tr_state >> TS_XCCshift), reload); -+ -+ trap->tr_regs[0] = uctx->uctx_trestart_addr; -+ trap->tr_regs[2] = trap->tr_pc; -+ trap->tr_regs[3] = trap->tr_npc; -+ trap->tr_regs[4] = (trap->tr_state >> TS_XCCshift) & TS_XCCmask; -+ trap->tr_regs[5] = reload; -+ } -+ -+ return (queue_thread_for_retry (uctx, utrapp, (E4_ThreadRegs *) trap->tr_regs)); -+} -+ -+static int -+resolve_iproc_trap (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int channel) -+{ -+ USER_IPROC_TRAP *utrap = &uctx->uctx_iprocTrap[channel]; -+ ELAN4_IPROC_TRAP *trap = &utrap->ut_trap; -+ unsigned long flags; -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ if (trap->tr_flags & TR_FLAG_TOOMANY_TRANS) -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ ASSERT (trap->tr_trappedTrans >= 0 && trap->tr_trappedTrans < trap->tr_numTransactions); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputMemoryFault: -+ if (user_pagefault (uctx, &trap->tr_faultarea) != 0) -+ { -+ utrap->ut_state = UTS_IPROC_STOPPED; -+ -+ return (deliver_trap (utrapp, UTS_INVALID_ADDR, UTS_IPROC, trap, channel)); -+ } -+ break; -+ -+ case InputDmaQueueOverflow: -+ case InputEventEngineTrapped: -+ /* nothing to do for these 2 - restarting will simulate the transactions */ -+ break; -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ break; -+ -+ case InputCrcErrorAfterPAckOk: -+ PRINTF (DBG_DEVICE, DBG_IPROC, "InputCrcErrorAfterPAckOk: flags %x\n", trap->tr_flags); -+ -+ ASSERT ((trap->tr_flags & TR_FLAG_ACK_SENT) && ((trap->tr_flags & (TR_FLAG_DMA_PACKET|TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && trap->tr_identifyTrans == TR_TRANS_INVALID))); -+ break; -+ -+ case InputDiscardAfterAckOk: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ case InputAddressAlignment: -+ return (deliver_trap (utrapp, UTS_ALIGNMENT_ERROR, UTS_IPROC, trap, channel)); -+ -+ case InputInvalidTransType: -+ return (deliver_trap (utrapp, UTS_INVALID_TRANS, UTS_IPROC, trap, channel)); -+ -+ default: -+ printk ("resolve_iproc_trap: unknown trap type %d\n", IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)); -+ BUG(); -+ /* NOTREACHED */ -+ } -+ -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ /* -+ * TR_FLAG_DMA_PACKET means a DMA packet has faulted. -+ * -+ * TR_FLAG_BAD_TRANS means we have a transaction with a bad CRC after the transaction -+ * which sent the ack - this implies it's an overlapped ack DMA packet -+ * -+ * TR_FLAG_EOP_ERROR means we've received an EOP reset - if we hadn't seen an identify -+ * transaction then it's a DMA packet. -+ * -+ * To ensure that the DMA processor works correctly the next packet must be NACKed to -+ * cause it to resend this one. -+ */ -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: %s during DMA packet\n", -+ (trap->tr_flags & TR_FLAG_BAD_TRANS) ? "BadTransaction" : (trap->tr_flags & TR_FLAG_EOP_ERROR) ? "EopError" : "trap"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ if (trap->tr_flags & TR_FLAG_DMA_PACKET) -+ { -+ if (! (trap->tr_flags & TR_FLAG_BAD_TRANS)) -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ else -+ { -+ kcondvar_t waithere; -+ -+ /* We must ensure that the next packet is always nacked, so -+ * we wait here for an output timeout before dropping the -+ * context filter - we just pause here for 4 mS */ -+ kcondvar_init (&waithere); -+ kcondvar_timedwait (&waithere, &uctx->uctx_spinlock, &flags, lbolt + (HZ/250) + 1);; -+ kcondvar_destroy (&waithere); -+ -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ else -+ { -+ utrap->ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ } -+ else if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: EopError with identify\n"); -+ -+ utrap->ut_state = UTS_IPROC_NETWORK_ERROR; -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IPROC, "resolve_iproc_trap: execute packet\n"); -+ -+ utrap->ut_state = UTS_IPROC_EXECUTE_PACKET; -+ } -+ -+ return UTS_FINISHED; -+} -+ -+ -+static int -+resolve_cproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ struct list_head *entry; -+ int res = UTS_FINISHED; -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_TRAPPED) -+ { -+ res = resolve_cproc_trap (uctx, utrapp, ucq); -+ -+ if (res != UTS_FINISHED) -+ break; -+ } -+ -+ if (ucq->ucq_errored) -+ { -+ ucq->ucq_errored = 0; -+ res = deliver_trap (utrapp, UTS_CPROC_ERROR, UTS_CPROC, &ucq->ucq_trap, elan4_cq2idx(ucq->ucq_cq)); -+ break; -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (res); -+} -+ -+static int -+resolve_eproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_eprocTrapQ)) -+ { -+ ELAN4_EPROC_TRAP trap = *RING_QUEUE_FRONT (uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_eprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_eproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ if (fixup_eproc_trap (uctx, &trap, 0) == 0) -+ { -+ PRINTF (uctx, DBG_EPROC, "resolve_eproc_trap: could not fixup eproc trap - requeue it\n"); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_REALLY_FULL(uctx->uctx_eprocTrapQ)) -+ uctx->uctx_status |= UCTX_EPROC_QUEUE_OVERFLOW; -+ else -+ { -+ *RING_QUEUE_FRONT(uctx->uctx_eprocTrapQ, uctx->uctx_eprocTraps) = trap; -+ -+ (void) RING_QUEUE_ADD_FRONT(uctx->uctx_eprocTrapQ); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return UTS_RESCHEDULE; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_EPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_EPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_dproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dprocTrapQ)) -+ { -+ ELAN4_DPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_dprocTrapQ, uctx->uctx_dprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_dproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_DPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_DPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_tproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ while (! RING_QUEUE_EMPTY (uctx->uctx_tprocTrapQ)) -+ { -+ ELAN4_TPROC_TRAP trap = *RING_QUEUE_FRONT(uctx->uctx_tprocTrapQ, uctx->uctx_tprocTraps); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_tprocTrapQ); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_tproc_trap (uctx, utrapp, &trap)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ -+ if (uctx->uctx_status & UCTX_TPROC_QUEUE_FULL) -+ user_stop_stopping (uctx, UCTX_TPROC_QUEUE_FULL); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ if (uctx->uctx_iprocTrap[i].ut_state == UTS_IPROC_TRAPPED) -+ { -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_RESOLVING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ if ((res = resolve_iproc_trap (uctx, utrapp, i)) != UTS_FINISHED) -+ return (res); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+resolve_all_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ int res; -+ -+ if ((res = resolve_iproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_cproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_eproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_dproc_traps (uctx, utrapp)) != UTS_FINISHED || -+ (res = resolve_tproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ return (res); -+ -+ if (uctx->uctx_status & UCTX_OVERFLOW_REASONS) -+ return (deliver_trap (utrapp, UTS_QUEUE_OVERFLOW, UTS_NOPROC, NULL, uctx->uctx_status)); -+ -+ if (uctx->uctx_status & UCTX_ERROR_REASONS) -+ return (deliver_trap (utrapp, UTS_QUEUE_ERROR, UTS_NOPROC, NULL, uctx->uctx_status)); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+execute_iproc_traps (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ for (i = 0; i < 2; i++) -+ switch (uctx->uctx_iprocTrap[i].ut_state) -+ { -+ case UTS_IPROC_EXECUTE_PACKET: -+ uctx->uctx_iprocTrap[i].ut_state = UTS_IPROC_EXECUTING; -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_EXECUTE_PACKET, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ -+ case UTS_IPROC_NETWORK_ERROR: -+ spin_unlock_irqrestore(&uctx->uctx_spinlock, flags); -+ -+ return (deliver_trap (utrapp, UTS_NETWORK_ERROR_TRAP, UTS_IPROC, &uctx->uctx_iprocTrap[i].ut_trap, i)); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static int -+progress_neterr (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! RING_QUEUE_EMPTY (uctx->uctx_msgQ)) -+ { -+ ELAN4_NETERR_MSG msg = *RING_QUEUE_FRONT (uctx->uctx_msgQ, uctx->uctx_msgs); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_msgQ); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_MSG, UTS_NETERR_MSG, &msg, user_location2process (uctx, msg.msg_sender)); -+ } -+ -+ if (uctx->uctx_status & UCTX_NETERR_TIMER) -+ { -+ uctx->uctx_status &= ~UCTX_NETERR_TIMER; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return deliver_trap (utrapp, UTS_NETWORK_ERROR_TIMER, UTS_NOPROC, NULL); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (UTS_FINISHED); -+} -+ -+static void -+restart_command_queues (USER_CTXT *uctx) -+{ -+ struct list_head *entry; -+ -+ ASSERT (SPINLOCK_HELD (&uctx->uctx_spinlock)); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (ucq->ucq_state == UCQ_NEEDS_RESTART) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ -+ elan4_restartcq (uctx->uctx_ctxt.ctxt_dev, ucq->ucq_cq); -+ } -+ } -+} -+ -+static int -+restart_dmas (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_dmas: back=%d front=%d\n", uctx->uctx_dmaQ.q_back, uctx->uctx_dmaQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_dmaQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_dma (uctx, RING_QUEUE_FRONT(uctx->uctx_dmaQ, uctx->uctx_dmas)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_dmaQ); -+ } -+ -+ return (1); -+} -+ -+static int -+restart_threads (USER_CTXT *uctx) -+{ -+ PRINTF (uctx, DBG_TRAP, "restart_threads: back=%d front=%d\n", uctx->uctx_threadQ.q_back, uctx->uctx_threadQ.q_front); -+ -+ while (! RING_QUEUE_EMPTY (uctx->uctx_threadQ)) -+ { -+ if (! user_ddcq_check (uctx, 7)) -+ return (0); -+ -+ user_ddcq_run_thread (uctx, RING_QUEUE_FRONT(uctx->uctx_threadQ, uctx->uctx_threads)); -+ -+ (void) RING_QUEUE_REMOVE (uctx->uctx_threadQ); -+ } -+ -+ return (1); -+} -+ -+int -+user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr) -+{ -+ PRINTF2 (uctx, DBG_RESUME, "user_resume_eproc_trap: addr=%llx -> %s\n", addr, user_ddcq_check(uctx, 2) ? "success" : "EAGAIN"); -+ -+ if (! user_ddcq_check (uctx, 2)) -+ return (-EAGAIN); -+ -+ user_ddcq_setevent (uctx, addr); -+ -+ return (0); -+} -+ -+int -+user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_cproc_trap: indx=%d\n", indx); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == indx && ucq->ucq_state == UCQ_STOPPED && !(ucq->ucq_flags & UCQ_SYSTEM)) -+ { -+ ucq->ucq_state = UCQ_NEEDS_RESTART; -+ -+ user_signal_trap (uctx); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (0); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (-EINVAL); -+} -+ -+int -+user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_dmaQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_dmaQ, uctx->uctx_dmas) = *dma; -+ (void) RING_QUEUE_ADD (uctx->uctx_dmaQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (RING_QUEUE_FULL (uctx->uctx_threadQ)) -+ res = -ENOMEM; -+ else -+ { -+ *RING_QUEUE_BACK (uctx->uctx_threadQ, uctx->uctx_threads) = *regs; -+ (void) RING_QUEUE_ADD (uctx->uctx_threadQ); -+ -+ user_signal_trap (uctx); -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (res); -+} -+ -+int -+user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap) -+{ -+ unsigned long flags; -+ int res = 0; -+ -+ if (channel >= 2) -+ return (-EINVAL); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_STOPPED && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_EXECUTING && -+ uctx->uctx_iprocTrap[channel].ut_state != UTS_IPROC_NETWORK_ERROR) -+ res = -EINVAL; -+ else -+ { -+ ELAN4_IPROC_TRAP *trap = &uctx->uctx_iprocTrap[channel].ut_trap; -+ -+ if (trans < trap->tr_numTransactions) -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> execute\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_EXECUTE_PACKET; -+ trap->tr_trappedTrans = trans; -+ trap->tr_transactions[trans] = *hdrp; -+ trap->tr_dataBuffers[trans] = *datap; -+ } -+ else -+ { -+ PRINTF1 (uctx, DBG_RESUME, "user_resume_iproc_trap: trans=%d -> running\n", trans); -+ -+ uctx->uctx_iprocTrap[channel].ut_state = UTS_IPROC_RUNNING; -+ -+ user_stop_nacking (uctx, channel ? UCTX_IPROC_CH0_TRAPPED : UCTX_IPROC_CH1_TRAPPED); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (res); -+} -+ -+int -+__categorise_command (E4_uint64 command, int *cmdSize) -+{ -+ switch (command & 0x3) -+ { -+ case RUN_THREAD_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x7) -+ { -+ case WRITE_DWORD_CMD: *cmdSize = 2; break; -+ case ADD_DWORD_CMD: *cmdSize = 2; break; -+ -+ default: -+ switch (command & 0xF) -+ { -+ case OPEN_STEN_PKT_CMD: -+ *cmdSize = 1; -+ return 1; -+ -+ case COPY64_CMD: *cmdSize = 2; break; -+ case GUARD_CMD: *cmdSize = 1; break; -+ case INTERRUPT_CMD: *cmdSize = 1; break; -+ case RUN_DMA_CMD: *cmdSize = 7; break; -+ -+ default: -+ switch (command & 0x1f) -+ { -+ case SEND_TRANS_CMD: -+ *cmdSize = 2 + (((command >> 16) & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ return 2; -+ -+ case SET_EVENT_CMD: *cmdSize = 1; break; -+ case SET_EVENTN_CMD: *cmdSize = 2; break; -+ case WAIT_EVENT_CMD: *cmdSize = 4; break; -+ -+ default: -+ switch (command & 0x3f) -+ { -+ case NOP_CMD: *cmdSize = 1; break; -+ case MAKE_EXT_CLEAN_CMD: *cmdSize = 1; break; -+ default: -+ return 3; -+ } -+ break; -+ } -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+int -+__whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize) -+{ -+ /* Move onto next command */ -+ while (cmdSize-- && (*commandPtr) != insertPtr) -+ *commandPtr = ((*commandPtr) & ~(cqSize-1)) | (((*commandPtr) + sizeof (E4_uint64)) & (cqSize-1)); -+ -+ return cmdSize == -1; -+} -+ -+int -+user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int found = 0; -+ struct list_head *el; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ list_for_each (el, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (el, USER_CQ, ucq_link); -+ -+ if ((ucq->ucq_cq->cq_perm & CQ_STENEnableBit) != 0) -+ { -+ sdramaddr_t cqdesc = dev->dev_cqaddr + (elan4_cq2num(ucq->ucq_cq) * sizeof (E4_CommandQueueDesc)); -+ E4_uint64 queuePtrs = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_QueuePtrs)); -+ sdramaddr_t insertPtr = (queuePtrs & CQ_PtrMask); -+ sdramaddr_t commandPtr = CQ_CompletedPtr (queuePtrs); -+ unsigned int cqSize = CQ_Size ((queuePtrs >> CQ_SizeShift) & CQ_SizeMask); -+ E4_uint64 openCommand = 0; -+ -+ if (dev->dev_devinfo.dev_revision_id != PCI_REVISION_ID_ELAN4_REVA && (queuePtrs & CQ_RevB_ReorderingQueue)) -+ { -+ E4_uint32 oooMask = elan4_sdram_readl (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_HoldingValue)); -+ -+ for (; (oooMask & 1) != 0; oooMask >>= 1) -+ insertPtr = (insertPtr & ~(cqSize-1)) | ((insertPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ } -+ -+ while (commandPtr != insertPtr) -+ { -+ E4_uint64 command = elan4_sdram_readq (dev, commandPtr); -+ sdramaddr_t identifyPtr; -+ unsigned int cmdSize; -+ -+ switch (__categorise_command (command, &cmdSize)) -+ { -+ case 0: -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 1: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d OPEN %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if ((command >> 32) == vp) -+ openCommand = command; -+ -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ break; -+ -+ case 2: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cq=%d SENDTRANS %llx\n", elan4_cq2num (ucq->ucq_cq), command); -+ -+ if (openCommand == 0) -+ (void) __whole_command (&commandPtr, insertPtr, cqSize, cmdSize); -+ else -+ { -+ switch ((command >> 16) & (TR_OPCODE_MASK | TR_SIZE_MASK)) -+ { -+ case TR_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_REMOTEDMA & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_IDENTIFY/TR_REMOTEDMA\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_SETEVENT_IDENTIFY & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ case TR_INPUT_Q_COMMIT & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_SETEVENT_IDENTIFY/TR_INPUT_Q_COMMIT\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 2*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_ADDWORD & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_ADDWORD\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 3*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ case TR_TESTANDWRITE & (TR_OPCODE_MASK | TR_SIZE_MASK): -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: TR_TESTANDWRITE\n"); -+ identifyPtr = (commandPtr & ~(cqSize-1)) | ((commandPtr + 4*sizeof (E4_uint64)) & (cqSize-1)); -+ break; -+ -+ default: -+ identifyPtr = 0; -+ } -+ -+ if (! __whole_command (&commandPtr, insertPtr, cqSize, cmdSize)) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: not whole command\n"); -+ openCommand = 0; -+ } -+ -+ else if (identifyPtr) -+ { -+ E4_uint64 tcookie = elan4_sdram_readq (dev, identifyPtr); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie=%llx [%llx]\n", tcookie, cookie); -+ -+ if (tcookie == cookie) -+ { -+ unsigned int vchan = (openCommand >> 4) & 0x1f; -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: cookie matches - vchan=%d\n", vchan); -+ -+ if (! waitforeop) -+ { -+ /* Alter the CQ_AckBuffer for this channel to indicate an -+ * ack was received */ -+ E4_uint64 value = elan4_sdram_readq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers)); -+ E4_uint64 nvalue = ((value & ~((E4_uint64)0xf << ((vchan & 0xf) << 2))) | -+ ((E4_uint64) PackOk << ((vchan & 0xf) << 2))); -+ -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: CQ_AckBuffers %llx -> %llx\n", value, nvalue); -+ -+ elan4_sdram_writeq (dev, cqdesc + offsetof (E4_CommandQueueDesc, CQ_AckBuffers), nvalue); -+ pioflush_sdram (dev); -+ } -+ -+ found++; -+ } -+ openCommand = 0; -+ } -+ -+ if ((command >> 16) & TR_LAST_AND_SEND_ACK) -+ openCommand = 0; -+ } -+ break; -+ -+ case 3: -+ PRINTF (uctx, DBG_NETERR, "user_neterr_sten: invalid command %llx\n", command); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return -EINVAL; -+ } -+ -+ } -+ } -+ } -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop) -+{ -+ unsigned long flags; -+ int found = 0; -+ int idx; -+ -+ user_swapout (uctx, UCTX_NETERR_FIXUP); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ RING_QUEUE_ITERATE (uctx->uctx_dmaQ, idx) { -+ E4_DMA *dma = &uctx->uctx_dmas[idx]; -+ -+ if (dma->dma_vproc == vp && dma->dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ dma->dma_dstEvent = 0; -+ dma->dma_typeSize = DMA_ShMemWrite | DMA_Context (dma->dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ RING_QUEUE_ITERATE (uctx->uctx_dprocTrapQ, idx) { -+ ELAN4_DPROC_TRAP *trap = &uctx->uctx_dprocTraps[idx]; -+ -+ if (trap->tr_desc.dma_vproc == vp && trap->tr_desc.dma_cookie == cookie) -+ { -+ PRINTF (uctx, DBG_NETERR, "user_neterr_dma: dmaTrapQ matches %s\n", waitforeop ? "waitforeop" : "remove remoteness"); -+ -+ if (! waitforeop) -+ { -+ trap->tr_desc.dma_dstEvent = 0; -+ trap->tr_desc.dma_typeSize = DMA_ShMemWrite | DMA_Context (trap->tr_desc.dma_typeSize); -+ } -+ found++; -+ } -+ } -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ /* The device driver command queue should be empty at this point ! */ -+ if (user_ddcq_flush (uctx) == 0) -+ found = -EAGAIN; -+ -+ user_swapin (uctx, UCTX_NETERR_FIXUP); -+ -+ return found; -+} -+ -+int -+user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks) -+{ -+ unsigned long entered = jiffies; -+ unsigned int need_reenter = 0; -+ unsigned long flags; -+ int res; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ PRINTF1 (uctx, DBG_TRAP, "user_trap_handler: entered state=%d\n", uctx->uctx_trap_state); -+ -+ uctx->uctx_trap_count++; -+ -+ for (;;) -+ { -+ if (uctx->uctx_status & UCTX_SWAPPED_REASONS) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting on swapped reasons\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ if ((long) (jiffies - entered) > HZ) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting for reschedule\n"); -+ res = UTS_RESCHEDULE; -+ goto no_more_to_do; -+ } -+ -+ switch (uctx->uctx_trap_state) -+ { -+ case UCTX_TRAP_ACTIVE: -+ uctx->uctx_trap_state = UCTX_TRAP_SLEEPING; -+ -+ if (nticks == 0 || need_reenter || kcondvar_timedwaitsig (&uctx->uctx_wait, &uctx->uctx_spinlock, &flags, lbolt + nticks) != CV_RET_NORMAL) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: exiting by kcondvar_timedwaitsig\n"); -+ -+ res = UTS_FINISHED; -+ goto no_more_to_do; -+ } -+ -+ /* Have slept above, so resample entered */ -+ entered = jiffies; -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ continue; -+ -+ case UCTX_TRAP_IDLE: -+ case UCTX_TRAP_SIGNALLED: -+ uctx->uctx_trap_state = UCTX_TRAP_ACTIVE; -+ break; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: resolve traps - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ switch ((res = resolve_all_traps (uctx, utrapp))) -+ { -+ case UTS_FINISHED: -+ break; -+ -+ case UTS_RESCHEDULE: -+ need_reenter++; -+ break; -+ -+ default: -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (! user_ddcq_flush (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq not flushed - re-enter\n"); -+ need_reenter++; -+ continue; -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ if ((res = progress_neterr (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ if ((res = execute_iproc_traps (uctx, utrapp)) != UTS_FINISHED) -+ { -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ goto no_more_to_do; -+ } -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: restart items - state=%d status=%x\n", uctx->uctx_trap_state, uctx->uctx_status); -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ restart_command_queues (uctx); -+ -+ if (! restart_threads (uctx) || ! restart_dmas (uctx)) -+ { -+ PRINTF0 (uctx, DBG_TRAP, "user_trap_handler: ddcq full - re-enter\n"); -+ need_reenter++; -+ } -+ } -+ } -+ no_more_to_do: -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ -+ /* -+ * Always ensure that the command queue is flushed with a flow control -+ * write, so that on the next trap we (hopefully) find it empty and so -+ * can immediately restart the context. Also if we need to be re-enter -+ * the trap handler and don't have an interrupt outstanding, then issue -+ * one now. -+ */ -+ user_ddcq_flush (uctx); -+ if (need_reenter && uctx->uctx_ddcq_intr == 0) -+ { -+ uctx->uctx_ddcq_intr++; -+ user_ddcq_intr (uctx); -+ } -+ -+ if (--uctx->uctx_trap_count == 0 && (uctx->uctx_status & UCTX_SWAPPING)) -+ kcondvar_wakeupall (&uctx->uctx_wait, &uctx->uctx_spinlock); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ PRINTF2 (uctx, DBG_TRAP, "user_trap_handler: finished state=%d res=%d\n", uctx->uctx_trap_state, res); -+ -+ return (res == UTS_EFAULT ? -EFAULT : 0); -+} -+ -+USER_CQ * -+user_alloccq (USER_CTXT *uctx, unsigned cqsize, unsigned perm, unsigned uflags) -+{ -+ USER_CQ *ucq; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (ucq, USER_CQ *, sizeof (USER_CQ), 1); -+ -+ if (ucq == (USER_CQ *) NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ /* NOTE - do not allow the user to create high-priority queues as we only flush through the low-priority run queues */ -+ if ((ucq->ucq_cq = elan4_alloccq (&uctx->uctx_ctxt, cqsize, perm, (uflags & UCQ_REORDER) ? CQ_Reorder : 0)) == NULL) -+ { -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+ -+ PRINTF2 (uctx, DBG_CQ, "user_alloccq: failed elan4_allocq cqsize %d uflags %x\n", cqsize, uflags); -+ -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ atomic_set (&ucq->ucq_ref, 1); -+ -+ ucq->ucq_state = UCQ_RUNNING; -+ ucq->ucq_flags = uflags; -+ -+ PRINTF3 (uctx, DBG_CQ, "user_alloccq: ucq=%p idx=%d cqnum=%d\n", ucq, elan4_cq2idx (ucq->ucq_cq), elan4_cq2num(ucq->ucq_cq)); -+ -+ /* chain it onto the context */ -+ kmutex_lock (&uctx->uctx_cqlock); -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_add (&ucq->ucq_link, &uctx->uctx_cqlist); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ return (ucq); -+} -+ -+USER_CQ * -+user_findcq (USER_CTXT *uctx, unsigned idx) -+{ -+ struct list_head *entry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_for_each (entry, &uctx->uctx_cqlist) { -+ USER_CQ *ucq = list_entry (entry, USER_CQ, ucq_link); -+ -+ if (elan4_cq2idx(ucq->ucq_cq) == idx) -+ { -+ atomic_inc (&ucq->ucq_ref); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ return (ucq); -+ } -+ } -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return (NULL); -+} -+ -+void -+user_dropcq (USER_CTXT *uctx, USER_CQ *ucq) -+{ -+ unsigned long flags; -+ -+ PRINTF2 (uctx, DBG_CQ, "user_dropcq: ucq=%p ref=%d\n", ucq, atomic_read (&ucq->ucq_ref)); -+ -+ kmutex_lock (&uctx->uctx_cqlock); -+ if (! atomic_dec_and_test (&ucq->ucq_ref)) -+ { -+ kmutex_unlock (&uctx->uctx_cqlock); -+ return; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ list_del (&ucq->ucq_link); -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ kmutex_unlock (&uctx->uctx_cqlock); -+ -+ elan4_freecq (&uctx->uctx_ctxt, ucq->ucq_cq); -+ -+ KMEM_FREE (ucq, sizeof (USER_CQ)); -+} -+ -+int -+user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas) -+{ -+ ELAN4_DPROC_TRAP *dprocs; -+ ELAN4_EPROC_TRAP *eprocs; -+ ELAN4_TPROC_TRAP *tprocs; -+ E4_DMA *dmas; -+ E4_ThreadRegs *threads; -+ ELAN4_NETERR_MSG *msgs; -+ unsigned long flags; -+ -+ int nmsgs = NETERR_MSGS; -+ -+ /* bounds check the values that have been passed in */ -+ if (ndproc_traps < 2 || ndproc_traps > 10000 || -+ ntproc_traps < 1 || ntproc_traps > 100 || -+ neproc_traps < 6 || neproc_traps > 10000 || -+ nthreads < 2 || nthreads > 10000 || -+ ndmas < 2 || ndmas > 10000) -+ return -EINVAL; -+ -+ if (uctx->uctx_dmas != NULL) -+ return -EBUSY; -+ -+ KMEM_ZALLOC (dprocs, ELAN4_DPROC_TRAP *, ndproc_traps * sizeof (ELAN4_DPROC_TRAP), 1); -+ KMEM_ZALLOC (eprocs, ELAN4_EPROC_TRAP *, neproc_traps * sizeof (ELAN4_EPROC_TRAP), 1); -+ KMEM_ZALLOC (tprocs, ELAN4_TPROC_TRAP *, ntproc_traps * sizeof (ELAN4_TPROC_TRAP), 1); -+ KMEM_ZALLOC (threads, E4_ThreadRegs *, nthreads * sizeof (E4_ThreadRegs), 1); -+ KMEM_ZALLOC (dmas, E4_DMA *, ndmas * sizeof (E4_DMA), 1); -+ KMEM_ZALLOC (msgs, ELAN4_NETERR_MSG *, nmsgs * sizeof (ELAN4_NETERR_MSG), 1); -+ -+ if (dprocs == NULL || eprocs == NULL || tprocs == NULL || dmas == NULL || threads == NULL || msgs == NULL) -+ { -+ if (dprocs != NULL) KMEM_FREE (dprocs, ndproc_traps * sizeof (ELAN4_DPROC_TRAP)); -+ if (eprocs != NULL) KMEM_FREE (eprocs, neproc_traps * sizeof (ELAN4_EPROC_TRAP)); -+ if (tprocs != NULL) KMEM_FREE (tprocs, ntproc_traps * sizeof (ELAN4_TPROC_TRAP)); -+ if (threads != NULL) KMEM_FREE (threads, nthreads * sizeof (E4_ThreadRegs)); -+ if (dmas != NULL) KMEM_FREE (dmas, ndmas * sizeof (E4_DMA)); -+ if (msgs != NULL) KMEM_FREE (msgs, nmsgs * sizeof (ELAN4_NETERR_MSG)); -+ -+ return -ENOMEM; -+ } -+ -+ spin_lock_irqsave (&uctx->uctx_spinlock, flags); -+ -+ uctx->uctx_dprocTraps = dprocs; -+ uctx->uctx_eprocTraps = eprocs; -+ uctx->uctx_tprocTraps = tprocs; -+ uctx->uctx_threads = threads; -+ uctx->uctx_dmas = dmas; -+ uctx->uctx_msgs = msgs; -+ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, ndproc_traps, 1 /* 1 for 2nd dma */); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, ntproc_traps, 0); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, neproc_traps, 5 /* 1 for command, 2 for dma, 2 for inputter */); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, nthreads, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, ndmas, 1); -+ RING_QUEUE_INIT (uctx->uctx_msgQ, nmsgs, 0); -+ -+ spin_unlock_irqrestore (&uctx->uctx_spinlock, flags); -+ -+ return 0; -+} -+ -+USER_CTXT * -+user_alloc (ELAN4_DEV *dev) -+{ -+ USER_CTXT *uctx; -+ int res; -+ int i; -+ -+ /* Allocate and initialise the context private data */ -+ KMEM_ZALLOC (uctx, USER_CTXT *, sizeof (USER_CTXT), 1); -+ -+ if (uctx == NULL) -+ return ERR_PTR(-ENOMEM); -+ -+ if (elan4_get_position (dev, &uctx->uctx_position) == ELAN_POS_UNKNOWN) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-EAGAIN); -+ } -+ -+ if ((res = elan4_insertctxt (dev, &uctx->uctx_ctxt, &user_trap_ops)) != 0) -+ { -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(res); -+ } -+ -+ KMEM_GETPAGES (uctx->uctx_upage, ELAN4_USER_PAGE *, btopr (sizeof (ELAN4_USER_PAGE)), 1); -+ if (uctx->uctx_upage == NULL) -+ { -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_trampoline = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE)) == 0) -+ { -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ if ((uctx->uctx_routetable = elan4_alloc_routetable (dev, 4 /* 512 << 4 == 8192 entries */)) == NULL) -+ { -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, uctx->uctx_routetable); -+ -+ /* initialise the trap and swap queues to be really full */ -+ RING_QUEUE_INIT (uctx->uctx_dprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_tprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_eprocTrapQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_threadQ, 0, 1); -+ RING_QUEUE_INIT (uctx->uctx_dmaQ, 0, 1); -+ -+ INIT_LIST_HEAD (&uctx->uctx_cent_list); -+ INIT_LIST_HEAD (&uctx->uctx_vpseg_list); -+ INIT_LIST_HEAD (&uctx->uctx_cqlist); -+ -+ uctx->uctx_haltop.op_function = user_flush; -+ uctx->uctx_haltop.op_arg = uctx; -+ uctx->uctx_haltop.op_mask = INT_Halted|INT_Discarding; -+ -+ uctx->uctx_dma_flushop.op_function = user_flush_dmas; -+ uctx->uctx_dma_flushop.op_arg = uctx; -+ -+ kmutex_init (&uctx->uctx_vpseg_lock); -+ kmutex_init (&uctx->uctx_cqlock); -+ kmutex_init (&uctx->uctx_rgnmutex); -+ -+ spin_lock_init (&uctx->uctx_spinlock); -+ spin_lock_init (&uctx->uctx_rgnlock); -+ spin_lock_init (&uctx->uctx_fault_lock); -+ -+ kcondvar_init (&uctx->uctx_wait); -+ -+ if ((uctx->uctx_ddcq = user_alloccq (uctx, CQ_Size1K, CQ_EnableAllBits, UCQ_SYSTEM)) == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ uctx->uctx_trap_count = 0; -+ uctx->uctx_trap_state = UCTX_TRAP_IDLE; -+ uctx->uctx_status = 0 /* UCTX_DETACHED | UCTX_SWAPPED | UCTX_STOPPED */; -+ -+ init_timer (&uctx->uctx_int_timer); -+ -+ uctx->uctx_int_timer.function = user_signal_timer; -+ uctx->uctx_int_timer.data = (unsigned long) uctx; -+ uctx->uctx_int_start = jiffies; -+ uctx->uctx_int_count = 0; -+ uctx->uctx_int_delay = 0; -+ -+ init_timer (&uctx->uctx_neterr_timer); -+ uctx->uctx_neterr_timer.function = user_neterr_timer; -+ uctx->uctx_neterr_timer.data = (unsigned long) uctx; -+ -+ uctx->uctx_upage->upage_ddcq_completed = 0; -+ uctx->uctx_ddcq_completed = 0; -+ uctx->uctx_ddcq_insertcnt = 0; -+ -+ uctx->uctx_num_fault_save = num_fault_save; -+ if (uctx->uctx_num_fault_save) -+ { -+ KMEM_ZALLOC (uctx->uctx_faults, FAULT_SAVE *, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save), 1); -+ if ( uctx->uctx_faults == NULL) -+ { -+ user_free (uctx); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ for (i = 0; i < uctx->uctx_num_fault_save; i++) -+ uctx->uctx_faults[i].next = (i == (uctx->uctx_num_fault_save-1) ? NULL : &uctx->uctx_faults[i+1]); -+ -+ } -+ uctx->uctx_fault_list = uctx->uctx_faults; -+ -+ return (uctx); -+} -+ -+void -+user_free (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ -+ user_swapout (uctx, UCTX_EXITING); -+ -+ /* Detach from all input contexts */ -+ user_detach (uctx, NULL); -+ -+ /* since we're single threaded here - (called from close()) */ -+ /* we don't need to hold the lock to drop the command queues */ -+ /* since they cannot be mapped into user space */ -+ while (! list_empty (&uctx->uctx_cqlist)) -+ user_dropcq (uctx, list_entry (uctx->uctx_cqlist.next, USER_CQ, ucq_link)); -+ -+ /* Free off all of vpseg_list */ -+ kmutex_lock (&uctx->uctx_vpseg_lock); -+ while (! list_empty (&uctx->uctx_vpseg_list)) -+ user_remove_vpseg(uctx, list_entry (uctx->uctx_vpseg_list.next, USER_VPSEG, vps_link)); -+ kmutex_unlock (&uctx->uctx_vpseg_lock); -+ -+ if (timer_pending (&uctx->uctx_int_timer)) -+ del_timer_sync (&uctx->uctx_int_timer); -+ -+ if (timer_pending (&uctx->uctx_neterr_timer)) -+ del_timer_sync (&uctx->uctx_neterr_timer); -+ -+ if (uctx->uctx_dprocTraps) -+ KMEM_FREE (uctx->uctx_dprocTraps, uctx->uctx_dprocTrapQ.q_size * sizeof (ELAN4_DPROC_TRAP)); -+ if (uctx->uctx_tprocTraps) -+ KMEM_FREE (uctx->uctx_tprocTraps, uctx->uctx_tprocTrapQ.q_size * sizeof (ELAN4_TPROC_TRAP)); -+ if (uctx->uctx_eprocTraps) -+ KMEM_FREE (uctx->uctx_eprocTraps, uctx->uctx_eprocTrapQ.q_size * sizeof (ELAN4_EPROC_TRAP)); -+ if (uctx->uctx_dmas) -+ KMEM_FREE (uctx->uctx_dmas, uctx->uctx_dmaQ.q_size * sizeof (E4_DMA)); -+ if (uctx->uctx_msgs) -+ KMEM_FREE (uctx->uctx_msgs, NETERR_MSGS * sizeof (ELAN4_NETERR_MSG)); -+ if (uctx->uctx_threads) -+ KMEM_FREE (uctx->uctx_threads, uctx->uctx_threadQ.q_size * sizeof (E4_ThreadRegs)); -+ if (uctx->uctx_faults) -+ KMEM_FREE (uctx->uctx_faults, (sizeof(FAULT_SAVE) * uctx->uctx_num_fault_save)); -+ -+ if (uctx->uctx_intcookie_table) -+ intcookie_free_table (uctx->uctx_intcookie_table); -+ -+ elan4_set_routetable (&uctx->uctx_ctxt, NULL); -+ elan4_free_routetable (dev, uctx->uctx_routetable); -+ -+ /* Free off all USER_RGNs */ -+ user_freergns(uctx); -+ -+ elan4_sdram_free (dev, uctx->uctx_trampoline, SDRAM_PAGE_SIZE); -+ -+ /* Clear the PG_Reserved bit before free to avoid a memory leak */ -+ ClearPageReserved(pte_page(*find_pte_kernel((unsigned long) uctx->uctx_upage))); -+ KMEM_FREEPAGES (uctx->uctx_upage, btopr (sizeof (ELAN4_USER_PAGE))); -+ -+ elan4_removectxt (dev, &uctx->uctx_ctxt); -+ -+ kcondvar_destroy (&uctx->uctx_wait); -+ -+ spin_lock_destroy (&uctx->uctx_rgnlock); -+ spin_lock_destroy (&uctx->uctx_spinlock); -+ -+ kmutex_destroy (&uctx->uctx_rgnmutex); -+ kmutex_destroy (&uctx->uctx_cqlock); -+ kmutex_destroy (&uctx->uctx_vpseg_lock); -+ -+ KMEM_FREE (uctx, sizeof (USER_CTXT)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/user_ddcq.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/user_ddcq.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/user_ddcq.c 2005-07-28 14:52:52.847678888 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_ddcq.c,v 1.15 2004/06/23 11:06:05 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_ddcq.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#if PAGE_SIZE < CQ_CommandMappingSize -+# define ELAN4_COMMAND_QUEUE_MAPPING PAGE_SIZE -+#else -+# define ELAN4_COMMAND_QUEUE_MAPPING CQ_CommandMappingSize -+#endif -+ -+/* The user device driver command queue is used for re-issuing -+ * trapped items. It is allocated as a 1K command queue, and -+ * we insert command flow writes event 256 words. -+ */ -+#define USER_CTRLFLOW_COUNT 256 -+ -+/* Flow control of the device driver command queue is handled by periodically -+ * inserting dword writes into the command stream. When you need to know -+ * that the queue has been flushed, then you insert an extra contorl flow -+ * write into the command queue. Should the queue not be flushed, but the -+ * trap handler be returning to user space, then it will also insert and -+ * extra interrupt command to ensure that it is re-entered after the queue -+ * has been flushed. -+ * -+ * Note - we account the space for the interrupt command on each control -+ * flow write so that we do not overflow the queue even if we end up -+ * inserting an interrupt for every command flow write. In general only -+ * a single interrupt should get inserted.... -+ */ -+ -+#define user_ddcq_command_write(value,off) do { \ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_write: cmdptr=%x off=%d value=%llx\n", cmdptr, off, value);\ -+ writeq(value, cmdptr + (off << 3)); \ -+} while (0) -+ -+#define user_ddcq_command_space(uctx) \ -+ ((CQ_Size (uctx->uctx_ddcq->ucq_cq->cq_size)>>3) - ((uctx)->uctx_ddcq_insertcnt - (uctx)->uctx_upage->upage_ddcq_completed)) -+ -+#define user_ddcq_command_flow_write(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ (uctx)->uctx_ddcq_completed = ((uctx)->uctx_ddcq_insertcnt += 3);\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_write: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (GUARD_CMD | GUARD_ALL_CHANNELS, 0);\ -+ user_ddcq_command_write (WRITE_DWORD_CMD | (uctx)->uctx_upage_addr, 1);\ -+ user_ddcq_command_write ((uctx)->uctx_ddcq_completed, 2);\ -+} while (0) -+ -+#define user_ddcq_command_flow_intr(uctx) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+\ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_command_flow_intr: completed=%llx [%llx] addr=%llx\n", (uctx)->uctx_ddcq_completed, \ -+ (uctx)->uctx_upage->upage_ddcq_completed, (uctx)->uctx_upage_addr); \ -+ user_ddcq_command_write (INTERRUPT_CMD | ELAN4_INT_COOKIE_DDCQ, 3);\ -+} while (0) -+ -+#define user_ddcq_command_prologue(uctx, count) do { \ -+ E4_uint64 iptr = (uctx)->uctx_ddcq_insertcnt; \ -+ ioaddr_t cmdptr = (uctx)->uctx_ddcq->ucq_cq->cq_mapping + ((iptr<<3) & ((ELAN4_COMMAND_QUEUE_MAPPING >> 1)-1));\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_prologue: iptr=%llx cmdptr=%x\n", iptr, cmdptr); -+ -+#define user_ddcq_command_epilogue(uctx, count, extra) \ -+ (uctx)->uctx_ddcq_insertcnt = iptr + (count);\ -+\ -+ PRINTF(uctx, DBG_DDCQ, "user_ddcq_command_epilogue: iptr=%llx + %x + %x - completed %llx\n", iptr, count, extra, (uctx)->uctx_ddcq_completed);\ -+ if (((iptr) + (count) + (extra)) > ((uctx)->uctx_ddcq_completed + USER_CTRLFLOW_COUNT))\ -+ user_ddcq_command_flow_write(uctx); \ -+} while (0) -+ -+int -+user_ddcq_check (USER_CTXT *uctx, unsigned num) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_check_ddcq: insert=%llx completed=%llx num=%d\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_upage->upage_ddcq_completed, num); -+ -+ /* Ensure that there is enough space for the command we want to issue, -+ * PLUS the guard/writeword for the control flow flush. -+ * PLUS the interrupt command for rescheduling */ -+ if (user_ddcq_command_space (uctx) > (num + 4)) -+ { -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: loads of space\n"); -+ -+ return (1); -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_check: not enough space - reschedule\n"); -+ -+ uctx->uctx_trap_state = UCTX_TRAP_SIGNALLED; -+ return (0); -+} -+ -+int -+user_ddcq_flush (USER_CTXT *uctx) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ USER_CQ *ucq = uctx->uctx_ddcq; -+ -+ switch (ucq->ucq_state) -+ { -+ case UCQ_TRAPPED: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: command queue is trapped\n"); -+ return (0); -+ -+ case UCQ_NEEDS_RESTART: -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: restarting command queue\n"); -+ -+ if (UCTX_RUNNABLE (uctx)) -+ { -+ ucq->ucq_state = UCQ_RUNNING; -+ elan4_restartcq (dev, ucq->ucq_cq); -+ } -+ break; -+ } -+ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_flush: insertcnt=%llx completed=%llx [%llx]\n", -+ uctx->uctx_ddcq_insertcnt, uctx->uctx_ddcq_completed, uctx->uctx_upage->upage_ddcq_completed); -+ -+ if (uctx->uctx_ddcq_completed != uctx->uctx_ddcq_insertcnt) -+ user_ddcq_command_flow_write (uctx); -+ -+ return (uctx->uctx_ddcq_completed == uctx->uctx_upage->upage_ddcq_completed); -+} -+ -+void -+user_ddcq_intr (USER_CTXT *uctx) -+{ -+ user_ddcq_command_flow_intr (uctx); -+} -+ -+void -+user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_dma: cookie=%llx vproc=%llx\n", dma->dma_cookie, dma->dma_vproc); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write ((dma->dma_typeSize & ~DMA_ContextMask) | RUN_DMA_CMD, 0); -+ user_ddcq_command_write (dma->dma_cookie, 1); -+ user_ddcq_command_write (dma->dma_vproc, 2); -+ user_ddcq_command_write (dma->dma_srcAddr, 3); -+ user_ddcq_command_write (dma->dma_dstAddr, 4); -+ user_ddcq_command_write (dma->dma_srcEvent, 5); -+ user_ddcq_command_write (dma->dma_dstEvent, 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_run_thread: PC=%llx SP=%llx\n", regs->Registers[0], regs->Registers[1]); -+ -+ user_ddcq_command_prologue(uctx, 7) { -+ -+ user_ddcq_command_write (regs->Registers[0] | RUN_THREAD_CMD, 0); -+ user_ddcq_command_write (regs->Registers[1], 1); -+ user_ddcq_command_write (regs->Registers[2], 2); -+ user_ddcq_command_write (regs->Registers[3], 3); -+ user_ddcq_command_write (regs->Registers[4], 4); -+ user_ddcq_command_write (regs->Registers[5], 5); -+ user_ddcq_command_write (regs->Registers[6], 6); -+ -+ } user_ddcq_command_epilogue (uctx, 7, 0); -+} -+ -+void -+user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr) -+{ -+ user_ddcq_command_prologue (uctx, 1) { -+ -+ user_ddcq_command_write (SET_EVENT_CMD | addr, 0); -+ -+ } user_ddcq_command_epilogue (uctx, 1, 0); -+} -+ -+void -+user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_seteventn: addr=%llx count=%lx\n", addr, count); -+ -+ user_ddcq_command_prologue (uctx, 2) { -+ -+ user_ddcq_command_write (SET_EVENTN_CMD, 0); -+ user_ddcq_command_write (addr | count, 1); -+ -+ } user_ddcq_command_epilogue (uctx, 2, 0); -+} -+ -+void -+user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1) -+{ -+ PRINTF (uctx, DBG_DDCQ, "user_ddcq_waitevent: addr=%llx CountAndType=%llx Param=%llx,%llx\n", addr, CountAndType, Param0, Param1); -+ -+ user_ddcq_command_prologue (uctx, 4) { -+ -+ user_ddcq_command_write (WAIT_EVENT_CMD | addr, 0); -+ user_ddcq_command_write (CountAndType, 1); -+ user_ddcq_command_write (Param0, 2); -+ user_ddcq_command_write (Param1, 3); -+ -+ } user_ddcq_command_epilogue (uctx, 4, 0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/elan4/user_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/elan4/user_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/elan4/user_Linux.c 2005-07-28 14:52:52.847678888 -0400 -@@ -0,0 +1,377 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user_Linux.c,v 1.25.2.4 2005/01/18 14:36:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user_Linux.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+static int -+user_pteload (USER_CTXT *uctx, E4_Addr addr, physaddr_t phys, int perm) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, phys, perm); -+ -+ /* -+ * On MPSAS we don't allocate a large enough context table, so -+ * if we see an address/context pair which would "alias" because -+ * they differ in unchecked hash bits to a previous pteload, -+ * then we kill the application. -+ */ -+ { -+ unsigned hashval = (E4MMU_SHIFT_ADDR(addr, (dev->dev_pageshift[0]) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(uctx->uctx_ctxt.ctxt_num)); -+ -+ if (dev->dev_rsvd_hashval[0] == 0xFFFFFFFF) -+ dev->dev_rsvd_hashval[0] = hashval & dev->dev_rsvd_hashmask[0]; -+ -+ if ((hashval & dev->dev_rsvd_hashmask[0]) != dev->dev_rsvd_hashval[0]) -+ { -+ printk ("user_pteload: vaddr=%016llx ctxnum=%x -> [%x] overlaps %x - %x [hashidx=%x]\n", (unsigned long long) addr, -+ uctx->uctx_ctxt.ctxt_num, hashval, hashval & dev->dev_rsvd_hashmask[0], dev->dev_rsvd_hashval[0], -+ E4MMU_HASH_INDEX (uctx->uctx_ctxt.ctxt_num, addr, dev->dev_pageshift[0], dev->dev_hashsize[0]-1)); -+ -+ return -EFAULT; -+ } -+ } -+ -+ if ((newpte & (PTE_PciNotLocal | PTE_CommandQueue)) == 0 && -+ ((addr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (phys & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)))) -+ { -+ printk ("user_pteload: vaddr=%016llx incorrectly alias sdram at %lx\n", (unsigned long long) addr, -+ phys ^ pci_resource_start (dev->dev_osdep.pdev, ELAN4_BAR_SDRAM)); -+ return -EFAULT; -+ } -+ -+ if (newpte & PTE_PciNotLocal) -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (pci)\n", addr, newpte); -+ else if (newpte & PTE_CommandQueue) -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (command)\n", addr, newpte); -+ else -+ PRINTF (uctx, DBG_FAULT, "user_pteload: addr=%llx -> pte=%llx (sdram)\n", addr, newpte); -+ -+ elan4mmu_pteload (&uctx->uctx_ctxt, 0, addr, newpte); -+ -+ return (0); -+} -+ -+int -+user_load_range (USER_CTXT *uctx, E4_Addr eaddr, unsigned long nbytes, E4_uint32 fsr) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ struct mm_struct *mm = current->mm; -+ int writeable = (AT_Perm(fsr) == AT_PermLocalDataWrite || -+ AT_Perm(fsr) == AT_PermRemoteWrite || -+ AT_Perm(fsr) == AT_PermLocalEvent || -+ AT_Perm(fsr) == AT_PermRemoteEvent); -+ struct vm_area_struct *vma; -+ int i, perm; -+ unsigned long len; -+ unsigned long maddr; -+ physaddr_t phys; -+ -+ kmutex_lock (&uctx->uctx_rgnmutex); -+ -+ while (nbytes > 0) -+ { -+ USER_RGN *rgn = user_rgnat_elan (uctx, eaddr); -+ -+ if (rgn == NULL || ELAN4_INCOMPAT_ACCESS (rgn->rgn_perm, AT_Perm (fsr))) -+ { -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx -> %s\n", eaddr, rgn == NULL ? "no mapping" : "no permission"); -+ -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (rgn == NULL ? -EFAULT : -EPERM); -+ } -+ -+ if (writeable) -+ perm = rgn->rgn_perm; -+/* This is the correct code but it breaks the Eagle libraries (1.6.X) - backed out (addy 24.08.04) -+ else if (AT_Perm(fsr) == AT_PermExecute && (rgn->rgn_perm & PERM_Mask) != PERM_LocExecute) -+*/ -+ else if (AT_Perm(fsr) == AT_PermExecute) -+ perm = PERM_LocRead | (rgn->rgn_perm & ~PERM_Mask); -+ else -+ perm = ELAN4_PERM_READONLY (rgn->rgn_perm & PERM_Mask) | (rgn->rgn_perm & ~PERM_Mask); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: rgn=%p [%llx.%lx.%x]\n", rgn, rgn->rgn_ebase, rgn->rgn_mbase, rgn->rgn_len); -+ -+ len = ((rgn->rgn_ebase + rgn->rgn_len) - eaddr); -+ if (len > nbytes) -+ len = nbytes; -+ nbytes -= len; -+ -+ maddr = rgn->rgn_mbase + (eaddr - rgn->rgn_ebase); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: eaddr=%llx->%llx -> %lx->%lx len=%x perm=%x\n", eaddr, -+ eaddr + len, maddr, maddr + len, len, perm); -+ -+ down_read (&mm->mmap_sem); -+ while (len > 0) -+ { -+ if ((vma = find_vma_intersection (mm, maddr, maddr + PAGE_SIZE)) == NULL || -+ (writeable && !(vma->vm_flags & VM_WRITE))) -+ { -+ PRINTF (DBG_USER, DBG_FAULT, "ctxt_pagefault: %s %lx\n", vma ? "no writeble at" : "no vma for", maddr); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ -+ spin_lock (&mm->page_table_lock); -+ { -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: %lx %s %s\n", maddr, writeable ? "writeable" : "readonly", -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ writeable && !pte_write(ptep_value) ? "COW" : "OK"); -+ -+ if (ptep_ptr == NULL || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value)) || !pte_read (ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ -+ make_pages_present(maddr, maddr + PAGE_SIZE); -+ -+ spin_lock (&mm->page_table_lock); -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ if (ptep_ptr == NULL || pte_none(ptep_value) || !pte_present(ptep_value) || (writeable && !pte_write(ptep_value)) || !pte_read (ptep_value)) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ } -+ -+ if (writeable) -+ pte_mkdirty(ptep_value); -+ pte_mkyoung (ptep_value); -+ -+ phys = pte_phys (ptep_value); -+ -+ for (i = 0; i < PAGE_SIZE; i += (1 << dev->dev_pageshift[0])) -+ { -+ if (user_pteload (uctx, eaddr, phys, perm) < 0) -+ { -+ spin_unlock (&mm->page_table_lock); -+ up_read (&mm->mmap_sem); -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ return (-EFAULT); -+ } -+ -+ eaddr += (1 << dev->dev_pageshift[0]); -+ phys += (1 << dev->dev_pageshift[0]); -+ } -+ } -+ spin_unlock (&mm->page_table_lock); -+ -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+ up_read (&mm->mmap_sem); -+ } -+ kmutex_unlock (&uctx->uctx_rgnmutex); -+ -+ PRINTF (uctx, DBG_FAULT, "user_load_range: alldone\n"); -+ -+ return (0); -+} -+ -+void -+user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len) -+{ -+ virtaddr_t lim = addr + len - 1; -+ struct vm_area_struct *vma; -+ -+ down_read (¤t->mm->mmap_sem); -+ -+ if ((vma = find_vma (current->mm, addr)) != NULL) -+ { -+ do { -+ unsigned long start = vma->vm_start; -+ unsigned long end = vma->vm_end; -+ -+ if ((start-1) >= lim) -+ break; -+ -+ if (start < addr) start = addr; -+ if ((end-1) > lim) end = lim+1; -+ -+ if (vma->vm_flags & VM_IO) -+ continue; -+ -+ user_unload_main (uctx, start, end - start); -+ -+ make_pages_present (start, end); -+ -+ user_update_main (uctx, current->mm, start, end - start); -+ -+ } while ((vma = find_vma (current->mm, vma->vm_end)) != NULL); -+ } -+ up_read (¤t->mm->mmap_sem); -+} -+ -+static void -+user_update_range (USER_CTXT *uctx, int tbl, struct mm_struct *mm, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, int perm) -+{ -+ ELAN4_DEV *dev = uctx->uctx_ctxt.ctxt_dev; -+ int roperm = ELAN4_PERM_READONLY(perm & PERM_Mask) | (perm & ~PERM_Mask); -+ int nbytes; -+ -+ while (len > 0) -+ { -+ pte_t *ptep_ptr; -+ pte_t ptep_value; -+ -+ ptep_ptr = find_pte_map (mm, maddr); -+ if (ptep_ptr) { -+ ptep_value = *ptep_ptr; -+ pte_unmap(ptep_ptr); -+ } -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_range: %llx (%lx) %s\n", eaddr, maddr, -+ !ptep_ptr ? "invalid" : pte_none(ptep_value) ? "none " : !pte_present(ptep_value) ? "swapped " : -+ !pte_write(ptep_value) ? "RO/COW" : "OK"); -+ -+ if (ptep_ptr && !pte_none(ptep_value) && pte_present(ptep_value) && pte_read (ptep_value)) { -+ physaddr_t phys_value = pte_phys(ptep_value); -+ for (nbytes = 0; nbytes < PAGE_SIZE; nbytes += (1 << dev->dev_pageshift[0])) -+ { -+ user_pteload (uctx, eaddr, phys_value, pte_write (ptep_value) ? perm : roperm); -+ -+ eaddr += (1 << dev->dev_pageshift[0]); -+ phys_value += (1 << dev->dev_pageshift[0]); -+ } -+ } -+ -+ maddr += PAGE_SIZE; -+ len -= PAGE_SIZE; -+ } -+} -+ -+void -+user_update_main (USER_CTXT *uctx, struct mm_struct *mm, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, rgn->rgn_mbase, rgn->rgn_ebase, rgn->rgn_len, rgn->rgn_perm); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, rgn->rgn_mbase, rgn->rgn_ebase, ssize, rgn->rgn_perm); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: end %lx -> %lx\n", start, start + ssize); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize, rgn->rgn_perm); -+ } -+ else -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_update_main: middle %lx -> %lx\n", start, end); -+ -+ user_update_range (uctx, 0 /* tbl */, mm, start, rgn->rgn_ebase + (start - rgn->rgn_mbase), len, rgn->rgn_perm); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+ -+void -+user_unload_main (USER_CTXT *uctx, virtaddr_t start, unsigned long len) -+{ -+ USER_RGN *rgn; -+ unsigned long ssize; -+ virtaddr_t end = start + len - 1; -+ -+ spin_lock (&uctx->uctx_rgnlock); -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start=%lx end=%lx\n", start, end); -+ -+ for (rgn = user_findrgn_main (uctx, start, 0); rgn != NULL; rgn = rgn->rgn_mnext) -+ { -+ if (end < rgn->rgn_mbase) -+ break; -+ -+ if (start <= rgn->rgn_mbase && end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: whole %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + rgn->rgn_len - 1); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, rgn->rgn_len); -+ } -+ else if (start <= rgn->rgn_mbase) -+ { -+ ssize = end - rgn->rgn_mbase + 1; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: start %lx -> %lx\n", rgn->rgn_mbase, rgn->rgn_mbase + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase, ssize); -+ } -+ else if (end >= (rgn->rgn_mbase + rgn->rgn_len - 1)) -+ { -+ ssize = (rgn->rgn_mbase + rgn->rgn_len) - start; -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: end %lx -> %lx\n", start, start + ssize); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), ssize); -+ } -+ else -+ { -+ -+ PRINTF (uctx, DBG_IOPROC, "user_unload_main: middle %lx -> %lx\n", start, end); -+ -+ elan4mmu_unload_range (&uctx->uctx_ctxt, 0 /* tbl */, rgn->rgn_ebase + (start - rgn->rgn_mbase), len); -+ } -+ } -+ spin_unlock (&uctx->uctx_rgnlock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/asm_elan4_thread.S -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/asm_elan4_thread.S 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/asm_elan4_thread.S 2005-07-28 14:52:52.848678736 -0400 -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: asm_elan4_thread.S,v 1.1 2003/09/23 13:55:11 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+/* -+ * c_reschedule (E4_uint64 *commandport) -+ */ -+ .global c_reschedule -+c_reschedule: -+ add %sp, -128, %sp -+ st64 %r16, [%sp] // preserve call preserved registers -+ st64 %r24, [%sp + 64] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ mov NOP_CMD, %r23 // "nop" command -+ st64suspend %r16, [%r8] -+3: ld64 [%sp], %r16 -+ ld64 [%sp + 64], %r24 // restore call preserved register -+ jmpl %r2+8, %r0 // and return -+ add %sp, 128, %sp -+ -+ -+/* -+ * c_waitevent (E4_uint64 *commandport, E4_Event *event, E4_uint64 count) -+ */ -+ .global c_waitevent -+c_waitevent: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 // "flush command queue desc" command -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ -+ or %r9, WAIT_EVENT_CMD, %r16 -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 -+ mov %sp, %r18 -+ mov %r8, %r19 -+ -+ st32suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/assym_elan4.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/assym_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/assym_elan4.h 2005-07-28 14:52:52.848678736 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: genassym_elan4.c,v 1.3 2004/04/25 11:26:07 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/genassym_elan4.c,v $*/ -+ -+/* Generated by genassym_elan4 - do not modify */ -+ -+#define EP4_RCVR_THREAD_STALL 0 -+#define EP4_RCVR_PENDING_TAILP 128 -+#define EP4_RCVR_PENDING_HEAD 136 -+#define EP4_RCVR_DEBUG 176 -+#define EP4_RXD_NEXT 664 -+#define EP4_RXD_QUEUED 728 -+#define EP4_RXD_DEBUG 944 -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/cm.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/cm.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/cm.c 2005-07-28 14:52:52.853677976 -0400 -@@ -0,0 +1,3000 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm.c,v 1.83.2.6 2005/01/13 12:37:57 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+#if defined(LINUX) -+#include "conf_linux.h" -+#endif -+ -+int BranchingRatios[CM_MAX_LEVELS]; -+ -+int MachineId = -1; -+int BrokenLevel = -1; /* Simulates Broken Network */ -+int RejoinCheck = 1; -+int RejoinPanic = 0; -+ -+static int -+SegmentNo (CM_RAIL *cmRail, u_int nodeid, u_int lvl) -+{ -+ int i; -+ -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < lvl; i++) -+ nodeid /= cmRail->Levels[i].NumSegs; -+ -+ return (nodeid % cmRail->Levels[lvl].NumSegs); -+} -+ -+static int -+ClusterIds (CM_RAIL *cmRail, int clvl, int *clmin, int *clmax) -+{ -+ int clid = cmRail->Rail->Position.pos_nodeid - cmRail->Levels[clvl].MinNodeId; -+ -+ if (clvl == 0) -+ *clmin = *clmax = clid; -+ else -+ { -+ *clmin = cmRail->Levels[clvl - 1].MinNodeId - cmRail->Levels[clvl].MinNodeId; -+ *clmax = *clmin + cmRail->Levels[clvl - 1].NumNodes - 1; -+ } -+ return (clid); -+} -+ -+#if defined(PER_CPU_TIMEOUT) -+static void -+__Schedule_Discovery (CM_RAIL *cmRail) /* we urgently need to schedule discovery */ -+{ -+ cmRail->NextDiscoverTime = lbolt; -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, cmRail->NextDiscoverTime)) -+ cmRail->NextRunTime = cmRail->NextDiscoverTime; -+} -+ -+static void -+__Schedule_Heartbeat (CM_RAIL *cmRail) -+{ -+ cmRail->NextHeartbeatTime = lbolt; -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, cmRail->NextHeartbeatTime)) -+ cmRail->NextRunTime = cmRail->NextHeartbeatTime; -+} -+#else -+ -+static void -+__Schedule_Timer (CM_RAIL *cmRail, long tick) -+{ -+ if (! timer_pending (&cmRail->HeartbeatTimer) || AFTER (cmRail->NextRunTime, tick)) -+ { -+ cmRail->NextRunTime = tick; -+ -+ mod_timer (&cmRail->HeartbeatTimer, tick); -+ } -+} -+ -+static void -+__Schedule_Discovery (CM_RAIL *cmRail) /* we urgently need to schedule discovery */ -+{ -+ __Schedule_Timer (cmRail, cmRail->NextDiscoverTime = lbolt); -+} -+ -+static void -+__Schedule_Heartbeat (CM_RAIL *cmRail) -+{ -+ __Schedule_Timer (cmRail, cmRail->NextHeartbeatTime = lbolt); -+} -+#endif -+ -+static int -+MsgBusy (CM_RAIL *cmRail, int msgNumber) -+{ -+ switch (ep_outputq_state (cmRail->Rail, cmRail->MsgQueue, msgNumber)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ uint8_t type = msg->Hdr.Type; -+ uint16_t nmaps = msg->Hdr.NumMaps; -+ int16_t off = msg->Payload.Statemaps[CM_MSG_MAP(0)].offset; -+ -+ CPRINTF4 (((type == CM_MSG_TYPE_DISCOVER_LEADER) || (type == CM_MSG_TYPE_DISCOVER_SUBORDINATE)) ? 6 : 3, /* we expect broadcasts to be NACKed */ -+ "%s: msg %d type %d failed%s\n", cmRail->Rail->Name, msgNumber, type, -+ (type != CM_MSG_TYPE_HEARTBEAT) ? "" : nmaps == 0 ? ": null heartbeat" : -+ off == STATEMAP_RESET ? ": heartbeat with R statemaps" : ": heartbeat with statemaps"); -+#endif -+ return 0; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: -+ return 0; -+ -+ default: -+ panic ("MsgBusy - bad return code from ep_outputq_state\n"); -+ /* NOTREACHED */ -+ } -+ return 0; -+} -+ -+static void -+LaunchMessage (CM_RAIL *cmRail, int msgNumber, int vp, int qnum, int retries, int type, int lvl, int nmaps) -+{ -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, msgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (nmaps >= 0 && nmaps <= CM_MSG_MAXMAPS); -+ ASSERT (SPINLOCK_HELD (&cmRail->Lock)); -+ -+ hdr->Version = CM_MSG_VERSION; -+ hdr->ParamHash = cmRail->ParamHash; -+ hdr->Timestamp = cmRail->Timestamp; -+ hdr->Checksum = 0; -+ hdr->NodeId = cmRail->Rail->Position.pos_nodeid; -+ hdr->MachineId = MachineId; -+ hdr->NumMaps = nmaps; -+ hdr->Level = lvl; -+ hdr->Type = type; -+ hdr->Checksum = CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)); -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ return; -+ -+ if (ep_outputq_send (cmRail->Rail, cmRail->MsgQueue, msgNumber, -+ CM_MSG_SIZE(nmaps), vp, qnum, retries)); -+ IncrStat (cmRail, LaunchMessageFail); -+} -+ -+static int -+SendMessage (CM_RAIL *cmRail, int nodeId, int lvl, int type) -+{ -+ int msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ int n = CM_NUM_SPARE_MSG_BUFFERS; -+ int retries; -+ -+ ASSERT (type == CM_MSG_TYPE_IMCOMING || /* other types must use SendToSgmt */ -+ type == CM_MSG_TYPE_REJOIN); -+ -+ while (n-- > 0 && MsgBusy (cmRail, msgNumber)) /* search for idle "spare" buffer */ -+ { -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) -+ cmRail->NextSpareMsg = 0; -+ -+ msgNumber = CM_NUM_NODE_MSG_BUFFERS + cmRail->NextSpareMsg; -+ } -+ -+ if (n == 0) /* all "spare" message buffers busy */ -+ { -+ CPRINTF3 (3, "%s: all spare message buffers busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, nodeId); -+ return (0); -+ } -+ -+ /* NB IMCOMING may be echoed by MANY nodes, so we don't (and musn't) have any retries */ -+ retries = (type == CM_MSG_TYPE_IMCOMING) ? 0 : CM_P2P_DMA_RETRIES; -+ -+ LaunchMessage (cmRail, msgNumber, EP_VP_NODE (nodeId), EP_SYSTEMQ_INTR, /* eager receive */ -+ retries, type, lvl, 0); -+ -+ if (++(cmRail->NextSpareMsg) == CM_NUM_SPARE_MSG_BUFFERS) /* check this one last next time */ -+ cmRail->NextSpareMsg = 0; -+ -+ return (1); -+} -+ -+static int -+SendToSgmt (CM_RAIL *cmRail, CM_SGMT *sgmt, int type) -+{ -+ bitmap_t seg; -+ int offset; -+ int nmaps; -+ int sidx; -+ int clvl; -+ -+ ASSERT (sgmt->Level <= cmRail->TopLevel); -+ -+ if (MsgBusy (cmRail, sgmt->MsgNumber)) /* previous message still busy */ -+ { -+ CPRINTF3 (3, "%s: node message buffer busy: trying to send type %d to %d\n", -+ cmRail->Rail->Name, type, sgmt->NodeId); -+ -+ return (0); -+ } -+ -+ switch (type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ ASSERT (sgmt->State == CM_SGMT_ABSENT); -+ ASSERT (sgmt->Level == ((cmRail->Role == CM_ROLE_LEADER_CANDIDATE) ? cmRail->TopLevel : cmRail->TopLevel - 1)); -+ ASSERT (sgmt->Level < cmRail->NumLevels); -+ ASSERT (sgmt->Sgmt == cmRail->Levels[sgmt->Level].MySgmt); -+ -+ /* broadcast to me and all my peers at this level (== my segment in the level above) */ -+ sidx = (sgmt->Level == cmRail->NumLevels - 1) ? 0 : cmRail->Levels[sgmt->Level + 1].MySgmt; -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level + 1, sidx), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ type, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ ASSERT (sgmt->State == CM_SGMT_WAITING); -+ ASSERT (sgmt->Level > 0); /* broadcasting just to subtree */ -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_BCAST (sgmt->Level, sgmt->Sgmt), -+ EP_SYSTEMQ_INTR, 0, /* eager rx; no retries */ -+ CM_MSG_TYPE_DISCOVER_SUBORDINATE, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_NOTIFY: -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_INTR, CM_P2P_DMA_RETRIES, /* eager rx; lots of retries */ -+ CM_MSG_TYPE_NOTIFY, sgmt->Level, 0); -+ return (1); -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ { -+ CM_MSG *msg = ep_outputq_msg (cmRail->Rail, cmRail->MsgQueue, sgmt->MsgNumber); -+ CM_HDR *hdr = &msg->Hdr; -+ -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ -+ hdr->AckSeq = sgmt->AckSeq; -+ -+ if (!sgmt->MsgAcked) /* Current message not acknowledged */ -+ { -+ /* must have been something significant to require an ack */ -+ ASSERT (sgmt->SendMaps); -+ ASSERT (sgmt->NumMaps > 0); -+ -+ CPRINTF3 (3, "%s: retrying heartbeat to %d (%d entries)\n", cmRail->Rail->Name, sgmt->NodeId, sgmt->NumMaps); -+ -+ IncrStat (cmRail, RetryHeartbeat); -+ -+ nmaps = sgmt->NumMaps; -+ } -+ else -+ { -+ nmaps = 0; -+ -+ if (sgmt->SendMaps) /* can send maps */ -+ { -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!sgmt->Maps[clvl].OutputMapValid) -+ continue; -+ -+ while ((offset = statemap_findchange (sgmt->Maps[clvl].OutputMap, &seg, 1)) >= 0) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 1; -+ -+ map->level = clvl; -+ map->offset = offset; -+ map->seg[0] = seg & 0xffff; -+ map->seg[1] = (seg >> 16) & 0xffff; -+#if (BT_ULSHIFT == 6) -+ map->seg[2] = (seg >> 32) & 0xffff; -+ map->seg[3] = (seg >> 48) & 0xffff; -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ -+ if (sgmt->Maps[clvl].SentChanges) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(nmaps)]; -+ -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ map->level = clvl; -+ map->offset = STATEMAP_NOMORECHANGES; -+ -+ if (++nmaps == CM_MSG_MAXMAPS) -+ goto msg_full; -+ } -+ } -+ } -+ -+ ASSERT (nmaps < CM_MSG_MAXMAPS); -+ -+ msg_full: -+ sgmt->NumMaps = nmaps; /* remember how many incase we retry */ -+ -+ if (nmaps == 0) /* no changes to send */ -+ hdr->Seq = sgmt->MsgSeq; /* this one can be dropped */ -+ else -+ { -+ hdr->Seq = ++(sgmt->MsgSeq); /* on to next message number */ -+ sgmt->MsgAcked = 0; /* need this one to be acked before I can send another */ -+ -+ IncrStat (cmRail, MapChangesSent); -+ } -+ } -+ -+ LaunchMessage (cmRail, sgmt->MsgNumber, EP_VP_NODE (sgmt->NodeId), -+ EP_SYSTEMQ_POLLED, CM_P2P_DMA_RETRIES, /* polled receive, lots of retries */ -+ CM_MSG_TYPE_HEARTBEAT, sgmt->Level, nmaps); -+ -+ IncrStat (cmRail, HeartbeatsSent); -+ -+ return (1); -+ } -+ -+ default: /* other types must use SendMessage */ -+ printk ("SendToSgmt: invalid type %d\n", type); -+ ASSERT (0); -+ -+ return (1); -+ } -+} -+ -+static char * -+GlobalStatusString (statemap_t *map, int idx) -+{ -+ char *strings[] = {"....", "S...", "C...", "R...", -+ ".s..", "Ss..", "Cs..", "Rs..", -+ "..r.", "S.r.", "C.r.", "R.r.", -+ ".sr.", "Ssr.", "Csr.", "Rsr.", -+ "...R", "S..R", "C..R", "R..R", -+ ".s.R", "Ss.R", "Cs.R", "Rs.R", -+ "..rR", "S.rR", "C.rR", "R.rR", -+ ".srR", "SsrR", "CsrR", "RsrR"}; -+ -+ return (strings[statemap_getbits (map, idx * CM_GSTATUS_BITS, CM_GSTATUS_BITS)]); -+} -+ -+static char * -+MapString (char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ static char *space; -+ int i; -+ -+ if (space == NULL) -+ KMEM_ALLOC (space, char *, EP_MAX_NODES*(CM_GSTATUS_BITS+1), 0); -+ -+ if (space == NULL) -+ return (""); -+ else -+ { -+ char *ptr = space; -+ -+ sprintf (space, "%s ", name); ptr += strlen (ptr); -+ for (i = 0; i < nnodes; i++, ptr += strlen (ptr)) -+ sprintf (ptr, "%s%s", i == 0 ? "" : ",", GlobalStatusString (map, i)); -+ sprintf (ptr, " %s", trailer); -+ return (space); -+ } -+} -+ -+void -+DisplayMap (DisplayInfo *di, CM_RAIL *cmRail, char *name, statemap_t *map, int nnodes, char *trailer) -+{ -+ char linebuf[256]; -+ char *ptr = linebuf; -+ int i; -+ -+#define NODES_PER_LINE 32 -+ for (i = 0; i < nnodes; i++) -+ { -+ if (ptr == linebuf) -+ { -+ sprintf (ptr, "%4d", i); -+ ptr += strlen (ptr); -+ } -+ -+ sprintf (ptr, ",%s", GlobalStatusString (map, i)); -+ ptr += strlen (ptr); -+ -+ if ((i % NODES_PER_LINE) == (NODES_PER_LINE-1) || (i == (nnodes-1))) -+ { -+ (di->func)(di->arg, "%s: %s %s %s\n", cmRail->Rail->Name, name, linebuf, trailer); -+ ptr = linebuf; -+ } -+ } -+#undef NODES_PER_LINE -+} -+ -+void -+DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int clvl; -+ char mapname[128]; -+ -+ (di->func)(di->arg, "%s: Node %d maps...\n", cmRail->Rail->Name, cmRail->Rail->Position.pos_nodeid); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nnodes = cmRail->Levels[clvl].NumNodes; -+ -+ (di->func)(di->arg, "%s: Cluster level %d: Connected %ld - %s%s\n", -+ cmRail->Rail->Name, clvl, cmRail->Levels[clvl].Connected, -+ cmRail->Levels[clvl].Online ? "Online" : "Offline", -+ cmRail->Levels[clvl].Restarting ? ", Restarting" : ""); -+ -+ for (lvl = 0; lvl < cmRail->TopLevel && lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ sprintf (mapname, "%10s%2d", "Level", lvl); -+ DisplayMap (di, cmRail, mapname, level->SubordinateMap[clvl], nnodes, -+ level->SubordinateMapValid[clvl] ? "" : "(invalid)"); -+ } -+ -+ sprintf (mapname, "%12s", "Local"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LocalMap, nnodes, ""); -+ -+ sprintf (mapname, "%12s", "Subtree"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].SubTreeMap, nnodes, -+ cmRail->Levels[clvl].SubTreeMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "Global"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].GlobalMap, nnodes, -+ cmRail->Levels[clvl].GlobalMapValid ? "" : "(invalid)"); -+ -+ sprintf (mapname, "%12s", "LastGlobal"); -+ DisplayMap (di, cmRail, mapname, cmRail->Levels[clvl].LastGlobalMap, nnodes, ""); -+ } -+} -+ -+void -+DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ (di->func)(di->arg, "%s: Node %d segments...\n", cmRail->Rail->Name, cmRail->NodeId); -+ -+ for (lvl = 0; lvl <= cmRail->TopLevel && lvl < cmRail->NumLevels; lvl++) -+ { -+ (di->func)(di->arg, " level %d: ", lvl); -+ -+ for (sidx = 0; sidx < ((lvl == cmRail->TopLevel) ? 1 : cmRail->Levels[lvl].NumSegs); sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ (di->func)(di->arg, "[%d, in: %d out: %d %s%s]", -+ sgmt->NodeId, -+ sgmt->AckSeq, -+ sgmt->MsgSeq, -+ sgmt->MsgAcked ? "A" : "-", -+ sgmt->SendMaps ? "!" : "-"); -+ else -+ (di->func)(di->arg, "[%s]", (sgmt->State == CM_SGMT_ABSENT ? "absent" : -+ sgmt->State == CM_SGMT_WAITING ? "waiting" : -+ sgmt->State == CM_SGMT_COMING ? "coming" : "UNKNOWN")); -+ } -+ (di->func)(di->arg, "\n"); -+ } -+} -+ -+ -+static void -+StartConnecting (CM_RAIL *cmRail, CM_SGMT *sgmt, int NodeId, int Timestamp) -+{ -+ int clvl; -+ -+ CPRINTF4 (2, "%s: lvl %d subtree %d node %d -> connecting\n", cmRail->Rail->Name, sgmt->Level, sgmt->Sgmt, NodeId); -+ -+ /* Only reconnect the same guy if he was reborn */ -+ ASSERT (sgmt->State != CM_SGMT_PRESENT || -+ (sgmt->NodeId == NodeId && sgmt->Timestamp != Timestamp)); -+ -+ /* After we've connected to a new peer, we wait to receive -+ * STATEMAP_RESET before we accumulate changes and we wait for a -+ * complete map to be received before we propagate changes to other -+ * nodes. -+ * -+ * If I'm the subordinate, I can start sending maps right away, since -+ * the leader is ready for them already. If I'm the leader, I hold off -+ * sending maps until I've seen the subordinate's first heartbeat, -+ * because the subordinate might miss my NOTIFY message, still think -+ * she's a leader candidate and ignore my heartbeats. -+ */ -+ sgmt->SendMaps = (sgmt->Level == cmRail->TopLevel); /* I can send maps to my leader (she NOTIFIED me) */ -+ -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_reset (sgmt->Maps[clvl].CurrentInputMap); -+ statemap_reset (sgmt->Maps[clvl].InputMap); -+ statemap_reset (sgmt->Maps[clvl].OutputMap); -+ -+ sgmt->Maps[clvl].InputMapValid = 0; -+ sgmt->Maps[clvl].OutputMapValid = 0; -+ sgmt->Maps[clvl].SentChanges = 0; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* connection to leader */ -+ { -+ ASSERT (sgmt->Sgmt == 0); -+ ASSERT (cmRail->Role == CM_ROLE_SUBORDINATE); -+ -+ if (cmRail->Levels[clvl].SubTreeMapValid) /* already got a subtree map to send up */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ } -+ } -+ else /* connection to subordinate */ -+ { -+ ASSERT (sgmt->Sgmt != cmRail->Levels[sgmt->Level].MySgmt); -+ -+ if (cmRail->Levels[clvl].GlobalMapValid) /* already got a global map to broadcast */ -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ } -+ } -+ } -+ -+ /* Initialise sequence counters */ -+ sgmt->MsgSeq = sgmt->AckSeq = 0; -+ sgmt->MsgAcked = 1; /* ready to send a new sequenced message */ -+ -+ sgmt->State = CM_SGMT_PRESENT; -+ sgmt->NodeId = NodeId; -+ sgmt->UpdateTick = lbolt; -+ sgmt->Timestamp = Timestamp; -+} -+ -+static void -+StartSubTreeDiscovery (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ sgmt->WaitingTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+} -+ -+void -+StartSubordinateDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int lvl = cmRail->TopLevel - 1; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ ASSERT (lvl >= 0 && lvl < cmRail->NumLevels); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ if (i != level->MySgmt) /* No-one should connect here */ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ } -+} -+ -+void -+StartLeaderDiscovery (CM_RAIL *cmRail) -+{ -+ int i; -+ int clvl; -+ CM_LEVEL *level = &cmRail->Levels[cmRail->TopLevel]; -+ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ for (clvl = cmRail->TopLevel; clvl < cmRail->NumLevels; clvl++) -+ { -+ cmRail->Levels[clvl].GlobalMapValid = 0; -+ cmRail->Levels[clvl].SubTreeMapValid = 0; -+ level->SubordinateMapValid[clvl] = 0; -+ } -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ } -+ -+ cmRail->DiscoverStartTick = lbolt; -+ cmRail->Role = CM_ROLE_LEADER_CANDIDATE; -+ -+ __Schedule_Discovery (cmRail); -+} -+ -+static void -+RaiseTopLevel (CM_RAIL *cmRail) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: RaiseTopLevel %d\n", cmRail->Rail->Name, cmRail->TopLevel + 1); -+ -+ if (++cmRail->TopLevel == cmRail->NumLevels) /* whole machine leader? */ -+ cmRail->Role = CM_ROLE_LEADER; -+ else -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+ -+ StartSubordinateDiscovery (cmRail); /* and any direct subordinates */ -+} -+ -+static void -+LowerTopLevel (CM_RAIL *cmRail, int lvl) -+{ -+ ASSERT (cmRail->NumLevels != 0); -+ ASSERT (lvl < cmRail->NumLevels); -+ -+ CPRINTF2 (2, "%s: LowerTopLevel %d\n", cmRail->Rail->Name, lvl); -+ -+ if (lvl == 0) -+ cmRail->Timestamp = lbolt; -+ -+ cmRail->TopLevel = lvl; -+ -+ StartLeaderDiscovery (cmRail); /* look for my leader */ -+} -+ -+static int -+IShouldLead (CM_RAIL *cmRail, CM_MSG *msg) -+{ -+ /* NB, this function MUST be consistently calculated on any nodes, just -+ * from the info supplied in the message. Otherwise leadership -+ * arbitration during concurrent discovery will fail. -+ */ -+ return (cmRail->NodeId < msg->Hdr.NodeId); -+} -+ -+static int -+SumCheck (CM_MSG *msg) -+{ -+ CM_HDR *hdr = &msg->Hdr; -+ uint16_t sum = hdr->Checksum; -+ uint16_t nmaps = hdr->NumMaps; -+ -+ if (nmaps > CM_MSG_MAXMAPS) { -+ printk ("SumCheck: nmaps %d > CM_MSG_MAXMAPS\n", nmaps); -+ return 0; -+ } -+ -+ if ((hdr->Type != CM_MSG_TYPE_HEARTBEAT) && nmaps != 0) { -+ printk ("SumCheck: type(%d) not HEARTBEAT and nmaps(%d) != 0\n", hdr->Type, nmaps); -+ return 0; -+ } -+ -+ hdr->Checksum = 0; -+ -+ if (CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)) != sum) { -+ printk ("SumCheck: checksum failed %x %x\n", CheckSum ((char *)msg + CM_MSG_BASE(nmaps), CM_MSG_SIZE(nmaps)), sum); -+ -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static void -+ProcessMessage (EP_RAIL *rail, void *arg, void *msgbuf) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ CM_MSG *msg = (CM_MSG *) msgbuf; -+ CM_HDR *hdr = &msg->Hdr; -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *sgmt; -+ bitmap_t seg; -+ int i; -+ int delay; -+ static long tlast; -+ static int count; -+ -+ /* Poll the message Version field until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; hdr->Version == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Display a message every 60 seconds if we see an "old" format message */ -+ if (hdr->Version == EP_SYSTEMQ_UNRECEIVED && (((lbolt - tlast) > 60*HZ) ? (count = 0) : ++count) < 1) -+ { -+ printk ("%s: received old protocol message (type %d from node %d)\n", cmRail->Rail->Name, -+ ((uint8_t *) msg)[20], ((uint16_t *) msg)[4]); -+ -+ tlast = lbolt; -+ goto finished; -+ } -+ -+ if (hdr->Version != CM_MSG_VERSION || hdr->ParamHash != cmRail->ParamHash || hdr->MachineId != MachineId) -+ { -+ CPRINTF8 (1, "%s: invalid message : Version %08x (%08x) ParamHash %08x (%08x) MachineId %04x (%04x) Nodeid %d\n", cmRail->Rail->Name, -+ hdr->Version, CM_MSG_VERSION, hdr->ParamHash, cmRail->ParamHash, hdr->MachineId, MachineId, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (!SumCheck (msg)) -+ { -+ printk ("%s: checksum failed on msg from %d?\n", cmRail->Rail->Name, hdr->NodeId); -+ goto finished; -+ } -+ -+ if (hdr->NodeId == cmRail->NodeId) /* ignore my own broadcast */ -+ { -+ CPRINTF3 (6, "%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ -+ if (hdr->Type != CM_MSG_TYPE_DISCOVER_LEADER && hdr->Type != CM_MSG_TYPE_RESOLVE_LEADER) -+ printk ("%s: node %d type %d: ignored (MESSAGE FROM ME)\n", -+ cmRail->Rail->Name, hdr->NodeId, hdr->Type); -+ goto finished; -+ } -+ -+ lvl = hdr->Level; -+ level = &cmRail->Levels[lvl]; -+ -+ if (BrokenLevel != -1 && (lvl >= ((BrokenLevel >> (cmRail->Rail->Number*4)) & 0xf))) /* Simulate broken network? */ -+ goto finished; -+ -+ if (lvl >= cmRail->NumLevels || /* from outer space */ -+ hdr->NodeId < level->MinNodeId || /* from outside this level's subtree */ -+ hdr->NodeId >= level->MinNodeId + level->NumNodes) -+ { -+ printk ("%s: lvl %d node %d type %d: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, hdr->Type, -+ lvl >= cmRail->NumLevels ? "level too big for machine" : "outside subtree"); -+ goto finished; -+ } -+ -+ sidx = SegmentNo (cmRail, hdr->NodeId, lvl); -+ sgmt = &level->Sgmts[sidx]; -+ -+ switch (hdr->Type) -+ { -+ case CM_MSG_TYPE_RESOLVE_LEADER: -+ if (lvl >= cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ /* someone else thinks they lead at the same level as me */ -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d RESOLVE_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ -+ SendMessage (cmRail, hdr->NodeId, lvl, CM_MSG_TYPE_REJOIN); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_LEADER: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (above my level)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ if (sidx == level->MySgmt) /* someone I led thinks they lead some of my subtrees */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d DISCOVER_LEADER: !REJOIN (putsch)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (lvl < cmRail->TopLevel) /* I'm the leader of this level */ -+ { -+ if (sgmt->State == CM_SGMT_PRESENT && /* someone thinks someone I lead is dead */ -+ sgmt->NodeId != hdr->NodeId) -+ { -+ /* My subordinate's death could be noticed by one of her peers -+ * before I do. If she _is_ dead, I'll notice before long and -+ * NOTIFY this discover. If this discover completes before I -+ * detect my subordinate's death, the discovering node will -+ * try to take over from me, and then I'll RESET her. -+ */ -+ CPRINTF4 (6, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: ignored (got established subordinate)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ return; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* New connection */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation */ -+ StartConnecting (cmRail, sgmt, hdr->NodeId, hdr->Timestamp); -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d DISCOVER_LEADER: !NOTIFY)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_NOTIFY); -+ break; -+ } -+ -+ ASSERT (lvl == cmRail->TopLevel); -+ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ /* I think my leader is alive, in which case she'll NOTIFY this -+ * DISCOVER. If she's dead, I'll start to become a leader -+ * candidate and handle this appropriately. -+ */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: ignored (I'm a subordinate)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ ASSERT (cmRail->Role == CM_ROLE_LEADER_CANDIDATE); -+ -+ /* A peer at this level is bidding for leadership along with me */ -+ if (IShouldLead (cmRail, msg)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: but I should lead\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ /* So there _is_ someone there; She'll be seeing my DISCOVER -+ * messages and extending her discovery period, so that when I -+ * become leader, I'll NOTIFY her. In the meantime I'll flag her -+ * activity, so she remains WAITING. -+ */ -+ sgmt->UpdateTick = lbolt; -+ break; -+ } -+ -+ /* Defer to sender... */ -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER: delaying me becoming leader\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ -+ case CM_MSG_TYPE_DISCOVER_SUBORDINATE: -+ if (lvl <= cmRail->TopLevel) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (from my subtree)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (I'm not looking for a leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (hdr->Level > cmRail->BroadcastLevel && AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT)) -+ { -+ CPRINTF3 (6, "%s: lvl %d node %d DISCOVER_SUBORDINATE: ignored (broadcast level too low)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d DISCOVER_SUBORDINATE: !IMCOMING\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_IMCOMING); -+ break; -+ -+ case CM_MSG_TYPE_IMCOMING: -+ if (lvl > cmRail->TopLevel || /* from peer or node above me */ -+ sgmt->State == CM_SGMT_PRESENT || /* already got a subtree */ -+ sgmt->State == CM_SGMT_ABSENT) /* already written off this subtree */ -+ { -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: ignored\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ break; -+ } -+ -+ CPRINTF4 (2, "%s: lvl %d sidx %d node %d IMCOMING: waiting...\n", cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ sgmt->State = CM_SGMT_COMING; -+ sgmt->UpdateTick = lbolt; -+ break; -+ -+ case CM_MSG_TYPE_NOTIFY: -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE || /* I'm not looking for a leader */ -+ lvl != cmRail->TopLevel) /* at this level */ -+ { -+ /* If this person really should be my leader, my existing leader -+ * will time out, and I'll discover this one. */ -+ CPRINTF4 (2, "%s: lvl %d node %d NOTIFY: ignored (%s)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId, -+ lvl < cmRail->TopLevel ? "already leader" : -+ lvl > cmRail->TopLevel ? "lvl too high" : "already subordinate"); -+ break; -+ } -+ -+ CPRINTF3 (2, "%s: lvl %d node %d NOTIFY: becoming subordinate\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ -+ cmRail->Role = CM_ROLE_SUBORDINATE; /* Now I've found my level */ -+ StartConnecting (cmRail, &level->Sgmts[0], hdr->NodeId, hdr->Timestamp); -+ break; -+ -+ case CM_MSG_TYPE_HEARTBEAT: -+ if (lvl > cmRail->TopLevel) -+ { -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT: ignored (lvl too high)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ -+ if (lvl == cmRail->TopLevel) /* heartbeat from my leader */ -+ { -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE) /* but I've not got one */ -+ { -+ /* I'm probably a new incarnation of myself; I'll keep doing -+ * discovery until my previous existence's leader NOTIFY's me. -+ * If I was this node's leader, she'll time me out (I'm not -+ * sending heartbeats to her) and we'll fight it out for -+ * leadership. */ -+ CPRINTF3 (2, "%s: lvl %d node %d H/BEAT ignored (no leader)\n", -+ cmRail->Rail->Name, lvl, hdr->NodeId); -+ break; -+ } -+ sidx = 0; -+ sgmt = &level->Sgmts[0]; -+ } -+ -+ if (sgmt->State != CM_SGMT_PRESENT || /* not fully connected with this guy */ -+ sgmt->NodeId != hdr->NodeId || /* someone else impersonating my peer */ -+ sgmt->Timestamp != hdr->Timestamp) /* new incarnation of my peer */ -+ { -+ CPRINTF4 (1, "%s: lvl %d sidx %d node %d H/BEAT: !REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId); -+ -+ printk ("%s: lvl %d sidx %d node %d H/BEAT: !REJOIN %s\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, -+ sgmt->State != CM_SGMT_PRESENT ? "not present" : -+ sgmt->NodeId != hdr->NodeId ? "someone else" : "new incarnation"); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ if (!((hdr->Seq == sgmt->AckSeq) || /* NOT duplicate message or */ -+ (hdr->Seq == (CM_SEQ)(sgmt->AckSeq + 1))) || /* expected message */ -+ !((hdr->AckSeq == sgmt->MsgSeq) || /* NOT expected ack or */ -+ (hdr->AckSeq == (CM_SEQ)(sgmt->MsgSeq - 1)))) /* duplicate ack */ -+ { -+ CPRINTF9 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ printk ("%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (out-of-seq) M(%d,a%d) S%d,A%d\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ (int)hdr->Seq, (int)hdr->AckSeq, (int)sgmt->MsgSeq, (int)sgmt->AckSeq); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ break; -+ } -+ -+ IncrStat (cmRail, HeartbeatsRcvd); -+ -+ sgmt->UpdateTick = lbolt; -+ sgmt->SendMaps = 1; -+ -+ if (sgmt->MsgSeq == hdr->AckSeq) /* acking current message */ -+ sgmt->MsgAcked = 1; /* can send the next one */ -+ -+ if (hdr->Seq == sgmt->AckSeq) /* discard duplicate (or NULL heartbeat) */ -+ { -+ CPRINTF6 (6, "%s: lvl %d sidx %d node %d type %d: %s H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, -+ hdr->NumMaps == 0 ? "null" : "duplicate"); -+ break; -+ } -+ -+ CPRINTF7 (6, "%s: lvl %d sidx %d node %d type %d: seq %d maps %d H/BEAT\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, hdr->Seq, hdr->NumMaps); -+ -+ sgmt->AckSeq = hdr->Seq; /* ready to receive next one */ -+ -+ for (i = 0; i < hdr->NumMaps; i++) -+ { -+ CM_STATEMAP_ENTRY *map = &msg->Payload.Statemaps[CM_MSG_MAP(i)]; -+ int clvl = map->level; -+ -+ if (clvl < 0) /* end of message */ -+ break; -+ -+ if (clvl < sgmt->Level) /* bad level */ -+ { -+ CPRINTF6 (1, "%s: lvl %d sidx %d node %d type %d: H/BEAT !REJOIN (bad clevel %d)\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type, clvl); -+ -+ SendMessage (cmRail, hdr->NodeId, hdr->Level, CM_MSG_TYPE_REJOIN); -+ goto finished; -+ } -+ -+ if (map->offset == STATEMAP_NOMORECHANGES) /* end of atomic changes */ -+ { -+ if (!sgmt->Maps[clvl].InputMapValid || /* not set InputMap yet */ -+ statemap_changed (sgmt->Maps[clvl].CurrentInputMap)) /* previously applied changes */ -+ { -+ CPRINTF3 (4, "%s: received new clvl %d map from %d\n", cmRail->Rail->Name, clvl, sgmt->NodeId); -+ -+ statemap_setmap (sgmt->Maps[clvl].InputMap, sgmt->Maps[clvl].CurrentInputMap); -+ sgmt->Maps[clvl].InputMapValid = 1; -+ -+ statemap_clearchanges (sgmt->Maps[clvl].CurrentInputMap); -+ } -+ continue; -+ } -+ -+ seg = ((bitmap_t)map->seg[0]) -+ | (((bitmap_t)map->seg[1]) << 16) -+#if (BT_ULSHIFT == 6) -+ | (((bitmap_t)map->seg[2]) << 32) -+ | (((bitmap_t)map->seg[3]) << 48) -+#elif (BT_ULSHIFT != 5) -+#error "Bad value for BT_ULSHIFT" -+#endif -+ ; -+ statemap_setseg (sgmt->Maps[clvl].CurrentInputMap, map->offset, seg); -+ } -+ break; -+ -+ case CM_MSG_TYPE_REJOIN: -+ CPRINTF5 (1, "%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ printk ("%s: lvl %d sidx %d node %d type %d: REJOIN\n", -+ cmRail->Rail->Name, lvl, sidx, hdr->NodeId, hdr->Type); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinRequest); -+ break; -+ -+ default: -+ printk ("%s: lvl=%d unknown message type %d\n", cmRail->Rail->Name, lvl, hdr->Type); -+ break; -+ } -+ finished: -+ hdr->Version = EP_SYSTEMQ_UNRECEIVED; -+} -+ -+static void -+PollInputQueues (CM_RAIL *cmRail) -+{ -+ ep_poll_inputq (cmRail->Rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ ep_poll_inputq (cmRail->Rail, cmRail->PolledQueue, 0, ProcessMessage, cmRail); -+} -+ -+static void -+IntrQueueCallback (EP_RAIL *rail, void *arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ /* If the lock is held, then don't bother spinning for it, -+ * since the messages will be received at this, or the -+ * next heartbeat */ -+ local_irq_save (flags); -+ if (spin_trylock (&cmRail->Lock)) -+ { -+ if (AFTER (lbolt, cmRail->NextRunTime + MSEC2TICKS(CM_TIMER_SCHEDULE_TIMEOUT))) -+ printk ("%s: heartbeat timer stuck - scheduled\n", cmRail->Rail->Name); -+ else -+ ep_poll_inputq (rail, cmRail->IntrQueue, 0, ProcessMessage, cmRail); -+ spin_unlock (&cmRail->Lock); -+ } -+ local_irq_restore (flags); -+} -+ -+char * -+sprintClPeers (char *str, CM_RAIL *cmRail, int clvl) -+{ -+ int clLo = cmRail->Levels[clvl].MinNodeId; -+ int clHi = clLo + cmRail->Levels[clvl].NumNodes - 1; -+ int subClLo = (clvl == 0) ? cmRail->NodeId : cmRail->Levels[clvl - 1].MinNodeId; -+ int subClHi = subClLo + ((clvl == 0) ? 0 : cmRail->Levels[clvl - 1].NumNodes - 1); -+ -+ if (subClHi == clHi) -+ sprintf (str, "[%d-%d]", clLo, subClLo - 1); -+ else if (subClLo == clLo) -+ sprintf (str, "[%d-%d]", subClHi + 1, clHi); -+ else -+ sprintf (str, "[%d-%d][%d-%d]", clLo, subClLo - 1, subClHi + 1, clHi); -+ -+ return (str); -+} -+ -+static void -+RestartComms (CM_RAIL *cmRail, int clvl) -+{ -+ int base; -+ int nodeId; -+ int lstat; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ base = myClId * CM_GSTATUS_BITS; -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ cmRail->Levels[clvl].Restarting = 1; -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ cmRail->Levels[clvl].Online = 0; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster; it's just someone in this cluster */ -+ { /* that wants me to restart */ -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ /* gstat must == RUNNING */ -+ cmRail->Levels[clvl].Connected--; -+ break; -+ case EP_NODE_DISCONNECTED: -+ /* CLOSING || STARTING || (lstat & RESTART) */ -+ break; -+ } -+ } -+ } -+ } -+} -+ -+static void -+UpdateGlobalStatus (CM_RAIL *cmRail) -+{ -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ int nodeId; -+ int offset; -+ int base; -+ bitmap_t gstat; -+ bitmap_t lgstat; -+ bitmap_t lstat; -+ int clvl; -+ int numClNodes; -+ int subClMin; -+ int subClMax; -+ int myClId; -+ int thisClId; -+ int lastClId; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (!cmRail->Levels[clvl].GlobalMapValid || /* not got the global map yet */ -+ !statemap_changed (cmRail->Levels[clvl].GlobalMap)) /* no changes to respond to */ -+ { -+ CPRINTF2 (6, "%s: Got invalid or unchanged clvl %d global map\n", cmRail->Rail->Name, clvl); -+ continue; -+ } -+ -+ CPRINTF2 (5, "%s: Got valid changed clvl %d global map\n", cmRail->Rail->Name, clvl); -+ -+ lastClId = -1; -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ numClNodes = cmRail->Levels[clvl].NumNodes; -+ -+ while ((offset = statemap_findchange (cmRail->Levels[clvl].GlobalMap, &gstat, 1)) >= 0) -+ { -+ /* -+ * Check every node that this segment covers - however -+ * if the last node we checked in the previous segmemt -+ * is also the first node in this segment, then skip -+ * it. -+ */ -+ if ((thisClId = (offset/CM_GSTATUS_BITS)) == lastClId) -+ thisClId++; -+ lastClId = (offset + BT_NBIPUL - 1)/CM_GSTATUS_BITS; -+ -+ /* check each node that might have changed */ -+ for ( ; thisClId <= lastClId && thisClId < numClNodes; thisClId++) -+ { -+ base = thisClId * CM_GSTATUS_BITS; -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ if (thisClId >= subClMin && thisClId <= subClMax) /* skip sub-cluster */ -+ continue; -+ -+ /* This isn't me; I need to sense what this node is driving -+ * (just the starting and running bits) and respond -+ * appropriately... -+ */ -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ if (lgstat == gstat) /* no change in peer state */ -+ continue; -+ -+ CPRINTF5 (3, "%s: Node %d: lgstat %s, gstat %s, lstat %s\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ /* What I'm currently driving as my acknowledgement */ -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ -+ switch (gstat) -+ { -+ case CM_GSTATUS_STARTING: -+ if ((lgstat == CM_GSTATUS_ABSENT || lgstat == CM_GSTATUS_CLOSING) && lstat == CM_GSTATUS_MAY_START) -+ { -+ CPRINTF2 (1, "%s: ===================node %d STARTING\n", cmRail->Rail->Name, nodeId); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_RUNNING: -+ if ((lgstat == CM_GSTATUS_ABSENT && lstat == CM_GSTATUS_MAY_START) || -+ (lgstat == CM_GSTATUS_STARTING && lstat == CM_GSTATUS_MAY_RUN)) -+ { -+ CPRINTF3 (1, "%s: ===================node %d%s RUNNING\n", cmRail->Rail->Name, nodeId, -+ lgstat == CM_GSTATUS_ABSENT ? " Already" : ""); -+ -+ ASSERT (cmRail->Rail->Nodes[nodeId].State == EP_NODE_DISCONNECTED); -+ -+ if (cmRail->Levels[clvl].Online) -+ { -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ continue; -+ } -+ break; -+ -+ case CM_GSTATUS_CLOSING: -+ CPRINTF4 (1, "%s: ===================node %d CLOSING%s%s\n", cmRail->Rail->Name, nodeId, -+ (lstat & CM_GSTATUS_RESTART) ? " for Restart" : "", -+ cmRail->Levels[clvl].Online ? "" : " (offline)"); -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ if ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_START) /* clear restart if we've disconnected */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ continue; -+ -+ default: -+ break; -+ } -+ -+ /* "unexpected" state change forces me to ask her to restart */ -+ if (! (lstat & CM_GSTATUS_RESTART)) /* not requesting restart already */ -+ { -+ CPRINTF5 (1, "%s: ===================node %d %s, old %s new %s\n", cmRail->Rail->Name, nodeId, -+ (gstat == CM_GSTATUS_ABSENT) ? "ABSENT" : "REQUEST RESTART", -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId)); -+ -+ /* request restart */ -+ if (cmRail->Levels[clvl].Online && lstat == CM_GSTATUS_MAY_RUN) -+ { -+ switch (ep_disconnect_node (cmRail->Rail, nodeId)) -+ { -+ case EP_NODE_CONNECTING: -+ cmRail->Levels[clvl].Connected--; -+ /* DROPTHROUGH */ -+ case EP_NODE_DISCONNECTED: -+ lstat = CM_GSTATUS_MAY_START; -+ break; -+ } -+ } -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ continue; -+ } -+ -+ continue; -+ } -+ } -+ -+ /* Now check myself - see what everyone else thinks I'm doing */ -+ base = myClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS); -+ -+ if (lgstat == gstat) /* my state in this cluster hasn't changed */ -+ { -+ CPRINTF3 (6, "%s: my clvl %d global status unchanged from %s\n", cmRail->Rail->Name, -+ clvl, GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId)); -+ goto all_done; -+ } -+ -+ if ((gstat & CM_GSTATUS_RESTART) != 0) /* someone wants me to restart */ -+ { -+ if ((lstat & CM_GSTATUS_STATUS_MASK) == CM_GSTATUS_CLOSING) /* I'm already restarting */ -+ goto all_done; -+ -+ CPRINTF2 (1, "%s: ===================RESTART REQUEST from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ printk ("%s: Restart Request from %s\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ goto all_done; -+ } -+ -+ CPRINTF6 (5, "%s: clvl %d: lgstat %s gstat %s, lstat %s%s\n", cmRail->Rail->Name, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, myClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, myClId), -+ (gstat != lstat) ? " (IGNORED)" : ""); -+ -+ if (gstat != lstat) /* not everyone agrees with me */ -+ goto all_done; -+ -+ switch (lstat) -+ { -+ default: -+ ASSERT (0); /* I never drive this */ -+ -+ case CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START: /* I can restart now (have seen restart go away) */ -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1,"%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY START\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_STARTING | CM_GSTATUS_MAY_RUN: -+ ASSERT (!cmRail->Levels[clvl].Online); -+ -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I MAY RUN\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, -+ CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ goto all_done; -+ -+ case CM_GSTATUS_RUNNING | CM_GSTATUS_MAY_RUN: -+ if (! cmRail->Levels[clvl].Online) -+ { -+ CPRINTF2 (1, "%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ printk ("%s: ===================NODES %s AGREE I'M RUNNING\n", cmRail->Rail->Name, -+ sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ cmRail->Levels[clvl].Online = 1; -+ -+ for (thisClId = 0; thisClId < numClNodes; thisClId++) -+ { -+ if (thisClId == subClMin) /* skip sub-cluster */ -+ { -+ thisClId = subClMax; -+ continue; -+ } -+ -+ nodeId = cmRail->Levels[clvl].MinNodeId + thisClId; -+ -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ gstat = statemap_getbits (cmRail->Levels[clvl].GlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ /* Only connect to her if I see her as running and I'm not requesting her -+ * to restart - this means that I was offline when I saw her transition -+ * to running and haven't seen her in a "bad" state since. */ -+ if (gstat == CM_GSTATUS_RUNNING && ! (lstat & CM_GSTATUS_RESTART)) -+ { -+ CPRINTF5 (1, "%s: node %d lgstat %s gstat %s, lstat %s -> CONNECT\n", cmRail->Rail->Name, nodeId, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lstat == CM_GSTATUS_MAY_START) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_RUN, CM_GSTATUS_BITS); -+ -+ ep_connect_node (cmRail->Rail, nodeId); -+ -+ cmRail->Levels[clvl].Connected++; -+ } -+ } -+ } -+ goto all_done; -+ } -+ -+ all_done: -+ statemap_setmap (cmRail->Levels[clvl].LastGlobalMap, cmRail->Levels[clvl].GlobalMap); -+ } -+} -+ -+static void -+ReduceGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ int recompute; -+ CM_LEVEL *level; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ /* Update cmRail->Levels[*].SubordinateMap[clvl] for all subordinate levels */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ /* We need to recompute this level's statemap if... -+ * . Previous level's statemap has changes to propagate OR -+ * . This level's statemap has not been computed yet OR -+ * . A subordinate at this level has sent me a change. -+ * Note that we can only do this if all subordinates from this -+ * level down are present with valid statemaps, or absent (i.e. not -+ * timing out). -+ */ -+ -+ ASSERT (lvl == 0 || cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ recompute = !level->SubordinateMapValid[clvl] || -+ (lvl > 0 && statemap_changed (cmRail->Levels[lvl - 1].SubordinateMap[clvl])); -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (!(sgmt->State == CM_SGMT_ABSENT || /* absent nodes contribute zeros */ -+ (sgmt->State == CM_SGMT_PRESENT && /* present nodes MUST have received a map to contribute */ -+ sgmt->Maps[clvl].InputMapValid))) -+ { -+ CPRINTF5 (5, "%s: waiting for clvl %d lvl %d seg %d node %d\n", cmRail->Rail->Name, -+ clvl, lvl, sidx, sgmt->NodeId); -+ -+ /* Gotta wait for this guy, so we can't compute this level, -+ * or any higher levels. */ -+ return; -+ } -+ -+ if (statemap_changed (sgmt->Maps[clvl].InputMap)) -+ { -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ -+ recompute = 1; -+ -+ CPRINTF7 (5, "%s: %s clvl %d map from @ %d %d (%d) - %s\n", -+ cmRail->Rail->Name, sgmt->State == CM_SGMT_ABSENT ? "newly absent" : "got new", -+ clvl, lvl, sidx, sgmt->NodeId, -+ MapString ("Input", sgmt->Maps[clvl].InputMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (recompute) -+ { -+ if (lvl == 0) -+ statemap_reset (cmRail->Levels[clvl].TmpMap); -+ else -+ { -+ ASSERT (cmRail->Levels[lvl - 1].SubordinateMapValid[clvl]); -+ -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[lvl - 1].SubordinateMap[clvl]); -+ } -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State != CM_SGMT_ABSENT) /* absent nodes contribute zeroes */ -+ { -+ ASSERT (sgmt->State == CM_SGMT_PRESENT); -+ ASSERT (sgmt->Maps[clvl].InputMapValid); -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, sgmt->Maps[clvl].InputMap); -+ } -+ statemap_clearchanges (sgmt->Maps[clvl].InputMap); -+ } -+ -+ statemap_setmap (level->SubordinateMap[clvl], cmRail->Levels[clvl].TmpMap); -+ level->SubordinateMapValid[clvl] = 1; -+ -+ CPRINTF4 (5, "%s: recompute clvl %d level %d statemap - %s\n", cmRail->Rail->Name, clvl, lvl, -+ MapString ("level", level->SubordinateMap[clvl], cmRail->Levels[clvl].NumNodes, "")); -+ } -+ } -+ -+ if (cRole == CM_ROLE_LEADER_CANDIDATE) /* don't know this cluster's leader yet */ -+ return; -+ -+ ASSERT (cTopLevel == 0 || cmRail->Levels[cTopLevel - 1].SubordinateMapValid[clvl]); -+ -+ /* Update SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid || -+ statemap_changed (cmRail->Levels[clvl].LocalMap) || -+ (cTopLevel > 0 && statemap_changed (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]))) -+ { -+ statemap_copy (cmRail->Levels[clvl].TmpMap, cmRail->Levels[clvl].LocalMap); -+ statemap_clearchanges (cmRail->Levels[clvl].LocalMap); -+ -+ if (cTopLevel > 0) -+ { -+ statemap_ormap (cmRail->Levels[clvl].TmpMap, cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ statemap_clearchanges (cmRail->Levels[cTopLevel - 1].SubordinateMap[clvl]); -+ } -+ -+ statemap_setmap (cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].TmpMap); -+ cmRail->Levels[clvl].SubTreeMapValid = 1; -+ -+ CPRINTF3 (5, "%s: recompute clvl %d subtree map - %s\n", cmRail->Rail->Name, clvl, -+ MapString ("subtree", cmRail->Levels[clvl].SubTreeMap, cmRail->Levels[clvl].NumNodes, "")); -+ } -+ -+ if (cRole == CM_ROLE_SUBORDINATE) /* got a leader (Not me) */ -+ { /* => send SubTreeMap to her */ -+ CM_SGMT *leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ ASSERT (cmRail->Levels[clvl].SubTreeMapValid); -+ -+ if (!leader->Maps[clvl].OutputMapValid || -+ statemap_changed (cmRail->Levels[clvl].SubTreeMap)) -+ { -+ statemap_setmap (leader->Maps[clvl].OutputMap, cmRail->Levels[clvl].SubTreeMap); -+ leader->Maps[clvl].OutputMapValid = 1; -+ -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF3 (5, "%s: sending clvl %d subtree map to leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ } -+ } -+} -+ -+void -+BroadcastGlobalMap (CM_RAIL *cmRail, int clvl) -+{ -+ int lvl; -+ int sidx; -+ CM_LEVEL *level; -+ CM_SGMT *leader; -+ int cTopLevel; -+ int cRole; -+ -+ if (clvl < cmRail->TopLevel) -+ { -+ cTopLevel = clvl + 1; -+ cRole = CM_ROLE_LEADER; -+ } -+ else -+ { -+ cTopLevel = cmRail->TopLevel; -+ cRole = cmRail->Role; -+ } -+ -+ switch (cRole) -+ { -+ default: -+ ASSERT (0); -+ -+ case CM_ROLE_LEADER_CANDIDATE: /* don't know this cluster's leader yet */ -+ return; -+ -+ case CM_ROLE_LEADER: /* cluster leader: */ -+ ASSERT (clvl < cmRail->TopLevel); /* set GlobalMap from SubTreeMap */ -+ -+ if (!cmRail->Levels[clvl].SubTreeMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (cmRail->Levels[clvl].SubTreeMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].SubTreeMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (cmRail->Levels[clvl].SubTreeMap); -+ -+ CPRINTF2 (5, "%s: whole cluster %d leader setting global map\n", cmRail->Rail->Name, clvl); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: /* cluster subordinate: */ -+ ASSERT (clvl >= cmRail->TopLevel); /* receive GlobalMap from leader */ -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ -+ leader = &cmRail->Levels[cmRail->TopLevel].Sgmts[0]; -+ ASSERT (leader->State == CM_SGMT_PRESENT); -+ -+ if (!leader->Maps[clvl].InputMapValid) /* can't set global map */ -+ return; -+ -+ if (cmRail->Levels[clvl].GlobalMapValid && /* already set global map */ -+ !statemap_changed (leader->Maps[clvl].InputMap)) /* no changes to propagate */ -+ return; -+ -+ statemap_setmap (cmRail->Levels[clvl].GlobalMap, leader->Maps[clvl].InputMap); -+ cmRail->Levels[clvl].GlobalMapValid = 1; -+ statemap_clearchanges (leader->Maps[clvl].InputMap); -+ -+ CPRINTF3 (5, "%s: getting clvl %d global map from leader (%d)\n", cmRail->Rail->Name, clvl, leader->NodeId); -+ -+ UpdateGlobalStatus (cmRail); -+ break; -+ } -+ -+ CPRINTF3 (5, "%s: clvl %d %s\n", cmRail->Rail->Name, clvl, -+ MapString ("global", cmRail->Levels[clvl].GlobalMap, cmRail->Levels[clvl].NumNodes, "")); -+ -+ /* Broadcast global map to all subordinates */ -+ for (lvl = 0; lvl < cTopLevel; lvl++) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ { -+ statemap_setmap (sgmt->Maps[clvl].OutputMap, cmRail->Levels[clvl].GlobalMap); -+ sgmt->Maps[clvl].OutputMapValid = 1; -+ -+ CPRINTF5 (5, "%s: sending clvl %d global map to subordinate %d %d (%d)\n", -+ cmRail->Rail->Name, clvl, lvl, sidx, sgmt->NodeId); -+ } -+ } -+ } -+} -+ -+static void -+CheckPeerPulse (CM_RAIL *cmRail, CM_SGMT *sgmt) -+{ -+ int clvl, sendRejoin; -+ -+ switch (sgmt->State) -+ { -+ case CM_SGMT_ABSENT: -+ break; -+ -+ case CM_SGMT_WAITING: /* waiting for a subtree */ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d contains no live nodes\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ -+ case CM_SGMT_COMING: /* lost/waiting subtree sent me IMCOMING */ -+ ASSERT (sgmt->Level > 0); /* we only do subtree discovery below our own level */ -+ -+ if (AFTER (lbolt, sgmt->WaitingTick + MSEC2TICKS(CM_WAITING_TIMEOUT))) -+ { -+ CPRINTF3 (1, "%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ printk ("%s: lvl %d subtree %d waiting too long\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_ABSENT; -+ for (clvl = sgmt->Level; clvl < cmRail->NumLevels; clvl++) -+ { -+ statemap_zero (sgmt->Maps[clvl].InputMap); /* need to start propagating zeros (flags change) */ -+ sgmt->Maps[clvl].InputMapValid = 1; /* and must indicate that the map is now valid */ -+ } -+ break; -+ } -+ -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_DISCOVER_TIMEOUT))) -+ break; -+ -+ CPRINTF3 (2, "%s: lvl %d subtree %d hasn't connected yet\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0])); -+ -+ sgmt->State = CM_SGMT_WAITING; -+ sgmt->UpdateTick = lbolt; -+ -+ if (sgmt->Level > 0) -+ __Schedule_Discovery (cmRail); -+ break; -+ -+ case CM_SGMT_PRESENT: -+ if (!AFTER (lbolt, sgmt->UpdateTick + MSEC2TICKS(CM_HEARTBEAT_TIMEOUT))) -+ break; -+ -+ if (sgmt->Level == cmRail->TopLevel) /* leader died */ -+ { -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF4 (1, "%s: leader (%d) node %d JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ printk ("%s: lvl %d leader (%d) JUST DIED%s\n", -+ cmRail->Rail->Name, sgmt->Level, sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartLeaderDiscovery (cmRail); -+ break; -+ } -+ -+ sendRejoin = (sgmt->State == CM_SGMT_PRESENT && sgmt->AckSeq == 0); -+ -+ CPRINTF5 (2, "%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ printk ("%s: lvl %d subordinate %d (%d) JUST DIED%s\n", cmRail->Rail->Name, -+ sgmt->Level, (int) (sgmt - &cmRail->Levels[sgmt->Level].Sgmts[0]), sgmt->NodeId, -+ sendRejoin ? ": !REJOIN" : ""); -+ -+ if (sendRejoin) -+ { -+ /* she's not sent us any heartbeats even though she responded to a discover -+ * so tell her to rejoin the tree at the bottom, this will mean that she -+ * has to run the heartbeat timer before being able to rejoin the tree. */ -+ SendMessage (cmRail, sgmt->NodeId, sgmt->Level, CM_MSG_TYPE_REJOIN); -+ } -+ -+ StartSubTreeDiscovery (cmRail, sgmt); -+ break; -+ -+ default: -+ ASSERT (0); -+ } -+} -+ -+static void -+CheckPeerPulses (CM_RAIL *cmRail) -+{ -+ int lvl; -+ int sidx; -+ -+ /* check children are alive */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ for (sidx = 0; sidx < cmRail->Levels[lvl].NumSegs; sidx++) -+ CheckPeerPulse (cmRail, &cmRail->Levels[lvl].Sgmts[sidx]); -+ -+ /* check leader is alive */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ ASSERT (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT); -+ -+ CheckPeerPulse (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0]); -+ } -+} -+ -+static void -+SendHeartbeats (CM_RAIL *cmRail) -+{ -+ int lvl; -+ -+ /* Send heartbeats to my children */ -+ for (lvl = 0; lvl < cmRail->TopLevel; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ int sidx; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &cmRail->Levels[lvl].Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_PRESENT) -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_HEARTBEAT); -+ } -+ } -+ -+ /* Send heartbeat to my leader */ -+ if (cmRail->Role == CM_ROLE_SUBORDINATE) -+ { -+ ASSERT (cmRail->TopLevel < cmRail->NumLevels); -+ SendToSgmt (cmRail, &cmRail->Levels[cmRail->TopLevel].Sgmts[0], CM_MSG_TYPE_HEARTBEAT); -+ } -+} -+ -+static int -+BroadcastDiscover (CM_RAIL *cmRail) -+{ -+ int sidx; -+ int lvl; -+ int msgType; -+ CM_LEVEL *level; -+ int urgent; -+ -+ ASSERT (cmRail->TopLevel <= cmRail->NumLevels); -+ ASSERT ((cmRail->Role == CM_ROLE_LEADER) ? (cmRail->TopLevel == cmRail->NumLevels) : -+ (cmRail->Role == CM_ROLE_SUBORDINATE) ? (cmRail->Levels[cmRail->TopLevel].Sgmts[0].State == CM_SGMT_PRESENT) : -+ (cmRail->Role == CM_ROLE_LEADER_CANDIDATE)); -+ -+ if (cmRail->Role != CM_ROLE_LEADER_CANDIDATE) /* got a leader/lead whole machine */ -+ { -+ urgent = 0; /* non-urgent leader discovery */ -+ lvl = cmRail->TopLevel - 1; /* on nodes I lead (resolves leader conflicts) */ -+ msgType = CM_MSG_TYPE_RESOLVE_LEADER; -+ } -+ else -+ { -+ urgent = 1; /* urgent leader discovery */ -+ lvl = cmRail->TopLevel; /* on nodes I'd like to lead */ -+ msgType = CM_MSG_TYPE_DISCOVER_LEADER; -+ } -+ -+ if (lvl >= 0) -+ { -+ if (lvl > cmRail->BroadcastLevel) -+ { -+ /* Unable to broadcast at this level in the spanning tree, so we -+ * just continue doing discovery until we are able to broadcast */ -+ CPRINTF4 (6, "%s: broadcast level %d too low to discover %d at level %d\n", -+ cmRail->Rail->Name, cmRail->BroadcastLevel, msgType, lvl); -+ -+ cmRail->DiscoverStartTick = lbolt; -+ } -+ else -+ { -+ level = &cmRail->Levels[lvl]; -+ SendToSgmt (cmRail, &level->Sgmts[level->MySgmt], msgType); -+ } -+ } -+ -+ while (lvl > 0) -+ { -+ level = &cmRail->Levels[lvl]; -+ -+ for (sidx = 0; sidx < level->NumSegs; sidx++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[sidx]; -+ -+ if (sgmt->State == CM_SGMT_WAITING) -+ { -+ ASSERT (sidx != level->MySgmt); -+ /* Do subordinate discovery. Existing subordinates will -+ * ignore it, but leader candidates will send IMCOMING. -+ * This is always urgent since we'll assume a subtree is -+ * absent if I don't get IMCOMING within the timeout. -+ */ -+ SendToSgmt (cmRail, sgmt, CM_MSG_TYPE_DISCOVER_SUBORDINATE); -+ urgent = 1; -+ } -+ } -+ lvl--; -+ } -+ -+ return (urgent); -+} -+ -+static void -+CheckBroadcast (CM_RAIL *cmRail) -+{ -+ int clvl; -+ -+ for (clvl = cmRail->NumLevels-1; clvl >= 0 && cmRail->Rail->SwitchBroadcastLevel < cmRail->Levels[clvl].SwitchLevel; clvl--) -+ ; -+ -+ if (cmRail->OfflineReasons || cmRail->Rail->System->Shutdown) -+ clvl = -1; -+ -+ /* if the level at which we can broadcast drops, then we must rejoin the -+ * spanning tree at the highest level for which broadcast is good. */ -+ if (cmRail->BroadcastLevel > clvl && clvl < (int)(cmRail->Role == CM_ROLE_LEADER ? cmRail->TopLevel - 1 : cmRail->TopLevel)) -+ { -+ printk ("%s: REJOINING at level %d because %s\n", cmRail->Rail->Name, clvl+1, -+ (cmRail->OfflineReasons & CM_OFFLINE_MANAGER) ? "of manager thread" : -+ (cmRail->OfflineReasons & CM_OFFLINE_PROCFS) ? "force offline" : -+ cmRail->Rail->System->Shutdown ? "system shutdown" : "broadcast level changed"); -+ LowerTopLevel (cmRail, clvl+1); -+ } -+ -+ if (cmRail->BroadcastLevel != clvl) -+ { -+ cmRail->BroadcastLevel = clvl; -+ cmRail->BroadcastLevelTick = lbolt; -+ } -+ -+ /* schedule the update thread, to withdraw from comms with -+ * nodes "outside" of the valid broadcastable range. */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ if (cmRail->BroadcastLevel < clvl) -+ { -+ if (AFTER (lbolt, cmRail->BroadcastLevelTick + EP_WITHDRAW_TIMEOUT) && -+ !(cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST)) -+ { -+ printk ("%s: Withdraw at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons |= CM_OFFLINE_BROADCAST; -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].OfflineReasons & CM_OFFLINE_BROADCAST) -+ { -+ printk ("%s: Rejoin at Level %d\n", cmRail->Rail->Name, clvl); -+ cmRail->Levels[clvl].OfflineReasons &= ~CM_OFFLINE_BROADCAST; -+ } -+ } -+ } -+ -+} -+ -+static void -+CheckManager (CM_RAIL *cmRail) -+{ -+ long time, state = ep_kthread_state (&cmRail->Rail->System->ManagerThread, &time); -+ -+ if (state == KT_STATE_RUNNING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_RUNNING_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ if (state != KT_STATE_SLEEPING && BEFORE (lbolt, time + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT))) -+ state = KT_STATE_SLEEPING; -+ -+ if ((cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state == KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread unstuck\n", cmRail->Rail->Name); -+ -+ cmRail->OfflineReasons &= ~CM_OFFLINE_MANAGER; -+ } -+ -+ if (!(cmRail->OfflineReasons & CM_OFFLINE_MANAGER) && state != KT_STATE_SLEEPING) -+ { -+ printk ("%s: manager thread stuck - %s\n", cmRail->Rail->Name, -+ state == KT_STATE_SCHEDULED ? "scheduled" : -+ state == KT_STATE_RUNNING ? "running" : -+ state == KT_STATE_STALLED ? "stalled" : "unknown"); -+ -+ cmRail->OfflineReasons |= CM_OFFLINE_MANAGER; -+ } -+} -+ -+static void -+CheckOfflineReasons (CM_RAIL *cmRail, int clvl) -+{ -+ int subClMin, subClMax, myClId; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ -+ if (cmRail->Levels[clvl].OfflineReasons) -+ { -+ if (cmRail->Levels[clvl].Online) -+ { -+ printk ("%s: Withdraw from %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ RestartComms (cmRail, clvl); -+ } -+ } -+ else -+ { -+ if (cmRail->Levels[clvl].Restarting && cmRail->Levels[clvl].Connected == 0) -+ { -+ printk ("%s: Rejoin with %s\n", cmRail->Rail->Name, sprintClPeers (clNodeStr, cmRail, clvl)); -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ -+ ASSERT (statemap_getbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, CM_GSTATUS_BITS) == -+ (CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START | CM_GSTATUS_RESTART)); -+ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, myClId * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ cmRail->Levels[clvl].Restarting = 0; -+ } -+ } -+} -+ -+void -+DoHeartbeatWork (CM_RAIL *cmRail) -+{ -+ long now = lbolt; -+ int clvl; -+ -+ if ((RejoinCheck || RejoinPanic) && -+ AFTER (now, cmRail->NextRunTime + MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT))) /* If I've been unresponsive for too long */ -+ { -+ /* I'd better reconnect to the network because I've not been playing the game */ -+ CPRINTF4 (1, "%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ printk ("%s: REJOINING because I was too slow (heartbeat) [%ld,%ld,(%ld)]\n", cmRail->Rail->Name, now, cmRail->NextRunTime, (long int)MSEC2TICKS (CM_TIMER_SCHEDULE_TIMEOUT)); -+ -+ LowerTopLevel (cmRail, 0); -+ -+ IncrStat (cmRail, RejoinTooSlow); -+ -+ if (RejoinPanic) -+ panic ("ep: REJOINING because I was too slow (heartbeat)\n"); -+ } -+ -+ PollInputQueues (cmRail); -+ -+ if (cmRail->NextDiscoverTime && ! BEFORE (now, cmRail->NextDiscoverTime)) -+ { -+ if (BroadcastDiscover (cmRail)) /* urgent discovery required? */ -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_URGENT_DISCOVER_INTERVAL); -+ else -+ cmRail->NextDiscoverTime = now + MSEC2TICKS (CM_PERIODIC_DISCOVER_INTERVAL); -+ -+ if (cmRail->Role == CM_ROLE_LEADER_CANDIDATE && AFTER (now, cmRail->DiscoverStartTick + MSEC2TICKS (CM_DISCOVER_TIMEOUT))) -+ RaiseTopLevel (cmRail); -+ } -+ -+ if (cmRail->NextHeartbeatTime && ! BEFORE (now, cmRail->NextHeartbeatTime)) -+ { -+ CheckPosition (cmRail->Rail); -+ CheckPeerPulses (cmRail); -+ CheckBroadcast (cmRail); -+ CheckManager (cmRail); -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ CheckOfflineReasons (cmRail, clvl); -+ ReduceGlobalMap (cmRail, clvl); -+ BroadcastGlobalMap (cmRail, clvl); -+ } -+ -+ SendHeartbeats (cmRail); -+ -+ /* Compute the next heartbeat time, but "drift" it towards the last -+ * periodic discovery time we saw from the whole machine leader */ -+ cmRail->NextHeartbeatTime = now + MSEC2TICKS (CM_HEARTBEAT_INTERVAL); -+ } -+ -+ if (cmRail->NextDiscoverTime && AFTER (cmRail->NextHeartbeatTime, cmRail->NextDiscoverTime)) -+ cmRail->NextRunTime = cmRail->NextDiscoverTime; -+ else -+ cmRail->NextRunTime = cmRail->NextHeartbeatTime; -+} -+ -+#define CM_SVC_INDICATOR_OFFSET(CMRAIL,CLVL,IND,NODEID) ( ( CMRAIL->Levels[CLVL].NumNodes * CM_GSTATUS_BITS ) \ -+ + ( CMRAIL->Levels[CLVL].NumNodes * IND ) \ -+ + ( NODEID - CMRAIL->Levels[CLVL].MinNodeId ) ) -+int -+cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC,"cm_svc_indicator_set: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC,"cm_svc_indicator_set: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 1, 1); -+ EPRINTF3 (DBG_SVC,"cm_svc_indicator_set: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_clear: rail %p ind %d\n", rail, svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_clear: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId), 0, 1); -+ EPRINTF3 (DBG_SVC, "cm_svc_indicator_clear: clvl %d nodeId %d offset %d\n", clvl, cmRail->NodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, cmRail->NodeId)); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+int -+cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int clvl; -+ bitmap_t bits; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: rail %p ind %d nodeId %d (me=%d)\n", rail, svc_indicator, nodeId, cmRail->NodeId); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: service indicator %d not registered\n", svc_indicator); -+ return (0); -+ } -+ -+ if (rail->State == EP_RAIL_STATE_UNINITIALISED) -+ return (0); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ if ( clvl == cmRail->NumLevels) { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_is_set: node out of range %d \n", nodeId); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ return (0); -+ } -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_is_set: clvl %d nodeId %d offset %d %x\n", clvl, nodeId, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), bits); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return ( (bits == 0) ? (0) : (1) ); -+} -+ -+int -+cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ /* or in the bit map */ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int nodeId, clvl; -+ bitmap_t bits; -+ unsigned long flags; -+ int clip_out_low, clip_out_high; -+ int curr_low, curr_high; -+ int check_low, check_high; -+ -+ EPRINTF4 (DBG_SVC, "cm_svc_indicator_bitmap: rail %p ind %d low %d high %d\n", rail, svc_indicator, low, (low + nnodes)); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ { -+ EPRINTF1 (DBG_SVC, "cm_svc_indicator_bitmap: service indicator %d not registered\n", svc_indicator); -+ return (-1); -+ } -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return (-2); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ clip_out_low = clip_out_high = -1; /* all in */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) { -+ -+ /* curr_high/low is the range of the current lvl */ -+ curr_low = cmRail->Levels[clvl].MinNodeId; -+ curr_high = cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes; -+ -+ /* find out how much of low high is in this range and only check that part */ -+ check_low = ( low < curr_low) ? curr_low : low; -+ check_high = ( (low + nnodes) > curr_high) ? curr_high : (low + nnodes); -+ -+ EPRINTF6 (DBG_SVC, "cm_svc_indicator_bitmap: curr(%d,%d) check(%d,%d) clip(%d,%d)\n", curr_low, curr_high, check_low, check_high, clip_out_low, clip_out_high); -+ -+ for(nodeId = check_low; nodeId < check_high; nodeId++) { -+ -+ if ( (clip_out_low <= nodeId) && (nodeId <= clip_out_high)) -+ nodeId = clip_out_high; /* step over the cliped out section */ -+ else { -+ -+ if ( cmRail->NodeId == nodeId ) -+ bits = statemap_getbits (cmRail->Levels[clvl].LocalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ else -+ bits = statemap_getbits (cmRail->Levels[clvl].GlobalMap, CM_SVC_INDICATOR_OFFSET (cmRail, clvl, svc_indicator, nodeId), 1); -+ -+ if ( bits ) { -+ EPRINTF2 (DBG_SVC, "cm_svc_indicator_bitmap: its set nodeId %d (clvl %d)\n", nodeId, clvl); -+ BT_SET ( bitmap , nodeId - low ); -+ } -+ } -+ } -+ -+ /* widen the clip out range */ -+ clip_out_low = curr_low; -+ clip_out_high = curr_high -1; -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ return (0); -+} -+ -+#if defined(PER_CPU_TIMEOUT) -+static void -+cm_percpu_timeout (void *arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ CM_TIMEOUT_DATA *hbd = &cmRail->HeartbeatTimeoutsData[current_cpu()]; -+ long now = lbolt; -+ unsigned delay = now - hbd->ScheduledAt; -+ unsigned long flags; -+ -+ if (delay > hbd->WorstDelay) -+ hbd->WorstDelay = delay; -+ if (hbd->BestDelay == 0 || delay < hbd->BestDelay) -+ hbd->BestDelay = delay; -+ -+ if (cmRail->HeartbeatTimeoutsShouldStop) -+ { -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ cmRail->HeartbeatTimeoutsStopped |= (1 << current_cpu()); -+ kcondvar_wakeupall (&cmRail->HeartbeatTimeoutsWait, &cmRail->Lock); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ return; -+ } -+ -+ if (cmRail->NextRunTime == 0 || AFTER (cmRail->NextRunTime, lbolt)) -+ hbd->EarlyCount++; -+ else if (cmRail->HeartbeatTimeoutRunning) -+ hbd->MissedCount++; -+ else -+ { -+ local_irq_save (flags); -+ -+ if (! spin_trylock (&cmRail->HeartbeatTimeoutsLock)) -+ hbd->WastedCount++; -+ else -+ { -+ cmRail->HeartbeatTimeoutRunning = 1; -+ hbd->WorkCount++; -+ -+ spin_lock (&cmRail->Lock); -+ -+ if ((delay = (lbolt - cmRail->NextRunTime)) > hbd->WorstHearbeatDelay) -+ hbd->WorstHearbeatDelay = delay; -+ if ((delay = (lbolt - now) > hbd->WorstLockDelay)) -+ hbd->WorstLockDelay = delay; -+ -+ DoHeartbeatWork (cmRail); -+ -+ spin_unlock (&cmRail->Lock); -+ spin_unlock (&cmRail->HeartbeatTimeoutsLock); -+ -+ cmRail->HeartbeatTimeoutRunning = 0; -+ } -+ local_irq_restore (flags); -+ } -+ -+ hbd->ScheduledAt = lbolt + MSEC2TICKS (CM_PERCPU_TIMEOUT_INTERVAL); -+ timeout_cpu (cm_percpu_timeout, cmRail, MSECS2TICKS (CM_PERCPU_TIMEOUT_INTERVAL), CALLOUT_TYPE|CALLOUT_NOMALLOC); -+} -+ -+static void -+StartPerCpuTimeouts (CM_RAIL *cmRail) -+{ -+ register int c; -+ -+ spin_lock_init (&cmRail->HeartbeatTimeoutsLock); -+ -+ KMEM_ZALLOC (cmRail->HeartbeatTimeoutsData, CM_TIMEOUT_DATA *, ncpus * sizeof (CM_TIMEOUT_DATA), 1); -+ -+ for (c = 0; c < cpus_in_box; c++) -+ { -+ if (cpu_to_processor (c)) -+ { -+ if (current_cpu() != c) -+ { -+ thread_bind (current_thread(), cpu_to_processor(c)); -+ mpsleep (current_thread(), 0, "StartPerCpuTimeouts", 1, NULL, 0); -+ -+ if (current_cpu() != c) -+ panic ("ep: StartPerCpuTimeouts - failed to switch cpu\n"); -+ } -+ -+ cmRail->HeartbeatTimeoutsStarted |= (1 << c); -+ cmRail->HeartbeatTimeoutsData[c].ScheduledAt = lbolt + c; -+ -+ timeout_cpu (cm_percpu_timeout, cmRail, c, CALLOUT_TYPE|CALLOUT_NOMALLOC); -+ } -+ } -+ -+ thread_bind(current_thread(), NULL); -+} -+ -+static void -+StopPerCpuTimeouts (CM_RAIL *cmRail) -+{ -+ register int c; -+ unsigned long flags; -+ -+ cmRail->HeartbeatTimeoutsShouldStop = 1; -+ -+ for (c = 0; c < cpus_in_box; c++) -+ { -+ if (cmRail->HeartbeatTimeoutsStarted & (1 << c)) -+ { -+ printk ("%s: stopping cpu_timeout on cpu %d\n", cmRail->Rail->Name, c); -+ -+ if (untimeout_cpu (cm_percpu_timeout, cmRail, c, CALLOUT_TYPE|CALLOUT_NOMALLOC, NULL)) -+ cmRail->HeartbeatTimeoutsStopped |= (1 << c); -+ } -+ } -+ thread_bind(current_thread(), NULL); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ while (cmRail->HeartbeatTimeoutsStopped != cmRail->HeartbeatTimeoutsStarted) -+ kcondvar_wait (&cmRail->HeartbeatTimeoutsWait, &cmRail->Lock, &flags); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ cmRail->HeartbeatTimeoutsStarted = 0; -+ cmRail->HeartbeatTimeoutsStopped = 0; -+ cmRail->HeartbeatTimeoutsShouldStop = 0; -+ -+ KMEM_FREE (cmRail->HeartbeatTimeoutsData, ncpus * sizeof (CM_TIMEOUT_DATA)); -+ -+ spin_lock_destroy (&cmRail->HeartbeatTimeoutsLock); -+} -+ -+#else -+ -+static void -+cm_heartbeat_timer (unsigned long arg) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ ASSERT (cmRail->Rail->State == EP_RAIL_STATE_RUNNING); -+ -+ DoHeartbeatWork (cmRail); -+ -+ __Schedule_Timer (cmRail, cmRail->NextRunTime); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+#endif /* defined(PER_CPU_TIMEOUT) */ -+ -+ -+ -+void -+DisplayRailDo (DisplayInfo *di, EP_RAIL *rail) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ int i, j; -+ -+ if (rail->State != EP_RAIL_STATE_RUNNING) -+ return; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ (di->func)(di->arg, "NodeId=%d NodeLevel=%d NumLevels=%d NumNodes=%d\n", -+ cmRail->NodeId, cmRail->TopLevel, cmRail->NumLevels, cmRail->Rail->Position.pos_nodes); -+ -+ (di->func)(di->arg, "["); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ if (i > 0) -+ (di->func)(di->arg, ","); -+ -+ if (i < cmRail->TopLevel) -+ { -+ (di->func)(di->arg, "L "); -+ -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ switch (cmRail->Levels[i].Sgmts[j].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[j].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ } -+ else -+ switch (cmRail->Role) -+ { -+ case CM_ROLE_LEADER_CANDIDATE: -+ (di->func)(di->arg,"l "); -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg," "); -+ break; -+ -+ case CM_ROLE_SUBORDINATE: -+ switch (cmRail->Levels[i].Sgmts[0].State) -+ { -+ case CM_SGMT_PRESENT: (di->func)(di->arg, "p%-4d", cmRail->Levels[i].Sgmts[0].NodeId); break; -+ case CM_SGMT_WAITING: (di->func)(di->arg, "w%4s", ""); break; -+ case CM_SGMT_COMING: (di->func)(di->arg, "c%4s", ""); break; -+ case CM_SGMT_ABSENT: (di->func)(di->arg, ".%4s", ""); break; -+ default: (di->func)(di->arg, "?%4s", ""); break; -+ } -+ for (j = 1; j < cmRail->Levels[i].NumSegs; j++) -+ (di->func)(di->arg, " "); -+ break; -+ -+ default: -+ (di->func)(di->arg, "####"); -+ break; -+ } -+ } -+ (di->func)(di->arg, "]\n"); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+DisplayRail (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ DisplayRailDo (&di_ep_debug, rail); -+} -+ -+void -+DisplayStatus (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeMaps (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+void -+DisplaySegs (EP_RAIL *rail) -+{ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ DisplayNodeSgmts (&di_ep_debug, cmRail); -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+} -+ -+static void -+LoadBroadcastRoute (CM_RAIL *cmRail, int lvl, int sidx) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int nsegs = cmRail->Levels[0].NumSegs; -+ int vp = EP_VP_BCAST(lvl, sidx); -+ int nodes = 1; -+ int baseNode; -+ int i; -+ -+ ASSERT (lvl > 0 && lvl <= cmRail->NumLevels); -+ ASSERT (sidx == 0 || lvl < cmRail->NumLevels); -+ -+ ASSERT (vp >= EP_VP_BCAST_BASE && vp < EP_VP_BCAST_BASE + EP_VP_BCAST_COUNT); -+ -+ for (i = 1; i <= lvl; i++) -+ { -+ nodes *= nsegs; -+ nsegs = (i == cmRail->NumLevels) ? 1 : cmRail->Levels[i].NumSegs; -+ } -+ -+ baseNode = ((cmRail->NodeId / (nodes * nsegs)) * nsegs + sidx) * nodes; -+ -+ CPRINTF5 (2, "%s: broadcast vp lvl %d sidx %d [%d,%d]\n", -+ cmRail->Rail->Name, lvl, sidx, baseNode, baseNode + nodes - 1); -+ -+ rail->Operations.LoadSystemRoute (rail, vp, baseNode, baseNode + nodes - 1); -+} -+ -+static void -+LoadRouteTable (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i, j; -+ -+ if (cmRail->NumNodes > EP_MAX_NODES) -+ { -+ printk ("More nodes (%d) than point-to-point virtual process table entries (%d)\n", cmRail->NumNodes, EP_MAX_NODES); -+ panic ("LoadRouteTable\n"); -+ } -+ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.LoadSystemRoute (rail, EP_VP_NODE(i), i, i); -+ -+ /* Generate broadcast routes for subtrees */ -+ for (i = 1; i < cmRail->NumLevels; i++) -+ for (j = 0; j < cmRail->Levels[i].NumSegs; j++) -+ LoadBroadcastRoute (cmRail, i, j); -+ -+ /* Generate broadcast route for whole machine */ -+ LoadBroadcastRoute (cmRail, cmRail->NumLevels, 0); -+ -+ /* Finally invalidate all the data routes */ -+ for (i = 0; i < cmRail->NumNodes; i++) -+ rail->Operations.UnloadNodeRoute (cmRail->Rail, i); -+} -+ -+void -+cm_node_disconnected (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ ASSERT (nodeId != cmRail->NodeId); -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ ASSERT ((lstat & CM_GSTATUS_ACK_MASK) == CM_GSTATUS_MAY_RUN); -+ -+ CPRINTF7 (2, "%s: cm_node_disconnected: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s -> %sMAY_START\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId), -+ ((lgstat != CM_GSTATUS_CLOSING) && (lstat & CM_GSTATUS_RESTART)) ? "RESTART|" : ""); -+ -+ switch (lgstat) -+ { -+ case CM_GSTATUS_CLOSING: -+ /* delayed ack of closing - set MAY_START and clear RESTART */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ break; -+ case CM_GSTATUS_STARTING: -+ case CM_GSTATUS_RUNNING: -+ IASSERT (! cmRail->Levels[clvl].Online || lstat & CM_GSTATUS_RESTART); -+ break; -+ case CM_GSTATUS_ABSENT: -+ IASSERT (lstat & CM_GSTATUS_RESTART); -+ } -+ -+ cmRail->Levels[clvl].Connected--; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_restart_node (EP_RAIL *rail, unsigned nodeId) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ int base, lstat, lgstat; -+ int clvl, subClMin, subClMax; -+ int thisClId, myClId; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (nodeId == rail->Position.pos_nodeid) -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ RestartComms (cmRail, clvl); -+ } -+ else -+ { -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ if (nodeId >= cmRail->Levels[clvl].MinNodeId && nodeId < (cmRail->Levels[clvl].MinNodeId + cmRail->Levels[clvl].NumNodes)) -+ break; -+ -+ myClId = ClusterIds (cmRail, clvl, &subClMin, &subClMax); -+ thisClId = nodeId - cmRail->Levels[clvl].MinNodeId; -+ base = thisClId * CM_GSTATUS_BITS; -+ lstat = statemap_getbits (cmRail->Levels[clvl].LocalMap, base, CM_GSTATUS_BITS); -+ lgstat = statemap_getbits (cmRail->Levels[clvl].LastGlobalMap, base, CM_GSTATUS_BITS) & CM_GSTATUS_STATUS_MASK; -+ -+ CPRINTF6 (2, "%s: cm_restart_node: Node %d: clvl %d, lgstat %s, gstat %s, lstat %s\n", -+ cmRail->Rail->Name, nodeId, clvl, -+ GlobalStatusString (cmRail->Levels[clvl].LastGlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].GlobalMap, thisClId), -+ GlobalStatusString (cmRail->Levels[clvl].LocalMap, thisClId)); -+ -+ if (lgstat != CM_GSTATUS_CLOSING) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, base, lstat | CM_GSTATUS_RESTART, CM_GSTATUS_BITS); -+ } -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+void -+cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason) -+{ -+ CM_RAIL *cmRail = rail->ClusterRail; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ if (offline) -+ cmRail->OfflineReasons |= reason; -+ else -+ cmRail->OfflineReasons &= ~reason; -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+} -+ -+static void -+cm_remove_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ CM_RAIL *cmRail = sys->Rails[rail->Number]; -+ int i, lvl, clvl; -+ -+ cm_procfs_rail_fini (cmRail); -+ -+ sys->Rails[rail->Number] = NULL; -+ rail->ClusterRail = NULL; -+ -+#if defined(PER_CPU_TIMEOUT) -+ StopPerCpuTimeouts (cmRail); -+#else -+ del_timer_sync (&cmRail->HeartbeatTimer); -+#endif -+ cmRail->NextRunTime = 0; -+ cmRail->NextDiscoverTime = 0; -+ cmRail->NextHeartbeatTime = 0; -+ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ statemap_destroy (level->SubordinateMap[clvl]); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ statemap_destroy (level->Sgmts[i].Maps[clvl].CurrentInputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].InputMap); -+ statemap_destroy (level->Sgmts[i].Maps[clvl].OutputMap); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ statemap_destroy (cmRail->Levels[clvl].TmpMap); -+ statemap_destroy (cmRail->Levels[clvl].GlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].LastGlobalMap); -+ statemap_destroy (cmRail->Levels[clvl].SubTreeMap); -+ statemap_destroy (cmRail->Levels[clvl].LocalMap); -+ } -+ -+ spin_lock_destroy (&cmRail->Lock); -+ -+ ep_free_inputq (cmRail->Rail, cmRail->PolledQueue); -+ ep_free_inputq (cmRail->Rail, cmRail->IntrQueue); -+ ep_free_outputq (cmRail->Rail, cmRail->MsgQueue); -+ -+ KMEM_FREE (cmRail, sizeof (CM_RAIL)); -+} -+ -+static int -+cm_add_rail (EP_SUBSYS *subsys, EP_SYS *epsys, EP_RAIL *rail) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ ELAN_POSITION *pos = &rail->Position; -+ CM_RAIL *cmRail; -+ int lvl, n, nn, clvl, span, i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (cmRail, CM_RAIL *, sizeof (CM_RAIL), 1); -+ -+ if (cmRail == NULL) -+ return (ENOMEM); -+ -+ cmRail->Rail = rail; -+ cmRail->NodeId = pos->pos_nodeid; -+ cmRail->NumNodes = pos->pos_nodes; -+ -+ spin_lock_init (&cmRail->Lock); -+ -+ if ((cmRail->IntrQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_INTR, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, IntrQueueCallback, cmRail)) == NULL || -+ (cmRail->PolledQueue = ep_alloc_inputq (rail, EP_SYSTEMQ_POLLED, sizeof (CM_MSG), CM_INPUTQ_ENTRIES, NULL, 0)) == NULL || -+ (cmRail->MsgQueue = ep_alloc_outputq (rail, sizeof (CM_MSG), CM_NUM_MSG_BUFFERS)) == NULL) -+ { -+ goto failed; -+ } -+ -+ /* point to first "spare" message buffer */ -+ cmRail->NextSpareMsg = 0; -+ -+ /* Compute the branching ratios from the switcy arity */ -+ for (lvl = 0; lvl < CM_MAX_LEVELS; lvl++) -+ BranchingRatios[lvl] = (lvl < pos->pos_levels) ? pos->pos_arity[pos->pos_levels - lvl - 1] : 4; -+ -+ /* now determine the number of levels of hierachy we have */ -+ /* and how many nodes per level there are */ -+ for (lvl = 0, nn = 1, n = pos->pos_nodes; -+ n > 1; -+ nn *= BranchingRatios[lvl], n = n / BranchingRatios[lvl], lvl++) -+ { -+ int nSegs = (n > BranchingRatios[lvl]) ? BranchingRatios[lvl] : n; -+ int nNodes = nn * nSegs; -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ for (clvl = 0, span = pos->pos_arity[pos->pos_levels - clvl - 1]; -+ span < nNodes && clvl < pos->pos_levels - 1; -+ clvl++, span *= pos->pos_arity[pos->pos_levels - clvl - 1]) -+ ; -+ -+ level->SwitchLevel = clvl; -+ level->MinNodeId = (pos->pos_nodeid / nNodes) * nNodes; -+ level->NumNodes = nNodes; -+ level->NumSegs = nSegs; -+ } -+ -+ cmRail->NumLevels = lvl; -+ cmRail->BroadcastLevel = lvl-1; -+ -+ CPRINTF4 (2, "%s: NodeId=%d NumNodes=%d NumLevels=%d\n", -+ rail->Name, pos->pos_nodeid, pos->pos_nodes, cmRail->NumLevels); -+ -+ LoadRouteTable (cmRail); -+ -+ /* Init SGMT constants */ -+ for (lvl = 0; lvl < cmRail->NumLevels; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->MySgmt = SegmentNo (cmRail, cmRail->NodeId, lvl); -+ -+ for (i = 0; i < CM_SGMTS_PER_LEVEL; i++) -+ { -+ CM_SGMT *sgmt = &level->Sgmts[i]; -+ -+ sgmt->MsgNumber = lvl * CM_SGMTS_PER_LEVEL + i; -+ sgmt->Level = lvl; -+ sgmt->Sgmt = i; -+ } -+ } -+ -+ /* Init maps for each cluster level */ -+ for (clvl = 0; clvl < cmRail->NumLevels; clvl++) -+ { -+ int nNodes = cmRail->Levels[clvl].NumNodes; -+ int mapBits = (nNodes * CM_GSTATUS_BITS) + (nNodes * EP_SVC_NUM_INDICATORS); -+ int clmin; -+ int clmax; -+ int clid = ClusterIds (cmRail, clvl, &clmin, &clmax); -+ -+ for (lvl = 0; lvl <= clvl; lvl++) -+ { -+ CM_LEVEL *level = &cmRail->Levels[lvl]; -+ -+ level->SubordinateMap[clvl] = statemap_create (mapBits); -+ -+ for (i = 0; i < level->NumSegs; i++) -+ { -+ level->Sgmts[i].Maps[clvl].CurrentInputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].InputMap = statemap_create (mapBits); -+ level->Sgmts[i].Maps[clvl].OutputMap = statemap_create (mapBits); -+ } -+ } -+ -+ cmRail->Levels[clvl].Online = 0; -+ -+ cmRail->Levels[clvl].TmpMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].GlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LastGlobalMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].SubTreeMap = statemap_create (mapBits); -+ cmRail->Levels[clvl].LocalMap = statemap_create (mapBits); -+ -+ /* Flag everyone outside my next lower cluster as sensed offline... */ -+ for (i = 0; i < clmin; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ for (i = clmax + 1; i < nNodes; i++) -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, i * CM_GSTATUS_BITS, CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ -+ /* ...and set my own state */ -+ statemap_setbits (cmRail->Levels[clvl].LocalMap, clid * CM_GSTATUS_BITS, -+ CM_GSTATUS_CLOSING | CM_GSTATUS_MAY_START, CM_GSTATUS_BITS); -+ } -+ -+ /* compute parameter hash to add to messages */ -+ cmRail->ParamHash = EP_PROTOCOL_VERSION; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_PERIODIC_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_URGENT_DISCOVER_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_INTERVAL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_DMA_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_P2P_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_BCAST_MSG_RETRIES; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_TIMER_SCHEDULE_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_HEARTBEAT_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_DISCOVER_TIMEOUT; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BT_NBIPUL; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + CM_GSTATUS_BITS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + EP_SVC_NUM_INDICATORS; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumLevels; -+ cmRail->ParamHash = cmRail->ParamHash * 127 + cmRail->NumNodes; -+ for (i = 0; i < cmRail->NumLevels; i++) -+ cmRail->ParamHash = cmRail->ParamHash * 127 + BranchingRatios[i]; -+ -+#if defined(PER_CPU_TIMEOUT) -+ StartPerCpuTimeouts (cmRail); -+#endif -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+#if !defined(PER_CPU_TIMEOUT) -+ /* Initialise the timer, but don't add it yet, since -+ * __Schedule_Heartbeat() will do this. */ -+ -+ init_timer (&cmRail->HeartbeatTimer); -+ -+ cmRail->HeartbeatTimer.function = cm_heartbeat_timer; -+ cmRail->HeartbeatTimer.data = (unsigned long) cmRail; -+ cmRail->HeartbeatTimer.expires = lbolt + hz; -+#endif -+ -+ /* start sending heartbeats */ -+ __Schedule_Heartbeat (cmRail); -+ -+ /* start discovering who else is out there */ -+ LowerTopLevel (cmRail, 0); -+ -+ /* connect to myself straight away - I know I'm here */ -+ ep_connect_node (rail, cmRail->NodeId); -+ -+ /* add to all rails */ -+ sys->Rails[rail->Number] = cmRail; -+ rail->ClusterRail = (void *) cmRail; -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ -+ /* Enable the input queues */ -+ ep_enable_inputq (rail, cmRail->PolledQueue); -+ ep_enable_inputq (rail, cmRail->IntrQueue); -+ -+ /* Create the procfs entries */ -+ cm_procfs_rail_init (cmRail); -+ -+ return 0; -+ -+ failed: -+ cm_remove_rail (subsys, epsys, rail); -+ return -ENOMEM; -+} -+ -+static void -+cm_fini (EP_SUBSYS *subsys, EP_SYS *epsys) -+{ -+ CM_SUBSYS *sys = (CM_SUBSYS *) subsys; -+ -+ cm_procfs_fini(sys); -+ -+ KMEM_FREE (sys, sizeof (CM_SUBSYS)); -+} -+ -+int -+cm_init (EP_SYS *sys) -+{ -+ CM_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, CM_SUBSYS *, sizeof (CM_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "cm"; -+ subsys->Subsys.Destroy = cm_fini; -+ subsys->Subsys.AddRail = cm_add_rail; -+ subsys->Subsys.RemoveRail = cm_remove_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ -+ cm_procfs_init (subsys); -+ -+ /* -+ * Initialise the machineid if it wasn't specified by -+ * the modules.conf file - otherwise truncate it to -+ * 16 bits. -+ */ -+ if (MachineId != -1) -+ MachineId = (uint16_t) MachineId; -+ else -+ { -+#if defined(LINUX_ALPHA) -+ MachineId = (uint16_t)((5 << 12) | HZ); -+#elif defined(LINUX_SPARC) -+ MachineId = (uint16_t)((4 << 12) | HZ); -+#elif defined(LINUX_I386) -+ MachineId = (uint16_t)((3 << 12) | HZ); -+#elif defined( LINUX_IA64) -+ MachineId = (uint16_t)((2 << 12) | HZ); -+#elif defined(LINUX_X86_64) -+ MachineId = (uint16_t)((1 << 12) | HZ); -+#else -+ MachineId = (uint16_t)((0 << 12) | HZ); -+#endif -+ } -+ -+ return (0); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/cm.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/cm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/cm.h 2005-07-28 14:52:52.854677824 -0400 -@@ -0,0 +1,412 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.14.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+typedef struct proc_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ char *pr_data; -+ int pr_data_len; -+ unsigned pr_off; -+ unsigned pr_len; -+ DisplayInfo pr_di; -+} PROC_PRIVATE; -+ -+extern void proc_character_fill (long mode, char *fmt, ...); -+extern int proc_release (struct inode *inode, struct file *file); -+extern ssize_t proc_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/cm_procfs.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/cm_procfs.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/cm_procfs.c 2005-07-28 14:52:52.855677672 -0400 -@@ -0,0 +1,254 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2005 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: cm_procfs.c,v 1.5 2004/05/14 09:23:13 daniel Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm_procfs.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "debug.h" -+#include "cm.h" -+#include -+ -+#include -+ -+extern char *sprintClPeers (char *str, CM_RAIL *cmRail, int clvl); -+ -+static int -+proc_read_cluster(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ CM_RAIL *cmRail = (CM_RAIL *) data; -+ char *p = page; -+ -+ page[0] = 0; -+ -+ if (cmRail->Rail->State != EP_RAIL_STATE_RUNNING) -+ p += sprintf(p, "\n"); -+ else -+ { -+ CM_LEVEL *cmLevel; -+ unsigned long flags; -+ int i, j; -+ char clNodeStr[32]; /* [%d-%d][%d-%d] */ -+ char seperate_with; -+ -+ struct { int val; char *name; } bitvals[] = { -+ {CM_OFFLINE_BROADCAST, "Broadcast"}, -+ {CM_OFFLINE_PROCFS, "Offline"}, -+ {CM_OFFLINE_MANAGER, "Manager"}}; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ -+ for (i = 0; i < cmRail->NumLevels; i++) -+ { -+ cmLevel = &cmRail->Levels[i]; -+ -+ p += sprintf(p, "%23s %7s ", sprintClPeers (clNodeStr, cmRail, i), cmLevel->Online?"Online":"Offline"); -+ -+ if ((cmLevel->Online ) | ( cmLevel->Connected > 0)) -+ p += sprintf(p, "Connected=%lu ", cmLevel->Connected); -+ -+ seperate_with = '<'; -+ -+ if ( cmLevel->Restarting ) { -+ p += sprintf(p, "%cRestarting", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( ! (cmLevel->GlobalMapValid & cmLevel->SubTreeMapValid )) { -+ p += sprintf(p, "%cMap Not Valid", seperate_with); -+ seperate_with = ','; -+ } -+ -+ if ( cmLevel->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmLevel->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ if ( cmRail->OfflineReasons ) { -+ for (j = 0; j < sizeof (bitvals)/sizeof(bitvals[0]); j++) -+ if (cmRail->OfflineReasons & bitvals[j].val) { -+ p += sprintf(p, "%c%s", seperate_with, bitvals[j].name); -+ seperate_with = ','; -+ } -+ } -+ -+ if ( seperate_with != '<' ) -+ p += sprintf(p,">\n"); -+ else -+ p += sprintf(p,"\n"); -+ } -+ -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ return qsnet_proc_calc_metrics (page, start, off, count, eof, p - page); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"cluster", proc_read_cluster, NULL}, -+}; -+ -+struct proc_dir_entry *svc_indicators_root; -+ -+typedef struct svc_indicator_data -+{ -+ int svc_indicator; -+ EP_RAIL *rail; -+} SVC_INDICATOR_DATA; -+ -+static SVC_INDICATOR_DATA svc_indicator_data[EP_SVC_NUM_INDICATORS][EP_MAX_RAILS]; -+static char *svc_indicator_names[EP_SVC_NUM_INDICATORS] = EP_SVC_NAMES; -+ -+static int -+proc_read_svc_indicator_rail_bitmap (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ SVC_INDICATOR_DATA *svc_data = (SVC_INDICATOR_DATA *)data; -+ unsigned int nnodes = ep_numnodes (ep_system()); -+ bitmap_t *bitmap; -+ -+ KMEM_ZALLOC (bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ cm_svc_indicator_bitmap (svc_data->rail, svc_data->svc_indicator, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_svc_indicator_bitmap(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned int num = (unsigned long) data; -+ EP_SYS *sys = ep_system(); -+ unsigned int nnodes = ep_numnodes (sys); -+ bitmap_t *bitmap; -+ -+ KMEM_ALLOC(bitmap, bitmap_t *, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t)), 1); -+ -+ ep_svc_indicator_bitmap (sys, num, bitmap, 0, nnodes); -+ -+ ep_sprintf_bitmap (page, PAGESIZE, bitmap, 0, 0, nnodes); -+ -+ KMEM_FREE (bitmap, (BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t))); -+ -+ strcat (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+cm_procfs_rail_init (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ struct proc_dir_entry *p; -+ int i; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, cmRail->Rail->ProcDir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = cmRail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((rail->SvcIndicatorDir = proc_mkdir ("svc_indicators", cmRail->Rail->ProcDir)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, rail->SvcIndicatorDir)) != NULL) -+ { -+ svc_indicator_data[i][rail->Number].svc_indicator = i; -+ svc_indicator_data[i][rail->Number].rail = rail; -+ -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_rail_bitmap; -+ p->data = (void *)&svc_indicator_data[i][rail->Number]; -+ p->owner = THIS_MODULE; -+ } -+ } -+ } -+} -+ -+void -+cm_procfs_rail_fini (CM_RAIL *cmRail) -+{ -+ EP_RAIL *rail = cmRail->Rail; -+ int i; -+ -+ if (rail->SvcIndicatorDir) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], rail->SvcIndicatorDir); -+ -+ remove_proc_entry ("svc_indicators", cmRail->Rail->ProcDir); -+ } -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, cmRail->Rail->ProcDir); -+} -+ -+void -+cm_procfs_init (CM_SUBSYS *subsys) -+{ -+ struct proc_dir_entry *p; -+ int i; -+ -+ qsnet_proc_register_hex (ep_config_root, "machine_id", &MachineId, 0); -+ -+ if ((svc_indicators_root = proc_mkdir("svc_indicators", ep_procfs_root)) != NULL) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ { -+ if ((p = create_proc_entry (svc_indicator_names[i], 0, svc_indicators_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_svc_indicator_bitmap; -+ p->data = (void *)(long) i; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ } -+} -+ -+void -+cm_procfs_fini (CM_SUBSYS *subsys) -+{ -+ int i; -+ -+ if (svc_indicators_root) -+ { -+ for (i = 0; i < EP_SVC_NUM_INDICATORS; i++) -+ remove_proc_entry (svc_indicator_names[i], svc_indicators_root); -+ -+ remove_proc_entry ("svc_indicators", ep_procfs_root); -+ } -+ -+ remove_proc_entry ("machine_id", ep_config_root); -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/commands_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/commands_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/commands_elan4.c 2005-07-28 14:52:52.855677672 -0400 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: commands_elan4.c,v 1.2 2003/10/23 15:07:53 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/commands_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+ -+static __inline__ void -+elan4_command_write (ELAN4_CQ *cq, E4_uint64 val, unsigned off) -+{ -+ writeq (val, cq->cq_mapping + offsetof (E4_CommandPort, Command[off])); -+} -+ -+void -+elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag) -+{ -+ elan4_command_write (cq, tag | NOP_CMD, 0); -+} -+ -+void -+elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | WRITE_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data) -+{ -+ elan4_command_write (cq, addr | ADD_DWORD_CMD, 0); -+ elan4_command_write (cq, data, 1); -+} -+ -+void -+elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype) -+{ -+ elan4_command_write (cq, from | (datatype << COPY64_DATA_TYPE_SHIFT) | COPY64_CMD, 0); -+ elan4_command_write (cq, to | (datatype << COPY64_DATA_TYPE_SHIFT), 1); -+} -+ -+void -+elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie) -+{ -+ elan4_command_write (cq, (cookie << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD, 0); -+} -+ -+ -+void -+elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs) -+{ -+ elan4_command_write (cq, regs->Registers[0] | RUN_THREAD_CMD, 0); -+ elan4_command_write (cq, regs->Registers[1], 1); -+ elan4_command_write (cq, regs->Registers[2], 2); -+ elan4_command_write (cq, regs->Registers[3], 3); -+ elan4_command_write (cq, regs->Registers[4], 4); -+ elan4_command_write (cq, regs->Registers[5], 5); -+ elan4_command_write (cq, regs->Registers[6], 6); -+} -+ -+void -+elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma) -+{ -+ E4_uint64 *dmaptr = (E4_uint64 *) dma; -+ -+ elan4_command_write (cq, dmaptr[0] | RUN_DMA_CMD, 0); -+ elan4_command_write (cq, dmaptr[1], 1); -+ elan4_command_write (cq, dmaptr[2], 2); -+ elan4_command_write (cq, dmaptr[3], 3); -+ elan4_command_write (cq, dmaptr[4], 4); -+ elan4_command_write (cq, dmaptr[5], 5); -+ elan4_command_write (cq, dmaptr[6], 6); -+} -+ -+void -+elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event) -+{ -+ elan4_command_write (cq, event | SET_EVENT_CMD, 0); -+} -+ -+void -+elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count) -+{ -+ elan4_command_write (cq, SET_EVENTN_CMD,0); -+ elan4_command_write (cq, event | count, 1); -+} -+ -+void -+elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ elan4_command_write (cq, event | WAIT_EVENT_CMD, 0); -+ elan4_command_write (cq, candt, 1); -+ elan4_command_write (cq, param0, 2); -+ elan4_command_write (cq, param1, 3); -+} -+ -+void -+elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | OPEN_STEN_PKT_CMD, 0); -+} -+ -+void -+elan4_guard (ELAN4_CQ *cq, E4_uint64 command) -+{ -+ elan4_command_write (cq, command | GUARD_CMD, 0); -+} -+ -+void -+elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+} -+ -+void -+elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+} -+ -+void -+elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1) -+{ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ elan4_command_write (cq, p0, 2); -+ elan4_command_write (cq, p1, 3); -+} -+ -+void -+elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...) -+{ -+ E4_uint32 ndword = ((trtype & TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ va_list ap; -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ -+ va_start (ap, addr); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, va_arg (ap, E4_uint64), i); -+ va_end (ap); -+} -+ -+void -+elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr) -+{ -+ E4_uint32 ndword = ((trtype &TR_SIZE_MASK) >> TR_SIZE_SHIFT); -+ register int i; -+ -+ elan4_command_write (cq, (trtype << 16) | SEND_TRANS_CMD, 0); -+ elan4_command_write (cq, addr, 1); -+ for (i = 2; i < ndword+2; i++) -+ elan4_command_write (cq, *ptr++, i); -+} -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/conf_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/conf_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/conf_linux.c 2005-07-28 14:52:52.856677520 -0400 -@@ -0,0 +1,309 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.c,v 1.37.2.3 2005/01/18 14:47:35 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+ -+#include "conf_linux.h" -+ -+#include -+#include -+#include -+#include -+ -+/* Module parameters */ -+unsigned int epdebug = 0; -+unsigned int epdebug_console = 0; -+unsigned int epdebug_cmlevel = 0; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+unsigned int epdebug_check_sum = 0; -+#endif -+int disabled = 0; -+int sdram_assert = 0; -+int assfail_mode = 0; -+int txd_stabilise = 7; -+int portals_envelopes = 0; -+ -+/* External module parameters */ -+extern int MaxSwitchLevels; -+extern int RejoinCheck; -+extern int RejoinPanic; -+extern int PositionCheck; -+extern int MachineId; -+ -+/* Module globals */ -+EP_SYS epsys; -+ -+#ifdef MODULE -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("Elan Kernel Comms"); -+ -+MODULE_LICENSE("GPL"); -+ -+MODULE_PARM(epdebug, "i"); -+MODULE_PARM(epdebug_console, "i"); -+MODULE_PARM(epdebug_cmlevel, "i"); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+MODULE_PARM(epdebug_check_sum, "i"); -+#endif -+MODULE_PARM(disabled, "i"); -+ -+MODULE_PARM(MachineId, "i"); -+MODULE_PARM(RejoinPanic, "i"); -+MODULE_PARM(RejoinCheck, "i"); -+MODULE_PARM(PositionCheck, "i"); -+MODULE_PARM(MaxSwitchLevels, "i"); -+ -+MODULE_PARM(sdram_assert, "i"); -+MODULE_PARM(assfail_mode, "i"); -+MODULE_PARM(txd_stabilise, "i"); -+MODULE_PARM(portals_envelopes,"i"); -+ -+/* epcomms.c large message service functions */ -+EXPORT_SYMBOL(ep_alloc_xmtr); -+EXPORT_SYMBOL(ep_free_xmtr); -+EXPORT_SYMBOL(ep_transmit_message); -+EXPORT_SYMBOL(ep_multicast_message); -+EXPORT_SYMBOL(ep_transmit_rpc); -+ -+EXPORT_SYMBOL(ep_alloc_rcvr); -+EXPORT_SYMBOL(ep_free_rcvr); -+EXPORT_SYMBOL(ep_queue_receive); -+EXPORT_SYMBOL(ep_requeue_receive); -+EXPORT_SYMBOL(ep_rpc_put); -+EXPORT_SYMBOL(ep_rpc_get); -+EXPORT_SYMBOL(ep_complete_rpc); -+EXPORT_SYMBOL(ep_complete_receive); -+ -+EXPORT_SYMBOL(ep_poll_transmits); -+EXPORT_SYMBOL(ep_enable_txcallbacks); -+EXPORT_SYMBOL(ep_disable_txcallbacks); -+ -+/* epcomms.c functions for accessing fields of rxds/txds */ -+EXPORT_SYMBOL(ep_rxd_arg); -+EXPORT_SYMBOL(ep_rxd_len); -+EXPORT_SYMBOL(ep_rxd_isrpc); -+EXPORT_SYMBOL(ep_rxd_envelope); -+EXPORT_SYMBOL(ep_rxd_payload); -+EXPORT_SYMBOL(ep_rxd_node); -+EXPORT_SYMBOL(ep_rxd_status); -+EXPORT_SYMBOL(ep_rxd_statusblk); -+EXPORT_SYMBOL(ep_txd_node); -+EXPORT_SYMBOL(ep_txd_statusblk); -+ -+/* kmap.c, nmh.c - handling mapping of pages into network memory */ -+EXPORT_SYMBOL(ep_dvma_reserve); -+EXPORT_SYMBOL(ep_dvma_release); -+EXPORT_SYMBOL(ep_dvma_load); -+EXPORT_SYMBOL(ep_dvma_unload); -+EXPORT_SYMBOL(ep_nmd_subset); -+EXPORT_SYMBOL(ep_nmd_merge); -+ -+EXPORT_SYMBOL(ep_system); -+ -+/* kcomm.c */ -+EXPORT_SYMBOL(ep_nodeid); -+EXPORT_SYMBOL(ep_numnodes); -+EXPORT_SYMBOL(ep_waitfor_nodeid); -+ -+/* railhints.c */ -+EXPORT_SYMBOL(ep_pickRail); -+EXPORT_SYMBOL(ep_xmtr_bcastrail); -+EXPORT_SYMBOL(ep_xmtr_prefrail); -+EXPORT_SYMBOL(ep_xmtr_availrails); -+EXPORT_SYMBOL(ep_xmtr_noderails); -+EXPORT_SYMBOL(ep_rcvr_prefrail); -+EXPORT_SYMBOL(ep_rcvr_availrails); -+EXPORT_SYMBOL(ep_rxd_railmask); -+ -+EXPORT_SYMBOL(ep_svc_indicator_bitmap); -+EXPORT_SYMBOL(ep_svc_indicator_is_set); -+EXPORT_SYMBOL(ep_svc_indicator_clear); -+EXPORT_SYMBOL(ep_svc_indicator_set); -+ -+/* cm.c */ -+EXPORT_SYMBOL(cm_svc_indicator_clear); -+EXPORT_SYMBOL(cm_svc_indicator_set); -+EXPORT_SYMBOL(cm_svc_indicator_is_set); -+EXPORT_SYMBOL(cm_svc_indicator_bitmap); -+ -+#endif -+ -+EP_SYS * -+ep_system() -+{ -+ return (&epsys); -+} -+ -+void -+ep_mod_inc_usecount() -+{ -+ MOD_INC_USE_COUNT; -+} -+ -+void -+ep_mod_dec_usecount() -+{ -+ MOD_DEC_USE_COUNT; -+} -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ -+#include -+ -+static int -+ep_dump_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if (event == DUMP_BEGIN) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+static struct notifier_block ep_dump_notifier = -+{ -+ notifier_call: ep_dump_event, -+ priority: 0, -+}; -+ -+#endif -+ -+static int -+ep_reboot_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ if ((event == SYS_RESTART || event == SYS_HALT || event == SYS_POWER_OFF)) -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_reboot_notifier = -+{ -+ notifier_call: ep_reboot_event, -+ priority: 0, -+}; -+ -+static int -+ep_panic_event (struct notifier_block *self, unsigned long event, void *buffer) -+{ -+ ep_shutdown (&epsys); -+ -+ return (NOTIFY_DONE); -+} -+ -+static struct notifier_block ep_panic_notifier = -+{ -+ notifier_call: ep_panic_event, -+ priority: 0, -+}; -+ -+/* -+ * Module configuration. -+ */ -+#ifdef MODULE -+static int __init ep_init(void) -+#else -+__initfunc(int ep_init(void)) -+#endif -+{ -+ register int rmask = 0; -+ -+ ep_procfs_init (); -+ -+ ep_sys_init (&epsys); -+ -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ rmask = ep4_create_rails (&epsys, disabled); -+#endif -+ -+ /* If we've brought up an elan4 rail, then disable all elan3 rails. */ -+ if ((rmask & ~disabled) != 0) -+ disabled = ~rmask; -+ -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ rmask = ep3_create_rails (&epsys, disabled); -+#endif -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ register_dump_notifier (&ep_dump_notifier); -+#endif -+ register_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_register (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ return (0); -+} -+ -+/* -+ * Module removal. -+ */ -+#ifdef MODULE -+static void -+__exit ep_exit(void) -+{ -+ register int i; -+ -+#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE) -+ unregister_dump_notifier (&ep_dump_notifier); -+#endif -+ unregister_reboot_notifier (&ep_reboot_notifier); -+ -+#if !defined(NO_PANIC_NOTIFIER) -+ notifier_chain_unregister (&panic_notifier_list, &ep_panic_notifier); -+#endif -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (epsys.Rails[i]) -+ { -+ switch (epsys.Rails[i]->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ break; -+ -+ case EP_RAIL_STATE_STARTED: -+ case EP_RAIL_STATE_RUNNING: -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ /* remove per-rail CM proc entries */ -+ ep_stop_rail (epsys.Rails[i]); -+ break; -+ } -+ -+ /* remove EP proc rail entries after per-rail CM entries */ -+ ep_procfs_rail_fini (epsys.Rails[i]); -+ ep_destroy_rail (epsys.Rails[i]); -+ } -+ } -+ -+ ep_sys_fini (&epsys); -+ -+ ep_procfs_fini (); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(ep_init); -+module_exit(ep_exit); -+ -+#endif -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/conf_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/conf_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/conf_linux.h 2005-07-28 14:52:52.856677520 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: conf_linux.h,v 1.6 2003/10/02 14:16:07 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/conf_linux.h,v $*/ -+ -+#ifndef __ELAN_CONF_LINUX_H -+#define __ELAN_CONF_LINUX_H -+ -+extern void ep_procfs_init(void); -+extern void ep_procfs_fini(void); -+extern void ep_procfs_rail_init(EP_RAIL *rail); -+extern void ep_procfs_rail_fini(EP_RAIL *rail); -+ -+extern void ep_procfs_svc_indicator_create(int svc_indicator, char *name); -+extern void ep_procfs_svc_indicator_remove(int svc_indicator, char *name); -+ -+#endif /* __ELAN_CONF_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/debug.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/debug.c 2005-07-28 14:52:52.857677368 -0400 -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.28.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+DisplayInfo di_ep_debug = {ep_debugf, DBG_DEBUG}; -+ -+/* -+ * Generate a partial bitmap string, for the bitmap from offset "off" for "count" bits, -+ * to allow for displaying of subsets, treat entry 0 of the bitmap as having value "base". -+ */ -+int -+ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int off, int nbits) -+{ -+ char entry[12]; /* space for N-N */ -+ register int i, j, len; -+ register int notstart = off; -+ register int notfirst = 0; -+ char *p = str; -+ -+ for (i = off; i < nbits; i++) -+ { -+ if (BT_TEST (bitmap, i)) -+ { -+ for (j = i+1; j < nbits; j++) -+ if (! BT_TEST (bitmap, j)) -+ break; -+ -+ if (j == (i+1)) -+ len = (int)sprintf (entry, "%d", base + i); -+ else -+ len = (int)sprintf (entry, "%d-%d", base + i, base + j-1); -+ -+ /* NOTE the 2 is for: one for comma, one for (possible) closing bracket */ -+ if ((p - str) <= (nbytes - (len+3))) -+ p += (int)sprintf (p, "%c%s", notfirst++ ? ',' : notstart ? ' ' : '[', entry); -+ else -+ { -+ /* no more space on this line, so move onto next */ -+ sprintf (p, "%c", notfirst++ ? ',' : '['); -+ -+ return (i); -+ } -+ -+ i = j; -+ } -+ } -+ -+ if (!notfirst) -+ sprintf (str, ""); -+ else -+ strcpy (p, "]"); -+ -+ return (-1); -+} -+ -+void -+ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits) -+{ -+ /* Tru64 kernel printf() truncates lines at 128 bytes - the man pages for printf (9) -+ * do not mention this restriction, nor that it does not terminate the line with a -+ * carriage return, this is pretty naff. -+ * Linux has a similar limit though is much more generous at 1024 - and you can just -+ * look at the code to see why this has been done. -+ * -+ * Our nodeset information could well be longer than 128 characters, so we're going to -+ * have to split it into a number of lines. */ -+ -+#define LINEBUF_SIZE 128 -+ char *p, linebuf[LINEBUF_SIZE+1]; /* +1 for null termination */ -+ int i, noff, off = 0; -+ -+ do { -+ if (off == 0) -+ p = linebuf + (int)sprintf (linebuf, "%s: %s ", prefix, tag); -+ else -+ { -+ p = linebuf + (int)sprintf (linebuf, "%s: ", prefix); -+ for (i = 0; tag[i] != '\0'; i++) -+ *p++ = ' '; -+ } -+ -+ noff = ep_sprintf_bitmap (p, &linebuf[LINEBUF_SIZE-1]-p, bitmap, base, off, nbits); -+ -+ printk ("%s\n", linebuf); -+ -+ } while ((off = noff) != -1); -+ -+#undef LINEBUF_SIZE -+} -+ -+void -+ep_debugf (long mode, char *fmt, ...) -+{ -+ va_list ap; -+ char prefix[32]; -+ -+ va_start (ap, fmt); -+#if defined(LINUX) -+ sprintf (prefix, "[%08d.%04d] ", (int) lbolt, current->pid); -+#else -+ sprintf (prefix, "[%08d.----] ", (int) lbolt); -+#endif -+ qsnet_vdebugf ((mode & epdebug_console ? QSNET_DEBUG_CONSOLE: 0) | QSNET_DEBUG_BUFFER, prefix, fmt, ap); -+ va_end (ap); -+} -+ -+int -+ep_assfail (EP_RAIL *rail, const char *ex, const char *func, const char *file, const int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (assfail_mode & 2) -+ panic ("ep: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (assfail_mode & 4) -+ epdebug = 0; -+ -+ return 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/debug_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/debug_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/debug_elan4.c 2005-07-28 14:52:52.857677368 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug_elan4.c,v 1.1 2004/05/19 10:21:04 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/debug_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+#include "debug.h" -+ -+static void -+ep4_display_ecqs (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep_debugf (DBG_DEBUG, "ECQ: type %d: avail %d cqnum %d\n", i, ecq->ecq_avail, elan4_cq2num (ecq->ecq_cq)); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_debug_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_debugf (DBG_DEBUG, "ep%d: is elan4 %d rev %c\n", rail->r_generic.Number, -+ rail->r_generic.Devinfo.dev_instance, 'a' + rail->r_generic.Devinfo.dev_revision_id); -+ -+ ep4_display_ecqs (rail); -+ -+ ep_display_alloc (&sys->Allocator); -+ ep_display_rmap (sys->Allocator.ResourceMap); -+ -+ ep_display_alloc (&rail->r_generic.ElanAllocator); -+ ep_display_alloc (&rail->r_generic.MainAllocator); -+ -+ ep_display_rmap (rail->r_generic.ElanAllocator.ResourceMap); -+} -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/debug.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/debug.h 2005-07-28 14:52:52.857677368 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EPDEBUG_H -+#define _ELAN3_EPDEBUG_H -+ -+#ident "$Id: debug.h,v 1.18.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/debug.h,v $ */ -+ -+extern unsigned int epdebug; -+extern unsigned int epdebug_console; -+extern unsigned int epdebug_cmlevel; -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern unsigned int epdebug_check_sum; -+#endif -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_PROBE 0x00000002 -+#define DBG_ROUTETABLE 0x00000004 -+#define DBG_STATEMAP 0x00000008 -+ -+#define DBG_CM 0x00000020 -+#define DBG_XMTR 0x00000040 -+#define DBG_RCVR 0x00000080 -+#define DBG_FORWARD 0x00000100 -+#define DBG_DISCON 0x00000200 -+#define DBG_EPTRAP 0x00000400 -+#define DBG_COMMAND 0x00000800 -+#define DBG_RETRY 0x00001000 -+#define DBG_DEBUG 0x00002000 -+#define DBG_NETWORK_ERROR 0x00004000 -+#define DBG_MSGSYS 0x00008000 -+#define DBG_MANAGER 0x00010000 -+#define DBG_KMAP 0x00020000 -+#define DBG_FAILOVER 0x00040000 -+#define DBG_MAPNMD 0x00080000 -+#define DBG_KMSG 0x00100000 -+#define DBG_SVC 0x00200000 -+#define DBG_STABILISE 0x00400000 -+ -+#if defined(DEBUG_PRINTF) -+ -+# define EPRINTF0(m,fmt) ((epdebug&(m)) ? ep_debugf(m,fmt) : (void)0) -+# define EPRINTF1(m,fmt,a) ((epdebug&(m)) ? ep_debugf(m,fmt,a) : (void)0) -+# define EPRINTF2(m,fmt,a,b) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b) : (void)0) -+# define EPRINTF3(m,fmt,a,b,c) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c) : (void)0) -+# define EPRINTF4(m,fmt,a,b,c,d) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d) : (void)0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e) : (void)0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f) : (void)0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+# define EPRINTF10(m,fmt,a,b,c,d,e,f,g,h,i,j) ((epdebug&(m)) ? ep_debugf(m,fmt,a,b,c,d,e,f,g,h,i,j) : (void)0) -+ -+# define CPRINTF0(lvl,fmt) (((lvl) <= epdebug_cmlevel) ? EPRINTF0(DBG_CM,fmt) : (void)0) -+# define CPRINTF1(lvl,fmt,a) (((lvl) <= epdebug_cmlevel) ? EPRINTF1(DBG_CM,fmt,a) : (void)0) -+# define CPRINTF2(lvl,fmt,a,b) (((lvl) <= epdebug_cmlevel) ? EPRINTF2(DBG_CM,fmt,a,b) : (void)0) -+# define CPRINTF3(lvl,fmt,a,b,c) (((lvl) <= epdebug_cmlevel) ? EPRINTF3(DBG_CM,fmt,a,b,c) : (void)0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (((lvl) <= epdebug_cmlevel) ? EPRINTF4(DBG_CM,fmt,a,b,c,d) : (void)0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (((lvl) <= epdebug_cmlevel) ? EPRINTF5(DBG_CM,fmt,a,b,c,d,e) : (void)0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (((lvl) <= epdebug_cmlevel) ? EPRINTF6(DBG_CM,fmt,a,b,c,d,e,f) : (void)0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (((lvl) <= epdebug_cmlevel) ? EPRINTF7(DBG_CM,fmt,a,b,c,d,e,f,g) : (void)0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (((lvl) <= epdebug_cmlevel) ? EPRINTF8(DBG_CM,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (((lvl) <= epdebug_cmlevel) ? EPRINTF9(DBG_CM,fmt,a,b,c,d,e,f,g,h,i) : (void)0) -+ -+#if defined __GNUC__ -+extern void ep_debugf (long mode, char *fmt, ...) __attribute__ ((format (printf,2,3))); -+#else -+extern void ep_debugf (long mode, char *fmt, ...); -+#endif -+ -+#else -+ -+# define EPRINTF0(m,fmt) (0) -+# define EPRINTF1(m,fmt,a) (0) -+# define EPRINTF2(m,fmt,a,b) (0) -+# define EPRINTF3(m,fmt,a,b,c) (0) -+# define EPRINTF4(m,fmt,a,b,c,d) (0) -+# define EPRINTF5(m,fmt,a,b,c,d,e) (0) -+# define EPRINTF6(m,fmt,a,b,c,d,e,f) (0) -+# define EPRINTF7(m,fmt,a,b,c,d,e,f,g) (0) -+# define EPRINTF8(m,fmt,a,b,c,d,e,f,g,h) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i) (0) -+# define EPRINTF9(m,fmt,a,b,c,d,e,f,g,h,i,j) (0) -+ -+# define CPRINTF0(lvl,fmt) (0) -+# define CPRINTF1(lvl,fmt,a) (0) -+# define CPRINTF2(lvl,fmt,a,b) (0) -+# define CPRINTF3(lvl,fmt,a,b,c) (0) -+# define CPRINTF4(lvl,fmt,a,b,c,d) (0) -+# define CPRINTF5(lvl,fmt,a,b,c,d,e) (0) -+# define CPRINTF6(lvl,fmt,a,b,c,d,e,f) (0) -+# define CPRINTF7(lvl,fmt,a,b,c,d,e,f,g) (0) -+# define CPRINTF8(lvl,fmt,a,b,c,d,e,f,g,h) (0) -+# define CPRINTF9(lvl,fmt,a,b,c,d,e,f,g,h,i) (0) -+ -+#endif /* DEBUG */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN3_EPDEBUG_H */ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_asm_elan4_thread.S 2005-07-28 14:52:52.858677216 -0400 -@@ -0,0 +1,133 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_asm_elan4_thread.S,v 1.5 2004/04/25 11:25:43 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_asm_elan4_thread.S,v $*/ -+ -+#include -+#include -+ -+#include "assym_elan4.h" -+ -+/* XXXXX - registers.h */ -+#define E4_MAIN_INT_SHIFT 14 -+ -+/* -+ * c_waitevent_interrupt (E4_uint64 *commandport, E4_Event *event, E4_uint64 count, E4_uint64 intcookie) -+ */ -+ .global c_waitevent_interrupt -+c_waitevent_interrupt: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov %r7, %r18 // (%r2) return pc -+1: call 2f -+ mov %sp, %r17 // (%r1) SP -+2: add %r7, (3f-1b), %r16 // (%r0) PC -+ st32 %r16, [%sp] // event source block -+ mov MAKE_EXT_CLEAN_CMD, %r23 -+ st8 %r23, [%sp+56] // event source block -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r23,%r23 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ or %r9, WAIT_EVENT_CMD, %r16 ! WAIT_EVENT_CMD | event -+ sll8 %r10, 32, %r17 -+ or %r17, E4_EVENT_TYPE_VALUE(E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), %r17 ! ev_CountAndType -+ mov %sp, %r18 ! ev_Source -+ mov %r8, %r19 ! ev_Dest -+ sll8 %r11, E4_MAIN_INT_SHIFT, %r20 -+ or %r20, INTERRUPT_CMD, %r20 ! INTERRUPT_CMD | (cookie << E4_MAIN_INT_SHIFT) -+ mov NOP_CMD, %r21 -+ mov NOP_CMD, %r22 -+ mov NOP_CMD, %r23 -+ -+ st64suspend %r16, [%r8] -+ -+3: ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r2+8, %r0 // and return -+ add %sp, 192, %sp -+ -+ -+#define EP4_RCVR_PENDING_STALLED 1 /* indicates thread has stalled for no descriptor (rcvr_pending_head) */ -+ -+#define RXD_DEBUG(VAL,RXD,TMP) \ -+ mov VAL, TMP; \ -+ st8 TMP, [RXD + EP4_RXD_DEBUG] -+ -+ -+ /* -+ * %r2 - rcvr elan -+ * %r3 - rxd elan -+ */ -+ .global c_queue_rxd -+c_queue_rxd: -+ RXD_DEBUG(1, %r3, %r23) -+ -+ ld16 [%r2 + EP4_RCVR_PENDING_TAILP], %r18 /* r18 == tailp, r19 = head */ -+ add %r3, EP4_RXD_NEXT, %r4 -+ -+ st8 %r0, [%r3 + EP4_RXD_NEXT] /* rxd->rxd_next = NULL */ -+ st8 %r4, [%r2 + EP4_RCVR_PENDING_TAILP] /* tailp = &rxd->rxd_next */ -+ st8 %r3, [%r18] /* *tailp = rxd */ -+ -+ cmp %r19, EP4_RCVR_PENDING_STALLED /* thread stalled ? */ -+ beq 1f -+ mov %r18, %r16 /* must have used %r16, %r19, %r23 */ -+ mov %r3, %r23 -+ -+ RXD_DEBUG(2, %r3, %r23) -+ -+ st8suspend %r16, [%r3 + EP4_RXD_QUEUED] /* no - mark as queued - all done */ -+ -+1: st8 %r16, [%r3 + EP4_RXD_QUEUED] /* mark as queued */ -+ -+ RXD_DEBUG(3, %r3, %r23) -+ -+ mov %r3, %r8 /* return rxd from c_stall_thread */ -+ ba .epcomms_resume_thread /* resume the thread */ -+ ld64 [%r2 + EP4_RCVR_THREAD_STALL], %r0 -+ -+ /* -+ * c_stall_thread (EP4_RCVR_ELAN *rcvrElan) -+ */ -+ .global c_stall_thread -+c_stall_thread: -+ add %sp, -192, %sp -+ st64 %r16, [%sp + 64] // preserve call preserved registers -+ st64 %r24, [%sp + 128] // - see CALL_USED_REGISTERS. -+ mov %r16,%r16 // BUG FIX: E4 RevA -+ mov %r24,%r24 // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ nop // BUG FIX: E4 RevA -+ -+ mov EP4_RCVR_PENDING_STALLED, %r9 // Mark rcvr as stalled -+ st8 %r9, [%r8 + EP4_RCVR_PENDING_HEAD] -+ -+ // XXXX _ TBD should generate interrupt -+ -+ mov %r1, %r17 // SP -+ mov %r7, %r23 // return pc -+ -+ st64suspend %r16, [%r8 + EP4_RCVR_THREAD_STALL] -+ -+.epcomms_resume_thread: -+ /* %r8 == rxdElan */ -+ -+ ld64 [%sp + 64], %r16 // restore call preserved register -+ ld64 [%sp + 128], %r24 -+ jmpl %r7+8, %r0 // and return -+ add %sp, 192, %sp -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms.c 2005-07-28 14:52:52.859677064 -0400 -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms.c,v 1.71.2.6 2004/11/30 12:02:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.c,v $ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include "cm.h" -+#include "debug.h" -+ -+static void -+ep_comms_thread (void *arg) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) arg; -+ struct list_head *el; -+ -+ kernel_thread_init ("ep_comms"); -+ -+ /* since ep_alloc_xmtr() has incremented the module use count, -+ * we would be preventing the module from being unloaded, so -+ * we decrement the use count since this thread must terminate -+ * during unload of the module. -+ */ -+ ep_mod_dec_usecount(); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ /* NOTE - subsys->Lock serializes us against flush/relocations -+ * caused by rail nodeset transitions. -+ */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ nextRunTime = ep_check_xmtr (list_entry (el, EP_XMTR, Link), nextRunTime); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ nextRunTime = ep_check_rcvr (list_entry (el, EP_RCVR, Link), nextRunTime); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_csum_rxds (subsys); -+#endif -+ nextRunTime = ep_forward_rxds (subsys, nextRunTime); -+ -+ if (ep_kthread_sleep (&subsys->Thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_mod_inc_usecount(); -+ -+ ep_kthread_stopped (&subsys->Thread); -+ kernel_thread_exit(); -+} -+ -+int -+ep_comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail; -+ struct list_head *el; -+ -+ printk ("%s: vendorid=%x deviceid=%x\n", rail->Name, rail->Devinfo.dev_vendor_id, rail->Devinfo.dev_device_id); -+ -+ switch (rail->Devinfo.dev_device_id) -+ { -+#if defined(CONFIG_ELAN3) || defined(CONFIG_ELAN3_MODULE) -+ case PCI_DEVICE_ID_ELAN3: -+ commsRail = ep3comms_add_rail (s, sys, rail); -+ break; -+#endif -+#if defined(CONFIG_ELAN4) || defined(CONFIG_ELAN4_MODULE) -+ case PCI_DEVICE_ID_ELAN4: -+ commsRail = ep4comms_add_rail (s, sys, rail); -+ break; -+#endif -+ default: -+ return 0; -+ } -+ -+ if (commsRail == NULL) -+ return 1; -+ -+ commsRail->Rail = rail; -+ commsRail->Subsys = subsys; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&commsRail->Link, &subsys->Rails); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP (commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ return 0; -+} -+ -+void -+ep_comms_del_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ EP_COMMS_RAIL *commsRail = NULL; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ /* find out rail entry and remove from system list */ -+ list_for_each (el, &subsys->Rails) { -+ if ((commsRail = list_entry (el, EP_COMMS_RAIL, Link))->Rail == rail) -+ break; -+ } -+ -+ list_del (&commsRail->Link); -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ EP_RAIL_OP(commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ EP_RAIL_OP(commsRail,Xmtr.DelRail) (xmtr, commsRail); -+ } -+ -+ kmutex_unlock (&subsys->Lock); -+ -+ EP_RAIL_OP (commsRail, DelRail) (commsRail); -+} -+ -+void -+ep_comms_fini (EP_SUBSYS *s, EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) s; -+ -+ ep_kthread_stop (&subsys->Thread); -+ ep_kthread_destroy (&subsys->Thread); -+ -+ if (subsys->ForwardXmtr) -+ ep_free_xmtr (subsys->ForwardXmtr); -+ -+ spin_lock_destroy (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spin_lock_destroy (&subsys->CheckSumDescLock); -+#endif -+ -+ kmutex_destroy (&subsys->Lock); -+ -+ KMEM_FREE (subsys, sizeof (EP_COMMS_SUBSYS)); -+} -+ -+int -+ep_comms_init (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ -+ KMEM_ZALLOC (subsys, EP_COMMS_SUBSYS *, sizeof (EP_COMMS_SUBSYS), 1); -+ -+ if (subsys == NULL) -+ return (ENOMEM); -+ -+ INIT_LIST_HEAD (&subsys->Rails); -+ INIT_LIST_HEAD (&subsys->Receivers); -+ INIT_LIST_HEAD (&subsys->Transmitters); -+ INIT_LIST_HEAD (&subsys->ForwardDescList); -+ -+ kmutex_init (&subsys->Lock); -+ spin_lock_init (&subsys->ForwardDescLock); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&subsys->CheckSumDescList); -+ spin_lock_init (&subsys->CheckSumDescLock); -+#endif -+ -+ subsys->Subsys.Sys = sys; -+ subsys->Subsys.Name = "epcomms"; -+ subsys->Subsys.Destroy = ep_comms_fini; -+ subsys->Subsys.AddRail = ep_comms_add_rail; -+ subsys->Subsys.RemoveRail = ep_comms_del_rail; -+ -+ ep_subsys_add (sys, &subsys->Subsys); -+ ep_kthread_init (&subsys->Thread); -+ -+ if ((subsys->ForwardXmtr = ep_alloc_xmtr (subsys->Subsys.Sys)) == NULL) -+ goto failed; -+ -+ if (kernel_thread_create (ep_comms_thread, subsys) == NULL) -+ goto failed; -+ ep_kthread_started (&subsys->Thread); -+ -+ return (0); -+ -+ failed: -+ ep_subsys_del (sys, &subsys->Subsys); -+ ep_comms_fini (&subsys->Subsys, sys); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_comms_display (EP_SYS *sys, char *how) -+{ -+ EP_COMMS_SUBSYS *subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME); -+ struct list_head *el; -+ -+ if (how == NULL || !strncmp (how, "rail", 4)) -+ { -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, DisplayRail) (commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ } -+ -+ if (how == NULL || !strncmp (how, "xmtr", 4)) -+ list_for_each (el, &subsys->Transmitters) -+ ep_display_xmtr (&di_ep_debug, list_entry (el, EP_XMTR, Link)); -+ -+ if (how == NULL || !strncmp (how, "rcvr", 4)) -+ list_for_each (el, &subsys->Receivers) -+ ep_display_rcvr (&di_ep_debug, list_entry (el, EP_RCVR, Link), (how && how[4] == ',') ? 1 : 0); -+} -+ -+int -+ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator > EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_set: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_set(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_set: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d \n",svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (EP_EINVAL); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_clear: ep_subsys_find failed\n"); -+ return (EP_EINVAL); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ cm_svc_indicator_clear(commsRail->Rail, svc_indicator); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_clear: %d success\n",svc_indicator); -+ return (EP_SUCCESS); -+} -+ -+int -+ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ int set = 0; -+ -+ EPRINTF2 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d \n", svc_indicator, nodeId); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_is_set: ep_subsys_find failed\n"); -+ return (0); -+ } -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ set |= cm_svc_indicator_is_set(commsRail->Rail, svc_indicator, nodeId); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ EPRINTF3 (DBG_SVC,"ep_svc_indicator_is_set: svc %d node %d returning %d\n", svc_indicator, nodeId, set); -+ return set; -+} -+ -+int -+ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ -+ EPRINTF1 (DBG_SVC,"ep_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) { -+ EPRINTF0 (DBG_SVC,"ep_svc_indicator_bitmap: ep_subsys_find failed\n"); -+ return (-2); -+ } -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and setting info on Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (commsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (0); -+} -+ -+int -+ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes) -+{ -+ int i; -+ -+ EPRINTF1 (DBG_SVC,"ep_xmtr_svc_indicator_bitmap: svc %d\n", svc_indicator); -+ -+ if (svc_indicator < 0 || svc_indicator >= EP_SVC_NUM_INDICATORS) -+ return (-1); -+ -+ /* clear bitmap */ -+ bt_zero (bitmap, nnodes); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ /* this will or in each bit map */ -+ cm_svc_indicator_bitmap (xmtr->Rails[i]->CommsRail->Rail, svc_indicator, bitmap, low, nnodes); -+ } -+ } -+ -+ return (0); -+} -+ -+EP_RAILMASK -+ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ struct list_head *el; -+ EP_RAILMASK rmask=0; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); /* walking rails list and reading info from Rail */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId) -+{ -+ EP_RAILMASK rmask=0; -+ EP_COMMS_RAIL *commsRail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if (xmtr->RailMask & (1 << i) ) -+ { -+ commsRail = xmtr->Rails[i]->CommsRail; -+ -+ if ( cm_svc_indicator_is_set(commsRail->Rail, svc_indicator,nodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ } -+ -+ EPRINTF3 (DBG_SVC, "ep_xmtr_svc_indicator_railmask: svc %d node %d mask 0x%x\n", svc_indicator, nodeId, rmask); -+ -+ return (rmask); -+} -+ -+EP_RAILMASK -+ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RAILMASK rmask=0; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (epsys, "epcomms")) == NULL) -+ return (rmask); -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == service) -+ rmask |= rcvr->RailMask; -+ } -+ kmutex_unlock(&subsys->Lock); -+ -+ return (rmask); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+uint32_t -+ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags) -+{ -+ EP_NMH *nmh; -+ int i; -+ uint16_t check_data = 0; -+ uint16_t check_env = 0; -+ -+ for (i = 0; i < nFrags; i++) { -+ /* find the nmh for this frag */ -+ nmh = ep_nmh_find (&sys->MappingTable, &nmd[i]); -+ -+ ASSERT( nmh != NULL); -+ -+ /* add the next frag to the check sum */ -+ check_data = nmh->nmh_ops->op_calc_check_sum (sys, nmh, &nmd[i], check_data); -+ } -+ -+ check_env = rolling_check_sum ((char *) env, offsetof(EP_ENVELOPE, CheckSum), 0); -+ -+ return (EP_ENVELOPE_CHECK_SUM | ( (check_env & 0x7FFF) << 16) | (check_data & 0xFFFF)); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3.c 2005-07-28 14:52:52.859677064 -0400 -@@ -0,0 +1,191 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3.c,v 1.60 2004/08/03 11:34:34 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_flush_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_flush_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_failover_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_failover_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep3comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[commsRail->Rail->Number]) -+ ep3xmtr_disconnect_callback (xmtr, (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[commsRail->Rail->Number]) -+ ep3rcvr_disconnect_callback (rcvr, (EP3_RCVR_RAIL *) rcvr->Rails[commsRail->Rail->Number]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+EP_COMMS_RAIL * -+ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COMMS_RAIL *commsRail; -+ EP3_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP3_COMMS_RAIL *, sizeof (EP3_COMMS_RAIL), TRUE); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->Generic.Ops.DelRail = ep3comms_del_rail; -+ commsRail->Generic.Ops.DisplayRail = ep3comms_display_rail; -+ commsRail->Generic.Ops.Rcvr.AddRail = ep3rcvr_add_rail; -+ commsRail->Generic.Ops.Rcvr.DelRail = ep3rcvr_del_rail; -+ commsRail->Generic.Ops.Rcvr.Check = ep3rcvr_check; -+ commsRail->Generic.Ops.Rcvr.QueueRxd = ep3rcvr_queue_rxd; -+ commsRail->Generic.Ops.Rcvr.RpcPut = ep3rcvr_rpc_put; -+ commsRail->Generic.Ops.Rcvr.RpcGet = ep3rcvr_rpc_get; -+ commsRail->Generic.Ops.Rcvr.RpcComplete = ep3rcvr_rpc_complete; -+ -+ commsRail->Generic.Ops.Rcvr.StealRxd = ep3rcvr_steal_rxd; -+ -+ commsRail->Generic.Ops.Rcvr.FillOutRailStats = ep3rcvr_fillout_rail_stats; -+ -+ commsRail->Generic.Ops.Rcvr.DisplayRcvr = ep3rcvr_display_rcvr; -+ commsRail->Generic.Ops.Rcvr.DisplayRxd = ep3rcvr_display_rxd; -+ -+ commsRail->Generic.Ops.Xmtr.AddRail = ep3xmtr_add_rail; -+ commsRail->Generic.Ops.Xmtr.DelRail = ep3xmtr_del_rail; -+ commsRail->Generic.Ops.Xmtr.Check = ep3xmtr_check; -+ commsRail->Generic.Ops.Xmtr.BindTxd = ep3xmtr_bind_txd; -+ commsRail->Generic.Ops.Xmtr.UnbindTxd = ep3xmtr_unbind_txd; -+ commsRail->Generic.Ops.Xmtr.PollTxd = ep3xmtr_poll_txd; -+ commsRail->Generic.Ops.Xmtr.CheckTxdState = ep3xmtr_check_txd_state; -+ -+ commsRail->Generic.Ops.Xmtr.DisplayXmtr = ep3xmtr_display_xmtr; -+ commsRail->Generic.Ops.Xmtr.DisplayTxd = ep3xmtr_display_txd; -+ -+ commsRail->Generic.Ops.Xmtr.FillOutRailStats = ep3xmtr_fillout_rail_stats; -+ -+ /* Allocate the input queues at their fixed elan address */ -+ if (! (commsRail->QueueDescs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * sizeof (EP3_InputQueue), PAGESIZE), EP_PERM_ALL, 0))) -+ { -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = 0; -+ qdesc.q_top = 0; -+ qdesc.q_fptr = 0; -+ qdesc.q_bptr = 0; -+ qdesc.q_size = 0; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan3_sdram_copyl_to_sdram (dev, &qdesc, commsRail->QueueDescs + (i * sizeof (EP3_InputQueue)), sizeof (EP3_InputQueue)); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep3comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) r; -+ EP_RAIL *rail = commsRail->Generic.Rail; -+ -+ ep_remove_callback (rail, EP_CB_FLUSH_FILTERING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FLUSH_FLUSHING, ep3comms_flush_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_FAILOVER, ep3comms_failover_callback, commsRail); -+ ep_remove_callback (rail, EP_CB_DISCONNECTING, ep3comms_disconnect_callback, commsRail); -+ -+ ep_free_memory_elan (rail, EP_EPCOMMS_QUEUE_BASE); -+ -+ KMEM_FREE (commsRail, sizeof (EP3_COMMS_RAIL)); -+} -+ -+void -+ep3comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan3.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3.h 2005-07-28 14:52:52.860676912 -0400 -@@ -0,0 +1,330 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN3_H -+#define __EPCOMMS_ELAN3_H -+ -+#ident "@(#)$Id: epcomms_elan3.h,v 1.27.2.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3.h,v $ */ -+ -+#define EP3_DMAFAILCOUNT 3 -+ -+ -+/* Main/Elan spinlock */ -+typedef struct ep3_spinlock_elan -+{ -+ volatile E3_uint32 sl_lock; /* main wants a lock */ -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[14]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_ELAN; -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef struct ep3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_pad[15]; /* pad to 64-bytes */ -+} EP3_SPINLOCK_MAIN; -+ -+#if defined (__ELAN3__) -+ -+extern void ep3_spinblock (EP3_SPINLOCK_ELAN *, EP3_SPINLOCK_MAIN *); -+ -+#define EP3_SPINENTER(SLE,SL) \ -+do {\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_lock) \ -+ ep3_spinblock(SLE, SL);\ -+} while (0) -+ -+#define EP3_SPINEXIT(SLE,SL) \ -+do {\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+} while (0) -+ -+#else -+ -+#define EP3_SPINENTER(DEV,SLE,SL) do { \ -+ E3_uint32 seq; \ -+\ -+ mb();\ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1);\ -+ mb();\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ while (seq != (SL)->sl_seq)\ -+ {\ -+ while ((SL)->sl_seq == (seq - 1))\ -+ {\ -+ mb();\ -+\ -+ DELAY (1); \ -+ }\ -+ seq = elan3_sdram_readl (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_seq));\ -+ }\ -+} while (0) -+ -+#define EP3_SPINEXIT(DEV,SLE,SL) do { \ -+ wmb(); \ -+ elan3_sdram_writel (DEV, (SLE) + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0);\ -+ mmiob(); \ -+} while (0) -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail elan memory portion receive descriptor */ -+typedef struct ep3_rxd_rail_elan -+{ -+ E3_DMA Dmas[EP_MAXFRAG+1]; /* Dma's for fetching data/putting data & status blk */ -+ E3_Event ChainEvent[EP_MAXFRAG]; /* Events to chain dmas */ -+ E3_BlockCopyEvent DataEvent; /* message received block event */ -+ E3_BlockCopyEvent DoneEvent; /* RPC status block event */ -+ -+ EP_NMD Data; /* Network mapping handle for receive data */ -+ -+ E3_Addr RxdMain; /* pointer to main memory portion */ -+ -+ E3_Addr Next; /* linked list when on pending list (elan address) */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rxd_main */ -+} EP3_RXD_RAIL_ELAN; -+ -+#define EP3_RXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_RXD_RAIL_ELAN), E3_DMA_ALIGN) -+ -+/* per-rail main memory portion of receive descriptor */ -+typedef struct ep3_rxd_rail_main -+{ -+ E3_uint32 DataEvent; /* dest for done event */ -+ E3_uint32 DoneEvent; /* dest for done event */ -+} EP3_RXD_RAIL_MAIN; -+ -+#define EP3_RXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_RXD_RAIL_MAIN), sizeof (E3_uint32)) -+ -+#if !defined(__ELAN3__) -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep3_rxd_rail -+{ -+ EP_RXD_RAIL Generic; /* generic rxd rail */ -+ -+ EP3_COOKIE DataCookie; /* Event cookie */ -+ EP3_COOKIE DoneCookie; /* Event cookie */ -+ EP3_COOKIE ChainCookie[EP_MAXFRAG]; /* Event cookie */ -+ -+ sdramaddr_t RxdElan; /* per-rail elan receive descriptor */ -+ E3_Addr RxdElanAddr; /* and elan address */ -+ -+ EP3_RXD_RAIL_MAIN *RxdMain; /* per-rail main receive descriptor */ -+ E3_Addr RxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_RXD_RAIL; -+ -+#define EP3_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep3_rxd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_RXD_RAIL Rxd[EP3_NUM_RXD_PER_BLOCK]; -+} EP3_RXD_RAIL_BLOCK; -+ -+#endif /* ! __ELAN3__ */ -+ -+typedef struct ep3_rcvr_rail_elan /* Elan memory service structure */ -+{ -+ EP3_SPINLOCK_ELAN ThreadLock; /* elan memory portion of spin lock */ -+ EP3_SPINLOCK_ELAN PendingLock; /* spin lock for pending rx list */ -+ -+ E3_Addr PendingDescs; /* list of pending receive descriptors */ -+ E3_uint32 ThreadShouldHalt; /* marks that the thread should halt */ -+ -+ E3_uint64 MainAddr; /* kernel address of ep_rcvr (for StallThreadForNoDescs)*/ -+} EP3_RCVR_RAIL_ELAN; -+ -+typedef struct ep3_rcvr_rail_main /* Main memory service strucure */ -+{ -+ EP3_SPINLOCK_MAIN ThreadLock; /* main memory portion of spin lock */ -+ EP3_SPINLOCK_MAIN PendingLock; /* spinlock for pending rx list */ -+ -+ volatile unsigned PendingDescsTailp; /* next pointer of last receive descriptor on pending list */ -+} EP3_RCVR_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_RCVR_RAIL_STATS; -+ -+typedef struct ep3_rcvr_rail -+{ -+ EP_RCVR_RAIL Generic; /* generic portion */ -+ -+ EP3_RCVR_RAIL_MAIN *RcvrMain; -+ E3_Addr RcvrMainAddr; -+ sdramaddr_t RcvrElan; -+ E3_Addr RcvrElanAddr; -+ -+ sdramaddr_t InputQueueBase; /* base of receive queue */ -+ E3_Addr InputQueueAddr; /* elan address of receive queue */ -+ -+ E3_Addr ThreadStack; /* Thread processor stack */ -+ E3_Addr ThreadWaiting; /* Elan thread is waiting as no receive descriptors pending (sp stored here ) */ -+ E3_Addr ThreadHalted; /* Elan thread is waiting as it was requested to halt */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and sleep here */ -+ -+ EP3_RCVR_RAIL_STATS stats; /* elan3 specific rcvr_rail stats */ -+} EP3_RCVR_RAIL; -+ -+#endif /* ! __ELAN3__ */ -+ -+/* per-rail portion of transmit descriptor */ -+typedef struct ep3_txd_rail_elan -+{ -+ EP_ENVELOPE Envelope; /* message envelope */ -+ EP_PAYLOAD Payload; /* message payload */ -+ -+ E3_BlockCopyEvent EnveEvent; /* envelope event */ -+ E3_BlockCopyEvent DataEvent; /* data transfer event */ -+ E3_BlockCopyEvent DoneEvent; /* rpc done event */ -+} EP3_TXD_RAIL_ELAN; -+ -+#define EP3_TXD_RAIL_ELAN_SIZE roundup (sizeof (EP3_TXD_RAIL_ELAN), E3_BLK_ALIGN) -+ -+typedef struct ep3_txd_rail_main -+{ -+ E3_uint32 EnveEvent; /* dest for envelope event */ -+ E3_uint32 DataEvent; /* dest for data transfer event */ -+ E3_uint32 DoneEvent; /* dest for rpc done event */ -+} EP3_TXD_RAIL_MAIN; -+ -+#define EP3_TXD_RAIL_MAIN_SIZE roundup (sizeof(EP3_TXD_RAIL_MAIN), E3_BLK_ALIGN) -+ -+#if !defined(__ELAN3__) -+ -+typedef struct ep3_txd_rail -+{ -+ EP_TXD_RAIL Generic; /* generic txd rail */ -+ -+ EP3_COOKIE EnveCookie; /* Event cookies */ -+ EP3_COOKIE DataCookie; -+ EP3_COOKIE DoneCookie; -+ -+ sdramaddr_t TxdElan; /* Elan TX descriptor */ -+ E3_Addr TxdElanAddr; /* and elan address */ -+ -+ EP3_TXD_RAIL_MAIN *TxdMain; /* Elan Main memory tx descriptor */ -+ E3_Addr TxdMainAddr; /* and elan address */ -+ -+ EP_BACKOFF Backoff; /* dma backoff */ -+} EP3_TXD_RAIL; -+ -+ -+#define EP3_NUM_TXD_PER_BLOCK 16 -+ -+typedef struct ep3_txd_rail_block -+{ -+ struct list_head Link; -+ -+ EP3_TXD_RAIL Txd[EP3_NUM_TXD_PER_BLOCK]; -+} EP3_TXD_RAIL_BLOCK; -+ -+typedef struct ep3_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP3_XMTR_RAIL_STATS; -+ -+typedef struct ep3_xmtr_rail -+{ -+ EP_XMTR_RAIL Generic; /* generic portion */ -+ -+ struct list_head FreeDescList; /* freelist of per-rail receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ struct list_head DescBlockList; /* list of receive descriptor blocks */ -+ -+ unsigned int FreeDescWaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t FreeDescSleep; /* and sleep here */ -+ -+ EP3_XMTR_RAIL_STATS stats; /* elan3 specific xmtr rail stats */ -+} EP3_XMTR_RAIL; -+ -+typedef struct ep3_comms_rail -+{ -+ EP_COMMS_RAIL Generic; /* generic comms rail */ -+ sdramaddr_t QueueDescs; /* input queue descriptors */ -+} EP3_COMMS_RAIL; -+ -+/* epcommxTx_elan3.c */ -+extern void ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail); -+ -+/* epcommsRx_elan3.c */ -+extern void CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdMainAddr, E3_uint32 PAckVal); -+extern void StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+extern void StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp); -+ -+extern void ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep3comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep3comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan3.c */ -+extern int ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep3xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep3xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep3xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep3xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan3.c */ -+extern int ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep3rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep3rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep3rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN3_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3_thread.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan3_thread.c 2005-07-28 14:52:52.861676760 -0400 -@@ -0,0 +1,296 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan3_thread.c,v 1.4 2004/01/20 11:03:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan3_thread.c,v $ */ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long long int64_t; -+typedef unsigned long long uint64_t; -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+#ifndef offsetof -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+ -+EP3_RAIL_ELAN *rail; -+EP3_RCVR_RAIL_ELAN *r; -+EP3_RCVR_RAIL_MAIN *rm; -+ -+void -+ep3comms_rcvr (EP3_RAIL_ELAN *rail, EP3_RCVR_RAIL_ELAN *rcvrElan, EP3_RCVR_RAIL_MAIN *rcvrMain, -+ EP3_InputQueue *q, unsigned int *cookies) -+{ -+ int count = 1; -+ E3_Addr nfptr = q->q_fptr + q->q_size; -+ E3_uint32 tmp; -+ int i; -+ E3_Addr buffer; -+ int len; -+ E3_DMA *dma; -+ E3_Event *event; -+ -+ /* clear the queue state to allow envelopes to arrive */ -+ q->q_state = 0; -+ -+ for (;;) -+ { -+ if (! rcvrElan->ThreadShouldHalt) -+ c_waitevent ((E3_Event *) &q->q_event, count); /* HALT POINT */ -+ -+ if (rcvrElan->ThreadShouldHalt && nfptr == q->q_bptr) -+ { -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_THREAD_HALTED)); /* HALT POINT */ -+ continue; -+ } -+ -+ count = 0; -+ do { -+ /* Process the message at nfptr */ -+ EP_ENVELOPE *env = (EP_ENVELOPE *) nfptr; -+ EP3_RXD_RAIL_ELAN *rxd; -+ int ack; -+ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ -+ while ((rxd = (EP3_RXD_RAIL_ELAN *)rcvrElan->PendingDescs) == 0) -+ { -+ /* no receive descriptors, so trap to the kernel to wait -+ * for receive descriptor to be queued, we pass the rcvr -+ * in %g1, so that the trap handler can restart us. */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rcvrElan)); -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_NO_DESCS)); /* HALT POINT */ -+ EP3_SPINENTER(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); /* HALT POINT */ -+ } -+ -+ if (env->Version != EP_ENVELOPE_VERSION) -+ { -+ /* This envelope has been cancelled - so just consume it */ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ goto consume_envelope; -+ } -+ -+ dma = rxd->Dmas; -+ event = rxd->ChainEvent; -+ -+ if (EP_IS_MULTICAST(env->Attr)) -+ { -+ dma->dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t); -+ dma->dma_source = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, Bitmap); -+ dma->dma_dest = (E3_Addr) &((EP_RXD_MAIN *) rxd->RxdMain)->Bitmap; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ dma++; event++; -+ } -+ -+ if (env->nFrags == 0) -+ { -+ /* Generate a "get" DMA to accept the envelope and fire the rx handler */ -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = 0; -+ dma->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ len = 0; -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = rxd->Data.nmd_addr, len = 0; i < env->nFrags; i++, dma++, event++) -+ { -+ dma->dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dma->dma_size = env->Frags[i].nmd_len; -+ dma->dma_source = env->Frags[i].nmd_addr; -+ dma->dma_dest = buffer; -+ dma->dma_destEvent = (E3_Addr) event; -+ dma->dma_destCookieVProc = DMA_COOKIE_THREAD | DMA_COOKIE (cookies[env->NodeId], EP_VP_DATA (rail->NodeId)); -+ dma->dma_srcEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DataEvent); -+ dma->dma_srcCookieVProc = DMA_COOKIE_THREAD | DMA_REMOTE_COOKIE (cookies[env->NodeId], EP_VP_DATA (env->NodeId)); -+ -+ event->ev_Count = 1; -+ -+ buffer += dma->dma_size; -+ len += dma->dma_size; -+ } -+ -+ /* Point the last dma at the done event */ -+ (--dma)->dma_destEvent = (E3_Addr) &rxd->DataEvent; -+ -+ if (rxd->Data.nmd_len < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = 0, dma = rxd->Dmas; i < env->nFrags; i++, dma++) -+ dma->dma_size = 0; -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Store the received message length in the rxdElan for CompleteEnvelope */ -+ rxd->Data.nmd_len = len; -+ -+ /* Initialise %g1 with the "rxd" so the trap handler can -+ * complete the envelope processing if we trap while sending the -+ * packet */ -+ asm volatile ("mov %0, %%g1" : /* no outputs */ : "r" (rxd)); -+ -+ /* Generate a packet to start the data transfer */ -+ c_open (EP_VP_DATA (env->NodeId)); -+ c_sendtrans2 (TR_THREADIDENTIFY, rxd->Dmas->dma_destCookieVProc, 0, 0); -+ c_sendmem (TR_SENDACK | TR_REMOTEDMA, 0, rxd->Dmas); -+ ack = c_close(); -+ -+ /* -+ * If we trapped for an output timeout, then the trap handler will have -+ * completed processing this envelope and cleared the spinlock, so we just -+ * need to update the queue descriptor. -+ */ -+ if (ack == EP3_PAckStolen) -+ goto consume_envelope; -+ -+ if (ack != E3_PAckOk) -+ { -+ /* our packet got nacked, so trap into the kernel so that -+ * it can complete processing of this envelope. -+ */ -+ asm volatile ("ta %0" : /* no outputs */ : "i" (EP3_UNIMP_TRAP_PACKET_NACKED)); /* HALT POINT */ -+ goto consume_envelope; -+ } -+ -+ /* remove the RXD from the pending list */ -+ EP3_SPINENTER (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ if ((rcvrElan->PendingDescs = rxd->Next) == 0) -+ rcvrMain->PendingDescsTailp = 0; -+ EP3_SPINEXIT (&rcvrElan->PendingLock, &rcvrMain->PendingLock); -+ -+ /* Copy the envelope information - as 5 64 byte chunks. -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ if (EP_HAS_PAYLOAD(env->Attr)) -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" /* copy envelope */ -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%0 + 128],%%l0 ! load 64-byte block into locals/ins\n" /* copy payload */ -+ "stblock64 %%l0,[%1 + 128] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 192],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 192] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ else -+ { -+ register void *src asm ("g5") = (void *) env; -+ register void *dst asm ("g6") = (void *) &((EP_RXD_MAIN *) rxd->RxdMain)->Envelope; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "add %%g7,64,%%g7 ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ -+ "ldblock64 [%0 + 0],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 0] ! store 64-byte block from local/ins\n" -+ "ldblock64 [%0 + 64],%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64 %%l0,[%1 + 64] ! store 64-byte block from local/ins\n" -+ -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (src), "r" (dst) -+ : /* clobbered */ "g5", "g6", "g7" ); -+ } -+ -+ /* Store the message length to indicate that I've finished */ -+ ((EP_RXD_MAIN *) rxd->RxdMain)->Len = rxd->Data.nmd_len; /* PCI write */ -+ -+ EP3_SPINEXIT(&rcvrElan->ThreadLock, &rcvrMain->ThreadLock); -+ -+ consume_envelope: -+ /* Sample the queue full bit *BEFORE* moving the fptr. -+ * Then only clear it if it was full before, otherwise, -+ * as soon as the fptr is moved on the queue could fill -+ * up, and so clearing it could mark a full queue as -+ * empty. -+ * -+ * While the full bit is set, the queue is in a 'steady -+ * state', so it is safe to set the q_state -+ * -+ */ -+ if (((tmp = q->q_state) & E3_QUEUE_FULL) == 0) -+ q->q_fptr = nfptr; /* update queue */ -+ else -+ { -+ q->q_fptr = nfptr; /* update queue */ -+ q->q_state = tmp &~E3_QUEUE_FULL; /* and clear full flag */ -+ } -+ -+ count++; /* bump message count */ -+ if (nfptr == q->q_top) /* queue wrap */ -+ nfptr = q->q_base; -+ else -+ nfptr += q->q_size; -+ -+ c_break_busywait(); /* be nice HALT POINT */ -+ -+ } while (nfptr != q->q_bptr); /* loop until Fptr == Bptr */ -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4.c 2005-07-28 14:52:52.862676608 -0400 -@@ -0,0 +1,392 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4.c,v 1.11.2.1 2004/10/28 11:53:28 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+static void -+ep4comms_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ commsRail->r_flush_count = 0; -+ kcondvar_wakeupall (&commsRail->r_flush_sleep, &commsRail->r_flush_lock); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_start (EP4_COMMS_RAIL *commsRail) -+{ -+ kmutex_lock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail) -+{ -+ unsigned long flags; -+ -+ ep4_wait_event_cmd (commsRail->r_flush_mcq, -+ commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * commsRail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ commsRail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (commsRail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ while (commsRail->r_flush_count != 0) -+ kcondvar_wait (&commsRail->r_flush_sleep, &commsRail->r_flush_lock, &flags); -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+ -+ kmutex_unlock (&commsRail->r_flush_mutex); -+} -+ -+void -+ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&commsRail->r_flush_lock, flags); -+ -+ elan4_set_event_cmd (cq, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event)); -+ -+ commsRail->r_flush_count++; -+ -+ spin_unlock_irqrestore (&commsRail->r_flush_lock, flags); -+} -+ -+void -+ep4comms_flush_callback (void *arg, statemap_t *map) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->r_generic.Subsys; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ unsigned int rnum = rail->r_generic.Number; -+ struct list_head *el; -+ -+ /* -+ * We stall the retry thread from CB_FLUSH_FILTERING until -+ * we've finished CB_FLUSH_FLUSHING to ensure that sten -+ * packets can not be being retried while we flush them -+ * through. -+ */ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ break; -+ } -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_flush_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_flush_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4comms_flush_wait (commsRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep_kthread_resume (&rail->r_retry_thread); -+ break; -+ } -+} -+ -+void -+ep4comms_failover_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_failover_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_failover_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_disconnect_callback (void *arg, statemap_t *map) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_disconnect_callback (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum]); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_disconnect_callback (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum]); -+ } -+ kmutex_unlock (&subsys->Lock); -+} -+ -+void -+ep4comms_neterr_callback (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_COMMS_RAIL *commsRail = (EP_COMMS_RAIL *) arg; -+ EP_COMMS_SUBSYS *subsys = commsRail->Subsys; -+ unsigned int rnum = commsRail->Rail->Number; -+ struct list_head *el; -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any sten packets from the retry lists */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ ep4comms_flush_start ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Second - flush through all command queues for xmtrs and rcvrs */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_flush (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_flush (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ /* Third - wait for flush to complete */ -+ ep4comms_flush_wait ((EP4_COMMS_RAIL *) commsRail); -+ -+ /* Fourth - flush through all command queues */ -+ ep4_flush_ecqs (rail); -+ -+ /* Fifth - search all the retry lists for the network error cookies */ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Transmitters) { -+ EP_XMTR *xmtr = list_entry (el, EP_XMTR, Link); -+ -+ if (xmtr->Rails[rnum]) -+ ep4xmtr_neterr_check (xmtr, (EP4_XMTR_RAIL *) xmtr->Rails[rnum], nodeId, cookies); -+ } -+ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Rails[rnum]) -+ ep4rcvr_neterr_check (rcvr, (EP4_RCVR_RAIL *) rcvr->Rails[rnum], nodeId, cookies); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+ -+EP_COMMS_RAIL * -+ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *)r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_COMMS_RAIL *commsRail; -+ E4_InputQueue qdesc; -+ int i; -+ -+ KMEM_ZALLOC (commsRail, EP4_COMMS_RAIL *,sizeof (EP4_COMMS_RAIL), 1); -+ -+ if (commsRail == NULL) -+ return NULL; -+ -+ commsRail->r_generic.Ops.DelRail = ep4comms_del_rail; -+ commsRail->r_generic.Ops.DisplayRail = ep4comms_display_rail; -+ commsRail->r_generic.Ops.Rcvr.AddRail = ep4rcvr_add_rail; -+ commsRail->r_generic.Ops.Rcvr.DelRail = ep4rcvr_del_rail; -+ commsRail->r_generic.Ops.Rcvr.Check = ep4rcvr_check; -+ commsRail->r_generic.Ops.Rcvr.QueueRxd = ep4rcvr_queue_rxd; -+ commsRail->r_generic.Ops.Rcvr.RpcPut = ep4rcvr_rpc_put; -+ commsRail->r_generic.Ops.Rcvr.RpcGet = ep4rcvr_rpc_get; -+ commsRail->r_generic.Ops.Rcvr.RpcComplete = ep4rcvr_rpc_complete; -+ -+ commsRail->r_generic.Ops.Rcvr.StealRxd = ep4rcvr_steal_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.DisplayRcvr = ep4rcvr_display_rcvr; -+ commsRail->r_generic.Ops.Rcvr.DisplayRxd = ep4rcvr_display_rxd; -+ -+ commsRail->r_generic.Ops.Rcvr.FillOutRailStats = ep4rcvr_fillout_rail_stats; -+ -+ commsRail->r_generic.Ops.Xmtr.AddRail = ep4xmtr_add_rail; -+ commsRail->r_generic.Ops.Xmtr.DelRail = ep4xmtr_del_rail; -+ commsRail->r_generic.Ops.Xmtr.Check = ep4xmtr_check; -+ commsRail->r_generic.Ops.Xmtr.BindTxd = ep4xmtr_bind_txd; -+ commsRail->r_generic.Ops.Xmtr.UnbindTxd = ep4xmtr_unbind_txd; -+ commsRail->r_generic.Ops.Xmtr.PollTxd = ep4xmtr_poll_txd; -+ commsRail->r_generic.Ops.Xmtr.CheckTxdState = ep4xmtr_check_txd_state; -+ -+ commsRail->r_generic.Ops.Xmtr.DisplayXmtr = ep4xmtr_display_xmtr; -+ commsRail->r_generic.Ops.Xmtr.DisplayTxd = ep4xmtr_display_txd; -+ -+ commsRail->r_generic.Ops.Xmtr.FillOutRailStats = ep4xmtr_fillout_rail_stats; -+ -+ /* Allocate command queue space for flushing (1 dword for interrupt + 4 dwords for waitevent) */ -+ if ((commsRail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == NULL) -+ { -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ if ((commsRail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == NULL) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ /* Allocate and initialise the elan memory part */ -+ if ((commsRail->r_elan = ep_alloc_elan (r, EP4_COMMS_RAIL_ELAN_SIZE, 0, &commsRail->r_elan_addr)) == (sdramaddr_t) 0) -+ { -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ ep4_register_intcookie (rail, &commsRail->r_flush_intcookie, commsRail->r_elan_addr + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event), -+ ep4comms_flush_interrupt, commsRail); -+ -+ elan4_sdram_writeq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ -+ /* Allocate and initialise all the queue desriptors as "full" with no event */ -+ if ((commsRail->r_descs = ep_alloc_memory_elan (r, EP_EPCOMMS_QUEUE_BASE, roundup (EP_MSG_NSVC * EP_QUEUE_DESC_SIZE, SDRAM_PAGE_SIZE), EP_PERM_ALL, 0)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (r, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+ return NULL; -+ } -+ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl (qdesc.q_bptr,qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_MSG_NSVC; i++) -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qdesc, commsRail->r_descs + (i * EP_QUEUE_DESC_SIZE), -+ sizeof (E4_InputQueue)); -+ -+ kmutex_init (&commsRail->r_flush_mutex); -+ spin_lock_init (&commsRail->r_flush_lock); -+ kcondvar_init (&commsRail->r_flush_sleep); -+ -+ ep_register_callback (r, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_register_callback (r, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_register_callback (r, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ commsRail->r_neterr_ops.op_func = ep4comms_neterr_callback; -+ commsRail->r_neterr_ops.op_arg = commsRail; -+ -+ ep4_add_neterr_ops (rail, &commsRail->r_neterr_ops); -+ -+ return (EP_COMMS_RAIL *) commsRail; -+} -+ -+void -+ep4comms_del_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FILTERING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FLUSH_FLUSHING, ep4comms_flush_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_FAILOVER, ep4comms_failover_callback, commsRail); -+ ep_remove_callback (&rail->r_generic, EP_CB_DISCONNECTING, ep4comms_disconnect_callback, commsRail); -+ -+ kcondvar_destroy (&commsRail->r_flush_sleep); -+ spin_lock_destroy (&commsRail->r_flush_lock); -+ kmutex_destroy (&commsRail->r_flush_mutex); -+ -+ ep_free_memory_elan (&rail->r_generic, EP_EPCOMMS_QUEUE_BASE); -+ ep_free_elan (&rail->r_generic, commsRail->r_elan_addr, EP4_COMMS_RAIL_ELAN_SIZE); -+ -+ ep4_deregister_intcookie (rail, &commsRail->r_flush_intcookie); -+ -+ ep4_put_ecq (rail, commsRail->r_flush_mcq, 4); -+ ep4_put_ecq (rail, commsRail->r_flush_ecq, 1); -+ -+ KMEM_FREE (commsRail, sizeof (EP4_COMMS_RAIL)); -+} -+ -+void -+ep4comms_display_rail (EP_COMMS_RAIL *r) -+{ -+ EP4_COMMS_RAIL *commsRail = (EP4_COMMS_RAIL *) r; -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->r_generic.Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ ep4_display_rail (rail); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%d mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ commsRail->r_flush_count, commsRail->r_flush_mcq, commsRail->r_flush_ecq, -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, commsRail->r_elan + offsetof (EP4_COMMS_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4.h 2005-07-28 14:52:52.863676456 -0400 -@@ -0,0 +1,470 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EPCOMMS_ELAN4_H -+#define __EPCOMMS_ELAN4_H -+ -+#ident "@(#)$Id: epcomms_elan4.h,v 1.13.2.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4.h,v $ */ -+ -+ -+#include -+ -+/* -+ * Elan4 spinlocks are a pair of 64 bit words, one in elan sdram and one in main memory -+ * the sdram word holds the thread sequence number in the bottom 32 bits and the main -+ * lock in the top 32 bits. The main memory word holds the sequence number only in -+ * it's bottom 32 bits */ -+ -+typedef volatile E4_uint64 EP4_SPINLOCK_MAIN; -+typedef volatile E4_uint64 EP4_SPINLOCK_ELAN; -+ -+#define EP4_SPINLOCK_SEQ 0 -+#define EP4_SPINLOCK_MLOCK 4 -+ -+#if defined(__elan4__) -+ -+#define EP4_SPINENTER(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "inc %0\n" \ -+ "st4 %0, [%1]\n" \ -+ "ld4 [%1 + 4], %0\n" \ -+ "srl8,byte %0, 4, %0\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+\ -+ if (tmp) \ -+ ep4_spinblock (CPORT,SLE, SLM); \ -+} while (0) -+ -+extern void ep4_spinblock(E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm); -+ -+#define EP4_SPINEXIT(CPORT,SLE,SLM) \ -+do { \ -+ register long tmp; \ -+\ -+ asm volatile ("ld4 [%1], %0\n" \ -+ "st4 %0, [%2]\n" \ -+ : /* outputs */ "=r" (tmp) \ -+ : /* inputs */ "r" (SLE), "r" (SLM)); \ -+} while (0) -+ -+#else -+ -+#define EP4_SPINENTER(DEV,SLE,SLM) \ -+do { \ -+ uint32_t seq; \ -+\ -+ mb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 1); \ -+ mb(); \ -+ while ((seq = elan4_sdram_readl (DEV, (SLE) + EP4_SPINLOCK_SEQ)) != *((uint32_t *) (SLM))) \ -+ { \ -+ while (*((uint32_t *) (SLM)) == (seq - 1)) \ -+ { \ -+ mb(); \ -+ DELAY(1); \ -+ } \ -+ } \ -+} while (0) -+ -+#define EP4_SPINEXIT(DEV,SLE,SLM) \ -+do { \ -+ wmb(); \ -+ elan4_sdram_writel (DEV, (SLE) + EP4_SPINLOCK_MLOCK, 0); \ -+} while (0) -+ -+#endif /* !defined(__elan4__) */ -+ -+#define EP4_STEN_RETRYCOUNT 16 -+#define EP4_DMA_RETRYCOUNT 16 -+ -+typedef struct ep4_intr_cmd -+{ -+ E4_uint64 c_write_cmd; -+ E4_uint64 c_write_value; -+ E4_uint64 c_intr_cmd; -+} EP4_INTR_CMD; -+ -+#define EP4_INTR_CMD_NDWORDS (sizeof (EP4_INTR_CMD) / 8) -+ -+typedef struct ep4_rxd_sten_cmd -+{ -+ E4_uint64 c_open; -+ -+ E4_uint64 c_trans; -+ E4_uint64 c_cookie; -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ -+ E4_uint64 c_ok_guard; -+ E4_uint64 c_ok_write_cmd; -+ E4_uint64 c_ok_write_value; -+ -+ E4_uint64 c_fail_guard; -+ E4_uint64 c_fail_setevent; -+ -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_STEN_CMD; -+ -+#define EP4_RXD_STEN_CMD_NDWORDS (sizeof (EP4_RXD_STEN_CMD) / 8) -+ -+typedef struct ep4_rxd_dma_cmd -+{ -+ E4_uint64 c_dma_typeSize; -+ E4_uint64 c_dma_cookie; -+ E4_uint64 c_dma_vproc; -+ E4_uint64 c_dma_srcAddr; -+ E4_uint64 c_dma_dstAddr; -+ E4_uint64 c_dma_srcEvent; -+ E4_uint64 c_dma_dstEvent; -+ E4_uint64 c_nop_cmd; -+} EP4_RXD_DMA_CMD; -+ -+#define EP4_RXD_DMA_CMD_NDWORDS (sizeof (EP4_RXD_DMA_CMD) / 8) -+#define EP4_RXD_START_CMD_NDWORDS (sizeof (E4_ThreadRegs) / 8) -+ -+typedef struct ep4_rxd_rail_elan -+{ -+ EP4_RXD_STEN_CMD rxd_sten[EP_MAXFRAG+1]; -+ -+ EP4_INTR_CMD rxd_done_cmd; /* command stream issued by done event (aligned to 64 bytes) */ -+ E4_Addr rxd_next; /* linked list when on pending list (pad to 32 bytes)*/ -+ E4_Event32 rxd_failed; /* event set when sten packet fails */ -+ -+ EP4_INTR_CMD rxd_failed_cmd; /* command stream issued by fail event (aligned to 64 bytes) */ -+ E4_uint64 rxd_queued; /* rxd queuing thread has executed (pad to 32 bytes)*/ -+ -+ E4_Event32 rxd_start; /* event to set to fire off and event chain (used as chain[0]) */ -+ E4_Event32 rxd_chain[EP_MAXFRAG]; /* chained events (aligned to 32 bytes) */ -+ E4_Event32 rxd_done; /* event to fire done command stream causing interrupt (used as chain[EP_MAXFRAG]) */ -+ -+ E4_Addr rxd_rxd; /* elan address of EP4_RXD_MAIN */ -+ E4_Addr rxd_main; /* elan address of EP4_RXD_RAIL_MAIN */ -+ E4_uint64 rxd_debug; /* thread debug value */ -+ -+ EP_NMD rxd_buffer; /* Network mapping descriptor for receive data */ -+} EP4_RXD_RAIL_ELAN; -+ -+#define EP4_RXD_RAIL_ELAN_SIZE roundup(sizeof (EP4_RXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_rxd_rail_main -+{ -+ E4_uint64 rxd_sent[EP_MAXFRAG+1]; /* sten packet sent */ -+ E4_uint64 rxd_failed; /* sten packet failed */ -+ E4_uint64 rxd_done; /* operation complete */ -+ -+ E4_Addr rxd_scq; /* command port for scq */ -+} EP4_RXD_RAIL_MAIN; -+ -+#define EP4_RXD_RAIL_MAIN_SIZE roundup(sizeof (EP4_RXD_RAIL_MAIN), 8) -+ -+#if !defined(__elan4__) -+typedef struct ep4_rxd_rail -+{ -+ EP_RXD_RAIL rxd_generic; -+ -+ struct list_head rxd_retry_link; -+ unsigned long rxd_retry_time; -+ -+ EP4_INTCOOKIE rxd_intcookie; -+ -+ sdramaddr_t rxd_elan; -+ EP_ADDR rxd_elan_addr; -+ -+ EP4_RXD_RAIL_MAIN *rxd_main; -+ EP_ADDR rxd_main_addr; -+ -+ EP4_ECQ *rxd_ecq; /* cq with 128 bytes targetted by event */ -+ EP4_ECQ *rxd_scq; /* cq with 8 bytes targetted by main/thread store */ -+} EP4_RXD_RAIL; -+ -+#define EP4_NUM_RXD_PER_BLOCK 16 -+ -+typedef struct ep4_rxd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_RXD_RAIL blk_rxds[EP4_NUM_RXD_PER_BLOCK]; -+} EP4_RXD_RAIL_BLOCK; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_rcvr_rail_elan -+{ -+ E4_uint64 rcvr_thread_stall[8]; /* place for thread to stall */ -+ E4_Event32 rcvr_qevent; /* Input queue event */ -+ E4_Event32 rcvr_thread_halt; /* place for thread to halt */ -+ -+ volatile E4_Addr rcvr_pending_tailp; /* list of pending rxd's (elan addr) */ -+ volatile E4_Addr rcvr_pending_head; /* -- this pair aligned to 16 bytes */ -+ -+ EP4_SPINLOCK_ELAN rcvr_thread_lock; /* spinlock for thread processing loop */ -+ -+ E4_uint64 rcvr_stall_intcookie; /* interrupt cookie to use when requseted to halt */ -+ -+ E4_uint64 rcvr_qbase; /* base of input queue */ -+ E4_uint64 rcvr_qlast; /* last item in input queue */ -+ -+ E4_uint64 rcvr_debug; /* thread debug value */ -+} EP4_RCVR_RAIL_ELAN; -+ -+typedef struct ep4_rcvr_rail_main -+{ -+ EP4_SPINLOCK_MAIN rcvr_thread_lock; /* spinlock for thread processing loop */ -+} EP4_RCVR_RAIL_MAIN; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_rcvr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_RCVR_RAIL_STATS; -+ -+typedef struct ep4_rcvr_rail -+{ -+ EP_RCVR_RAIL rcvr_generic; /* generic portion */ -+ -+ sdramaddr_t rcvr_elan; -+ EP_ADDR rcvr_elan_addr; -+ -+ EP4_RCVR_RAIL_MAIN *rcvr_main; -+ EP_ADDR rcvr_main_addr; -+ -+ sdramaddr_t rcvr_slots; /* input queue slots */ -+ EP_ADDR rcvr_slots_addr; /* and elan address */ -+ -+ EP_ADDR rcvr_stack; /* stack for thread */ -+ -+ EP4_ECQ *rcvr_ecq; /* command queue space for thread STEN packets */ -+ EP4_ECQ *rcvr_resched; /* command queue space to reschedule the thread */ -+ -+ struct list_head rcvr_freelist; /* freelist of per-rail receive descriptors */ -+ unsigned int rcvr_freecount; /* and number on free list */ -+ unsigned int rcvr_totalcount; /* total number created */ -+ spinlock_t rcvr_freelock; /* and lock for free list */ -+ struct list_head rcvr_blocklist; /* list of receive descriptor blocks */ -+ -+ unsigned int rcvr_freewaiting; /* waiting for descriptors to be freed */ -+ kcondvar_t rcvr_freesleep; /* and sleep here */ -+ -+ EP4_INTCOOKIE rcvr_stall_intcookie; /* interrupt cookie for thread halt */ -+ unsigned char rcvr_thread_halted; /* thread has been halted */ -+ unsigned char rcvr_cleanup_waiting; /* waiting for cleanup */ -+ kcondvar_t rcvr_cleanup_sleep; /* and sleep here */ -+ -+ EP4_RETRY_OPS rcvr_retryops; -+ -+ struct list_head rcvr_retrylist; /* list of txd's to retry envelopes for */ -+ struct list_head rcvr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t rcvr_retrylock; -+ -+ EP4_RCVR_RAIL_STATS rcvr_stats; /* elan4 specific rcvr_rail stats */ -+ -+} EP4_RCVR_RAIL; -+ -+#endif /* !defined(__elan4__) */ -+ -+typedef struct ep4_txd_rail_elan -+{ -+ EP4_INTR_CMD txd_env_cmd; /* command stream for envelope event (64 byte aligned) */ -+ E4_uint64 txd_pad0; /* pad to 32 bytes */ -+ E4_Event32 txd_env; /* event set when STEN packet fails */ -+ -+ EP4_INTR_CMD txd_done_cmd; /* command stream for done event (64 byte aligned) */ -+ E4_uint64 txd_pad1; /* pad to 32 bytes */ -+ E4_Event32 txd_done; /* event set when transmit complete */ -+ -+ E4_Event32 txd_data; /* event set when xmit completes (=> phase becomes passive) */ -+} EP4_TXD_RAIL_ELAN; -+ -+#define EP4_TXD_RAIL_ELAN_SIZE roundup(sizeof(EP4_TXD_RAIL_ELAN), 64) -+ -+typedef struct ep4_txd_rail_main -+{ -+ E4_uint64 txd_env; -+ E4_uint64 txd_data; -+ E4_uint64 txd_done; -+} EP4_TXD_RAIL_MAIN; -+ -+#define EP4_TXD_RAIL_MAIN_SIZE roundup(sizeof(EP4_TXD_RAIL_MAIN), 8) -+ -+#if !defined (__elan4__) -+typedef struct ep4_txd_rail -+{ -+ EP_TXD_RAIL txd_generic; -+ -+ struct list_head txd_retry_link; -+ unsigned long txd_retry_time; -+ -+ EP4_INTCOOKIE txd_intcookie; -+ -+ sdramaddr_t txd_elan; -+ EP_ADDR txd_elan_addr; -+ -+ EP4_TXD_RAIL_MAIN *txd_main; -+ EP_ADDR txd_main_addr; -+ -+ EP4_ECQ *txd_ecq; -+ -+ E4_uint64 txd_cookie; -+} EP4_TXD_RAIL; -+ -+#define EP4_NUM_TXD_PER_BLOCK 21 -+ -+typedef struct ep4_txd_rail_block -+{ -+ struct list_head blk_link; -+ EP4_TXD_RAIL blk_txds[EP4_NUM_TXD_PER_BLOCK]; -+} EP4_TXD_RAIL_BLOCK; -+ -+typedef struct ep4_xmtr_rail_main -+{ -+ E4_int64 xmtr_flowcnt; -+} EP4_XMTR_RAIL_MAIN; -+ -+typedef struct ep4_xmtr_rail_stats -+{ -+ unsigned long some_stat; -+} EP4_XMTR_RAIL_STATS; -+ -+#define EP4_TXD_LIST_POLL 0 -+#define EP4_TXD_LIST_STALLED 1 -+#define EP4_TXD_LIST_RETRY 2 -+#define EP4_TXD_NUM_LISTS 3 -+typedef struct ep4_xmtr_rail -+{ -+ EP_XMTR_RAIL xmtr_generic; -+ -+ EP4_XMTR_RAIL_MAIN *xmtr_main; -+ EP_ADDR xmtr_main_addr; -+ -+ struct list_head xmtr_freelist; -+ unsigned int xmtr_freecount; -+ unsigned int xmtr_totalcount; -+ spinlock_t xmtr_freelock; -+ struct list_head xmtr_blocklist; -+ unsigned int xmtr_freewaiting; -+ kcondvar_t xmtr_freesleep; -+ -+ EP4_INTCOOKIE xmtr_intcookie; /* interrupt cookie for "polled" descriptors */ -+ -+ ELAN4_CQ *xmtr_cq; -+ E4_int64 xmtr_flowcnt; -+ -+ EP4_RETRY_OPS xmtr_retryops; -+ -+ struct list_head xmtr_retrylist[EP4_TXD_NUM_LISTS]; /* list of txd's to retry envelopes for */ -+ struct list_head xmtr_polllist; /* list of txd's to poll for completion */ -+ spinlock_t xmtr_retrylock; -+ -+ EP4_XMTR_RAIL_STATS stats; /* elan4 specific xmtr rail stats */ -+} EP4_XMTR_RAIL; -+ -+#define EP4_XMTR_CQSIZE CQ_Size64K /* size of command queue for xmtr */ -+#define EP4_XMTR_FLOWCNT (CQ_Size(EP4_XMTR_CQSIZE) / 512) /* # of STEN packets which can fit in */ -+ -+typedef struct ep4_comms_rail_elan -+{ -+ E4_Event32 r_flush_event; -+} EP4_COMMS_RAIL_ELAN; -+ -+#define EP4_COMMS_RAIL_ELAN_SIZE roundup(sizeof (EP4_COMMS_RAIL_ELAN), 32) -+ -+typedef struct ep4_comms_rail -+{ -+ EP_COMMS_RAIL r_generic; /* generic comms rail */ -+ sdramaddr_t r_descs; /* input queue descriptors */ -+ -+ sdramaddr_t r_elan; /* elan portion */ -+ EP_ADDR r_elan_addr; -+ -+ kmutex_t r_flush_mutex; /* sequentialise flush usage */ -+ EP4_INTCOOKIE r_flush_intcookie; /* interrupt cookie to generate */ -+ -+ kcondvar_t r_flush_sleep; /* place to sleep waiting */ -+ spinlock_t r_flush_lock; /* and spinlock to use */ -+ -+ unsigned int r_flush_count; /* # setevents issued */ -+ EP4_ECQ *r_flush_ecq; /* command queue for interrupt */ -+ EP4_ECQ *r_flush_mcq; /* command queeu to issue waitevent */ -+ -+ EP4_NETERR_OPS r_neterr_ops; /* network error fixup ops */ -+} EP4_COMMS_RAIL; -+ -+/* epcommsTx_elan4.c */ -+extern void ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail); -+ -+extern void ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcommsRx_elan4.c */ -+extern void ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail); -+ -+extern void ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+extern void ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* epcomms_elan4.c */ -+extern void ep4comms_flush_start (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_wait (EP4_COMMS_RAIL *commsRail); -+extern void ep4comms_flush_setevent (EP4_COMMS_RAIL *commsRail, ELAN4_CQ *cq); -+ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *r); -+extern void ep4comms_del_rail (EP_COMMS_RAIL *r); -+extern void ep4comms_display_rail (EP_COMMS_RAIL *r); -+ -+/* epcommsTx_elan4.c */ -+extern int ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+extern void ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase); -+extern int ep4xmtr_poll_txd (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+extern long ep4xmtr_check (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+extern void ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern void ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail); -+extern int ep4xmtr_check_txd_state(EP_TXD *txd); -+ -+extern void ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+extern void ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+extern void ep4xmtr_fillout_rail_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+/* epcommsRx_elan4.c */ -+extern int ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+extern void ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+extern EP_RXD *ep4rcvr_steal_rxd (EP_RCVR_RAIL *rcvrRail); -+ -+extern long ep4rcvr_check (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+extern void ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+extern void ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+extern void ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+extern void ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+extern void ep4rcvr_fillout_rail_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#endif /* !defined(__elan4__) */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __EPCOMMS_ELAN4_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4_thread.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcomms_elan4_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcomms_elan4_thread.c 2005-07-28 14:52:52.864676304 -0400 -@@ -0,0 +1,346 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcomms_elan4_thread.c,v 1.10.8.2 2004/09/28 10:36:51 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms_elan4_thread.c,v $*/ -+ -+//#include -+ -+typedef char int8_t; -+typedef unsigned char uint8_t; -+typedef short int16_t; -+typedef unsigned short uint16_t; -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long int64_t; -+typedef unsigned long uint64_t; -+ -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+/* assembler in epcomms_asm_elan4_thread.S */ -+extern void c_waitevent_interrupt (E4_uint64 *cport, E4_Event32 *event, E4_uint64 count, E4_uint64 intcookie); -+extern EP4_RXD_RAIL_ELAN *c_stall_thread (EP4_RCVR_RAIL_ELAN *rcvrRail); -+ -+#define R32_to_R47 "%r32", "%r33", "%r34", "%r35", "%r36", "%r37", "%r38", "%r39", \ -+ "%r40", "%r41", "%r42", "%r43", "%r44", "%r45", "%r46", "%r47" -+#define R48_to_R63 "%r48", "%r49", "%r50", "%r51", "%r52", "%r53", "%r54", "%r55", \ -+ "%r56", "%r57", "%r58", "%r59", "%r60", "%r61", "%r62", "%r63" -+ -+/* proto types for code in asm_elan4_thread.S */ -+extern void c_waitevent (E4_uint64 *commandport, E4_Addr event, E4_uint64 count); -+extern void c_reschedule(E4_uint64 *commandport); -+ -+static inline unsigned long -+c_load_u16(unsigned short *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld2 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 48, %0\n" -+ "srl8 %0, 48, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline unsigned long -+c_load_u32(unsigned int *ptr) -+{ -+ unsigned long value; -+ -+ asm volatile ("ld4 [%1], %%r2\n" -+ "srl8,byte %%r2, %1, %0\n" -+ "sll8 %0, 32, %0\n" -+ "srl8 %0, 32, %0\n" -+ : /* outputs */ "=r" (value) -+ : /* inputs */ "r" (ptr) -+ : /* clobbered */ "%r2"); -+ return value; -+} -+ -+static inline void -+c_store_u32(unsigned int *ptr, unsigned long value) -+{ -+ asm volatile ("sll8,byte %0, %1, %%r2\n" -+ "st4 %%r2, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (value), "r" (ptr) -+ : /* clobbered */ "%r2"); -+} -+ -+/* Reschedule the current Elan thread to the back of the run queue -+ * if there is another one ready to run */ -+static inline void -+c_yield (E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_SIGNED_BIT) -+ c_reschedule(commandport); -+} -+ -+/* Reschedule the current thread if we're in danger of exceeding the -+ * thread instruction count */ -+static inline void -+c_insn_check(E4_uint64 *commandport) -+{ -+ unsigned long rval; -+ -+ asm volatile ("breaktest %0" : /* outputs */ "=r" (rval) : /* inputs */); -+ -+ if (rval & ICC_ZERO_BIT) -+ c_reschedule(commandport); -+} -+ -+void -+ep4_spinblock (E4_uint64 *cport, EP4_SPINLOCK_ELAN *sle, EP4_SPINLOCK_MAIN *slm) -+{ -+ do { -+ unsigned long val = *sle & 0xfffffffff; -+ -+ *slm = val; /* Release my lock */ -+ -+ while (*sle >> 32) /* Wait until the main */ -+ c_yield(cport); /* releases the lock */ -+ -+ c_store_u32 ((unsigned int *) sle, val + 1); /* and try and relock */ -+ } while (*sle >> 32); -+} -+ -+#define RESCHED_AFTER_PKTS ((CQ_Size(CQ_Size64K) / 128) - 1) -+ -+void -+ep4comms_rcvr (EP4_RAIL_ELAN *rail, EP4_RCVR_RAIL_ELAN *rcvrElan, EP4_RCVR_RAIL_MAIN *rcvrMain, -+ E4_InputQueue *inputq, E4_uint64 *cport, E4_uint64 *resched) -+{ -+ long count = 1; -+ long fptr = inputq->q_fptr; -+ -+ for (;;) -+ { -+ c_waitevent (cport, inputq->q_event, -count << 5); -+ -+ count = 0; -+ -+ while (fptr != inputq->q_bptr) -+ { -+ EP_ENVELOPE *env = (EP_ENVELOPE *) fptr; -+ unsigned long nodeid = c_load_u32 (&env->NodeId); -+ unsigned long opencmd = OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(nodeid)); -+ unsigned long vproc = EP_VP_DATA(rail->r_nodeid); -+ EP_ATTRIBUTE attr = c_load_u32 (&env->Attr); -+ unsigned long txdRail = c_load_u32 (&env->TxdRail); -+ unsigned long nFrags = c_load_u32 (&env->nFrags); -+ E4_uint64 cookie = rail->r_cookies[nodeid]; -+ unsigned long srcevent = (EP_IS_RPC(attr) ? txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_data) : -+ txdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done)); -+ EP4_RXD_RAIL_ELAN *rxdElan; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_RXD_MAIN *rxd; -+ EP4_RXD_STEN_CMD *sten; -+ E4_Event32 *event; -+ unsigned long first; -+ unsigned long buffer; -+ unsigned long len; -+ unsigned long i; -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ if ((rxdElan = (EP4_RXD_RAIL_ELAN *) rcvrElan->rcvr_pending_head) == 0) -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ rxdElan = c_stall_thread (rcvrElan); -+ -+ EP4_SPINENTER(resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ } -+ -+ if (c_load_u32 (&env->Version) != EP_ENVELOPE_VERSION) /* envelope has been cancelled */ -+ { -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ goto consume_envelope; -+ } -+ -+ rxd = (EP_RXD_MAIN *) rxdElan->rxd_rxd; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) rxdElan->rxd_main; -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(attr) ? 1 : 0) + (nFrags == 0 ? 1 : nFrags)); -+ sten = &rxdElan->rxd_sten[first]; -+ event = &rxdElan->rxd_chain[first]; -+ -+ if (EP_IS_MULTICAST(attr)) /* need to fetch broadcast bitmap */ -+ { -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(BT_BITOUL(EP_MAX_NODES) * sizeof (bitmap_t), DMA_DataTypeWord, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->TxdMain.nmd_addr) + offsetof(EP_TXD_MAIN, Bitmap); -+ sten->c_dma_dstAddr = (E4_Addr) &rxd->Bitmap; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ if (nFrags == 0) -+ { -+ /* Generate an empty "get" DMA to accept the envelope and fire the rx handler */ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ len = 0; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ } -+ else -+ { -+ /* Generate the DMA chain to fetch the data */ -+ for (i = 0, buffer = c_load_u32 (&rxdElan->rxd_buffer.nmd_addr), len = 0; i < nFrags; i++) -+ { -+ unsigned long fragLen = c_load_u32 (&env->Frags[i].nmd_len); -+ -+ sten->c_open = opencmd; -+ sten->c_trans = SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16); -+ sten->c_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_STEN; -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(fragLen, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ sten->c_dma_cookie = cookie | EP4_COOKIE_THREAD | EP4_COOKIE_REMOTE | EP4_COOKIE_DMA | EP4_COOKIE_INC; -+ sten->c_dma_vproc = vproc; -+ sten->c_dma_srcAddr = c_load_u32 (&env->Frags[i].nmd_addr); -+ sten->c_dma_dstAddr = buffer; -+ sten->c_dma_srcEvent = srcevent; -+ sten->c_dma_dstEvent = (E4_Addr) event; -+ -+ event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS); -+ -+ buffer += fragLen; -+ len += fragLen; -+ -+ cookie += (EP4_COOKIE_INC << 1); -+ -+ sten++; event++; -+ } -+ -+ (--event)->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS); -+ -+ if (c_load_u32 (&rxdElan->rxd_buffer.nmd_len) < len) -+ { -+ /* The receive descriptor was too small for the message */ -+ /* complete the message anyway, but don't transfer any */ -+ /* data, we set the length to EP_MSG_TOO_BIG */ -+ for (i = first, sten = &rxdElan->rxd_sten[first]; i <= EP_MAXFRAG; i++, sten++) -+ sten->c_dma_typeSize = E4_DMA_TYPE_SIZE(0, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ -+ len = EP_MSG_TOO_BIG; -+ } -+ } -+ -+ /* Stuff the first STEN packet into the command queue, there's always enough space, -+ * since we will insert a waitevent at least once for the queue size */ -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0 + 64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (&rxdElan->rxd_sten[first]), "r" (cport) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* remove the RXD from the pending list */ -+ if ((rcvrElan->rcvr_pending_head = rxdElan->rxd_next) == 0) -+ rcvrElan->rcvr_pending_tailp = (E4_Addr)&rcvrElan->rcvr_pending_head; -+ -+ /* mark as not queued */ -+ rxdElan->rxd_queued = 0; -+ -+ /* copy down the envelope */ -+ if (EP_HAS_PAYLOAD(attr)) -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "ld64 [%0+128], %%r32\n" -+ "st64 %%r48, [%1+64]\n" -+ "ld64 [%0+192], %%r48\n" -+ "st64 %%r32, [%1 + 128]\n" -+ "st64 %%r48, [%1 + 192]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ else -+ asm volatile ("ld64 [%0], %%r32\n" -+ "ld64 [%0+64], %%r48\n" -+ "st64 %%r32, [%1]\n" -+ "st64 %%r48, [%1+64]\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (env), "r" (&rxd->Envelope) -+ : /* clobbered */ R32_to_R47, R48_to_R63); -+ -+ /* Store the message length to indicate that I've finished */ -+ c_store_u32 (&rxd->Len, len); -+ -+ /* Finally update the network error cookie */ -+ rail->r_cookies[nodeid] = cookie; -+ -+ EP4_SPINEXIT (resched, &rcvrElan->rcvr_thread_lock, &rcvrMain->rcvr_thread_lock); -+ -+ consume_envelope: -+ if (fptr != rcvrElan->rcvr_qlast) -+ fptr += EP_INPUTQ_SIZE; -+ else -+ fptr = rcvrElan->rcvr_qbase; -+ -+ if (! rcvrElan->rcvr_stall_intcookie) -+ inputq->q_fptr = fptr; -+ -+ if (++count >= RESCHED_AFTER_PKTS) -+ break; -+ -+ c_insn_check (cport); -+ } -+ -+ if (rcvrElan->rcvr_stall_intcookie) -+ { -+ c_waitevent_interrupt (cport, &rcvrElan->rcvr_thread_halt, -(1 << 5), rcvrElan->rcvr_stall_intcookie); -+ inputq->q_fptr = fptr; -+ -+ count++; /* one extra as we were given an extra set to wake us up */ -+ } -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsFwd.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsFwd.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsFwd.c 2005-07-28 14:52:52.864676304 -0400 -@@ -0,0 +1,310 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsFwd.c,v 1.12 2004/08/16 12:21:15 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsFwd.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int epcomms_forward_limit = 8; -+ -+static void -+GenerateTree (unsigned nodeId, unsigned lowId, unsigned highId, bitmap_t *bitmap, -+ unsigned *parentp, unsigned *childrenp, int *nchildrenp) -+{ -+ int i; -+ int count; -+ int branch; -+ int nSub; -+ int branchIndex; -+ int parent; -+ int nBranch; -+ int rem; -+ int self; -+ int branchRatio; -+ int node; -+ int x, y, z; -+ -+ -+#ifdef DEBUG_PRINTF -+ { -+#define OVERFLOW "...]" -+#define LINESZ 128 -+ char space[LINESZ+1]; -+ -+ if (ep_sprintf_bitmap (space, LINESZ-strlen(OVERFLOW), bitmap, 0, 0, (highId - lowId)+1) != -1) -+ strcat (space, OVERFLOW); -+ -+ EPRINTF3 (DBG_FORWARD, "GenerateTree; elan node low=%d node high=%d bitmap=%s\n", lowId, highId, space); -+#undef OVERFLOW -+#undef LINESZ -+ } -+#endif -+ -+ /* Count the number of nodes in the partition */ -+ /* and work out which one I am */ -+ for (count = 0, self = ELAN_INVALID_NODE, i = lowId; i <= highId; i++) -+ { -+ if (BT_TEST (bitmap, i-lowId)) -+ { -+ if (i == nodeId) -+ self = count; -+ count++; -+ } -+ } -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: count=%d self=%d\n", count, self); -+ -+ if (count == 0 || self == ELAN_INVALID_NODE) -+ { -+ *parentp = ELAN_INVALID_NODE; -+ *nchildrenp = 0; -+ return; -+ } -+ -+ /* search for position in tree */ -+ branchRatio = EP_TREE_ARITY; /* branching ratio */ -+ branch = 0; /* start with process 0 */ -+ nSub = count; /* and whole tree */ -+ branchIndex = -1; /* my branch # in parent */ -+ parent = -1; /* my parent's group index # */ -+ -+ while (branch != self) /* descend process tree */ -+ { /* until I find myself */ -+ parent = branch; -+ branch++; /* parent + 1 = first born */ -+ nSub--; /* set # descendents */ -+ -+ rem = nSub % branchRatio; -+ nSub = nSub / branchRatio + 1; -+ x = rem * nSub; -+ y = self - branch; -+ -+ if (y < x) /* my first 'rem' branches have */ -+ { /* 1 more descendent... */ -+ branchIndex = y / nSub; -+ branch += branchIndex * nSub; -+ } -+ else /* than the rest of my branches */ -+ { -+ nSub--; -+ z = (y - x) / nSub; -+ branchIndex = rem + z; -+ branch += x + z * nSub; -+ } -+ } -+ -+ branch++; /* my first born */ -+ nSub--; /* total # of my descendents */ -+ /* leaves + their parents may have # children < branchRatio */ -+ nBranch = (nSub < branchRatio) ? nSub : branchRatio; -+ -+ EPRINTF2 (DBG_FORWARD, "GenerateTree: parent=%d nBranch=%d\n", parent, nBranch); -+ -+ /* Now calculate the real elan id's of the parent and my children */ -+ if (parent == -1) -+ *parentp = ELAN_INVALID_NODE; -+ else -+ { -+ for (i = lowId, node = 0; i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ if (node++ == parent) -+ break; -+ } -+ *parentp = i; -+ } -+ -+ for (i = lowId, branchIndex = 0, node = 0; branchIndex < nBranch && i <= highId; i++) -+ { -+ if (BT_TEST(bitmap, i-lowId)) -+ { -+ if (node == branch) -+ { -+ branch = branch + nSub / branchRatio + ((branchIndex < (nSub % branchRatio)) ? 1 : 0); -+ -+ childrenp[branchIndex++] = i; -+ } -+ node++; -+ } -+ } -+ -+ *nchildrenp = branchIndex; -+} -+ -+static void -+ForwardTxDone (EP_TXD *txd, void *arg, EP_STATUS status) -+{ -+ EP_FWD_DESC *desc = (EP_FWD_DESC *) arg; -+ EP_RXD *rxd = desc->Rxd; -+ EP_COMMS_SUBSYS *subsys = rxd->Rcvr->Subsys; -+ unsigned long flags; -+ -+ /* XXXX: if transmit fails, could step to next node in this subtree ? */ -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ -+ if (--desc->NumChildren > 0) -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ else -+ { -+ rxd->Rcvr->ForwardRxdCount--; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+long -+ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime) -+{ -+ unsigned long flags; -+ int i, res; -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ while (! list_empty (&subsys->ForwardDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->ForwardDescList.next, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_FWD_DESC *desc; -+ -+ EPRINTF2 (DBG_FORWARD, "ep: forwarding rxd %p to range %x\n", rxd, env->Range); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Rcvr->ForwardRxdCount++; -+ -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_ALLOC (desc, EP_FWD_DESC *, sizeof (EP_FWD_DESC), 1); -+ -+ if (desc == NULL) -+ { -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ /* compute the spanning tree for this message */ -+ unsigned int destLo = EP_RANGE_LOW (env->Range); -+ unsigned int destHi = EP_RANGE_HIGH (env->Range); -+ unsigned int parent; -+ -+ GenerateTree (subsys->Subsys.Sys->Position.pos_nodeid, destLo, destHi, rxdMain->Bitmap, &parent, desc->Children, &desc->NumChildren); -+ -+ if (desc->NumChildren == 0 || (epcomms_forward_limit && (rxd->Rcvr->ForwardRxdCount >= epcomms_forward_limit))) -+ { -+ EPRINTF5 (DBG_FORWARD, "ep; don't forward rxd %p to /%d (%d children/ %d forwarding (%d))\n", -+ rxd, rxd->Rcvr->Service, desc->NumChildren, rxd->Rcvr->ForwardRxdCount, epcomms_forward_limit); -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ rxd->Rcvr->ForwardRxdCount--; -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ KMEM_FREE (desc, sizeof (EP_FWD_DESC)); -+ -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ ep_nmd_subset (&desc->Data, &rxd->Data, 0, ep_rxd_len (rxd)); -+ desc->Rxd = rxd; -+ -+ /* NOTE - cannot access 'desc' after last call to multicast, since it could complete -+ * and free the desc before we access it again. Hence the reverse loop. */ -+ for (i = desc->NumChildren-1; i >= 0; i--) -+ { -+ ASSERT (desc->Children[i] < subsys->Subsys.Sys->Position.pos_nodes); -+ -+ EPRINTF3 (DBG_FORWARD, "ep: forwarding rxd %p to node %d/%d\n", rxd, desc->Children[i], rxd->Rcvr->Service); -+ -+ if ((res = ep_multicast_forward (subsys->ForwardXmtr, desc->Children[i], rxd->Rcvr->Service, 0, -+ ForwardTxDone, desc, env, EP_HAS_PAYLOAD(env->Attr) ? &rxdMain->Payload : NULL, -+ rxdMain->Bitmap, &desc->Data, 1)) != EP_SUCCESS) -+ { -+ ep_debugf (DBG_FORWARD, "ep: ep_multicast_forward failed\n"); -+ ForwardTxDone (NULL, desc, res); -+ } -+ } -+ -+ } -+ } -+ -+ spin_lock_irqsave (&subsys->ForwardDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->ForwardDescLock, flags); -+ -+ return (nextRunTime); -+} -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_csum_rxds (EP_COMMS_SUBSYS *subsys) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ while (! list_empty (&subsys->CheckSumDescList)) -+ { -+ EP_RXD *rxd = (EP_RXD *) list_entry (subsys->CheckSumDescList.next, EP_RXD, CheckSumLink); -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ list_del_init (&rxd->CheckSumLink); -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+ -+ if (env->CheckSum) { -+ EP_NMD nmd; -+ uint32_t csum; -+ -+ ep_nmd_subset ( &nmd, &rxd->Data, 0, ep_rxd_len (rxd)); -+ -+ csum = ep_calc_check_sum(subsys->Subsys.Sys, env, &nmd, 1); -+ if ( env->CheckSum != csum ) { -+ int f; -+ -+ -+ printk("Check Sum Error: env(0x%x,0x%x) data(0x%x,0x%x)\n", ((csum >> 16) & 0x7FFF), ((env->CheckSum >> 16) & 0x7FFF), -+ (csum & 0xFFFF), (env->CheckSum & 0xFFFF)); -+ printk("Check Sum Error: Sent : NodeId %u Range 0x%x Service %u Version 0x%x Attr 0x%x\n", env->NodeId, env->Range, rxd->Rcvr->Service, env->Version, env->Attr); -+ printk("Check Sum Error: Sent : Xid Generation 0x%x Handle 0x%x Unique 0x%llx\n", env->Xid.Generation, env->Xid.Handle, env->Xid.Unique); -+ printk("Check Sum Error: Sent : TxdRail 0x%x TxdMain nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", env->TxdRail, env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr ); -+ printk("Check Sum Error: Sent : nFrags %d \n", env->nFrags); -+ for(f=0;fnFrags;f++) -+ printk("Check Sum Error: Sent (%d): nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", f, -+ env->Frags[f].nmd_addr, env->Frags[f].nmd_len, env->Frags[f].nmd_attr); -+ printk("Check Sum Error: Recv : nmd_addr 0x%x nmd_len %u nmd_attr 0x%x\n", -+ nmd.nmd_addr, nmd.nmd_len, nmd.nmd_attr); -+ -+ } -+ } -+ ep_rxd_received_now(rxd); -+ -+ spin_lock_irqsave (&subsys->CheckSumDescLock, flags); -+ } -+ spin_unlock_irqrestore (&subsys->CheckSumDescLock, flags); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsRx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx.c 2005-07-28 14:52:52.866676000 -0400 -@@ -0,0 +1,1205 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx.c,v 1.27.2.5 2004/11/30 12:02:16 mike Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+unsigned int ep_rxd_lowat = 5; -+ -+static int -+AllocateRxdBlock (EP_RCVR *rcvr, EP_ATTRIBUTE attr, EP_RXD **rxdp) -+{ -+ EP_RXD_BLOCK *blk; -+ EP_RXD *rxd; -+ EP_RXD_MAIN *pRxdMain; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP_RXD_BLOCK *, sizeof (EP_RXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return (ENOMEM); -+ -+ if ((pRxdMain = ep_shared_alloc_main (rcvr->Subsys->Subsys.Sys, EP_RXD_MAIN_SIZE * EP_NUM_RXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+ return (ENOMEM); -+ } -+ -+ for (rxd = &blk->Rxd[0], i = 0; i < EP_NUM_RXD_PER_BLOCK; i++, rxd++) -+ { -+ rxd->Rcvr = rcvr; -+ rxd->RxdMain = pRxdMain; -+ -+ ep_nmd_subset (&rxd->NmdMain, &blk->NmdMain, (i * EP_RXD_MAIN_SIZE), EP_RXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pRxdMain = (EP_RXD_MAIN *) ((unsigned long) pRxdMain + EP_RXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvr->DescBlockList); -+ -+ rcvr->TotalDescCount += EP_NUM_RXD_PER_BLOCK; -+ -+ for (i = rxdp ? 1 : 0; i < EP_NUM_RXD_PER_BLOCK; i++) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[i].CheckSumLink); -+#endif -+ -+ list_add (&blk->Rxd[i].Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) -+ { -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (rxdp) -+ { -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ INIT_LIST_HEAD (&blk->Rxd[0].CheckSumLink); -+#endif -+ -+ *rxdp = &blk->Rxd[0]; -+ } -+ return (ESUCCESS); -+} -+ -+static void -+FreeRxdBlock (EP_RCVR *rcvr, EP_RXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ rcvr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_shared_free_main (rcvr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_RXD_BLOCK)); -+} -+ -+static EP_RXD * -+GetRxd (EP_RCVR *rcvr, EP_ATTRIBUTE attr) -+{ -+ EP_RXD *rxd; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+ while (list_empty (&rcvr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (AllocateRxdBlock (rcvr, attr, &rxd) == ESUCCESS) -+ return (rxd); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ IncrStat (rcvr->Subsys, NoFreeRxds); -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ return (NULL); -+ } -+ -+ rcvr->FreeDescWanted++; -+ kcondvar_wait (&rcvr->FreeDescSleep, &rcvr->FreeDescLock, &flags); -+ } -+ -+ rxd = list_entry (rcvr->FreeDescList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (--rcvr->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ -+ return (rxd); -+} -+ -+static void -+FreeRxd (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ unsigned long flags; -+ -+ ASSERT (EP_XID_INVALID(rxd->MsgXid)); -+ -+ spin_lock_irqsave (&rcvr->FreeDescLock, flags); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ASSERT(list_empty(&rxd->CheckSumLink)); -+#endif -+ -+ list_add (&rxd->Link, &rcvr->FreeDescList); -+ -+ rcvr->FreeDescCount++; -+ -+ if (rcvr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ rcvr->FreeDescWanted--; -+ kcondvar_wakeupone (&rcvr->FreeDescSleep, &rcvr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+} -+ -+int -+ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR_RAIL *rcvrRail; -+ EP_RXD *rxd; -+ int rnum; -+ unsigned long flags; -+ -+ if ((rxd = GetRxd (rcvr, attr)) == NULL) -+ return (ENOMEM); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ else -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0 || !(EP_NMD_RAILMASK(nmd) & EP_RAIL2RAILMASK(rnum) & rcvr->RailMask)) -+ rcvrRail = NULL; -+ else -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ EPRINTF7 (DBG_RCVR,"ep_queue_receive: rxd=%p svc %d nmd=%08x,%d,%x rnum=%d rcvrRail=%p\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ int rnum = ep_pickRail(EP_NMD_RAILMASK(&rxd->Data)); -+ EP_RCVR_RAIL *rcvrRail; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF5 (DBG_RCVR,"ep_requeue_receive: rxd=%p svc %d nmd=%08x,%d,%x\n", -+ rxd, rcvr->Service, nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ rxd->Data = *nmd; -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ list_add_tail (&rxd->Link, &rcvr->ActiveDescList); -+ -+ /* -+ * Rail selection: if they've asked for a particular rail, then use it, otherwise if -+ * the rail it was last received on is mapped for the nmd and is available -+ * then use that one, otherwise pick one that is mapped by the nmd. -+ */ -+ if (EP_IS_PREFRAIL_SET(attr)) -+ rnum = EP_ATTR2PREFRAIL(attr); -+ -+ if (rnum < 0 || ! (EP_RAIL2RAILMASK (rnum) & EP_NMD_RAILMASK(nmd) & ep_rcvr_availrails (rcvr))) -+ rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(nmd)); -+ -+ if (rnum < 0) -+ rcvrRail = NULL; -+ else -+ { -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ if (! (EP_NMD_RAILMASK(&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) < 0) -+ rcvrRail = NULL; -+ } -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if (rcvrRail == NULL || !EP_RCVR_OP(rcvrRail, QueueRxd) (rxd, rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR, "ep_requeue_receive: rcvrRail=%p - setting unbound\n", rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ -+ep_complete_receive (EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ ASSERT (rxd->RxdRail == NULL && rxd->State == EP_RXD_COMPLETED); -+ -+ FreeRxd (rcvr, rxd); -+ -+ /* if we're waiting for cleanup, then wake them up */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if (rcvr->CleanupWaiting) -+ kcondvar_wakeupall (&rcvr->CleanupSleep, &rcvr->Lock); -+ rcvr->CleanupWaiting = 0; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_put: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ /* rxd no longer on active list - just free it */ -+ /* off and return an error */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * rail or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_put: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_PUT_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *remote, EP_NMD *local, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_rpc_get: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_rpc_get: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_GET_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return EP_SUCCESS; -+} -+ -+int -+ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, EP_NMD *local, EP_NMD *remote, int nFrags) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ if (rxd->State == EP_RXD_BEEN_ABORTED) -+ { -+ EPRINTF2 (DBG_RCVR, "ep_complete_rpc: rcvr %p rxd %p completed because no rails available\n", rcvr, rxd); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return EP_CONN_RESET; -+ } -+ else -+ { -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->CommsRail; -+ EP_RAIL *rail = commsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[env->NodeId]; -+ int i; -+ -+ if (blk == NULL) -+ bzero (&rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ else -+ bcopy (blk, &rxd->RxdMain->StatusBlk, sizeof (EP_STATUSBLK)); -+ -+ /* Attempt to ensure that the local nmds are mapped */ -+ for (i = 0; i < nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ ep_nmd_map_rails (sys, &local[i], EP_RAIL2RAILMASK(rail->Number)); -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* rail is connected */ -+ (ep_nmd2railmask (local, nFrags) & ep_nmd2railmask (remote, nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* and NMDs valid for it */ -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete) (rxd, local, remote, nFrags); -+ } -+ else -+ { -+ /* RPC completion cannot progress - either node is no longer connected on this -+ * node or some of the source/destination NMDs are not mapped on this rail. -+ * Save the NMDs into the RXD and schedule the thread to request mappings */ -+ EPRINTF4 (DBG_RCVR, "%s: ep_complete_rpc: rcvr %p rxd %p %s\n", rail->Name, rcvr, rxd, -+ (nodeRail->State == EP_NODE_CONNECTED) ? "NMDs not valid on this rail" : "no longer connected on this rail"); -+ -+ rxd->State = EP_RXD_COMPLETE_STALLED; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+ -+ /* install the handler */ -+ rxd->Handler = handler; -+ rxd->Arg = arg; -+ -+ /* store the arguements */ -+ rxd->nFrags = nFrags; -+ for (i = 0; i < nFrags; i++) -+ { -+ rxd->Local[i] = local[i]; -+ rxd->Remote[i] = remote[i]; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (ESUCCESS); -+} -+ -+/* functions for accessing fields of rxds */ -+void *ep_rxd_arg(EP_RXD *rxd) { return (rxd->Arg); } -+int ep_rxd_len(EP_RXD *rxd) { return (rxd->RxdMain->Len); } -+EP_STATUS ep_rxd_status(EP_RXD *rxd) { return (rxd->RxdMain->Len < 0 ? rxd->RxdMain->Len : EP_SUCCESS); } -+int ep_rxd_isrpc(EP_RXD *rxd) { return (EP_IS_RPC(rxd->RxdMain->Envelope.Attr) != 0); } -+EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd) { return (&rxd->RxdMain->Envelope); } -+EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd) { return (EP_HAS_PAYLOAD(rxd->RxdMain->Envelope.Attr) ? &rxd->RxdMain->Payload : NULL); } -+int ep_rxd_node(EP_RXD *rxd) { return (rxd->RxdMain->Envelope.NodeId); } -+EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd) { return (&rxd->RxdMain->StatusBlk); } -+EP_RAILMASK ep_rxd_railmask(EP_RXD *rxd) { return (rxd->Data.nmd_attr); } -+ -+static void -+ProcessNmdMapResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[rxd->RxdMain->Envelope.NodeId]; -+ int i; -+ -+ ASSERT (msg->Body.MapNmd.nFrags == rxd->nFrags); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ rxd->Remote[i] = msg->Body.MapNmd.Nmd[i]; -+ -+ if (nodeRail->State == EP_NODE_CONNECTED && /* node is still connected on this rail */ -+ (ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) /* NMDs are now valid for this rail */ -+ { -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP(rcvrRail, RpcComplete) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ break; -+ -+ default: -+ panic ("ProcessNmdMapResponse: XID match but rxd in invalid state\n"); -+ break; -+ } -+ -+ rxd->NextRunTime = 0; -+ } -+ else -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr, rxd); -+} -+ -+static void -+ProcessFailoverResponse (EP_RCVR *rcvr, EP_RXD *rxd, EP_MANAGER_MSG *msg) -+{ -+ /* XXXX - TBD */ -+#ifdef NOTYET -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ EP_RCVR_RAIL *nRcvrRail; -+ EP_RXD_RAIL *nRxdRail; -+ -+ ASSERT (rxd->RxdMain->Envelope.Attr & EP_RPC); -+ -+ EPRINTF6 (DBG_RCVR, "ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p Xid=%016llx state %x.%x - txd on rail %d\n", rcvr, rxd, -+ rxd->MsgXid.Unique, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, msg->Body.FailoverTxd.Rail); -+ -+ if ((nRcvrRail = rcvr->Rails[msg->Body.FailoverTxd.Rail]) == NULL || -+ (nRcvrRail->Rcvr->RailMask & EP_RAIL2RAILMASK (rail->Number)) == NULL) -+ { -+ ep_debugf (DBG_MANAGER, "%s: ep_rcvr_xid_msg_handler: rcvr=%p rxd=%p - still cannot proceed\n", rail->Name, rcvr,rxd); -+ return; -+ } -+ -+ -+ nRxdRail = EP_RCVR_OP (nrcvrRail, GetRxd) (rcvr, nRcvrRail); -+ -+ -+ /* If the RPC was in progress, then rollback and mark it as flagged, -+ * this will then get treated as though the NMDs were not mapped -+ * for the rail when the user initiated the operation. -+ */ -+ switch (rxdRail->RxdMain->DataEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_PUT: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_PUT; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_GET: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_GET: -+ ASSERT (rxdRail->RxdMain->DoneEvent == EP_EVENT_PRIVATE || -+ rxdRail->RxdMain->DoneEvent == EP_EVENT_PENDING); -+ -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_GET; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_PENDING; -+ break; -+ -+ case EP_EVENT_PRIVATE: -+ switch (rxdRail->RxdMain->DoneEvent) -+ { -+ case EP_EVENT_ACTIVE|EP_RXD_PHASE_COMPLETE: -+ case EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE: -+ nRxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ nRxdRail->RxdMain->DoneEvent = EP_EVENT_FLAGGED|EP_RXD_PHASE_COMPLETE; -+ break; -+ -+ case EP_EVENT_PENDING: -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid state\n"); -+ } -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: rxd in invalid staten"); -+ } -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP_EVENT_PRIVATE; -+ -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ BindRxdToRail (rxd, nRxdRail); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+#endif -+} -+ -+void -+ep_rcvr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *) arg; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ -+ if (EP_XIDS_MATCH (msg->Hdr.Xid, rxd->MsgXid)) -+ { -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessNmdMapResponse (rcvr, rxd, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessFailoverResponse (rcvr, rxd, msg); -+ break; -+ -+ default: -+ panic ("ep_rcvr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+ -+EP_RCVR * -+ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvs) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_RCVR *rcvr; -+ struct list_head *el; -+ extern int portals_envelopes; -+ -+ if (portals_envelopes && (svc == EP_MSG_SVC_PORTALS_SMALL || svc == EP_MSG_SVC_PORTALS_LARGE)) -+ { -+ printk ("ep: use %d envelopes rather than %d for portals %s message service\n", sys->Position.pos_nodes * 16, nenvs, -+ svc == EP_MSG_SVC_PORTALS_SMALL ? "small" : "large"); -+ -+ nenvs = portals_envelopes; -+ } -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (rcvr, EP_RCVR *, sizeof (EP_RCVR), 1); -+ -+ if (rcvr == NULL) -+ return (NULL); -+ -+ rcvr->Subsys = subsys; -+ rcvr->Service = svc; -+ rcvr->InputQueueEntries = nenvs; -+ rcvr->FreeDescCount = 0; -+ rcvr->TotalDescCount = 0; -+ rcvr->ForwardRxdCount = 0; -+ -+ spin_lock_init (&rcvr->Lock); -+ INIT_LIST_HEAD (&rcvr->ActiveDescList); -+ -+ kcondvar_init (&rcvr->CleanupSleep); -+ kcondvar_init (&rcvr->FreeDescSleep); -+ spin_lock_init (&rcvr->FreeDescLock); -+ INIT_LIST_HEAD (&rcvr->FreeDescList); -+ INIT_LIST_HEAD (&rcvr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &rcvr->XidCache); -+ -+ rcvr->XidCache.MessageHandler = ep_rcvr_xid_msg_handler; -+ rcvr->XidCache.Arg = rcvr; -+ -+ kmutex_lock (&subsys->Lock); -+ /* See if this service is already in use */ -+ list_for_each (el, &subsys->Receivers) { -+ EP_RCVR *rcvr = list_entry (el, EP_RCVR, Link); -+ -+ if (rcvr->Service == svc) -+ { -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ kmutex_unlock (&subsys->Lock); -+ return NULL; -+ } -+ } -+ -+ -+ list_add_tail (&rcvr->Link, &subsys->Receivers); -+ -+ ep_procfs_rcvr_add(rcvr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.AddRail) (rcvr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (rcvr); -+} -+ -+void -+ep_free_rcvr (EP_RCVR *rcvr) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head list; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP (commsRail, Rcvr.DelRail) (rcvr, commsRail); -+ } -+ -+ ep_procfs_rcvr_del(rcvr); -+ -+ list_del (&rcvr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ INIT_LIST_HEAD (&list); -+ -+ /* abort all rxds - should not be bound to a rail */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (! list_empty (&rcvr->ActiveDescList)) -+ { -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ ASSERT (rxd->RxdRail == NULL); -+ ASSERT (rxd->RxdMain->Len == EP_RXD_PENDING); -+ -+ rxd->State = EP_RXD_COMPLETED; -+ rxd->RxdMain->Len = EP_SHUTDOWN; -+ -+ list_del (&rxd->Link); -+ list_add_tail (&rxd->Link, &list); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&list)) -+ { -+ EP_RXD *rxd = list_entry (list.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ if (rxd->Handler) -+ rxd->Handler (rxd); -+ } -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ continue; -+ } -+ -+ if (rcvr->FreeDescCount == rcvr->TotalDescCount) -+ break; -+ -+ rcvr->CleanupWaiting++; -+ kcondvar_wait (&rcvr->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* must all be in free list */ -+ ASSERT( rcvr->FreeDescCount == rcvr->TotalDescCount); -+ -+ while (! list_empty(& rcvr->DescBlockList) ) -+ FreeRxdBlock (rcvr, list_entry (rcvr->DescBlockList.next, EP_RXD_BLOCK, Link)); -+ -+ /* had better be all gone now */ -+ ASSERT((rcvr->FreeDescCount == 0) && (rcvr->TotalDescCount == 0)); -+ -+ ep_xid_cache_destroy (sys, &rcvr->XidCache); -+ -+ spin_lock_destroy (&rcvr->Lock); -+ KMEM_FREE (rcvr, sizeof (EP_RCVR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+EP_RXD * -+StealRxdFromOtherRail (EP_RCVR *rcvr) -+{ -+ EP_RXD *rxd; -+ int i; -+ -+ /* looking at the the rcvr railmask to find a rail to try to steal rxd from */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ if ((rxd = EP_RCVR_OP (rcvr->Rails[i], StealRxd) (rcvr->Rails[i])) != NULL) -+ return rxd; -+ -+ return NULL; -+} -+ -+long -+CheckUnboundRxd (EP_RCVR *rcvr, EP_RXD *rxd, long nextRunTime) -+{ -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RCVR_RAIL *rcvrRail; -+ int rnum; -+ -+ if ((rnum = ep_rcvr_prefrail (rcvr, EP_NMD_RAILMASK(&rxd->Data))) < 0) -+ rnum = ep_rcvr_prefrail (rcvr, ep_rcvr_availrails (rcvr)); -+ -+ if ( rnum < 0 ) { -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ -+ return (nextRunTime); -+ } -+ -+ ASSERT ( rnum >= 0 ); -+ -+ rcvrRail = rcvr->Rails[rnum]; -+ -+ ASSERT ( rcvrRail != NULL); -+ -+ rxd->State = EP_RXD_RECEIVE_ACTIVE; -+ -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rnum)) && /* not mapped already and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rnum)) == 0) || /* failed mapping, or */ -+ !EP_RCVR_OP (rcvrRail, QueueRxd) (rxd, rcvrRail)) /* failed to queue */ -+ { -+ ASSERT (rxd->RxdRail == NULL); -+ -+ EPRINTF4 (DBG_RCVR,"CheckUnboundRxd: rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", rcvr, rxd, rnum, rcvrRail); -+ -+ rxd->State = EP_RXD_RECEIVE_UNBOUND; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return (nextRunTime); -+} -+ -+int -+CheckRxdNmdsMapped (EP_RCVR *rcvr, EP_RXD *rxd) -+{ -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_SYS *sys = rcvr->Subsys->Subsys.Sys; -+ EP_RAIL *rail = rxdRail->RcvrRail->CommsRail->Rail; -+ int i; -+ -+ /* Try and map the local NMDs before checking to see if we can proceed */ -+ if (! (ep_nmd2railmask (rxd->Local, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Local NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ for (i = 0; i < rxd->nFrags; i++) -+ if (! (EP_NMD_RAILMASK(&rxd->Local[i]) & EP_RAIL2RAILMASK(rail->Number))) -+ if (ep_nmd_map_rails (sys, &rxd->Local[i], EP_RAIL2RAILMASK(rail->Number))) -+ rxd->NextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ /* Try and map remote NMDs if they are not valid for this rail */ -+ if (! (ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ EP_MANAGER_MSG_BODY msgBody; -+ -+ EPRINTF3 (DBG_MAPNMD, "%s: rcvr=%p rxd=%p RPC Remote NMDs not mapped\n", rail->Name, rcvr, rxd); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ msgBody.MapNmd.nFrags = rxd->nFrags; -+ msgBody.MapNmd.Railmask = EP_RAIL2RAILMASK (rail->Number); -+ for (i = 0; i < rxd->nFrags; i++) -+ msgBody.MapNmd.Nmd[i] = rxd->Remote[i]; -+ -+ if (ep_send_message (rail, env->NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST, rxd->MsgXid, &msgBody) == 0) -+ rxd->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ rxd->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ -+ return 0; -+ } -+ -+ if ((ep_nmd2railmask (rxd->Local, rxd->nFrags) & ep_nmd2railmask (rxd->Remote, rxd->nFrags) & EP_RAIL2RAILMASK (rail->Number)) != 0) -+ { -+ rxd->NextRunTime = 0; -+ return 1; -+ } -+ -+ return 0; -+} -+ -+long -+ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime) -+{ -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* Check to see if we're low on rxds */ -+ if (rcvr->FreeDescCount < ep_rxd_lowat) -+ AllocateRxdBlock (rcvr, 0, NULL); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->RailMask & (1 << i) ) -+ nextRunTime = EP_RCVR_OP (rcvr->Rails[i], Check) (rcvr->Rails[i], nextRunTime); -+ -+ /* See if we have any rxd's which need to be handled */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxdRail == NULL) -+ nextRunTime = CheckUnboundRxd (rcvr, rxd, nextRunTime); -+ else -+ { -+ EP_RCVR_RAIL *rcvrRail = rxdRail->RcvrRail; -+ EP_RAIL *rail = rcvrRail->CommsRail->Rail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || /* envelope not received yet */ -+ rail->Nodes[env->NodeId].State != EP_NODE_CONNECTED) /* will be failing over */ -+ continue; -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_PUT_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_PUT_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcPut) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_GET_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_GET_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcGet) (rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ -+ case EP_RXD_COMPLETE_STALLED: -+ if (CheckRxdNmdsMapped (rcvr, rxd)) -+ { -+ rxd->State = EP_RXD_COMPLETE_ACTIVE; -+ -+ EP_RCVR_OP (rcvrRail, RpcComplete)(rxd, rxd->Local, rxd->Remote, rxd->nFrags); -+ } -+ break; -+ } -+ -+ if (rxd->NextRunTime && (nextRunTime == 0 || AFTER (nextRunTime, rxd->NextRunTime))) -+ nextRunTime = rxd->NextRunTime; -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return (nextRunTime); -+} -+ -+void -+ep_display_rxd (DisplayInfo *di, EP_RXD *rxd) -+{ -+ EP_RXD_MAIN *rxdMain = rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ (di->func)(di->arg, " RXD: %p State=%x RxdMain=%p(%x.%x.%x) Data=%x.%x.%x %s\n", rxd, -+ rxd->State, rxd->RxdMain, rxd->NmdMain.nmd_addr, rxd->NmdMain.nmd_len, -+ rxd->NmdMain.nmd_attr, rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, -+ rxd->RxdMain->Len == EP_RXD_PENDING ? "Pending" : "Active"); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d XID=%08x.%08x.%016llx\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, env->TxdMain.nmd_addr, -+ env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags, env->Xid.Generation, env->Xid.Handle, env->Xid.Unique);; -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (rxdRail) EP_RCVR_OP (rxdRail->RcvrRail, DisplayRxd) (di, rxdRail); -+} -+ -+void -+ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ int pendingCount = 0; -+ int railCounts[EP_MAX_RAILS]; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ for (i = 0; i FreeDescLock, flags); -+ list_for_each (el, &rcvr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&rcvr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP_RXD_RAIL *rxdRail = rxd->RxdRail; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ pendingCount++; -+ else -+ activeCount++; -+ -+ if (rxdRail) -+ railCounts[rxdRail->RcvrRail->CommsRail->Rail->Number]++; -+ } -+ -+ (di->func)(di->arg, "RCVR: rcvr=%p number=%d\n", rcvr, rcvr->Service); -+ (di->func)(di->arg, " RXDS Free=%d (%d) Pending=%d Active=%d Rails=%d.%d.%d.%d\n", -+ freeCount, rcvr->FreeDescCount, pendingCount, activeCount, railCounts[0], railCounts[1], -+ railCounts[2], railCounts[3]); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (rcvr->Rails[i] != NULL) -+ EP_RCVR_OP (rcvr->Rails[i], DisplayRcvr) (di, rcvr->Rails[i]); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ -+ if (rxd->RxdMain->Len != EP_RXD_PENDING || full) -+ ep_display_rxd (di, rxd); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep_rxd_received_now(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ INC_STAT(rcvr->stats,rx); -+ ADD_STAT(rcvr->stats,rx_len, rxd->RxdMain->Len); -+ -+ if (rxd->RxdMain->Len < 0 || !EP_IS_MULTICAST(env->Attr)) -+ { -+ rxd->Handler (rxd); -+ } -+ else -+ { -+ EPRINTF5 (DBG_RCVR, "ep_rxd_received: forward rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->ForwardDescLock, flags); -+ list_add_tail (&rxd->Link, &rcvr->Subsys->ForwardDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->ForwardDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+#if defined(CONFIG_EP_NO_CHECK_SUM) -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ ep_rxd_received_now(rxd); -+} -+ -+#else -+ -+void -+ep_rxd_received(EP_RXD *rxd) -+{ -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (env->CheckSum) -+ ep_rxd_queue_csum(rxd); -+ else -+ ep_rxd_received_now(rxd); -+} -+ -+void -+ep_rxd_queue_csum(EP_RXD *rxd) -+{ -+ EP_RCVR *rcvr = rxd->Rcvr; -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_RCVR, "ep_rxd_queue_csum: rxd=%p Data=%08x.%08x.%08x len=%d\n", rxd, -+ rxd->Data.nmd_addr, rxd->Data.nmd_len, rxd->Data.nmd_attr, ep_rxd_len(rxd)); -+ -+ spin_lock_irqsave (&rcvr->Subsys->CheckSumDescLock, flags); -+ list_add_tail (&rxd->CheckSumLink, &rcvr->Subsys->CheckSumDescList); -+ spin_unlock_irqrestore (&rcvr->Subsys->CheckSumDescLock, flags); -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+} -+#endif -+ -+void -+ep_rcvr_fillout_stats(EP_RCVR *rcvr, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr->stats,rx), GET_STAT_PER_SEC(rcvr->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr->stats,rx_len) / (1024*1024)); -+} -+ -+void -+ep_rcvr_rail_fillout_stats(EP_RCVR_RAIL *rcvr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx), GET_STAT_PER_SEC(rcvr_rail->stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(rcvr_rail->stats,rx_len) / (1024*1024), GET_STAT_PER_SEC(rcvr_rail->stats,rx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsRx_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx_elan3.c 2005-07-28 14:52:52.870675392 -0400 -@@ -0,0 +1,1776 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan3.c,v 1.19.2.4 2005/03/10 15:24:08 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define RCVR_TO_RAIL(rcvrRail) ((EP3_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->Device) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+static void RxDataEvent (EP3_RAIL *rail, void *arg); -+static void RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDataCookieOps = -+{ -+ RxDataEvent, -+ RxDataRetry, -+ NULL, /* DmaCancelled */ -+ RxDataVerify, -+}; -+ -+static void RxDoneEvent (EP3_RAIL *rail, void *arg); -+static void RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS RxDoneCookieOps = -+{ -+ RxDoneEvent, -+ RxDoneRetry, -+ NULL, /* DmaCancelled */ -+ RxDoneVerify, -+}; -+ -+static int -+AllocateRxdRailBlock (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL_BLOCK *blk; -+ EP3_RXD_RAIL *rxdRail; -+ sdramaddr_t pRxdElan; -+ EP3_RXD_RAIL_MAIN *pRxdMain; -+ E3_Addr pRxdElanAddr; -+ E3_Addr pRxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i, j; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_RXD_RAIL_BLOCK *, sizeof (EP3_RXD_RAIL_BLOCK), 1); -+ if (blk == NULL) -+ return 0; -+ -+ if ((pRxdElan = ep_alloc_elan (&rail->Generic, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pRxdMain = ep_alloc_main (&rail->Generic, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK, 0, &pRxdMainAddr)) == (sdramaddr_t) 0) -+ { -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pRxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pRxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->Generic.RcvrRail = (EP_RCVR_RAIL *) rcvrRail; -+ rxdRail->RxdElan = pRxdElan; -+ rxdRail->RxdElanAddr = pRxdElanAddr; -+ rxdRail->RxdMain = pRxdMain; -+ rxdRail->RxdMainAddr = pRxdMainAddr; -+ -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), 0); -+ elan3_sdram_writel (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); -+ elan3_sdram_writeq (dev, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr), (long) rxdRail); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ RegisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j], pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), &RxDataCookieOps, (void *) rxdRail); -+ -+ event.ev_Type = EV_TYPE_DMA | (pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[j+1])); -+ event.ev_Count = 0; -+ -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[j]), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ RegisterCookie (&rail->CookieTable, &rxdRail->DataCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), &RxDataCookieOps, (void *) rxdRail); -+ RegisterCookie (&rail->CookieTable, &rxdRail->DoneCookie, pRxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), &RxDoneCookieOps, (void *) rxdRail); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DataCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DataEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, rxdRail->DoneCookie, pRxdMainAddr + offsetof (EP3_RXD_RAIL_MAIN, DoneEvent), 1); -+ elan3_sdram_copyl_to_sdram (dev, &event, pRxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pRxdMain->DataEvent = EP3_EVENT_FREE; -+ pRxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pRxdElan += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdElanAddr += EP3_RXD_RAIL_ELAN_SIZE; -+ pRxdMain = (EP3_RXD_RAIL_MAIN *) ((unsigned long) pRxdMain + EP3_RXD_RAIL_MAIN_SIZE); -+ pRxdMainAddr += EP3_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &rcvrRail->DescBlockList); -+ rcvrRail->TotalDescCount += EP3_NUM_RXD_PER_BLOCK; -+ rcvrRail->FreeDescCount += EP3_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->Rxd[i].Generic.Link, &rcvrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeRxdRailBlock (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ rcvrRail->TotalDescCount -= EP3_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->Rxd[0], i = 0; i < EP3_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ -+ rcvrRail->FreeDescCount--; -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ DeregisterCookie (&rail->CookieTable, &rxdRail->ChainCookie[j]); -+ -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &rxdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Rxd[0].RxdMainAddr, EP3_RXD_RAIL_MAIN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Rxd[0].RxdElanAddr, EP3_RXD_RAIL_ELAN_SIZE * EP3_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP3_RXD_RAIL_BLOCK)); -+} -+ -+static EP3_RXD_RAIL * -+GetRxdRail (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ if (list_empty (&rcvrRail->FreeDescList)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->FreeDescList.next, EP3_RXD_RAIL, Generic.Link); -+ -+ list_del (&rxdRail->Generic.Link); -+ -+ rcvrRail->FreeDescCount--; -+ } -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->FreeDescCount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&RCVR_TO_SUBSYS(rcvrRail)->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+FreeRxdRail (EP3_RCVR_RAIL *rcvrRail, EP3_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = (EP_RAIL *) RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ -+ EP_ASSERT (rail, rxdRail->Generic.RcvrRail == &rcvrRail->Generic); -+ -+ EP_ASSERT (rail, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_FREE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ -+ list_add (&rxdRail->Generic.Link, &rcvrRail->FreeDescList); -+ -+ rcvrRail->FreeDescCount++; -+ -+ if (rcvrRail->FreeDescWaiting) -+ { -+ rcvrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+} -+ -+static void -+BindRxdToRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: BindRxdToRail: rxd=%p rxdRail=%p\n", rail->Generic.Name, rxd, rxdRail); -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, RxdMain), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->Generic; -+ rxdRail->Generic.Rxd = rxd; -+} -+ -+static void -+UnbindRxdFromRail (EP_RXD *rxd, EP3_RXD_RAIL *rxdRail) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->Generic && rxdRail->Generic.Rxd == rxd); -+ -+ EPRINTF3 (DBG_RCVR, "%s: UnbindRxdFromRail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rxdRail->Generic.RcvrRail)->Generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->Generic.Rxd = NULL; -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupall (&rcvrRail->CleanupSleep, &rxd->Rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+} -+ -+static void -+LockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ E3_uint32 RestartBits = 0; -+ int delay = 1; -+ E3_uint32 seq; -+ E3_uint32 reg; -+ -+ ASSERT (SPINLOCK_HELD (&rcvrRail->Generic.Rcvr->Lock)); -+ -+ mb(); -+ elan3_sdram_writel (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 1); -+ mb(); -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ while (seq != sl->sl_seq) -+ { -+ while (sl->sl_seq == (seq - 1)) -+ { -+ mb(); -+ -+ if ((read_reg32 (dev, Exts.InterruptReg) & (INT_TProc | INT_TProcHalted)) != 0 && spin_trylock (&dev->IntrLock)) -+ { -+ reg=read_reg32 (dev, Exts.InterruptReg); -+ ELAN_REG_REC(reg); -+ -+ if ((reg & (INT_TProc | INT_TProcHalted)) != 0&& -+ elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)) != sl->sl_seq) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: LockRcvrThread - thread trapped\n", rail->Generic.Name); -+ -+ /* The thread processor has *really* trapped, and the spinlock is still held. -+ * thus is must have trapped due to a network error - we need to complete the -+ * actions required for this envelope, since we may be spin-locking the receiver -+ * to search the dma retry lists for a particular dma. So must ensure that -+ * if the thread had trapped then the dma has been queued onto the retry list -+ * *before* we inspect them. -+ */ -+ IncrStat (commsRail, LockRcvrTrapped); -+ -+ /* We're going to generate a spurious interrupt here - since we will -+ * handle the thread processor trap directly */ -+ ELAN_REG_REC(reg); -+ if (HandleTProcTrap (dev, &RestartBits)) -+ { -+ /* NOTE - this is not an assert, since the "store" to unlock the lock could -+ * be held up on the PCI interface, whilst the thread processor has -+ * gone on and switched to a new thread, which has then trapped, and -+ * our read of the InterruptReg can overtake the unlock write. -+ * -+ * ASSERT (dev->ThreadTrap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == -+ * elan3_sdram_readl (dev, rcvr->RcvrElan + offsetof (EP_RCVR_ELAN, PendingRxDescsElan))); -+ */ -+ -+ PULSE_SCHED_STATUS (dev, RestartBits); -+ -+ DeliverTProcTrap (dev, dev->ThreadTrap, INT_TProc); -+ } -+ } -+ spin_unlock (&dev->IntrLock); -+ } -+ -+ DELAY (delay); delay++; -+ } -+ seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ } -+} -+ -+static void -+UnlockRcvrThread (EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ -+ mb(); -+ elan3_sdram_writel (rail->Device, sle + offsetof (EP3_SPINLOCK_ELAN, sl_lock), 0); -+ mmiob(); -+} -+ -+void -+CompleteEnvelope (EP3_RAIL *rail, E3_Addr rxdElanAddr, E3_uint32 PAckVal) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD_MAIN *rxdMain = rxdRail->Generic.Rxd->RxdMain; -+ EP_ENVELOPE *env = &rxdMain->Envelope; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ sdramaddr_t queue = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue); -+ sdramaddr_t sle = rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock); -+ EP3_SPINLOCK_MAIN *sl = &rcvrRail->RcvrMain->ThreadLock; -+ int nodeId; -+ EP_NODE_RAIL *nodeRail; -+ E3_DMA_BE dma; -+ E3_Addr nfptr; -+ E3_Addr next; -+ -+ ASSERT (commsRail->Rail == &rail->Generic); -+ ASSERT (rxdElanAddr == elan3_sdram_readl (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))); -+ -+ IncrStat (commsRail, CompleteEnvelope); -+ -+ /* We don't need to aquire the NodeLock here (however we might be holding it), -+ * since this can only get called while the node is connected, or disconnecting. -+ * If the node is disconnecting, then we can get called from FlushDisconnecting() -+ * while holding the NodeLock - after we cannot get called again until the node -+ * has reconnected from scratch. -+ */ -+ /* Copy the envelope information */ -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ -+ if (nfptr == elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top))) -+ nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ else -+ nfptr += elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ -+ /* Copy the envelope and payload (unconditionally) */ -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), env, EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION); -+ -+ /* Copy the received message length */ -+ rxdMain->Len = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len)); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ -+ /* Copy the DMA descriptor to queue on the approriate retry list */ -+ elan3_sdram_copyq_from_sdram (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]), &dma, sizeof (E3_DMA)); /* PCI read block */ -+ -+ EP_ASSERT (&rail->Generic, dma.s.dma_direction == DMA_READ);; -+ -+#if defined(DEBUG_ASSERT) && defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the thread packet could have successfully -+ * transferred the "put" dma to the far side - which could then have -+ * completed - but the far side will see a network error which will -+ * cause the virtual circuit to be dropped by the far side and this -+ * DMA will be removed */ -+ if (rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ printk ("CompleteEnvelope: suspicious dma : Node=%d DataBlock=%d Event=%d\n", -+ env->NodeId, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif -+ -+ EPRINTF6 (DBG_RCVR, "%s: CompleteEnvelope: rxd=%p NodeId=%d Xid=%llx Cookies=%08x,%08x\n", commsRail->Rail->Name, -+ rxdRail, env->NodeId, (long long) env->Xid.Unique, dma.s.dma_srcCookieVProc, dma.s.dma_destCookieVProc); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the original DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dma.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ nodeId = EP_VP_TO_NODE(dma.s.dma_srcVProc); -+ nodeRail = &rail->Generic.Nodes[nodeId]; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (PAckVal != E3_PAckOk) -+ { -+ if (nodeRail->State == EP_NODE_CONNECTED) -+ QueueDmaForRetry (rail, &dma, EP_RETRY_LOW_PRI_RETRY); -+ else -+ QueueDmaOnStalledList (rail, &dma); -+ } -+ -+ /* Finaly forcefully drop the spinlock for the thread */ -+ sl->sl_seq = elan3_sdram_readl (dev, sle + offsetof (EP3_SPINLOCK_ELAN, sl_seq)); -+ -+ wmb(); -+} -+ -+void -+StallThreadForNoDescs (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ -+ EPRINTF3 (DBG_RCVR, "%s: StallThreadForNoDescs - rcvrRail=%p sp=%x\n", commsRail->Rail->Name, rcvrRail, sp); -+ -+ IncrStat (commsRail, StallThread); -+ -+ /* NOTE: spin lock not required as thread is trapped */ -+ -+ if (rcvrRail->RcvrMain->PendingDescsTailp != 0) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - pending descriptors, wakeup thread\n", commsRail->Rail->Name); -+ -+ /* -+ * A receive buffer was queued after the thread had decided to go to -+ * sleep, but before the event interrupt occured. Just restart the -+ * thread to consume the envelope. -+ */ -+ IssueRunThread (rail, sp); -+ } -+ else -+ { -+ EPRINTF1 (DBG_RCVR, "%s: StallThreadForNoDescs - set ThreadWaiting\n", commsRail->Rail->Name); -+ -+ IncrStat (commsRail, ThrdWaiting); -+ -+ /* Mark the rcvr as waiting for a rxd, and schedule a call of ep_check_rcvr -+ * to attempt to "steal" a descriptor from a different rail */ -+ rcvrRail->ThreadWaiting = sp; -+ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ } -+} -+ -+void -+StallThreadForHalted (EP3_RAIL *rail, E3_Addr rcvrElanAddr, E3_Addr sp) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t rcvrElan = ep_elan2sdram (&rail->Generic, rcvrElanAddr); -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) (unsigned long) elan3_sdram_readq (dev, rcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr)); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ unsigned long flags = 0; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ rcvrRail->ThreadHalted = sp; -+ -+ EPRINTF2 (DBG_EPTRAP, "%s: StallThreadForHalted: sp=%08x\n", rail->Generic.Name, sp); -+ -+ if (rcvrRail->CleanupWaiting) -+ kcondvar_wakeupone (&rcvrRail->CleanupSleep, &rcvr->Lock); -+ rcvrRail->CleanupWaiting = 0; -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+/* -+ * RxDataEvent: arg == EP3_RXD_RAIL -+ * Called on completion of receiving data. -+ */ -+static void -+RxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int delay = 1; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent), rxdRail->DataCookie, rxdRail->RxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("%s: RxDataEvent: rxd %p not complete [%x,%x,%x]\n", rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ /* -+ * Note, since the thread will have sent the "get" dma before copying the -+ * envelope, we must check that it has completed doing this, if not then -+ * it might be that the thread trapped due to a network error, so we must -+ * spinlock against the thread -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ LockRcvrThread (rcvrRail); -+ UnlockRcvrThread (rcvrRail); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF7 (DBG_RCVR, "%s: RxDataEvent: rxd=%p rxdRail=%p completed from elan node %d [XID=%llx] Length %d State %x\n", -+ rail->Generic.Name, rxd, rxdRail, env->NodeId, (long long) env->Xid.Unique, rxd->RxdMain->Len, rxd->State); -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_PUT_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->Generic.Number); -+ -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ } -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+} -+ -+/* -+ * RxDataRetry: arg == EP3_RXD_RAIL -+ * Called on retry of "get" dma of large transmit data -+ * and rpc_get/rpc_put and "put" of datavec of rpc completion. -+ */ -+static void -+RxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDataVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDataRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDataRetry: rcvr %p rxd %p [XID=%llx]\n", rail->Generic.Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+RxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ EP_ASSERT (&rail->Generic, -+ (rxd->State == EP_RXD_RECEIVE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_PUT_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_PRIVATE) || -+ (rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (rxd->State == EP_RXD_COMPLETE_ACTIVE ? -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1: /* PCI read */ -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 1)); /* PCI read */ -+ } -+ else -+ { -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_READ_REQUEUE); -+ -+#if defined(DEBUG_SDRAM_ASSERT) -+ /* NOTE: not an assertion, since the "get" DMA can still be running if -+ * it's packet got a network error - and then the "put" from the -+ * far side has completed - however the virtual circuit should -+ * then be dropped by the far side and this DMA will be removed */ -+ if (EP_VP_TO_NODE(dma->s.dma_srcVProc) != ep_rxd_node(rxd) || -+ (rxd->State != EP_RXD_RECEIVE_ACTIVE && rxd->State != EP_RXD_GET_ACTIVE) || -+ rxdRail->RxdMain->DataEvent != EP3_EVENT_ACTIVE || -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) != 1) -+ { -+ EPRINTF6 (DBG_RCVR, "%s: RxDataRetry: suspicious dma : VProc=%d NodeId=%d State=%d DataBlock=%x Event=%d\n", -+ rail->Generic.Name, EP_VP_TO_NODE(dma->s.dma_srcVProc), ep_rxd_node(rxd), rxd->State, rxdRail->RxdMain->DataEvent, -+ elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count))); -+ } -+#endif /* defined(DEBUG_SDRAM_ASSERT) */ -+ } -+#endif /* DEBUG_ASSERT */ -+} -+ -+/* -+ * RxDoneEvent: arg == EP_RXD -+ * Called on completion of large receive. -+ */ -+static void -+RxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP_COMMS_RAIL *commsRail = rcvrRail->Generic.CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ EP_RCVR *rcvr = rxd->Rcvr; -+ ELAN3_DEV *dev = rail->Device; -+ int delay = 1; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent), rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("RxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ printk ("RxDoneEvent: rxd %p not complete [%x,%x.%x]\n", rxd, rxdRail->RxdMain->DoneEvent, -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneEvent: rxd %p completed from elan node %d [XID=%llx]\n", -+ commsRail->Rail->Name, rxd, rxd->RxdMain->Envelope.NodeId, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ IncrStat (commsRail, RxDoneEvent); -+ -+ EP_ASSERT (&rail->Generic, rxdRail->RxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* mark rxd as private */ -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+} -+ -+/* -+ * RxDoneRetry: arg == EP_RXD -+ * Called on retry of "put" of RPC completion status block -+ */ -+static void -+RxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_COMMS_RAIL *commsRail = rxdRail->Generic.RcvrRail->CommsRail; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+#if defined(DEBUG_ASSERT) -+ RxDoneVerify (rail, arg, dma); -+#endif -+ -+ IncrStat (commsRail, RxDoneRetry); -+ -+ EPRINTF4 (DBG_RCVR, "%s: RxDoneRetry: rcvr %p rxd %p [XID=%llx]\n", commsRail->Rail->Name, rxd->Rcvr, rxd, (long long) rxd->RxdMain->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&rxdRail->Backoff, EP_BACKOFF_DONE)); -+} -+ -+static void -+RxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+#if defined(DEBUG_ASSERT) -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) arg; -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == ep_rxd_node(rxd)); -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE && rxdRail->RxdMain->DoneEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 1)); /* PCI read */ -+#endif /* defined(DEBUG_ASSERT) */ -+} -+ -+int -+ep3rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RXD_RAIL *rxdRail; -+ -+ ASSERT ( SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = GetRxdRail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Bind the rxdRail and rxd together */ -+ BindRxdToRail (rxd, rxdRail); -+ -+ /* Mark as active */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); -+ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* Interlock with StallThreadForNoDescs */ -+ spin_lock (&dev->IntrLock); -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p\n", rail->Generic.Name, rxd->Rcvr, rxd, rxdRail); -+ -+ EP3_SPINENTER (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next), 0); /* PCI write */ -+ if (rcvrRail->RcvrMain->PendingDescsTailp == 0) -+ elan3_sdram_writel (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), rxdRail->RxdElanAddr); /* PCI write */ -+ else -+ elan3_sdram_writel (dev, rcvrRail->RcvrMain->PendingDescsTailp, rxdRail->RxdElanAddr); /* PCI write */ -+ rcvrRail->RcvrMain->PendingDescsTailp = rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next); -+ -+ EP3_SPINEXIT (dev, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock), &rcvrRail->RcvrMain->PendingLock); -+ -+ /* If the thread has paused because it was woken up with no receive buffer */ -+ /* ready, then wake it up to process the one we've just added */ -+ if (rcvrRail->ThreadWaiting) -+ { -+ EPRINTF1 (DBG_RCVR, "%s: DoReceive: ThreadWaiting - restart thread\n", rail->Generic.Name); -+ -+ IssueRunThread (rail, rcvrRail->ThreadWaiting); -+ -+ rcvrRail->ThreadWaiting = (E3_Addr) 0; -+ } -+ -+ spin_unlock (&dev->IntrLock); -+ -+ return 1; -+} -+ -+void -+ep3rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to put the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), local += (nFrags-1), remote += (nFrags-1); i >= 0; len += local->nmd_len, i--, local--, remote--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ if (i == (nFrags-1)) -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, dmabe.s.dma_srcCookieVProc); -+ -+ if (i != 0) -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_put: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep3rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_GET_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Generate the DMA chain to get the data in two loops to burst -+ * the data across the PCI bus */ -+ for (len = 0, i = (nFrags-1), remote += (nFrags-1), local += (nFrags-1); i >= 0; len += remote->nmd_len, i--, remote--, local--) -+ { -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_READ, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = remote->nmd_len; -+ dmabe.s.dma_source = remote->nmd_addr; -+ dmabe.s.dma_dest = local->nmd_addr; -+ if (i == (nFrags-1)) -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DataEvent); -+ else -+ dmabe.s.dma_destEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i]); -+ dmabe.s.dma_destCookieVProc = LocalCookie (rail, env->NodeId); -+ dmabe.s.dma_srcEvent = (E3_Addr) 0; -+ dmabe.s.dma_srcCookieVProc = RemoteCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ /* -+ * Always copy down the dma descriptor, since we issue it as a READ_REQUEUE -+ * dma, and the elan will fetch the descriptor to send out of the link from -+ * the rxdElan->Dmas[i] location, before issueing the DMA chain we modify -+ * the dma_source. -+ */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ } -+ -+ for (i = 0; i < nFrags; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the data event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the DMA descriptor will -+ * be read from the EP_RETRY_DMA rather than the orignal DMA - this can then get reused -+ * and an incorrect DMA descriptor sent */ -+ dmabe.s.dma_source = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, Dmas[0]); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_get: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCGet, len); -+} -+ -+void -+ep3rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rxdRail->Generic.RcvrRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ EP3_RXD_RAIL_MAIN *rxdMain = rxdRail->RxdMain; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ E3_DMA_BE dmabe; -+ int i, len; -+ -+ EP_ASSERT (&rail->Generic, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP_ASSERT (&rail->Generic, rxdMain->DataEvent == EP3_EVENT_PRIVATE && rxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); /* PCI read */ -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the status block dma */ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = sizeof (EP_STATUSBLK); -+ dmabe.s.dma_source = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ dmabe.s.dma_dest = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ dmabe.s.dma_destEvent = env->TxdRail + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA(env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF8 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, dmabe.s.dma_source, dmabe.s.dma_dest, dmabe.s.dma_size, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ -+ for (len = 0, i = EP_MAXFRAG, remote += (nFrags-1), local += (nFrags-1); i > EP_MAXFRAG-nFrags; len += local->nmd_len, i--, local--, remote--) -+ { -+ /* copy down previous dma */ -+ elan3_sdram_copyq_to_sdram (dev, &dmabe, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Dmas[i]), sizeof (E3_DMA)); /* PCI write block */ -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_NORMAL, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = local->nmd_len; -+ dmabe.s.dma_source = local->nmd_addr; -+ dmabe.s.dma_dest = remote->nmd_addr; -+ dmabe.s.dma_destEvent = (E3_Addr) 0; -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (env->NodeId); -+ dmabe.s.dma_srcEvent = rxdRail->RxdElanAddr + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i-1]); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, env->NodeId); -+ -+ EPRINTF9 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x Cookies=%x.%x\n", rail->Generic.Name, rxd, -+ (long long) env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len, dmabe.s.dma_destCookieVProc, -+ dmabe.s.dma_srcCookieVProc); -+ } -+ -+ for (i = EP_MAXFRAG-nFrags; i < EP_MAXFRAG; i++) -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[i].ev_Count), 1); /* PCI write */ -+ -+ /* Initialise the done event */ -+ elan3_sdram_writel (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 1); /* PCI write */ -+ rxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ -+ ASSERT (rail->Generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->Generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ { -+ /* Failed to issue the dma command, so copy the dma descriptor and queue it for retry */ -+ EPRINTF2 (DBG_RCVR, "%s: ep3rcvr_rpc_complete: queue rxd %p on retry thread\n", rail->Generic.Name, rxd); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ } -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+void -+ep3rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ sdramaddr_t qdescs = ((EP3_COMMS_RAIL *) commsRail)->QueueDescs; -+ EP3_RCVR_RAIL *rcvrRail; -+ EP3_InputQueue qdesc; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP3_RCVR_RAIL *, sizeof (EP3_RCVR_RAIL), TRUE); -+ -+ kcondvar_init (&rcvrRail->CleanupSleep); -+ spin_lock_init (&rcvrRail->FreeDescLock); -+ INIT_LIST_HEAD (&rcvrRail->FreeDescList); -+ INIT_LIST_HEAD (&rcvrRail->DescBlockList); -+ -+ rcvrRail->Generic.CommsRail = commsRail; -+ rcvrRail->Generic.Rcvr = rcvr; -+ -+ rcvrRail->RcvrMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RCVR_RAIL_MAIN), 0, &rcvrRail->RcvrMainAddr); -+ rcvrRail->RcvrElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RCVR_RAIL_ELAN), 0, &rcvrRail->RcvrElanAddr); -+ rcvrRail->InputQueueBase = ep_alloc_elan (&rail->Generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->InputQueueAddr); -+ stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rcvrRail->ThreadStack); -+ -+ rcvrRail->TotalDescCount = 0; -+ rcvrRail->FreeDescCount = 0; -+ -+ /* Initialise the main/elan spin lock */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadLock.sl_seq), 0); -+ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_lock), 0); -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingLock.sl_seq), 0); -+ -+ /* Initialise the receive lists */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), 0); -+ -+ /* Initialise the ThreadShould Halt */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), 0); -+ -+ /* Initialise pointer to the ep_rcvr_rail */ -+ elan3_sdram_writeq (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, MainAddr), (unsigned long) rcvrRail); -+ -+ /* Initialise elan visible main memory */ -+ rcvrRail->RcvrMain->ThreadLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingLock.sl_seq = 0; -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ -+ /* initialise and copy down the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_base = rcvrRail->InputQueueAddr; -+ qdesc.q_top = rcvrRail->InputQueueAddr + (rcvr->InputQueueEntries-1) * EP_INPUTQ_SIZE; -+ qdesc.q_fptr = rcvrRail->InputQueueAddr; -+ qdesc.q_bptr = rcvrRail->InputQueueAddr + EP_INPUTQ_SIZE; -+ qdesc.q_size = EP_INPUTQ_SIZE; -+ qdesc.q_event.ev_Count = 0; -+ qdesc.q_event.ev_Type = 0; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, qdescs + rcvr->Service * sizeof (EP3_InputQueue), sizeof (EP3_InputQueue)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = &rcvrRail->Generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* initialise and run the Elan thread to process the queue */ -+ IssueRunThread (rail, ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "ep3comms_rcvr"), -+ rcvrRail->ThreadStack, stack, EP3_STACK_SIZE, 5, -+ rail->RailElanAddr, rcvrRail->RcvrElanAddr, rcvrRail->RcvrMainAddr, -+ EP_MSGQ_ADDR(rcvr->Service), -+ rail->ElanCookies)); -+} -+ -+void -+ep3rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) rcvr->Rails[rail->Generic.Number]; -+ unsigned long flags; -+ struct list_head *el, *nel; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: removing rail\n", rail->Generic.Name); -+ -+ /* flag the rail as no longer available */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* mark the input queue descriptor as full */ -+ SetQueueLocked(rail, ((EP3_COMMS_RAIL *)commsRail)->QueueDescs + rcvr->Service * sizeof (EP3_InputQueue)); -+ -+ /* need to halt the thread first */ -+ /* set ThreadShouldHalt in elan memory */ -+ /* then trigger the event */ -+ /* and wait on haltWait */ -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, ThreadShouldHalt), TRUE); -+ -+ IssueSetevent (rail, EP_MSGQ_ADDR(rcvr->Service) + offsetof(EP3_InputQueue, q_event)); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ while (rcvrRail->ThreadHalted == 0) -+ { -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point the thread is halted and it has no envelopes */ -+ -+ /* we need to wait until all the rxd's in the list that are -+ * bound to the rail we are removing are not pending -+ */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el,EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ EPRINTF1 (DBG_RCVR, "%s: ep3rcvr_del_rail: waiting for active rxd's to be returned\n", rail->Generic.Name); -+ -+ rcvrRail->CleanupWaiting++; -+ kcondvar_wait (&rcvrRail->CleanupSleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are not pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ /* here we need to unbind the remaining rxd's */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail(rcvrRail, rxdRail ); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ while (rcvrRail->FreeDescCount != rcvrRail->TotalDescCount) -+ { -+ rcvrRail->FreeDescWaiting++; -+ kcondvar_wait (&rcvrRail->FreeDescSleep, &rcvrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->Generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->DescBlockList)) -+ FreeRxdRailBlock (rcvrRail, list_entry(rcvrRail->DescBlockList.next, EP3_RXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->TotalDescCount == 0) && (rcvrRail->TotalDescCount == rcvrRail->FreeDescCount)); -+ -+ ep_free_elan (&rail->Generic, rcvrRail->ThreadStack, EP3_STACK_SIZE); -+ ep_free_elan (&rail->Generic, rcvrRail->InputQueueAddr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->Generic, rcvrRail->RcvrElanAddr, sizeof (EP3_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->Generic, rcvrRail->RcvrMainAddr, sizeof (EP3_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP3_RCVR_RAIL)); -+} -+ -+EP_RXD * -+ep3rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ E3_Addr rxdElanAddr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ LockRcvrThread (rcvrRail); -+ if ((rxdElanAddr = elan3_sdram_readl (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs))) != 0) -+ { -+ sdramaddr_t rxdElan = ep_elan2sdram (&rail->Generic, rxdElanAddr); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) (unsigned long) elan3_sdram_readq (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, MainAddr)); -+ EP_RXD *rxd = rxdRail->Generic.Rxd; -+ sdramaddr_t next; -+ -+ EPRINTF2 (DBG_RCVR, "%s: StealRxdFromOtherRail stealing rxd %p\n", rail->Generic.Name, rail); -+ -+ /* Remove the RXD from the pending desc list */ -+ if ((next = elan3_sdram_readl (rail->Device, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Next))) == 0) -+ rcvrRail->RcvrMain->PendingDescsTailp = 0; -+ elan3_sdram_writel (rail->Device, rcvrRail->RcvrElan + offsetof (EP3_RCVR_RAIL_ELAN, PendingDescs), next); -+ UnlockRcvrThread (rcvrRail); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* Mark rxdRail as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ return rxd; -+ } -+ -+ UnlockRcvrThread (rcvrRail); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ return NULL; -+} -+ -+long -+ep3rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->Generic.Rcvr; -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ EP_RXD *rxd; -+ unsigned long flags; -+ -+ if (rcvrRail->FreeDescCount < ep_rxd_lowat && !AllocateRxdRailBlock (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ if (rcvrRail->ThreadWaiting && (rxd = StealRxdFromOtherRail (rcvr)) != NULL) -+ { -+ /* Map the receive buffer into this rail as well */ -+ EPRINTF4 (DBG_RCVR, "%s: mapping rxd->Data (%08x.%08x.%08x) into this rails\n", -+ rail->Generic.Name, rxd->Data.nmd_addr,rxd->Data.nmd_len, rxd->Data.nmd_attr); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ if ((!(EP_NMD_RAILMASK (&rxd->Data) & EP_RAIL2RAILMASK(rail->Generic.Number)) && /* not already mapped and */ -+ ep_nmd_map_rails (sys, &rxd->Data, EP_RAIL2RAILMASK(rail->Generic.Number)) == 0) || /* failed to map it */ -+ ep3rcvr_queue_rxd (rxd, &rcvrRail->Generic)) /* or failed to queue it */ -+ { -+ EPRINTF5 (DBG_RCVR,"%s: stolen rcvr=%p rxd=%p -> rnum=%d rcvrRail=%p (failed)\n", -+ rail->Generic.Name, rcvr, rxd, rail->Generic.Number, rcvrRail); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ } -+ -+ return nextRunTime; -+} -+ -+static void -+ep3rcvr_flush_filtering (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Generic.Rail; -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t qdesc = commsRail->QueueDescs + rcvr->Service*sizeof (EP3_InputQueue); -+ E3_Addr qTop = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_top)); -+ E3_Addr qBase = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qSize = elan3_sdram_readl (dev,qdesc + offsetof (EP3_InputQueue, q_size)); -+ E3_uint32 nfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ nfptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_fptr)); -+ qbptr = elan3_sdram_readl (dev, qdesc + offsetof (EP3_InputQueue, q_bptr)); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ -+ while (nfptr != qbptr) -+ { -+ unsigned nodeId = elan3_sdram_readl (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep3rcvr_flush_filtering: nodeId=%d State=%d\n", rail->Generic.Name, nodeId, rail->Generic.Nodes[nodeId].State); -+ -+ if (rail->Generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan3_sdram_writel (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr) + -+ offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (nfptr == qTop) -+ nfptr = qBase; -+ else -+ nfptr += qSize; -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep3rcvr_flush_flushing (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF6 (DBG_DISCON, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p state %x.%x elan node %d\n", rail->Generic.Name, -+ rcvr, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep3rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_flush_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep3rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep3rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep3rcvr_failover_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_MANAGER_MSG_BODY msgBody; -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF6 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p elan node %d state %x.%x\n", rail->Generic.Name, rcvr, rxd, env->NodeId, -+ rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent) && !(EP_IS_NO_FAILOVER(env->Attr))) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF7 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p State %x.%x Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->Generic.Name, rxd, rxdRail->RxdMain->DataEvent, rxdRail->RxdMain->DoneEvent, -+ (long long) env->Xid.Unique, (long long) rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->Generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep3rcvr_failover_callback: rxd state is aborted but bound to a rail\n"); -+ break; -+ } -+ -+ EPRINTF3 (DBG_FAILOVER, "%s: ep3rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->Generic.Name, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep3rcvr_disconnect_callback (EP_RCVR *rcvr, EP3_RCVR_RAIL *rcvrRail) -+{ -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ LockRcvrThread (rcvrRail); /* PCI lock */ -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF4 (DBG_DISCON, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p elan node %d\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep3rcvr_disconnect_callback: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->RxdMain->DataEvent == EP3_EVENT_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* clear the data event - the done event should already be zero */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP_ASSERT (&rail->Generic, EP_IS_RPC(env->Attr)); -+ -+ if (!EP3_EVENT_FIRED (rxdRail->DoneCookie, rxdRail->RxdMain->DoneEvent)) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - not able to failover\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ -+ /* Mark as no longer active */ -+ rxdRail->RxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ rxdRail->RxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (dev, rxdRail->RxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ UnbindRxdFromRail (rxd, rxdRail); -+ FreeRxdRail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep3rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->Generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ UnlockRcvrThread (rcvrRail); /* PCI unlock */ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep3rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP3_RXD_RAIL *rxdRail = (EP3_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->RxdElan; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rxdRail->Generic.RcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[0].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[1].ev_Type))); -+ (di->func)(di->arg, " ChainEvent=%x.%x %x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[2].ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, ChainEvent[3].ev_Type))); -+ (di->func)(di->arg, " DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, DoneEvent.ev_Type))); -+ (di->func)(di->arg, " Data=%x Len=%x\n", -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_addr)), -+ elan3_sdram_readl (dev, rxdElan + offsetof (EP3_RXD_RAIL_ELAN, Data.nmd_len))); -+} -+ -+void -+ep3rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP3_RCVR_RAIL *rcvrRail = (EP3_RCVR_RAIL *) r; -+ EP3_COMMS_RAIL *commsRail = (EP3_COMMS_RAIL *) rcvrRail->Generic.CommsRail; -+ EP3_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN3_DEV *dev = rail->Device; -+ sdramaddr_t queue = commsRail->QueueDescs + rcvrRail->Generic.Rcvr->Service * sizeof (EP3_InputQueue); -+ E3_Addr qbase = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)); -+ E3_Addr qtop = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)); -+ E3_uint32 qsize = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)); -+ int freeCount = 0; -+ int blockCount = 0; -+ unsigned long flags; -+ struct list_head *el; -+ -+ spin_lock_irqsave (&rcvrRail->FreeDescLock, flags); -+ list_for_each (el, &rcvrRail->FreeDescList) -+ freeCount++; -+ list_for_each (el, &rcvrRail->DescBlockList) -+ blockCount++; -+ spin_unlock_irqrestore (&rcvrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail %d FreeDesc %d (%d) Total %d Blocks %d %s\n", -+ rail->Generic.Number, rcvrRail->FreeDescCount, freeCount, rcvrRail->TotalDescCount, blockCount, -+ rcvrRail->ThreadWaiting ? "ThreadWaiting" : ""); -+ -+ (di->func)(di->arg, " InputQueue state=%x bptr=%x size=%x top=%x base=%x fptr=%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_state)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_bptr)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_size)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_top)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_base)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr))); -+ (di->func)(di->arg, " event=%x.%x [%x.%x] wevent=%x.%x\n", -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Type)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Count)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Source)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_event.ev_Dest)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wevent)), -+ elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_wcount))); -+ -+ LockRcvrThread (rcvrRail); -+ { -+ E3_Addr nfptr = elan3_sdram_readl (dev, queue + offsetof (EP3_InputQueue, q_fptr)); -+ EP_ENVELOPE env; -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ -+ while (nfptr != elan3_sdram_readl (dev, queue + offsetof (E3_Queue, q_bptr))) -+ { -+ elan3_sdram_copyl_from_sdram (dev, rcvrRail->InputQueueBase + (nfptr - rcvrRail->InputQueueAddr), -+ &env, sizeof (EP_ENVELOPE)); -+ -+ (di->func)(di->arg, " ENVELOPE Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", -+ env.Version, env.Attr, env.Xid.Generation, env.Xid.Handle, (long long) env.Xid.Unique); -+ (di->func)(di->arg, " NodeId=%x Range=%x TxdRail=%x TxdMain=%x.%x.%x\n", -+ env.NodeId, env.Range, env.TxdRail, env.TxdMain.nmd_addr, -+ env.TxdMain.nmd_len, env.TxdMain.nmd_attr); -+ -+ -+ if (nfptr == qtop) -+ nfptr = qbase; -+ else -+ nfptr += qsize; -+ } -+ } -+ UnlockRcvrThread (rcvrRail); -+} -+ -+void -+ep3rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_RCVR_RAIL * ep4rcvr_rail = (EP3_RCVR_RAIL *) rcvr_rail; */ -+} -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsRx_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsRx_elan4.c 2005-07-28 14:52:52.874674784 -0400 -@@ -0,0 +1,1758 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsRx_elan4.c,v 1.30.2.3 2005/03/10 15:24:09 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsRx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define RCVR_TO_COMMS(rcvrRail) ((EP4_COMMS_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail) -+#define RCVR_TO_RAIL(rcvrRail) ((EP4_RAIL *) ((EP_RCVR_RAIL *) rcvrRail)->CommsRail->Rail) -+#define RCVR_TO_DEV(rcvrRail) (RCVR_TO_RAIL(rcvrRail)->r_ctxt.ctxt_dev) -+#define RCVR_TO_SUBSYS(rcvrRail) (((EP_RCVR_RAIL *) rcvrRail)->Rcvr->Subsys) -+ -+#define RXD_TO_RCVR(txdRail) ((EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail) -+#define RXD_TO_RAIL(txdRail) RCVR_TO_RAIL(RXD_TO_RCVR(rxdRail)) -+ -+static void rxd_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ void -+__ep4_rxd_assert_free (EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int i, failed = 0; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ if (((rxdRail)->rxd_main->rxd_sent[i] != EP4_STATE_FREE)) -+ failed |= (1 << i); -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_FREE)) -+ failed |= (1 << 5); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_FREE)) -+ failed |= (1 << 6); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 7); -+ for (i = 0; i < EP_MAXFRAG; i++) -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << (8 + i)); -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) -+ failed |= (1 << 12); -+ if (((int)(elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) -+ failed |= (1 << 13); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_free: failed=%x rxdRail=%p %s - %d\n", failed, rxdRail, file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ (rxdRail)->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_FREE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (RXD_TO_RAIL(rxdRail)->r_ctxt.ctxt_dev, -+ (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType) + 4, 0); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_pending(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rcvrRail); -+ register int failed = 0; -+ -+ failed |= ((rxdRail)->rxd_main->rxd_done != EP4_STATE_ACTIVE); -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_pending: %s - %d\n", file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_pending"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_assert_private(EP4_RXD_RAIL *rxdRail, const char *file, const int line) -+{ -+ EP4_RCVR_RAIL *rcvrRail = RXD_TO_RCVR(rxdRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ register int failed = 0; -+ -+ if (((rxdRail)->rxd_main->rxd_failed != EP4_STATE_ACTIVE)) failed |= (1 << 0); -+ if (((rxdRail)->rxd_main->rxd_done != EP4_STATE_PRIVATE)) failed |= (1 << 1); -+ -+ if (sdram_assert) -+ { -+ if (((elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)) >> 32) != 0)) failed |= (1 << 2); -+ if (((int) (elan4_sdram_readq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)) >> 32) != -32)) failed |= (1 << 3); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_rxd_assert_private: %s - %d\n", file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_rxd_assert_private: %s - %d\n", file, line); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType) + 4, 0); -+ elan4_sdram_writew (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType) + 4, -32); -+ } -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "__ep4_rxd_assert_private"); -+ } -+} -+ -+static __inline__ void -+__ep4_rxd_private_to_free (EP4_RXD_RAIL *rxdRail) -+{ -+ register int i; -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FREE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_FREE; -+} -+ -+static __inline__ void -+__ep4_rxd_force_private (EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RXD_TO_RAIL(rxdRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ (rxdRail)->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ (rxdRail)->rxd_main->rxd_done = EP4_STATE_PRIVATE; -+ -+ if (sdram_assert) -+ elan4_sdram_writeq (dev, (rxdRail)->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+} -+ -+#define EP4_RXD_ASSERT_FREE(rxdRail) __ep4_rxd_assert_free(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PENDING(rxdRail) __ep4_rxd_assert_pending(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_ASSERT_PRIVATE(rxdRail) __ep4_rxd_assert_private(rxdRail, __FILE__, __LINE__) -+#define EP4_RXD_PRIVATE_TO_FREE(rxdRail) __ep4_rxd_private_to_free(rxdRail) -+#define EP4_RXD_FORCE_PRIVATE(rxdRail) __ep4_rxd_force_private(rxdRail) -+ -+static int -+alloc_rxd_block (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL_BLOCK *blk; -+ EP4_RXD_RAIL_MAIN *rxdMain; -+ EP_ADDR rxdMainAddr; -+ sdramaddr_t rxdElan; -+ EP_ADDR rxdElanAddr; -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i, j; -+ -+ KMEM_ZALLOC (blk, EP4_RXD_RAIL_BLOCK *, sizeof (EP4_RXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((rxdElan = ep_alloc_elan (&rail->r_generic, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((rxdMain = ep_alloc_main (&rail->r_generic, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK, 0, &rxdMainAddr)) == (EP4_RXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+ } -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rxdRail->rxd_generic.RcvrRail = &rcvrRail->rcvr_generic; -+ rxdRail->rxd_elan = rxdElan; -+ rxdRail->rxd_elan_addr = rxdElanAddr; -+ rxdRail->rxd_main = rxdMain; -+ rxdRail->rxd_main_addr = rxdMainAddr; -+ -+ /* reserve 128 bytes of "event" cq space for the chained STEN packets */ -+ if ((rxdRail->rxd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_RXD_STEN_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* allocate a single word of "setevent" command space */ -+ if ((rxdRail->rxd_scq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ goto failed; -+ } -+ -+ /* initialise the completion events */ -+ for (j = 0; j <= EP_MAXFRAG; j++) -+ rxdMain->rxd_sent[i] = EP4_STATE_FREE; -+ -+ rxdMain->rxd_done = EP4_STATE_FREE; -+ rxdMain->rxd_failed = EP4_STATE_FREE; -+ -+ /* initialise the scq for the thread */ -+ rxdMain->rxd_scq = rxdRail->rxd_scq->ecq_addr; -+ -+ /* initialise the "start" event to copy the first STEN packet into the command queue */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the "chain" events to copy the next STEN packet into the command queue */ -+ for (j = 0; j < EP_MAXFRAG; j++) -+ { -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j+1])); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j].ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ } -+ -+ /* initialise the portions of the sten packets which don't change */ -+ for (j = 0; j < EP_MAXFRAG+1; j++) -+ { -+ if (j < EP_MAXFRAG) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[j])); -+ else -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_dma_dstEvent), -+ rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_sent[j]))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_ok_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_guard), -+ GUARD_CMD | GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_fail_setevent), -+ SET_EVENT_CMD | (rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[j].c_nop_cmd), -+ NOP_CMD); -+ } -+ -+ /* register a main interrupt cookie */ -+ ep4_register_intcookie (rail, &rxdRail->rxd_intcookie, rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ rxd_interrupt, rxdRail); -+ -+ /* initialise the command stream for the done event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_done))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the command stream for the fail event */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (rxdMainAddr + offsetof (EP4_RXD_RAIL_MAIN, rxd_failed))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (rxdRail->rxd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ /* initialise the done and fail events */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopySource), -+ rxdElanAddr + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed_cmd)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CopyDest), -+ rxdRail->rxd_ecq->ecq_addr); -+ -+ /* initialise the pointer to the main memory portion */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main), -+ rxdMainAddr); -+ -+ /* move onto next descriptor */ -+ rxdElan += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdElanAddr += EP4_RXD_RAIL_ELAN_SIZE; -+ rxdMain = (EP4_RXD_RAIL_MAIN *) ((unsigned long) rxdMain + EP4_RXD_RAIL_MAIN_SIZE); -+ rxdMainAddr += EP4_RXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&blk->blk_link, &rcvrRail->rcvr_blocklist); -+ -+ rcvrRail->rcvr_totalcount += EP4_NUM_RXD_PER_BLOCK; -+ rcvrRail->rcvr_freecount += EP4_NUM_RXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++) -+ list_add (&blk->blk_rxds[i].rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ rxdRail--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ } -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, rxdElanAddr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+ -+static void -+free_rxd_block (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ rcvrRail->rcvr_totalcount -= EP4_NUM_RXD_PER_BLOCK; -+ -+ for (rxdRail = &blk->blk_rxds[0], i = 0; i < EP4_NUM_RXD_PER_BLOCK; i++, rxdRail++) -+ { -+ rcvrRail->rcvr_freecount--; -+ -+ ep4_put_ecq (rail, rxdRail->rxd_ecq, EP4_RXD_STEN_CMD_NDWORDS); -+ ep4_put_ecq (rail, rxdRail->rxd_scq, 1); -+ -+ ep4_deregister_intcookie (rail, &rxdRail->rxd_intcookie); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_RXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_rxds[0].rxd_main_addr, EP4_RXD_RAIL_MAIN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_rxds[0].rxd_elan_addr, EP4_RXD_RAIL_ELAN_SIZE * EP4_NUM_RXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_RXD_RAIL_BLOCK)); -+} -+ -+static EP4_RXD_RAIL * -+get_rxd_rail (EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = RCVR_TO_SUBSYS(rcvrRail); -+ EP4_RXD_RAIL *rxdRail; -+ unsigned long flags; -+ int low_on_rxds; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ if (list_empty (&rcvrRail->rcvr_freelist)) -+ rxdRail = NULL; -+ else -+ { -+ rxdRail = list_entry (rcvrRail->rcvr_freelist.next, EP4_RXD_RAIL, rxd_generic.Link); -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ list_del (&rxdRail->rxd_generic.Link); -+ -+ rcvrRail->rcvr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_rxds = (rcvrRail->rcvr_freecount < ep_rxd_lowat); -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ if (low_on_rxds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (rxdRail); -+} -+ -+static void -+free_rxd_rail (EP4_RCVR_RAIL *rcvrRail, EP4_RXD_RAIL *rxdRail) -+{ -+ unsigned long flags; -+ -+ EP4_RXD_ASSERT_FREE(rxdRail); -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ -+ list_add (&rxdRail->rxd_generic.Link, &rcvrRail->rcvr_freelist); -+ -+ rcvrRail->rcvr_freecount++; -+ -+ if (rcvrRail->rcvr_freewaiting) -+ { -+ rcvrRail->rcvr_freewaiting--; -+ kcondvar_wakeupall (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+} -+ -+static void -+bind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ -+ EPRINTF3 (DBG_RCVR, "%s: bind_rxd_rail: rxd=%p rxdRail=%p\n", rail->r_generic.Name, rxd, rxdRail); -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_rxd), rxd->NmdMain.nmd_addr); /* PCI write */ -+ -+ rxd->RxdRail = &rxdRail->rxd_generic; -+ rxdRail->rxd_generic.Rxd = rxd; -+} -+ -+static void -+unbind_rxd_rail (EP_RXD *rxd, EP4_RXD_RAIL *rxdRail) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ -+ ASSERT (SPINLOCK_HELD (&rxd->Rcvr->Lock)); -+ ASSERT (rxd->RxdRail == &rxdRail->rxd_generic && rxdRail->rxd_generic.Rxd == rxd); -+ -+ EP4_RXD_ASSERT_PRIVATE (rxdRail); -+ -+ EPRINTF3 (DBG_RCVR, "%s: unbind_rxd_rail: rxd=%p rxdRail=%p\n", RCVR_TO_RAIL(rcvrRail)->r_generic.Name, rxd, rxdRail); -+ -+ rxd->RxdRail = NULL; -+ rxdRail->rxd_generic.Rxd = NULL; -+ -+ if (rcvrRail->rcvr_cleanup_waiting) -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rxd->Rcvr->Lock); -+ rcvrRail->rcvr_cleanup_waiting = 0; -+ -+ EP4_RXD_PRIVATE_TO_FREE (rxdRail); -+} -+ -+ -+static void -+rcvr_stall_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ EPRINTF1 (DBG_RCVR, "rcvr_stall_interrupt: rcvrRail %p thread halted\n", rcvrRail); -+ -+ rcvrRail->rcvr_thread_halted = 1; -+ -+ kcondvar_wakeupall (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+rcvr_stall_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ sdramaddr_t qdesc = ((EP4_COMMS_RAIL *) commsRail)->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_uint64 qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ /* Mark the queue as full by writing the fptr */ -+ if (qbptr == (rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1))) -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), rcvrRail->rcvr_slots_addr); -+ else -+ elan4_sdram_writeq (dev, qdesc + offsetof (E4_InputQueue, q_fptr), qbptr + EP_INPUTQ_SIZE); -+ -+ /* Notify the thread that it should stall after processing any outstanding envelopes */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvrRail->rcvr_stall_intcookie.int_val); -+ -+ /* Issue a swtevent to the queue event to wake the thread up */ -+ ep4_set_event_cmd (rcvrRail->rcvr_resched, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent)); -+} -+ -+static void -+rxd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) arg; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP_RCVR *rcvr = rcvrRail->rcvr_generic.Rcvr; -+ EP4_RXD_RAIL_MAIN *rxdMain = rxdRail->rxd_main; -+ unsigned long delay = 1; -+ EP_RXD *rxd; -+ EP_ENVELOPE *env; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ -+ for (;;) -+ { -+ if (rxdMain->rxd_done == EP4_STATE_FINISHED || rxdMain->rxd_failed == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to rxd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ EP_ASSFAIL (RCVR_TO_RAIL(rcvrRail), "rxd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY(delay); -+ delay <<= 1; -+ } -+ -+ if (rxdMain->rxd_done != EP4_STATE_FINISHED) -+ { -+ EPRINTF8 (DBG_RETRY, "%s: rxd_interrupt: rxdRail %p retry: done=%d failed=%d NodeId=%d XID=%08x.%08x.%016llx\n", -+ rail->r_generic.Name, rxdRail, (int)rxdMain->rxd_done, (int)rxdMain->rxd_failed, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.NodeId, -+ rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Generation, rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Handle, -+ rxdRail->rxd_generic.Rxd->RxdMain->Envelope.Xid.Unique); -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX backoff ? */ -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ return; -+ } -+ -+ rxd = rxdRail->rxd_generic.Rxd; -+ env = &rxd->RxdMain->Envelope; -+ -+ /* -+ * Note, since the thread will have sent the remote dma packet before copying -+ * the envelope, we must check that it has completed doing this, we do this -+ * by acquiring the spinlock against the thread which it only drops once it's -+ * completed. -+ */ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING) -+ { -+ EP4_SPINENTER (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ EP4_SPINEXIT (rail->r_ctxt.ctxt_dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), -+ &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ ASSERT (env->Version == EP_ENVELOPE_VERSION && rxd->RxdMain->Len != EP_RXD_PENDING); -+ } -+ -+ EPRINTF8 (DBG_RCVR, "%s: rxd_interrupt: rxd %p finished from %d XID %08x.%08x.%016llx len %d attr %x\n", rail->r_generic.Name, -+ rxd, rxd->RxdMain->Envelope.NodeId, rxd->RxdMain->Envelope.Xid.Generation, rxd->RxdMain->Envelope.Xid.Handle, -+ rxd->RxdMain->Envelope.Xid.Unique, rxd->RxdMain->Len, rxd->RxdMain->Envelope.Attr); -+ -+ rxdMain->rxd_done = EP4_STATE_PRIVATE; -+ rxd->Data.nmd_attr = EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxd->RxdMain->Len >= 0 && EP_IS_RPC(env->Attr)) -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ else -+ { -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ -+ if (rxd->RxdMain->Len >= 0) { -+ INC_STAT(rcvrRail->rcvr_generic.stats,rx); -+ ADD_STAT(rcvrRail->rcvr_generic.stats,rx_len,rxd->RxdMain->Len); -+ INC_STAT(rail->r_generic.Stats,rx); -+ ADD_STAT(rail->r_generic.Stats,rx_len,rxd->RxdMain->Len); -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ ep_rxd_received (rxd); -+ -+ break; -+ -+ case EP_RXD_PUT_ACTIVE: -+ case EP_RXD_GET_ACTIVE: -+ rxd->State = EP_RXD_RPC_IN_PROGRESS; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler (rxd); -+ break; -+ -+ case EP_RXD_COMPLETE_ACTIVE: -+ rxd->State = EP_RXD_COMPLETED; -+ -+ /* remove from active list */ -+ list_del (&rxd->Link); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ rxd->Handler(rxd); -+ break; -+ -+ default: -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ printk ("%s: rxd_interrupt: rxd %p in invalid state %d\n", rail->r_generic.Name, rxd, rxd->State); -+ /* NOTREACHED */ -+ } -+} -+ -+static void -+ep4rcvr_flush_filtering (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ E4_Addr qbase = rcvrRail->rcvr_slots_addr; -+ E4_Addr qlast = qbase + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1); -+ E4_uint64 qfptr, qbptr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* zip down the input queue and invalidate any envelope we find to a node which is locally passivated */ -+ qfptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)); -+ qbptr = elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ -+ while (qfptr != qbptr) -+ { -+ unsigned int nodeId = elan4_sdram_readl (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, NodeId)); -+ -+ EPRINTF3 (DBG_DISCON, "%s: ep4rcvr_flush_filtering: nodeId=%d State=%d\n", rail->r_generic.Name, nodeId, rail->r_generic.Nodes[nodeId].State); -+ -+ if (rail->r_generic.Nodes[nodeId].State == EP_NODE_LOCAL_PASSIVATE) -+ elan4_sdram_writel (dev, rcvrRail->rcvr_slots + (qfptr - qbase) + offsetof (EP_ENVELOPE, Version), 0); -+ -+ if (qfptr != qlast) -+ qfptr += EP_INPUTQ_SIZE; -+ else -+ qfptr = qbase; -+ } -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+static void -+ep4rcvr_flush_flushing (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ /* remove any sten packates which are retrying to nodes which are being passivated */ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ list_for_each_safe (el, nel, &rcvrRail->rcvr_retrylist) { -+ EP4_RXD_RAIL *rxdRail = list_entry (el, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4rcvr_flush_flushing: removing rxdRail %p from retry list\n", rail->r_generic.Name, rxdRail); -+ -+ list_del (&rxdRail->rxd_retry_link); -+ } -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL (rxdRail, rcvrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF5 (DBG_DISCON, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p state %d elan node %d\n", -+ rail->r_generic.Name, rcvr, rxd, (int)rxdRail->rxd_main->rxd_done, env->NodeId); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_flush_flushing: rxd state is free but bound to a fail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - passive\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete RPC */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - active\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP_INVALIDATE_XID (rxd->MsgXid); /* Ignore any previous NMD map responses */ -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_flush_flushing: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_flush_flushing: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_flush_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL(rcvrRail); -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ ep4rcvr_flush_filtering (rcvr, rcvrRail); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ ep4rcvr_flush_flushing (rcvr, rcvrRail); -+ break; -+ } -+} -+ -+void -+ep4rcvr_failover_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = rcvr->Subsys; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#if SUPPORT_RAIL_FAILOVER -+ EP_SYS *sys = subsys->Subsys.Sys; -+#endif -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+#if SUPPORT_RAIL_FAILOVER -+ EP_NODE *node = &sys->Nodes[env->NodeId]; -+ EP_MANAGER_MSG_BODY msgBody; -+#endif -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ EPRINTF5 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p elan node %d state %d\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, (int)rxdRail->rxd_main->rxd_done); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_failover_callback: rxd state is free but bound to a rail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE(rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ EP4_ASSERT (rail, EP_IS_RPC(env->Attr)); -+ -+#if SUPPORT_RAIL_FAILOVER -+ /* XXXX - no rail failover for now .... */ -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE && !EP_IS_NO_FAILOVER(env->Attr)) /* incomplete RPC, which can be failed over */ -+ { -+ EPRINTF6 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p State %d Xid %llxx MsgXid %llxx nodeId %d - failover\n", -+ rail->r_generic.Name, rxd, rxd->State, env->Xid.Unique, rxd->MsgXid.Unique, env->NodeId); -+ -+ if (EP_XID_INVALID(rxd->MsgXid)) -+ rxd->MsgXid = ep_xid_cache_alloc (sys, &rcvr->XidCache); -+ -+ /* XXXX maybe only send the message if the node failover retry is now ? */ -+ msgBody.Failover.Xid = env->Xid; -+ msgBody.Failover.Railmask = node->ConnectedRails; -+ -+ ep_send_message (&rail->r_generic, env->NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST, rxd->MsgXid, &msgBody); -+ -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ continue; -+ } -+#endif -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_failover_callback: rxd state is aborted but bound to a fail\n"); -+ break; -+ } -+ EPRINTF3 (DBG_FAILOVER, "%s: ep4rcvr_failover_callback: rxd %p nodeId %d - finished\n", rail->r_generic.Name, rxd, env->NodeId); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_disconnect_callback (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head rxdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&rxdList); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[env->NodeId]; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ EPRINTF5 (DBG_DISCON, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p elan node %d state %x\n", rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ switch (rxd->State) -+ { -+ case EP_RXD_FREE: -+ printk ("ep4rcvr_disconnect_callback: rxd state is free but bound to a rail\n"); -+ break; -+ -+ case EP_RXD_RECEIVE_ACTIVE: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE) /* incomplete message receive */ -+ { -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - unbind\n", rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* remark it as pending if it was partially received */ -+ rxd->RxdMain->Len = EP_RXD_PENDING; -+ -+ /* epcomms thread will requeue on different rail */ -+ ep_kthread_schedule (&rcvr->Subsys->Thread, lbolt); -+ continue; -+ } -+ break; -+ -+ default: -+ if (rxdRail->rxd_main->rxd_done == EP4_STATE_ACTIVE || rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE) /* incomplete RPC */ -+ { -+ EPRINTF5 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d state %x - not able to failover\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId, rxd->State); -+ -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ -+ /* Ignore any previous NMD/failover responses */ -+ EP_INVALIDATE_XID (rxd->MsgXid); -+ -+ /* Remove from active list */ -+ list_del (&rxd->Link); -+ -+ if (rxd->State == EP_RXD_RPC_IN_PROGRESS) /* ownder by user .... */ -+ rxd->State = EP_RXD_BEEN_ABORTED; -+ else /* queue for completion */ -+ { -+ rxd->RxdMain->Len = EP_CONN_RESET; /* ensure ep_rxd_status() fails */ -+ list_add_tail (&rxd->Link, &rxdList); -+ } -+ continue; -+ } -+ break; -+ -+ case EP_RXD_BEEN_ABORTED: -+ printk ("ep4rcvr_disconnect_callback: rxd state is aborted but bound to a rail\n"); -+ break; -+ } -+ -+ printk ("%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ EPRINTF4 (DBG_RCVR, "%s: ep4rcvr_disconnect_callback: rcvr %p rxd %p nodeId %d - finished\n", -+ rail->r_generic.Name, rcvr, rxd, env->NodeId); -+ ep4rcvr_display_rxd (&di_ep_debug, &rxdRail->rxd_generic); -+ } -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ while (! list_empty (&rxdList)) -+ { -+ EP_RXD *rxd = list_entry (rxdList.next, EP_RXD, Link); -+ -+ list_del (&rxd->Link); -+ -+ rxd->Handler (rxd); -+ } -+} -+ -+void -+ep4rcvr_neterr_flush (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ /* Insert an setevent command into the thread's command queue -+ * to ensure that all sten packets have completed */ -+ elan4_guard (rcvrRail->rcvr_ecq->ecq_cq, GUARD_ALL_CHANNELS); -+ ep4comms_flush_setevent (commsRail, rcvrRail->rcvr_ecq->ecq_cq); -+ -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+void -+ep4rcvr_neterr_check (EP_RCVR *rcvr, EP4_RCVR_RAIL *rcvrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ EP4_SPINENTER (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ -+ if (rxd->RxdMain->Len == EP_RXD_PENDING || !RXD_BOUND2RAIL(rxdRail,rcvrRail) || env->NodeId != nodeId) -+ continue; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE || rxd->State == EP_RXD_GET_ACTIVE) -+ { -+ EP_NETERR_COOKIE cookie; -+ unsigned int first, this; -+ -+ if (rxd->State == EP_RXD_RECEIVE_ACTIVE) -+ first = (EP_MAXFRAG+1) - (( EP_IS_MULTICAST(env->Attr) ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ else -+ first = (EP_MAXFRAG+1) - rxd->nFrags; -+ -+ for (this = first; this < (EP_MAXFRAG+1); this++) -+ if (rxdRail->rxd_main->rxd_sent[this] == EP4_STATE_ACTIVE) -+ break; -+ -+ if (this > first) -+ { -+ /* Look at the last completed STEN packet and if it's neterr cookie matches, then change -+ * the rxd to look the same as if the sten packet had failed and then schedule it for retry */ -+ cookie = elan4_sdram_readq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[--this].c_cookie)); -+ -+ if (cookie == cookies[0] || cookie == cookies[1]) -+ { -+ EPRINTF5 (DBG_NETWORK_ERROR, "%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this); -+ -+ printk ("%s: ep4rcvr_neterr_check: cookie <%lld%s%s%s%s> matches rxd %p rxdRail %p this %d : time %ld\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(cookie), rxd, rxdRail, this, rxdRail->rxd_retry_time); -+ -+ rxdRail->rxd_main->rxd_sent[this] = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_FAILED; -+ -+ spin_lock (&rcvrRail->rcvr_retrylock); -+ -+ ASSERT (rxdRail->rxd_retry_time == 0); -+ -+ rxdRail->rxd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; -+ -+ list_add_tail (&rxdRail->rxd_retry_link, &rcvrRail->rcvr_retrylist); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, rxdRail->rxd_retry_time); -+ -+ spin_unlock (&rcvrRail->rcvr_retrylock); -+ } -+ } -+ } -+ } -+ EP4_SPINEXIT (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), &rcvrRail->rcvr_main->rcvr_thread_lock); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+} -+ -+int -+ep4rcvr_queue_rxd (EP_RXD *rxd, EP_RCVR_RAIL *r) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_RXD_RAIL *rxdRail; -+ register int i; -+ -+ ASSERT (SPINLOCK_HELD(&rxd->Rcvr->Lock)); -+ -+ if ((rxdRail = get_rxd_rail (rcvrRail)) == NULL) -+ return 0; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_queue_rxd: rcvr %p rxd %p rxdRail %p buffer %x len %x\n", -+ rail->r_generic.Name, rxd->Rcvr, rxd, rxdRail, rxd->Data.nmd_addr, rxd->Data.nmd_len); -+ -+ /* bind the rxdRail and rxd together */ -+ bind_rxd_rail (rxd, rxdRail); -+ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_addr), rxd->Data.nmd_addr); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_len), rxd->Data.nmd_len); /* PCI write */ -+ elan4_sdram_writel (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_buffer.nmd_attr), rxd->Data.nmd_attr); /* PCI write */ -+ -+ /* Mark as active */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x00, /* %r0 */ -+ ep_symbol (&rail->r_threadcode, "c_queue_rxd")); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x10, /* %r2 */ -+ rcvrRail->rcvr_elan_addr); -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0]) + 0x18, /* %r3 */ -+ rxdRail->rxd_elan_addr); -+ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_START_CMD_NDWORDS)); -+ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_start)); -+ -+ return 1; -+} -+ -+void -+ep4rcvr_rpc_put (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_PUT_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ if (i == (nFrags-1)) -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ else -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_put: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_get (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_GET_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_get rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, remote->nmd_addr, local->nmd_addr, remote->nmd_len); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(env->NodeId))); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_trans), -+ SEND_TRANS_CMD | ((TR_REMOTEDMA | TR_WAIT_FOR_EOP) << 16)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_STEN); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_typeSize), -+ E4_DMA_TYPE_SIZE (local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_cookie), -+ ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_vproc), -+ EP_VP_DATA (rail->r_generic.Position.pos_nodeid)); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcAddr), -+ remote->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstAddr), -+ local->nmd_addr); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_srcEvent), -+ 0); -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i].c_dma_dstEvent), -+ i == (nFrags-1) ? rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done) : -+ rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i])); -+ } -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags-1; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, RPCPut, len); -+} -+ -+void -+ep4rcvr_rpc_complete (EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rxdRail->rxd_generic.RcvrRail; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = RCVR_TO_DEV (rcvrRail); -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP_ENVELOPE *env = &rxd->RxdMain->Envelope; -+ unsigned long first = (EP_MAXFRAG+1) - nFrags - 1; -+ EP4_RXD_DMA_CMD cmd; -+ register int i, len; -+ -+ EP4_ASSERT (rail, rxd->State == EP_RXD_COMPLETE_ACTIVE); -+ EP4_ASSERT (rail, rxdRail->rxd_main->rxd_done == EP4_STATE_PRIVATE); -+ EP4_SDRAM_ASSERT (rail, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Generate the DMA chain to put the data */ -+ for (i = 0, len = 0; i < nFrags; i++, len += local->nmd_len, local++, remote++) -+ { -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(local->nmd_len, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = local->nmd_addr; -+ cmd.c_dma_dstAddr = remote->nmd_addr; -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]); -+ cmd.c_dma_dstEvent = 0; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF7 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] idx=%d Source=%08x Dest=%08x Len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, i, local->nmd_addr, remote->nmd_addr, local->nmd_len); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[first + i]), sizeof (EP4_RXD_DMA_CMD)); -+ } -+ -+ /* Initialise the status block dma */ -+ cmd.c_dma_typeSize = RUN_DMA_CMD | E4_DMA_TYPE_SIZE(EP_STATUSBLK_SIZE, DMA_DataTypeByte, 0, EP4_DMA_RETRYCOUNT); -+ cmd.c_dma_cookie = ep4_neterr_cookie (rail, env->NodeId) | EP4_COOKIE_DMA; -+ cmd.c_dma_vproc = EP_VP_DATA(env->NodeId); -+ cmd.c_dma_srcAddr = rxd->NmdMain.nmd_addr + offsetof (EP_RXD_MAIN, StatusBlk); -+ cmd.c_dma_dstAddr = env->TxdMain.nmd_addr + offsetof (EP_TXD_MAIN, StatusBlk); -+ cmd.c_dma_srcEvent = rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_done); -+ cmd.c_dma_dstEvent = env->TxdRail + offsetof (EP4_TXD_RAIL_ELAN, txd_done);; -+ cmd.c_nop_cmd = NOP_CMD; -+ -+ EPRINTF6 (DBG_RCVR, "%s: ep4rcvr_rpc_complete: rxd %p [XID=%llx] statusblk source=%08x dest=%08x len=%x\n", -+ rail->r_generic.Name, rxd, env->Xid.Unique, (int) cmd.c_dma_srcAddr, (int) cmd.c_dma_dstAddr, EP_STATUSBLK_SIZE); -+ -+ elan4_sdram_copyq_to_sdram (dev, &cmd, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[EP_MAXFRAG]), sizeof (EP4_RXD_DMA_CMD)); -+ -+ /* Initialise the event chain */ -+ for (i = 0; i < nFrags; i++) -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first + i]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ for (i = 0; i <= EP_MAXFRAG; i++) -+ rxdRail->rxd_main->rxd_sent[i] = EP4_STATE_ACTIVE; -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ rxdRail->rxd_main->rxd_done = EP4_STATE_ACTIVE; -+ -+ /* Initialise the previous event to start the whole chain off */ -+ elan4_sdram_writeq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1]), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_DMA_CMD_NDWORDS)); -+ -+ ASSERT (rail->r_generic.Nodes[env->NodeId].State >= EP_NODE_CONNECTED && rail->r_generic.Nodes[env->NodeId].State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ /* finally issue the setevent to start the whole chain */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ -+ BucketStat (rxd->Rcvr->Subsys, CompleteRPC, len); -+} -+ -+EP_RXD * -+ep4rcvr_steal_rxd (EP_RCVR_RAIL *r) -+{ -+ /* XXXX - TBD */ -+ return NULL; -+} -+ -+long -+ep4rcvr_check (EP_RCVR_RAIL *r, long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ -+ if (rcvrRail->rcvr_freecount < ep_rxd_lowat && !alloc_rxd_block (rcvrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow rxd rail pool\n", rail->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4rcvr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) arg; -+ ELAN4_DEV *dev = RCVR_TO_DEV(rcvrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_retrylock, flags); -+ while (! list_empty (&rcvrRail->rcvr_retrylist)) -+ { -+ EP4_RXD_RAIL *rxdRail = list_entry (rcvrRail->rcvr_retrylist.next, EP4_RXD_RAIL, rxd_retry_link); -+ EP_ENVELOPE *env = &rxdRail->rxd_generic.Rxd->RxdMain->Envelope; -+ unsigned int first = (EP_MAXFRAG+1) - ((env->Attr & EP_MULTICAST ? 1 : 0) + (env->nFrags == 0 ? 1 : env->nFrags)); -+ -+ if (BEFORE (lbolt, rxdRail->rxd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, rxdRail->rxd_retry_time)) -+ nextRunTime = rxdRail->rxd_retry_time; -+ -+ break; -+ } -+ -+ list_del (&rxdRail->rxd_retry_link); -+ rxdRail->rxd_retry_time = 0; -+ -+ /* determine which sten packet to resubmit */ -+ for (; first < (EP_MAXFRAG+1); first++) -+ if (rxdRail->rxd_main->rxd_sent[first] == EP4_STATE_ACTIVE) -+ break; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4rcvr_retry: rxdRail %p, reissuing sten[%d]\n", rail->r_generic.Name, rxdRail, first); -+ -+ /* re-initialise the fail event */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ rxdRail->rxd_main->rxd_failed = EP4_STATE_ACTIVE; -+ -+ /* re-initialise the chain event to resubmit this sten packet */ -+ elan4_sdram_writeq (dev, rxdRail->rxd_elan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first-1].ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_RXD_STEN_CMD_NDWORDS)); -+ -+ /* finally issue the setevent to start the chain again */ -+ ep4_set_event_cmd (rxdRail->rxd_scq, rxdRail->rxd_elan_addr + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[first - 1])); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4rcvr_add_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t qdescs = ((EP4_COMMS_RAIL *) commsRail)->r_descs; -+ EP4_RCVR_RAIL *rcvrRail; -+ E4_InputQueue qdesc; -+ E4_ThreadRegs tregs; -+ sdramaddr_t stack; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (rcvrRail, EP4_RCVR_RAIL *, sizeof (EP4_RCVR_RAIL), 1); -+ -+ spin_lock_init (&rcvrRail->rcvr_freelock); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_freelist); -+ INIT_LIST_HEAD (&rcvrRail->rcvr_blocklist); -+ -+ kcondvar_init (&rcvrRail->rcvr_cleanup_sleep); -+ kcondvar_init (&rcvrRail->rcvr_freesleep); -+ -+ INIT_LIST_HEAD (&rcvrRail->rcvr_retrylist); -+ spin_lock_init (&rcvrRail->rcvr_retrylock); -+ -+ rcvrRail->rcvr_generic.CommsRail = commsRail; -+ rcvrRail->rcvr_generic.Rcvr = rcvr; -+ -+ rcvrRail->rcvr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RCVR_RAIL_MAIN), 0, &rcvrRail->rcvr_main_addr); -+ rcvrRail->rcvr_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RCVR_RAIL_ELAN), 0, &rcvrRail->rcvr_elan_addr); -+ rcvrRail->rcvr_slots = ep_alloc_elan (&rail->r_generic, EP_INPUTQ_SIZE * rcvr->InputQueueEntries, 0, &rcvrRail->rcvr_slots_addr); -+ stack = ep_alloc_elan (&rail->r_generic, EP4_STACK_SIZE, 0, &rcvrRail->rcvr_stack); -+ -+ /* allocate a command queue for the thread to use, plus space for it to wait/reschedule */ -+ rcvrRail->rcvr_ecq = ep4_alloc_ecq (rail, CQ_Size64K); -+ rcvrRail->rcvr_resched = ep4_get_ecq (rail, EP4_ECQ_ATOMIC, 8); -+ -+ ep4_register_intcookie (rail, &rcvrRail->rcvr_stall_intcookie, rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), -+ rcvr_stall_interrupt, rcvrRail); -+ -+ /* Initialise the elan portion */ -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_halt.ev_CountAndType), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp), -+ rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head)); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_stall_intcookie), 0); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qbase), rcvrRail->rcvr_slots_addr); -+ elan4_sdram_writeq (dev, rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qlast), -+ rcvrRail->rcvr_slots_addr + EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)); -+ -+ /* Initialise the main memory portion */ -+ rcvrRail->rcvr_main->rcvr_thread_lock = 0; -+ -+ /* Install our retry handler */ -+ rcvrRail->rcvr_retryops.op_func = ep4rcvr_retry; -+ rcvrRail->rcvr_retryops.op_arg = rcvrRail; -+ -+ ep4_add_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ /* Update the queue desriptor */ -+ qdesc.q_bptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_fptr = rcvrRail->rcvr_slots_addr; -+ qdesc.q_control = E4_InputQueueControl (rcvrRail->rcvr_slots_addr, rcvrRail->rcvr_slots_addr + (EP_INPUTQ_SIZE * (rcvr->InputQueueEntries-1)), EP_INPUTQ_SIZE); -+ qdesc.q_event = rcvrRail->rcvr_elan_addr + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ -+ ep4_write_qdesc (rail, qdescs + (rcvr->Service * EP_QUEUE_DESC_SIZE), &qdesc); -+ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = &rcvrRail->rcvr_generic; -+ rcvr->RailMask |= EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ { -+ sdramaddr_t stackTop = stack + EP4_STACK_SIZE; -+ E4_Addr stackTopAddr = rcvrRail->rcvr_stack + EP4_STACK_SIZE; -+ -+ ep4_init_thread (rail, &tregs, stackTop, stackTopAddr, ep_symbol (&rail->r_threadcode, "ep4comms_rcvr"), 6, -+ (E4_uint64) rail->r_elan_addr, (E4_uint64) rcvrRail->rcvr_elan_addr, (E4_uint64) rcvrRail->rcvr_main_addr, -+ (E4_uint64) EP_MSGQ_ADDR(rcvr->Service), (E4_uint64) rcvrRail->rcvr_ecq->ecq_addr, (E4_uint64) rcvrRail->rcvr_resched->ecq_addr); -+ } -+ -+ /* Issue the command to the threads private command queue */ -+ elan4_run_thread_cmd (rcvrRail->rcvr_ecq->ecq_cq, &tregs); -+ -+ ep_procfs_rcvr_add_rail(&(rcvrRail->rcvr_generic)); -+} -+ -+void -+ep4rcvr_del_rail (EP_RCVR *rcvr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) rcvr->Rails[rail->r_generic.Number]; -+ ELAN4_HALTOP haltop; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ ep_procfs_rcvr_del_rail(&(rcvrRail->rcvr_generic)); -+ -+ /* Run a halt operation to mark the input queue as full and -+ * request the thread to halt */ -+ haltop.op_mask = INT_DiscardingHighPri | INT_TProcHalted; -+ haltop.op_function = rcvr_stall_haltop; -+ haltop.op_arg = rcvrRail; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &haltop); -+ -+ /* Wait for the thread to tell us it's processed the input queue */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ while (! rcvrRail->rcvr_thread_halted) -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ rcvrRail->rcvr_thread_halted = 0; -+ -+ /* flag the rail as no longer available */ -+ rcvr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ -+ /* wait for all active communications to terminate */ -+ for (;;) -+ { -+ int mustWait = 0; -+ -+ list_for_each (el, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail) && rxd->RxdMain->Len != EP_RXD_PENDING) -+ { -+ mustWait++; -+ break; -+ } -+ } -+ -+ if (! mustWait) -+ break; -+ -+ rcvrRail->rcvr_cleanup_waiting++; -+ kcondvar_wait (&rcvrRail->rcvr_cleanup_sleep, &rcvr->Lock, &flags); -+ } -+ -+ /* at this point all rxd's in the list that are bound to the deleting rail are pending */ -+ list_for_each_safe (el, nel, &rcvr->ActiveDescList) { -+ EP_RXD *rxd = list_entry (el, EP_RXD, Link); -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) rxd->RxdRail; -+ -+ if (rxdRail && RXD_BOUND2RAIL (rxdRail, rcvrRail)) -+ { -+ EP4_RXD_ASSERT_PENDING (rxdRail); -+ EP4_RXD_FORCE_PRIVATE (rxdRail); -+ -+ unbind_rxd_rail (rxd, rxdRail); -+ free_rxd_rail (rcvrRail, rxdRail); -+ } -+ } -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* wait for all rxd's for this rail to become free */ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ while (rcvrRail->rcvr_freecount != rcvrRail->rcvr_totalcount) -+ { -+ rcvrRail->rcvr_freewaiting++; -+ kcondvar_wait (&rcvrRail->rcvr_freesleep, &rcvrRail->rcvr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&rcvrRail->rcvr_freelock, flags); -+ -+ /* can now remove the rail as it can no longer be used */ -+ spin_lock_irqsave (&rcvr->Lock, flags); -+ rcvr->Rails[rail->r_generic.Number] = NULL; -+ spin_unlock_irqrestore (&rcvr->Lock, flags); -+ -+ /* all the rxd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&rcvrRail->rcvr_blocklist)) -+ free_rxd_block (rcvrRail, list_entry(rcvrRail->rcvr_blocklist.next, EP4_RXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((rcvrRail->rcvr_totalcount == 0) && (rcvrRail->rcvr_totalcount == rcvrRail->rcvr_freecount)); -+ -+ ep4_remove_retry_ops (rail, &rcvrRail->rcvr_retryops); -+ -+ ep4_deregister_intcookie (rail, &rcvrRail->rcvr_stall_intcookie); -+ -+ ep4_put_ecq (rail, rcvrRail->rcvr_resched, 8); -+ ep4_free_ecq (rail, rcvrRail->rcvr_ecq); -+ -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_stack, EP4_STACK_SIZE); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_slots_addr, EP_INPUTQ_SIZE * rcvr->InputQueueEntries); -+ ep_free_elan (&rail->r_generic, rcvrRail->rcvr_elan_addr, sizeof (EP4_RCVR_RAIL_ELAN)); -+ ep_free_main (&rail->r_generic, rcvrRail->rcvr_main_addr, sizeof (EP4_RCVR_RAIL_MAIN)); -+ -+ KMEM_FREE (rcvrRail, sizeof (EP4_RCVR_RAIL)); -+} -+ -+void -+ep4rcvr_display_rxd (DisplayInfo *di, EP_RXD_RAIL *r) -+{ -+ EP4_RXD_RAIL *rxdRail = (EP4_RXD_RAIL *) r; -+ sdramaddr_t rxdElan = rxdRail->rxd_elan; -+ EP4_RAIL *rail = RCVR_TO_RAIL (rxdRail->rxd_generic.RcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int i; -+ -+ (di->func)(di->arg, " Rail %d rxd %p elan %lx(%x) main %p(%x) ecq %d scq %d debug %llx\n", rail->r_generic.Number, -+ rxdRail, rxdRail->rxd_elan, rxdRail->rxd_elan_addr, rxdRail->rxd_main, rxdRail->rxd_main_addr, -+ elan4_cq2num(rxdRail->rxd_ecq->ecq_cq), elan4_cq2num(rxdRail->rxd_scq->ecq_cq), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_debug))); -+ (di->func)(di->arg, " start %016llx %016llx %016llx [%016llx %016llx]\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_start.ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[0].c_dma_cookie))); -+ -+ for (i = 0; i < EP_MAXFRAG; i++) -+ (di->func)(di->arg, " chain[%d] %016llx %016llx %016llx [%016llx %016llx]\n", i, -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_chain[i].ev_Params[1])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_cookie)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_sten[i+1].c_dma_cookie))); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_done.ev_Params[1])), -+ rxdRail->rxd_main->rxd_done); -+ (di->func)(di->arg, " fail %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_CountAndType)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[0])), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_failed.ev_Params[1])), -+ rxdRail->rxd_main->rxd_failed); -+ (di->func)(di->arg, " next %016llx queued %016llx main %016llx\n", -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_next)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_queued)), -+ elan4_sdram_readq (dev, rxdElan + offsetof (EP4_RXD_RAIL_ELAN, rxd_main))); -+ (di->func)(di->arg, " sent %016llx %016llx %016llx %016llx %016llx\n", -+ rxdRail->rxd_main->rxd_sent[0], rxdRail->rxd_main->rxd_sent[1], rxdRail->rxd_main->rxd_sent[2], -+ rxdRail->rxd_main->rxd_sent[3], rxdRail->rxd_main->rxd_sent[4]); -+} -+ -+void -+ep4rcvr_display_rcvr (DisplayInfo *di, EP_RCVR_RAIL *r) -+{ -+ EP_RCVR *rcvr = r->Rcvr; -+ EP4_RCVR_RAIL *rcvrRail = (EP4_RCVR_RAIL *) r; -+ EP4_COMMS_RAIL *commsRail = RCVR_TO_COMMS(rcvrRail); -+ EP4_RAIL *rail = RCVR_TO_RAIL (rcvrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ sdramaddr_t rcvrElan = rcvrRail->rcvr_elan; -+ sdramaddr_t qdesc = commsRail->r_descs + (rcvr->Service * EP_QUEUE_DESC_SIZE); -+ sdramaddr_t event = rcvrRail->rcvr_elan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_qevent); -+ unsigned int freeCount = 0; -+ unsigned int blockCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rcvrRail->rcvr_freelock, flags); -+ list_for_each (el, &rcvrRail->rcvr_freelist) -+ freeCount++; -+ list_for_each (el, &rcvrRail->rcvr_blocklist) -+ blockCount++; -+ spin_unlock_irqrestore(&rcvrRail->rcvr_freelock, flags); -+ -+ (di->func)(di->arg, " Rail %d elan %lx(%x) main %p(%x) ecq %d resched %d debug %llx\n", -+ rail->r_generic.Number, rcvrRail->rcvr_elan, rcvrRail->rcvr_elan_addr, -+ rcvrRail->rcvr_main, rcvrRail->rcvr_main_addr, elan4_cq2num(rcvrRail->rcvr_ecq->ecq_cq), -+ elan4_cq2num (rcvrRail->rcvr_resched->ecq_cq), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_debug))); -+ (di->func)(di->arg, " free %d (%d) total %d blocks %d\n", -+ rcvrRail->rcvr_freecount, freeCount, rcvrRail->rcvr_totalcount, blockCount); -+ (di->func)(di->arg, " spinlock %016llx %016llx\n", rcvrRail->rcvr_main->rcvr_thread_lock, -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_thread_lock))); -+ (di->func)(di->arg, " queue: bptr %016llx fptr %016llx control %016llx (base %lx %x)\n", -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_bptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_fptr)), -+ elan4_sdram_readq (dev, qdesc + offsetof (E4_InputQueue, q_control)), -+ rcvrRail->rcvr_slots, rcvrRail->rcvr_slots_addr); -+ (di->func)(di->arg, " event %016llx %016llx %016llx\n", -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[0])), -+ elan4_sdram_readq (dev, event + offsetof (E4_Event32, ev_Params[1]))); -+ (di->func)(di->arg, " pending_tailp %016llx pending_head %016llx\n", -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_tailp)), -+ elan4_sdram_readq (dev, rcvrElan + offsetof (EP4_RCVR_RAIL_ELAN, rcvr_pending_head))); -+} -+ -+void -+ep4rcvr_fillout_rail_stats(EP_RCVR_RAIL *rcvr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_RCVR_RAIL * ep4rcvr_rail = (EP4_RCVR_RAIL *) rcvr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsTx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx.c 2005-07-28 14:52:52.876674480 -0400 -@@ -0,0 +1,919 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx.c,v 1.25.2.5 2004/12/09 10:02:42 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+unsigned int ep_txd_lowat = 5; -+ -+static int -+AllocateTxdBlock (EP_XMTR *xmtr, EP_ATTRIBUTE attr, EP_TXD **txdp) -+{ -+ EP_TXD_BLOCK *blk; -+ EP_TXD *txd; -+ EP_TXD_MAIN *pTxdMain; -+ int i; -+ unsigned long flags; -+ -+ EPRINTF1 (DBG_XMTR, "AllocateTxdBlock: xmtr=%p\n", xmtr); -+ -+ KMEM_ZALLOC (blk, EP_TXD_BLOCK *, sizeof (EP_TXD_BLOCK), ! (attr & EP_NO_SLEEP)); -+ -+ if (blk == NULL) -+ return -ENOMEM; -+ -+ if ((pTxdMain = ep_shared_alloc_main (xmtr->Subsys->Subsys.Sys, EP_TXD_MAIN_SIZE * EP_NUM_TXD_PER_BLOCK, attr, &blk->NmdMain)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+ return -ENOMEM; -+ } -+ -+ for (txd = &blk->Txd[0], i = 0; i < EP_NUM_TXD_PER_BLOCK; i++, txd++) -+ { -+ txd->Xmtr = xmtr; -+ txd->TxdMain = pTxdMain; -+ -+ ep_nmd_subset (&txd->NmdMain, &blk->NmdMain, (i * EP_TXD_MAIN_SIZE), EP_TXD_MAIN_SIZE); -+ -+ /* move onto next descriptor */ -+ pTxdMain = (EP_TXD_MAIN *) ((unsigned long) pTxdMain + EP_TXD_MAIN_SIZE); -+ } -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtr->DescBlockList); -+ xmtr->TotalDescCount += EP_NUM_TXD_PER_BLOCK; -+ -+ for (i = txdp ? 1 : 0; i < EP_NUM_TXD_PER_BLOCK; i++) -+ { -+ list_add (&blk->Txd[i].Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) -+ { -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (txdp) -+ *txdp = &blk->Txd[0]; -+ -+ return 0; -+} -+ -+static void -+FreeTxdBlock (EP_XMTR *xmtr, EP_TXD_BLOCK *blk) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_del (&blk->Link); -+ -+ xmtr->TotalDescCount -= EP_NUM_RXD_PER_BLOCK; -+ xmtr->FreeDescCount -= EP_NUM_RXD_PER_BLOCK; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ ep_shared_free_main (xmtr->Subsys->Subsys.Sys, &blk->NmdMain); -+ KMEM_FREE (blk, sizeof (EP_TXD_BLOCK)); -+} -+ -+static EP_TXD * -+GetTxd (EP_XMTR *xmtr, EP_ATTRIBUTE attr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_TXD *txd; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ while (list_empty (&xmtr->FreeDescList)) -+ { -+ if (! (attr & EP_NO_ALLOC)) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (AllocateTxdBlock (xmtr, attr, &txd) == ESUCCESS) -+ return (txd); -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ } -+ -+ if (attr & EP_NO_SLEEP) -+ { -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ return (NULL); -+ } -+ -+ xmtr->FreeDescWanted++; -+ kcondvar_wait (&xmtr->FreeDescSleep, &xmtr->FreeDescLock, &flags); -+ } -+ -+ txd = list_entry (xmtr->FreeDescList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (--xmtr->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txd); -+} -+ -+void -+FreeTxd (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ -+ list_add (&txd->Link, &xmtr->FreeDescList); -+ -+ xmtr->FreeDescCount++; -+ -+ if (xmtr->FreeDescWanted) /* someone waiting for a receive */ -+ { /* descriptor, so wake them up */ -+ xmtr->FreeDescWanted--; -+ kcondvar_wakeupone (&xmtr->FreeDescSleep, &xmtr->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+} -+ -+int -+TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail) -+{ -+ EP_TXD *txd = txdRail->Txd; -+ EP_XMTR *xmtr = txd->Xmtr; -+ EP_ATTRIBUTE attr = txd->Envelope.Attr; -+ int stabilise; -+ extern int txd_stabilise; -+ -+ switch (EP_ATTR2TYPE (attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: /* is the rail in the current service indicator rail mask */ -+ if ((txd_stabilise & 4) == 0) -+ return 0; -+ -+ stabilise = (ep_xmtr_svc_indicator_railmask (xmtr, EP_ATTR2DATA (attr), txd->NodeId) & EP_RAIL2RAILMASK (rail->Number)) == 0; -+ break; -+ -+ case EP_TYPE_TIMEOUT: -+ if ((txd_stabilise & 2) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_ATTR2DATA(attr)); -+ break; -+ -+ default: -+ if ((txd_stabilise & 1) == 0) -+ return 0; -+ -+ stabilise = AFTER(lbolt, txdRail->Txd->TimeStamp + EP_DEFAULT_TIMEOUT); -+ break; -+ } -+ -+ if (stabilise) -+ { -+ txd->Envelope.Attr = EP_SET_TXD_STABALISING(txd->Envelope.Attr); -+ txd->RetryTime = lbolt; -+ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ -+ return stabilise; -+} -+ -+void ep_xmtr_txd_stat(EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ int f; -+ unsigned long size; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ size = 0; -+ for (f=0; f < txd->Envelope.nFrags; f++) -+ size += txd->Envelope.Frags[f].nmd_len; -+ -+ INC_STAT(xmtr->stats,tx); -+ ADD_STAT(xmtr->stats,tx_len, size); -+ -+ if ((txdRail != NULL) && (txdRail->XmtrRail != NULL)){ -+ INC_STAT(txdRail->XmtrRail->stats,tx); -+ ADD_STAT(txdRail->XmtrRail->stats,tx_len, size); -+ -+ if ((txdRail->XmtrRail->CommsRail != NULL) && ( txdRail->XmtrRail->CommsRail->Rail != NULL)) { -+ INC_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx); -+ ADD_STAT(txdRail->XmtrRail->CommsRail->Rail->Stats,tx_len, size); -+ } -+ } -+} -+ -+static int -+PollActiveTransmitList (EP_XMTR *xmtr, int flag) -+{ -+ struct list_head *el, *nel; -+ struct list_head list; -+ unsigned long flags; -+ int count; -+ -+ INIT_LIST_HEAD (&list); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail == NULL) -+ continue; -+ -+ ASSERT (txdRail->Txd == txd); -+ -+ if (EP_XMTR_OP (txdRail->XmtrRail,PollTxd) (txdRail->XmtrRail, txdRail, flags)) -+ { -+ list_del (&txd->Link); /* remove from active transmit list */ -+ list_add_tail (&txd->Link, &list); /* and add to list to call handlers */ -+ } -+ } -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ for (count = 0; !list_empty (&list); count++) -+ { -+ EP_TXD *txd = list_entry (list.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ return (count); -+} -+ -+static inline void -+DoTransmit (EP_XMTR *xmtr, EP_TXD *txd) -+{ -+ EP_RAILMASK nmdRailMask = ep_nmd2railmask (txd->Envelope.Frags, txd->Envelope.nFrags); -+ EP_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int rnum; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ if (EP_IS_SVC_INDICATOR(txd->Envelope.Attr)) -+ nmdRailMask = nmdRailMask & ep_xmtr_svc_indicator_railmask(xmtr, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId); -+ -+ if (EP_IS_PREFRAIL_SET(txd->Envelope.Attr)) -+ rnum = EP_ATTR2PREFRAIL(txd->Envelope.Attr); -+ else -+ rnum = ep_xmtr_prefrail (xmtr, nmdRailMask, txd->NodeId); -+ -+ if (rnum < 0 || !(nmdRailMask & EP_RAIL2RAILMASK(rnum))) -+ xmtrRail = NULL; -+ else -+ xmtrRail = xmtr->Rails[rnum]; -+ -+ /* Allocate the XID while holding the xmtr->Lock from our XID cache */ -+ txd->Envelope.Xid = ep_xid_cache_alloc (xmtr->Subsys->Subsys.Sys, &xmtr->XidCache); -+ -+ EPRINTF7 (DBG_XMTR, "ep: transmit txd %p to %d/%d: Xid %llx nFrags %d [%08x.%d]\n", -+ txd, txd->NodeId, txd->Service, (long long) txd->Envelope.Xid.Unique, -+ txd->Envelope.nFrags, txd->Envelope.Frags[0].nmd_addr, txd->Envelope.Frags[0].nmd_len); -+ -+ /* Store time transmit started to timeout if not received */ -+ txd->TimeStamp = lbolt; -+ -+ /* Initialise the retry backoff */ -+ txd->Backoff.type = EP_BACKOFF_FREE; -+ -+ list_add_tail (&txd->Link, &xmtr->ActiveDescList); -+ -+ if (xmtrRail == NULL || !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ PollActiveTransmitList (xmtr, POLL_TX_LIST); -+} -+ -+EP_STATUS -+ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_CLEAR_LOCAL_ATTR(attr); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, DataXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, EP_SERVICE service, -+ EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ EP_TXD *txd; -+ int nnodes; -+ int i, len; -+ unsigned long flags; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if (destLo == -1) -+ destLo = sys->Position.pos_nodeid & ~(EP_MAX_NODES-1); -+ -+ if (destHi == -1 && (destHi = ((sys->Position.pos_nodeid + EP_MAX_NODES) & ~(EP_MAX_NODES-1)) - 1) >= sys->Position.pos_nodes) -+ destHi = sys->Position.pos_nodes-1; -+ -+ nnodes = (destHi-destLo+1); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (destLo, destHi); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_IS_SVC_INDICATOR(attr)) -+ ep_xmtr_svc_indicator_bitmap(xmtr, EP_ATTR2DATA(attr), txd->TxdMain->Bitmap, destLo, nnodes); -+ else -+ bt_subset (statemap_tobitmap(sys->NodeSet), txd->TxdMain->Bitmap, destLo, nnodes); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (bitmap != NULL) /* bitmap supplied, so intersect it with */ -+ bt_intersect (txd->TxdMain->Bitmap, bitmap, nnodes); /* the current node set map */ -+ -+ if ((attr & EP_NOT_MYSELF) && destLo <= sys->Position.pos_nodeid && sys->Position.pos_nodeid <= destHi) -+ BT_CLEAR (txd->TxdMain->Bitmap, (sys->Position.pos_nodeid-destLo)); /* clear myself if not wanted */ -+ -+ if ((i = bt_lowbit (txd->TxdMain->Bitmap, nnodes)) < 0) -+ { -+ FreeTxd (xmtr, txd); -+ return (EP_NODE_DOWN); -+ } -+ -+ txd->NodeId = (unsigned short) i; -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_RPC(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = EP_RANGE (dest, dest); -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, RPCXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+EP_STATUS -+ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_ENVELOPE *env, EP_PAYLOAD *payload, bitmap_t *bitmap, EP_NMD *nmd, int nFrags) -+{ -+ EP_TXD *txd; -+ int i, len; -+ -+ if (nFrags > EP_MAXFRAG || service > EP_MSG_NSVC) -+ return (EP_EINVAL); -+ -+ if ((txd = GetTxd (xmtr, attr)) == NULL) -+ return (EP_ENOMEM); -+ -+ txd->Handler = handler; -+ txd->Arg = arg; -+ txd->Service = service; -+ txd->NodeId = (unsigned short) dest; -+ -+ /* Initialise the envelope */ -+ txd->Envelope.Version = EP_ENVELOPE_VERSION; -+ txd->Envelope.Attr = EP_SET_MULTICAST(EP_CLEAR_LOCAL_ATTR(attr)); -+ txd->Envelope.Range = env->Range; -+ txd->Envelope.TxdMain = txd->NmdMain; -+ txd->Envelope.nFrags = nFrags; -+ -+ for (i = len = 0; i < nFrags; len += nmd[i].nmd_len, i++) -+ txd->Envelope.Frags[i] = nmd[i]; -+ -+ bt_copy (bitmap, txd->TxdMain->Bitmap, EP_RANGE_HIGH(env->Range) - EP_RANGE_LOW(env->Range) + 1); -+ -+ if (payload) -+ { -+ txd->Envelope.Attr = EP_SET_HAS_PAYLOAD(txd->Envelope.Attr); -+ -+ bcopy (payload, &txd->Payload, sizeof (EP_PAYLOAD)); -+ } -+ -+ DoTransmit (xmtr, txd); -+ -+ BucketStat (xmtr->Subsys, McastXmit, len); -+ -+ return (EP_SUCCESS); -+} -+ -+int -+ep_poll_transmits (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, POLL_TX_LIST)); -+} -+ -+int -+ep_enable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, ENABLE_TX_CALLBACK)); -+} -+ -+int -+ep_disable_txcallbacks (EP_XMTR *xmtr) -+{ -+ return (PollActiveTransmitList (xmtr, DISABLE_TX_CALLBACK)); -+} -+ -+/* functions for accessing fields of txds */ -+int ep_txd_node(EP_TXD *txd) { return (txd->NodeId); } -+EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd) { return (&txd->TxdMain->StatusBlk); } -+ -+void -+ep_xmtr_xid_msg_handler (void *arg, EP_MANAGER_MSG *msg) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *) arg; -+ EP_SYS *sys = xmtr->Subsys->Subsys.Sys; -+ struct list_head *el,*nel; -+ unsigned long flags; -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Body.Failover.Xid, txd->Envelope.Xid)) -+ { -+ EP_XMTR_RAIL *xmtrRail = txdRail->XmtrRail; -+ EP_RAIL *rail = xmtrRail->CommsRail->Rail; -+ EP_MANAGER_MSG_BODY msgBody; -+ int rnum; -+ -+ if (! (msg->Body.Failover.Railmask & EP_RAIL2RAILMASK (rail->Number))) -+ { -+ /* Need to failover this txd to a different rail, select a rail from -+ * the set that she has asked us to use and which is connected to her -+ * on this transmitter. If there are no such rails, then in all probability -+ * we're offline on all common rails and eventually she will see we have no -+ * rails in common and abort the receive. */ -+ if ((rnum = ep_xmtr_prefrail (xmtr, msg->Body.Failover.Railmask, txd->NodeId)) < 0) -+ ep_debugf (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST but can't determine rail (%04x,%04x,%d,%04x)\n", -+ rail->Name, msg->Body.Failover.Railmask, xmtr->RailMask, txd->NodeId, sys->Nodes[txd->NodeId].ConnectedRails); -+ else -+ { -+ EP_XMTR_RAIL *nXmtrRail = xmtr->Rails[rnum]; -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep_xmtr_xid_msg_handler: FAILOVER_REQUEST txd=%p XID=%llx-> rail %d\n", rail->Name, txd, (long long) txd->Envelope.Xid.Unique, rnum); -+ -+ /* Bind the txd rail onto the new rail - it doesn't matter if we fail -+ * as it will remain bound to the original rail */ -+ (void) EP_XMTR_OP (nXmtrRail, BindTxd) (txd, nXmtrRail, EP_TXD_PHASE_PASSIVE); -+ } -+ } -+ -+ /* Send a failover response including an envelope update */ -+ msgBody.FailoverTxd.Rail = rail->Number; -+ msgBody.FailoverTxd.Xid = txd->Envelope.Xid; -+ msgBody.FailoverTxd.TxdRail = txd->Envelope.TxdRail; -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE, msg->Hdr.Xid, &msgBody); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: { -+ int txd_has_not_sent_envelope = 0; -+ EP_TXD *txd = NULL; -+ EP_TXD_RAIL *txdRail = NULL; -+ -+ if (msg->Body.NodeState.NetworkErrorState != 0) -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt + MESSAGE_RETRY_TIME); -+ else -+ { -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ -+ txd = list_entry (el, EP_TXD, Link); -+ txdRail = txd->TxdRail; -+ -+ if (txdRail != NULL && EP_XIDS_MATCH (msg->Hdr.Xid, txd->Envelope.Xid)) { -+ txd_has_not_sent_envelope = EP_XMTR_OP(txdRail->XmtrRail,CheckTxdState)(txd); -+ break; -+ } -+ } -+ -+ if (txd_has_not_sent_envelope) { -+ EPRINTF2 (DBG_STABILISE, "ep_xmtr_xid_msg_handler: GET_NODE_STATE_RESPONSE txd=%p XID=%llx not sent envelope\n", -+ txd, (long long) txd->Envelope.Xid.Unique); -+ -+ /* at this point it has finished stabalising */ -+ txd->Envelope.Attr = EP_CLEAR_TXD_STABALISING(txd->Envelope.Attr); -+ -+ /* store railmask into txd if not a service indicator or timeout */ -+ if (EP_IS_NO_TYPE(txd->Envelope.Attr)) -+ txd->Envelope.Attr = EP_SET_DATA(txd->Envelope.Attr, EP_TYPE_RAILMASK, msg->Body.NodeState.Railmask); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* TXD is now no longer bound to a rail , so let ep_check_xmtr() handle it */ -+ ep_kthread_schedule (&xmtr->Subsys->Thread, lbolt); -+ } -+ else -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ break; -+ } -+ default: -+ panic ("ep_xmtr_xid_msg_handler: XID match but invalid message type\n"); -+ } -+} -+ -+EP_XMTR * -+ep_alloc_xmtr (EP_SYS *sys) -+{ -+ EP_COMMS_SUBSYS *subsys; -+ EP_XMTR *xmtr; -+ struct list_head *el; -+ -+ if ((subsys = (EP_COMMS_SUBSYS *) ep_subsys_find (sys, EPCOMMS_SUBSYS_NAME)) == NULL) -+ return (NULL); -+ -+ KMEM_ZALLOC (xmtr, EP_XMTR *, sizeof (EP_XMTR), 1); -+ -+ if (xmtr == NULL) -+ return (NULL); -+ -+ xmtr->Subsys = subsys; -+ -+ spin_lock_init (&xmtr->Lock); -+ INIT_LIST_HEAD (&xmtr->ActiveDescList); -+ -+ kcondvar_init (&xmtr->FreeDescSleep); -+ spin_lock_init (&xmtr->FreeDescLock); -+ INIT_LIST_HEAD (&xmtr->FreeDescList); -+ INIT_LIST_HEAD (&xmtr->DescBlockList); -+ -+ ep_xid_cache_init (sys, &xmtr->XidCache); -+ -+ xmtr->XidCache.MessageHandler = ep_xmtr_xid_msg_handler; -+ xmtr->XidCache.Arg = xmtr; -+ -+ kmutex_lock (&subsys->Lock); -+ list_add_tail (&xmtr->Link, &subsys->Transmitters); -+ -+ ep_procfs_xmtr_add(xmtr); -+ -+ /* Now add all rails which are already started */ -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(commsRail, Xmtr.AddRail) (xmtr, commsRail); -+ } -+ kmutex_unlock (&subsys->Lock); -+ -+ ep_mod_inc_usecount(); -+ -+ return (xmtr); -+} -+ -+void -+ep_free_xmtr (EP_XMTR *xmtr) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el; -+ -+ kmutex_lock (&subsys->Lock); -+ list_for_each (el, &subsys->Rails) { -+ EP_COMMS_RAIL *rail = list_entry (el, EP_COMMS_RAIL, Link); -+ -+ EP_RAIL_OP(rail,Xmtr.DelRail) (xmtr, rail); -+ } -+ -+ list_del (&xmtr->Link); -+ kmutex_unlock (&subsys->Lock); -+ -+ /* all the desc's must be free */ -+ ASSERT(xmtr->FreeDescCount == xmtr->TotalDescCount); -+ -+ /* delete the descs */ -+ while (!list_empty (&xmtr->DescBlockList)) -+ FreeTxdBlock( xmtr, list_entry(xmtr->DescBlockList.next, EP_TXD_BLOCK , Link)); -+ -+ /* they had better all be gone now */ -+ ASSERT((xmtr->FreeDescCount == 0) && (xmtr->TotalDescCount == 0)); -+ -+ ep_procfs_xmtr_del(xmtr); -+ -+ ep_xid_cache_destroy (sys, &xmtr->XidCache); -+ -+ spin_lock_destroy (&xmtr->Lock); -+ KMEM_FREE (xmtr, sizeof (EP_XMTR)); -+ -+ ep_mod_dec_usecount(); -+} -+ -+long -+ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP_SYS *sys = subsys->Subsys.Sys; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ int timed_out=0; -+ int i; -+ EP_MANAGER_MSG_BODY body; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ /* See if we have any txd's which need to be bound to a rail */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP_NODE *node = &sys->Nodes[txd->NodeId]; -+ EP_RAILMASK nodeRails = node->ConnectedRails & xmtr->RailMask; -+ EP_ENVELOPE *env = &txd->Envelope; -+ -+ if (EP_IS_TXD_STABALISING(txd->Envelope.Attr)) -+ { -+ ASSERT(txd->TxdRail != NULL); -+ -+ if (AFTER (lbolt, txd->RetryTime)) -+ { -+ EPRINTF6 (DBG_STABILISE, "ep_check_xmtr txd=%p txdRail=%p send get node state to %d Xid=%08x.%08x.%016llx\n", -+ txd, txd->TxdRail, txd->NodeId, env->Xid.Generation, env->Xid.Handle, env->Xid.Unique); -+ -+ body.Service = txd->Service; -+ if (ep_send_message ( txd->TxdRail->XmtrRail->CommsRail->Rail, txd->NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE, env->Xid, &body) == 0) -+ txd->RetryTime = lbolt + (MESSAGE_RETRY_TIME << ep_backoff (&txd->Backoff, EP_BACKOFF_STABILISE)); -+ else -+ txd->RetryTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ ep_kthread_schedule (&subsys->Thread, txd->RetryTime); -+ continue; -+ } -+ -+ if (txd->TxdRail != NULL) -+ continue; -+ -+ switch (EP_ATTR2TYPE(txd->Envelope.Attr)) -+ { -+ case EP_TYPE_SVC_INDICATOR: -+ { -+ EP_RAILMASK rmask=0; -+ struct list_head *tmp; -+ -+ list_for_each (tmp, &subsys->Rails) { -+ EP_COMMS_RAIL *commsRail = list_entry (tmp, EP_COMMS_RAIL, Link); -+ if ( cm_svc_indicator_is_set(commsRail->Rail, EP_ATTR2DATA(txd->Envelope.Attr), txd->NodeId)) -+ rmask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ } -+ nodeRails &= rmask; -+ break; -+ } -+ case EP_TYPE_TIMEOUT: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_ATTR2DATA(txd->Envelope.Attr)) ? (1) : (0); -+ break; -+ case EP_TYPE_RAILMASK: -+ nodeRails &= EP_ATTR2DATA(txd->Envelope.Attr); -+ break; -+ default: -+ timed_out = AFTER(lbolt, txd->TimeStamp + EP_DEFAULT_TIMEOUT) ? (1) : (0); -+ break; -+ } -+ -+ if (nodeRails == 0 || timed_out || (EP_IS_NO_FAILOVER(env->Attr) && EP_IS_PREFRAIL_SET(env->Attr) && -+ (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr))) == 0)) -+ { -+ EPRINTF5 (timed_out ? DBG_STABILISE : DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx to %d no rails connected or cannot failover (nodeRails=0x%x,timed_out=%d\n", -+ txd, (long long) env->Xid.Unique, txd->NodeId, nodeRails, timed_out); -+ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ } -+ else -+ { -+ EP_XMTR_RAIL *xmtrRail; -+ int i, len, rnum; -+ -+ if (EP_IS_PREFRAIL_SET(env->Attr) && (nodeRails & EP_RAIL2RAILMASK(EP_ATTR2PREFRAIL(env->Attr)))) -+ rnum = EP_ATTR2PREFRAIL(env->Attr); -+ else -+ rnum = ep_pickRail (nodeRails); -+ -+ EPRINTF3 (DBG_XMTR, "ep_check_xmtr: txd=%p XID=%llx mapping NMDs onto rail %d \n", txd, (long long) env->Xid.Unique, rnum); -+ -+ for (i = len = 0; i < env->nFrags; i++, len += env->Frags[i].nmd_len) -+ ep_nmd_map_rails (sys, &env->Frags[i], nodeRails); -+ -+ if ((xmtrRail = xmtr->Rails[rnum]) == NULL || -+ !EP_XMTR_OP(xmtrRail,BindTxd) (txd, xmtrRail, EP_TXD_PHASE_ACTIVE)) -+ ep_kthread_schedule (&subsys->Thread, lbolt + RESOURCE_RETRY_TIME); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_NODE_DOWN); -+ FreeTxd (xmtr, txd); -+ } -+ -+ /* Check to see if we're low on txds */ -+ if (xmtr->FreeDescCount < ep_txd_lowat) -+ AllocateTxdBlock (xmtr, 0, NULL); -+ -+ /* Then check each rail */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->RailMask & (1 << i) ) -+ nextRunTime = EP_XMTR_OP (xmtr->Rails[i],Check) (xmtr->Rails[i], nextRunTime); -+ return (nextRunTime); -+} -+ -+void -+ep_display_txd (DisplayInfo *di, EP_TXD *txd) -+{ -+ EP_ENVELOPE *env = &txd->Envelope; -+ EP_TXD_RAIL *txdRail = txd->TxdRail; -+ -+ (di->func)(di->arg, "TXD: %p Version=%x Attr=%x Xid=%08x.%08x.%016llx\n", txd, -+ env->Version, env->Attr, env->Xid.Generation, env->Xid.Handle, (long long) env->Xid.Unique); -+ (di->func)(di->arg, " NodeId=%d Range=%d.%d TxdRail=%x TxdMain=%x.%x.%x nFrags=%d\n", -+ env->NodeId, EP_RANGE_LOW(env->Range), EP_RANGE_HIGH(env->Range), env->TxdRail, -+ env->TxdMain.nmd_addr, env->TxdMain.nmd_len, env->TxdMain.nmd_attr, env->nFrags); -+ (di->func)(di->arg, " Frag[0] %08x.%08x.%08x\n", env->Frags[0].nmd_addr, env->Frags[0].nmd_len, env->Frags[0].nmd_attr); -+ (di->func)(di->arg, " Frag[1] %08x.%08x.%08x\n", env->Frags[1].nmd_addr, env->Frags[1].nmd_len, env->Frags[1].nmd_attr); -+ (di->func)(di->arg, " Frag[2] %08x.%08x.%08x\n", env->Frags[2].nmd_addr, env->Frags[2].nmd_len, env->Frags[2].nmd_attr); -+ (di->func)(di->arg, " Frag[3] %08x.%08x.%08x\n", env->Frags[3].nmd_addr, env->Frags[3].nmd_len, env->Frags[3].nmd_attr); -+ -+ if (txdRail != NULL) EP_XMTR_OP (txdRail->XmtrRail, DisplayTxd) (di, txdRail); -+} -+ -+void -+ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr) -+{ -+ int freeCount = 0; -+ int activeCount = 0; -+ struct list_head *el; -+ int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->FreeDescLock, flags); -+ list_for_each (el, &xmtr->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtr->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) -+ activeCount++; -+ -+ (di->func)(di->arg, "ep_display_xmtr: xmtr=%p Free=%d Active=%d\n", xmtr, freeCount, activeCount); -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (xmtr->Rails[i]) EP_XMTR_OP (xmtr->Rails[i], DisplayXmtr) (di, xmtr->Rails[i]); -+ -+ list_for_each (el,&xmtr->ActiveDescList) -+ ep_display_txd (di, list_entry (el, EP_TXD, Link)); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep_xmtr_fillout_stats(EP_XMTR *xmtr, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr->stats,tx), GET_STAT_PER_SEC(xmtr->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr->stats,tx_len) / (1024*1024)); -+} -+ -+void -+ep_xmtr_rail_fillout_stats(EP_XMTR_RAIL *xmtr_rail, char *str) -+{ -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx), GET_STAT_PER_SEC(xmtr_rail->stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu Mbytes/sec\n", GET_STAT_TOTAL(xmtr_rail->stats,tx_len) / (1024*1024), GET_STAT_PER_SEC(xmtr_rail->stats,tx_len) / (1024*1024)); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsTx_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx_elan3.c 2005-07-28 14:52:52.878674176 -0400 -@@ -0,0 +1,1173 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan3.c,v 1.17.2.2 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#define XMTR_TO_RAIL(xmtrRail) ((EP3_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->Device) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+static void TxEnveEvent (EP3_RAIL *rail, void *arg); -+static void TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS EnveCookieOps = -+{ -+ TxEnveEvent, -+ TxEnveRetry, -+ NULL, /* DmaCancelled */ -+ TxEnveVerify -+}; -+ -+static void TxDataEvent (EP3_RAIL *rail, void *arg); -+static void TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status); -+static void TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DataCookieOps = -+{ -+ TxDataEvent, -+ TxDataRetry, -+ NULL, /* DmaCancelled */ -+ TxDataVerify -+}; -+ -+static void TxDoneEvent (EP3_RAIL *dev, void *arg); -+static void TxDoneRetry (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma, int status); -+static void TxDoneVerify (EP3_RAIL *dev, void *arg, E3_DMA_BE *dma); -+ -+static EP3_COOKIE_OPS DoneCookieOps = -+{ -+ TxDoneEvent, -+ TxDoneRetry, -+ NULL, /* DmaCancelled */ -+ TxDoneVerify, -+} ; -+ -+static int -+AllocateTxdRailBlock (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN3_DEV *dev = rail->Device; -+ EP3_TXD_RAIL_BLOCK *blk; -+ EP3_TXD_RAIL *txdRail; -+ sdramaddr_t pTxdElan; -+ EP3_TXD_RAIL_MAIN *pTxdMain; -+ E3_Addr pTxdElanAddr; -+ E3_Addr pTxdMainAddr; -+ E3_BlockCopyEvent event; -+ int i; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (blk, EP3_TXD_RAIL_BLOCK *, sizeof (EP3_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((pTxdElan = ep_alloc_elan (&rail->Generic, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((pTxdMain = ep_alloc_main (&rail->Generic, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK, 0, &pTxdMainAddr)) == (EP3_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ReserveDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK, 0) != ESUCCESS) -+ { -+ ep_free_main (&rail->Generic, pTxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, pTxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->Generic.XmtrRail = &xmtrRail->Generic; -+ txdRail->TxdElan = pTxdElan; -+ txdRail->TxdElanAddr = pTxdElanAddr; -+ txdRail->TxdMain = pTxdMain; -+ txdRail->TxdMainAddr = pTxdMainAddr; -+ -+ RegisterCookie (&rail->CookieTable, &txdRail->EnveCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), &EnveCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DataCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), &DataCookieOps, (void *) txdRail); -+ RegisterCookie (&rail->CookieTable, &txdRail->DoneCookie, pTxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), &DoneCookieOps, (void *) txdRail); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->EnveCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, EnveEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DataCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DataEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), sizeof (E3_BlockCopyEvent)); -+ -+ EP3_INIT_COPY_EVENT (event, txdRail->DoneCookie, pTxdMainAddr + offsetof (EP3_TXD_RAIL_MAIN, DoneEvent), 0); -+ elan3_sdram_copyl_to_sdram (dev, &event, pTxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), sizeof (E3_BlockCopyEvent)); -+ -+ pTxdMain->EnveEvent = EP3_EVENT_FREE; -+ pTxdMain->DataEvent = EP3_EVENT_FREE; -+ pTxdMain->DoneEvent = EP3_EVENT_FREE; -+ -+ /* move onto next descriptor */ -+ pTxdElan += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdElanAddr += EP3_TXD_RAIL_ELAN_SIZE; -+ pTxdMain = (EP3_TXD_RAIL_MAIN *) ((unsigned long) pTxdMain + EP3_TXD_RAIL_MAIN_SIZE); -+ pTxdMainAddr += EP3_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&blk->Link, &xmtrRail->DescBlockList); -+ xmtrRail->TotalDescCount += EP3_NUM_TXD_PER_BLOCK; -+ xmtrRail->FreeDescCount += EP3_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->Txd[i].Generic.Link, &xmtrRail->FreeDescList); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ return 1; -+} -+ -+static void -+FreeTxdRailBlock (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL_BLOCK *blk) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_del (&blk->Link); -+ -+ xmtrRail->TotalDescCount -= EP3_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->Txd[0], i = 0; i < EP3_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->FreeDescCount--; -+ -+ list_del (&txdRail->Generic.Link); -+ -+ DeregisterCookie (&rail->CookieTable, &txdRail->EnveCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DataCookie); -+ DeregisterCookie (&rail->CookieTable, &txdRail->DoneCookie); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ ReleaseDmaRetries (rail, EP3_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->Generic, blk->Txd[0].TxdMainAddr, EP3_TXD_RAIL_MAIN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->Generic, blk->Txd[0].TxdElanAddr, EP3_TXD_RAIL_ELAN_SIZE * EP3_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP3_TXD_RAIL_BLOCK)); -+} -+ -+static EP3_TXD_RAIL * -+GetTxdRail (EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = xmtrRail->Generic.Xmtr->Subsys; -+ EP3_TXD_RAIL *txdRail; -+ int low_on_txds; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ if (list_empty (&xmtrRail->FreeDescList)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->FreeDescList.next, EP3_TXD_RAIL, Generic.Link); -+ -+#if defined(DEBUG) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_FREE); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT(elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ } -+#endif -+ -+ list_del (&txdRail->Generic.Link); -+ -+ xmtrRail->FreeDescCount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->FreeDescCount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ return (txdRail); -+} -+ -+static void -+FreeTxdRail (EP3_XMTR_RAIL *xmtrRail, EP3_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+#if defined(DEBUG_ASSERT) -+ { -+ EP_RAIL *rail = xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = ((EP3_RAIL *) rail)->Device; -+ -+ EP_ASSERT (rail, txdRail->Generic.XmtrRail == &xmtrRail->Generic); -+ -+ EP_ASSERT (rail, txdRail->TxdMain->EnveEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DataEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, txdRail->TxdMain->DoneEvent == EP3_EVENT_PRIVATE); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ EP_ASSERT (rail, SDRAM_ASSERT (elan3_sdram_readl (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)) == 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_FREE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_FREE; -+ } -+#endif -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ -+ list_add (&txdRail->Generic.Link, &xmtrRail->FreeDescList); -+ -+ xmtrRail->FreeDescCount++; -+ -+ if (xmtrRail->FreeDescWaiting) -+ { -+ xmtrRail->FreeDescWaiting--; -+ kcondvar_wakeupall (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+} -+ -+static void -+BindTxdToRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ -+ EPRINTF6 (DBG_XMTR, "%s: BindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->Generic; -+ txdRail->Generic.Txd = txd; -+} -+ -+static void -+UnbindTxdFromRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ASSERT (SPINLOCK_HELD (&txd->Xmtr->Lock)); -+ ASSERT (txd->TxdRail == &txdRail->Generic && txdRail->Generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: UnbindTxdToRail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, (long long) txd->Envelope.Xid.Unique); -+ txd->TxdRail = NULL; -+ txdRail->Generic.Txd = NULL; -+} -+ -+/* -+ * TxEnveEvent: arg == EP_TXD -+ * Called when envelope delivered -+ */ -+static void -+TxEnveEvent (EP3_RAIL *rail, void *arg) -+{ -+ panic ("TxEnveEvent"); -+} -+ -+/* -+ * TxEnveRetry: arg == EP3_TXD_RAIL -+ * Called on retry of dma of large message envelope. -+ */ -+static void -+TxEnveRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ -+ EPRINTF3 (DBG_XMTR, "%s: TxEnveRetry: xmtr %p txd %p\n", rail->Generic.Name, xmtrRail, txdRail); -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+ -+ if (! TxdShouldStabalise (&txdRail->Generic, &rail->Generic)) -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_ENVELOPE)); -+ else -+ QueueDmaForRetry (rail, dma, EP_RETRY_STABALISING); /* place dma on stabilising list for neterr fixup */ -+} -+ -+static void -+TxEnveVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ -+ EP_ASSERT (&rail->Generic, txdRail->TxdMain->EnveEvent == EP3_EVENT_ACTIVE); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 1)); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txdRail->Generic.Txd->NodeId); -+} -+ -+/* -+ * TxDataEvent: arg == EP3_TXD -+ * Called on completion of a large transmit. -+ */ -+static void -+TxDataEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DataCookie, txdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdMain->DataEvent)) /* PCI read */ -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDataEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p previously collecting by polling\n", -+ rail->Generic.Name, xmtrRail, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) || /* as no interrupt, or been reused as an RPC, */ -+ (EP_IS_RPC(txd->Envelope.Attr))) /* then we were either called as a result of a previous */ -+ { /* tx which was completed by polling or as a result */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDataEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ ASSERT (EP3_EVENT_FIRED (txdRail->EnveCookie, txdMain->EnveEvent)); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataEvent : xmtrRail=%p txdRail=%p tx=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit lists */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDataRetry: arg == EP3_TXD -+ * Called on retry of remote "put" dma of large transmit data. -+ */ -+static void -+TxDataRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDataRetry: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ QueueDmaForRetry (rail, dma, EP_RETRY_LOW_PRI_RETRY + ep_backoff (&txdRail->Backoff, EP_BACKOFF_DATA)); -+} -+ -+static void -+TxDataVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ EP_ASSERT (&rail->Generic, ((txdRail->TxdMain->DataEvent == EP3_EVENT_ACTIVE && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) >= 1)) || /* PCI read */ -+ (EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ SDRAM_ASSERT (elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)))); /* PCI read */ -+ EP_ASSERT (&rail->Generic, dma->s.dma_direction == DMA_WRITE && EP_VP_TO_NODE(dma->s.dma_destVProc) == txd->NodeId); -+} -+ -+/* -+ * TxDoneEvent: arg == EP3_TXD -+ * Called on completion of a RPC. -+ */ -+static void -+TxDoneEvent (EP3_RAIL *rail, void *arg) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) arg; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP_XMTR *xmtr = xmtrRail->Generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ for (;;) -+ { -+ if (EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ break; -+ -+ if (EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent), txdRail->DoneCookie, txdRail->TxdMain->DoneEvent) && -+ EP3_EVENT_FIRING (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent), txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ if (delay > EP3_EVENT_FIRING_TLIMIT) -+ panic ("TxDoneEvent: events set but block copy not completed\n"); -+ DELAY(delay); -+ delay <<= 1; -+ } -+ else -+ { -+ EPRINTF3 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txdRail %p previously collecting by polling\n", -+ rail->Generic.Name, xmtr, txdRail); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ mb(); -+ } -+ -+ if ((txd = txdRail->Generic.Txd) == NULL || /* If there is no txd, or if the descriptor is marked */ -+ !(EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr) || EP_IS_RPC(txd->Envelope.Attr))) /* marked as no interrupt, or been reused as an transmit, */ -+ { /* then we were either called as a result of a previous */ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); /* tx which was completed by polling or as a result */ -+ /* of a EnableTxCallBack/DisableTxCallback */ -+ -+ EPRINTF4 (DBG_XMTR, "%s: TxDoneEvent: xmtr %p txd %p recyled (%x)\n", -+ rail->Generic.Name, xmtr, txd, txd ? txd->Envelope.Attr : 0); -+ return; -+ } -+ -+ EPRINTF5 (DBG_XMTR, "%s: TxDoneEvent: xmtrRail=%p txdRail=%p txd=%p XID=%llx\n", -+ rail->Generic.Name, xmtrRail, txdRail, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags for next time round */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ if (txd->Handler) -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+} -+ -+/* -+ * TxDoneRetry: arg == EP3_TXD -+ */ -+static void -+TxDoneRetry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int status) -+{ -+ panic ("TxDoneRetry"); -+} -+ -+static void -+TxDoneVerify (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma) -+{ -+ panic ("TxDoneVerify"); -+} -+ -+static void -+EnableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: EnableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_SET_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DoneCookie.Cookie); -+ } -+ else -+ { -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY | EV_TYPE_EVIRQ | txdRail->DataCookie.Cookie); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+ } -+} -+ -+static void -+DisableTransmitCallback (EP_TXD *txd, EP3_TXD_RAIL *txdRail) -+{ -+ ELAN3_DEV *dev = XMTR_TO_RAIL(txdRail->Generic.XmtrRail)->Device; -+ -+ EPRINTF3 (DBG_XMTR, "%s: DisableTransmitCallback: txd %p txdRail %p\n", XMTR_TO_RAIL (txdRail->Generic.XmtrRail)->Generic.Name, txd, txdRail); -+ -+ txd->Envelope.Attr = EP_CLEAR_INTERRUPT_ENABLED(txd->Envelope.Attr); -+ -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (dev, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type), EV_TYPE_BCOPY); -+} -+ -+static void -+InitialiseTxdRail (EP_TXD *txd, EP3_TXD_RAIL *txdRail, int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->Generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->TxdElanAddr; -+ txd->Envelope.NodeId = rail->Generic.Position.pos_nodeid; -+ -+ /* Initialise the dma backoff */ -+ txdRail->Backoff.type = EP_BACKOFF_FREE; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 1); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), -+ (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + (EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0)); -+ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_ACTIVE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_ACTIVE; -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ ASSERT (EP_IS_RPC(txd->Envelope.Attr)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); -+ -+ txdRail->TxdMain->EnveEvent = txdRail->EnveCookie.Cookie; -+ txdRail->TxdMain->DataEvent = txdRail->DataCookie.Cookie; -+ break; -+ } -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ txdRail->TxdMain->DoneEvent = txdRail->DoneCookie.Cookie; -+ else -+ { -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 1); -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_ACTIVE; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ else -+ EnableTransmitCallback (txd, txdRail); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* copy the envelope and payload if present down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Envelope, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Envelope), EP_ENVELOPE_SIZE); -+ -+ if (EP_HAS_PAYLOAD(txd->Envelope.Attr)) -+ elan3_sdram_copyl_to_sdram (rail->Device, &txd->Payload, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, Payload), EP_PAYLOAD_SIZE); -+} -+ -+void -+ep3xmtr_flush_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ -+ switch (rail->Generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* only need to acquire/release the Lock to ensure that -+ * the node state transition has been noticed. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (! EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep3xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep3xmtr_failover_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+#ifdef SUPPORT_RAIL_FAILOVER -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+#endif -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+#ifdef SUPPORT_RAIL_FAILOVER -+ /* Transmit data not been sent, so just restart on different rail */ -+ if (! EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d unbind an retry\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ /* epcomms thread will restart on different rail */ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ continue; -+ } -+ -+ if (EP_IS_RPC(txd->Envelope.Attr) && !EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ if (EP_IS_NO_FAILOVER(txd->Envelope.Attr)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d - not able to failover\n", -+ rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ list_del (&txd->Link); -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* envelope and data events must have been set, so only clear the done event */ -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)) == 0)); -+ EP_ASSERT (&rail->Generic, SDRAM_ASSERT(elan3_sdram_readl (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)) == 0)); -+ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ list_add_tail (&txd->Link, &txdList); -+ continue; -+ } -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d passive\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ continue; -+ } -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+#endif -+ -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep3xmtr_disconnect_callback (EP_XMTR *xmtr, EP3_XMTR_RAIL *xmtrRail) -+{ -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[txd->NodeId]; -+ -+ if (!TXD_BOUND2RAIL(txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p completed to node %d\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ continue; -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep3xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->Generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+int -+ep3xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->Generic.Txd; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ EnableTransmitCallback (txd, txdRail); -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ DisableTransmitCallback (txd, txdRail); -+ break; -+ } -+ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent) && -+ EP3_EVENT_FIRED (txdRail->DataCookie, txdRail->TxdMain->DataEvent) && -+ EP3_EVENT_FIRED (txdRail->DoneCookie, txdRail->TxdMain->DoneEvent)) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep3xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->Generic.Name, txd, (long long) txd->Envelope.Xid.Unique); -+ -+ ep_xmtr_txd_stat(xmtrRail->Generic.Xmtr,txd); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep3xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP3_TXD_RAIL *txdRail; -+ E3_DMA_BE dmabe; -+ -+ if ((txdRail = GetTxdRail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->Generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: TransmitTxdOnRail: node %u not connected on this rail\n", rail->Generic.Name, txd->NodeId); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset all events, since non of them could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ InitialiseTxdRail (txd, txdRail, phase); -+ -+ /* Initialise the dma descriptor */ -+ dmabe.s.dma_type = E3_DMA_TYPE (DMA_BYTE, DMA_WRITE, DMA_QUEUED, EP3_DMAFAILCOUNT); -+ dmabe.s.dma_size = (EP_HAS_PAYLOAD(txd->Envelope.Attr) ? EP_INPUTQ_SIZE : EP_ENVELOPE_SIZE); -+ dmabe.s.dma_source = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, Envelope); -+ dmabe.s.dma_dest = (E3_Addr) 0; -+ dmabe.s.dma_destEvent = EP_MSGQ_ADDR(txd->Service); -+ dmabe.s.dma_destCookieVProc = EP_VP_DATA (txd->NodeId); -+ dmabe.s.dma_srcEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ dmabe.s.dma_srcCookieVProc = LocalCookie (rail, txd->NodeId); -+ -+ EPRINTF8 (DBG_XMTR, "%s: TransmitTxdOnRail: txd=%p txdRail=%p @ %x XID=%llx dest=%u srcEvent=%x srcCookie=%x\n", rail->Generic.Name, -+ txd, txdRail, txdRail->TxdElanAddr, (long long) txd->Envelope.Xid.Unique, txd->NodeId, dmabe.s.dma_srcEvent, dmabe.s.dma_srcCookieVProc); -+ -+ BindTxdToRail (txd, txdRail); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_LOW_PRI, FALSE) != ISSUE_COMMAND_OK) -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_LOW_PRI); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ InitialiseTxdRail (txd, txdRail, EP_TXD_PHASE_PASSIVE); /* initialise as passive (updated envelope) */ -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ BindTxdToRail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep3xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ -+ /* XXXX - TBD assertions on phase */ -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+} -+ -+long -+ep3xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ -+ if (xmtrRail->FreeDescCount < ep_txd_lowat && !AllocateTxdRailBlock(xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->Generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep3xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ -+ KMEM_ZALLOC (xmtrRail, EP3_XMTR_RAIL *, sizeof (EP3_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->FreeDescLock); -+ kcondvar_init (&xmtrRail->FreeDescSleep); -+ INIT_LIST_HEAD (&xmtrRail->FreeDescList); -+ INIT_LIST_HEAD (&xmtrRail->DescBlockList); -+ -+ xmtrRail->Generic.CommsRail = commsRail; -+ xmtrRail->Generic.Xmtr = xmtr; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->Generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep3xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) commsRail->Rail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->Generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ while (xmtrRail->FreeDescCount != xmtrRail->TotalDescCount) -+ { -+ xmtrRail->FreeDescWaiting++; -+ kcondvar_wait (&xmtrRail->FreeDescSleep, &xmtrRail->FreeDescLock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* need to free up the txd's and blocks */ -+ /* all the txd's accociated with DescBlocks must be in the FreeDescList */ -+ ASSERT (xmtrRail->TotalDescCount == xmtrRail->FreeDescCount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->DescBlockList)) -+ FreeTxdRailBlock (xmtrRail, list_entry(xmtrRail->DescBlockList.next, EP3_TXD_RAIL_BLOCK , Link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->FreeDescCount == 0) && (xmtrRail->TotalDescCount == 0)); -+ -+ spin_lock_destroy (&xmtrRail->FreeDescLock); -+ kcondvar_destroy (&xmtrRail->FreeDescSleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP3_XMTR_RAIL)); -+} -+ -+void -+ep3xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) x; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head *el; -+ unsigned long flags; -+ int freeCount = 0; -+ -+ spin_lock_irqsave (&xmtrRail->FreeDescLock, flags); -+ list_for_each (el, &xmtrRail->FreeDescList) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->FreeDescLock, flags); -+ -+ (di->func)(di->arg, " Rail=%d Free=%d Total=%d (%d)\n", -+ rail->Generic.Number, xmtrRail->FreeDescCount, xmtrRail->TotalDescCount, freeCount); -+} -+ -+void -+ep3xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) t; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_TXD_RAIL_MAIN *txdMain = txdRail->TxdMain; -+ sdramaddr_t txdElan = txdRail->TxdElan; -+ EP3_RAIL *rail = (EP3_RAIL *) xmtrRail->Generic.CommsRail->Rail; -+ ELAN3_DEV *dev = rail->Device; -+ -+ (di->func)(di->arg, " EnveEvent=%x DataEvent=%x DoneEvent=%x Rail=%s\n", -+ txdMain->EnveEvent, txdMain->DataEvent, txdMain->DoneEvent, rail->Generic.Name); -+ (di->func)(di->arg, " EnveEvent=%x.%x DataEvent=%x.%x DoneEvent=%x.%x\n", -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Type)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count)), -+ elan3_sdram_readl (dev, txdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Type))); -+} -+ -+int -+ep3xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP3_TXD_RAIL *txdRail = (EP3_TXD_RAIL *) txd->TxdRail; -+ EP3_XMTR_RAIL *xmtrRail = (EP3_XMTR_RAIL *) txdRail->Generic.XmtrRail; -+ EP3_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ E3_Addr enveEvent = txdRail->TxdElanAddr + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent); -+ EP3_RETRY_DMA *retry = NULL; -+ -+ struct list_head *el; -+ struct list_head *nel; -+ unsigned long flags; -+ -+ /* is enevelope event is really not set */ -+ if (EP3_EVENT_FIRED (txdRail->EnveCookie, txdRail->TxdMain->EnveEvent )) -+ return (0); -+ -+ /* remove matching dma from stalled list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_for_each_safe(el, nel, &rail->DmaRetries[EP_RETRY_STABALISING]) { -+ retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if ( retry->Dma.s.dma_srcEvent == enveEvent ) { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ break; /* there can only be one */ -+ } -+ } -+ ASSERT ( retry != NULL); /* must find one in list */ -+ ASSERT ( retry->Dma.s.dma_srcEvent == enveEvent ); /* better still be the right type then */ -+ -+ /* add to free list */ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ UnbindTxdFromRail (txd, txdRail); -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->TxdMain->EnveEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DataEvent = EP3_EVENT_PRIVATE; -+ txdRail->TxdMain->DoneEvent = EP3_EVENT_PRIVATE; -+ -+ /* reset the envelope and data events, since only they could have been set */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, EnveEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DataEvent.ev_Count), 0); /* PCI write */ -+ elan3_sdram_writel (rail->Device, txdRail->TxdElan + offsetof (EP3_TXD_RAIL_ELAN, DoneEvent.ev_Count), 0); /* PCI write */ -+ -+ FreeTxdRail (xmtrRail, txdRail); -+ -+ return (1); -+} -+ -+void -+ep3xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP3_XMTR_RAIL * ep3xmtr_rail = (EP3_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/epcommsTx_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/epcommsTx_elan4.c 2005-07-28 14:52:52.881673720 -0400 -@@ -0,0 +1,1389 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: epcommsTx_elan4.c,v 1.26.2.4 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcommsTx_elan4.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+#define XMTR_TO_COMMS(xmtrRail) ((EP4_COMMS_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail) -+#define XMTR_TO_RAIL(xmtrRail) ((EP4_RAIL *) ((EP_XMTR_RAIL *) xmtrRail)->CommsRail->Rail) -+#define XMTR_TO_DEV(xmtrRail) (XMTR_TO_RAIL(xmtrRail)->r_ctxt.ctxt_dev) -+#define XMTR_TO_SUBSYS(xmtrRail) (((EP_XMTR_RAIL *) xmtrRail)->Xmtr->Subsys) -+ -+#define TXD_TO_XMTR(txdRail) ((EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail) -+#define TXD_TO_RAIL(txdRail) XMTR_TO_RAIL(TXD_TO_XMTR(txdRail)) -+ -+static void txd_interrupt (EP4_RAIL *rail, void *arg); -+static void poll_interrupt (EP4_RAIL *rail, void *arg); -+ -+static __inline__ int -+on_list (struct list_head *ent, struct list_head *list) -+{ -+ struct list_head *el; -+ unsigned int count = 0; -+ list_for_each (el, list) { -+ if (el == ent) -+ count++; -+ } -+ return count; -+} -+ -+static __inline__ void -+__ep4_txd_assert_free (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FREE) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FREE) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FREE) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_free: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FREE; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FREE; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_free"); -+ } -+} -+ -+static __inline__ void -+__ep4_txd_assert_finished (EP4_TXD_RAIL *txdRail, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ register int failed = 0; -+ -+ if ((txdRail)->txd_retry_time != 0) failed |= (1 << 0); -+ if ((txdRail)->txd_main->txd_env != EP4_STATE_FINISHED) failed |= (1 << 1); -+ if ((txdRail)->txd_main->txd_data != EP4_STATE_FINISHED) failed |= (1 << 2); -+ if ((txdRail)->txd_main->txd_done != EP4_STATE_FINISHED) failed |= (1 << 3); -+ -+ if (sdram_assert) -+ { -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)) >> 32) != -32) failed |= (1 << 4); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)) >> 32) != 0) failed |= (1 << 5); -+ if ((int)(elan4_sdram_readq (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)) >> 32) != 0) failed |= (1 << 6); -+ } -+ -+ if (failed) -+ { -+ printk ("__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assert_finished: failed=%x txdRail=%p at %s:%d\n", failed, txdRail, file, line); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ (txdRail)->txd_retry_time = 0; -+ (txdRail)->txd_main->txd_env = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_data = EP4_STATE_FINISHED; -+ (txdRail)->txd_main->txd_done = EP4_STATE_FINISHED; -+ -+ if (sdram_assert) -+ { -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType) + 4, -32); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType) + 4, 0); -+ elan4_sdram_writel (dev, (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType) + 4, 0); -+ } -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "__ep4_txd_assert_finished"); -+ } -+} -+ -+static __inline__ int -+__ep4_txd_assfail (EP4_TXD_RAIL *txdRail, const char *expr, const char *file, const int line) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR (txdRail); -+ -+ printk ("__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ -+ ep_debugf (DBG_DEBUG, "__ep4_txd_assfail: %s:%d '%s'\n", file, line, expr); -+ ep4xmtr_display_txd (&di_ep_debug, &txdRail->txd_generic); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL (xmtrRail), "__ep4_txd_assfail"); -+ -+ return 0; -+} -+ -+#define EP4_TXD_ASSERT(txdRail, EX) ((void) ((EX) || (__ep4_txd_assfail(txdRail, #EX, __FILE__, __LINE__)))) -+#define EP4_TXD_ASSERT_FREE(txdRail) __ep4_txd_assert_free(txdRail, __FILE__, __LINE__) -+#define EP4_TXD_ASSERT_FINISHED(txdRail) __ep4_txd_assert_finished(txdRail, __FILE__, __LINE__) -+ -+static int -+alloc_txd_block (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ EP4_TXD_RAIL_BLOCK *blk; -+ EP4_TXD_RAIL_MAIN *txdMain; -+ EP_ADDR txdMainAddr; -+ sdramaddr_t txdElan; -+ EP_ADDR txdElanAddr; -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (blk, EP4_TXD_RAIL_BLOCK *, sizeof (EP4_TXD_RAIL_BLOCK), 1); -+ -+ if (blk == NULL) -+ return 0; -+ -+ if ((txdElan = ep_alloc_elan (&rail->r_generic, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdElanAddr)) == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if ((txdMain = ep_alloc_main (&rail->r_generic, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK, 0, &txdMainAddr)) == (EP4_TXD_RAIL_MAIN *) NULL) -+ { -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ if (ep4_reserve_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK, 0) != 0) -+ { -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, txdElanAddr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ return 0; -+ } -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ txdRail->txd_generic.XmtrRail = &xmtrRail->xmtr_generic; -+ txdRail->txd_elan = txdElan; -+ txdRail->txd_elan_addr = txdElanAddr; -+ txdRail->txd_main = txdMain; -+ txdRail->txd_main_addr = txdMainAddr; -+ -+ /* We only need to reserve space for one command stream, since the sten packet -+ * can only be retrying *before* the dma source event is set. -+ * reserve bytes of "event" cq space for the completion write + interrupt */ -+ if ((txdRail->txd_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, EP4_INTR_CMD_NDWORDS)) == NULL) -+ goto failed; -+ -+ /* register the main interrupt cookies */ -+ ep4_register_intcookie (rail, &txdRail->txd_intcookie, txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done), txd_interrupt, txdRail); -+ -+ /* initialise the events */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WritePtr), -+ txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_data)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_WriteValue), -+ EP4_STATE_FINISHED); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopySource), -+ txdElanAddr + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd)); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CopyDest), -+ txdRail->txd_ecq->ecq_addr); -+ -+ /* Initialise the command streams */ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_env))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_write_value), -+ EP4_STATE_FAILED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_cmd), -+ WRITE_DWORD_CMD | (txdMainAddr + offsetof (EP4_TXD_RAIL_MAIN, txd_done))); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_write_value), -+ EP4_STATE_FINISHED); -+ elan4_sdram_writeq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txdMain->txd_env = EP4_STATE_FREE; -+ txdMain->txd_data = EP4_STATE_FREE; -+ txdMain->txd_done = EP4_STATE_FREE; -+ -+ /* move onto next descriptor */ -+ txdElan += EP4_TXD_RAIL_ELAN_SIZE; -+ txdElanAddr += EP4_TXD_RAIL_ELAN_SIZE; -+ txdMain = (EP4_TXD_RAIL_MAIN *) ((unsigned long) txdMain + EP4_TXD_RAIL_MAIN_SIZE); -+ txdMainAddr += EP4_TXD_RAIL_MAIN_SIZE; -+ } -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&blk->blk_link, &xmtrRail->xmtr_blocklist); -+ -+ xmtrRail->xmtr_totalcount += EP4_NUM_TXD_PER_BLOCK; -+ xmtrRail->xmtr_freecount += EP4_NUM_TXD_PER_BLOCK; -+ -+ for (i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++) -+ list_add (&blk->blk_txds[i].txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ return 1; -+ -+ failed: -+ while (--i >= 0) -+ { -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ } -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+ -+ return 0; -+} -+ -+static void -+free_txd_block (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL_BLOCK *blk) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_del (&blk->blk_link); -+ -+ xmtrRail->xmtr_totalcount -= EP4_NUM_TXD_PER_BLOCK; -+ -+ for (txdRail = &blk->blk_txds[0], i = 0; i < EP4_NUM_TXD_PER_BLOCK; i++, txdRail++) -+ { -+ xmtrRail->xmtr_freecount--; -+ -+ ep4_put_ecq (rail, txdRail->txd_ecq, EP4_INTR_CMD_NDWORDS); -+ -+ ep4_deregister_intcookie (rail, &txdRail->txd_intcookie); -+ -+ list_del (&txdRail->txd_generic.Link); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ ep4_release_dma_retries (rail, EP4_NUM_TXD_PER_BLOCK); -+ -+ ep_free_main (&rail->r_generic, blk->blk_txds[0].txd_main_addr, EP4_TXD_RAIL_MAIN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ ep_free_elan (&rail->r_generic, blk->blk_txds[0].txd_elan_addr, EP4_TXD_RAIL_ELAN_SIZE * EP4_NUM_TXD_PER_BLOCK); -+ -+ KMEM_FREE (blk, sizeof (EP4_TXD_RAIL_BLOCK)); -+} -+ -+static EP4_TXD_RAIL * -+get_txd_rail (EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ int low_on_txds; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ if (list_empty (&xmtrRail->xmtr_freelist)) -+ txdRail = NULL; -+ else -+ { -+ txdRail = list_entry (xmtrRail->xmtr_freelist.next, EP4_TXD_RAIL, txd_generic.Link); -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ list_del (&txdRail->txd_generic.Link); -+ -+ xmtrRail->xmtr_freecount--; -+ } -+ /* Wakeup the descriptor primer thread if there's not many left */ -+ low_on_txds = (xmtrRail->xmtr_freecount < ep_txd_lowat); -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ if (low_on_txds) -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ -+ return (txdRail); -+} -+ -+static void -+free_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ unsigned long flags; -+ -+ EP4_TXD_ASSERT_FREE(txdRail); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ -+ list_add (&txdRail->txd_generic.Link, &xmtrRail->xmtr_freelist); -+ -+ xmtrRail->xmtr_freecount++; -+ -+ if (xmtrRail->xmtr_freewaiting) -+ { -+ xmtrRail->xmtr_freewaiting--; -+ kcondvar_wakeupall (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock); -+ } -+ -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+} -+ -+static void -+bind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EPRINTF6 (DBG_XMTR, "%s: bind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, txd->Envelope.Xid.Unique); -+ -+ txd->TxdRail = &txdRail->txd_generic; -+ txdRail->txd_generic.Txd = txd; -+} -+ -+static void -+unbind_txd_rail (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_TXD_ASSERT (txdRail, txd->TxdRail == &txdRail->txd_generic && txdRail->txd_generic.Txd == txd); -+ -+ EPRINTF6 (DBG_XMTR, "%s: unbind_txd_rail: txd=%p txdRail=%p XID=%08x.%08x.%016llx\n", -+ XMTR_TO_RAIL(txdRail->txd_generic.XmtrRail)->r_generic.Name, txd, txdRail, -+ txd->Envelope.Xid.Generation, txd->Envelope.Xid.Handle, txd->Envelope.Xid.Unique); -+ -+ -+ txdRail->txd_generic.Txd = NULL; -+ txd->TxdRail = NULL; -+} -+ -+static void -+initialise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ /* Flush the Elan TLB if mappings have changed */ -+ ep_perrail_dvma_sync (&rail->r_generic); -+ -+ /* Initialise the per-rail fields in the envelope */ -+ txd->Envelope.TxdRail = txdRail->txd_elan_addr; -+ txd->Envelope.NodeId = rail->r_generic.Position.pos_nodeid; -+ -+ /* Allocate a network error fixup cookie */ -+ txdRail->txd_cookie = ep4_neterr_cookie (rail, txd->NodeId) | EP4_COOKIE_STEN; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ if ( epdebug_check_sum ) -+ txd->Envelope.CheckSum = ep_calc_check_sum( txd->Xmtr->Subsys->Subsys.Sys, &txd->Envelope, txd->Envelope.Frags, txd->Envelope.nFrags); -+ else -+#endif -+ txd->Envelope.CheckSum = 0; -+ -+ /* Initialise the per-rail events */ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ { -+ unsigned int nsets = (txd->Envelope.nFrags ? txd->Envelope.nFrags : 1) + ( EP_IS_MULTICAST(txd->Envelope.Attr) ? 1 : 0); -+ -+ if (! EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32 * nsets, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE(-32 * nsets , E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ txdRail->txd_main->txd_data = EP4_STATE_ACTIVE; -+ } -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ break; -+ } -+ -+ case EP_TXD_PHASE_PASSIVE: -+ EP4_TXD_ASSERT (txdRail, EP_IS_RPC(txd->Envelope.Attr)); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_data = EP4_STATE_FINISHED; -+ txdRail->txd_main->txd_done = EP4_STATE_ACTIVE; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ break; -+ } -+ -+ if (EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+} -+ -+static void -+terminate_txd_rail (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ /* clear the done flags - so that it will be ignored if an event interrupt is generated */ -+ txdRail->txd_main->txd_env = EP4_STATE_FREE; -+ txdRail->txd_main->txd_data = EP4_STATE_FREE; -+ txdRail->txd_main->txd_done = EP4_STATE_FREE; -+ -+#if defined(DEBUG_ASSERT) -+ if (sdram_assert) -+ { -+ ELAN4_DEV *dev = XMTR_TO_RAIL (xmtrRail)->r_ctxt.ctxt_dev; -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+#endif -+} -+ -+static void -+defer_txd_rail (EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_RAIL *rail = XMTR_TO_RAIL(xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_COMMS_SUBSYS *subsys = XMTR_TO_SUBSYS(xmtrRail); -+ -+ EPRINTF5 (DBG_XMTR, "%s: defer_txd_rail: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ rail->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ /* transmit has completed, but the data dma has not completed -+ * (because of network error fixup), we queue the txdRail onto a list -+ * to be polled for completion later. -+ */ -+ if (txdRail->txd_retry_time) -+ { -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ } -+ -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+} -+ -+static void -+finalise_txd (EP_TXD *txd, EP4_TXD_RAIL *txdRail) -+{ -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ -+ EP4_TXD_ASSERT_FINISHED (txdRail); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+} -+ -+static void -+txd_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) arg; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ int delay = 1; -+ EP_TXD *txd; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ for (;;) -+ { -+ if (txdRail->txd_main->txd_done == EP4_STATE_FINISHED || txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ break; -+ -+ /* The write to txd_done could be held up in the PCI bridge even though -+ * we've seen the interrupt cookie. Unlike elan3, there is no possibility -+ * of spurious interrupts since we flush the command queues on node -+ * disconnection and the txcallback mechanism */ -+ mb(); -+ -+ if (delay > EP4_EVENT_FIRING_TLIMIT) -+ { -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ EP_ASSFAIL (XMTR_TO_RAIL(xmtrRail), "txd_interrupt - not finished\n"); -+ return; -+ } -+ DELAY (delay); -+ delay <<= 1; -+ } -+ -+ txd = txdRail->txd_generic.Txd; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED) -+ { -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time == 0); /* cannot be on retry/poll list */ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_done != EP4_STATE_FINISHED); /* data xfer cannot have finished */ -+ -+ if (TxdShouldStabalise (&txdRail->txd_generic, &rail->r_generic)) -+ { -+ EPRINTF6 (DBG_STABILISE, "%s: txd_interrupt: stablise xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt; /* indicate on retry list */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ else -+ { -+ EPRINTF6 (DBG_RETRY, "%s: txd_interrupt: retry xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ txdRail->txd_retry_time = lbolt + EP_RETRY_LOW_PRI_TIME; /* XXXX: backoff ? */ -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ return; -+ } -+ -+ EP4_TXD_ASSERT (txdRail, txd != NULL && !(EP_IS_NO_INTERRUPT(txd->Envelope.Attr))); -+ -+ EPRINTF6 (DBG_XMTR, "%s: txd_interrupt: xmtrRail=%p txdRail=%p txd=%p XID=%llx dest=%u\n", rail->r_generic.Name, -+ xmtrRail, txdRail, txd, txd->Envelope.Xid.Unique, txd->NodeId); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ defer_txd_rail (txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ } -+ else -+ { -+ /* remove from active transmit list */ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat(xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+static void -+poll_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ -+ ep_poll_transmits (xmtrRail->xmtr_generic.Xmtr); -+} -+ -+void -+issue_envelope_packet (EP4_XMTR_RAIL *xmtrRail, EP4_TXD_RAIL *txdRail) -+{ -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ ELAN4_CQ *cq = xmtrRail->xmtr_cq; -+ E4_uint64 *blk0 = (E4_uint64 *) &txd->Envelope; -+ E4_uint64 *blk1 = EP_HAS_PAYLOAD(txd->Envelope.Attr) ? (E4_uint64 *) &txd->Payload : NULL; -+ E4_Addr qaddr = EP_MSGQ_ADDR(txd->Service); -+ -+ EP4_SDRAM_ASSERT (TXD_TO_RAIL(txdRail),\ -+ (txdRail)->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType),\ -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS));\ -+ -+ elan4_open_packet (cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_DATA(txd->NodeId))); -+ elan4_sendtrans0 (cq, TR_INPUT_Q_GETINDEX, EP_MSGQ_ADDR(txd->Service)); -+ -+ /* send the payload if present */ -+ if (blk0) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 0, blk0); -+ if (blk1) elan4_sendtransp (cq, TR_WRITE(128 >> 3, 0, TR_DATATYPE_BYTE), 128, blk1); -+ -+ elan4_sendtrans1 (cq, TR_INPUT_Q_COMMIT, qaddr, txdRail->txd_cookie); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_write_dword_cmd (cq, txdRail->txd_main_addr + offsetof (EP4_TXD_RAIL_MAIN, txd_env), EP4_STATE_FINISHED); -+ -+ elan4_guard (cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (EP4_STEN_RETRYCOUNT)); -+ elan4_set_event_cmd (cq, txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_env)); -+ -+ elan4_write_dword_cmd (cq, xmtrRail->xmtr_main_addr + offsetof (EP4_XMTR_RAIL_MAIN, xmtr_flowcnt), ++xmtrRail->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_flush_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ switch (rail->r_generic.CallbackStep) -+ { -+ case EP_CB_FLUSH_FILTERING: -+ /* need to acquire/release the Lock to ensure that the node state -+ * transition has been noticed and no new envelopes are queued to -+ * nodes which are passivating. */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* Then we insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ break; -+ -+ case EP_CB_FLUSH_FLUSHING: -+ /* remove any envelopes which are retrying to nodes which are going down */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_main->txd_env == EP4_STATE_FAILED); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF2 (DBG_XMTR, "%s; ep4xmtr_flush_callback: removing txdRail %p from retry list\n", rail->r_generic.Name, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ list_del (&txdRail->txd_retry_link); -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]); -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* Determine whether we have active or passive messages to -+ * any node which is passivating */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if (txdRail == NULL || txdRail->txd_generic.XmtrRail != &xmtrRail->xmtr_generic || nodeRail->State != EP_NODE_LOCAL_PASSIVATE) -+ continue; -+ -+ EPRINTF5 (DBG_XMTR, "%s: flush txd=%p txdRail=%p data=%llx done=%llx\n", rail->r_generic.Name, -+ txd, txdRail, txdRail->txd_main->txd_data, txdRail->txd_main->txd_done); -+ -+ if (EP_IS_RPC(txd->Envelope.Attr)) -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ else if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_PASSIVE_MESSAGES; -+ } -+ else -+ { -+ if (txdRail->txd_main->txd_data == EP4_STATE_ACTIVE) -+ nodeRail->MessageState |= EP_NODE_ACTIVE_MESSAGES; -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ break; -+ -+ default: -+ panic ("ep4xmtr_flush_callback: invalid callback step\n"); -+ break; -+ } -+} -+ -+void -+ep4xmtr_failover_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ /* Only progress relocation of txd's bound to this rail */ -+ if (! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_PASSIVATED) -+ continue; -+ -+ /* XXXX - no rail failover for now ....*/ -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_failover_callback - xmtr %p txd %p node %d completed\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+ -+void -+ep4xmtr_disconnect_callback (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el, *nel; -+ struct list_head txdList; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ list_for_each_safe (el, nel, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[txd->NodeId]; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || nodeRail->State != EP_NODE_DISCONNECTING) -+ continue; -+ -+ if (txdRail->txd_main->txd_done == EP4_STATE_ACTIVE) -+ { -+ -+ EPRINTF8 (DBG_DISCON, "ep4xmtr_disconnect_callback: txdRail=%p : events %llx,%llx,%llx done %llx,%llx,%llx retry %lx\n",txdRail, -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ txdRail->txd_main->txd_env, txdRail->txd_main->txd_data, txdRail->txd_main->txd_done, -+ txdRail->txd_retry_time); -+ -+ if (txdRail->txd_retry_time) -+ { -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ } -+ -+ /* Remove from active list */ -+ list_del (&txd->Link); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ EPRINTF4 (DBG_XMTR, "%s: ep4xmtr_disconnect_callback - xmtr %p txd %p node %d not conected\n", rail->r_generic.Name, xmtr, txd, txd->NodeId); -+ -+ /* add to the list of txd's which are to be completed */ -+ list_add_tail (&txd->Link, &txdList); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ txd->Handler (txd, txd->Arg, EP_CONN_RESET); -+ -+ FreeTxd (xmtr, txd); -+ } -+} -+ -+void -+ep4xmtr_neterr_flush (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_COMMS_RAIL *commsRail = XMTR_TO_COMMS (xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ /* insert a "setevent" into the command queue to flush -+ * through the envelopes which have already been submitted */ -+ ep4comms_flush_setevent (commsRail, xmtrRail->xmtr_cq); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+void -+ep4xmtr_neterr_check (EP_XMTR *xmtr, EP4_XMTR_RAIL *xmtrRail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each (el, &xmtr->ActiveDescList) { -+ EP_TXD *txd = list_entry (el, EP_TXD, Link); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ -+ if ( ! TXD_BOUND2RAIL (txdRail, xmtrRail) || txd->NodeId != nodeId) -+ continue; -+ -+ /* The only non-dma associated with a txd is the initial sten packet, if it has been acked -+ * and the neterr cookie matches, then change it to look like it's been acked since the -+ * INPUT_Q_COMMIT transaction has already been executed */ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FAILED && (txdRail->txd_cookie == cookies[0] || txdRail->txd_cookie == cookies[1])) -+ { -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4xmtr_neterr_callback: cookie <%lld%s%s%s%s> matches txd %p txdRail %p\n", -+ rail->r_generic.Name, EP4_COOKIE_STRING(txdRail->txd_cookie), txd, txdRail); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_FINISHED; -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ spin_lock (&xmtrRail->xmtr_retrylock); -+ -+ EP4_TXD_ASSERT (txdRail, (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) == 1 || -+ on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1)); -+ -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ spin_unlock (&xmtrRail->xmtr_retrylock); -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+} -+ -+int -+ep4xmtr_poll_txd (EP_XMTR_RAIL *x, EP_TXD_RAIL *t, int how) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ if (! EP_IS_NO_INTERRUPT(txd->Envelope.Attr)) -+ return 0; -+ -+ switch (how) -+ { -+ case ENABLE_TX_CALLBACK: -+ if (!EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (xmtrRail->xmtr_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ txd->Envelope.Attr |= EP_INTERRUPT_ENABLED; -+ } -+ break; -+ -+ case DISABLE_TX_CALLBACK: -+ if (EP_IS_INTERRUPT_ENABLED(txd->Envelope.Attr & EP_INTERRUPT_ENABLED)) -+ { -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), NOP_CMD); -+ -+ txd->Envelope.Attr &= ~EP_INTERRUPT_ENABLED; -+ } -+ } -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED && txdRail->txd_main->txd_data == EP4_STATE_FINISHED && txdRail->txd_main->txd_done == EP4_STATE_FINISHED) -+ { -+ EPRINTF3 (DBG_XMTR, "%s: ep4xmtr_poll_txd: txd=%p XID=%llx completed\n", -+ XMTR_TO_RAIL (xmtrRail)->r_generic.Name, txd, txd->Envelope.Xid.Unique); -+ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_done_cmd.c_intr_cmd), -+ INTERRUPT_CMD | (txdRail->txd_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ -+ ep_xmtr_txd_stat(xmtrRail->xmtr_generic.Xmtr,txd); -+ -+ finalise_txd (txd, txdRail); -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep4xmtr_bind_txd (EP_TXD *txd, EP_XMTR_RAIL *x, unsigned int phase) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ EP4_TXD_RAIL *txdRail; -+ unsigned long flags; -+ -+ if ((txdRail = get_txd_rail (xmtrRail)) == NULL) -+ return 0; -+ -+ switch (phase) -+ { -+ case EP_TXD_PHASE_ACTIVE: -+ if (rail->r_generic.Nodes[txd->NodeId].State != EP_NODE_CONNECTED) -+ { -+ EPRINTF2 (DBG_XMTR, "%s: ep4xmtr_bind_txd: node %u not connected on this rail\n", rail->r_generic.Name, txd->NodeId); -+ -+ free_txd_rail (xmtrRail, txdRail); -+ return 0; -+ } -+ -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_ACTIVE); -+ -+ bind_txd_rail (txd, txdRail); -+ -+ /* generate the STEN packet to transfer the envelope */ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ issue_envelope_packet (xmtrRail, txdRail); -+ else -+ { -+ txdRail->txd_retry_time = lbolt; -+ -+ list_add_tail (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, txdRail->txd_retry_time); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ break; -+ -+ case EP_TXD_PHASE_PASSIVE: -+ initialise_txd (txd, txdRail, EP_TXD_PHASE_PASSIVE); -+ -+ EP_XMTR_OP (txd->TxdRail->XmtrRail, UnbindTxd) (txd, EP_TXD_PHASE_PASSIVE); /* unbind from existing rail */ -+ -+ bind_txd_rail (txd, txdRail); /* and bind it to our new rail */ -+ break; -+ } -+ -+ return 1; -+} -+ -+void -+ep4xmtr_unbind_txd (EP_TXD *txd, unsigned int phase) -+{ -+ /* XXXX - TBD */ -+} -+ -+long -+ep4xmtr_check (EP_XMTR_RAIL *x, long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP_XMTR *xmtr = xmtrRail->xmtr_generic.Xmtr; -+ struct list_head txdList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&txdList); -+ -+ if (xmtrRail->xmtr_freecount < ep_txd_lowat && !alloc_txd_block (xmtrRail)) -+ { -+ EPRINTF1 (DBG_RCVR,"%s: failed to grow txd rail pool\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + RESOURCE_RETRY_TIME)) -+ nextRunTime = lbolt + RESOURCE_RETRY_TIME; -+ } -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ list_for_each_safe (el, nel, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) { -+ EP4_TXD_RAIL *txdRail = list_entry (el, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (txdRail->txd_main->txd_env != EP4_STATE_FINISHED || txdRail->txd_main->txd_data != EP4_STATE_FINISHED) -+ { -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) not finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ nextRunTime = lbolt + HZ; -+ } -+ else -+ { -+ EP_TXD *txd = txdRail->txd_generic.Txd; -+ -+ ep_debugf (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ -+ EPRINTF5 (DBG_XMTR, "%s: ep4xmtr_check: xmtrRail=%p txdRail=%p env/data (%d,%d) finished\n", -+ XMTR_TO_RAIL(xmtrRail)->r_generic.Name, xmtrRail, txdRail, (int)txdRail->txd_main->txd_env, (int)txdRail->txd_main->txd_data); -+ EPRINTF3 (DBG_XMTR, "%s: done %x data %x\n", XMTR_TO_RAIL(xmtrRail)->r_generic.Name, -+ txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_done), -+ txdRail->txd_elan_addr + offsetof (EP4_TXD_RAIL_ELAN, txd_data)); -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ /* remove txd from active list and add to list to call handlers */ -+ list_del (&txd->Link); -+ list_add_tail (&txd->Link, &txdList); -+ -+ /* remove and free of txdRail */ -+ txdRail->txd_retry_time = 0; -+ list_del (&txdRail->txd_retry_link); -+ -+ finalise_txd (txd, txdRail); -+ -+ } -+ } -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ while (! list_empty (&txdList)) -+ { -+ EP_TXD *txd = list_entry (txdList.next, EP_TXD, Link); -+ -+ list_del (&txd->Link); -+ -+ ep_xmtr_txd_stat (xmtr,txd); -+ -+ txd->Handler (txd, txd->Arg, EP_SUCCESS); -+ -+ FreeTxd (xmtr, txd); -+ } -+ -+ return nextRunTime; -+} -+ -+unsigned long -+ep4xmtr_retry (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) arg; -+ ELAN4_DEV *dev = XMTR_TO_DEV(xmtrRail); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ while (! list_empty (&xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ { -+ EP4_TXD_RAIL *txdRail = list_entry (xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY].next, EP4_TXD_RAIL, txd_retry_link); -+ -+ if (BEFORE (lbolt, txdRail->txd_retry_time)) -+ { -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ -+ if (((int) (xmtrRail->xmtr_flowcnt - xmtrRail->xmtr_main->xmtr_flowcnt)) < EP4_XMTR_FLOWCNT) -+ { -+ txdRail->txd_retry_time = 0; -+ -+ list_del (&txdRail->txd_retry_link); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4xmtr_retry: re-issue envelope packet to %d for txdRail=%p\n", -+ rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId, txdRail); -+ -+ txdRail->txd_main->txd_env = EP4_STATE_ACTIVE; -+ -+ issue_envelope_packet (xmtrRail, txdRail); -+ } -+ else -+ { -+ EPRINTF2 (DBG_RETRY, "%s: ep4xmtr_retry: cannot re-issue envelope packet to %d\n", rail->r_generic.Name, txdRail->txd_generic.Txd->Envelope.NodeId); -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, txdRail->txd_retry_time)) -+ nextRunTime = txdRail->txd_retry_time; -+ -+ break; -+ } -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ return nextRunTime; -+} -+ -+void -+ep4xmtr_add_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP_COMMS_SUBSYS *subsys = xmtr->Subsys; -+ EP4_XMTR_RAIL *xmtrRail; -+ unsigned long flags; -+ int i; -+ -+ KMEM_ZALLOC (xmtrRail, EP4_XMTR_RAIL *, sizeof (EP4_XMTR_RAIL), 1); -+ -+ spin_lock_init (&xmtrRail->xmtr_freelock); -+ kcondvar_init (&xmtrRail->xmtr_freesleep); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_freelist); -+ INIT_LIST_HEAD (&xmtrRail->xmtr_blocklist); -+ -+ for (i = 0; i < EP4_TXD_NUM_LISTS; i++) -+ INIT_LIST_HEAD (&xmtrRail->xmtr_retrylist[i]); -+ spin_lock_init (&xmtrRail->xmtr_retrylock); -+ -+ xmtrRail->xmtr_generic.CommsRail = commsRail; -+ xmtrRail->xmtr_generic.Xmtr = xmtr; -+ -+ xmtrRail->xmtr_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_XMTR_RAIL_MAIN), 0, &xmtrRail->xmtr_main_addr); -+ xmtrRail->xmtr_cq = elan4_alloccq (&rail->r_ctxt, EP4_XMTR_CQSIZE, CQ_EnableAllBits, CQ_Priority); -+ -+ xmtrRail->xmtr_retryops.op_func = ep4xmtr_retry; -+ xmtrRail->xmtr_retryops.op_arg = xmtrRail; -+ -+ ep4_add_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ ep4_register_intcookie (rail, &xmtrRail->xmtr_intcookie, xmtrRail->xmtr_main_addr, -+ poll_interrupt, xmtrRail); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ -+ xmtr->Rails[commsRail->Rail->Number] = &xmtrRail->xmtr_generic; -+ xmtr->RailMask |= EP_RAIL2RAILMASK(commsRail->Rail->Number); -+ -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_kthread_schedule (&subsys->Thread, lbolt); -+ -+ ep_procfs_xmtr_add_rail(&(xmtrRail->xmtr_generic)); -+} -+ -+void -+ep4xmtr_del_rail (EP_XMTR *xmtr, EP_COMMS_RAIL *commsRail) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) commsRail->Rail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) xmtr->Rails[commsRail->Rail->Number]; -+ unsigned long flags; -+ -+ /* rail mask set as not usable */ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->RailMask &= ~EP_RAIL2RAILMASK (rail->r_generic.Number); -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ ep_procfs_xmtr_del_rail(&(xmtrRail->xmtr_generic)); -+ -+ /* wait for all txd's for this rail to become free */ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ while (xmtrRail->xmtr_freecount != xmtrRail->xmtr_totalcount) -+ { -+ xmtrRail->xmtr_freewaiting++; -+ kcondvar_wait (&xmtrRail->xmtr_freesleep, &xmtrRail->xmtr_freelock, &flags); -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtr->Lock, flags); -+ xmtr->Rails[commsRail->Rail->Number] = NULL; -+ spin_unlock_irqrestore (&xmtr->Lock, flags); -+ -+ /* all the txd's accociated with DescBlocks must be in the freelist */ -+ ASSERT (xmtrRail->xmtr_totalcount == xmtrRail->xmtr_freecount); -+ -+ /* run through the DescBlockList deleting them */ -+ while (!list_empty (&xmtrRail->xmtr_blocklist)) -+ free_txd_block (xmtrRail, list_entry(xmtrRail->xmtr_blocklist.next, EP4_TXD_RAIL_BLOCK , blk_link)); -+ -+ /* it had better be empty after that */ -+ ASSERT ((xmtrRail->xmtr_freecount == 0) && (xmtrRail->xmtr_totalcount == 0)); -+ -+ ep4_deregister_intcookie (rail, &xmtrRail->xmtr_intcookie); -+ -+ ep4_remove_retry_ops (rail, &xmtrRail->xmtr_retryops); -+ -+ elan4_freecq (&rail->r_ctxt, xmtrRail->xmtr_cq); -+ ep_free_main (&rail->r_generic, xmtrRail->xmtr_main_addr, sizeof (EP4_XMTR_RAIL_MAIN)); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_retrylock); -+ -+ spin_lock_destroy (&xmtrRail->xmtr_freelock); -+ kcondvar_destroy (&xmtrRail->xmtr_freesleep); -+ -+ KMEM_FREE (xmtrRail, sizeof (EP4_XMTR_RAIL)); -+} -+ -+void -+ep4xmtr_display_xmtr (DisplayInfo *di, EP_XMTR_RAIL *x) -+{ -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) x; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ unsigned int freeCount = 0; -+ unsigned int pollCount = 0; -+ unsigned int stalledCount = 0; -+ unsigned int retryCount = 0; -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_freelock, flags); -+ list_for_each (el, &xmtrRail->xmtr_freelist) -+ freeCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_freelock, flags); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL]) -+ pollCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) -+ stalledCount++; -+ list_for_each (el, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY]) -+ retryCount++; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " rail=%d free=%d total=%d (%d) (retry %d,%d,%d)\n", -+ rail->r_generic.Number, xmtrRail->xmtr_freecount, xmtrRail->xmtr_totalcount, -+ freeCount, pollCount, stalledCount, retryCount); -+ (di->func)(di->arg, " cq %d flowcnt %lld,%lld\n", elan4_cq2num (xmtrRail->xmtr_cq), xmtrRail->xmtr_flowcnt, xmtrRail->xmtr_main->xmtr_flowcnt); -+} -+ -+void -+ep4xmtr_display_txd (DisplayInfo *di, EP_TXD_RAIL *t) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) t; -+ EP4_XMTR_RAIL *xmtrRail = TXD_TO_XMTR(txdRail); -+ EP4_TXD_RAIL_MAIN *txdMain = txdRail->txd_main; -+ sdramaddr_t txdElan = txdRail->txd_elan; -+ EP4_RAIL *rail = XMTR_TO_RAIL (xmtrRail); -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ char *list = ""; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ if (txdRail->txd_retry_time) -+ { -+ if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_POLL])) -+ list = " poll"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED])) -+ list = " stalled"; -+ else if (on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_RETRY])) -+ list = " retry"; -+ else -+ list = " ERROR"; -+ } -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ (di->func)(di->arg, " Rail %d txd %p elan %lx (%x) main %p (%x) cookie <%lld%s%s%s%s> ecq %d %s\n", rail->r_generic.Number, -+ txdRail, txdRail->txd_elan, txdRail->txd_elan_addr, txdRail->txd_main, txdRail->txd_main_addr, -+ EP4_COOKIE_STRING(txdRail->txd_cookie), elan4_cq2num (txdRail->txd_ecq->ecq_cq), list); -+ -+ (di->func)(di->arg, " env %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_Params[1])), -+ txdMain->txd_env); -+ (di->func)(di->arg, " data %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_data.ev_Params[1])), -+ txdMain->txd_data); -+ (di->func)(di->arg, " done %016llx %016llx %016llx -> %016llx\n", -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_CountAndType)), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[0])), -+ elan4_sdram_readq (dev, txdElan + offsetof (EP4_TXD_RAIL_ELAN, txd_done.ev_Params[1])), -+ txdMain->txd_done); -+} -+ -+int -+ep4xmtr_check_txd_state (EP_TXD *txd) -+{ -+ EP4_TXD_RAIL *txdRail = (EP4_TXD_RAIL *) txd->TxdRail; -+ EP4_XMTR_RAIL *xmtrRail = (EP4_XMTR_RAIL *) txdRail->txd_generic.XmtrRail; -+ ELAN4_DEV *dev = XMTR_TO_DEV (xmtrRail); -+ unsigned long flags; -+ -+ if (txdRail->txd_main->txd_env == EP4_STATE_FINISHED) -+ return 0; -+ -+ EP4_TXD_ASSERT (txdRail, txdRail->txd_retry_time != 0); -+ -+ spin_lock_irqsave (&xmtrRail->xmtr_retrylock, flags); -+ EP4_TXD_ASSERT (txdRail, on_list (&txdRail->txd_retry_link, &xmtrRail->xmtr_retrylist[EP4_TXD_LIST_STALLED]) == 1); -+ -+ list_del (&txdRail->txd_retry_link); -+ txdRail->txd_retry_time = 0; -+ spin_unlock_irqrestore (&xmtrRail->xmtr_retrylock, flags); -+ -+ /* re-initialise the envelope event */ -+ elan4_sdram_writeq (dev, txdRail->txd_elan + offsetof (EP4_TXD_RAIL_ELAN, txd_env.ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_INTR_CMD_NDWORDS)); -+ -+ unbind_txd_rail (txd, txdRail); -+ -+ terminate_txd_rail (xmtrRail, txdRail); -+ free_txd_rail (xmtrRail, txdRail); -+ -+ return 1; -+} -+ -+void -+ep4xmtr_fillout_rail_stats(EP_XMTR_RAIL *xmtr_rail, char *str) { -+ /* no stats here yet */ -+ /* EP4_XMTR_RAIL * ep4xmtr_rail = (EP4_XMTR_RAIL *) xmtr_rail; */ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/ep_procfs.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/ep_procfs.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/ep_procfs.c 2005-07-28 14:52:52.881673720 -0400 -@@ -0,0 +1,331 @@ -+ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: ep_procfs.c,v 1.5.6.4 2005/03/20 11:23:33 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/ep_procfs.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "epcomms_elan4.h" -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_xmtr_root; -+struct proc_dir_entry *ep_procfs_rcvr_root; -+ -+static int -+ep_proc_open (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr; -+ int pages = 4; -+ -+ if ((pr = kmalloc (sizeof (PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ pr->pr_di.func = proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("debug_xmtr", file->f_dentry->d_iname)) -+ { -+ EP_XMTR *xmtr = (EP_XMTR *)(PDE(inode)->data); -+ ep_display_xmtr (&pr->pr_di, xmtr); -+ } -+ -+ if (!strcmp("debug_rcvr", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 0); -+ } -+ -+ if (!strcmp("debug_full", file->f_dentry->d_iname)) -+ { -+ EP_RCVR *rcvr = (EP_RCVR *)(PDE(inode)->data); -+ ep_display_rcvr (&pr->pr_di, rcvr, 1); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations ep_proc_operations = -+{ -+ read: proc_read, -+ open: ep_proc_open, -+ release: proc_release, -+}; -+ -+static int -+proc_read_rcvr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR *rcvr = (EP_RCVR *)data; -+ -+ if (rcvr == NULL) -+ sprintf(page,"proc_read_rcvr_stats rcvr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_rcvr_fillout_stats(rcvr,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_rcvr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RCVR_RAIL *rcvr_rail = (EP_RCVR_RAIL *)data; -+ -+ if (rcvr_rail == NULL) { -+ strcpy(page,"proc_read_rcvr_rail_stats rcvr_rail=NULL"); -+ } else { -+ page[0] = 0; -+ ep_rcvr_rail_fillout_stats(rcvr_rail, page); -+ EP_RCVR_OP(rcvr_rail,FillOutRailStats)(rcvr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_rcvr_add(EP_RCVR *rcvr) -+{ -+ /* ep/rcvr/service_number/stats */ -+ /* ep/rcvr/service_number/debug_rcvr */ -+ /* ep/rcvr/service_number/debug_full */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%d", rcvr->Service); -+ -+ rcvr->procfs_root = proc_mkdir (str, ep_procfs_rcvr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_stats; -+ p->data = rcvr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_rcvr", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+ -+ if ((p = create_proc_entry ("debug_full", 0, rcvr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rcvr; -+ } -+} -+ -+void -+ep_procfs_rcvr_del(EP_RCVR *rcvr) -+{ -+ char str[32]; -+ sprintf(str,"%d", rcvr->Service); -+ -+ remove_proc_entry ("debug_full", rcvr->procfs_root); -+ remove_proc_entry ("debug_rcvr", rcvr->procfs_root); -+ remove_proc_entry ("stats", rcvr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_rcvr_root); -+} -+ -+void -+ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ /* ep/rcvr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ rcvrRail->procfs_root = proc_mkdir (str, rcvrRail->Rcvr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, rcvrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_rcvr_rail_stats; -+ p->data = rcvrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",rcvrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", rcvrRail->procfs_root); -+ -+ remove_proc_entry (str, rcvrRail->Rcvr->procfs_root); -+} -+ -+ -+ -+ -+static int -+proc_read_xmtr_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR *xmtr = (EP_XMTR *)data; -+ -+ if (xmtr == NULL) -+ strcpy(page,"proc_read_xmtr_stats xmtr=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_fillout_stats(xmtr, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_xmtr_rail_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_XMTR_RAIL *xmtr_rail = (EP_XMTR_RAIL *)data; -+ -+ if (xmtr_rail == NULL) -+ strcpy(page,"proc_read_xmtr_rail_stats xmtr_rail=NULL\n"); -+ else { -+ page[0] = 0; -+ ep_xmtr_rail_fillout_stats(xmtr_rail, page); -+ EP_XMTR_OP(xmtr_rail,FillOutRailStats)(xmtr_rail,page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+void -+ep_procfs_xmtr_add(EP_XMTR *xmtr) -+{ -+ /* ep/xmtr/service_number/stats */ -+ /* ep/xmtr/service_number/debug_xmtr */ -+ struct proc_dir_entry *p; -+ char str[32]; -+ -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ xmtr->procfs_root = proc_mkdir (str, ep_procfs_xmtr_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_stats; -+ p->data = xmtr; -+ p->owner = THIS_MODULE; -+ } -+ -+ if ((p = create_proc_entry ("debug_xmtr", 0, xmtr->procfs_root)) != NULL) -+ { -+ p->proc_fops = &ep_proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = xmtr; -+ } -+} -+ -+void -+ep_procfs_xmtr_del(EP_XMTR *xmtr) -+{ -+ char str[32]; -+ sprintf(str,"%llx", (unsigned long long) (unsigned long)xmtr); -+ -+ remove_proc_entry ("stats", xmtr->procfs_root); -+ remove_proc_entry ("debug_xmtr", xmtr->procfs_root); -+ -+ remove_proc_entry (str, ep_procfs_xmtr_root); -+} -+ -+void -+ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ /* ep/xmtr/service_number/railN/stats */ -+ -+ struct proc_dir_entry *p; -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ xmtrRail->procfs_root = proc_mkdir (str, xmtrRail->Xmtr->procfs_root); -+ -+ if ((p = create_proc_entry ("stats", 0, xmtrRail->procfs_root)) != NULL) -+ { -+ p->write_proc = NULL; -+ p->read_proc = proc_read_xmtr_rail_stats; -+ p->data = xmtrRail; -+ p->owner = THIS_MODULE; -+ } -+} -+ -+void -+ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail) -+{ -+ char str[32]; -+ sprintf(str,"rail%d",xmtrRail->CommsRail->Rail->Number); -+ -+ remove_proc_entry ("stats", xmtrRail->procfs_root); -+ -+ remove_proc_entry (str, xmtrRail->Xmtr->procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_init(void) -+{ -+ ep_procfs_rcvr_root = proc_mkdir ("rcvr", ep_procfs_root); -+ ep_procfs_xmtr_root = proc_mkdir ("xmtr", ep_procfs_root); -+} -+ -+void -+ep_procfs_rcvr_xmtr_fini(void) -+{ -+ remove_proc_entry ("rcvr", ep_procfs_root); -+ remove_proc_entry ("xmtr", ep_procfs_root); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kalloc.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kalloc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kalloc.c 2005-07-28 14:52:52.882673568 -0400 -@@ -0,0 +1,677 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kalloc.c,v 1.17.8.2 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+ -+static void -+HashInPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ int idx0 = HASH (pool->Handle.nmh_nmd.nmd_addr); -+ int idx1 = HASH (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ list_add (&pool->HashBase, &alloc->HashBase[idx0]); -+ list_add (&pool->HashTop, &alloc->HashTop[idx1]); -+} -+ -+static void -+HashOutPool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+} -+ -+static EP_POOL * -+LookupPool (EP_ALLOC *alloc, EP_ADDR addr) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &alloc->HashBase[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ list_for_each (el, &alloc->HashTop[HASH(addr)]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashTop); -+ -+ if (pool->Handle.nmh_nmd.nmd_addr <= addr && addr < (pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len)) -+ return (pool); -+ } -+ -+ return (NULL); -+} -+ -+static EP_POOL * -+AllocatePool (EP_ALLOC *alloc, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_ADDR base = 0; -+ EP_POOL *pool; -+ EP_RAIL *rail; -+ int i, railmask = 0; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (pool, EP_POOL *, sizeof (EP_POOL), !(attr & EP_NO_SLEEP)); -+ -+ if (pool == NULL) -+ return (NULL); -+ -+ if (addr != 0) -+ base = addr; -+ else -+ { -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ { -+ KMEM_ZALLOC (pool->Bitmaps[i - LN2_MIN_SIZE], bitmap_t *, BT_BITOUL(1 << (LN2_MAX_SIZE-i)) * sizeof (bitmap_t), !(attr & EP_NO_SLEEP)); -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] == NULL) -+ goto failed; -+ } -+ -+ if ((base = ep_rmalloc (alloc->ResourceMap, size, !(attr & EP_NO_SLEEP))) == 0) -+ goto failed; -+ } -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ rail = alloc->Data.Private.Rail; -+ -+ if ((pool->Buffer.Sdram = rail->Operations.SdramAlloc (rail, base, size)) == 0) -+ goto failed; -+ -+ ep_perrail_sdram_map (rail, base, pool->Buffer.Sdram, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ ep_perrail_kaddr_map (alloc->Data.Private.Rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ KMEM_GETPAGES(pool->Buffer.Ptr, unsigned long, btop (size), !(attr & EP_NO_SLEEP)); -+ if (pool->Buffer.Ptr == 0) -+ goto failed; -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_kaddr_map (rail, base, pool->Buffer.Ptr, size, perm, attr); -+ -+ railmask |= (1 << rail->Number); -+ } -+ pool->Handle.nmh_nmd.nmd_addr = base; -+ pool->Handle.nmh_nmd.nmd_len = size; -+ pool->Handle.nmh_nmd.nmd_attr = EP_NMD_ATTR (alloc->Data.Shared.System->Position.pos_nodeid, railmask); -+ -+ ep_nmh_insert (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ break; -+ -+ default: -+ goto failed; -+ } -+ -+ return (pool); -+ -+ failed: -+ if (addr == 0 && base) -+ ep_rmfree (alloc->ResourceMap, size, base); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ if (pool->Bitmaps[i - LN2_MIN_SIZE] != NULL) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+ return (NULL); -+} -+ -+static void -+FreePool (EP_ALLOC *alloc, EP_POOL *pool) -+{ -+ struct list_head *el; -+ int i; -+ -+ switch (alloc->Type) -+ { -+ case EP_ALLOC_TYPE_PRIVATE_SDRAM: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ alloc->Data.Private.Rail->Operations.SdramFree (alloc->Data.Private.Rail, pool->Buffer.Sdram, pool->Handle.nmh_nmd.nmd_len); -+ break; -+ -+ case EP_ALLOC_TYPE_PRIVATE_MAIN: -+ ep_perrail_unmap (alloc->Data.Private.Rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ -+ case EP_ALLOC_TYPE_SHARED_MAIN: -+ ep_nmh_remove (&alloc->Data.Shared.System->MappingTable, &pool->Handle); -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ } -+ -+ KMEM_FREEPAGES (pool->Buffer.Ptr, btop (pool->Handle.nmh_nmd.nmd_len)); -+ break; -+ } -+ -+ if (pool->Bitmaps[0]) -+ { -+ ep_rmfree (alloc->ResourceMap, pool->Handle.nmh_nmd.nmd_len, pool->Handle.nmh_nmd.nmd_addr); -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i ++) -+ KMEM_FREE (pool->Bitmaps[i - LN2_MIN_SIZE], BT_BITOUL(1 << (LN2_MAX_SIZE - i)) * sizeof (bitmap_t)); -+ } -+ -+ KMEM_FREE (pool, sizeof (EP_POOL)); -+} -+ -+static int -+AddRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ EP_RAIL_ENTRY *l; -+ unsigned long flags; -+ int i; -+ -+ ASSERT (alloc->Type == EP_ALLOC_TYPE_SHARED_MAIN); -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ l->Rail = rail; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_kaddr_map (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Buffer.Ptr, -+ pool->Handle.nmh_nmd.nmd_len, EP_PERM_WRITE, EP_NO_SLEEP); -+ -+ pool->Handle.nmh_nmd.nmd_attr |= EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_add (&l->Link, &alloc->Data.Shared.Rails); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ return (0); -+} -+ -+static void -+RemoveRail (EP_ALLOC *alloc, EP_RAIL *rail) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_perrail_unmap (rail, pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ pool->Handle.nmh_nmd.nmd_attr &= ~EP_NMD_ATTR (0, 1 << rail->Number); -+ } -+ } -+ -+ list_for_each (el, &alloc->Data.Shared.Rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ KMEM_FREE(tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+static EP_POOL * -+AllocateBlock (EP_ALLOC *alloc, unsigned size, EP_ATTRIBUTE attr, int *offset) -+{ -+ int block, j, k; -+ unsigned long flags; -+ EP_POOL *pool; -+ -+ -+ if (size > MAX_SIZE) -+ { -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, size, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ HashInPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = 0; -+ -+ return pool; -+ } -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* k now has ln2 of the size to allocate. */ -+ /* find the free list with the smallest block we can use*/ -+ for (j = k; j <= LN2_MAX_SIZE && list_empty (&alloc->Freelists[j - LN2_MIN_SIZE]); j++) -+ ; -+ -+ /* j has ln2 of the smallest size block we can use */ -+ if (j < LN2_MAX_SIZE) -+ { -+ int nbits = 1 << (LN2_MAX_SIZE-j); -+ -+ pool = list_entry (alloc->Freelists[j - LN2_MIN_SIZE].next, EP_POOL, Link[j - LN2_MIN_SIZE]); -+ block = (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) << j); -+ -+ BT_CLEAR (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ if (bt_lowbit (pool->Bitmaps[j - LN2_MIN_SIZE], nbits) == -1) -+ list_del (&pool->Link[j - LN2_MIN_SIZE]); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ if ((attr & EP_NO_ALLOC) || (pool = AllocatePool (alloc, 0, MAX_SIZE, alloc->Perm, attr)) == NULL) -+ return (NULL); -+ -+ block = 0; -+ j = LN2_MAX_SIZE; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ HashInPool (alloc, pool); -+ } -+ -+ /* Split it until the buddies are the correct size, putting one -+ * buddy back on the free list and continuing to split the other */ -+ while (--j >= k) -+ { -+ list_add (&pool->Link[j - LN2_MIN_SIZE], &alloc->Freelists[j - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[j - LN2_MIN_SIZE], block >> j); -+ -+ block += (1 << j); -+ } -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ *offset = block; -+ -+ return (pool); -+} -+ -+static void -+FreeBlock (EP_ALLOC *alloc, EP_ADDR addr, unsigned size) -+{ -+ EP_POOL *pool; -+ int k, block = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ /* Round up size to next power of 2 */ -+ for (k = LN2_MIN_SIZE; (1 << k) < size; k++) -+ ; -+ -+ /* Find the pool containing this block */ -+ pool = LookupPool (alloc, addr); -+ -+ /* It must exist */ -+ ASSERT (pool != NULL); -+ -+ /* If we're freeing a subset of it, then update the bitmaps */ -+ if (size <= MAX_SIZE) -+ { -+ ASSERT (BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (addr - pool->Handle.nmh_nmd.nmd_addr) >> k) == 0); -+ -+ block = addr - pool->Handle.nmh_nmd.nmd_addr; -+ -+ while (k < LN2_MAX_SIZE && BT_TEST (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1)) -+ { -+ BT_CLEAR (pool->Bitmaps[k - LN2_MIN_SIZE], (block >> k) ^ 1); -+ -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_del (&pool->Link[k - LN2_MIN_SIZE]); -+ -+ k++; -+ } -+ } -+ -+ if (k >= LN2_MAX_SIZE) -+ { -+ HashOutPool (alloc, pool); -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ -+ FreePool (alloc, pool); -+ } -+ else -+ { -+ if (bt_lowbit (pool->Bitmaps[k - LN2_MIN_SIZE], (1 << (LN2_MAX_SIZE - k))) == -1) -+ list_add (&pool->Link[k - LN2_MIN_SIZE], &alloc->Freelists[k - LN2_MIN_SIZE]); -+ -+ BT_SET (pool->Bitmaps[k - LN2_MIN_SIZE], block >> k); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+ } -+} -+ -+static void -+InitialiseAllocator (EP_ALLOC *alloc, EP_ALLOC_TYPE type, unsigned int perm, EP_RMAP *rmap) -+{ -+ int i; -+ -+ spin_lock_init (&alloc->Lock); -+ -+ alloc->Type = type; -+ alloc->ResourceMap = rmap; -+ alloc->Perm = perm; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ (&alloc->HashBase[i])->next = &alloc->HashBase[i]; -+ -+ INIT_LIST_HEAD (&alloc->HashBase[i]); -+ INIT_LIST_HEAD (&alloc->HashTop[i]); -+ } -+ -+ for (i = 0; i < NUM_FREELISTS; i++) -+ INIT_LIST_HEAD (&alloc->Freelists[i]); -+} -+ -+static void -+DestroyAllocator (EP_ALLOC *alloc) -+{ -+ struct list_head *el, *next; -+ int i; -+ -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each_safe (el, next, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ printk ("!!DestroyAllocator: pool=%p type=%d addr=%x len=%x\n", pool, alloc->Type, -+ pool->Handle.nmh_nmd.nmd_addr, pool->Handle.nmh_nmd.nmd_len); -+ -+ list_del (&pool->HashBase); -+ list_del (&pool->HashTop); -+ -+ // XXXX: FreePool (alloc, pool); -+ } -+ } -+ -+ spin_lock_destroy (&alloc->Lock); -+} -+ -+void -+ep_display_alloc (EP_ALLOC *alloc) -+{ -+ struct list_head *el; -+ int i; -+ int npools = 0; -+ int nbytes = 0; -+ int nfree = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&alloc->Lock, flags); -+ -+ ep_debugf (DBG_DEBUG, "Kernel comms memory allocator %p type %d\n", alloc, alloc->Type); -+ for (i = 0; i < NHASH; i++) -+ { -+ list_for_each (el, &alloc->HashBase[i]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, HashBase); -+ -+ ep_debugf (DBG_DEBUG, " POOL %4x: %p -> %x.%x\n", i, pool, pool->Handle.nmh_nmd.nmd_addr, -+ pool->Handle.nmh_nmd.nmd_addr + pool->Handle.nmh_nmd.nmd_len); -+ -+ npools++; -+ nbytes += pool->Handle.nmh_nmd.nmd_len; -+ } -+ } -+ -+ for (i = LN2_MIN_SIZE; i <= LN2_MAX_SIZE; i++) -+ { -+ int n = 0; -+ -+ list_for_each (el, &alloc->Freelists[i - LN2_MIN_SIZE]) { -+ EP_POOL *pool = list_entry (el, EP_POOL, Link[i - LN2_MIN_SIZE]); -+ int nbits = bt_nbits (pool->Bitmaps[i - LN2_MIN_SIZE], 1 << (LN2_MAX_SIZE - i)); -+ -+ n += nbits; -+ nfree += (nbits << i); -+ } -+ -+ if (n != 0) -+ ep_debugf (DBG_DEBUG, " SIZE %5d : num %d\n", (1 << i), n); -+ } -+ ep_debugf (DBG_DEBUG, "%d pools with %d bytes and %d bytes free\n", npools, nbytes, nfree); -+ -+ spin_unlock_irqrestore (&alloc->Lock, flags); -+} -+ -+/* per-rail allocators */ -+void -+ep_alloc_init (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_PRIVATE_RMAP_SIZE, "PrivateMap", 1); -+ -+ ep_rmfree (rmap, EP_PRIVATE_TOP-EP_PRIVATE_BASE, EP_PRIVATE_BASE); -+ -+ InitialiseAllocator (&rail->ElanAllocator, EP_ALLOC_TYPE_PRIVATE_SDRAM, EP_PERM_ALL, rmap); -+ InitialiseAllocator (&rail->MainAllocator, EP_ALLOC_TYPE_PRIVATE_MAIN, EP_PERM_WRITE, rmap); -+ -+ rail->ElanAllocator.Data.Private.Rail = rail; -+ rail->MainAllocator.Data.Private.Rail = rail; -+} -+ -+void -+ep_alloc_fini (EP_RAIL *rail) -+{ -+ EP_RMAP *rmap = rail->ElanAllocator.ResourceMap; -+ -+ DestroyAllocator (&rail->ElanAllocator); -+ DestroyAllocator (&rail->MainAllocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+sdramaddr_t -+ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr) -+{ -+ EP_POOL *pool = AllocatePool (&rail->ElanAllocator, addr, size, perm, attr); -+ unsigned long flags; -+ -+ if (pool == NULL) -+ return (0); -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ HashInPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (pool->Buffer.Sdram); -+} -+ -+void -+ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ pool = LookupPool (&rail->ElanAllocator, addr); -+ -+ HashOutPool (&rail->ElanAllocator, pool); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ FreePool (&rail->ElanAllocator, pool); -+} -+ -+sdramaddr_t -+ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->ElanAllocator, size, attr, &offset)) == NULL) -+ return (0); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return (pool->Buffer.Sdram + offset); -+} -+ -+void -+ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->ElanAllocator, addr, size); -+} -+ -+void * -+ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&rail->MainAllocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ *addrp = pool->Handle.nmh_nmd.nmd_addr + offset; -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size) -+{ -+ FreeBlock (&rail->MainAllocator, addr, size); -+} -+ -+sdramaddr_t -+ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ sdramaddr_t res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ElanAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->ElanAllocator, addr)) == NULL) -+ res = 0; -+ else -+ res = pool->Buffer.Sdram + (addr - pool->Handle.nmh_nmd.nmd_addr); -+ spin_unlock_irqrestore (&rail->ElanAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+void * -+ep_elan2main (EP_RAIL *rail, EP_ADDR addr) -+{ -+ EP_POOL *pool; -+ void *res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->MainAllocator.Lock, flags); -+ if ((pool = LookupPool (&rail->MainAllocator, addr)) == NULL) -+ res = NULL; -+ else -+ res = (void *) ((unsigned long) pool->Buffer.Ptr + (addr - pool->Handle.nmh_nmd.nmd_addr)); -+ spin_unlock_irqrestore (&rail->MainAllocator.Lock, flags); -+ -+ return (res); -+} -+ -+/* shared allocators */ -+int -+ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ return (AddRail (&sys->Allocator, rail)); -+} -+ -+void -+ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ RemoveRail (&sys->Allocator, rail); -+} -+ -+void -+ep_shared_alloc_init (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = ep_rmallocmap (EP_SHARED_RMAP_SIZE, "shared_alloc_map", 1); -+ -+ ep_rmfree (rmap, EP_SHARED_TOP - EP_SHARED_BASE, EP_SHARED_BASE); -+ -+ InitialiseAllocator (&sys->Allocator, EP_ALLOC_TYPE_SHARED_MAIN, EP_PERM_WRITE, rmap); -+ -+ INIT_LIST_HEAD (&sys->Allocator.Data.Shared.Rails); -+ -+ sys->Allocator.Data.Shared.System = sys; -+} -+ -+void -+ep_shared_alloc_fini (EP_SYS *sys) -+{ -+ EP_RMAP *rmap = sys->Allocator.ResourceMap; -+ -+ DestroyAllocator (&sys->Allocator); -+ -+ ep_rmfreemap (rmap); -+} -+ -+void * -+ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd) -+{ -+ int offset; -+ EP_POOL *pool; -+ -+ if ((pool = AllocateBlock (&sys->Allocator, size, attr, &offset)) == NULL) -+ return (NULL); -+ -+ ep_nmd_subset (nmd, &pool->Handle.nmh_nmd, offset, size); -+ -+ return ((void *) ((unsigned long) pool->Buffer.Ptr + offset)); -+} -+ -+void -+ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd) -+{ -+ FreeBlock (&sys->Allocator, nmd->nmd_addr, nmd->nmd_len); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm.c 2005-07-28 14:52:52.885673112 -0400 -@@ -0,0 +1,1448 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm.c,v 1.50.2.9 2004/12/09 10:02:42 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+ -+int MaxSwitchLevels = 5; /* Max 1024 sized machine */ -+ -+static char *NodeStateNames[EP_NODE_NUM_STATES] = -+{ -+ "Disconnected", -+ "Connecting", -+ "Connnected", -+ "LeavingConnected", -+ "LocalPassivate", -+ "RemotePassivate", -+ "Passivated", -+ "Disconnecting", -+}; -+ -+static void -+ep_xid_cache_fill (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->XidLock, flags); -+ -+ cache->Current = sys->XidNext; -+ cache->Last = cache->Current + EP_XID_CACHE_CHUNKS-1; -+ -+ sys->XidNext += EP_XID_CACHE_CHUNKS; -+ -+ spin_unlock_irqrestore (&sys->XidLock, flags); -+} -+ -+EP_XID -+ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ EP_XID xid; -+ -+ if (cache->Current == cache->Last) -+ ep_xid_cache_fill (sys, cache); -+ -+ xid.Generation = sys->XidGeneration; -+ xid.Handle = cache->Handle; -+ xid.Unique = cache->Current++; -+ -+ return (xid); -+} -+ -+void -+ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ cache->Handle = ++sys->XidHandle; -+ -+ list_add_tail (&cache->Link, &sys->XidCacheList); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache) -+{ -+ /* Stall manager thread - it doesn't lock the XidCacheList */ -+ ep_kthread_stall (&sys->ManagerThread); -+ -+ list_del (&cache->Link); -+ -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+EP_XID_CACHE * -+ep_xid_cache_find (EP_SYS *sys, EP_XID xid) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &sys->XidCacheList) { -+ EP_XID_CACHE *cache = list_entry (el, EP_XID_CACHE, Link); -+ -+ if (sys->XidGeneration == xid.Generation && cache->Handle == xid.Handle) -+ return (cache); -+ } -+ -+ return (NULL); -+} -+ -+static int -+MsgBusy (EP_RAIL *rail, EP_OUTPUTQ *outputq, int slotNum) -+{ -+ switch (rail->Operations.OutputQState (rail, outputq, slotNum)) -+ { -+ case EP_OUTPUTQ_BUSY: /* still busy */ -+ return 1; -+ -+ case EP_OUTPUTQ_FAILED: /* NACKed */ -+ { -+#if defined(DEBUG_PRINTF) -+ EP_MANAGER_MSG *msg = rail->Operations.OutputQMsg (rail, outputq, slotNum); -+ -+ EPRINTF4 (DBG_MANAGER, "%s: kcomm msg %d type %d to %d failed\n", rail->Name, slotNum, msg->Hdr.Type, msg->Hdr.DestId); -+#endif -+ break; -+ } -+ -+ case EP_OUTPUTQ_FINISHED: /* anything else is finished */ -+ break; -+ } -+ -+ return 0; -+} -+ -+int -+ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ int n = EP_MANAGER_OUTPUTQ_SLOTS; -+ int slotNum; -+ int rnum; -+ EP_RAIL *msgRail; -+ EP_MANAGER_MSG *msg; -+ unsigned long flags; -+ -+ ASSERT (! EP_XID_INVALID (xid)); -+ -+ if ((rnum = ep_pickRail (node->ConnectedRails)) >= 0) -+ msgRail = sys->Rails[rnum]; -+ else -+ { -+ if (EP_MANAGER_MSG_TYPE_CONNECTED(type)) -+ { -+ ep_debugf (DBG_MANAGER, "%s: no rails available, trying to send type %d to %d\n", rail->Name, type, nodeId); -+ return -EHOSTDOWN; -+ } -+ -+ ep_debugf (DBG_MANAGER, "%s: no rails connected to %d - using receiving rail\n", rail->Name, nodeId); -+ -+ msgRail = rail; -+ } -+ -+ -+ spin_lock_irqsave (&msgRail->ManagerOutputQLock, flags); -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ -+ while (n-- > 0 && MsgBusy (msgRail, msgRail->ManagerOutputQ, slotNum)) /* search for idle message buffer */ -+ { -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ slotNum = msgRail->ManagerOutputQNextSlot; -+ } -+ -+ if (n == 0) /* all message buffers busy */ -+ { -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ ep_debugf (DBG_MANAGER, "%s: all message buffers busy: trying to send type %d to %d\n", msgRail->Name, type, nodeId); -+ return -EBUSY; -+ } -+ -+ msg = msgRail->Operations.OutputQMsg (msgRail, msgRail->ManagerOutputQ, slotNum); -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ep_send_message: type=%d nodeId=%d rail=%d xid=%08x.%08x.%016llx\n", -+ msgRail->Name, type, nodeId, rail->Number, xid.Generation, xid.Handle, (long long) xid.Unique); -+ -+ msg->Hdr.Version = EP_MANAGER_MSG_VERSION; -+ msg->Hdr.Type = type; -+ msg->Hdr.Rail = rail->Number; -+ msg->Hdr.NodeId = msgRail->Position.pos_nodeid; -+ msg->Hdr.DestId = nodeId; -+ msg->Hdr.Xid = xid; -+ msg->Hdr.Checksum = 0; -+ -+ if (body) bcopy (body, &msg->Body, sizeof (EP_MANAGER_MSG_BODY)); -+ -+ msg->Hdr.Checksum = CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE); -+ -+ if (rail->Operations.OutputQSend (msgRail, msgRail->ManagerOutputQ, slotNum, EP_MANAGER_MSG_SIZE, -+ nodeId, EP_SYSTEMQ_MANAGER, EP_MANAGER_OUTPUTQ_RETRIES) < 0) -+ IncrStat (msgRail, SendMessageFailed); -+ -+ if (++(msgRail->ManagerOutputQNextSlot) == EP_MANAGER_OUTPUTQ_SLOTS) /* check this one last next time */ -+ msgRail->ManagerOutputQNextSlot = 0; -+ -+ spin_unlock_irqrestore (&msgRail->ManagerOutputQLock, flags); -+ -+ return 0; -+} -+ -+void -+ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason) -+{ -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_MANAGER_MSG_BODY body; -+ EP_XID xid; -+ kcondvar_t sleep; -+ int rnum; -+ unsigned long flags; -+ -+ if (nodeId > sys->Position.pos_nodes) -+ return; -+ -+ strncpy (body.PanicReason, reason, sizeof (body.PanicReason)); -+ -+ kcondvar_init (&sleep); -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ for (;;) -+ { -+ if (node->ConnectedRails == 0) -+ break; -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (node->ConnectedRails & (1 << rnum)) -+ break; -+ -+ xid = ep_xid_cache_alloc(sys, &sys->Rails[rnum]->XidCache); -+ -+ if (ep_send_message (sys->Rails[rnum], nodeId, EP_MANAGER_MSG_TYPE_REMOTE_PANIC, xid, &body) == 0) -+ break; -+ -+ if (kcondvar_timedwaitsig (&sleep, &sys->NodeLock, &flags, lbolt + hz) == CV_RET_SIGPENDING) -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ kcondvar_destroy (&sleep); -+} -+ -+static void -+ProcessNeterrRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr request - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, msg->Body.Cookies[0], msg->Body.Cookies[1]); -+ -+ rail->Operations.NeterrFixup (rail, msg->Hdr.NodeId, msg->Body.Cookies); -+ -+ ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_NETERR_RESPONSE, msg->Hdr.Xid, &msg->Body); -+} -+ -+ -+static void -+ProcessNeterrResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned long flags; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: process neterr response - node %d cookies %llx %llx\n", rail->Name, msg->Hdr.NodeId, msg->Body.Cookies[0], msg->Body.Cookies[1]); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ if (nodeRail->NetworkErrorCookies[0] != 0 && nodeRail->NetworkErrorCookies[0] == msg->Body.Cookies[0]) -+ nodeRail->NetworkErrorCookies[0] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[1] != 0 && nodeRail->NetworkErrorCookies[1] == msg->Body.Cookies[1]) -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ if (nodeRail->NetworkErrorCookies[0] == 0 && nodeRail->NetworkErrorCookies[1] == 0) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_ATOMIC_PACKET; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+ -+static void -+ProcessGetNodeState (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ unsigned int service = msg->Body.Service; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessGetNodeState: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ msg->Body.NodeState.State = nodeRail->State; -+ msg->Body.NodeState.NetworkErrorState = nodeRail->NetworkErrorState; -+ msg->Body.NodeState.Railmask = ep_rcvr_railmask (rail->System, service); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: get node state for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessFlushRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushRequest: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState ? " (NetworkError)" : ""); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_REMOTE_PASSIVATE: -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; /* retransmit our flush request quickly */ -+ EPRINTF3 (DBG_MANAGER, "%s: ProcessFlushRequest: NextRunTime -> %lx (%lx)\n", rail->Name, nodeRail->NextRunTime, lbolt); -+ /* DROPTHROUGH */ -+ -+ case EP_NODE_PASSIVATED: -+ case EP_NODE_DISCONNECTED: -+ if (nodeRail->NetworkErrorState != 0) -+ break; -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE, msg->Hdr.Xid, NULL) < 0) -+ printk ("%s: flush request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+ break; -+ -+ default: -+ EPRINTF4 (DBG_MANAGER, "%s: flush request for %s[%d] - node not in approriate state - %s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, NodeStateNames[nodeRail->State]); -+ break; -+ } -+} -+ -+static void -+ProcessFlushResponse (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_NODE_RAIL *nodeRail= &rail->Nodes[msg->Hdr.NodeId]; -+ -+ EPRINTF5 (DBG_MANAGER, "%s: ProcessFlushResponse: %s - %d %s%s\n", msgRail->Name, rail->Name, msg->Hdr.NodeId, -+ NodeStateNames[nodeRail->State], EP_XIDS_MATCH (nodeRail->MsgXid, msg->Hdr.Xid) ? " (XIDS match)" : ""); -+ -+ if (nodeRail->State == EP_NODE_REMOTE_PASSIVATE && EP_XIDS_MATCH(nodeRail->MsgXid, msg->Hdr.Xid)) -+ { -+ EP_INVALIDATE_XID (nodeRail->MsgXid); -+ -+ printk ("%s: flush response from %d - move to passivated list\n", rail->Name, msg->Hdr.NodeId); -+ list_del (&nodeRail->Link); -+ -+ /* Node is now passivated - attempt to failover messages */ -+ list_add_tail (&nodeRail->Link, &rail->PassivatedList); -+ nodeRail->State = EP_NODE_PASSIVATED; -+ } -+ else -+ { -+ printk ("%s: flush response from %d - not passivating (%s) or XIDs mismatch (%llx %llx)\n", rail->Name, -+ msg->Hdr.NodeId, NodeStateNames[nodeRail->State], (long long) nodeRail->MsgXid.Unique, (long long) msg->Hdr.Xid.Unique); -+ } -+} -+ -+static void -+ProcessMapNmdRequest (EP_RAIL *msgRail, EP_RAIL *rail, EP_MANAGER_MSG *msg) -+{ -+ EP_SYS *sys = rail->System; -+ EP_MAP_NMD_BODY *msgBody = &msg->Body.MapNmd; -+ int i; -+ -+ EPRINTF4 (DBG_MANAGER, "%s: Map NMD request from %d for %d NMDs to railmask %x\n", rail->Name, msg->Hdr.NodeId, msgBody->nFrags, msgBody->Railmask); -+ -+ for (i = 0; i < msgBody->nFrags; i++) -+ ep_nmd_map_rails (sys, &msgBody->Nmd[i], msgBody->Railmask); -+ -+ /* Must flush TLBs before responding */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (sys->Rails[i] && sys->Rails[i]->TlbFlushRequired) -+ ep_perrail_dvma_sync (sys->Rails[i]); -+ -+ if (ep_send_message (rail, msg->Hdr.NodeId, EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE, msg->Hdr.Xid, &msg->Body) < 0) -+ printk ("%s: map nmd request for %s[%d] - failed to send response\n", msgRail->Name, rail->Name, msg->Hdr.NodeId); -+} -+ -+static void -+ProcessXidMessage (EP_RAIL *msgRail, EP_MANAGER_MSG *msg, EP_XID xid) -+{ -+ EP_XID_CACHE *xidCache = ep_xid_cache_find (msgRail->System, xid); -+ -+ EPRINTF6 (DBG_MANAGER, "%s: ProcessXidMessage: XID=%08x.%0x8.%016llx -> %p(%p)\n", -+ msgRail->Name, xid.Generation, xid.Handle, (long long) xid.Unique, -+ xidCache ? xidCache->MessageHandler : 0, xidCache ? xidCache->Arg : 0); -+ -+ if (xidCache != NULL) -+ xidCache->MessageHandler (xidCache->Arg, msg); -+} -+ -+static void -+ProcessMessage (EP_RAIL *msgRail, void *arg, void *msgbuf) -+{ -+ EP_SYS *sys = msgRail->System; -+ EP_MANAGER_MSG *msg = (EP_MANAGER_MSG *) msgbuf; -+ uint16_t csum = msg->Hdr.Checksum; -+ EP_RAIL *rail; -+ -+ if (msg->Hdr.Version != EP_MANAGER_MSG_VERSION) -+ return; -+ -+ msg->Hdr.Checksum= 0; -+ if (CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE) != csum) -+ { -+ printk ("%s: checksum failed on msg from %d (%d) (%x != %x) ?\n", msgRail->Name, msg->Hdr.NodeId, msg->Hdr.Type, csum, CheckSum ((char *) msg, EP_MANAGER_MSG_SIZE)); -+ return; -+ } -+ -+ if ((rail = sys->Rails[msg->Hdr.Rail]) == NULL) -+ { -+ printk ("%s: rail no longer exists for msg from %d?\n", msgRail->Name, msg->Hdr.NodeId); -+ return; -+ } -+ -+ EPRINTF7 (DBG_MANAGER, "%s: ProcessMessage (%s) type=%d node=%d XID=%08x.%0x8.%016llx\n", -+ msgRail->Name, rail->Name, msg->Hdr.Type, msg->Hdr.NodeId, -+ msg->Hdr.Xid.Generation, msg->Hdr.Xid.Handle, msg->Hdr.Xid.Unique); -+ -+ switch (msg->Hdr.Type) -+ { -+ case EP_MANAGER_MSG_TYPE_REMOTE_PANIC: -+ msg->Body.PanicReason[EP_PANIC_STRLEN] = '\0'; /* ensure string terminated */ -+ -+ printk ("%s: remote panic call from elan node %d - %s\n", msgRail->Name, msg->Hdr.NodeId, msg->Body.PanicReason); -+ panic ("ep: remote panic request\n"); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_REQUEST: -+ ProcessNeterrRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_NETERR_RESPONSE: -+ ProcessNeterrResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_REQUEST: -+ ProcessFlushRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE: -+ ProcessFlushResponse (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST: -+ ProcessMapNmdRequest (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST: -+ ProcessXidMessage (msgRail, msg, msg->Body.Failover.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE: -+ ProcessGetNodeState (msgRail, rail, msg); -+ break; -+ -+ case EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE: -+ ProcessXidMessage (msgRail, msg, msg->Hdr.Xid); -+ break; -+ -+ default: -+ printk ("%s: Unknown message type %d from %d\n", msgRail->Name, msg->Hdr.Type, msg->Hdr.NodeId); -+ break; -+ } -+} -+ -+ -+static void -+ManagerQueueEvent (EP_RAIL *rail, void *arg) -+{ -+ ep_kthread_schedule ((EP_KTHREAD *) arg, lbolt); -+} -+ -+void -+UpdateConnectionState (EP_RAIL *rail, statemap_t *map) -+{ -+ EP_SYS *sys = rail->System; -+ bitmap_t seg; -+ int offset, nodeId; -+ unsigned long flags; -+ -+ while ((offset = statemap_findchange (map, &seg, 1)) >= 0) -+ { -+ for (nodeId = offset; nodeId < (offset + BT_NBIPUL) && nodeId < rail->Position.pos_nodes; nodeId++) -+ { -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ -+ if (statemap_getbits (map, nodeId, 1)) -+ { -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_DISCONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnected \n", rail->Name, nodeId); -+ break; -+ -+ case EP_NODE_CONNECTING: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Connect\n", rail->Name, nodeId); -+ -+ /* load the route table entry *before* setting the state -+ * to connected, since DMA's can be initiated as soon as -+ * the node is marked as connected */ -+ rail->Operations.LoadNodeRoute (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_CONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 1, 1); -+ if (statemap_getbits (sys->NodeSet, nodeId, 1) == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 1, 1); -+ -+ /* Add to rails connected to this node */ -+ node->ConnectedRails |= (1 << rail->Number); -+ -+ /* Finally lower the per-node context filter */ -+ rail->Operations.LowerFilter (rail, nodeId); -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Local Passivate\n", rail->Name, nodeId); -+ -+ /* Raise the per-node context filter */ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ /* If it's resolving network errors it will be on the NodeNeterrList, -+ * remove if from this list before placing it on the LocalPassivateList -+ * as we'll resolve the network error later in RemotePassivate */ -+ if (nodeRail->NetworkErrorState) -+ list_del (&nodeRail->Link); -+ -+ list_add_tail (&nodeRail->Link, &rail->LocalPassivateList); -+ nodeRail->State = EP_NODE_LOCAL_PASSIVATE; -+ -+ /* Remove from rails connected to this node */ -+ node->ConnectedRails &= ~(1 << rail->Number); -+ break; -+ -+ default: -+ printk ("%s: Node %d - in NodeChangeMap with state %d\n", rail->Name, nodeId, nodeRail->State); -+ panic ("Node in NodeChangeMap with invalid state\n"); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ } -+ } -+ } -+} -+ -+void -+ProgressNetworkError (EP_RAIL *rail, EP_NODE_RAIL *nodeRail) -+{ -+ EP_SYS *sys = rail->System; -+ int nodeId = nodeRail - rail->Nodes; -+ EP_MANAGER_MSG_BODY msg; -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_REMOTE_PASSIVATE); -+ -+ if (BEFORE (lbolt, nodeRail->NextRunTime)) -+ return; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_DMA_PACKET) -+ nodeRail->NetworkErrorState &= ~EP_NODE_NETERR_DMA_PACKET; -+ -+ if (nodeRail->NetworkErrorState & EP_NODE_NETERR_ATOMIC_PACKET) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ msg.Cookies[0] = nodeRail->NetworkErrorCookies[0]; -+ msg.Cookies[1] = nodeRail->NetworkErrorCookies[1]; -+ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: progress neterr - node %d cookies %llx %llx\n", rail->Name, nodeId, msg.Cookies[0], msg.Cookies[1]); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_NETERR_REQUEST, nodeRail->MsgXid, &msg) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+} -+ -+long -+ProgressNodeLists (EP_RAIL *rail, long nextRunTime) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ list_for_each_safe (el, nel, &rail->NetworkErrorList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ ProgressNetworkError (rail, nodeRail); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: lower context filter for node %d due to network error\n", rail->Name, nodeId); -+ printk ("%s: lower context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.LowerFilter (rail, nodeId); -+ -+ list_del (&nodeRail->Link); -+ continue; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (! list_empty (&rail->LocalPassivateList)) -+ { -+ EPRINTF1 (DBG_MANAGER, "%s: Locally Passivating Nodes\n", rail->Name); -+ -+ /* We have disconnected from some nodes or have left ourselves -+ * flush through all communications and determine whether we -+ * need to perform rail failover */ -+ rail->Operations.FlushFilters (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FILTERING, rail->NodeSet); -+ -+ rail->Operations.FlushQueues (rail); -+ -+ ep_call_callbacks (rail, EP_CB_FLUSH_FLUSHING, rail->NodeSet); -+ -+ while (! list_empty (&rail->LocalPassivateList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->LocalPassivateList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.UnloadNodeRoute (rail, nodeId); -+ -+ if (nodeRail->NetworkErrorState == 0 && nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Disconnecting\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ } -+ else -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d -> Remote Passivate\n", rail->Name, nodeId); -+ -+ list_add_tail (&nodeRail->Link, &rail->RemotePassivateList); -+ nodeRail->State = EP_NODE_REMOTE_PASSIVATE; -+ -+ if (nodeRail->NetworkErrorState == 0) -+ nodeRail->NextRunTime = lbolt; -+ } -+ } -+ -+ ep_call_callbacks (rail, EP_CB_PASSIVATED, rail->NodeSet); -+ } -+ -+ list_for_each_safe (el, nel, &rail->RemotePassivateList) { -+ EP_NODE_RAIL *nodeRail = list_entry (el, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ if (node->ConnectedRails == 0) /* no rails connected to this node (anymore) */ -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Remote Passivate -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->NetworkErrorState) -+ { -+ ProgressNetworkError (rail, nodeRail); -+ } -+ else if (! BEFORE (lbolt, nodeRail->NextRunTime)) -+ { -+ if (EP_XID_INVALID (nodeRail->MsgXid)) -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ if (ep_send_message (rail, nodeId, EP_MANAGER_MSG_TYPE_FLUSH_REQUEST, nodeRail->MsgXid, NULL) == 0) -+ nodeRail->NextRunTime = lbolt + MESSAGE_RETRY_TIME; -+ else -+ nodeRail->NextRunTime = lbolt + MSGBUSY_RETRY_TIME; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ -+ if (! list_empty (&rail->PassivatedList)) -+ { -+ ep_call_callbacks (rail, EP_CB_FAILOVER, rail->NodeSet); -+ -+ list_for_each_safe (el, nel, &rail->PassivatedList) { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->PassivatedList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ ASSERT (nodeRail->NetworkErrorState == 0); -+ -+ if (node->ConnectedRails == 0) -+ { -+ /* Remove from this list */ -+ list_del (&nodeRail->Link); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no rails, Passivated -> Disconnecting\n", rail->Name, nodeId); -+ -+ /* transition towards disconnected */ -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ EPRINTF6 (DBG_MANAGER, "%s: Node %d - %s NetworkErrorState=%x NextRunTime=%lx (%lx)\n", -+ rail->Name, nodeId, NodeStateNames[nodeRail->State], nodeRail->NetworkErrorState, -+ nodeRail->NextRunTime, nextRunTime); -+ -+ if (nodeRail->MessageState == 0) -+ { -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, no messages, Passivated -> Disconnecting\n", rail->Name,nodeId); -+ -+ list_del (&nodeRail->Link); -+ list_add_tail (&nodeRail->Link, &rail->DisconnectingList); -+ nodeRail->State = EP_NODE_DISCONNECTING; -+ continue; -+ } -+ -+ nodeRail->MessageState = 0; -+ nodeRail->NextRunTime = lbolt + FAILOVER_RETRY_TIME; -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, nodeRail->NextRunTime)) -+ nextRunTime = nodeRail->NextRunTime; -+ } -+ } -+ -+ if (! list_empty (&rail->DisconnectingList)) -+ { -+ ep_call_callbacks (rail, EP_CB_DISCONNECTING, rail->NodeSet); -+ -+ while (! list_empty (&rail->DisconnectingList)) -+ { -+ EP_NODE_RAIL *nodeRail = list_entry (rail->DisconnectingList.next, EP_NODE_RAIL, Link); -+ int nodeId = nodeRail - rail->Nodes; -+ EP_NODE *node = &sys->Nodes[nodeId]; -+ -+ EPRINTF2 (DBG_MANAGER, "%s: Node %d, Disconnecting -> Disconnected\n", rail->Name, nodeId); -+ -+ list_del (&nodeRail->Link); -+ -+ rail->Operations.NodeDisconnected (rail, nodeId); -+ -+ /* Clear the network error state */ -+ nodeRail->NextRunTime = 0; -+ nodeRail->NetworkErrorState = 0; -+ nodeRail->NetworkErrorCookies[0] = 0; -+ nodeRail->NetworkErrorCookies[1] = 0; -+ -+ /* Clear the message state */ -+ nodeRail->MessageState = 0; -+ -+ cm_node_disconnected (rail, nodeId); -+ -+ nodeRail->State = EP_NODE_DISCONNECTED; -+ -+ statemap_setbits (rail->NodeSet, nodeId, 0, 1); -+ -+ if (node->ConnectedRails == 0) -+ statemap_setbits (sys->NodeSet, nodeId, 0, 1); -+ } -+ -+ ep_call_callbacks (rail, EP_CB_DISCONNECTED, rail->NodeSet); -+ } -+ -+ return (nextRunTime); -+} -+ -+void -+DisplayNodes (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ int i, state, count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ for (state = 0; state < EP_NODE_NUM_STATES; state++) -+ { -+ for (count = i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ ASSERT (rail->Nodes[i].State < EP_NODE_NUM_STATES); -+ -+ if (rail->Nodes[i].State == state) -+ if (state != EP_NODE_DISCONNECTED) -+ printk ("%s %d", !count++ ? NodeStateNames[state] : "", i); -+ } -+ if (count) -+ printk ("%s (%d total)\n", state == EP_NODE_DISCONNECTED ? NodeStateNames[state] : "", count); -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+static void -+PositionFound (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ int i; -+ -+ /* only called from the ep_managage whilst rail->State == EP_RAIL_STATE_STARTED */ -+ ASSERT ( rail->State == EP_RAIL_STATE_STARTED ); -+ -+#if defined(PER_CPU_TIMEOUT) -+ /* -+ * On Tru64 - if we're running in a "funnelled" thread, then we will be -+ * unable to start the per-cpu timeouts, so if we return then eventually -+ * the ep_manager() thread will find the network position and we're -+ * in control of our own destiny. -+ */ -+ if (THREAD_IS_FUNNELED(current_thread())) -+ { -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ return; -+ } -+#endif -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ -+ if (pos->pos_levels > MaxSwitchLevels) -+ { -+ for (i = 0; i < (pos->pos_levels - MaxSwitchLevels); i++) -+ pos->pos_nodes /= pos->pos_arity[i]; -+ -+ for (i = 0; i < MaxSwitchLevels; i++) -+ pos->pos_arity[i] = pos->pos_arity[i + (pos->pos_levels - MaxSwitchLevels)]; -+ -+ pos->pos_levels = MaxSwitchLevels; -+ pos->pos_nodeid = pos->pos_nodeid % pos->pos_nodes; -+ -+ printk ("%s: limiting switch levels to %d\n", rail->Name, MaxSwitchLevels); -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", rail->Name, pos->pos_nodeid, pos->pos_levels, pos->pos_nodes); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, pos->pos_nodeid); -+ } -+ -+ if (rail->Position.pos_mode != ELAN_POS_UNKNOWN && rail->Position.pos_nodeid != pos->pos_nodeid) -+ { -+ printk ("%s: NodeId has changed from %d to %d\n", rail->Name, rail->Position.pos_nodeid, pos->pos_nodeid); -+ panic ("ep: PositionFound: NodeId has changed\n"); -+ } -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN && (sys->Position.pos_nodeid != pos->pos_nodeid || sys->Position.pos_nodes != pos->pos_nodes)) -+ { -+ printk ("%s: position incompatible - disabling rail\n", rail->Name); -+ rail->State = EP_RAIL_STATE_INCOMPATIBLE; -+ return; -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ { -+ sys->Position = *pos; -+ sys->NodeSet = statemap_create (pos->pos_nodes); -+ KMEM_ZALLOC (sys->Nodes, EP_NODE *, pos->pos_nodes * sizeof (EP_NODE), 1); -+ } -+ -+ rail->Position = *pos; -+ rail->SwitchBroadcastLevel = pos->pos_levels - 1; -+ rail->State = EP_RAIL_STATE_RUNNING; -+ -+ for (i = 0; i < pos->pos_levels; i++) -+ { -+ rail->SwitchProbeTick[i] = lbolt; -+ rail->SwitchLast[i].uplink = 4; -+ } -+ -+ rail->Operations.PositionFound (rail, pos); -+ -+ INIT_LIST_HEAD (&rail->NetworkErrorList); -+ INIT_LIST_HEAD (&rail->LocalPassivateList); -+ INIT_LIST_HEAD (&rail->RemotePassivateList); -+ INIT_LIST_HEAD (&rail->PassivatedList); -+ INIT_LIST_HEAD (&rail->DisconnectingList); -+ -+ rail->NodeSet = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeMap = statemap_create (rail->Position.pos_nodes); -+ rail->NodeChangeTmp = statemap_create (rail->Position.pos_nodes); -+ -+ KMEM_ZALLOC (rail->Nodes, EP_NODE_RAIL *, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL), 1); -+ -+ for (i = 0; i < rail->Position.pos_nodes; i++) -+ { -+ spin_lock_init (&rail->Nodes[i].CookieLock); -+ -+ INIT_LIST_HEAD (&rail->Nodes[i].StalledDmas); -+ -+ rail->Nodes[i].State = EP_NODE_DISCONNECTED; -+ } -+ -+ /* Notify all subsystems that a new rail has been enabled */ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->AddRail) -+ subsys->AddRail (subsys, sys, rail); -+ -+ /* XXXX: what to do if the subsystem refused to add the rail ? */ -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ /* Now enable the manager input queue */ -+ ep_enable_inputq (rail, rail->ManagerInputQ); -+} -+ -+static void -+ep_manager (void *arg) -+{ -+ EP_SYS *sys = (EP_SYS *) arg; -+ struct list_head *el; -+ ELAN_POSITION pos; -+ unsigned long flags; -+ -+ kernel_thread_init ("ep_manager"); -+ kernel_thread_become_highpri(); -+ -+ for (;;) -+ { -+ long nextRunTime = lbolt + MSEC2TICKS(CM_THREAD_SCHEDULE_TIMEOUT); -+ -+ list_for_each (el, &sys->ManagedRails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL, ManagerLink); -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_STARTED: -+ if (ProbeNetwork (rail, &pos) == 0) -+ { -+ PositionFound (rail, &pos); -+ break; -+ } -+ -+ if (nextRunTime == 0 || AFTER (nextRunTime, lbolt + HZ)) -+ nextRunTime = lbolt + HZ; -+ break; -+ -+ case EP_RAIL_STATE_RUNNING: -+ if (ep_poll_inputq (rail, rail->ManagerInputQ, 100, ProcessMessage, rail) >= 100) -+ nextRunTime = lbolt; -+ -+ /* Handle any nodes which the cluster membership subsystem -+ * has indicated are to begin connecting or disconnecting */ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ if (! statemap_changed (rail->NodeChangeMap)) -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ else -+ { -+ /* -+ * Take a copy of the statemap, and zero all entries so -+ * we only see new requests next time -+ */ -+ statemap_copy (rail->NodeChangeTmp, rail->NodeChangeMap); -+ statemap_zero (rail->NodeChangeMap); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ UpdateConnectionState (rail, rail->NodeChangeTmp); -+ } -+ -+ nextRunTime = ProgressNodeLists (rail, nextRunTime); -+ -+ if (statemap_changed (rail->NodeSet)) -+ { -+ ep_call_callbacks (rail, EP_CB_NODESET, rail->NodeSet); -+ -+ statemap_clearchanges (rail->NodeSet); -+ } -+ break; -+ -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ break; -+ } -+ } -+ -+ -+ EPRINTF5 (DBG_MANAGER, "ep_manager: sleep now=%lx nextRunTime=%lx (%ld) [%lx (%ld)]\n", -+ lbolt, nextRunTime, nextRunTime ? nextRunTime - lbolt : 0, sys->ManagerThread.next_run, -+ sys->ManagerThread.next_run ? sys->ManagerThread.next_run - lbolt : 0); -+ -+ if (ep_kthread_sleep (&sys->ManagerThread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&sys->ManagerThread); -+ kernel_thread_exit(); -+} -+ -+void -+ep_connect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF2 (DBG_MANAGER, "%s: ep_connect_node: nodeId %d\n", rail->Name, nodeId); -+ -+ ASSERT (node->State == EP_NODE_DISCONNECTED && statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ -+ node->State = EP_NODE_CONNECTING; -+ -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+} -+ -+int -+ep_disconnect_node (EP_RAIL *rail, int nodeId) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *node = &rail->Nodes[nodeId]; -+ int state; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ EPRINTF3 (DBG_MANAGER, "%s: ep_disconnect_node: nodeId %d - %s\n", rail->Name, nodeId, NodeStateNames[node->State]); -+ -+ switch (state = node->State) -+ { -+ case EP_NODE_CONNECTING: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 0, 1); -+ -+ node->State = EP_NODE_DISCONNECTED; -+ break; -+ -+ case EP_NODE_CONNECTED: -+ statemap_setbits (rail->NodeChangeMap, nodeId, 1, 1); -+ -+ node->State = EP_NODE_LEAVING_CONNECTED; -+ break; -+ -+ case EP_NODE_LEAVING_CONNECTED: -+ /* no assert on NodeChangeMap as the map could have been taken but not acted on */ -+ break; -+ -+ default: -+ ASSERT (statemap_getbits (rail->NodeChangeMap, nodeId, 1) == 0); -+ break; -+ } -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ if (state == EP_NODE_CONNECTED) -+ ep_kthread_schedule (&sys->ManagerThread, lbolt); -+ -+ return state; -+} -+ -+int -+ep_manager_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if ((rail->ManagerOutputQ = ep_alloc_outputq (rail, EP_MANAGER_MSG_SIZE, EP_MANAGER_OUTPUTQ_SLOTS)) == NULL) -+ return -ENOMEM; -+ -+ if ((rail->ManagerInputQ = ep_alloc_inputq (rail, EP_SYSTEMQ_MANAGER, EP_MANAGER_MSG_SIZE, EP_MANAGER_INPUTQ_SLOTS, -+ ManagerQueueEvent, &sys->ManagerThread)) == NULL) -+ { -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init (&rail->ManagerOutputQLock); -+ -+ ep_xid_cache_init (sys, &rail->XidCache); -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_add_tail (&rail->ManagerLink, &sys->ManagedRails); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ if (rail->ManagerInputQ != NULL) -+ { -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&rail->ManagerLink); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ ep_xid_cache_destroy (sys, &rail->XidCache); -+ -+ spin_lock_destroy (&rail->ManagerOutputQLock); -+ -+ ep_disable_inputq (rail, rail->ManagerInputQ); -+ ep_free_inputq (rail, rail->ManagerInputQ); -+ ep_free_outputq (rail, rail->ManagerOutputQ); -+ } -+} -+ -+int -+ep_manager_init (EP_SYS *sys) -+{ -+ INIT_LIST_HEAD (&sys->ManagedRails); -+ -+ ep_kthread_init (&sys->ManagerThread); -+ -+ if (kernel_thread_create (ep_manager, (void *) sys) == 0) -+ return (ENOMEM); -+ -+ ep_kthread_started (&sys->ManagerThread); -+ -+ return (0); -+} -+ -+void -+ep_manager_fini (EP_SYS *sys) -+{ -+ ep_kthread_stop (&sys->ManagerThread); -+ ep_kthread_destroy (&sys->ManagerThread); -+} -+ -+int -+ep_sys_init (EP_SYS *sys) -+{ -+ kmutex_init (&sys->SubsysLock); -+ kmutex_init (&sys->StartStopLock); -+ spin_lock_init (&sys->NodeLock); -+ -+ INIT_LIST_HEAD (&sys->Subsystems); -+ -+ /* initialise the xid allocators */ -+ spin_lock_init (&sys->XidLock); -+ INIT_LIST_HEAD (&sys->XidCacheList); -+ -+ /* initially don't know where we are in the network */ -+ sys->Position.pos_mode = ELAN_POS_UNKNOWN; -+ -+ /* initialise the network mapping descriptor hash tables */ -+ ep_nmh_init (&sys->MappingTable); -+ -+ /* intialise the shared allocators */ -+ ep_shared_alloc_init (sys); -+ -+ /* initialise the dvma space */ -+ ep_dvma_init (sys); -+ -+ /* intiialise the rail manager */ -+ ep_manager_init (sys); -+ -+ /* initialise all subsystems */ -+ cm_init (sys); -+ ep_comms_init (sys); -+ //ep_msgsys_init (sys); -+ -+ return (0); -+} -+ -+void -+ep_sys_fini (EP_SYS *sys) -+{ -+ /* Destroy the subsystems in the reverse order to their creation */ -+ while (! list_empty (&sys->Subsystems)) -+ { -+ EP_SUBSYS *subsys = list_entry (sys->Subsystems.prev, EP_SUBSYS, Link); -+ -+ list_del (&subsys->Link); -+ -+ subsys->Destroy (subsys, sys); -+ } -+ -+ ep_manager_fini(sys); -+ ep_dvma_fini (sys); -+ ep_shared_alloc_fini (sys); -+ -+ ep_nmh_fini (&sys->MappingTable); -+ -+ if (sys->Position.pos_mode != ELAN_POS_UNKNOWN) { -+ statemap_destroy (sys->NodeSet); -+ KMEM_FREE(sys->Nodes, sys->Position.pos_nodes * sizeof (EP_NODE)); -+ } -+ -+ spin_lock_destroy (&sys->XidLock); -+ -+ spin_lock_destroy (&sys->NodeLock); -+ kmutex_destroy (&sys->SubsysLock); -+ kmutex_destroy (&sys->StartStopLock); -+} -+ -+void -+ep_shutdown (EP_SYS *sys) -+{ -+ sys->Shutdown = 1; -+} -+ -+int -+ep_init_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ static int rnum; -+ -+ rail->System = sys; -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+ rail->Number = rnum++; -+ rail->Position.pos_mode = ELAN_POS_UNKNOWN; -+ rail->Position.pos_nodeid = ELAN_INVALID_NODE; -+ -+ rail->CallbackRegistered = 0; -+ -+ sprintf (rail->Name, "ep%d", rail->Number); -+ -+ /* Initialise externally visible locks */ -+ kmutex_init (&rail->CallbackLock); -+ -+ ep_alloc_init (rail); -+ -+ sys->Rails[rail->Number] = rail; -+ -+ return 0; -+} -+ -+void -+ep_destroy_rail (EP_RAIL *rail) -+{ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ ep_alloc_fini (rail); -+ -+ kmutex_destroy (&rail->CallbackLock); -+ -+ rail->System->Rails[rail->Number] = NULL; -+ -+ rail->Operations.DestroyRail (rail); -+} -+ -+/* We need to traverse the Subsystems lists backwards -+ * but it's not defined in */ -+#define list_for_each_backwards(pos,list) \ -+ for (pos = (list)->prev; pos != (list); \ -+ pos = (pos)->prev) -+ -+void -+__ep_stop_rail (EP_RAIL *rail) -+{ -+ /* called holding the sys->Lock */ -+ EP_SYS *sys = rail->System; -+ struct list_head *el; -+ -+ rail->Operations.StallRail (rail); -+ -+ /* Notify all subsystems that this rail is being stopped */ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each_backwards (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (subsys->RemoveRail) -+ subsys->RemoveRail (subsys, sys, rail); -+ } -+ kmutex_unlock (&sys->SubsysLock); -+ -+ ep_manager_remove_rail (sys, rail); -+ -+ KMEM_FREE (rail->Nodes, rail->Position.pos_nodes * sizeof (EP_NODE_RAIL)); -+ -+ statemap_destroy (rail->NodeChangeTmp); -+ statemap_destroy (rail->NodeChangeMap); -+ statemap_destroy (rail->NodeSet); -+ } -+ -+ ep_dvma_remove_rail (sys, rail); -+ ep_shared_alloc_remove_rail (sys, rail); -+ -+ rail->Operations.StopRail (rail); -+ -+ rail->State = EP_RAIL_STATE_UNINITIALISED; -+} -+ -+void -+ep_stop_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ /* stall ep_manager */ -+ /* and remove the rail from the manaager */ -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ if ( rail->State == EP_RAIL_STATE_STARTED ) -+ ep_manager_remove_rail (sys, rail); -+ ep_kthread_resume (&sys->ManagerThread); -+ -+ __ep_stop_rail (rail); -+} -+ -+int -+ep_start_rail (EP_RAIL *rail) -+{ -+ EP_SYS *sys = rail->System; -+ -+ ASSERT (rail->State == EP_RAIL_STATE_UNINITIALISED); -+ -+ if (rail->Operations.StartRail (rail) < 0) -+ return -ENXIO; -+ -+ kmutex_lock (&sys->StartStopLock); -+ /* Add this rail to the shared allocator */ -+ if (ep_shared_alloc_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* Add this rail to dvma kmap */ -+ if (ep_dvma_add_rail (rail->System, rail)) -+ goto failed; -+ -+ /* rail is now started */ -+ rail->State = EP_RAIL_STATE_STARTED; -+ -+ /* notify the rail manager of the new rail */ -+ if (ep_manager_add_rail (rail->System, rail)) -+ goto failed; -+ -+ kmutex_unlock (&sys->StartStopLock); -+ return (ESUCCESS); -+ -+ failed: -+ printk ("%s: start failed\n", rail->Name); -+ kmutex_unlock (&sys->StartStopLock); -+ __ep_stop_rail (rail); -+ -+ return (ENOMEM); -+} -+ -+void -+ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_add_tail (&subsys->Link, &sys->Subsystems); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+void -+ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys) -+{ -+ kmutex_lock (&sys->SubsysLock); -+ list_del (&subsys->Link); -+ kmutex_unlock (&sys->SubsysLock); -+} -+ -+EP_SUBSYS * -+ep_subsys_find (EP_SYS *sys, char *name) -+{ -+ struct list_head *el; -+ -+ ASSERT ( !in_interrupt()); -+ -+ kmutex_lock (&sys->SubsysLock); -+ list_for_each (el, &sys->Subsystems) { -+ EP_SUBSYS *subsys = list_entry (el, EP_SUBSYS, Link); -+ -+ if (! strcmp (subsys->Name, name)) -+ { -+ kmutex_unlock (&sys->SubsysLock); -+ return (subsys); -+ } -+ } -+ -+ kmutex_unlock (&sys->SubsysLock); -+ return (NULL); -+} -+ -+int -+ep_waitfor_nodeid (EP_SYS *sys) -+{ -+ int i, printed = 0; -+ kcondvar_t Sleep; -+ spinlock_t Lock; -+ -+ kcondvar_init (&Sleep); -+ spin_lock_init (&Lock); -+ -+#define TICKS_TO_WAIT (10*hz) -+#define TICKS_PER_LOOP (hz/10) -+ for (i = 0; sys->Position.pos_mode == ELAN_POS_UNKNOWN && i < TICKS_TO_WAIT; i += TICKS_PER_LOOP) -+ { -+ if (! printed++) -+ printk ("ep: waiting for network position to be found\n"); -+ -+ spin_lock (&Lock); -+ kcondvar_timedwait (&Sleep, &Lock, NULL, lbolt + TICKS_PER_LOOP); -+ spin_unlock (&Lock); -+ } -+ -+ if (sys->Position.pos_mode == ELAN_POS_UNKNOWN) -+ printk ("ep: network position not found after waiting\n"); -+ else if (printed) -+ printk ("ep: network position found at nodeid %d\n", sys->Position.pos_nodeid); -+ -+ spin_lock_destroy (&Lock); -+ kcondvar_destroy (&Sleep); -+ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_nodeid (EP_SYS *sys) -+{ -+ return (sys->Position.pos_mode == ELAN_POS_UNKNOWN ? ELAN_INVALID_NODE : sys->Position.pos_nodeid); -+} -+ -+int -+ep_numnodes (EP_SYS *sys) -+{ -+ return (sys->Position.pos_nodes); -+} -+ -+void -+ep_fillout_stats(EP_RAIL *r, char *str) -+{ -+ sprintf(str+strlen(str),"SendMessageFailed %lu NeterrAtomicPacket %lu NeterrDmaPacket %lu \n", r->Stats.SendMessageFailed, r->Stats.NeterrAtomicPacket, r->Stats.NeterrDmaPacket); -+ sprintf(str+strlen(str),"Rx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,rx), GET_STAT_PER_SEC(r->Stats,rx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,rx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,rx_len) / (1024*1024)); -+ sprintf(str+strlen(str),"Tx %lu %lu /sec\n", GET_STAT_TOTAL(r->Stats,tx), GET_STAT_PER_SEC(r->Stats,tx) ); -+ sprintf(str+strlen(str),"MBytes %lu %lu MB/sec\n", GET_STAT_TOTAL(r->Stats,tx_len)/ (1024*1024), GET_STAT_PER_SEC(r->Stats,tx_len) / (1024*1024)); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan3.c 2005-07-28 14:52:52.886672960 -0400 -@@ -0,0 +1,504 @@ -+ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan3.c,v 1.31.8.3 2004/11/30 12:02:17 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan3; -+ -+unsigned int -+ep3_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN3_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan3_device (i)) != NULL) -+ { -+ if ((rail = ep3_create_rail (sys, dev)) != NULL) -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev) -+{ -+ EP3_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP3_RAIL *, sizeof (EP3_RAIL), TRUE); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->Generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->Device = dev; -+ -+ /* Install our rail operations */ -+ rail->Generic.Operations.DestroyRail = ep3_destroy_rail; -+ rail->Generic.Operations.StartRail = ep3_start_rail; -+ rail->Generic.Operations.StallRail = ep3_stall_rail; -+ rail->Generic.Operations.StopRail = ep3_stop_rail; -+ -+ rail->Generic.Operations.SdramAlloc = ep3_sdram_alloc; -+ rail->Generic.Operations.SdramFree = ep3_sdram_free; -+ rail->Generic.Operations.SdramWriteb = ep3_sdram_writeb; -+ -+ rail->Generic.Operations.KaddrMap = ep3_kaddr_map; -+ rail->Generic.Operations.SdramMap = ep3_sdram_map; -+ rail->Generic.Operations.Unmap = ep3_unmap; -+ -+ rail->Generic.Operations.DvmaReserve = ep3_dvma_reserve; -+ rail->Generic.Operations.DvmaRelease = ep3_dvma_release; -+ rail->Generic.Operations.DvmaSetPte = ep3_dvma_set_pte; -+ rail->Generic.Operations.DvmaReadPte = ep3_dvma_read_pte; -+ rail->Generic.Operations.DvmaUnload = ep3_dvma_unload; -+ rail->Generic.Operations.FlushTlb = ep3_flush_tlb; -+ -+ rail->Generic.Operations.ProbeRoute = ep3_probe_route; -+ rail->Generic.Operations.PositionFound = ep3_position_found; -+ rail->Generic.Operations.CheckPosition = ep3_check_position; -+ rail->Generic.Operations.NeterrFixup = ep3_neterr_fixup; -+ -+ rail->Generic.Operations.LoadSystemRoute = ep3_load_system_route; -+ -+ rail->Generic.Operations.LoadNodeRoute = ep3_load_node_route; -+ rail->Generic.Operations.UnloadNodeRoute = ep3_unload_node_route; -+ rail->Generic.Operations.LowerFilter = ep3_lower_filter; -+ rail->Generic.Operations.RaiseFilter = ep3_raise_filter; -+ rail->Generic.Operations.NodeDisconnected = ep3_node_disconnected; -+ -+ rail->Generic.Operations.FlushFilters = ep3_flush_filters; -+ rail->Generic.Operations.FlushQueues = ep3_flush_queues; -+ -+ rail->Generic.Operations.AllocInputQ = ep3_alloc_inputq; -+ rail->Generic.Operations.FreeInputQ = ep3_free_inputq; -+ rail->Generic.Operations.EnableInputQ = ep3_enable_inputq; -+ rail->Generic.Operations.DisableInputQ = ep3_disable_inputq; -+ rail->Generic.Operations.PollInputQ = ep3_poll_inputq; -+ -+ rail->Generic.Operations.AllocOutputQ = ep3_alloc_outputq; -+ rail->Generic.Operations.FreeOutputQ = ep3_free_outputq; -+ rail->Generic.Operations.OutputQMsg = ep3_outputq_msg; -+ rail->Generic.Operations.OutputQState = ep3_outputq_state; -+ rail->Generic.Operations.OutputQSend = ep3_outputq_send; -+ -+ rail->Generic.Operations.FillOutStats = ep3_fillout_stats; -+ -+ rail->Generic.Devinfo = dev->Devinfo; -+ -+ printk ("%s: connected via elan3 rev%c device %d\n", rail->Generic.Name, -+ 'a' + dev->Devinfo.dev_revision_id, dev->Instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep3_destroy_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ KMEM_FREE (rail, sizeof (EP3_RAIL)); -+} -+ -+static int -+ep3_attach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ ELAN3_CTXT *ctxt; -+ ELAN_CAPABILITY *cap; -+ int ctx; -+ unsigned long flags; -+ -+ if ((ctxt = elan3_alloc (dev, TRUE)) == (ELAN3_CTXT *) NULL) -+ { -+ printk ("%s: cannot allocate elan context\n", rail->Generic.Name); -+ return -ENXIO; -+ } -+ -+ ctxt->Operations = &ep3_elan3_ops; -+ ctxt->Private = (void *) rail; -+ -+ /* Initialise a capability and attach to the elan*/ -+ KMEM_ALLOC (cap, ELAN_CAPABILITY *, sizeof (ELAN_CAPABILITY), TRUE); -+ -+ elan_nullcap (cap); -+ -+ cap->cap_type = ELAN_CAP_TYPE_KERNEL; -+ cap->cap_version = ELAN_CAP_VERSION_NUMBER; -+ cap->cap_mycontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_lowcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_highcontext = ELAN3_MRF_CONTEXT_NUM | SYS_CONTEXT_BIT; -+ cap->cap_railmask = 1 << dev->Devinfo.dev_rail; -+ -+ /* Ensure the context filter is raised while we initialise */ -+ elan3_block_inputter (ctxt, TRUE); -+ -+ if (elan3_doattach (ctxt, cap) != 0) -+ { -+ printk ("%s: cannot attach to kernel context\n", rail->Generic.Name); -+ -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ elan3_free (ctxt); -+ return -ENXIO; -+ } -+ KMEM_FREE (cap, sizeof (ELAN_CAPABILITY)); -+ -+ /* now attach to all the kernel comms input/dmaring/data contexts */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ /* place it in the info table. NOTE: don't call elan3mmu_set_info, as this */ -+ /* will queue the info again on the devices info list */ -+ dev->CtxtTable[ctx] = ctxt; -+ -+ elan3mmu_set_context_filter (dev, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ elan3mmu_attach (dev, ctx, ctxt->Elan3mmu, ctxt->RouteTable->Table, ctxt->RouteTable->Size-1); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ /* Stash the ctxt,commandport, mmu and route table */ -+ rail->Ctxt = ctxt; -+ rail->CommandPort = ctxt->CommandPort; -+ rail->Elan3mmu = ctxt->Elan3mmu; -+ rail->RouteTable = ctxt->RouteTable; -+ -+ return 0; -+} -+ -+static void -+ep3_detach_rail (EP3_RAIL *rail) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ int ctx; -+ -+ /* detach from the elan */ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ { -+ dev->CtxtTable[ctx] = NULL; -+ elan3mmu_detach (dev, ctx); -+ } -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ elan3_dodetach(rail->Ctxt); -+ elan3_free (rail->Ctxt); -+ -+ rail->Ctxt = NULL; -+ rail->CommandPort = 0; -+ rail->Elan3mmu = NULL; -+ rail->RouteTable = NULL; -+} -+ -+int -+ep3_start_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int i, res; -+ unsigned long flags; -+ -+ if ((res = ep3_attach_rail (rail)) != 0) -+ return res; -+ -+ spin_lock_init (&rail->CookieLock); -+ kmutex_init (&rail->HaltOpMutex); -+ kcondvar_init (&rail->HaltOpSleep); -+ -+ /* Initialise event interrupt cookie table */ -+ InitialiseCookieTable (&rail->CookieTable); -+ -+ /* Load and map the thread code */ -+ rail->ThreadCode = threadcode_elan3; -+ if (ep_loadcode (&rail->Generic, &rail->ThreadCode) != ESUCCESS) -+ goto failed; -+ -+ /* Map the command port to be visible to the Elan */ -+ ep3_ioaddr_map (&rail->Generic, EP3_COMMANDPORT_ADDR, rail->Ctxt->CommandPage, PAGESIZE, EP_PERM_WRITE); -+ rail->CommandPortAddr = EP3_COMMANDPORT_ADDR + (rail->Ctxt->CommandPort - rail->Ctxt->CommandPage); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->RailElan = ep_alloc_elan (&rail->Generic, sizeof (EP3_RAIL_ELAN), 0, &rail->RailElanAddr)) == 0 || -+ (rail->RailMain = ep_alloc_main (&rail->Generic, sizeof (EP3_RAIL_MAIN), 0, &rail->RailMainAddr)) == 0) -+ { -+ goto failed; -+ } -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ if (! (rail->QueueDescs = ep_alloc_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE, PAGESIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise all queue entries to be full */ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan3_sdram_writel (rail->Device, EP_SYSTEMQ_DESC(rail->QueueDescs, i) + offsetof (EP3_InputQueue, q_state), E3_QUEUE_FULL); -+ -+ /* initialise the dma rings */ -+ if (DmaRingsCreate (rail)) -+ goto failed; -+ -+ if (InitialiseDmaRetries (rail)) -+ goto failed; -+ -+ if (ep3_init_probenetwork (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, FALSE, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ return 0; -+ -+ failed: -+ printk ("ep3_start_rail: failed for rail %d\n", rail->Generic.Number); -+ ep3_stop_rail (&rail->Generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep3_stall_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ int ctx; -+ unsigned long flags; -+ -+ /* raise all the context filters */ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ -+ for (ctx = ELAN3_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN3_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ for (ctx = ELAN3_DMARING_BASE_CONTEXT_NUM; ctx <= ELAN3_DMARING_TOP_CONTEXT_NUM; ctx++) -+ elan3mmu_set_context_filter (rail->Device, ctx|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ elan3mmu_set_context_filter (rail->Device, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, TRUE, 0, NULL); -+ -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_stop_rail (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ep3_destroy_probenetwork (rail); -+ -+ if (rail->DmaRetryInitialised) -+ DestroyDmaRetries (rail); -+ -+ DmaRingsRelease(rail); -+ -+ if (rail->Generic.State == EP_RAIL_STATE_RUNNING) -+ { -+ KMEM_FREE (rail->MainCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ -+ ep_free_elan (&rail->Generic, rail->ElanCookies, rail->Generic.Position.pos_nodes * sizeof (E3_uint32)); -+ } -+ -+ if (rail->QueueDescs) -+ ep_free_memory_elan (&rail->Generic, EP_SYSTEM_QUEUE_BASE); -+ rail->QueueDescs = 0; -+ -+ if (rail->RailMain) -+ ep_free_main (&rail->Generic, rail->RailMainAddr, sizeof (EP3_RAIL_MAIN)); -+ rail->RailMain = 0; -+ -+ if (rail->RailElan) -+ ep_free_elan (&rail->Generic, rail->RailElanAddr, sizeof (EP3_RAIL_ELAN)); -+ rail->RailElan = 0; -+ -+ ep_unloadcode (&rail->Generic, &rail->ThreadCode); -+ -+ DestroyCookieTable (&rail->CookieTable); -+ -+ ep_perrail_unmap (&rail->Generic, rail->Ctxt->CommandPage, PAGESIZE); -+ -+ kcondvar_destroy (&rail->HaltOpSleep); -+ kmutex_destroy (&rail->HaltOpMutex); -+ spin_lock_destroy (&rail->CookieLock); -+ -+ ep3_detach_rail (rail); -+} -+ -+void -+ep3_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t addr; -+ -+ rail->SwitchBroadcastLevelTick = lbolt; -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, NodeId), pos->pos_nodeid); -+ -+ /* Allocate Network Identify cookie state */ -+ KMEM_ZALLOC (rail->MainCookies, E3_uint32 *, pos->pos_nodes * sizeof (E3_uint32), 1); -+ -+ if (! (addr = ep_alloc_elan (&rail->Generic, pos->pos_nodes * sizeof (E3_uint32), 0, &rail->ElanCookies))) -+ panic ("ep: PositionFound: cannot allocate elan cookies array\n"); -+ -+ elan3_sdram_zeroq_sdram (rail->Device, addr, pos->pos_nodes * sizeof (E3_uint32)); -+ -+ ep3_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep3_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ return elan3_sdram_alloc (((EP3_RAIL *) r)->Device, size); -+} -+ -+void -+ep3_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan3_sdram_free (((EP3_RAIL *) r)->Device, addr, size); -+} -+ -+void -+ep3_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan3_sdram_writeb (((EP3_RAIL *) r)->Device, addr, val); -+} -+ -+void -+ep3_flush_tlb (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->TlbLock, flags); -+ -+ IncrStat (dev, TlbFlushes); -+ -+ write_reg32 (dev, Cache_Control_Reg.ContReg, dev->Cache_Control_Reg | MMU_FLUSH); -+ mmiob (); -+ spin_unlock_irqrestore (&dev->TlbLock, flags); -+ -+ while (! (read_reg32 (dev, Cache_Control_Reg.ContReg) & MMU_FLUSHED)) -+ mb(); -+} -+ -+void -+ep3_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, lowNode, highNode, DEFAULT_ROUTE_TIMEOUT, HIGH_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, vp, ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ /* XXXX: whilst LoadRoute() can fail - it is not likely. */ -+ panic ("ep3_load_system_route: cannot load p2p route entry\n"); -+ } -+} -+ -+void -+ep3_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ E3_uint16 flits[MAX_FLITS]; -+ int nflits; -+ -+ nflits = GenerateRoute (&rail->Generic.Position, flits, nodeId, nodeId, DEFAULT_ROUTE_TIMEOUT, DEFAULT_ROUTE_PRIORITY); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId), EP3_CONTEXT_NUM(rail->Generic.Position.pos_nodeid), nflits, flits) != 0) -+ panic ("ep3_load_node_route: cannot load p2p data route entry\n"); -+} -+ -+void -+ep3_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ClearRoute (rail->Device, rail->RouteTable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep3_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 0, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ elan3mmu_set_context_filter (rail->Device, EP3_CONTEXT_NUM(nodeId), 1, 0, NULL); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+} -+ -+void -+ep3_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ FreeStalledDmas ((EP3_RAIL *) r, nodeId); -+} -+ -+void -+ep3_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP3_RAIL *ep3rail = (EP3_RAIL *)r; */ -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan3.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm_elan3.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan3.h 2005-07-28 14:52:52.887672808 -0400 -@@ -0,0 +1,431 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN3_H -+#define __EP_KCOMM_ELAN3_H -+ -+#ident "@(#)$Id: kcomm_elan3.h,v 1.50.8.3 2004/12/14 10:19:14 mike Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan3.h,v $*/ -+ -+#if !defined(__ELAN3__) -+#include -+#include -+#include -+#include -+#include -+#include -+#endif /* !defined(__ELAN3__) */ -+ -+#include -+ -+/* private address allocation */ -+#define EP3_TEXT_BASE 0xFF000000 /* base address for thread code (defined in makerules.elan3) */ -+#define EP3_COMMANDPORT_ADDR 0xFFF00000 /* mapping address for elan command port */ -+ -+#define EP3_STACK_SIZE 1024 /* default thread code stack size */ -+ -+#define EP3_PACEMAKER_EVENTADDR 0xfeedbeef /* mis-aligned address used by heartbeat pacemaker */ -+ -+/* context number allocation */ -+#define EP3_CONTEXT_NUM(nodeId) ((ELAN3_KCOMM_BASE_CONTEXT_NUM + (nodeId)) | SYS_CONTEXT_BIT) -+#define EP3_CONTEXT_ISDATA(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) >= ELAN3_KCOMM_BASE_CONTEXT_NUM && \ -+ ((ctx) & MAX_ROOT_CONTEXT_MASK) <= ELAN3_KCOMM_TOP_CONTEXT_NUM) -+#define EP3_CONTEXT_TO_NODE(ctx) (((ctx) & MAX_ROOT_CONTEXT_MASK) - ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* DMA issueing rings */ -+#define EP3_RING_CRITICAL 0 -+#define EP3_RING_CRITICAL_LEN 128 -+#define EP3_RING_HIGH_PRI 1 -+#define EP3_RING_HIGH_PRI_LEN 64 -+#define EP3_RING_LOW_PRI 2 -+#define EP3_RING_LOW_PRI_LEN 32 -+#define EP3_NUM_RINGS 3 -+ -+/* Value to "return" from c_close() when envelope handled by the trap handler */ -+#define EP3_PAckStolen 4 -+ -+/* unimplemented instruction trap types for thread code */ -+#define EP3_UNIMP_TRAP_NO_DESCS 0 -+#define EP3_UNIMP_TRAP_PACKET_NACKED 1 -+#define EP3_UNIMP_THREAD_HALTED 2 -+#define EP3_NUM_UNIMP_TRAPS 3 -+ -+/* forward declarations */ -+typedef struct ep3_rail EP3_RAIL; -+ -+/* block copy elan3 inputter queue - with waitvent0 */ -+typedef struct ep3_inputqueue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue block copy event */ -+ E3_uint32 q_pad[4]; /* pad to 64 bytes */ -+ E3_Addr q_wevent; /* WaitEvent0 struct */ -+ E3_int32 q_wcount; -+} EP3_InputQueue; -+ -+ -+#if !defined(__ELAN3__) -+ -+/* dma retries types and retry times */ -+typedef struct ep3_retry_dma -+{ -+ struct list_head Link; /* chained on free/retry list */ -+ long RetryTime; /* "lbolt" to retry at */ -+ E3_DMA_BE Dma; /* DMA (in main memory) */ -+} EP3_RETRY_DMA; -+ -+typedef struct ep3_dma_ring -+{ -+ sdramaddr_t pEvent; -+ E3_Addr epEvent; -+ -+ sdramaddr_t pDma; -+ E3_Addr epDma; -+ -+ E3_uint32 *pDoneBlk; -+ E3_Addr epDoneBlk; -+ -+ int Entries; /* number of slots in array */ -+ int Position; /* current position in array */ -+ -+ ioaddr_t CommandPort; -+ ioaddr_t CommandPage; -+ DeviceMappingHandle CommandPageHandle; -+} EP3_DMA_RING; -+ -+#define DMA_RING_EVENT(ring,n) ((ring)->pEvent + (n)*sizeof (E3_BlockCopyEvent)) -+#define DMA_RING_EVENT_ELAN(ring,n) ((ring)->epEvent + (n)*sizeof (E3_BlockCopyEvent)) -+ -+#define DMA_RING_DMA(ring,n) ((ring)->pDma + (n)*sizeof (E3_DMA)) -+#define DMA_RING_DMA_ELAN(ring,n) ((ring)->epDma + (n)*sizeof (E3_DMA)) -+ -+#define DMA_RING_DONE_ELAN(ring,n) ((ring)->epDoneBlk + (n)*sizeof (E3_uint32)) -+ -+/* Event interrupt cookie operations and lookup table */ -+typedef struct ep3_cookie_ops -+{ -+ void (*Event) (EP3_RAIL *rail, void *arg); /* called from the interrupt handler when an event is "set" */ -+ void (*DmaRetry) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error); /* called from the interrupt handler when a DMA is "nacked" */ -+ void (*DmaCancelled)(EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from the interrupt handler/flush disconnecting when cancelled. */ -+ void (*DmaVerify) (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma); /* called from multiple places, to check dma is consistent with state. */ -+} EP3_COOKIE_OPS; -+ -+typedef struct ep3_cookie -+{ -+ struct ep3_cookie *Next; /* Cookies are chained in hash table. */ -+ E3_uint32 Cookie; /* Cooke store in ev_Type */ -+ EP3_COOKIE_OPS *Operations; /* Cookie operations */ -+ void *Arg; /* Users arguement. */ -+} EP3_COOKIE; -+ -+#define EP3_COOKIE_HASH_SIZE (256) -+#define EP3_HASH_COOKIE(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP3_COOKIE_HASH_SIZE-1)) -+ -+typedef struct ep3_cookie_table -+{ -+ spinlock_t Lock; -+ EP3_COOKIE *Entries[EP3_COOKIE_HASH_SIZE]; -+} EP3_COOKIE_TABLE; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#define EP3_EVENT_FREE ((1 << 4) | EV_WCOPY) -+#define EP3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+/* DONE == Cookie */ -+#define EP3_EVENT_FAILED ((3 << 4) | EV_WCOPY) -+#define EP3_EVENT_PRIVATE ((4 << 4) | EV_WCOPY) -+ -+/* The event cookie can get posted (and seen) before the write has */ -+/* hit main memory - in this case the event count is <= 0 and the block */ -+/* will be marked as ACTIVE - but could transition to DONE at any time */ -+/* Also for a word copy event, the value written into the "done" word */ -+/* can be the event interrupt cookie rather than the "source" value */ -+/* this happens since the uCode does not wait for the write to have */ -+/* occured before overwriting TMP_0 with the cookie */ -+#define EP3_EVENT_FIRING(edev, event, cookie, done) \ -+ (((((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) || (done) == EP3_EVENT_ACTIVE) && \ -+ (int) elan3_sdram_readl (edev, (event) + offsetof (E3_BlockCopyEvent, ev_Count)) <= 0) -+#define EP3_EVENT_FIRED(cookie, done) \ -+ (((done) & ~(EV_TYPE_BCOPY | EV_TYPE_MASK_EVIRQ)) == (cookie).Cookie) -+ -+ -+/* Time limit to wait while event is firing and block write has not occured */ -+#define EP3_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+#define EP3_INIT_COPY_EVENT(event, cookie, dest, intr) \ -+{ \ -+ (event).ev_Count = 0; \ -+ (event).ev_Type = (intr) ? EV_TYPE_BCOPY | EV_TYPE_EVIRQ | (cookie).Cookie : EV_TYPE_BCOPY; \ -+ (event).ev_Source = (cookie).Cookie | EV_WCOPY; \ -+ (event).ev_Dest = (dest) | EV_TYPE_BCOPY_WORD; \ -+} -+ -+#if !defined(__ELAN3__) -+ -+/* Generic input queues which can be polled */ -+typedef struct ep3_inputq -+{ -+ EP3_COOKIE q_cookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ E3_Addr q_descAddr; -+ -+ E3_Addr q_base; -+ E3_Addr q_top; -+ E3_Addr q_fptr; -+ -+ E3_uint32 q_waitCount; -+} EP3_INPUTQ; -+ -+typedef struct ep3_outputq -+{ -+ EP3_COOKIE q_cookie; -+ -+ unsigned int q_slotCount; /* # slots allocated */ -+ unsigned int q_slotSize; /* size of each slot (rounded up) */ -+ -+ sdramaddr_t q_elan; -+ E3_Addr q_elanAddr; -+ -+ void *q_main; -+ E3_Addr q_mainAddr; -+} EP3_OUTPUTQ; -+ -+#endif /* !defined(__ELAN3__) */ -+ -+/* per-rail elan memory portion of device */ -+typedef struct ep3_rail_elan -+{ -+ E3_uint16 ProbeSource0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeSource1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_BlockCopyEvent ProbeDone; /* 16 byte aligned */ -+ E3_Event ProbeStart; /* 8 byte aligned */ -+ -+ E3_uint32 ProbeType; /* 4 byte aligned */ -+ E3_uint32 ProbeLevel; -+ -+ E3_uint32 NodeId; -+} EP3_RAIL_ELAN; -+ -+/* values for ProbeType */ -+#define PROBE_SINGLE 0 -+#define PROBE_MULTIPLE 1 -+/* number of attempts for each type */ -+#define PROBE_SINGLE_ATTEMPTS 10 -+#define PROBE_SINGLE_TIMEOUTS 5 -+#define PROBE_MULTIPLE_ATTEMPTS 20 -+#define PROBE_MULTIPLE_TIMEOUTS 10 -+ -+/* per-rail elan memory portsion of device */ -+typedef struct ep3_rail_main -+{ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* 32 byte aligned */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ -+ E3_uint32 ProbeDone; /* 4 byte aligned */ -+ E3_uint32 ProbeResult; -+ E3_uint32 ProbeLevel; -+} EP3_RAIL_MAIN; -+ -+#if !defined(__ELAN3__) -+ -+struct ep3_rail -+{ -+ EP_RAIL Generic; /* Generic rail */ -+ -+ ELAN3_DEV *Device; /* Elan device we're using */ -+ ELAN3_CTXT *Ctxt; /* Elan context struct */ -+ ioaddr_t CommandPort; /* commandport from context */ -+ E3_Addr CommandPortAddr; /* and address mapped into elan */ -+ -+ ELAN3_ROUTE_TABLE *RouteTable; /* routetable from context */ -+ ELAN3MMU *Elan3mmu; /* elanmmu from context */ -+ -+ EP3_COOKIE_TABLE CookieTable; /* Event cookie table */ -+ -+ EP_CODE ThreadCode; /* copy of thread code */ -+ unsigned int CommandPortEventTrap; /* flag to indicate command port eventint queue overflow trap */ -+ -+ sdramaddr_t RailElan; /* Elan visible main/sdram portions of */ -+ E3_Addr RailElanAddr; /* device structure */ -+ EP3_RAIL_MAIN *RailMain; -+ E3_Addr RailMainAddr; -+ -+ /* small system message queues */ -+ sdramaddr_t QueueDescs; /* Input Queue descriptors */ -+ -+ /* Network position prober */ -+ E3_Addr ProbeStack; /* Network position thread command structure */ -+ EP3_COOKIE ProbeCookie; /* event cookie for Done event */ -+ kcondvar_t ProbeWait; /* place to wait on probe thread */ -+ spinlock_t ProbeLock; /* and lock */ -+ volatile int ProbeDone; /* and flag to indicate it's done */ -+ -+ E3_uint16 ProbeDest0[TR_TRACEROUTE_ENTRIES]; /* last result of CheckNetworkPosition */ -+ E3_uint16 ProbeDest1[TR_TRACEROUTE_ENTRIES]; -+ E3_uint32 ProbeResult; -+ -+ long ProbeLevelTick[ELAN_MAX_LEVELS]; -+ long SwitchBroadcastLevelTick; -+ -+ /* rings for issueing dmas */ -+ EP3_DMA_RING DmaRings[EP3_NUM_RINGS]; -+ -+ /* retry lists for dmas */ -+ struct list_head DmaRetries[EP_NUM_RETRIES]; /* Dma retry lists */ -+ struct list_head DmaRetryFreeList; /* and free list */ -+ u_int DmaRetryCount; /* and total retry count */ -+ u_int DmaRetryReserved; /* and number reserved */ -+ u_int DmaRetryThreadShouldStall; /* count of reasons to stall retries */ -+ u_int DmaRetryThreadStarted:1; /* dma retry thread running */ -+ u_int DmaRetryThreadShouldStop:1; /* but should stop */ -+ u_int DmaRetryThreadStopped:1; /* and now it's stopped */ -+ u_int DmaRetryInitialised:1; /* have initialise dma retries */ -+ -+ spinlock_t DmaRetryLock; /* spinlock protecting lists */ -+ kcondvar_t DmaRetryWait; /* place retry thread sleeps */ -+ long DmaRetryTime; /* and when it will next wakeup */ -+ unsigned int DmaRetrySleeping; /* and it's sleeping there */ -+ -+ /* Network Identify Cookies */ -+ E3_uint32 *MainCookies; /* One cookie allocator per-node for main*/ -+ E3_Addr ElanCookies; /* and one for elan */ -+ spinlock_t CookieLock; /* spinlock to protect main cookies */ -+ -+ /* Halt operation flags for flushing. */ -+ kmutex_t HaltOpMutex; /* serialize access to halt operations */ -+ unsigned int HaltOpCompleted; /* flag to indicate halt operation completed */ -+ kcondvar_t HaltOpSleep; /* place to wait for it to complete */ -+ -+ /* Network error state */ -+ kcondvar_t NetworkErrorSleep; /* place to sleep for network error halt operation */ -+ u_int NetworkErrorFlushed; /* and flag to indicate flushed */ -+ -+ -+ EP3_RAIL_STATS Stats; /* statistics */ -+}; -+ -+/* support.c */ -+ -+extern ELAN3_OPS ep3_elan3_ops; -+ -+extern E3_uint32 LocalCookie (EP3_RAIL *rail, unsigned int remoteNode); -+extern E3_uint32 RemoteCookie (EP3_RAIL *rail, unsigned int remoteNode); -+ -+extern void InitialiseCookieTable (EP3_COOKIE_TABLE *table); -+extern void DestroyCookieTable (EP3_COOKIE_TABLE *table); -+extern void RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie, -+ E3_Addr event, EP3_COOKIE_OPS *ops, void *arg); -+extern void DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cookie); -+extern EP3_COOKIE *LookupCookie (EP3_COOKIE_TABLE *table, uint32_t cookie); -+extern EP3_COOKIE *LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr); -+ -+extern int DmaRingsCreate (EP3_RAIL *rail); -+extern void DmaRingsRelease (EP3_RAIL *rail); -+extern int IssueDma (EP3_RAIL *rail, E3_DMA_BE *dma, int type, int retryThread); -+ -+extern int IssueWaitevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueSetevent (EP3_RAIL *rail, E3_Addr value); -+extern void IssueRunThread (EP3_RAIL *rail, E3_Addr value); -+extern long DmaRetryTime (int type); -+extern int InitialiseDmaRetries (EP3_RAIL *rail); -+extern void DestroyDmaRetries (EP3_RAIL *rail); -+extern int ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr); -+extern void ReleaseDmaRetries (EP3_RAIL *rail, int count); -+extern void StallDmaRetryThread (EP3_RAIL *rail); -+extern void ResumeDmaRetryThread (EP3_RAIL *rail); -+extern void QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval); -+extern void QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma); -+extern void FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId); -+ -+extern void SetQueueLocked(EP3_RAIL *rail, sdramaddr_t qaddr); -+ -+/* threadcode_elan3.c */ -+extern E3_Addr ep3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, -+ int stackSize, int nargs, ...); -+ -+/* probenetwork.c */ -+extern int ep3_init_probenetwork (EP3_RAIL *rail); -+extern void ep3_destroy_probenetwork (EP3_RAIL *rail); -+extern void ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos); -+extern int ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep3_check_position (EP_RAIL *rail); -+ -+/* neterr_elan3.c */ -+extern void ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* kcomm_elan3.c */ -+extern EP_RAIL *ep3_create_rail (EP_SYS *sys, ELAN3_DEV *dev); -+extern void ep3_destroy_rail (EP_RAIL *rail); -+ -+extern int ep3_start_rail (EP_RAIL *rail); -+extern void ep3_stall_rail (EP_RAIL *rail); -+extern void ep3_stop_rail (EP_RAIL *rail); -+ -+extern void ep3_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep3_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep3_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep3_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep3_flush_tlb (EP_RAIL *r); -+extern void ep3_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep3_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep3_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep3_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+extern void ep3_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* kmap_elan3.c */ -+extern void ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm); -+extern void ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep3_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan3.c */ -+extern EP_INPUTQ *ep3_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep3_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* support_elan3.c */ -+extern void ep3_flush_filters (EP_RAIL *r); -+extern void ep3_flush_queues (EP_RAIL *r); -+ -+#endif /* !defined(__ELAN3__) */ -+ -+#endif /* __EP_KCOMM_ELAN3_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan4.c 2005-07-28 14:52:52.888672656 -0400 -@@ -0,0 +1,526 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kcomm_elan4.c,v 1.16.2.3 2004/11/30 12:02:17 mike Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "conf_linux.h" -+ -+extern EP_CODE threadcode_elan4; -+ -+unsigned int -+ep4_create_rails (EP_SYS *sys, unsigned int disabled) -+{ -+ unsigned int rmask = 0; -+ ELAN4_DEV *dev; -+ EP_RAIL *rail; -+ int i; -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ { -+ if ((dev = elan4_reference_device (i, ELAN4_STATE_STARTED)) != NULL) -+ { -+ if ((rail = ep4_create_rail (sys, dev)) == NULL) -+ elan4_dereference_device (dev); -+ else -+ { -+ if (disabled & (1 << rail->Number)) -+ printk ("%s: auto-start of device disabled by configuration\n", rail->Name); -+ else -+ ep_start_rail (rail); -+ -+ ep_procfs_rail_init(rail); -+ -+ rmask |= (1 << rail->Number); -+ } -+ } -+ } -+ -+ if (rmask) -+ qsnet_debug_alloc(); -+ -+ return rmask; -+} -+ -+EP_RAIL * -+ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev) -+{ -+ EP4_RAIL *rail; -+ int res; -+ -+ KMEM_ZALLOC (rail, EP4_RAIL *, sizeof (EP4_RAIL), 1); -+ -+ if (rail == NULL) -+ return (EP_RAIL *) NULL; -+ -+ if ((res = ep_init_rail (sys, &rail->r_generic)) != 0) -+ { -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+ return (EP_RAIL *) NULL; -+ } -+ -+ rail->r_ctxt.ctxt_dev = dev; -+ -+ /* install our rail operations */ -+ rail->r_generic.Operations.DestroyRail = ep4_destroy_rail; -+ rail->r_generic.Operations.StartRail = ep4_start_rail; -+ rail->r_generic.Operations.StallRail = ep4_stall_rail; -+ rail->r_generic.Operations.StopRail = ep4_stop_rail; -+ -+ rail->r_generic.Operations.SdramAlloc = ep4_sdram_alloc; -+ rail->r_generic.Operations.SdramFree = ep4_sdram_free; -+ rail->r_generic.Operations.SdramWriteb = ep4_sdram_writeb; -+ -+ rail->r_generic.Operations.KaddrMap = ep4_kaddr_map; -+ rail->r_generic.Operations.SdramMap = ep4_sdram_map; -+ rail->r_generic.Operations.Unmap = ep4_unmap; -+ -+ rail->r_generic.Operations.DvmaReserve = ep4_dvma_reserve; -+ rail->r_generic.Operations.DvmaRelease = ep4_dvma_release; -+ rail->r_generic.Operations.DvmaSetPte = ep4_dvma_set_pte; -+ rail->r_generic.Operations.DvmaReadPte = ep4_dvma_read_pte; -+ rail->r_generic.Operations.DvmaUnload = ep4_dvma_unload; -+ rail->r_generic.Operations.FlushTlb = ep4_flush_tlb; -+ -+ rail->r_generic.Operations.ProbeRoute = ep4_probe_route; -+ -+ rail->r_generic.Operations.PositionFound = ep4_position_found; -+ rail->r_generic.Operations.CheckPosition = ep4_check_position; -+ rail->r_generic.Operations.NeterrFixup = ep4_neterr_fixup; -+ -+ rail->r_generic.Operations.LoadSystemRoute = ep4_load_system_route; -+ -+ rail->r_generic.Operations.LoadNodeRoute = ep4_load_node_route; -+ rail->r_generic.Operations.UnloadNodeRoute = ep4_unload_node_route; -+ rail->r_generic.Operations.LowerFilter = ep4_lower_filter; -+ rail->r_generic.Operations.RaiseFilter = ep4_raise_filter; -+ rail->r_generic.Operations.NodeDisconnected = ep4_node_disconnected; -+ -+ rail->r_generic.Operations.FlushFilters = ep4_flush_filters; -+ rail->r_generic.Operations.FlushQueues = ep4_flush_queues; -+ -+ rail->r_generic.Operations.AllocInputQ = ep4_alloc_inputq; -+ rail->r_generic.Operations.FreeInputQ = ep4_free_inputq; -+ rail->r_generic.Operations.EnableInputQ = ep4_enable_inputq; -+ rail->r_generic.Operations.DisableInputQ = ep4_disable_inputq; -+ rail->r_generic.Operations.PollInputQ = ep4_poll_inputq; -+ -+ rail->r_generic.Operations.AllocOutputQ = ep4_alloc_outputq; -+ rail->r_generic.Operations.FreeOutputQ = ep4_free_outputq; -+ rail->r_generic.Operations.OutputQMsg = ep4_outputq_msg; -+ rail->r_generic.Operations.OutputQState = ep4_outputq_state; -+ rail->r_generic.Operations.OutputQSend = ep4_outputq_send; -+ -+ rail->r_generic.Operations.FillOutStats = ep4_fillout_stats; -+ rail->r_generic.Operations.Debug = ep4_debug_rail; -+ -+ rail->r_generic.Devinfo = dev->dev_devinfo; -+ -+ printk ("%s: connected via elan4 rev%c device %d\n", rail->r_generic.Name, -+ 'a' + dev->dev_devinfo.dev_revision_id, dev->dev_instance); -+ -+ return (EP_RAIL *) rail; -+} -+ -+void -+ep4_destroy_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_dereference_device (rail->r_ctxt.ctxt_dev); -+ -+ KMEM_FREE (rail, sizeof (EP4_RAIL)); -+} -+ -+static int -+ep4_attach_rail (EP4_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ if (elan4_insertctxt (dev, &rail->r_ctxt, &ep4_trap_ops) != 0) -+ return -ENOMEM; -+ -+ if ((rail->r_routetable = elan4_alloc_routetable (dev, 4)) == NULL) /* 512 << 4 == 8192 entries */ -+ { -+ elan4_removectxt (dev, &rail->r_ctxt); -+ return -ENOMEM; -+ } -+ elan4_set_routetable (&rail->r_ctxt, rail->r_routetable); -+ -+ /* Attach to the kernel comms nextwork context */ -+ if (elan4_attach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM) < 0) -+ { -+ elan4_free_routetable (dev, rail->r_routetable); -+ elan4_removectxt (dev, &rail->r_ctxt); -+ -+ return -EBUSY; -+ } -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_attach_filter (&rail->r_ctxt, ctx); -+ -+ return 0; -+} -+ -+static void -+ep4_detach_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned ctx; -+ -+ elan4_detach_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_detach_filter (&rail->r_ctxt, ctx); -+ -+ if (rail->r_routetable) -+ { -+ elan4_set_routetable (&rail->r_ctxt, NULL); -+ elan4_free_routetable (dev, rail->r_routetable); -+ } -+ -+ elan4_removectxt (dev, &rail->r_ctxt); -+} -+ -+int -+ep4_start_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_InputQueue qdesc; -+ int i, res; -+ -+ if ((res = ep4_attach_rail (rail)) < 0) -+ return res; -+ -+ /* Initialise main interrupt cookie table */ -+ spin_lock_init (&rail->r_intcookie_lock); -+ for (i = 0; i < EP4_INTCOOKIE_HASH_SIZE; i++) -+ INIT_LIST_HEAD (&rail->r_intcookie_hash[i]); -+ -+ kmutex_init (&rail->r_haltop_mutex); -+ kcondvar_init (&rail->r_haltop_sleep); -+ spin_lock_init (&rail->r_haltop_lock); -+ -+ spin_lock_init (&rail->r_cookie_lock); -+ -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_EVENT]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_ATOMIC]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_SINGLE]); -+ INIT_LIST_HEAD (&rail->r_ecq_list[EP4_ECQ_MAIN]); -+ spin_lock_init (&rail->r_ecq_lock); -+ -+ ep_kthread_init (&rail->r_retry_thread); -+ INIT_LIST_HEAD (&rail->r_retry_ops); -+ -+ INIT_LIST_HEAD (&rail->r_neterr_ops); -+ -+ kmutex_init (&rail->r_flush_mutex); -+ kcondvar_init (&rail->r_flush_sleep); -+ -+ /* Allocate the elan visible sdram/main memory */ -+ if ((rail->r_elan = ep_alloc_elan (&rail->r_generic, sizeof (EP4_RAIL_ELAN), 0, &rail->r_elan_addr)) == 0 || -+ (rail->r_main = ep_alloc_main (&rail->r_generic, sizeof (EP4_RAIL_MAIN), 0, &rail->r_main_addr)) == 0) -+ { -+ goto failed; -+ } -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[i].ev_CountAndType), 0); -+ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ /* Allocate the system input queues at their fixed elan address */ -+ /* avoid sdram address aliasing by allocating the min sdram pagesize */ -+ if (! (rail->r_queuedescs= ep_alloc_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE, SDRAM_PAGE_SIZE, EP_PERM_ALL, 0))) -+ goto failed; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ for (i = 0; i < EP_NUM_SYSTEMQ; i++) -+ elan4_sdram_copyq_to_sdram (dev, &qdesc, EP_SYSTEMQ_DESC (rail->r_queuedescs, i), sizeof (E4_InputQueue)); -+ -+ /* Allocate the resource map for command queue mappings */ -+ if ((rail->r_ecq_rmap = ep_rmallocmap (EP4_ECQ_RMAPSIZE, "r_ecq_rmap", 1)) == NULL) -+ goto failed; -+ -+ ep_rmfree (rail->r_ecq_rmap, EP4_ECQ_TOP - EP4_ECQ_BASE, EP4_ECQ_BASE); -+ -+ /* register an interrupt cookie & allocate command queues for command queue flushing */ -+ rail->r_flush_mcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4); -+ rail->r_flush_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1); -+ -+ if (rail->r_flush_mcq == NULL || rail->r_flush_ecq == NULL) -+ goto failed; -+ -+ ep4_register_intcookie (rail, &rail->r_flush_intcookie, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), ep4_flush_interrupt, rail); -+ -+ /* startup the retry thread */ -+ if (kernel_thread_create (ep4_retry_thread, (void *) rail) == 0) -+ goto failed; -+ ep_kthread_started (&rail->r_retry_thread); -+ -+ ep4_initialise_dma_retries (rail); -+ -+ if ((rail->r_event_ecq = ep4_alloc_ecq (rail, CQ_Size1K)) == NULL) -+ goto failed; -+ -+ rail->r_threadcode = threadcode_elan4; -+ if (ep_loadcode (&rail->r_generic, &rail->r_threadcode)) -+ goto failed; -+ -+ elan4_flush_icache (&rail->r_ctxt); -+ -+ if (ep4_probe_init (rail)) -+ goto failed; -+ -+ /* can now drop the context filter for the system context */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_HIGH_PRI); -+ -+ return 0; -+ -+ failed: -+ printk ("ep4_start_rail: failed for rail '%s'\n", rail->r_generic.Name); -+ ep4_stop_rail (&rail->r_generic); -+ -+ return -ENOMEM; -+} -+ -+void -+ep4_stall_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ unsigned ctx; -+ -+ /* Raise all the context filters */ -+ elan4_set_filter (&rail->r_ctxt, ELAN4_KCOMM_CONTEXT_NUM, E4_FILTER_DISCARD_ALL); -+ -+ for (ctx = ELAN4_KCOMM_BASE_CONTEXT_NUM; ctx <= ELAN4_KCOMM_TOP_CONTEXT_NUM; ctx++) -+ elan4_set_filter (&rail->r_ctxt, ctx, E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_stop_rail (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ if (rail->r_generic.State == EP_RAIL_STATE_RUNNING) /* undo ep4_position_found() */ -+ { -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ EP_ADDR addr = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies)); -+ -+ ep_free_elan (&rail->r_generic, addr, pos->pos_nodes * sizeof (E4_uint64)); -+ -+ KMEM_FREE (rail->r_cookies, pos->pos_nodes * sizeof (E4_uint64)); -+ } -+ -+ ep4_probe_destroy (rail); -+ -+ ep_unloadcode (&rail->r_generic, &rail->r_threadcode); -+ -+ if (rail->r_event_ecq) -+ ep4_free_ecq (rail, rail->r_event_ecq); -+ rail->r_event_ecq = NULL; -+ -+ ep4_finalise_dma_retries (rail); -+ -+ ep_kthread_stop (&rail->r_retry_thread); -+ ep_kthread_destroy (&rail->r_retry_thread); -+ -+ if (rail->r_flush_intcookie.int_arg) -+ ep4_deregister_intcookie (rail, &rail->r_flush_intcookie); -+ rail->r_flush_intcookie.int_arg = NULL; -+ -+ if (rail->r_flush_mcq) -+ ep4_put_ecq (rail, rail->r_flush_mcq, 4); -+ rail->r_flush_mcq = NULL; -+ -+ if (rail->r_flush_ecq) -+ ep4_put_ecq (rail, rail->r_flush_ecq, 1); -+ rail->r_flush_ecq = NULL; -+ -+ if (rail->r_ecq_rmap) -+ ep_rmfreemap (rail->r_ecq_rmap); -+ -+ if (rail->r_queuedescs) -+ ep_free_memory_elan (&rail->r_generic, EP_SYSTEM_QUEUE_BASE); -+ rail->r_queuedescs = 0; -+ -+ if (rail->r_elan) -+ ep_free_elan (&rail->r_generic, rail->r_elan_addr, sizeof (EP4_RAIL_ELAN)); -+ rail->r_elan = 0; -+ -+ if (rail->r_main) -+ ep_free_main (&rail->r_generic, rail->r_main_addr, sizeof (EP4_RAIL_MAIN)); -+ rail->r_main = NULL; -+ -+ kcondvar_destroy (&rail->r_flush_sleep); -+ kmutex_destroy (&rail->r_flush_mutex); -+ -+ spin_lock_destroy (&rail->r_ecq_lock); -+ spin_lock_destroy (&rail->r_cookie_lock); -+ -+ spin_lock_destroy (&rail->r_haltop_lock); -+ kcondvar_destroy(&rail->r_haltop_sleep); -+ kmutex_destroy (&rail->r_haltop_mutex); -+ spin_lock_destroy (&rail->r_intcookie_lock); -+ -+ ep4_detach_rail (rail); -+} -+ -+void -+ep4_position_found (EP_RAIL *r, ELAN_POSITION *pos) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ sdramaddr_t cookies; -+ EP_ADDR addr; -+ int i; -+ -+ KMEM_ZALLOC (rail->r_cookies, E4_uint64 *, pos->pos_nodes * sizeof (E4_uint64), 1); -+ -+ if (! (cookies = ep_alloc_elan (&rail->r_generic, pos->pos_nodes * sizeof (E4_uint64), 0, &addr))) -+ panic ("ep4_position_found: cannot allocate elan cookies array\n"); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, cookies + (i * sizeof (E4_uint64)), 0); -+ -+ for (i = 0; i < pos->pos_nodes; i++) -+ rail->r_cookies[i] = 0; -+ -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_nodeid), pos->pos_nodeid); -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_cookies), addr); -+ -+ ep4_probe_position_found (rail, pos); -+} -+ -+sdramaddr_t -+ep4_sdram_alloc (EP_RAIL *r, EP_ADDR addr, unsigned size) -+{ -+ ELAN4_DEV *dev = ((EP4_RAIL *) r)->r_ctxt.ctxt_dev; -+ -+ if (size >= SDRAM_PAGE_SIZE) -+ return elan4_sdram_alloc (dev, size); -+ else -+ { -+ sdramaddr_t block = elan4_sdram_alloc (dev, SDRAM_PAGE_SIZE); -+ sdramaddr_t sdram = block + (addr & (SDRAM_PAGE_SIZE-1)); -+ -+ /* free of the portion before sdram */ -+ if (sdram > block) -+ elan4_sdram_free (dev, block, sdram - block); -+ -+ /* free of the portion after sdram */ -+ if ((block + SDRAM_PAGE_SIZE) > (sdram + size)) -+ elan4_sdram_free (dev, sdram + size, block + SDRAM_PAGE_SIZE - (sdram + size)); -+ -+ return sdram; -+ } -+} -+ -+void -+ep4_sdram_free (EP_RAIL *r, sdramaddr_t addr, unsigned size) -+{ -+ elan4_sdram_free (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, size); -+} -+ -+void -+ep4_sdram_writeb (EP_RAIL *r, sdramaddr_t addr, unsigned char val) -+{ -+ elan4_sdram_writeb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev, addr, val); -+} -+ -+void -+ep4_flush_tlb (EP_RAIL *r) -+{ -+ elan4mmu_flush_tlb (((EP4_RAIL *) r)->r_ctxt.ctxt_dev); -+} -+ -+void -+ep4_load_system_route (EP_RAIL *r, unsigned vp, unsigned lowNode, unsigned highNode) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, ELAN4_KCOMM_CONTEXT_NUM, -+ lowNode, highNode, FIRST_SYSTEM_PACKET | FIRST_HIGH_PRI | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_system_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, vp, &route); -+} -+ -+void -+ep4_load_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_VirtualProcessEntry route; -+ -+ if (elan4_generate_route (&rail->r_generic.Position, &route, EP4_CONTEXT_NUM(rail->r_generic.Position.pos_nodeid), -+ nodeId, nodeId, FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3)) < 0) -+ { -+ panic ("ep4_load_node_route: generate route failed\n"); -+ /* NOTREACHED */ -+ } -+ -+ elan4_write_route (dev, rail->r_routetable, EP_VP_DATA(nodeId), &route); -+} -+ -+void -+ep4_unload_node_route (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ elan4_invalidate_route (dev, rail->r_routetable, EP_VP_DATA(nodeId)); -+} -+ -+void -+ep4_lower_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_HIGH_PRI); -+} -+ -+void -+ep4_raise_filter (EP_RAIL *r, unsigned nodeId) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ elan4_set_filter (&rail->r_ctxt, EP4_CONTEXT_NUM(nodeId), E4_FILTER_DISCARD_ALL); -+} -+ -+void -+ep4_node_disconnected (EP_RAIL *r, unsigned nodeId) -+{ -+ ep4_free_stalled_dmas ((EP4_RAIL *) r, nodeId); -+} -+ -+void -+ep4_fillout_stats(EP_RAIL *r, char *str) -+{ -+ /* no stats here yet */ -+ /* EP4_RAIL *ep4rail = (EP4_RAIL *)r; */ -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan4.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm_elan4.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_elan4.h 2005-07-28 14:52:52.889672504 -0400 -@@ -0,0 +1,443 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_ELAN4_H -+#define __EP_KCOMM_ELAN4_H -+ -+#ident "@(#)$Id: kcomm_elan4.h,v 1.16.2.2 2004/12/14 10:19:14 mike Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_elan4.h,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#if !defined(__elan4__) -+#include -+#endif /* ! defined(__elan4__) */ -+ -+/* private address allocation */ -+#define EP4_TEXT_BASE 0xF8000000 /* base address for thread code (defined in makerules.elan4) */ -+#define EP4_ECQ_BASE 0xFF000000 /* address space for mapping command queues */ -+#define EP4_ECQ_TOP 0xFF800000 -+ -+#define EP4_ECQ_RMAPSIZE 128 -+#define EP4_STACK_SIZE 1024 /* default thread code stack size */ -+#define EP4_MAX_LEVELS 8 /* same as ELAN_MAX_LEVELS */ -+ -+/* context number allocation */ -+#define EP4_CONTEXT_NUM(nodeId) (ELAN4_KCOMM_BASE_CONTEXT_NUM + (nodeId)) -+#define EP4_CONTEXT_ISDATA(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN4_KCOMM_TOP_CONTEXT_NUM) -+#define EP4_CONTEXT_TO_NODE(ctx) ((ctx) - ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+/* -+ * network error cookie format: -+ * ------------------------------------------------- -+ * | unique cookie value | Remote | DMA | Location | -+ * ------------------------------------------------- -+ * [63:4] Cookie - unique cookie number -+ * [3] Thread - cookie generated by thread code -+ * [2] Remote - cookie generated by remote end -+ * [1] STEN - cookie is for a STEN packet -+ * [0] DMA - cookie is for a DMA -+ */ -+#define EP4_COOKIE_DMA (1 << 0) -+#define EP4_COOKIE_STEN (1 << 1) -+#define EP4_COOKIE_REMOTE (1 << 2) -+#define EP4_COOKIE_THREAD (1 << 3) -+#define EP4_COOKIE_INC (1ull << 4) -+ -+#define EP4_COOKIE_STRING(val) ((val) & ~(EP4_COOKIE_INC-1)) >> 4, \ -+ ((val) & EP4_COOKIE_DMA) ? ",dma" : "", \ -+ ((val) & EP4_COOKIE_REMOTE) ? ",remote" : "", \ -+ ((val) & EP4_COOKIE_THREAD) ? ",thread" : "", \ -+ ((val) & EP4_COOKIE_STEN) ? ",sten" : "" -+/* -+ * Done "word" values -+ */ -+#define EP4_STATE_FREE 0 -+#define EP4_STATE_ACTIVE 1 -+#define EP4_STATE_FINISHED 2 -+#define EP4_STATE_FAILED 3 -+#define EP4_STATE_PRIVATE 4 -+ -+#define EP4_EVENT_FIRING_TLIMIT 16384 /* 1023 uS */ -+ -+/* forward declarations */ -+typedef struct ep4_rail EP4_RAIL; -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_intcookie -+{ -+ struct list_head int_link; -+ E4_uint64 int_val; -+ void (*int_callback)(EP4_RAIL *rail, void *arg); -+ void *int_arg; -+} EP4_INTCOOKIE; -+ -+#define EP4_INTCOOKIE_HASH_SIZE 256 -+#define EP4_INTCOOKIE_HASH(a) ((((a) >> 3) ^ ((a) >> 7) ^ ((a) >> 11)) & (EP4_INTCOOKIE_HASH_SIZE-1)) -+ -+typedef struct ep4_ecq -+{ -+ struct list_head ecq_link; /* linked on r_ecq_list */ -+ ELAN4_INTOP ecq_intop; /* main interrupt op space */ -+ ELAN4_CQ *ecq_cq; /* command queue */ -+ E4_Addr ecq_addr; /* address mapped into elan */ -+ unsigned int ecq_avail; /* # dwords still available */ -+ -+ spinlock_t ecq_lock; /* spinlock for main accesses */ -+ sdramaddr_t ecq_event; /* event for flushing "event" queues */ -+ EP_ADDR ecq_event_addr; -+ struct ep4_ecq *ecq_flushcq; /* and command port to issue setevent to */ -+} EP4_ECQ; -+ -+#define EP4_ECQ_EVENT 0 /* command queues targetted by multi-blocks events */ -+#define EP4_ECQ_ATOMIC 1 /* command queues targetted by atomic store operations */ -+#define EP4_ECQ_SINGLE 2 /* command queues targetted by single word commands from main */ -+#define EP4_ECQ_MAIN 3 /* command queues targetted by multi word commands from main */ -+#define EP4_NUM_ECQ 4 -+ -+#define EP4_ECQ_Size(which) ((which) == EP4_ECQ_EVENT ? CQ_Size64K : \ -+ (which) == EP4_ECQ_ATOMIC ? CQ_Size8K : \ -+ (which) == EP4_ECQ_SINGLE ? CQ_Size1K : \ -+ (which) == EP4_ECQ_MAIN ? CQ_Size8K : \ -+ CQ_Size1K) -+ -+typedef struct ep4_dma_retry -+{ -+ struct list_head retry_link; /* chained on free/retry list */ -+ unsigned long retry_time; /* "lbolt" to retry at */ -+ E4_DMA retry_dma; /* DMA (in main memory) */ -+} EP4_DMA_RETRY; -+ -+#define EP4_DMA_RETRY_CQSIZE CQ_Size8K /* size of command queue for dma retry */ -+#define EP4_DMA_RETRY_FLOWCNT (CQ_Size(EP4_DMA_RETRY_CQSIZE)/72) /* # of reissued DMA's which can fit in */ -+ -+typedef struct ep4_inputq -+{ -+ EP4_INTCOOKIE q_intcookie; -+ unsigned int q_slotSize; -+ unsigned int q_slotCount; -+ -+ void *q_slots; -+ EP_ADDR q_slotsAddr; -+ -+ EP_INPUTQ_CALLBACK *q_callback; -+ void *q_arg; -+ -+ sdramaddr_t q_desc; -+ EP_ADDR q_descAddr; -+ EP_ADDR q_eventAddr; -+ EP4_ECQ *q_wcq; /* command queue to issue waitevent to */ -+ EP4_ECQ *q_ecq; /* command queue targetted by event to generate interrupt */ -+ -+ EP_ADDR q_fptr; /* cached current front pointer */ -+ EP_ADDR q_last; /* elan addr for last queue slot */ -+ -+ atomic_t q_fired; /* atomic flag that interrupt received */ -+ unsigned int q_count; /* count of slots consumed */ -+} EP4_INPUTQ; -+ -+typedef struct ep4_outputq -+{ -+ spinlock_t q_lock; -+ unsigned int q_slotCount; -+ unsigned int q_slotSize; -+ unsigned int q_dwords; -+ ELAN4_CQ *q_cq; -+ void *q_main; -+ EP_ADDR q_mainAddr; -+ unsigned int q_retries; -+} EP4_OUTPUTQ; -+ -+#endif /* ! defined(__elan4__) */ -+ -+typedef struct ep4_check_sten -+{ -+ E4_uint64 c_reset_event_cmd; /* WRITEDWORD to reset start event */ -+ E4_uint64 c_reset_event_value; -+ -+ E4_uint64 c_open; /* OPEN VP_PROBE(lvl) */ -+ E4_uint64 c_trans_traceroute0; /* SENDTRANS TR_TRACEROUTE 0s */ -+ E4_uint64 c_addr_traceroute0; -+ E4_uint64 c_data_traceroute0[8]; -+ E4_uint64 c_trans_traceroute1; /* SENDTRANS TR_TRACEROUTE 1s */ -+ E4_uint64 c_addr_traceroute1; -+ E4_uint64 c_data_traceroute1[8]; -+ E4_uint64 c_trans_sendack; /* SENDTRANS SENDACK */ -+ E4_uint64 c_addr_sendack; -+ -+ E4_uint64 c_guard_ok; /* GUARD OK - write level */ -+ E4_uint64 c_writedword_ok; -+ E4_uint64 c_value_ok; -+ -+ E4_uint64 c_guard_fail; /* GUARD FAIL - chain setevent/write fail */ -+ E4_uint64 c_setevent_fail; -+ E4_uint64 c_setevent_nop; -+ E4_uint64 c_nop_pad; -+} EP4_CHECK_STEN; -+ -+#define EP4_CHECK_STEN_NDWORDS (sizeof (EP4_CHECK_STEN) >> 3) -+ -+typedef struct ep4_rail_elan -+{ -+ EP4_CHECK_STEN r_check_sten[EP4_MAX_LEVELS]; -+ E4_Event32 r_check_fail; /* Check failed (== r_check_start[-1]) */ -+ E4_Event32 r_check_start[EP4_MAX_LEVELS]; -+ -+ E4_Event32 r_qevents[EP_NUM_SYSTEMQ]; -+ E4_Event32 r_flush_event; -+ -+ E4_uint64 r_nodeid; -+#ifdef __elan4__ -+ E4_uint64 *r_cookies; -+#else -+ E4_Addr r_cookies; -+#endif -+} EP4_RAIL_ELAN; -+ -+#define TRACEROUTE_ENTRIES 16 /* 2 * ELAN_MAX_LEVELS */ -+#define TRACEROUTE_NDWORDS (TRACEROUTE_ENTRIES/2) -+ -+typedef struct ep4_rail_main -+{ -+ E4_uint32 r_probe_dest0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_dest1[TRACEROUTE_ENTRIES]; -+ E4_uint64 r_probe_result; -+ E4_uint64 r_probe_level; -+ -+ E4_uint64 r_dma_flowcnt; /* count of dma's queued */ -+} EP4_RAIL_MAIN; -+ -+#define EP4_PROBE_ACTIVE (0xffff) -+#define EP4_PROBE_FAILED (0xfffe) -+ -+#if !defined(__elan4__) -+ -+typedef struct ep4_retry_ops -+{ -+ struct list_head op_link; -+ unsigned long (*op_func)(EP4_RAIL *rail, void *arg, unsigned long nextRunTime); -+ void *op_arg; -+} EP4_RETRY_OPS; -+ -+typedef struct ep4_neterr_ops -+{ -+ struct list_head op_link; -+ void (*op_func) (EP4_RAIL *rail, void *arg, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ void *op_arg; -+} EP4_NETERR_OPS; -+ -+struct ep4_rail -+{ -+ EP_RAIL r_generic; -+ ELAN4_CTXT r_ctxt; -+ ELAN4_ROUTE_TABLE *r_routetable; -+ -+ spinlock_t r_intcookie_lock; -+ struct list_head r_intcookie_hash[EP4_INTCOOKIE_HASH_SIZE]; -+ -+ sdramaddr_t r_elan; -+ EP_ADDR r_elan_addr; -+ EP4_RAIL_MAIN *r_main; -+ EP_ADDR r_main_addr; -+ -+ EP_CODE r_threadcode; /* copy of thread code */ -+ -+ sdramaddr_t r_queuedescs; /* systemq queue descriptors */ -+ -+ E4_uint64 *r_cookies; /* network error cookies */ -+ spinlock_t r_cookie_lock; /* and spin lock */ -+ -+ kcondvar_t r_probe_wait; /* network position probing */ -+ spinlock_t r_probe_lock; -+ volatile int r_probe_done; -+ EP4_INTCOOKIE r_probe_intcookie; -+ EP4_ECQ *r_probe_cq; -+ E4_uint32 r_probe_source0[TRACEROUTE_ENTRIES]; -+ E4_uint32 r_probe_source1[TRACEROUTE_ENTRIES]; -+ -+ kmutex_t r_haltop_mutex; /* halt/flush operations */ -+ ELAN4_HALTOP r_haltop; -+ ELAN4_DMA_FLUSHOP r_flushop; -+ kcondvar_t r_haltop_sleep; -+ spinlock_t r_haltop_lock; -+ -+ struct list_head r_ecq_list[EP4_NUM_ECQ]; /* list of statically allocated command queues */ -+ EP_RMAP *r_ecq_rmap; /* resource map for command queue mappings */ -+ spinlock_t r_ecq_lock; /* spinlock for list/space management */ -+ -+ kmutex_t r_flush_mutex; /* serialize command queue flushing */ -+ unsigned long r_flush_count; /* # setevents issued for flushing */ -+ EP4_ECQ *r_flush_mcq; /* and command queue for waitevent */ -+ EP4_ECQ *r_flush_ecq; /* and command queue for interrupt */ -+ EP4_INTCOOKIE r_flush_intcookie; /* and interrupt cookie */ -+ kcondvar_t r_flush_sleep; /* and place to sleep ... */ -+ -+ EP_KTHREAD r_retry_thread; /* retry thread */ -+ struct list_head r_retry_ops; /* list of retry operations */ -+ -+ EP4_RETRY_OPS r_dma_ops; /* dma retry operations */ -+ EP4_ECQ *r_dma_ecq; /* command queue to reissue DMAs */ -+ E4_uint64 r_dma_flowcnt; /* count of dma's reissued */ -+ struct list_head r_dma_retrylist[EP_NUM_RETRIES]; /* retry lists */ -+ struct list_head r_dma_freelist; /* and free list */ -+ spinlock_t r_dma_lock; /* and spinlock to protect lists */ -+ unsigned long r_dma_allocated; /* # retries allocated*/ -+ unsigned long r_dma_reserved; /* # retries reserved */ -+ -+ EP4_ECQ *r_event_ecq; /* command queue for occasional setevents */ -+ -+ struct list_head r_neterr_ops; /* list of neterr fixup operations */ -+ -+ ELAN4_IPROC_TRAP r_iproc_trap; -+ ELAN4_TPROC_TRAP r_tproc_trap; -+} ; -+ -+#define EP4_CTXT_TO_RAIL(ctxt) ((EP4_RAIL *) (((unsigned long) (ctxt)) - offsetof (EP4_RAIL, r_ctxt))) -+ -+#if defined(DEBUG_ASSERT) -+#define EP4_ASSERT(rail,EXPR) EP_ASSERT(&((rail)->r_generic), EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) EP4_ASSERT(rail, (sdram_assert ? elan4_sdram_readq ((rail)->r_ctxt.ctxt_dev, (off)) == (value) : 1)) -+#else -+#define EP4_ASSERT(rail,EXPR) -+#define EP4_SDRAM_ASSERT(rail,off,value) -+#endif -+ -+/* kcomm_elan4.c */ -+extern EP_RAIL *ep4_create_rail (EP_SYS *sys, ELAN4_DEV *dev); -+extern void ep4_destroy_rail (EP_RAIL *rail); -+ -+extern int ep4_start_rail (EP_RAIL *rail); -+extern void ep4_stall_rail (EP_RAIL *rail); -+extern void ep4_stop_rail (EP_RAIL *rail); -+ -+extern void ep4_debug_rail (EP_RAIL *rail); -+ -+extern void ep4_position_found (EP_RAIL *rail, ELAN_POSITION *pos); -+ -+extern sdramaddr_t ep4_sdram_alloc (EP_RAIL *rail, EP_ADDR addr, unsigned int size); -+extern void ep4_sdram_free (EP_RAIL *rail, sdramaddr_t addr, unsigned int size); -+extern void ep4_sdram_writeb (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+extern void ep4_flush_tlb (EP_RAIL *r); -+extern void ep4_load_system_route (EP_RAIL *r, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+extern void ep4_load_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_unload_node_route (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_lower_filter (EP_RAIL *r, unsigned int nodeId); -+extern void ep4_raise_filter (EP_RAIL *rail, unsigned int nodeId); -+extern void ep4_node_disconnected (EP_RAIL *r, unsigned int nodeId); -+ -+/* kmap_elan4.c */ -+extern void ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr); -+extern void ep4_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm); -+extern void ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len); -+extern void *ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages); -+extern void ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private); -+extern void ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm); -+extern physaddr_t ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index); -+extern void ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages); -+ -+/* kmsg_elan4.c */ -+extern EP_INPUTQ *ep4_alloc_inputq (EP_RAIL *r, unsigned int qnum, unsigned int slotSize, unsigned int slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg); -+extern void ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern void ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q); -+extern int ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+extern EP_OUTPUTQ *ep4_alloc_outputq (EP_RAIL *r, unsigned int slotSize, unsigned int slotCount); -+extern void ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q); -+extern void *ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum); -+extern int ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned int slotNum, unsigned int size, -+ unsigned int nodeId, unsigned int qnum, unsigned int retries); -+ -+/* probenetwork_elan4.c */ -+extern int ep4_probe_init (EP4_RAIL *r); -+extern void ep4_probe_destroy (EP4_RAIL *r); -+extern void ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos); -+extern int ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw); -+extern int ep4_check_position (EP_RAIL *rail); -+ -+/* support_elan4.c */ -+extern ELAN4_TRAP_OPS ep4_trap_ops; -+extern void ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg); -+extern void ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp); -+extern EP4_INTCOOKIE *ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie); -+extern E4_uint64 ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node); -+ -+extern void ep4_flush_filters (EP_RAIL *r); -+extern void ep4_flush_queues (EP_RAIL *r); -+extern void ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc); -+ -+extern EP4_ECQ *ep4_alloc_ecq (EP4_RAIL *rail, unsigned int cqsize); -+extern void ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq); -+extern EP4_ECQ *ep4_get_ecq (EP4_RAIL *rail, unsigned int which, unsigned int ndwords); -+extern void ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned int ndwords); -+ -+extern void ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag); -+extern void ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event); -+extern void ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+ -+extern void ep4_flush_interrupt (EP4_RAIL *rail, void *arg); -+extern void ep4_flush_ecqs (EP4_RAIL *rail); -+ -+extern void ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...); -+ -+extern void ep4_initialise_dma_retries (EP4_RAIL *rail); -+extern void ep4_finalise_dma_retries (EP4_RAIL *rail); -+extern int ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, unsigned int attr); -+extern void ep4_release_dma_retries(EP4_RAIL *rail, unsigned int count); -+extern void ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval); -+extern void ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma); -+extern void ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId); -+extern void ep4_display_rail (EP4_RAIL *rail); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+/* neterr_elan4.c */ -+extern void ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops); -+extern void ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+/* commands_elan4.c */ -+extern void elan4_nop_cmd (ELAN4_CQ *cq, E4_uint64 tag); -+extern void elan4_write_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_add_dword_cmd (ELAN4_CQ *cq, E4_Addr addr, E4_uint64 data); -+extern void elan4_copy64_cmd (ELAN4_CQ *cq, E4_Addr from, E4_Addr to, E4_uint32 datatype); -+extern void elan4_interrupt_cmd (ELAN4_CQ *cq, E4_uint64 cookie); -+extern void elan4_run_thread_cmd (ELAN4_CQ *cq, E4_ThreadRegs *regs); -+extern void elan4_run_dma_cmd (ELAN4_CQ *cq, E4_DMA *dma); -+extern void elan4_set_event_cmd (ELAN4_CQ *cq, E4_Addr event); -+extern void elan4_set_eventn_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint32 count); -+extern void elan4_wait_event_cmd (ELAN4_CQ *cq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1); -+extern void elan4_open_packet (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_guard (ELAN4_CQ *cq, E4_uint64 command); -+extern void elan4_sendtrans0 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr); -+extern void elan4_sendtrans1 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0); -+extern void elan4_sendtrans2 (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 p0, E4_uint64 p1); -+extern void elan4_sendtransn (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, ...); -+extern void elan4_sendtransp (ELAN4_CQ *cq, E4_uint16 trtype, E4_uint64 addr, E4_uint64 *ptr); -+ -+extern void ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops); -+extern void ep4_retry_thread (EP4_RAIL *rail); -+ -+extern void ep4_fillout_stats(EP_RAIL *rail, char *str); -+ -+#endif /* ! defined(__elan4__) */ -+ -+#endif /* __EP_KCOMM_ELAN4_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_vp.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kcomm_vp.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kcomm_vp.h 2005-07-28 14:52:52.889672504 -0400 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_KCOMM_VP_H -+#define __EP_KCOMM_VP_H -+ -+#ident "@(#)$Id: kcomm_vp.h,v 1.2 2004/03/24 11:32:56 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_vp.h,v $*/ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+ -+/* virtual process allocation */ -+#define EP_VP_NODE_BASE (0) -+#define EP_VP_DATA_BASE (EP_VP_NODE_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_BASE (EP_VP_DATA_BASE + EP_MAX_NODES) -+#define EP_VP_PROBE_COUNT ELAN_MAX_LEVELS -+ -+#define EP_VP_BCAST_BASE (EP_VP_PROBE_BASE + EP_VP_PROBE_COUNT) -+#define EP_VP_BCAST_COUNT (CM_SGMTS_PER_LEVEL * (CM_MAX_LEVELS - 1) + 1) -+ -+#define EP_VP_NODE(nodeId) (EP_VP_NODE_BASE + (nodeId)) -+#define EP_VP_DATA(nodeId) (EP_VP_DATA_BASE + (nodeId)) -+#define EP_VP_PROBE(lvl) (EP_VP_PROBE_BASE + (lvl)) -+#define EP_VP_BCAST(lvl,sgmt) (EP_VP_BCAST_BASE + ((lvl) - 1)*CM_SGMTS_PER_LEVEL + (sgmt)) -+ -+#define EP_VP_TO_NODE(vp) ((vp) & (EP_MAX_NODES-1)) -+#define EP_VP_ISDATA(vp) ((vp) >= EP_VP_DATA_BASE && (vp) < (EP_VP_DATA_BASE + EP_MAX_NODES)) -+ -+#endif /* __EP_KCOMM_VP_H */ -+ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap.c 2005-07-28 14:52:52.890672352 -0400 -@@ -0,0 +1,561 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap.c,v 1.10.6.2 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "debug.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+void -+ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.KaddrMap (rail, eaddr, kaddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr) -+{ -+ rail->Operations.SdramMap (rail, eaddr, saddr, len, perm, ep_attr); -+} -+ -+void -+ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len) -+{ -+ rail->Operations.Unmap (rail, eaddr, len); -+} -+ -+void -+ep_perrail_dvma_sync (EP_RAIL *rail) -+{ -+ if (rail->TlbFlushRequired) -+ { -+ rail->TlbFlushRequired = 0; -+ -+ rail->Operations.FlushTlb (rail); -+ } -+} -+ -+ -+static int ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+static uint16_t ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); -+#endif -+ -+EP_NMH_OPS ep_dvma_nmh_ops = -+{ -+ ep_dvma_map_rails, -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ ep_dvma_calc_check_sum -+#endif -+}; -+ -+extern void -+ep_dvma_init (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ kmutex_init (&d->dvma_lock); -+ -+ INIT_LIST_HEAD (&d->dvma_handles); -+ INIT_LIST_HEAD (&d->dvma_rails); -+ -+ d->dvma_rmap = ep_rmallocmap (EP_DVMA_RMAP_SIZE, "dvma_rmap", 1); -+ -+ ep_rmfree (d->dvma_rmap, EP_DVMA_TOP - EP_DVMA_BASE, EP_DVMA_BASE); -+} -+ -+extern void -+ep_dvma_fini (EP_SYS *sys) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ -+ ep_rmfreemap (d->dvma_rmap); -+ -+ kmutex_destroy (&d->dvma_lock); -+} -+ -+extern int -+ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_RAIL_ENTRY *l; -+ struct list_head *el; -+ -+ KMEM_ZALLOC (l, EP_RAIL_ENTRY *, sizeof (EP_RAIL_ENTRY), 1); -+ -+ if (l == NULL) -+ return (ENOMEM); -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ l->Rail = rail; -+ -+ list_add_tail (&l->Link, &d->dvma_rails); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= ( 1 << rail->Number); -+ -+ desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages); -+ } -+ -+ kmutex_unlock (&d->dvma_lock); -+ return (0); -+} -+ -+extern void -+ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ struct list_head *el; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_for_each (el, &d->dvma_handles) { -+ EP_DVMA_NMH *desc = list_entry (el, EP_DVMA_NMH, dvma_link); -+ int npages = desc->dvma_nmh.nmh_nmd.nmd_len >> PAGESHIFT; -+ -+ desc->dvma_rails[rail->Number] = NULL; -+ desc->dvma_railmask &= ~(1 << rail->Number); -+ -+ rail->Operations.DvmaRelease (rail, desc->dvma_nmh.nmh_nmd.nmd_addr, npages, desc->dvma_private[rail->Number]); -+ } -+ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL_ENTRY *tmp = list_entry (el, EP_RAIL_ENTRY, Link); -+ -+ if (tmp->Rail == rail) -+ { -+ list_del (el); -+ -+ KMEM_FREE (tmp, sizeof (EP_RAIL_ENTRY)); -+ break; -+ } -+ } -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+EP_NMH * -+ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc; -+ EP_ADDR addr; -+ struct list_head *el; -+ int i; -+ -+ KMEM_ZALLOC (desc, EP_DVMA_NMH *, offsetof (EP_DVMA_NMH, dvma_attrs[npages]), 1); -+ -+ if (desc == NULL) -+ return (NULL); -+ -+ if ((addr = ep_rmalloc (d->dvma_rmap, npages << PAGESHIFT, 0)) == 0) -+ { -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+ } -+ -+ spin_lock_init (&desc->dvma_lock); -+ -+ desc->dvma_perm = perm; -+ -+ kmutex_lock (&d->dvma_lock); -+ /* reserve the mapping resource */ -+ list_for_each (el, &d->dvma_rails) { -+ EP_RAIL *rail = list_entry (el, EP_RAIL_ENTRY, Link)->Rail; -+ -+ EPRINTF4 (DBG_KMAP, "%s: ep_dvma_reserve desc=%p npages=%d rail=%p\n", rail->Name, desc, npages, rail); -+ -+ if ((desc->dvma_private[rail->Number] = rail->Operations.DvmaReserve (rail, addr, npages)) == NULL) -+ { -+ printk ("%s: !!ep_dvma_reserve - rail->DvmaReserve failed\n", rail->Name); -+ goto failed; -+ } -+ -+ desc->dvma_rails[rail->Number] = rail; -+ desc->dvma_railmask |= (1 << rail->Number); -+ } -+ -+ /* insert into the network mapping handle table */ -+ desc->dvma_nmh.nmh_nmd.nmd_addr = addr; -+ desc->dvma_nmh.nmh_nmd.nmd_len = npages << PAGESHIFT; -+ desc->dvma_nmh.nmh_nmd.nmd_attr = EP_NMD_ATTR (sys->Position.pos_nodeid, 0); -+ desc->dvma_nmh.nmh_ops = &ep_dvma_nmh_ops; -+ -+ ep_nmh_insert (&sys->MappingTable, &desc->dvma_nmh); -+ -+ list_add (&desc->dvma_link, &d->dvma_handles); -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ return (&desc->dvma_nmh); -+ -+ failed: -+ -+ kmutex_unlock (&d->dvma_lock); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (desc->dvma_rails[i] != NULL) -+ desc->dvma_rails[i]->Operations.DvmaRelease (desc->dvma_rails[i], addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, sizeof (EP_DVMA_NMH)); -+ return (NULL); -+} -+ -+void -+ep_dvma_release (EP_SYS *sys, EP_NMH *nmh) -+{ -+ EP_DVMA_STATE *d = &sys->DvmaState; -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr; -+ int npages = nmh->nmh_nmd.nmd_len >> PAGESHIFT; -+ EP_RAIL *rail; -+ int i; -+ -+ kmutex_lock (&d->dvma_lock); -+ -+ list_del (&desc->dvma_link); -+ -+ ep_nmh_remove (&sys->MappingTable, nmh); -+ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if ((rail = desc->dvma_rails[i]) != NULL) -+ rail->Operations.DvmaRelease (rail, addr, npages, desc->dvma_private[i]); -+ -+ ep_rmfree (d->dvma_rmap, npages << PAGESHIFT, addr); -+ -+ KMEM_FREE (desc, offsetof (EP_DVMA_NMH, dvma_attrs[npages])); -+ -+ kmutex_unlock (&d->dvma_lock); -+} -+ -+void -+ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = (unsigned long) vaddr & PAGEOFFSET; -+ unsigned npages = btopr (len + offset); -+ EP_ADDR addr = nmh->nmh_nmd.nmd_addr + (index << PAGESHIFT); -+ int rmask = *hints; -+ EP_RAIL *rail; -+ register int i, rnum; -+ unsigned long flags; -+ -+ EPRINTF7 (DBG_KMAP, "ep_dvma_load: map=%p vaddr=%p len=%x nmh=%p(%x,%x) index=%d\n", -+ map, vaddr, len, nmh, nmh->nmh_nmd.nmd_addr, nmh->nmh_nmd.nmd_len, index); -+ -+ /* If no rail specified, then map into all rails */ -+ if (rmask == 0) -+ rmask = desc->dvma_railmask; -+ -+ ASSERT ((index + npages) <= (nmh->nmh_nmd.nmd_len >> PAGESHIFT)); -+ -+ /* If not map specified then use the kernel map */ -+ if (map == NULL) -+ map = kernel_map; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ /* Now map each of the specified pages (backwards) */ -+ -+ vaddr = (vaddr - offset) + (npages-1)*PAGESIZE; -+ for (i = npages-1; i >= 0; i--, vaddr -= PAGESIZE) -+ { -+ physaddr_t paddr = vaddr_to_phys (map, vaddr); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ { -+ if (! (rmask & (1 << rnum)) || (rail = desc->dvma_rails[rnum]) == NULL) -+ rmask &= ~(1 << rnum); -+ else -+ { -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], index + i, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[index + i] |= (1 << rnum); -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->TlbFlushRequired = 1; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ /* Construct the network mapping handle to be returned. */ -+ subset->nmd_addr = addr + offset; -+ subset->nmd_len = len; -+ subset->nmd_attr = EP_NMD_ATTR(sys->Position.pos_nodeid, rmask); -+} -+ -+void -+ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ EP_RAIL *rail; -+ int rnum; -+ int rmask; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ /* compute which rails we need to unload on */ -+ for (rmask = 0, i = 0; i < npages; i++) -+ { -+ rmask |= desc->dvma_attrs[index + i]; -+ -+ desc->dvma_attrs[index + i] = 0; -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((rmask & (1 << rnum)) && (rail = desc->dvma_rails[rnum]) != NULL) -+ rail->Operations.DvmaUnload (rail, desc->dvma_private[rnum], index, npages); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+} -+ -+int -+ep_dvma_map_rails (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask) -+{ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ int r, rnum; -+ register int i; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x mask=%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ if ((mask &= desc->dvma_railmask) == 0) -+ { -+ printk ("ep_dvma_map_rails: no intersecting rails %04x.%04x\n", mask, desc->dvma_railmask); -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ return (-1); -+ } -+ -+ for (i = npages-1; i >= 0; i--) -+ { -+ int pgidx = (index + i); -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[pgidx] & (1 << rnum)) -+ break; -+ -+ if (rnum == EP_MAX_RAILS) -+ { -+ EPRINTF3 (DBG_KMAP, "ep_dvma_map_rails: nmh=%p idx=%x [%08x] not ptes valid\n", nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + ((pgidx) << PAGESHIFT)); -+ mask = 0; -+ } -+ else -+ { -+ EP_RAIL *rail = desc->dvma_rails[rnum]; -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ -+ for (r = 0; r < EP_MAX_RAILS; r++) -+ { -+ if ((mask & (1 << r)) == 0) -+ continue; -+ -+ if ((desc->dvma_attrs[pgidx] & (1 << r)) == 0) -+ { -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_map_rails: nmh=%p idx=%x [%08x] paddr=%llx\n", -+ desc->dvma_rails[rnum]->Name, nmh, pgidx, nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), -+ (long long) paddr); -+ -+ rail->Operations.DvmaSetPte (rail, desc->dvma_private[rnum], pgidx, paddr, desc->dvma_perm); -+ -+ desc->dvma_attrs[pgidx] |= (1 << r); -+ } -+ } -+ } -+ } -+ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if ((mask & (1 << rnum)) != 0) -+ desc->dvma_rails[rnum]->TlbFlushRequired = 1; -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_map_rails: nmd=%08x.%08x.%08x|%04x\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, mask); -+ -+ /* Finally update the network memory descriptor */ -+ nmd->nmd_attr |= mask; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (0); -+} -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#include -+ -+/* Generic rolling checksum algorithm */ -+uint16_t -+rolling_check_sum (char *msg, int nob, uint16_t sum) -+{ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+#if ! defined(NO_RMAP) -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ kunmap(pfn_to_page(pfn)); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ unsigned long pfn = (phys_addr >> PAGE_SHIFT); -+ -+ if (pfn_valid(pfn)) -+ return kmap(pfn_to_page(pfn)); -+ -+ return NULL; -+} -+#else -+void -+unmap_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ kunmap(p); -+} -+ -+void * -+map_phys_address(unsigned long phys_addr) -+{ -+ struct page *p = virt_to_page(__va(phys_addr)); -+ -+ if (VALID_PAGE(p)) -+ return kmap(p); -+ -+ return NULL; -+} -+#endif -+ -+uint16_t -+ep_dvma_calc_check_sum (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum) -+{ -+ /* cant be called from an interupt */ -+ -+ EP_DVMA_NMH *desc = (EP_DVMA_NMH *) nmh; -+ unsigned offset = nmd->nmd_addr & PAGEOFFSET; -+ unsigned npages = btopr (nmd->nmd_len + offset); -+ unsigned index = (nmd->nmd_addr - nmh->nmh_nmd.nmd_addr) >> PAGESHIFT; -+ unsigned start, len; -+ int rnum; -+ register int i; -+ unsigned long flags; -+ EP_RAIL *rail; -+ -+ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ -+ EPRINTF3 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x \n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ -+ /* find a rail */ -+ for (rnum = 0; rnum < EP_MAX_RAILS; rnum++) -+ if (desc->dvma_attrs[index] & (1 << rnum)) -+ break; -+ -+ ASSERT (rnum != EP_MAX_RAILS); -+ -+ rail = desc->dvma_rails[rnum]; -+ -+ for (i = 0; i <= (npages-1); i++) -+ { -+ int pgidx = (index + i); -+ physaddr_t paddr = rail->Operations.DvmaReadPte (rail, desc->dvma_private[rnum], pgidx); -+ void * virt; -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); /* unlock for check sum calc */ -+ -+ virt = map_phys_address(paddr); -+ -+ if (!virt) -+ printk("ep_dvma_calc_check_sum: virt = NULL ! \n"); -+ else { -+ if ( i == 0 ) { -+ /* last bit of the first page */ -+ start = (nmd->nmd_addr & (PAGESIZE - 1)) ; -+ len = PAGESIZE - start; -+ if ( len > nmd->nmd_len) /* less than the remaining page */ -+ len = nmd->nmd_len; -+ } else { -+ if ( i != (npages-1)) { -+ /* all of the middle pages */ -+ start = 0; -+ len = PAGESIZE; -+ } else { -+ /* first bit of the last page */ -+ start = 0; -+ len = ((nmd->nmd_addr + nmd->nmd_len -1) & (PAGESIZE -1)) +1; -+ } -+ } -+ -+ check_sum = rolling_check_sum (((char *)virt)+start, len, check_sum); -+ unmap_phys_address(paddr); -+ -+ /* re aquire the lock */ -+ spin_lock_irqsave (&desc->dvma_lock, flags); -+ } -+ -+ EPRINTF5 (DBG_KMAP, "%s: ep_dvma_calc_check_sum: nmh=%p idx=%x [%08x] paddr %llx\n", rail->Name, nmh, pgidx, -+ nmh->nmh_nmd.nmd_addr + (pgidx << PAGESHIFT), (long long) paddr); -+ } -+ -+ EPRINTF4 (DBG_KMAP, "ep_dvma_calc_check_sum: nmd=%08x.%08x.%08x = %d\n", nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr, check_sum); -+ -+ spin_unlock_irqrestore (&desc->dvma_lock, flags); -+ -+ return (check_sum); -+} -+#endif -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kmap_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap_elan3.c 2005-07-28 14:52:52.891672200 -0400 -@@ -0,0 +1,209 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan3.c,v 1.3.8.1 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan3.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+#define ELAN3_PTES_PER_PAGE (PAGESIZE/ELAN3_PAGE_SIZE) -+ -+#if defined(__LITTLE_ENDIAN__) -+#define PERM_ENDIAN 0 -+#else -+#define PERM_ENDIAN ELAN3_PTE_BIG_ENDIAN -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEALL, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int io_permtable[] = -+{ -+ ELAN3_PERM_LOCAL_READ, /* EP_PERM_EXECUTE */ -+ ELAN3_PERM_REMOTEREAD, /* EP_PERM_READ */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_WRITE */ -+ ELAN3_PERM_REMOTEWRITE, /* EP_PERM_ALL */ -+}; -+ -+void -+ep3_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ main_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0)); -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = elan3_sdram_to_phys (rail->Device, saddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr+off, paddr+off, -+ sdram_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC | ((ep_attr & EP_NO_SLEEP) ? PTE_NO_SLEEP : 0) ); -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep3_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned len, unsigned int perm) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ unsigned npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (ioaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) ioaddr); -+ -+ for (off = 0; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ elan3mmu_pteload (rail->Elan3mmu, PTBL_LEVEL_3, eaddr + off, paddr + off, -+ io_permtable[perm], PTE_LOAD_LOCK | PTE_LOAD_NOSYNC); -+ -+ eaddr += PAGESIZE; -+ ioaddr += PAGESIZE; -+ } -+} -+void -+ep3_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned len) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan3mmu_unload (rail->Elan3mmu, eaddr, len, PTE_UNLOAD_UNLOCK | PTE_UNLOAD_NOSYNC); -+} -+ -+void * -+ep3_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ void *private; -+ -+ KMEM_ALLOC (private, void *, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t), 1); -+ -+ if (private == NULL) -+ return NULL; -+ -+ elan3mmu_reserve (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ return private; -+} -+ -+void -+ep3_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned npages, void *private) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ -+ elan3mmu_release (rail->Elan3mmu, eaddr, npages * ELAN3_PTES_PER_PAGE, (sdramaddr_t *) private); -+ -+ KMEM_FREE (private, npages * ELAN3_PTES_PER_PAGE * sizeof (sdramaddr_t)); -+} -+ -+void -+ep3_dvma_set_pte (EP_RAIL *r, void *private, unsigned index, physaddr_t paddr, unsigned int perm) -+{ -+ ELAN3_DEV *dev = ((EP3_RAIL *) r)->Device; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ int off; -+ -+ for (off =0 ; off < PAGESIZE; off += ELAN3_PAGE_SIZE) -+ { -+ ELAN3_PTE newpte = elan3mmu_phys_to_pte (dev, paddr + off, main_permtable[perm]) | ELAN3_PTE_REF | ELAN3_PTE_MOD; -+ -+ elan3_writepte (dev, *ptep, newpte); -+ -+ ptep++; -+ } -+} -+ -+physaddr_t -+ep3_dvma_read_pte (EP_RAIL *r, void *private, unsigned index) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE pte = elan3_readpte (rail->Device, *ptep); -+ -+ return pte & ELAN3_PTE_PFN_MASK; -+} -+ -+void -+ep3_dvma_unload (EP_RAIL *r, void *private, unsigned index, unsigned npages) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ sdramaddr_t *ptep = &((sdramaddr_t *) private)[index * ELAN3_PTES_PER_PAGE]; -+ ELAN3_PTE tpte = elan3mmu_kernel_invalid_pte (rail->Elan3mmu); -+ int i; -+ -+ for (i = (npages * ELAN3_PTES_PER_PAGE) - 1; i >= 0; i--) -+ elan3_writepte (rail->Device, ptep[i], tpte); -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kmap_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kmap_elan4.c 2005-07-28 14:52:52.892672048 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmap_elan4.c,v 1.7.8.3 2005/03/18 13:54:01 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_elan4.h" -+ -+#if defined(DIGITAL_UNIX) -+# define kernel_map (first_task->map) -+# define vaddr_to_phys(map, addr) (pmap_extract (vm_map_pmap ((vm_map_t) map), (unsigned long) addr)) -+#elif defined(LINUX) -+# define kernel_map get_kern_mm() -+# define vaddr_to_phys(map, addr) (kmem_to_phys(addr)) -+#elif defined(SOLARIS) -+# define kernel_map &kas -+# define vaddr_to_phys(map,addr) ptob(hat_getpfnum (((struct as *) map)->a_hat, (caddr_t) addr)) -+#endif -+ -+static unsigned int main_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_DataReadWrite, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int sdram_permtable[] = -+{ -+ PERM_LocExecute, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_RemoteAll, /* EP_PERM_ALL */ -+}; -+ -+static unsigned int io_permtable[] = -+{ -+ PERM_Unused, /* EP_PERM_EXECUTE */ -+ PERM_RemoteReadOnly, /* EP_PERM_READ */ -+ PERM_DataReadWrite, /* EP_PERM_WRITE */ -+ PERM_Unused, /* EP_PERM_ALL */ -+}; -+ -+void -+ep4_kaddr_map (EP_RAIL *r, EP_ADDR eaddr, virtaddr_t kaddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (kaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) kaddr); -+ -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ kaddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_sdram_map (EP_RAIL *r, EP_ADDR eaddr, sdramaddr_t saddr, unsigned int len, unsigned int perm, int ep_attr) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (saddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ if ((eaddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT)) != (saddr & (SDRAM_PGOFF_OFFSET << PAGE_SHIFT))) -+ printk ("ep4_sdram_map: eaddr=%x saddr=%lx - incorrectly alised\n", eaddr, saddr); -+ -+ for (i = 0; i < npages; i++) -+ { -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = ((saddr + off) >> PTE_PADDR_SHIFT) | PTE_SetPerm (sdram_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ saddr += PAGESIZE; -+ } -+} -+ -+void -+ep4_ioaddr_map (EP_RAIL *r, EP_ADDR eaddr, ioaddr_t ioaddr, unsigned int len, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int npages = len >> PAGESHIFT; -+ int i; -+ unsigned int off; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (ioaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ for (i = 0; i < npages; i++) -+ { -+ physaddr_t paddr = vaddr_to_phys (kernel_map, (void *) ioaddr); -+ -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, io_permtable[perm]); -+ -+ elan4mmu_pteload (&rail->r_ctxt, 0, eaddr + off, newpte); -+ } -+ -+ eaddr += PAGESIZE; -+ ioaddr += PAGESIZE; -+ } -+} -+void -+ep4_unmap (EP_RAIL *r, EP_ADDR eaddr, unsigned int len) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ ASSERT ((eaddr & PAGEOFFSET) == 0 && (len & PAGEOFFSET) == 0); -+ -+ elan4mmu_unload_range (&rail->r_ctxt, 0, eaddr, len); -+} -+ -+void * -+ep4_dvma_reserve (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_reserve: eaddr=%x npages=%d (=> %d)\n", eaddr, npages, (npages << (PAGE_SHIFT - dev->dev_pageshift[0]))); -+ -+ return elan4mmu_reserve (&rail->r_ctxt, 0, (E4_Addr) eaddr, (npages << (PAGE_SHIFT - dev->dev_pageshift[0])), 1); -+} -+ -+void -+ep4_dvma_release (EP_RAIL *r, EP_ADDR eaddr, unsigned int npages, void *private) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_release: eaddr=%x npages=%d private=%p\n", eaddr, npages, private); -+ -+ elan4mmu_release (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private); -+} -+ -+void -+ep4_dvma_set_pte (EP_RAIL *r, void *private, unsigned int index, physaddr_t paddr, unsigned int perm) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ unsigned int off; -+ unsigned long flags; -+ -+ EPRINTF3 (DBG_KMAP, "ep4_dvma_set_pte: index %x -> eaddr %llx paddr %llx\n", -+ index, ((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE), (long long) paddr); -+ -+ local_irq_save (flags); -+ for (off = 0; off < PAGESIZE; off += (1 << dev->dev_pageshift[0])) -+ { -+ E4_uint64 newpte = elan4mmu_phys2pte (dev, paddr + off, main_permtable[perm]); -+ -+ elan4mmu_set_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, (index << (PAGE_SHIFT - dev->dev_pageshift[0])) + -+ (off >> dev->dev_pageshift[0]), newpte); -+ } -+ local_irq_restore (flags); -+} -+ -+physaddr_t -+ep4_dvma_read_pte (EP_RAIL *r, void *private, unsigned int index) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ E4_uint64 pte; -+ unsigned long flags; -+ -+ local_irq_save (flags); -+ pte = elan4mmu_get_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ local_irq_restore (flags); -+ -+ return elan4mmu_pte2phys (dev, pte); -+} -+ -+void -+ep4_dvma_unload (EP_RAIL *r, void *private, unsigned int index, unsigned int npages) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP_ADDR eaddr = ((ELAN4_HASH_CACHE *) private)->hc_start + (index * PAGE_SIZE); -+ unsigned long idx = (index << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long lim = idx + (npages << (PAGE_SHIFT - dev->dev_pageshift[0])); -+ unsigned long flags; -+ -+ EPRINTF5 (DBG_KMAP, "ep4_dvma_unload: eaddr %x -> %lx : index=%d idx=%ld lim=%ld\n", -+ eaddr, (unsigned long)(eaddr + (npages * PAGE_SIZE)), index, idx, lim); -+ -+ local_irq_save (flags); -+ for (; idx < lim; idx++) -+ elan4mmu_clear_pte (&rail->r_ctxt, (ELAN4_HASH_CACHE *) private, idx); -+ local_irq_restore (flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kmsg_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kmsg_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kmsg_elan3.c 2005-07-28 14:52:52.892672048 -0400 -@@ -0,0 +1,345 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan3.c,v 1.3.8.1 2004/09/30 09:52:37 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+static void -+ep3_inputq_event (EP3_RAIL *rail, void *arg) -+{ -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) arg; -+ -+ (*inputq->q_callback)((EP_RAIL *)rail, inputq->q_arg); -+} -+ -+static EP3_COOKIE_OPS ep3_inputq_cookie_ops = -+{ -+ ep3_inputq_event, -+}; -+ -+EP_INPUTQ * -+ep3_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq; -+ EP3_InputQueue qdesc; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP3_INPUTQ *, sizeof (EP3_INPUTQ), TRUE); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->Generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_base = inputq->q_slotsAddr; -+ inputq->q_top = inputq->q_base + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_base; -+ inputq->q_desc = EP_SYSTEMQ_DESC(rail->QueueDescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ -+ if (callback) -+ RegisterCookie (&rail->CookieTable, &inputq->q_cookie, inputq->q_descAddr, &ep3_inputq_cookie_ops, inputq); -+ -+ /* Initialise the input queue descriptor */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = inputq->q_base + slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep3_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ ep_free_main (&rail->Generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ DeregisterCookie (&rail->CookieTable, &inputq->q_cookie); -+ -+ KMEM_FREE (inputq, sizeof (EP3_INPUTQ)); -+} -+ -+void -+ep3_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ -+ elan3_sdram_writel (rail->Device, inputq->q_desc + offsetof (EP3_InputQueue, q_state), 0); -+} -+ -+void -+ep3_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ EP3_InputQueue qdesc; -+ -+ /* mark the queue as locked */ -+ SetQueueLocked (rail, inputq->q_desc); -+ -+ /* re-initialise the queue as empty */ -+ qdesc.q_state = E3_QUEUE_FULL; -+ qdesc.q_bptr = (E3_Addr) inputq->q_base + inputq->q_slotSize; -+ qdesc.q_fptr = inputq->q_fptr; -+ qdesc.q_base = inputq->q_base; -+ qdesc.q_top = inputq->q_top; -+ qdesc.q_size = inputq->q_slotSize; -+ qdesc.q_event.ev_Count = 1; -+ qdesc.q_event.ev_Type = inputq->q_callback ? EV_TYPE_EVIRQ | inputq->q_cookie.Cookie : 0; -+ qdesc.q_wevent = inputq->q_descAddr + offsetof (EP3_InputQueue, q_event); -+ qdesc.q_wcount = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ elan3_sdram_copyl_to_sdram (rail->Device, &qdesc, inputq->q_desc, sizeof (EP3_InputQueue)); -+} -+ -+int -+ep3_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_INPUTQ *inputq = (EP3_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E3_Addr nfptr; -+ int count = 0; -+ E3_uint32 state; -+ int delay; -+ -+ run_again_because_of_eventqueue_overflow: -+ nfptr = inputq->q_fptr + inputq->q_slotSize; -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ while (nfptr != elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_bptr))) /* PCI read */ -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (nfptr - inputq->q_base); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ state = elan3_sdram_readl (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state)); /* PCI read */ -+ if ((state & E3_QUEUE_FULL) == 0) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ else -+ { -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_fptr), nfptr); /* PCI write */ -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_state), (state & ~E3_QUEUE_FULL)); /* PCI write */ -+ } -+ inputq->q_fptr = nfptr; -+ -+ nfptr += roundup (inputq->q_slotSize, E3_BLK_ALIGN); -+ if (nfptr > inputq->q_top) -+ nfptr = inputq->q_base; -+ -+ if (++count >= maxCount && maxCount) -+ break; -+ } -+ -+ if (inputq->q_callback && count != 0) -+ { -+ if (count != inputq->q_waitCount) -+ elan3_sdram_writel (rail->Device, qdesc + offsetof (EP3_InputQueue, q_wcount), inputq->q_waitCount = count); -+ -+ if (IssueWaitevent (rail, inputq->q_descAddr + offsetof (EP3_InputQueue, q_wevent)) == ISSUE_COMMAND_TRAPPED) -+ goto run_again_because_of_eventqueue_overflow; -+ } -+ -+ return count; -+} -+ -+#define Q_EVENT(q,slotNum) ((q)->q_elan + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_EVENT_ADDR(q,slotNum) ((q)->q_elanAddr + (slotNum) * sizeof (E3_BlockCopyEvent)) -+#define Q_MSG(q,slotNum) (void *)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) (*((int *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E3_uint32)) -+ -+#define Q_ELAN_SIZE(q) ((q)->q_slotCount * sizeof (E3_BlockCopyEvent)) -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E3_uint32))) -+ -+static void -+ep3_outputq_retry (EP3_RAIL *rail, void *arg, E3_DMA_BE *dma, int error) -+{ -+ E3_DMA_BE *dmabe = (E3_DMA_BE *) dma; -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, dmabe->s.dma_srcEvent); -+ E3_Addr done = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Dest)); -+ E3_uint32 *donep = ep_elan2main (&rail->Generic, done & ~EV_BCOPY_DTYPE_MASK); -+ -+ EPRINTF1 (DBG_KMSG, "ep3_ouputq_retry: donep at %p -> FAILED\n", donep); -+ -+ *donep = EP3_EVENT_FAILED; -+} -+ -+static EP3_COOKIE_OPS ep3_outputq_cookie_ops = -+{ -+ NULL, /* Event */ -+ ep3_outputq_retry, -+ NULL, /* DmaCancelled */ -+ NULL, /* DmaVerify */ -+}; -+ -+EP_OUTPUTQ * -+ep3_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq; -+ int i; -+ E3_BlockCopyEvent event; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP3_OUTPUTQ *, sizeof (EP3_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ -+ outputq->q_elan = ep_alloc_elan (r, Q_ELAN_SIZE(outputq), 0, &outputq->q_elanAddr); -+ -+ if (outputq->q_elan == (sdramaddr_t) 0) -+ { -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (void *) NULL) -+ { -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+ return NULL; -+ } -+ -+ RegisterCookie (&rail->CookieTable, &outputq->q_cookie, outputq->q_elanAddr, &ep3_outputq_cookie_ops, outputq); -+ -+ for (i = 0; i < slotCount; i++) -+ { -+ EP3_INIT_COPY_EVENT (event, outputq->q_cookie, Q_DONE_ADDR(outputq, i), 0); -+ -+ Q_DONE(outputq, i) = outputq->q_cookie.Cookie; -+ -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, Q_EVENT(outputq, i), sizeof (E3_BlockCopyEvent)); -+ } -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep3_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ -+ DeregisterCookie (&rail->CookieTable, &outputq->q_cookie); -+ -+ ep_free_main (r, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ ep_free_elan (r, outputq->q_elanAddr, Q_ELAN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP3_OUTPUTQ)); -+} -+ -+void * -+ep3_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return Q_MSG ((EP3_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep3_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ switch (Q_DONE((EP3_OUTPUTQ *) q, slotNum)) -+ { -+ case EP3_EVENT_ACTIVE: -+ return EP_OUTPUTQ_BUSY; -+ -+ case EP3_EVENT_FAILED: -+ return EP_OUTPUTQ_FAILED; -+ -+ default: -+ return EP_OUTPUTQ_FINISHED; -+ } -+} -+ -+int -+ep3_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_OUTPUTQ *outputq = (EP3_OUTPUTQ *) q; -+ unsigned base = outputq->q_slotSize - roundup (size, E3_BLK_ALIGN); -+ E3_DMA_BE dmabe; -+ -+ dmabe.s.dma_type = E3_DMA_TYPE(DMA_BYTE, DMA_WRITE, DMA_QUEUED, retries); -+ dmabe.s.dma_size = roundup (size, E3_BLK_ALIGN); -+ dmabe.s.dma_source = Q_MSG_ADDR(outputq, slotNum) + base; -+ dmabe.s.dma_dest = base; -+ dmabe.s.dma_destEvent = EP_SYSTEMQ_ADDR(qnum); -+ dmabe.s.dma_destCookieVProc = vp; -+ dmabe.s.dma_srcEvent = Q_EVENT_ADDR(outputq, slotNum); -+ dmabe.s.dma_srcCookieVProc = 0; -+ -+ Q_DONE(outputq, slotNum) = EP3_EVENT_ACTIVE; -+ -+ elan3_sdram_writel (rail->Device, Q_EVENT(outputq, slotNum), 1); -+ -+ if (IssueDma (rail, &dmabe, EP_RETRY_CRITICAL, FALSE) != ISSUE_COMMAND_OK) -+ { -+ Q_DONE(outputq, slotNum) = EP3_EVENT_FAILED; -+ return FALSE; -+ } -+ -+ return TRUE; -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kmsg_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kmsg_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kmsg_elan4.c 2005-07-28 14:52:52.893671896 -0400 -@@ -0,0 +1,418 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kmsg_elan4.c,v 1.8.6.2 2005/02/28 14:06:56 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "debug.h" -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+ -+#include -+ -+static void -+ep4_inputq_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) arg; -+ -+ /* mark the queue as "fired" to cause a single waitevent -+ * to be issued next time the queue is polled */ -+ atomic_inc (&inputq->q_fired); -+ -+ (*inputq->q_callback)(&rail->r_generic, inputq->q_arg); -+} -+ -+EP_INPUTQ * -+ep4_alloc_inputq (EP_RAIL *r, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ EP_INPUTQ_CALLBACK *callback, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq; -+ E4_Event32 qevent; -+ void *slots; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (inputq, EP4_INPUTQ *, sizeof (EP4_INPUTQ), 1); -+ -+ if (inputq == NULL) -+ return (EP_INPUTQ *) NULL; -+ -+ if ((slots = ep_alloc_main (&rail->r_generic, slotSize * slotCount, 0, &inputq->q_slotsAddr)) == NULL) -+ { -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ inputq->q_slotSize = slotSize; -+ inputq->q_slotCount = slotCount; -+ inputq->q_callback = callback; -+ inputq->q_arg = arg; -+ inputq->q_slots = slots; -+ -+ /* Initialise all the slots to be "unreceived" */ -+ for (i = 0; i < slotCount; i++) -+ ((uint32_t *) ((unsigned long) slots + (i+1) * slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ inputq->q_last = inputq->q_slotsAddr + (slotCount-1) * slotSize; -+ inputq->q_fptr = inputq->q_slotsAddr; -+ inputq->q_desc = EP_SYSTEMQ_DESC (rail->r_queuedescs, qnum); -+ inputq->q_descAddr = EP_SYSTEMQ_ADDR (qnum); -+ inputq->q_eventAddr = rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]); -+ -+ if (callback) -+ { -+ if ((inputq->q_ecq = ep4_get_ecq (rail, EP4_ECQ_EVENT, 1)) == 0) -+ { -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ if ((inputq->q_wcq = ep4_get_ecq (rail, EP4_ECQ_MAIN, 4)) == 0) -+ { -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+ return (EP_INPUTQ *) NULL; -+ } -+ -+ ep4_register_intcookie (rail, &inputq->q_intcookie, inputq->q_descAddr, ep4_inputq_interrupt, inputq); -+ -+ inputq->q_count = 0; -+ -+ atomic_set (&inputq->q_fired, 0); -+ -+ /* Initialise the queue event */ -+ qevent.ev_CountAndType = E4_EVENT_INIT_VALUE (callback ? -32 : 0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0); -+ qevent.ev_WritePtr = inputq->q_ecq->ecq_addr; -+ qevent.ev_WriteValue = (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD; -+ } -+ -+ /* copy the event down to sdram */ -+ elan4_sdram_copyq_to_sdram (rail->r_ctxt.ctxt_dev, &qevent, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_qevents[qnum]), sizeof (E4_Event32)); -+ -+ return (EP_INPUTQ *) inputq; -+} -+ -+void -+ep4_free_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ -+ ep_free_main (&rail->r_generic, inputq->q_slotsAddr, inputq->q_slotSize * inputq->q_slotCount); -+ -+ if (inputq->q_callback) -+ { -+ ep4_deregister_intcookie (rail, &inputq->q_intcookie); -+ ep4_put_ecq (rail, inputq->q_ecq, 1); -+ ep4_put_ecq (rail, inputq->q_wcq, 4); -+ } -+ -+ KMEM_FREE (inputq, sizeof (EP4_INPUTQ)); -+} -+ -+void -+ep4_enable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ EP_ADDR lastSlot = inputq->q_slotsAddr + (inputq->q_slotCount-1) * inputq->q_slotSize; -+ E4_InputQueue qdesc; -+ -+ qdesc.q_bptr = inputq->q_slotsAddr; -+ qdesc.q_fptr = inputq->q_slotsAddr; -+ qdesc.q_control = E4_InputQueueControl (inputq->q_slotsAddr, lastSlot, inputq->q_slotSize); -+ qdesc.q_event = inputq->q_callback ? inputq->q_eventAddr : 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+ -+ EPRINTF5 (DBG_KMSG, "ep_enable_inputq: %x - %016llx %016llx %016llx %016llx\n", (int) inputq->q_descAddr, -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 0), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 8), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 16), -+ elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq->q_desc + 24)); -+} -+ -+void -+ep4_disable_inputq (EP_RAIL *r, EP_INPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ E4_InputQueue qdesc; -+ -+ /* Initialise the input queue descriptor as "full" with no event */ -+ qdesc.q_bptr = 0; -+ qdesc.q_fptr = 8; -+ qdesc.q_control = E4_InputQueueControl(qdesc.q_bptr, qdesc.q_fptr, 8); -+ qdesc.q_event = 0; -+ -+ /* copy the queue descriptor down to sdram */ -+ ep4_write_qdesc (rail, inputq->q_desc, &qdesc); -+} -+ -+int -+ep4_poll_inputq (EP_RAIL *r, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ EP4_INPUTQ *inputq = (EP4_INPUTQ *) q; -+ sdramaddr_t qdesc = inputq->q_desc; -+ E4_Addr fptr = inputq->q_fptr; -+ E4_Addr bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ int count = 0; -+ int delay; -+ -+ while (bptr != 0 && fptr != bptr) -+ { -+ while (fptr != bptr) -+ { -+ unsigned long slot = (unsigned long) inputq->q_slots + (fptr - inputq->q_slotsAddr); -+ -+ /* Poll the final word of the message until the message has completely -+ * arrived in main memory. */ -+ for (delay = 1; ((uint32_t *) (slot + inputq->q_slotSize))[-1] == EP_SYSTEMQ_UNRECEIVED && delay < EP_SYSTEMQ_UNRECEIVED_TLIMIT; delay <<= 1) -+ DELAY (delay); -+ -+ EPRINTF4(DBG_KMSG, "ep4_poll_inputq: %x slot %d of %d [%08x]\n", (int)inputq->q_descAddr, -+ ((int)(fptr - inputq->q_slotsAddr))/inputq->q_slotSize, -+ inputq->q_slotCount, ((uint32_t *) (slot + inputq->q_slotSize))[-1]); -+ -+ /* Call the message handler */ -+ (*handler) (r, arg, (void *) slot); -+ -+ /* reset the last word of the slot to "unreceived" */ -+ ((uint32_t *) (slot + inputq->q_slotSize))[-1] = EP_SYSTEMQ_UNRECEIVED; -+ -+ /* move on the front pointer */ -+ fptr = (fptr == inputq->q_last) ? inputq->q_slotsAddr : fptr + inputq->q_slotSize; -+ -+ elan4_sdram_writel (dev, qdesc + offsetof (E4_InputQueue, q_fptr), fptr); -+ -+ inputq->q_count++; -+ -+ if (++count >= maxCount && maxCount) -+ { -+ inputq->q_fptr = fptr; -+ -+ return count; -+ } -+ } -+ -+ bptr = elan4_sdram_readl (dev, qdesc + offsetof (E4_InputQueue, q_bptr)); -+ } -+ -+ inputq->q_fptr = fptr; -+ -+ /* Only insert a single wait event command if the callback has -+ * occured, otherwise just acrue the count as we've just periodically -+ * polled it. -+ */ -+ if (inputq->q_callback && atomic_read (&inputq->q_fired)) -+ { -+ atomic_dec (&inputq->q_fired); -+ -+ ep4_wait_event_cmd (inputq->q_wcq, inputq->q_eventAddr, -+ E4_EVENT_INIT_VALUE (-inputq->q_count << 5, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), -+ inputq->q_ecq->ecq_addr, -+ (inputq->q_intcookie.int_val << E4_MAIN_INT_SHIFT) | INTERRUPT_CMD); -+ -+ inputq->q_count = 0; -+ } -+ -+ return count; -+} -+ -+#define Q_MSG(q,slotNum) (unsigned long)((q)->q_main + (slotNum) * (q)->q_slotSize) -+#define Q_MSG_ADDR(q,slotNum) ((q)->q_mainAddr + (slotNum) * (q)->q_slotSize) -+#define Q_DONE(q,slotNum) *((E4_uint64 *)((q)->q_main + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64))) -+#define Q_DONE_ADDR(q,slotNum) ((q)->q_mainAddr + (q)->q_slotCount * (q)->q_slotSize + (slotNum) * sizeof (E4_uint64)) -+ -+#define Q_MAIN_SIZE(q) ((q)->q_slotCount * ((q)->q_slotSize + sizeof (E4_uint64))) -+ -+#define Q_DONE_VAL(val,cnt) ((cnt) << 16 | (val)) -+#define Q_DONE_RET(done) ((int) ((done) & 0xffff)) -+#define Q_DONE_CNT(done) ((int) ((done) >> 16)) -+ -+EP_OUTPUTQ * -+ep4_alloc_outputq (EP_RAIL *r, unsigned slotSize, unsigned slotCount) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq; -+ int i; -+ -+ ASSERT ((slotSize & (EP_SYSTEMQ_MSG_ALIGN-1)) == 0); -+ -+ KMEM_ALLOC (outputq, EP4_OUTPUTQ *, sizeof (EP4_OUTPUTQ), 1); -+ -+ if (outputq == NULL) -+ return NULL; -+ -+ spin_lock_init (&outputq->q_lock); -+ -+ outputq->q_slotCount = slotCount; -+ outputq->q_slotSize = slotSize; -+ outputq->q_main = ep_alloc_main (r, Q_MAIN_SIZE(outputq), 0, &outputq->q_mainAddr); -+ -+ if (outputq->q_main == (E4_uint64 *) NULL) -+ { -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ return NULL; -+ } -+ -+ outputq->q_cq = elan4_alloccq (&rail->r_ctxt, CQ_Size64K, CQ_STENEnableBit | CQ_WriteEnableBit, CQ_Priority); -+ -+ if (outputq->q_cq == (ELAN4_CQ *) NULL) -+ { -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ KMEM_FREE (outputq, sizeof (EP_OUTPUTQ)); -+ } -+ -+ outputq->q_dwords = CQ_Size (outputq->q_cq->cq_size) >> 3; -+ -+ /* mark all the queue slots as finished */ -+ for (i = 0; i < slotCount; i++) -+ Q_DONE(outputq, i) = Q_DONE_VAL (EP_OUTPUTQ_FINISHED, 0); -+ -+ return (EP_OUTPUTQ *) outputq; -+} -+ -+void -+ep4_free_outputq (EP_RAIL *r, EP_OUTPUTQ *q) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ -+ elan4_freecq (&rail->r_ctxt, outputq->q_cq); -+ -+ ep_free_main (&rail->r_generic, outputq->q_mainAddr, Q_MAIN_SIZE(outputq)); -+ -+ spin_lock_destroy (&outputq->q_lock); -+ -+ KMEM_FREE (outputq, sizeof (EP4_OUTPUTQ)); -+} -+ -+void * -+ep4_outputq_msg (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ return (void *) Q_MSG ((EP4_OUTPUTQ *) q, slotNum); -+} -+ -+int -+ep4_outputq_state (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum) -+{ -+ EPRINTF2 (DBG_KMSG, "ep4_outputq_state: slotNum %d state %x\n", slotNum, (int)Q_DONE((EP4_OUTPUTQ *) q, slotNum)); -+ -+ return Q_DONE_RET(Q_DONE((EP4_OUTPUTQ *)q, slotNum)); -+} -+ -+int -+ep4_outputq_send (EP_RAIL *r, EP_OUTPUTQ *q, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries) -+{ -+ EP4_OUTPUTQ *outputq = (EP4_OUTPUTQ *) q; -+ unsigned int nbytes = roundup (size, 32); -+ unsigned int base = outputq->q_slotSize - nbytes; -+ unsigned int i, dwords; -+ unsigned long flags; -+ E4_uint64 val; -+ -+ spin_lock_irqsave (&outputq->q_lock, flags); -+ -+ EPRINTF4 (DBG_KMSG, "ep4_outputq_send: slotNum=%d size=%d vp=%d qnum=%d\n", slotNum, size, vp, qnum); -+ -+ /* compute command queue size as follows - each slot uses -+ * overhead: 14 dwords + -+ * data > 128 ? 36 dwords -+ * data > 64 ? 18 dwords -+ * data > 32 ? 10 dwords -+ * else 6 dwords -+ */ -+ dwords = 14 + (size > 128 ? 36 : -+ size > 64 ? 18 : -+ size ? 10 : 6); -+ -+ outputq->q_dwords += Q_DONE_CNT (Q_DONE(outputq, slotNum)); -+ -+ if (dwords > outputq->q_dwords) -+ { -+ /* attempt to reclaim command queue space from other slots */ -+ i = slotNum; -+ do { -+ if (++i == outputq->q_slotCount) -+ i = 0; -+ -+ val = Q_DONE(outputq, i); -+ -+ if ((Q_DONE_RET (val) == EP_OUTPUTQ_FINISHED || Q_DONE_RET (val) == EP_OUTPUTQ_FAILED) && Q_DONE_CNT(val) > 0) -+ { -+ outputq->q_dwords += Q_DONE_CNT (val); -+ -+ Q_DONE(outputq, i) = Q_DONE_VAL(Q_DONE_RET(val), 0); -+ } -+ } while (i != slotNum && dwords > outputq->q_dwords); -+ } -+ -+ if (dwords > outputq->q_dwords) -+ { -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ EPRINTF0 (DBG_KMSG, "ep4_outputq_state: no command queue space\n"); -+ return 0; -+ } -+ -+ outputq->q_dwords -= dwords; -+ -+ Q_DONE(outputq, slotNum) = Q_DONE_VAL (EP_OUTPUTQ_BUSY, dwords); -+ -+ if (outputq->q_retries != retries) -+ { -+ outputq->q_retries = retries; -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL(1) | GUARD_RESET(retries)); -+ elan4_nop_cmd (outputq->q_cq, 0); -+ } -+ -+ /* transfer the top "size" bytes from message buffer to top of input queue */ -+ elan4_open_packet (outputq->q_cq, OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, vp)); -+ elan4_sendtrans0 (outputq->q_cq, TR_INPUT_Q_GETINDEX, EP_SYSTEMQ_ADDR(qnum)); -+ -+ /* send upto EP_SYSTEMQ_MSG_MAX (256) bytes of message to the top of the slot */ -+ if (size > 128) -+ { -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 0, (void *) (Q_MSG(outputq, slotNum) + base + 0)); -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base + 128, (void *) (Q_MSG(outputq, slotNum) + base + 128)); -+ } -+ else if (size > 64) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (128 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else if (size > 32) -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (64 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ else -+ elan4_sendtransp (outputq->q_cq, TR_WRITE (32 >> 3, 0, TR_DATATYPE_DWORD), base, (void *) (Q_MSG(outputq, slotNum) + base)); -+ elan4_sendtrans1 (outputq->q_cq, TR_INPUT_Q_COMMIT, EP_SYSTEMQ_ADDR(qnum), 0 /* no cookie */); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FINISHED, dwords)); -+ -+ elan4_guard (outputq->q_cq, GUARD_CHANNEL (1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET (outputq->q_retries)); -+ elan4_write_dword_cmd (outputq->q_cq, Q_DONE_ADDR(outputq, slotNum), Q_DONE_VAL (EP_OUTPUTQ_FAILED, dwords)); -+ -+ spin_unlock_irqrestore (&outputq->q_lock, flags); -+ -+ return 1; -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kthread.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kthread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kthread.c 2005-07-28 14:52:52.894671744 -0400 -@@ -0,0 +1,186 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kthread.c,v 1.5 2004/05/19 08:54:57 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.c,v $*/ -+ -+#include -+ -+#include -+ -+void -+ep_kthread_init (EP_KTHREAD *kt) -+{ -+ spin_lock_init (&kt->lock); -+ kcondvar_init (&kt->wait); -+ -+ kt->next_run = 0; -+ kt->should_stall = 0; -+ kt->started = 0; -+ kt->should_stop = 0; -+ kt->stopped = 0; -+ kt->state = KT_STATE_RUNNING; -+} -+ -+void -+ep_kthread_destroy (EP_KTHREAD *kt) -+{ -+ spin_lock_destroy (&kt->lock); -+ kcondvar_destroy (&kt->wait); -+} -+ -+void -+ep_kthread_started (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->started = 1; -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+void -+ep_kthread_stopped (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->stopped = 1; -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ spin_unlock_irqrestore(&kt->lock, flags); -+} -+ -+int -+ep_kthread_should_stall (EP_KTHREAD *kth) -+{ -+ return (kth->should_stall); -+} -+ -+int -+ep_kthread_sleep (EP_KTHREAD *kt, long next_run) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (next_run && (kt->next_run == 0 || BEFORE (next_run, kt->next_run))) -+ kt->next_run = next_run; -+ -+ if (kt->should_stop) -+ { -+ spin_unlock_irqrestore (&kt->lock, flags); -+ return (-1); -+ } -+ -+ do { -+ if (kt->should_stall) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ kt->state = KT_STATE_SLEEPING; -+ kt->running = 0; -+ if (kt->should_stall || kt->next_run == 0) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ else -+ kcondvar_timedwait (&kt->wait,&kt->lock, &flags, kt->next_run); -+ kt->state = KT_STATE_RUNNING; -+ kt->running = lbolt; -+ } while (kt->should_stall); -+ kt->next_run = 0; -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return (0); -+} -+ -+void -+ep_kthread_schedule (EP_KTHREAD *kt, long tick) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->next_run == 0 || BEFORE (tick, kt->next_run)) -+ { -+ kt->next_run = tick; -+ if (!kt->should_stall && kt->state == KT_STATE_SLEEPING) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stall (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (kt->should_stall++ == 0) -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ -+ while (kt->state != KT_STATE_SLEEPING) -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_resume (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ if (--kt->should_stall == 0) -+ { -+ kt->state = KT_STATE_SCHEDULED; -+ kcondvar_wakeupone (&kt->wait, &kt->lock); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+void -+ep_kthread_stop (EP_KTHREAD *kt) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ kt->should_stop = 1; -+ while (kt->started && !kt->stopped) -+ { -+ kcondvar_wakeupall (&kt->wait, &kt->lock); -+ kcondvar_wait (&kt->wait, &kt->lock, &flags); -+ } -+ spin_unlock_irqrestore (&kt->lock, flags); -+} -+ -+int -+ep_kthread_state (EP_KTHREAD *kt, long *time) -+{ -+ unsigned long flags; -+ int res = KT_STATE_SLEEPING; -+ -+ spin_lock_irqsave (&kt->lock, flags); -+ -+ if (kt->next_run) { -+ *time = kt->next_run; -+ res = kt->should_stall ? KT_STATE_STALLED : KT_STATE_SCHEDULED; -+ } -+ -+ if (kt->running) { -+ *time = kt->running; -+ res = KT_STATE_RUNNING; -+ } -+ -+ spin_unlock_irqrestore (&kt->lock, flags); -+ -+ return res; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/kthread.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/kthread.h 2005-07-28 14:52:52.894671744 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/Makefile 2005-07-28 14:52:52.894671744 -0400 -@@ -0,0 +1,17 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/ep/Makefile -+# -+ -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# -+ -+obj-$(CONFIG_EP) += ep.o -+ep-objs := cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o $(ep3-$(CONFIG_EP)) $(ep4-$(CONFIG_EP)) -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/Makefile.conf 2005-07-28 14:52:52.895671592 -0400 -@@ -0,0 +1,12 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = ep.o -+MODULENAME = ep -+KOBJFILES = cm.o debug.o kalloc.o kcomm.o kmap.o kthread.o neterr.o nmh.o probenetwork.o railhints.o rmap.o statemap.o support.o threadcode.o epcomms.o epcommsRx.o epcommsTx.o epcommsFwd.o conf_linux.o procfs_linux.o ep_procfs.o cm_procfs.o \$\(ep3-\$\(CONFIG_EP\)\) \$\(ep4-\$\(CONFIG_EP\)\) -+EXPORT_KOBJS = conf_linux.o -+CONFIG_NAME = CONFIG_EP -+SGALFC = -+# EXTRALINES START -+ -+ep3-$(CONFIG_ELAN3) := kcomm_elan3.o kmsg_elan3.o kmap_elan3.o neterr_elan3.o probenetwork_elan3.o support_elan3.o threadcode_elan3.o threadcode_elan3_Linux.o epcomms_elan3.o epcommsTx_elan3.o epcommsRx_elan3.o -+ep4-$(CONFIG_ELAN4) := kcomm_elan4.o kmsg_elan4.o kmap_elan4.o neterr_elan4.o probenetwork_elan4.o commands_elan4.o debug_elan4.o support_elan4.o threadcode_elan4_Linux.o epcomms_elan4.o epcommsTx_elan4.o epcommsRx_elan4.o -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/neterr.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr.c 2005-07-28 14:52:52.895671592 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr.c,v 1.25.8.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie) -+{ -+ EP_SYS *sys = rail->System; -+ EP_NODE_RAIL *nodeRail = &rail->Nodes[nodeId]; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->NetworkErrorState == 0) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: raise context filter for node %d due to network error\n", rail->Name, nodeId); -+ printk ("%s: raise context filter for node %d due to network error\n", rail->Name, nodeId); -+ -+ rail->Operations.RaiseFilter (rail, nodeId); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ printk ("%s: node %d is flushing - deferring network error fixup\n", rail->Name, nodeId); -+ else -+ list_add_tail (&nodeRail->Link, &rail->NetworkErrorList); -+ } -+ -+ switch (what) -+ { -+ case EP_NODE_NETERR_ATOMIC_PACKET: -+ ASSERT (nodeRail->NetworkErrorCookies[channel] == 0); -+ -+ /* Need to raise the approriate context filter for this node, -+ * and periodically send a neterr fixup message to it until -+ * we receive an ack from it -+ */ -+ IncrStat (rail, NeterrAtomicPacket); -+ -+ nodeRail->NetworkErrorCookies[channel] = cookie; -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_ATOMIC_PACKET; -+ nodeRail->MsgXid = ep_xid_cache_alloc (sys, &rail->XidCache); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: atomic packet destroyed - node %d cookie %llx\n", rail->Name, nodeId, cookie); -+ -+ printk ("%s: atomic packet destroyed - node %d cookie %llx\n", rail->Name, nodeId, cookie); -+ break; -+ -+ case EP_NODE_NETERR_DMA_PACKET: -+ /* Must be an overlapped dma packet, raise the context filter, -+ * and hold it up for a NETWORK_ERROR_TIMEOUT */ -+ IncrStat (rail, NeterrDmaPacket); -+ -+ nodeRail->NetworkErrorState |= EP_NODE_NETERR_DMA_PACKET; -+ break; -+ } -+ -+ nodeRail->NextRunTime = lbolt + NETWORK_ERROR_TIMEOUT; -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ep_kthread_schedule (&sys->ManagerThread, nodeRail->NextRunTime); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/neterr_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr_elan3.c 2005-07-28 14:52:52.896671440 -0400 -@@ -0,0 +1,326 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan3.c,v 1.24 2003/11/17 13:26:45 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+typedef struct neterr_halt_args -+{ -+ EP3_RAIL *Rail; -+ unsigned int NodeId; -+ EP_NETERR_COOKIE *Cookies; -+} NETERR_HALT_ARGS; -+ -+static int -+DmaMatchesCookie (EP3_RAIL *rail, E3_DMA_BE *dma, int nodeId, EP_NETERR_COOKIE *cookies, char *where) -+{ -+ E3_uint32 cvproc; -+ E3_uint32 cookie; -+ -+ if (dma->s.dma_direction == DMA_WRITE) -+ { -+ cvproc = dma->s.dma_destCookieVProc; -+ cookie = dma->s.dma_srcCookieVProc; -+ } -+ else -+ { -+ cvproc = dma->s.dma_srcCookieVProc; -+ cookie = dma->s.dma_destCookieVProc; -+ } -+ -+ EPRINTF6 (DBG_NETWORK_ERROR, "%s: Neterr - %s: DMA %08x %08x %08x %08x\n", rail->Generic.Name, where, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_NETWORK_ERROR, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ if (EP_VP_ISDATA((cvproc & DMA_PROCESS_MASK)) && EP_VP_TO_NODE(cvproc & DMA_PROCESS_MASK) == nodeId) -+ { -+ /* -+ * This is a DMA going to the node which has a network fixup -+ * request pending, so check if the cookie matches. -+ */ -+ if ((cookie == cookies[0] || cookie == cookies[1]) /* && !WaitForEop */) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %08x on %s\n", rail->Generic.Name, cookie, where); -+ -+ return (TRUE); -+ } -+ } -+ -+ return (FALSE); -+} -+ -+ -+static void -+NetworkErrorHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ NETERR_HALT_ARGS *args = (NETERR_HALT_ARGS *) arg; -+ EP3_RAIL *rail = args->Rail; -+ EP_SYS *sys = rail->Generic.System; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ if (DmaMatchesCookie (rail, &dma, args->NodeId, args->Cookies, "runq ")) -+ { -+ /* -+ * Transfer the DMA to the node, it's source event will -+ * get executed later. -+ */ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ rail->NetworkErrorFlushed = TRUE; -+ kcondvar_wakeupall (&rail->NetworkErrorSleep, &sys->NodeLock); -+ -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+} -+ -+void -+ep3_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP_SYS *sys = rail->Generic.System; -+ ELAN3_DEV *dev = rail->Device; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ E3_DMA_BE dmabe; -+ EP3_COOKIE *cp; -+ E3_uint32 vp; -+ NETERR_HALT_ARGS args; -+ struct list_head *el, *nel, matchedList; -+ int i; -+ unsigned long flags; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ StallDmaRetryThread (rail); -+ -+ args.Rail = rail; -+ args.NodeId = nodeId; -+ args.Cookies = cookies; -+ -+ spin_lock_irqsave (&rail->Device->IntrLock, flags); -+ QueueHaltOperation (rail->Device, 0, NULL, INT_TProcHalted | INT_DProcHalted, NetworkErrorHaltOperation, &args); -+ spin_unlock_irqrestore (&rail->Device->IntrLock, flags); -+ -+ spin_lock_irqsave (&sys->NodeLock, flags); -+ while (! rail->NetworkErrorFlushed) -+ kcondvar_wait (&rail->NetworkErrorSleep, &sys->NodeLock, &flags); -+ rail->NetworkErrorFlushed = FALSE; -+ -+ spin_lock (&rail->DmaRetryLock); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (DmaMatchesCookie (rail, &retry->Dma, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->Link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->Link, &matchedList); -+ } -+ } -+ -+ spin_unlock (&rail->DmaRetryLock); -+ spin_unlock_irqrestore (&sys->NodeLock, flags); -+ -+ ResumeDmaRetryThread (rail); -+ -+ /* Now "set" the source event of any write DMA's */ -+ while (! list_empty (&matchedList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (matchedList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_srcEvent) -+ { -+ sdramaddr_t event = ep_elan2sdram (&rail->Generic, retry->Dma.s.dma_srcEvent); -+ -+ /* Block local interrupts, since we need to atomically -+ * decrement the event count and perform the word write -+ */ -+ local_irq_save (flags); -+ { -+ E3_uint32 type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type)); -+ E3_uint32 count = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Count)); -+ -+ elan3_sdram_writel (dev, event + offsetof (E3_Event, ev_Count), count - 1); -+ -+ if (count == 1) -+ { -+ if (type & EV_TYPE_MASK_BCOPY) -+ { -+ E3_Addr srcVal = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Source)); -+ E3_Addr dstAddr = elan3_sdram_readl (dev, event + offsetof (E3_BlockCopyEvent, ev_Dest)) & ~EV_BCOPY_DTYPE_MASK; -+ -+ ASSERT ((srcVal & EV_WCOPY) != 0); -+ -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: neterr perform event word write at %08x with %08x\n", rail->Generic.Name, dstAddr, srcVal); -+ -+ ELAN3_OP_STORE32 (rail->Ctxt, dstAddr, srcVal); -+ } -+ -+ if ((type & ~EV_TYPE_MASK_BCOPY) != 0) -+ { -+ if ((type & EV_TYPE_MASK_CHAIN) == EV_TYPE_CHAIN) -+ { -+ printk ("%s: event at %08x - chained event %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ else if ((type & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr event interrupt - cookie %08x\n", rail->Generic.Name, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ cp = LookupCookie (&rail->CookieTable, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY))); -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ } -+ else if ((type & EV_TYPE_MASK_DMA) == EV_TYPE_DMA) -+ { -+ sdramaddr_t dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2)); -+ -+ EPRINTF2 (DBG_NETWORK_ERROR, "%s: neterr chained dma - %08x\n", rail->Generic.Name, (type & ~EV_TYPE_MASK2)); -+ -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ { -+ vp = dmabe.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ } -+ else -+ { -+ vp = dmabe.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ ASSERT (EP_VP_ISDATA(vp)); -+ -+ nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ switch (nodeRail->State) -+ { -+ case EP_NODE_CONNECTED: -+ case EP_NODE_LEAVING_CONNECTED: -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ -+ case EP_NODE_LOCAL_PASSIVATE: -+ QueueDmaOnStalledList (rail, &dmabe); -+ break; -+ -+ default: -+ panic ("ep: neterr incorrect state for node\n"); -+ } -+ } -+ else if ((type & EV_TYPE_MASK_THREAD) == EV_TYPE_THREAD) -+ { -+ printk ("%s: event at %08x - thread waiting %x is invalid\n", rail->Generic.Name, retry->Dma.s.dma_srcEvent, type); -+ panic ("ep: neterr invalid event type\n"); -+ } -+ } -+ } -+ } -+ local_irq_restore(flags); -+ } -+ -+ /* add to free list */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/neterr_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/neterr_elan4.c 2005-07-28 14:52:52.896671440 -0400 -@@ -0,0 +1,251 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: neterr_elan4.c,v 1.2 2003/11/24 17:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/neterr_elan4.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+struct neterr_desc -+{ -+ EP4_RAIL *rail; -+ unsigned int nodeid; -+ EP_NETERR_COOKIE *cookies; -+ int done; -+} ; -+ -+static int -+dma_matches_cookie (EP4_RAIL *rail, E4_uint64 vproc, E4_uint64 cookie, unsigned int nodeId, EP_NETERR_COOKIE *cookies, const char *where) -+{ -+ if ((EP_VP_ISDATA (vproc) && EP_VP_TO_NODE (vproc) == nodeId) && (cookie == cookies[0] || cookie == cookies[1])) -+ { -+ EPRINTF3 (DBG_NETWORK_ERROR, "%s: match cookie %016llx on %s\n", rail->r_generic.Name, cookie, where); -+ -+ return 1; -+ } -+ return 0; -+} -+ -+static void -+ep4_neterr_dma_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ E4_uint64 vproc = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ -+ if (dma_matches_cookie (rail, vproc, cookie, desc->nodeid, desc->cookies, "runq ")) -+ { -+ elan4_sdram_copyq_from_sdram (dev, qfptr, &qentry, sizeof (E4_DProcQueueEntry)); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ /* Replace the dma with one which will "disappear" */ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_neterr_dma_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct neterr_desc *desc = (struct neterr_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_neterr_fixup_dmas (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct neterr_desc desc; -+ struct list_head matchedList; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ register int i; -+ -+ desc.rail = rail; -+ desc.nodeid = nodeId; -+ desc.cookies = cookies; -+ desc.done = 0; -+ -+ INIT_LIST_HEAD (&matchedList); -+ -+ /* First - stall the retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_neterr_dma_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_neterr_dma_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "retry")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ } -+ -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ if (dma_matches_cookie (rail, retry->retry_dma.dma_vproc, retry->retry_dma.dma_cookie, nodeId, cookies, "stalled")) -+ { -+ /* remove from retry list */ -+ list_del (&retry->retry_link); -+ -+ /* add to list of dmas which matched */ -+ list_add_tail (&retry->retry_link, &matchedList); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Now "set" the source event of any put DMA#'s we can use the dma -+ * retry command queue as the retry thread is stalled */ -+ while (! list_empty (&matchedList)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (matchedList.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ elan4_set_event_cmd (rail->r_dma_ecq->ecq_cq, retry->retry_dma.dma_srcEvent); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ /* Flush through the command queues to ensure that all the setevents have executed */ -+ ep4_flush_ecqs (rail); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_add_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ /* we're called from the ManagerThread, so no need to stall it */ -+ list_add_tail (&ops->op_link, &rail->r_neterr_ops); -+} -+void -+ep4_remove_neterr_ops (EP4_RAIL *rail, EP4_NETERR_OPS *ops) -+{ -+ EP_SYS *sys = rail->r_generic.System; -+ -+ ep_kthread_stall (&sys->ManagerThread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&sys->ManagerThread); -+} -+ -+void -+ep4_neterr_fixup_sten (EP4_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ struct list_head *el; -+ -+ list_for_each (el, &rail->r_neterr_ops) { -+ EP4_NETERR_OPS *op = list_entry (el, EP4_NETERR_OPS, op_link); -+ -+ (op->op_func) (rail, op->op_arg, nodeId, cookies); -+ } -+} -+ -+void -+ep4_neterr_fixup (EP_RAIL *r, unsigned int nodeId, EP_NETERR_COOKIE *cookies) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ -+ /* network error cookies can come from the following : -+ * -+ * DMA engine -+ * if a DMA matches a network error cookie, then we just need to -+ * execute the local setevent *before* returning. -+ * -+ * STEN packet -+ * if the STEN packet was generated with as a WAIT_FOR_EOP -+ * and it's not present on the retry lists, then re-create -+ * it. -+ * -+ */ -+ EPRINTF4 (DBG_NETWORK_ERROR, "%s: ep4_neterr_fixup: node %d cookies <%lld%s%s%s%s> <%lld%s%s%s%s>\n", -+ rail->r_generic.Name, nodeId, EP4_COOKIE_STRING(cookies[0]), EP4_COOKIE_STRING(cookies[1])); -+ -+ if ((cookies[0] & EP4_COOKIE_DMA) || (cookies[1] & EP4_COOKIE_DMA)) -+ ep4_neterr_fixup_dmas (rail, nodeId, cookies); -+ -+ if ((cookies[0] & EP4_COOKIE_STEN) || (cookies[1] & EP4_COOKIE_STEN)) -+ ep4_neterr_fixup_sten (rail, nodeId, cookies); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/nmh.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/nmh.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/nmh.c 2005-07-28 14:52:52.897671288 -0400 -@@ -0,0 +1,181 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+#ident "@(#)$Id: nmh.c,v 1.6 2004/01/05 13:48:08 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.c,v $*/ -+ -+#include -+ -+#include -+ -+#define EP_NMD_SPANS(nmd, base, top) ((nmd)->nmd_addr <= (base) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (top)) -+ -+#define EP_NMD_OVERLAPS(nmd, addr, len) ((nmd)->nmd_addr <= ((addr) + (len)) && \ -+ ((nmd)->nmd_addr + (nmd)->nmd_len - 1) >= (addr)) -+ -+#define EP_NMH_HASH(tbl,idx,addr) ((addr) % (tbl)->tbl_size[idx]) -+ -+int -+ep_nmh_init (EP_NMH_TABLE *tbl) -+{ -+ int i, idx, hsize = 1; -+ -+ for (idx = EP_NMH_NUMHASH-1; idx >= 0; idx--, hsize <<= 1) -+ { -+ tbl->tbl_size[idx] = (hsize < EP_NMH_HASHSIZE) ? hsize : EP_NMH_HASHSIZE; -+ -+ KMEM_ZALLOC (tbl->tbl_hash[idx], struct list_head *, sizeof (struct list_head) * tbl->tbl_size[idx], 1); -+ -+ if (tbl->tbl_hash == NULL) -+ { -+ while (++idx < EP_NMH_NUMHASH) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ return (ENOMEM); -+ } -+ -+ for (i = 0; i < tbl->tbl_size[idx]; i++) -+ INIT_LIST_HEAD (&tbl->tbl_hash[idx][i]); -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmh_fini (EP_NMH_TABLE *tbl) -+{ -+ int idx; -+ -+ for (idx = 0; idx < EP_NMH_NUMHASH; idx++) -+ if (tbl->tbl_hash[idx]) -+ KMEM_FREE (tbl->tbl_hash[idx], sizeof (struct list_head) * tbl->tbl_size[idx]); -+ -+ bzero (tbl, sizeof (EP_NMH_TABLE)); -+} -+ -+void -+ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ EP_ADDR base = nmh->nmh_nmd.nmd_addr; -+ EP_ADDR top = base + nmh->nmh_nmd.nmd_len - 1; -+ int idx; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ list_add_tail (&nmh->nmh_link, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]); -+} -+ -+void -+ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmh) -+{ -+ list_del (&nmh->nmh_link); -+} -+ -+EP_NMH * -+ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmd) -+{ -+ EP_ADDR base = nmd->nmd_addr; -+ EP_ADDR top = base + nmd->nmd_len - 1; -+ int idx; -+ struct list_head *le; -+ -+ for (idx = 0, base >>= 12, top >>= 12; base != top && idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) -+ ; -+ -+ for (; idx < EP_NMH_NUMHASH; idx++, base >>= 1, top >>= 1) { -+ -+ list_for_each (le, &tbl->tbl_hash[idx][EP_NMH_HASH(tbl, idx, base)]) { -+ EP_NMH *nmh = list_entry (le, EP_NMH, nmh_link); -+ -+ if (EP_NMD_SPANS (&nmh->nmh_nmd, nmd->nmd_addr, nmd->nmd_addr + nmd->nmd_len - 1)) -+ return (nmh); -+ } -+ } -+ -+ return (0); -+} -+ -+void -+ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len) -+{ -+ ASSERT ((off + len - 1) <= nmd->nmd_len); -+ -+ subset->nmd_addr = nmd->nmd_addr + off; -+ subset->nmd_len = len; -+ subset->nmd_attr = nmd->nmd_attr; -+} -+ -+int -+ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b) -+{ -+ if (EP_NMD_NODEID (a) != EP_NMD_NODEID (b)) /* not generated on the same node */ -+ return 0; -+ -+ if ((EP_NMD_RAILMASK (a) & EP_NMD_RAILMASK (b)) == 0) /* no common rails */ -+ return 0; -+ -+ if (b->nmd_addr == (a->nmd_addr + a->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = a->nmd_addr; -+ merged->nmd_len = a->nmd_len + b->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(a), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ return 1; -+ } -+ -+ if (a->nmd_addr == (b->nmd_addr + b->nmd_len)) -+ { -+ if (merged != NULL) -+ { -+ merged->nmd_addr = b->nmd_addr; -+ merged->nmd_len = b->nmd_len + a->nmd_len; -+ merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(b), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)); -+ } -+ -+ return 1; -+ } -+ -+ return 0; -+} -+ -+int -+ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask) -+{ -+ EP_NMH *nmh = ep_nmh_find (&sys->MappingTable, nmd); -+ -+ if (nmh == NULL) -+ { -+ printk ("ep_nmd_map_rails: nmd=%08x.%08x.%08x cannot be found\n", -+ nmd->nmd_addr, nmd->nmd_len, nmd->nmd_attr); -+ return (-1); -+ } -+ -+ return (nmh->nmh_ops->op_map_rails (sys, nmh, nmd, railmask)); -+} -+ -+EP_RAILMASK -+ep_nmd2railmask (EP_NMD *frags, int nFrags) -+{ -+ EP_RAILMASK mask; -+ -+ if (nFrags == 0) -+ return ((EP_RAILMASK)-1); -+ -+ for (mask = EP_NMD_RAILMASK(frags); --nFrags; ) -+ mask &= EP_NMD_RAILMASK(++frags); -+ -+ return (mask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/probenetwork.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork.c 2005-07-28 14:52:52.898671136 -0400 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork.c,v 1.43 2004/04/19 15:43:15 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork.c,v $ */ -+ -+#include -+ -+#include -+#include "debug.h" -+ -+int PositionCheck = 1; -+ -+#define NUM_DOWN_FROM_VAL(NumDownLinksVal, level) (((NumDownLinksVal) >> ((level) << 2)) & 0xF) -+ -+int -+ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos) -+{ -+ int lvl, i; -+ int level; -+ int nodeid; -+ int numnodes; -+ int randomRoutingDisabled; -+ int sw; -+ int nacks; -+ int nowayup; -+ int nalias; -+ int upmask; -+ int partial; -+ int link; -+ int invalid; -+ int linkdown[ELAN_MAX_LEVELS]; -+ int linkup[ELAN_MAX_LEVELS]; -+ EP_SWITCH *switches[ELAN_MAX_LEVELS]; -+ int switchCount[ELAN_MAX_LEVELS+1]; -+ int lowestBcast; -+ int numUpLinks[ELAN_MAX_LEVELS]; -+ int routedown [ELAN_MAX_LEVELS]; -+ -+ EPRINTF1 (DBG_PROBE, "%s: ProbeNetwork started\n", rail->Name); -+ -+ switchCount[0] = 1; -+ numUpLinks [0] = 4; -+ -+ for (level = 0; level < ELAN_MAX_LEVELS; level++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, level); -+ -+ KMEM_ZALLOC (switches[level], EP_SWITCH *, sizeof (EP_SWITCH) * switchCount[level], 1); -+ -+ for (sw = 0, nacks = 0, nowayup = 0, lowestBcast=7; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ int good = 1; -+ int tsw; -+ -+ for (nodeid = 0,tsw = sw, lvl = level-1 ; lvl >= 0 ; lvl--) -+ { -+ EP_SWITCH *lsw; -+ int link = (8-numUpLinks[lvl]) + (tsw % numUpLinks[lvl]); -+ -+ tsw = tsw / numUpLinks[lvl]; -+ lsw = &switches[lvl][tsw]; -+ -+ if (lsw->present == 0 || (lsw->lnr & (1 << link))) -+ { -+ EPRINTF4 (DBG_PROBE, "lvl %d sw %d present=%d lnr=%x\n", lvl, sw, lsw->present, lsw->lnr); -+ good = 0; -+ } -+ -+ linkup[lvl] = link; -+ linkdown[lvl] = lsw->link; -+ -+ if ( lvl ) nodeid = ((nodeid + linkdown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += linkdown[0]; -+ -+ } -+ -+ /* -+ * don't bother probing routes which we we've already seen are unreachable -+ * because a link upwards was in reset or the switch previously nacked us. -+ */ -+ if (! good) -+ { -+ lsw->present = 0; -+ -+ nacks++; -+ nowayup++; -+ -+ continue; -+ } -+ -+ lsw->present = rail->Operations.ProbeRoute (rail, level, sw, nodeid, linkup, linkdown, 5, lsw); -+ -+ if (! lsw->present) -+ { -+ EPRINTF3 (DBG_PROBE, "%s: level %d switch %d - unexpected nack\n", rail->Name, level, sw); -+ -+ nacks++; -+ nowayup++; -+ } -+ else -+ { -+ EPRINTF5 (DBG_PROBE, "%s: level %d switch %d - link %d bcast %d\n", rail->Name, level, sw, lsw->link, lsw->bcast); -+ -+ if (level == 2 && rail->Devinfo.dev_device_id == PCI_DEVICE_ID_ELAN3) -+ { -+ /* If we see broadcast top as 7, and we came in on a low link, then we can't -+ * determine whether we're in a 128 way or a un-configured 64u64d switch, so -+ * we treat it as a 64u64d and detect the 128 way case by "going over the top" -+ * below. Unless we've been told what it really is by NumDownLinksVal. -+ */ -+ if (lsw->bcast == 7 && lsw->link < 4) -+ lsw->bcast = ndown ? (ndown - 1) : 3; -+ } -+ -+ if ( lowestBcast > lsw->bcast ) -+ lowestBcast = lsw->bcast; -+ -+ if (lsw->link > (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast))) -+ { -+ /* We've arrived on a "up-link" - this could be either -+ * we're in the top half of a x8 top-switch - or we're -+ * in the bottom half and have gone "over the top". We -+ * differentiate these cases since the switches below -+ * a x8 top-switch will have broadcast top set to 3, -+ * and the x8 topswitch have broadcast top set to 7. -+ */ -+ if (lsw->bcast == 7) -+ nowayup++; -+ else -+ { -+ EPRINTF2 (DBG_PROBE, "%s: level %d - gone over the top\n", -+ rail->Name, level); -+ -+ if (level > 0) -+ { -+ KMEM_FREE (switches[level], sizeof (EP_SWITCH) * switchCount[level] ); -+ level--; -+ } -+ -+ numUpLinks[level] = 0; -+ goto finished; -+ } -+ } -+ -+ } -+ } -+ -+ numUpLinks[level] = ndown ? (8 - ndown) : (7 - lowestBcast); -+ switchCount[level+1] = switchCount[level] * numUpLinks[level]; -+ -+ /* Now we know which links are uplinks, we can see whether there is -+ * any possible ways up */ -+ upmask = (ndown ? (0xFF << ndown) & 0xFF : (0xFF << (8 - numUpLinks[level])) & 0xFF); -+ -+ for (sw = 0; sw < switchCount[level]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[level][sw]; -+ -+ if (lsw->present && lsw->link <= (ndown ? (ndown-1) : (lowestBcast == 7 ? 3 : lowestBcast)) && (switches[level][sw].lnr & upmask) == upmask) -+ nowayup++; -+ } -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d - sw=%d nacks=%d nowayup=%d bcast=%d numup=%d\n", -+ rail->Name, level, sw, nacks, nowayup, lowestBcast, numUpLinks[level]); -+ -+ if (nacks == sw) -+ { -+ static bitmap_t printed[BT_BITOUL(EP_MAX_RAILS)]; -+ -+ if (! BT_TEST (printed, rail->Number)) -+ printk ("%s: cannot determine network position\n", rail->Name); -+ BT_SET (printed, rail->Number); -+ goto failed; -+ } -+ -+ if (nowayup == sw) -+ goto finished; -+ } -+ -+ printk ("%s: exceeded number of levels\n", rail->Name); -+ level = ELAN_MAX_LEVELS - 1; -+ -+ failed: -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ return -EAGAIN; -+ -+ finished: -+ /* we've successfully probed the network - now calculate our node -+ * positon and what level of random routing is possible */ -+ nalias = 1; -+ for (lvl = 0, invalid = 0, partial = 0, randomRoutingDisabled = 0; lvl <= level; lvl++) -+ { -+ int ndown = NUM_DOWN_FROM_VAL (rail->Devinfo.dev_num_down_links_value, lvl); -+ int upmask = ndown ? (0xFF << ndown) & 0xFF : 0xF0; -+ -+ for (sw = 0, nalias = 0; sw < switchCount[lvl]; sw++) -+ { -+ EP_SWITCH *lsw = &switches[lvl][sw]; -+ -+ /* You can only use adaptive routing if links 4-7 are uplinks, and at least one of them is -+ * not in reset. Otherwise you can randomly select an "uplink" if all the uplinks are not -+ * in reset. */ -+ if (lsw->present && ((upmask == 0xF0) ? (lsw->lnr & upmask) == upmask : (lsw->lnr & upmask) != 0)) -+ randomRoutingDisabled |= (1 << lvl); -+ -+ if (!lsw->present) -+ partial++; -+ else -+ { -+ if (lsw->invalid) -+ { -+ printk ("%s: invalid switch detected (level %d switch %d)\n", rail->Name, lvl, sw); -+ invalid++; -+ } -+ -+ for (i = 0; i < nalias; i++) -+ if (linkdown[i] == lsw->link) -+ break; -+ if (i == nalias) -+ linkdown[nalias++] = lsw->link; -+ } -+ } -+ -+ link = linkdown[0]; -+ for (i = 1; i < nalias; i++) -+ if (linkdown[i] < link) -+ link = linkdown[i]; -+ -+ if (nalias > 1 && lvl != level) -+ { -+ printk ("%s: switch aliased below top level (level %d)\n", rail->Name, lvl); -+ invalid++; -+ } -+ -+ routedown[lvl] = link; -+ } -+ -+ for (lvl = 0; lvl <= level; lvl++) -+ KMEM_FREE (switches[lvl], sizeof (EP_SWITCH) * switchCount[lvl] ); -+ -+ if (invalid) -+ { -+ printk ("%s: invalid switch configuration\n", rail->Name); -+ return (EINVAL); -+ } -+ -+ /* Handle the aliasing case where a 16 way is used as multiple smaller switches */ -+ if (nalias == 1) -+ level++; -+ else if (nalias == 2) /* a 16 way as 2x8 ways */ -+ numUpLinks[level++] = 6; /* only 2 down links */ -+ else if (nalias > 4) /* a 16 way as 8x2 ways */ -+ numUpLinks[level-1] = 6; -+ -+ /* -+ * Compute my nodeid and number of nodes in the machine -+ * from the routedown and the number of downlinks at each level. -+ */ -+ for(nodeid=0, lvl = level - 1; lvl >= 0; lvl--) -+ { -+ if (lvl) nodeid = ((nodeid + routedown[lvl]) * (8-numUpLinks[lvl-1])); -+ else nodeid += routedown[0]; -+ } -+ -+ for (numnodes = 1, lvl = 0; lvl < level; lvl++) -+ numnodes *= (8 - numUpLinks[lvl]); -+ -+ sprintf (rail->Name, "ep%d[%d]", rail->Number, nodeid); -+ -+ if (randomRoutingDisabled & ((1 << (level-1))-1)) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing disabled 0x%x)\n", -+ rail->Name, nodeid, level, numnodes, randomRoutingDisabled); -+ else if (partial) -+ printk ("%s: nodeid=%d level=%d numnodes=%d (random routing ok)\n", -+ rail->Name, nodeid, level, numnodes); -+ else -+ printk ("%s: nodeid=%d level=%d numnodes=%d\n", -+ rail->Name, nodeid, level, numnodes); -+ -+ pos->pos_mode = ELAN_POS_MODE_SWITCHED; -+ pos->pos_nodeid = nodeid; -+ pos->pos_levels = level; -+ pos->pos_nodes = numnodes; -+ pos->pos_random_disabled = randomRoutingDisabled; -+ -+ for(lvl = 0; lvl < level; lvl++) -+ pos->pos_arity[level -lvl - 1] = (8-numUpLinks[lvl]); -+ pos->pos_arity[level] = 1; /* XXXX why does this need to be 1 ? */ -+ -+ return 0; -+} -+ -+/* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+#define BCAST_TOP_INVALID(lvl, bcast, ndown) ((lvl) == 0 ? (bcast) < ((ndown)-1) : (bcast) != ((ndown) - 1)) -+ -+void -+CheckPosition (EP_RAIL *rail) -+{ -+ ELAN_POSITION *pos = &rail->Position; -+ unsigned int nodeid = pos->pos_nodeid; -+ unsigned int invalid = 0; -+ unsigned int changed = 0; -+ int lvl, slvl; -+ -+ if (! PositionCheck) -+ return; -+ -+ if (rail->Operations.CheckPosition(rail)) /* is update ready for this rail */ -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: check position: SwitchProbeLevel=%d\n", rail->Name, rail->SwitchProbeLevel); -+ -+ for (lvl = 0, slvl = pos->pos_levels-1; lvl <= rail->SwitchProbeLevel; lvl++, slvl--) -+ { -+ EP_SWITCHSTATE *state = &rail->SwitchState[lvl]; -+ EP_SWITCHSTATE *lstate = &rail->SwitchLast[lvl]; -+ unsigned int ndown = pos->pos_arity[slvl]; -+ unsigned int upmask = (0xFF << ndown) & 0xFF; -+ unsigned int mylink = nodeid % ndown; -+ unsigned int error = 0; -+ unsigned int binval = 0; -+ -+ nodeid /= ndown; -+ -+ /* -+ * broadcast top is invalid if it is not set to the number of downlinks-1, -+ * or at the topmost level it is less than ndown-1. -+ */ -+ if (BCAST_TOP_INVALID(lvl, state->bcast, ndown) || (state->LNR & upmask) == upmask) -+ { -+ /* no way up from here - we'd better be at the top */ -+ if (lvl != (pos->pos_levels-1)) -+ { -+ if (state->bcast != (ndown-1)) -+ printk ("%s: invalid broadcast top %d at level %d\n", rail->Name, state->bcast, lvl); -+ else if ((state->LNR & upmask) == upmask && (lstate->LNR & upmask) == upmask) -+ printk ("%s: no way up to switch at level %d (turned off ?)\n", rail->Name, lvl+1); -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ printk ("%s: moved at top level was connected to link %d now connected to %d\n", rail->Name, mylink, state->linkid); -+ } -+ -+ if (state->linkid != mylink) -+ error++; -+ -+ if (BCAST_TOP_INVALID (lvl, state->bcast, ndown)) -+ binval++; -+ } -+ else -+ { -+ if (state->linkid != mylink) -+ { -+ if (state->linkid != rail->SwitchLast[lvl].linkid) -+ printk ("%s: moved at lvl %d was connected to link %d now connected to %d\n", rail->Name, lvl, mylink, state->linkid); -+ -+ error++; -+ } -+ } -+ -+ if (error == 0 && invalid == 0) -+ rail->SwitchProbeTick[lvl] = lbolt; -+ -+ EPRINTF10 (DBG_ROUTETABLE, "%s: lvl=%d (slvl=%d) linkid=%d bcast=%d lnr=%02x uplink=%d : error=%d binval=%d invalid=%d\n", -+ rail->Name, lvl, slvl, state->linkid, state->bcast, state->LNR, state->uplink, error, binval, invalid); -+ -+ invalid |= (error | binval); -+ } -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ if (rail->SwitchState[lvl].uplink != rail->SwitchLast[lvl].uplink) -+ changed++; -+ -+ if (changed) -+ { -+ printk ("%s: broadcast tree has changed from", rail->Name); -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%c%d", lvl == 0 ? ' ' : ',', rail->SwitchLast[lvl].uplink); -+ -+ for (lvl = 0; lvl < rail->SwitchProbeLevel; lvl++) -+ printk ("%s%d", lvl == 0 ? " to " : ",", rail->SwitchState[lvl].uplink); -+ printk ("\n"); -+ } -+ -+ if (rail->SwitchProbeLevel > 0) -+ bcopy (rail->SwitchState, rail->SwitchLast, rail->SwitchProbeLevel * sizeof (EP_SWITCHSTATE)); -+ } -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ EPRINTF4 (DBG_ROUTETABLE, "%s: level %d lbolt=%lx ProbeLevelTick=%lx\n", -+ rail->Name, lvl, lbolt, rail->SwitchProbeTick[lvl]); -+ -+ if (AFTER (lbolt, rail->SwitchProbeTick[lvl] + EP_POSITION_TIMEOUT)) -+ { -+ if (lvl < rail->SwitchBroadcastLevel+1) -+ { -+ if (lvl == 0) -+ printk ("%s: cable disconnected\n", rail->Name); -+ else -+ printk ("%s: broadcast level has dropped to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ break; -+ } -+ } -+ -+ if (lvl > rail->SwitchBroadcastLevel+1) -+ { -+ if (rail->SwitchBroadcastLevel < 0) -+ printk ("%s: cable reconnected\n", rail->Name); -+ if (lvl == rail->Position.pos_levels) -+ printk ("%s: broadcast level has recovered\n", rail->Name); -+ else -+ printk ("%s: broadcast level has recovered to %d (should be %d)\n", -+ rail->Name, lvl, rail->Position.pos_levels); -+ } -+ -+ if (rail->SwitchBroadcastLevel != (lvl - 1)) -+ { -+ EPRINTF2 (DBG_ROUTETABLE, "%s: setting SwitchBroadcastLevel to %d\n", rail->Name, lvl-1); -+ -+ rail->SwitchBroadcastLevel = lvl - 1; -+ rail->SwitchBroadcastLevelTick = lbolt; -+ } -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/probenetwork_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan3.c 2005-07-28 14:52:52.898671136 -0400 -@@ -0,0 +1,298 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3.c,v 1.40 2004/04/15 12:30:08 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+static void ep3_probe_event (EP3_RAIL *rail, void *arg); -+static EP3_COOKIE_OPS ep3_probe_ops = -+{ -+ ep3_probe_event -+} ; -+ -+int -+ep3_init_probenetwork (EP3_RAIL *rail) -+{ -+ sdramaddr_t stack; -+ E3_Addr sp; -+ E3_BlockCopyEvent event; -+ int i; -+ -+ if (! (stack = ep_alloc_elan (&rail->Generic, EP3_STACK_SIZE, 0, &rail->ProbeStack))) -+ return -ENOMEM; -+ -+ spin_lock_init (&rail->ProbeLock); -+ kcondvar_init (&rail->ProbeWait); -+ -+ /* Initialise the probe command structure */ -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[i]), 0); -+ for (i = 0; i < TR_TRACEROUTE_ENTRIES; i++) -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[i]), 1); -+ -+ RegisterCookie (&rail->CookieTable, &rail->ProbeCookie, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeDone), &ep3_probe_ops, rail); -+ -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Type), 0); -+ elan3_sdram_writel (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeStart.ev_Count), 0); -+ -+ EP3_INIT_COPY_EVENT (event, rail->ProbeCookie, rail->RailMainAddr + offsetof (EP3_RAIL_MAIN, ProbeDone), 1); -+ elan3_sdram_copyl_to_sdram (rail->Device, &event, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeDone), sizeof (E3_BlockCopyEvent)); -+ -+ rail->RailMain->ProbeDone = EP3_EVENT_FREE; -+ -+ sp = ep3_init_thread (rail->Device, ep_symbol (&rail->ThreadCode, "kcomm_probe"), -+ rail->ProbeStack, stack, EP3_STACK_SIZE, -+ 3, rail->CommandPortAddr, rail->RailElanAddr, rail->RailMainAddr); -+ -+ IssueRunThread (rail, sp); -+ -+ return 0; -+} -+ -+void -+ep3_destroy_probenetwork (EP3_RAIL *rail) -+{ -+ if (rail->ProbeStack == (sdramaddr_t) 0) -+ return; -+ -+ /* XXXX: ensure that the network probe thread is stopped */ -+ -+ DeregisterCookie (&rail->CookieTable, &rail->ProbeCookie); -+ -+ kcondvar_destroy (&rail->ProbeWait); -+ spin_lock_destroy (&rail->ProbeLock); -+ -+ ep_free_elan (&rail->Generic, rail->ProbeStack, EP3_STACK_SIZE); -+} -+ -+static void -+ep3_probe_event (EP3_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ rail->ProbeDone = 1; -+ kcondvar_wakeupone (&rail->ProbeWait, &rail->ProbeLock); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+} -+ -+int -+ep3_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ E3_uint16 flits[MAX_FLITS]; -+ E3_uint32 result; -+ int nflits; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->ProbeLock, flags); -+ -+ nflits = GenerateProbeRoute ( flits, nodeid, level, linkup, linkdown, 0); -+ -+ if (LoadRoute (rail->Device, rail->RouteTable, EP_VP_PROBE(level), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ { -+ EPRINTF0 (DBG_ROUTETABLE, "ProbeRoute: cannot load route entry\n"); -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ return (EINVAL); -+ } -+ -+ do { -+ /* Initialise the probe source to include our partially computed nodeid */ -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ elan3_sdram_writew (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), nodeid); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_SINGLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ rail->ProbeDone = 0; -+ -+ /* And wakeup the thread to do the probe */ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ -+ /* Now wait for it to complete */ -+ while (! rail->ProbeDone) -+ kcondvar_wait (&rail->ProbeWait, &rail->ProbeLock, &flags); -+ -+ /* wait for block copy event to flush write buffers */ -+ while (! EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone)) -+ if (! EP3_EVENT_FIRING(rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone), rail->ProbeCookie, railMain->ProbeDone)) -+ panic ("ProbeRoute: network probe event failure\n"); -+ -+ result = railMain->ProbeResult; -+ -+ if (result == C_ACK_ERROR) -+ kcondvar_timedwait (&rail->ProbeWait, &rail->ProbeLock, &flags, lbolt + (hz/8)); -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ -+ } while (result != C_ACK_OK && --attempts); -+ -+ if (result == C_ACK_OK) -+ { -+ if (railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid || -+ railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != nodeid) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %d\n", rail->Generic.Name, level, sw, -+ railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level)+1) - 1], nodeid); -+ -+ result = C_ACK_ERROR; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - level - 1]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - level - 1]; -+ -+ EPRINTF7 (DBG_PROBE, "%s: level %d switch %d - linkid=%d bcast=%d LNR=%02x%s\n", -+ rail->Generic.Name, level, sw, TR_TRACEROUTE0_LINKID(val0), -+ TR_TRACEROUTE1_BCAST_TOP(val1), TR_TRACEROUTE0_LNR(val0), -+ TR_TRACEROUTE0_REVID(val0) ? "" : " RevA Part"); -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = (TR_TRACEROUTE0_REVID(val0) == 0); -+ } -+ } -+ spin_unlock_irqrestore (&rail->ProbeLock, flags); -+ -+ return (result == C_ACK_OK); -+} -+ -+void -+ep3_probe_position_found (EP3_RAIL *rail, ELAN_POSITION *pos) -+{ -+ E3_uint16 flits[MAX_FLITS]; -+ int lvl, nflits; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ nflits = GenerateCheckRoute (pos, flits, pos->pos_levels - lvl - 1, 0); -+ -+ if (LoadRoute (rail->Device, rail->Ctxt->RouteTable, EP_VP_PROBE(lvl), ELAN3_MRF_CONTEXT_NUM|SYS_CONTEXT_BIT, nflits, flits) != 0) -+ panic ("ep3_probe_position_found: cannot load probe route entry\n"); -+ } -+ -+ /* Initialise the traceroute source data with our nodeid */ -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource0[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+ elan3_sdram_writew (rail->Device, rail->RailElan + offsetof (EP3_RAIL_ELAN, ProbeSource1[TR_TRACEROUTE_ENTRIES-1]), pos->pos_nodeid); -+} -+ -+int -+ep3_check_position (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ EP3_RAIL_MAIN *railMain = rail->RailMain; -+ sdramaddr_t railElan = rail->RailElan; -+ ELAN_POSITION *pos = &rail->Generic.Position; -+ unsigned int level = rail->RailMain->ProbeLevel; -+ unsigned int updated = EP3_EVENT_FIRED (rail->ProbeCookie, railMain->ProbeDone); -+ unsigned int lvl; -+ -+ if (updated) -+ { -+ if (railMain->ProbeResult != C_ACK_OK) -+ { -+ EPRINTF2 (DBG_PROBE, "%s: CheckNetworkPosition: packet nacked result=%d\n", rail->Generic.Name, railMain->ProbeResult); -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != pos->pos_nodeid || val1 != pos->pos_nodeid) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep3_check_position - level %d lost nodeid\n", rail->Generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->Generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0; lvl <= level; lvl++) -+ { -+ E3_uint16 val0 = railMain->ProbeDest0[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E3_uint16 val1 = railMain->ProbeDest1[TR_TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->Generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID(val0); -+ rail->Generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(val0); -+ rail->Generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ rail->Generic.SwitchState[lvl].uplink = 4; -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->Generic.SwitchState[lvl].linkid, -+ rail->Generic.SwitchState[lvl].LNR, rail->Generic.SwitchState[lvl].bcast ,rail->Generic.SwitchState[lvl].uplink); -+ } -+ rail->Generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ railMain->ProbeDone = EP3_EVENT_FREE; -+ } -+ -+ if (railMain->ProbeDone == EP3_EVENT_FREE) -+ { -+ if (rail->Generic.SwitchBroadcastLevel == rail->Generic.Position.pos_levels-1) -+ level = rail->Generic.Position.pos_levels - 1; -+ else -+ level = rail->Generic.SwitchBroadcastLevel + 1; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep3_check_postiion: level %d\n", rail->Generic.Name, level); -+ -+ /* Initialise the count result etc */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeType), PROBE_MULTIPLE); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeLevel), level); -+ -+ railMain->ProbeResult = -1; -+ railMain->ProbeLevel = -1; -+ -+ /* Clear the receive area */ -+ bzero (railMain->ProbeDest0, sizeof (railMain->ProbeDest0)); -+ bzero (railMain->ProbeDest1, sizeof (railMain->ProbeDest1)); -+ -+ /* Re-arm the completion event */ -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Type), EV_TYPE_BCOPY); -+ elan3_sdram_writel (rail->Device, railElan + offsetof (EP3_RAIL_ELAN, ProbeDone.ev_Count), 1); -+ -+ railMain->ProbeDone = EP3_EVENT_ACTIVE; -+ -+ IssueSetevent (rail, rail->RailElanAddr + offsetof (EP3_RAIL_ELAN, ProbeStart)); -+ } -+ -+ return updated; -+} -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan3_thread.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan3_thread.c 2005-07-28 14:52:52.899670984 -0400 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan3_thread.c,v 1.19 2004/03/24 11:32:56 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan3_thread.c,v $*/ -+ -+#include -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+ -+static int -+kcomm_probe_vp (EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain, int vp, int attempts, int timeouts) -+{ -+ int rc; -+ -+ /* Since we use %g1 to hold the "rxd" so the trap handler can -+ * complete the envelope processing - we pass zero to indicate we're -+ * not a receiver thread */ -+ asm volatile ("mov %g0, %g1"); -+ -+ while (attempts && timeouts) -+ { -+ c_open (vp); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest0, &railElan->ProbeSource0); -+ c_sendmem (TR_TRACEROUTE, &railMain->ProbeDest1, &railElan->ProbeSource1); -+ c_sendtrans0 (TR_SENDACK | TR_SETEVENT, (E3_Addr) 0); -+ -+ switch (rc = c_close()) -+ { -+ case C_ACK_OK: -+ return (C_ACK_OK); -+ -+ case C_ACK_DISCARD: -+ attempts--; -+ break; -+ -+ default: /* output timeout */ -+ timeouts--; -+ } -+ -+ c_break_busywait(); -+ } -+ -+ return (timeouts == 0 ? C_ACK_ERROR : C_ACK_DISCARD); -+} -+ -+void -+kcomm_probe (E3_CommandPort *cport, EP3_RAIL_ELAN *railElan, EP3_RAIL_MAIN *railMain) -+{ -+ int level; -+ -+ for (;;) -+ { -+ c_waitevent (&railElan->ProbeStart, 1); -+ -+ switch (railElan->ProbeType) -+ { -+ case PROBE_SINGLE: -+ railMain->ProbeResult = kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(railElan->ProbeLevel), -+ PROBE_SINGLE_ATTEMPTS, PROBE_SINGLE_TIMEOUTS); -+ -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ -+ case PROBE_MULTIPLE: -+ for (level = railElan->ProbeLevel; level >= 0; level--) -+ { -+ if (kcomm_probe_vp (railElan, railMain, EP_VP_PROBE(level), -+ PROBE_MULTIPLE_ATTEMPTS, PROBE_MULTIPLE_TIMEOUTS) == C_ACK_OK) -+ { -+ railMain->ProbeLevel = level; -+ railMain->ProbeResult = C_ACK_OK; -+ break; -+ } -+ -+ c_break_busywait(); -+ } -+ cport->SetEvent = (E3_Addr) &railElan->ProbeDone; -+ break; -+ } -+ -+ } -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/probenetwork_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/probenetwork_elan4.c 2005-07-28 14:52:52.900670832 -0400 -@@ -0,0 +1,396 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: probenetwork_elan4.c,v 1.9 2004/08/19 11:05:03 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/probenetwork_elan4.c,v $*/ -+ -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+static void -+probe_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ rail->r_probe_done = 1; -+ kcondvar_wakeupone (&rail->r_probe_wait, &rail->r_probe_lock); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+} -+ -+int -+ep4_probe_init (EP4_RAIL *rail) -+{ -+ spin_lock_init (&rail->r_probe_lock); -+ kcondvar_init (&rail->r_probe_wait); -+ -+ rail->r_probe_cq = ep4_alloc_ecq (rail, CQ_Size1K); -+ -+ if (rail->r_probe_cq == NULL) -+ return -ENOMEM; -+ -+ ep4_register_intcookie (rail, &rail->r_probe_intcookie, rail->r_elan_addr, probe_interrupt, rail); -+ -+ return 0; -+} -+ -+void -+ep4_probe_destroy (EP4_RAIL *rail) -+{ -+ if (rail->r_probe_cq) -+ ep4_free_ecq (rail, rail->r_probe_cq); -+ -+ if (rail->r_probe_intcookie.int_arg == NULL) -+ return; -+ ep4_deregister_intcookie (rail, &rail->r_probe_intcookie); -+ -+ kcondvar_destroy (&rail->r_probe_wait); -+ spin_lock_destroy (&rail->r_probe_lock); -+} -+ -+#define LINKDOWN(nodeid, level) ((nodeid >> (level << 1)) & 3) -+#define PROBE_PATTERN0(nodeid) (0xaddebabe ^ nodeid) -+#define PROBE_PATTERN1(nodeid) (0xfeedbeef ^ nodeid) -+ -+#define EP4_PROBE_RETRIES 4 -+ -+int -+ep4_probe_route (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, int *linkdown, int attempts, EP_SWITCH *lsw) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ EP4_RAIL_MAIN *rmain = rail->r_main; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ unsigned long flags; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < level; i++) -+ if (first == 0) -+ first = linkup ? FIRST_ROUTE(linkup[i]) : FIRST_ADAPTIVE; -+ else -+ packed[rb++] = linkup ? PACKED_ROUTE(linkup[i]) : PACKED_ADAPTIVE; -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ /* Generate the "down" routes */ -+ for (i = level-1; i >= 0; i--) -+ packed[rb++] = linkdown ? PACKED_ROUTE(linkdown[i]) : PACKED_ROUTE(LINKDOWN(nodeid, i)); -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(level), &route); -+ -+ while (attempts--) -+ { -+ rail->r_probe_done = 0; -+ -+ /* generate the STEN packet - note we use a datatype of dword as we're copying to elan in dwords -+ * NB - no flow control is required, since the max packet size is less than the command queue -+ * size and it's dedicated for network probing. -+ */ -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_nop_cmd (rail->r_probe_cq->ecq_cq, 0); -+ -+ elan4_open_packet (rail->r_probe_cq->ecq_cq, OPEN_STEN_PKT_CMD | OPEN_PACKET(0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(level))); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0), -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, -+ 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull, 0x0000000000000000ull | ((E4_uint64)PROBE_PATTERN0(nodeid) << 32)); -+ elan4_sendtransn (rail->r_probe_cq->ecq_cq, TR_TRACEROUTE(TRACEROUTE_NDWORDS), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1), -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, -+ 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000100000001ull, 0x0000000000000001ull | ((E4_uint64)PROBE_PATTERN1(nodeid) << 32)); -+ elan4_sendtrans0 (rail->r_probe_cq->ecq_cq, TR_NOP_TRANS | TR_LAST_AND_SEND_ACK, 0); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FINISHED); -+ -+ elan4_guard (rail->r_probe_cq->ecq_cq, GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_write_dword_cmd (rail->r_probe_cq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_result), EP4_STATE_FAILED); -+ -+ elan4_interrupt_cmd (rail->r_probe_cq->ecq_cq, rail->r_probe_intcookie.int_val); -+ -+ spin_lock_irqsave (&rail->r_probe_lock, flags); -+ while (! rail->r_probe_done) -+ kcondvar_wait (&rail->r_probe_wait, &rail->r_probe_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_probe_lock, flags); -+ -+ if (rmain->r_probe_result == EP4_STATE_FINISHED) -+ { -+ if (rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN0(nodeid) || -+ rmain->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level)+1) - 1] != PROBE_PATTERN1(nodeid)) -+ { -+ printk ("%s: lost nodeid at level %d switch %d - %d != %d\n", rail->r_generic.Name, level, sw, -+ rmain->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level)+1) - 1], PROBE_PATTERN0(nodeid)); -+ } -+ else -+ { -+ E4_uint32 val0 = rmain->r_probe_dest0[TRACEROUTE_ENTRIES - level - 1]; -+ E4_uint32 val1 = rmain->r_probe_dest1[TRACEROUTE_ENTRIES - level - 1]; -+ -+ lsw->lnr = TR_TRACEROUTE0_LNR(val0); -+ lsw->link = TR_TRACEROUTE0_LINKID(val0); -+ lsw->bcast = TR_TRACEROUTE1_BCAST_TOP(val1); -+ lsw->invalid = 0; -+ -+ return 1; -+ } -+ } -+ -+ rmain->r_probe_result = EP4_STATE_FREE; -+ } -+ -+ return 0; -+} -+ -+ -+void -+ep4_probe_position_found (EP4_RAIL *rail, ELAN_POSITION *pos) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ int lvl; -+ -+ for (lvl = 0; lvl < pos->pos_levels; lvl++) -+ { -+ /* Initialise the "probe" route to use the broadcast tree */ -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned char *arityp = &pos->pos_arity[pos->pos_levels - 1]; -+ unsigned int spanned = *arityp; -+ E4_uint16 first = 0; -+ int rb = 0; -+ -+ E4_uint8 packed[ROUTE_NUM_PACKED]; -+ E4_VirtualProcessEntry route; -+ int i; -+ -+ for (i = 0; i < ROUTE_NUM_PACKED; i++) -+ packed[i] = 0; -+ -+ /* Generate "up" routes */ -+ for (i = 0; i < lvl; i++, spanned *= *(--arityp)) -+ { -+ if (first == 0) -+ first = FIRST_BCAST_TREE; -+ else -+ packed[rb++] = PACKED_BCAST_TREE; -+ } -+ -+ /* Generate a "to-me" route down */ -+ if (first == 0) -+ first = FIRST_MYLINK; -+ else -+ packed[rb++] = PACKED_MYLINK; -+ -+ spanned /= *arityp++; -+ -+ /* Generate the "down" routes */ -+ for (i = lvl-1; i >= 0; i--) -+ { -+ spanned /= *arityp; -+ packed[rb++] = PACKED_ROUTE((pos->pos_nodeid / spanned) % *arityp); -+ arityp++; -+ } -+ -+ -+ /* Pack up the routes into the virtual process entry */ -+ route.Values[0] = first | FIRST_HIGH_PRI | FIRST_SYSTEM_PACKET | FIRST_TIMEOUT(3); -+ route.Values[1] = ROUTE_CTXT_VALUE(ELAN4_KCOMM_CONTEXT_NUM); -+ -+ for (i = 0; i < (ROUTE_NUM_PACKED >> 1); i++) -+ { -+ route.Values[0] |= ((E4_uint64) packed[i]) << ((i << 2) + ROUTE_PACKED_OFFSET); -+ route.Values[1] |= ((E4_uint64) packed[i+(ROUTE_NUM_PACKED >> 1)]) << ((i << 2)); -+ } -+ -+ elan4_write_route (rail->r_ctxt.ctxt_dev, rail->r_routetable, EP_VP_PROBE(lvl), &route); -+ -+ /* Initialise "start" event for this level */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopySource), -+ rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl])); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_start[lvl].ev_CopyDest), -+ rail->r_probe_cq->ecq_addr); -+ -+ /* Initiailise command stream - reset the start event */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_cmd), -+ WRITE_DWORD_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_reset_event_value), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, EP4_CHECK_STEN_NDWORDS)); -+ -+ /* Initiailise command stream - sten traceroute packet */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_open), -+ OPEN_STEN_PKT_CMD | OPEN_PACKET (0, PACK_OK | RESTART_COUNT_ZERO, EP_VP_PROBE(lvl))); -+ -+ /* Initiailise command stream - traceroute 0 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute0), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute0), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest0)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute0[i]), -+ 0x0000000000000000ull | ((E4_uint64) PROBE_PATTERN0(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - traceroute 1 */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_traceroute1), -+ SEND_TRANS_CMD | (TR_TRACEROUTE(TRACEROUTE_NDWORDS) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_traceroute1), -+ rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_dest1)); -+ for (i = 0; i < (TRACEROUTE_NDWORDS-1); i++) -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000100000001ull); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_data_traceroute1[i]), -+ 0x0000000000000001ull | ((E4_uint64) PROBE_PATTERN1(pos->pos_nodeid) << 32)); -+ -+ /* Initiailise command stream - null sendack */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_trans_sendack), -+ SEND_TRANS_CMD | ((TR_NOP_TRANS | TR_LAST_AND_SEND_ACK) << 16)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_addr_sendack), -+ 0); -+ -+ /* Initiailise command stream - guard ok, write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_ok), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, PACK_OK) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_writedword_ok), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_value_ok), -+ lvl); -+ -+ /* Initiailise command stream - guard fail, chain to next or write done */ -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_guard_fail), -+ GUARD_CMD | GUARD_CHANNEL(1) | GUARD_TEST(0, RESTART_COUNT_ZERO) | GUARD_RESET(EP4_PROBE_RETRIES)); -+ -+ if (lvl > 0) -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[lvl-1]))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ NOP_CMD); -+ } -+ else -+ { -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_fail), -+ WRITE_DWORD_CMD | (rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_probe_level))); -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_setevent_nop), -+ EP4_PROBE_FAILED); -+ } -+ elan4_sdram_writeq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_check_sten[lvl].c_nop_pad), -+ NOP_CMD); -+ } -+ -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ -+ mb(); -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[pos->pos_levels-1])); -+} -+ -+int -+ep4_check_position (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ ELAN_POSITION *pos = &rail->r_generic.Position; -+ unsigned int level = rail->r_main->r_probe_level; -+ unsigned int lvl; -+ -+ EPRINTF2 (DBG_PROBE, "%s: ep4_check_position: level=%lld\n", rail->r_generic.Name, rail->r_main->r_probe_level); -+ -+ if (rail->r_main->r_probe_level != EP4_PROBE_ACTIVE) -+ { -+ if (rail->r_main->r_probe_level == EP4_PROBE_FAILED) -+ { -+ EPRINTF1 (DBG_PROBE, "%s: ep4_check_position: packets all nacked\n", rail->r_generic.Name); -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ E4_uint32 val0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ E4_uint32 val1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - 2*(level+1)]; -+ -+ if (val0 != PROBE_PATTERN0 (pos->pos_nodeid) || val1 != PROBE_PATTERN1 (pos->pos_nodeid)) -+ { -+ static unsigned long printed = 0; -+ -+ /* We've received a packet from another node - this probably means -+ * that we've moved */ -+ if ((lbolt - printed) > (HZ*10)) -+ { -+ printk ("%s: ep4_check_position - level %d lost nodeid\n", rail->r_generic.Name, level); -+ printed = lbolt; -+ } -+ -+ rail->r_generic.SwitchProbeLevel = -1; -+ } -+ else -+ { -+ for (lvl = 0 ; lvl <= level; lvl++) -+ { -+ E4_uint32 uval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - lvl - 1]; -+ E4_uint32 dval0 = rail->r_main->r_probe_dest0[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ E4_uint32 dval1 = rail->r_main->r_probe_dest1[TRACEROUTE_ENTRIES - ((2*level) - lvl + 1)]; -+ -+ rail->r_generic.SwitchState[lvl].linkid = TR_TRACEROUTE0_LINKID (dval0); -+ rail->r_generic.SwitchState[lvl].LNR = TR_TRACEROUTE0_LNR(dval0); -+ rail->r_generic.SwitchState[lvl].bcast = TR_TRACEROUTE1_BCAST_TOP (dval1); -+ rail->r_generic.SwitchState[lvl].uplink = TR_TRACEROUTE0_LINKID (uval0); -+ -+ EPRINTF5 (DBG_PROBE, " --- lvl %d: linkid=%d LNR=%x bcast=%d uplink=%d\n", lvl, rail->r_generic.SwitchState[lvl].linkid, -+ rail->r_generic.SwitchState[lvl].LNR, rail->r_generic.SwitchState[lvl].bcast ,rail->r_generic.SwitchState[lvl].uplink); -+ -+ } -+ -+ rail->r_generic.SwitchProbeLevel = level; -+ } -+ } -+ -+ rail->r_main->r_probe_level = EP4_PROBE_ACTIVE; -+ mb(); -+ -+ if (rail->r_generic.SwitchBroadcastLevel == rail->r_generic.Position.pos_levels-1) -+ level = rail->r_generic.Position.pos_levels - 1; -+ else -+ level = rail->r_generic.SwitchBroadcastLevel + 1; -+ -+ ep4_set_event_cmd (rail->r_probe_cq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_check_start[level])); -+ -+ return 1; -+ } -+ -+ return 0; -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/procfs_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/procfs_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/procfs_linux.c 2005-07-28 14:52:52.901670680 -0400 -@@ -0,0 +1,693 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: procfs_linux.c,v 1.53.2.4 2005/01/18 14:18:42 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/procfs_linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "cm.h" -+#include "debug.h" -+#include "conf_linux.h" -+#include -+#include -+#include -+ -+#include -+ -+struct proc_dir_entry *ep_procfs_root; -+struct proc_dir_entry *ep_config_root; -+ -+/* -+ * We provide a slightly "special" interface for /proc/elan/device%d/nodeset, -+ * so that it can be included in a "poll" system call. On each "read" on the -+ * file, we generate a new nodeset if a) the previous one has been completely -+ * read and b) if it has changed since it was generated. -+ * -+ * Unfortunately ... this doesn't allow "tail -f" to work, since this uses -+ * fstat() on the fd, as we only hold the last nodeset string, we could not -+ * handle the case where two processes were reading a different rates. -+ * We could maybe have implemented this as a "sliding window", so that we -+ * add a new nodeset string, when it has changed and someone reads past -+ * end of the last one. Then if someone read from before out "window" -+ * we would produce "padding" data. The problem with this, is that a -+ * simple "cat" on /proc/elan/device%d/nodeset will read the whole "file" -+ * which will be mostly padding ! -+ * -+ * Just to not that the purpose of this interface is: -+ * 1) to allow cat /proc/elan/device%d/nodeset to show the current -+ * nodeset. -+ * 2) to allow rms (or similar) to poll() on the file, and when the -+ * nodeset changes read a new one. -+ * -+ * so ... we don't bother solving the troublesome "tail -f" problem. -+ */ -+ -+typedef struct nodeset_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ unsigned pr_changed; -+ char *pr_page; -+ unsigned pr_off; -+ unsigned pr_len; -+} NODESET_PRIVATE; -+ -+NODESET_PRIVATE *ep_nodeset_list; -+wait_queue_head_t ep_nodeset_wait; -+spinlock_t ep_nodeset_lock; -+ -+static int -+proc_write_state(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "start") && rail->State == EP_RAIL_STATE_UNINITIALISED) -+ ep_start_rail (rail); -+ -+ if (! strcmp (tmpbuf, "stop") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ ep_stop_rail (rail); -+ -+ if (! strcmp (tmpbuf, "offline") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 1, CM_OFFLINE_PROCFS); -+ -+ if (! strcmp (tmpbuf, "online") && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ cm_force_offline (rail, 0, CM_OFFLINE_PROCFS); -+ -+ if (! strncmp (tmpbuf, "restart=", 8) && rail->State == EP_RAIL_STATE_RUNNING) -+ cm_restart_node (rail, simple_strtol (tmpbuf + 8, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "panic=", 6)) -+ ep_panic_node (rail->System, simple_strtol(tmpbuf + 6, NULL, 0), -+ strchr (tmpbuf, ',') ? strchr(tmpbuf, ',') + 1 : "remote panic request"); -+ -+ if (! strncmp (tmpbuf, "raise=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.RaiseFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ if (! strncmp (tmpbuf, "lower=", 6) && rail->State > EP_RAIL_STATE_UNINITIALISED) -+ rail->Operations.LowerFilter (rail, simple_strtol (tmpbuf + 6, NULL, 0)); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_state(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ int len; -+ -+ switch (rail->State) -+ { -+ case EP_RAIL_STATE_UNINITIALISED: -+ len = sprintf (page, "uninitialised\n"); -+ break; -+ case EP_RAIL_STATE_STARTED: -+ len = sprintf (page, "started\n"); -+ break; -+ case EP_RAIL_STATE_RUNNING: -+ len = sprintf (page, "running NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ case EP_RAIL_STATE_INCOMPATIBLE: -+ len = sprintf (page, "incompatible NodeId=%d NumNodes=%d\n", rail->Position.pos_nodeid, rail->Position.pos_nodes); -+ break; -+ default: -+ len = sprintf (page, "\n"); -+ break; -+ } -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+static int -+proc_write_display(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "rail")) -+ DisplayRail (rail); -+ if (! strcmp (tmpbuf, "segs")) -+ DisplaySegs (rail); -+ if (! strcmp (tmpbuf, "nodes")) -+ DisplayNodes (rail); -+ if (! strcmp (tmpbuf, "status")) -+ DisplayStatus (rail); -+ if (! strcmp (tmpbuf, "debug") && rail->Operations.Debug) -+ rail->Operations.Debug (rail); -+ if (! strncmp (tmpbuf, "epcomms", 7)) -+ ep_comms_display (rail->System, tmpbuf[7] == '=' ? tmpbuf + 8 : NULL); -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_display(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "\n"); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+ -+static int -+proc_read_stats(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ -+ if ( rail == NULL ) { -+ strcpy(page,"proc_read_stats rail=NULL\n"); -+ } else { -+ page[0] = 0; -+ ep_fillout_stats(rail, page); -+ rail->Operations.FillOutStats (rail, page); -+ } -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, strlen(page))); -+} -+ -+static int -+proc_read_devinfo(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ EP_RAIL *rail = (EP_RAIL *) data; -+ ELAN_DEVINFO *devinfo = &rail->Devinfo; -+ ELAN_POSITION *pos = &rail->Position; -+ char *p = page; -+ -+ switch (devinfo->dev_device_id) -+ { -+ case PCI_DEVICE_ID_ELAN3: -+ p += sprintf (p, "ep%d is elan3 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ -+ case PCI_DEVICE_ID_ELAN4: -+ p += sprintf (p, "ep%d is elan4 %d rev %c\n", rail->Number, -+ devinfo->dev_instance, 'a' + devinfo->dev_revision_id); -+ break; -+ default: -+ p += sprintf (p, "ep%d is unkown %x/%x\n", rail->Number, devinfo->dev_vendor_id, devinfo->dev_device_id); -+ break; -+ } -+ -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ p += sprintf (p, "ep%d nodeid %d numnodes %d\n", rail->Number, pos->pos_nodeid, pos->pos_nodes); -+ -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, p - page)); -+} -+ -+static struct rail_info -+{ -+ char *name; -+ int (*read_func) (char *page, char **start, off_t off, int count, int *eof, void *data); -+ int (*write_func) (struct file *file, const char *buf, unsigned long count, void *data); -+} rail_info[] = { -+ {"state", proc_read_state, proc_write_state}, -+ {"display", proc_read_display, proc_write_display}, -+ {"stats", proc_read_stats, NULL}, -+ {"devinfo", proc_read_devinfo, NULL}, -+}; -+ -+static int -+nodeset_open (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr; -+ -+ if ((pr = kmalloc (sizeof (NODESET_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_changed = 1; -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_page = NULL; -+ pr->pr_rail = (EP_RAIL *)( PDE(inode)->data ); -+ -+ spin_lock (&ep_nodeset_lock); -+ pr->pr_next = ep_nodeset_list; -+ ep_nodeset_list = pr; -+ spin_unlock (&ep_nodeset_lock); -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+static int -+nodeset_release (struct inode *inode, struct file *file) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ NODESET_PRIVATE **ppr; -+ -+ spin_lock (&ep_nodeset_lock); -+ for (ppr = &ep_nodeset_list; (*ppr) != pr; ppr = &(*ppr)->pr_next) -+ ; -+ (*ppr) = pr->pr_next; -+ spin_unlock (&ep_nodeset_lock); -+ -+ if (pr->pr_page) -+ free_page ((unsigned long) pr->pr_page); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+nodeset_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ EP_RAIL *rail = pr->pr_rail; -+ int error; -+ unsigned long flags; -+ -+ if (!pr->pr_changed && pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (pr->pr_page == NULL && (pr->pr_page = (char *) __get_free_page (GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ kmutex_lock (&rail->CallbackLock); -+ if (rail->State == EP_RAIL_STATE_RUNNING) -+ { -+ spin_lock_irqsave (&rail->System->NodeLock, flags); -+ ep_sprintf_bitmap (pr->pr_page, PAGESIZE, statemap_tobitmap(rail->NodeSet), 0, 0, rail->Position.pos_nodes); -+ spin_unlock_irqrestore (&rail->System->NodeLock, flags); -+ -+ if (rail->SwitchBroadcastLevel == -1) -+ strcat (pr->pr_page, ""); -+ else if (rail->SwitchBroadcastLevel < (rail->Position.pos_levels-1)) -+ sprintf (pr->pr_page + strlen (pr->pr_page), "<%d>", rail->SwitchBroadcastLevel); -+ strcat (pr->pr_page, "\n"); -+ } -+ else -+ strcpy (pr->pr_page, "\n"); -+ kmutex_unlock (&rail->CallbackLock); -+ -+ pr->pr_len = strlen (pr->pr_page); -+ pr->pr_off = 0; -+ pr->pr_changed = 0; -+ } -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_page + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ if (pr->pr_off >= pr->pr_len) -+ { -+ free_page ((unsigned long) pr->pr_page); -+ pr->pr_page = NULL; -+ } -+ -+ return (count); -+} -+ -+static unsigned int -+nodeset_poll (struct file *file, poll_table *wait) -+{ -+ NODESET_PRIVATE *pr = (NODESET_PRIVATE *) file->private_data; -+ -+ poll_wait (file, &ep_nodeset_wait, wait); -+ if (pr->pr_changed || pr->pr_off < pr->pr_len) -+ return (POLLIN | POLLRDNORM); -+ return (0); -+} -+ -+static void -+nodeset_callback (void *arg, statemap_t *map) -+{ -+ EP_RAIL *rail = (EP_RAIL *) arg; -+ NODESET_PRIVATE *pr; -+ -+ ep_display_bitmap (rail->Name, "Nodeset", statemap_tobitmap(map), 0, ep_numnodes(rail->System)); -+ -+ spin_lock (&ep_nodeset_lock); -+ for (pr = ep_nodeset_list; pr; pr = pr->pr_next) -+ if (pr->pr_rail == rail) -+ pr->pr_changed = 1; -+ spin_unlock (&ep_nodeset_lock); -+ -+ wake_up_interruptible (&ep_nodeset_wait); -+} -+ -+void -+proc_character_fill (long mode, char *fmt, ...) -+{ -+ int len; -+ va_list ap; -+ PROC_PRIVATE *private = (PROC_PRIVATE *)mode; -+ -+ /* is the buffer already full */ -+ if (private->pr_len >= private->pr_data_len) -+ return; -+ -+ /* attempt to fill up to the remaining space */ -+ va_start (ap, fmt); -+ len = vsnprintf ( & private->pr_data[private->pr_len], (private->pr_data_len - private->pr_len), fmt, ap); -+ va_end (ap); -+ -+ if (len < 0 ) -+ { -+ /* we have reached the end of buffer and need to fail all future writes -+ * the caller can check (pr_len >= pr_data_len) and recall with more space -+ */ -+ private->pr_len = private->pr_data_len; -+ return; -+ } -+ -+ /* move the length along */ -+ private->pr_len += len; -+} -+ -+int -+proc_release (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr = (PROC_PRIVATE *) file->private_data; -+ -+ if (pr->pr_data) -+ KMEM_FREE (pr->pr_data, pr->pr_data_len); -+ kfree (pr); -+ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+ssize_t -+proc_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ PROC_PRIVATE *pr = (PROC_PRIVATE *) file->private_data; -+ int error; -+ -+ if (pr->pr_off >= pr->pr_len) -+ return (0); -+ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if (count >= (pr->pr_len - pr->pr_off)) -+ count = pr->pr_len - pr->pr_off; -+ -+ copy_to_user (buf, pr->pr_data + pr->pr_off, count); -+ -+ pr->pr_off += count; -+ *ppos += count; -+ -+ return (count); -+} -+ -+static int -+proc_open (struct inode *inode, struct file *file) -+{ -+ PROC_PRIVATE *pr; -+ CM_RAIL *cmRail; -+ int pages = 4; -+ unsigned long flags; -+ -+ if ((pr = kmalloc (sizeof (PROC_PRIVATE), GFP_KERNEL)) == NULL) -+ return (-ENOMEM); -+ -+ pr->pr_rail = (EP_RAIL *)(PDE(inode)->data); -+ -+ do { -+ pr->pr_data_len = PAGESIZE * pages; -+ -+ KMEM_ZALLOC (pr->pr_data, char *, pr->pr_data_len, 1); -+ if (pr->pr_data == NULL) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Out of Memory\n"); -+ break; -+ } -+ -+ pr->pr_off = 0; -+ pr->pr_len = 0; -+ pr->pr_data[0] = 0; -+ -+ if (pr->pr_rail->State != EP_RAIL_STATE_RUNNING) -+ { -+ pr->pr_len = sprintf (pr->pr_data, "Rail not Running\n"); -+ break; -+ } -+ else -+ { -+ pr->pr_di.func = proc_character_fill; -+ pr->pr_di.arg = (long)pr; -+ -+ if (!strcmp("maps", file->f_dentry->d_iname)) -+ { -+ cmRail = pr->pr_rail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeMaps (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("segs", file->f_dentry->d_iname)) -+ { -+ cmRail = pr->pr_rail->ClusterRail; -+ -+ spin_lock_irqsave (&cmRail->Lock, flags); -+ DisplayNodeSgmts (&pr->pr_di, cmRail); -+ spin_unlock_irqrestore (&cmRail->Lock, flags); -+ } -+ -+ if (!strcmp("tree", file->f_dentry->d_iname)) -+ DisplayRailDo (&pr->pr_di, pr->pr_rail); -+ } -+ -+ if ( pr->pr_len < pr->pr_data_len) -+ break; /* we managed to get all the output into the buffer */ -+ -+ pages++; -+ KMEM_FREE ( pr->pr_data, pr->pr_data_len); -+ } while (1); -+ -+ -+ file->private_data = (void *) pr; -+ -+ MOD_INC_USE_COUNT; -+ return (0); -+} -+ -+struct file_operations proc_nodeset_operations = -+{ -+ read: nodeset_read, -+ poll: nodeset_poll, -+ open: nodeset_open, -+ release: nodeset_release, -+}; -+ -+struct file_operations proc_operations = -+{ -+ read: proc_read, -+ open: proc_open, -+ release: proc_release, -+}; -+ -+void -+ep_procfs_rail_init (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir; -+ struct proc_dir_entry *p; -+ char name[10]; -+ int i; -+ -+ sprintf (name, "rail%d", rail->Number); -+ -+ if ((dir = rail->ProcDir = proc_mkdir (name, ep_procfs_root)) == NULL) -+ return; -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ { -+ if ((p = create_proc_entry (rail_info[i].name, 0, dir)) != NULL) -+ { -+ p->read_proc = rail_info[i].read_func; -+ p->write_proc = rail_info[i].write_func; -+ p->data = rail; -+ p->owner = THIS_MODULE; -+ } -+ } -+ -+ if ((p = create_proc_entry ("nodeset", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_nodeset_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ -+ rail->CallbackRegistered = 1; -+ ep_register_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ } -+ -+ if ((p = create_proc_entry ("maps", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("segs", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+ if ((p = create_proc_entry ("tree", 0, dir)) != NULL) -+ { -+ p->proc_fops = &proc_operations; -+ p->owner = THIS_MODULE; -+ p->data = rail; -+ } -+ -+} -+ -+void -+ep_procfs_rail_fini (EP_RAIL *rail) -+{ -+ struct proc_dir_entry *dir = rail->ProcDir; -+ char name[10]; -+ int i; -+ -+ if (dir == NULL) -+ return; -+ -+ if (rail->CallbackRegistered) -+ { -+ ep_remove_callback (rail, EP_CB_NODESET, nodeset_callback, rail); -+ -+ remove_proc_entry ("nodeset", dir); -+ } -+ -+ remove_proc_entry ("maps", dir); -+ remove_proc_entry ("segs", dir); -+ remove_proc_entry ("tree", dir); -+ -+ for (i = 0; i < sizeof (rail_info)/sizeof (rail_info[0]); i++) -+ remove_proc_entry (rail_info[i].name, dir); -+ -+ sprintf (name, "rail%d", rail->Number); -+ remove_proc_entry (name, ep_procfs_root); -+} -+ -+#include "quadrics_version.h" -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+void -+ep_procfs_init() -+{ -+ extern int txd_stabilise; -+ extern int MaxSwitchLevels; -+ -+ spin_lock_init (&ep_nodeset_lock); -+ init_waitqueue_head (&ep_nodeset_wait); -+ -+ ep_procfs_root = proc_mkdir ("ep", qsnet_procfs_root); -+ ep_config_root = proc_mkdir ("config", ep_procfs_root); -+ -+ qsnet_proc_register_str (ep_procfs_root, "version", quadrics_version, 1); -+ -+ qsnet_proc_register_hex (ep_config_root, "epdebug", &epdebug, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_console", &epdebug_console, 0); -+ qsnet_proc_register_hex (ep_config_root, "epdebug_cmlevel", &epdebug_cmlevel, 0); -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ qsnet_proc_register_hex (ep_config_root, "epdebug_check_sum", &epdebug_check_sum, 0); -+#endif -+ qsnet_proc_register_hex (ep_config_root, "epcomms_forward_limit", &epcomms_forward_limit, 0); -+ qsnet_proc_register_int (ep_config_root, "txd_stabilise", &txd_stabilise, 0); -+ qsnet_proc_register_int (ep_config_root, "assfail_mode", &assfail_mode, 0); -+ qsnet_proc_register_int (ep_config_root, "max_switch_levels", &MaxSwitchLevels, 1); -+ -+ ep_procfs_rcvr_xmtr_init(); -+} -+ -+void -+ep_procfs_fini(void) -+{ -+ ep_procfs_rcvr_xmtr_fini(); -+ -+ remove_proc_entry ("max_switch_levels", ep_config_root); -+ remove_proc_entry ("assfail_mode", ep_config_root); -+ remove_proc_entry ("txd_stabilise", ep_config_root); -+ remove_proc_entry ("epcomms_forward_limit", ep_config_root); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ remove_proc_entry ("epdebug_check_sum", ep_config_root); -+#endif -+ remove_proc_entry ("epdebug_cmlevel", ep_config_root); -+ remove_proc_entry ("epdebug_console", ep_config_root); -+ remove_proc_entry ("epdebug", ep_config_root); -+ -+ remove_proc_entry ("version", ep_procfs_root); -+ -+ remove_proc_entry ("config", ep_procfs_root); -+ remove_proc_entry ("ep", qsnet_procfs_root); -+ -+ spin_lock_destroy (&ep_nodeset_lock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/quadrics_version.h 2005-07-28 14:52:52.901670680 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/railhints.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/railhints.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/railhints.c 2005-07-28 14:52:52.902670528 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: railhints.c,v 1.5 2004/02/06 22:37:06 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/railhints.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include "debug.h" -+ -+int -+ep_pickRail(EP_RAILMASK railmask) -+{ -+ static volatile int lastGlobal; -+ int i, rnum, last = lastGlobal; -+ -+ /* Pick a single rail out of the railmask */ -+ for (i = 0; i < EP_MAX_RAILS; i++) -+ if (railmask & (1 << ((last + i) % EP_MAX_RAILS))) -+ break; -+ -+ if (i == EP_MAX_RAILS) -+ return (-1); -+ -+ rnum = (last + i) % EP_MAX_RAILS; -+ -+ lastGlobal = (rnum + 1) % EP_MAX_RAILS; -+ -+ ASSERT (railmask & (1 << rnum)); -+ -+ return (rnum); -+} -+ -+int -+ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails) -+{ -+ /* Retrun a single rail out of allowed mask with the best connectivity for broadcast. */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask)); -+} -+ -+int -+ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ EPRINTF5 (DBG_XMTR, "ep_xmtr_prefrail: xmtr=%p allowedRails=%x nodeId=%d xmtr->RailMaks=%x Connected=%x\n", -+ xmtr, allowedRails, nodeId, xmtr->RailMask, node->ConnectedRails); -+ -+ /* Return a single rail which is currently connected to nodeId (limited to rails -+ * in allowedmask) - if more than one rail is possible, then round-robin between -+ * them */ -+ return (ep_pickRail (allowedRails & xmtr->RailMask & node->ConnectedRails)); -+} -+ -+EP_RAILMASK -+ep_xmtr_availrails (EP_XMTR *xmtr) -+{ -+ /* Return which rails can be used to transmit one. */ -+ -+ return (xmtr->RailMask); -+} -+ -+EP_RAILMASK -+ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId) -+{ -+ EP_NODE *node = &xmtr->Subsys->Subsys.Sys->Nodes[nodeId]; -+ -+ /* Return which rails can be used to transmit to this node. */ -+ -+ return (xmtr->RailMask & node->ConnectedRails); -+} -+ -+int -+ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails) -+{ -+ /* Return the "best" rail for queueing a receive buffer out on - this will be a -+ * rail with ThreadWaiting set or the rail with the least descriptors queued -+ * on it. */ -+ -+ return (ep_pickRail (allowedRails & rcvr->RailMask)); -+} -+ -+EP_RAILMASK -+ep_rcvr_availrails (EP_RCVR *rcvr) -+{ -+ /* Return which rails can be used to queue receive buffers. */ -+ return (rcvr->RailMask); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/rmap.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/rmap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/rmap.c 2005-07-28 14:52:52.902670528 -0400 -@@ -0,0 +1,365 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmap.c,v 1.15 2004/05/19 10:24:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.c,v $ */ -+ -+#include -+#include -+ -+#include "debug.h" -+ -+void -+ep_display_rmap (EP_RMAP *mp) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ ep_debugf (DBG_DEBUG, "map: %s size %d free %d\n", mp->m_name, mp->m_size, mp->m_free); -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ ep_debugf (DBG_DEBUG, " [%lx - %lx]\n", bp->m_addr, bp->m_addr+bp->m_size-1); -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+void -+ep_mapinit (EP_RMAP *mp, char *name, u_int mapsize) -+{ -+ spin_lock_init (&mp->m_lock); -+ kcondvar_init (&mp->m_wait); -+ -+ /* The final segment in the array has size 0 and acts as a delimiter -+ * we insure that we never use segments past the end of the array by -+ * maintaining a free segment count in m_free. When excess segments -+ * occur we discard some resources */ -+ -+ mp->m_size = mapsize; -+ mp->m_free = mapsize; -+ mp->m_name = name; -+ -+ bzero (mp->m_map, sizeof (EP_RMAP_ENTRY) * (mapsize+1)); -+} -+ -+EP_RMAP * -+ep_rmallocmap (size_t mapsize, char *name, int cansleep) -+{ -+ EP_RMAP *mp; -+ -+ KMEM_ZALLOC (mp, EP_RMAP *, sizeof (EP_RMAP) + mapsize*sizeof (EP_RMAP_ENTRY), cansleep); -+ -+ if (mp != NULL) -+ ep_mapinit (mp, name, mapsize); -+ -+ return (mp); -+} -+ -+void -+ep_rmfreemap (EP_RMAP *mp) -+{ -+ spin_lock_destroy (&mp->m_lock); -+ kcondvar_destroy (&mp->m_wait); -+ -+ KMEM_FREE (mp, sizeof (EP_RMAP) + mp->m_size * sizeof (EP_RMAP_ENTRY)); -+} -+ -+static u_long -+ep_rmalloc_locked (EP_RMAP *mp, size_t size) -+{ -+ EP_RMAP_ENTRY *bp; -+ u_long addr; -+ -+ ASSERT (size > 0); -+ ASSERT (SPINLOCK_HELD (&mp->m_lock)); -+ -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ if (bp->m_size >= size) -+ { -+ addr = bp->m_addr; -+ bp->m_addr += size; -+ -+ if ((bp->m_size -= size) == 0) -+ { -+ /* taken all of this slot - so shift the map down */ -+ do { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ } while (((bp-1)->m_size = bp->m_size) != 0); -+ -+ mp->m_free++; -+ } -+ return (addr); -+ } -+ } -+ -+ return (0); -+} -+ -+u_long -+ep_rmalloc (EP_RMAP *mp, size_t size, int cansleep) -+{ -+ unsigned long addr; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ while ((addr = ep_rmalloc_locked (mp, size)) == 0 && cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ -+ return (addr); -+} -+ -+ -+ -+u_long -+ep_rmalloc_constrained (EP_RMAP *mp, size_t size, u_long alo, u_long ahi, u_long align, int cansleep) -+{ -+ EP_RMAP_ENTRY *bp, *bp2, *lbp; -+ unsigned long addr=0; -+ size_t delta; -+ int ok; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ again: -+ for (bp = &mp->m_map[0]; bp->m_size; bp++) -+ { -+ delta = 0; -+ -+ if (alo < bp->m_addr) -+ { -+ addr = bp->m_addr; -+ -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = (bp->m_size >= (size + delta)); -+ else -+ ok = ((bp->m_addr + size + delta) <= ahi); -+ } -+ else -+ { -+ addr = alo; -+ if (addr & (align-1)) -+ addr = (addr + (align-1)) & ~(align-1); -+ delta = addr - bp->m_addr; -+ -+ if (ahi >= bp->m_addr + bp->m_size) -+ ok = ((alo + size + delta) <= (bp->m_addr + bp->m_size)); -+ else -+ ok = ((alo + size + delta) <= ahi); -+ } -+ -+ if (ok) -+ break; -+ } -+ -+ if (bp->m_size == 0) -+ { -+ if (cansleep) -+ { -+ mp->m_want = 1; -+ kcondvar_wait (&mp->m_wait, &mp->m_lock, &flags); -+ goto again; -+ } -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (0); -+ } -+ -+ /* found an approriate map entry - so take the bit out which we want */ -+ if (bp->m_addr == addr) -+ { -+ if (bp->m_size == size) -+ { -+ /* allocate entire segment and compress map */ -+ bp2 = bp; -+ while (bp2->m_size) -+ { -+ bp2++; -+ (bp2-1)->m_addr = bp2->m_addr; -+ (bp2-1)->m_size = bp2->m_size; -+ } -+ mp->m_free++; -+ } -+ else -+ { -+ /* take from start of segment */ -+ bp->m_addr += size; -+ bp->m_size -= size; -+ } -+ } -+ else -+ { -+ if (bp->m_addr + bp->m_size == addr + size) -+ { -+ /* take from end of segment */ -+ bp->m_size -= size; -+ } -+ else -+ { -+ /* split the segment loosing the last entry if there's no space */ -+ if (mp->m_free == 0) -+ { -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ } -+ -+ for (bp2 = bp; bp2->m_size != 0; bp2++) -+ continue; -+ -+ for (bp2--; bp2 > bp; bp2--) -+ { -+ (bp2+1)->m_addr = bp2->m_addr; -+ (bp2+1)->m_size = bp2->m_size; -+ } -+ -+ mp->m_free--; -+ -+ (bp+1)->m_addr = addr + size; -+ (bp+1)->m_size = bp->m_addr + bp->m_size - (addr + size); -+ bp->m_size = addr - bp->m_addr; -+ } -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+ return (addr); -+} -+ -+void -+ep_rmfree (EP_RMAP *mp, size_t size, u_long addr) -+{ -+ EP_RMAP_ENTRY *bp; -+ unsigned long t; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&mp->m_lock, flags); -+ -+ ASSERT (addr != 0 && size > 0); -+ -+again: -+ /* find the piece of the map which starts after the returned space -+ * or the end of the map */ -+ for (bp = &mp->m_map[0]; bp->m_addr <= addr && bp->m_size != 0; bp++) -+ ; -+ -+ /* bp points to the piece to the right of where we want to go */ -+ -+ if (bp > &mp->m_map[0] && (bp-1)->m_addr + (bp-1)->m_size >= addr) -+ { -+ /* merge with piece on the left */ -+ -+ ASSERT ((bp-1)->m_addr + (bp-1)->m_size <= addr); -+ -+ (bp-1)->m_size += size; -+ -+ ASSERT (bp->m_size == 0 || addr+size <= bp->m_addr); -+ -+ if (bp->m_size && (addr + size) == bp->m_addr) -+ { -+ /* merge witht he piece on the right by -+ * growing the piece on the left and shifting -+ * the map down */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ (bp-1)->m_size += bp->m_size; -+ while (bp->m_size) -+ { -+ bp++; -+ (bp-1)->m_addr = bp->m_addr; -+ (bp-1)->m_size = bp->m_size; -+ } -+ -+ mp->m_free++; -+ } -+ } -+ else if (addr + size >= bp->m_addr && bp->m_size) -+ { -+ /* merge with piece to the right */ -+ -+ ASSERT ((addr + size) <= bp->m_addr); -+ -+ bp->m_addr -= size; -+ bp->m_size += size; -+ } -+ else -+ { -+ /* doesn't join with left or right - check for map -+ overflow and discard the smallest of the last or -+ next to last entries */ -+ -+ if (mp->m_free == 0) -+ { -+ EP_RMAP_ENTRY *lbp; -+ -+ /* find last map entry */ -+ for (lbp = bp; lbp->m_size != 0; lbp++) -+ ; -+ lbp--; -+ -+ if (lbp->m_size > (lbp-1)->m_size) -+ lbp--; -+ -+ printk ("%s: lost resource map entry [%lx, %lx]\n", -+ mp->m_name, lbp->m_addr, lbp->m_addr + lbp->m_size); -+ -+ *lbp = *(lbp+1); -+ (lbp+1)->m_size = 0; -+ -+ mp->m_free++; -+ goto again; -+ } -+ -+ /* make a new entry and push the remaining ones up */ -+ do { -+ t = bp->m_addr; -+ bp->m_addr = addr; -+ addr = t; -+ t = bp->m_size; -+ bp->m_size = size; -+ bp++; -+ } while ((size = t) != 0); -+ -+ mp->m_free--; -+ } -+ -+ /* if anyone blocked on rmalloc failure, wake 'em up */ -+ if (mp->m_want) -+ { -+ mp->m_want = 0; -+ kcondvar_wakeupall (&mp->m_wait, &mp->m_lock); -+ } -+ -+ spin_unlock_irqrestore (&mp->m_lock, flags); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/spinlock_elan3_thread.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/spinlock_elan3_thread.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/spinlock_elan3_thread.c 2005-07-28 14:52:52.903670376 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: spinlock_elan3_thread.c,v 1.9 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/spinlock_elan3_thread.c,v $ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+ -+void -+ep3_spinblock (EP3_SPINLOCK_ELAN *sle, EP3_SPINLOCK_MAIN *sl) -+{ -+ do { -+ sl->sl_seq = sle->sl_seq; /* Release my lock */ -+ -+ while (sle->sl_lock) /* Wait until the main */ -+ c_break(); /* releases the lock */ -+ -+ sle->sl_seq++; /* and try and relock */ -+ } while (sle->sl_lock); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/statemap.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/statemap.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/statemap.c 2005-07-28 14:52:52.903670376 -0400 -@@ -0,0 +1,385 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statemap.c,v 1.11.8.1 2004/11/18 12:05:00 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.c,v $ */ -+ -+#include -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+static int -+statemap_setmapbit (bitmap_t *map, int offset, int bit) -+{ -+ bitmap_t *e = &map[offset >> BT_ULSHIFT]; -+ bitmap_t mask = ((bitmap_t)1) << (offset & BT_ULMASK); -+ int rc = ((*e) & mask) != 0; -+ -+ if (bit) -+ { -+ *e |= mask; -+ return (!rc); -+ } -+ -+ *e &= ~mask; -+ return (rc); -+} -+ -+static int -+statemap_firstsegbit (bitmap_t seg) -+{ -+ int bit = 0; -+ -+ if (seg == 0) -+ return (-1); -+ -+#if (BT_ULSHIFT == 6) -+ if ((seg & 0xffffffffL) == 0) -+ { -+ seg >>= 32; -+ bit += 32; -+ } -+#elif (BT_ULSHIFT != 5) -+# error "Unexpected value of BT_ULSHIFT" -+#endif -+ -+ if ((seg & 0xffff) == 0) -+ { -+ seg >>= 16; -+ bit += 16; -+ } -+ -+ if ((seg & 0xff) == 0) -+ { -+ seg >>= 8; -+ bit += 8; -+ } -+ -+ if ((seg & 0xf) == 0) -+ { -+ seg >>= 4; -+ bit += 4; -+ } -+ -+ if ((seg & 0x3) == 0) -+ { -+ seg >>= 2; -+ bit += 2; -+ } -+ -+ return (((seg & 0x1) == 0) ? bit + 1 : bit); -+} -+ -+bitmap_t -+statemap_getseg (statemap_t *map, unsigned int offset) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ return (map->bitmap[offset >> BT_ULSHIFT]); -+} -+ -+void -+statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg) -+{ -+ ASSERT (offset < map->size); -+ ASSERT ((offset & BT_ULMASK) == 0); -+ -+ offset >>= BT_ULSHIFT; -+ if (map->bitmap[offset] == seg) -+ return; -+ -+ map->bitmap[offset] = seg; -+ -+ if (statemap_setmapbit (map->changemap2, offset, 1) && -+ statemap_setmapbit (map->changemap1, offset >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, offset >>= BT_ULSHIFT, 1); -+} -+ -+bitmap_t -+statemap_getbits (statemap_t *map, unsigned int offset, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask = (nbits == BT_NBIPUL) ? (bitmap_t) -1 : (((bitmap_t)1) << nbits) - 1; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ return ((map->bitmap[index] >> offset) & mask); -+ -+ return (((map->bitmap[index] >> offset) | -+ (map->bitmap[index + 1] << (BT_NBIPUL - offset))) & mask); -+} -+ -+void -+statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits) -+{ -+ int index = offset >> BT_ULSHIFT; -+ bitmap_t mask; -+ bitmap_t seg; -+ bitmap_t newseg; -+ -+ ASSERT (nbits <= BT_NBIPUL); -+ ASSERT (offset + nbits <= map->size); -+ -+ offset &= BT_ULMASK; -+ if (offset + nbits <= BT_NBIPUL) -+ { -+ mask = ((nbits == BT_NBIPUL) ? -1 : ((((bitmap_t)1) << nbits) - 1)) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+ return; -+ } -+ -+ mask = ((bitmap_t)-1) << offset; -+ seg = map->bitmap[index]; -+ newseg = ((bits << offset) & mask) | (seg & ~mask); -+ -+ if (seg != newseg) -+ { -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >> BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >> (2 * BT_ULSHIFT), 1); -+ } -+ -+ index++; -+ offset = BT_NBIPUL - offset; -+ mask = (((bitmap_t)1) << (nbits - offset)) - 1; -+ seg = map->bitmap[index]; -+ newseg = ((bits >> offset) & mask) | (seg & ~mask); -+ -+ if (seg == newseg) -+ return; -+ -+ map->bitmap[index] = newseg; -+ -+ if (statemap_setmapbit (map->changemap2, index, 1) && -+ statemap_setmapbit (map->changemap1, index >>= BT_ULSHIFT, 1)) -+ statemap_setmapbit (map->changemap0, index >>= BT_ULSHIFT, 1); -+} -+ -+void -+statemap_zero (statemap_t *dst) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, offset += BT_NBIPUL) -+ { -+ *dstmap = 0; -+ *changemap2 |= bit2; -+ } -+ *changemap1 |= bit1; -+ } -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_setmap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ if (*dstmap != *srcmap) -+ { -+ *dstmap = *srcmap; -+ *changemap2 |= bit2; -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+void -+statemap_ormap (statemap_t *dst, statemap_t *src) -+{ -+ int size = dst->size; -+ int offset = 0; -+ bitmap_t *changemap0 = dst->changemap0; -+ bitmap_t *changemap1 = dst->changemap1; -+ bitmap_t *changemap2 = dst->changemap2; -+ bitmap_t *dstmap = dst->bitmap; -+ bitmap_t *srcmap = src->bitmap; -+ bitmap_t bit0; -+ bitmap_t bit1; -+ bitmap_t bit2; -+ bitmap_t seg; -+ -+ ASSERT (src->size == size); -+ -+ for (bit0 = 1; offset < size; bit0 <<= 1, changemap1++) -+ { -+ for (bit1 = 1; bit1 != 0 && offset < size; bit1 <<= 1, changemap2++) -+ { -+ for (bit2 = 1; bit2 != 0 && offset < size; bit2 <<= 1, dstmap++, srcmap++, offset += BT_NBIPUL) -+ { -+ seg = *dstmap | *srcmap; -+ if (*dstmap != seg) -+ { -+ *dstmap = seg; -+ *changemap2 |= bit2; -+ } -+ } -+ if (*changemap2 != 0) -+ *changemap1 |= bit1; -+ } -+ if (*changemap1 != 0) -+ *changemap0 |= bit0; -+ } -+} -+ -+int -+statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange) -+{ -+ int bit0; -+ bitmap_t *cm1; -+ int bit1; -+ bitmap_t *cm2; -+ int bit2; -+ unsigned int offset; -+ -+ bit0 = statemap_firstsegbit (*(map->changemap0)); -+ if (bit0 < 0) -+ return (-1); -+ -+ offset = bit0; -+ cm1 = map->changemap1 + offset; -+ bit1 = statemap_firstsegbit (*cm1); -+ ASSERT (bit1 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit1; -+ cm2 = map->changemap2 + offset; -+ bit2 = statemap_firstsegbit (*cm2); -+ ASSERT (bit2 >= 0); -+ -+ offset = (offset << BT_ULSHIFT) + bit2; -+ *newseg = map->bitmap[offset]; -+ -+ if (clearchange && -+ (*cm2 &= ~(((bitmap_t)1) << bit2)) == 0 && -+ (*cm1 &= ~(((bitmap_t)1) << bit1)) == 0) -+ map->changemap0[0] &= ~(((bitmap_t)1) << bit0); -+ -+ return (offset << BT_ULSHIFT); -+} -+ -+int -+statemap_changed (statemap_t *map) -+{ -+ return ((*(map->changemap0) != 0)); -+} -+ -+void -+statemap_reset (statemap_t *map) -+{ -+ bzero (map->changemap0, map->changemap_nob + map->bitmap_nob); -+} -+ -+void -+statemap_copy (statemap_t *dst, statemap_t *src) -+{ -+ ASSERT (dst->size == src->size); -+ bcopy (src->changemap0, dst->changemap0, src->changemap_nob + src->bitmap_nob); -+} -+ -+void -+statemap_clearchanges (statemap_t *map) -+{ -+ if (statemap_changed (map)) -+ bzero (map->changemap0, map->changemap_nob); -+} -+ -+bitmap_t * -+statemap_tobitmap (statemap_t *map) -+{ -+ return (map->bitmap); -+} -+ -+statemap_t * -+statemap_create (int size) -+{ -+ int struct_entries = (sizeof (statemap_t) * 8 + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int bitmap_entries = (size + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap2_entries = (bitmap_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap1_entries = (changemap2_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap0_entries = (changemap1_entries + (BT_NBIPUL-1)) >> BT_ULSHIFT; -+ int changemap_entries = changemap0_entries + changemap1_entries + changemap2_entries; -+ int nob = (struct_entries + bitmap_entries + changemap_entries) * sizeof (bitmap_t); -+ statemap_t *map; -+ -+ ASSERT ((1 << BT_ULSHIFT) == BT_NBIPUL); -+ ASSERT (changemap0_entries == 1); -+ -+ KMEM_ZALLOC (map, statemap_t *, nob, 1); -+ -+ map->size = size; -+ map->nob = nob; -+ map->changemap_nob = changemap_entries * sizeof (bitmap_t); -+ map->bitmap_nob = bitmap_entries * sizeof (bitmap_t); -+ map->changemap0 = ((bitmap_t *)map) + struct_entries; -+ map->changemap1 = map->changemap0 + changemap0_entries; -+ map->changemap2 = map->changemap1 + changemap1_entries; -+ map->bitmap = map->changemap2 + changemap2_entries; -+ -+ return (map); -+} -+ -+void -+statemap_destroy (statemap_t *map) -+{ -+ KMEM_FREE (map, map->nob); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/statusmon.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/statusmon.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/statusmon.h 2005-07-28 14:52:52.904670224 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: statusmon.h,v 1.6 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statusmon.h,v $*/ -+ -+#ifndef __ELAN3_STATUSMON_H -+#define __ELAN3_STATUSMON_H -+ -+typedef struct statusmon_node -+{ -+ u_int NodeId; -+ u_int State; -+} STATUSMON_SGMT; -+ -+typedef struct statusmon_level -+{ -+ unsigned Width; -+ STATUSMON_SGMT Nodes[CM_SGMTS_PER_LEVEL]; -+} STATUSMON_LEVEL; -+ -+typedef struct statusmon_msg -+{ -+ unsigned Type; -+ unsigned NodeId; -+ unsigned NumLevels; -+ unsigned TopLevel; -+ unsigned Role; -+ STATUSMON_LEVEL Levels[CM_MAX_LEVELS]; -+} STATUSMON_MSG; -+ -+ -+#endif /* __ELAN3_STATUSMON_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/support.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/support.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/support.c 2005-07-28 14:52:52.904670224 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support.c,v 1.37.8.1 2004/09/30 15:01:53 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support.c,v $ */ -+ -+#include -+#include -+ -+/****************************************************************************************/ -+/* -+ * Nodeset/flush callbacks. -+ */ -+int -+ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ -+ KMEM_ALLOC (cb, EP_CALLBACK *, sizeof (EP_CALLBACK), 1); -+ -+ cb->Routine = routine; -+ cb->Arg = arg; -+ -+ kmutex_lock (&rail->CallbackLock); -+ cb->Next = rail->CallbackList[idx]; -+ rail->CallbackList[idx] = cb; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg) -+{ -+ EP_CALLBACK *cb; -+ EP_CALLBACK **predp; -+ -+ kmutex_lock (&rail->CallbackLock); -+ for (predp = &rail->CallbackList[idx]; (cb = *predp); predp = &cb->Next) -+ if (cb->Routine == routine && cb->Arg == arg) -+ break; -+ -+ if (cb == NULL) -+ panic ("ep_remove_member_callback"); -+ -+ *predp = cb->Next; -+ kmutex_unlock (&rail->CallbackLock); -+ -+ KMEM_FREE (cb, sizeof (EP_CALLBACK)); -+} -+ -+void -+ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *map) -+{ -+ EP_CALLBACK *cb; -+ -+ kmutex_lock (&rail->CallbackLock); -+ -+ rail->CallbackStep = idx; -+ -+ for (cb = rail->CallbackList[idx]; cb; cb = cb->Next) { -+ (cb->Routine) (cb->Arg, map); -+ } -+ kmutex_unlock (&rail->CallbackLock); -+} -+ -+unsigned int -+ep_backoff (EP_BACKOFF *backoff, int type) -+{ -+ static int bcount[EP_NUM_BACKOFF] = {1, 16, 32, 64, 128, 256, 512, 1024}; -+ -+ if (backoff->type != type) -+ { -+ backoff->type = type; -+ backoff->indx = 0; -+ backoff->count = 0; -+ } -+ -+ if (++backoff->count > bcount[backoff->indx] && backoff->indx < (EP_NUM_BACKOFF-1)) -+ { -+ backoff->indx++; -+ backoff->count = 0; -+ } -+ -+ return (backoff->indx); -+} -+ -+/* Generic checksum algorithm */ -+uint16_t -+CheckSum (char *msg, int nob) -+{ -+ uint16_t sum = 0; -+ -+ while (nob-- > 0) -+ sum = sum * 13 + *msg++; -+ -+ return (sum); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/support_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/support_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/support_elan3.c 2005-07-28 14:52:52.908669616 -0400 -@@ -0,0 +1,2111 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan3.c,v 1.42.8.3 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan3.c,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan3.h" -+#include "epcomms_elan3.h" -+#include "debug.h" -+ -+#include -+#include -+ -+/****************************************************************************************/ -+#define DMA_RING_NEXT_POS(ring) ((ring)->Position+1 == ring->Entries ? 0 : ((ring)->Position+1)) -+#define DMA_RING_PREV_POS(ring,pos) ((pos) == 0 ? (ring)->Entries-1 : (pos) - 1) -+ -+static int -+DmaRingCreate (EP3_RAIL *rail, EP3_DMA_RING *ring, int ctxnum, int entries) -+{ -+ unsigned long pgnum = (ctxnum * sizeof (E3_CommandPort)) / PAGE_SIZE; -+ unsigned long pgoff = (ctxnum * sizeof (E3_CommandPort)) & (PAGE_SIZE-1); -+ int s; -+ -+ /* set up the initial position */ -+ ring->Entries = entries; -+ ring->Position = 0; -+ -+ if (! (ring->pEvent = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_BlockCopyEvent), 0, &ring->epEvent))) -+ { -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDma = ep_alloc_elan (&rail->Generic, entries * sizeof (E3_DMA), 0, &ring->epDma))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (! (ring->pDoneBlk = ep_alloc_main (&rail->Generic, entries * sizeof (E3_uint32), 0, &ring->epDoneBlk))) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ -+ if (MapDeviceRegister (rail->Device, ELAN3_BAR_COMMAND_PORT, &ring->CommandPage, pgnum * PAGE_SIZE, PAGE_SIZE, &ring->CommandPageHandle) != ESUCCESS) -+ { -+ ep_free_elan (&rail->Generic, ring->epEvent, entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, entries * sizeof (E3_uint32)); -+ -+ ring->CommandPort = (ioaddr_t) NULL; -+ return (ENOMEM); -+ } -+ ring->CommandPort = ring->CommandPage + pgoff; -+ -+ for (s = 0; s < entries; s++) -+ { -+ /* setup the event */ -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Type), -+ EV_TYPE_BCOPY | EV_TYPE_DMA | DMA_RING_DMA_ELAN(ring, s)); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Source), DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY); -+ elan3_sdram_writel(rail->Device, DMA_RING_EVENT(ring,s) + offsetof(E3_BlockCopyEvent,ev_Dest), DMA_RING_DONE_ELAN(ring,s) | EV_TYPE_BCOPY_WORD ); -+ -+ /* need to set all the doneBlks to appear that they have completed */ -+ ring->pDoneBlk[s] = DMA_RING_DMA_ELAN(ring,s) | EV_WCOPY; -+ } -+ -+ return 0; /* success */ -+} -+ -+static void -+DmaRingRelease(EP3_RAIL *rail, EP3_DMA_RING *ring) -+{ -+ if (ring->CommandPage != (ioaddr_t) 0) -+ { -+ UnmapDeviceRegister(rail->Device, &ring->CommandPageHandle); -+ -+ ep_free_elan (&rail->Generic, ring->epEvent, ring->Entries * sizeof (E3_BlockCopyEvent)); -+ ep_free_elan (&rail->Generic, ring->epDma, ring->Entries * sizeof (E3_DMA)); -+ ep_free_main (&rail->Generic, ring->epDoneBlk, ring->Entries * sizeof (E3_uint32)); -+ } -+ ring->CommandPage = (ioaddr_t) 0; -+} -+ -+void -+DmaRingsRelease (EP3_RAIL *rail) -+{ -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_CRITICAL]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_HIGH_PRI]); -+ DmaRingRelease (rail, &rail->DmaRings[EP3_RING_LOW_PRI]); -+} -+ -+int -+DmaRingsCreate (EP3_RAIL *rail) -+{ -+ if (DmaRingCreate (rail, &rail->DmaRings[EP3_RING_CRITICAL], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_CRITICAL, EP3_RING_CRITICAL_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_HIGH_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_HIGH_PRI, EP3_RING_HIGH_PRI_LEN) || -+ DmaRingCreate (rail, &rail->DmaRings[EP3_RING_LOW_PRI], ELAN3_DMARING_BASE_CONTEXT_NUM + EP3_RING_LOW_PRI, EP3_RING_LOW_PRI_LEN)) -+ { -+ DmaRingsRelease (rail); -+ return (ENOMEM); -+ } -+ -+ return 0; -+} -+ -+static int -+DmaRingNextSlot (EP3_DMA_RING *ring) -+{ -+ int pos = ring->Position; -+ int npos = DMA_RING_NEXT_POS(ring); -+ -+ if (ring->pDoneBlk[npos] == EP3_EVENT_ACTIVE) -+ return (-1); -+ -+ ring->pDoneBlk[pos] = EP3_EVENT_ACTIVE; -+ -+ ring->Position = npos; /* move on one */ -+ -+ return (pos); -+} -+ -+ -+/****************************************************************************************/ -+/* -+ * Dma/event command issueing - these handle cproc queue overflow traps. -+ */ -+static int -+DmaRunQueueSizeCheck (EP3_RAIL *rail, E3_uint32 len) -+{ -+ E3_uint64 FandBPtr = read_reg64 (rail->Device, DProc_SysCntx_FPtr); -+ E3_uint32 FPtr, BPtr; -+ E3_uint32 qlen; -+ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ FPtr = (FandBPtr & 0xFFFFFFFFull); -+ BPtr = (FandBPtr >> 32); -+#else -+ FPtr = (FandBPtr >> 32); -+ BPtr = (FandBPtr & 0xFFFFFFFFull); -+#endif -+ -+ qlen = (((BPtr - FPtr)/sizeof (E3_DMA)) & (E3_SysCntxQueueSize-1)); -+ -+ if (qlen < 4) IncrStat (rail, DmaQueueLength[0]); -+ else if (qlen < 8) IncrStat (rail, DmaQueueLength[1]); -+ else if (qlen < 16) IncrStat (rail, DmaQueueLength[2]); -+ else if (qlen < 32) IncrStat (rail, DmaQueueLength[3]); -+ else if (qlen < 64) IncrStat (rail, DmaQueueLength[4]); -+ else if (qlen < 128) IncrStat (rail, DmaQueueLength[5]); -+ else if (qlen < 240) IncrStat (rail, DmaQueueLength[6]); -+ else IncrStat (rail, DmaQueueLength[7]); -+ -+ return (qlen < len); -+} -+ -+int -+IssueDma (EP3_RAIL *rail, E3_DMA_BE * dmabe, int type, int retryThread) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ EP3_RETRY_DMA *retry; -+ EP3_DMA_RING *ring; -+ int slot; -+ int i, res; -+ unsigned long flags; -+ -+ ASSERT (dmabe->s.dma_direction == DMA_WRITE || dmabe->s.dma_direction == DMA_READ_REQUEUE); -+ -+ ASSERT (! EP_VP_ISDATA(dmabe->s.dma_destVProc) || -+ (dmabe->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dmabe->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dmabe->s.dma_destVProc) == rail->Generic.Position.pos_nodeid)); -+ -+ /* -+ * If we're not the retry thread - then don't issue this DMA -+ * if there are any already queued on the retry lists with -+ * higher or equal priority than this one that are ready to -+ * retry. -+ */ -+ if (! retryThread) -+ { -+ for (i = EP_RETRY_BASE; i < type; i++) -+ { -+ if (list_empty (&rail->DmaRetries[i])) -+ continue; -+ -+ retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (AFTER (lbolt, retry->RetryTime)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ } -+ } -+ -+ /* -+ * Depending on the type of DMA we're issuing - throttle back -+ * issueing of it if the DMA run queue is too full. This then -+ * prioritises the "special" messages and completing data -+ * transfers which have matched a receive buffer. -+ */ -+ -+ if (type >= EP_RETRY_LOW_PRI_RETRY) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 2)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type == EP_RETRY_LOW_PRI) -+ { -+ if (! DmaRunQueueSizeCheck (rail, E3_SysCntxQueueSize / 3)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ return (ISSUE_COMMAND_RETRY); -+ } -+ ring = &rail->DmaRings[EP3_RING_LOW_PRI]; -+ } -+ else if (type >= EP_RETRY_HIGH_PRI) -+ ring = &rail->DmaRings[EP3_RING_HIGH_PRI]; -+ else -+ ring = &rail->DmaRings[EP3_RING_CRITICAL]; -+ -+ local_irq_save (flags); -+ if (! spin_trylock (&dev->CProcLock)) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ if ((slot = DmaRingNextSlot (ring)) == -1) -+ { -+ IncrStat (rail, IssueDmaFail[type]); -+ -+ res = ISSUE_COMMAND_RETRY; -+ } -+ else -+ { -+ EPRINTF4 (DBG_COMMAND, "IssueDma: type %08x size %08x Elan source %08x Elan dest %08x\n", -+ dmabe->s.dma_type, dmabe->s.dma_size, dmabe->s.dma_source, dmabe->s.dma_dest); -+ EPRINTF2 (DBG_COMMAND, " dst event %08x cookie/proc %08x\n", -+ dmabe->s.dma_destEvent, dmabe->s.dma_destCookieVProc); -+ EPRINTF2 (DBG_COMMAND, " src event %08x cookie/proc %08x\n", -+ dmabe->s.dma_srcEvent, dmabe->s.dma_srcCookieVProc); -+ -+ elan3_sdram_copyq_to_sdram (dev, dmabe, DMA_RING_DMA(ring, slot), sizeof (E3_DMA)); /* PCI write block */ -+ elan3_sdram_writel (dev, DMA_RING_EVENT(ring, slot) + offsetof (E3_BlockCopyEvent, ev_Count), 1); /* PCI write */ -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (DMA_RING_EVENT_ELAN(ring,slot), ring->CommandPort + offsetof (E3_CommandPort, SetEvent)); -+ mmiob(); /* and flush through IO writes */ -+ -+ res = ISSUE_COMMAND_OK; -+ } -+ spin_unlock (&dev->CProcLock); -+ } -+ local_irq_restore (flags); -+ -+ return (res); -+} -+ -+int -+IssueWaitevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ int res; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ ASSERT (rail->CommandPortEventTrap == FALSE); -+ -+ /* -+ * Disable the command processor interrupts, so that we don't see -+ * spurious interrupts appearing. -+ */ -+ DISABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, WaitEvent0)); -+ mmiob(); /* and flush through IO writes */ -+ -+ do { -+ res = CheckCommandQueueFlushed (rail->Ctxt, EventComQueueNotEmpty, ISSUE_COMMAND_CANT_WAIT, &flags); -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: CheckCommandQueueFlushed -> %d\n", res); -+ -+ if (res == ISSUE_COMMAND_WAIT) -+ HandleCProcTrap (dev, 0, NULL); -+ } while (res != ISSUE_COMMAND_OK); -+ -+ if (! rail->CommandPortEventTrap) -+ res = ISSUE_COMMAND_OK; -+ else -+ { -+ rail->CommandPortEventTrap = FALSE; -+ res = ISSUE_COMMAND_TRAPPED; -+ } -+ -+ EPRINTF1 (DBG_COMMAND, "IssueWaitevent: -> %d\n", res); -+ -+ /* -+ * Re-enable the command processor interrupt as we've finished -+ * polling it. -+ */ -+ ENABLE_INT_MASK (dev, INT_CProc | INT_ComQueue); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ -+ return (res); -+} -+ -+void -+IssueSetevent (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueSetevent: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, SetEvent)); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+void -+IssueRunThread (EP3_RAIL *rail, E3_Addr value) -+{ -+ EPRINTF1 (DBG_COMMAND, "IssueRunThread: %08x\n", value); -+ -+ mb(); /* ensure writes to main memory completed */ -+ writel (value, rail->CommandPort + offsetof (E3_CommandPort, RunThread)); -+ mmiob(); /* and flush through IO writes */ -+} -+ -+/****************************************************************************************/ -+/* -+ * DMA retry list management -+ */ -+static unsigned DmaRetryTimes[EP_NUM_RETRIES]; -+ -+static void -+ep3_dma_retry (EP3_RAIL *rail) -+{ -+ EP3_COOKIE *cp; -+ int res; -+ int vp; -+ unsigned long flags; -+ int i; -+ -+ kernel_thread_init("ep3_dma_retry"); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ for (;;) -+ { -+ long yieldAt = lbolt + (hz/10); -+ long retryTime = 0; -+ -+ if (rail->DmaRetryThreadShouldStop) -+ break; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ if (! AFTER (lbolt, retry->RetryTime)) -+ break; -+ -+ if (rail->DmaRetryThreadShouldStall || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF2 (DBG_RETRY, "%s: DmaRetryThread: retry %p\n", rail->Generic.Name, retry); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", -+ rail->Generic.Name, retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+#if defined(DEBUG) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_srcEvent); -+ else -+ cp = LookupEventCookie (rail, &rail->CookieTable, retry->Dma.s.dma_destEvent); -+ -+ ASSERT (cp != NULL || (retry->Dma.s.dma_srcEvent == 0 && retry->Dma.s.dma_direction == DMA_WRITE && retry->Dma.s.dma_isRemote)); -+ -+ if (cp && cp->Operations->DmaVerify) -+ cp->Operations->DmaVerify (rail, cp->Arg, &retry->Dma); -+#endif -+ -+#if defined(DEBUG_ASSERT) -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || -+ (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ res = IssueDma (rail, &(retry->Dma), i, TRUE); -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (res != ISSUE_COMMAND_OK) -+ goto cant_do_more; -+ -+ /* Command issued, so remove from list, and add to free list */ -+ list_del (&retry->Link); -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ } -+ } -+ cant_do_more: -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (!list_empty (&rail->DmaRetries[i])) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetries[i].next, EP3_RETRY_DMA, Link); -+ -+ retryTime = retryTime ? MIN(retryTime, retry->RetryTime) : retry->RetryTime; -+ } -+ } -+ -+ if (retryTime && !AFTER (retryTime, lbolt)) -+ retryTime = lbolt + 1; -+ -+ do { -+ EPRINTF3 (DBG_RETRY, "%s: ep_cm_retry: %s %lx\n", rail->Generic.Name, rail->DmaRetryThreadShouldStall ? "stalled" : "sleeping", retryTime); -+ -+ if (rail->DmaRetryTime == 0 || (retryTime != 0 && retryTime < rail->DmaRetryTime)) -+ rail->DmaRetryTime = retryTime; -+ -+ rail->DmaRetrySleeping = TRUE; -+ -+ if (rail->DmaRetryThreadShouldStall) /* wakeup threads waiting in StallDmaRetryThread */ -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); /* for us to really go to sleep for good. */ -+ -+ if (rail->DmaRetryTime == 0 || rail->DmaRetryThreadShouldStall) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ else -+ kcondvar_timedwait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags, rail->DmaRetryTime); -+ -+ rail->DmaRetrySleeping = FALSE; -+ -+ } while (rail->DmaRetryThreadShouldStall); -+ -+ rail->DmaRetryTime = 0; -+ } -+ -+ rail->DmaRetryThreadStopped = 1; -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ kernel_thread_exit(); -+} -+ -+void -+StallDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStall++; -+ -+ while (! rail->DmaRetrySleeping) -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+ResumeDmaRetryThread (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ ASSERT (rail->DmaRetrySleeping); -+ -+ if (--rail->DmaRetryThreadShouldStall == 0) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+int -+InitialiseDmaRetries (EP3_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->DmaRetryLock); -+ kcondvar_init (&rail->DmaRetryWait); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->DmaRetries[i]); -+ -+ INIT_LIST_HEAD (&rail->DmaRetryFreeList); -+ -+ DmaRetryTimes[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ DmaRetryTimes[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ DmaRetryTimes[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ DmaRetryTimes[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->DmaRetryInitialised = 1; -+ -+ if (kernel_thread_create (ep3_dma_retry, (void *) rail) == 0) -+ { -+ spin_lock_destroy (&rail->DmaRetryLock); -+ return (ENOMEM); -+ } -+ -+ rail->DmaRetryThreadStarted = 1; -+ -+ return (ESUCCESS); -+} -+ -+void -+DestroyDmaRetries (EP3_RAIL *rail) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryThreadShouldStop = 1; -+ while (rail->DmaRetryThreadStarted && !rail->DmaRetryThreadStopped) -+ { -+ kcondvar_wakeupall (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ kcondvar_wait (&rail->DmaRetryWait, &rail->DmaRetryLock, &flags); -+ } -+ rail->DmaRetryThreadStarted = 0; -+ rail->DmaRetryThreadStopped = 0; -+ rail->DmaRetryThreadShouldStop = 0; -+ rail->DmaRetryInitialised = 0; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ ASSERT (rail->DmaRetryReserved == 0); -+ -+ while (! list_empty (&rail->DmaRetryFreeList)) -+ { -+ EP3_RETRY_DMA *retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ KMEM_FREE (retry, sizeof (EP3_RETRY_DMA)); -+ } -+ -+ kcondvar_destroy (&rail->DmaRetryWait); -+ spin_lock_destroy (&rail->DmaRetryLock); -+} -+ -+int -+ReserveDmaRetries (EP3_RAIL *rail, int count, EP_ATTRIBUTE attr) -+{ -+ EP3_RETRY_DMA *retry; -+ int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ if (remaining <= (rail->DmaRetryCount - rail->DmaRetryReserved)) -+ { -+ rail->DmaRetryReserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ESUCCESS); -+ } -+ -+ remaining -= (rail->DmaRetryCount - rail->DmaRetryReserved); -+ -+ rail->DmaRetryReserved = rail->DmaRetryCount; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ while (remaining) -+ { -+ KMEM_ALLOC (retry, EP3_RETRY_DMA *, sizeof (EP3_RETRY_DMA), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ /* clear E3_DMA */ -+ bzero((char *)(&(retry->Dma.s)), sizeof(E3_DMA)); -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ list_add (&retry->Link, &rail->DmaRetryFreeList); -+ -+ rail->DmaRetryCount++; -+ rail->DmaRetryReserved++; -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ } -+ return (ESUCCESS); -+ -+ failed: -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ return (ENOMEM); -+} -+ -+void -+ReleaseDmaRetries (EP3_RAIL *rail, int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ rail->DmaRetryReserved -= count; -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaForRetry (EP3_RAIL *rail, E3_DMA_BE *dma, int interval) -+{ -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaForRetry: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n",rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ retry->RetryTime = lbolt + DmaRetryTimes[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->Link, &rail->DmaRetries[interval]); -+ -+ /* now wakeup the retry thread */ -+ if (rail->DmaRetryTime == 0 || retry->RetryTime < rail->DmaRetryTime) -+ rail->DmaRetryTime = retry->RetryTime; -+ -+ if (rail->DmaRetrySleeping && !rail->DmaRetryThreadShouldStall) -+ { -+ rail->DmaRetrySleeping = 0; -+ kcondvar_wakeupone (&rail->DmaRetryWait, &rail->DmaRetryLock); -+ } -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+QueueDmaOnStalledList (EP3_RAIL *rail, E3_DMA_BE *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) : -+ EP_VP_TO_NODE(dma->s.dma_destVProc)]; -+ EP3_RETRY_DMA *retry; -+ unsigned long flags; -+ -+ /* -+ * When requeueing DMAs they must never be "READ" dma's since -+ * these would fetch the DMA descriptor from the retryn descriptor -+ */ -+ ASSERT (dma->s.dma_direction == DMA_WRITE || dma->s.dma_direction == DMA_READ_REQUEUE); -+ ASSERT (dma->s.dma_direction == DMA_WRITE ? -+ EP_VP_TO_NODE(dma->s.dma_srcVProc) == rail->Generic.Position.pos_nodeid : -+ EP_VP_TO_NODE(dma->s.dma_destVProc) == rail->Generic.Position.pos_nodeid); -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ -+ EP_ASSERT (&rail->Generic, !list_empty (&rail->DmaRetryFreeList)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->DmaRetryFreeList.next, EP3_RETRY_DMA, Link); -+ -+ list_del (&retry->Link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: QueueDmaOnStalledList: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_type, dma->s.dma_size, dma->s.dma_source, dma->s.dma_dest); -+ EPRINTF5 (DBG_RETRY, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma->s.dma_destEvent, dma->s.dma_destCookieVProc, -+ dma->s.dma_srcEvent, dma->s.dma_srcCookieVProc); -+ -+ /* copy the DMA into the retry descriptor */ -+ retry->Dma.s.dma_type = dma->s.dma_type; -+ retry->Dma.s.dma_size = dma->s.dma_size; -+ retry->Dma.s.dma_source = dma->s.dma_source; -+ retry->Dma.s.dma_dest = dma->s.dma_dest; -+ retry->Dma.s.dma_destEvent = dma->s.dma_destEvent; -+ retry->Dma.s.dma_destCookieVProc = dma->s.dma_destCookieVProc; -+ retry->Dma.s.dma_srcEvent = dma->s.dma_srcEvent; -+ retry->Dma.s.dma_srcCookieVProc = dma->s.dma_srcCookieVProc; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->Link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+void -+FreeStalledDmas (EP3_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->DmaRetryFreeList); -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+} -+ -+/****************************************************************************************/ -+/* -+ * Connection management. -+ */ -+static void -+DiscardingHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+typedef struct { -+ EP3_RAIL *rail; -+ sdramaddr_t qaddr; -+} SetQueueFullData; -+ -+static void -+SetQueueLockedOperation (ELAN3_DEV *dev, void *arg) -+{ -+ SetQueueFullData *data = (SetQueueFullData *) arg; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ -+ elan3_sdram_writel (dev, data->qaddr, E3_QUEUE_LOCKED | elan3_sdram_readl(dev, data->qaddr)); -+ -+ data->rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&data->rail->HaltOpSleep, &dev->IntrLock); -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+static void -+FlushDmaQueuesHaltOperation (ELAN3_DEV *dev, void *arg) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) arg; -+ sdramaddr_t FPtr, BPtr; -+ sdramaddr_t Base, Top; -+ E3_DMA_BE dma; -+ EP_NODE_RAIL *node; -+ int vp; -+ unsigned long flags; -+ -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProc.s.FSR)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData0.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData1.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData2.s.FSR.Status)) == 0); -+ ASSERT (elan3_sdram_readl (dev, dev->TAndQBase + offsetof (E3_TrapAndQueue, DProcData3.s.FSR.Status)) == 0); -+ -+ FPtr = read_reg32 (dev, DProc_SysCntx_FPtr); -+ BPtr = read_reg32 (dev, DProc_SysCntx_BPtr); -+ Base = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[0]); -+ Top = dev->TAndQBase + offsetof (E3_TrapAndQueue, SysCntxDmaQueue[E3_SysCntxQueueSize-1]); -+ -+ while (FPtr != BPtr) -+ { -+ elan3_sdram_copyq_from_sdram (dev, FPtr, &dma, sizeof (E3_DMA_BE)); -+ -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_type, dma.s.dma_size, dma.s.dma_source, dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ dma.s.dma_destEvent, dma.s.dma_destCookieVProc, -+ dma.s.dma_srcEvent, dma.s.dma_srcCookieVProc); -+ -+ ASSERT ((dma.s.dma_u.s.Context & SYS_CONTEXT_BIT) != 0); -+ -+ if (dma.s.dma_direction == DMA_WRITE) -+ vp = dma.s.dma_destVProc; -+ else -+ vp = dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ EPRINTF1 (DBG_DISCON, "%s: FlushDmaQueuesHaltOperation: move dma to cancelled list\n", rail->Generic.Name); -+ -+ if (dma.s.dma_direction != DMA_WRITE) -+ { -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ dma.s.dma_direction = (dma.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+ QueueDmaOnStalledList (rail, &dma); -+ -+ /* -+ * Remove the DMA from the queue by replacing it with one with -+ * zero size and no events. -+ * -+ * NOTE: we must preserve the SYS_CONTEXT_BIT since the Elan uses this -+ * to mark the approriate run queue as empty. -+ */ -+ dma.s.dma_type = (SYS_CONTEXT_BIT << 16); -+ dma.s.dma_size = 0; -+ dma.s.dma_source = (E3_Addr) 0; -+ dma.s.dma_dest = (E3_Addr) 0; -+ dma.s.dma_destEvent = (E3_Addr) 0; -+ dma.s.dma_destCookieVProc = 0; -+ dma.s.dma_srcEvent = (E3_Addr) 0; -+ dma.s.dma_srcCookieVProc = 0; -+ -+ elan3_sdram_copyq_to_sdram (dev, &dma, FPtr, sizeof (E3_DMA_BE)); -+ } -+ -+ FPtr = (FPtr == Top) ? Base : FPtr + sizeof (E3_DMA); -+ } -+ -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ rail->HaltOpCompleted = 1; -+ kcondvar_wakeupall (&rail->HaltOpSleep, &dev->IntrLock); -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+} -+ -+void -+SetQueueLocked (EP3_RAIL *rail, sdramaddr_t qaddr) -+{ -+ ELAN3_DEV *dev = rail->Device; -+ SetQueueFullData data; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ data.rail = rail; -+ data.qaddr = qaddr; -+ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx | INT_TProcHalted, SetQueueLockedOperation, &data); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_filters (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ unsigned long flags; -+ -+ /* Ensure that the context filter changes have been seen by halting -+ * then restarting the inputters - this also ensures that any setevent -+ * commands used to issue dma's have completed and any trap has been -+ * handled. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DiscardingSysCntx, DiscardingHaltOperation, rail); -+ -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+} -+ -+void -+ep3_flush_queues (EP_RAIL *r) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) r; -+ ELAN3_DEV *dev = rail->Device; -+ struct list_head *el; -+ struct list_head *nel; -+ EP_NODE_RAIL *node; -+ unsigned long flags; -+ int vp, i; -+ -+ ASSERT (NO_LOCKS_HELD); -+ -+ /* First - stall the dma retry thread, so that it will no longer -+ * restart any dma's from the rety lists. */ -+ StallDmaRetryThread (rail); -+ -+ /* Second - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queue. */ -+ kmutex_lock (&rail->HaltOpMutex); -+ spin_lock_irqsave (&dev->IntrLock, flags); -+ QueueHaltOperation (dev, 0, NULL, INT_DProcHalted | INT_TProcHalted, FlushDmaQueuesHaltOperation, rail); -+ while (! rail->HaltOpCompleted) -+ kcondvar_wait (&rail->HaltOpSleep, &dev->IntrLock, &flags); -+ rail->HaltOpCompleted = 0; -+ spin_unlock_irqrestore (&dev->IntrLock, flags); -+ kmutex_unlock (&rail->HaltOpMutex); -+ -+ /* Third - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->DmaRetryLock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el, nel, &rail->DmaRetries[i]) { -+ EP3_RETRY_DMA *retry = list_entry (el, EP3_RETRY_DMA, Link); -+ -+ if (retry->Dma.s.dma_direction == DMA_WRITE) -+ vp = retry->Dma.s.dma_destVProc; -+ else -+ vp = retry->Dma.s.dma_srcVProc; -+ -+ node = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (node->State >= EP_NODE_CONNECTED && node->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && node->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ EPRINTF5 (DBG_DISCON, "%s: FlushDmaQueues: %08x %08x %08x %08x\n",rail->Generic.Name, -+ retry->Dma.s.dma_type, retry->Dma.s.dma_size, retry->Dma.s.dma_source, retry->Dma.s.dma_dest); -+ EPRINTF5 (DBG_DISCON, "%s: %08x %08x %08x %08x\n", rail->Generic.Name, -+ retry->Dma.s.dma_destEvent, retry->Dma.s.dma_destCookieVProc, -+ retry->Dma.s.dma_srcEvent, retry->Dma.s.dma_srcCookieVProc); -+ -+ list_del (&retry->Link); -+ -+ list_add_tail (&retry->Link, &node->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->DmaRetryLock, flags); -+ -+ /* Finally - allow the dma retry thread to run again */ -+ ResumeDmaRetryThread (rail); -+} -+ -+/****************************************************************************************/ -+/* NOTE - we require that all cookies are non-zero, which is -+ * achieved because EP_VP_DATA() is non-zero for all -+ * nodes */ -+E3_uint32 -+LocalCookie (EP3_RAIL *rail, unsigned remoteNode) -+{ -+ E3_uint32 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(rail->Generic.Position.pos_nodeid)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for srcCookie - this is what is sent -+ * to the remote node along with the setevent from the put -+ * or the dma descriptor for a get */ -+ return (cookie); -+} -+ -+E3_uint32 -+RemoteCookie (EP3_RAIL *rail, u_int remoteNode) -+{ -+ uint32_t cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->CookieLock, flags); -+ cookie = DMA_REMOTE_COOKIE (rail->MainCookies[remoteNode], EP_VP_DATA(remoteNode)); -+ spin_unlock_irqrestore (&rail->CookieLock, flags); -+ -+ /* Main processor cookie for dstCookie - this is the cookie -+ * that the "remote put" dma uses for it's setevent packets for -+ * a get dma */ -+ -+ return (cookie); -+} -+ -+/****************************************************************************************/ -+/* -+ * Event Cookie management. -+ * -+ * We find the ep_cookie in one of two ways: -+ * 1) for block copy events -+ * the cookie value is stored in the ev_Source - for EVIRQ events -+ * it is also stored in the ev_Type -+ * 2) for normal events -+ * we just use the event address. -+ */ -+void -+InitialiseCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ spin_lock_init (&table->Lock); -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ table->Entries[i] = NULL; -+} -+ -+void -+DestroyCookieTable (EP3_COOKIE_TABLE *table) -+{ -+ register int i; -+ -+ for (i = 0; i < EP3_COOKIE_HASH_SIZE; i++) -+ if (table->Entries[i]) -+ printk ("DestroyCookieTable: entry %d not empty\n", i); -+ -+ spin_lock_destroy (&table->Lock); -+} -+ -+void -+RegisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp, E3_uint32 cookie, EP3_COOKIE_OPS *ops, void *arg) -+{ -+ EP3_COOKIE *tcp; -+ int hashval = EP3_HASH_COOKIE(cookie); -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ cp->Operations = ops; -+ cp->Arg = arg; -+ cp->Cookie = cookie; -+ -+#if defined(DEBUG) -+ /* Check that the cookie is unique */ -+ for (tcp = table->Entries[hashval]; tcp; tcp = tcp->Next) -+ if (tcp->Cookie == cookie) -+ panic ("RegisterEventCookie: non unique cookie\n"); -+#endif -+ cp->Next = table->Entries[hashval]; -+ -+ table->Entries[hashval] = cp; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+} -+ -+void -+DeregisterCookie (EP3_COOKIE_TABLE *table, EP3_COOKIE *cp) -+{ -+ EP3_COOKIE **predCookiep; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (predCookiep = &table->Entries[EP3_HASH_COOKIE (cp->Cookie)]; *predCookiep; predCookiep = &(*predCookiep)->Next) -+ { -+ if (*predCookiep == cp) -+ { -+ *predCookiep = cp->Next; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ -+ cp->Operations = NULL; -+ cp->Arg = NULL; -+ cp->Cookie = 0; -+ cp->Next = NULL; -+} -+ -+EP3_COOKIE * -+LookupCookie (EP3_COOKIE_TABLE *table, E3_Addr cookie) -+{ -+ EP3_COOKIE *cp; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&table->Lock, flags); -+ -+ for (cp = table->Entries[EP3_HASH_COOKIE(cookie)]; cp; cp = cp->Next) -+ if (cp->Cookie == cookie) -+ break; -+ -+ spin_unlock_irqrestore (&table->Lock, flags); -+ return (cp); -+} -+ -+EP3_COOKIE * -+LookupEventCookie (EP3_RAIL *rail, EP3_COOKIE_TABLE *table, E3_Addr eaddr) -+{ -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ if ((event = ep_elan2sdram (&rail->Generic, eaddr)) != (sdramaddr_t) 0) -+ { -+ type = elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Type)); -+ -+ if (type & EV_TYPE_BCOPY) -+ return (LookupCookie (table, elan3_sdram_readl (rail->Device, event + offsetof (E3_BlockCopyEvent, ev_Source)) & ~EV_WCOPY)); -+ else -+ return (LookupCookie (table, eaddr)); -+ } -+ -+ return (NULL); -+} -+ -+/****************************************************************************************/ -+/* -+ * Elan context operations - note only support interrupt ops. -+ */ -+static int ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+static int ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+static int ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+static int ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+static int ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+static int ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf); -+ -+static E3_uint8 ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+static E3_uint16 ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+static E3_uint32 ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+static E3_uint64 ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr); -+static void ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+ELAN3_OPS ep3_elan3_ops = -+{ -+ ELAN3_OPS_VERSION, /* Version */ -+ -+ NULL, /* Exception */ -+ NULL, /* GetWordItem */ -+ NULL, /* GetBlockItem */ -+ NULL, /* PutWordItem */ -+ NULL, /* PutBlockItem */ -+ NULL, /* PutbackItem */ -+ NULL, /* FreeWordItem */ -+ NULL, /* FreeBlockItem */ -+ NULL, /* CountItems */ -+ ep3_event, /* Event */ -+ NULL, /* SwapIn */ -+ NULL, /* SwapOut */ -+ NULL, /* FreePrivate */ -+ NULL, /* FixupNetworkError */ -+ ep3_dprocTrap, /* DProcTrap */ -+ ep3_tprocTrap, /* TProcTrap */ -+ ep3_iprocTrap, /* IProcTrap */ -+ ep3_cprocTrap, /* CProcTrap */ -+ ep3_cprocReissue, /* CProcReissue */ -+ NULL, /* StartFaultCheck */ -+ NULL, /* EndFaulCheck */ -+ ep3_load8, /* Load8 */ -+ ep3_store8, /* Store8 */ -+ ep3_load16, /* Load16 */ -+ ep3_store16, /* Store16 */ -+ ep3_load32, /* Load32 */ -+ ep3_store32, /* Store32 */ -+ ep3_load64, /* Load64 */ -+ ep3_store64, /* Store64 */ -+}; -+ -+static int -+ep3_event (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ EP3_COOKIE *cp = LookupCookie (&rail->CookieTable, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep3_event: cannot find event cookie for %x\n", cookie); -+ return (OP_HANDLED); -+ } -+ -+ if (cp->Operations->Event) -+ cp->Operations->Event(rail, cp->Arg); -+ -+ return (OP_HANDLED); -+} -+ -+/* Trap interface */ -+int -+ep3_dprocTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_COOKIE *cp; -+ E3_FaultSave_BE *FaultArea; -+ E3_uint16 vp; -+ int validTrap; -+ int numFaults; -+ int i; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ int status = EAGAIN; -+ -+ EPRINTF4 (DBG_EPTRAP, "ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ EPRINTF4 (DBG_EPTRAP, " type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ EPRINTF2 (DBG_EPTRAP, " Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ EPRINTF2 (DBG_EPTRAP, " Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaPacketTimedOutOrPacketError: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ vp = trap->Desc.s.dma_destVProc; -+ else -+ vp = trap->Desc.s.dma_srcVProc; -+ -+ if (! trap->PacketInfo.s.PacketTimeout) -+ status = ETIMEDOUT; -+ else -+ { -+ status = EHOSTDOWN; -+ -+ /* XXXX: dma timedout - might want to "restart" tree ? */ -+ } -+ goto retry_dma; -+ -+ case MI_DmaFailCountError: -+ goto retry_dma; -+ -+ case MI_TimesliceDmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ goto retry_dma; -+ -+ case MI_RemoteDmaCommand: -+ case MI_RunDmaCommand: -+ case MI_DequeueNonSysCntxDma: -+ case MI_DequeueSysCntxDma: -+ /* -+ * The DMA processor has trapped due to outstanding prefetches from the previous -+ * dma. The "current" dma has not been consumed, so we just ignore the trap -+ */ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ IncrStat (rail, DprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof(E3_Event,ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&ctxt->Device->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ | EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&ctxt->Device->IntrLock); -+ } -+ return (OP_HANDLED); -+ -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, DprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, trap->Desc.s.dma_srcEvent)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ /* We only chain together DMA's of the same direction, so since -+ * we took a DmaQueueOverflow trap - this means that DMA which -+ * trapped was a WRITE dma - hence the one we chain to must also -+ * be a WRITE dma. -+ */ -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmabe.s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (cp != NULL && (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE))); -+ } -+#endif -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ -+ return (OP_HANDLED); -+ } -+ -+ panic ("ep3_dprocTrap\n"); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ } -+ -+ /* If it's a dma which traps past the end of the source, then */ -+ /* just re-issue it */ -+ numFaults = validTrap = (trap->FaultSave.s.FSR.Status != 0); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ { -+ if (FaultArea->s.FSR.Status != 0) -+ { -+ numFaults++; -+ -+ /* XXXX: Rev B Elans can prefetch data past the end of the dma descriptor */ -+ /* if the fault relates to this, then just ignore it */ -+ if (FaultArea->s.FaultAddress >= (trap->Desc.s.dma_source+trap->Desc.s.dma_size)) -+ { -+ static int i; -+ if (i < 10 && i++ < 10) -+ printk ("ep3_dprocTrap: Rev B prefetch trap error %08x %08x\n", -+ FaultArea->s.FaultAddress, (trap->Desc.s.dma_source+trap->Desc.s.dma_size)); -+ continue; -+ } -+ -+ validTrap++; -+ } -+ } -+ -+ /* -+ * NOTE: for physical errors (uncorrectable ECC/PCI parity errors) the FSR will -+ * be zero - hence we will not see any faults - and none will be valid, -+ * so only ignore a Rev B prefetch trap if we've seen some faults. Otherwise -+ * we can reissue a DMA which has already sent it's remote event ! -+ */ -+ if (numFaults != 0 && validTrap == 0) -+ { -+ retry_dma: -+ if (trap->Desc.s.dma_direction == DMA_WRITE) -+ { -+ vp = trap->Desc.s.dma_destVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_srcEvent); -+ } -+ else -+ { -+ ASSERT (EP3_CONTEXT_ISDATA(trap->Desc.s.dma_queueContext) || trap->Desc.s.dma_direction == DMA_READ_REQUEUE); -+ -+ vp = trap->Desc.s.dma_srcVProc; -+ cp = LookupEventCookie (rail, &rail->CookieTable, trap->Desc.s.dma_destEvent); -+ -+ /* for read dma's set the DMA_READ_REQUEUE bits as the dma_source has been -+ * modified by the elan to point at the dma in the rxd where it was issued -+ * from */ -+ trap->Desc.s.dma_direction = (trap->Desc.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &trap->Desc, status); -+ else -+ { -+ ASSERT (trap->Desc.s.dma_direction == DMA_WRITE && trap->Desc.s.dma_srcEvent == 0 && trap->Desc.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &trap->Desc, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ printk ("ep3_dprocTrap: WakeupFnt=%x Cntx=%x SuspAddr=%x TrapType=%s\n", -+ trap->Status.s.WakeupFunction, trap->Status.s.Context, -+ trap->Status.s.SuspendAddr, MiToName (trap->Status.s.TrapType)); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ for (i = 0, FaultArea = &trap->Data0; i < 4; i++, FaultArea++) -+ printk (" %d FaultAddr=%x EventAddr=%x FSR=%x\n", i, -+ FaultArea->s.FaultAddress, FaultArea->s.EventAddress, FaultArea->s.FSR.Status); -+ -+ printk (" type %08x size %08x source %08x dest %08x\n", -+ trap->Desc.s.dma_type, trap->Desc.s.dma_size, trap->Desc.s.dma_source, trap->Desc.s.dma_dest); -+ printk (" Dest event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_destEvent, trap->Desc.s.dma_destCookieVProc); -+ printk (" Source event %08x cookie/proc %08x\n", -+ trap->Desc.s.dma_srcEvent, trap->Desc.s.dma_srcCookieVProc); -+ -+// panic ("ep3_dprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_tprocTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ -+ EPRINTF6 (DBG_EPTRAP, "ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, trap->TrapBits.Bits, MiToName (trap->mi)); -+ EPRINTF4 (DBG_EPTRAP, " g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ EPRINTF4 (DBG_EPTRAP, " i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ -+ ASSERT (trap->Status.s.Context & SYS_CONTEXT_BIT); -+ -+ switch (trap->mi) -+ { -+ case MI_UnimplementedError: -+ if (trap->TrapBits.s.ForcedTProcTrap) -+ { -+ ASSERT (trap->TrapBits.s.OutputWasOpen == 0); -+ -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ForcedTProcTrap\n"); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.ThreadTimeout) -+ { -+ EPRINTF0 (DBG_EPTRAP, "ep3_tprocTrap: ThreadTimeout\n"); -+ -+ if (trap->Registers[REG_GLOBALS + (1^WordEndianFlip)] == 0) -+ RollThreadToClose (ctxt, trap, trap->TrapBits.s.PacketAckValue); -+ else -+ { -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], trap->TrapBits.s.PacketAckValue); -+ -+ RollThreadToClose (ctxt, trap, EP3_PAckStolen); -+ } -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, FALSE)); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->TrapBits.s.Unimplemented) -+ { -+ E3_uint32 instr = ELAN3_OP_LOAD32 (ctxt, trap->pc & PC_MASK); -+ -+ PRINTF1 (ctxt, DBG_EPTRAP, "ep3_tprocTrap: unimplemented instruction %08x\n", instr); -+ -+ if ((instr & OPCODE_MASK) == OPCODE_Ticc && -+ (instr & OPCODE_IMM) == OPCODE_IMM && -+ (Ticc_COND(instr) == Ticc_TA)) -+ { -+ switch (INSTR_IMM(instr)) -+ { -+ case EP3_UNIMP_TRAP_NO_DESCS: -+ StallThreadForNoDescs (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_TRAP_PACKET_NACKED: -+ CompleteEnvelope (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], E3_PAckDiscard); -+ -+ IssueRunThread (rail, SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ case EP3_UNIMP_THREAD_HALTED: -+ StallThreadForHalted (rail, trap->Registers[REG_GLOBALS + (1^WordEndianFlip)], -+ SaveThreadToStack (ctxt, trap, TRUE)); -+ return (OP_HANDLED); -+ -+ default: -+ break; -+ -+ } -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+ -+ /* All other traps should not happen for kernel comms */ -+ printk ("ep3_tprocTrap: SP=%08x PC=%08x NPC=%08x DIRTY=%08x TRAP=%08x MI=%s\n", -+ trap->sp, trap->pc, trap->npc, trap->DirtyBits.Bits, -+ trap->TrapBits.Bits, MiToName (trap->mi)); -+ printk (" FaultSave : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, trap->FaultSave.s.FSR.Status); -+ printk (" DataFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->DataFaultSave.s.FaultAddress, trap->DataFaultSave.s.EventAddress, trap->DataFaultSave.s.FSR.Status); -+ printk (" InstFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->InstFaultSave.s.FaultAddress, trap->InstFaultSave.s.EventAddress, trap->InstFaultSave.s.FSR.Status); -+ printk (" OpenFault : FaultAddress %08x EventAddress %08x FSR %08x\n", -+ trap->OpenFaultSave.s.FaultAddress, trap->OpenFaultSave.s.EventAddress, trap->OpenFaultSave.s.FSR.Status); -+ -+ if (trap->DirtyBits.s.GlobalsDirty) -+ { -+ printk (" g0=%08x g1=%08x g2=%08x g3=%08x\n", -+ trap->Registers[REG_GLOBALS+(0^WordEndianFlip)], trap->Registers[REG_GLOBALS+(1^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(2^WordEndianFlip)], trap->Registers[REG_GLOBALS+(3^WordEndianFlip)]); -+ printk (" g4=%08x g5=%08x g6=%08x g7=%08x\n", -+ trap->Registers[REG_GLOBALS+(4^WordEndianFlip)], trap->Registers[REG_GLOBALS+(5^WordEndianFlip)], -+ trap->Registers[REG_GLOBALS+(6^WordEndianFlip)], trap->Registers[REG_GLOBALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.OutsDirty) -+ { -+ printk (" o0=%08x o1=%08x o2=%08x o3=%08x\n", -+ trap->Registers[REG_OUTS+(0^WordEndianFlip)], trap->Registers[REG_OUTS+(1^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(2^WordEndianFlip)], trap->Registers[REG_OUTS+(3^WordEndianFlip)]); -+ printk (" o4=%08x o5=%08x o6=%08x o7=%08x\n", -+ trap->Registers[REG_OUTS+(4^WordEndianFlip)], trap->Registers[REG_OUTS+(5^WordEndianFlip)], -+ trap->Registers[REG_OUTS+(6^WordEndianFlip)], trap->Registers[REG_OUTS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.LocalsDirty) -+ { -+ printk (" l0=%08x l1=%08x l2=%08x l3=%08x\n", -+ trap->Registers[REG_LOCALS+(0^WordEndianFlip)], trap->Registers[REG_LOCALS+(1^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(2^WordEndianFlip)], trap->Registers[REG_LOCALS+(3^WordEndianFlip)]); -+ printk (" l4=%08x l5=%08x l6=%08x l7=%08x\n", -+ trap->Registers[REG_LOCALS+(4^WordEndianFlip)], trap->Registers[REG_LOCALS+(5^WordEndianFlip)], -+ trap->Registers[REG_LOCALS+(6^WordEndianFlip)], trap->Registers[REG_LOCALS+(7^WordEndianFlip)]); -+ } -+ if (trap->DirtyBits.s.InsDirty) -+ { -+ printk (" i0=%08x i1=%08x i2=%08x i3=%08x\n", -+ trap->Registers[REG_INS+(0^WordEndianFlip)], trap->Registers[REG_INS+(1^WordEndianFlip)], -+ trap->Registers[REG_INS+(2^WordEndianFlip)], trap->Registers[REG_INS+(3^WordEndianFlip)]); -+ printk (" i4=%08x i5=%08x i6=%08x i7=%08x\n", -+ trap->Registers[REG_INS+(4^WordEndianFlip)], trap->Registers[REG_INS+(5^WordEndianFlip)], -+ trap->Registers[REG_INS+(6^WordEndianFlip)], trap->Registers[REG_INS+(7^WordEndianFlip)]); -+ } -+ -+// panic ("ep3_tprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+int -+ep3_iprocTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int channel) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ EP3_COOKIE *cp; -+ sdramaddr_t event; -+ E3_uint32 type; -+ sdramaddr_t dma; -+ E3_DMA_BE dmabe; -+ -+ ASSERT (trap->Transactions[0].s.TrTypeCntx.s.Context & SYS_CONTEXT_BIT); -+ -+ /* -+ * first process the trap to determine the cause -+ */ -+ InspectIProcTrap (ctxt, trap); -+ -+ if (! trap->AckSent && trap->LockQueuePointer) /* Must be a network error in a queueing DMA */ -+ { /* packet - unlock the queue */ -+ IncrStat (rail, QueueingPacketTrap); -+ -+ SimulateUnlockQueue (ctxt, trap->LockQueuePointer, FALSE); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent && trap->BadTransaction) -+ { -+ spin_unlock (&dev->IntrLock); -+ -+ /* NOTE - no network error fixup is necessary for system context -+ * messages since they are idempotent and are single packet -+ * dmas -+ */ -+ if (EP3_CONTEXT_ISDATA (trap->Transactions[0].s.TrTypeCntx.s.Context)) -+ { -+ int nodeId = EP3_CONTEXT_TO_NODE(trap->Transactions[0].s.TrTypeCntx.s.Context); -+ -+ if (trap->DmaIdentifyTransaction) -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->DmaIdentifyTransaction->s.TrAddr); -+ else if (trap->ThreadIdentifyTransaction) -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, channel, trap->ThreadIdentifyTransaction->s.TrAddr); -+ else -+ ep_queue_network_error (&rail->Generic, nodeId, EP_NODE_NETERR_DMA_PACKET, channel, 0); -+ } -+ -+ spin_lock (&dev->IntrLock); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->AckSent) -+ { -+ if (trap->TrappedTransaction == NULL) -+ return (OP_HANDLED); -+ -+ while (! trap->TrappedTransaction->s.TrTypeCntx.s.LastTrappedTrans) -+ { -+ E3_IprocTrapHeader_BE *hdrp = trap->TrappedTransaction; -+ E3_IprocTrapData_BE *datap = trap->TrappedDataBuffer; -+ -+ ASSERT (hdrp->s.TrTypeCntx.s.StatusRegValid != 0); -+ -+ if ((hdrp->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) != 0) -+ { -+ printk ("ep3_iprocTrap: WRITEBLOCK : Addr %x\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ } -+ else -+ { -+ switch (hdrp->s.TrTypeCntx.s.Type & TR_OPCODE_TYPE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_TYPE_MASK: -+ switch (GET_STATUS_TRAPTYPE (hdrp->s.IProcTrapStatus)) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_DMA) == EV_TYPE_DMA && -+ (dma = ep_elan2sdram (&rail->Generic, (type & ~EV_TYPE_MASK2))) != (sdramaddr_t) 0) -+ { -+ elan3_sdram_copyq_from_sdram (dev, dma, &dmabe, sizeof (E3_DMA)); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ -+ /* we MUST convert this into a DMA_READ_REQUEUE dma as if we don't the -+ * DMA descriptor will be read from the EP3_RETRY_DMA rather than the -+ * original DMA - this can then get reused and an incorrect DMA -+ * descriptor sent -+ * eventp->ev_Type contains the dma address with type in the lower bits -+ */ -+ -+ dmabe.s.dma_source = (type & ~EV_TYPE_MASK2); -+ dmabe.s.dma_direction = (dmabe.s.dma_direction & ~DMA_READ) | DMA_READ_REQUEUE; -+ } -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = (dmabe.s.dma_direction == DMA_WRITE ? dmabe.s.dma_destVProc : dmabe.s.dma_srcVProc); -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find dma to restart\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ -+ case MI_EventQueueOverflow: -+ { -+ sdramaddr_t event; -+ E3_uint32 type; -+ -+ IncrStat (rail, IprocEventQueueOverflow); -+ -+ if ((event = ep_elan2sdram (&rail->Generic, hdrp->s.TrAddr)) != (sdramaddr_t) 0 && -+ ((type = elan3_sdram_readl (dev, event + offsetof (E3_Event, ev_Type))) & EV_TYPE_MASK_EVIRQ) == EV_TYPE_EVIRQ) -+ { -+ spin_unlock (&dev->IntrLock); -+ ep3_event (ctxt, (type & ~(EV_TYPE_MASK_EVIRQ|EV_TYPE_MASK_BCOPY)), OP_LWP); -+ spin_lock (&dev->IntrLock); -+ -+ break; -+ } -+ -+ printk ("ep3_iprocTrap: SETEVENT : %x - cannot find event\n", hdrp->s.TrAddr); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ -+ default: -+ printk ("ep3_iprocTrap: SETEVENT : %x MI=%x\n", hdrp->s.TrAddr, GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+// panic ("ep3_iprocTrap\n"); -+ break; -+ } -+ break; -+ -+ case TR_SENDDISCARD & TR_OPCODE_TYPE_MASK: -+ /* Just ignore send-discard transactions */ -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_TYPE_MASK: -+ { -+ E3_DMA_BE *dmap = (E3_DMA_BE *) datap; -+ -+ if (GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus) != MI_DmaQueueOverflow) -+ { -+ printk ("ep3_iprocTrap: MI=%x\n", GET_STATUS_TRAPTYPE(hdrp->s.IProcTrapStatus)); -+ break; -+ } -+ -+ IncrStat (rail, IprocDmaQueueOverflow); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmap->s.dma_srcEvent); -+ -+ /* modify the dma type since it will still be a "read" dma */ -+ dmap->s.dma_type = (dmap->s.dma_type & ~DMA_TYPE_READ) | DMA_TYPE_ISREMOTE; -+ -+#ifdef DEBUG_ASSERT -+ { -+ E3_uint16 vp = dmap->s.dma_destVProc; -+ EP_NODE_RAIL *nodeRail = &rail->Generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ } -+#endif -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, dmap, EAGAIN); -+ else -+ { -+ ASSERT (dmap->s.dma_direction == DMA_WRITE && dmap->s.dma_srcEvent == 0 && dmap->s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, dmap, EP_RETRY_ANONYMOUS); -+ } -+ break; -+ } -+ default: -+ printk ("ep3_iprocTrap: %s\n", IProcTrapString (hdrp, datap)); -+ break; -+ } -+ } -+ -+ /* -+ * We've successfully processed this transaction, so move onto the -+ * next one. -+ */ -+ trap->TrappedTransaction++; -+ trap->TrappedDataBuffer++; -+ } -+ -+ return (OP_HANDLED); -+ } -+ -+ /* Workaround WRITEBLOCK transaction executed when LOCKQUEUE transaction missed */ -+ if ((trap->TrappedTransaction->s.TrTypeCntx.s.Type & TR_WRITEBLOCK_BIT) && /* a DMA packet */ -+ trap->LockQueuePointer == 0 && trap->UnlockQueuePointer && /* a queueing DMA */ -+ trap->TrappedTransaction->s.TrAddr == trap->FaultSave.s.FaultAddress) /* and missed lockqueue */ -+ { -+ printk ("ep3_iprocTrap: missed lockqueue transaction for queue %x\n", trap->UnlockQueuePointer); -+ return (OP_HANDLED); -+ } -+ -+ if (trap->FaultSave.s.FaultContext != 0) -+ printk ("ep3_iprocTrap: pagefault at %08x in context %x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.FaultContext); -+ -+// panic ("ep3_iprocTrap: unexpected inputter trap\n"); -+ -+ return (OP_HANDLED); -+} -+ -+/* -+ * Command processor trap -+ * kernel comms should only be able to generate -+ * queue overflow traps -+ */ -+int -+ep3_cprocTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ int ctxnum = (trap->TrapBuf.r.Breg >> 16) & MAX_ROOT_CONTEXT_MASK; -+ ELAN3_DEV *dev = rail->Device; -+ EP3_DMA_RING *ring; -+ EP3_COOKIE *cp; -+ E3_DMA_BE dmabe; -+ int vp, slot; -+ unsigned long flags; -+ -+ switch (trap->Status.s.TrapType) -+ { -+ case MI_DmaQueueOverflow: -+ IncrStat (rail, CprocDmaQueueOverflow); -+ -+ /* Use the context number that the setevent was issued in, -+ * to find the appropriate dma ring, then since they are guaranteed -+ * to be issued in order, we just search backwards till we find the -+ * last one which has completed its word copy - this must be the -+ * one which had caused the DmaQueueOverflow trap ! */ -+ -+ ASSERT (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)); -+ -+ spin_lock_irqsave (&dev->CProcLock, flags); -+ -+ ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ slot = DMA_RING_PREV_POS(ring, ring->Position); -+ -+ while (ring->pDoneBlk[slot] == EP3_EVENT_ACTIVE) -+ slot = DMA_RING_PREV_POS(ring, slot); -+ -+ elan3_sdram_copyq_from_sdram (rail->Device , DMA_RING_DMA(ring,slot), &dmabe, sizeof (E3_DMA)); -+ -+#if defined(DEBUG_ASSERT) -+ while (slot != DMA_RING_PREV_POS(ring, ring->Position)) -+ { -+ ASSERT (ring->pDoneBlk[slot] != EP3_EVENT_ACTIVE); -+ -+ slot = DMA_RING_PREV_POS(ring, slot); -+ } -+#endif -+ spin_unlock_irqrestore (&dev->CProcLock, flags); -+ -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_srcEvent); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction = DMA_READ_REQUEUE); -+ -+ cp = LookupEventCookie (rail, &rail->CookieTable, dmabe.s.dma_destEvent); -+ } -+ -+#if defined(DEBUG_ASSERT) -+ if (dmabe.s.dma_direction == DMA_WRITE) -+ vp = dmabe.s.dma_destVProc; -+ else -+ vp = dmabe.s.dma_srcVProc; -+ -+ ASSERT (!EP_VP_ISDATA(vp) || (rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State >= EP_NODE_CONNECTED && -+ rail->Generic.Nodes[EP_VP_TO_NODE(vp)].State <= EP_NODE_LOCAL_PASSIVATE)); -+#endif -+ -+ if (cp != NULL) -+ cp->Operations->DmaRetry (rail, cp->Arg, &dmabe, EAGAIN); -+ else -+ { -+ ASSERT (dmabe.s.dma_direction == DMA_WRITE && dmabe.s.dma_srcEvent == 0 && dmabe.s.dma_isRemote); -+ -+ QueueDmaForRetry (rail, &dmabe, EP_RETRY_ANONYMOUS); -+ } -+ -+ return (OP_HANDLED); -+ -+ case MI_EventQueueOverflow: -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ IncrStat (rail, CprocEventQueueOverflow); -+ -+ rail->CommandPortEventTrap = TRUE; -+ return (OP_HANDLED); -+ -+#if defined(PER_CPU_TIMEOUT) -+ case MI_SetEventReadWait: -+ if (ctxnum == ELAN3_MRF_CONTEXT_NUM && trap->FaultSave.s.EventAddress == EP_PACEMAKER_EVENTADDR) -+ { -+ HeartbeatPacemaker (rail); -+ return (OP_HANDLED); -+ } -+#endif -+ -+ default: -+ printk ("ep3_cprocTrap : Context=%x Status=%x TrapType=%x\n", ctxnum, trap->Status.Status, trap->Status.s.TrapType); -+ printk (" FaultAddr=%x EventAddr=%x FSR=%x\n", -+ trap->FaultSave.s.FaultAddress, trap->FaultSave.s.EventAddress, -+ trap->FaultSave.s.FSR.Status); -+ break; -+ } -+ -+// panic ("ep3_cprocTrap"); -+ -+ return (OP_HANDLED); -+} -+ -+static int -+ep3_cprocReissue (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *tbuf) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ unsigned cmdoff = (tbuf->s.ContextType >> 5) & 0xFF; -+ int ctxnum = (tbuf->s.ContextType >> 16) & MAX_ROOT_CONTEXT_MASK; -+ -+ if (ctxnum >= ELAN3_DMARING_BASE_CONTEXT_NUM && ctxnum < (ELAN3_DMARING_BASE_CONTEXT_NUM+EP3_NUM_RINGS)) -+ { -+ EP3_DMA_RING *ring = &rail->DmaRings[ctxnum - ELAN3_DMARING_BASE_CONTEXT_NUM]; -+ -+ ASSERT ((cmdoff << 2) == offsetof (E3_CommandPort, SetEvent)); /* can only be setevent commands! */ -+ ASSERT (tbuf->s.Addr >= DMA_RING_EVENT_ELAN(ring,0) && tbuf->s.Addr < DMA_RING_EVENT_ELAN(ring, ring->Entries)); -+ -+ writel (tbuf->s.Addr, ring->CommandPort + (cmdoff << 2)); -+ } -+ else -+ { -+ ASSERT (ctxnum == ELAN3_MRF_CONTEXT_NUM); -+ -+ writel (tbuf->s.Addr, ctxt->CommandPort + (cmdoff << 2)); -+ } -+ -+ return (OP_HANDLED); -+} -+ -+static E3_uint8 -+ep3_load8 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readb (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != NULL) -+ return (*ptr); -+ -+ printk ("ep3_load8: %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store8 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint8 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeb (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store8 %08x\n", addr); -+} -+ -+static E3_uint16 -+ep3_load16 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readw (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load16 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store16 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint16 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writew (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store16 %08x\n", addr); -+} -+ -+static E3_uint32 -+ep3_load32 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readl(dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load32 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store32 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint32 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writel (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store32 %08x\n", addr); -+} -+ -+static E3_uint64 -+ep3_load64 (ELAN3_CTXT *ctxt, E3_Addr addr) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ return (elan3_sdram_readq (dev, offset)); -+ if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ return (*ptr); -+ -+ printk ("ep3_load64 %08x\n", addr); -+ return (0); -+} -+ -+static void -+ep3_store64 (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val) -+{ -+ EP3_RAIL *rail = (EP3_RAIL *) ctxt->Private; -+ ELAN3_DEV *dev = ctxt->Device; -+ sdramaddr_t offset; -+ E3_uint64 *ptr; -+ -+ if ((offset = ep_elan2sdram (&rail->Generic, addr)) != 0) -+ elan3_sdram_writeq (dev, offset, val); -+ else if ((ptr = ep_elan2main (&rail->Generic, addr)) != 0) -+ *ptr = val; -+ else -+ printk ("ep3_store64 %08x\n", addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/support_elan4.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/support_elan4.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/support_elan4.c 2005-07-28 14:52:52.910669312 -0400 -@@ -0,0 +1,1184 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: support_elan4.c,v 1.18.2.3 2004/11/18 12:05:00 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/support_elan4.c,v $*/ -+ -+#include -+#include -+ -+#include -+ -+#include "kcomm_vp.h" -+#include "kcomm_elan4.h" -+#include "debug.h" -+ -+#include -+#include -+ -+void -+ep4_register_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp, E4_uint64 cookie, void (*callback)(EP4_RAIL *r, void *arg), void *arg) -+{ -+ unsigned long flags; -+ -+ cp->int_val = cookie; -+ cp->int_callback = callback; -+ cp->int_arg = arg; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_add_tail (&cp->int_link, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+void -+ep4_deregister_intcookie (EP4_RAIL *rail, EP4_INTCOOKIE *cp) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_del (&cp->int_link); -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+} -+ -+ -+EP4_INTCOOKIE * -+ep4_lookup_intcookie (EP4_RAIL *rail, E4_uint64 cookie) -+{ -+ struct list_head *el; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_intcookie_lock, flags); -+ list_for_each (el, &rail->r_intcookie_hash[EP4_INTCOOKIE_HASH(cookie)]) { -+ EP4_INTCOOKIE *cp = list_entry (el, EP4_INTCOOKIE, int_link); -+ -+ if (cp->int_val == cookie) -+ { -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return cp; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_intcookie_lock, flags); -+ return NULL; -+} -+ -+E4_uint64 -+ep4_neterr_cookie (EP4_RAIL *rail, unsigned int node) -+{ -+ E4_uint64 cookie; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_cookie_lock, flags); -+ cookie = rail->r_cookies[node]; -+ -+ rail->r_cookies[node] += EP4_COOKIE_INC; -+ -+ spin_unlock_irqrestore (&rail->r_cookie_lock, flags); -+ -+ return cookie; -+} -+ -+void -+ep4_eproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_EPROC_TRAP trap; -+ -+ elan4_extract_eproc_trap (ctxt->ctxt_dev, status, &trap, 0); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_eproc_trap (DBG_BUFFER, 0, "ep4_eproc_trap", &trap); -+ -+ switch (EPROC_TrapType (status)) -+ { -+ case EventProcNoFault: -+ EPRINTF1 (DBG_EPTRAP, "%s: EventProcNoFault\n", rail->r_generic.Name); -+ return; -+ -+ default: -+ printk ("%s: unhandled eproc trap %d\n", rail->r_generic.Name, EPROC_TrapType (status)); -+ elan4_display_eproc_trap (DBG_CONSOLE, 0, "ep4_eproc_trap", &trap); -+ } -+} -+ -+void -+ep4_cproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_CPROC_TRAP trap; -+ struct list_head *el; -+ register int i; -+ -+ elan4_extract_cproc_trap (ctxt->ctxt_dev, status, &trap, cqnum); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_cproc_trap (DBG_BUFFER, 0, "ep4_cproc_trap", &trap); -+ -+ switch (CPROC_TrapType (status)) -+ { -+ case CommandProcInterruptQueueOverflow: -+ /* -+ * Try and handle a bunch of elan main interrupts -+ */ -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: defer command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_queue_mainintop (ctxt->ctxt_dev, &ecq->ecq_intop); -+ return; -+ } -+ } -+ } -+ break; -+ -+ case CommandProcDmaQueueOverflow: -+ case CommandProcThreadQueueOverflow: -+ for (i = 0; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (elan4_cq2num (ecq->ecq_cq) == cqnum) -+ { -+ printk ("%s: restart command queue %d after trap %x\n", -+ rail->r_generic.Name, cqnum, CPROC_TrapType (status)); -+ -+ elan4_restartcq (ctxt->ctxt_dev, ecq->ecq_cq); -+ return; -+ } -+ } -+ } -+ break; -+ } -+ -+ printk ("%s: unhandled cproc trap %d for cqnum %d\n", rail->r_generic.Name, CPROC_TrapType (status), cqnum); -+ elan4_display_cproc_trap (DBG_CONSOLE, 0, "ep4_cproc_trap", &trap); -+} -+ -+void -+ep4_dproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_DPROC_TRAP trap; -+ -+ elan4_extract_dproc_trap (ctxt->ctxt_dev, status, &trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_dproc_trap (DBG_BUFFER, 0, "ep4_dproc_trap", &trap); -+ -+ if (! DPROC_PrefetcherFault (trap.tr_status)) -+ { -+ switch (DPROC_TrapType (trap.tr_status)) -+ { -+ case DmaProcFailCountError: -+ goto retry_this_dma; -+ -+ case DmaProcPacketAckError: -+ goto retry_this_dma; -+ -+ case DmaProcQueueOverflow: -+ goto retry_this_dma; -+ } -+ } -+ -+ printk ("%s: unhandled dproc trap\n", rail->r_generic.Name); -+ elan4_display_dproc_trap (DBG_CONSOLE, 0, "ep4_dproc_trap", &trap); -+ return; -+ -+ retry_this_dma: -+ /*XXXX implement backoff .... */ -+ -+ ep4_queue_dma_retry (rail, &trap.tr_desc, EP_RETRY_LOW_PRI); -+} -+ -+void -+ep4_tproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_TPROC_TRAP *trap = &rail->r_tproc_trap; -+ -+ elan4_extract_tproc_trap (ctxt->ctxt_dev, status, trap); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_tproc_trap (DBG_BUFFER, 0, "ep4_tproc_trap", trap); -+ -+ printk ("%s: unhandled tproc trap\n", rail->r_generic.Name); -+ elan4_display_tproc_trap (DBG_CONSOLE, 0, "ep4_tproc_trap", trap); -+} -+ -+void -+ep4_iproc_trap (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ ELAN4_IPROC_TRAP *trap = &rail->r_iproc_trap; -+ -+ elan4_extract_iproc_trap (ctxt->ctxt_dev, status, trap, unit); -+ -+ if (epdebug & DBG_EPTRAP) -+ elan4_display_iproc_trap (DBG_BUFFER, 0, "ep4_iproc_trap", trap); -+ -+ elan4_inspect_iproc_trap (trap); -+ -+ switch (IPROC_TrapValue (trap->tr_transactions[trap->tr_trappedTrans].IProcStatusCntxAndTrType)) -+ { -+ case InputDmaQueueOverflow: -+ ep4_queue_dma_retry (rail, (E4_DMA *) &trap->tr_dataBuffers[trap->tr_trappedTrans], EP_RETRY_LOW_PRI); -+ return; -+ -+ case InputEventEngineTrapped: -+ { -+ E4_IprocTrapHeader *hdrp = &trap->tr_transactions[trap->tr_trappedTrans]; -+ sdramaddr_t inputq; -+ E4_Addr event; -+ -+ /* XXXX: flow control on the command queue which we issue to is -+ * rather difficult, we don't want to have space for an event -+ * for each possible context, nor the mechanism to hold the -+ * context filter up until the event has been executed. Given -+ * that the event engine will be restarted by this same interrupt -+ * and we're using high priority command queues, then we just use -+ * a single small command queue for this. -+ */ -+ switch (IPROC_TransactionType(hdrp->IProcStatusCntxAndTrType) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT & TR_OPCODE_MASK: -+ if (hdrp->TrAddr != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, hdrp->TrAddr); -+ return; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if ((inputq = ep_elan2sdram (&rail->r_generic, hdrp->TrAddr)) == 0) -+ printk ("%s: TR_INPUT_Q_COMMIT at %llx is not sdram\n", rail->r_generic.Name, hdrp->TrAddr); -+ else -+ { -+ if ((event = elan4_sdram_readq (rail->r_ctxt.ctxt_dev, inputq + offsetof (E4_InputQueue, q_event))) != 0) -+ ep4_set_event_cmd (rail->r_event_ecq, event); -+ return; -+ } -+ } -+ break; -+ } -+ -+ case InputEopErrorOnWaitForEop: -+ case InputEopErrorTrap: -+ case InputCrcErrorAfterPAckOk: -+ if (! (trap->tr_flags & TR_FLAG_ACK_SENT) || (trap->tr_flags & TR_FLAG_EOP_BAD)) -+ return; -+ -+ if (EP4_CONTEXT_ISDATA (IPROC_NetworkContext (status))) -+ { -+ unsigned int nodeId = EP4_CONTEXT_TO_NODE (IPROC_NetworkContext (status)); -+ -+ if ((trap->tr_flags & (TR_FLAG_DMA_PACKET | TR_FLAG_BAD_TRANS)) || -+ ((trap->tr_flags & TR_FLAG_EOP_ERROR) && (trap->tr_identifyTrans == TR_TRANS_INVALID))) -+ { -+ printk ("%s: network error on dma packet from node %d\n", rail->r_generic.Name, nodeId); -+ -+ ep_queue_network_error (&rail->r_generic, EP4_CONTEXT_TO_NODE(IPROC_NetworkContext (status)), EP_NODE_NETERR_DMA_PACKET, unit & 1, 0); -+ return; -+ } -+ -+ if (trap->tr_flags & TR_FLAG_EOP_ERROR) -+ { -+ E4_uint64 status = trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType; -+ EP_NETERR_COOKIE cookie = 0; -+ -+ switch (IPROC_TransactionType (status) & TR_OPCODE_MASK) -+ { -+ case TR_SETEVENT_IDENTIFY & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ printk ("%s: network error on setevent <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_INPUT_Q_COMMIT & TR_OPCODE_MASK: -+ if (IPROC_TrapValue(status) == InputNoFault) -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ else -+ cookie = trap->tr_dataBuffers[trap->tr_identifyTrans].Data[0]; -+ printk ("%s: network error on queue commit <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_REMOTEDMA & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ printk ("%s: network error on remote dma <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ case TR_IDENTIFY & TR_OPCODE_MASK: -+ cookie = trap->tr_transactions[trap->tr_identifyTrans].TrAddr; -+ printk ("%s: network error on identify <%lld%s%s%s%s> from node %d\n", rail->r_generic.Name, EP4_COOKIE_STRING(cookie), nodeId); -+ break; -+ -+ default: -+ panic ("%s: unknown identify transaction type %x for eop error from node %d\n", rail->r_generic.Name, -+ IPROC_TransactionType (trap->tr_transactions[trap->tr_identifyTrans].IProcStatusCntxAndTrType), nodeId); -+ break; -+ } -+ -+ ep_queue_network_error (&rail->r_generic, nodeId, EP_NODE_NETERR_ATOMIC_PACKET, unit & 1, cookie); -+ } -+ } -+ return; -+ } -+ -+ printk ("%s: unhandled iproc trap\n", rail->r_generic.Name); -+ elan4_display_iproc_trap (DBG_CONSOLE, 0, "ep4_iproc_trap", trap); -+} -+ -+void -+ep4_interrupt (ELAN4_CTXT *ctxt, E4_uint64 cookie) -+{ -+ EP4_RAIL *rail = EP4_CTXT_TO_RAIL (ctxt); -+ EP4_INTCOOKIE *cp = ep4_lookup_intcookie (rail, cookie); -+ -+ if (cp == NULL) -+ { -+ printk ("ep4_interrupt: cannot find event cookie for %016llx\n", (long long) cookie); -+ return; -+ } -+ -+ cp->int_callback (rail, cp->int_arg); -+} -+ -+ELAN4_TRAP_OPS ep4_trap_ops = -+{ -+ ep4_eproc_trap, -+ ep4_cproc_trap, -+ ep4_dproc_trap, -+ ep4_tproc_trap, -+ ep4_iproc_trap, -+ ep4_interrupt, -+}; -+ -+void -+ep4_flush_filters (EP_RAIL *r) -+{ -+ /* nothing to do here as elan4_set_filter() flushes the context filter */ -+} -+ -+struct flush_queues_desc -+{ -+ EP4_RAIL *rail; -+ volatile int done; -+} ; -+ -+static void -+ep4_flush_queues_flushop (ELAN4_DEV *dev, void *arg, int qfull) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ E4_uint64 qptrs = read_reg64 (dev, DProcHighPriPtrs); -+ E4_uint32 qsize = E4_QueueSize (E4_QueueSizeValue (qptrs)); -+ E4_uint32 qfptr = E4_QueueFrontPointer (qptrs); -+ E4_uint32 qbptr = E4_QueueBackPointer (qptrs); -+ E4_DProcQueueEntry qentry; -+ unsigned long flags; -+ -+ while ((qfptr != qbptr) || qfull) -+ { -+ E4_uint64 typeSize = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_typeSize)); -+ -+ if (DMA_Context (qentry.Desc.dma_typeSize) == rail->r_ctxt.ctxt_num) -+ { -+ E4_uint64 vp = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_vproc)); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(vp)]; -+ -+ EP4_ASSERT (rail, !EP_VP_ISDATA(vp) || (nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE)); -+ -+ if (EP_VP_ISDATA(vp) && nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ /* -+ * This is a DMA going to the node which is being removed, -+ * so move it onto the node dma list where it will get -+ * handled later. -+ */ -+ qentry.Desc.dma_typeSize = typeSize; -+ qentry.Desc.dma_cookie = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_cookie)); -+ qentry.Desc.dma_vproc = vp; -+ qentry.Desc.dma_srcAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcAddr)); -+ qentry.Desc.dma_dstAddr = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstAddr)); -+ qentry.Desc.dma_srcEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_srcEvent)); -+ qentry.Desc.dma_dstEvent = elan4_sdram_readq (dev, qfptr + offsetof (E4_DProcQueueEntry, Desc.dma_dstEvent)); -+ -+ EPRINTF4 (DBG_RETRY, "ep4_flush_dmas: %016llx %016llx %016llx %016llx\n", qentry.Desc.dma_typeSize, -+ qentry.Desc.dma_cookie, qentry.Desc.dma_vproc, qentry.Desc.dma_srcAddr); -+ EPRINTF3 (DBG_RETRY, " %016llx %016llx %016llx\n", qentry.Desc.dma_dstAddr, -+ qentry.Desc.dma_srcEvent, qentry.Desc.dma_dstEvent); -+ -+ ep4_queue_dma_stalled (rail, &qentry.Desc); -+ -+ qentry.Desc.dma_typeSize = DMA_ShMemWrite | dev->dev_ctxt.ctxt_num; -+ qentry.Desc.dma_cookie = 0; -+ qentry.Desc.dma_vproc = 0; -+ qentry.Desc.dma_srcAddr = 0; -+ qentry.Desc.dma_dstAddr = 0; -+ qentry.Desc.dma_srcEvent = 0; -+ qentry.Desc.dma_dstEvent = 0; -+ -+ elan4_sdram_copyq_to_sdram (dev, &qentry, qfptr, sizeof (E4_DProcQueueEntry)); -+ } -+ } -+ -+ qfptr = (qfptr & ~(qsize-1)) | ((qfptr + sizeof (E4_DProcQueueEntry)) & (qsize-1)); -+ qfull = 0; -+ } -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+static void -+ep4_flush_queues_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct flush_queues_desc *desc = (struct flush_queues_desc *) arg; -+ -+ elan4_queue_dma_flushop (dev, &desc->rail->r_flushop, 1); -+} -+ -+void -+ep4_flush_queues (EP_RAIL *r) -+{ -+ EP4_RAIL *rail = (EP4_RAIL *) r; -+ struct flush_queues_desc desc; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ int i; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.done = 0; -+ -+ /* First - stall the dma retry thread, so that it will no longer restart -+ * any dma's from the retry list */ -+ ep_kthread_stall (&rail->r_retry_thread); -+ -+ /* Second - flush through all command queues targetted by events, thread etc */ -+ ep4_flush_ecqs (rail); -+ -+ /* Third - queue a halt operation to flush through all DMA's which are executing -+ * or on the run queues */ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DProcHalted; -+ rail->r_haltop.op_function = ep4_flush_queues_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ rail->r_flushop.op_function = ep4_flush_queues_flushop; -+ rail->r_flushop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ kmutex_unlock (&rail->r_haltop_mutex); -+ -+ /* Fourth - run down the dma retry lists and move all entries to the cancelled -+ * list. Any dma's which were on the run queues have already been -+ * moved there */ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each_safe (el,nel, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(retry->retry_dma.dma_vproc)]; -+ -+ EP4_ASSERT (rail, nodeRail->State >= EP_NODE_CONNECTED && nodeRail->State <= EP_NODE_LOCAL_PASSIVATE); -+ -+ if (nodeRail->State == EP_NODE_LOCAL_PASSIVATE) -+ { -+ list_del (&retry->retry_link); -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ } -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ /* Finally - allow the retry thread to run again */ -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+struct write_qdesc_desc -+{ -+ EP4_RAIL *rail; -+ sdramaddr_t qaddr; -+ E4_InputQueue *qdesc; -+ volatile int done; -+} ; -+ -+static void -+ep4_write_qdesc_haltop (ELAN4_DEV *dev, void *arg) -+{ -+ struct write_qdesc_desc *desc = (struct write_qdesc_desc *) arg; -+ EP4_RAIL *rail = desc->rail; -+ unsigned long flags; -+ -+ elan4_sdram_copyq_to_sdram (dev, desc->qdesc, desc->qaddr, sizeof (E4_InputQueue)); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ desc->done = 1; -+ kcondvar_wakeupall (&rail->r_haltop_sleep, &rail->r_haltop_lock); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+} -+ -+void -+ep4_write_qdesc (EP4_RAIL *rail, sdramaddr_t qaddr, E4_InputQueue *qdesc) -+{ -+ struct write_qdesc_desc desc; -+ unsigned long flags; -+ -+ /* initialise descriptor */ -+ desc.rail = rail; -+ desc.qaddr = qaddr; -+ desc.qdesc = qdesc; -+ desc.done = 0; -+ -+ kmutex_lock (&rail->r_haltop_mutex); -+ -+ rail->r_haltop.op_mask = INT_DiscardingHighPri; -+ rail->r_haltop.op_function = ep4_write_qdesc_haltop; -+ rail->r_haltop.op_arg = &desc; -+ -+ elan4_queue_haltop (rail->r_ctxt.ctxt_dev, &rail->r_haltop); -+ -+ spin_lock_irqsave (&rail->r_haltop_lock, flags); -+ while (! desc.done) -+ kcondvar_wait (&rail->r_haltop_sleep, &rail->r_haltop_lock, &flags); -+ spin_unlock_irqrestore (&rail->r_haltop_lock, flags); -+ -+ kmutex_unlock (&rail->r_haltop_mutex); -+} -+#define CQ_SIZE_NWORDS ((CQ_Size (ecq->ecq_cq->cq_size) >> 3) - 8) /* available number of dwords (less enough to flush) */ -+EP4_ECQ * -+ep4_alloc_ecq (EP4_RAIL *rail, unsigned cqsize) -+{ -+ EP4_ECQ *ecq; -+ unsigned long pgoff; -+ -+ /* no space available, so allocate a new entry */ -+ KMEM_ZALLOC (ecq, EP4_ECQ *, sizeof (EP4_ECQ), 1); -+ -+ if (ecq == NULL) -+ return 0; -+ -+ if ((ecq->ecq_cq = elan4_alloccq (&rail->r_ctxt, cqsize, CQ_EnableAllBits, CQ_Priority)) == NULL) -+ { -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+ return 0; -+ } -+ -+ pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ -+ ecq->ecq_addr = ep_rmalloc (rail->r_ecq_rmap, PAGESIZE, 0) + pgoff; -+ ecq->ecq_avail = CQ_SIZE_NWORDS; /* available number of dwords (less enough to flush) */ -+ -+ ecq->ecq_intop.op_function = (ELAN4_HALTFN *) elan4_restartcq; -+ ecq->ecq_intop.op_arg = ecq->ecq_cq; -+ -+ ep4_ioaddr_map (&rail->r_generic, ecq->ecq_addr - pgoff, ecq->ecq_cq->cq_mapping - pgoff, PAGESIZE, EP_PERM_WRITE); -+ -+ spin_lock_init (&ecq->ecq_lock); -+ -+ return ecq; -+} -+ -+void -+ep4_free_ecq (EP4_RAIL *rail, EP4_ECQ *ecq) -+{ -+ unsigned long pgoff = (ecq->ecq_cq->cq_mapping & (PAGE_SIZE-1)); -+ -+ spin_lock_destroy (&ecq->ecq_lock); -+ -+ ep4_unmap (&rail->r_generic, ecq->ecq_addr - pgoff, PAGESIZE); -+ ep_rmfree (rail->r_ecq_rmap, PAGESIZE, ecq->ecq_addr - pgoff); -+ -+ elan4_freecq (&rail->r_ctxt, ecq->ecq_cq); -+ -+ KMEM_FREE (ecq, sizeof (EP4_ECQ)); -+} -+ -+EP4_ECQ * -+ep4_get_ecq (EP4_RAIL *rail, unsigned which, unsigned ndwords) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ EP4_ECQ *ecq; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_for_each (el, &rail->r_ecq_list[which]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (ecq->ecq_avail >= ndwords) -+ { -+ ecq->ecq_avail -= ndwords; -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if ((ecq = ep4_alloc_ecq (rail, EP4_ECQ_Size (which))) == NULL) -+ return NULL; -+ -+ if (which == EP4_ECQ_EVENT) -+ { -+ if ((ecq->ecq_event = ep_alloc_elan (&rail->r_generic, sizeof (E4_Event32), 0, &ecq->ecq_event_addr)) == 0) -+ { -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr), -+ ecq->ecq_addr); -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue), -+ SET_EVENT_CMD | (rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event))); -+ -+ if ((ecq->ecq_flushcq = ep4_get_ecq (rail, EP4_ECQ_SINGLE, 1)) == NULL) -+ { -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ ep4_free_ecq (rail, ecq); -+ return NULL; -+ } -+ } -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ list_add (&ecq->ecq_link, &rail->r_ecq_list[which]); -+ -+ ecq->ecq_avail -= ndwords; -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ return ecq; -+} -+ -+void -+ep4_put_ecq (EP4_RAIL *rail, EP4_ECQ *ecq, unsigned ndwords) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ -+ ecq->ecq_avail += ndwords; -+ -+ if (ecq->ecq_avail != CQ_SIZE_NWORDS) -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ else -+ { -+ list_del (&ecq->ecq_link); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ if (ecq->ecq_flushcq) -+ ep4_put_ecq (rail, ecq->ecq_flushcq, 1); -+ if (ecq->ecq_event_addr) -+ ep_free_elan (&rail->r_generic, ecq->ecq_event_addr, sizeof (E4_Event32)); -+ -+ ep4_free_ecq (rail, ecq); -+ } -+} -+ -+void -+ep4_nop_cmd (EP4_ECQ *ecq, E4_uint64 tag) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_nop_cmd (ecq->ecq_cq, tag); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+ -+} -+ -+void -+ep4_set_event_cmd (EP4_ECQ *ecq, E4_Addr event) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_set_event_cmd (ecq->ecq_cq, event); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_wait_event_cmd (EP4_ECQ *ecq, E4_Addr event, E4_uint64 candt, E4_uint64 param0, E4_uint64 param1) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&ecq->ecq_lock, flags); -+ elan4_wait_event_cmd (ecq->ecq_cq, event, candt, param0, param1); -+ spin_unlock_irqrestore (&ecq->ecq_lock, flags); -+} -+ -+void -+ep4_flush_interrupt (EP4_RAIL *rail, void *arg) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ rail->r_flush_count = 0; -+ kcondvar_wakeupone (&rail->r_flush_sleep, &rail->r_ecq_lock); -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+} -+ -+void -+ep4_flush_ecqs (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ unsigned long flags; -+ int i; -+ -+ kmutex_lock (&rail->r_flush_mutex); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ /* first flush all the "event" queues */ -+ list_for_each (el, &rail->r_ecq_list[EP4_ECQ_EVENT]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ elan4_sdram_writeq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType), -+ E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0)); -+ -+ ep4_set_event_cmd (ecq->ecq_flushcq, ecq->ecq_event_addr); -+ -+ rail->r_flush_count++; -+ } -+ -+ /* next issue the setevents to all the other queues */ -+ for (i = EP4_ECQ_ATOMIC; i r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ ep4_set_event_cmd (ecq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event)); -+ -+ rail->r_flush_count++; -+ } -+ } -+ -+ /* issue the waitevent command */ -+ ep4_wait_event_cmd (rail->r_flush_mcq, rail->r_elan_addr + offsetof (EP4_RAIL_ELAN, r_flush_event), -+ E4_EVENT_INIT_VALUE (-32 * rail->r_flush_count, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0), -+ rail->r_flush_ecq->ecq_addr, -+ INTERRUPT_CMD | (rail->r_flush_intcookie.int_val << E4_MAIN_INT_SHIFT)); -+ -+ while (rail->r_flush_count) -+ kcondvar_wait (&rail->r_flush_sleep, &rail->r_ecq_lock, &flags); -+ -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ EP4_SDRAM_ASSERT (rail, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event), E4_EVENT_INIT_VALUE (0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG,0)); -+ -+ kmutex_unlock (&rail->r_flush_mutex); -+} -+ -+void -+ep4_init_thread (EP4_RAIL *rail, E4_ThreadRegs *regs, sdramaddr_t stackTop, -+ EP_ADDR stackAddr, E4_Addr startpc, int nargs,...) -+{ -+ sdramaddr_t sp = stackTop - roundup (nargs * sizeof (E4_uint64), E4_STACK_ALIGN); -+ int i; -+ va_list ap; -+ -+ /* -+ * the thread start code expects the following : -+ * %r1 = stack pointer -+ * %r6 = frame pointer -+ * %r2 = function to call -+ * -+ * function args are store on stack above %sp -+ */ -+ -+ va_start(ap, nargs); -+ for (i = 0; i < nargs; i++) -+ elan4_sdram_writeq (rail->r_ctxt.ctxt_dev, sp + (i * sizeof (E4_uint64)), va_arg (ap, E4_uint64)); -+ va_end (ap); -+ -+ regs->Registers[0] = ep_symbol (&rail->r_threadcode, ".thread_start"); /* %r0 - PC */ -+ regs->Registers[1] = stackAddr - (stackTop - sp); /* %r1 - stack pointer */ -+ regs->Registers[2] = startpc; /* %r2 - start pc */ -+ regs->Registers[3] = 0; -+ regs->Registers[4] = 0; -+ regs->Registers[5] = 0; -+ regs->Registers[6] = stackTop; /* %r6 - frame pointer */ -+} -+ -+/* retransmission thread */ -+ -+void -+ep4_add_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_add_tail (&ops->op_link, &rail->r_retry_ops); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_remove_retry_ops (EP4_RAIL *rail, EP4_RETRY_OPS *ops) -+{ -+ ep_kthread_stall (&rail->r_retry_thread); -+ list_del (&ops->op_link); -+ ep_kthread_resume (&rail->r_retry_thread); -+} -+ -+void -+ep4_retry_thread (EP4_RAIL *rail) -+{ -+ struct list_head *el; -+ -+ kernel_thread_init ("ep4_retry"); -+ -+ for (;;) -+ { -+ long nextRunTime = 0; -+ -+ list_for_each (el, &rail->r_retry_ops) { -+ EP4_RETRY_OPS *ops = list_entry (el, EP4_RETRY_OPS, op_link); -+ -+ nextRunTime = ops->op_func (rail, ops->op_arg, nextRunTime); -+ } -+ -+ if (ep_kthread_sleep (&rail->r_retry_thread, nextRunTime) < 0) -+ break; -+ } -+ -+ ep_kthread_stopped (&rail->r_retry_thread); -+ -+ kernel_thread_exit(); -+} -+ -+/* DMA retransmission */ -+static unsigned ep4_dma_retry_times[EP_NUM_RETRIES]; -+ -+static unsigned long -+ep4_retry_dmas (EP4_RAIL *rail, void *arg, unsigned long nextRunTime) -+{ -+ unsigned long yieldAt = lbolt + (hz/10); -+ unsigned long flags; -+ int i; -+ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ while (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ if (! AFTER(lbolt, retry->retry_time)) -+ break; -+ -+ if (ep_kthread_should_stall (&rail->r_retry_thread) || AFTER (lbolt, yieldAt)) -+ goto cant_do_more; -+ -+ EPRINTF3 (DBG_RETRY, "%s: ep4_retry_dmas: flowcnt %llx %llx\n", rail->r_generic.Name, rail->r_dma_flowcnt, rail->r_main->r_dma_flowcnt); -+ -+ if ((rail->r_dma_flowcnt - rail->r_main->r_dma_flowcnt) > EP4_DMA_RETRY_FLOWCNT) -+ { -+ printk ("ep4_retry_dmas: flowcnt %llx %llx\n", rail->r_dma_flowcnt, rail->r_main->r_dma_flowcnt); -+ -+ goto cant_do_more; -+ } -+ -+ EPRINTF4 (DBG_RETRY, "%s: ep4_retry_dmas: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ retry->retry_dma.dma_typeSize, retry->retry_dma.dma_cookie, retry->retry_dma.dma_vproc); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ retry->retry_dma.dma_srcAddr, retry->retry_dma.dma_dstAddr, retry->retry_dma.dma_srcEvent, -+ retry->retry_dma.dma_dstEvent); -+ -+ elan4_run_dma_cmd (rail->r_dma_ecq->ecq_cq, &retry->retry_dma); -+ elan4_write_dword_cmd (rail->r_dma_ecq->ecq_cq, rail->r_main_addr + offsetof (EP4_RAIL_MAIN, r_dma_flowcnt), ++rail->r_dma_flowcnt); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_del (&retry->retry_link); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ } -+ cant_do_more: -+ -+ /* re-compute the next retry time */ -+ for (i = EP_RETRY_BASE; i < EP_NUM_RETRIES; i++) -+ { -+ if (! list_empty (&rail->r_dma_retrylist[i])) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_retrylist[i].next, EP4_DMA_RETRY, retry_link); -+ -+ SET_NEXT_RUN_TIME (nextRunTime, retry->retry_time); -+ } -+ } -+ -+ return nextRunTime; -+} -+ -+void -+ep4_initialise_dma_retries (EP4_RAIL *rail) -+{ -+ int i; -+ -+ spin_lock_init (&rail->r_dma_lock); -+ -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ INIT_LIST_HEAD (&rail->r_dma_retrylist[i]); -+ -+ INIT_LIST_HEAD (&rail->r_dma_freelist); -+ -+ rail->r_dma_ecq = ep4_alloc_ecq (rail, EP4_DMA_RETRY_CQSIZE); -+ -+ rail->r_dma_allocated = 0; -+ rail->r_dma_reserved = 0; -+ -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI] = EP_RETRY_HIGH_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_HIGH_PRI_RETRY+i] = EP_RETRY_HIGH_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI] = EP_RETRY_LOW_PRI_TIME; -+ -+ for (i =0 ; i < EP_NUM_BACKOFF; i++) -+ ep4_dma_retry_times[EP_RETRY_LOW_PRI_RETRY+i] = EP_RETRY_LOW_PRI_TIME << i; -+ -+ ep4_dma_retry_times[EP_RETRY_ANONYMOUS] = EP_RETRY_ANONYMOUS_TIME; -+ ep4_dma_retry_times[EP_RETRY_NETERR] = EP_RETRY_NETERR_TIME; -+ -+ rail->r_dma_ops.op_func = ep4_retry_dmas; -+ rail->r_dma_ops.op_arg = NULL; -+ -+ ep4_add_retry_ops (rail, &rail->r_dma_ops); -+} -+ -+void -+ep4_finalise_dma_retries (EP4_RAIL *rail) -+{ -+ ep4_remove_retry_ops (rail, &rail->r_dma_ops); -+ -+ /* Everyone should have given back their retry dma's by now */ -+ EP4_ASSERT (rail, rail->r_dma_reserved == 0); -+ -+ while (! list_empty (&rail->r_dma_freelist)) -+ { -+ EP4_DMA_RETRY *retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ KMEM_FREE (retry, sizeof (EP4_DMA_RETRY)); -+ } -+ -+ ep4_free_ecq (rail, rail->r_dma_ecq); -+ -+ spin_lock_destroy (&rail->r_dma_lock); -+} -+ -+int -+ep4_reserve_dma_retries (EP4_RAIL *rail, unsigned int count, EP_ATTRIBUTE attr) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned int remaining = count; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ if (remaining <= (rail->r_dma_allocated - rail->r_dma_reserved)) -+ { -+ rail->r_dma_reserved += remaining; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 0; -+ } -+ -+ remaining -= (rail->r_dma_allocated - rail->r_dma_reserved); -+ -+ rail->r_dma_reserved = rail->r_dma_allocated; -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ while (remaining > 0) -+ { -+ KMEM_ALLOC (retry, EP4_DMA_RETRY *, sizeof (EP4_DMA_RETRY), !(attr & EP_NO_SLEEP)); -+ -+ if (retry == NULL) -+ goto failed; -+ -+ remaining--; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_add (&retry->retry_link, &rail->r_dma_freelist); -+ -+ rail->r_dma_allocated++; -+ rail->r_dma_reserved++; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ } -+ -+ return 0; -+ -+ failed: -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= (count - remaining); -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+ -+ return 1; -+} -+ -+void -+ep4_release_dma_retries (EP4_RAIL *rail, unsigned int count) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ rail->r_dma_reserved -= count; -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_retry (EP4_RAIL *rail, E4_DMA *dma, int interval) -+{ -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_retry: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_typeSize, dma->dma_cookie, dma->dma_vproc, dma->dma_srcAddr); -+ EPRINTF5 (DBG_RETRY, "%s: %016llx %016llx %016llx (%d)\n", rail->r_generic.Name, -+ dma->dma_dstAddr, dma->dma_srcEvent, dma->dma_dstEvent, interval); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ retry->retry_time = lbolt + ep4_dma_retry_times[interval]; -+ -+ /* chain onto the end of the approriate retry list */ -+ list_add_tail (&retry->retry_link, &rail->r_dma_retrylist[interval]); -+ -+ ep_kthread_schedule (&rail->r_retry_thread, retry->retry_time); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_queue_dma_stalled (EP4_RAIL *rail, E4_DMA *dma) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[EP_VP_TO_NODE(dma->dma_vproc)]; -+ EP4_DMA_RETRY *retry; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ -+ EP4_ASSERT (rail, !list_empty (&rail->r_dma_freelist)); -+ -+ /* take an item of the free list */ -+ retry = list_entry (rail->r_dma_freelist.next, EP4_DMA_RETRY, retry_link); -+ -+ list_del (&retry->retry_link); -+ -+ EPRINTF5 (DBG_RETRY, "%s: ep4_queue_dma_stalled: %016llx %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_typeSize, dma->dma_cookie, dma->dma_vproc, dma->dma_srcAddr); -+ EPRINTF4 (DBG_RETRY, "%s: %016llx %016llx %016llx\n", rail->r_generic.Name, -+ dma->dma_dstAddr, dma->dma_srcEvent, dma->dma_dstEvent); -+ -+ retry->retry_dma.dma_typeSize = dma->dma_typeSize; -+ retry->retry_dma.dma_cookie = dma->dma_cookie; -+ retry->retry_dma.dma_vproc = dma->dma_vproc; -+ retry->retry_dma.dma_srcAddr = dma->dma_srcAddr; -+ retry->retry_dma.dma_dstAddr = dma->dma_dstAddr; -+ retry->retry_dma.dma_srcEvent = dma->dma_srcEvent; -+ retry->retry_dma.dma_dstEvent = dma->dma_dstEvent; -+ -+ /* chain onto the node cancelled dma list */ -+ list_add_tail (&retry->retry_link, &nodeRail->StalledDmas); -+ -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_free_stalled_dmas (EP4_RAIL *rail, unsigned int nodeId) -+{ -+ EP_NODE_RAIL *nodeRail = &rail->r_generic.Nodes[nodeId]; -+ struct list_head *el, *nel; -+ unsigned long flags; -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ list_for_each_safe (el, nel, &nodeRail->StalledDmas) { -+ list_del (el); -+ list_add (el, &rail->r_dma_freelist); -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -+ -+void -+ep4_display_rail (EP4_RAIL *rail) -+{ -+ ELAN4_DEV *dev = rail->r_ctxt.ctxt_dev; -+ struct list_head *el; -+ register int i; -+ unsigned long flags; -+ -+ ep_debugf (DBG_DEBUG, "%s: vendorid=%x deviceid=%x\n", rail->r_generic.Name, -+ rail->r_generic.Devinfo.dev_vendor_id, rail->r_generic.Devinfo.dev_device_id); -+ -+ spin_lock_irqsave (&rail->r_ecq_lock, flags); -+ for (i = 0; i < EP4_NUM_ECQ; i++) -+ { -+ list_for_each (el, &rail->r_ecq_list[i]) { -+ EP4_ECQ *ecq = list_entry (el, EP4_ECQ, ecq_link); -+ -+ if (i == EP4_ECQ_EVENT) -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d event=%llx,%llx,%llx\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), ecq->ecq_addr, ecq->ecq_avail, -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_CountAndType)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WriteValue)), -+ elan4_sdram_readq (dev, ecq->ecq_event + offsetof (E4_Event32, ev_WritePtr))); -+ -+ else -+ ep_debugf (DBG_DEBUG, " ECQ[%d] ecq=%p cqnum=%d addr=%llx avail=%d\n", -+ i, ecq, elan4_cq2num (ecq->ecq_cq), ecq->ecq_addr, ecq->ecq_avail); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_ecq_lock, flags); -+ -+ ep_debugf (DBG_DEBUG, " flush count=%ld mcq=%p ecq=%p event %llx.%llx.%llx\n", -+ rail->r_flush_count, rail->r_flush_mcq, rail->r_flush_ecq, -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_CountAndType)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WritePtr)), -+ elan4_sdram_readq (dev, rail->r_elan + offsetof (EP4_RAIL_ELAN, r_flush_event.ev_WriteValue))); -+ -+ spin_lock_irqsave (&rail->r_dma_lock, flags); -+ for (i = 0; i < EP_NUM_RETRIES; i++) -+ { -+ list_for_each (el, &rail->r_dma_retrylist[i]) { -+ EP4_DMA_RETRY *retry = list_entry (el, EP4_DMA_RETRY, retry_link); -+ -+ ep_debugf (DBG_DEBUG, " RETRY[%d] typeSize %llx cookie %llx vproc %llx events %llx %llx\n", -+ i, retry->retry_dma.dma_typeSize, retry->retry_dma.dma_cookie, -+ retry->retry_dma.dma_vproc, retry->retry_dma.dma_srcEvent, retry->retry_dma.dma_dstEvent); -+ } -+ } -+ spin_unlock_irqrestore (&rail->r_dma_lock, flags); -+} -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/threadcode.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode.c 2005-07-28 14:52:52.911669160 -0400 -@@ -0,0 +1,146 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode.c,v $ */ -+ -+#include -+ -+#include -+ -+EP_ADDR -+ep_symbol (EP_CODE *code, char *name) -+{ -+ EP_SYMBOL *s = code->symbols; -+ -+ while (s->name && strcmp (s->name, name)) -+ s++; -+ -+ return (s->name ? s->value : (EP_ADDR) 0); -+} -+ -+int -+ep_loadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ register int i; -+ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _etext = ep_symbol (code, "_etext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ EP_ADDR _edata = ep_symbol (code, "_edata"); -+ EP_ADDR _end = ep_symbol (code, "_end"); -+ EP_ADDR _rodata = roundup (_etext, sizeof (uint64_t)); -+ -+ if (_stext == (EP_ADDR) 0 || _etext == (EP_ADDR) 0 || -+ _sdata == (EP_ADDR) 0 || _edata == (EP_ADDR) 0 || -+ _end == (EP_ADDR) 0) -+ { -+ printk ("ep_loadcode: symbols not defined correctly for code at %p\n", code); -+ return (EINVAL); -+ } -+ -+ /* -+ * Include the rodata in the text segment -+ */ -+ _etext = _rodata + code->rodata_size; -+ -+ /* -+ * If _etext is in the same page as _sdata, then allocate a contiguous -+ * chunk of memory and map it as read/write. otherwise allocate two chunks -+ * and map the code in as read-only. -+ */ -+ if ((_etext & PAGEMASK) == (_sdata & PAGEMASK)) -+ { -+ code->ntext = btopr (_end - (_stext & PAGEMASK)); -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ code->_sdata = code->_stext + (_sdata - _stext); -+ } -+ else -+ { -+ code->ntext = btopr (_etext - (_stext & PAGEMASK)); -+ code->ndata = btopr (_end - (_sdata & PAGEMASK)); -+ -+ if (code->ntext) -+ { -+ code->pptext = ep_alloc_memory_elan (rail, _stext & PAGEMASK, ptob (code->ntext), EP_PERM_EXECUTE, 0); -+ -+ if (code->pptext == (sdramaddr_t) 0) -+ return (ENOMEM); -+ -+ code->_stext = code->pptext + (_stext & PAGEOFFSET); -+ code->_rodata = code->_stext + (_rodata - _stext); -+ } -+ -+ if (code->ndata) -+ { -+ code->ppdata = ep_alloc_memory_elan (rail, _sdata & PAGEMASK, ptob (code->ndata), EP_PERM_WRITE, 0); -+ -+ if (code->ppdata == (sdramaddr_t) 0) -+ { -+ if (code->ntext) ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->ntext = 0; -+ -+ return (ENOMEM); -+ } -+ -+ code->_sdata = code->ppdata + (_sdata & PAGEOFFSET); -+ } -+ } -+ -+#ifdef __LITTLE_ENDIAN__ -+# define Flip 3 -+#else -+# define Flip 0 -+#endif -+ -+ /* -+ * Now copy the text and rodata into the SDRAM -+ * this is linked into the module to be byte -+ * copied to the SDRAM, since we want to copy -+ * with word accesses we have to do the byte -+ * assembly correctly. -+ */ -+ for (i = 0; i < code->text_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_stext + i, code->text[i^Flip]); -+ -+ for (i = 0; i < code->rodata_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_rodata + i, code->rodata[i^Flip]); -+ -+ /* -+ * And the initialised data segment. -+ */ -+ for (i = 0; i < code->data_size; i++) -+ rail->Operations.SdramWriteb (rail, code->_sdata + i, code->data[i^Flip]); -+ -+ return (ESUCCESS); -+} -+ -+void -+ep_unloadcode (EP_RAIL *rail, EP_CODE *code) -+{ -+ EP_ADDR _stext = ep_symbol (code, "_stext"); -+ EP_ADDR _sdata = ep_symbol (code, "_sdata"); -+ -+ if (code->pptext) -+ ep_free_memory_elan (rail, _stext & PAGEMASK); -+ if (code->ppdata) -+ ep_free_memory_elan (rail, _sdata & PAGEMASK); -+ code->pptext = code->ppdata = 0; -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan3.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/threadcode_elan3.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan3.c 2005-07-28 14:52:52.911669160 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: threadcode_elan3.c,v 1.11 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/threadcode_elan3.c,v $ */ -+ -+#include -+ -+#include -+ -+#include "kcomm_elan3.h" -+#include "debug.h" -+ -+#include -+ -+E3_Addr -+ep3_init_thread (ELAN3_DEV *dev, -+ E3_Addr fn, /* Elan address of function */ -+ E3_Addr addr, /* Elan address of stack */ -+ sdramaddr_t stack, /* sdram address of stack */ -+ int stackSize, /* stack size (in bytes) */ -+ int nargs, -+ ...) -+{ -+ sdramaddr_t frame; -+ sdramaddr_t regs; -+ sdramaddr_t argsp; -+ int i; -+ va_list ap; -+ -+ /* -+ * Align the stack pointer at the top of the stack and leave space for a stack frame -+ */ -+ stack = ((stack + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ addr = ((addr + stackSize) & ~(E3_STACK_ALIGN-1)) - sizeof (E3_Frame); -+ -+ va_start (ap, nargs); -+ -+ if (nargs > 6) -+ { -+ stack -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ addr -= (((nargs*sizeof (E3_uint32))+E3_STACK_ALIGN-1) & ~(E3_STACK_ALIGN-1)); -+ } -+ -+ frame = stack; -+ regs = stack - sizeof (E3_OutsRegs); -+ -+ /* -+ * Initialise the registers, and stack frame. -+ */ -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[6]), fn); -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[7]), 0); -+ -+ if (nargs <= 6) -+ { -+ for (i = 0; i < nargs; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ } -+ else -+ { -+ for (i = 0; i < 6; i++) -+ elan3_sdram_writel (dev, regs + offsetof (E3_OutsRegs, o[i]), va_arg (ap, E3_uint32)); -+ -+ for (argsp = frame + offsetof (E3_Frame, fr_argx[0]); i < nargs; i++, argsp += sizeof (E3_uint32)) -+ elan3_sdram_writel (dev, argsp, va_arg (ap, int)); -+ } -+ -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savefp), 0); -+ elan3_sdram_writel (dev, frame + offsetof (E3_Frame, fr_savepc), 0); -+ -+ va_end (ap); -+ -+ return (addr); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan3_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan3_Linux.c 2005-07-28 14:52:52.912669008 -0400 -@@ -0,0 +1,112 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan3.h" -+static uint32_t threadcode_elan3_text[] = { -+0x80a0239c, 0x00001082, 0x00e0a280, 0x47008002, 0x0020a380, 0x20600288, 0x20200286, 0x43008002, -+0x00000001, 0x0a006081, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0xa800c613, 0xa300c609, 0x0020108a, 0x0080900b, 0x00006885, 0x0580a080, -+0x06008002, 0x02a0a080, 0x06008022, 0xffff0296, 0x04008010, 0xff3f0398, 0x1f008010, 0x00201090, -+0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x00c03f3f, -+0xf8e017be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, -+0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x00e0a280, -+0xbfffbf12, 0x0020a380, 0x03008012, 0x02201090, 0x03201090, 0x08e0c381, 0x80a0039c, 0xe0a0239c, -+0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x080010b8, 0x090010b0, 0x0a0010b2, 0x04000037, 0x402006b4, -+0x50200690, 0x01201092, 0x20a0239c, 0x00a0a3f0, 0x00c03f3f, 0x8ce117be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ff8, 0x0000b081, 0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, -+0x00a083f0, 0x20a0039c, 0x582006d0, 0x0020a280, 0x05008002, 0x0900a280, 0x10008002, 0x50200690, -+0xeaffbf30, 0x5c2006d4, 0x18001090, 0x19001092, 0x1b800294, 0x0a201096, 0x8affff7f, 0x05201098, -+0x446026d0, 0x302027f4, 0xdfffbf10, 0x50200690, 0xfdffbf10, 0x446026c0, 0x5c2006e0, 0x0020a480, -+0xf9ffbf06, 0x18001090, 0x19001092, 0x1b000494, 0x14201096, 0x7bffff7f, 0x0a201098, 0x0020a280, -+0xf4ffbf22, 0x486026e0, 0x00007081, 0x1600801c, 0x00000001, 0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, -+0x40a0a3e0, 0x00c03f3f, 0x60e217be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, 0x0000a081, -+0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, 0x00a083c0, -+0x60a0039c, 0xff3f84a0, 0xe0ffbf1c, 0x18001090, 0xd5ffbf30, 0x60a003de, 0x80a083e0, 0xa0a083f0, -+0x08e0c381, 0xe0a0039c, 0x00a1239c, 0x60a023de, 0x80a0a3e0, 0xa0a0a3f0, 0x44a123d0, 0x090010b0, -+0x0a0010b6, 0x0b0010b8, 0x0c0010b4, 0x012010ba, 0xdca023fa, 0x142007d2, 0x082007d0, 0x084002b2, -+0x000027c0, 0xf42006d0, 0x0020a280, 0x15008032, 0xf42006d0, 0x18200790, 0xdca003d2, 0x20a0239c, -+0x00a0a3f0, 0x00c03f3f, 0x20e317be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ff8, 0x0000b081, -+0x06008010, 0x00a083f0, 0x14e007be, 0x00c01ff8, 0x0000b081, 0x00a083f0, 0x20a0039c, 0xf42006d0, -+0x0020a280, 0x0a008022, 0xdca023c0, 0x042007d0, 0x0840a680, 0x06008032, 0xdca023c0, 0x18001082, -+0x0220d091, 0xe1ffbf10, 0xf42006d0, 0x06008010, 0x190010a2, 0x042006d0, 0x00c026d0, 0x18001082, -+0x0020d091, 0x042006d0, 0x01200290, 0x042026d0, 0x000006d0, 0x0020a280, 0x04008002, 0x18001090, -+0x4f010040, 0x1b001092, 0xf02006e0, 0x0020a480, 0xf1ffbf02, 0x40b03611, 0x004004d2, 0x01201290, -+0x0840a280, 0x0e018012, 0x10001096, 0x046004d0, 0x01208a80, 0x33008002, 0xa0200484, 0x0c2610ba, -+0x000024fa, 0x00211090, 0x042024d0, 0x246004d0, 0x80200290, 0x082024d0, 0xec2004d0, 0x00210290, -+0x0c2024d0, 0x102024c4, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, -+0xc0ff3f3b, 0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, -+0x142024d0, 0x206004d0, 0x10210290, 0x182024d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, -+0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, -+0x08401292, 0x4000003b, 0x1d401292, 0x1c2024d2, 0x01201090, 0xa02024d0, 0x20200496, 0xa8200484, -+0x306004d0, 0x0020a280, 0x2b008012, 0x00201098, 0x0c2610ba, 0x00c022fa, 0x04e022c0, 0xc0200490, -+0x10e022d0, 0x186004d2, 0x02602a93, 0x098006d0, 0x0001003b, 0x1d000290, 0x098026d0, 0xc0ff3f3b, -+0x1d000a90, 0x44a103fa, 0x606007d2, 0x00680292, 0x09001290, 0x4000003b, 0x1d001290, 0x14e022d0, -+0x206004d0, 0x10210290, 0x18e022d0, 0x186004d0, 0x02202a91, 0x088006d2, 0x0001003b, 0x1d400292, -+0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x186004d0, 0x00280290, 0x80000015, 0x0a001290, 0x08401292, -+0x4000003b, 0x1d401292, 0x1ce022d2, 0x4f008010, 0x0020109a, 0x0c00109a, 0x306004d0, 0x0840a380, -+0x3b00801a, 0xe02004c6, 0x0c2610ba, 0x00c022fa, 0x01202b91, 0x0c000290, 0x02202a91, 0x08400490, -+0x382002d2, 0x04e022d2, 0x342002d0, 0x08e022d0, 0x0ce022c6, 0x10e022c4, 0x186004d0, 0x02202a91, -+0x088006d2, 0x0001003b, 0x1d400292, 0x088026d2, 0xc0ff3f3b, 0x1d400a92, 0x44a103fa, 0x606007d0, -+0x00280290, 0x08401292, 0x4000003b, 0x1d401292, 0x14e022d2, 0x206004d0, 0x10210290, 0x18e022d0, -+0x186004d0, 0x02202a91, 0x088006d4, 0x0001003b, 0x1d800294, 0x088026d4, 0xc0ff3f3b, 0x1d800a94, -+0x186004d0, 0x00280290, 0x80000013, 0x09001290, 0x08801294, 0x4000003b, 0x1d801294, 0x1ce022d4, -+0x01201090, 0x008020d0, 0x04e002d0, 0x08c00086, 0x0840039a, 0x01200398, 0x20e00296, 0x306004d0, -+0x0800a380, 0xc9ffbf0a, 0x08a00084, 0xc0200490, 0xf0ff22d0, 0xe42004d0, 0x0d00a280, 0x0b00801a, -+0x00201098, 0x04008010, 0x10001096, 0x01200398, 0x20e00296, 0x306004d0, 0x0800a380, 0xfcffbf2a, -+0x04e022c0, 0xfc3f109a, 0xe42024da, 0x10001082, 0x186004d0, 0x00280290, 0x08006081, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00201098, -+0x0c00109a, 0x142004fa, 0xec00823b, 0x3080d61b, 0x00006891, 0x0420a280, 0x3b008002, 0x0c00a280, -+0x04008002, 0x00000001, 0x0120d091, 0x36008030, 0x7c2006d0, 0x01200290, 0x7c2026d0, 0x782006d0, -+0x0020a280, 0x04008002, 0x78200690, 0x64000040, 0x40e00692, 0xf02004d0, 0x0020a280, 0x03008012, -+0xf02026d0, 0x80e026c0, 0x7c2006d0, 0x40e026d0, 0x046004d0, 0x04208a80, 0x13008002, 0x1100108a, -+0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, 0x406099e0, -+0x40a0b9e0, 0x806099e0, 0x80a0b9e0, 0xc06099e0, 0xc0a0b9e0, 0x00809be0, 0x0780039c, 0x0e008010, -+0xec2004d2, 0xec2004cc, 0x3fa00b8e, 0x40e0018e, 0x0780239c, 0x0080bbe0, 0x006099e0, 0x00a0b9e0, -+0x406099e0, 0x40a0b9e0, 0x00809be0, 0x0780039c, 0xec2004d2, 0xe42004d0, 0x886222d0, 0x042006d0, -+0x00c026d0, 0x000007d0, 0x01208a80, 0x05008012, 0x00000001, 0x142027f2, 0x06008010, 0xdca003fa, -+0x142027f2, 0xfe3f0a90, 0x000027d0, 0xdca003fa, 0x016007ba, 0xdca023fa, 0x0c2007d0, 0x0840a680, -+0x04008032, 0x082007d0, 0x03008010, 0x102007f2, 0x084006b2, 0x00007081, 0x1600801c, 0x00000001, -+0x60a0239c, 0x00a0a3c0, 0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x8ce017be, 0x04e08f80, 0x06008012, -+0x00000001, 0x00c01ffc, 0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, -+0x40a083e0, 0x20a083f0, 0x00a083c0, 0x60a0039c, 0x042007d0, 0x0840a680, 0xb3febf12, 0x190010a2, -+0x8afebf10, 0xf42006d0, 0x60a003de, 0x80a083e0, 0xa0a083f0, 0x08e0c381, 0x00a1039c, 0x80a0239c, -+0x042002c4, 0x004022c4, 0x18008030, 0x00007081, 0x16008012, 0x00000001, 0x60a0239c, 0x00a0a3c0, -+0x20a0a3f0, 0x40a0a3e0, 0x02c03f3f, 0x24e117be, 0x04e08f80, 0x06008012, 0x00000001, 0x00c01ffc, -+0x0000a081, 0x06008010, 0x40a083e0, 0x14e007be, 0x00c01ffc, 0x0000a081, 0x40a083e0, 0x20a083f0, -+0x00a083c0, 0x60a0039c, 0x000002c4, 0x00a0a080, 0xe7ffbf12, 0x00000001, 0x042002c4, 0x01a00084, -+0x042022c4, 0x000002c4, 0x00a0a080, 0xddffbf12, 0x00000001, 0x08e0c381, 0x80a0039c, }; -+#define threadcode_elan3_text_size 0x97c -+static uint32_t threadcode_elan3_data[] = { -+0}; -+#define threadcode_elan3_data_size 0x0 -+static uint32_t threadcode_elan3_rodata[] = { -+0}; -+#define threadcode_elan3_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan3_symbols[] = { -+ {"__bss_start", 0xff00297c}, -+ {"_edata", 0xff00297c}, -+ {"_end", 0xff002988}, -+ {"_etext", 0xff00097c}, -+ {"_sdata", 0xff00297c}, -+ {"_stext", 0xff000000}, -+ {"ep3_spinblock", 0xff0008dc}, -+ {"ep3comms_rcvr", 0xff0002a8}, -+ {"kcomm_probe", 0xff00013c}, -+ {"r", 0xff00297c}, -+ {"rail", 0xff002984}, -+ {"rm", 0xff002980}, -+ {0, 0}}; -+EP_CODE threadcode_elan3 = { -+ (unsigned char *) threadcode_elan3_text, -+ threadcode_elan3_text_size, -+ (unsigned char *) threadcode_elan3_data, -+ threadcode_elan3_data_size, -+ (unsigned char *) threadcode_elan3_rodata, -+ threadcode_elan3_rodata_size, -+ threadcode_elan3_symbols, -+}; -Index: linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan4_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/ep/threadcode_elan4_Linux.c 2005-07-28 14:52:52.912669008 -0400 -@@ -0,0 +1,112 @@ -+/* --------------------------------------------------------*/ -+/* MACHINE GENERATED ELAN CODE */ -+#include -+#include -+#include "kcomm_elan4.h" -+static uint32_t threadcode_elan4_text[] = { -+0x00a00087, 0xc04060cb, 0x00003080, 0x80001080, 0x02606180, 0x02004032, 0x807f60cb, 0x04606180, -+0x02004032, 0x407f60d3, 0x08606180, 0x02004032, 0x007f60db, 0x10606180, 0x02004032, 0xc07e60e3, -+0x20606180, 0x02004032, 0x807e60eb, 0x40606180, 0x02004032, 0x407e60f3, 0x80606180, 0x02004032, -+0x007e60fb, 0x40001180, 0xc3801080, 0xc07f60c3, 0x20002000, 0x20002000, 0x20002000, 0x20002000, -+0x407f8001, 0x4060c0c7, 0x4860c0d0, 0x5060c0d1, 0x5860c0d2, 0x6060c0d3, 0x6860c0d4, 0x00208292, -+0x00608291, 0x00a08294, 0xff3f8088, 0x1c381293, 0xc04044c8, 0x13004290, 0xc000c5d0, 0x08004030, -+0x00001088, 0x04204288, 0x0020b200, 0x04004003, 0x00208080, 0x9c010040, 0x00a08488, 0xc04044c8, -+0x20381288, 0x0020b200, 0xf6ff7f13, 0x01208408, 0x11161282, 0x804094c2, 0xc04044c8, 0x20381288, -+0x0020b200, 0xebff7f13, 0x00208080, 0x406040c7, 0x486040d0, 0x506040d1, 0x586040d2, 0x606040d3, -+0x686040d4, 0x08e00180, 0xc0608001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, -+0x807e8001, 0x4060c0c7, 0x4860c0d0, 0x5060c0d1, 0x5860c0d2, 0x6060c0d3, 0x6860c0d4, 0x7060c0d5, -+0x7860c0d6, 0x8060c0d7, 0x8860c0d8, 0x9060c0d9, 0x9860c0da, 0xa060c0db, 0xa860c0dc, 0xb060c0dd, -+0xb860c0de, 0xc060c0df, 0x8061c0c8, 0x00608296, 0x00a0829a, 0x9861c0cb, 0xa061c0cc, 0xa861c0cd, -+0x01208088, 0x3861c0c8, 0x08e042d2, 0x386140c9, 0x0900900a, 0xa06140c8, 0x986140cb, 0x18e042c9, -+0x72010040, 0x05b4128a, 0x0020808c, 0x3861c0cc, 0x986140c9, 0xc04042c8, 0x0880b400, 0x39014003, -+0xffff3f08, 0x90a0851c, 0xe023829f, 0x20f4179f, 0x10e3879f, 0xffff3f08, 0xe023829e, 0x20b4179e, -+0x03a3879e, 0xffff3f08, 0xe023829d, 0x2074179d, 0x0363879d, 0x00a08495, 0x18a08408, 0x800012c2, -+0x089a109b, 0x20f4169b, 0x20f8169b, 0x00e88609, 0x20741289, 0x01120008, 0x0a381288, 0x08408297, -+0x45208088, 0x06341288, 0x806140ca, 0xc88042c8, 0x00288218, 0x04a08408, 0x800012c2, 0x089a1088, -+0x20341288, 0x20381288, 0x00281299, 0x20a08408, 0x800012c2, 0x089a108a, 0x20b4128a, 0x20b8128a, -+0x30a08408, 0x800012c2, 0x089a1093, 0x20f41493, 0x20f81493, 0x03f41689, 0x806140cb, 0x2922808c, -+0x0334138c, 0xccc042c8, 0xc90042d1, 0x02604688, 0x0020b200, 0x03004002, 0x60a08214, 0x80a08214, -+0x90a08509, 0x804012c8, 0x01208208, 0x804092c8, 0x046012c8, 0x043a1288, 0x0020b200, 0x04004003, -+0xa86140c8, 0x67ffff7f, 0x00a0868a, 0x88a045d0, 0x0020b400, 0x12004013, 0x00208080, 0x800017c8, -+0x808096c8, 0x72010040, 0x00a08588, 0x00208290, 0x90a08509, 0x804012c8, 0x01208208, 0x804092c8, -+0x046012c8, 0x043a1288, 0x0020b200, 0x04004003, 0xa86140c8, 0x53ffff7f, 0x00a0868a, 0x804015c2, -+0x159a1089, 0x20741289, 0x20781289, 0x40b03608, 0x01208288, 0x0840b200, 0x06004023, 0xa02344c4, -+0x800017c8, 0x808096c8, 0xbb004010, 0xa8a045c8, 0x01604688, 0x00281288, 0x08009008, 0x00e0b400, -+0x05004003, 0x3f381289, 0x13408209, 0x03004010, 0x05208088, 0x04208088, 0x09009220, 0x07341889, -+0x0900840b, 0x05341888, 0x0023820a, 0x01604688, 0x0020b200, 0x1d004002, 0x0a00840c, 0xc900c4d7, -+0x40c40f08, 0x09208288, 0x08e0c2c8, 0x0a608488, 0x10e0c2c8, 0x81001008, 0x0a341288, 0x18e0c2c8, -+0x1d608488, 0x20e0c2c8, 0x28e0c2d8, 0x24608508, 0x800012c2, 0x089a1088, 0x20341288, 0x20381288, -+0x80208208, 0x30e0c2c8, 0x00218108, 0x38e0c2c8, 0x40e0c2d4, 0x48e0c2cc, 0xca00c4df, 0x20608411, -+0x80e0820b, 0x2020830c, 0x00e0b400, 0x13004013, 0x0020808e, 0xc0c0c2d7, 0x40c40f09, 0x09608289, -+0x08e0c2c9, 0x0a608488, 0x10e0c2c8, 0x00040008, 0x18e0c2c8, 0x1d608488, 0x20e0c2c8, 0x28e0c2d8, -+0x40e0c2d4, 0x48e0c2cc, 0xc000c3de, 0x00208083, 0x4c004010, 0x20608411, 0xb8238408, 0x800012c2, -+0x089a108f, 0x20f4138f, 0x20f8138f, 0x00208083, 0x13c0b000, 0x2e00401b, 0x40c40f08, 0x092082a2, -+0x00040021, 0xffff3f08, 0xe023828d, 0x2074138d, 0x1063838d, 0x0e808309, 0x0e408209, 0x02741289, -+0x1540820a, 0x38a0820a, 0x808012c2, 0x0a9a108a, 0x20b4128a, 0x20b8128a, 0xc0c0c2d7, 0x08e0c2e2, -+0x0a608488, 0x10e0c2c8, 0x20b41288, 0x21008288, 0x18e0c2c8, 0x1d608488, 0x20e0c2c8, 0x28e0c2d8, -+0x15408209, 0x34608209, 0x804012c2, 0x099a1089, 0x20741289, 0x20781289, 0x30e0c2c9, 0x38e0c2cf, -+0x40e0c2d4, 0x48e0c2cc, 0xc000c3cd, 0x0ac0830f, 0x0ac08003, 0x20608411, 0x80e0820b, 0x01a0830e, -+0x1380b300, 0xdcff7f0b, 0x2020830c, 0xe03f830c, 0xc000c3dd, 0xbc238408, 0x800012c2, 0x089a1088, -+0x20341288, 0x20381288, 0x0300b200, 0x0d00401b, 0x07341888, 0x0020888e, 0x0420b800, 0x08004019, -+0x0800840b, 0x00040008, 0x18e0c2c8, 0x01a0830e, 0x04a0b300, 0xfdff7f09, 0x80e0820b, 0xfc3f8083, -+0x07341888, 0x08008408, 0xa06140ca, 0xc00062e3, 0x402062f3, 0xc080e2e3, 0xc080e2f3, 0x982244c8, -+0x88a0c5c8, 0x88a045c8, 0x0020b200, 0x05004013, 0x04604688, 0x88a08508, 0x80a0c5c8, 0x04604688, -+0x0020b200, 0x0c004002, 0xd822c4c0, 0xc04065e3, 0x406065f3, 0xc000e1e3, 0x806065e3, 0x4020e1f3, -+0xc06065f3, 0x8020e1e3, 0xc020e1f3, 0x07004010, 0x88228108, 0xc04065e3, 0x406065f3, 0xc000e1e3, -+0x4020e1f3, 0x88228108, 0x08d61082, 0x800092c2, 0x03f41689, 0x806140cb, 0x2922808c, 0x0334138c, -+0xccc042c8, 0xc900c2d1, 0x800017c8, 0x808096c8, 0xa8a045c8, 0x0880b400, 0x03004013, 0x00a18412, -+0xa0a045d2, 0x98a045c8, 0x0020b200, 0x05004013, 0x386140c9, 0x986140c8, 0x0820c2d2, 0x386140c9, -+0x01608209, 0xfe61b200, 0x0e004015, 0x3861c0c9, 0x00001088, 0x02204288, 0x0020b200, 0x05004003, -+0x986140ca, 0x28000040, 0xa06140c8, 0x986140ca, 0xc08042c8, 0x0880b400, 0xd8fe7f13, 0x00a08495, -+0x98a045cb, 0x00e0b200, 0xbafe7f03, 0x386140c9, 0xa06140c8, 0x60a08509, 0x48000040, 0xe03f808a, -+0x986140cb, 0x08e0c2d2, 0x386140cc, 0x0120830c, 0xaffe7f10, 0x3861c0cc, 0x406040c7, 0x486040d0, -+0x506040d1, 0x586040d2, 0x606040d3, 0x686040d4, 0x706040d5, 0x786040d6, 0x806040d7, 0x886040d8, -+0x906040d9, 0x986040da, 0xa06040db, 0xa86040dc, 0xb06040dd, 0xb86040de, 0xc06040df, 0x08e00180, -+0x80618001, 0x807f8001, 0xc040e0d3, 0x4060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, -+0x00e08192, 0x02000040, 0x00608091, 0x14e08110, 0x17208097, 0xc000f2d3, 0xc04060d3, 0x406060db, -+0x08a00080, 0x80608001, 0x407f8001, 0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, 0x00208080, -+0x00208080, 0x00e08192, 0x02000040, 0x00608091, 0x40e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, -+0x00208490, 0x00e08597, 0x00208080, 0x00208080, 0x1f608290, 0x20b41291, 0x08638491, 0x00608092, -+0x00208293, 0xc000f2d1, 0x406060d3, 0x806060db, 0x08a00080, 0xc0608001, 0x407f8001, 0x4060e0d3, -+0x8060e0db, 0x00208490, 0x00208698, 0x00208080, 0x00208080, 0x00e08192, 0x02000040, 0x00608091, -+0x54e08110, 0xc040e0d1, 0x37208097, 0x3860c0d7, 0x00208490, 0x00e08597, 0x00208080, 0x00208080, -+0x1f608290, 0x20b41291, 0x08638491, 0x00608092, 0x00208293, 0x0ef41294, 0x0d208594, 0x17208095, -+0x17208096, 0x17208097, 0xc000f2d3, 0x406060d3, 0x806060db, 0x08a00080, 0xc0608001, 0x01208097, -+0xb0e3c0d7, 0x80a060d2, 0x98e28004, 0x98e2c0c0, 0x80a0c0c4, 0xc080c4c3, 0x01e0b400, 0x06004002, -+0x00a08490, 0x00e08097, 0x02208097, 0xb0e3c0d7, 0xd8e2d0d0, 0xd8e2c0d0, 0x03208097, 0xb0e3c0d7, -+0x00e08088, 0x0e004010, 0x00a060c3, 0x407f8001, 0x4060e0d3, 0x8060e0db, 0x00208490, 0x00208698, -+0x00208080, 0x00208080, 0x01208089, 0x8820c2c9, 0x00608091, 0x00e08197, 0x0020f2d3, 0x406060d3, -+0x806060db, 0x08e00180, 0xc0608001, }; -+#define threadcode_elan4_text_size 0x90c -+static uint32_t threadcode_elan4_data[] = { -+0}; -+#define threadcode_elan4_data_size 0x0 -+static uint32_t threadcode_elan4_rodata[] = { -+0}; -+#define threadcode_elan4_rodata_size 0x0 -+static EP_SYMBOL threadcode_elan4_symbols[] = { -+ {".thread_restart", 0x00000000f800000c}, -+ {".thread_start", 0x00000000f8000000}, -+ {"__bss_start", 0x00000000f810090c}, -+ {"_edata", 0x00000000f810090c}, -+ {"_end", 0x00000000f8100910}, -+ {"_etext", 0x00000000f800090c}, -+ {"_sdata", 0x00000000f810090c}, -+ {"_stext", 0x00000000f8000000}, -+ {"c_queue_rxd", 0x00000000f800087c}, -+ {"c_reschedule", 0x00000000f8000744}, -+ {"c_stall_thread", 0x00000000f80008cc}, -+ {"c_waitevent", 0x00000000f8000788}, -+ {"c_waitevent_interrupt", 0x00000000f80007f8}, -+ {"ep4_spinblock", 0x00000000f8000080}, -+ {"ep4comms_rcvr", 0x00000000f8000140}, -+ {0, 0}}; -+EP_CODE threadcode_elan4 = { -+ (unsigned char *) threadcode_elan4_text, -+ threadcode_elan4_text_size, -+ (unsigned char *) threadcode_elan4_data, -+ threadcode_elan4_data_size, -+ (unsigned char *) threadcode_elan4_rodata, -+ threadcode_elan4_rodata_size, -+ threadcode_elan4_symbols, -+}; -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/jtagdrv.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv.c 2005-07-28 14:52:52.913668856 -0400 -@@ -0,0 +1,451 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv.c,v 1.12 2003/06/07 16:02:35 david Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.c,v $*/ -+ -+#include -+ -+#include "jtagdrv.h" -+#include -+ -+int -+jtagdrv_strobe_data (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_strobe_data: %s %s %s -> ", (data & LPT_DATA_TRST) ? "TRST" : "trst", -+ (data & LPT_DATA_TDI) ? "TDI" : "tdi", (data & LPT_DATA_TMS) ? "TMS" : "tms")); -+ -+ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* Drive NEW values on data wires */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(5); /* Drive strobe low */ -+ LPT_READ_STAT (dev, dsr); DELAY(5); /* Sample TDI from ring */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+ -+ PRINTF (DBG_ECPP, ("%s\n", (dsr & LPT_STAT_PE) ? "TDO" : "tdo")); -+ -+ return ((dsr & LPT_STAT_PE) ? 1 : 0); -+} -+ -+void -+jtagdrv_select_ring (JTAG_DEV *dev, u_int ring) -+{ -+ PRINTF (DBG_ECPP, ("jtagdrv_select_ring: ring=0x%x\n", ring)); -+ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe and TCLK high */ -+ LPT_WRITE_DATA (dev, ring); DELAY(5); /* Drive ring address */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_RCLK); DELAY(5); /* Drive strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* Drive strobe high */ -+} -+ -+void -+jtagdrv_reset (JTAG_DEV *dev) -+{ -+ register int i; -+ -+ for (i = 0; i < 5; i++) -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* 5 clocks to Reset from any state */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+void -+jtagdrv_shift_ir (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select IR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-IR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the instruction bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-IR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+ -+void -+jtagdrv_shift_dr (JTAG_DEV *dev, u_char *value, int nbits) -+{ -+ register int i; -+ register int bit; -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Select DR-Scan */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Capture-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Shift-DR */ -+ -+ for (i = 0; i < nbits; i++) -+ { -+ /* strobe through the data bits, asserting TMS on the last bit */ -+ -+ if (i == (nbits-1)) -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ else -+ bit = jtagdrv_strobe_data (dev, LPT_DATA_TRST | (JTAG_BIT(value, i) ? LPT_DATA_TDI : 0)); -+ -+ if (bit) -+ JTAG_SET_BIT(value, i); -+ else -+ JTAG_CLR_BIT(value, i); -+ } -+ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST | LPT_DATA_TMS); /* to Update-DR */ -+ jtagdrv_strobe_data (dev, LPT_DATA_TRST); /* to Run-Test/Idle */ -+} -+ -+static int -+jtagdrv_i2c_start (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start\n")); -+ -+ /* Issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* sample the line to see if we're idle */ -+ LPT_READ_STAT (dev, dsr); /* sample SDA */ -+ if ((dsr & LPT_STAT_SDA) == 0) /* Cannot start if SDA already driven */ -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start: cannot start - sda driven low\n")); -+ -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - stopped after %d clocks\n", i)); -+ break; -+ } -+ } -+ -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_start - cannot start - not idle\n")); -+ return (0); -+ } -+ -+ /* seen SDA float high, so issue a stop sequence */ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(4); /* drive SDA low */ -+ return (1); -+} -+ -+static void -+jtagdrv_i2c_stop (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop\n")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, 0); DELAY(5); /* SDA low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ -+ /* -+ * bug fix for temperature sensor chip -+ * if it's still driving SDA, then clock -+ * it until it stops driving it -+ */ -+ LPT_READ_STAT (dev, dsr); -+ if ((dsr & LPT_STAT_SDA) == 0) -+ { -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - slave not stodeved\n")); -+ for (i = 0; i < 16 ; i++) -+ { -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(5); /* SCLK low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(5); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); -+ -+ if (dsr & LPT_STAT_SDA) -+ break; -+ } -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_stop - stodeved after %d clocks\n", i)); -+ } -+} -+ -+static int -+jtagdrv_i2c_strobe (JTAG_DEV *dev, u_char data) -+{ -+ u_char dsr; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_strobe : %s", (data & LPT_DATA_SDA) ? "SDA" : "sda")); -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, data); DELAY(5); /* write data */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, (" -> %s\n", (dsr & LPT_STAT_SDA) ? "SDA" : "sda")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static int -+jtagdrv_i2c_get_ack (JTAG_DEV *dev) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, LPT_DATA_SDA); DELAY(5); /* SDA high */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_get_ack -> %s\n", (dsr & LPT_STAT_SDA) ? "no ack" : "ack")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 0 : 1); -+} -+ -+static int -+jtagdrv_i2c_drive_ack (JTAG_DEV *dev, int nack) -+{ -+ u_char dsr; -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_SCLK); DELAY(1); /* SCLK low */ -+ LPT_WRITE_DATA (dev, nack ? LPT_DATA_SDA : 0); DELAY(5); /* SDA low for ack, high for nack */ -+ LPT_WRITE_CTRL (dev, 0); /* SCLK high */ -+ LPT_READ_STAT (dev, dsr); DELAY(4); /* Sample SDA for ack */ -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_drive_ack %d -> %s\n", nack, (dsr & LPT_STAT_SDA) ? "done" : "more")); -+ -+ return ((dsr & LPT_STAT_SDA) ? 1 : 0); -+} -+ -+static void -+jtagdrv_i2c_shift_addr (JTAG_DEV *dev, u_int address, int readNotWrite) -+{ -+ register int i; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_addr: %x\n", address)); -+ -+ for (i = I2C_ADDR_LEN-1; i >= 0; i--) -+ jtagdrv_i2c_strobe (dev, (address & (1 << i)) ? LPT_DATA_SDA : 0); -+ -+ jtagdrv_i2c_strobe (dev, readNotWrite ? LPT_DATA_SDA : 0); -+} -+ -+static u_char -+jtagdrv_i2c_shift_data (JTAG_DEV *dev, u_char data) -+{ -+ register int i; -+ u_char val = 0; -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : %02x\n", data)); -+ -+ for (i = I2C_DATA_LEN-1; i >= 0; i--) -+ if (jtagdrv_i2c_strobe (dev, data & (1 << i) ? LPT_DATA_SDA : 0)) -+ val |= (1 << i); -+ -+ PRINTF (DBG_ECPP, ("jtagdrv_i2c_shift_data : -> %02x\n", val)); -+ -+ return (val); -+} -+ -+int -+jtagdrv_i2c_write (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: address=%x count=%d data=%02x\n", address, count, data[0])); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_write: no ack on data phase %d\n", i)); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_read (JTAG_DEV *dev, u_int address, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 1); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_read: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ data[i] = jtagdrv_i2c_shift_data (dev, 0xff); -+ -+ jtagdrv_i2c_drive_ack (dev, (i == (count-1) ? 1 : 0)); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_writereg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ register int i; -+ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writereg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ for (i = 0; i < count; i++) -+ { -+ jtagdrv_i2c_shift_data (dev, data[i]); -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_writedate: no ack on byte %d\n", i)); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_WRITE_TO_BIG); -+ } -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_SUCCESS); -+} -+ -+int -+jtagdrv_i2c_readreg (JTAG_DEV *dev, u_int address, u_int intaddress, u_int count, u_char *data) -+{ -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: address=%x count=%d\n", address, count)); -+ -+ if (! jtagdrv_i2c_start (dev)) -+ return (I2C_OP_NOT_IDLE); -+ -+ jtagdrv_i2c_shift_addr (dev, address, 0); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on address phase\n")); -+ -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_shift_data (dev, intaddress); -+ -+ if (! jtagdrv_i2c_get_ack (dev)) -+ { -+ PRINTF (DBG_FN, ("jtagdrv_i2c_readreg: no ack on intaddress phase\n")); -+ jtagdrv_i2c_stop (dev); -+ return (I2C_OP_NO_DEVICE); -+ } -+ -+ jtagdrv_i2c_stop (dev); -+ -+ return (jtagdrv_i2c_read (dev, address, count, data)); -+} -+ -+void -+jtagdrv_i2c_clock_shift (JTAG_DEV *dev, u_int t, u_int n, u_int m) -+{ -+ int i; -+ -+ for (i = 2; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((t & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 1; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((n & (1 << i)) ? LPT_DATA_TDI : 0)| LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ for (i = 6; i >= 0; i--) -+ { -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0)); DELAY(1); /* clock low | data */ -+ LPT_WRITE_DATA (dev, ((m & (1 << i)) ? LPT_DATA_TDI : 0) | LPT_DATA_TMS); DELAY(1); /* clock high | data */ -+ } -+ -+ LPT_WRITE_DATA (dev, 0); DELAY(1); /* clock low | 0 */ -+ -+ LPT_WRITE_CTRL (dev, LPT_CTRL_TCLK); DELAY(1); /* strobe low */ -+ LPT_WRITE_CTRL (dev, 0); DELAY(1); /* strobe low */ -+} -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/jtagdrv.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv.h 2005-07-28 14:52:52.914668704 -0400 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __JTAGDRV_COMMON_H -+#define __JTAGDRV_COMMON_H -+ -+#ident "@(#)$Id: jtagdrv.h,v 1.5 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv.h,v $*/ -+ -+#include -+ -+/* include OS specific header file */ -+#if defined(LINUX) -+# include "jtagdrv_Linux.h" -+#elif defined(DIGITAL_UNIX) -+# include "jtagdrv_OSF1.h" -+#elif defined(QNX) -+# include "jtagdrv_QNX.h" -+#else -+# error cannot determint os type -+#endif -+ -+extern int jtagdebug; -+ -+#define DBG_CFG (1 << 0) -+#define DBG_OPEN (1 << 1) -+#define DBG_IOCTL (1 << 2) -+#define DBG_ECPP (1 << 3) -+#define DBG_FN (1 << 4) -+ -+#define DRIVER_NAME "jtag" -+ -+#if defined(LINUX) -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printk X : (void) 0) -+#define PRINTMSG(fmt, arg...) printk(KERN_INFO DRIVER_NAME ": " fmt, ##arg) -+#else -+#define PRINTF(n,X) ((n) & jtagdebug ? (void) printf X : (void) 0) -+#define PRINTMSG(M, A) printf ("jtag: " M, A) -+#endif -+ -+extern void jtagdrv_select_ring (JTAG_DEV *pp, u_int ring); -+extern void jtagdrv_reset (JTAG_DEV *pp); -+extern void jtagdrv_shift_ir (JTAG_DEV *pp, u_char *value, int nbits); -+extern void jtagdrv_shift_dr (JTAG_DEV *pp, u_char *value, int nbits); -+ -+extern int jtagdrv_i2c_write (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_read (JTAG_DEV *pp, u_int address, u_int count, u_char *data); -+extern int jtagdrv_i2c_writereg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern int jtagdrv_i2c_readreg (JTAG_DEV *pp, u_int address, u_int intaddress, u_int count, u_char *data); -+extern void jtagdrv_i2c_clock_shift (JTAG_DEV *pp, u_int t, u_int n, u_int m); -+ -+ -+#endif /* __JTAGDRV_COMMON_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/jtagdrv_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv_Linux.c 2005-07-28 14:52:52.914668704 -0400 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * $Id: jtagdrv_Linux.c,v 1.18.2.1 2005/02/01 10:12:01 lee Exp $ -+ * $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.c,v $ -+ */ -+ -+#include "jtagdrv.h" -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("JTAG Parallel port QsNet switch interface"); -+ -+MODULE_LICENSE("GPL"); -+ -+#define MAJOR_INSTANCE 0 /* 0 is dynamic assign of device major */ -+#define MAX_JTAG_DEV 4 -+ -+int jtag_major = MAJOR_INSTANCE; -+int jtagdebug = 0; -+MODULE_PARM(jtag_major, "i"); -+MODULE_PARM(jtagdebug, "i"); -+ -+JTAG_DEV jtag_devs[MAX_JTAG_DEV]; -+ -+int io[MAX_JTAG_DEV]= { 0, }; -+MODULE_PARM(io, "1-4i"); -+ -+ -+/* The fops functions */ -+int jtag_open(struct inode *, struct file *); -+int jtag_close(struct inode *, struct file *); -+int jtag_ioctl(struct inode *, struct file *, unsigned int, unsigned long ); -+ -+struct file_operations jtag_fops = { -+ ioctl: jtag_ioctl, -+ open: jtag_open, -+ release: jtag_close, -+}; -+ -+int -+jtag_probe(void) -+{ -+ int i=0; -+ int default_io = 1; -+ JTAG_DEV *dev; -+ unsigned char value=0xff; -+ -+ -+ /* see if there are any user supplied io addr */ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ if ( io[i] != 0x00) -+ default_io = 0; -+ jtag_devs[i].base = io[i]; -+ } -+ -+ if ( default_io ) { -+ jtag_devs[0].base = 0x3bc; -+ jtag_devs[1].base = 0x378; -+ jtag_devs[2].base = 0x278; -+ jtag_devs[3].base = 0x268; -+ } -+ -+ for ( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ if ( jtag_devs[i].base == 0x3bc ) -+ jtag_devs[i].region = 3; -+ else -+ jtag_devs[i].region = 8; -+ jtag_devs[i].present = 0; -+ } -+ -+ -+ if( default_io ) -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag")) { -+ LPT_WRITE(dev, 0,0); -+ LPT_READ(dev, 0,value); -+ if ( value != 0xff) { -+ PRINTMSG("(%d , %d) present, io=0x%04lx\n",jtag_major,i,dev->base); -+ -+ dev->present=1; -+ } -+ else -+ release_region(dev->base, dev->region); -+ } -+ else -+ { -+ PRINTMSG("failed to request_region (%d , %d), io=0x%04lx\n",jtag_major,i,dev->base); -+ return -1; -+ } -+ } -+ return 0; -+ } -+ else /* Force the region to be present, this makes the PCI parallel cards work */ -+ { -+ for( i = 0 ; i < MAX_JTAG_DEV; i++) -+ { -+ dev=&(jtag_devs[i]); -+ if(dev->base && request_region(dev->base, dev->region, "jtag") && (dev->base != 0)) -+ { -+ PRINTMSG("(%d , %d) forced by user, io=0x%04lx\n",jtag_major,i,dev->base); -+ dev->present=1; -+ } -+ else -+ { -+ if( dev->base != 0) -+ release_region(dev->base, dev->region); -+ } -+ } -+ return 0; -+ } -+} -+ -+int init_module(void) -+{ -+ int result,i; -+ result = register_chrdev(jtag_major, DRIVER_NAME, &jtag_fops); -+ if (result < 0) { -+ PRINTMSG("Couldn't register char device err == %d\n",jtag_major); -+ return -1; -+ } -+ -+ if ( jtag_major == 0 ) -+ jtag_major = result; -+ -+ for ( i = 0; i < MAX_JTAG_DEV; i++) { -+ jtag_devs[i].base=io[i]; -+ } -+ -+ jtag_probe(); -+ -+ PRINTMSG("Registered character device, major == %d\n",jtag_major); -+ return 0; -+} -+ -+void cleanup_module(void) -+{ -+ int i=0; -+ -+ for( i = 0; i < MAX_JTAG_DEV; i++) { -+ if( jtag_devs[i].present) -+ release_region(jtag_devs[i].base, jtag_devs[i].region); -+ } -+ -+ unregister_chrdev(jtag_major, DRIVER_NAME); -+ PRINTMSG("Unloaded char device\n"); -+} -+ -+ -+int -+jtag_open (struct inode *inode, struct file *filp) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ /* -+ * Only allow a single open at a time -+ */ -+ if (dev->open) -+ return (-EBUSY); -+ dev->open = 1; -+ -+ /* -+ * Initialise the hardware registers -+ */ -+ -+ LPT_WRITE (dev, LPT_CTRL, 0); -+ DELAY(50); -+ LPT_WRITE (dev, LPT_CTRL, LPT_CTRL_INIT); -+ -+ MOD_INC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_close(struct inode *inode, struct file *filp) -+{ -+ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ dev->open = 0; -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (0); -+} -+ -+int -+jtag_ioctl (struct inode *inode, struct file *filp, unsigned int io_cmd, unsigned long io_data) -+{ -+ int unit = MINOR(inode->i_rdev); -+ JTAG_DEV *dev = &jtag_devs[unit]; -+ JTAG_RESET_ARGS *resetargs; -+ JTAG_SHIFT_ARGS *shiftargs; -+ I2C_ARGS *i2cargs; -+ I2C_CLOCK_SHIFT_ARGS *clockargs; -+ u_char *buf; -+ int freq; -+ -+ if (unit < 0 || unit > MAX_JTAG_DEV || !dev->present) -+ return (-ENXIO); -+ -+ PRINTF (DBG_IOCTL, ("jtag_ioctl: device %d cmd=%x\n", unit, io_cmd)); -+ -+ switch (io_cmd) -+ { -+ case JTAG_RESET: -+ resetargs = (JTAG_RESET_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (resetargs->ring)) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, resetargs->ring); -+ jtagdrv_reset (dev); -+ return (0); -+ -+ case JTAG_SHIFT_IR: -+ case JTAG_SHIFT_DR: -+ shiftargs = (JTAG_SHIFT_ARGS *) io_data; -+ -+ if (! VALID_JTAG_RING (shiftargs->ring) || shiftargs->nbits > (JTAG_MAX_DATA_LEN*JTAG_MAX_CHIPS)) { -+ return (-EFAULT); -+ } -+ -+ buf = (u_char *) kmalloc (JTAG_NBYTES(shiftargs->nbits), GFP_KERNEL); -+ -+ if (buf == (u_char *) NULL) -+ return (-ENOMEM); -+ -+ if (copy_from_user (buf, shiftargs->value, JTAG_NBYTES(shiftargs->nbits))) -+ { -+ kfree(buf); -+ return (-EFAULT); -+ } -+ -+ -+ jtagdrv_select_ring (dev, shiftargs->ring); -+ -+ if (io_cmd == JTAG_SHIFT_IR) -+ jtagdrv_shift_ir (dev, buf, shiftargs->nbits); -+ else -+ jtagdrv_shift_dr (dev, buf, shiftargs->nbits); -+ -+ if (copy_to_user (shiftargs->value, buf, JTAG_NBYTES (shiftargs->nbits))) -+ { -+ kfree (buf); -+ return (-EFAULT); -+ } -+ -+ kfree (buf); -+ return (0); -+ -+ case I2C_WRITE: -+ case I2C_READ: -+ case I2C_WRITEREG: -+ case I2C_READREG: -+ i2cargs = (I2C_ARGS *) io_data; -+ -+ if (! VALID_I2C_RING(i2cargs->ring) || i2cargs->count > I2C_MAX_DATA_LEN) -+ return (-EFAULT); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | i2cargs->ring); -+ switch (io_cmd) -+ { -+ case I2C_WRITE: -+ i2cargs->ok = jtagdrv_i2c_write (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READ: -+ i2cargs->ok = jtagdrv_i2c_read (dev, i2cargs->device, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_WRITEREG: -+ i2cargs->ok = jtagdrv_i2c_writereg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ -+ case I2C_READREG: -+ i2cargs->ok = jtagdrv_i2c_readreg (dev, i2cargs->device, i2cargs->reg, i2cargs->count, i2cargs->data); -+ break; -+ } -+ return (0); -+ -+ case I2C_CLOCK_SHIFT: -+ clockargs = (I2C_CLOCK_SHIFT_ARGS *) io_data; -+ -+ freq = (10 * clockargs->m / (1 << (((clockargs->n + 1) & 3)))); -+ -+ /* validate the value, and initialise the ring */ -+ if (clockargs->t != 0 || clockargs->n > 3 || clockargs->m > 127) -+ return (-EINVAL); -+ -+ jtagdrv_select_ring (dev, RING_I2C_BIT | RING_CLOCK_SHIFT); -+ jtagdrv_i2c_clock_shift (dev, clockargs->t, clockargs->n, clockargs->m); -+ jtagdrv_select_ring (dev, 0); -+ return (0); -+ -+ default: -+ return (-EINVAL); -+ } -+ return (-EINVAL); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv_Linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/jtagdrv_Linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/jtagdrv_Linux.h 2005-07-28 14:52:52.915668552 -0400 -@@ -0,0 +1,174 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: jtagdrv_Linux.h,v 1.3 2002/08/09 11:18:37 addy Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagdrv_Linux.h,v $*/ -+ -+#ifndef __JTAGDRV_LINUX_H -+#define __JTAGDRV_LINUX_H -+ -+#include -+#include -+ -+typedef struct jtag_dev -+{ -+ unsigned long base; -+ int region; -+ -+ u_int present:1; -+ u_int open:1; -+} JTAG_DEV; -+ -+/* -+** -+** Hardware Defines -+** -+*/ -+ -+/* -+ * Assume that bit 4 of the Control Register is set to 1 (by default) -+ * to enable the printer port (CS3). -+ * -+ * The default base address is 3BC-3BF. -+ */ -+ -+#define LPT0 0x3BC /* CSR Base Address - note this can -+ * change depending on the setting -+ * in the Control Register 0. -+ * -+ * LPT1 0x378 -+ * LPT2 0x278 -+ * LPT3 0x268 -+ */ -+ -+/* -+ * Register offsets from the port base address -+ */ -+ -+#define LPT_REGISTER_0 0 -+#define LPT_REGISTER_1 1 -+#define LPT_REGISTER_2 2 -+#define LPT_REGISTER_3 0x400 -+#define LPT_REGISTER_4 0x401 -+#define LPT_REGISTER_5 0x402 -+ -+/* -+ * Chip control registers -+ */ -+ /* Base address for Super I/O National*/ -+ -+#define SIO_BASE_ADDR 0x26e /* Semiconductor PC87332VLJ combo-chip*/ -+#define CR4_REG 0x04 /* index 4, printer control reg 4 */ -+ -+#define LPT_EPP 0x01 /* Enable bit for epp */ -+#define LPT_ECP 0x04 /* Enable bit for ecp */ -+ -+/* -+ * Registers for use with centronics, nibble and byte modes. -+ */ -+ -+#define LPT_DATA LPT_REGISTER_0 /* line printer port data */ -+#define LPT_STAT LPT_REGISTER_1 /* LPT port status */ -+#define LPT_CTRL LPT_REGISTER_2 /* LPT port control */ -+ -+/* -+ * Registers for use with ECP mode. -+ */ -+ -+#define LPT_DFIFO LPT_REGISTER_3 /* r/w fifo register */ -+#define LPT_CFGB LPT_REGISTER_4 /* Configuration B */ -+#define LPT_ECR LPT_REGISTER_5 /* Exteded control */ -+ -+/* -+ * Bit assignments for ECR register. -+ */ -+ -+ /* Bits 0-4 */ -+ -+#define LPT_ECR_EMPTY 0x01 /* FIFO is empty */ -+#define LPT_ECR_FULL 0x02 /* FIFO is full */ -+#define LPT_ECR_SERV 0x04 /* Service bit */ -+#define LPT_ECR_DMA 0x08 /* DMA enable */ -+#define LPT_ECR_nINTR 0x10 /* Interrupt disable */ -+ -+ /* -+ * Bits 5-7 are ECR modes. -+ */ -+ -+#define LPT_ECR_PAR 0x20 /* Parallel port FIFO mode */ -+#define LPT_ECR_ECP 0x60 /* ECP mode */ -+#define LPT_ECR_CFG 0xE0 /* Configuration mode */ -+#define LPT_ECR_CLEAR ~0xE0 /* Cear mode bits */ -+ -+/* -+ * Bit assignments for the parallel port STATUS register: -+ */ -+ -+#define LPT_STAT_BIT0 0X1 /* Reserved. Bit always set. */ -+#define LPT_STAT_BIT1 0X2 /* Reserved. Bit always set. */ -+#define LPT_STAT_IRQ 0x4 /* interrupt status bit */ -+#define LPT_STAT_ERROR 0x8 /* set to 0 to indicate error */ -+#define LPT_STAT_SLCT 0x10 /* status of SLCT lead from printer */ -+#define LPT_STAT_PE 0x20 /* set to 1 when out of paper */ -+#define LPT_STAT_ACK 0x40 /* acknowledge - set to 0 when ready */ -+#define LPT_STAT_nBUSY 0x80 /* busy status bit, 0=busy, 1=ready */ -+ -+/* -+ * Bit assignments for the parallel port CONTROL register: -+ */ -+ -+#define LPT_CTRL_nSTROBE 0x1 /* Printer Strobe Control */ -+#define LPT_CTRL_nAUTOFD 0x2 /* Auto Feed Control */ -+#define LPT_CTRL_INIT 0x4 /* Initialize Printer Control */ -+#define LPT_CTRL_nSLCTIN 0x8 /* 0=select printer, 1=not selected */ -+#define LPT_CTRL_IRQ 0x10 /* Interrupt Request Enable Control */ -+#define LPT_CTRL_DIR 0x20 /* Direction control */ -+#define LPT_CTRL_BIT6 0X40 /* Reserved. Bit always set. */ -+#define LPT_CTRL_BIT7 0X80 /* Reserved. Bit always set. */ -+ -+ -+#define LPT_WRITE(dev, regname, value) do { outb(value, (dev)->base + regname); } while (0) -+#define LPT_READ(dev, regname,value) do { value = inb((dev)->base + regname); } while (0) -+ -+ -+ -+/* Standard register access macros */ -+#define LPT_WRITE_CTRL(dev, value) LPT_WRITE(dev, LPT_CTRL, LPT_CTRL_INIT | value) -+#define LPT_WRITE_DATA(dev, value) LPT_WRITE(dev, LPT_DATA, value) -+#define LPT_READ_STAT(dev, value) LPT_READ(dev, LPT_STAT, value) -+ -+/* -+ * The jtag signals are connected to the parallel port as follows : -+ * -+ * TRST bit 0 -+ * TDI bit 1 -+ * TMS bit 2 -+ * TCLK AFX -+ * TDO PE -+ */ -+#define LPT_DATA_TRST 1 -+#define LPT_DATA_TDI 2 -+#define LPT_DATA_TMS 4 -+#define LPT_CTRL_TCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_TDO LPT_STAT_PE -+ -+/* -+ * The I2C signals are connected as follows : -+ */ -+#define LPT_DATA_SDA 2 -+#define LPT_CTRL_SCLK LPT_CTRL_nAUTOFD -+#define LPT_STAT_SDA LPT_STAT_PE -+ -+/* -+ * The ring selection signals are as follows : -+ * addr bit 0-7 -+ * clock nSLCTIN -+ */ -+#define LPT_CTRL_RCLK LPT_CTRL_nSLCTIN -+ -+ -+#endif /* __JTAGDRV_LINUX_H */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/Makefile 2005-07-28 14:52:52.915668552 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/jtag/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_JTAG) += jtag.o -+jtag-objs := jtagdrv_Linux.o jtagdrv.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/Makefile.conf 2005-07-28 14:52:52.915668552 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = jtag.o -+MODULENAME = jtag -+KOBJFILES = jtagdrv_Linux.o jtagdrv.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_JTAG -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/jtag/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/jtag/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/jtag/quadrics_version.h 2005-07-28 14:52:52.915668552 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/Kconfig -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/Kconfig 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/Kconfig 2005-07-28 14:52:52.916668400 -0400 -@@ -0,0 +1,79 @@ -+# -+# Kconfig for Quadrics QsNet -+# -+# Copyright (c) 2004 Quadrics Ltd -+# -+# File: driver/net/qsnet/Kconfig -+# -+ -+menu "Quadrics QsNet" -+ depends on NETDEVICES -+ -+config QSNET -+ tristate "Quadrics QsNet support" -+ default m -+ depends on PCI -+ ---help--- -+ Quadrics QsNet is a high bandwidth, ultra low latency cluster interconnect -+ which provides both user and kernel programmers with secure, direct access -+ to the Quadrics network. -+ -+config ELAN3 -+ tristate "Elan 3 device driver" -+ default m -+ depends on QSNET -+ ---help--- -+ This is the main device driver for the Quadrics QsNet (Elan3) PCI device. -+ This is a high bandwidth, ultra low latency interconnect which provides -+ both user and kernel programmers with secure, direct access to the -+ Quadrics network. -+ -+config ELAN4 -+ tristate "Elan 4 device driver" -+ default m -+ depends on QSNET -+ ---help--- -+ This is the main device driver for the Quadrics QsNetII (Elan4) PCI-X device. -+ This is a high bandwidth, ultra low latency interconnect which provides -+ both user and kernel programmers with secure, direct access to the -+ Quadrics network. -+ -+config EP -+ tristate "Elan Kernel Comms" -+ default m -+ depends on QSNET && (ELAN4 || ELAN3) -+ ---help--- -+ This module implements the QsNet kernel communications layer. This -+ is used to layer kernel level facilities on top of the basic Elan -+ device drivers. These can be used to implement subsystems such as -+ TCP/IP and remote filing systems over the QsNet interconnect. -+ -+config EIP -+ tristate "Elan IP device driver" -+ default m -+ depends on QSNET && EP && NET -+ ---help--- -+ This is a network IP device driver for the Quadrics QsNet device. -+ It allows the TCP/IP protocol to be run over the Quadrics interconnect. -+ -+config RMS -+ tristate "Resource Management System support" -+ default m -+ depends on QSNET -+ ---help--- -+ This is a support module for the Quadrics RMS resource manager. It provides kernel -+ services for monitoring and controlling user job execution, termination and cleanup. -+ -+config JTAG -+ tristate "Switch monitoring" -+ default m -+ depends on QSNET -+ ---help--- -+ The jtag interface is used to allow processes to send and retrieve jtag -+ information to a Quadrics QsNet Elite switch via the parallel port. -+ The module requires a /dev/jtag[0-3] entry (usually there is only a /dev/jtag0) -+ device and a particular device only allows one process at a time to access this -+ resource. -+ For more information about JTag interface, please refer to the IEEE document on -+ http://www.ieee.org/ -+endmenu -Index: linux-2.6.5-7.191/drivers/net/qsnet/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/Makefile 2005-07-28 14:52:52.916668400 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd. -+# -+# File: driver/net/qsnet/Makefile -+# -+ -+obj-$(CONFIG_QSNET) += qsnet/ elan/ -+obj-$(CONFIG_ELAN3) += elan3/ -+obj-$(CONFIG_ELAN4) += elan4/ -+obj-$(CONFIG_EP) += ep/ -+obj-$(CONFIG_EIP) += eip/ -+obj-$(CONFIG_RMS) += rms/ -+obj-$(CONFIG_JTAG) += jtag/ -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/debug.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/debug.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/debug.c 2005-07-28 14:52:52.917668248 -0400 -@@ -0,0 +1,583 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: debug.c,v 1.21 2004/08/19 08:09:57 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/debug.c,v $ */ -+ -+#include -+#include -+#include -+ -+caddr_t qsnet_debug_buffer_ptr = NULL; -+int qsnet_debug_front = 0; -+int qsnet_debug_back = 0; -+int qsnet_debug_lost_lines = 0; -+int qsnet_debug_disabled = 0; -+ -+int qsnet_debug_line_size = 256; -+int qsnet_debug_num_lines = 8192; -+ -+int qsnet_assfail_mode = 1; /* default to BUG() */ -+ -+int qsnet_debug_running = 0; -+int kqsnet_debug_running = 0; -+ -+static spinlock_t qsnet_debug_lock; -+static kcondvar_t qsnet_debug_wait; -+static char qsnet_debug_buffer_space[8192]; -+ -+#define QSNET_DEBUG_PREFIX_MAX_SIZE 32 -+#define QSNET_DEBUG_MAX_WORDWRAP 15 -+ -+/* must be larger than QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2 */ -+#if defined(DIGITAL_UNIX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 80 -+#elif defined(LINUX) -+#define QSNET_DEBUG_CONSOLE_WIDTH 128 -+#endif -+ -+#define isspace(CH) ((CH==' ') | (CH=='\t') | (CH=='\n')) -+ -+#ifdef LINUX -+#define ALLOC_DEBUG_BUFFER(ptr) do { (ptr) = (void *)__get_free_pages (GFP_KERNEL, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)); } while (0) -+#define FREE_DEBUG_BUFFER(ptr) free_pages ((unsigned long) ptr, get_order (qsnet_debug_num_lines * qsnet_debug_line_size)) -+#else -+#define ALLOC_DEBUG_BUFFER(ptr) KMEM_ALLOC (ptr, caddr_t, qsnet_debug_num_lines * qsnet_debug_line_size, 1) -+#define FREE_DEBUG_BUFFER(ptr) KMEM_FREE (ptr, qsnet_debug_num_lines * qsnet_debug_line_size) -+#endif -+ -+void -+qsnet_debug_init () -+{ -+ spin_lock_init (&qsnet_debug_lock); -+ kcondvar_init (&qsnet_debug_wait); -+ -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_debug_lost_lines = 0; -+ -+ if (qsnet_debug_line_size < (QSNET_DEBUG_PREFIX_MAX_SIZE + QSNET_DEBUG_MAX_WORDWRAP + 2)) -+ qsnet_debug_line_size = 256; -+ -+ qsnet_debug_running = 1; -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "assfail_mode", &qsnet_assfail_mode, 0); -+} -+ -+void -+qsnet_debug_fini() -+{ -+ if (!qsnet_debug_running) return; -+ -+ remove_proc_entry ("assfail_mode", qsnet_procfs_config); -+ -+ spin_lock_destroy (&qsnet_debug_lock); -+ kcondvar_destroy (&qsnet_debug_wait); -+ -+ if (qsnet_debug_buffer_ptr) -+ FREE_DEBUG_BUFFER (qsnet_debug_buffer_ptr); -+ -+ qsnet_debug_buffer_ptr = NULL; -+ qsnet_debug_lost_lines = 0; -+ qsnet_debug_running = 0; -+} -+ -+void -+qsnet_debug_disable(int val) -+{ -+ qsnet_debug_disabled = val; -+} -+ -+void -+qsnet_debug_alloc() -+{ -+ caddr_t ptr; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ ALLOC_DEBUG_BUFFER (ptr); -+ -+ if (ptr != NULL) -+ { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ qsnet_debug_buffer_ptr = ptr; -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+ else -+ { -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ FREE_DEBUG_BUFFER (ptr); -+ } -+ } -+ } -+ -+} -+ -+static void -+qsnet_prefix_debug(unsigned int mode, char *prefix, char *buffer) -+{ -+ /* assumes caller has lock */ -+ -+ int prefixlen = strlen(prefix); -+ char pref[QSNET_DEBUG_PREFIX_MAX_SIZE]; -+ int prefix_done = 0; -+ -+ if (!qsnet_debug_running) return; -+ -+ if (qsnet_debug_disabled) -+ return; -+ -+ if (prefixlen >= QSNET_DEBUG_PREFIX_MAX_SIZE) -+ { -+ strncpy(pref,prefix,QSNET_DEBUG_PREFIX_MAX_SIZE -2); -+ strcpy (&pref[QSNET_DEBUG_PREFIX_MAX_SIZE-5],"... "); -+ -+ prefix = pref; -+ prefixlen = strlen(prefix); -+ } -+ -+#ifdef CONFIG_MPSAS -+ { -+ char *p; -+#define TRAP_PUTCHAR_B (0x17a - 256) -+#define SAS_PUTCHAR(c) do {\ -+ register int o0 asm ("o0") = (c);\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+\ -+ if (o0 == '\n') {\ -+ o0 = '\r';\ -+\ -+ asm volatile ("ta %0; nop" \ -+ : /* no outputs */\ -+ : /* inputs */ "i" (TRAP_PUTCHAR_B), "r" (o0)\ -+ : /* clobbered */ "o0");\ -+ }\ -+ } while(0) -+ -+ for (p = prefix; *p; p++) -+ SAS_PUTCHAR (*p); -+ -+ for (p = buffer; *p; p++) -+ SAS_PUTCHAR (*p); -+ } -+#else -+ if (mode & QSNET_DEBUG_BUFFER) -+ { -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_lost_lines++; -+ else -+ { -+ caddr_t base = &qsnet_debug_buffer_ptr[qsnet_debug_line_size * qsnet_debug_back]; -+ caddr_t lim = base + qsnet_debug_line_size - 2; -+ caddr_t p; -+ -+ p = buffer; -+ prefix_done = 0; -+ while (*p) -+ { -+ /* sort out prefix */ -+ if ( prefix_done++ ) -+ { -+ int i; -+ for(i=0;i 0 ) -+ { -+ int i; -+ for(i=0;i remaining) len = remaining; -+ -+ strncpy(line, p, len); -+ line[len] = 0; -+ p += len; -+ -+ /* word wrap */ -+ if ((len == remaining) && *p && !isspace(*p)) -+ { -+ /* lets see if we can back track and find a white space to break on */ -+ char * ptr = &line[len-1]; -+ int count = 1; -+ -+ while ( ( !isspace(*ptr) ) && ( count < QSNET_DEBUG_MAX_WORDWRAP )) -+ { -+ count++; -+ ptr--; -+ } -+ -+ if ( isspace(*ptr) ) -+ { -+ /* found somewhere to wrap to */ -+ p -= (count-1); /* need to loose the white space */ -+ len -= count; -+ } -+ } -+ -+ if (line[len-1] != '\n' ) -+ { -+ line[len] = '\n'; -+ line[len+1] = 0; -+ } -+ -+ /* we put a \n in so dont need another one next */ -+ if ( *p == '\n') -+ p++; -+ -+#if defined(DIGITAL_UNIX) -+ { -+ char *pr; -+ -+ for (pr = pref; *pr; pr++) -+ cnputc (*pr); -+ -+ for (pr = line; *pr; pr++) -+ cnputc (*pr); -+ } -+#elif defined(LINUX) -+ printk("%s%s",pref,line); -+#endif -+ } -+ } -+#endif /* CONFIG_MPSAS */ -+} -+ -+void -+qsnet_vdebugf (unsigned int mode, char *prefix, char *fmt, va_list ap) -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ -+ if (prefix == NULL) -+ printk ("qsnet_vdebugf: prefix==NULL\n"); -+ else -+ qsnet_prefix_debug(mode, prefix, qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+void kqsnet_debugf(char *fmt,...) -+{ -+ if ( kqsnet_debug_running ) { -+ va_list ap; -+ char string[20]; -+ -+ sprintf (string, "mm=%p:", current->mm); -+ va_start(ap, fmt); -+ qsnet_vdebugf(QSNET_DEBUG_BUFFER, string, fmt, ap); -+ va_end(ap); -+ } -+} -+void -+qsnet_debugf(unsigned int mode, char *fmt,...) -+{ -+ va_list ap; -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return; -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ qsnet_debug_buffer_space[0] = '\0'; -+ -+ va_start (ap, fmt); -+#if defined(DIGITAL_UNIX) -+ prf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), NULL, fmt, ap); -+#elif defined(LINUX) -+ vsprintf (qsnet_debug_buffer_space+strlen(qsnet_debug_buffer_space), fmt, ap); -+#endif -+ va_end (ap); -+ -+ qsnet_prefix_debug(mode, "", qsnet_debug_buffer_space); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+} -+ -+int -+qsnet_debug_buffer (caddr_t ubuffer, int len) -+{ -+ caddr_t buffer, ptr, base; -+ int remain, len1; -+ unsigned long flags; -+ static char qsnet_space[65536]; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (len < qsnet_debug_line_size) -+ return (-1); -+ -+ if (len > (qsnet_debug_line_size * qsnet_debug_num_lines)) -+ len = qsnet_debug_line_size * qsnet_debug_num_lines; -+ -+ if ( len > 65536 ) { -+ KMEM_ZALLOC (buffer, caddr_t, len, 1); -+ } else -+ buffer = qsnet_space; -+ -+ if (buffer == NULL) -+ return (-1); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ { -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ return (-1); -+ } -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (!qsnet_debug_lost_lines && (qsnet_debug_back == qsnet_debug_front)) -+ if (kcondvar_waitsig (&qsnet_debug_wait, &qsnet_debug_lock, &flags) == 0) -+ break; -+ -+ ptr = buffer; -+ remain = len; -+ -+ if (qsnet_debug_lost_lines) -+ { -+ qsnet_debug_lost_lines = 0; -+ strcpy (ptr, "Debug Buffer has overflowed!!\n"); -+ len1 = strlen (ptr); -+ -+ remain -= len1; -+ ptr += len1; -+ } -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ /* copy the line from DebugFront */ -+ base = &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]; -+ -+ len1 = strlen (base); -+ -+ if (len1 > remain) -+ break; -+ -+ bcopy (base, ptr, len1); -+ -+ ptr += len1; -+ remain -= len1; -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ len1 = ptr - buffer; -+ -+ if (len1 != 0 && copyout (buffer, ubuffer, len1)) -+ len1 = -1; -+ -+ if ( len > 65536 ) -+ KMEM_FREE (buffer, len); -+ -+ return (len1); -+} -+ -+void -+qsnet_debug_buffer_on() -+{ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+} -+ -+void -+qsnet_debug_buffer_clear() -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL){ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_debug_front = 0; -+ qsnet_debug_back = 0; -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Clear",""); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+ -+void -+qsnet_debug_buffer_mark(char *str) -+{ -+ unsigned long flags; -+ -+ qsnet_debug_buffer_on(); -+ -+ if (qsnet_debug_buffer_ptr != NULL) { -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ qsnet_prefix_debug(QSNET_DEBUG_BUFFER,"Mark",str); -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ } -+} -+int -+qsnet_debug_dump () -+{ -+ unsigned long flags; -+ -+ if (!qsnet_debug_running) return (0); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ qsnet_debug_alloc(); -+ -+ if (qsnet_debug_buffer_ptr == NULL) -+ return (-1); -+ -+ spin_lock_irqsave (&qsnet_debug_lock, flags); -+ -+ while (qsnet_debug_front != qsnet_debug_back) -+ { -+ printk ("%s", &qsnet_debug_buffer_ptr[qsnet_debug_front*qsnet_debug_line_size]); -+ -+ qsnet_debug_front = (++qsnet_debug_front == qsnet_debug_num_lines) ? 0 : qsnet_debug_front; -+ } -+ -+ if (qsnet_debug_lost_lines) -+ printk ("\n**** Debug buffer has lost %d lines\n****\n",qsnet_debug_lost_lines); -+ -+ spin_unlock_irqrestore (&qsnet_debug_lock, flags); -+ -+ return (0); -+} -+ -+int -+qsnet_debug_kmem (void *handle) -+{ -+ if (!qsnet_debug_running) return (0); -+ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(handle); -+#endif -+ return (0); -+} -+ -+int -+qsnet_assfail (char *ex, const char *func, char *file, int line) -+{ -+ qsnet_debugf (QSNET_DEBUG_BUFFER, "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ printk (KERN_EMERG "qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ -+ if (panicstr) -+ return (0); -+ -+ if (qsnet_assfail_mode & 1) /* return to BUG() */ -+ return 1; -+ -+ if (qsnet_assfail_mode & 2) -+ panic ("qsnet: assertion failure: %s, function: %s, file %s, line: %d\n", ex, func, file, line); -+ if (qsnet_assfail_mode & 4) -+ qsnet_debug_disable (1); -+ -+ return 0; -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/i686_mmx.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/i686_mmx.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/i686_mmx.c 2005-07-28 14:52:52.917668248 -0400 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: i686_mmx.c,v 1.11 2004/01/05 12:08:25 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/i686_mmx.c,v $*/ -+ -+#include -+ -+#if defined(LINUX_I386) -+ -+#include -+#include -+#include -+#include -+ -+int mmx_disabled = 0; -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+/* These functions are lifted from arch/i386/kernel/i387.c -+ * and MUST be kept in step with the kernel (currently 2.4.17) -+ * alternatively we should export the kernel_fpu_begin() function -+ */ -+static inline void __save_init_fpu( struct task_struct *tsk ) -+{ -+ if ( cpu_has_fxsr ) { -+ asm volatile( "fxsave %0 ; fnclex" -+ : "=m" (tsk->thread.i387.fxsave) ); -+ } else { -+ asm volatile( "fnsave %0 ; fwait" -+ : "=m" (tsk->thread.i387.fsave) ); -+ } -+ tsk->flags &= ~PF_USEDFPU; -+} -+#if defined(MODULE) -+void kernel_fpu_begin(void) -+{ -+ struct task_struct *tsk = current; -+ -+ if (tsk->flags & PF_USEDFPU) { -+ __save_init_fpu(tsk); -+ return; -+ } -+ clts(); -+} -+#endif -+#endif -+ -+extern inline int -+mmx_preamble(void) -+{ -+ if (mmx_disabled || in_interrupt()) -+ return (0); -+ -+ kernel_fpu_begin(); -+ -+ return (1); -+} -+ -+extern inline void -+mmx_postamble(void) -+{ -+ kernel_fpu_end(); -+} -+ -+extern u64 -+qsnet_readq (volatile u64 *ptr) -+{ -+ u64 value; -+ -+ if (! mmx_preamble()) -+ value = *ptr; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (ptr), "r" (&value) : "memory"); -+ mmx_postamble(); -+ } -+ return (value); -+} -+ -+void -+qsnet_writeq(u64 value, volatile u64 *ptr) -+{ -+ if (! mmx_preamble()) -+ *ptr = value; -+ else -+ { -+ asm volatile ("movq (%0), %%mm0\n" -+ "movq %%mm0, (%1)\n" -+ : : "r" (&value), "r" (ptr) : "memory"); -+ mmx_postamble(); -+ } -+} -+#endif -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/kernel_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/kernel_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/kernel_linux.c 2005-07-28 14:52:52.919667944 -0400 -@@ -0,0 +1,856 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: kernel_linux.c,v 1.71.2.3 2004/11/04 11:03:47 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.c,v $*/ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include /* for smp_call_function() prototype */ -+#include -+#include -+ -+#include -+ -+extern int mmx_disabled; -+extern int qsnet_debug_line_size; -+extern int qsnet_debug_num_lines; -+ -+gid_t qsnet_procfs_gid; -+struct proc_dir_entry *qsnet_procfs_root; -+struct proc_dir_entry *qsnet_procfs_config; -+ -+MODULE_AUTHOR("Quadrics Ltd."); -+MODULE_DESCRIPTION("QsNet Kernel support code"); -+ -+MODULE_LICENSE("GPL"); -+ -+#if defined(LINUX_I386) -+MODULE_PARM(mmx_disabled, "i"); -+#endif -+ -+MODULE_PARM(qsnet_debug_line_size, "i"); -+MODULE_PARM(qsnet_debug_num_lines, "i"); -+ -+MODULE_PARM(qsnet_procfs_gid, "i"); -+ -+#ifdef KMEM_DEBUG -+EXPORT_SYMBOL(qsnet_kmem_alloc_debug); -+EXPORT_SYMBOL(qsnet_kmem_free_debug); -+#else -+EXPORT_SYMBOL(qsnet_kmem_alloc); -+EXPORT_SYMBOL(qsnet_kmem_free); -+#endif -+ -+EXPORT_SYMBOL(qsnet_kmem_display); -+EXPORT_SYMBOL(kmem_to_phys); -+ -+EXPORT_SYMBOL(cpu_hold_all); -+EXPORT_SYMBOL(cpu_release_all); -+ -+#if defined(LINUX_I386) -+EXPORT_SYMBOL(qsnet_readq); -+EXPORT_SYMBOL(qsnet_writeq); -+#endif -+ -+/* debug.c */ -+EXPORT_SYMBOL(qsnet_debugf); -+EXPORT_SYMBOL(kqsnet_debugf); -+EXPORT_SYMBOL(qsnet_vdebugf); -+EXPORT_SYMBOL(qsnet_debug_buffer); -+EXPORT_SYMBOL(qsnet_debug_alloc); -+EXPORT_SYMBOL(qsnet_debug_dump); -+EXPORT_SYMBOL(qsnet_debug_kmem); -+EXPORT_SYMBOL(qsnet_debug_disable); -+ -+EXPORT_SYMBOL(qsnet_assfail); -+ -+EXPORT_SYMBOL(qsnet_procfs_gid); -+EXPORT_SYMBOL(qsnet_procfs_root); -+ -+static int qsnet_open (struct inode *ino, struct file *fp); -+static int qsnet_release (struct inode *ino, struct file *fp); -+static int qsnet_ioctl (struct inode *ino, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+static struct file_operations qsnet_ioctl_fops = -+{ -+ ioctl: qsnet_ioctl, -+ open: qsnet_open, -+ release: qsnet_release, -+}; -+ -+static int -+qsnet_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+qsnet_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res=0; -+ -+ switch (cmd) -+ { -+ case QSNETIO_DEBUG_KMEM: -+ { -+ QSNETIO_DEBUG_KMEM_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_KMEM_STRUCT))) -+ return (-EFAULT); -+ -+ /* doesnt use handle as a pointer */ -+ qsnet_kmem_display(args.handle); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_DUMP : -+ { -+ res = qsnet_debug_dump(); -+ break; -+ } -+ -+ case QSNETIO_DEBUG_BUFFER : -+ { -+ QSNETIO_DEBUG_BUFFER_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ -+ /* qsnet_debug_buffer uses copyout */ -+ if ((res = qsnet_debug_buffer (args.addr, args.len)) != -1) -+ { -+ args.len = res; -+ if (copy_to_user ((void *) arg, &args, sizeof (QSNETIO_DEBUG_BUFFER_STRUCT))) -+ return (-EFAULT); -+ res = 0; -+ } -+ break; -+ } -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#ifdef KMEM_DEBUG -+static int qsnet_kmem_open (struct inode *ino, struct file *fp); -+static int qsnet_kmem_release (struct inode *ino, struct file *fp); -+static ssize_t qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+static struct file_operations qsnet_kmem_fops = -+{ -+ open: qsnet_kmem_open, -+ release: qsnet_kmem_release, -+ read: qsnet_kmem_read, -+}; -+ -+typedef struct qsnet_private_space -+{ -+ char * space; -+ int size; -+ struct qsnet_private_space *next; -+} QSNET_PRIVATE_SPACE; -+ -+typedef struct qsnet_private -+{ -+ QSNET_PRIVATE_SPACE *space_chain; -+ QSNET_PRIVATE_SPACE *current_space; -+ int current_pos; -+ -+} QSNET_PRIVATE; -+ -+#define QSNET_KMEM_DEBUG_LINE_SIZE ((int)512) -+#define QSNET_PRIVATE_PAGE_SIZE ((int)(4*1024)) -+ -+static int qsnet_kmem_fill(QSNET_PRIVATE *pd); -+ -+void -+destroy_chain(QSNET_PRIVATE * pd) -+{ -+ QSNET_PRIVATE_SPACE *mem, *next; -+ -+ if (pd == NULL) return; -+ -+ for(mem = pd->space_chain ; mem != NULL; ) -+ { -+ next = mem->next; -+ if ( mem->space ) -+ kfree ( mem->space); -+ kfree(mem); -+ mem = next; -+ } -+ kfree (pd); -+} -+ -+QSNET_PRIVATE * -+make_chain(int len) -+{ -+ QSNET_PRIVATE * pd; -+ QSNET_PRIVATE_SPACE * mem; -+ int i; -+ -+ /* make the private data block */ -+ if ((pd = kmalloc (sizeof (QSNET_PRIVATE), GFP_KERNEL)) == NULL) -+ return NULL; -+ pd->space_chain = NULL; -+ -+ /* first make the holders */ -+ for(i=0;inext = pd->space_chain; -+ mem->size = 0; -+ mem->space = 0; -+ pd->space_chain = mem; -+ -+ /* now add the space */ -+ if ((mem->space = kmalloc (QSNET_PRIVATE_PAGE_SIZE, GFP_KERNEL)) == NULL) -+ { -+ destroy_chain(pd); -+ return (NULL); -+ } -+ -+ mem->space[0] = 0; -+ -+ } -+ -+ pd->current_space = pd->space_chain; -+ pd->current_pos = 0; -+ -+ return pd; -+} -+ -+static int -+qsnet_kmem_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ return (0); -+} -+ -+static int -+qsnet_kmem_release (struct inode *inode, struct file *fp) -+{ -+ if ( fp->private_data ) -+ { -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) fp->private_data; -+ -+ /* free the space */ -+ if (pd->space_chain) -+ kfree (pd->space_chain); -+ -+ /* free struct */ -+ kfree (pd); -+ } -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static ssize_t -+qsnet_kmem_read (struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ QSNET_PRIVATE * pd = (QSNET_PRIVATE *) file->private_data; -+ int error; -+ int output_count; -+ int num_of_links=10; -+ -+ /* make a buffer to output count bytes in */ -+ if ((error = verify_area (VERIFY_WRITE, buf, count)) != 0) -+ return (error); -+ -+ if ( pd == NULL) -+ { -+ /* first time */ -+ -+ /* ok we have to guess at how much space we are going to need */ -+ /* if it fails we up the space and carry try again */ -+ /* we have to do it this way as we cant get more memory whilst */ -+ /* holding the lock */ -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ -+ while ( qsnet_kmem_fill(pd) ) -+ { -+ destroy_chain(pd); -+ num_of_links += 10; -+ if ((pd = make_chain(num_of_links)) == NULL) -+ return (-ENOMEM); -+ } -+ -+ /* we have the space and filled it */ -+ file->private_data = (void *)pd; -+ } -+ -+ /* output buffer */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ return (0); /* finished */ -+ -+ output_count = pd->current_space->size - pd->current_pos; -+ if ( output_count > count ) -+ output_count = count; -+ -+ copy_to_user(buf, (pd->current_space->space + pd->current_pos), output_count); -+ -+ pd->current_pos += output_count; -+ ppos += output_count; -+ -+ /* just check to see if we have finished the current space */ -+ if ( pd->current_pos >= pd->current_space->size ) -+ { -+ if ( pd->current_space->next ) -+ { -+ pd->current_space = pd->current_space->next; -+ pd->current_pos = 0; -+ } -+ } -+ -+ return (output_count); -+} -+#endif /* KMEM_DEBUG */ -+ -+static int -+proc_write_qsnetdebug(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char tmpbuf[128]; -+ int res; -+ -+ if (count > sizeof (tmpbuf)-1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ if (copy_from_user (tmpbuf, buffer, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ -+ if (tmpbuf[count-1] == '\n') -+ tmpbuf[count-1] = '\0'; -+ -+ if (! strcmp (tmpbuf, "on")) -+ qsnet_debug_buffer_on(); -+ -+ if (! strcmp (tmpbuf, "clear")) -+ qsnet_debug_buffer_clear(); -+ -+ if (! strncmp (tmpbuf, "mark",4)) -+ qsnet_debug_buffer_mark( &tmpbuf[4] ); -+ -+ res = count; -+ } -+ -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static int -+proc_read_qsnetdebug(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len = sprintf (page, "echo command > /proc/qsnet/config/qsnetdebug\ncommand = on | off | clear | mark text\n"); -+ return (qsnet_proc_calc_metrics (page, start, off, count, eof, len)); -+} -+ -+#include "quadrics_version.h" -+extern int kqsnet_debug_running; -+static char quadrics_version[] = QUADRICS_VERSION; -+ -+static int __init qsnet_init(void) -+{ -+ struct proc_dir_entry *p; -+ -+ if ((qsnet_procfs_root = proc_mkdir ("qsnet", 0)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet \n"); -+ return (-ENXIO); -+ } -+ -+ if ((p = create_proc_entry ("ioctl", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/ioctl\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_ioctl_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ qsnet_proc_register_str (qsnet_procfs_root, "version", quadrics_version, S_IRUGO); -+ -+ if ((qsnet_procfs_config = proc_mkdir ("config", qsnet_procfs_root)) == NULL) -+ { -+ printk ("qsnet: failed to create /proc/qsnet/config \n"); -+ return (-ENXIO); -+ } -+ -+#ifdef KMEM_DEBUG -+ if ((p = create_proc_entry ("kmem_debug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/kmem_debug\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &qsnet_kmem_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+#endif -+ -+ qsnet_debug_init(); -+ -+ qsnet_proc_register_int (qsnet_procfs_config, "kqsnet_debug_running", &kqsnet_debug_running, 0); -+ -+ if ((p = create_proc_entry ("qsnetdebug", S_IRUGO|S_IWUSR|S_IWGRP, qsnet_procfs_config)) == NULL) -+ { -+ printk ("qsnet: failed to register /proc/qsnet/config/qsnetdebug\n"); -+ return (-ENXIO); -+ } -+ p->read_proc = proc_read_qsnetdebug; -+ p->write_proc = proc_write_qsnetdebug; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->gid = qsnet_procfs_gid; -+ -+ return (0); -+} -+ -+static void __exit qsnet_exit(void) -+{ -+#ifdef KMEM_DEBUG -+ qsnet_kmem_display(0); -+#endif -+ qsnet_debug_fini(); -+ -+ remove_proc_entry ("qsnetdebug", qsnet_procfs_config); -+ remove_proc_entry ("kqsnet_debug_running", qsnet_procfs_config); -+#ifdef KMEM_DEBUG -+ remove_proc_entry ("kmem_debug", qsnet_procfs_config); -+#endif -+ remove_proc_entry ("config", qsnet_procfs_root); -+ -+ remove_proc_entry ("version", qsnet_procfs_root); -+ remove_proc_entry ("ioctl", qsnet_procfs_root); -+ -+ remove_proc_entry ("qsnet", 0); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(qsnet_init); -+module_exit(qsnet_exit); -+ -+#ifdef KMEM_DEBUG -+/* -+ * Kernel memory allocation. We maintain our own list of allocated mem -+ * segments so we can free them on module cleanup. -+ * -+ * We use kmalloc for allocations less than one page in size; vmalloc for -+ * larger sizes. -+ */ -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int line; -+ char filename[20]; -+} kmalloc_t; -+ -+static LIST_HEAD(kmalloc_head); -+ -+static spinlock_t kmalloc_lock = SPIN_LOCK_UNLOCKED; -+ -+/* -+ * Kernel memory allocation. We use kmalloc for allocations less -+ * than one page in size; vmalloc for larger sizes. -+ */ -+ -+static int -+qsnet_kmem_fill(QSNET_PRIVATE *pd) -+{ -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ char str[QSNET_KMEM_DEBUG_LINE_SIZE]; -+ QSNET_PRIVATE_SPACE * current_space; -+ int current_pos; -+ int len; -+ current_space = pd->space_chain; -+ current_pos = 0; -+ -+ -+ current_space->space[0] = 0; -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ /* make the next line */ -+ sprintf(str,"%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->filename); -+ len = strlen(str); -+ -+ /* does it fit on the current page */ -+ if ( (current_pos + len + 1) >= QSNET_PRIVATE_PAGE_SIZE) -+ { -+ /* move onto next page */ -+ if ((current_space = current_space->next) == NULL) -+ { -+ /* run out of space !!!! */ -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ return (1); -+ } -+ current_space->space[0] = 0; -+ current_pos = 0; -+ } -+ strcat( current_space->space + current_pos, str); -+ current_pos += len; -+ -+ /* remember how much we wrote to this page */ -+ current_space->size = current_pos; -+ -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return (0); -+} -+ -+void * -+qsnet_kmem_alloc_debug(int len, int cansleep, int zerofill, char *file, int line) -+{ -+ void *new; -+ unsigned long flags; -+ kmalloc_t *kp; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((uintptr_t) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ /* record allocation */ -+ kp = kmalloc(sizeof(kmalloc_t), cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ ASSERT(kp != NULL); -+ kp->len = len; -+ kp->ptr = new; -+ kp->used_vmalloc = (len >= PAGE_SIZE || cansleep); -+ kp->owner = current; -+ kp->caller = __builtin_return_address(0); -+ kp->time = lbolt; -+ kp->line = line; -+ len = strlen(file); -+ -+ if (len > 18) -+ strcpy(kp->filename,&file[len-18]); -+ else -+ strcpy(kp->filename,file); -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ list_add(&kp->list, &kmalloc_head); -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free_debug(void *ptr, int len, char *file, int line) -+{ -+ unsigned long flags; -+ kmalloc_t *kp; -+ struct list_head *lp; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ if (kp->ptr == ptr) { -+ if (kp->len != len) -+ printk("qsnet_kmem_free_debug(%p) ptr %p len %d mismatch: expected %d caller %p owner %p (%s:%d)\n", -+ current, ptr, len, kp->len, __builtin_return_address(0), kp->caller, file, line); -+ list_del(lp); -+ kfree(kp); /* free off descriptor */ -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ if (lp == &kmalloc_head) /* segment must be found */ -+ { -+ printk( "qsnet_kmem_free_debug(%p) ptr %p len %d not found: caller %p (%s:%d)\n", -+ current, ptr, len, __builtin_return_address(0), file, line); -+ } -+ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+ -+#else /* !KMEM_DEBUG */ -+ -+void * -+qsnet_kmem_alloc(int len, int cansleep, int zerofill) -+{ -+ void *new; -+ -+ if (len < PAGE_SIZE || !cansleep) -+ new = kmalloc(len, cansleep ? GFP_KERNEL : GFP_ATOMIC); -+ else -+ new = vmalloc(len); -+ -+ if (len >= PAGE_SIZE) -+ ASSERT(PAGE_ALIGNED((unsigned long) new)); -+ -+ if (new && zerofill) -+ memset(new,0,len); -+ -+ return new; -+} -+ -+void -+qsnet_kmem_free(void *ptr, int len) -+{ -+ if ((((unsigned long) ptr) >= VMALLOC_START && ((unsigned long) ptr) < VMALLOC_END)) -+ vfree (ptr); -+ else -+ kfree (ptr); -+} -+#endif /* !KMEM_DEBUG */ -+ -+void -+qsnet_kmem_display(void *handle) -+{ -+#ifdef KMEM_DEBUG -+ kmalloc_t *kp; -+ struct list_head *lp; -+ unsigned long flags; -+ int count = 0, totsize = 0; -+ -+ spin_lock_irqsave(&kmalloc_lock, flags); -+ for (lp = kmalloc_head.next; lp != &kmalloc_head; lp = lp->next) { -+ kp = list_entry(lp, kmalloc_t, list); -+ -+ if (!handle || handle == kp->owner) -+ { -+ printk("qsnet_kmem_display(%p): mem %p len %d unfreed caller %p (%p) \n", -+ handle, kp->ptr, kp->len, kp->caller, kp->owner); -+ -+ count++; -+ totsize += kp->len; -+ } -+ } -+ spin_unlock_irqrestore(&kmalloc_lock, flags); -+ -+ printk("qsnet_kmem_display(%p): %d bytes left in %d objects\n", handle, totsize, count); -+#endif -+} -+ -+physaddr_t -+kmem_to_phys(void *ptr) -+{ -+ virtaddr_t virt = (virtaddr_t) ptr; -+ physaddr_t phys; -+ pte_t *pte; -+ -+ if ((virt >= VMALLOC_START && virt < VMALLOC_END)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#if defined(PKMAP_BASE) -+ else if (virt >= PKMAP_BASE && virt < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) -+ { -+ pte = find_pte_kernel(virt); -+ ASSERT(pte && !pte_none(*pte)); -+ phys = pte_phys(*pte) + (virt & (PAGE_SIZE-1)); -+ } -+#endif -+#if defined(__ia64) -+ else if (virt >= __IA64_UNCACHED_OFFSET && virt < PAGE_OFFSET) -+ { -+ /* ia64 non-cached KSEG */ -+ phys = ((physaddr_t) ptr - __IA64_UNCACHED_OFFSET); -+ } -+#endif -+ else /* otherwise it's KSEG */ -+ { -+ phys = __pa(virt); -+ } -+ -+#if defined(CONFIG_ALPHA_GENERIC) || (defined(CONFIG_ALPHA_EV6) && !defined(USE_48_BIT_KSEG)) -+ /* -+ * with TS_BIAS as bit 40 - the tsunami pci space is mapped into -+ * the kernel at 0xfffff500.00000000 however we need to convert -+ * this to the true physical address 0x00000800.00000000. -+ * -+ * there is no need for PHYS_TWIDDLE since we knew we'd get a kernel -+ * virtual address already and handled this with __pa(). -+ */ -+ if (phys & (1ul << 40)) { -+ phys &= ~(1ul << 40); /* clear bit 40 (kseg I/O select) */ -+ phys |= (1ul << 43); /* set bit 43 (phys I/O select) */ -+ } -+#endif -+ return phys; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ -+EXPORT_SYMBOL(pci_resource_size); -+EXPORT_SYMBOL(pci_get_base_address); -+EXPORT_SYMBOL(pci_base_to_kseg); -+ -+ -+/* -+ * PCI stuff. -+ * -+ * XXX pci_base_to_kseg() and pci_kseg_to_phys() are problematic -+ * in that they may not work on non-Tsunami (DS20, ES40, etc) -+ * architectures, and may not work in non-zero PCI bus numbers. -+ */ -+ -+unsigned long -+pci_get_base_address(struct pci_dev *pdev, int index) -+{ -+ unsigned long base; -+ -+ ASSERT(index >= 0 && index <= 5); -+ /* borrowed in part from drivers/scsi/sym53c8xx.c */ -+ base = pdev->base_address[index++]; -+ -+#if BITS_PER_LONG > 32 -+ if ((base & 0x7) == 0x4) -+ base |= (((unsigned long)pdev->base_address[index]) << 32); -+#endif -+ return base; -+} -+ -+unsigned long -+pci_resource_size(struct pci_dev *pdev, int index) -+{ -+ u32 addr, mask, size; -+ -+ static u32 bar_addr[] = { -+ PCI_BASE_ADDRESS_0, -+ PCI_BASE_ADDRESS_1, -+ PCI_BASE_ADDRESS_2, -+ PCI_BASE_ADDRESS_3, -+ PCI_BASE_ADDRESS_4, -+ PCI_BASE_ADDRESS_5, -+ }; -+ ASSERT(index >= 0 && index <= 5); -+ -+ /* algorithm from Rubini book */ -+ pci_read_config_dword (pdev, bar_addr[index], &addr); -+ pci_write_config_dword(pdev, bar_addr[index], ~0); -+ pci_read_config_dword (pdev, bar_addr[index], &mask); -+ pci_write_config_dword(pdev, bar_addr[index], addr); -+ -+ mask &= PCI_BASE_ADDRESS_MEM_MASK; -+ size = ~mask + 1; -+ return size; -+} -+ -+/* -+ * Convert BAR register value to KSEG address. -+ */ -+void * -+pci_base_to_kseg(u64 baddr, int bus) -+{ -+ u64 kseg; -+ -+ /* XXX tsunami specific */ -+ baddr &= ~(u64)0x100000000; /* mask out hose bit */ -+ kseg = TSUNAMI_MEM(bus) + baddr; -+ return (void *)kseg; -+} -+ -+#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,0) */ -+ -+/* -+ * Spin the other CPU's in an SMP system. -+ * smp_call_function() needed to be exported to modules. It will be -+ * papered over in if running on a non-SMP box. -+ */ -+static spinlock_t hold_lock = SPIN_LOCK_UNLOCKED; -+ -+#if 0 -+static void cpu_hold(void *unused) -+{ -+ spin_lock(&hold_lock); -+ spin_unlock(&hold_lock); -+} -+#endif -+ -+void cpu_hold_all(void) -+{ -+ spin_lock(&hold_lock); -+ -+#if 0 -+ { -+ int res; -+ int retries = 10; -+ -+ /* XXXXX: cannot call smp_call_function() from interrupt context */ -+ -+ do { -+ /* only request blocking retry if not in interrupt context */ -+ res = smp_call_function(cpu_hold, NULL, !in_interrupt(), 0); -+ if (res) -+ mdelay(5); -+ } while (res && retries--); -+ -+ if (res) -+ printk("cpu_hold_all: IPI timeout\n"); -+ } -+#endif -+} -+ -+void cpu_release_all(void) -+{ -+ spin_unlock(&hold_lock); -+} -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/Makefile 2005-07-28 14:52:52.919667944 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/qsnet/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_QSNET) += qsnet.o -+qsnet-objs := debug.o kernel_linux.o i686_mmx.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/Makefile.conf 2005-07-28 14:52:52.919667944 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = qsnet.o -+MODULENAME = qsnet -+KOBJFILES = debug.o kernel_linux.o i686_mmx.o -+EXPORT_KOBJS = kernel_linux.o -+CONFIG_NAME = CONFIG_QSNET -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/qsnetkmem_linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/qsnetkmem_linux.c 2005-07-28 14:52:52.920667792 -0400 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: qsnetkmem_linux.c,v 1.3 2003/08/13 10:03:27 fabien Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/qsnetkmem_linux.c,v $*/ -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+ -+typedef struct { -+ struct list_head list; -+ void *ptr; -+ int len; -+ int used_vmalloc; -+ void *owner; -+ void *caller; -+ unsigned int time; -+ int mark; -+ int line; -+ char file[256]; -+ -+} kmalloc_t; -+ -+ -+static LIST_HEAD(current_kmem); -+static LIST_HEAD(stored_kmem); -+ -+void -+count_kmem(struct list_head * list, long * count, long * size ) -+{ -+ long c,s; -+ struct list_head *tmp; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ -+ c = s = 0L; -+ -+ list_for_each(tmp, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ c++; -+ s += kmem_ptr->len; -+ } -+ -+ *count = c; -+ *size = s; -+} -+ -+void -+clear_kmem(struct list_head * list) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kmem_ptr = NULL; -+ -+ list_for_each_safe(tmp, tmp2, list) { -+ kmem_ptr = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kmem_ptr->list); -+ free( kmem_ptr ); -+ } -+} -+ -+void -+move_kmem(struct list_head * dest, struct list_head *src) -+{ -+ struct list_head *tmp,*tmp2; -+ kmalloc_t *kp= NULL; -+ -+ list_for_each_safe(tmp, tmp2, src) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ list_del_init(&kp->list); -+ -+/* -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+*/ -+ -+ list_add_tail(&kp->list, dest); -+ } -+} -+ -+void -+read_kmem(struct list_head * list) -+{ -+ FILE * fd; -+ char line[1024]; -+ int line_size = 100; -+ char * rep; -+ kmalloc_t * kp; -+ -+ clear_kmem(list); -+ -+ fd = fopen(QSNET_PROCFS_KMEM_DEBUG,"r"); -+ if ( fd == NULL) -+ { -+ printf("No Kmem Debug\n"); -+ return; -+ } -+ -+ rep = fgets(line,line_size, fd); -+ -+ while ( rep != NULL ) -+ { -+ kp = malloc(sizeof(kmalloc_t)); -+ -+ sscanf(line,"%p %d %d %p %p %u %d %s\n", -+ &kp->ptr, &kp->len, &kp->used_vmalloc, &kp->caller, &kp->owner, &kp->time, &kp->line, &kp->file[0]); -+ -+/* -+ printf(">>%s<<\n",line); -+ printf("%p %d %d %p %p %u %d %s\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->time, kp->line, kp->file); -+*/ -+ -+ list_add_tail(&kp->list, list); -+ -+ rep = fgets(line,line_size, fd); -+ } -+ fclose(fd); -+} -+ -+void -+mark_kmem(struct list_head * list, int mark) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ kp->mark = mark; -+ } -+} -+ -+kmalloc_t * -+find_kmem(kmalloc_t * value, struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if ( (kp->ptr == value->ptr) -+ && (kp->len == value->len) -+ && (kp->used_vmalloc == value->used_vmalloc ) -+ && (kp->owner == value->owner ) -+ && (kp->caller == value->caller ) -+ && (kp->time == value->time ) -+ && (kp->line == value->line ) -+ && !(strcmp(kp->file,value->file) )) -+ return kp; -+ } -+ return NULL; -+} -+ -+void -+diff_kmem(struct list_head *curr, struct list_head *stored) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ long c,s; -+ -+ mark_kmem(stored, 0); -+ mark_kmem(curr, 0); -+ -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, curr) != NULL) -+ kp->mark = 1; -+ } -+ -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (find_kmem( kp, stored) != NULL) -+ kp->mark = 1; -+ } -+ -+ c=s=0L; -+ list_for_each(tmp, stored) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("-- mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("-- %4ld %10ld \n",c,s); -+ -+ c=s=0L; -+ list_for_each(tmp, curr) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ if (kp->mark != 1) -+ { -+ printf("++ mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ c++; -+ s+= kp->len; -+ } -+ } -+ printf("++ %4ld %10ld \n",c,s); -+} -+ -+ -+void -+print_kmem(struct list_head * list) -+{ -+ struct list_head *tmp; -+ kmalloc_t *kp = NULL; -+ -+ list_for_each(tmp, list) { -+ kp = list_entry(tmp, kmalloc_t , list); -+ -+ printf("mem %p len %d (vm=%d) caller %p owner %p (%s:%d)\n", -+ kp->ptr, kp->len, kp->used_vmalloc, kp->caller, kp->owner, kp->file, kp->line); -+ -+ } -+} -+ -+void -+print_cmds() -+{ -+ long c,s; -+ -+ printf("q : quits \n"); -+ printf("r : read\n"); -+ printf("c : print current\n"); -+ printf("o : print stored\n"); -+ printf("s : store\n"); -+ -+ count_kmem(¤t_kmem, &c, &s ); -+ printf("\ncurrent : %4ld %10ld\n", c , s); -+ -+ count_kmem(&stored_kmem, &c, &s ); -+ printf("store : %4ld %10ld\n", c , s); -+ -+} -+ -+int -+main() -+{ -+ char line[128]; -+ int line_size=127; -+ int len; -+ -+ -+ while (1) -+ { -+ -+ printf(">> "); -+ fgets(line,line_size, stdin); -+ -+ -+ len = strlen( line ) -1; -+ if ( len ) -+ { -+ switch ( tolower(line[0]) ) -+ { -+ case 'q': -+ exit(0); -+ -+ case 'r' : -+ read_kmem(¤t_kmem); -+ break; -+ -+ case 'c' : -+ print_kmem(¤t_kmem); -+ break; -+ -+ case 'o' : -+ print_kmem(&stored_kmem); -+ break; -+ -+ case 's' : -+ clear_kmem(&stored_kmem); -+ move_kmem(&stored_kmem, ¤t_kmem); -+ break; -+ -+ case 'd' : -+ diff_kmem(¤t_kmem, &stored_kmem); -+ break; -+ -+ default: -+ print_cmds(); -+ } -+ -+ -+ -+ } -+ else -+ print_cmds(); -+ } -+ -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/qsnet/qsnet/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/qsnet/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/qsnet/quadrics_version.h 2005-07-28 14:52:52.920667792 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/rms/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/rms/Makefile 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/rms/Makefile 2005-07-28 14:52:52.920667792 -0400 -@@ -0,0 +1,15 @@ -+# -+# Makefile for Quadrics QsNet -+# -+# Copyright (c) 2002-2004 Quadrics Ltd -+# -+# File: drivers/net/qsnet/rms/Makefile -+# -+ -+ -+# -+ -+obj-$(CONFIG_RMS) += rms.o -+rms-objs := rms_kern.o rms_kern_Linux.o -+ -+EXTRA_CFLAGS += -DDEBUG -DDEBUG_PRINTF -DDEBUG_ASSERT -Index: linux-2.6.5-7.191/drivers/net/qsnet/rms/Makefile.conf -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/rms/Makefile.conf 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/rms/Makefile.conf 2005-07-28 14:52:52.921667640 -0400 -@@ -0,0 +1,10 @@ -+# Flags for generating QsNet Linux Kernel Makefiles -+MODNAME = rms.o -+MODULENAME = rms -+KOBJFILES = rms_kern.o rms_kern_Linux.o -+EXPORT_KOBJS = -+CONFIG_NAME = CONFIG_RMS -+SGALFC = -+# EXTRALINES START -+ -+# EXTRALINES END -Index: linux-2.6.5-7.191/drivers/net/qsnet/rms/quadrics_version.h -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/rms/quadrics_version.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/rms/quadrics_version.h 2005-07-28 14:52:52.921667640 -0400 -@@ -0,0 +1 @@ -+#define QUADRICS_VERSION "4.31qsnet" -Index: linux-2.6.5-7.191/drivers/net/qsnet/rms/rms_kern.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/rms/rms_kern.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/rms/rms_kern.c 2005-07-28 14:52:52.923667336 -0400 -@@ -0,0 +1,1757 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * rms_kern.c: RMS kernel module -+ * -+ * $Source: /cvs/master/quadrics/rmsmod/rms_kern.c,v $ -+ */ -+ -+#ident "@(#)$Id: rms_kern.c,v 1.62.2.4 2005/01/18 11:05:45 duncan Exp $" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * extend stats added in version 5 -+ */ -+#define RMS_MODVERSION 5 -+ -+#if defined(SOLARIS) -+ -+#define CURUID() CURPROC()->p_cred->cr_uid -+#define RMS_NCPUS() 4 -+#define PROC_STRUCT proc -+ -+#include -+ -+#elif defined(LINUX) -+ -+#ifdef PROCESS_ACCT -+#define TIMEVAL_TO_MSEC(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000) -+#define TIMEVAL_TO_CT(tv) ((tv)->tv_sec * HZ + (tv)->tv_usec / (1000000L / HZ)) -+#endif -+ -+#ifdef RSS_ATOMIC -+#define PROC_RSS(proc) ((proc)->mm ? atomic_read(&(proc)->mm->rss) : 0) -+#else -+#define PROC_RSS(proc) ((proc)->mm ? (proc)->mm->rss : 0) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+# define RMS_NCPUS() smp_num_cpus -+#else -+# define RMS_NCPUS() num_online_cpus() -+#endif -+ -+#define CURUID() CURPROC()->uid -+#define p_pid pid -+#define PROC_STRUCT task_struct -+ -+/* care needed with conversion to millisecs on 32-bit Linux */ -+#ifdef LINUX -+#ifdef LINUX_I386 -+#define CT_TO_MSEC(x) ct_to_msec(x) -+ -+uint64_t ct_to_msec(clock_t t) -+{ -+ uint64_t msecs; -+ if (t < 2000000) -+ { -+ t = (1000 * t)/HZ; -+ msecs = t; -+ } -+ else -+ { -+ t = t / HZ; -+ msecs = t * 1000; -+ } -+ return(msecs); -+} -+ -+#else -+#define CT_TO_MSEC(x) (((x) * 1000)/HZ) -+#endif -+#endif -+ -+#ifndef FALSE -+#define FALSE (0) -+#define TRUE (!FALSE) -+#endif -+ -+#include -+#include -+#include -+ -+#include -+ -+#elif defined(DIGITAL_UNIX) -+ -+#define CURUID() CURPROC()->p_ruid -+extern int ncpus; -+#define RMS_NCPUS() ncpus -+#define PROC_STRUCT proc -+#define TIMEVAL_TO_MSEC(tv) ((tv)->tv_sec * 1000 + (tv)->tv_usec / 1000) -+ -+#include -+ -+#else -+#error cannot determine operating system -+#endif -+ -+int shm_cleanup(void); -+ -+struct cap_desc { -+ -+ struct cap_desc *next; -+ int index; /* index of capability in program */ -+ ELAN_CAPABILITY cap; /* elan capability */ -+ -+}; -+ -+struct proc_desc { -+ -+ struct proc_desc *next; -+ struct PROC_STRUCT *proc; -+ struct prg_desc *program; /* controlling program */ -+ int mycap; /* index of my capability */ -+ int myctx; /* context number for process */ -+ int flags; -+ int vp; /* elan virtual process number */ -+}; -+ -+struct prg_desc { -+ -+ struct prg_desc *next; -+ int id; /* program id */ -+ int flags; /* program status flags */ -+ uid_t uid; /* user id */ -+ int ncpus; /* number of cpus allocated to program */ -+ int nprocs; /* number of processes in program */ -+ struct proc_desc *pdescs; /* processes in this program */ -+ int ncaps; /* number of capabilities */ -+ struct cap_desc *caps; /* elan capabilities */ -+ char *corepath; /* core path for parallel program */ -+ int psid; /* processor set id */ -+ -+ uint64_t cutime; /* user time accumulated by children */ -+ uint64_t cstime; /* system time accumulated by children */ -+ uint64_t start_time; /* time program created */ -+ uint64_t end_time; /* time last process exited */ -+ uint64_t sched_time; /* last time job was scheduled */ -+ uint64_t accum_atime; /* allocated time last deschedule */ -+ uint64_t memint; /* accumulated memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ long maxrss; /* maximum size to date */ -+ long majflt; -+ -+#ifdef LINUX -+ struct proc_dir_entry *proc_entry; -+#endif -+ -+}; -+ -+#if defined(LINUX) -+static int rms_ptrack_callback (void *arg, int phase, struct task_struct *child); -+#else -+static void rms_xd_callback(void *arg, int phase, void *ctask); -+static void rms_xa_callback (void *arg, int phase, void *ctask); -+#endif -+ -+static void prgsignal(struct prg_desc *program, int signo); -+static uint64_t gettime(void); -+static void freeProgram(struct prg_desc *program); -+ -+static struct prg_desc *programs = 0; -+ -+kmutex_t rms_lock; -+ -+int rms_init(void) -+{ -+ kmutex_init (&rms_lock); -+ -+ DBG(printk("rms: initialising\n")); -+ -+ return(ESUCCESS); -+} -+ -+int rms_reconfigure(void) -+{ -+ return(ESUCCESS); -+} -+ -+int rms_programs_registered(void) -+{ -+ /* -+ ** Called when trying to unload rms.mod will not succeed -+ ** if programs registered -+ */ -+ -+ struct prg_desc *program, **pp; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ { -+ if (program->nprocs != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ return(EBUSY); -+ } -+ } -+ -+ /* -+ ** We have traversed the programs list and no processes registered -+ ** Now free the memory -+ */ -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ *pp = program->next; -+ freeProgram(program); -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(ESUCCESS); -+ -+} -+ -+int rms_fini(void) -+{ -+ /* -+ * don't allow an unload if there are programs registered -+ */ -+ if (rms_programs_registered()) -+ return(EBUSY); -+ -+ kmutex_destroy (&rms_lock); -+ -+ DBG(printk("rms: removed\n")); -+ -+ return(ESUCCESS); -+} -+ -+#ifdef LINUX -+ -+extern struct proc_dir_entry *rms_procfs_programs; -+ -+/* -+ * display one pid per line if there isn't enough space -+ * for another pid then add "...\n" and stop -+ */ -+int pids_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ struct proc_desc *pdesc; -+ char *ptr = page; -+ int bytes = 0, nb; -+ -+ kmutex_lock(&rms_lock); -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ if (bytes > count - 15) -+ { -+ bytes += sprintf(ptr,"...\n"); -+ break; -+ } -+ nb = sprintf(ptr, "%d %d\n", pdesc->proc->p_pid, pdesc->vp); -+ bytes += nb; -+ ptr += nb; -+ } -+ kmutex_unlock(&rms_lock); -+ -+ return(bytes); -+} -+ -+int status_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ struct prg_desc *program = (struct prg_desc *)data; -+ int bytes; -+ if (program->flags & PRG_KILLED) -+ bytes = sprintf(page, "killed\n"); -+ else -+ bytes = sprintf(page, "running\n"); -+ return(bytes); -+} -+ -+void rms_create_proc_entry(struct prg_desc *program) -+{ -+ struct proc_dir_entry *p; -+ char name[32]; -+ -+ if (rms_procfs_programs) -+ { -+ sprintf(name,"%d", program->id); -+ if ((program->proc_entry = proc_mkdir(name, rms_procfs_programs)) != NULL) -+ { -+ if ((p = create_proc_entry ("pids", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = pids_callback; -+ } -+ if ((p = create_proc_entry ("status", S_IRUGO, program->proc_entry)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = program; -+ p->read_proc = status_callback; -+ } -+ } -+ } -+} -+ -+void rms_remove_proc_entry(struct prg_desc *program) -+{ -+ char name[32]; -+ if (rms_procfs_programs) -+ { -+ if (program->proc_entry) -+ { -+ remove_proc_entry ("pids", program->proc_entry); -+ remove_proc_entry ("status", program->proc_entry); -+ } -+ sprintf(name,"%d", program->id); -+ remove_proc_entry (name, rms_procfs_programs); -+ } -+} -+ -+#endif -+ -+/* -+ * find a program from its index/pid -+ * -+ * Duncan: make the lookup more efficient for large numbers of programs/processes -+ */ -+static struct prg_desc *findProgram(const int id) -+{ -+ struct prg_desc *program; -+ for (program = programs; program; program = program->next) -+ if (program->id == id) -+ return(program); -+ return(0); -+} -+ -+static struct proc_desc *findProcess(const int pid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ for (program = programs; program; program = program->next) -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->proc->p_pid == pid) -+ return(pdesc); -+ return(0); -+} -+ -+static void freeProgram(struct prg_desc *program) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ -+#ifdef LINUX -+ rms_remove_proc_entry(program); -+#endif -+ -+ while ((pdesc = program->pdescs) != NULL) -+ { -+ program->pdescs = pdesc->next; -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ } -+ -+ while ((cdesc = program->caps) != NULL) -+ { -+ program->caps = cdesc->next; -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ } -+ -+ if (program->corepath) -+ KMEM_FREE(program->corepath, MAXCOREPATHLEN + 1); -+ -+ KMEM_FREE(program, sizeof(struct prg_desc)); -+ -+#ifdef LINUX -+ MOD_DEC_USE_COUNT; -+#endif -+} -+ -+/* -+ * rms_prgcreate -+ * -+ * create a new program description -+ */ -+int rms_prgcreate(int id, uid_t uid, int cpus) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_prgcreate :: program %d pid %d uid %d cpus %d\n", id, CURPROC()->p_pid, uid, cpus)); -+ -+ /* -+ * parallel programs are created as root by the rmsd as it forks the loader -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ /* -+ * program ids must be unique -+ */ -+ kmutex_lock(&rms_lock); -+ program = findProgram(id); -+ kmutex_unlock(&rms_lock); -+ if (program) -+ return(EINVAL); -+ -+ /* -+ * create a new program description -+ */ -+ KMEM_ALLOC(program, struct prg_desc *, sizeof(struct prg_desc), TRUE); -+ if (!program) -+ return(ENOMEM); -+ -+ program->id = id; -+ program->flags = PRG_RUNNING; -+ program->ncpus = cpus; -+ program->nprocs = 1; -+ program->uid = uid; -+ program->ncaps = 0; -+ program->caps = 0; -+ program->corepath = 0; -+ program->psid = 0; -+ program->start_time = program->sched_time = gettime(); -+ program->end_time = 0; -+ program->accum_atime = 0; -+ program->cutime = 0; -+ program->cstime = 0; -+ program->maxrss = 0; -+ program->memint = 0; -+ program->majflt = 0; -+ program->ebytes = 0; -+ program->exfers = 0; -+ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (!pdesc) -+ return(ENOMEM); -+ -+ pdesc->proc = CURPROC(); -+ pdesc->next = 0; -+ pdesc->mycap = ELAN_CAP_UNINITIALISED; -+ pdesc->myctx = ELAN_CAP_UNINITIALISED; -+ pdesc->vp = -1; /* rmsloader */ -+ pdesc->program = program; -+ program->pdescs = pdesc; -+ -+#ifdef LINUX -+ rms_create_proc_entry(program); -+#endif -+ -+ kmutex_lock(&rms_lock); -+ -+#if defined(LINUX) -+ if (ptrack_register (rms_ptrack_callback, NULL) != 0) -+ { -+ kmutex_unlock(&rms_lock); -+ KMEM_FREE(pdesc,sizeof(struct proc_desc)); -+ KMEM_FREE(program,sizeof(struct prg_desc)); -+ return(ENOMEM); -+ } -+#else -+ /* -+ * install a fork handler -+ */ -+ if (HANDLER_REGISTER((void *)(unsigned long)rms_xa_callback, NULL, XA_FORK | XA_EXIT | XA_IOF | XA_KOF | XA_KOE) == NULL) -+ { -+ kmutex_unlock(&rms_lock); -+ KMEM_FREE(pdesc,sizeof(struct proc_desc)); -+ KMEM_FREE(program,sizeof(struct prg_desc)); -+ return(ENOMEM); -+ } -+#endif -+ -+ program->next = programs; -+ programs = program; -+ -+#ifdef LINUX -+ MOD_INC_USE_COUNT; -+#endif -+ -+ kmutex_unlock(&rms_lock); -+ return(ESUCCESS); -+} -+ -+ -+/* -+ * rms_prgdestroy -+ * -+ * destroy a program description -+ */ -+int rms_prgdestroy(int id) -+{ -+ struct prg_desc *program, **pp; -+ int status = ESRCH; -+ -+ /* -+ * parallel programs are created and destroyed by the rmsd -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ pp = &programs; -+ while ((program = *pp) != NULL) -+ { -+ if (program->id == id) -+ { -+ if (program->nprocs == 0) -+ { -+ DBG(printk("rms_prgdestro :: removing program %d\n", program->id)); -+ *pp = program->next; -+ freeProgram(program); -+ status = ESUCCESS; -+ } -+ else -+ { -+ DBG(printk("rms_prgdestro :: failed to remove program %d: %d\n", program->id, program->nprocs)); -+ status = ECHILD; -+ pp = &program->next; -+ } -+ } -+ else -+ pp = &program->next; -+ } -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+/* -+ * rms_prgids -+ */ -+int rms_prgids(int maxids, int *prgids, int *nprgs) -+{ -+ struct prg_desc *program; -+ int count = 0, *buf, *bufp; -+ int status = ESUCCESS; -+ -+ if (maxids < 1) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ -+ for (program = programs; program; program = program->next) -+ count++; -+ count = MIN(count, maxids); -+ -+ if (count > 0) -+ { -+ KMEM_ALLOC(buf, int *, count * sizeof(int), TRUE); -+ if (buf) -+ { -+ for (program = programs, bufp=buf; bufp < buf + count; -+ program = program->next) -+ *bufp++ = program->id; -+ -+ if (copyout(buf, prgids, sizeof(int) * count)) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, count * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, nprgs, sizeof(int))) -+ status = EFAULT; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * rms_prginfo -+ */ -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidp, *buf; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(buf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ if (buf) -+ { -+ for (pidp = buf, pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ *pidp++ = pdesc->proc->p_pid; -+ -+ if (copyout(buf, pids, sizeof(pid_t) * MIN(program->nprocs, maxpids))) -+ status = EFAULT; -+ -+ KMEM_FREE(buf, program->nprocs * sizeof(pid_t)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&program->nprocs, nprocs, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+/* -+ * rmsmod always used to use psignal but this doesn't work -+ * on Linux 2.6.7 so we have changed to kill_proc -+ */ -+static void prgsignal(struct prg_desc *program, int signo) -+{ -+ struct proc_desc *pdesc; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ kill_proc(pdesc->proc->p_pid, signo, 1); -+} -+ -+ -+int rms_prgsignal(int id, int signo) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ prgsignal(program, signo); -+ if (signo == SIGKILL) -+ program->flags |= PRG_KILLED; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+} -+ -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap) -+{ -+ struct prg_desc *program; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ if (cap == NULL) -+ return(EINVAL); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ KMEM_ALLOC(cdesc, struct cap_desc *, sizeof(struct cap_desc), TRUE); -+ if (cdesc) -+ { -+ cdesc->index = index; -+ if (copyin(cap, &cdesc->cap, sizeof(ELAN_CAPABILITY))) -+ { -+ KMEM_FREE(cdesc, sizeof(struct cap_desc)); -+ status = EFAULT; -+ } -+ else -+ { -+ DBG(printk("rms_prgaddcap :: program %d index %d context %d<-->%d\n", -+ program->id, index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ cdesc->next = program->caps; -+ program->caps = cdesc; -+ program->ncaps++; -+ } -+ } -+ else -+ status = ENOMEM; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+static uint64_t gettime(void) -+{ -+ uint64_t now; -+ -+#if defined(SOLARIS) -+ timespec_t tv; -+ gethrestime(&tv); -+ now = tv.tv_sec * 1000 + tv.tv_nsec / 1000000; -+#elif defined(LINUX) -+ struct timeval tv; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) -+ get_fast_time(&tv); -+#else -+ do_gettimeofday(&tv); -+#endif -+ now = tv.tv_sec * 1000 + tv.tv_usec / 1000; -+#elif defined(DIGITAL_UNIX) -+ struct timeval tv; -+ microtime(&tv); -+ now = tv.tv_sec * 1000 + tv.tv_usec / 1000; -+#endif -+ -+ return(now); -+} -+ -+#ifdef DIGITAL_UNIX -+ -+int rms_getrusage(struct proc_desc *pdesc, struct rusage *ru) -+{ -+ task_t task; -+ thread_t thread; -+ -+ if (!pdesc->proc) -+ return(-1); -+ -+ /* -+ * locking required unless called from the current proc -+ */ -+ if (pdesc->proc != CURPROC()) -+ { -+ if (!P_REF(pdesc->proc)) -+ return(-1); -+ -+ task = proc_to_task(pdesc->proc); -+ if (!task) -+ { -+ P_UNREF(pdesc->proc); -+ DBG(printk("rms_getrusage :: process (%d) has no task\n", pdesc->proc->p_pid)); -+ return(-1); -+ } -+ -+ task_reference(task); -+ task_lock(task); -+ -+ if (!queue_empty(&task->thread_list)) -+ thread = (thread_t) queue_first(&task->thread_list); -+ else -+ { -+ task_unlock(task); -+ task_deallocate(task); -+ P_UNREF(pdesc->proc); -+ return(-1); -+ } -+ -+ thread_reference(thread); -+ task_unlock(task); -+ } -+ -+ *ru = proc_to_utask(pdesc->proc)->uu_ru; -+ task_get_rusage(ru, proc_to_task(pdesc->proc)); -+ -+ if (pdesc->proc != CURPROC()) -+ { -+ task_deallocate(task); -+ thread_deallocate(thread); -+ P_UNREF(pdesc->proc); -+ } -+ return(0); -+} -+ -+#endif -+ -+/* -+ * new stats collection interface, 64-bit with addition of Elan stats -+ */ -+int rms_prggetstats(int id, prgstats_t *stats) -+{ -+#ifdef DIGITAL_UNIX -+ long ruixrss, ruidrss, ruisrss, rumaxrss, rumajflt; -+#endif -+ struct prg_desc *program = 0; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ prgstats_t totals; -+ uint64_t now = gettime(); -+#if defined(SOLARIS) -+ clock_t utime, stime; -+#elif defined(LINUX) -+ uint64_t utime, stime; -+#endif -+ -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ if (id < 0) -+ { -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ program = pdesc->program; -+ } -+ else -+ program = findProgram(id); -+ -+ if (program) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ totals.flags = program->flags; -+ totals.ncpus = program->ncpus; -+ maxrss = 0; -+ -+ if (program->nprocs > 0) -+ totals.etime = now - program->start_time; -+ else -+ totals.etime = program->end_time - program->start_time; -+ -+ totals.atime = program->accum_atime; -+ if (program->flags & PRG_RUNNING) -+ totals.atime += program->ncpus * (now - program->sched_time); -+ -+#if defined(SOLARIS) -+ utime = stime = 0; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ utime += pdesc->proc->p_utime; -+ stime += pdesc->proc->p_stime; -+ } -+ totals.utime = TICK_TO_MSEC(utime); -+ totals.stime = TICK_TO_MSEC(stime); -+ -+#elif defined(LINUX) -+ utime = stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ utime += TIMEVAL_TO_CT(&pdesc->proc->utime); -+ stime += TIMEVAL_TO_CT(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ utime += pdesc->proc->times.tms_utime; -+ stime += pdesc->proc->times.tms_stime; -+#else -+ DBG(printk("rms_prggetsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ utime += pdesc->proc->utime; -+ stime += pdesc->proc->stime; -+#endif -+ -+ totals.pageflts += pdesc->proc->maj_flt; -+ -+ maxrss += PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ } -+ -+ /* convert user and system times to millisecs */ -+ totals.utime = CT_TO_MSEC(utime); -+ totals.stime = CT_TO_MSEC(stime); -+ -+#elif defined(DIGITAL_UNIX) -+ totals.utime = totals.stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ struct rusage ru; -+ if (rms_getrusage(pdesc, &ru) < 0) -+ continue; -+ -+ totals.utime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ totals.stime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ -+ /* convert maxrss to megabytes */ -+ rumaxrss = ru.ru_maxrss >> 10; -+ rumajflt = ru.ru_majflt; -+ totals.pageflts += rumajflt; -+ -+ /* -+ * memory intergals are still broken in 5.1 -+ */ -+ -+#ifdef FIXED_MEMINIT -+ -+ /* convert from pages * clock ticks to Mbytes * secs */ -+ ruixrss = (ru.ru_ixrss >> (20 - PAGE_SHIFT)) / hz; -+ ruidrss = (ru.ru_idrss >> (20 - PAGE_SHIFT)) / hz; -+ ruisrss = (ru.ru_isrss >> (20 - PAGE_SHIFT)) / hz; -+ -+ DBG(printk("rms_prggetsta :: process %d mem %d int %d %d %d flt %d\n", pdesc->proc->p_pid, -+ rumaxrss, ruixrss, ruidrss, ruisrss, rumajflt)); -+ -+ totals.memint += ruixrss + ruidrss + ruisrss; -+#else -+ DBG(printk("rms_prggetsta :: process %d mem %d flt %d\n", pdesc->proc->p_pid, rumaxrss, rumajflt)); -+ totals.memint = 0; -+#endif -+ maxrss += rumaxrss; -+ } -+#endif /* DIGITAL_UNIX */ -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ totals.utime += program->cutime; -+ totals.stime += program->cstime; -+ totals.mem = program->maxrss; -+ totals.ebytes = program->ebytes; -+ totals.exfers = program->exfers; -+ -+ DBG(printk("rms_prggetsta :: program %d mem %d flt %d\n", program->id, totals.mem, totals.pageflts)); -+ -+ if (copyout(&totals, stats, sizeof(prgstats_t))) -+ status = EFAULT; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+/* -+ * preserve the old stats stats collection interface -+ */ -+ -+int rms_prggetoldstats(int id, prgstats_old_t *stats) -+{ -+#ifdef DIGITAL_UNIX -+ long ruixrss, ruidrss, ruisrss, rumaxrss, rumajflt; -+#endif -+ struct prg_desc *program = 0; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ prgstats_old_t totals; -+ uint64_t now = gettime(); -+#if defined(SOLARIS) || defined(LINUX) -+ clock_t utime, stime; -+#endif -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ if (id < 0) -+ { -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ program = pdesc->program; -+ } -+ else -+ program = findProgram(id); -+ -+ if (program) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ totals.flags = program->flags; -+ totals.ncpus = program->ncpus; -+ maxrss = 0; -+ -+ if (program->nprocs > 0) -+ totals.etime = now - program->start_time; -+ else -+ totals.etime = program->end_time - program->start_time; -+ -+ totals.atime = program->accum_atime; -+ if (program->flags & PRG_RUNNING) -+ totals.atime += program->ncpus * (now - program->sched_time); -+ -+#if defined(SOLARIS) -+ utime = stime = 0; -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ utime += pdesc->proc->p_utime; -+ stime += pdesc->proc->p_stime; -+ } -+ totals.utime = TICK_TO_MSEC(utime); -+ totals.stime = TICK_TO_MSEC(stime); -+ -+#elif defined(LINUX) -+ utime = stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ utime += TIMEVAL_TO_CT(&pdesc->proc->utime); -+ stime += TIMEVAL_TO_CT(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ utime += pdesc->proc->times.tms_utime; -+ stime += pdesc->proc->times.tms_stime; -+#else -+ DBG(printk("rms_getoldsta :: process %d utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ utime += pdesc->proc->utime; -+ stime += pdesc->proc->stime; -+#endif -+ -+ totals.pageflts += pdesc->proc->maj_flt; -+ maxrss += PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ } -+ -+ /* convert user and system times to millisecs */ -+ totals.utime = CT_TO_MSEC(utime); -+ totals.stime = CT_TO_MSEC(stime); -+ -+#elif defined(DIGITAL_UNIX) -+ totals.utime = totals.stime = 0; -+ totals.memint = program->memint; -+ totals.pageflts = program->majflt; -+ -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ { -+ struct rusage ru; -+ if (rms_getrusage(pdesc, &ru) < 0) -+ continue; -+ -+ totals.utime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ totals.stime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ -+ /* convert maxrss to megabytes */ -+ rumaxrss = ru.ru_maxrss >> 10; -+ rumajflt = ru.ru_majflt; -+ totals.pageflts += rumajflt; -+ -+ /* -+ * memory intergals are still broken in 5.1 -+ */ -+ -+#ifdef FIXED_MEMINIT -+ -+ /* convert from pages * clock ticks to Mbytes * secs */ -+ ruixrss = (ru.ru_ixrss >> (20 - PAGE_SHIFT)) / hz; -+ ruidrss = (ru.ru_idrss >> (20 - PAGE_SHIFT)) / hz; -+ ruisrss = (ru.ru_isrss >> (20 - PAGE_SHIFT)) / hz; -+ -+ DBG(printk("rms_getoldsta :: process %d mem %d int %d %d %d flt %d\n", pdesc->proc->p_pid, -+ rumaxrss, ruixrss, ruidrss, ruisrss, rumajflt)); -+ -+ totals.memint += ruixrss + ruidrss + ruisrss; -+#else -+ DBG(printk("rms_getoldsta :: process %d mem %d flt %d\n", pdesc->proc->p_pid, rumaxrss, rumajflt)); -+ totals.memint = 0; -+#endif -+ maxrss += rumaxrss; -+ } -+#endif /* DIGITAL_UNIX */ -+ -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ totals.utime += program->cutime; -+ totals.stime += program->cstime; -+ totals.mem = program->maxrss; -+ -+ DBG(printk("rms_getoldsta :: program %d mem %d flt %d\n", program->id, totals.mem, totals.pageflts)); -+ -+ if (copyout(&totals, stats, sizeof(prgstats_old_t))) -+ status = EFAULT; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_prgsuspend(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_RUNNING; -+ program->flags |= PRG_SUSPEND; -+ program->accum_atime += program->ncpus * (gettime() - program->sched_time); -+ -+ /* suspend/resume just use signals for now */ -+ prgsignal(program, SIGSTOP); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_prgresume(int id) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->flags &= ~PRG_SUSPEND; -+ program->flags |= PRG_RUNNING; -+ program->sched_time = gettime(); -+ prgsignal(program, SIGCONT); -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_ncaps(int *ncaps) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->ncaps, ncaps, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getprgid(pid_t pid, int *id) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (pid == 0) -+ pid = CURPROC()->p_pid; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(pid)) != NULL) -+ { -+ if (copyout(&pdesc->program->id, id, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_setcap(int index, int ctx) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = EINVAL; -+ -+ DBG(printk("rms_setcap :: process %d cap %d ctx %d\n",CURPROC()->p_pid,index,ctx)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index && 0 <= ctx && ctx <= (cdesc->cap.cap_highcontext - cdesc->cap.cap_lowcontext + 1)) -+ { -+ pdesc->mycap = index; -+ pdesc->myctx = cdesc->cap.cap_lowcontext + ctx; -+ status = ESUCCESS; -+ } -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_mycap(int *index) -+{ -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_mycap :: process %d\n", CURPROC()->p_pid)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ DBG(printk("rms_mycap :: found process %d mycap = %d\n", CURPROC()->p_pid, pdesc->mycap)); -+ if (copyout(&pdesc->mycap, index, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int rms_getcap(int index, ELAN_CAPABILITY *cap) -+{ -+ struct proc_desc *pdesc; -+ struct cap_desc *cdesc; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ for (cdesc = pdesc->program->caps; cdesc; cdesc = cdesc->next) -+ if (cdesc->index == index) -+ break; -+ -+ if (cdesc) -+ { -+ /* tell each process about its own context */ -+ cdesc->cap.cap_mycontext = pdesc->myctx; -+ -+ if (copyout(&cdesc->cap, cap, ELAN_CAP_SIZE(&cdesc->cap))) -+ status = EFAULT; -+ -+ DBG(printk("rms_getcap :: program %d index %d context %d<-->%d\n", pdesc->program->id, -+ cdesc->index, cdesc->cap.cap_lowcontext, cdesc->cap.cap_highcontext)); -+ } -+ else -+ status = EINVAL; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+static int -+rms_fork_callback (struct PROC_STRUCT *curproc, struct PROC_STRUCT *child) -+{ -+ struct prg_desc *program; -+ struct proc_desc *parent; -+ struct proc_desc *pdesc = NULL; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_fork_func :: phase is fork pid %d child %d\n", curproc->p_pid, child->p_pid)); -+ -+ /* -+ * find the process that forked -+ */ -+ if ((parent = findProcess(curproc->p_pid)) != NULL) -+ { -+ program = parent->program; -+ -+ DBG(printk("rms_fork_func :: program is %d flags %d\n", program->id, program->flags)); -+ -+ /* -+ * processes can be blocked in fork while prgsignal is in progress -+ * so check to see if the PRG_KILLED flag is set -+ */ -+ if (program->flags & PRG_KILLED) -+ DBG(printk("rms_fork_func :: fork handler called after program killed\n")); -+ else -+ { -+ /* -+ * create a new process description and add to program -+ */ -+ KMEM_ALLOC(pdesc, struct proc_desc *, sizeof(struct proc_desc), TRUE); -+ if (pdesc) -+ { -+ pdesc->next = program->pdescs; -+ program->pdescs = pdesc; -+ pdesc->proc = child; -+ pdesc->mycap = parent->mycap; -+ pdesc->myctx = parent->myctx; -+ pdesc->program = program; -+ pdesc->vp = -1; /* assigned by elaninitdone */ -+ program->nprocs++; -+ } -+ else -+ printk("rms_fork_func :: memory allocation failed\n"); -+ } -+ } -+ else -+ DBG(printk("rms_fork_func :: no program\n")); -+ -+ kmutex_unlock (&rms_lock); -+ -+ return pdesc == NULL; -+} -+ -+static void -+rms_exit_callback (struct PROC_STRUCT *curproc) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc, **pdescp, *p; -+#ifdef DIGITAL_UNIX -+ struct rusage ru; -+#endif -+ long maxrss; -+ -+ kmutex_lock(&rms_lock); -+ -+ DBG(printk("rms_exit_func :: process %d exiting\n", curproc->p_pid)); -+ -+ /* -+ * find the process that exited and accumulate -+ * resource usage in its parent program -+ */ -+ for (program = programs, pdesc = 0; program && !pdesc; program = program->next) -+ { -+ pdescp = &program->pdescs; -+ while ((pdesc = *pdescp) != NULL) -+ { -+ if (pdesc->proc->p_pid == curproc->p_pid) -+ { -+ /* -+ * keep track of the resources used -+ */ -+#if defined(SOLARIS) -+ program->cutime += TICK_TO_MSEC(pdesc->proc->p_utime); -+ program->cstime += TICK_TO_MSEC(pdesc->proc->p_stime); -+ -+#elif defined(LINUX) -+#ifdef PROCESS_ACCT -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, -+ TIMEVAL_TO_CT(&pdesc->proc->utime), -+ TIMEVAL_TO_CT(&pdesc->proc->stime))); -+ program->cutime += TIMEVAL_TO_MSEC(&pdesc->proc->utime); -+ program->cstime += TIMEVAL_TO_MSEC(&pdesc->proc->stime); -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->times.tms_utime, -+ pdesc->proc->times.tms_stime)); -+ -+ program->cutime += CT_TO_MSEC(pdesc->proc->times.tms_utime); -+ program->cstime += CT_TO_MSEC(pdesc->proc->times.tms_stime); -+#else -+ DBG(printk("rms_exit_func :: process %d exit utime %ld clks stime %ld clks\n", -+ pdesc->proc->p_pid, pdesc->proc->utime, pdesc->proc->stime)); -+ -+ program->cutime += CT_TO_MSEC(pdesc->proc->utime); -+ program->cstime += CT_TO_MSEC(pdesc->proc->stime); -+#endif -+ program->majflt += pdesc->proc->maj_flt; -+ maxrss = PROC_RSS(pdesc->proc) >> (20 - PAGE_SHIFT); -+ -+#elif defined(DIGITAL_UNIX) -+ if (rms_getrusage(pdesc, &ru) == 0) -+ { -+ program->cutime += TIMEVAL_TO_MSEC(&ru.ru_utime); -+ program->cstime += TIMEVAL_TO_MSEC(&ru.ru_stime); -+ program->majflt += ru.ru_majflt; -+ -+ /* convert maxrss to megabytes */ -+ maxrss = ru.ru_maxrss >> 10; -+ } -+#endif -+ -+ /* -+ * shared memory segment cleanup -+ */ -+#if defined(DIGITAL_UNIX) -+ rms_shmcleanup(-1); -+#elif defined(LINUX) -+ shm_cleanup(); -+#endif -+ -+ /* -+ * remove process from program -+ */ -+ *pdescp = pdesc->next; -+ KMEM_FREE(pdesc, sizeof(struct proc_desc)); -+ program->nprocs--; -+ -+ /* -+ * update the memory high water mark for the program -+ */ -+ for (p = program->pdescs; p; p = p->next) -+ { -+#if defined(DIGITAL_UNIX) -+ if (rms_getrusage(p, &ru) < 0) -+ continue; -+ -+ /* convert maxrss to megabytes */ -+ maxrss += ru.ru_maxrss >> 10; -+ -+#elif defined(LINUX) -+ maxrss += PROC_RSS(p->proc) >> (20 - PAGE_SHIFT); -+#endif -+ } -+ if (maxrss > program->maxrss) -+ program->maxrss = maxrss; -+ -+ DBG(printk("rms_exit_func :: program %d procs %d mem %ld\n", program->id, program->nprocs, program->maxrss)); -+ -+ /* -+ * final update to the program if this is the last process -+ */ -+ if (program->nprocs == 0) -+ { -+ program->end_time = gettime(); -+ program->flags &= ~PRG_RUNNING; -+ program->accum_atime += program->ncpus * (program->end_time - program->sched_time); -+ DBG(printk("rms_exit_func :: last process has gone\n")); -+ } -+ break; -+ } -+ else -+ pdescp = &pdesc->next; -+ } -+ } -+ kmutex_unlock (&rms_lock); -+} -+ -+#if defined(LINUX) -+static int -+rms_ptrack_callback (void *arg, int phase, struct task_struct *child) -+{ -+ switch (phase) -+ { -+ case PTRACK_PHASE_CLONE: -+ if (rms_fork_callback (current, child)) -+ return PTRACK_DENIED; -+ else -+ return PTRACK_INNHERIT; -+ -+ case PTRACK_PHASE_CLONE_FAIL: -+ DBG(printk("rms_fork_func :: fork failed pid %d child %d\n", current->p_pid, child->p_pid)); -+ rms_exit_callback(child); -+ break; -+ -+ case PTRACK_PHASE_EXIT: -+ rms_exit_callback(current); -+ break; -+ } -+ return PTRACK_FINISHED; -+} -+ -+#else -+ -+static void -+rms_xa_callback (void *arg, int phase, void *ctask) -+{ -+ switch (phase) -+ { -+ case XA_FORK: -+ if (rms_fork_callback (CURPROC(), (struct PROC_STRUCT *)task_to_proc(ctask))) -+ psignal(task_to_proc(ctask), SIGKILL); -+ break; -+ case XA_EXIT: -+ rms_exit_callback (CURPROC()); -+ break; -+ } -+} -+ -+#endif -+ -+#ifdef DIGITAL_UNIX -+ -+/* -+ * NB: These functions will only work on steelos. -+ */ -+ -+/* -+ * rms_setcorepath -+ * -+ * set a path at which to dump core if the task aborts -+ * -+ * enhanced core file names must be enabled for this to work -+ */ -+int rms_setcorepath(char *corepath) -+{ -+ int length; -+ char *path; -+ int status; -+ struct proc_desc *pdesc; -+ -+ /* -+ * access restricted - we don't want users moving -+ * their corepath and generating a huge I/O load -+ */ -+ if (CURUID()) -+ return(EACCES); -+ -+ if (!(pdesc = findProcess(CURPROC()->p_pid))) -+ return(ESRCH); -+ -+ if (pdesc->program->corepath) -+ return(EEXIST); -+ -+ KMEM_ALLOC(path, char *, MAXCOREPATHLEN + 1, TRUE); -+ if (path == 0) -+ return(ENOMEM); -+ -+ if (copyinstr(corepath, path, MAXCOREPATHLEN, &length)) -+ return(EFAULT); -+ -+ path[length] = 0; -+ status = add_corepath(path); -+ -+ DBG(printk("rms_setcorepa :: id %d corepath %s status %d\n", pdesc->program->id, path, status)); -+ -+ if (status == ESUCCESS) -+ pdesc->program->corepath = path; -+ else -+ KMEM_FREE(path, MAXCOREPATHLEN + 1); -+ -+ return(status); -+} -+ -+static int find_corepath(pid_t pid, char *path, int len) -+{ -+ struct proc *procp; -+ struct utask *utask; -+ int status = ESUCCESS; -+ -+ procp = pfind(pid); -+ if (procp == NULL) -+ return(ENOENT); -+ -+ utask = proc_to_utask(procp); -+ -+ if (utask->uu_coredir) -+ bcopy(utask->uu_coredir,path,len); -+ else -+ status = ENOENT; -+ -+ /* pfind takes out a reference */ -+ P_UNREF(procp); -+ -+ return(status); -+} -+ -+int rms_getcorepath(pid_t pid, char *corepath, int maxlen) -+{ -+ char src[MAXCOREPATHLEN]; -+ int len; -+ int status; -+ -+ if (maxlen < 2) -+ return(EINVAL); -+ -+ len = MIN(maxlen, MAXCOREPATHLEN); -+ -+ status = find_corepath(pid, src, len); -+ -+ if (status == ESUCCESS) -+ len = strlen(src)+1; -+ else if (status == ENOENT) -+ { -+ len = 2; -+ src[0] = '.'; -+ src[1] = '\0'; -+ status = ESUCCESS; -+ } -+ -+ if (copyout(src, corepath, len)) -+ return(EFAULT); -+ -+ return(status); -+} -+ -+#endif -+ -+/* -+ * rms_elaninitdone - mark a process as having successfully completed elan initialisation -+ */ -+int rms_elaninitdone(int vp) -+{ -+ int status = ESUCCESS; -+ struct proc_desc *pdesc; -+ -+ DBG(printk("rms_elaninit :: process %d vp %d\n", CURPROC()->p_pid, vp)); -+ -+ kmutex_lock(&rms_lock); -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ pdesc->vp = vp; -+ else -+ status = ESRCH; -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+/* -+ * rms_prgelanpids - return the ids of processes that have completed elan initialisation -+ */ -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ pid_t *pidbuf; -+ int status = ESUCCESS, count = 0, *vpbuf; -+ -+ DBG(printk("rms_elanpids :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (program->nprocs > 0) -+ { -+ KMEM_ALLOC(pidbuf, pid_t *, program->nprocs * sizeof(pid_t), TRUE); -+ KMEM_ALLOC(vpbuf, int *, program->nprocs * sizeof(int), TRUE); -+ if (pidbuf && vpbuf) -+ { -+ for (pdesc = program->pdescs; pdesc; pdesc = pdesc->next) -+ if (pdesc->vp >= 0) -+ { -+ pidbuf[count] = pdesc->proc->p_pid; -+ vpbuf[count] = pdesc->vp; -+ count++; -+ } -+ -+ if (count > 0 && (copyout(pidbuf, pids, sizeof(pid_t) * MIN(count, maxpids)) || -+ copyout(vpbuf, vps, sizeof(int) * MIN(count, maxpids)))) -+ status = EFAULT; -+ -+ KMEM_FREE(pidbuf, program->nprocs * sizeof(pid_t)); -+ KMEM_FREE(vpbuf, program->nprocs * sizeof(int)); -+ } -+ else -+ status = ENOMEM; -+ } -+ -+ if (copyout(&count, npids, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ -+ return(status); -+ -+} -+ -+int rms_setpset(int psid) -+{ -+ struct prg_desc *program; -+ struct proc_desc *pdesc; -+ int status = ESUCCESS; -+ -+ if (CURUID()) -+ return(EACCES); -+ -+ kmutex_lock(&rms_lock); -+ -+ if ((pdesc = findProcess(CURPROC()->p_pid)) != NULL) -+ { -+ program = pdesc->program; -+ program->psid = psid; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+ -+int rms_getpset(int id, int *psid) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (copyout(&program->psid, psid, sizeof(int))) -+ status = EFAULT; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+int -+rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers) -+{ -+ struct prg_desc *program; -+ int status = ESUCCESS; -+ -+ DBG(printk("rms_setelanst :: process %d id %d\n", CURPROC()->p_pid, id)); -+ -+ kmutex_lock(&rms_lock); -+ if ((program = findProgram(id)) != NULL) -+ { -+ if (CURUID() == 0 || CURUID() == program->uid) -+ { -+ program->ebytes = ebytes; -+ program->exfers = exfers; -+ } -+ else -+ status = EACCES; -+ } -+ else -+ status = ESRCH; -+ -+ kmutex_unlock(&rms_lock); -+ return(status); -+} -+ -+rms_modversion() -+{ -+ return(RMS_MODVERSION); -+} -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.6.5-7.191/drivers/net/qsnet/rms/rms_kern_Linux.c -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/qsnet/rms/rms_kern_Linux.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/drivers/net/qsnet/rms/rms_kern_Linux.c 2005-07-28 14:52:52.924667184 -0400 -@@ -0,0 +1,430 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: rms_kern_Linux.c,v 1.20 2004/05/14 08:55:57 duncan Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rms_kern_Linux.c,v $*/ -+ -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Quadrics Ltd"); -+MODULE_DESCRIPTION("RMS support module"); -+MODULE_LICENSE("GPL"); -+ -+int rms_debug = 0; -+ -+ctl_table rms_table[] = { -+ { -+ .ctl_name = 1, -+ .procname = "rms_debug", -+ .data = &rms_debug, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .child = NULL, -+ .proc_handler = &proc_dointvec, -+ }, -+ {0} -+}; -+ -+ctl_table rms_root_table[] = { -+ { -+ .ctl_name = CTL_DEBUG, -+ .procname = "rms", -+ .data = NULL, -+ .maxlen = 0, -+ .mode = 0555, -+ .child = rms_table, -+ }, -+ {0} -+}; -+ -+static struct ctl_table_header *rms_sysctl_header; -+ -+static int rms_open (struct inode *ino, struct file *fp); -+static int rms_release (struct inode *ino, struct file *fp); -+static int rms_ioctl (struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32_cmds[] = -+{ -+ RMSIO_GETPRGID32, -+ RMSIO_GETCAP32 -+}; -+ -+static int rms_ioctl32 (unsigned int fd, unsigned int cmd, -+ unsigned long arg, struct file *file); -+#endif -+ -+static struct file_operations rms_fops = -+{ -+ .owner = THIS_MODULE, -+ .ioctl = rms_ioctl, -+ .open = rms_open, -+ .release = rms_release, -+}; -+ -+struct proc_dir_entry *rms_procfs_programs; -+static struct proc_dir_entry *rms_procfs_root; -+ -+int version_callback(char* page, char** start, off_t off, int count, int* eof, void* data) -+{ -+ return(sprintf(page, "$Id: rms_kern_Linux.c,v 1.20 2004/05/14 08:55:57 duncan Exp $\n")); -+} -+ -+static int __init rms_start(void) -+{ -+ struct proc_dir_entry *p; -+ int res; -+ -+ if ((rms_sysctl_header = register_sysctl_table(rms_root_table, 1)) == 0) -+ { -+ printk ("rmsmod: failed to register sysctl table\n"); -+ return (-ENXIO); -+ } -+ -+ if ((rms_procfs_root = proc_mkdir("rms", NULL)) == NULL || -+ (rms_procfs_programs = proc_mkdir("programs", rms_procfs_root)) == NULL || -+ (p = create_proc_entry ("control", S_IRUGO, rms_procfs_root)) == NULL) -+ { -+ unregister_sysctl_table (rms_sysctl_header); -+ printk ("rmsmod: failed to register /proc/rms\n"); -+ return (-ENXIO); -+ } -+ p->proc_fops = &rms_fops; -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ -+ if ((p = create_proc_entry ("version", S_IRUGO, rms_procfs_root)) != NULL) -+ { -+ p->owner = THIS_MODULE; -+ p->data = NULL; -+ p->read_proc = version_callback; -+ } -+ -+ if ((res = rms_init()) != ESUCCESS) -+ { -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table (rms_sysctl_header); -+ return (-res); -+ } -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); -+ register int i; -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ register_ioctl32_conversion (rms_ioctl32_cmds[i], rms_ioctl32); -+ } -+ unlock_kernel(); -+#endif -+ return (0); -+} -+ -+static void __exit rms_exit(void) -+{ -+ rms_fini(); -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+ lock_kernel(); -+ { -+ extern void unregister_ioctl32_conversion(unsigned int cmd); -+ register int i; -+ -+ for (i = 0; i < sizeof (rms_ioctl32_cmds)/sizeof(rms_ioctl32_cmds[0]); i++) -+ unregister_ioctl32_conversion (rms_ioctl32_cmds[i]); -+ } -+ unlock_kernel(); -+#endif -+ -+ remove_proc_entry ("version", rms_procfs_root); -+ remove_proc_entry ("programs", rms_procfs_root); -+ remove_proc_entry ("control", rms_procfs_root); -+ remove_proc_entry ("rms", NULL); -+ unregister_sysctl_table(rms_sysctl_header); -+} -+ -+/* Declare the module init and exit functions */ -+module_init(rms_start); -+module_exit(rms_exit); -+ -+static int -+rms_open (struct inode *inode, struct file *fp) -+{ -+ MOD_INC_USE_COUNT; -+ fp->private_data = NULL; -+ -+ return (0); -+} -+ -+static int -+rms_release (struct inode *inode, struct file *fp) -+{ -+ MOD_DEC_USE_COUNT; -+ return (0); -+} -+ -+static int -+rms_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, unsigned long arg) -+{ -+ int res; -+ -+ switch (cmd) -+ { -+/* no corepath support in Linux yet */ -+#if 0 -+ case RMSIO_SETCOREPATH: -+ res = rms_setcorepath((caddr_t)arg); -+ break; -+ -+ case RMSIO_GETCOREPATH: -+ { -+ RMSIO_GETCOREPATH_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcorepath(args.pid, args.corepath, args.maxlen); -+ break; -+ } -+#endif -+ -+ case RMSIO_PRGCREATE: -+ { -+ RMSIO_PRGCREATE_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgcreate(args.id, args.uid, args.cpus); -+ break; -+ } -+ -+ case RMSIO_PRGDESTROY: -+ res = rms_prgdestroy(arg); -+ break; -+ -+ case RMSIO_PRGIDS: -+ { -+ RMSIO_PRGIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgids(args.maxids, args.prgids, args.nprgs); -+ break; -+ } -+ -+ case RMSIO_PRGINFO: -+ { -+ RMSIO_PRGINFO_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prginfo(args.id, args.maxpids, args.pids, args.nprocs); -+ break; -+ } -+ -+ case RMSIO_PRGSIGNAL: -+ { -+ RMSIO_PRGSIGNAL_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgsignal(args.id, args.signo); -+ break; -+ } -+ -+ case RMSIO_PRGADDCAP: -+ { -+ RMSIO_PRGADDCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgaddcap(args.id, args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_SETCAP: -+ { -+ RMSIO_SETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_setcap(args.index, args.ctx); -+ break; -+ } -+ -+ case RMSIO_NCAPS: -+ res = rms_ncaps((int *)arg); -+ break; -+ -+ case RMSIO_GETPRGID: -+ { -+ RMSIO_GETPRGID_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, args.id); -+ break; -+ } -+ -+ case RMSIO_GETMYCAP: -+ res = rms_mycap((int *)arg); -+ break; -+ -+ case RMSIO_GETCAP: -+ { -+ RMSIO_GETCAP_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, args.cap); -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS: -+ { -+ RMSIO_PRGGETSTATS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prggetoldstats(args.id, args.stats); -+ break; -+ } -+ -+ case RMSIO_PRGGETSTATS2: -+ { -+ RMSIO_PRGGETSTATS2_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prggetstats(args.id, args.stats); -+ break; -+ } -+ -+ case RMSIO_PRGSUSPEND: -+ res = rms_prgsuspend(arg); -+ break; -+ -+ case RMSIO_PRGRESUME: -+ res = rms_prgresume(arg); -+ break; -+ -+ case RMSIO_ELANINITDONE: -+ res = rms_elaninitdone(arg); -+ break; -+ -+ case RMSIO_PRGELANPIDS: -+ { -+ RMSIO_PRGELANPIDS_STRUCT args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_prgelanpids(args.id, args.maxpids, args.vps, args.pids, args.npids); -+ break; -+ } -+ -+ case RMSIO_SETELANSTATS: -+ { -+ RMSIO_SETELANSTATS_STRUCT args; -+ elanstats_t estats; -+ -+ if (copy_from_user(&args, (void *)arg, sizeof(args)) || -+ copy_from_user(&estats, (void *)args.estats, sizeof(estats))) -+ return(-EFAULT); -+ -+ res = rms_setelanstats(args.id, estats.ebytes, estats.exfers); -+ break; -+ } -+ -+ case RMSIO_MODVERSION: -+ { -+ RMSIO_MODVERSION_STRUCT args; -+ int version = rms_modversion(); -+ -+ if (copy_from_user (&args, (void *)arg, sizeof (args))) -+ return (-EFAULT); -+ -+ if (copyout(&version, args.version, sizeof(int))) -+ res = EFAULT; -+ else -+ res = ESUCCESS; -+ -+ break; -+ } -+ -+ default: -+ res = EINVAL; -+ break; -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+ -+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) || defined(CONFIG_X86_64) -+static int -+rms_ioctl32 (unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file) -+{ -+ int res; -+ -+ switch (cmd) -+ { -+ case RMSIO_GETPRGID32: -+ { -+ RMSIO_GETPRGID_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getprgid(args.pid, (int *)(unsigned long) args.idptr); -+ break; -+ } -+ -+ case RMSIO_GETCAP32: -+ { -+ RMSIO_GETCAP_STRUCT32 args; -+ -+ if (copy_from_user (&args, (void *) arg, sizeof (args))) -+ return (-EFAULT); -+ -+ res = rms_getcap(args.index, (ELAN_CAPABILITY *)(unsigned long) args.capptr); -+ break; -+ } -+ -+ default: -+ return (sys_ioctl (fd, cmd, arg)); -+ } -+ -+ return ((res == 0) ? 0 : -res); -+} -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/drivers/net/Kconfig -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/Kconfig 2005-06-28 12:23:55.000000000 -0400 -+++ linux-2.6.5-7.191/drivers/net/Kconfig 2005-07-28 14:52:52.926666880 -0400 -@@ -2491,6 +2491,8 @@ - - source "drivers/net/tokenring/Kconfig" - -+source "drivers/net/qsnet/Kconfig" -+ - config NET_FC - bool "Fibre Channel driver support" - depends on NETDEVICES && SCSI && PCI -Index: linux-2.6.5-7.191/drivers/net/Makefile -=================================================================== ---- linux-2.6.5-7.191.orig/drivers/net/Makefile 2005-06-28 12:23:50.000000000 -0400 -+++ linux-2.6.5-7.191/drivers/net/Makefile 2005-07-28 14:52:52.927666728 -0400 -@@ -196,3 +196,5 @@ - - obj-$(CONFIG_NETCONSOLE) += netconsole.o - obj-$(CONFIG_XPNET) += xpnet.o -+ -+obj-$(CONFIG_QSNET) += qsnet/ -Index: linux-2.6.5-7.191/fs/exec.c -=================================================================== ---- linux-2.6.5-7.191.orig/fs/exec.c 2005-06-28 12:24:23.000000000 -0400 -+++ linux-2.6.5-7.191/fs/exec.c 2005-07-28 14:52:52.928666576 -0400 -@@ -65,6 +65,8 @@ - #include - #endif - -+#include -+ - int core_uses_pid; - char core_pattern[65] = "core"; - int suid_dumpable = 0; -@@ -1202,6 +1204,9 @@ - if (retval < 0) - goto out; - -+ /* notify any ptrack callbacks of the process exec */ -+ ptrack_call_callbacks(PTRACK_PHASE_EXEC, NULL); -+ - retval = search_binary_handler(&bprm,regs); - if (retval >= 0) { - TRIG_EVENT(exec_hook, file->f_dentry->d_name.len, -Index: linux-2.6.5-7.191/fs/select.c -=================================================================== ---- linux-2.6.5-7.191.orig/fs/select.c 2005-06-28 12:24:00.000000000 -0400 -+++ linux-2.6.5-7.191/fs/select.c 2005-07-28 14:52:52.928666576 -0400 -@@ -649,3 +649,4 @@ - } - return -EIOCBRETRY; - } -+EXPORT_SYMBOL_GPL(sys_poll); -Index: linux-2.6.5-7.191/fs/read_write.c -=================================================================== ---- linux-2.6.5-7.191.orig/fs/read_write.c 2005-06-28 12:24:09.000000000 -0400 -+++ linux-2.6.5-7.191/fs/read_write.c 2005-07-28 14:52:52.929666424 -0400 -@@ -339,6 +339,7 @@ - - return ret; - } -+EXPORT_SYMBOL(sys_write); - - asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos) -Index: linux-2.6.5-7.191/include/elan/bitmap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/bitmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/bitmap.h 2005-07-28 14:52:52.929666424 -0400 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_BITMAP_H -+#define __QSNET_BITMAP_H -+ -+#ident "$Id: bitmap.h,v 1.5 2004/01/20 17:32:15 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/bitmap.h,v $ */ -+ -+typedef unsigned int bitmap_t; -+ -+#define BT_NBIPUL 32 /* n bits per bitmap_t */ -+#define BT_ULSHIFT 5 /* log 2 BT_NBIPUL to extract word index */ -+#define BT_ULMASK 0x1f /* to extract bit index */ -+ -+#define BT_WIM(bitmap,bitindex) ((bitmap)[(bitindex) >> BT_ULSHIFT]) /* word in map */ -+#define BT_BIW(bitindex) (1 << ((bitindex) & BT_ULMASK)) /* bit in word */ -+ -+/* BT_BITOUL -- n bits to n words */ -+#define BT_BITOUL(nbits) (((nbits) + BT_NBIPUL -1) / BT_NBIPUL) -+ -+#define BT_TEST(bitmap,bitindex) ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0) -+#define BT_SET(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } while (0) -+#define BT_CLEAR(bitmap,bitindex) do { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } while (0) -+ -+/* return first free bit in the bitmap, or -1 for failure */ -+extern int bt_freebit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the lowest set bit in the bitmap or -1 for failure */ -+extern int bt_lowbit (bitmap_t *bitmap, int nbits); -+ -+/* return the index of the next set/clear bit in the bitmap or -1 for failure */ -+extern int bt_nextbit (bitmap_t *bitmap, int nbits, int last, int isset); -+ -+/* copy/zero/fill/compare a bit map */ -+extern void bt_copy (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_zero (bitmap_t *a, int nbits); -+extern void bt_fill (bitmap_t *a, int nbits); -+extern int bt_cmp (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* intersect bitmap 'a' with bitmap 'b' and return in 'a' */ -+extern void bt_intersect (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* remove/add bitmap 'b' from bitmap 'a' */ -+extern void bt_remove (bitmap_t *a, bitmap_t *b, int nbits); -+extern void bt_add (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* check whether bitmap 'a' spans bitmap 'b' */ -+extern int bt_spans (bitmap_t *a, bitmap_t *b, int nbits); -+ -+/* copy [base,base+nbits-1] from 'a' to 'b' */ -+extern void bt_subset (bitmap_t *a, bitmap_t *b, int base, int nbits); -+ -+/* find bits clear in 'a' and set in 'b', put result in 'c' */ -+extern void bt_up (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* find bits set in 'a' and clear in 'b', put result in 'c' */ -+extern void bt_down (bitmap_t *a, bitmap_t *b, bitmap_t *c, int nbits); -+ -+/* return number of bits set in bitmap */ -+extern int bt_nbits (bitmap_t *a, int nbits); -+ -+ -+#endif /* __QSNET_BITMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/capability.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/capability.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/capability.h 2005-07-28 14:52:52.930666272 -0400 -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: capability.h,v 1.16 2004/07/20 10:15:33 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/capability.h,v $*/ -+ -+#ifndef __ELAN_CAPABILITY_H -+#define __ELAN_CAPABILITY_H -+ -+#include -+ -+/* Maximum number of rails */ -+#define ELAN_MAX_RAILS (31) -+/* Maximum number of virtual processes we support */ -+#define ELAN_MAX_VPS (16384) -+ -+/* Number of words in a bitmap capability */ -+#define ELAN_BITMAPSIZE BT_BITOUL(ELAN_MAX_VPS) -+ -+/* Guaranteed invalid values */ -+#define ELAN_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN_INVALID_NODE (0xFFFF) -+#define ELAN_INVALID_CONTEXT (0xFFFF) -+ -+/* Number of values in a user key */ -+#define ELAN_USERKEY_ENTRIES 4 -+ -+typedef void * ELAN_CAP_OWNER; -+ -+/* -+ * When used in userspace this is relative to the base of -+ * the capabality but is an absolute location for kernel space. -+ */ -+typedef struct elan_location -+{ -+ unsigned short loc_node; -+ unsigned short loc_context; -+} ELAN_LOCATION; -+ -+typedef struct elan_userkey -+{ -+ unsigned key_values[ELAN_USERKEY_ENTRIES]; -+} ELAN_USERKEY; -+ -+typedef struct elan_capability -+{ -+ ELAN_USERKEY cap_userkey; /* User defined protection */ -+ -+ int cap_version; /* Version number */ -+ unsigned short cap_type; /* Capability Type */ -+ unsigned short cap_spare; /* spare was cap_elan_type */ -+ -+ int cap_lowcontext; /* low context number in block */ -+ int cap_highcontext; /* high context number in block */ -+ int cap_mycontext; /* my context number */ -+ -+ int cap_lownode; /* low elan id of group */ -+ int cap_highnode; /* high elan id of group */ -+ -+ unsigned int cap_railmask; /* which rails this capability is valid for */ -+ -+ bitmap_t cap_bitmap[ELAN_BITMAPSIZE]; /* Bitmap of process to processor translation */ -+} ELAN_CAPABILITY; -+ -+#define ELAN_CAP_UNINITIALISED (-1) -+ -+#define ELAN_CAP_VERSION_NUMBER (0x00010002) -+ -+#define ELAN_CAP_NUM_NODES(cap) ((cap)->cap_highnode - (cap)->cap_lownode + 1) -+#define ELAN_CAP_NUM_CONTEXTS(cap) ((cap)->cap_highcontext - (cap)->cap_lowcontext + 1) -+ -+/* using or defining our own MIN/MAX had confilicts with dunix so we define ELAN_ ones */ -+#define ELAN_MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define ELAN_MAX(a,b) ((a) > (b) ? (a) : (b)) -+#define ELAN_CAP_BITMAPSIZE(cap) (ELAN_MAX (ELAN_MIN (ELAN_CAP_NUM_NODES(cap) * ELAN_CAP_NUM_CONTEXTS(cap), ELAN_MAX_VPS), 0)) -+ -+#define ELAN_CAP_SIZE(cap) (offsetof (ELAN_CAPABILITY, cap_bitmap[BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap))])) -+#define ELAN_CAP_ENTRIES(cap) (((cap)->cap_type & ELAN_CAP_TYPE_NO_BITMAP) ? ELAN_CAP_BITMAPSIZE((cap)) : bt_nbits((cap)->cap_bitmap, ELAN_CAP_BITMAPSIZE((cap)))) -+ -+#define ELAN_CAP_IS_RAIL_SET(cap,rail) ((cap)->cap_railmask & (1<cap_userkey.key_values[0] == (cap2)->cap_userkey.key_values[0] && \ -+ (cap1)->cap_userkey.key_values[1] == (cap2)->cap_userkey.key_values[1] && \ -+ (cap1)->cap_userkey.key_values[2] == (cap2)->cap_userkey.key_values[2] && \ -+ (cap1)->cap_userkey.key_values[3] == (cap2)->cap_userkey.key_values[3]) -+ -+#define ELAN_CAP_TYPE_MATCH(cap1,cap2) ((cap1)->cap_version == (cap2)->cap_version && \ -+ (cap1)->cap_type == (cap2)->cap_type) -+ -+#define ELAN_CAP_GEOM_MATCH(cap1,cap2) ((cap1)->cap_lowcontext == (cap2)->cap_lowcontext && \ -+ (cap1)->cap_highcontext == (cap2)->cap_highcontext && \ -+ (cap1)->cap_lownode == (cap2)->cap_lownode && \ -+ (cap1)->cap_highnode == (cap2)->cap_highnode && \ -+ (cap1)->cap_railmask == (cap2)->cap_railmask && \ -+ !bcmp (&(cap1)->cap_bitmap[0], &(cap2)->cap_bitmap[0], \ -+ BT_BITOUL(ELAN_CAP_BITMAPSIZE(cap1)*sizeof(bitmap_t)))) -+ -+#define ELAN_CAP_MATCH(cap1,cap2) (ELAN_CAP_KEY_MATCH (cap1, cap2) && \ -+ ELAN_CAP_TYPE_MATCH (cap1, cap2) && \ -+ ELAN_CAP_GEOM_MATCH (cap1, cap2)) -+ -+#define ELAN_CAP_VALID_MYCONTEXT(cap) ( ((cap)->cap_lowcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_mycontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_highcontext != ELAN_CAP_UNINITIALISED) \ -+ && ((cap)->cap_lowcontext <= (cap)->cap_mycontext) \ -+ && ((cap)->cap_mycontext <= (cap)->cap_highcontext)) -+ -+/* -+ * Definitions for type -+ */ -+#define ELAN_CAP_TYPE_BLOCK 1 /* Block distribution */ -+#define ELAN_CAP_TYPE_CYCLIC 2 /* Cyclic distribution */ -+#define ELAN_CAP_TYPE_KERNEL 3 /* Kernel capability */ -+ -+#define ELAN_CAP_TYPE_MASK (0xFFF) /* Mask for type */ -+ -+/* OR these bits in for extra features */ -+#define ELAN_CAP_TYPE_HWTEST (1 << 12) /* Hardware test capability type */ -+#define ELAN_CAP_TYPE_MULTI_RAIL (1 << 13) /* "new" multi rail capability */ -+#define ELAN_CAP_TYPE_NO_BITMAP (1 << 14) /* don't use bit map */ -+#define ELAN_CAP_TYPE_BROADCASTABLE (1 << 15) /* broadcastable */ -+ -+ -+extern void elan_nullcap (ELAN_CAPABILITY *cap); -+extern char *elan_capability_string (ELAN_CAPABILITY *cap, char *str); -+extern ELAN_LOCATION elan_vp2location (unsigned process, ELAN_CAPABILITY *cap); -+extern int elan_location2vp (ELAN_LOCATION location, ELAN_CAPABILITY *cap); -+extern int elan_nvps (ELAN_CAPABILITY *cap); -+extern int elan_nlocal (int node, ELAN_CAPABILITY *cap); -+extern int elan_maxlocal (ELAN_CAPABILITY *cap); -+extern int elan_localvps (int node, ELAN_CAPABILITY *cap, int *vps, int size); -+extern int elan_nrails (ELAN_CAPABILITY *cap); -+extern int elan_rails (ELAN_CAPABILITY *cap, int *rails); -+extern int elan_cap_overlap (ELAN_CAPABILITY *cap1, ELAN_CAPABILITY *cap2); -+ -+/* -+ * capability creation/access fns provide for running -+ * new libelan code on old OS releases -+ */ -+extern int elan_lowcontext(ELAN_CAPABILITY *cap); -+extern int elan_mycontext(ELAN_CAPABILITY *cap); -+extern int elan_highcontext(ELAN_CAPABILITY *cap); -+extern int elan_lownode(ELAN_CAPABILITY *cap); -+extern int elan_highnode(ELAN_CAPABILITY *cap); -+extern int elan_captype(ELAN_CAPABILITY *cap); -+extern int elan_railmask(ELAN_CAPABILITY *cap); -+ -+extern int elan_getenvCap (ELAN_CAPABILITY *cap, int index); -+extern ELAN_CAPABILITY *elan_createCapability(void); -+extern ELAN_CAPABILITY *elan_copyCapability(ELAN_CAPABILITY *from, int ctxShift); -+extern int elan_generateCapability(char *string); -+ -+typedef struct elan_cap_struct -+{ -+ ELAN_CAP_OWNER owner; -+ ELAN_CAPABILITY cap; -+ -+ unsigned int attached; /* count of people attached */ -+ unsigned int active; /* ie not being destroyed */ -+} ELAN_CAP_STRUCT; -+ -+#if ! defined(__KERNEL__) -+extern void elan_get_random_key(ELAN_USERKEY *key); -+extern int elan_prefrails(ELAN_CAPABILITY *cap, int *pref, int nvp); -+#endif -+ -+#if defined(__KERNEL__) -+/* capability.c */ -+extern int elan_validate_cap (ELAN_CAPABILITY *cap); -+extern int elan_validate_map (ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_create_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_destroy_cap (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+extern int elan_create_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+extern int elan_destroy_vp (ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+typedef void (*ELAN_DESTROY_CB)(void *args, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+extern int elan_attach_cap (ELAN_CAPABILITY *cap, unsigned int rail, void *args, ELAN_DESTROY_CB callback); -+extern int elan_detach_cap (ELAN_CAPABILITY *cap, unsigned int rail); -+ -+extern int elan_get_caps (uint *number_of_results, uint array_size, ELAN_CAP_STRUCT *caps); -+extern int elan_cap_dump (void); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_CAPABILITY_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/cm.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/cm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/cm.h 2005-07-28 14:52:52.931666120 -0400 -@@ -0,0 +1,412 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_CM_H -+#define __ELAN_CM_H -+ -+#ident "@(#)$Id: cm.h,v 1.14.2.1 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/cm.h,v $*/ -+ -+#include -+ -+#if defined(DIGITAL_UNIX) -+/* -+ * On Tru64 - SMP doesn't mean Symmetric - cpu 0 is a master cpu and is responsible -+ * for handling all PCI interrupts and "funneled" operations. When a kernel thread -+ * is made runnable, the scheduler will choose which cpu it will run on at that time, -+ * and will only execute a higher priority thread from another cpu's run queue when -+ * it becomes totally idle (apparently also including user processes). Also the -+ * assert_wait_mesg_timo function uses a per-cpu timeout - these can only get executed -+ * at "preemptable" places - so again have no guarantee on when they will execute if -+ * they happen to be queued on a "hogged" cpu. The combination of these mean that the Tru64 -+ * is incapable of scheduling a high priority kernel thread within a deterministic time -+ * of when it should have become runnable - wonderfull. -+ * -+ * Hence the solution Compaq have proposed it to schedule a timeout onto all of the -+ * cpu's timeouts lists at the maximum frequency that we could want to execute code, -+ * then to handle the scheduling of work between these ourselves. With a bit of luck -+ * ..... at least one cpu will be sufficiently unloaded to allow us to get a chance -+ * to do our important work. -+ * -+ * However ..... this still is not reliable, since timeouts under Tru64 are still -+ * only run when the currently running kernel thread "co-operates" by calling one -+ * of a number of functions which is permitted to run the "lwc"s AND is not holding -+ * any spinlocks AND is running ai IPL 0. However Compaq are unable to provide -+ * any upper limit on the time between the "lwc"'s being run and so it is possible -+ * for all 4 cpus to not run them for an unbounded time. -+ * -+ * The solution proposed is to use the RM_TEMP_BACKDOOR hook which was added to -+ * hardclock() to "solve" this problem for Memory Channel. However, since it -+ * is called within the clock interrupt it is not permissible to aquire any -+ * spinlocks, nor to run for "too long". This means that it is not possible to -+ * call the heartbeat algorithm from this hook. -+ * -+ * Our solution to these limitations is to use the hook to cause an elan interrupt -+ * to be delivered, by issueing a mis-aligned SetEvent command - this causes the device -+ * to trap and ep_cprocTrap() can then run the heartbeat code. However there is a lock -+ * order violation between the elan_dev::IntrLock and ep_dev::Lock, so we have to -+ * use a trylock and if we fail, then hope that when the interrupt is delievered again -+ * some time later we will succeed. -+ * -+ * However this only works if the kernel is able to respond to the Elan interrupt, -+ * so we panic inside the RM_TEMP_BACKDOOR hook if the SetEvent's interrupt has -+ * not been taken for more than an CM_TIMER_SCHEDULE_TIMEOUT interval. -+ * -+ * In fact this is exactly the mechanism that other operating systems use to -+ * execute timeouts, since the hardclock interrupt posts a low priority -+ * "soft interrupt" which "pre-eempts" the currently running thread and then -+ * executes the timeouts.To block timeouts you use splsoftclock() the same as -+ * in Tru64. -+ */ -+#define PER_CPU_TIMEOUT TRUE -+#endif -+ -+ -+#define CM_SGMTS_PER_LEVEL 8 /* maximum nodes in each segment */ -+#define CM_MAX_LEVELS 6 /* maximum depth of tree */ -+ -+/* message buffers/dmas/events etc */ -+#define CM_NUM_NODE_MSG_BUFFERS (CM_MAX_LEVELS * CM_SGMTS_PER_LEVEL) /* subordinates and leader */ -+#define CM_NUM_SPARE_MSG_BUFFERS 8 /* spare msg buffers for non-connected nodes */ -+#define CM_NUM_MSG_BUFFERS (CM_NUM_NODE_MSG_BUFFERS + CM_NUM_SPARE_MSG_BUFFERS) -+ -+#define CM_INPUTQ_ENTRIES 128 /* # entries in input queue */ -+ -+#define CM_PERIODIC_DISCOVER_INTERVAL (5000) /* 5s (infrequent resolution of established leader conflicts) */ -+#define CM_URGENT_DISCOVER_INTERVAL (50) /* 0.05s (more frequently than heartbeats 'cause they don't retry) */ -+#define CM_HEARTBEAT_INTERVAL (125) /* 0.125s */ -+#define CM_TIMER_SCHEDULE_TIMEOUT (4000) /* 4s Maximum time before a timer that's secheduled to run gets to run (eg blocked in interrupt handlers etc) */ -+#define CM_THREAD_SCHEDULE_TIMEOUT (30000) /* 30s Maximum time before a thread that's scheduled to run gets to run */ -+#define CM_THREAD_RUNNING_TIMEOUT (30000) /* 30s Don't expect the manager thread to be running longer than this */ -+ -+#ifdef PER_CPU_TIMEOUT -+#define CM_PERCPU_TIMEOUT_INTERVAL (50) /* 0.05s (must be less than all above intervals) */ -+#define CM_PACEMAKER_INTERVAL (500) /* 0.05s */ -+ -+#define CM_HEARTBEAT_OVERDUE (250) /* 0.25s Maximum time a timeout can be overdue before taking extreme action */ -+#endif -+ -+#define CM_P2P_DMA_RETRIES 31 -+ -+/* We expect at least 1 point-to-point message in CM_P2P_MSG_RETRIES -+ * attempts to send one to be successfully received */ -+#define CM_P2P_MSG_RETRIES 8 -+ -+/* We expect at least 1 broadcast message in CM_BCAST_MSG_RETRIES attempts -+ * to send one to be successfully received. */ -+#define CM_BCAST_MSG_RETRIES 40 -+ -+/* Heartbeat timeout allows for a node stalling and still getting its -+ * heartbeat. The 2 is to allow for unsynchronised polling times. */ -+#define CM_HEARTBEAT_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_P2P_MSG_RETRIES) * CM_HEARTBEAT_INTERVAL) -+ -+/* Discover timeout must be > CM_HEARTBEAT_TIMEOUT to guarantee that people -+ * who don't see discovery are considered dead by their leader. This -+ * ensures that by the time a node "discovers" it is a leader of a segment, -+ * the previous leader of that segment will have been deemed to be dead by -+ * its the parent segment's leader */ -+#define CM_DISCOVER_TIMEOUT (CM_TIMER_SCHEDULE_TIMEOUT + (2 + CM_BCAST_MSG_RETRIES) * CM_URGENT_DISCOVER_INTERVAL) -+ -+#define CM_WAITING_TIMEOUT (CM_DISCOVER_TIMEOUT * 100) -+ -+/* -+ * Convert all timeouts specified in mS into "ticks" -+ */ -+#define MSEC2TICKS(MSEC) (((MSEC)*HZ)/1000) -+ -+ -+/* statemap entry */ -+typedef struct cm_state_entry -+{ -+ int16_t level; /* cluster level to apply to */ -+ int16_t offset; /* from statemap_findchange() */ -+ uint16_t seg[BT_NBIPUL/16]; /* ditto */ -+} CM_STATEMAP_ENTRY; -+ -+/* offset is >= 0 for a change to apply and */ -+#define STATEMAP_NOMORECHANGES (-1) /* end of a set of updates */ -+#define STATEMAP_RESET (-2) /* reset the target map */ -+#define STATEMAP_NOOP (-3) /* null token */ -+ -+/* CM message format */ -+typedef int8_t CM_SEQ; /* heartbeat sequence numbers; at least 2 bits, signed */ -+ -+/* -+ * The message header is received into the last 64 byte block of -+ * the input queue and the Version *MUST* be the last word of the -+ * block to ensure that we can see that the whole of the message -+ * has reached main memory after we've seen the input queue pointer -+ * have been updated. -+ */ -+typedef struct ep_cm_hdr -+{ -+ uint32_t Pad0; -+ uint32_t Pad1; -+ -+ uint8_t Type; -+ uint8_t Level; -+ CM_SEQ Seq; /* precision at least 2 bits each*/ -+ CM_SEQ AckSeq; -+ -+ uint16_t NumMaps; -+ uint16_t MachineId; -+ -+ uint16_t NodeId; -+ uint16_t Checksum; -+ -+ uint32_t Timestamp; -+ uint32_t ParamHash; -+ uint32_t Version; -+} CM_HDR; -+ -+#define CM_HDR_SIZE sizeof (CM_HDR) -+ -+typedef struct cm_msg -+{ -+ union { -+ CM_STATEMAP_ENTRY Statemaps[1]; /* piggy-backed statemap updates start here */ -+ uint8_t Space[EP_SYSTEMQ_MSG_MAX - CM_HDR_SIZE]; -+ } Payload; -+ -+ CM_HDR Hdr; -+} CM_MSG; -+ -+/* The maximum number of statemap entries that can fit within an EP_CM_MSG_BUFFER */ -+#define CM_MSG_MAXMAPS (offsetof (CM_MSG, Hdr) / sizeof (CM_STATEMAP_ENTRY)) -+#define CM_MSG_MAP(mapno) (CM_MSG_MAXMAPS - (mapno) - 1) -+ -+/* The actual special message base & size, including 'nmaps' piggy-backed statemap entries */ -+#define CM_MSG_BASE(nmaps) (nmaps == 0 ? offsetof (CM_MSG, Hdr) : offsetof (CM_MSG, Payload.Statemaps[CM_MSG_MAXMAPS - nmaps])) -+#define CM_MSG_SIZE(nmaps) (sizeof (CM_MSG) - CM_MSG_BASE(nmaps)) -+ -+#define CM_MSG_VERSION 0xcad00005 -+#define CM_MSG_TYPE_RESOLVE_LEADER 0 -+#define CM_MSG_TYPE_DISCOVER_LEADER 1 -+#define CM_MSG_TYPE_NOTIFY 2 -+#define CM_MSG_TYPE_DISCOVER_SUBORDINATE 3 -+#define CM_MSG_TYPE_IMCOMING 4 -+#define CM_MSG_TYPE_HEARTBEAT 5 -+#define CM_MSG_TYPE_REJOIN 6 -+ -+/* CM machine segment */ -+typedef struct cm_sgmtMaps -+{ -+ u_char InputMapValid; /* Input map has been set */ -+ u_char OutputMapValid; /* Output map has been set */ -+ u_char SentChanges; /* got an outstanding STATEMAP_NOMORECHANGES to send */ -+ statemap_t *OutputMap; /* state to send */ -+ statemap_t *InputMap; /* state received */ -+ statemap_t *CurrentInputMap; /* state being received */ -+} CM_SGMTMAPS; -+ -+typedef struct cm_sgmt -+{ -+ u_char State; -+ u_char SendMaps; -+ u_char MsgAcked; -+ CM_SEQ MsgSeq; -+ CM_SEQ AckSeq; -+ u_int NodeId; -+ long UpdateTick; -+ long WaitingTick; -+ uint32_t Timestamp; -+ CM_SGMTMAPS Maps[CM_MAX_LEVELS]; /* Maps[i] == state for cluster level i */ -+ u_short MsgNumber; /* msg buffer to use */ -+ u_short NumMaps; /* # maps in message buffer */ -+ u_short Level; -+ u_short Sgmt; -+} CM_SGMT; -+ -+#define CM_SGMT_ABSENT 0 /* no one there at all */ -+#define CM_SGMT_WAITING 1 /* waiting for subtree to connect */ -+#define CM_SGMT_COMING 2 /* expecting a subtree to reconnect */ -+#define CM_SGMT_PRESENT 3 /* connected */ -+ -+typedef struct cm_level -+{ -+ int SwitchLevel; -+ u_int MinNodeId; -+ u_int NumNodes; -+ u_int NumSegs; -+ u_int MySgmt; -+ -+ /* SubordinateMap[i] == OR of all subordinate maps on this level and down for cluster level i */ -+ u_char SubordinateMapValid[CM_MAX_LEVELS]; -+ statemap_t *SubordinateMap[CM_MAX_LEVELS]; -+ -+ /* maps/flags for this cluster level */ -+ u_int Online:1; /* I've gone online (seen myself running) */ -+ u_int Restarting:1; /* driving my owm restart bit */ -+ u_char OfflineReasons; /* forced offline by broadcast */ -+ -+ u_char GlobalMapValid; -+ u_char SubTreeMapValid; -+ u_long Connected; -+ -+ statemap_t *LocalMap; /* state bits I drive */ -+ statemap_t *SubTreeMap; /* OR of my and my subtree states */ -+ statemap_t *GlobalMap; /* OR of all node states */ -+ statemap_t *LastGlobalMap; /* last map I saw */ -+ statemap_t *TmpMap; /* scratchpad */ -+ -+ CM_SGMT Sgmts[CM_SGMTS_PER_LEVEL]; -+} CM_LEVEL; -+ -+#define CM_ROLE_LEADER_CANDIDATE 0 -+#define CM_ROLE_LEADER 1 -+#define CM_ROLE_SUBORDINATE 2 -+ -+/* global status bits */ -+#define CM_GSTATUS_STATUS_MASK 0x03 /* bits nodes drive to broadcast their status */ -+#define CM_GSTATUS_ABSENT 0x00 /* Off the network */ -+#define CM_GSTATUS_STARTING 0x01 /* I'm waiting for everyone to see me online */ -+#define CM_GSTATUS_RUNNING 0x03 /* up and running */ -+#define CM_GSTATUS_CLOSING 0x02 /* I'm waiting for everyone to see me offline */ -+ -+#define CM_GSTATUS_ACK_MASK 0x0c /* bits node drive to ack other status */ -+#define CM_GSTATUS_MAY_START 0x04 /* Everyone thinks I may not start */ -+#define CM_GSTATUS_MAY_RUN 0x08 /* Everyone thinks I may not run */ -+ -+#define CM_GSTATUS_RESTART 0x10 /* Someone thinks I should restart */ -+#define CM_GSTATUS_BITS 5 -+ -+#define CM_GSTATUS_BASE(node) ((node) * CM_GSTATUS_BITS) -+ -+#if defined(PER_CPU_TIMEOUT) -+typedef struct cm_timeout_data -+{ -+ long ScheduledAt; /* lbolt timeout was scheduled to run at */ -+ -+ unsigned long EarlyCount; /* # times run early than NextRun */ -+ unsigned long MissedCount; /* # times run on time - but someone else was running it */ -+ unsigned long WastedCount; /* # times we failed to get the spinlock */ -+ unsigned long WorkCount; /* # times we're the one running */ -+ -+ unsigned long WorstDelay; /* worst scheduling delay */ -+ unsigned long BestDelay; /* best scheduling delay */ -+ -+ unsigned long WorstLockDelay; /* worst delay before getting rail->Lock */ -+ -+ unsigned long WorstHearbeatDelay; /* worst delay before calling DoHeartbeatWork */ -+} CM_TIMEOUT_DATA; -+#endif -+ -+typedef struct cm_rail -+{ -+ EP_RAIL *Rail; /* rail we're associated with */ -+ struct list_head Link; /* and linked on the CM_SUBSYS */ -+ -+ uint32_t ParamHash; /* hash of critical parameters */ -+ uint32_t Timestamp; -+ long DiscoverStartTick; /* when discovery start */ -+ -+ unsigned int NodeId; /* my node id */ -+ unsigned int NumNodes; /* and number of nodes */ -+ unsigned int NumLevels; /* number of levels computed from machine size */ -+ int BroadcastLevel; -+ long BroadcastLevelTick; -+ unsigned int TopLevel; /* level at which I'm not a leader */ -+ unsigned char Role; /* state at TopLevel */ -+ -+ EP_INPUTQ *PolledQueue; /* polled input queue */ -+ EP_INPUTQ *IntrQueue; /* intr input queue */ -+ EP_OUTPUTQ *MsgQueue; /* message */ -+ unsigned int NextSpareMsg; /* next "spare" message buffer to use */ -+ -+ EP_CM_RAIL_STATS Stats; /* statistics */ -+ -+ kmutex_t Mutex; -+ spinlock_t Lock; -+ -+ long NextHeartbeatTime; /* next time to check/send heartbeats */ -+ long NextDiscoverTime; /* next time to progress discovery */ -+ long NextRunTime; /* the earlier of the above two or intr requires inputq poll*/ -+ -+ unsigned int OfflineReasons; /* forced offline by procfs/manager thread stuck */ -+ -+#if defined(PER_CPU_TIMEOUT) -+ spinlock_t HeartbeatTimeoutsLock; /* spinlock to sequentialise per-cpu timeouts */ -+ long HeartbeatTimeoutsStarted; /* bitmap of which timeouts have started */ -+ long HeartbeatTimeoutsStopped; /* bitmap of which timeouts have stopped */ -+ long HeartbeatTimeoutsShouldStop; /* flag to indicate timeouts should stop */ -+ kcondvar_t HeartbeatTimeoutsWait; /* place to sleep waiting for timeouts to stop */ -+ long HeartbeatTimeoutRunning; /* someone is running the timeout - don't try for the lock */ -+ -+ long HeartbeatTimeoutOverdue; /* heartbeat seen as overdue - interrupt requested */ -+ -+ CM_TIMEOUT_DATA *HeartbeatTimeoutsData; /* per timeout data */ -+#else -+ struct timer_list HeartbeatTimer; /* timer for heartbeat/discovery */ -+#endif -+ -+ CM_LEVEL Levels[CM_MAX_LEVELS]; -+} CM_RAIL; -+ -+/* OfflineReasons (both per-rail and */ -+#define CM_OFFLINE_BROADCAST (1 << 0) -+#define CM_OFFLINE_PROCFS (1 << 1) -+#define CM_OFFLINE_MANAGER (1 << 2) -+ -+typedef struct cm_subsys -+{ -+ EP_SUBSYS Subsys; -+ CM_RAIL *Rails[EP_MAX_RAILS]; -+} CM_SUBSYS; -+ -+extern int MachineId; -+ -+extern void cm_node_disconnected (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_node (EP_RAIL *rail, unsigned nodeId); -+extern void cm_restart_comms (CM_RAIL *cmRail); -+extern int cm_init (EP_SYS *sys); -+ -+extern void DisplayRail(EP_RAIL *rail); -+extern void DisplaySegs (EP_RAIL *rail); -+extern void DisplayStatus (EP_RAIL *rail); -+ -+typedef struct proc_private -+{ -+ struct nodeset_private *pr_next; -+ EP_RAIL *pr_rail; -+ char *pr_data; -+ int pr_data_len; -+ unsigned pr_off; -+ unsigned pr_len; -+ DisplayInfo pr_di; -+} PROC_PRIVATE; -+ -+extern void proc_character_fill (long mode, char *fmt, ...); -+extern int proc_release (struct inode *inode, struct file *file); -+extern ssize_t proc_read (struct file *file, char *buf, size_t count, loff_t *ppos); -+ -+ -+extern void DisplayNodeMaps (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayNodeSgmts (DisplayInfo *di, CM_RAIL *cmRail); -+extern void DisplayRailDo (DisplayInfo *di, EP_RAIL *rail); -+ -+extern int cm_read_cluster(EP_RAIL *rail,char *page); -+extern void cm_force_offline (EP_RAIL *rail, int offline, unsigned int reason); -+ -+extern int cm_svc_indicator_set (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_clear (EP_RAIL *rail, int svc_indicator); -+extern int cm_svc_indicator_is_set (EP_RAIL *rail, int svc_indicator, int nodeId); -+extern int cm_svc_indicator_bitmap (EP_RAIL *rail, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+/* cm_procfs.c */ -+extern void cm_procfs_init (CM_SUBSYS *subsys); -+extern void cm_procfs_fini (CM_SUBSYS *subsys); -+extern void cm_procfs_rail_init (CM_RAIL *rail); -+extern void cm_procfs_rail_fini (CM_RAIL *rail); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_CM_H */ -+ -Index: linux-2.6.5-7.191/include/elan/compat.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/compat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/compat.h 2005-07-28 14:52:52.931666120 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.1 2003/12/03 13:18:48 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/compat.h,v $*/ -+ -+#ifndef __ELAN_COMPAT_H -+#define __ELAN_COMPAT_H -+ -+#define ELANMOD_STATS_MAP ELAN_STATS_MAP -+ -+#endif /* __ELAN_COMPAT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/device.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/device.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/device.h 2005-07-28 14:52:52.932665968 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: device.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/device.h,v $*/ -+ -+#ifndef __ELAN_DEVICE_H -+#define __ELAN_DEVICE_H -+ -+/* non-kernel headings */ -+typedef unsigned int ELAN_DEV_IDX; -+ -+#if defined(__KERNEL__) -+ -+/* device callbacks */ -+#define ELAN_DEV_OPS_VERSION ((u_int)1) -+ -+typedef struct elan_dev_ops -+{ -+ /* dev info */ -+ int (*get_position) (void *user_data, ELAN_POSITION *position); -+ int (*set_position) (void *user_data, unsigned short nodeId, unsigned short numNodes); -+ -+ /* cap */ -+ -+ u_int ops_version; -+} ELAN_DEV_OPS; -+ -+typedef struct elan_dev_struct -+{ -+ struct list_head node; -+ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; -+ void *user_data; -+ ELAN_DEV_OPS *ops; -+} ELAN_DEV_STRUCT; -+ -+/* device.c */ -+extern ELAN_DEV_IDX elan_dev_register (ELAN_DEVINFO *devinfo, -+ ELAN_DEV_OPS *ops, -+ void *userdata); -+extern int elan_dev_deregister (ELAN_DEVINFO *devinfo); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find (ELAN_DEV_IDX devidx); -+ -+extern ELAN_DEV_STRUCT * elan_dev_find_byrail(unsigned short deviceid, unsigned rail); -+extern int elan_dev_dump (void); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_DEVICE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/devinfo.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/devinfo.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/devinfo.h 2005-07-28 14:52:52.932665968 -0400 -@@ -0,0 +1,92 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: devinfo.h,v 1.11.2.1 2005/02/01 12:36:40 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/devinfo.h,v $*/ -+ -+#ifndef __ELAN_DEVINFO_H -+#define __ELAN_DEVINFO_H -+ -+#define ELAN_MAX_LEVELS 8 /* maximum number of levels in switch network */ -+ -+typedef struct elan_position -+{ -+ unsigned pos_mode; /* mode we're operating in */ -+ unsigned pos_nodeid; /* port this device connected to */ -+ unsigned pos_levels; /* number of levels to top switch */ -+ unsigned pos_nodes; /* number of nodes in the machine */ -+ unsigned pos_random_disabled; /* levels at which "random" routing is not possible */ -+ unsigned char pos_arity[ELAN_MAX_LEVELS]; /* number of downlinks per switch level */ -+} ELAN_POSITION; -+ -+#define ELAN4_PARAM_PCI_PADDING_FLAGS 0 /* A bit field, representing good places to burst across the pci */ -+#define ELAN4_PARAM_EVENT_COPY_WIN 1 /* The num of cmds when it becomes quicker to send via event copy than write directly */ -+#define ELAN4_PARAM_WRITE_COMBINING 2 /* If set the device supports bursts accesses across the pci bus */ -+#define ELAN4_PARAM_DRIVER_FEATURES 11 /* device driver features */ -+#define ELAN4_PARAM_COUNT 12 -+ -+/* values for ELAN4_PARAM_DRIVER_FEATURES, dev_features */ -+#define ELAN4_FEATURE_PCI_MAP (1 << 0) /* must use pci mapping functions */ -+#define ELAN4_FEATURE_64BIT_READ (1 << 1) /* must perform 64 bit PIO reads */ -+#define ELAN4_FEATURE_PIN_DOWN (1 << 2) /* must pin down pages */ -+#define ELAN4_FEATURE_NO_WRITE_COMBINE (1 << 3) /* don't allow write combinig at all */ -+#define ELAN4_FEATURE_NO_IOPROC (1 << 4) /* unpatched kernel or disabled by procfs */ -+#define ELAN4_FEATURE_NO_IOPROC_UPDATE (1 << 5) /* don't do coproc update xlation loading */ -+#define ELAN4_FEATURE_NO_PAGEFAULT (1 << 6) /* don't do pagefaulting */ -+#define ELAN4_FEATURE_NO_PREFETCH (1 << 7) /* don't allow prefetching of elan sdram/cports */ -+ -+typedef struct elan_params -+{ -+ unsigned values[ELAN4_PARAM_COUNT]; -+} ELAN_PARAMS; -+ -+/* values for pos_mode */ -+#define ELAN_POS_UNKNOWN 0 /* network position unknown */ -+#define ELAN_POS_MODE_SWITCHED 1 /* connected to a switch */ -+#define ELAN_POS_MODE_LOOPBACK 2 /* loopback connector */ -+#define ELAN_POS_MODE_BACKTOBACK 3 /* cabled back-to-back to another node */ -+ -+typedef struct elan_devinfo -+{ -+ unsigned short dev_vendor_id; /* pci vendor id */ -+ unsigned short dev_device_id; /* pci device id */ -+ unsigned char dev_revision_id; /* pci revision id */ -+ unsigned char dev_instance; /* device instance number */ -+ unsigned char dev_rail; /* device rail number */ -+ -+ unsigned short dev_driver_version; /* device driver version */ -+ unsigned short dev_params_mask; /* mask for valid entries in dev_params array */ -+ ELAN_PARAMS dev_params; /* device parametization */ -+ -+ unsigned dev_num_down_links_value; /* MRH hint as to machine size NEEDS coding XXXXX */ -+} ELAN_DEVINFO; -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+#if defined(__KERNEL__) -+/* devinfo.c */ -+#include -+#include -+extern int elan_get_devinfo (ELAN_DEV_IDX devidx, ELAN_DEVINFO *devinfo); -+extern int elan_get_position (ELAN_DEV_IDX devidx, ELAN_POSITION *position); -+extern int elan_set_position (ELAN_DEV_IDX devidx, unsigned short nodeId, unsigned short numNodes); -+#endif /* __KERNEL__ */ -+ -+ -+#endif /* __ELAN_DEVINFO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/elanmoddebug.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/elanmoddebug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/elanmoddebug.h 2005-07-28 14:52:52.932665968 -0400 -@@ -0,0 +1,63 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN_DEBUG_H -+#define _ELAN_DEBUG_H -+ -+ -+#ident "$Id: elanmoddebug.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmoddebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+/* 0 | QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE */ -+extern int elan_debug_mode; -+extern int elan_debug_mask; -+ -+#define ELAN_DBG_VP 0x00000001 -+#define ELAN_DBG_CAP 0x00000002 -+#define ELAN_DBG_CTRL 0x00000004 -+#define ELAN_DBG_SYS_FN 0x00000008 -+#define ELAN_DBG_ALL 0xffffffff -+ -+ -+#if defined(DEBUG_PRINTF) -+# define ELAN_DEBUG0(m,fmt) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt) : (void)0) -+# define ELAN_DEBUG1(m,fmt,a) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a) : (void)0) -+# define ELAN_DEBUG2(m,fmt,a,b) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b) : (void)0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c) : (void)0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d) : (void)0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e) : (void)0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) ((elan_debug_mask&(m)) ? qsnet_debugf(elan_debug_mode, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define ELAN_DEBUG0(m,fmt) (0) -+# define ELAN_DEBUG1(m,fmt,a) (0) -+# define ELAN_DEBUG2(m,fmt,a,b) (0) -+# define ELAN_DEBUG3(m,fmt,a,b,c) (0) -+# define ELAN_DEBUG4(m,fmt,a,b,c,d) (0) -+# define ELAN_DEBUG5(m,fmt,a,b,c,d,e) (0) -+# define ELAN_DEBUG6(m,fmt,a,b,c,d,e,f) (0) -+#ifdef __GNUC__ -+# define ELAN_DEBUG(m,args...) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/elanmod.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/elanmod.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/elanmod.h 2005-07-28 14:52:52.932665968 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod.h,v 1.10 2004/06/18 09:28:16 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod.h,v $*/ -+ -+#ifndef __ELAN_MOD_H -+#define __ELAN_MOD_H -+ -+#include -+#include -+#include -+#include -+ -+#if defined(__KERNEL__) -+ -+#include -+ -+extern kmutex_t elan_mutex; -+ -+/* elan_general.c */ -+extern int elan_init(void); -+extern int elan_fini(void); -+ -+/* return codes, -ve => errno, +ve => success */ -+#define ELAN_CAP_OK (0) -+#define ELAN_CAP_RMS (1) -+ -+#define ELAN_USER_ATTACH (1) -+#define ELAN_USER_DETACH (2) -+#define ELAN_USER_P2P (3) -+#define ELAN_USER_BROADCAST (4) -+ -+extern int elanmod_classify_cap (ELAN_POSITION *position, ELAN_CAPABILITY *cap, unsigned use); -+ -+#define ELAN_USER_BASE_CONTEXT_NUM 0x000 /* first user allowable context */ -+#define ELAN_USER_TOP_CONTEXT_NUM 0x7FF /* last user allowable context */ -+ -+#define ELAN_RMS_BASE_CONTEXT_NUM 0x400 /* reserved for RMS allocation */ -+#define ELAN_RMS_TOP_CONTEXT_NUM 0x7FF -+ -+#define ELAN_USER_CONTEXT(ctx) ((ctx) >= ELAN_USER_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_USER_TOP_CONTEXT_NUM) -+ -+#define ELAN_RMS_CONTEXT(ctx) ((ctx) >= ELAN_RMS_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN_RMS_TOP_CONTEXT_NUM) -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_MOD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/elanmod_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/elanmod_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/elanmod_linux.h 2005-07-28 14:52:52.933665816 -0400 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: elanmod_linux.h,v 1.6 2003/09/29 15:36:20 mike Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/elanmod_linux.h,v $*/ -+ -+#ifndef __ELAN_MOD_LINUX_H -+#define __ELAN_MOD_LINUX_H -+ -+#define ELANCRTL_USER_BASE 0x40 -+ -+/* stats */ -+typedef struct elanctrl_stats_get_next_struct -+{ -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; /* return value */ -+} ELANCTRL_STATS_GET_NEXT_STRUCT; -+#define ELANCTRL_STATS_GET_NEXT _IOR ('e', ELANCRTL_USER_BASE + 0, ELANCTRL_STATS_GET_NEXT_STRUCT) -+ -+typedef struct elanctrl_stats_find_index_struct -+{ -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_FIND_INDEX_STRUCT; -+#define ELANCTRL_STATS_FIND_INDEX _IOR ('e', ELANCRTL_USER_BASE + 1, ELANCTRL_STATS_FIND_INDEX_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_info_struct -+{ -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK_INFO _IOR ('e', ELANCRTL_USER_BASE + 2, ELANCTRL_STATS_GET_BLOCK_INFO_STRUCT) -+ -+typedef struct elanctrl_stats_get_index_name_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} ELANCTRL_STATS_GET_INDEX_NAME_STRUCT; -+#define ELANCTRL_STATS_GET_INDEX_NAME _IOR ('e', ELANCRTL_USER_BASE + 3, ELANCTRL_STATS_GET_INDEX_NAME_STRUCT) -+ -+typedef struct elanctrl_stats_clear_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+} ELANCTRL_STATS_CLEAR_BLOCK_STRUCT; -+#define ELANCTRL_STATS_CLEAR_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 4, ELANCTRL_STATS_CLEAR_BLOCK_STRUCT) -+ -+typedef struct elanctrl_stats_get_block_struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} ELANCTRL_STATS_GET_BLOCK_STRUCT; -+#define ELANCTRL_STATS_GET_BLOCK _IOR ('e', ELANCRTL_USER_BASE + 5, ELANCTRL_STATS_GET_BLOCK_STRUCT) -+ -+ -+typedef struct elanctrl_get_devinfo_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} ELANCTRL_GET_DEVINFO_STRUCT; -+#define ELANCTRL_GET_DEVINFO _IOR ('e', ELANCRTL_USER_BASE + 6, ELANCTRL_GET_DEVINFO_STRUCT) -+ -+typedef struct elanctrl_get_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} ELANCTRL_GET_POSITION_STRUCT; -+#define ELANCTRL_GET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 7, ELANCTRL_GET_POSITION_STRUCT) -+ -+typedef struct elanctrl_set_position_struct -+{ -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELANCTRL_SET_POSITION_STRUCT; -+#define ELANCTRL_SET_POSITION _IOR ('e', ELANCRTL_USER_BASE + 8, ELANCTRL_SET_POSITION_STRUCT) -+ -+typedef struct elanctrl_create_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_CREATE_CAP_STRUCT; -+#define ELANCTRL_CREATE_CAP _IOW ('e', ELANCRTL_USER_BASE + 9, ELANCTRL_CREATE_CAP_STRUCT) -+ -+typedef struct elanctrl_destroy_cap_struct -+{ -+ ELAN_CAPABILITY cap; -+} ELANCTRL_DESTROY_CAP_STRUCT; -+#define ELANCTRL_DESTROY_CAP _IOW ('e', ELANCRTL_USER_BASE + 10, ELANCTRL_DESTROY_CAP_STRUCT) -+ -+typedef struct elanctrl_create_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_CREATE_VP_STRUCT; -+#define ELANCTRL_CREATE_VP _IOW ('e', ELANCRTL_USER_BASE + 11, ELANCTRL_CREATE_VP_STRUCT) -+ -+typedef struct elanctrl_destroy_vp_struct -+{ -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} ELANCTRL_DESTROY_VP_STRUCT; -+#define ELANCTRL_DESTROY_VP _IOW ('e', ELANCRTL_USER_BASE + 12, ELANCTRL_DESTROY_VP_STRUCT) -+ -+#define ELANCTRL_DEBUG_DUMP _IO ('e', ELANCRTL_USER_BASE + 13) -+ -+typedef struct elanctrl_get_caps_struct -+{ -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} ELANCTRL_GET_CAPS_STRUCT; -+#define ELANCTRL_GET_CAPS _IOW ('e', ELANCRTL_USER_BASE + 14, ELANCTRL_GET_CAPS_STRUCT) -+ -+ -+typedef struct elanctrl_debug_buffer_struct -+{ -+ caddr_t buffer; -+ int size; -+} ELANCTRL_DEBUG_BUFFER_STRUCT; -+#define ELANCTRL_DEBUG_BUFFER _IOW ('e', ELANCRTL_USER_BASE + 15, ELANCTRL_DEBUG_BUFFER_STRUCT) -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_MOD_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/elanmod_subsystem.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/elanmod_subsystem.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/elanmod_subsystem.h 2005-07-28 14:52:52.933665816 -0400 -@@ -0,0 +1,138 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_SUBSYSTEM_H -+#define __ELAN_SUBSYSTEM_H -+ -+#include -+#include -+ -+#if defined( __KERNEL__) -+int elan_configure( -+ cfg_op_t op, -+ caddr_t indata, -+ ulong indata_size, -+ caddr_t outdata, -+ ulong outdata_size); -+#endif -+ -+#define ELAN_KMOD_CODE(x) ((x)+CFG_OP_SUBSYS_MIN) -+#define ELAN_MAX_KMOD_CODES 100 -+ -+#define ELAN_SUBSYS "elan" -+ -+#define ELAN_STATS_GET_NEXT 0x01 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ ELAN_STATS_IDX *next_statidx; -+} elan_stats_get_next_struct; -+ -+ -+#define ELAN_STATS_FIND_INDEX 0x02 -+typedef struct { -+ caddr_t block_name; -+ ELAN_STATS_IDX *statidx; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_find_index_struct; -+ -+#define ELAN_STATS_GET_BLOCK_INFO 0x03 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ caddr_t block_name; /* return value */ -+ uint *num_entries; /* return value */ -+} elan_stats_get_block_info_struct; -+ -+#define ELAN_STATS_GET_INDEX_NAME 0x04 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+ uint index; -+ caddr_t name; /* return value */ -+} elan_stats_get_index_name_struct; -+ -+#define ELAN_STATS_CLEAR_BLOCK 0x05 -+typedef struct { -+ ELAN_STATS_IDX statidx; -+} elan_stats_clear_block_struct; -+ -+#define ELAN_STATS_GET_BLOCK 0x06 -+typedef struct -+{ -+ ELAN_STATS_IDX statidx; -+ uint entries; -+ ulong *values; /* return values */ -+} elan_stats_get_block_struct; -+ -+#define ELAN_GET_DEVINFO 0x07 -+typedef struct -+{ -+ ELAN_DEV_IDX devidx; -+ ELAN_DEVINFO *devinfo; /* return values */ -+} elan_get_devinfo_struct; -+ -+#define ELAN_GET_POSITION 0x08 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ ELAN_POSITION *position; /* return values */ -+} elan_get_position_struct; -+ -+#define ELAN_SET_POSITION 0x09 -+typedef struct { -+ ELAN_DEV_IDX devidx; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} elan_set_position_struct; -+ -+#define ELAN_CREATE_CAP 0x0a -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_create_cap_struct; -+ -+#define ELAN_DESTROY_CAP 0x0b -+typedef struct { -+ ELAN_CAPABILITY cap; -+} elan_destroy_cap_struct; -+ -+#define ELAN_CREATE_VP 0x0c -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_create_vp_struct; -+ -+#define ELAN_DESTROY_VP 0x0d -+typedef struct { -+ ELAN_CAPABILITY cap; -+ ELAN_CAPABILITY map; -+} elan_destroy_vp_struct; -+ -+ -+#define ELAN_DEBUG_DUMP 0x0e -+ -+#define ELAN_GET_CAPS 0x0f -+typedef struct { -+ uint *number_of_results; -+ uint array_size; -+ ELAN_CAP_STRUCT *caps; -+} elan_get_caps_struct; -+ -+#define ELAN_DEBUG_BUFFER 0x10 -+typedef struct { -+ caddr_t addr; -+ int len; -+} elan_debug_buffer_struct; -+ -+#define ELANMOD_PROCFS_IOCTL "/proc/qsnet/elan/ioctl" -+#define ELANMOD_PROCFS_VERSION "/proc/qsnet/elan/version" -+#define ELANMOD_PROCFS_DEBUG_MASK "/proc/qsnet/elan/debug_mask" -+#define ELANMOD_PROCFS_DEBUG_MODE "/proc/qsnet/elan/debug_mode" -+ -+#endif /* __ELAN_SUBSYSTEM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/epcomms.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/epcomms.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/epcomms.h 2005-07-28 14:52:52.935665512 -0400 -@@ -0,0 +1,635 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPCOMMS_H -+#define __ELAN_EPCOMMS_H -+ -+#ident "$Id: epcomms.h,v 1.44.2.2 2004/11/12 10:54:50 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epcomms.h,v $ */ -+ -+#include -+#include -+ -+#define EPCOMMS_SUBSYS_NAME "epcomms" -+ -+/* message service numbers */ -+#define EP_MSG_SVC_EIP512 0x00 /* Quadrics EIP services */ -+#define EP_MSG_SVC_EIP1K 0x01 -+#define EP_MSG_SVC_EIP2K 0x02 -+#define EP_MSG_SVC_EIP4K 0x03 -+#define EP_MSG_SVC_EIP8K 0x04 -+#define EP_MSG_SVC_EIP16K 0x05 -+#define EP_MSG_SVC_EIP32K 0x06 -+#define EP_MSG_SVC_EIP64K 0x07 -+#define EP_MSG_SVC_EIP128K 0x08 -+ -+#define EP_MSG_SVC_PFS 0x09 /* Quadrics PFS rpc service */ -+ -+#define EP_MSG_SVC_PORTALS_SMALL 0x10 /* Lustre Portals */ -+#define EP_MSG_SVC_PORTALS_LARGE 0x11 -+ -+#define EP_MSG_NSVC 0x40 /* Max number of services */ -+ -+#define EP_MSGQ_ADDR(qnum) (EP_EPCOMMS_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+/* -+ * EP_ENVELOPE -+ * Messages are sent by sending an envelope to the destination -+ * describing the source buffers to transfer. The receiving thread -+ * then allocates a receive buffer and fetches the data by issuing -+ * "get" dmas. -+ * -+ * NOTE: envelopes are not explicitly converted to network byte order -+ * since they are always transferred little endian as they are -+ * copied to/from elan memory using word operations. -+ */ -+typedef struct ep_envelope -+{ -+ uint32_t Version; /* Protocol version field */ -+ -+ EP_ATTRIBUTE Attr; /* Attributes */ -+ -+ EP_XID Xid; /* transaction id */ -+ -+ uint32_t NodeId; /* Source processor */ -+ uint32_t Range; /* range we're sending to (high << 16 | low) */ -+ -+ EP_ADDR TxdRail; /* address of per-rail txd */ -+ EP_NMD TxdMain; /* address of main memory portion of txd */ -+ -+ uint32_t nFrags; /* # fragments */ -+ EP_NMD Frags[EP_MAXFRAG]; /* network mapping handles of source data */ -+ -+ uint32_t CheckSum; /* holds the check sum value when active -+ * must be after all members to be checksum'd -+ */ -+ -+ uint32_t Pad[6]; /* Pad to 128 bytes */ -+} EP_ENVELOPE; -+ -+#define EP_ENVELOPE_VERSION 0xdac10001 -+#define EP_ENVELOPE_SIZE roundup (sizeof (EP_ENVELOPE), EP_BLK_SIZE) -+ -+/* -+ * RPC payload - this small amount of data is transfered in -+ * the envelope for RPCs -+ */ -+typedef struct ep_payload -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_PAYLOAD; -+ -+#define EP_PAYLOAD_SIZE roundup (sizeof (EP_PAYLOAD), EP_BLK_SIZE) -+ -+#define EP_INPUTQ_SIZE (EP_ENVELOPE_SIZE + EP_PAYLOAD_SIZE) -+ -+/* -+ * EP_STATUSBLK -+ * RPC completion transfers a status block to the client. -+ */ -+typedef struct ep_statusblk -+{ -+ uint32_t Data[128/sizeof(uint32_t)]; -+} EP_STATUSBLK; -+ -+#define EP_STATUSBLK_SIZE roundup (sizeof(EP_STATUSBLK), EP_BLK_SIZE) -+ -+#define EP_RANGE(low,high) ((high) << 16 | (low)) -+#define EP_RANGE_LOW(range) ((range) & 0xFFFF) -+#define EP_RANGE_HIGH(range) (((range) >> 16) & 0xFFFF) -+ -+/* return codes from functions, + 'res' parameter to txd callback, ep_rxd_status() */ -+typedef enum -+{ -+ EP_SUCCESS = 0, /* message sent/received successfully */ -+ EP_RXD_PENDING = -1, /* rxd not completed by thread */ -+ EP_CONN_RESET = -2, /* virtual circuit reset */ -+ EP_NODE_DOWN = -3, /* node down - transmit not attempted */ -+ EP_MSG_TOO_BIG = -4, /* received message larger than buffer */ -+ EP_ENOMEM = -5, /* memory alloc failed */ -+ EP_EINVAL = -6, /* invalid parameters */ -+ EP_SHUTDOWN = -7, /* receiver is being shut down */ -+} EP_STATUS; -+ -+/* forward declarations */ -+typedef struct ep_rxd EP_RXD; -+typedef struct ep_txd EP_TXD; -+typedef struct ep_rcvr_rail EP_RCVR_RAIL; -+typedef struct ep_rcvr EP_RCVR; -+typedef struct ep_xmtr_rail EP_XMTR_RAIL; -+typedef struct ep_xmtr EP_XMTR; -+typedef struct ep_comms_rail EP_COMMS_RAIL; -+typedef struct ep_comms_subsys EP_COMMS_SUBSYS; -+ -+typedef struct ep_rcvr_stats EP_RCVR_STATS; -+typedef struct ep_xmtr_stats EP_XMTR_STATS; -+typedef struct ep_rcvr_rail_stats EP_RCVR_RAIL_STATS; -+typedef struct ep_xmtr_rail_stats EP_XMTR_RAIL_STATS; -+ -+typedef void (EP_RXH)(EP_RXD *rxd); /* callback function from receive completion */ -+typedef void (EP_TXH)(EP_TXD *txd, void *arg, EP_STATUS res); /* callback function from transmit completion */ -+ -+/* Main memory portion shared descriptor */ -+typedef struct ep_rxd_main -+{ -+ EP_ENVELOPE Envelope; /* 128 byte aligned envelope */ -+ EP_PAYLOAD Payload; /* 128 byte aligned payload */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+ EP_STATUSBLK StatusBlk; /* RPC status block to return */ -+ uint64_t Next; /* linked list when on active list (main address) */ -+ int32_t Len; /* Length of message received */ -+} EP_RXD_MAIN; -+ -+#define EP_RXD_MAIN_SIZE roundup (sizeof (EP_RXD_MAIN), EP_BLK_SIZE) -+ -+/* Phases for message/rpc */ -+#ifndef __ELAN__ -+ -+/* Kernel memory portion of per-rail receive descriptor */ -+typedef struct ep_rxd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_RCVR_RAIL *RcvrRail; /* rvcr we're associated with */ -+ -+ EP_RXD *Rxd; /* receive descriptor we're bound to */ -+} EP_RXD_RAIL; -+ -+#define RXD_BOUND2RAIL(rxdRail,rcvrRail) ((rxdRail) != NULL && ((EP_RXD_RAIL *) (rxdRail))->RcvrRail == (EP_RCVR_RAIL *) rcvrRail) -+ -+struct ep_rxd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_RCVR *Rcvr; /* owning receiver */ -+ -+ EP_RXD_MAIN *RxdMain; /* shared main memory portion. */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_RXD_RAIL *RxdRail; /* per-rail rxd we're bound to */ -+ -+ EP_RXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned int State; /* RXD status (active,stalled,failed) */ -+ -+ EP_NMD Data; /* network mapping descriptor for user buffer */ -+ -+ int nFrags; /* network mapping descriptor for put/get/complete */ -+ EP_NMD Local[EP_MAXFRAG]; -+ EP_NMD Remote[EP_MAXFRAG]; -+ -+ long NextRunTime; /* time to resend failover/map requests */ -+ EP_XID MsgXid; /* and transaction id */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ struct list_head CheckSumLink; /* linked on check sum list */ -+#endif -+}; -+ -+#define EP_NUM_RXD_PER_BLOCK 16 -+ -+/* rxd->State */ -+#define EP_RXD_FREE 0 -+ -+#define EP_RXD_RECEIVE_UNBOUND 1 -+#define EP_RXD_RECEIVE_ACTIVE 2 -+ -+#define EP_RXD_PUT_ACTIVE 3 -+#define EP_RXD_PUT_STALLED 4 -+#define EP_RXD_GET_ACTIVE 5 -+#define EP_RXD_GET_STALLED 6 -+ -+#define EP_RXD_COMPLETE_ACTIVE 7 -+#define EP_RXD_COMPLETE_STALLED 8 -+ -+#define EP_RXD_RPC_IN_PROGRESS 9 -+#define EP_RXD_COMPLETED 10 -+ -+#define EP_RXD_BEEN_ABORTED 11 /* rxd was aborted while in a private state */ -+ -+typedef struct ep_rxd_block -+{ -+ struct list_head Link; -+ -+ EP_NMD NmdMain; -+ -+ EP_RXD Rxd[EP_NUM_RXD_PER_BLOCK]; -+} EP_RXD_BLOCK; -+ -+struct ep_rcvr_rail_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr_rail -+{ -+ EP_RCVR *Rcvr; /* associated receiver */ -+ EP_COMMS_RAIL *CommsRail; /* comms rail */ -+ -+ struct proc_dir_entry *procfs_root; /* root of this rcvr_rail's procfs entry */ -+ EP_RCVR_RAIL_STATS stats; /* generic rcvr_rail stats */ -+}; -+ -+struct ep_rcvr_stats -+{ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+}; -+ -+struct ep_rcvr -+{ -+ struct list_head Link; /* queued on subsystem */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ EP_SERVICE Service; /* service number */ -+ -+ unsigned int InputQueueEntries; /* # entries on receive queue */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_RCVR_RAIL *Rails[EP_MAX_RAILS]; -+ -+ spinlock_t Lock; /* spinlock for rails/receive lists */ -+ -+ struct list_head ActiveDescList; /* List of pending/active receive descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; /* total number created */ -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ unsigned int ForwardRxdCount; /* count of rxd's being forwarded */ -+ unsigned int CleanupWaiting; /* waiting for cleanup */ -+ kcondvar_t CleanupSleep; /* and place to sleep */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_RCVR_STATS stats; -+}; -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+#define EP_ENVELOPE_CHECK_SUM (1<<31) -+extern uint32_t ep_calc_check_sum (EP_SYS *sys, EP_ENVELOPE *env, EP_NMD *nmd, int nFrags); -+#endif -+ -+#endif /* ! __ELAN__ */ -+ -+typedef struct ep_txd_main -+{ -+ EP_STATUSBLK StatusBlk; /* RPC status block */ -+ bitmap_t Bitmap[BT_BITOUL(EP_MAX_NODES)]; /* broadcast bitmap */ -+} EP_TXD_MAIN; -+ -+#define EP_TXD_MAIN_SIZE roundup (sizeof (EP_TXD_MAIN), EP_BLK_SIZE) -+ -+#ifndef __ELAN__ -+typedef struct ep_txd_rail -+{ -+ struct list_head Link; /* linked on freelist */ -+ EP_XMTR_RAIL *XmtrRail; /* xmtr we're associated with */ -+ -+ EP_TXD *Txd; /* txd we're bound to */ -+} EP_TXD_RAIL; -+ -+#define TXD_BOUND2RAIL(rxdRail,xmtrRail) ((txdRail) != NULL && ((EP_TXD_RAIL *) (txdRail))->XmtrRail == (EP_XMTR_RAIL *) xmtrRail) -+ -+struct ep_txd -+{ -+ struct list_head Link; /* linked on free/active list */ -+ EP_XMTR *Xmtr; /* service we're associated with */ -+ -+ EP_TXD_MAIN *TxdMain; /* shared main memory portion */ -+ EP_NMD NmdMain; /* and network mapping descriptor */ -+ -+ EP_TXD_RAIL *TxdRail; /* per-rail txd for this phase */ -+ -+ EP_TXH *Handler; /* completion function */ -+ void *Arg; /* and arguement */ -+ -+ unsigned short NodeId; /* node transmit is to. */ -+ EP_SERVICE Service; /* and seervice */ -+ -+ long TimeStamp; /* time we where created at, to find sends taking too long */ -+ long RetryTime; -+ EP_BACKOFF Backoff; -+ -+ EP_ENVELOPE Envelope; /* envelope for transmit */ -+ EP_PAYLOAD Payload; /* payload for transmit */ -+}; -+ -+#define EP_NUM_TXD_PER_BLOCK 16 -+ -+/* "phase" parameter to BindTxd */ -+#define EP_TXD_PHASE_ACTIVE 1 -+#define EP_TXD_PHASE_PASSIVE 2 -+ -+typedef struct ep_txd_block -+{ -+ struct list_head Link; -+ EP_NMD NmdMain; -+ EP_TXD Txd[EP_NUM_TXD_PER_BLOCK]; /* transmit descriptors */ -+} EP_TXD_BLOCK; -+ -+struct ep_xmtr_rail_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr_rail -+{ -+ EP_COMMS_RAIL *CommsRail; /* associated comms rail */ -+ EP_XMTR *Xmtr; /* associated transmitter */ -+ -+ struct proc_dir_entry *procfs_root; /* place where this xmtr's proc entry is */ -+ -+ EP_XMTR_RAIL_STATS stats; -+}; -+ -+struct ep_xmtr_stats -+{ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+}; -+ -+struct ep_xmtr -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_COMMS_SUBSYS *Subsys; /* kernel comms subsystem */ -+ -+ EP_RAILMASK RailMask; /* bitmap of which rails are available */ -+ EP_XMTR_RAIL *Rails[EP_MAX_RAILS]; /* per-rail state */ -+ -+ spinlock_t Lock; /* lock for active descriptor list */ -+ -+ struct list_head ActiveDescList; /* list of active transmit descriptors */ -+ -+ EP_XID_CACHE XidCache; /* XID cache (protected by Lock) */ -+ -+ struct list_head FreeDescList; /* List of free receive descriptors */ -+ unsigned int FreeDescCount; /* and number on free list */ -+ unsigned int TotalDescCount; -+ spinlock_t FreeDescLock; /* and lock for free list */ -+ kcondvar_t FreeDescSleep; /* with place to sleep for rx desc */ -+ int FreeDescWanted; /* and flag */ -+ struct list_head DescBlockList; -+ -+ struct proc_dir_entry *procfs_root; /* place where this rcvr's proc entry is */ -+ EP_XMTR_STATS stats; -+}; -+ -+/* forward descriptor */ -+#define EP_TREE_ARITY 3 -+ -+typedef struct ep_fwd_desc -+{ -+ struct list_head Link; /* linked on forward/free lists */ -+ EP_RXD *Rxd; /* rxd to forward */ -+ EP_NMD Data; /* nmd of subset of receive buffer */ -+ unsigned NumChildren; /* number of places we're forwarding */ -+ unsigned Children[EP_TREE_ARITY]; -+} EP_FWD_DESC; -+ -+typedef struct ep_comms_ops -+{ -+ void (*DelRail) (EP_COMMS_RAIL *rail); -+ void (*DisplayRail) (EP_COMMS_RAIL *rail); -+ -+ struct { -+ void (*AddRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_RCVR *rcvr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_RCVR_RAIL *rcvrRail, long nextRunTime); -+ -+ int (*QueueRxd) (EP_RXD *rxd, EP_RCVR_RAIL *rcvrRail); -+ void (*RpcPut)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcGet)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ void (*RpcComplete)(EP_RXD *rxd, EP_NMD *local, EP_NMD *remote, unsigned nFrags); -+ -+ EP_RXD *(*StealRxd)(EP_RCVR_RAIL *rcvrRail); -+ -+ void (*DisplayRcvr) (DisplayInfo *di, EP_RCVR_RAIL *rcvrRail); -+ void (*DisplayRxd) (DisplayInfo *di, EP_RXD_RAIL *rxdRail); -+ -+ void (*FillOutRailStats) (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+ } Rcvr; -+ -+ struct { -+ void (*AddRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ void (*DelRail) (EP_XMTR *xmtr, EP_COMMS_RAIL *rail); -+ -+ long (*Check) (EP_XMTR_RAIL *xmtrRail, long nextRunTime); -+ -+ int (*BindTxd) (EP_TXD *txd, EP_XMTR_RAIL *xmtrRail, unsigned int phase); -+ void (*UnbindTxd) (EP_TXD *txd, unsigned int phase); -+ int (*PollTxd) (EP_XMTR_RAIL *xmtrRail, EP_TXD_RAIL *txdRail, int how); -+ -+ void (*DisplayXmtr) (DisplayInfo *di, EP_XMTR_RAIL *xmtrRail); -+ void (*DisplayTxd) (DisplayInfo *di, EP_TXD_RAIL *txdRail); -+ -+ int (*CheckTxdState) (EP_TXD *txd); -+ -+ void (*FillOutRailStats) (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+ } Xmtr; -+} EP_COMMS_OPS; -+ -+#define EP_RAIL_OP(commsRail, Which) (commsRail)->Ops.Which -+#define EP_RCVR_OP(rcvrRail, Which) (rcvrRail)->CommsRail->Ops.Rcvr.Which -+#define EP_XMTR_OP(xmtrRail, Which) (xmtrRail)->CommsRail->Ops.Xmtr.Which -+ -+/* "how" parameter to PollTxd */ -+#define POLL_TX_LIST 0 -+#define ENABLE_TX_CALLBACK 1 -+#define DISABLE_TX_CALLBACK 2 -+ -+struct ep_comms_rail -+{ -+ struct list_head Link; /* Linked on subsys */ -+ EP_RAIL *Rail; /* kernel comms rail */ -+ EP_COMMS_SUBSYS *Subsys; -+ EP_COMMS_OPS Ops; -+ -+ EP_COMMS_RAIL_STATS Stats; /* statistics */ -+}; -+ -+struct ep_comms_subsys -+{ -+ EP_SUBSYS Subsys; /* is a kernel comms subsystem */ -+ -+ kmutex_t Lock; /* global lock */ -+ -+ EP_COMMS_STATS Stats; /* statistics */ -+ -+ struct list_head Rails; /* list of all rails */ -+ -+ struct list_head Receivers; /* list of receivers */ -+ struct list_head Transmitters; /* and transmitters */ -+ -+ /* forward/allocator thread */ -+ EP_KTHREAD Thread; /* place thread sleeps */ -+ -+ /* message passing "broadcast" forward lists */ -+ spinlock_t ForwardDescLock; /* Lock for broadcast forwarding */ -+ struct list_head ForwardDescList; /* List of rxd's to forward */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ spinlock_t CheckSumDescLock; /* Lock for CheckSums */ -+ struct list_head CheckSumDescList; /* List of rxd's to be CheckSumed */ -+#endif -+ -+ EP_XMTR *ForwardXmtr; /* and transmitter to forward with */ -+}; -+ -+/* epcomms.c subsystem initialisation */ -+extern unsigned int epcomms_forward_limit; -+ -+extern int ep_comms_init (EP_SYS *sys); -+extern void ep_comms_display (EP_SYS *sys, char *how); -+extern EP_RAILMASK ep_rcvr_railmask (EP_SYS *epsys, EP_SERVICE service); -+ -+/* epcomms_elan3.c */ -+extern EP_COMMS_RAIL *ep3comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcomms_elan4.c */ -+extern EP_COMMS_RAIL *ep4comms_add_rail (EP_SUBSYS *s, EP_SYS *sys, EP_RAIL *rail); -+ -+/* epcommsTx.c */ -+extern int TxdShouldStabalise (EP_TXD_RAIL *txdRail, EP_RAIL *rail); -+extern void FreeTxd (EP_XMTR *xmtr, EP_TXD *txd); -+ -+extern unsigned int ep_txd_lowat; -+extern long ep_check_xmtr (EP_XMTR *xmtr, long nextRunTime); -+extern void ep_display_xmtr (DisplayInfo *di, EP_XMTR *xmtr); -+extern void ep_xmtr_flush_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+extern void ep_xmtr_reloc_callback (EP_XMTR *xmtr, EP_XMTR_RAIL *xmtrRail); -+ -+extern void ep_xmtr_fillout_stats (EP_XMTR *xmtr, char *str); -+extern void ep_xmtr_rail_fillout_stats (EP_XMTR_RAIL *xmtr_rail, char *str); -+ -+extern void ep_xmtr_txd_stat (EP_XMTR *xmtr, EP_TXD *txd); -+ -+/* epcommsRx.c */ -+extern EP_RXD *StealRxdFromOtherRail (EP_RCVR *rcvr); -+ -+extern unsigned int ep_rxd_lowat; -+extern long ep_check_rcvr (EP_RCVR *rcvr, long nextRunTime); -+extern void ep_rcvr_flush_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_rcvr_reloc_callback (EP_RCVR *rcvr, EP_RCVR_RAIL *rcvrRail); -+extern void ep_display_rcvr (DisplayInfo *di, EP_RCVR *rcvr, int full); -+ -+extern long ep_forward_rxds (EP_COMMS_SUBSYS *subsys, long nextRunTime); -+ -+extern void ep_rcvr_fillout_stats (EP_RCVR *rcvr, char *str); -+extern void ep_rcvr_rail_fillout_stats (EP_RCVR_RAIL *rcvr_rail, char *str); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern void ep_csum_rxds (EP_COMMS_SUBSYS *subsys); -+extern void ep_rxd_queue_csum (EP_RXD *rxd); -+#endif -+ -+extern void ep_rxd_received (EP_RXD *rxd); -+extern void ep_rxd_received_now (EP_RXD *rxd); -+ -+/* ep_procfs.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+ -+extern void ep_procfs_rcvr_xmtr_init(void); -+extern void ep_procfs_rcvr_xmtr_fini(void); -+ -+extern void ep_procfs_rcvr_add(EP_RCVR *rcvr); -+extern void ep_procfs_rcvr_del(EP_RCVR *rcvr); -+ -+extern void ep_procfs_rcvr_add_rail(EP_RCVR_RAIL *rcvrRail); -+extern void ep_procfs_rcvr_del_rail(EP_RCVR_RAIL *rcvrRail); -+ -+extern void ep_procfs_xmtr_add(EP_XMTR *xmtr); -+extern void ep_procfs_xmtr_del(EP_XMTR *xmtr); -+ -+extern void ep_procfs_xmtr_add_rail(EP_XMTR_RAIL *xmtrRail); -+extern void ep_procfs_xmtr_del_rail(EP_XMTR_RAIL *xmtrRail); -+ -+ -+/* Public Interface */ -+ -+ -+/* epcomms.c message xmtr functions */ -+extern EP_XMTR *ep_alloc_xmtr (EP_SYS *sys); -+extern void ep_free_xmtr (EP_XMTR *xmtr); -+ -+extern EP_STATUS ep_transmit_message (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_message (EP_XMTR *xmtr, unsigned int destLo, unsigned int destHi, bitmap_t *bitmap, -+ EP_SERVICE service, EP_ATTRIBUTE attr, EP_TXH *handler, void *arg, -+ EP_PAYLOAD *payload, EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_transmit_rpc (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_PAYLOAD *payload, -+ EP_NMD *nmd, int nFrag); -+extern EP_STATUS ep_multicast_forward (EP_XMTR *xmtr, unsigned int dest, EP_SERVICE service, EP_ATTRIBUTE attr, -+ EP_TXH *handler, void *arg, EP_ENVELOPE *env, EP_PAYLOAD *payload, -+ bitmap_t *bitmap, EP_NMD *nmd, int nFrags); -+ -+/* epcomms.c functions for use with polled transmits */ -+extern int ep_poll_transmits (EP_XMTR *xmtr); -+extern int ep_enable_txcallbacks (EP_XMTR *xmtr); -+extern int ep_disable_txcallbacks (EP_XMTR *xmtr); -+ -+/* epcomms.c message rcvr functions */ -+extern EP_RCVR *ep_alloc_rcvr (EP_SYS *sys, EP_SERVICE svc, unsigned int nenvelopes); -+extern void ep_free_rcvr (EP_RCVR *rcvr); -+ -+extern EP_STATUS ep_queue_receive (EP_RCVR *rcvr, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern void ep_requeue_receive (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *nmd, EP_ATTRIBUTE attr); -+extern EP_STATUS ep_rpc_put (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_rpc_get (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_NMD *from, EP_NMD *to, int nFrags); -+extern EP_STATUS ep_complete_rpc (EP_RXD *rxd, EP_RXH *handler, void *arg, EP_STATUSBLK *blk, -+ EP_NMD *from, EP_NMD *to, int nFrags); -+extern void ep_complete_receive (EP_RXD *rxd); -+ -+/* railhints.c */ -+extern int ep_xmtr_bcastrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails); -+extern int ep_xmtr_prefrail (EP_XMTR *xmtr, EP_RAILMASK allowedRails, unsigned nodeId); -+extern EP_RAILMASK ep_xmtr_availrails (EP_XMTR *xmtr); -+extern EP_RAILMASK ep_xmtr_noderails (EP_XMTR *xmtr, unsigned nodeId); -+extern int ep_rcvr_prefrail (EP_RCVR *rcvr, EP_RAILMASK allowedRails); -+extern EP_RAILMASK ep_rcvr_availrails (EP_RCVR *rcvr); -+extern EP_RAILMASK ep_rxd_railmask (EP_RXD *rxd); -+ -+/* epcomms.c functions for accessing fields of rxds */ -+extern void *ep_rxd_arg(EP_RXD *rxd); -+extern int ep_rxd_len(EP_RXD *rxd); -+extern EP_STATUS ep_rxd_status(EP_RXD *rxd); -+extern int ep_rxd_isrpc(EP_RXD *rxd); -+extern EP_ENVELOPE *ep_rxd_envelope(EP_RXD *rxd); -+extern EP_PAYLOAD *ep_rxd_payload(EP_RXD *rxd); -+extern int ep_rxd_node(EP_RXD *rxd); -+extern EP_STATUSBLK *ep_rxd_statusblk(EP_RXD *rxd); -+ -+/* functions for accessing fields of txds */ -+extern int ep_txd_node(EP_TXD *txd); -+extern EP_STATUSBLK *ep_txd_statusblk(EP_TXD *txd); -+ -+/* functions for controlling how many processes are using module */ -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+extern EP_RAILMASK ep_xmtr_svc_indicator_railmask (EP_XMTR *xmtr, int svc_indicator, int nodeId); -+extern int ep_xmtr_svc_indicator_bitmap (EP_XMTR *xmtr, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+ -+#endif /* ! __ELAN__ */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN_EPCOMMS_H */ -+ -Index: linux-2.6.5-7.191/include/elan/epsvc.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/epsvc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/epsvc.h 2005-07-28 14:52:52.935665512 -0400 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_EPSVC_H -+#define __ELAN_EPSVC_H -+ -+#ident "@(#)$Id: epsvc.h,v 1.9 2004/02/13 10:03:27 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/epsvc.h,v $ */ -+ -+ -+#define EP_SVC_NUM_INDICATORS 8 -+#define EP_SVC_INDICATOR_MAX_NAME 32 -+ -+#define EP_SVC_EIP 0 -+#define EP_SVC_NAMES {"eip", "1", "2", "3", "4", "5", "6", "7"}; -+ -+#if defined(__KERNEL__) -+extern int ep_svc_indicator_set (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_clear (EP_SYS *epsys, int svc_indicator); -+extern int ep_svc_indicator_is_set (EP_SYS *epsys, int svc_indicator, int nodeId); -+extern int ep_svc_indicator_bitmap (EP_SYS *epsys, int svc_indicator, bitmap_t * bitmap, int low, int nnodes); -+extern EP_RAILMASK ep_svc_indicator_railmask (EP_SYS *epsys, int svc_indicator, int nodeId); -+#endif -+ -+#endif /* __ELAN_EPSVC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/kalloc.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kalloc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kalloc.h 2005-07-28 14:52:52.935665512 -0400 -@@ -0,0 +1,108 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KALLOC_H -+#define __ELAN3_KALLOC_H -+ -+#ident "$Id: kalloc.h,v 1.11 2004/05/19 10:23:59 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kalloc.h,v $ */ -+ -+#include -+ -+/* -+ * Memory allocator -+ */ -+#define LN2_MIN_SIZE 6 /* 64 bytes */ -+#define LN2_MAX_SIZE 16 /* 64k bytes */ -+#define NUM_FREELISTS (LN2_MAX_SIZE-LN2_MIN_SIZE + 1) -+#define MIN_SIZE (1 << LN2_MIN_SIZE) -+#define MAX_SIZE (1 << LN2_MAX_SIZE) -+ -+#define HASHSHIFT LN2_MAX_SIZE -+#define NHASH 32 -+#define HASH(addr) (((addr) >> HASHSHIFT) & (NHASH-1)) -+ -+typedef enum -+{ -+ EP_ALLOC_TYPE_PRIVATE_SDRAM, -+ EP_ALLOC_TYPE_PRIVATE_MAIN, -+ EP_ALLOC_TYPE_SHARED_MAIN, -+} EP_ALLOC_TYPE; -+ -+typedef struct ep_pool -+{ -+ EP_NMH Handle; /* network mapping handle */ -+ -+ struct list_head HashBase; /* linked on hash lists */ -+ struct list_head HashTop; /* linked on hash lists */ -+ -+ struct list_head Link[NUM_FREELISTS]; /* linked on free lists */ -+ bitmap_t *Bitmaps[NUM_FREELISTS]; /* bitmaps for each size */ -+ -+ union { -+ sdramaddr_t Sdram; -+ unsigned long Ptr; -+ } Buffer; -+} EP_POOL; -+ -+typedef struct ep_alloc -+{ -+ spinlock_t Lock; -+ -+ EP_ALLOC_TYPE Type; -+ unsigned int Perm; -+ -+ EP_RMAP *ResourceMap; -+ -+ struct list_head HashBase[NHASH]; -+ struct list_head HashTop[NHASH]; -+ struct list_head Freelists[NUM_FREELISTS]; -+ -+ union { -+ struct { -+ EP_SYS *System; -+ struct list_head Rails; -+ } Shared; -+ -+ struct { -+ EP_RAIL *Rail; -+ } Private; -+ } Data; -+} EP_ALLOC; -+ -+extern void ep_display_alloc (EP_ALLOC *alloc); -+ -+extern void ep_alloc_init (EP_RAIL *rail); -+extern void ep_alloc_fini (EP_RAIL *rail); -+ -+extern sdramaddr_t ep_alloc_memory_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size, unsigned int perm, EP_ATTRIBUTE attr); -+extern void ep_free_memory_elan (EP_RAIL *rail, EP_ADDR addr); -+ -+extern sdramaddr_t ep_alloc_elan (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addrp); -+extern void ep_free_elan (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+extern void *ep_alloc_main (EP_RAIL *rail, unsigned size, EP_ATTRIBUTE attr, EP_ADDR *addr); -+extern void ep_free_main (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ -+extern sdramaddr_t ep_elan2sdram (EP_RAIL *rail, EP_ADDR addr); -+extern void *ep_elan2main (EP_RAIL *rail, EP_ADDR addr); -+ -+extern void ep_shared_alloc_init (EP_SYS *sys); -+extern void ep_shared_alloc_fini (EP_SYS *sys); -+extern int ep_shared_alloc_add_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_shared_alloc_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern void *ep_shared_alloc_main (EP_SYS *sys, unsigned size, EP_ATTRIBUTE attr, EP_NMD *nmd); -+extern void ep_shared_free_main (EP_SYS *sys, EP_NMD *nmd); -+ -+#endif /* __ELAN_KALLOC_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/kcomm.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kcomm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kcomm.h 2005-07-28 14:52:52.937665208 -0400 -@@ -0,0 +1,839 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KCOMM_H -+#define __ELAN_KCOMM_H -+ -+#ident "$Id: kcomm.h,v 1.71.2.8 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm.h,v $*/ -+#define EP_KCOMM_MAJOR_VERSION 3 -+#define EP_KCOMM_MINOR_VERSION 1 -+ -+#define EP_PROTOCOL_VERSION 1 /* CM/KCOMM protocol revision */ -+ -+#define EP_MAX_NODES 2048 /* Max nodes we support */ -+#define EP_MAX_RAILS 16 /* max number of rails (we use an unsigned short for bitmaps !) */ -+#define EP_MAXFRAG 4 /* max number of fragments */ -+ -+#define EP_BLK_SIZE 64 /* align objects for elan access */ -+ -+/* Elan virtual address address space */ -+#define EP_SYSTEM_QUEUE_BASE 0x00010000 /* Base address for system queues */ -+#define EP_MSGSYS_QUEUE_BASE 0x00020000 /* Base address for msgsys queues */ -+#define EP_EPCOMMS_QUEUE_BASE 0x00030000 /* Base address for message queues */ -+#define EP_DVMA_BASE 0x10000000 /* elan address range for dvma mapping. */ -+#define EP_DVMA_TOP 0xE0000000 -+ -+#define EP_SHARED_BASE 0xE0000000 /* shared main/elan allocators */ -+#define EP_SHARED_TOP 0xF0000000 -+ -+#define EP_PRIVATE_BASE 0xF0000000 /* private main/elan allocators */ -+#define EP_PRIVATE_TOP 0xF8000000 -+ -+#define EP_DVMA_RMAP_SIZE 1024 /* size of resource map for dvma address space */ -+#define EP_SHARED_RMAP_SIZE 1024 /* size of resource map for shared address space */ -+#define EP_PRIVATE_RMAP_SIZE 1024 /* size of resource map for private address space */ -+ -+/* Input queue descriptors fit into 64 bytes */ -+#define EP_QUEUE_DESC_SIZE 64 -+ -+/* Timeouts for checking network position */ -+#define EP_POSITION_TIMEOUT (4*HZ) /* 1s time to notice CheckNetworkPosition changes */ -+#define EP_WITHDRAW_TIMEOUT (2*HZ) /* 2s time before withdrawing from unreachable nodes */ -+ -+/* Time to try again due to resource failue (eg malloc etc) */ -+#define RESOURCE_RETRY_TIME (HZ/20) -+ -+/* Time to retransmit message when send failed */ -+#define MSGBUSY_RETRY_TIME (HZ/20) -+ -+/* Time between retransmits of messages network flush requests */ -+#define MESSAGE_RETRY_TIME (HZ/5) -+ -+/* time to hold the context filter up to ensure that the -+ * next packet of a dma is guaranteed to get nacked (8mS) */ -+#define NETWORK_ERROR_TIMEOUT (1 + roundup (HZ * 8 / 1000, 1)) -+ -+/* Time between retransmits of message failover requests */ -+#define FAILOVER_RETRY_TIME (HZ/5) -+ -+/* compute earliest time */ -+#define SET_NEXT_RUN_TIME(nextRunTime, time) \ -+do { \ -+ if ((nextRunTime) == 0 || AFTER(nextRunTime, (time)))\ -+ (nextRunTime) = (time);\ -+} while (0) -+ -+/* DMA retry backoff/priorities/issue rings */ -+#define EP_NUM_BACKOFF 8 -+#define EP_RETRY_STABALISING 0 -+#define EP_RETRY_BASE 1 -+ -+#define EP_RETRY_CRITICAL EP_RETRY_BASE -+#define EP_RETRY_HIGH_PRI (EP_RETRY_CRITICAL + 1) -+#define EP_RETRY_HIGH_PRI_TIME (1) -+#define EP_RETRY_HIGH_PRI_RETRY (EP_RETRY_HIGH_PRI + 1) -+#define EP_RETRY_HIGH_PRI_RETRY_TIME (2) -+#define EP_RETRY_LOW_PRI (EP_RETRY_HIGH_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_LOW_PRI_TIME (2) -+#define EP_RETRY_LOW_PRI_RETRY (EP_RETRY_LOW_PRI + 1) -+#define EP_RETRY_LOW_PRI_RETRY_TIME (4) -+#define EP_RETRY_ANONYMOUS (EP_RETRY_LOW_PRI_RETRY + EP_NUM_BACKOFF) -+#define EP_RETRY_ANONYMOUS_TIME (10) -+#define EP_RETRY_NETERR (EP_RETRY_ANONYMOUS + EP_NUM_BACKOFF) -+#define EP_RETRY_NETERR_TIME (10) -+#define EP_NUM_RETRIES (EP_RETRY_NETERR + 1) -+ -+typedef unsigned short EP_SERVICE; -+ -+/* EP_ATTRIBUTE 32 bits -+ * -+ * 0-2 -+ * for initial call :- -+ * 0 (0x1) EP_NO_ALLOC used once -+ * 1 (0x2) EP_NO_SLEEP used once -+ * 2 (0x4) EP_NOT_MYSELF used once -+ * -+ * when stored and transmited :- -+ * 0 (0x0) EP_MULTICAST envelope -+ * 1 (0x2) EP_RPC envelope -+ * 2 (0x4) EP_HAS_PAYLOAD envelope -+ * -+ * 3-11 -+ * 3 (0x08) EP_PREFRAIL_SET preserved -+ * 4-7 (0xf0) Pref Rail -+ * 8 (0x100) EP_NO_INTERUPT -+ * 9 (0x200) EP_NO_FAILOVER -+ * -+ * 10 (0x400) EP_INTERRUPT_ENABLED internal -+ * 11 (0x800) EP_TXD_STABALISING internal -+ * -+ * 12-13 Not Used. -+ * -+ * 14-15 (0xC000) Data Type. passed in -+ * 00 none. -+ * 01 Service Indicator. -+ * 10 TimeOut. -+ * 11 RailMask -+ * -+ * 16-31 (0x10000) Data. Service Indicator, TimeOut, RailMask, Pref Rail. -+ * -+*/ -+ -+typedef uint32_t EP_ATTRIBUTE; -+ -+#define EP_LOCAL_ATTR_MASK 0x07 -+#define EP_CLEAR_LOCAL_ATTR(ATTR) ( (ATTR) & ~EP_LOCAL_ATTR_MASK ) -+ -+#define EP_NO_ALLOC 0x01 /* Don't call allocators if no free descriptors */ -+#define EP_NO_SLEEP 0x02 /* Don't sleep if no free descriptors */ -+#define EP_NOT_MYSELF 0x04 /* Don't send multicast to me */ -+ -+#define EP_MULTICAST 0x01 /* Message is a multicast */ -+#define EP_RPC 0x02 /* Wait for RPC reply */ -+#define EP_HAS_PAYLOAD_BIT 0x04 /* transfer payload */ -+ -+ -+#define EP_PREFRAIL_SET 0x08 /* preferred rail is set (otherwise pick one from the NMDs) */ -+ -+#define EP_PREFRAIL_SHIFT (4) -+#define EP_PREFRAIL_MASK 0xf0 -+#define EP_IS_PREFRAIL_SET(ATTR) (((ATTR) & EP_PREFRAIL_SET) != 0) -+#define EP_CLEAR_PREFRAIL(ATTR) (((ATTR) & ~EP_PREFRAIL_SET) & ~EP_PREFRAIL_MASK) -+#define EP_SET_PREFRAIL(ATTR,RAIL) (EP_CLEAR_PREFRAIL(ATTR) | (((RAIL) << EP_PREFRAIL_SHIFT ) & EP_PREFRAIL_MASK ) | EP_PREFRAIL_SET) -+ -+ -+#define EP_ATTR2PREFRAIL(ATTR) (((ATTR) & EP_PREFRAIL_MASK) >> EP_PREFRAIL_SHIFT) -+ -+ -+#define EP_INTERRUPT_ENABLED 0x400 /* event interrupt enabled on EP_NO_INTERRUPT */ -+#define EP_TXD_STABALISING 0x800 /* flag to indicate this is attempting to stabalise */ -+ -+#define EP_IS_MULTICAST(ATTR) (((ATTR) & EP_MULTICAST) != 0) -+#define EP_SET_MULTICAST(ATTR) ( (ATTR) | EP_MULTICAST) -+#define EP_CLEAR_MULTICAST(ATTR) ( (ATTR) & ~EP_MULTICAST) -+ -+#define EP_IS_RPC(ATTR) (((ATTR) & EP_RPC) != 0) -+#define EP_SET_RPC(ATTR) ( (ATTR) | EP_RPC) -+#define EP_CLEAR_RPC(ATTR) ( (ATTR) & ~EP_RPC) -+ -+#define EP_HAS_PAYLOAD(ATTR) (((ATTR) & EP_HAS_PAYLOAD_BIT) != 0) -+#define EP_SET_HAS_PAYLOAD(ATTR) ( (ATTR) | EP_HAS_PAYLOAD_BIT) -+#define EP_CLEAR_HAS_PAYLOAD(ATTR) ( (ATTR) & ~EP_HAS_PAYLOAD_BIT) -+ -+#define EP_IS_INTERRUPT_ENABLED(ATTR) (((ATTR) & EP_INTERRUPT_ENABLED) != 0) -+#define EP_SET_INTERRUPT_ENABLED(ATTR) ( (ATTR) | EP_INTERRUPT_ENABLED) -+#define EP_CLEAR_INTERRUPT_ENABLED(ATTR) ( (ATTR) & ~EP_INTERRUPT_ENABLED) -+ -+#define EP_IS_TXD_STABALISING(ATTR) (((ATTR) & EP_TXD_STABALISING) != 0) -+#define EP_SET_TXD_STABALISING(ATTR) ( (ATTR) | EP_TXD_STABALISING) -+#define EP_CLEAR_TXD_STABALISING(ATTR) ( (ATTR) & ~EP_TXD_STABALISING) -+ -+#define EP_NO_INTERRUPT 0x100 /* Don't generate completion interrupt (tx) */ -+#define EP_NO_FAILOVER 0x200 /* don't attempt rail failover, just abort */ -+ -+#define EP_IS_NO_INTERRUPT(ATTR) (((ATTR) & EP_NO_INTERRUPT) != 0) -+#define EP_SET_NO_INTERRUPT(ATTR) ( (ATTR) | EP_NO_INTERRUPT) -+#define EP_CLEAR_NO_INTERRUPT(ATTR) ( (ATTR) & ~EP_NO_INTERRUPT) -+ -+#define EP_IS_NO_FAILOVER(ATTR) (((ATTR) & EP_NO_FAILOVER) != 0) -+#define EP_SET_NO_FAILOVER(ATTR) ( (ATTR) | EP_NO_FAILOVER) -+#define EP_CLEAR_NO_FAILOVER(ATTR) ( (ATTR) & ~EP_NO_FAILOVER) -+ -+#define EP_TYPE_MASK 0xC000 -+#define EP_TYPE_SVC_INDICATOR 0x4000 -+#define EP_TYPE_TIMEOUT 0x8000 -+#define EP_TYPE_RAILMASK 0xC000 -+ -+#define EP_ATTR2TYPE(ATTR) ( (ATTR) & EP_TYPE_MASK ) -+ -+#define EP_IS_SVC_INDICATOR(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_SVC_INDICATOR) -+#define EP_IS_TIMEOUT(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_TIMEOUT) -+#define EP_IS_RAILMASK(ATTR) (EP_ATTR2TYPE(ATTR) == EP_TYPE_RAILMASK) -+#define EP_IS_NO_TYPE(ATTR) (EP_ATTR2TYPE(ATTR) == 0) -+ -+#define EP_DATA_SHIFT (16) -+#define EP_DATA_MASK 0xffff0000 -+ -+#define EP_ATTR2DATA(ATTR) (((ATTR) & EP_DATA_MASK) >> EP_DATA_SHIFT) -+#define EP_DATA2ATTR(DATA) (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK) -+ -+#define EP_CLEAR_DATA(ATTR) (((ATTR) & ~EP_TYPE_MASK) & ~EP_DATA_MASK) -+#define EP_SET_DATA(ATTR,TYPE,DATA) (EP_CLEAR_DATA(ATTR) | ((TYPE) & EP_TYPE_MASK) | (((DATA) << EP_DATA_SHIFT) & EP_DATA_MASK)) -+ -+#define EP_DEFAULT_TIMEOUT (HZ*30) -+ -+#if !defined(offsetof) -+#define offsetof(s, m) (unsigned long)(&(((s *)0)->m)) -+#endif -+#if !defined(roundup) -+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) -+#endif -+ -+/* -+ * Message transaction ID's - these are unique 64 bts -+ * numbers which include the initial rail number. -+ */ -+typedef struct ep_xid -+{ -+ uint32_t Generation; -+ uint32_t Handle; -+ uint64_t Unique; -+} EP_XID; -+ -+#define EP_INVALIDATE_XID(xid) ((xid).Generation = (xid).Handle = (xid).Unique = 0) -+ -+#define EP_XID_INVALID(xid) ((xid).Generation == 0 && (xid).Handle == 0 && (xid).Unique == 0) -+#define EP_XIDS_MATCH(a,b) ((a).Generation == (b).Generation && (a).Handle == (b).Handle && (a).Unique == (b).Unique) -+ -+typedef struct ep_backoff -+{ -+ unsigned char type; -+ unsigned char indx; -+ unsigned short count; -+} EP_BACKOFF; -+ -+/* values for "type" */ -+#define EP_BACKOFF_FREE 0 -+#define EP_BACKOFF_ENVELOPE 1 -+#define EP_BACKOFF_FETCH 2 -+#define EP_BACKOFF_DATA 3 -+#define EP_BACKOFF_DONE 4 -+#define EP_BACKOFF_STABILISE 5 -+ -+#ifndef __ELAN__ -+ -+/* forward declaration of types */ -+typedef struct ep_rail EP_RAIL; -+typedef struct ep_sys EP_SYS; -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+typedef struct ep_callback -+{ -+ struct ep_callback *Next; -+ void (*Routine)(void *, statemap_t *); -+ void *Arg; -+} EP_CALLBACK; -+ -+#define EP_CB_FLUSH_FILTERING 0 -+#define EP_CB_FLUSH_FLUSHING 1 -+#define EP_CB_PASSIVATED 2 -+#define EP_CB_FAILOVER 3 -+#define EP_CB_DISCONNECTING 4 -+#define EP_CB_DISCONNECTED 5 -+#define EP_CB_NODESET 6 -+#define EP_CB_COUNT 7 -+ -+#endif /* !defined(__ELAN__) */ -+ -+/* Small unreliable system message queues */ -+#define EP_SYSTEMQ_INTR 0 /* input queue for cluster membership generating an interrupt */ -+#define EP_SYSTEMQ_POLLED 1 /* input queue for cluster membership polled on clock tick */ -+#define EP_SYSTEMQ_MANAGER 2 /* input queue for manager messages */ -+#define EP_NUM_SYSTEMQ 64 -+ -+#define EP_SYSTEMQ_ADDR(qnum) (EP_SYSTEM_QUEUE_BASE + (qnum) * EP_QUEUE_DESC_SIZE) -+#define EP_SYSTEMQ_DESC(base,qnum) ((base) + (qnum) * EP_QUEUE_DESC_SIZE) -+ -+#define EP_SYSTEMQ_MSG_ALIGN 64 /* message sizes aligned to 64 byte boundaries */ -+#define EP_SYSTEMQ_MSG_MAX (4*64) /* max message size */ -+ -+/* Special flag for Version field to indicate message not -+ * seen in main memory yet and time limit to poll for it */ -+#define EP_SYSTEMQ_UNRECEIVED 0xdeadbabe -+#define EP_SYSTEMQ_UNRECEIVED_TLIMIT 16384 /* 1023 uS */ -+ -+#ifndef __ELAN__ -+ -+typedef void (EP_INPUTQ_HANDLER) (EP_RAIL *rail, void *arg, void *msg); -+typedef void (EP_INPUTQ_CALLBACK) (EP_RAIL *rail, void *arg); -+ -+typedef struct ep_inputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_INPUTQ; -+ -+typedef struct ep_outputq -+{ -+ unsigned long q_hidden; /* implementation hidden as ep3 or ep4 */ -+} EP_OUTPUTQ; -+ -+/* returned values for ep_outputq_state */ -+#define EP_OUTPUTQ_BUSY 0 -+#define EP_OUTPUTQ_FAILED 1 -+#define EP_OUTPUTQ_FINISHED 2 -+ -+typedef struct ep_switch -+{ -+ unsigned present:1; -+ unsigned invalid:1; -+ unsigned link:3; -+ unsigned bcast:3; -+ unsigned lnr; -+} EP_SWITCH; -+ -+/* -+ * Network error fixup, flush, relocation messges -+ */ -+typedef struct ep_map_nmd_body -+{ -+ uint32_t nFrags; -+ EP_RAILMASK Railmask; -+ EP_NMD Nmd[EP_MAXFRAG]; -+} EP_MAP_NMD_BODY; -+ -+typedef struct ep_failover_body -+{ -+ EP_XID Xid; -+ EP_RAILMASK Railmask; -+} EP_FAILOVER_BODY; -+ -+typedef struct ep_failover_txd -+{ -+ EP_XID Xid; -+ uint32_t Rail; -+ EP_ADDR TxdRail; -+} EP_FAILOVER_TXD; -+ -+typedef uint64_t EP_NETERR_COOKIE; -+ -+#define EP_PANIC_STRLEN 31 -+ -+typedef struct ep_node_state -+{ -+ unsigned char State; -+ unsigned char NetworkErrorState; -+ EP_RAILMASK Railmask; -+} EP_NODE_STATE; -+ -+#define EP_MANAGER_MSG_SIZE (2 * EP_SYSTEMQ_MSG_ALIGN) -+ -+typedef struct ep_manager_msg_hdr -+{ -+ EP_XID Xid; /* Message transaction id */ -+ -+ uint16_t NodeId; /* Originating node number */ -+ uint16_t DestId; /* destination node id */ -+ -+ uint16_t Checksum; /* Message checksum */ -+ uint8_t Rail; /* Rail message associated with */ -+ uint8_t Type; /* Message type */ -+ -+ uint32_t Pad; /* pad to 32 bytes */ -+ -+ uint32_t Version; /* Message Version */ -+} EP_MANAGER_MSG_HDR; -+ -+typedef union ep_manager_msg_body -+{ -+ unsigned char Space[EP_MANAGER_MSG_SIZE - sizeof (EP_MANAGER_MSG_HDR)]; -+ -+ EP_NETERR_COOKIE Cookies[2]; /* EP_MSG_TYPE_NETERR */ -+ EP_MAP_NMD_BODY MapNmd; /* EP_MSG_TYPE_MAP_NMD */ -+ EP_FAILOVER_BODY Failover; /* EP_MSG_TYPE_FAILOVER_REQUEST */ -+ EP_FAILOVER_TXD FailoverTxd; /* EP_MSG_TYPE_FAILOVER_RESPONSE */ -+ unsigned char PanicReason[EP_PANIC_STRLEN+1]; /* EP_MSG_TYPE_REMOTE_PANIC */ -+ EP_NODE_STATE NodeState; /* EP_MSG_TYPE_GET_NODE_STATE_RESPONSE */ -+ EP_SERVICE Service; /* EP_MSG_TYPE_GET_NODE_STATE */ -+} EP_MANAGER_MSG_BODY; -+ -+typedef struct ep_manager_msg -+{ -+ EP_MANAGER_MSG_BODY Body; -+ EP_MANAGER_MSG_HDR Hdr; -+} EP_MANAGER_MSG; -+ -+#define EP_MANAGER_MSG_VERSION 0xcad01000 -+#define EP_MANAGER_MSG_TYPE_REMOTE_PANIC 0x00 -+#define EP_MANAGER_MSG_TYPE_NETERR_REQUEST 0x01 -+#define EP_MANAGER_MSG_TYPE_NETERR_RESPONSE 0x02 -+#define EP_MANAGER_MSG_TYPE_FLUSH_REQUEST 0x03 -+#define EP_MANAGER_MSG_TYPE_FLUSH_RESPONSE 0x04 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_REQUEST 0x05 -+#define EP_MANAGER_MSG_TYPE_MAP_NMD_RESPONSE 0x06 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_REQUEST 0x07 -+#define EP_MANAGER_MSG_TYPE_FAILOVER_RESPONSE 0x08 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE 0x09 -+#define EP_MANAGER_MSG_TYPE_GET_NODE_STATE_RESPONSE 0x0a -+ -+/* Message types which should only be sent when a rail is connected */ -+#define EP_MANAGER_MSG_TYPE_CONNECTED(type) (((type) & 1) == 1) -+ -+#define EP_MANAGER_OUTPUTQ_SLOTS 128 /* # entries in outputq */ -+#define EP_MANAGER_INPUTQ_SLOTS 128 /* # entries in inputq */ -+#define EP_MANAGER_OUTPUTQ_RETRIES 31 /* # retries for manager messages */ -+ -+/* XID's are allocated from a cache, which doesn't -+ * require locking since it relies on the caller to -+ * manage the locking for us. -+ */ -+typedef struct ep_xid_cache -+{ -+ struct list_head Link; -+ -+ uint32_t Handle; /* my XID cache handle */ -+ uint64_t Current; /* range of XID.Unique we can allocate from */ -+ uint64_t Last; -+ -+ void (*MessageHandler)(void *arg, EP_MANAGER_MSG *); -+ void *Arg; -+} EP_XID_CACHE; -+ -+#define EP_XID_CACHE_CHUNKS (10000) -+ -+typedef struct ep_node_rail -+{ -+ struct list_head Link; /* can be linked on work lists */ -+ -+ unsigned char State; /* node connection state */ -+ unsigned char NetworkErrorState; /* reasons for keeping the context filter up */ -+ unsigned char MessageState; /* state of messages during passivate/relocate */ -+ -+ EP_XID MsgXid; /* neterr/flush transaction id */ -+ long NextRunTime; /* time to drop context filter for destroyed dma packet, or to send next request */ -+ EP_NETERR_COOKIE NetworkErrorCookies[2]; /* identify cookie for destroyed atomic packet */ -+ -+ uint32_t Cookie; /* per-node network error cookie */ -+ spinlock_t CookieLock; /* and spinlock for it. */ -+ -+ struct list_head StalledDmas; /* list of stalled DMAs */ -+} EP_NODE_RAIL; -+ -+#define EP_NODE_DISCONNECTED 0 /* node is disconnected */ -+#define EP_NODE_CONNECTING 1 /* awaiting connection */ -+#define EP_NODE_CONNECTED 2 /* node is connected */ -+#define EP_NODE_LEAVING_CONNECTED 3 /* node is starting to disconnect */ -+#define EP_NODE_LOCAL_PASSIVATE 4 /* flushing context filter/run queues */ -+#define EP_NODE_REMOTE_PASSIVATE 5 /* stalling for neterr flush */ -+#define EP_NODE_PASSIVATED 6 /* relocating active/passive messages */ -+#define EP_NODE_DISCONNECTING 7 /* entering disconncted - abort remaining comms */ -+#define EP_NODE_NUM_STATES 8 -+ -+#define EP_NODE_NETERR_ATOMIC_PACKET (1 << 0) -+#define EP_NODE_NETERR_DMA_PACKET (1 << 1) -+ -+#define EP_NODE_PASSIVE_MESSAGES (1 << 0) -+#define EP_NODE_ACTIVE_MESSAGES (1 << 1) -+ -+/* -+ * Kernel thread code is loaded as a table. -+ */ -+typedef struct ep_symbol -+{ -+ char *name; -+ EP_ADDR value; -+} EP_SYMBOL; -+ -+typedef struct ep_code -+{ -+ u_char *text; -+ u_int text_size; -+ u_char *data; -+ u_int data_size; -+ u_char *rodata; -+ u_int rodata_size; -+ EP_SYMBOL *symbols; -+ -+ int ntext; -+ sdramaddr_t pptext; -+ EP_ADDR etext; -+ sdramaddr_t _stext; -+ sdramaddr_t _rodata; -+ -+ int ndata; -+ sdramaddr_t ppdata; -+ EP_ADDR edata; -+ sdramaddr_t _sdata; -+} EP_CODE; -+ -+typedef struct ep_switchstate -+{ -+ unsigned char linkid; -+ unsigned char LNR; -+ unsigned char bcast; -+ unsigned char uplink; -+} EP_SWITCHSTATE; -+ -+typedef struct ep_rail_ops -+{ -+ void (*DestroyRail) (EP_RAIL *rail); -+ -+ int (*StartRail) (EP_RAIL *rail); -+ void (*StallRail) (EP_RAIL *rail); -+ void (*StopRail) (EP_RAIL *rail); -+ -+ sdramaddr_t (*SdramAlloc) (EP_RAIL *rail, EP_ADDR addr, unsigned size); -+ void (*SdramFree) (EP_RAIL *rail, sdramaddr_t addr, unsigned size); -+ void (*SdramWriteb) (EP_RAIL *rail, sdramaddr_t addr, unsigned char val); -+ -+ void (*KaddrMap) (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t kaddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*SdramMap) (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned len, unsigned int perm, int ep_attr); -+ void (*Unmap) (EP_RAIL *rail, EP_ADDR eaddr, unsigned len); -+ -+ void *(*DvmaReserve) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages); -+ void (*DvmaRelease) (EP_RAIL *rail, EP_ADDR eaddr, unsigned npages, void *private); -+ void (*DvmaSetPte) (EP_RAIL *rail, void *private, unsigned index, physaddr_t phys, unsigned int perm); -+ physaddr_t (*DvmaReadPte) (EP_RAIL *rail, void *private, unsigned index); -+ void (*DvmaUnload)(EP_RAIL *rail, void *private, unsigned index, unsigned npages); -+ void (*FlushTlb) (EP_RAIL *rail); -+ -+ int (*ProbeRoute) (EP_RAIL *r, int level, int sw, int nodeid, int *linkup, -+ int *linkdown, int attempts, EP_SWITCH *lsw); -+ void (*PositionFound) (EP_RAIL *rail, ELAN_POSITION *pos); -+ int (*CheckPosition) (EP_RAIL *rail); -+ void (*NeterrFixup) (EP_RAIL *rail, unsigned int nodeId, EP_NETERR_COOKIE *cookies); -+ -+ void (*LoadSystemRoute) (EP_RAIL *rail, unsigned int vp, unsigned int lowNode, unsigned int highNode); -+ -+ void (*LoadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*UnloadNodeRoute) (EP_RAIL *rail, unsigned nodeId); -+ void (*LowerFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*RaiseFilter) (EP_RAIL *rail, unsigned nodeId); -+ void (*NodeDisconnected) (EP_RAIL *rail, unsigned nodeId); -+ -+ void (*FlushFilters) (EP_RAIL *rail); -+ void (*FlushQueues) (EP_RAIL *rail); -+ -+ -+ EP_INPUTQ *(*AllocInputQ) (EP_RAIL *rail, unsigned qnum, unsigned slotSize, unsigned slotCount, -+ void (*callback)(EP_RAIL *rail, void *arg), void *arg); -+ void (*FreeInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*EnableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ void (*DisableInputQ) (EP_RAIL *rail, EP_INPUTQ *q); -+ int (*PollInputQ) (EP_RAIL *rail, EP_INPUTQ *q, int maxCount, EP_INPUTQ_HANDLER *handler, void *arg); -+ -+ EP_OUTPUTQ *(*AllocOutputQ) (EP_RAIL *rail, unsigned slotSize, unsigned slotCount); -+ void (*FreeOutputQ) (EP_RAIL *rail, EP_OUTPUTQ *outputq); -+ void *(*OutputQMsg) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQState) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum); -+ int (*OutputQSend) (EP_RAIL *rail, EP_OUTPUTQ *outputq, unsigned slotNum, unsigned size, -+ unsigned vp, unsigned qnum, unsigned retries); -+ -+ void (*FillOutStats) (EP_RAIL *rail, char *str); -+ void (*Debug) (EP_RAIL *rail); -+ -+} EP_RAIL_OPS; -+ -+#define ep_alloc_inputq(rail,qnum,slotSize,slotCount,callback,arg) \ -+ (rail)->Operations.AllocInputQ(rail,qnum,slotSize,slotCount,callback,arg) -+#define ep_free_inputq(rail,inputq) \ -+ (rail)->Operations.FreeInputQ(rail,inputq) -+#define ep_enable_inputq(rail,inputq) \ -+ (rail)->Operations.EnableInputQ(rail,inputq) -+#define ep_disable_inputq(rail,inputq) \ -+ (rail)->Operations.DisableInputQ(rail,inputq) -+#define ep_poll_inputq(rail,inputq,maxCount,handler,arg) \ -+ (rail)->Operations.PollInputQ(rail,inputq,maxCount,handler,arg) -+#define ep_alloc_outputq(rail,slotSize,slotCount)\ -+ (rail)->Operations.AllocOutputQ(rail,slotSize,slotCount) -+#define ep_free_outputq(rail,outputq)\ -+ (rail)->Operations.FreeOutputQ(rail,outputq) -+#define ep_outputq_msg(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQMsg(rail,outputq,slotNum) -+#define ep_outputq_state(rail,outputq,slotNum)\ -+ (rail)->Operations.OutputQState(rail,outputq,slotNum) -+#define ep_outputq_send(rail,outputq,slotNum,size,vp,qnum,retries)\ -+ (rail)->Operations.OutputQSend(rail,outputq,slotNum,size,vp,qnum,retries) -+ -+struct ep_rail -+{ -+ EP_SYS *System; /* "system" we've attached to */ -+ -+ unsigned char Number; /* Rail number */ -+ unsigned char State; /* Rail state */ -+ char Name[32]; /* Rail name */ -+ -+ struct list_head ManagerLink; /* linked on ManagedRails list */ -+ -+ ELAN_DEVINFO Devinfo; /* Device information for this rail */ -+ ELAN_POSITION Position; /* Position on switch device is connected to */ -+ -+ EP_RAIL_OPS Operations; /* device specific operations */ -+ EP_RAIL_STATS Stats; /* statistics */ -+ -+ EP_ALLOC ElanAllocator; /* per-rail elan memory allocator */ -+ EP_ALLOC MainAllocator; /* per-rail main memory allocator */ -+ -+ unsigned TlbFlushRequired; /* lazy TLB flushing */ -+ -+ int SwitchBroadcastLevel; /* current switch level ok for broadcast */ -+ unsigned long SwitchBroadcastLevelTick; -+ -+ int SwitchProbeLevel; /* result of last switch probe */ -+ EP_SWITCHSTATE SwitchState[ELAN_MAX_LEVELS]; -+ EP_SWITCHSTATE SwitchLast[ELAN_MAX_LEVELS]; -+ unsigned long SwitchProbeTick[ELAN_MAX_LEVELS]; -+ -+ /* Node disconnecting/connecting state */ -+ EP_CALLBACK *CallbackList[EP_CB_COUNT]; /* List of callbacks */ -+ kmutex_t CallbackLock; /* and lock for it. */ -+ unsigned CallbackStep; /* step through UpdateConnectionState. */ -+ -+ /* back pointer for cluster membership */ -+ void *ClusterRail; -+ -+ /* Per node state for message passing */ -+ EP_NODE_RAIL *Nodes; /* array of per-node state */ -+ statemap_t *NodeSet; /* per-rail statemap of connected nodes */ -+ statemap_t *NodeChangeMap; /* statemap of nodes to being connected/disconnected */ -+ statemap_t *NodeChangeTmp; /* and temporary copies */ -+ -+ struct list_head NetworkErrorList; /* list of nodes resolving network errors */ -+ struct list_head LocalPassivateList; /* list of nodes in state LOCAL_PASSIVATE */ -+ struct list_head RemotePassivateList; /* list of nodes waiting for remote network error flush */ -+ struct list_head PassivatedList; /* list of nodes performing message relocation */ -+ struct list_head DisconnectingList; /* list of nodes transitioning to disconnected */ -+ -+ EP_XID_CACHE XidCache; /* XID cache for node messages (single threaded access) */ -+ -+ /* Manager messages */ -+ EP_INPUTQ *ManagerInputQ; -+ EP_OUTPUTQ *ManagerOutputQ; -+ unsigned ManagerOutputQNextSlot; -+ spinlock_t ManagerOutputQLock; -+ -+ /* /proc entries */ -+ struct proc_dir_entry *ProcDir; -+ struct proc_dir_entry *SvcIndicatorDir; -+ int CallbackRegistered; -+}; -+ -+/* values for State */ -+#define EP_RAIL_STATE_UNINITIALISED 0 /* device uninitialised */ -+#define EP_RAIL_STATE_STARTED 1 /* device started but network position unknown */ -+#define EP_RAIL_STATE_RUNNING 2 /* device started and position known */ -+#define EP_RAIL_STATE_INCOMPATIBLE 3 /* device started, but position incompatible */ -+ -+typedef struct ep_rail_entry -+{ -+ struct list_head Link; -+ EP_RAIL *Rail; -+} EP_RAIL_ENTRY; -+ -+typedef struct ep_subsys -+{ -+ EP_SYS *Sys; -+ -+ struct list_head Link; /* Linked on sys->Subsystems */ -+ char *Name; /* Name to lookup */ -+ -+ void (*Destroy) (struct ep_subsys *subsys, EP_SYS *sys); -+ -+ int (*AddRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+ void (*RemoveRail) (struct ep_subsys *subsys, EP_SYS *sys, EP_RAIL *rail); -+} EP_SUBSYS; -+ -+typedef struct ep_node -+{ -+ EP_RAILMASK ConnectedRails; -+} EP_NODE; -+ -+struct ep_sys -+{ -+ EP_RAIL *Rails[EP_MAX_RAILS]; /* array of all available devices */ -+ -+ kmutex_t StartStopLock; /* lock for starting stopping rails */ -+ -+ ELAN_POSITION Position; /* primary node position */ -+ -+ EP_NMH_TABLE MappingTable; /* Network mapping handle table */ -+ -+ EP_ALLOC Allocator; /* shared main memory allocator */ -+ -+ EP_DVMA_STATE DvmaState; /* dvma state */ -+ -+ kmutex_t SubsysLock; /* lock on the Subsytems list */ -+ struct list_head Subsystems; /* list of subsystems */ -+ -+ /* device manager state */ -+ struct list_head ManagedRails; /* list of managed devices */ -+ EP_KTHREAD ManagerThread; /* place for manager thread to sleep */ -+ -+ /* global node state */ -+ spinlock_t NodeLock; /* spinlock for node state (including per-device node state) */ -+ EP_NODE *Nodes; /* system wide node state */ -+ statemap_t *NodeSet; /* system wide nodeset */ -+ struct list_head NodesetCallbackList; /* list of "callbacks" */ -+ -+ /* Transaction Id */ -+ struct list_head XidCacheList; /* list of XID caches */ -+ uint32_t XidGeneration; /* XID generation number (distinguishes reboots) */ -+ uint32_t XidHandle; /* XID handles (distinguishes XID caches) */ -+ uint64_t XidNext; /* next XID to prime cache */ -+ spinlock_t XidLock; /* and it's spinlock */ -+ -+ /* Shutdown/Panic */ -+ unsigned int Shutdown; /* node has shutdown/panic'd */ -+}; -+ -+#if defined(DEBUG_ASSERT) -+extern int ep_assfail (EP_RAIL *rail, const char *string, const char *func, const char *file, const int line); -+extern int sdram_assert; -+extern int assfail_mode; -+ -+#define EP_ASSERT(rail, EX) do { \ -+ if (!(EX) && ep_assfail ((EP_RAIL *) (rail), #EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define EP_ASSFAIL(rail,EX) do { \ -+ if (ep_assfail ((EP_RAIL *) (rail), EX, __FUNCTION__, __FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#define SDRAM_ASSERT(EX) (sdram_assert ? (EX) : 1) -+#else -+#define EP_ASSERT(rail, EX) ((void) 0) -+#define EP_ASSFAIL(rail,str) ((void) 0) -+#define SDRAM_ASSERT(EX) (1) -+#endif -+ -+/* conf_osdep.c */ -+extern EP_SYS *ep_system(void); -+extern void ep_mod_dec_usecount (void); -+extern void ep_mod_inc_usecount (void); -+ -+/* procfs_osdep.c */ -+extern struct proc_dir_entry *ep_procfs_root; -+extern struct proc_dir_entry *ep_config_root; -+ -+/* kcomm.c */ -+extern int ep_sys_init (EP_SYS *sys); -+extern void ep_sys_fini (EP_SYS *sys); -+extern void ep_shutdown (EP_SYS *sys); -+extern int ep_init_rail (EP_SYS *sys, EP_RAIL *rail); -+extern void ep_destroy_rail (EP_RAIL *rail); -+extern int ep_start_rail (EP_RAIL *rail); -+extern void ep_stop_rail (EP_RAIL *rail); -+ -+extern void ep_connect_node (EP_RAIL *rail, int nodeId); -+extern int ep_disconnect_node (EP_RAIL *rail, int nodeId); -+ -+extern EP_XID ep_xid_cache_alloc (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_init (EP_SYS *sys, EP_XID_CACHE *cache); -+extern void ep_xid_cache_destroy (EP_SYS *sys, EP_XID_CACHE *cache); -+ -+extern int ep_send_message (EP_RAIL *rail, int nodeId, int type, EP_XID xid, EP_MANAGER_MSG_BODY *body); -+ -+extern void ep_panic_node (EP_SYS *sys, int nodeId, unsigned char *reason); -+ -+extern void ep_subsys_add (EP_SYS *sys, EP_SUBSYS *subsys); -+extern void ep_subsys_del (EP_SYS *sys, EP_SUBSYS *subsys); -+extern EP_SUBSYS *ep_subsys_find (EP_SYS *sys, char *name); -+ -+extern void DisplayNodes (EP_RAIL *rail); -+ -+extern void ep_fillout_stats(EP_RAIL *rail, char *str); -+ -+/* neterr.c */ -+extern void ep_queue_network_error (EP_RAIL *rail, int nodeId, int what, int channel, EP_NETERR_COOKIE cookie); -+ -+/* kcomm_elan3.c */ -+extern unsigned int ep3_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* kcomm_elan4.c */ -+extern unsigned int ep4_create_rails (EP_SYS *sys, unsigned int disabled); -+ -+/* probenetwork.c */ -+extern int ProbeNetwork (EP_RAIL *rail, ELAN_POSITION *pos); -+extern void CheckPosition (EP_RAIL *rail); -+ -+extern uint16_t CheckSum (char *msg, int nob); -+ -+/* threadcode.c */ -+extern EP_ADDR ep_symbol (EP_CODE *code, char *name); -+extern int ep_loadcode (EP_RAIL *rail, EP_CODE *code); -+extern void ep_unloadcode (EP_RAIL *rail, EP_CODE *code); -+ -+/* Public interface */ -+/* debug.c */ -+extern int ep_sprintf_bitmap (char *str, unsigned nbytes, bitmap_t *bitmap, int base, int count, int off); -+extern void ep_display_bitmap (char *prefix, char *tag, bitmap_t *bitmap, unsigned base, unsigned nbits); -+ -+/* epcomms.c */ -+extern int ep_waitfor_nodeid (EP_SYS *sys); -+extern int ep_nodeid (EP_SYS *sys); -+extern int ep_numnodes (EP_SYS *sys); -+ -+/* railhints.c */ -+extern int ep_pickRail(EP_RAILMASK railmask); -+ -+/* support.c */ -+extern int ep_register_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_nodeset_callback (EP_SYS *sys, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_nodeset_callbacks (EP_SYS *sys, statemap_t *map); -+ -+extern int ep_register_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_remove_callback (EP_RAIL *rail, unsigned idx, void (*routine)(void *, statemap_t *), void *arg); -+extern void ep_call_callbacks (EP_RAIL *rail, unsigned idx, statemap_t *); -+extern unsigned int ep_backoff (EP_BACKOFF *backoff, int type); -+ -+#endif /* !__ELAN__ */ -+ -+typedef struct display_info { -+ void (*func)(long, char *, ...); -+ long arg; -+} DisplayInfo; -+ -+extern DisplayInfo di_ep_debug; -+ -+ -+#endif /* __ELAN_KCOMM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/kcomm_stats.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kcomm_stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kcomm_stats.h 2005-07-28 14:52:52.937665208 -0400 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __EP_EPSTATS_H -+#define __EP_EPSTATS_H -+ -+#ident "$Id: kcomm_stats.h,v 1.4.8.1 2004/11/12 10:54:51 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kcomm_stats.h,v $ */ -+ -+#define EP_BUCKET_SLOTS 8 -+ -+#define BucketStat(obj,stat,size) ((size) < 128 ? (obj)->Stats.stat[0]++ : \ -+ (size) < 512 ? (obj)->Stats.stat[1]++ : \ -+ (size) < 1024 ? (obj)->Stats.stat[2]++ : \ -+ (size) < 8192 ? (obj)->Stats.stat[3]++ : \ -+ (size) < 16384 ? (obj)->Stats.stat[4]++ : \ -+ (size) < 32768 ? (obj)->Stats.stat[5]++ : \ -+ (size) < 65536 ? (obj)->Stats.stat[6]++ : \ -+ (obj)->Stats.stat[7]++) -+#define IncrStat(obj,stat) ((obj)->Stats.stat++) -+ -+ -+#define EP3_NUM_DMA_FAIL 11 /* NOTE - the same as EP_NUM_RETRIES */ -+ -+#define ADD_STAT(STATS,STAT,VALUE) { unsigned long now = lbolt;\ -+ STATS.STAT.total += VALUE; \ -+ if ( ( now - STATS.STAT.last_time ) > HZ ) { \ -+ STATS.STAT.last_per_sec = ( STATS.STAT.total - STATS.STAT.last_count)/ ( (( now - STATS.STAT.last_time ) + (HZ/2)) / HZ);\ -+ STATS.STAT.last_time = now; \ -+ STATS.STAT.last_count = STATS.STAT.total; \ -+ }} \ -+ -+#define INC_STAT(STATS,STAT) ADD_STAT(STATS,STAT,1) -+ -+#define GET_STAT_PER_SEC(STATS, STAT) ( (( lbolt - STATS.STAT.last_time ) < (HZ * 5)) ? STATS.STAT.last_per_sec : 0 ) -+#define GET_STAT_TOTAL(STATS, STAT) ( STATS.STAT.total ) -+ -+struct ep_stats_count -+{ -+ unsigned long total; -+ unsigned long last_time; -+ unsigned long last_count; -+ unsigned long last_per_sec; -+}; -+ -+typedef struct ep_stats_count EP_STATS_COUNT; -+ -+typedef struct ep3_rail_stats -+{ -+ unsigned long IssueDmaFail[EP3_NUM_DMA_FAIL]; -+ -+ unsigned long DmaQueueLength[EP_BUCKET_SLOTS]; -+ unsigned long CprocDmaQueueOverflow; -+ unsigned long DprocDmaQueueOverflow; -+ unsigned long IprocDmaQueueOverflow; -+ unsigned long CprocEventQueueOverflow; -+ unsigned long DprocEventQueueOverflow; -+ unsigned long IprocEventQueueOverflow; -+ -+ unsigned long QueueingPacketTrap; -+ unsigned long DmaIdentifyTrap; -+ unsigned long ThreadIdentifyTrap; -+ unsigned long DmaPacketTrap; -+} EP3_RAIL_STATS; -+ -+typedef struct ep4_rail_stats -+{ -+ unsigned long somestatsgohere; -+} EP4_RAIL_STATS; -+ -+typedef struct ep_rail_stats -+{ -+ unsigned long SendMessageFailed; -+ unsigned long NeterrAtomicPacket; -+ unsigned long NeterrDmaPacket; -+ -+ EP_STATS_COUNT rx; -+ EP_STATS_COUNT rx_len; -+ -+ EP_STATS_COUNT tx; -+ EP_STATS_COUNT tx_len; -+ -+} EP_RAIL_STATS; -+ -+typedef struct ep_cm_rail_stats -+{ -+ /* cluster membership statistics */ -+ unsigned long HeartbeatsSent; -+ unsigned long HeartbeatsRcvd; -+ -+ unsigned long RetryHeartbeat; -+ unsigned long RejoinRequest; -+ unsigned long RejoinTooSlow; -+ unsigned long LaunchMessageFail; -+ unsigned long MapChangesSent; -+ -+ /* Heartbeat scheduling stats */ -+ unsigned long HeartbeatOverdue; -+} EP_CM_RAIL_STATS; -+ -+typedef struct ep_comms_rail_stats -+{ -+ /* kernel comms large message statistics */ -+ unsigned long TxEnveEvent; -+ unsigned long TxDataEvent; -+ unsigned long TxDoneEvent; -+ unsigned long RxDoneEvent; -+ unsigned long MulticastTxDone; -+ unsigned long QueueReceive; -+ -+ unsigned long TxEnveRetry; -+ unsigned long TxDataRetry; -+ unsigned long TxDoneRetry; -+ unsigned long RxThrdEvent; -+ unsigned long RxDataRetry; -+ unsigned long RxDoneRetry; -+ unsigned long StallThread; -+ unsigned long ThrdWaiting; -+ unsigned long CompleteEnvelope; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+ -+ unsigned long LockRcvrTrapped; -+} EP_COMMS_RAIL_STATS; -+ -+typedef struct ep_comms_stats -+{ -+ unsigned long DataXmit[8]; -+ unsigned long McastXmit[8]; -+ unsigned long RPCXmit[8]; -+ unsigned long RPCPut[8]; -+ unsigned long RPCGet[8]; -+ unsigned long CompleteRPC[8]; -+ unsigned long RxData[8]; -+ unsigned long RxMcast[8]; -+ -+ unsigned long NoFreeTxds; -+ unsigned long NoFreeRxds; -+} EP_COMMS_STATS; -+ -+#endif /* __EP_EPSTATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/kmap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kmap.h 2005-07-28 14:52:52.938665056 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMAP_H -+#define __ELAN_KMAP_H -+ -+#ident "$Id: kmap.h,v 1.3.8.1 2004/12/14 10:19:14 mike Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmap.h,v $ */ -+ -+#include -+ -+extern void ep_perrail_kaddr_map (EP_RAIL *rail, EP_ADDR eaddr, virtaddr_t vaddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_sdram_map (EP_RAIL *rail, EP_ADDR eaddr, sdramaddr_t saddr, unsigned long len, unsigned int perm, int ep_attr); -+extern void ep_perrail_unmap (EP_RAIL *rail, EP_ADDR eaddr, unsigned long len); -+extern void ep_perrail_dvma_sync (EP_RAIL *rail); -+ -+typedef struct ep_dvma_nmh -+{ -+ EP_NMH dvma_nmh; -+ -+ struct list_head dvma_link; /* chained on ep_dvma_state */ -+ unsigned dvma_perm; /* permissions for region */ -+ -+ spinlock_t dvma_lock; -+ EP_RAILMASK dvma_railmask; /* bitmap of rails */ -+ EP_RAIL *dvma_rails[EP_MAX_RAILS]; /* assoicated rails */ -+ void *dvma_private[EP_MAX_RAILS]; /* pointers to rail private data */ -+ unsigned int dvma_attrs[1]; /* bitmap of which rails pages are loaded NOTE - max 32 rails */ -+} EP_DVMA_NMH; -+ -+/* values for dvma_perm */ -+#define EP_PERM_EXECUTE 0 -+#define EP_PERM_READ 1 -+#define EP_PERM_WRITE 2 -+#define EP_PERM_ALL 3 -+ -+typedef struct ep_dvma_state -+{ -+ kmutex_t dvma_lock; -+ struct list_head dvma_handles; -+ struct list_head dvma_rails; -+ EP_RMAP *dvma_rmap; -+} EP_DVMA_STATE; -+ -+extern void ep_dvma_init (EP_SYS *sys); -+extern void ep_dvma_fini (EP_SYS *sys); -+extern EP_NMH *ep_dvma_reserve (EP_SYS *sys, unsigned npages, unsigned perm); -+extern void ep_dvma_release (EP_SYS *sys, EP_NMH *nmh); -+extern void ep_dvma_load (EP_SYS *sys, void *map, caddr_t vaddr, unsigned len, -+ EP_NMH *nmh, unsigned index, EP_RAILMASK *hints, EP_NMD *subset); -+extern void ep_dvma_unload (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd); -+ -+extern void ep_dvma_remove_rail (EP_SYS *sys, EP_RAIL *rail); -+extern int ep_dvma_add_rail (EP_SYS *sys, EP_RAIL *rail); -+ -+extern uint16_t rolling_check_sum (char *msg, int nob, uint16_t sum); -+ -+#endif /* __ELAN_KMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/kmsg.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kmsg.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kmsg.h 2005-07-28 14:52:52.938665056 -0400 -@@ -0,0 +1,14 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_KMSG_H -+#define __ELAN_KMSG_H -+ -+#ident "@(#)$Id: kmsg.h,v 1.1 2003/09/23 13:55:12 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/kmsg.h,v $ */ -+ -+#endif /* __ELAN_KMSG_H */ -Index: linux-2.6.5-7.191/include/elan/kthread.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/kthread.h 2005-07-28 14:52:52.938665056 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_KTHREAD_H -+#define __ELAN3_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.4 2004/05/06 14:24:08 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/epmod/kthread.h,v $*/ -+ -+typedef struct ep_kthread -+{ -+ kcondvar_t wait; /* place to sleep */ -+ spinlock_t lock; /* and lock */ -+ long next_run; /* tick when thread should next run */ -+ long running; /* tick when thread started to run */ -+ unsigned short should_stall; -+ unsigned char state; -+ unsigned int started:1; -+ unsigned int should_stop:1; -+ unsigned int stopped:1; -+} EP_KTHREAD; -+ -+#define KT_STATE_SLEEPING 0 -+#define KT_STATE_SCHEDULED 1 -+#define KT_STATE_RUNNING 2 -+#define KT_STATE_STALLED 3 -+ -+#define AFTER(a, b) ((((long)(a)) - ((long)(b))) > 0) -+#define BEFORE(a,b) ((((long)(a)) - ((long)(b))) < 0) -+ -+extern void ep_kthread_init (EP_KTHREAD *kt); -+extern void ep_kthread_destroy (EP_KTHREAD *kt); -+extern void ep_kthread_started (EP_KTHREAD *kt); -+extern void ep_kthread_stopped (EP_KTHREAD *kt); -+extern int ep_kthread_should_stall (EP_KTHREAD *kth); -+extern int ep_kthread_sleep (EP_KTHREAD *kth, long next_run); -+extern void ep_kthread_schedule (EP_KTHREAD *kt, long when); -+extern void ep_kthread_stall (EP_KTHREAD *kth); -+extern void ep_kthread_resume (EP_KTHREAD *kt); -+extern void ep_kthread_stop (EP_KTHREAD *kt); -+extern int ep_kthread_state (EP_KTHREAD *kt, long *time); -+#endif /* __ELAN3_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/nmh.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/nmh.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/nmh.h 2005-07-28 14:52:52.939664904 -0400 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NMH_H -+#define __ELAN3_NMH_H -+ -+#ident "@(#)$Id: nmh.h,v 1.7 2004/01/06 10:29:55 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/nmh.h,v $*/ -+ -+ -+/* Forward declarations */ -+typedef struct ep_nmd EP_NMD; -+typedef struct ep_nmh_ops EP_NMH_OPS; -+typedef struct ep_nmh EP_NMH; -+ -+/* Railmask held in 16 bit field (packs with nodeId into NMD */ -+typedef uint16_t EP_RAILMASK; -+ -+#define EP_RAIL2RAILMASK(rnum) (1 << (rnum)) -+#define EP_RAILMASK_ALL 0xffff -+ -+/* kernel comms elan network address */ -+typedef uint32_t EP_ADDR; -+ -+/* network mapping descriptor - this is returned to the user from a map operation, -+ * and is what is passed to all communication functions */ -+struct ep_nmd -+{ -+ EP_ADDR nmd_addr; /* base address */ -+ uint32_t nmd_len; /* size in bytes */ -+ uint32_t nmd_attr; /* nodeid << 16 | railmask */ -+}; -+ -+#define EP_NMD_ATTR(nodeid,railmask) (((nodeid) << 16) | (railmask)) -+#define EP_NMD_NODEID(nmd) ((nmd)->nmd_attr >> 16) -+#define EP_NMD_RAILMASK(nmd) ((nmd)->nmd_attr & EP_RAILMASK_ALL) -+ -+#if !defined(__ELAN__) -+ -+struct ep_nmh_ops -+{ -+ int (*op_map_rails) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, EP_RAILMASK mask); /* add mappings to different rail(s) */ -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+ uint16_t (*op_calc_check_sum) (EP_SYS *sys, EP_NMH *nmh, EP_NMD *nmd, uint16_t check_sum); /* calculates check sum */ -+#endif -+}; -+ -+struct ep_nmh -+{ -+ EP_NMD nmh_nmd; /* public field */ -+ struct list_head nmh_link; /* linked on hash table */ -+ EP_NMH_OPS *nmh_ops; /* operations to perform on object */ -+}; -+ -+#define EP_NMH_NUMHASH (32 - 11 + 1) /* one hash table for each power of 2 above pagesize */ -+#define EP_NMH_HASHSIZE (64) /* max size of each hash table */ -+ -+typedef struct ep_nmh_table -+{ -+ struct list_head *tbl_hash[EP_NMH_NUMHASH]; -+ unsigned tbl_size[EP_NMH_NUMHASH]; -+} EP_NMH_TABLE; -+ -+extern int ep_nmh_init (EP_NMH_TABLE *tbl); -+extern void ep_nmh_fini (EP_NMH_TABLE *tbl); -+ -+extern void ep_nmh_insert (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern void ep_nmh_remove (EP_NMH_TABLE *tbl, EP_NMH *nmd); -+extern EP_NMH *ep_nmh_find (EP_NMH_TABLE *tbl, EP_NMD *nmh); -+ -+#if ! defined(CONFIG_EP_NO_CHECK_SUM) -+extern uint32_t ep_nmd_calc_data_check_sum(EP_SYS *sys, EP_NMD *nmd, int nFrags); -+#endif -+ -+/* Public interface */ -+extern EP_RAILMASK ep_nmd2railmask (EP_NMD *frags, int nFrags); -+extern void ep_nmd_subset (EP_NMD *subset, EP_NMD *nmd, unsigned off, unsigned len); -+extern int ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b); -+extern int ep_nmd_map_rails (EP_SYS *sys, EP_NMD *nmd, unsigned railmask); -+ -+#endif /* __ELAN__ */ -+ -+#endif /* __ELAN3_NMH_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/rmap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/rmap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/rmap.h 2005-07-28 14:52:52.939664904 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_RMAP_H -+#define __ELAN_RMAP_H -+ -+#ident "$Id: rmap.h,v 1.8 2004/05/19 10:24:40 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/rmap.h,v $ */ -+ -+ -+typedef struct ep_rmap_entry -+{ -+ size_t m_size; -+ u_long m_addr; -+} EP_RMAP_ENTRY; -+ -+typedef struct ep_rmap -+{ -+ spinlock_t m_lock; -+ kcondvar_t m_wait; -+ u_int m_size; -+ u_int m_free; -+ u_int m_want; -+ char *m_name; -+ EP_RMAP_ENTRY m_map[1]; -+} EP_RMAP; -+ -+extern void ep_display_rmap (EP_RMAP *map); -+ -+extern void ep_rmapinit (EP_RMAP *rmap, char *name, u_int mapsize); -+extern unsigned long ep_rmalloc (EP_RMAP *rmap, size_t size, int cansleep); -+extern unsigned long ep_rmalloc_constrained (EP_RMAP *mp, size_t size, unsigned long alo, unsigned long ahi, unsigned long align, int cansleep); -+extern void ep_rmfree (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern unsigned long ep_rmget (EP_RMAP *rmap, size_t size, unsigned long addr); -+extern EP_RMAP *ep_rmallocmap (size_t size, char *name, int cansleep); -+extern void ep_rmfreemap (EP_RMAP *map); -+ -+#endif /* __ELAN3_RMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/statemap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/statemap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/statemap.h 2005-07-28 14:52:52.939664904 -0400 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN_STATEMAP_H -+#define __ELAN_STATEMAP_H -+ -+#ident "$Id: statemap.h,v 1.8 2003/10/07 13:22:38 david Exp $" -+/* $Source: /cvs/master/quadrics/epmod/statemap.h,v $ */ -+ -+#include -+ -+/******************************** global state bitmap stuff **********************************/ -+typedef struct -+{ -+ unsigned int size; -+ unsigned int nob; -+ unsigned int changemap_nob; -+ unsigned int bitmap_nob; -+ bitmap_t *changemap0; -+ bitmap_t *changemap1; -+ bitmap_t *changemap2; -+ bitmap_t *bitmap; -+} statemap_t; -+ -+extern bitmap_t statemap_getseg (statemap_t *map, unsigned int offset); -+extern void statemap_setseg (statemap_t *map, unsigned int offset, bitmap_t seg); -+extern bitmap_t statemap_getbits (statemap_t *map, unsigned int offset, int nbits); -+extern void statemap_setbits (statemap_t *map, unsigned int offset, bitmap_t bits, int nbits); -+extern void statemap_zero (statemap_t *map); -+extern void statemap_setmap (statemap_t *dst, statemap_t *src); -+extern void statemap_ormap (statemap_t *dst, statemap_t *src); -+extern int statemap_findchange (statemap_t *map, bitmap_t *newseg, int clearchange); -+extern int statemap_changed (statemap_t *map); -+extern void statemap_reset (statemap_t *map); -+extern void statemap_copy (statemap_t *dst, statemap_t *src); -+extern void statemap_clearchanges (statemap_t *map); -+extern bitmap_t *statemap_tobitmap (statemap_t *map); -+extern statemap_t *statemap_create (int size); -+extern void statemap_destroy (statemap_t *map); -+ -+#endif /* __ELAN_STATEMAP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan/stats.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan/stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan/stats.h 2005-07-28 14:52:52.939664904 -0400 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.5 2003/09/24 13:55:37 david Exp $" -+/* $Source: /cvs/master/quadrics/elanmod/modsrc/stats.h,v $*/ -+ -+#ifndef __ELAN_STATS_H -+#define __ELAN_STATS_H -+ -+ -+/* non-kernel headings */ -+#define ELAN_STATS_NAME_MAX_LEN ((uint)64) -+typedef unsigned int ELAN_STATS_IDX; -+ -+typedef struct elan_stats_map -+{ -+ char entry_name[ELAN_STATS_NAME_MAX_LEN]; -+ int index; -+} ELAN_STATS_MAP; -+ -+#if defined(__KERNEL__) -+ -+/* stats callbacks */ -+#define ELAN_STATS_OPS_VERSION ((u_int)1) -+typedef struct elan_stats_ops -+{ -+ u_int ops_version; -+ -+ int (*elan_stats_get_name) (void * arg, uint index, caddr_t name); -+ int (*elan_stats_get_block) (void * arg, uint entries, ulong *values); -+ int (*elan_stats_clear_block) (void * arg); -+ -+} ELAN_STATS_OPS; -+ -+typedef struct elan_stats_struct -+{ -+ struct list_head node; -+ -+ ELAN_STATS_IDX statidx; -+ char block_name[ELAN_STATS_NAME_MAX_LEN]; -+ uint num_entries; -+ ELAN_STATS_OPS *ops; -+ void *arg; -+ -+} ELAN_STATS_STRUCT; -+ -+/* stats.c */ -+extern int elan_stats_register (ELAN_STATS_IDX *statidx, -+ char *block_name, -+ uint num_entries, -+ ELAN_STATS_OPS *ops, -+ void *arg); -+ -+extern int elan_stats_deregister (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find (ELAN_STATS_IDX statidx); -+extern ELAN_STATS_STRUCT *elan_stats_find_by_name(caddr_t block_name); -+extern ELAN_STATS_STRUCT *elan_stats_find_next (ELAN_STATS_IDX statidx); -+ -+ -+/* elan_stats.c */ -+extern int elan_stats_get_next_index (ELAN_STATS_IDX statidx, ELAN_STATS_IDX *next_statidx); -+ -+extern int elan_stats_find_index (caddr_t block_name, ELAN_STATS_IDX *statidx, uint *num_entries); -+ -+extern int elan_stats_get_block_info (ELAN_STATS_IDX statidx, caddr_t block_name, uint *num_entries); -+ -+extern int elan_stats_get_index_name (ELAN_STATS_IDX statidx, uint index, caddr_t name); -+ -+extern int elan_stats_get_block (ELAN_STATS_IDX statidx, uint entries, ulong *values); -+ -+extern int elan_stats_clear_block (ELAN_STATS_IDX statidx); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* __ELAN_STATS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/compat.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/compat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/compat.h 2005-07-28 14:52:52.940664752 -0400 -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: compat.h,v 1.4 2004/06/09 09:07:03 mike Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/compat.h,v $*/ -+ -+#ifndef __ELAN3_COMPAT_H -+#define __ELAN3_COMPAT_H -+ -+/* compatibility header to allow Eagle branch QSNETLIBS -+ * to compile against head kernel */ -+ -+#define ELAN_EAGLE_COMPAT -+ -+/* vmseg.h */ -+#define ELAN_FLAGSTATS ELAN3_FLAGSTATS -+ -+/* uregs.h */ -+#define ELAN_STATS_NAME ELAN3_STATS_NAME -+#define elan3_stats_names elan_stats_names -+ -+/* spinlock.h */ -+#define ELAN_SPINLOCK ELAN3_SPINLOCK -+#define ELAN_SPINLOCK_MAIN ELAN3_SPINLOCK_MAIN -+#define ELAN_SPINLOCK_ELAN ELAN3_SPINLOCK_ELAN -+#define ELAN_ME_SPINENTER ELAN3_ME_SPINENTER -+#define ELAN_ME_FORCEENTER ELAN3_ME_FORCEENTER -+#define ELAN_ME_SPINEXIT ELAN3_ME_SPINEXIT -+#define ELAN_SPINENTER ELAN3_SPINENTER -+#define ELAN_SPINEXIT ELAN3_SPINEXIT -+#define elan3_me_spinblock elan_me_spinblock -+#define elan3_spinenter elan_spinenter -+ -+/* elanio.h */ -+#define ELANIO_CONTROL_PATHNAME ELAN3IO_CONTROL_PATHNAME -+#define ELANIO_USER_PATHNAME ELAN3IO_USER_PATHNAME -+#define ELANIO_SDRAM_PATHNAME ELAN3IO_SDRAM_PATHNAME -+#define ELANIO_MAX_PATHNAMELEN ELAN3IO_MAX_PATHNAMELEN -+ -+#define ELANIO_SET_BOUNDARY_SCAN ELAN3IO_SET_BOUNDARY_SCAN -+#define ELANIO_CLEAR_BOUNDARY_SCAN ELAN3IO_CLEAR_BOUNDARY_SCAN -+#define ELANIO_READ_LINKVAL ELAN3IO_READ_LINKVAL -+#define ELANIO_WRITE_LINKVAL ELAN3IO_WRITE_LINKVAL -+#define ELANIO_SET_DEBUG_STRUCT ELAN3IO_SET_DEBUG_STRUCT -+#define ELANIO_SET_DEBUG ELAN3IO_SET_DEBUG -+#define ELANIO_DEBUG_BUFFER_STRUCT ELAN3IO_DEBUG_BUFFER_STRUCT -+#define ELANIO_DEBUG_BUFFER ELAN3IO_DEBUG_BUFFER -+#define ELANIO_NETERR_SERVER_STRUCT ELAN3IO_NETERR_SERVER_STRUCT -+#define ELANIO_NETERR_SERVER ELAN3IO_NETERR_SERVER -+#define ELANIO_NETERR_FIXUP ELAN3IO_NETERR_FIXUP -+ -+#define ELANIO_FREE ELAN3IO_FREE -+#define ELANIO_ATTACH ELAN3IO_ATTACH -+#define ELANIO_DETACH ELAN3IO_DETACH -+#define ELANIO_ADDVP_STRUCT ELAN3IO_ADDVP_STRUCT -+#define ELANIO_ADDVP ELAN3IO_ADDVP -+#define ELANIO_REMOVEVP ELAN3IO_REMOVEVP -+#define ELANIO_BCASTVP_STRUCT ELAN3IO_BCASTVP_STRUCT -+#define ELANIO_BCASTVP ELAN3IO_BCASTVP -+#define ELANIO_LOAD_ROUTE_STRUCT ELAN3IO_LOAD_ROUTE_STRUCT -+#define ELANIO_LOAD_ROUTE ELAN3IO_LOAD_ROUTE -+#define ELANIO_PROCESS ELAN3IO_PROCESS -+#define ELANIO_SETPERM_STRUCT ELAN3IO_SETPERM_STRUCT -+#define ELANIO_SETPERM ELAN3IO_SETPERM -+#define ELANIO_CLEARPERM_STRUCT ELAN3IO_CLEARPERM_STRUCT -+#define ELANIO_CLEARPERM ELAN3IO_CLEARPERM -+#define ELANIO_CHANGEPERM_STRUCT ELAN3IO_CHANGEPERM_STRUCT -+#define ELANIO_CHANGEPERM ELAN3IO_CHANGEPERM -+#define ELANIO_HELPER_THREAD ELAN3IO_HELPER_THREAD -+#define ELANIO_WAITCOMMAND ELAN3IO_WAITCOMMAND -+#define ELANIO_BLOCK_INPUTTER ELAN3IO_BLOCK_INPUTTER -+#define ELANIO_SET_FLAGS ELAN3IO_SET_FLAGS -+#define ELANIO_WAITEVENT ELAN3IO_WAITEVENT -+#define ELANIO_ALLOC_EVENTCOOKIE ELAN3IO_ALLOC_EVENTCOOKIE -+#define ELANIO_FREE_EVENTCOOKIE ELAN3IO_FREE_EVENTCOOKIE -+#define ELANIO_ARM_EVENTCOOKIE ELAN3IO_ARM_EVENTCOOKIE -+#define ELANIO_WAIT_EVENTCOOKIE ELAN3IO_WAIT_EVENTCOOKIE -+#define ELANIO_SWAPSPACE ELAN3IO_SWAPSPACE -+#define ELANIO_EXCEPTION_SPACE ELAN3IO_EXCEPTION_SPACE -+#define ELANIO_GET_EXCEPTION ELAN3IO_GET_EXCEPTION -+#define ELANIO_UNLOAD_STRUCT ELAN3IO_UNLOAD_STRUCT -+#define ELANIO_UNLOAD ELAN3IO_UNLOAD -+#define ELANIO_GET_ROUTE_STRUCT ELAN3IO_GET_ROUTE_STRUCT -+#define ELANIO_GET_ROUTE ELAN3IO_GET_ROUTE -+#define ELANIO_RESET_ROUTE_STRUCT ELAN3IO_RESET_ROUTE_STRUCT -+#define ELANIO_RESET_ROUTE ELAN3IO_RESET_ROUTE -+#define ELANIO_CHECK_ROUTE_STRUCT ELAN3IO_CHECK_ROUTE_STRUCT -+#define ELANIO_CHECK_ROUTE ELAN3IO_CHECK_ROUTE -+#define ELANIO_VP2NODEID_STRUCT ELAN3IO_VP2NODEID_STRUCT -+#define ELANIO_VP2NODEID ELAN3IO_VP2NODEID -+#define ELANIO_SET_SIGNAL ELAN3IO_SET_SIGNAL -+#define ELANIO_PROCESS_2_LOCATION_STRUCT ELAN3IO_PROCESS_2_LOCATION_STRUCT -+#define ELANIO_PROCESS_2_LOCATION ELAN3IO_PROCESS_2_LOCATION -+#define ELANIO_GET_DEVINFO_STRUCT ELAN3IO_GET_DEVINFO_STRUCT -+#define ELANIO_GET_DEVINFO ELAN3IO_GET_DEVINFO -+#define ELANIO_GET_POSITION_STRUCT ELAN3IO_GET_POSITION_STRUCT -+#define ELANIO_GET_POSITION ELAN3IO_GET_POSITION -+#define ELANIO_STATS_STRUCT ELAN3IO_STATS_STRUCT -+#define ELANIO_STATS ELAN3IO_STATS -+# define ELAN_SYS_STATS_DEVICE ELAN3_SYS_STATS_DEVICE -+# define ELAN_SYS_STATS_ELAN3MMU ELAN3_SYS_STATS_MMU -+ -+#define ELANIO_OFF_FLAG_PAGE ELAN3IO_OFF_FLAG_PAGE -+#define ELANIO_OFF_UREG_PAGE ELAN3IO_OFF_UREG_PAGE -+#define ELANIO_OFF_COMMAND_PAGE ELAN3IO_OFF_COMMAND_PAGE -+ -+ -+/* elanvp.h */ -+#define ELAN_ROUTE_SUCCESS ELAN3_ROUTE_SUCCESS -+#define ELAN_ROUTE_SYSCALL_FAILED ELAN3_ROUTE_SYSCALL_FAILED -+#define ELAN_ROUTE_INVALID ELAN3_ROUTE_INVALID -+#define ELAN_ROUTE_TOO_LONG ELAN3_ROUTE_TOO_LONG -+#define ELAN_ROUTE_LOAD_FAILED ELAN3_ROUTE_LOAD_FAILED -+#define ELAN_ROUTE_PROC_RANGE ELAN3_ROUTE_PROC_RANGE -+#define ELAN_ROUTE_INVALID_LEVEL ELAN3_ROUTE_INVALID_LEVEL -+#define ELAN_ROUTE_OCILATES ELAN3_ROUTE_OCILATES -+#define ELAN_ROUTE_WRONG_DEST ELAN3_ROUTE_WRONG_DEST -+#define ELAN_ROUTE_TURN_LEVEL ELAN3_ROUTE_TURN_LEVEL -+#define ELAN_ROUTE_NODEID_UNKNOWN ELAN3_ROUTE_NODEID_UNKNOWN -+ -+/* elandev.h */ -+#define ELAN_STATS ELAN3_STATS -+#define ELAN_STATS_VERSION ELAN3_STATS_VERSION -+ -+/* perm.h */ -+#define ELAN_PERM_NOREMOTE ELAN3_PERM_NOREMOTE -+#define ELAN_PERM_LOCAL_READ ELAN3_PERM_LOCAL_READ -+#define ELAN_PERM_REMOTEALL ELAN3_PERM_REMOTEALL -+ -+/* threadsyscall.h */ -+#define ELAN_ABORT_TRAPNUM ELAN3_ABORT_TRAPNUM -+#define ELAN_ELANCALL_TRAPNUM ELAN3_ELANCALL_TRAPNUM -+#define ELAN_SYSCALL_TRAPNUM ELAN3_SYSCALL_TRAPNUM -+#define ELAN_SYS_close ELAN3_SYS_close -+#define ELAN_SYS_getpid ELAN3_SYS_getpid -+#define ELAN_SYS_ioctl ELAN3_SYS_ioctl -+#define ELAN_SYS_kill ELAN3_SYS_kill -+#define ELAN_SYS_lseek ELAN3_SYS_lseek -+#define ELAN_SYS_mmap ELAN3_SYS_mmap -+#define ELAN_SYS_munmap ELAN3_SYS_munmap -+#define ELAN_SYS_open ELAN3_SYS_open -+#define ELAN_SYS_poll ELAN3_SYS_poll -+#define ELAN_SYS_read ELAN3_SYS_read -+#define ELAN_SYS_write ELAN3_SYS_write -+#define ELAN_T_SYSCALL_CODE ELAN3_T_SYSCALL_CODE -+#define ELAN_T_SYSCALL_ERRNO ELAN3_T_SYSCALL_ERRNO -+ -+/* elansyscall.h */ -+#define ELAN_SYS_FLAG_DMA_BADVP ELAN3_SYS_FLAG_DMA_BADVP -+#define ELAN_SYS_FLAG_THREAD_BADVP ELAN3_SYS_FLAG_THREAD_BADVP -+#define ELAN_SYS_FLAG_DMAFAIL ELAN3_SYS_FLAG_DMAFAIL -+#define ELAN_SYS_FLAG_NETERR ELAN3_SYS_FLAG_NETERR -+ -+/* intrinsics.h */ -+#define elan_copy64w elan3_copy64w -+#define elan_read64dw elan3_read64dw -+#define elan_write64dw elan3_write64dw -+ -+#ifndef ELAN_POLL_EVENT -+#define ELAN_POLL_EVENT ELAN3_POLL_EVENT -+#endif -+#ifndef ELAN_WAIT_EVENT -+#define ELAN_WAIT_EVENT ELAN3_WAIT_EVENT -+#endif -+ -+#endif /* __ELAN3_COMPAT_H */ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+ -Index: linux-2.6.5-7.191/include/elan3/dma.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/dma.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/dma.h 2005-07-28 14:52:52.941664600 -0400 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_DMA_H -+#define __ELAN3_DMA_H -+ -+#ident "$Id: dma.h,v 1.38 2002/08/21 12:43:27 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/dma.h,v $ */ -+ -+#include -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E3_DMA_ALIGN (32) -+ -+/* The maximum size a DMA can be (i.e. < 2GB) */ -+#define E3_MAX_DMA_SIZE 0x7fffffff -+ -+/* This macro returns TRUE if a fixup for the ELAN_REVB_BUG_2 problem is required -+ * i.e. if the DMA begins in the last 64-bytes of a page and its size causes it to enter the -+ * next page, hence causing the Elan to issue 2 (64-byte) block reads to different pages. -+ * See GNAT hw-elan3/3263 -+ */ -+#define E3_DMA_REVB_BUG_2(SIZE, ADDR, PAGESIZE) \ -+ ( (((int) (ADDR) & (PAGESIZE-64)) == (PAGESIZE-64)) && (-(((int) (ADDR) | ~(PAGESIZE-1))) < (SIZE)) ) -+ -+/* There is a point where a dma runs quicker from main memory than -+ * when running from sdram and having to copy all the data down -+ * first. -+ */ -+#define E3_DMA_SDRAM_CUTOFF 128 -+ -+typedef union _e3_DmaType -+{ -+ E3_uint32 type; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 isRemote:1; /* Bit 15 */ -+ E3_uint32 failCount:6; /* Bits 9 to 14 */ -+ E3_uint32 opCode:4; /* Bits 5 to 8 */ -+ E3_uint32 direction:3; /* Bit 4 to 2 */ -+ E3_uint32 dataType:2; /* Bits 0 to 1 */ -+#endif -+ } s; -+} E3_DmaType; -+ -+#define E3_DMA_CONTEXT_MASK (ALL_CONTEXT_BITS << 16) -+ -+#define E3_DMA_CONTEXT(type) (((type) >> 16) & ALL_CONTEXT_BITS) -+#define E3_DMA_ISREMOTE(type) (((type) >> 15) & 1) -+#define E3_DMA_FAILCOUNT(type) (((type) >> 9) & 0x3F) -+#define E3_DMA_OPCODE(type) (((type) >> 5) & 0xF) -+#define E3_DMA_DIRECTION(type) (((type) >> 2) & 0x7) -+#define EP_DMA_DATATYPE(type) (((type) >> 0) & 0x3) -+ -+#define E3_DMA_TYPE(dataType, direction, opCode, failCount) \ -+ (((dataType) & 0x3) | (((direction) & 7) << 2) | (((opCode) & 0xF) << 5) | (((failCount) & 0x3F) << 9)) -+ -+ -+typedef union _e3_CookieVProc -+{ -+ E3_uint32 cookie_vproc; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+#else -+ E3_uint32 cookie:16; /* Bits 31 to 16 */ -+ E3_uint32 vproc:16; /* Bit 15 to 0 */ -+#endif -+ } s; -+} E3_CookieVProc; -+ -+#define E3_DMA_COOKIE_PROC(Cookie, VProc) (((VProc) & 0xffff) | (((Cookie) << 16))) -+ -+#define DMA_COOKIE_MASK (0xffff0000) -+#define DMA_PROCESS_MASK (0x0000ffff) -+ -+/* We use the bottom bit of the cookie to -+ * distinguish main/thread generated cookies -+ */ -+#define DMA_COOKIE_THREAD (0x01 << 16) -+ -+/* We use the next bit of the cookie to -+ * distinguish locally/remotely generated cookies -+ */ -+#define DMA_COOKIE_REMOTE (0x02 << 16) -+ -+/* Assign and increment cookie (NB: we have reserved the bottom two bits) -+ */ -+#define DMA_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | VPROC) -+#define DMA_REMOTE_COOKIE(COOKIE, VPROC) ((((COOKIE) += (0x4 << 16)) & DMA_COOKIE_MASK) | DMA_COOKIE_REMOTE | VPROC) -+ -+#define DMA_COOKIE_REFRESH(COOKIEVP, COOKIE) \ -+do { \ -+ COOKIEVP &= ~DMA_COOKIE_MASK; /* Clear cookie */ \ -+ COOKIEVP |= DMA_COOKIE(COOKIE,0); /* Assign new cookie */ \ -+} while (0) -+ -+typedef struct e3_dma -+{ -+ E3_DmaType dma_u; -+ E3_uint32 dma_size; -+ E3_Addr dma_source; -+ E3_Addr dma_dest; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_srcEvent; -+ E3_CookieVProc dma_srcCookieProc; -+} E3_DMA; -+ -+ -+/* -+ * Word-swapped version of DMA descriptor. -+ * This is used by the UltraSPARC code to format the descriptor -+ * in main memory before block-copying it down to Elan SDRAM. -+ * In the process it does a dword (64-bit) conversion and so swaps -+ * the word order on a double-word pair basis -+ */ -+typedef struct e3_dma_swapped -+{ -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+} E3_DMA_SWAPPED; -+ -+/* Define a Main memory structure for DMA desc based on Endianess of machine */ -+#if defined(__LITTLE_ENDIAN__) -+#define E3_DMA_MAIN E3_DMA -+#else -+#define E3_DMA_MAIN E3_DMA_SWAPPED; -+#endif -+ -+#define dma_type dma_u.type -+#define dma_failCount dma_u.s.failCount -+#define dma_isRemote dma_u.s.isRemote -+#define dma_opCode dma_u.s.opCode -+#define dma_direction dma_u.s.direction -+#define dma_dataType dma_u.s.dataType -+#define dma_queueContext dma_u.s.Context -+ -+#define dma_destCookieVProc dma_destCookieProc.cookie_vproc -+#define dma_destVProc dma_destCookieProc.s.vproc -+#define dma_destCookie dma_destCookieProc.s.cookie -+#define dma_srcCookieVProc dma_srcCookieProc.cookie_vproc -+#define dma_srcVProc dma_srcCookieProc.s.vproc -+#define dma_srcCookie dma_srcCookieProc.s.cookie -+ -+/* -+ * Values for dma_opCode -+ */ -+#define DMA_NORMAL 0 -+#define DMA_QUEUED 1 -+#define DMA_NORMAL_BROADCAST 2 -+#define DMA_QUEUED_BROADCAST 3 -+#define DMA_NORMAL_UNSAFE 4 -+#define DMA_QUEUED_UNSAFE 5 -+#define DMA_NORMAL_BROADCAST_UNSAFE 6 -+#define DMA_QUEUED_BROADCAST_UNSAFE 7 -+ -+/* -+ * Values for dma_direction -+ */ -+#define DMA_WRITE 0 -+#define DMA_READ_REQUEUE 1 -+#define DMA_READ 3 -+#define DMA_READ_BROADCAST 7 -+ -+/* -+ * Values for dma_dataType -+ */ -+#define DMA_BYTE 0 -+#define DMA_HALFWORD 1 -+#define DMA_WORD 2 -+#define DMA_DOUBLE 3 -+ -+/* OUT OF DATE ? -+ #define DMA_OPCODE_SHIFT 3 -+ #define DMA_FAILCOUNT_SHIFT 9 -+*/ -+#define DMA_TYPE_ISREMOTE (1 << 15) -+#define DMA_TYPE_READ (3 << 2) -+#define DMA_TYPE_READ_REQUEUE (1 << 2) -+#define DMA_TYPE_DIRECTION_MASK (3 << 2) -+ -+#endif /* __ELAN3_DMA_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/e3types.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/e3types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/e3types.h 2005-07-28 14:52:52.941664600 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_E3TYPES_H -+#define __ELAN3_E3TYPES_H -+ -+#ident "$Id: e3types.h,v 1.18 2002/08/09 11:23:33 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/e3types.h,v $ */ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E3_int; -+typedef unsigned int E3_uint; -+ -+typedef signed char E3_int8; -+typedef unsigned char E3_uint8; -+ -+typedef signed short E3_int16; -+typedef unsigned short E3_uint16; -+ -+typedef signed int E3_int32; -+typedef unsigned int E3_uint32; -+ -+#ifdef __ELAN3__ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+#ifdef _MAIN_LP64 -+/* NOTE: If the Main is 64-bit we declare the Elan thread's -+ * E3_uintptr to be 64-bits too -+ */ -+typedef unsigned long long E3_uintptr; -+#else -+typedef unsigned long E3_uintptr; -+#endif -+ -+#else -+ -+#ifdef _LP64 -+typedef signed long E3_int64; -+typedef unsigned long E3_uint64; -+typedef unsigned long E3_uintptr; -+#else /* _ILP32 */ -+typedef signed long long E3_int64; -+typedef unsigned long long E3_uint64; -+typedef unsigned long E3_uintptr; -+#endif -+ -+#endif /* __ELAN3__ */ -+ -+/* 32-bit Elan3 address */ -+typedef E3_uint32 E3_Addr; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN3_E3TYPES_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elan3mmu.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elan3mmu.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elan3mmu.h 2005-07-28 14:52:52.942664448 -0400 -@@ -0,0 +1,346 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3MMU_H -+#define __ELAN3_ELAN3MMU_H -+ -+#ident "$Id: elan3mmu.h,v 1.40.2.1 2004/12/14 10:19:48 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu.h,v $*/ -+ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct elan3mmu_global_stats -+{ -+ int version; -+ int pteload; -+ int pteunload; -+ int ptereload; -+ -+ int streamable_alloc; -+ int streamable_free; -+ int streamable_alloc_failed; -+ -+ int num_ptbl_level[4]; /* number of level N ptbls */ -+ -+ int create_ptbl_failed; /* count of ptbl creation failure */ -+ -+ int lX_alloc_l3; /* count of l3 ptbls used as lX */ -+ int lX_freed_l3; /* count of lX ptbls freed as l3 */ -+ -+ int l2_alloc_l3; /* count of l3 ptbls used as l2 */ -+ int l2_freed_l3; /* count of l2 ptbls freed as l3 */ -+ -+ int stolen_ptbls; /* count of l3 ptbls stolen */ -+} ELAN3MMU_GLOBAL_STATS; -+ -+#define ELAN3MMU_STATS_VERSION 1 -+ -+#define ELAN3MMU_STAT(what) (elan3mmu_global_stats.what++) -+#define ELAN3MMU_SET_STAT(what,count) (elan3mmu_global_stats.what = count) -+ -+#ifdef __KERNEL__ -+ -+#define ELAN3_PT_SHIFT (ELAN3_L2_SHIFT + 2) -+ -+typedef struct elan3_ptbl -+{ -+ struct elan3_ptbl *ptbl_parent; /* Parent page table, or next on freelist */ -+ struct elan3mmu *ptbl_elan3mmu; /* elan3mmu we're allocated for */ -+ E3_Addr ptbl_base; /* Virtual address we're mapping */ -+ u_char ptbl_index; /* Index in ptbl group */ -+ u_char ptbl_valid; /* Number of valid entries */ -+ u_char ptbl_flags; /* Flags, defined below. */ -+ u_char ptbl_spare; -+} ELAN3_PTBL; -+ -+#define ptbl_next ptbl_parent /* Parent pointer is next pointer when on free list */ -+ -+#define PTBL_LEVEL_X 0x00 -+#define PTBL_LEVEL_1 0x01 -+#define PTBL_LEVEL_2 0x02 -+#define PTBL_LEVEL_3 0x03 -+#define PTBL_LEVEL_MASK 0x03 -+#define PTBL_LOCKED 0x04 /* Page table is locked, protects all fields */ -+#define PTBL_KEEP 0x08 /* This ptbl is not to be stolen */ -+#define PTBL_ALLOCED 0x10 /* This ptbl has been allocated, and is not free */ -+#define PTBL_GROUPED 0x20 /* This ptbl is a member of a group of ptbls */ -+#define PTBL_KERNEL 0x80 /* This ptbl is allocated for the kernel */ -+ -+#define PTBL_LEVEL(flags) ((flags) & PTBL_LEVEL_MASK) -+#define PTBL_IS_LOCKED(flags) (((flags) & (PTBL_LOCKED|PTBL_ALLOCED)) == (PTBL_LOCKED|PTBL_ALLOCED)) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBL_GROUP_SIZE 8192 /* page table groups are 8k bytes */ -+# define PTBLS_PER_GROUP_L1 8 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 32 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 32 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 32 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 32 /* max of l1,l2,l3,lX */ -+#else -+# define PTBL_GROUP_SIZE 4096 /* page table groups are 4k bytes */ -+# define PTBLS_PER_GROUP_L1 4 /* Number of level 1 tables in a group */ -+# define PTBLS_PER_GROUP_L2 16 /* ... level 2 */ -+# define PTBLS_PER_GROUP_L3 8 /* ... level 3 */ -+# define PTBLS_PER_GROUP_LX 16 /* ... level X */ -+# define PTBLS_PER_GROUP_MAX 16 /* max of l1,l2,l3,lX */ -+#endif -+ -+#define HMES_PER_GROUP (PTBLS_PER_GROUP_L3*ELAN3_L3_ENTRIES) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 1 /* 32 PTEs */ -+#else -+# define PTBLS_PER_PTBL_L1 4 /* 256 PTPs */ -+# define PTBLS_PER_PTBL_L2 1 /* 64 PTPs */ -+# define PTBLS_PER_PTBL_L3 2 /* 64 PTEs */ -+#endif -+ -+#define ELAN3_LX_ENTRIES (32) -+#define PTBLS_PER_PTBL_LX (1) -+ -+#define L1_VA_PER_PTBL (ELAN3_L1_SIZE*(ELAN3_L1_ENTRIES/PTBLS_PER_PTBL_L1)) /* 4 ptbl for L1 */ -+#define L2_VA_PER_PTBL (ELAN3_L2_SIZE*(ELAN3_L2_ENTRIES/PTBLS_PER_PTBL_L2)) /* 1 ptbl for L2 */ -+#define L3_VA_PER_PTBL (ELAN3_L3_SIZE*(ELAN3_L3_ENTRIES/PTBLS_PER_PTBL_L3)) /* 1 ptbl for L3 */ -+ -+typedef struct elan3_ptbl_gr -+{ -+ struct elan3_ptbl_gr *pg_next; /* Next in list. */ -+ int pg_level; /* Level PG allocated for */ -+ sdramaddr_t pg_addr; /* sdram offset of ptes/ptps */ -+ ELAN3_PTBL pg_ptbls[PTBLS_PER_GROUP_MAX]; /* The actual page tables */ -+} ELAN3_PTBL_GR; -+ -+ -+/* -+ * The elan3mmu structure is the mmu dependant hardware address translation -+ * structure linked to the address space structure to show the translatioms -+ * provided by the elan for an address sapce. -+ * -+ * We also have a doubly linked list of 'regions' which allow the -+ * elan3mmu code to determine the access permissions for the elan -+ * dependant on the virtual address that the translation is being -+ * loaded at. -+ */ -+ -+typedef struct elan3mmu_rgn -+{ -+ struct elan3mmu_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_mprev; /* sorted on main address */ -+ caddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct elan3mmu_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct elan3mmu_rgn *rgn_eprev; /* sorted on elan address */ -+ E3_Addr rgn_ebase; /* elan address of base of region */ -+ -+ u_int rgn_len; /* length of region */ -+ u_int rgn_perm; /* elan access permission */ -+} ELAN3MMU_RGN; -+ -+typedef struct elan3mmu -+{ -+ spinlock_t elan3mmu_lock; /* spinlock lock for regions */ -+ ELAN3MMU_RGN *elan3mmu_mrgns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_mtail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_mrgnlast; /* Last region 'hit' */ -+ -+ ELAN3MMU_RGN *elan3mmu_ergns; /* Doubly linked list of memory regions */ -+ ELAN3MMU_RGN *elan3mmu_etail; /* Last memory region on list */ -+ ELAN3MMU_RGN *elan3mmu_ergnlast; /* Last region 'hit' */ -+ -+ struct elan3_dev *elan3mmu_dev; /* Elan device we're using. */ -+ struct elan3_ctxt *elan3mmu_ctxt; /* Elan ctxt we're associated with */ -+ -+ sdramaddr_t elan3mmu_ctp; /* Context table entry for our context */ -+ ELAN3_PTBL *elan3mmu_l1ptbl; /* Level 1 Page table (first of 4) */ -+ -+ spinlock_t elan3mmu_lXptbl_lock; /* spinlock for level X table list */ -+ ELAN3_PTBL *elan3mmu_lXptbl; /* Level X Page table list */ -+ -+#ifdef LINUX -+ struct mm_struct *elan3mmu_coproc_mm; /* Linux mm we're mapping */ -+#endif -+} ELAN3MMU; -+ -+_NOTE(LOCK_ORDER(elan3mmu::elan3mmu_lock elan3_dev::IntrLock)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3mmu::elan3mmu_lock, -+ elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+/* protected by dev->IntrLock for read by device driver */ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_mrgns elan3mmu::elan3mmu_mtail -+ elan3mmu::elan3mmu_ergns elan3mmu::elan3mmu_etail)) -+ -+_NOTE(SCHEME_PROTECTS_DATA("only set to valid region", -+ elan3mmu::elan3mmu_ergnlast elan3mmu::elan3mmu_mrgnlast)) -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3mmu::elan3mmu_l1ptbl -+ elan3mmu::elan3mmu_ctp -+ elan3mmu::elan3mmu_dev)) -+ -+/* -+ * Macros for accessing ptes/ptbls/ptbl_grs -+ */ -+ -+#define OFFSETOF(object,member) /* calculate offset of structure member */ \ -+ ((size_t) (&(((object *)0)->member))) -+#define PTBL_TO_GR(ptbl) /* convert ptbl to ptbl group */ \ -+ ((ELAN3_PTBL_GR *) ((caddr_t) ((ptbl) - (ptbl)->ptbl_index) - OFFSETOF(ELAN3_PTBL_GR,pg_ptbls[0]))) -+#define PTBL_TO_PTADDR(ptbl) /* convert ptbl to a ptp pointing at it */ \ -+ (PTBL_TO_GR(ptbl)->pg_addr + ((ptbl)->ptbl_index<pg_hmes + ((pte) - (ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr)) -+#define HME_TO_PTE(ptebl,hme) /* convert hme to corresponding pte */ \ -+ ((ELAN3_PTE *) PTBL_TO_GR(ptbl)->pg_vaddr + ((hme) - (PTBL_TO_GR(ptbl)->pg_hmes))) -+ -+ -+/* Flags for lock_ptbl */ -+#define LK_PTBL_NOWAIT 0x1 -+#define LK_PTBL_FAILOK 0x2 -+ -+/* Return values for lock_ptbl */ -+#define LK_PTBL_OK 0x0 -+#define LK_PTBL_MISMATCH 0x1 -+#define LK_PTBL_FAILED 0x2 -+ -+/* Flags for elan3mmu_ptesync */ -+#define NO_MLIST_LOCK 0 -+#define MLIST_LOCKED 1 -+ -+/* Flags for elan3mmu_pteload */ -+#define PTE_LOAD 0x00 -+#define PTE_LOAD_LOCK 0x01 /* translation should be locked */ -+#define PTE_LOAD_NOSYNC 0x02 /* ref/mod bits should not be sync'ed to page */ -+#define PTE_NO_SLEEP 0x04 /* true if we cant sleep */ -+#define PTE_NO_STEAL 0x08 /* true if we don't want to steal ptbls */ -+ -+#define PTE_LOAD_ENDIAN_MASK 0x10 /* mask for endian-ness */ -+#define PTE_LOAD_LITTLE_ENDIAN 0x00 /* translation is to little-endian memory */ -+#define PTE_LOAD_BIG_ENDIAN 0x10 /* translation is to big-endian memory */ -+ -+ -+/* Flags for elan3mmu_unload */ -+#define PTE_UNLOAD 0x00 -+#define PTE_UNLOAD_UNLOCK 0x01 -+#define PTE_UNLOAD_NOFLUSH 0x02 -+#define PTE_UNLOAD_NOSYNC 0x04 -+ -+extern int elan3mmu_debug; -+#ifdef DEBUG_PRINTF -+# define HAT_PRINTF0(n,msg) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg) : (void) 0) -+# define HAT_PRINTF1(n,msg,a) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a) : (void) 0) -+# define HAT_PRINTF2(n,msg,a,b) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b) : (void) 0) -+# define HAT_PRINTF3(n,msg,a,b,c) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c) : (void) 0) -+# define HAT_PRINTF4(n,msg,a,b,c,d) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d) : (void) 0) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e) : (void) 0) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) ((elan3mmu_debug & n) ? (void) elan3_debugf (NULL, DBG_HAT, msg,a,b,c,d,e,f) : (void) 0) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) ((elan3mmu_debug & n) ? (void) elan3_debugf(NULL, DBG_HAT, ##args) : (void) 0) -+# endif -+#else -+# define HAT_PRINTF0(n,msg) -+# define HAT_PRINTF1(n,msg,a) -+# define HAT_PRINTF2(n,msg,a,b) -+# define HAT_PRINTF3(n,msg,a,b,c) -+# define HAT_PRINTF4(n,msg,a,b,c,d) -+# define HAT_PRINTF5(n,msg,a,b,c,d,e) -+# define HAT_PRINTF6(n,msg,a,b,c,d,e,f) -+# ifdef LINUX -+# define HAT_PRINTF(n,args...) -+# endif -+#endif -+ -+/* elan3mmu_generic.c */ -+extern ELAN3MMU_GLOBAL_STATS elan3mmu_global_stats; -+ -+extern void elan3mmu_init (void); -+extern void elan3mmu_fini (void); -+ -+extern ELAN3MMU *elan3mmu_alloc (struct elan3_ctxt *ctxt); -+extern void elan3mmu_free (ELAN3MMU *elan3mmu); -+ -+extern void elan3mmu_set_context_filter (ELAN3_DEV *dev, int ctx, int disabled, E3_uint32 Pend, E3_uint32 *Maskp); -+extern int elan3mmu_attach (ELAN3_DEV *dev, int ctx, ELAN3MMU *elan3mmu, sdramaddr_t routeTable, E3_uint32 routeMask); -+extern void elan3mmu_detach (ELAN3_DEV *dev, int ctx); -+ -+extern ELAN3MMU_RGN *elan3mmu_findrgn_elan (ELAN3MMU *elan3mmu, E3_Addr addr, int tail); -+extern int elan3mmu_addrgn_elan (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_elan (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern ELAN3MMU_RGN *elan3mmu_findrgn_main (ELAN3MMU *elan3mmu, caddr_t addr, int tail); -+extern int elan3mmu_addrgn_main (ELAN3MMU *elan3mmu, ELAN3MMU_RGN *nrgn); -+extern ELAN3MMU_RGN *elan3mmu_removergn_main (ELAN3MMU *elan3mmu, caddr_t addr); -+extern ELAN3MMU_RGN *elan3mmu_rgnat_main (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern int elan3mmu_setperm (ELAN3MMU *elan3mmu, caddr_t maddr, E3_Addr eaddr, u_int len, u_int perm); -+extern void elan3mmu_clrperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len); -+extern int elan3mmu_checkperm (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int access); -+extern caddr_t elan3mmu_mainaddr (ELAN3MMU *elan3mmu, E3_Addr addr); -+extern E3_Addr elan3mmu_elanaddr (ELAN3MMU *elan3mmu, caddr_t addr); -+ -+extern void elan3mmu_expand (ELAN3MMU *elan3mmu, E3_Addr addr, int len, int level, int attr); -+extern void elan3mmu_reserve (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+extern void elan3mmu_release (ELAN3MMU *elan3mmu, E3_Addr addr, u_int npages, sdramaddr_t *); -+ -+extern void elan3mmu_pteload (ELAN3MMU *elan3mmu, int level, E3_Addr addr, physaddr_t paddr, int perm, int attr); -+extern void elan3mmu_unload (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, int flags); -+extern void elan3mmu_sync (ELAN3MMU *elan3mmu, E3_Addr addr, u_int len, u_int clearflag); -+extern void elan3mmu_pteunload (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern void elan3mmu_ptesync (ELAN3_PTBL *ptbl, sdramaddr_t pte, int flags, int got_mlist_lock); -+extern sdramaddr_t elan3mmu_ptp2pte (ELAN3MMU *elan3mmu, sdramaddr_t ptp, int level); -+extern sdramaddr_t elan3mmu_ptefind (ELAN3MMU *elan3mmu, E3_Addr, int *level, ELAN3_PTBL **pptbl, spinlock_t **plock, unsigned long *flags); -+extern sdramaddr_t elan3mmu_ptealloc (ELAN3MMU *elan3mmu, E3_Addr, int level, ELAN3_PTBL **pptbl, spinlock_t **plock, int attr, unsigned long *flags); -+extern void elan3mmu_l1inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l1ptbl, int flags); -+extern int elan3mmu_l2inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l2ptbl, int flags, E3_Addr addr, spinlock_t **pl2lock, unsigned long *lock_flags); -+extern int elan3mmu_l3inval (ELAN3MMU *elan3mmu, ELAN3_PTBL *l3ptbl, int flags, E3_Addr addr, spinlock_t **pl3lock, unsigned long *lock_flags); -+ -+extern void elan3mmu_free_l1ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l2ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+extern void elan3mmu_free_l3ptbl (ELAN3_DEV *dev, ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+extern int elan3mmu_lock_this_ptbl (ELAN3_PTBL *ptbl, int flag, spinlock_t **plock, unsigned long *flags); -+extern int elan3mmu_lock_ptbl (ELAN3_PTBL *ptbl, u_int flag, ELAN3MMU *elan3mmu, E3_Addr va, int level, spinlock_t **plock, unsigned long *flags); -+extern void elan3mmu_unlock_ptbl (ELAN3_PTBL *ptbl, spinlock_t *lock, unsigned long flags); -+ -+/* elan3mmu_osdep.c */ -+extern void elan3mmu_init_osdep (void); -+extern void elan3mmu_fini_osdep (void); -+extern void elan3mmu_alloc_osdep (ELAN3MMU *elan3mmu); -+extern void elan3mmu_free_osdep (ELAN3MMU *elan3mmu); -+extern ELAN3_PTE elan3mmu_phys_to_pte (ELAN3_DEV *dev, physaddr_t paddr, int perm); -+extern ELAN3_PTE elan3mmu_kernel_invalid_pte (ELAN3MMU *elan3mmu); -+ -+#if defined (DIGITAL_UNIX) -+# include -+#elif defined (LINUX) -+# include -+#endif -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELAN3MMU_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elan3mmu_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elan3mmu_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elan3mmu_linux.h 2005-07-28 14:52:52.942664448 -0400 -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_MMU_LINUX_H -+#define __ELAN3_MMU_LINUX_H -+ -+#ident "$Id: elan3mmu_linux.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3mmu_linux.h,v $*/ -+ -+/* XXX copy of elan3mmu_dunix.h */ -+ -+#define ALLOC_ELAN3MMU(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU *, sizeof (ELAN3MMU), cansleep) -+#define ALLOC_PTBL_GR(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3_PTBL_GR *, sizeof (ELAN3_PTBL_GR), cansleep) -+#define ALLOC_ELAN3MMU_RGN(ptr,cansleep) KMEM_ALLOC(ptr, ELAN3MMU_RGN *, sizeof (ELAN3MMU_RGN), cansleep) -+#define ALLOC_HMENTS(ptr,cansleep) KMEM_ALLOC((ptr,ELAN3_HMENT *, sizeof (ELAN3_HMENT), cansleep) -+ -+#define FREE_ELAN3MMU(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU)) -+#define FREE_PTBL_GR(ptr) KMEM_FREE(ptr,sizeof (ELAN3_PTBL_GR)) -+#define FREE_ELAN3MMU_RGN(ptr) KMEM_FREE(ptr,sizeof (ELAN3MMU_RGN)) -+#define FREE_HMENTS(ptr) KMEM_FREE(ptr,sizeof (ELAN3_HMENT)) -+ -+extern void elan3mmu_init_osdep(void); -+extern void elan3mmu_fini_osdep(void); -+ -+extern void elan3mmu_pte_range_unload (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_range_update (ELAN3MMU *elan3mmu, struct mm_struct *mm, caddr_t addr, unsigned long len); -+extern void elan3mmu_pte_ctxt_unload(ELAN3MMU *elan3mmu); -+ -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elan3ops.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elan3ops.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elan3ops.h 2005-07-28 14:52:52.942664448 -0400 -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: elan3ops.h,v 1.3 2003/09/24 13:57:24 david Exp $ */ -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elan3ops.h,v $ */ -+ -+#ifndef _ELAN3_OPS_H -+#define _ELAN3_OPS_H -+ -+int get_position (void *arg, ELAN_POSITION *position); -+int set_position (void *arg, unsigned short nodeId, unsigned short numNodes); -+ -+int elan3mod_create_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+int elan3mod_destroy_cap (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap); -+ -+int elan3mod_create_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+int elan3mod_destroy_vp (void *arg, ELAN_CAP_OWNER owner, ELAN_CAPABILITY *cap, ELAN_CAPABILITY *map); -+ -+int elan3mod_attach_cap (void *arg_ctxt, ELAN_CAPABILITY *cap); -+int elan3mod_detach_cap (void *arg_ctxt); -+ -+extern ELAN_DEV_OPS elan3_dev_ops; -+ -+int stats_get_index_name (void *arg, uint index, caddr_t name); -+int stats_get_block (void *arg, uint entries, ulong *value); -+int stats_clear_block (void *arg); -+ -+int elan3_register_dev_stats (ELAN3_DEV * dev); -+void elan3_deregister_dev_stats (ELAN3_DEV * dev); -+ -+ -+#endif /* __ELAN3_OPS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elanctxt.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elanctxt.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elanctxt.h 2005-07-28 14:52:52.944664144 -0400 -@@ -0,0 +1,856 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANCTXT_H -+#define _ELAN3_ELANCTXT_H -+ -+#ident "$Id: elanctxt.h,v 1.81 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanctxt.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+ -+#define BumpUserStat(ctxt, stat) ((ctxt)->FlagPage->stat++) -+ -+#if defined(__LITTLE_ENDIAN__) -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Areg; -+ E3_uint32 Breg; -+ } r; -+ struct -+ { -+ E3_uint32 Addr; -+ E3_uint32 ContextType; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef E3_EventInt E3_EventInt_BE; -+typedef E3_IprocTrapHeader E3_IprocTrapHeader_BE; -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+typedef E3_FaultSave E3_FaultSave_BE; -+ -+typedef union -+{ -+ E3_uint64 Align64; -+ E3_DMA s; -+} E3_DMA_BE; -+ -+typedef E3_ThreadQueue E3_ThreadQueue_BE; -+ -+#else -+ -+/* "Big-Endian" data structures copied by 64 bit loads, these are 32 bit word flipped */ -+/* from the corresponding data structure. */ -+ -+typedef union _CProcTrapBuf -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ E3_uint32 Breg; -+ E3_uint32 Areg; -+ } r; -+ struct -+ { -+ E3_uint32 ContextType; -+ E3_uint32 Addr; -+ } s; -+} CProcTrapBuf_BE; -+ -+typedef union _E3_EventInt_BE -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ E3_uint32 IntCookie; -+ } s; -+} E3_EventInt_BE; -+ -+typedef union _E3_IprocTrapHeader_BE -+{ -+ E3_uint64 Align64; -+ -+ struct -+ { -+ E3_uint32 TrAddr; -+ E3_TrTypeCntx TrTypeCntx; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ E3_uint32 TrData0; -+ } s; -+} E3_IprocTrapHeader_BE; -+ -+typedef E3_IprocTrapData E3_IprocTrapData_BE; -+ -+typedef union _E3_FaultSave_be -+{ -+ E3_uint64 Align64; -+ struct { -+ volatile E3_uint32 FaultContext; -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 EventAddress; -+ volatile E3_uint32 FaultAddress; -+ } s; -+} E3_FaultSave_BE; -+ -+typedef union _e3_dma_be -+{ -+ E3_uint64 Align64; -+ struct { -+ E3_uint32 dma_size; -+ E3_DmaType dma_u; -+ E3_Addr dma_dest; -+ E3_Addr dma_source; -+ E3_CookieVProc dma_destCookieProc; -+ E3_Addr dma_destEvent; -+ E3_CookieVProc dma_srcCookieProc; -+ E3_Addr dma_srcEvent; -+ } s; -+} E3_DMA_BE; -+ -+typedef union _E3_ThreadQueue_BE -+{ -+ E3_uint64 Align64; -+ struct -+ { -+ /* copied by 64 bit copy from elan to main */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_Addr Thread; /* Bits 32 to 63 */ -+ } s; -+} E3_ThreadQueue_BE; -+ -+#endif /* defined(LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) */ -+ -+typedef struct neterr_msg -+{ -+ E3_uint32 Rail; /* Rail error received on */ -+ ELAN_CAPABILITY SrcCapability; /* Capability of source of packet */ -+ ELAN_CAPABILITY DstCapability; /* Capability of dest of packet */ -+ -+ E3_uint32 DstProcess; /* Virtual Process of dest of packet */ -+ E3_Addr CookieAddr; /* Cookie Address (or NULL for DMA) */ -+ E3_uint32 CookieVProc; /* Cookie and VP (identifies DMA) */ -+ E3_uint32 NextCookie; /* Next Cookie value (for thread) */ -+ E3_uint32 WaitForEop; /* Wait for EOP transaction */ -+} NETERR_MSG; -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Associated with each input channel can be a network error -+ * resolver structure, which can be queued on the network -+ * error resolver threads to perform RPCs to the other kernels -+ * when a network error occurs with an identify transaction -+ * included -+ */ -+typedef struct neterr_resolver -+{ -+ struct neterr_resolver *Next; -+ -+ spinlock_t Lock; -+ -+ struct elan3_ctxt *Ctxt; -+ ELAN_LOCATION Location; -+ -+ int Completed; -+ int Status; -+ long Timestamp; -+ -+ NETERR_MSG Message; -+} NETERR_RESOLVER; -+ -+ -+typedef struct neterr_fixup -+{ -+ struct neterr_fixup *Next; -+ -+ kcondvar_t Wait; -+ int Completed; -+ int Status; -+ -+ NETERR_MSG Message; -+} NETERR_FIXUP; -+ -+#endif /* __KERNEL__ */ -+ -+/* Each of the following structures must be padded to a whole */ -+/* number of 64 bit words since the kernel uses 64 bit load/stores */ -+/* to transfer the elan register state. */ -+typedef struct command_trap -+{ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_uint32 Pad; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ CProcTrapBuf_BE TrapBuf; /* 8 bytes */ -+} COMMAND_TRAP; -+ -+typedef struct thread_trap -+{ -+ E3_uint32 Registers[32]; /* 128 bytes */ -+#define REG_GLOBALS 0 -+#define REG_OUTS 8 -+#define REG_LOCALS 16 -+#define REG_INS 24 -+ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE DataFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE InstFaultSave; /* 16 bytes */ -+ E3_FaultSave_BE OpenFaultSave; /* 16 bytes */ -+ -+ E3_Status_Reg Status; /* 4 bytes */ -+ -+ E3_Addr pc; /* 4 bytes */ -+ E3_Addr npc; /* 4 bytes */ -+ E3_Addr StartPC; /* 4 bytes */ -+ E3_Addr sp; /* 4 bytes */ -+ E3_uint32 mi; /* 4 bytes */ -+ E3_TrapBits TrapBits; /* 4 bytes */ -+ E3_DirtyBits DirtyBits; /* 4 bytes */ -+} THREAD_TRAP; -+ -+typedef struct dma_trap -+{ -+ E3_DMA_BE Desc; /* 32 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ E3_FaultSave_BE Data0; /* 16 bytes */ -+ E3_FaultSave_BE Data1; /* 16 bytes */ -+ E3_FaultSave_BE Data2; /* 16 bytes */ -+ E3_FaultSave_BE Data3; /* 16 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_DmaInfo PacketInfo; /* 4 bytes */ -+} DMA_TRAP; -+ -+typedef struct input_trap -+{ -+ E3_uint32 State; /* 4 bytes */ -+ E3_Status_Reg Status; /* 4 bytes */ -+ E3_FaultSave_BE FaultSave; /* 16 bytes */ -+ -+ u_int NumTransactions; /* 4 bytes */ -+ u_int Overflow; /* 4 bytes */ -+ u_int AckSent; /* 4 bytes */ -+ u_int BadTransaction; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE *TrappedTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *TrappedDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *WaitForEopTransaction; /* 4 bytes */ -+ E3_IprocTrapData_BE *WaitForEopDataBuffer; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *DmaIdentifyTransaction; /* 4 bytes */ -+ E3_IprocTrapHeader_BE *ThreadIdentifyTransaction; /* 4 bytes */ -+ E3_Addr LockQueuePointer; /* 4 bytes */ -+ E3_Addr UnlockQueuePointer; /* 4 bytes */ -+ -+ E3_IprocTrapHeader_BE Transactions[MAX_TRAPPED_TRANS]; /* n * 8 bytes */ -+ E3_IprocTrapData_BE DataBuffers[MAX_TRAPPED_TRANS]; /* n * 64 bytes */ -+} INPUT_TRAP; -+ -+typedef struct input_fault_save -+{ -+ struct input_fault_save *Next; -+ E3_Addr Addr; -+ E3_uint32 Count; -+} INPUT_FAULT_SAVE; -+ -+#define NUM_INPUT_FAULT_SAVE 32 -+#define MIN_INPUT_FAULT_PAGES 8 -+#define MAX_INPUT_FAULT_PAGES 128 -+ -+typedef E3_uint32 EVENT_COOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct event_cookie_entry -+{ -+ struct event_cookie_entry *ent_next; -+ struct event_cookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ EVENT_COOKIE ent_cookie; -+ EVENT_COOKIE ent_fired; -+ kcondvar_t ent_wait; -+} EVENT_COOKIE_ENTRY; -+ -+typedef struct event_cookie_table -+{ -+ struct event_cookie_table *tbl_next; -+ struct event_cookie_table *tbl_prev; -+ -+ unsigned long tbl_task; -+ unsigned long tbl_handle; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ EVENT_COOKIE_ENTRY *tbl_entries; -+} EVENT_COOKIE_TABLE; -+ -+#define NBYTES_PER_SMALL_ROUTE 8 -+#define NBYTES_PER_LARGE_ROUTE 16 -+ -+#define ROUTE_BLOCK_SIZE ELAN3_PAGE_SIZE -+#define NROUTES_PER_BLOCK (ROUTE_BLOCK_SIZE/NBYTES_PER_LARGE_ROUTE) -+ -+typedef struct elan3_routes -+{ -+ struct elan3_routes *Next; /* Can be chained together */ -+ -+ sdramaddr_t Routes; /* sdram offset of route entries */ -+ bitmap_t Bitmap[BT_BITOUL(NROUTES_PER_BLOCK)]; /* Bitmap of which entries are used */ -+} ELAN3_ROUTES; -+ -+ -+typedef struct elan3_route_table -+{ -+ spinlock_t Lock; /* Route lock */ -+ sdramaddr_t Table; /* Kernel address for route table */ -+ u_int Size; /* # entries in route table */ -+ -+ ELAN3_ROUTES *LargeRoutes; /* Large routes */ -+} ELAN3_ROUTE_TABLE; -+ -+typedef struct elan3_vpseg -+{ -+ struct elan3_vpseg *Next; -+ int Process; /* Virtual process */ -+ int Entries; /* and # processes */ -+ int Type; /* Type of cookie */ -+ -+ union -+ { -+ -+ ELAN_CAPABILITY Capability; /* Capability of remote segment */ -+# define SegCapability SegUnion.Capability -+ struct { -+ u_short LowProc; /* Base process number */ -+ u_short HighProc; /* and high process number */ -+# define SegLowProc SegUnion.BROADCAST.LowProc -+# define SegHighProc SegUnion.BROADCAST.HighProc -+ } BROADCAST; -+ } SegUnion; -+} ELAN3_VPSEG; -+ -+#define ELAN3_VPSEG_UNINT 0 /* Unitialised */ -+#define ELAN3_VPSEG_P2P 1 /* Point to Point */ -+#define ELAN3_VPSEG_BROADCAST 2 /* Broadcast */ -+ -+#define NUM_LISTS 7 /* Number of "swap" lists */ -+ -+typedef struct elan3_ctxt -+{ -+ struct elan3_ctxt *Next; /* can be queued on a task */ -+ struct elan3_ctxt *Prev; -+ -+ CtxtHandle Handle; /* user handle */ -+ int RefCnt; /* reference count */ -+ -+ ELAN3MMU *Elan3mmu; /* elan3mmu allocated for Elan translations */ -+ -+ struct elan3_ops *Operations; /* User supplied helper functions */ -+ void *Private; /* Users private pointer */ -+ -+ int Status; /* Status (guarded by dev_mutex) */ -+ int OthersState; /* State of halt queueing for dma/thread */ -+ int LwpCount; /* Number of lwp's running */ -+ -+ ELAN3_DEV *Device; /* Elan device */ -+ -+ ELAN_CAPABILITY Capability; /* Capability I've attached as */ -+ ELAN_POSITION Position; /* Position when I was created */ -+ -+ ELAN3_VPSEG *VpSegs; /* List of virtual process segments */ -+ ELAN3_ROUTE_TABLE *RouteTable; -+ -+ krwlock_t VpLock; /* Reader/writer lock for vp list */ -+ kmutex_t SwapListsLock; /* mutex to lock swap lists */ -+ kmutex_t CmdLock; /* mutex to lock trapped dma command */ -+ kmutex_t CmdPortLock; /* mutex to load/unload commandport xlation */ -+ -+ kcondvar_t Wait; /* Condition variable to sleep on */ -+ kcondvar_t CommandPortWait; /* Condition variable to wait for commandport */ -+ kcondvar_t LwpWait; /* Condition variable to wait for lwps to stop */ -+ kcondvar_t HaltWait; /* Condition variable to wait for halt */ -+ int Halted; /* and flag for halt cv */ -+ -+ caddr_t CommandPageMapping; /* user virtual address for command page mapping */ -+ ioaddr_t CommandPage; /* Elan command port mapping page */ -+ DeviceMappingHandle CommandPageHandle; /* DDI Handle */ -+ ioaddr_t CommandPort; /* Elan command port */ -+ void *CommandPortItem; /* Item we're re-issuing to commandport */ -+ -+ ELAN3_FLAGSTATS *FlagPage; /* Page visible to user process */ -+ -+ COMMAND_TRAP *CommandTraps; /* Command port traps */ -+ ELAN3_SPLIT_QUEUE CommandTrapQ; -+ -+ CProcTrapBuf_BE *Commands; /* Overflowed commands */ -+ ELAN3_QUEUE CommandQ; -+ -+ THREAD_TRAP *ThreadTraps; /* Thread processor traps */ -+ ELAN3_QUEUE ThreadTrapQ; -+ -+ DMA_TRAP *DmaTraps; /* Dma processor tra[ed */ -+ ELAN3_QUEUE DmaTrapQ; -+ -+ INPUT_TRAP Input0Trap; /* Inputter channel 0 trap */ -+ INPUT_TRAP Input1Trap; /* Inputter channel 1 trap */ -+ NETERR_RESOLVER *Input0Resolver; /* Inputter channel 0 network error resolver */ -+ NETERR_RESOLVER *Input1Resolver; /* Inputter channel 1 network error resolver */ -+ -+ INPUT_FAULT_SAVE InputFaults[NUM_INPUT_FAULT_SAVE]; /* stored writeblock addresses */ -+ INPUT_FAULT_SAVE *InputFaultList; /* organized in list for LRU */ -+ spinlock_t InputFaultLock; /* and lock for list */ -+ -+ kmutex_t NetworkErrorLock; -+ NETERR_FIXUP *NetworkErrorFixups; -+ -+ EVENT_COOKIE *EventCookies; /* Event cookies. */ -+ ELAN3_QUEUE EventCookieQ; -+ -+ E3_Addr *SwapThreads; /* Swapped Thread Queue */ -+ ELAN3_QUEUE SwapThreadQ; -+ -+ E3_DMA_BE *SwapDmas; /* Swapped Dmas Queue */ -+ ELAN3_QUEUE SwapDmaQ; -+ -+ int ItemCount[NUM_LISTS]; /* Count of items on each swap list */ -+ int inhibit; /* if set lwp not to reload translations */ -+ -+ int Disabled; -+} ELAN3_CTXT; -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_ctxt::Status elan3_ctxt::OthersState -+ elan3_ctxt::CommandTrapQ elan3_ctxt::CommandQ elan3_ctxt::ThreadTrapQ elan3_ctxt::DmaTrapQ -+ elan3_ctxt::Input0Trap elan3_ctxt::Input1Trap elan3_ctxt::EventCookieQ elan3_ctxt::SwapThreadQ -+ elan3_ctxt::SwapDmaQ elan3_ctxt::CommandPortItem elan3_ctxt::LwpCount)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_ctxt::SwapListsLock, -+ elan3_ctxt::ItemCount)) -+_NOTE(RWLOCK_PROTECTS_DATA(elan3_ctxt::VpLock, -+ elan3_ctxt::VpSegs elan3_vpseg::Next elan3_vpseg::Process -+ elan3_vpseg::Entries elan3_vpseg::Type)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_ctxt::ItemCount elan3_ctxt::Status elan3_ctxt::CommandPortItem)) -+ -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock elan3_ctxt::CmdLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(elan3_ctxt::SwapListsLock as::a_lock)) /* implicit by pagefault */ -+ -+#define CTXT_DETACHED (1 << 0) /* Context is detached. */ -+#define CTXT_NO_LWPS (1 << 1) /* No lwp's to handle faults */ -+#define CTXT_EXITING (1 << 2) /* User process is exiting */ -+ -+#define CTXT_SWAPPING_OUT (1 << 3) /* Context is swapping out */ -+#define CTXT_SWAPPED_OUT (1 << 4) /* Context is swapped out */ -+ -+#define CTXT_SWAP_FREE (1 << 5) /* Swap buffer is free */ -+#define CTXT_SWAP_VALID (1 << 6) /* Swap buffer has queue entries in it */ -+ -+#define CTXT_DMA_QUEUE_FULL (1 << 7) /* Dma trap queue is full */ -+#define CTXT_THREAD_QUEUE_FULL (1 << 8) /* Thread trap queue is full */ -+#define CTXT_EVENT_QUEUE_FULL (1 << 9) /* Event interrupt queue is full */ -+#define CTXT_COMMAND_OVERFLOW_ERROR (1 << 10) /* Trap queue overflow */ -+ -+#define CTXT_SWAP_WANTED (1 << 11) /* Some one wanted to swap */ -+#define CTXT_WAITING_SWAPIN (1 << 12) /* Someone waiting on swapin */ -+ -+#define CTXT_WAITING_COMMAND (1 << 13) /* swgelan waiting on command port */ -+#define CTXT_COMMAND_MAPPED_MAIN (1 << 14) /* segelan has mapped command port */ -+ -+#define CTXT_QUEUES_EMPTY (1 << 15) /* dma/thread run queues are empty */ -+#define CTXT_QUEUES_EMPTYING (1 << 16) /* dma/thread run queues are being emptied */ -+ -+#define CTXT_USER_FILTERING (1 << 17) /* user requested context filter */ -+ -+#define CTXT_KERNEL (1 << 18) /* context is a kernel context */ -+#define CTXT_COMMAND_MAPPED_ELAN (1 << 19) /* command port is mapped for elan */ -+#define CTXT_FIXUP_NETERR (1 << 20) /* fixing up a network error */ -+ -+ -+#define CTXT_SWAPPED_REASONS (CTXT_NO_LWPS | \ -+ CTXT_DETACHED | \ -+ CTXT_EXITING | \ -+ CTXT_FIXUP_NETERR) -+ -+#define CTXT_OTHERS_REASONS (CTXT_EVENT_QUEUE_FULL | \ -+ CTXT_DMA_QUEUE_FULL | \ -+ CTXT_THREAD_QUEUE_FULL | \ -+ CTXT_COMMAND_OVERFLOW_ERROR | \ -+ CTXT_SWAPPED_REASONS) -+ -+#define CTXT_INPUTTER_REASONS (CTXT_USER_FILTERING | \ -+ CTXT_OTHERS_REASONS) -+ -+#define CTXT_COMMAND_MAPPED (CTXT_COMMAND_MAPPED_MAIN | \ -+ CTXT_COMMAND_MAPPED_ELAN) -+ -+#define CTXT_IS_KERNEL(ctxt) ((ctxt)->Status & CTXT_KERNEL) -+ -+/* -+ * State values for ctxt_inputterState/ctxt_commandportStats -+ */ -+#define CTXT_STATE_OK 0 -+#define CTXT_STATE_TRAPPED 1 /* Inputter channel 0 trapped */ -+#define CTXT_STATE_RESOLVING 2 /* An LWP is resolving the trap */ -+#define CTXT_STATE_NEEDS_RESTART 3 /* Th trapped packet needs to be executed */ -+#define CTXT_STATE_NETWORK_ERROR 4 /* We're waiting on an RPC for the identify transaction */ -+#define CTXT_STATE_EXECUTING 5 /* An LWP is executing the trapped packet */ -+ -+/* -+ * State values for OthersState. -+ */ -+#define CTXT_OTHERS_RUNNING 0 -+#define CTXT_OTHERS_HALTING 1 -+#define CTXT_OTHERS_SWAPPING 2 -+#define CTXT_OTHERS_HALTING_MORE 3 -+#define CTXT_OTHERS_SWAPPING_MORE 4 -+#define CTXT_OTHERS_SWAPPED 5 -+ -+typedef struct elan3_ops -+{ -+ u_int Version; -+ -+ int (*Exception) (ELAN3_CTXT *ctxt, int type, int proc, void *trap, va_list ap); -+ -+ /* swap item list functions */ -+ int (*GetWordItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_uint32 *valuep); -+ int (*GetBlockItem) (ELAN3_CTXT *ctxt, int list, void **itemp, E3_Addr *valuep); -+ void (*PutWordItem) (ELAN3_CTXT *ctxt, int list, E3_Addr value); -+ void (*PutBlockItem) (ELAN3_CTXT *ctxt, int list, E3_uint32 *ptr); -+ void (*PutbackItem) (ELAN3_CTXT *ctxt, int list, void *item); -+ void (*FreeWordItem) (ELAN3_CTXT *ctxt, void *item); -+ void (*FreeBlockItem) (ELAN3_CTXT *ctxt, void *item); -+ int (*CountItems) (ELAN3_CTXT *ctxt, int list); -+ -+ /* event interrupt cookie */ -+ int (*Event) (ELAN3_CTXT *ctxt, E3_uint32 cookie, int flag); -+ -+ /* swapin/swapout functions. */ -+ void (*Swapin) (ELAN3_CTXT *ctxt); -+ void (*Swapout) (ELAN3_CTXT *ctxt); -+ -+ /* Free of private data */ -+ void (*FreePrivate) (ELAN3_CTXT *ctxt); -+ -+ /* Fixup a network error */ -+ int (*FixupNetworkError) (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef); -+ -+ /* Interrupt handler trap interface */ -+ int (*DProcTrap) (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+ int (*TProcTrap) (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+ int (*IProcTrap) (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, int chan); -+ int (*CProcTrap) (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+ int (*CProcReissue) (ELAN3_CTXT *ctxt, CProcTrapBuf_BE *TrapBuf); -+ -+ /* User memory access functions */ -+ int (*StartFaultCheck)(ELAN3_CTXT *ctxt); -+ void (*EndFaultCheck) (ELAN3_CTXT *ctxt); -+ -+ E3_uint8 (*Load8) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store8) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint8 val); -+ E3_uint16 (*Load16) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store16) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint16 val); -+ E3_uint32 (*Load32) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store32) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint32 val); -+ E3_uint64 (*Load64) (ELAN3_CTXT *ctxt, E3_Addr addr); -+ void (*Store64) (ELAN3_CTXT *ctxt, E3_Addr addr, E3_uint64 val); -+ -+} ELAN3_OPS; -+ -+#define ELAN3_OPS_VERSION 0xdeef0001 -+ -+/* -+ * Flags for ops_event. -+ */ -+#define OP_INTR 0 /* Called from interrupt handler */ -+#define OP_LWP 1 /* Called from "lwp" */ -+ -+/* -+ * Return codes for "ops" functions. -+ */ -+#define OP_DEFER 0 /* Defer to next lower interrupt */ -+#define OP_IGNORE 1 /* No event hander, so ignore it */ -+#define OP_HANDLED 2 /* Handled event (resume thread) */ -+#define OP_FAILED 3 /* Failed */ -+ -+#define ELAN3_CALL_OP(ctxt,fn) ((ctxt)->Operations && (ctxt)->Operations->fn) ? (ctxt)->Operations->fn -+ -+#define ELAN3_OP_EXCEPTION(ctxt,type,proc,trap,ap) (ELAN3_CALL_OP(ctxt,Exception) (ctxt,type,proc,trap,ap) : OP_IGNORE) -+#define ELAN3_OP_GET_WORD_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetWordItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_GET_BLOCK_ITEM(ctxt,list,itemp,valuep) (ELAN3_CALL_OP(ctxt,GetBlockItem) (ctxt,list,itemp,valuep) : 0) -+#define ELAN3_OP_PUT_WORD_ITEM(ctxt,list,value) (ELAN3_CALL_OP(ctxt,PutWordItem) (ctxt,list,value) : (void)0) -+#define ELAN3_OP_PUT_BLOCK_ITEM(ctxt,list,ptr) (ELAN3_CALL_OP(ctxt,PutBlockItem) (ctxt,list,ptr) : (void)0) -+#define ELAN3_OP_PUTBACK_ITEM(ctxt,list,item) (ELAN3_CALL_OP(ctxt,PutbackItem) (ctxt,list,item) : (void)0) -+#define ELAN3_OP_FREE_WORD_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeWordItem) (ctxt,item) : (void)0) -+#define ELAN3_OP_FREE_BLOCK_ITEM(ctxt,item) (ELAN3_CALL_OP(ctxt,FreeBlockItem)(ctxt,item) : (void)0) -+#define ELAN3_OP_COUNT_ITEMS(ctxt,list) (ELAN3_CALL_OP(ctxt,CountItems)(ctxt,list) : 0) -+#define ELAN3_OP_EVENT(ctxt,cookie,flag) (ELAN3_CALL_OP(ctxt,Event)(ctxt,cookie,flag) : OP_IGNORE) -+#define ELAN3_OP_SWAPIN(ctxt) (ELAN3_CALL_OP(ctxt,Swapin)(ctxt) : (void)0) -+#define ELAN3_OP_SWAPOUT(ctxt) (ELAN3_CALL_OP(ctxt,Swapout)(ctxt) : (void)0) -+#define ELAN3_OP_FREE_PRIVATE(ctxt) (ELAN3_CALL_OP(ctxt,FreePrivate)(ctxt) : (void)0) -+#define ELAN3_OP_FIXUP_NETWORK_ERROR(ctxt, nef) (ELAN3_CALL_OP(ctxt,FixupNetworkError)(ctxt,nef) : OP_FAILED) -+ -+#define ELAN3_OP_DPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,DProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_TPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,TProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_IPROC_TRAP(ctxt, trap, chan) (ELAN3_CALL_OP(ctxt,IProcTrap)(ctxt,trap,chan) : OP_DEFER) -+#define ELAN3_OP_CPROC_TRAP(ctxt, trap) (ELAN3_CALL_OP(ctxt,CProcTrap)(ctxt,trap) : OP_DEFER) -+#define ELAN3_OP_CPROC_REISSUE(ctxt,tbuf) (ELAN3_CALL_OP(ctxt,CProcReissue)(ctxt, tbuf) : OP_DEFER) -+ -+#define ELAN3_OP_START_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,StartFaultCheck)(ctxt) : 0) -+#define ELAN3_OP_END_FAULT_CHECK(ctxt) (ELAN3_CALL_OP(ctxt,EndFaultCheck)(ctxt) : (void)0) -+#define ELAN3_OP_LOAD8(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load8)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE8(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store8)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD16(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load16)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE16(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store16)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD32(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load32)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE32(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store32)(ctxt,addr,val) : (void)0) -+#define ELAN3_OP_LOAD64(ctxt,addr) (ELAN3_CALL_OP(ctxt,Load64)(ctxt,addr) : 0) -+#define ELAN3_OP_STORE64(ctxt,addr,val) (ELAN3_CALL_OP(ctxt,Store64)(ctxt,addr,val) : (void)0) -+ -+#endif /* __KERNEL__ */ -+ -+/* "list" arguement to ops functions */ -+#define LIST_DMA_PTR 0 -+#define LIST_DMA_DESC 1 -+#define LIST_THREAD 2 -+#define LIST_COMMAND 3 -+#define LIST_SETEVENT 4 -+#define LIST_FREE_WORD 5 -+#define LIST_FREE_BLOCK 6 -+ -+#define MAX_LISTS 7 -+ -+#if defined(__KERNEL__) && MAX_LISTS != NUM_LISTS -+# error Check NUM_LISTS == MAX_LISTS -+#endif -+ -+/* -+ * Values for the 'type' field to PostException(). -+ */ -+#define EXCEPTION_INVALID_ADDR 1 /* FaultArea, res */ -+#define EXCEPTION_UNIMP_INSTR 2 /* instr */ -+#define EXCEPTION_INVALID_PROCESS 3 /* proc, res */ -+#define EXCEPTION_SIMULATION_FAILED 4 /* */ -+#define EXCEPTION_UNIMPLEMENTED 5 /* */ -+#define EXCEPTION_SWAP_FAULT 6 /* */ -+#define EXCEPTION_SWAP_FAILED 7 /* */ -+#define EXCEPTION_BAD_PACKET 8 /* */ -+#define EXCEPTION_FAULTED 9 /* addr */ -+#define EXCEPTION_QUEUE_OVERFLOW 10 /* FaultArea, TrapType */ -+#define EXCEPTION_COMMAND_OVERFLOW 11 /* count */ -+#define EXCEPTION_DMA_RETRY_FAIL 12 /* */ -+#define EXCEPTION_CHAINED_EVENT 13 /* EventAddr */ -+#define EXCEPTION_THREAD_KILLED 14 /* */ -+#define EXCEPTION_CANNOT_SAVE_THREAD 15 -+#define EXCEPTION_BAD_SYSCALL 16 /* */ -+#define EXCEPTION_DEBUG 17 -+#define EXCEPTION_BAD_EVENT 18 /* */ -+#define EXCEPTION_NETWORK_ERROR 19 /* rvp */ -+#define EXCEPTION_BUS_ERROR 20 -+#define EXCEPTION_COOKIE_ERROR 21 -+#define EXCEPTION_PACKET_TIMEOUT 22 -+#define EXCEPTION_BAD_DMA 23 /* */ -+#define EXCEPTION_ENOMEM 24 -+ -+/* -+ * Values for the 'proc' field to ElanException(). -+ */ -+#define COMMAND_PROC 1 -+#define THREAD_PROC 2 -+#define DMA_PROC 3 -+#define INPUT_PROC 4 -+#define EVENT_PROC 5 -+ -+/* Flags to IssueDmaCommand */ -+#define ISSUE_COMMAND_FOR_CPROC 1 -+#define ISSUE_COMMAND_CANT_WAIT 2 -+ -+/* Return code from IssueDmaCommand.*/ -+#define ISSUE_COMMAND_OK 0 -+#define ISSUE_COMMAND_TRAPPED 1 -+#define ISSUE_COMMAND_RETRY 2 -+#define ISSUE_COMMAND_WAIT 3 -+ -+#ifdef __KERNEL__ -+ -+extern ELAN3_CTXT *elan3_alloc(ELAN3_DEV *dev, int kernel); -+extern void elan3_free (ELAN3_CTXT *ctxt); -+ -+extern int elan3_attach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern int elan3_doattach (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+extern void elan3_detach (ELAN3_CTXT *ctxt); -+extern void elan3_dodetach (ELAN3_CTXT *ctxt); -+ -+extern int elan3_addvp (ELAN3_CTXT *ctxt, int process, ELAN_CAPABILITY *cap); -+extern int elan3_removevp (ELAN3_CTXT *ctxt, int process); -+extern int elan3_addbcastvp(ELAN3_CTXT *ctxt, int process, int base, int count); -+ -+extern int elan3_process (ELAN3_CTXT *ctxt); -+ -+extern int elan3_load_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_check_route(ELAN3_CTXT *ctxt, int process, E3_uint16 *flits, E3_uint32 *routeError); -+ -+extern int elan3_lwp (ELAN3_CTXT *ctxt); -+ -+extern void elan3_swapin (ELAN3_CTXT *ctxt, int reason); -+extern void elan3_swapout (ELAN3_CTXT *ctxt, int reason); -+extern int elan3_pagefault (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSave, int npages); -+extern void elan3_block_inputter (ELAN3_CTXT *ctxt, int block); -+ -+ -+extern E3_Addr elan3_init_thread (ELAN3_DEV *dev, E3_Addr fn, E3_Addr addr, sdramaddr_t stack, int stackSize, int nargs, ...); -+ -+extern void SetInputterState (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void SetInputterStateForContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+extern void UnloadCommandPageMapping (ELAN3_CTXT *ctxt); -+extern void StartSwapoutContext (ELAN3_CTXT *ctxt, E3_uint32 Pend, E3_uint32 *Maskp); -+ -+extern int HandleExceptions (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int RestartContext (ELAN3_CTXT *ctxt, unsigned long *flags); -+extern int CheckCommandQueueFlushed (ELAN3_CTXT *ctxt, E3_uint32 cflags, int how, unsigned long *flags); -+extern int IssueCommand (ELAN3_CTXT *ctxt, unsigned cmdoff, E3_Addr value, int flags); -+extern int IssueDmaCommand (ELAN3_CTXT *ctxt, E3_Addr value, void *item, int flags); -+extern int WaitForDmaCommand (ELAN3_CTXT *ctxt, void *item, int flags); -+extern void FixupEventTrap (ELAN3_CTXT *ctxt, int proc, void *trap, E3_uint32 TrapType, -+ E3_FaultSave_BE *FaultSaveArea, int flags); -+extern int SimulateBlockCopy (ELAN3_CTXT *ctxt, E3_Addr EventAddress); -+extern void ReissueEvent (ELAN3_CTXT *ctxt, E3_Addr addr,int flags); -+extern int SetEventsNeedRestart (ELAN3_CTXT *ctxt); -+extern void RestartSetEvents (ELAN3_CTXT *ctxt); -+extern int RunEventType (ELAN3_CTXT *ctxt, E3_FaultSave_BE *FaultSaveArea, E3_uint32 EventType); -+extern void WakeupLwp (ELAN3_DEV *dev, void *arg); -+extern void QueueEventInterrupt (ELAN3_CTXT *ctxt, E3_uint32 cookie); -+extern int WaitForCommandPort (ELAN3_CTXT *ctxt); -+ -+extern int ElanException (ELAN3_CTXT *ctxt, int type, int proc, void *trap, ...); -+ -+/* context_osdep.c */ -+extern int LoadElanTranslation (ELAN3_CTXT *ctxt, E3_Addr elanAddr, int len, int protFault, int writeable); -+extern void LoadCommandPortTranslation (ELAN3_CTXT *ctxt); -+ -+#if defined(DIGITAL_UNIX) -+/* seg_elan.c */ -+extern caddr_t elan3_segelan3_create (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_destroy (ELAN3_CTXT *ctxt); -+extern int elan3_segelan3_map (ELAN3_CTXT *ctxt); -+extern void elan3_segelan3_unmap (ELAN3_CTXT *ctxt); -+ -+/* seg_elanmem.c */ -+extern int elan3_segelanmem_create (ELAN3_DEV *dev, unsigned object, unsigned off, vm_offset_t *addrp, int len); -+#endif /* defined(DIGITAL_UNIX) */ -+ -+/* route_table.c */ -+extern ELAN3_ROUTE_TABLE *AllocateRouteTable (ELAN3_DEV *dev, int size); -+extern void FreeRouteTable (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl); -+extern int LoadRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp, int ctxnum, int nflits, E3_uint16 *flits); -+extern int GetRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int process, E3_uint16 *flits); -+extern void InvalidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ValidateRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+extern void ClearRoute (ELAN3_DEV *dev, ELAN3_ROUTE_TABLE *tbl, int vp); -+ -+extern int GenerateRoute (ELAN_POSITION *pos, E3_uint16 *flits, int lowid, int highid, int timeout, int highPri); -+extern int GenerateProbeRoute (E3_uint16 *flits, int nodeid, int level, int *linkup, int *linkdown, int adaptive); -+extern int GenerateCheckRoute (ELAN_POSITION *pos, E3_uint16 *flits, int level, int adaptive); -+ -+/* virtual_process.c */ -+extern ELAN_LOCATION ProcessToLocation (ELAN3_CTXT *ctxt, ELAN3_VPSEG *seg, int process, ELAN_CAPABILITY *cap); -+extern int ResolveVirtualProcess (ELAN3_CTXT *ctxt, int process); -+extern caddr_t CapabilityString (ELAN_CAPABILITY *cap); -+extern void UnloadVirtualProcess (ELAN3_CTXT *ctxt, ELAN_CAPABILITY *cap); -+ -+extern int elan3_get_route (ELAN3_CTXT *ctxt, int process, E3_uint16 *flits); -+extern int elan3_reset_route (ELAN3_CTXT *ctxt, int process); -+ -+/* cproc.c */ -+extern int NextCProcTrap (ELAN3_CTXT *ctxt, COMMAND_TRAP *trap); -+extern void ResolveCProcTrap (ELAN3_CTXT *ctxt); -+extern int RestartCProcTrap (ELAN3_CTXT *ctxt); -+ -+/* iproc.c */ -+extern void InspectIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern void ResolveIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvp); -+extern int RestartIProcTrap (ELAN3_CTXT *ctxt, INPUT_TRAP *trap); -+extern char *IProcTrapString (E3_IprocTrapHeader_BE *hdrp, E3_IprocTrapData *datap); -+extern void SimulateUnlockQueue (ELAN3_CTXT *ctxt, E3_Addr QueuePointer, int SentAck); -+ -+/* tproc.c */ -+extern int NextTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern void ResolveTProcTrap (ELAN3_CTXT *ctxt, THREAD_TRAP *trap); -+extern int TProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartTProcItems (ELAN3_CTXT *ctxt); -+extern E3_Addr SaveThreadToStack (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int SkipInstruction); -+extern void ReissueStackPointer (ELAN3_CTXT *ctxt, E3_Addr StackPointer); -+ -+/* tprocinsts.c */ -+extern int RollThreadToClose (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, E3_uint32 PAckVal); -+ -+/* tproc_osdep.c */ -+extern int ThreadSyscall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+extern int ThreadElancall (ELAN3_CTXT *ctxt, THREAD_TRAP *trap, int *skip); -+ -+/* dproc.c */ -+extern int NextDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void ResolveDProcTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern int DProcNeedsRestart (ELAN3_CTXT *ctxt); -+extern void RestartDProcItems (ELAN3_CTXT *ctxt); -+extern void RestartDmaDesc (ELAN3_CTXT *ctxt, E3_DMA_BE *desc); -+extern void RestartDmaTrap (ELAN3_CTXT *ctxt, DMA_TRAP *trap); -+extern void RestartDmaPtr (ELAN3_CTXT *ctxt, E3_Addr ptr); -+ -+/* network_error.c */ -+extern void InitialiseNetworkErrorResolver (void); -+extern void FinaliseNetworkErrorResolver (void); -+extern int QueueNetworkErrorResolver (ELAN3_CTXT *ctxt, INPUT_TRAP *trap, NETERR_RESOLVER **rvpp); -+extern void FreeNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern void CancelNetworkErrorResolver (NETERR_RESOLVER *rvp); -+extern int ExecuteNetworkErrorFixup (NETERR_MSG *msg); -+extern void CompleteNetworkErrorFixup (ELAN3_CTXT *ctxt, NETERR_FIXUP *nef, int status); -+ -+extern int AddNeterrServerSyscall (int elanId, void *configp, void *addrp, char *namep); -+ -+/* eventcookie.c */ -+extern void cookie_init(void); -+extern void cookie_fini(void); -+extern EVENT_COOKIE_TABLE *cookie_alloc_table (unsigned long task, unsigned long handle); -+extern void cookie_free_table (EVENT_COOKIE_TABLE *tbl); -+extern int cookie_alloc_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_free_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_fire_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_wait_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+extern int cookie_arm_cookie (EVENT_COOKIE_TABLE *tbl, EVENT_COOKIE cookie); -+ -+/* routecheck.c */ -+extern int elan3_route_check (ELAN3_CTXT *ctxt, E3_uint16 *flits, int destNode); -+extern int elan3_route_broadcast_check(ELAN3_CTXT *ctxt, E3_uint16 *flitsA, int lowNode, int highNode); -+ -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANCTXT_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elandebug.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elandebug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elandebug.h 2005-07-28 14:52:52.945663992 -0400 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANDEBUG_H -+#define _ELAN3_ELANDEBUG_H -+ -+#ident "$Id: elandebug.h,v 1.38 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandebug.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+extern u_int elan3_debug; -+extern u_int elan3_debug_console; -+extern u_int elan3_debug_buffer; -+extern u_int elan3_debug_ignore_dev; -+extern u_int elan3_debug_ignore_kcomm; -+extern u_int elan3_debug_ignore_ctxt; -+extern u_int elan3_debug_display_ctxt; -+ -+#define DBG_CONFIG 0x00000001 /* Module configuration */ -+#define DBG_HAT 0x00000002 -+#define DBG_FN 0x00000004 -+#define DBG_SEG 0x00000008 -+#define DBG_INTR 0x00000010 -+#define DBG_LWP 0x00000020 -+#define DBG_FAULT 0x00000040 -+#define DBG_EVENT 0x00000080 -+#define DBG_CPROC 0x00000100 -+#define DBG_TPROC 0x00000200 -+#define DBG_DPROC 0x00000400 -+#define DBG_IPROC 0x00000800 -+#define DBG_SWAP 0x00001000 -+#define DBG_CMD 0x00002000 -+#define DBG_VP 0x00004000 -+#define DBG_SYSCALL 0x00008000 -+#define DBG_BSCAN 0x00010000 -+#define DBG_LINKERR 0x00020000 -+#define DBG_NETERR 0x00040000 -+#define DBG_NETRPC 0x00080000 -+#define DBG_EVENTCOOKIE 0x00100000 -+#define DBG_SDRAM 0x00200000 -+ -+#define DBG_EP 0x10000000 -+#define DBG_EPCONSOLE 0x20000000 -+ -+#define DBG_EIP 0x40000000 -+#define DBG_EIPFAIL 0x80000000 -+ -+#define DBG_ALL 0xffffffff -+ -+/* values to pass as "ctxt" rather than a "ctxt" pointer */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_KCOMM ((void *) 1) -+#define DBG_ICS ((void *) 2) -+#define DBG_USER ((void *) 3) -+#define DBG_NTYPES 64 -+ -+#if defined(DEBUG_PRINTF) -+# define DBG(m,fn) ((elan3_debug&(m)) ? (void)(fn) : (void)0) -+# define PRINTF0(ctxt,m,fmt) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt) : (void)0) -+# define PRINTF1(ctxt,m,fmt,a) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a) : (void)0) -+# define PRINTF2(ctxt,m,fmt,a,b) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b) : (void)0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m,fmt,a,b,c,d,e,f) : (void)0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) ((elan3_debug&(m)) ? elan3_debugf(ctxt,m, ##args) : (void)0) -+#endif -+ -+#else -+ -+# define DBG(m, fn) do { ; } while (0) -+# define PRINTF0(ctxt,m,fmt) do { ; } while (0) -+# define PRINTF1(ctxt,m,fmt,a) do { ; } while (0) -+# define PRINTF2(ctxt,m,fmt,a,b) do { ; } while (0) -+# define PRINTF3(ctxt,m,fmt,a,b,c) do { ; } while (0) -+# define PRINTF4(ctxt,m,fmt,a,b,c,d) do { ; } while (0) -+# define PRINTF5(ctxt,m,fmt,a,b,c,d,e) do { ; } while (0) -+# define PRINTF6(ctxt,m,fmt,a,b,c,d,e,f) do { ; } while (0) -+#ifdef __GNUC__ -+# define PRINTF(ctxt,m,args...) do { ; } while (0) -+#endif -+ -+#endif /* DEBUG_PRINTF */ -+ -+#ifdef __GNUC__ -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...) -+ __attribute__ ((format (printf,3,4))); -+#else -+extern void elan3_debugf (void *ctxt, unsigned int mode, char *fmt, ...); -+#endif -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ELAN3_ELANDEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elandev.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elandev.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elandev.h 2005-07-28 14:52:52.946663840 -0400 -@@ -0,0 +1,581 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANDEV_H -+#define __ELAN3_ELANDEV_H -+ -+#ident "$Id: elandev.h,v 1.74.2.2 2004/12/10 11:10:19 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev.h,v $ */ -+ -+#include -+#include -+#include -+ -+#if defined(DIGITAL_UNIX) -+# include -+#elif defined(LINUX) -+# include -+#elif defined(SOLARIS) -+# include -+#endif -+ -+#ifndef TRUE -+# define TRUE 1 -+#endif -+#ifndef FALSE -+# define FALSE 0 -+#endif -+ -+/* -+ * Elan base address registers defined as follows : -+ */ -+#define ELAN3_BAR_SDRAM 0 -+#define ELAN3_BAR_COMMAND_PORT 1 -+#define ELAN3_BAR_REGISTERS 2 -+#define ELAN3_BAR_EBUS 3 -+ -+/* Macro to generate 'offset' to mmap "mem" device */ -+#define OFF_TO_SPACE(off) ((off) >> 28) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(space,off) (((space) << 28) | ((off) & 0x0FFFFFFF)) -+ -+#ifdef __KERNEL__ -+ -+/* -+ * Elan EBUS is configured as follows : -+ */ -+#define ELAN3_EBUS_ROM_OFFSET 0x000000 /* rom */ -+#define ELAN3_EBUS_INTPAL_OFFSET 0x180000 /* interrupt pal (write only) */ -+ -+#define ELAN3_EBUS_ROM_SIZE 0x100000 -+ -+/* -+ * Elan SDRAM is arranged as follows : -+ */ -+#define ELAN3_TANDQ_SIZE 0x0020000 /* Trap And Queue Size */ -+#define ELAN3_CONTEXT_SIZE 0x0010000 /* Context Table Size */ -+#define ELAN3_COMMAND_TRAP_SIZE 0x0010000 /* Command Port Trap Size */ -+ -+#ifdef MPSAS -+#define ELAN3_LN2_NUM_CONTEXTS 8 /* Support 256 contexts */ -+#else -+#define ELAN3_LN2_NUM_CONTEXTS 12 /* Support 4096 contexts */ -+#endif -+#define ELAN3_NUM_CONTEXTS (1 << ELAN3_LN2_NUM_CONTEXTS) /* Entries in context table */ -+ -+#define ELAN3_SDRAM_NUM_BANKS 4 /* Elan supports 4 Banks of Sdram */ -+#define ELAN3_SDRAM_BANK_SHIFT 26 /* each of which can be 64 mbytes ? */ -+#define ELAN3_SDRAM_BANK_SIZE (1 << ELAN3_SDRAM_BANK_SHIFT) -+ -+#define ELAN3_MAX_CACHE_SIZE (64 * 1024) /* Maximum cache size */ -+#define ELAN3_CACHE_SIZE (64 * 4 * E3_CACHELINE_SIZE) /* Elan3 has 8K cache */ -+ -+#ifndef offsetof -+#define offsetof(s, m) (size_t)(&(((s *)0)->m)) -+#endif -+ -+/* -+ * circular queue and macros to access members. -+ */ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_QUEUE; -+ -+typedef struct -+{ -+ u_int q_back; /* Next free space */ -+ u_int q_middle; /* Middle pointer */ -+ u_int q_front; /* First object to remove */ -+ u_int q_size; /* Size of queue */ -+ u_int q_count; /* Current number of entries */ -+ u_int q_slop; /* FULL <=> (count+slop) == size */ -+} ELAN3_SPLIT_QUEUE; -+ -+#define ELAN3_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop)+1, (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define ELAN3_QUEUE_FULL(q) ((q).q_count == ((q).q_size - (q).q_slop)) -+#define ELAN3_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size - 1) -+#define ELAN3_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define ELAN3_QUEUE_FRONT_EMPTY(q) ((q).q_front == (q).q_middle) -+#define ELAN3_QUEUE_BACK_EMPTY(q) ((q).q_middle == (q).q_back) -+#define ELAN3_QUEUE_ADD(q) ((q).q_back = ((q).q_back+1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_REMOVE(q) ((q).q_front = ((q).q_front+1) % (q).q_size, (q).q_count--) -+#define ELAN3_QUEUE_ADD_FRONT(q) ((q).q_front = ((q).q_front-1) % (q).q_size, (q).q_count++) -+#define ELAN3_QUEUE_CONSUME(q) ((q).q_middle = ((q).q_middle+1) % (q).q_size) -+#define ELAN3_QUEUE_FRONT(q,qArea) (&(qArea)[(q).q_front]) -+#define ELAN3_QUEUE_MIDDLE(q,qArea) (&(qArea)[(q).q_middle]) -+#define ELAN3_QUEUE_BACK(q,qArea) (&(qArea)[(q).q_back]) -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 17 /* allows max 64Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+#define SDRAM_FREELIST_TRIGGER 32 -+ -+typedef struct elan3_sdram_bank -+{ -+ u_int Size; /* Size of bank of memory */ -+ -+ ioaddr_t Mapping; /* Where mapped in the kernel */ -+ DeviceMappingHandle Handle; /* and mapping handle */ -+ -+ struct elan3_ptbl_gr **PtblGroups; -+ -+ bitmap_t *Bitmaps[SDRAM_NUM_FREE_LISTS]; -+} ELAN3_SDRAM_BANK; -+ -+typedef struct elan3_haltop -+{ -+ struct elan3_haltop *Next; /* Chain to next in list. */ -+ E3_uint32 Mask; /* Interrupt mask to see before calling function */ -+ -+ void (*Function)(void *, void *); /* Function to call */ -+ void *Arguement; /* Arguement to pass to function */ -+} ELAN3_HALTOP; -+ -+#define HALTOP_BATCH 32 -+ -+#endif /* __KERNEL__ */ -+ -+typedef struct elan3_stats -+{ -+ u_long Version; /* version field */ -+ u_long Interrupts; /* count of elan interrupts */ -+ u_long TlbFlushes; /* count of tlb flushes */ -+ u_long InvalidContext; /* count of traps with invalid context */ -+ u_long ComQueueHalfFull; /* count of interrupts due to com queue being half full */ -+ -+ u_long CProcTraps; /* count of cproc traps */ -+ u_long DProcTraps; /* count of dproc traps */ -+ u_long TProcTraps; /* cound of tproc traps */ -+ u_long IProcTraps; /* count of iproc traps */ -+ u_long EventInterrupts; /* count of event interrupts */ -+ -+ u_long PageFaults; /* count of elan page faults */ -+ -+ /* inputter related */ -+ u_long EopBadAcks; /* count of EOP_BAD_ACKs */ -+ u_long EopResets; /* count of EOP_ERROR_RESET */ -+ u_long InputterBadLength; /* count of BadLength */ -+ u_long InputterCRCDiscards; /* count of CRC_STATUS_DISCARD */ -+ u_long InputterCRCErrors; /* count of CRC_STATUS_ERROR */ -+ u_long InputterCRCBad; /* count of CRC_STATUS_BAD */ -+ u_long DmaNetworkErrors; /* count of errors in dma data */ -+ u_long DmaIdentifyNetworkErrors; /* count of errors after dma identify */ -+ u_long ThreadIdentifyNetworkErrors; /* count of errors after thread identify */ -+ -+ /* dma related */ -+ u_long DmaRetries; /* count of dma retries (due to retry fail count) */ -+ u_long DmaOutputTimeouts; /* count of dma output timeouts */ -+ u_long DmaPacketAckErrors; /* count of dma packet ack errors */ -+ -+ /* thread related */ -+ u_long ForcedTProcTraps; /* count of forced tproc traps */ -+ u_long TrapForTooManyInsts; /* count of too many instruction traps */ -+ u_long ThreadOutputTimeouts; /* count of thread output timeouts */ -+ u_long ThreadPacketAckErrors; /* count of thread packet ack errors */ -+ -+ /* link related */ -+ u_long LockError; /* count of RegPtr->Exts.LinkErrorTypes:LS_LockError */ -+ u_long DeskewError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DeskewError */ -+ u_long PhaseError; /* count of RegPtr->Exts.LinkErrorTypes:LS_PhaseError */ -+ u_long DataError; /* count of RegPtr->Exts.LinkErrorTypes:LS_DataError */ -+ u_long FifoOvFlow0; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow0 */ -+ u_long FifoOvFlow1; /* count of RegPtr->Exts.LinkErrorTypes:LS_FifoOvFlow1 */ -+ u_long LinkErrorValue; /* link error value on data error */ -+ -+ /* memory related */ -+ u_long CorrectableErrors; /* count of correctable ecc errors */ -+ u_long UncorrectableErrors; /* count of uncorrectable ecc errors */ -+ u_long MultipleErrors; /* count of multiple ecc errors */ -+ u_long SdramBytesFree; /* count of sdram bytes free */ -+ -+ /* Interrupt related */ -+ u_long LongestInterrupt; /* length of longest interrupt in ticks */ -+ -+ u_long EventPunts; /* count of punts of event interrupts to thread */ -+ u_long EventRescheds; /* count of reschedules of event interrupt thread */ -+} ELAN3_STATS; -+ -+#define ELAN3_STATS_VERSION (ulong)2 -+#define ELAN3_NUM_STATS (sizeof (ELAN3_STATS)/sizeof (u_long)) -+ -+#define ELAN3_STATS_DEV_FMT "elan3_stats_dev_%d" -+ -+#ifdef __KERNEL__ -+ -+#define BumpStat(dev,stat) ((dev)->Stats.stat++) -+ -+typedef struct elan3_level_ptbl_block -+{ -+ spinlock_t PtblLock; /* Page table freelist lock */ -+ int PtblTotal; /* Count of level N page tables allocated */ -+ int PtblFreeCount; /* Count of free level N page tables */ -+ struct elan3_ptbl *PtblFreeList; /* Free level N page tables */ -+ struct elan3_ptbl_gr *PtblGroupList; /* List of Groups of level N page tables */ -+} ELAN3_LEVEL_PTBL_BLOCK; -+ -+typedef struct elan3_dev -+{ -+ ELAN3_DEV_OSDEP Osdep; /* OS specific entries */ -+ int Instance; /* Device number */ -+ ELAN_DEVINFO Devinfo; -+ ELAN_POSITION Position; /* position in switch network (for user code) */ -+ ELAN_DEV_IDX DeviceIdx; /* device index registered with elanmod */ -+ -+ int ThreadsShouldStop; /* flag that kernel threads should stop */ -+ -+ spinlock_t IntrLock; -+ spinlock_t TlbLock; -+ spinlock_t CProcLock; -+ kcondvar_t IntrWait; /* place event interrupt thread sleeps */ -+ unsigned EventInterruptThreadStarted:1; /* event interrupt thread started */ -+ unsigned EventInterruptThreadStopped:1; /* event interrupt thread stopped */ -+ -+ DeviceMappingHandle RegHandle; /* DDI Handle */ -+ ioaddr_t RegPtr; /* Elan Registers */ -+ -+ volatile E3_uint32 InterruptMask; /* copy of RegPtr->InterruptMask */ -+ volatile E3_uint32 Event_Int_Queue_FPtr; /* copy of RegPtr->Event_Int_Queue_FPtr */ -+ volatile E3_uint32 SchCntReg; /* copy of RegPtr->SchCntReg */ -+ volatile E3_uint32 Cache_Control_Reg; /* true value for RegPtr->Cache_Control_Reg */ -+ -+ ELAN3_SDRAM_BANK SdramBanks[ELAN3_SDRAM_NUM_BANKS]; /* Elan sdram banks */ -+ spinlock_t SdramLock; /* Sdram allocator */ -+ sdramaddr_t SdramFreeLists[SDRAM_NUM_FREE_LISTS]; -+ unsigned SdramFreeCounts[SDRAM_NUM_FREE_LISTS]; -+ -+ sdramaddr_t TAndQBase; /* Trap and Queue area */ -+ sdramaddr_t ContextTable; /* Elan Context Table */ -+ u_int ContextTableSize; /* # entries in context table */ -+ -+ struct elan3_ctxt **CtxtTable; /* array of ctxt pointers or nulls */ -+ -+ sdramaddr_t CommandPortTraps[2]; /* Command port trap overflow */ -+ int CurrentCommandPortTrap; /* Which overflow queue we're using */ -+ -+ u_int HaltAllCount; /* Count of reasons to halt context 0 queues */ -+ u_int HaltNonContext0Count; /* Count of reasons to halt non-context 0 queues */ -+ u_int HaltDmaDequeueCount; /* Count of reasons to halt dma from dequeuing */ -+ u_int HaltThreadCount; /* Count of reasons to halt the thread processor */ -+ u_int FlushCommandCount; /* Count of reasons to flush command queues */ -+ u_int DiscardAllCount; /* Count of reasons to discard context 0 */ -+ u_int DiscardNonContext0Count; /* Count of reasons to discard non context 0 */ -+ -+ struct thread_trap *ThreadTrap; /* Thread Processor trap space */ -+ struct dma_trap *DmaTrap; /* DMA Processor trap space */ -+ -+ spinlock_t FreeHaltLock; /* Lock for haltop free list */ -+ ELAN3_HALTOP *FreeHaltOperations; /* Free list of haltops */ -+ u_int NumHaltOperations; /* Number of haltops allocated */ -+ u_int ReservedHaltOperations; /* Number of haltops reserved */ -+ -+ ELAN3_HALTOP *HaltOperations; /* List of operations to call */ -+ ELAN3_HALTOP **HaltOperationsTailpp; /* Pointer to last "next" pointer in list */ -+ E3_uint32 HaltOperationsMask; /* Or of all bits in list of operations */ -+ -+ physaddr_t SdramPhysBase; /* Physical address of SDRAM */ -+ physaddr_t SdramPhysMask; /* and mask of significant bits */ -+ -+ physaddr_t PciPhysBase; /* physical address of local PCI segment */ -+ physaddr_t PciPhysMask; /* and mask of significant bits */ -+ -+ long ErrorTime; /* lbolt at last error (link,ecc etc) */ -+ long ErrorsPerTick; /* count of errors for this tick */ -+ timer_fn_t ErrorTimeoutId; /* id of timeout when errors masked out */ -+ timer_fn_t DmaPollTimeoutId; /* id of timeout to poll for "bad" dmas */ -+ int FilterHaltQueued; -+ -+ /* -+ * HAT layer specific entries. -+ */ -+ ELAN3_LEVEL_PTBL_BLOCK Level[4]; -+ spinlock_t PtblGroupLock; /* Lock for Page Table group lists */ -+ struct elan3_ptbl_gr *Level3PtblGroupHand; /* Hand for ptbl stealing */ -+ -+ /* -+ * Per-Context Information structures. -+ */ -+ struct elan3_info *Infos; /* List of "infos" for this device */ -+ -+ char LinkShutdown; /* link forced into reset by panic/shutdown/dump */ -+ -+ /* -+ * Device statistics. -+ */ -+ ELAN3_STATS Stats; -+ ELAN_STATS_IDX StatsIndex; -+ -+ struct { -+ E3_Regs *RegPtr; -+ char *Sdram[ELAN3_SDRAM_NUM_BANKS]; -+ } PanicState; -+} ELAN3_DEV; -+ -+#define ELAN3_DEV_CTX_TABLE(dev,ctxtn) ( (dev)->CtxtTable[ (ctxtn) & MAX_ROOT_CONTEXT_MASK] ) -+ -+/* macros for accessing dev->RegPtr.Tags/Sets. */ -+#define write_cache_tag(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Tags.what)) -+#define read_cache_tag(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Tags.what)) -+#define write_cache_set(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Sets.what)) -+#define read_cache_set(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Sets.what)) -+ -+/* macros for accessing dev->RegPtr.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define write_reg32(dev,what,val) writel (val, dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define read_reg64(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+#define read_reg32(dev,what) readl (dev->RegPtr + offsetof (E3_Regs, Regs.what)) -+ -+/* macros for accessing dev->RegPtr.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define write_ureg32(dev,what,val) writel (val, dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define read_ureg64(dev,what) readq (dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+#define read_ureg32(dev,what) readl (dev->RegPtr + offsetof (E3_Regs, URegs.what)) -+ -+/* macros for accessing dma descriptor/thread regs */ -+#define copy_dma_regs(dev, desc) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < sizeof (E3_DMA)/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) desc)[i] = readq (dev->RegPtr + offsetof (E3_Regs, Regs.Dma_Desc) + i*sizeof (E3_uint64)); \ -+MACRO_END -+ -+#define copy_thread_regs(dev, regs) \ -+MACRO_BEGIN \ -+ register int i; \ -+ for (i = 0; i < (32*sizeof (E3_uint32))/sizeof(E3_uint64); i++) \ -+ ((E3_uint64 *) regs)[i] = readq (dev->RegPtr + offsetof (E3_Regs, Regs.Globals[0]) + i*sizeof (E3_uint64)); \ -+MACRO_END -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ _E3_DataBusMap::Exts _E3_DataBusMap::Input_Context_Fil_Flush -+ elan3_dev::CurrentCommandPortTrap elan3_dev::HaltAllCount elan3_dev::HaltDmaDequeueCount -+ elan3_dev::FlushCommandCount elan3_dev::DiscardAllCount elan3_dev::DiscardNonContext0Count -+ elan3_dev::HaltOperations elan3_dev::HaltOperationsMask)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::TlbLock, -+ _E3_DataBusMap::Cache_Control_Reg)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_dev::Infos elan3_dev::InfoTable)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::FreeHaltLock, -+ elan3_dev::FreeHaltOperations elan3_dev::NumHaltOperations elan3_dev::ReservedHaltOperations)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PageFreeListLock, -+ elan3_dev::PageFreeList elan3_dev::PageFreeListSize)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level1PtblLock, -+ elan3_dev::Level1PtblTotal elan3_dev::Level1PtblFreeCount elan3_dev::Level1PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level2PtblLock, -+ elan3_dev::Level2PtblTotal elan3_dev::Level2PtblFreeCount elan3_dev::Level2PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::Level3PtblLock, -+ elan3_dev::Level3PtblTotal elan3_dev::Level3PtblFreeCount elan3_dev::Level3PtblFreeList)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::PtblGroupLock, -+ elan3_dev::Level1PtblGroupList elan3_dev::Level2PtblGroupList elan3_dev::Level3PtblGroupList)) -+ -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_dev::InfoTable elan3_dev::Level1PtblFreeList -+ elan3_dev::Level2PtblFreeList elan3_dev::Level3PtblFreeList)) -+ -+_NOTE(LOCK_ORDER(elan3_dev::InfoLock elan3_dev::IntrLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::InfoLock)) -+_NOTE(LOCK_ORDER(as::a_lock elan3_dev::IntrLock)) -+ -+#define SET_INT_MASK(dev,Mask) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev)->InterruptMask = (Mask))); mmiob(); MACRO_END -+#define ENABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask |= (bits)))); mmiob(); MACRO_END -+#define DISABLE_INT_MASK(dev, bits) MACRO_BEGIN write_reg32 (dev, Exts.InterruptMask, ((dev->InterruptMask &= ~(bits)))); mmiob(); MACRO_END -+ -+#define INIT_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg |= (val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob (); \ -+MACRO_END -+ -+#define CLEAR_SCHED_STATUS(dev, val) \ -+MACRO_BEGIN \ -+ ASSERT (((val) & HaltStopAndExtTestMask) == (val)); \ -+ (dev)->SchCntReg &= ~(val); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define MODIFY_SCHED_STATUS(dev, SetBits, ClearBits) \ -+MACRO_BEGIN \ -+ ASSERT ((((SetBits)|(ClearBits)) & HaltStopAndExtTestMask) == ((SetBits)|(ClearBits))); \ -+ (dev)->SchCntReg = (((dev)->SchCntReg | (SetBits)) & ~(ClearBits)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#define PULSE_SCHED_STATUS(dev, RestartBits) \ -+MACRO_BEGIN \ -+ ASSERT (((RestartBits) & HaltStopAndExtTestMask) == 0); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg | (RestartBits)); \ -+ mmiob(); \ -+MACRO_END -+ -+#define SET_SCHED_LINK_VALUE(dev, enabled, val) \ -+MACRO_BEGIN \ -+ (dev)->SchCntReg = (((dev)->SchCntReg & HaltAndStopMask) | ((enabled) ? LinkBoundaryScan : 0) | LinkSetValue(val, 0)); \ -+ write_reg32 (dev, Exts.SchCntReg, (dev)->SchCntReg); \ -+ mmiob(); \ -+MACRO_END -+ -+#ifdef DEBUG_ASSERT -+# define ELAN3_ASSERT(dev, EX) ((void)((EX) || elan3_assfail(dev, #EX, __FILE__, __LINE__))) -+#else -+# define ELAN3_ASSERT(dev, EX) -+#endif -+ -+/* elandev_generic.c */ -+extern int InitialiseElan (ELAN3_DEV *dev, ioaddr_t CmdPort); -+extern void FinaliseElan (ELAN3_DEV *dev); -+extern int InterruptHandler (ELAN3_DEV *dev); -+extern void PollForDmaHungup (void *arg); -+ -+extern int SetLinkBoundaryScan (ELAN3_DEV *dev); -+extern void ClearLinkBoundaryScan (ELAN3_DEV *dev); -+extern int WriteBoundaryScanValue (ELAN3_DEV *dev, int value); -+extern int ReadBoundaryScanValue(ELAN3_DEV *dev, int link); -+ -+extern int ReadVitalProductData (ELAN3_DEV *dev, int *CasLatency); -+ -+extern struct elan3_ptbl_gr *ElanGetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset); -+extern void ElanSetPtblGr (ELAN3_DEV *dev, sdramaddr_t offset, struct elan3_ptbl_gr *ptg); -+ -+extern void ElanFlushTlb (ELAN3_DEV *dev); -+ -+extern void SetSchedStatusRegister (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp); -+extern void FreeHaltOperation (ELAN3_DEV *dev, ELAN3_HALTOP *op); -+extern int ReserveHaltOperations (ELAN3_DEV *dev, int count, int cansleep); -+extern void ReleaseHaltOperations (ELAN3_DEV *dev, int count); -+extern void ProcessHaltOperations (ELAN3_DEV *dev, E3_uint32 Pend); -+extern void QueueHaltOperation (ELAN3_DEV *dev, E3_uint32 Pend, volatile E3_uint32 *Maskp, -+ E3_uint32 ReqMask, void (*Function)(ELAN3_DEV *, void *), void *Arguement); -+ -+extern int ComputePosition (ELAN_POSITION *pos, unsigned NodeId, unsigned NumNodes, unsigned numDownLinksVal); -+ -+extern caddr_t MiToName (int mi); -+extern void ElanBusError (ELAN3_DEV *dev); -+ -+extern void TriggerLsa (ELAN3_DEV *dev); -+ -+extern ELAN3_DEV *elan3_device (int instance); -+extern int DeviceRegisterSize (ELAN3_DEV *dev, int rnumber, int *sizep); -+extern int MapDeviceRegister (ELAN3_DEV *dev, int rnumber, ioaddr_t *addrp, int offset, -+ int len, DeviceMappingHandle *handlep); -+extern void UnmapDeviceRegister (ELAN3_DEV *dev, DeviceMappingHandle *handlep); -+ -+ -+/* sdram.c */ -+/* sdram accessing functions - define 4 different types for 8,16,32,64 bit accesses */ -+extern unsigned char elan3_sdram_readb (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan3_sdram_readw (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan3_sdram_readl (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan3_sdram_readq (ELAN3_DEV *dev, sdramaddr_t ptr); -+extern void elan3_sdram_writeb (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan3_sdram_writew (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan3_sdram_writel (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan3_sdram_writeq (ELAN3_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan3_sdram_zerob_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerow_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zerol_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan3_sdram_zeroq_sdram (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan3_sdram_copyb_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyw_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyl_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyq_from_sdram (ELAN3_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan3_sdram_copyb_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyw_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyl_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan3_sdram_copyq_to_sdram (ELAN3_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+extern void elan3_sdram_init (ELAN3_DEV *dev); -+extern void elan3_sdram_fini (ELAN3_DEV *dev); -+extern void elan3_sdram_add (ELAN3_DEV *dev, sdramaddr_t base, sdramaddr_t top); -+extern sdramaddr_t elan3_sdram_alloc (ELAN3_DEV *dev, int nbytes); -+extern void elan3_sdram_free (ELAN3_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern physaddr_t elan3_sdram_to_phys (ELAN3_DEV *dev, sdramaddr_t addr); -+ -+/* cproc.c */ -+extern void HandleCProcTrap (ELAN3_DEV *dev, E3_uint32 Pend, E3_uint32 *Mask); -+ -+/* iproc.c */ -+extern void HandleIProcTrap (ELAN3_DEV *dev, int Channel, E3_uint32 Pend, sdramaddr_t FaultSaveOff, -+ sdramaddr_t TransactionsOff, sdramaddr_t DataOff); -+ -+/* tproc.c */ -+extern int HandleTProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverTProcTrap (ELAN3_DEV *dev, struct thread_trap *threadTrap, E3_uint32 Pend); -+ -+/* dproc.c */ -+extern int HandleDProcTrap (ELAN3_DEV *dev, E3_uint32 *RestartBits); -+extern void DeliverDProcTrap (ELAN3_DEV *dev, struct dma_trap *dmaTrap, E3_uint32 Pend); -+ -+#if defined(LINUX) -+/* procfs_linux.h */ -+extern struct proc_dir_entry *elan3_procfs_root; -+extern struct proc_dir_entry *elan3_config_root; -+ -+extern void elan3_procfs_init(void); -+extern void elan3_procfs_fini(void); -+extern void elan3_procfs_device_init (ELAN3_DEV *dev); -+extern void elan3_procfs_device_fini (ELAN3_DEV *dev); -+#endif /* defined(LINUX) */ -+ -+/* elan3_osdep.c */ -+extern int BackToBackMaster; -+extern int BackToBackSlave; -+ -+#define ELAN_REG_REC_MAX (100) -+#define ELAN_REG_REC(REG) { \ -+elan_reg_rec_file [elan_reg_rec_index] = __FILE__; \ -+elan_reg_rec_line [elan_reg_rec_index] = __LINE__; \ -+elan_reg_rec_reg [elan_reg_rec_index] = REG; \ -+elan_reg_rec_cpu [elan_reg_rec_index] = smp_processor_id(); \ -+elan_reg_rec_lbolt[elan_reg_rec_index] = lbolt; \ -+elan_reg_rec_index = ((elan_reg_rec_index+1) % ELAN_REG_REC_MAX);} -+ -+extern char * elan_reg_rec_file [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_line [ELAN_REG_REC_MAX]; -+extern long elan_reg_rec_lbolt[ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_cpu [ELAN_REG_REC_MAX]; -+extern E3_uint32 elan_reg_rec_reg [ELAN_REG_REC_MAX]; -+extern int elan_reg_rec_index; -+ -+#endif /* __KERNEL__ */ -+ -+ -+#define ELAN3_PROCFS_ROOT "/proc/qsnet/elan3" -+#define ELAN3_PROCFS_VERSION "/proc/qsnet/elan3/version" -+#define ELAN3_PROCFS_DEBUG "/proc/qsnet/elan3/config/elandebug" -+#define ELAN3_PROCFS_DEBUG_CONSOLE "/proc/qsnet/elan3/config/elandebug_console" -+#define ELAN3_PROCFS_DEBUG_BUFFER "/proc/qsnet/elan3/config/elandebug_buffer" -+#define ELAN3_PROCFS_MMU_DEBUG "/proc/qsnet/elan3/config/elan3mmu_debug" -+#define ELAN3_PROCFS_PUNT_LOOPS "/proc/qsnet/elan3/config/eventint_punt_loops" -+ -+#define ELAN3_PROCFS_DEVICE_STATS_FMT "/proc/qsnet/elan3/device%d/stats" -+#define ELAN3_PROCFS_DEVICE_POSITION_FMT "/proc/qsnet/elan3/device%d/position" -+#define ELAN3_PROCFS_DEVICE_NODESET_FMT "/proc/qsnet/elan3/device%d/nodeset" -+ -+#endif /* __ELAN3_ELANDEV_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elandev_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elandev_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elandev_linux.h 2005-07-28 14:52:52.946663840 -0400 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELANDEV_LINUX_H -+#define __ELANDEV_LINUX_H -+ -+#ident "$Id: elandev_linux.h,v 1.11.2.1 2005/03/07 16:27:42 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elandev_linux.h,v $*/ -+ -+#ifdef __KERNEL__ -+#include -+#include -+#include -+ -+#include -+ -+#if !defined(NO_COPROC) /* The older coproc kernel patch is applied */ -+#include -+ -+#define ioproc_ops coproc_ops_struct -+#define ioproc_register_ops register_coproc_ops -+#define ioproc_unregister_ops unregister_coproc_ops -+ -+#define IOPROC_MM_STRUCT_ARG 1 -+#define IOPROC_PATCH_APPLIED 1 -+ -+#elif !defined(NO_IOPROC) /* The new ioproc kernel patch is applied */ -+#include -+ -+#define IOPROC_PATCH_APPLIED 1 -+#endif -+#endif -+ -+#define ELAN3_MAJOR 60 -+#define ELAN3_NAME "elan3" -+#define ELAN3_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+#define ELAN3_MINOR_DEVNUM(m) ((m) & 0x0f) /* card number */ -+#define ELAN3_MINOR_DEVFUN(m) (((m) >> 4) & 0x0f) /* function */ -+#define ELAN3_MINOR_CONTROL 0 /* function values */ -+#define ELAN3_MINOR_MEM 1 -+#define ELAN3_MINOR_USER 2 -+ -+typedef void *DeviceMappingHandle; -+ -+/* task and ctxt handle types */ -+typedef struct mm_struct *TaskHandle; -+typedef int CtxtHandle; -+ -+#define ELAN3_MY_TASK_HANDLE() (current->mm) -+#define KERNEL_TASK_HANDLE() (get_kern_mm()) -+ -+/* -+ * OS-dependent component of ELAN3_DEV struct. -+ */ -+typedef struct elan3_dev_osdep -+{ -+ struct pci_dev *pci; /* PCI config data */ -+ int ControlDeviceOpen; /* flag to indicate control */ -+ /* device open */ -+ struct proc_dir_entry *procdir; -+} ELAN3_DEV_OSDEP; -+ -+#endif /* __ELANDEV_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elanio.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elanio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elanio.h 2005-07-28 14:52:52.947663688 -0400 -@@ -0,0 +1,226 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELAN3IO_H -+#define __ELAN3_ELAN3IO_H -+ -+#ident "$Id: elanio.h,v 1.19 2003/12/08 15:40:26 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanio.h,v $*/ -+ -+#define ELAN3IO_CONTROL_PATHNAME "/dev/elan3/control%d" -+#define ELAN3IO_MEM_PATHNAME "/dev/elan3/mem%d" -+#define ELAN3IO_USER_PATHNAME "/dev/elan3/user%d" -+#define ELAN3IO_SDRAM_PATHNAME "/dev/elan3/sdram%d" -+#define ELAN3IO_MAX_PATHNAMELEN 32 -+ -+/* ioctls on /dev/elan3/control */ -+#define ELAN3IO_CONTROL_BASE 0 -+ -+#define ELAN3IO_SET_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 0) -+#define ELAN3IO_CLEAR_BOUNDARY_SCAN _IO ('e', ELAN3IO_CONTROL_BASE + 1) -+#define ELAN3IO_READ_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 2, E3_uint32) -+#define ELAN3IO_WRITE_LINKVAL _IOWR ('e', ELAN3IO_CONTROL_BASE + 3, E3_uint32) -+ -+typedef struct elanio_set_debug_struct -+{ -+ char what[32]; -+ u_long value; -+} ELAN3IO_SET_DEBUG_STRUCT; -+#define ELAN3IO_SET_DEBUG _IOW ('e', ELAN3IO_CONTROL_BASE + 4, ELAN3IO_SET_DEBUG_STRUCT) -+ -+typedef struct elanio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} ELAN3IO_DEBUG_BUFFER_STRUCT; -+#define ELAN3IO_DEBUG_BUFFER _IOWR ('e', ELAN3IO_CONTROL_BASE + 5, ELAN3IO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct elanio_neterr_server_struct -+{ -+ u_int elanid; -+ void *addr; -+ char *name; -+} ELAN3IO_NETERR_SERVER_STRUCT; -+#define ELAN3IO_NETERR_SERVER _IOW ('e', ELAN3IO_CONTROL_BASE + 6, ELAN3IO_NETERR_SERVER_STRUCT) -+#define ELAN3IO_NETERR_FIXUP _IOWR ('e', ELAN3IO_CONTROL_BASE + 7, NETERR_MSG) -+ -+typedef struct elanio_set_position_struct -+{ -+ u_int device; -+ unsigned short nodeId; -+ unsigned short numNodes; -+} ELAN3IO_SET_POSITION_STRUCT; -+#define ELAN3IO_SET_POSITION _IOW ('e', ELAN3IO_CONTROL_BASE + 8, ELAN3IO_SET_POSITION_STRUCT) -+ -+#if defined(LINUX) -+ -+/* ioctls on /dev/elan3/sdram */ -+#define ELAN3IO_SDRAM_BASE 20 -+ -+/* ioctls on /dev/elan3/user */ -+#define ELAN3IO_USER_BASE 30 -+ -+#define ELAN3IO_FREE _IO ('e', ELAN3IO_USER_BASE + 0) -+ -+#define ELAN3IO_ATTACH _IOWR('e', ELAN3IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN3IO_DETACH _IO ('e', ELAN3IO_USER_BASE + 2) -+ -+typedef struct elanio_addvp_struct -+{ -+ u_int process; -+ ELAN_CAPABILITY capability; -+} ELAN3IO_ADDVP_STRUCT; -+#define ELAN3IO_ADDVP _IOWR('e', ELAN3IO_USER_BASE + 3, ELAN3IO_ADDVP_STRUCT) -+#define ELAN3IO_REMOVEVP _IOW ('e', ELAN3IO_USER_BASE + 4, int) -+ -+typedef struct elanio_bcastvp_struct -+{ -+ u_int process; -+ u_int lowvp; -+ u_int highvp; -+} ELAN3IO_BCASTVP_STRUCT; -+#define ELAN3IO_BCASTVP _IOW ('e', ELAN3IO_USER_BASE + 5, ELAN3IO_BCASTVP_STRUCT) -+ -+typedef struct elanio_loadroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_LOAD_ROUTE_STRUCT; -+#define ELAN3IO_LOAD_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 6, ELAN3IO_LOAD_ROUTE_STRUCT) -+ -+#define ELAN3IO_PROCESS _IO ('e', ELAN3IO_USER_BASE + 7) -+ -+typedef struct elanio_setperm_struct -+{ -+ caddr_t maddr; -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_SETPERM_STRUCT; -+#define ELAN3IO_SETPERM _IOW ('e', ELAN3IO_USER_BASE + 8, ELAN3IO_SETPERM_STRUCT) -+ -+typedef struct elanio_clearperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+} ELAN3IO_CLEARPERM_STRUCT; -+#define ELAN3IO_CLEARPERM _IOW ('e', ELAN3IO_USER_BASE + 9, ELAN3IO_CLEARPERM_STRUCT) -+ -+typedef struct elanio_changeperm_struct -+{ -+ E3_Addr eaddr; -+ size_t len; -+ int perm; -+} ELAN3IO_CHANGEPERM_STRUCT; -+#define ELAN3IO_CHANGEPERM _IOW ('e', ELAN3IO_USER_BASE + 10, ELAN3IO_CHANGEPERM_STRUCT) -+ -+ -+#define ELAN3IO_HELPER_THREAD _IO ('e', ELAN3IO_USER_BASE + 11) -+#define ELAN3IO_WAITCOMMAND _IO ('e', ELAN3IO_USER_BASE + 12) -+#define ELAN3IO_BLOCK_INPUTTER _IOW ('e', ELAN3IO_USER_BASE + 13, int) -+#define ELAN3IO_SET_FLAGS _IOW ('e', ELAN3IO_USER_BASE + 14, int) -+ -+#define ELAN3IO_WAITEVENT _IOW ('e', ELAN3IO_USER_BASE + 15, E3_Event) -+#define ELAN3IO_ALLOC_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 16, EVENT_COOKIE) -+#define ELAN3IO_FREE_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 17, EVENT_COOKIE) -+#define ELAN3IO_ARM_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 18, EVENT_COOKIE) -+#define ELAN3IO_WAIT_EVENTCOOKIE _IOW ('e', ELAN3IO_USER_BASE + 19, EVENT_COOKIE) -+ -+#define ELAN3IO_SWAPSPACE _IOW ('e', ELAN3IO_USER_BASE + 20, SYS_SWAP_SPACE) -+#define ELAN3IO_EXCEPTION_SPACE _IOW ('e', ELAN3IO_USER_BASE + 21, SYS_EXCEPTION_SPACE) -+#define ELAN3IO_GET_EXCEPTION _IOR ('e', ELAN3IO_USER_BASE + 22, SYS_EXCEPTION) -+ -+typedef struct elanio_unload_struct -+{ -+ void *addr; -+ size_t len; -+} ELAN3IO_UNLOAD_STRUCT; -+#define ELAN3IO_UNLOAD _IOW ('e', ELAN3IO_USER_BASE + 23, ELAN3IO_UNLOAD_STRUCT) -+ -+ -+ -+typedef struct elanio_getroute_struct -+{ -+ u_int process; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_GET_ROUTE_STRUCT; -+#define ELAN3IO_GET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 24, ELAN3IO_GET_ROUTE_STRUCT) -+ -+typedef struct elanio_resetroute_struct -+{ -+ u_int process; -+} ELAN3IO_RESET_ROUTE_STRUCT; -+#define ELAN3IO_RESET_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 25, ELAN3IO_RESET_ROUTE_STRUCT) -+ -+typedef struct elanio_checkroute_struct -+{ -+ u_int process; -+ E3_uint32 routeError; -+ E3_uint16 flits[MAX_FLITS]; -+} ELAN3IO_CHECK_ROUTE_STRUCT; -+#define ELAN3IO_CHECK_ROUTE _IOW ('e', ELAN3IO_USER_BASE + 26, ELAN3IO_CHECK_ROUTE_STRUCT) -+ -+typedef struct elanio_vp2nodeId_struct -+{ -+ u_int process; -+ unsigned short nodeId; -+ ELAN_CAPABILITY cap; -+} ELAN3IO_VP2NODEID_STRUCT; -+#define ELAN3IO_VP2NODEID _IOWR('e', ELAN3IO_USER_BASE + 27, ELAN3IO_VP2NODEID_STRUCT) -+ -+#define ELAN3IO_SET_SIGNAL _IOW ('e', ELAN3IO_USER_BASE + 28, int) -+ -+typedef struct elanio_process_2_location_struct -+{ -+ u_int process; -+ ELAN_LOCATION loc; -+} ELAN3IO_PROCESS_2_LOCATION_STRUCT; -+#define ELAN3IO_PROCESS_2_LOCATION _IOW ('e', ELAN3IO_USER_BASE + 29, ELAN3IO_PROCESS_2_LOCATION_STRUCT) -+ -+ -+ -+/* ioctls on all device */ -+#define ELAN3IO_GENERIC_BASE 100 -+typedef struct elanio_get_devinfo_struct -+{ -+ ELAN_DEVINFO *devinfo; -+} ELAN3IO_GET_DEVINFO_STRUCT; -+#define ELAN3IO_GET_DEVINFO _IOR ('e', ELAN3IO_GENERIC_BASE + 0, ELAN_DEVINFO) -+ -+typedef struct elanio_get_position_struct -+{ -+ ELAN_POSITION *position; -+} ELAN3IO_GET_POSITION_STRUCT; -+#define ELAN3IO_GET_POSITION _IOR ('e', ELAN3IO_GENERIC_BASE + 1, ELAN_POSITION) -+ -+typedef struct elanio_stats_struct -+{ -+ int which; -+ void *ptr; -+} ELAN3IO_STATS_STRUCT; -+#define ELAN3IO_STATS _IOR ('e', ELAN3IO_GENERIC_BASE + 2, ELAN3IO_STATS_STRUCT) -+# define ELAN3_SYS_STATS_DEVICE 0 -+# define ELAN3_SYS_STATS_MMU 1 -+ -+/* offsets on /dev/elan3/control */ -+ -+/* offsets on /dev/elan3/mem */ -+ -+/* page numbers on /dev/elan3/user */ -+#define ELAN3IO_OFF_COMMAND_PAGE 0 -+#define ELAN3IO_OFF_FLAG_PAGE 1 -+#define ELAN3IO_OFF_UREG_PAGE 2 -+ -+#endif /* LINUX */ -+ -+#endif /* __ELAN3_ELAN3IO_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elanregs.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elanregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elanregs.h 2005-07-28 14:52:52.949663384 -0400 -@@ -0,0 +1,1063 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* -+ * Header file for internal slave mapping of the ELAN3 registers -+ */ -+ -+#ifndef _ELAN3_ELANREGS_H -+#define _ELAN3_ELANREGS_H -+ -+#ident "$Id: elanregs.h,v 1.87 2004/04/22 12:27:21 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanregs.h,v $*/ -+ -+#include -+#include -+#include -+ -+#define MAX_ROOT_CONTEXT_MASK 0xfff -+#define SYS_CONTEXT_BIT 0x1000 -+#define ALL_CONTEXT_BITS (MAX_ROOT_CONTEXT_MASK | SYS_CONTEXT_BIT) -+#define ROOT_TAB_OFFSET(Cntxt) (((Cntxt) & MAX_ROOT_CONTEXT_MASK) << 4) -+#define CLEAR_SYS_BIT(Cntxt) ((Cntxt) & ~SYS_CONTEXT_BIT) -+ -+#define E3_CACHELINE_SIZE (32) -+#define E3_CACHE_SIZE (8192) -+ -+typedef volatile struct _E3_CacheSets -+{ -+ E3_uint64 Set0[256]; /* 2k bytes per set */ -+ E3_uint64 Set1[256]; /* 2k bytes per set */ -+ E3_uint64 Set2[256]; /* 2k bytes per set */ -+ E3_uint64 Set3[256]; /* 2k bytes per set */ -+} E3_CacheSets; -+ -+typedef union e3_cache_tag -+{ -+ E3_uint64 Value; -+ struct { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 pad2:8; /* Undefined value when read */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 pad0; /* Undefined value when read */ -+#else -+ E3_uint32 pad0; /* Undefined value when read */ -+ E3_uint32 pad1:4; /* Undefined value when read */ -+ E3_uint32 AddrTag27to11:17; /* Tag address bits 27 to 11 */ -+ E3_uint32 FillPending:1; /* Pipelined fill occuring*/ -+ E3_uint32 Modified:1; /* Cache data is modified */ -+ E3_uint32 LineError:1; /* A line error has occured */ -+ E3_uint32 pad2:8; /* Undefined value when read */ -+#endif -+ } s; -+} E3_CacheTag; -+ -+#define E3_NumCacheLines 64 -+#define E3_NumCacheSets 4 -+ -+typedef volatile struct _E3_CacheTags -+{ -+ E3_CacheTag Tags[E3_NumCacheLines][E3_NumCacheSets]; /* 2k bytes per set */ -+} E3_CacheTags; -+ -+typedef union E3_IProcStatus_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 Unused:2; -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* iprocs wakeup function */ -+ E3_uint32 Unused:2; -+ E3_uint32 Last:1; /* This is the last transaction in the packet */ -+ E3_uint32 First:1; /* This is the first transaction in the packet */ -+ E3_uint32 Chan1:1; /* This packet received on v chan1 */ -+ E3_uint32 BadLength:1; /* Eop was received in a bad place */ -+ E3_uint32 CrcStatus:2; /* Crc Status value */ -+ E3_uint32 Reject:1; /* a packet nack has been sent */ -+ E3_uint32 AckSent:1; /* a packet ack has been sent */ -+ E3_uint32 QueueingPacket:1; /* receiving a queueing packet */ -+ E3_uint32 EopType:2; /* Type of Eop Received */ -+ E3_uint32 SuspendAddr:8; /* iprocs suspend address */ -+ E3_uint32 TrapType:8; /* iprocs trap ucode address */ -+#endif -+ } s; -+} E3_IProcStatus_Reg; -+ -+#define CRC_STATUS_GOOD (0 << 21) -+#define CRC_STATUS_DISCARD (1 << 21) -+#define CRC_STATUS_ERROR (2 << 21) -+#define CRC_STATUS_BAD (3 << 21) -+ -+#define CRC_MASK (3 << 21) -+ -+#define EOP_GOOD (1 << 16) -+#define EOP_BADACK (2 << 16) -+#define EOP_ERROR_RESET (3 << 16) -+ -+#define E3_IPS_LastTrans (1 << 26) -+#define E3_IPS_FirstTrans (1 << 25) -+#define E3_IPS_VChan1 (1 << 24) -+#define E3_IPS_BadLength (1 << 23) -+#define E3_IPS_CrcMask (3 << 21) -+#define E3_IPS_Rejected (1 << 20) -+#define E3_IPS_AckSent (1 << 19) -+#define E3_IPS_QueueingPacket (1 << 18) -+#define E3_IPS_EopType (3 << 16) -+ -+typedef union E3_Status_Reg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+#else -+ E3_uint32 WakeupFunction:3; /* procs wakeup function */ -+ E3_uint32 Context:13; /* procs current context */ -+ E3_uint32 SuspendAddr:8; /* procs suspend address */ -+ E3_uint32 TrapType:8; /* procs trap ucode address */ -+#endif -+ } s; -+} E3_Status_Reg; -+ -+/* values for WakeupFunction */ -+#define SleepOneTick 0 -+#define WakeupToSendTransOrEop 1 -+#define SleepOneTickThenRunnable 2 -+#define WakeupNever 4 -+/* extra dma wakeup functions */ -+#define WakupeToSendTransOrEop 1 -+#define WakeupForPacketAck 3 -+#define WakeupToSendTrans 5 -+/* extra thread wakup function */ -+#define WakeupStopped 3 -+/* extra cproc wakup function */ -+#define WakeupSetEvent 3 -+ -+#define GET_STATUS_CONTEXT(Ptr) ((Ptr.Status >> 16) & 0x1fff) -+#define GET_STATUS_SUSPEND_ADDR(Ptr) ((Ptr.Status >> 8) & 0xff) -+#define GET_STATUS_TRAPTYPE(Ptr) ((E3_uint32)(Ptr.Status & 0xff)) -+ -+/* -+ * Interrupt register bits -+ */ -+#define INT_PciMemErr (1<<15) /* Pci memory access error */ -+#define INT_SDRamInt (1<<14) /* SDRam ECC interrupt */ -+#define INT_EventInterrupt (1<<13) /* Event Interrupt */ -+#define INT_LinkError (1<<12) /* Link Error */ -+#define INT_ComQueue (1<<11) /* a comm queue half full */ -+#define INT_TProcHalted (1<<10) /* Tproc Halted */ -+#define INT_DProcHalted (1<<9) /* Dmas Halted */ -+#define INT_DiscardingNonSysCntx (1<<8) /* Inputters Discarding Non-SysCntx */ -+#define INT_DiscardingSysCntx (1<<7) /* Inputters Discarding SysCntx */ -+#define INT_TProc (1<<6) /* tproc interrupt */ -+#define INT_CProc (1<<5) /* cproc interrupt */ -+#define INT_DProc (1<<4) /* dproc interrupt */ -+#define INT_IProcCh1NonSysCntx (1<<3) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh1SysCntx (1<<2) /* iproc SysCntx interrupt */ -+#define INT_IProcCh0NonSysCntx (1<<1) /* iproc non-SysCntx interrupt */ -+#define INT_IProcCh0SysCntx (1<<0) /* iproc SysCntx interrupt */ -+ -+#define INT_Inputters (INT_IProcCh0SysCntx | INT_IProcCh0NonSysCntx | INT_IProcCh1SysCntx | INT_IProcCh1NonSysCntx) -+#define INT_Discarding (INT_DiscardingSysCntx | INT_DiscardingNonSysCntx) -+#define INT_Halted (INT_DProcHalted | INT_TProcHalted) -+#define INT_ErrorInterrupts (INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_PllDelValue 0x9 -+#define LRS_ClockEven 0xA -+#define LRS_ClockOdd 0xB -+#define LRS_ErrorLSW 0xC -+#define LRS_ErrorMSW 0xD -+#define LRS_FinCoarseDeskew 0xE -+#define LRS_LinkInValue 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+union Sched_Status -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StopNonSysCntxs:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 :3; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkBoundaryScan:1; -+#else -+ E3_uint32 LinkBoundaryScan:1; -+ E3_uint32 FixLinkDelays:1; -+ E3_uint32 LinkSetValue:10; -+ E3_uint32 :3; -+ E3_uint32 ClearLinkErrorInt:1; -+ E3_uint32 RestartCProc:1; -+ E3_uint32 RestartTProc:1; -+ E3_uint32 RestartDProc:1; -+ E3_uint32 RestartCh1NonSysCntx:1; -+ E3_uint32 RestartCh1SysCntx:1; -+ E3_uint32 RestartCh0NonSysCntx:1; -+ E3_uint32 RestartCh0SysCntx:1; -+ E3_uint32 DiscardNonSysCntxIn:1; -+ E3_uint32 DiscardSysCntxIn:1; -+ E3_uint32 CProcStop:1; -+ E3_uint32 HaltThread:1; -+ E3_uint32 HaltDmaDequeue:1; -+ E3_uint32 HaltDmas:1; -+ E3_uint32 FlushCommandQueues:1; -+ E3_uint32 StopNonSysCntxs:1; -+#endif -+ } s; -+}; -+ -+#define LinkBoundaryScan ((E3_uint32) 1<<31) /* Clears the link error interrupt */ -+#define FixLinkDelays ((E3_uint32) 1<<30) /* Clears the link error interrupt */ -+#define LinkSetValue(Val, OldVal) ((E3_uint32) (((Val) & 0x3ff) << 20) | ((OldVal) & ((~0x3ff) << 20))) -+ -+#define ClearLinkErrorInt ((E3_uint32) 1<<16) /* Clears the link error interrupt */ -+#define RestartCProc ((E3_uint32) 1<<15) /* Clears command proc interrupt */ -+#define RestartTProc ((E3_uint32) 1<<14) /* Clears thread interrupt */ -+#define RestartDProc ((E3_uint32) 1<<13) /* Clears dma0 interrupt */ -+#define RestartCh1NonSysCntx ((E3_uint32) 1<<12) /* Clears interrupt */ -+#define RestartCh1SysCntx ((E3_uint32) 1<<11) /* Clears interrupt */ -+#define RestartCh0NonSysCntx ((E3_uint32) 1<<10) /* Clears interrupt */ -+#define RestartCh0SysCntx ((E3_uint32) 1<<9) /* Clears interrupt */ -+#define CProcStopped ((E3_uint32) 1<<9) /* Read value only */ -+ -+#define TraceSetEvents ((E3_uint32) 1<<8) -+#define DiscardNonSysCntxIn ((E3_uint32) 1<<7) -+#define DiscardSysCntxIn ((E3_uint32) 1<<6) -+#define CProcStop ((E3_uint32) 1<<5) /* Will empty all the command port queues. */ -+#define HaltThread ((E3_uint32) 1<<4) /* Will stop the thread proc and clear the tproc command queue */ -+#define HaltDmaDequeue ((E3_uint32) 1<<3) /* Will stop the dmaers starting new dma's. */ -+#define HaltDmas ((E3_uint32) 1<<2) /* Will stop the dmaers and clear the dma command queues */ -+#define FlushCommandQueues ((E3_uint32) 1<<1) /* Causes the command ports to be flushed. */ -+#define StopNonSysCntxs ((E3_uint32) 1<<0) /* Prevents a non-SysCntx from starting. */ -+ -+/* Initial value of schedule status register */ -+#define LinkResetToken 0x00F -+ -+#define Sched_Initial_Value (LinkBoundaryScan | (LinkResetToken << 20) | \ -+ DiscardSysCntxIn | DiscardNonSysCntxIn | HaltThread | HaltDmas) -+ -+#define StopDmaQueues (HaltDmaDequeue | HaltDmas | \ -+ DiscardNonSysCntxIn | DiscardSysCntxIn) -+#define CheckDmaQueueStopped (INT_DiscardingNonSysCntx | INT_DiscardingSysCntx | INT_DProcHalted) -+ -+#define HaltStopAndExtTestMask 0xfff001ff -+#define HaltAndStopMask 0x000001ff -+ -+ -+#define DmaComQueueNotEmpty (1<<0) -+#define ThreadComQueueNotEmpty (1<<1) -+#define EventComQueueNotEmpty (1<<2) -+#define DmaComQueueHalfFull (1<<3) -+#define ThreadComQueueHalfFull (1<<4) -+#define EventComQueueHalfFull (1<<5) -+#define DmaComQueueError (1<<6) -+#define ThreadComQueueError (1<<7) -+#define EventComQueueError (1<<8) -+ -+#define ComQueueNotEmpty (DmaComQueueNotEmpty | ThreadComQueueNotEmpty | EventComQueueNotEmpty) -+#define ComQueueError (DmaComQueueError | ThreadComQueueError | EventComQueueError) -+ -+typedef union _E3_DmaInfo -+{ -+ E3_uint32 Value; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+ E3_uint32 :7; -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 :16; /* read as Zero */ -+#else -+ E3_uint32 :16; /* read as Zero */ -+ E3_uint32 AckBufferValid:1; /* Packet ack is valid. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 DmaLastPacket:1; /* Set for the last packet of a dma */ -+ E3_uint32 UseRemotePriv:1; /* Set for remote read dmas */ -+ E3_uint32 TimeSliceCount:2; /* Time left to timeslice */ -+ E3_uint32 :7; -+ E3_uint32 DmaOutputOpen:1; /* The packet is currently open */ -+#endif -+ } s; -+} E3_DmaInfo; -+ -+typedef volatile struct _E3_DmaRds -+{ -+ E3_uint32 DMA_Source4to0AndTwoReads; -+ E3_uint32 pad13; -+ E3_uint32 DMA_BytesToRead; -+ E3_uint32 pad14; -+ E3_uint32 DMA_MinusPacketSize; -+ E3_uint32 pad15; -+ E3_uint32 DMA_MaxMinusPacketSize; -+ E3_uint32 pad16; -+ E3_uint32 DMA_DmaOutputOpen; -+ E3_uint32 pad16a; -+ E3_DmaInfo DMA_PacketInfo; -+ E3_uint32 pad17[7]; -+ E3_uint32 IProcTrapBase; -+ E3_uint32 pad18; -+ E3_uint32 IProcBlockTrapBase; -+ E3_uint32 pad19[11]; -+} E3_DmaRds; -+ -+typedef volatile struct _E3_DmaWrs -+{ -+ E3_uint64 pad0; -+ E3_uint64 LdAlignment; -+ E3_uint64 ResetAckNLdBytesToWr; -+ E3_uint64 SetAckNLdBytesToWr; -+ E3_uint64 LdBytesToRd; -+ E3_uint64 LdDmaType; -+ E3_uint64 SendRoutes; -+ E3_uint64 SendEop; -+ E3_uint64 pad1[8]; -+} E3_DmaWrs; -+ -+typedef volatile struct _E3_Exts -+{ -+ E3_uint32 CurrContext; /* 0x12a00 */ -+ E3_uint32 pad0; -+ E3_Status_Reg DProcStatus; /* 0x12a08 */ -+ E3_uint32 pad1; -+ E3_Status_Reg CProcStatus; /* 0x12a10 */ -+ E3_uint32 pad2; -+ E3_Status_Reg TProcStatus; /* 0x12a18 */ -+ E3_uint32 pad3; -+ E3_IProcStatus_Reg IProcStatus; /* 0x12a20 */ -+ E3_uint32 pad4[3]; -+ -+ E3_uint32 IProcTypeContext; /* 0x12a30 */ -+ E3_uint32 pad5; -+ E3_uint32 IProcTransAddr; /* 0x12a38 */ -+ E3_uint32 pad6; -+ E3_uint32 IProcCurrTransData0; /* 0x12a40 */ -+ E3_uint32 pad7; -+ E3_uint32 IProcCurrTransData1; /* 0x12a48 */ -+ E3_uint32 pad8; -+ -+ E3_uint32 SchCntReg; /* 0x12a50 */ -+ E3_uint32 pad9; -+ E3_uint32 InterruptReg; /* 0x12a58 */ -+ E3_uint32 pad10; -+ E3_uint32 InterruptMask; /* 0x12a60 */ -+ E3_uint32 pad11; -+ E3_uint32 LinkErrorTypes; /* 0x12a68 */ -+ E3_uint32 pad12[3]; -+ E3_uint32 LinkState; /* a read here returens the DataDel value for the */ -+ /* link that has just been defined by a write to */ -+ /* Regs.Exts.SchCntReg.LinkSetValue */ -+ E3_uint32 pad13; -+ -+ union /* 0x12a80 */ -+ { -+ E3_DmaWrs DmaWrs; -+ E3_DmaRds DmaRds; -+ } Dmas; -+} E3_Exts; -+ -+typedef union com_port_entry -+{ -+ E3_uint64 type; -+ struct -+ { -+ E3_uint32 Address; /* Command VAddr */ -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :4; /* read as Zero */ -+#else -+ E3_uint32 :4; /* read as Zero */ -+ E3_uint32 Context:12; /* Command Context */ -+ E3_uint32 :13; /* read as Zero */ -+ E3_uint32 RemoteDesc:1; /* Issue address bit 5 */ -+ E3_uint32 EventNotCommand:1; /* Issue address bit 3 */ -+ E3_uint32 Context0Issue:1; /* Issue was for context 0 */ -+#endif -+ } s; -+} E3_ComPortEntry; -+ -+/* control reg bits */ -+#define CONT_MMU_ENABLE (1 << 0) /* bit 0 enables mmu */ -+#define CONT_ENABLE_8K_PAGES (1 << 1) /* When set smallest page is 8k instead of 4k. */ -+#define CONT_EN_ALL_SETS (1 << 2) /* enable cache */ -+#define CONT_CACHE_LEVEL0 (1 << 3) /* cache context table */ -+#define CONT_CACHE_LEVEL1 (1 << 4) /* cache up level 1 PTD/PTE */ -+#define CONT_CACHE_LEVEL2 (1 << 5) /* cache up level 2 PTD/PTE */ -+#define CONT_CACHE_LEVEL3 (1 << 6) /* cache up level 3 PTD/PTE */ -+#define CONT_CACHE_TRAPS (1 << 7) /* cache up traps */ -+#define CONT_CACHE_LEV0_ROUTES (1 << 8) /* cache up small routes */ -+#define CONT_CACHE_LEV1_ROUTES (1 << 9) /* cache up large routes */ -+#define CONT_CACHE_ALL (CONT_CACHE_LEVEL0 | CONT_CACHE_LEVEL1 | CONT_CACHE_LEVEL2 | \ -+ CONT_CACHE_LEVEL3 | CONT_CACHE_TRAPS | \ -+ CONT_CACHE_LEV0_ROUTES | CONT_CACHE_LEV1_ROUTES) -+ -+#define CONT_SYNCHRONOUS (1 << 10) /* PCI running sync */ -+#define CONT_SER (1 << 11) /* Single bit output (Elan1 SER bit) */ -+#define CONT_SIR (1 << 12) /* Writing 1 resets elan. */ -+ -+#define CONT_PSYCHO_MODE (1 << 13) /* Enables all the perversion required by psycho */ -+#define CONT_ENABLE_ECC (1 << 14) /* Enables error detecting on the ECC */ -+#define CONT_SDRAM_TESTING (1 << 15) /* Switches to test mode for checking EEC data bits */ -+ -+/* defines SDRam CasLatency. Once set will not change again unless reset is reasserted. */ -+/* 1 = Cas Latency is 3, 0 = Cas Latency is 2 */ -+#define CAS_LATENCY_2 (0 << 16) -+#define CAS_LATENCY_3 (1 << 16) -+#define REFRESH_RATE_2US (0 << 17) /* defines 2us SDRam Refresh rate. */ -+#define REFRESH_RATE_4US (1 << 17) /* defines 4us SDRam Refresh rate. */ -+#define REFRESH_RATE_8US (2 << 17) /* defines 8us SDRam Refresh rate. */ -+#define REFRESH_RATE_16US (3 << 17) /* defines 16us SDRam Refresh rate. */ -+ -+#define CONT_PCI_ERR (1 << 19) /* Read 1 if PCI Error */ -+#define CONT_CLEAR_PCI_ERROR (1 << 19) /* Clears an PCI error. */ -+ -+/* Will cause the PCI error bit to become set. This is used to force the threads proc -+ and the uProc to start to stall. */ -+#define CONT_SET_PCI_ERROR (1 << 20) -+ -+/* Writes SDram control reg when set. Also starts SDram memory system refreshing. */ -+#define SETUP_SDRAM (1 << 21) -+ -+/* Flushes the tlb */ -+#define MMU_FLUSH (1 << 22) -+/* and read back when it's finished */ -+#define MMU_FLUSHED (1 << 0) -+ -+/* Clears any ECC error detected by SDRam interface */ -+#define CLEAR_SDRAM_ERROR (1 << 23) -+ -+#define ECC_ADDR_MASK 0x0ffffff8 -+#define ECC_UE_MASK 0x1 -+#define ECC_CE_MASK 0x2 -+#define ECC_ME_MASK 0x4 -+#define ECC_SYN_MASK 0xff -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E3_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+typedef union _E3_CacheContReg -+{ -+ E3_uint32 ContReg; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 :11; -+#else -+ E3_uint32 :11; -+ E3_uint32 FlushTlb:1; /* Flush the contence of the tlb */ -+ E3_uint32 StartSDRam:1; /* Starts the sdram subsystem */ -+ E3_uint32 Set_Pci_Error:1; /* Will simulate an Pci error */ -+ E3_uint32 Pci_Err:1; /* pci error. Write 1 clears err */ -+ E3_uint32 RefreshRate:2; /* 0=2us, 1=4us, 2=8us, 3=16us */ -+ E3_uint32 CasLatency:1; /* 1=cas latency=3, 1=cas latency=2 */ -+ E3_uint32 PsychoMode:1; /* Enables psycho perversion mode. */ -+ E3_uint32 SIR:1; /* write 1 will reset elan */ -+ E3_uint32 SER:1; /* 1 bit output port */ -+ E3_uint32 PCI_Synchronous:1; /* Pci and sys clocks are running synchronously*/ -+ E3_uint32 Cache_Lev1_Routes:1; /* wr 1 big routes will be cached */ -+ E3_uint32 Cache_Lev0_Routes:1; /* wr 1 small routes will be cached */ -+ E3_uint32 Cache_Traps:1; /* wr 1 trap info will be cached */ -+ E3_uint32 Cache_Level3:1; /* wr 1 lev3 page tabs will be cached */ -+ E3_uint32 Cache_Level2:1; /* wr 1 lev2 page tabs will be cached */ -+ E3_uint32 Cache_Level1:1; /* wr 1 lev1 page tabs will be cached */ -+ E3_uint32 Cache_Level0:1; /* wr 1 lev0 page tabs will be cached */ -+ E3_uint32 EnableAllSets:1; /* wr 1 All the cache sets are enabled */ -+ E3_uint32 Set8kPages:1; /* wr 1 smallest page is 8k. */ -+ E3_uint32 MMU_Enable:1; /* wr 1 to enable the MMU */ -+#endif -+ } s; -+} E3_CacheContReg; -+ -+typedef union _E3_TrapBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 :17; -+#else -+ E3_uint32 :17; -+ E3_uint32 TProcDeschedule:2; /* The reason the tproc stopped running. */ -+ E3_uint32 OutputWasOpen:1; /* The output was open when tproc trapped */ -+ E3_uint32 AckBufferValid:1; /* The PacketAckValue bits are valid */ -+ -+ E3_uint32 PacketTimeout:1; /* Packet timeout. Sent an EopError. Valid if AckBufferValid set. */ -+ E3_uint32 PacketAckValue:2; /* Packet ack type. Valid if AckBufferValid set. */ -+ -+ E3_uint32 TrapForTooManyInsts:1; /* Thread has been executing for too long */ -+ E3_uint32 OpenRouteFetch:1; /* Fault while fetching routes for previous open*/ -+ E3_uint32 OpenException:1; /* Invalid sequence of open, sendtr or close */ -+ E3_uint32 ThreadTimeout:1; /* The threads outputer has timed out */ -+ -+ E3_uint32 DataAccessException:1; /* A data access exception */ -+ E3_uint32 Unimplemented:1; /* Unimplemented instruction executed */ -+ E3_uint32 InstAccessException:1; /* An instruction access exception */ -+ E3_uint32 ForcedTProcTrap:1; /* The theads proc has been halted */ -+#endif -+ } s; -+} E3_TrapBits; -+ -+typedef union _E3_DirtyBits -+{ -+ volatile E3_uint32 Bits; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 GlobalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 InsDirty:8; -+#else -+ E3_uint32 InsDirty:8; -+ E3_uint32 LocalsDirty:8; -+ E3_uint32 OutsDirty:8; /* will always read as dirty. */ -+ E3_uint32 GlobalsDirty:8; -+#endif -+ } s; -+} E3_DirtyBits; -+ -+#define E3_TProcDescheduleMask 0x6000 -+#define E3_TProcDescheduleWait 0x2000 -+#define E3_TProcDescheduleSuspend 0x4000 -+#define E3_TProcDescheduleBreak 0x6000 -+ -+#define E3_TrapBitsMask 0x7fff -+ -+#define ThreadRestartFromTrapBit 1 -+#define ThreadReloadAllRegs 2 -+ -+#define E3_PAckOk 0 -+#define E3_PAckTestFail 1 -+#define E3_PAckDiscard 2 -+#define E3_PAckError 3 -+ -+typedef volatile struct _E3_DataBusMap -+{ -+ E3_uint64 Dma_Alignment_Port[8]; /* 0x00002800 */ -+ E3_uint32 pad0[0x30]; /* 0x00002840 */ -+ -+ E3_uint32 Input_Trans0_Data[0x10]; /* 0x00002900 */ -+ E3_uint32 Input_Trans1_Data[0x10]; -+ E3_uint32 Input_Trans2_Data[0x10]; -+ E3_uint32 Input_Trans3_Data[0x10]; -+ -+/* this is the start of the exts directly addressable from the ucode. */ -+ E3_Exts Exts; /* 0x00002a00 */ -+ -+/* this is the start of the registers directly addressable from the ucode. */ -+ E3_DMA Dma_Desc; /* 0x00002b00 */ -+ -+ E3_uint32 Dma_Last_Packet_Size; /* 0x00002b20 */ -+ E3_uint32 Dma_This_Packet_Size; /* 0x00002b24 */ -+ E3_uint32 Dma_Tmp_Source; /* 0x00002b28 */ -+ E3_uint32 Dma_Tmp_Dest; /* 0x00002b2c */ -+ -+ E3_Addr Thread_SP_Save_Ptr; /* points to the thread desched save word. */ -+ E3_uint32 Dma_Desc_Size_InProg; /* 0x00002b34 */ -+ -+ E3_uint32 Thread_Desc_SP; /* 0x00002b38 */ -+ E3_uint32 Thread_Desc_Context; /* 0x00002b3c */ -+ -+ E3_uint32 uCode_TMP[0x10]; /* 0x00002b40 */ -+ -+ E3_uint32 TProc_NonSysCntx_FPtr; /* 0x00002b80 */ -+ E3_uint32 TProc_NonSysCntx_BPtr; /* 0x00002b84 */ -+ E3_uint32 TProc_SysCntx_FPtr; /* 0x00002b88 */ -+ E3_uint32 TProc_SysCntx_BPtr; /* 0x00002b8c */ -+ E3_uint32 DProc_NonSysCntx_FPtr; /* 0x00002b90 */ -+ E3_uint32 DProc_NonSysCntx_BPtr; /* 0x00002b94 */ -+ E3_uint32 DProc_SysCntx_FPtr; /* 0x00002b98 */ -+ E3_uint32 DProc_SysCntx_BPtr; /* 0x00002b9c */ -+ -+ E3_uint32 Input_Trap_Base; /* 0x00002ba0 */ -+ E3_uint32 Input_Queue_Offset; /* 0x00002ba4 */ -+ E3_uint32 CProc_TrapSave_Addr; /* 0x00002ba8 */ -+ E3_uint32 Input_Queue_Addr; /* 0x00002bac */ -+ E3_uint32 uCode_TMP10; /* 0x00002bb0 */ -+ E3_uint32 uCode_TMP11; /* 0x00002bb4 */ -+ E3_uint32 Event_Trace_Ptr; /* 0x00002bb8 */ -+ E3_uint32 Event_Trace_Mask; /* 0x00002bbc */ -+ -+ E3_ComPortEntry DmaComQueue[3]; /* 0x00002bc0 */ -+ -+ E3_uint32 Event_Int_Queue_FPtr; /* 0x00002bd8 */ -+ E3_uint32 Event_Int_Queue_BPtr; /* 0x00002bdc */ -+ -+ E3_ComPortEntry ThreadComQueue[2]; /* 0x00002be0 */ -+ E3_ComPortEntry SetEventComQueue[2]; /* 0x00002bf0 */ -+ -+ E3_uint32 pad1[96]; /* 0x00002c00 */ -+ E3_uint32 ComQueueStatus; /* 0x00002d80 */ -+ E3_uint32 pad2[31]; /* 0x00002d84 */ -+ -+/* These are the internal registers of the threads proc. */ -+ E3_uint32 Globals[8]; /* 0x00002e00 */ -+ E3_uint32 Outs[8]; -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ -+ E3_uint32 pad3[16]; -+ -+ E3_uint32 IBufferReg[4]; -+ -+ E3_uint32 ExecuteNPC; -+ E3_uint32 ExecutePC; -+ -+ E3_uint32 StartPC; -+ E3_uint32 pad4; -+ -+ E3_uint32 StartnPC; -+ E3_uint32 pad5; -+ -+ E3_TrapBits TrapBits; -+ E3_DirtyBits DirtyBits; -+ E3_uint64 LoadDataReg; -+ E3_uint64 StoreDataReg; -+ -+ E3_uint32 ECC_STATUS0; -+ E3_uint32 ECC_STATUS1; -+ E3_uint32 pad6[0xe]; -+ -+/* Pci slave port regs */ -+ E3_uint32 PciSlaveReadCache[0x10]; -+ -+ E3_uint32 Fault_Base_Ptr; -+ E3_uint32 pad7; -+ E3_uint32 Context_Ptr; -+ E3_uint32 pad8; -+ E3_uint32 Input_Context_Filter; /* write only, No data */ -+ E3_uint32 Input_Context_Fil_Flush; /* write only, No data */ -+ E3_CacheContReg Cache_Control_Reg; -+ E3_uint32 pad9; -+ -+ E3_uint64 Tlb_Line_Value; -+ -+ E3_uint32 Walk_Datareg1; -+ E3_uint32 Walk_VAddr_Tab_Base; -+ E3_uint32 Walk_Datareg; -+ E3_uint32 Walk_ContextReg; -+ E3_uint32 Walk_FaultAddr; -+ E3_uint32 Walk_EventAddr; -+ -+/* outputers output cont ext registers. */ -+ E3_uint64 Dma_Route_012345_Context; -+ E3_uint64 pad10; -+ E3_uint64 Dma_Route_01234567; -+ E3_uint64 Dma_Route_89ABCDEF; -+ -+ E3_uint64 Thread_Route_012345_Context; -+ E3_uint64 pad11; -+ E3_uint64 Thread_Route_01234567; -+ E3_uint64 Thread_Route_89ABCDEF; -+} E3_DataBusMap; -+ -+typedef volatile struct _E3_Regs -+{ -+ E3_CacheSets Sets; /* 0x00000000 */ -+ E3_CacheTags Tags; /* 0x00002000 */ -+ E3_DataBusMap Regs; /* 0x00002800 */ -+ E3_uint32 pad1[0x400]; -+ E3_User_Regs URegs; -+} E3_Regs; -+ -+#define MAX_TRAPPED_TRANS 16 -+#define TRANS_DATA_WORDS 16 -+#define TRANS_DATA_BYTES 64 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E3_EventInt -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_uint32 IntCookie; -+ E3_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E3_EventInt; -+ -+#define GET_EVENT_CONTEXT(Ptr) ((Ptr->s.EventContext >> 16) & MAX_ROOT_CONTEXT_MASK) -+ -+typedef volatile union _E3_ThreadQueue -+{ -+ E3_uint64 ForceAlign; -+ struct -+ { -+ E3_Addr Thread; -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 :16; /* Bits 0 to 15 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :3; /* Bits 29 to 31 */ -+#else -+ E3_uint32 :3; /* Bits 29 to 31 */ -+ E3_uint32 Context:13; /* Bits 16 to 28 */ -+ E3_uint32 :16; /* Bits 0 to 15 */ -+#endif -+ } s; -+} E3_ThreadQueue; -+ -+typedef volatile union _E3_FaultStatusReg -+{ -+ E3_uint32 Status; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 :10; /* Bits 22 to 31 */ -+#else -+ E3_uint32 :10; /* Bits 22 to 31 */ -+ E3_uint32 WalkBadData:1; /* Memory CRC error during a walk. Bit 21 */ -+ E3_uint32 VProcSizeErr:1; /* VProc number is out of range. Bit 20 */ -+ E3_uint32 AlignmentErr:1; /* Address alignment did not match the access size. Bit 19 */ -+ E3_uint32 FaultPte:2; /* Page table type when the fault occured. Bit 17 */ -+ E3_uint32 ProtFault:1; /* A protection fault occured. Bit 16 */ -+ E3_uint32 Level:2; /* Page table level when the fault occued. Bits 14 to 15 */ -+ E3_uint32 Walking:1; /* The fault occued when walking. Bit 13 */ -+ E3_uint32 RdMult:1; /* Access was a dma read multiple. Bit 12 */ -+ E3_uint32 RdLine:1; /* Access was a dma read line. Bit 11 */ -+ E3_uint32 BlkDataType:2; /* Data size used for endian flips. Bits 9 to 10 */ -+ E3_uint32 NonAllocAcc:1; /* Access was a cache non allocate type. Bit 8 */ -+ E3_uint32 WrAcc:1; /* Access was a write. Bit 7 */ -+ E3_uint32 AccSize:4; /* Access size. See below for different types. Bits 3 to 6 */ -+ E3_uint32 AccTypePerm:3; /* Access permission. See below. Bits 0 to 2 */ -+#endif -+ } s; -+} E3_FaultStatusReg; -+ -+typedef union _E3_FaultSave -+{ -+ E3_uint64 ForceAlign; -+ struct { -+ E3_FaultStatusReg FSR; -+ volatile E3_uint32 FaultContext; -+ volatile E3_uint32 FaultAddress; -+ volatile E3_uint32 EventAddress; -+ } s; -+} E3_FaultSave; -+ -+/* MMU fault status reg bit positions. */ -+#define FSR_WritePermBit 0 /* 1=Write access perm, 0=Read access perm */ -+#define FSR_RemotePermBit 1 /* 1=Remote access perm, 0=local access perm */ -+#define FSR_EventPermBit 2 /* 1=Event access perm, 0=data access perm */ -+#define FSR_Size0Bit 3 -+#define FSR_Size1Bit 4 -+#define FSR_Size2Bit 5 -+#define FSR_Size3Bit 6 -+#define FSR_WriteAccBit 7 /* 1=Write access, 0=Read access. */ -+#define FSR_NonAllocBit 8 /* 1=Do not fill cache with this data */ -+#define FSR_BlkDataTy0Bit 9 -+#define FSR_BlkDataTy1Bit 10 -+#define FSR_ReadLineBit 11 -+#define FSR_ReadMultipleBit 12 -+ -+#define FSR_PermMask (0xf << FSR_WritePermBit) -+#define FSR_SizeMask (0xf << FSR_Size0Bit) -+#define FSR_AccTypeMask (3 << FSR_WriteAccBit) -+#define FSR_BlkDataTyMask (3 << FSR_BlkDataTy0Bit) -+#define FSR_PciAccTyMask (3 << FSR_ReadLineBit) -+#define FSR_Walking (0x1 << 13) -+#define FSR_Level_Mask (0x3 << 14) -+#define FSR_ProtFault (0x1 << 16) -+#define FSR_FaultPTEType (0x2 << 17) -+#define FSR_AddrSizeError (0x1 << 19) -+#define FSR_VProcSizeError (0x1 << 20) -+#define FSR_WalkBadData (0x1 << 21) -+ -+#define FSR_PermRead 0 -+#define FSR_PermWrite 1 -+#define FSR_PermRemoteRead 2 -+#define FSR_PermRemoteWrite 3 -+#define FSR_PermEventRd 4 -+#define FSR_PermEventWr 5 -+#define FSR_PermRemoteEventRd 6 -+#define FSR_PermRemoteEventWr 7 -+ -+/* AT size values for each access type */ -+#define FSR_Word (0x0 << FSR_Size0Bit) -+#define FSR_DWord (0x1 << FSR_Size0Bit) -+#define FSR_QWord (0x2 << FSR_Size0Bit) -+#define FSR_Block32 (0x3 << FSR_Size0Bit) -+#define FSR_ReservedBlock (0x6 << FSR_Size0Bit) -+#define FSR_Block64 (0x7 << FSR_Size0Bit) -+#define FSR_GetCntxFilter (0x8 << FSR_Size0Bit) -+#define FSR_QueueDWord (0x9 << FSR_Size0Bit) -+#define FSR_RouteFetch (0xa << FSR_Size0Bit) -+#define FSR_QueueBlock (0xb << FSR_Size0Bit) -+#define FSR_Block32PartWrite (0xe << FSR_Size0Bit) -+#define FSR_Block64PartWrite (0xf << FSR_Size0Bit) -+ -+#define FSR_AllocRead (0 << FSR_WriteAccBit) -+#define FSR_AllocWrite (1 << FSR_WriteAccBit) -+#define FSR_NonAllocRd (2 << FSR_WriteAccBit) -+#define FSR_NonAllocWr (3 << FSR_WriteAccBit) -+ -+#define FSR_TypeByte (0 << FSR_BlkDataTy0Bit) -+#define FSR_TypeHWord (1 << FSR_BlkDataTy0Bit) -+#define FSR_TypeWord (2 << FSR_BlkDataTy0Bit) -+#define FSR_TypeDWord (3 << FSR_BlkDataTy0Bit) -+ -+typedef union E3_TrTypeCntx -+{ -+ E3_uint32 TypeContext; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 Type:16; /* Transaction type field */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E3_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E3_uint32 StatusRegValid:1; /* Bit 30 */ -+ E3_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E3_uint32 Context:13; /* Transaction context */ -+ E3_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E3_TrTypeCntx; -+ -+#define GET_TRAP_TYPE(Ptr) (Ptr.TypeContext & 0xfff) -+#define GET_TRAP_CONTEXT(Ptr) ((Ptr.TypeContext >> 16) & 0x1fff) -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef union _E3_IprocTrapHeader -+{ -+ E3_uint64 forceAlign; -+ -+ struct -+ { -+ E3_TrTypeCntx TrTypeCntx; -+ E3_uint32 TrAddr; -+ E3_uint32 TrData0; -+ union -+ { -+ E3_IProcStatus_Reg u_IProcStatus; -+ E3_uint32 u_TrData1; -+ } ipsotd; -+ } s; -+} E3_IprocTrapHeader; -+ -+#define IProcTrapStatus ipsotd.u_IProcStatus -+#define TrData1 ipsotd.u_TrData1 -+ -+typedef struct E3_IprocTrapData -+{ -+ E3_uint32 TrData[TRANS_DATA_WORDS]; -+} E3_IprocTrapData; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E3_NonSysCntxQueueSize 0x400 -+#define E3_SysCntxQueueSize 0x100 -+ -+typedef struct _E3_TrapAndQueue -+{ -+ E3_DMA NonSysCntxDmaQueue[E3_NonSysCntxQueueSize]; /* 0x000000 */ -+ E3_DMA SysCntxDmaQueue[E3_SysCntxQueueSize]; /* 0x008000 */ -+ E3_EventInt EventIntQueue[E3_NonSysCntxQueueSize]; /* 0x00A000 */ -+ E3_ThreadQueue NonSysCntxThreadQueue[E3_NonSysCntxQueueSize]; /* 0x00C000 */ -+ E3_ThreadQueue SysCntxThreadQueue[E3_SysCntxQueueSize]; /* 0x00E000 */ -+ E3_FaultSave IProcSysCntx; /* 0x00E800 */ -+ E3_Addr Thread_SP_Save; /* 0x00E810 */ -+ E3_uint32 dummy0[3]; /* 0x00E814 */ -+ E3_FaultSave ThreadProcData; /* 0x00E820 */ -+ E3_FaultSave ThreadProcInst; /* 0x00E830 */ -+ E3_FaultSave dummy1[2]; /* 0x00E840 */ -+ E3_FaultSave ThreadProcOpen; /* 0x00E860 */ -+ E3_FaultSave dummy2; /* 0x00E870 */ -+ E3_FaultSave IProcNonSysCntx; /* 0x00E880 */ -+ E3_FaultSave DProc; /* 0x00E890 */ -+ E3_FaultSave CProc; /* 0x00E8A0 */ -+ E3_FaultSave TProc; /* 0x00E8B0 */ -+ E3_FaultSave DProcData0; /* 0x00E8C0 */ -+ E3_FaultSave DProcData1; /* 0x00E8D0 */ -+ E3_FaultSave DProcData2; /* 0x00E8E0 */ -+ E3_FaultSave DProcData3; /* 0x00E8F0 */ -+ E3_uint32 dummy3[0xc0]; /* 0x00E900 */ -+ E3_IprocTrapHeader VCh0_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh0_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_C0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapHeader VCh1_NonC0_TrHead[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh0_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_C0_TrData[MAX_TRAPPED_TRANS]; -+ E3_IprocTrapData VCh1_NonC0_TrData[MAX_TRAPPED_TRANS]; -+ E3_uint64 DmaOverflowQueueSpace[0x1000]; -+ E3_uint64 ThreadOverflowQueueSpace[0x800]; -+ E3_uint64 EventOverflowQueueSpace[0x800]; -+} E3_TrapAndQueue; -+ -+ -+typedef struct _E3_ContextControlBlock -+{ -+ E3_uint32 rootPTP; -+ E3_uint32 filter; -+ E3_uint32 VPT_ptr; -+ E3_uint32 VPT_mask; -+} E3_ContextControlBlock; -+ -+#define E3_CCB_CNTX0 (0x20000000) -+#define E3_CCB_DISCARD_ALL (0x40000000) -+#define E3_CCB_ACKOK_ALL (0x80000000) -+#define E3_CCB_MASK (0xc0000000) -+ -+#define E3_NUM_CONTEXT_0 (0x20) -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E3_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E3_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+ -+#endif /* notdef _ELAN3_ELANREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elansyscall.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elansyscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elansyscall.h 2005-07-28 14:52:52.949663384 -0400 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANSYSCALL_H -+#define __ELAN3_ELANSYSCALL_H -+ -+#ident "$Id: elansyscall.h,v 1.34 2004/06/07 13:50:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elansyscall.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifndef _ASM -+ -+typedef struct sys_word_item -+{ -+ struct sys_word_item *Next; -+ E3_uint32 Value; -+} SYS_WORD_ITEM; -+ -+typedef struct sys_block_item -+{ -+ struct sys_block_item *Next; -+ E3_uint32 *Pointer; -+} SYS_BLOCK_ITEM; -+ -+typedef struct sys_swap_space -+{ -+ int Magic; -+ void *ItemListsHead[MAX_LISTS]; -+ void **ItemListsTailp[MAX_LISTS]; -+} SYS_SWAP_SPACE; -+ -+typedef struct sys_exception -+{ -+ int Type; -+ int Proc; -+ u_long Res; -+ u_long Value; -+ E3_FaultSave_BE FaultArea; -+ -+ union -+ { -+ DMA_TRAP Dma; -+ THREAD_TRAP Thread; -+ COMMAND_TRAP Command; -+ INPUT_TRAP Input; -+ } Union; -+} SYS_EXCEPTION; -+ -+typedef struct sys_exception_space -+{ -+ struct sys_exception_space *Next; -+ int Magic; -+ int Front; -+ int Back; -+ int Count; -+ int Overflow; -+ SYS_EXCEPTION Exceptions[1]; -+} SYS_EXCEPTION_SPACE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct sys_ctxt -+{ -+ SYS_SWAP_SPACE *Swap; -+ SYS_EXCEPTION_SPACE *Exceptions; -+ kmutex_t Lock; -+ -+ spinlock_t WaitLock; -+ kcondvar_t NetworkErrorWait; -+ -+ int Armed; -+ int Backoff; -+ long Time; -+ -+ u_long Flags; -+ int signal; -+ -+ EVENT_COOKIE_TABLE *Table; -+} SYS_CTXT; -+ -+extern SYS_CTXT *sys_init (ELAN3_CTXT *ctxt); -+extern int sys_waitevent (ELAN3_CTXT *ctxt, E3_Event *event); -+extern void sys_addException (SYS_CTXT *sctx, int type, int proc, caddr_t ptr, int size, -+ E3_FaultSave_BE *, u_long res, u_long value); -+extern int sys_getException (SYS_CTXT *sctx, SYS_EXCEPTION *ex); -+ -+/* returns -ve error or ELAN_CAP_OK or ELAN_CAP_RMS */ -+/* use = ELAN_USER_ATTACH, ELAN_USER_P2P, ELAN_USER_BROADCAST */ -+extern int elan3_validate_cap (ELAN3_DEV *dev, ELAN_CAPABILITY *cap ,int use); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM */ -+ -+/* values for "Flags" */ -+#define ELAN3_SYS_FLAG_DMA_BADVP 1 -+#define ELAN3_SYS_FLAG_THREAD_BADVP 2 -+#define ELAN3_SYS_FLAG_DMAFAIL 4 -+#define ELAN3_SYS_FLAG_NETERR 8 -+ -+#define SYS_SWAP_MAGIC 0xB23C52DF -+#define SYS_EXCEPTION_MAGIC 0xC34D63E0 -+ -+#define EXCEPTION_GLOBAL_STRING "elan3_exceptions" -+#define EXCEPTION_ABORT_STRING "elan3_abortstring" -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANSYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elanuregs.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elanuregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elanuregs.h 2005-07-28 14:52:52.950663232 -0400 -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_ELANUREGS_H -+#define __ELAN3_ELANUREGS_H -+ -+#ident "$Id: elanuregs.h,v 1.10 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanuregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+ -+/* Count reg 0 */ -+#define STC_INPUT_TRANSACTIONS 0 -+#define STP_DMA_EOP_WAIT_ACK 1 -+#define STP_THREAD_RUNNING 2 -+#define STP_UCODE_WAIT_MEM 3 -+#define STC_CACHE_WRITE_BACKS 4 -+#define STC_PCI_SLAVE_READS 5 -+#define STC_REG0_UNUSED6 6 -+#define STP_REG0_UNUSED7 7 -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_TRANSACTIONS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_THREAD_RUNNING", \ -+ "STP_UCODE_WAIT_MEM", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STC_REG0_UNUSED6", \ -+ "STP_REG0_UNUSED7" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0 << 4) -+#define STP_DMA_DATA_TRANSMITTING (1 << 4) -+#define STP_THEAD_WAITING_INST (2 << 4) -+#define STC_REG1_UNUSED3 (3 << 4) -+#define STP_FETCHING_ROUTES (4 << 4) -+#define STC_REG1_UNUSED5 (5 << 4) -+#define STC_PCI_SLAVE_WRITES (6 << 4) -+#define STP_PCI_SLAVE_READ_WAITING (7 << 4) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STP_THEAD_WAITING_INST", \ -+ "STC_REG1_UNUSED3", \ -+ "STP_FETCHING_ROUTES", \ -+ "STC_REG1_UNUSED5", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STP_PCI_SLAVE_READ_WAITING" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0 << 8) -+#define STP_DMA_WAITING_MEM (1 << 8) -+#define STP_THREAD_WAIT_OPEN_PKT (2 << 8) -+#define STC_REG2_UNUSED3 (3 << 8) -+#define STC_ROUTE_FETCHES (4 << 8) -+#define STC_CACHE_NON_ALLOC_MISSES (5 << 8) -+#define STC_REG2_UNUSED6 (6 << 8) -+#define STP_PCI_SLAVE_WRITE_WAITING (7 << 8) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STP_THREAD_WAIT_OPEN_PKT", \ -+ "STC_REG2_UNUSED3", \ -+ "STC_ROUTE_FETCHES", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STC_REG2_UNUSED6", \ -+ "STP_PCI_SLAVE_WRITE_WAITING" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0 << 12) -+#define STP_DMA_WAIT_NETWORK_BUSY (1 << 12) -+#define STP_THREAD_WAIT_PACK (2 << 12) -+#define STP_UCODE_BLOCKED_UCODE (3 << 12) -+#define STC_TLB_HITS (4 << 12) -+#define STC_REG3_UNUSED5 (5 << 12) -+#define STC_PCI_MASTER_READS (6 << 12) -+#define STP_PCI_MASTER_WRITE_WAITING (7 << 12) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STP_THREAD_WAIT_PACK", \ -+ "STP_UCODE_BLOCKED_UCODE", \ -+ "STC_TLB_HITS", \ -+ "STC_REG3_UNUSED5", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_MASTER_WRITE_WAITING"\ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0 << 16) -+#define STC_DMA_NON_CTX0_PKTS (1 << 16) -+#define STP_THREAD_EOP_WAIT_ACK (2 << 16) -+#define STP_UCODE_DPROC_RUNNING (3 << 16) -+#define STC_TLB_MEM_WALKS (4 << 16) -+#define STC_REG4_UNUSED5 (5 << 16) -+#define STC_PCI_MASTER_WRITES (6 << 16) -+#define STP_PCI_MASTER_READ_WAITING (7 << 16) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_NON_CTX0_PKTS", \ -+ "STP_THREAD_EOP_WAIT_ACK", \ -+ "STP_UCODE_DPROC_RUNNING", \ -+ "STC_TLB_MEM_WALKS", \ -+ "STC_REG4_UNUSED5", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_READ_WAITING" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0 << 20) -+#define STC_DMA_NON_CTX0_PKTS_REJECTED (1 << 20) -+#define STP_THREAD_WAITING_DATA (2 << 20) -+#define STP_UCODE_CPROC_RUNNING (3 << 20) -+#define STP_THREAD_TRANSMITTING_DATA (4 << 20) -+#define STP_PCI_WAITING_MAIN (5 << 20) -+#define STC_REG5_UNUSED6 (6 << 20) -+#define STC_REG5_UNUSED7 (7 << 20) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_NON_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAITING_DATA", \ -+ "STP_UCODE_CPROC_RUNNING", \ -+ "STP_THREAD_TRANSMITTING_DATA", \ -+ "STP_PCI_WAITING_MAIN", \ -+ "STC_REG5_UNUSED6", \ -+ "STC_REG5_UNUSED7" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_WAITING_MEMORY (0 << 24) -+#define STC_DMA_CTX0_PKTS (1 << 24) -+#define STP_THREAD_WAITING_MEMORY (2 << 24) -+#define STP_UCODE_TPROC_RUNNING (3 << 24) -+#define STC_CACHE_HITS (4 << 24) -+#define STP_PCI_WAITING_ELAN (5 << 24) -+#define STC_REG6_UNUSED4 (6 << 24) -+#define STC_REG6_UNUSED7 (7 << 24) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_WAITING_MEMORY", \ -+ "STC_DMA_CTX0_PKTS", \ -+ "STP_THREAD_WAITING_MEMORY", \ -+ "STP_UCODE_TPROC_RUNNING", \ -+ "STC_CACHE_HITS", \ -+ "STP_PCI_WAITING_ELAN", \ -+ "STC_REG6_UNUSED4", \ -+ "STC_REG6_UNUSED7" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0 << 28) -+#define STC_DMA_CTX0_PKTS_REJECTED (1 << 28) -+#define STP_THREAD_WAIT_NETWORK_BUSY (2 << 28) -+#define STP_UCODE_IPROC_RUNNING (3 << 28) -+#define STP_TLB_MEM_WALKING (4 << 28) -+#define STC_CACHE_ALLOC_MISSES (5 << 28) -+#define STP_PCI_DATA_TRANSFER (6 << 28) -+#define STC_REG7_UNUSED7 (7 << 28) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STC_DMA_CTX0_PKTS_REJECTED", \ -+ "STP_THREAD_WAIT_NETWORK_BUSY",\ -+ "STP_UCODE_IPROC_RUNNING", \ -+ "STP_TLB_MEM_WALKING", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_DATA_TRANSFER", \ -+ "STC_REG7_UNUSED7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+extern const char *elan3_stats_names[8][8]; -+ -+#define ELAN3_STATS_NAME(COUNT, CONTROL) (elan3_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+typedef volatile union e3_StatsControl -+{ -+ E3_uint32 StatsControl; -+ struct -+ { -+#if defined(__LITTLE_ENDIAN__) -+ E3_uint32 StatCont0:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont7:4; -+#else -+ E3_uint32 StatCont7:4; -+ E3_uint32 StatCont6:4; -+ E3_uint32 StatCont5:4; -+ E3_uint32 StatCont4:4; -+ E3_uint32 StatCont3:4; -+ E3_uint32 StatCont2:4; -+ E3_uint32 StatCont1:4; -+ E3_uint32 StatCont0:4; -+#endif -+ } s; -+} E3_StatsControl; -+ -+typedef volatile union e3_StatsCount -+{ -+ E3_uint64 ClockStat; -+ struct -+ { -+ E3_uint32 ClockLSW; /* read only */ -+ E3_uint32 StatsCount; -+ } s; -+} E3_StatsCount; -+ -+typedef volatile union e3_clock -+{ -+ E3_uint64 NanoSecClock; -+ struct -+ { -+ E3_uint32 ClockLSW; -+ E3_uint32 ClockMSW; -+ } s; -+} E3_Clock; -+#define E3_TIME( X ) ((X).NanoSecClock) -+ -+typedef volatile struct _E3_User_Regs -+{ -+ E3_StatsCount StatCounts[8]; -+ E3_StatsCount InstCount; -+ E3_uint32 pad0; -+ E3_StatsControl StatCont; -+ E3_Clock Clock; -+ E3_uint32 pad1[0x7ea]; -+} E3_User_Regs; -+ -+typedef volatile struct _E3_CommandPort -+{ -+ E3_Addr PutDma; /* 0x000 */ -+ E3_uint32 Pad1; -+ E3_Addr GetDma; /* 0x008 */ -+ E3_uint32 Pad2; -+ E3_Addr RunThread; /* 0x010 */ -+ E3_uint32 Pad3[3]; -+ E3_Addr WaitEvent0; /* 0x020 */ -+ E3_uint32 Pad4; -+ E3_Addr WaitEvent1; /* 0x028 */ -+ E3_uint32 Pad5; -+ E3_Addr SetEvent; /* 0x030 */ -+ E3_uint32 Pad6[3]; -+ E3_uint32 Pad7[0x7f0]; /* Fill out to an 8K page */ -+} E3_CommandPort; -+/* Should have the new structures for the top four pages of the elan3 space */ -+ -+#define E3_COMMANDPORT_SIZE (sizeof (E3_CommandPort)) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_ELANUREGS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/elanvp.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/elanvp.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/elanvp.h 2005-07-28 14:52:52.950663232 -0400 -@@ -0,0 +1,165 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_ELANVP_H -+#define _ELAN3_ELANVP_H -+ -+#ident "$Id: elanvp.h,v 1.45 2004/06/18 09:28:06 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/elanvp.h,v $ */ -+ -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Context number allocation. -+ * [0-31] system contexts -+ * [32-63] hardware test -+ * [64-1023] available -+ * [1024-2047] RMS allocatable -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN3_KCOMM_CONTEXT_NUM 0x001 /* old kernel comms context (system) */ -+#define ELAN3_CM_CONTEXT_NUM 0x002 /* new cluster member ship comms context (system) */ -+#define ELAN3_MRF_CONTEXT_NUM 0x003 /* multi-rail kernel comms context */ -+#define ELAN3_DMARING_BASE_CONTEXT_NUM 0x010 /* 16 contexts for dma ring issue (system) */ -+#define ELAN3_DMARING_TOP_CONTEXT_NUM 0x01f -+ -+#define ELAN3_HWTEST_BASE_CONTEXT_NUM 0x020 /* reserved for hardware test */ -+#define ELAN3_HWTEST_TOP_CONTEXT_NUM 0x03f -+ -+#define ELAN3_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN3_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN3_HWTEST_CONTEXT(ctx) ((ctx) >= ELAN3_HWTEST_BASE_CONTEXT_NUM && \ -+ (ctx) <= ELAN3_HWTEST_TOP_CONTEXT_NUM) -+ -+#define ELAN3_SYSTEM_CONTEXT(ctx) (((ctx) & SYS_CONTEXT_BIT) != 0 || \ -+ (ctx) < E3_NUM_CONTEXT_0 || \ -+ (ctx) >= ELAN3_KCOMM_BASE_CONTEXT_NUM) -+ -+/* Maximum number of virtual processes */ -+#define ELAN3_MAX_VPS (16384) -+ -+#define ELAN3_INVALID_PROCESS (0x7fffffff) /* A GUARANTEED invalid process # */ -+#define ELAN3_INVALID_NODE (0xFFFF) -+#define ELAN3_INVALID_CONTEXT (0xFFFF) -+ -+ -+ -+#if defined(__KERNEL__) && !defined(__ELAN3__) -+ -+/* -+ * Contexts are accessible via Elan capabilities, -+ * for each context that can be "attached" to there -+ * is a ELAN3_CTXT_INFO structure created by its -+ * "owner". This also "remembers" all remote -+ * segments that have "blazed" a trail to it. -+ * -+ * If the "owner" goes away the soft info is -+ * destroyed when it is no longer "attached" or -+ * "referenced" by a remote segment. -+ * -+ * If the owner changes the capability, then -+ * the soft info must be not "referenced" or -+ * "attached" before a new process can "attach" -+ * to it. -+ */ -+ -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::InfoLock, -+ elan3_info::Next elan3_info::Prev elan3_info::Device elan3_info::Owner -+ elan3_info::Capability elan3_info::AttachedCapability elan3_info::Context)) -+_NOTE(MUTEX_PROTECTS_DATA(elan3_dev::IntrLock, -+ elan3_info::Nacking elan3_info::Disabled)) -+_NOTE(DATA_READABLE_WITHOUT_LOCK(elan3_info::Context elan3_info::Device elan3_info::Capability)) -+ -+#endif /* __KERNEL__ */ -+ -+#define LOW_ROUTE_PRIORITY 0 -+#define HIGH_ROUTE_PRIORITY 1 -+ -+#define DEFAULT_ROUTE_TIMEOUT 3 -+#define DEFAULT_ROUTE_PRIORITY LOW_ROUTE_PRIORITY -+ -+ -+/* a small route is 4 flits (8 bytes), a big route */ -+/* is 8 flits (16 bytes) - each packed route is 4 bits */ -+/* so giving us a maximum of 28 as flit0 does not contain */ -+/* packed routes */ -+#define MAX_FLITS 8 -+#define MAX_PACKED 28 -+ -+/* bit definitions for 64 bit route pointer */ -+#define ROUTE_VALID (1ULL << 63) -+#define ROUTE_PTR (1ULL << 62) -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_PTR_MASK ((1ull << ROUTE_CTXT_SHIFT)-1) -+#define ROUTE_GET_CTXT ((VAL >> ROUTE_CTXT_SHIFT) & 0x3fff ) -+ -+#define SMALL_ROUTE(flits, context) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (context) << ROUTE_CTXT_SHIFT) | \ -+ ROUTE_VALID) -+ -+#define BIG_ROUTE_PTR(paddr, context) ((E3_uint64) (paddr) | ((E3_uint64) context << ROUTE_CTXT_SHIFT) | ROUTE_VALID | ROUTE_PTR) -+ -+#define BIG_ROUTE0(flits) (((E3_uint64) (flits)[0] << 0) | ((E3_uint64) (flits)[1] << 16) | \ -+ ((E3_uint64) (flits)[2] << 32) | ((E3_uint64) (flits)[3] << 48)) -+#define BIG_ROUTE1(flits) (((E3_uint64) (flits)[4] << 0) | ((E3_uint64) (flits)[5] << 16) | \ -+ ((E3_uint64) (flits)[6] << 32) | ((E3_uint64) (flits)[7] << 48)) -+ -+ -+/* defines for first flit of a route */ -+#define FIRST_HIGH_PRI (1 << 15) -+#define FIRST_AGE(Val) ((Val) << 11) -+#define FIRST_TIMEOUT(Val) ((Val) << 9) -+#define FIRST_PACKED(X) ((X) << 7) -+#define FIRST_ROUTE(Val) (Val) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+/* ---------------------------------------------------------- -+ * elan3_route functions -+ * return ELAN3_ROUTE_xxx codes -+ * ---------------------------------------------------------- */ -+ -+#define ELAN3_ROUTE_SUCCESS (0x00) -+#define ELAN3_ROUTE_SYSCALL_FAILED (0x01) -+#define ELAN3_ROUTE_INVALID (0x02) -+#define ELAN3_ROUTE_TOO_LONG (0x04) -+#define ELAN3_ROUTE_LOAD_FAILED (0x08) -+#define ELAN3_ROUTE_PROC_RANGE (0x0f) -+#define ELAN3_ROUTE_INVALID_LEVEL (0x10) -+#define ELAN3_ROUTE_OCILATES (0x20) -+#define ELAN3_ROUTE_WRONG_DEST (0x40) -+#define ELAN3_ROUTE_TURN_LEVEL (0x80) -+#define ELAN3_ROUTE_NODEID_UNKNOWN (0xf0) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _ELAN3_ELANVP_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/events.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/events.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/events.h 2005-07-28 14:52:52.951663080 -0400 -@@ -0,0 +1,183 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_EVENTS_H -+#define _ELAN3_EVENTS_H -+ -+#ident "$Id: events.h,v 1.45 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/events.h,v $*/ -+ -+/* -+ * Alignments for events, event queues and blockcopy blocks. -+ */ -+#define E3_EVENT_ALIGN (8) -+#define E3_QUEUE_ALIGN (32) -+#define E3_BLK_ALIGN (64) -+#define E3_BLK_SIZE (64) -+#define E3_BLK_PATTERN (0xfeedface) -+ -+#define E3_EVENT_FREE ((0 << 4) | EV_WCOPY) -+#define E3_EVENT_PENDING ((1 << 4) | EV_WCOPY) -+#define E3_EVENT_ACTIVE ((2 << 4) | EV_WCOPY) -+#define E3_EVENT_FIRED ((3 << 4) | EV_WCOPY) -+#define E3_EVENT_FAILED ((4 << 4) | EV_WCOPY) -+#define E3_EVENT_DONE ((5 << 4) | EV_WCOPY) -+#define E3_EVENT_PRIVATE ((6 << 4) | EV_WCOPY) -+ -+/* -+ * Event values and masks -+ * -+ * Block Copy event xxxxxxxxxxxxxxxx1 -+ * Chained event 30 bit ptr ....0x -+ * Event interrupt 29 bit cookie 01x -+ * Dma event 28 bit ptr 011x -+ * thread event 28 bit ptr 111x -+ */ -+#define EV_CLEAR (0x00000000) -+#define EV_TYPE_BCOPY (0x00000001) -+#define EV_TYPE_CHAIN (0x00000000) -+#define EV_TYPE_EVIRQ (0x00000002) -+#define EV_TYPE_DMA (0x00000006) -+#define EV_TYPE_THREAD (0x0000000e) -+ -+#define EV_TYPE_BCOPY_BYTE (0) -+#define EV_TYPE_BCOPY_HWORD (1) -+#define EV_TYPE_BCOPY_WORD (2) -+#define EV_TYPE_BCOPY_DWORD (3) -+ -+/* -+ * Data type is in the lowest two bits of the Dest pointer. -+ */ -+#define EV_BCOPY_DTYPE_MASK (3) -+#define EV_WCOPY (1) /* [DestWord] = Source */ -+#define EV_BCOPY (0) /* [DestBlock] = [SourceBlock] */ -+ -+#define EV_TYPE_MASK (0x0000000e) -+#define EV_TYPE_MASK_BCOPY (0x00000001) -+#define EV_TYPE_MASK_CHAIN (0x00000002) -+#define EV_TYPE_MASK_EVIRQ (0x00000006) -+#define EV_TYPE_MASK_DMA (0x0000000e) -+#define EV_TYPE_MASK_THREAD (0x0000000e) -+#define EV_TYPE_MASK2 (0x0000000f) -+ -+/* -+ * Min/Max size for Elan queue entries -+ */ -+#define E3_QUEUE_MIN E3_BLK_SIZE -+#define E3_QUEUE_MAX (E3_BLK_SIZE * 5) -+ -+/* -+ * Elan queue state bits -+ */ -+#define E3_QUEUE_FULL (1<<0) -+#define E3_QUEUE_LOCKED (1<<8) -+ -+#ifndef _ASM -+ -+typedef union _E3_Event -+{ -+ E3_uint64 ev_Int64; -+ struct { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ } ev_u; -+} E3_Event; -+ -+typedef union _E3_BlockCopyEvent -+{ -+ E3_uint64 ev_ForceAlign; -+ struct E3_BlockCopyEvent_u { -+ volatile E3_int32 u_Count; -+ E3_uint32 u_Type; -+ E3_Addr u_Source; -+ E3_Addr u_Dest; /* lowest bits are the data type for endian conversion */ -+ } ev_u; -+} E3_BlockCopyEvent; -+ -+#define ev_Type ev_u.u_Type -+#define ev_Count ev_u.u_Count -+#define ev_Source ev_u.u_Source -+#define ev_Dest ev_u.u_Dest -+ -+typedef union _E3_WaitEvent0 -+{ -+ E3_uint64 we_ForceAlign; -+ struct { -+ E3_Addr u_EventLoc; -+ E3_int32 u_WaitCount; -+ } we_u; -+} E3_WaitEvent0; -+#define we_EventLoc we_u.u_EventLoc -+#define we_WaitCount we_u.u_WaitCount -+ -+typedef union _E3_Event_Blk -+{ -+ E3_uint8 eb_Bytes[E3_BLK_SIZE]; -+ E3_uint32 eb_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+ E3_uint64 eb_Int64[E3_BLK_SIZE/sizeof (E3_uint64)]; -+} E3_Event_Blk; -+ -+/* We make eb_done the last word of the blk -+ * so that we can guarantee the rest of the blk is -+ * correct when this value is set. -+ * However, when the TPORT code copies the envelope -+ * info into the blk, it uses a dword endian type. -+ * Thus we must correct for this when initialising -+ * the pattern in the Elan SDRAM blk (eeb_done) -+ */ -+#define eb_done eb_Int32[15] -+#define eeb_done eb_Int32[15^WordEndianFlip] -+ -+#define EVENT_WORD_READY(WORD) (*((volatile E3_uint32 *) WORD) != 0) -+#define EVENT_BLK_READY(BLK) (((volatile E3_Event_Blk *) (BLK))->eb_done != 0) -+#define EVENT_READY(EVENT) (((volatile E3_Event *) (EVENT))->ev_Count <= 0) -+ -+#define ELAN3_WAIT_EVENT (0) -+#define ELAN3_POLL_EVENT (-1) -+ -+#define SETUP_EVENT_TYPE(ptr,typeval) (((unsigned long)(ptr)) | (typeval)) -+ -+#define E3_RESET_BCOPY_BLOCK(BLK) \ -+ do { \ -+ (BLK)->eb_done = 0; \ -+ } while (0) -+ -+typedef struct e3_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_Event q_event; /* queue event */ -+} E3_Queue; -+ -+typedef struct e3_blockcopy_queue -+{ -+ volatile E3_uint32 q_state; /* queue is full=bit0, queue is locked=bit8 */ -+ volatile E3_Addr q_bptr; /* block aligned ptr to current back item */ -+ E3_uint32 q_size; /* size of queue item; 0x1 <= size <= (0x40 * 5) */ -+ E3_Addr q_top; /* block aligned ptr to last queue item */ -+ E3_Addr q_base; /* block aligned ptr to first queue item */ -+ volatile E3_Addr q_fptr; /* block aligned ptr to current front item */ -+ E3_BlockCopyEvent q_event; /* queue event */ -+ E3_uint32 q_pad[6]; -+} E3_BlockCopyQueue; -+ -+#define E3_QUEUE_EVENT_OFFSET 24 -+#define QUEUE_FULL(Q) ((Q)->q_state & E3_QUEUE_FULL) -+ -+#endif /* ! _ASM */ -+ -+#endif /* _ELAN3_EVENTS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/intrinsics.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/intrinsics.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/intrinsics.h 2005-07-28 14:52:52.952662928 -0400 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_INTRINSICS_H -+#define _ELAN3_INTRINSICS_H -+ -+#ident "$Id: intrinsics.h,v 1.35 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/intrinsics.h,v $ */ -+ -+#include -+#include -+ -+/* -+ * This file contains definitions of the macros for accessing the QSW -+ * specific instructions, as if they were functions. -+ * The results from the function -+ */ -+ -+#define C_ACK_OK 0 /* return from c_close() */ -+#define C_ACK_TESTFAIL 1 /* return from c_close() */ -+#define C_ACK_DISCARD 2 /* return from c_close() */ -+#define C_ACK_ERROR 3 /* return from c_close() */ -+ -+/* -+ * Elan asi's for tproc block accesses -+ */ -+#define EASI_BYTE 0 -+#define EASI_HALF 1 -+#define EASI_WORD 2 -+#define EASI_DOUBLE 3 -+ -+#if defined(__ELAN3__) && !defined (_ASM) -+ -+extern inline void c_abort(void) -+{ -+ asm volatile (".word 0x0000 ! die you thread you " : : ); -+} -+ -+extern inline void c_suspend(void) -+{ -+ asm volatile ( -+ "set 1f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 1f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" -+ "1: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "suspend ! do the real suspend\n" : : ); -+} -+ -+extern inline int c_close(void) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0" : "=r" (rc) : ); -+ -+ return (rc); -+} -+ -+extern inline int c_close_cookie(volatile E3_uint32 *cookiep, E3_uint32 next) -+{ -+ register int rc asm("o0"); -+ -+ asm volatile ("close %0 ! close the packet\n" -+ "bz,a 1f ! ack received\n" -+ "st %1, [%2] ! update cookie on ack\n" -+ "1: ! label for not-ack\n" -+ : "=r" (rc) : "r" (next), "r" (cookiep)); -+ -+ return (rc); -+} -+ -+extern inline void c_break_busywait(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bpos 1f ! no other thread ready\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_break(void) -+{ -+ asm volatile ( -+ "breaktest ! test to see if break necessary\n" -+ "bne 1f ! haven't exceeded our inst count yet\n" -+ "nop ! delay slot\n" -+ "sub %%sp,3*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%g0,[%%sp+0] ! save the globals\n" -+ "stblock %%i0,[%%sp+8*4] ! save the ins\n" -+ "stblock %%l0,[%%sp+16*4] ! save the locals\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+16*4],%%l0 ! RevB bug fix. restore locals in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i6 ! RevB bug fix. data fetch of instructions\n" -+ "break ! do the real break\n" -+ "ldblock [%%sp+16*4],%%l0 ! restore locals\n" -+ "4: ldblock [%%sp+8*4], %%i0 ! restore ins\n" -+ "ldblock [%%sp+0],%%g0 ! restore globals\n" -+ "add %%sp,3*8*4,%%sp ! restore stack pointer\n" -+ "1: " : : ); -+} -+ -+extern inline void c_open( const int arg ) -+{ -+ asm volatile ("open %0" : : "r" (arg) ); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+ asm volatile ("nop; nop; nop; nop"); -+} -+ -+extern inline void c_waitevent( volatile E3_Event *const ptr, -+ const int count) -+{ -+ register volatile E3_Event *a_unlikely asm("o0") = ptr; -+ register int a_very_unlikely asm("o1") = count; -+ -+ asm volatile ( -+ "sub %%sp,1*8*4,%%sp ! Space to save the registers\n" -+ "stblock %%i0,[%%sp+0] ! save the ins\n" -+ "set 2f, %%i7 ! RevB bug fix. get address of the wakeup inst\n" -+ "andcc %%i7,0x4,%%g0 ! RevB bug fix. check alignment\n" -+ "bne 3f ! RevB bug fix. jump to other alignment\n" -+ "nop ! RevB bug fix. delay slot\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "2: b 4f ! RevB bug fix. Branch over other alignment case\n" -+ " ldblock [%%sp+0],%%i0 ! RevB bug fix. restore ins in delay slot\n" -+ "3: add %%i7,5*4,%%i7 ! RevB bug fix. Point i7 to first ldblock\n" -+ "ldd [%%i7],%%i4 ! RevB bug fix. data fetch of instructions\n" -+ "waitevent ! do the business\n" -+ "ldblock [%%sp+0],%%i0 ! restore ins\n" -+ "4: add %%sp,1*8*4,%%sp ! restore stack pointer\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (a_unlikely), "r" (a_very_unlikely) -+ : /* clobbered */ "g0", "g1", "g2", "g3", "g4", "g5", "g6", "g7", -+ "l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7" ); -+ -+} -+ -+#define c_sendtrans0(type,dest) \ -+ asm volatile ("sendtrans %0, %%g0, %1" : : "i" (type), "r" (dest)) -+ -+#define c_sendtrans1(type,dest,arg) \ -+ asm volatile ("sendtrans %0, %2, %1" : : "i" (type), "r" (dest), "r" (arg)) -+ -+#define c_sendtrans2(type,dest,arg1,arg2) \ -+ do { \ -+ register const unsigned long a_unlikely_1 asm("o4") = arg1; \ -+ register const unsigned long a_unlikely_2 asm("o5") = arg2; \ -+ asm volatile ("sendtrans %0, %2, %1" \ -+ : : "i" (type), "r" (dest), "r" (a_unlikely_1), "r" (a_unlikely_2)); \ -+ } while(0) -+ -+#define c_sendmem(type,dest,ptr) \ -+ asm volatile ("sendtrans %0, [%2], %1" : : "i" (type), "r" (dest), "r" (ptr)) -+ -+/* Copy a single 64-byte block (src blk is read using a BYTE endian type) */ -+extern inline void elan3_copy64b(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_BYTE) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Copy a single 64-byte block (src blk is read using a WORD endian type) */ -+extern inline void elan3_copy64w(void *src, void *dst) -+{ -+ /* Copy 64 bytes using ldblock/stblock -+ * We save and restore the locals/ins because if we don't gcc -+ * really makes a bad job of optimisising the rest of the thread code! -+ * -+ * We force the parameters in g5, g6 so that they aren't -+ * trashed by the loadblk32 into the locals/ins -+ */ -+ register void *tmp1 asm("g5") = src; -+ register void *tmp2 asm("g6") = dst; -+ -+ asm volatile ( -+ "and %%sp,63,%%g7 ! Calculate stack alignment\n" -+ "sub %%sp,2*8*4,%%sp ! Space to save the registers\n" -+ "sub %%sp,%%g7,%%sp ! align stack\n" -+ "stblock64 %%l0,[%%sp] ! save the locals and ins\n" -+ "ldblock64a [%0]%2,%%l0 ! load 64-byte block into locals/ins\n" -+ "stblock64a %%l0,[%1]%2 ! store 64-byte block from local/ins\n" -+ "ldblock64 [%%sp],%%l0 ! restore locals and ins\n" -+ "add %%sp,%%g7, %%sp ! undo alignment\n" -+ "add %%sp,2*8*4,%%sp ! restore stack pointer\n" -+ : /* outputs */ -+ : /* inputs */ "r" (tmp1), "r" (tmp2), "n" (EASI_WORD) -+ : /* clobbered */ "g5", "g6", "g7" ); -+} -+ -+/* Read a 64-bit value with a WORD (32-bit) endian type */ -+extern inline E3_uint64 elan3_read64w( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_WORD) ); -+ -+ return( result ); -+} -+ -+/* Read a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline E3_uint64 elan3_read64dw( volatile E3_uint64 *const ptr ) -+{ -+ E3_uint64 result; -+ -+ asm volatile ( -+ "ldblock8a [%1]%2, %0\n" -+ : /* outputs */ "=r" (result) -+ : /* inputs */ "r" (ptr), "n" (EASI_DOUBLE) ); -+ -+ return( result ); -+} -+ -+/* Write a 32-bit value with a WORD (32-bit) endian type */ -+extern inline void elan3_write64w( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_WORD) ); -+} -+ -+/* Write a 64-bit value with a DOUBLEWORD (64-bit) endian type */ -+extern inline void elan3_write64dw( volatile E3_uint64 *const ptr, E3_uint64 value ) -+{ -+ asm volatile ( -+ "stblock8a %1, [%0]%2\n" -+ : /* no outputs */ -+ : /* inputs */ "r" (ptr), "r" (value), "n" (EASI_DOUBLE) ); -+} -+ -+extern inline E3_uint32 c_swap(volatile E3_uint32 *source, E3_uint32 result) -+{ -+ asm volatile("swap [%1],%0\n" -+ : "=r" (result) -+ : "r" (source) ,"0" (result) -+ : "memory"); -+ return result; -+} -+ -+extern inline E3_uint32 c_swap_save(volatile E3_uint32 *source, const E3_uint32 result) -+{ -+ register E3_uint32 a_unlikely; -+ asm volatile("" : "=r" (a_unlikely) : ); -+ -+ asm volatile("mov %2,%0; swap [%1],%0\n" -+ : "=r" (a_unlikely) -+ : "r" (source) ,"r" (result), "0" (a_unlikely) -+ : "memory"); -+ return a_unlikely; -+} -+#endif /* (__ELAN3__) && !(_ASM) */ -+ -+#endif /* _ELAN3_INTRINSICS_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/minames.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/minames.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/minames.h 2005-07-28 14:52:52.952662928 -0400 -@@ -0,0 +1,256 @@ -+{MI_WaitForRemoteDescRead, "MI_WaitForRemoteDescRead"}, -+{MI_WaitForRemoteDescRead2, "MI_WaitForRemoteDescRead2"}, -+{MI_WaitForRemoteDescRead2_seq1, "MI_WaitForRemoteDescRead2_seq1"}, -+{MI_SendRemoteDmaRoutes, "MI_SendRemoteDmaRoutes"}, -+{MI_IProcTrapped, "MI_IProcTrapped"}, -+{MI_DProcTrapped, "MI_DProcTrapped"}, -+{MI_CProcTrapped, "MI_CProcTrapped"}, -+{MI_TProcTrapped, "MI_TProcTrapped"}, -+{MI_TestWhichDmaQueue, "MI_TestWhichDmaQueue"}, -+{MI_TestWhichDmaQueue_seq1, "MI_TestWhichDmaQueue_seq1"}, -+{MI_InputRemoteDmaUpdateBPtr, "MI_InputRemoteDmaUpdateBPtr"}, -+{MI_FixupQueueContextAndRemoteBit, "MI_FixupQueueContextAndRemoteBit"}, -+{MI_FixupQueueContextAndRemoteBit_seq1, "MI_FixupQueueContextAndRemoteBit_seq1"}, -+{MI_FixupQueueContextAndRemoteBit_seq2, "MI_FixupQueueContextAndRemoteBit_seq2"}, -+{MI_FixupQueueContextAndRemoteBit_seq3, "MI_FixupQueueContextAndRemoteBit_seq3"}, -+{MI_FixupQueueContextAndRemoteBit_seq4, "MI_FixupQueueContextAndRemoteBit_seq4"}, -+{MI_RunDmaCommand, "MI_RunDmaCommand"}, -+{MI_DoSendRemoteDmaDesc, "MI_DoSendRemoteDmaDesc"}, -+{MI_DequeueNonSysCntxDma, "MI_DequeueNonSysCntxDma"}, -+{MI_WaitForRemoteDescRead1, "MI_WaitForRemoteDescRead1"}, -+{MI_RemoteDmaCommand, "MI_RemoteDmaCommand"}, -+{MI_WaitForRemoteRoutes, "MI_WaitForRemoteRoutes"}, -+{MI_DequeueSysCntxDma, "MI_DequeueSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForQueue, "MI_ExecuteDmaDescriptorForQueue"}, -+{MI_ExecuteDmaDescriptor1, "MI_ExecuteDmaDescriptor1"}, -+{MI_ExecuteDmaDescriptor1_seq1, "MI_ExecuteDmaDescriptor1_seq1"}, -+{MI_ExecuteDmaDescriptor1_seq2, "MI_ExecuteDmaDescriptor1_seq2"}, -+{MI_ExecuteDmaDescriptor1_seq3, "MI_ExecuteDmaDescriptor1_seq3"}, -+{MI_GetNewSizeInProg, "MI_GetNewSizeInProg"}, -+{MI_GetNewSizeInProg_seq1, "MI_GetNewSizeInProg_seq1"}, -+{MI_FirstBlockRead, "MI_FirstBlockRead"}, -+{MI_ExtraFirstBlockRead, "MI_ExtraFirstBlockRead"}, -+{MI_UnimplementedError, "MI_UnimplementedError"}, -+{MI_UpdateDescriptor, "MI_UpdateDescriptor"}, -+{MI_UpdateDescriptor_seq1, "MI_UpdateDescriptor_seq1"}, -+{MI_UpdateDescriptor_seq2, "MI_UpdateDescriptor_seq2"}, -+{MI_UpdateDescriptor_seq3, "MI_UpdateDescriptor_seq3"}, -+{MI_UpdateDescriptor_seq4, "MI_UpdateDescriptor_seq4"}, -+{MI_UpdateDescriptor_seq5, "MI_UpdateDescriptor_seq5"}, -+{MI_GetNextSizeInProg, "MI_GetNextSizeInProg"}, -+{MI_DoStopThisDma, "MI_DoStopThisDma"}, -+{MI_DoStopThisDma_seq1, "MI_DoStopThisDma_seq1"}, -+{MI_GenNewBytesToRead, "MI_GenNewBytesToRead"}, -+{MI_WaitForEventReadTy1, "MI_WaitForEventReadTy1"}, -+{MI_WaitUpdateEvent, "MI_WaitUpdateEvent"}, -+{MI_WaitUpdateEvent_seq1, "MI_WaitUpdateEvent_seq1"}, -+{MI_DoSleepOneTickThenRunable, "MI_DoSleepOneTickThenRunable"}, -+{MI_RunEvent, "MI_RunEvent"}, -+{MI_EnqueueThread, "MI_EnqueueThread"}, -+{MI_CheckContext0, "MI_CheckContext0"}, -+{MI_EnqueueDma, "MI_EnqueueDma"}, -+{MI_CprocTrapping, "MI_CprocTrapping"}, -+{MI_CprocTrapping_seq1, "MI_CprocTrapping_seq1"}, -+{MI_WaitForRemoteRoutes1, "MI_WaitForRemoteRoutes1"}, -+{MI_SetEventCommand, "MI_SetEventCommand"}, -+{MI_DoSetEvent, "MI_DoSetEvent"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma, "MI_DoRemoteSetEventNowOrTrapQueueingDma"}, -+{MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1, "MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1"}, -+{MI_SendRemoteDmaRoutes2, "MI_SendRemoteDmaRoutes2"}, -+{MI_WaitForRemoteRoutes2, "MI_WaitForRemoteRoutes2"}, -+{MI_WaitEventCommandTy0, "MI_WaitEventCommandTy0"}, -+{MI_DequeueNonSysCntxDma2, "MI_DequeueNonSysCntxDma2"}, -+{MI_WaitEventCommandTy1, "MI_WaitEventCommandTy1"}, -+{MI_WaitEventCommandTy1_seq1, "MI_WaitEventCommandTy1_seq1"}, -+{MI_DequeueNonSysCntxThread, "MI_DequeueNonSysCntxThread"}, -+{MI_DequeueSysCntxDma1, "MI_DequeueSysCntxDma1"}, -+{MI_DequeueSysCntxThread, "MI_DequeueSysCntxThread"}, -+{MI_TestNonSysCntxDmaQueueEmpty, "MI_TestNonSysCntxDmaQueueEmpty"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq1, "MI_TestNonSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestNonSysCntxDmaQueueEmpty_seq2, "MI_TestNonSysCntxDmaQueueEmpty_seq2"}, -+{MI_RunThreadCommand, "MI_RunThreadCommand"}, -+{MI_SetEventWaitForLastAcess, "MI_SetEventWaitForLastAcess"}, -+{MI_SetEventReadWait, "MI_SetEventReadWait"}, -+{MI_SetEventReadWait_seq1, "MI_SetEventReadWait_seq1"}, -+{MI_TestEventType, "MI_TestEventType"}, -+{MI_TestEventType_seq1, "MI_TestEventType_seq1"}, -+{MI_TestEventBit2, "MI_TestEventBit2"}, -+{MI_DmaDescOrBlockCopyOrChainedEvent, "MI_DmaDescOrBlockCopyOrChainedEvent"}, -+{MI_RunThread, "MI_RunThread"}, -+{MI_RunThread1, "MI_RunThread1"}, -+{MI_RunThread1_seq1, "MI_RunThread1_seq1"}, -+{MI_IncDmaSysCntxBPtr, "MI_IncDmaSysCntxBPtr"}, -+{MI_IncDmaSysCntxBPtr_seq1, "MI_IncDmaSysCntxBPtr_seq1"}, -+{MI_IncDmaSysCntxBPtr_seq2, "MI_IncDmaSysCntxBPtr_seq2"}, -+{MI_WaitForCntxDmaDescRead, "MI_WaitForCntxDmaDescRead"}, -+{MI_FillInContext, "MI_FillInContext"}, -+{MI_FillInContext_seq1, "MI_FillInContext_seq1"}, -+{MI_WriteNewDescToQueue, "MI_WriteNewDescToQueue"}, -+{MI_WriteNewDescToQueue_seq1, "MI_WriteNewDescToQueue_seq1"}, -+{MI_TestForQueueWrap, "MI_TestForQueueWrap"}, -+{MI_TestForQueueWrap_seq1, "MI_TestForQueueWrap_seq1"}, -+{MI_TestQueueIsFull, "MI_TestQueueIsFull"}, -+{MI_TestQueueIsFull_seq1, "MI_TestQueueIsFull_seq1"}, -+{MI_TestQueueIsFull_seq2, "MI_TestQueueIsFull_seq2"}, -+{MI_CheckPsychoShitFixup, "MI_CheckPsychoShitFixup"}, -+{MI_PsychoShitFixupForcedRead, "MI_PsychoShitFixupForcedRead"}, -+{MI_PrepareDMATimeSlice, "MI_PrepareDMATimeSlice"}, -+{MI_PrepareDMATimeSlice_seq1, "MI_PrepareDMATimeSlice_seq1"}, -+{MI_TProcRestartFromTrapOrTestEventBit2, "MI_TProcRestartFromTrapOrTestEventBit2"}, -+{MI_TProcRestartFromTrapOrTestEventBit2_seq1, "MI_TProcRestartFromTrapOrTestEventBit2_seq1"}, -+{MI_WaitForGlobalsRead, "MI_WaitForGlobalsRead"}, -+{MI_WaitForNPCRead, "MI_WaitForNPCRead"}, -+{MI_EventInterrupt, "MI_EventInterrupt"}, -+{MI_EventInterrupt_seq1, "MI_EventInterrupt_seq1"}, -+{MI_EventInterrupt_seq2, "MI_EventInterrupt_seq2"}, -+{MI_EventInterrupt_seq3, "MI_EventInterrupt_seq3"}, -+{MI_TestSysCntxDmaQueueEmpty, "MI_TestSysCntxDmaQueueEmpty"}, -+{MI_TestSysCntxDmaQueueEmpty_seq1, "MI_TestSysCntxDmaQueueEmpty_seq1"}, -+{MI_TestIfRemoteDesc, "MI_TestIfRemoteDesc"}, -+{MI_DoDmaLocalSetEvent, "MI_DoDmaLocalSetEvent"}, -+{MI_DoDmaLocalSetEvent_seq1, "MI_DoDmaLocalSetEvent_seq1"}, -+{MI_DoDmaLocalSetEvent_seq2, "MI_DoDmaLocalSetEvent_seq2"}, -+{MI_DmaLoop1, "MI_DmaLoop1"}, -+{MI_ExitDmaLoop, "MI_ExitDmaLoop"}, -+{MI_ExitDmaLoop_seq1, "MI_ExitDmaLoop_seq1"}, -+{MI_RemoteDmaTestPAckType, "MI_RemoteDmaTestPAckType"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0, "MI_PacketDiscardOrTestFailRecIfCCis0"}, -+{MI_PacketDiscardOrTestFailRecIfCCis0_seq1, "MI_PacketDiscardOrTestFailRecIfCCis0_seq1"}, -+{MI_TestNackFailIsZero2, "MI_TestNackFailIsZero2"}, -+{MI_TestNackFailIsZero3, "MI_TestNackFailIsZero3"}, -+{MI_DmaFailCountError, "MI_DmaFailCountError"}, -+{MI_TestDmaForSysCntx, "MI_TestDmaForSysCntx"}, -+{MI_TestDmaForSysCntx_seq1, "MI_TestDmaForSysCntx_seq1"}, -+{MI_TestDmaForSysCntx_seq2, "MI_TestDmaForSysCntx_seq2"}, -+{MI_TestAeqB2, "MI_TestAeqB2"}, -+{MI_TestAeqB2_seq1, "MI_TestAeqB2_seq1"}, -+{MI_GetNextDmaDescriptor, "MI_GetNextDmaDescriptor"}, -+{MI_DequeueSysCntxDma2, "MI_DequeueSysCntxDma2"}, -+{MI_InputSetEvent, "MI_InputSetEvent"}, -+{MI_PutBackSysCntxDma, "MI_PutBackSysCntxDma"}, -+{MI_PutBackSysCntxDma_seq1, "MI_PutBackSysCntxDma_seq1"}, -+{MI_PutBackSysCntxDma_seq2, "MI_PutBackSysCntxDma_seq2"}, -+{MI_InputRemoteDma, "MI_InputRemoteDma"}, -+{MI_InputRemoteDma_seq1, "MI_InputRemoteDma_seq1"}, -+{MI_WaitOneTickForWakeup1, "MI_WaitOneTickForWakeup1"}, -+{MI_SendRemoteDmaDesc, "MI_SendRemoteDmaDesc"}, -+{MI_InputLockQueue, "MI_InputLockQueue"}, -+{MI_CloseTheTrappedPacketIfCCis1, "MI_CloseTheTrappedPacketIfCCis1"}, -+{MI_CloseTheTrappedPacketIfCCis1_seq1, "MI_CloseTheTrappedPacketIfCCis1_seq1"}, -+{MI_PostDmaInterrupt, "MI_PostDmaInterrupt"}, -+{MI_InputUnLockQueue, "MI_InputUnLockQueue"}, -+{MI_WaitForUnLockDescRead, "MI_WaitForUnLockDescRead"}, -+{MI_SendEOPforRemoteDma, "MI_SendEOPforRemoteDma"}, -+{MI_LookAtRemoteAck, "MI_LookAtRemoteAck"}, -+{MI_InputWriteBlockQueue, "MI_InputWriteBlockQueue"}, -+{MI_WaitForSpStore, "MI_WaitForSpStore"}, -+{MI_TProcNext, "MI_TProcNext"}, -+{MI_TProcStoppedRunning, "MI_TProcStoppedRunning"}, -+{MI_InputWriteBlock, "MI_InputWriteBlock"}, -+{MI_RunDmaOrDeqNonSysCntxDma, "MI_RunDmaOrDeqNonSysCntxDma"}, -+{MI_ExecuteDmaDescriptorForRun, "MI_ExecuteDmaDescriptorForRun"}, -+{MI_ConfirmQueueLock, "MI_ConfirmQueueLock"}, -+{MI_DmaInputIdentify, "MI_DmaInputIdentify"}, -+{MI_TProcStoppedRunning2, "MI_TProcStoppedRunning2"}, -+{MI_TProcStoppedRunning2_seq1, "MI_TProcStoppedRunning2_seq1"}, -+{MI_TProcStoppedRunning2_seq2, "MI_TProcStoppedRunning2_seq2"}, -+{MI_ThreadInputIdentify, "MI_ThreadInputIdentify"}, -+{MI_InputIdWriteAddrAndType3, "MI_InputIdWriteAddrAndType3"}, -+{MI_IProcTrappedWriteStatus, "MI_IProcTrappedWriteStatus"}, -+{MI_FinishTrappingEop, "MI_FinishTrappingEop"}, -+{MI_InputTestTrans, "MI_InputTestTrans"}, -+{MI_TestAeqB3, "MI_TestAeqB3"}, -+{MI_ThreadUpdateNonSysCntxBack, "MI_ThreadUpdateNonSysCntxBack"}, -+{MI_ThreadQueueOverflow, "MI_ThreadQueueOverflow"}, -+{MI_RunContext0Thread, "MI_RunContext0Thread"}, -+{MI_RunContext0Thread_seq1, "MI_RunContext0Thread_seq1"}, -+{MI_RunContext0Thread_seq2, "MI_RunContext0Thread_seq2"}, -+{MI_RunDmaDesc, "MI_RunDmaDesc"}, -+{MI_RunDmaDesc_seq1, "MI_RunDmaDesc_seq1"}, -+{MI_RunDmaDesc_seq2, "MI_RunDmaDesc_seq2"}, -+{MI_TestAeqB, "MI_TestAeqB"}, -+{MI_WaitForNonCntxDmaDescRead, "MI_WaitForNonCntxDmaDescRead"}, -+{MI_DmaQueueOverflow, "MI_DmaQueueOverflow"}, -+{MI_BlockCopyEvent, "MI_BlockCopyEvent"}, -+{MI_BlockCopyEventReadBlock, "MI_BlockCopyEventReadBlock"}, -+{MI_BlockCopyWaitForReadData, "MI_BlockCopyWaitForReadData"}, -+{MI_InputWriteWord, "MI_InputWriteWord"}, -+{MI_TraceSetEvents, "MI_TraceSetEvents"}, -+{MI_TraceSetEvents_seq1, "MI_TraceSetEvents_seq1"}, -+{MI_TraceSetEvents_seq2, "MI_TraceSetEvents_seq2"}, -+{MI_InputWriteDoubleWd, "MI_InputWriteDoubleWd"}, -+{MI_SendLockTransIfCCis1, "MI_SendLockTransIfCCis1"}, -+{MI_WaitForDmaRoutes1, "MI_WaitForDmaRoutes1"}, -+{MI_LoadDmaContext, "MI_LoadDmaContext"}, -+{MI_InputTestAndSetWord, "MI_InputTestAndSetWord"}, -+{MI_InputTestAndSetWord_seq1, "MI_InputTestAndSetWord_seq1"}, -+{MI_GetDestEventValue, "MI_GetDestEventValue"}, -+{MI_SendDmaIdentify, "MI_SendDmaIdentify"}, -+{MI_InputAtomicAddWord, "MI_InputAtomicAddWord"}, -+{MI_LoadBFromTransD0, "MI_LoadBFromTransD0"}, -+{MI_ConditionalWriteBackCCTrue, "MI_ConditionalWriteBackCCTrue"}, -+{MI_WaitOneTickForWakeup, "MI_WaitOneTickForWakeup"}, -+{MI_SendFinalUnlockTrans, "MI_SendFinalUnlockTrans"}, -+{MI_SendDmaEOP, "MI_SendDmaEOP"}, -+{MI_GenLastAddrForPsycho, "MI_GenLastAddrForPsycho"}, -+{MI_FailedAckIfCCis0, "MI_FailedAckIfCCis0"}, -+{MI_FailedAckIfCCis0_seq1, "MI_FailedAckIfCCis0_seq1"}, -+{MI_WriteDmaSysCntxDesc, "MI_WriteDmaSysCntxDesc"}, -+{MI_TimesliceDmaQueueOverflow, "MI_TimesliceDmaQueueOverflow"}, -+{MI_DequeueNonSysCntxThread1, "MI_DequeueNonSysCntxThread1"}, -+{MI_DequeueNonSysCntxThread1_seq1, "MI_DequeueNonSysCntxThread1_seq1"}, -+{MI_TestThreadQueueEmpty, "MI_TestThreadQueueEmpty"}, -+{MI_ClearThreadQueueIfCC, "MI_ClearThreadQueueIfCC"}, -+{MI_DequeueSysCntxThread1, "MI_DequeueSysCntxThread1"}, -+{MI_DequeueSysCntxThread1_seq1, "MI_DequeueSysCntxThread1_seq1"}, -+{MI_TProcStartUpGeneric, "MI_TProcStartUpGeneric"}, -+{MI_WaitForPCload2, "MI_WaitForPCload2"}, -+{MI_WaitForNPCWrite, "MI_WaitForNPCWrite"}, -+{MI_WaitForEventWaitAddr, "MI_WaitForEventWaitAddr"}, -+{MI_WaitForWaitEventAccess, "MI_WaitForWaitEventAccess"}, -+{MI_WaitForWaitEventAccess_seq1, "MI_WaitForWaitEventAccess_seq1"}, -+{MI_WaitForWaitEventDesc, "MI_WaitForWaitEventDesc"}, -+{MI_WaitForEventReadTy0, "MI_WaitForEventReadTy0"}, -+{MI_SendCondTestFail, "MI_SendCondTestFail"}, -+{MI_InputMoveToNextTrans, "MI_InputMoveToNextTrans"}, -+{MI_ThreadUpdateSysCntxBack, "MI_ThreadUpdateSysCntxBack"}, -+{MI_FinishedSetEvent, "MI_FinishedSetEvent"}, -+{MI_EventIntUpdateBPtr, "MI_EventIntUpdateBPtr"}, -+{MI_EventQueueOverflow, "MI_EventQueueOverflow"}, -+{MI_MaskLowerSource, "MI_MaskLowerSource"}, -+{MI_DmaLoop, "MI_DmaLoop"}, -+{MI_SendNullSetEvent, "MI_SendNullSetEvent"}, -+{MI_SendFinalSetEvent, "MI_SendFinalSetEvent"}, -+{MI_TestNackFailIsZero1, "MI_TestNackFailIsZero1"}, -+{MI_DmaPacketTimedOutOrPacketError, "MI_DmaPacketTimedOutOrPacketError"}, -+{MI_NextPacketIsLast, "MI_NextPacketIsLast"}, -+{MI_TestForZeroLengthDma, "MI_TestForZeroLengthDma"}, -+{MI_WaitForPCload, "MI_WaitForPCload"}, -+{MI_ReadInIns, "MI_ReadInIns"}, -+{MI_WaitForInsRead, "MI_WaitForInsRead"}, -+{MI_WaitForLocals, "MI_WaitForLocals"}, -+{MI_WaitForOutsWrite, "MI_WaitForOutsWrite"}, -+{MI_WaitForWaitEvWrBack, "MI_WaitForWaitEvWrBack"}, -+{MI_WaitForLockRead, "MI_WaitForLockRead"}, -+{MI_TestQueueLock, "MI_TestQueueLock"}, -+{MI_InputIdWriteAddrAndType, "MI_InputIdWriteAddrAndType"}, -+{MI_InputIdWriteAddrAndType2, "MI_InputIdWriteAddrAndType2"}, -+{MI_ThreadInputIdentify2, "MI_ThreadInputIdentify2"}, -+{MI_WriteIntoTrapArea0, "MI_WriteIntoTrapArea0"}, -+{MI_GenQueueBlockWrAddr, "MI_GenQueueBlockWrAddr"}, -+{MI_InputDiscardFreeLock, "MI_InputDiscardFreeLock"}, -+{MI_WriteIntoTrapArea1, "MI_WriteIntoTrapArea1"}, -+{MI_WriteIntoTrapArea2, "MI_WriteIntoTrapArea2"}, -+{MI_ResetBPtrToBase, "MI_ResetBPtrToBase"}, -+{MI_InputDoTrap, "MI_InputDoTrap"}, -+{MI_RemoteDmaCntxt0Update, "MI_RemoteDmaCntxt0Update"}, -+{MI_ClearQueueLock, "MI_ClearQueueLock"}, -+{MI_IProcTrappedBlockWriteData, "MI_IProcTrappedBlockWriteData"}, -+{MI_FillContextFilter, "MI_FillContextFilter"}, -+{MI_IProcTrapped4, "MI_IProcTrapped4"}, -+{MI_RunSysCntxDma, "MI_RunSysCntxDma"}, -+{MI_ChainedEventError, "MI_ChainedEventError"}, -+{MI_InputTrappingEOP, "MI_InputTrappingEOP"}, -+{MI_CheckForRunIfZero, "MI_CheckForRunIfZero"}, -+{MI_TestForBreakOrSuspend, "MI_TestForBreakOrSuspend"}, -+{MI_SwapForRunable, "MI_SwapForRunable"}, -Index: linux-2.6.5-7.191/include/elan3/neterr_rpc.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/neterr_rpc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/neterr_rpc.h 2005-07-28 14:52:52.953662776 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_NETERR_RPC_H -+#define __ELAN3_NETERR_RPC_H -+ -+#ident "$Id: neterr_rpc.h,v 1.20 2003/06/26 16:05:22 fabien Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/neterr_rpc.h,v $*/ -+ -+#define NETERR_SERVICE "neterr-srv" -+#define NETERR_PROGRAM ((u_long) 170002) -+#define NETERR_VERSION ((u_long) 1) -+ -+#define NETERR_NULL_RPC 0 -+#define NETERR_FIXUP_RPC 1 -+ -+/* network error rpc timeout */ -+#define NETERR_RPC_TIMEOUT 5 -+ -+/* -+ * XDR functions for Tru64 and Linux in userspace. -+ * NB Linux kernelspace xdr routines are in network_error. -+ * and *must* be kept consistent. -+ */ -+#if defined(DIGITAL_UNIX) || !defined(__KERNEL__) -+bool_t -+xdr_capability (XDR *xdrs, void *arg) -+{ -+ ELAN_CAPABILITY *cap = (ELAN_CAPABILITY *) arg; -+ -+ return (xdr_opaque (xdrs, (caddr_t) &cap->cap_userkey, sizeof (cap->cap_userkey)) && -+ xdr_int (xdrs, &cap->cap_version) && -+ xdr_u_short (xdrs, &cap->cap_type) && -+ xdr_int (xdrs, &cap->cap_lowcontext) && -+ xdr_int (xdrs, &cap->cap_highcontext) && -+ xdr_int (xdrs, &cap->cap_mycontext) && -+ xdr_int (xdrs, &cap->cap_lownode) && -+ xdr_int (xdrs, &cap->cap_highnode) && -+ xdr_u_int (xdrs, &cap->cap_railmask) && -+ xdr_opaque (xdrs, (caddr_t) &cap->cap_bitmap[0], sizeof (cap->cap_bitmap))); -+} -+ -+bool_t -+xdr_neterr_msg (XDR *xdrs, void *req) -+{ -+ NETERR_MSG *msg = (NETERR_MSG *) req; -+ -+ return (xdr_u_int (xdrs, &msg->Rail) && -+ xdr_capability (xdrs, &msg->SrcCapability) && -+ xdr_capability (xdrs, &msg->DstCapability) && -+ xdr_u_int (xdrs, &msg->DstProcess) && -+ xdr_u_int (xdrs, &msg->CookieAddr) && -+ xdr_u_int (xdrs, &msg->CookieVProc) && -+ xdr_u_int (xdrs, &msg->NextCookie) && -+ xdr_u_int (xdrs, &msg->WaitForEop)); -+} -+#endif /* INCLUDE_XDR_INLINE */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN3_NETERR_RPC_H */ -Index: linux-2.6.5-7.191/include/elan3/perm.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/perm.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/perm.h 2005-07-28 14:52:52.953662776 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PERM_H -+#define __ELAN3_PERM_H -+ -+#ident "$Id: perm.h,v 1.7 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/perm.h,v $*/ -+ -+#define ELAN3_PERM_NULL 0x00 -+#define ELAN3_PERM_LOCAL_READ 0x04 -+#define ELAN3_PERM_READ 0x08 -+#define ELAN3_PERM_NOREMOTE 0x0c -+#define ELAN3_PERM_REMOTEREAD 0x10 -+#define ELAN3_PERM_REMOTEWRITE 0x14 -+#define ELAN3_PERM_REMOTEEVENT 0x18 -+#define ELAN3_PERM_REMOTEALL 0x1c -+ -+#endif /* __ELAN3_PERM_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/pte.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/pte.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/pte.h 2005-07-28 14:52:52.953662776 -0400 -@@ -0,0 +1,139 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_PTE_H -+#define __ELAN3_PTE_H -+ -+#ident "$Id: pte.h,v 1.26 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/pte.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" -+{ -+#endif -+ -+#include -+#include -+ -+typedef E3_uint64 ELAN3_PTE; -+typedef E3_uint32 ELAN3_PTP; -+ -+#define ELAN3_PTE_SIZE (8) -+#define ELAN3_PTP_SIZE (4) -+ -+#define ELAN3_PTE_REF ((E3_uint64) 1 << 63) /* 63 - referenced bit */ -+#define ELAN3_PTE_MOD ((E3_uint64) 1 << 55) /* 55 - modified bit */ -+#define ELAN3_RM_MASK (ELAN3_PTE_REF | ELAN3_PTE_MOD) -+ -+#define ELAN3_PTE_PFN_MASK 0x0000fffffffff000ull /* [12:48] - Physical address */ -+ -+#define ELAN3_PTE_BIG_ENDIAN 0x80 /* 7 - big endian */ -+#define ELAN3_PTE_64_BIT 0x40 /* 6 - 64 bit pci address */ -+#define ELAN3_PTE_LOCAL 0x20 /* 5 - local sdram */ -+ -+#define ELAN3_PTE_PERM_MASK 0x1c /* [2:4] - Permissions */ -+#define ELAN3_PTE_PERM_SHIFT 2 -+ -+#define ELAN3_ET_MASK 0x3 -+#define ELAN3_ET_INVALID 0x0 /* [0:1] */ -+#define ELAN3_ET_PTP 0x1 -+#define ELAN3_ET_PTE 0x2 -+ -+#define ELAN3_INVALID_PTP ((ELAN3_PTP) 0) -+#define ELAN3_INVALID_PTE ((ELAN3_PTE) 0) -+ -+#define ELAN3_PTP_TYPE(ptp) ((ptp) & ELAN3_ET_MASK) -+#define ELAN3_PTE_TYPE(pte) ((pte) & ELAN3_ET_MASK) -+#define ELAN3_PTE_PERM(pte) ((pte) & ELAN3_PTE_PERM_MASK) -+#define ELAN3_PTE_VALID(pte) (((pte) & ELAN3_ET_MASK) == ELAN3_ET_PTE) -+#define ELAN3_PTE_ISREF(pte) ((pte) & ELAN3_PTE_REF) -+#define ELAN3_PTE_ISMOD(pte) ((pte) & ELAN3_PTE_MOD) -+#define ELAN3_PTE_WRITEABLE(pte) (ELAN3_PERM_WRITEABLE(ELAN3_PTE_PERM(pte))) -+ -+#define ELAN3_PERM_WRITEABLE(perm) ((perm) == ELAN3_PERM_NOREMOTE || (perm) > ELAN3_PERM_REMOTEREAD) -+#define ELAN3_PERM_REMOTE(perm) ((perm) > ELAN3_PERM_NOREMOTE) -+ -+#define ELAN3_PERM_READONLY(perm) ((perm) == ELAN3_PERM_NOREMOTE ? ELAN3_PERM_LOCAL_READ : \ -+ (perm) > ELAN3_PERM_REMOTEREAD ? ELAN3_PERM_READ : (perm)) -+#if PAGE_SHIFT == 12 -+# define ELAN3_PAGE_SHIFT 12 -+#else -+# define ELAN3_PAGE_SHIFT 13 -+#endif -+ -+#define ELAN3_PAGE_SIZE (1 << ELAN3_PAGE_SHIFT) -+#define ELAN3_PAGE_OFFSET (ELAN3_PAGE_SIZE-1) -+#define ELAN3_PAGE_MASK (~ELAN3_PAGE_OFFSET) -+ -+#if ELAN3_PAGE_SHIFT == 13 -+# define ELAN3_L3_SHIFT 5 -+#else -+# define ELAN3_L3_SHIFT 6 -+#endif -+#define ELAN3_L2_SHIFT 6 -+#define ELAN3_L1_SHIFT 8 -+ -+/* Number of entries in a given level ptbl */ -+#define ELAN3_L3_ENTRIES (1 << ELAN3_L3_SHIFT) -+#define ELAN3_L2_ENTRIES (1 << ELAN3_L2_SHIFT) -+#define ELAN3_L1_ENTRIES (1 << ELAN3_L1_SHIFT) -+ -+/* Virtual address spanned by each entry */ -+#define ELAN3_L3_SIZE (1 << (ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L1_SIZE (1 << (ELAN3_L3_SHIFT+ELAN3_L2_SHIFT+ELAN3_PAGE_SHIFT)) -+ -+/* Virtual address size of page table */ -+#define ELAN3_L1_PTSIZE (ELAN3_L1_ENTRIES * ELAN3_L1_SIZE) -+#define ELAN3_L3_PTSIZE (ELAN3_L3_ENTRIES * ELAN3_L3_SIZE) -+#define ELAN3_L2_PTSIZE (ELAN3_L2_ENTRIES * ELAN3_L2_SIZE) -+ -+/* Mask for offset into page table */ -+#define ELAN3_L1_PTOFFSET ((ELAN3_L1_SIZE*ELAN3_L1_ENTRIES)-1) -+#define ELAN3_L3_PTOFFSET ((ELAN3_L3_SIZE*ELAN3_L3_ENTRIES)-1) -+#define ELAN3_L2_PTOFFSET ((ELAN3_L2_SIZE*ELAN3_L2_ENTRIES)-1) -+ -+#define ELAN3_L1_INDEX(addr) (((E3_Addr) (addr) & 0xFF000000) >> (ELAN3_L2_SHIFT+ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L2_INDEX(addr) (((E3_Addr) (addr) & 0x00FD0000) >> (ELAN3_L3_SHIFT+ELAN3_PAGE_SHIFT)) -+#define ELAN3_L3_INDEX(addr) (((E3_Addr) (addr) & 0x0003F000) >> ELAN3_PAGE_SHIFT) -+ -+#define ELAN3_L1_BASE(addr) (((E3_Addr)(addr)) & 0x00000000) -+#define ELAN3_L2_BASE(addr) (((E3_Addr)(addr)) & 0xFF000000) -+#define ELAN3_L3_BASE(addr) (((E3_Addr)(addr)) & 0xFFFC0000) -+ -+/* Convert a page table pointer entry to the PT */ -+#define PTP_TO_PT_PADDR(ptp) ((E3_Addr)(ptp & 0xFFFFFFFC)) -+ -+#ifdef __KERNEL__ -+/* -+ * incompatible access for permission macro. -+ */ -+extern u_char elan3mmu_permissionTable[8]; -+#define ELAN3_INCOMPAT_ACCESS(perm,access) (! (elan3mmu_permissionTable[(perm)>>ELAN3_PTE_PERM_SHIFT] & (1 << (access)))) -+ -+#define elan3_readptp(dev, ptp) (elan3_sdram_readl (dev, ptp)) -+#define elan3_writeptp(dev, ptp, value) (elan3_sdram_writel (dev, ptp, value)) -+#define elan3_readpte(dev, pte) (elan3_sdram_readq (dev, pte)) -+#define elan3_writepte(dev,pte, value) (elan3_sdram_writeq (dev, pte, value)) -+ -+#define elan3_invalidatepte(dev, pte) (elan3_sdram_writel (dev, pte, 0)) -+#define elan3_modifypte(dev,pte,new) (elan3_sdram_writel (dev, pte, (int) (new))) -+#define elan3_clrref(dev,pte) (elan3_sdram_writeb (dev, pte + 7) -+ -+#endif /* __KERNEL__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_PTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/spinlock.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/spinlock.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/spinlock.h 2005-07-28 14:52:52.954662624 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_SPINLOCK_ -+#define _ELAN3_SPINLOCK_ -+ -+#ident "$Id: spinlock.h,v 1.31 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/spinlock.h,v $*/ -+ -+/* -+ * This spinlock is designed for main/elan processor interactions. -+ * The lock is split over Elan/Main memory in such a way that -+ * we don't end up busy-polling over the PCI. -+ * In the Elan memory we have two words; one is a sequence number -+ * and the other is a lock word for main. -+ * In main memory we have a copy of the sequence number which main polls when it is -+ * waiting for the Elan to drop the lock. Main polls this word until it becomes -+ * equal to the sequence number it sampled. -+ * The Elan drops the lock by writing the current sequence number to main memory. -+ * It is coded to always give priority to the Elan thread, and so when both go for the -+ * lock, main will back off first. -+ * -+ * 18/3/98 -+ * This has been extended to avoid a starvation case where both the main and thread claim the -+ * lock and so both backoff (thread does a break). So now, main attempts to claim the -+ * lock by writing 'mainLock' then samples the 'sl_seq' and if it has the lock -+ * it sets 'mainGotLock'. The thread will now see the 'sl_mainLock' set, but will only -+ * backoff with a c_break_busywait() if 'mainGotLock' is set too. -+ */ -+typedef struct elan3_spinlock_elan { -+ union { -+ volatile E3_uint64 mainLocks; /* main writes this dble word */ -+ struct { -+ volatile E3_uint32 mainLock; /* main wants a lock */ -+ volatile E3_uint32 mainGotLock; /* main has the lock */ -+ } s; -+ } sl_u; -+ volatile E3_uint32 sl_seq; /* thread owns this word */ -+ volatile E3_uint32 sl_mainWait; /* performance counter */ -+ volatile E3_uint32 sl_elanWait; /* performance counter */ -+ volatile E3_uint32 sl_elanBusyWait; /* performance counter */ -+ /* NOTE: The lock/seq words must be within the same 32-byte Elan cache-line */ -+ E3_uint64 sl_pad[5]; /* pad to 64-bytes */ -+} ELAN3_SPINLOCK_ELAN; -+ -+#define sl_mainLocks sl_u.mainLocks -+#define sl_mainLock sl_u.s.mainLock -+#define sl_mainGotLock sl_u.s.mainGotLock -+ -+#define SL_MAIN_RECESSIVE 1 -+#define SL_MAIN_DOMINANT 2 -+ -+/* Declare this as a main memory cache block for efficiency */ -+typedef union elan3_spinlock_main { -+ volatile E3_uint32 sl_seq; /* copy of seq number updated by Elan */ -+ volatile E3_uint32 sl_Int32[E3_BLK_SIZE/sizeof (E3_uint32)]; -+} ELAN3_SPINLOCK_MAIN; -+ -+/* Main/Main or Elan/Elan lock word */ -+typedef volatile int ELAN3_SPINLOCK; -+ -+#ifdef __ELAN3__ -+ -+/* Main/Elan interlock */ -+ -+#define ELAN3_ME_SPINENTER(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SLE)->sl_seq++; \ -+ if ((SLE)->sl_mainLock) \ -+ elan3_me_spinblock(SLE, SL);\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+#define ELAN3_ME_SPINEXIT(SLE,SL) do {\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ (SL)->sl_seq = (SLE)->sl_seq;\ -+ asm volatile ("! elan3_spinlock store barrier");\ -+ } while (0) -+ -+ -+/* Elan/Elan interlock */ -+#define ELAN3_SPINENTER(L) do {\ -+ asm volatile ("! store barrier");\ -+ if (c_swap ((L), 1)) elan3_spinenter(L);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ asm volatile ("! store barrier");\ -+ c_swap((L), 0);\ -+ asm volatile ("! store barrier");\ -+ } while (0) -+ -+extern void elan3_me_spinblock (ELAN3_SPINLOCK_ELAN *sle, ELAN3_SPINLOCK_MAIN *sl); -+extern void elan3_spinenter (ELAN3_SPINLOCK *l); -+ -+#else -+ -+/* Main/Elan interlock */ -+#ifdef DEBUG -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_int32 maxLoops = 0x7fffffff; \ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1) && maxLoops--) ; \ -+ if (maxLoops < 0) { \ -+ printf("Failed to get ME lock %lx/%lx seq %d sle_seq %d sl_seq %d\n", \ -+ SL, SLE, seq, \ -+ elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)), \ -+ (SL)->sl_seq); \ -+ } \ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, (SLE) + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#else -+#define ELAN3_ME_SPINENTER(SDRAM,SLE,SL) do {\ -+ register E3_uint32 seq;\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) {\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), 0); \ -+ while ((SL)->sl_seq == (seq-1)) ; \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ }\ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } while (0) -+#endif -+#define ELAN3_ME_FORCEENTER(SDRAM,SLE,SL) do { \ -+ register E3_uint32 seq; \ -+ MEMBAR_STORELOAD(); \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_DOMINANT); \ -+ MEMBAR_STORELOAD(); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ while (seq != (SL)->sl_seq) \ -+ { \ -+ /* NOTE: we MUST call elan3_usecspin here for kernel comms */\ -+ while ((SL)->sl_seq == (seq)-1) \ -+ elan3_usecspin (1); \ -+ seq = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+ } \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD(); \ -+} while (0) -+ -+#define ELAN3_ME_TRYENTER(SDRAM,SLE,SL,SEQ) do { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLock), SL_MAIN_RECESSIVE); \ -+ MEMBAR_STORELOAD(); \ -+ SEQ = elan3_read32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_seq)); \ -+} while (0) -+ -+#define ELAN3_ME_CHECKENTER(SDRAM,SLE,SL,SEQ) do { \ -+ if ((SEQ) == ((SL)->sl_seq)) { \ -+ elan3_write32_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainGotLock), 1); \ -+ MEMBAR_LOADLOAD();\ -+ } \ -+ else ELAN3_ME_SPINENTER(SLE,SL); \ -+} while (0) -+ -+#define ELAN3_ME_SPINEXIT(SDRAM,SLE,SL) do {\ -+ MEMBAR_STORESTORE(); \ -+ elan3_write64_sdram(SDRAM, SLE + offsetof(ELAN3_SPINLOCK_ELAN, sl_mainLocks), 0); \ -+ MEMBAR_STORESTORE(); \ -+ } while (0) -+ -+ -+/* Main/Main */ -+#define ELAN3_SPINENTER(L) do {\ -+ while (c_swap ((L), 1)) ; \ -+ } while (0) -+#define ELAN3_SPINEXIT(L) do {\ -+ c_swap((L), 0);\ -+ } while (0) -+#endif /* _ELAN3_ */ -+ -+#endif /* _ELAN3_SPINLOCK_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/thread.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/thread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/thread.h 2005-07-28 14:52:52.954662624 -0400 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_THREAD_H -+#define _ELAN3_THREAD_H -+ -+#ident "$Id: thread.h,v 1.17 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/thread.h,v $*/ -+ -+/* Alignment for a stack frame */ -+#define E3_STACK_ALIGN (64) -+ -+typedef struct _E3_Frame { -+ E3_uint32 fr_local[8]; /* saved locals (not used) */ -+ E3_uint32 fr_arg[6]; /* saved arguements o0 -> o5 */ -+ E3_Addr fr_savefp; /* saved frame pointer o6 */ -+ E3_Addr fr_savepc; /* saved program counter o7 */ -+ E3_Addr fr_stret; /* stuct return addr */ -+ E3_uint32 fr_argd[6]; /* arg dump area */ -+ E3_uint32 fr_argx[1]; /* array of args past the sixth */ -+} E3_Frame; -+ -+typedef struct _E3_Stack { -+ E3_uint32 Locals[8]; -+ E3_uint32 Ins[8]; -+ E3_uint32 Globals[8]; -+ E3_uint32 Outs[8]; -+} E3_Stack; -+ -+typedef struct _E3_OutsRegs { -+ E3_uint32 o[8]; /* o6 == pc, o7 == fptr */ -+} E3_OutsRegs; -+ -+/* -+ * "Magic" value for stack pointer to be ignored. -+ */ -+#define VanishingStackPointer 0x42 -+ -+ -+/* -+ * When the Elan traps the N & Z CC bits are held in the NPC -+ * and the V & C bits are in the PC -+ */ -+#define PSR_C_BIT (1) -+#define PSR_V_BIT (2) -+#define PSR_Z_BIT (1) -+#define PSR_N_BIT (2) -+#define CC_MASK (3) -+#define PC_MASK (~3) -+#define SP_MASK (~3) -+ -+/* -+ * Threads processor Opcodes. -+ */ -+#define OPCODE_MASK (0xC1F80000) -+#define OPCODE_IMM (1 << 13) -+ -+#define OPCODE_CLASS(instr) ((instr) & 0xC0000000) -+#define OPCODE_CLASS_0 0x00000000 -+#define OPCODE_CLASS_1 0x40000000 -+#define OPCODE_CLASS_2 0x80000000 -+#define OPCODE_CLASS_3 0xC0000000 -+ -+#define OPCODE_CPOP 0x81B00000 -+#define OPCODE_Ticc 0x81D00000 -+ -+#define OPCODE_FCODE_SHIFT 19 -+#define OPCODE_FCODE_MASK 0x1f -+#define OPCODE_NOT_ALUOP 0x01000000 -+ -+#define OPCODE_SLL 0x81280000 -+#define OPCODE_SRL 0x81300000 -+#define OPCODE_SRA 0x81380000 -+ -+#define OPCODE_OPEN 0x81600000 -+#define OPCODE_CLOSE 0x81680000 -+#define OPCODE_BREAKTEST 0x81700000 -+ -+#define OPCODE_BREAK 0x81a00000 -+#define OPCODE_SUSPEND 0x81a80000 -+#define OPCODE_WAIT 0x81b00000 -+ -+#define OPCODE_JMPL 0x81c00000 -+ -+#define OPCODE_LD 0xC0000000 -+#define OPCODE_LDD 0xC0180000 -+ -+#define OPCODE_LDBLOCK16 0xC0900000 -+#define OPCODE_LDBLOCK32 0xC0800000 -+#define OPCODE_LDBLOCK64 0xC0980000 -+ -+#define OPCODE_ST 0xC0200000 -+#define OPCODE_STD 0xC0380000 -+ -+#define OPCODE_SWAP 0xC0780000 -+ -+#define OPCODE_STBLOCK16 0xC0b00000 -+#define OPCODE_STBLOCK32 0xC0a00000 -+#define OPCODE_STBLOCK64 0xC0b80000 -+ -+#define OPCODE_CLASS0_MASK 0xC1C00000 -+#define OPCODE_SETHI 0x01000000 -+#define OPCODE_BICC 0x00800000 -+#define OPCODE_SENDREG 0x01800000 -+#define OPCODE_SENDMEM 0x01c00000 -+ -+#define OPCODE_BICC_BN 0x00000000 -+#define OPCODE_BICC_BE 0x02000000 -+#define OPCODE_BICC_BLE 0x04000000 -+#define OPCODE_BICC_BL 0x06000000 -+#define OPCODE_BICC_BLEU 0x08000000 -+#define OPCODE_BICC_BCS 0x0A000000 -+#define OPCODE_BICC_BNEG 0x0C000000 -+#define OPCODE_BICC_BVS 0x0E000000 -+ -+#define OPCODE_BICC_MASK 0x0E000000 -+#define OPCODE_BICC_ANNUL 0x20000000 -+ -+#define INSTR_RS2(instr) (((instr) >> 0) & 0x1F) -+#define INSTR_RS1(instr) (((instr) >> 14) & 0x1F) -+#define INSTR_RD(instr) (((instr) >> 25) & 0x1F) -+#define INSTR_IMM(instr) (((instr) & 0x1000) ? ((instr) & 0xFFF) | 0xFFFFF000 : (instr) & 0xFFF) -+ -+#define Ticc_COND(instr) INSTR_RD(instr) -+#define Ticc_TA 8 -+ -+#endif /* _ELAN3_THREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/threadlinkage.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/threadlinkage.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/threadlinkage.h 2005-07-28 14:52:52.955662472 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_THREADLINKAGE_H -+#define __ELAN3_THREADLINKAGE_H -+ -+#ident "$Id: threadlinkage.h,v 1.6 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadlinkage.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(_ASM) || defined(__LANGUAGE_ASSEMBLY__) -+ -+/* -+ * Macro to define weak symbol aliases. These are similar to the ANSI-C -+ * #pragma weak name = _name -+ * except a compiler can determine type. The assembler must be told. Hence, -+ * the second parameter must be the type of the symbol (i.e.: function,...) -+ */ -+#define ANSI_PRAGMA_WEAK(sym, stype) \ -+ .weak sym; \ -+ .type sym, #stype; \ -+/* CSTYLED */ \ -+sym = _/**/sym -+ -+/* -+ * ENTRY provides the standard procedure entry code -+ */ -+#define ENTRY(x) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x; \ -+x: -+ -+/* -+ * ENTRY2 is identical to ENTRY but provides two labels for the entry point. -+ */ -+#define ENTRY2(x, y) \ -+ .section ".text"; \ -+ .align 4; \ -+ .global x, y; \ -+/* CSTYLED */ \ -+x: ; \ -+y: -+ -+ -+/* -+ * ALTENTRY provides for additional entry points. -+ */ -+#define ALTENTRY(x) \ -+ .global x; \ -+x: -+ -+/* -+ * DGDEF and DGDEF2 provide global data declarations. -+ * -+ * DGDEF provides a word aligned word of storage. -+ * -+ * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This -+ * implies this macro is best used for byte arrays. -+ * -+ * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. -+ */ -+#define DGDEF2(name, sz) \ -+ .section ".data"; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF3(name, sz, algn) \ -+ .section ".data"; \ -+ .align algn; \ -+ .global name; \ -+ .size name, sz; \ -+name: -+ -+#define DGDEF(name) DGDEF3(name, 4, 4) -+ -+/* -+ * SET_SIZE trails a function and set the size for the ELF symbol table. -+ */ -+#define SET_SIZE(x) \ -+ .size x, (.-x) -+ -+#endif /* _ASM || __LANGUAGE_ASSEMBLY__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN3_THREADLINKAGE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/threadsyscall.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/threadsyscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/threadsyscall.h 2005-07-28 14:52:52.955662472 -0400 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN3_SYSCALL_H -+#define __ELAN3_SYSCALL_H -+ -+#ident "$Id: threadsyscall.h,v 1.12 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/threadsyscall.h,v $*/ -+ -+/* -+ * This file contains the system calls supported from the Elan. -+ */ -+#define ELAN3_DEBUG_TRAPNUM 5 /* thread debugging trap */ -+#define ELAN3_ABORT_TRAPNUM 6 /* bad abort trap */ -+#define ELAN3_ELANCALL_TRAPNUM 7 /* elansyscall trap */ -+#define ELAN3_SYSCALL_TRAPNUM 8 /* new syscall trap */ -+ -+#define ELAN3_T_SYSCALL_CODE 0 /* offsets in struct elan3_t_syscall */ -+#define ELAN3_T_SYSCALL_ERRNO 4 -+ -+#define ELAN3_SYS_open 1 -+#define ELAN3_SYS_close 2 -+#define ELAN3_SYS_write 3 -+#define ELAN3_SYS_read 4 -+#define ELAN3_SYS_poll 5 -+#define ELAN3_SYS_ioctl 6 -+#define ELAN3_SYS_lseek 7 -+#define ELAN3_SYS_mmap 8 -+#define ELAN3_SYS_munmap 9 -+#define ELAN3_SYS_kill 10 -+#define ELAN3_SYS_getpid 11 -+ -+#if !defined(SYS_getpid) && defined(__NR_getxpid) -+#define SYS_getpid __NR_getxpid /* for linux */ -+#endif -+ -+#if !defined(_ASM) && !defined(__LANGUAGE_ASSEMBLY__) -+ -+extern int elan3_t_open (const char *, int, ...); -+extern ssize_t elan3_t_write (int, const void *, unsigned); -+extern ssize_t elan3_t_read(int, void *, unsigned); -+extern int elan3_t_ioctl(int, int, ...); -+extern int elan3_t_close(int); -+extern off_t elan3_t_lseek(int filedes, off_t offset, int whence); -+ -+extern caddr_t elan3_t_mmap(caddr_t, size_t, int, int, int, off_t); -+extern int elan3_t_munmap(caddr_t, size_t); -+ -+extern int elan3_t_getpid(void); -+extern void elan3_t_abort(char *str); -+ -+#endif /* !_ASM && ! __LANGUAGE_ASSEMBLY__ */ -+ -+#endif /* __ELAN3_SYSCALL_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/trtype.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/trtype.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/trtype.h 2005-07-28 14:52:52.955662472 -0400 -@@ -0,0 +1,116 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN3_TRTYPE_H -+#define _ELAN3_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.13 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/trtype.h,v $ */ -+ -+/*<15> ackNow */ -+#define TR_SENDACK (1 << 15) -+ -+#define TR_SIZE_SHIFT 12 -+#define TR_SIZE_MASK 7 -+ -+/*<14:12> Size 0, 1, 2, 4, 8, 16, 32, 64 Double Words -+ Bit 14 is forced to zero currently so that only size 0, 1, 2, 4 are -+ allowed */ -+ -+#define TR_SIZE0 (0 << TR_SIZE_SHIFT) -+#define TR_SIZE1 (1 << TR_SIZE_SHIFT) -+#define TR_SIZE2 (2 << TR_SIZE_SHIFT) -+#define TR_SIZE4 (3 << TR_SIZE_SHIFT) -+#define TR_SIZE8 (4 << TR_SIZE_SHIFT) -+ -+#define TR_64_BIT_ADDR (1 << 11) -+#define TR_LAST_TRANS (1 << 10) -+ -+#define TR_WRITEBLOCK_BIT (1 << 9) -+#define TR_WRITEBLOCK (TR_WRITEBLOCK_BIT | TR_SIZE8) -+ -+ -+#define TR_WRITEBLOCK_SIZE 64 -+ -+/* -+ * write-block -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_TYPE_SHIFT 7 -+#define TR_TYPE_MASK ((1 << 2) - 1) -+ -+#define TR_TYPE_BYTE 0 -+#define TR_TYPE_SHORT 1 -+#define TR_TYPE_WORD 2 -+#define TR_TYPE_DWORD 3 -+ -+#define TR_PARTSIZE_MASK ((1 << 7) -1) -+ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * trace-route format -+ */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision ID */ -+#define TR_TRACEROUTE0_BCAST_TOP_PIN(val) (((val) >> 7) & 1) /* 7 Broadcast Top Pin (REV B) */ -+#define TR_TRACEROUTE0_LNR(val) ((val) >> 8) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_PRIO(val) ((val & 0xF)) /* 0:3 Arrival Priority (REV A) */ -+#define TR_TRACEROUTE1_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Priority Held(Age) (REV A) */ -+#define TR_TRACEROUTE1_ROUTE_SELECTED(val) ((val) & 0xFF) /* 0:7 Arrival age (REV B) */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_ADAPT(val) (((val) >> 12) & 3) /* 12:13 This Adaptive Value (REV A) */ -+#define TR_TRACEROUTE1_BCAST_BOT(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom (REV B) */ -+ -+#define TR_TRACEROUTE2_ARRIVAL_AGE(val) ((val) & 0xF) /* 0:3 Arrival Age (REV B) */ -+#define TR_TRACEROUTE2_CURR_AGE(val) (((val) >> 4) & 0xF) /* 4:7 Current Age (REV B) */ -+#define TR_TRACEROUTE2_BUSY(val) (((val) >> 8) & 0xFF) /* 8:15 Busy (REV B) */ -+ -+#define TR_TRACEROUTE_SIZE 32 -+#define TR_TRACEROUTE_ENTRIES (TR_TRACEROUTE_SIZE/2) -+ -+/* -+ * non-write block -+ */ -+#define TR_OPCODE_MASK (((1 << 8) - 1) | \ -+ (TR_SIZE_MASK << TR_SIZE_SHIFT) | \ -+ TR_WRITEBLOCK_BIT) -+ -+#define TR_NOP_TRANS (0x0 | TR_SIZE0) -+#define TR_SETEVENT (0x0 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_REMOTEDMA (0x1 | TR_SIZE4 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_LOCKQUEUE (0x2 | TR_SIZE0) -+#define TR_UNLOCKQUEUE (0x3 | TR_SIZE0 | TR_SENDACK | TR_LAST_TRANS) -+ -+#define TR_SENDDISCARD (0x4 | TR_SIZE0) -+#define TR_TRACEROUTE (0x5 | TR_SIZE4) -+ -+#define TR_DMAIDENTIFY (0x6 | TR_SIZE0) -+#define TR_THREADIDENTIFY (0x7 | TR_SIZE1) -+ -+#define TR_GTE (0x8 | TR_SIZE1) -+#define TR_LT (0x9 | TR_SIZE1) -+#define TR_EQ (0xA | TR_SIZE1) -+#define TR_NEQ (0xB | TR_SIZE1) -+ -+#define TR_WRITEWORD (0xC | TR_SIZE1) -+#define TR_WRITEDOUBLEWORD (0xD | TR_SIZE1) -+#define TR_TESTANDWRITE (0xE | TR_SIZE1) -+#define TR_ATOMICADDWORD (0xF | TR_SIZE1 | TR_SENDACK | TR_LAST_TRANS) -+#define TR_OPCODE_TYPE_MASK 0xff -+ -+ -+#endif /* notdef _ELAN3_TRTYPE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/urom_addrs.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/urom_addrs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/urom_addrs.h 2005-07-28 14:52:52.956662320 -0400 -@@ -0,0 +1,262 @@ -+#define MI_WaitForRemoteDescRead 0x0 -+#define MI_WaitForRemoteDescRead2 0x1 -+#define MI_WaitForRemoteDescRead2_seq1 0x2 -+#define MI_SendRemoteDmaRoutes 0x3 -+#define MI_IProcTrapped 0x4 -+#define MI_DProcTrapped 0x5 -+#define MI_CProcTrapped 0x6 -+#define MI_TProcTrapped 0x7 -+#define MI_TestWhichDmaQueue 0x8 -+#define MI_TestWhichDmaQueue_seq1 0x9 -+#define MI_InputRemoteDmaUpdateBPtr 0xa -+#define MI_FixupQueueContextAndRemoteBit 0xb -+#define MI_FixupQueueContextAndRemoteBit_seq1 0xc -+#define MI_FixupQueueContextAndRemoteBit_seq2 0xd -+#define MI_FixupQueueContextAndRemoteBit_seq3 0xe -+#define MI_FixupQueueContextAndRemoteBit_seq4 0xf -+#define MI_RunDmaCommand 0x10 -+#define MI_DoSendRemoteDmaDesc 0x11 -+#define MI_DequeueNonSysCntxDma 0x12 -+#define MI_WaitForRemoteDescRead1 0x13 -+#define MI_RemoteDmaCommand 0x14 -+#define MI_WaitForRemoteRoutes 0x15 -+#define MI_DequeueSysCntxDma 0x16 -+#define MI_ExecuteDmaDescriptorForQueue 0x17 -+#define MI_ExecuteDmaDescriptor1 0x18 -+#define MI_ExecuteDmaDescriptor1_seq1 0x19 -+#define MI_ExecuteDmaDescriptor1_seq2 0x1a -+#define MI_ExecuteDmaDescriptor1_seq3 0x1b -+#define MI_GetNewSizeInProg 0x1c -+#define MI_GetNewSizeInProg_seq1 0x1d -+#define MI_FirstBlockRead 0x1e -+#define MI_ExtraFirstBlockRead 0x1f -+#define MI_UnimplementedError 0x20 -+#define MI_UpdateDescriptor 0x21 -+#define MI_UpdateDescriptor_seq1 0x22 -+#define MI_UpdateDescriptor_seq2 0x23 -+#define MI_UpdateDescriptor_seq3 0x24 -+#define MI_UpdateDescriptor_seq4 0x25 -+#define MI_UpdateDescriptor_seq5 0x26 -+#define MI_GetNextSizeInProg 0x27 -+#define MI_DoStopThisDma 0x28 -+#define MI_DoStopThisDma_seq1 0x29 -+#define MI_GenNewBytesToRead 0x2a -+#define MI_WaitForEventReadTy1 0x2b -+#define MI_WaitUpdateEvent 0x2c -+#define MI_WaitUpdateEvent_seq1 0x2d -+#define MI_DoSleepOneTickThenRunable 0x2e -+#define MI_RunEvent 0x2f -+#define MI_EnqueueThread 0x30 -+#define MI_CheckContext0 0x31 -+#define MI_EnqueueDma 0x32 -+#define MI_CprocTrapping 0x33 -+#define MI_CprocTrapping_seq1 0x34 -+#define MI_WaitForRemoteRoutes1 0x35 -+#define MI_SetEventCommand 0x36 -+#define MI_DoSetEvent 0x37 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma 0x38 -+#define MI_DoRemoteSetEventNowOrTrapQueueingDma_seq1 0x39 -+#define MI_SendRemoteDmaRoutes2 0x3a -+#define MI_WaitForRemoteRoutes2 0x3b -+#define MI_WaitEventCommandTy0 0x3c -+#define MI_DequeueNonSysCntxDma2 0x3d -+#define MI_WaitEventCommandTy1 0x3e -+#define MI_WaitEventCommandTy1_seq1 0x3f -+#define MI_DequeueNonSysCntxThread 0x40 -+#define MI_DequeueSysCntxDma1 0x41 -+#define MI_DequeueSysCntxThread 0x42 -+#define MI_TestNonSysCntxDmaQueueEmpty 0x43 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq1 0x44 -+#define MI_TestNonSysCntxDmaQueueEmpty_seq2 0x45 -+#define MI_RunThreadCommand 0x46 -+#define MI_SetEventWaitForLastAcess 0x47 -+#define MI_SetEventReadWait 0x48 -+#define MI_SetEventReadWait_seq1 0x49 -+#define MI_TestEventType 0x4a -+#define MI_TestEventType_seq1 0x4b -+#define MI_TestEventBit2 0x4c -+#define MI_DmaDescOrBlockCopyOrChainedEvent 0x4d -+#define MI_RunThread 0x4e -+#define MI_RunThread1 0x4f -+#define MI_RunThread1_seq1 0x50 -+#define MI_IncDmaSysCntxBPtr 0x51 -+#define MI_IncDmaSysCntxBPtr_seq1 0x52 -+#define MI_IncDmaSysCntxBPtr_seq2 0x53 -+#define MI_WaitForCntxDmaDescRead 0x54 -+#define MI_FillInContext 0x55 -+#define MI_FillInContext_seq1 0x56 -+#define MI_WriteNewDescToQueue 0x57 -+#define MI_WriteNewDescToQueue_seq1 0x58 -+#define MI_TestForQueueWrap 0x59 -+#define MI_TestForQueueWrap_seq1 0x5a -+#define MI_TestQueueIsFull 0x5b -+#define MI_TestQueueIsFull_seq1 0x5c -+#define MI_TestQueueIsFull_seq2 0x5d -+#define MI_CheckPsychoShitFixup 0x5e -+#define MI_PsychoShitFixupForcedRead 0x5f -+#define MI_PrepareDMATimeSlice 0x60 -+#define MI_PrepareDMATimeSlice_seq1 0x61 -+#define MI_TProcRestartFromTrapOrTestEventBit2 0x62 -+#define MI_TProcRestartFromTrapOrTestEventBit2_seq1 0x63 -+#define MI_WaitForGlobalsRead 0x64 -+#define MI_WaitForNPCRead 0x65 -+#define MI_EventInterrupt 0x66 -+#define MI_EventInterrupt_seq1 0x67 -+#define MI_EventInterrupt_seq2 0x68 -+#define MI_EventInterrupt_seq3 0x69 -+#define MI_TestSysCntxDmaQueueEmpty 0x6a -+#define MI_TestSysCntxDmaQueueEmpty_seq1 0x6b -+#define MI_TestIfRemoteDesc 0x6c -+#define MI_DoDmaLocalSetEvent 0x6d -+#define MI_DoDmaLocalSetEvent_seq1 0x6e -+#define MI_DoDmaLocalSetEvent_seq2 0x6f -+#define MI_DmaLoop1 0x70 -+#define MI_ExitDmaLoop 0x71 -+#define MI_ExitDmaLoop_seq1 0x72 -+#define MI_RemoteDmaTestPAckType 0x73 -+#define MI_PacketDiscardOrTestFailRecIfCCis0 0x74 -+#define MI_PacketDiscardOrTestFailRecIfCCis0_seq1 0x75 -+#define MI_TestNackFailIsZero2 0x76 -+#define MI_TestNackFailIsZero3 0x77 -+#define MI_DmaFailCountError 0x78 -+#define MI_TestDmaForSysCntx 0x79 -+#define MI_TestDmaForSysCntx_seq1 0x7a -+#define MI_TestDmaForSysCntx_seq2 0x7b -+#define MI_TestAeqB2 0x7c -+#define MI_TestAeqB2_seq1 0x7d -+#define MI_GetNextDmaDescriptor 0x7e -+#define MI_DequeueSysCntxDma2 0x7f -+#define MI_InputSetEvent 0x80 -+#define MI_PutBackSysCntxDma 0x81 -+#define MI_PutBackSysCntxDma_seq1 0x82 -+#define MI_PutBackSysCntxDma_seq2 0x83 -+#define MI_InputRemoteDma 0x84 -+#define MI_InputRemoteDma_seq1 0x85 -+#define MI_WaitOneTickForWakeup1 0x86 -+#define MI_SendRemoteDmaDesc 0x87 -+#define MI_InputLockQueue 0x88 -+#define MI_CloseTheTrappedPacketIfCCis1 0x89 -+#define MI_CloseTheTrappedPacketIfCCis1_seq1 0x8a -+#define MI_PostDmaInterrupt 0x8b -+#define MI_InputUnLockQueue 0x8c -+#define MI_WaitForUnLockDescRead 0x8d -+#define MI_SendEOPforRemoteDma 0x8e -+#define MI_LookAtRemoteAck 0x8f -+#define MI_InputWriteBlockQueue 0x90 -+#define MI_WaitForSpStore 0x91 -+#define MI_TProcNext 0x92 -+#define MI_TProcStoppedRunning 0x93 -+#define MI_InputWriteBlock 0x94 -+#define MI_RunDmaOrDeqNonSysCntxDma 0x95 -+#define MI_ExecuteDmaDescriptorForRun 0x96 -+#define MI_ConfirmQueueLock 0x97 -+#define MI_DmaInputIdentify 0x98 -+#define MI_TProcStoppedRunning2 0x99 -+#define MI_TProcStoppedRunning2_seq1 0x9a -+#define MI_TProcStoppedRunning2_seq2 0x9b -+#define MI_ThreadInputIdentify 0x9c -+#define MI_InputIdWriteAddrAndType3 0x9d -+#define MI_IProcTrappedWriteStatus 0x9e -+#define MI_FinishTrappingEop 0x9f -+#define MI_InputTestTrans 0xa0 -+#define MI_TestAeqB3 0xa1 -+#define MI_ThreadUpdateNonSysCntxBack 0xa2 -+#define MI_ThreadQueueOverflow 0xa3 -+#define MI_RunContext0Thread 0xa4 -+#define MI_RunContext0Thread_seq1 0xa5 -+#define MI_RunContext0Thread_seq2 0xa6 -+#define MI_RunDmaDesc 0xa7 -+#define MI_RunDmaDesc_seq1 0xa8 -+#define MI_RunDmaDesc_seq2 0xa9 -+#define MI_TestAeqB 0xaa -+#define MI_WaitForNonCntxDmaDescRead 0xab -+#define MI_DmaQueueOverflow 0xac -+#define MI_BlockCopyEvent 0xad -+#define MI_BlockCopyEventReadBlock 0xae -+#define MI_BlockCopyWaitForReadData 0xaf -+#define MI_InputWriteWord 0xb0 -+#define MI_TraceSetEvents 0xb1 -+#define MI_TraceSetEvents_seq1 0xb2 -+#define MI_TraceSetEvents_seq2 0xb3 -+#define MI_InputWriteDoubleWd 0xb4 -+#define MI_SendLockTransIfCCis1 0xb5 -+#define MI_WaitForDmaRoutes1 0xb6 -+#define MI_LoadDmaContext 0xb7 -+#define MI_InputTestAndSetWord 0xb8 -+#define MI_InputTestAndSetWord_seq1 0xb9 -+#define MI_GetDestEventValue 0xba -+#define MI_SendDmaIdentify 0xbb -+#define MI_InputAtomicAddWord 0xbc -+#define MI_LoadBFromTransD0 0xbd -+#define MI_ConditionalWriteBackCCTrue 0xbe -+#define MI_WaitOneTickForWakeup 0xbf -+#define MI_SendFinalUnlockTrans 0xc0 -+#define MI_SendDmaEOP 0xc1 -+#define MI_GenLastAddrForPsycho 0xc2 -+#define MI_FailedAckIfCCis0 0xc3 -+#define MI_FailedAckIfCCis0_seq1 0xc4 -+#define MI_WriteDmaSysCntxDesc 0xc5 -+#define MI_TimesliceDmaQueueOverflow 0xc6 -+#define MI_DequeueNonSysCntxThread1 0xc7 -+#define MI_DequeueNonSysCntxThread1_seq1 0xc8 -+#define MI_TestThreadQueueEmpty 0xc9 -+#define MI_ClearThreadQueueIfCC 0xca -+#define MI_DequeueSysCntxThread1 0xcb -+#define MI_DequeueSysCntxThread1_seq1 0xcc -+#define MI_TProcStartUpGeneric 0xcd -+#define MI_WaitForPCload2 0xce -+#define MI_WaitForNPCWrite 0xcf -+#define MI_WaitForEventWaitAddr 0xd0 -+#define MI_WaitForWaitEventAccess 0xd1 -+#define MI_WaitForWaitEventAccess_seq1 0xd2 -+#define MI_WaitForWaitEventDesc 0xd3 -+#define MI_WaitForEventReadTy0 0xd4 -+#define MI_SendCondTestFail 0xd5 -+#define MI_InputMoveToNextTrans 0xd6 -+#define MI_ThreadUpdateSysCntxBack 0xd7 -+#define MI_FinishedSetEvent 0xd8 -+#define MI_EventIntUpdateBPtr 0xd9 -+#define MI_EventQueueOverflow 0xda -+#define MI_MaskLowerSource 0xdb -+#define MI_DmaLoop 0xdc -+#define MI_SendNullSetEvent 0xdd -+#define MI_SendFinalSetEvent 0xde -+#define MI_TestNackFailIsZero1 0xdf -+#define MI_DmaPacketTimedOutOrPacketError 0xe0 -+#define MI_NextPacketIsLast 0xe1 -+#define MI_TestForZeroLengthDma 0xe2 -+#define MI_WaitForPCload 0xe3 -+#define MI_ReadInIns 0xe4 -+#define MI_WaitForInsRead 0xe5 -+#define MI_WaitForLocals 0xe6 -+#define MI_WaitForOutsWrite 0xe7 -+#define MI_WaitForWaitEvWrBack 0xe8 -+#define MI_WaitForLockRead 0xe9 -+#define MI_TestQueueLock 0xea -+#define MI_InputIdWriteAddrAndType 0xeb -+#define MI_InputIdWriteAddrAndType2 0xec -+#define MI_ThreadInputIdentify2 0xed -+#define MI_WriteIntoTrapArea0 0xee -+#define MI_GenQueueBlockWrAddr 0xef -+#define MI_InputDiscardFreeLock 0xf0 -+#define MI_WriteIntoTrapArea1 0xf1 -+#define MI_WriteIntoTrapArea2 0xf2 -+#define MI_ResetBPtrToBase 0xf3 -+#define MI_InputDoTrap 0xf4 -+#define MI_RemoteDmaCntxt0Update 0xf5 -+#define MI_ClearQueueLock 0xf6 -+#define MI_IProcTrappedBlockWriteData 0xf7 -+#define MI_FillContextFilter 0xf8 -+#define MI_IProcTrapped4 0xf9 -+#define MI_RunSysCntxDma 0xfa -+#define MI_ChainedEventError 0xfb -+#define MI_InputTrappingEOP 0xfc -+#define MI_CheckForRunIfZero 0xfd -+#define MI_TestForBreakOrSuspend 0xfe -+#define MI_SwapForRunable 0xff -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/vmseg.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/vmseg.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/vmseg.h 2005-07-28 14:52:52.956662320 -0400 -@@ -0,0 +1,75 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _VM_SEG_ELAN3_H -+#define _VM_SEG_ELAN3_H -+ -+#ident "$Id: vmseg.h,v 1.20 2003/09/24 13:57:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vmseg.h,v $*/ -+ -+#include -+ -+/* -+ * This segment maps Elan registers, it is fixed size and has 8K -+ * pages split up as follows -+ * -+ * ---------------------------------------- -+ * | Performance Counters (read-only) | -+ * ---------------------------------------- -+ * | Flag Page (read-only) | -+ * ---------------------------------------- -+ * | Command Port | -+ * ---------------------------------------- -+ */ -+typedef volatile struct elan3_flagstats -+{ -+ u_int CommandFlag; -+ u_int PageFaults; -+ u_int CProcTraps; -+ u_int DProcTraps; -+ u_int TProcTraps; -+ u_int IProcTraps; -+ u_int EopBadAcks; -+ u_int EopResets; -+ u_int DmaNetworkErrors; -+ u_int DmaIdentifyNetworkErrors; -+ u_int ThreadIdentifyNetworkErrors; -+ u_int DmaRetries; -+ u_int ThreadSystemCalls; -+ u_int ThreadElanCalls; -+ u_int LoadVirtualProcess; -+} ELAN3_FLAGSTATS; -+ -+#ifdef DIGITAL_UNIX -+typedef volatile union elan3_flagpage -+{ -+ u_char Padding[8192]; -+ ELAN3_FLAGSTATS Stats; -+} ELAN3_FLAGPAGE; -+ -+typedef volatile struct elan3_vmseg -+{ -+ E3_CommandPort CommandPort; -+ ELAN3_FLAGPAGE FlagPage; -+ E3_User_Regs UserRegs; -+} ELAN3_VMSEG; -+ -+#define SEGELAN3_SIZE (sizeof (ELAN3_VMSEG)) -+ -+#define SEGELAN3_COMMAND_PORT 0 -+#define SEGELAN3_FLAG_PAGE 1 -+#define SEGELAN3_PERF_COUNTERS 2 -+ -+#endif /* DIGITAL_UNIX */ -+ -+#endif /* _VM_SEG_ELAN3_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan3/vpd.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan3/vpd.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan3/vpd.h 2005-07-28 14:52:52.957662168 -0400 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: vpd.h,v 1.5 2002/08/09 11:23:34 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan3mod/elan3/elan3/vpd.h,v $*/ -+ -+#ifndef __ELAN3_VPD_H -+#define __ELAN3_VPD_H -+ -+#define LARGE_RESOURCE_BIT 0x80 -+ -+#define SMALL_RESOURCE_COMPATIBLE_DEVICE_ID 0x3 -+#define SMALL_RESOURCE_VENDOR_DEFINED 0xE -+#define SMALL_RESOURCE_END_TAG 0xF -+ -+#define LARGE_RESOURCE_STRING 0x2 -+#define LARGE_RESOURCE_VENDOR_DEFINED 0x4 -+#define LARGE_RESOURCE_VITAL_PRODUCT_DATA 0x10 -+ -+#define VPD_PART_NUMBER "PN" -+#define VPD_FRU_PART_NUMBER "FN" -+#define VPD_EC_LEVEL "EC" -+#define VPD_MANUFACTURE_ID "MN" -+#define VPD_SERIAL_NUMBER "SN" -+ -+#define VPD_LOAD_ID "LI" -+#define VPD_ROM_LEVEL "RL" -+#define VPD_ALTERABLE_ROM_LEVEL "RM" -+#define VPD_NETWORK_ADDRESS "NA" -+#define VPD_DEVICE_DRIVER_LEVEL "DD" -+#define VPD_DIAGNOSTIC_LEVEL "DG" -+#define VPD_LOADABLE_MICROCODE_LEVEL "LL" -+#define VPD_VENDOR_ID "VI" -+#define VPD_FUNCTION_NUMBER "FU" -+#define VPD_SUBSYSTEM_VENDOR_ID "SI" -+ -+#endif /* __ELAN3_VPD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/elan4/commands.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/commands.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/commands.h 2005-07-28 14:52:52.957662168 -0400 -@@ -0,0 +1,247 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_COMMANDS_H -+#define __ELAN4_COMMANDS_H -+ -+#ident "$Id: commands.h,v 1.29 2004/06/16 15:45:02 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/commands.h,v $*/ -+ -+/* -+ * This header file describes the command format for the Elan 4 -+ * See CommandFormat.doc -+ */ -+ -+/* -+ * Number of channels in traced elanlib_trace.c -+ */ -+#define TRACE_MAX_CHANNELS 2 -+ -+/* -+ * Define encoding for the commands issued into the command queues -+ */ -+#define RUN_THREAD_CMD 0x00 -+#define OPEN_STEN_PKT_CMD 0x01 -+#define WRITE_DWORD_CMD 0x02 -+#define ADD_DWORD_CMD 0x03 -+#define COPY64_CMD 0x05 -+#define GUARD_CMD 0x06 -+#define SET_EVENT_CMD 0x07 -+#define SEND_TRANS_CMD 0x09 -+#define INTERRUPT_CMD 0x0d -+#define RUN_DMA_CMD 0x0e -+#define SET_EVENTN_CMD 0x0f -+#define NOP_CMD 0x17 -+#define MAKE_EXT_CLEAN_CMD 0x37 -+#define WAIT_EVENT_CMD 0x1f -+ -+/* -+ * Define the portion of the data word the user is NOT -+ * allowed to use. This varies with Commmand type -+ */ -+#define RUN_THREAD_CMD_MASK 0x03 -+#define OPEN_STEN_PKT_CMD_MASK 0x0f -+#define WRITE_DWORD_CMD_MASK 0x07 -+#define ADD_DWORD_CMD_MASK 0x07 -+#define COPY64_CMD_MASK 0x0f -+#define GUARD_CMD_MASK 0x0f -+#define SET_EVENT_CMD_MASK 0x1f -+#define SEND_TRANS_CMD_MASK 0x1f -+#define INTERRUPT_CMD_MASK 0x0f -+#define RUN_DMA_CMD_MASK 0x0f -+#define SET_EVENTN_CMD_MASK 0x1f -+#define NOP_CMD_MASK 0x3f -+#define MAKE_EXT_CLEAN_MASK 0x3f -+#define WAIT_EVENT_CMD_MASK 0x1f -+ -+#define COPY64_DATA_TYPE_SHIFT 0x4 -+#define COPY64_DTYPE_BYTE (0 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_SHORT (1 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_WORD (2 << COPY64_DATA_TYPE_SHIFT) -+#define COPY64_DTYPE_LONG (3 << COPY64_DATA_TYPE_SHIFT) -+ -+/* -+ * SET_EVENTN - word 1 has following form -+ * [63:5] Event Address -+ * [4:0] Part Set Value. -+ */ -+#define SET_EVENT_PART_SET_MASK 0x1f -+ -+/* OPEN_STEN_PKT_CMD -+ * [63:32] Vproc -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] Acceptable PAck code -+ * [8:4] Ack Channel Number (1 bit on Elan4) -+ * [3:0] Command type -+ */ -+/* Acceptable PAck code */ -+#define PACK_OK (1 << 0) -+#define PACK_TESTFAIL (1 << 1) -+#define PACK_DISCARD (1 << 2) -+#define RESTART_COUNT_ZERO (1 << 3) -+#define PACK_ERROR (1 << 7) -+#define PACK_TIMEOUT (1 << 8) -+ -+/* -+ *#ifndef USE_DIRTY_COMMANDS -+ *#define USE_DIRTY_COMMANDS -+ *#endif -+ */ -+#ifdef USE_DIRTY_COMMANDS -+#define OPEN_PACKET_USED_MASK 0x00000000780f00e0ULL -+#define SEND_TRANS_USED_MASK 0xffffffff0000fff0ULL -+#define COPY64_WRITE_USED_MASK 0x000000000000000fULL -+#define MAIN_INT_USED_MASK 0x0000000000003ff0ULL -+#define GUARD_USED_MASK 0xfffffe007000fde0ULL -+#define DMA_TYPESIZE_USED_MASK 0x000000000000fff0ULL -+#define SETEVENTN_USED_MASK 0xffffffffffffffe0ULL -+#define NOP_USED_MASK 0xffffffffffffffc0ULL -+#define EXT_CLEAN_USED_MASK 0xffffffffffffffc0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x00000000fffff800ULL -+#else -+#define OPEN_PACKET_USED_MASK 0x0ULL -+#define SEND_TRANS_USED_MASK 0x0ULL -+#define COPY64_WRITE_USED_MASK 0x0ULL -+#define MAIN_INT_USED_MASK 0x0ULL -+#define GUARD_USED_MASK 0x0ULL -+#define DMA_TYPESIZE_USED_MASK 0x0ULL -+#define SETEVENTN_USED_MASK 0x0ULL -+#define NOP_USED_MASK 0x0ULL -+#define EXT_CLEAN_USED_MASK 0x0ULL -+#define WAIT_CNT_TYPE_USED_MASK 0x0ULL -+#endif -+ -+#define OPEN_PACKET(chan, code, vproc) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | OPEN_STEN_PKT_CMD) -+ -+#define OPEN_PACKET_TEST(chan, code, vproc, tchan, tcode) \ -+ ((((chan) & 1) << 4) | (((code) & 0x7f) << 9) | ((E4_uint64)(vproc) << 32) | \ -+ (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | (((E4_uint64) 1) << 31) | OPEN_STEN_PKT_CMD) -+ -+/* -+ * GUARD_CMD -+ * [63:41] unused -+ * [40] Reset Restart Fail Count // only performed if the Guard executes the next command. -+ * [39:32] New Restart Fail Count value -+ * [31] Use Test -+ * [30:28] unused -+ * [27:21] Test Acceptable PAck code -+ * [20:16] Test Ack Channel Number -+ * [15:9] unused -+ * [8:4] Ack Channel Number -+ * [3:0] Command type -+ */ -+/* GUARD_CHANNEL(chan) -+ */ -+#define GUARD_ALL_CHANNELS ((1 << 9) | GUARD_CMD) -+#define GUARD_CHANNEL(chan) ((((chan) & 1) << 4) | GUARD_CMD) -+#define GUARD_TEST(chan,code) ((1ull << 31) | (((code) & 0x7f) << 21) | (((chan) & 1) << 16)) -+#define GUARD_RESET(count) ((1ull << 40) | ((((E4_uint64) count) & 0xff) << 32)) -+ -+#define GUARD_CHANNEL_TEST(chan,tchan,tcode) \ -+ ((((chan) & 1) << 4) | (((tchan) & 1) << 16) | (((tcode) & 0x7f) << 21) | \ -+ (((E4_uint64) 1) << 31) | GUARD_CMD) -+ -+/* -+ * SEND_TRANS_CMD -+ * [63:32] unused -+ * [31:16] transaction type -+ * [15:4] unused -+ * [3:0] Command type -+ */ -+#define SEND_TRANS(TransType) (((TransType) << 16) | SEND_TRANS_CMD) -+ -+/* -+ * Command port trace debug levels -+ */ -+#define TRACE_CMD_BUFFER 0x01 -+#define TRACE_CMD_TYPE 0x02 -+#define TRACE_CHANNEL_OPENS 0x04 -+#define TRACE_GUARDED_ATOMICS 0x08 -+#define TRACE_CMD_TIMEOUT 0x10 -+ -+/* -+ * Commands that should be preceeded by a GUARD_CMD. -+ */ -+#define IS_ATOMIC_CMD(cmd) \ -+ ((cmd) == RUN_THREAD_CMD || (cmd) == ADD_DWORD_CMD || (cmd) == INTERRUPT_CMD || \ -+ (cmd) == RUN_DMA_CMD || (cmd) == SET_EVENT_CMD || (cmd) == SET_EVENTN_CMD || \ -+ (cmd) == WAIT_EVENT_CMD) -+ -+#ifndef _ASM -+ -+/* -+ * These structures are used to build event copy command streams. They are intended to be included -+ * in a larger structure to form a self documenting command sequence that can be easily coped and manipulated. -+ */ -+ -+typedef struct e4_runthreadcmd -+{ -+ E4_Addr PC; -+ E4_uint64 r[6]; -+} E4_RunThreadCmd; -+ -+typedef E4_uint64 E4_OpenCmd; -+ -+typedef struct e4_writecmd -+{ -+ E4_Addr WriteAddr; -+ E4_uint64 WriteValue; -+} E4_WriteCmd; -+ -+typedef struct e4_addcmd -+{ -+ E4_Addr AddAddr; -+ E4_uint64 AddValue; -+} E4_AddCmd; -+ -+typedef struct e4_copycmd -+{ -+ E4_Addr SrcAddr; -+ E4_Addr DstAddr; -+} E4_CopyCmd; -+ -+typedef E4_uint64 E4_GaurdCmd; -+typedef E4_uint64 E4_SetEventCmd; -+ -+/* -+ * The data to this command must be declared as a vector after the use of this. -+ */ -+typedef struct e4_sendtranscmd -+{ -+ E4_Addr Type; -+ E4_Addr Addr; -+} E4_SendTransCmd; -+ -+typedef E4_uint64 E4_IntCmd; -+ -+/* The normal Dma struc can be used here. */ -+ -+typedef struct e4_seteventncmd -+{ -+ E4_Addr Event; -+ E4_Addr SetCount; -+} E4_SetEventNCmd; -+ -+typedef E4_uint64 E4_NopCmd; -+typedef E4_uint64 E4_MakeExtCleanCmd; -+ -+typedef struct e4_waitcmd -+{ -+ E4_Addr ev_Event; -+ E4_Addr ev_CountType; -+ E4_Addr ev_Params[2]; -+} E4_WaitCmd; -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_COMMANDS_H */ -+ -Index: linux-2.6.5-7.191/include/elan4/debug.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/debug.h 2005-07-28 14:52:52.958662016 -0400 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_ELANDEBUG_H -+#define _ELAN4_ELANDEBUG_H -+ -+#ident "$Id: debug.h,v 1.19.6.1 2005/01/18 14:36:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/debug.h,v $ */ -+ -+/* values for "type" field - note a "ctxt" is permissible */ -+/* and BUFFER/CONSOLE are for explict calls to elan4_debugf() */ -+#define DBG_DEVICE ((void *) 0) -+#define DBG_USER ((void *) 1) -+ -+#define DBG_BUFFER ((void *) 62) -+#define DBG_CONSOLE ((void *) 63) -+#define DBG_NTYPES 64 -+ -+/* values for "mode" field */ -+#define DBG_CONFIG 0x00000001 -+#define DBG_INTR 0x00000002 -+#define DBG_MAININT 0x00000004 -+#define DBG_SDRAM 0x00000008 -+#define DBG_MMU 0x00000010 -+#define DBG_REGISTER 0x00000020 -+#define DBG_CQ 0x00000040 -+#define DBG_NETWORK_CTX 0x00000080 -+ -+#define DBG_FLUSH 0x00000100 -+#define DBG_FILE 0x00000200 -+#define DBG_CONTROL 0x00000400 -+#define DBG_MEM 0x00000800 -+ -+#define DBG_PERM 0x00001000 -+#define DBG_FAULT 0x00002000 -+#define DBG_SWAP 0x00004000 -+#define DBG_TRAP 0x00008000 -+#define DBG_DDCQ 0x00010000 -+#define DBG_VP 0x00020000 -+#define DBG_RESTART 0x00040000 -+#define DBG_RESUME 0x00080000 -+#define DBG_CPROC 0x00100000 -+#define DBG_DPROC 0x00200000 -+#define DBG_EPROC 0x00400000 -+#define DBG_IPROC 0x00800000 -+#define DBG_TPROC 0x01000000 -+#define DBG_IOPROC 0x02000000 -+#define DBG_ROUTE 0x04000000 -+#define DBG_NETERR 0x08000000 -+ -+#define DBG_ALL 0x7FFFFFFF -+ -+ -+#ifdef DEBUG_PRINTF -+ -+# define PRINTF0(type,m,fmt) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt) : (void)0) -+# define PRINTF1(type,m,fmt,a) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a) : (void)0) -+# define PRINTF2(type,m,fmt,a,b) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b) : (void)0) -+# define PRINTF3(type,m,fmt,a,b,c) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c) : (void)0) -+# define PRINTF4(type,m,fmt,a,b,c,d) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d) : (void)0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e) : (void)0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f) : (void)0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g) : (void)0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h) : (void)0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m,fmt,a,b,c,d,e,f,g,h,i): (void)0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? elan4_debugf(type,m, ##args) : (void)0) -+#endif -+# define DBGCMD(type,m,cmd) ((elan4_debug&(m) || (type) == DBG_CONSOLE) ? (void) (cmd) : (void) 0) -+ -+#else -+ -+# define PRINTF0(type,m,fmt) (0) -+# define PRINTF1(type,m,fmt,a) (0) -+# define PRINTF2(type,m,fmt,a,b) (0) -+# define PRINTF3(type,m,fmt,a,b,c) (0) -+# define PRINTF4(type,m,fmt,a,b,c,d) (0) -+# define PRINTF5(type,m,fmt,a,b,c,d,e) (0) -+# define PRINTF6(type,m,fmt,a,b,c,d,e,f) (0) -+# define PRINTF7(type,m,fmt,a,b,c,d,e,f,g) (0) -+# define PRINTF8(type,m,fmt,a,b,c,d,e,f,g,h) (0) -+# define PRINTF9(type,m,fmt,a,b,c,d,e,f,g,h,i) (0) -+#ifdef __GNUC__ -+# define PRINTF(type,m,args...) -+#endif -+# define DBGCMD(type,m,cmd) ((void) 0) -+ -+#endif /* DEBUG_PRINTF */ -+ -+extern unsigned elan4_debug; -+extern unsigned elan4_debug_toconsole; -+extern unsigned elan4_debug_tobuffer; -+extern unsigned elan4_debug_display_ctxt; -+extern unsigned elan4_debug_ignore_ctxt; -+extern unsigned elan4_debug_ignore_type; -+ -+extern void elan4_debug_init(void); -+extern void elan4_debug_fini(void); -+extern void elan4_debugf (void *type, int mode, char *fmt, ...); -+extern int elan4_debug_snapshot (caddr_t ubuffer, int len); -+extern int elan4_debug_display (void); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_ELANDEBUG_H */ -Index: linux-2.6.5-7.191/include/elan4/device.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/device.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/device.h 2005-07-28 14:52:52.960661712 -0400 -@@ -0,0 +1,811 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_H -+#define __ELAN4_ELANDEV_H -+ -+#ident "$Id: device.h,v 1.68.2.12 2005/03/09 12:00:08 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device.h,v $ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_MPSAS -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#elif defined(TRU64UNIX) -+#include -+#elif defined(SOLARIS) -+#include -+#endif -+ -+/* -+ * Network context number allocation. -+ * [0] neterr fixup system context -+ * [1] kernel comms system context -+ * [2048-4095] kernel comms data contexts -+ */ -+#define ELAN4_NETERR_CONTEXT_NUM 0x00 /* network error fixup context number */ -+#define ELAN4_KCOMM_CONTEXT_NUM 0x01 /* kernel comms context number */ -+#define ELAN4_KCOMM_BASE_CONTEXT_NUM 0x800 /* kernel comms data transfer contexts */ -+#define ELAN4_KCOMM_TOP_CONTEXT_NUM 0xfff -+ -+#define ELAN4_SYSTEM_CONTEXT(ctx) ((ctx) >= ELAN4_KCOMM_BASE_CONTEXT_NUM) -+ -+typedef void (ELAN4_HALTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_haltop -+{ -+ struct list_head op_link; /* chain on a list */ -+ E4_uint32 op_mask; /* Interrupt mask to see before calling function */ -+ -+ ELAN4_HALTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_HALTOP; -+ -+typedef void (ELAN4_DMA_FLUSHFN)(struct elan4_dev *dev, void *arg, int qfull); -+ -+typedef struct elan4_dma_flushop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_DMA_FLUSHFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+} ELAN4_DMA_FLUSHOP; -+ -+typedef void (ELAN4_INTFN)(struct elan4_dev *dev, void *arg); -+ -+typedef struct elan4_intop -+{ -+ struct list_head op_link; /* chain on a list */ -+ ELAN4_INTFN *op_function; /* function to call */ -+ void *op_arg; /* arguement to pass to function */ -+ E4_uint64 op_cookie; /* and main interrupt cookie */ -+} ELAN4_INTOP; -+ -+typedef struct elan4_eccerrs -+{ -+ E4_uint64 EccStatus; -+ E4_uint64 ConfigReg; -+ E4_uint32 ErrorCount; -+} ELAN4_ECCERRS; -+ -+#define SDRAM_MIN_BLOCK_SHIFT 10 -+#define SDRAM_NUM_FREE_LISTS 19 /* allows max 256 Mb block */ -+#define SDRAM_MIN_BLOCK_SIZE (1 << SDRAM_MIN_BLOCK_SHIFT) -+#define SDRAM_MAX_BLOCK_SIZE (SDRAM_MIN_BLOCK_SIZE << (SDRAM_NUM_FREE_LISTS-1)) -+ -+#if PAGE_SHIFT < 13 -+#define SDRAM_PAGE_SIZE 8192 -+#define SDRAM_PGOFF_OFFSET 1 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#else -+#define SDRAM_PAGE_SIZE PAGE_SIZE -+#define SDRAM_PGOFF_OFFSET 0 -+#define SDRAM_PGOFF_MASK (~SDRAM_PGOFF_OFFSET) -+#endif -+ -+typedef struct elan4_sdram -+{ -+ sdramaddr_t b_base; /* offset in sdram bar */ -+ unsigned b_size; /* size of bank */ -+ ioaddr_t b_ioaddr; /* ioaddr where mapped into the kernel */ -+ ELAN4_MAP_HANDLE b_handle; /* and mapping handle */ -+ bitmap_t *b_bitmaps[SDRAM_NUM_FREE_LISTS]; /* buddy allocator bitmaps */ -+} ELAN4_SDRAM_BANK; -+ -+/* command queue */ -+typedef struct elan4_cq -+{ -+ struct elan4_cqa *cq_cqa; /* command queue allocator this belongs to */ -+ unsigned cq_idx; /* and which command queue this is */ -+ -+ sdramaddr_t cq_space; /* sdram backing up command queue */ -+ unsigned cq_size; /* size value */ -+ unsigned cq_perm; /* permissions */ -+ ioaddr_t cq_mapping; /* mapping of command queue page */ -+ ELAN4_MAP_HANDLE cq_handle; /* and mapping handle */ -+} ELAN4_CQ; -+ -+/* cqtype flags to elan4_alloccq() */ -+#define CQ_Priority (1 << 0) -+#define CQ_Reorder (1 << 1) -+ -+/* command queues are allocated in chunks,so that all the -+ * command ports are in a single system page */ -+#define ELAN4_CQ_PER_CQA MAX(1, (PAGESIZE/CQ_CommandMappingSize)) -+ -+/* maximum number of command queues per context */ -+#define ELAN4_MAX_CQA (256 / ELAN4_CQ_PER_CQA) -+ -+typedef struct elan4_cqa -+{ -+ struct list_head cqa_link; /* linked together */ -+ bitmap_t cqa_bitmap[BT_BITOUL(ELAN4_CQ_PER_CQA)]; /* bitmap of which are free */ -+ unsigned int cqa_type; /* allocation type */ -+ unsigned int cqa_cqnum; /* base cq number */ -+ unsigned int cqa_ref; /* "mappings" to a queue */ -+ unsigned int cqa_idx; /* index number */ -+ ELAN4_CQ cqa_cq[ELAN4_CQ_PER_CQA]; /* command queue entries */ -+} ELAN4_CQA; -+ -+#define elan4_cq2num(cq) ((cq)->cq_cqa->cqa_cqnum + (cq)->cq_idx) -+#define elan4_cq2idx(cq) ((cq)->cq_cqa->cqa_idx * ELAN4_CQ_PER_CQA + (cq)->cq_idx) -+ -+typedef struct elan4_ctxt -+{ -+ struct elan4_dev *ctxt_dev; /* device we're associated with */ -+ struct list_head ctxt_link; /* chained on device */ -+ -+ struct elan4_trap_ops *ctxt_ops; /* client specific operations */ -+ -+ signed ctxt_num; /* local context number */ -+ -+ struct list_head ctxt_cqalist; /* link list of command queue allocators */ -+ bitmap_t ctxt_cqamap[BT_BITOUL(ELAN4_MAX_CQA)]; /* bitmap for allocating cqa_idx */ -+ -+ ELAN4_HASH_ENTRY **ctxt_mmuhash[2]; /* software hash tables */ -+ spinlock_t ctxt_mmulock; /* and spinlock. */ -+} ELAN4_CTXT; -+ -+typedef struct elan4_trap_ops -+{ -+ void (*op_eproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_cproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned cqnum); -+ void (*op_dproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_tproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status); -+ void (*op_iproc_trap) (ELAN4_CTXT *ctxt, E4_uint64 status, unsigned unit); -+ void (*op_interrupt) (ELAN4_CTXT *ctxt, E4_uint64 cookie); -+ void (*op_neterrmsg) (ELAN4_CTXT *ctxt, ELAN4_NETERR_MSG *msg); -+} ELAN4_TRAP_OPS; -+ -+typedef struct elan4_route_table -+{ -+ spinlock_t tbl_lock; -+ unsigned tbl_size; -+ sdramaddr_t tbl_entries; -+} ELAN4_ROUTE_TABLE; -+ -+#ifdef ELAN4_LARGE_PAGE_SUPPORT -+#define NUM_HASH_TABLES 2 -+#else -+#define NUM_HASH_TABLES 1 -+#endif -+ -+#define DEV_STASH_ROUTE_COUNT 20 -+ -+typedef struct elan4_route_ringbuf { -+ int start; -+ int end; -+ E4_VirtualProcessEntry routes[DEV_STASH_ROUTE_COUNT]; -+} ELAN4_ROUTE_RINGBUF; -+ -+#define elan4_ringbuf_init(ringbuf) memset(&ringbuf, 0, sizeof(ELAN4_ROUTE_RINGBUF)); -+ -+typedef struct elan4_dev -+{ -+ ELAN4_CTXT dev_ctxt; /* context for device operations */ -+ -+ ELAN4_DEV_OSDEP dev_osdep; /* OS specific entries */ -+ -+ int dev_instance; /* device number */ -+ ELAN_DEVINFO dev_devinfo; /* device information (revision etc */ -+ ELAN_POSITION dev_position; /* position connected to switch */ -+ ELAN_DEV_IDX dev_idx; /* device idx registered with elanmod */ -+ -+ kmutex_t dev_lock; /* lock for device state/references */ -+ unsigned dev_state; /* device state */ -+ unsigned dev_references; /* # references */ -+ -+ ioaddr_t dev_regs; /* Mapping of device registers */ -+ ELAN4_MAP_HANDLE dev_regs_handle; -+ ioaddr_t dev_rom; /* Mapping of rom */ -+ ELAN4_MAP_HANDLE dev_rom_handle; -+ ioaddr_t dev_i2c; /* Mapping of I2C registers */ -+ ELAN4_MAP_HANDLE dev_i2c_handle; -+ -+ E4_uint64 dev_sdram_cfg; /* SDRAM config value (from ROM) */ -+ E4_uint64 dev_sdram_initial_ecc_val; /* power on ECC register value */ -+ int dev_sdram_numbanks; /* # banks of sdram */ -+ ELAN4_SDRAM_BANK dev_sdram_banks[SDRAM_MAX_BANKS]; /* Mapping of sdram banks */ -+ spinlock_t dev_sdram_lock; /* spinlock for buddy allocator */ -+ sdramaddr_t dev_sdram_freelists[SDRAM_NUM_FREE_LISTS]; -+ unsigned dev_sdram_freecounts[SDRAM_NUM_FREE_LISTS]; -+ -+ sdramaddr_t dev_cacheflush_space; /* sdram reserved for cache flush operation */ -+ -+ sdramaddr_t dev_faultarea; /* fault areas for each unit */ -+ sdramaddr_t dev_inputtraparea; /* trap area for trapped transactions */ -+ sdramaddr_t dev_ctxtable; /* context table (E4_ContextControlBlock) */ -+ int dev_ctxtableshift; /* and size (in bits) */ -+ -+ E4_uint32 dev_syscontrol; /* copy of system control register */ -+ spinlock_t dev_syscontrol_lock; /* spinlock to sequentialise modifications */ -+ unsigned dev_direct_map_pci_writes; /* # counts for CONT_DIRECT_MAP_PCI_WRITES */ -+ -+ volatile E4_uint32 dev_intmask; /* copy of interrupt mask register */ -+ spinlock_t dev_intmask_lock; /* spinlock to sequentialise modifications */ -+ -+ /* i2c section */ -+ spinlock_t dev_i2c_lock; /* spinlock for i2c operations */ -+ unsigned int dev_i2c_led_disabled; /* count of reasons led auto update disabled */ -+ -+ /* mmu section */ -+ unsigned dev_pagesizeval[NUM_HASH_TABLES]; /* page size value */ -+ unsigned dev_pageshift[NUM_HASH_TABLES]; /* pageshift in bits. */ -+ unsigned dev_hashsize[NUM_HASH_TABLES]; /* # entries in mmu hash table */ -+ sdramaddr_t dev_hashtable[NUM_HASH_TABLES]; /* mmu hash table */ -+ ELAN4_HASH_ENTRY *dev_mmuhash[NUM_HASH_TABLES]; /* and software shadow */ -+ ELAN4_HASH_ENTRY **dev_mmufree[NUM_HASH_TABLES]; /* and partially free blocks */ -+ ELAN4_HASH_ENTRY *dev_mmufreelist; /* and free blocks */ -+ spinlock_t dev_mmulock; -+ E4_uint16 dev_topaddr[4]; /* top address values */ -+ unsigned char dev_topaddrvalid; -+ unsigned char dev_topaddrmode; -+ unsigned char dev_pteval; /* allow setting of relaxed order/dont snoop attributes */ -+ -+ unsigned dev_rsvd_hashmask[NUM_HASH_TABLES]; -+ unsigned dev_rsvd_hashval[NUM_HASH_TABLES]; -+ -+ /* run queues */ -+ sdramaddr_t dev_comqlowpri; /* CProc low & high pri run queues */ -+ sdramaddr_t dev_comqhighpri; -+ -+ sdramaddr_t dev_dmaqlowpri; /* DProc,TProc,Interrupt queues */ -+ sdramaddr_t dev_dmaqhighpri; -+ sdramaddr_t dev_threadqlowpri; -+ sdramaddr_t dev_threadqhighpri; -+ sdramaddr_t dev_interruptq; -+ -+ E4_uint32 dev_interruptq_nfptr; /* cache next main interrupt fptr */ -+ struct list_head dev_interruptq_list; /* list of operations to call when space in interruptq*/ -+ -+ /* command queue section */ -+ sdramaddr_t dev_cqaddr; /* SDRAM address of command queues */ -+ unsigned dev_cqoffset; /* offset for command queue alignment constraints */ -+ unsigned dev_cqcount; /* number of command queue descriptors */ -+ bitmap_t *dev_cqamap; /* bitmap for allocation */ -+ spinlock_t dev_cqlock; /* spinlock to protect bitmap */ -+ unsigned dev_cqreorder; /* offset for first re-ordering queue with mtrr */ -+ -+ /* halt operation section */ -+ struct list_head dev_haltop_list; /* list of operations to call when units halted */ -+ E4_uint32 dev_haltop_mask; /* mask of which ones to halt */ -+ E4_uint32 dev_haltop_active; /* mask of which haltops are executing */ -+ spinlock_t dev_haltop_lock; /* and their spinlock */ -+ -+ struct { -+ struct list_head list; /* list of halt operations for DMAs */ -+ ELAN4_CQ *cq; /* and command queue's */ -+ ELAN4_INTOP intop; /* and main interrupt op */ -+ E4_uint64 status; /* status register (when waiting for intop)*/ -+ } dev_dma_flushop[2]; -+ -+ unsigned dev_halt_all_count; /* count of reasons to halt all units */ -+ unsigned dev_halt_lowpri_count; /* count of reasons to halt lowpri queues */ -+ unsigned dev_halt_cproc_count; /* count of reasons to halt command processor */ -+ unsigned dev_halt_dproc_count; /* count of reasons to halt dma processor */ -+ unsigned dev_halt_tproc_count; /* count of reasons to halt thread processor */ -+ unsigned dev_discard_all_count; /* count of reasons to discard all packets */ -+ unsigned dev_discard_lowpri_count; /* count of reasons to discard non-system packets */ -+ unsigned dev_discard_highpri_count; /* count of reasons to discard system packets */ -+ -+ E4_uint32 dev_schedstatus; /* copy of schedule status register */ -+ -+ /* local context allocation section */ -+ spinlock_t dev_ctxlock; /* spinlock to protect bitmap */ -+ bitmap_t *dev_ctxmap; /* bitmap for local context allocation */ -+ -+ spinlock_t dev_ctxt_lock; /* spinlock to protect context list */ -+ struct list_head dev_ctxt_list; /* linked list of contexts */ -+ -+ /* locks to sequentialise interrupt handling */ -+ spinlock_t dev_trap_lock; /* spinlock while handling a trap */ -+ spinlock_t dev_requeue_lock; /* spinlock sequentialising cproc requeue */ -+ -+ /* error rate interrupt section */ -+ long dev_error_time; /* lbolt at start of sampling period */ -+ unsigned dev_errors_per_period; /* errors so far this sampling period */ -+ timer_fn_t dev_error_timeoutid; /* timeout to re-enable error interrupts */ -+ timer_fn_t dev_linkerr_timeoutid; /* timeout to clear link error led */ -+ -+ /* kernel threads */ -+ unsigned dev_stop_threads:1; /* kernel threads should exit */ -+ -+ /* main interrupt thread */ -+ kcondvar_t dev_mainint_wait; /* place for mainevent interrupt thread to sleep */ -+ spinlock_t dev_mainint_lock; /* and it's spinlock */ -+ unsigned dev_mainint_started:1; -+ unsigned dev_mainint_stopped:1; -+ -+ /* device context - this is used to flush insert cache/instruction cache/dmas & threads */ -+ ELAN4_CPROC_TRAP dev_cproc_trap; /* space to extract cproc trap into */ -+ -+ struct list_head dev_intop_list; /* list of main interrupt operations */ -+ spinlock_t dev_intop_lock; /* and spinlock */ -+ E4_uint64 dev_intop_cookie; /* and next cookie to use */ -+ -+ spinlock_t dev_flush_lock; /* spinlock for flushing */ -+ kcondvar_t dev_flush_wait; /* and place to sleep */ -+ -+ ELAN4_CQ *dev_flush_cq[COMMAND_INSERTER_CACHE_ENTRIES]; /* command queues to flush the insert cache */ -+ ELAN4_INTOP dev_flush_op[COMMAND_INSERTER_CACHE_ENTRIES]; /* and a main interrupt operation for each one */ -+ unsigned dev_flush_finished; /* flush command finished */ -+ -+ ELAN4_HALTOP dev_iflush_haltop; /* halt operation for icache flush */ -+ unsigned dev_iflush_queued:1; /* icache haltop queued */ -+ -+ ELAN4_ROUTE_TABLE *dev_routetable; /* virtual process table (for dma queue flush)*/ -+ sdramaddr_t dev_sdrampages[2]; /* pages of sdram to hold suspend code sequence */ -+ E4_Addr dev_tproc_suspend; /* st8suspend instruction */ -+ E4_Addr dev_tproc_space; /* and target memory */ -+ -+ sdramaddr_t dev_neterr_inputq; /* network error input queue descriptor & event */ -+ sdramaddr_t dev_neterr_slots; /* network error message slots */ -+ ELAN4_CQ *dev_neterr_msgcq; /* command queue for sending messages */ -+ ELAN4_CQ *dev_neterr_intcq; /* command queue for message received interrupt */ -+ ELAN4_INTOP dev_neterr_intop; /* and it's main interrupt operation */ -+ E4_uint64 dev_neterr_queued; /* # message queued in msgcq */ -+ spinlock_t dev_neterr_lock; /* and spinlock .... */ -+ -+ ELAN4_DEV_STATS dev_stats; /* device statistics */ -+ ELAN4_ECCERRS dev_sdramerrs[30]; /* last few sdram errors for procfs */ -+ -+ unsigned int *dev_ack_errors; /* Map of source of dproc ack errors */ -+ ELAN4_ROUTE_RINGBUF dev_ack_error_routes; -+ unsigned int *dev_dproc_timeout; /* Ditto dproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_dproc_timeout_routes; -+ unsigned int *dev_cproc_timeout; /* Ditto cproc timeout errors */ -+ ELAN4_ROUTE_RINGBUF dev_cproc_timeout_routes; -+ -+ unsigned dev_linkerr_signalled; /* linkerror signalled to switch controller */ -+ -+ struct list_head dev_hc_list; /* list of the allocated hash_chunks */ -+ -+ ELAN4_IPROC_TRAP dev_iproc_trap; /* space for iproc trap */ -+} ELAN4_DEV; -+ -+/* values for dev_state */ -+#define ELAN4_STATE_STOPPED (1 << 0) /* device initialised but not started */ -+#define ELAN4_STATE_STARTING (1 << 1) /* device in process of starting */ -+#define ELAN4_STATE_STARTED (1 << 2) /* device started */ -+#define ELAN4_STATE_STOPPING (1 << 3) /* device in process of stopping */ -+ -+extern __inline__ unsigned long long -+__elan4_readq (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+#if defined(__i386) -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t save, rval; -+ unsigned long flags; -+ -+ local_irq_save (flags); -+ asm volatile ("sfence\n" \ -+ "movq %%xmm0, %0\n" \ -+ "sfence\n" \ -+ "movq (%2), %%xmm0\n" \ -+ "sfence\n" \ -+ "movq %%xmm0, %1\n" -+ "sfence\n" -+ "movq %0, %%xmm0\n" -+ "sfence\n" -+ : "=m" (save), "=m" (rval) : "r" (addr) : "memory"); -+ -+ local_irq_restore(flags); -+ -+ return rval; -+ } -+#endif -+ return readq ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readl (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffffffff); -+ } -+ return readl ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readw (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xffff); -+ } -+ return readw ((void *)addr); -+} -+ -+extern __inline__ unsigned int -+__elan4_readb (ELAN4_DEV *dev, ioaddr_t addr) -+{ -+ if (dev->dev_devinfo.dev_params.values[ELAN4_PARAM_DRIVER_FEATURES] & ELAN4_FEATURE_64BIT_READ) -+ { -+ uint64_t val = __elan4_readq (dev, ((unsigned long) addr & ~7)); -+ return ((val >> (((unsigned long) addr & 7) << 3)) & 0xff); -+ } -+ return readb ((void *)addr); -+} -+ -+/* macros for accessing dev->dev_regs.Tags. */ -+#define write_tag(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Tags.what))) -+#define read_tag(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, Tags.what)) -+ -+/* macros for accessing dev->dev_regs.Regs. */ -+#define write_reg64(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Regs.what))) -+#define write_reg32(dev,what,val) writel (val, (void *) (dev->dev_regs + offsetof (E4_Registers, Regs.what))) -+#define read_reg64(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+#define read_reg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, Regs.what)) -+ -+/* macros for accessing dev->dev_regs.uRegs. */ -+#define write_ureg64(dev,what,val) writeq (val, (void *) (dev->dev_regs + offsetof (E4_Registers, uRegs.what))) -+#define write_ureg32(dev,what,val) writel (val, (void *) (dev->dev_regs + offsetof (E4_Registers, uRegs.what))) -+#define read_ureg64(dev,what) __elan4_readq (dev, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+#define read_ureg32(dev,what) __elan4_readl (dev, dev->dev_regs + offsetof (E4_Registers, uRegs.what)) -+ -+/* macros for accessing dev->dev_i2c */ -+#define write_i2c(dev,what,val) writeb (val, (void *) (dev->dev_i2c + offsetof (E4_I2C, what))) -+#define read_i2c(dev,what) __elan4_readb (dev, dev->dev_i2c + offsetof (E4_I2C, what)) -+ -+/* macros for accessing dev->dev_rom */ -+#define read_ebus_rom(dev,off) __elan4_readb (dev, dev->dev_rom + off) -+ -+/* PIO flush operations - ensure writes to registers/sdram are ordered */ -+#ifdef CONFIG_IA64_SGI_SN2 -+#define pioflush_reg(dev) read_reg32(dev,InterruptReg) -+#define pioflush_sdram(dev) elan4_sdram_readl(dev, 0) -+#else -+#define pioflush_reg(dev) mb() -+#define pioflush_sdram(dev) mb() -+#endif -+ -+/* macros for manipulating the interrupt mask register */ -+#define SET_INT_MASK(dev,value) \ -+do { \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask = (value)); \ -+ pioflush_reg(dev);\ -+} while (0) -+ -+#define CHANGE_INT_MASK(dev, value) \ -+do { \ -+ if ((dev)->dev_intmask != (value)) \ -+ {\ -+ write_reg32 (dev, InterruptMask, (dev)->dev_intmask = (value));\ -+ pioflush_reg(dev);\ -+ }\ -+} while (0) -+ -+#define ENABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define DISABLE_INT_MASK(dev,value) \ -+do { \ -+ unsigned long flags; \ -+ \ -+ spin_lock_irqsave (&(dev)->dev_intmask_lock, flags); \ -+ write_reg32(dev, InterruptMask, (dev)->dev_intmask &= ~(value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_intmask_lock, flags); \ -+} while (0) -+ -+#define SET_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if ((dev)->what++ == 0) \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol |= (value)); \ -+ pioflush_reg(dev);\ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CLEAR_SYSCONTROL(dev,what,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ if (--(dev)->what == 0)\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol &= ~(value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define PULSE_SYSCONTROL(dev,value) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol | (value)); \ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define CHANGE_SYSCONTROL(dev,add,sub) \ -+do { \ -+ unsigned long flags; \ -+\ -+ spin_lock_irqsave (&(dev)->dev_syscontrol_lock, flags); \ -+ dev->dev_syscontrol |= (add);\ -+ dev->dev_syscontrol &= ~(sub);\ -+ write_reg64 (dev, SysControlReg, (dev)->dev_syscontrol);\ -+ pioflush_reg (dev); \ -+ spin_unlock_irqrestore (&(dev)->dev_syscontrol_lock, flags); \ -+} while (0) -+ -+#define SET_SCHED_STATUS(dev, value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+#define CHANGE_SCHED_STATUS(dev, value)\ -+do {\ -+ if ((dev)->dev_schedstatus != (value))\ -+ {\ -+ write_reg32 (dev, SchedStatus.Status, (dev)->dev_schedstatus = (value));\ -+ pioflush_reg (dev);\ -+ }\ -+} while (0) -+ -+#define PULSE_SCHED_RESTART(dev,value)\ -+do {\ -+ write_reg32 (dev, SchedStatus.Restart, value);\ -+ pioflush_reg (dev);\ -+} while (0) -+ -+/* device context elan address space */ -+#define DEVICE_TPROC_SUSPEND_ADDR (0x1000000000000000ull) -+#define DEVICE_TPROC_SPACE_ADDR (0x1000000000000000ull + SDRAM_PAGE_SIZE) -+#if defined(__LITTLE_ENDIAN__) -+# define DEVICE_TPROC_SUSPEND_INSTR 0xd3f040c0 /* st64suspend %r16, [%r1] */ -+#else -+# define DEVICE_TPROC_SUSPEND_INSTR 0xc040f0d3 /* st64suspend %r16, [%r1] */ -+#endif -+ -+#define DEVICE_NETERR_INPUTQ_ADDR (0x2000000000000000ull) -+#define DEVICE_NETERR_INTCQ_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE) -+#define DEVICE_NETERR_SLOTS_ADDR (0x2000000000000000ull + SDRAM_PAGE_SIZE*2) -+ -+/* -+ * Interrupt operation cookie space -+ * [50:48] type -+ * [47:0] value -+ */ -+#define INTOP_PERSISTENT (0x1000000000000ull) -+#define INTOP_ONESHOT (0x2000000000000ull) -+#define INTOP_TYPE_MASK (0x3000000000000ull) -+#define INTOP_VALUE_MASK (0x0ffffffffffffull) -+ -+/* functions for accessing sdram - sdram.c */ -+extern unsigned char elan4_sdram_readb (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned short elan4_sdram_readw (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned int elan4_sdram_readl (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern unsigned long long elan4_sdram_readq (ELAN4_DEV *dev, sdramaddr_t ptr); -+extern void elan4_sdram_writeb (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned char val); -+extern void elan4_sdram_writew (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned short val); -+extern void elan4_sdram_writel (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned int val); -+extern void elan4_sdram_writeq (ELAN4_DEV *dev, sdramaddr_t ptr, unsigned long long val); -+ -+extern void elan4_sdram_zerob_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerow_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zerol_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_zeroq_sdram (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+ -+extern void elan4_sdram_copyb_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyw_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyl_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyq_from_sdram (ELAN4_DEV *dev, sdramaddr_t from, void *to, int nbytes); -+extern void elan4_sdram_copyb_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyw_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyl_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+extern void elan4_sdram_copyq_to_sdram (ELAN4_DEV *dev, void *from, sdramaddr_t to, int nbytes); -+ -+/* device.c - configuration */ -+extern unsigned int elan4_hash_0_size_val; -+extern unsigned int elan4_hash_1_size_val; -+extern unsigned int elan4_ctxt_table_shift; -+extern unsigned int elan4_ln2_max_cqs; -+extern unsigned int elan4_dmaq_highpri_size; -+extern unsigned int elan4_threadq_highpri_size; -+extern unsigned int elan4_dmaq_lowpri_size; -+extern unsigned int elan4_threadq_lowpri_size; -+extern unsigned int elan4_interruptq_size; -+extern unsigned int elan4_mainint_punt_loops; -+extern unsigned int elan4_mainint_resched_ticks; -+extern unsigned int elan4_linkport_lock; -+extern unsigned int elan4_eccerr_recheck; -+ -+/* device.c */ -+extern void elan4_set_schedstatus (ELAN4_DEV *dev, E4_uint32 intreg); -+extern void elan4_queue_haltop (ELAN4_DEV *dev, ELAN4_HALTOP *op); -+extern void elan4_queue_intop (ELAN4_DEV *dev, ELAN4_CQ *cq, ELAN4_INTOP *op); -+extern void elan4_register_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_deregister_intop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+extern void elan4_queue_dma_flushop (ELAN4_DEV *dev, ELAN4_DMA_FLUSHOP *op, int hipri); -+extern void elan4_queue_mainintop (ELAN4_DEV *dev, ELAN4_INTOP *op); -+ -+extern int elan4_1msi0 (ELAN4_DEV *dev); -+ -+extern int elan4_insertctxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt, ELAN4_TRAP_OPS *ops); -+extern void elan4_removectxt (ELAN4_DEV *dev, ELAN4_CTXT *ctxt); -+extern ELAN4_CTXT *elan4_localctxt (ELAN4_DEV *dev, unsigned num); -+extern ELAN4_CTXT *elan4_networkctxt (ELAN4_DEV *dev, unsigned num); -+ -+extern int elan4_attach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_detach_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum); -+extern void elan4_set_filter (ELAN4_CTXT *ctxt, unsigned int ctxnum, E4_uint32 state); -+extern void elan4_set_routetable (ELAN4_CTXT *ctxt, ELAN4_ROUTE_TABLE *tbl); -+ -+extern ELAN4_CQA * elan4_getcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern void elan4_putcqa (ELAN4_CTXT *ctxt, unsigned int idx); -+extern ELAN4_CQ *elan4_alloccq (ELAN4_CTXT *ctxt, unsigned cqsize, unsigned cqperm, unsigned cqtype); -+extern void elan4_freecq (ELAN4_CTXT *ctxt, ELAN4_CQ *cq); -+extern void elan4_restartcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_flushcq (ELAN4_DEV *dev, ELAN4_CQ *cq); -+extern void elan4_updatecq (ELAN4_DEV *dev, ELAN4_CQ *cq, unsigned perm, unsigned restart); -+ -+extern void elan4_flush_icache (ELAN4_CTXT *ctxt); -+extern void elan4_flush_icache_halted (ELAN4_CTXT *ctxt); -+ -+extern int elan4_initialise_device (ELAN4_DEV *dev); -+extern void elan4_finalise_device (ELAN4_DEV *dev); -+extern int elan4_start_device (ELAN4_DEV *dev); -+extern void elan4_stop_device (ELAN4_DEV *dev); -+ -+extern int elan4_compute_position (ELAN_POSITION *pos, unsigned nodeid, unsigned numnodes, unsigned aritiyval); -+extern int elan4_get_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern int elan4_set_position (ELAN4_DEV *dev, ELAN_POSITION *pos); -+extern void elan4_get_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short *mask); -+extern void elan4_set_params (ELAN4_DEV *dev, ELAN_PARAMS *params, unsigned short mask); -+ -+ -+extern int elan4_read_vpd(ELAN4_DEV *dev, unsigned char *tag, unsigned char *result) ; -+ -+ -+/* device_osdep.c */ -+extern unsigned int elan4_pll_cfg; -+extern int elan4_pll_div; -+extern int elan4_mod45disable; -+ -+extern int elan4_pciinit (ELAN4_DEV *dev); -+extern void elan4_pcifini (ELAN4_DEV *dev); -+extern void elan4_updatepll (ELAN4_DEV *dev, unsigned int val); -+extern void elan4_pcierror (ELAN4_DEV *dev); -+ -+extern ELAN4_DEV *elan4_reference_device (int instance, int state); -+extern void elan4_dereference_device (ELAN4_DEV *dev); -+ -+extern ioaddr_t elan4_map_device (ELAN4_DEV *dev, unsigned bar, unsigned off, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern void elan4_unmap_device (ELAN4_DEV *dev, ioaddr_t ptr, unsigned size, ELAN4_MAP_HANDLE *handlep); -+extern unsigned long elan4_resource_len (ELAN4_DEV *dev, unsigned bar); -+ -+extern void elan4_configure_writecombining (ELAN4_DEV *dev); -+extern void elan4_unconfigure_writecombining (ELAN4_DEV *dev); -+ -+/* i2c.c */ -+extern int i2c_disable_auto_led_update (ELAN4_DEV *dev); -+extern void i2c_enable_auto_led_update (ELAN4_DEV *dev); -+extern int i2c_write (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_read (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+extern int i2c_writereg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_readreg (ELAN4_DEV *dev, unsigned int addr, unsigned int reg, unsigned int count, unsigned char *data); -+extern int i2c_read_rom (ELAN4_DEV *dev, unsigned int addr, unsigned int count, unsigned char *data); -+ -+#if defined(__linux__) -+/* procfs_Linux.c */ -+extern void elan4_procfs_device_init (ELAN4_DEV *dev); -+extern void elan4_procfs_device_fini (ELAN4_DEV *dev); -+extern void elan4_procfs_init(void); -+extern void elan4_procfs_fini(void); -+ -+extern struct proc_dir_entry *elan4_procfs_root; -+extern struct proc_dir_entry *elan4_config_root; -+#endif -+ -+/* sdram.c */ -+extern void elan4_sdram_init (ELAN4_DEV *dev); -+extern void elan4_sdram_fini (ELAN4_DEV *dev); -+extern void elan4_sdram_setup_delay_lines (ELAN4_DEV *dev, int factor); -+extern int elan4_sdram_init_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_fini_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern void elan4_sdram_add_bank (ELAN4_DEV *dev, ELAN4_SDRAM_BANK *bank); -+extern sdramaddr_t elan4_sdram_alloc (ELAN4_DEV *dev, int nbytes); -+extern void elan4_sdram_free (ELAN4_DEV *dev, sdramaddr_t ptr, int nbytes); -+extern void elan4_sdram_flushcache (ELAN4_DEV *dev, sdramaddr_t base, int nbytes); -+extern char *elan4_sdramerr2str (ELAN4_DEV *dev, E4_uint64 status, E4_uint64 ConfigReg, char *str); -+ -+/* traps.c */ -+extern void elan4_display_eproc_trap (void *type, int mode, char *str, ELAN4_EPROC_TRAP *trap); -+extern void elan4_display_cproc_trap (void *type, int mode, char *str, ELAN4_CPROC_TRAP *trap); -+extern void elan4_display_dproc_trap (void *type, int mode, char *str, ELAN4_DPROC_TRAP *trap); -+extern void elan4_display_tproc_trap (void *type, int mode, char *str, ELAN4_TPROC_TRAP *trap); -+extern void elan4_display_iproc_trap (void *type, int mode, char *str, ELAN4_IPROC_TRAP *trap); -+ -+ -+extern void elan4_extract_eproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_EPROC_TRAP *trap, int iswaitevent); -+extern void elan4_extract_cproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_CPROC_TRAP *trap, unsigned cqnum); -+extern void elan4_extract_dproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_DPROC_TRAP *trap, unsigned unit); -+extern void elan4_extract_tproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_TPROC_TRAP *trap); -+extern void elan4_extract_iproc_trap (ELAN4_DEV *dev, E4_uint64 status, ELAN4_IPROC_TRAP *trap, unsigned unit); -+extern void elan4_ringbuf_store(ELAN4_ROUTE_RINGBUF *ringbuf, E4_VirtualProcessEntry *route, ELAN4_DEV *dev); -+extern int cproc_open_extract_vp (ELAN4_DEV *dev, ELAN4_CQ *cq, int chan); -+ -+extern void elan4_inspect_iproc_trap (ELAN4_IPROC_TRAP *trap); -+extern E4_uint64 elan4_trapped_open_command (ELAN4_DEV *dev, ELAN4_CQ *cq); -+ -+/* mmu.c */ -+extern void elan4mmu_flush_tlb (ELAN4_DEV *dev); -+extern ELAN4_HASH_ENTRY *elan4mmu_ptealloc (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, unsigned int *tagidxp); -+extern int elan4mmu_pteload (ELAN4_CTXT *ctxt, int tbl, E4_Addr vaddr, E4_uint64 pte); -+extern void elan4mmu_unload_range (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned long len); -+extern void elan4mmu_invalidate_ctxt (ELAN4_CTXT *ctxt); -+ -+extern ELAN4_HASH_CACHE *elan4mmu_reserve (ELAN4_CTXT *ctxt, int tbl, E4_Addr start, unsigned int npages, int cansleep); -+extern void elan4mmu_release (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc); -+extern void elan4mmu_set_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx, E4_uint64 newpte); -+extern E4_uint64 elan4mmu_get_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+extern void elan4mmu_clear_pte (ELAN4_CTXT *ctxt, ELAN4_HASH_CACHE *hc, unsigned int idx); -+ -+/* mmu_osdep.c */ -+extern int elan4mmu_categorise_paddr (ELAN4_DEV *dev, physaddr_t *physp); -+extern int elan4mmu_alloc_topaddr (ELAN4_DEV *dev, physaddr_t paddr, unsigned type); -+extern E4_uint64 elan4mmu_phys2pte (ELAN4_DEV *dev, physaddr_t paddr, unsigned perm); -+extern physaddr_t elan4mmu_pte2phys (ELAN4_DEV *dev, E4_uint64 pte); -+ -+/* neterr.c */ -+extern int elan4_neterr_init (ELAN4_DEV *dev); -+extern void elan4_neterr_destroy (ELAN4_DEV *dev); -+extern int elan4_neterr_sendmsg (ELAN4_DEV *dev, unsigned int nodeid, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int elan4_neterr_iproc_trap (ELAN4_DEV *dev, ELAN4_IPROC_TRAP *trap); -+ -+/* routetable.c */ -+extern ELAN4_ROUTE_TABLE *elan4_alloc_routetable (ELAN4_DEV *dev, unsigned size); -+extern void elan4_free_routetable (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl); -+extern void elan4_write_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_read_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp, E4_VirtualProcessEntry *entry); -+extern void elan4_invalidate_route (ELAN4_DEV *dev, ELAN4_ROUTE_TABLE *tbl, unsigned vp); -+extern int elan4_generate_route (ELAN_POSITION *pos, E4_VirtualProcessEntry *route, unsigned ctxnum, -+ unsigned lowid, unsigned highid, unsigned options); -+extern int elan4_check_route (ELAN_POSITION *pos, ELAN_LOCATION location, E4_VirtualProcessEntry *route, unsigned flags); -+ -+/* user.c */ -+extern int __categorise_command (E4_uint64 command, int *cmdSize); -+extern int __whole_command (sdramaddr_t *commandPtr, sdramaddr_t insertPtr, unsigned int cqSize, unsigned int cmdSize); -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_H */ -Index: linux-2.6.5-7.191/include/elan4/device_Linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/device_Linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/device_Linux.h 2005-07-28 14:52:52.960661712 -0400 -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_ELANDEV_LINUX_H -+#define __ELAN4_ELANDEV_LINUX_H -+ -+#ident "$Id: device_Linux.h,v 1.19.2.1 2005/03/07 16:29:06 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/device_Linux.h,v $*/ -+ -+#include -+ -+#if !defined(NO_COPROC) /* The older coproc kernel patch is applied */ -+#include -+ -+#define ioproc_ops coproc_ops_struct -+#define ioproc_register_ops register_coproc_ops -+#define ioproc_unregister_ops unregister_coproc_ops -+ -+#define IOPROC_MM_STRUCT_ARG 1 -+#define IOPROC_PATCH_APPLIED 1 -+ -+#elif !defined(NO_IOPROC) /* The new ioproc kernel patch is applied */ -+#include -+ -+#define IOPROC_PATCH_APPLIED 1 -+#endif -+ -+ -+#if defined(MPSAS) -+#include -+#endif -+ -+#if defined(CONFIG_DEVFS_FS) -+#include -+#endif -+ -+#define ELAN4_MAJOR 61 -+#define ELAN4_NAME "elan4" -+#define ELAN4_MAX_CONTROLLER 16 /* limited to 4 bits */ -+ -+/* OS dependant component of ELAN4_DEV struct */ -+typedef struct elan4_dev_osdep -+{ -+ struct pci_dev *pdev; /* PCI config data */ -+ -+ struct proc_dir_entry *procdir; -+ struct proc_dir_entry *configdir; -+ struct proc_dir_entry *statsdir; -+ -+#if defined(CONFIG_DEVFS_FS) -+ devfs_handle_t devfs_control; -+ devfs_handle_t devfs_sdram; -+ devfs_handle_t devfs_user; -+#endif -+ -+#if defined(CONFIG_MTRR) -+ int sdram_mtrr; -+ int regs_mtrr; -+#endif -+} ELAN4_DEV_OSDEP; -+ -+/* /dev/elan/rmsX */ -+ -+/* /dev/elan4/controlX */ -+typedef struct control_private -+{ -+ struct elan4_dev *pr_dev; -+ unsigned pr_boundary_scan; -+} CONTROL_PRIVATE; -+ -+/* /dev/elan4/sdramX */ -+typedef struct mem_page -+{ -+ struct mem_page *pg_next; -+ sdramaddr_t pg_addr; -+ unsigned long pg_pgoff; -+ unsigned pg_ref; -+} MEM_PAGE; -+ -+#define MEM_HASH_SIZE 32 -+#define MEM_HASH(pgoff) ((pgoff) & (MEM_HASH_SIZE-1)) -+ -+typedef struct mem_private -+{ -+ struct elan4_dev *pr_dev; -+ MEM_PAGE *pr_pages[MEM_HASH_SIZE]; -+ spinlock_t pr_lock; -+} MEM_PRIVATE; -+ -+/* /dev/elan4/userX */ -+typedef struct user_private -+{ -+ atomic_t pr_ref; -+ struct user_ctxt *pr_uctx; -+ struct mm_struct *pr_mm; -+ -+#if defined(IOPROC_PATCH_APPLIED) -+ struct ioproc_ops pr_ioproc; -+#endif -+} USER_PRIVATE; -+ -+/* No mapping handles on linux */ -+typedef void *ELAN4_MAP_HANDLE; -+ -+#define ELAN4_TASK_HANDLE() ((unsigned long) current->mm) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_ELANDEV_LINUX_H */ -Index: linux-2.6.5-7.191/include/elan4/dma.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/dma.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/dma.h 2005-07-28 14:52:52.960661712 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_DMA_H -+#define __ELAN4_DMA_H -+ -+#ident "$Id: dma.h,v 1.16 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/dma.h,v $*/ -+ -+#include -+ -+/* Alignment for a DMA descriptor */ -+#define E4_DMA_ALIGN (64) -+ -+/* Maximum size of a single DMA ((1 << 31)-1) */ -+#define E4_MAX_DMA_SIZE (0x7fffffff) -+ -+/* -+ * dma_typeSize -+ * -+ * [63:32] Size -+ * [31] unused -+ * [30] IsRemote -+ * [29] QueueWrite -+ * [28] ShmemWrite -+ * [27:26] DataType -+ * [25] Broadcast -+ * [24] AlignPackets -+ * [23:16] FailCount -+ * [15:14] unused -+ * [13:0] Context -+ */ -+ -+#define DMA_FailCount(val) (((val) & 0xff) << 16) -+#define DMA_AlignPackets (1 << 24) -+#define DMA_Broadcast (1 << 25) -+#define DMA_ShMemWrite (1 << 28) -+#define DMA_QueueWrite (1 << 29) -+#define DMA_IsRemote (1 << 30) -+#define DMA_Context(val) ((unsigned) (val) & 0x3ff) -+#define DMA_ContextMask 0x3fffull -+#define Dma_TypeSizeMask 0xfffffffffff00000ull -+ -+#define DMA_DataTypeByte (E4_DATATYPE_BYTE << 26) -+#define DMA_DataTypeShort (E4_DATATYPE_SHORT << 26) -+#define DMA_DataTypeWord (E4_DATATYPE_WORD << 26) -+#define DMA_DataTypeLong (E4_DATATYPE_DWORD << 26) -+ -+#define E4_DMA_TYPE_SIZE(size, dataType, flags, failCount) \ -+ ((((E4_uint64)(size)) << 32) | ((dataType) & DMA_DataTypeLong) | \ -+ (flags) | DMA_FailCount(failCount)) -+ -+typedef volatile struct e4_dma -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+} E4_DMA; -+ -+/* Same as above but padded to 64-bytes */ -+typedef volatile struct e4_dma64 -+{ -+ E4_uint64 dma_typeSize; -+ E4_uint64 dma_cookie; -+ E4_uint64 dma_vproc; -+ E4_Addr dma_srcAddr; -+ E4_Addr dma_dstAddr; -+ E4_Addr dma_srcEvent; -+ E4_Addr dma_dstEvent; -+ E4_Addr dma_pad; -+} E4_DMA64; -+ -+#endif /* __ELAN4_DMA_H */ -Index: linux-2.6.5-7.191/include/elan4/events.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/events.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/events.h 2005-07-28 14:52:52.961661560 -0400 -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_EVENTS_H -+#define __ELAN4_EVENTS_H -+ -+#ident "$Id: events.h,v 1.22 2004/06/23 11:07:18 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/events.h,v $*/ -+ -+#define E4_EVENT_ALIGN 32 -+#define E4_EVENTBLOCK_SIZE 64 -+ -+#ifndef _ASM -+/* -+ * Event locations must be aligned to a 32 byte boundary. It is very much more efficent to place -+ * them in elan local memory but is not essential. -+ */ -+typedef struct _E4_Event -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+} E4_Event; -+ -+/* Same as above but padded to correct Event alignment */ -+typedef struct _E4_Event32 -+{ -+ volatile E4_uint64 ev_CountAndType; -+ E4_uint64 ev_Params[2]; -+ E4_uint64 ev_pad; -+} E4_Event32; -+ -+/* -+ * An E4_EVENTBLOCK_SIZE aligned block of Main or Elan memory -+ */ -+typedef union _E4_Event_Blk -+{ -+ /* Padded to 64-bytes in case a cache-line write is more efficient */ -+ volatile E4_uint8 eb_unit8[E4_EVENTBLOCK_SIZE]; -+ volatile E4_uint32 eb_uint32[E4_EVENTBLOCK_SIZE/sizeof(E4_uint32)]; -+ volatile E4_uint64 eb_uint64[E4_EVENTBLOCK_SIZE/sizeof(E4_uint64)]; -+} E4_Event_Blk; -+#define eb_done eb_uint32[14] -+#define eb_done_dword eb_uint64[7] -+ -+#endif /* ! _ASM */ -+ -+/* -+ * ev_CountAndType -+ * [63:31] Count -+ * [10] CopyType -+ * [9:8] DataType -+ * [7:0] CopySize -+ */ -+#define E4_EVENT_TYPE_MASK 0x00000000ffffffffull -+#define E4_EVENT_COUNT_MASK 0xffffffff00000000ull -+#define E4_EVENT_COUNT_SHIFT 32 -+#define E4_EVENT_COPY_TYPE_MASK (1 << 10) -+#define E4_EVENT_DATA_TYPE_MASK (3 << 8) -+#define E4_EVENT_COPY_SIZE_MASK (0xff) -+ -+/* CopyType */ -+#define E4_EVENT_COPY (0 << 10) -+#define E4_EVENT_WRITE (1 << 10) -+ -+/* DataType */ -+#define E4_EVENT_DTYPE_BYTE (0 << 8) -+#define E4_EVENT_DTYPE_SHORT (1 << 8) -+#define E4_EVENT_DTYPE_WORD (2 << 8) -+#define E4_EVENT_DTYPE_LONG (3 << 8) -+ -+#define EVENT_COUNT(EventPtr) ((E4_int32)(elan4_load64 (&(EventPtr)->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT)) -+#define EVENT_TYPE(EventPtr) ((E4_uint32)(elan4_load64 (&(EventPtr)->ev_CountAndType) & E4_EVENT_TYPE_MASK)) -+ -+#define E4_WAITEVENT_COUNT_TYPE_VALUE(Count, EventType, DataType, CopySize) \ -+ (((E4_uint64)(Count) << E4_EVENT_COUNT_SHIFT) | (EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize) \ -+ ((EventType) | (DataType) | (CopySize)) -+ -+#define E4_EVENT_INIT_VALUE(InitialCount, EventType, DataType, CopySize) \ -+ (((E4_uint64)(InitialCount) << E4_EVENT_COUNT_SHIFT) | E4_EVENT_TYPE_VALUE(EventType, DataType, CopySize)) -+ -+#define ev_CopySource ev_Params[0] -+#define ev_CopyDest ev_Params[1] -+#define ev_WritePtr ev_Params[0] -+#define ev_WriteValue ev_Params[1] -+ -+#define EVENT_BLK_READY(BLK) ((BLK)->eb_done != 0) -+#define EVENT_READY(EVENT) ((E4_uint32)((((volatile E4_Event *) (EVENT))->ev_CountAndType) >> E4_EVENT_COUNT_SHIFT) >= 0) -+ -+#define ELAN_WAIT_EVENT (0) -+#define ELAN_POLL_EVENT (-1) -+ -+#define E4_BLK_PATTERN ((E4_uint32)0xfeedface) -+ -+#define E4_INIT_COPY_EVENT(EVENT, BLK_ELAN, BLK, SIZE) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, SIZE), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((BLK_ELAN), &(EVENT)->ev_CopySource); \ -+ elan4_store64 ((BLK), &(EVENT)->ev_CopyDest); \ -+ } while (0) -+ -+#define E4_INIT_WRITE_EVENT(EVENT, DWORD) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(0, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType); \ -+ elan4_store64 ((DWORD), &(EVENT)->ev_WritePtr); \ -+ elan4_store64 ((E4_Addr) (E4_BLK_PATTERN), &(EVENT)->ev_WriteValue); \ -+ } while (0) -+ -+#define E4_RESET_BLK_EVENT(BLK) \ -+ do { \ -+ (BLK)->eb_done = (0); \ -+ } while (0) -+ -+#define E4_PRIME_BLK_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_COPY_EVENT(EVENT, SIZE, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, (SIZE >> 3)), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#define E4_PRIME_WRITE_EVENT(EVENT, COUNT) \ -+ do { \ -+ elan4_store64 (E4_EVENT_INIT_VALUE(COUNT, E4_EVENT_WRITE, E4_EVENT_DTYPE_LONG, 0), &(EVENT)->ev_CountAndType);\ -+ } while (0) -+ -+#ifndef _ASM -+ -+#define E4_INPUTQ_ALIGN 32 /* Descriptor must be 32-byte aligned */ -+ -+typedef struct _E4_InputQueue -+{ -+ volatile E4_Addr q_bptr; /* 64 bit aligned ptr to current back item */ -+ E4_Addr q_fptr; /* 64 bit aligned ptr to current front item */ -+ E4_uint64 q_control; /* this defines the last item, item size, and offset back to the first item. */ -+ E4_Addr q_event; /* queue event */ -+} E4_InputQueue; -+ -+#define E4_INPUTQ_LASTITEM_MASK 0x00000000ffffffffULL -+#define E4_INPUTQ_ITEMSIZE_MASK 0x000000ff00000000ULL -+#define E4_INPUTQ_LASTITEM_OFFSET_MASK 0xffffff0000000000ULL -+#define E4_INPUTQ_LASTITEM_SHIFT 0 -+#define E4_INPUTQ_ITEMSIZE_SHIFT 32 -+#define E4_INPUTQ_LASTITEM_OFFSET_SHIFT 40 -+ -+/* -+ * Macro to initialise the InputQueue control word given the FirstItem, LastItem & ItemSize -+ * FirstItem and LastItem are 64 bit double word aligned elan addresses. -+ */ -+#define E4_InputQueueControl(FirstItem, LastItem, ItemSizeInBytes)\ -+ (((((E4_uint64)(LastItem))) & E4_INPUTQ_LASTITEM_MASK) |\ -+ ((((E4_uint64)(ItemSizeInBytes)) << (E4_INPUTQ_ITEMSIZE_SHIFT-3)) & E4_INPUTQ_ITEMSIZE_MASK) |\ -+ ((((E4_uint64)((FirstItem)-(LastItem))) << (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)) & E4_INPUTQ_LASTITEM_OFFSET_MASK)) -+ -+/* -+ * LastItemOffset is a sign extended -ve quantity with LastItemOffset[26:3] == q_control[63:40] -+ * we sign extend this by setting LastItemOffset[63:27] to be #one. -+ */ -+#define E4_InputQueueLastItemOffset(control) ((((E4_int64) -1) << (64 - (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3))) | \ -+ ((E4_int64) (((control) & E4_INPUTQ_LASTITEM_OFFSET_MASK) >> (E4_INPUTQ_LASTITEM_OFFSET_SHIFT-3)))) -+#define E4_InputQueueItemSize(control) (((control) & E4_INPUTQ_ITEMSIZE_MASK) >> (E4_INPUTQ_ITEMSIZE_SHIFT-3)) -+ -+/* -+ * Macro to increment the InputQ front pointer taking into account wrap -+ */ -+#define E4_InputQueueFptrIncrement(Q, FirstItem, LastItem, ItemSizeInBytes) \ -+ ((Q)->q_fptr = ( ((Q)->q_fptr == (LastItem)) ? (FirstItem) : ((Q)->q_fptr + (ItemSizeInBytes))) ) -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_EVENTS_H */ -Index: linux-2.6.5-7.191/include/elan4/i2c.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/i2c.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/i2c.h 2005-07-28 14:52:52.961661560 -0400 -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_I2C_H -+#define _ELAN4_I2C_H -+ -+#ident "@(#)$Id: i2c.h,v 1.10 2003/12/02 16:11:22 lee Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/i2c.h,v $*/ -+ -+/* I2C address space - bits[7:1] */ -+#define I2C_LED_I2C_ADDR 0x20 -+#define I2C_TEMP_ADDR 0x48 -+#define I2C_EEPROM_ADDR 0x50 -+ -+#define I2C_WRITE_ADDR(addr) ((addr) << 1 | 0) -+#define I2C_READ_ADDR(addr) ((addr) << 1 | 1) -+ -+/* I2C EEPROM appears as 8 I2C 256 byte devices */ -+#define I2C_24LC16B_BLOCKSIZE (256) -+#define I2C_24LC16B_BLOCKADDR(addr) ((addr) >> 8) -+#define I2C_24LC16B_BLOCKOFFSET(addr) ((addr) & 0xff) -+ -+#define I2C_ELAN_EEPROM_PCI_BASEADDR 0 /* PCI config starts at addr 0 in the EEPROM */ -+#define I2C_ELAN_EEPROM_VPD_BASEADDR 256 /* VPD data start */ -+#define I2C_ELAN_EEPROM_PCI_SIZE 256 /* PCI data max size */ -+#define I2C_ELAN_EEPROM_VPD_SIZE 256 /* VPD data max size */ -+ -+#define I2C_ELAN_EEPROM_SIZE 2048 -+ -+#define I2C_ELAN_EEPROM_DEVICE_ID 0xA0 -+#define I2C_ELAN_EEPROM_FAIL_LIMIT 8 -+ -+#define I2C_ELAN_EEPROM_ADDR_BLOCKSIZE_SHIFT 0x8 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_MASK 0x7 -+#define I2C_ELAN_EEPROM_ADDR_BLOCK_SHIFT 0x1 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* _ELAN4_I2C_H */ -Index: linux-2.6.5-7.191/include/elan4/intcookie.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/intcookie.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/intcookie.h 2005-07-28 14:52:52.961661560 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: intcookie.h,v 1.10 2004/08/09 14:02:37 daniel Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/intcookie.h,v $*/ -+ -+#ifndef __ELAN4_INTCOOKIE_H -+#define __ELAN4_INTCOOKIE_H -+ -+typedef E4_uint64 ELAN4_INTCOOKIE; -+ -+#ifdef __KERNEL__ -+ -+typedef struct intcookie_entry -+{ -+ struct intcookie_entry *ent_next; -+ struct intcookie_entry *ent_prev; -+ -+ spinlock_t ent_lock; -+ unsigned ent_ref; -+ -+ ELAN4_INTCOOKIE ent_cookie; -+ ELAN4_INTCOOKIE ent_fired; -+ kcondvar_t ent_wait; -+} INTCOOKIE_ENTRY; -+ -+typedef struct intcookie_table -+{ -+ struct intcookie_table *tbl_next; -+ struct intcookie_table *tbl_prev; -+ -+ ELAN_CAPABILITY *tbl_cap; -+ -+ spinlock_t tbl_lock; -+ unsigned tbl_ref; -+ INTCOOKIE_ENTRY *tbl_entries; -+} INTCOOKIE_TABLE; -+ -+extern void intcookie_init(void); -+extern void intcookie_fini(void); -+extern INTCOOKIE_TABLE *intcookie_alloc_table (ELAN_CAPABILITY *cap); -+extern void intcookie_free_table (INTCOOKIE_TABLE *tbl); -+extern int intcookie_alloc (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_free (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_fire_cap (ELAN_CAPABILITY *cap, ELAN4_INTCOOKIE cookie); -+extern int intcookie_wait (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+extern int intcookie_arm (INTCOOKIE_TABLE *tbl, ELAN4_INTCOOKIE cookie); -+ -+#endif /* __KERNEL */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_INTCOOKIE_H */ -Index: linux-2.6.5-7.191/include/elan4/ioctl.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/ioctl.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/ioctl.h 2005-07-28 14:52:52.962661408 -0400 -@@ -0,0 +1,320 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_IOCTL_H -+#define __ELAN4_IOCTL_H -+ -+#ident "@(#)$Id: ioctl.h,v 1.27.6.2 2005/01/11 12:15:39 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/ioctl.h,v $*/ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#define ELAN4IO_CONTROL_PATHNAME "/dev/elan4/control%d" -+#define ELAN4IO_USER_PATHNAME "/dev/elan4/user%d" -+#define ELAN4IO_SDRAM_PATHNAME "/dev/elan4/sdram%d" -+#define ELAN4IO_MAX_PATHNAMELEN 32 -+ -+/* -+ * NOTE - ioctl values 0->0x1f are defined for -+ * generic/control usage. -+ */ -+ -+/* Macro to generate 'offset' to mmap "control" device */ -+#define OFF_TO_BAR(off) (((off) >> 28) & 0xF) -+#define OFF_TO_OFFSET(off) ((off) & 0x0FFFFFFF) -+#define GEN_OFF(bar,off) (((bar) << 28) | ((off) & 0x0FFFFFFF)) -+ -+/* Definiations for generic ioctls */ -+#define ELAN4IO_GENERIC_BASE 0x00 -+ -+typedef struct elan4io_stats_struct -+{ -+ int which; -+ unsigned long long ptr; /* always pass pointer as 64 bit */ -+} ELAN4IO_STATS_STRUCT; -+ -+#define ELAN4IO_STATS _IOR ('e', ELAN4IO_GENERIC_BASE + 0, ELAN4IO_STATS_STRUCT) -+#define ELAN4IO_DEVINFO _IOR ('e', ELAN4IO_GENERIC_BASE + 1, ELAN_DEVINFO) -+#define ELAN4IO_POSITION _IOR ('e', ELAN4IO_GENERIC_BASE + 2, ELAN_POSITION) -+ -+ -+/* -+ * Definitions for /dev/elan4/controlX -+ */ -+#define ELAN4IO_CONTROL_BASE 0x20 -+ -+#define ELAN4IO_GET_POSITION _IOR ('e', ELAN4IO_CONTROL_BASE + 0, ELAN_POSITION) -+#define ELAN4IO_SET_POSITION _IOW ('e', ELAN4IO_CONTROL_BASE + 1, ELAN_POSITION) -+#define ELAN4IO_DEBUG_SNAPSHOT _IOW ('e', ELAN4IO_CONTROL_BASE + 2, ) -+ -+typedef struct elan4io_params_mask_struct -+{ -+ unsigned short p_mask; -+ ELAN_PARAMS p_params; -+} ELAN4IO_PARAMS_STRUCT; -+#define ELAN4IO_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN4IO_PARAMS_STRUCT) -+#define ELAN4IO_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN4IO_PARAMS_STRUCT) -+ -+/* old versions - implicit p_mask == 3 */ -+#define ELAN4IO_OLD_GET_PARAMS _IOR ('e', ELAN4IO_CONTROL_BASE + 3, ELAN_PARAMS) -+#define ELAN4IO_OLD_SET_PARAMS _IOW ('e', ELAN4IO_CONTROL_BASE + 4, ELAN_PARAMS) -+ -+/* -+ * Definitions for /dev/elan4/userX -+ */ -+#define ELAN4IO_USER_BASE 0x40 -+ -+#define ELAN4IO_FREE _IO ('e', ELAN4IO_USER_BASE + 0) -+#define ELAN4IO_ATTACH _IOWR ('e', ELAN4IO_USER_BASE + 1, ELAN_CAPABILITY) -+#define ELAN4IO_DETACH _IOWR ('e', ELAN4IO_USER_BASE + 2, ELAN_CAPABILITY) -+#define ELAN4IO_BLOCK_INPUTTER _IO ('e', ELAN4IO_USER_BASE + 3) -+ -+typedef struct elan4io_add_p2pvp_struct -+{ -+ unsigned vp_process; -+ ELAN_CAPABILITY vp_capability; -+} ELAN4IO_ADD_P2PVP_STRUCT; -+ -+#define ELAN4IO_ADD_P2PVP _IOW ('e', ELAN4IO_USER_BASE + 4, ELAN4IO_ADD_P2PVP_STRUCT) -+ -+typedef struct elan4io_add_bcastvp_struct -+{ -+ unsigned int vp_process; -+ unsigned int vp_lowvp; -+ unsigned int vp_highvp; -+} ELAN4IO_ADD_BCASTVP_STRUCT; -+ -+#define ELAN4IO_ADD_BCASTVP _IOW ('e', ELAN4IO_USER_BASE + 5, ELAN4IO_ADD_BCASTVP_STRUCT) -+ -+#define ELAN4IO_REMOVEVP _IO ('e', ELAN4IO_USER_BASE + 6) -+ -+typedef struct elan4io_route_struct -+{ -+ unsigned int rt_process; -+ unsigned int rt_error; -+ E4_VirtualProcessEntry rt_route; -+} ELAN4IO_ROUTE_STRUCT; -+ -+#define ELAN4IO_SET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 7, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_RESET_ROUTE _IOW ('e', ELAN4IO_USER_BASE + 9, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_GET_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 8, ELAN4IO_ROUTE_STRUCT) -+#define ELAN4IO_CHECK_ROUTE _IOWR ('e', ELAN4IO_USER_BASE + 10, ELAN4IO_ROUTE_STRUCT) -+ -+typedef struct elan4io_alloc_cq_struct -+{ -+ unsigned int cq_size; /* input: size of queue */ -+ unsigned int cq_perm; /* input: requested permissions */ -+ unsigned int cq_type; /* input: queue type */ -+ unsigned int cq_indx; /* output: queue number */ -+} ELAN4IO_ALLOCCQ_STRUCT; -+ -+#define ELAN4IO_ALLOCCQ _IOWR ('e', ELAN4IO_USER_BASE + 11, ELAN4IO_ALLOCCQ_STRUCT) -+#define ELAN4IO_FREECQ _IOWR ('e', ELAN4IO_USER_BASE + 12, unsigned) -+ -+#define ELAN4IO_CQ_TYPE_REORDER 1 /* revb reordering command queue */ -+ -+typedef struct elan4io_perm_struct -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned long ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT; -+ -+typedef struct elan4io_perm_struct32 -+{ -+ E4_Addr ps_eaddr; -+ E4_uint64 ps_len; -+ unsigned int ps_maddr; -+ unsigned int ps_perm; -+} ELAN4IO_PERM_STRUCT32; -+ -+#define ELAN4IO_SETPERM _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_SETPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 13, ELAN4IO_PERM_STRUCT32) -+#define ELAN4IO_CLRPERM _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT) -+#define ELAN4IO_CLRPERM32 _IOWR ('e', ELAN4IO_USER_BASE + 14, ELAN4IO_PERM_STRUCT32) -+ -+typedef struct elan4io_trapsig_struct -+{ -+ int ts_signo; -+} ELAN4IO_TRAPSIG_STRUCT; -+#define ELAN4IO_TRAPSIG _IOW ('e', ELAN4IO_USER_BASE + 15, ELAN4IO_TRAPSIG_STRUCT) -+ -+typedef struct elan4io_traphandler_struct -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned long th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT; -+ -+typedef struct elan4io_traphandler_struct32 -+{ -+ unsigned int th_nticks; /* number of ticks to sleep for next trap */ -+ unsigned int th_proc; /* elan processor involved */ -+ unsigned int th_trapp; /* space to store trap */ -+} ELAN4IO_TRAPHANDLER_STRUCT32; -+ -+#define ELAN4IO_TRAPHANDLER _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT) -+#define ELAN4IO_TRAPHANDLER32 _IOW ('e', ELAN4IO_USER_BASE + 16, ELAN4IO_TRAPHANDLER_STRUCT32) -+ -+typedef struct elan4io_required_mappings_struct -+{ -+ E4_Addr rm_upage_addr; /* elan address of user page */ -+ E4_Addr rm_trestart_addr; /* elan address of tproc restart trampoline */ -+} ELAN4IO_REQUIRED_MAPPINGS_STRUCT; -+#define ELAN4IO_REQUIRED_MAPPINGS _IOW ('e', ELAN4IO_USER_BASE + 17, ELAN4IO_REQUIRED_MAPPINGS_STRUCT) -+ -+typedef struct elan4io_resume_eproc_trap_struct -+{ -+ E4_Addr rs_addr; -+} ELAN4IO_RESUME_EPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_EPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 18, ELAN4IO_RESUME_EPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_cproc_trap_struct -+{ -+ unsigned int rs_indx; -+} ELAN4IO_RESUME_CPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_CPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 19, ELAN4IO_RESUME_CPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_dproc_trap_struct -+{ -+ E4_DMA rs_desc; -+} ELAN4IO_RESUME_DPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_DPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 20, ELAN4IO_RESUME_DPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_tproc_trap_struct -+{ -+ E4_ThreadRegs rs_regs; -+} ELAN4IO_RESUME_TPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_TPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 21, ELAN4IO_RESUME_TPROC_TRAP_STRUCT) -+ -+typedef struct elan4io_resume_iproc_trap_struct -+{ -+ unsigned int rs_channel; -+ unsigned int rs_trans; -+ E4_IprocTrapHeader rs_header; -+ E4_IprocTrapData rs_data; -+} ELAN4IO_RESUME_IPROC_TRAP_STRUCT; -+#define ELAN4IO_RESUME_IPROC_TRAP _IOW ('e', ELAN4IO_USER_BASE + 22, ELAN4IO_RESUME_IPROC_TRAP_STRUCT) -+ -+#define ELAN4IO_FLUSH_ICACHE _IO ('e', ELAN4IO_USER_BASE + 23) -+#define ELAN4IO_STOP_CTXT _IO ('e', ELAN4IO_USER_BASE + 24) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 25, ELAN4_INTCOOKIE) -+#define ELAN4IO_FREE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 26, ELAN4_INTCOOKIE) -+#define ELAN4IO_ARM_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 27, ELAN4_INTCOOKIE) -+#define ELAN4IO_WAIT_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 28, ELAN4_INTCOOKIE) -+ -+typedef struct elan4io_alloc_trap_queues_struct -+{ -+ unsigned int tq_ndproc_traps; -+ unsigned int tq_neproc_traps; -+ unsigned int tq_ntproc_traps; -+ unsigned int tq_nthreads; -+ unsigned int tq_ndmas; -+} ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT; -+#define ELAN4IO_ALLOC_TRAP_QUEUES _IOW ('e', ELAN4IO_USER_BASE + 29, ELAN4IO_ALLOC_TRAP_QUEUES_STRUCT) -+ -+typedef struct elan4io_neterr_msg_struct -+{ -+ unsigned int nm_vp; -+ unsigned int nm_nctx; -+ unsigned int nm_retries; -+ unsigned int nm_pad; -+ ELAN4_NETERR_MSG nm_msg; -+} ELAN4IO_NETERR_MSG_STRUCT; -+#define ELAN4IO_NETERR_MSG _IOW ('e', ELAN4IO_USER_BASE + 30, ELAN4IO_NETERR_MSG_STRUCT) -+ -+typedef struct elan4io_neterr_timer_struct -+{ -+ unsigned int nt_usecs; -+} ELAN4IO_NETERR_TIMER_STUCT; -+ -+#define ELAN4IO_NETERR_TIMER _IO ('e', ELAN4IO_USER_BASE + 31) -+ -+typedef struct elan4io_neterr_fixup_struct -+{ -+ E4_uint64 nf_cookie; -+ unsigned int nf_waitforeop; -+ unsigned int nf_sten; -+ unsigned int nf_vp; -+ unsigned int nf_pad; -+} ELAN4IO_NETERR_FIXUP_STRUCT; -+ -+#define ELAN4IO_NETERR_FIXUP _IOW ('e', ELAN4IO_USER_BASE + 32, ELAN4IO_NETERR_FIXUP_STRUCT) -+ -+typedef struct elan4io_firecap_struct -+{ -+ ELAN_CAPABILITY fc_capability; -+ ELAN4_INTCOOKIE fc_cookie; -+} ELAN4IO_FIRECAP_STRUCT; -+ -+#define ELAN4IO_FIRE_INTCOOKIE _IOW ('e', ELAN4IO_USER_BASE + 33, ELAN4IO_FIRECAP_STRUCT) -+ -+#define ELAN4IO_ALLOC_INTCOOKIE_TABLE _IOW ('e', ELAN4IO_USER_BASE + 34, ELAN_CAPABILITY) -+#define ELAN4IO_FREE_INTCOOKIE_TABLE _IO ('e', ELAN4IO_USER_BASE + 35) -+ -+typedef struct elan4io_translation -+{ -+ E4_Addr tr_addr; -+ unsigned long tr_len; -+ unsigned int tr_access; -+} ELAN4IO_TRANSLATION_STRUCT; -+ -+#define ELAN4IO_LOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 36, ELAN4IO_TRANSLATION_STRUCT) -+#define ELAN4IO_UNLOAD_TRANSLATION _IOW ('e', ELAN4IO_USER_BASE + 37, ELAN4IO_TRANSLATION_STRUCT) -+ -+typedef struct elan4io_dumpcq_struct32 -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned int buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT32; -+ -+typedef struct elan4io_dumpcq_struct -+{ -+ E4_uint64 cq_space; /* output: sdram addr of q, used to decode ptrs */ -+ E4_uint32 cq_size; /* output: The real size of the command queue */ -+ E4_uint32 bufsize; /* input: The size of the buffer to dump to */ -+ E4_uint32 cq_indx; /* input: index of cq to dump */ -+ unsigned long buffer; /* input: user address of rgs->buffer to dump to */ -+} ELAN4IO_DUMPCQ_STRUCT; -+ -+#define ELAN4IO_DUMPCQ _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT) -+#define ELAN4IO_DUMPCQ32 _IOWR ('e', ELAN4IO_USER_BASE + 38, ELAN4IO_DUMPCQ_STRUCT32) -+ -+/* mmap offsets - - we define the file offset space as follows: -+ * -+ * page 0 - 4095 - command queues -+ * page 4096 - device user registers -+ * page 4097 - flag page/user stats -+ * page 4098 - device stats -+ * page 4099 - tproc trampoline -+ */ -+ -+#define ELAN4_OFF_COMMAND_QUEUES 0 -+#define ELAN4_OFF_USER_REGS 4096 -+#define ELAN4_OFF_USER_PAGE 4097 -+#define ELAN4_OFF_DEVICE_STATS 4098 -+#define ELAN4_OFF_TPROC_TRAMPOLINE 4099 -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_IOCTL_H */ -Index: linux-2.6.5-7.191/include/elan4/mmu.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/mmu.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/mmu.h 2005-07-28 14:52:52.963661256 -0400 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: mmu.h,v 1.11 2004/04/21 12:04:24 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/mmu.h,v $*/ -+ -+ -+#ifndef __ELAN4_MMU_H -+#define __ELAN4_MMU_H -+ -+typedef struct elan4_hash_entry -+{ -+ struct elan4_hash_entry *he_next; -+ struct elan4_hash_entry *he_prev; -+ -+ sdramaddr_t he_entry; -+ -+ struct elan4_hash_entry *he_chain[2]; -+ E4_uint64 he_tag[2]; -+ E4_uint32 he_pte[2]; -+} ELAN4_HASH_ENTRY; -+ -+#define ELAN4_HENT_CHUNKS 16 /* SDRAM_MIN_BLOCK_SIZE/sizeof (E4_HashTableEntry) */ -+ -+typedef struct elan4_hash_chunk -+{ -+ struct list_head hc_link; -+ ELAN4_HASH_ENTRY hc_hents[ELAN4_HENT_CHUNKS]; -+} ELAN4_HASH_CHUNK; -+ -+typedef struct elan4_hash_cache -+{ -+ E4_Addr hc_start; -+ E4_Addr hc_end; -+ int hc_tbl; -+ -+ ELAN4_HASH_ENTRY *hc_hes[1]; -+} ELAN4_HASH_CACHE; -+ -+/* -+ * he_pte is really 4 bytes of pte "type" one for each pte -+ * entry - however we declare it as an "int" so we can -+ * easily determine that all 4 entries are invalid -+ */ -+#define HE_SET_PTE(he,tagidx,pteidx,val) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx] = (val)) -+#define HE_GET_PTE(he,tagidx,pteidx) (((E4_uint8 *) &(he->he_pte[tagidx]))[pteidx]) -+ -+/* -+ * he_tag has the following form : -+ * [63:27] tag -+ * [20:17] pte valid -+ * [16] locked -+ * [15] copy -+ * [14] valid -+ * [13:0] context -+ */ -+ -+#define HE_TAG_VALID (1 << 14) -+#define HE_TAG_COPY (1 << 15) -+#define HE_TAG_LOCKED (1 << 16) -+ -+#define INVALID_CONTEXT 0 -+ -+extern u_char elan4_permtable[]; -+#define ELAN4_INCOMPAT_ACCESS(perm,access) ((elan4_permtable[(perm)] & (1 << (access))) == 0) -+extern u_char elan4_permreadonly[]; -+#define ELAN4_PERM_READONLY(perm) (elan4_permreadonly[(perm)]) -+ -+/* return code from elan4mmu_categorise_paddr */ -+#define ELAN4MMU_PADDR_SDRAM 0 -+#define ELAN4MMU_PADDR_COMMAND 1 -+#define ELAN4MMU_PADDR_LOCALPCI 2 -+#define ELAN4MMU_PADDR_PAGE 3 -+#define ELAN4MMU_PADDR_OTHER 4 -+ -+extern int elan4_debug_mmu; -+ -+#ifdef DEBUG_PRINTF -+# define MPRINTF(ctxt,lvl,args...) (elan4_debug_mmu > (lvl) ? elan4_debugf(ctxt,DBG_MMU, ##args) : (void)0) -+#else -+# define MPRINTF(ctxt,lvl,args...) ((void) 0) -+#endif -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_MMU_H */ -Index: linux-2.6.5-7.191/include/elan4/neterr.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/neterr.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/neterr.h 2005-07-28 14:52:52.963661256 -0400 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_NETERR_H -+#define __ELAN4_NETERR_H -+ -+#ident "@(#)$Id: neterr.h,v 1.1 2004/01/19 14:38:34 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elan4mod/neterr.h,v $*/ -+ -+typedef struct elan4_neterr_msg -+{ -+ E4_uint8 msg_type; -+ E4_uint8 msg_waitforeop; -+ E4_uint16 msg_context; /* network context # message sent to */ -+ E4_int16 msg_found; /* # cookie found (response) */ -+ -+ ELAN_LOCATION msg_sender; /* nodeid/context # message sent from */ -+ E4_uint32 msg_pad; -+ -+ E4_uint64 msg_cookies[6]; /* 64 bit cookies from identify packets */ -+} ELAN4_NETERR_MSG; -+ -+#define ELAN4_NETERR_MSG_SIZE sizeof (ELAN4_NETERR_MSG) -+#define ELAN4_NETERR_MSG_REQUEST 1 -+#define ELAN4_NETERR_MSG_RESPONSE 2 -+ -+#define ELAN4_NETERR_MAX_COOKIES (sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies) / \ -+ sizeof (((ELAN4_NETERR_MSG *) 0)->msg_cookies[0])) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_NETERR_H */ -Index: linux-2.6.5-7.191/include/elan4/pci.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/pci.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/pci.h 2005-07-28 14:52:52.964661104 -0400 -@@ -0,0 +1,227 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_PCI_H -+#define __ELAN4_PCI_H -+ -+#ident "$Id: pci.h,v 1.32 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/pci.h,v $*/ -+ -+/* Elan has 2 64 bit bars */ -+#define ELAN4_BAR_SDRAM 0 -+#define ELAN4_BAR_REGISTERS 2 -+ -+#define PCI_VENDOR_ID_QUADRICS 0x14fc -+#define PCI_DEVICE_ID_ELAN3 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN3_REVB 0x0001 -+#define PCI_DEVICE_ID_ELAN4 0x0001 -+#define PCI_REVISION_ID_ELAN4_REVA 0x0000 -+#define PCI_REVISION_ID_ELAN4_REVB 0x0001 -+ -+/* support standard pseudo bars */ -+#define ELAN4_PSEUDO_BAR_ROM 8 -+ -+/* Elan PCI control -+ configuration space register. ElanControlRegister */ -+#define PCI_ELAN_PARITY_ADDR_LO 0x40 -+#define PCI_ELAN_PARITY_ADDR_HI 0x44 -+#define PCI_ELAN_PARITY_TYPE 0x48 -+#define PCI_ELAN_CONTROL 0x4c -+#define PCI_ELAN_PLL_CONTROL 0x50 -+#define PCI_ELAN_SPLIT_MESSAGE_ATTR 0x54 -+#define PCI_ELAN_SPLIT_MESSAGE_VALUE 0x54 -+#define PCI_ELAN_RAMBIST_FAILED 0x54 -+#define PCI_ELAN_TOPPHYSADDR(i) (0x58 + ((i)<<1)) -+ -+/* -+ * [31] PciM66EN This is set it the bus is running in PCI2.3 - 66MHz mode. -+ * [30:28] InitPattern This gives the PCI-X startup mode. See "Pci intialisation patterns" below. -+ * [27] notBusIs64Bits If set the bus is running 32 bits wide. If Clear it is a 64 bit bus. -+ * [26:24] RamBistCntl Used to control the Elan4 RAM BIST. Not acitive it zero. -+ * [23] RamBistFinished Only used when performing the RAM BIST test. -+ * [22] SelectSplitMessAttr See ECTRL_SELECT_SPLIT_MESS_ATTR below. -+ * [21] ReceivedSplitCompError See ECTRL_REC_SPLIT_COMP_MESSAGE below -+ * [20:16] WriteHighPriTime Used with ReadHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of WriteHighPriTime the longer -+ * the PCI write bursts will be allowed without interruption from a read transfer. -+ * [15] DisableCouplingTest This is only used as part of the RAM BIST test. It effects the testing of the main -+ * cache tag RAMS. -+ * [14:13] Not used Will read as zero. -+ * [12:8] ReadHighPriTime Used with WriteHighPriTime to control the ratio of PCI master write to PCI master -+ * read bandwidth under heavy load. The high the value of ReadHighPriTime the longer -+ * the PCI read bursts will be allowed without interruption from a write transfer. -+ * [7] EnableLatencyCountReset This bit effect the behaviour of disconnects due to the removal of GNT# after the latency -+ * counter has expired. If set it will allow the latency counter to be reset each time the -+ * GNT# is reasserted. If asserted it should provided improved bandwidth on the PCI bus -+ * without increasing the maximum latency another device would have for access to the bus. -+ * It will increase the average latency of other devices. -+ * [6] ExtraMasterAddrBits This bit used to control the physical PCI addresses generated by the MMU. -+ * [5] ReducedPciDecode If set the PCI local memory BAR will decode 256Mbytes of PCI address space. If clear it -+ * will decode 2Gbyte of PCI address space. -+ * [4] ConfigInEBusRom If set the constant values of the Elan4 PCI configuration space will be taken from the -+ * EEPROM. If clear the internal values will be used. -+ * [3] EnableRd2_2Bursts This bit only effects the behaviour of burst reads when the PCI bus is operating in -+ * PCI-2.2 mode. It allows adjacent reads to be merged into longer bursts for higher -+ * performance. -+ * [2] SoftIntReset If set this bit will cause the Elan4 to reset itself with the exception of the PCI -+ * configuation space. All internal state machines will be put into the reset state. -+ * [1] EnableWrBursts This bit allows much longer PCI-X write bursts. If set it will stop the Elan4 from -+ * being completely PCI-X compliant as the Elan4 may request a long PCI-X write burst that -+ * it does not complete. However it should significantly increase the maximum PCI-X write -+ * bandwidth and is unlikely to cause problems with many PCI-X bridge chips. -+ * [0] InvertMSIPriority This bit effect the way MSI interrupts are generated. It provides flexiblity to generate -+ * the MSI interrupts in a different way to allow for different implimentations of MSI -+ * logic and still give the correct priority of Elan4 interrupts. -+ * -+ * {PciM66EN, InitPattern, notBusIs64Bits, RamBistCntl, RamBistFinished, -+ * SelectSplitMessAttr, ReceivedSplitCompError, WriteHighPriTime, -+ * DisableCouplingTest, 2'h0, ReadHighPriTime, -+ * EnableLatencyCountReset, ExtraMasterAddrBits, ReducedPciDecode, ConfigInEBusRom, -+ * EnableRd2_2Bursts, SoftIntReset, EnableWrBursts, InvertMSIPriority} -+ */ -+ -+#define ECTRL_INVERT_MSI_PRIO (1 << 0) -+#define ECTRL_ENABLE_WRITEBURSTS (1 << 1) -+#define ECTRL_SOFTWARE_INTERNAL_RESET (1 << 2) -+#define ECTRL_ENABLE_2_2READBURSTS (1 << 3) -+#define ECTRL_CONFIG_IN_EBUS_ROM (1 << 4) -+#define ECTRL_28_NOT_30_BIT_LOCAL_BAR (1 << 5) -+#define ECTRL_ExtraMasterAddrBits (1 << 6) -+#define ECTRL_ENABLE_LATENCY_RESET (1 << 7) -+#define ECTRL_DISABLE_COUPLING_TEST (1 << 15) -+ -+/* -+ * Ratio of the following two registers set the relative bandwidth given to intputer data -+ * versus other PCI pci traffic when scheduling new PCI master accesses. -+ */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_SHIFT (8) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_OTHER_HIGH_PRI_TIME_MASK (0x1f) -+ -+ -+#define ECTRL_IPROC_HIGH_PRI_TIME_SHIFT (16) /* Sets top 4 bits of 8 bit counter */ -+#define ECTRL_IPROC_HIGH_PRI_TIME_MASK (0x1f) -+ -+/* -+ * This is set if a split completion message is received. -+ * This will cause a PCI error interrupt. -+ * This error is cleared by writting a 1 to this bit. -+ */ -+#define ECTRL_REC_SPLIT_COMP_MESSAGE (1 << 21) -+/* -+ * This bit is used to select reading of either the Split message attribute value when -+ * set or the split completion message data value from 0x54 in the config space -+ * if the ECTRL_REC_SPLIT_COMP_MESSAGE bit is set. 0x54 returns the the BistFailed flags -+ * if any of the BIST control bits are set (bits 26 to 24) -+ */ -+#define ECTRL_SELECT_SPLIT_MESS_ATTR (1 << 22) -+ -+// Internal RAM bist control bits. -+// Three bits of state control the RAM BIST (Built in self test). -+// -+// These bits must not be set unless the ECTRL_SOFTWARE_INTERNAL_RESET bit has also been set! -+// -+// For a normal fast ram test assert ECTRL_BIST_FAST_TEST. -+// For a data retention test first write ECTRL_START_RETENTION_TEST then wait the retention period of -+// at least 1ms and preferably much longer then write ECTRL_CONTINUE_RETENTION_TEST then wait -+// again and finallly write ECTRL_FINISH_RETENTION_TEST. -+// -+// The read only bit ECTRL_BIST_FINISHED_TEST can be polled to check that the test has compleated. -+#define ECTRL_BIST_CTRL_SHIFT (24) -+#define ECTRL_BIST_CTRL_MASK (7 << 24) -+ -+#define ECTRL_BIST_FAST_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // old scheme -+#define ECTRL_START_RETENTION_TEST ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_CONTINUE_RETENTION_TEST ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_FINISH_RETENTION_TEST ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_BIST_KICK_OFF ((1 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) // new scheme -+#define ECTRL_BIST_MOVE_ON_ODD ((3 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_MOVE_ON_EVEN ((5 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+#define ECTRL_BIST_SCREAM_THROUGH ((7 << 24) | ECTRL_SOFTWARE_INTERNAL_RESET) -+ -+#define ECTRL_CLEAR_BIST_TEST (0 << 24) -+#define ECTRL_BIST_FINISHED_TEST (1 << 23) -+ -+// Read only current PCI bus type. -+#define ECTRL_RUNNING_32BIT_MODE (1 << 27) -+#define ECTRL_INITIALISATION_MODE (7 << 28) -+#define ECTRL_RUNNING_M66EN_MODE (1 << 31) -+ -+#define ECTRL_INIT_PATTERN_SHIFT (28) -+#define ECTRL_INIT_PATTERN_MASK (0x7) -+ -+// Pci intialisation patterns -+#define Pci2_2 (0 << 28) -+#define PciX50To66MHz (1 << 28) -+#define PciX66to100MHz (2 << 28) -+#define PciX100to133MHz (3 << 28) -+#define PciXReserved1 (4 << 28) -+#define PciXReserved2 (5 << 28) -+#define PciXReserved3 (6 << 28) -+#define PciXReserved4 (7 << 28) -+ -+/* Elan PCI pll and pad control configuration space register. ElanPllControlReg */ -+// This overrides the default PCI pll control settings. -+#define PciPll_FeedForwardISel0 (1 << 0) // Lsi name Z0 -+#define PciPll_FeedForwardISel1 (1 << 1) // Lsi name Z1 -+#define PciPll_ChargePumpISel0 (1 << 2) // Lsi name P0 -+#define PciPll_ChargePumpISel1 (1 << 3) // Lsi name P1 -+#define PciPll_EnableAutoReset (1 << 4) // Lsi name ENARST -+#define PciPll_RSEL200500 (1 << 5) // Lsi name Range Select, 0: 100 - 250MHz, 1: 200 - 500MHz -+#define PciPll_DivideFeedback (1 << 6) // Just used for test - This divides the shortcut feedback to the PCI PLL so that it can lock to the tester clock. -+#define PciPll_CutFeedback (1 << 7) // Just used for test - This disables the shortcut feedback. -+ -+// This overrides the default PCI BZ controler settings. -+#define PciBZ_UPDI (0xf << 8) -+#define PciBZ_WAIT_INT (0xf << 12) -+ -+// This overrides the default Sys and SDRam pll control settings. -+#define SysPll_FeedForwardISel0 (1 << 16) // Lsi name P0 -+#define SysPll_FeedForwardISel1 (1 << 17) // Lsi name P1 -+#define SysPll_ChargePumpISel0 (1 << 18) // Lsi name Z0 -+#define SysPll_ChargePumpISel1 (1 << 19) // Lsi name Z1 -+#define SysPll_EnableAutoReset (1 << 20) // Lsi name ENARST -+#define SysPll_DivPhaseCompInBy2 (1 << 21) // Lsi name NODIV (Should be DIV) -+#define SysPll_PllTestClkSel (1 << 22) // If asserted the master clock source is not taken from the pll. -+ -+#define Pll_ForceEBusADTristate (1 << 23) // Required to enable the testing of EnableAutoReset. Enables use of EBusAD[7] (rev A) -+#define Pll_LinkErrDirectToSDA (1 << 23) // Access to link error flag for triggering (rev B) -+ -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+// Config: with 800MHz Speeds are 266 200 160 133. -+// 0 = 133/133 (1:1) 6:6 1 -+// 1 = 160/133 (6:5) 5:6 1.2 -+// 2 = 200/133 (3:2) 4:6 1.5 -+// 3 = 266/133 (2:1) 3:6 2 -+// 4 = 200/200 (1:1) 4:4 1 -+// 5 = 266/200 (4:3) 3:4 1.33 -+ -+// Config: with 600MHz Speeds are 200 150 120 100 -+// 0 = 100/100 (1:1) 6:6 1 -+// 1 = 120/100 (6:5) 5:6 1.2 -+// 2 = 150/100 (3:2) 4:6 1.5 -+// 3 = 200/100 (2:1) 3:6 2 -+// 4 = 150/150 (1:1) 4:4 1 -+// 5 = 200/150 (4:3) 3:4 1.33 -+ -+#define ECTRL_SYS_CLOCK_RATIO_SHIFT (24) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Slow (0 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_6_5 (1 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_3_2 (2 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_2_1 (3 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_1_1Fast (4 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_4_3 (5 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_MAX_NORMAL (6) /* used to generate a valid random value */ -+#define GET_RANDOM_CLOCK_RATIO (Random(ECTRL_SYS_CLOCK_MAX_NORMAL) << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_PLL_TEST (6 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_TEST (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+#define ECTRL_SYS_CLOCK_RATIO_MASK (7 << ECTRL_SYS_CLOCK_RATIO_SHIFT) -+ -+#endif /* __ELAN4_PCI_H */ -Index: linux-2.6.5-7.191/include/elan4/registers.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/registers.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/registers.h 2005-07-28 14:52:52.967660648 -0400 -@@ -0,0 +1,1587 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_REGISTERS_H -+#define _ELAN4_REGISTERS_H -+ -+#ident "$Id: registers.h,v 1.117.2.3 2005/03/03 16:29:57 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/registers.h,v $*/ -+ -+/* -+ * Header file for internal slave mapping of the ELAN4 registers -+ */ -+ -+#define E4_CACHELINE_SIZE (64) -+#define E4_STACK_ALIGN (64) -+ -+#ifndef _ASM -+ -+#include -+#include -+#include -+ -+typedef volatile struct _E4_CacheSets -+{ -+ E4_uint64 Set0[1024]; /* 8k bytes per set */ -+ E4_uint64 Set1[1024]; /* 8k bytes per set */ -+ E4_uint64 Set2[1024]; /* 8k bytes per set */ -+ E4_uint64 Set3[1024]; /* 8k bytes per set */ -+} E4_CacheSets; -+ -+typedef union e4_cache_tag -+{ -+ struct { -+ E4_uint32 pad0; /* Undefined value when read */ -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 :10; /* 0-9 - reserved */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 :1; /* 31 - */ -+#else -+ E4_uint32 :1; /* 31 - */ -+ E4_uint32 AddrTag30to13:18; /* 30-13 - tag */ -+ E4_uint32 FillPending:1; /* 12 - fill pending */ -+ E4_uint32 Modified:1; /* 11 - modified */ -+ E4_uint32 LineError:1; /* 10 - line error */ -+ E4_uint32 :10; /* 0-9 - reserved */ -+#endif -+ } s; -+ E4_uint64 Value; -+} E4_CacheTag; -+ -+typedef volatile struct _E4_CacheTags -+{ -+ E4_CacheTag Tags[4][128]; /* 8k bytes per set, 64 byte cache line */ -+} E4_CacheTags; -+ -+#define E4_NumCacheSets 4 -+#define E4_NumCacheLines 128 -+#define E4_CacheLineSize 64 -+#define E4_CacheSize (E4_NumCacheSets * E4_NumCacheLines * E4_CacheLineSize) -+#define E4_CacheSetSize (E4_NumCacheLines * E4_CacheLineSize) -+ -+/* -+ * Run Queue pointers -+ * -+ * [62:35] FrontPointer[30:3] -+ * [33:32] Size Value -+ * [30:3] BackPointer[30:3] -+ */ -+#define E4_QueuePtrMask (0x7ffffff8ULL) -+#define E4_QueueSizeMask 3 -+#define E4_QueueEntrySize sizeof (E4_uint64) -+ -+#define E4_Queue8KBytes 0 -+#define E4_Queue64KBytes 1 -+#define E4_Queue512KBytes 2 -+#define E4_Queue4MBytes 3 -+ -+#define E4_QueueFrontValue(val,size) ((val) | (size)) -+#define E4_QueueValue(queue,size) (((E4_uint64) E4_QueueFrontValue(queue,size)) << 32 | ((E4_uint64) (queue))) -+ -+#define E4_QueueFrontPointer(val) /* extract queue front pointer from register */\ -+ (((val) >> 32) & E4_QueuePtrMask) -+#define E4_QueueBackPointer(val) /* extract queue back pointer from register */ \ -+ ((val) & E4_QueuePtrMask) -+#define E4_QueueSizeValue(val) /* extract queue size value from register */ \ -+ (((val) >> 32) & E4_QueueSizeMask) -+#define E4_QueueSize(value) /* queue size in bytes from size value */ \ -+ (1 << (((value)*3) + 13)) -+#define E4_QueueOffsetMask(fptr)\ -+ ((8192 << (((fptr) & E4_QueueSizeMask) << 3)) - 1) -+#define E4_QueueOffset(fptr)\ -+ ((fptr) & E4_QueueOffsetMask(fptr)) -+#define E4_QueueFrontPointerInc(fptr) \ -+ ( ((fptr) & ~E4_QueueOffsetMask(fptr)) | ((E4_QueueOffset(fptr) + 8) & E4_QueueOffsetMask(fptr)) ) -+ -+typedef union _E4_QueuePtr -+{ -+ E4_uint64 Value; -+ struct { -+ E4_uint32 Back; -+ E4_uint32 Front; -+ } s; -+} E4_QueuePtr; -+ -+/* -+ * DMA processor status register. -+ * -+ * [48] FirstSendTrans Set for the first packet of a dma. -+ * [47:46] TimeSliceCount Time left to timeslice. -+ * [45] DmaLastPacket Set for the last packet of a dma. -+ * [44] CurrPrefetchDma Dma descriptor the prefetcher is valid for. -+ * [43:39] PrefetcherState Dma prefetcher's state machines value. -+ * [38:33] PacketAssemblyState Packet assembler's state machines value. -+ * [32:31] PrefetcherWakeupFnt Dma prefetcher's wakeup function. -+ * [30:28] PacketAssWakeupFnt Packet assembler's wakeup function. -+ * [27] AckBufferValid Packet ack is valid. -+ * [26] PrefetchedDataProblem Had either a data read fault or data error. Valid if AckBufferValid. -+ * [25] PrefetcherHalting Prefetch data about to stop for halt. Valid if AckBufferValid. -+ * [24] PacketTimeout Packet timeout. Sent an EopError. Valid if AckBufferValid set. -+ * [23:22] PacketAckValue Packet ack type. Valid if AckBufferValid set. -+ * [21:20] FaultUnitNo Set if the dma prefetcher has faulted. -+ * [19:17] TrapType Packet assembler's trap type. -+ * [16] PrefetcherFault Set if the dma prefetcher has faulted for this DMA unit. -+ * [15] Remote The Dma had been issued remotly -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define DPROC_FirstSendTrans(s) ((unsigned)((s) >> 48) & 1) -+#define DPROC_TimeSliceCount(s) ((unsigned)(((s) >> 46) & 3) -+#define DPROC_DmaLastPacket(s) ((unsigned)((s) >> 45) & 1) -+#define DPROC_CurrPrefetchDma(s) ((unsigned)((s) >> 44) & 1) -+#define DPROC_PrefetcerState(s) ((unsigned)((s) >> 39) & 0x1f) -+#define DPROC_PacketAssemblerState(s) ((unsigned)((s) >> 33) & 0x1f) -+#define DPROC_PrefetcherWakeupFn(s) ((unsigned)((s) >> 31) & 3) -+#define DPROC_PacketAssemblerWakeupFn(s)((unsigned)((s) >> 28) & 3) -+#define DPROC_AckBufferValid(s) ((unsigned)((s) >> 27) & 1) -+#define DPROC_PrefetcherDataProblem(s) ((unsigned)((s) >> 26) & 1) -+#define DPROC_PrefetcherHalting(s) ((unsigned)((s) >> 25) & 1) -+#define DPROC_PacketTimeout(s) ((unsigned)((s) >> 24) & 1) -+#define DPROC_PacketAckValue(s) ((unsigned)((s) >> 22) & 3) -+#define DPROC_FaultUnitNo(s) ((unsigned)((s) >> 20) & 3) -+#define DPROC_TrapType(s) ((unsigned)((s) >> 17) & 7) -+#define DPROC_PrefetcherFault(s) ((unsigned)((s) >> 16) & 1) -+#define DPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define DPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define DPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Command processor status register. -+ * -+ * [26:21] CPState procs current state. -+ * [20] WakeupFnt procs wakeup function. -+ * [19:16] TrapValue procs trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define CPROC_TrapType(s) ((unsigned)((s) >> 16) & 0xf) -+#define CPROC_Remote(s) ((unsigned)((s) >> 15) & 0x1) -+#define CPROC_Priority(s) ((unsigned)((s) >> 14) & 0x1) -+#define CPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Event processor status register. -+ * -+ * [34:30] CPState event procs current state. -+ * [29:28] WakeupFnt event procs wakeup function. -+ * [27:20] EventCopySize This is the number of DWords to still be copied on a copy dword event. -+ * [19] EProcPort1Fault CUN_EventProc1 has taken a translation fault. -+ * [18] EProcPort0Fault CUN_EventProc0 has taken a translation fault. -+ * [17:16] TrapValue event proc's trap value. -+ * [15] Remote Issued remotely. -+ * [14] Priority Running at high priority. -+ * [13:0] Context procs current context. -+ */ -+ -+#define EPROC_CPState(s) ((unsigned)((s) >> 30) & 0x1f) -+#define EPROC_WakeupFunction(s) ((unsigned)((s) >> 28) & 3) -+#define EPROC_CopySize(s) ((unsigned)((s) >> 20) & 0xFF) -+#define EPROC_Port1Fault(s) ((unsigned)((s) >> 19) & 1) -+#define EPROC_Port0Fault(s) ((unsigned)((s) >> 18) & 1) -+#define EPROC_TrapType(s) ((unsigned)((s) >> 16) & 3) -+#define EPROC_Remote(s) ((unsigned)((s) >> 15) & 1) -+#define EPROC_Priority(s) ((unsigned)((s) >> 14) & 1) -+#define EPROC_Context(s) ((unsigned)(s) & 0x3fff) -+ -+/* -+ * Thread processor status register. -+ * -+ * [39:24] MemPortBusy 16 bits of port busy flags for all FFU memory ports. -+ * [23:21] Reads as zero -+ * [20:18] TQState State vector for thread queuing proc. -+ * [17] HighRunQueueFull High priority run queue is full -+ * [16] LowRunQueueFull Low priority run queue is full -+ * [15] ReadyHigh More runable threads at high priority -+ * [14] ReadyLow More runable threads at low priority -+ * [13:0] Context procs current context. -+ */ -+#define TPROC_HighRunQueueFull(s) ((unsigned)((s) >> 17) & 1) -+#define TPROC_LowRunQueueFull(s) ((unsigned)((s) >> 16) & 1) -+#define TPROC_ReadyHigh(s) ((unsigned)((s) >> 15) & 1) -+#define TPROC_ReadyLow(s) ((unsigned)((s) >> 14) & 1) -+#define TPROC_Context(s) ((unsigned)((s) & 0x3fff)) -+ -+/* -+ * Input processor status register -+ * -+ * [55] Last Trans (~EOP) -+ * [54] First Trans (~EOP) -+ * [53] Channel (~EOP) -+ * [52] Bad Length (~EOP) -+ * [51:50] Trans CRC Status (~EOP) -+ * [49:48] EOP type -+ * [47] EOP trap -+ * [46] Trapping priority -+ * [45] Trapping Channel -+ * [44:43] Bad ack sent -+ * [42:41] Good ack sent -+ * [40] Queueing Packet (~EOP) -+ * [39:36] Channel trapped bits -+ * [35:32] IProc Trap Value -+ * [31:16] Network Context (~EOP) -+ * [15:0] Transaction Type (~EOP) -+ */ -+#define IPROC_LastTrans(s) ((unsigned)((s) >> 55) & 0x1) -+#define IPROC_FirstTrans(s) ((unsigned)((s) >> 54) & 0x1) -+#define IPROC_Channel(s) ((unsigned)((s) >> 53) & 0x1) -+#define IPROC_BadLength(s) ((unsigned)((s) >> 52) & 0x1) -+#define IPROC_TransCRCStatus(s) ((unsigned)((s) >> 50) & 0x3) -+#define IPROC_EOPType(s) ((unsigned)((s) >> 48) & 0x3) -+#define IPROC_EOPTrap(s) ((unsigned)((s) >> 47) & 0x1) -+#define IPROC_InputterPri(s) ((unsigned)((s) >> 46) & 0x1) -+#define IPROC_InputterChan(s) ((unsigned)((s) >> 45) & 0x1) -+#define IPROC_BadAckSent(s) ((unsigned)((s) >> 43) & 0x3) -+#define IPROC_GoodAckSent(s) ((unsigned)((s) >> 41) & 0x3) -+#define IPROC_QueueingPacket(s) ((unsigned)((s) >> 40) & 0x1) -+#define IPROC_ChannelTrapped(s) ((unsigned)((s) >> 36) & 0xF) -+#define IPROC_TrapValue(s) ((unsigned)((s) >> 32) & 0xF) -+#define IPROC_NetworkContext(s) ((unsigned)((s) >> 16) & 0xFFFF) -+#define IPROC_TransactionType(s) ((unsigned)(s) & 0xFFFF) -+ -+/* values for IPROC_TransCRCStatus */ -+#define CRC_STATUS_GOOD (0) -+#define CRC_STATUS_DISCARD (1) -+#define CRC_STATUS_ERROR (2) -+#define CRC_STATUS_BAD (3) -+ -+/* values for IPROC_EOPType */ -+#define EOP_GOOD (1) -+#define EOP_BADACK (2) -+#define EOP_ERROR_RESET (3) -+ -+/* -+ * Interrupt register bits -+ * -+ * There are up to four sources of interrupt for the MSI port. -+ * The Elan will request 4 ports but may only get either 2 or 1 port. The Interrupts are assigned -+ * as shown below: -+ * No Of MSI ints Low Prioity High Prioity -+ * 4 Event Ints OtherInts Inputer Ints Hard Error ints. -+ * i.e. Dproc, Tproc, Sten. HighPri and LowPri Link errs, ECC errs, -+ * -+ * 2 Event Ints All other interrupts. -+ * 1 All together. -+ * -+ * It is not safe to change the number of sources of interrupt while there may be outstanding, -+ * unserviced interrupts pending. -+ * There two forms of encoding. This has been provided in case an MSI implimentation assumes either -+ * a high value to have a high priority or a low value to have a high priority. This is controled -+ * by a bit in the Elan Pci Control register. -+ */ -+#define INT_LinkPortKeyFail (1<<18) -+#define INT_PciMemErr (1<<17) -+#define INT_SDRamInt (1<<16) -+#define INT_LinkError (1<<15) -+#define INT_IProcCh1HighPri (1<<14) -+#define INT_IProcCh0HighPri (1<<13) -+#define INT_IProcCh1LowPri (1<<12) -+#define INT_IProcCh0LowPri (1<<11) -+#define INT_DiscardingHighPri (1<<10) -+#define INT_DiscardingLowPri (1<<9) -+#define INT_CProcHalted (1<<8) -+#define INT_TProcHalted (1<<7) -+#define INT_DProcHalted (1<<6) -+#define INT_EProc (1<<5) -+#define INT_TProc (1<<4) -+#define INT_CProc (1<<3) -+#define INT_Dma1Proc (1<<2) -+#define INT_Dma0Proc (1<<1) -+#define INT_MainInterrupt (1<<0) -+ -+#define INT_Units (INT_EProc | INT_TProc | INT_CProc | INT_Dma1Proc | INT_Dma0Proc) -+#define INT_Inputters (INT_IProcCh1HighPri | INT_IProcCh0HighPri | INT_IProcCh1LowPri | INT_IProcCh0LowPri) -+#define INT_Discarding (INT_DiscardingHighPri | INT_DiscardingLowPri) -+#define INT_Halted (INT_CProcHalted | INT_TProcHalted | INT_DProcHalted) -+#define INT_ErrorInterrupts (INT_PciMemErr | INT_SDRamInt | INT_LinkError) -+ -+#define INT_MSI0 INT_MainInterrupt -+#define INT_MSI1 (INT_Units | INT_Discarding | INT_Halted) -+#define INT_MSI2 (INT_Inputters) -+#define INT_MSI3 (INT_ErrorInterrupts) -+ -+#define E4_INTERRUPT_REG_SHIFT 32 -+#define E4_INTERRUPT_MASK_MASK (0xffffffffULL) -+ -+/* -+ * Trap type values - see trapvalues.v -+ */ -+ -+#define CommandProcInserterError 0x1 -+#define CommandProcPermissionTrap 0x2 -+#define CommandProcSendTransInvalid 0x3 -+#define CommandProcSendTransExpected 0x4 -+#define CommandProcDmaQueueOverflow 0x5 -+#define CommandProcInterruptQueueOverflow 0x6 -+#define CommandProcMemoryFault 0x7 -+#define CommandProcRouteFetchFault 0x8 -+#define CommandProcFailCountZero 0x9 -+#define CommandProcAddressAlignment 0xa -+#define CommandProcWaitTrap 0xb -+#define CommandProcMultipleGuards 0xc -+#define CommandProcOpenOnGuardedChan 0xd -+#define CommandProcThreadQueueOverflow 0xe -+#define CommandProcBadData 0xf -+ -+#define DmaProcNoFault 0x0 -+#define DmaProcRouteFetchFault 0x1 -+#define DmaProcFailCountError 0x2 -+#define DmaProcPacketAckError 0x3 -+#define DmaProcRunQueueReadFault 0x4 -+#define DmaProcQueueOverflow 0x5 -+ -+#define EventProcNoFault 0x0 -+#define EventProcAddressAlignment 0x1 -+#define EventProcMemoryFault 0x2 -+#define EventProcCountWrapError 0x3 -+ -+#define InputNoFault 0x0 -+#define InputAddressAlignment 0x1 -+#define InputMemoryFault 0x2 -+#define InputInvalidTransType 0x3 -+#define InputDmaQueueOverflow 0x4 -+#define InputEventEngineTrapped 0x5 -+#define InputCrcErrorAfterPAckOk 0x6 -+#define InputEopErrorOnWaitForEop 0x7 -+#define InputEopErrorTrap 0x8 -+#define InputDiscardAfterAckOk 0x9 -+ -+typedef struct _E4_Sched_Status -+{ -+ E4_uint32 Status; -+ E4_uint32 Restart; -+} E4_Sched_Status; -+ -+typedef struct _E4_Input_Ptrs -+{ -+ E4_uint32 ContextFilterTable; -+ E4_uint32 TrapBasePtr; -+} E4_Input_Ptrs; -+ -+#define SCH_StopLowPriQueues (1 << 0) -+#define SCH_DProcHalt (1 << 1) -+#define SCH_TProcHalt (1 << 2) -+#define SCH_CProcHalt (1 << 3) -+ -+#define SCH_CProcTimeout600ns (1 << 4) -+#define SCH_CProcTimeout1p4us (2 << 4) -+#define SCH_CProcTimeout3p0us (3 << 4) -+#define SCH_CProcTimeout6p2us (4 << 4) -+#define SCH_CProcTimeout12p6us (5 << 4) -+#define SCH_CProcTimeout25p4us (6 << 4) -+#define SCH_CProcTimeout51p0us (7 << 4) -+#define SCH_DiscardLowPriInput (1 << 7) -+#define SCH_DiscardHighPriInput (1 << 8) -+ -+#define SCH_DProcTimeslice64us (0 << 9) -+#define SCH_DProcTimeslice128us (1 << 9) -+#define SCH_DProcTimeslice256us (2 << 9) -+#define SCH_DProcTimeslice512us (3 << 9) -+ -+#define SCH_Halt (SCH_StopLowPriQueues | SCH_DProcHalt | SCH_TProcHalt | SCH_CProcHalt) -+#define SCH_Discard (SCH_DiscardLowPriInput | SCH_DiscardHighPriInput) -+ -+#define SCH_RestartCProc (1 << 0) -+#define SCH_RestartTProc (1 << 1) -+#define SCH_RestartEProc (1 << 2) -+#define SCH_RestartDma0Proc (1 << 3) -+#define SCH_RestartDma1Proc (1 << 4) -+#define SCH_RestartDmaPrefetchProc (1 << 5) -+#define SCH_RestartCh0LowPriInput (1 << 6) -+#define SCH_RestartCh1LowPriInput (1 << 7) -+#define SCH_RestartCh0HighPriInput (1 << 8) -+#define SCH_RestartCh1HighPriInput (1 << 9) -+#define SCH_ClearLinkErrorInt (1 << 10) -+#define SCH_ContextFilterFlush (1 << 11) -+ -+/* -+ * Link state bits. -+ */ -+#define LS_LinkNotReady (1 << 0) /* Link is in reset or recovering from an error */ -+#define LS_Locked (1 << 1) /* Linkinput PLL is locked */ -+#define LS_LockError (1 << 2) /* Linkinput PLL was unable to lock onto the input clock. */ -+#define LS_DeskewError (1 << 3) /* Linkinput was unable to Deskew all the inputs. (Broken wire?) */ -+#define LS_PhaseError (1 << 4) /* Linkinput Phase alignment error. */ -+#define LS_DataError (1 << 5) /* Received value was neither good data or a token. */ -+#define LS_FifoOvFlow0 (1 << 6) /* Channel 0 input fifo overflowed. */ -+#define LS_FifoOvFlow1 (1 << 7) /* Channel 1 input fifo overflowed. */ -+#define LS_Mod45Changed (1 << 8) /* Mod45 bit has changed. Error setr to force reset. */ -+#define LS_PAckNotSeenError (1 << 9) /* PAck value not returned for this packet. */ -+ -+/* -+ * Link State Constant defines, used for writing to LinkSetValue -+ */ -+ -+#define LRS_DataDel0 0x0 -+#define LRS_DataDel1 0x1 -+#define LRS_DataDel2 0x2 -+#define LRS_DataDel3 0x3 -+#define LRS_DataDel4 0x4 -+#define LRS_DataDel5 0x5 -+#define LRS_DataDel6 0x6 -+#define LRS_DataDel7 0x7 -+#define LRS_DataDel8 0x8 -+#define LRS_LinkInValue 0x9 -+#define LRS_PllDelValue 0xA -+#define LRS_ClockEven 0xB -+#define LRS_ErrorVal8to0 0xC -+#define LRS_ErrorVal17to9 0xD -+#define LRS_ErrorVal26to18 0xE -+#define LRS_ErrorVal35to27 0xF -+#define LRS_NumLinkDels 0x10 -+ -+#define LRS_Pllfast 0x40 -+ -+typedef struct _E4_CommandControl -+{ -+ volatile E4_uint32 CommandQueueDescsBase; -+ volatile E4_uint32 CommandRequeuePtr; -+} E4_CommandControl; -+ -+#define E4_CommandRequeueBusy 0x80000000 /* Test against read value of CommandRequeuePtr */ -+#define E4_CommandRequeueHighPri 0x1 /* Will requeue onto the high pri queue */ -+#define E4_QueueDescPtrMask 0x7fffffe0 -+ -+typedef struct _E4_CommandQueueDesc -+{ -+ E4_uint64 CQ_QueuePtrs; -+ E4_uint64 CQ_HoldingValue; /* 32 bit value for 32 bit accesses or OutOfOrderMask*/ -+ E4_uint64 CQ_AckBuffers; /* Space for 32 4 bit ack buffer values. */ -+ E4_uint64 CQ_Control; -+} E4_CommandQueueDesc; -+ -+/* -+ * Rev A - CQ_QueuePtrs -+ * [63] Unused Should be set to zero. -+ * [62:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] TimedOut Will be set if the queue timedout executing a command. -+ * [1] Priority When set the queue runs at high priority. -+ * [0] Error If this becomes set all new data written to the queue is * discarded. -+ * -+ * Rev B - CQ_QueuePtrs -+ * [63] TimedOut Will be set if the queue timedout executing a command. -+ * [62] Priority When set the queue runs at high priority. -+ * [61] QueueType 1=will accept unordered 64 bit PCI writes. 0=will accept ordered 32 or 64 bit PCI writes. -+ * [60:51] Unused (reads as top of InsertPtr) -+ * [50:35] CompletedPtr Completed pointer. This is alligned to a byte address. -+ * [34] Trapped Will be set if the command has trapped. -+ * [33:32] Size Size of queue. -+ * [31] Used Will be set if the descriptor has been changed and written back by the elan. -+ * [30:3] InsertPtr Insert pointer. This is alligned to a byte address. -+ * [2] OrderControl Holds bit 8 of last PCI accesses. Used by a reordering queue. -+ * [1:0] ErrorType This field has the current error status of the queue. -+ */ -+ -+/* Common between revA and RevB */ -+#define CQ_PtrMask (0x7ffffff8) /* 31 bit sdram address */ -+#define CQ_PtrOffsetMask (0x7fff8) -+#define CQ_PtrBaseMask (0x7ff80000) -+ -+#define CQ_InsertPtrShift (3 - 3) /* InsertPtr is 64 bit aligned */ -+#define CQ_SizeShift (32) -+# define CQ_Size1K 0 -+# define CQ_Size8K 1 -+# define CQ_Size64K 2 -+# define CQ_Size512K 3 -+# define CQ_SizeMask 3 -+ -+#define CQ_CompletedPtrShift (35 - 3) /* CompletedPtr is 64 but aligned */ -+ -+#define CQ_Used (1ull << 31) -+#define CQ_Trapped (1ull << 34) -+ -+#define CQ_QueuePtrsValue(Size,Inserter,Completer) \ -+ (((E4_uint64) (Size) << CQ_SizeShift) | \ -+ ((E4_uint64) (Inserter) << CQ_InsertPtrShift) | \ -+ ((E4_uint64) (Completer) << CQ_CompletedPtrShift)) -+ -+#define CQ_InsertPtr(QueuePtrs) \ -+ (((E4_uint64) QueuePtrs) & CQ_PtrMask) -+ -+#define CQ_CompletedPtr(QueuePtrs) \ -+ (((E4_uint32)((QueuePtrs) >> CQ_CompletedPtrShift) & CQ_PtrOffsetMask) | \ -+ (CQ_InsertPtr(QueuePtrs) & CQ_PtrBaseMask)) -+ -+#define CQ_Size(SizeVal) (1024 * (1 << ((SizeVal)*3))) -+ -+/* Rev A specific */ -+#define CQ_RevA_Error (1 << 0) -+#define CQ_RevA_Priority (1 << 1) -+#define CQ_RevA_TimedOut (1 << 2) -+ -+/* Rev B specific */ -+#define CQ_RevB_ErrorType(QueuePtr) ((QueuePtr) & (3 << 0)) -+# define CQ_RevB_NoError (0ull << 0) -+# define CQ_RevB_Overflowed (1ull << 0) -+# define CQ_RevB_InvalidWriteSize (2ull << 0) -+# define CQ_RevB_InvalidWriteOrder (3ull << 0) -+#define CQ_RevB_OrderControl (1ull << 2) -+ -+#define CQ_RevB_QueueType(QueuePtr) ((QueuePtr) & (1ull << 61)) -+# define CQ_RevB_ReorderingQueue (1ull << 61) -+# define CQ_RevB_32bitWriteQueue (0ull << 61) -+ -+#define CQ_RevB_Priority (1ull << 62) -+#define CQ_RevB_TimedOut (1ull << 62) -+ -+/* -+ * CQ_AckBuffers - Packet Ack Values -+ */ -+#define PackOk (0x0) -+#define PackTestFail (0x1) -+#define PackDiscard (0x2) -+#define PackError (0x7) -+#define PackTimeout (0x8) -+#define PackWaiting (0xF) -+#define PackValue(val,chan) (((val) >> ((chan) * 4)) & 0xf) -+ -+/* -+ * CQ_Control -+ * [63:35] ExtractPtr -+ * [34] Unused -+ * [33:32] ChannelNotCompleted -+ * [31:24] Permissions -+ * [23:16] RestartCount Decremented after each restart. Will trap when zero -+ * [15:14] Unused Should be set to zero -+ * [13:0] Context -+ */ -+#define CQ_Context(Control) ((E4_uint32) ((Control) >> 0) & 0x3fff) -+#define CQ_RestartCount(Control) ((E4_uint32) ((Control) >> 16) & 0x7f) -+#define CQ_ChannelNotCompleted(Control) ((E4_uint32) ((Control) >> 32) & 3) -+#define CQ_ExtractPtr(Control) ((E4_uint32) ((Control) >> 32) & 0xFFFFFFF8) -+ -+#define CQ_RestartCountShift 16 -+ -+#define CQ_SetEventEnableBit (1 << 24) -+#define CQ_WaitEventEnableBit (1 << 25) -+#define CQ_ModifyEnableBit (1 << 26) -+#define CQ_WriteEnableBit (1 << 27) -+#define CQ_ThreadStartEnableBit (1 << 28) -+#define CQ_DmaStartEnableBit (1 << 29) -+#define CQ_STENEnableBit (1 << 30) -+#define CQ_InterruptEnableBit (1 << 31) -+#define CQ_EnableAllBits (0xFF000000) -+#define CQ_PermissionMask (0xFF000000) -+ -+#define CQ_ControlValue(Cntx, RestartCount, Permissions) \ -+ (((Cntx) & 0x3fff) | (((RestartCount) & 0xff) << 16) | ((Permissions) & CQ_PermissionMask)) -+ -+/* -+ * This file describes the slave address map of Elan4. -+ * -+ * Elan4 has two PCI 64 bit base address registers. One is setup for elan -+ * local memory and the other is for the command port, elan registers and ebus. -+ * -+ * This file describes the command port, elan registers and ebus BAR. This is a -+ * 26 bit base address register and is split up as follows: -+ * 1 The ebus requires 21 bits of address. 26'h3e00000 to 26'h3ffffff -+ * 2 The control regsiters requires 16 bits of address. 26'h3df0000 to 26'h3dfffff -+ * 3 The command port has the rest. This give just under 8k command ports or about 123 per -+ * processor of a 64 node SMP. -+ */ -+ -+/* BAR1 contains the command queues followed by the registers and the Ebus - and is 26 bits */ -+/* each command queue has an 8K page associated with it */ -+#define CQ_CommandMappingSize (1 << 13) -+#define CQ_NumCommandDescs ((1 << (26 - 13))) -+#define CQ_CommandDescsAlignment ((1 << (26 - 13)) * sizeof (E4_CommandQueueDesc)) -+ -+/* control reg bits i.e. E4_DataBusMap.SysControlReg */ -+#define CONT_EN_ALL_SETS (1ULL << 0) /* enable cache */ -+#define CONT_MMU_ENABLE (1ULL << 1) /* bit 0 enables mmu */ -+#define CONT_CACHE_HASH_TABLE (1ULL << 2) /* cache up hash table entries */ -+#define CONT_CACHE_CHAINS (1ULL << 3) /* cache up chain entries */ -+#define CONT_CACHE_ROOT_CNTX (1ULL << 4) /* cache root context table for routes and filters. */ -+#define CONT_CACHE_STEN_ROUTES (1ULL << 5) /* cache up sten packet routes */ -+#define CONT_CACHE_DMA_ROUTES (1ULL << 6) /* cache up dma packet routes */ -+ -+#define CONT_CACHE_NONE 0ULL -+#define CONT_CACHE_ALL (CONT_CACHE_HASH_TABLE | CONT_CACHE_CHAINS | CONT_CACHE_ROOT_CNTX | \ -+ CONT_CACHE_STEN_ROUTES | CONT_CACHE_DMA_ROUTES) -+ -+/* This controls the format size and position of the MMU hash tables. */ -+#define CONT_INHIBIT_MAX_CHAIN_ITEMS (1ULL << 7) /* Prevents the MaxChainItems value of 1024 from forcing a translation miss */ -+#define CONT_TABLE0_MASK_SIZE_SHIFT 8 /* Defines the size of hash table 0 */ -+#define CONT_TABLE0_PAGE_SIZE_SHIFT 13 /* Set the page size for hash table 0 */ -+#define CONT_TABLE1_MASK_SIZE_SHIFT 16 /* Defines the size of hash table 1 */ -+#define CONT_TABLE1_PAGE_SIZE_SHIFT 21 /* Set the page size for hash table 1 */ -+#define CONT_TWO_HASH_TABLES (1ULL << 24) /* Sets the MMU to use two hash tables. If not set only 0 used. */ -+#define CONT_2K_NOT_1K_DMA_PACKETS (1ULL << 25) /* Used to select the default DMA packet size. */ -+#define CONT_ALIGN_ALL_DMA_PACKETS (1ULL << 26) /* Will force all dma packets to be aligned to a page.*/ -+#define CONT_DIRECT_MAP_PCI_WRITES (1ULL << 27) /* Will force pci writes to write and flush the dcache.*/ -+#define CONT_TLB_FLUSH (1ULL << 28) /* Invalidates the TLB and indicates when flushed */ -+#define CONT_CLEAR_WALK_WROTE_TABLES (1ULL << 29) /* Used to guarantee that the elan is using new PTE values. */ -+#define CONT_ROUTE_FLUSH (1ULL << 30) /* Invalidates all route cache entries. */ -+#define CONT_CLEAR_LINKPORT_INT (1ULL << 31) /* Clears the Linkport key fail interrupt. Reads as 0. */ -+#define CONT_CLEAR_SDRAM_ERROR (1ULL << 32) /* Clears an EEC error interrupt. Reads as 0. */ -+ -+/* -+ * These are extra control bits used for testing the DLLs of the SDRAM interface. Most of the Sdram -+ * control bits are defined in xsdram.h -+ */ -+#define SDRAM_FIXED_DLL_DELAY_SHIFT 47 -+#define SDRAM_FIXED_DLL_DELAY_BITS 5 -+#define SDRAM_FIXED_DLL_DELAY_MASK ((1ULL << SDRAM_FIXED_DLL_DELAY_BITS) - 1ULL) -+#define SDRAM_FIXED_DLL_DELAY(Value) ((SDRAM_FIXED_DLL_DELAY_MASK & (Value)) << SDRAM_FIXED_DLL_DELAY_SHIFT) -+#define SDRAM_FIXED_DELAY_ENABLE (1ULL << 52) -+#define SDRAM_GET_DLL_DELAY(Value) (((Value) >> SDRAM_FIXED_DLL_DELAY_SHIFT) & SDRAM_FIXED_DLL_DELAY_MASK) -+ -+#define SDRAM_166_DLL_CORRECTION_FACTOR 3 /* This is to allow for SSO and ringing on the DQ lines */ -+#define SDRAM_150_DLL_CORRECTION_FACTOR 2 /* This is to allow for SSO and ringing on the DQ lines */ -+ -+#define PAGE_SIZE_4K 0x0 -+#define PAGE_SIZE_8K 0x1 -+#define PAGE_SIZE_64K 0x2 -+#define PAGE_SIZE_512K 0x3 -+#define PAGE_SIZE_2M 0x4 -+#define PAGE_SIZE_4M 0x5 -+#define PAGE_SIZE_64M 0x6 -+#define PAGE_SIZE_512M 0x7 -+ -+#define PAGE_SIZE_MASK 0x7 -+#define PAGE_MASK_MASK 0x1f -+ -+/* control reg bits i.e. E4_DataBusMap.LinkControlReg */ -+#define LCONT_REVA_GREEN_LED (1 << 0) -+#define LCONT_REVA_YELLOW_LED (1 << 1) -+#define LCONT_REVA_RED_LED (1 << 2) -+#define LCONT_REVA_ENABLE_LED_DRIVE (1 << 3) /* Enable manual setting of the Leds to the bits set above. */ -+ -+#define LCONT_REVB_DISABLE_TLB_PREFETCH (1 << 0) -+#define LCONT_REVB_DISABLE_CRC_ERROR_CHECKING (1 << 1) -+ -+ -+#define LCONT_EN_SYS_WRITES (1 << 4) /* Enable linkport writes to sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_SYS_READS (1 << 5) /* Enable linkport reads from sys registers. i.e. all of E4_DataBusMap. */ -+#define LCONT_EN_USER_WRITES (1 << 6) /* Enable linkport writes to user registers. i.e. all of E4_User_Regs. */ -+#define LCONT_EN_USER_READS (1 << 7) /* Enable linkport reads from user registers. i.e. all of E4_User_Regs. */ -+ -+#define LCONT_TEST_VALUE_MASK 0x3ff /* Value used for test writes and link boundary scan. */ -+#define LCONT_TEST_VALUE_SHIFT 8 -+#define LCONT_TEST_VALUE(Value) ((LCONT_LINK_STATE_MASK & (Value)) << LCONT_TEST_VALUE_SHIFT) -+ -+/* -+ * State read from LINK_STATE when TEST_VALUE is set to the following values. -+ * TEST_VALUE LINK_STATE read TEST_VALUE LINK_STATE read -+ * 000 - Data delay count 0 008 - Data delay count 8 -+ * 001 - Data delay count 1 009 - Link in value -+ * 002 - Data delay count 2 00a - PLL delay -+ * 003 - Data delay count 3 00b - Clock Delay -+ * 004 - Data delay count 4 00c ? ErrorVal8to0 -+ * 005 - Data delay count 5 00d ? ErrorVal17to9 -+ * 006 - Data delay count 6 00e ? ErrorVal26to18 -+ * 007 - Data delay count 7 00f ? ErrorVal35to27 -+ */ -+ -+#define LCONT_TEST_CONTROL_MASK 0x3 /* Selects and controls the action of the LINK_STATE value. */ -+#define LCONT_TEST_CONTROL_SHIFT 18 -+ -+#define LCONT_READ_ERRORS 0 /* {Mod45RequestChanged, FifoOverflowError, DataError, PhaseError, -+ * DeskewError, LockError, Locked, LinkNotReady} */ -+#define LCONT_READ_STATE 1 /* Read valus addressed by TEST_CONTROL value */ -+#define LCONT_FIX_LINK_DELAYS 2 /* Sets delays to TEST_CONTROL value */ -+#define LCONT_BOUNDARY_SCAN 3 /* Puts link into boundary scan. Outputs TEST_CONTROL value to link, -+ * reads LINK_STATE from link. */ -+ -+#define LCONT_LINK_STATE_MASK 0x3ff /* Read only */ -+#define LCONT_LINK_STATE_SHIFT 20 /* Read only */ -+#define LCONT_LINK_STATE(ControlRegValue) (LCONT_LINK_STATE_MASK & ((ControlRegValue) >> LCONT_LINK_STATE_SHIFT)) -+ -+/* control reg bits i.e. E4_DataBusMap.LinkContSettings */ -+#define LCONT_MOD45_DISABLE (1 << 0) /* is set the link will try to run in TNB mode. */ -+#define LCONT_CONFIG_PHASE_MASK 0x7 /* This set the delay through the phase alignment buffer. */ -+#define LCONT_CONFIG_PHASE_SHIFT 1 -+ -+#define LCONT_PLL_REF_VAL_BITS_MASK 0x7f /* This is the divide value on the LinkIn clock to form the comms PLL */ -+#define LCONT_PLL_REF_VAL_BITS_SHIFT 4 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_FORCE_COMMSCLK_LOCAL (1 << 11) /* This must be set at one end of a back to back Elan configuration. */ -+#define LCONT_LVDS_VOLTAGE_BITS_MASK 0x3 /* This is used to set the voltage swing on the LVDS link output pads. */ -+#define LCONT_LVDS_VOLTAGE_BITS_SHIFT 12 /* reference clock. Div value is (n - 2). e.g. to Divide by 7 set to 5. */ -+ -+#define LCONT_VOD_170 0 /* Approximate differential voltage swing in mV of link outputs into */ -+#define LCONT_VOD_360 1 /* a 100 ohm diferential load. */ -+#define LCONT_VOD_460 2 -+#define LCONT_VOD_550 3 -+ -+#define LCONT_LVDS_TERMINATION_MASK 0x3 /* This set the resistor values of the internal single ended termation */ -+#define LCONT_LVDS_TERMINATION_SHIFT 14 /* resistors of the link input and comms input clcok. */ -+ -+#define LCONT_TERM_55_OHM 0 /* Resistor values for internal termination of LVDS pads. */ -+#define LCONT_TERM_50_OHM 1 -+#define LCONT_TERM_AUTO_OHM 2 /* Should normally be set to auto. */ -+#define LCONT_TERM_45_OHM 3 -+ -+#define LCONT_LVDS_EN_TERM_UPDATE (1 << 47) /* This should be asserted and deasserted if LCONT_LVDS_TERMINATION is changed. */ -+ -+/* Macros used to access and construct MMU hash table and chain entries. */ -+/* -+ * Each hash entry is made up of a 64 byte block. Each entry hash two tags where each -+ * tag has 4 PTE's. PTE's 0 to 2 use the bottom 48 bits of a 64 bit word and PTE 3 -+ * uses the top 16 bits of 3 64 bit words. -+ * -+ * These macros can be used to build a single PTE. PTE3 needs to be built into a 48 bit -+ * object before they can be used. -+ */ -+#define PTE_ENTRY_MASK 0x0000ffffffffffffULL -+#define PTE_TYPE_MASK 0x000000000000000fULL -+#define PTE_PERM_MASK 0x00000000000000f0ULL -+#define PTE_PERM_TYPE_MASK 0x00000000000000ffULL -+#define PTE_REF_MASK 0x0000000000000100ULL -+#define PTE_PPN_MASK 0x00007ffffffffe00ULL -+#define PTE_MOD_MASK 0x0000800000000000ULL -+#define PTE_TOPADDR_MASK 0x0000600000000000ULL -+ -+#define PTE_MOD_SHIFT 47 -+#define PTE_PPN_SHIFT 9 -+#define PTE_REF_SHIFT 8 -+#define PTE_PERM_SHIFT 4 -+#define PTE_TYPE_SHIFT 0 -+ -+#define PTE_PADDR_SHIFT (12 - 9) /* Physical addresses are shifted down 3 this to go into the PTE */ -+ -+ -+/* Values required for tag 3 */ -+#define PTE_REF_3 0x0100000000000000ULL -+#define PTE_MOD_3 0x8000000000000000ULL -+#define PTE_ENTRY_MASK_3 0xffff000000000000ULL -+#define PTE_PERM_TYPE_MASK_3 0x00ff000000000000ULL -+#define PTE_ENTRY_3_FOR_0(NewPte) ((NewPte << (48)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_1(NewPte) ((NewPte << (32)) & PTE_ENTRY_MASK_3) -+#define PTE_ENTRY_3_FOR_2(NewPte) ((NewPte << (16)) & PTE_ENTRY_MASK_3) -+ -+/* Values required for the tags */ -+#define TAG_CONTEXT_MASK 0x0000000000003fffULL -+#define TAG_ADDRESS_MASK 0xfffffffff8000000ULL -+#define TAG_CHAINPTR_18TO6_MASK 0x0000000007ffc000ULL -+#define TAG_CHAINPTR_LOW_SHIFT (14 - 6) -+#define TAG_CHAINPTR_30TO19_MASK 0x0000000003ffc000ULL -+#define TAG_CHAINPTR_HIGH_SHIFT (19 - 14) -+#define TAG_COPY_BIT 0x0000000004000000ULL -+ -+/* -+ * This takes number loaded into the control register and returns the page size as a power of two. -+ */ -+ -+#define E4_PAGE_SIZE_TABLE E4_uint32 const PageSizeTable[] = {12, 13, 16, 19, 21, 22, 26, 29} -+#define E4_PAGE_SIZE_TABLE_SIZE (sizeof(PageSizeTable)/sizeof(PageSizeTable[0])) -+ -+/* -+ * This macro generates a hash block index. -+ * -+ * Cntx This is the 14 bit context. It should not be larger than 14 bits. -+ * VAddr This is the 64 bit virtual address. It does not require any masking and can be a byte address. -+ * PageSize This is the value loaded into the control register for this hash table. -+ * HashTableMask This should be set mask out upper bits past the end of the hash table. -+ */ -+#define E4MMU_SHIFT_ADDR(VAddr, Shift) \ -+ ((((E4_uint32)(VAddr)) >> (Shift)) | (((E4_uint32)((VAddr) >> 32)) << (32 - (Shift)))) -+ -+#define E4MMU_CONTEXT_SCRAMBLE(Cntx) \ -+ ((((Cntx) << 8) | ((Cntx) >> 6)) ^ (((Cntx) << 15) | ((Cntx) << 1))) -+ -+#define E4MMU_HASH_INDEX(Cntx, VAddr, PageShift, HashTableMask) \ -+ ((E4MMU_SHIFT_ADDR(VAddr, (PageShift) + 2) ^ E4MMU_CONTEXT_SCRAMBLE(Cntx)) & (HashTableMask)) -+ -+#define E4MMU_TAG(vaddr,ctx) (((vaddr) & TAG_ADDRESS_MASK) | ((ctx) & TAG_CONTEXT_MASK)) -+ -+#define E4MMU_TAG2VADDR(tag,hashidx,PageShift,HashTableMask) \ -+ (((tag) & TAG_ADDRESS_MASK) | ((((hashidx) ^ E4MMU_CONTEXT_SCRAMBLE((tag) & TAG_CONTEXT_MASK)) & (HashTableMask)) << ((PageShift + 2)))) -+ -+/* -+ * Detailed bit descriptions for the tags and PTE's are better done with the macros -+ * defined above. -+ */ -+typedef struct _E4_HashTableEntry -+{ -+ E4_uint64 Tag[2]; -+ E4_uint64 TagPTE[2][3]; -+} E4_HashTableEntry; -+ -+#define E4MMU_TAG_OFFSET(tag) ((tag) << 3) -+#define E4MMU_PTE_LOW_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3)) -+#define E4MMU_PTE_HIGH_OFFSET(tag,pte) ((((tag)*3 + (pte) + 2) << 3) + 4) -+#define E4MMU_PTE3_WORD0_OFFSET(tag) ((((tag)*3 + 2) << 3) + 6) -+#define E4MMU_PTE3_WORD1_OFFSET(tag) ((((tag)*3 + 3) << 3) + 6) -+#define E4MMU_PTE3_WORD2_OFFSET(tag) ((((tag)*3 + 4) << 3) + 6) -+ -+ -+/* -+ * Hash0AddrBits is the size of the hash table in bytes as a power of 2. -+ * e.g. 11 would give 32 hash entries where each entry is 64 bytes. -+ */ -+#define SETUP_HASH_TABLES(Hash0PageSize, Hash0AddrBits, Hash1PageSize, Hash1AddrBits) \ -+ (((Hash0PageSize) << CONT_TABLE0_PAGE_SIZE_SHIFT) | \ -+ ((Hash0AddrBits) << CONT_TABLE0_MASK_SIZE_SHIFT) | \ -+ ((Hash1PageSize) << CONT_TABLE1_PAGE_SIZE_SHIFT) | \ -+ ((Hash1AddrBits) << CONT_TABLE1_MASK_SIZE_SHIFT)) -+ -+/* ECC status register */ -+#define ECC_Addr(s) ((s) & 0x7ffffff8ULL) -+#define ECC_Syndrome(s) (((s) >> 32) & 0xffffULL) -+#define ECC_RisingDQSSyndrome(s) (((s) >> 32) & 0xffULL) -+#define ECC_FallingDQSSyndrome(s) (((s) >> 40) & 0xffULL) -+#define ECC_UncorrectableErr(s) (((s) >> 48) & 1ULL) -+#define ECC_MultUncorrectErrs(s) (((s) >> 49) & 1ULL) -+#define ECC_CorrectableErr(s) (((s) >> 50) & 1ULL) -+#define ECC_MultCorrectErrs(s) (((s) >> 51) & 1ULL) -+ -+/* Permission type saved in a PTE. This is a four bit field */ -+#define PERM_Disabled 0x0 -+#define PERM_Unused 0x1 -+#define PERM_LocDataRead 0x2 -+#define PERM_LocDataWrite 0x3 -+#define PERM_LocRead 0x4 -+#define PERM_LocExecute 0x5 -+#define PERM_ReadOnly 0x6 -+#define PERM_LocWrite 0x7 -+#define PERM_LocEventOnly 0x8 -+#define PERM_LocEventWrite 0x9 -+#define PERM_RemoteEvent 0xa -+#define PERM_RemoteAll 0xb -+#define PERM_RemoteReadOnly 0xc -+#define PERM_RemoteWriteLocRead 0xd -+#define PERM_DataReadWrite 0xe -+#define PERM_NoFault 0xf -+ -+#define PERM_Mask 0xf -+ -+/* Permission type hints to device driver */ -+#define PERM_Preload 0x10 -+ -+#define PTE_SetPerm(Perm) (((Perm) & PERM_Mask) << 4) -+ -+/* Control info saved in the lookup field of the TLB */ -+#define PTE_PciNotLocal (1ULL << 0) /* Directs the access to the PCI interface */ -+#define PTE_BigEndian (1ULL << 1) /* Valid for PCI entries only */ -+#define PTE_RelaxedOrder (1ULL << 2) /* Valid for PCI entries only */ -+#define PTE_DontSnoop (1ULL << 3) /* Valid for PCI entries only */ -+ -+#define PTE_UseFixedSet (1ULL << 1) /* Value for non PCI entries only */ -+#define PTE_CommandQueue (1ULL << 2) /* Value for non PCI entries only */ -+#define PTE_SetFixedSetNo(Set) ((((Set) & 3) << 2) | PTE_UseFixedSet) -+ -+#define PTE_TypeBitsMask (0xfULL) -+#define PTE_PermissionTypeMask (0xfULL << 4) -+#define PTE_Referenced (1ULL << 8) -+#define PTE_PhysicalPageNoMask (0x7ffffffffe00ULL) -+#define PTE_Modified (1ULL << 47) -+ -+#define PTE_PhysicalAddrShiftIntoPTE (12 - 9) -+ -+/* define page table entry bit fields */ -+#define TLB_PageSizeBits (3 << 0) -+#define TLB_ACCBits (7 << 2) -+#define TLB_LocalBit (1 << 5) -+#define TLB_PCI64BitTargetBit (1 << 6) -+#define TLB_PCIBigEndianBit (1 << 7) -+ -+#define TLB_ModifiedBit (1 << 55) -+#define TLB_ReferencedBit (1 << 63) -+ -+/* Used to read values from the tlb. */ -+#define TLB_TlbReadCntBitsSh 56 -+#define TLB_UseSelAddrSh (1ULL << 60) -+#define TLB_WriteTlbLine (1ULL << 61) -+ -+#define TLB_SEL_LINE(LineNo) (TLB_UseSelAddrSh | \ -+ ((E4_uint64)((LineNo) & 0xf) << TLB_TlbReadCntBitsSh)) -+ -+#define TLB_NUM_ENTRIES 16 -+/* -+ * The following macros are used with the test access port (TlbLineValue) for the TLBs. -+ */ -+#define TLV_DoPciAccess (1ULL << 0) -+#define TLV_CommandAccess (1ULL << 1) -+#define TLV_DoCacheAccess (1ULL << 2) -+#define TLV_notStartTLBWalk (1ULL << 3) -+#define TLV_UseFixedSet (1ULL << 4) -+#define TLV_BigEndian (1ULL << 4) -+#define TLV_RelaxedOrder (1ULL << 5) -+#define TLV_DontSnoop (1ULL << 6) -+#define TLV_FixedSetNo_MASK (3ULL << 5) -+#define TLV_PciTypeBits_MASK (7ULL << 4) -+#define TLV_LookupBits_MASK (0x7fULL) -+#define TLV_MissErr (1ULL << 7) -+#define TLV_TypeBits (0xffULL) -+ -+#define TLV_PhysicalAddr_MASK (0x3fffffffff000ULL) -+ -+#define TLV_TlbTesting (1ULL << 51) -+#define TLV_SelectUnitsTlbRead (1ULL << 52) -+#define TLV_SelectTProcTlbRead (1ULL << 53) -+ -+#define TLV_TlbLineSelect_MASK (0xf) -+#define TLV_UnitsTlbLineSelect_SHIFT (54) -+#define TLV_TProcTlbLineSelect_SHIFT (59) -+#define TLV_EnableUnitsTlbRead (1ULL << 58) -+#define TLV_EnableTProcTlbRead (1ULL << 63) -+ -+/* -+ * Use this macro to enable direct testing of the Units TLB. -+ * When Line is in the range 0 to 15 a TLB line is selected for reading or writing. -+ * When Line is set to -1 the tlb will be activated to perform a match. -+ */ -+#define TLV_UnitsTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableUnitsTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_UnitsTlbLineSelect_SHIFT))) -+#define TLV_TProcTlbLineSel(Line) (((Line) == -1) ? 0ULL : \ -+ (TLV_EnableTProcTlbRead | ((E4_uint64)((Line) & TLV_TlbLineSelect_MASK) << TLV_TProcTlbLineSelect_SHIFT))) -+ -+/* -+ * Thread_Trap_State -+ * see f_RegFileControl.v TProcStatus -+ */ -+#define TS_HaltThread (1 << 0) -+#define TS_TrapForTooManyInstructions (1 << 1) -+#define TS_InstAccessException (1 << 2) -+#define TS_Unimplemented (1 << 3) -+#define TS_DataAccessException (1 << 4) -+#define TS_DataAlignmentError (1 << 5) -+#define TS_TrapForUsingBadData (1 << 6) -+#define TS_TrapTypeMask (0x7f) -+#define TS_DataPortNo(ts) (((ts) >> 7) & 7) -+#define TS_TrappedFlag (1 << 10) -+#define TS_MemLock (1 << 11) -+#define TS_XCCshift 12 -+#define TS_XCCmask 0xff -+#define TS_ICC(ts) (((ts) >> 12) & 15) -+#define TS_XCC(ts) (((ts) >> 16) & 15) -+#define TS_InstValid_F (1 << 20) -+#define TS_InstValid_R (1 << 21) -+#define TS_InstValid_E (1 << 22) -+#define TS_InstValid_W (1 << 23) -+#define TS_HighPriority (1 << 24) -+#define TS_RemoteThread (1 << 25) -+#define TS_TProcTranslationInProgress (1 << 26) -+#define TS_MemLock_E (1 << 27) -+ -+/* Thread run queue entries */ -+typedef struct E4_ThreadRegs -+{ -+ E4_uint64 Registers[7]; -+} E4_ThreadRegs; -+ -+typedef struct E4_TProcQueueEntry -+{ -+ E4_ThreadRegs Regs; /* XXXX: jon check this */ -+ E4_uint64 Context; /* XXXX: jon check this */ -+} E4_TProcQueueEntry; -+ -+typedef struct E4_DProcQueueEntry -+{ -+ E4_DMA Desc; -+ E4_uint64 Pad; -+} E4_DProcQueueEntry; -+ -+/* -+ * Packet acknowledge values. -+ */ -+#define E4_PAckOk 0 -+#define E4_PAckTestFail 1 -+#define E4_PAckDiscard 2 -+#define E4_PAckError 3 -+ -+/* -+ * return values from breaktest instruction. -+ */ -+#define ICC_CARRY_BIT (0x1ULL << 0) /* Breaktest: Load pending */ -+#define ICC_ZERO_BIT (0x1ULL << 1) /* Breaktest: Time to break */ -+#define ICC_SIGNED_BIT (0x1ULL << 2) /* Breaktest: Another thread ready */ -+#define ICC_TPROC_RDY_LOW_PRI (0x1ULL << 3) -+#define ICC_TPROC_RDY_HIGH_PRI (0x1ULL << 4) -+#define ICC_RUNNING_HIGH_PRI (0x1ULL << 5) -+#define ICC_RUNNING_AS_REMOTE (0x1ULL << 6) -+#define ICC_TIME_TO_BREAK (0x1ULL << 7) -+#define ICC_RS1LOAD_PENDING (0x1ULL << 8) -+#define ICC_TPROC_HALT (0x1ULL << 9) -+ -+/* -+ * Main Interrupt cookies -+ * [63:14] user cookie -+ * [13:0] context -+ */ -+#define E4_MAIN_INT_SHIFT 14 -+#define E4_MAIN_INT_COOKIE(cookie) ((cookie) >> E4_MAIN_INT_SHIFT) -+#define E4_MAIN_INT_CTX(cookie) ((cookie) & 0x3FFF) -+ -+typedef E4_uint64 E4_MainIntEntry; -+ -+#define E4_MainIntEntrySize sizeof (E4_MainIntEntry) -+ -+/* -+ * The internal databus is 64 bits wide. -+ * All writes to the internal registers MUST be made with 64 bit write operations. -+ * These can be made up of pairs 32 bit writes on the PCI bus. The writes will be -+ * treated as nops if they are performed with two separate 32 bit writes. -+ */ -+typedef volatile struct _E4_DataBusMap -+{ -+ E4_uint64 InputTrans[4][16]; /* 0x000 */ -+ -+ E4_uint64 Dma0TransAddr; /* 0x200 */ -+ E4_DMA Dma0Desc; /* Current Dma0 registers */ /* 0x208 */ -+ -+ E4_uint64 Dma1TransAddr; /* 0x240 */ -+ E4_DMA Dma1Desc; /* Current Dma1 registers */ /* 0x248 */ -+ -+ E4_uint64 Dma0LastPacketSize; /* 0x280 */ -+ E4_uint64 Dma0ThisPacketSize; /* 0x288 */ -+ E4_uint64 Dma0DescSizeInProg; /* 0x290 */ -+ E4_uint64 Dma0BytesToPrefetch; /* 0x298 */ -+ E4_uint64 Dma0PrefetchAddr; /* 0x2a0 */ -+ E4_uint64 EventCountAndType; /* 0x2a8 */ -+ E4_uint64 EventParameters[2]; /* 0x2b0 */ -+ -+ E4_uint64 Dma1LastPacketSize; /* 0x2c0 */ -+ E4_uint64 Dma1ThisPacketSize; /* 0x2c8 */ -+ E4_uint64 Dma1DescSizeInProg; /* 0x2d0 */ -+ E4_uint64 Dma1BytesToPrefetch; /* 0x2d8 */ -+ E4_uint64 Dma1PrefetchAddr; /* 0x2e0 */ -+ E4_Input_Ptrs InputTrapAndFilter; /* 0x2e8 */ -+ E4_uint64 EventAddress; /* 0x2f0 */ -+ E4_QueuePtr MainIntQueuePtrs; /* 0x2f8 */ -+ -+ E4_uint64 Event_Copy[16]; /* 0x300 */ -+ -+ E4_uint64 CommandCopy[7]; /* 0x380 */ -+ E4_uint64 CommandHold; /* 0x3b8 */ -+ -+ E4_uint64 InputQueueDesc[4]; /* 0x3c0 */ -+ -+ /* Run queue Pointers */ -+ E4_uint64 DProcLowPriPtrs; /* 0x3e0 */ -+ E4_uint64 DProcHighPriPtrs; /* 0x3e8 */ -+ E4_uint64 TProcLowPriPtrs; /* 0x3f0 */ -+ E4_uint64 TProcHighPriPtrs; /* 0x3f8 */ -+ -+ E4_uint64 CProcStatus; /* 0x400 */ -+ E4_uint64 TProcStatus; /* 0x408 */ -+ E4_uint64 IProcStatus; /* 0x410 */ -+ E4_uint64 EProcStatus; /* 0x418 */ -+ E4_uint64 DProc0Status; /* 0x420 */ -+ E4_uint64 DProc1Status; /* 0x428 */ -+ E4_Sched_Status SchedStatus; /* 0x430 */ -+ -+ E4_uint64 LoadIProcCntxFilter; /* Will load one of 4 cntx filter regs. Write only */ /* 0x438 */ -+ -+ E4_CommandControl CommandControl; /* 0x440 */ -+ E4_uint64 CommandCacheTestPort; /* 0x448 */ -+ E4_uint64 CommandLowPriRunPtrs; /* 0x450 */ -+ E4_uint64 CommandHighPriRunPtrs; /* 0x458 */ -+ E4_uint64 CommandSchedDataPort[4]; /* 0x460 */ -+ -+ E4_uint64 DmaRouteBuffer[2][2]; /* Write only. Should not be written to. */ /* 0x480 */ -+ E4_uint64 StenRouteBuffer[2]; /* Write only. Should not be written to. */ /* 0x4a0 */ -+ E4_uint64 pad4[0x098 - 0x096]; /* 0x4b0 */ -+ -+ E4_uint64 DmaAlignmentPort[8]; /* Write only. Should only be written to clear the prev reg. */ /* 0x4c0 */ -+ -+ E4_uint64 MmuBlockEntry[8]; /* Used for hash table and chain fetches */ /* 0x500 */ -+ E4_uint64 WriteUnitsTlbLine[3]; /* 0x550 */ -+ E4_uint64 pad5; /* 0x540 */ -+ E4_uint64 WriteTProcTlbLine[3]; /* 0x568 */ -+ E4_uint64 pad6; /* 0x540 */ -+ -+ E4_uint64 MmuTableBasePtrs; /* Both tables packed into a single 64 bit value */ /* 0x580 */ -+ E4_uint64 MmuFaultAndRootCntxPtr; /* Both packed into a single 64 bit value */ /* 0x588 */ -+ E4_uint64 UnitsVAddr; /* 0x590 */ -+ E4_uint64 TProcVAddr; /* 0x598 */ -+ E4_uint64 UnitsCntx; /* 0x5a0 */ -+ E4_uint64 TProcCntx; /* Read only. Writes access VProcCacheWritePort */ /* 0x5a8 */ -+ E4_uint64 FaultAddrReg; /* 0x5b0 */ -+ E4_uint64 FaultTypeAndContextReg; /* 0x5b8 */ -+ -+ E4_uint32 SysControlReg; /* 0x5c0 */ -+ E4_uint32 CacheTagValue; /* 0x5c4 */ -+ E4_uint64 TlbLineValue; /* 0x5c8 */ -+ E4_uint64 SDRamConfigReg; /* 0x5d0 */ -+ E4_uint32 InterruptMask; /* 0x5d8 */ -+ E4_uint32 InterruptReg; /* 0x5dc */ -+ E4_uint64 SDRamECCStatus; /* 0x5e0 */ -+ E4_uint32 LinkControlReg; /* 0x5e8 */ -+ E4_uint32 LinkContSettings; /* 0x5ec */ -+ E4_uint64 LinkPortKey; /* 0x5f0 */ -+ E4_uint64 LinkPortLock; /* 0x5f8 */ -+ -+ E4_uint64 SDRamWriteBuffer[4][8]; /* 0x600 */ -+ E4_uint64 SDRamReadBuffer[4][8]; /* 0x700 */ -+ -+ E4_uint64 TProcRegs[64]; /* 0x800 */ -+ E4_uint64 TProcStartUp[8]; /* Not to be used except by the elan itself */ /* 0xa00 */ -+ -+ E4_uint64 LoadPending; /* 0xa40 */ -+ E4_uint64 StortPending; /* 0xa48 */ -+ E4_uint64 DirtyBits; /* 0xa50 */ -+ E4_uint64 BadBits; /* 0xa58 */ -+ -+ E4_uint64 ICachePort_Cntl_Addr; /* 0xa60 */ -+ E4_uint64 Thread_Trap_State; /* 0xa68 */ -+ -+/* Instruction buffer (4 * 32 bit words) */ -+ E4_uint64 nPC_W; /* 0xa70 */ -+ E4_uint64 PC_W; /* 0xa78 */ -+ -+ E4_uint64 ICacheFillData[8]; /* 0xa80 */ -+ E4_uint64 ICachePort[8]; /* 0xac0 */ -+ -+ E4_uint64 PciDataBufs[4][8]; /* 0xb00 */ -+ -+ E4_uint64 CommandQueueBuffer[128]; /* 0xc00 */ -+} E4_DataBusMap; -+ -+/* -+ * These macros are used to setup the thread pcoessors ICache. -+ */ -+#define E4_ICacheTagAddrShift 6 -+#define E4_AccessICacheRams 1 -+#define E4_InvalidTagValue 0xffffffffffffffffULL -+#define E4_ICacheSizeInBytes (1024*16) -+#define E4_ICacheLineSizeInBytes (64) -+#define E4_ICacheLines (E4_ICacheSizeInBytes/E4_ICacheLineSizeInBytes) -+#define E4_ICachePortSize ( (sizeof((E4_DataBusMap *) 0)->ICachePort) / \ -+ (sizeof((E4_DataBusMap *) 0)->ICachePort[0])) -+ -+#define E4_ICacheFixupInsn 0xc0b02f95ull /* st1 [%r0 + 0xf95] */ -+#define E4_ICacheFixupAddr 0xf95ull -+#define E4_ICacheFixupOffset 0xfc0 -+ -+/* -+ * Event interrupt -+ */ -+typedef volatile union _E4_EventInt -+{ -+ E4_uint64 ForceAlign; -+ struct { -+ E4_uint32 IntCookie; -+ E4_uint32 EventContext; /* Bits 16 to 28 */ -+ } s; -+} E4_EventInt; -+ -+/* -+ * The following are used to interpret a fault status register. -+ */ -+ -+/* -+ * FSR[14:0] - AccessType -+ * -+ * T = Type bit -+ * S = size bit. Size is in units of 64 bits or 8 bytes. -+ * E = Byte end pointer. Used to define the last written byte of the last 64 bits written. -+ * D = Data type bit. Used for endian conversion in the PCI interface. -+ * C = Used by the cache to decide if this access should allocate a cache line. -+ * d = Set if dma read or write data data. This is used to guarantee order at the PCI interface. -+ * A = Access type used to check permissions by the MMU in a virtual access. -+ * P = Part Write. If set some byte enables may be used. Effects the action of a cache miss. -+ */ -+ -+/* FSR[7:0] */ -+/* bit 7 => virtual write */ -+#define AT_VirtualWriteAccBit (1 << 7) /* AAADDdC1EEESSSS = Virtual Write */ -+#define AT_VirtualWriteSizeMask 0xf /* size of write access (0 => 128 bytes) */ -+#define AT_VirtualWriteEndPtrShift 4 /* end byte pointer for part write block */ -+#define AT_VirtualWriteEndPtrMask 0x7 -+ -+/* else bit 6 => virtual read */ -+#define AT_VirtualReadAccBit (1 << 6) /* AAADDdC01SSSSSS = Virtual Read */ -+#define AT_VirtualReadSizeMask 0x3f /* size of read access (0 => 512 bytes) */ -+ -+/* else => special access */ -+#define AT_SelBitsMask 0xf /* Bits to select the type of acces from */ -+#define AT_SelBitsShift 0x4 -+#define AT_SpecialRd (0x0 << 4) /* AAADDdC0000TTTT = Special read Access */ -+#define AT_SpecialWr (0x1 << 4) /* AAADDdC0001TTTT = Special write Access */ -+#define AT_PhysicalRd (0x2 << 4) /* AAADDdC00100SSS = Physical Read */ -+#define AT_PhysicalWr (0x3 << 4) /* AAADDdC0011PSSS = Physical write */ -+ -+#define AT_OtherSizeMask 0xf /* Size bits used by all other accesses. 0=128 bytes */ -+#define AT_SpecialBitsMask 0xf /* Bits used to define the special access types */ -+#define AT_CacheSizeBitsMask 0x7 /* Size bits used for local accesses. 0=64 */ -+#define AT_CachePhysPartWriteBit 0x8 /* This bit is set if the access is a part write to the cache */ -+ -+/* Special memory access operations */ -+#define AT_RegAccess 0x0 -+#define AT_GetCntxFilter 0xe /* Only used by special reads */ -+#define AT_RouteFetch 0xf /* Only used by special reads */ -+ -+/* FSR[9:8] */ -+#define AT_NonAlloc (1 << 8) /* 1=Do not fill cache with this data */ -+#define AT_DmaData (1 << 9) /* This is a DMA read access. Required to guarantee dma read order. */ -+ -+/* FSR[11:10] - Data Type - defines data type for endian conversion in PCI interface*/ -+#define AT_BlkDataTyMask 0x3 -+#define AT_BlkDataTyShift 10 -+ -+#define AT_BlkDataType(FSR) (((FSR) >> AT_BlkDataTyShift) & AT_BlkDataTyMask) -+#define AT_TypeByte 0x0 -+#define AT_TypeHWord 0x1 -+#define AT_TypeWord 0x2 -+#define AT_TypeDWord 0x3 -+ -+/* FSR[14:12] - Access Permissions */ -+#define AT_PermBitsMask 0x7 -+#define AT_PermBitsShift 12 -+ -+#define AT_Perm(FSR) (((FSR) >> AT_PermBitsShift) & AT_PermBitsMask) -+#define AT_PermLocalDataRead 0x0 -+#define AT_PermLocalDataWrite 0x1 -+#define AT_PermRemoteRead 0x2 -+#define AT_PermRemoteWrite 0x3 -+#define AT_PermExecute 0x4 -+#define AT_PermLocalEvent 0x5 -+#define AT_PermRemoteEvent 0x7 -+ -+/* FSR[22:15] - reason for fault */ -+ -+#define FSR_WalkForThread (1 << 15) /* The thread processor caused the fault */ -+#define FSR_Walking (1 << 16) /* The fault was caused during a hash table access */ -+#define FSR_NoTranslationsFound (1 << 17) /* The hash table did not contain a matching tag */ -+#define FSR_WalkingProtectionFault (1 << 18) /* A protection fault was detected while walking */ -+#define FSR_HashTable1 (1 << 19) /* Was accessing hash table 1 not 0 */ -+#define FSR_RouteVProcErr (1 << 20) /* This is an invalid vproc for a route fetch */ -+#define FSR_FaultForBadData (1 << 21) /* Bad data (double bit ECC error) while performing a walk access */ -+#define FSR_FaultForMaxChainCount (1 << 22) /* The Elan4 has walked a chain of 1024 items. */ -+ -+typedef volatile struct _E4_FaultSave -+{ -+ E4_uint64 FSRAndFaultContext; /* Bits 0-31 : FaultContext. Bits 32-63 : FaultStatus Register */ -+ E4_uint64 FaultAddress; -+} E4_FaultSave; -+ -+#define FaultSaveContext(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) & 0xFFFFFFFF)) -+#define FaultSaveFSR(FSRAndFaultContext) ((E4_uint32) ((FSRAndFaultContext) >> 32)) -+ -+typedef union E4_TrTypeCntx -+{ -+ E4_uint32 TypeContext; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 Type:16; /* Transaction type field */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+#else -+ E4_uint32 LastTrappedTrans:1; /* Bit 31 */ -+ E4_uint32 StatusRegValid:1; /* Bit 30 */ -+ E4_uint32 TypeCntxInvalid:1; /* Bit 29 */ -+ E4_uint32 Context:13; /* Transaction context */ -+ E4_uint32 Type:16; /* Transaction type field */ -+#endif -+ } s; -+} E4_TrTypeCntx; -+ -+#define MAX_TRAPPED_TRANS 28 -+#define TRANS_DATA_DWORDS 16 -+#define TRANS_DATA_BYTES 128 -+#define NO_OF_INPUT_CHANNELS 4 -+ -+#define CH0_LOW_PRI_CHAN 0 -+#define CH1_LOW_PRI_CHAN 1 -+#define CH0_HIGH_PRI_CHAN 2 -+#define CH1_HIGH_PRI_CHAN 3 -+ -+/* Words have been swapped for big endian access when fetched with dword access from elan.*/ -+typedef struct _E4_IprocTrapHeader -+{ -+ E4_uint64 TrAddr; -+ E4_uint64 IProcStatusCntxAndTrType; -+} E4_IprocTrapHeader; -+ -+typedef struct _E4_IprocTrapData -+{ -+ E4_uint64 Data[TRANS_DATA_DWORDS]; -+} E4_IprocTrapData; -+ -+/* -+ * This struct defines the trap state for the inputers. It requires a contiguous 16K byte block of local memory. -+ * The channel bits have been grouped to the low end of the address to force all Identify cookies to use the -+ * same cache line. -+ */ -+typedef struct _E4_IprocTrapState -+{ -+ E4_IprocTrapData TrData[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_IprocTrapHeader TrHeader[MAX_TRAPPED_TRANS][NO_OF_INPUT_CHANNELS]; -+ E4_uint64 pad[8*NO_OF_INPUT_CHANNELS]; -+} E4_IprocTrapState; -+ -+/* -+ * 64 kbytes of elan local memory. Must be aligned on a 64k boundary -+ */ -+#define E4_LowPriQueueSize 0x400 -+#define E4_HighPriQueueSize 0x100 -+ -+typedef struct _E4_FaultSaveArea -+{ -+ E4_FaultSave TProcData[8]; -+ E4_FaultSave TProcInst; -+ E4_FaultSave Dummy[7]; -+ E4_FaultSave SchedProc; -+ E4_FaultSave DProc; -+ E4_FaultSave EventProc; -+ E4_FaultSave IProc; -+ E4_FaultSave DProcData[4]; -+ E4_FaultSave QReadData[8]; -+} E4_FaultSaveArea; -+ -+/* Macros to manipulate event queue pointers */ -+/* generate index in EventIntQueue */ -+#define E4_EVENT_INTQ_INDEX(fptr) (((fptr) & 0x1fff) >> 3) -+/* generate next fptr */ -+#define E4_EVENT_INTQ_NEXT(fptr) ((((fptr) + 8) & ~0x4000) | 0x2000) -+ -+typedef struct _E4_CommandPort -+{ -+ volatile E4_uint64 Command[1024]; /* a whole 8k page */ -+} E4_CommandPort; -+ -+/* -+ * This is the allocation of unit numbers within the ELAN. It is used to extract the fault address -+ * and fault type after a unit has trapped on a memory fetch. Only units that can generate traps -+ * have been included. -+ */ -+#define CUN_TProcData0 0x00 -+#define CUN_TProcData1 0x01 -+#define CUN_TProcData2 0x02 -+#define CUN_TProcData3 0x03 -+#define CUN_TProcData4 0x04 -+#define CUN_TProcData5 0x05 -+#define CUN_TProcData6 0x06 -+#define CUN_TProcData7 0x07 -+#define CUN_TProcInst 0x08 -+ -+/* memory current unit numbers -+ * TProc data bus */ -+#define CUN_DProcPA0 0x10 -+#define CUN_DProcPA1 0x11 -+#define CUN_DProcPrefetch 0x12 -+#define CUN_CommandProc 0x13 -+#define CUN_DProcData0 0x14 /* Dma prefetch reads. */ -+#define CUN_DProcData1 0x15 /* Dma prefetch reads. */ -+#define CUN_DProcData2 0x16 /* Dma prefetch reads. */ -+#define CUN_DProcData3 0x17 /* Dma prefetch reads. */ -+ -+#define CUN_IProcLowPri 0x18 -+#define CUN_IProcHighPri 0x19 -+#define CUN_Spare0 0x1A -+#define CUN_Spare1 0x1B -+#define CUN_Spare2 0x1C -+#define CUN_ThreadQueue 0x1D -+#define CUN_EventProc0 0x1e -+#define CUN_EventProc1 0x1f -+ -+#define CUN_Entries 0x20 -+ -+typedef struct E4_Registers -+{ -+ E4_CacheTags Tags; /* 4k bytes c000 -> cfff */ -+ E4_DataBusMap Regs; /* 4k bytes d000 -> dfff */ -+ E4_User_Regs uRegs; /* 8k bytes e000 -> ffff */ -+} E4_Registers; -+ -+#define I2cCntl_I2cPortWrite (0 << 0) -+#define I2cCntl_I2cPortRead (1 << 0) -+#define I2cCntl_I2cPortGenStopBit (1 << 1) -+#define I2cCntl_I2cPortGenRestartBit (1 << 2) -+#define I2cCntl_I2cPortAccFailed (1 << 3) -+#define I2cCntl_I2cStopped (1 << 4) -+#define I2cCntl_I2cWakeupFailed (1 << 5) -+#define I2cCntl_I2cFastMode (1 << 6) -+#define I2cCntl_I2cPortBusy (1 << 7) -+ -+#define I2cCntl_LedI2cRegBase_Mask 0x7f -+#define I2cCntl_I2cUpdatingLedReg (1 << 7) -+ -+#define I2cCntl_InvertLedValues (1 << 0) /* read/write */ -+#define I2cCntl_LedRegWriteFailed (1 << 1) /* read only */ -+#define I2cCntl_EEPromLoadFailed (1 << 2) /* read only */ -+#define I2cCntl_InhibitI2CRom (1 << 3) /* read only */ -+#define I2cCntl_BadRomCrc (1 << 4) /* read only */ -+#define I2cCntl_MapInI2cConfigData (1 << 5) /* read/write */ -+#define I2cCntl_SampleNewLedValues (1 << 6) /* read/write */ -+#define I2cCntl_ClearLinkError (1 << 7) /* write only */ -+ -+typedef struct E4_I2C -+{ -+ volatile E4_uint8 I2cWrData; -+ volatile E4_uint8 I2cRdData; -+ volatile E4_uint8 I2cPortControl; -+ volatile E4_uint8 I2cLedBase; -+ volatile E4_uint8 I2cStatus; -+ volatile E4_uint8 I2cLedsValue; -+ volatile E4_uint16 I2cPad; -+ -+ E4_uint8 pad[256 - sizeof(E4_uint64)]; -+ -+ E4_uint8 UnchangedElan4ConfigRegs[256]; -+ E4_uint8 I2cRomConfigShadowValues[256]; -+ E4_uint8 ChangedElan4ConfigRegs[256]; -+} E4_I2C; -+ -+typedef struct _E4_ContextControlBlock -+{ -+ E4_uint32 Filter; /* Use a Network context to index for this value */ -+ E4_uint32 VirtualProcessTable; /* Use a local context to index for this value */ -+} E4_ContextControlBlock; -+ -+/* -+ * Filter -+ * [13:0] Context -+ * [14] DiscardAll -+ * [15] AckAll -+ * [16] HighPri -+ * [17] CountStats -+ * [31:18] Unused -+ */ -+#define E4_FILTER_STATS (1 << 17) -+#define E4_FILTER_HIGH_PRI (1 << 16) -+#define E4_FILTER_ACKOK_ALL (1 << 15) -+#define E4_FILTER_DISCARD_ALL (1 << 14) -+#define E4_FILTER_CONTEXT_MASK (0x3FFF) -+ -+/* -+ * VirtualProcessTable -+ * [8:0] Unused -+ * [12:9] Size num vp entries = 512 << Size -+ * [30:13] Pointer -+ * [31] Valid -+ */ -+#define E4_VPT_MIN_ENTRIES 512 -+#define E4_VPT_VALID ((unsigned)1 << 31) -+#define E4_VPT_PTR_SHIFT 0 -+#define E4_VPT_SIZE_SHIFT 9 -+#define E4_VPT_SIZE_MASK 0xf -+#define E4_VPT_NUM_VP(vpt_val) (E4_VPT_MIN_ENTRIES << (((vpt_val) >> E4_VPT_SIZE_SHIFT) & E4_VPT_SIZE_MASK)) -+#define E4_VPT_VALUE(ptr,size) (((ptr) << E4_VPT_PTR_SHIFT) | ((size) << E4_VPT_SIZE_SHIFT)) -+ -+ -+/* Virtual Process Table */ -+typedef struct _E4_VirtualProcessEntry -+{ -+ E4_uint64 Values[2]; -+} E4_VirtualProcessEntry; -+ -+/* -+ * Entries have the following format - rtX is a packed route -+ * -+ * |rt11|rt10|rt9 |rt8 |rt7 |rt6 |rt5 |rt4 |rt3 |rt2 |rt2 |rt0 |PAAADD RRRRRR| -+ * |output context |rt23|rt22|rt21|rt20|rt19|rt18|rt17|rt16|rt15|rt14|rt13|rt12| -+ */ -+ -+#define ROUTE_CTXT_SHIFT 48 -+#define ROUTE_CTXT_MASK (~((1ull << ROUTE_CTXT_SHIFT)-1)) -+#define ROUTE_CTXT_VALUE(ctx) (((E4_uint64) ctx) << ROUTE_CTXT_SHIFT) -+ -+#define ROUTE_PACKED_OFFSET 16 -+#define ROUTE_NUM_PACKED 24 -+ -+/* defines for first flit of a route */ -+#define FIRST_TIMEOUT(Val) ((Val) << 14) /* [15:14] */ -+#define FIRST_SYSTEM_PACKET (1 << 13) /* [13] */ -+#define FIRST_FLOOD_PACKET (1 << 12) /* [12] */ -+#define FIRST_HIGH_PRI (1 << 11) /* [11] */ -+#define FIRST_AGE(Val) ((Val) << 7) /* [10:7] */ -+#define FIRST_OPTIONS_MASK (0xFF80) -+ -+/* [6:0] unpacked 1st route value */ -+#define FIRST_INVALID (0) -+#define FIRST_ROUTE(Val) (0x08 | (Val)) -+#define FIRST_ADAPTIVE (0x30) -+#define FIRST_BCAST_TREE (0x20) -+#define FIRST_MYLINK (0x10) -+#define FIRST_BCAST(Top, Bot) (0x40 | ((Top) << 3) | (Bot)) -+ -+/* defines for 3 bit packed entries for subsequent flits */ -+#define PACKED_INVALID (0) -+#define PACKED_ROUTE(Val) (8 | (Val)) -+#define PACKED_ADAPTIVE (3) -+#define PACKED_BCAST_TREE (2) -+#define PACKED_MYLINK (1) -+#define PACKED_BCAST0(Top,Bot) (4 | (Bot & 3)) -+#define PACKED_BCAST1(Top,Bot) ((Top << 1) | (Bot >> 2)) -+ -+#endif /* _ASM */ -+/* The MMU root context pointer has a mask to bounds check -+ * it - this is computed as follows. -+ */ -+#define E4_CONTEXT_MASK(num) (((num) >= 0x2000) ? 0x00 : \ -+ ((num) >= 0x1000) ? 0x80 : \ -+ ((num) >= 0x0800) ? 0xc0 : \ -+ ((num) >= 0x0400) ? 0xe0 : \ -+ ((num) >= 0x0200) ? 0xf0 : \ -+ ((num) >= 0x0100) ? 0xf8 : \ -+ ((num) >= 0x0080) ? 0xfc : \ -+ ((num) >= 0x0040) ? 0xfe : 0xff) -+/* -+ * This generates the size field for a virtual process table. -+ * Size defined as 2^n no of 8K pages. -+ * Single cycle route fetches are possible if the minimum vproc table size is 8k. -+ */ -+#define E4_GEN_VPT_SIZE(Size) (((Size) & E4_VPT_SIZE_MASK) << E4_VPT_SIZE_SHIFT) -+ -+#define COMMAND_RUN_QUEUE_BITS (13 + 2) /* 8K entries of 4 bytes. This is fixed in hardware. */ -+#define COMMAND_DESCS_SPACE_BITS (13 + 5) /* 8K entries of 32 bytes. This is fixed in hardware. */ -+#define COMMAND_INSERTER_CACHE_ENTRIES 16 -+ -+#define COM_TEST_PORT_ADDR_MASK 0xfULL -+#define COM_TEST_PORT_ADDR_SH 0 -+ -+/* -+ * The flush register is accessed through the CommandControl register. -+ * The address is naturally alligned. It also positions the command descriptors in memory. -+ * When no command queues need flushing it should be or with COM_FLUSH_INVALID. This sets -+ * it to the top command queue descriptor. This cannot be accessed from the PCI. -+ */ -+#define COM_ENABLE_DEQUEUE (1 << 4) -+#define COM_FLUSH_DESCRIPTOR_MASK 0x7fffffe0ULL -+#define COM_FLUSH_INVALID 0x0003ffe0ULL -+ -+ -+/* -+ * Elan4 BAR1 is split up as follows : -+ * -+ * RevA -+ * 0x3f00000 EBUS other -+ * 0x3e00000 EBUS ROM -+ * 0x3dfc000 registers -+ * 0x0000000 command ports -+ * -+ * RevB -+ * 0x3ffc000 registers -+ * 0x3ff8000 padding -+ * 0x3ff6000 i2c registers -+ * 0x0000000 command ports -+ */ -+#define ELAN4_BAR1_SIZE (1 << 26) /* 64M */ -+#define ELAN4_REG_SIZE (1 << 14) /* 16K */ -+ -+#define ELAN4_REVA_EBUS_SIZE (1 << 21) /* 2M */ -+#define ELAN4_REVA_EBUS_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REVA_EBUS_SIZE) -+#define ELAN4_REVA_REG_OFFSET (ELAN4_REVA_EBUS_OFFSET - ELAN4_REG_SIZE) -+#define ELAN4_REVA_NUM_COMMAND_QUEUES (ELAN4_REVA_REG_OFFSET >> 13) -+ -+#define ELAN4_REVA_EBUS_ROM_SIZE (1 << 20) /* 1M */ -+#define ELAN4_REVA_EBUS_ROM_OFFSET 0 -+ -+#define ELAN4_REVB_I2C_PADDING (1 << 14) /* 16K */ -+#define ELAN4_REVB_I2C_SIZE (1 << 13) /* 8k */ -+#define ELAN4_REVB_REG_OFFSET (ELAN4_BAR1_SIZE - ELAN4_REG_SIZE) -+#define ELAN4_REVB_I2C_OFFSET (ELAN4_REVB_REG_OFFSET - ELAN4_REVB_I2C_PADDING - ELAN4_REVB_I2C_SIZE) -+#define ELAN4_REVB_NUM_COMMAND_QUEUES (ELAN4_REVB_I2C_OFFSET >> 13) -+ -+#endif /* notdef _ELAN4_REGISTERS_H */ -Index: linux-2.6.5-7.191/include/elan4/sdram.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/sdram.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/sdram.h 2005-07-28 14:52:52.967660648 -0400 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_SDRAM_H -+#define __ELAN4_SDRAM_H -+ -+#ident "$Id: sdram.h,v 1.8 2003/09/24 13:55:55 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/sdram.h,v $*/ -+ -+/* Include header file generated by sdram configuration program */ -+#include -+ -+/* SDRAM bank shift definitions */ -+#define SDRAM_0_CS_SHIFT 25 -+#define SDRAM_1_CS_SHIFT 27 -+#define SDRAM_2_CS_SHIFT 28 -+#define SDRAM_3_CS_SHIFT 29 -+ -+#define SDRAM_BANK_SHIFT(cfg) \ -+ (((cfg >> SDRAM_RamSize_SH) & 3) == 0 ? SDRAM_0_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 1 ? SDRAM_1_CS_SHIFT : \ -+ ((cfg >> SDRAM_RamSize_SH) & 3) == 2 ? SDRAM_2_CS_SHIFT : SDRAM_3_CS_SHIFT) -+ -+#define SDRAM_BANK_SIZE(cfg) (1ULL << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_BANK_OFFSET(cfg,bank) ((unsigned long long)(bank) << SDRAM_BANK_SHIFT(cfg)) -+#define SDRAM_NUM_BANKS(cfg) (4) -+#define SDRAM_MAX_BANKS 4 -+ -+/* When the elan access sdram it passes eaddr[12] as sdramaddr[12] when -+ * running with a 4k page size, however PCI accesses pass paddr[12], so -+ * we must ensure that sdram pages are allocated such that eaddr[12] is the -+ * same as paddr[12] - the easiest way is to allocate sdram in 8k chunks and -+ * ensure that maddr[12] == eaddr[12] == pgoff[0] */ -+#define SDRAM_MIN_PAGE_SIZE (8192) -+ -+#endif /* __ELAN4_SDRAM_H */ -Index: linux-2.6.5-7.191/include/elan4/stats.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/stats.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/stats.h 2005-07-28 14:52:52.967660648 -0400 -@@ -0,0 +1,83 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: stats.h,v 1.10.12.1 2004/10/06 11:09:12 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/stats.h,v $*/ -+ -+#ifndef __ELAN4_STATS_H -+#define __ELAN4_STATS_H -+ -+#define ELAN4_DEV_STATS_BUCKETS 8 -+ -+ -+typedef struct elan4_dev_stats -+{ -+ unsigned long s_interrupts; -+ -+ unsigned long s_mainints[ELAN4_DEV_STATS_BUCKETS]; -+ unsigned long s_mainint_punts; -+ unsigned long s_mainint_rescheds; -+ -+ unsigned long s_haltints; -+ -+ unsigned long s_cproc_traps; -+ unsigned long s_dproc_traps; -+ unsigned long s_eproc_traps; -+ unsigned long s_iproc_traps; -+ unsigned long s_tproc_traps; -+ -+ unsigned long s_cproc_trap_types[0x10]; -+ unsigned long s_dproc_trap_types[6]; -+ unsigned long s_eproc_trap_types[4]; -+ unsigned long s_iproc_trap_types[0xa]; -+ unsigned long s_tproc_trap_types[7]; -+ -+ unsigned long s_correctable_errors; -+ unsigned long s_multiple_errors; -+ -+ unsigned long s_link_errors; -+ unsigned long s_lock_errors; -+ unsigned long s_deskew_errors; -+ unsigned long s_phase_errors; -+ unsigned long s_data_errors; -+ unsigned long s_fifo_overflow0; -+ unsigned long s_fifo_overflow1; -+ unsigned long s_mod45changed; -+ unsigned long s_pack_not_seen; -+ unsigned long s_linkport_keyfail; -+ -+ unsigned long s_eop_reset; -+ unsigned long s_bad_length; -+ unsigned long s_crc_bad; -+ unsigned long s_crc_error; -+ -+ unsigned long s_cproc_timeout; -+ unsigned long s_dproc_timeout; -+ -+ unsigned long s_sdram_bytes_free; -+} ELAN4_DEV_STATS; -+ -+#define MainIntBuckets ((int[ELAN4_DEV_STATS_BUCKETS-1]) {1, 2, 3, 4, 8, 16, 32}) -+ -+#define BumpDevStat(dev,stat) ((dev)->dev_stats.stat++) -+#define BucketDevStat(dev,stat,n,bucket) ((n) <= (bucket)[0] ? (dev)->dev_stats.stat[0]++ : \ -+ (n) <= (bucket)[1] ? (dev)->dev_stats.stat[1]++ : \ -+ (n) <= (bucket)[2] ? (dev)->dev_stats.stat[2]++ : \ -+ (n) <= (bucket)[3] ? (dev)->dev_stats.stat[3]++ : \ -+ (n) <= (bucket)[4] ? (dev)->dev_stats.stat[4]++ : \ -+ (n) <= (bucket)[5] ? (dev)->dev_stats.stat[5]++ : \ -+ (n) <= (bucket)[6] ? (dev)->dev_stats.stat[6]++ : \ -+ (dev)->dev_stats.stat[7]++) -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /*__ELAN4_STATS_H */ -Index: linux-2.6.5-7.191/include/elan4/tprintf.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/tprintf.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/tprintf.h 2005-07-28 14:52:52.968660496 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TPRINTF_H -+#define __ELAN4_TPRINTF_H -+ -+#ident "$Id: tprintf.h,v 1.6 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/tprintf.h,v $*/ -+ -+ -+#ifdef _ASM -+#define TPRINTF0(string) add %r0, __LINE__, %r0 -+#define TPRINTF1(string,reg) add reg, __LINE__, %r0 -+#else -+#define TPRINTF0(string) asm volatile ("add %%r0, %0, %%r0" : : "i" (__LINE__)) -+#define TPRINTF1(string, value) asm volatile ("add %0, %1, %%r0" : : "r" (value), "i" (__LINE__)) -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TPRINTF_H */ -Index: linux-2.6.5-7.191/include/elan4/trap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/trap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/trap.h 2005-07-28 14:52:52.968660496 -0400 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: trap.h,v 1.10 2003/10/07 12:11:10 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/trap.h,v $*/ -+ -+#ifndef __ELAN4_TRAP_H -+#define __ELAN4_TRAP_H -+ -+/* -+ * If the EProc Faults whilst performing an action (e.g. Read/Write on the data src or dest Addr) -+ * the Eproc increments the Addr(s) by a block size (64 bytes): -+ * 1: Fault on Read: -+ * Src EventAddr = Read Addr + block -+ * 2: Fault on Write: -+ * Src EventAddr = Read Addr + block -+ * Dst EventAddr = Read Addr + block -+ * Size = Size - block ndwords -+ * We must rewind the addr correctly to completely the transfer successfully -+ */ -+#define EVENT_COPY_NDWORDS 0x8 -+#define EVENT_COPY_BLOCK_SIZE 0x40 -+ -+typedef struct elan4_eproc_trap -+{ -+ E4_uint64 tr_status; -+ E4_FaultSave tr_faultarea; -+ E4_Event tr_event; -+ E4_Addr tr_eventaddr; -+} ELAN4_EPROC_TRAP; -+ -+typedef struct elan4_cproc_trap -+{ -+ E4_uint64 tr_status; /* cproc status register */ -+ E4_uint64 tr_command; /* cproc command */ -+ E4_CommandQueueDesc tr_qdesc; /* copy of command queue descriptor */ -+ E4_FaultSave tr_faultarea; /* fault area for mmu traps */ -+ ELAN4_EPROC_TRAP tr_eventtrap; /* associated event trap (waitevent) */ -+} ELAN4_CPROC_TRAP; -+ -+typedef struct elan4_dproc_trap -+{ -+ E4_DMA tr_desc; -+ E4_FaultSave tr_packAssemFault; -+ E4_FaultSave tr_prefetchFault; -+ E4_uint64 tr_status; -+} ELAN4_DPROC_TRAP; -+ -+typedef struct elan4_tproc_trap -+{ -+ E4_uint64 tr_regs[64]; -+ E4_FaultSave tr_dataFault; -+ E4_FaultSave tr_instFault; -+ E4_uint64 tr_status; -+ E4_uint64 tr_state; -+ E4_Addr tr_pc; -+ E4_Addr tr_npc; -+ E4_uint64 tr_dirty; -+ E4_uint64 tr_bad; -+} ELAN4_TPROC_TRAP; -+ -+typedef struct elan4_iproc_trap -+{ -+ E4_uint32 tr_numTransactions; -+ E4_uint32 tr_flags; -+ E4_uint32 tr_trappedTrans; -+ E4_uint32 tr_waitForEopTrans; -+ E4_uint32 tr_identifyTrans; -+ E4_uint32 tr_pad; -+ -+ E4_FaultSave tr_faultarea; -+ E4_IprocTrapHeader tr_transactions[MAX_TRAPPED_TRANS]; -+ E4_IprocTrapData tr_dataBuffers[MAX_TRAPPED_TRANS]; -+} ELAN4_IPROC_TRAP; -+ -+#define TR_FLAG_ACK_SENT (1 << 0) -+#define TR_FLAG_EOP_ERROR (1 << 1) -+#define TR_FLAG_BAD_TRANS (1 << 2) -+#define TR_FLAG_DMA_PACKET (1 << 3) -+#define TR_FLAG_EOP_BAD (1 << 4) -+#define TR_FLAG_TOOMANY_TRANS (1 << 5) -+ -+#define TR_TRANS_INVALID (0xffffffff) -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_TRAP_H */ -Index: linux-2.6.5-7.191/include/elan4/trtype.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/trtype.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/trtype.h 2005-07-28 14:52:52.968660496 -0400 -@@ -0,0 +1,112 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _ELAN4_TRTYPE_H -+#define _ELAN4_TRTYPE_H -+ -+#ident "$Id: trtype.h,v 1.20 2004/02/06 10:38:21 mike Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/trtype.h,v $*/ -+ -+/*<15:11> Size field is used to give the number of additional 64 bit data values. -+ A value from 0 to 16 inclusive is valid. */ -+ -+#include -+ -+#define TR_SIZE_SHIFT (11) -+#define TR_SIZE_MASK (0x1f << TR_SIZE_SHIFT) -+#define SET_TR_SIZE(Size) (((Size) << TR_SIZE_SHIFT) & TR_SIZE_MASK) -+ -+/* <10:9> Last Transaction and AckNow bits, marks the last transaction and -+ enables a PACK_OK to be sent. */ -+#define TR_LAST_AND_SEND_ACK (3 << 9) -+ -+ -+/* <8> Only valid on the last transaction. Delays execution until an EOP_GOOD is received. -+ * Any other EOP type will abort execution of this transaction. */ -+#define TR_WAIT_FOR_EOP (1 << 8) -+ -+/* -+ * Data type. This is used by transactions of variable data type. It controls any endian -+ * converion required if the destiantion host processor has a big endian memory format. -+ */ -+/* WriteBlock <8:7> Data type -+ <6:0> Part write size */ -+#define TR_DATATYPE_SHIFT (6) -+#define TR_DATATYPE_MASK ((1 << 2) - 1) -+ -+#define TR_DATATYPE_BYTE E4_DATATYPE_BYTE -+#define TR_DATATYPE_SHORT E4_DATATYPE_SHORT -+#define TR_DATATYPE_WORD E4_DATATYPE_WORD -+#define TR_DATATYPE_DWORD E4_DATATYPE_DWORD -+ -+/* <5:0> Transaction Type -+ * For Writeblock <5:3> 000 => Write, 0001 => Read -+ * <2:0> End Byte Addr */ -+#define TR_OPCODE_MASK 0x3F -+#define TR_BLOCK_OPCODE_MASK 0x38 -+ -+#define TR_WRITEBLOCK 0x0 -+#define TR_ENDBYTE_MASK 0x7 -+#define TR_WRITE(Size, EndByte, DataType) \ -+ (0x0 | SET_TR_SIZE(Size) | ((EndByte) & TR_ENDBYTE_MASK) | \ -+ (((DataType) & TR_DATATYPE_MASK) << TR_DATATYPE_SHIFT)) -+ -+#define TR_NOP_TRANS (0x10 | SET_TR_SIZE(0)) -+#define TR_SETEVENT 0x10 -+#define TR_SETEVENT_NOIDENT (TR_SETEVENT | SET_TR_SIZE(0) | TR_LAST_AND_SEND_ACK) -+#define TR_SETEVENT_IDENTIFY (TR_SETEVENT | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_REMOTEDMA (0x11 | SET_TR_SIZE(7) | TR_LAST_AND_SEND_ACK) -+#define TR_SENDDISCARD (0x12 | SET_TR_SIZE(0)) -+ -+/* -+ * Conditional transactions that might return PAckTestFail. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where a definite TRUE/FALSE answer is required. -+ */ -+#define TR_GTE (0x14 | SET_TR_SIZE(1)) -+#define TR_LT (0x15 | SET_TR_SIZE(1)) -+#define TR_EQ (0x16 | SET_TR_SIZE(1)) -+#define TR_NEQ (0x17 | SET_TR_SIZE(1)) -+ -+/* -+ * Conditional transactions that might return PAckDiscard. -+ * All will allow further exection of the packet if ([Address] operator DataValue) is true. -+ * e.g. for TR_GTE further execution if ([Address] >= DataValue) is true. -+ * These should be used where eventually a TRUE answer is expected but the node might not be ready yet. -+ * These can be mixed with the normal conditionals to allow a single packet to test for readyness and -+ * a TRUE/FALSE answer. -+ */ -+#define TR_GTE_DISCARD (0x34 | SET_TR_SIZE(1)) -+#define TR_LT_DISCARD (0x35 | SET_TR_SIZE(1)) -+#define TR_EQ_DISCARD (0x36 | SET_TR_SIZE(1)) -+#define TR_NEQ_DISCARD (0x37 | SET_TR_SIZE(1)) -+ -+#define TR_TRACEROUTE_TRANS 0x18 -+#define TR_TRACEROUTE(Size) (TR_TRACEROUTE_TRANS | (TR_DATATYPE_WORD << TR_DATATYPE_SHIFT) |SET_TR_SIZE(Size)) -+#define TR_IDENTIFY (0x19 | SET_TR_SIZE(0)) -+ -+#define TR_ADDWORD (0x1c | SET_TR_SIZE(2) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_COMMIT (0x1d | SET_TR_SIZE(1) | TR_LAST_AND_SEND_ACK) -+#define TR_TESTANDWRITE (0x1e | SET_TR_SIZE(3) | TR_LAST_AND_SEND_ACK) -+#define TR_INPUT_Q_GETINDEX (0x1f | SET_TR_SIZE(0)) -+ -+ -+ -+/* TraceRoute formate */ -+#define TR_TRACEROUTE0_CHANID(val) ((val) & 1) /* 0 Chan Id */ -+#define TR_TRACEROUTE0_LINKID(val) (((val) >> 1) & 7) /* 1:3 Link Id */ -+#define TR_TRACEROUTE0_REVID(val) (((val) >> 4) & 7) /* 4:6 Revision Id */ -+#define TR_TRACEROUTE0_BCAST_PIN(val) (((val) >> 7) & 1) /* 7 Bcast Top Pin */ -+#define TR_TRACEROUTE0_LNR(val) (((val) >> 8) & 0xFF) /* 8:15 Global Link Not Ready */ -+ -+#define TR_TRACEROUTE1_ROUTES_SELECTED(val) ((val & 0xFF)) /* 0:7 Routes Selected */ -+#define TR_TRACEROUTE1_BCAST_TOP(val) (((val) >> 8) & 7) /* 8:10 Broadcast Top */ -+#define TR_TRACEROUTE1_BCAST_BOTTOM(val) (((val) >> 12) & 7) /* 12:14 Broadcast Bottom */ -+ -+#endif /* _ELAN4_TRANSACTIONTYPE_H */ -Index: linux-2.6.5-7.191/include/elan4/types.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/types.h 2005-07-28 14:52:52.969660344 -0400 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_TYPES_H -+#define __ELAN4_TYPES_H -+ -+#ident "@(#)$Id: types.h,v 1.9 2003/09/04 12:39:17 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/types.h,v $*/ -+ -+#include -+/* -+ * "flip" values for correctly indexing into -+ * block data which was copied from the Elan -+ * using 64 bit accesses. -+ */ -+#if defined(__LITTLE_ENDIAN__) -+# define ByteEndianFlip 0 -+# define ShortEndianFlip 0 -+# define WordEndianFlip 0 -+#else -+# define ByteEndianFlip 7 -+# define ShortEndianFlip 3 -+# define WordEndianFlip 1 -+#endif -+ -+ -+#ifndef _ASM -+ -+typedef signed int E4_int; -+typedef unsigned int E4_uint; -+ -+typedef signed char E4_int8; -+typedef unsigned char E4_uint8; -+ -+typedef signed short E4_int16; -+typedef unsigned short E4_uint16; -+ -+typedef signed int E4_int32; -+typedef unsigned int E4_uint32; -+ -+#ifdef _LP64 -+typedef signed long E4_int64; -+typedef unsigned long E4_uint64; -+#else -+typedef signed long long E4_int64; -+typedef unsigned long long E4_uint64; -+#endif -+ -+/* 64-bit Elan4 */ -+typedef E4_uint64 E4_Addr; -+typedef E4_uint32 E4_LocPhysAddr; /* Really 31 bits */ -+ -+#define OneK (1024) -+#define EightK (8*OneK) -+ -+#define E4_DATATYPE_BYTE 0 -+#define E4_DATATYPE_SHORT 1 -+#define E4_DATATYPE_WORD 2 -+#define E4_DATATYPE_DWORD 3 -+ -+#endif /* _ASM */ -+ -+#endif /* __ELAN4_TYPES_H */ -+ -Index: linux-2.6.5-7.191/include/elan4/user.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/user.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/user.h 2005-07-28 14:52:52.970660192 -0400 -@@ -0,0 +1,344 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: user.h,v 1.37.2.2 2004/11/18 17:54:17 duncant Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/user.h,v $*/ -+ -+#ifndef __ELAN4_USER_H -+#define __ELAN4_USER_H -+ -+#include -+#include -+#include -+ -+typedef struct trap_queue -+{ -+ unsigned q_back; /* Next free space */ -+ unsigned q_front; /* First object to remove */ -+ unsigned q_size; /* Size of queue */ -+ unsigned q_count; /* Current number of entries */ -+ unsigned q_slop; /* FULL <=> (count+slop) == size */ -+} RING_QUEUE; -+ -+#define RING_QUEUE_INIT(q,num,slop) ((q).q_size = (num), (q).q_slop = (slop), (q).q_front = (q).q_back = 0, (q).q_count = 0) -+#define RING_QUEUE_FULL(q) ((q).q_count >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REALLY_FULL(q) ((q).q_count == (q).q_size) -+#define RING_QUEUE_EMPTY(q) ((q).q_count == 0) -+#define RING_QUEUE_NEXT(q,indx) ((indx) = (((indx)+1) % (q).q_size)) -+#define RING_QUEUE_PREV(q,indx) ((indx) = (((indx)+(q).q_size-1) % (q).q_size)) -+#define RING_QUEUE_ADD(q) (RING_QUEUE_NEXT(q ,(q).q_back), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_REMOVE(q) (RING_QUEUE_NEXT(q, (q).q_front), (--(q).q_count) == 0) -+#define RING_QUEUE_ADD_FRONT(q) (RING_QUEUE_PREV(q, (q).q_front), (++(q).q_count) >= ((q).q_size - (q).q_slop)) -+#define RING_QUEUE_ENTRY(qArea,indx) (&(qArea)[(indx)]) -+#define RING_QUEUE_FRONT(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_front) -+#define RING_QUEUE_BACK(q,qArea) RING_QUEUE_ENTRY(qArea, (q).q_back) -+#define RING_QUEUE_ITERATE(q,idx) for (idx = (q).q_front; idx != (q).q_back; idx = (((idx) + 1) % (q).q_size)) -+ -+typedef struct user_rgn -+{ -+ struct user_rgn *rgn_mnext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_mprev; /* sorted on main address */ -+ virtaddr_t rgn_mbase; /* main address of base of region */ -+ -+ struct user_rgn *rgn_enext; /* Doubly linked list of regions */ -+ struct user_rgn *rgn_eprev; /* sorted on elan address */ -+ E4_Addr rgn_ebase; /* elan address of base of region */ -+ -+ unsigned long rgn_len; /* length of region */ -+ unsigned rgn_perm; /* elan access permission */ -+} USER_RGN; -+ -+typedef struct user_vpseg -+{ -+ struct list_head vps_link; -+ -+ unsigned short vps_process; /* virtual process number */ -+ unsigned short vps_entries; /* and # virtual processes */ -+ -+ unsigned vps_type; -+ union -+ { -+ struct { -+ ELAN_CAPABILITY *cap; -+ E4_VirtualProcessEntry *routes; -+ } p2p; -+#define vps_p2p_cap vps_u.p2p.cap -+#define vps_p2p_routes vps_u.p2p.routes -+ -+ struct { -+ unsigned short lowvp; -+ unsigned short highvp; -+ } bcast; -+#define vps_bcast_lowvp vps_u.bcast.lowvp -+#define vps_bcast_highvp vps_u.bcast.highvp -+ } vps_u; -+} USER_VPSEG; -+ -+/* values for vps_type */ -+#define USER_VPSEG_P2P 0 -+#define USER_VPSEG_BCAST 1 -+ -+typedef struct user_cq -+{ -+ struct list_head ucq_link; -+ -+ ELAN4_CQ *ucq_cq; /* the real command queue */ -+ -+ unsigned char ucq_state; /* command queue state */ -+ unsigned char ucq_errored; /* command queue has errored */ -+ unsigned char ucq_flags; /* flags */ -+ ELAN4_CPROC_TRAP ucq_trap; /* trap state */ -+ -+ atomic_t ucq_ref; /* # references to this cq (mmaps) */ -+} USER_CQ; -+ -+/* values for ucq_state */ -+#define UCQ_RUNNING 0 /* command queue is running */ -+#define UCQ_TRAPPED 1 /* command queue has trapped */ -+#define UCQ_NEEDS_RESTART 2 /* command queue has trapped, and needs restarting */ -+#define UCQ_STOPPED 3 /* command queue has trapped, and delivered to user */ -+ -+/* values for ucq_flags */ -+#define UCQ_SYSTEM (1 << 0) -+#define UCQ_REORDER (1 << 1) -+ -+extern int num_fault_save; -+extern int min_fault_pages; -+extern int max_fault_pages; -+ -+typedef struct fault_save -+{ -+ struct fault_save *next; -+ E4_Addr addr; -+ E4_uint32 count; -+} FAULT_SAVE; -+ -+typedef struct user_iproc_trap -+{ -+ unsigned char ut_state; -+ ELAN4_IPROC_TRAP ut_trap; -+} USER_IPROC_TRAP; -+ -+/* values for ut_state */ -+#define UTS_IPROC_RUNNING 0 -+#define UTS_IPROC_TRAPPED 1 -+#define UTS_IPROC_RESOLVING 2 -+#define UTS_IPROC_EXECUTE_PACKET 3 -+#define UTS_IPROC_EXECUTING 4 -+#define UTS_IPROC_NETWORK_ERROR 5 -+#define UTS_IPROC_STOPPED 6 -+ -+typedef struct user_ctxt_entry -+{ -+ struct list_head cent_link; /* entry chained on context */ -+ ELAN_CAPABILITY *cent_cap; /* capability we attached with */ -+} USER_CTXT_ENTRY; -+ -+typedef struct user_ctxt -+{ -+ ELAN4_CTXT uctx_ctxt; /* is also an elan context */ -+ -+ spinlock_t uctx_spinlock; /* spinlock for items used with interrupt handler */ -+ kcondvar_t uctx_wait; /* place to sleep (traphandler/swapout/swapin/neterr fixup) */ -+ -+ unsigned uctx_status; /* status (uctx_spinlock) */ -+ -+ pid_t uctx_trap_pid; /* pid to deliver signals to on trap */ -+ int uctx_trap_signo; /* signal number to deliver */ -+ unsigned uctx_trap_state; /* state of trap handling code */ -+ unsigned uctx_trap_count; /* count of "thread" in user_trap_handler() */ -+ -+ unsigned uctx_int_count; /* # interrupts since last zeroed */ -+ unsigned long uctx_int_start; /* tick when int_count last zeroed */ -+ unsigned long uctx_int_delay; /* # ticks to delay next wakeup */ -+ struct timer_list uctx_int_timer; /* and timer to use to delay signal */ -+ -+ struct timer_list uctx_neterr_timer; /* network error timer */ -+ -+ struct list_head uctx_vpseg_list; /* list of vp segments we've got */ -+ kmutex_t uctx_vpseg_lock; /* and lock to protect it. */ -+ ELAN4_ROUTE_TABLE *uctx_routetable; /* our virtual process table */ -+ ELAN_POSITION uctx_position; /* position in network */ -+ -+ struct list_head uctx_cent_list; /* list of attached network contexts */ -+ -+ USER_CQ *uctx_ddcq; /* command queue for re-issueing traps */ -+ E4_uint64 uctx_ddcq_insertcnt; /* # dwords inserted into command queue */ -+ E4_uint64 uctx_ddcq_completed; /* last "completed" write was here */ -+ int uctx_ddcq_intr; /* count of outstanding ddcq interrupts */ -+ -+ ELAN4_HALTOP uctx_haltop; /* halt operation for flushing */ -+ ELAN4_DMA_FLUSHOP uctx_dma_flushop; /* flush operation for flushing dma runqueue */ -+ -+ INTCOOKIE_TABLE *uctx_intcookie_table; /* table of interrupt cookies (shared with other uctxs for this task) */ -+ -+ kmutex_t uctx_cqlock; /* lock for create/destory cqs */ -+ struct list_head uctx_cqlist; /* list of command queues (uctx_cqlock,uctx_spinlock) */ -+ -+ ELAN4_DPROC_TRAP *uctx_dprocTraps; /* queue of dproc traps to resolve/reissue */ -+ RING_QUEUE uctx_dprocTrapQ; -+ -+ ELAN4_TPROC_TRAP *uctx_tprocTraps; /* queue of tproc traps to resolve/reissue */ -+ RING_QUEUE uctx_tprocTrapQ; -+ -+ ELAN4_EPROC_TRAP *uctx_eprocTraps; /* queue of eproc traps to resolve */ -+ RING_QUEUE uctx_eprocTrapQ; -+ -+ USER_IPROC_TRAP uctx_iprocTrap[2]; /* input trap state, 1 per virtual channel */ -+ -+ E4_DMA *uctx_dmas; /* queue of dmas to restart */ -+ RING_QUEUE uctx_dmaQ; -+ -+ E4_ThreadRegs *uctx_threads; /* queue of threads to restart */ -+ RING_QUEUE uctx_threadQ; -+ -+ ELAN4_NETERR_MSG *uctx_msgs; /* queue of neterr messages */ -+ RING_QUEUE uctx_msgQ; -+ kmutex_t uctx_rgnmutex; /* lock for create/destroy regions */ -+ spinlock_t uctx_rgnlock; /* spinlock to protect linked lists */ -+ USER_RGN *uctx_mrgns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_mtail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_mrgnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ USER_RGN *uctx_ergns; /* Doubly linked list of memory regions (uctx_rgnlock) */ -+ USER_RGN *uctx_etail; /* Last memory region on list (uctx_rgnlock) */ -+ USER_RGN *uctx_ergnlast; /* Last region 'hit' (uctx_rgnlock) */ -+ -+ ELAN4_USER_PAGE *uctx_upage; /* kernel page shared with user */ -+ sdramaddr_t uctx_trampoline; /* sdram page for tproc trampoline */ -+ -+ E4_Addr uctx_upage_addr; /* elan addr page mapped into */ -+ E4_Addr uctx_trestart_addr; /* address of thread restart code */ -+ FAULT_SAVE *uctx_faults; -+ FAULT_SAVE *uctx_fault_list; -+ int uctx_num_fault_save; -+ spinlock_t uctx_fault_lock; -+} USER_CTXT; -+ -+/* bit values for uctx_status */ -+#define UCTX_EXITING (1 << 0) /* context is exiting. */ -+#define UCTX_USER_FILTERING (1 << 1) /* user requested context filter */ -+#define UCTX_USER_STOPPED (1 << 2) /* user requested stop */ -+ -+#define UCTX_SWAPPING (1 << 3) /* context is swapping out */ -+#define UCTX_SWAPPED (1 << 4) /* context is swapped out */ -+ -+#define UCTX_STOPPING (1 << 5) /* stopping elan from running this context */ -+#define UCTX_STOPPED (1 << 6) /* elan no longer running this context */ -+ -+#define UCTX_EPROC_QUEUE_FULL (1 << 7) /* reasons for stopping running */ -+#define UCTX_DPROC_QUEUE_FULL (1 << 8) -+#define UCTX_TPROC_QUEUE_FULL (1 << 9) -+#define UCTX_IPROC_CH0_TRAPPED (1 << 10) -+#define UCTX_IPROC_CH1_TRAPPED (1 << 11) -+ -+#define UCTX_NETERR_TIMER (1 << 12) -+#define UCTX_NETERR_FIXUP (1 << 13) -+ -+#define UCTX_EPROC_QUEUE_OVERFLOW (1 << 14) -+#define UCTX_DPROC_QUEUE_OVERFLOW (1 << 15) -+#define UCTX_TPROC_QUEUE_OVERFLOW (1 << 16) -+ -+#define UCTX_EPROC_QUEUE_ERROR (1 << 17) -+#define UCTX_DPROC_QUEUE_ERROR (1 << 18) -+#define UCTX_TPROC_QUEUE_ERROR (1 << 19) -+ -+#define UCTX_STOPPED_REASONS (UCTX_EPROC_QUEUE_FULL | UCTX_DPROC_QUEUE_FULL | UCTX_TPROC_QUEUE_FULL) -+#define UCTX_SWAPPED_REASONS (UCTX_EXITING | UCTX_USER_STOPPED | UCTX_NETERR_FIXUP) -+#define UCTX_NACKING_REASONS (UCTX_USER_FILTERING | UCTX_IPROC_CH0_TRAPPED | UCTX_IPROC_CH1_TRAPPED) -+ -+#define UCTX_OVERFLOW_REASONS (UCTX_EPROC_QUEUE_OVERFLOW | UCTX_DPROC_QUEUE_OVERFLOW | UCTX_TPROC_QUEUE_OVERFLOW) -+#define UCTX_ERROR_REASONS (UCTX_EPROC_QUEUE_ERROR | UCTX_DPROC_QUEUE_ERROR | UCTX_TPROC_QUEUE_ERROR) -+ -+#define UCTX_RUNNABLE(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS)) == 0) -+#define UCTX_NACKING(uctx) (((uctx)->uctx_status & (UCTX_SWAPPED_REASONS | UCTX_STOPPED_REASONS | UCTX_NACKING_REASONS)) != 0) -+ -+/* values for uctx_trap_signalled */ -+#define UCTX_TRAP_IDLE 0 -+#define UCTX_TRAP_SLEEPING 1 -+#define UCTX_TRAP_SIGNALLED 2 -+#define UCTX_TRAP_ACTIVE 3 -+ -+extern int user_p2p_route_options; -+extern int user_bcast_route_options; -+extern int user_dproc_retry_count; -+extern int user_cproc_retry_count; -+ -+extern USER_CTXT *user_alloc (ELAN4_DEV *dev); -+extern void user_free (USER_CTXT *uctx); -+extern void user_swapout (USER_CTXT *uctx, unsigned reason); -+extern void user_swapin (USER_CTXT *uctx, unsigned reason); -+extern int user_attach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_detach (USER_CTXT *uctx, ELAN_CAPABILITY *cap); -+extern void user_block_inputter (USER_CTXT *uctx, unsigned blocked); -+extern int user_alloc_trap_queues (USER_CTXT *uctx, unsigned ndproc_traps, unsigned neproc_traps, -+ unsigned ntproc_traps, unsigned nthreads, unsigned ndmas); -+ -+extern int user_add_p2pvp (USER_CTXT *uctx, unsigned process, ELAN_CAPABILITY *cap); -+extern int user_add_bcastvp (USER_CTXT *uctx, unsigned process, unsigned lowvp, unsigned highvp); -+extern int user_removevp (USER_CTXT *uctx, unsigned process); -+ -+extern int user_set_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_reset_route (USER_CTXT *uctx, unsigned process); -+extern int user_get_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route); -+extern int user_check_route (USER_CTXT *uctx, unsigned process, E4_VirtualProcessEntry *route, unsigned *error); -+extern int user_send_neterr_msg (USER_CTXT *uctx, unsigned int vp, unsigned int nctx, unsigned int retries, ELAN4_NETERR_MSG *msg); -+extern int user_neterr_sten (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+extern int user_neterr_dma (USER_CTXT *uctx, unsigned int vp, E4_uint64 cookie, int waitforeop); -+ -+extern int user_resume_eproc_trap (USER_CTXT *uctx, E4_Addr addr); -+extern int user_resume_cproc_trap (USER_CTXT *uctx, unsigned indx); -+extern int user_resume_dproc_trap (USER_CTXT *uctx, E4_DMA *dma); -+extern int user_resume_tproc_trap (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern int user_resume_iproc_trap (USER_CTXT *uctx, unsigned channel, unsigned trans, -+ E4_IprocTrapHeader *hdrp, E4_IprocTrapData *datap); -+ -+extern int user_trap_handler (USER_CTXT *uctx, ELAN4_USER_TRAP *utrapp, int nticks); -+extern USER_CQ *user_findcq (USER_CTXT *uctx, unsigned num); -+extern USER_CQ *user_alloccq (USER_CTXT *uctx, unsigned size, unsigned perm, unsigned flags); -+extern void user_freecq (USER_CTXT *uctx, USER_CQ *cq); -+extern void user_dropcq (USER_CTXT *uctx, USER_CQ *cq); -+ -+/* user_osdep.c */ -+extern int user_load_range (USER_CTXT *uctx, E4_Addr addr, unsigned long nbytes, E4_uint32 fsr); -+extern void user_update_main (USER_CTXT *uctx, struct mm_struct *mm, unsigned long start, unsigned long len); -+extern void user_unload_main (USER_CTXT *uctx, unsigned long start, unsigned long len); -+ -+ -+/* regions.c */ -+extern USER_RGN *user_findrgn_elan (USER_CTXT *uctx, E4_Addr addr, int tail); -+extern USER_RGN *user_findrgn_main (USER_CTXT *uctx, virtaddr_t addr, int tail); -+extern USER_RGN *user_rgnat_elan (USER_CTXT *uctx, E4_Addr addr); -+extern USER_RGN *user_rgnat_main (USER_CTXT *uctx, virtaddr_t addr); -+extern int user_setperm (USER_CTXT *uctx, virtaddr_t maddr, E4_Addr eaddr, unsigned long len, unsigned perm); -+extern void user_clrperm (USER_CTXT *uctx, E4_Addr addr, unsigned long len); -+extern int user_checkperm (USER_CTXT *uctx, E4_Addr raddr, unsigned long rsize, unsigned access); -+extern virtaddr_t user_elan2main (USER_CTXT *uctx, E4_Addr addr); -+extern E4_Addr user_main2elan (USER_CTXT *uctx, virtaddr_t addr); -+extern void user_preload_main (USER_CTXT *uctx, virtaddr_t addr, unsigned long len); -+extern void user_freergns (USER_CTXT *uctx); -+ -+/* user_ddcq.c */ -+extern int user_ddcq_check (USER_CTXT *uctx, unsigned num); -+extern int user_ddcq_flush (USER_CTXT *uctx); -+extern void user_ddcq_intr (USER_CTXT *uctx); -+extern void user_ddcq_write_dword (USER_CTXT *uctx, E4_Addr addr, E4_uint64 value); -+extern void user_ddcq_interrupt (USER_CTXT *uctx, E4_uint64 cookie); -+extern void user_ddcq_run_dma (USER_CTXT *uctx, E4_DMA *dma); -+extern void user_ddcq_run_thread (USER_CTXT *uctx, E4_ThreadRegs *regs); -+extern void user_ddcq_setevent (USER_CTXT *uctx, E4_Addr addr); -+extern void user_ddcq_seteventn (USER_CTXT *uctx, E4_Addr addr, E4_uint32 count); -+extern void user_ddcq_waitevent (USER_CTXT *uctx, E4_Addr addr, E4_uint64 CountAndType, E4_uint64 Param0, E4_uint64 Param1); -+ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USER_H */ -Index: linux-2.6.5-7.191/include/elan4/userregs.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/userregs.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/userregs.h 2005-07-28 14:52:52.971660040 -0400 -@@ -0,0 +1,383 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_USERREGS_H -+#define __ELAN4_USERREGS_H -+ -+#ident "$Id: userregs.h,v 1.14.2.1 2004/10/07 10:57:40 addy Exp $" -+/* $Source: /cvs/master/quadrics/elan4hdr/userregs.h,v $*/ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * Statistic control reg values -+ * Each 4-bit nibble of the control word specifies what statistic -+ * is to be recorded in each of the 8 statistic counters -+ */ -+#define COUNT_REG0_SHIFT 32ull -+#define COUNT_REG1_SHIFT 36ull -+#define COUNT_REG2_SHIFT 40ull -+#define COUNT_REG3_SHIFT 44ull -+#define COUNT_REG4_SHIFT 48ull -+#define COUNT_REG5_SHIFT 52ull -+#define COUNT_REG6_SHIFT 56ull -+#define COUNT_REG7_SHIFT 60ull -+ -+ -+/* Count reg 0 */ -+#define STC_INPUT_NON_WRITE_BLOCKS (0x0ull << COUNT_REG0_SHIFT) -+#define STP_DMA_EOP_WAIT_ACK (0x1ull << COUNT_REG0_SHIFT) -+#define STP_TPROC_RUNNING (0x2ull << COUNT_REG0_SHIFT) -+#define STC_STEN_PKTS_OPEN (0x3ull << COUNT_REG0_SHIFT) -+#define STP_CPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG0_SHIFT) -+#define STC_TLB_TABLE_WALKS (0x5ull << COUNT_REG0_SHIFT) -+#define STC_CACHE_HITS (0x6ull << COUNT_REG0_SHIFT) -+#define STC_PCI_SLAVE_READS (0x7ull << COUNT_REG0_SHIFT) -+#define STP_PCI_WAITING_FOR_GNT (0x8ull << COUNT_REG0_SHIFT) -+#define STP_SYS_CLOCK_RATE0 (0xfull << COUNT_REG0_SHIFT) -+ -+#define STATS_REG0_NAMES { \ -+ "STC_INPUT_NON_WRITE_BLOCKS", \ -+ "STP_DMA_EOP_WAIT_ACK", \ -+ "STP_TPROC_RUNNING", \ -+ "STC_STEN_PKTS_OPEN", \ -+ "STP_CPROC_HOLDS_FFU_DP", \ -+ "STC_TLB_TABLE_WALKS", \ -+ "STC_CACHE_HITS", \ -+ "STC_PCI_SLAVE_READS", \ -+ "STP_PCI_WAITING_FOR_GNT", \ -+ "STP_SYS_CLOCK_RATE0" \ -+} -+ -+/* Count reg 1 */ -+#define STC_INPUT_WRITE_BLOCKS (0x0ull << COUNT_REG1_SHIFT) -+#define STP_DMA_DATA_TRANSMITTING (0x1ull << COUNT_REG1_SHIFT) -+#define STC_CPROC_VALUES_EXE (0x2ull << COUNT_REG1_SHIFT) -+#define STC_STEN_TRANS_SENT (0x3ull << COUNT_REG1_SHIFT) -+#define STP_TPROC_DQ_HOLDS_FFU_DP (0x4ull << COUNT_REG1_SHIFT) -+#define STC_TPROC_TLB_HITS (0x5ull << COUNT_REG1_SHIFT) -+#define STC_CACHE_ALLOC_MISSES (0x6ull << COUNT_REG1_SHIFT) -+#define STP_PCI_MASTER_READ_WAITING (0x7ull << COUNT_REG1_SHIFT) -+#define STP_PCI_WAITING_FOR_DEVSEL (0x8ull << COUNT_REG1_SHIFT) -+#define STP_SYS_CLOCK_RATE1 (0xfull << COUNT_REG1_SHIFT) -+ -+#define STATS_REG1_NAMES { \ -+ "STC_INPUT_WRITE_BLOCKS", \ -+ "STP_DMA_DATA_TRANSMITTING", \ -+ "STC_CPROC_VALUES_EXE", \ -+ "STC_STEN_TRANS_SENT", \ -+ "STP_TPROC_DQ_HOLDS_FFU_DP", \ -+ "STC_TPROC_TLB_HITS", \ -+ "STC_CACHE_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_READ_WAITING", \ -+ "STP_PCI_WAITING_FOR_DEVSEL", \ -+ "STP_SYS_CLOCK_RATE1" \ -+} -+ -+/* Count reg 2 */ -+#define STC_INPUT_PKTS (0x0ull << COUNT_REG2_SHIFT) -+#define STP_DMA_WAITING_MEM (0x1ull << COUNT_REG2_SHIFT) -+#define STC_CPROC_TRANSFERS (0x2ull << COUNT_REG2_SHIFT) -+#define STP_STEN_WAIT_NETWORK_BUSY (0x3ull << COUNT_REG2_SHIFT) -+#define STP_IPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG2_SHIFT) -+#define STC_UNITS_TLB_HITS (0x5ull << COUNT_REG2_SHIFT) -+#define STC_CACHE_NON_ALLOC_MISSES (0x6ull << COUNT_REG2_SHIFT) -+#define STP_PCI_MASTER_WRITE_WAITING (0x7ull << COUNT_REG2_SHIFT) -+#define STC_PCI_OUT_OF_ORDER_SPLIT_COMP (0x8ull << COUNT_REG2_SHIFT) -+#define STP_SYS_CLOCK_RATE2 (0xfull << COUNT_REG2_SHIFT) -+ -+#define STATS_REG2_NAMES { \ -+ "STC_INPUT_PKTS", \ -+ "STP_DMA_WAITING_MEM", \ -+ "STC_CPROC_TRANSFERS", \ -+ "STP_STEN_WAIT_NETWORK_BUSY", \ -+ "STP_IPROC_HOLDS_FFU_DP", \ -+ "STC_UNITS_TLB_HITS", \ -+ "STC_CACHE_NON_ALLOC_MISSES", \ -+ "STP_PCI_MASTER_WRITE_WAITING", \ -+ "STC_PCI_OUT_OF_ORDER_SPLIT_COMP", \ -+ "STP_SYS_CLOCK_RATE2" \ -+} -+ -+/* Count reg 3 */ -+#define STC_INPUT_PKTS_REJECTED (0x0ull << COUNT_REG3_SHIFT) -+#define STP_DMA_WAIT_NETWORK_BUSY (0x1ull << COUNT_REG3_SHIFT) -+#define STC_CPROC_PREFETCH_SDRAM (0x2ull << COUNT_REG3_SHIFT) -+#define STP_STEN_BLOCKED_ACKS_OR_VC (0x3ull << COUNT_REG3_SHIFT) -+#define STP_EPROC_HOLDS_FFU_DP (0x4ull << COUNT_REG3_SHIFT) -+#define STP_TPROC_BLOCKED_MEMSYS (0x5ull << COUNT_REG3_SHIFT) -+#define STC_CACHE_WRITE_BACKS (0x6ull << COUNT_REG3_SHIFT) -+#define STP_PCI_SLAVE_READ_WAITING (0x7ull << COUNT_REG3_SHIFT) -+#define STP_PCI_IDLE_CYCLES (0x8ull << COUNT_REG3_SHIFT) -+#define STP_SYS_CLOCK_RATE3 (0xfull << COUNT_REG3_SHIFT) -+ -+#define STATS_REG3_NAMES { \ -+ "STC_INPUT_PKTS_REJECTED", \ -+ "STP_DMA_WAIT_NETWORK_BUSY", \ -+ "STC_CPROC_PREFETCH_SDRAM", \ -+ "STP_STEN_BLOCKED_ACKS_OR_VC", \ -+ "STP_EPROC_HOLDS_FFU_DP", \ -+ "STP_TPROC_BLOCKED_MEMSYS", \ -+ "STC_CACHE_WRITE_BACKS", \ -+ "STP_PCI_SLAVE_READ_WAITING", \ -+ "STP_PCI_IDLE_CYCLES", \ -+ "STP_SYS_CLOCK_RATE3" \ -+} -+ -+/* Count reg 4 */ -+#define STP_INPUT_DATA_TRANSMITTING (0x0ull << COUNT_REG4_SHIFT) -+#define STC_DMA_PKTS_ACCEPTED (0x1ull << COUNT_REG4_SHIFT) -+#define STC_CPROC_FLUSH_REQ_SDRAM (0x2ull << COUNT_REG4_SHIFT) -+#define STP_STEN_EOP_WAIT_ACK (0x3ull << COUNT_REG4_SHIFT) -+#define STP_DMA_HOLDS_FFU_DP (0x4ull << COUNT_REG4_SHIFT) -+#define STP_UNIT_BLOCKED_MEMSYS (0x5ull << COUNT_REG4_SHIFT) -+#define STC_PCI_MASTER_READS (0x6ull << COUNT_REG4_SHIFT) -+#define STP_PCI_SLAVE_WRITE_WAITING (0x7ull << COUNT_REG4_SHIFT) -+#define STC_INPUT_PACKETS_DISCARDED (0x8ull << COUNT_REG4_SHIFT) -+#define STP_SYS_CLOCK_RATE4 (0xfull << COUNT_REG4_SHIFT) -+ -+#define STATS_REG4_NAMES { \ -+ "STP_INPUT_DATA_TRANSMITTING", \ -+ "STC_DMA_PKTS_ACCEPTED", \ -+ "STC_CPROC_FLUSH_REQ_SDRAM", \ -+ "STP_STEN_EOP_WAIT_ACK", \ -+ "STP_DMA_HOLDS_FFU_DP", \ -+ "STP_UNIT_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_READS", \ -+ "STP_PCI_SLAVE_WRITE_WAITING", \ -+ "STC_INPUT_PACKETS_DISCARDED", \ -+ "STP_SYS_CLOCK_RATE4" \ -+} -+ -+/* Count reg 5 */ -+#define STP_INPUT_WAITING_NETWORK_DATA (0x0ull << COUNT_REG5_SHIFT) -+#define STC_DMA_PKTS_REJECTED (0x1ull << COUNT_REG5_SHIFT) -+#define STC_CPROC_INSERT_CACHE_MISSES (0x2ull << COUNT_REG5_SHIFT) -+#define STP_STEN_TRANSMITTING_DATA (0x3ull << COUNT_REG5_SHIFT) -+#define FFU_BLOCKED_DIFF_FFU_PROC (0x4ull << COUNT_REG5_SHIFT) -+#define STP_TABLE_WALKS_BLOCKED_MEMSYS (0x5ull << COUNT_REG5_SHIFT) -+#define STC_PCI_MASTER_WRITES (0x6ull << COUNT_REG5_SHIFT) -+#define STP_PCI_MASTER_HOLDS_BUS (0x7ull << COUNT_REG5_SHIFT) -+#define STC_PCI_NO_SPLIT_COMPS (0x8ull << COUNT_REG5_SHIFT) -+#define STP_SYS_CLOCK_RATE5 (0xfull << COUNT_REG5_SHIFT) -+ -+#define STATS_REG5_NAMES { \ -+ "STP_INPUT_WAITING_NETWORK_DATA", \ -+ "STC_DMA_PKTS_REJECTED", \ -+ "STC_CPROC_INSERT_CACHE_MISSES", \ -+ "STP_STEN_TRANSMITTING_DATA", \ -+ "FFU_BLOCKED_DIFF_FFU_PROC", \ -+ "STP_TABLE_WALKS_BLOCKED_MEMSYS", \ -+ "STC_PCI_MASTER_WRITES", \ -+ "STP_PCI_MASTER_HOLDS_BUS", \ -+ "STC_PCI_NO_SPLIT_COMPS", \ -+ "STP_SYS_CLOCK_RATE5" \ -+} -+ -+/* Count reg 6 */ -+#define STP_INPUT_BLOCKED_WAITING_TRANS (0x0ull << COUNT_REG6_SHIFT) -+#define STP_TPROC_INST_STALL (0x1ull << COUNT_REG6_SHIFT) -+#define STP_CPROC_WAITING_DESCHED (0x2ull << COUNT_REG6_SHIFT) -+#define STP_STEN_PKT_OPEN_WAITING_DATA (0x3ull << COUNT_REG6_SHIFT) -+#define STP_TLB_HASH_TABLE_ACCESSES (0x4ull << COUNT_REG6_SHIFT) -+#define STP_PCI_SLAVE_BLOCKED_MEMSYS (0x5ull << COUNT_REG6_SHIFT) -+#define STP_PCI_TRANSFERRING_DATA (0x6ull << COUNT_REG6_SHIFT) -+#define STP_PCI_MASTER_WAITING_BUS (0x7ull << COUNT_REG6_SHIFT) -+#define STP_PCI_READ_LATENCY (0x8ull << COUNT_REG6_SHIFT) -+#define STP_SYS_CLOCK_RATE6 (0xfull << COUNT_REG6_SHIFT) -+ -+#define STATS_REG6_NAMES { \ -+ "STP_INPUT_BLOCKED_WAITING_TRANS", \ -+ "STP_TPROC_INST_STALL", \ -+ "STP_CPROC_WAITING_DESCHED", \ -+ "STP_STEN_PKT_OPEN_WAITING_DATA", \ -+ "STP_TLB_HASH_TABLE_ACCESSES", \ -+ "STP_PCI_SLAVE_BLOCKED_MEMSYS", \ -+ "STP_PCI_TRANSFERRING_DATA", \ -+ "STP_PCI_MASTER_WAITING_BUS", \ -+ "STP_PCI_READ_LATENCY", \ -+ "STP_SYS_CLOCK_RATE6" \ -+} -+ -+/* Count reg 7 */ -+#define STC_INPUT_CTX_FILTER_FILL (0x0ull << COUNT_REG7_SHIFT) -+#define STP_TPROC_LOAD_STORE_STALL (0x1ull << COUNT_REG7_SHIFT) -+#define STC_CPROC_TIMEOUTS (0x2ull << COUNT_REG7_SHIFT) -+#define STP_STEN_BLOCKED_NETWORK (0x3ull << COUNT_REG7_SHIFT) -+#define STP_TLB_CHAIN_ACCESSES (0x4ull << COUNT_REG7_SHIFT) -+#define STP_CPROC_SCHED_BLOCKED_MEMSYS (0x5ull << COUNT_REG7_SHIFT) -+#define STC_PCI_SLAVE_WRITES (0x6ull << COUNT_REG7_SHIFT) -+#define STC_PCI_DISCONNECTS_RETRIES (0x7ull << COUNT_REG7_SHIFT) -+#define STC_RING_OSCILLATOR (0x8ull << COUNT_REG7_SHIFT) -+#define STP_SYS_CLOCK_RATE7 (0xfull << COUNT_REG7_SHIFT) -+ -+#define STATS_REG7_NAMES { \ -+ "STC_INPUT_CTX_FILTER_FILL", \ -+ "STP_TPROC_LOAD_STORE_STALL", \ -+ "STC_CPROC_TIMEOUTS", \ -+ "STP_STEN_BLOCKED_NETWORK", \ -+ "STP_TLB_CHAIN_ACCESSES", \ -+ "STP_CPROC_SCHED_BLOCKED_MEMSYS", \ -+ "STC_PCI_SLAVE_WRITES", \ -+ "STC_PCI_DISCONNECTS_RETRIES", \ -+ "STC_RING_OSCILLATOR", \ -+ "STP_SYS_CLOCK_RATE7" \ -+} -+ -+#define STATS_REG_NAMES { \ -+ STATS_REG0_NAMES, \ -+ STATS_REG1_NAMES, \ -+ STATS_REG2_NAMES, \ -+ STATS_REG3_NAMES, \ -+ STATS_REG4_NAMES, \ -+ STATS_REG5_NAMES, \ -+ STATS_REG6_NAMES, \ -+ STATS_REG7_NAMES, \ -+} -+ -+ -+#define INPUT_PERF_STATS (STC_INPUT_NON_WRITE_BLOCKS | STC_INPUT_WRITE_BLOCKS | \ -+ STC_INPUT_PKTS | STC_INPUT_PKTS_REJECTED | \ -+ STC_INPUT_CTX_FILTER_FILL | STP_INPUT_DATA_TRANSMITTING | \ -+ STP_INPUT_WAITING_NETWORK_DATA | STP_INPUT_BLOCKED_WAITING_TRANS | STC_INPUT_PACKETS_DISCARDED) -+ -+#define DMA_PERF_STATS (STC_DMA_PKTS_ACCEPTED | STC_DMA_PKTS_REJECTED | \ -+ STP_DMA_EOP_WAIT_ACK | STP_DMA_DATA_TRANSMITTING | \ -+ STP_DMA_WAITING_MEM | STP_DMA_WAIT_NETWORK_BUSY) -+ -+ -+#define TPROC_PERF_STATS (STP_TPROC_RUNNING | STP_TPROC_INST_STALL | \ -+ STP_TPROC_LOAD_STORE_STALL) -+ -+#define CPROC_PERF_STATS (STC_CPROC_VALUES_EXE | STC_CPROC_TRANSFERS | \ -+ STC_CPROC_PREFETCH_SDRAM | STC_CPROC_FLUSH_REQ_SDRAM | \ -+ STC_CPROC_INSERT_CACHE_MISSES | STP_CPROC_WAITING_DESCHED | \ -+ STC_CPROC_TIMEOUTS) -+ -+#define STEN_PERF_STATS (STC_STEN_PKTS_OPEN | STC_STEN_TRANS_SENT | \ -+ STP_STEN_WAIT_NETWORK_BUSY | STP_STEN_BLOCKED_ACKS_OR_VC | \ -+ STP_STEN_EOP_WAIT_ACK | STP_STEN_TRANSMITTING_DATA | \ -+ STP_STEN_PKT_OPEN_WAITING_DATA | STP_STEN_BLOCKED_NETWORK) -+ -+#define FFU_PREF_STATS (STP_CPROC_HOLDS_FFU_DP | STP_TPROC_DQ_HOLDS_FFU_DP | \ -+ STP_IPROC_HOLDS_FFU_DP | STP_EPROC_HOLDS_FFU_DP | \ -+ STP_DMA_HOLDS_FFU_DP | FFU_BLOCKED_DIFF_FFU_PROC) -+ -+#define TABLE_WALK_PERF_STATS (STC_TPROC_TLB_HITS | STC_UNITS_TLB_HITS | \ -+ STP_TLB_HASH_TABLE_ACCESSES | STP_TLB_CHAIN_ACCESSES | \ -+ STC_TLB_TABLE_WALKS) -+ -+#define ADDRESS_ARB_PERF_STATS (STP_UNIT_BLOCKED_MEMSYS | STP_TPROC_BLOCKED_MEMSYS | \ -+ STP_TABLE_WALKS_BLOCKED_MEMSYS | STP_CPROC_SCHED_BLOCKED_MEMSYS | \ -+ STP_PCI_SLAVE_BLOCKED_MEMSYS) -+ -+#define CACHE_PERF_STATS (STC_CACHE_HITS | STC_CACHE_ALLOC_MISSES | \ -+ STC_CACHE_NON_ALLOC_MISSES | STC_CACHE_WRITE_BACKS) -+ -+ -+#define PCI_PERF_STATS (STC_PCI_SLAVE_READS | STP_PCI_MASTER_READ_WAITING | \ -+ STP_PCI_MASTER_WRITE_WAITING | STP_PCI_SLAVE_READ_WAITING | \ -+ STP_PCI_SLAVE_WRITE_WAITING | STC_PCI_MASTER_WRITES | \ -+ STP_PCI_TRANSFERRING_DATA | STC_PCI_SLAVE_WRITES) -+ -+#define PCIBUS_PERF_STATS (STP_PCI_WAITING_FOR_GNT | STP_PCI_WAITING_FOR_DEVSEL | \ -+ STC_PCI_OUT_OF_ORDER_SPLIT_COMP | STP_PCI_IDLE_CYCLES | \ -+ STC_PCI_MASTER_READS | STP_PCI_MASTER_HOLDS_BUS | \ -+ STP_PCI_MASTER_WAITING_BUS | STC_PCI_DISCONNECTS_RETRIES) -+ -+ -+ extern const char *elan_stats_names[8][10]; -+ -+#define ELAN_STATS_NAME(COUNT, CONTROL) (elan_stats_names[(COUNT)][(CONTROL) & 7]) -+ -+ typedef volatile union e4_StatsControl -+ { -+ E4_uint64 StatsControl; -+ struct -+ { -+#if (BYTE_ORDER == LITTLE_ENDIAN) || defined(__LITTLE_ENDIAN__) -+ E4_uint32 StatCont0:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont5:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont7:4; -+#else -+ E4_uint32 StatCont7:4; -+ E4_uint32 StatCont6:4; -+ E4_uint32 StatCont5:4; -+ -+ E4_uint32 StatCont4:4; -+ E4_uint32 StatCont3:4; -+ E4_uint32 StatCont2:4; -+ E4_uint32 StatCont1:4; -+ E4_uint32 StatCont0:4; -+#endif -+ E4_uint32 pad; -+ } s; -+ } E4_StatsControl; -+ -+typedef volatile union e4_StatsCount -+{ -+ E4_uint64 ClockStat; -+ struct -+ { -+ E4_uint32 ClockLSW; /* read only */ -+ E4_uint32 StatsCount; -+ } s; -+} E4_StatsCount; -+ -+typedef volatile union e4_clock -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 ClockLSW; -+ E4_uint32 ClockMSW; -+ } s; -+} E4_Clock; -+#define E4_TIME( X ) ((X).NanoSecClock) -+ -+#define ELAN4_COMMS_CLOCK_FREQUENCY 660 /* In Mhz. This is half the bit rate. */ -+#define ELAN4_CLOCK_ADD_VALUE 200 /* For 200ns increment rate */ -+#define ELAN4_CLOCK_COMMS_DIV_VALUE (((ELAN4_COMMS_CLOCK_FREQUENCY * ELAN4_CLOCK_ADD_VALUE) / (1000 * 4)) - 1) -+#define ELAN4_CLOCK_TICK_RATE ((ELAN4_CLOCK_ADD_VALUE << 8) + ELAN4_CLOCK_COMMS_DIV_VALUE) -+ -+typedef volatile union e4_clocktickrate -+{ -+ E4_uint64 NanoSecClock; -+ struct -+ { -+ E4_uint32 pad1; -+ E4_uint32 TickRates; -+ } s; -+} E4_ClockTickRate; -+ -+/* -+ * This is made into an 8k byte object. -+ */ -+typedef volatile struct _E4_User_Regs -+{ -+ E4_StatsCount StatCounts[8]; -+ E4_StatsCount InstCount; -+ E4_Clock Clock; -+ E4_StatsControl StatCont; -+ E4_ClockTickRate ClockTickRate; -+ E4_uint8 pad1[EightK - ((sizeof(E4_StatsCount)*9)+sizeof(E4_StatsControl)+ -+ sizeof(E4_Clock)+sizeof(E4_ClockTickRate))]; -+} E4_User_Regs; -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ELAN4_USERREGS_H */ -Index: linux-2.6.5-7.191/include/elan4/usertrap.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/usertrap.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/usertrap.h 2005-07-28 14:52:52.971660040 -0400 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2001-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: usertrap.h,v 1.17 2004/05/05 09:08:35 david Exp $" -+/* $Source: /cvs/master/quadrics/elan4mod/usertrap.h,v $*/ -+ -+#ifndef __ELAN4_USERTRAP_H -+#define __ELAN4_USERTRAP_H -+ -+#ifndef _ASM -+typedef struct elan4_user_page -+{ -+ E4_uint64 upage_ddcq_completed; -+} ELAN4_USER_PAGE; -+ -+typedef struct elan4_user_trap -+{ -+ int ut_type; -+ unsigned ut_proc; -+ unsigned ut_args[4]; -+ -+ union { -+ ELAN4_EPROC_TRAP eproc; -+ ELAN4_CPROC_TRAP cproc; -+ ELAN4_DPROC_TRAP dproc; -+ ELAN4_IPROC_TRAP iproc; -+ ELAN4_TPROC_TRAP tproc; -+ ELAN4_NETERR_MSG msg; -+ } ut_trap; -+} ELAN4_USER_TRAP; -+ -+#endif /* _ASM */ -+ -+ -+/* value for ut_type */ -+#define UTS_FINISHED 0 /* all pending traps have been handled */ -+#define UTS_RESCHEDULE 1 /* must return to user mode and re-enter */ -+#define UTS_UNIMP_INSTR 2 /* unimplemented thread instruction */ -+#define UTS_EXECUTE_PACKET 3 /* iproc trap needs packet executing */ -+#define UTS_NETWORK_ERROR_TRAP 4 /* network error on this trap */ -+#define UTS_NETWORK_ERROR_MSG 5 /* network error message */ -+#define UTS_NETWORK_ERROR_TIMER 6 /* network error timer expired */ -+ -+#define UTS_EFAULT -1 /* failed to copyout trap */ -+#define UTS_INVALID_ADDR -2 /* all -ve codes mean trap could not be resolved. */ -+#define UTS_INVALID_VPROC -3 -+#define UTS_INVALID_COMMAND -4 -+#define UTS_BAD_TRAP -5 -+#define UTS_ALIGNMENT_ERROR -6 -+#define UTS_QUEUE_OVERFLOW -7 -+#define UTS_QUEUE_ERROR -8 -+#define UTS_INVALID_TRANS -9 -+#define UTS_PERMISSION_DENIED -10 -+#define UTS_CPROC_ERROR -11 -+#define UTS_INVALID_COOKIE -12 -+#define UTS_NETERR_ERROR -13 -+ -+/* "special" values for registering handlers */ -+#define UTS_ALL_TRAPS -9999 -+ -+/* value for ut_proc */ -+#define UTS_NOPROC 0 -+#define UTS_EPROC 1 -+#define UTS_CPROC 2 -+#define UTS_DPROC 3 -+#define UTS_TPROC 4 -+#define UTS_IPROC 5 -+#define UTS_NETERR_MSG 6 -+ -+/* unimplemented trap numbers for thread processor */ -+#define ELAN4_T_TRAP_INSTR(t) (0x80202000 | ((t) & 0xFF)) -+ -+#define ELAN4_T_SYSCALL_TRAP 1 -+# define ELAN4_T_OPEN 0 -+# define ELAN4_T_WRITE 1 -+# define ELAN4_T_READ 2 -+# define ELAN4_T_IOCTL 3 -+# define ELAN4_T_LSEEK 4 -+# define ELAN4_T_POLL 5 -+# define ELAN4_T_CLOSE 6 -+# define ELAN4_T_KILL 7 -+# define ELAN4_T_MMAP 8 -+# define ELAN4_T_MUNMAP 9 -+# define ELAN4_T_ABORT 100 -+# define ELAN4_T_DEBUG 101 -+# define ELAN4_T_REGDUMP 102 -+ -+#define ELAN4_T_REGDUMP_TRAP 2 -+ -+#define ELAN4_T_LIBELAN_TRAP 3 -+# define ELAN4_T_TPORT_NEWBUF 0 -+# define ELAN4_T_TPORT_GC 1 -+# define ELAN4_T_TPORT_DEBUG 2 -+ -+#define ELAN4_T_ALLOC_TRAP 4 -+# define ELAN4_T_ALLOC_ELAN 0 -+# define ELAN4_T_ALLOC_MAIN 1 -+# define ELAN4_T_FREE_ELAN 2 -+# define ELAN4_T_FREE_MAIN 3 -+ -+/* reserved main interrupt cookies */ -+#define ELAN4_INT_COOKIE_DDCQ 0 -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -+#endif /* __ELAN4_USERTRAP_H */ -Index: linux-2.6.5-7.191/include/elan4/xsdram.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/elan4/xsdram.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/elan4/xsdram.h 2005-07-28 14:52:52.971660040 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2003 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __ELAN4_XSDRAM_H -+#define __ELAN4_XSDRAM_H -+ -+#ident "@(#)$Id: xsdram.h,v 1.13 2004/03/05 12:32:04 jon Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/elan4hdr/xsdram.h,v $*/ -+ -+/* SAMSUNG K4H281638D-TCB3 */ -+ -+#define SDRAM_tRCF_1_SH 0 -+#define SDRAM_tRP_1_SH 4 -+#define SDRAM_tRCD_SH 8 -+#define SDRAM_tRRD_SH 12 -+#define SDRAM_tEndWr_SH 16 -+#define SDRAM_tEndRd_SH 20 -+#define SDRAM_Burst_SH 24 -+#define SDRAM_CL_SH 28 -+#define SDRAM_DsblBypass (1ULL << 31) -+#define SDRAM_RefreshRate_SH 32 -+#define SDRAM_RamSize_SH 34 -+#define SDRAM_ReadLtncy_1_SH 36 -+#define SDRAM_RdOffset_SH 40 -+#define SDRAM_FlightDelay_SH 42 -+ -+#define SDRAM_ENABLE_ECC (1ULL << 44) // Enables error detecting on the ECC. -+#define SDRAM_SDRAM_TESTING (1ULL << 45) // Switches to test mode for checking EEC data bits -+#define SDRAM_SETUP (1ULL << 46) // Writes SDram control reg when set. Also starts -+ -+#define SDRAM_CS_MODE0 0ULL // 64Mbit, 128Mbit, 256Mbit, 512Mbit or 1Gbit (16-bit output) -+#define SDRAM_CS_MODE1 1ULL // 64Mbit, 128Mbit, 256Mbit or 512Mbit (8-bit output) -+#define SDRAM_CS_MODE2 2ULL // 2Gbit (16-bit output) or 1Gbit (8-bit output) -+#define SDRAM_CS_MODE3 3ULL // 4Gbit (16-bit output) or 2Gbit (8-bit output) -+ -+#if defined(LINUX) && !defined(CONFIG_MPSAS) -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x2ULL << SDRAM_RefreshRate_SH) | (0x3ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH)) -+#else -+#define SDRAM_STARTUP_VALUE ((0xbULL << SDRAM_tRCF_1_SH) | (0x2ULL << SDRAM_tRP_1_SH) | \ -+ (0x3ULL << SDRAM_tRCD_SH) | (0x2ULL << SDRAM_tRRD_SH) | \ -+ (0xaULL << SDRAM_tEndWr_SH) | (0x6ULL << SDRAM_tEndRd_SH) | \ -+ (0x8ULL << SDRAM_Burst_SH) | (0x6ULL << SDRAM_CL_SH) | \ -+ (0x0ULL << SDRAM_RefreshRate_SH) | (0x0ULL << SDRAM_RamSize_SH) | \ -+ (0x1ULL << SDRAM_RdOffset_SH) | (0x1ULL << SDRAM_FlightDelay_SH) | \ -+ (0x4ULL << SDRAM_ReadLtncy_1_SH) | SDRAM_ENABLE_ECC | SDRAM_SETUP) -+#endif -+ -+#endif /* __ELAN4_XSDRAM_H */ -Index: linux-2.6.5-7.191/include/jtag/jtagio.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/jtag/jtagio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/jtag/jtagio.h 2005-07-28 14:52:52.972659888 -0400 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "$Id: jtagio.h,v 1.7.8.1 2005/01/27 15:21:47 lee Exp $" -+/* $Source: /cvs/master/quadrics/jtagmod/jtagio.h,v $*/ -+ -+ -+#ifndef __SYS_JTAGMOD_H -+#define __SYS_JTAGMOD_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define JTAG_MAX_CHIPS 8 -+#define JTAG_MAX_INSTR_LEN 8 -+#define JTAG_MAX_BITS (JTAG_MAX_CHIPS * JTAG_MAX_INSTR_LEN) -+#define JTAG_MAX_DATA_LEN 1024 -+ -+#define JTAG_BYPASS 0xFF -+ -+#define I2C_ADDR_LEN 7 /* 7 bits of address */ -+#define I2C_DATA_LEN 8 /* 8 bits of data */ -+#define I2C_MAX_DATA_LEN 9 /* and upto 9 bytes worth */ -+ -+#define BITS_PER_BYTE 8 -+#define JTAG_NBYTES(nbits) (((nbits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) -+#define JTAG_BIT(v, num) (((v)[(num) / BITS_PER_BYTE] >> ((num) % BITS_PER_BYTE)) & 1) -+#define JTAG_SET_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] |= (1 << ((num) % BITS_PER_BYTE))) -+#define JTAG_CLR_BIT(v, num) ((v)[(num) / BITS_PER_BYTE] &= ~(1 << ((num) % BITS_PER_BYTE))) -+ -+#define RING_CLOCK_CARD (0x3D) -+#define RING_CLOCK_SHIFT (0x3E) -+#define RING_JTAG_LOOPBACK (0x3F) -+#define RING_MAX (0x40) -+ -+#define RING_QUAD_BIT (0x40) -+#define RING_I2C_BIT (0x80) -+ -+#define VALID_JTAG_RING(ring) ((ring) < 0x20 || (ring) == RING_JTAG_LOOPBACK) -+#define VALID_I2C_RING(ring) ((ring) < 0x20 || (ring) == RING_CLOCK_CARD) -+ -+ -+typedef struct jtag_value -+{ -+ u_char bytes[JTAG_NBYTES(JTAG_MAX_DATA_LEN)]; -+} JTAG_VALUE; -+ -+/* arguements to JTAG_SHIFT_IR/JTAG_SHIFT_DR */ -+typedef struct jtag_reset_args -+{ -+ u_int ring; -+} JTAG_RESET_ARGS; -+ -+typedef struct jtag_shift_args -+{ -+ u_int ring; -+ u_int nbits; -+ u_char *value; -+} JTAG_SHIFT_ARGS; -+ -+typedef struct i2c_args -+{ -+ u_int ring; -+ u_int device; -+ u_int reg; -+ u_int count; -+ u_int ok; -+ u_char data[I2C_MAX_DATA_LEN]; -+} I2C_ARGS; -+ -+/* values for 'ok' - the return value from i2c_xx functions */ -+#define I2C_OP_SUCCESS 0 -+#define I2C_OP_ERROR 1 -+#define I2C_OP_NOT_IDLE 2 -+#define I2C_OP_NO_DEVICE 3 -+#define I2C_OP_WRITE_TO_BIG 4 -+#define I2C_OP_BAD_RESOURCE 5 -+ -+typedef struct i2c_clock_shift_args -+{ -+ u_int t; -+ u_int n; -+ u_int m; -+} I2C_CLOCK_SHIFT_ARGS; -+ -+#define JTAG_RESET _IOWR('j', '0', JTAG_RESET_ARGS) -+#define JTAG_SHIFT_IR _IOWR('j', '1', JTAG_SHIFT_ARGS) -+#define JTAG_SHIFT_DR _IOWR('j', '2', JTAG_SHIFT_ARGS) -+ -+#define I2C_CLOCK_SHIFT _IOWR('j', '4', I2C_CLOCK_SHIFT_ARGS) -+#define I2C_WRITE _IOWR('j', '5', I2C_ARGS) -+#define I2C_READ _IOWR('j', '6', I2C_ARGS) -+#define I2C_WRITEREG _IOWR('j', '7', I2C_ARGS) -+#define I2C_READREG _IOWR('j', '8', I2C_ARGS) -+ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __SYS_JTAGMOD_H */ -Index: linux-2.6.5-7.191/include/linux/init_task.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/linux/init_task.h 2005-06-28 12:24:09.000000000 -0400 -+++ linux-2.6.5-7.191/include/linux/init_task.h 2005-07-28 14:52:52.972659888 -0400 -@@ -3,6 +3,7 @@ - - #include - #include -+#include - - #define INIT_FILES \ - { \ -@@ -116,6 +117,7 @@ - .map_base = __TASK_UNMAPPED_BASE, \ - .io_wait = NULL, \ - INIT_TASK_PAGG(tsk) \ -+ INIT_TASK_PTRACK(tsk) \ - } - - -Index: linux-2.6.5-7.191/include/linux/ioproc.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/linux/ioproc.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/linux/ioproc.h 2005-07-28 14:52:52.973659736 -0400 -@@ -0,0 +1,271 @@ -+/* -*- linux-c -*- -+ * -+ * Copyright (C) 2002-2004 Quadrics Ltd. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * -+ */ -+ -+/* -+ * Callbacks for IO processor page table updates. -+ */ -+ -+#ifndef __LINUX_IOPROC_H__ -+#define __LINUX_IOPROC_H__ -+ -+#include -+#include -+ -+typedef struct ioproc_ops { -+ struct ioproc_ops *next; -+ void *arg; -+ -+ void (*release)(void *arg, struct mm_struct *mm); -+ void (*sync_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ void (*invalidate_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ void (*update_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end); -+ -+ void (*change_protection)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot); -+ -+ void (*sync_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*invalidate_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ void (*update_page)(void *arg, struct vm_area_struct *vma, unsigned long address); -+ -+} ioproc_ops_t; -+ -+/* IOPROC Registration -+ * -+ * Called by the IOPROC device driver to register its interest in page table -+ * changes for the process associated with the supplied mm_struct -+ * -+ * The caller should first allocate and fill out an ioproc_ops structure with -+ * the function pointers initialised to the device driver specific code for -+ * each callback. If the device driver doesn't have code for a particular -+ * callback then it should set the function pointer to be NULL. -+ * The ioproc_ops arg parameter will be passed unchanged as the first argument -+ * to each callback function invocation. -+ * -+ * The ioproc registration is not inherited across fork() and should be called -+ * once for each process that the IOPROC device driver is interested in. -+ * -+ * Must be called holding the mm->page_table_lock -+ */ -+extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip); -+ -+ -+/* IOPROC De-registration -+ * -+ * Called by the IOPROC device driver when it is no longer interested in page -+ * table changes for the process associated with the supplied mm_struct -+ * -+ * Normally this is not needed to be called as the ioproc_release() code will -+ * automatically unlink the ioproc_ops struct from the mm_struct as the -+ * process exits -+ * -+ * Must be called holding the mm->page_table_lock -+ */ -+extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip); -+ -+#ifdef CONFIG_IOPROC -+ -+/* IOPROC Release -+ * -+ * Called during exit_mmap() as all vmas are torn down and unmapped. -+ * -+ * Also unlinks the ioproc_ops structure from the mm list as it goes. -+ * -+ * No need for locks as the mm can no longer be accessed at this point -+ * -+ */ -+static inline void -+ioproc_release(struct mm_struct *mm) -+{ -+ struct ioproc_ops *cp; -+ -+ while ((cp = mm->ioproc_ops) != NULL) { -+ mm->ioproc_ops = cp->next; -+ -+ if (cp->release) -+ cp->release(cp->arg, mm); -+ } -+} -+ -+/* IOPROC SYNC RANGE -+ * -+ * Called when a memory map is synchronised with its disk image i.e. when the -+ * msync() syscall is invoked. Any future read or write to the associated -+ * pages by the IOPROC should cause the page to be marked as referenced or -+ * modified. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_sync_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->sync_range) -+ cp->sync_range(cp->arg, vma, start, end); -+} -+ -+/* IOPROC INVALIDATE RANGE -+ * -+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the -+ * user or paged out by the kernel. -+ * -+ * After this call the IOPROC must not access the physical memory again unless -+ * a new translation is loaded. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_invalidate_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->invalidate_range) -+ cp->invalidate_range(cp->arg, vma, start, end); -+} -+ -+/* IOPROC UPDATE RANGE -+ * -+ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk -+ * up, when breaking COW or faulting in an anonymous page of memory. -+ * -+ * These give the IOPROC device driver the opportunity to load translations -+ * speculatively, which can improve performance by avoiding device translation -+ * faults. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_update_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->update_range) -+ cp->update_range(cp->arg, vma, start, end); -+} -+ -+ -+/* IOPROC CHANGE PROTECTION -+ * -+ * Called when the protection on a region of memory is changed i.e. when the -+ * mprotect() syscall is invoked. -+ * -+ * The IOPROC must not be able to write to a read-only page, so if the -+ * permissions are downgraded then it must honour them. If they are upgraded -+ * it can treat this in the same way as the ioproc_update_[range|sync]() calls -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->change_protection) -+ cp->change_protection(cp->arg, vma, start, end, newprot); -+} -+ -+/* IOPROC SYNC PAGE -+ * -+ * Called when a memory map is synchronised with its disk image i.e. when the -+ * msync() syscall is invoked. Any future read or write to the associated page -+ * by the IOPROC should cause the page to be marked as referenced or modified. -+ * -+ * Not currently called as msync() calls ioproc_sync_range() instead -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_sync_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->sync_page) -+ cp->sync_page(cp->arg, vma, addr); -+} -+ -+/* IOPROC INVALIDATE PAGE -+ * -+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the -+ * user or paged out by the kernel. -+ * -+ * After this call the IOPROC must not access the physical memory again unless -+ * a new translation is loaded. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->invalidate_page) -+ cp->invalidate_page(cp->arg, vma, addr); -+} -+ -+/* IOPROC UPDATE PAGE -+ * -+ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk -+ * up, when breaking COW or faulting in an anoymous page of memory. -+ * -+ * These give the IOPROC device the opportunity to load translations -+ * speculatively, which can improve performance by avoiding device translation -+ * faults. -+ * -+ * Called holding the mm->page_table_lock -+ */ -+static inline void -+ioproc_update_page(struct vm_area_struct *vma, unsigned long addr) -+{ -+ struct ioproc_ops *cp; -+ -+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) -+ if (cp->update_page) -+ cp->update_page(cp->arg, vma, addr); -+} -+ -+#else -+ -+/* ! CONFIG_IOPROC so make all hooks empty */ -+ -+#define ioproc_release(mm) do { } while (0) -+ -+#define ioproc_sync_range(vma,start,end) do { } while (0) -+ -+#define ioproc_invalidate_range(vma, start,end) do { } while (0) -+ -+#define ioproc_update_range(vma, start, end) do { } while (0) -+ -+#define ioproc_change_protection(vma, start, end, prot) do { } while (0) -+ -+#define ioproc_sync_page(vma, addr) do { } while (0) -+ -+#define ioproc_invalidate_page(vma, addr) do { } while (0) -+ -+#define ioproc_update_page(vma, addr) do { } while (0) -+ -+#endif /* CONFIG_IOPROC */ -+ -+#endif /* __LINUX_IOPROC_H__ */ -Index: linux-2.6.5-7.191/include/linux/ptrack.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/linux/ptrack.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/linux/ptrack.h 2005-07-28 14:52:52.973659736 -0400 -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Derived from exit_actn.c by -+ * Copyright (C) 2003 Quadrics Ltd. -+ * -+ */ -+#ifndef __LINUX_PTRACK_H -+#define __LINUX_PTRACK_H -+ -+/* -+ * Process tracking - this allows a module to keep track of processes -+ * in order that it can manage all tasks derived from a single process. -+ */ -+ -+#define PTRACK_PHASE_CLONE 1 -+#define PTRACK_PHASE_CLONE_FAIL 2 -+#define PTRACK_PHASE_EXEC 3 -+#define PTRACK_PHASE_EXIT 4 -+ -+#define PTRACK_FINISHED 0 -+#define PTRACK_INNHERIT 1 -+#define PTRACK_DENIED 2 -+ -+#ifdef CONFIG_PTRACK -+ -+typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child); -+ -+struct ptrack_desc { -+ struct list_head link; -+ ptrack_callback_t callback; -+ void *arg; -+}; -+ -+extern int ptrack_register (ptrack_callback_t callback, void *arg); -+extern void ptrack_deregister (ptrack_callback_t callback, void *arg); -+extern int ptrack_registered (ptrack_callback_t callback, void *arg); -+ -+extern int ptrack_call_callbacks (int phase, struct task_struct *child); -+ -+#define INIT_TASK_PTRACK(tsk) \ -+ .ptrack_list = LIST_HEAD_INIT(tsk.ptrack_list), -+ -+#else -+#define ptrack_call_callbacks(phase, child) (0) -+ -+#define INIT_TASK_PTRACK(tsk) -+ -+#endif -+ -+#endif /* __LINUX_PTRACK_H */ -Index: linux-2.6.5-7.191/include/linux/sched.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/linux/sched.h 2005-06-28 12:24:22.000000000 -0400 -+++ linux-2.6.5-7.191/include/linux/sched.h 2005-07-28 14:52:52.974659584 -0400 -@@ -188,6 +188,9 @@ - extern int max_timeslice, min_timeslice; - - struct namespace; -+#ifdef CONFIG_IOPROC -+struct ioproc_ops; -+#endif - - /* Maximum number of active map areas.. This is a random (large) number */ - #define DEFAULT_MAX_MAP_COUNT 65536 -@@ -241,6 +244,15 @@ - struct kioctx default_kioctx; - - unsigned long hiwater_rss, hiwater_vm; -+ -+#ifdef CONFIG_IOPROC -+ /* hooks for io devices with advanced RDMA capabilities */ -+ struct ioproc_ops *ioproc_ops; -+#endif -+#ifdef CONFIG_PTRACK -+/* process tracking callback */ -+ struct list_head ptrack_list; -+#endif - }; - - extern int mmlist_nr; -@@ -601,6 +613,10 @@ - struct rw_semaphore pagg_sem; - #endif - -+#ifdef CONFIG_PTRACK -+/* process tracking callback */ -+ struct list_head ptrack_list; -+#endif - }; - - static inline pid_t process_group(struct task_struct *tsk) -Index: linux-2.6.5-7.191/include/qsnet/autoconf.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/autoconf.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/autoconf.h 2005-07-28 14:52:52.975659432 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ * NOTE: This file has been automatically generated: -+ * node : milano -+ * kernel : /src/linux/2.6/linux-2.6.5 -+ * date : Wed May 11 12:17:34 EDT 2005 -+ * -+ */ -+ -+#include -+#undef NO_RMAP -+#undef AC -+#undef NO_O1_SCHED -+#undef NO_NPTL -+#undef NO_ABI -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) -+#define PROCESS_ACCT -+#endif -+#undef RSS_ATOMIC -+#define NO_COPROC -+#undef NO_IOPROC -+#undef NO_PTRACK -+#define NO_PANIC_NOTIFIER -+#undef NO_SHM_CLEANUP -+#undef NO_PDE -+ -+ -+#define CONFIG_EIP -+#define CONFIG_ELAN -+#define CONFIG_ELAN3 -+#define CONFIG_ELAN4 -+#define CONFIG_EP -+#define CONFIG_JTAG -+#define CONFIG_QSNET -+#define CONFIG_RMS -Index: linux-2.6.5-7.191/include/qsnet/condvar.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/condvar.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/condvar.h 2005-07-28 14:52:52.975659432 -0400 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_CONDVAR_H) -+#define _LINUX_CONDVAR_H -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+ -+#define CV_RET_SIGPENDING 0 -+#define CV_RET_TIMEOUT (-1) -+#define CV_RET_NORMAL 1 -+ -+struct kcondvar_task { -+ struct task_struct *task; /* need to wrap task in this */ -+ struct list_head list; /* to thread as a list */ -+ int blocked; -+}; -+ -+typedef struct { -+ struct list_head task_list; /* list of kcondvar_task's */ -+} kcondvar_t; -+ -+#define kcondvar_wait(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_UNINTERRUPTIBLE) -+#define kcondvar_waitsig(c,l,fl) debug_kcondvar_wait(c, l, fl, 0, TASK_INTERRUPTIBLE) -+#define kcondvar_timedwait(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_UNINTERRUPTIBLE) -+#define kcondvar_timedwaitsig(c,l,fl,to) debug_kcondvar_wait(c, l, fl, to, TASK_INTERRUPTIBLE) -+#define kcondvar_wakeupone(c,l) kcondvar_wakeup(c, l, 0) -+#define kcondvar_wakeupall(c,l) kcondvar_wakeup(c, l, 1) -+ -+extern __inline__ void -+kcondvar_init(kcondvar_t *c) -+{ -+ INIT_LIST_HEAD(&c->task_list); -+} -+ -+extern __inline__ void -+kcondvar_destroy(kcondvar_t *c) -+{ -+ ASSERT(list_empty(&c->task_list)); -+} -+ -+/* -+ * We thread a struct kcondvar_task, allocated on the stack, onto the kcondvar_t's -+ * task_list, and take it off again when we wake up. -+ */ -+extern __inline__ int -+debug_kcondvar_wait(kcondvar_t *c, spinlock_t *l, unsigned long *fl, long tmo, int state) -+{ -+ struct kcondvar_task cvt; -+ int ret = CV_RET_NORMAL; -+ -+ ASSERT(!in_interrupt()); /* we can block */ -+ ASSERT(SPINLOCK_HELD(l)); /* enter holding lock */ -+ -+ cvt.task = current; -+ cvt.blocked = 1; -+ list_add(&cvt.list, &c->task_list); -+ do { -+ /* Note: we avoid using TASK_UNINTERRUPTIBLE here because avenrun() -+ * (linux/kernel/timer.c:calc_load()) -+ * computation treats it like TASK_RUNNABLE hence creates false high -+ * load averages when we create kernel threads. -+ * The cvt.blocked flag distinguishes a signal wakeup from a kcondvar_wakeup. -+ * -+ * However, if we do take a signal we could end up busily spinning here, if -+ * we ignore it (state == TASK_UNINTERRUPTIBLE) so once we see a signal -+ * pending we do sleep TASK_UNINTERRUPTIBLE to stop a busy spin. -+ * I have now blocked all signals for kernel threads to prevent this -+ * happening but other users of kcondvar_wait may still hit this spin. -+ */ -+ set_current_state (signal_pending(current) ? state : TASK_INTERRUPTIBLE); -+ -+ if (fl) -+ spin_unlock_irqrestore(l, *fl); -+ else -+ spin_unlock(l); -+ if (tmo) { -+ if (tmo <= jiffies || !schedule_timeout(tmo - jiffies)) -+ ret = CV_RET_TIMEOUT; -+ } else -+ schedule(); -+ if (fl) -+ spin_lock_irqsave (l, *fl); -+ else -+ spin_lock(l); -+ -+ /* signal_pending - Only exit the loop if the user was waiting TASK_INTERRUPTIBLE */ -+ if ((state == TASK_INTERRUPTIBLE) && signal_pending(current)) -+ ret = CV_RET_SIGPENDING; -+ -+ } while (cvt.blocked && ret == CV_RET_NORMAL); -+ list_del(&cvt.list); -+ -+ /* Reset task state in case we didn't sleep above */ -+ set_current_state (TASK_RUNNING); -+ -+ return ret; /* return holding lock */ -+} -+ -+extern __inline__ void -+kcondvar_wakeup(kcondvar_t *c, spinlock_t *l, int wakeall) -+{ -+ struct list_head *lp; -+ struct kcondvar_task *cvtp; -+ -+ ASSERT(SPINLOCK_HELD(l)); /* already holding lock */ -+ for (lp = c->task_list.next; lp != &c->task_list; lp = lp->next) { -+ cvtp = list_entry(lp, struct kcondvar_task, list); -+ if (cvtp->blocked) { -+ cvtp->blocked = 0; -+ /* wake_up_process added to kernel/ksyms.c */ -+ wake_up_process(cvtp->task); -+ if (!wakeall) -+ break; -+ } -+ } -+} /* return still holding lock */ -+ -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_CONDVAR_H */ -Index: linux-2.6.5-7.191/include/qsnet/config.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/config.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/config.h 2005-07-28 14:52:52.976659280 -0400 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef _QSNET_CONFIG_H -+#define _QSNET_CONFIG_H -+ -+#ident "$Id: config.h,v 1.23 2003/07/24 21:31:19 robin Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/config.h,v $*/ -+ -+ -+/* -+ * QSNET standard defines : -+ * -+ * Target operating system defines -+ * SOLARIS -+ * TRU64UNIX/DIGITAL_UNIX -+ * LINUX -+ * -+ * Target processor defines -+ * SPARC -+ * ALPHA -+ * I386 -+ * IA64 -+ * X86_64 -+ * -+ * Byte order defines -+ * __LITTLE_ENDIAN__ -+ * __BIG_ENDIAN__ -+ * -+ * Data size defines -+ * _LP64 - LP64 - long/pointer is 64 bits -+ * _ILP32 - LP32 - long/pointer is 32 bits -+ * -+ * Elan defines for main processor -+ * __MAIN_LITTLE_ENDIAN__ - main byte order (for thread code) -+ * __MAIN_BIG_ENDIAN__ -+ * _MAIN_LP64 - main long size (for thread code) -+ * _MAIN_ILP32 -+ * -+ * Compiling for kernel (defined in makefile) -+ * _KERNEL -+ * -+ */ -+ -+#if defined(__LP64__) && !defined(_LP64) -+# define _LP64 -+#endif -+ -+#if defined(__arch64__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if defined(__alpha__) && !defined(_LP64) && !defined(_ILP32) -+# define _LP64 -+#endif -+ -+#if !defined(__arch64__) && !defined(_ILP32) && !defined(_LP64) -+# define _ILP32 -+#endif -+ -+#if defined(__ELAN__) || defined(__ELAN3__) -+ -+#define __LITTLE_ENDIAN__ -+ -+#if defined(__host_solaris) && defined(__host_sparc) -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define _MAIN_ILP32 -+#define __MAIN_BIG_ENDIAN__ -+ -+#elif defined(__host_osf) -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_alpha) -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_sparc) -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __MAIN_BIG_ENDIAN__ -+#ifdef __KERNEL__ -+# define _MAIN_LP64 -+#else -+# define _MAIN_ILP32 -+#endif -+ -+#elif defined(__host_linux) && defined(__host_i386) -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define _MAIN_ILP32 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_ia64) -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#elif defined(__host_linux) && defined(__host_x86_64) -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define _MAIN_LP64 -+#define __MAIN_LITTLE_ENDIAN__ -+ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#else /* !defined(__ELAN3__) */ -+ -+#if (defined(sun) || defined(__sun)) && defined(sparc) && !defined(__sparcv9) /* Sun Solaris 5.6 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#ifndef __BIG_ENDIAN__ -+#define __BIG_ENDIAN__ -+#endif -+ -+#elif (defined(sun) || defined(__sun)) && defined(sparc) && defined(__sparcv9) /* Sun Solaris 5.7 */ -+#define SOLARIS -+#define SPARC -+#define SOLARIS_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif defined(__osf__) && defined(__alpha) /* Digital Unix */ -+#define TRU64UNIX -+#define DIGITAL_UNIX -+#define ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__alpha) /* Linux Alpha */ -+ -+#define LINUX -+#define ALPHA -+#define LINUX_ALPHA -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__sparc) /* Linux Sparc */ -+ -+#define LINUX -+#define SPARC -+#define LINUX_SPARC -+#define __BIG_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__i386) /* Linux i386 */ -+ -+#define LINUX -+#define I386 -+#define LINUX_I386 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__ia64) /* Linux ia64 */ -+ -+#define LINUX -+#define IA64 -+#define LINUX_IA64 -+#define __LITTLE_ENDIAN__ -+ -+#elif (defined(linux) || defined(__linux__)) && defined(__x86_64) /* Linux x86_64 */ -+ -+#define LINUX -+#define X86_64 -+#define LINUX_X86_64 -+#define __LITTLE_ENDIAN__ -+ -+#elif defined(__QNXNTO__) -+#define QNX -+#define I386 -+#define __LITTLE_ENDIAN__ -+#else -+#error Cannot determine operating system/processor architecture. -+#endif -+ -+#endif -+ -+#include -+ -+#endif /* _QSNET_CONFIG_H */ -Index: linux-2.6.5-7.191/include/qsnet/crwlock.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/crwlock.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/crwlock.h 2005-07-28 14:52:52.976659280 -0400 -@@ -0,0 +1,207 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+/* -+ * Complex - Reader/Writer locks -+ * Ref: "UNIX Systems for Modern Architectures", by Curt Schimmel, -+ * sec 11.6.3. -+ * -+ * This implementation is based on semaphores and may not be called from -+ * interrupt handlers. -+ * -+ */ -+ -+#if !defined(_LINUX_RWLOCK_H) -+#define _LINUX_RWLOCK_H -+ -+#if defined(__KERNEL__) -+ -+typedef enum { RD, WRT, ANY } crwlock_type_t; -+ -+#define crwlock_write_held(l) debug_crwlock_held(l, WRT, __BASE_FILE__,__LINE__) -+#define crwlock_read_held(l) debug_crwlock_held(l, RD, __BASE_FILE__, __LINE__) -+#define crwlock_held(l) debug_crwlock_held(l, ANY, __BASE_FILE__, __LINE__) -+ -+#define crwlock_read(l) debug_crwlock_read(l, __BASE_FILE__, __LINE__) -+#define crwlock_write(l) debug_crwlock_write(l, __BASE_FILE__, __LINE__) -+#define crwlock_done(l) debug_crwlock_done(l, __BASE_FILE__, __LINE__) -+ -+#if defined(DEBUG_RWLOCK) && defined(__alpha__) && !defined(DEBUG_SPINLOCK) -+#define DEBUG_SPINLOCK -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if !defined(DEBUG_SPINLOCK) -+#define debug_spin_lock(lock, file, line) spin_lock(lock) -+#endif -+ -+typedef struct { -+ spinlock_t m_lock; /* protects cnt fields below */ -+ int m_rdcnt; /* # of rdrs in crit section */ -+ int m_wrcnt; /* # of wrtrs in crit section */ -+ int m_rdwcnt; /* # of waiting readers */ -+ int m_wrwcnt; /* # of waiting writers */ -+ struct semaphore m_rdwait; /* sema where readers wait */ -+ struct semaphore m_wrwait; /* sema where writers wait */ -+ pid_t m_wrholder; /* task holding write lock */ -+} crwlock_t; -+ -+extern __inline__ void -+crwlock_init(crwlock_t *l) -+{ -+ l->m_lock = SPIN_LOCK_UNLOCKED; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->m_rdwait = MUTEX_LOCKED; -+ l->m_wrwait = MUTEX_LOCKED; -+#else -+ sema_init(&l->m_rdwait,0); -+ sema_init(&l->m_wrwait,0); -+#endif -+ l->m_rdcnt = l->m_wrcnt = l->m_rdwcnt = l->m_wrwcnt = 0; -+ l->m_wrholder = PID_NONE; -+} -+ -+extern __inline__ void -+crwlock_destroy(crwlock_t *l) -+{ -+ ASSERT(l->m_rdcnt == 0 && l->m_wrcnt == 0); -+} -+ -+/* -+ * If a writer has the lock presently or there are writers waiting, -+ * then we have to wait. -+ */ -+extern __inline__ void -+debug_crwlock_read(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_wrwcnt) { -+ l->m_rdwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_rdwait); /* P */ -+ } else { -+ l->m_rdcnt++; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+/* -+ * If we're the last reader, and a writer is waiting, -+ * then let the writer go now. -+ */ -+/* private */ -+extern __inline__ void -+debug_crwlock_read_done(crwlock_t *l, char *file, int line) -+{ -+ spin_lock(&l->m_lock); -+ l->m_rdcnt--; -+ if (l->m_wrwcnt && l->m_rdcnt == 0) { -+ l->m_wrcnt = 1; -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ return; -+ } -+ spin_unlock(&l->m_lock); -+} -+ -+extern __inline__ void -+debug_crwlock_write(crwlock_t *l, char *file, int line) -+{ -+ ASSERT(!in_interrupt()); -+ spin_lock(&l->m_lock); -+ if (l->m_wrcnt || l->m_rdcnt) { /* block if lock is in use */ -+ l->m_wrwcnt++; -+ spin_unlock(&l->m_lock); -+ down(&l->m_wrwait); /* P */ -+ } else { /* lock is not in use */ -+ l->m_wrcnt = 1; -+ spin_unlock(&l->m_lock); -+ } -+ l->m_wrholder = current->pid; -+} -+ -+/* private */ -+extern __inline__ void -+debug_crwlock_write_done(crwlock_t *l, char *file, int line) -+{ -+ int rdrs; -+ -+ spin_lock(&l->m_lock); -+ l->m_wrholder = PID_NONE; -+ if (l->m_rdwcnt) { /* let any readers go first */ -+ l->m_wrcnt = 0; -+ rdrs = l->m_rdwcnt; -+ l->m_rdcnt = rdrs; -+ l->m_rdwcnt = 0; -+ spin_unlock(&l->m_lock); -+ while (rdrs--) -+ up(&l->m_rdwait); /* V */ -+ } else if (l->m_wrwcnt) { /* or let any writer go */ -+ l->m_wrwcnt--; -+ spin_unlock(&l->m_lock); -+ up(&l->m_wrwait); /* V */ -+ } else { /* nobody waiting, unlock */ -+ l->m_wrcnt = 0; -+ spin_unlock(&l->m_lock); -+ } -+} -+ -+extern __inline__ void -+debug_crwlock_done(crwlock_t *l, char *file, int line) -+{ -+ if (l->m_wrholder == current->pid) -+ debug_crwlock_write_done(l, file, line); -+ else -+ debug_crwlock_read_done(l, file, line); -+} -+ -+/* -+ * Return nonzero if lock is held -+ */ -+extern __inline__ int -+debug_crwlock_held(crwlock_t *l, crwlock_type_t t, char *file, int line) -+{ -+ int res; -+ -+ spin_lock(&l->m_lock); -+ switch(t) { -+ case RD: -+ res = l->m_rdcnt; -+ break; -+ case WRT: -+ res = l->m_wrcnt; -+ break; -+ case ANY: -+ res = l->m_wrcnt + l->m_rdcnt; -+ break; -+ } -+ spin_unlock(&l->m_lock); -+ -+ return res; -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_RWLOCK_H */ -Index: linux-2.6.5-7.191/include/qsnet/ctrl_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/ctrl_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/ctrl_linux.h 2005-07-28 14:52:52.977659128 -0400 -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_CTRL_LINUX_H -+#define __QSNET_CTRL_LINUX_H -+ -+#ident "$Id: ctrl_linux.h,v 1.3 2003/03/26 09:32:03 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/ctrl_linux.h,v $*/ -+ -+#define QSNETIO_USER_BASE 0x40 -+ -+#define QSNETIO_DEBUG_DUMP _IO ('e', QSNETIO_USER_BASE + 0) -+ -+typedef struct qsnetio_debug_buffer_struct -+{ -+ caddr_t addr; -+ size_t len; -+} QSNETIO_DEBUG_BUFFER_STRUCT; -+#define QSNETIO_DEBUG_BUFFER _IOWR ('e', QSNETIO_USER_BASE + 1, QSNETIO_DEBUG_BUFFER_STRUCT) -+ -+typedef struct qsnetio_debug_kmem_struct -+{ -+ void *handle; -+} QSNETIO_DEBUG_KMEM_STRUCT; -+#define QSNETIO_DEBUG_KMEM _IOWR ('e', QSNETIO_USER_BASE + 2, QSNETIO_DEBUG_KMEM_STRUCT) -+ -+#endif /* __QSNET_CTRL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/debug.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/debug.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/debug.h 2005-07-28 14:52:52.977659128 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+#ifndef _QSNET_DEBUG_H -+#define _QSNET_DEBUG_H -+ -+#if defined(DIGITAL_UNIX) -+#include -+#elif defined(LINUX) -+extern int qsnet_assfail (char *ex, const char *func, char *file, int line); -+ -+#define ASSERT(EX) do { \ -+ if (!(EX) && qsnet_assfail (#EX, __FUNCTION__, __BASE_FILE__, __LINE__)) { \ -+ BUG(); \ -+ } \ -+} while (0) -+#endif /* DIGITAL_UNIX */ -+ -+/* debug.c */ -+extern void qsnet_debug_init(void); -+extern void qsnet_debug_fini(void); -+extern void qsnet_debug_disable(int); -+extern void qsnet_debug_alloc(void); -+ -+#define QSNET_DEBUG_BUFFER ((unsigned int)(0x01)) -+#define QSNET_DEBUG_CONSOLE ((unsigned int)(0x02)) -+#define QSNET_DEBUG_BUF_CON ( QSNET_DEBUG_BUFFER | QSNET_DEBUG_CONSOLE ) -+ -+#ifdef __GNUC__ -+extern void qsnet_debugf (unsigned int mode, char *fmt, ...) -+ __attribute__ ((format (printf,2,3))); -+extern void kqsnet_debugf (char *fmt, ...) -+ __attribute__ ((format (printf,1,2))); -+#else -+extern void qsnet_debugf (unsigned int mode, char *fmt, ...); -+extern void kqsnet_debugf (char *fmt, ...); -+#endif -+extern void qsnet_vdebugf (unsigned int mode, char * prefix, char *fmt, va_list ap); -+extern int qsnet_debug_buffer(caddr_t ubuffer, int len); -+extern int qsnet_debug_dump (void); -+extern int qsnet_debug_kmem (void *handle); -+ -+extern void qsnet_debug_buffer_on(void); -+extern void qsnet_debug_buffer_clear(void); -+extern void qsnet_debug_buffer_mark(char *str); -+ -+#endif /* _QSNET_DEBUG_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/fence.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/fence.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/fence.h 2005-07-28 14:52:52.977659128 -0400 -@@ -0,0 +1,178 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+/* $Id: fence.h,v 1.21.6.4 2004/11/23 14:34:45 addy Exp $ */ -+/* $Source: /cvs/master/quadrics/qsnet/fence.h,v $*/ -+ -+#ifndef _CONFIG_FENCE_H -+#define _CONFIG_FENCE_H -+ -+#ident "$Id: fence.h,v 1.21.6.4 2004/11/23 14:34:45 addy Exp $" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(__ELAN__) || defined(__ELAN3__) -+ -+/* no memory barriers required on elan3/elan4 */ -+ -+#elif defined QSNET_MEMBARS_ASSERT -+ -+#include -+#define MEMBAR_MEMISSUE() assert(0); -+#define MEMBAR_SYNC() assert(0); -+#define MEMBAR_STORELOAD() assert(0); -+#define MEMBAR_LOADSTORE() assert(0); -+#define MEMBAR_STORESTORE() assert(0); -+#define MEMBAR_LOADLOAD() assert(0); -+#define MEMBAR_VISIBLE() assert(0); -+#define MEMBAR_DRAIN() assert(0); -+ -+#elif defined(__alpha) -+ -+/* Memory barrier instructions */ -+#if defined(__DECC) || defined(__DECXX) -+long asm( const char *,...); -+#pragma intrinsic( asm ) -+#define MEMBAR_MEMISSUE() asm("mb") -+#define MEMBAR_SYNC() asm("mb") -+#define MEMBAR_STORELOAD() asm("wmb") -+#define MEMBAR_LOADSTORE() asm("mb") -+#define MEMBAR_STORESTORE() asm("wmb") -+#define MEMBAR_LOADLOAD() asm("mb") -+#define MEMBAR_VISIBLE() asm("") -+#define MEMBAR_DRAIN() asm("wmb") -+ -+#else -+/* Assume gcc */ -+#define MEMBAR_MEMISSUE() asm volatile ("mb"::) -+#define MEMBAR_SYNC() asm volatile ("mb"::) -+#define MEMBAR_STORELOAD() asm volatile ("wmb"::) -+#define MEMBAR_LOADSTORE() asm volatile ("mb"::) -+#define MEMBAR_STORESTORE() asm volatile ("wmb"::) -+#define MEMBAR_LOADLOAD() asm volatile ("mb"::) -+#define MEMBAR_VISIBLE() asm volatile ("" ::: "memory") -+#define MEMBAR_DRAIN() asm volatile ("wmb"::: "memory") -+ -+#endif /* __DECC */ -+ -+#elif defined(__sparc) -+ -+/* UltraSPARC with WRITE MERGING enabled */ -+#define MEMBAR_MEMISSUE() asm volatile ("membar #MemIssue"); -+#define MEMBAR_SYNC() asm volatile ("membar #Sync"); -+#define MEMBAR_STORELOAD() asm volatile ("membar #StoreLoad"); -+#define MEMBAR_LOADSTORE() asm volatile ("membar #LoadStore"); -+#define MEMBAR_STORESTORE() asm volatile ("membar #StoreStore"); -+#define MEMBAR_LOADLOAD() asm volatile ("membar #LoadLoad"); -+#define MEMBAR_VISIBLE() asm volatile (""::: "memory") -+#define MEMBAR_DRAIN() asm volatile (""::: "memory") -+ -+#elif defined(__linux__) -+ -+#if defined(__INTEL_COMPILER) -+ -+/* NB: Intel compiler version 8.0 now also defines __GNUC__ unless you set the -no-gcc cmdline option -+ * I've moved the check for __INTEL_COMPILER to be first to get around this -+ */ -+#ifdef __ECC -+ -+#include -+ -+#define MEMBAR_MEMISSUE() __mf() -+#define MEMBAR_SYNC() __mf() -+#define MEMBAR_STORELOAD() __mf() -+#define MEMBAR_LOADSTORE() __mf() -+#define MEMBAR_STORESTORE() __mf() -+#define MEMBAR_LOADLOAD() __mf() -+#define MEMBAR_VISIBLE() __mf() -+#define MEMBAR_DRAIN() __mf() -+ -+#else -+ -+#warning Membars not implemented with this compiler. -+#define MEMBAR_MEMISSUE() ; -+#define MEMBAR_SYNC() ; -+#define MEMBAR_STORELOAD() ; -+#define MEMBAR_LOADSTORE() ; -+#define MEMBAR_STORESTORE() ; -+#define MEMBAR_LOADLOAD() ; -+#define MEMBAR_VISIBLE() ; -+#define MEMBAR_DRAIN() ; -+ -+#endif /* __ECC */ -+ -+#elif defined(__GNUC__) -+ -+#ifndef __ia64 -+ -+/* These are needed by on AMD64 */ -+#include -+#include -+ -+#ifndef __cplusplus -+/* this header file has a parameter called "new" - great huh */ -+#include -+#endif -+ -+#else -+# define mb() __asm__ __volatile__ ("mf" ::: "memory") -+# define rmb() mb() -+# define wmb() mb() -+#endif /* !__ia64 */ -+ -+#if defined(__x86_64) || defined(__i386) -+/* For some reason the AMD64 definition (glibc-devel 2.3.X) of this -+ * is not useful (compiler only directive) so we overload it here -+ */ -+/* I don't trust the IA32 header files either as with mtrr enabled -+ * we really need a membar and not a compiler directive -+ * NB: sfence is only available with X86_FEATURE_XMM CPUs -+ */ -+#undef wmb -+#define wmb() asm volatile("sfence":::"memory"); -+#endif /* __x86_64 */ -+ -+#define MEMBAR_MEMISSUE() mb() -+#define MEMBAR_SYNC() mb() -+#define MEMBAR_STORELOAD() wmb() -+#define MEMBAR_LOADSTORE() mb() -+#define MEMBAR_STORESTORE() wmb() -+#define MEMBAR_LOADLOAD() mb() -+ -+#ifdef __ia64 -+#define MEMBAR_VISIBLE() asm volatile ("mf.a;;mf;;"::: "memory") -+#define MEMBAR_DRAIN() asm volatile ("mf;"::: "memory") -+#else -+#define MEMBAR_VISIBLE() asm volatile (""::: "memory") -+#define MEMBAR_DRAIN() wmb() -+#endif -+ -+#else /* elif __GNUC__ */ -+ -+#error Membars not implemented for this architecture/compiler. -+ -+#endif /* __INTEL_COMPILER */ -+ -+#else /* elif __linux__ */ -+ -+#error Membars not implemented for this architecture/compiler. -+ -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _CONFIG_FENCE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/kernel.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/kernel.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/kernel.h 2005-07-28 14:52:52.978658976 -0400 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_H -+#define __QSNET_KERNEL_H -+ -+#ident "$Id: kernel.h,v 1.8 2003/03/14 10:18:22 mike Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel.h,v $*/ -+ -+#include -+#include -+ -+#if defined(SOLARIS) -+#include -+#endif -+ -+#if defined(DIGITAL_UNIX) -+#include -+#endif -+ -+#if defined(LINUX) -+#include -+#endif -+ -+#include -+ -+#endif /* __QSNET_KERNEL_H */ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.6.5-7.191/include/qsnet/kernel_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/kernel_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/kernel_linux.h 2005-07-28 14:52:52.978658976 -0400 -@@ -0,0 +1,352 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KERNEL_LINUX_H -+#define __QSNET_KERNEL_LINUX_H -+ -+#ident "$Id: kernel_linux.h,v 1.62.6.6 2005/03/07 16:43:32 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/kernel_linux.h,v $*/ -+ -+#if defined(MODVERSIONS) -+#include -+#endif -+ -+#include -+#include -+ -+ -+/* ASSERT(spin_is_locked(l)) would always fail on UP kernels */ -+#if defined(CONFIG_SMP) -+#define SPINLOCK_HELD(l) spin_is_locked(l) -+#else -+#define SPINLOCK_HELD(l) (1) -+#endif -+ -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+#if defined(LINUX_ALPHA) -+# include /* for TSUNAMI_MEM */ -+#endif -+ -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) -+# undef MOD_INC_USE_COUNT -+# undef MOD_DEC_USE_COUNT -+# define MOD_INC_USE_COUNT -+# define MOD_DEC_USE_COUNT -+#endif -+ -+#define MIN(a,b) ((a) > (b) ? (b) : (a)) -+#define MAX(a,b) ((a) > (b) ? (a) : (b)) -+ -+/* stray types */ -+typedef u64 u_longlong_t; -+typedef unsigned long uintptr_t; -+typedef int bool_t; -+ -+typedef unsigned long virtaddr_t; /* virtual address */ -+typedef unsigned long ioaddr_t; /* io address */ -+typedef unsigned long sdramaddr_t; /* elan sdram offset */ -+ -+/* 386 kernel can be compiled with PAE enabled to use a 44 bit physical address */ -+#if defined(CONFIG_X86_PAE) -+typedef unsigned long long physaddr_t; -+#else -+typedef unsigned long physaddr_t; -+#endif -+ -+/* ticks since reboot, and tick freq */ -+#define lbolt jiffies -+#define hz HZ -+ -+/* System page size and friends */ -+#define PAGESIZE PAGE_SIZE -+#define PAGESHIFT PAGE_SHIFT -+#define PAGEOFFSET (PAGE_SIZE - 1) -+#define PAGEMASK PAGE_MASK -+ -+#define PAGE_ALIGNED(a) (((a) & PAGE_MASK) == a) -+ -+/* convert between bytes and pages */ -+#define btop(b) ((unsigned long)(b) >> PAGE_SHIFT) /* rnd down */ -+#define btopr(b) btop(PAGE_ALIGN((unsigned long) b)) /* rnd up */ -+#define ptob(p) ((unsigned long)(p) << PAGE_SHIFT) -+ -+/* round up sz to the nearest multiple of blk */ -+#define roundup(sz,blk) ((blk) * ((sz) / (blk) + ((sz) % (blk) ? 1 : 0))) -+ -+/* send a signal to a process */ -+#define psignal(pr,sig) send_sig(sig,pr,0) -+ -+/* microsecond delay */ -+#define DELAY(us) udelay(us) -+ -+/* macro macros */ -+#define MACRO_BEGIN do { -+#define MACRO_END } while (0) -+ -+/* D-Unix compatable errno values */ -+#define ESUCCESS 0 -+#define EFAIL 255 -+ -+/* ASSERT(NO_LOCKS_HELD) will be a no-op */ -+#define NO_LOCKS_HELD 1 -+ -+/* misc */ -+typedef int label_t; -+#define on_fault(ljp) ((ljp) == NULL) -+#define _NOTE(X) -+#define no_fault() ((void) 0) -+#define panicstr 0 -+ -+/* return from system call is -EXXX on linux */ -+#define set_errno(e) (-(e)) -+ -+/* -+ * BSD-style byte ops -+ */ -+ -+#define bcmp(src1,src2,len) memcmp(src1,src2,len) -+#define bzero(dst,len) memset(dst,0,len) -+#define bcopy(src,dst,len) memcpy(dst,src,len) -+ -+#define preemptable_start do { long must_yield_at = lbolt + (hz/10); -+#define preemptable_end } while (0) -+#define preemptable_check() do {\ -+ if ((lbolt - must_yield_at) > 0)\ -+ {\ -+ preemptable_yield() ; \ -+ must_yield_at = lbolt + (hz/10);\ -+ }\ -+ } while (0) -+ -+#define preemptable_yield() schedule() -+ -+#define CURPROC() current -+#define CURTHREAD() current -+#define SUSER() suser() -+ -+/* 64 bit IO operations on 32 bit intel cpus using MMX */ -+#if defined(LINUX_I386) -+extern u64 qsnet_readq (volatile u64 *ptr); -+extern void qsnet_writeq (u64 value, volatile u64 *ptr); -+ -+#define readq(ptr) qsnet_readq((void *) ptr) -+#define writeq(val,ptr) qsnet_writeq(val, (void *)ptr) -+#endif -+ -+/* -+ * Memory barriers -+ */ -+#ifndef mmiob -+# define mmiob() mb() -+#endif -+ -+/* -+ * Exit handlers -+ */ -+#define HANDLER_REGISTER(func,arg,flags) xa_handler_register(func,arg,flags) -+#define HANDLER_UNREGISTER(func,arg,flags) xa_handler_unregister(func,arg,flags) -+ -+/* -+ * KMEM_GETPAGES and KMEM_ALLOC both call kmem_alloc, which -+ * translates the call to kmalloc if < PAGE_SIZE, or vmalloc -+ * if >= PAGE_SIZE. vmalloc will always return a page-aligned -+ * region rounded up to the nearest page, while kmalloc will -+ * return bits and pieces of a page. -+ */ -+ -+#ifdef KMEM_DEBUG -+extern void *qsnet_kmem_alloc_debug(int len, int sleep, int zerofill, char *file, int line); -+extern void qsnet_kmem_free_debug(void *ptr, int len, char *file, int line); -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,0,__FILE__,__LINE__); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc_debug(len,sleep,1,__FILE__,__LINE__); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free_debug((void *)ptr,len,__FILE__,__LINE__) -+ -+#else -+ -+extern void *qsnet_kmem_alloc(int len, int sleep, int zerofill); -+extern void qsnet_kmem_free(void *ptr, int len); -+ -+#define KMEM_ALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,0); } -+#define KMEM_ZALLOC(ptr,type,len,sleep) \ -+ { KMEM_ASSERT(sleep); (ptr)=(type)qsnet_kmem_alloc(len,sleep,1); } -+ -+#define KMEM_FREE(ptr,len) qsnet_kmem_free((void *)ptr,len) -+ -+#endif -+extern void qsnet_kmem_display(void *handle); -+extern physaddr_t kmem_to_phys(void *ptr); -+ -+#define KMEM_ASSERT(sleep) ASSERT(!(in_interrupt() && sleep)) -+ -+ -+#define KMEM_GETPAGES(ptr,type,pgs,sleep) KMEM_ZALLOC(ptr,type,ptob(pgs),sleep) -+#define KMEM_FREEPAGES(ptr,pgs) KMEM_FREE(ptr,ptob(pgs)); -+ -+/* -+ * Copying from user space -> kernel space (perms checked) -+ */ -+#define copyin(up,kp,size) copy_from_user(kp,up,size) -+#define copyin_noerr(up,kp,size) copy_from_user(kp,up,size) -+ -+/* get_user() gets xfer width right */ -+#define fulinux(ret, up) (get_user(ret, (up)) == 0 ? ret : -1) -+#define fulinuxp(ret, up) (get_user(ret, (up)) == 0 ? ret : NULL) -+ -+extern __inline__ int fubyte (u8 *up) { u8 ret; return fulinux(ret, up);} -+extern __inline__ int fusword (u16 *up) { u16 ret; return fulinux(ret, up);} -+extern __inline__ int fuword (u32 *up) { u32 ret; return fulinux(ret, up);} -+#if BITS_PER_LONG > 32 -+extern __inline__ u64 fulonglong(u64 *up) { u64 ret; return fulinux(ret, up);} -+#else -+extern __inline__ u64 fulonglong(u64 *up) { return ((u64) fuword((u32 *)up) | (((u64) fuword(((u32 *)up)+1))<<32)); } -+#endif -+extern __inline__ void *fuptr (void **up) { void *ret; return fulinuxp(ret,up);} -+ -+#define fubyte_noerr(up) fubyte(up) -+#define fusword_noerr(up) fusword(up) -+#define fuword_noerr(up) fuword(up) -+#define fulonglong_noerr(up) fulonglong(up) -+#define fuptr_noerr(up) fuptr(up) -+ -+extern __inline__ int copyinstr(char *up, char *kp, int max, int *size) -+{ -+ for (*size = 1; *size <= max; (*size)++) { -+ if (get_user(*kp, up++) != 0) -+ return EFAULT; /* bad user space addr */ -+ if (*kp++ == '\0') -+ return 0; /* success */ -+ } -+ *size = max; -+ return ENAMETOOLONG; /* runaway string */ -+} -+ -+/* -+ * Copying from kernel space -> user space (perms checked) -+ */ -+ -+#define copyout(kp,up,size) copy_to_user(up,kp,size) -+#define copyout_noerr(kp,up,size) copy_to_user(up,kp,size) -+ -+/* put_user() gets xfer width right */ -+#define sulinux(val, up) (put_user(val, (up)) == 0 ? 0 : -1) -+ -+extern __inline__ int subyte (u8 *up, u8 val) { return sulinux(val, up); } -+extern __inline__ int susword (u16 *up, u16 val) { return sulinux(val, up); } -+extern __inline__ int suword (u32 *up, u32 val) { return sulinux(val, up); } -+#if BITS_PER_LONG > 32 -+extern __inline__ int sulonglong(u64 *up, u64 val) { return sulinux(val, up); } -+#else -+extern __inline__ int sulonglong(u64 *up, u64 val) { return (suword((u32 *) up, (u32) val) == 0 ? -+ suword(((u32 *) up)+1, (u32) (val >> 32)) : -1); } -+#endif -+extern __inline__ int suptr (void **up,void *val){ return sulinux(val, up); } -+ -+#define subyte_noerr(up,val) subyte(up,val) -+#define susword_noerr(up,val) susword(up,val) -+#define suword_noerr(up,val) suword(up,val) -+#define sulonglong_noerr(up,val) sulonglong(up,val) -+#define suptr_noerr(up,val) suptr(up,val) -+ -+/* -+ * /proc/qsnet interface -+ */ -+extern inline int -+str_append(char *buf, char *add, int size) -+{ -+#define TRUNC_MSG "[Output truncated]\n" -+ int full = 0; -+ int max = size - strlen(TRUNC_MSG) - strlen(add) - 1; -+ -+ if (strlen(buf) > max) { -+ strcat(buf, TRUNC_MSG); -+ full = 1; -+ } else -+ strcat(buf, add); -+ return full; -+} -+ -+/* Spinlocks */ -+#define spin_lock_destroy(l) ((void) 0) -+ -+/* Complex - Reader/Writer locks - we added */ -+typedef crwlock_t krwlock_t; -+#define krwlock_init(l) crwlock_init(l) -+#define krwlock_destroy(l) crwlock_destroy(l) -+#define krwlock_write(l) crwlock_write(l) -+#define krwlock_read(l) crwlock_read(l) -+#define krwlock_done(l) crwlock_done(l) -+#define krwlock_is_locked(l) crwlock_held(l) -+#define krwlock_is_write_locked(l) crwlock_write_held(l) -+#define krwlock_is_read_locked(l) crwlock_read_held(l) -+ -+/* -+ * Timeouts - Solaris style. -+ */ -+typedef struct timer_list timer_fn_t; -+ -+extern inline void -+schedule_timer_fn(timer_fn_t *timer, void (*fun)(void *), void *arg, long hz_delay) -+{ -+ init_timer(timer); -+ -+ timer->function = (void (*)(unsigned long)) fun; -+ timer->data = (unsigned long) arg; -+ timer->expires = jiffies + hz_delay; -+ -+ add_timer(timer); -+} -+ -+/* returns 1 if timer_fn was cancelled */ -+extern inline int -+cancel_timer_fn(timer_fn_t *timer) -+{ -+ return (del_timer_sync(timer)); -+} -+ -+extern inline int -+timer_fn_queued(timer_fn_t *timer) -+{ -+ return (timer_pending (timer)); -+} -+/* -+ * Hold/release CPU's. -+ */ -+ -+extern void cpu_hold_all(void); -+extern void cpu_release_all(void); -+#define CAPTURE_CPUS() cpu_hold_all() -+#define RELEASE_CPUS() cpu_release_all() -+ -+#define IASSERT ASSERT -+ -+#endif /* __QSNET_KERNEL_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/kpte.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/kpte.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/kpte.h 2005-07-28 14:52:52.979658824 -0400 -@@ -0,0 +1,109 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KPTE_H -+#define __QSNET_KPTE_H -+ -+#ident "@(#)$Id: kpte.h,v 1.1.2.2 2005/03/02 09:51:49 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/qsnet/kpte.h,v $*/ -+ -+#include -+ -+#ifdef NO_RMAP -+# define pte_offset_kernel pte_offset -+# define pte_offset_map pte_offset -+# define pte_unmap(A) do { ; } while (0) -+#endif -+ -+/* -+ * Pte stuff -+ */ -+static __inline__ struct mm_struct * -+get_kern_mm(void) -+{ -+ return &init_mm; -+} -+ -+static __inline__ pte_t * -+find_pte_map(struct mm_struct *mm, unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *ptep; -+ -+/* XXXX - handle hugh tlb code */ -+ pgd = pgd_offset(mm, vaddr); -+ if (pgd_none(*pgd) || pgd_bad(*pgd)) -+ goto out; -+ -+ pmd = pmd_offset(pgd, vaddr); -+ if (pmd_none(*pmd) || pmd_bad (*pmd)) -+ goto out; -+ -+ ptep = pte_offset_map (pmd, vaddr); -+ if (! ptep) -+ goto out; -+ -+ if (pte_present (*ptep)) -+ return ptep; -+ -+ pte_unmap (ptep); -+out: -+ return NULL; -+} -+ -+static __inline__ pte_t * -+find_pte_kernel(unsigned long vaddr) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = pgd_offset_k(vaddr); -+ if (pgd && !pgd_none(*pgd)) { -+ pmd = pmd_offset(pgd, vaddr); -+ if (pmd && pmd_present(*pmd)) { -+ pte = pte_offset_kernel(pmd, vaddr); -+ if (pte && pte_present(*pte)) -+ return (pte); -+ } -+ } -+ return (NULL); -+} -+ -+static __inline__ physaddr_t -+pte_phys(pte_t pte) -+{ -+#if defined(LINUX_ALPHA) -+ /* RedHat 7.1 2.4.3-12 -+ * They have now enabled Monster windows on Tsunami -+ * and so can use the Main's phys pte value -+ */ -+ return (pte_val(pte) >> (32-PAGE_SHIFT)); -+#elif defined(LINUX_I386) || defined(LINUX_X86_64) -+#if defined(_PAGE_NX) -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1) & ~_PAGE_NX); -+#else -+ return (pte_val(pte) & ~((1 << PAGE_SHIFT)-1)); -+#endif -+#elif defined(LINUX_SPARC) -+ return (pte_val(pte) & _PAGE_PADDR); -+#elif defined(LINUX_IA64) -+ return (pte_val(pte) & _PFN_MASK); -+#else -+#error Unknown architecture -+#endif -+} -+ -+#endif /* __QSNET_KPTE_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/kthread.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/kthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/kthread.h 2005-07-28 14:52:52.979658824 -0400 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * Copyright (c) 2002-2004 by Quadrics Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __QSNET_KTHREAD_H -+#define __QSNET_KTHREAD_H -+ -+#ident "@(#)$Id: kthread.h,v 1.1 2004/10/28 11:50:29 david Exp $ $Name: QSNETMODULES-4-31_20050321 $" -+/* $Source: /cvs/master/quadrics/qsnet/kthread.h,v $*/ -+ -+#include -+ -+/* -+ * kernel threads -+ */ -+extern __inline__ void -+kernel_thread_init(char *comm) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#ifndef NO_NPTL -+# define sigmask_lock sighand->siglock -+#endif -+ lock_kernel(); -+ daemonize(); -+ reparent_to_init(); -+ -+ /* avoid getting signals */ -+ spin_lock_irq(¤t->sigmask_lock); -+ flush_signals(current); -+ sigfillset(¤t->blocked); -+ -+#ifdef NO_NPTL -+ recalc_sigpending(current); -+#else -+ recalc_sigpending(); -+#endif -+ -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ /* set our name for identification purposes */ -+ strncpy(current->comm, comm, sizeof(current->comm)); -+ -+ unlock_kernel(); -+#else -+ daemonize(comm); -+#endif -+} -+ -+extern __inline__ void * -+kernel_thread_wrap(caddr_t stk, int stksize, void (*proc)(void *), void *arg) -+{ -+ ASSERT(stk == NULL && stksize == 0); -+ kernel_thread((int (*)(void *))proc, arg, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ return (void *)1; /* non-null value */ -+} -+ -+#define kernel_thread_create(proc,arg) kernel_thread_wrap(NULL,0,(void (*)(void *))proc,arg) -+#define kernel_thread_exit() ((void) 0) -+#define kernel_thread_become_highpri() ((void) 0) -+ -+#endif /* __QSNET_KTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/list.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/list.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/list.h 2005-07-28 14:52:52.979658824 -0400 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Limited. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: list.h,v 1.5 2003/10/27 13:55:33 david Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/list.h,v $*/ -+ -+#ifndef __QSNET_LIST_H -+#define __QSNET_LIST_H -+ -+/* Implementation of doubly linked lists - compatible with linux */ -+struct list_head -+{ -+ struct list_head *next; -+ struct list_head *prev; -+}; -+ -+#if !defined(LINUX) -+#if ! defined( offsetof ) -+#define offsetof(T,F) ((int )&(((T *)0)->F)) -+#endif -+ -+#define LIST_HEAD_INIT(name) { &(name), &(name) } -+ -+#define LIST_HEAD(name) \ -+ struct list_head name = LIST_HEAD_INIT(name) -+#endif -+ -+#define list_entry(ptr, type, off) \ -+ ((type *) ((unsigned long)(ptr) - offsetof (type,off))) -+ -+#define INIT_LIST_HEAD(list) \ -+MACRO_BEGIN \ -+ (list)->next = (list)->prev = (list); \ -+MACRO_END -+ -+#define list_add(new, list) \ -+MACRO_BEGIN \ -+ (list)->next->prev = (new); \ -+ (new)->next = (list)->next; \ -+ (new)->prev = (list); \ -+ (list)->next = (new); \ -+MACRO_END -+ -+#define list_add_tail(new, list) \ -+MACRO_BEGIN \ -+ (list)->prev->next = new; \ -+ (new)->prev = (list)->prev; \ -+ (new)->next = (list); \ -+ (list)->prev = (new); \ -+MACRO_END -+ -+#define list_del(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+MACRO_END -+ -+#define list_del_init(entry) \ -+MACRO_BEGIN \ -+ (entry)->prev->next = (entry)->next; \ -+ (entry)->next->prev = (entry)->prev; \ -+ (entry)->next = (entry)->prev = (entry); \ -+MACRO_END -+ -+#define list_empty(list) \ -+ ((list)->next == (list)) -+ -+#define list_for_each(pos,list) \ -+ for (pos = (list)->next; pos != (list); \ -+ pos = (pos)->next) -+ -+#define list_for_each_safe(pos,n,list) \ -+ for (pos = (list)->next, n = (pos)->next; pos != (list); \ -+ pos = n, n = (pos)->next) -+ -+#endif /* __QSNET_LIST_H */ -Index: linux-2.6.5-7.191/include/qsnet/mutex.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/mutex.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/mutex.h 2005-07-28 14:52:52.980658672 -0400 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2000 Regents of the University of California -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ */ -+ -+#if !defined(_LINUX_MUTEX_H) -+#define _LINUX_MUTEX_H -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define PID_NONE 0 -+ -+typedef struct -+{ -+ struct semaphore sem; -+ pid_t holder; -+} kmutex_t; -+ -+extern __inline__ void -+kmutex_init (kmutex_t *l) -+{ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) -+ l->sem = MUTEX; -+#else -+ init_MUTEX(&l->sem); -+#endif -+ l->holder = PID_NONE; -+} -+ -+extern __inline__ void -+kmutex_destroy (kmutex_t *l) -+{ -+ ASSERT (l->holder == PID_NONE); -+} -+ -+extern __inline__ void -+kmutex_lock (kmutex_t *l) -+{ -+ ASSERT(l->holder != current->pid); -+ down (&l->sem); -+ l->holder = current->pid; -+} -+ -+extern __inline__ void -+kmutex_unlock (kmutex_t *l) -+{ -+ ASSERT(l->holder == current->pid); -+ -+ l->holder = PID_NONE; -+ up (&l->sem); -+} -+ -+extern __inline__ int -+kmutex_trylock (kmutex_t *l) -+{ -+ if (down_trylock (&l->sem) == 0) -+ { -+ l->holder = current->pid; -+ return (1); -+ } -+ return (0); -+} -+ -+extern __inline__ int -+kmutex_is_locked (kmutex_t *l) -+{ -+ return (l->holder == current->pid); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* _LINUX_MUTEX_H */ -Index: linux-2.6.5-7.191/include/qsnet/procfs_linux.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/procfs_linux.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/procfs_linux.h 2005-07-28 14:52:52.980658672 -0400 -@@ -0,0 +1,234 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ifndef __PROCFS_LINUX_H -+#define __PROCFS_LINUX_H -+ -+#ident "$Id: procfs_linux.h,v 1.6.2.6 2004/12/06 17:36:24 robin Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/procfs_linux.h,v $ */ -+ -+#if defined(__KERNEL__) -+ -+#include -+#include -+#include -+ -+extern gid_t qsnet_procfs_gid; -+ -+/* borrowed from fs/proc/proc_misc - helper for proc_read_int */ -+static inline int -+qsnet_proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) -+{ -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ return len; -+} -+ -+static inline int -+qsnet_proc_write_int(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_int(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "%d\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_int(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_int; -+ p->read_proc = qsnet_proc_read_int; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+static inline int -+qsnet_proc_write_hex(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ char tmpbuf[16]; -+ int res = count; -+ -+ if (count > sizeof(tmpbuf) - 1) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user(tmpbuf, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ tmpbuf[count] = '\0'; -+ *(int *)data = simple_strtoul(tmpbuf, NULL, 0); -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_hex(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ MOD_INC_USE_COUNT; -+ -+ len = sprintf(page, "0x%x\n", *(int *)data); -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_hex(struct proc_dir_entry *dir, char *path, int *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_hex; -+ p->read_proc = qsnet_proc_read_hex; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+#define QSNET_PROC_STR_LEN_MAX ((int)256) -+ -+static inline int -+qsnet_proc_write_str(struct file *file, const char *buf, unsigned long count, void *data) -+{ -+ int res = count; -+ -+ if (count > (QSNET_PROC_STR_LEN_MAX - 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ if (copy_from_user((char *)data, buf, count)) -+ res = -EFAULT; -+ else -+ { -+ ((char *)data)[count] = '\0'; -+ /* remove linefeed */ -+ if ( (count) && (((char *)data)[count -1] == '\n')) -+ ((char *)data)[count -1] = '\0'; -+ } -+ MOD_DEC_USE_COUNT; -+ -+ return (res); -+} -+ -+static inline int -+qsnet_proc_read_str(char *page, char **start, off_t off, int count, int *eof, void *data) -+{ -+ int len, res; -+ -+ if ( strlen(data) > (count + 1)) -+ return (-EINVAL); -+ -+ MOD_INC_USE_COUNT; -+ -+ /* cant output too much */ -+ if ( strlen(data) > (count + 1)) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ -+ len = sprintf(page, "%s\n", (char *)data); -+ if (len > count) -+ { -+ MOD_DEC_USE_COUNT; -+ return (-EINVAL); -+ } -+ -+ res = qsnet_proc_calc_metrics(page, start, off, count, eof, len); -+ -+ MOD_DEC_USE_COUNT; -+ return (res); -+} -+ -+static inline struct proc_dir_entry * -+qsnet_proc_register_str(struct proc_dir_entry *dir, char *path, char *var, int read_only) -+{ -+ struct proc_dir_entry *p; -+ -+ p = create_proc_entry(path, read_only ? S_IRUGO : S_IRUGO|S_IWUSR|S_IWGRP, dir); -+ if (p) { -+ if (! read_only) -+ p->write_proc = qsnet_proc_write_str; -+ p->read_proc = qsnet_proc_read_str; -+ p->data = var; -+ p->owner = THIS_MODULE; -+ p->gid = qsnet_procfs_gid; -+ } -+ return p; -+} -+ -+extern struct proc_dir_entry *qsnet_procfs_root; -+extern struct proc_dir_entry *qsnet_procfs_config; -+ -+#ifdef NO_PDE -+static inline struct proc_dir_entry *PDE(const struct inode *inode) -+{ -+ return inode->u.generic_ip; -+} -+#endif -+#endif /* __KERNEL__ */ -+ -+#define QSNET_PROCFS_IOCTL "/proc/qsnet/ioctl" -+#define QSNET_PROCFS_KMEM_DEBUG "/proc/qsnet/kmem_debug" -+#define QSNET_PROCFS_VERSION "/proc/qsnet/version" -+ -+#endif /* __PROCFS_LINUX_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/pthread.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/pthread.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/pthread.h 2005-07-28 14:52:52.980658672 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 2003 by Quadrics Supercomputers World Ltd. ++/* IOPROC SYNC PAGE + * -+ * For licensing information please see the supplied COPYING file ++ * Called when a memory map is synchronised with its disk image i.e. when the ++ * msync() syscall is invoked. Any future read or write to the associated page ++ * by the IOPROC should cause the page to be marked as referenced or modified. + * -+ */ -+ -+/* $Id: pthread.h,v 1.5 2004/06/07 10:47:06 addy Exp $ */ -+/* $Source: /cvs/master/quadrics/qsnet/pthread.h,v $*/ -+ -+#ifndef _CONFIG_PTHREAD_H -+#define _CONFIG_PTHREAD_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if defined(__ELAN__) -+ -+/* No pthread support on Elan co-processor */ -+ -+#define MUTEX unsigned long long -+#define MUTEX_INIT(X) ; -+#define MUTEX_LOCK(X) ; -+#define MUTEX_UNLOCK(X) ; -+ -+#else -+#if defined(DIGITAL_UNIX) -+#include -+#define MUTEX pthread_mutex_t -+#define MUTEX_INIT(X) tis_mutex_init(X) -+#define MUTEX_LOCK(X) tis_mutex_lock(X) -+#define MUTEX_UNLOCK(X) tis_mutex_unlock(X) -+#define MUTEX_TRYLOCK(X) (tis_mutex_trylock(X) == 0) -+ -+#else /* Linux... */ -+ -+/* Use standard pthread calls */ -+#include -+#define MUTEX pthread_mutex_t -+#define MUTEX_INIT(X) pthread_mutex_init(X, NULL) -+#define MUTEX_LOCK(X) pthread_mutex_lock(X) -+#define MUTEX_UNLOCK(X) pthread_mutex_unlock(X) -+#define MUTEX_TRYLOCK(X) (pthread_mutex_trylock(X) == 0) -+ -+#endif /* DIGITAL_UNIX */ -+#endif /* __ELAN__ */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _CONFIG_PTHREAD_H */ -+ -+/* -+ * Local variables: -+ * c-file-style: "stroustrup" -+ * End: -+ */ -Index: linux-2.6.5-7.191/include/qsnet/statsformat.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/statsformat.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/statsformat.h 2005-07-28 14:52:52.981658520 -0400 -@@ -0,0 +1,25 @@ -+#ifndef _QSNET_STATSFORMAT_H -+#define _QSNET_STATSFORMAT_H -+ -+#ident "$Id: statsformat.h,v 1.2 2003/05/22 19:37:14 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/statsformat.h,v $*/ -+ -+#include -+ -+/* -+ * format of an Elan stats record ++ * Not currently called as msync() calls ioproc_sync_range() instead + * -+ * type char(8), type of statistic, e.g. FPAGE, ELAN3, TPORT -+ * time uint64, 10 digits, time in millisecs since counters initialised -+ * device uint, 2 digits, Elan device id -+ * name char(32), name of the statistic -+ * value uint64, current value of statistic ++ * Called holding the mm->page_table_lock + */ -+ -+#ifdef _ILP32 -+#define ELAN_STATSFORMAT "%-8s %10llu %2d %-32s %llu\n" -+#else -+#define ELAN_STATSFORMAT "%-8s %10lu %2d %-32s %lu\n" -+#endif ++static inline void ++ioproc_sync_page(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct ioproc_ops *cp; + -+#endif -Index: linux-2.6.5-7.191/include/qsnet/types.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/types.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/types.h 2005-07-28 14:52:52.981658520 -0400 -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->sync_page) ++ cp->sync_page(cp->arg, vma, addr); ++} ++ ++/* IOPROC INVALIDATE PAGE ++ * ++ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the ++ * user or paged out by the kernel. + * -+ * For licensing information please see the supplied COPYING file ++ * After this call the IOPROC must not access the physical memory again unless ++ * a new translation is loaded. + * ++ * Called holding the mm->page_table_lock + */ ++static inline void ++ioproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct ioproc_ops *cp; + -+#ifndef __QSNET_TYPES_H -+#define __QSNET_TYPES_H -+ -+#ident "$Id: types.h,v 1.16 2003/08/01 16:21:38 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/types.h,v $*/ ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->invalidate_page) ++ cp->invalidate_page(cp->arg, vma, addr); ++} + -+/* -+ * Include typedefs for ISO/IEC 9899:1990 standard types -+ * ++/* IOPROC UPDATE PAGE + * -+ * The following integer typedefs are used: ++ * Called whenever a valid PTE is loaded e.g. mmaping memory, moving the brk ++ * up, when breaking COW or faulting in an anoymous page of memory. + * -+ * int8_t, int16_t, int32_t, int64_t, intptr_t -+ * uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t -+ * uchar_t, ushort_t, uint_t, ulong_t ++ * These give the IOPROC device the opportunity to load translations ++ * speculatively, which can improve performance by avoiding device translation ++ * faults. + * -+ * also defines the following: -+ * u_char, u_short, u_int, u_long, caddr_t ++ * Called holding the mm->page_table_lock + */ ++static inline void ++ioproc_update_page(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct ioproc_ops *cp; + -+#include -+ -+#if defined(SOLARIS) && defined(__KERNEL__) -+# include -+#endif -+ -+#if defined(SOLARIS) && !defined(__KERNEL__) -+# include -+# include -+#endif -+ -+#if defined(DIGITAL_UNIX) && defined(__KERNEL__) -+# include -+#endif ++ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next) ++ if (cp->update_page) ++ cp->update_page(cp->arg, vma, addr); ++} + -+#if defined(DIGITAL_UNIX) && !defined(__KERNEL__) -+# include -+# include -+#endif ++#else + -+#if defined(LINUX) && defined(__KERNEL__) -+# include -+#endif ++/* ! CONFIG_IOPROC so make all hooks empty */ + -+#if defined(LINUX) && !defined(__KERNEL__) -+# include -+# include -+# include ++#define ioproc_release(mm) do { } while (0) + -+typedef unsigned char uchar_t; -+typedef unsigned short ushort_t; -+typedef unsigned int uint_t; -+typedef unsigned long ulong_t; -+#endif ++#define ioproc_sync_range(vma,start,end) do { } while (0) + -+#if defined(QNX) -+# include -+# include -+#endif ++#define ioproc_invalidate_range(vma, start,end) do { } while (0) + -+/* Define a type that will represent a Main CPU pointer -+ * on both the Main and the Elan -+ */ -+#ifdef __ELAN__ ++#define ioproc_update_range(vma, start, end) do { } while (0) + -+#if defined(_MAIN_LP64) -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif ++#define ioproc_change_protection(vma, start, end, prot) do { } while (0) + -+#else ++#define ioproc_sync_page(vma, addr) do { } while (0) + -+#ifdef _LP64 -+#define QSNET_MAIN_PTR uint64_t -+#else -+#define QSNET_MAIN_PTR uint32_t -+#endif ++#define ioproc_invalidate_page(vma, addr) do { } while (0) + -+#endif ++#define ioproc_update_page(vma, addr) do { } while (0) + ++#endif /* CONFIG_IOPROC */ + -+#endif /* __QSNET_TYPES_H */ -Index: linux-2.6.5-7.191/include/qsnet/workarounds.h ++#endif /* __LINUX_IOPROC_H__ */ +Index: LINUX-SRC-TREE/include/linux/ptrack.h =================================================================== ---- linux-2.6.5-7.191.orig/include/qsnet/workarounds.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/qsnet/workarounds.h 2005-07-28 14:52:52.981658520 -0400 -@@ -0,0 +1,24 @@ +--- /dev/null ++++ LINUX-SRC-TREE/include/linux/ptrack.h +@@ -0,0 +1,65 @@ +/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file ++ * Copyright (C) 2000 Regents of the University of California + * -+ */ -+ -+#ifndef _QSNET_WORKAROUNDS_H -+#define _QSNET_WORKAROUNDS_H -+ -+#ident "$Id: workarounds.h,v 1.11 2002/08/09 11:15:55 addy Exp $" -+/* $Source: /cvs/master/quadrics/qsnet/workarounds.h,v $ */ -+ -+/* Elan workarounds */ -+#undef ELAN_REVA_SUPPORTED /* rev a elans no longer supported. */ -+#undef ELITE_REVA_SUPPORTED /* removed since RMS disables broadcast on rev A elites. */ -+#define ELAN_REVB_BUG_1 -+/* WORKAROUND for GNAT hw-elan3/3263 */ -+#define ELAN_REVB_BUG_2 -+ -+/* WORKAROUND for GNATs ic-elan3/3637 & ic-elan3/3550 */ -+#define ELAN_REVB_BUG_3 -+ -+#endif /* _QSNET_WORKAROUNDS_H */ -Index: linux-2.6.5-7.191/include/rms/rmscall.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/rms/rmscall.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/rms/rmscall.h 2005-07-28 14:52:52.982658368 -0400 -@@ -0,0 +1,144 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. + * -+ * For licensing information please see the supplied COPYING file ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * -+ * rmscall.h: user interface to rms kernel module ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * -+ * $Id: rmscall.h,v 1.25 2004/05/14 08:55:57 duncan Exp $ -+ * $Source: /cvs/master/quadrics/rmsmod/rmscall.h,v $ ++ * Derived from exit_actn.c by ++ * Copyright (C) 2003 Quadrics Ltd. + * + */ ++#ifndef __LINUX_PTRACK_H ++#define __LINUX_PTRACK_H + -+#ifndef RMSCALL_H_INCLUDED -+#define RMSCALL_H_INCLUDED 1 -+ -+#ident "$Id: rmscall.h,v 1.25 2004/05/14 08:55:57 duncan Exp $" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+ * flags for rms_fork_register -+ * -+ * RMS_IOF is not in a public header file ++/* ++ * Process tracking - this allows a module to keep track of processes ++ * in order that it can manage all tasks derived from a single process. + */ -+#define RMS_IOF 1 /* inherit on fork */ -+ -+#ifndef __KERNEL__ -+#include -+#endif -+ -+#include -+#include -+ -+#define MAXCOREPATHLEN 32 -+ -+#if defined(SOLARIS) -+typedef long long rmstime_t; -+#else /* DIGITAL_UNIX */ -+typedef long rmstime_t; -+#endif -+ -+typedef enum { -+ -+ PRG_RUNNING = 0x01, /* program is running */ -+ PRG_ZOMBIE = 0x02, /* last process on a node has exited */ -+ PRG_NODE = 0x04, /* stats are complete for this node */ -+ PRG_KILLED = 0x08, /* program was killed */ -+ PRG_SUSPEND = 0x10 /* program is suspended */ + -+} PRGSTATUS_FLAGS; ++#define PTRACK_PHASE_CLONE 1 ++#define PTRACK_PHASE_CLONE_FAIL 2 ++#define PTRACK_PHASE_EXEC 3 ++#define PTRACK_PHASE_EXIT 4 + -+/* -+ * program time statistics extended in version 5 of the kernel module -+ */ -+typedef struct { -+ rmstime_t etime; /* elapsed cpu time (milli-secs) */ -+ rmstime_t atime; /* allocated cpu time (cpu milli-secs) */ -+ rmstime_t utime; /* user cpu time (cpu milli-secs) */ -+ rmstime_t stime; /* system cpu time (cpu milli-secs) */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int pageflts; /* number of page faults */ -+ rmstime_t memint; /* memory integral */ -+} prgstats_old_t; ++#define PTRACK_FINISHED 0 ++#define PTRACK_INNHERIT 1 ++#define PTRACK_DENIED 2 + -+typedef struct { -+ uint64_t etime; /* elapsed cpu time (milli-secs) */ -+ uint64_t atime; /* allocated cpu time (cpu milli-secs) */ -+ uint64_t utime; /* user cpu time (cpu milli-secs) */ -+ uint64_t stime; /* system cpu time (cpu milli-secs) */ -+ uint64_t pageflts; /* number of page faults */ -+ uint64_t memint; /* memory integral */ -+ uint64_t ebytes; /* data transferred by the Elan(s) */ -+ uint64_t exfers; /* number of Elan data transfers */ -+ uint64_t spare64[4]; /* expansion space */ -+ int ncpus; /* number of cpus allocated */ -+ int flags; /* program status flags */ -+ int mem; /* max memory size in MBytes */ -+ int spare32[5]; /* expansion space */ -+} prgstats_t; ++#ifdef CONFIG_PTRACK + -+int rmsmod_init(void); -+void rmsmod_fini(void); ++typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child); + -+int rms_setcorepath(caddr_t path); -+int rms_getcorepath(pid_t pid, caddr_t path, int maxlen); -+int rms_prgcreate(int id, uid_t uid, int cpus); -+int rms_prgdestroy(int id); -+int rms_prgids(int maxids, int *prgids, int *nprgs); -+int rms_prginfo(int id, int maxpids, pid_t *pids, int *nprocs); -+int rms_prgaddcap(int id, int index, ELAN_CAPABILITY *cap); ++struct ptrack_desc { ++ struct list_head link; ++ ptrack_callback_t callback; ++ void *arg; ++}; + -+int rms_prgsuspend(int id); -+int rms_prgresume(int id); -+int rms_prgsignal(int id, int signo); ++extern int ptrack_register (ptrack_callback_t callback, void *arg); ++extern void ptrack_deregister (ptrack_callback_t callback, void *arg); ++extern int ptrack_registered (ptrack_callback_t callback, void *arg); + -+int rms_getprgid(pid_t pid, int *id); -+int rms_ncaps(int *ncaps); -+int rms_getcap(int index, ELAN_CAPABILITY *cap); -+int rms_mycap(int *index); -+int rms_setcap(int index, int ctx); -+int rms_prefcap(int nprocess, int *index); ++extern int ptrack_call_callbacks (int phase, struct task_struct *child); + -+int rms_prggetstats(int id, prgstats_t *stats); -+void rms_accumulatestats(prgstats_t *total, prgstats_t *stats); -+char *rms_statsreport(prgstats_t *stats, char *buf); ++#define INIT_TASK_PTRACK(tsk) \ ++ .ptrack_list = LIST_HEAD_INIT(tsk.ptrack_list) + -+int rms_elaninitdone(int vp); -+int rms_prgelanpids(int id, int maxpids, int *vps, pid_t *pids, int *npids); -+int rms_setelanstats(int id, uint64_t ebytes, uint64_t exfers); ++#else ++#define ptrack_call_callbacks (phase, child) (0) + -+int rms_setpset(int psid); -+int rms_getpset(int id, int *psid); -+int rms_modversion(); ++#define INIT_TASK_PTRACK(tsk) + -+#ifdef __cplusplus -+} +#endif + -+ -+#if defined(__KERNEL__) -+ -+int rms_init(void); -+int rms_fini(void); -+int rms_reconfigure(void); -+ -+extern int rms_debug; -+ -+#if 1 -+#define DBG(x) do if (rms_debug) x ; while (0) -+#else -+#define DBG(x) ++#endif /* __LINUX_PTRACK_H */ +Index: LINUX-SRC-TREE/include/linux/sched.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/sched.h ++++ LINUX-SRC-TREE/include/linux/sched.h +@@ -188,6 +188,9 @@ asmlinkage void schedule(void); + extern int max_timeslice, min_timeslice; + + struct namespace; ++#ifdef CONFIG_IOPROC ++struct ioproc_ops; +#endif + + /* Maximum number of active map areas.. This is a random (large) number */ + #define DEFAULT_MAX_MAP_COUNT 65536 +@@ -241,6 +244,11 @@ struct mm_struct { + struct kioctx default_kioctx; + + unsigned long hiwater_rss, hiwater_vm; + ++#ifdef CONFIG_IOPROC ++ /* hooks for io devices with advanced RDMA capabilities */ ++ struct ioproc_ops *ioproc_ops; +#endif -+ -+#endif /* RMSCALL_H_INCLUDED */ -+ -+ -+ -+ -Index: linux-2.6.5-7.191/include/rms/rmsio.h -=================================================================== ---- linux-2.6.5-7.191.orig/include/rms/rmsio.h 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/include/rms/rmsio.h 2005-07-28 14:52:52.982658368 -0400 -@@ -0,0 +1,185 @@ -+/* -+ * Copyright (c) 1996-2002 by Quadrics Supercomputers World Ltd. -+ * -+ * For licensing information please see the supplied COPYING file -+ * -+ */ -+ -+#ident "@(#)$Id: rmsio.h,v 1.6 2004/05/14 08:55:57 duncan Exp $" -+/* $Source: /cvs/master/quadrics/rmsmod/rmsio.h,v $*/ -+ -+ -+#ifndef __RMSMOD_RMSIO_H -+#define __RMSMOD_RMSIO_H -+ -+/* arg is corepath string */ -+#define RMSIO_SETCOREPATH _IOW ('r', 1, char) -+ -+typedef struct rmsio_getcorepath_struct -+{ -+ pid_t pid; -+ char *corepath; -+ int maxlen; -+} RMSIO_GETCOREPATH_STRUCT; -+#define RMSIO_GETCOREPATH _IOW ('r', 2, RMSIO_GETCOREPATH_STRUCT) -+ -+typedef struct rmsio_prgcreate_struct -+{ -+ int id; -+ uid_t uid; -+ int cpus; -+} RMSIO_PRGCREATE_STRUCT; -+#define RMSIO_PRGCREATE _IOW ('r', 3, RMSIO_PRGCREATE_STRUCT) -+ -+typedef struct rmsio_prginfo_struct -+{ -+ int id; -+ int maxpids; -+ pid_t *pids; -+ int *nprocs; -+} RMSIO_PRGINFO_STRUCT; -+#define RMSIO_PRGINFO _IOW ('r', 4, RMSIO_PRGINFO_STRUCT) -+ -+typedef struct rmsio_prgsignal_struct -+{ -+ int id; -+ int signo; -+} RMSIO_PRGSIGNAL_STRUCT; -+#define RMSIO_PRGSIGNAL _IOW ('r', 5, RMSIO_PRGSIGNAL_STRUCT) -+ -+typedef struct rmsio_prgaddcap_struct -+{ -+ int id; -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_PRGADDCAP_STRUCT; -+#define RMSIO_PRGADDCAP _IOW ('r', 6, RMSIO_PRGADDCAP_STRUCT) -+typedef struct rmsio_setcap_struct -+{ -+ int index; -+ int ctx; -+} RMSIO_SETCAP_STRUCT; -+#define RMSIO_SETCAP _IOW ('r', 7, RMSIO_SETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct -+{ -+ int index; -+ ELAN_CAPABILITY *cap; -+} RMSIO_GETCAP_STRUCT; -+#define RMSIO_GETCAP _IOW ('r', 8, RMSIO_GETCAP_STRUCT) -+ -+typedef struct rmsio_getcap_struct32 -+{ -+ int index; -+ unsigned int capptr; -+} RMSIO_GETCAP_STRUCT32; -+#define RMSIO_GETCAP32 _IOW ('r', 8, RMSIO_GETCAP_STRUCT32) -+ -+/* arg is pointer to ncaps */ -+#define RMSIO_NCAPS _IOW ('r', 9, int) -+ -+typedef struct rmsio_prggetstats_struct -+{ -+ int id; -+ prgstats_old_t *stats; -+} RMSIO_PRGGETSTATS_STRUCT; -+#define RMSIO_PRGGETSTATS _IOW ('r', 10, RMSIO_PRGGETSTATS_STRUCT) -+ -+/* arg is program id */ -+#define RMSIO_PRGSUSPEND _IOW ('r', 11, int) -+#define RMSIO_PRGRESUME _IOW ('r', 12, int) -+#define RMSIO_PRGDESTROY _IOW ('r', 13, int) -+ -+typedef struct rmsio_getprgid_struct -+{ -+ pid_t pid; -+ int *id; -+} RMSIO_GETPRGID_STRUCT; -+#define RMSIO_GETPRGID _IOW ('r', 14, RMSIO_GETPRGID_STRUCT) -+ -+typedef struct rmsio_getprgid_struct32 -+{ -+ pid_t pid; -+ unsigned int idptr; -+} RMSIO_GETPRGID_STRUCT32; -+#define RMSIO_GETPRGID32 _IOW ('r', 14, RMSIO_GETPRGID_STRUCT32) -+ -+/* arg is pointer to index */ -+#define RMSIO_GETMYCAP _IOW ('r', 15, int) -+ -+typedef struct rmsio_prgids_struct -+{ -+ int maxids; -+ int *prgids; -+ int *nprgs; -+} RMSIO_PRGIDS_STRUCT; -+#define RMSIO_PRGIDS _IOW ('r', 16, RMSIO_PRGIDS_STRUCT) -+ -+/* arg is pointer to vp */ -+#define RMSIO_ELANINITDONE _IOW ('r', 17, int) -+ -+typedef struct rmsio_prgelanpids_struct -+{ -+ int id; -+ int maxpids; -+ int *vps; -+ int *pids; -+ int *npids; -+} RMSIO_PRGELANPIDS_STRUCT; -+#define RMSIO_PRGELANPIDS _IOW ('r', 18, RMSIO_PRGELANPIDS_STRUCT) -+ -+typedef struct rmsio_setpset_struct -+{ -+ int id; -+ int psid; -+} RMSIO_SETPSET_STRUCT; -+#define RMSIO_SETPSET _IOW ('r', 19, RMSIO_SETPSET_STRUCT) -+ -+typedef struct rmsio_getpset_struct -+{ -+ int id; -+ int *psid; -+} RMSIO_GETPSET_STRUCT; -+#define RMSIO_GETPSET _IOW ('r', 20, RMSIO_GETPSET_STRUCT) -+ -+/* -+ * have to pass a pointer to the stats, the switch -+ * statement goes wrong in the module of the size -+ * is too large -+ */ -+typedef struct { -+ uint64_t ebytes; -+ uint64_t exfers; -+} elanstats_t; -+ -+typedef struct rmsio_setelanstats_struct -+{ -+ int id; -+ elanstats_t *estats; -+} RMSIO_SETELANSTATS_STRUCT; -+#define RMSIO_SETELANSTATS _IOW ('r', 21, RMSIO_SETELANSTATS_STRUCT) -+ -+typedef struct rmsio_prggetstats2_struct -+{ -+ int id; -+ prgstats_t *stats; -+} RMSIO_PRGGETSTATS2_STRUCT; -+#define RMSIO_PRGGETSTATS2 _IOW ('r', 22, RMSIO_PRGGETSTATS2_STRUCT) -+ -+typedef struct rmsio_modversion_struct -+{ -+ int *version; -+} RMSIO_MODVERSION_STRUCT; -+#define RMSIO_MODVERSION _IOW ('r', 23, RMSIO_MODVERSION_STRUCT) -+ -+ -+#endif /* __RMSMOD_RMSIO_H */ -+ -+ -+ -+ -+ -+ -+ -+ -+ -Index: linux-2.6.5-7.191/ipc/shm.c + }; + + extern int mmlist_nr; +@@ -603,6 +611,10 @@ struct task_struct { + struct rw_semaphore pagg_sem; + #endif + ++#ifdef CONFIG_PTRACK ++/* process tracking callback */ ++ struct list_head ptrack_list; ++#endif + }; + + static inline pid_t process_group(struct task_struct *tsk) +Index: LINUX-SRC-TREE/ipc/shm.c =================================================================== ---- linux-2.6.5-7.191.orig/ipc/shm.c 2005-06-28 12:24:24.000000000 -0400 -+++ linux-2.6.5-7.191/ipc/shm.c 2005-07-28 14:52:52.983658216 -0400 +--- LINUX-SRC-TREE.orig/ipc/shm.c ++++ LINUX-SRC-TREE/ipc/shm.c @@ -27,6 +27,7 @@ #include #include @@ -94178,7 +1047,7 @@ Index: linux-2.6.5-7.191/ipc/shm.c #include #include -@@ -877,6 +878,44 @@ +@@ -879,6 +880,44 @@ asmlinkage long sys_shmdt(char __user *s return audit_result(retval); } @@ -94223,10 +1092,10 @@ Index: linux-2.6.5-7.191/ipc/shm.c #ifdef CONFIG_PROC_FS static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { -Index: linux-2.6.5-7.191/kernel/exit.c +Index: LINUX-SRC-TREE/kernel/exit.c =================================================================== ---- linux-2.6.5-7.191.orig/kernel/exit.c 2005-06-28 12:24:24.000000000 -0400 -+++ linux-2.6.5-7.191/kernel/exit.c 2005-07-28 14:52:52.984658064 -0400 +--- LINUX-SRC-TREE.orig/kernel/exit.c ++++ LINUX-SRC-TREE/kernel/exit.c @@ -40,6 +40,8 @@ /* tng related changes */ int (*tng_exitfunc)(int) = NULL; @@ -94236,7 +1105,7 @@ Index: linux-2.6.5-7.191/kernel/exit.c extern void sem_exit (void); extern struct task_struct *child_reaper; void (*do_eop_acct) (int, struct task_struct *); -@@ -838,6 +840,8 @@ +@@ -848,6 +850,8 @@ asmlinkage NORET_TYPE void do_exit(long audit_exit(tsk, code); audit_free(tsk->audit); #endif @@ -94245,10 +1114,10 @@ Index: linux-2.6.5-7.191/kernel/exit.c __exit_mm(tsk); if (unlikely(tng_exitfunc)) -Index: linux-2.6.5-7.191/kernel/fork.c +Index: LINUX-SRC-TREE/kernel/fork.c =================================================================== ---- linux-2.6.5-7.191.orig/kernel/fork.c 2005-06-28 12:24:10.000000000 -0400 -+++ linux-2.6.5-7.191/kernel/fork.c 2005-07-28 14:52:52.985657912 -0400 +--- LINUX-SRC-TREE.orig/kernel/fork.c ++++ LINUX-SRC-TREE/kernel/fork.c @@ -14,6 +14,7 @@ #include #include @@ -94257,7 +1126,7 @@ Index: linux-2.6.5-7.191/kernel/fork.c #include #include #include -@@ -432,6 +433,9 @@ +@@ -432,6 +433,9 @@ static struct mm_struct * mm_init(struct mm->page_table_lock = SPIN_LOCK_UNLOCKED; mm->ioctx_list_lock = RW_LOCK_UNLOCKED; mm->ioctx_list = NULL; @@ -94267,7 +1136,7 @@ Index: linux-2.6.5-7.191/kernel/fork.c mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->free_area_cache = TASK_UNMAPPED_BASE; -@@ -1267,6 +1271,11 @@ +@@ -1276,6 +1280,11 @@ long do_fork(unsigned long clone_flags, audit_fork(current, p); #endif @@ -94279,10 +1148,10 @@ Index: linux-2.6.5-7.191/kernel/fork.c /* Trace the event */ TRIG_EVENT(fork_hook, clone_flags, p, pid); if (!(clone_flags & CLONE_STOPPED)) { -Index: linux-2.6.5-7.191/kernel/Kconfig +Index: LINUX-SRC-TREE/kernel/Kconfig =================================================================== ---- linux-2.6.5-7.191.orig/kernel/Kconfig 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/kernel/Kconfig 2005-07-28 14:52:52.985657912 -0400 +--- /dev/null ++++ LINUX-SRC-TREE/kernel/Kconfig @@ -0,0 +1,14 @@ +# +# Kernel subsystem specific config @@ -94298,22 +1167,22 @@ Index: linux-2.6.5-7.191/kernel/Kconfig + created and destoryed in order for a resource management + system to know which processes are a member of a "job" and + to be able to clean up when the job is terminated. -Index: linux-2.6.5-7.191/kernel/Makefile +Index: LINUX-SRC-TREE/kernel/Makefile =================================================================== ---- linux-2.6.5-7.191.orig/kernel/Makefile 2005-06-28 12:24:10.000000000 -0400 -+++ linux-2.6.5-7.191/kernel/Makefile 2005-07-28 14:52:52.985657912 -0400 -@@ -25,6 +25,7 @@ - obj-$(CONFIG_EVLOG) += evlbuf.o evlapi.o evlposix.o - obj-$(CONFIG_HOOK) += hook.o - obj-$(CONFIG_TRIGEVENT_HOOKS) += trigevent_hooks.o -+obj-$(CONFIG_PTRACK) += ptrack.o - obj-$(CONFIG_LTT) += ltt/ +--- LINUX-SRC-TREE.orig/kernel/Makefile ++++ LINUX-SRC-TREE/kernel/Makefile +@@ -29,6 +29,7 @@ obj-$(CONFIG_LTT) += ltt/ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_CPUSETS) += cpuset.o -Index: linux-2.6.5-7.191/kernel/ptrack.c + obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_classqueue.o ckrm_sched.o ++obj-$(CONFIG_PTRACK) += ptrack.o + + ifneq ($(CONFIG_IA64),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +Index: LINUX-SRC-TREE/kernel/ptrack.c =================================================================== ---- linux-2.6.5-7.191.orig/kernel/ptrack.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/kernel/ptrack.c 2005-07-28 14:52:52.986657760 -0400 +--- /dev/null ++++ LINUX-SRC-TREE/kernel/ptrack.c @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2000 Regents of the University of California @@ -94460,11 +1329,11 @@ Index: linux-2.6.5-7.191/kernel/ptrack.c +EXPORT_SYMBOL(ptrack_register); +EXPORT_SYMBOL(ptrack_deregister); +EXPORT_SYMBOL(ptrack_registered); -Index: linux-2.6.5-7.191/kernel/signal.c +Index: LINUX-SRC-TREE/kernel/signal.c =================================================================== ---- linux-2.6.5-7.191.orig/kernel/signal.c 2005-06-28 12:24:23.000000000 -0400 -+++ linux-2.6.5-7.191/kernel/signal.c 2005-07-28 14:52:52.987657608 -0400 -@@ -2282,6 +2282,7 @@ +--- LINUX-SRC-TREE.orig/kernel/signal.c ++++ LINUX-SRC-TREE/kernel/signal.c +@@ -2315,6 +2315,7 @@ sys_tkill(int pid, int sig) read_unlock(&tasklist_lock); return audit_lresult(error); } @@ -94472,10 +1341,10 @@ Index: linux-2.6.5-7.191/kernel/signal.c asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) -Index: linux-2.6.5-7.191/mm/fremap.c +Index: LINUX-SRC-TREE/mm/fremap.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/fremap.c 2005-06-28 12:23:58.000000000 -0400 -+++ linux-2.6.5-7.191/mm/fremap.c 2005-07-28 14:52:52.988657456 -0400 +--- LINUX-SRC-TREE.orig/mm/fremap.c ++++ LINUX-SRC-TREE/mm/fremap.c @@ -14,6 +14,7 @@ #include #include @@ -94484,7 +1353,7 @@ Index: linux-2.6.5-7.191/mm/fremap.c #include #include -@@ -29,6 +30,7 @@ +@@ -29,6 +30,7 @@ static inline void zap_pte(struct mm_str if (pte_present(pte)) { unsigned long pfn = pte_pfn(pte); @@ -94492,7 +1361,7 @@ Index: linux-2.6.5-7.191/mm/fremap.c flush_cache_page(vma, addr); pte = ptep_clear_flush(vma, addr, ptep); if (pfn_valid(pfn)) { -@@ -80,6 +82,7 @@ +@@ -80,6 +82,7 @@ int install_page(struct mm_struct *mm, s pte_val = *pte; pte_unmap(pte); update_mmu_cache(vma, addr, pte_val); @@ -94500,7 +1369,7 @@ Index: linux-2.6.5-7.191/mm/fremap.c err = 0; err_unlock: -@@ -118,6 +121,7 @@ +@@ -118,6 +121,7 @@ int install_file_pte(struct mm_struct *m pte_val = *pte; pte_unmap(pte); update_mmu_cache(vma, addr, pte_val); @@ -94508,10 +1377,10 @@ Index: linux-2.6.5-7.191/mm/fremap.c spin_unlock(&mm->page_table_lock); return 0; -Index: linux-2.6.5-7.191/mm/ioproc.c +Index: LINUX-SRC-TREE/mm/ioproc.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/ioproc.c 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/mm/ioproc.c 2005-07-28 14:52:52.988657456 -0400 +--- /dev/null ++++ LINUX-SRC-TREE/mm/ioproc.c @@ -0,0 +1,58 @@ +/* -*- linux-c -*- + * @@ -94571,10 +1440,10 @@ Index: linux-2.6.5-7.191/mm/ioproc.c +} + +EXPORT_SYMBOL_GPL(ioproc_unregister_ops); -Index: linux-2.6.5-7.191/mm/Kconfig +Index: LINUX-SRC-TREE/mm/Kconfig =================================================================== ---- linux-2.6.5-7.191.orig/mm/Kconfig 2004-02-23 16:02:56.000000000 -0500 -+++ linux-2.6.5-7.191/mm/Kconfig 2005-07-28 14:52:52.989657304 -0400 +--- /dev/null ++++ LINUX-SRC-TREE/mm/Kconfig @@ -0,0 +1,15 @@ +# +# VM subsystem specific config @@ -94591,20 +1460,20 @@ Index: linux-2.6.5-7.191/mm/Kconfig + incorporate advanced RDMA capabilities can be kept in sync with CPU + page table changes. + See Documentation/vm/ioproc.txt for more details. -Index: linux-2.6.5-7.191/mm/Makefile +Index: LINUX-SRC-TREE/mm/Makefile =================================================================== ---- linux-2.6.5-7.191.orig/mm/Makefile 2005-06-28 12:23:58.000000000 -0400 -+++ linux-2.6.5-7.191/mm/Makefile 2005-07-28 14:52:52.989657304 -0400 -@@ -15,4 +15,5 @@ +--- LINUX-SRC-TREE.orig/mm/Makefile ++++ LINUX-SRC-TREE/mm/Makefile +@@ -15,4 +15,5 @@ obj-y := bootmem.o filemap.o mempool.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_PROC_MM) += proc_mm.o obj-$(CONFIG_NUMA) += policy.o +obj-$(CONFIG_IOPROC) += ioproc.o -Index: linux-2.6.5-7.191/mm/memory.c +Index: LINUX-SRC-TREE/mm/memory.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/memory.c 2005-06-28 12:24:14.000000000 -0400 -+++ linux-2.6.5-7.191/mm/memory.c 2005-07-28 14:55:16.271875096 -0400 +--- LINUX-SRC-TREE.orig/mm/memory.c ++++ LINUX-SRC-TREE/mm/memory.c @@ -43,6 +43,7 @@ #include #include @@ -94613,7 +1482,7 @@ Index: linux-2.6.5-7.191/mm/memory.c #include #include #include -@@ -630,6 +631,7 @@ +@@ -627,6 +628,7 @@ void zap_page_range(struct vm_area_struc lru_add_drain(); spin_lock(&mm->page_table_lock); @@ -94621,7 +1490,7 @@ Index: linux-2.6.5-7.191/mm/memory.c tlb = tlb_gather_mmu(mm, 0); unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); tlb_finish_mmu(tlb, address, end); -@@ -936,6 +938,7 @@ +@@ -927,6 +929,7 @@ int zeromap_page_range(struct vm_area_st BUG(); spin_lock(&mm->page_table_lock); @@ -94629,7 +1498,7 @@ Index: linux-2.6.5-7.191/mm/memory.c do { pmd_t *pmd = pmd_alloc(mm, dir, address); error = -ENOMEM; -@@ -950,6 +953,7 @@ +@@ -941,6 +944,7 @@ int zeromap_page_range(struct vm_area_st /* * Why flush? zeromap_pte_range has a BUG_ON for !pte_none() */ @@ -94637,7 +1506,7 @@ Index: linux-2.6.5-7.191/mm/memory.c flush_tlb_range(vma, beg, end); spin_unlock(&mm->page_table_lock); return error; -@@ -1020,6 +1024,7 @@ +@@ -1011,6 +1015,7 @@ int remap_page_range(struct vm_area_stru BUG(); spin_lock(&mm->page_table_lock); @@ -94645,7 +1514,7 @@ Index: linux-2.6.5-7.191/mm/memory.c do { pmd_t *pmd = pmd_alloc(mm, dir, from); error = -ENOMEM; -@@ -1034,6 +1039,7 @@ +@@ -1025,6 +1030,7 @@ int remap_page_range(struct vm_area_stru /* * Why flush? remap_pte_range has a BUG_ON for !pte_none() */ @@ -94653,7 +1522,7 @@ Index: linux-2.6.5-7.191/mm/memory.c flush_tlb_range(vma, beg, end); spin_unlock(&mm->page_table_lock); return error; -@@ -1122,6 +1128,7 @@ +@@ -1098,6 +1104,7 @@ static int do_wp_page(struct mm_struct * update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); pte_unmap(page_table); @@ -94661,7 +1530,7 @@ Index: linux-2.6.5-7.191/mm/memory.c spin_unlock(&mm->page_table_lock); return VM_FAULT_MINOR; } -@@ -1157,6 +1164,7 @@ +@@ -1133,6 +1140,7 @@ static int do_wp_page(struct mm_struct * } page_remove_rmap(old_page); @@ -94669,7 +1538,7 @@ Index: linux-2.6.5-7.191/mm/memory.c break_cow(vma, new_page, address, page_table); page_add_rmap(new_page, vma, address, 1); lru_cache_add_active(new_page); -@@ -1165,6 +1173,7 @@ +@@ -1141,6 +1149,7 @@ static int do_wp_page(struct mm_struct * new_page = old_page; } pte_unmap(page_table); @@ -94677,42 +1546,42 @@ Index: linux-2.6.5-7.191/mm/memory.c page_cache_release(new_page); page_cache_release(old_page); spin_unlock(&mm->page_table_lock); -@@ -1472,6 +1481,7 @@ - update_mmu_cache(vma, address, pte); - lazy_mmu_prot_update(pte); +@@ -1376,6 +1385,7 @@ static int do_swap_page(struct mm_struct + int ret; + pte_unmap(page_table); + ioproc_update_page(vma, address); spin_unlock(&mm->page_table_lock); - out: - return ret; -@@ -1534,6 +1544,7 @@ + + BUG_ON(!vma->anon_vma); +@@ -1508,6 +1518,7 @@ do_anonymous_page(struct mm_struct *mm, + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); - lazy_mmu_prot_update(entry); + ioproc_update_page(vma, addr); + lazy_mmu_prot_update(entry); spin_unlock(&mm->page_table_lock); ret = VM_FAULT_MINOR; +@@ -1658,6 +1669,7 @@ retry: -@@ -1674,6 +1685,7 @@ /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); - lazy_mmu_prot_update(entry); + ioproc_update_page(vma, address); + lazy_mmu_prot_update(entry); spin_unlock(&mm->page_table_lock); out: - return ret; -@@ -1774,6 +1786,7 @@ +@@ -1771,6 +1783,7 @@ static inline int handle_pte_fault(struc spin_unlock(&mm->page_table_lock); return VM_FAULT_MINOR; } -+EXPORT_SYMBOL(make_pages_present); ++EXPORT_SYMBOL_GPL(make_pages_present); /* Can be overwritten by the architecture */ -Index: linux-2.6.5-7.191/mm/mmap.c +Index: LINUX-SRC-TREE/mm/mmap.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/mmap.c 2005-06-28 12:24:15.000000000 -0400 -+++ linux-2.6.5-7.191/mm/mmap.c 2005-07-28 14:52:52.992656848 -0400 +--- LINUX-SRC-TREE.orig/mm/mmap.c ++++ LINUX-SRC-TREE/mm/mmap.c @@ -25,6 +25,7 @@ #include #include @@ -94721,7 +1590,7 @@ Index: linux-2.6.5-7.191/mm/mmap.c #include #include #include -@@ -1378,6 +1379,7 @@ +@@ -1389,6 +1390,7 @@ static void unmap_region(struct mm_struc unsigned long nr_accounted = 0; lru_add_drain(); @@ -94729,7 +1598,7 @@ Index: linux-2.6.5-7.191/mm/mmap.c tlb = tlb_gather_mmu(mm, 0); unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); -@@ -1697,6 +1699,7 @@ +@@ -1713,6 +1715,7 @@ void exit_mmap(struct mm_struct *mm) spin_lock(&mm->page_table_lock); @@ -94737,10 +1606,10 @@ Index: linux-2.6.5-7.191/mm/mmap.c tlb = tlb_gather_mmu(mm, 1); flush_cache_mm(mm); /* Use ~0UL here to ensure all VMAs in the mm are unmapped */ -Index: linux-2.6.5-7.191/mm/mprotect.c +Index: LINUX-SRC-TREE/mm/mprotect.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/mprotect.c 2005-06-28 12:24:14.000000000 -0400 -+++ linux-2.6.5-7.191/mm/mprotect.c 2005-07-28 14:52:52.992656848 -0400 +--- LINUX-SRC-TREE.orig/mm/mprotect.c ++++ LINUX-SRC-TREE/mm/mprotect.c @@ -10,6 +10,7 @@ #include @@ -94749,7 +1618,7 @@ Index: linux-2.6.5-7.191/mm/mprotect.c #include #include #include -@@ -101,6 +102,7 @@ +@@ -101,6 +102,7 @@ change_protection(struct vm_area_struct if (start >= end) BUG(); spin_lock(¤t->mm->page_table_lock); @@ -94757,10 +1626,10 @@ Index: linux-2.6.5-7.191/mm/mprotect.c do { change_pmd_range(dir, start, end - start, newprot); start = (start + PGDIR_SIZE) & PGDIR_MASK; -Index: linux-2.6.5-7.191/mm/mremap.c +Index: LINUX-SRC-TREE/mm/mremap.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/mremap.c 2005-06-28 12:24:09.000000000 -0400 -+++ linux-2.6.5-7.191/mm/mremap.c 2005-07-28 14:52:52.993656696 -0400 +--- LINUX-SRC-TREE.orig/mm/mremap.c ++++ LINUX-SRC-TREE/mm/mremap.c @@ -9,6 +9,7 @@ #include @@ -94769,7 +1638,7 @@ Index: linux-2.6.5-7.191/mm/mremap.c #include #include #include -@@ -144,6 +145,8 @@ +@@ -144,6 +145,8 @@ static int move_page_tables(struct vm_ar { unsigned long offset = len; @@ -94778,10 +1647,10 @@ Index: linux-2.6.5-7.191/mm/mremap.c flush_cache_range(vma, old_addr, old_addr + len); /* -Index: linux-2.6.5-7.191/mm/msync.c +Index: LINUX-SRC-TREE/mm/msync.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/msync.c 2005-06-28 12:23:58.000000000 -0400 -+++ linux-2.6.5-7.191/mm/msync.c 2005-07-28 14:52:52.993656696 -0400 +--- LINUX-SRC-TREE.orig/mm/msync.c ++++ LINUX-SRC-TREE/mm/msync.c @@ -12,6 +12,7 @@ #include #include @@ -94790,7 +1659,7 @@ Index: linux-2.6.5-7.191/mm/msync.c #include #include -@@ -116,6 +117,7 @@ +@@ -116,6 +117,7 @@ static int filemap_sync(struct vm_area_s if (address >= end) BUG(); @@ -94798,10 +1667,10 @@ Index: linux-2.6.5-7.191/mm/msync.c do { error |= filemap_sync_pmd_range(dir, address, end, vma, flags); address = (address + PGDIR_SIZE) & PGDIR_MASK; -Index: linux-2.6.5-7.191/mm/objrmap.c +Index: LINUX-SRC-TREE/mm/objrmap.c =================================================================== ---- linux-2.6.5-7.191.orig/mm/objrmap.c 2005-06-28 12:24:10.000000000 -0400 -+++ linux-2.6.5-7.191/mm/objrmap.c 2005-07-28 14:52:52.994656544 -0400 +--- LINUX-SRC-TREE.orig/mm/objrmap.c ++++ LINUX-SRC-TREE/mm/objrmap.c @@ -29,6 +29,7 @@ #include #include @@ -94810,7 +1679,7 @@ Index: linux-2.6.5-7.191/mm/objrmap.c #include kmem_cache_t * anon_vma_cachep; -@@ -393,6 +394,8 @@ +@@ -393,6 +394,8 @@ unmap_pte_page(struct page * page, struc { pte_t pteval; diff --git a/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch b/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch index 892a61f..576765c 100644 --- a/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch +++ b/lustre/kernel_patches/patches/quota-deadlock-on-pagelock-core.patch @@ -1,4 +1,3 @@ - From: Jan Kara The four patches in this series fix deadlocks with quotas of pagelock (the @@ -55,9 +54,9 @@ Signed-off-by: Andrew Morton 25-akpm/security/selinux/hooks.c | 4 9 files changed, 247 insertions(+), 268 deletions(-) -diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c ---- 25/fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.293107536 -0800 -+++ 25-akpm/fs/dquot.c 2004-12-03 20:56:04.312104648 -0800 +diff -rup RH_2_6_9_55.orig/fs/dquot.c RH_2_6_9_55/fs/dquot.c +--- RH_2_6_9_55.orig/fs/dquot.c ++++ RH_2_6_9_55/fs/dquot.c @@ -49,7 +49,7 @@ * New SMP locking. * Jan Kara, , 10/2002 @@ -83,8 +82,8 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c * - * Lock ordering (including related VFS locks) is following: + * Lock ordering (including related VFS locks) is the following: - * i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > - * > dquot->dq_lock > dqio_sem + * i_sem > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > + * dqio_sem * i_sem on quota files is special (it's below dqio_sem) @@ -183,8 +184,7 @@ static void put_quota_format(struct quot * on all three lists, depending on its current state. @@ -96,7 +95,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c * * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, * and this list is searched whenever we need an available dquot. Dquots are -@@ -1314,10 +1314,12 @@ int vfs_quota_off(struct super_block *sb +@@ -1341,10 +1341,12 @@ int vfs_quota_off(struct super_block *sb { int cnt; struct quota_info *dqopt = sb_dqopt(sb); @@ -109,7 +108,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c if (type != -1 && cnt != type) continue; if (!sb_has_quota_enabled(sb, cnt)) -@@ -1337,7 +1339,7 @@ int vfs_quota_off(struct super_block *sb +@@ -1364,7 +1366,7 @@ int vfs_quota_off(struct super_block *sb dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); @@ -118,7 +117,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c dqopt->files[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; -@@ -1345,6 +1347,26 @@ int vfs_quota_off(struct super_block *sb +@@ -1372,6 +1374,26 @@ int vfs_quota_off(struct super_block *sb dqopt->ops[cnt] = NULL; } up(&dqopt->dqonoff_sem); @@ -145,7 +144,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c return 0; } -@@ -1352,68 +1374,56 @@ int vfs_quota_off(struct super_block *sb +@@ -1379,68 +1401,56 @@ int vfs_quota_off(struct super_block *sb * Turn quotas on on a device */ @@ -242,7 +241,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; -@@ -1424,6 +1434,7 @@ static int vfs_quota_on_file(struct file +@@ -1451,6 +1461,7 @@ static int vfs_quota_on_file(struct file goto out_file_init; } up(&dqopt->dqio_sem); @@ -250,7 +249,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c set_enable_flags(dqopt, type); add_dquot_ref(sb, type); -@@ -1433,19 +1444,18 @@ static int vfs_quota_on_file(struct file +@@ -1460,19 +1471,18 @@ static int vfs_quota_on_file(struct file out_file_init: dqopt->files[type] = NULL; @@ -275,7 +274,7 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c out_fmt: put_quota_format(fmt); -@@ -1455,47 +1465,37 @@ out_fmt: +@@ -1482,47 +1492,37 @@ out_fmt: /* Actual function called from quotactl() */ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) { @@ -342,9 +341,9 @@ diff -puN fs/dquot.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/dquot.c } /* Generic routine for getting common part of quota structure */ -diff -puN fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota.c ---- 25/fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.295107232 -0800 -+++ 25-akpm/fs/quota.c 2004-12-03 20:56:04.313104496 -0800 +diff -rup RH_2_6_9_55.orig/fs/quota.c RH_2_6_9_55/fs/quota.c +--- RH_2_6_9_55.orig/fs/quota.c ++++ RH_2_6_9_55/fs/quota.c @@ -13,6 +13,8 @@ #include #include @@ -354,7 +353,7 @@ diff -puN fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota.c /* Check validity of quotactl */ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) -@@ -135,16 +136,54 @@ restart: +@@ -134,16 +136,54 @@ restart: return NULL; } @@ -412,9 +411,9 @@ diff -puN fs/quota.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota.c drop_super(sb); } } -diff -puN fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v1.c ---- 25/fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.296107080 -0800 -+++ 25-akpm/fs/quota_v1.c 2004-12-03 20:56:04.314104344 -0800 +diff -rup RH_2_6_9_55.orig/fs/quota_v1.c RH_2_6_9_55/fs/quota_v1.c +--- RH_2_6_9_55.orig/fs/quota_v1.c ++++ RH_2_6_9_55/fs/quota_v1.c @@ -7,7 +7,6 @@ #include #include @@ -576,9 +575,9 @@ diff -puN fs/quota_v1.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v1 return ret; } -diff -puN fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v2.c ---- 25/fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.297106928 -0800 -+++ 25-akpm/fs/quota_v2.c 2004-12-03 20:56:04.318103736 -0800 +diff -rup RH_2_6_9_55.orig/fs/quota_v2.c RH_2_6_9_55/fs/quota_v2.c +--- RH_2_6_9_55.orig/fs/quota_v2.c ++++ RH_2_6_9_55/fs/quota_v2.c @@ -13,7 +13,6 @@ #include @@ -1175,11 +1174,11 @@ diff -puN fs/quota_v2.c~fix-of-quota-deadlock-on-pagelock-quota-core fs/quota_v2 disk2memdqb(&dquot->dq_dqb, &ddquot); if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && -diff -puN include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/fs.h ---- 25/include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.300106472 -0800 -+++ 25-akpm/include/linux/fs.h 2004-12-03 20:56:04.319103584 -0800 -@@ -1004,6 +1004,9 @@ struct super_operations { - void (*umount_begin) (struct super_block *); +diff -rup RH_2_6_9_55.orig/include/linux/fs.h RH_2_6_9_55/include/linux/fs.h +--- RH_2_6_9_55.orig/include/linux/fs.h ++++ RH_2_6_9_55/include/linux/fs.h +@@ -1042,6 +1042,9 @@ struct super_operations { + void (*umount_lustre) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + @@ -1188,10 +1187,9 @@ diff -puN include/linux/fs.h~fix-of-quota-deadlock-on-pagelock-quota-core includ }; /* Inode state bits. Protected by inode_lock. */ - -diff -puN include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/quota.h ---- 25/include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.301106320 -0800 -+++ 25-akpm/include/linux/quota.h 2004-12-03 20:56:04.320103432 -0800 +diff -rup RH_2_6_9_55.orig/include/linux/quota.h RH_2_6_9_55/include/linux/quota.h +--- RH_2_6_9_55.orig/include/linux/quota.h ++++ RH_2_6_9_55/include/linux/quota.h @@ -285,7 +285,7 @@ struct quota_info { struct semaphore dqio_sem; /* lock device while I/O in progress */ struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */ @@ -1201,9 +1199,9 @@ diff -puN include/linux/quota.h~fix-of-quota-deadlock-on-pagelock-quota-core inc struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -diff -puN include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core include/linux/security.h ---- 25/include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.303106016 -0800 -+++ 25-akpm/include/linux/security.h 2004-12-03 20:56:04.322103128 -0800 +diff -rup RH_2_6_9_55.orig/include/linux/security.h RH_2_6_9_55/include/linux/security.h +--- RH_2_6_9_55.orig/include/linux/security.h ++++ RH_2_6_9_55/include/linux/security.h @@ -1033,7 +1033,7 @@ struct security_operations { int (*sysctl) (ctl_table * table, int op); int (*capable) (struct task_struct * tsk, int cap); @@ -1225,7 +1223,7 @@ diff -puN include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core } static inline int security_syslog(int type) -@@ -1959,7 +1959,7 @@ static inline int security_quotactl (int +@@ -1953,7 +1953,7 @@ static inline int security_quotactl (int return 0; } @@ -1234,9 +1232,9 @@ diff -puN include/linux/security.h~fix-of-quota-deadlock-on-pagelock-quota-core { return 0; } -diff -puN security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core security/dummy.c ---- 25/security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.304105864 -0800 -+++ 25-akpm/security/dummy.c 2004-12-03 20:56:04.323102976 -0800 +diff -rup RH_2_6_9_55.orig/security/dummy.c RH_2_6_9_55/security/dummy.c +--- RH_2_6_9_55.orig/security/dummy.c ++++ RH_2_6_9_55/security/dummy.c @@ -92,7 +92,7 @@ static int dummy_quotactl (int cmds, int return 0; } @@ -1246,10 +1244,10 @@ diff -puN security/dummy.c~fix-of-quota-deadlock-on-pagelock-quota-core security { return 0; } -diff -puN security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core security/selinux/hooks.c ---- 25/security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core 2004-12-03 20:56:04.306105560 -0800 -+++ 25-akpm/security/selinux/hooks.c 2004-12-03 20:56:04.326102520 -0800 -@@ -1494,9 +1494,9 @@ static int selinux_quotactl(int cmds, in +diff -rup RH_2_6_9_55.orig/security/selinux/hooks.c RH_2_6_9_55/security/selinux/hooks.c +--- RH_2_6_9_55.orig/security/selinux/hooks.c ++++ RH_2_6_9_55/security/selinux/hooks.c +@@ -1485,9 +1485,9 @@ static int selinux_quotactl(int cmds, in return rc; } @@ -1261,4 +1259,3 @@ diff -puN security/selinux/hooks.c~fix-of-quota-deadlock-on-pagelock-quota-core } static int selinux_syslog(int type) -_ diff --git a/lustre/kernel_patches/patches/raid5-zerocopy.patch b/lustre/kernel_patches/patches/raid5-zerocopy.patch new file mode 100644 index 0000000..0986343 --- /dev/null +++ b/lustre/kernel_patches/patches/raid5-zerocopy.patch @@ -0,0 +1,374 @@ +diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c +--- linux-2.6.9.orig/drivers/md/raid5.c 2007-07-09 02:43:33.000000000 -0600 ++++ linux-2.6.9/drivers/md/raid5.c 2007-07-13 00:39:15.000000000 -0600 +@@ -412,6 +412,7 @@ static int raid5_end_read_request (struc + clear_buffer_uptodate(bh); + } + #endif ++ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)); + clear_bit(R5_LOCKED, &sh->dev[i].flags); + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); +@@ -450,6 +451,10 @@ static int raid5_end_write_request (stru + + rdev_dec_pending(conf->disks[i].rdev, conf->mddev); + ++ if (test_bit(R5_Direct, &sh->dev[i].flags)) { ++ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page); ++ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page; ++ } + clear_bit(R5_LOCKED, &sh->dev[i].flags); + set_bit(STRIPE_HANDLE, &sh->state); + __release_stripe(conf, sh); +@@ -621,6 +626,25 @@ static sector_t compute_blocknr(struct s + } + + ++static struct page *zero_copy_data(struct bio *bio, sector_t sector) ++{ ++ sector_t bi_sector = bio->bi_sector; ++ struct page *page; ++ struct bio_vec *bvl; ++ int i; ++ ++ bio_for_each_segment(bvl, bio, i) { ++ if (sector > bi_sector) { ++ bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9; ++ continue; ++ } ++ BUG_ON(sector != bi_sector); ++ page = bio_iovec_idx(bio, i)->bv_page; ++ return PageConstant(page) ? page : NULL; ++ } ++ BUG(); ++ return NULL; ++} + + /* + * Copy data between a page in the stripe cache, and one or more bion +@@ -716,8 +740,9 @@ static void compute_parity(struct stripe + { + raid5_conf_t *conf = sh->raid_conf; + int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; +- void *ptr[MAX_XOR_BLOCKS]; ++ void *ptr[MAX_XOR_BLOCKS], *h_ptr[2]; + struct bio *chosen; ++ struct page *page; + + PRINTK("compute_parity, stripe %llu, method %d\n", + (unsigned long long)sh->sector, method); +@@ -744,13 +769,14 @@ static void compute_parity(struct stripe + break; + case RECONSTRUCT_WRITE: + memset(ptr[0], 0, STRIPE_SIZE); +- for (i= disks; i-- ;) ++ for (i= disks; i-- ;) { + if (i!=pd_idx && sh->dev[i].towrite) { + chosen = sh->dev[i].towrite; + sh->dev[i].towrite = NULL; + if (sh->dev[i].written) BUG(); + sh->dev[i].written = chosen; + } ++ } + break; + case CHECK_PARITY: + break; +@@ -760,34 +786,88 @@ static void compute_parity(struct stripe + count = 1; + } + +- for (i = disks; i--;) +- if (sh->dev[i].written) { +- sector_t sector = sh->dev[i].sector; +- struct bio *wbi = sh->dev[i].written; +- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { +- copy_data(1, wbi, sh->dev[i].page, sector); +- wbi = r5_next_bio(wbi, sector); ++ for (i = disks; i--;) { ++ struct bio *wbi = sh->dev[i].written; ++ sector_t sector; ++ ++ if (!wbi) ++ continue; ++ ++ sector = sh->dev[i].sector; ++ set_bit(R5_LOCKED, &sh->dev[i].flags); ++ BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)); ++ ++ /* check if it's covered by a single page ++ and whole stripe is written at once. ++ * in this case we can avoid memcpy() */ ++ if (!wbi->bi_next && test_bit(R5_OVERWRITE, &sh->dev[i].flags) && ++ test_bit(R5_Insync, &sh->dev[i].flags)) { ++ page = zero_copy_data(wbi, sector); ++ if (page) { ++ atomic_inc(&conf->writes_zcopy); ++ sh->dev[i].req.bi_io_vec[0].bv_page = page; ++ set_bit(R5_Direct, &sh->dev[i].flags); ++ clear_bit(R5_UPTODATE, &sh->dev[i].flags); ++ clear_bit(R5_OVERWRITE, &sh->dev[i].flags); ++ continue; + } ++ } + +- set_bit(R5_LOCKED, &sh->dev[i].flags); +- set_bit(R5_UPTODATE, &sh->dev[i].flags); ++ atomic_inc(&conf->writes_copied); ++ test_and_clear_bit(R5_OVERWRITE, &sh->dev[i].flags); ++ set_bit(R5_UPTODATE, &sh->dev[i].flags); ++ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { ++ copy_data(1, wbi, sh->dev[i].page, sector); ++ wbi = r5_next_bio(wbi, sector); + } ++ } + ++ h_ptr[0] = ptr[0]; + switch(method) { + case RECONSTRUCT_WRITE: + case CHECK_PARITY: +- for (i=disks; i--;) +- if (i != pd_idx) { +- ptr[count++] = page_address(sh->dev[i].page); +- check_xor(); ++ for (i=disks; i--;) { ++ if (i == pd_idx) ++ continue; ++ if (test_bit(R5_Direct, &sh->dev[i].flags)) ++ page = sh->dev[i].req.bi_io_vec[0].bv_page; ++ else ++ page = sh->dev[i].page; ++ ++ /* have to compute the parity immediately for ++ * a highmem page. it would happen for zerocopy. -jay ++ */ ++ if (PageHighMem(page)) { ++ h_ptr[1] = kmap_atomic(page, KM_USER0); ++ xor_block(2, STRIPE_SIZE, h_ptr); ++ kunmap_atomic(page, KM_USER0); ++ } else { ++ ptr[count++] = page_address(page); + } ++ check_xor(); ++ } + break; + case READ_MODIFY_WRITE: +- for (i = disks; i--;) +- if (sh->dev[i].written) { +- ptr[count++] = page_address(sh->dev[i].page); +- check_xor(); ++ for (i = disks; i--;) { ++ if (!sh->dev[i].written) ++ continue; ++ if (test_bit(R5_Direct, &sh->dev[i].flags)) ++ page = sh->dev[i].req.bi_io_vec[0].bv_page; ++ else ++ page = sh->dev[i].page; ++ ++ /* have to compute the parity immediately for ++ * a highmem page. it would happen for zerocopy. -jay ++ */ ++ if (PageHighMem(page)) { ++ h_ptr[1] = kmap_atomic(page, KM_USER0); ++ xor_block(2, STRIPE_SIZE, h_ptr); ++ kunmap_atomic(page, KM_USER0); ++ } else { ++ ptr[count++] = page_address(page); + } ++ check_xor(); ++ } + } + if (count != 1) + xor_block(count, STRIPE_SIZE, ptr); +@@ -1059,13 +1139,15 @@ static void handle_stripe(struct stripe_ + if (sh->dev[i].written) { + dev = &sh->dev[i]; + if (!test_bit(R5_LOCKED, &dev->flags) && +- test_bit(R5_UPTODATE, &dev->flags) ) { ++ (test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Direct, &dev->flags)) ) { + /* We can return any write requests */ + struct bio *wbi, *wbi2; + PRINTK("Return write for disc %d\n", i); + spin_lock_irq(&conf->device_lock); + wbi = dev->written; + dev->written = NULL; ++ test_and_clear_bit(R5_Direct, &dev->flags); + while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { + wbi2 = r5_next_bio(wbi, dev->sector); + if (--wbi->bi_phys_segments == 0) { +@@ -1831,6 +1913,7 @@ memory = conf->max_nr_stripes * (sizeof( + if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) + mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + } ++ mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONST_WRITE; + + /* Ok, everything is just fine now */ + mddev->array_size = mddev->size * (mddev->raid_disks - 1); +@@ -1918,9 +2001,11 @@ static void status (struct seq_file *seq + atomic_read(&conf->handled_in_raid5d), + atomic_read(&conf->out_of_stripes), + atomic_read(&conf->handle_called)); +- seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw", ++ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u", + atomic_read(&conf->reads_for_rmw), +- atomic_read(&conf->reads_for_rcw)); ++ atomic_read(&conf->reads_for_rcw), ++ atomic_read(&conf->writes_zcopy), ++ atomic_read(&conf->writes_copied)); + seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", + atomic_read(&conf->delayed), + atomic_read(&conf->active_stripes), +diff -pru linux-2.6.9.orig/include/linux/backing-dev.h linux-2.6.9/include/linux/backing-dev.h +--- linux-2.6.9.orig/include/linux/backing-dev.h 2004-10-18 15:53:46.000000000 -0600 ++++ linux-2.6.9/include/linux/backing-dev.h 2007-07-13 00:12:46.000000000 -0600 +@@ -30,8 +30,11 @@ struct backing_dev_info { + void *congested_data; /* Pointer to aux data for congested func */ + void (*unplug_io_fn)(struct backing_dev_info *, struct page *); + void *unplug_io_data; ++ unsigned int capabilities; + }; + ++#define BDI_CAP_PAGE_CONST_WRITE 0x00000001 ++ + extern struct backing_dev_info default_backing_dev_info; + void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page); + +@@ -62,4 +65,7 @@ static inline int bdi_rw_congested(struc + (1 << BDI_write_congested)); + } + ++#define mapping_cap_page_constant_write(mapping) \ ++ ((mapping)->backing_dev_info->capabilities & BDI_CAP_PAGE_CONST_WRITE) ++ + #endif /* _LINUX_BACKING_DEV_H */ +diff -pru linux-2.6.9.orig/include/linux/page-flags.h linux-2.6.9/include/linux/page-flags.h +--- linux-2.6.9.orig/include/linux/page-flags.h 2004-10-18 15:54:39.000000000 -0600 ++++ linux-2.6.9/include/linux/page-flags.h 2007-07-13 00:12:46.000000000 -0600 +@@ -74,6 +74,7 @@ + #define PG_swapcache 16 /* Swap page: swp_entry_t in private */ + #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ + #define PG_reclaim 18 /* To be reclaimed asap */ ++#define PG_constant 19 /* To mark the page is constant */ + + + /* +@@ -298,6 +299,11 @@ extern unsigned long __read_page_state(u + #define PageSwapCache(page) 0 + #endif + ++#define PageConstant(page) test_bit(PG_constant, &(page)->flags) ++#define SetPageConstant(page) set_bit(PG_constant, &(page)->flags) ++#define ClearPageConstant(page) clear_bit(PG_constant, &(page->flags)) ++#define TestSetPageConstant(page) test_and_set_bit(PG_constant, &(page)->flags) ++ + struct page; /* forward declaration */ + + int test_clear_page_dirty(struct page *page); +diff -pru linux-2.6.9.orig/include/linux/pagemap.h linux-2.6.9/include/linux/pagemap.h +--- linux-2.6.9.orig/include/linux/pagemap.h 2004-10-18 15:53:06.000000000 -0600 ++++ linux-2.6.9/include/linux/pagemap.h 2007-07-13 00:12:46.000000000 -0600 +@@ -191,6 +191,19 @@ static inline void wait_on_page_writebac + + extern void end_page_writeback(struct page *page); + ++extern int set_page_constant(struct page *page); ++extern void clear_page_constant(struct page *); ++static inline int set_page_constant_lock(struct page *page) ++{ ++ BUG_ON(PageLocked(page)); ++ lock_page(page); ++ if (set_page_constant(page)) { ++ unlock_page(page); ++ return 1; ++ } ++ return 0; ++} ++ + /* + * Fault a userspace page into pagetables. Return non-zero on a fault. + * +diff -pru linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h +--- linux-2.6.9.orig/include/linux/raid/raid5.h 2007-07-09 02:43:33.000000000 -0600 ++++ linux-2.6.9/include/linux/raid/raid5.h 2007-07-13 00:39:15.000000000 -0600 +@@ -153,6 +153,7 @@ struct stripe_head { + #define R5_Wantread 4 /* want to schedule a read */ + #define R5_Wantwrite 5 + #define R5_Syncio 6 /* this io need to be accounted as resync io */ ++#define R5_Direct 7 /* use page from passed bio to avoid memcpy */ + + /* + * Write method +@@ -234,6 +235,8 @@ struct raid5_private_data { + atomic_t out_of_stripes; + atomic_t reads_for_rmw; + atomic_t reads_for_rcw; ++ atomic_t writes_zcopy; ++ atomic_t writes_copied; + atomic_t handle_called; + atomic_t delayed; + atomic_t in_reqs_in_queue; +diff -pru linux-2.6.9.orig/mm/filemap.c linux-2.6.9/mm/filemap.c +--- linux-2.6.9.orig/mm/filemap.c 2007-07-09 02:43:33.000000000 -0600 ++++ linux-2.6.9/mm/filemap.c 2007-07-13 00:12:46.000000000 -0600 +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++ + /* + * This is needed for the following functions: + * - try_to_release_page +@@ -486,11 +488,52 @@ void end_page_writeback(struct page *pag + BUG(); + smp_mb__after_clear_bit(); + } ++ clear_page_constant(page); + wake_up_page(page); + } + + EXPORT_SYMBOL(end_page_writeback); + ++/* Mark a page in bio to be constant, page must be locked */ ++int set_page_constant(struct page *page) ++{ ++ BUG_ON(!PageLocked(page)); ++ ++ /* If it's an anonymous page and haven't been added to swap cache, ++ * do it here. ++ */ ++ if (PageAnon(page) && !PageSwapCache(page)) ++ return 1; ++ ++ BUG_ON(!PageUptodate(page)); ++ ++ /* I have to clear page uptodate before trying to remove ++ * it from user's page table because otherwise, the page may be ++ * reinstalled by a page access which happens between try_to_unmap() ++ * and ClearPageUptodate(). -jay ++ */ ++ ClearPageUptodate(page); ++ if (page_mapped(page) && try_to_unmap(page) != SWAP_SUCCESS) { ++ SetPageUptodate(page); ++ return 1; ++ } ++ SetPageConstant(page); ++ return 0; ++} ++ ++void clear_page_constant(struct page *page) ++{ ++ if (PageConstant(page)) { ++ BUG_ON(!PageLocked(page)); ++ BUG_ON(PageUptodate(page)); ++ ClearPageConstant(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ } ++} ++EXPORT_SYMBOL(set_page_constant); ++EXPORT_SYMBOL(clear_page_constant); ++ + /* + * Get a lock on the page, assuming we need to sleep to get it. + * diff --git a/lustre/kernel_patches/patches/remove-suid-2.4-rhel.patch b/lustre/kernel_patches/patches/remove-suid-2.4-rhel.patch deleted file mode 100644 index 4f9fe49..0000000 --- a/lustre/kernel_patches/patches/remove-suid-2.4-rhel.patch +++ /dev/null @@ -1,23 +0,0 @@ ---- uml-2.4.24/mm/filemap.c.orig 2005-02-18 22:27:31.000000000 +0200 -+++ uml-2.4.24/mm/filemap.c 2005-02-18 22:49:02.960952568 +0200 -@@ -2993,7 +2993,20 @@ inline void remove_suid(struct inode *in - /* was any of the uid bits set? */ - mode &= inode->i_mode; - if (mode && !capable(CAP_FSETID)) { -+ struct inode_operations *op = inode->i_op; - inode->i_mode &= ~mode; -+ -+ if (op && op->setattr_raw) { -+ struct iattr newattrs; -+ int result; -+ newattrs.ia_mode = inode->i_mode; -+ -+ newattrs.ia_valid = ATTR_FORCE | ATTR_MODE; -+ result = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (result != -EOPNOTSUPP) -+ return; /* No way to return status. Whoops! */ -+ } - mark_inode_dirty(inode); - } - } diff --git a/lustre/kernel_patches/patches/removepage-2.4.20.patch b/lustre/kernel_patches/patches/removepage-2.4.20.patch deleted file mode 100644 index cc721e1..0000000 --- a/lustre/kernel_patches/patches/removepage-2.4.20.patch +++ /dev/null @@ -1,28 +0,0 @@ - include/linux/fs.h | 1 + - mm/filemap.c | 3 +++ - 2 files changed, 4 insertions(+) - ---- linux-2.4.20-b_llpmd-l24/include/linux/fs.h~removepage-2.4.20 2003-09-05 11:45:42.000000000 -0700 -+++ linux-2.4.20-b_llpmd-l24-zab/include/linux/fs.h 2003-09-05 11:46:25.000000000 -0700 -@@ -402,6 +402,7 @@ struct address_space_operations { - int (*releasepage) (struct page *, int); - #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ - int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); -+ void (*removepage)(struct page *); /* called when page gets removed from the inode */ - }; - - struct address_space { ---- linux-2.4.20-b_llpmd-l24/mm/filemap.c~removepage-2.4.20 2003-09-05 11:45:42.000000000 -0700 -+++ linux-2.4.20-b_llpmd-l24-zab/mm/filemap.c 2003-09-05 11:46:25.000000000 -0700 -@@ -95,6 +95,9 @@ static inline void remove_page_from_inod - { - struct address_space * mapping = page->mapping; - -+ if (mapping->a_ops->removepage) -+ mapping->a_ops->removepage(page); -+ - mapping->nrpages--; - list_del(&page->list); - page->mapping = NULL; - -_ diff --git a/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch b/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch deleted file mode 100644 index bf0adc7..0000000 --- a/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch +++ /dev/null @@ -1,442 +0,0 @@ -Index: linux/Documentation/Configure.help -=================================================================== -RCS file: /home/cvs/master/68chaos_eebperf/Documentation/Configure.help,v -retrieving revision 1.1.1.1 -diff -u -p -r1.1.1.1 Configure.help ---- linux/Documentation/Configure.help 20 Aug 2004 18:09:23 -0000 1.1.1.1 -+++ linux/Documentation/Configure.help 26 Aug 2004 12:34:40 -0000 -@@ -7679,6 +7679,11 @@ CONFIG_SCSI_LOGGING - there should be no noticeable performance impact as long as you have - logging turned off. - -+SCSI disk I/O stats -+CONFIG_SD_IOSTATS -+ This enables SCSI disk I/O stats collection. You must also enable -+ /proc file system support if you want this feature. -+ - QDIO base support for IBM S/390 and zSeries - CONFIG_QDIO - This driver provides the Queued Direct I/O base support for the -Index: linux/drivers/scsi/Config.in -=================================================================== -RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/Config.in,v -retrieving revision 1.1.1.1 -diff -u -p -r1.1.1.1 Config.in ---- linux/drivers/scsi/Config.in 20 Aug 2004 18:10:13 -0000 1.1.1.1 -+++ linux/drivers/scsi/Config.in 24 Aug 2004 14:30:08 -0000 -@@ -4,6 +4,7 @@ dep_tristate ' SCSI disk support' CONFI - - if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then - int 'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40 -+ bool 'SCSI disk I/O stats' CONFIG_SD_IOSTATS y - fi - - dep_tristate ' SCSI tape support' CONFIG_CHR_DEV_ST $CONFIG_SCSI -Index: linux/drivers/scsi/sd.c -=================================================================== -RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/sd.c,v -retrieving revision 1.1.1.1 -diff -u -p -r1.1.1.1 sd.c ---- linux/drivers/scsi/sd.c 20 Aug 2004 18:10:16 -0000 1.1.1.1 -+++ linux/drivers/scsi/sd.c 26 Aug 2004 13:34:39 -0000 -@@ -65,6 +65,40 @@ - * static const char RCSid[] = "$Header:"; - */ - -+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) -+#include -+#include -+ -+typedef struct -+{ -+ unsigned long long iostat_size; -+ unsigned long long iostat_count; -+} iostat_counter_t; -+ -+#define IOSTAT_NCOUNTERS 16 -+typedef struct -+{ -+ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; -+ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; -+ struct timeval iostat_timeval; -+} iostat_stats_t; -+ -+iostat_stats_t **sd_iostats; -+spinlock_t sd_iostats_lock; -+struct proc_dir_entry *sd_iostats_procdir; -+char sd_iostats_procdir_name[] = "sd_iostats"; -+ -+extern void sd_iostats_init(void); -+extern void sd_iostats_init_disk(int disk); -+extern void sd_iostats_fini(void); -+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite); -+#else -+static inline void sd_iostats_init(void) {} -+static inline void sd_iostats_init_disk(int disk) {} -+static inline void sd_iostats_fini(void) {} -+static inline void sd_iostats_bump(int dev, unsigned int nsect, int iswrite) {} -+#endif -+ - /* device number --> sd_gendisks index */ - #define SD_MAJOR_IDX(i) ( ((MAJOR(i) & 0x80) >> 4) + (MAJOR(i) & 7) ) - /* sd_gendisks index --> system major */ -@@ -351,6 +385,8 @@ static int sd_init_command(Scsi_Cmnd * S - SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n", - nbuff, dev, block)); - -+ sd_iostats_bump(dev, this_count, SCpnt->request.cmd == WRITE); -+ - /* - * If we have a 1K hardware sectorsize, prevent access to single - * 512 byte sectors. In theory we could handle this - in fact -@@ -545,7 +581,7 @@ static int sd_open(struct inode *inode, - if (scsi_block_when_processing_errors(SDev)) - scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL); - -- -+ sd_iostats_init_disk(target); - return 0; - - error_out: -@@ -1179,6 +1215,8 @@ static int sd_init() - - memset(sd_varyio, 0, (sd_template.dev_max << 4)); - -+ sd_iostats_init(); -+ - for (i = 0; i < sd_template.dev_max << 4; i++) { - sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; -@@ -1243,6 +1281,7 @@ cleanup_gendisks_de_arr: - kfree(sd_gendisks); - sd_gendisks = NULL; - cleanup_sd_gendisks: -+ sd_iostats_fini(); - kfree(sd_varyio); - cleanup_varyio: - kfree(sd_max_sectors); -@@ -1466,6 +1505,316 @@ static void sd_detach(Scsi_Device * SDp) - return; - } - -+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) -+static int -+sd_iostats_seq_show(struct seq_file *seq, void *v) -+{ -+ struct timeval now; -+ unsigned long index = (unsigned long)(seq->private); -+ iostat_stats_t *stats; -+ unsigned long long read_len; -+ unsigned long long read_len_tot; -+ unsigned long read_num; -+ unsigned long read_num_tot; -+ unsigned long long write_len; -+ unsigned long long write_len_tot; -+ unsigned long write_num; -+ unsigned long write_num_tot; -+ int i; -+ int maxi; -+ -+ if (sd_iostats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); -+ BUG(); -+ } -+ -+ stats = sd_iostats[index]; -+ if (stats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); -+ BUG(); -+ } -+ -+ do_gettimeofday(&now); -+ now.tv_sec -= stats->iostat_timeval.tv_sec; -+ now.tv_usec -= stats->iostat_timeval.tv_usec; -+ if (now.tv_usec < 0) { -+ now.tv_usec += 1000000; -+ now.tv_sec--; -+ } -+ -+ /* this sampling races with updates */ -+ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n", -+ index, now.tv_sec, now.tv_usec); -+ -+ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--) -+ if (stats->iostat_read_histogram[i].iostat_count != 0 || -+ stats->iostat_write_histogram[i].iostat_count != 0) -+ break; -+ maxi = i; -+ -+ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", -+ "reads", "total", "writes", "total"); -+ -+ read_len_tot = write_len_tot = 0; -+ read_num_tot = write_num_tot = 0; -+ for (i = 0; i <= maxi; i++) { -+ read_len = stats->iostat_read_histogram[i].iostat_size; -+ read_len_tot += read_len; -+ read_num = stats->iostat_read_histogram[i].iostat_count; -+ read_num_tot += read_num; -+ -+ write_len = stats->iostat_write_histogram[i].iostat_size; -+ write_len_tot += write_len; -+ write_num = stats->iostat_write_histogram[i].iostat_count; -+ write_num_tot += write_num; -+ -+ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", -+ 512<private_data)->private = PDE(inode)->data; -+ return 0; -+} -+ -+static int -+sd_iostats_seq_write(struct file *file, const char *buffer, -+ size_t len, loff_t *off) -+{ -+ struct seq_file *seq = file->private_data; -+ unsigned long index = (unsigned long)seq->private; -+ iostat_stats_t *stats = sd_iostats[index]; -+ unsigned long flags; -+ -+ -+ spin_lock_irqsave (&sd_iostats_lock, flags); -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ spin_unlock_irqrestore (&sd_iostats_lock, flags); -+ -+ return len; -+} -+ -+static struct file_operations sd_iostats_proc_fops = { -+ .owner = THIS_MODULE, -+ .open = sd_iostats_seq_open, -+ .read = seq_read, -+ .write = sd_iostats_seq_write, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+void -+sd_iostats_init(void) -+{ -+ int maxdevs = sd_template.dev_max; -+ int i; -+ -+ spin_lock_init(&sd_iostats_lock); -+ -+ sd_iostats = kmalloc(maxdevs * sizeof(iostat_stats_t *), GFP_KERNEL); -+ if (sd_iostats == NULL) { -+ printk(KERN_WARNING "Can't keep sd iostats: " -+ "ENOMEM allocating stats array size %d\n", -+ sd_template.dev_max * sizeof(iostat_stats_t *)); -+ return; -+ } -+ -+ for (i = 0; i < maxdevs; i++) -+ sd_iostats[i] = NULL; -+ -+ if (proc_scsi == NULL) { -+ printk(KERN_WARNING "No access to sd iostats: " -+ "proc_scsi is NULL\n"); -+ return; -+ } -+ -+ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name, -+ S_IFDIR | S_IRUGO | S_IXUGO, -+ proc_scsi); -+ if (sd_iostats_procdir == NULL) { -+ printk(KERN_WARNING "No access to sd iostats: " -+ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name); -+ return; -+ } -+} -+ -+void -+sd_iostats_init_disk(int disk) -+{ -+ char name[6]; -+ struct proc_dir_entry *pde; -+ int i; -+ unsigned long flags; -+ iostat_stats_t *stats; -+ int maxdevs = sd_template.dev_max; -+ -+ if (sd_iostats == NULL || -+ sd_iostats_procdir == NULL) -+ return; -+ -+ if (disk > sd_template.dev_max) { -+ printk(KERN_ERR "sd_iostats_init_disk: " -+ "unexpected disk index %d(%d)\n", -+ disk, sd_template.dev_max); -+ BUG(); -+ } -+ -+ if (sd_iostats[disk] != NULL) -+ return; -+ -+ sd_devname(disk, name); -+ stats = kmalloc(sizeof(*stats), GFP_KERNEL); -+ if (stats == NULL) { -+ printk(KERN_WARNING "Can't keep %s iostats: " -+ "ENOMEM allocating stats size %d\n", -+ name, sizeof(*stats)); -+ return; -+ } -+ -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ -+ spin_lock_irqsave(&sd_iostats_lock, flags); -+ -+ if (sd_iostats[disk] != NULL) { -+ spin_unlock_irqrestore(&sd_iostats_lock, flags); -+ kfree (stats); -+ return; -+ } -+ -+ sd_iostats[disk] = stats; -+ -+ spin_unlock_irqrestore(&sd_iostats_lock, flags); -+ -+ pde = create_proc_entry(name, S_IRUGO | S_IWUSR, -+ sd_iostats_procdir); -+ if (pde == NULL) { -+ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n", -+ sd_iostats_procdir_name, name); -+ } else { -+ pde->proc_fops = &sd_iostats_proc_fops; -+ pde->data = (void *)((long)disk); -+ } -+} -+ -+void -+sd_iostats_fini(void) -+{ -+ char name[6]; -+ int i; -+ int maxdevs = sd_template.dev_max; -+ -+ if (sd_iostats_procdir != NULL) { -+ for (i = 0; i < maxdevs; i++) { -+ sd_devname(i, name); -+ remove_proc_entry(name, sd_iostats_procdir); -+ } -+ -+ if (proc_scsi == NULL) { -+ printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n"); -+ BUG(); -+ } -+ remove_proc_entry(sd_iostats_procdir_name, -+ proc_scsi); -+ -+ sd_iostats_procdir = NULL; -+ } -+ -+ if (sd_iostats != NULL) { -+ for (i = 0; i < maxdevs; i++) { -+ if (sd_iostats[i] != NULL) -+ kfree (sd_iostats[i]); -+ } -+ -+ kfree(sd_iostats); -+ sd_iostats = NULL; -+ } -+} -+ -+void -+sd_iostats_bump(int disk, unsigned int nsect, int iswrite) -+{ -+ iostat_stats_t *stats; -+ iostat_counter_t *counter; -+ int bucket; -+ int tmp; -+ unsigned long irqflags; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ if (disk < 0 || disk >= sd_template.dev_max) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n", -+ disk, sd_template.dev_max); -+ BUG(); -+ } -+ -+ for (bucket = 0, tmp = nsect; tmp > 1; bucket++) -+ tmp /= 2; -+ -+ if (bucket >= IOSTAT_NCOUNTERS) { -+ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect); -+ BUG(); -+ } -+ -+ spin_lock_irqsave(&sd_iostats_lock, irqflags); -+ -+ stats = sd_iostats[disk]; -+ if (stats != NULL) { -+ counter = iswrite ? -+ &stats->iostat_write_histogram[bucket] : -+ &stats->iostat_read_histogram[bucket]; -+ -+ counter->iostat_size += nsect; -+ counter->iostat_count++; -+ } -+ -+ spin_unlock_irqrestore(&sd_iostats_lock, irqflags); -+} -+#endif -+ - static int __init init_sd(void) - { - sd_template.module = THIS_MODULE; -@@ -1488,6 +1837,7 @@ static void __exit exit_sd(void) - kfree(sd_blocksizes); - kfree(sd_hardsizes); - kfree(sd_varyio); -+ sd_iostats_fini(); - for (i = 0; i < N_USED_SD_MAJORS; i++) { - kfree(sd_gendisks[i].de_arr); - kfree(sd_gendisks[i].flags); diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch index 12e4ac0..4e06c09 100644 --- a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch +++ b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel4.patch @@ -1,8 +1,10 @@ -Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig +Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig +=================================================================== +Index: linux+rhel4+chaos/drivers/scsi/Kconfig =================================================================== ---- linux-2.6.9-5.0.3.EL.orig/drivers/scsi/Kconfig 2005-04-01 18:36:39.218039672 +0300 -+++ linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig 2005-04-01 18:36:45.571073864 +0300 -@@ -61,6 +61,14 @@ +--- linux+rhel4+chaos.orig/drivers/scsi/Kconfig ++++ linux+rhel4+chaos/drivers/scsi/Kconfig +@@ -61,6 +61,14 @@ config SCSI_DUMP help SCSI dump support @@ -17,10 +19,10 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig config CHR_DEV_ST tristate "SCSI tape support" depends on SCSI -Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c +Index: linux+rhel4+chaos/drivers/scsi/sd.c =================================================================== ---- linux-2.6.9-5.0.3.EL.orig/drivers/scsi/sd.c 2005-04-01 18:36:39.223038912 +0300 -+++ linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c 2005-04-01 18:37:42.537413664 +0300 +--- linux+rhel4+chaos.orig/drivers/scsi/sd.c ++++ linux+rhel4+chaos/drivers/scsi/sd.c @@ -63,6 +63,38 @@ #include "scsi_logging.h" @@ -68,7 +70,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c /* * Time out in seconds for disks and Magneto-opticals (which are slower). */ -@@ -276,6 +309,9 @@ +@@ -278,6 +311,9 @@ static int sd_init_command(struct scsi_c SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n", disk->disk_name, (unsigned long long)block)); @@ -78,7 +80,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c /* * If we have a 1K hardware sectorsize, prevent access to single * 512 byte sectors. In theory we could handle this - in fact -@@ -472,6 +508,7 @@ +@@ -474,6 +510,7 @@ static int sd_open(struct inode *inode, scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); } @@ -86,7 +88,30 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c return 0; error_out: -@@ -1573,6 +1610,327 @@ +@@ -500,8 +537,20 @@ static int sd_release(struct inode *inod + + SCSI_LOG_HLQUEUE(3, printk("sd_release: disk=%s\n", disk->disk_name)); + +- if (!--sdkp->openers && sdev->removable) { +- if (scsi_block_when_processing_errors(sdev)) ++ if (!--sdkp->openers) { ++ /* ++ * Remove sd_iostats information about this disk ++ */ ++ if (sd_iostats_procdir != NULL) { ++ remove_proc_entry(disk->disk_name, sd_iostats_procdir); ++ } ++ if (sd_iostats != NULL) { ++ if (sd_iostats[sdkp->index] != NULL) { ++ kfree (sd_iostats[sdkp->index]); ++ sd_iostats[sdkp->index] = NULL; ++ } ++ } ++ if (sdev->removable && scsi_block_when_processing_errors(sdev)) + scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); + } + +@@ -1575,6 +1624,342 @@ static void sd_shutdown(struct device *d sd_sync_cache(sdp); } @@ -95,7 +120,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c +sd_iostats_seq_show(struct seq_file *seq, void *v) +{ + struct timeval now; -+ struct gendisk *disk = seq->private; ++ struct gendisk *disk; + iostat_stats_t *stats; + unsigned long long read_len; + unsigned long long read_len_tot; @@ -108,6 +133,19 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c + int i; + int maxi; + ++ if (seq == NULL || seq->private == NULL) { ++ printk(KERN_ERR "sd_iostats_seq_show: NULL disk\n"); ++ BUG(); ++ } ++ ++ disk = seq->private; ++ ++ if (scsi_disk(disk) == NULL || (disk->flags & GENHD_FL_UP) == 0) { ++ seq_printf(seq, "sd_iostats_seq_show: Device %s " ++ "does not exist\n", disk->disk_name); ++ return 0; ++ } ++ + if (sd_iostats == NULL) { + printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); + BUG(); @@ -115,8 +153,10 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c + + stats = sd_iostats[scsi_disk(disk)->index]; + if (stats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); -+ BUG(); ++ seq_printf(seq, "sd_iostats_seq_show: sd_iostats " ++ "entry %d does not exist\n", ++ scsi_disk(disk)->index); ++ return 0; + } + + do_gettimeofday(&now); @@ -414,7 +454,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c /** * init_sd - entry point for this driver (both when built in or when * a module). -@@ -1582,6 +1940,7 @@ +@@ -1584,6 +1969,7 @@ static void sd_shutdown(struct device *d static int __init init_sd(void) { int majors = 0, i; @@ -422,7 +462,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); -@@ -1592,7 +1951,10 @@ +@@ -1594,7 +1980,10 @@ static int __init init_sd(void) if (!majors) return -ENODEV; @@ -434,7 +474,7 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c } /** -@@ -1606,6 +1968,7 @@ +@@ -1608,6 +1997,7 @@ static void __exit exit_sd(void) SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); @@ -442,10 +482,10 @@ Index: linux-2.6.9-5.0.3.EL/drivers/scsi/sd.c scsi_unregister_driver(&sd_template.gendrv); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); -Index: linux-2.6.9-5.0.3.EL/drivers/scsi/scsi_proc.c +Index: linux+rhel4+chaos/drivers/scsi/scsi_proc.c =================================================================== ---- linux-2.6.9-5.0.3.EL.orig/drivers/scsi/scsi_proc.c 2005-04-01 18:36:39.234037240 +0300 -+++ linux-2.6.9-5.0.3.EL/drivers/scsi/scsi_proc.c 2005-04-01 18:36:45.578072800 +0300 +--- linux+rhel4+chaos.orig/drivers/scsi/scsi_proc.c ++++ linux+rhel4+chaos/drivers/scsi/scsi_proc.c @@ -38,7 +38,8 @@ /* 4K page size, but our output routines, use some slack for overruns */ #define PROC_BLOCK_SIZE (3*1024) diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-suse.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-suse.patch deleted file mode 100644 index f12319c..0000000 --- a/lustre/kernel_patches/patches/sd_iostats-2.6-suse.patch +++ /dev/null @@ -1,456 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/Kconfig 2004-11-11 07:28:52.000000000 -0800 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/Kconfig 2005-01-06 22:58:42.338770968 -0800 -@@ -55,6 +55,14 @@ - In this case, do not compile the driver for your SCSI host adapter - (below) as a module either. - -+config SD_IOSTATS -+ bool "Enable SCSI disk I/O stats" -+ depends on BLK_DEV_SD -+ default y -+ ---help--- -+ This enables SCSI disk I/O stats collection. You must also enable -+ /proc file system support if you want this feature. -+ - config CHR_DEV_ST - tristate "SCSI tape support" - depends on SCSI -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/sd.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/sd.c 2004-11-11 07:28:28.000000000 -0800 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/sd.c 2005-01-07 01:29:47.033727872 -0800 -@@ -59,12 +59,44 @@ - - #include "scsi_logging.h" - -+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) -+# include -+# include -+ -+typedef struct { -+ unsigned long long iostat_size; -+ unsigned long long iostat_count; -+} iostat_counter_t; -+ -+#define IOSTAT_NCOUNTERS 16 -+typedef struct { -+ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; -+ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; -+ struct timeval iostat_timeval; -+} iostat_stats_t; -+ -+iostat_stats_t **sd_iostats; -+spinlock_t sd_iostats_lock; -+struct proc_dir_entry *sd_iostats_procdir; -+char sd_iostats_procdir_name[] = "sd_iostats"; -+ -+extern void sd_iostats_init(void); -+extern void sd_iostats_init_disk(struct gendisk *); -+extern void sd_iostats_fini(void); -+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite); -+#else -+static inline void sd_iostats_init(void) {} -+static inline void sd_iostats_init_disk(struct gendisk *disk) {} -+static inline void sd_iostats_fini(void) {} -+static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {} -+#endif - - /* - * Remaining dev_t-handling stuff - */ - #define SD_MAJORS 16 - #define SD_DISKS 32768 /* anything between 256 and 262144 */ -+#define SD_STATS 256 - - /* - * Time out in seconds for disks and Magneto-opticals (which are slower). -@@ -264,6 +296,9 @@ - SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n", - disk->disk_name, (unsigned long long)block)); - -+ sd_iostats_bump(scsi_disk(disk)->index, this_count, -+ rq_data_dir(SCpnt->request) == WRITE); -+ - /* - * If we have a 1K hardware sectorsize, prevent access to single - * 512 byte sectors. In theory we could handle this - in fact -@@ -460,6 +495,7 @@ - scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); - } - -+ sd_iostats_init_disk(disk); - return 0; - - error_out: -@@ -1548,6 +1584,327 @@ - sd_sync_cache(sdp); - } - -+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) -+static int -+sd_iostats_seq_show(struct seq_file *seq, void *v) -+{ -+ struct timeval now; -+ struct gendisk *disk = seq->private; -+ iostat_stats_t *stats; -+ unsigned long long read_len; -+ unsigned long long read_len_tot; -+ unsigned long read_num; -+ unsigned long read_num_tot; -+ unsigned long long write_len; -+ unsigned long long write_len_tot; -+ unsigned long write_num; -+ unsigned long write_num_tot; -+ int i; -+ int maxi; -+ -+ if (sd_iostats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); -+ BUG(); -+ } -+ -+ stats = sd_iostats[scsi_disk(disk)->index]; -+ if (stats == NULL) { -+ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); -+ BUG(); -+ } -+ -+ do_gettimeofday(&now); -+ now.tv_sec -= stats->iostat_timeval.tv_sec; -+ now.tv_usec -= stats->iostat_timeval.tv_usec; -+ if (now.tv_usec < 0) { -+ now.tv_usec += 1000000; -+ now.tv_sec--; -+ } -+ -+ /* this sampling races with updates */ -+ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n", -+ scsi_disk(disk)->index, now.tv_sec, now.tv_usec); -+ -+ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--) -+ if (stats->iostat_read_histogram[i].iostat_count != 0 || -+ stats->iostat_write_histogram[i].iostat_count != 0) -+ break; -+ maxi = i; -+ -+ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", -+ "reads", "total", "writes", "total"); -+ -+ read_len_tot = write_len_tot = 0; -+ read_num_tot = write_num_tot = 0; -+ for (i = 0; i <= maxi; i++) { -+ read_len = stats->iostat_read_histogram[i].iostat_size; -+ read_len_tot += read_len; -+ read_num = stats->iostat_read_histogram[i].iostat_count; -+ read_num_tot += read_num; -+ -+ write_len = stats->iostat_write_histogram[i].iostat_size; -+ write_len_tot += write_len; -+ write_num = stats->iostat_write_histogram[i].iostat_count; -+ write_num_tot += write_num; -+ -+ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", -+ 512<private_data)->private = PDE(inode)->data; -+ return 0; -+} -+ -+static ssize_t -+sd_iostats_seq_write(struct file *file, const char *buffer, -+ size_t len, loff_t *off) -+{ -+ struct seq_file *seq = file->private_data; -+ struct gendisk *disk = seq->private; -+ iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index]; -+ unsigned long flags; -+ -+ -+ spin_lock_irqsave (&sd_iostats_lock, flags); -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ spin_unlock_irqrestore (&sd_iostats_lock, flags); -+ -+ return len; -+} -+ -+static struct file_operations sd_iostats_proc_fops = { -+ .owner = THIS_MODULE, -+ .open = sd_iostats_seq_open, -+ .read = seq_read, -+ .write = sd_iostats_seq_write, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+extern struct proc_dir_entry *proc_scsi; -+ -+void -+sd_iostats_init(void) -+{ -+ int i; -+ -+ spin_lock_init(&sd_iostats_lock); -+ -+ sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL); -+ if (sd_iostats == NULL) { -+ printk(KERN_WARNING "Can't keep sd iostats: " -+ "ENOMEM allocating stats array size %ld\n", -+ SD_STATS * sizeof(iostat_stats_t *)); -+ return; -+ } -+ -+ for (i = 0; i < SD_STATS; i++) -+ sd_iostats[i] = NULL; -+ -+ if (proc_scsi == NULL) { -+ printk(KERN_WARNING "No access to sd iostats: " -+ "proc_scsi is NULL\n"); -+ return; -+ } -+ -+ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name, -+ S_IFDIR | S_IRUGO | S_IXUGO, -+ proc_scsi); -+ if (sd_iostats_procdir == NULL) { -+ printk(KERN_WARNING "No access to sd iostats: " -+ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name); -+ return; -+ } -+} -+ -+void -+sd_iostats_init_disk(struct gendisk *disk) -+{ -+ struct proc_dir_entry *pde; -+ unsigned long flags; -+ iostat_stats_t *stats; -+ -+ if (sd_iostats == NULL || -+ sd_iostats_procdir == NULL) -+ return; -+ -+ if (scsi_disk(disk)->index > SD_STATS) { -+ printk(KERN_ERR "sd_iostats_init_disk: " -+ "unexpected disk index %d(%d)\n", -+ scsi_disk(disk)->index, SD_STATS); -+ return; -+ } -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) -+ return; -+ -+ stats = kmalloc(sizeof(*stats), GFP_KERNEL); -+ if (stats == NULL) { -+ printk(KERN_WARNING "Can't keep %s iostats: " -+ "ENOMEM allocating stats size %ld\n", -+ disk->disk_name, sizeof(*stats)); -+ return; -+ } -+ -+ memset (stats, 0, sizeof(*stats)); -+ do_gettimeofday(&stats->iostat_timeval); -+ -+ spin_lock_irqsave(&sd_iostats_lock, flags); -+ -+ if (sd_iostats[scsi_disk(disk)->index] != NULL) { -+ spin_unlock_irqrestore(&sd_iostats_lock, flags); -+ kfree (stats); -+ return; -+ } -+ -+ sd_iostats[scsi_disk(disk)->index] = stats; -+ -+ spin_unlock_irqrestore(&sd_iostats_lock, flags); -+ -+ pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR, -+ sd_iostats_procdir); -+ if (pde == NULL) { -+ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n", -+ sd_iostats_procdir_name, disk->disk_name); -+ } else { -+ pde->proc_fops = &sd_iostats_proc_fops; -+ pde->data = disk; -+ } -+} -+ -+static void sd_devname(unsigned int disknum, char *buffer) -+{ -+ if (disknum < 26) -+ sprintf(buffer, "sd%c", 'a' + disknum); -+ else { -+ unsigned int min1; -+ unsigned int min2; -+ /* -+ * For larger numbers of disks, we need to go to a new -+ * naming scheme. -+ */ -+ min1 = disknum / 26; -+ min2 = disknum % 26; -+ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2); -+ } -+} -+ -+void -+sd_iostats_fini(void) -+{ -+ char name[6]; -+ int i; -+ -+ if (sd_iostats_procdir != NULL) { -+ for (i = 0; i < SD_STATS; i++) { -+ sd_devname(i, name); -+ remove_proc_entry(name, sd_iostats_procdir); -+ } -+ -+ if (proc_scsi == NULL) { -+ printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n"); -+ BUG(); -+ } -+ remove_proc_entry(sd_iostats_procdir_name, -+ proc_scsi); -+ -+ sd_iostats_procdir = NULL; -+ } -+ -+ if (sd_iostats != NULL) { -+ for (i = 0; i < SD_STATS; i++) { -+ if (sd_iostats[i] != NULL) -+ kfree (sd_iostats[i]); -+ } -+ -+ kfree(sd_iostats); -+ sd_iostats = NULL; -+ } -+} -+ -+void -+sd_iostats_bump(int disk, unsigned int nsect, int iswrite) -+{ -+ iostat_stats_t *stats; -+ iostat_counter_t *counter; -+ int bucket; -+ int tmp; -+ unsigned long irqflags; -+ -+ if (sd_iostats == NULL) -+ return; -+ -+ if (disk < 0 || disk >= SD_STATS) { -+ printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n", -+ disk, SD_STATS); -+ BUG(); -+ } -+ -+ for (bucket = 0, tmp = nsect; tmp > 1; bucket++) -+ tmp /= 2; -+ -+ if (bucket >= IOSTAT_NCOUNTERS) { -+ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect); -+ BUG(); -+ } -+ -+ spin_lock_irqsave(&sd_iostats_lock, irqflags); -+ -+ stats = sd_iostats[disk]; -+ if (stats != NULL) { -+ counter = iswrite ? -+ &stats->iostat_write_histogram[bucket] : -+ &stats->iostat_read_histogram[bucket]; -+ -+ counter->iostat_size += nsect; -+ counter->iostat_count++; -+ } -+ -+ spin_unlock_irqrestore(&sd_iostats_lock, irqflags); -+} -+#endif -+ - /** - * init_sd - entry point for this driver (both when built in or when - * a module). -@@ -1557,6 +1914,7 @@ - static int __init init_sd(void) - { - int majors = 0, i; -+ int rc = 0; - - SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); - -@@ -1567,7 +1925,10 @@ - if (!majors) - return -ENODEV; - -- return scsi_register_driver(&sd_template.gendrv); -+ rc = scsi_register_driver(&sd_template.gendrv); -+ if (rc == 0) -+ sd_iostats_init(); -+ return rc; - } - - /** -@@ -1581,6 +1942,7 @@ - - SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); - -+ sd_iostats_fini(); - scsi_unregister_driver(&sd_template.gendrv); - for (i = 0; i < SD_MAJORS; i++) - unregister_blkdev(sd_major(i), "sd"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/scsi_proc.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik.orig/drivers/scsi/scsi_proc.c 2004-04-03 19:36:17.000000000 -0800 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891_lustre.1.4.0-phik/drivers/scsi/scsi_proc.c 2005-01-07 00:15:53.905665776 -0800 -@@ -37,7 +37,8 @@ - /* 4K page size, but our output routines, use some slack for overruns */ - #define PROC_BLOCK_SIZE (3*1024) - --static struct proc_dir_entry *proc_scsi; -+struct proc_dir_entry *proc_scsi; -+EXPORT_SYMBOL(proc_scsi); - - /* Protect sht->present and sht->proc_dir */ - static DECLARE_MUTEX(global_host_template_sem); diff --git a/lustre/kernel_patches/patches/slab-use-after-free-debug-2.4.24.patch b/lustre/kernel_patches/patches/slab-use-after-free-debug-2.4.24.patch deleted file mode 100644 index 05c0209..0000000 --- a/lustre/kernel_patches/patches/slab-use-after-free-debug-2.4.24.patch +++ /dev/null @@ -1,748 +0,0 @@ -%patch -Index: linux-2.4.24/mm/slab.c -=================================================================== ---- linux-2.4.24.orig/mm/slab.c 2004-02-06 11:15:22.000000000 +0300 -+++ linux-2.4.24/mm/slab.c 2004-02-07 00:42:38.000000000 +0300 -@@ -97,6 +97,8 @@ - #define FORCED_DEBUG 0 - #endif - -+#include -+ - /* - * Parameters for kmem_cache_reap - */ -@@ -825,6 +827,12 @@ - return cachep; - } - -+#ifdef CONFIG_DEBUG_UAF -+void * uaf_alloc(kmem_cache_t *, int gfp_mask); -+int uaf_cache_free(kmem_cache_t *, void *addr); -+int uaf_free(void *addr); -+struct page *uaf_vaddr_to_page(void *obj); -+#endif - - #if DEBUG - /* -@@ -1342,6 +1350,20 @@ - unsigned long save_flags; - void* objp; - -+#ifdef CONFIG_DEBUG_UAF -+ /* try to use uaf-allocator first */ -+ objp = uaf_alloc(cachep, flags); -+ if (objp) { -+ if (cachep->ctor) { -+ unsigned long ctor_flags; -+ ctor_flags = SLAB_CTOR_CONSTRUCTOR; -+ if (!(flags & __GFP_WAIT)) -+ ctor_flags |= SLAB_CTOR_ATOMIC; -+ cachep->ctor(objp, cachep, ctor_flags); -+ } -+ return objp; -+ } -+#endif - kmem_cache_alloc_head(cachep, flags); - try_again: - local_irq_save(save_flags); -@@ -1436,13 +1458,17 @@ - - if (cachep->flags & SLAB_RED_ZONE) { - objp -= BYTES_PER_WORD; -- if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2) -+ if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2) { - /* Either write before start, or a double free. */ -+ printk("inconsistency at start of %s\n", cachep->name); - BUG(); -+ } - if (xchg((unsigned long *)(objp+cachep->objsize - -- BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2) -+ BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2) { - /* Either write past end, or a double free. */ -+ printk("inconsistency at end of %s\n", cachep->name); - BUG(); -+ } - } - if (cachep->flags & SLAB_POISON) - kmem_poison_obj(cachep, objp); -@@ -1578,6 +1604,10 @@ - void kmem_cache_free (kmem_cache_t *cachep, void *objp) - { - unsigned long flags; -+#ifdef CONFIG_DEBUG_UAF -+ if (uaf_cache_free(cachep, objp)) -+ return; -+#endif - #if DEBUG - CHECK_PAGE(virt_to_page(objp)); - if (cachep != GET_PAGE_CACHE(virt_to_page(objp))) -@@ -1603,6 +1633,10 @@ - - if (!objp) - return; -+#ifdef CONFIG_DEBUG_UAF -+ if (uaf_free((void *) objp)) -+ return; -+#endif - local_irq_save(flags); - CHECK_PAGE(virt_to_page(objp)); - c = GET_PAGE_CACHE(virt_to_page(objp)); -@@ -2078,3 +2112,471 @@ - #endif - } - #endif -+ -+ -+ -+#ifdef CONFIG_DEBUG_UAF -+ -+#define MAX_UAF_OBJ_SIZE 8 /* in pages */ -+#define UAF_ASSERT(xxx) if (!(xxx)) BUG(); -+#define UAF_DEBUG__ -+#ifdef UAF_DEBUG -+#define uaf_printk(fmt,a...) printk(fmt, ##a) -+#else -+#define uaf_printk(a,...) -+#endif -+ -+struct uaf_stats { -+ atomic_t uaf_allocated; -+ atomic_t uaf_allocations; -+ atomic_t uaf_failed; -+}; -+ -+static int uaf_max = 32768; -+static void *uaf_bitmap = NULL; -+static spinlock_t uaf_lock; -+static int uaf_last_found = 0; -+static int uaf_used = 0; -+static struct vm_struct *uaf_area = NULL; -+static struct uaf_stats uaf_stats[MAX_UAF_OBJ_SIZE + 1]; -+ -+static int __init uaf_setup(char *str) -+{ -+ uaf_max = simple_strtoul(str, NULL, 0); -+ return 1; -+} -+ -+__setup("uaf=", uaf_setup); -+ -+ -+void uaf_init(void) -+{ -+ int size; -+ -+ printk("UAF: total vmalloc-space - %lu\n", -+ VMALLOC_END - VMALLOC_START); -+ -+ uaf_area = get_vm_area(PAGE_SIZE * uaf_max, VM_ALLOC); -+ if (!uaf_area) { -+ printk(KERN_ALERT "UAF: can't reserve %lu bytes in KVA\n", -+ PAGE_SIZE * uaf_max); -+ return; -+ } -+ -+ printk("UAF: reserved %lu bytes in KVA at 0x%p\n", -+ PAGE_SIZE * uaf_max, uaf_area->addr); -+ -+ /* how many bytes we need to track space usage? */ -+ size = uaf_max / 8 + 8; -+ -+ uaf_bitmap = vmalloc(size); -+ if (!uaf_bitmap) { -+ printk(KERN_ALERT -+ "UAF: can't allocate %d bytes for bitmap\n", size); -+ return; -+ } -+ memset(uaf_bitmap, 0, size); -+ spin_lock_init(&uaf_lock); -+ memset(uaf_stats, 0, sizeof(uaf_stats)); -+ -+ printk("UAF: allocated %d for bitmap\n", size); -+} -+ -+static int uaf_find(int len) -+{ -+ int new_last_found = -1; -+ int loop = 0; -+ int i, j; -+ -+ j = uaf_last_found; -+ -+ do { -+ i = find_next_zero_bit(uaf_bitmap, uaf_max, j); -+ if (i >= uaf_max) { -+ /* repeat from 0 */ -+ if (++loop > 1) { -+ /* this is 2nd loop and it's useless */ -+ return -1; -+ } -+ -+ i = find_next_zero_bit(uaf_bitmap, uaf_max, 0); -+ if (i >= uaf_max) -+ return -1; -+ -+ /* save found num for subsequent searches */ -+ if (new_last_found == -1) -+ new_last_found = uaf_last_found = i; -+ UAF_ASSERT(new_last_found < uaf_max); -+ } -+ -+ /* -+ * OK. found first zero bit. -+ * now, try to find requested cont. zero-space -+ */ -+ -+ /* FIXME: implmement multipage allocation! */ -+ break; -+ -+ /* -+ j = find_next_bit(uaf_bitmap, uaf_max, i); -+ if (++loop2 > 10000) { -+ printk("ALERT: loop2=%d\n", loop2); -+ return -1; -+ } -+ */ -+ } while (j - i < len); -+ -+ /* found! */ -+ if (new_last_found == -1) -+ uaf_last_found = i + 1; -+ if (uaf_last_found >= uaf_max) -+ uaf_last_found = 0; -+ return i; -+} -+ -+extern int __vmalloc_area_pages (unsigned long address, unsigned long size, -+ int gfp_mask, pgprot_t prot, -+ struct page ***pages); -+void * uaf_alloc(kmem_cache_t *cachep, int gfp_mask) -+{ -+ struct page *ptrs[MAX_UAF_OBJ_SIZE]; -+ int size = cachep->objsize; -+ struct page **pages; -+ unsigned long flags; -+ unsigned long addr; -+ int i, j, err = -2000; -+ -+ if (uaf_bitmap == NULL) -+ return NULL; -+ -+ if (!(cachep->flags & SLAB_USE_UAF)) -+ return NULL; -+ -+ pages = (struct page **) ptrs; -+ size = (size + (PAGE_SIZE - 1)) / PAGE_SIZE; -+ /* FIXME: implement multipage allocation! */ -+ if (size > 1) -+ return NULL; -+ if (size > MAX_UAF_OBJ_SIZE) { -+ printk(KERN_ALERT "size is too big: %d\n", size); -+ return NULL; -+ } -+ -+ if (uaf_used == uaf_max) { -+ uaf_printk("UAF: space exhausted!\n"); -+ atomic_inc(&uaf_stats[size].uaf_failed); -+ return NULL; -+ } -+ -+ -+ spin_lock_irqsave(&uaf_lock, flags); -+ i = uaf_find(size); -+ if (i < 0) { -+ spin_unlock_irqrestore(&uaf_lock, flags); -+ atomic_inc(&uaf_stats[size].uaf_failed); -+ return NULL; -+ } -+ for (j = 0; j < size; j++) { -+ UAF_ASSERT(!test_bit(i + j, uaf_bitmap)); -+ set_bit(i + j, uaf_bitmap); -+ uaf_used++; -+ } -+ spin_unlock_irqrestore(&uaf_lock, flags); -+ -+ addr = ((unsigned long) uaf_area->addr) + (PAGE_SIZE * i); -+ uaf_printk("UAF: found %d/%d, base 0x%p, map at 0x%lx: ", i, -+ size, uaf_area->addr, addr); -+ -+ /* OK. we've found free space, let's allocate pages */ -+ memset(pages, 0, sizeof(struct page *) * MAX_UAF_OBJ_SIZE); -+ for (j = 0; j < size; j++) { -+ pages[j] = alloc_page(gfp_mask); -+ if (pages[j] == NULL) -+ goto nomem; -+ uaf_printk("0x%p ", pages[j]); -+ } -+ -+ /* time to map just allocated pages */ -+ err = __vmalloc_area_pages(addr, PAGE_SIZE * size, gfp_mask, -+ PAGE_KERNEL, &pages); -+ pages = (struct page **) ptrs; -+ if (err == 0) { -+ /* put slab cache pointer in first page */ -+ ptrs[0]->list.next = (void *) cachep; -+ uaf_printk(" -> 0x%lx\n", addr); -+ atomic_inc(&uaf_stats[size].uaf_allocated); -+ atomic_inc(&uaf_stats[size].uaf_allocations); -+ if (!in_interrupt() && !in_softirq()) -+ flush_tlb_all(); -+ else -+ local_flush_tlb(); -+ size = cachep->objsize; -+ if (size < PAGE_SIZE) -+ memset((char *) addr + size, 0xa7, PAGE_SIZE - size); -+ return (void *) addr; -+ } -+ -+nomem: -+ printk(KERN_ALERT "can't map pages: %d\n", err); -+ for (j = 0; j < size; j++) -+ if (pages[j]) -+ __free_page(pages[j]); -+ -+ /* can't find free pages */ -+ spin_lock_irqsave(&uaf_lock, flags); -+ for (j = 0; j < size; j++) { -+ clear_bit(i + j, uaf_bitmap); -+ uaf_used--; -+ } -+ spin_unlock_irqrestore(&uaf_lock, flags); -+ atomic_inc(&uaf_stats[size].uaf_failed); -+ -+ return NULL; -+} -+ -+extern void free_area_pmd(pgd_t *dir, unsigned long address, -+ unsigned long size); -+static void uaf_unmap(unsigned long address, unsigned long size) -+{ -+ unsigned long end = (address + size); -+ pgd_t *dir; -+ -+ dir = pgd_offset_k(address); -+ flush_cache_all(); -+ do { -+ free_area_pmd(dir, address, end - address); -+ address = (address + PGDIR_SIZE) & PGDIR_MASK; -+ dir++; -+ } while (address && (address < end)); -+ -+ /* -+ * we must not call smp_call_function() with interrtups disabled -+ * otherwise we can get into deadlock -+ */ -+ if (!in_interrupt() && !in_softirq()) -+ flush_tlb_all(); -+ else -+ local_flush_tlb(); -+} -+ -+/* -+ * returns 1 if free was successfull -+ */ -+int uaf_cache_free(kmem_cache_t *cachep, void *addr) -+{ -+ struct page *pages[MAX_UAF_OBJ_SIZE]; -+ int size = cachep->objsize; -+ unsigned long flags; -+ int i, j; -+ -+ uaf_printk("UAF: to free 0x%p/%d\n", addr, size); -+ -+ size = (size + (PAGE_SIZE - 1)) / PAGE_SIZE; -+ if (size > MAX_UAF_OBJ_SIZE) -+ return 0; -+ -+ if (uaf_bitmap == NULL) -+ return 0; -+ -+ /* first, check is address is in UAF space */ -+ if ((unsigned) addr < (unsigned) uaf_area->addr || -+ (unsigned) addr >= (unsigned) uaf_area->addr + uaf_area->size) -+ return 0; -+ -+ if (cachep->objsize < PAGE_SIZE) { -+ unsigned char *a = (void *) addr; -+ for (i = 0; i < PAGE_SIZE - cachep->objsize; i++) -+ if (a[cachep->objsize + i] != 0xa7) { -+ printk("corruption(0x%x) at %u in %s/0x%p\n", -+ (unsigned) a[cachep->objsize + i], -+ cachep->objsize + i, cachep->name, addr); -+ BUG(); -+ } -+ } -+ UAF_ASSERT(((unsigned long) addr & ~PAGE_MASK) == 0UL); -+ -+ /* calculate placement in bitmap */ -+ i = (unsigned) addr - (unsigned) uaf_area->addr; -+ UAF_ASSERT(i >= 0); -+ i = i / PAGE_SIZE; -+ -+ /* collect all the pages */ -+ uaf_printk("free/unmap %d pages: ", size); -+ /* NOTE: we need not page_table_lock here. bits in bitmap -+ * protect those pte's from to be reused */ -+ for (j = 0; j < size; j++) { -+ unsigned long address; -+ address = ((unsigned long) addr) + (PAGE_SIZE * j); -+ pages[j] = vmalloc_to_page((void *) address); -+ uaf_printk("0x%lx->0x%p ", address, pages[j]); -+ } -+ uaf_printk("\n"); -+ -+ uaf_unmap((unsigned long) addr, PAGE_SIZE * size); -+ /* free all the pages */ -+ for (j = 0; j < size; j++) -+ __free_page(pages[j]); -+ -+ spin_lock_irqsave(&uaf_lock, flags); -+ for (j = 0; j < size; j++) { -+ /* now check is correspondend bit set */ -+ UAF_ASSERT(i+j >= 0 && i+j < uaf_max); -+ UAF_ASSERT(test_bit(i+j, uaf_bitmap)); -+ -+ /* now free space in UAF */ -+ clear_bit(i+j, uaf_bitmap); -+ uaf_used--; -+ } -+ spin_unlock_irqrestore(&uaf_lock, flags); -+ -+ atomic_dec(&uaf_stats[size].uaf_allocated); -+ -+ uaf_printk("UAF: freed %d/%d at 0x%p\n", i, size, addr); -+ //printk("UAF: freed %d/%d at 0x%p\n", i, size, addr); -+ -+ return 1; -+} -+ -+struct page *uaf_vaddr_to_page(void *obj) -+{ -+ if (uaf_bitmap == NULL) -+ return NULL; -+ -+ /* first, check is address is in UAF space */ -+ if ((unsigned) obj < (unsigned) uaf_area->addr || -+ (unsigned) obj >= (unsigned) uaf_area->addr + uaf_area->size) -+ return NULL; -+ -+ return vmalloc_to_page(obj); -+} -+ -+int uaf_free(void *obj) -+{ -+ struct page *page = uaf_vaddr_to_page((void *) obj); -+ kmem_cache_t *c; -+ -+ if (!page) -+ return 0; -+ -+ c = GET_PAGE_CACHE(page); -+ return uaf_cache_free(c, (void *) obj); -+} -+ -+int uaf_is_allocated(void *obj) -+{ -+ unsigned long addr = (unsigned long) obj; -+ int i; -+ -+ if (uaf_bitmap == NULL) -+ return 0; -+ -+ addr &= PAGE_MASK; -+ /* first, check is address is in UAF space */ -+ if (addr < (unsigned long) uaf_area->addr || -+ addr >= (unsigned long) uaf_area->addr + uaf_area->size) -+ return 0; -+ -+ /* calculate placement in bitmap */ -+ i = (unsigned) addr - (unsigned) uaf_area->addr; -+ i = i / PAGE_SIZE; -+ return test_bit(i, uaf_bitmap); -+} -+ -+static void *uaf_s_start(struct seq_file *m, loff_t *pos) -+{ -+ loff_t n = *pos; -+ -+ if (!n) -+ seq_printf(m, "size(pgs) allocated failed allocations. " -+ "%d reserved, %d in use, %d last\n", -+ uaf_max, uaf_used, uaf_last_found); -+ else if (n > MAX_UAF_OBJ_SIZE) -+ return NULL; -+ -+ *pos = 1; -+ return (void *) 1; -+} -+ -+static void *uaf_s_next(struct seq_file *m, void *p, loff_t *pos) -+{ -+ unsigned long n = *pos; -+ ++*pos; -+ if (n + 1 > MAX_UAF_OBJ_SIZE) -+ return NULL; -+ return (void *) (n + 1); -+} -+ -+static void uaf_s_stop(struct seq_file *m, void *p) -+{ -+} -+ -+static int uaf_s_show(struct seq_file *m, void *p) -+{ -+ int n = (int) p; -+ -+ if (n > MAX_UAF_OBJ_SIZE) -+ return 0; -+ seq_printf(m, "%d %d %d %d\n", n, -+ atomic_read(&uaf_stats[n].uaf_allocated), -+ atomic_read(&uaf_stats[n].uaf_failed), -+ atomic_read(&uaf_stats[n].uaf_allocations)); -+ return 0; -+} -+ -+struct seq_operations uafinfo_op = { -+ .start = uaf_s_start, -+ .next = uaf_s_next, -+ .stop = uaf_s_stop, -+ .show = uaf_s_show, -+}; -+ -+ssize_t uafinfo_write(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ char kbuf[MAX_SLABINFO_WRITE+1], *tmp; -+ char *key, *name; -+ int res; -+ struct list_head *p; -+ -+ if (count > MAX_SLABINFO_WRITE) -+ return -EINVAL; -+ if (copy_from_user(&kbuf, buffer, count)) -+ return -EFAULT; -+ kbuf[MAX_SLABINFO_WRITE] = '\0'; -+ -+ tmp = kbuf; -+ key = strsep(&tmp, " \t\n"); -+ if (!key) -+ return -EINVAL; -+ if (!strcmp(key, "on")) -+ res = 1; -+ else if (!strcmp(key, "off")) -+ res = 0; -+ else -+ return -EINVAL; -+ -+ name = strsep(&tmp, " \t\n"); -+ if (!name) -+ return -EINVAL; -+ -+ /* Find the cache in the chain of caches. */ -+ down(&cache_chain_sem); -+ list_for_each(p,&cache_chain) { -+ kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); -+ -+ if (!strcmp(cachep->name, name)) { -+ if (res) { -+ printk("UAF: use on %s\n", cachep->name); -+ cachep->flags |= SLAB_USE_UAF; -+ } else { -+ printk("UAF: dont use on %s\n", cachep->name); -+ cachep->flags &= ~SLAB_USE_UAF; -+ } -+ break; -+ } -+ } -+ up(&cache_chain_sem); -+ return count; -+} -+#endif -+ -Index: linux-2.4.24/mm/vmalloc.c -=================================================================== ---- linux-2.4.24.orig/mm/vmalloc.c 2004-01-10 17:05:20.000000000 +0300 -+++ linux-2.4.24/mm/vmalloc.c 2004-02-06 11:17:09.000000000 +0300 -@@ -53,7 +53,7 @@ - } while (address < end); - } - --static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size) -+void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size) - { - pmd_t * pmd; - unsigned long end; -@@ -152,7 +152,7 @@ - return 0; - } - --static inline int __vmalloc_area_pages (unsigned long address, -+int __vmalloc_area_pages (unsigned long address, - unsigned long size, - int gfp_mask, - pgprot_t prot, -Index: linux-2.4.24/init/main.c -=================================================================== ---- linux-2.4.24.orig/init/main.c 2004-01-10 17:05:59.000000000 +0300 -+++ linux-2.4.24/init/main.c 2004-02-06 11:17:43.000000000 +0300 -@@ -437,6 +437,9 @@ - #if defined(CONFIG_SYSVIPC) - ipc_init(); - #endif -+#ifdef CONFIG_DEBUG_UAF -+ uaf_init(); -+#endif - rest_init(); - } - -Index: linux-2.4.24/fs/proc/proc_misc.c -=================================================================== ---- linux-2.4.24.orig/fs/proc/proc_misc.c 2004-01-10 17:05:55.000000000 +0300 -+++ linux-2.4.24/fs/proc/proc_misc.c 2004-02-06 11:35:27.000000000 +0300 -@@ -303,6 +303,22 @@ - release: seq_release, - }; - -+#ifdef CONFIG_DEBUG_UAF -+extern struct seq_operations uafinfo_op; -+extern ssize_t uafinfo_write(struct file *, const char *, size_t, loff_t *); -+static int uafinfo_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &uafinfo_op); -+} -+static struct file_operations proc_uafinfo_operations = { -+ .open = uafinfo_open, -+ .read = seq_read, -+ .write = uafinfo_write, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+#endif -+ - static int kstat_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) - { -@@ -640,6 +656,9 @@ - create_seq_entry("iomem", 0, &proc_iomem_operations); - create_seq_entry("partitions", 0, &proc_partitions_operations); - create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); -+#ifdef CONFIG_DEBUG_UAF -+ create_seq_entry("uafinfo",S_IWUSR|S_IRUGO,&proc_uafinfo_operations); -+#endif - #ifdef CONFIG_MODULES - create_seq_entry("ksyms", 0, &proc_ksyms_operations); - #endif -Index: linux-2.4.24/include/linux/slab.h -=================================================================== ---- linux-2.4.24.orig/include/linux/slab.h 2004-01-29 15:01:10.000000000 +0300 -+++ linux-2.4.24/include/linux/slab.h 2004-02-06 11:18:26.000000000 +0300 -@@ -40,6 +40,7 @@ - #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ - #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ - #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ -+#define SLAB_USE_UAF 0x00040000UL /* use UAF allocator */ - - /* flags passed to a constructor func */ - #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ -Index: linux-2.4.24/include/asm-i386/io.h -=================================================================== ---- linux-2.4.24.orig/include/asm-i386/io.h 2004-01-29 15:01:10.000000000 +0300 -+++ linux-2.4.24/include/asm-i386/io.h 2004-02-06 11:18:26.000000000 +0300 -@@ -75,6 +75,16 @@ - - static inline unsigned long virt_to_phys(volatile void * address) - { -+#ifdef CONFIG_DEBUG_UAF -+ unsigned long addr = (unsigned long) address; -+ if (vmlist && addr >= VMALLOC_START && addr < VMALLOC_END) { -+ struct page *page = vmalloc_to_page((void *) address); -+ if (page) { -+ unsigned long offset = addr & ~PAGE_MASK; -+ address = page_address(page) + offset; -+ } -+ } -+#endif - return __pa(address); - } - -Index: linux-2.4.24/include/asm-i386/page.h -=================================================================== ---- linux-2.4.24.orig/include/asm-i386/page.h 2004-01-14 02:58:46.000000000 +0300 -+++ linux-2.4.24/include/asm-i386/page.h 2004-02-06 11:17:09.000000000 +0300 -@@ -131,9 +131,49 @@ - #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) - #define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) - #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) -+ -+#ifndef CONFIG_DEBUG_UAF - #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) - #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) - #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) -+#else -+#define __pa(x) ({ \ -+ unsigned long __pn, __fr; \ -+ __pn = (unsigned long)(x)-PAGE_OFFSET; \ -+ __fr = __pn >> PAGE_SHIFT; \ -+ if (jiffies > HZ*3 && __fr >= max_mapnr) { \ -+ printk("invalid arg __pa(0x%x)" \ -+ " at %s:%d\n", (unsigned) (x), \ -+ __FILE__, __LINE__); \ -+ dump_stack(); \ -+ } \ -+ __pn; \ -+ }) -+ -+#define __va(x) ({ \ -+ unsigned long __pn; \ -+ __pn = (unsigned long) (x) >> PAGE_SHIFT; \ -+ if (jiffies > HZ*3 && __pn >= max_mapnr) { \ -+ printk("invalid arg __va(0x%x)" \ -+ " at %s:%d\n", (unsigned) (x), \ -+ __FILE__, __LINE__); \ -+ dump_stack(); \ -+ } \ -+ ((void *)((unsigned long)(x) + PAGE_OFFSET)); \ -+ }) -+ -+#define virt_to_page(ka) ({ \ -+ struct page *_p; \ -+ if ((unsigned long)(ka) >= VMALLOC_START) { \ -+ _p = vmalloc_to_page((void *)(ka)); \ -+ BUG_ON(!_p); \ -+ } else \ -+ _p = mem_map+(__pa(ka) >> PAGE_SHIFT); \ -+ (_p); \ -+ }) -+#endif -+ -+ - #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) - - #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ -Index: linux-2.4.24/arch/i386/config.in -=================================================================== ---- linux-2.4.24.orig/arch/i386/config.in 2004-01-14 02:58:46.000000000 +0300 -+++ linux-2.4.24/arch/i386/config.in 2004-02-06 11:17:09.000000000 +0300 -@@ -508,6 +508,9 @@ - bool ' Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW - bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM - bool ' Debug memory allocations' CONFIG_DEBUG_SLAB -+ if [ "$CONFIG_DEBUG_SLAB" != "n" ]; then -+ bool ' Debug memory allocations (use-after-free via vmalloced space)' CONFIG_DEBUG_UAF -+ fi - bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - -%diffstat - arch/i386/config.in | 3 - fs/proc/proc_misc.c | 19 + - include/asm-i386/io.h | 10 - include/asm-i386/page.h | 40 +++ - include/linux/slab.h | 1 - init/main.c | 3 - mm/slab.c | 506 +++++++++++++++++++++++++++++++++++++++++++++++- - mm/vmalloc.c | 4 - 8 files changed, 582 insertions(+), 4 deletions(-) - diff --git a/lustre/kernel_patches/patches/socket-exports-vanilla.patch b/lustre/kernel_patches/patches/socket-exports-vanilla.patch deleted file mode 100644 index 9dd5008..0000000 --- a/lustre/kernel_patches/patches/socket-exports-vanilla.patch +++ /dev/null @@ -1,42 +0,0 @@ - include/linux/socket.h | 4 ++++ - net/netsyms.c | 2 ++ - net/socket.c | 2 +- - 3 files changed, 7 insertions(+), 1 deletion(-) - ---- linux-2.4.20-l18/include/linux/socket.h~socket-exports-vanilla Fri Aug 22 15:43:58 2003 -+++ linux-2.4.20-l18-phil/include/linux/socket.h Fri Aug 22 16:26:37 2003 -@@ -260,6 +260,10 @@ extern void memcpy_tokerneliovec(struct - extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen); - extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); - extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -+struct socket; -+extern int sock_map_fd(struct socket *sock); -+extern struct socket *sockfd_lookup(int fd, int *err); -+ - #endif - #endif /* not kernel and not glibc */ - #endif /* _LINUX_SOCKET_H */ ---- linux-2.4.20-l18/net/netsyms.c~socket-exports-vanilla Fri Aug 22 15:43:58 2003 -+++ linux-2.4.20-l18-phil/net/netsyms.c Fri Aug 22 16:25:04 2003 -@@ -159,6 +159,8 @@ EXPORT_SYMBOL(datagram_poll); - EXPORT_SYMBOL(put_cmsg); - EXPORT_SYMBOL(sock_kmalloc); - EXPORT_SYMBOL(sock_kfree_s); -+EXPORT_SYMBOL(sockfd_lookup); -+EXPORT_SYMBOL(sock_map_fd); - - #ifdef CONFIG_FILTER - EXPORT_SYMBOL(sk_run_filter); ---- linux-2.4.20-l18/net/socket.c~socket-exports-vanilla Fri Aug 22 15:43:58 2003 -+++ linux-2.4.20-l18-phil/net/socket.c Fri Aug 22 16:25:04 2003 -@@ -325,7 +325,7 @@ static struct dentry_operations sockfs_d - * but we take care of internal coherence yet. - */ - --static int sock_map_fd(struct socket *sock) -+int sock_map_fd(struct socket *sock) - { - int fd; - struct qstr this; - -_ diff --git a/lustre/kernel_patches/patches/statfs64-cast-unsigned-2.4-rhel.patch b/lustre/kernel_patches/patches/statfs64-cast-unsigned-2.4-rhel.patch deleted file mode 100644 index a1063ae..0000000 --- a/lustre/kernel_patches/patches/statfs64-cast-unsigned-2.4-rhel.patch +++ /dev/null @@ -1,28 +0,0 @@ -Index: linux-2.4.21/fs/open.c -=================================================================== ---- linux-2.4.21.orig/fs/open.c 2005-05-04 16:09:11.702098704 -0400 -+++ linux-2.4.21/fs/open.c 2005-05-04 16:17:31.597103112 -0400 -@@ -92,15 +92,15 @@ - if (retval) - return retval; - /* Stuff the 32 bit values into the 64 bit struct */ -- buf->f_type = st.f_type; -- buf->f_bsize = st.f_bsize; -- buf->f_blocks = st.f_blocks; -- buf->f_bfree = st.f_bfree; -- buf->f_bavail = st.f_bavail; -- buf->f_files = st.f_files; -- buf->f_ffree = st.f_ffree; -+ buf->f_type = (unsigned long) st.f_type; -+ buf->f_bsize = (unsigned long) st.f_bsize; -+ buf->f_blocks = (unsigned long) st.f_blocks; -+ buf->f_bfree = (unsigned long) st.f_bfree; -+ buf->f_bavail = (unsigned long) st.f_bavail; -+ buf->f_files = (unsigned long) st.f_files; -+ buf->f_ffree = (unsigned long) st.f_ffree; - buf->f_fsid = st.f_fsid; -- buf->f_namelen = st.f_namelen; -+ buf->f_namelen = (unsigned long) st.f_namelen; - memset(buf->f_spare, 0, sizeof(buf->f_spare)); - } - return 0; diff --git a/lustre/kernel_patches/patches/uml-2.4.20-do_mmap_pgoff-fix.patch b/lustre/kernel_patches/patches/uml-2.4.20-do_mmap_pgoff-fix.patch deleted file mode 100644 index 844d735..0000000 --- a/lustre/kernel_patches/patches/uml-2.4.20-do_mmap_pgoff-fix.patch +++ /dev/null @@ -1,16 +0,0 @@ - arch/i386/kernel/sys_i386.c | 2 +- - 1 files changed, 1 insertion(+), 1 deletion(-) - ---- linux-2.4.20-vanilla/arch/i386/kernel/sys_i386.c~uml-2.4.20-do_mmap_pgoff-fix 2001-03-19 23:35:09.000000000 +0300 -+++ linux-2.4.20-vanilla-alexey/arch/i386/kernel/sys_i386.c 2003-09-15 10:26:19.000000000 +0400 -@@ -56,7 +56,7 @@ static inline long do_mmap2( - } - - down_write(¤t->mm->mmap_sem); -- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); -+ error = do_mmap_pgoff(current->mm, file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - -_ diff --git a/lustre/kernel_patches/patches/uml-2.6.10-fc3.patch b/lustre/kernel_patches/patches/uml-2.6.10-fc3.patch index aff5601..625b21c 100644 --- a/lustre/kernel_patches/patches/uml-2.6.10-fc3.patch +++ b/lustre/kernel_patches/patches/uml-2.6.10-fc3.patch @@ -511,7 +511,7 @@ Index: linux-2.6.10/arch/um/os-Linux/elf_aux.c + elf_aux_hwcap = auxv->a_un.a_val; + break; + case AT_PLATFORM: -+ elf_aux_platform = auxv->a_un.a_ptr; ++ elf_aux_platform = auxv->a_un.a_val; + break; + case AT_PAGESZ: + page_size = auxv->a_un.a_val; diff --git a/lustre/kernel_patches/patches/uml-patch-2.4.24-1.patch b/lustre/kernel_patches/patches/uml-patch-2.4.24-1.patch deleted file mode 100644 index e74862c..0000000 --- a/lustre/kernel_patches/patches/uml-patch-2.4.24-1.patch +++ /dev/null @@ -1,41972 +0,0 @@ -diff -Naur -X ../exclude-files orig/arch/um/common.ld.in um/arch/um/common.ld.in ---- orig/arch/um/common.ld.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/common.ld.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,53 @@ -+ .kstrtab : { *(.kstrtab) } -+ -+ . = ALIGN(16); /* Exception table */ -+ __start___ex_table = .; -+ __ex_table : { *(__ex_table) } -+ __stop___ex_table = .; -+ -+ __start___ksymtab = .; /* Kernel symbol table */ -+ __ksymtab : { *(__ksymtab) } -+ __stop___ksymtab = .; -+ -+ .unprotected : { *(.unprotected) } -+ . = ALIGN(4096); -+ PROVIDE (_unprotected_end = .); -+ -+ . = ALIGN(4096); -+ __uml_setup_start = .; -+ .uml.setup.init : { *(.uml.setup.init) } -+ __uml_setup_end = .; -+ __uml_help_start = .; -+ .uml.help.init : { *(.uml.help.init) } -+ __uml_help_end = .; -+ __uml_postsetup_start = .; -+ .uml.postsetup.init : { *(.uml.postsetup.init) } -+ __uml_postsetup_end = .; -+ __setup_start = .; -+ .setup.init : { *(.setup.init) } -+ __setup_end = .; -+ __initcall_start = .; -+ .initcall.init : { *(.initcall.init) } -+ __initcall_end = .; -+ __uml_initcall_start = .; -+ .uml.initcall.init : { *(.uml.initcall.init) } -+ __uml_initcall_end = .; -+ __init_end = .; -+ __exitcall_begin = .; -+ .exitcall : { *(.exitcall.exit) } -+ __exitcall_end = .; -+ __uml_exitcall_begin = .; -+ .uml.exitcall : { *(.uml.exitcall.exit) } -+ __uml_exitcall_end = .; -+ -+ __preinit_array_start = .; -+ .preinit_array : { *(.preinit_array) } -+ __preinit_array_end = .; -+ __init_array_start = .; -+ .init_array : { *(.init_array) } -+ __init_array_end = .; -+ __fini_array_start = .; -+ .fini_array : { *(.fini_array) } -+ __fini_array_end = .; -+ -+ .data.init : { *(.data.init) } -diff -Naur -X ../exclude-files orig/arch/um/config_block.in um/arch/um/config_block.in ---- orig/arch/um/config_block.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config_block.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,22 @@ -+mainmenu_option next_comment -+comment 'Block Devices' -+ -+bool 'Virtual block device' CONFIG_BLK_DEV_UBD -+dep_bool ' Always do synchronous disk IO for UBD' CONFIG_BLK_DEV_UBD_SYNC $CONFIG_BLK_DEV_UBD -+bool 'COW device' CONFIG_COW -+ -+if [ "$CONFIG_BLK_DEV_UBD" = "y" -o "$CONFIG_COW" = "y" ] ; then -+ define_bool CONFIG_COW_COMMON y -+fi -+ -+tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP -+dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET -+tristate 'RAM disk support' CONFIG_BLK_DEV_RAM -+if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then -+ int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 -+fi -+dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM -+ -+tristate 'Example IO memory driver' CONFIG_MMAPPER -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config_char.in um/arch/um/config_char.in ---- orig/arch/um/config_char.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config_char.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,37 @@ -+mainmenu_option next_comment -+comment 'Character Devices' -+ -+define_bool CONFIG_STDIO_CONSOLE y -+ -+bool 'Virtual serial line' CONFIG_SSL -+ -+bool 'file descriptor channel support' CONFIG_FD_CHAN -+bool 'null channel support' CONFIG_NULL_CHAN -+bool 'port channel support' CONFIG_PORT_CHAN -+bool 'pty channel support' CONFIG_PTY_CHAN -+bool 'tty channel support' CONFIG_TTY_CHAN -+bool 'xterm channel support' CONFIG_XTERM_CHAN -+string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \ -+ "fd:0,fd:1" -+string 'Default console channel initialization' CONFIG_CON_CHAN "xterm" -+string 'Default serial line channel initialization' CONFIG_SSL_CHAN "pty" -+ -+ -+bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS -+if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then -+ int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 -+fi -+ -+bool 'Watchdog Timer Support' CONFIG_WATCHDOG -+dep_bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT \ -+ $CONFIG_WATCHDOG -+dep_tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG $CONFIG_WATCHDOG -+dep_tristate ' UML watchdog' CONFIG_UML_WATCHDOG $CONFIG_WATCHDOG -+ -+tristate 'Sound support' CONFIG_UML_SOUND -+define_tristate CONFIG_SOUND $CONFIG_UML_SOUND -+define_tristate CONFIG_HOSTAUDIO $CONFIG_UML_SOUND -+ -+bool 'Enable tty logging' CONFIG_TTY_LOG -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config.in um/arch/um/config.in ---- orig/arch/um/config.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config.in 2003-12-14 11:34:02.000000000 -0500 -@@ -0,0 +1,105 @@ -+define_bool CONFIG_USERMODE y -+ -+mainmenu_name "Linux/Usermode Kernel Configuration" -+ -+define_bool CONFIG_ISA n -+define_bool CONFIG_SBUS n -+define_bool CONFIG_PCI n -+ -+define_bool CONFIG_UID16 y -+ -+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y -+ -+mainmenu_option next_comment -+comment 'Code maturity level options' -+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -+endmenu -+ -+mainmenu_option next_comment -+comment 'General Setup' -+ -+bool 'Separate kernel address space support' CONFIG_MODE_SKAS -+ -+# This is to ensure that at least one of the modes is enabled. When neither -+# is present in defconfig, they default to N, which is bad. -+if [ "$CONFIG_MODE_SKAS" != "y" ]; then -+ define_bool CONFIG_MODE_TT y -+fi -+ -+bool 'Tracing thread support' CONFIG_MODE_TT -+if [ "$CONFIG_MODE_TT" != "y" ]; then -+ bool 'Statically linked binary when CONFIG_MODE_TT is disabled' CONFIG_STATIC_LINK -+fi -+bool 'Networking support' CONFIG_NET -+bool 'System V IPC' CONFIG_SYSVIPC -+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -+bool 'Sysctl support' CONFIG_SYSCTL -+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -+tristate 'Host filesystem' CONFIG_HOSTFS -+tristate 'Honeypot proc filesystem' CONFIG_HPPFS -+bool 'Management console' CONFIG_MCONSOLE -+dep_bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_MCONSOLE -+bool '2G/2G host address space split' CONFIG_HOST_2G_2G -+bool 'Symmetric multi-processing support' CONFIG_UML_SMP -+define_bool CONFIG_SMP $CONFIG_UML_SMP -+int 'Nesting level' CONFIG_NEST_LEVEL 0 -+int 'Kernel address space size (in .5G units)' CONFIG_KERNEL_HALF_GIGS 1 -+bool 'Highmem support' CONFIG_HIGHMEM -+bool '/proc/mm' CONFIG_PROC_MM -+int 'Kernel stack size order' CONFIG_KERNEL_STACK_ORDER 2 -+bool 'Real-time Clock' CONFIG_UML_REAL_TIME_CLOCK -+endmenu -+ -+mainmenu_option next_comment -+comment 'Loadable module support' -+bool 'Enable loadable module support' CONFIG_MODULES -+if [ "$CONFIG_MODULES" = "y" ]; then -+# MODVERSIONS does not yet work in this architecture -+# bool ' Set version information on all module symbols' CONFIG_MODVERSIONS -+ bool ' Kernel module loader' CONFIG_KMOD -+fi -+endmenu -+ -+source arch/um/config_char.in -+ -+source arch/um/config_block.in -+ -+define_bool CONFIG_NETDEVICES $CONFIG_NET -+ -+if [ "$CONFIG_NET" = "y" ]; then -+ source arch/um/config_net.in -+ source net/Config.in -+fi -+ -+source fs/Config.in -+ -+mainmenu_option next_comment -+comment 'SCSI support' -+ -+tristate 'SCSI support' CONFIG_SCSI -+ -+if [ "$CONFIG_SCSI" != "n" ]; then -+ source arch/um/config_scsi.in -+fi -+endmenu -+ -+source drivers/md/Config.in -+ -+source drivers/mtd/Config.in -+ -+source lib/Config.in -+ -+mainmenu_option next_comment -+comment 'Kernel hacking' -+bool 'Debug memory allocations' CONFIG_DEBUG_SLAB -+bool 'Enable kernel debugging symbols' CONFIG_DEBUGSYM -+if [ "$CONFIG_XTERM_CHAN" = "y" ]; then -+ dep_bool 'Enable ptrace proxy' CONFIG_PT_PROXY $CONFIG_DEBUGSYM -+else -+ define_bool CONFIG_PT_PROXY n -+fi -+dep_bool 'Enable gprof support' CONFIG_GPROF $CONFIG_DEBUGSYM -+dep_bool 'Enable gcov support' CONFIG_GCOV $CONFIG_DEBUGSYM -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config_net.in um/arch/um/config_net.in ---- orig/arch/um/config_net.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config_net.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,47 @@ -+mainmenu_option next_comment -+comment 'Network Devices' -+ -+# UML virtual driver -+bool 'Virtual network device' CONFIG_UML_NET -+ -+dep_bool ' Ethertap transport' CONFIG_UML_NET_ETHERTAP $CONFIG_UML_NET -+dep_bool ' TUN/TAP transport' CONFIG_UML_NET_TUNTAP $CONFIG_UML_NET -+dep_bool ' SLIP transport' CONFIG_UML_NET_SLIP $CONFIG_UML_NET -+dep_bool ' SLiRP transport' CONFIG_UML_NET_SLIRP $CONFIG_UML_NET -+dep_bool ' Daemon transport' CONFIG_UML_NET_DAEMON $CONFIG_UML_NET -+dep_bool ' Multicast transport' CONFIG_UML_NET_MCAST $CONFIG_UML_NET -+dep_bool ' pcap transport' CONFIG_UML_NET_PCAP $CONFIG_UML_NET -+ -+# Below are hardware-independent drivers mirrored from -+# drivers/net/Config.in. It would be nice if Linux -+# had HW independent drivers separated from the other -+# but it does not. Until then each non-ISA/PCI arch -+# needs to provide it's own menu of network drivers -+ -+tristate 'Dummy net driver support' CONFIG_DUMMY -+tristate 'Bonding driver support' CONFIG_BONDING -+tristate 'EQL (serial line load balancing) support' CONFIG_EQUALIZER -+tristate 'Universal TUN/TAP device driver support' CONFIG_TUN -+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ if [ "$CONFIG_NETLINK" = "y" ]; then -+ tristate 'Ethertap network tap (OBSOLETE)' CONFIG_ETHERTAP -+ fi -+fi -+ -+tristate 'PPP (point-to-point protocol) support' CONFIG_PPP -+if [ ! "$CONFIG_PPP" = "n" ]; then -+ dep_bool ' PPP multilink support (EXPERIMENTAL)' CONFIG_PPP_MULTILINK $CONFIG_EXPERIMENTAL -+ dep_bool ' PPP filtering' CONFIG_PPP_FILTER $CONFIG_FILTER -+ dep_tristate ' PPP support for async serial ports' CONFIG_PPP_ASYNC $CONFIG_PPP -+ dep_tristate ' PPP support for sync tty ports' CONFIG_PPP_SYNC_TTY $CONFIG_PPP -+ dep_tristate ' PPP Deflate compression' CONFIG_PPP_DEFLATE $CONFIG_PPP -+ dep_tristate ' PPP BSD-Compress compression' CONFIG_PPP_BSDCOMP $CONFIG_PPP -+ dep_tristate ' PPP over Ethernet (EXPERIMENTAL)' CONFIG_PPPOE $CONFIG_PPP $CONFIG_EXPERIMENTAL -+fi -+ -+tristate 'SLIP (serial line) support' CONFIG_SLIP -+dep_bool ' CSLIP compressed headers' CONFIG_SLIP_COMPRESSED $CONFIG_SLIP -+dep_bool ' Keepalive and linefill' CONFIG_SLIP_SMART $CONFIG_SLIP -+dep_bool ' Six bit SLIP encapsulation' CONFIG_SLIP_MODE_SLIP6 $CONFIG_SLIP -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config.release um/arch/um/config.release ---- orig/arch/um/config.release 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config.release 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,302 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+# CONFIG_HPPFS is not set -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+CONFIG_KMOD=y -+ -+# -+# Character Devices -+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+# CONFIG_NULL_CHAN is not set -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y -+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+CONFIG_DUMMY=y -+CONFIG_BONDING=m -+CONFIG_EQUALIZER=m -+CONFIG_TUN=y -+CONFIG_PPP=m -+CONFIG_PPP_MULTILINK=y -+# CONFIG_PPP_ASYNC is not set -+CONFIG_PPP_SYNC_TTY=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPPOE=m -+CONFIG_SLIP=m -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set -+# CONFIG_KHTTPD is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+CONFIG_AUTOFS_FS=m -+CONFIG_AUTOFS4_FS=m -+CONFIG_REISERFS_FS=m -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+CONFIG_ADFS_FS=m -+# CONFIG_ADFS_FS_RW is not set -+CONFIG_AFFS_FS=m -+CONFIG_HFS_FS=m -+CONFIG_BFS_FS=m -+CONFIG_EXT3_FS=y -+CONFIG_JBD=y -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_EFS_FS=m -+CONFIG_CRAMFS=m -+CONFIG_TMPFS=y -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+CONFIG_MINIX_FS=m -+CONFIG_VXFS_FS=m -+# CONFIG_NTFS_FS is not set -+CONFIG_HPFS_FS=m -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+CONFIG_QNX4FS_FS=m -+# CONFIG_QNX4FS_RW is not set -+CONFIG_ROMFS_FS=m -+CONFIG_EXT2_FS=y -+CONFIG_SYSV_FS=m -+CONFIG_UDF_FS=m -+# CONFIG_UDF_RW is not set -+CONFIG_UFS_FS=m -+# CONFIG_UFS_FS_WRITE is not set -+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_SUNRPC=y -+CONFIG_LOCKD=y -+CONFIG_LOCKD_V4=y -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_ZISOFS_FS is not set -+CONFIG_ZLIB_FS_INFLATE=m -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set -+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=m -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+# CONFIG_MTD is not set -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+# CONFIG_DEBUGSYM is not set -diff -Naur -X ../exclude-files orig/arch/um/config_scsi.in um/arch/um/config_scsi.in ---- orig/arch/um/config_scsi.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/config_scsi.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+comment 'SCSI support type (disk, tape, CD-ROM)' -+ -+dep_tristate ' SCSI disk support' CONFIG_BLK_DEV_SD $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then -+ int 'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40 -+fi -+ -+dep_tristate ' SCSI tape support' CONFIG_CHR_DEV_ST $CONFIG_SCSI -+ -+dep_tristate ' SCSI CD-ROM support' CONFIG_BLK_DEV_SR $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SR" != "n" ]; then -+ bool ' Enable vendor-specific extensions (for SCSI CDROM)' CONFIG_BLK_DEV_SR_VENDOR -+ int 'Maximum number of CDROM devices that can be loaded as modules' CONFIG_SR_EXTRA_DEVS 2 -+fi -+dep_tristate ' SCSI generic support' CONFIG_CHR_DEV_SG $CONFIG_SCSI -+ -+comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -+ -+#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -+#fi -+ -+bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN -+ -+bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS -+bool ' SCSI logging facility' CONFIG_SCSI_LOGGING -+ -+dep_tristate 'SCSI debugging host simulator (EXPERIMENTAL)' CONFIG_SCSI_DEBUG $CONFIG_SCSI -diff -Naur -X ../exclude-files orig/arch/um/defconfig um/arch/um/defconfig ---- orig/arch/um/defconfig 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/defconfig 2003-12-17 02:15:39.000000000 -0500 -@@ -0,0 +1,423 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_MODE_SKAS=y -+CONFIG_MODE_TT=y -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+CONFIG_HPPFS=y -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+# CONFIG_HIGHMEM is not set -+CONFIG_PROC_MM=y -+CONFIG_KERNEL_STACK_ORDER=2 -+CONFIG_UML_REAL_TIME_CLOCK=y -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+# CONFIG_KMOD is not set -+ -+# -+# Character Devices -+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+CONFIG_NULL_CHAN=y -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+# CONFIG_WATCHDOG_NOWAYOUT is not set -+# CONFIG_SOFT_WATCHDOG is not set -+# CONFIG_UML_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y -+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+# CONFIG_COW is not set -+CONFIG_COW_COMMON=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_SLIRP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+# CONFIG_UML_NET_PCAP is not set -+CONFIG_DUMMY=y -+# CONFIG_BONDING is not set -+# CONFIG_EQUALIZER is not set -+CONFIG_TUN=y -+CONFIG_PPP=y -+# CONFIG_PPP_MULTILINK is not set -+# CONFIG_PPP_FILTER is not set -+# CONFIG_PPP_ASYNC is not set -+# CONFIG_PPP_SYNC_TTY is not set -+# CONFIG_PPP_DEFLATE is not set -+# CONFIG_PPP_BSDCOMP is not set -+# CONFIG_PPPOE is not set -+CONFIG_SLIP=y -+# CONFIG_SLIP_COMPRESSED is not set -+# CONFIG_SLIP_SMART is not set -+# CONFIG_SLIP_MODE_SLIP6 is not set -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set -+# CONFIG_KHTTPD is not set -+ -+# -+# SCTP Configuration (EXPERIMENTAL) -+# -+CONFIG_IPV6_SCTP__=y -+# CONFIG_IP_SCTP is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DEV_APPLETALK is not set -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+# CONFIG_QFMT_V2 is not set -+CONFIG_AUTOFS_FS=y -+CONFIG_AUTOFS4_FS=y -+CONFIG_REISERFS_FS=y -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+# CONFIG_ADFS_FS is not set -+# CONFIG_ADFS_FS_RW is not set -+# CONFIG_AFFS_FS is not set -+# CONFIG_HFS_FS is not set -+# CONFIG_HFSPLUS_FS is not set -+# CONFIG_BEFS_FS is not set -+# CONFIG_BEFS_DEBUG is not set -+# CONFIG_BFS_FS is not set -+# CONFIG_EXT3_FS is not set -+# CONFIG_JBD is not set -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+# CONFIG_EFS_FS is not set -+CONFIG_JFFS_FS=y -+CONFIG_JFFS_FS_VERBOSE=0 -+CONFIG_JFFS_PROC_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_JFFS2_FS_DEBUG=0 -+# CONFIG_CRAMFS is not set -+# CONFIG_TMPFS is not set -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+# CONFIG_JFS_FS is not set -+# CONFIG_JFS_DEBUG is not set -+# CONFIG_JFS_STATISTICS is not set -+CONFIG_MINIX_FS=y -+# CONFIG_VXFS_FS is not set -+# CONFIG_NTFS_FS is not set -+# CONFIG_NTFS_RW is not set -+# CONFIG_HPFS_FS is not set -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+# CONFIG_QNX4FS_FS is not set -+# CONFIG_QNX4FS_RW is not set -+# CONFIG_ROMFS_FS is not set -+CONFIG_EXT2_FS=y -+# CONFIG_SYSV_FS is not set -+# CONFIG_UDF_FS is not set -+# CONFIG_UDF_RW is not set -+# CONFIG_UFS_FS is not set -+# CONFIG_UFS_FS_WRITE is not set -+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+# CONFIG_NFS_FS is not set -+# CONFIG_NFS_V3 is not set -+# CONFIG_NFS_DIRECTIO is not set -+# CONFIG_ROOT_NFS is not set -+# CONFIG_NFSD is not set -+# CONFIG_NFSD_V3 is not set -+# CONFIG_NFSD_TCP is not set -+# CONFIG_SUNRPC is not set -+# CONFIG_LOCKD is not set -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_NCPFS_PACKET_SIGNING is not set -+# CONFIG_NCPFS_IOCTL_LOCKING is not set -+# CONFIG_NCPFS_STRONG is not set -+# CONFIG_NCPFS_NFS_NS is not set -+# CONFIG_NCPFS_OS2_NS is not set -+# CONFIG_NCPFS_SMALLDOS is not set -+# CONFIG_NCPFS_NLS is not set -+# CONFIG_NCPFS_EXTRAS is not set -+# CONFIG_ZISOFS_FS is not set -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set -+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=y -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+# CONFIG_BLK_DEV_MD is not set -+# CONFIG_MD_LINEAR is not set -+# CONFIG_MD_RAID0 is not set -+# CONFIG_MD_RAID1 is not set -+# CONFIG_MD_RAID5 is not set -+# CONFIG_MD_MULTIPATH is not set -+# CONFIG_BLK_DEV_LVM is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+CONFIG_MTD=y -+# CONFIG_MTD_DEBUG is not set -+# CONFIG_MTD_PARTITIONS is not set -+# CONFIG_MTD_CONCAT is not set -+# CONFIG_MTD_REDBOOT_PARTS is not set -+# CONFIG_MTD_CMDLINE_PARTS is not set -+ -+# -+# User Modules And Translation Layers -+# -+CONFIG_MTD_CHAR=y -+CONFIG_MTD_BLOCK=y -+# CONFIG_FTL is not set -+# CONFIG_NFTL is not set -+ -+# -+# RAM/ROM/Flash chip drivers -+# -+# CONFIG_MTD_CFI is not set -+# CONFIG_MTD_JEDECPROBE is not set -+# CONFIG_MTD_GEN_PROBE is not set -+# CONFIG_MTD_CFI_INTELEXT is not set -+# CONFIG_MTD_CFI_AMDSTD is not set -+# CONFIG_MTD_CFI_STAA is not set -+# CONFIG_MTD_RAM is not set -+# CONFIG_MTD_ROM is not set -+# CONFIG_MTD_ABSENT is not set -+# CONFIG_MTD_OBSOLETE_CHIPS is not set -+# CONFIG_MTD_AMDSTD is not set -+# CONFIG_MTD_SHARP is not set -+# CONFIG_MTD_JEDEC is not set -+ -+# -+# Mapping drivers for chip access -+# -+# CONFIG_MTD_PHYSMAP is not set -+# CONFIG_MTD_PCI is not set -+# CONFIG_MTD_PCMCIA is not set -+ -+# -+# Self-contained MTD device drivers -+# -+# CONFIG_MTD_PMC551 is not set -+# CONFIG_MTD_SLRAM is not set -+# CONFIG_MTD_MTDRAM is not set -+CONFIG_MTD_BLKMTD=y -+ -+# -+# Disk-On-Chip Device Drivers -+# -+# CONFIG_MTD_DOC1000 is not set -+# CONFIG_MTD_DOC2000 is not set -+# CONFIG_MTD_DOC2001 is not set -+# CONFIG_MTD_DOCPROBE is not set -+ -+# -+# NAND Flash Device Drivers -+# -+# CONFIG_MTD_NAND is not set -+ -+# -+# Library routines -+# -+# CONFIG_CRC32 is not set -+CONFIG_ZLIB_INFLATE=y -+CONFIG_ZLIB_DEFLATE=y -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+CONFIG_DEBUGSYM=y -+CONFIG_PT_PROXY=y -+# CONFIG_GPROF is not set -+# CONFIG_GCOV is not set -diff -Naur -X ../exclude-files orig/arch/um/drivers/chan_kern.c um/arch/um/drivers/chan_kern.c ---- orig/arch/um/drivers/chan_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/chan_kern.c 2003-11-07 07:25:34.000000000 -0500 -@@ -0,0 +1,519 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "sigio.h" -+#include "line.h" -+ -+static void *not_configged_init(char *str, int device, struct chan_opts *opts) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(NULL); -+} -+ -+static int not_configged_open(int input, int output, int primary, void *data, -+ char **dev_out) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_close(int fd, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static int not_configged_read(int fd, char *c_out, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_write(int fd, const char *buf, int len, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_console_write(int fd, const char *buf, int len, -+ void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_window_size(int fd, void *data, unsigned short *rows, -+ unsigned short *cols) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_free(void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static struct chan_ops not_configged_ops = { -+ .init = not_configged_init, -+ .open = not_configged_open, -+ .close = not_configged_close, -+ .read = not_configged_read, -+ .write = not_configged_write, -+ .console_write = not_configged_console_write, -+ .window_size = not_configged_window_size, -+ .free = not_configged_free, -+ .winch = 0, -+}; -+ -+static void tty_receive_char(struct tty_struct *tty, char ch) -+{ -+ if(tty == NULL) return; -+ -+ if(I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) { -+ if(ch == STOP_CHAR(tty)){ -+ stop_tty(tty); -+ return; -+ } -+ else if(ch == START_CHAR(tty)){ -+ start_tty(tty); -+ return; -+ } -+ } -+ -+ if((tty->flip.flag_buf_ptr == NULL) || -+ (tty->flip.char_buf_ptr == NULL)) -+ return; -+ tty_insert_flip_char(tty, ch, TTY_NORMAL); -+} -+ -+static int open_one_chan(struct chan *chan, int input, int output, int primary) -+{ -+ int fd; -+ -+ if(chan->opened) return(0); -+ if(chan->ops->open == NULL) fd = 0; -+ else fd = (*chan->ops->open)(input, output, primary, chan->data, -+ &chan->dev); -+ if(fd < 0) return(fd); -+ chan->fd = fd; -+ -+ chan->opened = 1; -+ return(0); -+} -+ -+int open_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int ret, err = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ ret = open_one_chan(chan, chan->input, chan->output, -+ chan->primary); -+ if(chan->primary) err = ret; -+ } -+ return(err); -+} -+ -+void chan_enable_winch(struct list_head *chans, void *line) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output && chan->ops->winch){ -+ register_winch(chan->fd, line); -+ return; -+ } -+ } -+} -+ -+void enable_chan(struct list_head *chans, void *data) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ -+ line_setup_irq(chan->fd, chan->input, chan->output, data); -+ } -+} -+ -+void close_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ /* Close in reverse order as open in case more than one of them -+ * refers to the same device and they save and restore that device's -+ * state. Then, the first one opened will have the original state, -+ * so it must be the last closed. -+ */ -+ for(ele = chans->prev; ele != chans; ele = ele->prev){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ if(chan->ops->close != NULL) -+ (*chan->ops->close)(chan->fd, chan->data); -+ chan->opened = 0; -+ chan->fd = -1; -+ } -+} -+ -+int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->write == NULL)) continue; -+ n = chan->ops->write(chan->fd, buf, len, chan->data); -+ if(chan->primary){ -+ ret = n; -+ if((ret == -EAGAIN) || ((ret >= 0) && (ret < len))){ -+ reactivate_fd(chan->fd, write_irq); -+ if(ret == -EAGAIN) ret = 0; -+ } -+ } -+ } -+ return(ret); -+} -+ -+int console_write_chan(struct list_head *chans, const char *buf, int len) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->console_write == NULL)) -+ continue; -+ n = chan->ops->console_write(chan->fd, buf, len, chan->data); -+ if(chan->primary) ret = n; -+ } -+ return(ret); -+} -+ -+int chan_window_size(struct list_head *chans, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary){ -+ if(chan->ops->window_size == NULL) return(0); -+ return(chan->ops->window_size(chan->fd, chan->data, -+ rows_out, cols_out)); -+ } -+ } -+ return(0); -+} -+ -+void free_one_chan(struct chan *chan) -+{ -+ list_del(&chan->list); -+ if(chan->ops->free != NULL) -+ (*chan->ops->free)(chan->data); -+ free_irq_by_fd(chan->fd); -+ if(chan->primary && chan->output) ignore_sigio_fd(chan->fd); -+ kfree(chan); -+} -+ -+void free_chan(struct list_head *chans) -+{ -+ struct list_head *ele, *next; -+ struct chan *chan; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ free_one_chan(chan); -+ } -+} -+ -+static int one_chan_config_string(struct chan *chan, char *str, int size, -+ char **error_out) -+{ -+ int n = 0; -+ -+ if(chan == NULL){ -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, chan->ops->type, 0); -+ -+ if(chan->dev == NULL){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ":", 0); -+ CONFIG_CHUNK(str, size, n, chan->dev, 0); -+ -+ return(n); -+} -+ -+static int chan_pair_config_string(struct chan *in, struct chan *out, -+ char *str, int size, char **error_out) -+{ -+ int n; -+ -+ n = one_chan_config_string(in, str, size, error_out); -+ str += n; -+ size -= n; -+ -+ if(in == out){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ",", 1); -+ n = one_chan_config_string(out, str, size, error_out); -+ str += n; -+ size -= n; -+ CONFIG_CHUNK(str, size, n, "", 1); -+ -+ return(n); -+} -+ -+int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out) -+{ -+ struct list_head *ele; -+ struct chan *chan, *in = NULL, *out = NULL; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->primary) -+ continue; -+ if(chan->input) -+ in = chan; -+ if(chan->output) -+ out = chan; -+ } -+ -+ return(chan_pair_config_string(in, out, str, size, error_out)); -+} -+ -+struct chan_type { -+ char *key; -+ struct chan_ops *ops; -+}; -+ -+struct chan_type chan_table[] = { -+#ifdef CONFIG_FD_CHAN -+ { "fd", &fd_ops }, -+#else -+ { "fd", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_NULL_CHAN -+ { "null", &null_ops }, -+#else -+ { "null", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PORT_CHAN -+ { "port", &port_ops }, -+#else -+ { "port", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PTY_CHAN -+ { "pty", &pty_ops }, -+ { "pts", &pts_ops }, -+#else -+ { "pty", ¬_configged_ops }, -+ { "pts", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_TTY_CHAN -+ { "tty", &tty_ops }, -+#else -+ { "tty", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_XTERM_CHAN -+ { "xterm", &xterm_ops }, -+#else -+ { "xterm", ¬_configged_ops }, -+#endif -+}; -+ -+static struct chan *parse_chan(char *str, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan_type *entry; -+ struct chan_ops *ops; -+ struct chan *chan; -+ void *data; -+ int i; -+ -+ ops = NULL; -+ data = NULL; -+ for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ -+ entry = &chan_table[i]; -+ if(!strncmp(str, entry->key, strlen(entry->key))){ -+ ops = entry->ops; -+ str += strlen(entry->key); -+ break; -+ } -+ } -+ if(ops == NULL){ -+ printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", -+ str); -+ return(NULL); -+ } -+ if(ops->init == NULL) return(NULL); -+ data = (*ops->init)(str, device, opts); -+ if(data == NULL) return(NULL); -+ -+ chan = kmalloc(sizeof(*chan), GFP_KERNEL); -+ if(chan == NULL) return(NULL); -+ *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), -+ .primary = 1, -+ .input = 0, -+ .output = 0, -+ .opened = 0, -+ .fd = -1, -+ .pri = pri, -+ .ops = ops, -+ .data = data }); -+ return(chan); -+} -+ -+int parse_chan_pair(char *str, struct list_head *chans, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan *new, *chan; -+ char *in, *out; -+ -+ if(!list_empty(chans)){ -+ chan = list_entry(chans->next, struct chan, list); -+ if(chan->pri >= pri) return(0); -+ free_chan(chans); -+ INIT_LIST_HEAD(chans); -+ } -+ -+ out = strchr(str, ','); -+ if(out != NULL){ -+ in = str; -+ *out = '\0'; -+ out++; -+ new = parse_chan(in, pri, device, opts); -+ if(new == NULL) return(-1); -+ new->input = 1; -+ list_add(&new->list, chans); -+ -+ new = parse_chan(out, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->output = 1; -+ } -+ else { -+ new = parse_chan(str, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->input = 1; -+ new->output = 1; -+ } -+ return(0); -+} -+ -+int chan_out_fd(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output) -+ return(chan->fd); -+ } -+ return(-1); -+} -+ -+void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev) -+{ -+ struct list_head *ele, *next; -+ struct chan *chan; -+ int err; -+ char c; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->input || (chan->ops->read == NULL)) continue; -+ do { -+ if((tty != NULL) && -+ (tty->flip.count >= TTY_FLIPBUF_SIZE)){ -+ queue_task(task, &tq_timer); -+ goto out; -+ } -+ err = chan->ops->read(chan->fd, &c, chan->data); -+ if(err > 0) -+ tty_receive_char(tty, c); -+ } while(err > 0); -+ -+ if(err == 0) reactivate_fd(chan->fd, irq); -+ if(err == -EIO){ -+ if(chan->primary){ -+ if(tty != NULL) -+ tty_hangup(tty); -+ line_disable(dev, irq); -+ close_chan(chans); -+ free_chan(chans); -+ return; -+ } -+ else { -+ if(chan->ops->close != NULL) -+ chan->ops->close(chan->fd, chan->data); -+ free_one_chan(chan); -+ } -+ } -+ } -+ out: -+ if(tty) tty_flip_buffer_push(tty); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/chan_user.c um/arch/um/drivers/chan_user.c ---- orig/arch/um/drivers/chan_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/chan_user.c 2003-11-07 07:23:45.000000000 -0500 -@@ -0,0 +1,217 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "kern_util.h" -+#include "user_util.h" -+#include "chan_user.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+void generic_close(int fd, void *unused) -+{ -+ os_close_file(fd); -+} -+ -+int generic_read(int fd, char *c_out, void *unused) -+{ -+ int n; -+ -+ n = os_read_file(fd, c_out, sizeof(*c_out)); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-EIO); -+ return(n); -+} -+ -+/* XXX Trivial wrapper around os_write_file */ -+ -+int generic_write(int fd, const char *buf, int n, void *unused) -+{ -+ return(os_write_file(fd, buf, n)); -+} -+ -+int generic_console_write(int fd, const char *buf, int n, void *unused) -+{ -+ struct termios save, new; -+ int err; -+ -+ if(isatty(fd)){ -+ tcgetattr(fd, &save); -+ new = save; -+ new.c_oflag |= OPOST; -+ tcsetattr(fd, TCSAFLUSH, &new); -+ } -+ err = generic_write(fd, buf, n, NULL); -+ if(isatty(fd)) tcsetattr(fd, TCSAFLUSH, &save); -+ return(err); -+} -+ -+int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ int rows, cols; -+ int ret; -+ -+ ret = os_window_size(fd, &rows, &cols); -+ if(ret < 0) -+ return(ret); -+ -+ ret = ((*rows_out != rows) || (*cols_out != cols)); -+ -+ *rows_out = rows; -+ *cols_out = cols; -+ -+ return(ret); -+} -+ -+void generic_free(void *data) -+{ -+ kfree(data); -+} -+ -+static void winch_handler(int sig) -+{ -+} -+ -+struct winch_data { -+ int pty_fd; -+ int pipe_fd; -+ int close_me; -+}; -+ -+static int winch_thread(void *arg) -+{ -+ struct winch_data *data = arg; -+ sigset_t sigs; -+ int pty_fd, pipe_fd; -+ int count, err; -+ char c = 1; -+ -+ os_close_file(data->close_me); -+ pty_fd = data->pty_fd; -+ pipe_fd = data->pipe_fd; -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : failed to write synchronization " -+ "byte, err = %d\n", -count); -+ -+ signal(SIGWINCH, winch_handler); -+ sigfillset(&sigs); -+ sigdelset(&sigs, SIGWINCH); -+ if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){ -+ printk("winch_thread : sigprocmask failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ if(setsid() < 0){ -+ printk("winch_thread : setsid failed, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ err = os_new_tty_pgrp(pty_fd, os_getpid()); -+ if(err < 0){ -+ printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); -+ exit(1); -+ } -+ -+ count = os_read_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : failed to read synchronization byte, " -+ "err = %d\n", -count); -+ -+ while(1){ -+ pause(); -+ -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : write failed, err = %d\n", -+ -count); -+ } -+} -+ -+static int winch_tramp(int fd, void *device_data, int *fd_out) -+{ -+ struct winch_data data; -+ unsigned long stack; -+ int fds[2], pid, n, err; -+ char c; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("winch_tramp : os_pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ -+ data = ((struct winch_data) { .pty_fd = fd, -+ .pipe_fd = fds[1], -+ .close_me = fds[0] } ); -+ pid = run_helper_thread(winch_thread, &data, 0, &stack, 0); -+ if(pid < 0){ -+ printk("fork of winch_thread failed - errno = %d\n", errno); -+ return(pid); -+ } -+ -+ os_close_file(fds[1]); -+ *fd_out = fds[0]; -+ n = os_read_file(fds[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("winch_tramp : failed to read synchronization byte\n"); -+ printk("read failed, err = %d\n", -n); -+ printk("fd %d will not support SIGWINCH\n", fd); -+ *fd_out = -1; -+ } -+ return(pid); -+} -+ -+void register_winch(int fd, void *device_data) -+{ -+ int pid, thread, thread_fd; -+ int count; -+ char c = 1; -+ -+ if(!isatty(fd)) -+ return; -+ -+ pid = tcgetpgrp(fd); -+ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, -+ device_data) && (pid == -1)){ -+ thread = winch_tramp(fd, device_data, &thread_fd); -+ if(fd != -1){ -+ register_winch_irq(thread_fd, fd, thread, device_data); -+ -+ count = os_write_file(thread_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("register_winch : failed to write " -+ "synchronization byte, err = %d\n", -+ -count); -+ } -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/cow.h um/arch/um/drivers/cow.h ---- orig/arch/um/drivers/cow.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/cow.h 2004-01-16 03:45:20.000000000 -0500 -@@ -0,0 +1,41 @@ -+#ifndef __COW_H__ -+#define __COW_H__ -+ -+#include -+ -+#if __BYTE_ORDER == __BIG_ENDIAN -+# define ntohll(x) (x) -+# define htonll(x) (x) -+#elif __BYTE_ORDER == __LITTLE_ENDIAN -+# define ntohll(x) bswap_64(x) -+# define htonll(x) bswap_64(x) -+#else -+#error "__BYTE_ORDER not defined" -+#endif -+ -+extern int init_cow_file(int fd, char *cow_file, char *backing_file, -+ int sectorsize, int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out); -+ -+extern int file_reader(__u64 offset, char *buf, int len, void *arg); -+extern int read_cow_header(int (*reader)(__u64, char *, int, void *), -+ void *arg, __u32 *version_out, -+ char **backing_file_out, time_t *mtime_out, -+ __u64 *size_out, int *sectorsize_out, -+ __u32 *align_out, int *bitmap_offset_out); -+ -+extern int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size); -+ -+extern void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out); -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/cow_kern.c um/arch/um/drivers/cow_kern.c ---- orig/arch/um/drivers/cow_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/cow_kern.c 2004-01-10 05:55:04.000000000 -0500 -@@ -0,0 +1,630 @@ -+#define COW_MAJOR 60 -+#define MAJOR_NR COW_MAJOR -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "2_5compat.h" -+#include "cow.h" -+#include "ubd_user.h" -+ -+#define COW_SHIFT 4 -+ -+struct cow { -+ int count; -+ char *cow_path; -+ dev_t cow_dev; -+ struct block_device *cow_bdev; -+ char *backing_path; -+ dev_t backing_dev; -+ struct block_device *backing_bdev; -+ int sectorsize; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+ devfs_handle_t devfs; -+ struct semaphore sem; -+ struct semaphore io_sem; -+ atomic_t working; -+ spinlock_t io_lock; -+ struct buffer_head *bh; -+ struct buffer_head *bhtail; -+ void *end_io; -+}; -+ -+#define DEFAULT_COW { \ -+ .count = 0, \ -+ .cow_path = NULL, \ -+ .cow_dev = 0, \ -+ .backing_path = NULL, \ -+ .backing_dev = 0, \ -+ .bitmap = NULL, \ -+ .bitmap_len = 0, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+ .devfs = NULL, \ -+ .working = ATOMIC_INIT(0), \ -+ .io_lock = SPIN_LOCK_UNLOCKED, \ -+} -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << COW_SHIFT) -+ -+struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; -+ -+/* Not modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by cow_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct hd_struct cow_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *cow_queue; -+ -+static int cow_open(struct inode *inode, struct file *filp); -+static int cow_release(struct inode * inode, struct file * file); -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int cow_revalidate(kdev_t rdev); -+ -+static struct block_device_operations cow_blops = { -+ .open = cow_open, -+ .release = cow_release, -+ .ioctl = cow_ioctl, -+ .revalidate = cow_revalidate, -+}; -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t cow_dir_handle; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; -+ -+static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, -+ COW_SHIFT, sizes, MAX_DEV, -+ &cow_blops); -+ -+static int cow_add(int n) -+{ -+ struct cow *dev = &cow_dev[n]; -+ char name[sizeof("nnnnnn\0")]; -+ int err = -ENODEV; -+ -+ if(dev->cow_path == NULL) -+ goto out; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << COW_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &cow_blops, NULL); -+ -+ init_MUTEX_LOCKED(&dev->sem); -+ init_MUTEX(&dev->io_sem); -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * Add buffer_head to back of pending list -+ */ -+static void cow_add_bh(struct cow *cow, struct buffer_head *bh) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&cow->io_lock, flags); -+ if(cow->bhtail != NULL){ -+ cow->bhtail->b_reqnext = bh; -+ cow->bhtail = bh; -+ } -+ else { -+ cow->bh = bh; -+ cow->bhtail = bh; -+ } -+ spin_unlock_irqrestore(&cow->io_lock, flags); -+} -+ -+/* -+ * Grab first pending buffer -+ */ -+static struct buffer_head *cow_get_bh(struct cow *cow) -+{ -+ struct buffer_head *bh; -+ -+ spin_lock_irq(&cow->io_lock); -+ bh = cow->bh; -+ if(bh != NULL){ -+ if(bh == cow->bhtail) -+ cow->bhtail = NULL; -+ cow->bh = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ } -+ spin_unlock_irq(&cow->io_lock); -+ -+ return(bh); -+} -+ -+static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, -+ struct buffer_head **cow_bh, int ncow_bh) -+{ -+ int i; -+ -+ if(ncow_bh > 0) -+ ll_rw_block(WRITE, ncow_bh, cow_bh); -+ -+ for(i = 0; i < ncow_bh ; i++){ -+ wait_on_buffer(cow_bh[i]); -+ brelse(cow_bh[i]); -+ } -+ -+ ll_rw_block(WRITE, 1, &bh); -+ brelse(bh); -+} -+ -+static struct buffer_head *cow_new_bh(struct cow *dev, int sector) -+{ -+ struct buffer_head *bh; -+ -+ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; -+ bh = getblk(dev->cow_dev, sector, dev->sectorsize); -+ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), -+ dev->sectorsize); -+ return(bh); -+} -+ -+/* Copied from loop.c, needed to avoid deadlocking in make_request. */ -+ -+static int cow_thread(void *data) -+{ -+ struct cow *dev = data; -+ struct buffer_head *bh; -+ -+ daemonize(); -+ exit_files(current); -+ -+ sprintf(current->comm, "cow%d", dev - cow_dev); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigfillset(¤t->blocked); -+ flush_signals(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ atomic_inc(&dev->working); -+ -+ current->policy = SCHED_OTHER; -+ current->nice = -20; -+ -+ current->flags |= PF_NOIO; -+ -+ /* -+ * up sem, we are running -+ */ -+ up(&dev->sem); -+ -+ for(;;){ -+ int start, len, nbh, i, update_bitmap = 0; -+ struct buffer_head *cow_bh[2]; -+ -+ down_interruptible(&dev->io_sem); -+ /* -+ * could be upped because of tear-down, not because of -+ * pending work -+ */ -+ if(!atomic_read(&dev->working)) -+ break; -+ -+ bh = cow_get_bh(dev); -+ if(bh == NULL){ -+ printk(KERN_ERR "cow: missing bh\n"); -+ continue; -+ } -+ -+ start = bh->b_blocknr * bh->b_size / dev->sectorsize; -+ len = bh->b_size / dev->sectorsize; -+ for(i = 0; i < len ; i++){ -+ if(ubd_test_bit(start + i, -+ (unsigned char *) dev->bitmap)) -+ continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(start + i, (unsigned char *) dev->bitmap); -+ } -+ -+ cow_bh[0] = NULL; -+ cow_bh[1] = NULL; -+ nbh = 0; -+ if(update_bitmap){ -+ cow_bh[0] = cow_new_bh(dev, start); -+ nbh++; -+ if(start / dev->sectorsize != -+ (start + len) / dev->sectorsize){ -+ cow_bh[1] = cow_new_bh(dev, start + len); -+ nbh++; -+ } -+ } -+ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_handle_bh(dev, bh, cow_bh, nbh); -+ -+ /* -+ * upped both for pending work and tear-down, lo_pending -+ * will hit zero then -+ */ -+ if(atomic_dec_and_test(&dev->working)) -+ break; -+ } -+ -+ up(&dev->sem); -+ return(0); -+} -+ -+static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) -+{ -+ struct cow *dev; -+ int n, minor; -+ -+ minor = MINOR(bh->b_rdev); -+ n = minor >> COW_SHIFT; -+ dev = &cow_dev[n]; -+ -+ dev->end_io = NULL; -+ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ -+ bh->b_rdev = dev->cow_dev; -+ bh->b_rsector += dev->data_offset / dev->sectorsize; -+ } -+ else if(rw == WRITE){ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_add_bh(dev, bh); -+ up(&dev->io_sem); -+ return(0); -+ } -+ else { -+ bh->b_rdev = dev->backing_dev; -+ } -+ -+ return(1); -+} -+ -+int cow_init(void) -+{ -+ int i; -+ -+ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { -+ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(cow_queue, NULL); -+ INIT_ELV(cow_queue, &cow_queue->elevator); -+ blk_queue_make_request(cow_queue, cow_make_request); -+ -+ add_gendisk(&cow_gendisk); -+ -+ for(i=0;i 0){ -+ n = (left > blocksize) ? blocksize : left; -+ -+ bh = bread(dev, block, (n < 512) ? 512 : n); -+ if(bh == NULL) -+ return(-EIO); -+ -+ n -= offset; -+ memcpy(&buf[cur], bh->b_data + offset, n); -+ block++; -+ left -= n; -+ cur += n; -+ offset = 0; -+ brelse(bh); -+ } -+ -+ return(count); -+} -+ -+static int cow_open(struct inode *inode, struct file *filp) -+{ -+ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ mm_segment_t fs; -+ struct cow *dev; -+ __u64 size; -+ __u32 version, align; -+ time_t mtime; -+ char *backing_file; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ offset = n << COW_SHIFT; -+ -+ spin_lock(&cow_lock); -+ -+ if(dev->count == 0){ -+ dev->cow_dev = name_to_kdev_t(dev->cow_path); -+ if(dev->cow_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->cow_path); -+ err = -ENODEV; -+ } -+ -+ dev->backing_dev = name_to_kdev_t(dev->backing_path); -+ if(dev->backing_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->backing_path); -+ err = -ENODEV; -+ } -+ -+ if(err) -+ goto out; -+ -+ dev->cow_bdev = bdget(dev->cow_dev); -+ if(dev->cow_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->cow_path); -+ err = -ENOMEM; -+ } -+ dev->backing_bdev = bdget(dev->backing_dev); -+ if(dev->backing_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->backing_path); -+ err = -ENOMEM; -+ } -+ -+ if(err) -+ goto out; -+ -+ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, -+ BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of COW device failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of backing device " -+ "failed, error = %d\n", err); -+ goto out; -+ } -+ -+ err = read_cow_header(reader, &dev->cow_dev, &version, -+ &backing_file, &mtime, &size, -+ &dev->sectorsize, &align, -+ &dev->bitmap_offset); -+ if(err){ -+ printk(KERN_ERR "cow_open - read_cow_header failed, " -+ "err = %d\n", err); -+ goto out; -+ } -+ -+ cow_sizes(version, size, dev->sectorsize, align, -+ dev->bitmap_offset, &dev->bitmap_len, -+ &dev->data_offset); -+ dev->bitmap = (void *) vmalloc(dev->bitmap_len); -+ if(dev->bitmap == NULL){ -+ err = -ENOMEM; -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto out; -+ } -+ flush_tlb_kernel_vm(); -+ -+ err = reader(dev->bitmap_offset, (char *) dev->bitmap, -+ dev->bitmap_len, &dev->cow_dev); -+ if(err < 0){ -+ printk(KERN_ERR "Failed to read COW bitmap\n"); -+ vfree(dev->bitmap); -+ goto out; -+ } -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = (*dev_ioctl)(inode, filp, BLKGETSIZE, -+ (unsigned long) &sizes[offset]); -+ set_fs(fs); -+ if(err){ -+ printk(KERN_ERR "cow_open - BLKGETSIZE failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ kernel_thread(cow_thread, dev, -+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ down(&dev->sem); -+ } -+ dev->count++; -+ out: -+ spin_unlock(&cow_lock); -+ return(err); -+} -+ -+static int cow_release(struct inode * inode, struct file * file) -+{ -+ struct cow *dev; -+ int n, err; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ spin_lock(&cow_lock); -+ -+ if(--dev->count > 0) -+ goto out; -+ -+ err = blkdev_put(dev->cow_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of cow device failed, " -+ "error = %d\n", err); -+ bdput(dev->cow_bdev); -+ dev->cow_bdev = 0; -+ -+ err = blkdev_put(dev->backing_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of backing device failed, " -+ "error = %d\n", err); -+ bdput(dev->backing_bdev); -+ dev->backing_bdev = 0; -+ -+ out: -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct cow *dev; -+ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ int n; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ return((*dev_ioctl)(inode, file, cmd, arg)); -+} -+ -+static int cow_revalidate(kdev_t rdev) -+{ -+ printk(KERN_ERR "Need to implement cow_revalidate\n"); -+ return(0); -+} -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int cow_setup(char *str) -+{ -+ struct cow *dev; -+ char *cow_name, *backing_name; -+ int unit; -+ -+ unit = parse_unit(&str); -+ if(unit < 0){ -+ printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); -+ return(1); -+ } -+ -+ if(*str != '='){ -+ printk(KERN_ERR "cow_setup - Missing '=' after unit " -+ "number\n"); -+ return(1); -+ } -+ str++; -+ -+ cow_name = str; -+ backing_name = strchr(str, ','); -+ if(backing_name == NULL){ -+ printk(KERN_ERR "cow_setup - missing backing device name\n"); -+ return(0); -+ } -+ *backing_name = '\0'; -+ backing_name++; -+ -+ spin_lock(&cow_lock); -+ -+ dev = &cow_dev[unit]; -+ dev->cow_path = cow_name; -+ dev->backing_path = backing_name; -+ -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+__setup("cow", cow_setup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/cow_sys.h um/arch/um/drivers/cow_sys.h ---- orig/arch/um/drivers/cow_sys.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/cow_sys.h 2003-12-17 10:53:03.000000000 -0500 -@@ -0,0 +1,48 @@ -+#ifndef __COW_SYS_H__ -+#define __COW_SYS_H__ -+ -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "user.h" -+ -+static inline void *cow_malloc(int size) -+{ -+ return(um_kmalloc(size)); -+} -+ -+static inline void cow_free(void *ptr) -+{ -+ kfree(ptr); -+} -+ -+#define cow_printf printk -+ -+static inline char *cow_strdup(char *str) -+{ -+ return(uml_strdup(str)); -+} -+ -+static inline int cow_seek_file(int fd, __u64 offset) -+{ -+ return(os_seek_file(fd, offset)); -+} -+ -+static inline int cow_file_size(char *file, __u64 *size_out) -+{ -+ return(os_file_size(file, size_out)); -+} -+ -+static inline int cow_write_file(int fd, char *buf, int size) -+{ -+ return(os_write_file(fd, buf, size)); -+} -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/cow_user.c um/arch/um/drivers/cow_user.c ---- orig/arch/um/drivers/cow_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/cow_user.c 2004-01-10 05:56:22.000000000 -0500 -@@ -0,0 +1,375 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "os.h" -+ -+#include "cow.h" -+#include "cow_sys.h" -+ -+#define PATH_LEN_V1 256 -+ -+struct cow_header_v1 { -+ int magic; -+ int version; -+ char backing_file[PATH_LEN_V1]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+#define PATH_LEN_V2 MAXPATHLEN -+ -+struct cow_header_v2 { -+ unsigned long magic; -+ unsigned long version; -+ char backing_file[PATH_LEN_V2]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in -+ * case other systems have different values for MAXPATHLEN -+ */ -+#define PATH_LEN_V3 4096 -+ -+/* Changes from V2 - -+ * PATH_LEN_V3 as described above -+ * Explicitly specify field bit lengths for systems with different -+ * lengths for the usual C types. Not sure whether char or -+ * time_t should be changed, this can be changed later without -+ * breaking compatibility -+ * Add alignment field so that different alignments can be used for the -+ * bitmap and data -+ * Add cow_format field to allow for the possibility of different ways -+ * of specifying the COW blocks. For now, the only value is 0, -+ * for the traditional COW bitmap. -+ * Move the backing_file field to the end of the header. This allows -+ * for the possibility of expanding it into the padding required -+ * by the bitmap alignment. -+ * The bitmap and data portions of the file will be aligned as specified -+ * by the alignment field. This is to allow COW files to be -+ * put on devices with restrictions on access alignments, such as -+ * /dev/raw, with a 512 byte alignment restriction. This also -+ * allows the data to be more aligned more strictly than on -+ * sector boundaries. This is needed for ubd-mmap, which needs -+ * the data to be page aligned. -+ * Fixed (finally!) the rounding bug -+ */ -+ -+struct cow_header_v3 { -+ __u32 magic; -+ __u32 version; -+ time_t mtime; -+ __u64 size; -+ __u32 sectorsize; -+ __u32 alignment; -+ __u32 cow_format; -+ char backing_file[PATH_LEN_V3]; -+}; -+ -+/* COW format definitions - for now, we have only the usual COW bitmap */ -+#define COW_BITMAP 0 -+ -+union cow_header { -+ struct cow_header_v1 v1; -+ struct cow_header_v2 v2; -+ struct cow_header_v3 v3; -+}; -+ -+#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -+#define COW_VERSION 3 -+ -+#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) -+#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) -+ -+void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out) -+{ -+ if(version < 3){ -+ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = (*data_offset_out + sectorsize - 1) / -+ sectorsize; -+ *data_offset_out *= sectorsize; -+ } -+ else { -+ *bitmap_len_out = DIV_ROUND(size, sectorsize); -+ *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = ROUND_UP(*data_offset_out, align); -+ } -+} -+ -+static int absolutize(char *to, int size, char *from) -+{ -+ char save_cwd[256], *slash; -+ int remaining; -+ -+ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { -+ cow_printf("absolutize : unable to get cwd - errno = %d\n", -+ errno); -+ return(-1); -+ } -+ slash = strrchr(from, '/'); -+ if(slash != NULL){ -+ *slash = '\0'; -+ if(chdir(from)){ -+ *slash = '/'; -+ cow_printf("absolutize : Can't cd to '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ *slash = '/'; -+ if(getcwd(to, size) == NULL){ -+ cow_printf("absolutize : unable to get cwd of '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ remaining = size - strlen(to); -+ if(strlen(slash) + 1 > remaining){ -+ cow_printf("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcat(to, slash); -+ } -+ else { -+ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ -+ cow_printf("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcpy(to, save_cwd); -+ strcat(to, "/"); -+ strcat(to, from); -+ } -+ chdir(save_cwd); -+ return(0); -+} -+ -+int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size) -+{ -+ struct cow_header_v3 *header; -+ unsigned long modtime; -+ int err; -+ -+ err = cow_seek_file(fd, 0); -+ if(err < 0){ -+ cow_printf("write_cow_header - lseek failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("Failed to allocate COW V3 header\n"); -+ goto out; -+ } -+ header->magic = htonl(COW_MAGIC); -+ header->version = htonl(COW_VERSION); -+ -+ err = -EINVAL; -+ if(strlen(backing_file) > sizeof(header->backing_file) - 1){ -+ cow_printf("Backing file name \"%s\" is too long - names are " -+ "limited to %d characters\n", backing_file, -+ sizeof(header->backing_file) - 1); -+ goto out_free; -+ } -+ -+ if(absolutize(header->backing_file, sizeof(header->backing_file), -+ backing_file)) -+ goto out_free; -+ -+ err = os_file_modtime(header->backing_file, &modtime); -+ if(err < 0){ -+ cow_printf("Backing file '%s' mtime request failed, " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ err = cow_file_size(header->backing_file, size); -+ if(err < 0){ -+ cow_printf("Couldn't get size of backing file '%s', " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ header->mtime = htonl(modtime); -+ header->size = htonll(*size); -+ header->sectorsize = htonl(sectorsize); -+ header->alignment = htonl(alignment); -+ header->cow_format = COW_BITMAP; -+ -+ err = os_write_file(fd, header, sizeof(*header)); -+ if(err != sizeof(*header)){ -+ cow_printf("Write of header to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ goto out_free; -+ } -+ err = 0; -+ out_free: -+ cow_free(header); -+ out: -+ return(err); -+} -+ -+int file_reader(__u64 offset, char *buf, int len, void *arg) -+{ -+ int fd = *((int *) arg); -+ -+ return(pread(fd, buf, len, offset)); -+} -+ -+/* XXX Need to sanity-check the values read from the header */ -+ -+int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, -+ __u32 *version_out, char **backing_file_out, -+ time_t *mtime_out, __u64 *size_out, -+ int *sectorsize_out, __u32 *align_out, -+ int *bitmap_offset_out) -+{ -+ union cow_header *header; -+ char *file; -+ int err, n; -+ unsigned long version, magic; -+ -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("read_cow_header - Failed to allocate header\n"); -+ return(-ENOMEM); -+ } -+ err = -EINVAL; -+ n = (*reader)(0, (char *) header, sizeof(*header), arg); -+ if(n < offsetof(typeof(header->v1), backing_file)){ -+ cow_printf("read_cow_header - short header\n"); -+ goto out; -+ } -+ -+ magic = header->v1.magic; -+ if(magic == COW_MAGIC) { -+ version = header->v1.version; -+ } -+ else if(magic == ntohl(COW_MAGIC)){ -+ version = ntohl(header->v1.version); -+ } -+ /* No error printed because the non-COW case comes through here */ -+ else goto out; -+ -+ *version_out = version; -+ -+ if(version == 1){ -+ if(n < sizeof(header->v1)){ -+ cow_printf("read_cow_header - failed to read V1 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = header->v1.mtime; -+ *size_out = header->v1.size; -+ *sectorsize_out = header->v1.sectorsize; -+ *bitmap_offset_out = sizeof(header->v1); -+ *align_out = *sectorsize_out; -+ file = header->v1.backing_file; -+ } -+ else if(version == 2){ -+ if(n < sizeof(header->v2)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v2.mtime); -+ *size_out = ntohll(header->v2.size); -+ *sectorsize_out = ntohl(header->v2.sectorsize); -+ *bitmap_offset_out = sizeof(header->v2); -+ *align_out = *sectorsize_out; -+ file = header->v2.backing_file; -+ } -+ else if(version == 3){ -+ if(n < sizeof(header->v3)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v3.mtime); -+ *size_out = ntohll(header->v3.size); -+ *sectorsize_out = ntohl(header->v3.sectorsize); -+ *align_out = ntohl(header->v3.alignment); -+ *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); -+ file = header->v3.backing_file; -+ } -+ else { -+ cow_printf("read_cow_header - invalid COW version\n"); -+ goto out; -+ } -+ err = -ENOMEM; -+ *backing_file_out = cow_strdup(file); -+ if(*backing_file_out == NULL){ -+ cow_printf("read_cow_header - failed to allocate backing " -+ "file\n"); -+ goto out; -+ } -+ err = 0; -+ out: -+ cow_free(header); -+ return(err); -+} -+ -+int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, -+ int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ __u64 size, offset; -+ char zero = 0; -+ int err; -+ -+ err = write_cow_header(cow_file, fd, backing_file, sectorsize, -+ alignment, &size); -+ if(err) -+ goto out; -+ -+ *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); -+ cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); -+ -+ offset = *data_offset_out + size - sizeof(zero); -+ err = cow_seek_file(fd, offset); -+ if(err < 0){ -+ cow_printf("cow bitmap lseek failed : err = %d\n", -err); -+ goto out; -+ } -+ -+ /* does not really matter how much we write it is just to set EOF -+ * this also sets the entire COW bitmap -+ * to zero without having to allocate it -+ */ -+ err = cow_write_file(fd, &zero, sizeof(zero)); -+ if(err != sizeof(zero)){ -+ cow_printf("Write of bitmap to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon.h um/arch/um/drivers/daemon.h ---- orig/arch/um/drivers/daemon.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/daemon.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+#define SWITCH_VERSION 3 -+ -+struct daemon_data { -+ char *sock_type; -+ char *ctl_sock; -+ void *ctl_addr; -+ void *data_addr; -+ void *local_addr; -+ int fd; -+ int control; -+ void *dev; -+}; -+ -+extern struct net_user_info daemon_user_info; -+ -+extern int daemon_user_write(int fd, void *buf, int len, -+ struct daemon_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon_kern.c um/arch/um/drivers/daemon_kern.c ---- orig/arch/um/drivers/daemon_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/daemon_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "daemon.h" -+ -+struct daemon_init { -+ char *sock_type; -+ char *ctl_sock; -+}; -+ -+void daemon_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct daemon_data *dpri; -+ struct daemon_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct daemon_data *) pri->user; -+ *dpri = ((struct daemon_data) -+ { .sock_type = init->sock_type, -+ .ctl_sock = init->ctl_sock, -+ .ctl_addr = NULL, -+ .data_addr = NULL, -+ .local_addr = NULL, -+ .fd = -1, -+ .control = -1, -+ .dev = dev }); -+ -+ printk("daemon backend (uml_switch version %d) - %s:%s", -+ SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); -+ printk("\n"); -+} -+ -+static int daemon_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int daemon_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(daemon_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct daemon_data *) &lp->user)); -+} -+ -+static struct net_kern_info daemon_kern_info = { -+ .init = daemon_init, -+ .protocol = eth_protocol, -+ .read = daemon_read, -+ .write = daemon_write, -+}; -+ -+int daemon_setup(char *str, char **mac_out, void *data) -+{ -+ struct daemon_init *init = data; -+ char *remain; -+ -+ *init = ((struct daemon_init) -+ { .sock_type = "unix", -+ .ctl_sock = "/tmp/uml.ctl" }); -+ -+ remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, -+ NULL); -+ if(remain != NULL) -+ printk(KERN_WARNING "daemon_setup : Ignoring data socket " -+ "specification\n"); -+ -+ return(1); -+} -+ -+static struct transport daemon_transport = { -+ .list = LIST_HEAD_INIT(daemon_transport.list), -+ .name = "daemon", -+ .setup = daemon_setup, -+ .user = &daemon_user_info, -+ .kern = &daemon_kern_info, -+ .private_size = sizeof(struct daemon_data), -+ .setup_size = sizeof(struct daemon_init), -+}; -+ -+static int register_daemon(void) -+{ -+ register_transport(&daemon_transport); -+ return(1); -+} -+ -+__initcall(register_daemon); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon_user.c um/arch/um/drivers/daemon_user.c ---- orig/arch/um/drivers/daemon_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/daemon_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "daemon.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+enum request_type { REQ_NEW_CONTROL }; -+ -+#define SWITCH_MAGIC 0xfeedface -+ -+struct request_v3 { -+ uint32_t magic; -+ uint32_t version; -+ enum request_type type; -+ struct sockaddr_un sock; -+}; -+ -+static struct sockaddr_un *new_addr(void *name, int len) -+{ -+ struct sockaddr_un *sun; -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ return(NULL); -+ } -+ sun->sun_family = AF_UNIX; -+ memcpy(sun->sun_path, name, len); -+ return(sun); -+} -+ -+static int connect_to_switch(struct daemon_data *pri) -+{ -+ struct sockaddr_un *ctl_addr = pri->ctl_addr; -+ struct sockaddr_un *local_addr = pri->local_addr; -+ struct sockaddr_un *sun; -+ struct request_v3 req; -+ int fd, n, err; -+ -+ pri->control = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(pri->control < 0){ -+ printk("daemon_open : control socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if(connect(pri->control, (struct sockaddr *) ctl_addr, -+ sizeof(*ctl_addr)) < 0){ -+ printk("daemon_open : control connect failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ -+ fd = socket(AF_UNIX, SOCK_DGRAM, 0); -+ if(fd < 0){ -+ printk("daemon_open : data socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ if(bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0){ -+ printk("daemon_open : data bind failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out_close; -+ } -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ err = -ENOMEM; -+ goto out_close; -+ } -+ -+ req.magic = SWITCH_MAGIC; -+ req.version = SWITCH_VERSION; -+ req.type = REQ_NEW_CONTROL; -+ req.sock = *local_addr; -+ n = os_write_file(pri->control, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk("daemon_open : control setup request failed, err = %d\n", -+ -n); -+ err = -ENOTCONN; -+ goto out; -+ } -+ -+ n = os_read_file(pri->control, sun, sizeof(*sun)); -+ if(n != sizeof(*sun)){ -+ printk("daemon_open : read of data socket failed, err = %d\n", -+ -n); -+ err = -ENOTCONN; -+ goto out_close; -+ } -+ -+ pri->data_addr = sun; -+ return(fd); -+ -+ out_close: -+ os_close_file(fd); -+ out: -+ os_close_file(pri->control); -+ return(err); -+} -+ -+static void daemon_user_init(void *data, void *dev) -+{ -+ struct daemon_data *pri = data; -+ struct timeval tv; -+ struct { -+ char zero; -+ int pid; -+ int usecs; -+ } name; -+ -+ if(!strcmp(pri->sock_type, "unix")) -+ pri->ctl_addr = new_addr(pri->ctl_sock, -+ strlen(pri->ctl_sock) + 1); -+ name.zero = 0; -+ name.pid = os_getpid(); -+ gettimeofday(&tv, NULL); -+ name.usecs = tv.tv_usec; -+ pri->local_addr = new_addr(&name, sizeof(name)); -+ pri->dev = dev; -+ pri->fd = connect_to_switch(pri); -+ if(pri->fd < 0){ -+ kfree(pri->local_addr); -+ pri->local_addr = NULL; -+ } -+} -+ -+static int daemon_open(void *data) -+{ -+ struct daemon_data *pri = data; -+ return(pri->fd); -+} -+ -+static void daemon_remove(void *data) -+{ -+ struct daemon_data *pri = data; -+ -+ os_close_file(pri->fd); -+ os_close_file(pri->control); -+ if(pri->data_addr != NULL) kfree(pri->data_addr); -+ if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); -+ if(pri->local_addr != NULL) kfree(pri->local_addr); -+} -+ -+int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -+{ -+ struct sockaddr_un *data_addr = pri->data_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int daemon_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info daemon_user_info = { -+ .init = daemon_user_init, -+ .open = daemon_open, -+ .close = NULL, -+ .remove = daemon_remove, -+ .set_mtu = daemon_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/fd.c um/arch/um/drivers/fd.c ---- orig/arch/um/drivers/fd.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/fd.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "user_util.h" -+#include "chan_user.h" -+ -+struct fd_chan { -+ int fd; -+ int raw; -+ struct termios tt; -+ char str[sizeof("1234567890\0")]; -+}; -+ -+void *fd_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct fd_chan *data; -+ char *end; -+ int n; -+ -+ if(*str != ':'){ -+ printk("fd_init : channel type 'fd' must specify a file " -+ "descriptor\n"); -+ return(NULL); -+ } -+ str++; -+ n = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("fd_init : couldn't parse file descriptor '%s'\n", str); -+ return(NULL); -+ } -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct fd_chan) { .fd = n, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int fd_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct fd_chan *data = d; -+ -+ if(data->raw && isatty(data->fd)){ -+ tcgetattr(data->fd, &data->tt); -+ raw(data->fd, 0); -+ } -+ sprintf(data->str, "%d", data->fd); -+ *dev_out = data->str; -+ return(data->fd); -+} -+ -+void fd_close(int fd, void *d) -+{ -+ struct fd_chan *data = d; -+ -+ if(data->raw && isatty(fd)){ -+ tcsetattr(fd, TCSAFLUSH, &data->tt); -+ data->raw = 0; -+ } -+} -+ -+int fd_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct fd_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops fd_ops = { -+ .type = "fd", -+ .init = fd_init, -+ .open = fd_open, -+ .close = fd_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = fd_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/harddog_kern.c um/arch/um/drivers/harddog_kern.c ---- orig/arch/um/drivers/harddog_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/harddog_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,194 @@ -+/* UML hardware watchdog, shamelessly stolen from: -+ * -+ * SoftDog 0.05: A Software Watchdog Device -+ * -+ * (c) Copyright 1996 Alan Cox , All Rights Reserved. -+ * http://www.redhat.com -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Neither Alan Cox nor CymruNet Ltd. admit liability nor provide -+ * warranty for any of this software. This material is provided -+ * "AS-IS" and at no charge. -+ * -+ * (c) Copyright 1995 Alan Cox -+ * -+ * Software only watchdog driver. Unlike its big brother the WDT501P -+ * driver this won't always recover a failed machine. -+ * -+ * 03/96: Angelo Haritsis : -+ * Modularised. -+ * Added soft_margin; use upon insmod to change the timer delay. -+ * NB: uses same minor as wdt (WATCHDOG_MINOR); we could use separate -+ * minors. -+ * -+ * 19980911 Alan Cox -+ * Made SMP safe for 2.3.x -+ * -+ * 20011127 Joel Becker (jlbec@evilplan.org> -+ * Added soft_noboot; Allows testing the softdog trigger without -+ * requiring a recompile. -+ * Added WDIOC_GETTIMEOUT and WDIOC_SETTIMOUT. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "helper.h" -+#include "mconsole.h" -+ -+MODULE_LICENSE("GPL"); -+ -+/* Locked by the BKL in harddog_open and harddog_release */ -+static int timer_alive; -+static int harddog_in_fd = -1; -+static int harddog_out_fd = -1; -+ -+/* -+ * Allow only one person to hold it open -+ */ -+ -+extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); -+ -+static int harddog_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ char *sock = NULL; -+ -+ lock_kernel(); -+ if(timer_alive) -+ return -EBUSY; -+#ifdef CONFIG_HARDDOG_NOWAYOUT -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MCONSOLE -+ sock = mconsole_notify_socket(); -+#endif -+ err = start_watchdog(&harddog_in_fd, &harddog_out_fd, sock); -+ if(err) return(err); -+ -+ timer_alive = 1; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern void stop_watchdog(int in_fd, int out_fd); -+ -+static int harddog_release(struct inode *inode, struct file *file) -+{ -+ /* -+ * Shut off the timer. -+ */ -+ lock_kernel(); -+ -+ stop_watchdog(harddog_in_fd, harddog_out_fd); -+ harddog_in_fd = -1; -+ harddog_out_fd = -1; -+ -+ timer_alive=0; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern int ping_watchdog(int fd); -+ -+static ssize_t harddog_write(struct file *file, const char *data, size_t len, -+ loff_t *ppos) -+{ -+ /* Can't seek (pwrite) on this device */ -+ if (ppos != &file->f_pos) -+ return -ESPIPE; -+ -+ /* -+ * Refresh the timer. -+ */ -+ if(len) -+ return(ping_watchdog(harddog_out_fd)); -+ return 0; -+} -+ -+static int harddog_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ static struct watchdog_info ident = { -+ WDIOF_SETTIMEOUT, -+ 0, -+ "UML Hardware Watchdog" -+ }; -+ switch (cmd) { -+ default: -+ return -ENOTTY; -+ case WDIOC_GETSUPPORT: -+ if(copy_to_user((struct harddog_info *)arg, &ident, -+ sizeof(ident))) -+ return -EFAULT; -+ return 0; -+ case WDIOC_GETSTATUS: -+ case WDIOC_GETBOOTSTATUS: -+ return put_user(0,(int *)arg); -+ case WDIOC_KEEPALIVE: -+ return(ping_watchdog(harddog_out_fd)); -+ } -+} -+ -+static struct file_operations harddog_fops = { -+ .owner = THIS_MODULE, -+ .write = harddog_write, -+ .ioctl = harddog_ioctl, -+ .open = harddog_open, -+ .release = harddog_release, -+}; -+ -+static struct miscdevice harddog_miscdev = { -+ .minor = WATCHDOG_MINOR, -+ .name = "watchdog", -+ .fops = &harddog_fops, -+}; -+ -+static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n"; -+ -+static int __init harddog_init(void) -+{ -+ int ret; -+ -+ ret = misc_register(&harddog_miscdev); -+ -+ if (ret) -+ return ret; -+ -+ printk(banner); -+ -+ return(0); -+} -+ -+static void __exit harddog_exit(void) -+{ -+ misc_deregister(&harddog_miscdev); -+} -+ -+module_init(harddog_init); -+module_exit(harddog_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/harddog_user.c um/arch/um/drivers/harddog_user.c ---- orig/arch/um/drivers/harddog_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/harddog_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include "user_util.h" -+#include "user.h" -+#include "helper.h" -+#include "mconsole.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+struct dog_data { -+ int stdin; -+ int stdout; -+ int close_me[2]; -+}; -+ -+static void pre_exec(void *d) -+{ -+ struct dog_data *data = d; -+ -+ dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ dup2(data->stdout, 2); -+ os_close_file(data->stdin); -+ os_close_file(data->stdout); -+ os_close_file(data->close_me[0]); -+ os_close_file(data->close_me[1]); -+} -+ -+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) -+{ -+ struct dog_data data; -+ int in_fds[2], out_fds[2], pid, n, err; -+ char pid_buf[sizeof("nnnnn\0")], c; -+ char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL }; -+ char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL, -+ NULL }; -+ char **args = NULL; -+ -+ err = os_pipe(in_fds, 1, 0); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = os_pipe(out_fds, 1, 0); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out_close_in; -+ } -+ -+ data.stdin = out_fds[0]; -+ data.stdout = in_fds[1]; -+ data.close_me[0] = out_fds[1]; -+ data.close_me[1] = in_fds[0]; -+ -+ if(sock != NULL){ -+ mconsole_args[2] = sock; -+ args = mconsole_args; -+ } -+ else { -+ /* XXX The os_getpid() is not SMP correct */ -+ sprintf(pid_buf, "%d", CHOOSE_MODE(tracing_pid, os_getpid())); -+ args = pid_args; -+ } -+ -+ pid = run_helper(pre_exec, &data, args, NULL); -+ -+ os_close_file(out_fds[0]); -+ os_close_file(in_fds[1]); -+ -+ if(pid < 0){ -+ err = -pid; -+ printk("harddog_open - run_helper failed, errno = %d\n", -err); -+ goto out_close_out; -+ } -+ -+ n = os_read_file(in_fds[0], &c, sizeof(c)); -+ if(n == 0){ -+ printk("harddog_open - EOF on watchdog pipe\n"); -+ helper_wait(pid); -+ err = -EIO; -+ goto out_close_out; -+ } -+ else if(n < 0){ -+ printk("harddog_open - read of watchdog pipe failed, " -+ "err = %d\n", -n); -+ helper_wait(pid); -+ err = n; -+ goto out_close_out; -+ } -+ *in_fd_ret = in_fds[0]; -+ *out_fd_ret = out_fds[1]; -+ return(0); -+ -+ out_close_in: -+ os_close_file(in_fds[0]); -+ os_close_file(in_fds[1]); -+ out_close_out: -+ os_close_file(out_fds[0]); -+ os_close_file(out_fds[1]); -+ out: -+ return(err); -+} -+ -+void stop_watchdog(int in_fd, int out_fd) -+{ -+ os_close_file(in_fd); -+ os_close_file(out_fd); -+} -+ -+int ping_watchdog(int fd) -+{ -+ int n; -+ char c = '\n'; -+ -+ n = os_write_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("ping_watchdog - write failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ return(-EIO); -+ } -+ return 1; -+ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/hostaudio_kern.c um/arch/um/drivers/hostaudio_kern.c ---- orig/arch/um/drivers/hostaudio_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/hostaudio_kern.c 2004-02-12 05:46:22.000000000 -0500 -@@ -0,0 +1,352 @@ -+/* -+ * Copyright (C) 2002 Steve Schmidtke -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/slab.h" -+#include "linux/fs.h" -+#include "linux/sound.h" -+#include "linux/soundcard.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "init.h" -+#include "os.h" -+ -+struct hostaudio_state { -+ int fd; -+}; -+ -+struct hostmixer_state { -+ int fd; -+}; -+ -+#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" -+#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" -+ -+/* Only changed from linux_main at boot time */ -+char *dsp = HOSTAUDIO_DEV_DSP; -+char *mixer = HOSTAUDIO_DEV_MIXER; -+ -+#define DSP_HELP \ -+" This is used to specify the host dsp device to the hostaudio driver.\n" \ -+" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -+ -+#define MIXER_HELP \ -+" This is used to specify the host mixer device to the hostaudio driver.\n" \ -+" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -+ -+#ifndef MODULE -+static int set_dsp(char *name, int *add) -+{ -+ dsp = name; -+ return(0); -+} -+ -+__uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); -+ -+static int set_mixer(char *name, int *add) -+{ -+ mixer = name; -+ return(0); -+} -+ -+__uml_setup("mixer=", set_mixer, "mixer=\n" MIXER_HELP); -+ -+#else /*MODULE*/ -+ -+MODULE_PARM(dsp, "s"); -+MODULE_PARM_DESC(dsp, DSP_HELP); -+ -+MODULE_PARM(mixer, "s"); -+MODULE_PARM_DESC(mixer, MIXER_HELP); -+ -+#endif -+ -+/* /dev/dsp file operations */ -+ -+static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, -+ loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: read called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = os_read_file(state->fd, kbuf, count); -+ if(err < 0) -+ goto out; -+ -+ if(copy_to_user(buffer, kbuf, err)) -+ err = -EFAULT; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static ssize_t hostaudio_write(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: write called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = -EFAULT; -+ if(copy_from_user(kbuf, buffer, count)) -+ goto out; -+ -+ err = os_write_file(state->fd, kbuf, count); -+ if(err < 0) -+ goto out; -+ *ppos += err; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static unsigned int hostaudio_poll(struct file *file, -+ struct poll_table_struct *wait) -+{ -+ unsigned int mask = 0; -+ -+#ifdef DEBUG -+ printk("hostaudio: poll called (unimplemented)\n"); -+#endif -+ -+ return(mask); -+} -+ -+static int hostaudio_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostaudio_state *state = file->private_data; -+ unsigned long data = 0; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: ioctl called, cmd = %u\n", cmd); -+#endif -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(get_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ err = os_ioctl_generic(state->fd, cmd, (unsigned long) &data); -+ -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(put_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ return(err); -+} -+ -+static int hostaudio_open(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostaudio: open called (host: %s)\n", dsp); -+#endif -+ -+ state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); -+ if(state == NULL) -+ return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); -+ if(ret < 0){ -+ kfree(state); -+ return(ret); -+ } -+ -+ state->fd = ret; -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostaudio_release(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostaudio: release called\n"); -+#endif -+ -+ os_close_file(state->fd); -+ kfree(state); -+ -+ return(0); -+} -+ -+/* /dev/mixer file operations */ -+ -+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostmixer_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostmixer: ioctl called\n"); -+#endif -+ -+ return(os_ioctl_generic(state->fd, cmd, arg)); -+} -+ -+static int hostmixer_open_mixdev(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostmixer: open called (host: %s)\n", mixer); -+#endif -+ -+ state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL); -+ if(state == NULL) return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); -+ -+ if(ret < 0){ -+ printk("hostaudio_open_mixdev failed to open '%s', err = %d\n", -+ dsp, -ret); -+ kfree(state); -+ return(ret); -+ } -+ -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostmixer_release(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostmixer: release called\n"); -+#endif -+ -+ os_close_file(state->fd); -+ kfree(state); -+ -+ return(0); -+} -+ -+ -+/* kernel module operations */ -+ -+static struct file_operations hostaudio_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .read = hostaudio_read, -+ .write = hostaudio_write, -+ .poll = hostaudio_poll, -+ .ioctl = hostaudio_ioctl, -+ .mmap = NULL, -+ .open = hostaudio_open, -+ .release = hostaudio_release, -+}; -+ -+static struct file_operations hostmixer_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .ioctl = hostmixer_ioctl_mixdev, -+ .open = hostmixer_open_mixdev, -+ .release = hostmixer_release, -+}; -+ -+struct { -+ int dev_audio; -+ int dev_mixer; -+} module_data; -+ -+MODULE_AUTHOR("Steve Schmidtke"); -+MODULE_DESCRIPTION("UML Audio Relay"); -+MODULE_LICENSE("GPL"); -+ -+static int __init hostaudio_init_module(void) -+{ -+ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", -+ dsp, mixer); -+ -+ module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); -+ if(module_data.dev_audio < 0){ -+ printk(KERN_ERR "hostaudio: couldn't register DSP device!\n"); -+ return -ENODEV; -+ } -+ -+ module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1); -+ if(module_data.dev_mixer < 0){ -+ printk(KERN_ERR "hostmixer: couldn't register mixer " -+ "device!\n"); -+ unregister_sound_dsp(module_data.dev_audio); -+ return -ENODEV; -+ } -+ -+ return 0; -+} -+ -+static void __exit hostaudio_cleanup_module (void) -+{ -+ unregister_sound_mixer(module_data.dev_mixer); -+ unregister_sound_dsp(module_data.dev_audio); -+} -+ -+module_init(hostaudio_init_module); -+module_exit(hostaudio_cleanup_module); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/line.c um/arch/um/drivers/line.c ---- orig/arch/um/drivers/line.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/line.c 2003-11-07 03:03:57.000000000 -0500 -@@ -0,0 +1,610 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/list.h" -+#include "linux/devfs_fs_kernel.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "chan_kern.h" -+#include "irq_user.h" -+#include "line.h" -+#include "kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "os.h" -+#include "irq_kern.h" -+ -+#define LINE_BUFSIZE 4096 -+ -+static void line_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ -+ if(dev->count > 0) -+ chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, -+ dev); -+} -+ -+static void line_timer_cb(void *arg) -+{ -+ struct line *dev = arg; -+ -+ line_interrupt(dev->driver->read_irq, dev, NULL); -+} -+ -+static int write_room(struct line *dev) -+{ -+ int n; -+ -+ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); -+ -+ n = dev->head - dev->tail; -+ if(n <= 0) n = LINE_BUFSIZE + n; -+ return(n - 1); -+} -+ -+static int buffer_data(struct line *line, const char *buf, int len) -+{ -+ int end, room; -+ -+ if(line->buffer == NULL){ -+ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); -+ if(line->buffer == NULL){ -+ printk("buffer_data - atomic allocation failed\n"); -+ return(0); -+ } -+ line->head = line->buffer; -+ line->tail = line->buffer; -+ } -+ -+ room = write_room(line); -+ len = (len > room) ? room : len; -+ -+ end = line->buffer + LINE_BUFSIZE - line->tail; -+ if(len < end){ -+ memcpy(line->tail, buf, len); -+ line->tail += len; -+ } -+ else { -+ memcpy(line->tail, buf, end); -+ buf += end; -+ len -= end; -+ memcpy(line->buffer, buf, len); -+ line->tail = line->buffer + len; -+ } -+ -+ return(len); -+} -+ -+static int flush_buffer(struct line *line) -+{ -+ int n, count; -+ -+ if((line->buffer == NULL) || (line->head == line->tail)) return(1); -+ -+ if(line->tail < line->head){ -+ count = line->buffer + LINE_BUFSIZE - line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ if(n == count) line->head = line->buffer; -+ else { -+ line->head += n; -+ return(0); -+ } -+ } -+ -+ count = line->tail - line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ -+ line->head += n; -+ return(line->head == line->tail); -+} -+ -+int line_write(struct line *lines, struct tty_struct *tty, int from_user, -+ const char *buf, int len) -+{ -+ struct line *line; -+ char *new; -+ unsigned long flags; -+ int n, err, i, ret = 0; -+ -+ if(tty->stopped) return 0; -+ -+ if(from_user){ -+ new = kmalloc(len, GFP_KERNEL); -+ if(new == NULL) -+ return(0); -+ n = copy_from_user(new, buf, len); -+ buf = new; -+ if(n == len){ -+ len = -EFAULT; -+ goto out_free; -+ } -+ -+ len -= n; -+ } -+ -+ i = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[i]; -+ -+ down(&line->sem); -+ if(line->head != line->tail){ -+ local_irq_save(flags); -+ ret += buffer_data(line, buf, len); -+ err = flush_buffer(line); -+ local_irq_restore(flags); -+ if(err <= 0) -+ goto out_up; -+ } -+ else { -+ n = write_chan(&line->chan_list, buf, len, -+ line->driver->write_irq); -+ if(n < 0){ -+ ret = n; -+ goto out_up; -+ } -+ -+ len -= n; -+ ret += n; -+ if(len > 0) -+ ret += buffer_data(line, buf + n, len); -+ } -+ out_up: -+ up(&line->sem); -+ -+ out_free: -+ if(from_user) -+ kfree(buf); -+ return(ret); -+} -+ -+static void line_write_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ struct tty_struct *tty = dev->tty; -+ int err; -+ -+ err = flush_buffer(dev); -+ if(err == 0) return; -+ else if(err < 0){ -+ dev->head = dev->buffer; -+ dev->tail = dev->buffer; -+ } -+ -+ if(tty == NULL) return; -+ -+ if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && -+ (tty->ldisc.write_wakeup != NULL)) -+ (tty->ldisc.write_wakeup)(tty); -+ -+ /* BLOCKING mode -+ * In blocking mode, everything sleeps on tty->write_wait. -+ * Sleeping in the console driver would break non-blocking -+ * writes. -+ */ -+ -+ if (waitqueue_active(&tty->write_wait)) -+ wake_up_interruptible(&tty->write_wait); -+ -+} -+ -+int line_setup_irq(int fd, int input, int output, void *data) -+{ -+ struct line *line = data; -+ struct line_driver *driver = line->driver; -+ int err = 0, flags = SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM; -+ -+ if(input) err = um_request_irq(driver->read_irq, fd, IRQ_READ, -+ line_interrupt, flags, -+ driver->read_irq_name, line); -+ if(err) return(err); -+ if(output) err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, -+ line_write_interrupt, flags, -+ driver->write_irq_name, line); -+ line->have_irq = 1; -+ return(err); -+} -+ -+void line_disable(struct line *line, int current_irq) -+{ -+ if(!line->have_irq) return; -+ -+ if(line->driver->read_irq == current_irq) -+ free_irq_later(line->driver->read_irq, line); -+ else -+ free_irq(line->driver->read_irq, line); -+ -+ if(line->driver->write_irq == current_irq) -+ free_irq_later(line->driver->write_irq, line); -+ else -+ free_irq(line->driver->write_irq, line); -+ -+ line->have_irq = 0; -+} -+ -+int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts) -+{ -+ struct line *line; -+ int n, err = 0; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ if(line->count == 0){ -+ if(!line->valid){ -+ err = -ENODEV; -+ goto out; -+ } -+ if(list_empty(&line->chan_list)){ -+ err = parse_chan_pair(line->init_str, &line->chan_list, -+ line->init_pri, n, opts); -+ if(err) goto out; -+ err = open_chan(&line->chan_list); -+ if(err) goto out; -+ } -+ enable_chan(&line->chan_list, line); -+ INIT_TQUEUE(&line->task, line_timer_cb, line); -+ } -+ -+ if(!line->sigio){ -+ chan_enable_winch(&line->chan_list, line); -+ line->sigio = 1; -+ } -+ -+ /* This is outside the if because the initial console is opened -+ * with tty == NULL -+ */ -+ line->tty = tty; -+ -+ if(tty != NULL){ -+ tty->driver_data = line; -+ chan_window_size(&line->chan_list, &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ } -+ -+ line->count++; -+ out: -+ up(&line->sem); -+ return(err); -+} -+ -+void line_close(struct line *lines, struct tty_struct *tty) -+{ -+ struct line *line; -+ int n; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ line->count--; -+ -+ /* I don't like this, but I can't think of anything better. What's -+ * going on is that the tty is in the process of being closed for -+ * the last time. Its count hasn't been dropped yet, so it's still -+ * at 1. This may happen when line->count != 0 because of the initial -+ * console open (without a tty) bumping it up to 1. -+ */ -+ if((line->tty != NULL) && (line->tty->count == 1)) -+ line->tty = NULL; -+ if(line->count == 0) -+ line_disable(line, -1); -+ up(&line->sem); -+} -+ -+void close_lines(struct line *lines, int nlines) -+{ -+ int i; -+ -+ for(i = 0; i < nlines; i++) -+ close_chan(&lines[i].chan_list); -+} -+ -+int line_setup(struct line *lines, int num, char *init, int all_allowed) -+{ -+ int i, n; -+ char *end; -+ -+ if(*init == '=') n = -1; -+ else { -+ n = simple_strtoul(init, &end, 0); -+ if(*end != '='){ -+ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", -+ init); -+ return(0); -+ } -+ init = end; -+ } -+ init++; -+ if((n >= 0) && (n >= num)){ -+ printk("line_setup - %d out of range ((0 ... %d) allowed)\n", -+ n, num); -+ return(0); -+ } -+ else if(n >= 0){ -+ if(lines[n].count > 0){ -+ printk("line_setup - device %d is open\n", n); -+ return(0); -+ } -+ if(lines[n].init_pri <= INIT_ONE){ -+ lines[n].init_pri = INIT_ONE; -+ if(!strcmp(init, "none")) lines[n].valid = 0; -+ else { -+ lines[n].init_str = init; -+ lines[n].valid = 1; -+ } -+ } -+ } -+ else if(!all_allowed){ -+ printk("line_setup - can't configure all devices from " -+ "mconsole\n"); -+ return(0); -+ } -+ else { -+ for(i = 0; i < num; i++){ -+ if(lines[i].init_pri <= INIT_ALL){ -+ lines[i].init_pri = INIT_ALL; -+ if(!strcmp(init, "none")) lines[i].valid = 0; -+ else { -+ lines[i].init_str = init; -+ lines[i].valid = 1; -+ } -+ } -+ } -+ } -+ return(1); -+} -+ -+int line_config(struct line *lines, int num, char *str) -+{ -+ char *new = uml_strdup(str); -+ -+ if(new == NULL){ -+ printk("line_config - uml_strdup failed\n"); -+ return(-ENOMEM); -+ } -+ return(!line_setup(lines, num, new, 0)); -+} -+ -+int line_get_config(char *name, struct line *lines, int num, char *str, -+ int size, char **error_out) -+{ -+ struct line *line; -+ char *end; -+ int dev, n = 0; -+ -+ dev = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "line_get_config failed to parse device number"; -+ return(0); -+ } -+ -+ if((dev < 0) || (dev >= num)){ -+ *error_out = "device number of of range"; -+ return(0); -+ } -+ -+ line = &lines[dev]; -+ -+ down(&line->sem); -+ if(!line->valid) -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ else if(line->count == 0) -+ CONFIG_CHUNK(str, size, n, line->init_str, 1); -+ else n = chan_config_string(&line->chan_list, str, size, error_out); -+ up(&line->sem); -+ -+ return(n); -+} -+ -+int line_remove(struct line *lines, int num, char *str) -+{ -+ char config[sizeof("conxxxx=none\0")]; -+ -+ sprintf(config, "%s=none", str); -+ return(!line_setup(lines, num, config, 0)); -+} -+ -+static int line_write_room(struct tty_struct *tty) -+{ -+ struct line *dev = tty->driver_data; -+ -+ return(write_room(dev)); -+} -+ -+void line_register_devfs(struct lines *set, struct line_driver *line_driver, -+ struct tty_driver *driver, struct line *lines, -+ int nlines) -+{ -+ int err, i, n; -+ char *from, *to; -+ -+ driver->driver_name = line_driver->name; -+ driver->name = line_driver->devfs_name; -+ driver->major = line_driver->major; -+ driver->minor_start = line_driver->minor_start; -+ driver->type = line_driver->type; -+ driver->subtype = line_driver->subtype; -+ driver->magic = TTY_DRIVER_MAGIC; -+ driver->flags = TTY_DRIVER_REAL_RAW; -+ -+ n = set->num; -+ driver->num = n; -+ driver->table = kmalloc(n * sizeof(driver->table[0]), GFP_KERNEL); -+ driver->termios = kmalloc(n * sizeof(driver->termios[0]), GFP_KERNEL); -+ driver->termios_locked = kmalloc(n * sizeof(driver->termios_locked[0]), -+ GFP_KERNEL); -+ if((driver->table == NULL) || (driver->termios == NULL) || -+ (driver->termios_locked == NULL)) -+ panic("Failed to allocate driver table"); -+ -+ memset(driver->table, 0, n * sizeof(driver->table[0])); -+ memset(driver->termios, 0, n * sizeof(driver->termios[0])); -+ memset(driver->termios_locked, 0, -+ n * sizeof(driver->termios_locked[0])); -+ -+ driver->write_room = line_write_room; -+ driver->init_termios = tty_std_termios; -+ -+ if (tty_register_driver(driver)) -+ panic("line_register_devfs : Couldn't register driver\n"); -+ -+ from = line_driver->symlink_from; -+ to = line_driver->symlink_to; -+ err = devfs_mk_symlink(NULL, from, 0, to, NULL, NULL); -+ if(err) printk("Symlink creation from /dev/%s to /dev/%s " -+ "returned %d\n", from, to, err); -+ -+ for(i = 0; i < nlines; i++){ -+ if(!lines[i].valid) -+ tty_unregister_devfs(driver, driver->minor_start + i); -+ } -+ -+ mconsole_register_dev(&line_driver->mc); -+} -+ -+void lines_init(struct line *lines, int nlines) -+{ -+ struct line *line; -+ int i; -+ -+ for(i = 0; i < nlines; i++){ -+ line = &lines[i]; -+ INIT_LIST_HEAD(&line->chan_list); -+ sema_init(&line->sem, 1); -+ if(line->init_str != NULL){ -+ line->init_str = uml_strdup(line->init_str); -+ if(line->init_str == NULL) -+ printk("lines_init - uml_strdup returned " -+ "NULL\n"); -+ } -+ } -+} -+ -+struct winch { -+ struct list_head list; -+ int fd; -+ int tty_fd; -+ int pid; -+ struct line *line; -+}; -+ -+void winch_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct winch *winch = data; -+ struct tty_struct *tty; -+ int err; -+ char c; -+ -+ if(winch->fd != -1){ -+ err = generic_read(winch->fd, &c, NULL); -+ if(err < 0){ -+ if(err != -EAGAIN){ -+ printk("winch_interrupt : read failed, " -+ "errno = %d\n", -err); -+ printk("fd %d is losing SIGWINCH support\n", -+ winch->tty_fd); -+ return; -+ } -+ goto out; -+ } -+ } -+ tty = winch->line->tty; -+ if(tty != NULL){ -+ chan_window_size(&winch->line->chan_list, -+ &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ kill_pg(tty->pgrp, SIGWINCH, 1); -+ } -+ out: -+ if(winch->fd != -1) -+ reactivate_fd(winch->fd, WINCH_IRQ); -+} -+ -+DECLARE_MUTEX(winch_handler_sem); -+LIST_HEAD(winch_handlers); -+ -+void register_winch_irq(int fd, int tty_fd, int pid, void *line) -+{ -+ struct winch *winch; -+ -+ down(&winch_handler_sem); -+ winch = kmalloc(sizeof(*winch), GFP_KERNEL); -+ if(winch == NULL){ -+ printk("register_winch_irq - kmalloc failed\n"); -+ goto out; -+ } -+ *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), -+ .fd = fd, -+ .tty_fd = tty_fd, -+ .pid = pid, -+ .line = line }); -+ list_add(&winch->list, &winch_handlers); -+ if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "winch", winch) < 0) -+ printk("register_winch_irq - failed to register IRQ\n"); -+ out: -+ up(&winch_handler_sem); -+} -+ -+static void winch_cleanup(void) -+{ -+ struct list_head *ele; -+ struct winch *winch; -+ -+ list_for_each(ele, &winch_handlers){ -+ winch = list_entry(ele, struct winch, list); -+ if(winch->fd != -1){ -+ deactivate_fd(winch->fd, WINCH_IRQ); -+ os_close_file(winch->fd); -+ } -+ if(winch->pid != -1) -+ os_kill_process(winch->pid, 1); -+ } -+} -+ -+__uml_exitcall(winch_cleanup); -+ -+char *add_xterm_umid(char *base) -+{ -+ char *umid, *title; -+ int len; -+ -+ umid = get_umid(1); -+ if(umid == NULL) return(base); -+ -+ len = strlen(base) + strlen(" ()") + strlen(umid) + 1; -+ title = kmalloc(len, GFP_KERNEL); -+ if(title == NULL){ -+ printk("Failed to allocate buffer for xterm title\n"); -+ return(base); -+ } -+ -+ strncpy(title, base, len); -+ len -= strlen(title); -+ snprintf(&title[strlen(title)], len, " (%s)", umid); -+ return(title); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/Makefile um/arch/um/drivers/Makefile ---- orig/arch/um/drivers/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/Makefile 2004-02-12 03:55:08.000000000 -0500 -@@ -0,0 +1,97 @@ -+# -+# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+CHAN_OBJS := chan_kern.o chan_user.o line.o -+ -+list-multi := slip.o slirp.o daemon.o mcast.o mconsole.o net.o ubd.o \ -+ hostaudio.o pcap.o port.o harddog.o -+ -+slip-objs := slip_kern.o slip_user.o -+slirp-objs := slirp_kern.o slirp_user.o -+daemon-objs := daemon_kern.o daemon_user.o -+mcast-objs := mcast_kern.o mcast_user.o -+pcap-objs := pcap_kern.o pcap_user.o -+pcap-libs := -lpcap -L/usr/lib -+net-objs := net_kern.o net_user.o -+mconsole-objs := mconsole_kern.o mconsole_user.o -+hostaudio-objs := hostaudio_kern.o -+ubd-objs := ubd_kern.o ubd_user.o -+port-objs := port_kern.o port_user.o -+harddog-objs := harddog_kern.o harddog_user.o -+ -+export-objs := mconsole_kern.o -+ -+obj-y = -+obj-$(CONFIG_SSL) += ssl.o -+obj-$(CONFIG_UML_NET_SLIP) += slip.o -+obj-$(CONFIG_UML_NET_SLIRP) += slirp.o -+obj-$(CONFIG_UML_NET_DAEMON) += daemon.o -+obj-$(CONFIG_UML_NET_MCAST) += mcast.o -+obj-$(CONFIG_UML_NET_PCAP) += pcap.o -+obj-$(CONFIG_UML_NET) += net.o -+obj-$(CONFIG_MCONSOLE) += mconsole.o -+obj-$(CONFIG_MMAPPER) += mmapper_kern.o -+obj-$(CONFIG_BLK_DEV_UBD) += ubd.o -+obj-$(CONFIG_HOSTAUDIO) += hostaudio.o -+obj-$(CONFIG_FD_CHAN) += fd.o -+obj-$(CONFIG_NULL_CHAN) += null.o -+obj-$(CONFIG_PORT_CHAN) += port.o -+obj-$(CONFIG_PTY_CHAN) += pty.o -+obj-$(CONFIG_TTY_CHAN) += tty.o -+obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o -+obj-$(CONFIG_UML_WATCHDOG) += harddog.o -+obj-$(CONFIG_COW) += cow_kern.o -+obj-$(CONFIG_COW_COMMON) += cow_user.o -+ -+CFLAGS_pcap_user.o = -I/usr/include/pcap -+ -+obj-y += stdio_console.o $(CHAN_OBJS) -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ -+ null.o pty.o tty.o xterm.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+daemon.o : $(daemon-objs) -+ -+slip.o : $(slip-objs) -+ -+slirp.o : $(slirp-objs) -+ -+mcast.o : $(mcast-objs) -+ -+pcap.o : $(pcap-objs) -+ -+mconsole.o : $(mconsole-objs) -+ -+net.o : $(net-objs) -+ -+hostaudio.o : $(hostaudio-objs) -+ -+ubd.o : $(ubd-objs) -+ -+port.o : $(port-objs) -+ -+harddog.o : $(harddog-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) -r -o $@ $($(patsubst %.o,%,$@)-objs) $($(patsubst %.o,%,$@)-libs) -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast.h um/arch/um/drivers/mcast.h ---- orig/arch/um/drivers/mcast.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mcast.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct mcast_data { -+ char *addr; -+ unsigned short port; -+ void *mcast_addr; -+ int ttl; -+ void *dev; -+}; -+ -+extern struct net_user_info mcast_user_info; -+ -+extern int mcast_user_write(int fd, void *buf, int len, -+ struct mcast_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast_kern.c um/arch/um/drivers/mcast_kern.c ---- orig/arch/um/drivers/mcast_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mcast_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,145 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/in.h" -+#include "linux/inet.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mcast.h" -+ -+struct mcast_init { -+ char *addr; -+ int port; -+ int ttl; -+}; -+ -+void mcast_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct mcast_data *dpri; -+ struct mcast_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct mcast_data *) pri->user; -+ *dpri = ((struct mcast_data) -+ { .addr = init->addr, -+ .port = init->port, -+ .ttl = init->ttl, -+ .mcast_addr = NULL, -+ .dev = dev }); -+ printk("mcast backend "); -+ printk("multicast adddress: %s:%u, TTL:%u ", -+ dpri->addr, dpri->port, dpri->ttl); -+ -+ printk("\n"); -+} -+ -+static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int mcast_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return mcast_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct mcast_data *) &lp->user); -+} -+ -+static struct net_kern_info mcast_kern_info = { -+ .init = mcast_init, -+ .protocol = eth_protocol, -+ .read = mcast_read, -+ .write = mcast_write, -+}; -+ -+int mcast_setup(char *str, char **mac_out, void *data) -+{ -+ struct mcast_init *init = data; -+ char *port_str = NULL, *ttl_str = NULL, *remain; -+ char *last; -+ int n; -+ -+ *init = ((struct mcast_init) -+ { .addr = "239.192.168.1", -+ .port = 1102, -+ .ttl = 1 }); -+ -+ remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, -+ NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "mcast_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(port_str != NULL){ -+ n = simple_strtoul(port_str, &last, 10); -+ if((*last != '\0') || (last == port_str)){ -+ printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", -+ port_str); -+ return(0); -+ } -+ init->port = htons(n); -+ } -+ -+ if(ttl_str != NULL){ -+ init->ttl = simple_strtoul(ttl_str, &last, 10); -+ if((*last != '\0') || (last == ttl_str)){ -+ printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", -+ ttl_str); -+ return(0); -+ } -+ } -+ -+ printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, -+ init->port, init->ttl); -+ -+ return(1); -+} -+ -+static struct transport mcast_transport = { -+ .list = LIST_HEAD_INIT(mcast_transport.list), -+ .name = "mcast", -+ .setup = mcast_setup, -+ .user = &mcast_user_info, -+ .kern = &mcast_kern_info, -+ .private_size = sizeof(struct mcast_data), -+ .setup_size = sizeof(struct mcast_init), -+}; -+ -+static int register_mcast(void) -+{ -+ register_transport(&mcast_transport); -+ return(1); -+} -+ -+__initcall(register_mcast); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast_user.c um/arch/um/drivers/mcast_user.c ---- orig/arch/um/drivers/mcast_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mcast_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,177 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "mcast.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+static struct sockaddr_in *new_addr(char *addr, unsigned short port) -+{ -+ struct sockaddr_in *sin; -+ -+ sin = um_kmalloc(sizeof(struct sockaddr_in)); -+ if(sin == NULL){ -+ printk("new_addr: allocation of sockaddr_in failed\n"); -+ return(NULL); -+ } -+ sin->sin_family = AF_INET; -+ sin->sin_addr.s_addr = in_aton(addr); -+ sin->sin_port = port; -+ return(sin); -+} -+ -+static void mcast_user_init(void *data, void *dev) -+{ -+ struct mcast_data *pri = data; -+ -+ pri->mcast_addr = new_addr(pri->addr, pri->port); -+ pri->dev = dev; -+} -+ -+static int mcast_open(void *data) -+{ -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ struct ip_mreq mreq; -+ int fd, yes = 1; -+ -+ -+ if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) { -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ fd = socket(AF_INET, SOCK_DGRAM, 0); -+ if (fd < 0){ -+ printk("mcast_open : data socket failed, errno = %d\n", -+ errno); -+ fd = -ENOMEM; -+ goto out; -+ } -+ -+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set ttl according to config */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, -+ sizeof(pri->ttl)) < 0) { -+ printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set LOOP, so data does get fed back to local sockets */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* bind socket to mcast address */ -+ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { -+ printk("mcast_open : data bind failed, errno = %d\n", errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* subscribe to the multicast group */ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n", -+ errno); -+ printk("There appears not to be a multicast-capable network " -+ "interface on the host.\n"); -+ printk("eth0 should be configured in order to use the " -+ "multicast transport.\n"); -+ os_close_file(fd); -+ fd = -EINVAL; -+ } -+ -+ out: -+ return(fd); -+} -+ -+static void mcast_close(int fd, void *data) -+{ -+ struct ip_mreq mreq; -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_DROP_MEMBERSHIP failed, error = %d\n", -+ errno); -+ } -+ -+ os_close_file(fd); -+} -+ -+int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) -+{ -+ struct sockaddr_in *data_addr = pri->mcast_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int mcast_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info mcast_user_info = { -+ .init = mcast_user_init, -+ .open = mcast_open, -+ .close = mcast_close, -+ .remove = NULL, -+ .set_mtu = mcast_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mconsole_kern.c um/arch/um/drivers/mconsole_kern.c ---- orig/arch/um/drivers/mconsole_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mconsole_kern.c 2004-02-12 03:09:12.000000000 -0500 -@@ -0,0 +1,560 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/slab.h" -+#include "linux/init.h" -+#include "linux/notifier.h" -+#include "linux/reboot.h" -+#include "linux/utsname.h" -+#include "linux/ctype.h" -+#include "linux/interrupt.h" -+#include "linux/sysrq.h" -+#include "linux/tqueue.h" -+#include "linux/module.h" -+#include "linux/file.h" -+#include "linux/fs.h" -+#include "linux/proc_fs.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole.h" -+#include "mconsole_kern.h" -+#include "irq_user.h" -+#include "init.h" -+#include "os.h" -+#include "umid.h" -+#include "irq_kern.h" -+ -+static int do_unlink_socket(struct notifier_block *notifier, -+ unsigned long what, void *data) -+{ -+ return(mconsole_unlink_socket()); -+} -+ -+ -+static struct notifier_block reboot_notifier = { -+ .notifier_call = do_unlink_socket, -+ .priority = 0, -+}; -+ -+/* Safe without explicit locking for now. Tasklets provide their own -+ * locking, and the interrupt handler is safe because it can't interrupt -+ * itself and it can only happen on CPU 0. -+ */ -+ -+LIST_HEAD(mc_requests); -+ -+void mc_task_proc(void *unused) -+{ -+ struct mconsole_entry *req; -+ unsigned long flags; -+ int done; -+ -+ do { -+ save_flags(flags); -+ req = list_entry(mc_requests.next, struct mconsole_entry, -+ list); -+ list_del(&req->list); -+ done = list_empty(&mc_requests); -+ restore_flags(flags); -+ req->request.cmd->handler(&req->request); -+ kfree(req); -+ } while(!done); -+} -+ -+struct tq_struct mconsole_task = { -+ .routine = mc_task_proc, -+ .data = NULL -+}; -+ -+void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ int fd; -+ struct mconsole_entry *new; -+ struct mc_request req; -+ -+ fd = (int) dev_id; -+ while (mconsole_get_request(fd, &req)){ -+ if(req.cmd->context == MCONSOLE_INTR) -+ (*req.cmd->handler)(&req); -+ else { -+ new = kmalloc(sizeof(*new), GFP_ATOMIC); -+ if(new == NULL) -+ mconsole_reply(&req, "Out of memory", 1, 0); -+ else { -+ new->request = req; -+ list_add(&new->list, &mc_requests); -+ } -+ } -+ } -+ if(!list_empty(&mc_requests)) schedule_task(&mconsole_task); -+ reactivate_fd(fd, MCONSOLE_IRQ); -+} -+ -+void mconsole_version(struct mc_request *req) -+{ -+ char version[256]; -+ -+ sprintf(version, "%s %s %s %s %s", system_utsname.sysname, -+ system_utsname.nodename, system_utsname.release, -+ system_utsname.version, system_utsname.machine); -+ mconsole_reply(req, version, 0, 0); -+} -+ -+void mconsole_log(struct mc_request *req) -+{ -+ int len; -+ char *ptr = req->request.data; -+ -+ ptr += strlen("log "); -+ -+ len = req->len - (ptr - req->request.data); -+ printk("%.*s", len, ptr); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+void mconsole_proc(struct mc_request *req) -+{ -+ struct nameidata nd; -+ struct file_system_type *proc; -+ struct super_block *super; -+ struct file *file; -+ int n, err; -+ char *ptr = req->request.data, *buf; -+ -+ ptr += strlen("proc"); -+ while(isspace(*ptr)) ptr++; -+ -+ proc = get_fs_type("proc"); -+ if(proc == NULL){ -+ mconsole_reply(req, "procfs not registered", 1, 0); -+ goto out; -+ } -+ -+ super = get_anon_super(proc, NULL, NULL); -+ if(super == NULL){ -+ mconsole_reply(req, "Failed to get procfs superblock", 1, 0); -+ goto out_put; -+ } -+ -+ if(super->s_root == NULL){ -+ super = (*proc->read_super)(super, NULL, 0); -+ if(super == NULL){ -+ mconsole_reply(req, "Failed to read superblock", 1, 0); -+ goto out_put; -+ } -+ } -+ up_write(&super->s_umount); -+ -+ nd.dentry = super->s_root; -+ nd.mnt = NULL; -+ nd.flags = O_RDONLY + 1; -+ nd.last_type = LAST_ROOT; -+ -+ err = link_path_walk(ptr, &nd); -+ if(err){ -+ mconsole_reply(req, "Failed to look up file", 1, 0); -+ goto out_kill; -+ } -+ -+ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ if(IS_ERR(file)){ -+ mconsole_reply(req, "Failed to open file", 1, 0); -+ goto out_kill; -+ } -+ -+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ goto out_fput; -+ } -+ -+ if((file->f_op != NULL) && (file->f_op->read != NULL)){ -+ do { -+ n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, -+ &file->f_pos); -+ if(n >= 0){ -+ buf[n] = '\0'; -+ mconsole_reply(req, buf, 0, (n > 0)); -+ } -+ else { -+ mconsole_reply(req, "Read of file failed", -+ 1, 0); -+ goto out_free; -+ } -+ } while(n > 0); -+ } -+ else mconsole_reply(req, "", 0, 0); -+ -+ out_free: -+ kfree(buf); -+ out_fput: -+ fput(file); -+ out_kill: -+ kill_super(super); -+ out_put: -+ /* put_filesystem(proc); */ -+ out: ; -+} -+ -+#define UML_MCONSOLE_HELPTEXT \ -+"Commands: \n\ -+ version - Get kernel version \n\ -+ help - Print this message \n\ -+ halt - Halt UML \n\ -+ reboot - Reboot UML \n\ -+ config = - Add a new device to UML; \n\ -+ same syntax as command line \n\ -+ config - Query the configuration of a device \n\ -+ remove - Remove a device from UML \n\ -+ sysrq - Performs the SysRq action controlled by the letter \n\ -+ cad - invoke the Ctl-Alt-Del handler \n\ -+ stop - pause the UML; it will do nothing until it receives a 'go' \n\ -+ go - continue the UML after a 'stop' \n\ -+ log - make UML enter into the kernel log\n\ -+ proc - returns the contents of the UML's /proc/\n\ -+" -+ -+void mconsole_help(struct mc_request *req) -+{ -+ mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0); -+} -+ -+void mconsole_halt(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_halt(); -+} -+ -+void mconsole_reboot(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_restart(NULL); -+} -+ -+extern void ctrl_alt_del(void); -+ -+void mconsole_cad(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ ctrl_alt_del(); -+} -+ -+void mconsole_go(struct mc_request *req) -+{ -+ mconsole_reply(req, "Not stopped", 1, 0); -+} -+ -+void mconsole_stop(struct mc_request *req) -+{ -+ deactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ os_set_fd_block(req->originating_fd, 1); -+ mconsole_reply(req, "", 0, 0); -+ while(mconsole_get_request(req->originating_fd, req)){ -+ if(req->cmd->handler == mconsole_go) break; -+ (*req->cmd->handler)(req); -+ } -+ os_set_fd_block(req->originating_fd, 0); -+ reactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+/* This list is populated by __initcall routines. */ -+ -+LIST_HEAD(mconsole_devices); -+ -+void mconsole_register_dev(struct mc_device *new) -+{ -+ list_add(&new->list, &mconsole_devices); -+} -+ -+static struct mc_device *mconsole_find_dev(char *name) -+{ -+ struct list_head *ele; -+ struct mc_device *dev; -+ -+ list_for_each(ele, &mconsole_devices){ -+ dev = list_entry(ele, struct mc_device, list); -+ if(!strncmp(name, dev->name, strlen(dev->name))) -+ return(dev); -+ } -+ return(NULL); -+} -+ -+#define CONFIG_BUF_SIZE 64 -+ -+static void mconsole_get_config(int (*get_config)(char *, char *, int, -+ char **), -+ struct mc_request *req, char *name) -+{ -+ char default_buf[CONFIG_BUF_SIZE], *error, *buf; -+ int n, size; -+ -+ if(get_config == NULL){ -+ mconsole_reply(req, "No get_config routine defined", 1, 0); -+ return; -+ } -+ -+ error = NULL; -+ size = sizeof(default_buf)/sizeof(default_buf[0]); -+ buf = default_buf; -+ -+ while(1){ -+ n = (*get_config)(name, buf, size, &error); -+ if(error != NULL){ -+ mconsole_reply(req, error, 1, 0); -+ goto out; -+ } -+ -+ if(n <= size){ -+ mconsole_reply(req, buf, 0, 0); -+ goto out; -+ } -+ -+ if(buf != default_buf) -+ kfree(buf); -+ -+ size = n; -+ buf = kmalloc(size, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ return; -+ } -+ } -+ out: -+ if(buf != default_buf) -+ kfree(buf); -+ -+} -+ -+void mconsole_config(struct mc_request *req) -+{ -+ struct mc_device *dev; -+ char *ptr = req->request.data, *name; -+ int err; -+ -+ ptr += strlen("config"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad configuration option", 1, 0); -+ return; -+ } -+ -+ name = &ptr[strlen(dev->name)]; -+ ptr = name; -+ while((*ptr != '=') && (*ptr != '\0')) -+ ptr++; -+ -+ if(*ptr == '='){ -+ err = (*dev->config)(name); -+ mconsole_reply(req, "", err, 0); -+ } -+ else mconsole_get_config(dev->get_config, req, name); -+} -+ -+void mconsole_remove(struct mc_request *req) -+{ -+ struct mc_device *dev; -+ char *ptr = req->request.data; -+ int err; -+ -+ ptr += strlen("remove"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad remove option", 1, 0); -+ return; -+ } -+ err = (*dev->remove)(&ptr[strlen(dev->name)]); -+ mconsole_reply(req, "", err, 0); -+} -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void mconsole_sysrq(struct mc_request *req) -+{ -+ char *ptr = req->request.data; -+ -+ ptr += strlen("sysrq"); -+ while(isspace(*ptr)) ptr++; -+ -+ mconsole_reply(req, "", 0, 0); -+ handle_sysrq(*ptr, ¤t->thread.regs, NULL, NULL); -+} -+#else -+void mconsole_sysrq(struct mc_request *req) -+{ -+ mconsole_reply(req, "Sysrq not compiled in", 1, 0); -+} -+#endif -+ -+/* Changed by mconsole_setup, which is __setup, and called before SMP is -+ * active. -+ */ -+static char *notify_socket = NULL; -+ -+int mconsole_init(void) -+{ -+ int err, sock; -+ char file[256]; -+ -+ if(umid_file_name("mconsole", file, sizeof(file))) return(-1); -+ snprintf(mconsole_socket_name, sizeof(file), "%s", file); -+ -+ sock = os_create_unix_socket(file, sizeof(file), 1); -+ if (sock < 0){ -+ printk("Failed to initialize management console\n"); -+ return(1); -+ } -+ -+ register_reboot_notifier(&reboot_notifier); -+ -+ err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "mconsole", (void *)sock); -+ if (err){ -+ printk("Failed to get IRQ for management console\n"); -+ return(1); -+ } -+ -+ if(notify_socket != NULL){ -+ notify_socket = uml_strdup(notify_socket); -+ if(notify_socket != NULL) -+ mconsole_notify(notify_socket, MCONSOLE_SOCKET, -+ mconsole_socket_name, -+ strlen(mconsole_socket_name) + 1); -+ else printk(KERN_ERR "mconsole_setup failed to strdup " -+ "string\n"); -+ } -+ -+ printk("mconsole (version %d) initialized on %s\n", -+ MCONSOLE_VERSION, mconsole_socket_name); -+ return(0); -+} -+ -+__initcall(mconsole_init); -+ -+static int write_proc_mconsole(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *buf; -+ -+ buf = kmalloc(count + 1, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ if(copy_from_user(buf, buffer, count)){ -+ count = -EFAULT; -+ goto out; -+ } -+ -+ buf[count] = '\0'; -+ -+ mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); -+ out: -+ kfree(buf); -+ return(count); -+} -+ -+static int create_proc_mconsole(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ if(notify_socket == NULL) return(0); -+ -+ ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL); -+ if(ent == NULL){ -+ printk("create_proc_mconsole : create_proc_entry failed\n"); -+ return(0); -+ } -+ -+ ent->read_proc = NULL; -+ ent->write_proc = write_proc_mconsole; -+ return(0); -+} -+ -+static spinlock_t notify_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void lock_notify(void) -+{ -+ spin_lock(¬ify_spinlock); -+} -+ -+void unlock_notify(void) -+{ -+ spin_unlock(¬ify_spinlock); -+} -+ -+__initcall(create_proc_mconsole); -+ -+#define NOTIFY "=notify:" -+ -+static int mconsole_setup(char *str) -+{ -+ if(!strncmp(str, NOTIFY, strlen(NOTIFY))){ -+ str += strlen(NOTIFY); -+ notify_socket = str; -+ } -+ else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str); -+ return(1); -+} -+ -+__setup("mconsole", mconsole_setup); -+ -+__uml_help(mconsole_setup, -+"mconsole=notify:\n" -+" Requests that the mconsole driver send a message to the named Unix\n" -+" socket containing the name of the mconsole socket. This also serves\n" -+" to notify outside processes when UML has booted far enough to respond\n" -+" to mconsole requests.\n\n" -+); -+ -+static int notify_panic(struct notifier_block *self, unsigned long unused1, -+ void *ptr) -+{ -+ char *message = ptr; -+ -+ if(notify_socket == NULL) return(0); -+ -+ mconsole_notify(notify_socket, MCONSOLE_PANIC, message, -+ strlen(message) + 1); -+ return(0); -+} -+ -+static struct notifier_block panic_exit_notifier = { -+ .notifier_call = notify_panic, -+ .next = NULL, -+ .priority = 1 -+}; -+ -+static int add_notifier(void) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ return(0); -+} -+ -+__initcall(add_notifier); -+ -+char *mconsole_notify_socket(void) -+{ -+ return(notify_socket); -+} -+ -+EXPORT_SYMBOL(mconsole_notify_socket); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mconsole_user.c um/arch/um/drivers/mconsole_user.c ---- orig/arch/um/drivers/mconsole_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mconsole_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,215 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "mconsole.h" -+#include "umid.h" -+ -+static struct mconsole_command commands[] = { -+ { "version", mconsole_version, MCONSOLE_INTR }, -+ { "halt", mconsole_halt, MCONSOLE_PROC }, -+ { "reboot", mconsole_reboot, MCONSOLE_PROC }, -+ { "config", mconsole_config, MCONSOLE_PROC }, -+ { "remove", mconsole_remove, MCONSOLE_PROC }, -+ { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, -+ { "help", mconsole_help, MCONSOLE_INTR }, -+ { "cad", mconsole_cad, MCONSOLE_INTR }, -+ { "stop", mconsole_stop, MCONSOLE_PROC }, -+ { "go", mconsole_go, MCONSOLE_INTR }, -+ { "log", mconsole_log, MCONSOLE_INTR }, -+ { "proc", mconsole_proc, MCONSOLE_PROC }, -+}; -+ -+/* Initialized in mconsole_init, which is an initcall */ -+char mconsole_socket_name[256]; -+ -+int mconsole_reply_v0(struct mc_request *req, char *reply) -+{ -+ struct iovec iov; -+ struct msghdr msg; -+ -+ iov.iov_base = reply; -+ iov.iov_len = strlen(reply); -+ -+ msg.msg_name = &(req->origin); -+ msg.msg_namelen = req->originlen; -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = NULL; -+ msg.msg_controllen = 0; -+ msg.msg_flags = 0; -+ -+ return sendmsg(req->originating_fd, &msg, 0); -+} -+ -+static struct mconsole_command *mconsole_parse(struct mc_request *req) -+{ -+ struct mconsole_command *cmd; -+ int i; -+ -+ for(i=0;irequest.data, cmd->command, -+ strlen(cmd->command))){ -+ return(cmd); -+ } -+ } -+ return(NULL); -+} -+ -+#define MIN(a,b) ((a)<(b) ? (a):(b)) -+ -+#define STRINGX(x) #x -+#define STRING(x) STRINGX(x) -+ -+int mconsole_get_request(int fd, struct mc_request *req) -+{ -+ int len; -+ -+ req->originlen = sizeof(req->origin); -+ req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, -+ (struct sockaddr *) req->origin, &req->originlen); -+ if (req->len < 0) -+ return 0; -+ -+ req->originating_fd = fd; -+ -+ if(req->request.magic != MCONSOLE_MAGIC){ -+ /* Unversioned request */ -+ len = MIN(sizeof(req->request.data) - 1, -+ strlen((char *) &req->request)); -+ memmove(req->request.data, &req->request, len); -+ req->request.data[len] = '\0'; -+ -+ req->request.magic = MCONSOLE_MAGIC; -+ req->request.version = 0; -+ req->request.len = len; -+ -+ mconsole_reply_v0(req, "ERR Version 0 mconsole clients are " -+ "not supported by this driver"); -+ return(0); -+ } -+ -+ if(req->request.len >= MCONSOLE_MAX_DATA){ -+ mconsole_reply(req, "Request too large", 1, 0); -+ return(0); -+ } -+ if(req->request.version != MCONSOLE_VERSION){ -+ mconsole_reply(req, "This driver only supports version " -+ STRING(MCONSOLE_VERSION) " clients", 1, 0); -+ } -+ -+ req->request.data[req->request.len] = '\0'; -+ req->cmd = mconsole_parse(req); -+ if(req->cmd == NULL){ -+ mconsole_reply(req, "Unknown command", 1, 0); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+int mconsole_reply(struct mc_request *req, char *str, int err, int more) -+{ -+ struct mconsole_reply reply; -+ int total, len, n; -+ -+ total = strlen(str); -+ do { -+ reply.err = err; -+ -+ /* err can only be true on the first packet */ -+ err = 0; -+ -+ len = MIN(total, MCONSOLE_MAX_DATA - 1); -+ -+ if(len == total) reply.more = more; -+ else reply.more = 1; -+ -+ memcpy(reply.data, str, len); -+ reply.data[len] = '\0'; -+ total -= len; -+ str += len; -+ reply.len = len + 1; -+ -+ len = sizeof(reply) + reply.len - sizeof(reply.data); -+ -+ n = sendto(req->originating_fd, &reply, len, 0, -+ (struct sockaddr *) req->origin, req->originlen); -+ -+ if(n < 0) return(-errno); -+ } while(total > 0); -+ return(0); -+} -+ -+int mconsole_unlink_socket(void) -+{ -+ unlink(mconsole_socket_name); -+ return 0; -+} -+ -+static int notify_sock = -1; -+ -+int mconsole_notify(char *sock_name, int type, const void *data, int len) -+{ -+ struct sockaddr_un target; -+ struct mconsole_notify packet; -+ int n, err = 0; -+ -+ lock_notify(); -+ if(notify_sock < 0){ -+ notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0); -+ if(notify_sock < 0){ -+ printk("mconsole_notify - socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ } -+ } -+ unlock_notify(); -+ -+ if(err) -+ return(err); -+ -+ target.sun_family = AF_UNIX; -+ strcpy(target.sun_path, sock_name); -+ -+ packet.magic = MCONSOLE_MAGIC; -+ packet.version = MCONSOLE_VERSION; -+ packet.type = type; -+ len = (len > sizeof(packet.data)) ? sizeof(packet.data) : len; -+ packet.len = len; -+ memcpy(packet.data, data, len); -+ -+ err = 0; -+ len = sizeof(packet) + packet.len - sizeof(packet.data); -+ n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, -+ sizeof(target)); -+ if(n < 0){ -+ printk("mconsole_notify - sendto failed, errno = %d\n", errno); -+ err = -errno; -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mmapper_kern.c um/arch/um/drivers/mmapper_kern.c ---- orig/arch/um/drivers/mmapper_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/mmapper_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,151 @@ -+/* -+ * arch/um/drivers/mmapper_kern.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * -+ * Copyright (C) 2000 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mem_user.h" -+#include "user_util.h" -+ -+/* These are set in mmapper_init, which is called at boot time */ -+static unsigned long mmapper_size; -+static unsigned long p_buf = 0; -+static char *v_buf = NULL; -+ -+static ssize_t -+mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_to_user(buf,&v_buf[*ppos],count); -+ -+ return count; -+} -+ -+static ssize_t -+mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_from_user(&v_buf[*ppos],buf,count); -+ -+ return count; -+} -+ -+static int -+mmapper_ioctl(struct inode *inode, struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return(-ENOIOCTLCMD); -+} -+ -+static int -+mmapper_mmap(struct file *file, struct vm_area_struct * vma) -+{ -+ int ret = -EINVAL; -+ int size; -+ -+ lock_kernel(); -+ if (vma->vm_pgoff != 0) -+ goto out; -+ -+ size = vma->vm_end - vma->vm_start; -+ if(size > mmapper_size) return(-EFAULT); -+ -+ /* XXX A comment above remap_page_range says it should only be -+ * called when the mm semaphore is held -+ */ -+ if (remap_page_range(vma->vm_start, p_buf, size, vma->vm_page_prot)) -+ goto out; -+ ret = 0; -+out: -+ unlock_kernel(); -+ return ret; -+} -+ -+static int -+mmapper_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int -+mmapper_release(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static struct file_operations mmapper_fops = { -+ .owner = THIS_MODULE, -+ .read = mmapper_read, -+ .write = mmapper_write, -+ .ioctl = mmapper_ioctl, -+ .mmap = mmapper_mmap, -+ .open = mmapper_open, -+ .release = mmapper_release, -+}; -+ -+static int __init mmapper_init(void) -+{ -+ printk(KERN_INFO "Mapper v0.1\n"); -+ -+ v_buf = (char *) find_iomem("mmapper", &mmapper_size); -+ if(mmapper_size == 0){ -+ printk(KERN_ERR "mmapper_init - find_iomem failed\n"); -+ return(0); -+ } -+ -+ p_buf = __pa(v_buf); -+ -+ devfs_register (NULL, "mmapper", DEVFS_FL_DEFAULT, -+ 30, 0, S_IFCHR | S_IRUGO | S_IWUGO, -+ &mmapper_fops, NULL); -+ devfs_mk_symlink(NULL, "mmapper0", DEVFS_FL_DEFAULT, "mmapper", -+ NULL, NULL); -+ return(0); -+} -+ -+static void mmapper_exit(void) -+{ -+} -+ -+module_init(mmapper_init); -+module_exit(mmapper_exit); -+ -+MODULE_AUTHOR("Greg Lonnon "); -+MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/net_kern.c um/arch/um/drivers/net_kern.c ---- orig/arch/um/drivers/net_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/net_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,872 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/netdevice.h" -+#include "linux/rtnetlink.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/spinlock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/etherdevice.h" -+#include "linux/list.h" -+#include "linux/inetdevice.h" -+#include "linux/ctype.h" -+#include "linux/bootmem.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; -+LIST_HEAD(opened); -+ -+static int uml_net_rx(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int pkt_len; -+ struct sk_buff *skb; -+ -+ /* If we can't allocate memory, try again next round. */ -+ skb = dev_alloc_skb(dev->mtu); -+ if (skb == NULL) { -+ lp->stats.rx_dropped++; -+ return 0; -+ } -+ -+ skb->dev = dev; -+ skb_put(skb, dev->mtu); -+ skb->mac.raw = skb->data; -+ pkt_len = (*lp->read)(lp->fd, &skb, lp); -+ -+ if (pkt_len > 0) { -+ skb_trim(skb, pkt_len); -+ skb->protocol = (*lp->protocol)(skb); -+ netif_rx(skb); -+ -+ lp->stats.rx_bytes += skb->len; -+ lp->stats.rx_packets++; -+ return pkt_len; -+ } -+ -+ kfree_skb(skb); -+ return pkt_len; -+} -+ -+void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct net_device *dev = dev_id; -+ struct uml_net_private *lp = dev->priv; -+ int err; -+ -+ if(!netif_running(dev)) -+ return; -+ -+ spin_lock(&lp->lock); -+ while((err = uml_net_rx(dev)) > 0) ; -+ if(err < 0) { -+ printk(KERN_ERR -+ "Device '%s' read returned %d, shutting it down\n", -+ dev->name, err); -+ dev_close(dev); -+ goto out; -+ } -+ reactivate_fd(lp->fd, UM_ETH_IRQ); -+ -+ out: -+ spin_unlock(&lp->lock); -+} -+ -+static int uml_net_open(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ char addr[sizeof("255.255.255.255\0")]; -+ int err; -+ -+ spin_lock(&lp->lock); -+ -+ if(lp->fd >= 0){ -+ err = -ENXIO; -+ goto out; -+ } -+ -+ if(!lp->have_mac){ -+ dev_ip_addr(dev, addr, &lp->mac[2]); -+ set_ether_mac(dev, lp->mac); -+ } -+ -+ lp->fd = (*lp->open)(&lp->user); -+ if(lp->fd < 0){ -+ err = lp->fd; -+ goto out; -+ } -+ -+ err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, -+ SA_INTERRUPT | SA_SHIRQ, dev->name, dev); -+ if(err != 0){ -+ printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ err = -ENETUNREACH; -+ } -+ -+ lp->tl.data = (unsigned long) &lp->user; -+ netif_start_queue(dev); -+ -+ spin_lock(&opened_lock); -+ list_add(&lp->list, &opened); -+ spin_unlock(&opened_lock); -+ MOD_INC_USE_COUNT; -+ out: -+ spin_unlock(&lp->lock); -+ return(err); -+} -+ -+static int uml_net_close(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ -+ netif_stop_queue(dev); -+ spin_lock(&lp->lock); -+ -+ free_irq(dev->irq, dev); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ spin_lock(&opened_lock); -+ list_del(&lp->list); -+ spin_unlock(&opened_lock); -+ -+ MOD_DEC_USE_COUNT; -+ spin_unlock(&lp->lock); -+ return 0; -+} -+ -+static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ unsigned long flags; -+ int len; -+ -+ netif_stop_queue(dev); -+ -+ spin_lock_irqsave(&lp->lock, flags); -+ -+ len = (*lp->write)(lp->fd, &skb, lp); -+ -+ if(len == skb->len) { -+ lp->stats.tx_packets++; -+ lp->stats.tx_bytes += skb->len; -+ dev->trans_start = jiffies; -+ netif_start_queue(dev); -+ -+ /* this is normally done in the interrupt when tx finishes */ -+ netif_wake_queue(dev); -+ } -+ else if(len == 0){ -+ netif_start_queue(dev); -+ lp->stats.tx_dropped++; -+ } -+ else { -+ netif_start_queue(dev); -+ printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); -+ } -+ -+ spin_unlock_irqrestore(&lp->lock, flags); -+ -+ dev_kfree_skb(skb); -+ -+ return 0; -+} -+ -+static struct net_device_stats *uml_net_get_stats(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ return &lp->stats; -+} -+ -+static void uml_net_set_multicast_list(struct net_device *dev) -+{ -+ if (dev->flags & IFF_PROMISC) return; -+ else if (dev->mc_count) dev->flags |= IFF_ALLMULTI; -+ else dev->flags &= ~IFF_ALLMULTI; -+} -+ -+static void uml_net_tx_timeout(struct net_device *dev) -+{ -+ dev->trans_start = jiffies; -+ netif_wake_queue(dev); -+} -+ -+static int uml_net_set_mac(struct net_device *dev, void *addr) -+{ -+ struct uml_net_private *lp = dev->priv; -+ struct sockaddr *hwaddr = addr; -+ -+ spin_lock(&lp->lock); -+ memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); -+ spin_unlock(&lp->lock); -+ -+ return(0); -+} -+ -+static int uml_net_change_mtu(struct net_device *dev, int new_mtu) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int err = 0; -+ -+ spin_lock(&lp->lock); -+ -+ new_mtu = (*lp->set_mtu)(new_mtu, &lp->user); -+ if(new_mtu < 0){ -+ err = new_mtu; -+ goto out; -+ } -+ -+ dev->mtu = new_mtu; -+ -+ out: -+ spin_unlock(&lp->lock); -+ return err; -+} -+ -+static int uml_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -+{ -+ return(-EINVAL); -+} -+ -+void uml_net_user_timer_expire(unsigned long _conn) -+{ -+#ifdef undef -+ struct connection *conn = (struct connection *)_conn; -+ -+ dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); -+ do_connect(conn); -+#endif -+} -+ -+/* -+ * default do nothing hard header packet routines for struct net_device init. -+ * real ethernet transports will overwrite with real routines. -+ */ -+static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, -+ unsigned short type, void *daddr, void *saddr, unsigned len) -+{ -+ return(0); /* no change */ -+} -+ -+static int uml_net_rebuild_header(struct sk_buff *skb) -+{ -+ return(0); /* ignore */ -+} -+ -+static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) -+{ -+ return(-1); /* fail */ -+} -+ -+static void uml_net_header_cache_update(struct hh_cache *hh, -+ struct net_device *dev, unsigned char * haddr) -+{ -+ /* ignore */ -+} -+ -+static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) -+{ -+ return(0); /* nothing */ -+} -+ -+static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; -+static struct list_head devices = LIST_HEAD_INIT(devices); -+ -+static int eth_configure(int n, void *init, char *mac, -+ struct transport *transport) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ int save, err, size; -+ -+ size = transport->private_size + sizeof(struct uml_net_private) + -+ sizeof(((struct uml_net_private *) 0)->user); -+ -+ device = kmalloc(sizeof(*device), GFP_KERNEL); -+ if(device == NULL){ -+ printk(KERN_ERR "eth_configure failed to allocate uml_net\n"); -+ return(1); -+ } -+ -+ *device = ((struct uml_net) { .list = LIST_HEAD_INIT(device->list), -+ .dev = NULL, -+ .index = n, -+ .mac = { [ 0 ... 5 ] = 0 }, -+ .have_mac = 0 }); -+ -+ spin_lock(&devices_lock); -+ list_add(&device->list, &devices); -+ spin_unlock(&devices_lock); -+ -+ if(setup_etheraddr(mac, device->mac)) -+ device->have_mac = 1; -+ -+ printk(KERN_INFO "Netdevice %d ", n); -+ if(device->have_mac) printk("(%02x:%02x:%02x:%02x:%02x:%02x) ", -+ device->mac[0], device->mac[1], -+ device->mac[2], device->mac[3], -+ device->mac[4], device->mac[5]); -+ printk(": "); -+ dev = kmalloc(sizeof(*dev) + size, GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "eth_configure: failed to allocate device\n"); -+ return(1); -+ } -+ memset(dev, 0, sizeof(*dev) + size); -+ -+ snprintf(dev->name, sizeof(dev->name), "eth%d", n); -+ dev->priv = (void *) &dev[1]; -+ device->dev = dev; -+ -+ dev->hard_header = uml_net_hard_header; -+ dev->rebuild_header = uml_net_rebuild_header; -+ dev->hard_header_cache = uml_net_header_cache; -+ dev->header_cache_update= uml_net_header_cache_update; -+ dev->hard_header_parse = uml_net_header_parse; -+ -+ (*transport->kern->init)(dev, init); -+ -+ dev->mtu = transport->user->max_packet; -+ dev->open = uml_net_open; -+ dev->hard_start_xmit = uml_net_start_xmit; -+ dev->stop = uml_net_close; -+ dev->get_stats = uml_net_get_stats; -+ dev->set_multicast_list = uml_net_set_multicast_list; -+ dev->tx_timeout = uml_net_tx_timeout; -+ dev->set_mac_address = uml_net_set_mac; -+ dev->change_mtu = uml_net_change_mtu; -+ dev->do_ioctl = uml_net_ioctl; -+ dev->watchdog_timeo = (HZ >> 1); -+ dev->irq = UM_ETH_IRQ; -+ -+ rtnl_lock(); -+ err = register_netdevice(dev); -+ rtnl_unlock(); -+ if(err) -+ return(1); -+ lp = dev->priv; -+ -+ /* lp.user is the first four bytes of the transport data, which -+ * has already been initialized. This structure assignment will -+ * overwrite that, so we make sure that .user gets overwritten with -+ * what it already has. -+ */ -+ save = lp->user[0]; -+ *lp = ((struct uml_net_private) -+ { .list = LIST_HEAD_INIT(lp->list), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .dev = dev, -+ .fd = -1, -+ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, -+ .have_mac = device->have_mac, -+ .protocol = transport->kern->protocol, -+ .open = transport->user->open, -+ .close = transport->user->close, -+ .remove = transport->user->remove, -+ .read = transport->kern->read, -+ .write = transport->kern->write, -+ .add_address = transport->user->add_address, -+ .delete_address = transport->user->delete_address, -+ .set_mtu = transport->user->set_mtu, -+ .user = { save } }); -+ init_timer(&lp->tl); -+ lp->tl.function = uml_net_user_timer_expire; -+ memset(&lp->stats, 0, sizeof(lp->stats)); -+ if(lp->have_mac) memcpy(lp->mac, device->mac, sizeof(lp->mac)); -+ -+ if(transport->user->init) -+ (*transport->user->init)(&lp->user, dev); -+ -+ if(device->have_mac) -+ set_ether_mac(dev, device->mac); -+ return(0); -+} -+ -+static struct uml_net *find_device(int n) -+{ -+ struct uml_net *device; -+ struct list_head *ele; -+ -+ spin_lock(&devices_lock); -+ list_for_each(ele, &devices){ -+ device = list_entry(ele, struct uml_net, list); -+ if(device->index == n) -+ goto out; -+ } -+ device = NULL; -+ out: -+ spin_unlock(&devices_lock); -+ return(device); -+} -+ -+static int eth_parse(char *str, int *index_out, char **str_out) -+{ -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if(end == str){ -+ printk(KERN_ERR "eth_setup: Failed to parse '%s'\n", str); -+ return(1); -+ } -+ if(n < 0){ -+ printk(KERN_ERR "eth_setup: device %d is negative\n", n); -+ return(1); -+ } -+ str = end; -+ if(*str != '='){ -+ printk(KERN_ERR -+ "eth_setup: expected '=' after device number\n"); -+ return(1); -+ } -+ str++; -+ if(find_device(n)){ -+ printk(KERN_ERR "eth_setup: Device %d already configured\n", -+ n); -+ return(1); -+ } -+ if(index_out) *index_out = n; -+ *str_out = str; -+ return(0); -+} -+ -+struct eth_init { -+ struct list_head list; -+ char *init; -+ int index; -+}; -+ -+/* Filled in at boot time. Will need locking if the transports become -+ * modular. -+ */ -+struct list_head transports = LIST_HEAD_INIT(transports); -+ -+/* Filled in during early boot */ -+struct list_head eth_cmd_line = LIST_HEAD_INIT(eth_cmd_line); -+ -+static int check_transport(struct transport *transport, char *eth, int n, -+ void **init_out, char **mac_out) -+{ -+ int len; -+ -+ len = strlen(transport->name); -+ if(strncmp(eth, transport->name, len)) -+ return(0); -+ -+ eth += len; -+ if(*eth == ',') -+ eth++; -+ else if(*eth != '\0') -+ return(0); -+ -+ *init_out = kmalloc(transport->setup_size, GFP_KERNEL); -+ if(*init_out == NULL) -+ return(1); -+ -+ if(!transport->setup(eth, mac_out, *init_out)){ -+ kfree(*init_out); -+ *init_out = NULL; -+ } -+ return(1); -+} -+ -+void register_transport(struct transport *new) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ void *init; -+ char *mac = NULL; -+ int match; -+ -+ list_add(&new->list, &transports); -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ match = check_transport(new, eth->init, eth->index, &init, -+ &mac); -+ if(!match) -+ continue; -+ else if(init != NULL){ -+ eth_configure(eth->index, init, mac, new); -+ kfree(init); -+ } -+ list_del(ð->list); -+ } -+} -+ -+static int eth_setup_common(char *str, int index) -+{ -+ struct list_head *ele; -+ struct transport *transport; -+ void *init; -+ char *mac = NULL; -+ -+ list_for_each(ele, &transports){ -+ transport = list_entry(ele, struct transport, list); -+ if(!check_transport(transport, str, index, &init, &mac)) -+ continue; -+ if(init != NULL){ -+ eth_configure(index, init, mac, transport); -+ kfree(init); -+ } -+ return(1); -+ } -+ return(0); -+} -+ -+static int eth_setup(char *str) -+{ -+ struct eth_init *new; -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(1); -+ -+ new = alloc_bootmem(sizeof(new)); -+ if(new == NULL){ -+ printk("eth_init : alloc_bootmem failed\n"); -+ return(1); -+ } -+ *new = ((struct eth_init) { .list = LIST_HEAD_INIT(new->list), -+ .index = n, -+ .init = str }); -+ list_add_tail(&new->list, ð_cmd_line); -+ return(1); -+} -+ -+__setup("eth", eth_setup); -+__uml_help(eth_setup, -+"eth[0-9]+=,\n" -+" Configure a network device.\n\n" -+); -+ -+static int eth_init(void) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ -+ if(eth_setup_common(eth->init, eth->index)) -+ list_del(ð->list); -+ } -+ -+ return(1); -+} -+ -+__initcall(eth_init); -+ -+static int net_config(char *str) -+{ -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(err); -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "net_config failed to strdup string\n"); -+ return(-1); -+ } -+ err = !eth_setup_common(str, n); -+ if(err) -+ kfree(str); -+ return(err); -+} -+ -+static int net_remove(char *str) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)) -+ return(-1); -+ -+ device = find_device(n); -+ if(device == NULL) -+ return(0); -+ -+ dev = device->dev; -+ lp = dev->priv; -+ if(lp->fd > 0) return(-1); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ unregister_netdev(dev); -+ -+ list_del(&device->list); -+ kfree(device); -+ return(0); -+} -+ -+static struct mc_device net_mc = { -+ .name = "eth", -+ .config = net_config, -+ .get_config = NULL, -+ .remove = net_remove, -+}; -+ -+static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ struct in_ifaddr *ifa = ptr; -+ u32 addr = ifa->ifa_address; -+ u32 netmask = ifa->ifa_mask; -+ struct net_device *dev = ifa->ifa_dev->dev; -+ struct uml_net_private *lp; -+ void (*proc)(unsigned char *, unsigned char *, void *); -+ unsigned char addr_buf[4], netmask_buf[4]; -+ -+ if(dev->open != uml_net_open) return(NOTIFY_DONE); -+ -+ lp = dev->priv; -+ -+ proc = NULL; -+ switch (event){ -+ case NETDEV_UP: -+ proc = lp->add_address; -+ break; -+ case NETDEV_DOWN: -+ proc = lp->delete_address; -+ break; -+ } -+ if(proc != NULL){ -+ addr_buf[0] = addr & 0xff; -+ addr_buf[1] = (addr >> 8) & 0xff; -+ addr_buf[2] = (addr >> 16) & 0xff; -+ addr_buf[3] = addr >> 24; -+ netmask_buf[0] = netmask & 0xff; -+ netmask_buf[1] = (netmask >> 8) & 0xff; -+ netmask_buf[2] = (netmask >> 16) & 0xff; -+ netmask_buf[3] = netmask >> 24; -+ (*proc)(addr_buf, netmask_buf, &lp->user); -+ } -+ return(NOTIFY_DONE); -+} -+ -+struct notifier_block uml_inetaddr_notifier = { -+ .notifier_call = uml_inetaddr_event, -+}; -+ -+static int uml_net_init(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ struct in_device *ip; -+ struct in_ifaddr *in; -+ -+ mconsole_register_dev(&net_mc); -+ register_inetaddr_notifier(¨_inetaddr_notifier); -+ -+ /* Devices may have been opened already, so the uml_inetaddr_notifier -+ * didn't get a chance to run for them. This fakes it so that -+ * addresses which have already been set up get handled properly. -+ */ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ ip = lp->dev->ip_ptr; -+ if(ip == NULL) continue; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ uml_inetaddr_event(NULL, NETDEV_UP, in); -+ in = in->ifa_next; -+ } -+ } -+ -+ return(0); -+} -+ -+__initcall(uml_net_init); -+ -+static void close_devices(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ } -+} -+ -+__uml_exitcall(close_devices); -+ -+int setup_etheraddr(char *str, unsigned char *addr) -+{ -+ char *end; -+ int i; -+ -+ if(str == NULL) -+ return(0); -+ for(i=0;i<6;i++){ -+ addr[i] = simple_strtoul(str, &end, 16); -+ if((end == str) || -+ ((*end != ':') && (*end != ',') && (*end != '\0'))){ -+ printk(KERN_ERR -+ "setup_etheraddr: failed to parse '%s' " -+ "as an ethernet address\n", str); -+ return(0); -+ } -+ str = end + 1; -+ } -+ if(addr[0] & 1){ -+ printk(KERN_ERR -+ "Attempt to assign a broadcast ethernet address to a " -+ "device disallowed\n"); -+ return(0); -+ } -+ return(1); -+} -+ -+void dev_ip_addr(void *d, char *buf, char *bin_buf) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ u32 addr; -+ -+ if((ip == NULL) || ((in = ip->ifa_list) == NULL)){ -+ printk(KERN_WARNING "dev_ip_addr - device not assigned an " -+ "IP address\n"); -+ return; -+ } -+ addr = in->ifa_address; -+ sprintf(buf, "%d.%d.%d.%d", addr & 0xff, (addr >> 8) & 0xff, -+ (addr >> 16) & 0xff, addr >> 24); -+ if(bin_buf){ -+ bin_buf[0] = addr & 0xff; -+ bin_buf[1] = (addr >> 8) & 0xff; -+ bin_buf[2] = (addr >> 16) & 0xff; -+ bin_buf[3] = addr >> 24; -+ } -+} -+ -+void set_ether_mac(void *d, unsigned char *addr) -+{ -+ struct net_device *dev = d; -+ -+ memcpy(dev->dev_addr, addr, ETH_ALEN); -+} -+ -+struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) -+{ -+ if((skb != NULL) && (skb_tailroom(skb) < extra)){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_copy_expand(skb, 0, extra, GFP_ATOMIC); -+ dev_kfree_skb(skb); -+ skb = skb2; -+ } -+ if(skb != NULL) skb_put(skb, extra); -+ return(skb); -+} -+ -+void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, -+ void *), -+ void *arg) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ unsigned char address[4], netmask[4]; -+ -+ if(ip == NULL) return; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ address[0] = in->ifa_address & 0xff; -+ address[1] = (in->ifa_address >> 8) & 0xff; -+ address[2] = (in->ifa_address >> 16) & 0xff; -+ address[3] = in->ifa_address >> 24; -+ netmask[0] = in->ifa_mask & 0xff; -+ netmask[1] = (in->ifa_mask >> 8) & 0xff; -+ netmask[2] = (in->ifa_mask >> 16) & 0xff; -+ netmask[3] = in->ifa_mask >> 24; -+ (*cb)(address, netmask, arg); -+ in = in->ifa_next; -+ } -+} -+ -+int dev_netmask(void *d, void *m) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ __u32 *mask_out = m; -+ -+ if(ip == NULL) -+ return(1); -+ -+ in = ip->ifa_list; -+ if(in == NULL) -+ return(1); -+ -+ *mask_out = in->ifa_mask; -+ return(0); -+} -+ -+void *get_output_buffer(int *len_out) -+{ -+ void *ret; -+ -+ ret = (void *) __get_free_pages(GFP_KERNEL, 0); -+ if(ret) *len_out = PAGE_SIZE; -+ else *len_out = 0; -+ return(ret); -+} -+ -+void free_output_buffer(void *buffer) -+{ -+ free_pages((unsigned long) buffer, 0); -+} -+ -+int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, -+ char **gate_addr) -+{ -+ char *remain; -+ -+ remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); -+ if(remain != NULL){ -+ printk("tap_setup_common - Extra garbage on specification : " -+ "'%s'\n", remain); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+unsigned short eth_protocol(struct sk_buff *skb) -+{ -+ return(eth_type_trans(skb, skb->dev)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/net_user.c um/arch/um/drivers/net_user.c ---- orig/arch/um/drivers/net_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/net_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,252 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_user.h" -+#include "helper.h" -+#include "os.h" -+ -+int tap_open_common(void *dev, char *gate_addr) -+{ -+ int tap_addr[4]; -+ -+ if(gate_addr == NULL) return(0); -+ if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ -+ printk("Invalid tap IP address - '%s'\n", gate_addr); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+void tap_check_ips(char *gate_addr, char *eth_addr) -+{ -+ int tap_addr[4]; -+ -+ if((gate_addr != NULL) && -+ (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && -+ (eth_addr[0] == tap_addr[0]) && -+ (eth_addr[1] == tap_addr[1]) && -+ (eth_addr[2] == tap_addr[2]) && -+ (eth_addr[3] == tap_addr[3])){ -+ printk("The tap IP address and the UML eth IP address" -+ " must be different\n"); -+ } -+} -+ -+void read_output(int fd, char *output, int len) -+{ -+ int remain, n, actual; -+ char c; -+ -+ if(output == NULL){ -+ output = &c; -+ len = sizeof(c); -+ } -+ -+ *output = '\0'; -+ n = os_read_file(fd, &remain, sizeof(remain)); -+ if(n != sizeof(remain)){ -+ printk("read_output - read of length failed, err = %d\n", -n); -+ return; -+ } -+ -+ while(remain != 0){ -+ n = (remain < len) ? remain : len; -+ actual = os_read_file(fd, output, n); -+ if(actual != n){ -+ printk("read_output - read of data failed, " -+ "err = %d\n", -actual); -+ return; -+ } -+ remain -= actual; -+ } -+ return; -+} -+ -+int net_read(int fd, void *buf, int len) -+{ -+ int n; -+ -+ n = os_read_file(fd, buf, len); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); -+ return(n); -+} -+ -+int net_recvfrom(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = recvfrom(fd, buf, len, 0, NULL, NULL)) < 0) && -+ (errno == EINTR)) ; -+ -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_write(int fd, void *buf, int len) -+{ -+ int n; -+ -+ n = os_write_file(fd, buf, len); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); -+ return(n); -+} -+ -+int net_send(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = send(fd, buf, len, 0)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_sendto(int fd, void *buf, int len, void *to, int sock_len) -+{ -+ int n; -+ -+ while(((n = sendto(fd, buf, len, 0, (struct sockaddr *) to, -+ sock_len)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+struct change_pre_exec_data { -+ int close_me; -+ int stdout; -+}; -+ -+static void change_pre_exec(void *arg) -+{ -+ struct change_pre_exec_data *data = arg; -+ -+ os_close_file(data->close_me); -+ dup2(data->stdout, 1); -+} -+ -+static int change_tramp(char **argv, char *output, int output_len) -+{ -+ int pid, fds[2], err; -+ struct change_pre_exec_data pe_data; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("change_tramp - pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ pe_data.close_me = fds[0]; -+ pe_data.stdout = fds[1]; -+ pid = run_helper(change_pre_exec, &pe_data, argv, NULL); -+ -+ os_close_file(fds[1]); -+ read_output(fds[0], output, output_len); -+ waitpid(pid, NULL, 0); -+ return(pid); -+} -+ -+static void change(char *dev, char *what, unsigned char *addr, -+ unsigned char *netmask) -+{ -+ char addr_buf[sizeof("255.255.255.255\0")]; -+ char netmask_buf[sizeof("255.255.255.255\0")]; -+ char version[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version, what, dev, addr_buf, -+ netmask_buf, NULL }; -+ char *output; -+ int output_len, pid; -+ -+ sprintf(version, "%d", UML_NET_VERSION); -+ sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); -+ sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], -+ netmask[2], netmask[3]); -+ -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("change : failed to allocate output buffer\n"); -+ -+ pid = change_tramp(argv, output, output_len); -+ if(pid < 0) return; -+ -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "add", addr, netmask); -+} -+ -+void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "del", addr, netmask); -+} -+ -+char *split_if_spec(char *str, ...) -+{ -+ char **arg, *end; -+ va_list ap; -+ -+ va_start(ap, str); -+ while((arg = va_arg(ap, char **)) != NULL){ -+ if(*str == '\0') -+ return(NULL); -+ end = strchr(str, ','); -+ if(end != str) -+ *arg = str; -+ if(end == NULL) -+ return(NULL); -+ *end++ = '\0'; -+ str = end; -+ } -+ va_end(ap); -+ return(str); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/null.c um/arch/um/drivers/null.c ---- orig/arch/um/drivers/null.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/null.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include "chan_user.h" -+#include "os.h" -+ -+static int null_chan; -+ -+void *null_init(char *str, int device, struct chan_opts *opts) -+{ -+ return(&null_chan); -+} -+ -+int null_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ *dev_out = NULL; -+ return(os_open_file(DEV_NULL, of_rdwr(OPENFLAGS()), 0)); -+} -+ -+int null_read(int fd, char *c_out, void *unused) -+{ -+ return(-ENODEV); -+} -+ -+void null_free(void *data) -+{ -+} -+ -+struct chan_ops null_ops = { -+ .type = "null", -+ .init = null_init, -+ .open = null_open, -+ .close = generic_close, -+ .read = null_read, -+ .write = generic_write, -+ .console_write = generic_console_write, -+ .window_size = generic_window_size, -+ .free = null_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_kern.c um/arch/um/drivers/pcap_kern.c ---- orig/arch/um/drivers/pcap_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/pcap_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "pcap_user.h" -+ -+struct pcap_init { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+}; -+ -+void pcap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct pcap_data *ppri; -+ struct pcap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ ppri = (struct pcap_data *) pri->user; -+ *ppri = ((struct pcap_data) -+ { .host_if = init->host_if, -+ .promisc = init->promisc, -+ .optimize = init->optimize, -+ .filter = init->filter, -+ .compiled = NULL, -+ .pcap = NULL }); -+} -+ -+static int pcap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(pcap_user_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER, -+ (struct pcap_data *) &lp->user)); -+} -+ -+static int pcap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ return(-EPERM); -+} -+ -+static struct net_kern_info pcap_kern_info = { -+ .init = pcap_init, -+ .protocol = eth_protocol, -+ .read = pcap_read, -+ .write = pcap_write, -+}; -+ -+int pcap_setup(char *str, char **mac_out, void *data) -+{ -+ struct pcap_init *init = data; -+ char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; -+ int i; -+ -+ *init = ((struct pcap_init) -+ { .host_if = "eth0", -+ .promisc = 1, -+ .optimize = 0, -+ .filter = NULL }); -+ -+ remain = split_if_spec(str, &host_if, &init->filter, -+ &options[0], &options[1], NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "pcap_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(host_if != NULL) -+ init->host_if = host_if; -+ -+ for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ -+ if(options[i] == NULL) -+ continue; -+ if(!strcmp(options[i], "promisc")) -+ init->promisc = 1; -+ else if(!strcmp(options[i], "nopromisc")) -+ init->promisc = 0; -+ else if(!strcmp(options[i], "optimize")) -+ init->optimize = 1; -+ else if(!strcmp(options[i], "nooptimize")) -+ init->optimize = 0; -+ else printk("pcap_setup : bad option - '%s'\n", options[i]); -+ } -+ -+ return(1); -+} -+ -+static struct transport pcap_transport = { -+ .list = LIST_HEAD_INIT(pcap_transport.list), -+ .name = "pcap", -+ .setup = pcap_setup, -+ .user = &pcap_user_info, -+ .kern = &pcap_kern_info, -+ .private_size = sizeof(struct pcap_data), -+ .setup_size = sizeof(struct pcap_init), -+}; -+ -+static int register_pcap(void) -+{ -+ register_transport(&pcap_transport); -+ return(1); -+} -+ -+__initcall(register_pcap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_user.c um/arch/um/drivers/pcap_user.c ---- orig/arch/um/drivers/pcap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/pcap_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike -+ * Licensed under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "pcap_user.h" -+#include "user.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+#define PCAP_FD(p) (*(int *)(p)) -+ -+static void pcap_user_init(void *data, void *dev) -+{ -+ struct pcap_data *pri = data; -+ pcap_t *p; -+ char errors[PCAP_ERRBUF_SIZE]; -+ -+ p = pcap_open_live(pri->host_if, MAX_PACKET, pri->promisc, 0, errors); -+ if(p == NULL){ -+ printk("pcap_user_init : pcap_open_live failed - '%s'\n", -+ errors); -+ return; -+ } -+ -+ pri->dev = dev; -+ pri->pcap = p; -+} -+ -+static int pcap_open(void *data) -+{ -+ struct pcap_data *pri = data; -+ __u32 netmask; -+ int err; -+ -+ if(pri->pcap == NULL) -+ return(-ENODEV); -+ -+ if(pri->filter != NULL){ -+ err = dev_netmask(pri->dev, &netmask); -+ if(err < 0){ -+ printk("pcap_open : dev_netmask failed\n"); -+ return(-EIO); -+ } -+ -+ pri->compiled = um_kmalloc(sizeof(struct bpf_program)); -+ if(pri->compiled == NULL){ -+ printk("pcap_open : kmalloc failed\n"); -+ return(-ENOMEM); -+ } -+ -+ err = pcap_compile(pri->pcap, -+ (struct bpf_program *) pri->compiled, -+ pri->filter, pri->optimize, netmask); -+ if(err < 0){ -+ printk("pcap_open : pcap_compile failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ -+ err = pcap_setfilter(pri->pcap, pri->compiled); -+ if(err < 0){ -+ printk("pcap_open : pcap_setfilter failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ } -+ -+ return(PCAP_FD(pri->pcap)); -+} -+ -+static void pcap_remove(void *data) -+{ -+ struct pcap_data *pri = data; -+ -+ if(pri->compiled != NULL) -+ pcap_freecode(pri->compiled); -+ -+ pcap_close(pri->pcap); -+} -+ -+struct pcap_handler_data { -+ char *buffer; -+ int len; -+}; -+ -+static void handler(u_char *data, const struct pcap_pkthdr *header, -+ const u_char *packet) -+{ -+ int len; -+ -+ struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; -+ -+ len = hdata->len < header->caplen ? hdata->len : header->caplen; -+ memcpy(hdata->buffer, packet, len); -+ hdata->len = len; -+} -+ -+int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -+{ -+ struct pcap_handler_data hdata = ((struct pcap_handler_data) -+ { .buffer = buffer, -+ .len = len }); -+ int n; -+ -+ n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); -+ if(n < 0){ -+ printk("pcap_dispatch failed - %s\n", pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ else if(n == 0) -+ return(0); -+ return(hdata.len); -+} -+ -+struct net_user_info pcap_user_info = { -+ .init = pcap_user_init, -+ .open = pcap_open, -+ .close = NULL, -+ .remove = pcap_remove, -+ .set_mtu = NULL, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_user.h um/arch/um/drivers/pcap_user.h ---- orig/arch/um/drivers/pcap_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/pcap_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct pcap_data { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+ void *compiled; -+ void *pcap; -+ void *dev; -+}; -+ -+extern struct net_user_info pcap_user_info; -+ -+extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port.h um/arch/um/drivers/port.h ---- orig/arch/um/drivers/port.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/port.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PORT_H__ -+#define __PORT_H__ -+ -+extern void *port_data(int port); -+extern int port_wait(void *data); -+extern void port_kern_close(void *d); -+extern int port_connection(int fd, int *socket_out, int *pid_out); -+extern int port_listen_fd(int port); -+extern void port_read(int fd, void *data); -+extern void port_kern_free(void *d); -+extern int port_rcv_fd(int fd); -+extern void port_remove_dev(void *d); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port_kern.c um/arch/um/drivers/port_kern.c ---- orig/arch/um/drivers/port_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/port_kern.c 2004-01-10 06:18:49.000000000 -0500 -@@ -0,0 +1,303 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/irq.h" -+#include "linux/spinlock.h" -+#include "linux/errno.h" -+#include "asm/semaphore.h" -+#include "asm/errno.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "port.h" -+#include "init.h" -+#include "os.h" -+ -+struct port_list { -+ struct list_head list; -+ int has_connection; -+ struct semaphore sem; -+ int port; -+ int fd; -+ spinlock_t lock; -+ struct list_head pending; -+ struct list_head connections; -+}; -+ -+struct port_dev { -+ struct port_list *port; -+ int helper_pid; -+ int telnetd_pid; -+}; -+ -+struct connection { -+ struct list_head list; -+ int fd; -+ int helper_pid; -+ int socket[2]; -+ int telnetd_pid; -+ struct port_list *port; -+}; -+ -+static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct connection *conn = data; -+ int fd; -+ -+ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); -+ if(fd < 0){ -+ if(fd == -EAGAIN) -+ return; -+ -+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", -+ -fd); -+ os_close_file(conn->fd); -+ } -+ -+ list_del(&conn->list); -+ -+ conn->fd = fd; -+ list_add(&conn->list, &conn->port->connections); -+ -+ up(&conn->port->sem); -+} -+ -+static int port_accept(struct port_list *port) -+{ -+ struct connection *conn; -+ int fd, socket[2], pid, ret = 0; -+ -+ fd = port_connection(port->fd, socket, &pid); -+ if(fd < 0){ -+ if(fd != -EAGAIN) -+ printk(KERN_ERR "port_accept : port_connection " -+ "returned %d\n", -fd); -+ goto out; -+ } -+ -+ conn = kmalloc(sizeof(*conn), GFP_ATOMIC); -+ if(conn == NULL){ -+ printk(KERN_ERR "port_accept : failed to allocate " -+ "connection\n"); -+ goto out_close; -+ } -+ *conn = ((struct connection) -+ { .list = LIST_HEAD_INIT(conn->list), -+ .fd = fd, -+ .socket = { socket[0], socket[1] }, -+ .telnetd_pid = pid, -+ .port = port }); -+ -+ if(um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "telnetd", conn)){ -+ printk(KERN_ERR "port_accept : failed to get IRQ for " -+ "telnetd\n"); -+ goto out_free; -+ } -+ -+ list_add(&conn->list, &port->pending); -+ return(1); -+ -+ out_free: -+ kfree(conn); -+ out_close: -+ os_close_file(fd); -+ if(pid != -1) -+ os_kill_process(pid, 1); -+ out: -+ return(ret); -+} -+ -+DECLARE_MUTEX(ports_sem); -+struct list_head ports = LIST_HEAD_INIT(ports); -+ -+void port_task_proc(void *unused) -+{ -+ struct port_list *port; -+ struct list_head *ele; -+ unsigned long flags; -+ -+ save_flags(flags); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ if(!port->has_connection) -+ continue; -+ reactivate_fd(port->fd, ACCEPT_IRQ); -+ while(port_accept(port)) ; -+ port->has_connection = 0; -+ } -+ restore_flags(flags); -+} -+ -+struct tq_struct port_task = { -+ .routine = port_task_proc, -+ .data = NULL -+}; -+ -+static void port_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct port_list *port = data; -+ -+ port->has_connection = 1; -+ schedule_task(&port_task); -+} -+ -+void *port_data(int port_num) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ struct port_dev *dev = NULL; -+ int fd; -+ -+ down(&ports_sem); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ if(port->port == port_num) goto found; -+ } -+ port = kmalloc(sizeof(struct port_list), GFP_KERNEL); -+ if(port == NULL){ -+ printk(KERN_ERR "Allocation of port list failed\n"); -+ goto out; -+ } -+ -+ fd = port_listen_fd(port_num); -+ if(fd < 0){ -+ printk(KERN_ERR "binding to port %d failed, errno = %d\n", -+ port_num, -fd); -+ goto out_free; -+ } -+ if(um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "port", -+ port)){ -+ printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); -+ goto out_close; -+ } -+ -+ *port = ((struct port_list) -+ { .list = LIST_HEAD_INIT(port->list), -+ .has_connection = 0, -+ .sem = __SEMAPHORE_INITIALIZER(port->sem, -+ 0), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .port = port_num, -+ .fd = fd, -+ .pending = LIST_HEAD_INIT(port->pending), -+ .connections = LIST_HEAD_INIT(port->connections) }); -+ list_add(&port->list, &ports); -+ -+ found: -+ dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "Allocation of port device entry failed\n"); -+ goto out; -+ } -+ -+ *dev = ((struct port_dev) { .port = port, -+ .helper_pid = -1, -+ .telnetd_pid = -1 }); -+ goto out; -+ -+ out_free: -+ kfree(port); -+ out_close: -+ os_close_file(fd); -+ out: -+ up(&ports_sem); -+ return(dev); -+} -+ -+int port_wait(void *data) -+{ -+ struct port_dev *dev = data; -+ struct connection *conn; -+ struct port_list *port = dev->port; -+ int fd; -+ -+ while(1){ -+ if(down_interruptible(&port->sem)) -+ return(-ERESTARTSYS); -+ -+ spin_lock(&port->lock); -+ -+ conn = list_entry(port->connections.next, struct connection, -+ list); -+ list_del(&conn->list); -+ spin_unlock(&port->lock); -+ -+ os_shutdown_socket(conn->socket[0], 1, 1); -+ os_close_file(conn->socket[0]); -+ os_shutdown_socket(conn->socket[1], 1, 1); -+ os_close_file(conn->socket[1]); -+ -+ /* This is done here because freeing an IRQ can't be done -+ * within the IRQ handler. So, pipe_interrupt always ups -+ * the semaphore regardless of whether it got a successful -+ * connection. Then we loop here throwing out failed -+ * connections until a good one is found. -+ */ -+ free_irq(TELNETD_IRQ, conn); -+ -+ if(conn->fd >= 0) break; -+ os_close_file(conn->fd); -+ kfree(conn); -+ } -+ -+ fd = conn->fd; -+ dev->helper_pid = conn->helper_pid; -+ dev->telnetd_pid = conn->telnetd_pid; -+ kfree(conn); -+ -+ return(fd); -+} -+ -+void port_remove_dev(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ if(dev->helper_pid != -1) -+ os_kill_process(dev->helper_pid, 0); -+ if(dev->telnetd_pid != -1) -+ os_kill_process(dev->telnetd_pid, 1); -+ dev->helper_pid = -1; -+ dev->telnetd_pid = -1; -+} -+ -+void port_kern_free(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ port_remove_dev(dev); -+ kfree(dev); -+} -+ -+static void free_port(void) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ free_irq_by_fd(port->fd); -+ os_close_file(port->fd); -+ } -+} -+ -+__uml_exitcall(free_port); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port_user.c um/arch/um/drivers/port_user.c ---- orig/arch/um/drivers/port_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/port_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "chan_user.h" -+#include "port.h" -+#include "helper.h" -+#include "os.h" -+ -+struct port_chan { -+ int raw; -+ struct termios tt; -+ void *kernel_data; -+ char dev[sizeof("32768\0")]; -+}; -+ -+void *port_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct port_chan *data; -+ void *kern_data; -+ char *end; -+ int port; -+ -+ if(*str != ':'){ -+ printk("port_init : channel type 'port' must specify a " -+ "port number\n"); -+ return(NULL); -+ } -+ str++; -+ port = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("port_init : couldn't parse port '%s'\n", str); -+ return(NULL); -+ } -+ -+ kern_data = port_data(port); -+ if(kern_data == NULL) -+ return(NULL); -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) -+ goto err; -+ -+ *data = ((struct port_chan) { .raw = opts->raw, -+ .kernel_data = kern_data }); -+ sprintf(data->dev, "%d", port); -+ -+ return(data); -+ err: -+ port_kern_free(kern_data); -+ return(NULL); -+} -+ -+void port_free(void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_kern_free(data->kernel_data); -+ kfree(data); -+} -+ -+int port_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct port_chan *data = d; -+ int fd; -+ -+ fd = port_wait(data->kernel_data); -+ if((fd >= 0) && data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } -+ *dev_out = data->dev; -+ return(fd); -+} -+ -+void port_close(int fd, void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_remove_dev(data->kernel_data); -+ os_close_file(fd); -+} -+ -+int port_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct port_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops port_ops = { -+ .type = "port", -+ .init = port_init, -+ .open = port_open, -+ .close = port_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = port_console_write, -+ .window_size = generic_window_size, -+ .free = port_free, -+ .winch = 1, -+}; -+ -+int port_listen_fd(int port) -+{ -+ struct sockaddr_in addr; -+ int fd, err; -+ -+ fd = socket(PF_INET, SOCK_STREAM, 0); -+ if(fd == -1) -+ return(-errno); -+ -+ addr.sin_family = AF_INET; -+ addr.sin_port = htons(port); -+ addr.sin_addr.s_addr = htonl(INADDR_ANY); -+ if(bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ if(listen(fd, 1) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ err = os_set_fd_block(fd, 0); -+ if(err < 0) -+ goto out; -+ -+ return(fd); -+ out: -+ os_close_file(fd); -+ return(err); -+} -+ -+struct port_pre_exec_data { -+ int sock_fd; -+ int pipe_fd; -+}; -+ -+void port_pre_exec(void *arg) -+{ -+ struct port_pre_exec_data *data = arg; -+ -+ dup2(data->sock_fd, 0); -+ dup2(data->sock_fd, 1); -+ dup2(data->sock_fd, 2); -+ os_close_file(data->sock_fd); -+ dup2(data->pipe_fd, 3); -+ os_shutdown_socket(3, 1, 0); -+ os_close_file(data->pipe_fd); -+} -+ -+int port_connection(int fd, int *socket, int *pid_out) -+{ -+ int new, err; -+ char *argv[] = { "/usr/sbin/in.telnetd", "-L", -+ "/usr/lib/uml/port-helper", NULL }; -+ struct port_pre_exec_data data; -+ -+ new = os_accept_connection(fd); -+ if(new < 0) -+ return(new); -+ -+ err = os_pipe(socket, 0, 0); -+ if(err < 0) -+ goto out_close; -+ -+ data = ((struct port_pre_exec_data) -+ { .sock_fd = new, -+ .pipe_fd = socket[1] }); -+ -+ err = run_helper(port_pre_exec, &data, argv, NULL); -+ if(err < 0) -+ goto out_shutdown; -+ -+ *pid_out = err; -+ return(new); -+ -+ out_shutdown: -+ os_shutdown_socket(socket[0], 1, 1); -+ os_close_file(socket[0]); -+ os_shutdown_socket(socket[1], 1, 1); -+ os_close_file(socket[1]); -+ out_close: -+ os_close_file(new); -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pty.c um/arch/um/drivers/pty.c ---- orig/arch/um/drivers/pty.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/pty.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,148 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "chan_user.h" -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "os.h" -+ -+struct pty_chan { -+ void (*announce)(char *dev_name, int dev); -+ int dev; -+ int raw; -+ struct termios tt; -+ char dev_name[sizeof("/dev/pts/0123456\0")]; -+}; -+ -+void *pty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct pty_chan *data; -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct pty_chan) { .announce = opts->announce, -+ .dev = device, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int pts_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ char *dev; -+ int fd; -+ -+ fd = get_pty(); -+ if(fd < 0){ -+ printk("open_pts : Failed to open pts\n"); -+ return(-errno); -+ } -+ if(data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } -+ -+ dev = ptsname(fd); -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; -+ if(data->announce) (*data->announce)(dev, data->dev); -+ return(fd); -+} -+ -+int getmaster(char *line) -+{ -+ char *pty, *bank, *cp; -+ int master, err; -+ -+ pty = &line[strlen("/dev/ptyp")]; -+ for (bank = "pqrs"; *bank; bank++) { -+ line[strlen("/dev/pty")] = *bank; -+ *pty = '0'; -+ if (os_stat_file(line, NULL) < 0) -+ break; -+ for (cp = "0123456789abcdef"; *cp; cp++) { -+ *pty = *cp; -+ master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); -+ if (master >= 0) { -+ char *tp = &line[strlen("/dev/")]; -+ -+ /* verify slave side is usable */ -+ *tp = 't'; -+ err = os_access(line, OS_ACC_RW_OK); -+ *tp = 'p'; -+ if(err == 0) return(master); -+ (void) os_close_file(master); -+ } -+ } -+ } -+ return(-1); -+} -+ -+int pty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ int fd; -+ char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; -+ -+ fd = getmaster(dev); -+ if(fd < 0) return(-errno); -+ -+ if(data->raw) raw(fd, 0); -+ if(data->announce) (*data->announce)(dev, data->dev); -+ -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; -+ return(fd); -+} -+ -+int pty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct pty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops pty_ops = { -+ .type = "pty", -+ .init = pty_chan_init, -+ .open = pty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+struct chan_ops pts_ops = { -+ .type = "pts", -+ .init = pty_chan_init, -+ .open = pts_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip.h um/arch/um/drivers/slip.h ---- orig/arch/um/drivers/slip.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slip.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,39 @@ -+#ifndef __UM_SLIP_H -+#define __UM_SLIP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+struct slip_data { -+ void *dev; -+ char name[sizeof("slnnnnn\0")]; -+ char *addr; -+ char *gate_addr; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slip_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); -+extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_kern.c um/arch/um/drivers/slip_kern.c ---- orig/arch/um/drivers/slip_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slip_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,109 @@ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slip.h" -+ -+struct slip_init { -+ char *gate_addr; -+}; -+ -+void slip_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slip_data *spri; -+ struct slip_init *init = data; -+ -+ private = dev->priv; -+ spri = (struct slip_data *) private->user; -+ *spri = ((struct slip_data) -+ { .name = { '\0' }, -+ .addr = NULL, -+ .gate_addr = init->gate_addr, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); -+} -+ -+static unsigned short slip_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slip_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slip_data *) &lp->user)); -+} -+ -+static int slip_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slip_data *) &lp->user)); -+} -+ -+struct net_kern_info slip_kern_info = { -+ .init = slip_init, -+ .protocol = slip_protocol, -+ .read = slip_read, -+ .write = slip_write, -+}; -+ -+static int slip_setup(char *str, char **mac_out, void *data) -+{ -+ struct slip_init *init = data; -+ -+ *init = ((struct slip_init) -+ { .gate_addr = NULL }); -+ -+ if(str[0] != '\0') -+ init->gate_addr = str; -+ return(1); -+} -+ -+static struct transport slip_transport = { -+ .list = LIST_HEAD_INIT(slip_transport.list), -+ .name = "slip", -+ .setup = slip_setup, -+ .user = &slip_user_info, -+ .kern = &slip_kern_info, -+ .private_size = sizeof(struct slip_data), -+ .setup_size = sizeof(struct slip_init), -+}; -+ -+static int register_slip(void) -+{ -+ register_transport(&slip_transport); -+ return(1); -+} -+ -+__initcall(register_slip); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_proto.h um/arch/um/drivers/slip_proto.h ---- orig/arch/um/drivers/slip_proto.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slip_proto.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,93 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SLIP_PROTO_H__ -+#define __UM_SLIP_PROTO_H__ -+ -+/* SLIP protocol characters. */ -+#define SLIP_END 0300 /* indicates end of frame */ -+#define SLIP_ESC 0333 /* indicates byte stuffing */ -+#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ -+#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ -+ -+static inline int slip_unesc(unsigned char c,char *buf,int *pos, int *esc) -+{ -+ int ret; -+ -+ switch(c){ -+ case SLIP_END: -+ *esc = 0; -+ ret=*pos; -+ *pos=0; -+ return(ret); -+ case SLIP_ESC: -+ *esc = 1; -+ return(0); -+ case SLIP_ESC_ESC: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_ESC; -+ } -+ break; -+ case SLIP_ESC_END: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_END; -+ } -+ break; -+ } -+ buf[(*pos)++] = c; -+ return(0); -+} -+ -+static inline int slip_esc(unsigned char *s, unsigned char *d, int len) -+{ -+ unsigned char *ptr = d; -+ unsigned char c; -+ -+ /* -+ * Send an initial END character to flush out any -+ * data that may have accumulated in the receiver -+ * due to line noise. -+ */ -+ -+ *ptr++ = SLIP_END; -+ -+ /* -+ * For each byte in the packet, send the appropriate -+ * character sequence, according to the SLIP protocol. -+ */ -+ -+ while (len-- > 0) { -+ switch(c = *s++) { -+ case SLIP_END: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_END; -+ break; -+ case SLIP_ESC: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_ESC; -+ break; -+ default: -+ *ptr++ = c; -+ break; -+ } -+ } -+ *ptr++ = SLIP_END; -+ return (ptr - d); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_user.c um/arch/um/drivers/slip_user.c ---- orig/arch/um/drivers/slip_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slip_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,274 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slip.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slip_user_init(void *data, void *dev) -+{ -+ struct slip_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static int set_up_tty(int fd) -+{ -+ int i; -+ struct termios tios; -+ -+ if (tcgetattr(fd, &tios) < 0) { -+ printk("could not get initial terminal attributes\n"); -+ return(-1); -+ } -+ -+ tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; -+ tios.c_iflag = IGNBRK | IGNPAR; -+ tios.c_oflag = 0; -+ tios.c_lflag = 0; -+ for (i = 0; i < NCCS; i++) -+ tios.c_cc[i] = 0; -+ tios.c_cc[VMIN] = 1; -+ tios.c_cc[VTIME] = 0; -+ -+ cfsetospeed(&tios, B38400); -+ cfsetispeed(&tios, B38400); -+ -+ if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { -+ printk("failed to set terminal attributes\n"); -+ return(-1); -+ } -+ return(0); -+} -+ -+struct slip_pre_exec_data { -+ int stdin; -+ int stdout; -+ int close_me; -+}; -+ -+static void slip_pre_exec(void *arg) -+{ -+ struct slip_pre_exec_data *data = arg; -+ -+ if(data->stdin >= 0) dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ if(data->close_me >= 0) os_close_file(data->close_me); -+} -+ -+static int slip_tramp(char **argv, int fd) -+{ -+ struct slip_pre_exec_data pe_data; -+ char *output; -+ int status, pid, fds[2], err, output_len; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("slip_tramp : pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ -+ err = 0; -+ pe_data.stdin = fd; -+ pe_data.stdout = fds[1]; -+ pe_data.close_me = fds[0]; -+ pid = run_helper(slip_pre_exec, &pe_data, argv, NULL); -+ -+ if(pid < 0) err = pid; -+ else { -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("slip_tramp : failed to allocate output " -+ "buffer\n"); -+ -+ os_close_file(fds[1]); -+ read_output(fds[0], output, output_len); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+ if(waitpid(pid, &status, 0) < 0) err = errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ -+ printk("'%s' didn't exit with status 0\n", argv[0]); -+ err = -EINVAL; -+ } -+ } -+ return(err); -+} -+ -+static int slip_open(void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, -+ NULL }; -+ int sfd, mfd, err; -+ -+ mfd = get_pty(); -+ if(mfd < 0){ -+ printk("umn : Failed to open pty, err = %d\n", -mfd); -+ return(mfd); -+ } -+ sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); -+ if(sfd < 0){ -+ printk("Couldn't open tty for slip line, err = %d\n", -sfd); -+ return(sfd); -+ } -+ if(set_up_tty(sfd)) return(-1); -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ if(pri->gate_addr != NULL){ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ strcpy(gate_buf, pri->gate_addr); -+ -+ err = slip_tramp(argv, sfd); -+ -+ if(err < 0){ -+ printk("slip_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ err = os_get_ifname(pri->slave, pri->name); -+ if(err < 0){ -+ printk("get_ifname failed, err = %d\n", -err); -+ return(err); -+ } -+ iter_addresses(pri->dev, open_addr, pri->name); -+ } -+ else { -+ err = os_set_slip(sfd); -+ if(err < 0){ -+ printk("Failed to set slip discipline encapsulation - " -+ "err = %d\n", -err); -+ return(err); -+ } -+ } -+ return(mfd); -+} -+ -+static void slip_close(int fd, void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, -+ NULL }; -+ int err; -+ -+ if(pri->gate_addr != NULL) -+ iter_addresses(pri->dev, close_addr, pri->name); -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ err = slip_tramp(argv, -1); -+ -+ if(err != 0) -+ printk("slip_tramp failed - errno = %d\n", -err); -+ os_close_file(fd); -+ os_close_file(pri->slave); -+ pri->slave = -1; -+} -+ -+int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slip_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void slip_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave < 0) return; -+ open_addr(addr, netmask, pri->name); -+} -+ -+static void slip_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave < 0) return; -+ close_addr(addr, netmask, pri->name); -+} -+ -+struct net_user_info slip_user_info = { -+ .init = slip_user_init, -+ .open = slip_open, -+ .close = slip_close, -+ .remove = NULL, -+ .set_mtu = slip_set_mtu, -+ .add_address = slip_add_addr, -+ .delete_address = slip_del_addr, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp.h um/arch/um/drivers/slirp.h ---- orig/arch/um/drivers/slirp.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slirp.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,51 @@ -+#ifndef __UM_SLIRP_H -+#define __UM_SLIRP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+#define SLIRP_MAX_ARGS 100 -+/* -+ * XXX this next definition is here because I don't understand why this -+ * initializer doesn't work in slirp_kern.c: -+ * -+ * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] }, -+ * -+ * or why I can't typecast like this: -+ * -+ * argv : (char* [SLIRP_MAX_ARGS])(init->argv), -+ */ -+struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; -+ -+struct slirp_data { -+ void *dev; -+ struct arg_list_dummy_wrapper argw; -+ int pid; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slirp_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -+extern int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp_kern.c um/arch/um/drivers/slirp_kern.c ---- orig/arch/um/drivers/slirp_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slirp_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,132 @@ -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slirp.h" -+ -+struct slirp_init { -+ struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */ -+}; -+ -+void slirp_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slirp_data *spri; -+ struct slirp_init *init = data; -+ int i; -+ -+ private = dev->priv; -+ spri = (struct slirp_data *) private->user; -+ *spri = ((struct slirp_data) -+ { .argw = init->argw, -+ .pid = -1, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIRP backend - command line:"); -+ for(i=0;spri->argw.argv[i]!=NULL;i++) { -+ printk(" '%s'",spri->argw.argv[i]); -+ } -+ printk("\n"); -+} -+ -+static unsigned short slirp_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slirp_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slirp_data *) &lp->user)); -+} -+ -+static int slirp_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slirp_data *) &lp->user)); -+} -+ -+struct net_kern_info slirp_kern_info = { -+ .init = slirp_init, -+ .protocol = slirp_protocol, -+ .read = slirp_read, -+ .write = slirp_write, -+}; -+ -+static int slirp_setup(char *str, char **mac_out, void *data) -+{ -+ struct slirp_init *init = data; -+ int i=0; -+ -+ *init = ((struct slirp_init) -+ { argw : { { "slirp", NULL } } }); -+ -+ str = split_if_spec(str, mac_out, NULL); -+ -+ if(str == NULL) { /* no command line given after MAC addr */ -+ return(1); -+ } -+ -+ do { -+ if(i>=SLIRP_MAX_ARGS-1) { -+ printk("slirp_setup: truncating slirp arguments\n"); -+ break; -+ } -+ init->argw.argv[i++] = str; -+ while(*str && *str!=',') { -+ if(*str=='_') *str=' '; -+ str++; -+ } -+ if(*str!=',') -+ break; -+ *str++='\0'; -+ } while(1); -+ init->argw.argv[i]=NULL; -+ return(1); -+} -+ -+static struct transport slirp_transport = { -+ .list = LIST_HEAD_INIT(slirp_transport.list), -+ .name = "slirp", -+ .setup = slirp_setup, -+ .user = &slirp_user_info, -+ .kern = &slirp_kern_info, -+ .private_size = sizeof(struct slirp_data), -+ .setup_size = sizeof(struct slirp_init), -+}; -+ -+static int register_slirp(void) -+{ -+ register_transport(&slirp_transport); -+ return(1); -+} -+ -+__initcall(register_slirp); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp_user.c um/arch/um/drivers/slirp_user.c ---- orig/arch/um/drivers/slirp_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/slirp_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,201 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slirp.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slirp_user_init(void *data, void *dev) -+{ -+ struct slirp_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct slirp_pre_exec_data { -+ int stdin; -+ int stdout; -+}; -+ -+static void slirp_pre_exec(void *arg) -+{ -+ struct slirp_pre_exec_data *data = arg; -+ -+ if(data->stdin != -1) dup2(data->stdin, 0); -+ if(data->stdout != -1) dup2(data->stdout, 1); -+} -+ -+static int slirp_tramp(char **argv, int fd) -+{ -+ struct slirp_pre_exec_data pe_data; -+ int pid; -+ -+ pe_data.stdin = fd; -+ pe_data.stdout = fd; -+ pid = run_helper(slirp_pre_exec, &pe_data, argv, NULL); -+ -+ return(pid); -+} -+ -+/* XXX This is just a trivial wrapper around os_pipe */ -+static int slirp_datachan(int *mfd, int *sfd) -+{ -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); -+ return(err); -+ } -+ -+ *mfd = fds[0]; -+ *sfd = fds[1]; -+ return(0); -+} -+ -+static int slirp_open(void *data) -+{ -+ struct slirp_data *pri = data; -+ int sfd, mfd, pid, err; -+ -+ err = slirp_datachan(&mfd, &sfd); -+ if(err) -+ return(err); -+ -+ pid = slirp_tramp(pri->argw.argv, sfd); -+ -+ if(pid < 0){ -+ printk("slirp_tramp failed - errno = %d\n", -pid); -+ os_close_file(sfd); -+ os_close_file(mfd); -+ return(pid); -+ } -+ -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ -+ pri->pid = pid; -+ -+ return(mfd); -+} -+ -+static void slirp_close(int fd, void *data) -+{ -+ struct slirp_data *pri = data; -+ int status,err; -+ -+ os_close_file(fd); -+ os_close_file(pri->slave); -+ -+ pri->slave = -1; -+ -+ if(pri->pid<1) { -+ printk("slirp_close: no child process to shut down\n"); -+ return; -+ } -+ -+#if 0 -+ if(kill(pri->pid, SIGHUP)<0) { -+ printk("slirp_close: sending hangup to %d failed (%d)\n", -+ pri->pid, errno); -+ } -+#endif -+ -+ err = waitpid(pri->pid, &status, WNOHANG); -+ if(err<0) { -+ printk("slirp_close: waitpid returned %d\n", errno); -+ return; -+ } -+ -+ if(err==0) { -+ printk("slirp_close: process %d has not exited\n"); -+ return; -+ } -+ -+ pri->pid = -1; -+} -+ -+int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slirp_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info slirp_user_info = { -+ .init = slirp_user_init, -+ .open = slirp_open, -+ .close = slirp_close, -+ .remove = NULL, -+ .set_mtu = slirp_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ssl.c um/arch/um/drivers/ssl.c ---- orig/arch/um/drivers/ssl.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/ssl.c 2004-01-10 06:20:17.000000000 -0500 -@@ -0,0 +1,300 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/fs.h" -+#include "linux/tty.h" -+#include "linux/tty_driver.h" -+#include "linux/major.h" -+#include "linux/mm.h" -+#include "linux/init.h" -+#include "linux/console.h" -+#include "asm/termbits.h" -+#include "asm/irq.h" -+#include "line.h" -+#include "ssl.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+ -+static int ssl_version = 1; -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. -+ */ -+static int ssl_refcount = 0; -+ -+static struct tty_driver ssl_driver; -+ -+#define NR_PORTS 64 -+ -+void ssl_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = ssl_announce, -+ .xterm_title = "Serial Line #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int ssl_config(char *str); -+static int ssl_get_config(char *dev, char *str, int size, char **error_out); -+static int ssl_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML serial line", -+ .devfs_name = "tts/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 64, -+ .type = TTY_DRIVER_TYPE_SERIAL, -+ .subtype = 0, -+ .read_irq = SSL_IRQ, -+ .read_irq_name = "ssl", -+ .write_irq = SSL_WRITE_IRQ, -+ .write_irq_name = "ssl-write", -+ .symlink_from = "serial", -+ .symlink_to = "tts", -+ .mc = { -+ .name = "ssl", -+ .config = ssl_config, -+ .get_config = ssl_get_config, -+ .remove = ssl_remove, -+ }, -+}; -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+static struct line serial_lines[NR_PORTS] = -+ { [0 ... NR_PORTS - 1] = LINE_INIT(CONFIG_SSL_CHAN, &driver) }; -+ -+static struct lines lines = LINES_INIT(NR_PORTS); -+ -+static int ssl_config(char *str) -+{ -+ return(line_config(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+static int ssl_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, size, error_out)); -+} -+ -+static int ssl_remove(char *str) -+{ -+ return(line_remove(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+int ssl_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(line_open(serial_lines, tty, &opts)); -+} -+ -+static void ssl_close(struct tty_struct *tty, struct file * filp) -+{ -+ line_close(serial_lines, tty); -+} -+ -+static int ssl_write(struct tty_struct * tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(serial_lines, tty, from_user, buf, count)); -+} -+ -+static void ssl_put_char(struct tty_struct *tty, unsigned char ch) -+{ -+ line_write(serial_lines, tty, 0, &ch, sizeof(ch)); -+} -+ -+static void ssl_flush_chars(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static void ssl_flush_buffer(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_ioctl(struct tty_struct *tty, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ int ret; -+ -+ ret = 0; -+ switch(cmd){ -+ case TCGETS: -+ case TCSETS: -+ case TCFLSH: -+ case TCSETSF: -+ case TCSETSW: -+ case TCGETA: -+ case TIOCMGET: -+ case TCSBRK: -+ case TCSBRKP: -+ case TIOCMSET: -+ ret = -ENOIOCTLCMD; -+ break; -+ default: -+ printk(KERN_ERR -+ "Unimplemented ioctl in ssl_ioctl : 0x%x\n", cmd); -+ ret = -ENOIOCTLCMD; -+ break; -+ } -+ return(ret); -+} -+ -+static void ssl_throttle(struct tty_struct * tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_throttle\n"); -+} -+ -+static void ssl_unthrottle(struct tty_struct * tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_unthrottle\n"); -+} -+ -+static void ssl_set_termios(struct tty_struct *tty, -+ struct termios *old_termios) -+{ -+} -+ -+static void ssl_stop(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_stop\n"); -+} -+ -+static void ssl_start(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_start\n"); -+} -+ -+void ssl_hangup(struct tty_struct *tty) -+{ -+} -+ -+static struct tty_driver ssl_driver = { -+ .refcount = &ssl_refcount, -+ .open = ssl_open, -+ .close = ssl_close, -+ .write = ssl_write, -+ .put_char = ssl_put_char, -+ .flush_chars = ssl_flush_chars, -+ .chars_in_buffer = ssl_chars_in_buffer, -+ .flush_buffer = ssl_flush_buffer, -+ .ioctl = ssl_ioctl, -+ .throttle = ssl_throttle, -+ .unthrottle = ssl_unthrottle, -+ .set_termios = ssl_set_termios, -+ .stop = ssl_stop, -+ .start = ssl_start, -+ .hangup = ssl_hangup -+}; -+ -+/* Changed by ssl_init and referenced by ssl_exit, which are both serialized -+ * by being an initcall and exitcall, respectively. -+ */ -+static int ssl_init_done = 0; -+ -+static void ssl_console_write(struct console *c, const char *string, -+ unsigned len) -+{ -+ struct line *line = &serial_lines[c->index]; -+ if(ssl_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(ssl_init_done) -+ up(&line->sem); -+} -+ -+static kdev_t ssl_console_device(struct console *c) -+{ -+ return mk_kdev(TTY_MAJOR, c->index); -+} -+ -+static int ssl_console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console ssl_cons = { -+ name: "ttyS", -+ write: ssl_console_write, -+ device: ssl_console_device, -+ setup: ssl_console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; -+ -+int ssl_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing software serial port version %d\n", -+ ssl_version); -+ -+ line_register_devfs(&lines, &driver, &ssl_driver, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ lines_init(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ register_console(&ssl_cons); -+ ssl_init_done = 1; -+ return(0); -+} -+ -+__initcall(ssl_init); -+ -+static int ssl_chan_setup(char *str) -+{ -+ return(line_setup(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, 1)); -+} -+ -+__setup("ssl", ssl_chan_setup); -+__channel_help(ssl_chan_setup, "ssl"); -+ -+static void ssl_exit(void) -+{ -+ if(!ssl_init_done) return; -+ close_lines(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+} -+ -+__uml_exitcall(ssl_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ssl.h um/arch/um/drivers/ssl.h ---- orig/arch/um/drivers/ssl.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/ssl.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SSL_H__ -+#define __SSL_H__ -+ -+extern int ssl_read(int fd, int line); -+extern void ssl_receive_char(int line, char ch); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/stdio_console.c um/arch/um/drivers/stdio_console.c ---- orig/arch/um/drivers/stdio_console.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/stdio_console.c 2003-11-07 03:03:03.000000000 -0500 -@@ -0,0 +1,258 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/posix_types.h" -+#include "linux/tty.h" -+#include "linux/tty_flip.h" -+#include "linux/types.h" -+#include "linux/major.h" -+#include "linux/kdev_t.h" -+#include "linux/console.h" -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "linux/list.h" -+#include "linux/init.h" -+#include "linux/interrupt.h" -+#include "linux/slab.h" -+#include "asm/current.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "asm/irq.h" -+#include "stdio_console.h" -+#include "line.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "2_5compat.h" -+ -+#define MAX_TTYS (8) -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. -+ */ -+ -+static struct tty_driver console_driver; -+ -+static int console_refcount = 0; -+ -+static struct chan_ops init_console_ops = { -+ .type = "you shouldn't see this", -+ .init = NULL, -+ .open = NULL, -+ .close = NULL, -+ .read = NULL, -+ .write = NULL, -+ .console_write = generic_write, -+ .window_size = NULL, -+ .free = NULL, -+ .winch = 0, -+}; -+ -+static struct chan init_console_chan = { -+ .list = { }, -+ .primary = 1, -+ .input = 0, -+ .output = 1, -+ .opened = 1, -+ .fd = 1, -+ .pri = INIT_STATIC, -+ .ops = &init_console_ops, -+ .data = NULL -+}; -+ -+void stdio_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = stdio_announce, -+ .xterm_title = "Virtual Console #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int con_config(char *str); -+static int con_get_config(char *dev, char *str, int size, char **error_out); -+static int con_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML console", -+ .devfs_name = "vc/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 0, -+ .type = TTY_DRIVER_TYPE_CONSOLE, -+ .subtype = SYSTEM_TYPE_CONSOLE, -+ .read_irq = CONSOLE_IRQ, -+ .read_irq_name = "console", -+ .write_irq = CONSOLE_WRITE_IRQ, -+ .write_irq_name = "console-write", -+ .symlink_from = "ttys", -+ .symlink_to = "vc", -+ .mc = { -+ .name = "con", -+ .config = con_config, -+ .get_config = con_get_config, -+ .remove = con_remove, -+ }, -+}; -+ -+static struct lines console_lines = LINES_INIT(MAX_TTYS); -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+struct line vts[MAX_TTYS] = { LINE_INIT(CONFIG_CON_ZERO_CHAN, &driver), -+ [ 1 ... MAX_TTYS - 1 ] = -+ LINE_INIT(CONFIG_CON_CHAN, &driver) }; -+ -+static int con_config(char *str) -+{ -+ return(line_config(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int con_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, vts, sizeof(vts)/sizeof(vts[0]), str, -+ size, error_out)); -+} -+ -+static int con_remove(char *str) -+{ -+ return(line_remove(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int open_console(struct tty_struct *tty) -+{ -+ return(line_open(vts, tty, &opts)); -+} -+ -+static int con_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(open_console(tty)); -+} -+ -+static void con_close(struct tty_struct *tty, struct file *filp) -+{ -+ line_close(vts, tty); -+} -+ -+static int con_write(struct tty_struct *tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(vts, tty, from_user, buf, count)); -+} -+ -+static void set_termios(struct tty_struct *tty, struct termios * old) -+{ -+} -+ -+static int chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static int con_init_done = 0; -+ -+int stdio_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing stdio console driver\n"); -+ -+ line_register_devfs(&console_lines, &driver, &console_driver, vts, -+ sizeof(vts)/sizeof(vts[0])); -+ -+ lines_init(vts, sizeof(vts)/sizeof(vts[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ open_console(NULL); -+ con_init_done = 1; -+ return(0); -+} -+ -+__initcall(stdio_init); -+ -+static void console_write(struct console *console, const char *string, -+ unsigned len) -+{ -+ struct line *line = &vts[console->index]; -+ -+ if(con_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(con_init_done) -+ up(&line->sem); -+} -+ -+static struct tty_driver console_driver = { -+ .refcount = &console_refcount, -+ .open = con_open, -+ .close = con_close, -+ .write = con_write, -+ .chars_in_buffer = chars_in_buffer, -+ .set_termios = set_termios -+}; -+ -+static kdev_t console_device(struct console *c) -+{ -+ return mk_kdev(TTY_MAJOR, c->index); -+} -+ -+static int console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console stdiocons = { -+ name: "tty", -+ write: console_write, -+ device: console_device, -+ setup: console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; -+ -+void stdio_console_init(void) -+{ -+ INIT_LIST_HEAD(&vts[0].chan_list); -+ list_add(&init_console_chan.list, &vts[0].chan_list); -+ register_console(&stdiocons); -+} -+ -+static int console_chan_setup(char *str) -+{ -+ return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); -+} -+ -+__setup("con", console_chan_setup); -+__channel_help(console_chan_setup, "con"); -+ -+static void console_exit(void) -+{ -+ if(!con_init_done) return; -+ close_lines(vts, sizeof(vts)/sizeof(vts[0])); -+} -+ -+__uml_exitcall(console_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/stdio_console.h um/arch/um/drivers/stdio_console.h ---- orig/arch/um/drivers/stdio_console.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/stdio_console.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __STDIO_CONSOLE_H -+#define __STDIO_CONSOLE_H -+ -+extern void save_console_flags(void); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/tty.c um/arch/um/drivers/tty.c ---- orig/arch/um/drivers/tty.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/tty.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include "chan_user.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+struct tty_chan { -+ char *dev; -+ int raw; -+ struct termios tt; -+}; -+ -+void *tty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct tty_chan *data; -+ -+ if(*str != ':'){ -+ printk("tty_init : channel type 'tty' must specify " -+ "a device\n"); -+ return(NULL); -+ } -+ str++; -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) -+ return(NULL); -+ *data = ((struct tty_chan) { .dev = str, -+ .raw = opts->raw }); -+ -+ return(data); -+} -+ -+int tty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct tty_chan *data = d; -+ int fd; -+ -+ fd = os_open_file(data->dev, of_set_rw(OPENFLAGS(), input, output), 0); -+ if(fd < 0) return(fd); -+ if(data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } -+ -+ *dev_out = data->dev; -+ return(fd); -+} -+ -+int tty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct tty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops tty_ops = { -+ .type = "tty", -+ .init = tty_chan_init, -+ .open = tty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = tty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ubd_kern.c um/arch/um/drivers/ubd_kern.c ---- orig/arch/um/drivers/ubd_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/ubd_kern.c 2004-01-21 01:28:43.000000000 -0500 -@@ -0,0 +1,1396 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+/* 2001-09-28...2002-04-17 -+ * Partition stuff by James_McMechan@hotmail.com -+ * old style ubd by setting UBD_SHIFT to 0 -+ */ -+ -+#define MAJOR_NR UBD_MAJOR -+#define UBD_SHIFT 4 -+ -+#include "linux/config.h" -+#include "linux/blk.h" -+#include "linux/blkdev.h" -+#include "linux/hdreg.h" -+#include "linux/init.h" -+#include "linux/devfs_fs_kernel.h" -+#include "linux/cdrom.h" -+#include "linux/proc_fs.h" -+#include "linux/ctype.h" -+#include "linux/capability.h" -+#include "linux/mm.h" -+#include "linux/vmalloc.h" -+#include "linux/blkpg.h" -+#include "linux/genhd.h" -+#include "linux/spinlock.h" -+#include "asm/segment.h" -+#include "asm/uaccess.h" -+#include "asm/irq.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "ubd_user.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "mem.h" -+#include "mem_kern.h" -+ -+static int ubd_open(struct inode * inode, struct file * filp); -+static int ubd_release(struct inode * inode, struct file * file); -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int ubd_revalidate(kdev_t rdev); -+static int ubd_revalidate1(kdev_t rdev); -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << UBD_SHIFT) -+ -+/* Changed in early boot */ -+static int ubd_do_mmap = 0; -+#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE -+ -+/* Not modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by ubd_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct block_device_operations ubd_blops = { -+ .open = ubd_open, -+ .release = ubd_release, -+ .ioctl = ubd_ioctl, -+ .revalidate = ubd_revalidate, -+}; -+ -+/* Protected by ubd_lock, except in prepare_request and ubd_ioctl because -+ * the block layer should ensure that the device is idle before closing it. -+ */ -+static struct hd_struct ubd_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *ubd_queue; -+ -+/* Protected by ubd_lock */ -+static int fake_major = MAJOR_NR; -+ -+static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static struct gendisk ubd_gendisk = INIT_GENDISK(MAJOR_NR, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+static struct gendisk fake_gendisk = INIT_GENDISK(0, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+ -+#ifdef CONFIG_BLK_DEV_UBD_SYNC -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ -+ .cl = 1 }) -+#else -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ -+ .cl = 1 }) -+#endif -+ -+/* Not protected - changed only in ubd_setup_common and then only to -+ * to enable O_SYNC. -+ */ -+static struct openflags global_openflags = OPEN_FLAGS; -+ -+struct cow { -+ char *file; -+ int fd; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+}; -+ -+struct ubd { -+ char *file; -+ int count; -+ int fd; -+ __u64 size; -+ struct openflags boot_openflags; -+ struct openflags openflags; -+ devfs_handle_t devfs; -+ int no_cow; -+ struct cow cow; -+ -+ int map_writes; -+ int map_reads; -+ int nomap_writes; -+ int nomap_reads; -+ int write_maps; -+}; -+ -+#define DEFAULT_COW { \ -+ .file = NULL, \ -+ .fd = -1, \ -+ .bitmap = NULL, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+} -+ -+#define DEFAULT_UBD { \ -+ .file = NULL, \ -+ .count = 0, \ -+ .fd = -1, \ -+ .size = -1, \ -+ .boot_openflags = OPEN_FLAGS, \ -+ .openflags = OPEN_FLAGS, \ -+ .devfs = NULL, \ -+ .no_cow = 0, \ -+ .cow = DEFAULT_COW, \ -+ .map_writes = 0, \ -+ .map_reads = 0, \ -+ .nomap_writes = 0, \ -+ .nomap_reads = 0, \ -+ .write_maps = 0, \ -+} -+ -+struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; -+ -+static int ubd0_init(void) -+{ -+ struct ubd *dev = &ubd_dev[0]; -+ -+ if(dev->file == NULL) -+ dev->file = "root_fs"; -+ return(0); -+} -+ -+__initcall(ubd0_init); -+ -+/* Only changed by fake_ide_setup which is a setup */ -+static int fake_ide = 0; -+static struct proc_dir_entry *proc_ide_root = NULL; -+static struct proc_dir_entry *proc_ide = NULL; -+ -+static void make_proc_ide(void) -+{ -+ proc_ide_root = proc_mkdir("ide", 0); -+ proc_ide = proc_mkdir("ide0", proc_ide_root); -+} -+ -+static int proc_ide_read_media(char *page, char **start, off_t off, int count, -+ int *eof, void *data) -+{ -+ int len; -+ -+ strcpy(page, "disk\n"); -+ len = strlen("disk\n"); -+ len -= off; -+ if (len < count){ -+ *eof = 1; -+ if (len <= 0) return 0; -+ } -+ else len = count; -+ *start = page + off; -+ return len; -+} -+ -+static void make_ide_entries(char *dev_name) -+{ -+ struct proc_dir_entry *dir, *ent; -+ char name[64]; -+ -+ if(!fake_ide) return; -+ -+ /* Without locking this could race if a UML was booted with no -+ * disks and then two mconsole requests which add disks came in -+ * at the same time. -+ */ -+ spin_lock(&ubd_lock); -+ if(proc_ide_root == NULL) make_proc_ide(); -+ spin_unlock(&ubd_lock); -+ -+ dir = proc_mkdir(dev_name, proc_ide); -+ if(!dir) return; -+ -+ ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); -+ if(!ent) return; -+ ent->nlink = 1; -+ ent->data = NULL; -+ ent->read_proc = proc_ide_read_media; -+ ent->write_proc = NULL; -+ sprintf(name,"ide0/%s", dev_name); -+ proc_symlink(dev_name, proc_ide_root, name); -+} -+ -+static int fake_ide_setup(char *str) -+{ -+ fake_ide = 1; -+ return(1); -+} -+ -+__setup("fake_ide", fake_ide_setup); -+ -+__uml_help(fake_ide_setup, -+"fake_ide\n" -+" Create ide0 entries that map onto ubd devices.\n\n" -+); -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int ubd_setup_common(char *str, int *index_out) -+{ -+ struct openflags flags = global_openflags; -+ struct ubd *dev; -+ char *backing_file; -+ int n, err; -+ -+ if(index_out) *index_out = -1; -+ n = *str; -+ if(n == '='){ -+ char *end; -+ int major; -+ -+ str++; -+ if(!strcmp(str, "mmap")){ -+ CHOOSE_MODE(printk("mmap not supported by the ubd " -+ "driver in tt mode\n"), -+ ubd_do_mmap = 1); -+ return(0); -+ } -+ -+ if(!strcmp(str, "sync")){ -+ global_openflags.s = 1; -+ return(0); -+ } -+ major = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk(KERN_ERR -+ "ubd_setup : didn't parse major number\n"); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ if(fake_major != MAJOR_NR){ -+ printk(KERN_ERR "Can't assign a fake major twice\n"); -+ goto out1; -+ } -+ -+ fake_gendisk.major = major; -+ fake_major = major; -+ -+ printk(KERN_INFO "Setting extra ubd major number to %d\n", -+ major); -+ err = 0; -+ out1: -+ spin_unlock(&ubd_lock); -+ return(err); -+ } -+ -+ n = parse_unit(&str); -+ if(n < 0){ -+ printk(KERN_ERR "ubd_setup : couldn't parse unit number " -+ "'%s'\n", str); -+ return(1); -+ } -+ -+ if(n >= MAX_DEV){ -+ printk(KERN_ERR "ubd_setup : index %d out of range " -+ "(%d devices)\n", n, MAX_DEV); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ -+ dev = &ubd_dev[n]; -+ if(dev->file != NULL){ -+ printk(KERN_ERR "ubd_setup : device already configured\n"); -+ goto out2; -+ } -+ -+ if(index_out) *index_out = n; -+ -+ if(*str == 'r'){ -+ flags.w = 0; -+ str++; -+ } -+ if(*str == 's'){ -+ flags.s = 1; -+ str++; -+ } -+ if(*str == 'd'){ -+ dev->no_cow = 1; -+ str++; -+ } -+ -+ if(*str++ != '='){ -+ printk(KERN_ERR "ubd_setup : Expected '='\n"); -+ goto out2; -+ } -+ -+ err = 0; -+ backing_file = strchr(str, ','); -+ if(backing_file){ -+ if(dev->no_cow) -+ printk(KERN_ERR "Can't specify both 'd' and a " -+ "cow file\n"); -+ else { -+ *backing_file = '\0'; -+ backing_file++; -+ } -+ } -+ dev->file = str; -+ dev->cow.file = backing_file; -+ dev->boot_openflags = flags; -+ out2: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_setup(char *str) -+{ -+ ubd_setup_common(str, NULL); -+ return(1); -+} -+ -+__setup("ubd", ubd_setup); -+__uml_help(ubd_setup, -+"ubd=\n" -+" This is used to associate a device with a file in the underlying\n" -+" filesystem. Usually, there is a filesystem in the file, but \n" -+" that's not required. Swap devices containing swap files can be\n" -+" specified like this. Also, a file which doesn't contain a\n" -+" filesystem can have its contents read in the virtual \n" -+" machine by running dd on the device. n must be in the range\n" -+" 0 to 7. Appending an 'r' to the number will cause that device\n" -+" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" -+" an 's' (has to be _after_ 'r', if there is one) will cause data\n" -+" to be written to disk on the host immediately.\n\n" -+); -+ -+static int fakehd(char *str) -+{ -+ printk(KERN_INFO -+ "fakehd : Changing ubd_gendisk.major_name to \"hd\".\n"); -+ ubd_gendisk.major_name = "hd"; -+ return(1); -+} -+ -+__setup("fakehd", fakehd); -+__uml_help(fakehd, -+"fakehd\n" -+" Change the ubd device name to \"hd\".\n\n" -+); -+ -+static void do_ubd_request(request_queue_t * q); -+ -+/* Only changed by ubd_init, which is an initcall. */ -+int thread_fd = -1; -+ -+/* Changed by ubd_handler, which is serialized because interrupts only -+ * happen on CPU 0. -+ */ -+int intr_count = 0; -+ -+static void ubd_finish(int error) -+{ -+ int nsect; -+ -+ if(error){ -+ end_request(0); -+ return; -+ } -+ nsect = CURRENT->current_nr_sectors; -+ CURRENT->sector += nsect; -+ CURRENT->buffer += nsect << 9; -+ CURRENT->errors = 0; -+ CURRENT->nr_sectors -= nsect; -+ CURRENT->current_nr_sectors = 0; -+ end_request(1); -+} -+ -+static void ubd_handler(void) -+{ -+ struct io_thread_req req; -+ int n, err; -+ -+ DEVICE_INTR = NULL; -+ intr_count++; -+ n = read_ubd_fs(thread_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " -+ "err = %d\n", os_getpid(), -n); -+ spin_lock(&io_request_lock); -+ end_request(0); -+ spin_unlock(&io_request_lock); -+ return; -+ } -+ -+ if((req.op != UBD_MMAP) && -+ ((req.offset != ((__u64) (CURRENT->sector)) << 9) || -+ (req.length != (CURRENT->current_nr_sectors) << 9))) -+ panic("I/O op mismatch"); -+ -+ if(req.map_fd != -1){ -+ err = physmem_subst_mapping(req.buffer, req.map_fd, -+ req.map_offset, 1); -+ if(err) -+ printk("ubd_handler - physmem_subst_mapping failed, " -+ "err = %d\n", -err); -+ } -+ -+ spin_lock(&io_request_lock); -+ ubd_finish(req.error); -+ reactivate_fd(thread_fd, UBD_IRQ); -+ do_ubd_request(ubd_queue); -+ spin_unlock(&io_request_lock); -+} -+ -+static void ubd_intr(int irq, void *dev, struct pt_regs *unused) -+{ -+ ubd_handler(); -+} -+ -+/* Only changed by ubd_init, which is an initcall. */ -+static int io_pid = -1; -+ -+void kill_io_thread(void) -+{ -+ if(io_pid != -1) -+ os_kill_process(io_pid, 1); -+} -+ -+__uml_exitcall(kill_io_thread); -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t ubd_dir_handle; -+ -+static int ubd_add(int n) -+{ -+ struct ubd *dev = &ubd_dev[n]; -+ char name[sizeof("nnnnnn\0")], dev_name[sizeof("ubd0x")]; -+ int err = -EISDIR; -+ -+ if(dev->file == NULL) -+ goto out; -+ -+ err = ubd_revalidate1(MKDEV(MAJOR_NR, n << UBD_SHIFT)); -+ if(err) -+ goto out; -+ -+ if(dev->cow.file == NULL) -+ blk_sizes[n] = UBD_MMAP_BLOCK_SIZE; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(ubd_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << UBD_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &ubd_blops, NULL); -+ -+#if 0 /* 2.5 ... */ -+ sprintf(disk->disk_name, "ubd%c", 'a' + unit); -+#endif -+ -+ sprintf(dev_name, "%s%c", ubd_gendisk.major_name, -+ n + 'a'); -+ -+ make_ide_entries(dev_name); -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+static int ubd_config(char *str) -+{ -+ int n, err; -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "ubd_config failed to strdup string\n"); -+ return(1); -+ } -+ err = ubd_setup_common(str, &n); -+ if(err){ -+ kfree(str); -+ return(-1); -+ } -+ if(n == -1) return(0); -+ -+ spin_lock(&ubd_lock); -+ err = ubd_add(n); -+ if(err) -+ ubd_dev[n].file = NULL; -+ spin_unlock(&ubd_lock); -+ -+ return(err); -+} -+ -+static int ubd_get_config(char *name, char *str, int size, char **error_out) -+{ -+ struct ubd *dev; -+ char *end; -+ int n, len = 0; -+ -+ n = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "ubd_get_config : didn't parse device number"; -+ return(-1); -+ } -+ -+ if((n >= MAX_DEV) || (n < 0)){ -+ *error_out = "ubd_get_config : device number out of range"; -+ return(-1); -+ } -+ -+ dev = &ubd_dev[n]; -+ spin_lock(&ubd_lock); -+ -+ if(dev->file == NULL){ -+ CONFIG_CHUNK(str, size, len, "", 1); -+ goto out; -+ } -+ -+ CONFIG_CHUNK(str, size, len, dev->file, 0); -+ -+ if(dev->cow.file != NULL){ -+ CONFIG_CHUNK(str, size, len, ",", 0); -+ CONFIG_CHUNK(str, size, len, dev->cow.file, 1); -+ } -+ else CONFIG_CHUNK(str, size, len, "", 1); -+ -+ out: -+ spin_unlock(&ubd_lock); -+ return(len); -+} -+ -+static int ubd_remove(char *str) -+{ -+ struct ubd *dev; -+ int n, err = -ENODEV; -+ -+ if(isdigit(*str)){ -+ char *end; -+ n = simple_strtoul(str, &end, 0); -+ if ((*end != '\0') || (end == str)) -+ return(err); -+ } -+ else if (('a' <= *str) && (*str <= 'h')) -+ n = *str - 'a'; -+ else -+ return(err); /* it should be a number 0-7/a-h */ -+ -+ if((n < 0) || (n >= MAX_DEV)) -+ return(err); -+ -+ dev = &ubd_dev[n]; -+ -+ spin_lock(&ubd_lock); -+ err = 0; -+ if(dev->file == NULL) -+ goto out; -+ err = -1; -+ if(dev->count > 0) -+ goto out; -+ if(dev->devfs != NULL) -+ devfs_unregister(dev->devfs); -+ -+ *dev = ((struct ubd) DEFAULT_UBD); -+ err = 0; -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static struct mc_device ubd_mc = { -+ .name = "ubd", -+ .config = ubd_config, -+ .get_config = ubd_get_config, -+ .remove = ubd_remove, -+}; -+ -+static int ubd_mc_init(void) -+{ -+ mconsole_register_dev(&ubd_mc); -+ return(0); -+} -+ -+__initcall(ubd_mc_init); -+ -+static request_queue_t *ubd_get_queue(kdev_t device) -+{ -+ return(ubd_queue); -+} -+ -+int ubd_init(void) -+{ -+ unsigned long stack; -+ int i, err; -+ -+ ubd_dir_handle = devfs_mk_dir (NULL, "ubd", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "ubd", &ubd_blops)) { -+ printk(KERN_ERR "ubd: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ ubd_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(ubd_queue, DEVICE_REQUEST); -+ INIT_ELV(ubd_queue, &ubd_queue->elevator); -+ -+ add_gendisk(&ubd_gendisk); -+ if (fake_major != MAJOR_NR){ -+ /* major number 0 is used to auto select */ -+ err = devfs_register_blkdev(fake_major, "fake", &ubd_blops); -+ if(fake_major == 0){ -+ /* auto device number case */ -+ fake_major = err; -+ if(err == 0) -+ return(-ENODEV); -+ } -+ else if (err){ -+ /* not auto so normal error */ -+ printk(KERN_ERR "ubd: error %d getting major %d\n", -+ -err, fake_major); -+ return(-ENODEV); -+ } -+ -+ blk_dev[fake_major].queue = ubd_get_queue; -+ read_ahead[fake_major] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[fake_major] = blk_sizes; -+ blk_size[fake_major] = sizes; -+ INIT_HARDSECT(hardsect_size, fake_major, hardsect_sizes); -+ add_gendisk(&fake_gendisk); -+ } -+ -+ for(i=0;ifd); -+ os_close_file(dev->fd); -+ if(dev->cow.file != NULL) -+ return; -+ -+ if(ubd_do_mmap) -+ physmem_forget_descriptor(dev->cow.fd); -+ os_close_file(dev->cow.fd); -+ vfree(dev->cow.bitmap); -+ dev->cow.bitmap = NULL; -+} -+ -+static int ubd_open_dev(struct ubd *dev) -+{ -+ struct openflags flags; -+ char **back_ptr; -+ int err, create_cow, *create_ptr; -+ -+ dev->openflags = dev->boot_openflags; -+ create_cow = 0; -+ create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; -+ back_ptr = dev->no_cow ? NULL : &dev->cow.file; -+ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, -+ &dev->cow.bitmap_offset, &dev->cow.bitmap_len, -+ &dev->cow.data_offset, create_ptr); -+ -+ if((dev->fd == -ENOENT) && create_cow){ -+ dev->fd = create_cow_file(dev->file, dev->cow.file, -+ dev->openflags, 1 << 9, PAGE_SIZE, -+ &dev->cow.bitmap_offset, -+ &dev->cow.bitmap_len, -+ &dev->cow.data_offset); -+ if(dev->fd >= 0){ -+ printk(KERN_INFO "Creating \"%s\" as COW file for " -+ "\"%s\"\n", dev->file, dev->cow.file); -+ } -+ } -+ -+ if(dev->fd < 0) return(dev->fd); -+ -+ if(dev->cow.file != NULL){ -+ err = -ENOMEM; -+ dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); -+ if(dev->cow.bitmap == NULL){ -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto error; -+ } -+ flush_tlb_kernel_vm(); -+ -+ err = read_cow_bitmap(dev->fd, dev->cow.bitmap, -+ dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ if(err < 0) -+ goto error; -+ -+ flags = dev->openflags; -+ flags.w = 0; -+ err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, -+ NULL, NULL); -+ if(err < 0) goto error; -+ dev->cow.fd = err; -+ } -+ return(0); -+ error: -+ os_close_file(dev->fd); -+ return(err); -+} -+ -+static int ubd_file_size(struct ubd *dev, __u64 *size_out) -+{ -+ char *file; -+ -+ file = dev->cow.file ? dev->cow.file : dev->file; -+ return(os_file_size(file, size_out)); -+} -+ -+static int ubd_open(struct inode *inode, struct file *filp) -+{ -+ struct ubd *dev; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ dev = &ubd_dev[n]; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ offset = n << UBD_SHIFT; -+ -+ if(dev->count == 0){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "ubd%d: Can't open \"%s\": " -+ "errno = %d\n", n, dev->file, -err); -+ goto out; -+ } -+ err = ubd_file_size(dev, &dev->size); -+ if(err < 0) -+ goto out; -+ sizes[offset] = dev->size / BLOCK_SIZE; -+ ubd_part[offset].nr_sects = dev->size / hardsect_sizes[offset]; -+ } -+ dev->count++; -+ if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ -+ if(--dev->count == 0) ubd_close(dev); -+ err = -EROFS; -+ } -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_release(struct inode * inode, struct file * file) -+{ -+ int n, offset; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ offset = n << UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ if(--ubd_dev[n].count == 0) -+ ubd_close(&ubd_dev[n]); -+ spin_unlock(&ubd_lock); -+ -+ return(0); -+} -+ -+static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, -+ __u64 *cow_offset, unsigned long *bitmap, -+ __u64 bitmap_offset, unsigned long *bitmap_words, -+ __u64 bitmap_len) -+{ -+ __u64 sector = io_offset >> 9; -+ int i, update_bitmap = 0; -+ -+ for(i = 0; i < length >> 9; i++){ -+ if(cow_mask != NULL) -+ ubd_set_bit(i, (unsigned char *) cow_mask); -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) -+ continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(sector + i, (unsigned char *) bitmap); -+ } -+ -+ if(!update_bitmap) -+ return; -+ -+ *cow_offset = sector / (sizeof(unsigned long) * 8); -+ -+ /* This takes care of the case where we're exactly at the end of the -+ * device, and *cow_offset + 1 is off the end. So, just back it up -+ * by one word. Thanks to Lynn Kerby for the fix and James McMechan -+ * for the original diagnosis. -+ */ -+ if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / -+ sizeof(unsigned long) - 1)) -+ (*cow_offset)--; -+ -+ bitmap_words[0] = bitmap[*cow_offset]; -+ bitmap_words[1] = bitmap[*cow_offset + 1]; -+ -+ *cow_offset *= sizeof(unsigned long); -+ *cow_offset += bitmap_offset; -+} -+ -+static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, -+ __u64 bitmap_offset, __u64 bitmap_len) -+{ -+ __u64 sector = req->offset >> 9; -+ int i; -+ -+ if(req->length > (sizeof(req->sector_mask) * 8) << 9) -+ panic("Operation too long"); -+ -+ if(req->op == UBD_READ) { -+ for(i = 0; i < req->length >> 9; i++){ -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)){ -+ ubd_set_bit(i, (unsigned char *) -+ &req->sector_mask); -+ } -+ } -+ } -+ else cowify_bitmap(req->offset, req->length, &req->sector_mask, -+ &req->cow_offset, bitmap, bitmap_offset, -+ req->bitmap_words, bitmap_len); -+} -+ -+static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) -+{ -+ __u64 sector; -+ unsigned char *bitmap; -+ int bit, i; -+ -+ /* mmap must have been requested on the command line */ -+ if(!ubd_do_mmap) -+ return(-1); -+ -+ /* The buffer must be page aligned */ -+ if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ /* The request must be a page long */ -+ if((req->current_nr_sectors << 9) != PAGE_SIZE) -+ return(-1); -+ -+ if(dev->cow.file == NULL) -+ return(dev->fd); -+ -+ sector = offset >> 9; -+ bitmap = (unsigned char *) dev->cow.bitmap; -+ bit = ubd_test_bit(sector, bitmap); -+ -+ for(i = 1; i < req->current_nr_sectors; i++){ -+ if(ubd_test_bit(sector + i, bitmap) != bit) -+ return(-1); -+ } -+ -+ if(bit || (req->cmd == WRITE)) -+ offset += dev->cow.data_offset; -+ -+ /* The data on disk must be page aligned */ -+ if((offset % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ return(bit ? dev->fd : dev->cow.fd); -+} -+ -+static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, -+ struct request *req, -+ struct io_thread_req *io_req) -+{ -+ int err; -+ -+ if(req->cmd == WRITE){ -+ /* Writes are almost no-ops since the new data is already in the -+ * host page cache -+ */ -+ dev->map_writes++; -+ if(dev->cow.file != NULL) -+ cowify_bitmap(io_req->offset, io_req->length, -+ &io_req->sector_mask, &io_req->cow_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ io_req->bitmap_words, -+ dev->cow.bitmap_len); -+ } -+ else { -+ int w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->cow.fd)) -+ w = 0; -+ else w = dev->openflags.w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->fd)) -+ offset += dev->cow.data_offset; -+ -+ err = physmem_subst_mapping(req->buffer, fd, offset, w); -+ if(err){ -+ printk("physmem_subst_mapping failed, err = %d\n", -+ -err); -+ return(1); -+ } -+ dev->map_reads++; -+ } -+ io_req->op = UBD_MMAP; -+ io_req->buffer = req->buffer; -+ return(0); -+} -+ -+static int prepare_request(struct request *req, struct io_thread_req *io_req) -+{ -+ struct ubd *dev; -+ __u64 offset; -+ int minor, n, len, fd; -+ -+ if(req->rq_status == RQ_INACTIVE) return(1); -+ -+ minor = MINOR(req->rq_dev); -+ n = minor >> UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ -+ if(IS_WRITE(req) && !dev->openflags.w){ -+ printk("Write attempted on readonly ubd device %d\n", n); -+ end_request(0); -+ return(1); -+ } -+ -+ req->sector += ubd_part[minor].start_sect; -+ offset = ((__u64) req->sector) << 9; -+ len = req->current_nr_sectors << 9; -+ -+ io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; -+ io_req->fds[1] = dev->fd; -+ io_req->map_fd = -1; -+ io_req->cow_offset = -1; -+ io_req->offset = offset; -+ io_req->length = len; -+ io_req->error = 0; -+ io_req->sector_mask = 0; -+ -+ fd = mmap_fd(req, dev, io_req->offset); -+ if(fd > 0){ -+ /* If mmapping is otherwise OK, but the first access to the -+ * page is a write, then it's not mapped in yet. So we have -+ * to write the data to disk first, then we can map the disk -+ * page in and continue normally from there. -+ */ -+ if((req->cmd == WRITE) && !is_remapped(req->buffer)){ -+ io_req->map_fd = dev->fd; -+ io_req->map_offset = io_req->offset + -+ dev->cow.data_offset; -+ dev->write_maps++; -+ } -+ else return(prepare_mmap_request(dev, fd, io_req->offset, req, -+ io_req)); -+ } -+ -+ if(req->cmd == READ) -+ dev->nomap_reads++; -+ else dev->nomap_writes++; -+ -+ io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; -+ io_req->offsets[0] = 0; -+ io_req->offsets[1] = dev->cow.data_offset; -+ io_req->buffer = req->buffer; -+ io_req->sectorsize = 1 << 9; -+ -+ if(dev->cow.file != NULL) -+ cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ return(0); -+} -+ -+static void do_ubd_request(request_queue_t *q) -+{ -+ struct io_thread_req io_req; -+ struct request *req; -+ int err, n; -+ -+ if(thread_fd == -1){ -+ while(!list_empty(&q->queue_head)){ -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ do_io(&io_req); -+ ubd_finish(io_req.error); -+ } -+ } -+ } -+ else { -+ if(DEVICE_INTR || list_empty(&q->queue_head)) return; -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ SET_INTR(ubd_handler); -+ n = write_ubd_fs(thread_fd, (char *) &io_req, -+ sizeof(io_req)); -+ if(n != sizeof(io_req)) -+ printk("write to io thread failed, " -+ "errno = %d\n", -n); -+ } -+ } -+} -+ -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hd_geometry *loc = (struct hd_geometry *) arg; -+ struct ubd *dev; -+ int n, minor, err; -+ struct hd_driveid ubd_id = { -+ .cyls = 0, -+ .heads = 128, -+ .sectors = 32, -+ }; -+ -+ if(!inode) return(-EINVAL); -+ minor = MINOR(inode->i_rdev); -+ n = minor >> UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return(-EINVAL); -+ dev = &ubd_dev[n]; -+ switch (cmd) { -+ struct hd_geometry g; -+ struct cdrom_volctrl volume; -+ case HDIO_GETGEO: -+ if(!loc) return(-EINVAL); -+ g.heads = 128; -+ g.sectors = 32; -+ g.cylinders = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ g.start = ubd_part[minor].start_sect; -+ return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); -+ case BLKGETSIZE: /* Return device size */ -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ put_user(ubd_part[minor].nr_sects, (long *) arg); -+ return(0); -+ case BLKRRPART: /* Re-read partition tables */ -+ return(ubd_revalidate(inode->i_rdev)); -+ -+ case HDIO_SET_UNMASKINTR: -+ if(!capable(CAP_SYS_ADMIN)) return(-EACCES); -+ if((arg > 1) || (minor & 0x3F)) return(-EINVAL); -+ return(0); -+ -+ case HDIO_GET_UNMASKINTR: -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ return(0); -+ -+ case HDIO_GET_MULTCOUNT: -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ return(0); -+ -+ case HDIO_SET_MULTCOUNT: -+ if(!capable(CAP_SYS_ADMIN)) return(-EACCES); -+ if(MINOR(inode->i_rdev) & 0x3F) return(-EINVAL); -+ return(0); -+ -+ case HDIO_GET_IDENTITY: -+ ubd_id.cyls = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ if(copy_to_user((char *) arg, (char *) &ubd_id, -+ sizeof(ubd_id))) -+ return(-EFAULT); -+ return(0); -+ -+ case CDROMVOLREAD: -+ if(copy_from_user(&volume, (char *) arg, sizeof(volume))) -+ return(-EFAULT); -+ volume.channel0 = 255; -+ volume.channel1 = 255; -+ volume.channel2 = 255; -+ volume.channel3 = 255; -+ if(copy_to_user((char *) arg, &volume, sizeof(volume))) -+ return(-EFAULT); -+ return(0); -+ -+ default: -+ return blk_ioctl(inode->i_rdev, cmd, arg); -+ } -+} -+ -+static int ubd_revalidate1(kdev_t rdev) -+{ -+ int i, n, offset, err = 0, pcount = 1 << UBD_SHIFT; -+ struct ubd *dev; -+ struct hd_struct *part; -+ -+ n = DEVICE_NR(rdev); -+ offset = n << UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ -+ part = &ubd_part[offset]; -+ -+ /* clear all old partition counts */ -+ for(i = 1; i < pcount; i++) { -+ part[i].start_sect = 0; -+ part[i].nr_sects = 0; -+ } -+ -+ /* If it already has been opened we can check the partitions -+ * directly -+ */ -+ if(dev->count){ -+ part->start_sect = 0; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ } -+ else if(dev->file){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "unable to open %s for validation\n", -+ dev->file); -+ goto out; -+ } -+ -+ /* have to recompute sizes since we opened it */ -+ err = ubd_file_size(dev, &dev->size); -+ if(err < 0) { -+ ubd_close(dev); -+ goto out; -+ } -+ part->start_sect = 0; -+ part->nr_sects = dev->size / hardsect_sizes[offset]; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ -+ /* we are done so close it */ -+ ubd_close(dev); -+ } -+ else err = -ENODEV; -+ out: -+ return(err); -+} -+ -+static int ubd_revalidate(kdev_t rdev) -+{ -+ int err; -+ -+ spin_lock(&ubd_lock); -+ err = ubd_revalidate1(rdev); -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_check_remapped(int fd, unsigned long address, int is_write, -+ __u64 offset) -+{ -+ __u64 bitmap_offset; -+ unsigned long new_bitmap[2]; -+ int i, err, n; -+ -+ /* If it's not a write access, we can't do anything about it */ -+ if(!is_write) -+ return(0); -+ -+ /* We have a write */ -+ for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ -+ struct ubd *dev = &ubd_dev[i]; -+ -+ if((dev->fd != fd) && (dev->cow.fd != fd)) -+ continue; -+ -+ /* It's a write to a ubd device */ -+ -+ if(!dev->openflags.w){ -+ /* It's a write access on a read-only device - probably -+ * shouldn't happen. If the kernel is trying to change -+ * something with no intention of writing it back out, -+ * then this message will clue us in that this needs -+ * fixing -+ */ -+ printk("Write access to mapped page from readonly ubd " -+ "device %d\n", i); -+ return(0); -+ } -+ -+ /* It's a write to a writeable ubd device - it must be COWed -+ * because, otherwise, the page would have been mapped in -+ * writeable -+ */ -+ -+ if(!dev->cow.file) -+ panic("Write fault on writeable non-COW ubd device %d", -+ i); -+ -+ /* It should also be an access to the backing file since the -+ * COW pages should be mapped in read-write -+ */ -+ -+ if(fd == dev->fd) -+ panic("Write fault on a backing page of ubd " -+ "device %d\n", i); -+ -+ /* So, we do the write, copying the backing data to the COW -+ * file... -+ */ -+ -+ err = os_seek_file(dev->fd, offset + dev->cow.data_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", -+ offset + dev->cow.data_offset, i, -err); -+ -+ n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Couldn't copy data to COW file of ubd " -+ "device %d, err = %d", i, -n); -+ -+ /* ... updating the COW bitmap... */ -+ -+ cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ new_bitmap, dev->cow.bitmap_len); -+ -+ err = os_seek_file(dev->fd, bitmap_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", bitmap_offset, i, -err); -+ -+ n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); -+ if(n != sizeof(new_bitmap)) -+ panic("Couldn't update bitmap of ubd device %d, " -+ "err = %d", i, -n); -+ -+ /* Maybe we can map the COW page in, and maybe we can't. If -+ * it is a pre-V3 COW file, we can't, since the alignment will -+ * be wrong. If it is a V3 or later COW file which has been -+ * moved to a system with a larger page size, then maybe we -+ * can't, depending on the exact location of the page. -+ */ -+ -+ offset += dev->cow.data_offset; -+ -+ /* Remove the remapping, putting the original anonymous page -+ * back. If the COW file can be mapped in, that is done. -+ * Otherwise, the COW page is read in. -+ */ -+ -+ if(!physmem_remove_mapping((void *) address)) -+ panic("Address 0x%lx not remapped by ubd device %d", -+ address, i); -+ if((offset % UBD_MMAP_BLOCK_SIZE) == 0) -+ physmem_subst_mapping((void *) address, dev->fd, -+ offset, 1); -+ else { -+ err = os_seek_file(dev->fd, offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of " -+ "ubd device %d, err = %d", offset, i, -+ -err); -+ -+ n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Failed to read page from offset %llx of " -+ "COW file of ubd device %d, err = %d", -+ offset, i, -n); -+ } -+ -+ return(1); -+ } -+ -+ /* It's not a write on a ubd device */ -+ return(0); -+} -+ -+static struct remapper ubd_remapper = { -+ .list = LIST_HEAD_INIT(ubd_remapper.list), -+ .proc = ubd_check_remapped, -+}; -+ -+static int ubd_remapper_setup(void) -+{ -+ if(ubd_do_mmap) -+ register_remapper(&ubd_remapper); -+ -+ return(0); -+} -+ -+__initcall(ubd_remapper_setup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ubd_user.c um/arch/um/drivers/ubd_user.c ---- orig/arch/um/drivers/ubd_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/ubd_user.c 2004-01-10 06:24:52.000000000 -0500 -@@ -0,0 +1,377 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ubd_user.h" -+#include "os.h" -+#include "cow.h" -+ -+#include -+#include -+ -+static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) -+{ -+ struct uml_stat buf1, buf2; -+ int err; -+ -+ if(from_cmdline == NULL) return(1); -+ if(!strcmp(from_cmdline, from_cow)) return(1); -+ -+ err = os_stat_file(from_cmdline, &buf1); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); -+ return(1); -+ } -+ err = os_stat_file(from_cow, &buf2); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cow, -err); -+ return(1); -+ } -+ if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) -+ return(1); -+ -+ printk("Backing file mismatch - \"%s\" requested,\n" -+ "\"%s\" specified in COW header of \"%s\"\n", -+ from_cmdline, from_cow, cow); -+ return(0); -+} -+ -+static int backing_file_mismatch(char *file, __u64 size, time_t mtime) -+{ -+ unsigned long modtime; -+ long long actual; -+ int err; -+ -+ err = os_file_modtime(file, &modtime); -+ if(err < 0){ -+ printk("Failed to get modification time of backing file " -+ "\"%s\", err = %d\n", file, -err); -+ return(err); -+ } -+ -+ err = os_file_size(file, &actual); -+ if(err < 0){ -+ printk("Failed to get size of backing file \"%s\", " -+ "err = %d\n", file, -err); -+ return(err); -+ } -+ -+ if(actual != size){ -+ printk("Size mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", size, actual); -+ return(-EINVAL); -+ } -+ if(modtime != mtime){ -+ printk("mtime mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", mtime, modtime); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+int read_cow_bitmap(int fd, void *buf, int offset, int len) -+{ -+ int err; -+ -+ err = os_seek_file(fd, offset); -+ if(err < 0) -+ return(err); -+ -+ err = os_read_file(fd, buf, len); -+ if(err < 0) -+ return(err); -+ -+ return(0); -+} -+ -+int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out) -+{ -+ time_t mtime; -+ __u64 size; -+ __u32 version, align; -+ char *backing_file; -+ int fd, err, sectorsize, same, mode = 0644; -+ -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0){ -+ if((fd == -ENOENT) && (create_cow_out != NULL)) -+ *create_cow_out = 1; -+ if(!openflags->w || -+ ((errno != EROFS) && (errno != EACCES))) return(-errno); -+ openflags->w = 0; -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0) -+ return(fd); -+ } -+ -+ err = os_lock_file(fd, openflags->w); -+ if(err < 0){ -+ printk("Failed to lock '%s', err = %d\n", file, -err); -+ goto out_close; -+ } -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, -+ &size, §orsize, &align, bitmap_offset_out); -+ if(err && (*backing_file_out != NULL)){ -+ printk("Failed to read COW header from COW file \"%s\", " -+ "errno = %d\n", file, -err); -+ goto out_close; -+ } -+ if(err) return(fd); -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ same = same_backing_files(*backing_file_out, backing_file, file); -+ -+ if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ -+ printk("Switching backing file to '%s'\n", *backing_file_out); -+ err = write_cow_header(file, fd, *backing_file_out, -+ sectorsize, align, &size); -+ if(err){ -+ printk("Switch failed, errno = %d\n", -err); -+ return(err); -+ } -+ } -+ else { -+ *backing_file_out = backing_file; -+ err = backing_file_mismatch(*backing_file_out, size, mtime); -+ if(err) goto out_close; -+ } -+ -+ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); -+ -+ return(fd); -+ out_close: -+ os_close_file(fd); -+ return(err); -+} -+ -+int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, -+ int sectorsize, int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ int err, fd; -+ -+ flags.c = 1; -+ fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); -+ if(fd < 0){ -+ err = fd; -+ printk("Open of COW file '%s' failed, errno = %d\n", cow_file, -+ -err); -+ goto out; -+ } -+ -+ err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, -+ bitmap_offset_out, bitmap_len_out, -+ data_offset_out); -+ if(!err) -+ return(fd); -+ -+ os_close_file(fd); -+ out: -+ return(err); -+} -+ -+/* XXX Just trivial wrappers around os_read_file and os_write_file */ -+int read_ubd_fs(int fd, void *buffer, int len) -+{ -+ return(os_read_file(fd, buffer, len)); -+} -+ -+int write_ubd_fs(int fd, char *buffer, int len) -+{ -+ return(os_write_file(fd, buffer, len)); -+} -+ -+static int update_bitmap(struct io_thread_req *req) -+{ -+ int n; -+ -+ if(req->cow_offset == -1) -+ return(0); -+ -+ n = os_seek_file(req->fds[1], req->cow_offset); -+ if(n < 0){ -+ printk("do_io - bitmap lseek failed : err = %d\n", -n); -+ return(1); -+ } -+ -+ n = os_write_file(req->fds[1], &req->bitmap_words, -+ sizeof(req->bitmap_words)); -+ if(n != sizeof(req->bitmap_words)){ -+ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, -+ req->fds[1]); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+void do_io(struct io_thread_req *req) -+{ -+ char *buf; -+ unsigned long len; -+ int n, nsectors, start, end, bit; -+ int err; -+ __u64 off; -+ -+ if(req->op == UBD_MMAP){ -+ /* Touch the page to force the host to do any necessary IO to -+ * get it into memory -+ */ -+ n = *((volatile int *) req->buffer); -+ req->error = update_bitmap(req); -+ return; -+ } -+ -+ nsectors = req->length / req->sectorsize; -+ start = 0; -+ do { -+ bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); -+ end = start; -+ while((end < nsectors) && -+ (ubd_test_bit(end, (unsigned char *) -+ &req->sector_mask) == bit)) -+ end++; -+ -+ off = req->offset + req->offsets[bit] + -+ start * req->sectorsize; -+ len = (end - start) * req->sectorsize; -+ buf = &req->buffer[start * req->sectorsize]; -+ -+ err = os_seek_file(req->fds[bit], off); -+ if(err < 0){ -+ printk("do_io - lseek failed : err = %d\n", -err); -+ req->error = 1; -+ return; -+ } -+ if(req->op == UBD_READ){ -+ n = 0; -+ do { -+ buf = &buf[n]; -+ len -= n; -+ n = os_read_file(req->fds[bit], buf, len); -+ if (n < 0) { -+ printk("do_io - read failed, err = %d " -+ "fd = %d\n", -n, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } while((n < len) && (n != 0)); -+ if (n < len) memset(&buf[n], 0, len - n); -+ } -+ else { -+ n = os_write_file(req->fds[bit], buf, len); -+ if(n != len){ -+ printk("do_io - write failed err = %d " -+ "fd = %d\n", -n, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } -+ -+ start = end; -+ } while(start < nsectors); -+ -+ req->error = update_bitmap(req); -+} -+ -+/* Changed in start_io_thread, which is serialized by being called only -+ * from ubd_init, which is an initcall. -+ */ -+int kernel_fd = -1; -+ -+/* Only changed by the io thread */ -+int io_count = 0; -+ -+int io_thread(void *arg) -+{ -+ struct io_thread_req req; -+ int n; -+ -+ signal(SIGWINCH, SIG_IGN); -+ while(1){ -+ n = os_read_file(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ if(n < 0) -+ printk("io_thread - read failed, fd = %d, " -+ "err = %d\n", kernel_fd, -n); -+ else { -+ printk("io_thread - short read, fd = %d, " -+ "length = %d\n", kernel_fd, n); -+ } -+ continue; -+ } -+ io_count++; -+ do_io(&req); -+ n = os_write_file(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)) -+ printk("io_thread - write failed, fd = %d, err = %d\n", -+ kernel_fd, -n); -+ } -+} -+ -+int start_io_thread(unsigned long sp, int *fd_out) -+{ -+ int pid, fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("start_io_thread - os_pipe failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ kernel_fd = fds[0]; -+ *fd_out = fds[1]; -+ -+ pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, -+ NULL); -+ if(pid < 0){ -+ printk("start_io_thread - clone failed : errno = %d\n", errno); -+ goto out_close; -+ } -+ -+ return(pid); -+ -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ kernel_fd = -1; -+ *fd_out = -1; -+ out: -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm.c um/arch/um/drivers/xterm.c ---- orig/arch/um/drivers/xterm.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/xterm.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,201 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "kern_util.h" -+#include "chan_user.h" -+#include "helper.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_chan { -+ int pid; -+ int helper_pid; -+ char *title; -+ int device; -+ int raw; -+ struct termios tt; -+ unsigned long stack; -+ int direct_rcv; -+}; -+ -+void *xterm_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct xterm_chan *data; -+ -+ data = malloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct xterm_chan) { .pid = -1, -+ .helper_pid = -1, -+ .device = device, -+ .title = opts->xterm_title, -+ .raw = opts->raw, -+ .stack = opts->tramp_stack, -+ .direct_rcv = !opts->in_kernel } ); -+ return(data); -+} -+ -+/* Only changed by xterm_setup, which is a setup */ -+static char *terminal_emulator = "xterm"; -+static char *title_switch = "-T"; -+static char *exec_switch = "-e"; -+ -+static int __init xterm_setup(char *line, int *add) -+{ -+ *add = 0; -+ terminal_emulator = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) title_switch = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) exec_switch = line; -+ -+ return(0); -+} -+ -+__uml_setup("xterm=", xterm_setup, -+"xterm=,,<exec switch>\n" -+" Specifies an alternate terminal emulator to use for the debugger,\n" -+" consoles, and serial lines when they are attached to the xterm channel.\n" -+" The values are the terminal emulator binary, the switch it uses to set\n" -+" its title, and the switch it uses to execute a subprocess,\n" -+" respectively. The title switch must have the form '<switch> title',\n" -+" not '<switch>=title'. Similarly, the exec switch must have the form\n" -+" '<switch> command arg1 arg2 ...'.\n" -+" The default values are 'xterm=xterm,-T,-e'. Values for gnome-terminal\n" -+" are 'xterm=gnome-terminal,-t,-x'.\n\n" -+); -+ -+int xterm_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct xterm_chan *data = d; -+ unsigned long stack; -+ int pid, fd, new, err; -+ char title[256], file[] = "/tmp/xterm-pipeXXXXXX"; -+ char *argv[] = { terminal_emulator, title_switch, title, exec_switch, -+ "/usr/lib/uml/port-helper", "-uml-socket", -+ file, NULL }; -+ -+ if(os_access(argv[4], OS_ACC_X_OK) < 0) -+ argv[4] = "port-helper"; -+ -+ fd = mkstemp(file); -+ if(fd < 0){ -+ printk("xterm_open : mkstemp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(unlink(file)){ -+ printk("xterm_open : unlink failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ os_close_file(fd); -+ -+ fd = os_create_unix_socket(file, sizeof(file), 1); -+ if(fd < 0){ -+ printk("xterm_open : create_unix_socket failed, errno = %d\n", -+ -fd); -+ return(fd); -+ } -+ -+ sprintf(title, data->title, data->device); -+ stack = data->stack; -+ pid = run_helper(NULL, NULL, argv, &stack); -+ if(pid < 0){ -+ printk("xterm_open : run_helper failed, errno = %d\n", -pid); -+ return(pid); -+ } -+ -+ if(data->stack == 0) free_stack(stack, 0); -+ -+ if(data->direct_rcv) -+ new = os_rcv_fd(fd, &data->helper_pid); -+ else { -+ err = os_set_fd_block(fd, 0); -+ if(err < 0){ -+ printk("xterm_open : failed to set descriptor " -+ "non-blocking, err = %d\n", -err); -+ return(err); -+ } -+ new = xterm_fd(fd, &data->helper_pid); -+ } -+ if(new < 0){ -+ printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); -+ goto out; -+ } -+ -+ tcgetattr(new, &data->tt); -+ if(data->raw) raw(new, 0); -+ -+ data->pid = pid; -+ *dev_out = NULL; -+ out: -+ unlink(file); -+ return(new); -+} -+ -+void xterm_close(int fd, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ if(data->pid != -1) -+ os_kill_process(data->pid, 1); -+ data->pid = -1; -+ if(data->helper_pid != -1) -+ os_kill_process(data->helper_pid, 0); -+ data->helper_pid = -1; -+ os_close_file(fd); -+} -+ -+void xterm_free(void *d) -+{ -+ free(d); -+} -+ -+int xterm_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops xterm_ops = { -+ .type = "xterm", -+ .init = xterm_init, -+ .open = xterm_open, -+ .close = xterm_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = xterm_console_write, -+ .window_size = generic_window_size, -+ .free = xterm_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm.h um/arch/um/drivers/xterm.h ---- orig/arch/um/drivers/xterm.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/xterm.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __XTERM_H__ -+#define __XTERM_H__ -+ -+extern int xterm_fd(int socket, int *pid_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm_kern.c um/arch/um/drivers/xterm_kern.c ---- orig/arch/um/drivers/xterm_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/drivers/xterm_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/errno.h" -+#include "linux/slab.h" -+#include "asm/semaphore.h" -+#include "asm/irq.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "kern_util.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_wait { -+ struct semaphore sem; -+ int fd; -+ int pid; -+ int new_fd; -+}; -+ -+static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct xterm_wait *xterm = data; -+ int fd; -+ -+ fd = os_rcv_fd(xterm->fd, &xterm->pid); -+ if(fd == -EAGAIN) -+ return; -+ -+ xterm->new_fd = fd; -+ up(&xterm->sem); -+} -+ -+int xterm_fd(int socket, int *pid_out) -+{ -+ struct xterm_wait *data; -+ int err, ret; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk(KERN_ERR "xterm_fd : failed to allocate xterm_wait\n"); -+ return(-ENOMEM); -+ } -+ *data = ((struct xterm_wait) -+ { .sem = __SEMAPHORE_INITIALIZER(data->sem, 0), -+ .fd = socket, -+ .pid = -1, -+ .new_fd = -1 }); -+ -+ err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "xterm", data); -+ if(err){ -+ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " -+ "err = %d\n", err); -+ ret = err; -+ goto out; -+ } -+ down(&data->sem); -+ -+ free_irq(XTERM_IRQ, data); -+ -+ ret = data->new_fd; -+ *pid_out = data->pid; -+ out: -+ kfree(data); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/dyn_link.ld.in um/arch/um/dyn_link.ld.in ---- orig/arch/um/dyn_link.ld.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/dyn_link.ld.in 2004-01-10 04:28:44.000000000 -0500 -@@ -0,0 +1,171 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+SEARCH_DIR("/usr/local/i686-pc-linux-gnu/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib"); SEARCH_DIR("/usr/lib"); -+/* Do we need any of these for elf? -+ __DYNAMIC = 0; */ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ .interp : { *(.interp) } -+ __binary_start = .; -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ -+ . = ALIGN(4096); -+ -+ /* Read-only sections, merged into text segment: */ -+ .hash : { *(.hash) } -+ .dynsym : { *(.dynsym) } -+ .dynstr : { *(.dynstr) } -+ .gnu.version : { *(.gnu.version) } -+ .gnu.version_d : { *(.gnu.version_d) } -+ .gnu.version_r : { *(.gnu.version_r) } -+ .rel.init : { *(.rel.init) } -+ .rela.init : { *(.rela.init) } -+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } -+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } -+ .rel.fini : { *(.rel.fini) } -+ .rela.fini : { *(.rela.fini) } -+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } -+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } -+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } -+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } -+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } -+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } -+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } -+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } -+ .rel.ctors : { *(.rel.ctors) } -+ .rela.ctors : { *(.rela.ctors) } -+ .rel.dtors : { *(.rel.dtors) } -+ .rela.dtors : { *(.rela.dtors) } -+ .rel.got : { *(.rel.got) } -+ .rela.got : { *(.rela.got) } -+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } -+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } -+ .rel.plt : { *(.rel.plt) } -+ .rela.plt : { *(.rela.plt) } -+ .init : { -+ KEEP (*(.init)) -+ } =0x90909090 -+ .plt : { *(.plt) } -+ .text : { -+ *(.text .stub .text.* .gnu.linkonce.t.*) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ } =0x90909090 -+ .fini : { -+ KEEP (*(.fini)) -+ } =0x90909090 -+ -+ PROVIDE (__etext = .); -+ PROVIDE (_etext = .); -+ PROVIDE (etext = .); -+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } -+ .rodata1 : { *(.rodata1) } -+ .eh_frame_hdr : { *(.eh_frame_hdr) } -+ -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/common.ld.in') -+ -+ /* Ensure the __preinit_array_start label is properly aligned. We -+ could instead move the label definition inside the section, but -+ the linker would then create the section even if it turns out to -+ be empty, which isn't pretty. */ -+ . = ALIGN(32 / 8); -+ .preinit_array : { *(.preinit_array) } -+ .init_array : { *(.init_array) } -+ .fini_array : { *(.fini_array) } -+ .data : { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data .data.* .gnu.linkonce.d.*) -+ SORT(CONSTRUCTORS) -+ } -+ .data1 : { *(.data1) } -+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } -+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } -+ .eh_frame : { KEEP (*(.eh_frame)) } -+ .gcc_except_table : { *(.gcc_except_table) } -+ .dynamic : { *(.dynamic) } -+ .ctors : { -+ /* gcc uses crtbegin.o to find the start of -+ the constructors, so we make sure it is -+ first. Because this is a wildcard, it -+ doesn't matter if the user does not -+ actually link against crtbegin.o; the -+ linker won't look for a file to match a -+ wildcard. The wildcard also means that it -+ doesn't matter which directory crtbegin.o -+ is in. */ -+ KEEP (*crtbegin.o(.ctors)) -+ /* We don't want to include the .ctor section from -+ from the crtend.o file until after the sorted ctors. -+ The .ctor section from the crtend file contains the -+ end of ctors marker and it must be last */ -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors)) -+ KEEP (*(SORT(.ctors.*))) -+ KEEP (*(.ctors)) -+ } -+ .dtors : { -+ KEEP (*crtbegin.o(.dtors)) -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors)) -+ KEEP (*(SORT(.dtors.*))) -+ KEEP (*(.dtors)) -+ } -+ .jcr : { KEEP (*(.jcr)) } -+ .got : { *(.got.plt) *(.got) } -+ _edata = .; -+ PROVIDE (edata = .); -+ __bss_start = .; -+ .bss : { -+ *(.dynbss) -+ *(.bss .bss.* .gnu.linkonce.b.*) -+ *(COMMON) -+ /* Align here to ensure that the .bss section occupies space up to -+ _end. Align after .bss to ensure correct alignment even if the -+ .bss section disappears because there are no input sections. */ -+ . = ALIGN(32 / 8); -+ . = ALIGN(32 / 8); -+ } -+ _end = .; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+ /* DWARF debug sections. -+ Symbols in the DWARF debugging sections are relative to the beginning -+ of the section so we begin them at 0. */ -+ /* DWARF 1 */ -+ .debug 0 : { *(.debug) } -+ .line 0 : { *(.line) } -+ /* GNU DWARF 1 extensions */ -+ .debug_srcinfo 0 : { *(.debug_srcinfo) } -+ .debug_sfnames 0 : { *(.debug_sfnames) } -+ /* DWARF 1.1 and DWARF 2 */ -+ .debug_aranges 0 : { *(.debug_aranges) } -+ .debug_pubnames 0 : { *(.debug_pubnames) } -+ /* DWARF 2 */ -+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } -+ .debug_abbrev 0 : { *(.debug_abbrev) } -+ .debug_line 0 : { *(.debug_line) } -+ .debug_frame 0 : { *(.debug_frame) } -+ .debug_str 0 : { *(.debug_str) } -+ .debug_loc 0 : { *(.debug_loc) } -+ .debug_macinfo 0 : { *(.debug_macinfo) } -+ /* SGI/MIPS DWARF 2 extensions */ -+ .debug_weaknames 0 : { *(.debug_weaknames) } -+ .debug_funcnames 0 : { *(.debug_funcnames) } -+ .debug_typenames 0 : { *(.debug_typenames) } -+ .debug_varnames 0 : { *(.debug_varnames) } -+} -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs.h um/arch/um/fs/hostfs/hostfs.h ---- orig/arch/um/fs/hostfs/hostfs.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hostfs/hostfs.h 2003-12-17 10:52:50.000000000 -0500 -@@ -0,0 +1,69 @@ -+#ifndef __UM_FS_HOSTFS -+#define __UM_FS_HOSTFS -+ -+#include "os.h" -+ -+/* These are exactly the same definitions as in fs.h, but the names are -+ * changed so that this file can be included in both kernel and user files. -+ */ -+ -+#define HOSTFS_ATTR_MODE 1 -+#define HOSTFS_ATTR_UID 2 -+#define HOSTFS_ATTR_GID 4 -+#define HOSTFS_ATTR_SIZE 8 -+#define HOSTFS_ATTR_ATIME 16 -+#define HOSTFS_ATTR_MTIME 32 -+#define HOSTFS_ATTR_CTIME 64 -+#define HOSTFS_ATTR_ATIME_SET 128 -+#define HOSTFS_ATTR_MTIME_SET 256 -+#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ -+#define HOSTFS_ATTR_ATTR_FLAG 1024 -+ -+struct hostfs_iattr { -+ unsigned int ia_valid; -+ mode_t ia_mode; -+ uid_t ia_uid; -+ gid_t ia_gid; -+ loff_t ia_size; -+ time_t ia_atime; -+ time_t ia_mtime; -+ time_t ia_ctime; -+ unsigned int ia_attr_flags; -+}; -+ -+extern int stat_file(const char *path, int *dev_out, -+ unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out); -+extern int access_file(char *path, int r, int w, int x); -+extern int open_file(char *path, int r, int w, int append); -+extern int file_type(const char *path, int *rdev); -+extern void *open_dir(char *path, int *err_out); -+extern char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out); -+extern void close_file(void *stream); -+extern void close_dir(void *stream); -+extern int read_file(int fd, unsigned long long *offset, char *buf, int len); -+extern int write_file(int fd, unsigned long long *offset, const char *buf, -+ int len); -+extern int lseek_file(int fd, long long offset, int whence); -+extern int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox); -+extern int set_attr(const char *file, struct hostfs_iattr *attrs); -+extern int make_symlink(const char *from, const char *to); -+extern int unlink_file(const char *file); -+extern int do_mkdir(const char *file, int mode); -+extern int do_rmdir(const char *file); -+extern int do_mknod(const char *file, int mode, int dev); -+extern int link_file(const char *from, const char *to); -+extern int do_readlink(char *file, char *buf, int size); -+extern int rename_file(char *from, char *to); -+extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs_kern.c um/arch/um/fs/hostfs/hostfs_kern.c ---- orig/arch/um/fs/hostfs/hostfs_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hostfs/hostfs_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,965 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/stddef.h> -+#include <linux/fs.h> -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/pagemap.h> -+#include <linux/blkdev.h> -+#include <asm/uaccess.h> -+#include "hostfs.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "user_util.h" -+#include "2_5compat.h" -+#include "init.h" -+ -+#define file_hostfs_i(file) (&(file)->f_dentry->d_inode->u.hostfs_i) -+ -+int hostfs_d_delete(struct dentry *dentry) -+{ -+ return(1); -+} -+ -+struct dentry_operations hostfs_dentry_ops = { -+ .d_delete = hostfs_d_delete, -+}; -+ -+#define DEFAULT_ROOT "/" -+ -+/* Changed in hostfs_args before the kernel starts running */ -+static char *jail_dir = NULL; -+static int append = 0; -+ -+#define HOSTFS_SUPER_MAGIC 0x00c0ffee -+ -+static struct inode_operations hostfs_iops; -+static struct inode_operations hostfs_dir_iops; -+static struct address_space_operations hostfs_link_aops; -+ -+static int __init hostfs_args(char *options, int *add) -+{ -+ char *ptr; -+ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0') -+ jail_dir = options; -+ -+ options = ptr; -+ while(options){ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0'){ -+ if(!strcmp(options, "append")) -+ append = 1; -+ else printf("hostfs_args - unsupported option - %s\n", -+ options); -+ } -+ options = ptr; -+ } -+ return(0); -+} -+ -+__uml_setup("hostfs=", hostfs_args, -+"hostfs=<root dir>,<flags>,...\n" -+" This is used to set hostfs parameters. The root directory argument\n" -+" is used to confine all hostfs mounts to within the specified directory\n" -+" tree on the host. If this isn't specified, then a user inside UML can\n" -+" mount anything on the host that's accessible to the user that's running\n" -+" it.\n" -+" The only flag currently supported is 'append', which specifies that all\n" -+" files opened by hostfs will be opened in append mode.\n\n" -+); -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ int len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = parent->d_inode->u.hostfs_i.host_filename; -+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len -= parent->d_name.len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], parent->d_name.name, -+ parent->d_name.len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+static char *inode_name(struct inode *ino, int extra) -+{ -+ struct dentry *dentry; -+ -+ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); -+ return(dentry_name(dentry, extra)); -+} -+ -+static int read_name(struct inode *ino, char *name) -+{ -+ /* The non-int inode fields are copied into ints by stat_file and -+ * then copied into the inode because passing the actual pointers -+ * in and having them treated as int * breaks on big-endian machines -+ */ -+ int err; -+ int i_dev, i_mode, i_nlink, i_blksize; -+ unsigned long long i_size; -+ unsigned long long i_ino; -+ unsigned long long i_blocks; -+ err = stat_file(name, &i_dev, &i_ino, &i_mode, &i_nlink, -+ &ino->i_uid, &ino->i_gid, &i_size, &ino->i_atime, -+ &ino->i_mtime, &ino->i_ctime, &i_blksize, &i_blocks); -+ if(err) return(err); -+ ino->i_ino = i_ino; -+ ino->i_dev = i_dev; -+ ino->i_mode = i_mode; -+ ino->i_nlink = i_nlink; -+ ino->i_size = i_size; -+ ino->i_blksize = i_blksize; -+ ino->i_blocks = i_blocks; -+ if(kdev_same(ino->i_sb->s_dev, ROOT_DEV) && (ino->i_uid == getuid())) -+ ino->i_uid = 0; -+ return(0); -+} -+ -+static char *follow_link(char *link) -+{ -+ int len, n; -+ char *name, *resolved, *end; -+ -+ len = 64; -+ while(1){ -+ n = -ENOMEM; -+ name = kmalloc(len, GFP_KERNEL); -+ if(name == NULL) -+ goto out; -+ -+ n = do_readlink(link, name, len); -+ if(n < len) -+ break; -+ len *= 2; -+ kfree(name); -+ } -+ if(n < 0) -+ goto out_free; -+ -+ if(*name == '/') -+ return(name); -+ -+ end = strrchr(link, '/'); -+ if(end == NULL) -+ return(name); -+ -+ *(end + 1) = '\0'; -+ len = strlen(link) + strlen(name) + 1; -+ -+ resolved = kmalloc(len, GFP_KERNEL); -+ if(resolved == NULL){ -+ n = -ENOMEM; -+ goto out_free; -+ } -+ -+ sprintf(resolved, "%s%s", link, name); -+ kfree(name); -+ kfree(link); -+ return(resolved); -+ -+ out_free: -+ kfree(name); -+ out: -+ return(ERR_PTR(n)); -+} -+ -+static int read_inode(struct inode *ino) -+{ -+ char *name; -+ int err; -+ -+ err = -ENOMEM; -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ goto out; -+ -+ if(file_type(name, NULL) == OS_TYPE_SYMLINK){ -+ name = follow_link(name); -+ if(IS_ERR(name)){ -+ err = PTR_ERR(name); -+ goto out; -+ } -+ } -+ -+ err = read_name(ino, name); -+ kfree(name); -+ out: -+ return(err); -+} -+ -+void hostfs_delete_inode(struct inode *ino) -+{ -+ if(ino->u.hostfs_i.host_filename) -+ kfree(ino->u.hostfs_i.host_filename); -+ ino->u.hostfs_i.host_filename = NULL; -+ -+ if(ino->u.hostfs_i.fd != -1) -+ close_file(&ino->u.hostfs_i.fd); -+ -+ ino->u.hostfs_i.mode = 0; -+ clear_inode(ino); -+} -+ -+int hostfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ /* do_statfs uses struct statfs64 internally, but the linux kernel -+ * struct statfs still has 32-bit versions for most of these fields, -+ * so we convert them here -+ */ -+ int err; -+ long long f_blocks; -+ long long f_bfree; -+ long long f_bavail; -+ long long f_files; -+ long long f_ffree; -+ -+ err = do_statfs(sb->s_root->d_inode->u.hostfs_i.host_filename, -+ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, -+ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), -+ &sf->f_namelen, sf->f_spare); -+ if(err) return(err); -+ sf->f_blocks = f_blocks; -+ sf->f_bfree = f_bfree; -+ sf->f_bavail = f_bavail; -+ sf->f_files = f_files; -+ sf->f_ffree = f_ffree; -+ sf->f_type = HOSTFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations hostfs_sbops = { -+ .put_inode = force_delete, -+ .delete_inode = hostfs_delete_inode, -+ .statfs = hostfs_statfs, -+}; -+ -+int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ void *dir; -+ char *name; -+ unsigned long long next, ino; -+ int error, len; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ dir = open_dir(name, &error); -+ kfree(name); -+ if(dir == NULL) return(-error); -+ next = file->f_pos; -+ while((name = read_dir(dir, &next, &ino, &len)) != NULL){ -+ error = (*filldir)(ent, name, len, file->f_pos, -+ ino, DT_UNKNOWN); -+ if(error) break; -+ file->f_pos = next; -+ } -+ close_dir(dir); -+ return(0); -+} -+ -+int hostfs_file_open(struct inode *ino, struct file *file) -+{ -+ char *name; -+ int mode = 0, r = 0, w = 0, fd; -+ -+ mode = file->f_mode & (FMODE_READ | FMODE_WRITE); -+ if((mode & ino->u.hostfs_i.mode) == mode) -+ return(0); -+ -+ /* The file may already have been opened, but with the wrong access, -+ * so this resets things and reopens the file with the new access. -+ */ -+ if(ino->u.hostfs_i.fd != -1){ -+ close_file(&ino->u.hostfs_i.fd); -+ ino->u.hostfs_i.fd = -1; -+ } -+ -+ ino->u.hostfs_i.mode |= mode; -+ if(ino->u.hostfs_i.mode & FMODE_READ) -+ r = 1; -+ if(ino->u.hostfs_i.mode & FMODE_WRITE) -+ w = 1; -+ if(w) -+ r = 1; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ fd = open_file(name, r, w, append); -+ kfree(name); -+ if(fd < 0) return(fd); -+ file_hostfs_i(file)->fd = fd; -+ -+ return(0); -+} -+ -+int hostfs_dir_open(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int hostfs_dir_release(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hostfs_file_fops = { -+ .owner = NULL, -+ .read = generic_file_read, -+ .write = generic_file_write, -+ .mmap = generic_file_mmap, -+ .open = hostfs_file_open, -+ .release = NULL, -+ .fsync = hostfs_fsync, -+}; -+ -+static struct file_operations hostfs_dir_fops = { -+ .owner = NULL, -+ .readdir = hostfs_readdir, -+ .open = hostfs_dir_open, -+ .release = hostfs_dir_release, -+ .fsync = hostfs_fsync, -+}; -+ -+int hostfs_writepage(struct page *page) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ unsigned long long base; -+ int count = PAGE_CACHE_SIZE; -+ int end_index = inode->i_size >> PAGE_CACHE_SHIFT; -+ int err; -+ -+ if (page->index >= end_index) -+ count = inode->i_size & (PAGE_CACHE_SIZE-1); -+ -+ buffer = kmap(page); -+ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; -+ -+ err = write_file(inode->u.hostfs_i.fd, &base, buffer, count); -+ if(err != count){ -+ ClearPageUptodate(page); -+ goto out; -+ } -+ -+ if (base > inode->i_size) -+ inode->i_size = base; -+ -+ if (PageError(page)) -+ ClearPageError(page); -+ err = 0; -+ -+ out: -+ kunmap(page); -+ -+ UnlockPage(page); -+ return err; -+} -+ -+int hostfs_readpage(struct file *file, struct page *page) -+{ -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ err = read_file(file_hostfs_i(file)->fd, &start, buffer, -+ PAGE_CACHE_SIZE); -+ if(err < 0) goto out; -+ -+ memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); -+ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ out: -+ kunmap(page); -+ UnlockPage(page); -+ return(err); -+} -+ -+int hostfs_prepare_write(struct file *file, struct page *page, -+ unsigned int from, unsigned int to) -+{ -+ char *buffer; -+ long long start, tmp; -+ int err; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ if(from != 0){ -+ tmp = start; -+ err = read_file(file_hostfs_i(file)->fd, &tmp, buffer, -+ from); -+ if(err < 0) goto out; -+ } -+ if(to != PAGE_CACHE_SIZE){ -+ start += to; -+ err = read_file(file_hostfs_i(file)->fd, &start, buffer + to, -+ PAGE_CACHE_SIZE - to); -+ if(err < 0) goto out; -+ } -+ err = 0; -+ out: -+ kunmap(page); -+ return(err); -+} -+ -+int hostfs_commit_write(struct file *file, struct page *page, unsigned from, -+ unsigned to) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) (page->index << PAGE_CACHE_SHIFT) + from; -+ buffer = kmap(page); -+ err = write_file(file_hostfs_i(file)->fd, &start, buffer + from, -+ to - from); -+ if(err > 0) err = 0; -+ if(!err && (start > inode->i_size)) -+ inode->i_size = start; -+ -+ kunmap(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_aops = { -+ .writepage = hostfs_writepage, -+ .readpage = hostfs_readpage, -+/* .set_page_dirty = __set_page_dirty_nobuffers, */ -+ .prepare_write = hostfs_prepare_write, -+ .commit_write = hostfs_commit_write -+}; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error) -+{ -+ struct inode *inode; -+ char *name; -+ int type, err = -ENOMEM, rdev; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out; -+ -+ inode->u.hostfs_i.host_filename = NULL; -+ inode->u.hostfs_i.fd = -1; -+ inode->u.hostfs_i.mode = 0; -+ insert_inode_hash(inode); -+ if(dentry){ -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ err = -ENOMEM; -+ goto out_put; -+ } -+ type = file_type(name, &rdev); -+ kfree(name); -+ } -+ else type = OS_TYPE_DIR; -+ inode->i_sb = sb; -+ -+ err = 0; -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_op = &page_symlink_inode_operations; -+ else if(type == OS_TYPE_DIR) -+ inode->i_op = &hostfs_dir_iops; -+ else inode->i_op = &hostfs_iops; -+ -+ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; -+ else inode->i_fop = &hostfs_file_fops; -+ -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_mapping->a_ops = &hostfs_link_aops; -+ else inode->i_mapping->a_ops = &hostfs_aops; -+ -+ switch (type) { -+ case OS_TYPE_CHARDEV: -+ init_special_inode(inode, S_IFCHR, rdev); -+ break; -+ case OS_TYPE_BLOCKDEV: -+ init_special_inode(inode, S_IFBLK, rdev); -+ break; -+ case OS_TYPE_FIFO: -+ init_special_inode(inode, S_IFIFO, 0); -+ break; -+ case OS_TYPE_SOCK: -+ init_special_inode(inode, S_IFSOCK, 0); -+ break; -+ } -+ -+ if(error) *error = err; -+ return(inode); -+ out_put: -+ make_bad_inode(inode); -+ iput(inode); -+ out: -+ if(error) *error = err; -+ return(NULL); -+} -+ -+int hostfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ struct inode *inode; -+ char *name; -+ int error, fd; -+ -+ inode = get_inode(dir->i_sb, dentry, &error); -+ if(error) return(error); -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ iput(inode); -+ return(-ENOMEM); -+ } -+ fd = file_create(name, -+ mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, -+ mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, -+ mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); -+ if(fd < 0) -+ error = fd; -+ else error = read_name(inode, name); -+ -+ kfree(name); -+ if(error){ -+ iput(inode); -+ return(error); -+ } -+ inode->u.hostfs_i.fd = fd; -+ inode->u.hostfs_i.mode = FMODE_READ | FMODE_WRITE; -+ d_instantiate(dentry, inode); -+ return(0); -+} -+ -+struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct inode *inode; -+ char *name; -+ int error; -+ -+ inode = get_inode(ino->i_sb, dentry, &error); -+ if(error != 0) return(ERR_PTR(error)); -+ name = dentry_name(dentry, 0); -+ if(name == NULL) return(ERR_PTR(-ENOMEM)); -+ error = read_name(inode, name); -+ kfree(name); -+ if(error){ -+ iput(inode); -+ if(error == -ENOENT) inode = NULL; -+ else return(ERR_PTR(error)); -+ } -+ d_add(dentry, inode); -+ dentry->d_op = &hostfs_dentry_ops; -+ return(NULL); -+} -+ -+static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int len; -+ -+ file = inode_name(ino, dentry->d_name.len + 1); -+ if(file == NULL) return(NULL); -+ strcat(file, "/"); -+ len = strlen(file); -+ strncat(file, dentry->d_name.name, dentry->d_name.len); -+ file[len + dentry->d_name.len] = '\0'; -+ return(file); -+} -+ -+int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ from_name = inode_dentry_name(ino, from); -+ if(from_name == NULL) -+ return(-ENOMEM); -+ to_name = dentry_name(to, 0); -+ if(to_name == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = link_file(to_name, from_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+int hostfs_unlink(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) return(-ENOMEM); -+ if(append) -+ return(-EPERM); -+ -+ err = unlink_file(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) -+{ -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) return(-ENOMEM); -+ err = make_symlink(file, to); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) -+{ -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) return(-ENOMEM); -+ err = do_mkdir(file, mode); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_rmdir(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) return(-ENOMEM); -+ err = do_rmdir(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) -+{ -+ struct inode *inode; -+ char *name; -+ int error; -+ -+ inode = get_inode(dir->i_sb, dentry, &error); -+ if(error) return(error); -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ iput(inode); -+ return(-ENOMEM); -+ } -+ init_special_inode(inode, mode, dev); -+ error = do_mknod(name, mode, dev); -+ if(!error) error = read_name(inode, name); -+ kfree(name); -+ if(error){ -+ iput(inode); -+ return(error); -+ } -+ d_instantiate(dentry, inode); -+ return(0); -+} -+ -+int hostfs_rename(struct inode *from_ino, struct dentry *from, -+ struct inode *to_ino, struct dentry *to) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ from_name = inode_dentry_name(from_ino, from); -+ if(from_name == NULL) -+ return(-ENOMEM); -+ to_name = inode_dentry_name(to_ino, to); -+ if(to_name == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = rename_file(from_name, to_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+void hostfs_truncate(struct inode *ino) -+{ -+ not_implemented(); -+} -+ -+int hostfs_permission(struct inode *ino, int desired) -+{ -+ char *name; -+ int r = 0, w = 0, x = 0, err; -+ -+ if(desired & MAY_READ) r = 1; -+ if(desired & MAY_WRITE) w = 1; -+ if(desired & MAY_EXEC) x = 1; -+ name = inode_name(ino, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = access_file(name, r, w, x); -+ kfree(name); -+ if(!err) err = vfs_permission(ino, desired); -+ return(err); -+} -+ -+int hostfs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct hostfs_iattr attrs; -+ char *name; -+ int err; -+ -+ if(append) -+ attr->ia_valid &= ~ATTR_SIZE; -+ -+ attrs.ia_valid = 0; -+ if(attr->ia_valid & ATTR_MODE){ -+ attrs.ia_valid |= HOSTFS_ATTR_MODE; -+ attrs.ia_mode = attr->ia_mode; -+ } -+ if(attr->ia_valid & ATTR_UID){ -+ if(kdev_same(dentry->d_inode->i_sb->s_dev, ROOT_DEV) && -+ (attr->ia_uid == 0)) -+ attr->ia_uid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_UID; -+ attrs.ia_uid = attr->ia_uid; -+ } -+ if(attr->ia_valid & ATTR_GID){ -+ if(kdev_same(dentry->d_inode->i_sb->s_dev, ROOT_DEV) && -+ (attr->ia_gid == 0)) -+ attr->ia_gid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_GID; -+ attrs.ia_gid = attr->ia_gid; -+ } -+ if(attr->ia_valid & ATTR_SIZE){ -+ attrs.ia_valid |= HOSTFS_ATTR_SIZE; -+ attrs.ia_size = attr->ia_size; -+ } -+ if(attr->ia_valid & ATTR_ATIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME; -+ attrs.ia_atime = attr->ia_atime; -+ } -+ if(attr->ia_valid & ATTR_MTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME; -+ attrs.ia_mtime = attr->ia_mtime; -+ } -+ if(attr->ia_valid & ATTR_CTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_CTIME; -+ attrs.ia_ctime = attr->ia_ctime; -+ } -+ if(attr->ia_valid & ATTR_ATIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; -+ } -+ if(attr->ia_valid & ATTR_MTIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; -+ } -+ name = dentry_name(dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = set_attr(name, &attrs); -+ kfree(name); -+ if(err) -+ return(err); -+ -+ return(inode_setattr(dentry->d_inode, attr)); -+} -+ -+int hostfs_getattr(struct dentry *dentry, struct iattr *attr) -+{ -+ not_implemented(); -+ return(-EINVAL); -+} -+ -+static struct inode_operations hostfs_iops = { -+ .create = hostfs_create, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ .getattr = hostfs_getattr, -+}; -+ -+static struct inode_operations hostfs_dir_iops = { -+ .create = hostfs_create, -+ .lookup = hostfs_lookup, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ .getattr = hostfs_getattr, -+}; -+ -+int hostfs_link_readpage(struct file *file, struct page *page) -+{ -+ char *buffer, *name; -+ long long start; -+ int err; -+ -+ start = page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ name = inode_name(page->mapping->host, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = do_readlink(name, buffer, PAGE_CACHE_SIZE); -+ kfree(name); -+ if(err == PAGE_CACHE_SIZE) -+ err = -E2BIG; -+ else if(err > 0){ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ } -+ kunmap(page); -+ UnlockPage(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_link_aops = { -+ .readpage = hostfs_link_readpage, -+}; -+ -+static char *get_root(char *mount_arg) -+{ -+ char *root, *slash = ""; -+ int len = 0; -+ -+ if(jail_dir != NULL){ -+ len += strlen(jail_dir); -+ if((*jail_dir == '\0') || -+ (jail_dir[strlen(jail_dir) - 1] != '/')) -+ slash = "/"; -+ len += strlen(slash); -+ } -+ -+ if((mount_arg == NULL) || (*mount_arg == '\0')) -+ mount_arg = DEFAULT_ROOT; -+ -+ len += strlen(mount_arg) + 1; -+ -+ root = kmalloc(len, GFP_KERNEL); -+ if(root == NULL) -+ return(NULL); -+ -+ if(jail_dir != NULL) -+ sprintf(root, "%s%s%s", jail_dir, slash, mount_arg); -+ else -+ strcpy(root, mount_arg); -+ -+ return(root); -+} -+ -+struct super_block *hostfs_read_super(struct super_block *sb, void *data, -+ int silent) -+{ -+ struct inode *root_inode; -+ char *root_dir; -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HOSTFS_SUPER_MAGIC; -+ sb->s_op = &hostfs_sbops; -+ -+ root_inode = get_inode(sb, NULL, NULL); -+ if(root_inode == NULL) -+ goto out; -+ -+ root_dir = get_root(data); -+ if(root_dir == NULL) -+ goto out_put; -+ -+ root_inode->u.hostfs_i.host_filename = root_dir; -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_free; -+ -+ if(read_inode(root_inode)) -+ goto out_dput; -+ return(sb); -+ -+ out_dput: -+ dput(sb->s_root); -+ out_free: -+ kfree(root_dir); -+ out_put: -+ make_bad_inode(root_inode); -+ iput(root_inode); -+ out: -+ return(NULL); -+} -+ -+DECLARE_FSTYPE(hostfs_type, "hostfs", hostfs_read_super, 0); -+ -+static int __init init_hostfs(void) -+{ -+ return(register_filesystem(&hostfs_type)); -+} -+ -+static void __exit exit_hostfs(void) -+{ -+ unregister_filesystem(&hostfs_type); -+} -+ -+module_init(init_hostfs) -+module_exit(exit_hostfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs_user.c um/arch/um/fs/hostfs/hostfs_user.c ---- orig/arch/um/fs/hostfs/hostfs_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hostfs/hostfs_user.c 2003-11-14 08:51:20.000000000 -0500 -@@ -0,0 +1,359 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <fcntl.h> -+#include <dirent.h> -+#include <errno.h> -+#include <utime.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/time.h> -+#include <sys/vfs.h> -+#include "hostfs.h" -+#include "kern_util.h" -+#include "user.h" -+ -+int stat_file(const char *path, int *dev_out, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) < 0) -+ return(-errno); -+ if(dev_out != NULL) *dev_out = buf.st_dev; -+ -+ /* See the Makefile for why STAT64_INO_FIELD is passed in -+ * by the build -+ */ -+ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; -+ if(mode_out != NULL) *mode_out = buf.st_mode; -+ if(nlink_out != NULL) *nlink_out = buf.st_nlink; -+ if(uid_out != NULL) *uid_out = buf.st_uid; -+ if(gid_out != NULL) *gid_out = buf.st_gid; -+ if(size_out != NULL) *size_out = buf.st_size; -+ if(atime_out != NULL) *atime_out = buf.st_atime; -+ if(mtime_out != NULL) *mtime_out = buf.st_mtime; -+ if(ctime_out != NULL) *ctime_out = buf.st_ctime; -+ if(blksize_out != NULL) *blksize_out = buf.st_blksize; -+ if(blocks_out != NULL) *blocks_out = buf.st_blocks; -+ return(0); -+} -+ -+int file_type(const char *path, int *rdev) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) < 0) -+ return(-errno); -+ if(rdev != NULL) -+ *rdev = buf.st_rdev; -+ -+ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int access_file(char *path, int r, int w, int x) -+{ -+ int mode = 0; -+ -+ if(r) mode = R_OK; -+ if(w) mode |= W_OK; -+ if(x) mode |= X_OK; -+ if(access(path, mode) != 0) return(-errno); -+ else return(0); -+} -+ -+int open_file(char *path, int r, int w, int append) -+{ -+ int mode = 0, fd; -+ -+ if(r && !w) -+ mode = O_RDONLY; -+ else if(!r && w) -+ mode = O_WRONLY; -+ else if(r && w) -+ mode = O_RDWR; -+ else panic("Impossible mode in open_file"); -+ -+ if(append) -+ mode |= O_APPEND; -+ fd = open64(path, mode); -+ if(fd < 0) return(-errno); -+ else return(fd); -+} -+ -+void *open_dir(char *path, int *err_out) -+{ -+ DIR *dir; -+ -+ dir = opendir(path); -+ *err_out = errno; -+ if(dir == NULL) return(NULL); -+ return(dir); -+} -+ -+char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out) -+{ -+ DIR *dir = stream; -+ struct dirent *ent; -+ -+ seekdir(dir, *pos); -+ ent = readdir(dir); -+ if(ent == NULL) return(NULL); -+ *len_out = strlen(ent->d_name); -+ *ino_out = ent->d_ino; -+ *pos = telldir(dir); -+ return(ent->d_name); -+} -+ -+int read_file(int fd, unsigned long long *offset, char *buf, int len) -+{ -+ int n, err; -+ -+ err = os_seek_file(fd, *offset); -+ if(err) -+ return(err); -+ -+ n = os_read_file(fd, buf, len); -+ if(n < 0) -+ return(n); -+ -+ *offset += n; -+ return(n); -+} -+ -+int write_file(int fd, unsigned long long *offset, const char *buf, int len) -+{ -+ int n, err; -+ -+ err = os_seek_file(fd, *offset); -+ if(err) -+ return(err); -+ -+ n = os_write_file(fd, buf, len); -+ if(n < 0) -+ return(n); -+ -+ *offset += n; -+ return(n); -+} -+ -+int lseek_file(int fd, long long offset, int whence) -+{ -+ int ret; -+ -+ ret = lseek64(fd, offset, whence); -+ if(ret < 0) return(-errno); -+ return(0); -+} -+ -+void close_file(void *stream) -+{ -+ close(*((int *) stream)); -+} -+ -+void close_dir(void *stream) -+{ -+ closedir(stream); -+} -+ -+int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox) -+{ -+ int mode, fd; -+ -+ mode = 0; -+ mode |= ur ? S_IRUSR : 0; -+ mode |= uw ? S_IWUSR : 0; -+ mode |= ux ? S_IXUSR : 0; -+ mode |= gr ? S_IRGRP : 0; -+ mode |= gw ? S_IWGRP : 0; -+ mode |= gx ? S_IXGRP : 0; -+ mode |= or ? S_IROTH : 0; -+ mode |= ow ? S_IWOTH : 0; -+ mode |= ox ? S_IXOTH : 0; -+ fd = open64(name, O_CREAT | O_RDWR, mode); -+ if(fd < 0) -+ return(-errno); -+ return(fd); -+} -+ -+int set_attr(const char *file, struct hostfs_iattr *attrs) -+{ -+ struct utimbuf buf; -+ int err, ma; -+ -+ if(attrs->ia_valid & HOSTFS_ATTR_MODE){ -+ if(chmod(file, attrs->ia_mode) != 0) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_UID){ -+ if(chown(file, attrs->ia_uid, -1)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_GID){ -+ if(chown(file, -1, attrs->ia_gid)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ -+ if(truncate64(file, attrs->ia_size)) return(-errno); -+ } -+ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; -+ if((attrs->ia_valid & ma) == ma){ -+ buf.actime = attrs->ia_atime; -+ buf.modtime = attrs->ia_mtime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ else { -+ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, &buf.modtime, NULL, -+ NULL, NULL); -+ if(err != 0) return(err); -+ buf.actime = attrs->ia_atime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &buf.actime, NULL, NULL, -+ NULL, NULL); -+ if(err != 0) return(err); -+ buf.modtime = attrs->ia_mtime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; -+ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &attrs->ia_atime, &attrs->ia_mtime, -+ NULL, NULL, NULL); -+ if(err != 0) return(err); -+ } -+ return(0); -+} -+ -+int make_symlink(const char *from, const char *to) -+{ -+ int err; -+ -+ err = symlink(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int unlink_file(const char *file) -+{ -+ int err; -+ -+ err = unlink(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mkdir(const char *file, int mode) -+{ -+ int err; -+ -+ err = mkdir(file, mode); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_rmdir(const char *file) -+{ -+ int err; -+ -+ err = rmdir(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mknod(const char *file, int mode, int dev) -+{ -+ int err; -+ -+ err = mknod(file, mode, dev); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int link_file(const char *to, const char *from) -+{ -+ int err; -+ -+ err = link(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_readlink(char *file, char *buf, int size) -+{ -+ int n; -+ -+ n = readlink(file, buf, size); -+ if(n < 0) -+ return(-errno); -+ if(n < size) -+ buf[n] = '\0'; -+ return(n); -+} -+ -+int rename_file(char *from, char *to) -+{ -+ int err; -+ -+ err = rename(from, to); -+ if(err < 0) return(-errno); -+ return(0); -+} -+ -+int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out) -+{ -+ struct statfs64 buf; -+ int err; -+ -+ err = statfs64(root, &buf); -+ if(err < 0) return(-errno); -+ *bsize_out = buf.f_bsize; -+ *blocks_out = buf.f_blocks; -+ *bfree_out = buf.f_bfree; -+ *bavail_out = buf.f_bavail; -+ *files_out = buf.f_files; -+ *ffree_out = buf.f_ffree; -+ memcpy(fsid_out, &buf.f_fsid, -+ sizeof(buf.f_fsid) > fsid_size ? fsid_size : -+ sizeof(buf.f_fsid)); -+ *namelen_out = buf.f_namelen; -+ spare_out[0] = buf.f_spare[0]; -+ spare_out[1] = buf.f_spare[1]; -+ spare_out[2] = buf.f_spare[2]; -+ spare_out[3] = buf.f_spare[3]; -+ spare_out[4] = buf.f_spare[4]; -+ spare_out[5] = buf.f_spare[5]; -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/Makefile um/arch/um/fs/hostfs/Makefile ---- orig/arch/um/fs/hostfs/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hostfs/Makefile 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,24 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino -+# to __st_ino. It stayed in the same place, so as long as the correct name -+# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. -+ -+STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ -+ echo __)st_ino -+ -+USER_CFLAGS := $(USER_CFLAGS) -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) -+ -+O_TARGET := hostfs.o -+obj-y = hostfs_kern.o hostfs_user.o -+obj-m = $(O_TARGET) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -diff -Naur -X ../exclude-files orig/arch/um/fs/hppfs/hppfs_kern.c um/arch/um/fs/hppfs/hppfs_kern.c ---- orig/arch/um/fs/hppfs/hppfs_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hppfs/hppfs_kern.c 2004-01-08 23:35:58.000000000 -0500 -@@ -0,0 +1,737 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/fs.h> -+#include <linux/module.h> -+#include <linux/slab.h> -+#include <linux/list.h> -+#include <linux/kernel.h> -+#include <linux/ctype.h> -+#include <asm/uaccess.h> -+#include "os.h" -+ -+struct hppfs_data { -+ struct list_head list; -+ char contents[PAGE_SIZE - sizeof(struct list_head)]; -+}; -+ -+struct hppfs_private { -+ struct file proc_file; -+ int host_fd; -+ loff_t len; -+ struct hppfs_data *contents; -+}; -+ -+#define HPPFS_SUPER_MAGIC 0xb00000ee -+ -+static struct super_operations hppfs_sbops; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error); -+ -+static int is_pid(struct dentry *dentry) -+{ -+ struct super_block *sb; -+ int i; -+ -+ sb = dentry->d_sb; -+ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) -+ return(0); -+ -+ for(i = 0; i < dentry->d_name.len; i++){ -+ if(!isdigit(dentry->d_name.name[i])) -+ return(0); -+ } -+ return(1); -+} -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ const char *seg_name; -+ int len, seg_len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)) -+ len += strlen("pid") + 1; -+ else len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = "proc"; -+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)){ -+ seg_name = "pid"; -+ seg_len = strlen("pid"); -+ } -+ else { -+ seg_name = parent->d_name.name; -+ seg_len = parent->d_name.len; -+ } -+ -+ len -= seg_len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], seg_name, seg_len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+struct dentry_operations hppfs_dentry_ops = { -+}; -+ -+static int file_removed(struct dentry *dentry, const char *file) -+{ -+ char *host_file; -+ int extra, fd; -+ -+ extra = 0; -+ if(file != NULL) extra += strlen(file) + 1; -+ -+ host_file = dentry_name(dentry, extra + strlen("/remove")); -+ if(host_file == NULL){ -+ printk("file_removed : allocation failed\n"); -+ return(-ENOMEM); -+ } -+ -+ if(file != NULL){ -+ strcat(host_file, "/"); -+ strcat(host_file, file); -+ } -+ strcat(host_file, "/remove"); -+ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ kfree(host_file); -+ if(fd >= 0){ -+ os_close_file(fd); -+ return(1); -+ } -+ return(0); -+} -+ -+static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct dentry *proc_dentry; -+ struct inode *inode; -+ int err, deleted; -+ -+ deleted = file_removed(dentry, NULL); -+ if(deleted < 0) -+ return(ERR_PTR(deleted)); -+ else if(deleted) -+ return(ERR_PTR(-ENOENT)); -+ -+ proc_dentry = lookup_hash(&dentry->d_name, ino->u.hppfs_i.proc_dentry); -+ if(IS_ERR(proc_dentry)) -+ return(proc_dentry); -+ -+ inode = get_inode(ino->i_sb, proc_dentry, &err); -+ if(err != 0) -+ return(ERR_PTR(err)); -+ -+ d_add(dentry, inode); -+ dentry->d_op = &hppfs_dentry_ops; -+ return(NULL); -+} -+ -+static struct inode_operations hppfs_file_iops = { -+}; -+ -+static struct inode_operations hppfs_dir_iops = { -+ .lookup = hppfs_lookup, -+}; -+ -+static ssize_t read_proc(struct file *file, char *buf, ssize_t count, -+ loff_t *ppos, int is_user) -+{ -+ ssize_t (*read)(struct file *, char *, size_t, loff_t *); -+ ssize_t n; -+ -+ read = file->f_dentry->d_inode->i_fop->read; -+ if(read == NULL) -+ return(-EOPNOTSUPP); -+ -+ if(!is_user) -+ set_fs(KERNEL_DS); -+ -+ n = (*read)(file, buf, count, &file->f_pos); -+ -+ if(!is_user) -+ set_fs(USER_DS); -+ -+ if(ppos) *ppos = file->f_pos; -+ return(n); -+} -+ -+static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) -+{ -+ ssize_t n; -+ int cur, err; -+ char *new_buf; -+ -+ n = -ENOMEM; -+ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(new_buf == NULL){ -+ printk("hppfs_read_file : kmalloc failed\n"); -+ goto out; -+ } -+ n = 0; -+ while(count > 0){ -+ cur = min_t(ssize_t, count, PAGE_SIZE); -+ err = os_read_file(fd, new_buf, cur); -+ if(err < 0){ -+ printk("hppfs_read : read failed, err = %d\n", -err); -+ n = err; -+ goto out_free; -+ } -+ else if(err == 0) -+ break; -+ -+ if(copy_to_user(buf, new_buf, err)){ -+ n = -EFAULT; -+ goto out_free; -+ } -+ n += err; -+ count -= err; -+ } -+ out_free: -+ kfree(new_buf); -+ out: -+ return(n); -+} -+ -+static ssize_t hppfs_read(struct file *file, char *buf, size_t count, -+ loff_t *ppos) -+{ -+ struct hppfs_private *hppfs = file->private_data; -+ struct hppfs_data *data; -+ loff_t off; -+ int err; -+ -+ if(hppfs->contents != NULL){ -+ if(*ppos >= hppfs->len) return(0); -+ -+ data = hppfs->contents; -+ off = *ppos; -+ while(off >= sizeof(data->contents)){ -+ data = list_entry(data->list.next, struct hppfs_data, -+ list); -+ off -= sizeof(data->contents); -+ } -+ -+ if(off + count > hppfs->len) -+ count = hppfs->len - off; -+ copy_to_user(buf, &data->contents[off], count); -+ *ppos += count; -+ } -+ else if(hppfs->host_fd != -1){ -+ err = os_seek_file(hppfs->host_fd, *ppos); -+ if(err < 0){ -+ printk("hppfs_read : seek failed, err = %d\n", -err); -+ return(err); -+ } -+ count = hppfs_read_file(hppfs->host_fd, buf, count); -+ if(count > 0) -+ *ppos += count; -+ } -+ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); -+ -+ return(count); -+} -+ -+static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, -+ loff_t *ppos) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ ssize_t (*write)(struct file *, const char *, size_t, loff_t *); -+ int err; -+ -+ write = proc_file->f_dentry->d_inode->i_fop->write; -+ if(write == NULL) -+ return(-EOPNOTSUPP); -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*write)(proc_file, buf, len, &proc_file->f_pos); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int open_host_sock(char *host_file, int *filter_out) -+{ -+ char *end; -+ int fd; -+ -+ end = &host_file[strlen(host_file)]; -+ strcpy(end, "/rw"); -+ *filter_out = 1; -+ fd = os_connect_socket(host_file); -+ if(fd >= 0) -+ return(fd); -+ -+ strcpy(end, "/r"); -+ *filter_out = 0; -+ fd = os_connect_socket(host_file); -+ return(fd); -+} -+ -+static void free_contents(struct hppfs_data *head) -+{ -+ struct hppfs_data *data; -+ struct list_head *ele, *next; -+ -+ if(head == NULL) return; -+ -+ list_for_each_safe(ele, next, &head->list){ -+ data = list_entry(ele, struct hppfs_data, list); -+ kfree(data); -+ } -+ kfree(head); -+} -+ -+static struct hppfs_data *hppfs_get_data(int fd, int filter, -+ struct file *proc_file, -+ struct file *hppfs_file, -+ loff_t *size_out) -+{ -+ struct hppfs_data *data, *new, *head; -+ int n, err; -+ -+ err = -ENOMEM; -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk("hppfs_get_data : head allocation failed\n"); -+ goto failed; -+ } -+ -+ INIT_LIST_HEAD(&data->list); -+ -+ head = data; -+ *size_out = 0; -+ -+ if(filter){ -+ while((n = read_proc(proc_file, data->contents, -+ sizeof(data->contents), NULL, 0)) > 0) { -+ err = os_write_file(fd, data->contents, n); -+ if(err != n) -+ printk("hppfs_get_data : failed to write out " -+ "%d bytes, err = %d\n", n, -err); -+ } -+ err = os_shutdown_socket(fd, 0, 1); -+ if(err < 0){ -+ printk("hppfs_get_data : failed to shut down " -+ "socket\n"); -+ goto failed_free; -+ } -+ } -+ while(1){ -+ n = os_read_file(fd, data->contents, sizeof(data->contents)); -+ if(n < 0){ -+ err = n; -+ printk("hppfs_get_data : read failed, err = %d\n", -n); -+ goto failed_free; -+ } -+ else if(n == 0) -+ break; -+ -+ *size_out += n; -+ -+ if(n < sizeof(data->contents)) -+ break; -+ -+ new = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(new == 0){ -+ printk("hppfs_get_data : data allocation failed\n"); -+ err = -ENOMEM; -+ goto failed_free; -+ } -+ -+ INIT_LIST_HEAD(&new->list); -+ list_add(&new->list, &data->list); -+ data = new; -+ } -+ return(head); -+ -+ failed_free: -+ free_contents(head); -+ failed: -+ return(ERR_PTR(err)); -+} -+ -+static struct hppfs_private *hppfs_data(void) -+{ -+ struct hppfs_private *data; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL) -+ return(data); -+ -+ *data = ((struct hppfs_private ) { .host_fd = -1, -+ .len = -1, -+ .contents = NULL } ); -+ return(data); -+} -+ -+static int hppfs_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ char *host_file; -+ int err, fd, type, filter; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ host_file = dentry_name(file->f_dentry, strlen("/rw")); -+ if(host_file == NULL) -+ goto out_free2; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free1; -+ -+ type = os_file_type(host_file); -+ if(type == OS_TYPE_FILE){ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ if(fd >= 0) -+ data->host_fd = fd; -+ else printk("hppfs_open : failed to open '%s', err = %d\n", -+ host_file, -fd); -+ -+ data->contents = NULL; -+ } -+ else if(type == OS_TYPE_DIR){ -+ fd = open_host_sock(host_file, &filter); -+ if(fd >= 0){ -+ data->contents = hppfs_get_data(fd, filter, -+ &data->proc_file, -+ file, &data->len); -+ if(!IS_ERR(data->contents)) -+ data->host_fd = fd; -+ } -+ else printk("hppfs_open : failed to open a socket in " -+ "'%s', err = %d\n", host_file, -fd); -+ } -+ kfree(host_file); -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free1: -+ kfree(host_file); -+ out_free2: -+ free_contents(data->contents); -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static int hppfs_dir_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ int err; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free; -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free: -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ loff_t (*llseek)(struct file *, loff_t, int); -+ loff_t ret; -+ -+ llseek = proc_file->f_dentry->d_inode->i_fop->llseek; -+ if(llseek != NULL){ -+ ret = (*llseek)(proc_file, off, where); -+ if(ret < 0) -+ return(ret); -+ } -+ -+ return(default_llseek(file, off, where)); -+} -+ -+struct hppfs_dirent { -+ void *vfs_dirent; -+ filldir_t filldir; -+ struct dentry *dentry; -+}; -+ -+static int hppfs_filldir(void *d, const char *name, int size, -+ loff_t offset, ino_t inode, unsigned int type) -+{ -+ struct hppfs_dirent *dirent = d; -+ -+ if(file_removed(dirent->dentry, name)) -+ return(0); -+ -+ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, -+ inode, type)); -+} -+ -+static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ int (*readdir)(struct file *, void *, filldir_t); -+ struct hppfs_dirent dirent = ((struct hppfs_dirent) -+ { .vfs_dirent = ent, -+ .filldir = filldir, -+ .dentry = file->f_dentry } ); -+ int err; -+ -+ readdir = proc_file->f_dentry->d_inode->i_fop->readdir; -+ if(readdir == NULL) -+ return(-EOPNOTSUPP); -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*readdir)(proc_file, &dirent, hppfs_filldir); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hppfs_file_fops = { -+ .owner = NULL, -+ .llseek = hppfs_llseek, -+ .read = hppfs_read, -+ .write = hppfs_write, -+ .open = hppfs_open, -+}; -+ -+static struct file_operations hppfs_dir_fops = { -+ .owner = NULL, -+ .readdir = hppfs_readdir, -+ .open = hppfs_dir_open, -+ .fsync = hppfs_fsync, -+}; -+ -+static int hppfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ sf->f_blocks = 0; -+ sf->f_bfree = 0; -+ sf->f_bavail = 0; -+ sf->f_files = 0; -+ sf->f_ffree = 0; -+ sf->f_type = HPPFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations hppfs_sbops = { -+ .put_inode = force_delete, -+ .delete_inode = NULL, -+ .statfs = hppfs_statfs, -+}; -+ -+static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*readlink)(struct dentry *, char *, int); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ readlink = proc_dentry->d_inode->i_op->readlink; -+ if(readlink == NULL) -+ return(-EOPNOTSUPP); -+ n = (*readlink)(proc_dentry, buffer, buflen); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*follow_link)(struct dentry *, struct nameidata *); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ follow_link = proc_dentry->d_inode->i_op->follow_link; -+ if(follow_link == NULL) -+ return(-EOPNOTSUPP); -+ n = (*follow_link)(proc_dentry, nd); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static struct inode_operations hppfs_link_iops = { -+ .readlink = hppfs_readlink, -+ .follow_link = hppfs_follow_link, -+}; -+ -+static void read_inode(struct inode *ino) -+{ -+ struct inode *proc_ino; -+ -+ proc_ino = ino->u.hppfs_i.proc_dentry->d_inode; -+ ino->i_uid = proc_ino->i_uid; -+ ino->i_gid = proc_ino->i_gid; -+ ino->i_atime = proc_ino->i_atime; -+ ino->i_mtime = proc_ino->i_mtime; -+ ino->i_ctime = proc_ino->i_ctime; -+ ino->i_ino = proc_ino->i_ino; -+ ino->i_dev = proc_ino->i_dev; -+ ino->i_mode = proc_ino->i_mode; -+ ino->i_nlink = proc_ino->i_nlink; -+ ino->i_size = proc_ino->i_size; -+ ino->i_blksize = proc_ino->i_blksize; -+ ino->i_blocks = proc_ino->i_blocks; -+} -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error) -+{ -+ struct inode *inode; -+ int err = -ENOMEM; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out; -+ -+ insert_inode_hash(inode); -+ if(S_ISDIR(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_dir_iops; -+ inode->i_fop = &hppfs_dir_fops; -+ } -+ else if(S_ISLNK(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_link_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ else { -+ inode->i_op = &hppfs_file_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ -+ inode->i_sb = sb; -+ inode->u.hppfs_i.proc_dentry = dentry; -+ -+ read_inode(inode); -+ err = 0; -+ -+ if(error) *error = err; -+ return(inode); -+ out: -+ if(error) *error = err; -+ return(NULL); -+} -+ -+static struct super_block *hppfs_read_super(struct super_block *sb, void *d, -+ int silent) -+{ -+ struct inode *root_inode; -+ struct file_system_type *procfs; -+ struct super_block *proc_sb; -+ -+ procfs = get_fs_type("proc"); -+ if(procfs == NULL) -+ goto out; -+ -+ if(list_empty(&procfs->fs_supers)) -+ goto out; -+ -+ proc_sb = list_entry(procfs->fs_supers.next, struct super_block, -+ s_instances); -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HPPFS_SUPER_MAGIC; -+ sb->s_op = &hppfs_sbops; -+ -+ dget(proc_sb->s_root); -+ root_inode = get_inode(sb, proc_sb->s_root, NULL); -+ if(root_inode == NULL) -+ goto out_dput; -+ -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ return(sb); -+ -+ out_put: -+ iput(root_inode); -+ out_dput: -+ dput(proc_sb->s_root); -+ out: -+ return(NULL); -+} -+ -+DECLARE_FSTYPE(hppfs_type, "hppfs", hppfs_read_super, 0); -+ -+static int __init init_hppfs(void) -+{ -+ return(register_filesystem(&hppfs_type)); -+} -+ -+static void __exit exit_hppfs(void) -+{ -+ unregister_filesystem(&hppfs_type); -+} -+ -+module_init(init_hppfs) -+module_exit(exit_hppfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/fs/hppfs/Makefile um/arch/um/fs/hppfs/Makefile ---- orig/arch/um/fs/hppfs/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/hppfs/Makefile 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,10 @@ -+O_TARGET := hppfs.o -+obj-y = hppfs_kern.o #hppfs_user.o -+obj-m = $(O_TARGET) -+ -+CFLAGS_hppfs_kern.o := $(CFLAGS) -+#CFLAGS_hppfs_user.o := $(USER_CFLAGS) -+ -+override CFLAGS = -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/fs/Makefile um/arch/um/fs/Makefile ---- orig/arch/um/fs/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/fs/Makefile 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+subdir-y = -+subdir-m = -+ -+subdir-$(CONFIG_HOSTFS) += hostfs -+subdir-$(CONFIG_HPPFS) += hppfs -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+obj-m += $(join $(subdir-m),$(subdir-m:%=/%.o)) -+ -+include $(TOPDIR)/Rules.make -+ -+dep: -+ -+clean: -+ -+archmrproper: -diff -Naur -X ../exclude-files orig/arch/um/include/2_5compat.h um/arch/um/include/2_5compat.h ---- orig/arch/um/include/2_5compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/2_5compat.h 2003-11-07 02:49:26.000000000 -0500 -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __2_5_COMPAT_H__ -+#define __2_5_COMPAT_H__ -+ -+#include "linux/version.h" -+ -+#define INIT_ELV(queue, elv) elevator_init(elv, ELV_NOOP) -+ -+#define ELV_NOOP ELEVATOR_NOOP -+ -+#define INIT_HARDSECT(arr, maj, sizes) arr[maj] = sizes -+ -+#define IS_WRITE(req) ((req)->cmd == WRITE) -+ -+#define SET_PRI(task) \ -+ do { (task)->nice = 20; (task)->counter = -100; } while(0); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/chan_kern.h um/arch/um/include/chan_kern.h ---- orig/arch/um/include/chan_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/chan_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_KERN_H__ -+#define __CHAN_KERN_H__ -+ -+#include "linux/tty.h" -+#include "linux/list.h" -+#include "chan_user.h" -+ -+struct chan { -+ struct list_head list; -+ char *dev; -+ unsigned int primary:1; -+ unsigned int input:1; -+ unsigned int output:1; -+ unsigned int opened:1; -+ int fd; -+ enum chan_init_pri pri; -+ struct chan_ops *ops; -+ void *data; -+}; -+ -+extern void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev); -+extern int parse_chan_pair(char *str, struct list_head *chans, int pri, -+ int device, struct chan_opts *opts); -+extern int open_chan(struct list_head *chans); -+extern int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq); -+extern int console_write_chan(struct list_head *chans, const char *buf, -+ int len); -+extern void close_chan(struct list_head *chans); -+extern void chan_enable_winch(struct list_head *chans, void *line); -+extern void enable_chan(struct list_head *chans, void *data); -+extern int chan_window_size(struct list_head *chans, -+ unsigned short *rows_out, -+ unsigned short *cols_out); -+extern int chan_out_fd(struct list_head *chans); -+extern int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/chan_user.h um/arch/um/include/chan_user.h ---- orig/arch/um/include/chan_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/chan_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_USER_H__ -+#define __CHAN_USER_H__ -+ -+#include "init.h" -+ -+struct chan_opts { -+ void (*announce)(char *dev_name, int dev); -+ char *xterm_title; -+ int raw; -+ unsigned long tramp_stack; -+ int in_kernel; -+}; -+ -+enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE }; -+ -+struct chan_ops { -+ char *type; -+ void *(*init)(char *, int, struct chan_opts *); -+ int (*open)(int, int, int, void *, char **); -+ void (*close)(int, void *); -+ int (*read)(int, char *, void *); -+ int (*write)(int, const char *, int, void *); -+ int (*console_write)(int, const char *, int, void *); -+ int (*window_size)(int, void *, unsigned short *, unsigned short *); -+ void (*free)(void *); -+ int winch; -+}; -+ -+extern struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, tty_ops, -+ xterm_ops; -+ -+extern void generic_close(int fd, void *unused); -+extern int generic_read(int fd, char *c_out, void *unused); -+extern int generic_write(int fd, const char *buf, int n, void *unused); -+extern int generic_console_write(int fd, const char *buf, int n, void *state); -+extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out); -+extern void generic_free(void *data); -+ -+extern void register_winch(int fd, void *device_data); -+extern void register_winch_irq(int fd, int tty_fd, int pid, void *line); -+ -+#define __channel_help(fn, prefix) \ -+__uml_help(fn, prefix "[0-9]*=<channel description>\n" \ -+" Attach a console or serial line to a host channel. See\n" \ -+" http://user-mode-linux.sourceforge.net/input.html for a complete\n" \ -+" description of this switch.\n\n" \ -+); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/choose-mode.h um/arch/um/include/choose-mode.h ---- orig/arch/um/include/choose-mode.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/choose-mode.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHOOSE_MODE_H__ -+#define __CHOOSE_MODE_H__ -+ -+#include "uml-config.h" -+ -+#if defined(UML_CONFIG_MODE_TT) && defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (mode_tt ? (tt) : (skas)) -+ -+#elif defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (skas) -+ -+#elif defined(UML_CONFIG_MODE_TT) -+#define CHOOSE_MODE(tt, skas) (tt) -+#endif -+ -+#define CHOOSE_MODE_PROC(tt, skas, args...) \ -+ CHOOSE_MODE(tt(args), skas(args)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame.h um/arch/um/include/frame.h ---- orig/arch/um/include/frame.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/frame.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_H_ -+#define __FRAME_H_ -+ -+#include "sysdep/frame.h" -+ -+struct frame_common { -+ void *data; -+ int len; -+ int sig_index; -+ int sr_index; -+ int sr_relative; -+ int sp_index; -+ struct arch_frame_data arch; -+}; -+ -+struct sc_frame { -+ struct frame_common common; -+ int sc_index; -+}; -+ -+extern struct sc_frame signal_frame_sc; -+ -+extern struct sc_frame signal_frame_sc_sr; -+ -+struct si_frame { -+ struct frame_common common; -+ int sip_index; -+ int si_index; -+ int ucp_index; -+ int uc_index; -+}; -+ -+extern struct si_frame signal_frame_si; -+ -+extern void capture_signal_stack(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame_kern.h um/arch/um/include/frame_kern.h ---- orig/arch/um/include/frame_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/frame_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_H_ -+#define __FRAME_KERN_H_ -+ -+#include "frame.h" -+#include "sysdep/frame_kern.h" -+ -+extern int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, -+ sigset_t *mask); -+extern int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame_user.h um/arch/um/include/frame_user.h ---- orig/arch/um/include/frame_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/frame_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_H_ -+#define __FRAME_USER_H_ -+ -+#include "sysdep/frame_user.h" -+#include "frame.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/helper.h um/arch/um/include/helper.h ---- orig/arch/um/include/helper.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/helper.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __HELPER_H__ -+#define __HELPER_H__ -+ -+extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out); -+extern int run_helper_thread(int (*proc)(void *), void *arg, -+ unsigned int flags, unsigned long *stack_out, -+ int stack_order); -+extern int helper_wait(int pid); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/init.h um/arch/um/include/init.h ---- orig/arch/um/include/init.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/init.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,114 @@ -+#ifndef _LINUX_UML_INIT_H -+#define _LINUX_UML_INIT_H -+ -+/* These macros are used to mark some functions or -+ * initialized data (doesn't apply to uninitialized data) -+ * as `initialization' functions. The kernel can take this -+ * as hint that the function is used only during the initialization -+ * phase and free up used memory resources after -+ * -+ * Usage: -+ * For functions: -+ * -+ * You should add __init immediately before the function name, like: -+ * -+ * static void __init initme(int x, int y) -+ * { -+ * extern int z; z = x * y; -+ * } -+ * -+ * If the function has a prototype somewhere, you can also add -+ * __init between closing brace of the prototype and semicolon: -+ * -+ * extern int initialize_foobar_device(int, int, int) __init; -+ * -+ * For initialized data: -+ * You should insert __initdata between the variable name and equal -+ * sign followed by value, e.g.: -+ * -+ * static int init_variable __initdata = 0; -+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... }; -+ * -+ * Don't forget to initialize data not at file scope, i.e. within a function, -+ * as gcc otherwise puts the data into the bss section and not into the init -+ * section. -+ * -+ * Also note, that this data cannot be "const". -+ */ -+ -+#ifndef _LINUX_INIT_H -+typedef int (*initcall_t)(void); -+typedef void (*exitcall_t)(void); -+ -+#define __init __attribute__ ((__section__ (".text.init"))) -+#define __exit __attribute__ ((unused, __section__(".text.exit"))) -+#define __initdata __attribute__ ((__section__ (".data.init"))) -+ -+#endif -+ -+#ifndef MODULE -+struct uml_param { -+ const char *str; -+ int (*setup_func)(char *, int *); -+}; -+ -+extern initcall_t __uml_initcall_start, __uml_initcall_end; -+extern initcall_t __uml_postsetup_start, __uml_postsetup_end; -+extern const char *__uml_help_start, *__uml_help_end; -+#endif -+ -+#define __uml_initcall(fn) \ -+ static initcall_t __uml_initcall_##fn __uml_init_call = fn -+ -+#define __uml_exitcall(fn) \ -+ static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn -+ -+extern struct uml_param __uml_setup_start, __uml_setup_end; -+ -+#define __uml_postsetup(fn) \ -+ static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn -+ -+#define __non_empty_string(dummyname,string) \ -+ struct __uml_non_empty_string_struct_##dummyname \ -+ { \ -+ char _string[sizeof(string)-2]; \ -+ } -+ -+#ifndef MODULE -+#define __uml_setup(str, fn, help...) \ -+ __non_empty_string(fn ##_setup, str); \ -+ __uml_help(fn, help); \ -+ static char __uml_setup_str_##fn[] __initdata = str; \ -+ static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn } -+#else -+#define __uml_setup(str, fn, help...) \ -+ -+#endif -+ -+#define __uml_help(fn, help...) \ -+ __non_empty_string(fn ##__help, help); \ -+ static char __uml_help_str_##fn[] __initdata = help; \ -+ static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn -+ -+/* -+ * Mark functions and data as being only used at initialization -+ * or exit time. -+ */ -+#define __uml_init_setup __attribute__ ((unused,__section__ (".uml.setup.init"))) -+#define __uml_setup_help __attribute__ ((unused,__section__ (".uml.help.init"))) -+#define __uml_init_call __attribute__ ((unused,__section__ (".uml.initcall.init"))) -+#define __uml_postsetup_call __attribute__ ((unused,__section__ (".uml.postsetup.init"))) -+#define __uml_exit_call __attribute__ ((unused,__section__ (".uml.exitcall.exit"))) -+ -+#endif /* _LINUX_UML_INIT_H */ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/initrd.h um/arch/um/include/initrd.h ---- orig/arch/um/include/initrd.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/initrd.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __INITRD_USER_H__ -+#define __INITRD_USER_H__ -+ -+extern int load_initrd(char *filename, void *buf, int size); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/irq_kern.h um/arch/um/include/irq_kern.h ---- orig/arch/um/include/irq_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/irq_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_KERN_H__ -+#define __IRQ_KERN_H__ -+ -+#include "linux/interrupt.h" -+ -+extern int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/irq_user.h um/arch/um/include/irq_user.h ---- orig/arch/um/include/irq_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/irq_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_USER_H__ -+#define __IRQ_USER_H__ -+ -+enum { IRQ_READ, IRQ_WRITE }; -+ -+extern void sigio_handler(int sig, union uml_pt_regs *regs); -+extern int activate_fd(int irq, int fd, int type, void *dev_id); -+extern void free_irq_by_irq_and_dev(int irq, void *dev_id); -+extern void free_irq_by_fd(int fd); -+extern void reactivate_fd(int fd, int irqnum); -+extern void deactivate_fd(int fd, int irqnum); -+extern void forward_interrupts(int pid); -+extern void init_irq_signals(int on_sigstack); -+extern void forward_ipi(int fd, int pid); -+extern void free_irq_later(int irq, void *dev_id); -+extern int activate_ipi(int fd, int pid); -+extern unsigned long irq_lock(void); -+extern void irq_unlock(unsigned long flags); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/kern.h um/arch/um/include/kern.h ---- orig/arch/um/include/kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_H__ -+#define __KERN_H__ -+ -+/* These are all user-mode things which are convenient to call directly -+ * from kernel code and for which writing a wrapper is too much of a pain. -+ * The regular include files can't be included because this file is included -+ * only into kernel code, and user-space includes conflict with kernel -+ * includes. -+ */ -+ -+extern int errno; -+ -+extern int clone(int (*proc)(void *), void *sp, int flags, void *data); -+extern int sleep(int); -+extern int printf(char *fmt, ...); -+extern char *strerror(int errnum); -+extern char *ptsname(int __fd); -+extern int munmap(void *, int); -+extern void *sbrk(int increment); -+extern void *malloc(int size); -+extern void perror(char *err); -+extern int kill(int pid, int sig); -+extern int getuid(void); -+extern int pause(void); -+extern int write(int, const void *, int); -+extern int exit(int); -+extern int close(int); -+extern int read(unsigned int, char *, int); -+extern int pipe(int *); -+extern int sched_yield(void); -+extern int ptrace(int op, int pid, long addr, long data); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/kern_util.h um/arch/um/include/kern_util.h ---- orig/arch/um/include/kern_util.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/kern_util.h 2003-11-10 00:58:10.000000000 -0500 -@@ -0,0 +1,123 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_UTIL_H__ -+#define __KERN_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int ncpus; -+extern char *linux_prog; -+extern char *gdb_init; -+extern int kmalloc_ok; -+extern int timer_irq_inited; -+extern int jail; -+extern int nsyscalls; -+ -+#define UML_ROUND_DOWN(addr) ((void *)(((unsigned long) addr) & PAGE_MASK)) -+#define UML_ROUND_UP(addr) \ -+ UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1) -+ -+extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg); -+extern unsigned long stack_sp(unsigned long page); -+extern int kernel_thread_proc(void *data); -+extern void syscall_segv(int sig); -+extern int current_pid(void); -+extern unsigned long alloc_stack(int order, int atomic); -+extern int do_signal(int error); -+extern int is_stack_fault(unsigned long sp); -+extern unsigned long segv(unsigned long address, unsigned long ip, -+ int is_write, int is_user, void *sc); -+extern unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int is_write, int is_user, -+ int *code_out); -+extern void syscall_ready(void); -+extern int segv_syscall(void); -+extern void kern_finish_exec(void *task, int new_pid, unsigned long stack); -+extern int page_size(void); -+extern int page_mask(void); -+extern int need_finish_fork(void); -+extern void free_stack(unsigned long stack, int order); -+extern void add_input_request(int op, void (*proc)(int), void *arg); -+extern int sys_execve(char *file, char **argv, char **env); -+extern char *current_cmd(void); -+extern void timer_handler(int sig, union uml_pt_regs *regs); -+extern int set_signals(int enable); -+extern void force_sigbus(void); -+extern int pid_to_processor_id(int pid); -+extern void block_signals(void); -+extern void unblock_signals(void); -+extern void deliver_signals(void *t); -+extern int next_syscall_index(int max); -+extern int next_trap_index(int max); -+extern void cpu_idle(void); -+extern void finish_fork(void); -+extern void paging_init(void); -+extern void init_flush_vm(void); -+extern void *syscall_sp(void *t); -+extern void syscall_trace(void); -+extern int hz(void); -+extern void idle_timer(void); -+extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); -+extern int external_pid(void *t); -+extern void boot_timer_handler(int sig); -+extern void interrupt_end(void); -+extern void initial_thread_cb(void (*proc)(void *), void *arg); -+extern int debugger_signal(int status, int pid); -+extern void debugger_parent_signal(int status, int pid); -+extern void child_signal(int pid, int status); -+extern int init_ptrace_proxy(int idle_pid, int startup, int stop); -+extern int init_parent_proxy(int pid); -+extern void check_stack_overflow(void *ptr); -+extern void relay_signal(int sig, union uml_pt_regs *regs); -+extern void not_implemented(void); -+extern int user_context(unsigned long sp); -+extern void timer_irq(union uml_pt_regs *regs); -+extern void unprotect_stack(unsigned long stack); -+extern void do_uml_exitcalls(void); -+extern int attach_debugger(int idle_pid, int pid, int stop); -+extern void bad_segv(unsigned long address, unsigned long ip, int is_write); -+extern int config_gdb(char *str); -+extern int remove_gdb(void); -+extern char *uml_strdup(char *string); -+extern void unprotect_kernel_mem(void); -+extern void protect_kernel_mem(void); -+extern void set_kmem_end(unsigned long); -+extern void uml_cleanup(void); -+extern void set_current(void *t); -+extern void lock_signalled_task(void *t); -+extern void IPI_handler(int cpu); -+extern int jail_setup(char *line, int *add); -+extern void *get_init_task(void); -+extern int clear_user_proc(void *buf, int size); -+extern int copy_to_user_proc(void *to, void *from, int size); -+extern int copy_from_user_proc(void *to, void *from, int size); -+extern int strlen_user_proc(char *str); -+extern void bus_handler(int sig, union uml_pt_regs *regs); -+extern void winch(int sig, union uml_pt_regs *regs); -+extern long execute_syscall(void *r); -+extern int smp_sigio_handler(void); -+extern void *get_current(void); -+extern struct task_struct *get_task(int pid, int require); -+extern void machine_halt(void); -+extern int is_syscall(unsigned long addr); -+extern void arch_switch(void); -+extern void free_irq(unsigned int, void *); -+extern int um_in_interrupt(void); -+extern int cpu(void); -+extern unsigned long long time_stamp(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/line.h um/arch/um/include/line.h ---- orig/arch/um/include/line.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/line.h 2003-11-07 02:25:37.000000000 -0500 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __LINE_H__ -+#define __LINE_H__ -+ -+#include "linux/list.h" -+#include "linux/tqueue.h" -+#include "linux/tty.h" -+#include "asm/semaphore.h" -+#include "chan_user.h" -+#include "mconsole_kern.h" -+ -+struct line_driver { -+ char *name; -+ char *devfs_name; -+ short major; -+ short minor_start; -+ short type; -+ short subtype; -+ int read_irq; -+ char *read_irq_name; -+ int write_irq; -+ char *write_irq_name; -+ char *symlink_from; -+ char *symlink_to; -+ struct mc_device mc; -+}; -+ -+struct line { -+ char *init_str; -+ int init_pri; -+ struct list_head chan_list; -+ int valid; -+ int count; -+ struct tty_struct *tty; -+ struct semaphore sem; -+ char *buffer; -+ char *head; -+ char *tail; -+ int sigio; -+ struct tq_struct task; -+ struct line_driver *driver; -+ int have_irq; -+}; -+ -+#define LINE_INIT(str, d) \ -+ { init_str : str, \ -+ init_pri : INIT_STATIC, \ -+ chan_list : { }, \ -+ valid : 1, \ -+ count : 0, \ -+ tty : NULL, \ -+ sem : { }, \ -+ buffer : NULL, \ -+ head : NULL, \ -+ tail : NULL, \ -+ sigio : 0, \ -+ driver : d, \ -+ have_irq : 0 } -+ -+struct lines { -+ int num; -+}; -+ -+#define LINES_INIT(n) { num : n } -+ -+extern void line_close(struct line *lines, struct tty_struct *tty); -+extern int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts); -+extern int line_setup(struct line *lines, int num, char *init, -+ int all_allowed); -+extern int line_write(struct line *line, struct tty_struct *tty, int from_user, -+ const char *buf, int len); -+extern char *add_xterm_umid(char *base); -+extern int line_setup_irq(int fd, int input, int output, void *data); -+extern void line_close_chan(struct line *line); -+extern void line_disable(struct line *line, int current_irq); -+extern void line_register_devfs(struct lines *set, -+ struct line_driver *line_driver, -+ struct tty_driver *driver, struct line *lines, -+ int nlines); -+extern void lines_init(struct line *lines, int nlines); -+extern void close_lines(struct line *lines, int nlines); -+extern int line_config(struct line *lines, int num, char *str); -+extern int line_remove(struct line *lines, int num, char *str); -+extern int line_get_config(char *dev, struct line *lines, int num, char *str, -+ int size, char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/Makefile um/arch/um/include/Makefile ---- orig/arch/um/include/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/Makefile 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,7 @@ -+all : sc.h -+ -+sc.h : ../util/mk_sc -+ ../util/mk_sc > $@ -+ -+../util/mk_sc : -+ $(MAKE) -C ../util mk_sc -diff -Naur -X ../exclude-files orig/arch/um/include/mconsole.h um/arch/um/include/mconsole.h ---- orig/arch/um/include/mconsole.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mconsole.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_H__ -+#define __MCONSOLE_H__ -+ -+#ifndef __KERNEL__ -+#include <stdint.h> -+#define u32 uint32_t -+#endif -+ -+#define MCONSOLE_MAGIC (0xcafebabe) -+#define MCONSOLE_MAX_DATA (512) -+#define MCONSOLE_VERSION 2 -+ -+struct mconsole_request { -+ u32 magic; -+ u32 version; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_reply { -+ u32 err; -+ u32 more; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_notify { -+ u32 magic; -+ u32 version; -+ enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG, -+ MCONSOLE_USER_NOTIFY } type; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mc_request; -+ -+enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; -+ -+struct mconsole_command -+{ -+ char *command; -+ void (*handler)(struct mc_request *req); -+ enum mc_context context; -+}; -+ -+struct mc_request -+{ -+ int len; -+ int as_interrupt; -+ -+ int originating_fd; -+ int originlen; -+ unsigned char origin[128]; /* sockaddr_un */ -+ -+ struct mconsole_request request; -+ struct mconsole_command *cmd; -+}; -+ -+extern char mconsole_socket_name[]; -+ -+extern int mconsole_unlink_socket(void); -+extern int mconsole_reply(struct mc_request *req, char *reply, int err, -+ int more); -+ -+extern void mconsole_version(struct mc_request *req); -+extern void mconsole_help(struct mc_request *req); -+extern void mconsole_halt(struct mc_request *req); -+extern void mconsole_reboot(struct mc_request *req); -+extern void mconsole_config(struct mc_request *req); -+extern void mconsole_remove(struct mc_request *req); -+extern void mconsole_sysrq(struct mc_request *req); -+extern void mconsole_cad(struct mc_request *req); -+extern void mconsole_stop(struct mc_request *req); -+extern void mconsole_go(struct mc_request *req); -+extern void mconsole_log(struct mc_request *req); -+extern void mconsole_proc(struct mc_request *req); -+ -+extern int mconsole_get_request(int fd, struct mc_request *req); -+extern int mconsole_notify(char *sock_name, int type, const void *data, -+ int len); -+extern char *mconsole_notify_socket(void); -+extern void lock_notify(void); -+extern void unlock_notify(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mconsole_kern.h um/arch/um/include/mconsole_kern.h ---- orig/arch/um/include/mconsole_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mconsole_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_KERN_H__ -+#define __MCONSOLE_KERN_H__ -+ -+#include "linux/config.h" -+#include "linux/list.h" -+#include "mconsole.h" -+ -+struct mconsole_entry { -+ struct list_head list; -+ struct mc_request request; -+}; -+ -+struct mc_device { -+ struct list_head list; -+ char *name; -+ int (*config)(char *); -+ int (*get_config)(char *, char *, int, char **); -+ int (*remove)(char *); -+}; -+ -+#define CONFIG_CHUNK(str, size, current, chunk, end) \ -+do { \ -+ current += strlen(chunk); \ -+ if(current >= size) \ -+ str = NULL; \ -+ if(str != NULL){ \ -+ strcpy(str, chunk); \ -+ str += strlen(chunk); \ -+ } \ -+ if(end) \ -+ current++; \ -+} while(0) -+ -+#ifdef CONFIG_MCONSOLE -+ -+extern void mconsole_register_dev(struct mc_device *new); -+ -+#else -+ -+static inline void mconsole_register_dev(struct mc_device *new) -+{ -+} -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mem.h um/arch/um/include/mem.h ---- orig/arch/um/include/mem.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mem.h 2004-01-16 11:14:38.000000000 -0500 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_H__ -+#define __MEM_H__ -+ -+#include "linux/types.h" -+ -+extern void set_kmem_end(unsigned long new); -+extern int phys_mapping(unsigned long phys, __u64 *offset_out); -+extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); -+extern int is_remapped(void *virt); -+extern int physmem_remove_mapping(void *virt); -+extern void physmem_forget_descriptor(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mem_kern.h um/arch/um/include/mem_kern.h ---- orig/arch/um/include/mem_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mem_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_KERN_H__ -+#define __MEM_KERN_H__ -+ -+#include "linux/list.h" -+#include "linux/types.h" -+ -+struct remapper { -+ struct list_head list; -+ int (*proc)(int, unsigned long, int, __u64); -+}; -+ -+extern void register_remapper(struct remapper *info); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mem_user.h um/arch/um/include/mem_user.h ---- orig/arch/um/include/mem_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mem_user.h 2004-01-10 00:11:06.000000000 -0500 -@@ -0,0 +1,81 @@ -+/* -+ * arch/um/include/mem_user.h -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory interface for support IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. -+ */ -+ -+#ifndef _MEM_USER_H -+#define _MEM_USER_H -+ -+struct iomem_region { -+ struct iomem_region *next; -+ char *driver; -+ int fd; -+ int size; -+ unsigned long phys; -+ unsigned long virt; -+}; -+ -+extern struct iomem_region *iomem_regions; -+extern int iomem_size; -+ -+#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -+ -+extern unsigned long host_task_size; -+extern unsigned long task_size; -+ -+extern int init_mem_user(void); -+extern int create_mem_file(unsigned long len); -+extern void setup_memory(void *entry); -+extern unsigned long find_iomem(char *driver, unsigned long *len_out); -+extern int init_maps(unsigned long physmem, unsigned long iomem, -+ unsigned long highmem); -+extern unsigned long get_vm(unsigned long len); -+extern void setup_physmem(unsigned long start, unsigned long usable, -+ unsigned long len, unsigned long highmem); -+extern void add_iomem(char *name, int fd, unsigned long size); -+extern unsigned long phys_offset(unsigned long phys); -+extern void unmap_physmem(void); -+extern void map_memory(unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int protect_memory(unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern unsigned long get_kmem_end(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mode.h um/arch/um/include/mode.h ---- orig/arch/um/include/mode.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mode.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_H__ -+#define __MODE_H__ -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "../kernel/tt/include/mode.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mode_kern.h um/arch/um/include/mode_kern.h ---- orig/arch/um/include/mode_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/mode_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_KERN_H__ -+#define __MODE_KERN_H__ -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mode_kern.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode_kern.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/net_kern.h um/arch/um/include/net_kern.h ---- orig/arch/um/include/net_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/net_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_KERN_H -+#define __UM_NET_KERN_H -+ -+#include "linux/netdevice.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/list.h" -+ -+struct uml_net { -+ struct list_head list; -+ struct net_device *dev; -+ int index; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+}; -+ -+struct uml_net_private { -+ struct list_head list; -+ spinlock_t lock; -+ struct net_device *dev; -+ struct timer_list tl; -+ struct net_device_stats stats; -+ int fd; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+ -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int (*set_mtu)(int mtu, void *); -+ int user[1]; -+}; -+ -+struct net_kern_info { -+ void (*init)(struct net_device *, void *); -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+}; -+ -+struct transport { -+ struct list_head list; -+ char *name; -+ int (*setup)(char *, char **, void *); -+ struct net_user_info *user; -+ struct net_kern_info *kern; -+ int private_size; -+ int setup_size; -+}; -+ -+extern struct net_device *ether_init(int); -+extern unsigned short ether_protocol(struct sk_buff *); -+extern int setup_etheraddr(char *str, unsigned char *addr); -+extern struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra); -+extern int tap_setup_common(char *str, char *type, char **dev_name, -+ char **mac_out, char **gate_addr); -+extern void register_transport(struct transport *new); -+extern unsigned short eth_protocol(struct sk_buff *skb); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/net_user.h um/arch/um/include/net_user.h ---- orig/arch/um/include/net_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/net_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_USER_H__ -+#define __UM_NET_USER_H__ -+ -+#define ETH_ADDR_LEN (6) -+#define ETH_HEADER_ETHERTAP (16) -+#define ETH_HEADER_OTHER (14) -+#define ETH_MAX_PACKET (1500) -+ -+#define UML_NET_VERSION (4) -+ -+struct net_user_info { -+ void (*init)(void *, void *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*set_mtu)(int mtu, void *); -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int max_packet; -+}; -+ -+extern void ether_user_init(void *data, void *dev); -+extern void dev_ip_addr(void *d, char *buf, char *bin_buf); -+extern void set_ether_mac(void *d, unsigned char *addr); -+extern void iter_addresses(void *d, void (*cb)(unsigned char *, -+ unsigned char *, void *), -+ void *arg); -+ -+extern void *get_output_buffer(int *len_out); -+extern void free_output_buffer(void *buffer); -+ -+extern int tap_open_common(void *dev, char *gate_addr); -+extern void tap_check_ips(char *gate_addr, char *eth_addr); -+ -+extern void read_output(int fd, char *output_out, int len); -+ -+extern int net_read(int fd, void *buf, int len); -+extern int net_recvfrom(int fd, void *buf, int len); -+extern int net_write(int fd, void *buf, int len); -+extern int net_send(int fd, void *buf, int len); -+extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); -+ -+extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+ -+extern char *split_if_spec(char *str, ...); -+ -+extern int dev_netmask(void *d, void *m); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/os.h um/arch/um/include/os.h ---- orig/arch/um/include/os.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/os.h 2003-12-17 10:48:12.000000000 -0500 -@@ -0,0 +1,180 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_H__ -+#define __OS_H__ -+ -+#include "asm/types.h" -+#include "../os/include/file.h" -+ -+#define OS_TYPE_FILE 1 -+#define OS_TYPE_DIR 2 -+#define OS_TYPE_SYMLINK 3 -+#define OS_TYPE_CHARDEV 4 -+#define OS_TYPE_BLOCKDEV 5 -+#define OS_TYPE_FIFO 6 -+#define OS_TYPE_SOCK 7 -+ -+/* os_access() flags */ -+#define OS_ACC_F_OK 0 /* Test for existence. */ -+#define OS_ACC_X_OK 1 /* Test for execute permission. */ -+#define OS_ACC_W_OK 2 /* Test for write permission. */ -+#define OS_ACC_R_OK 4 /* Test for read permission. */ -+#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ -+ -+/* -+ * types taken from stat_file() in hostfs_user.c -+ * (if they are wrong here, they are wrong there...). -+ */ -+struct uml_stat { -+ int ust_dev; /* device */ -+ unsigned long long ust_ino; /* inode */ -+ int ust_mode; /* protection */ -+ int ust_nlink; /* number of hard links */ -+ int ust_uid; /* user ID of owner */ -+ int ust_gid; /* group ID of owner */ -+ unsigned long long ust_size; /* total size, in bytes */ -+ int ust_blksize; /* blocksize for filesystem I/O */ -+ unsigned long long ust_blocks; /* number of blocks allocated */ -+ unsigned long ust_atime; /* time of last access */ -+ unsigned long ust_mtime; /* time of last modification */ -+ unsigned long ust_ctime; /* time of last change */ -+}; -+ -+struct openflags { -+ unsigned int r : 1; -+ unsigned int w : 1; -+ unsigned int s : 1; /* O_SYNC */ -+ unsigned int c : 1; /* O_CREAT */ -+ unsigned int t : 1; /* O_TRUNC */ -+ unsigned int a : 1; /* O_APPEND */ -+ unsigned int e : 1; /* O_EXCL */ -+ unsigned int cl : 1; /* FD_CLOEXEC */ -+}; -+ -+#define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \ -+ .t = 0, .a = 0, .e = 0, .cl = 0 }) -+ -+static inline struct openflags of_read(struct openflags flags) -+{ -+ flags.r = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_write(struct openflags flags) -+{ -+ flags.w = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_rdwr(struct openflags flags) -+{ -+ return(of_read(of_write(flags))); -+} -+ -+static inline struct openflags of_set_rw(struct openflags flags, int r, int w) -+{ -+ flags.r = r; -+ flags.w = w; -+ return(flags); -+} -+ -+static inline struct openflags of_sync(struct openflags flags) -+{ -+ flags.s = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_create(struct openflags flags) -+{ -+ flags.c = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_trunc(struct openflags flags) -+{ -+ flags.t = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_append(struct openflags flags) -+{ -+ flags.a = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_excl(struct openflags flags) -+{ -+ flags.e = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_cloexec(struct openflags flags) -+{ -+ flags.cl = 1; -+ return(flags); -+} -+ -+extern int os_stat_file(const char *file_name, struct uml_stat *buf); -+extern int os_stat_fd(const int fd, struct uml_stat *buf); -+extern int os_access(const char *file, int mode); -+extern void os_print_error(int error, const char* str); -+extern int os_get_exec_close(int fd, int *close_on_exec); -+extern int os_set_exec_close(int fd, int close_on_exec); -+extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); -+extern int os_window_size(int fd, int *rows, int *cols); -+extern int os_new_tty_pgrp(int fd, int pid); -+extern int os_get_ifname(int fd, char *namebuf); -+extern int os_set_slip(int fd); -+extern int os_set_owner(int fd, int pid); -+extern int os_sigio_async(int master, int slave); -+extern int os_mode_fd(int fd, int mode); -+ -+extern int os_seek_file(int fd, __u64 offset); -+extern int os_open_file(char *file, struct openflags flags, int mode); -+extern int os_read_file(int fd, void *buf, int len); -+extern int os_write_file(int fd, const void *buf, int count); -+extern int os_file_size(char *file, long long *size_out); -+extern int os_file_modtime(char *file, unsigned long *modtime); -+extern int os_pipe(int *fd, int stream, int close_on_exec); -+extern int os_set_fd_async(int fd, int owner); -+extern int os_set_fd_block(int fd, int blocking); -+extern int os_accept_connection(int fd); -+extern int os_create_unix_socket(char *file, int len, int close_on_exec); -+extern int os_shutdown_socket(int fd, int r, int w); -+extern void os_close_file(int fd); -+extern int os_rcv_fd(int fd, int *helper_pid_out); -+extern int create_unix_socket(char *file, int len, int close_on_exec); -+extern int os_connect_socket(char *name); -+extern int os_file_type(char *file); -+extern int os_file_mode(char *file, struct openflags *mode_out); -+extern int os_lock_file(int fd, int excl); -+ -+extern unsigned long os_process_pc(int pid); -+extern int os_process_parent(int pid); -+extern void os_stop_process(int pid); -+extern void os_kill_process(int pid, int reap_child); -+extern void os_usr1_process(int pid); -+extern int os_getpid(void); -+ -+extern int os_map_memory(void *virt, int fd, unsigned long long off, -+ unsigned long len, int r, int w, int x); -+extern int os_protect_memory(void *addr, unsigned long len, -+ int r, int w, int x); -+extern int os_unmap_memory(void *addr, int len); -+extern void os_flush_stdout(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/process.h um/arch/um/include/process.h ---- orig/arch/um/include/process.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/process.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROCESS_H__ -+#define __PROCESS_H__ -+ -+#include <asm/sigcontext.h> -+ -+extern void sig_handler(int sig, struct sigcontext sc); -+extern void alarm_handler(int sig, struct sigcontext sc); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/ptrace_user.h um/arch/um/include/ptrace_user.h ---- orig/arch/um/include/ptrace_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/ptrace_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,18 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_USER_H__ -+#define __PTRACE_USER_H__ -+ -+#include "sysdep/ptrace_user.h" -+ -+extern int ptrace_getregs(long pid, unsigned long *regs_out); -+extern int ptrace_setregs(long pid, unsigned long *regs_in); -+extern int ptrace_getfpregs(long pid, unsigned long *regs_out); -+extern void arch_enter_kernel(void *task, int pid); -+extern void arch_leave_kernel(void *task, int pid); -+extern void ptrace_pokeuser(unsigned long addr, unsigned long data); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/sigcontext.h um/arch/um/include/sigcontext.h ---- orig/arch/um/include/sigcontext.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sigcontext.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_SIGCONTEXT_H__ -+#define __UML_SIGCONTEXT_H__ -+ -+#include "sysdep/sigcontext.h" -+ -+extern int sc_size(void *data); -+extern void sc_to_sc(void *to_ptr, void *from_ptr); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sigio.h um/arch/um/include/sigio.h ---- orig/arch/um/include/sigio.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sigio.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGIO_H__ -+#define __SIGIO_H__ -+ -+extern int write_sigio_irq(int fd); -+extern int register_sigio_fd(int fd); -+extern int read_sigio_fd(int fd); -+extern int add_sigio_fd(int fd, int read); -+extern int ignore_sigio_fd(int fd); -+extern void sigio_lock(void); -+extern void sigio_unlock(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/signal_kern.h um/arch/um/include/signal_kern.h ---- orig/arch/um/include/signal_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/signal_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_KERN_H__ -+#define __SIGNAL_KERN_H__ -+ -+extern int have_signals(void *t); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/signal_user.h um/arch/um/include/signal_user.h ---- orig/arch/um/include/signal_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/signal_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_USER_H__ -+#define __SIGNAL_USER_H__ -+ -+extern int signal_stack_size; -+ -+extern int change_sig(int signal, int on); -+extern void set_sigstack(void *stack, int size); -+extern void set_handler(int sig, void (*handler)(int), int flags, ...); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/skas_ptrace.h um/arch/um/include/skas_ptrace.h ---- orig/arch/um/include/skas_ptrace.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/skas_ptrace.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PTRACE_H -+#define __SKAS_PTRACE_H -+ -+struct ptrace_faultinfo { -+ int is_write; -+ unsigned long addr; -+}; -+ -+struct ptrace_ldt { -+ int func; -+ void *ptr; -+ unsigned long bytecount; -+}; -+ -+#define PTRACE_FAULTINFO 52 -+#define PTRACE_SIGPENDING 53 -+#define PTRACE_LDT 54 -+#define PTRACE_SWITCH_MM 55 -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/syscall_user.h um/arch/um/include/syscall_user.h ---- orig/arch/um/include/syscall_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/syscall_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSCALL_USER_H -+#define __SYSCALL_USER_H -+ -+extern int record_syscall_start(int syscall); -+extern void record_syscall_end(int index, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/checksum.h um/arch/um/include/sysdep-i386/checksum.h ---- orig/arch/um/include/sysdep-i386/checksum.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/checksum.h 2004-01-13 23:40:05.000000000 -0500 -@@ -0,0 +1,218 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SYSDEP_CHECKSUM_H -+#define __UM_SYSDEP_CHECKSUM_H -+ -+#include "linux/string.h" -+#include "asm/uaccess.h" -+ -+/* -+ * computes the checksum of a memory block at buff, length len, -+ * and adds in "sum" (32-bit) -+ * -+ * returns a 32-bit number suitable for feeding into itself -+ * or csum_tcpudp_magic -+ * -+ * this function must be called with even lengths, except -+ * for the last fragment, which may be odd -+ * -+ * it's best to have buff aligned on a 32-bit boundary -+ */ -+unsigned int csum_partial(const unsigned char * buff, int len, -+ unsigned int sum); -+ -+/* -+ * the same as csum_partial, but copies from src while it -+ * checksums, and handles user-space pointer exceptions correctly, when needed. -+ * -+ * here even more important to align src and dst on a 32-bit (or even -+ * better 64-bit) boundary -+ */ -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+ -+/* -+ * Note: when you get a NULL pointer exception here this means someone -+ * passed in an incorrect kernel address to one of these functions. -+ * -+ * If you use these functions directly please don't forget the -+ * verify_area(). -+ */ -+ -+static __inline__ -+unsigned int csum_partial_copy_nocheck(const char *src, char *dst, -+ int len, int sum) -+{ -+ memcpy(dst, src, len); -+ return(csum_partial(dst, len, sum)); -+} -+ -+static __inline__ -+unsigned int csum_partial_copy_from_user(const char *src, char *dst, -+ int len, int sum, int *err_ptr) -+{ -+ return csum_partial_copy_from(src, dst, len, sum, err_ptr); -+} -+ -+/* -+ * These are the old (and unsafe) way of doing checksums, a warning message -+ * will be printed if they are used and an exception occurs. -+ * -+ * these functions should go away after some time. -+ */ -+ -+#define csum_partial_copy_fromuser csum_partial_copy_from_user -+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); -+ -+/* -+ * This is a version of ip_compute_csum() optimized for IP headers, -+ * which always checksum on 4 octet boundaries. -+ * -+ * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by -+ * Arnt Gulbrandsen. -+ */ -+static inline unsigned short ip_fast_csum(unsigned char * iph, -+ unsigned int ihl) -+{ -+ unsigned int sum; -+ -+ __asm__ __volatile__( -+ "movl (%1), %0 ;\n" -+ "subl $4, %2 ;\n" -+ "jbe 2f ;\n" -+ "addl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+"1: adcl 16(%1), %0 ;\n" -+ "lea 4(%1), %1 ;\n" -+ "decl %2 ;\n" -+ "jne 1b ;\n" -+ "adcl $0, %0 ;\n" -+ "movl %0, %2 ;\n" -+ "shrl $16, %0 ;\n" -+ "addw %w2, %w0 ;\n" -+ "adcl $0, %0 ;\n" -+ "notl %0 ;\n" -+"2: ;\n" -+ /* Since the input registers which are loaded with iph and ipl -+ are modified, we must also specify them as outputs, or gcc -+ will assume they contain their original values. */ -+ : "=r" (sum), "=r" (iph), "=r" (ihl) -+ : "1" (iph), "2" (ihl)); -+ return(sum); -+} -+ -+/* -+ * Fold a partial checksum -+ */ -+ -+static inline unsigned int csum_fold(unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl $0xffff, %0 ;\n" -+ : "=r" (sum) -+ : "r" (sum << 16), "0" (sum & 0xffff0000) -+ ); -+ return (~sum) >> 16; -+} -+ -+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl %2, %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=r" (sum) -+ : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); -+ return sum; -+} -+ -+/* -+ * computes the checksum of the TCP/UDP pseudo-header -+ * returns a 16-bit checksum, already complemented -+ */ -+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -+} -+ -+/* -+ * this routine is used for miscellaneous IP-like checksums, mainly -+ * in icmp.c -+ */ -+ -+static inline unsigned short ip_compute_csum(unsigned char * buff, int len) -+{ -+ return csum_fold (csum_partial(buff, len, 0)); -+} -+ -+#define _HAVE_ARCH_IPV6_CSUM -+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, -+ struct in6_addr *daddr, -+ __u32 len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl 0(%1), %0 ;\n" -+ "adcl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+ "adcl 0(%2), %0 ;\n" -+ "adcl 4(%2), %0 ;\n" -+ "adcl 8(%2), %0 ;\n" -+ "adcl 12(%2), %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl %4, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=&r" (sum) -+ : "r" (saddr), "r" (daddr), -+ "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); -+ -+ return csum_fold(sum); -+} -+ -+/* -+ * Copy and checksum to user -+ */ -+#define HAVE_CSUM_COPY_USER -+static __inline__ unsigned int csum_and_copy_to_user(const char *src, -+ char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if (access_ok(VERIFY_WRITE, dst, len)) -+ return(csum_partial_copy_to(src, dst, len, sum, err_ptr)); -+ -+ if (len) -+ *err_ptr = -EFAULT; -+ -+ return -1; /* invalid checksum */ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame.h um/arch/um/include/sysdep-i386/frame.h ---- orig/arch/um/include/sysdep-i386/frame.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/frame.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_I386_H -+#define __FRAME_I386_H -+ -+struct arch_frame_data_raw { -+ unsigned long fp_start; -+ unsigned long sr; -+}; -+ -+struct arch_frame_data { -+ int fpstate_size; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame_kern.h um/arch/um/include/sysdep-i386/frame_kern.h ---- orig/arch/um/include/sysdep-i386/frame_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/frame_kern.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_I386_H -+#define __FRAME_KERN_I386_H -+ -+/* This is called from sys_sigreturn. It takes the sp at the point of the -+ * sigreturn system call and returns the address of the sigcontext struct -+ * on the stack. -+ */ -+ -+static inline void *sp_to_sc(unsigned long sp) -+{ -+ return((void *) sp); -+} -+ -+static inline void *sp_to_uc(unsigned long sp) -+{ -+ unsigned long uc; -+ -+ uc = sp + signal_frame_si.uc_index - -+ signal_frame_si.common.sp_index - 4; -+ return((void *) uc); -+} -+ -+static inline void *sp_to_rt_sc(unsigned long sp) -+{ -+ unsigned long sc; -+ -+ sc = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len - 4; -+ return((void *) sc); -+} -+ -+static inline void *sp_to_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_sc.common.sp_index + -+ signal_frame_sc.common.len - 8; -+ return((void *) mask); -+} -+ -+extern int sc_size(void *data); -+ -+static inline void *sp_to_rt_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len + -+ sc_size(&signal_frame_si.common.arch) - 4; -+ return((void *) mask); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame_user.h um/arch/um/include/sysdep-i386/frame_user.h ---- orig/arch/um/include/sysdep-i386/frame_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/frame_user.h 2004-01-14 03:54:47.000000000 -0500 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_I386_H -+#define __FRAME_USER_I386_H -+ -+#include <asm/page.h> -+#include "sysdep/frame.h" -+ -+/* This stuff is to calculate the size of the fp state struct at runtime -+ * because it has changed between 2.2 and 2.4 and it would be good for a -+ * UML compiled on one to work on the other. -+ * So, setup_arch_frame_raw fills in the arch struct with the raw data, which -+ * just contains the address of the end of the sigcontext. This is invoked -+ * from the signal handler. -+ * setup_arch_frame uses that data to figure out what -+ * arch_frame_data.fpstate_size should be. It really has no idea, since it's -+ * not allowed to do sizeof(struct fpstate) but it's safe to consider that it's -+ * everything from the end of the sigcontext up to the top of the stack. So, -+ * it masks off the page number to get the offset within the page and subtracts -+ * that from the page size, and that's how big the fpstate struct will be -+ * considered to be. -+ */ -+ -+static inline void setup_arch_frame_raw(struct arch_frame_data_raw *data, -+ void *end, unsigned long srp) -+{ -+ unsigned long sr = *((unsigned long *) srp); -+ -+ data->fp_start = (unsigned long) end; -+ if((sr & PAGE_MASK) == ((unsigned long) end & PAGE_MASK)) -+ data->sr = sr; -+ else data->sr = 0; -+} -+ -+static inline void setup_arch_frame(struct arch_frame_data_raw *in, -+ struct arch_frame_data *out) -+{ -+ unsigned long fpstate_start = in->fp_start; -+ -+ if(in->sr == 0){ -+ fpstate_start &= ~PAGE_MASK; -+ out->fpstate_size = PAGE_SIZE - fpstate_start; -+ } -+ else { -+ out->fpstate_size = in->sr - fpstate_start; -+ } -+} -+ -+/* This figures out where on the stack the SA_RESTORER function address -+ * is stored. For i386, it's the signal handler return address, so it's -+ * located next to the frame pointer. -+ * This is inlined, so __builtin_frame_address(0) is correct. Otherwise, -+ * it would have to be __builtin_frame_address(1). -+ */ -+ -+#define frame_restorer() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) -+ -+/* Similarly, this returns the value of sp when the handler was first -+ * entered. This is used to calculate the proper sp when delivering -+ * signals. -+ */ -+ -+#define frame_sp() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/ptrace.h um/arch/um/include/sysdep-i386/ptrace.h ---- orig/arch/um/include/sysdep-i386/ptrace.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/ptrace.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_H -+#define __SYSDEP_I386_PTRACE_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "ptrace-tt.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "ptrace-skas.h" -+#endif -+ -+#include "choose-mode.h" -+ -+union uml_pt_regs { -+#ifdef UML_CONFIG_MODE_TT -+ struct tt_regs { -+ long syscall; -+ void *sc; -+ } tt; -+#endif -+#ifdef UML_CONFIG_MODE_SKAS -+ struct skas_regs { -+ unsigned long regs[HOST_FRAME_SIZE]; -+ unsigned long fp[HOST_FP_SIZE]; -+ unsigned long xfp[HOST_XFP_SIZE]; -+ unsigned long fault_addr; -+ unsigned long fault_type; -+ unsigned long trap_type; -+ long syscall; -+ int is_user; -+ } skas; -+#endif -+}; -+ -+#define EMPTY_UML_PT_REGS { } -+ -+extern int mode_tt; -+ -+#define UPT_SC(r) ((r)->tt.sc) -+#define UPT_IP(r) \ -+ CHOOSE_MODE(SC_IP(UPT_SC(r)), REGS_IP((r)->skas.regs)) -+#define UPT_SP(r) \ -+ CHOOSE_MODE(SC_SP(UPT_SC(r)), REGS_SP((r)->skas.regs)) -+#define UPT_EFLAGS(r) \ -+ CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs)) -+#define UPT_EAX(r) \ -+ CHOOSE_MODE(SC_EAX(UPT_SC(r)), REGS_EAX((r)->skas.regs)) -+#define UPT_EBX(r) \ -+ CHOOSE_MODE(SC_EBX(UPT_SC(r)), REGS_EBX((r)->skas.regs)) -+#define UPT_ECX(r) \ -+ CHOOSE_MODE(SC_ECX(UPT_SC(r)), REGS_ECX((r)->skas.regs)) -+#define UPT_EDX(r) \ -+ CHOOSE_MODE(SC_EDX(UPT_SC(r)), REGS_EDX((r)->skas.regs)) -+#define UPT_ESI(r) \ -+ CHOOSE_MODE(SC_ESI(UPT_SC(r)), REGS_ESI((r)->skas.regs)) -+#define UPT_EDI(r) \ -+ CHOOSE_MODE(SC_EDI(UPT_SC(r)), REGS_EDI((r)->skas.regs)) -+#define UPT_EBP(r) \ -+ CHOOSE_MODE(SC_EBP(UPT_SC(r)), REGS_EBP((r)->skas.regs)) -+#define UPT_ORIG_EAX(r) \ -+ CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall) -+#define UPT_CS(r) \ -+ CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs)) -+#define UPT_SS(r) \ -+ CHOOSE_MODE(SC_SS(UPT_SC(r)), REGS_SS((r)->skas.regs)) -+#define UPT_DS(r) \ -+ CHOOSE_MODE(SC_DS(UPT_SC(r)), REGS_DS((r)->skas.regs)) -+#define UPT_ES(r) \ -+ CHOOSE_MODE(SC_ES(UPT_SC(r)), REGS_ES((r)->skas.regs)) -+#define UPT_FS(r) \ -+ CHOOSE_MODE(SC_FS(UPT_SC(r)), REGS_FS((r)->skas.regs)) -+#define UPT_GS(r) \ -+ CHOOSE_MODE(SC_GS(UPT_SC(r)), REGS_GS((r)->skas.regs)) -+ -+#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) -+#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) -+#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) -+#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) -+#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) -+#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) -+ -+extern int user_context(unsigned long sp); -+ -+#define UPT_IS_USER(r) \ -+ CHOOSE_MODE(user_context(UPT_SP(r)), (r)->skas.is_user) -+ -+struct syscall_args { -+ unsigned long args[6]; -+}; -+ -+#define SYSCALL_ARGS(r) ((struct syscall_args) \ -+ { .args = { UPT_SYSCALL_ARG1(r), \ -+ UPT_SYSCALL_ARG2(r), \ -+ UPT_SYSCALL_ARG3(r), \ -+ UPT_SYSCALL_ARG4(r), \ -+ UPT_SYSCALL_ARG5(r), \ -+ UPT_SYSCALL_ARG6(r) } } ) -+ -+#define UPT_REG(regs, reg) \ -+ ({ unsigned long val; \ -+ switch(reg){ \ -+ case EIP: val = UPT_IP(regs); break; \ -+ case UESP: val = UPT_SP(regs); break; \ -+ case EAX: val = UPT_EAX(regs); break; \ -+ case EBX: val = UPT_EBX(regs); break; \ -+ case ECX: val = UPT_ECX(regs); break; \ -+ case EDX: val = UPT_EDX(regs); break; \ -+ case ESI: val = UPT_ESI(regs); break; \ -+ case EDI: val = UPT_EDI(regs); break; \ -+ case EBP: val = UPT_EBP(regs); break; \ -+ case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \ -+ case CS: val = UPT_CS(regs); break; \ -+ case SS: val = UPT_SS(regs); break; \ -+ case DS: val = UPT_DS(regs); break; \ -+ case ES: val = UPT_ES(regs); break; \ -+ case FS: val = UPT_FS(regs); break; \ -+ case GS: val = UPT_GS(regs); break; \ -+ case EFL: val = UPT_EFLAGS(regs); break; \ -+ default : \ -+ panic("Bad register in UPT_REG : %d\n", reg); \ -+ val = -1; \ -+ } \ -+ val; \ -+ }) -+ -+ -+#define UPT_SET(regs, reg, val) \ -+ do { \ -+ switch(reg){ \ -+ case EIP: UPT_IP(regs) = val; break; \ -+ case UESP: UPT_SP(regs) = val; break; \ -+ case EAX: UPT_EAX(regs) = val; break; \ -+ case EBX: UPT_EBX(regs) = val; break; \ -+ case ECX: UPT_ECX(regs) = val; break; \ -+ case EDX: UPT_EDX(regs) = val; break; \ -+ case ESI: UPT_ESI(regs) = val; break; \ -+ case EDI: UPT_EDI(regs) = val; break; \ -+ case EBP: UPT_EBP(regs) = val; break; \ -+ case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \ -+ case CS: UPT_CS(regs) = val; break; \ -+ case SS: UPT_SS(regs) = val; break; \ -+ case DS: UPT_DS(regs) = val; break; \ -+ case ES: UPT_ES(regs) = val; break; \ -+ case FS: UPT_FS(regs) = val; break; \ -+ case GS: UPT_GS(regs) = val; break; \ -+ case EFL: UPT_EFLAGS(regs) = val; break; \ -+ default : \ -+ panic("Bad register in UPT_SET : %d\n", reg); \ -+ break; \ -+ } \ -+ } while (0) -+ -+#define UPT_SET_SYSCALL_RETURN(r, res) \ -+ CHOOSE_MODE(SC_SET_SYSCALL_RETURN(UPT_SC(r), (res)), \ -+ REGS_SET_SYSCALL_RETURN((r)->skas.regs, (res))) -+ -+#define UPT_RESTART_SYSCALL(r) \ -+ CHOOSE_MODE(SC_RESTART_SYSCALL(UPT_SC(r)), \ -+ REGS_RESTART_SYSCALL((r)->skas.regs)) -+ -+#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) -+#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) -+#define UPT_SYSCALL_RET(r) UPT_EAX(r) -+ -+#define UPT_SEGV_IS_FIXABLE(r) \ -+ CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ -+ REGS_SEGV_IS_FIXABLE(&r->skas)) -+ -+#define UPT_FAULT_ADDR(r) \ -+ CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) -+ -+#define UPT_FAULT_WRITE(r) \ -+ CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/ptrace_user.h um/arch/um/include/sysdep-i386/ptrace_user.h ---- orig/arch/um/include/sysdep-i386/ptrace_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/ptrace_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_USER_H__ -+#define __SYSDEP_I386_PTRACE_USER_H__ -+ -+#include <asm/ptrace.h> -+ -+#define PT_OFFSET(r) ((r) * sizeof(long)) -+ -+#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) -+#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) -+ -+#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX) -+#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX) -+#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) -+#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) -+#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) -+ -+#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) -+ -+#define PT_IP_OFFSET PT_OFFSET(EIP) -+#define PT_IP(regs) ((regs)[EIP]) -+#define PT_SP(regs) ((regs)[UESP]) -+ -+#ifndef FRAME_SIZE -+#define FRAME_SIZE (17) -+#endif -+#define FRAME_SIZE_OFFSET (FRAME_SIZE * sizeof(unsigned long)) -+ -+#define FP_FRAME_SIZE (27) -+#define FPX_FRAME_SIZE (128) -+ -+#ifdef PTRACE_GETREGS -+#define UM_HAVE_GETREGS -+#endif -+ -+#ifdef PTRACE_SETREGS -+#define UM_HAVE_SETREGS -+#endif -+ -+#ifdef PTRACE_GETFPREGS -+#define UM_HAVE_GETFPREGS -+#endif -+ -+#ifdef PTRACE_SETFPREGS -+#define UM_HAVE_SETFPREGS -+#endif -+ -+#ifdef PTRACE_GETFPXREGS -+#define UM_HAVE_GETFPXREGS -+#endif -+ -+#ifdef PTRACE_SETFPXREGS -+#define UM_HAVE_SETFPXREGS -+#endif -+ -+extern void update_debugregs(int seq); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/sigcontext.h um/arch/um/include/sysdep-i386/sigcontext.h ---- orig/arch/um/include/sysdep-i386/sigcontext.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/sigcontext.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_I386_H -+#define __SYS_SIGCONTEXT_I386_H -+ -+#include "sc.h" -+ -+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) -+ -+#define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc)) -+#define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result) -+ -+#define SC_FAULT_ADDR(sc) SC_CR2(sc) -+#define SC_FAULT_TYPE(sc) SC_ERR(sc) -+ -+#define FAULT_WRITE(err) (err & 2) -+#define TO_SC_ERR(is_write) ((is_write) ? 2 : 0) -+ -+#define SC_FAULT_WRITE(sc) (FAULT_WRITE(SC_ERR(sc))) -+ -+#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) -+ -+/* ptrace expects that, at the start of a system call, %eax contains -+ * -ENOSYS, so this makes it so. -+ */ -+#define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) -+ -+/* This is Page Fault */ -+#define SEGV_IS_FIXABLE(trap) (trap == 14) -+ -+#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) -+ -+extern unsigned long *sc_sigmask(void *sc_ptr); -+extern int sc_get_fpregs(unsigned long buf, void *sc_ptr); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/syscalls.h um/arch/um/include/sysdep-i386/syscalls.h ---- orig/arch/um/include/sysdep-i386/syscalls.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-i386/syscalls.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/unistd.h" -+#include "sysdep/ptrace.h" -+ -+typedef long syscall_handler_t(struct pt_regs); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) -+ -+extern syscall_handler_t sys_modify_ldt; -+extern syscall_handler_t old_mmap_i386; -+extern syscall_handler_t old_select; -+extern syscall_handler_t sys_ni_syscall; -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_mmap ] = old_mmap_i386, \ -+ [ __NR_select ] = old_select, \ -+ [ __NR_vm86old ] = sys_ni_syscall, \ -+ [ __NR_modify_ldt ] = sys_modify_ldt, \ -+ [ __NR_lchown32 ] = sys_lchown, \ -+ [ __NR_getuid32 ] = sys_getuid, \ -+ [ __NR_getgid32 ] = sys_getgid, \ -+ [ __NR_geteuid32 ] = sys_geteuid, \ -+ [ __NR_getegid32 ] = sys_getegid, \ -+ [ __NR_setreuid32 ] = sys_setreuid, \ -+ [ __NR_setregid32 ] = sys_setregid, \ -+ [ __NR_getgroups32 ] = sys_getgroups, \ -+ [ __NR_setgroups32 ] = sys_setgroups, \ -+ [ __NR_fchown32 ] = sys_fchown, \ -+ [ __NR_setresuid32 ] = sys_setresuid, \ -+ [ __NR_getresuid32 ] = sys_getresuid, \ -+ [ __NR_setresgid32 ] = sys_setresgid, \ -+ [ __NR_getresgid32 ] = sys_getresgid, \ -+ [ __NR_chown32 ] = sys_chown, \ -+ [ __NR_setuid32 ] = sys_setuid, \ -+ [ __NR_setgid32 ] = sys_setgid, \ -+ [ __NR_setfsuid32 ] = sys_setfsuid, \ -+ [ __NR_setfsgid32 ] = sys_setfsgid, \ -+ [ __NR_pivot_root ] = sys_pivot_root, \ -+ [ __NR_mincore ] = sys_mincore, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ 222 ] = sys_ni_syscall, -+ -+/* 222 doesn't yet have a name in include/asm-i386/unistd.h */ -+ -+#define LAST_ARCH_SYSCALL 222 -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/ptrace.h um/arch/um/include/sysdep-ia64/ptrace.h ---- orig/arch/um/include/sysdep-ia64/ptrace.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ia64/ptrace.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_PTRACE_H -+#define __SYSDEP_IA64_PTRACE_H -+ -+struct sys_pt_regs { -+ int foo; -+}; -+ -+#define EMPTY_REGS { 0 } -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/sigcontext.h um/arch/um/include/sysdep-ia64/sigcontext.h ---- orig/arch/um/include/sysdep-ia64/sigcontext.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ia64/sigcontext.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SIGCONTEXT_H -+#define __SYSDEP_IA64_SIGCONTEXT_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/syscalls.h um/arch/um/include/sysdep-ia64/syscalls.h ---- orig/arch/um/include/sysdep-ia64/syscalls.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ia64/syscalls.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SYSCALLS_H -+#define __SYSDEP_IA64_SYSCALLS_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/ptrace.h um/arch/um/include/sysdep-ppc/ptrace.h ---- orig/arch/um/include/sysdep-ppc/ptrace.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ppc/ptrace.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,104 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_PTRACE_PPC_H -+#define __SYS_PTRACE_PPC_H -+ -+#include "linux/config.h" -+#include "linux/types.h" -+ -+/* the following taken from <asm-ppc/ptrace.h> */ -+ -+#ifdef CONFIG_PPC64 -+#define PPC_REG unsigned long /*long*/ -+#else -+#define PPC_REG unsigned long -+#endif -+struct sys_pt_regs_s { -+ PPC_REG gpr[32]; -+ PPC_REG nip; -+ PPC_REG msr; -+ PPC_REG orig_gpr3; /* Used for restarting system calls */ -+ PPC_REG ctr; -+ PPC_REG link; -+ PPC_REG xer; -+ PPC_REG ccr; -+ PPC_REG mq; /* 601 only (not used at present) */ -+ /* Used on APUS to hold IPL value. */ -+ PPC_REG trap; /* Reason for being here */ -+ PPC_REG dar; /* Fault registers */ -+ PPC_REG dsisr; -+ PPC_REG result; /* Result of a system call */ -+}; -+ -+#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)) -+ -+struct sys_pt_regs { -+ PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)]; -+}; -+ -+#define UM_MAX_REG (PT_FPR0) -+#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG)) -+ -+#define EMPTY_REGS { { [ 0 ... NUM_REGS - 1] = 0 } } -+ -+#define UM_REG(r, n) ((r)->regs[n]) -+ -+#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3) -+#define UM_SP(r) UM_REG(r, PT_R1) -+#define UM_IP(r) UM_REG(r, PT_NIP) -+#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR) -+#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0) -+#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3) -+#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4) -+#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5) -+#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6) -+#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7) -+#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8) -+ -+#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG)) -+#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG)) -+#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG)) -+#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG)) -+#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG)) -+ -+#define UM_SET_SYSCALL_RETURN(_regs, result) \ -+do { \ -+ if (result < 0) { \ -+ (_regs)->regs[PT_CCR] |= 0x10000000; \ -+ UM_SYSCALL_RET((_regs)) = -result; \ -+ } else { \ -+ UM_SYSCALL_RET((_regs)) = result; \ -+ } \ -+} while(0) -+ -+extern void shove_aux_table(unsigned long sp); -+#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp); -+ -+/* These aren't actually defined. The undefs are just to make sure -+ * everyone's clear on the concept. -+ */ -+#undef UML_HAVE_GETREGS -+#undef UML_HAVE_GETFPREGS -+#undef UML_HAVE_SETREGS -+#undef UML_HAVE_SETFPREGS -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/sigcontext.h um/arch/um/include/sysdep-ppc/sigcontext.h ---- orig/arch/um/include/sysdep-ppc/sigcontext.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ppc/sigcontext.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_PPC_H -+#define __SYS_SIGCONTEXT_PPC_H -+ -+#define DSISR_WRITE 0x02000000 -+ -+#define SC_FAULT_ADDR(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = _sc->regs->dar; \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception */ \ -+ retval = _sc->regs->nip; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_FAULT_WRITE(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = !!(_sc->regs->dsisr & DSISR_WRITE); \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception: not a write */ \ -+ retval = 0; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_IP(sc) ((sc)->regs->nip) -+#define SC_SP(sc) ((sc)->regs->gpr[1]) -+#define SEGV_IS_FIXABLE(sc) (1) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/syscalls.h um/arch/um/include/sysdep-ppc/syscalls.h ---- orig/arch/um/include/sysdep-ppc/syscalls.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysdep-ppc/syscalls.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2, -+ unsigned long arg3, unsigned long arg4, -+ unsigned long arg5, unsigned long arg6); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ (*sys_call_table[syscall])(UM_SYSCALL_ARG1(®s), \ -+ UM_SYSCALL_ARG2(®s), \ -+ UM_SYSCALL_ARG3(®s), \ -+ UM_SYSCALL_ARG4(®s), \ -+ UM_SYSCALL_ARG5(®s), \ -+ UM_SYSCALL_ARG6(®s)) -+ -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+ -+/* old_mmap needs the correct prototype since syscall_kern.c includes -+ * this file. -+ */ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_modify_ldt ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_read ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_write ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_iobase ] = sys_ni_syscall, \ -+ [ __NR_pivot_root ] = sys_ni_syscall, \ -+ [ __NR_multiplexer ] = sys_ni_syscall, \ -+ [ __NR_mmap ] = old_mmap, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ __NR_mincore ] = sys_mincore, -+ -+#define LAST_ARCH_SYSCALL __NR_mincore -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysrq.h um/arch/um/include/sysrq.h ---- orig/arch/um/include/sysrq.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/sysrq.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SYSRQ_H -+#define __UM_SYSRQ_H -+ -+extern void show_trace(unsigned long *stack); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/tempfile.h um/arch/um/include/tempfile.h ---- orig/arch/um/include/tempfile.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/tempfile.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TEMPFILE_H__ -+#define __TEMPFILE_H__ -+ -+extern int make_tempfile(const char *template, char **tempname, int do_unlink); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/time_user.h um/arch/um/include/time_user.h ---- orig/arch/um/include/time_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/time_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,17 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TIME_USER_H__ -+#define __TIME_USER_H__ -+ -+extern void timer(void); -+extern void switch_timers(int to_real); -+extern void set_interval(int timer_type); -+extern void idle_sleep(int secs); -+extern void enable_timer(void); -+extern unsigned long time_lock(void); -+extern void time_unlock(unsigned long); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/tlb.h um/arch/um/include/tlb.h ---- orig/arch/um/include/tlb.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/tlb.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TLB_H__ -+#define __TLB_H__ -+ -+extern void mprotect_kernel_vm(int w); -+extern void force_flush_all(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/ubd_user.h um/arch/um/include/ubd_user.h ---- orig/arch/um/include/ubd_user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/ubd_user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UBD_USER_H -+#define __UM_UBD_USER_H -+ -+#include "os.h" -+ -+enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; -+ -+struct io_thread_req { -+ enum ubd_req op; -+ int fds[2]; -+ unsigned long offsets[2]; -+ unsigned long long offset; -+ unsigned long length; -+ char *buffer; -+ int sectorsize; -+ unsigned long sector_mask; -+ unsigned long long cow_offset; -+ unsigned long bitmap_words[2]; -+ int map_fd; -+ unsigned long long map_offset; -+ int error; -+}; -+ -+extern int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out); -+extern int create_cow_file(char *cow_file, char *backing_file, -+ struct openflags flags, int sectorsize, -+ int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, -+ int *data_offset_out); -+extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -+extern int read_ubd_fs(int fd, void *buffer, int len); -+extern int write_ubd_fs(int fd, char *buffer, int len); -+extern int start_io_thread(unsigned long sp, int *fds_out); -+extern void do_io(struct io_thread_req *req); -+ -+static inline int ubd_test_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ n = bit / bits; -+ off = bit % bits; -+ return((data[n] & (1 << off)) != 0); -+} -+ -+static inline void ubd_set_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ n = bit / bits; -+ off = bit % bits; -+ data[n] |= (1 << off); -+} -+ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/umid.h um/arch/um/include/umid.h ---- orig/arch/um/include/umid.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/umid.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMID_H__ -+#define __UMID_H__ -+ -+extern int umid_file_name(char *name, char *buf, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/uml_uaccess.h um/arch/um/include/uml_uaccess.h ---- orig/arch/um/include/uml_uaccess.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/uml_uaccess.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_UACCESS_H__ -+#define __UML_UACCESS_H__ -+ -+extern int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+extern unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out); -+void __do_copy(void *to, const void *from, int n); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/um_mmu.h um/arch/um/include/um_mmu.h ---- orig/arch/um/include/um_mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/um_mmu.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_MMU_H -+#define __ARCH_UM_MMU_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mmu.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mmu.h" -+#endif -+ -+typedef union { -+#ifdef CONFIG_MODE_TT -+ struct mmu_context_tt tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct mmu_context_skas skas; -+#endif -+} mm_context_t; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/umn.h um/arch/um/include/umn.h ---- orig/arch/um/include/umn.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/umn.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMN_H -+#define __UMN_H -+ -+extern int open_umn_tty(int *slave_out, int *slipno_out); -+extern void close_umn_tty(int master, int slave); -+extern int umn_send_packet(int fd, void *data, int len); -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern void slip_unesc(unsigned char s); -+extern void umn_read(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/um_uaccess.h um/arch/um/include/um_uaccess.h ---- orig/arch/um/include/um_uaccess.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/um_uaccess.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_UACCESS_H -+#define __ARCH_UM_UACCESS_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/uaccess.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/uaccess.h" -+#endif -+ -+#define access_ok(type, addr, size) \ -+ CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size) -+ -+static inline int verify_area(int type, const void * addr, unsigned long size) -+{ -+ return(CHOOSE_MODE_PROC(verify_area_tt, verify_area_skas, type, addr, -+ size)); -+} -+ -+static inline int copy_from_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to, -+ from, n)); -+} -+ -+static inline int copy_to_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_to_user_tt, copy_to_user_skas, to, -+ from, n)); -+} -+ -+/* -+ * strncpy_from_user: - Copy a NUL terminated string from userspace. -+ * @dst: Destination address, in kernel space. This buffer must be at -+ * least @count bytes long. -+ * @src: Source address, in user space. -+ * @count: Maximum number of bytes to copy, including the trailing NUL. -+ * -+ * Copies a NUL-terminated string from userspace to kernel space. -+ * -+ * On success, returns the length of the string (not including the trailing -+ * NUL). -+ * -+ * If access to userspace fails, returns -EFAULT (some data may have been -+ * copied). -+ * -+ * If @count is smaller than the length of the string, copies @count bytes -+ * and returns @count. -+ */ -+ -+static inline int strncpy_from_user(char *dst, const char *src, int count) -+{ -+ return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, -+ dst, src, count)); -+} -+ -+/* -+ * __clear_user: - Zero a block of memory in user space, with less checking. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. Caller must check -+ * the specified block with access_ok() before calling this function. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ -+static inline int __clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); -+} -+ -+/* -+ * clear_user: - Zero a block of memory in user space. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ -+static inline int clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); -+} -+ -+/* -+ * strlen_user: - Get the size of a string in user space. -+ * @str: The string to measure. -+ * @n: The maximum valid length -+ * -+ * Get the size of a NUL-terminated string in user space. -+ * -+ * Returns the size of the string INCLUDING the terminating NUL. -+ * On exception, returns 0. -+ * If the string is too long, returns a value greater than @n. -+ */ -+static inline int strnlen_user(const void *str, int len) -+{ -+ return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/user.h um/arch/um/include/user.h ---- orig/arch/um/include/user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/user.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_H__ -+#define __USER_H__ -+ -+extern void panic(const char *fmt, ...); -+extern int printk(const char *fmt, ...); -+extern void schedule(void); -+extern void *um_kmalloc(int size); -+extern void *um_kmalloc_atomic(int size); -+extern void kfree(void *ptr); -+extern int in_aton(char *str); -+extern int open_gdb_chan(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/user_util.h um/arch/um/include/user_util.h ---- orig/arch/um/include/user_util.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/include/user_util.h 2003-11-09 11:36:31.000000000 -0500 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_UTIL_H__ -+#define __USER_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int mode_tt; -+ -+extern int grantpt(int __fd); -+extern int unlockpt(int __fd); -+extern char *ptsname(int __fd); -+ -+struct cpu_task { -+ int pid; -+ void *task; -+}; -+ -+extern struct cpu_task cpu_tasks[]; -+ -+struct signal_info { -+ void (*handler)(int, union uml_pt_regs *); -+ int is_irq; -+}; -+ -+extern struct signal_info sig_info[]; -+ -+extern unsigned long low_physmem; -+extern unsigned long high_physmem; -+extern unsigned long uml_physmem; -+extern unsigned long uml_reserved; -+extern unsigned long end_vm; -+extern unsigned long start_vm; -+extern unsigned long highmem; -+ -+extern char host_info[]; -+ -+extern char saved_command_line[]; -+extern char command_line[]; -+ -+extern char *tempdir; -+ -+extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; -+extern unsigned long _unprotected_end; -+extern unsigned long brk_start; -+ -+extern int pty_output_sigio; -+extern int pty_close_sigio; -+ -+extern void stop(void); -+extern void stack_protections(unsigned long address); -+extern void task_protections(unsigned long address); -+extern int wait_for_stop(int pid, int sig, int cont_type, void *relay); -+extern void *add_signal_handler(int sig, void (*handler)(int)); -+extern int start_fork_tramp(void *arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)); -+extern int linux_main(int argc, char **argv); -+extern void set_cmdline(char *cmd); -+extern void input_cb(void (*proc)(void *), void *arg, int arg_len); -+extern int get_pty(void); -+extern void *um_kmalloc(int size); -+extern int raw(int fd, int complain); -+extern int switcheroo(int fd, int prot, void *from, void *to, int size); -+extern void setup_machinename(char *machine_out); -+extern void setup_hostinfo(void); -+extern void add_arg(char *cmd_line, char *arg); -+extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); -+extern void init_new_thread_signals(int altstack); -+extern void do_exec(int old_pid, int new_pid); -+extern void tracer_panic(char *msg, ...); -+extern char *get_umid(int only_if_set); -+extern void do_longjmp(void *p, int val); -+extern void suspend_new_thread(int fd); -+extern int detach(int pid, int sig); -+extern int attach(int pid); -+extern void kill_child_dead(int pid); -+extern int cont(int pid); -+extern void check_ptrace(void); -+extern void check_sigio(void); -+extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); -+extern void write_sigio_workaround(void); -+extern void arch_check_bugs(void); -+extern int cpu_feature(char *what, char *buf, int len); -+extern int arch_handle_signal(int sig, union uml_pt_regs *regs); -+extern int arch_fixup(unsigned long address, void *sc_ptr); -+extern int can_do_skas(void); -+extern void arch_init_thread(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/checksum.c um/arch/um/kernel/checksum.c ---- orig/arch/um/kernel/checksum.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/checksum.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,42 @@ -+#include "asm/uaccess.h" -+#include "linux/errno.h" -+ -+extern unsigned int arch_csum_partial(const char *buff, int len, int sum); -+ -+extern unsigned int csum_partial(char *buff, int len, int sum) -+{ -+ return(arch_csum_partial(buff, len, sum)); -+} -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_to_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(src, len, sum)); -+} -+ -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_from_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(dst, len, sum)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/config.c.in um/arch/um/kernel/config.c.in ---- orig/arch/um/kernel/config.c.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/config.c.in 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include "init.h" -+ -+static __initdata char *config = "CONFIG"; -+ -+static int __init print_config(char *line, int *add) -+{ -+ printf("%s", config); -+ exit(0); -+} -+ -+__uml_setup("--showconfig", print_config, -+"--showconfig\n" -+" Prints the config file that this UML binary was generated from.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/exec_kern.c um/arch/um/kernel/exec_kern.c ---- orig/arch/um/kernel/exec_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/exec_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/slab.h" -+#include "linux/smp_lock.h" -+#include "asm/ptrace.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "tlb.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "time_user.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_thread(void) -+{ -+ CHOOSE_MODE(flush_thread_tt(), flush_thread_skas()); -+} -+ -+void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) -+{ -+ CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); -+} -+ -+extern void log_exec(char **argv, void *tty); -+ -+static int execve1(char *file, char **argv, char **env) -+{ -+ int error; -+ -+#ifdef CONFIG_TTY_LOG -+ log_exec(argv, current->tty); -+#endif -+ error = do_execve(file, argv, env, ¤t->thread.regs); -+ if (error == 0){ -+ current->ptrace &= ~PT_DTRACE; -+ set_cmdline(current_cmd()); -+ } -+ return(error); -+} -+ -+int um_execve(char *file, char **argv, char **env) -+{ -+ int err; -+ -+ err = execve1(file, argv, env); -+ if(!err) -+ do_longjmp(current->thread.exec_buf, 1); -+ return(err); -+} -+ -+int sys_execve(char *file, char **argv, char **env) -+{ -+ int error; -+ char *filename; -+ -+ lock_kernel(); -+ filename = getname((char *) file); -+ error = PTR_ERR(filename); -+ if (IS_ERR(filename)) goto out; -+ error = execve1(filename, argv, env); -+ putname(filename); -+ out: -+ unlock_kernel(); -+ return(error); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/exitcode.c um/arch/um/kernel/exitcode.c ---- orig/arch/um/kernel/exitcode.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/exitcode.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/ctype.h" -+#include "linux/proc_fs.h" -+#include "asm/uaccess.h" -+ -+/* If read and write race, the read will still atomically read a valid -+ * value. -+ */ -+int uml_exitcode = 0; -+ -+static int read_proc_exitcode(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ len = sprintf(page, "%d\n", uml_exitcode); -+ len -= off; -+ if(len <= off+count) *eof = 1; -+ *start = page + off; -+ if(len > count) len = count; -+ if(len < 0) len = 0; -+ return(len); -+} -+ -+static int write_proc_exitcode(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *end, buf[sizeof("nnnnn\0")]; -+ int tmp; -+ -+ if(copy_from_user(buf, buffer, count)) -+ return(-EFAULT); -+ tmp = simple_strtol(buf, &end, 0); -+ if((*end != '\0') && !isspace(*end)) -+ return(-EINVAL); -+ uml_exitcode = tmp; -+ return(count); -+} -+ -+static int make_proc_exitcode(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("exitcode", 0600, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_exitcode : Failed to register " -+ "/proc/exitcode\n"); -+ return(0); -+ } -+ -+ ent->read_proc = read_proc_exitcode; -+ ent->write_proc = write_proc_exitcode; -+ -+ return(0); -+} -+ -+__initcall(make_proc_exitcode); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/frame.c um/arch/um/kernel/frame.c ---- orig/arch/um/kernel/frame.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/frame.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,341 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <signal.h> -+#include <wait.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <sys/syscall.h> -+#include <sys/mman.h> -+#include <asm/page.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "frame_user.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+#include "os.h" -+ -+static int capture_stack(int (*child)(void *arg), void *arg, void *sp, -+ unsigned long top, void **data_out) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ int pid, status, n, len; -+ -+ /* Start the child as a thread */ -+ pid = clone(child, sp, CLONE_VM | SIGCHLD, arg); -+ if(pid < 0){ -+ printf("capture_stack : clone failed - errno = %d\n", errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself and continue it with a SIGUSR1 to force -+ * it into the signal handler. -+ */ -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_CONT, pid, 0, SIGUSR1) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself again and grab its registers again. -+ * At this point, the handler has stuffed the addresses of -+ * sig, sc, and SA_RESTORER in raw. -+ */ -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0){ -+ printf("capture_stack : PTRACE_GETREGS failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* It has outlived its usefulness, so continue it so it can exit */ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ if(waitpid(pid, &status, 0) < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != 9)){ -+ printf("capture_stack : Expected exit signal 9, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ -+ /* The frame that we want is the top of the signal stack */ -+ -+ len = top - PT_SP(regs); -+ *data_out = malloc(len); -+ if(*data_out == NULL){ -+ printf("capture_stack : malloc failed - errno = %d\n", errno); -+ exit(1); -+ } -+ memcpy(*data_out, (void *) PT_SP(regs), len); -+ -+ return(len); -+} -+ -+struct common_raw { -+ void *stack; -+ int size; -+ unsigned long sig; -+ unsigned long sr; -+ unsigned long sp; -+ struct arch_frame_data_raw arch; -+}; -+ -+#define SA_RESTORER (0x04000000) -+ -+typedef unsigned long old_sigset_t; -+ -+struct old_sigaction { -+ __sighandler_t handler; -+ old_sigset_t sa_mask; -+ unsigned long sa_flags; -+ void (*sa_restorer)(void); -+}; -+ -+static void child_common(struct common_raw *common, sighandler_t handler, -+ int restorer, int flags) -+{ -+ stack_t ss = ((stack_t) { .ss_sp = common->stack, -+ .ss_flags = 0, -+ .ss_size = common->size }); -+ int err; -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printf("PTRACE_TRACEME failed, errno = %d\n", errno); -+ } -+ if(sigaltstack(&ss, NULL) < 0){ -+ printf("sigaltstack failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ if(restorer){ -+ struct sigaction sa; -+ -+ sa.sa_handler = handler; -+ sigemptyset(&sa.sa_mask); -+ sa.sa_flags = SA_ONSTACK | flags; -+ err = sigaction(SIGUSR1, &sa, NULL); -+ } -+ else { -+ struct old_sigaction sa; -+ -+ sa.handler = handler; -+ sa.sa_mask = 0; -+ sa.sa_flags = (SA_ONSTACK | flags) & ~SA_RESTORER; -+ err = syscall(__NR_sigaction, SIGUSR1, &sa, NULL); -+ } -+ -+ if(err < 0){ -+ printf("sigaction failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ os_stop_process(os_getpid()); -+} -+ -+/* Changed only during early boot */ -+struct sc_frame signal_frame_sc; -+ -+struct sc_frame signal_frame_sc_sr; -+ -+struct sc_frame_raw { -+ struct common_raw common; -+ unsigned long sc; -+ int restorer; -+}; -+ -+/* Changed only during early boot */ -+static struct sc_frame_raw *raw_sc = NULL; -+ -+static void sc_handler(int sig, struct sigcontext sc) -+{ -+ raw_sc->common.sig = (unsigned long) &sig; -+ raw_sc->common.sr = frame_restorer(); -+ raw_sc->common.sp = frame_sp(); -+ raw_sc->sc = (unsigned long) ≻ -+ setup_arch_frame_raw(&raw_sc->common.arch, &sc + 1, raw_sc->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int sc_child(void *arg) -+{ -+ raw_sc = arg; -+ child_common(&raw_sc->common, (sighandler_t) sc_handler, -+ raw_sc->restorer, 0); -+ return(-1); -+} -+ -+/* Changed only during early boot */ -+struct si_frame signal_frame_si; -+ -+struct si_frame_raw { -+ struct common_raw common; -+ unsigned long sip; -+ unsigned long si; -+ unsigned long ucp; -+ unsigned long uc; -+}; -+ -+/* Changed only during early boot */ -+static struct si_frame_raw *raw_si = NULL; -+ -+static void si_handler(int sig, siginfo_t *si, struct ucontext *ucontext) -+{ -+ raw_si->common.sig = (unsigned long) &sig; -+ raw_si->common.sr = frame_restorer(); -+ raw_si->common.sp = frame_sp(); -+ raw_si->sip = (unsigned long) &si; -+ raw_si->si = (unsigned long) si; -+ raw_si->ucp = (unsigned long) &ucontext; -+ raw_si->uc = (unsigned long) ucontext; -+ setup_arch_frame_raw(&raw_si->common.arch, -+ ucontext->uc_mcontext.fpregs, raw_si->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int si_child(void *arg) -+{ -+ raw_si = arg; -+ child_common(&raw_si->common, (sighandler_t) si_handler, 1, -+ SA_SIGINFO); -+ return(-1); -+} -+ -+static int relative_sr(unsigned long sr, int sr_index, void *stack, -+ void *framep) -+{ -+ unsigned long *srp = (unsigned long *) sr; -+ unsigned long frame = (unsigned long) framep; -+ -+ if((*srp & PAGE_MASK) == (unsigned long) stack){ -+ *srp -= sr; -+ *((unsigned long *) (frame + sr_index)) = *srp; -+ return(1); -+ } -+ else return(0); -+} -+ -+static unsigned long capture_stack_common(int (*proc)(void *), void *arg, -+ struct common_raw *common_in, -+ void *top, void *sigstack, -+ int stack_len, -+ struct frame_common *common_out) -+{ -+ unsigned long sig_top = (unsigned long) sigstack + stack_len, base; -+ -+ common_in->stack = (void *) sigstack; -+ common_in->size = stack_len; -+ common_out->len = capture_stack(proc, arg, top, sig_top, -+ &common_out->data); -+ base = sig_top - common_out->len; -+ common_out->sig_index = common_in->sig - base; -+ common_out->sp_index = common_in->sp - base; -+ common_out->sr_index = common_in->sr - base; -+ common_out->sr_relative = relative_sr(common_in->sr, -+ common_out->sr_index, sigstack, -+ common_out->data); -+ return(base); -+} -+ -+void capture_signal_stack(void) -+{ -+ struct sc_frame_raw raw_sc; -+ struct si_frame_raw raw_si; -+ void *stack, *sigstack; -+ unsigned long top, base; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ sigstack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if((stack == MAP_FAILED) || (sigstack == MAP_FAILED)){ -+ printf("capture_signal_stack : mmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ -+ /* Get the sigcontext, no sigrestorer layout */ -+ raw_sc.restorer = 0; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc.common); -+ -+ signal_frame_sc.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc.common.arch); -+ -+ /* Ditto for the sigcontext, sigrestorer layout */ -+ raw_sc.restorer = 1; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc_sr.common); -+ signal_frame_sc_sr.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc_sr.common.arch); -+ -+ /* And the siginfo layout */ -+ -+ base = capture_stack_common(si_child, &raw_si, &raw_si.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_si.common); -+ signal_frame_si.sip_index = raw_si.sip - base; -+ signal_frame_si.si_index = raw_si.si - base; -+ signal_frame_si.ucp_index = raw_si.ucp - base; -+ signal_frame_si.uc_index = raw_si.uc - base; -+ setup_arch_frame(&raw_si.common.arch, &signal_frame_si.common.arch); -+ -+ if((munmap(stack, PAGE_SIZE) < 0) || -+ (munmap(sigstack, PAGE_SIZE) < 0)){ -+ printf("capture_signal_stack : munmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/frame_kern.c um/arch/um/kernel/frame_kern.c ---- orig/arch/um/kernel/frame_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/frame_kern.c 2003-11-19 03:32:43.000000000 -0500 -@@ -0,0 +1,174 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/signal.h" -+#include "asm/uaccess.h" -+#include "asm/ucontext.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -+{ -+ if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) -+ return -EFAULT; -+ if (from->si_code < 0) -+ return __copy_to_user(to, from, sizeof(siginfo_t)); -+ else { -+ int err; -+ -+ /* If you change siginfo_t structure, please be sure -+ this code is fixed accordingly. -+ It should never copy any pad contained in the structure -+ to avoid security leaks, but must copy the generic -+ 3 ints plus the relevant union member. */ -+ err = __put_user(from->si_signo, &to->si_signo); -+ err |= __put_user(from->si_errno, &to->si_errno); -+ err |= __put_user((short)from->si_code, &to->si_code); -+ /* First 32bits of unions are always present. */ -+ err |= __put_user(from->si_pid, &to->si_pid); -+ switch (from->si_code >> 16) { -+ case __SI_FAULT >> 16: -+ break; -+ case __SI_CHLD >> 16: -+ err |= __put_user(from->si_utime, &to->si_utime); -+ err |= __put_user(from->si_stime, &to->si_stime); -+ err |= __put_user(from->si_status, &to->si_status); -+ default: -+ err |= __put_user(from->si_uid, &to->si_uid); -+ break; -+ } -+ return err; -+ } -+} -+ -+static int copy_restorer(void (*restorer)(void), unsigned long start, -+ unsigned long sr_index, int sr_relative) -+{ -+ unsigned long sr; -+ -+ if(sr_relative){ -+ sr = (unsigned long) restorer; -+ sr += start + sr_index; -+ restorer = (void (*)(void)) sr; -+ } -+ -+ return(copy_to_user((void *) (start + sr_index), &restorer, -+ sizeof(restorer))); -+} -+ -+extern int userspace_pid[]; -+ -+static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, -+ struct arch_frame_data *arch) -+{ -+ return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), -+ arch), -+ copy_sc_to_user_skas(userspace_pid[0], to, fp, -+ &from->regs, -+ current->thread.cr2, -+ current->thread.err))); -+} -+ -+static int copy_ucontext_to_user(struct ucontext *uc, void *fp, sigset_t *set, -+ unsigned long sp) -+{ -+ int err = 0; -+ -+ err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); -+ err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); -+ err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); -+ err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, -+ &signal_frame_si.common.arch); -+ err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); -+ return(err); -+} -+ -+int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask) -+{ -+ unsigned long start; -+ void *sip, *ucp, *fp; -+ -+ start = stack_top - signal_frame_si.common.len; -+ sip = (void *) (start + signal_frame_si.si_index); -+ ucp = (void *) (start + signal_frame_si.uc_index); -+ fp = (void *) (((unsigned long) ucp) + sizeof(struct ucontext)); -+ -+ if(restorer == NULL) -+ panic("setup_signal_stack_si - no restorer"); -+ -+ if(copy_to_user((void *) start, signal_frame_si.common.data, -+ signal_frame_si.common.len) || -+ copy_to_user((void *) (start + signal_frame_si.common.sig_index), -+ &sig, sizeof(sig)) || -+ copy_siginfo_to_user(sip, info) || -+ copy_to_user((void *) (start + signal_frame_si.sip_index), &sip, -+ sizeof(sip)) || -+ copy_ucontext_to_user(ucp, fp, mask, PT_REGS_SP(regs)) || -+ copy_to_user((void *) (start + signal_frame_si.ucp_index), &ucp, -+ sizeof(ucp)) || -+ copy_restorer(restorer, start, signal_frame_si.common.sr_index, -+ signal_frame_si.common.sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) = handler; -+ PT_REGS_SP(regs) = start + signal_frame_si.common.sp_index; -+ return(0); -+} -+ -+int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, sigset_t *mask) -+{ -+ struct frame_common *frame = &signal_frame_sc_sr.common; -+ void *user_sc; -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ unsigned long sigs, sr; -+ unsigned long start = stack_top - frame->len - sig_size; -+ -+ user_sc = (void *) (start + signal_frame_sc_sr.sc_index); -+ if(restorer == NULL){ -+ frame = &signal_frame_sc.common; -+ user_sc = (void *) (start + signal_frame_sc.sc_index); -+ sr = (unsigned long) frame->data; -+ sr += frame->sr_index; -+ sr = *((unsigned long *) sr); -+ restorer = ((void (*)(void)) sr); -+ } -+ -+ sigs = start + frame->len; -+ if(copy_to_user((void *) start, frame->data, frame->len) || -+ copy_to_user((void *) (start + frame->sig_index), &sig, -+ sizeof(sig)) || -+ copy_sc_to_user(user_sc, NULL, regs, -+ &signal_frame_sc.common.arch) || -+ copy_to_user(sc_sigmask(user_sc), mask, sizeof(mask->sig[0])) || -+ copy_to_user((void *) sigs, &mask->sig[1], sig_size) || -+ copy_restorer(restorer, start, frame->sr_index, frame->sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) = handler; -+ PT_REGS_SP(regs) = start + frame->sp_index; -+ -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/gmon_syms.c um/arch/um/kernel/gmon_syms.c ---- orig/arch/um/kernel/gmon_syms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/gmon_syms.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void __bb_init_func(void *); -+EXPORT_SYMBOL(__bb_init_func); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/gprof_syms.c um/arch/um/kernel/gprof_syms.c ---- orig/arch/um/kernel/gprof_syms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/gprof_syms.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void mcount(void); -+EXPORT_SYMBOL(mcount); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/helper.c um/arch/um/kernel/helper.c ---- orig/arch/um/kernel/helper.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/helper.c 2003-11-08 07:41:30.000000000 -0500 -@@ -0,0 +1,170 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <sched.h> -+#include <sys/signal.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+ -+struct helper_data { -+ void (*pre_exec)(void*); -+ void *pre_data; -+ char **argv; -+ int fd; -+}; -+ -+/* Debugging aid, changed only from gdb */ -+int helper_pause = 0; -+ -+static void helper_hup(int sig) -+{ -+} -+ -+static int helper_child(void *arg) -+{ -+ struct helper_data *data = arg; -+ char **argv = data->argv; -+ int errval; -+ -+ if(helper_pause){ -+ signal(SIGHUP, helper_hup); -+ pause(); -+ } -+ if(data->pre_exec != NULL) -+ (*data->pre_exec)(data->pre_data); -+ execvp(argv[0], argv); -+ errval = errno; -+ printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); -+ os_write_file(data->fd, &errval, sizeof(errval)); -+ os_kill_process(os_getpid(), 0); -+ return(0); -+} -+ -+/* XXX The alloc_stack here breaks if this is called in the tracing thread */ -+ -+int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out) -+{ -+ struct helper_data data; -+ unsigned long stack, sp; -+ int pid, fds[2], err, n; -+ -+ if((stack_out != NULL) && (*stack_out != 0)) -+ stack = *stack_out; -+ else stack = alloc_stack(0, um_in_interrupt()); -+ if(stack == 0) -+ return(-ENOMEM); -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("run_helper : pipe failed, err = %d\n", -err); -+ goto out_free; -+ } -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0){ -+ printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", -+ -err); -+ goto out_close; -+ } -+ -+ sp = stack + page_size() - sizeof(void *); -+ data.pre_exec = pre_exec; -+ data.pre_data = pre_data; -+ data.argv = argv; -+ data.fd = fds[1]; -+ pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); -+ if(pid < 0){ -+ printk("run_helper : clone failed, errno = %d\n", errno); -+ err = -errno; -+ goto out_close; -+ } -+ -+ os_close_file(fds[1]); -+ n = os_read_file(fds[0], &err, sizeof(err)); -+ if(n < 0){ -+ printk("run_helper : read on pipe failed, err = %d\n", -n); -+ err = n; -+ goto out_kill; -+ } -+ else if(n != 0){ -+ waitpid(pid, NULL, 0); -+ pid = -errno; -+ } -+ -+ if(stack_out == NULL) free_stack(stack, 0); -+ else *stack_out = stack; -+ return(pid); -+ -+ out_kill: -+ os_kill_process(pid, 1); -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ out_free: -+ free_stack(stack, 0); -+ return(err); -+} -+ -+int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, -+ unsigned long *stack_out, int stack_order) -+{ -+ unsigned long stack, sp; -+ int pid, status; -+ -+ stack = alloc_stack(stack_order, um_in_interrupt()); -+ if(stack == 0) return(-ENOMEM); -+ -+ sp = stack + (page_size() << stack_order) - sizeof(void *); -+ pid = clone(proc, (void *) sp, flags | SIGCHLD, arg); -+ if(pid < 0){ -+ printk("run_helper_thread : clone failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ if(stack_out == NULL){ -+ pid = waitpid(pid, &status, 0); -+ if(pid < 0){ -+ printk("run_helper_thread - wait failed, errno = %d\n", -+ errno); -+ pid = -errno; -+ } -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) -+ printk("run_helper_thread - thread returned status " -+ "0x%x\n", status); -+ free_stack(stack, stack_order); -+ } -+ else *stack_out = stack; -+ return(pid); -+} -+ -+int helper_wait(int pid, int block) -+{ -+ int ret; -+ -+ ret = waitpid(pid, NULL, WNOHANG); -+ if(ret < 0){ -+ printk("helper_wait : waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/initrd_kern.c um/arch/um/kernel/initrd_kern.c ---- orig/arch/um/kernel/initrd_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/initrd_kern.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/blk.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+ -+/* Changed by uml_initrd_setup, which is a setup */ -+static char *initrd __initdata = NULL; -+ -+static int __init read_initrd(void) -+{ -+ void *area; -+ long long size; -+ int err; -+ -+ if(initrd == NULL) return 0; -+ err = os_file_size(initrd, &size); -+ if(err) return 0; -+ area = alloc_bootmem(size); -+ if(area == NULL) return 0; -+ if(load_initrd(initrd, area, size) == -1) return 0; -+ initrd_start = (unsigned long) area; -+ initrd_end = initrd_start + size; -+ return 0; -+} -+ -+__uml_postsetup(read_initrd); -+ -+static int __init uml_initrd_setup(char *line, int *add) -+{ -+ initrd = line; -+ return 0; -+} -+ -+__uml_setup("initrd=", uml_initrd_setup, -+"initrd=<initrd image>\n" -+" This is used to boot UML from an initrd image. The argument is the\n" -+" name of the file containing the image.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/initrd_user.c um/arch/um/kernel/initrd_user.c ---- orig/arch/um/kernel/initrd_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/initrd_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <errno.h> -+ -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "initrd.h" -+#include "os.h" -+ -+int load_initrd(char *filename, void *buf, int size) -+{ -+ int fd, n; -+ -+ fd = os_open_file(filename, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Opening '%s' failed - err = %d\n", filename, -fd); -+ return(-1); -+ } -+ n = os_read_file(fd, buf, size); -+ if(n != size){ -+ printk("Read of %d bytes from '%s' failed, err = %d\n", size, -+ filename, -n); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/init_task.c um/arch/um/kernel/init_task.c ---- orig/arch/um/kernel/init_task.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/init_task.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "linux/sched.h" -+#include "linux/version.h" -+#include "asm/uaccess.h" -+#include "asm/pgtable.h" -+#include "user_util.h" -+#include "mem_user.h" -+ -+static struct fs_struct init_fs = INIT_FS; -+static struct files_struct init_files = INIT_FILES; -+static struct signal_struct init_signals = INIT_SIGNALS; -+struct mm_struct init_mm = INIT_MM(init_mm); -+ -+/* -+ * Initial task structure. -+ * -+ * We need to make sure that this is 16384-byte aligned due to the -+ * way process stacks are handled. This is done by having a special -+ * "init_task" linker map entry.. -+ */ -+ -+union task_union init_task_union -+__attribute__((__section__(".data.init_task"))) = -+{ INIT_TASK(init_task_union.task) }; -+ -+struct task_struct *alloc_task_struct(void) -+{ -+ return((struct task_struct *) -+ __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); -+} -+ -+void unprotect_stack(unsigned long stack) -+{ -+ protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, -+ 1, 1, 0, 1); -+} -+ -+void free_task_struct(struct task_struct *task) -+{ -+ /* free_pages decrements the page counter and only actually frees -+ * the pages if they are now not accessed by anything. -+ */ -+ free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/irq.c um/arch/um/kernel/irq.c ---- orig/arch/um/kernel/irq.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/irq.c 2003-12-16 22:45:27.000000000 -0500 -@@ -0,0 +1,840 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: -+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/irq.h" -+#include "linux/kernel_stat.h" -+#include "linux/interrupt.h" -+#include "linux/random.h" -+#include "linux/slab.h" -+#include "linux/file.h" -+#include "linux/proc_fs.h" -+#include "linux/init.h" -+#include "linux/seq_file.h" -+#include "asm/irq.h" -+#include "asm/hw_irq.h" -+#include "asm/hardirq.h" -+#include "asm/atomic.h" -+#include "asm/signal.h" -+#include "asm/system.h" -+#include "asm/errno.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+static void register_irq_proc (unsigned int irq); -+ -+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = -+ { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; -+ -+/* -+ * Generic no controller code -+ */ -+ -+static void enable_none(unsigned int irq) { } -+static unsigned int startup_none(unsigned int irq) { return 0; } -+static void disable_none(unsigned int irq) { } -+static void ack_none(unsigned int irq) -+{ -+/* -+ * 'what should we do if we get a hw irq event on an illegal vector'. -+ * each architecture has to answer this themselves, it doesnt deserve -+ * a generic callback i think. -+ */ -+#if CONFIG_X86 -+ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); -+#ifdef CONFIG_X86_LOCAL_APIC -+ /* -+ * Currently unexpected vectors happen only on SMP and APIC. -+ * We _must_ ack these because every local APIC has only N -+ * irq slots per priority level, and a 'hanging, unacked' IRQ -+ * holds up an irq slot - in excessive cases (when multiple -+ * unexpected vectors occur) that might lock up the APIC -+ * completely. -+ */ -+ ack_APIC_irq(); -+#endif -+#endif -+} -+ -+/* startup is the same as "enable", shutdown is same as "disable" */ -+#define shutdown_none disable_none -+#define end_none enable_none -+ -+struct hw_interrupt_type no_irq_type = { -+ "none", -+ startup_none, -+ shutdown_none, -+ enable_none, -+ disable_none, -+ ack_none, -+ end_none -+}; -+ -+/* -+ * Generic, controller-independent functions: -+ */ -+ -+int get_irq_list(char *buf) -+{ -+ int i, j; -+ unsigned long flags; -+ struct irqaction * action; -+ char *p = buf; -+ -+ p += sprintf(p, " "); -+ for (j=0; j<smp_num_cpus; j++) -+ p += sprintf(p, "CPU%d ",j); -+ *p++ = '\n'; -+ -+ for (i = 0 ; i < NR_IRQS ; i++) { -+ spin_lock_irqsave(&irq_desc[i].lock, flags); -+ action = irq_desc[i].action; -+ if (!action) -+ goto end; -+ p += sprintf(p, "%3d: ",i); -+#ifndef CONFIG_SMP -+ p += sprintf(p, "%10u ", kstat_irqs(i)); -+#else -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ kstat.irqs[cpu_logical_map(j)][i]); -+#endif -+ p += sprintf(p, " %14s", irq_desc[i].handler->typename); -+ p += sprintf(p, " %s", action->name); -+ -+ for (action=action->next; action; action = action->next) -+ p += sprintf(p, ", %s", action->name); -+ *p++ = '\n'; -+ end: -+ spin_unlock_irqrestore(&irq_desc[i].lock, flags); -+ } -+ p += sprintf(p, "\n"); -+#ifdef notdef -+#if CONFIG_SMP -+ p += sprintf(p, "LOC: "); -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ apic_timer_irqs[cpu_logical_map(j)]); -+ p += sprintf(p, "\n"); -+#endif -+#endif -+ p += sprintf(p, "ERR: %10lu\n", 0L); -+ return p - buf; -+} -+ -+/* -+ * This should really return information about whether -+ * we should do bottom half handling etc. Right now we -+ * end up _always_ checking the bottom half, which is a -+ * waste of time and is not what some drivers would -+ * prefer. -+ */ -+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, -+ struct irqaction * action) -+{ -+ int status; -+ int cpu = smp_processor_id(); -+ -+ irq_enter(cpu, irq); -+ -+ status = 1; /* Force the "do bottom halves" bit */ -+ -+ if (!(action->flags & SA_INTERRUPT)) -+ __sti(); -+ -+ do { -+ status |= action->flags; -+ action->handler(irq, action->dev_id, regs); -+ action = action->next; -+ } while (action); -+ if (status & SA_SAMPLE_RANDOM) -+ add_interrupt_randomness(irq); -+ __cli(); -+ -+ irq_exit(cpu, irq); -+ -+ return status; -+} -+ -+/* -+ * Generic enable/disable code: this just calls -+ * down into the PIC-specific version for the actual -+ * hardware disable after having gotten the irq -+ * controller lock. -+ */ -+ -+/** -+ * disable_irq_nosync - disable an irq without waiting -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. Unlike disable_irq(), this function does not ensure existing -+ * instances of the IRQ handler have completed before returning. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void inline disable_irq_nosync(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ if (!desc->depth++) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->disable(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/** -+ * disable_irq - disable an irq and wait for completion -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. That is for two disables you need two enables. This -+ * function waits for any pending IRQ handlers for this interrupt -+ * to complete before returning. If you use this function while -+ * holding a resource the IRQ handler may need you will deadlock. -+ * -+ * This function may be called - with care - from IRQ context. -+ */ -+ -+void disable_irq(unsigned int irq) -+{ -+ disable_irq_nosync(irq); -+ -+ if (!local_irq_count(smp_processor_id())) { -+ do { -+ barrier(); -+ } while (irq_desc[irq].status & IRQ_INPROGRESS); -+ } -+} -+ -+/** -+ * enable_irq - enable interrupt handling on an irq -+ * @irq: Interrupt to enable -+ * -+ * Re-enables the processing of interrupts on this IRQ line -+ * providing no disable_irq calls are now in effect. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void enable_irq(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ switch (desc->depth) { -+ case 1: { -+ unsigned int status = desc->status & ~IRQ_DISABLED; -+ desc->status = status; -+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { -+ desc->status = status | IRQ_REPLAY; -+ hw_resend_irq(desc->handler,irq); -+ } -+ desc->handler->enable(irq); -+ /* fall-through */ -+ } -+ default: -+ desc->depth--; -+ break; -+ case 0: -+ printk(KERN_ERR "enable_irq() unbalanced from %p\n", -+ __builtin_return_address(0)); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/* -+ * do_IRQ handles all normal device IRQ's (the special -+ * SMP cross-CPU interrupts have their own specific -+ * handlers). -+ */ -+unsigned int do_IRQ(int irq, union uml_pt_regs *regs) -+{ -+ /* -+ * 0 return value means that this irq is already being -+ * handled by some other CPU. (or is disabled) -+ */ -+ int cpu = smp_processor_id(); -+ irq_desc_t *desc = irq_desc + irq; -+ struct irqaction * action; -+ unsigned int status; -+ -+ kstat.irqs[cpu][irq]++; -+ spin_lock(&desc->lock); -+ desc->handler->ack(irq); -+ /* -+ REPLAY is when Linux resends an IRQ that was dropped earlier -+ WAITING is used by probe to mark irqs that are being tested -+ */ -+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); -+ status |= IRQ_PENDING; /* we _want_ to handle it */ -+ -+ /* -+ * If the IRQ is disabled for whatever reason, we cannot -+ * use the action we have. -+ */ -+ action = NULL; -+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { -+ action = desc->action; -+ status &= ~IRQ_PENDING; /* we commit to handling */ -+ status |= IRQ_INPROGRESS; /* we are handling it */ -+ } -+ desc->status = status; -+ -+ /* -+ * If there is no IRQ handler or it was disabled, exit early. -+ Since we set PENDING, if another processor is handling -+ a different instance of this same irq, the other processor -+ will take care of it. -+ */ -+ if (!action) -+ goto out; -+ -+ /* -+ * Edge triggered interrupts need to remember -+ * pending events. -+ * This applies to any hw interrupts that allow a second -+ * instance of the same irq to arrive while we are in do_IRQ -+ * or in the handler. But the code here only handles the _second_ -+ * instance of the irq, not the third or fourth. So it is mostly -+ * useful for irq hardware that does not mask cleanly in an -+ * SMP environment. -+ */ -+ for (;;) { -+ spin_unlock(&desc->lock); -+ handle_IRQ_event(irq, (struct pt_regs *) regs, action); -+ spin_lock(&desc->lock); -+ -+ if (!(desc->status & IRQ_PENDING)) -+ break; -+ desc->status &= ~IRQ_PENDING; -+ } -+ desc->status &= ~IRQ_INPROGRESS; -+out: -+ /* -+ * The ->end() handler has to deal with interrupts which got -+ * disabled while the handler was running. -+ */ -+ desc->handler->end(irq); -+ spin_unlock(&desc->lock); -+ -+ if (softirq_pending(cpu)) -+ do_softirq(); -+ return 1; -+} -+ -+/** -+ * request_irq - allocate an interrupt line -+ * @irq: Interrupt line to allocate -+ * @handler: Function to be called when the IRQ occurs -+ * @irqflags: Interrupt type flags -+ * @devname: An ascii name for the claiming device -+ * @dev_id: A cookie passed back to the handler function -+ * -+ * This call allocates interrupt resources and enables the -+ * interrupt line and IRQ handling. From the point this -+ * call is made your handler function may be invoked. Since -+ * your handler function must clear any interrupt the board -+ * raises, you must take care both to initialise your hardware -+ * and to set up the interrupt handler in the right order. -+ * -+ * Dev_id must be globally unique. Normally the address of the -+ * device data structure is used as the cookie. Since the handler -+ * receives this value it makes sense to use it. -+ * -+ * If your interrupt is shared you must pass a non NULL dev_id -+ * as this is required when freeing the interrupt. -+ * -+ * Flags: -+ * -+ * SA_SHIRQ Interrupt is shared -+ * -+ * SA_INTERRUPT Disable local interrupts while processing -+ * -+ * SA_SAMPLE_RANDOM The interrupt can be used for entropy -+ * -+ */ -+ -+int request_irq(unsigned int irq, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, -+ const char * devname, -+ void *dev_id) -+{ -+ int retval; -+ struct irqaction * action; -+ -+#if 1 -+ /* -+ * Sanity-check: shared interrupts should REALLY pass in -+ * a real dev-ID, otherwise we'll have trouble later trying -+ * to figure out which interrupt is which (messes up the -+ * interrupt freeing logic etc). -+ */ -+ if (irqflags & SA_SHIRQ) { -+ if (!dev_id) -+ printk(KERN_ERR "Bad boy: %s (at 0x%x) called us " -+ "without a dev_id!\n", devname, (&irq)[-1]); -+ } -+#endif -+ -+ if (irq >= NR_IRQS) -+ return -EINVAL; -+ if (!handler) -+ return -EINVAL; -+ -+ action = (struct irqaction *) -+ kmalloc(sizeof(struct irqaction), GFP_KERNEL); -+ if (!action) -+ return -ENOMEM; -+ -+ action->handler = handler; -+ action->flags = irqflags; -+ action->mask = 0; -+ action->name = devname; -+ action->next = NULL; -+ action->dev_id = dev_id; -+ -+ retval = setup_irq(irq, action); -+ if (retval) -+ kfree(action); -+ return retval; -+} -+ -+int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id) -+{ -+ int err; -+ -+ err = request_irq(irq, handler, irqflags, devname, dev_id); -+ if(err) -+ return(err); -+ -+ if(fd != -1) -+ err = activate_fd(irq, fd, type, dev_id); -+ return(err); -+} -+ -+/* this was setup_x86_irq but it seems pretty generic */ -+int setup_irq(unsigned int irq, struct irqaction * new) -+{ -+ int shared = 0; -+ unsigned long flags; -+ struct irqaction *old, **p; -+ irq_desc_t *desc = irq_desc + irq; -+ -+ /* -+ * Some drivers like serial.c use request_irq() heavily, -+ * so we have to be careful not to interfere with a -+ * running system. -+ */ -+ if (new->flags & SA_SAMPLE_RANDOM) { -+ /* -+ * This function might sleep, we want to call it first, -+ * outside of the atomic block. -+ * Yes, this might clear the entropy pool if the wrong -+ * driver is attempted to be loaded, without actually -+ * installing a new handler, but is this really a problem, -+ * only the sysadmin is able to do this. -+ */ -+ rand_initialize_irq(irq); -+ } -+ -+ /* -+ * The following block of code has to be executed atomically -+ */ -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ old = *p; -+ if (old != NULL) { -+ /* Can't share interrupts unless both agree to */ -+ if (!(old->flags & new->flags & SA_SHIRQ)) { -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return -EBUSY; -+ } -+ -+ /* add new interrupt at end of irq queue */ -+ do { -+ p = &old->next; -+ old = *p; -+ } while (old); -+ shared = 1; -+ } -+ -+ *p = new; -+ -+ if (!shared) { -+ desc->depth = 0; -+ desc->status &= ~IRQ_DISABLED; -+ desc->handler->startup(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+ register_irq_proc(irq); -+ return 0; -+} -+ -+/** -+ * free_irq - free an interrupt -+ * @irq: Interrupt line to free -+ * @dev_id: Device identity to free -+ * -+ * Remove an interrupt handler. The handler is removed and if the -+ * interrupt line is no longer in use by any driver it is disabled. -+ * On a shared IRQ the caller must ensure the interrupt is disabled -+ * on the card it drives before calling this function. The function -+ * does not return until any executing interrupts for this IRQ -+ * have completed. -+ * -+ * This function may be called from interrupt context. -+ * -+ * Bugs: Attempting to free an irq in a handler for the same irq hangs -+ * the machine. -+ */ -+ -+void free_irq(unsigned int irq, void *dev_id) -+{ -+ irq_desc_t *desc; -+ struct irqaction **p; -+ unsigned long flags; -+ -+ if (irq >= NR_IRQS) -+ return; -+ -+ desc = irq_desc + irq; -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ for (;;) { -+ struct irqaction * action = *p; -+ if (action) { -+ struct irqaction **pp = p; -+ p = &action->next; -+ if (action->dev_id != dev_id) -+ continue; -+ -+ /* Found it - now remove it from the list of entries */ -+ *pp = action->next; -+ if (!desc->action) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->shutdown(irq); -+ } -+ free_irq_by_irq_and_dev(irq, dev_id); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+#ifdef CONFIG_SMP -+ /* Wait to make sure it's not being used on another CPU */ -+ while (desc->status & IRQ_INPROGRESS) -+ barrier(); -+#endif -+ kfree(action); -+ return; -+ } -+ printk(KERN_ERR "Trying to free free IRQ%d\n",irq); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return; -+ } -+} -+ -+/* These are initialized by sysctl_init, which is called from init/main.c */ -+static struct proc_dir_entry * root_irq_dir; -+static struct proc_dir_entry * irq_dir [NR_IRQS]; -+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; -+ -+/* These are read and written as longs, so a read won't see a partial write -+ * even during a race. -+ */ -+static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; -+ -+#define HEX_DIGITS 8 -+ -+static int irq_affinity_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -+} -+ -+static unsigned int parse_hex_value (const char *buffer, -+ unsigned long count, unsigned long *ret) -+{ -+ unsigned char hexnum [HEX_DIGITS]; -+ unsigned long value; -+ int i; -+ -+ if (!count) -+ return -EINVAL; -+ if (count > HEX_DIGITS) -+ count = HEX_DIGITS; -+ if (copy_from_user(hexnum, buffer, count)) -+ return -EFAULT; -+ -+ /* -+ * Parse the first HEX_DIGITS characters as a hex string, any non-hex -+ * char is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. -+ */ -+ value = 0; -+ -+ for (i = 0; i < count; i++) { -+ unsigned int c = hexnum[i]; -+ -+ switch (c) { -+ case '0' ... '9': c -= '0'; break; -+ case 'a' ... 'f': c -= 'a'-10; break; -+ case 'A' ... 'F': c -= 'A'-10; break; -+ default: -+ goto out; -+ } -+ value = (value << 4) | c; -+ } -+out: -+ *ret = value; -+ return 0; -+} -+ -+static int irq_affinity_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ int irq = (long) data, full_count = count, err; -+ unsigned long new_value; -+ -+ if (!irq_desc[irq].handler->set_affinity) -+ return -EIO; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ -+#if CONFIG_SMP -+ /* -+ * Do not allow disabling IRQs completely - it's a too easy -+ * way to make the system unusable accidentally :-) At least -+ * one online CPU still has to be targeted. -+ */ -+ if (!(new_value & cpu_online_map)) -+ return -EINVAL; -+#endif -+ -+ irq_affinity[irq] = new_value; -+ irq_desc[irq].handler->set_affinity(irq, new_value); -+ -+ return full_count; -+} -+ -+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data; -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", *mask); -+} -+ -+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data, full_count = count, err; -+ unsigned long new_value; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ if (err) -+ return err; -+ -+ *mask = new_value; -+ return full_count; -+} -+ -+#define MAX_NAMELEN 10 -+ -+static void register_irq_proc (unsigned int irq) -+{ -+ struct proc_dir_entry *entry; -+ char name [MAX_NAMELEN]; -+ -+ if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || -+ irq_dir[irq]) -+ return; -+ -+ memset(name, 0, MAX_NAMELEN); -+ sprintf(name, "%d", irq); -+ -+ /* create /proc/irq/1234 */ -+ irq_dir[irq] = proc_mkdir(name, root_irq_dir); -+ -+ /* create /proc/irq/1234/smp_affinity */ -+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); -+ -+ entry->nlink = 1; -+ entry->data = (void *)(long)irq; -+ entry->read_proc = irq_affinity_read_proc; -+ entry->write_proc = irq_affinity_write_proc; -+ -+ smp_affinity_entry[irq] = entry; -+} -+ -+/* Read and written as a long */ -+unsigned long prof_cpu_mask = -1; -+ -+void __init init_irq_proc (void) -+{ -+ struct proc_dir_entry *entry; -+ int i; -+ -+ /* create /proc/irq */ -+ root_irq_dir = proc_mkdir("irq", 0); -+ -+ /* create /proc/irq/prof_cpu_mask */ -+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); -+ -+ entry->nlink = 1; -+ entry->data = (void *)&prof_cpu_mask; -+ entry->read_proc = prof_cpu_mask_read_proc; -+ entry->write_proc = prof_cpu_mask_write_proc; -+ -+ /* -+ * Create entries for all existing IRQs. -+ */ -+ for (i = 0; i < NR_IRQS; i++) -+ register_irq_proc(i); -+} -+ -+static spinlock_t irq_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long irq_lock(void) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&irq_spinlock, flags); -+ return(flags); -+} -+ -+void irq_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&irq_spinlock, flags); -+} -+ -+unsigned long probe_irq_on(void) -+{ -+ return(0); -+} -+ -+int probe_irq_off(unsigned long val) -+{ -+ return(0); -+} -+ -+static unsigned int startup_SIGIO_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGIO(unsigned int irq) -+{ -+} -+ -+static void end_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static unsigned int startup_SIGVTALRM_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGVTALRM(unsigned int irq) -+{ -+} -+ -+static void end_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static struct hw_interrupt_type SIGIO_irq_type = { -+ "SIGIO", -+ startup_SIGIO_irq, -+ shutdown_SIGIO_irq, -+ enable_SIGIO_irq, -+ disable_SIGIO_irq, -+ mask_and_ack_SIGIO, -+ end_SIGIO_irq, -+ NULL -+}; -+ -+static struct hw_interrupt_type SIGVTALRM_irq_type = { -+ "SIGVTALRM", -+ startup_SIGVTALRM_irq, -+ shutdown_SIGVTALRM_irq, -+ enable_SIGVTALRM_irq, -+ disable_SIGVTALRM_irq, -+ mask_and_ack_SIGVTALRM, -+ end_SIGVTALRM_irq, -+ NULL -+}; -+ -+void __init init_IRQ(void) -+{ -+ int i; -+ -+ irq_desc[TIMER_IRQ].status = IRQ_DISABLED; -+ irq_desc[TIMER_IRQ].action = 0; -+ irq_desc[TIMER_IRQ].depth = 1; -+ irq_desc[TIMER_IRQ].handler = &SIGVTALRM_irq_type; -+ enable_irq(TIMER_IRQ); -+ for(i=1;i<NR_IRQS;i++){ -+ irq_desc[i].status = IRQ_DISABLED; -+ irq_desc[i].action = 0; -+ irq_desc[i].depth = 1; -+ irq_desc[i].handler = &SIGIO_irq_type; -+ enable_irq(i); -+ } -+ init_irq_signals(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/irq_user.c um/arch/um/kernel/irq_user.c ---- orig/arch/um/kernel/irq_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/irq_user.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,424 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <signal.h> -+#include <string.h> -+#include <sys/poll.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "sigio.h" -+#include "irq_user.h" -+#include "os.h" -+ -+struct irq_fd { -+ struct irq_fd *next; -+ void *id; -+ int fd; -+ int type; -+ int irq; -+ int pid; -+ int events; -+ int current_events; -+ int freed; -+}; -+ -+static struct irq_fd *active_fds = NULL; -+static struct irq_fd **last_irq_ptr = &active_fds; -+ -+static struct pollfd *pollfds = NULL; -+static int pollfds_num = 0; -+static int pollfds_size = 0; -+ -+extern int io_count, intr_count; -+ -+void sigio_handler(int sig, union uml_pt_regs *regs) -+{ -+ struct irq_fd *irq_fd, *next; -+ int i, n; -+ -+ if(smp_sigio_handler()) return; -+ while(1){ -+ n = poll(pollfds, pollfds_num, 0); -+ if(n < 0){ -+ if(errno == EINTR) continue; -+ printk("sigio_handler : poll returned %d, " -+ "errno = %d\n", n, errno); -+ break; -+ } -+ if(n == 0) break; -+ -+ irq_fd = active_fds; -+ for(i = 0; i < pollfds_num; i++){ -+ if(pollfds[i].revents != 0){ -+ irq_fd->current_events = pollfds[i].revents; -+ pollfds[i].fd = -1; -+ } -+ irq_fd = irq_fd->next; -+ } -+ -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = next){ -+ next = irq_fd->next; -+ if(irq_fd->current_events != 0){ -+ irq_fd->current_events = 0; -+ do_IRQ(irq_fd->irq, regs); -+ -+ /* This is here because the next irq may be -+ * freed in the handler. If a console goes -+ * away, both the read and write irqs will be -+ * freed. After do_IRQ, ->next will point to -+ * a good IRQ. -+ * Irqs can't be freed inside their handlers, -+ * so the next best thing is to have them -+ * marked as needing freeing, so that they -+ * can be freed here. -+ */ -+ next = irq_fd->next; -+ if(irq_fd->freed) -+ free_irq(irq_fd->irq, irq_fd->id); -+ } -+ } -+ } -+} -+ -+int activate_ipi(int fd, int pid) -+{ -+ return(os_set_fd_async(fd, pid)); -+} -+ -+static void maybe_sigio_broken(int fd, int type) -+{ -+ if(isatty(fd)){ -+ if((type == IRQ_WRITE) && !pty_output_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 0); -+ } -+ else if((type == IRQ_READ) && !pty_close_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 1); -+ } -+ } -+} -+ -+int activate_fd(int irq, int fd, int type, void *dev_id) -+{ -+ struct pollfd *tmp_pfd; -+ struct irq_fd *new_fd, *irq_fd; -+ unsigned long flags; -+ int pid, events, err, n, size; -+ -+ pid = os_getpid(); -+ err = os_set_fd_async(fd, pid); -+ if(err < 0) -+ goto out; -+ -+ new_fd = um_kmalloc(sizeof(*new_fd)); -+ err = -ENOMEM; -+ if(new_fd == NULL) -+ goto out; -+ -+ if(type == IRQ_READ) events = POLLIN | POLLPRI; -+ else events = POLLOUT; -+ *new_fd = ((struct irq_fd) { .next = NULL, -+ .id = dev_id, -+ .fd = fd, -+ .type = type, -+ .irq = irq, -+ .pid = pid, -+ .events = events, -+ .current_events = 0, -+ .freed = 0 } ); -+ -+ /* Critical section - locked by a spinlock because this stuff can -+ * be changed from interrupt handlers. The stuff above is done -+ * outside the lock because it allocates memory. -+ */ -+ -+ /* Actually, it only looks like it can be called from interrupt -+ * context. The culprit is reactivate_fd, which calls -+ * maybe_sigio_broken, which calls write_sigio_workaround, -+ * which calls activate_fd. However, write_sigio_workaround should -+ * only be called once, at boot time. That would make it clear that -+ * this is called only from process context, and can be locked with -+ * a semaphore. -+ */ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->fd == fd) && (irq_fd->type == type)){ -+ printk("Registering fd %d twice\n", fd); -+ printk("Irqs : %d, %d\n", irq_fd->irq, irq); -+ printk("Ids : 0x%x, 0x%x\n", irq_fd->id, dev_id); -+ goto out_unlock; -+ } -+ } -+ -+ n = pollfds_num; -+ if(n == pollfds_size){ -+ while(1){ -+ /* Here we have to drop the lock in order to call -+ * kmalloc, which might sleep. If something else -+ * came in and changed the pollfds array, we free -+ * the buffer and try again. -+ */ -+ irq_unlock(flags); -+ size = (pollfds_num + 1) * sizeof(pollfds[0]); -+ tmp_pfd = um_kmalloc(size); -+ flags = irq_lock(); -+ if(tmp_pfd == NULL) -+ goto out_unlock; -+ if(n == pollfds_size) -+ break; -+ kfree(tmp_pfd); -+ } -+ if(pollfds != NULL){ -+ memcpy(tmp_pfd, pollfds, -+ sizeof(pollfds[0]) * pollfds_size); -+ kfree(pollfds); -+ } -+ pollfds = tmp_pfd; -+ pollfds_size++; -+ } -+ -+ if(type == IRQ_WRITE) -+ fd = -1; -+ -+ pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ pollfds_num++; -+ -+ *last_irq_ptr = new_fd; -+ last_irq_ptr = &new_fd->next; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. -+ */ -+ maybe_sigio_broken(fd, type); -+ -+ return(0); -+ -+ out_unlock: -+ irq_unlock(flags); -+ kfree(new_fd); -+ out: -+ return(err); -+} -+ -+static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) -+{ -+ struct irq_fd **prev; -+ unsigned long flags; -+ int i = 0; -+ -+ flags = irq_lock(); -+ prev = &active_fds; -+ while(*prev != NULL){ -+ if((*test)(*prev, arg)){ -+ struct irq_fd *old_fd = *prev; -+ if((pollfds[i].fd != -1) && -+ (pollfds[i].fd != (*prev)->fd)){ -+ printk("free_irq_by_cb - mismatch between " -+ "active_fds and pollfds, fd %d vs %d\n", -+ (*prev)->fd, pollfds[i].fd); -+ goto out; -+ } -+ memcpy(&pollfds[i], &pollfds[i + 1], -+ (pollfds_num - i - 1) * sizeof(pollfds[0])); -+ pollfds_num--; -+ if(last_irq_ptr == &old_fd->next) -+ last_irq_ptr = prev; -+ *prev = (*prev)->next; -+ if(old_fd->type == IRQ_WRITE) -+ ignore_sigio_fd(old_fd->fd); -+ kfree(old_fd); -+ continue; -+ } -+ prev = &(*prev)->next; -+ i++; -+ } -+ out: -+ irq_unlock(flags); -+} -+ -+struct irq_and_dev { -+ int irq; -+ void *dev; -+}; -+ -+static int same_irq_and_dev(struct irq_fd *irq, void *d) -+{ -+ struct irq_and_dev *data = d; -+ -+ return((irq->irq == data->irq) && (irq->id == data->dev)); -+} -+ -+void free_irq_by_irq_and_dev(int irq, void *dev) -+{ -+ struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, -+ .dev = dev }); -+ -+ free_irq_by_cb(same_irq_and_dev, &data); -+} -+ -+static int same_fd(struct irq_fd *irq, void *fd) -+{ -+ return(irq->fd == *((int *) fd)); -+} -+ -+void free_irq_by_fd(int fd) -+{ -+ free_irq_by_cb(same_fd, &fd); -+} -+ -+static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) -+{ -+ struct irq_fd *irq; -+ int i = 0; -+ -+ for(irq=active_fds; irq != NULL; irq = irq->next){ -+ if((irq->fd == fd) && (irq->irq == irqnum)) break; -+ i++; -+ } -+ if(irq == NULL){ -+ printk("find_irq_by_fd doesn't have descriptor %d\n", fd); -+ goto out; -+ } -+ if((pollfds[i].fd != -1) && (pollfds[i].fd != fd)){ -+ printk("find_irq_by_fd - mismatch between active_fds and " -+ "pollfds, fd %d vs %d, need %d\n", irq->fd, -+ pollfds[i].fd, fd); -+ irq = NULL; -+ goto out; -+ } -+ *index_out = i; -+ out: -+ return(irq); -+} -+ -+void free_irq_later(int irq, void *dev_id) -+{ -+ struct irq_fd *irq_fd; -+ unsigned long flags; -+ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->irq == irq) && (irq_fd->id == dev_id)) -+ break; -+ } -+ if(irq_fd == NULL){ -+ printk("free_irq_later found no irq, irq = %d, " -+ "dev_id = 0x%p\n", irq, dev_id); -+ goto out; -+ } -+ irq_fd->freed = 1; -+ out: -+ irq_unlock(flags); -+} -+ -+void reactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL){ -+ irq_unlock(flags); -+ return; -+ } -+ -+ pollfds[i].fd = irq->fd; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. -+ */ -+ maybe_sigio_broken(fd, irq->type); -+} -+ -+void deactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL) -+ goto out; -+ pollfds[i].fd = -1; -+ out: -+ irq_unlock(flags); -+} -+ -+void forward_ipi(int fd, int pid) -+{ -+ int err; -+ -+ err = os_set_owner(fd, pid); -+ if(err < 0) -+ printk("forward_ipi: set_owner failed, fd = %d, me = %d, " -+ "target = %d, err = %d\n", fd, os_getpid(), pid, -err); -+} -+ -+void forward_interrupts(int pid) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int err; -+ -+ flags = irq_lock(); -+ for(irq=active_fds;irq != NULL;irq = irq->next){ -+ err = os_set_owner(irq->fd, pid); -+ if(err < 0){ -+ /* XXX Just remove the irq rather than -+ * print out an infinite stream of these -+ */ -+ printk("Failed to forward %d to pid %d, err = %d\n", -+ irq->fd, pid, -err); -+ } -+ -+ irq->pid = pid; -+ } -+ irq_unlock(flags); -+} -+ -+void init_irq_signals(int on_sigstack) -+{ -+ __sighandler_t h; -+ int flags; -+ -+ flags = on_sigstack ? SA_ONSTACK : 0; -+ if(timer_irq_inited) h = (__sighandler_t) alarm_handler; -+ else h = boot_timer_handler; -+ -+ set_handler(SIGVTALRM, h, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1); -+ set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ signal(SIGWINCH, SIG_IGN); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/ksyms.c um/arch/um/kernel/ksyms.c ---- orig/arch/um/kernel/ksyms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/ksyms.c 2004-01-23 00:44:09.000000000 -0500 -@@ -0,0 +1,120 @@ -+/* -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/string.h" -+#include "linux/smp_lock.h" -+#include "linux/spinlock.h" -+#include "asm/current.h" -+#include "asm/delay.h" -+#include "asm/processor.h" -+#include "asm/unistd.h" -+#include "asm/pgalloc.h" -+#include "asm/pgtable.h" -+#include "asm/page.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "helper.h" -+ -+EXPORT_SYMBOL(stop); -+EXPORT_SYMBOL(strtok); -+EXPORT_SYMBOL(uml_physmem); -+EXPORT_SYMBOL(set_signals); -+EXPORT_SYMBOL(get_signals); -+EXPORT_SYMBOL(kernel_thread); -+EXPORT_SYMBOL(__const_udelay); -+EXPORT_SYMBOL(__udelay); -+EXPORT_SYMBOL(sys_waitpid); -+EXPORT_SYMBOL(task_size); -+EXPORT_SYMBOL(flush_tlb_range); -+EXPORT_SYMBOL(host_task_size); -+EXPORT_SYMBOL(arch_validate); -+EXPORT_SYMBOL(get_kmem_end); -+ -+EXPORT_SYMBOL(high_physmem); -+EXPORT_SYMBOL(empty_zero_page); -+EXPORT_SYMBOL(um_virt_to_phys); -+EXPORT_SYMBOL(__virt_to_page); -+EXPORT_SYMBOL(to_phys); -+EXPORT_SYMBOL(to_virt); -+EXPORT_SYMBOL(mode_tt); -+EXPORT_SYMBOL(handle_page_fault); -+ -+#ifdef CONFIG_MODE_TT -+EXPORT_SYMBOL(copy_from_user_tt); -+EXPORT_SYMBOL(copy_to_user_tt); -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+EXPORT_SYMBOL(copy_to_user_skas); -+EXPORT_SYMBOL(copy_from_user_skas); -+#endif -+ -+EXPORT_SYMBOL(os_stat_fd); -+EXPORT_SYMBOL(os_stat_file); -+EXPORT_SYMBOL(os_access); -+EXPORT_SYMBOL(os_print_error); -+EXPORT_SYMBOL(os_get_exec_close); -+EXPORT_SYMBOL(os_set_exec_close); -+EXPORT_SYMBOL(os_getpid); -+EXPORT_SYMBOL(os_open_file); -+EXPORT_SYMBOL(os_read_file); -+EXPORT_SYMBOL(os_write_file); -+EXPORT_SYMBOL(os_seek_file); -+EXPORT_SYMBOL(os_lock_file); -+EXPORT_SYMBOL(os_pipe); -+EXPORT_SYMBOL(os_file_type); -+EXPORT_SYMBOL(os_file_mode); -+EXPORT_SYMBOL(os_file_size); -+EXPORT_SYMBOL(os_flush_stdout); -+EXPORT_SYMBOL(os_close_file); -+EXPORT_SYMBOL(os_set_fd_async); -+EXPORT_SYMBOL(os_set_fd_block); -+EXPORT_SYMBOL(helper_wait); -+EXPORT_SYMBOL(os_shutdown_socket); -+EXPORT_SYMBOL(os_create_unix_socket); -+EXPORT_SYMBOL(os_connect_socket); -+EXPORT_SYMBOL(os_accept_connection); -+EXPORT_SYMBOL(os_ioctl_generic); -+EXPORT_SYMBOL(os_rcv_fd); -+EXPORT_SYMBOL(run_helper); -+EXPORT_SYMBOL(start_thread); -+EXPORT_SYMBOL(dump_thread); -+ -+/* This is here because UML expands open to sys_open, not to a system -+ * call instruction. -+ */ -+EXPORT_SYMBOL(sys_open); -+EXPORT_SYMBOL(sys_lseek); -+EXPORT_SYMBOL(sys_read); -+EXPORT_SYMBOL(sys_wait4); -+ -+#ifdef CONFIG_SMP -+ -+/* required for SMP */ -+ -+extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__write_lock_failed); -+ -+extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__read_lock_failed); -+ -+EXPORT_SYMBOL(kernel_flag_cacheline); -+EXPORT_SYMBOL(smp_num_cpus); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/Makefile um/arch/um/kernel/Makefile ---- orig/arch/um/kernel/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/Makefile 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,73 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = config.o checksum.o exec_kern.o exitcode.o frame_kern.o frame.o \ -+ helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ -+ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ -+ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ -+ syscall_kern.o syscall_user.o sysrq.o sys_call_table.o tempfile.o \ -+ time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \ -+ um_arch.o umid.o user_syms.o user_util.o -+ -+obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o -+obj-$(CONFIG_GPROF) += gprof_syms.o -+obj-$(CONFIG_GCOV) += gmon_syms.o -+obj-$(CONFIG_TTY_LOG) += tty_log.o -+ -+subdir-$(CONFIG_MODE_TT) += tt -+subdir-$(CONFIG_MODE_SKAS) += skas -+ -+user-objs-$(CONFIG_TTY_LOG) += tty_log.o -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+# user_syms.o not included here because Rules.make has its own ideas about -+# building anything in export-objs -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ -+ process.o tempfile.o time.o umid.o user_util.o -+ -+DMODULES-$(CONFIG_MODULES) = -D__CONFIG_MODULES__ -+DMODVERSIONS-$(CONFIG_MODVERSIONS) = -D__CONFIG_MODVERSIONS__ -+ -+export-objs-$(CONFIG_GPROF) += gprof_syms.o -+export-objs-$(CONFIG_GCOV) += gmon_syms.o -+ -+export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y) -+ -+CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \ -+ -I/usr/include -I../include -+ -+CFLAGS_frame.o := $(patsubst -fomit-frame-pointer,,$(USER_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+# This has to be separate because it needs be compiled with frame pointers -+# regardless of how the rest of the kernel is built. -+ -+frame.o: frame.c -+ $(CC) $(CFLAGS_$@) -c -o $@ $< -+ -+QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }' -+ -+config.c : config.c.in $(TOPDIR)/.config -+ $(PERL) -e $(QUOTE) < config.c.in > $@ -+ -+clean: -+ $(RM) config.c -+ for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: clean -diff -Naur -X ../exclude-files orig/arch/um/kernel/mem.c um/arch/um/kernel/mem.c ---- orig/arch/um/kernel/mem.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/mem.c 2003-12-22 01:25:00.000000000 -0500 -@@ -0,0 +1,336 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/mm.h" -+#include "linux/bootmem.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/fixmap.h" -+#include "asm/pgalloc.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mem_user.h" -+#include "uml_uaccess.h" -+#include "os.h" -+ -+extern char __binary_start; -+ -+/* Changed during early boot */ -+unsigned long *empty_zero_page = NULL; -+unsigned long *empty_bad_page = NULL; -+pgd_t swapper_pg_dir[1024]; -+unsigned long highmem; -+int kmalloc_ok = 0; -+ -+static unsigned long brk_end; -+static unsigned long totalram_pages = 0; -+ -+void unmap_physmem(void) -+{ -+ os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -+} -+ -+static void map_cb(void *unused) -+{ -+ map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); -+} -+ -+#ifdef CONFIG_HIGHMEM -+static void setup_highmem(unsigned long highmem_start, -+ unsigned long highmem_len) -+{ -+ struct page *page; -+ unsigned long highmem_pfn; -+ int i; -+ -+ highmem_start_page = virt_to_page(highmem_start); -+ -+ highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; -+ for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ -+ page = &mem_map[highmem_pfn + i]; -+ ClearPageReserved(page); -+ set_bit(PG_highmem, &page->flags); -+ atomic_set(&page->count, 1); -+ __free_page(page); -+ } -+} -+#endif -+ -+void mem_init(void) -+{ -+ unsigned long start; -+ -+ /* clear the zero-page */ -+ memset((void *) empty_zero_page, 0, PAGE_SIZE); -+ -+ /* Map in the area just after the brk now that kmalloc is about -+ * to be turned on. -+ */ -+ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); -+ map_cb(NULL); -+ initial_thread_cb(map_cb, NULL); -+ free_bootmem(__pa(brk_end), uml_reserved - brk_end); -+ uml_reserved = brk_end; -+ -+ /* Fill in any hole at the start of the binary */ -+ start = (unsigned long) &__binary_start; -+ if(uml_physmem != start){ -+ map_memory(uml_physmem, __pa(uml_physmem), start - uml_physmem, -+ 1, 1, 0); -+ } -+ -+ /* this will put all low memory onto the freelists */ -+ totalram_pages = free_all_bootmem(); -+ totalram_pages += highmem >> PAGE_SHIFT; -+ num_physpages = totalram_pages; -+ printk(KERN_INFO "Memory: %luk available\n", -+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); -+ kmalloc_ok = 1; -+ -+#ifdef CONFIG_HIGHMEM -+ setup_highmem(end_iomem, highmem); -+#endif -+} -+ -+static void __init fixrange_init(unsigned long start, unsigned long end, -+ pgd_t *pgd_base) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ int i, j; -+ unsigned long vaddr; -+ -+ vaddr = start; -+ i = __pgd_offset(vaddr); -+ j = __pmd_offset(vaddr); -+ pgd = pgd_base + i; -+ -+ for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { -+ pmd = (pmd_t *)pgd; -+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { -+ if (pmd_none(*pmd)) { -+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); -+ set_pmd(pmd, __pmd(_KERNPG_TABLE + -+ (unsigned long) __pa(pte))); -+ if (pte != pte_offset(pmd, 0)) -+ BUG(); -+ } -+ vaddr += PMD_SIZE; -+ } -+ j = 0; -+ } -+} -+ -+#ifdef CONFIG_HIGHMEM -+pte_t *kmap_pte; -+pgprot_t kmap_prot; -+ -+#define kmap_get_fixmap_pte(vaddr) \ -+ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -+ -+void __init kmap_init(void) -+{ -+ unsigned long kmap_vstart; -+ -+ /* cache the first kmap pte */ -+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -+ -+ kmap_prot = PAGE_KERNEL; -+} -+ -+static void init_highmem(void) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long vaddr; -+ -+ /* -+ * Permanent kmaps: -+ */ -+ vaddr = PKMAP_BASE; -+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); -+ -+ pgd = swapper_pg_dir + __pgd_offset(vaddr); -+ pmd = pmd_offset(pgd, vaddr); -+ pte = pte_offset(pmd, vaddr); -+ pkmap_page_table = pte; -+ -+ kmap_init(); -+} -+ -+#endif /* CONFIG_HIGHMEM */ -+ -+void paging_init(void) -+{ -+ unsigned long zones_size[MAX_NR_ZONES], vaddr; -+ int i; -+ -+ empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) -+ zones_size[i] = 0; -+ zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); -+ zones_size[2] = highmem >> PAGE_SHIFT; -+ free_area_init(zones_size); -+ -+ /* -+ * Fixed mappings, only the page table structure has to be -+ * created - mappings will be set by set_fixmap(): -+ */ -+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -+ fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); -+ -+#if CONFIG_HIGHMEM -+ init_highmem(); -+#endif -+} -+ -+struct page *arch_validate(struct page *page, int mask, int order) -+{ -+ unsigned long addr, zero = 0; -+ int i; -+ -+ again: -+ if(page == NULL) return(page); -+ if(PageHighMem(page)) return(page); -+ -+ addr = (unsigned long) page_address(page); -+ for(i = 0; i < (1 << order); i++){ -+ current->thread.fault_addr = (void *) addr; -+ if(__do_copy_to_user((void *) addr, &zero, -+ sizeof(zero), -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)){ -+ if(!(mask & __GFP_WAIT)) return(NULL); -+ else break; -+ } -+ addr += PAGE_SIZE; -+ } -+ if(i == (1 << order)) return(page); -+ page = _alloc_pages(mask, order); -+ goto again; -+} -+ -+/* This can't do anything because nothing in the kernel image can be freed -+ * since it's not in kernel physical memory. -+ */ -+ -+void free_initmem(void) -+{ -+} -+ -+#ifdef CONFIG_BLK_DEV_INITRD -+ -+void free_initrd_mem(unsigned long start, unsigned long end) -+{ -+ if (start < end) -+ printk ("Freeing initrd memory: %ldk freed\n", -+ (end - start) >> 10); -+ for (; start < end; start += PAGE_SIZE) { -+ ClearPageReserved(virt_to_page(start)); -+ set_page_count(virt_to_page(start), 1); -+ free_page(start); -+ totalram_pages++; -+ } -+} -+ -+#endif -+ -+int do_check_pgt_cache(int low, int high) -+{ -+ int freed = 0; -+ if(pgtable_cache_size > high) { -+ do { -+ if (pgd_quicklist) { -+ free_pgd_slow(get_pgd_fast()); -+ freed++; -+ } -+ if (pmd_quicklist) { -+ pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ if (pte_quicklist) { -+ pte_free_slow(pte_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ } while(pgtable_cache_size > low); -+ } -+ return freed; -+} -+ -+void show_mem(void) -+{ -+ int i, total = 0, reserved = 0; -+ int shared = 0, cached = 0; -+ int highmem = 0; -+ -+ printk("Mem-info:\n"); -+ show_free_areas(); -+ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); -+ i = max_mapnr; -+ while(i-- > 0) { -+ total++; -+ if(PageHighMem(mem_map + i)) -+ highmem++; -+ if(PageReserved(mem_map + i)) -+ reserved++; -+ else if(PageSwapCache(mem_map + i)) -+ cached++; -+ else if(page_count(mem_map + i)) -+ shared += page_count(mem_map + i) - 1; -+ } -+ printk("%d pages of RAM\n", total); -+ printk("%d pages of HIGHMEM\n", highmem); -+ printk("%d reserved pages\n", reserved); -+ printk("%d pages shared\n", shared); -+ printk("%d pages swap cached\n", cached); -+ printk("%ld pages in page table cache\n", pgtable_cache_size); -+ show_buffers(); -+} -+ -+/* Changed by meminfo_compat, which is a setup */ -+static int meminfo_22 = 0; -+ -+static int meminfo_compat(char *str) -+{ -+ meminfo_22 = 1; -+ return(1); -+} -+ -+__setup("22_meminfo", meminfo_compat); -+ -+void si_meminfo(struct sysinfo *val) -+{ -+ val->totalram = totalram_pages; -+ val->sharedram = 0; -+ val->freeram = nr_free_pages(); -+ val->bufferram = atomic_read(&buffermem_pages); -+ val->totalhigh = highmem >> PAGE_SHIFT; -+ val->freehigh = nr_free_highpages(); -+ val->mem_unit = PAGE_SIZE; -+ if(meminfo_22){ -+ val->freeram <<= PAGE_SHIFT; -+ val->bufferram <<= PAGE_SHIFT; -+ val->totalram <<= PAGE_SHIFT; -+ val->sharedram <<= PAGE_SHIFT; -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/mem_user.c um/arch/um/kernel/mem_user.c ---- orig/arch/um/kernel/mem_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/mem_user.c 2004-01-10 00:19:09.000000000 -0500 -@@ -0,0 +1,216 @@ -+/* -+ * arch/um/kernel/mem_user.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory routines for supporting IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stddef.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <fcntl.h> -+#include <sys/types.h> -+#include <sys/mman.h> -+#include "kern_util.h" -+#include "user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "init.h" -+#include "os.h" -+#include "tempfile.h" -+#include "kern_constants.h" -+ -+extern struct mem_region physmem_region; -+ -+#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" -+ -+static int create_tmp_file(unsigned long len) -+{ -+ int fd, err; -+ char zero; -+ -+ fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); -+ if(fd < 0) { -+ os_print_error(fd, "make_tempfile"); -+ exit(1); -+ } -+ -+ err = os_mode_fd(fd, 0777); -+ if(err < 0){ -+ os_print_error(err, "os_mode_fd"); -+ exit(1); -+ } -+ err = os_seek_file(fd, len); -+ if(err < 0){ -+ os_print_error(err, "os_seek_file"); -+ exit(1); -+ } -+ zero = 0; -+ err = os_write_file(fd, &zero, 1); -+ if(err != 1){ -+ os_print_error(err, "os_write_file"); -+ exit(1); -+ } -+ -+ return(fd); -+} -+ -+static int have_devanon(void) -+{ -+ int fd; -+ -+ printk("Checking for /dev/anon on the host..."); -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0){ -+ printk("Not available (open failed with errno %d)\n", errno); -+ return(0); -+ } -+ -+ printk("OK\n"); -+ return(1); -+} -+ -+static int create_anon_file(unsigned long len) -+{ -+ void *addr; -+ int fd; -+ -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0) { -+ os_print_error(fd, "opening /dev/anon"); -+ exit(1); -+ } -+ -+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0); -+ if(addr == MAP_FAILED){ -+ os_print_error((int) addr, "mapping physmem file"); -+ exit(1); -+ } -+ munmap(addr, len); -+ -+ return(fd); -+} -+ -+int create_mem_file(unsigned long len) -+{ -+ int err, fd; -+ -+ if(have_devanon()) -+ fd = create_anon_file(len); -+ else fd = create_tmp_file(len); -+ -+ err = os_set_exec_close(fd, 1); -+ if(err < 0) -+ os_print_error(err, "exec_close"); -+ return(fd); -+} -+ -+struct iomem_region *iomem_regions = NULL; -+int iomem_size = 0; -+ -+static int __init parse_iomem(char *str, int *add) -+{ -+ struct iomem_region *new; -+ struct uml_stat buf; -+ char *file, *driver; -+ int fd, err; -+ -+ driver = str; -+ file = strchr(str,','); -+ if(file == NULL){ -+ printf("parse_iomem : failed to parse iomem\n"); -+ goto out; -+ } -+ *file = '\0'; -+ file++; -+ fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ os_print_error(fd, "parse_iomem - Couldn't open io file"); -+ goto out; -+ } -+ -+ err = os_stat_fd(fd, &buf); -+ if(err < 0){ -+ os_print_error(err, "parse_iomem - cannot stat_fd file"); -+ goto out_close; -+ } -+ -+ new = malloc(sizeof(*new)); -+ if(new == NULL){ -+ perror("Couldn't allocate iomem_region struct"); -+ goto out_close; -+ } -+ -+ *new = ((struct iomem_region) { .next = iomem_regions, -+ .driver = driver, -+ .fd = fd, -+ .size = buf.ust_size, -+ .phys = 0, -+ .virt = 0 }); -+ iomem_regions = new; -+ iomem_size += new->size + UM_KERN_PAGE_SIZE; -+ -+ return(0); -+ out_close: -+ os_close_file(fd); -+ out: -+ return(1); -+} -+ -+__uml_setup("iomem=", parse_iomem, -+"iomem=<name>,<file>\n" -+" Configure <file> as an IO memory region named <name>.\n\n" -+); -+ -+int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, -+ int must_succeed) -+{ -+ int err; -+ -+ err = os_protect_memory((void *) addr, len, r, w, x); -+ if(err < 0){ -+ if(must_succeed) -+ panic("protect failed, err = %d", -err); -+ else return(err); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/mprot.h um/arch/um/kernel/mprot.h ---- orig/arch/um/kernel/mprot.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/mprot.h 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __MPROT_H__ -+#define __MPROT_H__ -+ -+extern void no_access(unsigned long addr, unsigned int len); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/kernel/physmem.c um/arch/um/kernel/physmem.c ---- orig/arch/um/kernel/physmem.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/physmem.c 2004-01-16 23:38:02.000000000 -0500 -@@ -0,0 +1,446 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "linux/ghash.h" -+#include "linux/slab.h" -+#include "linux/vmalloc.h" -+#include "linux/bootmem.h" -+#include "asm/types.h" -+#include "asm/pgtable.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "mode_kern.h" -+#include "mem.h" -+#include "mem_user.h" -+#include "os.h" -+#include "kern.h" -+#include "init.h" -+ -+#define PHYS_HASHSIZE (8192) -+ -+struct phys_desc; -+ -+DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); -+ -+struct phys_desc { -+ struct virtmem_ptrs virt_ptrs; -+ int fd; -+ __u64 offset; -+ void *virt; -+ unsigned long phys; -+ struct list_head list; -+}; -+ -+struct virtmem_table virtmem_hash; -+ -+static int virt_cmp(void *virt1, void *virt2) -+{ -+ return(virt1 != virt2); -+} -+ -+static int virt_hash(void *virt) -+{ -+ unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; -+ return(addr % PHYS_HASHSIZE); -+} -+ -+DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, -+ virt_hash); -+ -+LIST_HEAD(descriptor_mappings); -+ -+struct desc_mapping { -+ int fd; -+ struct list_head list; -+ struct list_head pages; -+}; -+ -+static struct desc_mapping *find_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ struct list_head *ele; -+ -+ list_for_each(ele, &descriptor_mappings){ -+ desc = list_entry(ele, struct desc_mapping, list); -+ if(desc->fd == fd) -+ return(desc); -+ } -+ -+ return(NULL); -+} -+ -+static struct desc_mapping *descriptor_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ -+ desc = find_mapping(fd); -+ if(desc != NULL) -+ return(desc); -+ -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ return(NULL); -+ -+ *desc = ((struct desc_mapping) -+ { .fd = fd, -+ .list = LIST_HEAD_INIT(desc->list), -+ .pages = LIST_HEAD_INIT(desc->pages) }); -+ list_add(&desc->list, &descriptor_mappings); -+ -+ return(desc); -+} -+ -+int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) -+{ -+ struct desc_mapping *fd_maps; -+ struct phys_desc *desc; -+ unsigned long phys; -+ int err; -+ -+ fd_maps = descriptor_mapping(fd); -+ if(fd_maps == NULL) -+ return(-ENOMEM); -+ -+ phys = __pa(virt); -+ if(find_virtmem_hash(&virtmem_hash, virt) != NULL) -+ panic("Address 0x%p is already substituted\n", virt); -+ -+ err = -ENOMEM; -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ goto out; -+ -+ *desc = ((struct phys_desc) -+ { .virt_ptrs = { NULL, NULL }, -+ .fd = fd, -+ .offset = offset, -+ .virt = virt, -+ .phys = __pa(virt), -+ .list = LIST_HEAD_INIT(desc->list) }); -+ insert_virtmem_hash(&virtmem_hash, desc); -+ -+ list_add(&desc->list, &fd_maps->pages); -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); -+ if(!err) -+ goto out; -+ -+ remove_virtmem_hash(&virtmem_hash, desc); -+ kfree(desc); -+ out: -+ return(err); -+} -+ -+static int physmem_fd = -1; -+ -+static void remove_mapping(struct phys_desc *desc) -+{ -+ void *virt = desc->virt; -+ int err; -+ -+ remove_virtmem_hash(&virtmem_hash, desc); -+ list_del(&desc->list); -+ kfree(desc); -+ -+ err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); -+ if(err) -+ panic("Failed to unmap block device page from physical memory, " -+ "errno = %d", -err); -+} -+ -+int physmem_remove_mapping(void *virt) -+{ -+ struct phys_desc *desc; -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ desc = find_virtmem_hash(&virtmem_hash, virt); -+ if(desc == NULL) -+ return(0); -+ -+ remove_mapping(desc); -+ return(1); -+} -+ -+void physmem_forget_descriptor(int fd) -+{ -+ struct desc_mapping *desc; -+ struct phys_desc *page; -+ struct list_head *ele, *next; -+ __u64 offset; -+ void *addr; -+ int err; -+ -+ desc = find_mapping(fd); -+ if(desc == NULL) -+ return; -+ -+ list_for_each_safe(ele, next, &desc->pages){ -+ page = list_entry(ele, struct phys_desc, list); -+ offset = page->offset; -+ addr = page->virt; -+ remove_mapping(page); -+ err = os_seek_file(fd, offset); -+ if(err) -+ panic("physmem_forget_descriptor - failed to seek " -+ "to %lld in fd %d, error = %d\n", -+ offset, fd, -err); -+ err = os_read_file(fd, addr, PAGE_SIZE); -+ if(err < 0) -+ panic("physmem_forget_descriptor - failed to read " -+ "from fd %d to 0x%p, error = %d\n", -+ fd, addr, -err); -+ } -+ -+ list_del(&desc->list); -+ kfree(desc); -+} -+ -+void arch_free_page(struct page *page, int order) -+{ -+ void *virt; -+ int i; -+ -+ for(i = 0; i < (1 << order); i++){ -+ virt = __va(page_to_phys(page + i)); -+ physmem_remove_mapping(virt); -+ } -+} -+ -+int is_remapped(void *virt) -+{ -+ return(find_virtmem_hash(&virtmem_hash, virt) != NULL); -+} -+ -+/* Changed during early boot */ -+unsigned long high_physmem; -+ -+extern unsigned long physmem_size; -+ -+void *to_virt(unsigned long phys) -+{ -+ return((void *) uml_physmem + phys); -+} -+ -+unsigned long to_phys(void *virt) -+{ -+ return(((unsigned long) virt) - uml_physmem); -+} -+ -+int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) -+{ -+ struct page *p, *map; -+ unsigned long phys_len, phys_pages, highmem_len, highmem_pages; -+ unsigned long iomem_len, iomem_pages, total_len, total_pages; -+ int i; -+ -+ phys_pages = physmem >> PAGE_SHIFT; -+ phys_len = phys_pages * sizeof(struct page); -+ -+ iomem_pages = iomem >> PAGE_SHIFT; -+ iomem_len = iomem_pages * sizeof(struct page); -+ -+ highmem_pages = highmem >> PAGE_SHIFT; -+ highmem_len = highmem_pages * sizeof(struct page); -+ -+ total_pages = phys_pages + iomem_pages + highmem_pages; -+ total_len = phys_len + iomem_pages + highmem_len; -+ -+ if(kmalloc_ok){ -+ map = kmalloc(total_len, GFP_KERNEL); -+ if(map == NULL) -+ map = vmalloc(total_len); -+ } -+ else map = alloc_bootmem_low_pages(total_len); -+ -+ if(map == NULL) -+ return(-ENOMEM); -+ -+ for(i = 0; i < total_pages; i++){ -+ p = &map[i]; -+ set_page_count(p, 0); -+ SetPageReserved(p); -+ INIT_LIST_HEAD(&p->list); -+ } -+ -+ mem_map = map; -+ max_mapnr = total_pages; -+ return(0); -+} -+ -+struct page *phys_to_page(const unsigned long phys) -+{ -+ return(&mem_map[phys >> PAGE_SHIFT]); -+} -+ -+struct page *__virt_to_page(const unsigned long virt) -+{ -+ return(&mem_map[__pa(virt) >> PAGE_SHIFT]); -+} -+ -+unsigned long page_to_phys(struct page *page) -+{ -+ return((page - mem_map) << PAGE_SHIFT); -+} -+ -+pte_t mk_pte(struct page *page, pgprot_t pgprot) -+{ -+ pte_t pte; -+ -+ pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); -+ if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); -+ return(pte); -+} -+ -+/* Changed during early boot */ -+static unsigned long kmem_top = 0; -+ -+unsigned long get_kmem_end(void) -+{ -+ if(kmem_top == 0) -+ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); -+ return(kmem_top); -+} -+ -+void map_memory(unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ __u64 offset; -+ int fd, err; -+ -+ fd = phys_mapping(phys, &offset); -+ err = os_map_memory((void *) virt, fd, offset, len, r, w, x); -+ if(err) -+ panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " -+ "err = %d\n", virt, fd, offset, len, r, w, x, err); -+} -+ -+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -+ -+void setup_physmem(unsigned long start, unsigned long reserve_end, -+ unsigned long len, unsigned long highmem) -+{ -+ unsigned long reserve = reserve_end - start; -+ int pfn = PFN_UP(__pa(reserve_end)); -+ int delta = (len - reserve) >> PAGE_SHIFT; -+ int err, offset, bootmap_size; -+ -+ physmem_fd = create_mem_file(len + highmem); -+ -+ offset = uml_reserved - uml_physmem; -+ err = os_map_memory((void *) uml_reserved, physmem_fd, offset, -+ len - offset, 1, 1, 0); -+ if(err < 0){ -+ os_print_error(err, "Mapping memory"); -+ exit(1); -+ } -+ -+ bootmap_size = init_bootmem(pfn, pfn + delta); -+ free_bootmem(__pa(reserve_end) + bootmap_size, -+ len - bootmap_size - reserve); -+} -+ -+int phys_mapping(unsigned long phys, __u64 *offset_out) -+{ -+ struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, -+ __va(phys & PAGE_MASK)); -+ int fd = -1; -+ -+ if(desc != NULL){ -+ fd = desc->fd; -+ *offset_out = desc->offset; -+ } -+ else if(phys < physmem_size){ -+ fd = physmem_fd; -+ *offset_out = phys; -+ } -+ else if(phys < __pa(end_iomem)){ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if((phys >= region->phys) && -+ (phys < region->phys + region->size)){ -+ fd = region->fd; -+ *offset_out = phys - region->phys; -+ break; -+ } -+ region = region->next; -+ } -+ } -+ else if(phys < __pa(end_iomem) + highmem){ -+ fd = physmem_fd; -+ *offset_out = phys - iomem_size; -+ } -+ -+ return(fd); -+} -+ -+static int __init uml_mem_setup(char *line, int *add) -+{ -+ char *retptr; -+ physmem_size = memparse(line,&retptr); -+ return 0; -+} -+__uml_setup("mem=", uml_mem_setup, -+"mem=<Amount of desired ram>\n" -+" This controls how much \"physical\" memory the kernel allocates\n" -+" for the system. The size is specified as a number followed by\n" -+" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -+" This is not related to the amount of memory in the host. It can\n" -+" be more, and the excess, if it's ever used, will just be swapped out.\n" -+" Example: mem=64M\n\n" -+); -+ -+unsigned long find_iomem(char *driver, unsigned long *len_out) -+{ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if(!strcmp(region->driver, driver)){ -+ *len_out = region->size; -+ return(region->virt); -+ } -+ } -+ -+ return(0); -+} -+ -+int setup_iomem(void) -+{ -+ struct iomem_region *region = iomem_regions; -+ unsigned long iomem_start = high_physmem + PAGE_SIZE; -+ int err; -+ -+ while(region != NULL){ -+ err = os_map_memory((void *) iomem_start, region->fd, 0, -+ region->size, 1, 1, 0); -+ if(err) -+ printk("Mapping iomem region for driver '%s' failed, " -+ "errno = %d\n", region->driver, -err); -+ else { -+ region->virt = iomem_start; -+ region->phys = __pa(region->virt); -+ } -+ -+ iomem_start += region->size + PAGE_SIZE; -+ region = region->next; -+ } -+ -+ return(0); -+} -+ -+__initcall(setup_iomem); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/process.c um/arch/um/kernel/process.c ---- orig/arch/um/kernel/process.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/process.c 2004-01-31 02:47:57.000000000 -0500 -@@ -0,0 +1,289 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sched.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <stdlib.h> -+#include <setjmp.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <sys/mman.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include <asm/unistd.h> -+#include <asm/page.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "init.h" -+#include "os.h" -+#include "uml-config.h" -+#include "choose-mode.h" -+#include "mode.h" -+#ifdef UML_CONFIG_MODE_SKAS -+#include "skas.h" -+#include "skas_ptrace.h" -+#endif -+ -+void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -+{ -+ int flags = 0, pages; -+ -+ if(sig_stack != NULL){ -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER) - 2; -+ set_sigstack(sig_stack, pages * page_size()); -+ flags = SA_ONSTACK; -+ } -+ if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -+} -+ -+void init_new_thread_signals(int altstack) -+{ -+ int flags = altstack ? SA_ONSTACK : 0; -+ -+ /* NODEFER is set here because SEGV isn't turned back on when the -+ * handler is ready to receive signals. This causes any segfault -+ * during a copy_user to kill the process because the fault is blocked. -+ */ -+ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGILL, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGUSR2, (__sighandler_t) sig_handler, -+ SA_NOMASK | flags, -1); -+ signal(SIGHUP, SIG_IGN); -+ -+ init_irq_signals(altstack); -+} -+ -+struct tramp { -+ int (*tramp)(void *); -+ void *tramp_data; -+ unsigned long temp_stack; -+ int flags; -+ int pid; -+}; -+ -+/* See above for why sigkill is here */ -+ -+int sigkill = SIGKILL; -+ -+int outer_tramp(void *arg) -+{ -+ struct tramp *t; -+ int sig = sigkill; -+ -+ t = arg; -+ t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, -+ t->flags, t->tramp_data); -+ if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); -+ kill(os_getpid(), sig); -+ _exit(0); -+} -+ -+int start_fork_tramp(void *thread_arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)) -+{ -+ struct tramp arg; -+ unsigned long sp; -+ int new_pid, status, err; -+ -+ /* The trampoline will run on the temporary stack */ -+ sp = stack_sp(temp_stack); -+ -+ clone_flags |= CLONE_FILES | SIGCHLD; -+ -+ arg.tramp = tramp; -+ arg.tramp_data = thread_arg; -+ arg.temp_stack = temp_stack; -+ arg.flags = clone_flags; -+ -+ /* Start the process and wait for it to kill itself */ -+ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); -+ if(new_pid < 0) return(-errno); -+ while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ; -+ if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", -+ errno); -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) -+ panic("outer trampoline didn't exit with SIGKILL, " -+ "status = %d", status); -+ -+ return(arg.pid); -+} -+ -+void suspend_new_thread(int fd) -+{ -+ char c; -+ -+ os_stop_process(os_getpid()); -+ -+ if(os_read_file(fd, &c, sizeof(c)) != sizeof(c)) -+ panic("read failed in suspend_new_thread"); -+} -+ -+static int ptrace_child(void *arg) -+{ -+ int pid = os_getpid(); -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ perror("ptrace"); -+ os_kill_process(pid, 0); -+ } -+ os_stop_process(pid); -+ _exit(os_getpid() == pid); -+} -+ -+static int start_ptraced_child(void **stack_out) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, n, status; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("check_ptrace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); -+ if(pid < 0) -+ panic("check_ptrace : clone failed, errno = %d", errno); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("check_ptrace : expected SIGSTOP, got status = %d", -+ status); -+ -+ *stack_out = stack; -+ return(pid); -+} -+ -+static void stop_ptraced_child(int pid, void *stack, int exitcode) -+{ -+ int status, n; -+ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", errno); -+ n = waitpid(pid, &status, 0); -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) -+ panic("check_ptrace : child exited with status 0x%x", status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("check_ptrace : munmap failed, errno = %d", errno); -+} -+ -+void __init check_ptrace(void) -+{ -+ void *stack; -+ int pid, syscall, n, status; -+ -+ printk("Checking that ptrace can change system call numbers..."); -+ pid = start_ptraced_child(&stack); -+ -+ while(1){ -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", -+ errno); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("check_ptrace : expected SIGTRAP, " -+ "got status = %d", status); -+ -+ syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, -+ 0); -+ if(syscall == __NR_getpid){ -+ n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getppid); -+ if(n < 0) -+ panic("check_ptrace : failed to modify system " -+ "call, errno = %d", errno); -+ break; -+ } -+ } -+ stop_ptraced_child(pid, stack, 0); -+ printk("OK\n"); -+} -+ -+int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -+{ -+ sigjmp_buf buf; -+ int n; -+ -+ *jmp_ptr = &buf; -+ n = sigsetjmp(buf, 1); -+ if(n != 0) -+ return(n); -+ (*fn)(arg); -+ return(0); -+} -+ -+int can_do_skas(void) -+{ -+#ifdef UML_CONFIG_MODE_SKAS -+ struct ptrace_faultinfo fi; -+ void *stack; -+ int pid, n, ret = 1; -+ -+ printf("Checking for the skas3 patch in the host..."); -+ pid = start_ptraced_child(&stack); -+ -+ n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); -+ if(n < 0){ -+ if(errno == EIO) -+ printf("not found\n"); -+ else printf("No (unexpected errno - %d)\n", errno); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ init_registers(pid); -+ stop_ptraced_child(pid, stack, 1); -+ -+ printf("Checking for /proc/mm..."); -+ if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ -+ printf("not found\n"); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ return(ret); -+#else -+ return(0); -+#endif -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/process_kern.c um/arch/um/kernel/process_kern.c ---- orig/arch/um/kernel/process_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/process_kern.c 2003-11-08 08:37:04.000000000 -0500 -@@ -0,0 +1,396 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/mm.h" -+#include "linux/slab.h" -+#include "linux/utsname.h" -+#include "linux/fs.h" -+#include "linux/utime.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/capability.h" -+#include "asm/unistd.h" -+#include "asm/mman.h" -+#include "asm/segment.h" -+#include "asm/stat.h" -+#include "asm/pgtable.h" -+#include "asm/processor.h" -+#include "asm/pgalloc.h" -+#include "asm/spinlock.h" -+#include "asm/uaccess.h" -+#include "asm/user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mem_user.h" -+#include "time_user.h" -+#include "tlb.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "mode.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* This is a per-cpu array. A processor only modifies its entry and it only -+ * cares about its entry, so it's OK if another processor is modifying its -+ * entry. -+ */ -+struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -+ -+struct task_struct *get_task(int pid, int require) -+{ -+ struct task_struct *ret; -+ -+ read_lock(&tasklist_lock); -+ ret = find_task_by_pid(pid); -+ read_unlock(&tasklist_lock); -+ -+ if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); -+ return(ret); -+} -+ -+int external_pid(void *t) -+{ -+ struct task_struct *task = t ? t : current; -+ -+ return(CHOOSE_MODE_PROC(external_pid_tt, external_pid_skas, task)); -+} -+ -+int pid_to_processor_id(int pid) -+{ -+ int i; -+ -+ for(i = 0; i < smp_num_cpus; i++){ -+ if(cpu_tasks[i].pid == pid) return(i); -+ } -+ return(-1); -+} -+ -+void free_stack(unsigned long stack, int order) -+{ -+ free_pages(stack, order); -+} -+ -+unsigned long alloc_stack(int order, int atomic) -+{ -+ unsigned long page; -+ int flags = GFP_KERNEL; -+ -+ if(atomic) flags |= GFP_ATOMIC; -+ page = __get_free_pages(flags, order); -+ if(page == 0) -+ return(0); -+ stack_protections(page); -+ return(page); -+} -+ -+int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -+{ -+ int pid; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = arg; -+ pid = do_fork(CLONE_VM | flags, 0, NULL, 0); -+#if 0 /* CLONE_UNTRACED for 2.6 */ -+ pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0); -+#endif -+ if(pid < 0) -+ panic("do_fork failed in kernel_thread, errno = %d", pid); -+ return(pid); -+} -+ -+void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if (prev != next) -+ clear_bit(cpu, &prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+} -+ -+void set_current(void *t) -+{ -+ struct task_struct *task = t; -+ -+ cpu_tasks[task->processor] = ((struct cpu_task) -+ { external_pid(task), task }); -+} -+ -+void *_switch_to(void *prev, void *next) -+{ -+ return(CHOOSE_MODE(_switch_to_tt(prev, next), -+ _switch_to_skas(prev, next))); -+} -+ -+void interrupt_end(void) -+{ -+ if(current->need_resched) schedule(); -+ if(current->sigpending != 0) do_signal(0); -+} -+ -+void release_thread(struct task_struct *task) -+{ -+ CHOOSE_MODE(release_thread_tt(task), release_thread_skas(task)); -+} -+ -+void exit_thread(void) -+{ -+ CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); -+ unprotect_stack((unsigned long) current); -+} -+ -+void *get_current(void) -+{ -+ return(current); -+} -+ -+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ p->thread = (struct thread_struct) INIT_THREAD; -+ p->thread.kernel_stack = (unsigned long) p + 2 * PAGE_SIZE; -+ -+ return(CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr, -+ clone_flags, sp, stack_top, p, regs)); -+} -+ -+void initial_thread_cb(void (*proc)(void *), void *arg) -+{ -+ int save_kmalloc_ok = kmalloc_ok; -+ -+ kmalloc_ok = 0; -+ CHOOSE_MODE_PROC(initial_thread_cb_tt, initial_thread_cb_skas, proc, -+ arg); -+ kmalloc_ok = save_kmalloc_ok; -+} -+ -+unsigned long stack_sp(unsigned long page) -+{ -+ return(page + PAGE_SIZE - sizeof(void *)); -+} -+ -+int current_pid(void) -+{ -+ return(current->pid); -+} -+ -+void cpu_idle(void) -+{ -+ CHOOSE_MODE(init_idle_tt(), init_idle_skas()); -+ -+ atomic_inc(&init_mm.mm_count); -+ current->mm = &init_mm; -+ current->active_mm = &init_mm; -+ -+ while(1){ -+ /* endless idle loop with no priority at all */ -+ SET_PRI(current); -+ -+ /* -+ * although we are an idle CPU, we do not want to -+ * get into the scheduler unnecessarily. -+ */ -+ if (current->need_resched) { -+ schedule(); -+ check_pgt_cache(); -+ } -+ idle_sleep(10); -+ } -+} -+ -+int page_size(void) -+{ -+ return(PAGE_SIZE); -+} -+ -+int page_mask(void) -+{ -+ return(PAGE_MASK); -+} -+ -+void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ if(task->mm == NULL) -+ return(ERR_PTR(-EINVAL)); -+ pgd = pgd_offset(task->mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(!pmd_present(*pmd)) -+ return(ERR_PTR(-EINVAL)); -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte)) -+ return(ERR_PTR(-EINVAL)); -+ if(pte_out != NULL) -+ *pte_out = *pte; -+ return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK)); -+} -+ -+char *current_cmd(void) -+{ -+#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM) -+ return("(Unknown)"); -+#else -+ void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL); -+ return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr); -+#endif -+} -+ -+void force_sigbus(void) -+{ -+ printk(KERN_ERR "Killing pid %d because of a lack of memory\n", -+ current->pid); -+ lock_kernel(); -+ sigaddset(¤t->pending.signal, SIGBUS); -+ recalc_sigpending(current); -+ current->flags |= PF_SIGNALED; -+ do_exit(SIGBUS | 0x80); -+} -+ -+void dump_thread(struct pt_regs *regs, struct user *u) -+{ -+} -+ -+void enable_hlt(void) -+{ -+ panic("enable_hlt"); -+} -+ -+void disable_hlt(void) -+{ -+ panic("disable_hlt"); -+} -+ -+extern int signal_frame_size; -+ -+void *um_kmalloc(int size) -+{ -+ return(kmalloc(size, GFP_KERNEL)); -+} -+ -+void *um_kmalloc_atomic(int size) -+{ -+ return(kmalloc(size, GFP_ATOMIC)); -+} -+ -+unsigned long get_fault_addr(void) -+{ -+ return((unsigned long) current->thread.fault_addr); -+} -+ -+EXPORT_SYMBOL(get_fault_addr); -+ -+void not_implemented(void) -+{ -+ printk(KERN_DEBUG "Something isn't implemented in here\n"); -+} -+ -+EXPORT_SYMBOL(not_implemented); -+ -+int user_context(unsigned long sp) -+{ -+ unsigned long stack; -+ -+ stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); -+ stack += 2 * PAGE_SIZE; -+ return(stack != current->thread.kernel_stack); -+} -+ -+extern void remove_umid_dir(void); -+ -+__uml_exitcall(remove_umid_dir); -+ -+extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; -+ -+void do_uml_exitcalls(void) -+{ -+ exitcall_t *call; -+ -+ call = &__uml_exitcall_end; -+ while (--call >= &__uml_exitcall_begin) -+ (*call)(); -+} -+ -+char *uml_strdup(char *string) -+{ -+ char *new; -+ -+ new = kmalloc(strlen(string) + 1, GFP_KERNEL); -+ if(new == NULL) return(NULL); -+ strcpy(new, string); -+ return(new); -+} -+ -+void *get_init_task(void) -+{ -+ return(&init_task_union.task); -+} -+ -+int copy_to_user_proc(void *to, void *from, int size) -+{ -+ return(copy_to_user(to, from, size)); -+} -+ -+int copy_from_user_proc(void *to, void *from, int size) -+{ -+ return(copy_from_user(to, from, size)); -+} -+ -+int clear_user_proc(void *buf, int size) -+{ -+ return(clear_user(buf, size)); -+} -+ -+int strlen_user_proc(char *str) -+{ -+ return(strlen_user(str)); -+} -+ -+int smp_sigio_handler(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu = current->processor; -+ -+ IPI_handler(cpu); -+ if(cpu != 0) -+ return(1); -+#endif -+ return(0); -+} -+ -+int um_in_interrupt(void) -+{ -+ return(in_interrupt()); -+} -+ -+int cpu(void) -+{ -+ return(current->processor); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/ptrace.c um/arch/um/kernel/ptrace.c ---- orig/arch/um/kernel/ptrace.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/ptrace.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/errno.h" -+#include "linux/smp_lock.h" -+#ifdef CONFIG_PROC_MM -+#include "linux/proc_mm.h" -+#endif -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+ -+/* -+ * Called by kernel/ptrace.c when detaching.. -+ */ -+void ptrace_disable(struct task_struct *child) -+{ -+} -+ -+extern long do_mmap2(struct task_struct *task, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+int sys_ptrace(long request, long pid, long addr, long data) -+{ -+ struct task_struct *child; -+ int i, ret; -+ -+ lock_kernel(); -+ ret = -EPERM; -+ if (request == PTRACE_TRACEME) { -+ /* are we already being traced? */ -+ if (current->ptrace & PT_PTRACED) -+ goto out; -+ /* set the ptrace bit in the process flags. */ -+ current->ptrace |= PT_PTRACED; -+ ret = 0; -+ goto out; -+ } -+ ret = -ESRCH; -+ read_lock(&tasklist_lock); -+ child = find_task_by_pid(pid); -+ if (child) -+ get_task_struct(child); -+ read_unlock(&tasklist_lock); -+ if (!child) -+ goto out; -+ -+ ret = -EPERM; -+ if (pid == 1) /* you may not mess with init */ -+ goto out_tsk; -+ -+ if (request == PTRACE_ATTACH) { -+ ret = ptrace_attach(child); -+ goto out_tsk; -+ } -+ -+ ret = ptrace_check_attach(child, request == PTRACE_KILL); -+ if (ret < 0) -+ goto out_tsk; -+ -+ switch (request) { -+ /* when I and D space are separate, these will need to be fixed. */ -+ case PTRACE_PEEKTEXT: /* read word at location addr. */ -+ case PTRACE_PEEKDATA: { -+ unsigned long tmp; -+ int copied; -+ -+ ret = -EIO; -+ copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); -+ if (copied != sizeof(tmp)) -+ break; -+ ret = put_user(tmp,(unsigned long *) data); -+ break; -+ } -+ -+ /* read the word at location addr in the USER area. */ -+ case PTRACE_PEEKUSR: { -+ unsigned long tmp; -+ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ tmp = 0; /* Default return condition */ -+ if(addr < FRAME_SIZE_OFFSET){ -+ tmp = getreg(child, addr); -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ tmp = child->thread.arch.debugregs[addr]; -+ } -+ ret = put_user(tmp, (unsigned long *) data); -+ break; -+ } -+ -+ /* when I and D space are separate, this will have to be fixed. */ -+ case PTRACE_POKETEXT: /* write the word at location addr. */ -+ case PTRACE_POKEDATA: -+ ret = -EIO; -+ if (access_process_vm(child, addr, &data, sizeof(data), -+ 1) != sizeof(data)) -+ break; -+ ret = 0; -+ break; -+ -+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ if (addr < FRAME_SIZE_OFFSET) { -+ ret = putreg(child, addr, data); -+ break; -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if((addr == 4) || (addr == 5)) break; -+ child->thread.arch.debugregs[addr] = data; -+ ret = 0; -+ } -+ -+ break; -+ -+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ -+ case PTRACE_CONT: { /* restart after signal. */ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ if (request == PTRACE_SYSCALL) -+ child->ptrace |= PT_TRACESYS; -+ else -+ child->ptrace &= ~PT_TRACESYS; -+ child->exit_code = data; -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+/* -+ * make the child exit. Best I can do is send it a sigkill. -+ * perhaps it should be put in the status that it wants to -+ * exit. -+ */ -+ case PTRACE_KILL: { -+ ret = 0; -+ if (child->state == TASK_ZOMBIE) /* already dead */ -+ break; -+ child->exit_code = SIGKILL; -+ wake_up_process(child); -+ break; -+ } -+ -+ case PTRACE_SINGLESTEP: { /* set the trap flag. */ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ child->ptrace &= ~PT_TRACESYS; -+ child->ptrace |= PT_DTRACE; -+ child->exit_code = data; -+ /* give it a chance to run. */ -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+ case PTRACE_DETACH: -+ /* detach a process that was attached. */ -+ ret = ptrace_detach(child, data); -+ break; -+ -+#ifdef PTRACE_GETREGS -+ case PTRACE_GETREGS: { /* Get all gp regs from the child. */ -+ if (!access_ok(VERIFY_WRITE, (unsigned long *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __put_user(getreg(child, i), (unsigned long *) data); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_SETREGS -+ case PTRACE_SETREGS: { /* Set all gp regs in the child. */ -+ unsigned long tmp = 0; -+ if (!access_ok(VERIFY_READ, (unsigned *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __get_user(tmp, (unsigned long *) data); -+ putreg(child, i, tmp); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_GETFPREGS -+ case PTRACE_GETFPREGS: /* Get the child FPU state. */ -+ ret = get_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPREGS -+ case PTRACE_SETFPREGS: /* Set the child FPU state. */ -+ ret = set_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_GETFPXREGS -+ case PTRACE_GETFPXREGS: /* Get the child FPU state. */ -+ ret = get_fpxregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPXREGS -+ case PTRACE_SETFPXREGS: /* Set the child FPU state. */ -+ ret = set_fpxregs(data, child); -+ break; -+#endif -+ case PTRACE_FAULTINFO: { -+ struct ptrace_faultinfo fault; -+ -+ fault = ((struct ptrace_faultinfo) -+ { .is_write = child->thread.err, -+ .addr = child->thread.cr2 }); -+ ret = copy_to_user((unsigned long *) data, &fault, -+ sizeof(fault)); -+ if(ret) -+ break; -+ break; -+ } -+ case PTRACE_SIGPENDING: -+ ret = copy_to_user((unsigned long *) data, -+ &child->pending.signal, -+ sizeof(child->pending.signal)); -+ break; -+ -+ case PTRACE_LDT: { -+ struct ptrace_ldt ldt; -+ -+ if(copy_from_user(&ldt, (unsigned long *) data, -+ sizeof(ldt))){ -+ ret = -EIO; -+ break; -+ } -+ -+ /* This one is confusing, so just punt and return -EIO for -+ * now -+ */ -+ ret = -EIO; -+ break; -+ } -+#ifdef CONFIG_PROC_MM -+ case PTRACE_SWITCH_MM: { -+ struct mm_struct *old = child->mm; -+ struct mm_struct *new = proc_mm_get_mm(data); -+ -+ if(IS_ERR(new)){ -+ ret = PTR_ERR(new); -+ break; -+ } -+ -+ atomic_inc(&new->mm_users); -+ child->mm = new; -+ child->active_mm = new; -+ mmput(old); -+ ret = 0; -+ break; -+ } -+#endif -+ default: -+ ret = -EIO; -+ break; -+ } -+ out_tsk: -+ free_task_struct(child); -+ out: -+ unlock_kernel(); -+ return ret; -+} -+ -+void syscall_trace(void) -+{ -+ if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) -+ != (PT_PTRACED|PT_TRACESYS)) -+ return; -+ current->exit_code = SIGTRAP; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ /* -+ * this isn't the same as continuing with a signal, but it will do -+ * for normal use. strace only continues with a signal if the -+ * stopping signal is not SIGTRAP. -brl -+ */ -+ if (current->exit_code) { -+ send_sig(current->exit_code, current, 1); -+ current->exit_code = 0; -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/reboot.c um/arch/um/kernel/reboot.c ---- orig/arch/um/kernel/reboot.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/reboot.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "os.h" -+#include "mode.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_SMP -+static void kill_idlers(int me) -+{ -+#ifdef CONFIG_MODE_TT -+ struct task_struct *p; -+ int i; -+ -+ for(i = 0; i < sizeof(init_tasks)/sizeof(init_tasks[0]); i++){ -+ p = init_tasks[i]; -+ if((p != NULL) && (p->thread.mode.tt.extern_pid != me) && -+ (p->thread.mode.tt.extern_pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+#endif -+} -+#endif -+ -+static void kill_off_processes(void) -+{ -+ CHOOSE_MODE(kill_off_processes_tt(), kill_off_processes_skas()); -+#ifdef CONFIG_SMP -+ kill_idlers(os_getpid()); -+#endif -+} -+ -+void uml_cleanup(void) -+{ -+ kill_off_processes(); -+ do_uml_exitcalls(); -+} -+ -+void machine_restart(char * __unused) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(reboot_tt(), reboot_skas()); -+} -+ -+void machine_power_off(void) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(halt_tt(), halt_skas()); -+} -+ -+void machine_halt(void) -+{ -+ machine_power_off(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/resource.c um/arch/um/kernel/resource.c ---- orig/arch/um/kernel/resource.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/resource.c 2003-10-21 03:26:06.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/pci.h" -+ -+unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, -+ unsigned long start, unsigned long size) -+{ -+ return start; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sigio_kern.c um/arch/um/kernel/sigio_kern.c ---- orig/arch/um/kernel/sigio_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/sigio_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/list.h" -+#include "linux/slab.h" -+#include "asm/irq.h" -+#include "init.h" -+#include "sigio.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+/* Protected by sigio_lock() called from write_sigio_workaround */ -+static int sigio_irq_fd = -1; -+ -+void sigio_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ read_sigio_fd(sigio_irq_fd); -+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); -+} -+ -+int write_sigio_irq(int fd) -+{ -+ if(um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, -+ SA_INTERRUPT | SA_SAMPLE_RANDOM, "write sigio", -+ NULL)){ -+ printk("write_sigio_irq : um_request_irq failed\n"); -+ return(-1); -+ } -+ sigio_irq_fd = fd; -+ return(0); -+} -+ -+static spinlock_t sigio_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void sigio_lock(void) -+{ -+ spin_lock(&sigio_spinlock); -+} -+ -+void sigio_unlock(void) -+{ -+ spin_unlock(&sigio_spinlock); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sigio_user.c um/arch/um/kernel/sigio_user.c ---- orig/arch/um/kernel/sigio_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/sigio_user.c 2003-11-07 01:41:13.000000000 -0500 -@@ -0,0 +1,438 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdlib.h> -+#include <termios.h> -+#include <pty.h> -+#include <signal.h> -+#include <errno.h> -+#include <string.h> -+#include <sched.h> -+#include <sys/socket.h> -+#include <sys/poll.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "sigio.h" -+#include "helper.h" -+#include "os.h" -+ -+/* Changed during early boot */ -+int pty_output_sigio = 0; -+int pty_close_sigio = 0; -+ -+/* Used as a flag during SIGIO testing early in boot */ -+static volatile int got_sigio = 0; -+ -+void __init handler(int sig) -+{ -+ got_sigio = 1; -+} -+ -+struct openpty_arg { -+ int master; -+ int slave; -+ int err; -+}; -+ -+static void openpty_cb(void *arg) -+{ -+ struct openpty_arg *info = arg; -+ -+ info->err = 0; -+ if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) -+ info->err = -errno; -+} -+ -+void __init check_one_sigio(void (*proc)(int, int)) -+{ -+ struct sigaction old, new; -+ struct termios tt; -+ struct openpty_arg pty = { .master = -1, .slave = -1 }; -+ int master, slave, err; -+ -+ initial_thread_cb(openpty_cb, &pty); -+ if(pty.err){ -+ printk("openpty failed, errno = %d\n", -pty.err); -+ return; -+ } -+ -+ master = pty.master; -+ slave = pty.slave; -+ -+ if((master == -1) || (slave == -1)){ -+ printk("openpty failed to allocate a pty\n"); -+ return; -+ } -+ -+ /* XXX These can fail with EINTR */ -+ if(tcgetattr(master, &tt) < 0) -+ panic("check_sigio : tcgetattr failed, errno = %d\n", errno); -+ cfmakeraw(&tt); -+ if(tcsetattr(master, TCSADRAIN, &tt) < 0) -+ panic("check_sigio : tcsetattr failed, errno = %d\n", errno); -+ -+ err = os_sigio_async(master, slave); -+ if(err < 0) -+ panic("tty_fds : sigio_async failed, err = %d\n", -err); -+ -+ if(sigaction(SIGIO, NULL, &old) < 0) -+ panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); -+ new = old; -+ new.sa_handler = handler; -+ if(sigaction(SIGIO, &new, NULL) < 0) -+ panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); -+ -+ got_sigio = 0; -+ (*proc)(master, slave); -+ -+ os_close_file(master); -+ os_close_file(slave); -+ -+ if(sigaction(SIGIO, &old, NULL) < 0) -+ panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); -+} -+ -+static void tty_output(int master, int slave) -+{ -+ int n; -+ char buf[512]; -+ -+ printk("Checking that host ptys support output SIGIO..."); -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ while(os_write_file(master, buf, sizeof(buf)) > 0) ; -+ if(errno != EAGAIN) -+ panic("check_sigio : write failed, errno = %d\n", errno); -+ while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; -+ -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_output_sigio = 1; -+ } -+ else if(n == -EAGAIN) printk("No, enabling workaround\n"); -+ else panic("check_sigio : read failed, err = %d\n", n); -+} -+ -+static void tty_close(int master, int slave) -+{ -+ printk("Checking that host ptys support SIGIO on close..."); -+ -+ os_close_file(slave); -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_close_sigio = 1; -+ } -+ else printk("No, enabling workaround\n"); -+} -+ -+void __init check_sigio(void) -+{ -+ if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && -+ (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ -+ printk("No pseudo-terminals available - skipping pty SIGIO " -+ "check\n"); -+ return; -+ } -+ check_one_sigio(tty_output); -+ check_one_sigio(tty_close); -+} -+ -+/* Protected by sigio_lock(), also used by sigio_cleanup, which is an -+ * exitcall. -+ */ -+static int write_sigio_pid = -1; -+ -+/* These arrays are initialized before the sigio thread is started, and -+ * the descriptors closed after it is killed. So, it can't see them change. -+ * On the UML side, they are changed under the sigio_lock. -+ */ -+static int write_sigio_fds[2] = { -1, -1 }; -+static int sigio_private[2] = { -1, -1 }; -+ -+struct pollfds { -+ struct pollfd *poll; -+ int size; -+ int used; -+}; -+ -+/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread -+ * synchronizes with it. -+ */ -+struct pollfds current_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+struct pollfds next_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+static int write_sigio_thread(void *unused) -+{ -+ struct pollfds *fds, tmp; -+ struct pollfd *p; -+ int i, n, respond_fd; -+ char c; -+ -+ fds = ¤t_poll; -+ while(1){ -+ n = poll(fds->poll, fds->used, -1); -+ if(n < 0){ -+ if(errno == EINTR) continue; -+ printk("write_sigio_thread : poll returned %d, " -+ "errno = %d\n", n, errno); -+ } -+ for(i = 0; i < fds->used; i++){ -+ p = &fds->poll[i]; -+ if(p->revents == 0) continue; -+ if(p->fd == sigio_private[1]){ -+ n = os_read_file(sigio_private[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : " -+ "read failed, err = %d\n", -n); -+ tmp = current_poll; -+ current_poll = next_poll; -+ next_poll = tmp; -+ respond_fd = sigio_private[1]; -+ } -+ else { -+ respond_fd = write_sigio_fds[1]; -+ fds->used--; -+ memmove(&fds->poll[i], &fds->poll[i + 1], -+ (fds->used - i) * sizeof(*fds->poll)); -+ } -+ -+ n = os_write_file(respond_fd, &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : write failed, " -+ "err = %d\n", -n); -+ } -+ } -+} -+ -+static int need_poll(int n) -+{ -+ if(n <= next_poll.size){ -+ next_poll.used = n; -+ return(0); -+ } -+ if(next_poll.poll != NULL) kfree(next_poll.poll); -+ next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); -+ if(next_poll.poll == NULL){ -+ printk("need_poll : failed to allocate new pollfds\n"); -+ next_poll.size = 0; -+ next_poll.used = 0; -+ return(-1); -+ } -+ next_poll.size = n; -+ next_poll.used = n; -+ return(0); -+} -+ -+static void update_thread(void) -+{ -+ unsigned long flags; -+ int n; -+ char c; -+ -+ flags = set_signals(0); -+ n = os_write_file(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : write failed, err = %d\n", -n); -+ goto fail; -+ } -+ -+ n = os_read_file(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : read failed, err = %d\n", -n); -+ goto fail; -+ } -+ -+ set_signals(flags); -+ return; -+ fail: -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); -+ sigio_unlock(); -+ set_signals(flags); -+} -+ -+int add_sigio_fd(int fd, int read) -+{ -+ int err = 0, i, n, events; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) -+ goto out; -+ } -+ -+ n = current_poll.used + 1; -+ err = need_poll(n); -+ if(err) -+ goto out; -+ -+ for(i = 0; i < current_poll.used; i++) -+ next_poll.poll[i] = current_poll.poll[i]; -+ -+ if(read) events = POLLIN; -+ else events = POLLOUT; -+ -+ next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+int ignore_sigio_fd(int fd) -+{ -+ struct pollfd *p; -+ int err = 0, i, n = 0; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) break; -+ } -+ if(i == current_poll.used) -+ goto out; -+ -+ err = need_poll(current_poll.used - 1); -+ if(err) -+ goto out; -+ -+ for(i = 0; i < current_poll.used; i++){ -+ p = ¤t_poll.poll[i]; -+ if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; -+ } -+ if(n == i){ -+ printk("ignore_sigio_fd : fd %d not found\n", fd); -+ err = -1; -+ goto out; -+ } -+ -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+static int setup_initial_poll(int fd) -+{ -+ struct pollfd *p; -+ -+ p = um_kmalloc(sizeof(struct pollfd)); -+ if(p == NULL){ -+ printk("setup_initial_poll : failed to allocate poll\n"); -+ return(-1); -+ } -+ *p = ((struct pollfd) { .fd = fd, -+ .events = POLLIN, -+ .revents = 0 }); -+ current_poll = ((struct pollfds) { .poll = p, -+ .used = 1, -+ .size = 1 }); -+ return(0); -+} -+ -+void write_sigio_workaround(void) -+{ -+ unsigned long stack; -+ int err; -+ -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ goto out; -+ -+ err = os_pipe(write_sigio_fds, 1, 1); -+ if(err < 0){ -+ printk("write_sigio_workaround - os_pipe 1 failed, " -+ "err = %d\n", -err); -+ goto out; -+ } -+ err = os_pipe(sigio_private, 1, 1); -+ if(err < 0){ -+ printk("write_sigio_workaround - os_pipe 2 failed, " -+ "err = %d\n", -err); -+ goto out_close1; -+ } -+ if(setup_initial_poll(sigio_private[1])) -+ goto out_close2; -+ -+ write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, -+ CLONE_FILES | CLONE_VM, &stack, 0); -+ -+ if(write_sigio_pid < 0) goto out_close2; -+ -+ if(write_sigio_irq(write_sigio_fds[0])) -+ goto out_kill; -+ -+ out: -+ sigio_unlock(); -+ return; -+ -+ out_kill: -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ out_close2: -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); -+ out_close1: -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); -+ sigio_unlock(); -+} -+ -+int read_sigio_fd(int fd) -+{ -+ int n; -+ char c; -+ -+ n = os_read_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ if(n < 0) { -+ printk("read_sigio_fd - read failed, err = %d\n", -n); -+ return(n); -+ } -+ else { -+ printk("read_sigio_fd - short read, bytes = %d\n", n); -+ return(-EIO); -+ } -+ } -+ return(n); -+} -+ -+static void sigio_cleanup(void) -+{ -+ if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+} -+ -+__uml_exitcall(sigio_cleanup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/signal_kern.c um/arch/um/kernel/signal_kern.c ---- orig/arch/um/kernel/signal_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/signal_kern.c 2003-11-19 03:50:58.000000000 -0500 -@@ -0,0 +1,369 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sys.h" -+#include "linux/sched.h" -+#include "linux/wait.h" -+#include "linux/kernel.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/slab.h" -+#include "asm/signal.h" -+#include "asm/uaccess.h" -+#include "asm/ucontext.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "kern.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(block_signals); -+EXPORT_SYMBOL(unblock_signals); -+ -+static void force_segv(int sig) -+{ -+ if(sig == SIGSEGV){ -+ struct k_sigaction *ka; -+ -+ ka = ¤t->sig->action[SIGSEGV - 1]; -+ ka->sa.sa_handler = SIG_DFL; -+ } -+ force_sig(SIGSEGV, current); -+} -+ -+#define _S(nr) (1<<((nr)-1)) -+ -+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) -+ -+/* -+ * OK, we're invoking a handler -+ */ -+static int handle_signal(struct pt_regs *regs, unsigned long signr, -+ struct k_sigaction *ka, siginfo_t *info, -+ sigset_t *oldset, int error) -+{ -+ __sighandler_t handler; -+ void (*restorer)(void); -+ unsigned long sp; -+ sigset_t save; -+ int err, ret; -+ -+ ret = 0; -+ switch(error){ -+ case -ERESTARTNOHAND: -+ ret = -EINTR; -+ break; -+ -+ case -ERESTARTSYS: -+ if (!(ka->sa.sa_flags & SA_RESTART)) { -+ ret = -EINTR; -+ break; -+ } -+ /* fallthrough */ -+ case -ERESTARTNOINTR: -+ PT_REGS_RESTART_SYSCALL(regs); -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ -+ /* This is because of the UM_SET_SYSCALL_RETURN and the fact -+ * that on i386 the system call number and return value are -+ * in the same register. When the system call restarts, %eax -+ * had better have the system call number in it. Since the -+ * return value doesn't matter (except that it shouldn't be -+ * -ERESTART*), we'll stick the system call number there. -+ */ -+ ret = PT_REGS_SYSCALL_NR(regs); -+ break; -+ } -+ -+ handler = ka->sa.sa_handler; -+ save = *oldset; -+ -+ if (ka->sa.sa_flags & SA_ONESHOT) -+ ka->sa.sa_handler = SIG_DFL; -+ -+ if (!(ka->sa.sa_flags & SA_NODEFER)) { -+ spin_lock_irq(¤t->sigmask_lock); -+ sigorsets(¤t->blocked, ¤t->blocked, -+ &ka->sa.sa_mask); -+ sigaddset(¤t->blocked, signr); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ } -+ -+ sp = PT_REGS_SP(regs); -+ -+ if((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) -+ sp = current->sas_ss_sp + current->sas_ss_size; -+ -+ if(error != 0) PT_REGS_SET_SYSCALL_RETURN(regs, ret); -+ -+ if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; -+ else restorer = NULL; -+ -+ if(ka->sa.sa_flags & SA_SIGINFO) -+ err = setup_signal_stack_si(sp, signr, (unsigned long) handler, -+ restorer, regs, info, &save); -+ else -+ err = setup_signal_stack_sc(sp, signr, (unsigned long) handler, -+ restorer, regs, &save); -+ if(err) goto segv; -+ -+ return(0); -+ segv: -+ force_segv(signr); -+ return(1); -+} -+ -+/* -+ * Note that 'init' is a special process: it doesn't get signals it doesn't -+ * want to handle. Thus you cannot kill init even with a SIGKILL even by -+ * mistake. -+ */ -+ -+static int kern_do_signal(struct pt_regs *regs, sigset_t *oldset, int error) -+{ -+ siginfo_t info; -+ struct k_sigaction *ka; -+ int err; -+ -+ if (!oldset) -+ oldset = ¤t->blocked; -+ -+ for (;;) { -+ unsigned long signr; -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ signr = dequeue_signal(¤t->blocked, &info); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ if (!signr) -+ break; -+ -+ if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { -+ /* Let the debugger run. */ -+ current->exit_code = signr; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ -+ /* We're back. Did the debugger cancel the sig? */ -+ signr = current->exit_code; -+ if (!signr) -+ continue; -+ current->exit_code = 0; -+ -+ /* The debugger continued. Ignore SIGSTOP. */ -+ if (signr == SIGSTOP) -+ continue; -+ -+ /* Update the siginfo structure. Is this good? */ -+ if (signr != info.si_signo) { -+ info.si_signo = signr; -+ info.si_errno = 0; -+ info.si_code = SI_USER; -+ info.si_pid = current->p_pptr->pid; -+ info.si_uid = current->p_pptr->uid; -+ } -+ -+ /* If the (new) signal is now blocked, requeue it. */ -+ if (sigismember(¤t->blocked, signr)) { -+ send_sig_info(signr, &info, current); -+ continue; -+ } -+ } -+ -+ ka = ¤t->sig->action[signr-1]; -+ if (ka->sa.sa_handler == SIG_IGN) { -+ if (signr != SIGCHLD) -+ continue; -+ /* Check for SIGCHLD: it's special. */ -+ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) -+ /* nothing */; -+ continue; -+ } -+ -+ if (ka->sa.sa_handler == SIG_DFL) { -+ int exit_code = signr; -+ -+ /* Init gets no signals it doesn't want. */ -+ if (current->pid == 1) -+ continue; -+ -+ switch (signr) { -+ case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: -+ continue; -+ -+ case SIGTSTP: case SIGTTIN: case SIGTTOU: -+ if (is_orphaned_pgrp(current->pgrp)) -+ continue; -+ /* FALLTHRU */ -+ -+ case SIGSTOP: { -+ struct signal_struct *sig; -+ current->state = TASK_STOPPED; -+ current->exit_code = signr; -+ sig = current->p_pptr->sig; -+ if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ continue; -+ } -+ case SIGQUIT: case SIGILL: case SIGTRAP: -+ case SIGABRT: case SIGFPE: case SIGSEGV: -+ case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: -+ if (do_coredump(signr, ¤t->thread.regs)) -+ exit_code |= 0x80; -+ /* FALLTHRU */ -+ -+ default: -+ sig_exit(signr, exit_code, &info); -+ /* NOTREACHED */ -+ } -+ } -+ -+ /* Whee! Actually deliver the signal. */ -+ err = handle_signal(regs, signr, ka, &info, oldset, error); -+ if(!err) return(1); -+ } -+ -+ /* Did we come from a system call? */ -+ if(PT_REGS_SYSCALL_NR(regs) >= 0){ -+ /* Restart the system call - no handlers present */ -+ if(PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOHAND || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTSYS || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOINTR){ -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ PT_REGS_RESTART_SYSCALL(regs); -+ } -+ } -+ -+ /* This closes a way to execute a system call on the host. If -+ * you set a breakpoint on a system call instruction and singlestep -+ * from it, the tracing thread used to PTRACE_SINGLESTEP the process -+ * rather than PTRACE_SYSCALL it, allowing the system call to execute -+ * on the host. The tracing thread will check this flag and -+ * PTRACE_SYSCALL if necessary. -+ */ -+ if((current->ptrace & PT_DTRACE) && -+ is_syscall(PT_REGS_IP(¤t->thread.regs))) -+ (void) CHOOSE_MODE(current->thread.mode.tt.singlestep_syscall = 1, 0); -+ -+ return(0); -+} -+ -+int do_signal(int error) -+{ -+ return(kern_do_signal(¤t->thread.regs, NULL, error)); -+} -+ -+/* -+ * Atomically swap in the new signal mask, and wait for a signal. -+ */ -+int sys_sigsuspend(int history0, int history1, old_sigset_t mask) -+{ -+ sigset_t saveset; -+ -+ mask &= _BLOCKABLE; -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ siginitset(¤t->blocked, mask); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if(kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -+{ -+ sigset_t saveset, newset; -+ -+ /* XXX: Don't preclude handling different sized sigset_t's. */ -+ if (sigsetsize != sizeof(sigset_t)) -+ return -EINVAL; -+ -+ if (copy_from_user(&newset, unewset, sizeof(newset))) -+ return -EFAULT; -+ sigdelsetmask(&newset, ~_BLOCKABLE); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ current->blocked = newset; -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if (kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+extern int userspace_pid[]; -+ -+static int copy_sc_from_user(struct pt_regs *to, void *from, -+ struct arch_frame_data *arch) -+{ -+ int ret; -+ -+ ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), -+ copy_sc_from_user_skas(userspace_pid[0], -+ &to->regs, from)); -+ return(ret); -+} -+ -+int sys_sigreturn(struct pt_regs regs) -+{ -+ void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); -+ void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ copy_from_user(¤t->blocked.sig[0], sc_sigmask(sc), -+ sizeof(current->blocked.sig[0])); -+ copy_from_user(¤t->blocked.sig[1], mask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ copy_sc_from_user(¤t->thread.regs, sc, -+ &signal_frame_sc.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+int sys_rt_sigreturn(struct pt_regs regs) -+{ -+ struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); -+ int sig_size = _NSIG_WORDS * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ copy_from_user(¤t->blocked, &uc->uc_sigmask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, -+ &signal_frame_si.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/signal_user.c um/arch/um/kernel/signal_user.c ---- orig/arch/um/kernel/signal_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/signal_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <string.h> -+#include <sys/mman.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "signal_user.h" -+#include "signal_kern.h" -+#include "sysdep/sigcontext.h" -+#include "sigcontext.h" -+ -+void set_sigstack(void *sig_stack, int size) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = 0, -+ .ss_sp = (__ptr_t) sig_stack, -+ .ss_size = size - sizeof(void *) }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("enabling signal stack failed, errno = %d\n", errno); -+} -+ -+void set_handler(int sig, void (*handler)(int), int flags, ...) -+{ -+ struct sigaction action; -+ va_list ap; -+ int mask; -+ -+ va_start(ap, flags); -+ action.sa_handler = handler; -+ sigemptyset(&action.sa_mask); -+ while((mask = va_arg(ap, int)) != -1){ -+ sigaddset(&action.sa_mask, mask); -+ } -+ action.sa_flags = flags; -+ action.sa_restorer = NULL; -+ if(sigaction(sig, &action, NULL) < 0) -+ panic("sigaction failed"); -+} -+ -+int change_sig(int signal, int on) -+{ -+ sigset_t sigset, old; -+ -+ sigemptyset(&sigset); -+ sigaddset(&sigset, signal); -+ sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); -+ return(!sigismember(&old, signal)); -+} -+ -+static void change_signals(int type) -+{ -+ sigset_t mask; -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ sigaddset(&mask, SIGIO); -+ sigaddset(&mask, SIGPROF); -+ if(sigprocmask(type, &mask, NULL) < 0) -+ panic("Failed to change signal mask - errno = %d", errno); -+} -+ -+void block_signals(void) -+{ -+ change_signals(SIG_BLOCK); -+} -+ -+void unblock_signals(void) -+{ -+ change_signals(SIG_UNBLOCK); -+} -+ -+#define SIGIO_BIT 0 -+#define SIGVTALRM_BIT 1 -+ -+static int enable_mask(sigset_t *mask) -+{ -+ int sigs; -+ -+ sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT; -+ sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ return(sigs); -+} -+ -+int get_signals(void) -+{ -+ sigset_t mask; -+ -+ if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) -+ panic("Failed to get signal mask"); -+ return(enable_mask(&mask)); -+} -+ -+int set_signals(int enable) -+{ -+ sigset_t mask; -+ int ret; -+ -+ sigemptyset(&mask); -+ if(enable & (1 << SIGIO_BIT)) -+ sigaddset(&mask, SIGIO); -+ if(enable & (1 << SIGVTALRM_BIT)){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0) -+ panic("Failed to enable signals"); -+ ret = enable_mask(&mask); -+ sigemptyset(&mask); -+ if((enable & (1 << SIGIO_BIT)) == 0) -+ sigaddset(&mask, SIGIO); -+ if((enable & (1 << SIGVTALRM_BIT)) == 0){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0) -+ panic("Failed to block signals"); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/exec_kern.c um/arch/um/kernel/skas/exec_kern.c ---- orig/arch/um/kernel/skas/exec_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/exec_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "asm/current.h" -+#include "asm/page.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+#include "tlb.h" -+#include "skas.h" -+#include "mmu.h" -+#include "os.h" -+ -+void flush_thread_skas(void) -+{ -+ force_flush_all(); -+ switch_mm_skas(current->mm->context.skas.mm_fd); -+} -+ -+void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/exec_user.c um/arch/um/kernel/skas/exec_user.c ---- orig/arch/um/kernel/skas/exec_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/exec_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+#include "time_user.h" -+ -+static int user_thread_tramp(void *arg) -+{ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("user_thread_tramp - PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ enable_timer(); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+int user_thread(unsigned long stack, int flags) -+{ -+ int pid, status; -+ -+ pid = clone(user_thread_tramp, (void *) stack_sp(stack), -+ flags | CLONE_FILES | SIGCHLD, NULL); -+ if(pid < 0){ -+ printk("user_thread - clone failed, errno = %d\n", errno); -+ return(pid); -+ } -+ -+ if(waitpid(pid, &status, WUNTRACED) < 0){ -+ printk("user_thread - waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ printk("user_thread - trampoline didn't stop, status = %d\n", -+ status); -+ return(-EINVAL); -+ } -+ -+ return(pid); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mmu.h um/arch/um/kernel/skas/include/mmu.h ---- orig/arch/um/kernel/skas/include/mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/mmu.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MMU_H -+#define __SKAS_MMU_H -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+ -+struct mmu_context_skas { -+ int mm_fd; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mode.h um/arch/um/kernel/skas/include/mode.h ---- orig/arch/um/kernel/skas/include/mode.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/mode.h 2003-11-19 03:27:36.000000000 -0500 -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_SKAS_H__ -+#define __MODE_SKAS_H__ -+ -+extern unsigned long exec_regs[]; -+extern unsigned long exec_fp_regs[]; -+extern unsigned long exec_fpx_regs[]; -+extern int have_fpx_regs; -+ -+extern void user_time_init_skas(void); -+extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, -+ void *from_ptr); -+extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, -+ union uml_pt_regs *regs, -+ unsigned long fault_addr, int fault_type); -+extern void sig_handler_common_skas(int sig, void *sc_ptr); -+extern void halt_skas(void); -+extern void reboot_skas(void); -+extern void kill_off_processes_skas(void); -+extern int is_skas_winch(int pid, int fd, void *data); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mode_kern.h um/arch/um/kernel/skas/include/mode_kern.h ---- orig/arch/um/kernel/skas/include/mode_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/mode_kern.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MODE_KERN_H__ -+#define __SKAS_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+ -+extern void flush_thread_skas(void); -+extern void *_switch_to_skas(void *prev, void *next); -+extern void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_skas(int nr, unsigned long clone_flags, -+ unsigned long sp, unsigned long stack_top, -+ struct task_struct *p, struct pt_regs *regs); -+extern void release_thread_skas(struct task_struct *task); -+extern void exit_thread_skas(void); -+extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); -+extern void init_idle_skas(void); -+extern void flush_tlb_kernel_vm_skas(void); -+extern void __flush_tlb_one_skas(unsigned long addr); -+extern void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_skas(struct mm_struct *mm); -+extern void force_flush_all_skas(void); -+extern long execute_syscall_skas(void *r); -+extern void before_mem_skas(unsigned long unused); -+extern unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_skas(void); -+extern int external_pid_skas(struct task_struct *task); -+extern int thread_pid_skas(struct thread_struct *thread); -+ -+#define kmem_end_skas (host_task_size - 1024 * 1024) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/proc_mm.h um/arch/um/kernel/skas/include/proc_mm.h ---- orig/arch/um/kernel/skas/include/proc_mm.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/proc_mm.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PROC_MM_H -+#define __SKAS_PROC_MM_H -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/ptrace-skas.h um/arch/um/kernel/skas/include/ptrace-skas.h ---- orig/arch/um/kernel/skas/include/ptrace-skas.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/ptrace-skas.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_SKAS_H -+#define __PTRACE_SKAS_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_SKAS -+ -+#include "skas_ptregs.h" -+ -+#define HOST_FRAME_SIZE 17 -+ -+#define REGS_IP(r) ((r)[HOST_IP]) -+#define REGS_SP(r) ((r)[HOST_SP]) -+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) -+#define REGS_EAX(r) ((r)[HOST_EAX]) -+#define REGS_EBX(r) ((r)[HOST_EBX]) -+#define REGS_ECX(r) ((r)[HOST_ECX]) -+#define REGS_EDX(r) ((r)[HOST_EDX]) -+#define REGS_ESI(r) ((r)[HOST_ESI]) -+#define REGS_EDI(r) ((r)[HOST_EDI]) -+#define REGS_EBP(r) ((r)[HOST_EBP]) -+#define REGS_CS(r) ((r)[HOST_CS]) -+#define REGS_SS(r) ((r)[HOST_SS]) -+#define REGS_DS(r) ((r)[HOST_DS]) -+#define REGS_ES(r) ((r)[HOST_ES]) -+#define REGS_FS(r) ((r)[HOST_FS]) -+#define REGS_GS(r) ((r)[HOST_GS]) -+ -+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) -+ -+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) -+ -+#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) -+ -+#define REGS_FAULT_ADDR(r) ((r)->fault_addr) -+ -+#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/skas.h um/arch/um/kernel/skas/include/skas.h ---- orig/arch/um/kernel/skas/include/skas.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/skas.h 2003-11-19 03:00:51.000000000 -0500 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_H -+#define __SKAS_H -+ -+#include "sysdep/ptrace.h" -+ -+extern int userspace_pid[]; -+ -+extern void switch_threads(void *me, void *next); -+extern void thread_wait(void *sw, void *fb); -+extern void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)); -+extern int start_idle_thread(void *stack, void *switch_buf_ptr, -+ void **fork_buf_ptr); -+extern int user_thread(unsigned long stack, int flags); -+extern void userspace(union uml_pt_regs *regs); -+extern void new_thread_proc(void *stack, void (*handler)(int sig)); -+extern void remove_sigstack(void); -+extern void new_thread_handler(int sig); -+extern void handle_syscall(union uml_pt_regs *regs); -+extern void map(int fd, unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int unmap(int fd, void *addr, int len); -+extern int protect(int fd, unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern void user_signal(int sig, union uml_pt_regs *regs); -+extern int singlestepping_skas(void); -+extern int new_mm(int from); -+extern void save_registers(union uml_pt_regs *regs); -+extern void restore_registers(union uml_pt_regs *regs); -+extern void start_userspace(int cpu); -+extern void init_registers(int pid); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/uaccess.h um/arch/um/kernel/skas/include/uaccess.h ---- orig/arch/um/kernel/skas/include/uaccess.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/include/uaccess.h 2004-01-17 06:35:41.000000000 -0500 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_UACCESS_H -+#define __SKAS_UACCESS_H -+ -+#include "asm/errno.h" -+ -+#define access_ok_skas(type, addr, size) \ -+ ((segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ ((unsigned long) (addr) + (size) <= TASK_SIZE))) -+ -+static inline int verify_area_skas(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); -+} -+ -+extern int copy_from_user_skas(void *to, const void *from, int n); -+extern int copy_to_user_skas(void *to, const void *from, int n); -+extern int strncpy_from_user_skas(char *dst, const char *src, int count); -+extern int __clear_user_skas(void *mem, int len); -+extern int clear_user_skas(void *mem, int len); -+extern int strnlen_user_skas(const void *str, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/Makefile um/arch/um/kernel/skas/Makefile ---- orig/arch/um/kernel/skas/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/Makefile 2003-11-11 06:36:12.000000000 -0500 -@@ -0,0 +1,31 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = skas.o -+ -+obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ -+ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ -+ uaccess.o -+ -+subdir-y = sys-$(SUBARCH) -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o -+ -+include $(TOPDIR)/Rules.make -+ -+include/skas_ptregs.h : util/mk_ptregs -+ util/mk_ptregs > $@ -+ -+util/mk_ptregs : -+ $(MAKE) -C util -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ $(MAKE) -C util clean -+ $(RM) -f include/skas_ptregs.h -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mem.c um/arch/um/kernel/skas/mem.c ---- orig/arch/um/kernel/skas/mem.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/mem.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "mem_user.h" -+ -+unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ unsigned long top = ROUND_4M((unsigned long) &arg); -+ -+ *host_size_out = top; -+ *task_size_out = top; -+ return(((unsigned long) set_task_sizes_skas) & ~0xffffff); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mem_user.c um/arch/um/kernel/skas/mem_user.c ---- orig/arch/um/kernel/skas/mem_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/mem_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include "mem_user.h" -+#include "mem.h" -+#include "user.h" -+#include "os.h" -+#include "proc_mm.h" -+ -+void map(int fd, unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ struct proc_mm_op map; -+ __u64 offset; -+ int prot, n, phys_fd; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ phys_fd = phys_mapping(phys, &offset); -+ -+ map = ((struct proc_mm_op) { .op = MM_MMAP, -+ .u = -+ { .mmap = -+ { .addr = virt, -+ .len = len, -+ .prot = prot, -+ .flags = MAP_SHARED | -+ MAP_FIXED, -+ .fd = phys_fd, -+ .offset = offset -+ } } } ); -+ n = os_write_file(fd, &map, sizeof(map)); -+ if(n != sizeof(map)) -+ printk("map : /proc/mm map failed, err = %d\n", -n); -+} -+ -+int unmap(int fd, void *addr, int len) -+{ -+ struct proc_mm_op unmap; -+ int n; -+ -+ unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, -+ .u = -+ { .munmap = -+ { .addr = (unsigned long) addr, -+ .len = len } } } ); -+ n = os_write_file(fd, &unmap, sizeof(unmap)); -+ if(n != sizeof(unmap)) { -+ if(n < 0) -+ return(n); -+ else if(n > 0) -+ return(-EIO); -+ } -+ -+ return(0); -+} -+ -+int protect(int fd, unsigned long addr, unsigned long len, int r, int w, -+ int x, int must_succeed) -+{ -+ struct proc_mm_op protect; -+ int prot, n; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ -+ protect = ((struct proc_mm_op) { .op = MM_MPROTECT, -+ .u = -+ { .mprotect = -+ { .addr = (unsigned long) addr, -+ .len = len, -+ .prot = prot } } } ); -+ -+ n = os_write_file(fd, &protect, sizeof(protect)); -+ if(n != sizeof(protect)) { -+ if(n == 0) return(0); -+ -+ if(must_succeed) -+ panic("protect failed, err = %d", -n); -+ -+ return(-EIO); -+ } -+ -+ return(0); -+} -+ -+void before_mem_skas(unsigned long unused) -+{ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mmu.c um/arch/um/kernel/skas/mmu.c ---- orig/arch/um/kernel/skas/mmu.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/mmu.c 2003-11-15 22:25:20.000000000 -0500 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+#include "linux/slab.h" -+#include "asm/segment.h" -+#include "asm/mmu.h" -+#include "os.h" -+#include "skas.h" -+ -+int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) -+{ -+ int from; -+ -+ if((current->mm != NULL) && (current->mm != &init_mm)) -+ from = current->mm->context.skas.mm_fd; -+ else from = -1; -+ -+ mm->context.skas.mm_fd = new_mm(from); -+ if(mm->context.skas.mm_fd < 0){ -+ printk("init_new_context_skas - new_mm failed, errno = %d\n", -+ mm->context.skas.mm_fd); -+ return(mm->context.skas.mm_fd); -+ } -+ -+ return(0); -+} -+ -+void destroy_context_skas(struct mm_struct *mm) -+{ -+ os_close_file(mm->context.skas.mm_fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/process.c um/arch/um/kernel/skas/process.c ---- orig/arch/um/kernel/skas/process.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/process.c 2004-01-31 02:49:36.000000000 -0500 -@@ -0,0 +1,417 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <signal.h> -+#include <setjmp.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/unistd.h> -+#include "user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "skas.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "proc_mm.h" -+#include "skas_ptrace.h" -+#include "chan_user.h" -+ -+int is_skas_winch(int pid, int fd, void *data) -+{ -+ if(pid != getpid()) -+ return(0); -+ -+ register_winch_irq(-1, fd, -1, data); -+ return(1); -+} -+ -+/* These are set once at boot time and not changed thereafter */ -+ -+unsigned long exec_regs[FRAME_SIZE]; -+unsigned long exec_fp_regs[HOST_FP_SIZE]; -+unsigned long exec_fpx_regs[HOST_XFP_SIZE]; -+int have_fpx_regs = 1; -+ -+static void handle_segv(int pid) -+{ -+ struct ptrace_faultinfo fault; -+ int err; -+ -+ err = ptrace(PTRACE_FAULTINFO, pid, 0, &fault); -+ if(err) -+ panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", -+ errno); -+ -+ segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); -+} -+ -+static void handle_trap(int pid, union uml_pt_regs *regs) -+{ -+ int err, syscall_nr, status; -+ -+ syscall_nr = PT_SYSCALL_NR(regs->skas.regs); -+ UPT_SYSCALL_NR(regs) = syscall_nr; -+ if(syscall_nr < 1){ -+ relay_signal(SIGTRAP, regs); -+ return; -+ } -+ -+ err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); -+ if(err < 0) -+ panic("handle_trap - nullifying syscall failed errno = %d\n", -+ errno); -+ -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ if(err < 0) -+ panic("handle_trap - continuing to end of syscall failed, " -+ "errno = %d\n", errno); -+ -+ err = waitpid(pid, &status, WUNTRACED); -+ if((err < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("handle_trap - failed to wait at end of syscall, " -+ "errno = %d, status = %d\n", errno, status); -+ -+ handle_syscall(regs); -+} -+ -+static int userspace_tramp(void *arg) -+{ -+ init_new_thread_signals(0); -+ enable_timer(); -+ ptrace(PTRACE_TRACEME, 0, 0, 0); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+/* Each element set once, and only accessed by a single processor anyway */ -+#define NR_CPUS 1 -+int userspace_pid[NR_CPUS]; -+ -+void start_userspace(int cpu) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, status, n; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("start_userspace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ -+ pid = clone(userspace_tramp, (void *) sp, -+ CLONE_FILES | CLONE_VM | SIGCHLD, NULL); -+ if(pid < 0) -+ panic("start_userspace : clone failed, errno = %d", errno); -+ -+ do { -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ panic("start_userspace : wait failed, errno = %d", -+ errno); -+ } while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("start_userspace : expected SIGSTOP, got status = %d", -+ status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("start_userspace : munmap failed, errno = %d\n", errno); -+ -+ userspace_pid[cpu] = pid; -+} -+ -+void userspace(union uml_pt_regs *regs) -+{ -+ int err, status, op, pid = userspace_pid[0]; -+ -+ restore_registers(regs); -+ -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", -+ errno); -+ while(1){ -+ err = waitpid(pid, &status, WUNTRACED); -+ if(err < 0) -+ panic("userspace - waitpid failed, errno = %d\n", -+ errno); -+ -+ regs->skas.is_user = 1; -+ save_registers(regs); -+ -+ if(WIFSTOPPED(status)){ -+ switch(WSTOPSIG(status)){ -+ case SIGSEGV: -+ handle_segv(pid); -+ break; -+ case SIGTRAP: -+ handle_trap(pid, regs); -+ break; -+ case SIGIO: -+ case SIGVTALRM: -+ case SIGILL: -+ case SIGBUS: -+ case SIGFPE: -+ case SIGWINCH: -+ user_signal(WSTOPSIG(status), regs); -+ break; -+ default: -+ printk("userspace - child stopped with signal " -+ "%d\n", WSTOPSIG(status)); -+ } -+ interrupt_end(); -+ } -+ -+ restore_registers(regs); -+ -+ op = singlestepping_skas() ? PTRACE_SINGLESTEP : -+ PTRACE_SYSCALL; -+ err = ptrace(op, pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, " -+ "errno = %d\n", errno); -+ } -+} -+ -+void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)) -+{ -+ sigjmp_buf switch_buf, fork_buf; -+ -+ *switch_buf_ptr = &switch_buf; -+ *fork_buf_ptr = &fork_buf; -+ -+ if(sigsetjmp(fork_buf, 1) == 0) -+ new_thread_proc(stack, handler); -+ -+ remove_sigstack(); -+} -+ -+void thread_wait(void *sw, void *fb) -+{ -+ sigjmp_buf buf, **switch_buf = sw, *fork_buf; -+ -+ *switch_buf = &buf; -+ fork_buf = fb; -+ if(sigsetjmp(buf, 1) == 0) -+ siglongjmp(*fork_buf, 1); -+} -+ -+static int move_registers(int pid, int int_op, int fp_op, -+ union uml_pt_regs *regs, unsigned long *fp_regs) -+{ -+ if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) -+ return(-errno); -+ if(ptrace(fp_op, pid, 0, fp_regs) < 0) -+ return(-errno); -+ return(0); -+} -+ -+void save_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_GETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ fp_op = PTRACE_GETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, -+ fp_regs); -+ if(err) -+ panic("save_registers - saving registers failed, errno = %d\n", -+ -err); -+} -+ -+void restore_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_SETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ fp_op = PTRACE_SETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, -+ fp_regs); -+ if(err) -+ panic("restore_registers - saving registers failed, " -+ "errno = %d\n", -err); -+} -+ -+void switch_threads(void *me, void *next) -+{ -+ sigjmp_buf my_buf, **me_ptr = me, *next_buf = next; -+ -+ *me_ptr = &my_buf; -+ if(sigsetjmp(my_buf, 1) == 0) -+ siglongjmp(*next_buf, 1); -+} -+ -+static sigjmp_buf initial_jmpbuf; -+ -+/* XXX Make these percpu */ -+static void (*cb_proc)(void *arg); -+static void *cb_arg; -+static sigjmp_buf *cb_back; -+ -+int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) -+{ -+ sigjmp_buf **switch_buf = switch_buf_ptr; -+ int n; -+ -+ *fork_buf_ptr = &initial_jmpbuf; -+ n = sigsetjmp(initial_jmpbuf, 1); -+ if(n == 0) -+ new_thread_proc((void *) stack, new_thread_handler); -+ else if(n == 1) -+ remove_sigstack(); -+ else if(n == 2){ -+ (*cb_proc)(cb_arg); -+ siglongjmp(*cb_back, 1); -+ } -+ else if(n == 3){ -+ kmalloc_ok = 0; -+ return(0); -+ } -+ else if(n == 4){ -+ kmalloc_ok = 0; -+ return(1); -+ } -+ siglongjmp(**switch_buf, 1); -+} -+ -+void remove_sigstack(void) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, -+ .ss_sp = NULL, -+ .ss_size = 0 }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("disabling signal stack failed, errno = %d\n", errno); -+} -+ -+void initial_thread_cb_skas(void (*proc)(void *), void *arg) -+{ -+ sigjmp_buf here; -+ -+ cb_proc = proc; -+ cb_arg = arg; -+ cb_back = &here; -+ -+ block_signals(); -+ if(sigsetjmp(here, 1) == 0) -+ siglongjmp(initial_jmpbuf, 2); -+ unblock_signals(); -+ -+ cb_proc = NULL; -+ cb_arg = NULL; -+ cb_back = NULL; -+} -+ -+void halt_skas(void) -+{ -+ block_signals(); -+ siglongjmp(initial_jmpbuf, 3); -+} -+ -+void reboot_skas(void) -+{ -+ block_signals(); -+ siglongjmp(initial_jmpbuf, 4); -+} -+ -+int new_mm(int from) -+{ -+ struct proc_mm_op copy; -+ int n, fd = os_open_file("/proc/mm", -+ of_cloexec(of_write(OPENFLAGS())), 0); -+ -+ if(fd < 0) -+ return(fd); -+ -+ if(from != -1){ -+ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, -+ .u = -+ { .copy_segments = from } } ); -+ n = os_write_file(fd, ©, sizeof(copy)); -+ if(n != sizeof(copy)) -+ printk("new_mm : /proc/mm copy_segments failed, " -+ "err = %d\n", -n); -+ } -+ -+ return(fd); -+} -+ -+void switch_mm_skas(int mm_fd) -+{ -+ int err; -+ -+#warning need cpu pid in switch_mm_skas -+ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); -+ if(err) -+ panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", -+ errno); -+} -+ -+void kill_off_processes_skas(void) -+{ -+#warning need to loop over userspace_pids in kill_off_processes_skas -+ os_kill_process(userspace_pid[0], 1); -+} -+ -+void init_registers(int pid) -+{ -+ int err; -+ -+ if(ptrace(PTRACE_GETREGS, pid, 0, exec_regs) < 0) -+ panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs); -+ if(!err) -+ return; -+ -+ have_fpx_regs = 0; -+ if(errno != EIO) -+ panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); -+ if(err) -+ panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/process_kern.c um/arch/um/kernel/skas/process_kern.c ---- orig/arch/um/kernel/skas/process_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/process_kern.c 2003-11-19 03:18:25.000000000 -0500 -@@ -0,0 +1,196 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "kern_util.h" -+#include "time_user.h" -+#include "signal_user.h" -+#include "skas.h" -+#include "os.h" -+#include "user_util.h" -+#include "tlb.h" -+#include "frame.h" -+#include "kern.h" -+#include "mode.h" -+ -+int singlestepping_skas(void) -+{ -+ int ret = current->ptrace & PT_DTRACE; -+ -+ current->ptrace &= ~PT_DTRACE; -+ return(ret); -+} -+ -+void *_switch_to_skas(void *prev, void *next) -+{ -+ struct task_struct *from, *to; -+ -+ from = prev; -+ to = next; -+ -+ /* XXX need to check runqueues[cpu].idle */ -+ if(current->pid == 0) -+ switch_timers(0); -+ -+ to->thread.prev_sched = from; -+ set_current(to); -+ -+ switch_threads(&from->thread.mode.skas.switch_buf, -+ to->thread.mode.skas.switch_buf); -+ -+ if(current->pid == 0) -+ switch_timers(1); -+ -+ return(current->thread.prev_sched); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+void new_thread_handler(int sig) -+{ -+ int (*fn)(void *), n; -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ change_sig(SIGUSR1, 1); -+ thread_wait(¤t->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ /* The return value is 1 if the kernel thread execs a process, -+ * 0 if it just exits -+ */ -+ n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); -+ if(n == 1) -+ userspace(¤t->thread.regs.regs); -+ else do_exit(0); -+} -+ -+void new_thread_proc(void *stack, void (*handler)(int sig)) -+{ -+ init_new_thread_stack(stack, handler); -+ os_usr1_process(os_getpid()); -+} -+ -+void release_thread_skas(struct task_struct *task) -+{ -+} -+ -+void exit_thread_skas(void) -+{ -+} -+ -+void fork_handler(int sig) -+{ -+ change_sig(SIGUSR1, 1); -+ thread_wait(¤t->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ force_flush_all(); -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ unblock_signals(); -+ -+ userspace(¤t->thread.regs.regs); -+} -+ -+int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ void (*handler)(int); -+ -+ if(current->thread.forking){ -+ memcpy(&p->thread.regs.regs.skas, -+ ¤t->thread.regs.regs.skas, -+ sizeof(p->thread.regs.regs.skas)); -+ REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); -+ if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; -+ -+ handler = fork_handler; -+ } -+ else { -+ memcpy(p->thread.regs.regs.skas.regs, exec_regs, -+ sizeof(p->thread.regs.regs.skas.regs)); -+ memcpy(p->thread.regs.regs.skas.fp, exec_fp_regs, -+ sizeof(p->thread.regs.regs.skas.fp)); -+ memcpy(p->thread.regs.regs.skas.xfp, exec_fpx_regs, -+ sizeof(p->thread.regs.regs.skas.xfp)); -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ handler = new_thread_handler; -+ } -+ -+ new_thread((void *) p->thread.kernel_stack, -+ &p->thread.mode.skas.switch_buf, -+ &p->thread.mode.skas.fork_buf, handler); -+ return(0); -+} -+ -+void init_idle_skas(void) -+{ -+ cpu_tasks[current->processor].pid = os_getpid(); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif -+ start_kernel(); -+ return(0); -+} -+ -+int start_uml_skas(void) -+{ -+ start_userspace(0); -+ capture_signal_stack(); -+ idle_timer(); -+ -+ init_new_thread_signals(1); -+ -+ init_task.thread.request.u.thread.proc = start_kernel_proc; -+ init_task.thread.request.u.thread.arg = NULL; -+ return(start_idle_thread((void *) init_task.thread.kernel_stack, -+ &init_task.thread.mode.skas.switch_buf, -+ &init_task.thread.mode.skas.fork_buf)); -+} -+ -+int external_pid_skas(struct task_struct *task) -+{ -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); -+} -+ -+int thread_pid_skas(struct thread_struct *thread) -+{ -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/syscall_kern.c um/arch/um/kernel/skas/syscall_kern.c ---- orig/arch/um/kernel/skas/syscall_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/syscall_kern.c 2004-01-04 08:20:29.000000000 -0500 -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sys.h" -+#include "asm/errno.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/current.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_skas(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(®s->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 1)) -+ res = -ENOSYS; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/syscall_user.c um/arch/um/kernel/skas/syscall_user.c ---- orig/arch/um/kernel/skas/syscall_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/syscall_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <signal.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void handle_syscall(union uml_pt_regs *regs) -+{ -+ long result; -+ int index; -+ -+ index = record_syscall_start(UPT_SYSCALL_NR(regs)); -+ -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/sys-i386/Makefile um/arch/um/kernel/skas/sys-i386/Makefile ---- orig/arch/um/kernel/skas/sys-i386/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/sys-i386/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/sys-i386/sigcontext.c um/arch/um/kernel/skas/sys-i386/sigcontext.c ---- orig/arch/um/kernel/skas/sys-i386/sigcontext.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/sys-i386/sigcontext.c 2003-11-19 03:27:43.000000000 -0500 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <asm/sigcontext.h> -+#include <sys/ptrace.h> -+#include <linux/ptrace.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "kern_util.h" -+#include "user.h" -+#include "sigcontext.h" -+#include "mode.h" -+ -+int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) -+{ -+ struct sigcontext sc, *from = from_ptr; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ err = copy_from_user_proc(&sc, from, sizeof(sc)); -+ err |= copy_from_user_proc(fpregs, sc.fpstate, sizeof(fpregs)); -+ if(err) -+ return(err); -+ -+ regs->skas.regs[GS] = sc.gs; -+ regs->skas.regs[FS] = sc.fs; -+ regs->skas.regs[ES] = sc.es; -+ regs->skas.regs[DS] = sc.ds; -+ regs->skas.regs[EDI] = sc.edi; -+ regs->skas.regs[ESI] = sc.esi; -+ regs->skas.regs[EBP] = sc.ebp; -+ regs->skas.regs[UESP] = sc.esp; -+ regs->skas.regs[EBX] = sc.ebx; -+ regs->skas.regs[EDX] = sc.edx; -+ regs->skas.regs[ECX] = sc.ecx; -+ regs->skas.regs[EAX] = sc.eax; -+ regs->skas.regs[EIP] = sc.eip; -+ regs->skas.regs[CS] = sc.cs; -+ regs->skas.regs[EFL] = sc.eflags; -+ regs->skas.regs[SS] = sc.ss; -+ regs->skas.fault_addr = sc.cr2; -+ regs->skas.fault_type = FAULT_WRITE(sc.err); -+ regs->skas.trap_type = sc.trapno; -+ -+ err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, -+ union uml_pt_regs *regs, unsigned long fault_addr, -+ int fault_type) -+{ -+ struct sigcontext sc, *to = to_ptr; -+ struct _fpstate *to_fp; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ sc.gs = regs->skas.regs[GS]; -+ sc.fs = regs->skas.regs[FS]; -+ sc.es = regs->skas.regs[ES]; -+ sc.ds = regs->skas.regs[DS]; -+ sc.edi = regs->skas.regs[EDI]; -+ sc.esi = regs->skas.regs[ESI]; -+ sc.ebp = regs->skas.regs[EBP]; -+ sc.esp = regs->skas.regs[UESP]; -+ sc.ebx = regs->skas.regs[EBX]; -+ sc.edx = regs->skas.regs[EDX]; -+ sc.ecx = regs->skas.regs[ECX]; -+ sc.eax = regs->skas.regs[EAX]; -+ sc.eip = regs->skas.regs[EIP]; -+ sc.cs = regs->skas.regs[CS]; -+ sc.eflags = regs->skas.regs[EFL]; -+ sc.esp_at_signal = regs->skas.regs[UESP]; -+ sc.ss = regs->skas.regs[SS]; -+ sc.cr2 = fault_addr; -+ sc.err = TO_SC_ERR(fault_type); -+ sc.trapno = regs->skas.trap_type; -+ -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ sc.fpstate = to_fp; -+ -+ if(err) -+ return(err); -+ -+ return(copy_to_user_proc(to, &sc, sizeof(sc)) || -+ copy_to_user_proc(to_fp, fpregs, sizeof(fpregs))); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/time.c um/arch/um/kernel/skas/time.c ---- orig/arch/um/kernel/skas/time.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/time.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/signal.h> -+#include <sys/time.h> -+#include "time_user.h" -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_skas(void) -+{ -+ if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGALRM handler"); -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/tlb.c um/arch/um/kernel/skas/tlb.c ---- orig/arch/um/kernel/skas/tlb.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/tlb.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/mmu.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "skas.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err, fd; -+ -+ if(mm == NULL) return; -+ fd = mm->context.skas.mm_fd; -+ for(addr = start_addr; addr < end_addr;){ -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = unmap(fd, (void *) addr, PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map(fd, addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect(fd, addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = unmap(fd, (void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start_vm; addr < end_vm;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+void flush_tlb_kernel_vm_skas(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm); -+} -+ -+void __flush_tlb_one_skas(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE); -+} -+ -+void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm == NULL) -+ flush_kernel_vm_range(start, end); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_skas(struct mm_struct *mm) -+{ -+ flush_tlb_kernel_vm_skas(); -+ fix_range(mm, 0, host_task_size, 0); -+} -+ -+void force_flush_all_skas(void) -+{ -+ fix_range(current->mm, 0, host_task_size, 1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/trap_user.c um/arch/um/kernel/skas/trap_user.c ---- orig/arch/um/kernel/skas/trap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/trap_user.c 2004-01-21 01:17:05.000000000 -0500 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <errno.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "sigcontext.h" -+ -+void sig_handler_common_skas(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct skas_regs *r; -+ struct signal_info *info; -+ int save_errno = errno; -+ int save_user; -+ -+ r = &TASK_REGS(get_current())->skas; -+ save_user = r->is_user; -+ r->is_user = 0; -+ r->fault_addr = SC_FAULT_ADDR(sc); -+ r->fault_type = SC_FAULT_TYPE(sc); -+ r->trap_type = SC_TRAP_TYPE(sc); -+ -+ change_sig(SIGUSR1, 1); -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ errno = save_errno; -+ r->is_user = save_user; -+} -+ -+extern int missed_ticks[]; -+ -+void user_signal(int sig, union uml_pt_regs *regs) -+{ -+ struct signal_info *info; -+ -+ regs->skas.is_user = 1; -+ regs->skas.fault_addr = 0; -+ regs->skas.fault_type = 0; -+ regs->skas.trap_type = 0; -+ info = &sig_info[sig]; -+ (*info->handler)(sig, regs); -+ -+ unblock_signals(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/uaccess.c um/arch/um/kernel/skas/uaccess.c ---- orig/arch/um/kernel/skas/uaccess.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/uaccess.c 2003-12-22 01:30:09.000000000 -0500 -@@ -0,0 +1,217 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/string.h" -+#include "linux/fs.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out); -+ -+static unsigned long maybe_map(unsigned long virt, int is_write) -+{ -+ pte_t pte; -+ -+ void *phys = um_virt_to_phys(current, virt, &pte); -+ int dummy_code; -+ -+ if(IS_ERR(phys) || (is_write && !pte_write(pte))){ -+ if(!handle_page_fault(virt, 0, is_write, 0, &dummy_code)) -+ return(0); -+ phys = um_virt_to_phys(current, virt, NULL); -+ } -+ return((unsigned long) phys); -+} -+ -+static int do_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), void *arg) -+{ -+ struct page *page; -+ int n; -+ -+ addr = maybe_map(addr, is_write); -+ if(addr == -1) -+ return(-1); -+ -+ page = phys_to_page(addr); -+ addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); -+ n = (*op)(addr, len, arg); -+ kunmap(page); -+ -+ return(n); -+} -+ -+static int buffer_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), -+ void *arg) -+{ -+ int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); -+ int remain = len, n; -+ -+ n = do_op(addr, size, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += size; -+ remain -= size; -+ if(remain == 0) -+ return(0); -+ -+ while(addr < ((addr + remain) & PAGE_MASK)){ -+ n = do_op(addr, PAGE_SIZE, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += PAGE_SIZE; -+ remain -= PAGE_SIZE; -+ } -+ if(remain == 0) -+ return(0); -+ -+ n = do_op(addr, remain, is_write, op, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ return(0); -+} -+ -+static int copy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ unsigned long *to_ptr = arg, to = *to_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *to_ptr += len; -+ return(0); -+} -+ -+int copy_from_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_READ, from, n) ? -+ buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): -+ n); -+} -+ -+static int copy_chunk_to_user(unsigned long to, int len, void *arg) -+{ -+ unsigned long *from_ptr = arg, from = *from_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *from_ptr += len; -+ return(0); -+} -+ -+int copy_to_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, to, n) ? -+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : -+ n); -+} -+ -+static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ char **to_ptr = arg, *to = *to_ptr; -+ int n; -+ -+ strncpy(to, (void *) from, len); -+ n = strnlen(to, len); -+ *to_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strncpy_from_user_skas(char *dst, const char *src, int count) -+{ -+ int n; -+ char *ptr = dst; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ strncpy(dst, src, count); -+ return(strnlen(dst, count)); -+ } -+ -+ if(!access_ok_skas(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, -+ &ptr); -+ if(n != 0) -+ return(-EFAULT); -+ return(strnlen(dst, count)); -+} -+ -+static int clear_chunk(unsigned long addr, int len, void *unused) -+{ -+ memset((void *) addr, 0, len); -+ return(0); -+} -+ -+int __clear_user_skas(void *mem, int len) -+{ -+ return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); -+} -+ -+int clear_user_skas(void *mem, int len) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memset(mem, 0, len); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, mem, len) ? -+ buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); -+} -+ -+static int strnlen_chunk(unsigned long str, int len, void *arg) -+{ -+ int *len_ptr = arg, n; -+ -+ n = strnlen((void *) str, len); -+ *len_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strnlen_user_skas(const void *str, int len) -+{ -+ int count = 0, n; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)) -+ return(strnlen(str, len) + 1); -+ -+ n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); -+ if(n == 0) -+ return(count + 1); -+ return(-EFAULT); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/util/Makefile um/arch/um/kernel/skas/util/Makefile ---- orig/arch/um/kernel/skas/util/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/util/Makefile 2003-11-08 02:52:22.000000000 -0500 -@@ -0,0 +1,10 @@ -+all: mk_ptregs -+ -+mk_ptregs : mk_ptregs.o -+ $(HOSTCC) -o mk_ptregs mk_ptregs.o -+ -+mk_ptregs.o : mk_ptregs.c -+ $(HOSTCC) -c $< -+ -+clean : -+ $(RM) -f mk_ptregs *.o *~ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/util/mk_ptregs.c um/arch/um/kernel/skas/util/mk_ptregs.c ---- orig/arch/um/kernel/skas/util/mk_ptregs.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/skas/util/mk_ptregs.c 2003-12-22 01:31:11.000000000 -0500 -@@ -0,0 +1,51 @@ -+#include <stdio.h> -+#include <asm/ptrace.h> -+#include <asm/user.h> -+ -+#define PRINT_REG(name, val) printf("#define HOST_%s %d\n", (name), (val)) -+ -+int main(int argc, char **argv) -+{ -+ printf("/* Automatically generated by " -+ "arch/um/kernel/skas/util/mk_ptregs */\n"); -+ printf("\n"); -+ printf("#ifndef __SKAS_PT_REGS_\n"); -+ printf("#define __SKAS_PT_REGS_\n"); -+ printf("\n"); -+ printf("#define HOST_FRAME_SIZE %d\n", FRAME_SIZE); -+ printf("#define HOST_FP_SIZE %d\n", -+ sizeof(struct user_i387_struct) / sizeof(unsigned long)); -+ printf("#define HOST_XFP_SIZE %d\n", -+ sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); -+ -+ PRINT_REG("IP", EIP); -+ PRINT_REG("SP", UESP); -+ PRINT_REG("EFLAGS", EFL); -+ PRINT_REG("EAX", EAX); -+ PRINT_REG("EBX", EBX); -+ PRINT_REG("ECX", ECX); -+ PRINT_REG("EDX", EDX); -+ PRINT_REG("ESI", ESI); -+ PRINT_REG("EDI", EDI); -+ PRINT_REG("EBP", EBP); -+ PRINT_REG("CS", CS); -+ PRINT_REG("SS", SS); -+ PRINT_REG("DS", DS); -+ PRINT_REG("FS", FS); -+ PRINT_REG("ES", ES); -+ PRINT_REG("GS", GS); -+ printf("\n"); -+ printf("#endif\n"); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/smp.c um/arch/um/kernel/smp.c ---- orig/arch/um/kernel/smp.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/smp.c 2003-11-15 02:59:25.000000000 -0500 -@@ -0,0 +1,328 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/sched.h" -+#include "linux/threads.h" -+#include "linux/interrupt.h" -+#include "asm/smp.h" -+#include "asm/processor.h" -+#include "asm/spinlock.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "asm/tlb.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "kern.h" -+#include "os.h" -+ -+/* Total count of live CPUs, set by smp_boot_cpus */ -+int smp_num_cpus = 1; -+ -+/* The 'big kernel lock' */ -+spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED}; -+ -+/* Per CPU bogomips and other parameters */ -+ -+/* The only piece used here is the ipi pipe, which is set before SMP is -+ * started and never changed. -+ */ -+struct cpuinfo_um cpu_data[NR_CPUS]; -+ -+/* CPU online map, set by smp_boot_cpus */ -+unsigned long cpu_online_map; -+ -+atomic_t global_bh_count; -+ -+/* Set when the idlers are all forked */ -+int smp_threads_ready = 0; -+ -+/* Not used by UML */ -+unsigned char global_irq_holder = 0; -+unsigned volatile long global_irq_lock; -+ -+/* A statistic, can be a little off */ -+static int num_reschedules_sent = 0; -+ -+mmu_gather_t mmu_gathers[NR_CPUS]; -+ -+void smp_send_reschedule(int cpu) -+{ -+ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); -+ num_reschedules_sent++; -+} -+ -+static void show(char * str) -+{ -+ int cpu = smp_processor_id(); -+ -+ printk(KERN_INFO "\n%s, CPU %d:\n", str, cpu); -+} -+ -+#define MAXCOUNT 100000000 -+ -+static inline void wait_on_bh(void) -+{ -+ int count = MAXCOUNT; -+ do { -+ if (!--count) { -+ show("wait_on_bh"); -+ count = ~0; -+ } -+ /* nothing .. wait for the other bh's to go away */ -+ } while (atomic_read(&global_bh_count) != 0); -+} -+ -+/* -+ * This is called when we want to synchronize with -+ * bottom half handlers. We need to wait until -+ * no other CPU is executing any bottom half handler. -+ * -+ * Don't wait if we're already running in an interrupt -+ * context or are inside a bh handler. -+ */ -+void synchronize_bh(void) -+{ -+ if (atomic_read(&global_bh_count) && !in_interrupt()) -+ wait_on_bh(); -+} -+ -+void smp_send_stop(void) -+{ -+ int i; -+ -+ printk(KERN_INFO "Stopping all CPUs..."); -+ for(i = 0; i < ncpus; i++){ -+ if(i == current->processor) -+ continue; -+ os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); -+ } -+ printk("done\n"); -+} -+ -+ -+static atomic_t smp_commenced = ATOMIC_INIT(0); -+static volatile unsigned long smp_callin_map = 0; -+ -+void smp_commence(void) -+{ -+ printk("All CPUs are go!\n"); -+ -+ wmb(); -+ atomic_set(&smp_commenced, 1); -+} -+ -+static int idle_proc(void *unused) -+{ -+ int cpu, err; -+ -+ set_current(current); -+ del_from_runqueue(current); -+ unhash_process(current); -+ -+ cpu = current->processor; -+ err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); -+ if(err < 0) -+ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); -+ -+ activate_ipi(cpu_data[cpu].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ wmb(); -+ if (test_and_set_bit(current->processor, &smp_callin_map)) { -+ printk("huh, CPU#%d already present??\n", current->processor); -+ BUG(); -+ } -+ -+ while (!atomic_read(&smp_commenced)) -+ cpu_relax(); -+ -+ init_idle(); -+ cpu_idle(); -+ return(0); -+} -+ -+static int idle_thread(int (*fn)(void *), int cpu) -+{ -+ struct task_struct *new_task; -+ int pid; -+ unsigned char c; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = NULL; -+ pid = do_fork(CLONE_VM | CLONE_PID, 0, NULL, 0); -+ if(pid < 0) -+ panic("do_fork failed in idle_thread"); -+ new_task = get_task(pid, 1); -+ -+ cpu_tasks[cpu] = ((struct cpu_task) -+ { .pid = new_task->thread.mode.tt.extern_pid, -+ .task = new_task } ); -+ init_tasks[cpu] = new_task; -+ new_task->processor = cpu; -+ new_task->cpus_allowed = 1 << cpu; -+ new_task->cpus_runnable = new_task->cpus_allowed; -+ CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, -+ sizeof(c)), -+ ({ panic("skas mode doesn't support SMP"); })); -+ return(new_task->thread.mode.tt.extern_pid); -+} -+ -+void smp_boot_cpus(void) -+{ -+ int err; -+ -+ set_bit(0, &cpu_online_map); -+ set_bit(0, &smp_callin_map); -+ -+ err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); -+ if(err < 0) -+ panic("CPU#0 failed to create IPI pipe, err = %d", -err); -+ -+ activate_ipi(cpu_data[0].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ if(ncpus < 1){ -+ printk(KERN_INFO "ncpus set to 1\n"); -+ ncpus = 1; -+ } -+ else if(ncpus > NR_CPUS){ -+ printk(KERN_INFO -+ "ncpus can't be greater than NR_CPUS, set to %d\n", -+ NR_CPUS); -+ ncpus = NR_CPUS; -+ } -+ -+ if(ncpus > 1){ -+ int i, pid; -+ -+ printk(KERN_INFO "Starting up other processors:\n"); -+ for(i=1;i<ncpus;i++){ -+ int waittime; -+ -+ /* Do this early, for hard_smp_processor_id() */ -+ cpu_tasks[i].pid = -1; -+ set_bit(i, &cpu_online_map); -+ smp_num_cpus++; -+ -+ pid = idle_thread(idle_proc, i); -+ printk(KERN_INFO "\t#%d - idle thread pid = %d.. ", -+ i, pid); -+ -+ waittime = 200000000; -+ while (waittime-- && !test_bit(i, &smp_callin_map)) -+ cpu_relax(); -+ -+ if (test_bit(i, &smp_callin_map)) -+ printk("online\n"); -+ else { -+ printk("failed\n"); -+ clear_bit(i, &cpu_online_map); -+ } -+ } -+ } -+} -+ -+int setup_profiling_timer(unsigned int multiplier) -+{ -+ printk(KERN_INFO "setup_profiling_timer\n"); -+ return(0); -+} -+ -+void smp_call_function_slave(int cpu); -+ -+void IPI_handler(int cpu) -+{ -+ unsigned char c; -+ int fd; -+ -+ fd = cpu_data[cpu].ipi_pipe[0]; -+ while (os_read_file(fd, &c, 1) == 1) { -+ switch (c) { -+ case 'C': -+ smp_call_function_slave(cpu); -+ break; -+ -+ case 'R': -+ current->need_resched = 1; -+ break; -+ -+ case 'S': -+ printk("CPU#%d stopping\n", cpu); -+ while(1) -+ pause(); -+ break; -+ -+ default: -+ printk("CPU#%d received unknown IPI [%c]!\n", cpu, c); -+ break; -+ } -+ } -+} -+ -+int hard_smp_processor_id(void) -+{ -+ return(pid_to_processor_id(os_getpid())); -+} -+ -+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; -+static atomic_t scf_started; -+static atomic_t scf_finished; -+static void (*func)(void *info); -+static void *info; -+ -+void smp_call_function_slave(int cpu) -+{ -+ atomic_inc(&scf_started); -+ (*func)(info); -+ atomic_inc(&scf_finished); -+} -+ -+int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, -+ int wait) -+{ -+ int cpus = smp_num_cpus - 1; -+ int i; -+ -+ if (!cpus) -+ return 0; -+ -+ spin_lock_bh(&call_lock); -+ atomic_set(&scf_started, 0); -+ atomic_set(&scf_finished, 0); -+ func = _func; -+ info = _info; -+ -+ for (i=0;i<NR_CPUS;i++) -+ if (i != current->processor && test_bit(i, &cpu_online_map)) -+ os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); -+ -+ while (atomic_read(&scf_started) != cpus) -+ barrier(); -+ -+ if (wait) -+ while (atomic_read(&scf_finished) != cpus) -+ barrier(); -+ -+ spin_unlock_bh(&call_lock); -+ return 0; -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/syscall_kern.c um/arch/um/kernel/syscall_kern.c ---- orig/arch/um/kernel/syscall_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/syscall_kern.c 2004-01-10 06:37:46.000000000 -0500 -@@ -0,0 +1,343 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/file.h" -+#include "linux/smp_lock.h" -+#include "linux/mm.h" -+#include "linux/utsname.h" -+#include "linux/msg.h" -+#include "linux/shm.h" -+#include "linux/sys.h" -+#include "linux/unistd.h" -+#include "linux/slab.h" -+#include "linux/utime.h" -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/ipc.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "sysdep/syscalls.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* Unlocked, I don't care if this is a bit off */ -+int nsyscalls = 0; -+ -+long um_mount(char * dev_name, char * dir_name, char * type, -+ unsigned long new_flags, void * data) -+{ -+ if(type == NULL) type = ""; -+ return(sys_mount(dev_name, dir_name, type, new_flags, data)); -+} -+ -+long sys_fork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_clone(unsigned long clone_flags, unsigned long newsp) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(clone_flags, newsp, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_vfork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+/* common code for old and new mmaps */ -+long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, unsigned long fd, -+ unsigned long pgoff) -+{ -+ int error = -EBADF; -+ struct file * file = NULL; -+ -+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); -+ if (!(flags & MAP_ANONYMOUS)) { -+ file = fget(fd); -+ if (!file) -+ goto out; -+ } -+ -+ down_write(&mm->mmap_sem); -+ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); -+ up_write(&mm->mmap_sem); -+ -+ if (file) -+ fput(file); -+ out: -+ return error; -+} -+ -+long sys_mmap2(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long pgoff) -+{ -+ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); -+} -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. -+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset) -+{ -+ int err = -EINVAL; -+ if (offset & ~PAGE_MASK) -+ goto out; -+ -+ err = do_mmap2(current->mm, addr, len, prot, flags, fd, -+ offset >> PAGE_SHIFT); -+ out: -+ return err; -+} -+/* -+ * sys_pipe() is the normal C calling standard for creating -+ * a pipe. It's not the way unix traditionally does this, though. -+ */ -+int sys_pipe(unsigned long * fildes) -+{ -+ int fd[2]; -+ int error; -+ -+ error = do_pipe(fd); -+ if (!error) { -+ if (copy_to_user(fildes, fd, sizeof(fd))) -+ error = -EFAULT; -+ } -+ return error; -+} -+ -+int sys_pause(void) -+{ -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ return -ERESTARTNOHAND; -+} -+ -+int sys_sigaction(int sig, const struct old_sigaction *act, -+ struct old_sigaction *oact) -+{ -+ struct k_sigaction new_ka, old_ka; -+ int ret; -+ -+ if (act) { -+ old_sigset_t mask; -+ if (verify_area(VERIFY_READ, act, sizeof(*act)) || -+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || -+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) -+ return -EFAULT; -+ __get_user(new_ka.sa.sa_flags, &act->sa_flags); -+ __get_user(mask, &act->sa_mask); -+ siginitset(&new_ka.sa.sa_mask, mask); -+ } -+ -+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); -+ -+ if (!ret && oact) { -+ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || -+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || -+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) -+ return -EFAULT; -+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags); -+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); -+ } -+ -+ return ret; -+} -+ -+/* -+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.. -+ * -+ * This is really horribly ugly. -+ */ -+int sys_ipc (uint call, int first, int second, -+ int third, void *ptr, long fifth) -+{ -+ int version, ret; -+ -+ version = call >> 16; /* hack for backward compatibility */ -+ call &= 0xffff; -+ -+ switch (call) { -+ case SEMOP: -+ return sys_semop (first, (struct sembuf *)ptr, second); -+ case SEMGET: -+ return sys_semget (first, second, third); -+ case SEMCTL: { -+ union semun fourth; -+ if (!ptr) -+ return -EINVAL; -+ if (get_user(fourth.__pad, (void **) ptr)) -+ return -EFAULT; -+ return sys_semctl (first, second, third, fourth); -+ } -+ -+ case MSGSND: -+ return sys_msgsnd (first, (struct msgbuf *) ptr, -+ second, third); -+ case MSGRCV: -+ switch (version) { -+ case 0: { -+ struct ipc_kludge tmp; -+ if (!ptr) -+ return -EINVAL; -+ -+ if (copy_from_user(&tmp, -+ (struct ipc_kludge *) ptr, -+ sizeof (tmp))) -+ return -EFAULT; -+ return sys_msgrcv (first, tmp.msgp, second, -+ tmp.msgtyp, third); -+ } -+ default: -+ panic("msgrcv with version != 0"); -+ return sys_msgrcv (first, -+ (struct msgbuf *) ptr, -+ second, fifth, third); -+ } -+ case MSGGET: -+ return sys_msgget ((key_t) first, second); -+ case MSGCTL: -+ return sys_msgctl (first, second, (struct msqid_ds *) ptr); -+ -+ case SHMAT: -+ switch (version) { -+ default: { -+ ulong raddr; -+ ret = sys_shmat (first, (char *) ptr, second, &raddr); -+ if (ret) -+ return ret; -+ return put_user (raddr, (ulong *) third); -+ } -+ case 1: /* iBCS2 emulator entry point */ -+ if (!segment_eq(get_fs(), get_ds())) -+ return -EINVAL; -+ return sys_shmat (first, (char *) ptr, second, (ulong *) third); -+ } -+ case SHMDT: -+ return sys_shmdt ((char *)ptr); -+ case SHMGET: -+ return sys_shmget (first, second, third); -+ case SHMCTL: -+ return sys_shmctl (first, second, -+ (struct shmid_ds *) ptr); -+ default: -+ return -EINVAL; -+ } -+} -+ -+int sys_uname(struct old_utsname * name) -+{ -+ int err; -+ if (!name) -+ return -EFAULT; -+ down_read(&uts_sem); -+ err=copy_to_user(name, &system_utsname, sizeof (*name)); -+ up_read(&uts_sem); -+ return err?-EFAULT:0; -+} -+ -+int sys_olduname(struct oldold_utsname * name) -+{ -+ int error; -+ -+ if (!name) -+ return -EFAULT; -+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) -+ return -EFAULT; -+ -+ down_read(&uts_sem); -+ -+ error = __copy_to_user(&name->sysname,&system_utsname.sysname, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->sysname+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->nodename,&system_utsname.nodename, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->nodename+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->release,&system_utsname.release, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->release+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->version,&system_utsname.version, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->version+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->machine,&system_utsname.machine, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->machine+__OLD_UTS_LEN); -+ -+ up_read(&uts_sem); -+ -+ error = error ? -EFAULT : 0; -+ -+ return error; -+} -+ -+int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -+{ -+ return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); -+} -+ -+long execute_syscall(void *r) -+{ -+ return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); -+} -+ -+spinlock_t syscall_lock = SPIN_LOCK_UNLOCKED; -+ -+static int syscall_index = 0; -+ -+int next_syscall_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&syscall_lock); -+ ret = syscall_index; -+ if(++syscall_index == limit) -+ syscall_index = 0; -+ spin_unlock(&syscall_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sys_call_table.c um/arch/um/kernel/sys_call_table.c ---- orig/arch/um/kernel/sys_call_table.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/sys_call_table.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,496 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/unistd.h" -+#include "linux/version.h" -+#include "linux/sys.h" -+#include "asm/signal.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_exit; -+extern syscall_handler_t sys_fork; -+extern syscall_handler_t sys_creat; -+extern syscall_handler_t sys_link; -+extern syscall_handler_t sys_unlink; -+extern syscall_handler_t sys_chdir; -+extern syscall_handler_t sys_mknod; -+extern syscall_handler_t sys_chmod; -+extern syscall_handler_t sys_lchown16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_stat; -+extern syscall_handler_t sys_getpid; -+extern syscall_handler_t sys_oldumount; -+extern syscall_handler_t sys_setuid16; -+extern syscall_handler_t sys_getuid16; -+extern syscall_handler_t sys_ptrace; -+extern syscall_handler_t sys_alarm; -+extern syscall_handler_t sys_fstat; -+extern syscall_handler_t sys_pause; -+extern syscall_handler_t sys_utime; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_access; -+extern syscall_handler_t sys_nice; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_sync; -+extern syscall_handler_t sys_kill; -+extern syscall_handler_t sys_rename; -+extern syscall_handler_t sys_mkdir; -+extern syscall_handler_t sys_rmdir; -+extern syscall_handler_t sys_pipe; -+extern syscall_handler_t sys_times; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_brk; -+extern syscall_handler_t sys_setgid16; -+extern syscall_handler_t sys_getgid16; -+extern syscall_handler_t sys_signal; -+extern syscall_handler_t sys_geteuid16; -+extern syscall_handler_t sys_getegid16; -+extern syscall_handler_t sys_acct; -+extern syscall_handler_t sys_umount; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ioctl; -+extern syscall_handler_t sys_fcntl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setpgid; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_olduname; -+extern syscall_handler_t sys_umask; -+extern syscall_handler_t sys_chroot; -+extern syscall_handler_t sys_ustat; -+extern syscall_handler_t sys_dup2; -+extern syscall_handler_t sys_getppid; -+extern syscall_handler_t sys_getpgrp; -+extern syscall_handler_t sys_sigaction; -+extern syscall_handler_t sys_sgetmask; -+extern syscall_handler_t sys_ssetmask; -+extern syscall_handler_t sys_setreuid16; -+extern syscall_handler_t sys_setregid16; -+extern syscall_handler_t sys_sigsuspend; -+extern syscall_handler_t sys_sigpending; -+extern syscall_handler_t sys_sethostname; -+extern syscall_handler_t sys_setrlimit; -+extern syscall_handler_t sys_old_getrlimit; -+extern syscall_handler_t sys_getrusage; -+extern syscall_handler_t sys_gettimeofday; -+extern syscall_handler_t sys_settimeofday; -+extern syscall_handler_t sys_getgroups16; -+extern syscall_handler_t sys_setgroups16; -+extern syscall_handler_t sys_symlink; -+extern syscall_handler_t sys_lstat; -+extern syscall_handler_t sys_readlink; -+extern syscall_handler_t sys_uselib; -+extern syscall_handler_t sys_swapon; -+extern syscall_handler_t sys_reboot; -+extern syscall_handler_t old_readdir; -+extern syscall_handler_t sys_munmap; -+extern syscall_handler_t sys_truncate; -+extern syscall_handler_t sys_ftruncate; -+extern syscall_handler_t sys_fchmod; -+extern syscall_handler_t sys_fchown16; -+extern syscall_handler_t sys_getpriority; -+extern syscall_handler_t sys_setpriority; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_statfs; -+extern syscall_handler_t sys_fstatfs; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_socketcall; -+extern syscall_handler_t sys_syslog; -+extern syscall_handler_t sys_setitimer; -+extern syscall_handler_t sys_getitimer; -+extern syscall_handler_t sys_newstat; -+extern syscall_handler_t sys_newlstat; -+extern syscall_handler_t sys_newfstat; -+extern syscall_handler_t sys_uname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vhangup; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_swapoff; -+extern syscall_handler_t sys_sysinfo; -+extern syscall_handler_t sys_ipc; -+extern syscall_handler_t sys_fsync; -+extern syscall_handler_t sys_sigreturn; -+extern syscall_handler_t sys_rt_sigreturn; -+extern syscall_handler_t sys_clone; -+extern syscall_handler_t sys_setdomainname; -+extern syscall_handler_t sys_newuname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_adjtimex; -+extern syscall_handler_t sys_mprotect; -+extern syscall_handler_t sys_sigprocmask; -+extern syscall_handler_t sys_create_module; -+extern syscall_handler_t sys_init_module; -+extern syscall_handler_t sys_delete_module; -+extern syscall_handler_t sys_get_kernel_syms; -+extern syscall_handler_t sys_quotactl; -+extern syscall_handler_t sys_getpgid; -+extern syscall_handler_t sys_fchdir; -+extern syscall_handler_t sys_bdflush; -+extern syscall_handler_t sys_sysfs; -+extern syscall_handler_t sys_personality; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setfsuid16; -+extern syscall_handler_t sys_setfsgid16; -+extern syscall_handler_t sys_llseek; -+extern syscall_handler_t sys_getdents; -+extern syscall_handler_t sys_flock; -+extern syscall_handler_t sys_msync; -+extern syscall_handler_t sys_readv; -+extern syscall_handler_t sys_writev; -+extern syscall_handler_t sys_getsid; -+extern syscall_handler_t sys_fdatasync; -+extern syscall_handler_t sys_sysctl; -+extern syscall_handler_t sys_mlock; -+extern syscall_handler_t sys_munlock; -+extern syscall_handler_t sys_mlockall; -+extern syscall_handler_t sys_munlockall; -+extern syscall_handler_t sys_sched_setparam; -+extern syscall_handler_t sys_sched_getparam; -+extern syscall_handler_t sys_sched_setscheduler; -+extern syscall_handler_t sys_sched_getscheduler; -+extern syscall_handler_t sys_sched_get_priority_max; -+extern syscall_handler_t sys_sched_get_priority_min; -+extern syscall_handler_t sys_sched_rr_get_interval; -+extern syscall_handler_t sys_nanosleep; -+extern syscall_handler_t sys_mremap; -+extern syscall_handler_t sys_setresuid16; -+extern syscall_handler_t sys_getresuid16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_query_module; -+extern syscall_handler_t sys_poll; -+extern syscall_handler_t sys_nfsservctl; -+extern syscall_handler_t sys_setresgid16; -+extern syscall_handler_t sys_getresgid16; -+extern syscall_handler_t sys_prctl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_rt_sigaction; -+extern syscall_handler_t sys_rt_sigprocmask; -+extern syscall_handler_t sys_rt_sigpending; -+extern syscall_handler_t sys_rt_sigtimedwait; -+extern syscall_handler_t sys_rt_sigqueueinfo; -+extern syscall_handler_t sys_rt_sigsuspend; -+extern syscall_handler_t sys_pread; -+extern syscall_handler_t sys_pwrite; -+extern syscall_handler_t sys_chown16; -+extern syscall_handler_t sys_getcwd; -+extern syscall_handler_t sys_capget; -+extern syscall_handler_t sys_capset; -+extern syscall_handler_t sys_sigaltstack; -+extern syscall_handler_t sys_sendfile; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vfork; -+extern syscall_handler_t sys_getrlimit; -+extern syscall_handler_t sys_mmap2; -+extern syscall_handler_t sys_truncate64; -+extern syscall_handler_t sys_ftruncate64; -+extern syscall_handler_t sys_stat64; -+extern syscall_handler_t sys_lstat64; -+extern syscall_handler_t sys_fstat64; -+extern syscall_handler_t sys_lchown; -+extern syscall_handler_t sys_getuid; -+extern syscall_handler_t sys_getgid; -+extern syscall_handler_t sys_geteuid; -+extern syscall_handler_t sys_getegid; -+extern syscall_handler_t sys_setreuid; -+extern syscall_handler_t sys_setregid; -+extern syscall_handler_t sys_getgroups; -+extern syscall_handler_t sys_setgroups; -+extern syscall_handler_t sys_fchown; -+extern syscall_handler_t sys_setresuid; -+extern syscall_handler_t sys_getresuid; -+extern syscall_handler_t sys_setresgid; -+extern syscall_handler_t sys_getresgid; -+extern syscall_handler_t sys_chown; -+extern syscall_handler_t sys_setuid; -+extern syscall_handler_t sys_setgid; -+extern syscall_handler_t sys_setfsuid; -+extern syscall_handler_t sys_setfsgid; -+extern syscall_handler_t sys_pivot_root; -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+extern syscall_handler_t sys_fcntl64; -+extern syscall_handler_t sys_getdents64; -+extern syscall_handler_t sys_gettid; -+extern syscall_handler_t sys_readahead; -+extern syscall_handler_t sys_tkill; -+extern syscall_handler_t sys_setxattr; -+extern syscall_handler_t sys_lsetxattr; -+extern syscall_handler_t sys_fsetxattr; -+extern syscall_handler_t sys_getxattr; -+extern syscall_handler_t sys_lgetxattr; -+extern syscall_handler_t sys_fgetxattr; -+extern syscall_handler_t sys_listxattr; -+extern syscall_handler_t sys_llistxattr; -+extern syscall_handler_t sys_flistxattr; -+extern syscall_handler_t sys_removexattr; -+extern syscall_handler_t sys_lremovexattr; -+extern syscall_handler_t sys_fremovexattr; -+extern syscall_handler_t sys_sendfile64; -+ -+extern syscall_handler_t um_mount; -+extern syscall_handler_t um_time; -+extern syscall_handler_t um_stime; -+ -+#define LAST_GENERIC_SYSCALL __NR_exit_group -+ -+#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL -+#define LAST_SYSCALL LAST_GENERIC_SYSCALL -+#else -+#define LAST_SYSCALL LAST_ARCH_SYSCALL -+#endif -+ -+syscall_handler_t *sys_call_table[] = { -+ [ 0 ] = sys_ni_syscall, -+ [ __NR_exit ] = sys_exit, -+ [ __NR_fork ] = sys_fork, -+ [ __NR_read ] = (syscall_handler_t *) sys_read, -+ [ __NR_write ] = (syscall_handler_t *) sys_write, -+ -+ /* These three are declared differently in asm/unistd.h */ -+ [ __NR_open ] = (syscall_handler_t *) sys_open, -+ [ __NR_close ] = (syscall_handler_t *) sys_close, -+ [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, -+ [ __NR_creat ] = sys_creat, -+ [ __NR_link ] = sys_link, -+ [ __NR_unlink ] = sys_unlink, -+ -+ /* declared differently in kern_util.h */ -+ [ __NR_execve ] = (syscall_handler_t *) sys_execve, -+ [ __NR_chdir ] = sys_chdir, -+ [ __NR_time ] = um_time, -+ [ __NR_mknod ] = sys_mknod, -+ [ __NR_chmod ] = sys_chmod, -+ [ __NR_lchown ] = sys_lchown16, -+ [ __NR_break ] = sys_ni_syscall, -+ [ __NR_oldstat ] = sys_stat, -+ [ __NR_lseek ] = (syscall_handler_t *) sys_lseek, -+ [ __NR_getpid ] = sys_getpid, -+ [ __NR_mount ] = um_mount, -+ [ __NR_umount ] = sys_oldumount, -+ [ __NR_setuid ] = sys_setuid16, -+ [ __NR_getuid ] = sys_getuid16, -+ [ __NR_stime ] = um_stime, -+ [ __NR_ptrace ] = sys_ptrace, -+ [ __NR_alarm ] = sys_alarm, -+ [ __NR_oldfstat ] = sys_fstat, -+ [ __NR_pause ] = sys_pause, -+ [ __NR_utime ] = sys_utime, -+ [ __NR_stty ] = sys_ni_syscall, -+ [ __NR_gtty ] = sys_ni_syscall, -+ [ __NR_access ] = sys_access, -+ [ __NR_nice ] = sys_nice, -+ [ __NR_ftime ] = sys_ni_syscall, -+ [ __NR_sync ] = sys_sync, -+ [ __NR_kill ] = sys_kill, -+ [ __NR_rename ] = sys_rename, -+ [ __NR_mkdir ] = sys_mkdir, -+ [ __NR_rmdir ] = sys_rmdir, -+ -+ /* Declared differently in asm/unistd.h */ -+ [ __NR_dup ] = (syscall_handler_t *) sys_dup, -+ [ __NR_pipe ] = sys_pipe, -+ [ __NR_times ] = sys_times, -+ [ __NR_prof ] = sys_ni_syscall, -+ [ __NR_brk ] = sys_brk, -+ [ __NR_setgid ] = sys_setgid16, -+ [ __NR_getgid ] = sys_getgid16, -+ [ __NR_signal ] = sys_signal, -+ [ __NR_geteuid ] = sys_geteuid16, -+ [ __NR_getegid ] = sys_getegid16, -+ [ __NR_acct ] = sys_acct, -+ [ __NR_umount2 ] = sys_umount, -+ [ __NR_lock ] = sys_ni_syscall, -+ [ __NR_ioctl ] = sys_ioctl, -+ [ __NR_fcntl ] = sys_fcntl, -+ [ __NR_mpx ] = sys_ni_syscall, -+ [ __NR_setpgid ] = sys_setpgid, -+ [ __NR_ulimit ] = sys_ni_syscall, -+ [ __NR_oldolduname ] = sys_olduname, -+ [ __NR_umask ] = sys_umask, -+ [ __NR_chroot ] = sys_chroot, -+ [ __NR_ustat ] = sys_ustat, -+ [ __NR_dup2 ] = sys_dup2, -+ [ __NR_getppid ] = sys_getppid, -+ [ __NR_getpgrp ] = sys_getpgrp, -+ [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, -+ [ __NR_sigaction ] = sys_sigaction, -+ [ __NR_sgetmask ] = sys_sgetmask, -+ [ __NR_ssetmask ] = sys_ssetmask, -+ [ __NR_setreuid ] = sys_setreuid16, -+ [ __NR_setregid ] = sys_setregid16, -+ [ __NR_sigsuspend ] = sys_sigsuspend, -+ [ __NR_sigpending ] = sys_sigpending, -+ [ __NR_sethostname ] = sys_sethostname, -+ [ __NR_setrlimit ] = sys_setrlimit, -+ [ __NR_getrlimit ] = sys_old_getrlimit, -+ [ __NR_getrusage ] = sys_getrusage, -+ [ __NR_gettimeofday ] = sys_gettimeofday, -+ [ __NR_settimeofday ] = sys_settimeofday, -+ [ __NR_getgroups ] = sys_getgroups16, -+ [ __NR_setgroups ] = sys_setgroups16, -+ [ __NR_symlink ] = sys_symlink, -+ [ __NR_oldlstat ] = sys_lstat, -+ [ __NR_readlink ] = sys_readlink, -+ [ __NR_uselib ] = sys_uselib, -+ [ __NR_swapon ] = sys_swapon, -+ [ __NR_reboot ] = sys_reboot, -+ [ __NR_readdir ] = old_readdir, -+ [ __NR_munmap ] = sys_munmap, -+ [ __NR_truncate ] = sys_truncate, -+ [ __NR_ftruncate ] = sys_ftruncate, -+ [ __NR_fchmod ] = sys_fchmod, -+ [ __NR_fchown ] = sys_fchown16, -+ [ __NR_getpriority ] = sys_getpriority, -+ [ __NR_setpriority ] = sys_setpriority, -+ [ __NR_profil ] = sys_ni_syscall, -+ [ __NR_statfs ] = sys_statfs, -+ [ __NR_fstatfs ] = sys_fstatfs, -+ [ __NR_ioperm ] = sys_ni_syscall, -+ [ __NR_socketcall ] = sys_socketcall, -+ [ __NR_syslog ] = sys_syslog, -+ [ __NR_setitimer ] = sys_setitimer, -+ [ __NR_getitimer ] = sys_getitimer, -+ [ __NR_stat ] = sys_newstat, -+ [ __NR_lstat ] = sys_newlstat, -+ [ __NR_fstat ] = sys_newfstat, -+ [ __NR_olduname ] = sys_uname, -+ [ __NR_iopl ] = sys_ni_syscall, -+ [ __NR_vhangup ] = sys_vhangup, -+ [ __NR_idle ] = sys_ni_syscall, -+ [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, -+ [ __NR_swapoff ] = sys_swapoff, -+ [ __NR_sysinfo ] = sys_sysinfo, -+ [ __NR_ipc ] = sys_ipc, -+ [ __NR_fsync ] = sys_fsync, -+ [ __NR_sigreturn ] = sys_sigreturn, -+ [ __NR_clone ] = sys_clone, -+ [ __NR_setdomainname ] = sys_setdomainname, -+ [ __NR_uname ] = sys_newuname, -+ [ __NR_adjtimex ] = sys_adjtimex, -+ [ __NR_mprotect ] = sys_mprotect, -+ [ __NR_sigprocmask ] = sys_sigprocmask, -+ [ __NR_create_module ] = sys_create_module, -+ [ __NR_init_module ] = sys_init_module, -+ [ __NR_delete_module ] = sys_delete_module, -+ [ __NR_get_kernel_syms ] = sys_get_kernel_syms, -+ [ __NR_quotactl ] = sys_quotactl, -+ [ __NR_getpgid ] = sys_getpgid, -+ [ __NR_fchdir ] = sys_fchdir, -+ [ __NR_bdflush ] = sys_bdflush, -+ [ __NR_sysfs ] = sys_sysfs, -+ [ __NR_personality ] = sys_personality, -+ [ __NR_afs_syscall ] = sys_ni_syscall, -+ [ __NR_setfsuid ] = sys_setfsuid16, -+ [ __NR_setfsgid ] = sys_setfsgid16, -+ [ __NR__llseek ] = sys_llseek, -+ [ __NR_getdents ] = sys_getdents, -+ [ __NR__newselect ] = (syscall_handler_t *) sys_select, -+ [ __NR_flock ] = sys_flock, -+ [ __NR_msync ] = sys_msync, -+ [ __NR_readv ] = sys_readv, -+ [ __NR_writev ] = sys_writev, -+ [ __NR_getsid ] = sys_getsid, -+ [ __NR_fdatasync ] = sys_fdatasync, -+ [ __NR__sysctl ] = sys_sysctl, -+ [ __NR_mlock ] = sys_mlock, -+ [ __NR_munlock ] = sys_munlock, -+ [ __NR_mlockall ] = sys_mlockall, -+ [ __NR_munlockall ] = sys_munlockall, -+ [ __NR_sched_setparam ] = sys_sched_setparam, -+ [ __NR_sched_getparam ] = sys_sched_getparam, -+ [ __NR_sched_setscheduler ] = sys_sched_setscheduler, -+ [ __NR_sched_getscheduler ] = sys_sched_getscheduler, -+ [ __NR_sched_yield ] = (syscall_handler_t *) yield, -+ [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, -+ [ __NR_sched_get_priority_min ] = sys_sched_get_priority_min, -+ [ __NR_sched_rr_get_interval ] = sys_sched_rr_get_interval, -+ [ __NR_nanosleep ] = sys_nanosleep, -+ [ __NR_mremap ] = sys_mremap, -+ [ __NR_setresuid ] = sys_setresuid16, -+ [ __NR_getresuid ] = sys_getresuid16, -+ [ __NR_vm86 ] = sys_ni_syscall, -+ [ __NR_query_module ] = sys_query_module, -+ [ __NR_poll ] = sys_poll, -+ [ __NR_nfsservctl ] = sys_nfsservctl, -+ [ __NR_setresgid ] = sys_setresgid16, -+ [ __NR_getresgid ] = sys_getresgid16, -+ [ __NR_prctl ] = sys_prctl, -+ [ __NR_rt_sigreturn ] = sys_rt_sigreturn, -+ [ __NR_rt_sigaction ] = sys_rt_sigaction, -+ [ __NR_rt_sigprocmask ] = sys_rt_sigprocmask, -+ [ __NR_rt_sigpending ] = sys_rt_sigpending, -+ [ __NR_rt_sigtimedwait ] = sys_rt_sigtimedwait, -+ [ __NR_rt_sigqueueinfo ] = sys_rt_sigqueueinfo, -+ [ __NR_rt_sigsuspend ] = sys_rt_sigsuspend, -+ [ __NR_pread ] = sys_pread, -+ [ __NR_pwrite ] = sys_pwrite, -+ [ __NR_chown ] = sys_chown16, -+ [ __NR_getcwd ] = sys_getcwd, -+ [ __NR_capget ] = sys_capget, -+ [ __NR_capset ] = sys_capset, -+ [ __NR_sigaltstack ] = sys_sigaltstack, -+ [ __NR_sendfile ] = sys_sendfile, -+ [ __NR_getpmsg ] = sys_ni_syscall, -+ [ __NR_putpmsg ] = sys_ni_syscall, -+ [ __NR_vfork ] = sys_vfork, -+ [ __NR_ugetrlimit ] = sys_getrlimit, -+ [ __NR_mmap2 ] = sys_mmap2, -+ [ __NR_truncate64 ] = sys_truncate64, -+ [ __NR_ftruncate64 ] = sys_ftruncate64, -+ [ __NR_stat64 ] = sys_stat64, -+ [ __NR_lstat64 ] = sys_lstat64, -+ [ __NR_fstat64 ] = sys_fstat64, -+ [ __NR_fcntl64 ] = sys_fcntl64, -+ [ __NR_getdents64 ] = sys_getdents64, -+ [ __NR_security ] = sys_ni_syscall, -+ [ __NR_gettid ] = sys_gettid, -+ [ __NR_readahead ] = sys_readahead, -+ [ __NR_setxattr ] = sys_setxattr, -+ [ __NR_lsetxattr ] = sys_lsetxattr, -+ [ __NR_fsetxattr ] = sys_fsetxattr, -+ [ __NR_getxattr ] = sys_getxattr, -+ [ __NR_lgetxattr ] = sys_lgetxattr, -+ [ __NR_fgetxattr ] = sys_fgetxattr, -+ [ __NR_listxattr ] = sys_listxattr, -+ [ __NR_llistxattr ] = sys_llistxattr, -+ [ __NR_flistxattr ] = sys_flistxattr, -+ [ __NR_removexattr ] = sys_removexattr, -+ [ __NR_lremovexattr ] = sys_lremovexattr, -+ [ __NR_fremovexattr ] = sys_fremovexattr, -+ [ __NR_tkill ] = sys_tkill, -+ [ __NR_sendfile64 ] = sys_sendfile64, -+ [ __NR_futex ] = sys_ni_syscall, -+ [ __NR_sched_setaffinity ] = sys_ni_syscall, -+ [ __NR_sched_getaffinity ] = sys_ni_syscall, -+ [ __NR_set_thread_area ] = sys_ni_syscall, -+ [ __NR_get_thread_area ] = sys_ni_syscall, -+ [ __NR_io_setup ] = sys_ni_syscall, -+ [ __NR_io_destroy ] = sys_ni_syscall, -+ [ __NR_io_getevents ] = sys_ni_syscall, -+ [ __NR_io_submit ] = sys_ni_syscall, -+ [ __NR_io_cancel ] = sys_ni_syscall, -+ [ __NR_alloc_hugepages ] = sys_ni_syscall, -+ [ __NR_free_hugepages ] = sys_ni_syscall, -+ [ __NR_exit_group ] = sys_ni_syscall, -+ -+ ARCH_SYSCALLS -+ [ LAST_SYSCALL + 1 ... NR_syscalls ] = -+ (syscall_handler_t *) sys_ni_syscall -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/syscall_user.c um/arch/um/kernel/syscall_user.c ---- orig/arch/um/kernel/syscall_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/syscall_user.c 2003-12-22 01:32:48.000000000 -0500 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <sys/time.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+ -+struct { -+ int syscall; -+ int pid; -+ int result; -+ struct timeval start; -+ struct timeval end; -+} syscall_record[1024]; -+ -+int record_syscall_start(int syscall) -+{ -+ int max, index; -+ -+ max = sizeof(syscall_record)/sizeof(syscall_record[0]); -+ index = next_syscall_index(max); -+ -+ syscall_record[index].syscall = syscall; -+ syscall_record[index].pid = current_pid(); -+ syscall_record[index].result = 0xdeadbeef; -+ gettimeofday(&syscall_record[index].start, NULL); -+ return(index); -+} -+ -+void record_syscall_end(int index, int result) -+{ -+ syscall_record[index].result = result; -+ gettimeofday(&syscall_record[index].end, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sysrq.c um/arch/um/kernel/sysrq.c ---- orig/arch/um/kernel/sysrq.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/sysrq.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/kernel.h" -+#include "linux/module.h" -+#include "asm/page.h" -+#include "asm/processor.h" -+#include "sysrq.h" -+#include "user_util.h" -+ -+ /* -+ * If the address is either in the .text section of the -+ * kernel, or in the vmalloc'ed module regions, it *may* -+ * be the address of a calling routine -+ */ -+ -+#ifdef CONFIG_MODULES -+ -+extern struct module *module_list; -+extern struct module kernel_module; -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+ struct module *mod; -+ -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext) -+ return 1; -+ -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... */ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+ -+ return retval; -+} -+ -+#else -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ return (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+} -+ -+#endif -+ -+void show_trace(unsigned long * stack) -+{ -+ int i; -+ unsigned long addr; -+ -+ if (!stack) -+ stack = (unsigned long*) &stack; -+ -+ printk("Call Trace: "); -+ i = 1; -+ while (((long) stack & (THREAD_SIZE-1)) != 0) { -+ addr = *stack++; -+ if (kernel_text_address(addr)) { -+ if (i && ((i % 6) == 0)) -+ printk("\n "); -+ printk("[<%08lx>] ", addr); -+ i++; -+ } -+ } -+ printk("\n"); -+} -+ -+void show_trace_task(struct task_struct *tsk) -+{ -+ unsigned long esp = PT_REGS_SP(&tsk->thread.regs); -+ -+ /* User space on another CPU? */ -+ if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ return; -+ show_trace((unsigned long *)esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tempfile.c um/arch/um/kernel/tempfile.c ---- orig/arch/um/kernel/tempfile.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tempfile.c 2003-12-22 01:33:38.000000000 -0500 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/param.h> -+#include "init.h" -+ -+/* Modified from create_mem_file and start_debugger */ -+static char *tempdir = NULL; -+ -+static void __init find_tempdir(void) -+{ -+ char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; -+ int i; -+ char *dir = NULL; -+ -+ if(tempdir != NULL) return; /* We've already been called */ -+ for(i = 0; dirs[i]; i++){ -+ dir = getenv(dirs[i]); -+ if((dir != NULL) && (*dir != '\0')) -+ break; -+ } -+ if((dir == NULL) || (*dir == '\0')) -+ dir = "/tmp"; -+ -+ tempdir = malloc(strlen(dir) + 2); -+ if(tempdir == NULL){ -+ fprintf(stderr, "Failed to malloc tempdir, " -+ "errno = %d\n", errno); -+ return; -+ } -+ strcpy(tempdir, dir); -+ strcat(tempdir, "/"); -+} -+ -+int make_tempfile(const char *template, char **out_tempname, int do_unlink) -+{ -+ char tempname[MAXPATHLEN]; -+ int fd; -+ -+ find_tempdir(); -+ if (*template != '/') -+ strcpy(tempname, tempdir); -+ else -+ *tempname = 0; -+ strcat(tempname, template); -+ fd = mkstemp(tempname); -+ if(fd < 0){ -+ fprintf(stderr, "open - cannot create %s: %s\n", tempname, -+ strerror(errno)); -+ return -1; -+ } -+ if(do_unlink && (unlink(tempname) < 0)){ -+ perror("unlink"); -+ return -1; -+ } -+ if(out_tempname){ -+ *out_tempname = strdup(tempname); -+ if(*out_tempname == NULL){ -+ perror("strdup"); -+ return -1; -+ } -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/time.c um/arch/um/kernel/time.c ---- orig/arch/um/kernel/time.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/time.c 2004-02-12 07:09:48.000000000 -0500 -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <time.h> -+#include <sys/time.h> -+#include <signal.h> -+#include <errno.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "time_user.h" -+ -+extern struct timeval xtime; -+ -+struct timeval local_offset = { 0, 0 }; -+ -+void timer(void) -+{ -+ gettimeofday(&xtime, NULL); -+ timeradd(&xtime, &local_offset, &xtime); -+} -+ -+void set_interval(int timer_type) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval interval = ((struct itimerval) { { 0, usec }, -+ { 0, usec } }); -+ -+ if(setitimer(timer_type, &interval, NULL) == -1) -+ panic("setitimer failed - errno = %d\n", errno); -+} -+ -+void enable_timer(void) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval enable = ((struct itimerval) { { 0, usec }, -+ { 0, usec }}); -+ if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) -+ printk("enable_timer - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void switch_timers(int to_real) -+{ -+ struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -+ struct itimerval enable = ((struct itimerval) { { 0, 1000000/hz() }, -+ { 0, 1000000/hz() }}); -+ int old, new; -+ -+ if(to_real){ -+ old = ITIMER_VIRTUAL; -+ new = ITIMER_REAL; -+ } -+ else { -+ old = ITIMER_REAL; -+ new = ITIMER_VIRTUAL; -+ } -+ -+ if((setitimer(old, &disable, NULL) < 0) || -+ (setitimer(new, &enable, NULL))) -+ printk("switch_timers - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void idle_timer(void) -+{ -+ if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) -+ panic("Couldn't unset SIGVTALRM handler"); -+ -+ set_handler(SIGALRM, (__sighandler_t) alarm_handler, -+ SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); -+ set_interval(ITIMER_REAL); -+} -+ -+static unsigned long long get_host_hz(void) -+{ -+ char mhzline[16], *end; -+ unsigned long long mhz; -+ int ret, mult, rest, len; -+ -+ ret = cpu_feature("cpu MHz", mhzline, -+ sizeof(mhzline) / sizeof(mhzline[0])); -+ if(!ret) -+ panic ("Could not get host MHZ"); -+ -+ mhz = strtoul(mhzline, &end, 10); -+ -+ /* This business is to parse a floating point number without using -+ * floating types. -+ */ -+ -+ rest = 0; -+ mult = 0; -+ if(*end == '.'){ -+ end++; -+ len = strlen(end); -+ if(len < 6) -+ mult = 6 - len; -+ else if(len > 6) -+ end[6] = '\0'; -+ rest = strtoul(end, NULL, 10); -+ while(mult-- > 0) -+ rest *= 10; -+ } -+ -+ return(1000000 * mhz + rest); -+} -+ -+unsigned long long host_hz = 0; -+ -+void time_init(void) -+{ -+ /* XXX This is to fill xtime with something real - otherwise by the -+ * time /proc is mounted, no timers have fired, and xtime is still 0, -+ * meaning it shows times of Jan 1 1970. The real fix is to figure -+ * out why no timers have happened by then. -+ */ -+ timer(); -+ -+ host_hz = get_host_hz(); -+ if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+void do_gettimeofday(struct timeval *tv) -+{ -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(tv, NULL); -+ timeradd(tv, &local_offset, tv); -+ time_unlock(flags); -+} -+ -+void do_settimeofday(struct timeval *tv) -+{ -+ struct timeval now; -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(&now, NULL); -+ timersub(tv, &now, &local_offset); -+ time_unlock(flags); -+} -+ -+void idle_sleep(int secs) -+{ -+ struct timespec ts; -+ -+ ts.tv_sec = secs; -+ ts.tv_nsec = 0; -+ nanosleep(&ts, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/time_kern.c um/arch/um/kernel/time_kern.c ---- orig/arch/um/kernel/time_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/time_kern.c 2004-02-14 06:51:31.000000000 -0500 -@@ -0,0 +1,205 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/unistd.h" -+#include "linux/stddef.h" -+#include "linux/spinlock.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/init.h" -+#include "linux/delay.h" -+#include "asm/irq.h" -+#include "asm/param.h" -+#include "asm/current.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "time_user.h" -+#include "mode.h" -+ -+extern rwlock_t xtime_lock; -+ -+int hz(void) -+{ -+ return(HZ); -+} -+ -+/* Changed at early boot */ -+int timer_irq_inited = 0; -+ -+/* missed_ticks will be modified after kernel memory has been -+ * write-protected, so this puts it in a section which will be left -+ * write-enabled. -+ */ -+int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; -+ -+static int first_tick; -+static unsigned long long prev_tsc; -+static long long delta; /* Deviation per interval */ -+ -+extern unsigned long long host_hz; -+ -+void timer_irq(union uml_pt_regs *regs) -+{ -+ unsigned long long ticks = 0; -+ -+ if(!timer_irq_inited){ -+ /* This is to ensure that ticks don't pile up when -+ * the timer handler is suspended */ -+ first_tick = 0; -+ return; -+ } -+ -+ if(first_tick){ -+#if defined(CONFIG_UML_REAL_TIME_CLOCK) -+ unsigned long long tsc; -+ /* We've had 1 tick */ -+ tsc = time_stamp(); -+ -+ delta += tsc - prev_tsc; -+ prev_tsc = tsc; -+ -+ ticks += (delta * HZ) / host_hz; -+ delta -= (ticks * host_hz) / HZ; -+#else -+ ticks = 1; -+#endif -+ } -+ else { -+ prev_tsc = time_stamp(); -+ first_tick = 1; -+ } -+ -+ while(ticks > 0){ -+ do_IRQ(TIMER_IRQ, regs); -+ ticks--; -+ } -+} -+ -+void boot_timer_handler(int sig) -+{ -+ struct pt_regs regs; -+ -+ CHOOSE_MODE((void) -+ (UPT_SC(®s.regs) = (struct sigcontext *) (&sig + 1)), -+ (void) (regs.regs.skas.is_user = 0)); -+ do_timer(®s); -+} -+ -+void um_timer(int irq, void *dev, struct pt_regs *regs) -+{ -+ do_timer(regs); -+ write_lock(&xtime_lock); -+ vxtime_lock(); -+ timer(); -+ vxtime_unlock(); -+ write_unlock(&xtime_lock); -+} -+ -+long um_time(int * tloc) -+{ -+ struct timeval now; -+ -+ do_gettimeofday(&now); -+ if (tloc) { -+ if (put_user(now.tv_sec,tloc)) -+ now.tv_sec = -EFAULT; -+ } -+ return now.tv_sec; -+} -+ -+long um_stime(int * tptr) -+{ -+ int value; -+ struct timeval new; -+ -+ if (get_user(value, tptr)) -+ return -EFAULT; -+ new.tv_sec = value; -+ new.tv_usec = 0; -+ do_settimeofday(&new); -+ return 0; -+} -+ -+/* XXX Needs to be moved under sys-i386 */ -+void __delay(um_udelay_t time) -+{ -+ /* Stolen from the i386 __loop_delay */ -+ int d0; -+ __asm__ __volatile__( -+ "\tjmp 1f\n" -+ ".align 16\n" -+ "1:\tjmp 2f\n" -+ ".align 16\n" -+ "2:\tdecl %0\n\tjns 2b" -+ :"=&a" (d0) -+ :"0" (time)); -+} -+ -+void __udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / 1000000; -+ for(i=0;i<n;i++) ; -+} -+ -+void __const_udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / 1000000; -+ for(i=0;i<n;i++) ; -+} -+ -+void timer_handler(int sig, union uml_pt_regs *regs) -+{ -+#ifdef CONFIG_SMP -+ update_process_times(user_context(UPT_SP(regs))); -+#endif -+ if(current->processor == 0) -+ timer_irq(regs); -+} -+ -+static spinlock_t timer_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long time_lock(void) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&timer_spinlock, flags); -+ return(flags); -+} -+ -+void time_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&timer_spinlock, flags); -+} -+ -+int __init timer_init(void) -+{ -+ int err; -+ -+ CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); -+ err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); -+ if(err != 0) -+ printk(KERN_ERR "timer_init : request_irq failed - " -+ "errno = %d\n", -err); -+ timer_irq_inited = 1; -+ return(0); -+} -+ -+__initcall(timer_init); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tlb.c um/arch/um/kernel/tlb.c ---- orig/arch/um/kernel/tlb.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tlb.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "asm/page.h" -+#include "asm/pgalloc.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -+{ -+ address &= PAGE_MASK; -+ flush_tlb_range(vma->vm_mm, address, address + PAGE_SIZE); -+} -+ -+void flush_tlb_all(void) -+{ -+ flush_tlb_mm(current->mm); -+} -+ -+void flush_tlb_kernel_vm(void) -+{ -+ CHOOSE_MODE(flush_tlb_kernel_vm_tt(), flush_tlb_kernel_vm_skas()); -+} -+ -+void __flush_tlb_one(unsigned long addr) -+{ -+ CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -+} -+ -+void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, mm, start, -+ end); -+} -+ -+void flush_tlb_mm(struct mm_struct *mm) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -+} -+ -+void force_flush_all(void) -+{ -+ CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -+} -+ -+ -+pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) -+{ -+ return(pgd_offset(mm, address)); -+} -+ -+pmd_t *pmd_offset_proc(pgd_t *pgd, unsigned long address) -+{ -+ return(pmd_offset(pgd, address)); -+} -+ -+pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) -+{ -+ return(pte_offset(pmd, address)); -+} -+ -+pte_t *addr_pte(struct task_struct *task, unsigned long addr) -+{ -+ return(pte_offset(pmd_offset(pgd_offset(task->mm, addr), addr), addr)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/trap_kern.c um/arch/um/kernel/trap_kern.c ---- orig/arch/um/kernel/trap_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/trap_kern.c 2003-12-22 22:48:41.000000000 -0500 -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/spinlock.h" -+#include "linux/config.h" -+#include "linux/init.h" -+#include "asm/semaphore.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/a.out.h" -+#include "asm/current.h" -+#include "asm/irq.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "chan_kern.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+#include "mem.h" -+#include "mem_kern.h" -+ -+unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int is_write, int is_user, int *code_out) -+{ -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long page; -+ int handled = 0; -+ -+ *code_out = SEGV_MAPERR; -+ down_read(&mm->mmap_sem); -+ vma = find_vma(mm, address); -+ if(!vma) -+ goto out; -+ else if(vma->vm_start <= address) -+ goto good_area; -+ else if(!(vma->vm_flags & VM_GROWSDOWN)) -+ goto out; -+ else if(expand_stack(vma, address)) -+ goto out; -+ -+ good_area: -+ *code_out = SEGV_ACCERR; -+ if(is_write && !(vma->vm_flags & VM_WRITE)) -+ goto out; -+ page = address & PAGE_MASK; -+ if(page == (unsigned long) current + PAGE_SIZE) -+ panic("Kernel stack overflow"); -+ pgd = pgd_offset(mm, page); -+ pmd = pmd_offset(pgd, page); -+ do { -+ survive: -+ switch (handle_mm_fault(mm, vma, address, is_write)) { -+ case 1: -+ current->min_flt++; -+ break; -+ case 2: -+ current->maj_flt++; -+ break; -+ default: -+ if (current->pid == 1) { -+ up_read(&mm->mmap_sem); -+ yield(); -+ down_read(&mm->mmap_sem); -+ goto survive; -+ } -+ /* Fall through to bad area case */ -+ case 0: -+ goto out; -+ } -+ pte = pte_offset(pmd, page); -+ } while(!pte_present(*pte)); -+ handled = 1; -+ *pte = pte_mkyoung(*pte); -+ if(pte_write(*pte)) *pte = pte_mkdirty(*pte); -+ flush_tlb_page(vma, page); -+ out: -+ up_read(&mm->mmap_sem); -+ return(handled); -+} -+ -+LIST_HEAD(physmem_remappers); -+ -+void register_remapper(struct remapper *info) -+{ -+ list_add(&info->list, &physmem_remappers); -+} -+ -+static int check_remapped_addr(unsigned long address, int is_write) -+{ -+ struct remapper *remapper; -+ struct list_head *ele; -+ __u64 offset; -+ int fd; -+ -+ fd = phys_mapping(__pa(address), &offset); -+ if(fd == -1) -+ return(0); -+ -+ list_for_each(ele, &physmem_remappers){ -+ remapper = list_entry(ele, struct remapper, list); -+ if((*remapper->proc)(fd, address, is_write, offset)) -+ return(1); -+ } -+ -+ return(0); -+} -+ -+unsigned long segv(unsigned long address, unsigned long ip, int is_write, -+ int is_user, void *sc) -+{ -+ struct siginfo si; -+ void *catcher; -+ int handled; -+ -+ if(!is_user && (address >= start_vm) && (address < end_vm)){ -+ flush_tlb_kernel_vm(); -+ return(0); -+ } -+ else if(check_remapped_addr(address & PAGE_MASK, is_write)) -+ return(0); -+ else if(current->mm == NULL) -+ panic("Segfault with no mm"); -+ -+ handled = handle_page_fault(address, ip, is_write, is_user, -+ &si.si_code); -+ -+ catcher = current->thread.fault_catcher; -+ if(handled) -+ return(0); -+ else if(catcher != NULL){ -+ current->thread.fault_addr = (void *) address; -+ do_longjmp(catcher, 1); -+ } -+ else if(current->thread.fault_addr != NULL) -+ panic("fault_addr set but no fault catcher"); -+ else if(arch_fixup(ip, sc)) -+ return(0); -+ -+ if(!is_user) -+ panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", -+ address, ip); -+ si.si_signo = SIGSEGV; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+ return(0); -+} -+ -+void bad_segv(unsigned long address, unsigned long ip, int is_write) -+{ -+ struct siginfo si; -+ -+ si.si_signo = SIGSEGV; -+ si.si_code = SEGV_ACCERR; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+} -+ -+void relay_signal(int sig, union uml_pt_regs *regs) -+{ -+ if(arch_handle_signal(sig, regs)) return; -+ if(!UPT_IS_USER(regs)) -+ panic("Kernel mode signal %d", sig); -+ force_sig(sig, current); -+} -+ -+void bus_handler(int sig, union uml_pt_regs *regs) -+{ -+ if(current->thread.fault_catcher != NULL) -+ do_longjmp(current->thread.fault_catcher, 1); -+ else relay_signal(sig, regs); -+} -+ -+void winch(int sig, union uml_pt_regs *regs) -+{ -+ do_IRQ(WINCH_IRQ, regs); -+} -+ -+void trap_init(void) -+{ -+} -+ -+spinlock_t trap_lock = SPIN_LOCK_UNLOCKED; -+ -+static int trap_index = 0; -+ -+int next_trap_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&trap_lock); -+ ret = trap_index; -+ if(++trap_index == limit) -+ trap_index = 0; -+ spin_unlock(&trap_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/trap_user.c um/arch/um/kernel/trap_user.c ---- orig/arch/um/kernel/trap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/trap_user.c 2004-01-31 02:47:39.000000000 -0500 -@@ -0,0 +1,138 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <setjmp.h> -+#include <signal.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/page.h> -+#include <asm/unistd.h> -+#include <asm/ptrace.h> -+#include "init.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "frame_user.h" -+#include "signal_user.h" -+#include "time_user.h" -+#include "task.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+ -+void kill_child_dead(int pid) -+{ -+ kill(pid, SIGKILL); -+ kill(pid, SIGCONT); -+ while(waitpid(pid, NULL, 0) > 0) kill(pid, SIGCONT); -+} -+ -+/* Unlocked - don't care if this is a bit off */ -+int nsegfaults = 0; -+ -+struct { -+ unsigned long address; -+ int is_write; -+ int pid; -+ unsigned long sp; -+ int is_user; -+} segfault_record[1024]; -+ -+void segv_handler(int sig, union uml_pt_regs *regs) -+{ -+ int index, max; -+ -+ if(UPT_IS_USER(regs) && !UPT_SEGV_IS_FIXABLE(regs)){ -+ bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), -+ UPT_FAULT_WRITE(regs)); -+ return; -+ } -+ max = sizeof(segfault_record)/sizeof(segfault_record[0]); -+ index = next_trap_index(max); -+ -+ nsegfaults++; -+ segfault_record[index].address = UPT_FAULT_ADDR(regs); -+ segfault_record[index].pid = os_getpid(); -+ segfault_record[index].is_write = UPT_FAULT_WRITE(regs); -+ segfault_record[index].sp = UPT_SP(regs); -+ segfault_record[index].is_user = UPT_IS_USER(regs); -+ segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs), -+ UPT_IS_USER(regs), regs); -+} -+ -+void usr2_handler(int sig, union uml_pt_regs *regs) -+{ -+ CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0); -+} -+ -+struct signal_info sig_info[] = { -+ [ SIGTRAP ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGFPE ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGILL ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGWINCH ] { .handler = winch, -+ .is_irq = 1 }, -+ [ SIGBUS ] { .handler = bus_handler, -+ .is_irq = 0 }, -+ [ SIGSEGV] { .handler = segv_handler, -+ .is_irq = 0 }, -+ [ SIGIO ] { .handler = sigio_handler, -+ .is_irq = 1 }, -+ [ SIGVTALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGUSR2 ] { .handler = usr2_handler, -+ .is_irq = 0 }, -+}; -+ -+void sig_handler(int sig, struct sigcontext sc) -+{ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+} -+ -+extern int timer_irq_inited, missed_ticks[]; -+ -+void alarm_handler(int sig, struct sigcontext sc) -+{ -+ if(!timer_irq_inited) return; -+ missed_ticks[cpu()]++; -+ -+ if(sig == SIGALRM) -+ switch_timers(0); -+ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+ -+ if(sig == SIGALRM) -+ switch_timers(1); -+} -+ -+void do_longjmp(void *b, int val) -+{ -+ sigjmp_buf *buf = b; -+ -+ siglongjmp(*buf, val); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/exec_kern.c um/arch/um/kernel/tt/exec_kern.c ---- orig/arch/um/kernel/tt/exec_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/exec_kern.c 2003-11-07 02:23:10.000000000 -0500 -@@ -0,0 +1,84 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/mm.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/pgalloc.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "os.h" -+#include "tlb.h" -+#include "mode.h" -+ -+static int exec_tramp(void *sig_stack) -+{ -+ init_new_thread_stack(sig_stack, NULL); -+ init_new_thread_signals(1); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+void flush_thread_tt(void) -+{ -+ unsigned long stack; -+ int new_pid; -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR -+ "flush_thread : failed to allocate temporary stack\n"); -+ do_exit(SIGKILL); -+ } -+ -+ new_pid = start_fork_tramp((void *) current->thread.kernel_stack, -+ stack, 0, exec_tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR -+ "flush_thread : new thread failed, errno = %d\n", -+ -new_pid); -+ do_exit(SIGKILL); -+ } -+ -+ if(current->processor == 0) -+ forward_interrupts(new_pid); -+ current->thread.request.op = OP_EXEC; -+ current->thread.request.u.exec.pid = new_pid; -+ unprotect_stack((unsigned long) current); -+ os_usr1_process(os_getpid()); -+ -+ enable_timer(); -+ free_page(stack); -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); -+ task_protections((unsigned long) current); -+ force_flush_all(); -+ unblock_signals(); -+} -+ -+void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ flush_tlb_mm(current->mm); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+ PT_FIX_EXEC_STACK(esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/exec_user.c um/arch/um/kernel/tt/exec_user.c ---- orig/arch/um/kernel/tt/exec_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/exec_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <signal.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ptrace_user.h" -+ -+void do_exec(int old_pid, int new_pid) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ -+ if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0) || -+ (waitpid(new_pid, 0, WUNTRACED) < 0)) -+ tracer_panic("do_exec failed to attach proc - errno = %d", -+ errno); -+ -+ if(ptrace_getregs(old_pid, regs) < 0) -+ tracer_panic("do_exec failed to get registers - errno = %d", -+ errno); -+ -+ kill(old_pid, SIGKILL); -+ -+ if(ptrace_setregs(new_pid, regs) < 0) -+ tracer_panic("do_exec failed to start new proc - errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/gdb.c um/arch/um/kernel/tt/gdb.c ---- orig/arch/um/kernel/tt/gdb.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/gdb.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,278 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <errno.h> -+#include <string.h> -+#include <signal.h> -+#include <sys/ptrace.h> -+#include <sys/types.h> -+#include "uml-config.h" -+#include "kern_constants.h" -+#include "chan_user.h" -+#include "init.h" -+#include "user.h" -+#include "debug.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "tt.h" -+#include "sysdep/thread.h" -+ -+extern int debugger_pid; -+extern int debugger_fd; -+extern int debugger_parent; -+ -+int detach(int pid, int sig) -+{ -+ return(ptrace(PTRACE_DETACH, pid, 0, sig)); -+} -+ -+int attach(int pid) -+{ -+ int err; -+ -+ err = ptrace(PTRACE_ATTACH, pid, 0, 0); -+ if(err < 0) return(-errno); -+ else return(err); -+} -+ -+int cont(int pid) -+{ -+ return(ptrace(PTRACE_CONT, pid, 0, 0)); -+} -+ -+#ifdef UML_CONFIG_PT_PROXY -+ -+int debugger_signal(int status, pid_t pid) -+{ -+ return(debugger_proxy(status, pid)); -+} -+ -+void child_signal(pid_t pid, int status) -+{ -+ child_proxy(pid, status); -+} -+ -+static void gdb_announce(char *dev_name, int dev) -+{ -+ printf("gdb assigned device '%s'\n", dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = gdb_announce, -+ .xterm_title = "UML kernel debugger", -+ .raw = 0, -+ .tramp_stack = 0, -+ .in_kernel = 0, -+}; -+ -+/* Accessed by the tracing thread, which automatically serializes access */ -+static void *xterm_data; -+static int xterm_fd; -+ -+extern void *xterm_init(char *, int, struct chan_opts *); -+extern int xterm_open(int, int, int, void *, char **); -+extern void xterm_close(int, void *); -+ -+int open_gdb_chan(void) -+{ -+ char stack[UM_KERN_PAGE_SIZE], *dummy; -+ -+ opts.tramp_stack = (unsigned long) stack; -+ xterm_data = xterm_init("", 0, &opts); -+ xterm_fd = xterm_open(1, 1, 1, xterm_data, &dummy); -+ return(xterm_fd); -+} -+ -+static void exit_debugger_cb(void *unused) -+{ -+ if(debugger_pid != -1){ -+ if(gdb_pid != -1){ -+ fake_child_exit(); -+ gdb_pid = -1; -+ } -+ else kill_child_dead(debugger_pid); -+ debugger_pid = -1; -+ if(debugger_parent != -1) -+ detach(debugger_parent, SIGINT); -+ } -+ if(xterm_data != NULL) xterm_close(xterm_fd, xterm_data); -+} -+ -+static void exit_debugger(void) -+{ -+ initial_thread_cb(exit_debugger_cb, NULL); -+} -+ -+__uml_exitcall(exit_debugger); -+ -+struct gdb_data { -+ char *str; -+ int err; -+}; -+ -+static void config_gdb_cb(void *arg) -+{ -+ struct gdb_data *data = arg; -+ void *task; -+ int pid; -+ -+ data->err = -1; -+ if(debugger_pid != -1) exit_debugger_cb(NULL); -+ if(!strncmp(data->str, "pid,", strlen("pid,"))){ -+ data->str += strlen("pid,"); -+ pid = strtoul(data->str, NULL, 0); -+ task = cpu_tasks[0].task; -+ debugger_pid = attach_debugger(TASK_EXTERN_PID(task), pid, 0); -+ if(debugger_pid != -1){ -+ data->err = 0; -+ gdb_pid = pid; -+ } -+ return; -+ } -+ data->err = 0; -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 0); -+} -+ -+int gdb_config(char *str) -+{ -+ struct gdb_data data; -+ -+ if(*str++ != '=') return(-1); -+ data.str = str; -+ initial_thread_cb(config_gdb_cb, &data); -+ return(data.err); -+} -+ -+void remove_gdb_cb(void *unused) -+{ -+ exit_debugger_cb(NULL); -+} -+ -+int gdb_remove(char *unused) -+{ -+ initial_thread_cb(remove_gdb_cb, NULL); -+ return(0); -+} -+ -+void signal_usr1(int sig) -+{ -+ if(debugger_pid != -1){ -+ printk(UM_KERN_ERR "The debugger is already running\n"); -+ return; -+ } -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 0); -+} -+ -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ int pid, status; -+ -+ pid = start_debugger(linux_prog, startup, stop, &debugger_fd); -+ status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(pid < 0){ -+ cont(idle_pid); -+ return(-1); -+ } -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ int status = 0, err; -+ -+ err = attach(pid); -+ if(err < 0){ -+ printf("Failed to attach pid %d, errno = %d\n", pid, -err); -+ return(-1); -+ } -+ if(stop) status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+#ifdef notdef /* Put this back in when it does something useful */ -+static int __init uml_gdb_init_setup(char *line, int *add) -+{ -+ gdb_init = uml_strdup(line); -+ return 0; -+} -+ -+__uml_setup("gdb=", uml_gdb_init_setup, -+"gdb=<channel description>\n\n" -+); -+#endif -+ -+static int __init uml_gdb_pid_setup(char *line, int *add) -+{ -+ gdb_pid = strtoul(line, NULL, 0); -+ *add = 0; -+ return 0; -+} -+ -+__uml_setup("gdb-pid=", uml_gdb_pid_setup, -+"gdb-pid=<pid>\n" -+" gdb-pid is used to attach an external debugger to UML. This may be\n" -+" an already-running gdb or a debugger-like process like strace.\n\n" -+); -+ -+#else -+ -+int debugger_signal(int status, pid_t pid){ return(0); } -+void child_signal(pid_t pid, int status){ } -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+ kill_child_dead(idle_pid); -+ exit(1); -+} -+ -+void signal_usr1(int sig) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ printk(UM_KERN_ERR "attach_debugger called when CONFIG_PT_PROXY " -+ "is off\n"); -+ return(-1); -+} -+ -+int config_gdb(char *str) -+{ -+ return(-1); -+} -+ -+int remove_gdb(void) -+{ -+ return(-1); -+} -+ -+int init_parent_proxy(int pid) -+{ -+ return(-1); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/gdb_kern.c um/arch/um/kernel/tt/gdb_kern.c ---- orig/arch/um/kernel/tt/gdb_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/gdb_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/config.h" -+#include "mconsole_kern.h" -+ -+#ifdef CONFIG_MCONSOLE -+ -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+static struct mc_device gdb_mc = { -+ .name = "gdb", -+ .config = gdb_config, -+ .remove = gdb_remove, -+}; -+ -+int gdb_mc_init(void) -+{ -+ mconsole_register_dev(&gdb_mc); -+ return(0); -+} -+ -+__initcall(gdb_mc_init); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/debug.h um/arch/um/kernel/tt/include/debug.h ---- orig/arch/um/kernel/tt/include/debug.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/debug.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) and -+ * Lars Brinkhoff. -+ * Licensed under the GPL -+ */ -+ -+#ifndef __DEBUG_H -+#define __DEBUG_H -+ -+extern int debugger_proxy(int status, pid_t pid); -+extern void child_proxy(pid_t pid, int status); -+extern void init_proxy (pid_t pid, int waiting, int status); -+extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); -+extern void fake_child_exit(void); -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mmu.h um/arch/um/kernel/tt/include/mmu.h ---- orig/arch/um/kernel/tt/include/mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/mmu.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MMU_H -+#define __TT_MMU_H -+ -+struct mmu_context_tt { -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mode.h um/arch/um/kernel/tt/include/mode.h ---- orig/arch/um/kernel/tt/include/mode.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/mode.h 2003-11-07 01:42:09.000000000 -0500 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_TT_H__ -+#define __MODE_TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; -+ -+extern int tracing_pid; -+ -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void user_time_init_tt(void); -+extern int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data); -+extern int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, -+ void *data); -+extern void sig_handler_common_tt(int sig, void *sc); -+extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); -+extern void reboot_tt(void); -+extern void halt_tt(void); -+extern int is_tracer_winch(int pid, int fd, void *data); -+extern void kill_off_processes_tt(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mode_kern.h um/arch/um/kernel/tt/include/mode_kern.h ---- orig/arch/um/kernel/tt/include/mode_kern.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/mode_kern.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MODE_KERN_H__ -+#define __TT_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+ -+extern void *_switch_to_tt(void *prev, void *next); -+extern void flush_thread_tt(void); -+extern void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct *p, -+ struct pt_regs *regs); -+extern void release_thread_tt(struct task_struct *task); -+extern void exit_thread_tt(void); -+extern void initial_thread_cb_tt(void (*proc)(void *), void *arg); -+extern void init_idle_tt(void); -+extern void flush_tlb_kernel_vm_tt(void); -+extern void __flush_tlb_one_tt(unsigned long addr); -+extern void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_tt(struct mm_struct *mm); -+extern void force_flush_all_tt(void); -+extern long execute_syscall_tt(void *r); -+extern void before_mem_tt(unsigned long brk_start); -+extern unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_tt(void); -+extern int external_pid_tt(struct task_struct *task); -+extern int thread_pid_tt(struct thread_struct *thread); -+ -+#define kmem_end_tt (host_task_size - ABOVE_KMEM) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/ptrace-tt.h um/arch/um/kernel/tt/include/ptrace-tt.h ---- orig/arch/um/kernel/tt/include/ptrace-tt.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/ptrace-tt.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_TT_H -+#define __PTRACE_TT_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "sysdep/sc.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/tt.h um/arch/um/kernel/tt/include/tt.h ---- orig/arch/um/kernel/tt/include/tt.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/tt.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_H__ -+#define __TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int gdb_pid; -+extern int debug; -+extern int debug_stop; -+extern int debug_trace; -+ -+extern int honeypot; -+ -+extern int fork_tramp(void *sig_stack); -+extern int do_proc_op(void *t, int proc_id); -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void attach_process(int pid); -+extern void tracer_panic(char *format, ...); -+extern void set_init_pid(int pid); -+extern int set_user_mode(void *task); -+extern void set_tracing(void *t, int tracing); -+extern int is_tracing(void *task); -+extern int singlestepping_tt(void *t); -+extern void clear_singlestep(void *t); -+extern void syscall_handler(int sig, union uml_pt_regs *regs); -+extern void exit_kernel(int pid, void *task); -+extern int do_syscall(void *task, int pid); -+extern int is_valid_pid(int pid); -+extern void remap_data(void *segment_start, void *segment_end, int w); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/uaccess.h um/arch/um/kernel/tt/include/uaccess.h ---- orig/arch/um/kernel/tt/include/uaccess.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/include/uaccess.h 2003-11-12 08:37:20.000000000 -0500 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_UACCESS_H -+#define __TT_UACCESS_H -+ -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/errno.h" -+#include "asm/current.h" -+#include "asm/a.out.h" -+#include "uml_uaccess.h" -+ -+#define ABOVE_KMEM (16 * 1024 * 1024) -+ -+extern unsigned long end_vm; -+extern unsigned long uml_physmem; -+ -+#define under_task_size(addr, size) \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -+ -+#define is_stack(addr, size) \ -+ (((unsigned long) (addr) < STACK_TOP) && \ -+ ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \ -+ (((unsigned long) (addr) + (size)) <= STACK_TOP)) -+ -+#define access_ok_tt(type, addr, size) \ -+ ((type == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ -+ (under_task_size(addr, size) || is_stack(addr, size)))) -+ -+static inline int verify_area_tt(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); -+} -+ -+extern unsigned long get_fault_addr(void); -+ -+extern int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, -+ void **fault_addr, void **fault_catcher); -+extern int __do_clear_user(void *mem, size_t len, void **fault_addr, -+ void **fault_catcher); -+extern int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher); -+ -+extern int copy_from_user_tt(void *to, const void *from, int n); -+extern int copy_to_user_tt(void *to, const void *from, int n); -+extern int strncpy_from_user_tt(char *dst, const char *src, int count); -+extern int __clear_user_tt(void *mem, int len); -+extern int clear_user_tt(void *mem, int len); -+extern int strnlen_user_tt(const void *str, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ksyms.c um/arch/um/kernel/tt/ksyms.c ---- orig/arch/um/kernel/tt/ksyms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ksyms.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+#include "asm/uaccess.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(__do_copy_from_user); -+EXPORT_SYMBOL(__do_copy_to_user); -+EXPORT_SYMBOL(__do_strncpy_from_user); -+EXPORT_SYMBOL(__do_strnlen_user); -+EXPORT_SYMBOL(__do_clear_user); -+ -+EXPORT_SYMBOL(tracing_pid); -+EXPORT_SYMBOL(honeypot); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/Makefile um/arch/um/kernel/tt/Makefile ---- orig/arch/um/kernel/tt/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/Makefile 2003-11-12 08:34:27.000000000 -0500 -@@ -0,0 +1,39 @@ -+# -+# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = tt.o -+ -+obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ -+ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ -+ uaccess.o uaccess_user.o -+ -+obj-$(CONFIG_PT_PROXY) += gdb_kern.o -+ -+subdir-y = sys-$(SUBARCH) -+subdir-$(CONFIG_PT_PROXY) += ptproxy -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+export-objs = ksyms.o -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) gdb.o time.o tracer.o -+ -+UNMAP_CFLAGS := $(patsubst -pg -DPROFILING,,$(USER_CFLAGS)) -+UNMAP_CFLAGS := $(patsubst -fprofile-arcs -ftest-coverage,,$(UNMAP_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+$(O_TARGET) : unmap_fin.o -+ -+unmap.o: unmap.c -+ $(CC) $(UNMAP_CFLAGS) -c -o $@ $< -+ -+unmap_fin.o : unmap.o -+ ld -r -o $@ $< -lc -L/usr/lib -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/mem.c um/arch/um/kernel/tt/mem.c ---- orig/arch/um/kernel/tt/mem.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/mem.c 2004-01-31 02:38:28.000000000 -0500 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 - 2004 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/uaccess.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "kern.h" -+#include "tt.h" -+ -+void before_mem_tt(unsigned long brk_start) -+{ -+ if(!jail || debug) -+ remap_data(UML_ROUND_DOWN(&_stext), UML_ROUND_UP(&_etext), 1); -+ remap_data(UML_ROUND_DOWN(&_sdata), UML_ROUND_UP(&_edata), 1); -+ remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(&_end), 1); -+} -+ -+#ifdef CONFIG_HOST_2G_2G -+#define TOP 0x80000000 -+#else -+#define TOP 0xc0000000 -+#endif -+ -+#define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000) -+#define START (TOP - SIZE) -+ -+unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ *host_size_out = ROUND_4M((unsigned long) &arg); -+ *task_size_out = START; -+ return(START); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/mem_user.c um/arch/um/kernel/tt/mem_user.c ---- orig/arch/um/kernel/tt/mem_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/mem_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <stdio.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/mman.h> -+#include "tt.h" -+#include "mem_user.h" -+#include "user_util.h" -+ -+void remap_data(void *segment_start, void *segment_end, int w) -+{ -+ void *addr; -+ unsigned long size; -+ int data, prot; -+ -+ if(w) prot = PROT_WRITE; -+ else prot = 0; -+ prot |= PROT_READ | PROT_EXEC; -+ size = (unsigned long) segment_end - -+ (unsigned long) segment_start; -+ data = create_mem_file(size); -+ addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); -+ if(addr == MAP_FAILED){ -+ perror("mapping new data segment"); -+ exit(1); -+ } -+ memcpy(addr, segment_start, size); -+ if(switcheroo(data, prot, addr, segment_start, size) < 0){ -+ printf("switcheroo failed\n"); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/process_kern.c um/arch/um/kernel/tt/process_kern.c ---- orig/arch/um/kernel/tt/process_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/process_kern.c 2004-01-31 02:39:08.000000000 -0500 -@@ -0,0 +1,535 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/signal.h" -+#include "linux/kernel.h" -+#include "asm/system.h" -+#include "asm/pgalloc.h" -+#include "asm/ptrace.h" -+#include "irq_user.h" -+#include "signal_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "kern.h" -+#include "sigcontext.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "tlb.h" -+#include "mode.h" -+#include "init.h" -+#include "tt.h" -+ -+void *_switch_to_tt(void *prev, void *next) -+{ -+ struct task_struct *from, *to; -+ unsigned long flags; -+ int err, vtalrm, alrm, prof, cpu; -+ char c; -+ /* jailing and SMP are incompatible, so this doesn't need to be -+ * made per-cpu -+ */ -+ static int reading; -+ -+ from = prev; -+ to = next; -+ -+ to->thread.prev_sched = from; -+ -+ cpu = from->processor; -+ if(cpu == 0) -+ forward_interrupts(to->thread.mode.tt.extern_pid); -+#ifdef CONFIG_SMP -+ forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid); -+#endif -+ local_irq_save(flags); -+ -+ vtalrm = change_sig(SIGVTALRM, 0); -+ alrm = change_sig(SIGALRM, 0); -+ prof = change_sig(SIGPROF, 0); -+ -+ c = 0; -+ set_current(to); -+ -+ reading = 0; -+ err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); -+ if(err != sizeof(c)) -+ panic("write of switch_pipe failed, err = %d", -err); -+ -+ reading = 1; -+ if(from->state == TASK_ZOMBIE) -+ os_kill_process(os_getpid(), 0); -+ -+ err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); -+ if(err != sizeof(c)) -+ panic("read of switch_pipe failed, errno = %d", -err); -+ -+ /* This works around a nasty race with 'jail'. If we are switching -+ * between two threads of a threaded app and the incoming process -+ * runs before the outgoing process reaches the read, and it makes -+ * it all the way out to userspace, then it will have write-protected -+ * the outgoing process stack. Then, when the outgoing process -+ * returns from the write, it will segfault because it can no longer -+ * write its own stack. So, in order to avoid that, the incoming -+ * thread sits in a loop yielding until 'reading' is set. This -+ * isn't entirely safe, since there may be a reschedule from a timer -+ * happening between setting 'reading' and sleeping in read. But, -+ * it should get a whole quantum in which to reach the read and sleep, -+ * which should be enough. -+ */ -+ -+ if(jail){ -+ while(!reading) sched_yield(); -+ } -+ -+ change_sig(SIGVTALRM, vtalrm); -+ change_sig(SIGALRM, alrm); -+ change_sig(SIGPROF, prof); -+ -+ arch_switch(); -+ -+ flush_tlb_all(); -+ local_irq_restore(flags); -+ -+ return(current->thread.prev_sched); -+} -+ -+void release_thread_tt(struct task_struct *task) -+{ -+ os_kill_process(task->thread.mode.tt.extern_pid, 0); -+} -+ -+void exit_thread_tt(void) -+{ -+ os_close_file(current->thread.mode.tt.switch_pipe[0]); -+ os_close_file(current->thread.mode.tt.switch_pipe[1]); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+static void new_thread_handler(int sig) -+{ -+ unsigned long disable; -+ int (*fn)(void *); -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | -+ (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); -+ SC_SIGMASK(UPT_SC(¤t->thread.regs.regs)) &= ~disable; -+ -+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ free_page(current->thread.temp_stack); -+ set_cmdline("(kernel thread)"); -+ force_flush_all(); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ change_sig(SIGUSR1, 1); -+ change_sig(SIGVTALRM, 1); -+ change_sig(SIGPROF, 1); -+ sti(); -+ if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) -+ do_exit(0); -+} -+ -+static int new_thread_proc(void *stack) -+{ -+ /* cli is needed to block out signals until this thread is properly -+ * scheduled. Otherwise, the tracing thread will get mighty upset -+ * about any signals that arrive before that. -+ * This has the complication that it sets the saved signal mask in -+ * the sigcontext to block signals. This gets restored when this -+ * thread (or a descendant, since they get a copy of this sigcontext) -+ * returns to userspace. -+ * So, this is compensated for elsewhere. -+ * XXX There is still a small window until cli() actually finishes -+ * where signals are possible - shouldn't be a problem in practice -+ * since SIGIO hasn't been forwarded here yet, and the cli should -+ * finish before a SIGVTALRM has time to be delivered. -+ */ -+ cli(); -+ init_new_thread_stack(stack, new_thread_handler); -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+/* Signal masking - signals are blocked at the start of fork_tramp. They -+ * are re-enabled when finish_fork_handler is entered by fork_tramp hitting -+ * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, -+ * so it is blocked before it's called. They are re-enabled on sigreturn -+ * despite the fact that they were blocked when the SIGUSR1 was issued because -+ * copy_thread copies the parent's sigcontext, including the signal mask -+ * onto the signal frame. -+ */ -+ -+static void finish_fork_handler(int sig) -+{ -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ sti(); -+ force_flush_all(); -+ if(current->mm != current->p_pptr->mm) -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, -+ 1, 0, 1); -+ task_protections((unsigned long) current); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ free_page(current->thread.temp_stack); -+ cli(); -+ change_sig(SIGUSR1, 0); -+ set_user_mode(current); -+} -+ -+int fork_tramp(void *stack) -+{ -+ cli(); -+ arch_init_thread(); -+ init_new_thread_stack(stack, finish_fork_handler); -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ int (*tramp)(void *); -+ int new_pid, err; -+ unsigned long stack; -+ -+ if(current->thread.forking) -+ tramp = fork_tramp; -+ else { -+ tramp = new_thread_proc; -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ } -+ -+ err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); -+ if(err < 0){ -+ printk("copy_thread : pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR "copy_thread : failed to allocate " -+ "temporary stack\n"); -+ return(-ENOMEM); -+ } -+ -+ clone_flags &= CLONE_VM; -+ p->thread.temp_stack = stack; -+ new_pid = start_fork_tramp((void *) p->thread.kernel_stack, stack, -+ clone_flags, tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", -+ -new_pid); -+ return(new_pid); -+ } -+ -+ if(current->thread.forking){ -+ sc_to_sc(UPT_SC(&p->thread.regs.regs), -+ UPT_SC(¤t->thread.regs.regs)); -+ SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); -+ if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; -+ } -+ p->thread.mode.tt.extern_pid = new_pid; -+ -+ current->thread.request.op = OP_FORK; -+ current->thread.request.u.fork.pid = new_pid; -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+void reboot_tt(void) -+{ -+ current->thread.request.op = OP_REBOOT; -+ os_usr1_process(os_getpid()); -+} -+ -+void halt_tt(void) -+{ -+ current->thread.request.op = OP_HALT; -+ os_usr1_process(os_getpid()); -+} -+ -+void kill_off_processes_tt(void) -+{ -+ struct task_struct *p; -+ int me; -+ -+ me = os_getpid(); -+ for_each_task(p){ -+ int pid = p->thread.mode.tt.extern_pid; -+ if((pid != me) && (pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+ if((init_task.thread.mode.tt.extern_pid != me) && -+ (init_task.thread.mode.tt.extern_pid != -1)) -+ os_kill_process(init_task.thread.mode.tt.extern_pid, 0); -+} -+ -+void initial_thread_cb_tt(void (*proc)(void *), void *arg) -+{ -+ if(os_getpid() == tracing_pid){ -+ (*proc)(arg); -+ } -+ else { -+ current->thread.request.op = OP_CB; -+ current->thread.request.u.cb.proc = proc; -+ current->thread.request.u.cb.arg = arg; -+ os_usr1_process(os_getpid()); -+ } -+} -+ -+int do_proc_op(void *t, int proc_id) -+{ -+ struct task_struct *task; -+ struct thread_struct *thread; -+ int op, pid; -+ -+ task = t; -+ thread = &task->thread; -+ op = thread->request.op; -+ switch(op){ -+ case OP_NONE: -+ case OP_TRACE_ON: -+ break; -+ case OP_EXEC: -+ pid = thread->request.u.exec.pid; -+ do_exec(thread->mode.tt.extern_pid, pid); -+ thread->mode.tt.extern_pid = pid; -+ cpu_tasks[task->processor].pid = pid; -+ break; -+ case OP_FORK: -+ attach_process(thread->request.u.fork.pid); -+ break; -+ case OP_CB: -+ (*thread->request.u.cb.proc)(thread->request.u.cb.arg); -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ break; -+ default: -+ tracer_panic("Bad op in do_proc_op"); -+ break; -+ } -+ thread->request.op = OP_NONE; -+ return(op); -+} -+ -+void init_idle_tt(void) -+{ -+ idle_timer(); -+} -+ -+/* Changed by jail_setup, which is a setup */ -+int jail = 0; -+ -+int __init jail_setup(char *line, int *add) -+{ -+ int ok = 1; -+ -+ if(jail) return(0); -+#ifdef CONFIG_SMP -+ printf("'jail' may not used used in a kernel with CONFIG_SMP " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_HOSTFS -+ printf("'jail' may not used used in a kernel with CONFIG_HOSTFS " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_MODULES -+ printf("'jail' may not used used in a kernel with CONFIG_MODULES " -+ "enabled\n"); -+ ok = 0; -+#endif -+ if(!ok) exit(1); -+ -+ /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem. -+ * Removing it from the bounding set eliminates the ability of anything -+ * to acquire it, and thus read or write kernel memory. -+ */ -+ cap_lower(cap_bset, CAP_SYS_RAWIO); -+ jail = 1; -+ return(0); -+} -+ -+__uml_setup("jail", jail_setup, -+"jail\n" -+" Enables the protection of kernel memory from processes.\n\n" -+); -+ -+static void mprotect_kernel_mem(int w) -+{ -+ unsigned long start, end; -+ int pages; -+ -+ if(!jail || (current == &init_task)) return; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER); -+ -+ start = (unsigned long) current + PAGE_SIZE; -+ end = (unsigned long) current + PAGE_SIZE * pages; -+ protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); -+ protect_memory(end, high_physmem - end, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_stext); -+ end = (unsigned long) UML_ROUND_UP(&_etext); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end); -+ end = (unsigned long) UML_ROUND_UP(&_edata); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&__bss_start); -+ end = (unsigned long) UML_ROUND_UP(&_end); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ mprotect_kernel_vm(w); -+} -+ -+void unprotect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(1); -+} -+ -+void protect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(0); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif -+ if(debug) os_stop_process(pid); -+ start_kernel(); -+ return(0); -+} -+ -+void set_tracing(void *task, int tracing) -+{ -+ ((struct task_struct *) task)->thread.mode.tt.tracing = tracing; -+} -+ -+int is_tracing(void *t) -+{ -+ return (((struct task_struct *) t)->thread.mode.tt.tracing); -+} -+ -+int set_user_mode(void *t) -+{ -+ struct task_struct *task; -+ -+ task = t ? t : current; -+ if(task->thread.mode.tt.tracing) -+ return(1); -+ task->thread.request.op = OP_TRACE_ON; -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+void set_init_pid(int pid) -+{ -+ int err; -+ -+ init_task.thread.mode.tt.extern_pid = pid; -+ err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); -+ if(err) -+ panic("Can't create switch pipe for init_task, errno = %d", -+ -err); -+} -+ -+int singlestepping_tt(void *t) -+{ -+ struct task_struct *task = t; -+ -+ if(task->thread.mode.tt.singlestep_syscall) -+ return(0); -+ return(task->ptrace & PT_DTRACE); -+} -+ -+void clear_singlestep(void *t) -+{ -+ struct task_struct *task = t; -+ -+ task->ptrace &= ~PT_DTRACE; -+} -+ -+int start_uml_tt(void) -+{ -+ void *sp; -+ int pages; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER) - 2; -+ sp = (void *) init_task.thread.kernel_stack + pages * PAGE_SIZE - -+ sizeof(unsigned long); -+ return(tracer(start_kernel_proc, sp)); -+} -+ -+int external_pid_tt(struct task_struct *task) -+{ -+ return(task->thread.mode.tt.extern_pid); -+} -+ -+int thread_pid_tt(struct thread_struct *thread) -+{ -+ return(thread->mode.tt.extern_pid); -+} -+ -+int is_valid_pid(int pid) -+{ -+ struct task_struct *task; -+ -+ read_lock(&tasklist_lock); -+ for_each_task(task){ -+ if(task->thread.mode.tt.extern_pid == pid){ -+ read_unlock(&tasklist_lock); -+ return(1); -+ } -+ } -+ read_unlock(&tasklist_lock); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/Makefile um/arch/um/kernel/tt/ptproxy/Makefile ---- orig/arch/um/kernel/tt/ptproxy/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,12 @@ -+O_TARGET = ptproxy.o -+ -+obj-y = proxy.o ptrace.o sysdep.o wait.o -+ -+USER_OBJS = $(obj-y) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/proxy.c um/arch/um/kernel/tt/ptproxy/proxy.c ---- orig/arch/um/kernel/tt/ptproxy/proxy.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/proxy.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,371 @@ -+/********************************************************************** -+proxy.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+/* XXX This file shouldn't refer to CONFIG_* */ -+ -+#include <errno.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <signal.h> -+#include <string.h> -+#include <termios.h> -+#include <sys/wait.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <sys/ioctl.h> -+#include <asm/unistd.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+ -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "tempfile.h" -+ -+static int debugger_wait(debugger_state *debugger, int *status, int options, -+ int (*syscall)(debugger_state *debugger, pid_t child), -+ int (*normal_return)(debugger_state *debugger, -+ pid_t unused), -+ int (*wait_return)(debugger_state *debugger, -+ pid_t unused)) -+{ -+ if(debugger->real_wait){ -+ debugger->handle_trace = normal_return; -+ syscall_continue(debugger->pid); -+ debugger->real_wait = 0; -+ return(1); -+ } -+ debugger->wait_status_ptr = status; -+ debugger->wait_options = options; -+ if((debugger->debugee != NULL) && debugger->debugee->event){ -+ syscall_continue(debugger->pid); -+ wait_for_stop(debugger->pid, SIGTRAP, PTRACE_SYSCALL, -+ NULL); -+ (*wait_return)(debugger, -1); -+ return(0); -+ } -+ else if(debugger->wait_options & WNOHANG){ -+ syscall_cancel(debugger->pid, 0); -+ debugger->handle_trace = syscall; -+ return(0); -+ } -+ else { -+ syscall_pause(debugger->pid); -+ debugger->handle_trace = wait_return; -+ debugger->waiting = 1; -+ } -+ return(1); -+} -+ -+/* -+ * Handle debugger trap, i.e. syscall. -+ */ -+ -+int debugger_syscall(debugger_state *debugger, pid_t child) -+{ -+ long arg1, arg2, arg3, arg4, arg5, result; -+ int syscall, ret = 0; -+ -+ syscall = get_syscall(debugger->pid, &arg1, &arg2, &arg3, &arg4, -+ &arg5); -+ -+ switch(syscall){ -+ case __NR_execve: -+ /* execve never returns */ -+ debugger->handle_trace = debugger_syscall; -+ break; -+ -+ case __NR_ptrace: -+ if(debugger->debugee->pid != 0) arg2 = debugger->debugee->pid; -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ result = proxy_ptrace(debugger, arg1, arg2, arg3, arg4, child, -+ &ret); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(ret); -+ -+ case __NR_waitpid: -+ case __NR_wait4: -+ if(!debugger_wait(debugger, (int *) arg2, arg3, -+ debugger_syscall, debugger_normal_return, -+ proxy_wait_return)) -+ return(0); -+ break; -+ -+ case __NR_kill: -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ if(arg1 == debugger->debugee->pid){ -+ result = kill(child, arg2); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(0); -+ } -+ else debugger->handle_trace = debugger_normal_return; -+ break; -+ -+ default: -+ debugger->handle_trace = debugger_normal_return; -+ } -+ -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state parent; -+static int parent_syscall(debugger_state *debugger, int pid); -+ -+int init_parent_proxy(int pid) -+{ -+ parent = ((debugger_state) { .pid = pid, -+ .wait_options = 0, -+ .wait_status_ptr = NULL, -+ .waiting = 0, -+ .real_wait = 0, -+ .expecting_child = 0, -+ .handle_trace = parent_syscall, -+ .debugee = NULL } ); -+ return(0); -+} -+ -+int parent_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = parent_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+static int parent_syscall(debugger_state *debugger, int pid) -+{ -+ long arg1, arg2, arg3, arg4, arg5; -+ int syscall; -+ -+ syscall = get_syscall(pid, &arg1, &arg2, &arg3, &arg4, &arg5); -+ -+ if((syscall == __NR_waitpid) || (syscall == __NR_wait4)){ -+ debugger_wait(&parent, (int *) arg2, arg3, parent_syscall, -+ parent_normal_return, parent_wait_return); -+ } -+ else ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ return(0); -+} -+ -+int debugger_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+void debugger_cancelled_return(debugger_state *debugger, int result) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_set_result(debugger->pid, result); -+ syscall_continue(debugger->pid); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state debugger; -+static debugee_state debugee; -+ -+void init_proxy (pid_t debugger_pid, int stopped, int status) -+{ -+ debugger.pid = debugger_pid; -+ debugger.handle_trace = debugger_syscall; -+ debugger.debugee = &debugee; -+ debugger.waiting = 0; -+ debugger.real_wait = 0; -+ debugger.expecting_child = 0; -+ -+ debugee.pid = 0; -+ debugee.traced = 0; -+ debugee.stopped = stopped; -+ debugee.event = 0; -+ debugee.zombie = 0; -+ debugee.died = 0; -+ debugee.wait_status = status; -+ debugee.in_context = 1; -+} -+ -+int debugger_proxy(int status, int pid) -+{ -+ int ret = 0, sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if (sig == SIGTRAP) -+ ret = (*debugger.handle_trace)(&debugger, pid); -+ -+ else if(sig == SIGCHLD){ -+ if(debugger.expecting_child){ -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.expecting_child = 0; -+ } -+ else if(debugger.waiting) -+ real_wait_return(&debugger); -+ else { -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.real_wait = 1; -+ } -+ } -+ else ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ } -+ else if(WIFEXITED(status)){ -+ tracer_panic("debugger (pid %d) exited with status %d", -+ debugger.pid, WEXITSTATUS(status)); -+ } -+ else if(WIFSIGNALED(status)){ -+ tracer_panic("debugger (pid %d) exited with signal %d", -+ debugger.pid, WTERMSIG(status)); -+ } -+ else { -+ tracer_panic("proxy got unknown status (0x%x) on debugger " -+ "(pid %d)", status, debugger.pid); -+ } -+ return(ret); -+} -+ -+void child_proxy(pid_t pid, int status) -+{ -+ debugee.event = 1; -+ debugee.wait_status = status; -+ -+ if(WIFSTOPPED(status)){ -+ debugee.stopped = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else if(WIFEXITED(status) || WIFSIGNALED(status)){ -+ debugee.zombie = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else panic("proxy got unknown status (0x%x) on child (pid %d)", -+ status, pid); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+ int sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if(sig == SIGTRAP) (*parent.handle_trace)(&parent, pid); -+ else ptrace(PTRACE_SYSCALL, pid, 0, sig); -+ } -+} -+ -+void fake_child_exit(void) -+{ -+ int status, pid; -+ -+ child_proxy(1, W_EXITCODE(0, 0)); -+ while(debugger.waiting == 1){ -+ pid = waitpid(debugger.pid, &status, WUNTRACED); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ debugger_proxy(status, debugger.pid); -+ } -+ pid = waitpid(debugger.pid, &status, WUNTRACED); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ if(ptrace(PTRACE_DETACH, debugger.pid, 0, SIGCONT) < 0) -+ printk("fake_child_exit - PTRACE_DETACH failed, errno = %d\n", -+ errno); -+} -+ -+char gdb_init_string[] = -+"att 1 \n\ -+b panic \n\ -+b stop \n\ -+handle SIGWINCH nostop noprint pass \n\ -+"; -+ -+int start_debugger(char *prog, int startup, int stop, int *fd_out) -+{ -+ int slave, child; -+ -+ slave = open_gdb_chan(); -+ child = fork(); -+ if(child == 0){ -+ char *tempname = NULL; -+ int fd; -+ -+ if(setsid() < 0) perror("setsid"); -+ if((dup2(slave, 0) < 0) || (dup2(slave, 1) < 0) || -+ (dup2(slave, 2) < 0)){ -+ printk("start_debugger : dup2 failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ if(ioctl(0, TIOCSCTTY, 0) < 0){ -+ printk("start_debugger : TIOCSCTTY failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ if(tcsetpgrp (1, os_getpid()) < 0){ -+ printk("start_debugger : tcsetpgrp failed, " -+ "errno = %d\n", errno); -+#ifdef notdef -+ exit(1); -+#endif -+ } -+ fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); -+ if(fd < 0){ -+ printk("start_debugger : make_tempfile failed," -+ "err = %d\n", -fd); -+ exit(1); -+ } -+ os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); -+ if(startup){ -+ if(stop){ -+ os_write_file(fd, "b start_kernel\n", -+ strlen("b start_kernel\n")); -+ } -+ os_write_file(fd, "c\n", strlen("c\n")); -+ } -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printk("start_debugger : PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ execlp("gdb", "gdb", "--command", tempname, prog, NULL); -+ printk("start_debugger : exec of gdb failed, errno = %d\n", -+ errno); -+ } -+ if(child < 0){ -+ printk("start_debugger : fork for gdb failed, errno = %d\n", -+ errno); -+ return(-1); -+ } -+ *fd_out = slave; -+ return(child); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/ptproxy.h um/arch/um/kernel/tt/ptproxy/ptproxy.h ---- orig/arch/um/kernel/tt/ptproxy/ptproxy.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/ptproxy.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,61 @@ -+/********************************************************************** -+ptproxy.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#ifndef __PTPROXY_H -+#define __PTPROXY_H -+ -+#include <sys/types.h> -+ -+typedef struct debugger debugger_state; -+typedef struct debugee debugee_state; -+ -+struct debugger -+{ -+ pid_t pid; -+ int wait_options; -+ int *wait_status_ptr; -+ unsigned int waiting : 1; -+ unsigned int real_wait : 1; -+ unsigned int expecting_child : 1; -+ int (*handle_trace) (debugger_state *, pid_t); -+ -+ debugee_state *debugee; -+}; -+ -+struct debugee -+{ -+ pid_t pid; -+ int wait_status; -+ unsigned int died : 1; -+ unsigned int event : 1; -+ unsigned int stopped : 1; -+ unsigned int trace_singlestep : 1; -+ unsigned int trace_syscall : 1; -+ unsigned int traced : 1; -+ unsigned int zombie : 1; -+ unsigned int in_context : 1; -+}; -+ -+extern int debugger_syscall(debugger_state *debugger, pid_t pid); -+extern int debugger_normal_return (debugger_state *debugger, pid_t unused); -+ -+extern long proxy_ptrace (struct debugger *, int, pid_t, long, long, pid_t, -+ int *strace_out); -+extern void debugger_cancelled_return(debugger_state *debugger, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/ptrace.c um/arch/um/kernel/tt/ptproxy/ptrace.c ---- orig/arch/um/kernel/tt/ptproxy/ptrace.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/ptrace.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,239 @@ -+/********************************************************************** -+ptrace.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+#include <errno.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "debug.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+#include "tt.h" -+ -+long proxy_ptrace(struct debugger *debugger, int arg1, pid_t arg2, -+ long arg3, long arg4, pid_t child, int *ret) -+{ -+ sigset_t relay; -+ long result; -+ int status; -+ -+ *ret = 0; -+ if(debugger->debugee->died) return(-ESRCH); -+ -+ switch(arg1){ -+ case PTRACE_ATTACH: -+ if(debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->pid = arg2; -+ debugger->debugee->traced = 1; -+ -+ if(is_valid_pid(arg2) && (arg2 != child)){ -+ debugger->debugee->in_context = 0; -+ kill(arg2, SIGSTOP); -+ debugger->debugee->event = 1; -+ debugger->debugee->wait_status = W_STOPCODE(SIGSTOP); -+ } -+ else { -+ debugger->debugee->in_context = 1; -+ if(debugger->debugee->stopped) -+ child_proxy(child, W_STOPCODE(SIGSTOP)); -+ else kill(child, SIGSTOP); -+ } -+ -+ return(0); -+ -+ case PTRACE_DETACH: -+ if(!debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->traced = 0; -+ debugger->debugee->pid = 0; -+ if(!debugger->debugee->in_context) -+ kill(child, SIGCONT); -+ -+ return(0); -+ -+ case PTRACE_CONT: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ *ret = PTRACE_CONT; -+ return(ptrace(PTRACE_CONT, child, arg3, arg4)); -+ -+#ifdef UM_HAVE_GETFPREGS -+ case PTRACE_GETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETFPXREGS -+ case PTRACE_GETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETREGS -+ case PTRACE_GETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace (PTRACE_POKEDATA, debugger->pid, -+ arg4 + 4 * i, regs[i]); -+ return(result); -+ } -+ break; -+#endif -+ -+ case PTRACE_KILL: -+ result = ptrace(PTRACE_KILL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_PEEKDATA: -+ case PTRACE_PEEKTEXT: -+ case PTRACE_PEEKUSER: -+ /* The value being read out could be -1, so we have to -+ * check errno to see if there's an error, and zero it -+ * beforehand so we're not faked out by an old error -+ */ -+ -+ errno = 0; -+ result = ptrace(arg1, child, arg3, 0); -+ if((result == -1) && (errno != 0)) return(-errno); -+ -+ result = ptrace(PTRACE_POKEDATA, debugger->pid, arg4, result); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_POKEDATA: -+ case PTRACE_POKETEXT: -+ case PTRACE_POKEUSER: -+ result = ptrace(arg1, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ if(arg1 == PTRACE_POKEUSER) ptrace_pokeuser(arg3, arg4); -+ return(result); -+ -+#ifdef UM_HAVE_SETFPREGS -+ case PTRACE_SETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETFPXREGS -+ case PTRACE_SETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETREGS -+ case PTRACE_SETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace(PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+ case PTRACE_SINGLESTEP: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ sigemptyset(&relay); -+ sigaddset(&relay, SIGSEGV); -+ sigaddset(&relay, SIGILL); -+ sigaddset(&relay, SIGBUS); -+ result = ptrace(PTRACE_SINGLESTEP, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ status = wait_for_stop(child, SIGTRAP, PTRACE_SINGLESTEP, -+ &relay); -+ child_proxy(child, status); -+ return(result); -+ -+ case PTRACE_SYSCALL: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ result = ptrace(PTRACE_SYSCALL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ *ret = PTRACE_SYSCALL; -+ return(result); -+ -+ case PTRACE_TRACEME: -+ default: -+ return(-EINVAL); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/sysdep.c um/arch/um/kernel/tt/ptproxy/sysdep.c ---- orig/arch/um/kernel/tt/ptproxy/sysdep.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/sysdep.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,72 @@ -+/********************************************************************** -+sysdep.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#include <stdio.h> -+#include <string.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <linux/unistd.h> -+#include "ptrace_user.h" -+#include "user_util.h" -+#include "user.h" -+ -+int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, long *arg4, -+ long *arg5) -+{ -+ *arg1 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG1_OFFSET, 0); -+ *arg2 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG2_OFFSET, 0); -+ *arg3 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG3_OFFSET, 0); -+ *arg4 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG4_OFFSET, 0); -+ *arg5 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG5_OFFSET, 0); -+ return(ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, 0)); -+} -+ -+void syscall_cancel(pid_t pid, int result) -+{ -+ if((ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) || -+ (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) || -+ (wait_for_stop(pid, SIGTRAP, PTRACE_SYSCALL, NULL) < 0) || -+ (ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result) < 0) || -+ (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)) -+ printk("ptproxy: couldn't cancel syscall: errno = %d\n", -+ errno); -+} -+ -+void syscall_set_result(pid_t pid, long result) -+{ -+ ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result); -+} -+ -+void syscall_continue(pid_t pid) -+{ -+ ptrace(PTRACE_SYSCALL, pid, 0, 0); -+} -+ -+int syscall_pause(pid_t pid) -+{ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_pause) < 0){ -+ printk("syscall_change - ptrace failed, errno = %d\n", errno); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/sysdep.h um/arch/um/kernel/tt/ptproxy/sysdep.h ---- orig/arch/um/kernel/tt/ptproxy/sysdep.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/sysdep.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,25 @@ -+/********************************************************************** -+sysdep.h -+ -+Copyright (C) 1999 Lars Brinkhoff. -+Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+See the file COPYING for licensing terms and conditions. -+**********************************************************************/ -+ -+extern int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, -+ long *arg4, long *arg5); -+extern void syscall_cancel (pid_t pid, long result); -+extern void syscall_set_result (pid_t pid, long result); -+extern void syscall_continue (pid_t pid); -+extern int syscall_pause(pid_t pid); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/wait.c um/arch/um/kernel/tt/ptproxy/wait.c ---- orig/arch/um/kernel/tt/ptproxy/wait.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/wait.c 2004-01-17 05:27:18.000000000 -0500 -@@ -0,0 +1,88 @@ -+/********************************************************************** -+wait.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+**********************************************************************/ -+ -+#include <errno.h> -+#include <signal.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+#include "user_util.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "sysdep/sigcontext.h" -+ -+int proxy_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ debugger->waiting = 0; -+ -+ if(debugger->debugee->died || (debugger->wait_options & __WCLONE)){ -+ debugger_cancelled_return(debugger, -ECHILD); -+ return(0); -+ } -+ -+ if(debugger->debugee->zombie && debugger->debugee->event) -+ debugger->debugee->died = 1; -+ -+ if(debugger->debugee->event){ -+ debugger->debugee->event = 0; -+ ptrace(PTRACE_POKEDATA, debugger->pid, -+ debugger->wait_status_ptr, -+ debugger->debugee->wait_status); -+ /* if (wait4) -+ ptrace (PTRACE_POKEDATA, pid, rusage_ptr, ...); */ -+ debugger_cancelled_return(debugger, debugger->debugee->pid); -+ return(0); -+ } -+ -+ /* pause will return -EINTR, which happens to be right for wait */ -+ debugger_normal_return(debugger, -1); -+ return(0); -+} -+ -+int parent_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ return(debugger_normal_return(debugger, -1)); -+} -+ -+int real_wait_return(struct debugger *debugger) -+{ -+ unsigned long ip; -+ int pid; -+ -+ pid = debugger->pid; -+ -+ ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ IP_RESTART_SYSCALL(ip); -+ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) -+ tracer_panic("real_wait_return : Failed to restart system " -+ "call, errno = %d\n", errno); -+ -+ if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ debugger_normal_return(debugger, -1)) -+ tracer_panic("real_wait_return : gdb failed to wait, " -+ "errno = %d\n", errno); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/wait.h um/arch/um/kernel/tt/ptproxy/wait.h ---- orig/arch/um/kernel/tt/ptproxy/wait.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/ptproxy/wait.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,15 @@ -+/********************************************************************** -+wait.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#ifndef __PTPROXY_WAIT_H -+#define __PTPROXY_WAIT_H -+ -+extern int proxy_wait_return(struct debugger *debugger, pid_t unused); -+extern int real_wait_return(struct debugger *debugger); -+extern int parent_wait_return(struct debugger *debugger, pid_t unused); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/syscall_kern.c um/arch/um/kernel/tt/syscall_kern.c ---- orig/arch/um/kernel/tt/syscall_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/syscall_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/types.h" -+#include "linux/utime.h" -+#include "linux/sys.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+static inline int check_area(void *ptr, int size) -+{ -+ return(verify_area(VERIFY_WRITE, ptr, size)); -+} -+ -+static int check_readlink(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ UPT_SYSCALL_ARG2(®s->regs))); -+} -+ -+static int check_utime(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct utimbuf))); -+} -+ -+static int check_oldstat(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct __old_kernel_stat))); -+} -+ -+static int check_stat(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat))); -+} -+ -+static int check_stat64(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat64))); -+} -+ -+struct bogus { -+ int kernel_ds; -+ int (*check_params)(struct pt_regs *); -+}; -+ -+struct bogus this_is_bogus[256] = { -+ [ __NR_mknod ] = { 1, NULL }, -+ [ __NR_mkdir ] = { 1, NULL }, -+ [ __NR_rmdir ] = { 1, NULL }, -+ [ __NR_unlink ] = { 1, NULL }, -+ [ __NR_symlink ] = { 1, NULL }, -+ [ __NR_link ] = { 1, NULL }, -+ [ __NR_rename ] = { 1, NULL }, -+ [ __NR_umount ] = { 1, NULL }, -+ [ __NR_mount ] = { 1, NULL }, -+ [ __NR_pivot_root ] = { 1, NULL }, -+ [ __NR_chdir ] = { 1, NULL }, -+ [ __NR_chroot ] = { 1, NULL }, -+ [ __NR_open ] = { 1, NULL }, -+ [ __NR_quotactl ] = { 1, NULL }, -+ [ __NR_sysfs ] = { 1, NULL }, -+ [ __NR_readlink ] = { 1, check_readlink }, -+ [ __NR_acct ] = { 1, NULL }, -+ [ __NR_execve ] = { 1, NULL }, -+ [ __NR_uselib ] = { 1, NULL }, -+ [ __NR_statfs ] = { 1, NULL }, -+ [ __NR_truncate ] = { 1, NULL }, -+ [ __NR_access ] = { 1, NULL }, -+ [ __NR_chmod ] = { 1, NULL }, -+ [ __NR_chown ] = { 1, NULL }, -+ [ __NR_lchown ] = { 1, NULL }, -+ [ __NR_utime ] = { 1, check_utime }, -+ [ __NR_oldlstat ] = { 1, check_oldstat }, -+ [ __NR_oldstat ] = { 1, check_oldstat }, -+ [ __NR_stat ] = { 1, check_stat }, -+ [ __NR_lstat ] = { 1, check_stat }, -+ [ __NR_stat64 ] = { 1, check_stat64 }, -+ [ __NR_lstat64 ] = { 1, check_stat64 }, -+ [ __NR_chown32 ] = { 1, NULL }, -+}; -+ -+/* sys_utimes */ -+ -+static int check_bogosity(struct pt_regs *regs) -+{ -+ struct bogus *bogon = &this_is_bogus[UPT_SYSCALL_NR(®s->regs)]; -+ -+ if(!bogon->kernel_ds) return(0); -+ if(bogon->check_params && (*bogon->check_params)(regs)) -+ return(-EFAULT); -+ set_fs(KERNEL_DS); -+ return(0); -+} -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_tt(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(®s->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 0)) -+ res = -ENOSYS; -+ else if(honeypot && check_bogosity(regs)) -+ res = -EFAULT; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ set_fs(USER_DS); -+ -+ if(current->thread.mode.tt.singlestep_syscall){ -+ current->thread.mode.tt.singlestep_syscall = 0; -+ current->ptrace &= ~PT_DTRACE; -+ force_sig(SIGTRAP, current); -+ } -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/syscall_user.c um/arch/um/kernel/tt/syscall_user.c ---- orig/arch/um/kernel/tt/syscall_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/syscall_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,89 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <asm/unistd.h> -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "ptrace_user.h" -+#include "task.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "tt.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void syscall_handler_tt(int sig, union uml_pt_regs *regs) -+{ -+ void *sc; -+ long result; -+ int index, syscall; -+ -+ syscall = UPT_SYSCALL_NR(regs); -+ sc = UPT_SC(regs); -+ SC_START_SYSCALL(sc); -+ -+ index = record_syscall_start(syscall); -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ /* regs->sc may have changed while the system call ran (there may -+ * have been an interrupt or segfault), so it needs to be refreshed. -+ */ -+ UPT_SC(regs) = sc; -+ -+ SC_SET_SYSCALL_RETURN(sc, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+int do_syscall(void *task, int pid) -+{ -+ unsigned long proc_regs[FRAME_SIZE]; -+ union uml_pt_regs *regs; -+ int syscall; -+ -+ if(ptrace_getregs(pid, proc_regs) < 0) -+ tracer_panic("Couldn't read registers"); -+ syscall = PT_SYSCALL_NR(proc_regs); -+ -+ regs = TASK_REGS(task); -+ UPT_SYSCALL_NR(regs) = syscall; -+ -+ if(syscall < 1) return(0); -+ -+ if((syscall != __NR_sigreturn) && -+ ((unsigned long *) PT_IP(proc_regs) >= &_stext) && -+ ((unsigned long *) PT_IP(proc_regs) <= &_etext)) -+ tracer_panic("I'm tracing myself and I can't get out"); -+ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) -+ tracer_panic("do_syscall : Nullifying syscall failed, " -+ "errno = %d", errno); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/sys-i386/Makefile um/arch/um/kernel/tt/sys-i386/Makefile ---- orig/arch/um/kernel/tt/sys-i386/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/sys-i386/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/sys-i386/sigcontext.c um/arch/um/kernel/tt/sys-i386/sigcontext.c ---- orig/arch/um/kernel/tt/sys-i386/sigcontext.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/sys-i386/sigcontext.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <asm/sigcontext.h> -+#include "kern_util.h" -+#include "sysdep/frame.h" -+ -+int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ unsigned long sigs; -+ int err; -+ -+ to_fp = to->fpstate; -+ from_fp = from->fpstate; -+ sigs = to->oldmask; -+ err = copy_from_user_proc(to, from, sizeof(*to)); -+ to->oldmask = sigs; -+ if(to_fp != NULL){ -+ err |= copy_from_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_from_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ int err; -+ -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ from_fp = from->fpstate; -+ err = copy_to_user_proc(to, from, sizeof(*to)); -+ if(from_fp != NULL){ -+ err |= copy_to_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_to_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/time.c um/arch/um/kernel/tt/time.c ---- orig/arch/um/kernel/tt/time.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/time.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <sys/time.h> -+#include <time_user.h> -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_tt(void) -+{ -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/tlb.c um/arch/um/kernel/tt/tlb.c ---- orig/arch/um/kernel/tt/tlb.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/tlb.c 2003-11-13 00:40:57.000000000 -0500 -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err; -+ -+ if((current->thread.mode.tt.extern_pid != -1) && -+ (current->thread.mode.tt.extern_pid != os_getpid())) -+ panic("fix_range fixing wrong address space, current = 0x%p", -+ current); -+ if(mm == NULL) return; -+ for(addr=start_addr;addr<end_addr;){ -+ if(addr == TASK_SIZE){ -+ /* Skip over kernel text, kernel data, and physical -+ * memory, which don't have ptes, plus kernel virtual -+ * memory, which is flushed separately, and remap -+ * the process stack. The only way to get here is -+ * if (end_addr == STACK_TOP) > TASK_SIZE, which is -+ * only true in the honeypot case. -+ */ -+ addr = STACK_TOP - ABOVE_KMEM; -+ continue; -+ } -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map_memory(addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect_memory(addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+atomic_t vmchange_seq = ATOMIC_INIT(1); -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end, -+ int update_seq) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start; addr < end;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+ if(updated && update_seq) atomic_inc(&vmchange_seq); -+} -+ -+static void protect_vm_page(unsigned long addr, int w, int must_succeed) -+{ -+ int err; -+ -+ err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed); -+ if(err == 0) return; -+ else if((err == -EFAULT) || (err == -ENOMEM)){ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+ protect_vm_page(addr, w, 1); -+ } -+ else panic("protect_vm_page : protect failed, errno = %d\n", err); -+} -+ -+void mprotect_kernel_vm(int w) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ -+ mm = &init_mm; -+ for(addr = start_vm; addr < end_vm;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(pte_present(*pte)) protect_vm_page(addr, w, 0); -+ addr += PAGE_SIZE; -+ } -+ else addr += PMD_SIZE; -+ } -+} -+ -+void flush_tlb_kernel_vm_tt(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm, 1); -+} -+ -+void __flush_tlb_one_tt(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+} -+ -+void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm != current->mm) return; -+ -+ /* Assumes that the range start ... end is entirely within -+ * either process memory or kernel vm -+ */ -+ if((start >= start_vm) && (start < end_vm)) -+ flush_kernel_vm_range(start, end, 1); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_tt(struct mm_struct *mm) -+{ -+ unsigned long seq; -+ -+ if(mm != current->mm) return; -+ -+ fix_range(mm, 0, STACK_TOP, 0); -+ -+ seq = atomic_read(&vmchange_seq); -+ if(current->thread.mode.tt.vm_seq == seq) return; -+ current->thread.mode.tt.vm_seq = seq; -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+void force_flush_all_tt(void) -+{ -+ fix_range(current->mm, 0, STACK_TOP, 1); -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/tracer.c um/arch/um/kernel/tt/tracer.c ---- orig/arch/um/kernel/tt/tracer.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/tracer.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,454 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sched.h> -+#include <string.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include <sys/time.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "process.h" -+#include "kern_util.h" -+#include "frame.h" -+#include "chan_user.h" -+#include "ptrace_user.h" -+#include "mode.h" -+#include "tt.h" -+ -+static int tracer_winch[2]; -+ -+int is_tracer_winch(int pid, int fd, void *data) -+{ -+ if(pid != tracing_pid) -+ return(0); -+ -+ register_winch_irq(tracer_winch[0], fd, -1, data); -+ return(1); -+} -+ -+static void tracer_winch_handler(int sig) -+{ -+ int n; -+ char c = 1; -+ -+ n = os_write_file(tracer_winch[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("tracer_winch_handler - write failed, err = %d\n", -n); -+} -+ -+/* Called only by the tracing thread during initialization */ -+ -+static void setup_tracer_winch(void) -+{ -+ int err; -+ -+ err = os_pipe(tracer_winch, 1, 1); -+ if(err < 0){ -+ printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); -+ return; -+ } -+ signal(SIGWINCH, tracer_winch_handler); -+} -+ -+void attach_process(int pid) -+{ -+ if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, pid, 0, 0) < 0)) -+ tracer_panic("OP_FORK failed to attach pid"); -+ wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ tracer_panic("OP_FORK failed to continue process"); -+} -+ -+void tracer_panic(char *format, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, format); -+ vprintf(format, ap); -+ printf("\n"); -+ while(1) pause(); -+} -+ -+static void tracer_segv(int sig, struct sigcontext sc) -+{ -+ printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n", -+ SC_FAULT_ADDR(&sc), SC_IP(&sc)); -+ while(1) -+ pause(); -+} -+ -+/* Changed early in boot, and then only read */ -+int debug = 0; -+int debug_stop = 1; -+int debug_parent = 0; -+int honeypot = 0; -+ -+static int signal_tramp(void *arg) -+{ -+ int (*proc)(void *); -+ -+ if(honeypot && munmap((void *) (host_task_size - 0x10000000), -+ 0x10000000)) -+ panic("Unmapping stack failed"); -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("ptrace PTRACE_TRACEME failed"); -+ os_stop_process(os_getpid()); -+ change_sig(SIGWINCH, 0); -+ signal(SIGUSR1, SIG_IGN); -+ change_sig(SIGCHLD, 0); -+ signal(SIGSEGV, (__sighandler_t) sig_handler); -+ set_cmdline("(idle thread)"); -+ set_init_pid(os_getpid()); -+ proc = arg; -+ return((*proc)(NULL)); -+} -+ -+static void sleeping_process_signal(int pid, int sig) -+{ -+ switch(sig){ -+ /* These two result from UML being ^Z-ed and bg-ed. PTRACE_CONT is -+ * right because the process must be in the kernel already. -+ */ -+ case SIGCONT: -+ case SIGTSTP: -+ if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "continue pid %d, signal = %d, " -+ "errno = %d\n", pid, sig, errno); -+ break; -+ -+ /* This happens when the debugger (e.g. strace) is doing system call -+ * tracing on the kernel. During a context switch, the current task -+ * will be set to the incoming process and the outgoing process will -+ * hop into write and then read. Since it's not the current process -+ * any more, the trace of those will land here. So, we need to just -+ * PTRACE_SYSCALL it. -+ */ -+ case SIGTRAP: -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "PTRACE_SYSCALL pid %d, errno = %d\n", -+ pid, errno); -+ break; -+ case SIGSTOP: -+ break; -+ default: -+ tracer_panic("sleeping process %d got unexpected " -+ "signal : %d\n", pid, sig); -+ break; -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+int debugger_pid = -1; -+int debugger_parent = -1; -+int debugger_fd = -1; -+int gdb_pid = -1; -+ -+struct { -+ int pid; -+ int signal; -+ unsigned long addr; -+ struct timeval time; -+} signal_record[1024][32]; -+ -+int signal_index[32]; -+int nsignals = 0; -+int debug_trace = 0; -+extern int io_nsignals, io_count, intr_count; -+ -+extern void signal_usr1(int sig); -+ -+int tracing_pid = -1; -+ -+int tracer(int (*init_proc)(void *), void *sp) -+{ -+ void *task = NULL; -+ unsigned long eip = 0; -+ int status, pid = 0, sig = 0, cont_type, tracing = 0, op = 0; -+ int last_index, proc_id = 0, n, err, old_tracing = 0, strace = 0; -+ -+ capture_signal_stack(); -+ signal(SIGPIPE, SIG_IGN); -+ setup_tracer_winch(); -+ tracing_pid = os_getpid(); -+ printf("tracing thread pid = %d\n", tracing_pid); -+ -+ pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("waitpid on idle thread failed, errno = %d\n", errno); -+ exit(1); -+ } -+ if((ptrace(PTRACE_CONT, pid, 0, 0) < 0)){ -+ printf("Failed to continue idle thread, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ signal(SIGSEGV, (sighandler_t) tracer_segv); -+ signal(SIGUSR1, signal_usr1); -+ if(debug_trace){ -+ printf("Tracing thread pausing to be attached\n"); -+ stop(); -+ } -+ if(debug){ -+ if(gdb_pid != -1) -+ debugger_pid = attach_debugger(pid, gdb_pid, 1); -+ else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop); -+ if(debug_parent){ -+ debugger_parent = os_process_parent(debugger_pid); -+ init_parent_proxy(debugger_parent); -+ err = attach(debugger_parent); -+ if(err){ -+ printf("Failed to attach debugger parent %d, " -+ "errno = %d\n", debugger_parent, -err); -+ debugger_parent = -1; -+ } -+ else { -+ if(ptrace(PTRACE_SYSCALL, debugger_parent, -+ 0, 0) < 0){ -+ printf("Failed to continue debugger " -+ "parent, errno = %d\n", errno); -+ debugger_parent = -1; -+ } -+ } -+ } -+ } -+ set_cmdline("(tracing thread)"); -+ while(1){ -+ pid = waitpid(-1, &status, WUNTRACED); -+ if(pid <= 0){ -+ if(errno != ECHILD){ -+ printf("wait failed - errno = %d\n", errno); -+ } -+ continue; -+ } -+ if(pid == debugger_pid){ -+ int cont = 0; -+ -+ if(WIFEXITED(status) || WIFSIGNALED(status)) -+ debugger_pid = -1; -+ /* XXX Figure out how to deal with gdb and SMP */ -+ else cont = debugger_signal(status, cpu_tasks[0].pid); -+ if(cont == PTRACE_SYSCALL) strace = 1; -+ continue; -+ } -+ else if(pid == debugger_parent){ -+ debugger_parent_signal(status, pid); -+ continue; -+ } -+ nsignals++; -+ if(WIFEXITED(status)) ; -+#ifdef notdef -+ { -+ printf("Child %d exited with status %d\n", pid, -+ WEXITSTATUS(status)); -+ } -+#endif -+ else if(WIFSIGNALED(status)){ -+ sig = WTERMSIG(status); -+ if(sig != 9){ -+ printf("Child %d exited with signal %d\n", pid, -+ sig); -+ } -+ } -+ else if(WIFSTOPPED(status)){ -+ proc_id = pid_to_processor_id(pid); -+ sig = WSTOPSIG(status); -+ if(signal_index[proc_id] == 1024){ -+ signal_index[proc_id] = 0; -+ last_index = 1023; -+ } -+ else last_index = signal_index[proc_id] - 1; -+ if(((sig == SIGPROF) || (sig == SIGVTALRM) || -+ (sig == SIGALRM)) && -+ (signal_record[proc_id][last_index].signal == sig)&& -+ (signal_record[proc_id][last_index].pid == pid)) -+ signal_index[proc_id] = last_index; -+ signal_record[proc_id][signal_index[proc_id]].pid = pid; -+ gettimeofday(&signal_record[proc_id][signal_index[proc_id]].time, NULL); -+ eip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ signal_record[proc_id][signal_index[proc_id]].addr = eip; -+ signal_record[proc_id][signal_index[proc_id]++].signal = sig; -+ -+ if(proc_id == -1){ -+ sleeping_process_signal(pid, sig); -+ continue; -+ } -+ -+ task = cpu_tasks[proc_id].task; -+ tracing = is_tracing(task); -+ old_tracing = tracing; -+ -+ switch(sig){ -+ case SIGUSR1: -+ sig = 0; -+ op = do_proc_op(task, proc_id); -+ switch(op){ -+ case OP_TRACE_ON: -+ arch_leave_kernel(task, pid); -+ tracing = 1; -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ unmap_physmem(); -+ kmalloc_ok = 0; -+ ptrace(PTRACE_KILL, pid, 0, 0); -+ return(op == OP_REBOOT); -+ case OP_NONE: -+ printf("Detaching pid %d\n", pid); -+ detach(pid, SIGSTOP); -+ continue; -+ default: -+ break; -+ } -+ /* OP_EXEC switches host processes on us, -+ * we want to continue the new one. -+ */ -+ pid = cpu_tasks[proc_id].pid; -+ break; -+ case SIGTRAP: -+ if(!tracing && (debugger_pid != -1)){ -+ child_signal(pid, status); -+ continue; -+ } -+ tracing = 0; -+ if(do_syscall(task, pid)) sig = SIGUSR2; -+ else clear_singlestep(task); -+ break; -+ case SIGPROF: -+ if(tracing) sig = 0; -+ break; -+ case SIGCHLD: -+ case SIGHUP: -+ sig = 0; -+ break; -+ case SIGSEGV: -+ case SIGIO: -+ case SIGALRM: -+ case SIGVTALRM: -+ case SIGFPE: -+ case SIGBUS: -+ case SIGILL: -+ case SIGWINCH: -+ default: -+ tracing = 0; -+ break; -+ } -+ set_tracing(task, tracing); -+ -+ if(!tracing && old_tracing) -+ arch_enter_kernel(task, pid); -+ -+ if(!tracing && (debugger_pid != -1) && (sig != 0) && -+ (sig != SIGALRM) && (sig != SIGVTALRM) && -+ (sig != SIGSEGV) && (sig != SIGTRAP) && -+ (sig != SIGUSR2) && (sig != SIGIO) && -+ (sig != SIGFPE)){ -+ child_signal(pid, status); -+ continue; -+ } -+ -+ if(tracing){ -+ if(singlestepping_tt(task)) -+ cont_type = PTRACE_SINGLESTEP; -+ else cont_type = PTRACE_SYSCALL; -+ } -+ else cont_type = PTRACE_CONT; -+ -+ if((cont_type == PTRACE_CONT) && -+ (debugger_pid != -1) && strace) -+ cont_type = PTRACE_SYSCALL; -+ -+ if(ptrace(cont_type, pid, 0, sig) != 0){ -+ tracer_panic("ptrace failed to continue " -+ "process - errno = %d\n", -+ errno); -+ } -+ } -+ } -+ return(0); -+} -+ -+static int __init uml_debug_setup(char *line, int *add) -+{ -+ char *next; -+ -+ debug = 1; -+ *add = 0; -+ if(*line != '=') return(0); -+ line++; -+ -+ while(line != NULL){ -+ next = strchr(line, ','); -+ if(next) *next++ = '\0'; -+ -+ if(!strcmp(line, "go")) debug_stop = 0; -+ else if(!strcmp(line, "parent")) debug_parent = 1; -+ else printf("Unknown debug option : '%s'\n", line); -+ -+ line = next; -+ } -+ return(0); -+} -+ -+__uml_setup("debug", uml_debug_setup, -+"debug\n" -+" Starts up the kernel under the control of gdb. See the \n" -+" kernel debugging tutorial and the debugging session pages\n" -+" at http://user-mode-linux.sourceforge.net/ for more information.\n\n" -+); -+ -+static int __init uml_debugtrace_setup(char *line, int *add) -+{ -+ debug_trace = 1; -+ return 0; -+} -+__uml_setup("debugtrace", uml_debugtrace_setup, -+"debugtrace\n" -+" Causes the tracing thread to pause until it is attached by a\n" -+" debugger and continued. This is mostly for debugging crashes\n" -+" early during boot, and should be pretty much obsoleted by\n" -+" the debug switch.\n\n" -+); -+ -+static int __init uml_honeypot_setup(char *line, int *add) -+{ -+ jail_setup("", add); -+ honeypot = 1; -+ return 0; -+} -+__uml_setup("honeypot", uml_honeypot_setup, -+"honeypot\n" -+" This makes UML put process stacks in the same location as they are\n" -+" on the host, allowing expoits such as stack smashes to work against\n" -+" UML. This implies 'jail'.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/trap_user.c um/arch/um/kernel/tt/trap_user.c ---- orig/arch/um/kernel/tt/trap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/trap_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "tt.h" -+ -+void sig_handler_common_tt(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct tt_regs save_regs, *r; -+ struct signal_info *info; -+ int save_errno = errno, is_user; -+ -+ unprotect_kernel_mem(); -+ -+ r = &TASK_REGS(get_current())->tt; -+ save_regs = *r; -+ is_user = user_context(SC_SP(sc)); -+ r->sc = sc; -+ if(sig != SIGUSR2) -+ r->syscall = -1; -+ -+ change_sig(SIGUSR1, 1); -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ if(is_user){ -+ interrupt_end(); -+ block_signals(); -+ change_sig(SIGUSR1, 0); -+ set_user_mode(NULL); -+ } -+ *r = save_regs; -+ errno = save_errno; -+ if(is_user) protect_kernel_mem(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/uaccess.c um/arch/um/kernel/tt/uaccess.c ---- orig/arch/um/kernel/tt/uaccess.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/uaccess.c 2003-11-13 00:12:45.000000000 -0500 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/uaccess.h" -+ -+int copy_from_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_READ, from, n)) -+ return(n); -+ -+ return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int copy_to_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, to, n)) -+ return(n); -+ -+ return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int strncpy_from_user_tt(char *dst, const char *src, int count) -+{ -+ int n; -+ -+ if(!access_ok_tt(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = __do_strncpy_from_user(dst, src, count, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher); -+ if(n < 0) return(-EFAULT); -+ return(n); -+} -+ -+int __clear_user_tt(void *mem, int len) -+{ -+ return(__do_clear_user(mem, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int clear_user_tt(void *mem, int len) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, mem, len)) -+ return(len); -+ -+ return(__do_clear_user(mem, len, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int strnlen_user_tt(const void *str, int len) -+{ -+ return(__do_strnlen_user(str, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/uaccess_user.c um/arch/um/kernel/tt/uaccess_user.c ---- orig/arch/um/kernel/tt/uaccess_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/uaccess_user.c 2004-01-31 02:48:29.000000000 -0500 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+#include "user_util.h" -+#include "uml_uaccess.h" -+#include "task.h" -+#include "kern_util.h" -+ -+int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) from)); -+} -+ -+static void __do_strncpy(void *dst, const void *src, int count) -+{ -+ strncpy(dst, src, count); -+} -+ -+int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, -+ __do_strncpy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(strlen(dst)); -+ else return(-1); -+} -+ -+static void __do_clear(void *to, const void *from, int n) -+{ -+ memset(to, 0, n); -+} -+ -+int __do_clear_user(void *mem, unsigned long len, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, -+ __do_clear, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(len - (fault - (unsigned long) mem)); -+} -+ -+int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ int ret; -+ unsigned long *faddrp = (unsigned long *)fault_addr; -+ sigjmp_buf jbuf; -+ -+ *fault_catcher = &jbuf; -+ if(sigsetjmp(jbuf, 1) == 0) -+ ret = strlen(str) + 1; -+ else ret = *faddrp - (unsigned long) str; -+ -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ -+ TASK_REGS(get_current())->tt = save; -+ return ret; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/unmap.c um/arch/um/kernel/tt/unmap.c ---- orig/arch/um/kernel/tt/unmap.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tt/unmap.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/mman.h> -+ -+int switcheroo(int fd, int prot, void *from, void *to, int size) -+{ -+ if(munmap(to, size) < 0){ -+ return(-1); -+ } -+ if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){ -+ return(-1); -+ } -+ if(munmap(from, size) < 0){ -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tty_log.c um/arch/um/kernel/tty_log.c ---- orig/arch/um/kernel/tty_log.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/tty_log.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,228 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and -+ * geoffrey hing <ghing@net.ohio-state.edu> -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <string.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <sys/time.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+ -+#define TTY_LOG_DIR "./" -+ -+/* Set early in boot and then unchanged */ -+static char *tty_log_dir = TTY_LOG_DIR; -+static int tty_log_fd = -1; -+ -+#define TTY_LOG_OPEN 1 -+#define TTY_LOG_CLOSE 2 -+#define TTY_LOG_WRITE 3 -+#define TTY_LOG_EXEC 4 -+ -+#define TTY_READ 1 -+#define TTY_WRITE 2 -+ -+struct tty_log_buf { -+ int what; -+ unsigned long tty; -+ int len; -+ int direction; -+ unsigned long sec; -+ unsigned long usec; -+}; -+ -+int open_tty_log(void *tty, void *current_tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; -+ int fd; -+ -+ gettimeofday(&tv, NULL); -+ if(tty_log_fd != -1){ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, -+ .tty = (unsigned long) tty, -+ .len = sizeof(current_tty), -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ os_write_file(tty_log_fd, ¤t_tty, data.len); -+ return(tty_log_fd); -+ } -+ -+ sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, -+ (unsigned int) tv.tv_usec); -+ -+ fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printk("open_tty_log : couldn't open '%s', errno = %d\n", -+ buf, -fd); -+ } -+ return(fd); -+} -+ -+void close_tty_log(int fd, void *tty) -+{ -+ struct tty_log_buf data; -+ struct timeval tv; -+ -+ if(tty_log_fd != -1){ -+ gettimeofday(&tv, NULL); -+ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, -+ .tty = (unsigned long) tty, -+ .len = 0, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ return; -+ } -+ os_close_file(fd); -+} -+ -+static int log_chunk(int fd, const char *buf, int len) -+{ -+ int total = 0, try, missed, n; -+ char chunk[64]; -+ -+ while(len > 0){ -+ try = (len > sizeof(chunk)) ? sizeof(chunk) : len; -+ missed = copy_from_user_proc(chunk, (char *) buf, try); -+ try -= missed; -+ n = os_write_file(fd, chunk, try); -+ if(n != try) { -+ if(n < 0) -+ return(n); -+ return(-EIO); -+ } -+ if(missed != 0) -+ return(-EFAULT); -+ -+ len -= try; -+ total += try; -+ buf += try; -+ } -+ -+ return(total); -+} -+ -+int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ int direction; -+ -+ if(fd == tty_log_fd){ -+ gettimeofday(&tv, NULL); -+ direction = is_read ? TTY_READ : TTY_WRITE; -+ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = direction, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ } -+ -+ return(log_chunk(fd, buf, len)); -+} -+ -+void log_exec(char **argv, void *tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char **ptr,*arg; -+ int len; -+ -+ if(tty_log_fd == -1) return; -+ -+ gettimeofday(&tv, NULL); -+ -+ len = 0; -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ len += strlen_user_proc(arg); -+ } -+ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); -+ } -+} -+ -+extern void register_tty_logger(int (*opener)(void *, void *), -+ int (*writer)(int, const char *, int, -+ void *, int), -+ void (*closer)(int, void *)); -+ -+static int register_logger(void) -+{ -+ register_tty_logger(open_tty_log, write_tty_log, close_tty_log); -+ return(0); -+} -+ -+__uml_initcall(register_logger); -+ -+static int __init set_tty_log_dir(char *name, int *add) -+{ -+ tty_log_dir = name; -+ return 0; -+} -+ -+__uml_setup("tty_log_dir=", set_tty_log_dir, -+"tty_log_dir=<directory>\n" -+" This is used to specify the directory where the logs of all pty\n" -+" data from this UML machine will be written.\n\n" -+); -+ -+static int __init set_tty_log_fd(char *name, int *add) -+{ -+ char *end; -+ -+ tty_log_fd = strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ printf("set_tty_log_fd - strtoul failed on '%s'\n", name); -+ tty_log_fd = -1; -+ } -+ return 0; -+} -+ -+__uml_setup("tty_log_fd=", set_tty_log_fd, -+"tty_log_fd=<fd>\n" -+" This is used to specify a preconfigured file descriptor to which all\n" -+" tty data will be written. Preconfigure the descriptor with something\n" -+" like '10>tty_log tty_log_fd=10'.\n\n" -+); -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/uaccess_user.c um/arch/um/kernel/uaccess_user.c ---- orig/arch/um/kernel/uaccess_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/uaccess_user.c 2004-01-31 02:48:08.000000000 -0500 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+ -+/* These are here rather than tt/uaccess.c because skas mode needs them in -+ * order to do SIGBUS recovery when a tmpfs mount runs out of room. -+ */ -+ -+unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out) -+{ -+ unsigned long *faddrp = (unsigned long *) fault_addr, ret; -+ -+ sigjmp_buf jbuf; -+ *fault_catcher = &jbuf; -+ if(sigsetjmp(jbuf, 1) == 0){ -+ (*op)(to, from, n); -+ ret = 0; -+ *faulted_out = 0; -+ } -+ else { -+ ret = *faddrp; -+ *faulted_out = 1; -+ } -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ return ret; -+} -+ -+void __do_copy(void *to, const void *from, int n) -+{ -+ memcpy(to, from, n); -+} -+ -+ -+int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) to)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/um_arch.c um/arch/um/kernel/um_arch.c ---- orig/arch/um/kernel/um_arch.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/um_arch.c 2004-01-10 00:11:09.000000000 -0500 -@@ -0,0 +1,431 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/notifier.h" -+#include "linux/mm.h" -+#include "linux/types.h" -+#include "linux/tty.h" -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/spinlock.h" -+#include "linux/utsname.h" -+#include "linux/sysrq.h" -+#include "linux/seq_file.h" -+#include "linux/delay.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/ptrace.h" -+#include "asm/elf.h" -+#include "asm/user.h" -+#include "ubd_user.h" -+#include "asm/current.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mprot.h" -+#include "mem_user.h" -+#include "mem.h" -+#include "umid.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+#include "mode.h" -+ -+#define DEFAULT_COMMAND_LINE "root=/dev/ubd0" -+ -+struct cpuinfo_um boot_cpu_data = { -+ .loops_per_jiffy = 0, -+ .pgd_quick = NULL, -+ .pmd_quick = NULL, -+ .pte_quick = NULL, -+ .pgtable_cache_sz = 0, -+ .ipi_pipe = { -1, -1 } -+}; -+ -+unsigned long thread_saved_pc(struct thread_struct *thread) -+{ -+ return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, -+ thread))); -+} -+ -+static int show_cpuinfo(struct seq_file *m, void *v) -+{ -+ int index = 0; -+ -+#ifdef CONFIG_SMP -+ index = (struct cpuinfo_um *)v - cpu_data; -+ if (!(cpu_online_map & (1 << index))) -+ return 0; -+#endif -+ -+ seq_printf(m, "processor\t: %d\n", index); -+ seq_printf(m, "vendor_id\t: User Mode Linux\n"); -+ seq_printf(m, "model name\t: UML\n"); -+ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); -+ seq_printf(m, "host\t\t: %s\n", host_info); -+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n", -+ loops_per_jiffy/(500000/HZ), -+ (loops_per_jiffy/(5000/HZ)) % 100); -+ -+ return(0); -+} -+ -+static void *c_start(struct seq_file *m, loff_t *pos) -+{ -+ return *pos < NR_CPUS ? cpu_data + *pos : NULL; -+} -+ -+static void *c_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return c_start(m, pos); -+} -+ -+static void c_stop(struct seq_file *m, void *v) -+{ -+} -+ -+struct seq_operations cpuinfo_op = { -+ .start = c_start, -+ .next = c_next, -+ .stop = c_stop, -+ .show = show_cpuinfo, -+}; -+ -+pte_t * __bad_pagetable(void) -+{ -+ panic("Someone should implement __bad_pagetable"); -+ return(NULL); -+} -+ -+/* Set in linux_main */ -+unsigned long host_task_size; -+unsigned long task_size; -+unsigned long uml_start; -+ -+/* Set in early boot */ -+unsigned long uml_physmem; -+unsigned long uml_reserved; -+unsigned long start_vm; -+unsigned long end_vm; -+int ncpus = 1; -+ -+#ifdef CONFIG_MODE_TT -+/* Pointer set in linux_main, the array itself is private to each thread, -+ * and changed at address space creation time so this poses no concurrency -+ * problems. -+ */ -+static char *argv1_begin = NULL; -+static char *argv1_end = NULL; -+#endif -+ -+/* Set in early boot */ -+static int have_root __initdata = 0; -+long physmem_size = 32 * 1024 * 1024; -+ -+void set_cmdline(char *cmd) -+{ -+#ifdef CONFIG_MODE_TT -+ char *umid, *ptr; -+ -+ if(CHOOSE_MODE(honeypot, 0)) return; -+ -+ umid = get_umid(1); -+ if(umid != NULL){ -+ snprintf(argv1_begin, -+ (argv1_end - argv1_begin) * sizeof(*ptr), -+ "(%s) ", umid); -+ ptr = &argv1_begin[strlen(argv1_begin)]; -+ } -+ else ptr = argv1_begin; -+ -+ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); -+ memset(argv1_begin + strlen(argv1_begin), '\0', -+ argv1_end - argv1_begin - strlen(argv1_begin)); -+#endif -+} -+ -+static char *usage_string = -+"User Mode Linux v%s\n" -+" available at http://user-mode-linux.sourceforge.net/\n\n"; -+ -+static int __init uml_version_setup(char *line, int *add) -+{ -+ printf("%s\n", system_utsname.release); -+ exit(0); -+} -+ -+__uml_setup("--version", uml_version_setup, -+"--version\n" -+" Prints the version number of the kernel.\n\n" -+); -+ -+static int __init uml_root_setup(char *line, int *add) -+{ -+ have_root = 1; -+ return 0; -+} -+ -+__uml_setup("root=", uml_root_setup, -+"root=<file containing the root fs>\n" -+" This is actually used by the generic kernel in exactly the same\n" -+" way as in any other kernel. If you configure a number of block\n" -+" devices and want to boot off something other than ubd0, you \n" -+" would use something like:\n" -+" root=/dev/ubd5\n\n" -+); -+ -+#ifdef CONFIG_SMP -+static int __init uml_ncpus_setup(char *line, int *add) -+{ -+ if (!sscanf(line, "%d", &ncpus)) { -+ printf("Couldn't parse [%s]\n", line); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+__uml_setup("ncpus=", uml_ncpus_setup, -+"ncpus=<# of desired CPUs>\n" -+" This tells an SMP kernel how many virtual processors to start.\n\n" -+); -+#endif -+ -+int force_tt = 0; -+ -+#if defined(CONFIG_MODE_TT) && defined(CONFIG_MODE_SKAS) -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ force_tt = 1; -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_SKAS -+ -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_TT -+ -+#define DEFAULT_TT 1 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); -+ return(0); -+} -+ -+#else -+ -+#error Either CONFIG_MODE_TT or CONFIG_MODE_SKAS must be enabled -+ -+#endif -+#endif -+#endif -+ -+__uml_setup("mode=tt", mode_tt_setup, -+"mode=tt\n" -+" When both CONFIG_MODE_TT and CONFIG_MODE_SKAS are enabled, this option\n" -+" forces UML to run in tt (tracing thread) mode. It is not the default\n" -+" because it's slower and less secure than skas mode.\n\n" -+); -+ -+int mode_tt = DEFAULT_TT; -+ -+static int __init Usage(char *line, int *add) -+{ -+ const char **p; -+ -+ printf(usage_string, system_utsname.release); -+ p = &__uml_help_start; -+ while (p < &__uml_help_end) { -+ printf("%s", *p); -+ p++; -+ } -+ exit(0); -+} -+ -+__uml_setup("--help", Usage, -+"--help\n" -+" Prints this message.\n\n" -+); -+ -+static int __init uml_checksetup(char *line, int *add) -+{ -+ struct uml_param *p; -+ -+ p = &__uml_setup_start; -+ while(p < &__uml_setup_end) { -+ int n; -+ -+ n = strlen(p->str); -+ if(!strncmp(line, p->str, n)){ -+ if (p->setup_func(line + n, add)) return 1; -+ } -+ p++; -+ } -+ return 0; -+} -+ -+static void __init uml_postsetup(void) -+{ -+ initcall_t *p; -+ -+ p = &__uml_postsetup_start; -+ while(p < &__uml_postsetup_end){ -+ (*p)(); -+ p++; -+ } -+ return; -+} -+ -+/* Set during early boot */ -+unsigned long brk_start; -+unsigned long end_iomem; -+ -+#define MIN_VMALLOC (32 * 1024 * 1024) -+ -+int linux_main(int argc, char **argv) -+{ -+ unsigned long avail; -+ unsigned long virtmem_size, max_physmem; -+ unsigned int i, add; -+ -+ for (i = 1; i < argc; i++){ -+ if((i == 1) && (argv[i][0] == ' ')) continue; -+ add = 1; -+ uml_checksetup(argv[i], &add); -+ if(add) add_arg(saved_command_line, argv[i]); -+ } -+ if(have_root == 0) add_arg(saved_command_line, DEFAULT_COMMAND_LINE); -+ -+ mode_tt = force_tt ? 1 : !can_do_skas(); -+ uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, -+ &host_task_size, &task_size); -+ -+ brk_start = (unsigned long) sbrk(0); -+ CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start); -+ -+ uml_physmem = uml_start; -+ -+ /* Reserve up to 4M after the current brk */ -+ uml_reserved = ROUND_4M(brk_start) + (1 << 22); -+ -+ setup_machinename(system_utsname.machine); -+ -+#ifdef CONFIG_MODE_TT -+ argv1_begin = argv[1]; -+ argv1_end = &argv[1][strlen(argv[1])]; -+#endif -+ -+ highmem = 0; -+ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; -+ max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; -+ -+ /* Zones have to begin on a 1 << MAX_ORDER page boundary, -+ * so this makes sure that's true for highmem -+ */ -+ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); -+ if(physmem_size + iomem_size > max_physmem){ -+ highmem = physmem_size + iomem_size - max_physmem; -+ physmem_size -= highmem; -+#ifndef CONFIG_HIGHMEM -+ highmem = 0; -+ printf("CONFIG_HIGHMEM not enabled - physical memory shrunk " -+ "to %ld bytes\n", physmem_size); -+#endif -+ } -+ -+ high_physmem = uml_physmem + physmem_size; -+ end_iomem = high_physmem + iomem_size; -+ high_memory = (void *) end_iomem; -+ -+ start_vm = VMALLOC_START; -+ -+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); -+ if(init_maps(physmem_size, iomem_size, highmem)){ -+ printf("Failed to allocate mem_map for %ld bytes of physical " -+ "memory and %ld bytes of highmem\n", physmem_size, -+ highmem); -+ exit(1); -+ } -+ -+ virtmem_size = physmem_size; -+ avail = get_kmem_end() - start_vm; -+ if(physmem_size > avail) virtmem_size = avail; -+ end_vm = start_vm + virtmem_size; -+ -+ if(virtmem_size < physmem_size) -+ printf("Kernel virtual memory size shrunk to %ld bytes\n", -+ virtmem_size); -+ -+ uml_postsetup(); -+ -+ init_task.thread.kernel_stack = (unsigned long) &init_task + -+ 2 * PAGE_SIZE; -+ -+ task_protections((unsigned long) &init_task); -+ os_flush_stdout(); -+ -+ return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); -+} -+ -+static int panic_exit(struct notifier_block *self, unsigned long unused1, -+ void *unused2) -+{ -+#ifdef CONFIG_SYSRQ -+ handle_sysrq('p', ¤t->thread.regs, NULL, NULL); -+#endif -+ machine_halt(); -+ return(0); -+} -+ -+static struct notifier_block panic_exit_notifier = { -+ .notifier_call = panic_exit, -+ .next = NULL, -+ .priority = 0 -+}; -+ -+void __init setup_arch(char **cmdline_p) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ paging_init(); -+ strcpy(command_line, saved_command_line); -+ *cmdline_p = command_line; -+ setup_hostinfo(); -+} -+ -+void __init check_bugs(void) -+{ -+ arch_check_bugs(); -+ check_ptrace(); -+ check_sigio(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/umid.c um/arch/um/kernel/umid.c ---- orig/arch/um/kernel/umid.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/umid.c 2004-01-26 10:13:01.000000000 -0500 -@@ -0,0 +1,327 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <stdlib.h> -+#include <dirent.h> -+#include <signal.h> -+#include <sys/stat.h> -+#include <sys/param.h> -+#include "user.h" -+#include "umid.h" -+#include "init.h" -+#include "os.h" -+#include "user_util.h" -+#include "choose-mode.h" -+ -+#define UMID_LEN 64 -+#define UML_DIR "~/.uml/" -+ -+/* Changed by set_umid and make_umid, which are run early in boot */ -+static char umid[UMID_LEN] = { 0 }; -+ -+/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ -+static char *uml_dir = UML_DIR; -+ -+/* Changed by set_umid */ -+static int umid_is_random = 1; -+static int umid_inited = 0; -+ -+static int make_umid(int (*printer)(const char *fmt, ...)); -+ -+static int __init set_umid(char *name, int is_random, -+ int (*printer)(const char *fmt, ...)) -+{ -+ if(umid_inited){ -+ (*printer)("Unique machine name can't be set twice\n"); -+ return(-1); -+ } -+ -+ if(strlen(name) > UMID_LEN - 1) -+ (*printer)("Unique machine name is being truncated to %s " -+ "characters\n", UMID_LEN); -+ strncpy(umid, name, UMID_LEN - 1); -+ umid[UMID_LEN - 1] = '\0'; -+ -+ umid_is_random = is_random; -+ umid_inited = 1; -+ return 0; -+} -+ -+static int __init set_umid_arg(char *name, int *add) -+{ -+ return(set_umid(name, 0, printf)); -+} -+ -+__uml_setup("umid=", set_umid_arg, -+"umid=<name>\n" -+" This is used to assign a unique identity to this UML machine and\n" -+" is used for naming the pid file and management console socket.\n\n" -+); -+ -+int __init umid_file_name(char *name, char *buf, int len) -+{ -+ int n; -+ -+ if(!umid_inited && make_umid(printk)) return(-1); -+ -+ n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; -+ if(n > len){ -+ printk("umid_file_name : buffer too short\n"); -+ return(-1); -+ } -+ -+ sprintf(buf, "%s%s/%s", uml_dir, umid, name); -+ return(0); -+} -+ -+extern int tracing_pid; -+ -+static int __init create_pid_file(void) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")]; -+ int fd, n; -+ -+ if(umid_file_name("pid", file, sizeof(file))) return 0; -+ -+ fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printf("Open of machine pid file \"%s\" failed - " -+ "err = %d\n", file, -fd); -+ return 0; -+ } -+ -+ sprintf(pid, "%d\n", os_getpid()); -+ n = os_write_file(fd, pid, strlen(pid)); -+ if(n != strlen(pid)) -+ printf("Write of pid file failed - err = %d\n", -n); -+ os_close_file(fd); -+ return 0; -+} -+ -+static int actually_do_remove(char *dir) -+{ -+ DIR *directory; -+ struct dirent *ent; -+ int len; -+ char file[256]; -+ -+ directory = opendir(dir); -+ if(directory == NULL){ -+ printk("actually_do_remove : couldn't open directory '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ while((ent = readdir(directory)) != NULL){ -+ if(!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) -+ continue; -+ len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; -+ if(len > sizeof(file)){ -+ printk("Not deleting '%s' from '%s' - name too long\n", -+ ent->d_name, dir); -+ continue; -+ } -+ sprintf(file, "%s/%s", dir, ent->d_name); -+ if(unlink(file) < 0){ -+ printk("actually_do_remove : couldn't remove '%s' " -+ "from '%s', errno = %d\n", ent->d_name, dir, -+ errno); -+ return(1); -+ } -+ } -+ if(rmdir(dir) < 0){ -+ printk("actually_do_remove : couldn't rmdir '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ return(0); -+} -+ -+void remove_umid_dir(void) -+{ -+ char dir[strlen(uml_dir) + UMID_LEN + 1]; -+ if(!umid_inited) return; -+ -+ sprintf(dir, "%s%s", uml_dir, umid); -+ actually_do_remove(dir); -+} -+ -+char *get_umid(int only_if_set) -+{ -+ if(only_if_set && umid_is_random) return(NULL); -+ return(umid); -+} -+ -+int not_dead_yet(char *dir) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")], *end; -+ int dead, fd, p, n; -+ -+ sprintf(file, "%s/pid", dir); -+ dead = 0; -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ if(fd != -ENOENT){ -+ printk("not_dead_yet : couldn't open pid file '%s', " -+ "err = %d\n", file, -fd); -+ return(1); -+ } -+ dead = 1; -+ } -+ if(fd > 0){ -+ n = os_read_file(fd, pid, sizeof(pid)); -+ if(n < 0){ -+ printk("not_dead_yet : couldn't read pid file '%s', " -+ "err = %d\n", file, -n); -+ return(1); -+ } -+ p = strtoul(pid, &end, 0); -+ if(end == pid){ -+ printk("not_dead_yet : couldn't parse pid file '%s', " -+ "errno = %d\n", file, errno); -+ dead = 1; -+ } -+ if(((kill(p, 0) < 0) && (errno == ESRCH)) || -+ (p == CHOOSE_MODE(tracing_pid, os_getpid()))) -+ dead = 1; -+ } -+ if(!dead) return(1); -+ return(actually_do_remove(dir)); -+} -+ -+static int __init set_uml_dir(char *name, int *add) -+{ -+ if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ -+ uml_dir = malloc(strlen(name) + 1); -+ if(uml_dir == NULL){ -+ printf("Failed to malloc uml_dir - error = %d\n", -+ errno); -+ uml_dir = name; -+ /* Return 0 here because do_initcalls doesn't look at -+ * the return value. -+ */ -+ return(0); -+ } -+ sprintf(uml_dir, "%s/", name); -+ } -+ else uml_dir = name; -+ return(0); -+} -+ -+static int __init make_uml_dir(void) -+{ -+ char dir[MAXPATHLEN + 1] = { '\0' }; -+ int len; -+ -+ if(*uml_dir == '~'){ -+ char *home = getenv("HOME"); -+ -+ if(home == NULL){ -+ printf("make_uml_dir : no value in environment for " -+ "$HOME\n"); -+ exit(1); -+ } -+ strncpy(dir, home, sizeof(dir)); -+ uml_dir++; -+ } -+ len = strlen(dir); -+ strncat(dir, uml_dir, sizeof(dir) - len); -+ len = strlen(dir); -+ if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){ -+ dir[len] = '/'; -+ dir[len + 1] = '\0'; -+ } -+ -+ uml_dir = malloc(strlen(dir) + 1); -+ if(uml_dir == NULL){ -+ printf("make_uml_dir : malloc failed, errno = %d\n", errno); -+ exit(1); -+ } -+ strcpy(uml_dir, dir); -+ -+ if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ -+ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); -+ return(-1); -+ } -+ return 0; -+} -+ -+static int __init make_umid(int (*printer)(const char *fmt, ...)) -+{ -+ int fd, err; -+ char tmp[strlen(uml_dir) + UMID_LEN + 1]; -+ -+ strncpy(tmp, uml_dir, sizeof(tmp) - 1); -+ tmp[sizeof(tmp) - 1] = '\0'; -+ -+ if(!umid_inited){ -+ strcat(tmp, "XXXXXX"); -+ fd = mkstemp(tmp); -+ if(fd < 0){ -+ (*printer)("make_umid - mkstemp failed, errno = %d\n", -+ errno); -+ return(1); -+ } -+ -+ os_close_file(fd); -+ /* There's a nice tiny little race between this unlink and -+ * the mkdir below. It'd be nice if there were a mkstemp -+ * for directories. -+ */ -+ unlink(tmp); -+ set_umid(&tmp[strlen(uml_dir)], 1, printer); -+ } -+ -+ sprintf(tmp, "%s%s", uml_dir, umid); -+ -+ err = mkdir(tmp, 0777); -+ if(err < 0){ -+ if(errno == EEXIST){ -+ if(not_dead_yet(tmp)){ -+ (*printer)("umid '%s' is in use\n", umid); -+ return(-1); -+ } -+ err = mkdir(tmp, 0777); -+ } -+ } -+ if(err < 0){ -+ (*printer)("Failed to create %s - errno = %d\n", umid, errno); -+ return(-1); -+ } -+ -+ return(0); -+} -+ -+__uml_setup("uml_dir=", set_uml_dir, -+"uml_dir=<directory>\n" -+" The location to place the pid and umid files.\n\n" -+); -+ -+__uml_postsetup(make_uml_dir); -+ -+static int __init make_umid_setup(void) -+{ -+ return(make_umid(printf)); -+} -+ -+__uml_postsetup(make_umid_setup); -+__uml_postsetup(create_pid_file); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/user_syms.c um/arch/um/kernel/user_syms.c ---- orig/arch/um/kernel/user_syms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/user_syms.c 2004-01-23 00:44:09.000000000 -0500 -@@ -0,0 +1,119 @@ -+#include <stdio.h> -+#include <unistd.h> -+#include <dirent.h> -+#include <fcntl.h> -+#include <errno.h> -+#include <utime.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/vfs.h> -+#include <sys/ioctl.h> -+#include "user_util.h" -+#include "mem_user.h" -+ -+/* XXX All the __CONFIG_* stuff is broken because this file can't include -+ * config.h -+ */ -+ -+/* Had to steal this from linux/module.h because that file can't be included -+ * since this includes various user-level headers. -+ */ -+ -+struct module_symbol -+{ -+ unsigned long value; -+ const char *name; -+}; -+ -+/* Indirect stringification. */ -+ -+#define __MODULE_STRING_1(x) #x -+#define __MODULE_STRING(x) __MODULE_STRING_1(x) -+ -+#if !defined(__AUTOCONF_INCLUDED__) -+ -+#define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module -+#define EXPORT_SYMBOL(var) error config_must_be_included_before_module -+#define EXPORT_SYMBOL_NOVERS(var) error config_must_be_included_before_module -+ -+#elif !defined(__CONFIG_MODULES__) -+ -+#define __EXPORT_SYMBOL(sym,str) -+#define EXPORT_SYMBOL(var) -+#define EXPORT_SYMBOL_NOVERS(var) -+ -+#else -+ -+#define __EXPORT_SYMBOL(sym, str) \ -+const char __kstrtab_##sym[] \ -+__attribute__((section(".kstrtab"))) = str; \ -+const struct module_symbol __ksymtab_##sym \ -+__attribute__((section("__ksymtab"))) = \ -+{ (unsigned long)&sym, __kstrtab_##sym } -+ -+#if defined(__MODVERSIONS__) || !defined(__CONFIG_MODVERSIONS__) -+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) -+#else -+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) -+#endif -+ -+#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) -+ -+#endif -+ -+EXPORT_SYMBOL(__errno_location); -+ -+EXPORT_SYMBOL(access); -+EXPORT_SYMBOL(open); -+EXPORT_SYMBOL(open64); -+EXPORT_SYMBOL(close); -+EXPORT_SYMBOL(read); -+EXPORT_SYMBOL(write); -+EXPORT_SYMBOL(dup2); -+EXPORT_SYMBOL(__xstat); -+EXPORT_SYMBOL(__lxstat); -+EXPORT_SYMBOL(__lxstat64); -+EXPORT_SYMBOL(lseek); -+EXPORT_SYMBOL(lseek64); -+EXPORT_SYMBOL(chown); -+EXPORT_SYMBOL(truncate); -+EXPORT_SYMBOL(utime); -+EXPORT_SYMBOL(chmod); -+EXPORT_SYMBOL(rename); -+EXPORT_SYMBOL(__xmknod); -+ -+EXPORT_SYMBOL(symlink); -+EXPORT_SYMBOL(link); -+EXPORT_SYMBOL(unlink); -+EXPORT_SYMBOL(readlink); -+ -+EXPORT_SYMBOL(mkdir); -+EXPORT_SYMBOL(rmdir); -+EXPORT_SYMBOL(opendir); -+EXPORT_SYMBOL(readdir); -+EXPORT_SYMBOL(closedir); -+EXPORT_SYMBOL(seekdir); -+EXPORT_SYMBOL(telldir); -+ -+EXPORT_SYMBOL(ioctl); -+ -+extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes, -+ __off64_t __offset); -+extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n, -+ __off64_t __offset); -+EXPORT_SYMBOL(pread64); -+EXPORT_SYMBOL(pwrite64); -+ -+EXPORT_SYMBOL(statfs); -+EXPORT_SYMBOL(statfs64); -+ -+EXPORT_SYMBOL(memcpy); -+EXPORT_SYMBOL(getuid); -+ -+EXPORT_SYMBOL(memset); -+EXPORT_SYMBOL(strstr); -+EXPORT_SYMBOL(strpbrk); -+EXPORT_SYMBOL(strlen); -+EXPORT_SYMBOL(printf); -+ -+EXPORT_SYMBOL(find_iomem); -diff -Naur -X ../exclude-files orig/arch/um/kernel/user_util.c um/arch/um/kernel/user_util.c ---- orig/arch/um/kernel/user_util.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/kernel/user_util.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <limits.h> -+#include <sys/mman.h> -+#include <sys/stat.h> -+#include <sys/ptrace.h> -+#include <sys/utsname.h> -+#include <sys/param.h> -+#include <sys/time.h> -+#include "asm/types.h" -+#include <ctype.h> -+#include <signal.h> -+#include <wait.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <sched.h> -+#include <termios.h> -+#include <string.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "mem_user.h" -+#include "init.h" -+#include "helper.h" -+#include "uml-config.h" -+ -+#define COMMAND_LINE_SIZE _POSIX_ARG_MAX -+ -+/* Changed in linux_main and setup_arch, which run before SMP is started */ -+char saved_command_line[COMMAND_LINE_SIZE] = { 0 }; -+char command_line[COMMAND_LINE_SIZE] = { 0 }; -+ -+void add_arg(char *cmd_line, char *arg) -+{ -+ if (strlen(cmd_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { -+ printf("add_arg: Too much command line!\n"); -+ exit(1); -+ } -+ if(strlen(cmd_line) > 0) strcat(cmd_line, " "); -+ strcat(cmd_line, arg); -+} -+ -+void stop(void) -+{ -+ while(1) sleep(1000000); -+} -+ -+void stack_protections(unsigned long address) -+{ -+ int prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ -+ if(mprotect((void *) address, page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+void task_protections(unsigned long address) -+{ -+ unsigned long guard = address + page_size(); -+ unsigned long stack = guard + page_size(); -+ int prot = 0, pages; -+#ifdef notdef -+ if(mprotect((void *) guard, page_size(), prot) < 0) -+ panic("protecting guard page failed, errno = %d", errno); -+#endif -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER) - 2; -+ prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ if(mprotect((void *) stack, pages * page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+int wait_for_stop(int pid, int sig, int cont_type, void *relay) -+{ -+ sigset_t *relay_signals = relay; -+ int status, ret; -+ -+ while(1){ -+ ret = waitpid(pid, &status, WUNTRACED); -+ if((ret < 0) || -+ !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ -+ if(ret < 0){ -+ if(errno == EINTR) continue; -+ printk("wait failed, errno = %d\n", -+ errno); -+ } -+ else if(WIFEXITED(status)) -+ printk("process exited with status %d\n", -+ WEXITSTATUS(status)); -+ else if(WIFSIGNALED(status)) -+ printk("process exited with signal %d\n", -+ WTERMSIG(status)); -+ else if((WSTOPSIG(status) == SIGVTALRM) || -+ (WSTOPSIG(status) == SIGALRM) || -+ (WSTOPSIG(status) == SIGIO) || -+ (WSTOPSIG(status) == SIGPROF) || -+ (WSTOPSIG(status) == SIGCHLD) || -+ (WSTOPSIG(status) == SIGWINCH) || -+ (WSTOPSIG(status) == SIGINT)){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else if((relay_signals != NULL) && -+ sigismember(relay_signals, WSTOPSIG(status))){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else printk("process stopped with signal %d\n", -+ WSTOPSIG(status)); -+ panic("wait_for_stop failed to wait for %d to stop " -+ "with %d\n", pid, sig); -+ } -+ return(status); -+ } -+} -+ -+int raw(int fd, int complain) -+{ -+ struct termios tt; -+ int err; -+ -+ tcgetattr(fd, &tt); -+ cfmakeraw(&tt); -+ err = tcsetattr(fd, TCSANOW, &tt); -+ if((err < 0) && complain){ -+ printk("tcsetattr failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+void setup_machinename(char *machine_out) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ strcpy(machine_out, host.machine); -+} -+ -+char host_info[(_UTSNAME_LENGTH + 1) * 4 + _UTSNAME_NODENAME_LENGTH + 1]; -+ -+void setup_hostinfo(void) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ sprintf(host_info, "%s %s %s %s %s", host.sysname, host.nodename, -+ host.release, host.version, host.machine); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/link.ld.in um/arch/um/link.ld.in ---- orig/arch/um/link.ld.in 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/link.ld.in 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,94 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ -+ __binary_start = .; -+ifdef(`MODE_TT', ` -+ .thread_private : { -+ __start_thread_private = .; -+ errno = .; -+ . += 4; -+ arch/um/kernel/tt/unmap_fin.o (.data) -+ __end_thread_private = .; -+ } -+ . = ALIGN(4096); -+ .remap : { arch/um/kernel/tt/unmap_fin.o (.text) } -+') -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ . = ALIGN(4096); -+ .text : -+ { -+ *(.text) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ *(.gnu.linkonce.t*) -+ } -+ .fini : { *(.fini) } =0x9090 -+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) } -+ .rodata1 : { *(.rodata1) } -+ _etext = .; -+ PROVIDE (etext = .); -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/common.ld.in') -+ -+ .data : -+ { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data) -+ *(.gnu.linkonce.d*) -+ CONSTRUCTORS -+ } -+ .data1 : { *(.data1) } -+ .ctors : -+ { -+ *(.ctors) -+ } -+ .dtors : -+ { -+ *(.dtors) -+ } -+ -+ .got : { *(.got.plt) *(.got) } -+ .dynamic : { *(.dynamic) } -+ /* We want the small data sections together, so single-instruction offsets -+ can access them all, and initialized data all before uninitialized, so -+ we can shorten the on-disk segment size. */ -+ .sdata : { *(.sdata) } -+ _edata = .; -+ PROVIDE (edata = .); -+ . = ALIGN(0x1000); -+ .sbss : -+ { -+ __bss_start = .; -+ PROVIDE(_bss_start = .); -+ *(.sbss) -+ *(.scommon) -+ } -+ .bss : -+ { -+ *(.dynbss) -+ *(.bss) -+ *(COMMON) -+ } -+ _end = . ; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+} -diff -Naur -X ../exclude-files orig/arch/um/main.c um/arch/um/main.c ---- orig/arch/um/main.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/main.c 2004-01-29 00:48:42.000000000 -0500 -@@ -0,0 +1,199 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/resource.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/page.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "signal_user.h" -+#include "user.h" -+#include "init.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "uml-config.h" -+ -+/* Set in set_stklim, which is called from main and __wrap_malloc. -+ * __wrap_malloc only calls it if main hasn't started. -+ */ -+unsigned long stacksizelim; -+ -+/* Set in main */ -+char *linux_prog; -+ -+#define PGD_BOUND (4 * 1024 * 1024) -+#define STACKSIZE (8 * 1024 * 1024) -+#define THREAD_NAME_LEN (256) -+ -+static void set_stklim(void) -+{ -+ struct rlimit lim; -+ -+ if(getrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("getrlimit"); -+ exit(1); -+ } -+ if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ -+ lim.rlim_cur = STACKSIZE; -+ if(setrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("setrlimit"); -+ exit(1); -+ } -+ } -+ stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); -+} -+ -+static __init void do_uml_initcalls(void) -+{ -+ initcall_t *call; -+ -+ call = &__uml_initcall_start; -+ while (call < &__uml_initcall_end){; -+ (*call)(); -+ call++; -+ } -+} -+ -+static void last_ditch_exit(int sig) -+{ -+ CHOOSE_MODE(kmalloc_ok = 0, (void) 0); -+ signal(SIGINT, SIG_DFL); -+ signal(SIGTERM, SIG_DFL); -+ signal(SIGHUP, SIG_DFL); -+ uml_cleanup(); -+ exit(1); -+} -+ -+extern int uml_exitcode; -+ -+int main(int argc, char **argv, char **envp) -+{ -+ char **new_argv; -+ sigset_t mask; -+ int ret, i; -+ -+ /* Enable all signals except SIGIO - in some environments, we can -+ * enter with some signals blocked -+ */ -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGIO); -+ if(sigprocmask(SIG_SETMASK, &mask, NULL) < 0){ -+ perror("sigprocmask"); -+ exit(1); -+ } -+ -+#ifdef UML_CONFIG_MODE_TT -+ /* Allocate memory for thread command lines */ -+ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ -+ -+ char padding[THREAD_NAME_LEN] = { -+ [ 0 ... THREAD_NAME_LEN - 2] = ' ', '\0' -+ }; -+ -+ new_argv = malloc((argc + 2) * sizeof(char*)); -+ if(!new_argv) { -+ perror("Allocating extended argv"); -+ exit(1); -+ } -+ -+ new_argv[0] = argv[0]; -+ new_argv[1] = padding; -+ -+ for(i = 2; i <= argc; i++) -+ new_argv[i] = argv[i - 1]; -+ new_argv[argc + 1] = NULL; -+ -+ execvp(new_argv[0], new_argv); -+ perror("execing with extended args"); -+ exit(1); -+ } -+#endif -+ -+ linux_prog = argv[0]; -+ -+ set_stklim(); -+ -+ new_argv = malloc((argc + 1) * sizeof(char *)); -+ if(new_argv == NULL){ -+ perror("Mallocing argv"); -+ exit(1); -+ } -+ for(i=0;i<argc;i++){ -+ new_argv[i] = strdup(argv[i]); -+ if(new_argv[i] == NULL){ -+ perror("Mallocing an arg"); -+ exit(1); -+ } -+ } -+ new_argv[argc] = NULL; -+ -+ set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ -+ do_uml_initcalls(); -+ ret = linux_main(argc, argv); -+ -+ /* Reboot */ -+ if(ret){ -+ printf("\n"); -+ -+ execvp(new_argv[0], new_argv); -+ perror("Failed to exec kernel"); -+ ret = 1; -+ } -+ printf("\n"); -+ return(uml_exitcode); -+} -+ -+#define CAN_KMALLOC() \ -+ (kmalloc_ok && CHOOSE_MODE((getpid() != tracing_pid), 1)) -+ -+extern void *__real_malloc(int); -+ -+void *__wrap_malloc(int size) -+{ -+ if(CAN_KMALLOC()) -+ return(um_kmalloc(size)); -+ else -+ return(__real_malloc(size)); -+} -+ -+void *__wrap_calloc(int n, int size) -+{ -+ void *ptr = __wrap_malloc(n * size); -+ -+ if(ptr == NULL) return(NULL); -+ memset(ptr, 0, n * size); -+ return(ptr); -+} -+ -+extern void __real_free(void *); -+ -+void __wrap_free(void *ptr) -+{ -+ if(CAN_KMALLOC()) kfree(ptr); -+ else __real_free(ptr); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/Makefile um/arch/um/Makefile ---- orig/arch/um/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile 2004-02-14 06:26:41.000000000 -0500 -@@ -0,0 +1,174 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+OS := $(shell uname -s) -+ -+ARCH_DIR = arch/um -+ -+core-y := kernel sys-$(SUBARCH) os-$(OS) -+drivers-y := fs drivers -+subdir-y := $(core-y) $(drivers-y) -+SUBDIRS += $(foreach dir,$(subdir-y),$(ARCH_DIR)/$(dir)) -+ -+CORE_FILES += $(foreach dir,$(core-y),$(ARCH_DIR)/$(dir)/built-in.o) -+DRIVERS += $(foreach dir,$(drivers-y),$(ARCH_DIR)/$(dir)/built-in.o) -+ -+include $(ARCH_DIR)/Makefile-$(SUBARCH) -+include $(ARCH_DIR)/Makefile-os-$(OS) -+ -+MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt -+MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas -+ -+ifneq ($(MAKEFILE-y),) -+ include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y)) -+endif -+ -+EXTRAVERSION := $(EXTRAVERSION)-1um -+ -+include/linux/version.h: arch/$(ARCH)/Makefile -+ -+# Recalculate MODLIB to reflect the EXTRAVERSION changes (via KERNELRELEASE) -+# The way the toplevel Makefile is written EXTRAVERSION is not supposed -+# to be changed outside the toplevel Makefile, but recalculating MODLIB is -+# a sufficient workaround until we no longer need architecture dependent -+# EXTRAVERSION... -+MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -+ -+ifeq ($(CONFIG_DEBUGSYM),y) -+CFLAGS := $(subst -fomit-frame-pointer,,$(CFLAGS)) -+endif -+ -+CFLAGS-$(CONFIG_DEBUGSYM) += -g -+ -+ARCH_INCLUDE = -I$(TOPDIR)/$(ARCH_DIR)/include -+ -+# -Derrno=kernel_errno - This turns all kernel references to errno into -+# kernel_errno to separate them from the libc errno. This allows -fno-common -+# in CFLAGS. Otherwise, it would cause ld to complain about the two different -+# errnos. -+ -+CFLAGS += $(ARCH_CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ -+ -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \ -+ $(MODE_INCLUDE) -+ -+LINKFLAGS += -r -+ -+LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -+ -+# These are needed for clean and mrproper, since in that case .config is not -+# included; the values here are meaningless -+ -+CONFIG_NEST_LEVEL ?= 0 -+CONFIG_KERNEL_HALF_GIGS ?= 0 -+ -+SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000) -+ -+# These aren't in Makefile-tt because they are needed in the !CONFIG_MODE_TT + -+# CONFIG_MODE_SKAS + CONFIG_STATIC_LINK case. -+ -+LINK_TT = -static -+LD_SCRIPT_TT := link.ld -+ -+ifeq ($(CONFIG_STATIC_LINK),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_TT),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_SKAS),y) -+ LINK-y += $(LINK_SKAS) -+ LD_SCRIPT-y := $(LD_SCRIPT_SKAS) -+endif -+endif -+endif -+ -+LD_SCRIPT-y := $(ARCH_DIR)/$(LD_SCRIPT-y) -+M4_MODE_TT := $(shell [ "$(CONFIG_MODE_TT)" = "y" ] && echo -DMODE_TT) -+ -+$(LD_SCRIPT-y): $(LD_SCRIPT-y).in -+ pages=$$(( 1 << $(CONFIG_KERNEL_STACK_ORDER) )) ; \ -+ m4 -DSTART=$$(($(TOP_ADDR) - $(SIZE))) -DELF_ARCH=$(ELF_ARCH) \ -+ -DELF_FORMAT=$(ELF_FORMAT) $(M4_MODE_TT) \ -+ -DKERNEL_STACK_SIZE=$$(( 4096 * $$pages )) $< > $@ -+ -+SYMLINK_HEADERS = archparam.h system.h sigcontext.h processor.h ptrace.h \ -+ arch-signal.h -+SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) -+ -+ARCH_SYMLINKS = include/asm-um/arch arch/um/include/sysdep arch/um/os \ -+ $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h -+ -+ifeq ($(CONFIG_MODE_SKAS), y) -+$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+endif -+ -+GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h -+ -+setup: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) -+ -+linux: setup $(ARCH_DIR)/main.o vmlinux $(LD_SCRIPT-y) -+ mv vmlinux vmlinux.o -+ $(CC) -Wl,-T,$(LD_SCRIPT-y) $(LINK-y) $(LINK_WRAPS) \ -+ -o linux $(ARCH_DIR)/main.o vmlinux.o -L/usr/lib -lutil -+ -+USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) -+USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS)) -+USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \ -+ $(MODE_INCLUDE) -+ -+# To get a definition of F_SETSIG -+USER_CFLAGS += -D_GNU_SOURCE -+ -+CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/link.ld $(ARCH_DIR)/dyn_link.ld \ -+ $(GEN_HEADERS) $(ARCH_DIR)/include/uml-config.h -+ -+$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c -+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+archmrproper: -+ rm -f $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \ -+ $(LD_SCRIPT) $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) -+ -+archclean: sysclean -+ find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -+ -o -name '*.gcov' \) -type f -print | xargs rm -f -+ cd $(ARCH_DIR) ; \ -+ for dir in $(subdir-y) util ; do $(MAKE) -C $$dir clean; done -+ -+archdep: -+ -+$(SYMLINK_HEADERS): -+ cd $(TOPDIR)/$(dir $@) ; \ -+ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) -+ -+include/asm-um/arch: -+ cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch -+ -+arch/um/include/sysdep: -+ cd $(TOPDIR)/arch/um/include && ln -sf sysdep-$(SUBARCH) sysdep -+ -+arch/um/os: -+ cd $(ARCH_DIR) && ln -sf os-$(OS) os -+ -+$(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task -+ $< > $@ -+ -+$(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants -+ $< > $@ -+ -+$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h -+ sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@ -+ -+$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/util/mk_task_user.c \ -+ $(ARCH_DIR)/util/mk_task_kern.c $(SYS_HEADERS) -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util mk_task -+ -+$(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util/mk_constants_user.c \ -+ $(ARCH_DIR)/util/mk_constants_kern.c -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util mk_constants -+ -+export SUBARCH USER_CFLAGS OS -diff -Naur -X ../exclude-files orig/arch/um/Makefile-i386 um/arch/um/Makefile-i386 ---- orig/arch/um/Makefile-i386 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-i386 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,35 @@ -+ifeq ($(CONFIG_HOST_2G_2G), y) -+TOP_ADDR = 0x80000000 -+else -+TOP_ADDR = 0xc0000000 -+endif -+ -+ARCH_CFLAGS = -U__$(SUBARCH)__ -U$(SUBARCH) -DUM_FASTCALL -+ELF_ARCH = $(SUBARCH) -+ELF_FORMAT = elf32-$(SUBARCH) -+ -+I386_H = $(ARCH_DIR)/include/sysdep-i386 -+SYS = $(ARCH_DIR)/sys-i386 -+UTIL = $(SYS)/util -+SUBDIRS += $(UTIL) -+ -+SYS_HEADERS = $(I386_H)/sc.h $(I386_H)/thread.h -+ -+$(I386_H)/sc.h : $(UTIL)/mk_sc -+ $(UTIL)/mk_sc > $@ -+ -+$(I386_H)/thread.h : $(UTIL)/mk_thread -+ $(UTIL)/mk_thread > $@ -+ -+$(UTIL)/mk_sc : $(UTIL)/mk_sc.c -+ $(MAKE) -C $(UTIL) mk_sc -+ -+$(UTIL)/mk_thread : $(UTIL)/mk_thread_user.c $(UTIL)/mk_thread_kern.c \ -+ $(I386_H)/sc.h -+ $(MAKE) -C $(UTIL) mk_thread -+ -+sysclean : -+ rm -f $(SYS_HEADERS) -+ $(MAKE) -C $(UTIL) clean -+ $(MAKE) -C $(SYS) clean -+ -diff -Naur -X ../exclude-files orig/arch/um/Makefile-ia64 um/arch/um/Makefile-ia64 ---- orig/arch/um/Makefile-ia64 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-ia64 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1 @@ -+START_ADDR = 0x1000000000000000 -diff -Naur -X ../exclude-files orig/arch/um/Makefile-os-Linux um/arch/um/Makefile-os-Linux ---- orig/arch/um/Makefile-os-Linux 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-os-Linux 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+SUBDIRS += $(ARCH_DIR)/os-$(OS)/drivers -+DRIVERS += $(ARCH_DIR)/os-$(OS)/drivers/drivers.o -diff -Naur -X ../exclude-files orig/arch/um/Makefile-ppc um/arch/um/Makefile-ppc ---- orig/arch/um/Makefile-ppc 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-ppc 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,9 @@ -+ifeq ($(CONFIG_HOST_2G_2G), y) -+START_ADDR = 0x80000000 -+else -+START_ADDR = 0xc0000000 -+endif -+ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__ -+ -+# The arch is ppc, but the elf32 name is powerpc -+ELF_SUBARCH = powerpc -diff -Naur -X ../exclude-files orig/arch/um/Makefile-skas um/arch/um/Makefile-skas ---- orig/arch/um/Makefile-skas 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-skas 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,20 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+PROFILE += -pg -+ -+CFLAGS-$(CONFIG_GCOV) += -fprofile-arcs -ftest-coverage -+CFLAGS-$(CONFIG_GPROF) += $(PROFILE) -+LINK-$(CONFIG_GPROF) += $(PROFILE) -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/skas/include -+ -+LINK_SKAS = -Wl,-rpath,/lib -+LD_SCRIPT_SKAS = dyn_link.ld -+ -+GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+ -+$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : -+ $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h -diff -Naur -X ../exclude-files orig/arch/um/Makefile-tt um/arch/um/Makefile-tt ---- orig/arch/um/Makefile-tt 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/Makefile-tt 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/tt/include -+ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/etap.h um/arch/um/os-Linux/drivers/etap.h ---- orig/arch/um/os-Linux/drivers/etap.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/etap.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct ethertap_data { -+ char *dev_name; -+ char *gate_addr; -+ int data_fd; -+ int control_fd; -+ void *dev; -+}; -+ -+extern struct net_user_info ethertap_user_info; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/ethertap_kern.c um/arch/um/os-Linux/drivers/ethertap_kern.c ---- orig/arch/um/os-Linux/drivers/ethertap_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/ethertap_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,122 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/init.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "etap.h" -+ -+struct ethertap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void etap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct ethertap_data *epri; -+ struct ethertap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ epri = (struct ethertap_data *) pri->user; -+ *epri = ((struct ethertap_data) -+ { .dev_name = init->dev_name, -+ .gate_addr = init->gate_addr, -+ .data_fd = -1, -+ .control_fd = -1, -+ .dev = dev }); -+ -+ printk("ethertap backend - %s", epri->dev_name); -+ if(epri->gate_addr != NULL) -+ printk(", IP = %s", epri->gate_addr); -+ printk("\n"); -+} -+ -+static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ int len; -+ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP); -+ if(*skb == NULL) return(-ENOMEM); -+ len = net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP); -+ if(len <= 0) return(len); -+ skb_pull(*skb, 2); -+ len -= 2; -+ return(len); -+} -+ -+static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ if(skb_headroom(*skb) < 2){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_realloc_headroom(*skb, 2); -+ dev_kfree_skb(*skb); -+ if (skb2 == NULL) return(-ENOMEM); -+ *skb = skb2; -+ } -+ skb_push(*skb, 2); -+ return(net_send(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info ethertap_kern_info = { -+ .init = etap_init, -+ .protocol = eth_protocol, -+ .read = etap_read, -+ .write = etap_write, -+}; -+ -+int ethertap_setup(char *str, char **mac_out, void *data) -+{ -+ struct ethertap_init *init = data; -+ -+ *init = ((struct ethertap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "ethertap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); -+ if(init->dev_name == NULL){ -+ printk("ethertap_setup : Missing tap device name\n"); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+static struct transport ethertap_transport = { -+ .list = LIST_HEAD_INIT(ethertap_transport.list), -+ .name = "ethertap", -+ .setup = ethertap_setup, -+ .user = ðertap_user_info, -+ .kern = ðertap_kern_info, -+ .private_size = sizeof(struct ethertap_data), -+}; -+ -+static int register_ethertap(void) -+{ -+ register_transport(ðertap_transport); -+ return(1); -+} -+ -+__initcall(register_ethertap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/ethertap_user.c um/arch/um/os-Linux/drivers/ethertap_user.c ---- orig/arch/um/os-Linux/drivers/ethertap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/ethertap_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,238 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <sys/errno.h> -+#include <sys/socket.h> -+#include <sys/wait.h> -+#include <sys/un.h> -+#include <net/if.h> -+#include "user.h" -+#include "kern_util.h" -+#include "net_user.h" -+#include "etap.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void etap_user_init(void *data, void *dev) -+{ -+ struct ethertap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct addr_change { -+ enum { ADD_ADDR, DEL_ADDR } what; -+ unsigned char addr[4]; -+ unsigned char netmask[4]; -+}; -+ -+static void etap_change(int op, unsigned char *addr, unsigned char *netmask, -+ int fd) -+{ -+ struct addr_change change; -+ void *output; -+ int n; -+ -+ change.what = op; -+ memcpy(change.addr, addr, sizeof(change.addr)); -+ memcpy(change.netmask, netmask, sizeof(change.netmask)); -+ n = os_write_file(fd, &change, sizeof(change)); -+ if(n != sizeof(change)) -+ printk("etap_change - request failed, err = %d\n", -n); -+ output = um_kmalloc(page_size()); -+ if(output == NULL) -+ printk("etap_change : Failed to allocate output buffer\n"); -+ read_output(fd, output, page_size()); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+static void etap_open_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+static void etap_close_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+struct etap_pre_exec_data { -+ int control_remote; -+ int control_me; -+ int data_me; -+}; -+ -+static void etap_pre_exec(void *arg) -+{ -+ struct etap_pre_exec_data *data = arg; -+ -+ dup2(data->control_remote, 1); -+ os_close_file(data->data_me); -+ os_close_file(data->control_me); -+} -+ -+static int etap_tramp(char *dev, char *gate, int control_me, -+ int control_remote, int data_me, int data_remote) -+{ -+ struct etap_pre_exec_data pe_data; -+ int pid, status, err, n; -+ char version_buf[sizeof("nnnnn\0")]; -+ char data_fd_buf[sizeof("nnnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, -+ data_fd_buf, gate_buf, NULL }; -+ char *nosetup_args[] = { "uml_net", version_buf, "ethertap", -+ dev, data_fd_buf, NULL }; -+ char **args, c; -+ -+ sprintf(data_fd_buf, "%d", data_remote); -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ if(gate != NULL){ -+ strcpy(gate_buf, gate); -+ args = setup_args; -+ } -+ else args = nosetup_args; -+ -+ err = 0; -+ pe_data.control_remote = control_remote; -+ pe_data.control_me = control_me; -+ pe_data.data_me = data_me; -+ pid = run_helper(etap_pre_exec, &pe_data, args, NULL); -+ -+ if(pid < 0) err = pid; -+ os_close_file(data_remote); -+ os_close_file(control_remote); -+ n = os_read_file(control_me, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("etap_tramp : read of status failed, err = %d\n", -n); -+ return(-EINVAL); -+ } -+ if(c != 1){ -+ printk("etap_tramp : uml_net failed\n"); -+ err = -EINVAL; -+ if(waitpid(pid, &status, 0) < 0) -+ err = -errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) -+ printk("uml_net didn't exit with status 1\n"); -+ } -+ return(err); -+} -+ -+static int etap_open(void *data) -+{ -+ struct ethertap_data *pri = data; -+ char *output; -+ int data_fds[2], control_fds[2], err, output_len; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err) return(err); -+ -+ err = os_pipe(data_fds, 0, 0); -+ if(err < 0){ -+ printk("data os_pipe failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ err = os_pipe(control_fds, 1, 0); -+ if(err < 0){ -+ printk("control os_pipe failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], -+ control_fds[1], data_fds[0], data_fds[1]); -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ read_output(control_fds[0], output, output_len); -+ -+ if(output == NULL) -+ printk("etap_open : failed to allocate output buffer\n"); -+ else { -+ printk("%s", output); -+ kfree(output); -+ } -+ -+ if(err < 0){ -+ printk("etap_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ pri->data_fd = data_fds[0]; -+ pri->control_fd = control_fds[0]; -+ iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); -+ return(data_fds[0]); -+} -+ -+static void etap_close(int fd, void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); -+ os_close_file(fd); -+ os_shutdown_socket(pri->data_fd, 1, 1); -+ os_close_file(pri->data_fd); -+ pri->data_fd = -1; -+ os_close_file(pri->control_fd); -+ pri->control_fd = -1; -+} -+ -+static int etap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void etap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if(pri->control_fd == -1) return; -+ etap_open_addr(addr, netmask, &pri->control_fd); -+} -+ -+static void etap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ if(pri->control_fd == -1) return; -+ etap_close_addr(addr, netmask, &pri->control_fd); -+} -+ -+struct net_user_info ethertap_user_info = { -+ .init = etap_user_init, -+ .open = etap_open, -+ .close = etap_close, -+ .remove = NULL, -+ .set_mtu = etap_set_mtu, -+ .add_address = etap_add_addr, -+ .delete_address = etap_del_addr, -+ .max_packet = MAX_PACKET - ETH_HEADER_ETHERTAP -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/Makefile um/arch/um/os-Linux/drivers/Makefile ---- orig/arch/um/os-Linux/drivers/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,31 @@ -+# -+# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := drivers.o -+ -+list-multi := tuntap.o ethertap.o -+ -+ethertap-objs := ethertap_kern.o ethertap_user.o -+tuntap-objs := tuntap_kern.o tuntap_user.o -+ -+obj-y = -+obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -+obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(USER_SINGLE_OBJS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+ethertap.o : $(ethertap-objs) -+ -+tuntap.o : $(tuntap-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) $(LD_RFLAG) -r -o $@ $($(patsubst %.o,%,$@)-objs) -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap.h um/arch/um/os-Linux/drivers/tuntap.h ---- orig/arch/um/os-Linux/drivers/tuntap.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/tuntap.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_TUNTAP_H -+#define __UM_TUNTAP_H -+ -+#include "net_user.h" -+ -+struct tuntap_data { -+ char *dev_name; -+ int fixed_config; -+ char *gate_addr; -+ int fd; -+ void *dev; -+}; -+ -+extern struct net_user_info tuntap_user_info; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap_kern.c um/arch/um/os-Linux/drivers/tuntap_kern.c ---- orig/arch/um/os-Linux/drivers/tuntap_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/tuntap_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/skbuff.h" -+#include "linux/init.h" -+#include "asm/errno.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "tuntap.h" -+ -+struct tuntap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void tuntap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct tuntap_data *tpri; -+ struct tuntap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ tpri = (struct tuntap_data *) pri->user; -+ *tpri = ((struct tuntap_data) -+ { .dev_name = init->dev_name, -+ .fixed_config = (init->dev_name != NULL), -+ .gate_addr = init->gate_addr, -+ .fd = -1, -+ .dev = dev }); -+ printk("TUN/TAP backend - "); -+ if(tpri->gate_addr != NULL) -+ printk("IP = %s", tpri->gate_addr); -+ printk("\n"); -+} -+ -+static int tuntap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int tuntap_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(net_write(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info tuntap_kern_info = { -+ .init = tuntap_init, -+ .protocol = eth_protocol, -+ .read = tuntap_read, -+ .write = tuntap_write, -+}; -+ -+int tuntap_setup(char *str, char **mac_out, void *data) -+{ -+ struct tuntap_init *init = data; -+ -+ *init = ((struct tuntap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "tuntap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); -+ -+ return(1); -+} -+ -+static struct transport tuntap_transport = { -+ .list = LIST_HEAD_INIT(tuntap_transport.list), -+ .name = "tuntap", -+ .setup = tuntap_setup, -+ .user = &tuntap_user_info, -+ .kern = &tuntap_kern_info, -+ .private_size = sizeof(struct tuntap_data), -+ .setup_size = sizeof(struct tuntap_init), -+}; -+ -+static int register_tuntap(void) -+{ -+ register_transport(&tuntap_transport); -+ return(1); -+} -+ -+__initcall(register_tuntap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap_user.c um/arch/um/os-Linux/drivers/tuntap_user.c ---- orig/arch/um/os-Linux/drivers/tuntap_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/drivers/tuntap_user.c 2003-11-12 00:02:30.000000000 -0500 -@@ -0,0 +1,224 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/uio.h> -+#include <sys/ioctl.h> -+#include <net/if.h> -+#include <linux/if_tun.h> -+#include "net_user.h" -+#include "tuntap.h" -+#include "kern_util.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void tuntap_user_init(void *data, void *dev) -+{ -+ struct tuntap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if((pri->fd == -1) || pri->fixed_config) return; -+ open_addr(addr, netmask, pri->dev_name); -+} -+ -+static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if((pri->fd == -1) || pri->fixed_config) return; -+ close_addr(addr, netmask, pri->dev_name); -+} -+ -+struct tuntap_pre_exec_data { -+ int stdout; -+ int close_me; -+}; -+ -+static void tuntap_pre_exec(void *arg) -+{ -+ struct tuntap_pre_exec_data *data = arg; -+ -+ dup2(data->stdout, 1); -+ os_close_file(data->close_me); -+} -+ -+static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, -+ char *buffer, int buffer_len, int *used_out) -+{ -+ struct tuntap_pre_exec_data data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, -+ NULL }; -+ char buf[CMSG_SPACE(sizeof(*fd_out))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ int pid, n; -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ data.stdout = remote; -+ data.close_me = me; -+ -+ pid = run_helper(tuntap_pre_exec, &data, argv, NULL); -+ -+ if(pid < 0) return(-pid); -+ -+ os_close_file(remote); -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ if(buffer != NULL){ -+ iov = ((struct iovec) { buffer, buffer_len }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ } -+ else { -+ msg.msg_iov = NULL; -+ msg.msg_iovlen = 0; -+ } -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ n = recvmsg(me, &msg, 0); -+ *used_out = n; -+ if(n < 0){ -+ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ waitpid(pid, NULL, 0); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ printk("tuntap_open_tramp : didn't receive a message\n"); -+ return(-EINVAL); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("tuntap_open_tramp : didn't receive a descriptor\n"); -+ return(-EINVAL); -+ } -+ *fd_out = ((int *) CMSG_DATA(cmsg))[0]; -+ return(0); -+} -+ -+static int tuntap_open(void *data) -+{ -+ struct ifreq ifr; -+ struct tuntap_data *pri = data; -+ char *output, *buffer; -+ int err, fds[2], len, used; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err < 0) -+ return(err); -+ -+ if(pri->fixed_config){ -+ pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); -+ if(pri->fd < 0){ -+ printk("Failed to open /dev/net/tun, err = %d\n", -+ -pri->fd); -+ return(pri->fd); -+ } -+ memset(&ifr, 0, sizeof(ifr)); -+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; -+ strncpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name) - 1); -+ if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ -+ printk("TUNSETIFF failed, errno = %d\n", errno); -+ os_close_file(pri->fd); -+ return(-errno); -+ } -+ } -+ else { -+ err = os_pipe(fds, 0, 0); -+ if(err < 0){ -+ printk("tuntap_open : os_pipe failed - err = %d\n", -+ -err); -+ return(err); -+ } -+ -+ buffer = get_output_buffer(&len); -+ if(buffer != NULL) len--; -+ used = 0; -+ -+ err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], -+ fds[1], buffer, len, &used); -+ -+ output = buffer; -+ if(err < 0) { -+ printk("%s", output); -+ free_output_buffer(buffer); -+ printk("tuntap_open_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ pri->dev_name = uml_strdup(buffer); -+ output += IFNAMSIZ; -+ printk("%s", output); -+ free_output_buffer(buffer); -+ -+ os_close_file(fds[0]); -+ iter_addresses(pri->dev, open_addr, pri->dev_name); -+ } -+ -+ return(pri->fd); -+} -+ -+static void tuntap_close(int fd, void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if(!pri->fixed_config) -+ iter_addresses(pri->dev, close_addr, pri->dev_name); -+ os_close_file(fd); -+ pri->fd = -1; -+} -+ -+static int tuntap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info tuntap_user_info = { -+ .init = tuntap_user_init, -+ .open = tuntap_open, -+ .close = tuntap_close, -+ .remove = NULL, -+ .set_mtu = tuntap_set_mtu, -+ .add_address = tuntap_add_addr, -+ .delete_address = tuntap_del_addr, -+ .max_packet = MAX_PACKET -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/file.c um/arch/um/os-Linux/file.c ---- orig/arch/um/os-Linux/file.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/file.c 2004-01-10 06:52:24.000000000 -0500 -@@ -0,0 +1,668 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <signal.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/ioctl.h> -+#include <sys/mount.h> -+#include <sys/uio.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+static void copy_stat(struct uml_stat *dst, struct stat64 *src) -+{ -+ *dst = ((struct uml_stat) { -+ .ust_dev = src->st_dev, /* device */ -+ .ust_ino = src->st_ino, /* inode */ -+ .ust_mode = src->st_mode, /* protection */ -+ .ust_nlink = src->st_nlink, /* number of hard links */ -+ .ust_uid = src->st_uid, /* user ID of owner */ -+ .ust_gid = src->st_gid, /* group ID of owner */ -+ .ust_size = src->st_size, /* total size, in bytes */ -+ .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ -+ .ust_blocks = src->st_blocks, /* number of blocks allocated */ -+ .ust_atime = src->st_atime, /* time of last access */ -+ .ust_mtime = src->st_mtime, /* time of last modification */ -+ .ust_ctime = src->st_ctime, /* time of last change */ -+ }); -+} -+ -+int os_stat_fd(const int fd, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = fstat64(fd, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_stat_file(const char *file_name, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = stat64(file_name, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_access(const char* file, int mode) -+{ -+ int amode, err; -+ -+ amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | -+ (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; -+ -+ err = access(file, amode); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+void os_print_error(int error, const char* str) -+{ -+ errno = error < 0 ? -error : error; -+ -+ perror(str); -+} -+ -+/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ -+int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ err = ioctl(fd, cmd, arg); -+ if(err < 0) -+ return(-errno); -+ -+ return(err); -+} -+ -+int os_window_size(int fd, int *rows, int *cols) -+{ -+ struct winsize size; -+ -+ if(ioctl(fd, TIOCGWINSZ, &size) < 0) -+ return(-errno); -+ -+ *rows = size.ws_row; -+ *cols = size.ws_col; -+ -+ return(0); -+} -+ -+int os_new_tty_pgrp(int fd, int pid) -+{ -+ if(ioctl(fd, TIOCSCTTY, 0) < 0){ -+ printk("TIOCSCTTY failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(tcsetpgrp(fd, pid) < 0){ -+ printk("tcsetpgrp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME: ensure namebuf in os_get_if_name is big enough */ -+int os_get_ifname(int fd, char* namebuf) -+{ -+ if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_slip(int fd) -+{ -+ int disc, sencap; -+ -+ disc = N_SLIP; -+ if(ioctl(fd, TIOCSETD, &disc) < 0){ -+ printk("Failed to set slip line discipline - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ sencap = 0; -+ if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ -+ printk("Failed to set slip encapsulation - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_set_owner(int fd, int pid) -+{ -+ if(fcntl(fd, F_SETOWN, pid) < 0){ -+ int save_errno = errno; -+ -+ if(fcntl(fd, F_GETOWN, 0) != pid) -+ return(-save_errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ -+int os_sigio_async(int master, int slave) -+{ -+ int flags; -+ -+ flags = fcntl(master, F_GETFL); -+ if(flags < 0) { -+ printk("fcntl F_GETFL failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || -+ (fcntl(master, F_SETOWN, os_getpid()) < 0)){ -+ printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ -+ printk("fcntl F_SETFL failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_mode_fd(int fd, int mode) -+{ -+ int err; -+ -+ do { -+ err = fchmod(fd, mode); -+ } while((err < 0) && (errno==EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_file_type(char *file) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0) -+ return(err); -+ -+ if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int os_file_mode(char *file, struct openflags *mode_out) -+{ -+ int err; -+ -+ *mode_out = OPENFLAGS(); -+ -+ err = os_access(file, OS_ACC_W_OK); -+ if((err < 0) && (err != -EACCES)) -+ return(err); -+ -+ *mode_out = of_write(*mode_out); -+ -+ err = os_access(file, OS_ACC_R_OK); -+ if((err < 0) && (err != -EACCES)) -+ return(err); -+ -+ *mode_out = of_read(*mode_out); -+ -+ return(0); -+} -+ -+int os_open_file(char *file, struct openflags flags, int mode) -+{ -+ int fd, f = 0; -+ -+ if(flags.r && flags.w) f = O_RDWR; -+ else if(flags.r) f = O_RDONLY; -+ else if(flags.w) f = O_WRONLY; -+ else f = 0; -+ -+ if(flags.s) f |= O_SYNC; -+ if(flags.c) f |= O_CREAT; -+ if(flags.t) f |= O_TRUNC; -+ if(flags.e) f |= O_EXCL; -+ -+ fd = open64(file, f, mode); -+ if(fd < 0) -+ return(-errno); -+ -+ if(flags.cl && fcntl(fd, F_SETFD, 1)){ -+ os_close_file(fd); -+ return(-errno); -+ } -+ -+ return(fd); -+} -+ -+int os_connect_socket(char *name) -+{ -+ struct sockaddr_un sock; -+ int fd, err; -+ -+ sock.sun_family = AF_UNIX; -+ snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); -+ -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(fd < 0) -+ return(fd); -+ -+ err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); -+ if(err) -+ return(-errno); -+ -+ return(fd); -+} -+ -+void os_close_file(int fd) -+{ -+ close(fd); -+} -+ -+int os_seek_file(int fd, __u64 offset) -+{ -+ __u64 actual; -+ -+ actual = lseek64(fd, offset, SEEK_SET); -+ if(actual != offset) return(-errno); -+ return(0); -+} -+ -+static int fault_buffer(void *start, int len, -+ int (*copy_proc)(void *addr, void *buf, int len)) -+{ -+ int page = getpagesize(), i; -+ char c; -+ -+ for(i = 0; i < len; i += page){ -+ if((*copy_proc)(start + i, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ if((len % page) != 0){ -+ if((*copy_proc)(start + len - 1, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ return(0); -+} -+ -+static int file_io(int fd, void *buf, int len, -+ int (*io_proc)(int fd, void *buf, int len), -+ int (*copy_user_proc)(void *addr, void *buf, int len)) -+{ -+ int n, err; -+ -+ do { -+ n = (*io_proc)(fd, buf, len); -+ if((n < 0) && (errno == EFAULT)){ -+ err = fault_buffer(buf, len, copy_user_proc); -+ if(err) -+ return(err); -+ n = (*io_proc)(fd, buf, len); -+ } -+ } while((n < 0) && (errno == EINTR)); -+ -+ if(n < 0) -+ return(-errno); -+ return(n); -+} -+ -+int os_read_file(int fd, void *buf, int len) -+{ -+ return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, -+ copy_from_user_proc)); -+} -+ -+int os_write_file(int fd, const void *buf, int len) -+{ -+ return(file_io(fd, (void *) buf, len, -+ (int (*)(int, void *, int)) write, copy_to_user_proc)); -+} -+ -+int os_file_size(char *file, long long *size_out) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); -+ } -+ -+ if(S_ISBLK(buf.ust_mode)){ -+ int fd, blocks; -+ -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open \"%s\", errno = %d\n", file, -fd); -+ return(fd); -+ } -+ if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ -+ printk("Couldn't get the block size of \"%s\", " -+ "errno = %d\n", file, errno); -+ err = -errno; -+ os_close_file(fd); -+ return(err); -+ } -+ *size_out = ((long long) blocks) * 512; -+ os_close_file(fd); -+ return(0); -+ } -+ *size_out = buf.ust_size; -+ return(0); -+} -+ -+int os_file_modtime(char *file, unsigned long *modtime) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); -+ } -+ -+ *modtime = buf.ust_mtime; -+ return(0); -+} -+ -+int os_get_exec_close(int fd, int* close_on_exec) -+{ -+ int ret; -+ -+ do { -+ ret = fcntl(fd, F_GETFD); -+ } while((ret < 0) && (errno == EINTR)) ; -+ -+ if(ret < 0) -+ return(-errno); -+ -+ *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; -+ return(ret); -+} -+ -+int os_set_exec_close(int fd, int close_on_exec) -+{ -+ int flag, err; -+ -+ if(close_on_exec) flag = FD_CLOEXEC; -+ else flag = 0; -+ -+ do { -+ err = fcntl(fd, F_SETFD, flag); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ return(err); -+} -+ -+int os_pipe(int *fds, int stream, int close_on_exec) -+{ -+ int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; -+ -+ err = socketpair(AF_UNIX, type, 0, fds); -+ if(err < 0) -+ return(-errno); -+ -+ if(!close_on_exec) -+ return(0); -+ -+ err = os_set_exec_close(fds[0], 1); -+ if(err < 0) -+ goto error; -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0) -+ goto error; -+ -+ return(0); -+ -+ error: -+ printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); -+ os_close_file(fds[1]); -+ os_close_file(fds[0]); -+ return(err); -+} -+ -+int os_set_fd_async(int fd, int owner) -+{ -+ /* XXX This should do F_GETFL first */ -+ if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){ -+ printk("os_set_fd_async : failed to set O_ASYNC and " -+ "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); -+ return(-errno); -+ } -+#ifdef notdef -+ if(fcntl(fd, F_SETFD, 1) < 0){ -+ printk("os_set_fd_async : Setting FD_CLOEXEC failed, " -+ "errno = %d\n", errno); -+ } -+#endif -+ -+ if((fcntl(fd, F_SETSIG, SIGIO) < 0) || -+ (fcntl(fd, F_SETOWN, owner) < 0)){ -+ printk("os_set_fd_async : Failed to fcntl F_SETOWN " -+ "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd, -+ owner, errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_set_fd_block(int fd, int blocking) -+{ -+ int flags; -+ -+ flags = fcntl(fd, F_GETFL); -+ -+ if(blocking) flags &= ~O_NONBLOCK; -+ else flags |= O_NONBLOCK; -+ -+ if(fcntl(fd, F_SETFL, flags) < 0){ -+ printk("Failed to change blocking on fd # %d, errno = %d\n", -+ fd, errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+int os_accept_connection(int fd) -+{ -+ int new; -+ -+ new = accept(fd, NULL, 0); -+ if(new < 0) -+ return(-errno); -+ return(new); -+} -+ -+#ifndef SHUT_RD -+#define SHUT_RD 0 -+#endif -+ -+#ifndef SHUT_WR -+#define SHUT_WR 1 -+#endif -+ -+#ifndef SHUT_RDWR -+#define SHUT_RDWR 2 -+#endif -+ -+int os_shutdown_socket(int fd, int r, int w) -+{ -+ int what, err; -+ -+ if(r && w) what = SHUT_RDWR; -+ else if(r) what = SHUT_RD; -+ else if(w) what = SHUT_WR; -+ else { -+ printk("os_shutdown_socket : neither r or w was set\n"); -+ return(-EINVAL); -+ } -+ err = shutdown(fd, what); -+ if(err < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_rcv_fd(int fd, int *helper_pid_out) -+{ -+ int new, n; -+ char buf[CMSG_SPACE(sizeof(new))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ iov = ((struct iovec) { .iov_base = helper_pid_out, -+ .iov_len = sizeof(*helper_pid_out) }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ -+ n = recvmsg(fd, &msg, 0); -+ if(n < 0) -+ return(-errno); -+ -+ else if(n != sizeof(iov.iov_len)) -+ *helper_pid_out = -1; -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ printk("rcv_fd didn't receive anything, error = %d\n", errno); -+ return(-1); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("rcv_fd didn't receive a descriptor\n"); -+ return(-1); -+ } -+ -+ new = ((int *) CMSG_DATA(cmsg))[0]; -+ return(new); -+} -+ -+int os_create_unix_socket(char *file, int len, int close_on_exec) -+{ -+ struct sockaddr_un addr; -+ int sock, err; -+ -+ sock = socket(PF_UNIX, SOCK_DGRAM, 0); -+ if (sock < 0){ -+ printk("create_unix_socket - socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if(close_on_exec) { -+ err = os_set_exec_close(sock, 1); -+ if(err < 0) -+ printk("create_unix_socket : close_on_exec failed, " -+ "err = %d", -err); -+ } -+ -+ addr.sun_family = AF_UNIX; -+ -+ /* XXX Be more careful about overflow */ -+ snprintf(addr.sun_path, len, "%s", file); -+ -+ err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); -+ if (err < 0){ -+ printk("create_listening_socket at '%s' - bind failed, " -+ "errno = %d\n", file, errno); -+ return(-errno); -+ } -+ -+ return(sock); -+} -+ -+void os_flush_stdout(void) -+{ -+ fflush(stdout); -+} -+ -+int os_lock_file(int fd, int excl) -+{ -+ int type = excl ? F_WRLCK : F_RDLCK; -+ struct flock lock = ((struct flock) { .l_type = type, -+ .l_whence = SEEK_SET, -+ .l_start = 0, -+ .l_len = 0 } ); -+ int err, save; -+ -+ err = fcntl(fd, F_SETLK, &lock); -+ if(!err) -+ goto out; -+ -+ save = -errno; -+ err = fcntl(fd, F_GETLK, &lock); -+ if(err){ -+ err = -errno; -+ goto out; -+ } -+ -+ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); -+ err = save; -+ out: -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/include/file.h um/arch/um/os-Linux/include/file.h ---- orig/arch/um/os-Linux/include/file.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/include/file.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_FILE_H__ -+#define __OS_FILE_H__ -+ -+#define DEV_NULL "/dev/null" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/Makefile um/arch/um/os-Linux/Makefile ---- orig/arch/um/os-Linux/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = file.o process.o tty.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(obj-y) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ -+archmrproper: -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/process.c um/arch/um/os-Linux/process.c ---- orig/arch/um/os-Linux/process.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/process.c 2004-01-10 06:53:44.000000000 -0500 -@@ -0,0 +1,148 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <errno.h> -+#include <signal.h> -+#include <sys/mman.h> -+#include <sys/wait.h> -+#include "os.h" -+#include "user.h" -+ -+#define ARBITRARY_ADDR -1 -+#define FAILURE_PID -1 -+ -+unsigned long os_process_pc(int pid) -+{ -+ char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; -+ unsigned long pc; -+ int fd, err; -+ -+ sprintf(proc_stat, "/proc/%d/stat", pid); -+ fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("os_process_pc - couldn't open '%s', err = %d\n", -+ proc_stat, -fd); -+ return(ARBITRARY_ADDR); -+ } -+ err = os_read_file(fd, buf, sizeof(buf)); -+ if(err < 0){ -+ printk("os_process_pc - couldn't read '%s', err = %d\n", -+ proc_stat, -err); -+ os_close_file(fd); -+ return(ARBITRARY_ADDR); -+ } -+ os_close_file(fd); -+ pc = ARBITRARY_ADDR; -+ if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %ld", &pc) != 1){ -+ printk("os_process_pc - couldn't find pc in '%s'\n", buf); -+ } -+ return(pc); -+} -+ -+int os_process_parent(int pid) -+{ -+ char stat[sizeof("/proc/nnnnn/stat\0")]; -+ char data[256]; -+ int parent, n, fd; -+ -+ if(pid == -1) return(-1); -+ -+ snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); -+ fd = os_open_file(stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open '%s', err = %d\n", stat, -fd); -+ return(FAILURE_PID); -+ } -+ -+ n = os_read_file(fd, data, sizeof(data)); -+ os_close_file(fd); -+ -+ if(n < 0){ -+ printk("Couldn't read '%s', err = %d\n", stat, -n); -+ return(FAILURE_PID); -+ } -+ -+ parent = FAILURE_PID; -+ /* XXX This will break if there is a space in the command */ -+ n = sscanf(data, "%*d %*s %*c %d", &parent); -+ if(n != 1) -+ printk("Failed to scan '%s'\n", data); -+ -+ return(parent); -+} -+ -+void os_stop_process(int pid) -+{ -+ kill(pid, SIGSTOP); -+} -+ -+void os_kill_process(int pid, int reap_child) -+{ -+ kill(pid, SIGKILL); -+ if(reap_child) -+ waitpid(pid, NULL, 0); -+ -+} -+ -+void os_usr1_process(int pid) -+{ -+ kill(pid, SIGUSR1); -+} -+ -+int os_getpid(void) -+{ -+ return(getpid()); -+} -+ -+int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, -+ int r, int w, int x) -+{ -+ void *loc; -+ int prot; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ -+ loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, -+ fd, off); -+ if(loc == MAP_FAILED) -+ return(-errno); -+ return(0); -+} -+ -+int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) -+{ -+ int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0)); -+ -+ if(mprotect(addr, len, prot) < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_unmap_memory(void *addr, int len) -+{ -+ int err; -+ -+ err = munmap(addr, len); -+ if(err < 0) -+ return(-errno); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/tty.c um/arch/um/os-Linux/tty.c ---- orig/arch/um/os-Linux/tty.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/os-Linux/tty.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+struct grantpt_info { -+ int fd; -+ int res; -+ int err; -+}; -+ -+static void grantpt_cb(void *arg) -+{ -+ struct grantpt_info *info = arg; -+ -+ info->res = grantpt(info->fd); -+ info->err = errno; -+} -+ -+int get_pty(void) -+{ -+ struct grantpt_info info; -+ int fd; -+ -+ fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); -+ return(fd); -+ } -+ -+ info.fd = fd; -+ initial_thread_cb(grantpt_cb, &info); -+ -+ if(info.res < 0){ -+ printk("get_pty : Couldn't grant pty - errno = %d\n", -+ -info.err); -+ return(-1); -+ } -+ if(unlockpt(fd) < 0){ -+ printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); -+ return(-1); -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/bugs.c um/arch/um/sys-i386/bugs.c ---- orig/arch/um/sys-i386/bugs.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/bugs.c 2004-01-21 22:42:39.000000000 -0500 -@@ -0,0 +1,222 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <sys/signal.h> -+#include <asm/ldt.h> -+#include "kern_util.h" -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "task.h" -+#include "os.h" -+ -+#define MAXTOKEN 64 -+ -+/* Set during early boot */ -+int host_has_cmov = 1; -+int host_has_xmm = 0; -+ -+static char token(int fd, char *buf, int len, char stop) -+{ -+ int n; -+ char *ptr, *end, c; -+ -+ ptr = buf; -+ end = &buf[len]; -+ do { -+ n = os_read_file(fd, ptr, sizeof(*ptr)); -+ c = *ptr++; -+ if(n != sizeof(*ptr)){ -+ if(n == 0) return(0); -+ printk("Reading /proc/cpuinfo failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ else -+ return(-EIO); -+ } -+ } while((c != '\n') && (c != stop) && (ptr < end)); -+ -+ if(ptr == end){ -+ printk("Failed to find '%c' in /proc/cpuinfo\n", stop); -+ return(-1); -+ } -+ *(ptr - 1) = '\0'; -+ return(c); -+} -+ -+static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) -+{ -+ int n; -+ char c; -+ -+ scratch[len - 1] = '\0'; -+ while(1){ -+ c = token(fd, scratch, len - 1, ':'); -+ if(c <= 0) -+ return(0); -+ else if(c != ':'){ -+ printk("Failed to find ':' in /proc/cpuinfo\n"); -+ return(0); -+ } -+ -+ if(!strncmp(scratch, key, strlen(key))) -+ return(1); -+ -+ do { -+ n = os_read_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("Failed to find newline in " -+ "/proc/cpuinfo, err = %d\n", -n); -+ return(0); -+ } -+ } while(c != '\n'); -+ } -+ return(0); -+} -+ -+int cpu_feature(char *what, char *buf, int len) -+{ -+ int fd, ret = 0; -+ -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ if(!find_cpuinfo_line(fd, what, buf, len)){ -+ printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); -+ goto out_close; -+ } -+ -+ token(fd, buf, len, '\n'); -+ ret = 1; -+ -+ out_close: -+ os_close_file(fd); -+ return(ret); -+} -+ -+static int check_cpu_flag(char *feature, int *have_it) -+{ -+ char buf[MAXTOKEN], c; -+ int fd, len = sizeof(buf)/sizeof(buf[0]); -+ -+ printk("Checking for host processor %s support...", feature); -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ *have_it = 0; -+ if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) -+ goto out; -+ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c != ' '){ -+ printk("Failed to find ' ' in /proc/cpuinfo\n"); -+ goto out; -+ } -+ -+ while(1){ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c == '\n') break; -+ -+ if(!strcmp(buf, feature)){ -+ *have_it = 1; -+ goto out; -+ } -+ } -+ out: -+ if(*have_it == 0) printk("No\n"); -+ else if(*have_it == 1) printk("Yes\n"); -+ os_close_file(fd); -+ return(1); -+} -+ -+#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems -+ * for some people. -+ */ -+static void disable_lcall(void) -+{ -+ struct modify_ldt_ldt_s ldt; -+ int err; -+ -+ bzero(&ldt, sizeof(ldt)); -+ ldt.entry_number = 7; -+ ldt.base_addr = 0; -+ ldt.limit = 0; -+ err = modify_ldt(1, &ldt, sizeof(ldt)); -+ if(err) -+ printk("Failed to disable lcall7 - errno = %d\n", errno); -+} -+#endif -+ -+void arch_init_thread(void) -+{ -+#if 0 -+ disable_lcall(); -+#endif -+} -+ -+void arch_check_bugs(void) -+{ -+ int have_it; -+ -+ if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ -+ printk("/proc/cpuinfo not available - skipping CPU capability " -+ "checks\n"); -+ return; -+ } -+ if(check_cpu_flag("cmov", &have_it)) -+ host_has_cmov = have_it; -+ if(check_cpu_flag("xmm", &have_it)) -+ host_has_xmm = have_it; -+} -+ -+int arch_handle_signal(int sig, union uml_pt_regs *regs) -+{ -+ unsigned char tmp[2]; -+ -+ /* This is testing for a cmov (0x0f 0x4x) instruction causing a -+ * SIGILL in init. -+ */ -+ if((sig != SIGILL) || (TASK_PID(get_current()) != 1)) return(0); -+ -+ if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) -+ panic("SIGILL in init, could not read instructions!\n"); -+ if((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) -+ return(0); -+ -+ if(host_has_cmov == 0) -+ panic("SIGILL caused by cmov, which this processor doesn't " -+ "implement, boot a filesystem compiled for older " -+ "processors"); -+ else if(host_has_cmov == 1) -+ panic("SIGILL caused by cmov, which this processor claims to " -+ "implement"); -+ else if(host_has_cmov == -1) -+ panic("SIGILL caused by cmov, couldn't tell if this processor " -+ "implements it, boot a filesystem compiled for older " -+ "processors"); -+ else panic("Bad value for host_has_cmov (%d)", host_has_cmov); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/checksum.S um/arch/um/sys-i386/checksum.S ---- orig/arch/um/sys-i386/checksum.S 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/checksum.S 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,460 @@ -+/* -+ * INET An implementation of the TCP/IP protocol suite for the LINUX -+ * operating system. INET is implemented using the BSD Socket -+ * interface as the means of communication with the user level. -+ * -+ * IP/TCP/UDP checksumming routines -+ * -+ * Authors: Jorge Cwik, <jorge@laser.satlink.net> -+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no> -+ * Tom May, <ftom@netcom.com> -+ * Pentium Pro/II routines: -+ * Alexander Kjeldaas <astor@guardian.no> -+ * Finn Arne Gangstad <finnag@guardian.no> -+ * Lots of code moved from tcp.c and ip.c; see those files -+ * for more names. -+ * -+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception -+ * handling. -+ * Andi Kleen, add zeroing on error -+ * converted to pure assembler -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include <linux/config.h> -+#include <asm/errno.h> -+ -+/* -+ * computes a partial checksum, e.g. for TCP/UDP fragments -+ */ -+ -+/* -+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) -+ */ -+ -+.text -+.align 4 -+.globl arch_csum_partial -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+ /* -+ * Experiments with Ethernet and SLIP connections show that buff -+ * is aligned on either a 2-byte or 4-byte boundary. We get at -+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. -+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte -+ * alignment for the unrolled loop. -+ */ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: unsigned char *buff -+ testl $2, %esi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. -+ jmp 4f -+1: movw (%esi), %bx -+ addl $2, %esi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, %edx -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+1: movl (%esi), %ebx -+ adcl %ebx, %eax -+ movl 4(%esi), %ebx -+ adcl %ebx, %eax -+ movl 8(%esi), %ebx -+ adcl %ebx, %eax -+ movl 12(%esi), %ebx -+ adcl %ebx, %eax -+ movl 16(%esi), %ebx -+ adcl %ebx, %eax -+ movl 20(%esi), %ebx -+ adcl %ebx, %eax -+ movl 24(%esi), %ebx -+ adcl %ebx, %eax -+ movl 28(%esi), %ebx -+ adcl %ebx, %eax -+ lea 32(%esi), %esi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+3: adcl (%esi), %eax -+ lea 4(%esi), %esi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+ movw (%esi),%cx -+ leal 2(%esi),%esi -+ je 6f -+ shll $16,%ecx -+5: movb (%esi),%cl -+6: addl %ecx,%eax -+ adcl $0, %eax -+7: -+ popl %ebx -+ popl %esi -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: const unsigned char *buf -+ -+ testl $2, %esi -+ jnz 30f -+10: -+ movl %ecx, %edx -+ movl %ecx, %ebx -+ andl $0x7c, %ebx -+ shrl $7, %ecx -+ addl %ebx,%esi -+ shrl $2, %ebx -+ negl %ebx -+ lea 45f(%ebx,%ebx,2), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+ -+ # Handle 2-byte-aligned regions -+20: addw (%esi), %ax -+ lea 2(%esi), %esi -+ adcl $0, %eax -+ jmp 10b -+ -+30: subl $2, %ecx -+ ja 20b -+ je 32f -+ movzbl (%esi),%ebx # csumming 1 byte, 2-aligned -+ addl %ebx, %eax -+ adcl $0, %eax -+ jmp 80f -+32: -+ addw (%esi), %ax # csumming 2 bytes, 2-aligned -+ adcl $0, %eax -+ jmp 80f -+ -+40: -+ addl -128(%esi), %eax -+ adcl -124(%esi), %eax -+ adcl -120(%esi), %eax -+ adcl -116(%esi), %eax -+ adcl -112(%esi), %eax -+ adcl -108(%esi), %eax -+ adcl -104(%esi), %eax -+ adcl -100(%esi), %eax -+ adcl -96(%esi), %eax -+ adcl -92(%esi), %eax -+ adcl -88(%esi), %eax -+ adcl -84(%esi), %eax -+ adcl -80(%esi), %eax -+ adcl -76(%esi), %eax -+ adcl -72(%esi), %eax -+ adcl -68(%esi), %eax -+ adcl -64(%esi), %eax -+ adcl -60(%esi), %eax -+ adcl -56(%esi), %eax -+ adcl -52(%esi), %eax -+ adcl -48(%esi), %eax -+ adcl -44(%esi), %eax -+ adcl -40(%esi), %eax -+ adcl -36(%esi), %eax -+ adcl -32(%esi), %eax -+ adcl -28(%esi), %eax -+ adcl -24(%esi), %eax -+ adcl -20(%esi), %eax -+ adcl -16(%esi), %eax -+ adcl -12(%esi), %eax -+ adcl -8(%esi), %eax -+ adcl -4(%esi), %eax -+45: -+ lea 128(%esi), %esi -+ adcl $0, %eax -+ dec %ecx -+ jge 40b -+ movl %edx, %ecx -+50: andl $3, %ecx -+ jz 80f -+ -+ # Handle the last 1-3 bytes without jumping -+ notl %ecx # 1->2, 2->1, 3->0, higher bits are masked -+ movl $0xffffff,%ebx # by the shll and shrl instructions -+ shll $3,%ecx -+ shrl %cl,%ebx -+ andl -128(%esi),%ebx # esi is 4-aligned so should be ok -+ addl %ebx,%eax -+ adcl $0,%eax -+80: -+ popl %ebx -+ popl %esi -+ ret -+ -+#endif -+ -+/* -+unsigned int csum_partial_copy_generic (const char *src, char *dst, -+ int len, int sum, int *src_err_ptr, int *dst_err_ptr) -+ */ -+ -+/* -+ * Copy from ds while checksumming, otherwise like csum_partial -+ * -+ * The macros SRC and DST specify the type of access for the instruction. -+ * thus we can call a custom exception handler for all access types. -+ * -+ * FIXME: could someone double-check whether I haven't mixed up some SRC and -+ * DST definitions? It's damn hard to trigger all cases. I hope I got -+ * them all but there's no guarantee. -+ */ -+ -+#define SRC(y...) \ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6001f ; \ -+ .previous -+ -+#define DST(y...) \ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6002f ; \ -+ .previous -+ -+.align 4 -+.globl csum_partial_copy_generic_i386 -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+#define ARGBASE 16 -+#define FP 12 -+ -+csum_partial_copy_generic_i386: -+ subl $4,%esp -+ pushl %edi -+ pushl %esi -+ pushl %ebx -+ movl ARGBASE+16(%esp),%eax # sum -+ movl ARGBASE+12(%esp),%ecx # len -+ movl ARGBASE+4(%esp),%esi # src -+ movl ARGBASE+8(%esp),%edi # dst -+ -+ testl $2, %edi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. -+ jmp 4f -+SRC(1: movw (%esi), %bx ) -+ addl $2, %esi -+DST( movw %bx, (%edi) ) -+ addl $2, %edi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, FP(%esp) -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+SRC(1: movl (%esi), %ebx ) -+SRC( movl 4(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 4(%edi) ) -+ -+SRC( movl 8(%esi), %ebx ) -+SRC( movl 12(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 8(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 12(%edi) ) -+ -+SRC( movl 16(%esi), %ebx ) -+SRC( movl 20(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 16(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 20(%edi) ) -+ -+SRC( movl 24(%esi), %ebx ) -+SRC( movl 28(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 24(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 28(%edi) ) -+ -+ lea 32(%esi), %esi -+ lea 32(%edi), %edi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl FP(%esp), %edx -+ movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+SRC(3: movl (%esi), %ebx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ lea 4(%esi), %esi -+ lea 4(%edi), %edi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+SRC( movw (%esi), %cx ) -+ leal 2(%esi), %esi -+DST( movw %cx, (%edi) ) -+ leal 2(%edi), %edi -+ je 6f -+ shll $16,%ecx -+SRC(5: movb (%esi), %cl ) -+DST( movb %cl, (%edi) ) -+6: addl %ecx, %eax -+ adcl $0, %eax -+7: -+5000: -+ -+# Exception handler: -+.section .fixup, "ax" -+ -+6001: -+ movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ -+ # zero the complete destination - computing the rest -+ # is too much work -+ movl ARGBASE+8(%esp), %edi # dst -+ movl ARGBASE+12(%esp), %ecx # len -+ xorl %eax,%eax -+ rep ; stosb -+ -+ jmp 5000b -+ -+6002: -+ movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT,(%ebx) -+ jmp 5000b -+ -+.previous -+ -+ popl %ebx -+ popl %esi -+ popl %edi -+ popl %ecx # equivalent to addl $4,%esp -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+#define ROUND1(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ addl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ROUND(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ adcl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ARGBASE 12 -+ -+csum_partial_copy_generic_i386: -+ pushl %ebx -+ pushl %edi -+ pushl %esi -+ movl ARGBASE+4(%esp),%esi #src -+ movl ARGBASE+8(%esp),%edi #dst -+ movl ARGBASE+12(%esp),%ecx #len -+ movl ARGBASE+16(%esp),%eax #sum -+# movl %ecx, %edx -+ movl %ecx, %ebx -+ movl %esi, %edx -+ shrl $6, %ecx -+ andl $0x3c, %ebx -+ negl %ebx -+ subl %ebx, %esi -+ subl %ebx, %edi -+ lea -1(%esi),%edx -+ andl $-32,%edx -+ lea 3f(%ebx,%ebx), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+1: addl $64,%esi -+ addl $64,%edi -+ SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -+ ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) -+ ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) -+ ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) -+ ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) -+3: adcl $0,%eax -+ addl $64, %edx -+ dec %ecx -+ jge 1b -+4: movl ARGBASE+12(%esp),%edx #len -+ andl $3, %edx -+ jz 7f -+ cmpl $2, %edx -+ jb 5f -+SRC( movw (%esi), %dx ) -+ leal 2(%esi), %esi -+DST( movw %dx, (%edi) ) -+ leal 2(%edi), %edi -+ je 6f -+ shll $16,%edx -+5: -+SRC( movb (%esi), %dl ) -+DST( movb %dl, (%edi) ) -+6: addl %edx, %eax -+ adcl $0, %eax -+7: -+.section .fixup, "ax" -+6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ # zero the complete destination (computing the rest is too much work) -+ movl ARGBASE+8(%esp),%edi # dst -+ movl ARGBASE+12(%esp),%ecx # len -+ xorl %eax,%eax -+ rep; stosb -+ jmp 7b -+6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT, (%ebx) -+ jmp 7b -+.previous -+ -+ popl %esi -+ popl %edi -+ popl %ebx -+ ret -+ -+#undef ROUND -+#undef ROUND1 -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/fault.c um/arch/um/sys-i386/fault.c ---- orig/arch/um/sys-i386/fault.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/fault.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+extern unsigned long search_exception_table(unsigned long addr); -+ -+int arch_fixup(unsigned long address, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ unsigned long fixup; -+ -+ fixup = search_exception_table(address); -+ if(fixup != 0){ -+ sc->eip = fixup; -+ return(1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ksyms.c um/arch/um/sys-i386/ksyms.c ---- orig/arch/um/sys-i386/ksyms.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/ksyms.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,17 @@ -+#include "linux/module.h" -+#include "linux/in6.h" -+#include "linux/rwsem.h" -+#include "asm/byteorder.h" -+#include "asm/semaphore.h" -+#include "asm/uaccess.h" -+#include "asm/checksum.h" -+#include "asm/errno.h" -+ -+EXPORT_SYMBOL(__down_failed); -+EXPORT_SYMBOL(__down_failed_interruptible); -+EXPORT_SYMBOL(__down_failed_trylock); -+EXPORT_SYMBOL(__up_wakeup); -+ -+/* Networking helper routines. */ -+EXPORT_SYMBOL(csum_partial_copy_from); -+EXPORT_SYMBOL(csum_partial_copy_to); -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ldt.c um/arch/um/sys-i386/ldt.c ---- orig/arch/um/sys-i386/ldt.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/ldt.c 2004-01-19 04:01:11.000000000 -0500 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/slab.h" -+#include "asm/uaccess.h" -+#include "asm/ptrace.h" -+#include "choose-mode.h" -+#include "kern.h" -+ -+#ifdef CONFIG_MODE_TT -+extern int modify_ldt(int func, void *ptr, unsigned long bytecount); -+ -+/* XXX this needs copy_to_user and copy_from_user */ -+ -+int sys_modify_ldt_tt(int func, void *ptr, unsigned long bytecount) -+{ -+ if(verify_area(VERIFY_READ, ptr, bytecount)) return(-EFAULT); -+ return(modify_ldt(func, ptr, bytecount)); -+} -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+extern int userspace_pid; -+ -+int sys_modify_ldt_skas(int func, void *ptr, unsigned long bytecount) -+{ -+ struct ptrace_ldt ldt; -+ void *buf; -+ int res, n; -+ -+ buf = kmalloc(bytecount, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ res = 0; -+ -+ switch(func){ -+ case 1: -+ case 0x11: -+ res = copy_from_user(buf, ptr, bytecount); -+ break; -+ } -+ -+ if(res != 0){ -+ res = -EFAULT; -+ goto out; -+ } -+ -+ ldt = ((struct ptrace_ldt) { .func = func, -+ .ptr = buf, -+ .bytecount = bytecount }); -+ res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); -+ if(res < 0) -+ goto out; -+ -+ switch(func){ -+ case 0: -+ case 2: -+ n = res; -+ res = copy_to_user(ptr, buf, n); -+ if(res != 0) -+ res = -EFAULT; -+ else -+ res = n; -+ break; -+ } -+ -+ out: -+ kfree(buf); -+ return(res); -+} -+#endif -+ -+int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -+{ -+ return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func, -+ ptr, bytecount)); -+} -+ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/Makefile um/arch/um/sys-i386/Makefile ---- orig/arch/um/sys-i386/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/Makefile 2003-11-09 11:55:24.000000000 -0500 -@@ -0,0 +1,46 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o ptrace.o \ -+ ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o time.o -+export-objs = ksyms.o -+ -+USER_OBJS = bugs.o ptrace_user.o sigcontext.o fault.o -+ -+SYMLINKS = semaphore.c extable.c -+ -+semaphore.c-dir = kernel -+extable.c-dir = mm -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+define make_link -+ -rm -f $1 -+ ln -sf $(TOPDIR)/arch/i386/$($1-dir)/$1 $1 -+endef -+ -+$(SYMLINKS): -+ $(call make_link,$@) -+ -+clean: -+ $(MAKE) -C util clean -+ rm -f $(SYMLINKS) -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+archclean: -+ -+archdep: -+ -+modules: -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ptrace.c um/arch/um/sys-i386/ptrace.c ---- orig/arch/um/sys-i386/ptrace.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/ptrace.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,365 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/elf.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "ptrace_user.h" -+#include "sysdep/sigcontext.h" -+#include "sysdep/sc.h" -+ -+void arch_switch(void) -+{ -+ update_debugregs(current->thread.arch.debugregs_seq); -+} -+ -+int is_syscall(unsigned long addr) -+{ -+ unsigned short instr; -+ int n; -+ -+ n = copy_from_user(&instr, (void *) addr, sizeof(instr)); -+ if(n){ -+ printk("is_syscall : failed to read instruction from 0x%lu\n", -+ addr); -+ return(0); -+ } -+ return(instr == 0x80cd); -+} -+ -+/* determines which flags the user has access to. */ -+/* 1 = access 0 = no access */ -+#define FLAG_MASK 0x00044dd5 -+ -+int putreg(struct task_struct *child, int regno, unsigned long value) -+{ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_FS(&child->thread.regs) = value; -+ return 0; -+ case GS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_GS(&child->thread.regs) = value; -+ return 0; -+ case DS: -+ case ES: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case SS: -+ case CS: -+ if ((value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case EFL: -+ value &= FLAG_MASK; -+ value |= PT_REGS_EFLAGS(&child->thread.regs); -+ break; -+ } -+ PT_REGS_SET(&child->thread.regs, regno, value); -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, int regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ case GS: -+ case DS: -+ case ES: -+ case SS: -+ case CS: -+ retval = 0xffff; -+ /* fall through */ -+ default: -+ retval &= PT_REG(&child->thread.regs, regno); -+ } -+ return retval; -+} -+ -+struct i387_fxsave_struct { -+ unsigned short cwd; -+ unsigned short swd; -+ unsigned short twd; -+ unsigned short fop; -+ long fip; -+ long fcs; -+ long foo; -+ long fos; -+ long mxcsr; -+ long reserved; -+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ -+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ -+ long padding[56]; -+}; -+ -+/* -+ * FPU tag word conversions. -+ */ -+ -+static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) -+{ -+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */ -+ -+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ -+ tmp = ~twd; -+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ -+ /* and move the valid bits to the lower byte. */ -+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ -+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ -+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ -+ return tmp; -+} -+ -+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) -+{ -+ struct _fpxreg *st = NULL; -+ unsigned long twd = (unsigned long) fxsave->twd; -+ unsigned long tag; -+ unsigned long ret = 0xffff0000; -+ int i; -+ -+#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); -+ -+ for ( i = 0 ; i < 8 ; i++ ) { -+ if ( twd & 0x1 ) { -+ st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); -+ -+ switch ( st->exponent & 0x7fff ) { -+ case 0x7fff: -+ tag = 2; /* Special */ -+ break; -+ case 0x0000: -+ if ( !st->significand[0] && -+ !st->significand[1] && -+ !st->significand[2] && -+ !st->significand[3] ) { -+ tag = 1; /* Zero */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ default: -+ if ( st->significand[3] & 0x8000 ) { -+ tag = 0; /* Valid */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ } -+ } else { -+ tag = 3; /* Empty */ -+ } -+ ret |= (tag << (2 * i)); -+ twd = twd >> 1; -+ } -+ return ret; -+} -+ -+/* -+ * FXSR floating point environment conversions. -+ */ -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_to_user_tt(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpreg *to; -+ struct _fpxreg *from; -+ int i; -+ -+ env[0] = (unsigned long)fxsave->cwd | 0xffff0000; -+ env[1] = (unsigned long)fxsave->swd | 0xffff0000; -+ env[2] = twd_fxsr_to_i387(fxsave); -+ env[3] = fxsave->fip; -+ env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); -+ env[5] = fxsave->foo; -+ env[6] = fxsave->fos; -+ -+ if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) -+ return 1; -+ -+ to = &buf->_st[0]; -+ from = (struct _fpxreg *) &fxsave->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_to_user( to, from, sizeof(*to) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_to_user(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ return(CHOOSE_MODE(convert_fxsr_to_user_tt(buf, regs), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_from_user_tt(struct pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpxreg *to; -+ struct _fpreg *from; -+ int i; -+ -+ if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) -+ return 1; -+ -+ fxsave->cwd = (unsigned short)(env[0] & 0xffff); -+ fxsave->swd = (unsigned short)(env[1] & 0xffff); -+ fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); -+ fxsave->fip = env[3]; -+ fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); -+ fxsave->fcs = (env[4] & 0xffff); -+ fxsave->foo = env[5]; -+ fxsave->fos = env[6]; -+ -+ to = (struct _fpxreg *) &fxsave->st_space[0]; -+ from = &buf->_st[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_from_user( to, from, sizeof(*from) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_from_user(struct pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ return(CHOOSE_MODE(convert_fxsr_from_user_tt(regs, buf), 0)); -+} -+ -+int get_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_to_user((struct _fpstate *) buf, -+ &child->thread.regs); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+int set_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_from_user(&child->thread.regs, -+ (struct _fpstate *) buf); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+#ifdef CONFIG_MODE_TT -+int get_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_to_user((void *) buf, fxsave, -+ sizeof(struct user_fxsr_struct)); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int get_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(get_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+int set_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_from_user(fxsave, (void *) buf, -+ sizeof(struct user_fxsr_struct) ); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int set_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(set_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef notdef -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) -+{ -+ fpu->cwd = (((SC_FP_CW(PT_REGS_SC(regs)) & 0xffff) << 16) | -+ (SC_FP_SW(PT_REGS_SC(regs)) & 0xffff)); -+ fpu->swd = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->twd = SC_FP_IPOFF(PT_REGS_SC(regs)); -+ fpu->fip = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->fcs = SC_FP_DATAOFF(PT_REGS_SC(regs)); -+ fpu->foo = SC_FP_DATASEL(PT_REGS_SC(regs)); -+ fpu->fos = 0; -+ memcpy(fpu->st_space, (void *) SC_FP_ST(PT_REGS_SC(regs)), -+ sizeof(fpu->st_space)); -+ return(1); -+} -+#endif -+ -+#ifdef CONFIG_MODE_TT -+static inline void copy_fpu_fxsave_tt(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ struct i387_fxsave_struct *fpu = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned short *to; -+ unsigned short *from; -+ int i; -+ -+ memcpy( buf, fpu, 7 * sizeof(long) ); -+ -+ to = (unsigned short *) &buf->st_space[0]; -+ from = (unsigned short *) &fpu->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { -+ memcpy( to, from, 5 * sizeof(unsigned short) ); -+ } -+} -+#endif -+ -+static inline void copy_fpu_fxsave(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ (void) CHOOSE_MODE(copy_fpu_fxsave_tt(regs, buf), 0); -+} -+ -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu ) -+{ -+ copy_fpu_fxsave(regs, (struct user_i387_struct *) fpu); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ptrace_user.c um/arch/um/sys-i386/ptrace_user.c ---- orig/arch/um/sys-i386/ptrace_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/ptrace_user.c 2003-12-17 01:33:17.000000000 -0500 -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <errno.h> -+#include <unistd.h> -+#include <linux/stddef.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <asm/user.h> -+#include "kern_util.h" -+#include "sysdep/thread.h" -+#include "user.h" -+#include "os.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ return(ptrace(PTRACE_GETREGS, pid, 0, regs_out)); -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_SETREGS, pid, 0, regs)); -+} -+ -+int ptrace_getfpregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_GETFPREGS, pid, 0, regs)); -+} -+ -+static void write_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ if((i == 4) || (i == 5)) continue; -+ if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i], -+ regs[i]) < 0) -+ printk("write_debugregs - ptrace failed on " -+ "register %d, errno = %d\n", errno); -+ } -+} -+ -+static void read_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ regs[i] = ptrace(PTRACE_PEEKUSER, pid, -+ &dummy->u_debugreg[i], 0); -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+static unsigned long kernel_debugregs[8] = { [ 0 ... 7 ] = 0 }; -+static int debugregs_seq = 0; -+ -+void arch_enter_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, TASK_DEBUGREGS(task)); -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void arch_leave_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, kernel_debugregs); -+ write_debugregs(pid, TASK_DEBUGREGS(task)); -+} -+ -+void ptrace_pokeuser(unsigned long addr, unsigned long data) -+{ -+ if((addr < offsetof(struct user, u_debugreg[0])) || -+ (addr > offsetof(struct user, u_debugreg[7]))) -+ return; -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if(kernel_debugregs[addr] == data) return; -+ -+ kernel_debugregs[addr] = data; -+ debugregs_seq++; -+} -+ -+static void update_debugregs_cb(void *arg) -+{ -+ int pid = *((int *) arg); -+ -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void update_debugregs(int seq) -+{ -+ int me; -+ -+ if(seq == debugregs_seq) return; -+ -+ me = os_getpid(); -+ initial_thread_cb(update_debugregs_cb, &me); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/sigcontext.c um/arch/um/sys-i386/sigcontext.c ---- orig/arch/um/sys-i386/sigcontext.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/sigcontext.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stddef.h> -+#include <string.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "kern_util.h" -+#include "frame_user.h" -+ -+int sc_size(void *data) -+{ -+ struct arch_frame_data *arch = data; -+ -+ return(sizeof(struct sigcontext) + arch->fpstate_size); -+} -+ -+void sc_to_sc(void *to_ptr, void *from_ptr) -+{ -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ int size = sizeof(*to) + signal_frame_sc.common.arch.fpstate_size; -+ -+ memcpy(to, from, size); -+ if(from->fpstate != NULL) to->fpstate = (struct _fpstate *) (to + 1); -+} -+ -+unsigned long *sc_sigmask(void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ -+ return(&sc->oldmask); -+} -+ -+int sc_get_fpregs(unsigned long buf, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct _fpstate *from = sc->fpstate, *to = (struct _fpstate *) buf; -+ int err = 0; -+ -+ if(from == NULL){ -+ err |= clear_user_proc(&to->cw, sizeof(to->cw)); -+ err |= clear_user_proc(&to->sw, sizeof(to->sw)); -+ err |= clear_user_proc(&to->tag, sizeof(to->tag)); -+ err |= clear_user_proc(&to->ipoff, sizeof(to->ipoff)); -+ err |= clear_user_proc(&to->cssel, sizeof(to->cssel)); -+ err |= clear_user_proc(&to->dataoff, sizeof(to->dataoff)); -+ err |= clear_user_proc(&to->datasel, sizeof(to->datasel)); -+ err |= clear_user_proc(&to->_st, sizeof(to->_st)); -+ } -+ else { -+ err |= copy_to_user_proc(&to->cw, &from->cw, sizeof(to->cw)); -+ err |= copy_to_user_proc(&to->sw, &from->sw, sizeof(to->sw)); -+ err |= copy_to_user_proc(&to->tag, &from->tag, -+ sizeof(to->tag)); -+ err |= copy_to_user_proc(&to->ipoff, &from->ipoff, -+ sizeof(to->ipoff)); -+ err |= copy_to_user_proc(&to->cssel,& from->cssel, -+ sizeof(to->cssel)); -+ err |= copy_to_user_proc(&to->dataoff, &from->dataoff, -+ sizeof(to->dataoff)); -+ err |= copy_to_user_proc(&to->datasel, &from->datasel, -+ sizeof(to->datasel)); -+ err |= copy_to_user_proc(to->_st, from->_st, sizeof(to->_st)); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/syscalls.c um/arch/um/sys-i386/syscalls.c ---- orig/arch/um/sys-i386/syscalls.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/syscalls.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/unistd.h" -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. -+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+extern int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+int old_mmap_i386(struct mmap_arg_struct *arg) -+{ -+ struct mmap_arg_struct a; -+ int err = -EFAULT; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ goto out; -+ -+ err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -+ out: -+ return err; -+} -+ -+struct sel_arg_struct { -+ unsigned long n; -+ fd_set *inp, *outp, *exp; -+ struct timeval *tvp; -+}; -+ -+int old_select(struct sel_arg_struct *arg) -+{ -+ struct sel_arg_struct a; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ return -EFAULT; -+ /* sys_select() does the appropriate kernel locking */ -+ return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/sysrq.c um/arch/um/sys-i386/sysrq.c ---- orig/arch/um/sys-i386/sysrq.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/sysrq.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,30 @@ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs *regs) -+{ -+ printk("\n"); -+ printk("EIP: %04lx:[<%08lx>] CPU: %d %s", -+ 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), -+ smp_processor_id(), print_tainted()); -+ if (PT_REGS_CS(regs) & 3) -+ printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), -+ PT_REGS_SP(regs)); -+ printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), -+ print_tainted()); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ PT_REGS_EAX(regs), PT_REGS_EBX(regs), -+ PT_REGS_ECX(regs), -+ PT_REGS_EDX(regs)); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ PT_REGS_ESI(regs), PT_REGS_EDI(regs), -+ PT_REGS_EBP(regs)); -+ printk(" DS: %04lx ES: %04lx\n", -+ 0xffff & PT_REGS_DS(regs), -+ 0xffff & PT_REGS_ES(regs)); -+ -+ show_trace((unsigned long *) ®s); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/time.c um/arch/um/sys-i386/time.c ---- orig/arch/um/sys-i386/time.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/time.c 2003-11-10 01:46:49.000000000 -0500 -@@ -0,0 +1,24 @@ -+/* -+ * sys-i386/time.c -+ * Created 25.9.2002 Sapan Bhatia -+ * -+ */ -+ -+unsigned long long time_stamp(void) -+{ -+ unsigned long low, high; -+ -+ asm("rdtsc" : "=a" (low), "=d" (high)); -+ return((((unsigned long long) high) << 32) + low); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/Makefile um/arch/um/sys-i386/util/Makefile ---- orig/arch/um/sys-i386/util/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/util/Makefile 2003-11-08 08:26:40.000000000 -0500 -@@ -0,0 +1,28 @@ -+EXE = mk_sc mk_thread -+ -+include $(TOPDIR)/Rules.make -+ -+all : $(EXE) -+ -+mk_sc : mk_sc.o -+ $(HOSTCC) -o mk_sc mk_sc.o -+ -+mk_sc.o : mk_sc.c -+ $(HOSTCC) -c $< -+ -+mk_thread : mk_thread_user.o mk_thread_kern.o -+ $(HOSTCC) -o mk_thread mk_thread_user.o mk_thread_kern.o -+ -+mk_thread_user.o : mk_thread_user.c -+ $(HOSTCC) -c $< -+ -+mk_thread_kern.o : mk_thread_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(EXE) *.o -+ -+archmrproper : clean -+ -+fastdep : -+ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_sc.c um/arch/um/sys-i386/util/mk_sc.c ---- orig/arch/um/sys-i386/util/mk_sc.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/util/mk_sc.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,52 @@ -+#include <stdio.h> -+#include <signal.h> -+#include <linux/stddef.h> -+ -+#define SC_OFFSET(name, field) \ -+ printf("#define " name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ -+ offsetof(struct sigcontext, field)) -+ -+#define SC_FP_OFFSET(name, field) \ -+ printf("#define " name \ -+ "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+#define SC_FP_OFFSET_PTR(name, field, type) \ -+ printf("#define " name \ -+ "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+int main(int argc, char **argv) -+{ -+ SC_OFFSET("SC_IP", eip); -+ SC_OFFSET("SC_SP", esp); -+ SC_OFFSET("SC_FS", fs); -+ SC_OFFSET("SC_GS", gs); -+ SC_OFFSET("SC_DS", ds); -+ SC_OFFSET("SC_ES", es); -+ SC_OFFSET("SC_SS", ss); -+ SC_OFFSET("SC_CS", cs); -+ SC_OFFSET("SC_EFLAGS", eflags); -+ SC_OFFSET("SC_EAX", eax); -+ SC_OFFSET("SC_EBX", ebx); -+ SC_OFFSET("SC_ECX", ecx); -+ SC_OFFSET("SC_EDX", edx); -+ SC_OFFSET("SC_EDI", edi); -+ SC_OFFSET("SC_ESI", esi); -+ SC_OFFSET("SC_EBP", ebp); -+ SC_OFFSET("SC_TRAPNO", trapno); -+ SC_OFFSET("SC_ERR", err); -+ SC_OFFSET("SC_CR2", cr2); -+ SC_OFFSET("SC_FPSTATE", fpstate); -+ SC_OFFSET("SC_SIGMASK", oldmask); -+ SC_FP_OFFSET("SC_FP_CW", cw); -+ SC_FP_OFFSET("SC_FP_SW", sw); -+ SC_FP_OFFSET("SC_FP_TAG", tag); -+ SC_FP_OFFSET("SC_FP_IPOFF", ipoff); -+ SC_FP_OFFSET("SC_FP_CSSEL", cssel); -+ SC_FP_OFFSET("SC_FP_DATAOFF", dataoff); -+ SC_FP_OFFSET("SC_FP_DATASEL", datasel); -+ SC_FP_OFFSET_PTR("SC_FP_ST", _st, "struct _fpstate"); -+ SC_FP_OFFSET_PTR("SC_FXSR_ENV", _fxsr_env, "void"); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_thread_kern.c um/arch/um/sys-i386/util/mk_thread_kern.c ---- orig/arch/um/sys-i386/util/mk_thread_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/util/mk_thread_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,22 @@ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sched.h" -+ -+extern void print_head(void); -+extern void print_constant_ptr(char *name, int value); -+extern void print_constant(char *name, char *type, int value); -+extern void print_tail(void); -+ -+#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_ptr("TASK_DEBUGREGS", THREAD_OFFSET(arch.debugregs)); -+#ifdef CONFIG_MODE_TT -+ print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -+#endif -+ print_tail(); -+ return(0); -+} -+ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_thread_user.c um/arch/um/sys-i386/util/mk_thread_user.c ---- orig/arch/um/sys-i386/util/mk_thread_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-i386/util/mk_thread_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_thread\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_THREAD_H\n"); -+ printf("#define __UM_THREAD_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_ptr(char *name, int value) -+{ -+ printf("#define %s(task) ((unsigned long *) " -+ "&(((char *) (task))[%d]))\n", name, value); -+} -+ -+void print_constant(char *name, char *type, int value) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-ia64/Makefile um/arch/um/sys-ia64/Makefile ---- orig/arch/um/sys-ia64/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ia64/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,26 @@ -+OBJ = sys.o -+ -+OBJS = -+ -+all: $(OBJ) -+ -+$(OBJ): $(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+clean: -+ rm -f $(OBJS) -+ -+fastdep: -+ -+archmrproper: -+ -+archclean: -+ rm -f link.ld -+ @$(MAKEBOOT) clean -+ -+archdep: -+ @$(MAKEBOOT) dep -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/Makefile um/arch/um/sys-ppc/Makefile ---- orig/arch/um/sys-ppc/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/Makefile 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,80 @@ -+OBJ = sys.o -+ -+.S.o: -+ $(CC) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ -+OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \ -+ ptrace_user.o sysrq.o -+ -+EXTRA_AFLAGS := -DCONFIG_ALL_PPC -I. -I$(TOPDIR)/arch/ppc/kernel -+ -+all: $(OBJ) -+ -+$(OBJ): $(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+ -+ptrace_user.o: ptrace_user.c -+ $(CC) -D__KERNEL__ $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+sigcontext.o: sigcontext.c -+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+semaphore.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+checksum.S: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/lib/$@ $@ -+ -+mk_defs.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.head: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.h: mk_defs.c ppc_defs.head \ -+ $(TOPDIR)/include/asm-ppc/mmu.h \ -+ $(TOPDIR)/include/asm-ppc/processor.h \ -+ $(TOPDIR)/include/asm-ppc/pgtable.h \ -+ $(TOPDIR)/include/asm-ppc/ptrace.h -+# $(CC) $(CFLAGS) -S mk_defs.c -+ cp ppc_defs.head ppc_defs.h -+# for bk, this way we can write to the file even if it's not checked out -+ echo '#define THREAD 608' >> ppc_defs.h -+ echo '#define PT_REGS 8' >> ppc_defs.h -+ echo '#define CLONE_VM 256' >> ppc_defs.h -+# chmod u+w ppc_defs.h -+# grep '^#define' mk_defs.s >> ppc_defs.h -+# rm mk_defs.s -+ -+# the asm link is horrible, and breaks the other targets. This is also -+# not going to work with parallel makes. -+ -+checksum.o: checksum.S -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+misc.o: misc.S ppc_defs.h -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+clean: -+ rm -f $(OBJS) -+ rm -f ppc_defs.h -+ rm -f checksum.S semaphore.c mk_defs.c -+ -+fastdep: -+ -+dep: -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/misc.S um/arch/um/sys-ppc/misc.S ---- orig/arch/um/sys-ppc/misc.S 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/misc.S 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,116 @@ -+/* -+ * This file contains miscellaneous low-level functions. -+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) -+ * -+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) -+ * and Paul Mackerras. -+ * -+ * A couple of functions stolen from arch/ppc/kernel/misc.S for UML -+ * by Chris Emerson. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ */ -+ -+#include <linux/config.h> -+#include <asm/processor.h> -+#include "ppc_asm.h" -+ -+#if defined(CONFIG_4xx) || defined(CONFIG_8xx) -+#define CACHE_LINE_SIZE 16 -+#define LG_CACHE_LINE_SIZE 4 -+#define MAX_COPY_PREFETCH 1 -+#elif !defined(CONFIG_PPC64BRIDGE) -+#define CACHE_LINE_SIZE 32 -+#define LG_CACHE_LINE_SIZE 5 -+#define MAX_COPY_PREFETCH 4 -+#else -+#define CACHE_LINE_SIZE 128 -+#define LG_CACHE_LINE_SIZE 7 -+#define MAX_COPY_PREFETCH 1 -+#endif /* CONFIG_4xx || CONFIG_8xx */ -+ -+ .text -+ -+/* -+ * Clear a page using the dcbz instruction, which doesn't cause any -+ * memory traffic (except to write out any cache lines which get -+ * displaced). This only works on cacheable memory. -+ */ -+_GLOBAL(clear_page) -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+#ifdef CONFIG_8xx -+ li r4, 0 -+1: stw r4, 0(r3) -+ stw r4, 4(r3) -+ stw r4, 8(r3) -+ stw r4, 12(r3) -+#else -+1: dcbz 0,r3 -+#endif -+ addi r3,r3,CACHE_LINE_SIZE -+ bdnz 1b -+ blr -+ -+/* -+ * Copy a whole page. We use the dcbz instruction on the destination -+ * to reduce memory traffic (it eliminates the unnecessary reads of -+ * the destination into cache). This requires that the destination -+ * is cacheable. -+ */ -+#define COPY_16_BYTES \ -+ lwz r6,4(r4); \ -+ lwz r7,8(r4); \ -+ lwz r8,12(r4); \ -+ lwzu r9,16(r4); \ -+ stw r6,4(r3); \ -+ stw r7,8(r3); \ -+ stw r8,12(r3); \ -+ stwu r9,16(r3) -+ -+_GLOBAL(copy_page) -+ addi r3,r3,-4 -+ addi r4,r4,-4 -+ li r5,4 -+ -+#ifndef CONFIG_8xx -+#if MAX_COPY_PREFETCH > 1 -+ li r0,MAX_COPY_PREFETCH -+ li r11,4 -+ mtctr r0 -+11: dcbt r11,r4 -+ addi r11,r11,CACHE_LINE_SIZE -+ bdnz 11b -+#else /* MAX_COPY_PREFETCH == 1 */ -+ dcbt r5,r4 -+ li r11,CACHE_LINE_SIZE+4 -+#endif /* MAX_COPY_PREFETCH */ -+#endif /* CONFIG_8xx */ -+ -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+1: -+#ifndef CONFIG_8xx -+ dcbt r11,r4 -+ dcbz r5,r3 -+#endif -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 32 -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 64 -+ COPY_16_BYTES -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 128 -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+#endif -+#endif -+#endif -+ bdnz 1b -+ blr -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/miscthings.c um/arch/um/sys-ppc/miscthings.c ---- orig/arch/um/sys-ppc/miscthings.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/miscthings.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,53 @@ -+#include "linux/threads.h" -+#include "linux/stddef.h" // for NULL -+#include "linux/elf.h" // for AT_NULL -+ -+/* The following function nicked from arch/ppc/kernel/process.c and -+ * adapted slightly */ -+/* -+ * XXX ld.so expects the auxiliary table to start on -+ * a 16-byte boundary, so we have to find it and -+ * move it up. :-( -+ */ -+void shove_aux_table(unsigned long sp) -+{ -+ int argc; -+ char *p; -+ unsigned long e; -+ unsigned long aux_start, offset; -+ -+ argc = *(int *)sp; -+ sp += sizeof(int) + (argc + 1) * sizeof(char *); -+ /* skip over the environment pointers */ -+ do { -+ p = *(char **)sp; -+ sp += sizeof(char *); -+ } while (p != NULL); -+ aux_start = sp; -+ /* skip to the end of the auxiliary table */ -+ do { -+ e = *(unsigned long *)sp; -+ sp += 2 * sizeof(unsigned long); -+ } while (e != AT_NULL); -+ offset = ((aux_start + 15) & ~15) - aux_start; -+ if (offset != 0) { -+ do { -+ sp -= sizeof(unsigned long); -+ e = *(unsigned long *)sp; -+ *(unsigned long *)(sp + offset) = e; -+ } while (sp > aux_start); -+ } -+} -+/* END stuff taken from arch/ppc/kernel/process.c */ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/ptrace.c um/arch/um/sys-ppc/ptrace.c ---- orig/arch/um/sys-ppc/ptrace.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/ptrace.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,28 @@ -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+ -+int putreg(struct task_struct *child, unsigned long regno, -+ unsigned long value) -+{ -+ child->thread.process_regs.regs[regno >> 2] = value; -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, unsigned long regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ retval &= child->thread.process_regs.regs[regno >> 2]; -+ return retval; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/ptrace_user.c um/arch/um/sys-ppc/ptrace_user.c ---- orig/arch/um/sys-ppc/ptrace_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/ptrace_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,40 @@ -+#include <sys/ptrace.h> -+#include <errno.h> -+#include <asm/ptrace.h> -+#include "sysdep/ptrace.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ errno = 0; -+ regs_out->regs[i] = ptrace(PTRACE_PEEKUSER, pid, i*4, 0); -+ if (errno) { -+ return -errno; -+ } -+ } -+ return 0; -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs_in) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) { -+ if (ptrace(PTRACE_POKEUSER, pid, i*4, regs_in->regs[i]) < 0) { -+ return -errno; -+ } -+ } -+ } -+ return 0; -+} -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/sigcontext.c um/arch/um/sys-ppc/sigcontext.c ---- orig/arch/um/sys-ppc/sigcontext.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/sigcontext.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,15 @@ -+#include "asm/ptrace.h" -+#include "asm/sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/sysrq.c um/arch/um/sys-ppc/sysrq.c ---- orig/arch/um/sys-ppc/sysrq.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/sys-ppc/sysrq.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs_subarch *regs) -+{ -+ printk("\n"); -+ printk("show_regs(): insert regs here.\n"); -+#if 0 -+ printk("\n"); -+ printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip, -+ smp_processor_id()); -+ if (regs->xcs & 3) -+ printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp); -+ printk(" EFLAGS: %08lx\n", regs->eflags); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ regs->eax, regs->ebx, regs->ecx, regs->edx); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ regs->esi, regs->edi, regs->ebp); -+ printk(" DS: %04x ES: %04x\n", -+ 0xffff & regs->xds, 0xffff & regs->xes); -+#endif -+ -+ show_trace(®s->gpr[1]); -+} -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/util/Makefile um/arch/um/util/Makefile ---- orig/arch/um/util/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/util/Makefile 2003-11-08 02:51:30.000000000 -0500 -@@ -0,0 +1,26 @@ -+ALL = mk_task mk_constants -+ -+all : $(ALL) -+ -+mk_task : mk_task_user.o mk_task_kern.o -+ $(HOSTCC) -o mk_task mk_task_user.o mk_task_kern.o -+ -+mk_task_user.o : mk_task_user.c -+ $(HOSTCC) -c $< -+ -+mk_task_kern.o : mk_task_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+mk_constants : mk_constants_user.o mk_constants_kern.o -+ $(HOSTCC) -o mk_constants mk_constants_user.o mk_constants_kern.o -+ -+mk_constants_user.o : mk_constants_user.c -+ $(HOSTCC) -c $< -+ -+mk_constants_kern.o : mk_constants_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(ALL) *.o *~ -+ -+archmrproper : clean -diff -Naur -X ../exclude-files orig/arch/um/util/mk_constants_kern.c um/arch/um/util/mk_constants_kern.c ---- orig/arch/um/util/mk_constants_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/util/mk_constants_kern.c 2004-01-10 06:54:51.000000000 -0500 -@@ -0,0 +1,25 @@ -+#include "linux/kernel.h" -+#include "linux/stringify.h" -+#include "asm/page.h" -+ -+extern void print_head(void); -+extern void print_constant_str(char *name, char *value); -+extern void print_constant_int(char *name, int value); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); -+ -+ print_constant_str("UM_KERN_EMERG", KERN_EMERG); -+ print_constant_str("UM_KERN_ALERT", KERN_ALERT); -+ print_constant_str("UM_KERN_CRIT", KERN_CRIT); -+ print_constant_str("UM_KERN_ERR", KERN_ERR); -+ print_constant_str("UM_KERN_WARNING", KERN_WARNING); -+ print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); -+ print_constant_str("UM_KERN_INFO", KERN_INFO); -+ print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); -+ print_tail(); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_constants_user.c um/arch/um/util/mk_constants_user.c ---- orig/arch/um/util/mk_constants_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/util/mk_constants_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,28 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_constants\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_CONSTANTS_H\n"); -+ printf("#define __UM_CONSTANTS_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_str(char *name, char *value) -+{ -+ printf("#define %s \"%s\"\n", name, value); -+} -+ -+void print_constant_int(char *name, int value) -+{ -+ printf("#define %s %d\n", name, value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_task_kern.c um/arch/um/util/mk_task_kern.c ---- orig/arch/um/util/mk_task_kern.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/util/mk_task_kern.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,17 @@ -+#include "linux/sched.h" -+#include "linux/stddef.h" -+ -+extern void print(char *name, char *type, int offset); -+extern void print_ptr(char *name, char *type, int offset); -+extern void print_head(void); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_ptr("TASK_REGS", "union uml_pt_regs", -+ offsetof(struct task_struct, thread.regs)); -+ print("TASK_PID", "int", offsetof(struct task_struct, pid)); -+ print_tail(); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_task_user.c um/arch/um/util/mk_task_user.c ---- orig/arch/um/util/mk_task_user.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/arch/um/util/mk_task_user.c 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print(char *name, char *type, int offset) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_ptr(char *name, char *type, int offset) -+{ -+ printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_task\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __TASK_H\n"); -+ printf("#define __TASK_H\n"); -+ printf("\n"); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/CREDITS um/CREDITS ---- orig/CREDITS 2003-12-16 22:16:23.000000000 -0500 -+++ um/CREDITS 2003-12-16 22:17:22.000000000 -0500 -@@ -434,6 +434,7 @@ - E: lars@nocrew.org - W: http://lars.nocrew.org/ - D: dsp56k device driver -+D: ptrace proxy in user mode kernel port - S: Kopmansg 2 - S: 411 13 Goteborg - S: Sweden -@@ -719,7 +720,7 @@ - E: jdike@karaya.com - W: http://user-mode-linux.sourceforge.net - D: User mode kernel port --S: RR1 Box 67C -+S: 375 Tubbs Hill Rd - S: Deering NH 03244 - S: USA - -diff -Naur -X ../exclude-files orig/Documentation/Configure.help um/Documentation/Configure.help ---- orig/Documentation/Configure.help 2003-12-16 22:16:23.000000000 -0500 -+++ um/Documentation/Configure.help 2003-12-16 22:17:22.000000000 -0500 -@@ -15821,6 +15821,63 @@ - The module will be called speedtch.o. If you want to compile it as - a module, say M here and read <file:Documentation/modules.txt>. - -+Support for /proc/mm -+CONFIG_PROC_MM -+ Enables support for address space separation through /proc/mm. -+ A host kernel needs to have this enabled in order for UML to -+ run in skas mode. UML kernels do not need to have this option -+ unless they will host sub-UMLs. -+ -+ If you don't know what this does just say Y. -+ -+Separate Kernel Address Space support -+CONFIG_MODE_SKAS -+ This option controls whether skas (separate kernel address space) -+ support is compiled in. If you have applied the skas patch to the -+ host and enabled support for /proc/mm in the host kernel, then you -+ certainly want to say Y here (and consider saying N to -+ CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this -+ option will shrink the UML binary slightly. -+ -+Tracing thread support -+CONFIG_MODE_TT -+ This option controls whether tracing thread support is compiled -+ into UML. Normally, this should be set to Y. If you intend to -+ use only skas mode (and the host has the skas patch applied to it), -+ then it is OK to say N here. -+ -+Force a static link -+CONFIG_STATIC_LINK -+ If CONFIG_MODE_TT is disabled, then this option gives you the ability -+ to force a static link of UML. Normally, if only skas mode is built -+ in to UML, it will be linked as a shared binary. This is inconvenient -+ for use in a chroot jail. So, if you intend to run UML inside a -+ chroot, and you disable CONFIG_MODE_TT, you probably want to say Y -+ here. -+ -+2G/2G host address space split -+CONFIG_HOST_2G_2G -+ Most Linux machines are configured so that the kernel occupies the -+ upper 1G of the 4G address space and processes use the lower 3G. -+ However, some machine are configured with a 2G/2G split, with the -+ kernel occupying the upper 2G and processes using the lower 2G. -+ -+ To allow UML to run on a such host you have to say Y here. N should be -+ a safe choice most of the time. -+ -+Kernel stack size order -+CONFIG_KERNEL_STACK_ORDER -+ This option determines the size of UML kernel stacks. They will -+ be 1 << order pages. The default is OK unless you're running Valgrind -+ on UML, in which case, set this to 3. -+ -+UML ubd block driver -+CONFIG_BLK_DEV_UBD -+ The User-Mode Linux port includes a driver called UBD which will let -+ you access arbitrary files on the host computer as block devices. -+ Unless you know that you do not need such virtual block devices say -+ Y here. -+ - CONFIG_USB_GADGET - USB is a master/slave protocol, organized with one master - host (such as a PC) controlling up to 127 peripheral devices. -@@ -15905,17 +15962,15 @@ - - Always do synchronous disk IO for UBD - CONFIG_BLK_DEV_UBD_SYNC -- The User-Mode Linux port includes a driver called UBD which will let -- you access arbitrary files on the host computer as block devices. -- Writes to such a block device are not immediately written to the -- host's disk; this may cause problems if, for example, the User-Mode -- Linux 'Virtual Machine' uses a journalling file system and the host -- computer crashes. -+ Writes to the virtual block device are not immediately written to the host's -+ disk; this may cause problems if, for example, the User-Mode Linux -+ 'Virtual Machine' uses a journalling filesystem and the host computer -+ crashes. - - Synchronous operation (i.e. always writing data to the host's disk - immediately) is configurable on a per-UBD basis by using a special - kernel command line option. Alternatively, you can say Y here to -- turn on synchronous operation by default for all block. -+ turn on synchronous operation by default for all block devices. - - If you're running a journalling file system (like reiserfs, for - example) in your virtual machine, you will want to say Y here. If -@@ -15927,6 +15982,7 @@ - CONFIG_PT_PROXY - This option enables a debugging interface which allows gdb to debug - the kernel without needing to actually attach to kernel threads. -+ CONFIG_XTERM_CHAN must be enabled in order to enable CONFIG_PT_PROXY. - If you want to do kernel debugging, say Y here; otherwise say N. - - Management console -@@ -15959,7 +16015,7 @@ - This option allows developers to retrieve coverage data from a UML - session. - -- See <http://user-mode-linux.sourceforge.net/gcov.html> for more -+ See <http://user-mode-linux.sourceforge.net/gprof.html> for more - details. - - If you're involved in UML kernel development and want to use gcov, -@@ -15996,6 +16052,19 @@ - If you'd like to be able to work with files stored on the host, - say Y or M here; otherwise say N. - -+HoneyPot ProcFS -+CONFIG_HPPFS -+ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc -+ entries to be overridden, removed, or fabricated from the host. -+ Its purpose is to allow a UML to appear to be a physical machine -+ by removing or changing anything in /proc which gives away the -+ identity of a UML. -+ -+ See <http://user-mode-linux.sf.net/hppfs.html> for more information. -+ -+ You only need this if you are setting up a UML honeypot. Otherwise, -+ it is safe to say 'N' here. -+ - Example IO Memory driver - CONFIG_MMAPPER - The User-Mode Linux port can provide support for IO Memory -@@ -16011,6 +16080,21 @@ - If you'd like to be able to provide a simulated IO port space for - User-Mode Linux processes, say Y. If unsure, say N. - -+Anonymous Memory support -+CONFIG_DEV_ANON -+ Don't ask. Just say Y. -+ -+Support for software watchdog inside UML -+CONFIG_UML_WATCHDOG -+ Support for a virtual hardware watchdog. It's safe to say N here. -+ -+COW block device -+CONFIG_COW -+ This is a layered driver which sits above two other block devices. -+ One is read-only, and the other is a read-write layer which stores -+ all changes. This provides the illusion that the read-only layer -+ can be mounted read-write and changed. -+ - Virtual Serial Line - CONFIG_SSL - The User-Mode Linux environment allows you to create virtual serial -@@ -16121,26 +16205,197 @@ - - SLIP transport - CONFIG_UML_NET_SLIP -- The Slip User-Mode Linux network transport allows a running UML to -+ The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), -- the Slip transport can only carry IP packets. -+ the slip transport can only carry IP packets. - -- To use this, your host must support Slip devices. -+ To use this, your host must support slip devices. - - For more information, see - <http://user-mode-linux.sourceforge.net/networking.html>. That site -- has examples of the UML command line to use to enable Slip -+ has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - -- The Ethertap Transport is preferred over Slip because of its -- limitation. If you prefer Slip, however, say Y here. Otherwise -+ The Ethertap Transport is preferred over slip because of its -+ limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. - -+SLiRP transport -+CONFIG_UML_NET_SLIRP -+ The SLiRP User-Mode Linux network transport allows a running UML -+ to network by invoking a program that can handle SLIP encapsulated -+ packets. This is commonly (but not limited to) the application -+ known as SLiRP, a program that can re-socket IP packets back onto -+ the host on which it is run. Only IP packets are supported, -+ unlike other network transports that can handle all Ethernet -+ frames. In general, slirp allows the UML the same IP connectivity -+ to the outside world that the host user is permitted, and unlike -+ other transports, SLiRP works without the need of root level -+ privleges, setuid binaries, or SLIP devices on the host. This -+ also means not every type of connection is possible, but most -+ situations can be accomodated with carefully crafted slirp -+ commands that can be passed along as part of the network device's -+ setup string. The effect of this transport on the UML is similar -+ that of a host behind a firewall that masquerades all network -+ connections passing through it (but is less secure). -+ -+ To use this you should first have slirp compiled somewhere -+ accessible on the host, and have read its documentation. If you -+ don't need UML networking, say N. -+ -+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" -+ -+pcap transport -+CONFIG_UML_NET_PCAP -+ The pcap transport makes a pcap packet stream on the host look -+ like an ethernet device inside UML. This is useful for making -+ UML act as a network monitor for the host. You must have libcap -+ installed in order to build the pcap transport into UML. -+ -+ For more information, see -+ <http://user-mode-linux.sourceforge.net/networking.html> That site -+ has examples of the UML command line to use to enable this option. -+ -+ If you intend to use UML as a network monitor for the host, say -+ Y here. Otherwise, say N. -+ -+Default main console channel initialization -+CONFIG_CON_ZERO_CHAN -+ This is the string describing the channel to which the main console -+ will be attached by default. This value can be overridden from the -+ command line. The default value is "fd:0,fd:1", which attaches the -+ main console to stdin and stdout. -+ It is safe to leave this unchanged. -+ -+Default console channel initialization -+CONFIG_CON_CHAN -+ This is the string describing the channel to which all consoles -+ except the main console will be attached by default. This value can -+ be overridden from the command line. The default value is "xterm", -+ which brings them up in xterms. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have X or xterm available. -+ -+Default serial line channel initialization -+CONFIG_SSL_CHAN -+ This is the string describing the channel to which the serial lines -+ will be attached by default. This value can be overridden from the -+ command line. The default value is "pty", which attaches them to -+ traditional pseudo-terminals. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have a set of /dev/pty* devices. -+ -+Nesting level -+CONFIG_NEST_LEVEL -+ This is set to the number of layers of UMLs that this UML will be run -+ in. Normally, this is zero, meaning that it will run directly on the -+ host. Setting it to one will build a UML that can run inside a UML -+ that is running on the host. Generally, if you intend this UML to run -+ inside another UML, set CONFIG_NEST_LEVEL to one more than the host UML. -+ Note that if the hosting UML has its CONFIG_KERNEL_HALF_GIGS set to -+ greater than one, then the guest UML should have its CONFIG_NEST_LEVEL -+ set to the host's CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS. -+ Only change this if you are running nested UMLs. -+ -+Kernel address space size (in .5G units) -+CONFIG_KERNEL_HALF_GIGS -+ This determines the amount of address space that UML will allocate for -+ its own, measured in half Gigabyte units. The default is 1. -+ Change this only if you need to boot UML with an unusually large amount -+ of physical memory. -+ -+UML sound support -+CONFIG_UML_SOUND -+ This option enables UML sound support. If enabled, it will pull in -+ soundcore and the UML hostaudio relay, which acts as a intermediary -+ between the host's dsp and mixer devices and the UML sound system. -+ It is safe to say 'Y' here. -+ -+UML SMP support -+CONFIG_UML_SMP -+ This option enables UML SMP support. UML implements virtual SMP by -+ allowing as many processes to run simultaneously on the host as -+ there are virtual processors configured. Obviously, if the host is -+ a uniprocessor, those processes will timeshare, but, inside UML, -+ will appear to be running simultaneously. If the host is a -+ multiprocessor, then UML processes may run simultaneously, depending -+ on the host scheduler. -+ CONFIG_SMP will be set to whatever this option is set to. -+ It is safe to leave this unchanged. -+ -+file descriptor channel support -+CONFIG_FD_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to already set up file descriptors. Generally, the main -+ console is attached to file descriptors 0 and 1 (stdin and stdout), -+ so it would be wise to leave this enabled unless you intend to -+ attach it to some other host device. -+ -+null device channel support -+CONFIG_NULL_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to a device similar to /dev/null. Data written to it disappears -+ and there is never any data to be read. -+ -+port channel support -+CONFIG_PORT_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host portals. They may be accessed with 'telnet <host> -+ <port number>'. Any number of consoles and serial lines may be -+ attached to a single portal, although what UML device you get when -+ you telnet to that portal will be unpredictable. -+ It is safe to say 'Y' here. -+ -+pty channel support -+CONFIG_PTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host pseudo-terminals. Access to both traditional -+ pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled -+ with this option. The assignment of UML devices to host devices -+ will be announced in the kernel message log. -+ It is safe to say 'Y' here. -+ -+tty channel support -+CONFIG_TTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host terminals. Access to both virtual consoles -+ (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and -+ /dev/pts/*) are controlled by this option. -+ It is safe to say 'Y' here. -+ -+xterm channel support -+CONFIG_XTERM_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to xterms. Each UML device so assigned will be brought up in -+ its own xterm. -+ If you disable this option, then CONFIG_PT_PROXY will be disabled as -+ well, since UML's gdb currently requires an xterm. -+ It is safe to say 'Y' here. -+ -+tty logging -+CONFIG_TTY_LOG -+ This option enables logging of all data going through pseudo-terminals -+ to the host. This is primarily useful for honeypots, where you want -+ secure keystroke logging that can't be detected or disabled by root. -+ Say 'N' unless you are setting up a UML honeypot or otherwise know that -+ you want this option. -+ -+UML real-time clock support -+CONFIG_UML_REAL_TIME_CLOCK -+ This option ties the UML clock to the host clock, so that time passes at -+ the same rate as on the host, regardless of how much CPU time the UML is -+ getting. This should normally be enabled. The exception would be if you're -+ debugging UML. In this case, time spent staring at the debugger with UML -+ stopped will cause lots of timer ticks to be backed up, and UML will spent -+ lots of time calling the timer when it is finally continued. -+ - Microtek USB scanner support - CONFIG_USB_MICROTEK - Say Y here if you want support for the Microtek X6USB and -diff -Naur -X ../exclude-files orig/drivers/char/Makefile um/drivers/char/Makefile ---- orig/drivers/char/Makefile 2003-12-16 22:16:26.000000000 -0500 -+++ um/drivers/char/Makefile 2003-12-16 22:17:25.000000000 -0500 -@@ -109,6 +109,12 @@ - endif - endif - -+ifeq ($(ARCH),um) -+ KEYMAP = -+ KEYBD = -+ CONSOLE = -+endif -+ - ifeq ($(ARCH),sh) - KEYMAP = - KEYBD = -diff -Naur -X ../exclude-files orig/drivers/char/mem.c um/drivers/char/mem.c ---- orig/drivers/char/mem.c 2003-12-16 22:16:27.000000000 -0500 -+++ um/drivers/char/mem.c 2004-01-05 11:23:32.000000000 -0500 -@@ -220,7 +220,8 @@ - ssize_t read = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ -- -+ -+ p = (unsigned long) __va(p); - if (p < (unsigned long) high_memory) { - read = count; - if (count > (unsigned long) high_memory - p) -@@ -269,7 +270,7 @@ - } - free_page((unsigned long)kbuf); - } -- *ppos = p; -+ *ppos = __pa((void *) p); - return virtr + read; - } - -@@ -664,6 +665,8 @@ - write: write_full, - }; - -+extern struct file_operations anon_file_operations; -+ - static int memory_open(struct inode * inode, struct file * filp) - { - switch (MINOR(inode->i_rdev)) { -@@ -693,6 +696,9 @@ - case 9: - filp->f_op = &urandom_fops; - break; -+ case 10: -+ filp->f_op = &anon_file_operations; -+ break; - default: - return -ENXIO; - } -@@ -719,7 +725,8 @@ - {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, - {7, "full", S_IRUGO | S_IWUGO, &full_fops}, - {8, "random", S_IRUGO | S_IWUSR, &random_fops}, -- {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops} -+ {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, -+ {10, "anon", S_IRUGO | S_IWUSR, &anon_file_operations}, - }; - int i; - -diff -Naur -X ../exclude-files orig/drivers/char/tty_io.c um/drivers/char/tty_io.c ---- orig/drivers/char/tty_io.c 2003-12-16 22:16:27.000000000 -0500 -+++ um/drivers/char/tty_io.c 2003-12-16 22:17:25.000000000 -0500 -@@ -649,6 +649,23 @@ - wake_up_interruptible(&tty->write_wait); - } - -+#ifdef CONFIG_TTY_LOG -+ -+int (*open_log)(void *, void *) = NULL; -+int (*write_log)(int, const char *, int, void *, int) = NULL; -+void (*close_log)(int, void *) = NULL; -+ -+void register_tty_logger(int (*opener)(void *, void *), -+ int (*writer)(int, const char *, int, void *, int), -+ void (*closer)(int, void *)) -+{ -+ open_log = opener; -+ write_log = writer; -+ close_log = closer; -+} -+ -+#endif -+ - static ssize_t tty_read(struct file * file, char * buf, size_t count, - loff_t *ppos) - { -@@ -689,8 +706,13 @@ - else - i = -EIO; - unlock_kernel(); -- if (i > 0) -+ if (i > 0){ - inode->i_atime = CURRENT_TIME; -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (write_log != NULL)) -+ (*write_log)(tty->log_fd, buf, i, tty, 1); -+#endif -+ } - return i; - } - -@@ -744,6 +766,10 @@ - if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; - ret = written; -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (write_log != NULL)) -+ (*write_log)(tty->log_fd, buf - ret, ret, tty, 0); -+#endif - } - up(&tty->atomic_write); - return ret; -@@ -971,6 +997,7 @@ - goto release_mem_out; - } - } -+ - goto success; - - /* -@@ -1290,6 +1317,11 @@ - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (close_log != NULL)) -+ (*close_log)(tty->log_fd, tty); -+#endif -+ - /* - * The release_mem function takes care of the details of clearing - * the slots and preserving the termios structure. -@@ -1448,6 +1480,11 @@ - nr_warns++; - } - } -+ -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd < 0) && (open_log != NULL)) -+ tty->log_fd = (*open_log)(tty, current->tty); -+#endif - return 0; - } - -@@ -2047,6 +2084,9 @@ - spin_lock_init(&tty->read_lock); - INIT_LIST_HEAD(&tty->tty_files); - INIT_TQUEUE(&tty->SAK_tq, 0, 0); -+#ifdef CONFIG_TTY_LOG -+ tty->log_fd = -1; -+#endif - } - - /* -diff -Naur -X ../exclude-files orig/drivers/net/setup.c um/drivers/net/setup.c ---- orig/drivers/net/setup.c 2002-09-15 12:13:19.000000000 -0400 -+++ um/drivers/net/setup.c 2003-10-21 03:26:07.000000000 -0400 -@@ -28,7 +28,6 @@ - extern int lmc_setup(void); - - extern int madgemc_probe(void); --extern int uml_net_probe(void); - - /* Pad device name to IFNAMSIZ=16. F.e. __PAD6 is string of 9 zeros. */ - #define __PAD6 "\0\0\0\0\0\0\0\0\0" -@@ -103,9 +102,6 @@ - #ifdef CONFIG_MADGEMC - {madgemc_probe, 0}, - #endif --#ifdef CONFIG_UML_NET -- {uml_net_probe, 0}, --#endif - - {NULL, 0}, - }; -diff -Naur -X ../exclude-files orig/fs/bad_inode.c um/fs/bad_inode.c ---- orig/fs/bad_inode.c 2002-08-21 11:47:27.000000000 -0400 -+++ um/fs/bad_inode.c 2003-10-21 03:26:07.000000000 -0400 -@@ -83,6 +83,7 @@ - - void make_bad_inode(struct inode * inode) - { -+ inode->i_state = 0; - inode->i_mode = S_IFREG; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &bad_inode_ops; -diff -Naur -X ../exclude-files orig/include/asm-i386/hardirq.h um/include/asm-i386/hardirq.h ---- orig/include/asm-i386/hardirq.h 2004-01-11 22:46:12.000000000 -0500 -+++ um/include/asm-i386/hardirq.h 2004-02-14 06:28:59.000000000 -0500 -@@ -4,6 +4,7 @@ - #include <linux/config.h> - #include <linux/threads.h> - #include <linux/irq.h> -+#include <asm/processor.h> /* for cpu_relax */ - - /* assembly code in softirq.h is sensitive to the offsets of these fields */ - typedef struct { -diff -Naur -X ../exclude-files orig/include/asm-um/a.out.h um/include/asm-um/a.out.h ---- orig/include/asm-um/a.out.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/a.out.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,20 @@ -+#ifndef __UM_A_OUT_H -+#define __UM_A_OUT_H -+ -+#include "linux/config.h" -+#include "asm/arch/a.out.h" -+#include "choose-mode.h" -+ -+#undef STACK_TOP -+ -+extern unsigned long stacksizelim; -+ -+extern unsigned long host_task_size; -+ -+#define STACK_ROOM (stacksizelim) -+ -+extern int honeypot; -+#define STACK_TOP \ -+ CHOOSE_MODE((honeypot ? host_task_size : task_size), task_size) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/archparam-i386.h um/include/asm-um/archparam-i386.h ---- orig/include/asm-um/archparam-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/archparam-i386.h 2003-12-16 08:50:39.000000000 -0500 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCHPARAM_I386_H -+#define __UM_ARCHPARAM_I386_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#include "user.h" -+ -+#define ELF_PLATFORM "i586" -+ -+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) -+ -+typedef struct user_i387_struct elf_fpregset_t; -+typedef unsigned long elf_greg_t; -+ -+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+#define ELF_DATA ELFDATA2LSB -+#define ELF_ARCH EM_386 -+ -+#define ELF_PLAT_INIT(regs, load_addr) do { \ -+ PT_REGS_EBX(regs) = 0; \ -+ PT_REGS_ECX(regs) = 0; \ -+ PT_REGS_EDX(regs) = 0; \ -+ PT_REGS_ESI(regs) = 0; \ -+ PT_REGS_EDI(regs) = 0; \ -+ PT_REGS_EBP(regs) = 0; \ -+ PT_REGS_EAX(regs) = 0; \ -+} while(0) -+ -+/* Shamelessly stolen from include/asm-i386/elf.h */ -+ -+#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ -+ pr_reg[0] = PT_REGS_EBX(regs); \ -+ pr_reg[1] = PT_REGS_ECX(regs); \ -+ pr_reg[2] = PT_REGS_EDX(regs); \ -+ pr_reg[3] = PT_REGS_ESI(regs); \ -+ pr_reg[4] = PT_REGS_EDI(regs); \ -+ pr_reg[5] = PT_REGS_EBP(regs); \ -+ pr_reg[6] = PT_REGS_EAX(regs); \ -+ pr_reg[7] = PT_REGS_DS(regs); \ -+ pr_reg[8] = PT_REGS_ES(regs); \ -+ /* fake once used fs and gs selectors? */ \ -+ pr_reg[9] = PT_REGS_DS(regs); \ -+ pr_reg[10] = PT_REGS_DS(regs); \ -+ pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ -+ pr_reg[12] = PT_REGS_IP(regs); \ -+ pr_reg[13] = PT_REGS_CS(regs); \ -+ pr_reg[14] = PT_REGS_EFLAGS(regs); \ -+ pr_reg[15] = PT_REGS_SP(regs); \ -+ pr_reg[16] = PT_REGS_SS(regs); \ -+} while(0); -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned long um_udelay_t; -+ -+/********* Nothing for asm-um/hardirq.h **********/ -+ -+/********* Nothing for asm-um/hw_irq.h **********/ -+ -+/********* Nothing for asm-um/string.h **********/ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/archparam-ppc.h um/include/asm-um/archparam-ppc.h ---- orig/include/asm-um/archparam-ppc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/archparam-ppc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,41 @@ -+#ifndef __UM_ARCHPARAM_PPC_H -+#define __UM_ARCHPARAM_PPC_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#define ELF_PLATFORM (0) -+ -+#define ELF_ET_DYN_BASE (0x08000000) -+ -+/* the following stolen from asm-ppc/elf.h */ -+#define ELF_NGREG 48 /* includes nip, msr, lr, etc. */ -+#define ELF_NFPREG 33 /* includes fpscr */ -+/* General registers */ -+typedef unsigned long elf_greg_t; -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+/* Floating point registers */ -+typedef double elf_fpreg_t; -+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; -+ -+#define ELF_DATA ELFDATA2MSB -+#define ELF_ARCH EM_PPC -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned int um_udelay_t; -+ -+/********* Bits for asm-um/hw_irq.h **********/ -+ -+struct hw_interrupt_type; -+ -+/********* Bits for asm-um/hardirq.h **********/ -+ -+#define irq_enter(cpu, irq) hardirq_enter(cpu) -+#define irq_exit(cpu, irq) hardirq_exit(cpu) -+ -+/********* Bits for asm-um/string.h **********/ -+ -+#define __HAVE_ARCH_STRRCHR -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/arch-signal-i386.h um/include/asm-um/arch-signal-i386.h ---- orig/include/asm-um/arch-signal-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/arch-signal-i386.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCH_SIGNAL_I386_H -+#define __UM_ARCH_SIGNAL_I386_H -+ -+struct arch_signal_context { -+ unsigned long extrasigs[_NSIG_WORDS]; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/atomic.h um/include/asm-um/atomic.h ---- orig/include/asm-um/atomic.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/atomic.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ATOMIC_H -+#define __UM_ATOMIC_H -+ -+#include "asm/arch/atomic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/bitops.h um/include/asm-um/bitops.h ---- orig/include/asm-um/bitops.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/bitops.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BITOPS_H -+#define __UM_BITOPS_H -+ -+#include "asm/arch/bitops.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/boot.h um/include/asm-um/boot.h ---- orig/include/asm-um/boot.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/boot.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BOOT_H -+#define __UM_BOOT_H -+ -+#include "asm/arch/boot.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/bugs.h um/include/asm-um/bugs.h ---- orig/include/asm-um/bugs.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/bugs.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BUGS_H -+#define __UM_BUGS_H -+ -+void check_bugs(void); -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/byteorder.h um/include/asm-um/byteorder.h ---- orig/include/asm-um/byteorder.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/byteorder.h 2004-01-16 03:32:58.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BYTEORDER_H -+#define __UM_BYTEORDER_H -+ -+#include "asm/arch/byteorder.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/cache.h um/include/asm-um/cache.h ---- orig/include/asm-um/cache.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/cache.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CACHE_H -+#define __UM_CACHE_H -+ -+#define L1_CACHE_BYTES 32 -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/checksum.h um/include/asm-um/checksum.h ---- orig/include/asm-um/checksum.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/checksum.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CHECKSUM_H -+#define __UM_CHECKSUM_H -+ -+#include "sysdep/checksum.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/cobalt.h um/include/asm-um/cobalt.h ---- orig/include/asm-um/cobalt.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/cobalt.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_COBALT_H -+#define __UM_COBALT_H -+ -+#include "asm/arch/cobalt.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/current.h um/include/asm-um/current.h ---- orig/include/asm-um/current.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/current.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_CURRENT_H -+#define __UM_CURRENT_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+#include "asm/page.h" -+ -+struct task_struct; -+ -+#define CURRENT_TASK(dummy) (((unsigned long) &dummy) & \ -+ (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER)) -+ -+#define current ({ int dummy; (struct task_struct *) CURRENT_TASK(dummy); }) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/delay.h um/include/asm-um/delay.h ---- orig/include/asm-um/delay.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/delay.h 2003-12-16 22:26:58.000000000 -0500 -@@ -0,0 +1,7 @@ -+#ifndef __UM_DELAY_H -+#define __UM_DELAY_H -+ -+#include "asm/arch/delay.h" -+#include "asm/archparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/desc.h um/include/asm-um/desc.h ---- orig/include/asm-um/desc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/desc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_DESC_H -+#define __UM_DESC_H -+ -+#include "asm/arch/desc.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/div64.h um/include/asm-um/div64.h ---- orig/include/asm-um/div64.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/div64.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef _UM_DIV64_H -+#define _UM_DIV64_H -+ -+#include "asm/arch/div64.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/dma.h um/include/asm-um/dma.h ---- orig/include/asm-um/dma.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/dma.h 2004-01-16 03:32:59.000000000 -0500 -@@ -0,0 +1,10 @@ -+#ifndef __UM_DMA_H -+#define __UM_DMA_H -+ -+#include "asm/io.h" -+ -+extern unsigned long uml_physmem; -+ -+#define MAX_DMA_ADDRESS (uml_physmem) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/elf.h um/include/asm-um/elf.h ---- orig/include/asm-um/elf.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/elf.h 2003-12-16 22:29:20.000000000 -0500 -@@ -0,0 +1,18 @@ -+#ifndef __UM_ELF_H -+#define __UM_ELF_H -+ -+#include "asm/archparam.h" -+ -+#define ELF_HWCAP (0) -+ -+#define SET_PERSONALITY(ex, ibcs2) do ; while(0) -+ -+#define ELF_EXEC_PAGESIZE 4096 -+ -+#define elf_check_arch(x) (1) -+ -+#define ELF_CLASS ELFCLASS32 -+ -+#define USE_ELF_CORE_DUMP -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/errno.h um/include/asm-um/errno.h ---- orig/include/asm-um/errno.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/errno.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ERRNO_H -+#define __UM_ERRNO_H -+ -+#include "asm/arch/errno.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/fcntl.h um/include/asm-um/fcntl.h ---- orig/include/asm-um/fcntl.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/fcntl.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FCNTL_H -+#define __UM_FCNTL_H -+ -+#include "asm/arch/fcntl.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/fixmap.h um/include/asm-um/fixmap.h ---- orig/include/asm-um/fixmap.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/fixmap.h 2004-02-14 06:29:00.000000000 -0500 -@@ -0,0 +1,89 @@ -+#ifndef __UM_FIXMAP_H -+#define __UM_FIXMAP_H -+ -+#include <linux/config.h> -+#include <asm/kmap_types.h> -+ -+/* -+ * Here we define all the compile-time 'special' virtual -+ * addresses. The point is to have a constant address at -+ * compile time, but to set the physical address only -+ * in the boot process. We allocate these special addresses -+ * from the end of virtual memory (0xfffff000) backwards. -+ * Also this lets us do fail-safe vmalloc(), we -+ * can guarantee that these special addresses and -+ * vmalloc()-ed addresses never overlap. -+ * -+ * these 'compile-time allocated' memory buffers are -+ * fixed-size 4k pages. (or larger if used with an increment -+ * highger than 1) use fixmap_set(idx,phys) to associate -+ * physical memory with fixmap indices. -+ * -+ * TLB entries of such buffers will not be flushed across -+ * task switches. -+ */ -+ -+/* -+ * on UP currently we will have no trace of the fixmap mechanizm, -+ * no page table allocations, etc. This might change in the -+ * future, say framebuffers for the console driver(s) could be -+ * fix-mapped? -+ */ -+enum fixed_addresses { -+#ifdef CONFIG_HIGHMEM -+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ -+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -+#endif -+ __end_of_fixed_addresses -+}; -+ -+extern void __set_fixmap (enum fixed_addresses idx, -+ unsigned long phys, pgprot_t flags); -+ -+#define set_fixmap(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL) -+/* -+ * Some hardware wants to get fixmapped without caching. -+ */ -+#define set_fixmap_nocache(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) -+/* -+ * used by vmalloc.c. -+ * -+ * Leave one empty page between vmalloc'ed areas and -+ * the start of the fixmap, and leave one page empty -+ * at the top of mem.. -+ */ -+extern unsigned long get_kmem_end(void); -+ -+#define FIXADDR_TOP (get_kmem_end() - 0x2000) -+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -+ -+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -+ -+extern void __this_fixmap_does_not_exist(void); -+ -+/* -+ * 'index to address' translation. If anyone tries to use the idx -+ * directly without tranlation, we catch the bug with a NULL-deference -+ * kernel oops. Illegal ranges of incoming indices are caught too. -+ */ -+static inline unsigned long fix_to_virt(const unsigned int idx) -+{ -+ /* -+ * this branch gets completely eliminated after inlining, -+ * except when someone tries to use fixaddr indices in an -+ * illegal way. (such as mixing up address types or using -+ * out-of-range indices). -+ * -+ * If it doesn't get removed, the linker will complain -+ * loudly with a reasonably clear error message.. -+ */ -+ if (idx >= __end_of_fixed_addresses) -+ __this_fixmap_does_not_exist(); -+ -+ return __fix_to_virt(idx); -+} -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/floppy.h um/include/asm-um/floppy.h ---- orig/include/asm-um/floppy.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/floppy.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FLOPPY_H -+#define __UM_FLOPPY_H -+ -+#include "asm/arch/floppy.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hardirq.h um/include/asm-um/hardirq.h ---- orig/include/asm-um/hardirq.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/hardirq.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HARDIRQ_H -+#define __UM_HARDIRQ_H -+ -+#include "asm/arch/hardirq.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hdreg.h um/include/asm-um/hdreg.h ---- orig/include/asm-um/hdreg.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/hdreg.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HDREG_H -+#define __UM_HDREG_H -+ -+#include "asm/arch/hdreg.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/highmem.h um/include/asm-um/highmem.h ---- orig/include/asm-um/highmem.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/highmem.h 2004-02-14 06:29:00.000000000 -0500 -@@ -0,0 +1,12 @@ -+#ifndef __UM_HIGHMEM_H -+#define __UM_HIGHMEM_H -+ -+#include "asm/page.h" -+#include "asm/fixmap.h" -+#include "asm/arch/highmem.h" -+ -+#undef PKMAP_BASE -+ -+#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hw_irq.h um/include/asm-um/hw_irq.h ---- orig/include/asm-um/hw_irq.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/hw_irq.h 2003-12-16 22:26:55.000000000 -0500 -@@ -0,0 +1,10 @@ -+#ifndef _ASM_UM_HW_IRQ_H -+#define _ASM_UM_HW_IRQ_H -+ -+#include "asm/irq.h" -+#include "asm/archparam.h" -+ -+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) -+{} -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ide.h um/include/asm-um/ide.h ---- orig/include/asm-um/ide.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ide.h 2004-01-16 23:40:52.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IDE_H -+#define __UM_IDE_H -+ -+#include "asm/arch/ide.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/init.h um/include/asm-um/init.h ---- orig/include/asm-um/init.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/init.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,11 @@ -+#ifndef _UM_INIT_H -+#define _UM_INIT_H -+ -+#ifdef notdef -+#define __init -+#define __initdata -+#define __initfunc(__arginit) __arginit -+#define __cacheline_aligned -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ioctl.h um/include/asm-um/ioctl.h ---- orig/include/asm-um/ioctl.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ioctl.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTL_H -+#define __UM_IOCTL_H -+ -+#include "asm/arch/ioctl.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ioctls.h um/include/asm-um/ioctls.h ---- orig/include/asm-um/ioctls.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ioctls.h 2003-11-07 02:10:43.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTLS_H -+#define __UM_IOCTLS_H -+ -+#include "asm/arch/ioctls.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/io.h um/include/asm-um/io.h ---- orig/include/asm-um/io.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/io.h 2004-01-16 03:32:59.000000000 -0500 -@@ -0,0 +1,25 @@ -+#ifndef __UM_IO_H -+#define __UM_IO_H -+ -+#include "asm/page.h" -+ -+#define IO_SPACE_LIMIT 0xdeadbeef /* Sure hope nothing uses this */ -+ -+static inline int inb(unsigned long i) { return(0); } -+static inline void outb(char c, unsigned long i) { } -+ -+/* -+ * Change virtual addresses to physical addresses and vv. -+ * These are pretty trivial -+ */ -+static inline unsigned long virt_to_phys(volatile void * address) -+{ -+ return __pa((void *) address); -+} -+ -+static inline void * phys_to_virt(unsigned long address) -+{ -+ return __va(address); -+} -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ipcbuf.h um/include/asm-um/ipcbuf.h ---- orig/include/asm-um/ipcbuf.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ipcbuf.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPCBUF_H -+#define __UM_IPCBUF_H -+ -+#include "asm/arch/ipcbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ipc.h um/include/asm-um/ipc.h ---- orig/include/asm-um/ipc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ipc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPC_H -+#define __UM_IPC_H -+ -+#include "asm/arch/ipc.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/irq.h um/include/asm-um/irq.h ---- orig/include/asm-um/irq.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/irq.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,22 @@ -+#ifndef __UM_IRQ_H -+#define __UM_IRQ_H -+ -+#define TIMER_IRQ 0 -+#define UMN_IRQ 1 -+#define CONSOLE_IRQ 2 -+#define CONSOLE_WRITE_IRQ 3 -+#define UBD_IRQ 4 -+#define UM_ETH_IRQ 5 -+#define SSL_IRQ 6 -+#define SSL_WRITE_IRQ 7 -+#define ACCEPT_IRQ 8 -+#define MCONSOLE_IRQ 9 -+#define WINCH_IRQ 10 -+#define SIGIO_WRITE_IRQ 11 -+#define TELNETD_IRQ 12 -+#define XTERM_IRQ 13 -+ -+#define LAST_IRQ XTERM_IRQ -+#define NR_IRQS (LAST_IRQ + 1) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/keyboard.h um/include/asm-um/keyboard.h ---- orig/include/asm-um/keyboard.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/keyboard.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_KEYBOARD_H -+#define __UM_KEYBOARD_H -+ -+#include "asm/arch/keyboard.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/kmap_types.h um/include/asm-um/kmap_types.h ---- orig/include/asm-um/kmap_types.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/kmap_types.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,11 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_KMAP_TYPES_H -+#define __UM_KMAP_TYPES_H -+ -+#include "asm/arch/kmap_types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/linux_logo.h um/include/asm-um/linux_logo.h ---- orig/include/asm-um/linux_logo.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/linux_logo.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LINUX_LOGO_H -+#define __UM_LINUX_LOGO_H -+ -+#include "asm/arch/linux_logo.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/locks.h um/include/asm-um/locks.h ---- orig/include/asm-um/locks.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/locks.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LOCKS_H -+#define __UM_LOCKS_H -+ -+#include "asm/arch/locks.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mca_dma.h um/include/asm-um/mca_dma.h ---- orig/include/asm-um/mca_dma.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/mca_dma.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef mca___UM_DMA_H -+#define mca___UM_DMA_H -+ -+#include "asm/arch/mca_dma.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mman.h um/include/asm-um/mman.h ---- orig/include/asm-um/mman.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/mman.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MMAN_H -+#define __UM_MMAN_H -+ -+#include "asm/arch/mman.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mmu_context.h um/include/asm-um/mmu_context.h ---- orig/include/asm-um/mmu_context.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/mmu_context.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,72 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_MMU_CONTEXT_H -+#define __UM_MMU_CONTEXT_H -+ -+#include "linux/sched.h" -+#include "choose-mode.h" -+ -+#define get_mmu_context(task) do ; while(0) -+#define activate_context(tsk) do ; while(0) -+ -+static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) -+{ -+} -+ -+extern void switch_mm_skas(int mm_fd); -+ -+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if(prev != next){ -+ clear_bit(cpu, &prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+ if(next != &init_mm) -+ CHOOSE_MODE((void) 0, -+ switch_mm_skas(next->context.skas.mm_fd)); -+ } -+} -+ -+static inline void enter_lazy_tlb(struct mm_struct *mm, -+ struct task_struct *tsk, unsigned cpu) -+{ -+} -+ -+extern int init_new_context_skas(struct task_struct *task, -+ struct mm_struct *mm); -+ -+static inline int init_new_context_tt(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(0); -+} -+ -+static inline int init_new_context(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(CHOOSE_MODE_PROC(init_new_context_tt, init_new_context_skas, -+ task, mm)); -+} -+ -+extern void destroy_context_skas(struct mm_struct *mm); -+ -+static inline void destroy_context(struct mm_struct *mm) -+{ -+ CHOOSE_MODE((void) 0, destroy_context_skas(mm)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/mmu.h um/include/asm-um/mmu.h ---- orig/include/asm-um/mmu.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/mmu.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MMU_H -+#define __MMU_H -+ -+#include "um_mmu.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/module.h um/include/asm-um/module.h ---- orig/include/asm-um/module.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/module.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MODULE_H -+#define __UM_MODULE_H -+ -+#include "asm/arch/module.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/msgbuf.h um/include/asm-um/msgbuf.h ---- orig/include/asm-um/msgbuf.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/msgbuf.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MSGBUF_H -+#define __UM_MSGBUF_H -+ -+#include "asm/arch/msgbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mtrr.h um/include/asm-um/mtrr.h ---- orig/include/asm-um/mtrr.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/mtrr.h 2004-02-12 05:46:29.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MTRR_H -+#define __UM_MTRR_H -+ -+#include "asm/arch/mtrr.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/namei.h um/include/asm-um/namei.h ---- orig/include/asm-um/namei.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/namei.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_NAMEI_H -+#define __UM_NAMEI_H -+ -+#include "asm/arch/namei.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/page.h um/include/asm-um/page.h ---- orig/include/asm-um/page.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/page.h 2004-01-16 03:32:58.000000000 -0500 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PAGE_H -+#define __UM_PAGE_H -+ -+struct page; -+ -+#include "asm/arch/page.h" -+ -+#undef BUG -+#undef PAGE_BUG -+#undef __pa -+#undef __va -+#undef virt_to_page -+#undef VALID_PAGE -+#undef PAGE_OFFSET -+#undef KERNELBASE -+ -+extern unsigned long uml_physmem; -+ -+#define PAGE_OFFSET (uml_physmem) -+#define KERNELBASE PAGE_OFFSET -+ -+#ifndef __ASSEMBLY__ -+ -+extern void stop(void); -+ -+#define BUG() do { \ -+ panic("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ -+} while (0) -+ -+#define PAGE_BUG(page) do { \ -+ BUG(); \ -+} while (0) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#define __va_space (8*1024*1024) -+ -+extern unsigned long to_phys(void *virt); -+extern void *to_virt(unsigned long phys); -+ -+#define __pa(virt) to_phys((void *) virt) -+#define __va(phys) to_virt((unsigned long) phys) -+ -+#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) -+ -+extern struct page *arch_validate(struct page *page, int mask, int order); -+#define HAVE_ARCH_VALIDATE -+ -+extern void arch_free_page(struct page *page, int order); -+#define HAVE_ARCH_FREE_PAGE -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/page_offset.h um/include/asm-um/page_offset.h ---- orig/include/asm-um/page_offset.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/page_offset.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1 @@ -+#define PAGE_OFFSET_RAW (uml_physmem) -diff -Naur -X ../exclude-files orig/include/asm-um/param.h um/include/asm-um/param.h ---- orig/include/asm-um/param.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/param.h 2004-02-13 02:26:52.000000000 -0500 -@@ -0,0 +1,22 @@ -+#ifndef _UM_PARAM_H -+#define _UM_PARAM_H -+ -+#define HZ 100 -+ -+#define EXEC_PAGESIZE 4096 -+ -+#ifndef NGROUPS -+#define NGROUPS 32 -+#endif -+ -+#ifndef NOGROUP -+#define NOGROUP (-1) -+#endif -+ -+#define MAXHOSTNAMELEN 64 /* max length of hostname */ -+ -+#ifdef __KERNEL__ -+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/pci.h um/include/asm-um/pci.h ---- orig/include/asm-um/pci.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/pci.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_PCI_H -+#define __UM_PCI_H -+ -+#define PCI_DMA_BUS_IS_PHYS (1) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/pgalloc.h um/include/asm-um/pgalloc.h ---- orig/include/asm-um/pgalloc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/pgalloc.h 2004-02-14 06:29:00.000000000 -0500 -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGALLOC_H -+#define __UM_PGALLOC_H -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/fixmap.h" -+#include "choose-mode.h" -+ -+#define pgd_quicklist (current_cpu_data.pgd_quick) -+#define pmd_quicklist (current_cpu_data.pmd_quick) -+#define pte_quicklist (current_cpu_data.pte_quick) -+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -+ -+#define pmd_populate(mm, pmd, pte) set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) -+ -+/* -+ * Allocate and free page tables. -+ */ -+ -+static inline pgd_t *get_pgd_slow_tt(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) { -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ memcpy(pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); -+ } -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow_skas(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow(void) -+{ -+ return(CHOOSE_MODE(get_pgd_slow_tt(), get_pgd_slow_skas())); -+} -+ -+static inline pgd_t *get_pgd_fast(void) -+{ -+ unsigned long *ret; -+ -+ ret = pgd_quicklist; -+ if (ret != NULL) { -+ pgd_quicklist = (unsigned long *)(*ret); -+ ret[0] = 0; -+ pgtable_cache_size--; -+ } else -+ ret = (unsigned long *)get_pgd_slow(); -+ return (pgd_t *)ret; -+} -+ -+static inline void free_pgd_fast(pgd_t *pgd) -+{ -+ *(unsigned long *)pgd = (unsigned long) pgd_quicklist; -+ pgd_quicklist = (unsigned long *) pgd; -+ pgtable_cache_size++; -+} -+ -+static inline void free_pgd_slow(pgd_t *pgd) -+{ -+ free_page((unsigned long)pgd); -+} -+ -+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) -+{ -+ pte_t *pte; -+ -+ pte = (pte_t *) __get_free_page(GFP_KERNEL); -+ if (pte) -+ clear_page(pte); -+ return pte; -+} -+ -+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -+{ -+ unsigned long *ret; -+ -+ ret = (unsigned long *)pte_quicklist; -+ if (ret != NULL) { -+ pte_quicklist = (unsigned long *)(*ret); -+ ret[0] = ret[1]; -+ pgtable_cache_size--; -+ } -+ return (pte_t *)ret; -+} -+ -+static inline void pte_free_fast(pte_t *pte) -+{ -+ *(unsigned long *)pte = (unsigned long) pte_quicklist; -+ pte_quicklist = (unsigned long *) pte; -+ pgtable_cache_size++; -+} -+ -+static inline void pte_free_slow(pte_t *pte) -+{ -+ free_page((unsigned long)pte); -+} -+ -+#define pte_free(pte) pte_free_fast(pte) -+#define pgd_free(pgd) free_pgd_slow(pgd) -+#define pgd_alloc(mm) get_pgd_fast() -+ -+/* -+ * allocating and freeing a pmd is trivial: the 1-entry pmd is -+ * inside the pgd, so has no extra memory associated with it. -+ */ -+ -+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -+#define pmd_free_slow(x) do { } while (0) -+#define pmd_free_fast(x) do { } while (0) -+#define pmd_free(x) do { } while (0) -+#define pgd_populate(mm, pmd, pte) BUG() -+ -+/* -+ * TLB flushing: -+ * -+ * - flush_tlb() flushes the current mm struct TLBs -+ * - flush_tlb_all() flushes all processes TLBs -+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's -+ * - flush_tlb_page(vma, vmaddr) flushes one page -+ * - flush_tlb_kernel_vm() flushes the kernel vm area -+ * - flush_tlb_range(mm, start, end) flushes a range of pages -+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables -+ */ -+ -+extern void flush_tlb_all(void); -+extern void flush_tlb_mm(struct mm_struct *mm); -+extern void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -+extern void flush_tlb_kernel_vm(void); -+ -+static inline void flush_tlb_pgtables(struct mm_struct *mm, -+ unsigned long start, unsigned long end) -+{ -+} -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/pgtable.h um/include/asm-um/pgtable.h ---- orig/include/asm-um/pgtable.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/pgtable.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,413 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGTABLE_H -+#define __UM_PGTABLE_H -+ -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/page.h" -+ -+extern pgd_t swapper_pg_dir[1024]; -+ -+#define flush_cache_all() do ; while (0) -+#define flush_cache_mm(mm) do ; while (0) -+#define flush_cache_range(vma, start, end) do ; while (0) -+#define flush_cache_page(vma, vmaddr) do ; while (0) -+#define flush_page_to_ram(page) do ; while (0) -+#define flush_dcache_page(page) do ; while (0) -+#define flush_icache_range(from, to) do ; while (0) -+#define flush_icache_page(vma,pg) do ; while (0) -+#define flush_icache_user_range(vma,pg,adr,len) do ; while (0) -+ -+extern void __flush_tlb_one(unsigned long addr); -+ -+extern void pte_free(pte_t *pte); -+ -+extern void pgd_free(pgd_t *pgd); -+ -+extern int do_check_pgt_cache(int, int); -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, -+ pte_t *pte_out); -+ -+/* zero page used for uninitialized stuff */ -+extern unsigned long *empty_zero_page; -+ -+#define pgtable_cache_init() do ; while (0) -+ -+/* PMD_SHIFT determines the size of the area a second-level page table can map */ -+#define PMD_SHIFT 22 -+#define PMD_SIZE (1UL << PMD_SHIFT) -+#define PMD_MASK (~(PMD_SIZE-1)) -+ -+/* PGDIR_SHIFT determines what a third-level page table entry can map */ -+#define PGDIR_SHIFT 22 -+#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -+#define PGDIR_MASK (~(PGDIR_SIZE-1)) -+ -+/* -+ * entries per page directory level: the i386 is two-level, so -+ * we don't really have any PMD directory physically. -+ */ -+#define PTRS_PER_PTE 1024 -+#define PTRS_PER_PMD 1 -+#define PTRS_PER_PGD 1024 -+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -+#define FIRST_USER_PGD_NR 0 -+ -+#define pte_ERROR(e) \ -+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -+#define pmd_ERROR(e) \ -+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -+#define pgd_ERROR(e) \ -+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -+ -+/* -+ * pgd entries used up by user/kernel: -+ */ -+ -+#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT) -+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -+ -+#ifndef __ASSEMBLY__ -+/* Just any arbitrary offset to the start of the vmalloc VM area: the -+ * current 8MB value just means that there will be a 8MB "hole" after the -+ * physical memory until the kernel virtual memory starts. That means that -+ * any out-of-bounds memory accesses will hopefully be caught. -+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced -+ * area for the same reason. ;) -+ */ -+ -+extern unsigned long end_iomem; -+ -+#define VMALLOC_OFFSET (__va_space) -+#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -+#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -+ -+#if CONFIG_HIGHMEM -+# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -+#else -+# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -+#endif -+ -+#define _PAGE_PRESENT 0x001 -+#define _PAGE_NEWPAGE 0x002 -+#define _PAGE_PROTNONE 0x004 /* If not present */ -+#define _PAGE_RW 0x008 -+#define _PAGE_USER 0x010 -+#define _PAGE_ACCESSED 0x020 -+#define _PAGE_DIRTY 0x040 -+#define _PAGE_NEWPROT 0x080 -+ -+#define REGION_MASK 0xf0000000 -+#define REGION_SHIFT 28 -+ -+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -+ -+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) -+ -+/* -+ * The i386 can't do page protection for execute, and considers that the same are read. -+ * Also, write permissions imply read permissions. This is the closest we can get.. -+ */ -+#define __P000 PAGE_NONE -+#define __P001 PAGE_READONLY -+#define __P010 PAGE_COPY -+#define __P011 PAGE_COPY -+#define __P100 PAGE_READONLY -+#define __P101 PAGE_READONLY -+#define __P110 PAGE_COPY -+#define __P111 PAGE_COPY -+ -+#define __S000 PAGE_NONE -+#define __S001 PAGE_READONLY -+#define __S010 PAGE_SHARED -+#define __S011 PAGE_SHARED -+#define __S100 PAGE_READONLY -+#define __S101 PAGE_READONLY -+#define __S110 PAGE_SHARED -+#define __S111 PAGE_SHARED -+ -+/* -+ * Define this if things work differently on an i386 and an i486: -+ * it will (on an i486) warn about kernel memory accesses that are -+ * done without a 'verify_area(VERIFY_WRITE,..)' -+ */ -+#undef TEST_VERIFY_AREA -+ -+/* page table for 0-4MB for everybody */ -+extern unsigned long pg0[1024]; -+ -+/* -+ * BAD_PAGETABLE is used when we need a bogus page-table, while -+ * BAD_PAGE is used for a bogus page. -+ * -+ * ZERO_PAGE is a global shared page that is always zero: used -+ * for zero-mapped memory areas etc.. -+ */ -+extern pte_t __bad_page(void); -+extern pte_t * __bad_pagetable(void); -+ -+#define BAD_PAGETABLE __bad_pagetable() -+#define BAD_PAGE __bad_page() -+ -+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -+ -+/* number of bits that fit into a memory pointer */ -+#define BITS_PER_PTR (8*sizeof(unsigned long)) -+ -+/* to align the pointer to a pointer address */ -+#define PTR_MASK (~(sizeof(void*)-1)) -+ -+/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -+/* 64-bit machines, beware! SRB. */ -+#define SIZEOF_PTR_LOG2 2 -+ -+/* to find an entry in a page-table */ -+#define PAGE_PTR(address) \ -+((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) -+ -+#define pte_none(x) !(pte_val(x) & ~_PAGE_NEWPAGE) -+#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -+ -+#define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) -+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) -+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -+#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -+#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) -+ -+/* -+ * The "pgd_xxx()" functions here are trivial for a folded two-level -+ * setup: the pgd is never bad, and a pmd always exists (as it's folded -+ * into the pgd entry) -+ */ -+static inline int pgd_none(pgd_t pgd) { return 0; } -+static inline int pgd_bad(pgd_t pgd) { return 0; } -+static inline int pgd_present(pgd_t pgd) { return 1; } -+static inline void pgd_clear(pgd_t * pgdp) { } -+ -+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -+ -+#define pte_page(pte) virt_to_page(__va(pte_val(pte))) -+#define pmd_page(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) -+ -+extern struct page *phys_to_page(const unsigned long phys); -+extern struct page *__virt_to_page(const unsigned long virt); -+#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) -+ -+static inline pte_t pte_mknewprot(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_NEWPROT; -+ return(pte); -+} -+ -+static inline pte_t pte_mknewpage(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_NEWPAGE; -+ return(pte); -+} -+ -+static inline void set_pte(pte_t *pteptr, pte_t pteval) -+{ -+ /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so -+ * fix_range knows to unmap it. _PAGE_NEWPROT is specific to -+ * mapped pages. -+ */ -+ *pteptr = pte_mknewpage(pteval); -+ if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); -+} -+ -+/* -+ * (pmds are folded into pgds so this doesnt get actually called, -+ * but the define is needed for a generic inline function.) -+ */ -+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) -+#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -+ -+/* -+ * The following only work if pte_present() is true. -+ * Undefined behaviour if not.. -+ */ -+static inline int pte_read(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_exec(pte_t pte){ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_write(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_RW) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -+static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } -+static inline int pte_newprot(pte_t pte) -+{ -+ return(pte_present(pte) && (pte_val(pte) & _PAGE_NEWPROT)); -+} -+ -+static inline pte_t pte_rdprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_exprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkclean(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkold(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_wrprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkread(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkexec(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkdirty(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkyoung(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_mkwrite(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkuptodate(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_NEWPAGE; -+ if(pte_present(pte)) pte_val(pte) &= ~_PAGE_NEWPROT; -+ return(pte); -+} -+ -+extern unsigned long page_to_phys(struct page *page); -+ -+/* -+ * Conversion functions: convert a page and protection to a page entry, -+ * and a page entry and page directory to the page they refer to. -+ */ -+ -+extern pte_t mk_pte(struct page *page, pgprot_t pgprot); -+ -+/* This takes a physical page address that is used by the remapping -+ * functions -+ */ -+#define mk_pte_phys(phys, pgprot) \ -+ (pte_mknewpage(mk_pte(virt_to_page(__va(phys)), pgprot))) -+ -+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -+{ -+ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); -+ if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte)); -+ return pte; -+} -+ -+/* to find an entry in a page-table-directory. */ -+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -+#define __pgd_offset(address) pgd_index(address) -+ -+/* to find an entry in a page-table-directory */ -+#define pgd_offset(mm, address) \ -+((mm)->pgd + ((address) >> PGDIR_SHIFT)) -+ -+/* to find an entry in a kernel page-table-directory */ -+#define pgd_offset_k(address) pgd_offset(&init_mm, address) -+ -+#define __pmd_offset(address) \ -+ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -+ -+/* Find an entry in the second-level page table.. */ -+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -+{ -+ return (pmd_t *) dir; -+} -+ -+/* Find an entry in the third-level page table.. */ -+#define pte_offset(pmd, address) \ -+ ((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2)))) -+ -+#define update_mmu_cache(vma,address,pte) do ; while (0) -+ -+/* Encode and de-code a swap entry */ -+#define SWP_TYPE(x) (((x).val >> 3) & 0x7f) -+#define SWP_OFFSET(x) ((x).val >> 10) -+ -+#define SWP_ENTRY(type, offset) \ -+ ((swp_entry_t) { ((type) << 3) | ((offset) << 10) }) -+#define pte_to_swp_entry(pte) \ -+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) -+#define swp_entry_to_pte(x) ((pte_t) { (x).val }) -+ -+#define PageSkip(x) (0) -+#define kern_addr_valid(addr) (1) -+ -+#include <asm-generic/pgtable.h> -+ -+#endif -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/poll.h um/include/asm-um/poll.h ---- orig/include/asm-um/poll.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/poll.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POLL_H -+#define __UM_POLL_H -+ -+#include "asm/arch/poll.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/posix_types.h um/include/asm-um/posix_types.h ---- orig/include/asm-um/posix_types.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/posix_types.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POSIX_TYPES_H -+#define __UM_POSIX_TYPES_H -+ -+#include "asm/arch/posix_types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/processor-generic.h um/include/asm-um/processor-generic.h ---- orig/include/asm-um/processor-generic.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/processor-generic.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_GENERIC_H -+#define __UM_PROCESSOR_GENERIC_H -+ -+struct pt_regs; -+ -+struct task_struct; -+ -+#include "linux/config.h" -+#include "linux/signal.h" -+#include "asm/ptrace.h" -+#include "asm/siginfo.h" -+#include "choose-mode.h" -+ -+struct mm_struct; -+ -+#define current_text_addr() ((void *) 0) -+ -+#define cpu_relax() do ; while (0) -+ -+struct thread_struct { -+ int forking; -+ unsigned long kernel_stack; -+ int nsyscalls; -+ struct pt_regs regs; -+ unsigned long cr2; -+ int err; -+ unsigned long trap_no; -+ void *fault_addr; -+ void *fault_catcher; -+ struct task_struct *prev_sched; -+ unsigned long temp_stack; -+ void *exec_buf; -+ struct arch_thread arch; -+ union { -+#ifdef CONFIG_MODE_TT -+ struct { -+ int extern_pid; -+ int tracing; -+ int switch_pipe[2]; -+ int singlestep_syscall; -+ int vm_seq; -+ } tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct { -+ void *switch_buf; -+ void *fork_buf; -+ int mm_count; -+ } skas; -+#endif -+ } mode; -+ struct { -+ int op; -+ union { -+ struct { -+ int pid; -+ } fork, exec; -+ struct { -+ int (*proc)(void *); -+ void *arg; -+ } thread; -+ struct { -+ void (*proc)(void *); -+ void *arg; -+ } cb; -+ } u; -+ } request; -+}; -+ -+#define INIT_THREAD \ -+{ \ -+ .forking = 0, \ -+ .kernel_stack = 0, \ -+ .nsyscalls = 0, \ -+ .regs = EMPTY_REGS, \ -+ .cr2 = 0, \ -+ .err = 0, \ -+ .fault_addr = NULL, \ -+ .prev_sched = NULL, \ -+ .temp_stack = 0, \ -+ .exec_buf = NULL, \ -+ .arch = INIT_ARCH_THREAD, \ -+ .request = { 0 } \ -+} -+ -+#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+typedef struct { -+ unsigned long seg; -+} mm_segment_t; -+ -+extern struct task_struct *alloc_task_struct(void); -+extern void free_task_struct(struct task_struct *task); -+ -+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) -+ -+extern void release_thread(struct task_struct *); -+extern int arch_kernel_thread(int (*fn)(void *), void * arg, -+ unsigned long flags); -+extern void dump_thread(struct pt_regs *regs, struct user *u); -+ -+extern unsigned long thread_saved_pc(struct thread_struct *t); -+ -+static inline void mm_copy_segments(struct mm_struct *from_mm, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void copy_segments(struct task_struct *p, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void release_segments(struct mm_struct *mm) -+{ -+} -+ -+#define init_task (init_task_union.task) -+#define init_stack (init_task_union.stack) -+ -+/* -+ * User space process size: 3GB (default). -+ */ -+extern unsigned long task_size; -+ -+#define TASK_SIZE (task_size) -+ -+/* This decides where the kernel will search for a free chunk of vm -+ * space during mmap's. -+ */ -+#define TASK_UNMAPPED_BASE (0x40000000) -+ -+extern void start_thread(struct pt_regs *regs, unsigned long entry, -+ unsigned long stack); -+ -+struct cpuinfo_um { -+ unsigned long loops_per_jiffy; -+ unsigned long *pgd_quick; -+ unsigned long *pmd_quick; -+ unsigned long *pte_quick; -+ unsigned long pgtable_cache_sz; -+ int ipi_pipe[2]; -+}; -+ -+extern struct cpuinfo_um boot_cpu_data; -+ -+#define my_cpu_data cpu_data[smp_processor_id()] -+ -+#ifdef CONFIG_SMP -+extern struct cpuinfo_um cpu_data[]; -+#define current_cpu_data cpu_data[smp_processor_id()] -+#else -+#define cpu_data (&boot_cpu_data) -+#define current_cpu_data boot_cpu_data -+#endif -+ -+#define KSTK_EIP(tsk) (PT_REGS_IP(&tsk->thread.regs)) -+#define KSTK_ESP(tsk) (PT_REGS_SP(&tsk->thread.regs)) -+#define get_wchan(p) (0) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/processor-i386.h um/include/asm-um/processor-i386.h ---- orig/include/asm-um/processor-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/processor-i386.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_I386_H -+#define __UM_PROCESSOR_I386_H -+ -+extern int cpu_has_xmm; -+extern int cpu_has_cmov; -+ -+struct arch_thread { -+ unsigned long debugregs[8]; -+ int debugregs_seq; -+}; -+ -+#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ -+ .debugregs_seq = 0 } -+ -+#include "asm/arch/user.h" -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/processor-ppc.h um/include/asm-um/processor-ppc.h ---- orig/include/asm-um/processor-ppc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/processor-ppc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,15 @@ -+#ifndef __UM_PROCESSOR_PPC_H -+#define __UM_PROCESSOR_PPC_H -+ -+#if defined(__ASSEMBLY__) -+ -+#define CONFIG_ALL_PPC -+#include "arch/processor.h" -+ -+#else -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ptrace-generic.h um/include/asm-um/ptrace-generic.h ---- orig/include/asm-um/ptrace-generic.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ptrace-generic.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_GENERIC_H -+#define __UM_PTRACE_GENERIC_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+ -+#include "asm/current.h" -+ -+#define pt_regs pt_regs_subarch -+#define show_regs show_regs_subarch -+ -+#include "asm/arch/ptrace.h" -+ -+#undef pt_regs -+#undef show_regs -+#undef user_mode -+#undef instruction_pointer -+ -+#include "sysdep/ptrace.h" -+#include "skas_ptrace.h" -+ -+struct pt_regs { -+ union uml_pt_regs regs; -+}; -+ -+#define EMPTY_REGS { regs : EMPTY_UML_PT_REGS } -+ -+#define PT_REGS_IP(r) UPT_IP(&(r)->regs) -+#define PT_REGS_SP(r) UPT_SP(&(r)->regs) -+ -+#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg) -+#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val) -+ -+#define PT_REGS_SET_SYSCALL_RETURN(r, res) \ -+ UPT_SET_SYSCALL_RETURN(&(r)->regs, res) -+#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) -+ -+#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) -+ -+#define PT_REGS_SC(r) UPT_SC(&(r)->regs) -+ -+struct task_struct; -+ -+extern unsigned long getreg(struct task_struct *child, int regno); -+extern int putreg(struct task_struct *child, int regno, unsigned long value); -+extern int get_fpregs(unsigned long buf, struct task_struct *child); -+extern int set_fpregs(unsigned long buf, struct task_struct *child); -+extern int get_fpxregs(unsigned long buf, struct task_struct *child); -+extern int set_fpxregs(unsigned long buf, struct task_struct *tsk); -+ -+extern void show_regs(struct pt_regs *regs); -+ -+#define INIT_TASK_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/ptrace-i386.h um/include/asm-um/ptrace-i386.h ---- orig/include/asm-um/ptrace-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ptrace-i386.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_I386_H -+#define __UM_PTRACE_I386_H -+ -+#include "sysdep/ptrace.h" -+#include "asm/ptrace-generic.h" -+ -+#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) -+#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) -+#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) -+#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) -+#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) -+#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) -+#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) -+ -+#define PT_REGS_CS(r) UPT_CS(&(r)->regs) -+#define PT_REGS_SS(r) UPT_SS(&(r)->regs) -+#define PT_REGS_DS(r) UPT_DS(&(r)->regs) -+#define PT_REGS_ES(r) UPT_ES(&(r)->regs) -+#define PT_REGS_FS(r) UPT_FS(&(r)->regs) -+#define PT_REGS_GS(r) UPT_GS(&(r)->regs) -+ -+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) -+ -+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) -+#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) -+#define PT_FIX_EXEC_STACK(sp) do ; while(0) -+ -+#define user_mode(r) UPT_IS_USER(&(r)->regs) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/resource.h um/include/asm-um/resource.h ---- orig/include/asm-um/resource.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/resource.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RESOURCE_H -+#define __UM_RESOURCE_H -+ -+#include "asm/arch/resource.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/rwlock.h um/include/asm-um/rwlock.h ---- orig/include/asm-um/rwlock.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/rwlock.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RWLOCK_H -+#define __UM_RWLOCK_H -+ -+#include "asm/arch/rwlock.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/rwsem.h um/include/asm-um/rwsem.h ---- orig/include/asm-um/rwsem.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/rwsem.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,10 @@ -+#ifndef __UM_RWSEM_H__ -+#define __UM_RWSEM_H__ -+ -+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -+#define __builtin_expect(exp,c) (exp) -+#endif -+ -+#include "asm/arch/rwsem.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/scatterlist.h um/include/asm-um/scatterlist.h ---- orig/include/asm-um/scatterlist.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/scatterlist.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SCATTERLIST_H -+#define __UM_SCATTERLIST_H -+ -+#include "asm/arch/scatterlist.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/segment.h um/include/asm-um/segment.h ---- orig/include/asm-um/segment.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/segment.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,4 @@ -+#ifndef __UM_SEGMENT_H -+#define __UM_SEGMENT_H -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/semaphore.h um/include/asm-um/semaphore.h ---- orig/include/asm-um/semaphore.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/semaphore.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMAPHORE_H -+#define __UM_SEMAPHORE_H -+ -+#include "asm/arch/semaphore.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sembuf.h um/include/asm-um/sembuf.h ---- orig/include/asm-um/sembuf.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/sembuf.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMBUF_H -+#define __UM_SEMBUF_H -+ -+#include "asm/arch/sembuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/serial.h um/include/asm-um/serial.h ---- orig/include/asm-um/serial.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/serial.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SERIAL_H -+#define __UM_SERIAL_H -+ -+#include "asm/arch/serial.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/shmbuf.h um/include/asm-um/shmbuf.h ---- orig/include/asm-um/shmbuf.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/shmbuf.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMBUF_H -+#define __UM_SHMBUF_H -+ -+#include "asm/arch/shmbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/shmparam.h um/include/asm-um/shmparam.h ---- orig/include/asm-um/shmparam.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/shmparam.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMPARAM_H -+#define __UM_SHMPARAM_H -+ -+#include "asm/arch/shmparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-generic.h um/include/asm-um/sigcontext-generic.h ---- orig/include/asm-um/sigcontext-generic.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/sigcontext-generic.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_GENERIC_H -+#define __UM_SIGCONTEXT_GENERIC_H -+ -+#include "asm/arch/sigcontext.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-i386.h um/include/asm-um/sigcontext-i386.h ---- orig/include/asm-um/sigcontext-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/sigcontext-i386.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_I386_H -+#define __UM_SIGCONTEXT_I386_H -+ -+#include "asm/sigcontext-generic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-ppc.h um/include/asm-um/sigcontext-ppc.h ---- orig/include/asm-um/sigcontext-ppc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/sigcontext-ppc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SIGCONTEXT_PPC_H -+#define __UM_SIGCONTEXT_PPC_H -+ -+#define pt_regs sys_pt_regs -+ -+#include "asm/sigcontext-generic.h" -+ -+#undef pt_regs -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/siginfo.h um/include/asm-um/siginfo.h ---- orig/include/asm-um/siginfo.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/siginfo.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGINFO_H -+#define __UM_SIGINFO_H -+ -+#include "asm/arch/siginfo.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/signal.h um/include/asm-um/signal.h ---- orig/include/asm-um/signal.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/signal.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SIGNAL_H -+#define __UM_SIGNAL_H -+ -+#include "asm/arch/signal.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/smp.h um/include/asm-um/smp.h ---- orig/include/asm-um/smp.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/smp.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,19 @@ -+#ifndef __UM_SMP_H -+#define __UM_SMP_H -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/config.h" -+#include "asm/current.h" -+ -+#define smp_processor_id() (current->processor) -+#define cpu_logical_map(n) (n) -+#define cpu_number_map(n) (n) -+#define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ -+extern int hard_smp_processor_id(void); -+extern unsigned long cpu_online_map; -+#define NO_PROC_ID -1 -+ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/smplock.h um/include/asm-um/smplock.h ---- orig/include/asm-um/smplock.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/smplock.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SMPLOCK_H -+#define __UM_SMPLOCK_H -+ -+#include "asm/arch/smplock.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/socket.h um/include/asm-um/socket.h ---- orig/include/asm-um/socket.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/socket.h 2003-11-07 02:10:43.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKET_H -+#define __UM_SOCKET_H -+ -+#include "asm/arch/socket.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sockios.h um/include/asm-um/sockios.h ---- orig/include/asm-um/sockios.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/sockios.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKIOS_H -+#define __UM_SOCKIOS_H -+ -+#include "asm/arch/sockios.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/softirq.h um/include/asm-um/softirq.h ---- orig/include/asm-um/softirq.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/softirq.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,13 @@ -+#ifndef __UM_SOFTIRQ_H -+#define __UM_SOFTIRQ_H -+ -+#include "linux/smp.h" -+#include "asm/system.h" -+#include "asm/processor.h" -+ -+/* A gratuitous name change */ -+#define i386_bh_lock um_bh_lock -+#include "asm/arch/softirq.h" -+#undef i386_bh_lock -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/spinlock.h um/include/asm-um/spinlock.h ---- orig/include/asm-um/spinlock.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/spinlock.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SPINLOCK_H -+#define __UM_SPINLOCK_H -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_SMP -+#include "asm/arch/spinlock.h" -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/statfs.h um/include/asm-um/statfs.h ---- orig/include/asm-um/statfs.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/statfs.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef _UM_STATFS_H -+#define _UM_STATFS_H -+ -+#include "asm/arch/statfs.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/stat.h um/include/asm-um/stat.h ---- orig/include/asm-um/stat.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/stat.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_STAT_H -+#define __UM_STAT_H -+ -+#include "asm/arch/stat.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/string.h um/include/asm-um/string.h ---- orig/include/asm-um/string.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/string.h 2004-01-16 03:32:58.000000000 -0500 -@@ -0,0 +1,7 @@ -+#ifndef __UM_STRING_H -+#define __UM_STRING_H -+ -+#include "asm/arch/string.h" -+#include "asm/archparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-generic.h um/include/asm-um/system-generic.h ---- orig/include/asm-um/system-generic.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/system-generic.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,50 @@ -+#ifndef __UM_SYSTEM_GENERIC_H -+#define __UM_SYSTEM_GENERIC_H -+ -+#include "asm/arch/system.h" -+ -+#undef prepare_to_switch -+#undef switch_to -+#undef __save_flags -+#undef save_flags -+#undef __restore_flags -+#undef restore_flags -+#undef __cli -+#undef __sti -+#undef cli -+#undef sti -+#undef local_irq_save -+#undef local_irq_restore -+#undef local_irq_disable -+#undef local_irq_enable -+ -+#define prepare_to_switch() do ; while(0) -+ -+void *_switch_to(void *prev, void *next); -+ -+#define switch_to(prev, next, last) prev = _switch_to(prev, next) -+ -+extern int get_signals(void); -+extern int set_signals(int enable); -+extern void block_signals(void); -+extern void unblock_signals(void); -+ -+#define local_irq_save(flags) do { (flags) = set_signals(0); } while(0) -+ -+#define local_irq_restore(flags) do { set_signals(flags); } while(0) -+ -+#define local_irq_enable() unblock_signals() -+#define local_irq_disable() block_signals() -+ -+#define __sti() unblock_signals() -+#define sti() unblock_signals() -+#define __cli() block_signals() -+#define cli() block_signals() -+ -+#define __save_flags(x) do { (x) = get_signals(); } while(0) -+#define save_flags(x) __save_flags(x) -+ -+#define __restore_flags(x) local_irq_restore(x) -+#define restore_flags(x) __restore_flags(x) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-i386.h um/include/asm-um/system-i386.h ---- orig/include/asm-um/system-i386.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/system-i386.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,39 @@ -+#ifndef __UM_SYSTEM_I386_H -+#define __UM_SYSTEM_I386_H -+ -+#include "asm/system-generic.h" -+ -+#define __HAVE_ARCH_CMPXCHG 1 -+ -+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, -+ unsigned long new, int size) -+{ -+ unsigned long prev; -+ switch (size) { -+ case 1: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : "memory"); -+ return prev; -+ case 2: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : "memory"); -+ return prev; -+ case 4: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : "memory"); -+ return prev; -+ } -+ return old; -+} -+ -+#define cmpxchg(ptr,o,n)\ -+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ -+ (unsigned long)(n),sizeof(*(ptr)))) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-ppc.h um/include/asm-um/system-ppc.h ---- orig/include/asm-um/system-ppc.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/system-ppc.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,12 @@ -+#ifndef __UM_SYSTEM_PPC_H -+#define __UM_SYSTEM_PPC_H -+ -+#define _switch_to _ppc_switch_to -+ -+#include "asm/arch/system.h" -+ -+#undef _switch_to -+ -+#include "asm/system-generic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/termbits.h um/include/asm-um/termbits.h ---- orig/include/asm-um/termbits.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/termbits.h 2003-11-07 02:10:43.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMBITS_H -+#define __UM_TERMBITS_H -+ -+#include "asm/arch/termbits.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/termios.h um/include/asm-um/termios.h ---- orig/include/asm-um/termios.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/termios.h 2003-12-16 22:26:55.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMIOS_H -+#define __UM_TERMIOS_H -+ -+#include "asm/arch/termios.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/timex.h um/include/asm-um/timex.h ---- orig/include/asm-um/timex.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/timex.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,18 @@ -+#ifndef __UM_TIMEX_H -+#define __UM_TIMEX_H -+ -+#include "linux/time.h" -+ -+typedef unsigned long cycles_t; -+ -+#define cacheflush_time (0) -+ -+static inline cycles_t get_cycles (void) -+{ -+ return 0; -+} -+ -+#define vxtime_lock() do ; while (0) -+#define vxtime_unlock() do ; while (0) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/tlb.h um/include/asm-um/tlb.h ---- orig/include/asm-um/tlb.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/tlb.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1 @@ -+#include <asm-generic/tlb.h> -diff -Naur -X ../exclude-files orig/include/asm-um/types.h um/include/asm-um/types.h ---- orig/include/asm-um/types.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/types.h 2004-01-16 03:32:58.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TYPES_H -+#define __UM_TYPES_H -+ -+#include "asm/arch/types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/uaccess.h um/include/asm-um/uaccess.h ---- orig/include/asm-um/uaccess.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/uaccess.h 2004-02-14 06:28:59.000000000 -0500 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UACCESS_H -+#define __UM_UACCESS_H -+ -+#include "linux/sched.h" -+ -+#define VERIFY_READ 0 -+#define VERIFY_WRITE 1 -+ -+/* -+ * The fs value determines whether argument validity checking should be -+ * performed or not. If get_fs() == USER_DS, checking is performed, with -+ * get_fs() == KERNEL_DS, checking is bypassed. -+ * -+ * For historical reasons, these macros are grossly misnamed. -+ */ -+ -+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -+ -+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -+#define USER_DS MAKE_MM_SEG(TASK_SIZE) -+ -+#define get_ds() (KERNEL_DS) -+#define get_fs() (current->addr_limit) -+#define set_fs(x) (current->addr_limit = (x)) -+ -+#define segment_eq(a, b) ((a).seg == (b).seg) -+ -+#include "um_uaccess.h" -+ -+#define __copy_from_user(to, from, n) copy_from_user(to, from, n) -+ -+#define __copy_to_user(to, from, n) copy_to_user(to, from, n) -+ -+#define __get_user(x, ptr) \ -+({ \ -+ const __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ (x) = 0; \ -+ if (__copy_from_user(&__private_val, (__private_ptr), \ -+ sizeof(*(__private_ptr))) == 0) {\ -+ (x) = (__typeof__(*(__private_ptr))) __private_val; \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define get_user(x, ptr) \ -+({ \ -+ const __typeof__((*ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ -+ __get_user(x, private_ptr) : ((x) = 0, -EFAULT)); \ -+}) -+ -+#define __put_user(x, ptr) \ -+({ \ -+ __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ __private_val = (__typeof__(*(__private_ptr))) (x); \ -+ if (__copy_to_user((__private_ptr), &__private_val, \ -+ sizeof(*(__private_ptr))) == 0) { \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define put_user(x, ptr) \ -+({ \ -+ __typeof__(*(ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \ -+ __put_user(x, private_ptr) : -EFAULT); \ -+}) -+ -+#define strlen_user(str) strnlen_user(str, ~0UL >> 1) -+ -+struct exception_table_entry -+{ -+ unsigned long insn; -+ unsigned long fixup; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/ucontext.h um/include/asm-um/ucontext.h ---- orig/include/asm-um/ucontext.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/ucontext.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef _ASM_UM_UCONTEXT_H -+#define _ASM_UM_UCONTEXT_H -+ -+#include "asm/arch/ucontext.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/unaligned.h um/include/asm-um/unaligned.h ---- orig/include/asm-um/unaligned.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/unaligned.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_UNALIGNED_H -+#define __UM_UNALIGNED_H -+ -+#include "asm/arch/unaligned.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/unistd.h um/include/asm-um/unistd.h ---- orig/include/asm-um/unistd.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/unistd.h 2004-02-14 06:29:00.000000000 -0500 -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef _UM_UNISTD_H_ -+#define _UM_UNISTD_H_ -+ -+#include "linux/resource.h" -+#include "asm/uaccess.h" -+ -+extern long sys_open(const char *filename, int flags, int mode); -+extern long sys_dup(unsigned int fildes); -+extern long sys_close(unsigned int fd); -+extern int um_execve(const char *file, char *const argv[], char *const env[]); -+extern long sys_setsid(void); -+extern long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); -+extern long sys_wait4(pid_t pid,unsigned int *stat_addr, int options, -+ struct rusage *ru); -+extern long sys_mount(char *dev_name, char *dir_name, char *type, -+ unsigned long flags, void *data); -+extern long sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, -+ struct timeval *tvp); -+extern long sys_lseek(unsigned int fildes, unsigned long offset, int whence); -+extern long sys_read(unsigned int fildes, char *buf, int len); -+extern long sys_write(unsigned int fildes, char *buf, int len); -+ -+#ifdef __KERNEL_SYSCALLS__ -+ -+#define KERNEL_CALL(ret_t, sys, args...) \ -+ mm_segment_t fs = get_fs(); \ -+ ret_t ret; \ -+ set_fs(KERNEL_DS); \ -+ ret = sys(args); \ -+ set_fs(fs); \ -+ if (ret >= 0) \ -+ return ret; \ -+ errno = -(long)ret; \ -+ return -1; -+ -+static inline long open(const char *pathname, int flags, int mode) -+{ -+ KERNEL_CALL(int, sys_open, pathname, flags, mode) -+} -+ -+static inline long dup(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_dup, fd); -+} -+ -+static inline long close(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_close, fd); -+} -+ -+static inline int execve(const char *filename, char *const argv[], -+ char *const envp[]) -+{ -+ KERNEL_CALL(int, um_execve, filename, argv, envp); -+} -+ -+static inline long waitpid(pid_t pid, unsigned int *status, int options) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, pid, status, options, NULL) -+} -+ -+static inline pid_t wait(int *status) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, -1, status, 0, NULL) -+} -+ -+static inline pid_t setsid(void) -+{ -+ KERNEL_CALL(pid_t, sys_setsid) -+} -+ -+static inline long lseek(unsigned int fd, off_t offset, unsigned int whence) -+{ -+ KERNEL_CALL(long, sys_lseek, fd, offset, whence) -+} -+ -+static inline int read(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_read, fd, buf, len) -+} -+ -+static inline int write(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_write, fd, buf, len) -+} -+ -+#endif -+ -+/* Save the value of __KERNEL_SYSCALLS__, undefine it, include the underlying -+ * arch's unistd.h for the system call numbers, and restore the old -+ * __KERNEL_SYSCALLS__. -+ */ -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __SAVE_KERNEL_SYSCALLS__ __KERNEL_SYSCALLS__ -+#endif -+ -+#undef __KERNEL_SYSCALLS__ -+#include "asm/arch/unistd.h" -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __KERNEL_SYSCALLS__ __SAVE_KERNEL_SYSCALLS__ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/user.h um/include/asm-um/user.h ---- orig/include/asm-um/user.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/user.h 2004-02-14 06:31:38.000000000 -0500 -@@ -0,0 +1,6 @@ -+#ifndef __UM_USER_H -+#define __UM_USER_H -+ -+#include "asm/arch/user.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/vga.h um/include/asm-um/vga.h ---- orig/include/asm-um/vga.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/vga.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_VGA_H -+#define __UM_VGA_H -+ -+#include "asm/arch/vga.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/xor.h um/include/asm-um/xor.h ---- orig/include/asm-um/xor.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/asm-um/xor.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,6 @@ -+#ifndef __UM_XOR_H -+#define __UM_XOR_H -+ -+#include "asm-generic/xor.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/linux/blk.h um/include/linux/blk.h ---- orig/include/linux/blk.h 2004-01-11 22:46:30.000000000 -0500 -+++ um/include/linux/blk.h 2004-02-14 06:29:18.000000000 -0500 -@@ -320,6 +320,24 @@ - #define DEVICE_REQUEST do_ida_request - #define DEVICE_NR(device) (MINOR(device) >> 4) - -+#elif (MAJOR_NR == UBD_MAJOR) -+ -+#define DEVICE_NAME "User-mode block device" -+#define DEVICE_INTR do_ubd -+#define DEVICE_REQUEST do_ubd_request -+#define DEVICE_NR(device) (MINOR(device) >> UBD_SHIFT) -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ -+#elif (MAJOR_NR == COW_MAJOR) -+ -+#define DEVICE_NAME "COW device" -+#define DEVICE_INTR do_cow -+#define DEVICE_REQUEST do_cow_request -+#define DEVICE_NR(device) (MINOR(device) >> COW_SHIFT) -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ - #endif /* MAJOR_NR == whatever */ - - /* provide DEVICE_xxx defaults, if not explicitly defined -diff -Naur -X ../exclude-files orig/include/linux/fs.h um/include/linux/fs.h ---- orig/include/linux/fs.h 2003-12-16 22:16:36.000000000 -0500 -+++ um/include/linux/fs.h 2004-02-14 06:28:59.000000000 -0500 -@@ -320,6 +320,8 @@ - #include <linux/ncp_fs_i.h> - #include <linux/proc_fs_i.h> - #include <linux/usbdev_fs_i.h> -+#include <linux/hostfs_fs_i.h> -+#include <linux/hppfs_fs_i.h> - #include <linux/jffs2_fs_i.h> - #include <linux/cramfs_fs_sb.h> - -@@ -516,7 +518,9 @@ - struct proc_inode_info proc_i; - struct socket socket_i; - struct usbdev_inode_info usbdev_i; -- struct jffs2_inode_info jffs2_i; -+ struct hostfs_inode_info hostfs_i; -+ struct hppfs_inode_info hppfs_i; -+ struct jffs2_inode_info jffs2_i; - void *generic_ip; - } u; - }; -@@ -864,6 +868,8 @@ - unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); - int (*mmap) (struct file *, struct vm_area_struct *); -+ void (*munmap) (struct file *, struct vm_area_struct *, -+ unsigned long start, unsigned long len); - int (*open) (struct inode *, struct file *); - int (*flush) (struct file *); - int (*release) (struct inode *, struct file *); -diff -Naur -X ../exclude-files orig/include/linux/ghash.h um/include/linux/ghash.h ---- orig/include/linux/ghash.h 1997-07-07 11:24:28.000000000 -0400 -+++ um/include/linux/ghash.h 2003-10-21 03:26:07.000000000 -0400 -@@ -153,6 +153,26 @@ - return NULL;\ - } - -+/* LINKAGE - empty or "static", depending on whether you want the definitions to -+ * be public or not -+ * NAME - a string to stick in names to make this hash table type distinct from -+ * any others -+ * HASHSIZE - number of buckets -+ * TYPE - type of data contained in the buckets - must be a structure, one -+ * field is of type NAME_ptrs, another is the hash key -+ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that -+ * field -+ * KEYTYPE - type of the key field within TYPE -+ * KEY - name of the key field within TYPE -+ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the -+ * prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal, -+ * non-zero for not equal -+ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE), -+ * it returns a number in the range 0 ... HASHSIZE - 1 -+ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call -+ * DEF_HASH. -+ */ -+ - #define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \ - \ - struct NAME##_table {\ -@@ -165,7 +185,7 @@ - TYPE * prev_hash;\ - }; - --#define DEF_HASH(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\ -+#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\ - \ - LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ - {\ -@@ -206,12 +226,10 @@ - \ - LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ - {\ -- int ix = hashfn(pos);\ -+ int ix = HASHFN(pos);\ - TYPE * ptr = tbl->hashtable[ix];\ - while(ptr && KEYCMP(ptr->KEY, pos))\ - ptr = ptr->PTRS.next_hash;\ -- if(ptr && !KEYEQ(ptr->KEY, pos))\ -- ptr = NULL;\ - return ptr;\ - } - -diff -Naur -X ../exclude-files orig/include/linux/hostfs_fs_i.h um/include/linux/hostfs_fs_i.h ---- orig/include/linux/hostfs_fs_i.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/linux/hostfs_fs_i.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,21 @@ -+#ifndef _HOSTFS_FS_I -+#define _HOSTFS_FS_I -+ -+struct hostfs_inode_info { -+ char *host_filename; -+ int fd; -+ int mode; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/linux/hppfs_fs_i.h um/include/linux/hppfs_fs_i.h ---- orig/include/linux/hppfs_fs_i.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/linux/hppfs_fs_i.h 2003-10-21 03:26:07.000000000 -0400 -@@ -0,0 +1,19 @@ -+#ifndef _HPPFS_FS_I -+#define _HPPFS_FS_I -+ -+struct hppfs_inode_info { -+ struct dentry *proc_dentry; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/linux/kernel.h um/include/linux/kernel.h ---- orig/include/linux/kernel.h 2004-01-11 22:46:12.000000000 -0500 -+++ um/include/linux/kernel.h 2004-02-14 06:28:59.000000000 -0500 -@@ -49,7 +49,7 @@ - # define ATTRIB_NORET __attribute__((noreturn)) - # define NORET_AND noreturn, - --#ifdef __i386__ -+#if defined(__i386__) || defined(UM_FASTCALL) - #define FASTCALL(x) x __attribute__((regparm(3))) - #else - #define FASTCALL(x) x -diff -Naur -X ../exclude-files orig/include/linux/kernel_stat.h um/include/linux/kernel_stat.h ---- orig/include/linux/kernel_stat.h 2004-01-11 22:46:12.000000000 -0500 -+++ um/include/linux/kernel_stat.h 2004-02-14 06:28:59.000000000 -0500 -@@ -12,7 +12,7 @@ - * used by rstatd/perfmeter - */ - --#define DK_MAX_MAJOR 16 -+#define DK_MAX_MAJOR 99 - #define DK_MAX_DISK 16 - - struct kernel_stat { -diff -Naur -X ../exclude-files orig/include/linux/mm.h um/include/linux/mm.h ---- orig/include/linux/mm.h 2003-12-16 22:16:36.000000000 -0500 -+++ um/include/linux/mm.h 2004-02-14 06:28:59.000000000 -0500 -@@ -438,6 +438,18 @@ - extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); - extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); - -+#ifndef HAVE_ARCH_VALIDATE -+static inline struct page *arch_validate(struct page *page, -+ unsigned int gfp_mask, int order) -+{ -+ return(page); -+} -+#endif -+ -+#ifndef HAVE_ARCH_FREE_PAGE -+static inline void arch_free_page(struct page *page, int order) { } -+#endif -+ - static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) - { - /* -@@ -445,7 +457,7 @@ - */ - if (order >= MAX_ORDER) - return NULL; -- return _alloc_pages(gfp_mask, order); -+ return arch_validate(_alloc_pages(gfp_mask, order), gfp_mask, order); - } - - #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -@@ -505,6 +517,9 @@ - int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -+extern long do_mprotect(struct mm_struct *mm, unsigned long start, -+ size_t len, unsigned long prot); -+ - /* - * On a two-level page table, this ends up being trivial. Thus the - * inlining and the symmetry break with pte_alloc() that does all -@@ -552,9 +567,10 @@ - - extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - --extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, -- unsigned long len, unsigned long prot, -- unsigned long flag, unsigned long pgoff); -+extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flag, -+ unsigned long pgoff); - - static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, -@@ -564,7 +580,8 @@ - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) -- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -+ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, -+ offset >> PAGE_SHIFT); - out: - return ret; - } -diff -Naur -X ../exclude-files orig/include/linux/proc_mm.h um/include/linux/proc_mm.h ---- orig/include/linux/proc_mm.h 1969-12-31 19:00:00.000000000 -0500 -+++ um/include/linux/proc_mm.h 2004-02-14 06:30:22.000000000 -0500 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROC_MM_H -+#define __PROC_MM_H -+ -+#include "linux/sched.h" -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+extern struct mm_struct *proc_mm_get_mm(int fd); -+ -+#endif -diff -Naur -X ../exclude-files orig/include/linux/shmem_fs.h um/include/linux/shmem_fs.h ---- orig/include/linux/shmem_fs.h 2003-09-02 15:44:03.000000000 -0400 -+++ um/include/linux/shmem_fs.h 2003-12-09 00:03:31.000000000 -0500 -@@ -22,6 +22,8 @@ - unsigned long next_index; - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */ - void **i_indirect; /* indirect blocks */ -+ unsigned long map_direct[SHMEM_NR_DIRECT]; -+ void **map_indirect; - unsigned long swapped; /* data pages assigned to swap */ - unsigned long flags; - struct list_head list; -diff -Naur -X ../exclude-files orig/include/linux/tty.h um/include/linux/tty.h ---- orig/include/linux/tty.h 2004-01-11 22:46:12.000000000 -0500 -+++ um/include/linux/tty.h 2004-02-14 06:28:59.000000000 -0500 -@@ -309,6 +309,9 @@ - spinlock_t read_lock; - /* If the tty has a pending do_SAK, queue it here - akpm */ - struct tq_struct SAK_tq; -+#ifdef CONFIG_TTY_LOG -+ int log_fd; -+#endif - }; - - /* tty magic number */ -@@ -365,6 +368,7 @@ - extern int specialix_init(void); - extern int espserial_init(void); - extern int macserial_init(void); -+extern int stdio_init(void); - extern int a2232board_init(void); - - extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device, -@@ -420,5 +424,7 @@ - extern int vt_ioctl(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg); - -+extern void stdio_console_init(void); -+ - #endif /* __KERNEL__ */ - #endif -diff -Naur -X ../exclude-files orig/init/do_mounts.c um/init/do_mounts.c ---- orig/init/do_mounts.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/init/do_mounts.c 2003-12-16 22:17:32.000000000 -0500 -@@ -154,6 +154,22 @@ - { "pf", 0x2f00 }, - { "apblock", APBLOCK_MAJOR << 8}, - { "ddv", DDV_MAJOR << 8}, -+ { "ubd0", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubda", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubd1", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubdb", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubd2", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubdc", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubd3", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubdd", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubd4", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubde", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubd5", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubdf", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubd6", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubdg", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubd7", UBD_MAJOR << 8 | 7 << 4}, -+ { "ubdh", UBD_MAJOR << 8 | 7 << 4}, - { "jsfd", JSFD_MAJOR << 8}, - #if defined(CONFIG_ARCH_S390) - { "dasda", (DASD_MAJOR << MINORBITS) }, -diff -Naur -X ../exclude-files orig/kernel/panic.c um/kernel/panic.c ---- orig/kernel/panic.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/kernel/panic.c 2003-12-16 22:17:32.000000000 -0500 -@@ -74,7 +74,7 @@ - smp_send_stop(); - #endif - -- notifier_call_chain(&panic_notifier_list, 0, NULL); -+ notifier_call_chain(&panic_notifier_list, 0, buf); - - if (panic_timeout > 0) - { -diff -Naur -X ../exclude-files orig/MAINTAINERS um/MAINTAINERS ---- orig/MAINTAINERS 2003-12-16 22:16:23.000000000 -0500 -+++ um/MAINTAINERS 2003-12-16 22:17:22.000000000 -0500 -@@ -2055,6 +2055,14 @@ - L: linux-usb-devel@lists.sourceforge.net - W: http://usb.in.tum.de - S: Maintained -+ -+USER-MODE PORT -+P: Jeff Dike -+M: jdike@karaya.com -+L: user-mode-linux-devel@lists.sourceforge.net -+L: user-mode-linux-user@lists.sourceforge.net -+W: http://user-mode-linux.sourceforge.net -+S: Maintained - - USB "USBNET" DRIVER - P: David Brownell -diff -Naur -X ../exclude-files orig/Makefile um/Makefile ---- orig/Makefile 2004-02-14 06:26:12.000000000 -0500 -+++ um/Makefile 2004-02-14 06:26:19.000000000 -0500 -@@ -5,7 +5,15 @@ - - KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - --ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+# SUBARCH tells the usermode build what the underlying arch is. That is set -+# first, and if a usermode build is happening, the "ARCH=um" on the command -+# line overrides the setting of ARCH below. If a native build is happening, -+# then ARCH is assigned, getting whatever value it gets normally, and -+# SUBARCH is subsequently ignored. -+ -+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+ARCH := $(SUBARCH) -+ - KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") - - CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ -diff -Naur -X ../exclude-files orig/mm/Makefile um/mm/Makefile ---- orig/mm/Makefile 2002-08-21 11:47:43.000000000 -0400 -+++ um/mm/Makefile 2003-10-21 03:26:08.000000000 -0400 -@@ -17,5 +17,6 @@ - shmem.o - - obj-$(CONFIG_HIGHMEM) += highmem.o -+obj-$(CONFIG_PROC_MM) += proc_mm.o - - include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/mm/mmap.c um/mm/mmap.c ---- orig/mm/mmap.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/mm/mmap.c 2003-12-16 22:17:32.000000000 -0500 -@@ -390,10 +390,11 @@ - return 0; - } - --unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, -- unsigned long prot, unsigned long flags, unsigned long pgoff) -+unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long pgoff) - { -- struct mm_struct * mm = current->mm; - struct vm_area_struct * vma, * prev; - unsigned int vm_flags; - int correct_wcount = 0; -@@ -994,6 +995,11 @@ - remove_shared_vm_struct(mpnt); - mm->map_count--; - -+ if((mpnt->vm_file != NULL) && (mpnt->vm_file->f_op != NULL) && -+ (mpnt->vm_file->f_op->munmap != NULL)) -+ mpnt->vm_file->f_op->munmap(mpnt->vm_file, mpnt, st, -+ size); -+ - zap_page_range(mm, st, size); - - /* -diff -Naur -X ../exclude-files orig/mm/mprotect.c um/mm/mprotect.c ---- orig/mm/mprotect.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/mm/mprotect.c 2003-12-16 22:17:32.000000000 -0500 -@@ -264,7 +264,8 @@ - return 0; - } - --asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+long do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, -+ unsigned long prot) - { - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * next, * prev; -@@ -281,9 +282,9 @@ - if (end == start) - return 0; - -- down_write(¤t->mm->mmap_sem); -+ down_write(&mm->mmap_sem); - -- vma = find_vma_prev(current->mm, start, &prev); -+ vma = find_vma_prev(mm, start, &prev); - error = -ENOMEM; - if (!vma || vma->vm_start > start) - goto out; -@@ -332,6 +333,11 @@ - prev->vm_mm->map_count--; - } - out: -- up_write(¤t->mm->mmap_sem); -+ up_write(&mm->mmap_sem); - return error; - } -+ -+asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+{ -+ return(do_mprotect(current->mm, start, len, prot)); -+} -diff -Naur -X ../exclude-files orig/mm/page_alloc.c um/mm/page_alloc.c ---- orig/mm/page_alloc.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/mm/page_alloc.c 2003-12-16 22:17:32.000000000 -0500 -@@ -89,6 +89,7 @@ - struct page *base; - zone_t *zone; - -+ arch_free_page(page, order); - /* - * Yes, think what happens when other parts of the kernel take - * a reference to a page in order to pin it for io. -ben -diff -Naur -X ../exclude-files orig/mm/proc_mm.c um/mm/proc_mm.c ---- orig/mm/proc_mm.c 1969-12-31 19:00:00.000000000 -0500 -+++ um/mm/proc_mm.c 2003-10-21 03:26:08.000000000 -0400 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/proc_fs.h" -+#include "linux/proc_mm.h" -+#include "linux/file.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+ -+static struct file_operations proc_mm_fops; -+ -+struct mm_struct *proc_mm_get_mm(int fd) -+{ -+ struct mm_struct *ret = ERR_PTR(-EBADF); -+ struct file *file; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ ret = ERR_PTR(-EINVAL); -+ if(file->f_op != &proc_mm_fops) -+ goto out_fput; -+ -+ ret = file->private_data; -+ out_fput: -+ fput(file); -+ out: -+ return(ret); -+} -+ -+extern long do_mmap2(struct mm_struct *mm, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+static ssize_t write_proc_mm(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct mm_struct *mm = file->private_data; -+ struct proc_mm_op req; -+ int n, ret; -+ -+ if(count > sizeof(req)) -+ return(-EINVAL); -+ -+ n = copy_from_user(&req, buffer, count); -+ if(n != 0) -+ return(-EFAULT); -+ -+ ret = count; -+ switch(req.op){ -+ case MM_MMAP: { -+ struct mm_mmap *map = &req.u.mmap; -+ -+ ret = do_mmap2(mm, map->addr, map->len, map->prot, -+ map->flags, map->fd, map->offset >> PAGE_SHIFT); -+ if((ret & ~PAGE_MASK) == 0) -+ ret = count; -+ -+ break; -+ } -+ case MM_MUNMAP: { -+ struct mm_munmap *unmap = &req.u.munmap; -+ -+ down_write(&mm->mmap_sem); -+ ret = do_munmap(mm, unmap->addr, unmap->len); -+ up_write(&mm->mmap_sem); -+ -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ case MM_MPROTECT: { -+ struct mm_mprotect *protect = &req.u.mprotect; -+ -+ ret = do_mprotect(mm, protect->addr, protect->len, -+ protect->prot); -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ -+ case MM_COPY_SEGMENTS: { -+ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments); -+ -+ if(IS_ERR(from)){ -+ ret = PTR_ERR(from); -+ break; -+ } -+ -+ mm_copy_segments(from, mm); -+ break; -+ } -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ return(ret); -+} -+ -+static int open_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = mm_alloc(); -+ int ret; -+ -+ ret = -ENOMEM; -+ if(mm == NULL) -+ goto out_mem; -+ -+ ret = init_new_context(current, mm); -+ if(ret) -+ goto out_free; -+ -+ spin_lock(&mmlist_lock); -+ list_add(&mm->mmlist, ¤t->mm->mmlist); -+ mmlist_nr++; -+ spin_unlock(&mmlist_lock); -+ -+ file->private_data = mm; -+ -+ return(0); -+ -+ out_free: -+ mmput(mm); -+ out_mem: -+ return(ret); -+} -+ -+static int release_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = file->private_data; -+ -+ mmput(mm); -+ return(0); -+} -+ -+static struct file_operations proc_mm_fops = { -+ .open = open_proc_mm, -+ .release = release_proc_mm, -+ .write = write_proc_mm, -+}; -+ -+static int make_proc_mm(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("mm", 0222, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_mm : Failed to register /proc/mm\n"); -+ return(0); -+ } -+ ent->proc_fops = &proc_mm_fops; -+ -+ return(0); -+} -+ -+__initcall(make_proc_mm); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/mm/shmem.c um/mm/shmem.c ---- orig/mm/shmem.c 2003-12-16 22:16:36.000000000 -0500 -+++ um/mm/shmem.c 2003-12-16 22:17:32.000000000 -0500 -@@ -128,16 +128,17 @@ - * +-> 48-51 - * +-> 52-55 - */ --static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page) -+static void *shmem_block(unsigned long index, unsigned long *page, -+ unsigned long *direct, void ***indirect) - { - unsigned long offset; - void **dir; - - if (index < SHMEM_NR_DIRECT) -- return info->i_direct+index; -- if (!info->i_indirect) { -+ return direct+index; -+ if (!*indirect) { - if (page) { -- info->i_indirect = (void **) *page; -+ *indirect = (void **) *page; - *page = 0; - } - return NULL; /* need another page */ -@@ -146,7 +147,7 @@ - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; -- dir = info->i_indirect; -+ dir = *indirect; - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; -@@ -169,7 +170,21 @@ - *dir = (void *) *page; - *page = 0; - } -- return (swp_entry_t *) *dir + offset; -+ return (unsigned long **) *dir + offset; -+} -+ -+static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page) -+{ -+ return((swp_entry_t *) shmem_block(index, page, -+ (unsigned long *) info->i_direct, -+ &info->i_indirect)); -+} -+ -+static unsigned long *shmem_map_count(struct shmem_inode_info *info, -+ unsigned long index, unsigned long *page) -+{ -+ return((unsigned long *) shmem_block(index, page, info->map_direct, -+ &info->map_indirect)); - } - - /* -@@ -838,6 +853,7 @@ - ops = &shmem_vm_ops; - if (!S_ISREG(inode->i_mode)) - return -EACCES; -+ - UPDATE_ATIME(inode); - vma->vm_ops = ops; - return 0; -@@ -1723,4 +1739,125 @@ - return 0; - } - -+static int adjust_map_counts(struct shmem_inode_info *info, -+ unsigned long offset, unsigned long len, -+ int adjust) -+{ -+ unsigned long idx, i, *count, page = 0; -+ -+ spin_lock(&info->lock); -+ offset >>= PAGE_SHIFT; -+ len >>= PAGE_SHIFT; -+ for(i = 0; i < len; i++){ -+ idx = (i + offset) >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); -+ -+ while((count = shmem_map_count(info, idx, &page)) == NULL){ -+ spin_unlock(&info->lock); -+ page = get_zeroed_page(GFP_KERNEL); -+ if(page == 0) -+ return(-ENOMEM); -+ spin_lock(&info->lock); -+ } -+ -+ if(page != 0) -+ free_page(page); -+ -+ *count += adjust; -+ } -+ spin_unlock(&info->lock); -+ return(0); -+} -+ - EXPORT_SYMBOL(shmem_file_setup); -+ -+struct file_operations anon_file_operations; -+ -+static int anon_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ struct file *new; -+ struct inode *inode; -+ loff_t size = vma->vm_end - vma->vm_start; -+ int err; -+ -+ if(file->private_data == NULL){ -+ new = shmem_file_setup("dev/anon", size); -+ if(IS_ERR(new)) -+ return(PTR_ERR(new)); -+ -+ new->f_op = &anon_file_operations; -+ file->private_data = new; -+ } -+ -+ if (vma->vm_file) -+ fput(vma->vm_file); -+ vma->vm_file = file->private_data; -+ get_file(vma->vm_file); -+ -+ inode = vma->vm_file->f_dentry->d_inode; -+ err = adjust_map_counts(SHMEM_I(inode), vma->vm_pgoff, size, 1); -+ if(err) -+ return(err); -+ -+ vma->vm_ops = &shmem_vm_ops; -+ return 0; -+} -+ -+static void anon_munmap(struct file *file, struct vm_area_struct *vma, -+ unsigned long start, unsigned long len) -+{ -+ struct inode *inode = file->f_dentry->d_inode; -+ struct shmem_inode_info *info = SHMEM_I(inode); -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ struct page *page; -+ unsigned long addr, idx, *count; -+ -+ for(addr = start; addr < start + len; addr += PAGE_SIZE){ -+ idx = (addr - vma->vm_start + vma->vm_pgoff); -+ idx >>= PAGE_CACHE_SHIFT; -+ -+ count = shmem_map_count(info, idx, NULL); -+ BUG_ON(count == NULL); -+ -+ (*count)--; -+ if(*count > 0) -+ continue; -+ -+ pgd = pgd_offset(vma->vm_mm, addr); -+ if(pgd_none(*pgd)) -+ continue; -+ -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_none(*pmd)) -+ continue; -+ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte)) /* XXX need to handle swapped pages */ -+ continue; -+ -+ *pte = pte_mkclean(*pte); -+ -+ page = pte_page(*pte); -+ LockPage(page); -+ lru_cache_del(page); -+ ClearPageDirty(page); -+ remove_inode_page(page); -+ UnlockPage(page); -+ -+ page_cache_release(page); -+ } -+} -+ -+int anon_release(struct inode *inode, struct file *file) -+{ -+ if(file->private_data != NULL) -+ fput(file->private_data); -+ return(0); -+} -+ -+struct file_operations anon_file_operations = { -+ .mmap = anon_mmap, -+ .munmap = anon_munmap, -+ .release = anon_release, -+}; diff --git a/lustre/kernel_patches/patches/uml-patch-2.4.29-1.patch b/lustre/kernel_patches/patches/uml-patch-2.4.29-1.patch deleted file mode 100644 index 16f449d..0000000 --- a/lustre/kernel_patches/patches/uml-patch-2.4.29-1.patch +++ /dev/null @@ -1,46719 +0,0 @@ -Index: linux-2.4.29/arch/um/config_block.in -=================================================================== ---- linux-2.4.29.orig/arch/um/config_block.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config_block.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,22 @@ -+mainmenu_option next_comment -+comment 'Block Devices' -+ -+bool 'Virtual block device' CONFIG_BLK_DEV_UBD -+dep_bool ' Always do synchronous disk IO for UBD' CONFIG_BLK_DEV_UBD_SYNC $CONFIG_BLK_DEV_UBD -+bool 'COW device' CONFIG_COW -+ -+if [ "$CONFIG_BLK_DEV_UBD" = "y" -o "$CONFIG_COW" = "y" ] ; then -+ define_bool CONFIG_COW_COMMON y -+fi -+ -+tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP -+dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET -+tristate 'RAM disk support' CONFIG_BLK_DEV_RAM -+if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then -+ int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 -+fi -+dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM -+ -+tristate 'Example IO memory driver' CONFIG_MMAPPER -+ -+endmenu -Index: linux-2.4.29/arch/um/config_char.in -=================================================================== ---- linux-2.4.29.orig/arch/um/config_char.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config_char.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,37 @@ -+mainmenu_option next_comment -+comment 'Character Devices' -+ -+define_bool CONFIG_STDIO_CONSOLE y -+ -+bool 'Virtual serial line' CONFIG_SSL -+ -+bool 'file descriptor channel support' CONFIG_FD_CHAN -+bool 'null channel support' CONFIG_NULL_CHAN -+bool 'port channel support' CONFIG_PORT_CHAN -+bool 'pty channel support' CONFIG_PTY_CHAN -+bool 'tty channel support' CONFIG_TTY_CHAN -+bool 'xterm channel support' CONFIG_XTERM_CHAN -+string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \ -+ "fd:0,fd:1" -+string 'Default console channel initialization' CONFIG_CON_CHAN "xterm" -+string 'Default serial line channel initialization' CONFIG_SSL_CHAN "pty" -+ -+ -+bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS -+if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then -+ int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 -+fi -+ -+bool 'Watchdog Timer Support' CONFIG_WATCHDOG -+dep_bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT \ -+ $CONFIG_WATCHDOG -+dep_tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG $CONFIG_WATCHDOG -+dep_tristate ' UML watchdog' CONFIG_UML_WATCHDOG $CONFIG_WATCHDOG -+ -+tristate 'Sound support' CONFIG_UML_SOUND -+define_tristate CONFIG_SOUND $CONFIG_UML_SOUND -+define_tristate CONFIG_HOSTAUDIO $CONFIG_UML_SOUND -+ -+bool 'Enable tty logging' CONFIG_TTY_LOG -+ -+endmenu -Index: linux-2.4.29/arch/um/config.in -=================================================================== ---- linux-2.4.29.orig/arch/um/config.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,123 @@ -+define_bool CONFIG_USERMODE y -+ -+mainmenu_name "Linux/Usermode Kernel Configuration" -+ -+define_bool CONFIG_ISA n -+define_bool CONFIG_SBUS n -+define_bool CONFIG_PCI n -+ -+define_bool CONFIG_UID16 y -+ -+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y -+ -+mainmenu_option next_comment -+comment 'Code maturity level options' -+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -+endmenu -+ -+mainmenu_option next_comment -+comment 'General Setup' -+ -+bool 'Separate kernel address space support' CONFIG_MODE_SKAS -+ -+# This is to ensure that at least one of the modes is enabled. When neither -+# is present in defconfig, they default to N, which is bad. -+if [ "$CONFIG_MODE_SKAS" != "y" ]; then -+ define_bool CONFIG_MODE_TT y -+fi -+ -+bool 'Tracing thread support' CONFIG_MODE_TT -+if [ "$CONFIG_MODE_TT" != "y" ]; then -+ bool 'Statically linked binary when CONFIG_MODE_TT is disabled' CONFIG_STATIC_LINK -+fi -+bool 'Networking support' CONFIG_NET -+bool 'System V IPC' CONFIG_SYSVIPC -+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -+bool 'Sysctl support' CONFIG_SYSCTL -+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -+ -+tristate 'Host filesystem' CONFIG_HOSTFS -+tristate 'Usable host filesystem' CONFIG_HUMFS -+ -+if [ "$CONFIG_HOSTFS" = "y" -o "$CONFIG_HUMFS" = "y" ]; then -+ define_tristate CONFIG_EXTERNFS y -+fi -+ -+tristate 'Honeypot proc filesystem' CONFIG_HPPFS -+bool 'Management console' CONFIG_MCONSOLE -+dep_bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_MCONSOLE -+bool '2G/2G host address space split' CONFIG_HOST_2G_2G -+ -+bool 'Symmetric multi-processing support' CONFIG_UML_SMP -+define_bool CONFIG_SMP $CONFIG_UML_SMP -+if [ "$CONFIG_SMP" = "y" ]; then -+ int 'Maximum number of CPUs (2-32)' CONFIG_NR_CPUS 32 -+fi -+ -+int 'Nesting level' CONFIG_NEST_LEVEL 0 -+int 'Kernel address space size (in .5G units)' CONFIG_KERNEL_HALF_GIGS 1 -+bool 'Highmem support' CONFIG_HIGHMEM -+bool '/proc/mm' CONFIG_PROC_MM -+int 'Kernel stack size order' CONFIG_KERNEL_STACK_ORDER 2 -+bool 'Real-time Clock' CONFIG_UML_REAL_TIME_CLOCK -+endmenu -+ -+mainmenu_option next_comment -+comment 'Loadable module support' -+bool 'Enable loadable module support' CONFIG_MODULES -+if [ "$CONFIG_MODULES" = "y" ]; then -+# MODVERSIONS does not yet work in this architecture -+# bool ' Set version information on all module symbols' CONFIG_MODVERSIONS -+ bool ' Kernel module loader' CONFIG_KMOD -+fi -+endmenu -+ -+source arch/um/config_char.in -+ -+source arch/um/config_block.in -+ -+define_bool CONFIG_NETDEVICES $CONFIG_NET -+ -+if [ "$CONFIG_NET" = "y" ]; then -+ source arch/um/config_net.in -+ source net/Config.in -+fi -+ -+source fs/Config.in -+ -+mainmenu_option next_comment -+comment 'SCSI support' -+ -+tristate 'SCSI support' CONFIG_SCSI -+ -+if [ "$CONFIG_SCSI" != "n" ]; then -+ source arch/um/config_scsi.in -+fi -+endmenu -+ -+source drivers/md/Config.in -+ -+source drivers/mtd/Config.in -+ -+source lib/Config.in -+ -+source crypto/Config.in -+ -+mainmenu_option next_comment -+comment 'Kernel hacking' -+bool 'Debug memory allocations' CONFIG_DEBUG_SLAB -+bool 'Enable kernel debugging symbols' CONFIG_DEBUGSYM -+if [ "$CONFIG_XTERM_CHAN" = "y" ]; then -+ dep_bool 'Enable ptrace proxy' CONFIG_PT_PROXY $CONFIG_DEBUGSYM -+else -+ define_bool CONFIG_PT_PROXY n -+fi -+ -+if [ "$CONFIG_MODE_TT" = "n" ]; then -+ dep_bool 'Enable gprof support' CONFIG_GPROF $CONFIG_DEBUGSYM -+fi -+ -+dep_bool 'Enable gcov support' CONFIG_GCOV $CONFIG_DEBUGSYM -+endmenu -Index: linux-2.4.29/arch/um/config_net.in -=================================================================== ---- linux-2.4.29.orig/arch/um/config_net.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config_net.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,48 @@ -+mainmenu_option next_comment -+comment 'Network Devices' -+ -+# UML virtual driver -+bool 'Virtual network device' CONFIG_UML_NET -+ -+dep_bool ' Ethertap transport' CONFIG_UML_NET_ETHERTAP $CONFIG_UML_NET -+dep_bool ' TUN/TAP transport' CONFIG_UML_NET_TUNTAP $CONFIG_UML_NET -+dep_bool ' SLIP transport' CONFIG_UML_NET_SLIP $CONFIG_UML_NET -+dep_bool ' SLiRP transport' CONFIG_UML_NET_SLIRP $CONFIG_UML_NET -+dep_bool ' Daemon transport' CONFIG_UML_NET_DAEMON $CONFIG_UML_NET -+dep_bool ' Multicast transport' CONFIG_UML_NET_MCAST $CONFIG_UML_NET -+dep_bool ' pcap transport' CONFIG_UML_NET_PCAP $CONFIG_UML_NET -+ -+# Below are hardware-independent drivers mirrored from -+# drivers/net/Config.in. It would be nice if Linux -+# had HW independent drivers separated from the other -+# but it does not. Until then each non-ISA/PCI arch -+# needs to provide it's own menu of network drivers -+ -+tristate 'Dummy net driver support' CONFIG_DUMMY -+tristate 'Bonding driver support' CONFIG_BONDING -+tristate 'EQL (serial line load balancing) support' CONFIG_EQUALIZER -+tristate 'Universal TUN/TAP device driver support' CONFIG_TUN -+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ if [ "$CONFIG_NETLINK" = "y" ]; then -+ tristate 'Ethertap network tap (OBSOLETE)' CONFIG_ETHERTAP -+ fi -+fi -+ -+tristate 'PPP (point-to-point protocol) support' CONFIG_PPP -+if [ ! "$CONFIG_PPP" = "n" ]; then -+ dep_bool ' PPP multilink support (EXPERIMENTAL)' CONFIG_PPP_MULTILINK $CONFIG_EXPERIMENTAL -+ dep_bool ' PPP filtering' CONFIG_PPP_FILTER $CONFIG_FILTER -+ dep_tristate ' PPP support for async serial ports' CONFIG_PPP_ASYNC $CONFIG_PPP -+ dep_tristate ' PPP support for sync tty ports' CONFIG_PPP_SYNC_TTY $CONFIG_PPP -+ dep_tristate ' PPP Deflate compression' CONFIG_PPP_DEFLATE $CONFIG_PPP -+ dep_tristate ' PPP BSD-Compress compression' CONFIG_PPP_BSDCOMP $CONFIG_PPP -+ dep_tristate ' PPP over Ethernet (EXPERIMENTAL)' CONFIG_PPPOE $CONFIG_PPP $CONFIG_EXPERIMENTAL -+ dep_tristate ' PPP MPPE compression (encryption)' CONFIG_PPP_MPPE $CONFIG_PPP -+fi -+ -+tristate 'SLIP (serial line) support' CONFIG_SLIP -+dep_bool ' CSLIP compressed headers' CONFIG_SLIP_COMPRESSED $CONFIG_SLIP -+dep_bool ' Keepalive and linefill' CONFIG_SLIP_SMART $CONFIG_SLIP -+dep_bool ' Six bit SLIP encapsulation' CONFIG_SLIP_MODE_SLIP6 $CONFIG_SLIP -+ -+endmenu -Index: linux-2.4.29/arch/um/config.release -=================================================================== ---- linux-2.4.29.orig/arch/um/config.release 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config.release 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,302 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+# CONFIG_HPPFS is not set -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+CONFIG_KMOD=y -+ -+# -+# Character Devices -+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+# CONFIG_NULL_CHAN is not set -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y -+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+CONFIG_DUMMY=y -+CONFIG_BONDING=m -+CONFIG_EQUALIZER=m -+CONFIG_TUN=y -+CONFIG_PPP=m -+CONFIG_PPP_MULTILINK=y -+# CONFIG_PPP_ASYNC is not set -+CONFIG_PPP_SYNC_TTY=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPPOE=m -+CONFIG_SLIP=m -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set -+# CONFIG_KHTTPD is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+CONFIG_AUTOFS_FS=m -+CONFIG_AUTOFS4_FS=m -+CONFIG_REISERFS_FS=m -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+CONFIG_ADFS_FS=m -+# CONFIG_ADFS_FS_RW is not set -+CONFIG_AFFS_FS=m -+CONFIG_HFS_FS=m -+CONFIG_BFS_FS=m -+CONFIG_EXT3_FS=y -+CONFIG_JBD=y -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_EFS_FS=m -+CONFIG_CRAMFS=m -+CONFIG_TMPFS=y -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+CONFIG_MINIX_FS=m -+CONFIG_VXFS_FS=m -+# CONFIG_NTFS_FS is not set -+CONFIG_HPFS_FS=m -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+CONFIG_QNX4FS_FS=m -+# CONFIG_QNX4FS_RW is not set -+CONFIG_ROMFS_FS=m -+CONFIG_EXT2_FS=y -+CONFIG_SYSV_FS=m -+CONFIG_UDF_FS=m -+# CONFIG_UDF_RW is not set -+CONFIG_UFS_FS=m -+# CONFIG_UFS_FS_WRITE is not set -+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_SUNRPC=y -+CONFIG_LOCKD=y -+CONFIG_LOCKD_V4=y -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_ZISOFS_FS is not set -+CONFIG_ZLIB_FS_INFLATE=m -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set -+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=m -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+# CONFIG_MTD is not set -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+# CONFIG_DEBUGSYM is not set -Index: linux-2.4.29/arch/um/config_scsi.in -=================================================================== ---- linux-2.4.29.orig/arch/um/config_scsi.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/config_scsi.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+comment 'SCSI support type (disk, tape, CD-ROM)' -+ -+dep_tristate ' SCSI disk support' CONFIG_BLK_DEV_SD $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then -+ int 'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40 -+fi -+ -+dep_tristate ' SCSI tape support' CONFIG_CHR_DEV_ST $CONFIG_SCSI -+ -+dep_tristate ' SCSI CD-ROM support' CONFIG_BLK_DEV_SR $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SR" != "n" ]; then -+ bool ' Enable vendor-specific extensions (for SCSI CDROM)' CONFIG_BLK_DEV_SR_VENDOR -+ int 'Maximum number of CDROM devices that can be loaded as modules' CONFIG_SR_EXTRA_DEVS 2 -+fi -+dep_tristate ' SCSI generic support' CONFIG_CHR_DEV_SG $CONFIG_SCSI -+ -+comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -+ -+#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -+#fi -+ -+bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN -+ -+bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS -+bool ' SCSI logging facility' CONFIG_SCSI_LOGGING -+ -+dep_tristate 'SCSI debugging host simulator (EXPERIMENTAL)' CONFIG_SCSI_DEBUG $CONFIG_SCSI -Index: linux-2.4.29/arch/um/defconfig -=================================================================== ---- linux-2.4.29.orig/arch/um/defconfig 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/defconfig 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,430 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_MODE_SKAS=y -+CONFIG_MODE_TT=y -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+CONFIG_HUMFS=y -+CONFIG_EXTERNFS=y -+CONFIG_HPPFS=y -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+# CONFIG_HIGHMEM is not set -+CONFIG_PROC_MM=y -+CONFIG_KERNEL_STACK_ORDER=2 -+CONFIG_UML_REAL_TIME_CLOCK=y -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+# CONFIG_KMOD is not set -+ -+# -+# Character Devices -+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+CONFIG_NULL_CHAN=y -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+# CONFIG_WATCHDOG_NOWAYOUT is not set -+# CONFIG_SOFT_WATCHDOG is not set -+# CONFIG_UML_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y -+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+# CONFIG_COW is not set -+CONFIG_COW_COMMON=y -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_SLIRP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+# CONFIG_UML_NET_PCAP is not set -+CONFIG_DUMMY=y -+# CONFIG_BONDING is not set -+# CONFIG_EQUALIZER is not set -+CONFIG_TUN=y -+CONFIG_PPP=y -+# CONFIG_PPP_MULTILINK is not set -+# CONFIG_PPP_FILTER is not set -+# CONFIG_PPP_ASYNC is not set -+# CONFIG_PPP_SYNC_TTY is not set -+# CONFIG_PPP_DEFLATE is not set -+# CONFIG_PPP_BSDCOMP is not set -+# CONFIG_PPPOE is not set -+# CONFIG_PPP_MPPE is not set -+CONFIG_SLIP=y -+# CONFIG_SLIP_COMPRESSED is not set -+# CONFIG_SLIP_SMART is not set -+# CONFIG_SLIP_MODE_SLIP6 is not set -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set -+# CONFIG_KHTTPD is not set -+ -+# -+# SCTP Configuration (EXPERIMENTAL) -+# -+CONFIG_IPV6_SCTP__=y -+# CONFIG_IP_SCTP is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DEV_APPLETALK is not set -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+# CONFIG_QFMT_V2 is not set -+CONFIG_AUTOFS_FS=y -+CONFIG_AUTOFS4_FS=y -+CONFIG_REISERFS_FS=y -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+# CONFIG_ADFS_FS is not set -+# CONFIG_ADFS_FS_RW is not set -+# CONFIG_AFFS_FS is not set -+# CONFIG_HFS_FS is not set -+# CONFIG_HFSPLUS_FS is not set -+# CONFIG_BEFS_FS is not set -+# CONFIG_BEFS_DEBUG is not set -+# CONFIG_BFS_FS is not set -+CONFIG_EXT3_FS=y -+CONFIG_JBD=y -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+# CONFIG_EFS_FS is not set -+CONFIG_JFFS_FS=y -+CONFIG_JFFS_FS_VERBOSE=0 -+CONFIG_JFFS_PROC_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_JFFS2_FS_DEBUG=0 -+# CONFIG_CRAMFS is not set -+CONFIG_TMPFS=y -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+# CONFIG_JFS_FS is not set -+# CONFIG_JFS_DEBUG is not set -+# CONFIG_JFS_STATISTICS is not set -+CONFIG_MINIX_FS=y -+# CONFIG_VXFS_FS is not set -+# CONFIG_NTFS_FS is not set -+# CONFIG_NTFS_RW is not set -+# CONFIG_HPFS_FS is not set -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+# CONFIG_QNX4FS_FS is not set -+# CONFIG_QNX4FS_RW is not set -+# CONFIG_ROMFS_FS is not set -+CONFIG_EXT2_FS=y -+# CONFIG_SYSV_FS is not set -+# CONFIG_UDF_FS is not set -+# CONFIG_UDF_RW is not set -+# CONFIG_UFS_FS is not set -+# CONFIG_UFS_FS_WRITE is not set -+# CONFIG_XFS_FS is not set -+# CONFIG_XFS_QUOTA is not set -+# CONFIG_XFS_RT is not set -+# CONFIG_XFS_TRACE is not set -+# CONFIG_XFS_DEBUG is not set -+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+# CONFIG_NFS_FS is not set -+# CONFIG_NFS_V3 is not set -+# CONFIG_NFS_DIRECTIO is not set -+# CONFIG_ROOT_NFS is not set -+# CONFIG_NFSD is not set -+# CONFIG_NFSD_V3 is not set -+# CONFIG_NFSD_TCP is not set -+# CONFIG_SUNRPC is not set -+# CONFIG_LOCKD is not set -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_NCPFS_PACKET_SIGNING is not set -+# CONFIG_NCPFS_IOCTL_LOCKING is not set -+# CONFIG_NCPFS_STRONG is not set -+# CONFIG_NCPFS_NFS_NS is not set -+# CONFIG_NCPFS_OS2_NS is not set -+# CONFIG_NCPFS_SMALLDOS is not set -+# CONFIG_NCPFS_NLS is not set -+# CONFIG_NCPFS_EXTRAS is not set -+# CONFIG_ZISOFS_FS is not set -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set -+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=y -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+# CONFIG_BLK_DEV_MD is not set -+# CONFIG_MD_LINEAR is not set -+# CONFIG_MD_RAID0 is not set -+# CONFIG_MD_RAID1 is not set -+# CONFIG_MD_RAID5 is not set -+# CONFIG_MD_MULTIPATH is not set -+# CONFIG_BLK_DEV_LVM is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+CONFIG_MTD=y -+# CONFIG_MTD_DEBUG is not set -+# CONFIG_MTD_PARTITIONS is not set -+# CONFIG_MTD_CONCAT is not set -+# CONFIG_MTD_REDBOOT_PARTS is not set -+# CONFIG_MTD_CMDLINE_PARTS is not set -+ -+# -+# User Modules And Translation Layers -+# -+CONFIG_MTD_CHAR=y -+CONFIG_MTD_BLOCK=y -+# CONFIG_FTL is not set -+# CONFIG_NFTL is not set -+ -+# -+# RAM/ROM/Flash chip drivers -+# -+# CONFIG_MTD_CFI is not set -+# CONFIG_MTD_JEDECPROBE is not set -+# CONFIG_MTD_GEN_PROBE is not set -+# CONFIG_MTD_CFI_INTELEXT is not set -+# CONFIG_MTD_CFI_AMDSTD is not set -+# CONFIG_MTD_CFI_STAA is not set -+# CONFIG_MTD_RAM is not set -+# CONFIG_MTD_ROM is not set -+# CONFIG_MTD_ABSENT is not set -+# CONFIG_MTD_OBSOLETE_CHIPS is not set -+# CONFIG_MTD_AMDSTD is not set -+# CONFIG_MTD_SHARP is not set -+# CONFIG_MTD_JEDEC is not set -+ -+# -+# Mapping drivers for chip access -+# -+# CONFIG_MTD_PHYSMAP is not set -+# CONFIG_MTD_PCI is not set -+# CONFIG_MTD_PCMCIA is not set -+ -+# -+# Self-contained MTD device drivers -+# -+# CONFIG_MTD_PMC551 is not set -+# CONFIG_MTD_SLRAM is not set -+# CONFIG_MTD_MTDRAM is not set -+CONFIG_MTD_BLKMTD=y -+ -+# -+# Disk-On-Chip Device Drivers -+# -+# CONFIG_MTD_DOC1000 is not set -+# CONFIG_MTD_DOC2000 is not set -+# CONFIG_MTD_DOC2001 is not set -+# CONFIG_MTD_DOCPROBE is not set -+ -+# -+# NAND Flash Device Drivers -+# -+# CONFIG_MTD_NAND is not set -+ -+# -+# Library routines -+# -+# CONFIG_CRC32 is not set -+CONFIG_ZLIB_INFLATE=y -+CONFIG_ZLIB_DEFLATE=y -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+CONFIG_DEBUGSYM=y -+CONFIG_PT_PROXY=y -+# CONFIG_GCOV is not set -Index: linux-2.4.29/arch/um/drivers/chan_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/chan_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/chan_kern.c 2005-05-03 22:28:14.196452024 +0300 -@@ -0,0 +1,568 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/stddef.h> -+#include <linux/kernel.h> -+#include <linux/list.h> -+#include <linux/slab.h> -+#include <linux/tty.h> -+#include <linux/string.h> -+#include <linux/tty_flip.h> -+#include <asm/irq.h> -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "sigio.h" -+#include "line.h" -+#include "os.h" -+ -+static void *not_configged_init(char *str, int device, struct chan_opts *opts) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(NULL); -+} -+ -+static int not_configged_open(int input, int output, int primary, void *data, -+ char **dev_out) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_close(int fd, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static int not_configged_read(int fd, char *c_out, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_write(int fd, const char *buf, int len, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_console_write(int fd, const char *buf, int len, -+ void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_window_size(int fd, void *data, unsigned short *rows, -+ unsigned short *cols) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_free(void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static struct chan_ops not_configged_ops = { -+ .init = not_configged_init, -+ .open = not_configged_open, -+ .close = not_configged_close, -+ .read = not_configged_read, -+ .write = not_configged_write, -+ .console_write = not_configged_console_write, -+ .window_size = not_configged_window_size, -+ .free = not_configged_free, -+ .winch = 0, -+}; -+ -+void generic_close(int fd, void *unused) -+{ -+ os_close_file(fd); -+} -+ -+int generic_read(int fd, char *c_out, void *unused) -+{ -+ int n; -+ -+ n = os_read_file(fd, c_out, sizeof(*c_out)); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-EIO); -+ return(n); -+} -+ -+/* XXX Trivial wrapper around os_write_file */ -+ -+int generic_write(int fd, const char *buf, int n, void *unused) -+{ -+ return(os_write_file(fd, buf, n)); -+} -+ -+int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ int rows, cols; -+ int ret; -+ -+ ret = os_window_size(fd, &rows, &cols); -+ if(ret < 0) -+ return(ret); -+ -+ ret = ((*rows_out != rows) || (*cols_out != cols)); -+ -+ *rows_out = rows; -+ *cols_out = cols; -+ -+ return(ret); -+} -+ -+void generic_free(void *data) -+{ -+ kfree(data); -+} -+ -+static void tty_receive_char(struct tty_struct *tty, char ch) -+{ -+ if(tty == NULL) return; -+ -+ if(I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) { -+ if(ch == STOP_CHAR(tty)){ -+ stop_tty(tty); -+ return; -+ } -+ else if(ch == START_CHAR(tty)){ -+ start_tty(tty); -+ return; -+ } -+ } -+ -+ if((tty->flip.flag_buf_ptr == NULL) || -+ (tty->flip.char_buf_ptr == NULL)) -+ return; -+ tty_insert_flip_char(tty, ch, TTY_NORMAL); -+} -+ -+static int open_one_chan(struct chan *chan, int input, int output, int primary) -+{ -+ int fd; -+ -+ if(chan->opened) return(0); -+ if(chan->ops->open == NULL) fd = 0; -+ else fd = (*chan->ops->open)(input, output, primary, chan->data, -+ &chan->dev); -+ if(fd < 0) return(fd); -+ chan->fd = fd; -+ -+ chan->opened = 1; -+ return(0); -+} -+ -+int open_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int ret, err = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ ret = open_one_chan(chan, chan->input, chan->output, -+ chan->primary); -+ if(chan->primary) err = ret; -+ } -+ return(err); -+} -+ -+void chan_enable_winch(struct list_head *chans, void *line) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output && chan->ops->winch){ -+ register_winch(chan->fd, line); -+ return; -+ } -+ } -+} -+ -+void enable_chan(struct list_head *chans, void *data) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ -+ line_setup_irq(chan->fd, chan->input, chan->output, data); -+ } -+} -+ -+void close_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ /* Close in reverse order as open in case more than one of them -+ * refers to the same device and they save and restore that device's -+ * state. Then, the first one opened will have the original state, -+ * so it must be the last closed. -+ */ -+ for(ele = chans->prev; ele != chans; ele = ele->prev){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ if(chan->ops->close != NULL) -+ (*chan->ops->close)(chan->fd, chan->data); -+ chan->opened = 0; -+ chan->fd = -1; -+ } -+} -+ -+int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->write == NULL)) continue; -+ n = chan->ops->write(chan->fd, buf, len, chan->data); -+ if(chan->primary){ -+ ret = n; -+ if((ret == -EAGAIN) || ((ret >= 0) && (ret < len))){ -+ reactivate_fd(chan->fd, write_irq); -+ if(ret == -EAGAIN) ret = 0; -+ } -+ } -+ } -+ return(ret); -+} -+ -+int console_write_chan(struct list_head *chans, const char *buf, int len) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->console_write == NULL)) -+ continue; -+ n = chan->ops->console_write(chan->fd, buf, len, chan->data); -+ if(chan->primary) ret = n; -+ } -+ return(ret); -+} -+ -+int chan_window_size(struct list_head *chans, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary){ -+ if(chan->ops->window_size == NULL) return(0); -+ return(chan->ops->window_size(chan->fd, chan->data, -+ rows_out, cols_out)); -+ } -+ } -+ return(0); -+} -+ -+void free_one_chan(struct chan *chan) -+{ -+ list_del(&chan->list); -+ if(chan->ops->free != NULL) -+ (*chan->ops->free)(chan->data); -+ free_irq_by_fd(chan->fd); -+ if(chan->primary && chan->output) ignore_sigio_fd(chan->fd); -+ kfree(chan); -+} -+ -+void free_chan(struct list_head *chans) -+{ -+ struct list_head *ele, *next; -+ struct chan *chan; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ free_one_chan(chan); -+ } -+} -+ -+static int one_chan_config_string(struct chan *chan, char *str, int size, -+ char **error_out) -+{ -+ int n = 0; -+ -+ if(chan == NULL){ -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, chan->ops->type, 0); -+ -+ if(chan->dev == NULL){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ":", 0); -+ CONFIG_CHUNK(str, size, n, chan->dev, 0); -+ -+ return(n); -+} -+ -+static int chan_pair_config_string(struct chan *in, struct chan *out, -+ char *str, int size, char **error_out) -+{ -+ int n; -+ -+ n = one_chan_config_string(in, str, size, error_out); -+ str += n; -+ size -= n; -+ -+ if(in == out){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ",", 1); -+ n = one_chan_config_string(out, str, size, error_out); -+ str += n; -+ size -= n; -+ CONFIG_CHUNK(str, size, n, "", 1); -+ -+ return(n); -+} -+ -+int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out) -+{ -+ struct list_head *ele; -+ struct chan *chan, *in = NULL, *out = NULL; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->primary) -+ continue; -+ if(chan->input) -+ in = chan; -+ if(chan->output) -+ out = chan; -+ } -+ -+ return(chan_pair_config_string(in, out, str, size, error_out)); -+} -+ -+struct chan_type { -+ char *key; -+ struct chan_ops *ops; -+}; -+ -+struct chan_type chan_table[] = { -+#ifdef CONFIG_FD_CHAN -+ { "fd", &fd_ops }, -+#else -+ { "fd", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_NULL_CHAN -+ { "null", &null_ops }, -+#else -+ { "null", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PORT_CHAN -+ { "port", &port_ops }, -+#else -+ { "port", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PTY_CHAN -+ { "pty", &pty_ops }, -+ { "pts", &pts_ops }, -+#else -+ { "pty", ¬_configged_ops }, -+ { "pts", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_TTY_CHAN -+ { "tty", &tty_ops }, -+#else -+ { "tty", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_XTERM_CHAN -+ { "xterm", &xterm_ops }, -+#else -+ { "xterm", ¬_configged_ops }, -+#endif -+}; -+ -+static struct chan *parse_chan(char *str, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan_type *entry; -+ struct chan_ops *ops; -+ struct chan *chan; -+ void *data; -+ int i; -+ -+ ops = NULL; -+ data = NULL; -+ for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ -+ entry = &chan_table[i]; -+ if(!strncmp(str, entry->key, strlen(entry->key))){ -+ ops = entry->ops; -+ str += strlen(entry->key); -+ break; -+ } -+ } -+ if(ops == NULL){ -+ printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", -+ str); -+ return(NULL); -+ } -+ if(ops->init == NULL) return(NULL); -+ data = (*ops->init)(str, device, opts); -+ if(data == NULL) return(NULL); -+ -+ chan = kmalloc(sizeof(*chan), GFP_KERNEL); -+ if(chan == NULL) return(NULL); -+ *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), -+ .primary = 1, -+ .input = 0, -+ .output = 0, -+ .opened = 0, -+ .fd = -1, -+ .pri = pri, -+ .ops = ops, -+ .data = data }); -+ return(chan); -+} -+ -+int parse_chan_pair(char *str, struct list_head *chans, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan *new, *chan; -+ char *in, *out; -+ -+ if(!list_empty(chans)){ -+ chan = list_entry(chans->next, struct chan, list); -+ if(chan->pri >= pri) return(0); -+ free_chan(chans); -+ INIT_LIST_HEAD(chans); -+ } -+ -+ out = strchr(str, ','); -+ if(out != NULL){ -+ in = str; -+ *out = '\0'; -+ out++; -+ new = parse_chan(in, pri, device, opts); -+ if(new == NULL) return(-1); -+ new->input = 1; -+ list_add(&new->list, chans); -+ -+ new = parse_chan(out, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->output = 1; -+ } -+ else { -+ new = parse_chan(str, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->input = 1; -+ new->output = 1; -+ } -+ return(0); -+} -+ -+int chan_out_fd(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output) -+ return(chan->fd); -+ } -+ return(-1); -+} -+ -+void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev) -+{ -+ struct list_head *ele, *next; -+ struct chan *chan; -+ int err; -+ char c; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->input || (chan->ops->read == NULL)) continue; -+ do { -+ if((tty != NULL) && -+ (tty->flip.count >= TTY_FLIPBUF_SIZE)){ -+ queue_task(task, &tq_timer); -+ goto out; -+ } -+ err = chan->ops->read(chan->fd, &c, chan->data); -+ if(err > 0) -+ tty_receive_char(tty, c); -+ } while(err > 0); -+ -+ if(err == 0) reactivate_fd(chan->fd, irq); -+ if(err == -EIO){ -+ if(chan->primary){ -+ if(tty != NULL) -+ tty_hangup(tty); -+ line_disable(dev, irq); -+ close_chan(chans); -+ free_chan(chans); -+ return; -+ } -+ else { -+ if(chan->ops->close != NULL) -+ chan->ops->close(chan->fd, chan->data); -+ free_one_chan(chan); -+ } -+ } -+ } -+ out: -+ if(tty) tty_flip_buffer_push(tty); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/chan_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/chan_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/chan_user.c 2005-05-03 22:28:14.197451872 +0300 -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdlib.h> -+#include <errno.h> -+#include <termios.h> -+#include <string.h> -+#include <signal.h> -+#include <sys/stat.h> -+#include <sys/ioctl.h> -+#include <sys/socket.h> -+#include "kern_util.h" -+#include "user_util.h" -+#include "chan_user.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+static void winch_handler(int sig) -+{ -+} -+ -+struct winch_data { -+ int pty_fd; -+ int pipe_fd; -+ int close_me; -+}; -+ -+/* XXX This breaks horribly (by hanging UML) when moved to chan_kern.c - -+ * needs investigation -+ */ -+int generic_console_write(int fd, const char *buf, int n, void *unused) -+{ -+ struct termios save, new; -+ int err; -+ -+ if(isatty(fd)){ -+ tcgetattr(fd, &save); -+ new = save; -+ new.c_oflag |= OPOST; -+ tcsetattr(fd, TCSAFLUSH, &new); -+ } -+ err = generic_write(fd, buf, n, NULL); -+ if(isatty(fd)) tcsetattr(fd, TCSAFLUSH, &save); -+ return(err); -+} -+ -+static int winch_thread(void *arg) -+{ -+ struct winch_data *data = arg; -+ sigset_t sigs; -+ int pty_fd, pipe_fd; -+ int count, err; -+ char c = 1; -+ -+ os_close_file(data->close_me); -+ pty_fd = data->pty_fd; -+ pipe_fd = data->pipe_fd; -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : failed to write synchronization " -+ "byte, err = %d\n", -count); -+ -+ signal(SIGWINCH, winch_handler); -+ sigfillset(&sigs); -+ sigdelset(&sigs, SIGWINCH); -+ if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){ -+ printk("winch_thread : sigprocmask failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ if(setsid() < 0){ -+ printk("winch_thread : setsid failed, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ err = os_new_tty_pgrp(pty_fd, os_getpid()); -+ if(err < 0){ -+ printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); -+ exit(1); -+ } -+ -+ count = os_read_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : failed to read synchronization byte, " -+ "err = %d\n", -count); -+ -+ while(1){ -+ pause(); -+ -+ count = os_write_file(pipe_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("winch_thread : write failed, err = %d\n", -+ -count); -+ } -+} -+ -+static int winch_tramp(int fd, void *device_data, int *fd_out) -+{ -+ struct winch_data data; -+ unsigned long stack; -+ int fds[2], pid, n, err; -+ char c; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("winch_tramp : os_pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ -+ data = ((struct winch_data) { .pty_fd = fd, -+ .pipe_fd = fds[1], -+ .close_me = fds[0] } ); -+ pid = run_helper_thread(winch_thread, &data, 0, &stack, 0); -+ if(pid < 0){ -+ printk("fork of winch_thread failed - errno = %d\n", errno); -+ return(pid); -+ } -+ -+ os_close_file(fds[1]); -+ *fd_out = fds[0]; -+ n = os_read_file(fds[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("winch_tramp : failed to read synchronization byte\n"); -+ printk("read failed, err = %d\n", -n); -+ printk("fd %d will not support SIGWINCH\n", fd); -+ *fd_out = -1; -+ } -+ return(pid); -+} -+ -+void register_winch(int fd, void *device_data) -+{ -+ int pid, thread, thread_fd; -+ int count; -+ char c = 1; -+ -+ if(!isatty(fd)) -+ return; -+ -+ pid = tcgetpgrp(fd); -+ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, -+ device_data) && (pid == -1)){ -+ thread = winch_tramp(fd, device_data, &thread_fd); -+ if(fd != -1){ -+ register_winch_irq(thread_fd, fd, thread, device_data); -+ -+ count = os_write_file(thread_fd, &c, sizeof(c)); -+ if(count != sizeof(c)) -+ printk("register_winch : failed to write " -+ "synchronization byte, err = %d\n", -+ -count); -+ } -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/cow.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/cow.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/cow.h 2005-05-03 22:43:32.718815400 +0300 -@@ -0,0 +1,41 @@ -+#ifndef __COW_H__ -+#define __COW_H__ -+ -+#include <asm/types.h> -+ -+#if __BYTE_ORDER == __BIG_ENDIAN -+# define ntohll(x) (x) -+# define htonll(x) (x) -+#elif __BYTE_ORDER == __LITTLE_ENDIAN -+# define ntohll(x) bswap_64(x) -+# define htonll(x) bswap_64(x) -+#else -+#error "__BYTE_ORDER not defined" -+#endif -+ -+extern int init_cow_file(int fd, char *cow_file, char *backing_file, -+ int sectorsize, int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out); -+ -+extern int file_reader(__u64 offset, char *buf, int len, void *arg); -+extern int read_cow_header(int (*reader)(__u64, char *, int, void *), -+ void *arg, __u32 *version_out, -+ char **backing_file_out, time_t *mtime_out, -+ __u64 *size_out, int *sectorsize_out, -+ __u32 *align_out, int *bitmap_offset_out); -+ -+extern int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size); -+ -+extern void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out); -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/cow_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/cow_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/cow_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,630 @@ -+#define COW_MAJOR 60 -+#define MAJOR_NR COW_MAJOR -+ -+#include <linux/stddef.h> -+#include <linux/kernel.h> -+#include <linux/ctype.h> -+#include <linux/stat.h> -+#include <linux/vmalloc.h> -+#include <linux/blkdev.h> -+#include <linux/blk.h> -+#include <linux/fs.h> -+#include <linux/genhd.h> -+#include <linux/devfs_fs.h> -+#include <asm/uaccess.h> -+#include "2_5compat.h" -+#include "cow.h" -+#include "ubd_user.h" -+ -+#define COW_SHIFT 4 -+ -+struct cow { -+ int count; -+ char *cow_path; -+ dev_t cow_dev; -+ struct block_device *cow_bdev; -+ char *backing_path; -+ dev_t backing_dev; -+ struct block_device *backing_bdev; -+ int sectorsize; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+ devfs_handle_t devfs; -+ struct semaphore sem; -+ struct semaphore io_sem; -+ atomic_t working; -+ spinlock_t io_lock; -+ struct buffer_head *bh; -+ struct buffer_head *bhtail; -+ void *end_io; -+}; -+ -+#define DEFAULT_COW { \ -+ .count = 0, \ -+ .cow_path = NULL, \ -+ .cow_dev = 0, \ -+ .backing_path = NULL, \ -+ .backing_dev = 0, \ -+ .bitmap = NULL, \ -+ .bitmap_len = 0, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+ .devfs = NULL, \ -+ .working = ATOMIC_INIT(0), \ -+ .io_lock = SPIN_LOCK_UNLOCKED, \ -+} -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << COW_SHIFT) -+ -+struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; -+ -+/* Not modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by cow_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct hd_struct cow_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *cow_queue; -+ -+static int cow_open(struct inode *inode, struct file *filp); -+static int cow_release(struct inode * inode, struct file * file); -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int cow_revalidate(kdev_t rdev); -+ -+static struct block_device_operations cow_blops = { -+ .open = cow_open, -+ .release = cow_release, -+ .ioctl = cow_ioctl, -+ .revalidate = cow_revalidate, -+}; -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t cow_dir_handle; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; -+ -+static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, -+ COW_SHIFT, sizes, MAX_DEV, -+ &cow_blops); -+ -+static int cow_add(int n) -+{ -+ struct cow *dev = &cow_dev[n]; -+ char name[sizeof("nnnnnn\0")]; -+ int err = -ENODEV; -+ -+ if(dev->cow_path == NULL) -+ goto out; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << COW_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &cow_blops, NULL); -+ -+ init_MUTEX_LOCKED(&dev->sem); -+ init_MUTEX(&dev->io_sem); -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * Add buffer_head to back of pending list -+ */ -+static void cow_add_bh(struct cow *cow, struct buffer_head *bh) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&cow->io_lock, flags); -+ if(cow->bhtail != NULL){ -+ cow->bhtail->b_reqnext = bh; -+ cow->bhtail = bh; -+ } -+ else { -+ cow->bh = bh; -+ cow->bhtail = bh; -+ } -+ spin_unlock_irqrestore(&cow->io_lock, flags); -+} -+ -+/* -+ * Grab first pending buffer -+ */ -+static struct buffer_head *cow_get_bh(struct cow *cow) -+{ -+ struct buffer_head *bh; -+ -+ spin_lock_irq(&cow->io_lock); -+ bh = cow->bh; -+ if(bh != NULL){ -+ if(bh == cow->bhtail) -+ cow->bhtail = NULL; -+ cow->bh = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ } -+ spin_unlock_irq(&cow->io_lock); -+ -+ return(bh); -+} -+ -+static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, -+ struct buffer_head **cow_bh, int ncow_bh) -+{ -+ int i; -+ -+ if(ncow_bh > 0) -+ ll_rw_block(WRITE, ncow_bh, cow_bh); -+ -+ for(i = 0; i < ncow_bh ; i++){ -+ wait_on_buffer(cow_bh[i]); -+ brelse(cow_bh[i]); -+ } -+ -+ ll_rw_block(WRITE, 1, &bh); -+ brelse(bh); -+} -+ -+static struct buffer_head *cow_new_bh(struct cow *dev, int sector) -+{ -+ struct buffer_head *bh; -+ -+ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; -+ bh = getblk(dev->cow_dev, sector, dev->sectorsize); -+ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), -+ dev->sectorsize); -+ return(bh); -+} -+ -+/* Copied from loop.c, needed to avoid deadlocking in make_request. */ -+ -+static int cow_thread(void *data) -+{ -+ struct cow *dev = data; -+ struct buffer_head *bh; -+ -+ daemonize(); -+ exit_files(current); -+ -+ sprintf(current->comm, "cow%d", dev - cow_dev); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigfillset(¤t->blocked); -+ flush_signals(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ atomic_inc(&dev->working); -+ -+ current->policy = SCHED_OTHER; -+ current->nice = -20; -+ -+ current->flags |= PF_NOIO; -+ -+ /* -+ * up sem, we are running -+ */ -+ up(&dev->sem); -+ -+ for(;;){ -+ int start, len, nbh, i, update_bitmap = 0; -+ struct buffer_head *cow_bh[2]; -+ -+ down_interruptible(&dev->io_sem); -+ /* -+ * could be upped because of tear-down, not because of -+ * pending work -+ */ -+ if(!atomic_read(&dev->working)) -+ break; -+ -+ bh = cow_get_bh(dev); -+ if(bh == NULL){ -+ printk(KERN_ERR "cow: missing bh\n"); -+ continue; -+ } -+ -+ start = bh->b_blocknr * bh->b_size / dev->sectorsize; -+ len = bh->b_size / dev->sectorsize; -+ for(i = 0; i < len ; i++){ -+ if(ubd_test_bit(start + i, -+ (unsigned char *) dev->bitmap)) -+ continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(start + i, (unsigned char *) dev->bitmap); -+ } -+ -+ cow_bh[0] = NULL; -+ cow_bh[1] = NULL; -+ nbh = 0; -+ if(update_bitmap){ -+ cow_bh[0] = cow_new_bh(dev, start); -+ nbh++; -+ if(start / dev->sectorsize != -+ (start + len) / dev->sectorsize){ -+ cow_bh[1] = cow_new_bh(dev, start + len); -+ nbh++; -+ } -+ } -+ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_handle_bh(dev, bh, cow_bh, nbh); -+ -+ /* -+ * upped both for pending work and tear-down, lo_pending -+ * will hit zero then -+ */ -+ if(atomic_dec_and_test(&dev->working)) -+ break; -+ } -+ -+ up(&dev->sem); -+ return(0); -+} -+ -+static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) -+{ -+ struct cow *dev; -+ int n, minor; -+ -+ minor = MINOR(bh->b_rdev); -+ n = minor >> COW_SHIFT; -+ dev = &cow_dev[n]; -+ -+ dev->end_io = NULL; -+ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ -+ bh->b_rdev = dev->cow_dev; -+ bh->b_rsector += dev->data_offset / dev->sectorsize; -+ } -+ else if(rw == WRITE){ -+ bh->b_dev = dev->cow_dev; -+ bh->b_blocknr += dev->data_offset / dev->sectorsize; -+ -+ cow_add_bh(dev, bh); -+ up(&dev->io_sem); -+ return(0); -+ } -+ else { -+ bh->b_rdev = dev->backing_dev; -+ } -+ -+ return(1); -+} -+ -+int cow_init(void) -+{ -+ int i; -+ -+ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { -+ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(cow_queue, NULL); -+ INIT_ELV(cow_queue, &cow_queue->elevator); -+ blk_queue_make_request(cow_queue, cow_make_request); -+ -+ add_gendisk(&cow_gendisk); -+ -+ for(i=0;i<MAX_DEV;i++) -+ cow_add(i); -+ -+ return(0); -+} -+ -+__initcall(cow_init); -+ -+static int reader(__u64 start, char *buf, int count, void *arg) -+{ -+ dev_t dev = *((dev_t *) arg); -+ struct buffer_head *bh; -+ __u64 block; -+ int cur, offset, left, n, blocksize = get_hardsect_size(dev); -+ -+ if(blocksize == 0) -+ panic("Zero blocksize"); -+ -+ block = start / blocksize; -+ offset = start % blocksize; -+ left = count; -+ cur = 0; -+ while(left > 0){ -+ n = (left > blocksize) ? blocksize : left; -+ -+ bh = bread(dev, block, (n < 512) ? 512 : n); -+ if(bh == NULL) -+ return(-EIO); -+ -+ n -= offset; -+ memcpy(&buf[cur], bh->b_data + offset, n); -+ block++; -+ left -= n; -+ cur += n; -+ offset = 0; -+ brelse(bh); -+ } -+ -+ return(count); -+} -+ -+static int cow_open(struct inode *inode, struct file *filp) -+{ -+ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ mm_segment_t fs; -+ struct cow *dev; -+ __u64 size; -+ __u32 version, align; -+ time_t mtime; -+ char *backing_file; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ offset = n << COW_SHIFT; -+ -+ spin_lock(&cow_lock); -+ -+ if(dev->count == 0){ -+ dev->cow_dev = name_to_kdev_t(dev->cow_path); -+ if(dev->cow_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->cow_path); -+ err = -ENODEV; -+ } -+ -+ dev->backing_dev = name_to_kdev_t(dev->backing_path); -+ if(dev->backing_dev == 0){ -+ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " -+ "failed\n", dev->backing_path); -+ err = -ENODEV; -+ } -+ -+ if(err) -+ goto out; -+ -+ dev->cow_bdev = bdget(dev->cow_dev); -+ if(dev->cow_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->cow_path); -+ err = -ENOMEM; -+ } -+ dev->backing_bdev = bdget(dev->backing_dev); -+ if(dev->backing_bdev == NULL){ -+ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", -+ dev->backing_path); -+ err = -ENOMEM; -+ } -+ -+ if(err) -+ goto out; -+ -+ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, -+ BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of COW device failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); -+ if(err){ -+ printk("cow_open - blkdev_get of backing device " -+ "failed, error = %d\n", err); -+ goto out; -+ } -+ -+ err = read_cow_header(reader, &dev->cow_dev, &version, -+ &backing_file, &mtime, &size, -+ &dev->sectorsize, &align, -+ &dev->bitmap_offset); -+ if(err){ -+ printk(KERN_ERR "cow_open - read_cow_header failed, " -+ "err = %d\n", err); -+ goto out; -+ } -+ -+ cow_sizes(version, size, dev->sectorsize, align, -+ dev->bitmap_offset, &dev->bitmap_len, -+ &dev->data_offset); -+ dev->bitmap = (void *) vmalloc(dev->bitmap_len); -+ if(dev->bitmap == NULL){ -+ err = -ENOMEM; -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto out; -+ } -+ flush_tlb_kernel_vm(); -+ -+ err = reader(dev->bitmap_offset, (char *) dev->bitmap, -+ dev->bitmap_len, &dev->cow_dev); -+ if(err < 0){ -+ printk(KERN_ERR "Failed to read COW bitmap\n"); -+ vfree(dev->bitmap); -+ goto out; -+ } -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = (*dev_ioctl)(inode, filp, BLKGETSIZE, -+ (unsigned long) &sizes[offset]); -+ set_fs(fs); -+ if(err){ -+ printk(KERN_ERR "cow_open - BLKGETSIZE failed, " -+ "error = %d\n", err); -+ goto out; -+ } -+ -+ kernel_thread(cow_thread, dev, -+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND); -+ down(&dev->sem); -+ } -+ dev->count++; -+ out: -+ spin_unlock(&cow_lock); -+ return(err); -+} -+ -+static int cow_release(struct inode * inode, struct file * file) -+{ -+ struct cow *dev; -+ int n, err; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ spin_lock(&cow_lock); -+ -+ if(--dev->count > 0) -+ goto out; -+ -+ err = blkdev_put(dev->cow_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of cow device failed, " -+ "error = %d\n", err); -+ bdput(dev->cow_bdev); -+ dev->cow_bdev = 0; -+ -+ err = blkdev_put(dev->backing_bdev, BDEV_RAW); -+ if(err) -+ printk("cow_release - blkdev_put of backing device failed, " -+ "error = %d\n", err); -+ bdput(dev->backing_bdev); -+ dev->backing_bdev = 0; -+ -+ out: -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+static int cow_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct cow *dev; -+ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, -+ unsigned long); -+ int n; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ if(n >= MAX_DEV) -+ return(-ENODEV); -+ dev = &cow_dev[n]; -+ -+ dev_ioctl = dev->backing_bdev->bd_op->ioctl; -+ return((*dev_ioctl)(inode, file, cmd, arg)); -+} -+ -+static int cow_revalidate(kdev_t rdev) -+{ -+ printk(KERN_ERR "Need to implement cow_revalidate\n"); -+ return(0); -+} -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int cow_setup(char *str) -+{ -+ struct cow *dev; -+ char *cow_name, *backing_name; -+ int unit; -+ -+ unit = parse_unit(&str); -+ if(unit < 0){ -+ printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); -+ return(1); -+ } -+ -+ if(*str != '='){ -+ printk(KERN_ERR "cow_setup - Missing '=' after unit " -+ "number\n"); -+ return(1); -+ } -+ str++; -+ -+ cow_name = str; -+ backing_name = strchr(str, ','); -+ if(backing_name == NULL){ -+ printk(KERN_ERR "cow_setup - missing backing device name\n"); -+ return(0); -+ } -+ *backing_name = '\0'; -+ backing_name++; -+ -+ spin_lock(&cow_lock); -+ -+ dev = &cow_dev[unit]; -+ dev->cow_path = cow_name; -+ dev->backing_path = backing_name; -+ -+ spin_unlock(&cow_lock); -+ return(0); -+} -+ -+__setup("cow", cow_setup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/cow_sys.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/cow_sys.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/cow_sys.h 2005-05-03 22:43:34.768503800 +0300 -@@ -0,0 +1,48 @@ -+#ifndef __COW_SYS_H__ -+#define __COW_SYS_H__ -+ -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "user.h" -+ -+static inline void *cow_malloc(int size) -+{ -+ return(um_kmalloc(size)); -+} -+ -+static inline void cow_free(void *ptr) -+{ -+ kfree(ptr); -+} -+ -+#define cow_printf printk -+ -+static inline char *cow_strdup(char *str) -+{ -+ return(uml_strdup(str)); -+} -+ -+static inline int cow_seek_file(int fd, __u64 offset) -+{ -+ return(os_seek_file(fd, offset)); -+} -+ -+static inline int cow_file_size(char *file, __u64 *size_out) -+{ -+ return(os_file_size(file, size_out)); -+} -+ -+static inline int cow_write_file(int fd, char *buf, int size) -+{ -+ return(os_write_file(fd, buf, size)); -+} -+ -+#endif -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/cow_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/cow_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/cow_user.c 2005-05-03 22:28:14.203450960 +0300 -@@ -0,0 +1,375 @@ -+#include <stddef.h> -+#include <string.h> -+#include <errno.h> -+#include <unistd.h> -+#include <byteswap.h> -+#include <sys/time.h> -+#include <sys/param.h> -+#include <sys/user.h> -+#include <netinet/in.h> -+ -+#include "os.h" -+ -+#include "cow.h" -+#include "cow_sys.h" -+ -+#define PATH_LEN_V1 256 -+ -+struct cow_header_v1 { -+ int magic; -+ int version; -+ char backing_file[PATH_LEN_V1]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+#define PATH_LEN_V2 MAXPATHLEN -+ -+struct cow_header_v2 { -+ __u32 magic; -+ __u32 version; -+ char backing_file[PATH_LEN_V2]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in -+ * case other systems have different values for MAXPATHLEN -+ */ -+#define PATH_LEN_V3 4096 -+ -+/* Changes from V2 - -+ * PATH_LEN_V3 as described above -+ * Explicitly specify field bit lengths for systems with different -+ * lengths for the usual C types. Not sure whether char or -+ * time_t should be changed, this can be changed later without -+ * breaking compatibility -+ * Add alignment field so that different alignments can be used for the -+ * bitmap and data -+ * Add cow_format field to allow for the possibility of different ways -+ * of specifying the COW blocks. For now, the only value is 0, -+ * for the traditional COW bitmap. -+ * Move the backing_file field to the end of the header. This allows -+ * for the possibility of expanding it into the padding required -+ * by the bitmap alignment. -+ * The bitmap and data portions of the file will be aligned as specified -+ * by the alignment field. This is to allow COW files to be -+ * put on devices with restrictions on access alignments, such as -+ * /dev/raw, with a 512 byte alignment restriction. This also -+ * allows the data to be more aligned more strictly than on -+ * sector boundaries. This is needed for ubd-mmap, which needs -+ * the data to be page aligned. -+ * Fixed (finally!) the rounding bug -+ */ -+ -+struct cow_header_v3 { -+ __u32 magic; -+ __u32 version; -+ time_t mtime; -+ __u64 size; -+ __u32 sectorsize; -+ __u32 alignment; -+ __u32 cow_format; -+ char backing_file[PATH_LEN_V3]; -+}; -+ -+/* COW format definitions - for now, we have only the usual COW bitmap */ -+#define COW_BITMAP 0 -+ -+union cow_header { -+ struct cow_header_v1 v1; -+ struct cow_header_v2 v2; -+ struct cow_header_v3 v3; -+}; -+ -+#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -+#define COW_VERSION 3 -+ -+#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) -+#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) -+ -+void cow_sizes(int version, __u64 size, int sectorsize, int align, -+ int bitmap_offset, unsigned long *bitmap_len_out, -+ int *data_offset_out) -+{ -+ if(version < 3){ -+ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = (*data_offset_out + sectorsize - 1) / -+ sectorsize; -+ *data_offset_out *= sectorsize; -+ } -+ else { -+ *bitmap_len_out = DIV_ROUND(size, sectorsize); -+ *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = ROUND_UP(*data_offset_out, align); -+ } -+} -+ -+static int absolutize(char *to, int size, char *from) -+{ -+ char save_cwd[256], *slash; -+ int remaining; -+ -+ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { -+ cow_printf("absolutize : unable to get cwd - errno = %d\n", -+ errno); -+ return(-1); -+ } -+ slash = strrchr(from, '/'); -+ if(slash != NULL){ -+ *slash = '\0'; -+ if(chdir(from)){ -+ *slash = '/'; -+ cow_printf("absolutize : Can't cd to '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ *slash = '/'; -+ if(getcwd(to, size) == NULL){ -+ cow_printf("absolutize : unable to get cwd of '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ remaining = size - strlen(to); -+ if(strlen(slash) + 1 > remaining){ -+ cow_printf("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcat(to, slash); -+ } -+ else { -+ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ -+ cow_printf("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcpy(to, save_cwd); -+ strcat(to, "/"); -+ strcat(to, from); -+ } -+ chdir(save_cwd); -+ return(0); -+} -+ -+int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, int alignment, long long *size) -+{ -+ struct cow_header_v3 *header; -+ unsigned long modtime; -+ int err; -+ -+ err = cow_seek_file(fd, 0); -+ if(err < 0){ -+ cow_printf("write_cow_header - lseek failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("Failed to allocate COW V3 header\n"); -+ goto out; -+ } -+ header->magic = htonl(COW_MAGIC); -+ header->version = htonl(COW_VERSION); -+ -+ err = -EINVAL; -+ if(strlen(backing_file) > sizeof(header->backing_file) - 1){ -+ cow_printf("Backing file name \"%s\" is too long - names are " -+ "limited to %d characters\n", backing_file, -+ sizeof(header->backing_file) - 1); -+ goto out_free; -+ } -+ -+ if(absolutize(header->backing_file, sizeof(header->backing_file), -+ backing_file)) -+ goto out_free; -+ -+ err = os_file_modtime(header->backing_file, &modtime); -+ if(err < 0){ -+ cow_printf("Backing file '%s' mtime request failed, " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ err = cow_file_size(header->backing_file, size); -+ if(err < 0){ -+ cow_printf("Couldn't get size of backing file '%s', " -+ "err = %d\n", header->backing_file, -err); -+ goto out_free; -+ } -+ -+ header->mtime = htonl(modtime); -+ header->size = htonll(*size); -+ header->sectorsize = htonl(sectorsize); -+ header->alignment = htonl(alignment); -+ header->cow_format = COW_BITMAP; -+ -+ err = os_write_file(fd, header, sizeof(*header)); -+ if(err != sizeof(*header)){ -+ cow_printf("Write of header to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ goto out_free; -+ } -+ err = 0; -+ out_free: -+ cow_free(header); -+ out: -+ return(err); -+} -+ -+int file_reader(__u64 offset, char *buf, int len, void *arg) -+{ -+ int fd = *((int *) arg); -+ -+ return(pread(fd, buf, len, offset)); -+} -+ -+/* XXX Need to sanity-check the values read from the header */ -+ -+int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, -+ __u32 *version_out, char **backing_file_out, -+ time_t *mtime_out, __u64 *size_out, -+ int *sectorsize_out, __u32 *align_out, -+ int *bitmap_offset_out) -+{ -+ union cow_header *header; -+ char *file; -+ int err, n; -+ unsigned long version, magic; -+ -+ header = cow_malloc(sizeof(*header)); -+ if(header == NULL){ -+ cow_printf("read_cow_header - Failed to allocate header\n"); -+ return(-ENOMEM); -+ } -+ err = -EINVAL; -+ n = (*reader)(0, (char *) header, sizeof(*header), arg); -+ if(n < offsetof(typeof(header->v1), backing_file)){ -+ cow_printf("read_cow_header - short header\n"); -+ goto out; -+ } -+ -+ magic = header->v1.magic; -+ if(magic == COW_MAGIC) { -+ version = header->v1.version; -+ } -+ else if(magic == ntohl(COW_MAGIC)){ -+ version = ntohl(header->v1.version); -+ } -+ /* No error printed because the non-COW case comes through here */ -+ else goto out; -+ -+ *version_out = version; -+ -+ if(version == 1){ -+ if(n < sizeof(header->v1)){ -+ cow_printf("read_cow_header - failed to read V1 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = header->v1.mtime; -+ *size_out = header->v1.size; -+ *sectorsize_out = header->v1.sectorsize; -+ *bitmap_offset_out = sizeof(header->v1); -+ *align_out = *sectorsize_out; -+ file = header->v1.backing_file; -+ } -+ else if(version == 2){ -+ if(n < sizeof(header->v2)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v2.mtime); -+ *size_out = ntohll(header->v2.size); -+ *sectorsize_out = ntohl(header->v2.sectorsize); -+ *bitmap_offset_out = sizeof(header->v2); -+ *align_out = *sectorsize_out; -+ file = header->v2.backing_file; -+ } -+ else if(version == 3){ -+ if(n < sizeof(header->v3)){ -+ cow_printf("read_cow_header - failed to read V2 " -+ "header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v3.mtime); -+ *size_out = ntohll(header->v3.size); -+ *sectorsize_out = ntohl(header->v3.sectorsize); -+ *align_out = ntohl(header->v3.alignment); -+ *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); -+ file = header->v3.backing_file; -+ } -+ else { -+ cow_printf("read_cow_header - invalid COW version\n"); -+ goto out; -+ } -+ err = -ENOMEM; -+ *backing_file_out = cow_strdup(file); -+ if(*backing_file_out == NULL){ -+ cow_printf("read_cow_header - failed to allocate backing " -+ "file\n"); -+ goto out; -+ } -+ err = 0; -+ out: -+ cow_free(header); -+ return(err); -+} -+ -+int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, -+ int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ __u64 size, offset; -+ char zero = 0; -+ int err; -+ -+ err = write_cow_header(cow_file, fd, backing_file, sectorsize, -+ alignment, &size); -+ if(err) -+ goto out; -+ -+ *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); -+ cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); -+ -+ offset = *data_offset_out + size - sizeof(zero); -+ err = cow_seek_file(fd, offset); -+ if(err < 0){ -+ cow_printf("cow bitmap lseek failed : err = %d\n", -err); -+ goto out; -+ } -+ -+ /* does not really matter how much we write it is just to set EOF -+ * this also sets the entire COW bitmap -+ * to zero without having to allocate it -+ */ -+ err = cow_write_file(fd, &zero, sizeof(zero)); -+ if(err != sizeof(zero)){ -+ cow_printf("Write of bitmap to new COW file '%s' failed, " -+ "err = %d\n", cow_file, -err); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/daemon.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/daemon.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/daemon.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+#define SWITCH_VERSION 3 -+ -+struct daemon_data { -+ char *sock_type; -+ char *ctl_sock; -+ void *ctl_addr; -+ void *data_addr; -+ void *local_addr; -+ int fd; -+ int control; -+ void *dev; -+}; -+ -+extern struct net_user_info daemon_user_info; -+ -+extern int daemon_user_write(int fd, void *buf, int len, -+ struct daemon_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/daemon_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/daemon_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/daemon_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "daemon.h" -+ -+struct daemon_init { -+ char *sock_type; -+ char *ctl_sock; -+}; -+ -+void daemon_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct daemon_data *dpri; -+ struct daemon_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct daemon_data *) pri->user; -+ *dpri = ((struct daemon_data) -+ { .sock_type = init->sock_type, -+ .ctl_sock = init->ctl_sock, -+ .ctl_addr = NULL, -+ .data_addr = NULL, -+ .local_addr = NULL, -+ .fd = -1, -+ .control = -1, -+ .dev = dev }); -+ -+ printk("daemon backend (uml_switch version %d) - %s:%s", -+ SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); -+ printk("\n"); -+} -+ -+static int daemon_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int daemon_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(daemon_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct daemon_data *) &lp->user)); -+} -+ -+static struct net_kern_info daemon_kern_info = { -+ .init = daemon_init, -+ .protocol = eth_protocol, -+ .read = daemon_read, -+ .write = daemon_write, -+}; -+ -+int daemon_setup(char *str, char **mac_out, void *data) -+{ -+ struct daemon_init *init = data; -+ char *remain; -+ -+ *init = ((struct daemon_init) -+ { .sock_type = "unix", -+ .ctl_sock = "/tmp/uml.ctl" }); -+ -+ remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, -+ NULL); -+ if(remain != NULL) -+ printk(KERN_WARNING "daemon_setup : Ignoring data socket " -+ "specification\n"); -+ -+ return(1); -+} -+ -+static struct transport daemon_transport = { -+ .list = LIST_HEAD_INIT(daemon_transport.list), -+ .name = "daemon", -+ .setup = daemon_setup, -+ .user = &daemon_user_info, -+ .kern = &daemon_kern_info, -+ .private_size = sizeof(struct daemon_data), -+ .setup_size = sizeof(struct daemon_init), -+}; -+ -+static int register_daemon(void) -+{ -+ register_transport(&daemon_transport); -+ return(1); -+} -+ -+__initcall(register_daemon); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/daemon_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/daemon_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/daemon_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,197 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include <errno.h> -+#include <unistd.h> -+#include <stdint.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/time.h> -+#include "net_user.h" -+#include "daemon.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+enum request_type { REQ_NEW_CONTROL }; -+ -+#define SWITCH_MAGIC 0xfeedface -+ -+struct request_v3 { -+ uint32_t magic; -+ uint32_t version; -+ enum request_type type; -+ struct sockaddr_un sock; -+}; -+ -+static struct sockaddr_un *new_addr(void *name, int len) -+{ -+ struct sockaddr_un *sun; -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ return(NULL); -+ } -+ sun->sun_family = AF_UNIX; -+ memcpy(sun->sun_path, name, len); -+ return(sun); -+} -+ -+static int connect_to_switch(struct daemon_data *pri) -+{ -+ struct sockaddr_un *ctl_addr = pri->ctl_addr; -+ struct sockaddr_un *local_addr = pri->local_addr; -+ struct sockaddr_un *sun; -+ struct request_v3 req; -+ int fd, n, err; -+ -+ pri->control = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(pri->control < 0){ -+ printk("daemon_open : control socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if(connect(pri->control, (struct sockaddr *) ctl_addr, -+ sizeof(*ctl_addr)) < 0){ -+ printk("daemon_open : control connect failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ -+ fd = socket(AF_UNIX, SOCK_DGRAM, 0); -+ if(fd < 0){ -+ printk("daemon_open : data socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ if(bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0){ -+ printk("daemon_open : data bind failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out_close; -+ } -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ err = -ENOMEM; -+ goto out_close; -+ } -+ -+ req.magic = SWITCH_MAGIC; -+ req.version = SWITCH_VERSION; -+ req.type = REQ_NEW_CONTROL; -+ req.sock = *local_addr; -+ n = os_write_file(pri->control, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk("daemon_open : control setup request failed, err = %d\n", -+ -n); -+ err = -ENOTCONN; -+ goto out; -+ } -+ -+ n = os_read_file(pri->control, sun, sizeof(*sun)); -+ if(n != sizeof(*sun)){ -+ printk("daemon_open : read of data socket failed, err = %d\n", -+ -n); -+ err = -ENOTCONN; -+ goto out_close; -+ } -+ -+ pri->data_addr = sun; -+ return(fd); -+ -+ out_close: -+ os_close_file(fd); -+ out: -+ os_close_file(pri->control); -+ return(err); -+} -+ -+static void daemon_user_init(void *data, void *dev) -+{ -+ struct daemon_data *pri = data; -+ struct timeval tv; -+ struct { -+ char zero; -+ int pid; -+ int usecs; -+ } name; -+ -+ if(!strcmp(pri->sock_type, "unix")) -+ pri->ctl_addr = new_addr(pri->ctl_sock, -+ strlen(pri->ctl_sock) + 1); -+ name.zero = 0; -+ name.pid = os_getpid(); -+ gettimeofday(&tv, NULL); -+ name.usecs = tv.tv_usec; -+ pri->local_addr = new_addr(&name, sizeof(name)); -+ pri->dev = dev; -+ pri->fd = connect_to_switch(pri); -+ if(pri->fd < 0){ -+ kfree(pri->local_addr); -+ pri->local_addr = NULL; -+ } -+} -+ -+static int daemon_open(void *data) -+{ -+ struct daemon_data *pri = data; -+ return(pri->fd); -+} -+ -+static void daemon_remove(void *data) -+{ -+ struct daemon_data *pri = data; -+ -+ os_close_file(pri->fd); -+ os_close_file(pri->control); -+ if(pri->data_addr != NULL) kfree(pri->data_addr); -+ if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); -+ if(pri->local_addr != NULL) kfree(pri->local_addr); -+} -+ -+int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -+{ -+ struct sockaddr_un *data_addr = pri->data_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int daemon_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info daemon_user_info = { -+ .init = daemon_user_init, -+ .open = daemon_open, -+ .close = NULL, -+ .remove = daemon_remove, -+ .set_mtu = daemon_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/fd.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/fd.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/fd.c 2005-05-03 22:28:14.208450200 +0300 -@@ -0,0 +1,108 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <termios.h> -+#include <errno.h> -+#include "user.h" -+#include "user_util.h" -+#include "chan_user.h" -+ -+struct fd_chan { -+ int fd; -+ int raw; -+ struct termios tt; -+ char str[sizeof("1234567890\0")]; -+}; -+ -+void *fd_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct fd_chan *data; -+ char *end; -+ int n; -+ -+ if(*str != ':'){ -+ printk("fd_init : channel type 'fd' must specify a file " -+ "descriptor\n"); -+ return(NULL); -+ } -+ str++; -+ n = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("fd_init : couldn't parse file descriptor '%s'\n", str); -+ return(NULL); -+ } -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct fd_chan) { .fd = n, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int fd_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct fd_chan *data = d; -+ int err; -+ -+ if(data->raw && isatty(data->fd)){ -+ CATCH_EINTR(err = tcgetattr(data->fd, &data->tt)); -+ if(err) -+ return(err); -+ -+ err = raw(data->fd); -+ if(err) -+ return(err); -+ } -+ sprintf(data->str, "%d", data->fd); -+ *dev_out = data->str; -+ return(data->fd); -+} -+ -+void fd_close(int fd, void *d) -+{ -+ struct fd_chan *data = d; -+ int err; -+ -+ if(data->raw && isatty(fd)){ -+ CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &data->tt)); -+ if(err) -+ printk("Failed to restore terminal state - " -+ "errno = %d\n", -err); -+ data->raw = 0; -+ } -+} -+ -+int fd_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct fd_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops fd_ops = { -+ .type = "fd", -+ .init = fd_init, -+ .open = fd_open, -+ .close = fd_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = fd_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/harddog_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/harddog_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/harddog_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,194 @@ -+/* UML hardware watchdog, shamelessly stolen from: -+ * -+ * SoftDog 0.05: A Software Watchdog Device -+ * -+ * (c) Copyright 1996 Alan Cox <alan@redhat.com>, All Rights Reserved. -+ * http://www.redhat.com -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Neither Alan Cox nor CymruNet Ltd. admit liability nor provide -+ * warranty for any of this software. This material is provided -+ * "AS-IS" and at no charge. -+ * -+ * (c) Copyright 1995 Alan Cox <alan@lxorguk.ukuu.org.uk> -+ * -+ * Software only watchdog driver. Unlike its big brother the WDT501P -+ * driver this won't always recover a failed machine. -+ * -+ * 03/96: Angelo Haritsis <ah@doc.ic.ac.uk> : -+ * Modularised. -+ * Added soft_margin; use upon insmod to change the timer delay. -+ * NB: uses same minor as wdt (WATCHDOG_MINOR); we could use separate -+ * minors. -+ * -+ * 19980911 Alan Cox -+ * Made SMP safe for 2.3.x -+ * -+ * 20011127 Joel Becker (jlbec@evilplan.org> -+ * Added soft_noboot; Allows testing the softdog trigger without -+ * requiring a recompile. -+ * Added WDIOC_GETTIMEOUT and WDIOC_SETTIMOUT. -+ */ -+ -+#include <linux/module.h> -+#include <linux/config.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/fs.h> -+#include <linux/mm.h> -+#include <linux/miscdevice.h> -+#include <linux/watchdog.h> -+#include <linux/reboot.h> -+#include <linux/smp_lock.h> -+#include <linux/init.h> -+#include <asm/uaccess.h> -+#include "helper.h" -+#include "mconsole.h" -+ -+MODULE_LICENSE("GPL"); -+ -+/* Locked by the BKL in harddog_open and harddog_release */ -+static int timer_alive; -+static int harddog_in_fd = -1; -+static int harddog_out_fd = -1; -+ -+/* -+ * Allow only one person to hold it open -+ */ -+ -+extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); -+ -+static int harddog_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ char *sock = NULL; -+ -+ lock_kernel(); -+ if(timer_alive) -+ return -EBUSY; -+#ifdef CONFIG_HARDDOG_NOWAYOUT -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MCONSOLE -+ sock = mconsole_notify_socket(); -+#endif -+ err = start_watchdog(&harddog_in_fd, &harddog_out_fd, sock); -+ if(err) return(err); -+ -+ timer_alive = 1; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern void stop_watchdog(int in_fd, int out_fd); -+ -+static int harddog_release(struct inode *inode, struct file *file) -+{ -+ /* -+ * Shut off the timer. -+ */ -+ lock_kernel(); -+ -+ stop_watchdog(harddog_in_fd, harddog_out_fd); -+ harddog_in_fd = -1; -+ harddog_out_fd = -1; -+ -+ timer_alive=0; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern int ping_watchdog(int fd); -+ -+static ssize_t harddog_write(struct file *file, const char *data, size_t len, -+ loff_t *ppos) -+{ -+ /* Can't seek (pwrite) on this device */ -+ if (ppos != &file->f_pos) -+ return -ESPIPE; -+ -+ /* -+ * Refresh the timer. -+ */ -+ if(len) -+ return(ping_watchdog(harddog_out_fd)); -+ return 0; -+} -+ -+static int harddog_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ static struct watchdog_info ident = { -+ WDIOF_SETTIMEOUT, -+ 0, -+ "UML Hardware Watchdog" -+ }; -+ switch (cmd) { -+ default: -+ return -ENOTTY; -+ case WDIOC_GETSUPPORT: -+ if(copy_to_user((struct harddog_info *)arg, &ident, -+ sizeof(ident))) -+ return -EFAULT; -+ return 0; -+ case WDIOC_GETSTATUS: -+ case WDIOC_GETBOOTSTATUS: -+ return put_user(0,(int *)arg); -+ case WDIOC_KEEPALIVE: -+ return(ping_watchdog(harddog_out_fd)); -+ } -+} -+ -+static struct file_operations harddog_fops = { -+ .owner = THIS_MODULE, -+ .write = harddog_write, -+ .ioctl = harddog_ioctl, -+ .open = harddog_open, -+ .release = harddog_release, -+}; -+ -+static struct miscdevice harddog_miscdev = { -+ .minor = WATCHDOG_MINOR, -+ .name = "watchdog", -+ .fops = &harddog_fops, -+}; -+ -+static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n"; -+ -+static int __init harddog_init(void) -+{ -+ int ret; -+ -+ ret = misc_register(&harddog_miscdev); -+ -+ if (ret) -+ return ret; -+ -+ printk(banner); -+ -+ return(0); -+} -+ -+static void __exit harddog_exit(void) -+{ -+ misc_deregister(&harddog_miscdev); -+} -+ -+module_init(harddog_init); -+module_exit(harddog_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/harddog_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/harddog_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/harddog_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include "user_util.h" -+#include "user.h" -+#include "helper.h" -+#include "mconsole.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+struct dog_data { -+ int stdin; -+ int stdout; -+ int close_me[2]; -+}; -+ -+static void pre_exec(void *d) -+{ -+ struct dog_data *data = d; -+ -+ dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ dup2(data->stdout, 2); -+ os_close_file(data->stdin); -+ os_close_file(data->stdout); -+ os_close_file(data->close_me[0]); -+ os_close_file(data->close_me[1]); -+} -+ -+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) -+{ -+ struct dog_data data; -+ int in_fds[2], out_fds[2], pid, n, err; -+ char pid_buf[sizeof("nnnnn\0")], c; -+ char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL }; -+ char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL, -+ NULL }; -+ char **args = NULL; -+ -+ err = os_pipe(in_fds, 1, 0); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = os_pipe(out_fds, 1, 0); -+ if(err < 0){ -+ printk("harddog_open - os_pipe failed, err = %d\n", -err); -+ goto out_close_in; -+ } -+ -+ data.stdin = out_fds[0]; -+ data.stdout = in_fds[1]; -+ data.close_me[0] = out_fds[1]; -+ data.close_me[1] = in_fds[0]; -+ -+ if(sock != NULL){ -+ mconsole_args[2] = sock; -+ args = mconsole_args; -+ } -+ else { -+ /* XXX The os_getpid() is not SMP correct */ -+ sprintf(pid_buf, "%d", CHOOSE_MODE(tracing_pid, os_getpid())); -+ args = pid_args; -+ } -+ -+ pid = run_helper(pre_exec, &data, args, NULL); -+ -+ os_close_file(out_fds[0]); -+ os_close_file(in_fds[1]); -+ -+ if(pid < 0){ -+ err = -pid; -+ printk("harddog_open - run_helper failed, errno = %d\n", -err); -+ goto out_close_out; -+ } -+ -+ n = os_read_file(in_fds[0], &c, sizeof(c)); -+ if(n == 0){ -+ printk("harddog_open - EOF on watchdog pipe\n"); -+ helper_wait(pid); -+ err = -EIO; -+ goto out_close_out; -+ } -+ else if(n < 0){ -+ printk("harddog_open - read of watchdog pipe failed, " -+ "err = %d\n", -n); -+ helper_wait(pid); -+ err = n; -+ goto out_close_out; -+ } -+ *in_fd_ret = in_fds[0]; -+ *out_fd_ret = out_fds[1]; -+ return(0); -+ -+ out_close_in: -+ os_close_file(in_fds[0]); -+ os_close_file(in_fds[1]); -+ out_close_out: -+ os_close_file(out_fds[0]); -+ os_close_file(out_fds[1]); -+ out: -+ return(err); -+} -+ -+void stop_watchdog(int in_fd, int out_fd) -+{ -+ os_close_file(in_fd); -+ os_close_file(out_fd); -+} -+ -+int ping_watchdog(int fd) -+{ -+ int n; -+ char c = '\n'; -+ -+ n = os_write_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("ping_watchdog - write failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ return(-EIO); -+ } -+ return 1; -+ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/hostaudio_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/hostaudio_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/hostaudio_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,352 @@ -+/* -+ * Copyright (C) 2002 Steve Schmidtke -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/slab.h" -+#include "linux/fs.h" -+#include "linux/sound.h" -+#include "linux/soundcard.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "init.h" -+#include "os.h" -+ -+struct hostaudio_state { -+ int fd; -+}; -+ -+struct hostmixer_state { -+ int fd; -+}; -+ -+#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" -+#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" -+ -+/* Only changed from linux_main at boot time */ -+char *dsp = HOSTAUDIO_DEV_DSP; -+char *mixer = HOSTAUDIO_DEV_MIXER; -+ -+#define DSP_HELP \ -+" This is used to specify the host dsp device to the hostaudio driver.\n" \ -+" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -+ -+#define MIXER_HELP \ -+" This is used to specify the host mixer device to the hostaudio driver.\n"\ -+" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -+ -+#ifndef MODULE -+static int set_dsp(char *name, int *add) -+{ -+ dsp = name; -+ return(0); -+} -+ -+__uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); -+ -+static int set_mixer(char *name, int *add) -+{ -+ mixer = name; -+ return(0); -+} -+ -+__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); -+ -+#else /*MODULE*/ -+ -+MODULE_PARM(dsp, "s"); -+MODULE_PARM_DESC(dsp, DSP_HELP); -+ -+MODULE_PARM(mixer, "s"); -+MODULE_PARM_DESC(mixer, MIXER_HELP); -+ -+#endif -+ -+/* /dev/dsp file operations */ -+ -+static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, -+ loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: read called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = os_read_file(state->fd, kbuf, count); -+ if(err < 0) -+ goto out; -+ -+ if(copy_to_user(buffer, kbuf, err)) -+ err = -EFAULT; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static ssize_t hostaudio_write(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: write called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = -EFAULT; -+ if(copy_from_user(kbuf, buffer, count)) -+ goto out; -+ -+ err = os_write_file(state->fd, kbuf, count); -+ if(err < 0) -+ goto out; -+ *ppos += err; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static unsigned int hostaudio_poll(struct file *file, -+ struct poll_table_struct *wait) -+{ -+ unsigned int mask = 0; -+ -+#ifdef DEBUG -+ printk("hostaudio: poll called (unimplemented)\n"); -+#endif -+ -+ return(mask); -+} -+ -+static int hostaudio_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostaudio_state *state = file->private_data; -+ unsigned long data = 0; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: ioctl called, cmd = %u\n", cmd); -+#endif -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(get_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ err = os_ioctl_generic(state->fd, cmd, (unsigned long) &data); -+ -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(put_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ return(err); -+} -+ -+static int hostaudio_open(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostaudio: open called (host: %s)\n", dsp); -+#endif -+ -+ state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); -+ if(state == NULL) -+ return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); -+ if(ret < 0){ -+ kfree(state); -+ return(ret); -+ } -+ -+ state->fd = ret; -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostaudio_release(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostaudio: release called\n"); -+#endif -+ -+ os_close_file(state->fd); -+ kfree(state); -+ -+ return(0); -+} -+ -+/* /dev/mixer file operations */ -+ -+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostmixer_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostmixer: ioctl called\n"); -+#endif -+ -+ return(os_ioctl_generic(state->fd, cmd, arg)); -+} -+ -+static int hostmixer_open_mixdev(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostmixer: open called (host: %s)\n", mixer); -+#endif -+ -+ state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL); -+ if(state == NULL) return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); -+ -+ if(ret < 0){ -+ printk("hostaudio_open_mixdev failed to open '%s', err = %d\n", -+ dsp, -ret); -+ kfree(state); -+ return(ret); -+ } -+ -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostmixer_release(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostmixer: release called\n"); -+#endif -+ -+ os_close_file(state->fd); -+ kfree(state); -+ -+ return(0); -+} -+ -+ -+/* kernel module operations */ -+ -+static struct file_operations hostaudio_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .read = hostaudio_read, -+ .write = hostaudio_write, -+ .poll = hostaudio_poll, -+ .ioctl = hostaudio_ioctl, -+ .mmap = NULL, -+ .open = hostaudio_open, -+ .release = hostaudio_release, -+}; -+ -+static struct file_operations hostmixer_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .ioctl = hostmixer_ioctl_mixdev, -+ .open = hostmixer_open_mixdev, -+ .release = hostmixer_release, -+}; -+ -+struct { -+ int dev_audio; -+ int dev_mixer; -+} module_data; -+ -+MODULE_AUTHOR("Steve Schmidtke"); -+MODULE_DESCRIPTION("UML Audio Relay"); -+MODULE_LICENSE("GPL"); -+ -+static int __init hostaudio_init_module(void) -+{ -+ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", -+ dsp, mixer); -+ -+ module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); -+ if(module_data.dev_audio < 0){ -+ printk(KERN_ERR "hostaudio: couldn't register DSP device!\n"); -+ return -ENODEV; -+ } -+ -+ module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1); -+ if(module_data.dev_mixer < 0){ -+ printk(KERN_ERR "hostmixer: couldn't register mixer " -+ "device!\n"); -+ unregister_sound_dsp(module_data.dev_audio); -+ return -ENODEV; -+ } -+ -+ return 0; -+} -+ -+static void __exit hostaudio_cleanup_module (void) -+{ -+ unregister_sound_mixer(module_data.dev_mixer); -+ unregister_sound_dsp(module_data.dev_audio); -+} -+ -+module_init(hostaudio_init_module); -+module_exit(hostaudio_cleanup_module); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/line.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/line.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/line.c 2005-05-03 22:28:14.214449288 +0300 -@@ -0,0 +1,610 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/list.h" -+#include "linux/devfs_fs_kernel.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "chan_kern.h" -+#include "irq_user.h" -+#include "line.h" -+#include "kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "os.h" -+#include "irq_kern.h" -+ -+#define LINE_BUFSIZE 4096 -+ -+static void line_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ -+ if(dev->count > 0) -+ chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, -+ dev); -+} -+ -+static void line_timer_cb(void *arg) -+{ -+ struct line *dev = arg; -+ -+ line_interrupt(dev->driver->read_irq, dev, NULL); -+} -+ -+static int write_room(struct line *dev) -+{ -+ int n; -+ -+ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); -+ -+ n = dev->head - dev->tail; -+ if(n <= 0) n = LINE_BUFSIZE + n; -+ return(n - 1); -+} -+ -+static int buffer_data(struct line *line, const char *buf, int len) -+{ -+ int end, room; -+ -+ if(line->buffer == NULL){ -+ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); -+ if(line->buffer == NULL){ -+ printk("buffer_data - atomic allocation failed\n"); -+ return(0); -+ } -+ line->head = line->buffer; -+ line->tail = line->buffer; -+ } -+ -+ room = write_room(line); -+ len = (len > room) ? room : len; -+ -+ end = line->buffer + LINE_BUFSIZE - line->tail; -+ if(len < end){ -+ memcpy(line->tail, buf, len); -+ line->tail += len; -+ } -+ else { -+ memcpy(line->tail, buf, end); -+ buf += end; -+ len -= end; -+ memcpy(line->buffer, buf, len); -+ line->tail = line->buffer + len; -+ } -+ -+ return(len); -+} -+ -+static int flush_buffer(struct line *line) -+{ -+ int n, count; -+ -+ if((line->buffer == NULL) || (line->head == line->tail)) return(1); -+ -+ if(line->tail < line->head){ -+ count = line->buffer + LINE_BUFSIZE - line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ if(n == count) line->head = line->buffer; -+ else { -+ line->head += n; -+ return(0); -+ } -+ } -+ -+ count = line->tail - line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ -+ line->head += n; -+ return(line->head == line->tail); -+} -+ -+int line_write(struct line *lines, struct tty_struct *tty, int from_user, -+ const char *buf, int len) -+{ -+ struct line *line; -+ char *new; -+ unsigned long flags; -+ int n, err, i, ret = 0; -+ -+ if(tty->stopped) return 0; -+ -+ if(from_user){ -+ new = kmalloc(len, GFP_KERNEL); -+ if(new == NULL) -+ return(0); -+ n = copy_from_user(new, buf, len); -+ buf = new; -+ if(n == len){ -+ len = -EFAULT; -+ goto out_free; -+ } -+ -+ len -= n; -+ } -+ -+ i = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[i]; -+ -+ down(&line->sem); -+ if(line->head != line->tail){ -+ local_irq_save(flags); -+ ret += buffer_data(line, buf, len); -+ err = flush_buffer(line); -+ local_irq_restore(flags); -+ if(err <= 0) -+ goto out_up; -+ } -+ else { -+ n = write_chan(&line->chan_list, buf, len, -+ line->driver->write_irq); -+ if(n < 0){ -+ ret = n; -+ goto out_up; -+ } -+ -+ len -= n; -+ ret += n; -+ if(len > 0) -+ ret += buffer_data(line, buf + n, len); -+ } -+ out_up: -+ up(&line->sem); -+ -+ out_free: -+ if(from_user) -+ kfree(buf); -+ return(ret); -+} -+ -+static void line_write_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ struct tty_struct *tty = dev->tty; -+ int err; -+ -+ err = flush_buffer(dev); -+ if(err == 0) return; -+ else if(err < 0){ -+ dev->head = dev->buffer; -+ dev->tail = dev->buffer; -+ } -+ -+ if(tty == NULL) return; -+ -+ if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && -+ (tty->ldisc.write_wakeup != NULL)) -+ (tty->ldisc.write_wakeup)(tty); -+ -+ /* BLOCKING mode -+ * In blocking mode, everything sleeps on tty->write_wait. -+ * Sleeping in the console driver would break non-blocking -+ * writes. -+ */ -+ -+ if (waitqueue_active(&tty->write_wait)) -+ wake_up_interruptible(&tty->write_wait); -+ -+} -+ -+int line_setup_irq(int fd, int input, int output, void *data) -+{ -+ struct line *line = data; -+ struct line_driver *driver = line->driver; -+ int err = 0, flags = SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM; -+ -+ if(input) err = um_request_irq(driver->read_irq, fd, IRQ_READ, -+ line_interrupt, flags, -+ driver->read_irq_name, line); -+ if(err) return(err); -+ if(output) err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, -+ line_write_interrupt, flags, -+ driver->write_irq_name, line); -+ line->have_irq = 1; -+ return(err); -+} -+ -+void line_disable(struct line *line, int current_irq) -+{ -+ if(!line->have_irq) return; -+ -+ if(line->driver->read_irq == current_irq) -+ free_irq_later(line->driver->read_irq, line); -+ else -+ free_irq(line->driver->read_irq, line); -+ -+ if(line->driver->write_irq == current_irq) -+ free_irq_later(line->driver->write_irq, line); -+ else -+ free_irq(line->driver->write_irq, line); -+ -+ line->have_irq = 0; -+} -+ -+int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts) -+{ -+ struct line *line; -+ int n, err = 0; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ if(line->count == 0){ -+ if(!line->valid){ -+ err = -ENODEV; -+ goto out; -+ } -+ if(list_empty(&line->chan_list)){ -+ err = parse_chan_pair(line->init_str, &line->chan_list, -+ line->init_pri, n, opts); -+ if(err) goto out; -+ err = open_chan(&line->chan_list); -+ if(err) goto out; -+ } -+ enable_chan(&line->chan_list, line); -+ INIT_TQUEUE(&line->task, line_timer_cb, line); -+ } -+ -+ if(!line->sigio){ -+ chan_enable_winch(&line->chan_list, line); -+ line->sigio = 1; -+ } -+ -+ /* This is outside the if because the initial console is opened -+ * with tty == NULL -+ */ -+ line->tty = tty; -+ -+ if(tty != NULL){ -+ tty->driver_data = line; -+ chan_window_size(&line->chan_list, &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ } -+ -+ line->count++; -+ out: -+ up(&line->sem); -+ return(err); -+} -+ -+void line_close(struct line *lines, struct tty_struct *tty) -+{ -+ struct line *line; -+ int n; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ line->count--; -+ -+ /* I don't like this, but I can't think of anything better. What's -+ * going on is that the tty is in the process of being closed for -+ * the last time. Its count hasn't been dropped yet, so it's still -+ * at 1. This may happen when line->count != 0 because of the initial -+ * console open (without a tty) bumping it up to 1. -+ */ -+ if((line->tty != NULL) && (line->tty->count == 1)) -+ line->tty = NULL; -+ if(line->count == 0) -+ line_disable(line, -1); -+ up(&line->sem); -+} -+ -+void close_lines(struct line *lines, int nlines) -+{ -+ int i; -+ -+ for(i = 0; i < nlines; i++) -+ close_chan(&lines[i].chan_list); -+} -+ -+int line_setup(struct line *lines, int num, char *init, int all_allowed) -+{ -+ int i, n; -+ char *end; -+ -+ if(*init == '=') n = -1; -+ else { -+ n = simple_strtoul(init, &end, 0); -+ if(*end != '='){ -+ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", -+ init); -+ return(0); -+ } -+ init = end; -+ } -+ init++; -+ if((n >= 0) && (n >= num)){ -+ printk("line_setup - %d out of range ((0 ... %d) allowed)\n", -+ n, num); -+ return(0); -+ } -+ else if(n >= 0){ -+ if(lines[n].count > 0){ -+ printk("line_setup - device %d is open\n", n); -+ return(0); -+ } -+ if(lines[n].init_pri <= INIT_ONE){ -+ lines[n].init_pri = INIT_ONE; -+ if(!strcmp(init, "none")) lines[n].valid = 0; -+ else { -+ lines[n].init_str = init; -+ lines[n].valid = 1; -+ } -+ } -+ } -+ else if(!all_allowed){ -+ printk("line_setup - can't configure all devices from " -+ "mconsole\n"); -+ return(0); -+ } -+ else { -+ for(i = 0; i < num; i++){ -+ if(lines[i].init_pri <= INIT_ALL){ -+ lines[i].init_pri = INIT_ALL; -+ if(!strcmp(init, "none")) lines[i].valid = 0; -+ else { -+ lines[i].init_str = init; -+ lines[i].valid = 1; -+ } -+ } -+ } -+ } -+ return(1); -+} -+ -+int line_config(struct line *lines, int num, char *str) -+{ -+ char *new = uml_strdup(str); -+ -+ if(new == NULL){ -+ printk("line_config - uml_strdup failed\n"); -+ return(-ENOMEM); -+ } -+ return(!line_setup(lines, num, new, 0)); -+} -+ -+int line_get_config(char *name, struct line *lines, int num, char *str, -+ int size, char **error_out) -+{ -+ struct line *line; -+ char *end; -+ int dev, n = 0; -+ -+ dev = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "line_get_config failed to parse device number"; -+ return(0); -+ } -+ -+ if((dev < 0) || (dev >= num)){ -+ *error_out = "device number of of range"; -+ return(0); -+ } -+ -+ line = &lines[dev]; -+ -+ down(&line->sem); -+ if(!line->valid) -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ else if(line->count == 0) -+ CONFIG_CHUNK(str, size, n, line->init_str, 1); -+ else n = chan_config_string(&line->chan_list, str, size, error_out); -+ up(&line->sem); -+ -+ return(n); -+} -+ -+int line_remove(struct line *lines, int num, char *str) -+{ -+ char config[sizeof("conxxxx=none\0")]; -+ -+ sprintf(config, "%s=none", str); -+ return(!line_setup(lines, num, config, 0)); -+} -+ -+static int line_write_room(struct tty_struct *tty) -+{ -+ struct line *dev = tty->driver_data; -+ -+ return(write_room(dev)); -+} -+ -+void line_register_devfs(struct lines *set, struct line_driver *line_driver, -+ struct tty_driver *driver, struct line *lines, -+ int nlines) -+{ -+ int err, i, n; -+ char *from, *to; -+ -+ driver->driver_name = line_driver->name; -+ driver->name = line_driver->devfs_name; -+ driver->major = line_driver->major; -+ driver->minor_start = line_driver->minor_start; -+ driver->type = line_driver->type; -+ driver->subtype = line_driver->subtype; -+ driver->magic = TTY_DRIVER_MAGIC; -+ driver->flags = TTY_DRIVER_REAL_RAW; -+ -+ n = set->num; -+ driver->num = n; -+ driver->table = kmalloc(n * sizeof(driver->table[0]), GFP_KERNEL); -+ driver->termios = kmalloc(n * sizeof(driver->termios[0]), GFP_KERNEL); -+ driver->termios_locked = kmalloc(n * sizeof(driver->termios_locked[0]), -+ GFP_KERNEL); -+ if((driver->table == NULL) || (driver->termios == NULL) || -+ (driver->termios_locked == NULL)) -+ panic("Failed to allocate driver table"); -+ -+ memset(driver->table, 0, n * sizeof(driver->table[0])); -+ memset(driver->termios, 0, n * sizeof(driver->termios[0])); -+ memset(driver->termios_locked, 0, -+ n * sizeof(driver->termios_locked[0])); -+ -+ driver->write_room = line_write_room; -+ driver->init_termios = tty_std_termios; -+ -+ if (tty_register_driver(driver)) -+ panic("line_register_devfs : Couldn't register driver\n"); -+ -+ from = line_driver->symlink_from; -+ to = line_driver->symlink_to; -+ err = devfs_mk_symlink(NULL, from, 0, to, NULL, NULL); -+ if(err) printk("Symlink creation from /dev/%s to /dev/%s " -+ "returned %d\n", from, to, err); -+ -+ for(i = 0; i < nlines; i++){ -+ if(!lines[i].valid) -+ tty_unregister_devfs(driver, driver->minor_start + i); -+ } -+ -+ mconsole_register_dev(&line_driver->mc); -+} -+ -+void lines_init(struct line *lines, int nlines) -+{ -+ struct line *line; -+ int i; -+ -+ for(i = 0; i < nlines; i++){ -+ line = &lines[i]; -+ INIT_LIST_HEAD(&line->chan_list); -+ sema_init(&line->sem, 1); -+ if(line->init_str != NULL){ -+ line->init_str = uml_strdup(line->init_str); -+ if(line->init_str == NULL) -+ printk("lines_init - uml_strdup returned " -+ "NULL\n"); -+ } -+ } -+} -+ -+struct winch { -+ struct list_head list; -+ int fd; -+ int tty_fd; -+ int pid; -+ struct line *line; -+}; -+ -+void winch_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct winch *winch = data; -+ struct tty_struct *tty; -+ int err; -+ char c; -+ -+ if(winch->fd != -1){ -+ err = generic_read(winch->fd, &c, NULL); -+ if(err < 0){ -+ if(err != -EAGAIN){ -+ printk("winch_interrupt : read failed, " -+ "errno = %d\n", -err); -+ printk("fd %d is losing SIGWINCH support\n", -+ winch->tty_fd); -+ return; -+ } -+ goto out; -+ } -+ } -+ tty = winch->line->tty; -+ if(tty != NULL){ -+ chan_window_size(&winch->line->chan_list, -+ &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ kill_pg(tty->pgrp, SIGWINCH, 1); -+ } -+ out: -+ if(winch->fd != -1) -+ reactivate_fd(winch->fd, WINCH_IRQ); -+} -+ -+DECLARE_MUTEX(winch_handler_sem); -+LIST_HEAD(winch_handlers); -+ -+void register_winch_irq(int fd, int tty_fd, int pid, void *line) -+{ -+ struct winch *winch; -+ -+ down(&winch_handler_sem); -+ winch = kmalloc(sizeof(*winch), GFP_KERNEL); -+ if(winch == NULL){ -+ printk("register_winch_irq - kmalloc failed\n"); -+ goto out; -+ } -+ *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), -+ .fd = fd, -+ .tty_fd = tty_fd, -+ .pid = pid, -+ .line = line }); -+ list_add(&winch->list, &winch_handlers); -+ if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "winch", winch) < 0) -+ printk("register_winch_irq - failed to register IRQ\n"); -+ out: -+ up(&winch_handler_sem); -+} -+ -+static void winch_cleanup(void) -+{ -+ struct list_head *ele; -+ struct winch *winch; -+ -+ list_for_each(ele, &winch_handlers){ -+ winch = list_entry(ele, struct winch, list); -+ if(winch->fd != -1){ -+ deactivate_fd(winch->fd, WINCH_IRQ); -+ os_close_file(winch->fd); -+ } -+ if(winch->pid != -1) -+ os_kill_process(winch->pid, 1); -+ } -+} -+ -+__uml_exitcall(winch_cleanup); -+ -+char *add_xterm_umid(char *base) -+{ -+ char *umid, *title; -+ int len; -+ -+ umid = get_umid(1); -+ if(umid == NULL) return(base); -+ -+ len = strlen(base) + strlen(" ()") + strlen(umid) + 1; -+ title = kmalloc(len, GFP_KERNEL); -+ if(title == NULL){ -+ printk("Failed to allocate buffer for xterm title\n"); -+ return(base); -+ } -+ -+ strncpy(title, base, len); -+ len -= strlen(title); -+ snprintf(&title[strlen(title)], len, " (%s)", umid); -+ return(title); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/Makefile 2005-05-03 22:28:14.215449136 +0300 -@@ -0,0 +1,97 @@ -+# -+# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+CHAN_OBJS := chan_kern.o chan_user.o line.o -+ -+list-multi := slip.o slirp.o daemon.o mcast.o mconsole.o net.o ubd.o \ -+ hostaudio.o pcap.o port.o harddog.o -+ -+slip-objs := slip_kern.o slip_user.o -+slirp-objs := slirp_kern.o slirp_user.o -+daemon-objs := daemon_kern.o daemon_user.o -+mcast-objs := mcast_kern.o mcast_user.o -+pcap-objs := pcap_kern.o pcap_user.o -+pcap-libs := -lpcap -L/usr/lib -+net-objs := net_kern.o net_user.o -+mconsole-objs := mconsole_kern.o mconsole_user.o -+hostaudio-objs := hostaudio_kern.o -+ubd-objs := ubd_kern.o ubd_user.o -+port-objs := port_kern.o port_user.o -+harddog-objs := harddog_kern.o harddog_user.o -+ -+export-objs := mconsole_kern.o -+ -+obj-y = -+obj-$(CONFIG_SSL) += ssl.o -+obj-$(CONFIG_UML_NET_SLIP) += slip.o -+obj-$(CONFIG_UML_NET_SLIRP) += slirp.o -+obj-$(CONFIG_UML_NET_DAEMON) += daemon.o -+obj-$(CONFIG_UML_NET_MCAST) += mcast.o -+obj-$(CONFIG_UML_NET_PCAP) += pcap.o -+obj-$(CONFIG_UML_NET) += net.o -+obj-$(CONFIG_MCONSOLE) += mconsole.o -+obj-$(CONFIG_MMAPPER) += mmapper_kern.o -+obj-$(CONFIG_BLK_DEV_UBD) += ubd.o -+obj-$(CONFIG_HOSTAUDIO) += hostaudio.o -+obj-$(CONFIG_FD_CHAN) += fd.o -+obj-$(CONFIG_NULL_CHAN) += null.o -+obj-$(CONFIG_PORT_CHAN) += port.o -+obj-$(CONFIG_PTY_CHAN) += pty.o -+obj-$(CONFIG_TTY_CHAN) += tty.o -+obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o -+obj-$(CONFIG_UML_WATCHDOG) += harddog.o -+obj-$(CONFIG_COW) += cow_kern.o -+obj-$(CONFIG_COW_COMMON) += cow_user.o -+ -+CFLAGS_pcap_user.o = -I/usr/include/pcap -+ -+obj-y += stdio_console.o $(CHAN_OBJS) -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ -+ null.o pty.o tty.o xterm.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+daemon.o : $(daemon-objs) -+ -+slip.o : $(slip-objs) -+ -+slirp.o : $(slirp-objs) -+ -+mcast.o : $(mcast-objs) -+ -+pcap.o : $(pcap-objs) -+ -+mconsole.o : $(mconsole-objs) -+ -+net.o : $(net-objs) -+ -+hostaudio.o : $(hostaudio-objs) -+ -+ubd.o : $(ubd-objs) -+ -+port.o : $(port-objs) -+ -+harddog.o : $(harddog-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) -r -o $@ $($(patsubst %.o,%,$@)-objs) $($(patsubst %.o,%,$@)-libs) -Index: linux-2.4.29/arch/um/drivers/mcast.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mcast.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mcast.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct mcast_data { -+ char *addr; -+ unsigned short port; -+ void *mcast_addr; -+ int ttl; -+ void *dev; -+}; -+ -+extern struct net_user_info mcast_user_info; -+ -+extern int mcast_user_write(int fd, void *buf, int len, -+ struct mcast_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/mcast_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mcast_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mcast_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,145 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/in.h" -+#include "linux/inet.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mcast.h" -+ -+struct mcast_init { -+ char *addr; -+ int port; -+ int ttl; -+}; -+ -+void mcast_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct mcast_data *dpri; -+ struct mcast_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct mcast_data *) pri->user; -+ *dpri = ((struct mcast_data) -+ { .addr = init->addr, -+ .port = init->port, -+ .ttl = init->ttl, -+ .mcast_addr = NULL, -+ .dev = dev }); -+ printk("mcast backend "); -+ printk("multicast adddress: %s:%u, TTL:%u ", -+ dpri->addr, dpri->port, dpri->ttl); -+ -+ printk("\n"); -+} -+ -+static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int mcast_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return mcast_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct mcast_data *) &lp->user); -+} -+ -+static struct net_kern_info mcast_kern_info = { -+ .init = mcast_init, -+ .protocol = eth_protocol, -+ .read = mcast_read, -+ .write = mcast_write, -+}; -+ -+int mcast_setup(char *str, char **mac_out, void *data) -+{ -+ struct mcast_init *init = data; -+ char *port_str = NULL, *ttl_str = NULL, *remain; -+ char *last; -+ int n; -+ -+ *init = ((struct mcast_init) -+ { .addr = "239.192.168.1", -+ .port = 1102, -+ .ttl = 1 }); -+ -+ remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, -+ NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "mcast_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(port_str != NULL){ -+ n = simple_strtoul(port_str, &last, 10); -+ if((*last != '\0') || (last == port_str)){ -+ printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", -+ port_str); -+ return(0); -+ } -+ init->port = htons(n); -+ } -+ -+ if(ttl_str != NULL){ -+ init->ttl = simple_strtoul(ttl_str, &last, 10); -+ if((*last != '\0') || (last == ttl_str)){ -+ printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", -+ ttl_str); -+ return(0); -+ } -+ } -+ -+ printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, -+ init->port, init->ttl); -+ -+ return(1); -+} -+ -+static struct transport mcast_transport = { -+ .list = LIST_HEAD_INIT(mcast_transport.list), -+ .name = "mcast", -+ .setup = mcast_setup, -+ .user = &mcast_user_info, -+ .kern = &mcast_kern_info, -+ .private_size = sizeof(struct mcast_data), -+ .setup_size = sizeof(struct mcast_init), -+}; -+ -+static int register_mcast(void) -+{ -+ register_transport(&mcast_transport); -+ return(1); -+} -+ -+__initcall(register_mcast); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/mcast_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mcast_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mcast_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,177 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ * -+ */ -+ -+#include <errno.h> -+#include <unistd.h> -+#include <linux/inet.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/time.h> -+#include <netinet/in.h> -+#include "net_user.h" -+#include "mcast.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+static struct sockaddr_in *new_addr(char *addr, unsigned short port) -+{ -+ struct sockaddr_in *sin; -+ -+ sin = um_kmalloc(sizeof(struct sockaddr_in)); -+ if(sin == NULL){ -+ printk("new_addr: allocation of sockaddr_in failed\n"); -+ return(NULL); -+ } -+ sin->sin_family = AF_INET; -+ sin->sin_addr.s_addr = in_aton(addr); -+ sin->sin_port = port; -+ return(sin); -+} -+ -+static void mcast_user_init(void *data, void *dev) -+{ -+ struct mcast_data *pri = data; -+ -+ pri->mcast_addr = new_addr(pri->addr, pri->port); -+ pri->dev = dev; -+} -+ -+static int mcast_open(void *data) -+{ -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ struct ip_mreq mreq; -+ int fd, yes = 1; -+ -+ -+ if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) { -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ fd = socket(AF_INET, SOCK_DGRAM, 0); -+ if (fd < 0){ -+ printk("mcast_open : data socket failed, errno = %d\n", -+ errno); -+ fd = -ENOMEM; -+ goto out; -+ } -+ -+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set ttl according to config */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, -+ sizeof(pri->ttl)) < 0) { -+ printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set LOOP, so data does get fed back to local sockets */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", -+ errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* bind socket to mcast address */ -+ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { -+ printk("mcast_open : data bind failed, errno = %d\n", errno); -+ os_close_file(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* subscribe to the multicast group */ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n", -+ errno); -+ printk("There appears not to be a multicast-capable network " -+ "interface on the host.\n"); -+ printk("eth0 should be configured in order to use the " -+ "multicast transport.\n"); -+ os_close_file(fd); -+ fd = -EINVAL; -+ } -+ -+ out: -+ return(fd); -+} -+ -+static void mcast_close(int fd, void *data) -+{ -+ struct ip_mreq mreq; -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_DROP_MEMBERSHIP failed, error = %d\n", -+ errno); -+ } -+ -+ os_close_file(fd); -+} -+ -+int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) -+{ -+ struct sockaddr_in *data_addr = pri->mcast_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int mcast_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info mcast_user_info = { -+ .init = mcast_user_init, -+ .open = mcast_open, -+ .close = mcast_close, -+ .remove = NULL, -+ .set_mtu = mcast_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/mconsole_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mconsole_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mconsole_kern.c 2005-05-03 22:28:14.222448072 +0300 -@@ -0,0 +1,560 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/slab.h" -+#include "linux/init.h" -+#include "linux/notifier.h" -+#include "linux/reboot.h" -+#include "linux/utsname.h" -+#include "linux/ctype.h" -+#include "linux/interrupt.h" -+#include "linux/sysrq.h" -+#include "linux/tqueue.h" -+#include "linux/module.h" -+#include "linux/file.h" -+#include "linux/fs.h" -+#include "linux/proc_fs.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole.h" -+#include "mconsole_kern.h" -+#include "irq_user.h" -+#include "init.h" -+#include "os.h" -+#include "umid.h" -+#include "irq_kern.h" -+ -+static int do_unlink_socket(struct notifier_block *notifier, -+ unsigned long what, void *data) -+{ -+ return(mconsole_unlink_socket()); -+} -+ -+ -+static struct notifier_block reboot_notifier = { -+ .notifier_call = do_unlink_socket, -+ .priority = 0, -+}; -+ -+/* Safe without explicit locking for now. Tasklets provide their own -+ * locking, and the interrupt handler is safe because it can't interrupt -+ * itself and it can only happen on CPU 0. -+ */ -+ -+LIST_HEAD(mc_requests); -+ -+static void mc_task_proc(void *unused) -+{ -+ struct mconsole_entry *req; -+ unsigned long flags; -+ -+ while(!list_empty(&mc_requests)){ -+ local_irq_save(flags); -+ req = list_entry(mc_requests.next, struct mconsole_entry, -+ list); -+ list_del(&req->list); -+ local_irq_restore(flags); -+ req->request.cmd->handler(&req->request); -+ kfree(req); -+ } -+} -+ -+struct tq_struct mconsole_task = { -+ .routine = mc_task_proc, -+ .data = NULL -+}; -+ -+static void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ int fd; -+ struct mconsole_entry *new; -+ struct mc_request req; -+ -+ fd = (int) dev_id; -+ while (mconsole_get_request(fd, &req)){ -+ if(req.cmd->context == MCONSOLE_INTR) -+ (*req.cmd->handler)(&req); -+ else { -+ new = kmalloc(sizeof(*new), GFP_ATOMIC); -+ if(new == NULL) -+ mconsole_reply(&req, "Out of memory", 1, 0); -+ else { -+ new->request = req; -+ list_add(&new->list, &mc_requests); -+ } -+ } -+ } -+ -+ if(!list_empty(&mc_requests)) -+ schedule_task(&mconsole_task); -+ reactivate_fd(fd, MCONSOLE_IRQ); -+} -+ -+void mconsole_version(struct mc_request *req) -+{ -+ char version[256]; -+ -+ sprintf(version, "%s %s %s %s %s", system_utsname.sysname, -+ system_utsname.nodename, system_utsname.release, -+ system_utsname.version, system_utsname.machine); -+ mconsole_reply(req, version, 0, 0); -+} -+ -+void mconsole_log(struct mc_request *req) -+{ -+ int len; -+ char *ptr = req->request.data; -+ -+ ptr += strlen("log "); -+ -+ len = req->len - (ptr - req->request.data); -+ printk("%.*s", len, ptr); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+void mconsole_proc(struct mc_request *req) -+{ -+ struct nameidata nd; -+ struct file_system_type *proc; -+ struct super_block *super; -+ struct file *file; -+ int n, err; -+ char *ptr = req->request.data, *buf; -+ -+ ptr += strlen("proc"); -+ while(isspace(*ptr)) ptr++; -+ -+ proc = get_fs_type("proc"); -+ if(proc == NULL){ -+ mconsole_reply(req, "procfs not registered", 1, 0); -+ goto out; -+ } -+ -+ super = get_anon_super(proc, NULL, NULL); -+ if(super == NULL){ -+ mconsole_reply(req, "Failed to get procfs superblock", 1, 0); -+ goto out_put; -+ } -+ -+ if(super->s_root == NULL){ -+ super = (*proc->read_super)(super, NULL, 0); -+ if(super == NULL){ -+ mconsole_reply(req, "Failed to read superblock", 1, 0); -+ goto out_put; -+ } -+ } -+ up_write(&super->s_umount); -+ -+ nd.dentry = super->s_root; -+ nd.mnt = NULL; -+ nd.flags = O_RDONLY + 1; -+ nd.last_type = LAST_ROOT; -+ -+ err = link_path_walk(ptr, &nd); -+ if(err){ -+ mconsole_reply(req, "Failed to look up file", 1, 0); -+ goto out_kill; -+ } -+ -+ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ if(IS_ERR(file)){ -+ mconsole_reply(req, "Failed to open file", 1, 0); -+ goto out_kill; -+ } -+ -+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ goto out_fput; -+ } -+ -+ if((file->f_op != NULL) && (file->f_op->read != NULL)){ -+ do { -+ n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, -+ &file->f_pos); -+ if(n >= 0){ -+ buf[n] = '\0'; -+ mconsole_reply(req, buf, 0, (n > 0)); -+ } -+ else { -+ mconsole_reply(req, "Read of file failed", -+ 1, 0); -+ goto out_free; -+ } -+ } while(n > 0); -+ } -+ else mconsole_reply(req, "", 0, 0); -+ -+ out_free: -+ kfree(buf); -+ out_fput: -+ fput(file); -+ out_kill: -+ kill_super(super); -+ out_put: -+ /* put_filesystem(proc); */ -+ out: ; -+} -+ -+#define UML_MCONSOLE_HELPTEXT \ -+"Commands: \n\ -+ version - Get kernel version \n\ -+ help - Print this message \n\ -+ halt - Halt UML \n\ -+ reboot - Reboot UML \n\ -+ config <dev>=<config> - Add a new device to UML; \n\ -+ same syntax as command line \n\ -+ config <dev> - Query the configuration of a device \n\ -+ remove <dev> - Remove a device from UML \n\ -+ sysrq <letter> - Performs the SysRq action controlled by the letter \n\ -+ cad - invoke the Ctl-Alt-Del handler \n\ -+ stop - pause the UML; it will do nothing until it receives a 'go' \n\ -+ go - continue the UML after a 'stop' \n\ -+ log <string> - make UML enter <string> into the kernel log\n\ -+ proc <file> - returns the contents of the UML's /proc/<file>\n\ -+" -+ -+void mconsole_help(struct mc_request *req) -+{ -+ mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0); -+} -+ -+void mconsole_halt(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_halt(); -+} -+ -+void mconsole_reboot(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_restart(NULL); -+} -+ -+extern void ctrl_alt_del(void); -+ -+void mconsole_cad(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ ctrl_alt_del(); -+} -+ -+void mconsole_go(struct mc_request *req) -+{ -+ mconsole_reply(req, "Not stopped", 1, 0); -+} -+ -+void mconsole_stop(struct mc_request *req) -+{ -+ deactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ os_set_fd_block(req->originating_fd, 1); -+ mconsole_reply(req, "", 0, 0); -+ while(mconsole_get_request(req->originating_fd, req)){ -+ if(req->cmd->handler == mconsole_go) break; -+ (*req->cmd->handler)(req); -+ } -+ os_set_fd_block(req->originating_fd, 0); -+ reactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+/* This list is populated by __initcall routines. */ -+ -+LIST_HEAD(mconsole_devices); -+ -+void mconsole_register_dev(struct mc_device *new) -+{ -+ list_add(&new->list, &mconsole_devices); -+} -+ -+static struct mc_device *mconsole_find_dev(char *name) -+{ -+ struct list_head *ele; -+ struct mc_device *dev; -+ -+ list_for_each(ele, &mconsole_devices){ -+ dev = list_entry(ele, struct mc_device, list); -+ if(!strncmp(name, dev->name, strlen(dev->name))) -+ return(dev); -+ } -+ return(NULL); -+} -+ -+#define CONFIG_BUF_SIZE 64 -+ -+static void mconsole_get_config(int (*get_config)(char *, char *, int, -+ char **), -+ struct mc_request *req, char *name) -+{ -+ char default_buf[CONFIG_BUF_SIZE], *error, *buf; -+ int n, size; -+ -+ if(get_config == NULL){ -+ mconsole_reply(req, "No get_config routine defined", 1, 0); -+ return; -+ } -+ -+ error = NULL; -+ size = sizeof(default_buf)/sizeof(default_buf[0]); -+ buf = default_buf; -+ -+ while(1){ -+ n = (*get_config)(name, buf, size, &error); -+ if(error != NULL){ -+ mconsole_reply(req, error, 1, 0); -+ goto out; -+ } -+ -+ if(n <= size){ -+ mconsole_reply(req, buf, 0, 0); -+ goto out; -+ } -+ -+ if(buf != default_buf) -+ kfree(buf); -+ -+ size = n; -+ buf = kmalloc(size, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ return; -+ } -+ } -+ out: -+ if(buf != default_buf) -+ kfree(buf); -+ -+} -+ -+void mconsole_config(struct mc_request *req) -+{ -+ struct mc_device *dev; -+ char *ptr = req->request.data, *name; -+ int err; -+ -+ ptr += strlen("config"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad configuration option", 1, 0); -+ return; -+ } -+ -+ name = &ptr[strlen(dev->name)]; -+ ptr = name; -+ while((*ptr != '=') && (*ptr != '\0')) -+ ptr++; -+ -+ if(*ptr == '='){ -+ err = (*dev->config)(name); -+ mconsole_reply(req, "", err, 0); -+ } -+ else mconsole_get_config(dev->get_config, req, name); -+} -+ -+void mconsole_remove(struct mc_request *req) -+{ -+ struct mc_device *dev; -+ char *ptr = req->request.data; -+ int err; -+ -+ ptr += strlen("remove"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad remove option", 1, 0); -+ return; -+ } -+ err = (*dev->remove)(&ptr[strlen(dev->name)]); -+ mconsole_reply(req, "", err, 0); -+} -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void mconsole_sysrq(struct mc_request *req) -+{ -+ char *ptr = req->request.data; -+ -+ ptr += strlen("sysrq"); -+ while(isspace(*ptr)) ptr++; -+ -+ mconsole_reply(req, "", 0, 0); -+ handle_sysrq(*ptr, ¤t->thread.regs, NULL, NULL); -+} -+#else -+void mconsole_sysrq(struct mc_request *req) -+{ -+ mconsole_reply(req, "Sysrq not compiled in", 1, 0); -+} -+#endif -+ -+/* Changed by mconsole_setup, which is __setup, and called before SMP is -+ * active. -+ */ -+static char *notify_socket = NULL; -+ -+int mconsole_init(void) -+{ -+ int err, sock; -+ char file[256]; -+ -+ if(umid_file_name("mconsole", file, sizeof(file))) return(-1); -+ snprintf(mconsole_socket_name, sizeof(file), "%s", file); -+ -+ sock = os_create_unix_socket(file, sizeof(file), 1); -+ if (sock < 0){ -+ printk("Failed to initialize management console\n"); -+ return(1); -+ } -+ -+ register_reboot_notifier(&reboot_notifier); -+ -+ err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "mconsole", (void *)sock); -+ if (err){ -+ printk("Failed to get IRQ for management console\n"); -+ return(1); -+ } -+ -+ if(notify_socket != NULL){ -+ notify_socket = uml_strdup(notify_socket); -+ if(notify_socket != NULL) -+ mconsole_notify(notify_socket, MCONSOLE_SOCKET, -+ mconsole_socket_name, -+ strlen(mconsole_socket_name) + 1); -+ else printk(KERN_ERR "mconsole_setup failed to strdup " -+ "string\n"); -+ } -+ -+ printk("mconsole (version %d) initialized on %s\n", -+ MCONSOLE_VERSION, mconsole_socket_name); -+ return(0); -+} -+ -+__initcall(mconsole_init); -+ -+static int write_proc_mconsole(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *buf; -+ -+ buf = kmalloc(count + 1, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ if(copy_from_user(buf, buffer, count)){ -+ count = -EFAULT; -+ goto out; -+ } -+ -+ buf[count] = '\0'; -+ -+ mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); -+ out: -+ kfree(buf); -+ return(count); -+} -+ -+static int create_proc_mconsole(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ if(notify_socket == NULL) return(0); -+ -+ ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL); -+ if(ent == NULL){ -+ printk("create_proc_mconsole : create_proc_entry failed\n"); -+ return(0); -+ } -+ -+ ent->read_proc = NULL; -+ ent->write_proc = write_proc_mconsole; -+ return(0); -+} -+ -+static spinlock_t notify_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void lock_notify(void) -+{ -+ spin_lock(¬ify_spinlock); -+} -+ -+void unlock_notify(void) -+{ -+ spin_unlock(¬ify_spinlock); -+} -+ -+__initcall(create_proc_mconsole); -+ -+#define NOTIFY "=notify:" -+ -+static int mconsole_setup(char *str) -+{ -+ if(!strncmp(str, NOTIFY, strlen(NOTIFY))){ -+ str += strlen(NOTIFY); -+ notify_socket = str; -+ } -+ else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str); -+ return(1); -+} -+ -+__setup("mconsole", mconsole_setup); -+ -+__uml_help(mconsole_setup, -+"mconsole=notify:<socket>\n" -+" Requests that the mconsole driver send a message to the named Unix\n" -+" socket containing the name of the mconsole socket. This also serves\n" -+" to notify outside processes when UML has booted far enough to respond\n" -+" to mconsole requests.\n\n" -+); -+ -+static int notify_panic(struct notifier_block *self, unsigned long unused1, -+ void *ptr) -+{ -+ char *message = ptr; -+ -+ if(notify_socket == NULL) return(0); -+ -+ mconsole_notify(notify_socket, MCONSOLE_PANIC, message, -+ strlen(message) + 1); -+ return(0); -+} -+ -+static struct notifier_block panic_exit_notifier = { -+ .notifier_call = notify_panic, -+ .next = NULL, -+ .priority = 1 -+}; -+ -+static int add_notifier(void) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ return(0); -+} -+ -+__initcall(add_notifier); -+ -+char *mconsole_notify_socket(void) -+{ -+ return(notify_socket); -+} -+ -+EXPORT_SYMBOL(mconsole_notify_socket); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/mconsole_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mconsole_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mconsole_user.c 2005-05-03 22:28:14.223447920 +0300 -@@ -0,0 +1,215 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <sys/socket.h> -+#include <sys/types.h> -+#include <sys/uio.h> -+#include <sys/un.h> -+#include <unistd.h> -+#include "user.h" -+#include "mconsole.h" -+#include "umid.h" -+ -+static struct mconsole_command commands[] = { -+ { "version", mconsole_version, MCONSOLE_INTR }, -+ { "halt", mconsole_halt, MCONSOLE_PROC }, -+ { "reboot", mconsole_reboot, MCONSOLE_PROC }, -+ { "config", mconsole_config, MCONSOLE_PROC }, -+ { "remove", mconsole_remove, MCONSOLE_PROC }, -+ { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, -+ { "help", mconsole_help, MCONSOLE_INTR }, -+ { "cad", mconsole_cad, MCONSOLE_INTR }, -+ { "stop", mconsole_stop, MCONSOLE_PROC }, -+ { "go", mconsole_go, MCONSOLE_INTR }, -+ { "log", mconsole_log, MCONSOLE_INTR }, -+ { "proc", mconsole_proc, MCONSOLE_PROC }, -+}; -+ -+/* Initialized in mconsole_init, which is an initcall */ -+char mconsole_socket_name[256]; -+ -+int mconsole_reply_v0(struct mc_request *req, char *reply) -+{ -+ struct iovec iov; -+ struct msghdr msg; -+ -+ iov.iov_base = reply; -+ iov.iov_len = strlen(reply); -+ -+ msg.msg_name = &(req->origin); -+ msg.msg_namelen = req->originlen; -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = NULL; -+ msg.msg_controllen = 0; -+ msg.msg_flags = 0; -+ -+ return sendmsg(req->originating_fd, &msg, 0); -+} -+ -+static struct mconsole_command *mconsole_parse(struct mc_request *req) -+{ -+ struct mconsole_command *cmd; -+ int i; -+ -+ for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){ -+ cmd = &commands[i]; -+ if(!strncmp(req->request.data, cmd->command, -+ strlen(cmd->command))){ -+ return(cmd); -+ } -+ } -+ return(NULL); -+} -+ -+#define MIN(a,b) ((a)<(b) ? (a):(b)) -+ -+#define STRINGX(x) #x -+#define STRING(x) STRINGX(x) -+ -+int mconsole_get_request(int fd, struct mc_request *req) -+{ -+ int len; -+ -+ req->originlen = sizeof(req->origin); -+ req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, -+ (struct sockaddr *) req->origin, &req->originlen); -+ if (req->len < 0) -+ return 0; -+ -+ req->originating_fd = fd; -+ -+ if(req->request.magic != MCONSOLE_MAGIC){ -+ /* Unversioned request */ -+ len = MIN(sizeof(req->request.data) - 1, -+ strlen((char *) &req->request)); -+ memmove(req->request.data, &req->request, len); -+ req->request.data[len] = '\0'; -+ -+ req->request.magic = MCONSOLE_MAGIC; -+ req->request.version = 0; -+ req->request.len = len; -+ -+ mconsole_reply_v0(req, "ERR Version 0 mconsole clients are " -+ "not supported by this driver"); -+ return(0); -+ } -+ -+ if(req->request.len >= MCONSOLE_MAX_DATA){ -+ mconsole_reply(req, "Request too large", 1, 0); -+ return(0); -+ } -+ if(req->request.version != MCONSOLE_VERSION){ -+ mconsole_reply(req, "This driver only supports version " -+ STRING(MCONSOLE_VERSION) " clients", 1, 0); -+ } -+ -+ req->request.data[req->request.len] = '\0'; -+ req->cmd = mconsole_parse(req); -+ if(req->cmd == NULL){ -+ mconsole_reply(req, "Unknown command", 1, 0); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+int mconsole_reply(struct mc_request *req, char *str, int err, int more) -+{ -+ struct mconsole_reply reply; -+ int total, len, n; -+ -+ total = strlen(str); -+ do { -+ reply.err = err; -+ -+ /* err can only be true on the first packet */ -+ err = 0; -+ -+ len = MIN(total, MCONSOLE_MAX_DATA - 1); -+ -+ if(len == total) reply.more = more; -+ else reply.more = 1; -+ -+ memcpy(reply.data, str, len); -+ reply.data[len] = '\0'; -+ total -= len; -+ str += len; -+ reply.len = len + 1; -+ -+ len = sizeof(reply) + reply.len - sizeof(reply.data); -+ -+ n = sendto(req->originating_fd, &reply, len, 0, -+ (struct sockaddr *) req->origin, req->originlen); -+ -+ if(n < 0) return(-errno); -+ } while(total > 0); -+ return(0); -+} -+ -+int mconsole_unlink_socket(void) -+{ -+ unlink(mconsole_socket_name); -+ return 0; -+} -+ -+static int notify_sock = -1; -+ -+int mconsole_notify(char *sock_name, int type, const void *data, int len) -+{ -+ struct sockaddr_un target; -+ struct mconsole_notify packet; -+ int n, err = 0; -+ -+ lock_notify(); -+ if(notify_sock < 0){ -+ notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0); -+ if(notify_sock < 0){ -+ printk("mconsole_notify - socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ } -+ } -+ unlock_notify(); -+ -+ if(err) -+ return(err); -+ -+ target.sun_family = AF_UNIX; -+ strcpy(target.sun_path, sock_name); -+ -+ packet.magic = MCONSOLE_MAGIC; -+ packet.version = MCONSOLE_VERSION; -+ packet.type = type; -+ len = (len > sizeof(packet.data)) ? sizeof(packet.data) : len; -+ packet.len = len; -+ memcpy(packet.data, data, len); -+ -+ err = 0; -+ len = sizeof(packet) + packet.len - sizeof(packet.data); -+ n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, -+ sizeof(target)); -+ if(n < 0){ -+ printk("mconsole_notify - sendto failed, errno = %d\n", errno); -+ err = -errno; -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/mmapper_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/mmapper_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/mmapper_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,151 @@ -+/* -+ * arch/um/drivers/mmapper_kern.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * -+ * Copyright (C) 2000 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ */ -+#include <linux/kdev_t.h> -+#include <linux/time.h> -+#include <linux/devfs_fs_kernel.h> -+#include <linux/module.h> -+#include <linux/mm.h> -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <asm/uaccess.h> -+#include <asm/irq.h> -+#include <asm/smplock.h> -+#include <asm/pgtable.h> -+#include "mem_user.h" -+#include "user_util.h" -+ -+/* These are set in mmapper_init, which is called at boot time */ -+static unsigned long mmapper_size; -+static unsigned long p_buf = 0; -+static char *v_buf = NULL; -+ -+static ssize_t -+mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_to_user(buf,&v_buf[*ppos],count); -+ -+ return count; -+} -+ -+static ssize_t -+mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_from_user(&v_buf[*ppos],buf,count); -+ -+ return count; -+} -+ -+static int -+mmapper_ioctl(struct inode *inode, struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return(-ENOIOCTLCMD); -+} -+ -+static int -+mmapper_mmap(struct file *file, struct vm_area_struct * vma) -+{ -+ int ret = -EINVAL; -+ int size; -+ -+ lock_kernel(); -+ if (vma->vm_pgoff != 0) -+ goto out; -+ -+ size = vma->vm_end - vma->vm_start; -+ if(size > mmapper_size) return(-EFAULT); -+ -+ /* XXX A comment above remap_page_range says it should only be -+ * called when the mm semaphore is held -+ */ -+ if (remap_page_range(vma->vm_start, p_buf, size, vma->vm_page_prot)) -+ goto out; -+ ret = 0; -+out: -+ unlock_kernel(); -+ return ret; -+} -+ -+static int -+mmapper_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int -+mmapper_release(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static struct file_operations mmapper_fops = { -+ .owner = THIS_MODULE, -+ .read = mmapper_read, -+ .write = mmapper_write, -+ .ioctl = mmapper_ioctl, -+ .mmap = mmapper_mmap, -+ .open = mmapper_open, -+ .release = mmapper_release, -+}; -+ -+static int __init mmapper_init(void) -+{ -+ printk(KERN_INFO "Mapper v0.1\n"); -+ -+ v_buf = (char *) find_iomem("mmapper", &mmapper_size); -+ if(mmapper_size == 0){ -+ printk(KERN_ERR "mmapper_init - find_iomem failed\n"); -+ return(0); -+ } -+ -+ p_buf = __pa(v_buf); -+ -+ devfs_register (NULL, "mmapper", DEVFS_FL_DEFAULT, -+ 30, 0, S_IFCHR | S_IRUGO | S_IWUGO, -+ &mmapper_fops, NULL); -+ devfs_mk_symlink(NULL, "mmapper0", DEVFS_FL_DEFAULT, "mmapper", -+ NULL, NULL); -+ return(0); -+} -+ -+static void mmapper_exit(void) -+{ -+} -+ -+module_init(mmapper_init); -+module_exit(mmapper_exit); -+ -+MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>"); -+MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/net_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/net_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/net_kern.c 2005-05-03 22:28:14.228447160 +0300 -@@ -0,0 +1,903 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/netdevice.h" -+#include "linux/rtnetlink.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/spinlock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/etherdevice.h" -+#include "linux/list.h" -+#include "linux/inetdevice.h" -+#include "linux/ctype.h" -+#include "linux/bootmem.h" -+#include "linux/ethtool.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; -+LIST_HEAD(opened); -+ -+static int uml_net_rx(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int pkt_len; -+ struct sk_buff *skb; -+ -+ /* If we can't allocate memory, try again next round. */ -+ skb = dev_alloc_skb(dev->mtu); -+ if (skb == NULL) { -+ lp->stats.rx_dropped++; -+ return 0; -+ } -+ -+ skb->dev = dev; -+ skb_put(skb, dev->mtu); -+ skb->mac.raw = skb->data; -+ pkt_len = (*lp->read)(lp->fd, &skb, lp); -+ -+ if (pkt_len > 0) { -+ skb_trim(skb, pkt_len); -+ skb->protocol = (*lp->protocol)(skb); -+ netif_rx(skb); -+ -+ lp->stats.rx_bytes += skb->len; -+ lp->stats.rx_packets++; -+ return pkt_len; -+ } -+ -+ kfree_skb(skb); -+ return pkt_len; -+} -+ -+void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct net_device *dev = dev_id; -+ struct uml_net_private *lp = dev->priv; -+ int err; -+ -+ if(!netif_running(dev)) -+ return; -+ -+ spin_lock(&lp->lock); -+ while((err = uml_net_rx(dev)) > 0) ; -+ if(err < 0) { -+ printk(KERN_ERR -+ "Device '%s' read returned %d, shutting it down\n", -+ dev->name, err); -+ dev_close(dev); -+ goto out; -+ } -+ reactivate_fd(lp->fd, UM_ETH_IRQ); -+ -+ out: -+ spin_unlock(&lp->lock); -+} -+ -+static int uml_net_open(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ char addr[sizeof("255.255.255.255\0")]; -+ int err; -+ -+ spin_lock(&lp->lock); -+ -+ if(lp->fd >= 0){ -+ err = -ENXIO; -+ goto out; -+ } -+ -+ if(!lp->have_mac){ -+ dev_ip_addr(dev, addr, &lp->mac[2]); -+ set_ether_mac(dev, lp->mac); -+ } -+ -+ lp->fd = (*lp->open)(&lp->user); -+ if(lp->fd < 0){ -+ err = lp->fd; -+ goto out; -+ } -+ -+ err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, -+ SA_INTERRUPT | SA_SHIRQ, dev->name, dev); -+ if(err != 0){ -+ printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ err = -ENETUNREACH; -+ } -+ -+ lp->tl.data = (unsigned long) &lp->user; -+ netif_start_queue(dev); -+ -+ spin_lock(&opened_lock); -+ list_add(&lp->list, &opened); -+ spin_unlock(&opened_lock); -+ /* clear buffer - it can happen that the host side of the interface -+ * is full when we get here. In this case, new data is never queued, -+ * SIGIOs never arrive, and the net never works. -+ */ -+ while((err = uml_net_rx(dev)) > 0) ; -+ -+ MOD_INC_USE_COUNT; -+ out: -+ spin_unlock(&lp->lock); -+ return(err); -+} -+ -+static int uml_net_close(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ -+ netif_stop_queue(dev); -+ spin_lock(&lp->lock); -+ -+ free_irq(dev->irq, dev); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ spin_lock(&opened_lock); -+ list_del(&lp->list); -+ spin_unlock(&opened_lock); -+ -+ MOD_DEC_USE_COUNT; -+ spin_unlock(&lp->lock); -+ return 0; -+} -+ -+static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ unsigned long flags; -+ int len; -+ -+ netif_stop_queue(dev); -+ -+ spin_lock_irqsave(&lp->lock, flags); -+ -+ len = (*lp->write)(lp->fd, &skb, lp); -+ -+ if(len == skb->len) { -+ lp->stats.tx_packets++; -+ lp->stats.tx_bytes += skb->len; -+ dev->trans_start = jiffies; -+ netif_start_queue(dev); -+ -+ /* this is normally done in the interrupt when tx finishes */ -+ netif_wake_queue(dev); -+ } -+ else if(len == 0){ -+ netif_start_queue(dev); -+ lp->stats.tx_dropped++; -+ } -+ else { -+ netif_start_queue(dev); -+ printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); -+ } -+ -+ spin_unlock_irqrestore(&lp->lock, flags); -+ -+ dev_kfree_skb(skb); -+ -+ return 0; -+} -+ -+static struct net_device_stats *uml_net_get_stats(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ return &lp->stats; -+} -+ -+static void uml_net_set_multicast_list(struct net_device *dev) -+{ -+ if (dev->flags & IFF_PROMISC) return; -+ else if (dev->mc_count) dev->flags |= IFF_ALLMULTI; -+ else dev->flags &= ~IFF_ALLMULTI; -+} -+ -+static void uml_net_tx_timeout(struct net_device *dev) -+{ -+ dev->trans_start = jiffies; -+ netif_wake_queue(dev); -+} -+ -+static int uml_net_set_mac(struct net_device *dev, void *addr) -+{ -+ struct uml_net_private *lp = dev->priv; -+ struct sockaddr *hwaddr = addr; -+ -+ spin_lock(&lp->lock); -+ memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); -+ spin_unlock(&lp->lock); -+ -+ return(0); -+} -+ -+static int uml_net_change_mtu(struct net_device *dev, int new_mtu) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int err = 0; -+ -+ spin_lock(&lp->lock); -+ -+ new_mtu = (*lp->set_mtu)(new_mtu, &lp->user); -+ if(new_mtu < 0){ -+ err = new_mtu; -+ goto out; -+ } -+ -+ dev->mtu = new_mtu; -+ -+ out: -+ spin_unlock(&lp->lock); -+ return err; -+} -+ -+static int uml_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -+{ -+ static const struct ethtool_drvinfo info = { -+ .cmd = ETHTOOL_GDRVINFO, -+ .driver = "uml virtual ethernet", -+ .version = "42", -+ }; -+ void *useraddr; -+ u32 ethcmd; -+ -+ switch (cmd) { -+ case SIOCETHTOOL: -+ useraddr = ifr->ifr_data; -+ if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) -+ return -EFAULT; -+ switch (ethcmd) { -+ case ETHTOOL_GDRVINFO: -+ if (copy_to_user(useraddr, &info, sizeof(info))) -+ return -EFAULT; -+ return 0; -+ default: -+ return -EOPNOTSUPP; -+ } -+ default: -+ return -EINVAL; -+ } -+} -+ -+void uml_net_user_timer_expire(unsigned long _conn) -+{ -+#ifdef undef -+ struct connection *conn = (struct connection *)_conn; -+ -+ dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); -+ do_connect(conn); -+#endif -+} -+ -+/* -+ * default do nothing hard header packet routines for struct net_device init. -+ * real ethernet transports will overwrite with real routines. -+ */ -+static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, -+ unsigned short type, void *daddr, void *saddr, unsigned len) -+{ -+ return(0); /* no change */ -+} -+ -+static int uml_net_rebuild_header(struct sk_buff *skb) -+{ -+ return(0); /* ignore */ -+} -+ -+static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) -+{ -+ return(-1); /* fail */ -+} -+ -+static void uml_net_header_cache_update(struct hh_cache *hh, -+ struct net_device *dev, unsigned char * haddr) -+{ -+ /* ignore */ -+} -+ -+static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) -+{ -+ return(0); /* nothing */ -+} -+ -+static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; -+static struct list_head devices = LIST_HEAD_INIT(devices); -+ -+static int eth_configure(int n, void *init, char *mac, -+ struct transport *transport) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ int save, err, size; -+ -+ size = transport->private_size + sizeof(struct uml_net_private) + -+ sizeof(((struct uml_net_private *) 0)->user); -+ -+ device = kmalloc(sizeof(*device), GFP_KERNEL); -+ if(device == NULL){ -+ printk(KERN_ERR "eth_configure failed to allocate uml_net\n"); -+ return(1); -+ } -+ -+ *device = ((struct uml_net) { .list = LIST_HEAD_INIT(device->list), -+ .dev = NULL, -+ .index = n, -+ .mac = { [ 0 ... 5 ] = 0 }, -+ .have_mac = 0 }); -+ -+ spin_lock(&devices_lock); -+ list_add(&device->list, &devices); -+ spin_unlock(&devices_lock); -+ -+ if(setup_etheraddr(mac, device->mac)) -+ device->have_mac = 1; -+ -+ printk(KERN_INFO "Netdevice %d ", n); -+ if(device->have_mac) printk("(%02x:%02x:%02x:%02x:%02x:%02x) ", -+ device->mac[0], device->mac[1], -+ device->mac[2], device->mac[3], -+ device->mac[4], device->mac[5]); -+ printk(": "); -+ dev = kmalloc(sizeof(*dev) + size, GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "eth_configure: failed to allocate device\n"); -+ return(1); -+ } -+ memset(dev, 0, sizeof(*dev) + size); -+ -+ snprintf(dev->name, sizeof(dev->name), "eth%d", n); -+ dev->priv = (void *) &dev[1]; -+ device->dev = dev; -+ -+ dev->hard_header = uml_net_hard_header; -+ dev->rebuild_header = uml_net_rebuild_header; -+ dev->hard_header_cache = uml_net_header_cache; -+ dev->header_cache_update= uml_net_header_cache_update; -+ dev->hard_header_parse = uml_net_header_parse; -+ -+ (*transport->kern->init)(dev, init); -+ -+ dev->mtu = transport->user->max_packet; -+ dev->open = uml_net_open; -+ dev->hard_start_xmit = uml_net_start_xmit; -+ dev->stop = uml_net_close; -+ dev->get_stats = uml_net_get_stats; -+ dev->set_multicast_list = uml_net_set_multicast_list; -+ dev->tx_timeout = uml_net_tx_timeout; -+ dev->set_mac_address = uml_net_set_mac; -+ dev->change_mtu = uml_net_change_mtu; -+ dev->do_ioctl = uml_net_ioctl; -+ dev->watchdog_timeo = (HZ >> 1); -+ dev->irq = UM_ETH_IRQ; -+ -+ rtnl_lock(); -+ err = register_netdevice(dev); -+ rtnl_unlock(); -+ if(err) -+ return(1); -+ lp = dev->priv; -+ -+ /* lp.user is the first four bytes of the transport data, which -+ * has already been initialized. This structure assignment will -+ * overwrite that, so we make sure that .user gets overwritten with -+ * what it already has. -+ */ -+ save = lp->user[0]; -+ *lp = ((struct uml_net_private) -+ { .list = LIST_HEAD_INIT(lp->list), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .dev = dev, -+ .fd = -1, -+ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, -+ .have_mac = device->have_mac, -+ .protocol = transport->kern->protocol, -+ .open = transport->user->open, -+ .close = transport->user->close, -+ .remove = transport->user->remove, -+ .read = transport->kern->read, -+ .write = transport->kern->write, -+ .add_address = transport->user->add_address, -+ .delete_address = transport->user->delete_address, -+ .set_mtu = transport->user->set_mtu, -+ .user = { save } }); -+ init_timer(&lp->tl); -+ lp->tl.function = uml_net_user_timer_expire; -+ memset(&lp->stats, 0, sizeof(lp->stats)); -+ if(lp->have_mac) memcpy(lp->mac, device->mac, sizeof(lp->mac)); -+ -+ if(transport->user->init) -+ (*transport->user->init)(&lp->user, dev); -+ -+ if(device->have_mac) -+ set_ether_mac(dev, device->mac); -+ return(0); -+} -+ -+static struct uml_net *find_device(int n) -+{ -+ struct uml_net *device; -+ struct list_head *ele; -+ -+ spin_lock(&devices_lock); -+ list_for_each(ele, &devices){ -+ device = list_entry(ele, struct uml_net, list); -+ if(device->index == n) -+ goto out; -+ } -+ device = NULL; -+ out: -+ spin_unlock(&devices_lock); -+ return(device); -+} -+ -+static int eth_parse(char *str, int *index_out, char **str_out) -+{ -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if(end == str){ -+ printk(KERN_ERR "eth_setup: Failed to parse '%s'\n", str); -+ return(1); -+ } -+ if(n < 0){ -+ printk(KERN_ERR "eth_setup: device %d is negative\n", n); -+ return(1); -+ } -+ str = end; -+ if(*str != '='){ -+ printk(KERN_ERR -+ "eth_setup: expected '=' after device number\n"); -+ return(1); -+ } -+ str++; -+ if(find_device(n)){ -+ printk(KERN_ERR "eth_setup: Device %d already configured\n", -+ n); -+ return(1); -+ } -+ if(index_out) *index_out = n; -+ *str_out = str; -+ return(0); -+} -+ -+struct eth_init { -+ struct list_head list; -+ char *init; -+ int index; -+}; -+ -+/* Filled in at boot time. Will need locking if the transports become -+ * modular. -+ */ -+struct list_head transports = LIST_HEAD_INIT(transports); -+ -+/* Filled in during early boot */ -+struct list_head eth_cmd_line = LIST_HEAD_INIT(eth_cmd_line); -+ -+static int check_transport(struct transport *transport, char *eth, int n, -+ void **init_out, char **mac_out) -+{ -+ int len; -+ -+ len = strlen(transport->name); -+ if(strncmp(eth, transport->name, len)) -+ return(0); -+ -+ eth += len; -+ if(*eth == ',') -+ eth++; -+ else if(*eth != '\0') -+ return(0); -+ -+ *init_out = kmalloc(transport->setup_size, GFP_KERNEL); -+ if(*init_out == NULL) -+ return(1); -+ -+ if(!transport->setup(eth, mac_out, *init_out)){ -+ kfree(*init_out); -+ *init_out = NULL; -+ } -+ return(1); -+} -+ -+void register_transport(struct transport *new) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ void *init; -+ char *mac = NULL; -+ int match; -+ -+ list_add(&new->list, &transports); -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ match = check_transport(new, eth->init, eth->index, &init, -+ &mac); -+ if(!match) -+ continue; -+ else if(init != NULL){ -+ eth_configure(eth->index, init, mac, new); -+ kfree(init); -+ } -+ list_del(ð->list); -+ } -+} -+ -+static int eth_setup_common(char *str, int index) -+{ -+ struct list_head *ele; -+ struct transport *transport; -+ void *init; -+ char *mac = NULL; -+ -+ list_for_each(ele, &transports){ -+ transport = list_entry(ele, struct transport, list); -+ if(!check_transport(transport, str, index, &init, &mac)) -+ continue; -+ if(init != NULL){ -+ eth_configure(index, init, mac, transport); -+ kfree(init); -+ } -+ return(1); -+ } -+ return(0); -+} -+ -+static int eth_setup(char *str) -+{ -+ struct eth_init *new; -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(1); -+ -+ new = alloc_bootmem(sizeof(new)); -+ if(new == NULL){ -+ printk("eth_init : alloc_bootmem failed\n"); -+ return(1); -+ } -+ *new = ((struct eth_init) { .list = LIST_HEAD_INIT(new->list), -+ .index = n, -+ .init = str }); -+ list_add_tail(&new->list, ð_cmd_line); -+ return(1); -+} -+ -+__setup("eth", eth_setup); -+__uml_help(eth_setup, -+"eth[0-9]+=<transport>,<options>\n" -+" Configure a network device.\n\n" -+); -+ -+static int eth_init(void) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ -+ if(eth_setup_common(eth->init, eth->index)) -+ list_del(ð->list); -+ } -+ -+ return(1); -+} -+ -+__initcall(eth_init); -+ -+static int net_config(char *str) -+{ -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(err); -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "net_config failed to strdup string\n"); -+ return(-1); -+ } -+ err = !eth_setup_common(str, n); -+ if(err) -+ kfree(str); -+ return(err); -+} -+ -+static int net_remove(char *str) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)) -+ return(-1); -+ -+ device = find_device(n); -+ if(device == NULL) -+ return(0); -+ -+ dev = device->dev; -+ lp = dev->priv; -+ if(lp->fd > 0) return(-1); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ unregister_netdev(dev); -+ -+ list_del(&device->list); -+ kfree(device); -+ return(0); -+} -+ -+static struct mc_device net_mc = { -+ .name = "eth", -+ .config = net_config, -+ .get_config = NULL, -+ .remove = net_remove, -+}; -+ -+static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ struct in_ifaddr *ifa = ptr; -+ u32 addr = ifa->ifa_address; -+ u32 netmask = ifa->ifa_mask; -+ struct net_device *dev = ifa->ifa_dev->dev; -+ struct uml_net_private *lp; -+ void (*proc)(unsigned char *, unsigned char *, void *); -+ unsigned char addr_buf[4], netmask_buf[4]; -+ -+ if(dev->open != uml_net_open) return(NOTIFY_DONE); -+ -+ lp = dev->priv; -+ -+ proc = NULL; -+ switch (event){ -+ case NETDEV_UP: -+ proc = lp->add_address; -+ break; -+ case NETDEV_DOWN: -+ proc = lp->delete_address; -+ break; -+ } -+ if(proc != NULL){ -+ addr_buf[0] = addr & 0xff; -+ addr_buf[1] = (addr >> 8) & 0xff; -+ addr_buf[2] = (addr >> 16) & 0xff; -+ addr_buf[3] = addr >> 24; -+ netmask_buf[0] = netmask & 0xff; -+ netmask_buf[1] = (netmask >> 8) & 0xff; -+ netmask_buf[2] = (netmask >> 16) & 0xff; -+ netmask_buf[3] = netmask >> 24; -+ (*proc)(addr_buf, netmask_buf, &lp->user); -+ } -+ return(NOTIFY_DONE); -+} -+ -+struct notifier_block uml_inetaddr_notifier = { -+ .notifier_call = uml_inetaddr_event, -+}; -+ -+static int uml_net_init(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ struct in_device *ip; -+ struct in_ifaddr *in; -+ -+ mconsole_register_dev(&net_mc); -+ register_inetaddr_notifier(¨_inetaddr_notifier); -+ -+ /* Devices may have been opened already, so the uml_inetaddr_notifier -+ * didn't get a chance to run for them. This fakes it so that -+ * addresses which have already been set up get handled properly. -+ */ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ ip = lp->dev->ip_ptr; -+ if(ip == NULL) continue; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ uml_inetaddr_event(NULL, NETDEV_UP, in); -+ in = in->ifa_next; -+ } -+ } -+ -+ return(0); -+} -+ -+__initcall(uml_net_init); -+ -+static void close_devices(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ } -+} -+ -+__uml_exitcall(close_devices); -+ -+int setup_etheraddr(char *str, unsigned char *addr) -+{ -+ char *end; -+ int i; -+ -+ if(str == NULL) -+ return(0); -+ for(i=0;i<6;i++){ -+ addr[i] = simple_strtoul(str, &end, 16); -+ if((end == str) || -+ ((*end != ':') && (*end != ',') && (*end != '\0'))){ -+ printk(KERN_ERR -+ "setup_etheraddr: failed to parse '%s' " -+ "as an ethernet address\n", str); -+ return(0); -+ } -+ str = end + 1; -+ } -+ if(addr[0] & 1){ -+ printk(KERN_ERR -+ "Attempt to assign a broadcast ethernet address to a " -+ "device disallowed\n"); -+ return(0); -+ } -+ return(1); -+} -+ -+void dev_ip_addr(void *d, char *buf, char *bin_buf) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ u32 addr; -+ -+ if((ip == NULL) || ((in = ip->ifa_list) == NULL)){ -+ printk(KERN_WARNING "dev_ip_addr - device not assigned an " -+ "IP address\n"); -+ return; -+ } -+ addr = in->ifa_address; -+ sprintf(buf, "%d.%d.%d.%d", addr & 0xff, (addr >> 8) & 0xff, -+ (addr >> 16) & 0xff, addr >> 24); -+ if(bin_buf){ -+ bin_buf[0] = addr & 0xff; -+ bin_buf[1] = (addr >> 8) & 0xff; -+ bin_buf[2] = (addr >> 16) & 0xff; -+ bin_buf[3] = addr >> 24; -+ } -+} -+ -+void set_ether_mac(void *d, unsigned char *addr) -+{ -+ struct net_device *dev = d; -+ -+ memcpy(dev->dev_addr, addr, ETH_ALEN); -+} -+ -+struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) -+{ -+ if((skb != NULL) && (skb_tailroom(skb) < extra)){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_copy_expand(skb, 0, extra, GFP_ATOMIC); -+ dev_kfree_skb(skb); -+ skb = skb2; -+ } -+ if(skb != NULL) skb_put(skb, extra); -+ return(skb); -+} -+ -+void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, -+ void *), -+ void *arg) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ unsigned char address[4], netmask[4]; -+ -+ if(ip == NULL) return; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ address[0] = in->ifa_address & 0xff; -+ address[1] = (in->ifa_address >> 8) & 0xff; -+ address[2] = (in->ifa_address >> 16) & 0xff; -+ address[3] = in->ifa_address >> 24; -+ netmask[0] = in->ifa_mask & 0xff; -+ netmask[1] = (in->ifa_mask >> 8) & 0xff; -+ netmask[2] = (in->ifa_mask >> 16) & 0xff; -+ netmask[3] = in->ifa_mask >> 24; -+ (*cb)(address, netmask, arg); -+ in = in->ifa_next; -+ } -+} -+ -+int dev_netmask(void *d, void *m) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ __u32 *mask_out = m; -+ -+ if(ip == NULL) -+ return(1); -+ -+ in = ip->ifa_list; -+ if(in == NULL) -+ return(1); -+ -+ *mask_out = in->ifa_mask; -+ return(0); -+} -+ -+void *get_output_buffer(int *len_out) -+{ -+ void *ret; -+ -+ ret = (void *) __get_free_pages(GFP_KERNEL, 0); -+ if(ret) *len_out = PAGE_SIZE; -+ else *len_out = 0; -+ return(ret); -+} -+ -+void free_output_buffer(void *buffer) -+{ -+ free_pages((unsigned long) buffer, 0); -+} -+ -+int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, -+ char **gate_addr) -+{ -+ char *remain; -+ -+ remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); -+ if(remain != NULL){ -+ printk("tap_setup_common - Extra garbage on specification : " -+ "'%s'\n", remain); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+unsigned short eth_protocol(struct sk_buff *skb) -+{ -+ return(eth_type_trans(skb, skb->dev)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/net_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/net_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/net_user.c 2005-05-03 22:28:14.229447008 +0300 -@@ -0,0 +1,253 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stddef.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <stdio.h> -+#include <errno.h> -+#include <stdlib.h> -+#include <string.h> -+#include <sys/socket.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_user.h" -+#include "helper.h" -+#include "os.h" -+ -+int tap_open_common(void *dev, char *gate_addr) -+{ -+ int tap_addr[4]; -+ -+ if(gate_addr == NULL) return(0); -+ if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ -+ printk("Invalid tap IP address - '%s'\n", gate_addr); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+void tap_check_ips(char *gate_addr, char *eth_addr) -+{ -+ int tap_addr[4]; -+ -+ if((gate_addr != NULL) && -+ (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && -+ (eth_addr[0] == tap_addr[0]) && -+ (eth_addr[1] == tap_addr[1]) && -+ (eth_addr[2] == tap_addr[2]) && -+ (eth_addr[3] == tap_addr[3])){ -+ printk("The tap IP address and the UML eth IP address" -+ " must be different\n"); -+ } -+} -+ -+void read_output(int fd, char *output, int len) -+{ -+ int remain, n, actual; -+ char c; -+ -+ if(output == NULL){ -+ output = &c; -+ len = sizeof(c); -+ } -+ -+ *output = '\0'; -+ n = os_read_file(fd, &remain, sizeof(remain)); -+ if(n != sizeof(remain)){ -+ printk("read_output - read of length failed, err = %d\n", -n); -+ return; -+ } -+ -+ while(remain != 0){ -+ n = (remain < len) ? remain : len; -+ actual = os_read_file(fd, output, n); -+ if(actual != n){ -+ printk("read_output - read of data failed, " -+ "err = %d\n", -actual); -+ return; -+ } -+ remain -= actual; -+ } -+ return; -+} -+ -+int net_read(int fd, void *buf, int len) -+{ -+ int n; -+ -+ n = os_read_file(fd, buf, len); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); -+ return(n); -+} -+ -+int net_recvfrom(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = recvfrom(fd, buf, len, 0, NULL, NULL)) < 0) && -+ (errno == EINTR)) ; -+ -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_write(int fd, void *buf, int len) -+{ -+ int n; -+ -+ n = os_write_file(fd, buf, len); -+ -+ if(n == -EAGAIN) -+ return(0); -+ else if(n == 0) -+ return(-ENOTCONN); -+ return(n); -+} -+ -+int net_send(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = send(fd, buf, len, 0)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_sendto(int fd, void *buf, int len, void *to, int sock_len) -+{ -+ int n; -+ -+ while(((n = sendto(fd, buf, len, 0, (struct sockaddr *) to, -+ sock_len)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+struct change_pre_exec_data { -+ int close_me; -+ int stdout; -+}; -+ -+static void change_pre_exec(void *arg) -+{ -+ struct change_pre_exec_data *data = arg; -+ -+ os_close_file(data->close_me); -+ dup2(data->stdout, 1); -+} -+ -+static int change_tramp(char **argv, char *output, int output_len) -+{ -+ int pid, fds[2], err; -+ struct change_pre_exec_data pe_data; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("change_tramp - pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ pe_data.close_me = fds[0]; -+ pe_data.stdout = fds[1]; -+ pid = run_helper(change_pre_exec, &pe_data, argv, NULL); -+ -+ read_output(fds[0], output, output_len); -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ CATCH_EINTR(waitpid(pid, NULL, 0)); -+ return(pid); -+} -+ -+static void change(char *dev, char *what, unsigned char *addr, -+ unsigned char *netmask) -+{ -+ char addr_buf[sizeof("255.255.255.255\0")]; -+ char netmask_buf[sizeof("255.255.255.255\0")]; -+ char version[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version, what, dev, addr_buf, -+ netmask_buf, NULL }; -+ char *output; -+ int output_len, pid; -+ -+ sprintf(version, "%d", UML_NET_VERSION); -+ sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); -+ sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], -+ netmask[2], netmask[3]); -+ -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("change : failed to allocate output buffer\n"); -+ -+ pid = change_tramp(argv, output, output_len); -+ if(pid < 0) return; -+ -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "add", addr, netmask); -+} -+ -+void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "del", addr, netmask); -+} -+ -+char *split_if_spec(char *str, ...) -+{ -+ char **arg, *end; -+ va_list ap; -+ -+ va_start(ap, str); -+ while((arg = va_arg(ap, char **)) != NULL){ -+ if(*str == '\0') -+ return(NULL); -+ end = strchr(str, ','); -+ if(end != str) -+ *arg = str; -+ if(end == NULL) -+ return(NULL); -+ *end++ = '\0'; -+ str = end; -+ } -+ va_end(ap); -+ return(str); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/null.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/null.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/null.c 2005-05-03 22:28:14.230446856 +0300 -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include "chan_user.h" -+#include "os.h" -+ -+static int null_chan; -+ -+void *null_init(char *str, int device, struct chan_opts *opts) -+{ -+ return(&null_chan); -+} -+ -+int null_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ *dev_out = NULL; -+ return(os_open_file(DEV_NULL, of_rdwr(OPENFLAGS()), 0)); -+} -+ -+int null_read(int fd, char *c_out, void *unused) -+{ -+ return(-ENODEV); -+} -+ -+void null_free(void *data) -+{ -+} -+ -+struct chan_ops null_ops = { -+ .type = "null", -+ .init = null_init, -+ .open = null_open, -+ .close = generic_close, -+ .read = null_read, -+ .write = generic_write, -+ .console_write = generic_console_write, -+ .window_size = generic_window_size, -+ .free = null_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/pcap_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/pcap_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/pcap_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike <jdike@karaya.com> -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "pcap_user.h" -+ -+struct pcap_init { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+}; -+ -+void pcap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct pcap_data *ppri; -+ struct pcap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ ppri = (struct pcap_data *) pri->user; -+ *ppri = ((struct pcap_data) -+ { .host_if = init->host_if, -+ .promisc = init->promisc, -+ .optimize = init->optimize, -+ .filter = init->filter, -+ .compiled = NULL, -+ .pcap = NULL }); -+} -+ -+static int pcap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(pcap_user_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER, -+ (struct pcap_data *) &lp->user)); -+} -+ -+static int pcap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ return(-EPERM); -+} -+ -+static struct net_kern_info pcap_kern_info = { -+ .init = pcap_init, -+ .protocol = eth_protocol, -+ .read = pcap_read, -+ .write = pcap_write, -+}; -+ -+int pcap_setup(char *str, char **mac_out, void *data) -+{ -+ struct pcap_init *init = data; -+ char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; -+ int i; -+ -+ *init = ((struct pcap_init) -+ { .host_if = "eth0", -+ .promisc = 1, -+ .optimize = 0, -+ .filter = NULL }); -+ -+ remain = split_if_spec(str, &host_if, &init->filter, -+ &options[0], &options[1], NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "pcap_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(host_if != NULL) -+ init->host_if = host_if; -+ -+ for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ -+ if(options[i] == NULL) -+ continue; -+ if(!strcmp(options[i], "promisc")) -+ init->promisc = 1; -+ else if(!strcmp(options[i], "nopromisc")) -+ init->promisc = 0; -+ else if(!strcmp(options[i], "optimize")) -+ init->optimize = 1; -+ else if(!strcmp(options[i], "nooptimize")) -+ init->optimize = 0; -+ else printk("pcap_setup : bad option - '%s'\n", options[i]); -+ } -+ -+ return(1); -+} -+ -+static struct transport pcap_transport = { -+ .list = LIST_HEAD_INIT(pcap_transport.list), -+ .name = "pcap", -+ .setup = pcap_setup, -+ .user = &pcap_user_info, -+ .kern = &pcap_kern_info, -+ .private_size = sizeof(struct pcap_data), -+ .setup_size = sizeof(struct pcap_init), -+}; -+ -+static int register_pcap(void) -+{ -+ register_transport(&pcap_transport); -+ return(1); -+} -+ -+__initcall(register_pcap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/pcap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/pcap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/pcap_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike <jdike@karaya.com> -+ * Licensed under the GPL. -+ */ -+ -+#include <unistd.h> -+#include <stdlib.h> -+#include <string.h> -+#include <errno.h> -+#include <pcap.h> -+#include <asm/types.h> -+#include "net_user.h" -+#include "pcap_user.h" -+#include "user.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+#define PCAP_FD(p) (*(int *)(p)) -+ -+static void pcap_user_init(void *data, void *dev) -+{ -+ struct pcap_data *pri = data; -+ pcap_t *p; -+ char errors[PCAP_ERRBUF_SIZE]; -+ -+ p = pcap_open_live(pri->host_if, MAX_PACKET, pri->promisc, 0, errors); -+ if(p == NULL){ -+ printk("pcap_user_init : pcap_open_live failed - '%s'\n", -+ errors); -+ return; -+ } -+ -+ pri->dev = dev; -+ pri->pcap = p; -+} -+ -+static int pcap_open(void *data) -+{ -+ struct pcap_data *pri = data; -+ __u32 netmask; -+ int err; -+ -+ if(pri->pcap == NULL) -+ return(-ENODEV); -+ -+ if(pri->filter != NULL){ -+ err = dev_netmask(pri->dev, &netmask); -+ if(err < 0){ -+ printk("pcap_open : dev_netmask failed\n"); -+ return(-EIO); -+ } -+ -+ pri->compiled = um_kmalloc(sizeof(struct bpf_program)); -+ if(pri->compiled == NULL){ -+ printk("pcap_open : kmalloc failed\n"); -+ return(-ENOMEM); -+ } -+ -+ err = pcap_compile(pri->pcap, -+ (struct bpf_program *) pri->compiled, -+ pri->filter, pri->optimize, netmask); -+ if(err < 0){ -+ printk("pcap_open : pcap_compile failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ -+ err = pcap_setfilter(pri->pcap, pri->compiled); -+ if(err < 0){ -+ printk("pcap_open : pcap_setfilter failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ } -+ -+ return(PCAP_FD(pri->pcap)); -+} -+ -+static void pcap_remove(void *data) -+{ -+ struct pcap_data *pri = data; -+ -+ if(pri->compiled != NULL) -+ pcap_freecode(pri->compiled); -+ -+ pcap_close(pri->pcap); -+} -+ -+struct pcap_handler_data { -+ char *buffer; -+ int len; -+}; -+ -+static void handler(u_char *data, const struct pcap_pkthdr *header, -+ const u_char *packet) -+{ -+ int len; -+ -+ struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; -+ -+ len = hdata->len < header->caplen ? hdata->len : header->caplen; -+ memcpy(hdata->buffer, packet, len); -+ hdata->len = len; -+} -+ -+int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -+{ -+ struct pcap_handler_data hdata = ((struct pcap_handler_data) -+ { .buffer = buffer, -+ .len = len }); -+ int n; -+ -+ n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); -+ if(n < 0){ -+ printk("pcap_dispatch failed - %s\n", pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ else if(n == 0) -+ return(0); -+ return(hdata.len); -+} -+ -+struct net_user_info pcap_user_info = { -+ .init = pcap_user_init, -+ .open = pcap_open, -+ .close = NULL, -+ .remove = pcap_remove, -+ .set_mtu = NULL, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/pcap_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/pcap_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/pcap_user.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct pcap_data { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+ void *compiled; -+ void *pcap; -+ void *dev; -+}; -+ -+extern struct net_user_info pcap_user_info; -+ -+extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/port.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/port.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/port.h 2005-05-03 22:28:14.234446248 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PORT_H__ -+#define __PORT_H__ -+ -+extern void *port_data(int port); -+extern int port_wait(void *data); -+extern void port_kern_close(void *d); -+extern int port_connection(int fd, int *socket_out, int *pid_out); -+extern int port_listen_fd(int port); -+extern void port_read(int fd, void *data); -+extern void port_kern_free(void *d); -+extern int port_rcv_fd(int fd); -+extern void port_remove_dev(void *d); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/port_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/port_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/port_kern.c 2005-05-03 22:28:14.235446096 +0300 -@@ -0,0 +1,303 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/irq.h" -+#include "linux/spinlock.h" -+#include "linux/errno.h" -+#include "asm/semaphore.h" -+#include "asm/errno.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "port.h" -+#include "init.h" -+#include "os.h" -+ -+struct port_list { -+ struct list_head list; -+ int has_connection; -+ struct semaphore sem; -+ int port; -+ int fd; -+ spinlock_t lock; -+ struct list_head pending; -+ struct list_head connections; -+}; -+ -+struct port_dev { -+ struct port_list *port; -+ int helper_pid; -+ int telnetd_pid; -+}; -+ -+struct connection { -+ struct list_head list; -+ int fd; -+ int helper_pid; -+ int socket[2]; -+ int telnetd_pid; -+ struct port_list *port; -+}; -+ -+static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct connection *conn = data; -+ int fd; -+ -+ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); -+ if(fd < 0){ -+ if(fd == -EAGAIN) -+ return; -+ -+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", -+ -fd); -+ os_close_file(conn->fd); -+ } -+ -+ list_del(&conn->list); -+ -+ conn->fd = fd; -+ list_add(&conn->list, &conn->port->connections); -+ -+ up(&conn->port->sem); -+} -+ -+static int port_accept(struct port_list *port) -+{ -+ struct connection *conn; -+ int fd, socket[2], pid, ret = 0; -+ -+ fd = port_connection(port->fd, socket, &pid); -+ if(fd < 0){ -+ if(fd != -EAGAIN) -+ printk(KERN_ERR "port_accept : port_connection " -+ "returned %d\n", -fd); -+ goto out; -+ } -+ -+ conn = kmalloc(sizeof(*conn), GFP_ATOMIC); -+ if(conn == NULL){ -+ printk(KERN_ERR "port_accept : failed to allocate " -+ "connection\n"); -+ goto out_close; -+ } -+ *conn = ((struct connection) -+ { .list = LIST_HEAD_INIT(conn->list), -+ .fd = fd, -+ .socket = { socket[0], socket[1] }, -+ .telnetd_pid = pid, -+ .port = port }); -+ -+ if(um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "telnetd", conn)){ -+ printk(KERN_ERR "port_accept : failed to get IRQ for " -+ "telnetd\n"); -+ goto out_free; -+ } -+ -+ list_add(&conn->list, &port->pending); -+ return(1); -+ -+ out_free: -+ kfree(conn); -+ out_close: -+ os_close_file(fd); -+ if(pid != -1) -+ os_kill_process(pid, 1); -+ out: -+ return(ret); -+} -+ -+DECLARE_MUTEX(ports_sem); -+struct list_head ports = LIST_HEAD_INIT(ports); -+ -+void port_task_proc(void *unused) -+{ -+ struct port_list *port; -+ struct list_head *ele; -+ unsigned long flags; -+ -+ save_flags(flags); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ if(!port->has_connection) -+ continue; -+ reactivate_fd(port->fd, ACCEPT_IRQ); -+ while(port_accept(port)) ; -+ port->has_connection = 0; -+ } -+ restore_flags(flags); -+} -+ -+struct tq_struct port_task = { -+ .routine = port_task_proc, -+ .data = NULL -+}; -+ -+static void port_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct port_list *port = data; -+ -+ port->has_connection = 1; -+ schedule_task(&port_task); -+} -+ -+void *port_data(int port_num) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ struct port_dev *dev = NULL; -+ int fd; -+ -+ down(&ports_sem); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ if(port->port == port_num) goto found; -+ } -+ port = kmalloc(sizeof(struct port_list), GFP_KERNEL); -+ if(port == NULL){ -+ printk(KERN_ERR "Allocation of port list failed\n"); -+ goto out; -+ } -+ -+ fd = port_listen_fd(port_num); -+ if(fd < 0){ -+ printk(KERN_ERR "binding to port %d failed, errno = %d\n", -+ port_num, -fd); -+ goto out_free; -+ } -+ if(um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "port", -+ port)){ -+ printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); -+ goto out_close; -+ } -+ -+ *port = ((struct port_list) -+ { .list = LIST_HEAD_INIT(port->list), -+ .has_connection = 0, -+ .sem = __SEMAPHORE_INITIALIZER(port->sem, -+ 0), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .port = port_num, -+ .fd = fd, -+ .pending = LIST_HEAD_INIT(port->pending), -+ .connections = LIST_HEAD_INIT(port->connections) }); -+ list_add(&port->list, &ports); -+ -+ found: -+ dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "Allocation of port device entry failed\n"); -+ goto out; -+ } -+ -+ *dev = ((struct port_dev) { .port = port, -+ .helper_pid = -1, -+ .telnetd_pid = -1 }); -+ goto out; -+ -+ out_free: -+ kfree(port); -+ out_close: -+ os_close_file(fd); -+ out: -+ up(&ports_sem); -+ return(dev); -+} -+ -+int port_wait(void *data) -+{ -+ struct port_dev *dev = data; -+ struct connection *conn; -+ struct port_list *port = dev->port; -+ int fd; -+ -+ while(1){ -+ if(down_interruptible(&port->sem)) -+ return(-ERESTARTSYS); -+ -+ spin_lock(&port->lock); -+ -+ conn = list_entry(port->connections.next, struct connection, -+ list); -+ list_del(&conn->list); -+ spin_unlock(&port->lock); -+ -+ os_shutdown_socket(conn->socket[0], 1, 1); -+ os_close_file(conn->socket[0]); -+ os_shutdown_socket(conn->socket[1], 1, 1); -+ os_close_file(conn->socket[1]); -+ -+ /* This is done here because freeing an IRQ can't be done -+ * within the IRQ handler. So, pipe_interrupt always ups -+ * the semaphore regardless of whether it got a successful -+ * connection. Then we loop here throwing out failed -+ * connections until a good one is found. -+ */ -+ free_irq(TELNETD_IRQ, conn); -+ -+ if(conn->fd >= 0) break; -+ os_close_file(conn->fd); -+ kfree(conn); -+ } -+ -+ fd = conn->fd; -+ dev->helper_pid = conn->helper_pid; -+ dev->telnetd_pid = conn->telnetd_pid; -+ kfree(conn); -+ -+ return(fd); -+} -+ -+void port_remove_dev(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ if(dev->helper_pid != -1) -+ os_kill_process(dev->helper_pid, 0); -+ if(dev->telnetd_pid != -1) -+ os_kill_process(dev->telnetd_pid, 1); -+ dev->helper_pid = -1; -+ dev->telnetd_pid = -1; -+} -+ -+void port_kern_free(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ port_remove_dev(dev); -+ kfree(dev); -+} -+ -+static void free_port(void) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ free_irq_by_fd(port->fd); -+ os_close_file(port->fd); -+ } -+} -+ -+__uml_exitcall(free_port); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/port_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/port_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/port_user.c 2005-05-03 22:28:14.237445792 +0300 -@@ -0,0 +1,224 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <string.h> -+#include <errno.h> -+#include <unistd.h> -+#include <termios.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <netinet/in.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "chan_user.h" -+#include "port.h" -+#include "helper.h" -+#include "os.h" -+ -+struct port_chan { -+ int raw; -+ struct termios tt; -+ void *kernel_data; -+ char dev[sizeof("32768\0")]; -+}; -+ -+void *port_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct port_chan *data; -+ void *kern_data; -+ char *end; -+ int port; -+ -+ if(*str != ':'){ -+ printk("port_init : channel type 'port' must specify a " -+ "port number\n"); -+ return(NULL); -+ } -+ str++; -+ port = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("port_init : couldn't parse port '%s'\n", str); -+ return(NULL); -+ } -+ -+ kern_data = port_data(port); -+ if(kern_data == NULL) -+ return(NULL); -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) -+ goto err; -+ -+ *data = ((struct port_chan) { .raw = opts->raw, -+ .kernel_data = kern_data }); -+ sprintf(data->dev, "%d", port); -+ -+ return(data); -+ err: -+ port_kern_free(kern_data); -+ return(NULL); -+} -+ -+void port_free(void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_kern_free(data->kernel_data); -+ kfree(data); -+} -+ -+int port_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct port_chan *data = d; -+ int fd, err; -+ -+ fd = port_wait(data->kernel_data); -+ if((fd >= 0) && data->raw){ -+ CATCH_EINTR(err = tcgetattr(fd, &data->tt)); -+ if(err) -+ return(err); -+ -+ err = raw(fd); -+ if(err) -+ return(err); -+ } -+ *dev_out = data->dev; -+ return(fd); -+} -+ -+void port_close(int fd, void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_remove_dev(data->kernel_data); -+ os_close_file(fd); -+} -+ -+int port_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct port_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops port_ops = { -+ .type = "port", -+ .init = port_init, -+ .open = port_open, -+ .close = port_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = port_console_write, -+ .window_size = generic_window_size, -+ .free = port_free, -+ .winch = 1, -+}; -+ -+int port_listen_fd(int port) -+{ -+ struct sockaddr_in addr; -+ int fd, err, arg; -+ -+ fd = socket(PF_INET, SOCK_STREAM, 0); -+ if(fd == -1) -+ return(-errno); -+ -+ arg = 1; -+ if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &arg, sizeof(arg)) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ addr.sin_family = AF_INET; -+ addr.sin_port = htons(port); -+ addr.sin_addr.s_addr = htonl(INADDR_ANY); -+ if(bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ if(listen(fd, 1) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ err = os_set_fd_block(fd, 0); -+ if(err < 0) -+ goto out; -+ -+ return(fd); -+ out: -+ os_close_file(fd); -+ return(err); -+} -+ -+struct port_pre_exec_data { -+ int sock_fd; -+ int pipe_fd; -+}; -+ -+void port_pre_exec(void *arg) -+{ -+ struct port_pre_exec_data *data = arg; -+ -+ dup2(data->sock_fd, 0); -+ dup2(data->sock_fd, 1); -+ dup2(data->sock_fd, 2); -+ os_close_file(data->sock_fd); -+ dup2(data->pipe_fd, 3); -+ os_shutdown_socket(3, 1, 0); -+ os_close_file(data->pipe_fd); -+} -+ -+int port_connection(int fd, int *socket, int *pid_out) -+{ -+ int new, err; -+ char *argv[] = { "/usr/sbin/in.telnetd", "-L", -+ "/usr/lib/uml/port-helper", NULL }; -+ struct port_pre_exec_data data; -+ -+ new = os_accept_connection(fd); -+ if(new < 0) -+ return(new); -+ -+ err = os_pipe(socket, 0, 0); -+ if(err < 0) -+ goto out_close; -+ -+ data = ((struct port_pre_exec_data) -+ { .sock_fd = new, -+ .pipe_fd = socket[1] }); -+ -+ err = run_helper(port_pre_exec, &data, argv, NULL); -+ if(err < 0) -+ goto out_shutdown; -+ -+ *pid_out = err; -+ return(new); -+ -+ out_shutdown: -+ os_shutdown_socket(socket[0], 1, 1); -+ os_close_file(socket[0]); -+ os_shutdown_socket(socket[1], 1, 1); -+ os_close_file(socket[1]); -+ out_close: -+ os_close_file(new); -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/pty.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/pty.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/pty.c 2005-05-03 22:28:14.238445640 +0300 -@@ -0,0 +1,159 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <termios.h> -+#include "chan_user.h" -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "os.h" -+ -+struct pty_chan { -+ void (*announce)(char *dev_name, int dev); -+ int dev; -+ int raw; -+ struct termios tt; -+ char dev_name[sizeof("/dev/pts/0123456\0")]; -+}; -+ -+void *pty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct pty_chan *data; -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct pty_chan) { .announce = opts->announce, -+ .dev = device, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int pts_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ char *dev; -+ int fd, err; -+ -+ fd = get_pty(); -+ if(fd < 0){ -+ printk("open_pts : Failed to open pts\n"); -+ return(-errno); -+ } -+ if(data->raw){ -+ CATCH_EINTR(err = tcgetattr(fd, &data->tt)); -+ if(err) -+ return(err); -+ -+ err = raw(fd); -+ if(err) -+ return(err); -+ } -+ -+ dev = ptsname(fd); -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; -+ if(data->announce) (*data->announce)(dev, data->dev); -+ return(fd); -+} -+ -+int getmaster(char *line) -+{ -+ char *pty, *bank, *cp; -+ int master, err; -+ -+ pty = &line[strlen("/dev/ptyp")]; -+ for (bank = "pqrs"; *bank; bank++) { -+ line[strlen("/dev/pty")] = *bank; -+ *pty = '0'; -+ if (os_stat_file(line, NULL) < 0) -+ break; -+ for (cp = "0123456789abcdef"; *cp; cp++) { -+ *pty = *cp; -+ master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); -+ if (master >= 0) { -+ char *tp = &line[strlen("/dev/")]; -+ -+ /* verify slave side is usable */ -+ *tp = 't'; -+ err = os_access(line, OS_ACC_RW_OK); -+ *tp = 'p'; -+ if(err == 0) return(master); -+ (void) os_close_file(master); -+ } -+ } -+ } -+ return(-1); -+} -+ -+int pty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ int fd, err; -+ char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; -+ -+ fd = getmaster(dev); -+ if(fd < 0) -+ return(-errno); -+ -+ if(data->raw){ -+ err = raw(fd); -+ if(err) -+ return(err); -+ } -+ -+ if(data->announce) (*data->announce)(dev, data->dev); -+ -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; -+ return(fd); -+} -+ -+int pty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct pty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops pty_ops = { -+ .type = "pty", -+ .init = pty_chan_init, -+ .open = pty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+struct chan_ops pts_ops = { -+ .type = "pts", -+ .init = pty_chan_init, -+ .open = pts_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slip.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slip.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slip.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,39 @@ -+#ifndef __UM_SLIP_H -+#define __UM_SLIP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+struct slip_data { -+ void *dev; -+ char name[sizeof("slnnnnn\0")]; -+ char *addr; -+ char *gate_addr; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slip_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); -+extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slip_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slip_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slip_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,109 @@ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slip.h" -+ -+struct slip_init { -+ char *gate_addr; -+}; -+ -+void slip_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slip_data *spri; -+ struct slip_init *init = data; -+ -+ private = dev->priv; -+ spri = (struct slip_data *) private->user; -+ *spri = ((struct slip_data) -+ { .name = { '\0' }, -+ .addr = NULL, -+ .gate_addr = init->gate_addr, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); -+} -+ -+static unsigned short slip_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slip_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slip_data *) &lp->user)); -+} -+ -+static int slip_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slip_data *) &lp->user)); -+} -+ -+struct net_kern_info slip_kern_info = { -+ .init = slip_init, -+ .protocol = slip_protocol, -+ .read = slip_read, -+ .write = slip_write, -+}; -+ -+static int slip_setup(char *str, char **mac_out, void *data) -+{ -+ struct slip_init *init = data; -+ -+ *init = ((struct slip_init) -+ { .gate_addr = NULL }); -+ -+ if(str[0] != '\0') -+ init->gate_addr = str; -+ return(1); -+} -+ -+static struct transport slip_transport = { -+ .list = LIST_HEAD_INIT(slip_transport.list), -+ .name = "slip", -+ .setup = slip_setup, -+ .user = &slip_user_info, -+ .kern = &slip_kern_info, -+ .private_size = sizeof(struct slip_data), -+ .setup_size = sizeof(struct slip_init), -+}; -+ -+static int register_slip(void) -+{ -+ register_transport(&slip_transport); -+ return(1); -+} -+ -+__initcall(register_slip); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slip_proto.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slip_proto.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slip_proto.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,93 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SLIP_PROTO_H__ -+#define __UM_SLIP_PROTO_H__ -+ -+/* SLIP protocol characters. */ -+#define SLIP_END 0300 /* indicates end of frame */ -+#define SLIP_ESC 0333 /* indicates byte stuffing */ -+#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ -+#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ -+ -+static inline int slip_unesc(unsigned char c,char *buf,int *pos, int *esc) -+{ -+ int ret; -+ -+ switch(c){ -+ case SLIP_END: -+ *esc = 0; -+ ret=*pos; -+ *pos=0; -+ return(ret); -+ case SLIP_ESC: -+ *esc = 1; -+ return(0); -+ case SLIP_ESC_ESC: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_ESC; -+ } -+ break; -+ case SLIP_ESC_END: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_END; -+ } -+ break; -+ } -+ buf[(*pos)++] = c; -+ return(0); -+} -+ -+static inline int slip_esc(unsigned char *s, unsigned char *d, int len) -+{ -+ unsigned char *ptr = d; -+ unsigned char c; -+ -+ /* -+ * Send an initial END character to flush out any -+ * data that may have accumulated in the receiver -+ * due to line noise. -+ */ -+ -+ *ptr++ = SLIP_END; -+ -+ /* -+ * For each byte in the packet, send the appropriate -+ * character sequence, according to the SLIP protocol. -+ */ -+ -+ while (len-- > 0) { -+ switch(c = *s++) { -+ case SLIP_END: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_END; -+ break; -+ case SLIP_ESC: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_ESC; -+ break; -+ default: -+ *ptr++ = c; -+ break; -+ } -+ } -+ *ptr++ = SLIP_END; -+ return (ptr - d); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slip_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slip_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slip_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,276 @@ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <stddef.h> -+#include <sched.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/termios.h> -+#include <sys/wait.h> -+#include <sys/signal.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slip.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slip_user_init(void *data, void *dev) -+{ -+ struct slip_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static int set_up_tty(int fd) -+{ -+ int i; -+ struct termios tios; -+ -+ if (tcgetattr(fd, &tios) < 0) { -+ printk("could not get initial terminal attributes\n"); -+ return(-1); -+ } -+ -+ tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; -+ tios.c_iflag = IGNBRK | IGNPAR; -+ tios.c_oflag = 0; -+ tios.c_lflag = 0; -+ for (i = 0; i < NCCS; i++) -+ tios.c_cc[i] = 0; -+ tios.c_cc[VMIN] = 1; -+ tios.c_cc[VTIME] = 0; -+ -+ cfsetospeed(&tios, B38400); -+ cfsetispeed(&tios, B38400); -+ -+ if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { -+ printk("failed to set terminal attributes\n"); -+ return(-1); -+ } -+ return(0); -+} -+ -+struct slip_pre_exec_data { -+ int stdin; -+ int stdout; -+ int close_me; -+}; -+ -+static void slip_pre_exec(void *arg) -+{ -+ struct slip_pre_exec_data *data = arg; -+ -+ if(data->stdin >= 0) dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ if(data->close_me >= 0) os_close_file(data->close_me); -+} -+ -+static int slip_tramp(char **argv, int fd) -+{ -+ struct slip_pre_exec_data pe_data; -+ char *output; -+ int status, pid, fds[2], err, output_len; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("slip_tramp : pipe failed, err = %d\n", -err); -+ return(err); -+ } -+ -+ err = 0; -+ pe_data.stdin = fd; -+ pe_data.stdout = fds[1]; -+ pe_data.close_me = fds[0]; -+ pid = run_helper(slip_pre_exec, &pe_data, argv, NULL); -+ -+ if(pid < 0) err = pid; -+ else { -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("slip_tramp : failed to allocate output " -+ "buffer\n"); -+ -+ os_close_file(fds[1]); -+ read_output(fds[0], output, output_len); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+ CATCH_EINTR(err = waitpid(pid, &status, 0)); -+ if(err < 0) -+ err = errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ -+ printk("'%s' didn't exit with status 0\n", argv[0]); -+ err = -EINVAL; -+ } -+ } -+ return(err); -+} -+ -+static int slip_open(void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, -+ NULL }; -+ int sfd, mfd, err; -+ -+ mfd = get_pty(); -+ if(mfd < 0){ -+ printk("umn : Failed to open pty, err = %d\n", -mfd); -+ return(mfd); -+ } -+ sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); -+ if(sfd < 0){ -+ printk("Couldn't open tty for slip line, err = %d\n", -sfd); -+ return(sfd); -+ } -+ if(set_up_tty(sfd)) return(-1); -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ if(pri->gate_addr != NULL){ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ strcpy(gate_buf, pri->gate_addr); -+ -+ err = slip_tramp(argv, sfd); -+ -+ if(err < 0){ -+ printk("slip_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ err = os_get_ifname(pri->slave, pri->name); -+ if(err < 0){ -+ printk("get_ifname failed, err = %d\n", -err); -+ return(err); -+ } -+ iter_addresses(pri->dev, open_addr, pri->name); -+ } -+ else { -+ err = os_set_slip(sfd); -+ if(err < 0){ -+ printk("Failed to set slip discipline encapsulation - " -+ "err = %d\n", -err); -+ return(err); -+ } -+ } -+ return(mfd); -+} -+ -+static void slip_close(int fd, void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, -+ NULL }; -+ int err; -+ -+ if(pri->gate_addr != NULL) -+ iter_addresses(pri->dev, close_addr, pri->name); -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ err = slip_tramp(argv, -1); -+ -+ if(err != 0) -+ printk("slip_tramp failed - errno = %d\n", -err); -+ os_close_file(fd); -+ os_close_file(pri->slave); -+ pri->slave = -1; -+} -+ -+int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slip_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void slip_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave < 0) return; -+ open_addr(addr, netmask, pri->name); -+} -+ -+static void slip_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave < 0) return; -+ close_addr(addr, netmask, pri->name); -+} -+ -+struct net_user_info slip_user_info = { -+ .init = slip_user_init, -+ .open = slip_open, -+ .close = slip_close, -+ .remove = NULL, -+ .set_mtu = slip_set_mtu, -+ .add_address = slip_add_addr, -+ .delete_address = slip_del_addr, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slirp.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slirp.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slirp.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,51 @@ -+#ifndef __UM_SLIRP_H -+#define __UM_SLIRP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+#define SLIRP_MAX_ARGS 100 -+/* -+ * XXX this next definition is here because I don't understand why this -+ * initializer doesn't work in slirp_kern.c: -+ * -+ * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] }, -+ * -+ * or why I can't typecast like this: -+ * -+ * argv : (char* [SLIRP_MAX_ARGS])(init->argv), -+ */ -+struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; -+ -+struct slirp_data { -+ void *dev; -+ struct arg_list_dummy_wrapper argw; -+ int pid; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slirp_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -+extern int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slirp_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slirp_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slirp_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,132 @@ -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slirp.h" -+ -+struct slirp_init { -+ struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */ -+}; -+ -+void slirp_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slirp_data *spri; -+ struct slirp_init *init = data; -+ int i; -+ -+ private = dev->priv; -+ spri = (struct slirp_data *) private->user; -+ *spri = ((struct slirp_data) -+ { .argw = init->argw, -+ .pid = -1, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIRP backend - command line:"); -+ for(i=0;spri->argw.argv[i]!=NULL;i++) { -+ printk(" '%s'",spri->argw.argv[i]); -+ } -+ printk("\n"); -+} -+ -+static unsigned short slirp_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slirp_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slirp_data *) &lp->user)); -+} -+ -+static int slirp_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slirp_data *) &lp->user)); -+} -+ -+struct net_kern_info slirp_kern_info = { -+ .init = slirp_init, -+ .protocol = slirp_protocol, -+ .read = slirp_read, -+ .write = slirp_write, -+}; -+ -+static int slirp_setup(char *str, char **mac_out, void *data) -+{ -+ struct slirp_init *init = data; -+ int i=0; -+ -+ *init = ((struct slirp_init) -+ { argw : { { "slirp", NULL } } }); -+ -+ str = split_if_spec(str, mac_out, NULL); -+ -+ if(str == NULL) { /* no command line given after MAC addr */ -+ return(1); -+ } -+ -+ do { -+ if(i>=SLIRP_MAX_ARGS-1) { -+ printk("slirp_setup: truncating slirp arguments\n"); -+ break; -+ } -+ init->argw.argv[i++] = str; -+ while(*str && *str!=',') { -+ if(*str=='_') *str=' '; -+ str++; -+ } -+ if(*str!=',') -+ break; -+ *str++='\0'; -+ } while(1); -+ init->argw.argv[i]=NULL; -+ return(1); -+} -+ -+static struct transport slirp_transport = { -+ .list = LIST_HEAD_INIT(slirp_transport.list), -+ .name = "slirp", -+ .setup = slirp_setup, -+ .user = &slirp_user_info, -+ .kern = &slirp_kern_info, -+ .private_size = sizeof(struct slirp_data), -+ .setup_size = sizeof(struct slirp_init), -+}; -+ -+static int register_slirp(void) -+{ -+ register_transport(&slirp_transport); -+ return(1); -+} -+ -+__initcall(register_slirp); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/slirp_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/slirp_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/slirp_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,201 @@ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <stddef.h> -+#include <sched.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/signal.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slirp.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slirp_user_init(void *data, void *dev) -+{ -+ struct slirp_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct slirp_pre_exec_data { -+ int stdin; -+ int stdout; -+}; -+ -+static void slirp_pre_exec(void *arg) -+{ -+ struct slirp_pre_exec_data *data = arg; -+ -+ if(data->stdin != -1) dup2(data->stdin, 0); -+ if(data->stdout != -1) dup2(data->stdout, 1); -+} -+ -+static int slirp_tramp(char **argv, int fd) -+{ -+ struct slirp_pre_exec_data pe_data; -+ int pid; -+ -+ pe_data.stdin = fd; -+ pe_data.stdout = fd; -+ pid = run_helper(slirp_pre_exec, &pe_data, argv, NULL); -+ -+ return(pid); -+} -+ -+/* XXX This is just a trivial wrapper around os_pipe */ -+static int slirp_datachan(int *mfd, int *sfd) -+{ -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); -+ return(err); -+ } -+ -+ *mfd = fds[0]; -+ *sfd = fds[1]; -+ return(0); -+} -+ -+static int slirp_open(void *data) -+{ -+ struct slirp_data *pri = data; -+ int sfd, mfd, pid, err; -+ -+ err = slirp_datachan(&mfd, &sfd); -+ if(err) -+ return(err); -+ -+ pid = slirp_tramp(pri->argw.argv, sfd); -+ -+ if(pid < 0){ -+ printk("slirp_tramp failed - errno = %d\n", -pid); -+ os_close_file(sfd); -+ os_close_file(mfd); -+ return(pid); -+ } -+ -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ -+ pri->pid = pid; -+ -+ return(mfd); -+} -+ -+static void slirp_close(int fd, void *data) -+{ -+ struct slirp_data *pri = data; -+ int status,err; -+ -+ os_close_file(fd); -+ os_close_file(pri->slave); -+ -+ pri->slave = -1; -+ -+ if(pri->pid<1) { -+ printk("slirp_close: no child process to shut down\n"); -+ return; -+ } -+ -+#if 0 -+ if(kill(pri->pid, SIGHUP)<0) { -+ printk("slirp_close: sending hangup to %d failed (%d)\n", -+ pri->pid, errno); -+ } -+#endif -+ -+ CATCH_EINTR(err = waitpid(pri->pid, &status, WNOHANG)); -+ if(err < 0) { -+ printk("slirp_close: waitpid returned %d\n", errno); -+ return; -+ } -+ -+ if(err == 0) { -+ printk("slirp_close: process %d has not exited\n"); -+ return; -+ } -+ -+ pri->pid = -1; -+} -+ -+int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slirp_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info slirp_user_info = { -+ .init = slirp_user_init, -+ .open = slirp_open, -+ .close = slirp_close, -+ .remove = NULL, -+ .set_mtu = slirp_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/ssl.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/ssl.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/ssl.c 2005-05-03 22:28:14.247444272 +0300 -@@ -0,0 +1,300 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/fs.h" -+#include "linux/tty.h" -+#include "linux/tty_driver.h" -+#include "linux/major.h" -+#include "linux/mm.h" -+#include "linux/init.h" -+#include "linux/console.h" -+#include "asm/termbits.h" -+#include "asm/irq.h" -+#include "line.h" -+#include "ssl.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+ -+static int ssl_version = 1; -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. -+ */ -+static int ssl_refcount = 0; -+ -+static struct tty_driver ssl_driver; -+ -+#define NR_PORTS 64 -+ -+void ssl_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = ssl_announce, -+ .xterm_title = "Serial Line #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int ssl_config(char *str); -+static int ssl_get_config(char *dev, char *str, int size, char **error_out); -+static int ssl_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML serial line", -+ .devfs_name = "tts/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 64, -+ .type = TTY_DRIVER_TYPE_SERIAL, -+ .subtype = 0, -+ .read_irq = SSL_IRQ, -+ .read_irq_name = "ssl", -+ .write_irq = SSL_WRITE_IRQ, -+ .write_irq_name = "ssl-write", -+ .symlink_from = "serial", -+ .symlink_to = "tts", -+ .mc = { -+ .name = "ssl", -+ .config = ssl_config, -+ .get_config = ssl_get_config, -+ .remove = ssl_remove, -+ }, -+}; -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+static struct line serial_lines[NR_PORTS] = -+ { [0 ... NR_PORTS - 1] = LINE_INIT(CONFIG_SSL_CHAN, &driver) }; -+ -+static struct lines lines = LINES_INIT(NR_PORTS); -+ -+static int ssl_config(char *str) -+{ -+ return(line_config(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+static int ssl_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, size, error_out)); -+} -+ -+static int ssl_remove(char *str) -+{ -+ return(line_remove(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+int ssl_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(line_open(serial_lines, tty, &opts)); -+} -+ -+static void ssl_close(struct tty_struct *tty, struct file * filp) -+{ -+ line_close(serial_lines, tty); -+} -+ -+static int ssl_write(struct tty_struct * tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(serial_lines, tty, from_user, buf, count)); -+} -+ -+static void ssl_put_char(struct tty_struct *tty, unsigned char ch) -+{ -+ line_write(serial_lines, tty, 0, &ch, sizeof(ch)); -+} -+ -+static void ssl_flush_chars(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static void ssl_flush_buffer(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_ioctl(struct tty_struct *tty, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ int ret; -+ -+ ret = 0; -+ switch(cmd){ -+ case TCGETS: -+ case TCSETS: -+ case TCFLSH: -+ case TCSETSF: -+ case TCSETSW: -+ case TCGETA: -+ case TIOCMGET: -+ case TCSBRK: -+ case TCSBRKP: -+ case TIOCMSET: -+ ret = -ENOIOCTLCMD; -+ break; -+ default: -+ printk(KERN_ERR -+ "Unimplemented ioctl in ssl_ioctl : 0x%x\n", cmd); -+ ret = -ENOIOCTLCMD; -+ break; -+ } -+ return(ret); -+} -+ -+static void ssl_throttle(struct tty_struct * tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_throttle\n"); -+} -+ -+static void ssl_unthrottle(struct tty_struct * tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_unthrottle\n"); -+} -+ -+static void ssl_set_termios(struct tty_struct *tty, -+ struct termios *old_termios) -+{ -+} -+ -+static void ssl_stop(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_stop\n"); -+} -+ -+static void ssl_start(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_start\n"); -+} -+ -+void ssl_hangup(struct tty_struct *tty) -+{ -+} -+ -+static struct tty_driver ssl_driver = { -+ .refcount = &ssl_refcount, -+ .open = ssl_open, -+ .close = ssl_close, -+ .write = ssl_write, -+ .put_char = ssl_put_char, -+ .flush_chars = ssl_flush_chars, -+ .chars_in_buffer = ssl_chars_in_buffer, -+ .flush_buffer = ssl_flush_buffer, -+ .ioctl = ssl_ioctl, -+ .throttle = ssl_throttle, -+ .unthrottle = ssl_unthrottle, -+ .set_termios = ssl_set_termios, -+ .stop = ssl_stop, -+ .start = ssl_start, -+ .hangup = ssl_hangup -+}; -+ -+/* Changed by ssl_init and referenced by ssl_exit, which are both serialized -+ * by being an initcall and exitcall, respectively. -+ */ -+static int ssl_init_done = 0; -+ -+static void ssl_console_write(struct console *c, const char *string, -+ unsigned len) -+{ -+ struct line *line = &serial_lines[c->index]; -+ if(ssl_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(ssl_init_done) -+ up(&line->sem); -+} -+ -+static kdev_t ssl_console_device(struct console *c) -+{ -+ return mk_kdev(TTY_MAJOR, c->index); -+} -+ -+static int ssl_console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console ssl_cons = { -+ name: "ttyS", -+ write: ssl_console_write, -+ device: ssl_console_device, -+ setup: ssl_console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; -+ -+int ssl_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing software serial port version %d\n", -+ ssl_version); -+ -+ line_register_devfs(&lines, &driver, &ssl_driver, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ lines_init(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ register_console(&ssl_cons); -+ ssl_init_done = 1; -+ return(0); -+} -+ -+__initcall(ssl_init); -+ -+static int ssl_chan_setup(char *str) -+{ -+ return(line_setup(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, 1)); -+} -+ -+__setup("ssl", ssl_chan_setup); -+__channel_help(ssl_chan_setup, "ssl"); -+ -+static void ssl_exit(void) -+{ -+ if(!ssl_init_done) return; -+ close_lines(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+} -+ -+__uml_exitcall(ssl_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/ssl.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/ssl.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/ssl.h 2005-05-03 22:28:14.248444120 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SSL_H__ -+#define __SSL_H__ -+ -+extern int ssl_read(int fd, int line); -+extern void ssl_receive_char(int line, char ch); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/stdio_console.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/stdio_console.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/stdio_console.c 2005-05-03 22:28:14.249443968 +0300 -@@ -0,0 +1,258 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/posix_types.h" -+#include "linux/tty.h" -+#include "linux/tty_flip.h" -+#include "linux/types.h" -+#include "linux/major.h" -+#include "linux/kdev_t.h" -+#include "linux/console.h" -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "linux/list.h" -+#include "linux/init.h" -+#include "linux/interrupt.h" -+#include "linux/slab.h" -+#include "asm/current.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "asm/irq.h" -+#include "stdio_console.h" -+#include "line.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "2_5compat.h" -+ -+#define MAX_TTYS (8) -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. -+ */ -+ -+static struct tty_driver console_driver; -+ -+static int console_refcount = 0; -+ -+static struct chan_ops init_console_ops = { -+ .type = "you shouldn't see this", -+ .init = NULL, -+ .open = NULL, -+ .close = NULL, -+ .read = NULL, -+ .write = NULL, -+ .console_write = generic_write, -+ .window_size = NULL, -+ .free = NULL, -+ .winch = 0, -+}; -+ -+static struct chan init_console_chan = { -+ .list = { }, -+ .primary = 1, -+ .input = 0, -+ .output = 1, -+ .opened = 1, -+ .fd = 1, -+ .pri = INIT_STATIC, -+ .ops = &init_console_ops, -+ .data = NULL -+}; -+ -+void stdio_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = stdio_announce, -+ .xterm_title = "Virtual Console #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int con_config(char *str); -+static int con_get_config(char *dev, char *str, int size, char **error_out); -+static int con_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML console", -+ .devfs_name = "vc/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 0, -+ .type = TTY_DRIVER_TYPE_CONSOLE, -+ .subtype = SYSTEM_TYPE_CONSOLE, -+ .read_irq = CONSOLE_IRQ, -+ .read_irq_name = "console", -+ .write_irq = CONSOLE_WRITE_IRQ, -+ .write_irq_name = "console-write", -+ .symlink_from = "ttys", -+ .symlink_to = "vc", -+ .mc = { -+ .name = "con", -+ .config = con_config, -+ .get_config = con_get_config, -+ .remove = con_remove, -+ }, -+}; -+ -+static struct lines console_lines = LINES_INIT(MAX_TTYS); -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+struct line vts[MAX_TTYS] = { LINE_INIT(CONFIG_CON_ZERO_CHAN, &driver), -+ [ 1 ... MAX_TTYS - 1 ] = -+ LINE_INIT(CONFIG_CON_CHAN, &driver) }; -+ -+static int con_config(char *str) -+{ -+ return(line_config(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int con_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, vts, sizeof(vts)/sizeof(vts[0]), str, -+ size, error_out)); -+} -+ -+static int con_remove(char *str) -+{ -+ return(line_remove(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int open_console(struct tty_struct *tty) -+{ -+ return(line_open(vts, tty, &opts)); -+} -+ -+static int con_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(open_console(tty)); -+} -+ -+static void con_close(struct tty_struct *tty, struct file *filp) -+{ -+ line_close(vts, tty); -+} -+ -+static int con_write(struct tty_struct *tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(vts, tty, from_user, buf, count)); -+} -+ -+static void set_termios(struct tty_struct *tty, struct termios * old) -+{ -+} -+ -+static int chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static int con_init_done = 0; -+ -+int stdio_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing stdio console driver\n"); -+ -+ line_register_devfs(&console_lines, &driver, &console_driver, vts, -+ sizeof(vts)/sizeof(vts[0])); -+ -+ lines_init(vts, sizeof(vts)/sizeof(vts[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ open_console(NULL); -+ con_init_done = 1; -+ return(0); -+} -+ -+__initcall(stdio_init); -+ -+static void console_write(struct console *console, const char *string, -+ unsigned len) -+{ -+ struct line *line = &vts[console->index]; -+ -+ if(con_init_done) -+ down(&line->sem); -+ console_write_chan(&line->chan_list, string, len); -+ if(con_init_done) -+ up(&line->sem); -+} -+ -+static struct tty_driver console_driver = { -+ .refcount = &console_refcount, -+ .open = con_open, -+ .close = con_close, -+ .write = con_write, -+ .chars_in_buffer = chars_in_buffer, -+ .set_termios = set_termios -+}; -+ -+static kdev_t console_device(struct console *c) -+{ -+ return mk_kdev(TTY_MAJOR, c->index); -+} -+ -+static int console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console stdiocons = { -+ name: "tty", -+ write: console_write, -+ device: console_device, -+ setup: console_setup, -+ flags: CON_PRINTBUFFER, -+ index: -1, -+}; -+ -+void stdio_console_init(void) -+{ -+ INIT_LIST_HEAD(&vts[0].chan_list); -+ list_add(&init_console_chan.list, &vts[0].chan_list); -+ register_console(&stdiocons); -+} -+ -+static int console_chan_setup(char *str) -+{ -+ return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); -+} -+ -+__setup("con", console_chan_setup); -+__channel_help(console_chan_setup, "con"); -+ -+static void console_exit(void) -+{ -+ if(!con_init_done) return; -+ close_lines(vts, sizeof(vts)/sizeof(vts[0])); -+} -+ -+__uml_exitcall(console_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/stdio_console.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/stdio_console.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/stdio_console.h 2005-05-03 22:28:14.250443816 +0300 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __STDIO_CONSOLE_H -+#define __STDIO_CONSOLE_H -+ -+extern void save_console_flags(void); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/tty.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/tty.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/tty.c 2005-05-03 22:28:14.251443664 +0300 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <termios.h> -+#include <errno.h> -+#include <unistd.h> -+#include "chan_user.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+struct tty_chan { -+ char *dev; -+ int raw; -+ struct termios tt; -+}; -+ -+void *tty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct tty_chan *data; -+ -+ if(*str != ':'){ -+ printk("tty_init : channel type 'tty' must specify " -+ "a device\n"); -+ return(NULL); -+ } -+ str++; -+ -+ data = um_kmalloc(sizeof(*data)); -+ if(data == NULL) -+ return(NULL); -+ *data = ((struct tty_chan) { .dev = str, -+ .raw = opts->raw }); -+ -+ return(data); -+} -+ -+int tty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct tty_chan *data = d; -+ int fd, err; -+ -+ fd = os_open_file(data->dev, of_set_rw(OPENFLAGS(), input, output), 0); -+ if(fd < 0) return(fd); -+ if(data->raw){ -+ CATCH_EINTR(err = tcgetattr(fd, &data->tt)); -+ if(err) -+ return(err); -+ -+ err = raw(fd); -+ if(err) -+ return(err); -+ } -+ -+ *dev_out = data->dev; -+ return(fd); -+} -+ -+int tty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct tty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops tty_ops = { -+ .type = "tty", -+ .init = tty_chan_init, -+ .open = tty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = tty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/ubd_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/ubd_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/ubd_kern.c 2005-05-03 22:28:14.257442752 +0300 -@@ -0,0 +1,1380 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+/* 2001-09-28...2002-04-17 -+ * Partition stuff by James_McMechan@hotmail.com -+ * old style ubd by setting UBD_SHIFT to 0 -+ */ -+ -+#define MAJOR_NR UBD_MAJOR -+#define UBD_SHIFT 4 -+ -+#include "linux/config.h" -+#include "linux/blk.h" -+#include "linux/blkdev.h" -+#include "linux/hdreg.h" -+#include "linux/init.h" -+#include "linux/devfs_fs_kernel.h" -+#include "linux/cdrom.h" -+#include "linux/proc_fs.h" -+#include "linux/ctype.h" -+#include "linux/capability.h" -+#include "linux/mm.h" -+#include "linux/vmalloc.h" -+#include "linux/blkpg.h" -+#include "linux/genhd.h" -+#include "linux/spinlock.h" -+#include "asm/segment.h" -+#include "asm/uaccess.h" -+#include "asm/irq.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "ubd_user.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "mem.h" -+#include "mem_kern.h" -+ -+static int ubd_open(struct inode * inode, struct file * filp); -+static int ubd_release(struct inode * inode, struct file * file); -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int ubd_revalidate(kdev_t rdev); -+static int ubd_revalidate1(kdev_t rdev); -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << UBD_SHIFT) -+ -+/* Changed in early boot */ -+static int ubd_do_mmap = 0; -+#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE -+ -+/* Not modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by ubd_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct block_device_operations ubd_blops = { -+ .open = ubd_open, -+ .release = ubd_release, -+ .ioctl = ubd_ioctl, -+ .revalidate = ubd_revalidate, -+}; -+ -+/* Protected by ubd_lock, except in prepare_request and ubd_ioctl because -+ * the block layer should ensure that the device is idle before closing it. -+ */ -+static struct hd_struct ubd_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *ubd_queue; -+ -+/* Protected by ubd_lock */ -+static int fake_major = MAJOR_NR; -+ -+static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static struct gendisk ubd_gendisk = INIT_GENDISK(MAJOR_NR, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+static struct gendisk fake_gendisk = INIT_GENDISK(0, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+ -+#ifdef CONFIG_BLK_DEV_UBD_SYNC -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ -+ .cl = 1 }) -+#else -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ -+ .cl = 1 }) -+#endif -+ -+/* Not protected - changed only in ubd_setup_common and then only to -+ * to enable O_SYNC. -+ */ -+static struct openflags global_openflags = OPEN_FLAGS; -+ -+struct cow { -+ char *file; -+ int fd; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+}; -+ -+struct ubd { -+ char *file; -+ int count; -+ int fd; -+ __u64 size; -+ struct openflags boot_openflags; -+ struct openflags openflags; -+ devfs_handle_t devfs; -+ int no_cow; -+ struct cow cow; -+ -+ int map_writes; -+ int map_reads; -+ int nomap_writes; -+ int nomap_reads; -+ int write_maps; -+}; -+ -+#define DEFAULT_COW { \ -+ .file = NULL, \ -+ .fd = -1, \ -+ .bitmap = NULL, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+} -+ -+#define DEFAULT_UBD { \ -+ .file = NULL, \ -+ .count = 0, \ -+ .fd = -1, \ -+ .size = -1, \ -+ .boot_openflags = OPEN_FLAGS, \ -+ .openflags = OPEN_FLAGS, \ -+ .devfs = NULL, \ -+ .no_cow = 0, \ -+ .cow = DEFAULT_COW, \ -+ .map_writes = 0, \ -+ .map_reads = 0, \ -+ .nomap_writes = 0, \ -+ .nomap_reads = 0, \ -+ .write_maps = 0, \ -+} -+ -+struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; -+ -+static int ubd0_init(void) -+{ -+ struct ubd *dev = &ubd_dev[0]; -+ -+ if(dev->file == NULL) -+ dev->file = "root_fs"; -+ return(0); -+} -+ -+__initcall(ubd0_init); -+ -+/* Only changed by fake_ide_setup which is a setup */ -+static int fake_ide = 0; -+static struct proc_dir_entry *proc_ide_root = NULL; -+static struct proc_dir_entry *proc_ide = NULL; -+ -+static void make_proc_ide(void) -+{ -+ proc_ide_root = proc_mkdir("ide", 0); -+ proc_ide = proc_mkdir("ide0", proc_ide_root); -+} -+ -+static int proc_ide_read_media(char *page, char **start, off_t off, int count, -+ int *eof, void *data) -+{ -+ int len; -+ -+ strcpy(page, "disk\n"); -+ len = strlen("disk\n"); -+ len -= off; -+ if (len < count){ -+ *eof = 1; -+ if (len <= 0) return 0; -+ } -+ else len = count; -+ *start = page + off; -+ return len; -+} -+ -+static void make_ide_entries(char *dev_name) -+{ -+ struct proc_dir_entry *dir, *ent; -+ char name[64]; -+ -+ if(!fake_ide) return; -+ -+ /* Without locking this could race if a UML was booted with no -+ * disks and then two mconsole requests which add disks came in -+ * at the same time. -+ */ -+ spin_lock(&ubd_lock); -+ if(proc_ide_root == NULL) make_proc_ide(); -+ spin_unlock(&ubd_lock); -+ -+ dir = proc_mkdir(dev_name, proc_ide); -+ if(!dir) return; -+ -+ ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); -+ if(!ent) return; -+ ent->nlink = 1; -+ ent->data = NULL; -+ ent->read_proc = proc_ide_read_media; -+ ent->write_proc = NULL; -+ sprintf(name,"ide0/%s", dev_name); -+ proc_symlink(dev_name, proc_ide_root, name); -+} -+ -+static int fake_ide_setup(char *str) -+{ -+ fake_ide = 1; -+ return(1); -+} -+ -+__setup("fake_ide", fake_ide_setup); -+ -+__uml_help(fake_ide_setup, -+"fake_ide\n" -+" Create ide0 entries that map onto ubd devices.\n\n" -+); -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int ubd_setup_common(char *str, int *index_out) -+{ -+ struct openflags flags = global_openflags; -+ struct ubd *dev; -+ char *backing_file; -+ int n, err; -+ -+ if(index_out) *index_out = -1; -+ n = *str; -+ if(n == '='){ -+ char *end; -+ int major; -+ -+ str++; -+ if(!strcmp(str, "mmap")){ -+ CHOOSE_MODE(printk("mmap not supported by the ubd " -+ "driver in tt mode\n"), -+ ubd_do_mmap = 1); -+ return(0); -+ } -+ -+ if(!strcmp(str, "sync")){ -+ global_openflags.s = 1; -+ return(0); -+ } -+ major = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk(KERN_ERR -+ "ubd_setup : didn't parse major number\n"); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ if(fake_major != MAJOR_NR){ -+ printk(KERN_ERR "Can't assign a fake major twice\n"); -+ goto out1; -+ } -+ -+ fake_gendisk.major = major; -+ fake_major = major; -+ -+ printk(KERN_INFO "Setting extra ubd major number to %d\n", -+ major); -+ err = 0; -+ out1: -+ spin_unlock(&ubd_lock); -+ return(err); -+ } -+ -+ n = parse_unit(&str); -+ if(n < 0){ -+ printk(KERN_ERR "ubd_setup : couldn't parse unit number " -+ "'%s'\n", str); -+ return(1); -+ } -+ -+ if(n >= MAX_DEV){ -+ printk(KERN_ERR "ubd_setup : index %d out of range " -+ "(%d devices)\n", n, MAX_DEV); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ -+ dev = &ubd_dev[n]; -+ if(dev->file != NULL){ -+ printk(KERN_ERR "ubd_setup : device already configured\n"); -+ goto out2; -+ } -+ -+ if(index_out) *index_out = n; -+ -+ if(*str == 'r'){ -+ flags.w = 0; -+ str++; -+ } -+ if(*str == 's'){ -+ flags.s = 1; -+ str++; -+ } -+ if(*str == 'd'){ -+ dev->no_cow = 1; -+ str++; -+ } -+ -+ if(*str++ != '='){ -+ printk(KERN_ERR "ubd_setup : Expected '='\n"); -+ goto out2; -+ } -+ -+ err = 0; -+ backing_file = strchr(str, ','); -+ if(backing_file){ -+ if(dev->no_cow) -+ printk(KERN_ERR "Can't specify both 'd' and a " -+ "cow file\n"); -+ else { -+ *backing_file = '\0'; -+ backing_file++; -+ } -+ } -+ dev->file = str; -+ dev->cow.file = backing_file; -+ dev->boot_openflags = flags; -+ out2: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_setup(char *str) -+{ -+ ubd_setup_common(str, NULL); -+ return(1); -+} -+ -+__setup("ubd", ubd_setup); -+__uml_help(ubd_setup, -+"ubd<n>=<filename>\n" -+" This is used to associate a device with a file in the underlying\n" -+" filesystem. Usually, there is a filesystem in the file, but \n" -+" that's not required. Swap devices containing swap files can be\n" -+" specified like this. Also, a file which doesn't contain a\n" -+" filesystem can have its contents read in the virtual \n" -+" machine by running dd on the device. n must be in the range\n" -+" 0 to 7. Appending an 'r' to the number will cause that device\n" -+" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" -+" an 's' (has to be _after_ 'r', if there is one) will cause data\n" -+" to be written to disk on the host immediately.\n\n" -+); -+ -+static int fakehd(char *str) -+{ -+ printk(KERN_INFO -+ "fakehd : Changing ubd_gendisk.major_name to \"hd\".\n"); -+ ubd_gendisk.major_name = "hd"; -+ return(1); -+} -+ -+__setup("fakehd", fakehd); -+__uml_help(fakehd, -+"fakehd\n" -+" Change the ubd device name to \"hd\".\n\n" -+); -+ -+static void do_ubd_request(request_queue_t * q); -+ -+/* Only changed by ubd_init, which is an initcall. */ -+int thread_fd = -1; -+ -+/* Changed by ubd_handler, which is serialized because interrupts only -+ * happen on CPU 0. -+ */ -+int intr_count = 0; -+ -+static void ubd_finish(int error) -+{ -+ int nsect; -+ -+ if(error){ -+ end_request(0); -+ return; -+ } -+ nsect = CURRENT->current_nr_sectors; -+ CURRENT->sector += nsect; -+ CURRENT->buffer += nsect << 9; -+ CURRENT->errors = 0; -+ CURRENT->nr_sectors -= nsect; -+ CURRENT->current_nr_sectors = 0; -+ end_request(1); -+} -+ -+static void ubd_handler(void) -+{ -+ struct io_thread_req req; -+ int n, err; -+ -+ DEVICE_INTR = NULL; -+ intr_count++; -+ n = read_ubd_fs(thread_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " -+ "err = %d\n", os_getpid(), -n); -+ spin_lock(&io_request_lock); -+ end_request(0); -+ spin_unlock(&io_request_lock); -+ return; -+ } -+ -+ if((req.op != UBD_MMAP) && -+ ((req.offset != ((__u64) (CURRENT->sector)) << 9) || -+ (req.length != (CURRENT->current_nr_sectors) << 9))) -+ panic("I/O op mismatch"); -+ -+ if(req.map_fd != -1){ -+ err = physmem_subst_mapping(req.buffer, req.map_fd, -+ req.map_offset, 1); -+ if(err) -+ printk("ubd_handler - physmem_subst_mapping failed, " -+ "err = %d\n", -err); -+ } -+ -+ spin_lock(&io_request_lock); -+ ubd_finish(req.error); -+ reactivate_fd(thread_fd, UBD_IRQ); -+ do_ubd_request(ubd_queue); -+ spin_unlock(&io_request_lock); -+} -+ -+static void ubd_intr(int irq, void *dev, struct pt_regs *unused) -+{ -+ ubd_handler(); -+} -+ -+/* Only changed by ubd_init, which is an initcall. */ -+static int io_pid = -1; -+ -+void kill_io_thread(void) -+{ -+ if(io_pid != -1) -+ os_kill_process(io_pid, 1); -+} -+ -+__uml_exitcall(kill_io_thread); -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t ubd_dir_handle; -+ -+static int ubd_add(int n) -+{ -+ struct ubd *dev = &ubd_dev[n]; -+ char name[sizeof("nnnnnn\0")], dev_name[sizeof("ubd0x")]; -+ int err = -EISDIR; -+ -+ if(dev->file == NULL) -+ goto out; -+ -+ err = ubd_revalidate1(MKDEV(MAJOR_NR, n << UBD_SHIFT)); -+ if(err) -+ goto out; -+ -+ if(dev->cow.file == NULL) -+ blk_sizes[n] = UBD_MMAP_BLOCK_SIZE; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(ubd_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << UBD_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &ubd_blops, NULL); -+ -+#if 0 /* 2.5 ... */ -+ sprintf(disk->disk_name, "ubd%c", 'a' + unit); -+#endif -+ -+ sprintf(dev_name, "%s%c", ubd_gendisk.major_name, -+ n + 'a'); -+ -+ make_ide_entries(dev_name); -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+static int ubd_config(char *str) -+{ -+ int n, err; -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "ubd_config failed to strdup string\n"); -+ return(1); -+ } -+ err = ubd_setup_common(str, &n); -+ if(err){ -+ kfree(str); -+ return(-1); -+ } -+ if(n == -1) return(0); -+ -+ spin_lock(&ubd_lock); -+ err = ubd_add(n); -+ if(err) -+ ubd_dev[n].file = NULL; -+ spin_unlock(&ubd_lock); -+ -+ return(err); -+} -+ -+static int ubd_get_config(char *name, char *str, int size, char **error_out) -+{ -+ struct ubd *dev; -+ char *end; -+ int n, len = 0; -+ -+ n = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "ubd_get_config : didn't parse device number"; -+ return(-1); -+ } -+ -+ if((n >= MAX_DEV) || (n < 0)){ -+ *error_out = "ubd_get_config : device number out of range"; -+ return(-1); -+ } -+ -+ dev = &ubd_dev[n]; -+ spin_lock(&ubd_lock); -+ -+ if(dev->file == NULL){ -+ CONFIG_CHUNK(str, size, len, "", 1); -+ goto out; -+ } -+ -+ CONFIG_CHUNK(str, size, len, dev->file, 0); -+ -+ if(dev->cow.file != NULL){ -+ CONFIG_CHUNK(str, size, len, ",", 0); -+ CONFIG_CHUNK(str, size, len, dev->cow.file, 1); -+ } -+ else CONFIG_CHUNK(str, size, len, "", 1); -+ -+ out: -+ spin_unlock(&ubd_lock); -+ return(len); -+} -+ -+static int ubd_remove(char *str) -+{ -+ struct ubd *dev; -+ int n, err = -ENODEV; -+ -+ if(isdigit(*str)){ -+ char *end; -+ n = simple_strtoul(str, &end, 0); -+ if ((*end != '\0') || (end == str)) -+ return(err); -+ } -+ else if (('a' <= *str) && (*str <= 'h')) -+ n = *str - 'a'; -+ else -+ return(err); /* it should be a number 0-7/a-h */ -+ -+ if((n < 0) || (n >= MAX_DEV)) -+ return(err); -+ -+ dev = &ubd_dev[n]; -+ -+ spin_lock(&ubd_lock); -+ err = 0; -+ if(dev->file == NULL) -+ goto out; -+ err = -1; -+ if(dev->count > 0) -+ goto out; -+ if(dev->devfs != NULL) -+ devfs_unregister(dev->devfs); -+ -+ *dev = ((struct ubd) DEFAULT_UBD); -+ err = 0; -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static struct mc_device ubd_mc = { -+ .name = "ubd", -+ .config = ubd_config, -+ .get_config = ubd_get_config, -+ .remove = ubd_remove, -+}; -+ -+static int ubd_mc_init(void) -+{ -+ mconsole_register_dev(&ubd_mc); -+ return(0); -+} -+ -+__initcall(ubd_mc_init); -+ -+static request_queue_t *ubd_get_queue(kdev_t device) -+{ -+ return(ubd_queue); -+} -+ -+int ubd_init(void) -+{ -+ unsigned long stack; -+ int i, err; -+ -+ ubd_dir_handle = devfs_mk_dir (NULL, "ubd", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "ubd", &ubd_blops)) { -+ printk(KERN_ERR "ubd: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ ubd_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(ubd_queue, DEVICE_REQUEST); -+ INIT_ELV(ubd_queue, &ubd_queue->elevator); -+ -+ add_gendisk(&ubd_gendisk); -+ if (fake_major != MAJOR_NR){ -+ /* major number 0 is used to auto select */ -+ err = devfs_register_blkdev(fake_major, "fake", &ubd_blops); -+ if(fake_major == 0){ -+ /* auto device number case */ -+ fake_major = err; -+ if(err == 0) -+ return(-ENODEV); -+ } -+ else if (err){ -+ /* not auto so normal error */ -+ printk(KERN_ERR "ubd: error %d getting major %d\n", -+ -err, fake_major); -+ return(-ENODEV); -+ } -+ -+ blk_dev[fake_major].queue = ubd_get_queue; -+ read_ahead[fake_major] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[fake_major] = blk_sizes; -+ blk_size[fake_major] = sizes; -+ INIT_HARDSECT(hardsect_size, fake_major, hardsect_sizes); -+ add_gendisk(&fake_gendisk); -+ } -+ -+ for(i=0;i<MAX_DEV;i++) -+ ubd_add(i); -+ -+ if(global_openflags.s){ -+ printk(KERN_INFO "ubd : Synchronous mode\n"); -+ return(0); -+ } -+ stack = alloc_stack(0, 0); -+ io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), -+ &thread_fd); -+ if(io_pid < 0){ -+ io_pid = -1; -+ printk(KERN_ERR -+ "ubd : Failed to start I/O thread (errno = %d) - " -+ "falling back to synchronous I/O\n", -io_pid); -+ return(0); -+ } -+ err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, -+ SA_INTERRUPT, "ubd", ubd_dev); -+ if(err != 0) -+ printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); -+ return(err); -+} -+ -+__initcall(ubd_init); -+ -+static void ubd_close(struct ubd *dev) -+{ -+ if(ubd_do_mmap) -+ physmem_forget_descriptor(dev->fd); -+ os_close_file(dev->fd); -+ if(dev->cow.file != NULL) -+ return; -+ -+ if(ubd_do_mmap) -+ physmem_forget_descriptor(dev->cow.fd); -+ os_close_file(dev->cow.fd); -+ vfree(dev->cow.bitmap); -+ dev->cow.bitmap = NULL; -+} -+ -+static int ubd_open_dev(struct ubd *dev) -+{ -+ struct openflags flags; -+ char **back_ptr; -+ int err, create_cow, *create_ptr; -+ -+ dev->openflags = dev->boot_openflags; -+ create_cow = 0; -+ create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; -+ back_ptr = dev->no_cow ? NULL : &dev->cow.file; -+ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, -+ &dev->cow.bitmap_offset, &dev->cow.bitmap_len, -+ &dev->cow.data_offset, create_ptr); -+ -+ if((dev->fd == -ENOENT) && create_cow){ -+ dev->fd = create_cow_file(dev->file, dev->cow.file, -+ dev->openflags, 1 << 9, PAGE_SIZE, -+ &dev->cow.bitmap_offset, -+ &dev->cow.bitmap_len, -+ &dev->cow.data_offset); -+ if(dev->fd >= 0){ -+ printk(KERN_INFO "Creating \"%s\" as COW file for " -+ "\"%s\"\n", dev->file, dev->cow.file); -+ } -+ } -+ -+ if(dev->fd < 0) return(dev->fd); -+ -+ if(dev->cow.file != NULL){ -+ err = -ENOMEM; -+ dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); -+ if(dev->cow.bitmap == NULL){ -+ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); -+ goto error; -+ } -+ flush_tlb_kernel_vm(); -+ -+ err = read_cow_bitmap(dev->fd, dev->cow.bitmap, -+ dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ if(err < 0) -+ goto error; -+ -+ flags = dev->openflags; -+ flags.w = 0; -+ err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, -+ NULL, NULL); -+ if(err < 0) goto error; -+ dev->cow.fd = err; -+ } -+ return(0); -+ error: -+ os_close_file(dev->fd); -+ return(err); -+} -+ -+static int ubd_file_size(struct ubd *dev, __u64 *size_out) -+{ -+ char *file; -+ -+ file = dev->cow.file ? dev->cow.file : dev->file; -+ return(os_file_size(file, size_out)); -+} -+ -+static int ubd_open(struct inode *inode, struct file *filp) -+{ -+ struct ubd *dev; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ dev = &ubd_dev[n]; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ offset = n << UBD_SHIFT; -+ -+ if(dev->count == 0){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "ubd%d: Can't open \"%s\": " -+ "errno = %d\n", n, dev->file, -err); -+ goto out; -+ } -+ err = ubd_file_size(dev, &dev->size); -+ if(err < 0) -+ goto out; -+ sizes[offset] = dev->size / BLOCK_SIZE; -+ ubd_part[offset].nr_sects = dev->size / hardsect_sizes[offset]; -+ } -+ dev->count++; -+ if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ -+ if(--dev->count == 0) ubd_close(dev); -+ err = -EROFS; -+ } -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_release(struct inode * inode, struct file * file) -+{ -+ int n, offset; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ offset = n << UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ if(--ubd_dev[n].count == 0) -+ ubd_close(&ubd_dev[n]); -+ spin_unlock(&ubd_lock); -+ -+ return(0); -+} -+ -+static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, -+ __u64 *cow_offset, unsigned long *bitmap, -+ __u64 bitmap_offset, unsigned long *bitmap_words, -+ __u64 bitmap_len) -+{ -+ __u64 sector = io_offset >> 9; -+ int i, update_bitmap = 0; -+ -+ for(i = 0; i < length >> 9; i++){ -+ if(cow_mask != NULL) -+ ubd_set_bit(i, (unsigned char *) cow_mask); -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) -+ continue; -+ -+ update_bitmap = 1; -+ ubd_set_bit(sector + i, (unsigned char *) bitmap); -+ } -+ -+ if(!update_bitmap) -+ return; -+ -+ *cow_offset = sector / (sizeof(unsigned long) * 8); -+ -+ /* This takes care of the case where we're exactly at the end of the -+ * device, and *cow_offset + 1 is off the end. So, just back it up -+ * by one word. Thanks to Lynn Kerby for the fix and James McMechan -+ * for the original diagnosis. -+ */ -+ if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / -+ sizeof(unsigned long) - 1)) -+ (*cow_offset)--; -+ -+ bitmap_words[0] = bitmap[*cow_offset]; -+ bitmap_words[1] = bitmap[*cow_offset + 1]; -+ -+ *cow_offset *= sizeof(unsigned long); -+ *cow_offset += bitmap_offset; -+} -+ -+static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, -+ __u64 bitmap_offset, __u64 bitmap_len) -+{ -+ __u64 sector = req->offset >> 9; -+ int i; -+ -+ if(req->length > (sizeof(req->sector_mask) * 8) << 9) -+ panic("Operation too long"); -+ -+ if(req->op == UBD_READ) { -+ for(i = 0; i < req->length >> 9; i++){ -+ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)){ -+ ubd_set_bit(i, (unsigned char *) -+ &req->sector_mask); -+ } -+ } -+ } -+ else cowify_bitmap(req->offset, req->length, &req->sector_mask, -+ &req->cow_offset, bitmap, bitmap_offset, -+ req->bitmap_words, bitmap_len); -+} -+ -+static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) -+{ -+ __u64 sector; -+ unsigned char *bitmap; -+ int bit, i; -+ -+ /* mmap must have been requested on the command line */ -+ if(!ubd_do_mmap) -+ return(-1); -+ -+ /* The buffer must be page aligned */ -+ if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ /* The request must be a page long */ -+ if((req->current_nr_sectors << 9) != PAGE_SIZE) -+ return(-1); -+ -+ if(dev->cow.file == NULL) -+ return(dev->fd); -+ -+ sector = offset >> 9; -+ bitmap = (unsigned char *) dev->cow.bitmap; -+ bit = ubd_test_bit(sector, bitmap); -+ -+ for(i = 1; i < req->current_nr_sectors; i++){ -+ if(ubd_test_bit(sector + i, bitmap) != bit) -+ return(-1); -+ } -+ -+ if(bit || (req->cmd == WRITE)) -+ offset += dev->cow.data_offset; -+ -+ /* The data on disk must be page aligned */ -+ if((offset % UBD_MMAP_BLOCK_SIZE) != 0) -+ return(-1); -+ -+ return(bit ? dev->fd : dev->cow.fd); -+} -+ -+static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, -+ struct request *req, -+ struct io_thread_req *io_req) -+{ -+ int err; -+ -+ if(req->cmd == WRITE){ -+ /* Writes are almost no-ops since the new data is already in the -+ * host page cache -+ */ -+ dev->map_writes++; -+ if(dev->cow.file != NULL) -+ cowify_bitmap(io_req->offset, io_req->length, -+ &io_req->sector_mask, &io_req->cow_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ io_req->bitmap_words, -+ dev->cow.bitmap_len); -+ } -+ else { -+ int w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->cow.fd)) -+ w = 0; -+ else w = dev->openflags.w; -+ -+ if((dev->cow.file != NULL) && (fd == dev->fd)) -+ offset += dev->cow.data_offset; -+ -+ err = physmem_subst_mapping(req->buffer, fd, offset, w); -+ if(err){ -+ printk("physmem_subst_mapping failed, err = %d\n", -+ -err); -+ return(1); -+ } -+ dev->map_reads++; -+ } -+ io_req->op = UBD_MMAP; -+ io_req->buffer = req->buffer; -+ return(0); -+} -+ -+static int prepare_request(struct request *req, struct io_thread_req *io_req) -+{ -+ struct ubd *dev; -+ __u64 offset; -+ int minor, n, len, fd; -+ -+ if(req->rq_status == RQ_INACTIVE) return(1); -+ -+ minor = MINOR(req->rq_dev); -+ n = minor >> UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ -+ if(IS_WRITE(req) && !dev->openflags.w){ -+ printk("Write attempted on readonly ubd device %d\n", n); -+ end_request(0); -+ return(1); -+ } -+ -+ req->sector += ubd_part[minor].start_sect; -+ offset = ((__u64) req->sector) << 9; -+ len = req->current_nr_sectors << 9; -+ -+ io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; -+ io_req->fds[1] = dev->fd; -+ io_req->map_fd = -1; -+ io_req->cow_offset = -1; -+ io_req->offset = offset; -+ io_req->length = len; -+ io_req->error = 0; -+ io_req->sector_mask = 0; -+ -+ fd = mmap_fd(req, dev, io_req->offset); -+ if(fd > 0){ -+ /* If mmapping is otherwise OK, but the first access to the -+ * page is a write, then it's not mapped in yet. So we have -+ * to write the data to disk first, then we can map the disk -+ * page in and continue normally from there. -+ */ -+ if((req->cmd == WRITE) && !is_remapped(req->buffer, dev->fd, io_req->offset + dev->cow.data_offset)){ -+ io_req->map_fd = dev->fd; -+ io_req->map_offset = io_req->offset + -+ dev->cow.data_offset; -+ dev->write_maps++; -+ } -+ else return(prepare_mmap_request(dev, fd, io_req->offset, req, -+ io_req)); -+ } -+ -+ if(req->cmd == READ) -+ dev->nomap_reads++; -+ else dev->nomap_writes++; -+ -+ io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; -+ io_req->offsets[0] = 0; -+ io_req->offsets[1] = dev->cow.data_offset; -+ io_req->buffer = req->buffer; -+ io_req->sectorsize = 1 << 9; -+ -+ if(dev->cow.file != NULL) -+ cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ return(0); -+} -+ -+static void do_ubd_request(request_queue_t *q) -+{ -+ struct io_thread_req io_req; -+ struct request *req; -+ int err, n; -+ -+ if(thread_fd == -1){ -+ while(!list_empty(&q->queue_head)){ -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ do_io(&io_req); -+ ubd_finish(io_req.error); -+ } -+ } -+ } -+ else { -+ if(DEVICE_INTR || list_empty(&q->queue_head)) return; -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ SET_INTR(ubd_handler); -+ n = write_ubd_fs(thread_fd, (char *) &io_req, -+ sizeof(io_req)); -+ if(n != sizeof(io_req)) -+ printk("write to io thread failed, " -+ "errno = %d\n", -n); -+ } -+ } -+} -+ -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hd_geometry *loc = (struct hd_geometry *) arg; -+ struct ubd *dev; -+ int n, minor, err; -+ struct hd_driveid ubd_id = { -+ .cyls = 0, -+ .heads = 128, -+ .sectors = 32, -+ }; -+ -+ if(!inode) return(-EINVAL); -+ minor = MINOR(inode->i_rdev); -+ n = minor >> UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return(-EINVAL); -+ dev = &ubd_dev[n]; -+ switch (cmd) { -+ struct hd_geometry g; -+ struct cdrom_volctrl volume; -+ case HDIO_GETGEO: -+ if(!loc) return(-EINVAL); -+ g.heads = 128; -+ g.sectors = 32; -+ g.cylinders = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ g.start = ubd_part[minor].start_sect; -+ return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); -+ case BLKGETSIZE: /* Return device size */ -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ put_user(ubd_part[minor].nr_sects, (long *) arg); -+ return(0); -+ case BLKRRPART: /* Re-read partition tables */ -+ return(ubd_revalidate(inode->i_rdev)); -+ -+ case HDIO_GET_IDENTITY: -+ ubd_id.cyls = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ if(copy_to_user((char *) arg, (char *) &ubd_id, -+ sizeof(ubd_id))) -+ return(-EFAULT); -+ return(0); -+ -+ case CDROMVOLREAD: -+ if(copy_from_user(&volume, (char *) arg, sizeof(volume))) -+ return(-EFAULT); -+ volume.channel0 = 255; -+ volume.channel1 = 255; -+ volume.channel2 = 255; -+ volume.channel3 = 255; -+ if(copy_to_user((char *) arg, &volume, sizeof(volume))) -+ return(-EFAULT); -+ return(0); -+ -+ default: -+ return blk_ioctl(inode->i_rdev, cmd, arg); -+ } -+} -+ -+static int ubd_revalidate1(kdev_t rdev) -+{ -+ int i, n, offset, err = 0, pcount = 1 << UBD_SHIFT; -+ struct ubd *dev; -+ struct hd_struct *part; -+ -+ n = DEVICE_NR(rdev); -+ offset = n << UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ -+ part = &ubd_part[offset]; -+ -+ /* clear all old partition counts */ -+ for(i = 1; i < pcount; i++) { -+ part[i].start_sect = 0; -+ part[i].nr_sects = 0; -+ } -+ -+ /* If it already has been opened we can check the partitions -+ * directly -+ */ -+ if(dev->count){ -+ part->start_sect = 0; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ } -+ else if(dev->file){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "unable to open %s for validation\n", -+ dev->file); -+ goto out; -+ } -+ -+ /* have to recompute sizes since we opened it */ -+ err = ubd_file_size(dev, &dev->size); -+ if(err < 0) { -+ ubd_close(dev); -+ goto out; -+ } -+ part->start_sect = 0; -+ part->nr_sects = dev->size / hardsect_sizes[offset]; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ -+ /* we are done so close it */ -+ ubd_close(dev); -+ } -+ else err = -ENODEV; -+ out: -+ return(err); -+} -+ -+static int ubd_revalidate(kdev_t rdev) -+{ -+ int err; -+ -+ spin_lock(&ubd_lock); -+ err = ubd_revalidate1(rdev); -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_check_remapped(int fd, unsigned long address, int is_write, -+ __u64 offset, int is_user) -+{ -+ __u64 bitmap_offset; -+ unsigned long new_bitmap[2]; -+ int i, err, n; -+ -+ /* This can only fix kernelspace faults */ -+ if(is_user) -+ return(0); -+ -+ /* ubd-mmap is only enabled in skas mode */ -+ if(CHOOSE_MODE(1, 0)) -+ return(0); -+ -+ /* If it's not a write access, we can't do anything about it */ -+ if(!is_write) -+ return(0); -+ -+ /* We have a write */ -+ for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ -+ struct ubd *dev = &ubd_dev[i]; -+ -+ if((dev->fd != fd) && (dev->cow.fd != fd)) -+ continue; -+ -+ /* It's a write to a ubd device */ -+ -+ if(!dev->openflags.w){ -+ /* It's a write access on a read-only device - probably -+ * shouldn't happen. If the kernel is trying to change -+ * something with no intention of writing it back out, -+ * then this message will clue us in that this needs -+ * fixing -+ */ -+ printk("Write access to mapped page from readonly ubd " -+ "device %d\n", i); -+ return(0); -+ } -+ -+ /* It's a write to a writeable ubd device - it must be COWed -+ * because, otherwise, the page would have been mapped in -+ * writeable -+ */ -+ -+ if(!dev->cow.file) -+ panic("Write fault on writeable non-COW ubd device %d", -+ i); -+ -+ /* It should also be an access to the backing file since the -+ * COW pages should be mapped in read-write -+ */ -+ -+ if(fd == dev->fd) -+ panic("Write fault on a backing page of ubd " -+ "device %d\n", i); -+ -+ /* So, we do the write, copying the backing data to the COW -+ * file... -+ */ -+ -+ err = os_seek_file(dev->fd, offset + dev->cow.data_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", -+ offset + dev->cow.data_offset, i, -err); -+ -+ n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Couldn't copy data to COW file of ubd " -+ "device %d, err = %d", i, -n); -+ -+ /* ... updating the COW bitmap... */ -+ -+ cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, -+ dev->cow.bitmap, dev->cow.bitmap_offset, -+ new_bitmap, dev->cow.bitmap_len); -+ -+ err = os_seek_file(dev->fd, bitmap_offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of ubd " -+ "device %d, err = %d", bitmap_offset, i, -err); -+ -+ n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); -+ if(n != sizeof(new_bitmap)) -+ panic("Couldn't update bitmap of ubd device %d, " -+ "err = %d", i, -n); -+ -+ /* Maybe we can map the COW page in, and maybe we can't. If -+ * it is a pre-V3 COW file, we can't, since the alignment will -+ * be wrong. If it is a V3 or later COW file which has been -+ * moved to a system with a larger page size, then maybe we -+ * can't, depending on the exact location of the page. -+ */ -+ -+ offset += dev->cow.data_offset; -+ -+ /* Remove the remapping, putting the original anonymous page -+ * back. If the COW file can be mapped in, that is done. -+ * Otherwise, the COW page is read in. -+ */ -+ -+ if(!physmem_remove_mapping((void *) address)) -+ panic("Address 0x%lx not remapped by ubd device %d", -+ address, i); -+ if((offset % UBD_MMAP_BLOCK_SIZE) == 0) -+ physmem_subst_mapping((void *) address, dev->fd, -+ offset, 1); -+ else { -+ err = os_seek_file(dev->fd, offset); -+ if(err < 0) -+ panic("Couldn't seek to %lld in COW file of " -+ "ubd device %d, err = %d", offset, i, -+ -err); -+ -+ n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); -+ if(n != PAGE_SIZE) -+ panic("Failed to read page from offset %llx of " -+ "COW file of ubd device %d, err = %d", -+ offset, i, -n); -+ } -+ -+ return(1); -+ } -+ -+ /* It's not a write on a ubd device */ -+ return(0); -+} -+ -+static struct remapper ubd_remapper = { -+ .list = LIST_HEAD_INIT(ubd_remapper.list), -+ .proc = ubd_check_remapped, -+}; -+ -+static int ubd_remapper_setup(void) -+{ -+ if(ubd_do_mmap) -+ register_remapper(&ubd_remapper); -+ -+ return(0); -+} -+ -+__initcall(ubd_remapper_setup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/ubd_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/ubd_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/ubd_user.c 2005-05-03 22:28:14.259442448 +0300 -@@ -0,0 +1,379 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stddef.h> -+#include <unistd.h> -+#include <errno.h> -+#include <sched.h> -+#include <signal.h> -+#include <string.h> -+#include <netinet/in.h> -+#include <sys/time.h> -+#include <sys/socket.h> -+#include <sys/mman.h> -+#include <sys/param.h> -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ubd_user.h" -+#include "os.h" -+#include "cow.h" -+ -+#include <endian.h> -+#include <byteswap.h> -+ -+static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) -+{ -+ struct uml_stat buf1, buf2; -+ int err; -+ -+ if(from_cmdline == NULL) return(1); -+ if(!strcmp(from_cmdline, from_cow)) return(1); -+ -+ err = os_stat_file(from_cmdline, &buf1); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); -+ return(1); -+ } -+ err = os_stat_file(from_cow, &buf2); -+ if(err < 0){ -+ printk("Couldn't stat '%s', err = %d\n", from_cow, -err); -+ return(1); -+ } -+ if((buf1.ust_major == buf2.ust_major) && -+ (buf1.ust_minor == buf2.ust_minor) && -+ (buf1.ust_ino == buf2.ust_ino)) -+ return(1); -+ -+ printk("Backing file mismatch - \"%s\" requested,\n" -+ "\"%s\" specified in COW header of \"%s\"\n", -+ from_cmdline, from_cow, cow); -+ return(0); -+} -+ -+static int backing_file_mismatch(char *file, __u64 size, time_t mtime) -+{ -+ unsigned long modtime; -+ long long actual; -+ int err; -+ -+ err = os_file_modtime(file, &modtime); -+ if(err < 0){ -+ printk("Failed to get modification time of backing file " -+ "\"%s\", err = %d\n", file, -err); -+ return(err); -+ } -+ -+ err = os_file_size(file, &actual); -+ if(err < 0){ -+ printk("Failed to get size of backing file \"%s\", " -+ "err = %d\n", file, -err); -+ return(err); -+ } -+ -+ if(actual != size){ -+ printk("Size mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", size, actual); -+ return(-EINVAL); -+ } -+ if(modtime != mtime){ -+ printk("mtime mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", mtime, modtime); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+int read_cow_bitmap(int fd, void *buf, int offset, int len) -+{ -+ int err; -+ -+ err = os_seek_file(fd, offset); -+ if(err < 0) -+ return(err); -+ -+ err = os_read_file(fd, buf, len); -+ if(err < 0) -+ return(err); -+ -+ return(0); -+} -+ -+int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out) -+{ -+ time_t mtime; -+ __u64 size; -+ __u32 version, align; -+ char *backing_file; -+ int fd, err, sectorsize, same, mode = 0644; -+ -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0){ -+ if((fd == -ENOENT) && (create_cow_out != NULL)) -+ *create_cow_out = 1; -+ if(!openflags->w || -+ ((errno != EROFS) && (errno != EACCES))) return(-errno); -+ openflags->w = 0; -+ fd = os_open_file(file, *openflags, mode); -+ if(fd < 0) -+ return(fd); -+ } -+ -+ err = os_lock_file(fd, openflags->w); -+ if(err < 0){ -+ printk("Failed to lock '%s', err = %d\n", file, -err); -+ goto out_close; -+ } -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, -+ &size, §orsize, &align, bitmap_offset_out); -+ if(err && (*backing_file_out != NULL)){ -+ printk("Failed to read COW header from COW file \"%s\", " -+ "errno = %d\n", file, -err); -+ goto out_close; -+ } -+ if(err) return(fd); -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ same = same_backing_files(*backing_file_out, backing_file, file); -+ -+ if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ -+ printk("Switching backing file to '%s'\n", *backing_file_out); -+ err = write_cow_header(file, fd, *backing_file_out, -+ sectorsize, align, &size); -+ if(err){ -+ printk("Switch failed, errno = %d\n", -err); -+ return(err); -+ } -+ } -+ else { -+ *backing_file_out = backing_file; -+ err = backing_file_mismatch(*backing_file_out, size, mtime); -+ if(err) goto out_close; -+ } -+ -+ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, -+ bitmap_len_out, data_offset_out); -+ -+ return(fd); -+ out_close: -+ os_close_file(fd); -+ return(err); -+} -+ -+int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, -+ int sectorsize, int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ int err, fd; -+ -+ flags.c = 1; -+ fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); -+ if(fd < 0){ -+ err = fd; -+ printk("Open of COW file '%s' failed, errno = %d\n", cow_file, -+ -err); -+ goto out; -+ } -+ -+ err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, -+ bitmap_offset_out, bitmap_len_out, -+ data_offset_out); -+ if(!err) -+ return(fd); -+ -+ os_close_file(fd); -+ out: -+ return(err); -+} -+ -+/* XXX Just trivial wrappers around os_read_file and os_write_file */ -+int read_ubd_fs(int fd, void *buffer, int len) -+{ -+ return(os_read_file(fd, buffer, len)); -+} -+ -+int write_ubd_fs(int fd, char *buffer, int len) -+{ -+ return(os_write_file(fd, buffer, len)); -+} -+ -+static int update_bitmap(struct io_thread_req *req) -+{ -+ int n; -+ -+ if(req->cow_offset == -1) -+ return(0); -+ -+ n = os_seek_file(req->fds[1], req->cow_offset); -+ if(n < 0){ -+ printk("do_io - bitmap lseek failed : err = %d\n", -n); -+ return(1); -+ } -+ -+ n = os_write_file(req->fds[1], &req->bitmap_words, -+ sizeof(req->bitmap_words)); -+ if(n != sizeof(req->bitmap_words)){ -+ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, -+ req->fds[1]); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+void do_io(struct io_thread_req *req) -+{ -+ char *buf; -+ unsigned long len; -+ int n, nsectors, start, end, bit; -+ int err; -+ __u64 off; -+ -+ if(req->op == UBD_MMAP){ -+ /* Touch the page to force the host to do any necessary IO to -+ * get it into memory -+ */ -+ n = *((volatile int *) req->buffer); -+ req->error = update_bitmap(req); -+ return; -+ } -+ -+ nsectors = req->length / req->sectorsize; -+ start = 0; -+ do { -+ bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); -+ end = start; -+ while((end < nsectors) && -+ (ubd_test_bit(end, (unsigned char *) -+ &req->sector_mask) == bit)) -+ end++; -+ -+ off = req->offset + req->offsets[bit] + -+ start * req->sectorsize; -+ len = (end - start) * req->sectorsize; -+ buf = &req->buffer[start * req->sectorsize]; -+ -+ err = os_seek_file(req->fds[bit], off); -+ if(err < 0){ -+ printk("do_io - lseek failed : err = %d\n", -err); -+ req->error = 1; -+ return; -+ } -+ if(req->op == UBD_READ){ -+ n = 0; -+ do { -+ buf = &buf[n]; -+ len -= n; -+ n = os_read_file(req->fds[bit], buf, len); -+ if (n < 0) { -+ printk("do_io - read failed, err = %d " -+ "fd = %d\n", -n, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } while((n < len) && (n != 0)); -+ if (n < len) memset(&buf[n], 0, len - n); -+ } -+ else { -+ n = os_write_file(req->fds[bit], buf, len); -+ if(n != len){ -+ printk("do_io - write failed err = %d " -+ "fd = %d\n", -n, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } -+ -+ start = end; -+ } while(start < nsectors); -+ -+ req->error = update_bitmap(req); -+} -+ -+/* Changed in start_io_thread, which is serialized by being called only -+ * from ubd_init, which is an initcall. -+ */ -+int kernel_fd = -1; -+ -+/* Only changed by the io thread */ -+int io_count = 0; -+ -+int io_thread(void *arg) -+{ -+ struct io_thread_req req; -+ int n; -+ -+ signal(SIGWINCH, SIG_IGN); -+ while(1){ -+ n = os_read_file(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ if(n < 0) -+ printk("io_thread - read failed, fd = %d, " -+ "err = %d\n", kernel_fd, -n); -+ else { -+ printk("io_thread - short read, fd = %d, " -+ "length = %d\n", kernel_fd, n); -+ } -+ continue; -+ } -+ io_count++; -+ do_io(&req); -+ n = os_write_file(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)) -+ printk("io_thread - write failed, fd = %d, err = %d\n", -+ kernel_fd, -n); -+ } -+} -+ -+int start_io_thread(unsigned long sp, int *fd_out) -+{ -+ int pid, fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err < 0){ -+ printk("start_io_thread - os_pipe failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ kernel_fd = fds[0]; -+ *fd_out = fds[1]; -+ -+ pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, -+ NULL); -+ if(pid < 0){ -+ printk("start_io_thread - clone failed : errno = %d\n", errno); -+ goto out_close; -+ } -+ -+ return(pid); -+ -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ kernel_fd = -1; -+ *fd_out = -1; -+ out: -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/xterm.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/xterm.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/xterm.c 2005-05-03 22:28:14.260442296 +0300 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <termios.h> -+#include <signal.h> -+#include <sched.h> -+#include <sys/socket.h> -+#include "kern_util.h" -+#include "chan_user.h" -+#include "helper.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_chan { -+ int pid; -+ int helper_pid; -+ char *title; -+ int device; -+ int raw; -+ struct termios tt; -+ unsigned long stack; -+ int direct_rcv; -+}; -+ -+void *xterm_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct xterm_chan *data; -+ -+ data = malloc(sizeof(*data)); -+ if(data == NULL) return(NULL); -+ *data = ((struct xterm_chan) { .pid = -1, -+ .helper_pid = -1, -+ .device = device, -+ .title = opts->xterm_title, -+ .raw = opts->raw, -+ .stack = opts->tramp_stack, -+ .direct_rcv = !opts->in_kernel } ); -+ return(data); -+} -+ -+/* Only changed by xterm_setup, which is a setup */ -+static char *terminal_emulator = "xterm"; -+static char *title_switch = "-T"; -+static char *exec_switch = "-e"; -+ -+static int __init xterm_setup(char *line, int *add) -+{ -+ *add = 0; -+ terminal_emulator = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) title_switch = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) exec_switch = line; -+ -+ return(0); -+} -+ -+__uml_setup("xterm=", xterm_setup, -+"xterm=<terminal emulator>,<title switch>,<exec switch>\n" -+" Specifies an alternate terminal emulator to use for the debugger,\n" -+" consoles, and serial lines when they are attached to the xterm channel.\n" -+" The values are the terminal emulator binary, the switch it uses to set\n" -+" its title, and the switch it uses to execute a subprocess,\n" -+" respectively. The title switch must have the form '<switch> title',\n" -+" not '<switch>=title'. Similarly, the exec switch must have the form\n" -+" '<switch> command arg1 arg2 ...'.\n" -+" The default values are 'xterm=xterm,-T,-e'. Values for gnome-terminal\n" -+" are 'xterm=gnome-terminal,-t,-x'.\n\n" -+); -+ -+/* XXX This badly needs some cleaning up in the error paths */ -+int xterm_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct xterm_chan *data = d; -+ unsigned long stack; -+ int pid, fd, new, err; -+ char title[256], file[] = "/tmp/xterm-pipeXXXXXX"; -+ char *argv[] = { terminal_emulator, title_switch, title, exec_switch, -+ "/usr/lib/uml/port-helper", "-uml-socket", -+ file, NULL }; -+ -+ if(os_access(argv[4], OS_ACC_X_OK) < 0) -+ argv[4] = "port-helper"; -+ -+ fd = mkstemp(file); -+ if(fd < 0){ -+ printk("xterm_open : mkstemp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(unlink(file)){ -+ printk("xterm_open : unlink failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ os_close_file(fd); -+ -+ fd = os_create_unix_socket(file, sizeof(file), 1); -+ if(fd < 0){ -+ printk("xterm_open : create_unix_socket failed, errno = %d\n", -+ -fd); -+ return(fd); -+ } -+ -+ sprintf(title, data->title, data->device); -+ stack = data->stack; -+ pid = run_helper(NULL, NULL, argv, &stack); -+ if(pid < 0){ -+ printk("xterm_open : run_helper failed, errno = %d\n", -pid); -+ return(pid); -+ } -+ -+ if(data->stack == 0) free_stack(stack, 0); -+ -+ if(data->direct_rcv) -+ new = os_rcv_fd(fd, &data->helper_pid); -+ else { -+ err = os_set_fd_block(fd, 0); -+ if(err < 0){ -+ printk("xterm_open : failed to set descriptor " -+ "non-blocking, err = %d\n", -err); -+ return(err); -+ } -+ new = xterm_fd(fd, &data->helper_pid); -+ } -+ if(new < 0){ -+ printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); -+ goto out; -+ } -+ -+ CATCH_EINTR(err = tcgetattr(new, &data->tt)); -+ if(err){ -+ new = err; -+ goto out; -+ } -+ -+ if(data->raw){ -+ err = raw(new); -+ if(err){ -+ new = err; -+ goto out; -+ } -+ } -+ -+ data->pid = pid; -+ *dev_out = NULL; -+ out: -+ unlink(file); -+ return(new); -+} -+ -+void xterm_close(int fd, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ if(data->pid != -1) -+ os_kill_process(data->pid, 1); -+ data->pid = -1; -+ if(data->helper_pid != -1) -+ os_kill_process(data->helper_pid, 0); -+ data->helper_pid = -1; -+ os_close_file(fd); -+} -+ -+void xterm_free(void *d) -+{ -+ free(d); -+} -+ -+int xterm_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops xterm_ops = { -+ .type = "xterm", -+ .init = xterm_init, -+ .open = xterm_open, -+ .close = xterm_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = xterm_console_write, -+ .window_size = generic_window_size, -+ .free = xterm_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/xterm.h -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/xterm.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/xterm.h 2005-05-03 22:28:14.261442144 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __XTERM_H__ -+#define __XTERM_H__ -+ -+extern int xterm_fd(int socket, int *pid_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/drivers/xterm_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/drivers/xterm_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/drivers/xterm_kern.c 2005-05-03 22:28:14.262441992 +0300 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/errno.h" -+#include "linux/slab.h" -+#include "asm/semaphore.h" -+#include "asm/irq.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "kern_util.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_wait { -+ struct semaphore sem; -+ int fd; -+ int pid; -+ int new_fd; -+}; -+ -+static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct xterm_wait *xterm = data; -+ int fd; -+ -+ fd = os_rcv_fd(xterm->fd, &xterm->pid); -+ if(fd == -EAGAIN) -+ return; -+ -+ xterm->new_fd = fd; -+ up(&xterm->sem); -+} -+ -+int xterm_fd(int socket, int *pid_out) -+{ -+ struct xterm_wait *data; -+ int err, ret; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk(KERN_ERR "xterm_fd : failed to allocate xterm_wait\n"); -+ return(-ENOMEM); -+ } -+ *data = ((struct xterm_wait) -+ { .sem = __SEMAPHORE_INITIALIZER(data->sem, 0), -+ .fd = socket, -+ .pid = -1, -+ .new_fd = -1 }); -+ -+ err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "xterm", data); -+ if(err){ -+ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " -+ "err = %d\n", err); -+ ret = err; -+ goto out; -+ } -+ down(&data->sem); -+ -+ free_irq(XTERM_IRQ, data); -+ -+ ret = data->new_fd; -+ *pid_out = data->pid; -+ out: -+ kfree(data); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/externfs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/externfs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/externfs.c 2005-05-03 22:28:14.269440928 +0300 -@@ -0,0 +1,1283 @@ -+/* -+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/stddef.h> -+#include <linux/fs.h> -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/pagemap.h> -+#include <linux/blkdev.h> -+#include <asm/uaccess.h> -+#include "hostfs.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "user_util.h" -+#include "2_5compat.h" -+#include "mem.h" -+#include "filehandle.h" -+ -+struct externfs { -+ struct list_head list; -+ struct externfs_mount_ops *mount_ops; -+ struct file_system_type type; -+}; -+ -+static inline struct externfs_inode *EXTERNFS_I(struct inode *inode) -+{ -+ return(inode->u.generic_ip); -+} -+ -+#define file_externfs_i(file) EXTERNFS_I((file)->f_dentry->d_inode) -+ -+int externfs_d_delete(struct dentry *dentry) -+{ -+ return(1); -+} -+ -+struct dentry_operations externfs_dentry_ops = { -+}; -+ -+#define EXTERNFS_SUPER_MAGIC 0x00c0ffee -+ -+static struct inode_operations externfs_iops; -+static struct inode_operations externfs_dir_iops; -+static struct address_space_operations externfs_link_aops; -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *name; -+ int len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len -= parent->d_name.len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], parent->d_name.name, -+ parent->d_name.len); -+ parent = parent->d_parent; -+ } -+ -+ return(name); -+} -+ -+char *inode_name(struct inode *ino, int extra) -+{ -+ struct dentry *dentry; -+ -+ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); -+ return(dentry_name(dentry, extra)); -+} -+ -+char *inode_name_prefix(struct inode *inode, char *prefix) -+{ -+ int len; -+ char *name; -+ -+ len = strlen(prefix); -+ name = inode_name(inode, len); -+ if(name == NULL) -+ return(name); -+ -+ memmove(&name[len], name, strlen(name) + 1); -+ memcpy(name, prefix, strlen(prefix)); -+ return(name); -+} -+ -+static int read_name(struct inode *ino, char *name) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ /* The non-int inode fields are copied into ints by stat_file and -+ * then copied into the inode because passing the actual pointers -+ * in and having them treated as int * breaks on big-endian machines -+ */ -+ int err; -+ int i_dev, i_mode, i_nlink, i_blksize; -+ unsigned long long i_size; -+ unsigned long long i_ino; -+ unsigned long long i_blocks; -+ -+ err = (*ops->stat_file)(name, ino->i_sb->u.generic_sbp, -+ (dev_t *) &i_dev, &i_ino, &i_mode, &i_nlink, -+ &ino->i_uid, &ino->i_gid, &i_size, -+ &ino->i_atime, &ino->i_mtime, &ino->i_ctime, -+ &i_blksize, &i_blocks); -+ if(err) return(err); -+ ino->i_ino = i_ino; -+ ino->i_dev = i_dev; -+ ino->i_mode = i_mode; -+ ino->i_nlink = i_nlink; -+ ino->i_size = i_size; -+ ino->i_blksize = i_blksize; -+ ino->i_blocks = i_blocks; -+ return(0); -+} -+ -+static char *follow_link(char *link, -+ int (*do_read_link)(char *path, int uid, int gid, -+ char *buf, int size, -+ struct externfs_data *ed), -+ int uid, int gid, struct externfs_data *ed) -+{ -+ int len, n; -+ char *name, *resolved, *end; -+ -+ len = 64; -+ while(1){ -+ n = -ENOMEM; -+ name = kmalloc(len, GFP_KERNEL); -+ if(name == NULL) -+ goto out; -+ -+ n = (*do_read_link)(link, uid, gid, name, len, ed); -+ if(n < len) -+ break; -+ len *= 2; -+ kfree(name); -+ } -+ if(n < 0) -+ goto out_free; -+ -+ if(*name == '/') -+ return(name); -+ -+ end = strrchr(link, '/'); -+ if(end == NULL) -+ return(name); -+ -+ *(end + 1) = '\0'; -+ len = strlen(link) + strlen(name) + 1; -+ -+ resolved = kmalloc(len, GFP_KERNEL); -+ if(resolved == NULL){ -+ n = -ENOMEM; -+ goto out_free; -+ } -+ -+ sprintf(resolved, "%s%s", link, name); -+ kfree(name); -+ return(resolved); -+ -+ out_free: -+ kfree(name); -+ out: -+ return(ERR_PTR(n)); -+} -+ -+static int read_inode(struct inode *ino) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *name, *new; -+ int err, type; -+ -+ err = -ENOMEM; -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ goto out; -+ -+ type = (*ops->file_type)(name, NULL, ed); -+ if(type < 0){ -+ err = type; -+ goto out_free; -+ } -+ -+ if(type == OS_TYPE_SYMLINK){ -+ new = follow_link(name, ops->read_link, current->fsuid, -+ current->fsgid, ed); -+ if(IS_ERR(new)){ -+ err = PTR_ERR(new); -+ goto out_free; -+ } -+ kfree(name); -+ name = new; -+ } -+ -+ err = read_name(ino, name); -+ out_free: -+ kfree(name); -+ out: -+ return(err); -+} -+ -+void externfs_delete_inode(struct inode *ino) -+{ -+ struct externfs_inode *ext = EXTERNFS_I(ino); -+ struct externfs_file_ops *ops = ext->ops; -+ -+ (*ops->close_file)(ext, ino->i_size); -+ -+ clear_inode(ino); -+} -+ -+int externfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ /* do_statfs uses struct statfs64 internally, but the linux kernel -+ * struct statfs still has 32-bit versions for most of these fields, -+ * so we convert them here -+ */ -+ int err; -+ long long f_blocks; -+ long long f_bfree; -+ long long f_bavail; -+ long long f_files; -+ long long f_ffree; -+ struct externfs_data *ed = sb->u.generic_sbp; -+ -+ err = (*ed->file_ops->statfs)(&sf->f_bsize, &f_blocks, &f_bfree, -+ &f_bavail, &f_files, &f_ffree, -+ &sf->f_fsid, sizeof(sf->f_fsid), -+ &sf->f_namelen, sf->f_spare, ed); -+ if(err) -+ return(err); -+ -+ sf->f_blocks = f_blocks; -+ sf->f_bfree = f_bfree; -+ sf->f_bavail = f_bavail; -+ sf->f_files = f_files; -+ sf->f_ffree = f_ffree; -+ sf->f_type = EXTERNFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations externfs_sbops = { -+ .delete_inode = externfs_delete_inode, -+ .statfs = externfs_statfs, -+}; -+ -+int externfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ void *dir; -+ char *name; -+ unsigned long long next, ino; -+ int error, len; -+ struct externfs_file_ops *ops = file_externfs_i(file)->ops; -+ struct externfs_data *ed = -+ file->f_dentry->d_inode->i_sb->u.generic_sbp; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ dir = (*ops->open_dir)(name, current->fsuid, current->fsgid, ed); -+ kfree(name); -+ if(IS_ERR(dir)) -+ return(PTR_ERR(dir)); -+ -+ next = file->f_pos; -+ while((name = (*ops->read_dir)(dir, &next, &ino, &len, ed)) != NULL){ -+ error = (*filldir)(ent, name, len, file->f_pos, ino, -+ DT_UNKNOWN); -+ if(error) -+ break; -+ file->f_pos = next; -+ } -+ (*ops->close_dir)(dir, ed); -+ return(0); -+} -+ -+int externfs_file_open(struct inode *ino, struct file *file) -+{ -+ ino->i_nlink++; -+ return(0); -+} -+ -+int externfs_dir_open(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int externfs_dir_release(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int externfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ struct externfs_file_ops *ops = file_externfs_i(file)->ops; -+ struct inode *inode = dentry->d_inode; -+ struct externfs_data *ed = inode->i_sb->u.generic_sbp; -+ -+ return((*ops->truncate_file)(EXTERNFS_I(inode), inode->i_size, ed)); -+} -+ -+static struct file_operations externfs_file_fops = { -+ .owner = NULL, -+ .read = generic_file_read, -+ .write = generic_file_write, -+ .mmap = generic_file_mmap, -+ .open = externfs_file_open, -+ .release = NULL, -+ .fsync = externfs_fsync, -+}; -+ -+static struct file_operations externfs_dir_fops = { -+ .owner = NULL, -+ .readdir = externfs_readdir, -+ .open = externfs_dir_open, -+ .release = externfs_dir_release, -+ .fsync = externfs_fsync, -+}; -+ -+struct wp_info { -+ struct page *page; -+ int count; -+ unsigned long long start; -+ unsigned long long size; -+ int (*truncate)(struct externfs_inode *ei, __u64 size, -+ struct externfs_data *ed); -+ struct externfs_inode *ei; -+ struct externfs_data *ed; -+}; -+ -+static void externfs_finish_writepage(char *buffer, int res, void *arg) -+{ -+ struct wp_info *wp = arg; -+ -+ if(res == wp->count){ -+ ClearPageError(wp->page); -+ if(wp->start + res > wp->size) -+ (*wp->truncate)(wp->ei, wp->size, wp->ed); -+ } -+ else { -+ SetPageError(wp->page); -+ ClearPageUptodate(wp->page); -+ } -+ -+ kunmap(wp->page); -+ unlock_page(wp->page); -+ kfree(wp); -+} -+ -+static int externfs_writepage(struct page *page) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops; -+ struct wp_info *wp; -+ struct externfs_data *ed = inode->i_sb->u.generic_sbp; -+ char *buffer; -+ unsigned long long base; -+ int count = PAGE_CACHE_SIZE; -+ int end_index = inode->i_size >> PAGE_CACHE_SHIFT; -+ int err, offset; -+ -+ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; -+ -+ /* If we are entirely outside the file, then return an error */ -+ err = -EIO; -+ offset = inode->i_size & (PAGE_CACHE_SIZE-1); -+ if (page->index > end_index || -+ ((page->index == end_index) && !offset)) -+ goto out_unlock; -+ -+ err = -ENOMEM; -+ wp = kmalloc(sizeof(*wp), GFP_KERNEL); -+ if(wp == NULL) -+ goto out_unlock; -+ -+ *wp = ((struct wp_info) { .page = page, -+ .count = count, -+ .start = base, -+ .size = inode->i_size, -+ .truncate = ops->truncate_file, -+ .ei = EXTERNFS_I(inode), -+ .ed = ed }); -+ -+ buffer = kmap(page); -+ err = (*ops->write_file)(EXTERNFS_I(inode), base, buffer, 0, -+ count, externfs_finish_writepage, wp, ed); -+ -+ return err; -+ -+ out_unlock: -+ unlock_page(page); -+ return(err); -+} -+ -+static void externfs_finish_readpage(char *buffer, int res, void *arg) -+{ -+ struct page *page = arg; -+ struct inode *inode; -+ -+ if(res < 0){ -+ SetPageError(page); -+ goto out; -+ } -+ -+ inode = page->mapping->host; -+ if(inode->i_size >> PAGE_CACHE_SHIFT == page->index) -+ res = inode->i_size % PAGE_CACHE_SIZE; -+ -+ memset(&buffer[res], 0, PAGE_CACHE_SIZE - res); -+ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) -+ ClearPageError(page); -+ out: -+ kunmap(page); -+ unlock_page(page); -+} -+ -+static int externfs_readpage(struct file *file, struct page *page) -+{ -+ struct inode *ino = page->mapping->host; -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ -+ if(ops->map_file_page != NULL){ -+ /* XXX What happens when PAGE_SIZE != PAGE_CACHE_SIZE? */ -+ err = (*ops->map_file_page)(file_externfs_i(file), start, -+ buffer, file->f_mode & FMODE_WRITE, -+ ed); -+ if(!err) -+ err = PAGE_CACHE_SIZE; -+ } -+ else err = (*ops->read_file)(file_externfs_i(file), start, buffer, -+ PAGE_CACHE_SIZE, 0, 0, -+ externfs_finish_readpage, page, ed); -+ -+ if(err > 0) -+ err = 0; -+ return(err); -+} -+ -+struct writepage_info { -+ struct semaphore sem; -+ int res; -+}; -+ -+static void externfs_finish_prepare(char *buffer, int res, void *arg) -+{ -+ struct writepage_info *wp = arg; -+ -+ wp->res = res; -+ up(&wp->sem); -+} -+ -+int externfs_prepare_write(struct file *file, struct page *page, -+ unsigned int from, unsigned int to) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops; -+ struct externfs_data *ed = inode->i_sb->u.generic_sbp; -+ char *buffer; -+ long long start; -+ int err; -+ struct writepage_info wp; -+ -+ if(Page_Uptodate(page)) -+ return(0); -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ -+ if(ops->map_file_page != NULL){ -+ err = (*ops->map_file_page)(file_externfs_i(file), start, -+ buffer, file->f_mode & FMODE_WRITE, -+ ed); -+ goto out; -+ } -+ -+ init_MUTEX_LOCKED(&wp.sem); -+ err = (*ops->read_file)(file_externfs_i(file), start, buffer, -+ PAGE_CACHE_SIZE, from, to, -+ externfs_finish_prepare, &wp, ed); -+ down(&wp.sem); -+ if(err < 0) -+ goto out; -+ -+ err = wp.res; -+ if(err < 0) -+ goto out; -+ -+ if(from > 0) -+ memset(buffer, 0, from); -+ if(to < PAGE_CACHE_SIZE) -+ memset(buffer + to, 0, PAGE_CACHE_SIZE - to); -+ -+ SetPageUptodate(page); -+ err = 0; -+ out: -+ kunmap(page); -+ return(err); -+} -+ -+static int externfs_commit_write(struct file *file, struct page *page, -+ unsigned from, unsigned to) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops; -+ unsigned long long size; -+ long long start; -+ int err; -+ -+ start = (long long) (page->index << PAGE_CACHE_SHIFT); -+ -+ if(ops->map_file_page != NULL) -+ err = to - from; -+ else { -+ size = start + to; -+ if(size > inode->i_size){ -+ inode->i_size = size; -+ mark_inode_dirty(inode); -+ } -+ } -+ -+ set_page_dirty(page); -+ return(to - from); -+} -+ -+static void externfs_removepage(struct page *page) -+{ -+ physmem_remove_mapping(page_address(page)); -+} -+ -+static struct address_space_operations externfs_aops = { -+ .writepage = externfs_writepage, -+ .readpage = externfs_readpage, -+ .removepage = externfs_removepage, -+/* .set_page_dirty = __set_page_dirty_nobuffers, */ -+ .prepare_write = externfs_prepare_write, -+ .commit_write = externfs_commit_write -+}; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int need_fh) -+{ -+ struct inode *inode; -+ struct externfs_data *ed = sb->u.generic_sbp; -+ struct externfs_mount_ops *ops = ed->mount_ops; -+ char *name = NULL; -+ int type, err = -ENOMEM, rdev; -+ -+ if(dentry){ -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out; -+ type = (*ed->file_ops->file_type)(name, &rdev, ed); -+ } -+ else type = OS_TYPE_DIR; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out_free; -+ -+ insert_inode_hash(inode); -+ -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_op = &page_symlink_inode_operations; -+ else if(type == OS_TYPE_DIR) -+ inode->i_op = &externfs_dir_iops; -+ else inode->i_op = &externfs_iops; -+ -+ if(type == OS_TYPE_DIR) inode->i_fop = &externfs_dir_fops; -+ else inode->i_fop = &externfs_file_fops; -+ -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_mapping->a_ops = &externfs_link_aops; -+ else inode->i_mapping->a_ops = &externfs_aops; -+ -+ switch (type) { -+ case OS_TYPE_CHARDEV: -+ init_special_inode(inode, S_IFCHR, rdev); -+ break; -+ case OS_TYPE_BLOCKDEV: -+ init_special_inode(inode, S_IFBLK, rdev); -+ break; -+ case OS_TYPE_FIFO: -+ init_special_inode(inode, S_IFIFO, 0); -+ break; -+ case OS_TYPE_SOCK: -+ init_special_inode(inode, S_IFSOCK, 0); -+ break; -+ case OS_TYPE_SYMLINK: -+ inode->i_mode = S_IFLNK | S_IRWXUGO; -+ } -+ -+ if(need_fh){ -+ struct externfs_inode *ei; -+ -+ err = -ENOMEM; -+ ei = (*ops->init_file)(ed); -+ if(ei == NULL) -+ goto out_put; -+ -+ *ei = ((struct externfs_inode) { .ops = ed->file_ops }); -+ inode->u.generic_ip = ei; -+ -+ err = (*ed->file_ops->open_file)(ei, name, current->fsuid, -+ current->fsgid, inode, ed); -+ if(err && ((err != -ENOENT) && (err != -EISDIR))) -+ goto out_put; -+ } -+ -+ return(inode); -+ -+ out_put: -+ iput(inode); -+ out_free: -+ kfree(name); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+int externfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(dir)->ops; -+ struct inode *inode; -+ struct externfs_data *ed = dir->i_sb->u.generic_sbp; -+ struct externfs_inode *ei; -+ char *name; -+ int err = -ENOMEM; -+ -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out; -+ -+ inode = get_inode(dir->i_sb, dentry, 0); -+ if(IS_ERR(inode)){ -+ err = PTR_ERR(inode); -+ goto out_free; -+ } -+ -+ ei = (*ed->mount_ops->init_file)(ed); -+ if(ei == NULL) -+ /* XXX need a free_file() */ -+ goto out_put; -+ -+ *ei = ((struct externfs_inode) { .ops = ed->file_ops }); -+ inode->u.generic_ip = ei; -+ -+ err = (*ops->create_file)(ei, name, mode, current->fsuid, -+ current->fsuid, inode, ed); -+ if(err) -+ goto out_put; -+ -+ err = read_name(inode, name); -+ if(err) -+ goto out_rm; -+ -+ inode->i_nlink++; -+ d_instantiate(dentry, inode); -+ out_free: -+ kfree(name); -+ out: -+ return(err); -+ -+ out_rm: -+ (*ops->unlink_file)(name, ed); -+ out_put: -+ inode->i_nlink = 0; -+ iput(inode); -+ goto out_free; -+} -+ -+struct dentry *externfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct inode *inode; -+ char *name; -+ int err; -+ -+ inode = get_inode(ino->i_sb, dentry, 1); -+ if(IS_ERR(inode)){ -+ err = PTR_ERR(inode); -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out_put; -+ -+ err = read_name(inode, name); -+ kfree(name); -+ if(err){ -+ if(err != -ENOENT) -+ goto out_put; -+ -+ inode->i_nlink = 0; -+ iput(inode); -+ inode = NULL; -+ } -+ d_add(dentry, inode); -+ dentry->d_op = &externfs_dentry_ops; -+ return(NULL); -+ -+ out_put: -+ inode->i_nlink = 0; -+ iput(inode); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int len; -+ -+ file = inode_name(ino, dentry->d_name.len + 1); -+ if(file == NULL) return(NULL); -+ strcat(file, "/"); -+ len = strlen(file); -+ strncat(file, dentry->d_name.name, dentry->d_name.len); -+ file[len + dentry->d_name.len] = '\0'; -+ return(file); -+} -+ -+int externfs_link(struct dentry *to, struct inode *ino, struct dentry *from) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *from_name, *to_name; -+ int err = -ENOMEM; -+ -+ from_name = inode_dentry_name(ino, from); -+ if(from_name == NULL) -+ goto out; -+ -+ to_name = dentry_name(to, 0); -+ if(to_name == NULL) -+ goto out_free_from; -+ -+ err = (*ops->link_file)(to_name, from_name, current->fsuid, -+ current->fsgid, ed); -+ if(err) -+ goto out_free_to; -+ -+ d_instantiate(from, to->d_inode); -+ to->d_inode->i_nlink++; -+ atomic_inc(&to->d_inode->i_count); -+ -+ out_free_to: -+ kfree(to_name); -+ out_free_from: -+ kfree(from_name); -+ out: -+ return(err); -+} -+ -+int externfs_unlink(struct inode *ino, struct dentry *dentry) -+{ -+ struct inode *inode; -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) -+ return(-ENOMEM); -+ -+ inode = dentry->d_inode; -+ if((inode->i_nlink == 1) && (ops->invisible != NULL)) -+ (*ops->invisible)(EXTERNFS_I(inode)); -+ -+ err = (*ops->unlink_file)(file, ed); -+ kfree(file); -+ -+ inode->i_nlink--; -+ -+ return(err); -+} -+ -+int externfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ struct inode *inode; -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) -+ return(-ENOMEM); -+ err = (*ops->make_symlink)(file, to, current->fsuid, current->fsgid, -+ ed); -+ kfree(file); -+ -+ inode = get_inode(ino->i_sb, dentry, 1); -+ if(IS_ERR(inode)){ -+ err = PTR_ERR(inode); -+ goto out; -+ } -+ -+ d_instantiate(dentry, inode); -+ inode->i_nlink++; -+ iput(inode); -+ out: -+ return(err); -+} -+ -+int externfs_make_dir(struct inode *ino, struct dentry *dentry, int mode) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ struct inode *inode; -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) -+ return(-ENOMEM); -+ err = (*ops->make_dir)(file, mode, current->fsuid, current->fsgid, ed); -+ -+ inode = get_inode(ino->i_sb, dentry, 1); -+ if(IS_ERR(inode)){ -+ err = PTR_ERR(inode); -+ goto out_free; -+ } -+ -+ err = read_name(inode, file); -+ kfree(file); -+ if(err) -+ goto out_put; -+ -+ d_instantiate(dentry, inode); -+ inode->i_nlink = 2; -+ inode->i_mode = S_IFDIR | mode; -+ iput(inode); -+ -+ ino->i_nlink++; -+ out: -+ return(err); -+ out_free: -+ kfree(file); -+ out_put: -+ inode->i_nlink = 0; -+ iput(inode); -+ goto out; -+} -+ -+int externfs_remove_dir(struct inode *ino, struct dentry *dentry) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ void *mount = ino->i_sb->u.generic_sbp; -+ char *file; -+ int err; -+ -+ file = inode_dentry_name(ino, dentry); -+ if(file == NULL) -+ return(-ENOMEM); -+ err = (*ops->remove_dir)(file, current->fsuid, current->fsgid, mount); -+ kfree(file); -+ -+ dentry->d_inode->i_nlink = 0; -+ ino->i_nlink--; -+ return(err); -+} -+ -+int externfs_make_node(struct inode *dir, struct dentry *dentry, int mode, -+ int dev) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(dir)->ops; -+ struct externfs_data *ed = dir->i_sb->u.generic_sbp; -+ struct inode *inode; -+ char *name; -+ int err = -ENOMEM; -+ -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ goto out; -+ -+ inode = get_inode(dir->i_sb, dentry, 1); -+ if(IS_ERR(inode)){ -+ err = PTR_ERR(inode); -+ goto out_free; -+ } -+ -+ init_special_inode(inode, mode, dev); -+ err = (*ops->make_node)(name, mode & S_IRWXUGO, current->fsuid, -+ current->fsgid, mode & S_IFMT, major(dev), -+ minor(dev), ed); -+ if(err) -+ goto out_put; -+ -+ err = read_name(inode, name); -+ if(err) -+ goto out_rm; -+ -+ d_instantiate(dentry, inode); -+ out_free: -+ kfree(name); -+ out: -+ return(err); -+ -+ out_rm: -+ (*ops->unlink_file)(name, ed); -+ out_put: -+ inode->i_nlink = 0; -+ iput(inode); -+ goto out_free; -+} -+ -+int externfs_rename(struct inode *from_ino, struct dentry *from, -+ struct inode *to_ino, struct dentry *to) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(from_ino)->ops; -+ struct externfs_data *ed = from_ino->i_sb->u.generic_sbp; -+ char *from_name, *to_name; -+ int err; -+ -+ from_name = inode_dentry_name(from_ino, from); -+ if(from_name == NULL) -+ return(-ENOMEM); -+ to_name = inode_dentry_name(to_ino, to); -+ if(to_name == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = (*ops->rename_file)(from_name, to_name, ed); -+ kfree(from_name); -+ kfree(to_name); -+ -+ from_ino->i_nlink--; -+ to_ino->i_nlink++; -+ return(err); -+} -+ -+void externfs_truncate(struct inode *ino) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ -+ (*ops->truncate_file)(EXTERNFS_I(ino), ino->i_size, ed); -+} -+ -+int externfs_permission(struct inode *ino, int desired) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *name; -+ int r = 0, w = 0, x = 0, err; -+ -+ if(ops->access_file == NULL) -+ return(vfs_permission(ino, desired)); -+ -+ if(desired & MAY_READ) r = 1; -+ if(desired & MAY_WRITE) w = 1; -+ if(desired & MAY_EXEC) x = 1; -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ err = (*ops->access_file)(name, r, w, x, current->fsuid, -+ current->fsgid, ed); -+ kfree(name); -+ -+ if(!err) -+ err = vfs_permission(ino, desired); -+ return(err); -+} -+ -+int externfs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct externfs_file_ops *ops = EXTERNFS_I(dentry->d_inode)->ops; -+ struct externfs_data *ed = dentry->d_inode->i_sb->u.generic_sbp; -+ struct externfs_iattr attrs; -+ char *name; -+ int err; -+ -+ attrs.ia_valid = 0; -+ if(attr->ia_valid & ATTR_MODE){ -+ attrs.ia_valid |= EXTERNFS_ATTR_MODE; -+ attrs.ia_mode = attr->ia_mode; -+ } -+ if(attr->ia_valid & ATTR_UID){ -+ attrs.ia_valid |= EXTERNFS_ATTR_UID; -+ attrs.ia_uid = attr->ia_uid; -+ } -+ if(attr->ia_valid & ATTR_GID){ -+ attrs.ia_valid |= EXTERNFS_ATTR_GID; -+ attrs.ia_gid = attr->ia_gid; -+ } -+ if(attr->ia_valid & ATTR_SIZE){ -+ attrs.ia_valid |= EXTERNFS_ATTR_SIZE; -+ attrs.ia_size = attr->ia_size; -+ } -+ if(attr->ia_valid & ATTR_ATIME){ -+ attrs.ia_valid |= EXTERNFS_ATTR_ATIME; -+ attrs.ia_atime = attr->ia_atime; -+ } -+ if(attr->ia_valid & ATTR_MTIME){ -+ attrs.ia_valid |= EXTERNFS_ATTR_MTIME; -+ attrs.ia_mtime = attr->ia_mtime; -+ } -+ if(attr->ia_valid & ATTR_CTIME){ -+ attrs.ia_valid |= EXTERNFS_ATTR_CTIME; -+ attrs.ia_ctime = attr->ia_ctime; -+ } -+ if(attr->ia_valid & ATTR_ATIME_SET){ -+ attrs.ia_valid |= EXTERNFS_ATTR_ATIME_SET; -+ } -+ if(attr->ia_valid & ATTR_MTIME_SET){ -+ attrs.ia_valid |= EXTERNFS_ATTR_MTIME_SET; -+ } -+ name = dentry_name(dentry, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ err = (*ops->set_attr)(name, &attrs, ed); -+ kfree(name); -+ if(err) -+ return(err); -+ -+ return(inode_setattr(dentry->d_inode, attr)); -+} -+ -+int externfs_getattr(struct dentry *dentry, struct iattr *attr) -+{ -+ not_implemented(); -+ return(-EINVAL); -+} -+ -+static struct inode_operations externfs_iops = { -+ .create = externfs_create, -+ .link = externfs_link, -+ .unlink = externfs_unlink, -+ .symlink = externfs_symlink, -+ .mkdir = externfs_make_dir, -+ .rmdir = externfs_remove_dir, -+ .mknod = externfs_make_node, -+ .rename = externfs_rename, -+ .truncate = externfs_truncate, -+ .permission = externfs_permission, -+ .setattr = externfs_setattr, -+ .getattr = externfs_getattr, -+}; -+ -+static struct inode_operations externfs_dir_iops = { -+ .create = externfs_create, -+ .lookup = externfs_lookup, -+ .link = externfs_link, -+ .unlink = externfs_unlink, -+ .symlink = externfs_symlink, -+ .mkdir = externfs_make_dir, -+ .rmdir = externfs_remove_dir, -+ .mknod = externfs_make_node, -+ .rename = externfs_rename, -+ .truncate = externfs_truncate, -+ .permission = externfs_permission, -+ .setattr = externfs_setattr, -+ .getattr = externfs_getattr, -+}; -+ -+int externfs_link_readpage(struct file *file, struct page *page) -+{ -+ struct inode *ino = page->mapping->host; -+ struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops; -+ struct externfs_data *ed = ino->i_sb->u.generic_sbp; -+ char *buffer, *name; -+ long long start; -+ int err; -+ -+ start = page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ err = (*ops->read_link)(name, current->fsuid, current->fsgid, buffer, -+ PAGE_CACHE_SIZE, ed); -+ -+ kfree(name); -+ if(err == PAGE_CACHE_SIZE) -+ err = -E2BIG; -+ else if(err > 0){ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ } -+ kunmap(page); -+ UnlockPage(page); -+ return(err); -+} -+ -+static int externfs_flushpage(struct page *page, unsigned long offset) -+{ -+ return(externfs_writepage(page)); -+} -+ -+struct externfs_data *inode_externfs_info(struct inode *inode) -+{ -+ return(inode->i_sb->u.generic_sbp); -+} -+ -+static struct address_space_operations externfs_link_aops = { -+ .readpage = externfs_link_readpage, -+ .removepage = externfs_removepage, -+ .flushpage = externfs_flushpage, -+}; -+ -+DECLARE_MUTEX(externfs_sem); -+struct list_head externfses = LIST_HEAD_INIT(externfses); -+ -+static struct externfs *find_externfs(struct file_system_type *type) -+{ -+ struct list_head *ele; -+ struct externfs *fs; -+ -+ down(&externfs_sem); -+ list_for_each(ele, &externfses){ -+ fs = list_entry(ele, struct externfs, list); -+ if(&fs->type == type) -+ goto out; -+ } -+ fs = NULL; -+ out: -+ up(&externfs_sem); -+ return(fs); -+} -+ -+#define DEFAULT_ROOT "/" -+ -+char *host_root_filename(char *mount_arg) -+{ -+ char *root = DEFAULT_ROOT; -+ -+ if((mount_arg != NULL) && (*mount_arg != '\0')) -+ root = mount_arg; -+ -+ return(uml_strdup(root)); -+} -+ -+struct super_block *externfs_read_super(struct super_block *sb, void *data, -+ int silent) -+{ -+ struct externfs *fs; -+ struct inode *root_inode; -+ struct externfs_data *sb_data; -+ int err = -EINVAL; -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = EXTERNFS_SUPER_MAGIC; -+ sb->s_op = &externfs_sbops; -+ -+ fs = find_externfs(sb->s_type); -+ if(fs == NULL){ -+ printk("Couldn't find externfs for filesystem '%s'\n", -+ sb->s_type->name); -+ goto out; -+ } -+ -+ sb_data = (*fs->mount_ops->mount)(data); -+ if(IS_ERR(sb_data)){ -+ err = PTR_ERR(sb_data); -+ goto out; -+ } -+ -+ sb->u.generic_sbp = sb_data; -+ sb_data->mount_ops = fs->mount_ops; -+ -+ root_inode = get_inode(sb, NULL, 1); -+ if(IS_ERR(root_inode)) -+ goto out; -+ -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ if(read_inode(root_inode)) -+ goto out_dput; -+ return(sb); -+ -+ out_dput: -+ /* dput frees the inode */ -+ dput(sb->s_root); -+ return(NULL); -+ out_put: -+ root_inode->i_nlink = 0; -+ make_bad_inode(root_inode); -+ iput(root_inode); -+ out: -+ return(NULL); -+} -+ -+void init_externfs(struct externfs_data *ed, struct externfs_file_ops *ops) -+{ -+ ed->file_ops = ops; -+} -+ -+int register_externfs(char *name, struct externfs_mount_ops *mount_ops) -+{ -+ struct externfs *new; -+ int err = -ENOMEM; -+ -+ new = kmalloc(sizeof(*new), GFP_KERNEL); -+ if(new == NULL) -+ goto out; -+ -+ memset(new, 0, sizeof(*new)); -+ *new = ((struct externfs) { .list = LIST_HEAD_INIT(new->list), -+ .mount_ops = mount_ops, -+ .type = { .name = name, -+ .read_super = externfs_read_super, -+ .fs_flags = 0, -+ .owner = THIS_MODULE } }); -+ list_add(&new->list, &externfses); -+ -+ err = register_filesystem(&new->type); -+ if(err) -+ goto out_del; -+ return(0); -+ -+ out_del: -+ list_del(&new->list); -+ kfree(new); -+ out: -+ return(err); -+} -+ -+void unregister_externfs(char *name) -+{ -+ struct list_head *ele; -+ struct externfs *fs; -+ -+ down(&externfs_sem); -+ list_for_each(ele, &externfses){ -+ fs = list_entry(ele, struct externfs, list); -+ if(!strcmp(fs->type.name, name)){ -+ list_del(ele); -+ up(&externfs_sem); -+ return; -+ } -+ } -+ up(&externfs_sem); -+ printk("Unregister_externfs - filesystem '%s' not found\n", name); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/host_file.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/host_file.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/host_file.c 2005-05-03 22:28:14.271440624 +0300 -@@ -0,0 +1,441 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/string.h" -+#include "linux/errno.h" -+#include "linux/types.h" -+#include "linux/slab.h" -+#include "linux/blkdev.h" -+#include "asm/fcntl.h" -+#include "hostfs.h" -+ -+extern int append; -+ -+char *get_path(const char *path[], char *buf, int size) -+{ -+ const char **s; -+ char *p; -+ int new = 1; -+ -+ for(s = path; *s != NULL; s++){ -+ new += strlen(*s); -+ if((*(s + 1) != NULL) && (strlen(*s) > 0) && -+ ((*s)[strlen(*s) - 1] != '/')) -+ new++; -+ } -+ -+ if(new > size){ -+ buf = kmalloc(new, GFP_KERNEL); -+ if(buf == NULL) -+ return(NULL); -+ } -+ -+ p = buf; -+ for(s = path; *s != NULL; s++){ -+ strcpy(p, *s); -+ p += strlen(*s); -+ if((*(s + 1) != NULL) && (strlen(*s) > 0) && -+ ((*s)[strlen(*s) - 1] != '/')) -+ strcpy(p++, "/"); -+ } -+ -+ return(buf); -+} -+ -+void free_path(const char *buf, char *tmp) -+{ -+ if((buf != tmp) && (buf != NULL)) -+ kfree((char *) buf); -+} -+ -+int host_open_file(const char *path[], int r, int w, struct file_handle *fh) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int mode = 0, err; -+ struct openflags flags = OPENFLAGS(); -+ -+ if (r) -+ flags = of_read(flags); -+ if (w) -+ flags = of_write(flags); -+ if(append) -+ flags = of_append(flags); -+ -+ err = -ENOMEM; -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = open_filehandle(file, flags, mode, fh); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+void *host_open_dir(const char *path[]) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ void *dir = ERR_PTR(-ENOMEM); -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ dir = open_dir(file); -+ out: -+ free_path(file, tmp); -+ return(dir); -+} -+ -+char *host_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out) -+{ -+ int err; -+ char *name; -+ -+ err = os_seek_dir(stream, *pos); -+ if(err) -+ return(ERR_PTR(err)); -+ -+ err = os_read_dir(stream, ino_out, &name); -+ if(err) -+ return(ERR_PTR(err)); -+ -+ if(name == NULL) -+ return(NULL); -+ -+ *len_out = strlen(name); -+ *pos = os_tell_dir(stream); -+ return(name); -+} -+ -+int host_file_type(const char *path[], int *rdev) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ struct uml_stat buf; -+ int ret; -+ -+ ret = -ENOMEM; -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ if(rdev != NULL){ -+ ret = os_lstat_file(file, &buf); -+ if(ret) -+ goto out; -+ *rdev = MKDEV(buf.ust_rmajor, buf.ust_rminor); -+ } -+ -+ ret = os_file_type(file); -+ out: -+ free_path(file, tmp); -+ return(ret); -+} -+ -+int host_create_file(const char *path[], int mode, struct file_handle *fh) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = open_filehandle(file, of_create(of_rdwr(OPENFLAGS())), mode, fh); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+static int do_stat_file(const char *path, dev_t *dev_out, -+ unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_lstat_file(path, &buf); -+ if(err < 0) -+ return(err); -+ -+ if(dev_out != NULL) *dev_out = MKDEV(buf.ust_major, buf.ust_minor); -+ if(inode_out != NULL) *inode_out = buf.ust_ino; -+ if(mode_out != NULL) *mode_out = buf.ust_mode; -+ if(nlink_out != NULL) *nlink_out = buf.ust_nlink; -+ if(uid_out != NULL) *uid_out = buf.ust_uid; -+ if(gid_out != NULL) *gid_out = buf.ust_gid; -+ if(size_out != NULL) *size_out = buf.ust_size; -+ if(atime_out != NULL) *atime_out = buf.ust_atime; -+ if(mtime_out != NULL) *mtime_out = buf.ust_mtime; -+ if(ctime_out != NULL) *ctime_out = buf.ust_ctime; -+ if(blksize_out != NULL) *blksize_out = buf.ust_blksize; -+ if(blocks_out != NULL) *blocks_out = buf.ust_blocks; -+ -+ return(0); -+} -+ -+int host_stat_file(const char *path[], dev_t *dev_out, -+ unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err; -+ -+ err = -ENOMEM; -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = do_stat_file(file, dev_out, inode_out, mode_out, nlink_out, -+ uid_out, gid_out, size_out, atime_out, mtime_out, -+ ctime_out, blksize_out, blocks_out); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_set_attr(const char *path[], struct externfs_iattr *attrs) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ unsigned long time; -+ int err = 0, ma; -+ -+ if(append && (attrs->ia_valid & EXTERNFS_ATTR_SIZE)) -+ return(-EPERM); -+ -+ err = -ENOMEM; -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ if(attrs->ia_valid & EXTERNFS_ATTR_MODE){ -+ err = os_set_file_perms(file, attrs->ia_mode); -+ if(err < 0) -+ goto out; -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_UID){ -+ err = os_set_file_owner(file, attrs->ia_uid, -1); -+ if(err < 0) -+ goto out; -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_GID){ -+ err = os_set_file_owner(file, -1, attrs->ia_gid); -+ if(err < 0) -+ goto out; -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_SIZE){ -+ err = os_truncate_file(file, attrs->ia_size); -+ if(err < 0) -+ goto out; -+ } -+ ma = EXTERNFS_ATTR_ATIME_SET | EXTERNFS_ATTR_MTIME_SET; -+ if((attrs->ia_valid & ma) == ma){ -+ err = os_set_file_time(file, attrs->ia_atime, attrs->ia_mtime); -+ if(err) -+ goto out; -+ } -+ else { -+ if(attrs->ia_valid & EXTERNFS_ATTR_ATIME_SET){ -+ err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, &time, -+ NULL, NULL, NULL); -+ if(err != 0) -+ goto out; -+ -+ err = os_set_file_time(file, attrs->ia_atime, time); -+ if(err) -+ goto out; -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_MTIME_SET){ -+ err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &time, NULL, -+ NULL, NULL, NULL); -+ if(err != 0) -+ goto out; -+ -+ err = os_set_file_time(file, time, attrs->ia_mtime); -+ if(err) -+ goto out; -+ } -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_CTIME) ; -+ if(attrs->ia_valid & (EXTERNFS_ATTR_ATIME | EXTERNFS_ATTR_MTIME)){ -+ err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &attrs->ia_atime, -+ &attrs->ia_mtime, NULL, NULL, NULL); -+ if(err != 0) -+ goto out; -+ } -+ -+ err = 0; -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_make_symlink(const char *from[], const char *to) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(from, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_make_symlink(to, file); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_unlink_file(const char *path[]) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ if(append) -+ return(-EPERM); -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_remove_file(file); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_make_dir(const char *path[], int mode) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_make_dir(file, mode); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_remove_dir(const char *path[]) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_remove_dir(file); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+int host_link_file(const char *to[], const char *from[]) -+{ -+ char from_tmp[HOSTFS_BUFSIZE], *f, to_tmp[HOSTFS_BUFSIZE], *t; -+ int err = -ENOMEM; -+ -+ f = get_path(from, from_tmp, sizeof(from_tmp)); -+ t = get_path(to, to_tmp, sizeof(to_tmp)); -+ if((f == NULL) || (t == NULL)) -+ goto out; -+ -+ err = os_link_file(t, f); -+ out: -+ free_path(f, from_tmp); -+ free_path(t, to_tmp); -+ return(err); -+} -+ -+int host_read_link(const char *path[], char *buf, int size) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int n = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ n = os_read_symlink(file, buf, size); -+ if(n < size) -+ buf[n] = '\0'; -+ out: -+ free_path(file, tmp); -+ return(n); -+} -+ -+int host_rename_file(const char *from[], const char *to[]) -+{ -+ char from_tmp[HOSTFS_BUFSIZE], *f, to_tmp[HOSTFS_BUFSIZE], *t; -+ int err = -ENOMEM; -+ -+ f = get_path(from, from_tmp, sizeof(from_tmp)); -+ t = get_path(to, to_tmp, sizeof(to_tmp)); -+ if((f == NULL) || (t == NULL)) -+ goto out; -+ -+ err = os_move_file(f, t); -+ out: -+ free_path(f, from_tmp); -+ free_path(t, to_tmp); -+ return(err); -+} -+ -+int host_stat_fs(const char *path[], long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, void *fsid_out, -+ int fsid_size, long *namelen_out, long *spare_out) -+{ -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_stat_filesystem(file, bsize_out, blocks_out, bfree_out, -+ bavail_out, files_out, ffree_out, fsid_out, -+ fsid_size, namelen_out, spare_out); -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+char *generic_host_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out, -+ void *mount) -+{ -+ return(host_read_dir(stream, pos, ino_out, len_out)); -+} -+ -+int generic_host_truncate_file(struct file_handle *fh, __u64 size, void *m) -+{ -+ return(truncate_file(fh, size)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/host_fs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/host_fs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/host_fs.c 2005-05-03 22:28:14.273440320 +0300 -@@ -0,0 +1,465 @@ -+/* -+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/string.h" -+#include "linux/types.h" -+#include "linux/errno.h" -+#include "linux/slab.h" -+#include "linux/init.h" -+#include "linux/fs.h" -+#include "linux/stat.h" -+#include "hostfs.h" -+#include "kern.h" -+#include "init.h" -+#include "kern_util.h" -+#include "filehandle.h" -+#include "os.h" -+ -+/* Changed in hostfs_args before the kernel starts running */ -+static char *jail_dir = "/"; -+int append = 0; -+ -+static int __init hostfs_args(char *options, int *add) -+{ -+ char *ptr; -+ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0') -+ jail_dir = options; -+ -+ options = ptr; -+ while(options){ -+ ptr = strchr(options, ','); -+ if(ptr != NULL) -+ *ptr++ = '\0'; -+ if(*options != '\0'){ -+ if(!strcmp(options, "append")) -+ append = 1; -+ else printf("hostfs_args - unsupported option - %s\n", -+ options); -+ } -+ options = ptr; -+ } -+ return(0); -+} -+ -+__uml_setup("hostfs=", hostfs_args, -+"hostfs=<root dir>,<flags>,...\n" -+" This is used to set hostfs parameters. The root directory argument\n" -+" is used to confine all hostfs mounts to within the specified directory\n" -+" tree on the host. If this isn't specified, then a user inside UML can\n" -+" mount anything on the host that's accessible to the user that's running\n" -+" it.\n" -+" The only flag currently supported is 'append', which specifies that all\n" -+" files opened by hostfs will be opened in append mode.\n\n" -+); -+ -+struct hostfs_data { -+ struct externfs_data ext; -+ char *mount; -+}; -+ -+struct hostfs_file { -+ struct externfs_inode ext; -+ struct file_handle fh; -+}; -+ -+static int hostfs_access_file(char *file, int r, int w, int x, int uid, -+ int gid, struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ char tmp[HOSTFS_BUFSIZE]; -+ int err, mode = 0; -+ -+ if(r) mode = OS_ACC_R_OK; -+ if(w) mode |= OS_ACC_W_OK; -+ if(x) mode |= OS_ACC_X_OK; -+ -+ err = -ENOMEM; -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_access(file, mode); -+ free_path(file, tmp); -+ out: -+ return(err); -+} -+ -+static int hostfs_make_node(const char *file, int mode, int uid, int gid, -+ int type, int major, int minor, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ char tmp[HOSTFS_BUFSIZE]; -+ int err = -ENOMEM; -+ -+ file = get_path(path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ /* XXX Pass type in an OS-independent way */ -+ mode |= type; -+ -+ err = os_make_dev(file, mode, major, minor); -+ free_path(file, tmp); -+ out: -+ return(err); -+} -+ -+static int hostfs_stat_file(const char *file, struct externfs_data *ed, -+ dev_t *dev_out, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, -+ int *gid_out, unsigned long long *size_out, -+ unsigned long *atime_out, unsigned long *mtime_out, -+ unsigned long *ctime_out, int *blksize_out, -+ unsigned long long *blocks_out) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ /* XXX Why pretend everything is owned by root? */ -+ *uid_out = 0; -+ *gid_out = 0; -+ return(host_stat_file(path, dev_out, inode_out, mode_out, nlink_out, -+ NULL, NULL, size_out, atime_out, mtime_out, -+ ctime_out, blksize_out, blocks_out)); -+} -+ -+static int hostfs_file_type(const char *file, int *rdev, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_file_type(path, rdev)); -+} -+ -+static char *hostfs_name(struct inode *inode) -+{ -+ struct externfs_data *ed = inode_externfs_info(inode); -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ -+ return(inode_name_prefix(inode, mount)); -+} -+ -+static struct externfs_inode *hostfs_init_file(struct externfs_data *ed) -+{ -+ struct hostfs_file *hf; -+ -+ hf = kmalloc(sizeof(*hf), GFP_KERNEL); -+ if(hf == NULL) -+ return(NULL); -+ -+ hf->fh.fd = -1; -+ return(&hf->ext); -+} -+ -+static int hostfs_open_file(struct externfs_inode *ext, char *file, -+ int uid, int gid, struct inode *inode, -+ struct externfs_data *ed) -+{ -+ struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext); -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ int err; -+ -+ err = host_open_file(path, 1, 1, &hf->fh); -+ if(err == -EISDIR) -+ goto out; -+ -+ if(err == -EACCES) -+ err = host_open_file(path, 1, 0, &hf->fh); -+ -+ if(err) -+ goto out; -+ -+ is_reclaimable(&hf->fh, hostfs_name, inode); -+ out: -+ return(err); -+} -+ -+static void *hostfs_open_dir(char *file, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_open_dir(path)); -+} -+ -+static void hostfs_close_dir(void *stream, struct externfs_data *ed) -+{ -+ os_close_dir(stream); -+} -+ -+static char *hostfs_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ -+ return(generic_host_read_dir(stream, pos, ino_out, len_out, mount)); -+} -+ -+static int hostfs_read_file(struct externfs_inode *ext, -+ unsigned long long offset, char *buf, int len, -+ int ignore_start, int ignore_end, -+ void (*completion)(char *, int, void *), void *arg, -+ struct externfs_data *ed) -+{ -+ struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext); -+ int err = 0; -+ -+ if(ignore_start != 0){ -+ err = read_file(&hf->fh, offset, buf, ignore_start); -+ if(err < 0) -+ goto out; -+ } -+ -+ if(ignore_end != len) -+ err = read_file(&hf->fh, offset + ignore_end, buf + ignore_end, -+ len - ignore_end); -+ -+ out: -+ -+ (*completion)(buf, err, arg); -+ if (err > 0) -+ err = 0; -+ return(err); -+} -+ -+static int hostfs_write_file(struct externfs_inode *ext, -+ unsigned long long offset, const char *buf, -+ int start, int len, -+ void (*completion)(char *, int, void *), -+ void *arg, struct externfs_data *ed) -+{ -+ struct file_handle *fh; -+ int err; -+ -+ fh = &container_of(ext, struct hostfs_file, ext)->fh; -+ err = write_file(fh, offset + start, buf + start, len); -+ -+ (*completion)((char *) buf, err, arg); -+ if (err > 0) -+ err = 0; -+ -+ return(err); -+} -+ -+static int hostfs_create_file(struct externfs_inode *ext, char *file, int mode, -+ int uid, int gid, struct inode *inode, -+ struct externfs_data *ed) -+{ -+ struct hostfs_file *hf = container_of(ext, struct hostfs_file, -+ ext); -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ int err = -ENOMEM; -+ -+ err = host_create_file(path, mode, &hf->fh); -+ if(err) -+ goto out; -+ -+ is_reclaimable(&hf->fh, hostfs_name, inode); -+ out: -+ return(err); -+} -+ -+static int hostfs_set_attr(const char *file, struct externfs_iattr *attrs, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_set_attr(path, attrs)); -+} -+ -+static int hostfs_make_symlink(const char *from, const char *to, int uid, -+ int gid, struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, from, NULL }; -+ -+ return(host_make_symlink(path, to)); -+} -+ -+static int hostfs_link_file(const char *to, const char *from, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *to_path[] = { jail_dir, mount, to, NULL }; -+ const char *from_path[] = { jail_dir, mount, from, NULL }; -+ -+ return(host_link_file(to_path, from_path)); -+} -+ -+static int hostfs_unlink_file(const char *file, struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_unlink_file(path)); -+} -+ -+static int hostfs_make_dir(const char *file, int mode, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_make_dir(path, mode)); -+} -+ -+static int hostfs_remove_dir(const char *file, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_remove_dir(path)); -+} -+ -+static int hostfs_read_link(char *file, int uid, int gid, char *buf, int size, -+ struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, file, NULL }; -+ -+ return(host_read_link(path, buf, size)); -+} -+ -+static int hostfs_rename_file(char *from, char *to, struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *to_path[] = { jail_dir, mount, to, NULL }; -+ const char *from_path[] = { jail_dir, mount, from, NULL }; -+ -+ return(host_rename_file(from_path, to_path)); -+} -+ -+static int hostfs_stat_fs(long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out, struct externfs_data *ed) -+{ -+ char *mount = container_of(ed, struct hostfs_data, ext)->mount; -+ const char *path[] = { jail_dir, mount, NULL }; -+ -+ return(host_stat_fs(path, bsize_out, blocks_out, bfree_out, bavail_out, -+ files_out, ffree_out, fsid_out, fsid_size, -+ namelen_out, spare_out)); -+} -+ -+static void hostfs_close_file(struct externfs_inode *ext, -+ unsigned long long size) -+{ -+ struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext); -+ -+ if(hf->fh.fd == -1) -+ return; -+ -+ truncate_file(&hf->fh, size); -+ close_file(&hf->fh); -+} -+ -+static int hostfs_truncate_file(struct externfs_inode *ext, __u64 size, -+ struct externfs_data *ed) -+{ -+ struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext); -+ -+ return(truncate_file(&hf->fh, size)); -+} -+ -+static struct externfs_file_ops hostfs_file_ops = { -+ .stat_file = hostfs_stat_file, -+ .file_type = hostfs_file_type, -+ .access_file = hostfs_access_file, -+ .open_file = hostfs_open_file, -+ .open_dir = hostfs_open_dir, -+ .read_dir = hostfs_read_dir, -+ .read_file = hostfs_read_file, -+ .write_file = hostfs_write_file, -+ .map_file_page = NULL, -+ .close_file = hostfs_close_file, -+ .close_dir = hostfs_close_dir, -+ .invisible = NULL, -+ .create_file = hostfs_create_file, -+ .set_attr = hostfs_set_attr, -+ .make_symlink = hostfs_make_symlink, -+ .unlink_file = hostfs_unlink_file, -+ .make_dir = hostfs_make_dir, -+ .remove_dir = hostfs_remove_dir, -+ .make_node = hostfs_make_node, -+ .link_file = hostfs_link_file, -+ .read_link = hostfs_read_link, -+ .rename_file = hostfs_rename_file, -+ .statfs = hostfs_stat_fs, -+ .truncate_file = hostfs_truncate_file -+}; -+ -+static struct externfs_data *mount_fs(char *mount_arg) -+{ -+ struct hostfs_data *hd; -+ int err = -ENOMEM; -+ -+ hd = kmalloc(sizeof(*hd), GFP_KERNEL); -+ if(hd == NULL) -+ goto out; -+ -+ hd->mount = host_root_filename(mount_arg); -+ if(hd->mount == NULL) -+ goto out_free; -+ -+ init_externfs(&hd->ext, &hostfs_file_ops); -+ -+ return(&hd->ext); -+ out_free: -+ kfree(hd); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+static struct externfs_mount_ops hostfs_mount_ops = { -+ .init_file = hostfs_init_file, -+ .mount = mount_fs, -+}; -+ -+static int __init init_hostfs(void) -+{ -+ return(register_externfs("hostfs", &hostfs_mount_ops)); -+} -+ -+static void __exit exit_hostfs(void) -+{ -+ unregister_externfs("hostfs"); -+} -+ -+__initcall(init_hostfs); -+__exitcall(exit_hostfs); -+ -+#if 0 -+module_init(init_hostfs) -+module_exit(exit_hostfs) -+MODULE_LICENSE("GPL"); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/hostfs.h -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/hostfs.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/hostfs.h 2005-05-03 23:46:13.801043992 +0300 -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_FS_HOSTFS -+#define __UM_FS_HOSTFS -+ -+#include "linux/fs.h" -+#include "linux/blkdev.h" -+#include "filehandle.h" -+#include "os.h" -+ -+/* These are exactly the same definitions as in fs.h, but the names are -+ * changed so that this file can be included in both kernel and user files. -+ */ -+ -+#define EXTERNFS_ATTR_MODE 1 -+#define EXTERNFS_ATTR_UID 2 -+#define EXTERNFS_ATTR_GID 4 -+#define EXTERNFS_ATTR_SIZE 8 -+#define EXTERNFS_ATTR_ATIME 16 -+#define EXTERNFS_ATTR_MTIME 32 -+#define EXTERNFS_ATTR_CTIME 64 -+#define EXTERNFS_ATTR_ATIME_SET 128 -+#define EXTERNFS_ATTR_MTIME_SET 256 -+#define EXTERNFS_ATTR_FORCE 512 /* Not a change, but a change it */ -+#define EXTERNFS_ATTR_ATTR_FLAG 1024 -+ -+/** -+ * container_of - cast a member of a structure out to the containing structure -+ * -+ * @ptr: the pointer to the member. -+ * @type: the type of the container struct this is embedded in. -+ * @member: the name of the member within the struct. -+ * -+ */ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct externfs_iattr { -+ unsigned int ia_valid; -+ mode_t ia_mode; -+ uid_t ia_uid; -+ gid_t ia_gid; -+ loff_t ia_size; -+ time_t ia_atime; -+ time_t ia_mtime; -+ time_t ia_ctime; -+ unsigned int ia_attr_flags; -+}; -+ -+struct externfs_data { -+ struct externfs_file_ops *file_ops; -+ struct externfs_mount_ops *mount_ops; -+}; -+ -+struct externfs_inode { -+ struct externfs_file_ops *ops; -+}; -+ -+struct externfs_mount_ops { -+ struct externfs_data *(*mount)(char *mount_arg); -+ struct externfs_inode *(*init_file)(struct externfs_data *ed); -+}; -+ -+struct externfs_file_ops { -+ int (*stat_file)(const char *path, struct externfs_data *ed, -+ dev_t *dev_out, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, -+ int *gid_out, unsigned long long *size_out, -+ unsigned long *atime_out, unsigned long *mtime_out, -+ unsigned long *ctime_out, int *blksize_out, -+ unsigned long long *blocks_out); -+ int (*file_type)(const char *path, int *rdev, -+ struct externfs_data *ed); -+ int (*access_file)(char *path, int r, int w, int x, int uid, int gid, -+ struct externfs_data *ed); -+ int (*open_file)(struct externfs_inode *ext, char *file, -+ int uid, int gid, struct inode *inode, -+ struct externfs_data *ed); -+ void (*close_file)(struct externfs_inode *ext, -+ unsigned long long size); -+ void *(*open_dir)(char *path, int uid, int gid, -+ struct externfs_data *ed); -+ char *(*read_dir)(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out, -+ struct externfs_data *ed); -+ int (*read_file)(struct externfs_inode *ext, -+ unsigned long long offset, char *buf, int len, -+ int ignore_start, int ignore_end, -+ void (*completion)(char *, int, void *), void *arg, -+ struct externfs_data *ed); -+ int (*write_file)(struct externfs_inode *ext, -+ unsigned long long offset, const char *buf, -+ int start, int len, -+ void (*completion)(char *, int, void *), void *arg, -+ struct externfs_data *ed); -+ int (*map_file_page)(struct externfs_inode *ext, -+ unsigned long long offset, char *buf, int w, -+ struct externfs_data *ed); -+ void (*close_dir)(void *stream, struct externfs_data *ed); -+ void (*invisible)(struct externfs_inode *ext); -+ int (*create_file)(struct externfs_inode *ext, char *path, -+ int mode, int uid, int gid, struct inode *inode, -+ struct externfs_data *ed); -+ int (*set_attr)(const char *path, struct externfs_iattr *attrs, -+ struct externfs_data *ed); -+ int (*make_symlink)(const char *from, const char *to, int uid, int gid, -+ struct externfs_data *ed); -+ int (*unlink_file)(const char *path, struct externfs_data *ed); -+ int (*make_dir)(const char *path, int mode, int uid, int gid, -+ struct externfs_data *ed); -+ int (*remove_dir)(const char *path, int uid, int gid, -+ struct externfs_data *ed); -+ int (*make_node)(const char *path, int mode, int uid, int gid, -+ int type, int maj, int min, struct externfs_data *ed); -+ int (*link_file)(const char *to, const char *from, int uid, int gid, -+ struct externfs_data *ed); -+ int (*read_link)(char *path, int uid, int gid, char *buf, int size, -+ struct externfs_data *ed); -+ int (*rename_file)(char *from, char *to, struct externfs_data *ed); -+ int (*statfs)(long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out, struct externfs_data *ed); -+ int (*truncate_file)(struct externfs_inode *ext, __u64 size, -+ struct externfs_data *ed); -+}; -+ -+#define HOSTFS_BUFSIZE 64 -+ -+extern int register_externfs(char *name, struct externfs_mount_ops *mount_ops); -+extern void unregister_externfs(char *name); -+extern void init_externfs(struct externfs_data *ed, -+ struct externfs_file_ops *ops); -+struct externfs_data *inode_externfs_info(struct inode *inode); -+ -+extern char *generic_root_filename(char *mount_arg); -+extern void host_close_file(void *stream); -+extern int host_read_file(int fd, unsigned long long offset, char *buf, -+ int len); -+extern int host_open_file(const char *path[], int r, int w, -+ struct file_handle *fh); -+extern void *host_open_dir(const char *path[]); -+extern char *host_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out); -+extern int host_file_type(const char *path[], int *rdev); -+extern char *host_root_filename(char *mount_arg); -+extern char *get_path(const char *path[], char *buf, int size); -+extern void free_path(const char *buf, char *tmp); -+extern int host_create_file(const char *path[], int mode, -+ struct file_handle *fh); -+extern int host_set_attr(const char *path[], struct externfs_iattr *attrs); -+extern int host_make_symlink(const char *from[], const char *to); -+extern int host_unlink_file(const char *path[]); -+extern int host_make_dir(const char *path[], int mode); -+extern int host_remove_dir(const char *path[]); -+extern int host_link_file(const char *to[], const char *from[]); -+extern int host_read_link(const char *path[], char *buf, int size); -+extern int host_rename_file(const char *from[], const char *to[]); -+extern int host_stat_fs(const char *path[], long *bsize_out, -+ long long *blocks_out, long long *bfree_out, -+ long long *bavail_out, long long *files_out, -+ long long *ffree_out, void *fsid_out, int fsid_size, -+ long *namelen_out, long *spare_out); -+extern int host_stat_file(const char *path[], dev_t *dev_out, -+ unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, -+ unsigned long *atime_out, unsigned long *mtime_out, -+ unsigned long *ctime_out, int *blksize_out, -+ unsigned long long *blocks_out); -+ -+extern char *generic_host_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out, -+ void *mount); -+extern int generic_host_read_file(int fd, unsigned long long offset, char *buf, -+ int len, void *mount); -+extern void generic_host_close_file(void *stream, unsigned long long size, -+ void *mount); -+extern int generic_host_truncate_file(struct file_handle *fh, __u64 size, -+ void *m); -+ -+extern char *inode_name_prefix(struct inode *inode, char *prefix); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/humfs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/humfs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/humfs.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,1024 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/list.h> -+#include <linux/sched.h> -+#include <linux/slab.h> -+#include <linux/stat.h> -+#include <linux/tqueue.h> -+#include <linux/types.h> -+#include <linux/errno.h> -+#include <linux/string.h> -+#include <linux/kdev_t.h> -+#include <asm/irq.h> -+#include "hostfs.h" -+#include "mem.h" -+#include "os.h" -+#include "mode.h" -+#include "aio.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+#include "filehandle.h" -+#include "metadata.h" -+ -+#define HUMFS_VERSION 2 -+ -+static int humfs_stat_file(const char *path, struct externfs_data *ed, -+ dev_t *dev_out, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, -+ int *gid_out, unsigned long long *size_out, -+ unsigned long *atime_out, unsigned long *mtime_out, -+ unsigned long *ctime_out, int *blksize_out, -+ unsigned long long *blocks_out) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err, mode, perms, major, minor; -+ char type; -+ -+ err = host_stat_file(data_path, dev_out, inode_out, mode_out, -+ nlink_out, NULL, NULL, size_out, atime_out, -+ mtime_out, ctime_out, blksize_out, blocks_out); -+ if(err) -+ return(err); -+ -+ err = (*mount->meta->ownerships)(path, &perms, uid_out, gid_out, -+ &type, &major, &minor, mount); -+ if(err) -+ return(err); -+ -+ *mode_out = (*mode_out & ~S_IRWXUGO) | perms; -+ -+ mode = 0; -+ switch(type){ -+ case 'c': -+ mode = S_IFCHR; -+ break; -+ case 'b': -+ mode = S_IFBLK; -+ break; -+ case 's': -+ mode = S_IFSOCK; -+ break; -+ default: -+ break; -+ } -+ -+ if(mode != 0) -+ *mode_out = (*mode_out & ~S_IFMT) | mode; -+ -+ return(0); -+} -+ -+static int meta_type(const char *path, int *dev_out, void *m) -+{ -+ struct humfs *mount = m; -+ int err, type, maj, min; -+ char c; -+ -+ err = (*mount->meta->ownerships)(path, NULL, NULL, NULL, &c, &maj, -+ &min, mount); -+ if(err) -+ return(err); -+ -+ if(c == 0) -+ return(0); -+ -+ if(dev_out) -+ *dev_out = MKDEV(maj, min); -+ -+ switch(c){ -+ case 'c': -+ type = OS_TYPE_CHARDEV; -+ break; -+ case 'b': -+ type = OS_TYPE_BLOCKDEV; -+ break; -+ case 'p': -+ type = OS_TYPE_FIFO; -+ break; -+ case 's': -+ type = OS_TYPE_SOCK; -+ break; -+ default: -+ type = -EINVAL; -+ break; -+ } -+ -+ return(type); -+} -+ -+static int humfs_file_type(const char *path, int *dev_out, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int type; -+ -+ type = meta_type(path, dev_out, mount); -+ if(type != 0) -+ return(type); -+ -+ return(host_file_type(data_path, dev_out)); -+} -+ -+static char *humfs_data_name(struct inode *inode) -+{ -+ struct externfs_data *ed = inode_externfs_info(inode); -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ -+ return(inode_name_prefix(inode, mount->data)); -+} -+ -+static struct externfs_inode *humfs_init_file(struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct humfs_file *hf; -+ -+ hf = (*mount->meta->init_file)(); -+ if(IS_ERR(hf)) -+ return((struct externfs_inode *) hf); -+ -+ hf->data.fd = -1; -+ return(&hf->ext); -+} -+ -+static int humfs_open_file(struct externfs_inode *ext, char *path, int uid, -+ int gid, struct inode *inode, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ struct openflags flags; -+ char tmp[HOSTFS_BUFSIZE], *file; -+ int err = -ENOMEM; -+ -+ file = get_path(data_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ flags = of_rdwr(OPENFLAGS()); -+ if(mount->direct) -+ flags = of_direct(flags); -+ -+ if(path == NULL) -+ path = ""; -+ err = (*mount->meta->open_file)(hf, path, inode, mount); -+ if(err) -+ goto out_free; -+ -+ err = open_filehandle(file, flags, 0, &hf->data); -+ if(err == -EISDIR) -+ goto out; -+ else if(err == -EPERM){ -+ flags = of_set_rw(flags, 1, 0); -+ err = open_filehandle(file, flags, 0, &hf->data); -+ } -+ -+ if(err) -+ goto out_close; -+ -+ hf->mount = mount; -+ is_reclaimable(&hf->data, humfs_data_name, inode); -+ -+ out_free: -+ free_path(file, tmp); -+ out: -+ return(err); -+ -+ out_close: -+ (*mount->meta->close_file)(hf); -+ goto out_free; -+} -+ -+static void *humfs_open_dir(char *path, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ -+ return(host_open_dir(data_path)); -+} -+ -+static void humfs_close_dir(void *stream, struct externfs_data *ed) -+{ -+ os_close_dir(stream); -+} -+ -+static char *humfs_read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ -+ return(generic_host_read_dir(stream, pos, ino_out, len_out, mount)); -+} -+ -+LIST_HEAD(humfs_replies); -+ -+struct humfs_aio { -+ struct aio_context aio; -+ struct list_head list; -+ void (*completion)(char *, int, void *); -+ char *buf; -+ int real_len; -+ int err; -+ void *data; -+}; -+ -+static int humfs_reply_fd = -1; -+ -+struct humfs_aio last_task_aio, last_intr_aio; -+struct humfs_aio *last_task_aio_ptr, *last_intr_aio_ptr; -+ -+void humfs_task_proc(void *unused) -+{ -+ struct humfs_aio *aio; -+ unsigned long flags; -+ -+ while(!list_empty(&humfs_replies)){ -+ local_irq_save(flags); -+ aio = list_entry(humfs_replies.next, struct humfs_aio, list); -+ -+ last_task_aio = *aio; -+ last_task_aio_ptr = aio; -+ -+ list_del(&aio->list); -+ local_irq_restore(flags); -+ -+ if(aio->err >= 0) -+ aio->err = aio->real_len; -+ (*aio->completion)(aio->buf, aio->err, aio->data); -+ kfree(aio); -+ } -+} -+ -+struct tq_struct humfs_task = { -+ .routine = humfs_task_proc, -+ .data = NULL -+}; -+ -+static void humfs_interrupt(int irq, void *dev_id, struct pt_regs *unused) -+{ -+ struct aio_thread_reply reply; -+ struct humfs_aio *aio; -+ int err, fd = (int) dev_id; -+ -+ while(1){ -+ err = os_read_file(fd, &reply, sizeof(reply)); -+ if(err < 0){ -+ if(err == -EAGAIN) -+ break; -+ printk("humfs_interrupt - read returned err %d\n", -+ -err); -+ return; -+ } -+ aio = reply.data; -+ aio->err = reply.err; -+ list_add(&aio->list, &humfs_replies); -+ last_intr_aio = *aio; -+ last_intr_aio_ptr = aio; -+ } -+ -+ if(!list_empty(&humfs_replies)) -+ schedule_task(&humfs_task); -+ reactivate_fd(fd, HUMFS_IRQ); -+} -+ -+static int init_humfs_aio(void) -+{ -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err){ -+ printk("init_humfs_aio - pipe failed, err = %d\n", -err); -+ goto out; -+ } -+ -+ err = um_request_irq(HUMFS_IRQ, fds[0], IRQ_READ, humfs_interrupt, -+ SA_INTERRUPT | SA_SAMPLE_RANDOM, "humfs", -+ (void *) fds[0]); -+ if(err){ -+ printk("init_humfs_aio - : um_request_irq failed, err = %d\n", -+ err); -+ goto out_close; -+ } -+ -+ humfs_reply_fd = fds[1]; -+ goto out; -+ -+ out_close: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ out: -+ return(0); -+} -+ -+__initcall(init_humfs_aio); -+ -+static int humfs_aio(enum aio_type type, int fd, unsigned long long offset, -+ char *buf, int len, int real_len, -+ void (*completion)(char *, int, void *), void *arg) -+{ -+ struct humfs_aio *aio; -+ int err = -ENOMEM; -+ -+ aio = kmalloc(sizeof(*aio), GFP_KERNEL); -+ if(aio == NULL) -+ goto out; -+ *aio = ((struct humfs_aio) { .aio = INIT_AIO_CONTEXT, -+ .list = LIST_HEAD_INIT(aio->list), -+ .completion= completion, -+ .buf = buf, -+ .err = 0, -+ .real_len = real_len, -+ .data = arg }); -+ -+ err = submit_aio(type, fd, buf, len, offset, humfs_reply_fd, aio); -+ if(err) -+ (*completion)(buf, err, arg); -+ -+ out: -+ return(err); -+} -+ -+static int humfs_read_file(struct externfs_inode *ext, -+ unsigned long long offset, char *buf, int len, -+ int ignore_start, int ignore_end, -+ void (*completion)(char *, int, void *), void *arg, -+ struct externfs_data *ed) -+{ -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ int fd = filehandle_fd(&hf->data); -+ -+ if(fd < 0){ -+ (*completion)(buf, fd, arg); -+ return(fd); -+ } -+ -+ return(humfs_aio(AIO_READ, fd, offset, buf, len, len, completion, -+ arg)); -+} -+ -+static int humfs_write_file(struct externfs_inode *ext, -+ unsigned long long offset, const char *buf, -+ int start, int len, -+ void (*completion)(char *, int, void *), void *arg, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ int err, orig_len = len, fd = filehandle_fd(&hf->data); -+ -+ if(fd < 0){ -+ (*completion)((char *) buf, fd, arg); -+ return(fd); -+ } -+ -+ if(mount->direct) -+ len = PAGE_SIZE; -+ else { -+ offset += start; -+ buf += start; -+ } -+ -+ err = humfs_aio(AIO_WRITE, fd, offset, (char *) buf, len, orig_len, -+ completion, arg); -+ -+ if(err < 0) -+ return(err); -+ -+ if(mount->direct) -+ err = orig_len; -+ -+ return(err); -+} -+ -+static int humfs_map_file_page(struct externfs_inode *ext, -+ unsigned long long offset, char *buf, int w, -+ struct externfs_data *ed) -+{ -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ unsigned long long size, need; -+ int err, fd = filehandle_fd(&hf->data); -+ -+ if(fd < 0) -+ return(fd); -+ -+ err = os_fd_size(fd, &size); -+ if(err) -+ return(err); -+ -+ need = offset + PAGE_SIZE; -+ if(size < need){ -+ err = os_truncate_fd(fd, need); -+ if(err) -+ return(err); -+ } -+ -+ return(physmem_subst_mapping(buf, fd, offset, w)); -+} -+ -+static void humfs_close_file(struct externfs_inode *ext, -+ unsigned long long size) -+{ -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ int fd; -+ -+ if(hf->data.fd == -1) -+ return; -+ -+ fd = filehandle_fd(&hf->data); -+ physmem_forget_descriptor(fd); -+ truncate_file(&hf->data, size); -+ close_file(&hf->data); -+ -+ (*hf->mount->meta->close_file)(hf); -+} -+ -+/* XXX Assumes that you can't make a normal file */ -+ -+static int humfs_make_node(const char *path, int mode, int uid, int gid, -+ int type, int major, int minor, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct file_handle fh; -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err; -+ char t; -+ -+ err = host_create_file(data_path, S_IRWXUGO, &fh); -+ if(err) -+ goto out; -+ -+ close_file(&fh); -+ -+ switch(type){ -+ case S_IFCHR: -+ t = 'c'; -+ break; -+ case S_IFBLK: -+ t = 'b'; -+ break; -+ case S_IFIFO: -+ t = 'p'; -+ break; -+ case S_IFSOCK: -+ t = 's'; -+ break; -+ default: -+ err = -EINVAL; -+ printk("humfs_make_node - bad node type : %d\n", type); -+ goto out_rm; -+ } -+ -+ err = (*mount->meta->make_node)(path, mode, uid, gid, t, major, minor, -+ mount); -+ if(err) -+ goto out_rm; -+ -+ out: -+ return(err); -+ -+ out_rm: -+ host_unlink_file(data_path); -+ goto out; -+} -+ -+static int humfs_create_file(struct externfs_inode *ext, char *path, int mode, -+ int uid, int gid, struct inode *inode, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err; -+ -+ err = (*mount->meta->create_file)(hf, path, mode, uid, gid, inode, -+ mount); -+ if(err) -+ goto out; -+ -+ err = host_create_file(data_path, S_IRWXUGO, &hf->data); -+ if(err) -+ goto out_rm; -+ -+ -+ is_reclaimable(&hf->data, humfs_data_name, inode); -+ -+ return(0); -+ -+ out_rm: -+ (*mount->meta->remove_file)(path, mount); -+ (*mount->meta->close_file)(hf); -+ out: -+ return(err); -+} -+ -+static int humfs_set_attr(const char *path, struct externfs_iattr *attrs, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int (*chown)(const char *, int, int, int, struct humfs *); -+ int err; -+ -+ chown = mount->meta->change_ownerships; -+ if(attrs->ia_valid & EXTERNFS_ATTR_MODE){ -+ err = (*chown)(path, attrs->ia_mode, -1, -1, mount); -+ if(err) -+ return(err); -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_UID){ -+ err = (*chown)(path, -1, attrs->ia_uid, -1, mount); -+ if(err) -+ return(err); -+ } -+ if(attrs->ia_valid & EXTERNFS_ATTR_GID){ -+ err = (*chown)(path, -1, -1, attrs->ia_gid, mount); -+ if(err) -+ return(err); -+ } -+ -+ attrs->ia_valid &= ~(EXTERNFS_ATTR_MODE | EXTERNFS_ATTR_UID | -+ EXTERNFS_ATTR_GID); -+ -+ return(host_set_attr(data_path, attrs)); -+} -+ -+static int humfs_make_symlink(const char *from, const char *to, int uid, -+ int gid, struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ struct humfs_file *hf; -+ const char *data_path[3] = { mount->data, from, NULL }; -+ int err = -ENOMEM; -+ -+ hf = (*mount->meta->init_file)(); -+ if(hf == NULL) -+ goto out; -+ -+ err = (*mount->meta->create_file)(hf, from, S_IRWXUGO, uid, gid, NULL, -+ mount); -+ if(err) -+ goto out_close; -+ -+ err = host_make_symlink(data_path, to); -+ if(err) -+ (*mount->meta->remove_file)(from, mount); -+ -+ out_close: -+ (*mount->meta->close_file)(hf); -+ out: -+ return(err); -+} -+ -+static int humfs_link_file(const char *to, const char *from, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path_from[3] = { mount->data, from, NULL }; -+ const char *data_path_to[3] = { mount->data, to, NULL }; -+ int err; -+ -+ err = (*mount->meta->create_link)(to, from, mount); -+ if(err) -+ return(err); -+ -+ err = host_link_file(data_path_to, data_path_from); -+ if(err) -+ (*mount->meta->remove_file)(from, mount); -+ -+ return(err); -+} -+ -+static int humfs_unlink_file(const char *path, struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err; -+ -+ err = (*mount->meta->remove_file)(path, mount); -+ if (err) -+ return err; -+ -+ (*mount->meta->remove_file)(path, mount); -+ return(host_unlink_file(data_path)); -+} -+ -+static void humfs_invisible(struct externfs_inode *ext) -+{ -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ struct humfs *mount = hf->mount; -+ -+ (*mount->meta->invisible)(hf); -+ not_reclaimable(&hf->data); -+} -+ -+static int humfs_make_dir(const char *path, int mode, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err; -+ -+ err = (*mount->meta->create_dir)(path, mode, uid, gid, mount); -+ if(err) -+ return(err); -+ -+ err = host_make_dir(data_path, S_IRWXUGO); -+ if(err) -+ (*mount->meta->remove_dir)(path, mount); -+ -+ return(err); -+} -+ -+static int humfs_remove_dir(const char *path, int uid, int gid, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, path, NULL }; -+ int err; -+ -+ err = host_remove_dir(data_path); -+ if (err) -+ return err; -+ -+ (*mount->meta->remove_dir)(path, mount); -+ -+ return(err); -+} -+ -+static int humfs_read_link(char *file, int uid, int gid, char *buf, int size, -+ struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, file, NULL }; -+ -+ return(host_read_link(data_path, buf, size)); -+} -+ -+struct humfs *inode_humfs_info(struct inode *inode) -+{ -+ return(container_of(inode_externfs_info(inode), struct humfs, ext)); -+} -+ -+static int humfs_rename_file(char *from, char *to, struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path_from[3] = { mount->data, from, NULL }; -+ const char *data_path_to[3] = { mount->data, to, NULL }; -+ int err; -+ -+ err = (*mount->meta->rename_file)(from, to, mount); -+ if(err) -+ return(err); -+ -+ err = host_rename_file(data_path_from, data_path_to); -+ if(err) -+ (*mount->meta->rename_file)(to, from, mount); -+ -+ return(err); -+} -+ -+static int humfs_stat_fs(long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out, struct externfs_data *ed) -+{ -+ struct humfs *mount = container_of(ed, struct humfs, ext); -+ const char *data_path[3] = { mount->data, NULL }; -+ int err; -+ -+ /* XXX Needs to maintain this info as metadata */ -+ err = host_stat_fs(data_path, bsize_out, blocks_out, bfree_out, -+ bavail_out, files_out, ffree_out, fsid_out, -+ fsid_size, namelen_out, spare_out); -+ if(err) -+ return(err); -+ -+ *blocks_out = mount->total / *bsize_out; -+ *bfree_out = (mount->total - mount->used) / *bsize_out; -+ *bavail_out = (mount->total - mount->used) / *bsize_out; -+ return(0); -+} -+ -+int humfs_truncate_file(struct externfs_inode *ext, __u64 size, -+ struct externfs_data *ed) -+{ -+ struct humfs_file *hf = container_of(ext, struct humfs_file, ext); -+ -+ return(truncate_file(&hf->data, size)); -+} -+ -+char *humfs_path(char *dir, char *file) -+{ -+ int need_slash, len = strlen(dir) + strlen(file); -+ char *new; -+ -+ need_slash = (dir[strlen(dir) - 1] != '/'); -+ if(need_slash) -+ len++; -+ -+ new = kmalloc(len + 1, GFP_KERNEL); -+ if(new == NULL) -+ return(NULL); -+ -+ strcpy(new, dir); -+ if(need_slash) -+ strcat(new, "/"); -+ strcat(new, file); -+ -+ return(new); -+} -+ -+DECLARE_MUTEX(meta_sem); -+struct list_head metas = LIST_HEAD_INIT(metas); -+ -+static struct humfs_meta_ops *find_meta(const char *name) -+{ -+ struct list_head *ele; -+ struct humfs_meta_ops *m; -+ -+ down(&meta_sem); -+ list_for_each(ele, &metas){ -+ m = list_entry(ele, struct humfs_meta_ops, list); -+ if(!strcmp(m->name, name)) -+ goto out; -+ } -+ m = NULL; -+ out: -+ up(&meta_sem); -+ return(m); -+} -+ -+void register_meta(struct humfs_meta_ops *ops) -+{ -+ down(&meta_sem); -+ list_add(&ops->list, &metas); -+ up(&meta_sem); -+} -+ -+void unregister_meta(struct humfs_meta_ops *ops) -+{ -+ down(&meta_sem); -+ list_del(&ops->list); -+ up(&meta_sem); -+} -+ -+static struct humfs *read_superblock(char *root) -+{ -+ struct humfs *mount; -+ struct humfs_meta_ops *meta = NULL; -+ struct file_handle *fh; -+ const char *path[] = { root, "superblock", NULL }; -+ u64 used, total; -+ char meta_buf[33], line[HOSTFS_BUFSIZE], *newline; -+ unsigned long long pos; -+ int version, i, n, err; -+ -+ fh = kmalloc(sizeof(*fh), GFP_KERNEL); -+ if(fh == NULL) -+ return(ERR_PTR(-ENOMEM)); -+ -+ err = host_open_file(path, 1, 0, fh); -+ if(err){ -+ printk("Failed to open %s/%s, errno = %d\n", path[0], -+ path[1], err); -+ return(ERR_PTR(err)); -+ } -+ -+ used = 0; -+ total = 0; -+ pos = 0; -+ i = 0; -+ while(1){ -+ n = read_file(fh, pos, &line[i], sizeof(line) - i - 1); -+ if((n == 0) && (i == 0)) -+ break; -+ if(n < 0) -+ return(ERR_PTR(n)); -+ -+ pos += n; -+ if(n > 0) -+ line[n + i] = '\0'; -+ -+ newline = strchr(line, '\n'); -+ if(newline == NULL){ -+ printk("read_superblock - line too long : '%s'\n", -+ line); -+ return(ERR_PTR(-EINVAL)); -+ } -+ newline++; -+ -+ if(sscanf(line, "version %d\n", &version) == 1){ -+ if(version != HUMFS_VERSION){ -+ printk("humfs version mismatch - want version " -+ "%d, got version %d.\n", HUMFS_VERSION, -+ version); -+ return(ERR_PTR(-EINVAL)); -+ } -+ } -+ else if(sscanf(line, "used %Lu\n", &used) == 1) ; -+ else if(sscanf(line, "total %Lu\n", &total) == 1) ; -+ else if(sscanf(line, "metadata %32s\n", meta_buf) == 1){ -+ meta = find_meta(meta_buf); -+ if(meta == NULL){ -+ printk("read_superblock - meta api \"%s\" not " -+ "registered\n", meta_buf); -+ return(ERR_PTR(-EINVAL)); -+ } -+ } -+ -+ else { -+ printk("read_superblock - bogus line : '%s'\n", line); -+ return(ERR_PTR(-EINVAL)); -+ } -+ -+ i = newline - line; -+ memmove(line, newline, sizeof(line) - i); -+ i = strlen(line); -+ } -+ -+ if(used == 0){ -+ printk("read_superblock - used not specified or set to " -+ "zero\n"); -+ return(ERR_PTR(-EINVAL)); -+ } -+ if(total == 0){ -+ printk("read_superblock - total not specified or set to " -+ "zero\n"); -+ return(ERR_PTR(-EINVAL)); -+ } -+ if(used > total){ -+ printk("read_superblock - used is greater than total\n"); -+ return(ERR_PTR(-EINVAL)); -+ } -+ -+ if(meta == NULL){ -+ meta = find_meta("shadow_fs"); -+ } -+ -+ if(meta == NULL){ -+ printk("read_superblock - valid meta api was not specified\n"); -+ return(ERR_PTR(-EINVAL)); -+ } -+ -+ mount = (*meta->init_mount)(root); -+ if(IS_ERR(mount)) -+ return(mount); -+ -+ *mount = ((struct humfs) { .total = total, -+ .used = used, -+ .meta = meta }); -+ return(mount); -+} -+ -+struct externfs_file_ops humfs_no_mmap_file_ops = { -+ .stat_file = humfs_stat_file, -+ .file_type = humfs_file_type, -+ .access_file = NULL, -+ .open_file = humfs_open_file, -+ .open_dir = humfs_open_dir, -+ .read_dir = humfs_read_dir, -+ .read_file = humfs_read_file, -+ .write_file = humfs_write_file, -+ .map_file_page = NULL, -+ .close_file = humfs_close_file, -+ .close_dir = humfs_close_dir, -+ .invisible = humfs_invisible, -+ .create_file = humfs_create_file, -+ .set_attr = humfs_set_attr, -+ .make_symlink = humfs_make_symlink, -+ .unlink_file = humfs_unlink_file, -+ .make_dir = humfs_make_dir, -+ .remove_dir = humfs_remove_dir, -+ .make_node = humfs_make_node, -+ .link_file = humfs_link_file, -+ .read_link = humfs_read_link, -+ .rename_file = humfs_rename_file, -+ .statfs = humfs_stat_fs, -+ .truncate_file = humfs_truncate_file -+}; -+ -+struct externfs_file_ops humfs_mmap_file_ops = { -+ .stat_file = humfs_stat_file, -+ .file_type = humfs_file_type, -+ .access_file = NULL, -+ .open_file = humfs_open_file, -+ .open_dir = humfs_open_dir, -+ .read_dir = humfs_read_dir, -+ .read_file = humfs_read_file, -+ .write_file = humfs_write_file, -+ .map_file_page = humfs_map_file_page, -+ .close_file = humfs_close_file, -+ .close_dir = humfs_close_dir, -+ .invisible = humfs_invisible, -+ .create_file = humfs_create_file, -+ .set_attr = humfs_set_attr, -+ .make_symlink = humfs_make_symlink, -+ .unlink_file = humfs_unlink_file, -+ .make_dir = humfs_make_dir, -+ .remove_dir = humfs_remove_dir, -+ .make_node = humfs_make_node, -+ .link_file = humfs_link_file, -+ .read_link = humfs_read_link, -+ .rename_file = humfs_rename_file, -+ .statfs = humfs_stat_fs, -+ .truncate_file = humfs_truncate_file -+}; -+ -+static struct externfs_data *mount_fs(char *mount_arg) -+{ -+ char *root, *data, *flags; -+ struct humfs *mount; -+ struct externfs_file_ops *file_ops; -+ int err, do_mmap = 0; -+ -+ if(mount_arg == NULL){ -+ printk("humfs - no host directory specified\n"); -+ return(NULL); -+ } -+ -+ flags = strchr((char *) mount_arg, ','); -+ if(flags != NULL){ -+ do { -+ *flags++ = '\0'; -+ -+ if(!strcmp(flags, "mmap")) -+ do_mmap = 1; -+ -+ flags = strchr(flags, ','); -+ } while(flags != NULL); -+ } -+ -+ err = -ENOMEM; -+ root = host_root_filename(mount_arg); -+ if(root == NULL) -+ goto err; -+ -+ mount = read_superblock(root); -+ if(IS_ERR(mount)){ -+ err = PTR_ERR(mount); -+ goto err_free_root; -+ } -+ -+ data = humfs_path(root, "data/"); -+ if(data == NULL) -+ goto err_free_mount; -+ -+ if(CHOOSE_MODE(do_mmap, 0)){ -+ printk("humfs doesn't support mmap in tt mode\n"); -+ do_mmap = 0; -+ } -+ -+ mount->data = data; -+ mount->mmap = do_mmap; -+ -+ file_ops = do_mmap ? &humfs_mmap_file_ops : &humfs_no_mmap_file_ops; -+ init_externfs(&mount->ext, file_ops); -+ -+ return(&mount->ext); -+ -+ err_free_mount: -+ kfree(mount); -+ err_free_root: -+ kfree(root); -+ err: -+ return(NULL); -+} -+ -+struct externfs_mount_ops humfs_mount_ops = { -+ .init_file = humfs_init_file, -+ .mount = mount_fs, -+}; -+ -+static int __init init_humfs(void) -+{ -+ return(register_externfs("humfs", &humfs_mount_ops)); -+} -+ -+static void __exit exit_humfs(void) -+{ -+ unregister_externfs("humfs"); -+} -+ -+__initcall(init_humfs); -+__exitcall(exit_humfs); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/Makefile 2005-05-03 22:28:14.284438648 +0300 -@@ -0,0 +1,14 @@ -+# -+# Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := hostfs.o -+ -+obj-$(CONFIG_EXTERNFS) += externfs.o -+obj-$(CONFIG_HOSTFS) += host_fs.o host_file.o -+obj-$(CONFIG_HUMFS) += humfs.o host_file.o meta_fs.o -+ -+obj-m = $(O_TARGET) -+ -+include $(TOPDIR)/Rules.make -Index: linux-2.4.29/arch/um/fs/hostfs/metadata.h -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/metadata.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/metadata.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,84 @@ -+/* -+ * Copyright (C) 2004 Piotr Neuman (sikkh@wp.pl) and -+ * Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_FS_METADATA -+#define __UM_FS_METADATA -+ -+#include "linux/fs.h" -+#include "linux/list.h" -+#include "os.h" -+#include "hostfs.h" -+#include "filehandle.h" -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct humfs { -+ struct externfs_data ext; -+ __u64 used; -+ __u64 total; -+ char *data; -+ int mmap; -+ int direct; -+ struct humfs_meta_ops *meta; -+}; -+ -+struct humfs_file { -+ struct humfs *mount; -+ struct file_handle data; -+ struct externfs_inode ext; -+}; -+ -+struct humfs_meta_ops { -+ struct list_head list; -+ char *name; -+ struct humfs_file *(*init_file)(void); -+ int (*open_file)(struct humfs_file *hf, const char *path, -+ struct inode *inode, struct humfs *humfs); -+ int (*create_file)(struct humfs_file *hf, const char *path, int mode, -+ int uid, int gid, struct inode *inode, -+ struct humfs *humfs); -+ void (*close_file)(struct humfs_file *humfs); -+ int (*ownerships)(const char *path, int *mode_out, int *uid_out, -+ int *gid_out, char *type_out, int *maj_out, -+ int *min_out, struct humfs *humfs); -+ int (*make_node)(const char *path, int mode, int uid, int gid, -+ int type, int major, int minor, struct humfs *humfs); -+ int (*create_link)(const char *to, const char *from, -+ struct humfs *humfs); -+ int (*remove_file)(const char *path, struct humfs *humfs); -+ int (*create_dir)(const char *path, int mode, int uid, int gid, -+ struct humfs *humfs); -+ int (*remove_dir)(const char *path, struct humfs *humfs); -+ int (*change_ownerships)(const char *path, int mode, int uid, int gid, -+ struct humfs *humfs); -+ int (*rename_file)(const char *from, const char *to, -+ struct humfs *humfs); -+ void (*invisible)(struct humfs_file *hf); -+ struct humfs *(*init_mount)(char *root); -+ void (*free_mount)(struct humfs *humfs); -+}; -+ -+void register_meta(struct humfs_meta_ops *ops); -+void unregister_meta(struct humfs_meta_ops *ops); -+ -+char *humfs_path(char *dir, char *file); -+char *humfs_name(struct inode *inode, char *prefix); -+extern struct humfs *inode_humfs_info(struct inode *inode); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hostfs/meta_fs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hostfs/meta_fs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hostfs/meta_fs.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,519 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/slab.h> -+#include "hostfs.h" -+#include "metadata.h" -+#include "kern_util.h" -+ -+#define METADATA_FILE_PATH(meta) (meta)->root, "file_metadata" -+#define METADATA_DIR_PATH(meta) (meta)->root, "dir_metadata" -+ -+struct meta_fs { -+ struct humfs humfs; -+ char *root; -+}; -+ -+struct meta_file { -+ struct humfs_file humfs; -+ struct file_handle fh; -+}; -+ -+static int meta_file_path(const char *path, struct meta_fs *meta, -+ const char *path_out[]) -+{ -+ const char *data_path[] = { meta->root, "data", path, NULL }; -+ char data_tmp[HOSTFS_BUFSIZE]; -+ char *data_file = get_path(data_path, data_tmp, sizeof(data_tmp)); -+ -+ if(data_file == NULL) -+ return(-ENOMEM); -+ -+ path_out[0] = meta->root; -+ path_out[2] = path; -+ if(os_file_type(data_file) == OS_TYPE_DIR){ -+ path_out[1] = "dir_metadata"; -+ path_out[3] = "metadata"; -+ path_out[4] = NULL; -+ } -+ else { -+ path_out[1] = "file_metadata"; -+ path_out[3] = NULL; -+ } -+ -+ return(0); -+} -+ -+static int open_meta_file(const char *path, struct humfs *humfs, -+ struct file_handle *fh) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ const char *meta_path[5]; -+ char meta_tmp[HOSTFS_BUFSIZE]; -+ char *meta_file; -+ int err; -+ -+ err = meta_file_path(path, meta, meta_path); -+ if(err) -+ goto out; -+ -+ meta_file = get_path(meta_path, meta_tmp, sizeof(meta_tmp)); -+ if(meta_file == NULL) -+ goto out; -+ -+ err = open_filehandle(meta_file, of_rdwr(OPENFLAGS()), 0, fh); -+ -+ out: -+ return(err); -+} -+ -+static char *meta_fs_name(struct inode *inode) -+{ -+ struct humfs *mount = inode->i_sb->u.generic_sbp; -+ struct meta_fs *meta = container_of(mount, struct meta_fs, humfs); -+ const char *metadata_path[5]; -+ char tmp[HOSTFS_BUFSIZE], *name, *file; -+ -+ if(meta_file_path("", meta, metadata_path)) -+ return(NULL); -+ -+ file = get_path(metadata_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ return(NULL); -+ -+ name = inode_name_prefix(inode, file); -+ -+ free_path(file, tmp); -+ return(name); -+} -+ -+static void metafs_invisible(struct humfs_file *hf) -+{ -+ struct meta_file *mf = container_of(hf, struct meta_file, humfs); -+ -+ not_reclaimable(&mf->fh); -+} -+ -+static struct humfs_file *metafs_init_file(void) -+{ -+ struct meta_file *mf; -+ int err = -ENOMEM; -+ -+ mf = kmalloc(sizeof(*mf), GFP_KERNEL); -+ if(mf == NULL) -+ return(ERR_PTR(err)); -+ -+ return(&mf->humfs); -+} -+ -+static int metafs_open_file(struct humfs_file *hf, const char *path, -+ struct inode *inode, struct humfs *humfs) -+{ -+ struct meta_file *mf = container_of(hf, struct meta_file, humfs); -+ int err; -+ -+ err = open_meta_file(path, humfs, &mf->fh); -+ if(err) -+ return(err); -+ -+ is_reclaimable(&mf->fh, meta_fs_name, inode); -+ -+ return(0); -+} -+ -+static void metafs_close_file(struct humfs_file *hf) -+{ -+ struct meta_file *meta = container_of(hf, struct meta_file, humfs); -+ -+ close_file(&meta->fh); -+ kfree(meta); -+} -+ -+static int metafs_create_file(struct humfs_file *hf, const char *path, -+ int mode, int uid, int gid, struct inode *inode, -+ struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ struct meta_file *mf = container_of(hf, struct meta_file, humfs); -+ char tmp[HOSTFS_BUFSIZE]; -+ const char *metadata_path[] = { METADATA_FILE_PATH(meta), path, NULL }; -+ char *file = get_path(metadata_path, tmp, sizeof(tmp)); -+ char buf[sizeof("mmmm uuuuuuuuuu gggggggggg")]; -+ int err = -ENOMEM; -+ -+ if(file == NULL) -+ goto out; -+ -+ err = open_filehandle(file, of_write(of_create(OPENFLAGS())), 0644, -+ &mf->fh); -+ if(err) -+ goto out_free_path; -+ -+ if(inode != NULL) -+ is_reclaimable(&mf->fh, meta_fs_name, inode); -+ -+ sprintf(buf, "%d %d %d\n", mode & S_IRWXUGO, uid, gid); -+ err = write_file(&mf->fh, 0, buf, strlen(buf)); -+ if(err < 0) -+ goto out_rm; -+ -+ free_path(file, tmp); -+ return(0); -+ -+ out_rm: -+ close_file(&mf->fh); -+ os_remove_file(file); -+ out_free_path: -+ free_path(file, tmp); -+ out: -+ return(err); -+} -+ -+static int metafs_create_link(const char *to, const char *from, -+ struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ const char *path_to[] = { METADATA_FILE_PATH(meta), to, NULL }; -+ const char *path_from[] = { METADATA_FILE_PATH(meta), from, NULL }; -+ -+ return(host_link_file(path_to, path_from)); -+} -+ -+static int metafs_remove_file(const char *path, struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ char tmp[HOSTFS_BUFSIZE]; -+ const char *metadata_path[] = { METADATA_FILE_PATH(meta), path, NULL }; -+ char *file = get_path(metadata_path, tmp, sizeof(tmp)); -+ int err = -ENOMEM; -+ -+ if(file == NULL) -+ goto out; -+ -+ err = os_remove_file(file); -+ -+ out: -+ free_path(file, tmp); -+ return(err); -+} -+ -+static int metafs_create_directory(const char *path, int mode, int uid, -+ int gid, struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ char tmp[HOSTFS_BUFSIZE]; -+ const char *dir_path[] = { METADATA_DIR_PATH(meta), path, NULL, NULL }; -+ const char *file_path[] = { METADATA_FILE_PATH(meta), path, NULL, -+ NULL }; -+ char *file, dir_meta[sizeof("mmmm uuuuuuuuuu gggggggggg\n")]; -+ int err, fd; -+ -+ err = host_make_dir(dir_path, 0755); -+ if(err) -+ goto out; -+ -+ err = host_make_dir(file_path, 0755); -+ if(err) -+ goto out_rm; -+ -+ /* This to make the index independent of the number of elements in -+ * METADATA_DIR_PATH(). -+ */ -+ dir_path[sizeof(dir_path) / sizeof(dir_path[0]) - 2] = "metadata"; -+ -+ err = -ENOMEM; -+ file = get_path(dir_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ fd = os_open_file(file, of_create(of_rdwr(OPENFLAGS())), 0644); -+ if(fd < 0){ -+ err = fd; -+ goto out_free; -+ } -+ -+ sprintf(dir_meta, "%d %d %d\n", mode & S_IRWXUGO, uid, gid); -+ err = os_write_file(fd, dir_meta, strlen(dir_meta)); -+ if(err > 0) -+ err = 0; -+ -+ os_close_file(fd); -+ -+ out_free: -+ free_path(file, tmp); -+ out_rm: -+ host_remove_dir(dir_path); -+ out: -+ return(err); -+} -+ -+static int metafs_remove_directory(const char *path, struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ char tmp[HOSTFS_BUFSIZE], *file; -+ const char *dir_path[] = { METADATA_DIR_PATH(meta), path, "metadata", -+ NULL }; -+ const char *file_path[] = { METADATA_FILE_PATH(meta), path, NULL }; -+ char *slash; -+ int err; -+ -+ err = -ENOMEM; -+ file = get_path(dir_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_remove_file(file); -+ if(err) -+ goto out_free; -+ -+ slash = strrchr(file, '/'); -+ if(slash == NULL){ -+ printk("remove_shadow_directory failed to find last slash\n"); -+ goto out_free; -+ } -+ *slash = '\0'; -+ err = os_remove_dir(file); -+ free_path(file, tmp); -+ -+ file = get_path(file_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = os_remove_dir(file); -+ if(err) -+ goto out_free; -+ -+ out: -+ return(err); -+ out_free: -+ free_path(file, tmp); -+ goto out; -+} -+ -+static int metafs_make_node(const char *path, int mode, int uid, int gid, -+ int type, int maj, int min, struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ struct file_handle fh; -+ char tmp[HOSTFS_BUFSIZE]; -+ const char *metadata_path[] = { METADATA_FILE_PATH(meta), path, NULL }; -+ int err; -+ char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")], *file; -+ -+ sprintf(buf, "%d %d %d %c %d %d\n", mode & S_IRWXUGO, uid, gid, type, -+ maj, min); -+ -+ err = -ENOMEM; -+ file = get_path(metadata_path, tmp, sizeof(tmp)); -+ if(file == NULL) -+ goto out; -+ -+ err = open_filehandle(file, -+ of_create(of_rdwr(OPENFLAGS())), 0644, &fh); -+ if(err) -+ goto out_free; -+ -+ err = write_file(&fh, 0, buf, strlen(buf)); -+ if(err > 0) -+ err = 0; -+ -+ close_file(&fh); -+ -+ out_free: -+ free_path(file, tmp); -+ out: -+ return(err); -+} -+ -+static int metafs_ownerships(const char *path, int *mode_out, int *uid_out, -+ int *gid_out, char *type_out, int *maj_out, -+ int *min_out, struct humfs *humfs) -+{ -+ struct file_handle fh; -+ char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")]; -+ int err, n, mode, uid, gid, maj, min; -+ char type; -+ -+ err = open_meta_file(path, humfs, &fh); -+ if(err) -+ goto out; -+ -+ err = os_read_file(fh.fd, buf, sizeof(buf) - 1); -+ if(err < 0) -+ goto out_close; -+ -+ buf[err] = '\0'; -+ err = 0; -+ -+ n = sscanf(buf, "%d %d %d %c %d %d", &mode, &uid, &gid, &type, &maj, -+ &min); -+ if(n == 3){ -+ maj = -1; -+ min = -1; -+ type = 0; -+ err = 0; -+ } -+ else if(n != 6) -+ err = -EINVAL; -+ -+ if(mode_out != NULL) -+ *mode_out = mode; -+ if(uid_out != NULL) -+ *uid_out = uid; -+ if(gid_out != NULL) -+ *gid_out = uid; -+ if(type_out != NULL) -+ *type_out = type; -+ if(maj_out != NULL) -+ *maj_out = maj; -+ if(min_out != NULL) -+ *min_out = min; -+ -+ out_close: -+ close_file(&fh); -+ out: -+ return(err); -+} -+ -+static int metafs_change_ownerships(const char *path, int mode, int uid, -+ int gid, struct humfs *humfs) -+{ -+ struct file_handle fh; -+ char type; -+ char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")]; -+ int err = -ENOMEM, old_mode, old_uid, old_gid, n, maj, min; -+ -+ err = open_meta_file(path, humfs, &fh); -+ if(err) -+ goto out; -+ -+ err = read_file(&fh, 0, buf, sizeof(buf) - 1); -+ if(err < 0) -+ goto out_close; -+ -+ buf[err] = '\0'; -+ -+ n = sscanf(buf, "%d %d %d %c %d %d\n", &old_mode, &old_uid, &old_gid, -+ &type, &maj, &min); -+ if((n != 3) && (n != 6)){ -+ err = -EINVAL; -+ goto out_close; -+ } -+ -+ if(mode == -1) -+ mode = old_mode; -+ if(uid == -1) -+ uid = old_uid; -+ if(gid == -1) -+ gid = old_gid; -+ -+ if(n == 3) -+ sprintf(buf, "%d %d %d\n", mode & S_IRWXUGO, uid, gid); -+ else -+ sprintf(buf, "%d %d %d %c %d %d\n", mode & S_IRWXUGO, uid, gid, -+ type, maj, min); -+ -+ err = write_file(&fh, 0, buf, strlen(buf)); -+ if(err > 0) -+ err = 0; -+ -+ err = truncate_file(&fh, strlen(buf)); -+ -+ out_close: -+ close_file(&fh); -+ out: -+ return(err); -+} -+ -+static int metafs_rename_file(const char *from, const char *to, -+ struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ const char *metadata_path_from[5], *metadata_path_to[5]; -+ int err; -+ -+ err = meta_file_path(from, meta, metadata_path_from); -+ if(err) -+ return(err); -+ -+ err = meta_file_path(to, meta, metadata_path_to); -+ if(err) -+ return(err); -+ -+ return(host_rename_file(metadata_path_from, metadata_path_to)); -+} -+ -+static struct humfs *metafs_init_mount(char *root) -+{ -+ struct meta_fs *meta; -+ int err = -ENOMEM; -+ -+ meta = kmalloc(sizeof(*meta), GFP_KERNEL); -+ if(meta == NULL) -+ goto out; -+ -+ meta->root = uml_strdup(root); -+ if(meta->root == NULL) -+ goto out_free_meta; -+ -+ return(&meta->humfs); -+ -+ out_free_meta: -+ kfree(meta); -+ out: -+ return(ERR_PTR(err)); -+} -+ -+static void metafs_free_mount(struct humfs *humfs) -+{ -+ struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs); -+ -+ kfree(meta); -+} -+ -+struct humfs_meta_ops hum_fs_meta_fs_ops = { -+ .list = LIST_HEAD_INIT(hum_fs_meta_fs_ops.list), -+ .name = "shadow_fs", -+ .init_file = metafs_init_file, -+ .open_file = metafs_open_file, -+ .close_file = metafs_close_file, -+ .ownerships = metafs_ownerships, -+ .make_node = metafs_make_node, -+ .create_file = metafs_create_file, -+ .create_link = metafs_create_link, -+ .remove_file = metafs_remove_file, -+ .create_dir = metafs_create_directory, -+ .remove_dir = metafs_remove_directory, -+ .change_ownerships = metafs_change_ownerships, -+ .rename_file = metafs_rename_file, -+ .invisible = metafs_invisible, -+ .init_mount = metafs_init_mount, -+ .free_mount = metafs_free_mount, -+}; -+ -+static int __init init_meta_fs(void) -+{ -+ register_meta(&hum_fs_meta_fs_ops); -+ return(0); -+} -+ -+static void __exit exit_meta_fs(void) -+{ -+ unregister_meta(&hum_fs_meta_fs_ops); -+} -+ -+__initcall(init_meta_fs); -+__exitcall(exit_meta_fs); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hppfs/hppfs_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hppfs/hppfs_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hppfs/hppfs_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,737 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/fs.h> -+#include <linux/module.h> -+#include <linux/slab.h> -+#include <linux/list.h> -+#include <linux/kernel.h> -+#include <linux/ctype.h> -+#include <asm/uaccess.h> -+#include "os.h" -+ -+struct hppfs_data { -+ struct list_head list; -+ char contents[PAGE_SIZE - sizeof(struct list_head)]; -+}; -+ -+struct hppfs_private { -+ struct file proc_file; -+ int host_fd; -+ loff_t len; -+ struct hppfs_data *contents; -+}; -+ -+#define HPPFS_SUPER_MAGIC 0xb00000ee -+ -+static struct super_operations hppfs_sbops; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error); -+ -+static int is_pid(struct dentry *dentry) -+{ -+ struct super_block *sb; -+ int i; -+ -+ sb = dentry->d_sb; -+ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) -+ return(0); -+ -+ for(i = 0; i < dentry->d_name.len; i++){ -+ if(!isdigit(dentry->d_name.name[i])) -+ return(0); -+ } -+ return(1); -+} -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ const char *seg_name; -+ int len, seg_len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)) -+ len += strlen("pid") + 1; -+ else len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = "proc"; -+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)){ -+ seg_name = "pid"; -+ seg_len = strlen("pid"); -+ } -+ else { -+ seg_name = parent->d_name.name; -+ seg_len = parent->d_name.len; -+ } -+ -+ len -= seg_len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], seg_name, seg_len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+struct dentry_operations hppfs_dentry_ops = { -+}; -+ -+static int file_removed(struct dentry *dentry, const char *file) -+{ -+ char *host_file; -+ int extra, fd; -+ -+ extra = 0; -+ if(file != NULL) extra += strlen(file) + 1; -+ -+ host_file = dentry_name(dentry, extra + strlen("/remove")); -+ if(host_file == NULL){ -+ printk("file_removed : allocation failed\n"); -+ return(-ENOMEM); -+ } -+ -+ if(file != NULL){ -+ strcat(host_file, "/"); -+ strcat(host_file, file); -+ } -+ strcat(host_file, "/remove"); -+ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ kfree(host_file); -+ if(fd >= 0){ -+ os_close_file(fd); -+ return(1); -+ } -+ return(0); -+} -+ -+static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct dentry *proc_dentry; -+ struct inode *inode; -+ int err, deleted; -+ -+ deleted = file_removed(dentry, NULL); -+ if(deleted < 0) -+ return(ERR_PTR(deleted)); -+ else if(deleted) -+ return(ERR_PTR(-ENOENT)); -+ -+ proc_dentry = lookup_hash(&dentry->d_name, ino->u.hppfs_i.proc_dentry); -+ if(IS_ERR(proc_dentry)) -+ return(proc_dentry); -+ -+ inode = get_inode(ino->i_sb, proc_dentry, &err); -+ if(err != 0) -+ return(ERR_PTR(err)); -+ -+ d_add(dentry, inode); -+ dentry->d_op = &hppfs_dentry_ops; -+ return(NULL); -+} -+ -+static struct inode_operations hppfs_file_iops = { -+}; -+ -+static struct inode_operations hppfs_dir_iops = { -+ .lookup = hppfs_lookup, -+}; -+ -+static ssize_t read_proc(struct file *file, char *buf, ssize_t count, -+ loff_t *ppos, int is_user) -+{ -+ ssize_t (*read)(struct file *, char *, size_t, loff_t *); -+ ssize_t n; -+ -+ read = file->f_dentry->d_inode->i_fop->read; -+ if(read == NULL) -+ return(-EOPNOTSUPP); -+ -+ if(!is_user) -+ set_fs(KERNEL_DS); -+ -+ n = (*read)(file, buf, count, &file->f_pos); -+ -+ if(!is_user) -+ set_fs(USER_DS); -+ -+ if(ppos) *ppos = file->f_pos; -+ return(n); -+} -+ -+static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) -+{ -+ ssize_t n; -+ int cur, err; -+ char *new_buf; -+ -+ n = -ENOMEM; -+ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(new_buf == NULL){ -+ printk("hppfs_read_file : kmalloc failed\n"); -+ goto out; -+ } -+ n = 0; -+ while(count > 0){ -+ cur = min_t(ssize_t, count, PAGE_SIZE); -+ err = os_read_file(fd, new_buf, cur); -+ if(err < 0){ -+ printk("hppfs_read : read failed, err = %d\n", -err); -+ n = err; -+ goto out_free; -+ } -+ else if(err == 0) -+ break; -+ -+ if(copy_to_user(buf, new_buf, err)){ -+ n = -EFAULT; -+ goto out_free; -+ } -+ n += err; -+ count -= err; -+ } -+ out_free: -+ kfree(new_buf); -+ out: -+ return(n); -+} -+ -+static ssize_t hppfs_read(struct file *file, char *buf, size_t count, -+ loff_t *ppos) -+{ -+ struct hppfs_private *hppfs = file->private_data; -+ struct hppfs_data *data; -+ loff_t off; -+ int err; -+ -+ if(hppfs->contents != NULL){ -+ if(*ppos >= hppfs->len) return(0); -+ -+ data = hppfs->contents; -+ off = *ppos; -+ while(off >= sizeof(data->contents)){ -+ data = list_entry(data->list.next, struct hppfs_data, -+ list); -+ off -= sizeof(data->contents); -+ } -+ -+ if(off + count > hppfs->len) -+ count = hppfs->len - off; -+ copy_to_user(buf, &data->contents[off], count); -+ *ppos += count; -+ } -+ else if(hppfs->host_fd != -1){ -+ err = os_seek_file(hppfs->host_fd, *ppos); -+ if(err < 0){ -+ printk("hppfs_read : seek failed, err = %d\n", -err); -+ return(err); -+ } -+ count = hppfs_read_file(hppfs->host_fd, buf, count); -+ if(count > 0) -+ *ppos += count; -+ } -+ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); -+ -+ return(count); -+} -+ -+static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, -+ loff_t *ppos) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ ssize_t (*write)(struct file *, const char *, size_t, loff_t *); -+ int err; -+ -+ write = proc_file->f_dentry->d_inode->i_fop->write; -+ if(write == NULL) -+ return(-EOPNOTSUPP); -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*write)(proc_file, buf, len, &proc_file->f_pos); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int open_host_sock(char *host_file, int *filter_out) -+{ -+ char *end; -+ int fd; -+ -+ end = &host_file[strlen(host_file)]; -+ strcpy(end, "/rw"); -+ *filter_out = 1; -+ fd = os_connect_socket(host_file); -+ if(fd >= 0) -+ return(fd); -+ -+ strcpy(end, "/r"); -+ *filter_out = 0; -+ fd = os_connect_socket(host_file); -+ return(fd); -+} -+ -+static void free_contents(struct hppfs_data *head) -+{ -+ struct hppfs_data *data; -+ struct list_head *ele, *next; -+ -+ if(head == NULL) return; -+ -+ list_for_each_safe(ele, next, &head->list){ -+ data = list_entry(ele, struct hppfs_data, list); -+ kfree(data); -+ } -+ kfree(head); -+} -+ -+static struct hppfs_data *hppfs_get_data(int fd, int filter, -+ struct file *proc_file, -+ struct file *hppfs_file, -+ loff_t *size_out) -+{ -+ struct hppfs_data *data, *new, *head; -+ int n, err; -+ -+ err = -ENOMEM; -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk("hppfs_get_data : head allocation failed\n"); -+ goto failed; -+ } -+ -+ INIT_LIST_HEAD(&data->list); -+ -+ head = data; -+ *size_out = 0; -+ -+ if(filter){ -+ while((n = read_proc(proc_file, data->contents, -+ sizeof(data->contents), NULL, 0)) > 0) { -+ err = os_write_file(fd, data->contents, n); -+ if(err != n) -+ printk("hppfs_get_data : failed to write out " -+ "%d bytes, err = %d\n", n, -err); -+ } -+ err = os_shutdown_socket(fd, 0, 1); -+ if(err < 0){ -+ printk("hppfs_get_data : failed to shut down " -+ "socket\n"); -+ goto failed_free; -+ } -+ } -+ while(1){ -+ n = os_read_file(fd, data->contents, sizeof(data->contents)); -+ if(n < 0){ -+ err = n; -+ printk("hppfs_get_data : read failed, err = %d\n", -n); -+ goto failed_free; -+ } -+ else if(n == 0) -+ break; -+ -+ *size_out += n; -+ -+ if(n < sizeof(data->contents)) -+ break; -+ -+ new = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(new == 0){ -+ printk("hppfs_get_data : data allocation failed\n"); -+ err = -ENOMEM; -+ goto failed_free; -+ } -+ -+ INIT_LIST_HEAD(&new->list); -+ list_add(&new->list, &data->list); -+ data = new; -+ } -+ return(head); -+ -+ failed_free: -+ free_contents(head); -+ failed: -+ return(ERR_PTR(err)); -+} -+ -+static struct hppfs_private *hppfs_data(void) -+{ -+ struct hppfs_private *data; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL) -+ return(data); -+ -+ *data = ((struct hppfs_private ) { .host_fd = -1, -+ .len = -1, -+ .contents = NULL } ); -+ return(data); -+} -+ -+static int hppfs_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ char *host_file; -+ int err, fd, type, filter; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ host_file = dentry_name(file->f_dentry, strlen("/rw")); -+ if(host_file == NULL) -+ goto out_free2; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free1; -+ -+ type = os_file_type(host_file); -+ if(type == OS_TYPE_FILE){ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ if(fd >= 0) -+ data->host_fd = fd; -+ else printk("hppfs_open : failed to open '%s', err = %d\n", -+ host_file, -fd); -+ -+ data->contents = NULL; -+ } -+ else if(type == OS_TYPE_DIR){ -+ fd = open_host_sock(host_file, &filter); -+ if(fd >= 0){ -+ data->contents = hppfs_get_data(fd, filter, -+ &data->proc_file, -+ file, &data->len); -+ if(!IS_ERR(data->contents)) -+ data->host_fd = fd; -+ } -+ else printk("hppfs_open : failed to open a socket in " -+ "'%s', err = %d\n", host_file, -fd); -+ } -+ kfree(host_file); -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free1: -+ kfree(host_file); -+ out_free2: -+ free_contents(data->contents); -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static int hppfs_dir_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ int err; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free; -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free: -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ loff_t (*llseek)(struct file *, loff_t, int); -+ loff_t ret; -+ -+ llseek = proc_file->f_dentry->d_inode->i_fop->llseek; -+ if(llseek != NULL){ -+ ret = (*llseek)(proc_file, off, where); -+ if(ret < 0) -+ return(ret); -+ } -+ -+ return(default_llseek(file, off, where)); -+} -+ -+struct hppfs_dirent { -+ void *vfs_dirent; -+ filldir_t filldir; -+ struct dentry *dentry; -+}; -+ -+static int hppfs_filldir(void *d, const char *name, int size, -+ loff_t offset, ino_t inode, unsigned int type) -+{ -+ struct hppfs_dirent *dirent = d; -+ -+ if(file_removed(dirent->dentry, name)) -+ return(0); -+ -+ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, -+ inode, type)); -+} -+ -+static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ int (*readdir)(struct file *, void *, filldir_t); -+ struct hppfs_dirent dirent = ((struct hppfs_dirent) -+ { .vfs_dirent = ent, -+ .filldir = filldir, -+ .dentry = file->f_dentry } ); -+ int err; -+ -+ readdir = proc_file->f_dentry->d_inode->i_fop->readdir; -+ if(readdir == NULL) -+ return(-EOPNOTSUPP); -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*readdir)(proc_file, &dirent, hppfs_filldir); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hppfs_file_fops = { -+ .owner = NULL, -+ .llseek = hppfs_llseek, -+ .read = hppfs_read, -+ .write = hppfs_write, -+ .open = hppfs_open, -+}; -+ -+static struct file_operations hppfs_dir_fops = { -+ .owner = NULL, -+ .readdir = hppfs_readdir, -+ .open = hppfs_dir_open, -+ .fsync = hppfs_fsync, -+}; -+ -+static int hppfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ sf->f_blocks = 0; -+ sf->f_bfree = 0; -+ sf->f_bavail = 0; -+ sf->f_files = 0; -+ sf->f_ffree = 0; -+ sf->f_type = HPPFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations hppfs_sbops = { -+ .put_inode = force_delete, -+ .delete_inode = NULL, -+ .statfs = hppfs_statfs, -+}; -+ -+static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*readlink)(struct dentry *, char *, int); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ readlink = proc_dentry->d_inode->i_op->readlink; -+ if(readlink == NULL) -+ return(-EOPNOTSUPP); -+ n = (*readlink)(proc_dentry, buffer, buflen); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*follow_link)(struct dentry *, struct nameidata *); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ follow_link = proc_dentry->d_inode->i_op->follow_link; -+ if(follow_link == NULL) -+ return(-EOPNOTSUPP); -+ n = (*follow_link)(proc_dentry, nd); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static struct inode_operations hppfs_link_iops = { -+ .readlink = hppfs_readlink, -+ .follow_link = hppfs_follow_link, -+}; -+ -+static void read_inode(struct inode *ino) -+{ -+ struct inode *proc_ino; -+ -+ proc_ino = ino->u.hppfs_i.proc_dentry->d_inode; -+ ino->i_uid = proc_ino->i_uid; -+ ino->i_gid = proc_ino->i_gid; -+ ino->i_atime = proc_ino->i_atime; -+ ino->i_mtime = proc_ino->i_mtime; -+ ino->i_ctime = proc_ino->i_ctime; -+ ino->i_ino = proc_ino->i_ino; -+ ino->i_dev = proc_ino->i_dev; -+ ino->i_mode = proc_ino->i_mode; -+ ino->i_nlink = proc_ino->i_nlink; -+ ino->i_size = proc_ino->i_size; -+ ino->i_blksize = proc_ino->i_blksize; -+ ino->i_blocks = proc_ino->i_blocks; -+} -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error) -+{ -+ struct inode *inode; -+ int err = -ENOMEM; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out; -+ -+ insert_inode_hash(inode); -+ if(S_ISDIR(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_dir_iops; -+ inode->i_fop = &hppfs_dir_fops; -+ } -+ else if(S_ISLNK(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_link_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ else { -+ inode->i_op = &hppfs_file_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ -+ inode->i_sb = sb; -+ inode->u.hppfs_i.proc_dentry = dentry; -+ -+ read_inode(inode); -+ err = 0; -+ -+ if(error) *error = err; -+ return(inode); -+ out: -+ if(error) *error = err; -+ return(NULL); -+} -+ -+static struct super_block *hppfs_read_super(struct super_block *sb, void *d, -+ int silent) -+{ -+ struct inode *root_inode; -+ struct file_system_type *procfs; -+ struct super_block *proc_sb; -+ -+ procfs = get_fs_type("proc"); -+ if(procfs == NULL) -+ goto out; -+ -+ if(list_empty(&procfs->fs_supers)) -+ goto out; -+ -+ proc_sb = list_entry(procfs->fs_supers.next, struct super_block, -+ s_instances); -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HPPFS_SUPER_MAGIC; -+ sb->s_op = &hppfs_sbops; -+ -+ dget(proc_sb->s_root); -+ root_inode = get_inode(sb, proc_sb->s_root, NULL); -+ if(root_inode == NULL) -+ goto out_dput; -+ -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ return(sb); -+ -+ out_put: -+ iput(root_inode); -+ out_dput: -+ dput(proc_sb->s_root); -+ out: -+ return(NULL); -+} -+ -+DECLARE_FSTYPE(hppfs_type, "hppfs", hppfs_read_super, 0); -+ -+static int __init init_hppfs(void) -+{ -+ return(register_filesystem(&hppfs_type)); -+} -+ -+static void __exit exit_hppfs(void) -+{ -+ unregister_filesystem(&hppfs_type); -+} -+ -+module_init(init_hppfs) -+module_exit(exit_hppfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/fs/hppfs/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/hppfs/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/hppfs/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,10 @@ -+O_TARGET := hppfs.o -+obj-y = hppfs_kern.o #hppfs_user.o -+obj-m = $(O_TARGET) -+ -+CFLAGS_hppfs_kern.o := $(CFLAGS) -+#CFLAGS_hppfs_user.o := $(USER_CFLAGS) -+ -+override CFLAGS = -+ -+include $(TOPDIR)/Rules.make -Index: linux-2.4.29/arch/um/fs/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/fs/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/fs/Makefile 2005-05-03 22:28:14.291437584 +0300 -@@ -0,0 +1,23 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+subdir-y = -+subdir-m = -+ -+subdir-$(CONFIG_HOSTFS) += hostfs -+subdir-$(CONFIG_HPPFS) += hppfs -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+obj-m += $(join $(subdir-m),$(subdir-m:%=/%.o)) -+ -+include $(TOPDIR)/Rules.make -+ -+dep: -+ -+clean: -+ -+archmrproper: -Index: linux-2.4.29/arch/um/include/2_5compat.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/2_5compat.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/2_5compat.h 2005-05-03 22:28:14.292437432 +0300 -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __2_5_COMPAT_H__ -+#define __2_5_COMPAT_H__ -+ -+#include "linux/version.h" -+ -+#define INIT_ELV(queue, elv) elevator_init(elv, ELV_NOOP) -+ -+#define ELV_NOOP ELEVATOR_NOOP -+ -+#define INIT_HARDSECT(arr, maj, sizes) arr[maj] = sizes -+ -+#define IS_WRITE(req) ((req)->cmd == WRITE) -+ -+#define SET_PRI(task) \ -+ do { (task)->nice = 20; (task)->counter = -100; } while(0); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/aio.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/aio.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/aio.h 2005-05-03 22:28:14.293437280 +0300 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef AIO_H__ -+#define AIO_H__ -+ -+enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP }; -+ -+struct aio_thread_reply { -+ void *data; -+ int err; -+}; -+ -+struct aio_context { -+ int reply_fd; -+}; -+ -+#define INIT_AIO_CONTEXT { .reply_fd = -1 } -+ -+extern int submit_aio(enum aio_type type, int fd, char *buf, int len, -+ unsigned long long offset, int reply_fd, void *data); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/chan_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/chan_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/chan_kern.h 2005-05-03 22:28:14.294437128 +0300 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_KERN_H__ -+#define __CHAN_KERN_H__ -+ -+#include "linux/tty.h" -+#include "linux/list.h" -+#include "chan_user.h" -+ -+struct chan { -+ struct list_head list; -+ char *dev; -+ unsigned int primary:1; -+ unsigned int input:1; -+ unsigned int output:1; -+ unsigned int opened:1; -+ int fd; -+ enum chan_init_pri pri; -+ struct chan_ops *ops; -+ void *data; -+}; -+ -+extern void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev); -+extern int parse_chan_pair(char *str, struct list_head *chans, int pri, -+ int device, struct chan_opts *opts); -+extern int open_chan(struct list_head *chans); -+extern int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq); -+extern int console_write_chan(struct list_head *chans, const char *buf, -+ int len); -+extern void close_chan(struct list_head *chans); -+extern void chan_enable_winch(struct list_head *chans, void *line); -+extern void enable_chan(struct list_head *chans, void *data); -+extern int chan_window_size(struct list_head *chans, -+ unsigned short *rows_out, -+ unsigned short *cols_out); -+extern int chan_out_fd(struct list_head *chans); -+extern int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/chan_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/chan_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/chan_user.h 2005-05-03 22:28:14.295436976 +0300 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_USER_H__ -+#define __CHAN_USER_H__ -+ -+#include "init.h" -+ -+struct chan_opts { -+ void (*announce)(char *dev_name, int dev); -+ char *xterm_title; -+ int raw; -+ unsigned long tramp_stack; -+ int in_kernel; -+}; -+ -+enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE }; -+ -+struct chan_ops { -+ char *type; -+ void *(*init)(char *, int, struct chan_opts *); -+ int (*open)(int, int, int, void *, char **); -+ void (*close)(int, void *); -+ int (*read)(int, char *, void *); -+ int (*write)(int, const char *, int, void *); -+ int (*console_write)(int, const char *, int, void *); -+ int (*window_size)(int, void *, unsigned short *, unsigned short *); -+ void (*free)(void *); -+ int winch; -+}; -+ -+extern struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, tty_ops, -+ xterm_ops; -+ -+extern void generic_close(int fd, void *unused); -+extern int generic_read(int fd, char *c_out, void *unused); -+extern int generic_write(int fd, const char *buf, int n, void *unused); -+extern int generic_console_write(int fd, const char *buf, int n, void *state); -+extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out); -+extern void generic_free(void *data); -+ -+extern void register_winch(int fd, void *device_data); -+extern void register_winch_irq(int fd, int tty_fd, int pid, void *line); -+ -+#define __channel_help(fn, prefix) \ -+__uml_help(fn, prefix "[0-9]*=<channel description>\n" \ -+" Attach a console or serial line to a host channel. See\n" \ -+" http://user-mode-linux.sourceforge.net/input.html for a complete\n" \ -+" description of this switch.\n\n" \ -+); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/choose-mode.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/choose-mode.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/choose-mode.h 2005-05-03 22:28:14.295436976 +0300 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHOOSE_MODE_H__ -+#define __CHOOSE_MODE_H__ -+ -+#include "uml-config.h" -+ -+#if defined(UML_CONFIG_MODE_TT) && defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (mode_tt ? (tt) : (skas)) -+ -+#elif defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (skas) -+ -+#elif defined(UML_CONFIG_MODE_TT) -+#define CHOOSE_MODE(tt, skas) (tt) -+#endif -+ -+#define CHOOSE_MODE_PROC(tt, skas, args...) \ -+ CHOOSE_MODE(tt(args), skas(args)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/filehandle.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/filehandle.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/filehandle.h 2005-05-03 22:28:14.296436824 +0300 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FILEHANDLE_H__ -+#define __FILEHANDLE_H__ -+ -+#include "linux/list.h" -+#include "linux/fs.h" -+#include "os.h" -+ -+struct file_handle { -+ struct list_head list; -+ int fd; -+ char *(*get_name)(struct inode *); -+ struct inode *inode; -+ struct openflags flags; -+}; -+ -+extern struct file_handle bad_filehandle; -+ -+extern int open_file(char *name, struct openflags flags, int mode); -+extern void *open_dir(char *file); -+extern int open_filehandle(char *name, struct openflags flags, int mode, -+ struct file_handle *fh); -+extern int read_file(struct file_handle *fh, unsigned long long offset, -+ char *buf, int len); -+extern int write_file(struct file_handle *fh, unsigned long long offset, -+ const char *buf, int len); -+extern int truncate_file(struct file_handle *fh, unsigned long long size); -+extern int close_file(struct file_handle *fh); -+extern void not_reclaimable(struct file_handle *fh); -+extern void is_reclaimable(struct file_handle *fh, -+ char *(name_proc)(struct inode *), -+ struct inode *inode); -+extern int filehandle_fd(struct file_handle *fh); -+extern int make_pipe(struct file_handle *fhs); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/frame.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/frame.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/frame.h 2005-05-03 22:28:14.297436672 +0300 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_H_ -+#define __FRAME_H_ -+ -+#include "sysdep/frame.h" -+ -+struct frame_common { -+ void *data; -+ int len; -+ int sig_index; -+ int sr_index; -+ int sr_relative; -+ int sp_index; -+ struct arch_frame_data arch; -+}; -+ -+struct sc_frame { -+ struct frame_common common; -+ int sc_index; -+}; -+ -+extern struct sc_frame signal_frame_sc; -+ -+extern struct sc_frame signal_frame_sc_sr; -+ -+struct si_frame { -+ struct frame_common common; -+ int sip_index; -+ int si_index; -+ int ucp_index; -+ int uc_index; -+}; -+ -+extern struct si_frame signal_frame_si; -+ -+extern void capture_signal_stack(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/frame_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/frame_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/frame_kern.h 2005-05-03 22:28:14.298436520 +0300 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_H_ -+#define __FRAME_KERN_H_ -+ -+#include "frame.h" -+#include "sysdep/frame_kern.h" -+ -+extern int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, -+ sigset_t *mask); -+extern int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/frame_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/frame_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/frame_user.h 2005-05-03 22:28:14.299436368 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_H_ -+#define __FRAME_USER_H_ -+ -+#include "sysdep/frame_user.h" -+#include "frame.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/helper.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/helper.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/helper.h 2005-05-03 22:28:14.300436216 +0300 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __HELPER_H__ -+#define __HELPER_H__ -+ -+extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out); -+extern int run_helper_thread(int (*proc)(void *), void *arg, -+ unsigned int flags, unsigned long *stack_out, -+ int stack_order); -+extern int helper_wait(int pid); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/init.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/init.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/init.h 2005-05-03 22:28:14.301436064 +0300 -@@ -0,0 +1,124 @@ -+#ifndef _LINUX_UML_INIT_H -+#define _LINUX_UML_INIT_H -+ -+/* These macros are used to mark some functions or -+ * initialized data (doesn't apply to uninitialized data) -+ * as `initialization' functions. The kernel can take this -+ * as hint that the function is used only during the initialization -+ * phase and free up used memory resources after -+ * -+ * Usage: -+ * For functions: -+ * -+ * You should add __init immediately before the function name, like: -+ * -+ * static void __init initme(int x, int y) -+ * { -+ * extern int z; z = x * y; -+ * } -+ * -+ * If the function has a prototype somewhere, you can also add -+ * __init between closing brace of the prototype and semicolon: -+ * -+ * extern int initialize_foobar_device(int, int, int) __init; -+ * -+ * For initialized data: -+ * You should insert __initdata between the variable name and equal -+ * sign followed by value, e.g.: -+ * -+ * static int init_variable __initdata = 0; -+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... }; -+ * -+ * Don't forget to initialize data not at file scope, i.e. within a function, -+ * as gcc otherwise puts the data into the bss section and not into the init -+ * section. -+ * -+ * Also note, that this data cannot be "const". -+ */ -+ -+#ifndef _LINUX_INIT_H -+typedef int (*initcall_t)(void); -+typedef void (*exitcall_t)(void); -+ -+#define __init __attribute__ ((__section__ (".text.init"))) -+#define __exit __attribute__ ((unused, __section__(".text.exit"))) -+#define __initdata __attribute__ ((__section__ (".data.init"))) -+ -+#endif -+ -+#ifndef MODULE -+struct uml_param { -+ const char *str; -+ int (*setup_func)(char *, int *); -+}; -+ -+extern initcall_t __uml_initcall_start, __uml_initcall_end; -+extern initcall_t __uml_postsetup_start, __uml_postsetup_end; -+extern const char *__uml_help_start, *__uml_help_end; -+#endif -+ -+#define __uml_initcall(fn) \ -+ static initcall_t __uml_initcall_##fn __uml_init_call = fn -+ -+#define __uml_exitcall(fn) \ -+ static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn -+ -+extern struct uml_param __uml_setup_start, __uml_setup_end; -+ -+#define __uml_postsetup(fn) \ -+ static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn -+ -+#define __non_empty_string(dummyname,string) \ -+ struct __uml_non_empty_string_struct_##dummyname \ -+ { \ -+ char _string[sizeof(string)-2]; \ -+ } -+ -+#ifndef MODULE -+#define __uml_setup(str, fn, help...) \ -+ __non_empty_string(fn ##_setup, str); \ -+ __uml_help(fn, help); \ -+ static char __uml_setup_str_##fn[] __initdata = str; \ -+ static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn } -+#else -+#define __uml_setup(str, fn, help...) \ -+ -+#endif -+ -+#define __uml_help(fn, help...) \ -+ __non_empty_string(fn ##__help, help); \ -+ static char __uml_help_str_##fn[] __initdata = help; \ -+ static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn -+ -+/* -+ * Mark functions and data as being only used at initialization -+ * or exit time. -+ */ -+#define __uml_init_setup __attribute__ ((unused,__section__ (".uml.setup.init"))) -+#define __uml_setup_help __attribute__ ((unused,__section__ (".uml.help.init"))) -+#define __uml_init_call __attribute__ ((unused,__section__ (".uml.initcall.init"))) -+#define __uml_postsetup_call __attribute__ ((unused,__section__ (".uml.postsetup.init"))) -+#define __uml_exit_call __attribute__ ((unused,__section__ (".uml.exitcall.exit"))) -+ -+#ifndef __KERNEL__ -+ -+#define __initcall(fn) static initcall_t __initcall_##fn __init_call = fn -+#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn -+ -+#define __init_call __attribute__ ((unused,__section__ (".initcall.init"))) -+#define __exit_call __attribute__ ((unused,__section__ (".exitcall.exit"))) -+ -+#endif -+ -+#endif /* _LINUX_UML_INIT_H */ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/initrd.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/initrd.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/initrd.h 2005-05-03 22:28:14.301436064 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __INITRD_USER_H__ -+#define __INITRD_USER_H__ -+ -+extern int load_initrd(char *filename, void *buf, int size); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/irq_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/irq_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/irq_kern.h 2005-05-03 22:28:14.303435760 +0300 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_KERN_H__ -+#define __IRQ_KERN_H__ -+ -+#include "linux/interrupt.h" -+ -+extern int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/irq_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/irq_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/irq_user.h 2005-05-03 22:28:14.304435608 +0300 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_USER_H__ -+#define __IRQ_USER_H__ -+ -+enum { IRQ_READ, IRQ_WRITE }; -+ -+extern void sigio_handler(int sig, union uml_pt_regs *regs); -+extern int activate_fd(int irq, int fd, int type, void *dev_id); -+extern void free_irq_by_irq_and_dev(int irq, void *dev_id); -+extern void free_irq_by_fd(int fd); -+extern void reactivate_fd(int fd, int irqnum); -+extern void deactivate_fd(int fd, int irqnum); -+extern int deactivate_all_fds(void); -+extern void forward_interrupts(int pid); -+extern void init_irq_signals(int on_sigstack); -+extern void forward_ipi(int fd, int pid); -+extern void free_irq_later(int irq, void *dev_id); -+extern int activate_ipi(int fd, int pid); -+extern unsigned long irq_lock(void); -+extern void irq_unlock(unsigned long flags); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/kern.h 2005-05-03 22:28:14.304435608 +0300 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_H__ -+#define __KERN_H__ -+ -+/* These are all user-mode things which are convenient to call directly -+ * from kernel code and for which writing a wrapper is too much of a pain. -+ * The regular include files can't be included because this file is included -+ * only into kernel code, and user-space includes conflict with kernel -+ * includes. -+ */ -+ -+extern int errno; -+ -+extern int clone(int (*proc)(void *), void *sp, int flags, void *data); -+extern int sleep(int); -+extern int printf(char *fmt, ...); -+extern char *strerror(int errnum); -+extern char *ptsname(int __fd); -+extern int munmap(void *, int); -+extern void *sbrk(int increment); -+extern void *malloc(int size); -+extern void perror(char *err); -+extern int kill(int pid, int sig); -+extern int getuid(void); -+extern int pause(void); -+extern int write(int, const void *, int); -+extern int exit(int); -+extern int close(int); -+extern int read(unsigned int, char *, int); -+extern int pipe(int *); -+extern int sched_yield(void); -+extern int ptrace(int op, int pid, long addr, long data); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/kern_util.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/kern_util.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/kern_util.h 2005-05-03 22:28:14.306435304 +0300 -@@ -0,0 +1,123 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_UTIL_H__ -+#define __KERN_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int ncpus; -+extern char *linux_prog; -+extern char *gdb_init; -+extern int kmalloc_ok; -+extern int timer_irq_inited; -+extern int jail; -+extern int nsyscalls; -+ -+#define UML_ROUND_DOWN(addr) ((void *)(((unsigned long) addr) & PAGE_MASK)) -+#define UML_ROUND_UP(addr) \ -+ UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1) -+ -+extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg); -+extern unsigned long stack_sp(unsigned long page); -+extern int kernel_thread_proc(void *data); -+extern void syscall_segv(int sig); -+extern int current_pid(void); -+extern unsigned long alloc_stack(int order, int atomic); -+extern int do_signal(int error); -+extern int is_stack_fault(unsigned long sp); -+extern unsigned long segv(unsigned long address, unsigned long ip, -+ int is_write, int is_user, void *sc); -+extern unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int is_write, int is_user, -+ int *code_out); -+extern void syscall_ready(void); -+extern int segv_syscall(void); -+extern void kern_finish_exec(void *task, int new_pid, unsigned long stack); -+extern int page_size(void); -+extern int page_mask(void); -+extern int need_finish_fork(void); -+extern void free_stack(unsigned long stack, int order); -+extern void add_input_request(int op, void (*proc)(int), void *arg); -+extern int sys_execve(char *file, char **argv, char **env); -+extern char *current_cmd(void); -+extern void timer_handler(int sig, union uml_pt_regs *regs); -+extern int set_signals(int enable); -+extern void force_sigbus(void); -+extern int pid_to_processor_id(int pid); -+extern void block_signals(void); -+extern void unblock_signals(void); -+extern void deliver_signals(void *t); -+extern int next_syscall_index(int max); -+extern int next_trap_index(int max); -+extern void cpu_idle(void); -+extern void finish_fork(void); -+extern void paging_init(void); -+extern void init_flush_vm(void); -+extern void *syscall_sp(void *t); -+extern void syscall_trace(void); -+extern int hz(void); -+extern void idle_timer(void); -+extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); -+extern int external_pid(void *t); -+extern void boot_timer_handler(int sig); -+extern void interrupt_end(void); -+extern void initial_thread_cb(void (*proc)(void *), void *arg); -+extern int debugger_signal(int status, int pid); -+extern void debugger_parent_signal(int status, int pid); -+extern void child_signal(int pid, int status); -+extern int init_ptrace_proxy(int idle_pid, int startup, int stop); -+extern int init_parent_proxy(int pid); -+extern void check_stack_overflow(void *ptr); -+extern void relay_signal(int sig, union uml_pt_regs *regs); -+extern void not_implemented(void); -+extern int user_context(unsigned long sp); -+extern void timer_irq(union uml_pt_regs *regs); -+extern void unprotect_stack(unsigned long stack); -+extern void do_uml_exitcalls(void); -+extern int attach_debugger(int idle_pid, int pid, int stop); -+extern void bad_segv(unsigned long address, unsigned long ip, int is_write); -+extern int config_gdb(char *str); -+extern int remove_gdb(void); -+extern char *uml_strdup(char *string); -+extern void unprotect_kernel_mem(void); -+extern void protect_kernel_mem(void); -+extern void set_kmem_end(unsigned long); -+extern void uml_cleanup(void); -+extern void set_current(void *t); -+extern void lock_signalled_task(void *t); -+extern void IPI_handler(int cpu); -+extern int jail_setup(char *line, int *add); -+extern void *get_init_task(void); -+extern int clear_user_proc(void *buf, int size); -+extern int copy_to_user_proc(void *to, void *from, int size); -+extern int copy_from_user_proc(void *to, void *from, int size); -+extern int strlen_user_proc(char *str); -+extern void bus_handler(int sig, union uml_pt_regs *regs); -+extern void winch(int sig, union uml_pt_regs *regs); -+extern long execute_syscall(void *r); -+extern int smp_sigio_handler(void); -+extern void *get_current(void); -+extern struct task_struct *get_task(int pid, int require); -+extern void machine_halt(void); -+extern int is_syscall(unsigned long addr); -+extern void arch_switch(void); -+extern void free_irq(unsigned int, void *); -+extern int um_in_interrupt(void); -+extern int cpu(void); -+extern unsigned long long time_stamp(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/line.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/line.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/line.h 2005-05-03 22:28:14.307435152 +0300 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __LINE_H__ -+#define __LINE_H__ -+ -+#include "linux/list.h" -+#include "linux/tqueue.h" -+#include "linux/tty.h" -+#include "asm/semaphore.h" -+#include "chan_user.h" -+#include "mconsole_kern.h" -+ -+struct line_driver { -+ char *name; -+ char *devfs_name; -+ short major; -+ short minor_start; -+ short type; -+ short subtype; -+ int read_irq; -+ char *read_irq_name; -+ int write_irq; -+ char *write_irq_name; -+ char *symlink_from; -+ char *symlink_to; -+ struct mc_device mc; -+}; -+ -+struct line { -+ char *init_str; -+ int init_pri; -+ struct list_head chan_list; -+ int valid; -+ int count; -+ struct tty_struct *tty; -+ struct semaphore sem; -+ char *buffer; -+ char *head; -+ char *tail; -+ int sigio; -+ struct tq_struct task; -+ struct line_driver *driver; -+ int have_irq; -+}; -+ -+#define LINE_INIT(str, d) \ -+ { init_str : str, \ -+ init_pri : INIT_STATIC, \ -+ chan_list : { }, \ -+ valid : 1, \ -+ count : 0, \ -+ tty : NULL, \ -+ sem : { }, \ -+ buffer : NULL, \ -+ head : NULL, \ -+ tail : NULL, \ -+ sigio : 0, \ -+ driver : d, \ -+ have_irq : 0 } -+ -+struct lines { -+ int num; -+}; -+ -+#define LINES_INIT(n) { num : n } -+ -+extern void line_close(struct line *lines, struct tty_struct *tty); -+extern int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts); -+extern int line_setup(struct line *lines, int num, char *init, -+ int all_allowed); -+extern int line_write(struct line *line, struct tty_struct *tty, int from_user, -+ const char *buf, int len); -+extern char *add_xterm_umid(char *base); -+extern int line_setup_irq(int fd, int input, int output, void *data); -+extern void line_close_chan(struct line *line); -+extern void line_disable(struct line *line, int current_irq); -+extern void line_register_devfs(struct lines *set, -+ struct line_driver *line_driver, -+ struct tty_driver *driver, struct line *lines, -+ int nlines); -+extern void lines_init(struct line *lines, int nlines); -+extern void close_lines(struct line *lines, int nlines); -+extern int line_config(struct line *lines, int num, char *str); -+extern int line_remove(struct line *lines, int num, char *str); -+extern int line_get_config(char *dev, struct line *lines, int num, char *str, -+ int size, char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/include/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,7 @@ -+all : sc.h -+ -+sc.h : ../util/mk_sc -+ ../util/mk_sc > $@ -+ -+../util/mk_sc : -+ $(MAKE) -C ../util mk_sc -Index: linux-2.4.29/arch/um/include/mconsole.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mconsole.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mconsole.h 2005-05-03 22:28:14.309434848 +0300 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_H__ -+#define __MCONSOLE_H__ -+ -+#ifndef __KERNEL__ -+#include <stdint.h> -+#define u32 uint32_t -+#endif -+ -+#define MCONSOLE_MAGIC (0xcafebabe) -+#define MCONSOLE_MAX_DATA (512) -+#define MCONSOLE_VERSION 2 -+ -+struct mconsole_request { -+ u32 magic; -+ u32 version; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_reply { -+ u32 err; -+ u32 more; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_notify { -+ u32 magic; -+ u32 version; -+ enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG, -+ MCONSOLE_USER_NOTIFY } type; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mc_request; -+ -+enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; -+ -+struct mconsole_command -+{ -+ char *command; -+ void (*handler)(struct mc_request *req); -+ enum mc_context context; -+}; -+ -+struct mc_request -+{ -+ int len; -+ int as_interrupt; -+ -+ int originating_fd; -+ int originlen; -+ unsigned char origin[128]; /* sockaddr_un */ -+ -+ struct mconsole_request request; -+ struct mconsole_command *cmd; -+}; -+ -+extern char mconsole_socket_name[]; -+ -+extern int mconsole_unlink_socket(void); -+extern int mconsole_reply(struct mc_request *req, char *reply, int err, -+ int more); -+ -+extern void mconsole_version(struct mc_request *req); -+extern void mconsole_help(struct mc_request *req); -+extern void mconsole_halt(struct mc_request *req); -+extern void mconsole_reboot(struct mc_request *req); -+extern void mconsole_config(struct mc_request *req); -+extern void mconsole_remove(struct mc_request *req); -+extern void mconsole_sysrq(struct mc_request *req); -+extern void mconsole_cad(struct mc_request *req); -+extern void mconsole_stop(struct mc_request *req); -+extern void mconsole_go(struct mc_request *req); -+extern void mconsole_log(struct mc_request *req); -+extern void mconsole_proc(struct mc_request *req); -+ -+extern int mconsole_get_request(int fd, struct mc_request *req); -+extern int mconsole_notify(char *sock_name, int type, const void *data, -+ int len); -+extern char *mconsole_notify_socket(void); -+extern void lock_notify(void); -+extern void unlock_notify(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mconsole_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mconsole_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mconsole_kern.h 2005-05-03 22:28:14.310434696 +0300 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_KERN_H__ -+#define __MCONSOLE_KERN_H__ -+ -+#include "linux/config.h" -+#include "linux/list.h" -+#include "mconsole.h" -+ -+struct mconsole_entry { -+ struct list_head list; -+ struct mc_request request; -+}; -+ -+struct mc_device { -+ struct list_head list; -+ char *name; -+ int (*config)(char *); -+ int (*get_config)(char *, char *, int, char **); -+ int (*remove)(char *); -+}; -+ -+#define CONFIG_CHUNK(str, size, current, chunk, end) \ -+do { \ -+ current += strlen(chunk); \ -+ if(current >= size) \ -+ str = NULL; \ -+ if(str != NULL){ \ -+ strcpy(str, chunk); \ -+ str += strlen(chunk); \ -+ } \ -+ if(end) \ -+ current++; \ -+} while(0) -+ -+#ifdef CONFIG_MCONSOLE -+ -+extern void mconsole_register_dev(struct mc_device *new); -+ -+#else -+ -+static inline void mconsole_register_dev(struct mc_device *new) -+{ -+} -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mem.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mem.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mem.h 2005-05-03 22:28:14.310434696 +0300 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_H__ -+#define __MEM_H__ -+ -+#include "linux/types.h" -+ -+extern void set_kmem_end(unsigned long new); -+extern int phys_mapping(unsigned long phys, __u64 *offset_out); -+extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); -+extern int is_remapped(const void *virt, int fd, __u64 offset); -+extern int physmem_remove_mapping(void *virt); -+extern void physmem_forget_descriptor(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mem_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mem_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mem_kern.h 2005-05-03 22:28:14.311434544 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_KERN_H__ -+#define __MEM_KERN_H__ -+ -+#include "linux/list.h" -+#include "linux/types.h" -+ -+struct remapper { -+ struct list_head list; -+ int (*proc)(int, unsigned long, int, __u64, int); -+}; -+ -+extern void register_remapper(struct remapper *info); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mem_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mem_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mem_user.h 2005-05-03 22:28:14.313434240 +0300 -@@ -0,0 +1,82 @@ -+/* -+ * arch/um/include/mem_user.h -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory interface for support IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. -+ */ -+ -+#ifndef _MEM_USER_H -+#define _MEM_USER_H -+ -+struct iomem_region { -+ struct iomem_region *next; -+ char *driver; -+ int fd; -+ int size; -+ unsigned long phys; -+ unsigned long virt; -+}; -+ -+extern struct iomem_region *iomem_regions; -+extern int iomem_size; -+ -+#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -+ -+extern unsigned long host_task_size; -+extern unsigned long task_size; -+ -+extern int init_mem_user(void); -+extern int create_mem_file(unsigned long len); -+extern void setup_memory(void *entry); -+extern unsigned long find_iomem(char *driver, unsigned long *len_out); -+extern int init_maps(unsigned long physmem, unsigned long iomem, -+ unsigned long highmem); -+extern unsigned long get_vm(unsigned long len); -+extern void setup_physmem(unsigned long start, unsigned long usable, -+ unsigned long len, unsigned long highmem); -+extern void add_iomem(char *name, int fd, unsigned long size); -+extern unsigned long phys_offset(unsigned long phys); -+extern void unmap_physmem(void); -+extern void map_memory(unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int protect_memory(unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern unsigned long get_kmem_end(void); -+extern void check_tmpexec(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mode.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mode.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mode.h 2005-05-03 22:28:14.313434240 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_H__ -+#define __MODE_H__ -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "../kernel/tt/include/mode.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/mode_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/mode_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/mode_kern.h 2005-05-03 22:28:14.314434088 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_KERN_H__ -+#define __MODE_KERN_H__ -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mode_kern.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode_kern.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/net_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/net_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/net_kern.h 2005-05-03 22:28:14.315433936 +0300 -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_KERN_H -+#define __UM_NET_KERN_H -+ -+#include "linux/netdevice.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/list.h" -+ -+struct uml_net { -+ struct list_head list; -+ struct net_device *dev; -+ int index; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+}; -+ -+struct uml_net_private { -+ struct list_head list; -+ spinlock_t lock; -+ struct net_device *dev; -+ struct timer_list tl; -+ struct net_device_stats stats; -+ int fd; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+ -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int (*set_mtu)(int mtu, void *); -+ int user[1]; -+}; -+ -+struct net_kern_info { -+ void (*init)(struct net_device *, void *); -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+}; -+ -+struct transport { -+ struct list_head list; -+ char *name; -+ int (*setup)(char *, char **, void *); -+ struct net_user_info *user; -+ struct net_kern_info *kern; -+ int private_size; -+ int setup_size; -+}; -+ -+extern struct net_device *ether_init(int); -+extern unsigned short ether_protocol(struct sk_buff *); -+extern int setup_etheraddr(char *str, unsigned char *addr); -+extern struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra); -+extern int tap_setup_common(char *str, char *type, char **dev_name, -+ char **mac_out, char **gate_addr); -+extern void register_transport(struct transport *new); -+extern unsigned short eth_protocol(struct sk_buff *skb); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/net_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/net_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/net_user.h 2005-05-03 22:28:14.316433784 +0300 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_USER_H__ -+#define __UM_NET_USER_H__ -+ -+#define ETH_ADDR_LEN (6) -+#define ETH_HEADER_ETHERTAP (16) -+#define ETH_HEADER_OTHER (14) -+#define ETH_MAX_PACKET (1500) -+ -+#define UML_NET_VERSION (4) -+ -+struct net_user_info { -+ void (*init)(void *, void *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*set_mtu)(int mtu, void *); -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int max_packet; -+}; -+ -+extern void ether_user_init(void *data, void *dev); -+extern void dev_ip_addr(void *d, char *buf, char *bin_buf); -+extern void set_ether_mac(void *d, unsigned char *addr); -+extern void iter_addresses(void *d, void (*cb)(unsigned char *, -+ unsigned char *, void *), -+ void *arg); -+ -+extern void *get_output_buffer(int *len_out); -+extern void free_output_buffer(void *buffer); -+ -+extern int tap_open_common(void *dev, char *gate_addr); -+extern void tap_check_ips(char *gate_addr, char *eth_addr); -+ -+extern void read_output(int fd, char *output_out, int len); -+ -+extern int net_read(int fd, void *buf, int len); -+extern int net_recvfrom(int fd, void *buf, int len); -+extern int net_write(int fd, void *buf, int len); -+extern int net_send(int fd, void *buf, int len); -+extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); -+ -+extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+ -+extern char *split_if_spec(char *str, ...); -+ -+extern int dev_netmask(void *d, void *m); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/os.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/os.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/os.h 2005-05-03 22:28:14.318433480 +0300 -@@ -0,0 +1,221 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_H__ -+#define __OS_H__ -+ -+#include "asm/types.h" -+#include "../os/include/file.h" -+ -+#define OS_TYPE_FILE 1 -+#define OS_TYPE_DIR 2 -+#define OS_TYPE_SYMLINK 3 -+#define OS_TYPE_CHARDEV 4 -+#define OS_TYPE_BLOCKDEV 5 -+#define OS_TYPE_FIFO 6 -+#define OS_TYPE_SOCK 7 -+ -+/* os_access() flags */ -+#define OS_ACC_F_OK 0 /* Test for existence. */ -+#define OS_ACC_X_OK 1 /* Test for execute permission. */ -+#define OS_ACC_W_OK 2 /* Test for write permission. */ -+#define OS_ACC_R_OK 4 /* Test for read permission. */ -+#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ -+ -+/* -+ * types taken from stat_file() in hostfs_user.c -+ * (if they are wrong here, they are wrong there...). -+ */ -+struct uml_stat { -+ int ust_major; /* device */ -+ int ust_minor; -+ unsigned long long ust_ino; /* inode */ -+ int ust_mode; /* protection */ -+ int ust_nlink; /* number of hard links */ -+ int ust_uid; /* user ID of owner */ -+ int ust_gid; /* group ID of owner */ -+ unsigned long long ust_size; /* total size, in bytes */ -+ int ust_blksize; /* blocksize for filesystem I/O */ -+ unsigned long long ust_blocks; /* number of blocks allocated */ -+ unsigned long ust_atime; /* time of last access */ -+ unsigned long ust_mtime; /* time of last modification */ -+ unsigned long ust_ctime; /* time of last change */ -+ int ust_rmajor; -+ int ust_rminor; -+}; -+ -+struct openflags { -+ unsigned int r : 1; -+ unsigned int w : 1; -+ unsigned int s : 1; /* O_SYNC */ -+ unsigned int c : 1; /* O_CREAT */ -+ unsigned int t : 1; /* O_TRUNC */ -+ unsigned int a : 1; /* O_APPEND */ -+ unsigned int e : 1; /* O_EXCL */ -+ unsigned int cl : 1; /* FD_CLOEXEC */ -+ unsigned int d : 1; /* O_DIRECT */ -+}; -+ -+#define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \ -+ .t = 0, .a = 0, .e = 0, .cl = 0, \ -+ .d = 0 }) -+ -+static inline struct openflags of_read(struct openflags flags) -+{ -+ flags.r = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_write(struct openflags flags) -+{ -+ flags.w = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_rdwr(struct openflags flags) -+{ -+ return(of_read(of_write(flags))); -+} -+ -+static inline struct openflags of_set_rw(struct openflags flags, int r, int w) -+{ -+ flags.r = r; -+ flags.w = w; -+ return(flags); -+} -+ -+static inline struct openflags of_sync(struct openflags flags) -+{ -+ flags.s = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_create(struct openflags flags) -+{ -+ flags.c = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_trunc(struct openflags flags) -+{ -+ flags.t = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_append(struct openflags flags) -+{ -+ flags.a = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_excl(struct openflags flags) -+{ -+ flags.e = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_cloexec(struct openflags flags) -+{ -+ flags.cl = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_direct(struct openflags flags) -+{ -+ flags.d = 1; -+ return(flags); -+} -+ -+extern int os_stat_file(const char *file_name, struct uml_stat *buf); -+extern int os_lstat_file(const char *file_name, struct uml_stat *ubuf); -+extern int os_stat_fd(const int fd, struct uml_stat *buf); -+extern int os_access(const char *file, int mode); -+extern int os_set_file_time(const char *file, unsigned long access, -+ unsigned long mod); -+extern int os_set_file_perms(const char *file, int mode); -+extern int os_set_file_owner(const char *file, int owner, int group); -+extern void os_print_error(int error, const char* str); -+extern int os_get_exec_close(int fd, int *close_on_exec); -+extern int os_set_exec_close(int fd, int close_on_exec); -+extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); -+extern int os_window_size(int fd, int *rows, int *cols); -+extern int os_new_tty_pgrp(int fd, int pid); -+extern int os_get_ifname(int fd, char *namebuf); -+extern int os_set_slip(int fd); -+extern int os_set_owner(int fd, int pid); -+extern int os_sigio_async(int master, int slave); -+extern int os_mode_fd(int fd, int mode); -+ -+extern int os_seek_file(int fd, __u64 offset); -+extern int os_open_file(char *file, struct openflags flags, int mode); -+extern void *os_open_dir(char *dir, int *err_out); -+extern int os_seek_dir(void *stream, unsigned long long pos); -+extern int os_read_dir(void *stream, unsigned long long *ino_out, -+ char **name_out); -+extern int os_tell_dir(void *stream); -+extern int os_close_dir(void *stream); -+extern int os_remove_file(const char *file); -+extern int os_move_file(const char *from, const char *to); -+extern int os_truncate_file(const char *file, unsigned long long len); -+extern int os_truncate_fd(int fd, unsigned long long len); -+extern int os_read_file(int fd, void *buf, int len); -+extern int os_write_file(int fd, const void *buf, int count); -+extern int os_file_size(char *file, long long *size_out); -+extern int os_fd_size(int fd, long long *size_out); -+extern int os_file_modtime(char *file, unsigned long *modtime); -+extern int os_pipe(int *fd, int stream, int close_on_exec); -+extern int os_set_fd_async(int fd, int owner); -+extern int os_clear_fd_async(int fd); -+extern int os_set_fd_block(int fd, int blocking); -+extern int os_accept_connection(int fd); -+extern int os_create_unix_socket(char *file, int len, int close_on_exec); -+extern int os_make_symlink(const char *to, const char *from); -+extern int os_read_symlink(const char *file, char *buf, int size); -+extern int os_link_file(const char *to, const char *from); -+extern int os_make_dir(const char *dir, int mode); -+extern int os_remove_dir(const char *dir); -+extern int os_make_dev(const char *name, int mode, int major, int minor); -+extern int os_shutdown_socket(int fd, int r, int w); -+extern void os_close_file(int fd); -+extern int os_rcv_fd(int fd, int *helper_pid_out); -+extern int create_unix_socket(char *file, int len, int close_on_exec); -+extern int os_connect_socket(char *name); -+extern int os_file_type(char *file); -+extern int os_file_mode(char *file, struct openflags *mode_out); -+extern int os_lock_file(int fd, int excl); -+ -+extern unsigned long os_process_pc(int pid); -+extern int os_process_parent(int pid); -+extern void os_stop_process(int pid); -+extern void os_kill_process(int pid, int reap_child); -+extern void os_usr1_process(int pid); -+extern int os_getpid(void); -+ -+extern int os_map_memory(void *virt, int fd, unsigned long long off, -+ unsigned long len, int r, int w, int x); -+extern int os_protect_memory(void *addr, unsigned long len, -+ int r, int w, int x); -+extern int os_unmap_memory(void *addr, int len); -+extern void os_flush_stdout(void); -+extern int os_stat_filesystem(char *path, long *bsize_out, -+ long long *blocks_out, long long *bfree_out, -+ long long *bavail_out, long long *files_out, -+ long long *ffree_out, void *fsid_out, -+ int fsid_size, long *namelen_out, -+ long *spare_out); -+extern unsigned long long os_usecs(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/process.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/process.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/process.h 2005-05-03 22:28:14.319433328 +0300 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROCESS_H__ -+#define __PROCESS_H__ -+ -+#include <asm/sigcontext.h> -+ -+extern void sig_handler(int sig, struct sigcontext sc); -+extern void alarm_handler(int sig, struct sigcontext sc); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/ptrace_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/ptrace_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/ptrace_user.h 2005-05-03 22:28:14.319433328 +0300 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_USER_H__ -+#define __PTRACE_USER_H__ -+ -+#include "sysdep/ptrace_user.h" -+ -+/* syscall emulation path in ptrace */ -+#ifndef PTRACE_SYSEMU -+#define PTRACE_SYSEMU 31 -+#endif -+ -+extern int use_sysemu; -+ -+extern int ptrace_getregs(long pid, unsigned long *regs_out); -+extern int ptrace_setregs(long pid, unsigned long *regs_in); -+extern int ptrace_getfpregs(long pid, unsigned long *regs_out); -+extern void arch_enter_kernel(void *task, int pid); -+extern void arch_leave_kernel(void *task, int pid); -+extern void ptrace_pokeuser(unsigned long addr, unsigned long data); -+ -+#endif -Index: linux-2.4.29/arch/um/include/sigcontext.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sigcontext.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sigcontext.h 2005-05-03 22:28:14.320433176 +0300 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_SIGCONTEXT_H__ -+#define __UML_SIGCONTEXT_H__ -+ -+#include "sysdep/sigcontext.h" -+ -+extern int sc_size(void *data); -+extern void sc_to_sc(void *to_ptr, void *from_ptr); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sigio.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sigio.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sigio.h 2005-05-03 22:28:14.321433024 +0300 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGIO_H__ -+#define __SIGIO_H__ -+ -+extern int write_sigio_irq(int fd); -+extern int register_sigio_fd(int fd); -+extern int read_sigio_fd(int fd); -+extern int add_sigio_fd(int fd, int read); -+extern int ignore_sigio_fd(int fd); -+extern void sigio_lock(void); -+extern void sigio_unlock(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/signal_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/signal_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/signal_kern.h 2005-05-03 22:28:14.322432872 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_KERN_H__ -+#define __SIGNAL_KERN_H__ -+ -+extern int have_signals(void *t); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/signal_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/signal_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/signal_user.h 2005-05-03 22:28:14.323432720 +0300 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_USER_H__ -+#define __SIGNAL_USER_H__ -+ -+extern int signal_stack_size; -+ -+extern int change_sig(int signal, int on); -+extern void set_sigstack(void *stack, int size); -+extern void set_handler(int sig, void (*handler)(int), int flags, ...); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/skas_ptrace.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/skas_ptrace.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/skas_ptrace.h 2005-05-03 22:28:14.323432720 +0300 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PTRACE_H -+#define __SKAS_PTRACE_H -+ -+struct ptrace_faultinfo { -+ int is_write; -+ unsigned long addr; -+}; -+ -+struct ptrace_ldt { -+ int func; -+ void *ptr; -+ unsigned long bytecount; -+}; -+ -+#define PTRACE_FAULTINFO 52 -+#define PTRACE_SIGPENDING 53 -+#define PTRACE_LDT 54 -+#define PTRACE_SWITCH_MM 55 -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/syscall_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/syscall_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/syscall_user.h 2005-05-03 22:28:14.324432568 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSCALL_USER_H -+#define __SYSCALL_USER_H -+ -+extern int record_syscall_start(int syscall); -+extern void record_syscall_end(int index, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/checksum.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/checksum.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/checksum.h 2005-05-03 22:28:14.326432264 +0300 -@@ -0,0 +1,218 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SYSDEP_CHECKSUM_H -+#define __UM_SYSDEP_CHECKSUM_H -+ -+#include "linux/string.h" -+#include "asm/uaccess.h" -+ -+/* -+ * computes the checksum of a memory block at buff, length len, -+ * and adds in "sum" (32-bit) -+ * -+ * returns a 32-bit number suitable for feeding into itself -+ * or csum_tcpudp_magic -+ * -+ * this function must be called with even lengths, except -+ * for the last fragment, which may be odd -+ * -+ * it's best to have buff aligned on a 32-bit boundary -+ */ -+unsigned int csum_partial(const unsigned char * buff, int len, -+ unsigned int sum); -+ -+/* -+ * the same as csum_partial, but copies from src while it -+ * checksums, and handles user-space pointer exceptions correctly, when needed. -+ * -+ * here even more important to align src and dst on a 32-bit (or even -+ * better 64-bit) boundary -+ */ -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+ -+/* -+ * Note: when you get a NULL pointer exception here this means someone -+ * passed in an incorrect kernel address to one of these functions. -+ * -+ * If you use these functions directly please don't forget the -+ * verify_area(). -+ */ -+ -+static __inline__ -+unsigned int csum_partial_copy_nocheck(const char *src, char *dst, -+ int len, int sum) -+{ -+ memcpy(dst, src, len); -+ return(csum_partial(dst, len, sum)); -+} -+ -+static __inline__ -+unsigned int csum_partial_copy_from_user(const char *src, char *dst, -+ int len, int sum, int *err_ptr) -+{ -+ return csum_partial_copy_from(src, dst, len, sum, err_ptr); -+} -+ -+/* -+ * These are the old (and unsafe) way of doing checksums, a warning message -+ * will be printed if they are used and an exception occurs. -+ * -+ * these functions should go away after some time. -+ */ -+ -+#define csum_partial_copy_fromuser csum_partial_copy_from_user -+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); -+ -+/* -+ * This is a version of ip_compute_csum() optimized for IP headers, -+ * which always checksum on 4 octet boundaries. -+ * -+ * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by -+ * Arnt Gulbrandsen. -+ */ -+static inline unsigned short ip_fast_csum(unsigned char * iph, -+ unsigned int ihl) -+{ -+ unsigned int sum; -+ -+ __asm__ __volatile__( -+ "movl (%1), %0 ;\n" -+ "subl $4, %2 ;\n" -+ "jbe 2f ;\n" -+ "addl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+"1: adcl 16(%1), %0 ;\n" -+ "lea 4(%1), %1 ;\n" -+ "decl %2 ;\n" -+ "jne 1b ;\n" -+ "adcl $0, %0 ;\n" -+ "movl %0, %2 ;\n" -+ "shrl $16, %0 ;\n" -+ "addw %w2, %w0 ;\n" -+ "adcl $0, %0 ;\n" -+ "notl %0 ;\n" -+"2: ;\n" -+ /* Since the input registers which are loaded with iph and ipl -+ are modified, we must also specify them as outputs, or gcc -+ will assume they contain their original values. */ -+ : "=r" (sum), "=r" (iph), "=r" (ihl) -+ : "1" (iph), "2" (ihl)); -+ return(sum); -+} -+ -+/* -+ * Fold a partial checksum -+ */ -+ -+static inline unsigned int csum_fold(unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl $0xffff, %0 ;\n" -+ : "=r" (sum) -+ : "r" (sum << 16), "0" (sum & 0xffff0000) -+ ); -+ return (~sum) >> 16; -+} -+ -+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl %2, %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=r" (sum) -+ : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); -+ return sum; -+} -+ -+/* -+ * computes the checksum of the TCP/UDP pseudo-header -+ * returns a 16-bit checksum, already complemented -+ */ -+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -+} -+ -+/* -+ * this routine is used for miscellaneous IP-like checksums, mainly -+ * in icmp.c -+ */ -+ -+static inline unsigned short ip_compute_csum(unsigned char * buff, int len) -+{ -+ return csum_fold (csum_partial(buff, len, 0)); -+} -+ -+#define _HAVE_ARCH_IPV6_CSUM -+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, -+ struct in6_addr *daddr, -+ __u32 len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl 0(%1), %0 ;\n" -+ "adcl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+ "adcl 0(%2), %0 ;\n" -+ "adcl 4(%2), %0 ;\n" -+ "adcl 8(%2), %0 ;\n" -+ "adcl 12(%2), %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl %4, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=&r" (sum) -+ : "r" (saddr), "r" (daddr), -+ "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); -+ -+ return csum_fold(sum); -+} -+ -+/* -+ * Copy and checksum to user -+ */ -+#define HAVE_CSUM_COPY_USER -+static __inline__ unsigned int csum_and_copy_to_user(const char *src, -+ char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if (access_ok(VERIFY_WRITE, dst, len)) -+ return(csum_partial_copy_to(src, dst, len, sum, err_ptr)); -+ -+ if (len) -+ *err_ptr = -EFAULT; -+ -+ return -1; /* invalid checksum */ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/frame.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/frame.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/frame.h 2005-05-03 22:28:14.351428464 +0300 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_I386_H -+#define __FRAME_I386_H -+ -+struct arch_frame_data_raw { -+ unsigned long fp_start; -+ unsigned long sr; -+}; -+ -+struct arch_frame_data { -+ int fpstate_size; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/frame_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/frame_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/frame_kern.h 2005-05-03 22:28:14.352428312 +0300 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_I386_H -+#define __FRAME_KERN_I386_H -+ -+/* This is called from sys_sigreturn. It takes the sp at the point of the -+ * sigreturn system call and returns the address of the sigcontext struct -+ * on the stack. -+ */ -+ -+static inline void *sp_to_sc(unsigned long sp) -+{ -+ return((void *) sp); -+} -+ -+static inline void *sp_to_uc(unsigned long sp) -+{ -+ unsigned long uc; -+ -+ uc = sp + signal_frame_si.uc_index - -+ signal_frame_si.common.sp_index - 4; -+ return((void *) uc); -+} -+ -+static inline void *sp_to_rt_sc(unsigned long sp) -+{ -+ unsigned long sc; -+ -+ sc = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len - 4; -+ return((void *) sc); -+} -+ -+static inline void *sp_to_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_sc.common.sp_index + -+ signal_frame_sc.common.len - 8; -+ return((void *) mask); -+} -+ -+extern int sc_size(void *data); -+ -+static inline void *sp_to_rt_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len + -+ sc_size(&signal_frame_si.common.arch) - 4; -+ return((void *) mask); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/frame_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/frame_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/frame_user.h 2005-05-03 22:28:14.353428160 +0300 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_I386_H -+#define __FRAME_USER_I386_H -+ -+#include <asm/page.h> -+#include "sysdep/frame.h" -+ -+/* This stuff is to calculate the size of the fp state struct at runtime -+ * because it has changed between 2.2 and 2.4 and it would be good for a -+ * UML compiled on one to work on the other. -+ * So, setup_arch_frame_raw fills in the arch struct with the raw data, which -+ * just contains the address of the end of the sigcontext. This is invoked -+ * from the signal handler. -+ * setup_arch_frame uses that data to figure out what -+ * arch_frame_data.fpstate_size should be. It really has no idea, since it's -+ * not allowed to do sizeof(struct fpstate) but it's safe to consider that it's -+ * everything from the end of the sigcontext up to the top of the stack. So, -+ * it masks off the page number to get the offset within the page and subtracts -+ * that from the page size, and that's how big the fpstate struct will be -+ * considered to be. -+ */ -+ -+static inline void setup_arch_frame_raw(struct arch_frame_data_raw *data, -+ void *end, unsigned long srp) -+{ -+ unsigned long sr = *((unsigned long *) srp); -+ -+ data->fp_start = (unsigned long) end; -+ if((sr & PAGE_MASK) == ((unsigned long) end & PAGE_MASK)) -+ data->sr = sr; -+ else data->sr = 0; -+} -+ -+static inline void setup_arch_frame(struct arch_frame_data_raw *in, -+ struct arch_frame_data *out) -+{ -+ unsigned long fpstate_start = in->fp_start; -+ -+ if(in->sr == 0){ -+ fpstate_start &= ~PAGE_MASK; -+ out->fpstate_size = PAGE_SIZE - fpstate_start; -+ } -+ else { -+ out->fpstate_size = in->sr - fpstate_start; -+ } -+} -+ -+/* This figures out where on the stack the SA_RESTORER function address -+ * is stored. For i386, it's the signal handler return address, so it's -+ * located next to the frame pointer. -+ * This is inlined, so __builtin_frame_address(0) is correct. Otherwise, -+ * it would have to be __builtin_frame_address(1). -+ */ -+ -+#define frame_restorer() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) -+ -+/* Similarly, this returns the value of sp when the handler was first -+ * entered. This is used to calculate the proper sp when delivering -+ * signals. -+ */ -+ -+#define frame_sp() \ -+({ \ -+ unsigned long *fp; \ -+\ -+ fp = __builtin_frame_address(0); \ -+ ((unsigned long) (fp + 1)); \ -+}) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/ptrace.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/ptrace.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/ptrace.h 2005-05-03 22:28:14.355427856 +0300 -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_H -+#define __SYSDEP_I386_PTRACE_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "ptrace-tt.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "ptrace-skas.h" -+#endif -+ -+#include "choose-mode.h" -+ -+union uml_pt_regs { -+#ifdef UML_CONFIG_MODE_TT -+ struct tt_regs { -+ long syscall; -+ void *sc; -+ } tt; -+#endif -+#ifdef UML_CONFIG_MODE_SKAS -+ struct skas_regs { -+ unsigned long regs[HOST_FRAME_SIZE]; -+ unsigned long fp[HOST_FP_SIZE]; -+ unsigned long xfp[HOST_XFP_SIZE]; -+ unsigned long fault_addr; -+ unsigned long fault_type; -+ unsigned long trap_type; -+ long syscall; -+ int is_user; -+ } skas; -+#endif -+}; -+ -+#define EMPTY_UML_PT_REGS { } -+ -+extern int mode_tt; -+ -+#define UPT_SC(r) ((r)->tt.sc) -+#define UPT_IP(r) \ -+ CHOOSE_MODE(SC_IP(UPT_SC(r)), REGS_IP((r)->skas.regs)) -+#define UPT_SP(r) \ -+ CHOOSE_MODE(SC_SP(UPT_SC(r)), REGS_SP((r)->skas.regs)) -+#define UPT_EFLAGS(r) \ -+ CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs)) -+#define UPT_EAX(r) \ -+ CHOOSE_MODE(SC_EAX(UPT_SC(r)), REGS_EAX((r)->skas.regs)) -+#define UPT_EBX(r) \ -+ CHOOSE_MODE(SC_EBX(UPT_SC(r)), REGS_EBX((r)->skas.regs)) -+#define UPT_ECX(r) \ -+ CHOOSE_MODE(SC_ECX(UPT_SC(r)), REGS_ECX((r)->skas.regs)) -+#define UPT_EDX(r) \ -+ CHOOSE_MODE(SC_EDX(UPT_SC(r)), REGS_EDX((r)->skas.regs)) -+#define UPT_ESI(r) \ -+ CHOOSE_MODE(SC_ESI(UPT_SC(r)), REGS_ESI((r)->skas.regs)) -+#define UPT_EDI(r) \ -+ CHOOSE_MODE(SC_EDI(UPT_SC(r)), REGS_EDI((r)->skas.regs)) -+#define UPT_EBP(r) \ -+ CHOOSE_MODE(SC_EBP(UPT_SC(r)), REGS_EBP((r)->skas.regs)) -+#define UPT_ORIG_EAX(r) \ -+ CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall) -+#define UPT_CS(r) \ -+ CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs)) -+#define UPT_SS(r) \ -+ CHOOSE_MODE(SC_SS(UPT_SC(r)), REGS_SS((r)->skas.regs)) -+#define UPT_DS(r) \ -+ CHOOSE_MODE(SC_DS(UPT_SC(r)), REGS_DS((r)->skas.regs)) -+#define UPT_ES(r) \ -+ CHOOSE_MODE(SC_ES(UPT_SC(r)), REGS_ES((r)->skas.regs)) -+#define UPT_FS(r) \ -+ CHOOSE_MODE(SC_FS(UPT_SC(r)), REGS_FS((r)->skas.regs)) -+#define UPT_GS(r) \ -+ CHOOSE_MODE(SC_GS(UPT_SC(r)), REGS_GS((r)->skas.regs)) -+ -+#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) -+#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) -+#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) -+#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) -+#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) -+#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) -+ -+extern int user_context(unsigned long sp); -+ -+#define UPT_IS_USER(r) \ -+ CHOOSE_MODE(user_context(UPT_SP(r)), (r)->skas.is_user) -+ -+struct syscall_args { -+ unsigned long args[6]; -+}; -+ -+#define SYSCALL_ARGS(r) ((struct syscall_args) \ -+ { .args = { UPT_SYSCALL_ARG1(r), \ -+ UPT_SYSCALL_ARG2(r), \ -+ UPT_SYSCALL_ARG3(r), \ -+ UPT_SYSCALL_ARG4(r), \ -+ UPT_SYSCALL_ARG5(r), \ -+ UPT_SYSCALL_ARG6(r) } } ) -+ -+#define UPT_REG(regs, reg) \ -+ ({ unsigned long val; \ -+ switch(reg){ \ -+ case EIP: val = UPT_IP(regs); break; \ -+ case UESP: val = UPT_SP(regs); break; \ -+ case EAX: val = UPT_EAX(regs); break; \ -+ case EBX: val = UPT_EBX(regs); break; \ -+ case ECX: val = UPT_ECX(regs); break; \ -+ case EDX: val = UPT_EDX(regs); break; \ -+ case ESI: val = UPT_ESI(regs); break; \ -+ case EDI: val = UPT_EDI(regs); break; \ -+ case EBP: val = UPT_EBP(regs); break; \ -+ case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \ -+ case CS: val = UPT_CS(regs); break; \ -+ case SS: val = UPT_SS(regs); break; \ -+ case DS: val = UPT_DS(regs); break; \ -+ case ES: val = UPT_ES(regs); break; \ -+ case FS: val = UPT_FS(regs); break; \ -+ case GS: val = UPT_GS(regs); break; \ -+ case EFL: val = UPT_EFLAGS(regs); break; \ -+ default : \ -+ panic("Bad register in UPT_REG : %d\n", reg); \ -+ val = -1; \ -+ } \ -+ val; \ -+ }) -+ -+ -+#define UPT_SET(regs, reg, val) \ -+ do { \ -+ switch(reg){ \ -+ case EIP: UPT_IP(regs) = val; break; \ -+ case UESP: UPT_SP(regs) = val; break; \ -+ case EAX: UPT_EAX(regs) = val; break; \ -+ case EBX: UPT_EBX(regs) = val; break; \ -+ case ECX: UPT_ECX(regs) = val; break; \ -+ case EDX: UPT_EDX(regs) = val; break; \ -+ case ESI: UPT_ESI(regs) = val; break; \ -+ case EDI: UPT_EDI(regs) = val; break; \ -+ case EBP: UPT_EBP(regs) = val; break; \ -+ case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \ -+ case CS: UPT_CS(regs) = val; break; \ -+ case SS: UPT_SS(regs) = val; break; \ -+ case DS: UPT_DS(regs) = val; break; \ -+ case ES: UPT_ES(regs) = val; break; \ -+ case FS: UPT_FS(regs) = val; break; \ -+ case GS: UPT_GS(regs) = val; break; \ -+ case EFL: UPT_EFLAGS(regs) = val; break; \ -+ default : \ -+ panic("Bad register in UPT_SET : %d\n", reg); \ -+ break; \ -+ } \ -+ } while (0) -+ -+#define UPT_SET_SYSCALL_RETURN(r, res) \ -+ CHOOSE_MODE(SC_SET_SYSCALL_RETURN(UPT_SC(r), (res)), \ -+ REGS_SET_SYSCALL_RETURN((r)->skas.regs, (res))) -+ -+#define UPT_RESTART_SYSCALL(r) \ -+ CHOOSE_MODE(SC_RESTART_SYSCALL(UPT_SC(r)), \ -+ REGS_RESTART_SYSCALL((r)->skas.regs)) -+ -+#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) -+#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) -+#define UPT_SYSCALL_RET(r) UPT_EAX(r) -+ -+#define UPT_SEGV_IS_FIXABLE(r) \ -+ CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ -+ REGS_SEGV_IS_FIXABLE(&r->skas)) -+ -+#define UPT_FAULT_ADDR(r) \ -+ CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) -+ -+#define UPT_FAULT_WRITE(r) \ -+ CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/ptrace_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/ptrace_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/ptrace_user.h 2005-05-03 22:28:14.356427704 +0300 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_USER_H__ -+#define __SYSDEP_I386_PTRACE_USER_H__ -+ -+#include <asm/ptrace.h> -+ -+#define PT_OFFSET(r) ((r) * sizeof(long)) -+ -+#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) -+#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) -+ -+#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX) -+#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX) -+#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) -+#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) -+#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) -+ -+#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) -+ -+#define PT_IP_OFFSET PT_OFFSET(EIP) -+#define PT_IP(regs) ((regs)[EIP]) -+#define PT_SP(regs) ((regs)[UESP]) -+ -+#ifndef FRAME_SIZE -+#define FRAME_SIZE (17) -+#endif -+#define FRAME_SIZE_OFFSET (FRAME_SIZE * sizeof(unsigned long)) -+ -+#define FP_FRAME_SIZE (27) -+#define FPX_FRAME_SIZE (128) -+ -+#ifdef PTRACE_GETREGS -+#define UM_HAVE_GETREGS -+#endif -+ -+#ifdef PTRACE_SETREGS -+#define UM_HAVE_SETREGS -+#endif -+ -+#ifdef PTRACE_GETFPREGS -+#define UM_HAVE_GETFPREGS -+#endif -+ -+#ifdef PTRACE_SETFPREGS -+#define UM_HAVE_SETFPREGS -+#endif -+ -+#ifdef PTRACE_GETFPXREGS -+#define UM_HAVE_GETFPXREGS -+#endif -+ -+#ifdef PTRACE_SETFPXREGS -+#define UM_HAVE_SETFPXREGS -+#endif -+ -+extern void update_debugregs(int seq); -+ -+#endif -Index: linux-2.4.29/arch/um/include/sysdep-i386/sigcontext.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/sigcontext.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/sigcontext.h 2005-05-03 22:28:14.357427552 +0300 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_I386_H -+#define __SYS_SIGCONTEXT_I386_H -+ -+#include "sc.h" -+ -+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) -+ -+#define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc)) -+#define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result) -+ -+#define SC_FAULT_ADDR(sc) SC_CR2(sc) -+#define SC_FAULT_TYPE(sc) SC_ERR(sc) -+ -+#define FAULT_WRITE(err) (err & 2) -+#define TO_SC_ERR(is_write) ((is_write) ? 2 : 0) -+ -+#define SC_FAULT_WRITE(sc) (FAULT_WRITE(SC_ERR(sc))) -+ -+#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) -+ -+/* ptrace expects that, at the start of a system call, %eax contains -+ * -ENOSYS, so this makes it so. -+ */ -+#define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) -+ -+/* This is Page Fault */ -+#define SEGV_IS_FIXABLE(trap) (trap == 14) -+ -+#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) -+ -+extern unsigned long *sc_sigmask(void *sc_ptr); -+extern int sc_get_fpregs(unsigned long buf, void *sc_ptr); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-i386/syscalls.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-i386/syscalls.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-i386/syscalls.h 2005-05-03 22:28:14.358427400 +0300 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/unistd.h" -+#include "sysdep/ptrace.h" -+ -+typedef long syscall_handler_t(struct pt_regs); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) -+ -+extern syscall_handler_t sys_modify_ldt; -+extern syscall_handler_t old_mmap_i386; -+extern syscall_handler_t old_select; -+extern syscall_handler_t sys_ni_syscall; -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_mmap ] = old_mmap_i386, \ -+ [ __NR_select ] = old_select, \ -+ [ __NR_vm86old ] = sys_ni_syscall, \ -+ [ __NR_modify_ldt ] = sys_modify_ldt, \ -+ [ __NR_lchown32 ] = sys_lchown, \ -+ [ __NR_getuid32 ] = sys_getuid, \ -+ [ __NR_getgid32 ] = sys_getgid, \ -+ [ __NR_geteuid32 ] = sys_geteuid, \ -+ [ __NR_getegid32 ] = sys_getegid, \ -+ [ __NR_setreuid32 ] = sys_setreuid, \ -+ [ __NR_setregid32 ] = sys_setregid, \ -+ [ __NR_getgroups32 ] = sys_getgroups, \ -+ [ __NR_setgroups32 ] = sys_setgroups, \ -+ [ __NR_fchown32 ] = sys_fchown, \ -+ [ __NR_setresuid32 ] = sys_setresuid, \ -+ [ __NR_getresuid32 ] = sys_getresuid, \ -+ [ __NR_setresgid32 ] = sys_setresgid, \ -+ [ __NR_getresgid32 ] = sys_getresgid, \ -+ [ __NR_chown32 ] = sys_chown, \ -+ [ __NR_setuid32 ] = sys_setuid, \ -+ [ __NR_setgid32 ] = sys_setgid, \ -+ [ __NR_setfsuid32 ] = sys_setfsuid, \ -+ [ __NR_setfsgid32 ] = sys_setfsgid, \ -+ [ __NR_pivot_root ] = sys_pivot_root, \ -+ [ __NR_mincore ] = sys_mincore, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ 222 ] = sys_ni_syscall, -+ -+/* 222 doesn't yet have a name in include/asm-i386/unistd.h */ -+ -+#define LAST_ARCH_SYSCALL 222 -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ia64/ptrace.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ia64/ptrace.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ia64/ptrace.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_PTRACE_H -+#define __SYSDEP_IA64_PTRACE_H -+ -+struct sys_pt_regs { -+ int foo; -+}; -+ -+#define EMPTY_REGS { 0 } -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ia64/sigcontext.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ia64/sigcontext.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ia64/sigcontext.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SIGCONTEXT_H -+#define __SYSDEP_IA64_SIGCONTEXT_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ia64/syscalls.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ia64/syscalls.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ia64/syscalls.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SYSCALLS_H -+#define __SYSDEP_IA64_SYSCALLS_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ppc/ptrace.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ppc/ptrace.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ppc/ptrace.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,104 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_PTRACE_PPC_H -+#define __SYS_PTRACE_PPC_H -+ -+#include "linux/config.h" -+#include "linux/types.h" -+ -+/* the following taken from <asm-ppc/ptrace.h> */ -+ -+#ifdef CONFIG_PPC64 -+#define PPC_REG unsigned long /*long*/ -+#else -+#define PPC_REG unsigned long -+#endif -+struct sys_pt_regs_s { -+ PPC_REG gpr[32]; -+ PPC_REG nip; -+ PPC_REG msr; -+ PPC_REG orig_gpr3; /* Used for restarting system calls */ -+ PPC_REG ctr; -+ PPC_REG link; -+ PPC_REG xer; -+ PPC_REG ccr; -+ PPC_REG mq; /* 601 only (not used at present) */ -+ /* Used on APUS to hold IPL value. */ -+ PPC_REG trap; /* Reason for being here */ -+ PPC_REG dar; /* Fault registers */ -+ PPC_REG dsisr; -+ PPC_REG result; /* Result of a system call */ -+}; -+ -+#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)) -+ -+struct sys_pt_regs { -+ PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)]; -+}; -+ -+#define UM_MAX_REG (PT_FPR0) -+#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG)) -+ -+#define EMPTY_REGS { { [ 0 ... NUM_REGS - 1] = 0 } } -+ -+#define UM_REG(r, n) ((r)->regs[n]) -+ -+#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3) -+#define UM_SP(r) UM_REG(r, PT_R1) -+#define UM_IP(r) UM_REG(r, PT_NIP) -+#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR) -+#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0) -+#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3) -+#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4) -+#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5) -+#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6) -+#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7) -+#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8) -+ -+#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG)) -+#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG)) -+#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG)) -+#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG)) -+#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG)) -+ -+#define UM_SET_SYSCALL_RETURN(_regs, result) \ -+do { \ -+ if (result < 0) { \ -+ (_regs)->regs[PT_CCR] |= 0x10000000; \ -+ UM_SYSCALL_RET((_regs)) = -result; \ -+ } else { \ -+ UM_SYSCALL_RET((_regs)) = result; \ -+ } \ -+} while(0) -+ -+extern void shove_aux_table(unsigned long sp); -+#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp); -+ -+/* These aren't actually defined. The undefs are just to make sure -+ * everyone's clear on the concept. -+ */ -+#undef UML_HAVE_GETREGS -+#undef UML_HAVE_GETFPREGS -+#undef UML_HAVE_SETREGS -+#undef UML_HAVE_SETFPREGS -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ppc/sigcontext.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ppc/sigcontext.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ppc/sigcontext.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_PPC_H -+#define __SYS_SIGCONTEXT_PPC_H -+ -+#define DSISR_WRITE 0x02000000 -+ -+#define SC_FAULT_ADDR(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = _sc->regs->dar; \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception */ \ -+ retval = _sc->regs->nip; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_FAULT_WRITE(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = !!(_sc->regs->dsisr & DSISR_WRITE); \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception: not a write */ \ -+ retval = 0; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_IP(sc) ((sc)->regs->nip) -+#define SC_SP(sc) ((sc)->regs->gpr[1]) -+#define SEGV_IS_FIXABLE(sc) (1) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysdep-ppc/syscalls.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysdep-ppc/syscalls.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysdep-ppc/syscalls.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2, -+ unsigned long arg3, unsigned long arg4, -+ unsigned long arg5, unsigned long arg6); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ (*sys_call_table[syscall])(UM_SYSCALL_ARG1(®s), \ -+ UM_SYSCALL_ARG2(®s), \ -+ UM_SYSCALL_ARG3(®s), \ -+ UM_SYSCALL_ARG4(®s), \ -+ UM_SYSCALL_ARG5(®s), \ -+ UM_SYSCALL_ARG6(®s)) -+ -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+ -+/* old_mmap needs the correct prototype since syscall_kern.c includes -+ * this file. -+ */ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_modify_ldt ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_read ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_write ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_iobase ] = sys_ni_syscall, \ -+ [ __NR_pivot_root ] = sys_ni_syscall, \ -+ [ __NR_multiplexer ] = sys_ni_syscall, \ -+ [ __NR_mmap ] = old_mmap, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ __NR_mincore ] = sys_mincore, -+ -+#define LAST_ARCH_SYSCALL __NR_mincore -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/sysrq.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/sysrq.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/sysrq.h 2005-05-03 22:28:14.364426488 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SYSRQ_H -+#define __UM_SYSRQ_H -+ -+extern void show_trace(unsigned long *stack); -+ -+#endif -Index: linux-2.4.29/arch/um/include/tempfile.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/tempfile.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/tempfile.h 2005-05-03 22:28:14.365426336 +0300 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TEMPFILE_H__ -+#define __TEMPFILE_H__ -+ -+extern int make_tempfile(const char *template, char **tempname, int do_unlink); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/time_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/time_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/time_user.h 2005-05-03 22:28:14.366426184 +0300 -@@ -0,0 +1,18 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TIME_USER_H__ -+#define __TIME_USER_H__ -+ -+extern void timer(void); -+extern void switch_timers(int to_real); -+extern void set_interval(int timer_type); -+extern void idle_sleep(int secs); -+extern void enable_timer(void); -+extern void disable_timer(void); -+extern unsigned long time_lock(void); -+extern void time_unlock(unsigned long); -+ -+#endif -Index: linux-2.4.29/arch/um/include/tlb.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/tlb.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/tlb.h 2005-05-03 22:28:14.367426032 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TLB_H__ -+#define __TLB_H__ -+ -+extern void mprotect_kernel_vm(int w); -+extern void force_flush_all(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/ubd_user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/ubd_user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/ubd_user.h 2005-05-03 22:28:14.368425880 +0300 -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UBD_USER_H -+#define __UM_UBD_USER_H -+ -+#include "os.h" -+ -+enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; -+ -+struct io_thread_req { -+ enum ubd_req op; -+ int fds[2]; -+ unsigned long offsets[2]; -+ unsigned long long offset; -+ unsigned long length; -+ char *buffer; -+ int sectorsize; -+ unsigned long sector_mask; -+ unsigned long long cow_offset; -+ unsigned long bitmap_words[2]; -+ int map_fd; -+ unsigned long long map_offset; -+ int error; -+}; -+ -+extern int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out); -+extern int create_cow_file(char *cow_file, char *backing_file, -+ struct openflags flags, int sectorsize, -+ int alignment, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, -+ int *data_offset_out); -+extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -+extern int read_ubd_fs(int fd, void *buffer, int len); -+extern int write_ubd_fs(int fd, char *buffer, int len); -+extern int start_io_thread(unsigned long sp, int *fds_out); -+extern void do_io(struct io_thread_req *req); -+ -+static inline int ubd_test_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ n = bit / bits; -+ off = bit % bits; -+ return((data[n] & (1 << off)) != 0); -+} -+ -+static inline void ubd_set_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ n = bit / bits; -+ off = bit % bits; -+ data[n] |= (1 << off); -+} -+ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/umid.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/umid.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/umid.h 2005-05-03 22:28:14.368425880 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMID_H__ -+#define __UMID_H__ -+ -+extern int umid_file_name(char *name, char *buf, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/uml_uaccess.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/uml_uaccess.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/uml_uaccess.h 2005-05-03 22:28:14.369425728 +0300 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_UACCESS_H__ -+#define __UML_UACCESS_H__ -+ -+extern int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+extern unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out); -+void __do_copy(void *to, const void *from, int n); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/um_mmu.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/um_mmu.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/um_mmu.h 2005-05-03 22:28:14.370425576 +0300 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_MMU_H -+#define __ARCH_UM_MMU_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mmu.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mmu.h" -+#endif -+ -+typedef union { -+#ifdef CONFIG_MODE_TT -+ struct mmu_context_tt tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct mmu_context_skas skas; -+#endif -+} mm_context_t; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/umn.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/umn.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/umn.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMN_H -+#define __UMN_H -+ -+extern int open_umn_tty(int *slave_out, int *slipno_out); -+extern void close_umn_tty(int master, int slave); -+extern int umn_send_packet(int fd, void *data, int len); -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern void slip_unesc(unsigned char s); -+extern void umn_read(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/um_uaccess.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/um_uaccess.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/um_uaccess.h 2005-05-03 22:28:14.372425272 +0300 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_UACCESS_H -+#define __ARCH_UM_UACCESS_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/uaccess.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/uaccess.h" -+#endif -+ -+#define access_ok(type, addr, size) \ -+ CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size) -+ -+static inline int verify_area(int type, const void * addr, unsigned long size) -+{ -+ return(CHOOSE_MODE_PROC(verify_area_tt, verify_area_skas, type, addr, -+ size)); -+} -+ -+static inline int copy_from_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to, -+ from, n)); -+} -+ -+static inline int copy_to_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_to_user_tt, copy_to_user_skas, to, -+ from, n)); -+} -+ -+/* -+ * strncpy_from_user: - Copy a NUL terminated string from userspace. -+ * @dst: Destination address, in kernel space. This buffer must be at -+ * least @count bytes long. -+ * @src: Source address, in user space. -+ * @count: Maximum number of bytes to copy, including the trailing NUL. -+ * -+ * Copies a NUL-terminated string from userspace to kernel space. -+ * -+ * On success, returns the length of the string (not including the trailing -+ * NUL). -+ * -+ * If access to userspace fails, returns -EFAULT (some data may have been -+ * copied). -+ * -+ * If @count is smaller than the length of the string, copies @count bytes -+ * and returns @count. -+ */ -+ -+static inline int strncpy_from_user(char *dst, const char *src, int count) -+{ -+ return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, -+ dst, src, count)); -+} -+ -+/* -+ * __clear_user: - Zero a block of memory in user space, with less checking. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. Caller must check -+ * the specified block with access_ok() before calling this function. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ -+static inline int __clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); -+} -+ -+/* -+ * clear_user: - Zero a block of memory in user space. -+ * @to: Destination address, in user space. -+ * @n: Number of bytes to zero. -+ * -+ * Zero a block of memory in user space. -+ * -+ * Returns number of bytes that could not be cleared. -+ * On success, this will be zero. -+ */ -+static inline int clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); -+} -+ -+/* -+ * strlen_user: - Get the size of a string in user space. -+ * @str: The string to measure. -+ * @n: The maximum valid length -+ * -+ * Get the size of a NUL-terminated string in user space. -+ * -+ * Returns the size of the string INCLUDING the terminating NUL. -+ * On exception, returns 0. -+ * If the string is too long, returns a value greater than @n. -+ */ -+static inline int strnlen_user(const void *str, int len) -+{ -+ return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/user.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/user.h 2005-05-03 22:28:14.373425120 +0300 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_H__ -+#define __USER_H__ -+ -+extern void panic(const char *fmt, ...); -+extern int printk(const char *fmt, ...); -+extern void schedule(void); -+extern void *um_kmalloc(int size); -+extern void *um_kmalloc_atomic(int size); -+extern void kfree(void *ptr); -+extern int in_aton(char *str); -+extern int open_gdb_chan(void); -+extern void *um_vmalloc(int size); -+extern void vfree(void *ptr); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/include/user_util.h -=================================================================== ---- linux-2.4.29.orig/arch/um/include/user_util.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/include/user_util.h 2005-05-03 22:28:14.374424968 +0300 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_UTIL_H__ -+#define __USER_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+#define CATCH_EINTR(expr) while (((expr) < 0) && (errno == EINTR)) -+ -+extern int mode_tt; -+ -+extern int grantpt(int __fd); -+extern int unlockpt(int __fd); -+extern char *ptsname(int __fd); -+ -+struct cpu_task { -+ int pid; -+ void *task; -+}; -+ -+extern struct cpu_task cpu_tasks[]; -+ -+struct signal_info { -+ void (*handler)(int, union uml_pt_regs *); -+ int is_irq; -+}; -+ -+extern struct signal_info sig_info[]; -+ -+extern unsigned long low_physmem; -+extern unsigned long high_physmem; -+extern unsigned long uml_physmem; -+extern unsigned long uml_reserved; -+extern unsigned long end_vm; -+extern unsigned long start_vm; -+extern unsigned long highmem; -+ -+extern char host_info[]; -+ -+extern char saved_command_line[]; -+extern char command_line[]; -+ -+extern char *tempdir; -+ -+extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; -+extern unsigned long _unprotected_end; -+extern unsigned long brk_start; -+ -+extern int pty_output_sigio; -+extern int pty_close_sigio; -+ -+extern void stop(void); -+extern void stack_protections(unsigned long address); -+extern void task_protections(unsigned long address); -+extern int wait_for_stop(int pid, int sig, int cont_type, void *relay); -+extern void *add_signal_handler(int sig, void (*handler)(int)); -+extern int start_fork_tramp(void *arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)); -+extern int linux_main(int argc, char **argv); -+extern void set_cmdline(char *cmd); -+extern void input_cb(void (*proc)(void *), void *arg, int arg_len); -+extern int get_pty(void); -+extern void *um_kmalloc(int size); -+extern int switcheroo(int fd, int prot, void *from, void *to, int size); -+extern void setup_machinename(char *machine_out); -+extern void setup_hostinfo(void); -+extern void add_arg(char *cmd_line, char *arg); -+extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); -+extern void init_new_thread_signals(int altstack); -+extern void do_exec(int old_pid, int new_pid); -+extern void tracer_panic(char *msg, ...); -+extern char *get_umid(int only_if_set); -+extern void do_longjmp(void *p, int val); -+extern int detach(int pid, int sig); -+extern int attach(int pid); -+extern void kill_child_dead(int pid); -+extern int cont(int pid); -+extern void check_ptrace(void); -+extern void check_sigio(void); -+extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); -+extern void write_sigio_workaround(void); -+extern void arch_check_bugs(void); -+extern int cpu_feature(char *what, char *buf, int len); -+extern int arch_handle_signal(int sig, union uml_pt_regs *regs); -+extern int arch_fixup(unsigned long address, void *sc_ptr); -+extern int can_do_skas(void); -+extern void arch_init_thread(void); -+extern int setjmp_wrapper(void (*proc)(void *, void *), ...); -+extern int raw(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/checksum.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/checksum.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/checksum.c 2005-05-03 22:28:14.375424816 +0300 -@@ -0,0 +1,42 @@ -+#include "asm/uaccess.h" -+#include "linux/errno.h" -+ -+extern unsigned int arch_csum_partial(const char *buff, int len, int sum); -+ -+extern unsigned int csum_partial(char *buff, int len, int sum) -+{ -+ return(arch_csum_partial(buff, len, sum)); -+} -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_to_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(src, len, sum)); -+} -+ -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_from_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(dst, len, sum)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/common.ld.in -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/common.ld.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/common.ld.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,53 @@ -+ .kstrtab : { *(.kstrtab) } -+ -+ . = ALIGN(16); /* Exception table */ -+ __start___ex_table = .; -+ __ex_table : { *(__ex_table) } -+ __stop___ex_table = .; -+ -+ __start___ksymtab = .; /* Kernel symbol table */ -+ __ksymtab : { *(__ksymtab) } -+ __stop___ksymtab = .; -+ -+ .unprotected : { *(.unprotected) } -+ . = ALIGN(4096); -+ PROVIDE (_unprotected_end = .); -+ -+ . = ALIGN(4096); -+ __uml_setup_start = .; -+ .uml.setup.init : { *(.uml.setup.init) } -+ __uml_setup_end = .; -+ __uml_help_start = .; -+ .uml.help.init : { *(.uml.help.init) } -+ __uml_help_end = .; -+ __uml_postsetup_start = .; -+ .uml.postsetup.init : { *(.uml.postsetup.init) } -+ __uml_postsetup_end = .; -+ __setup_start = .; -+ .setup.init : { *(.setup.init) } -+ __setup_end = .; -+ __initcall_start = .; -+ .initcall.init : { *(.initcall.init) } -+ __initcall_end = .; -+ __uml_initcall_start = .; -+ .uml.initcall.init : { *(.uml.initcall.init) } -+ __uml_initcall_end = .; -+ __init_end = .; -+ __exitcall_begin = .; -+ .exitcall : { *(.exitcall.exit) } -+ __exitcall_end = .; -+ __uml_exitcall_begin = .; -+ .uml.exitcall : { *(.uml.exitcall.exit) } -+ __uml_exitcall_end = .; -+ -+ __preinit_array_start = .; -+ .preinit_array : { *(.preinit_array) } -+ __preinit_array_end = .; -+ __init_array_start = .; -+ .init_array : { *(.init_array) } -+ __init_array_end = .; -+ __fini_array_start = .; -+ .fini_array : { *(.fini_array) } -+ __fini_array_end = .; -+ -+ .data.init : { *(.data.init) } -Index: linux-2.4.29/arch/um/kernel/config.c.in -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/config.c.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/config.c.in 2005-05-03 22:28:14.406420104 +0300 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include "init.h" -+ -+static __initdata char *config = "CONFIG"; -+ -+static int __init print_config(char *line, int *add) -+{ -+ printf("%s", config); -+ exit(0); -+} -+ -+__uml_setup("--showconfig", print_config, -+"--showconfig\n" -+" Prints the config file that this UML binary was generated from.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/dyn_link.ld.in -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/dyn_link.ld.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/dyn_link.ld.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,169 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ .interp : { *(.interp) } -+ __binary_start = .; -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ -+ . = ALIGN(4096); -+ -+ /* Read-only sections, merged into text segment: */ -+ .hash : { *(.hash) } -+ .dynsym : { *(.dynsym) } -+ .dynstr : { *(.dynstr) } -+ .gnu.version : { *(.gnu.version) } -+ .gnu.version_d : { *(.gnu.version_d) } -+ .gnu.version_r : { *(.gnu.version_r) } -+ .rel.init : { *(.rel.init) } -+ .rela.init : { *(.rela.init) } -+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } -+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } -+ .rel.fini : { *(.rel.fini) } -+ .rela.fini : { *(.rela.fini) } -+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } -+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } -+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } -+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } -+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } -+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } -+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } -+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } -+ .rel.ctors : { *(.rel.ctors) } -+ .rela.ctors : { *(.rela.ctors) } -+ .rel.dtors : { *(.rel.dtors) } -+ .rela.dtors : { *(.rela.dtors) } -+ .rel.got : { *(.rel.got) } -+ .rela.got : { *(.rela.got) } -+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } -+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } -+ .rel.plt : { *(.rel.plt) } -+ .rela.plt : { *(.rela.plt) } -+ .init : { -+ KEEP (*(.init)) -+ } =0x90909090 -+ .plt : { *(.plt) } -+ .text : { -+ *(.text .stub .text.* .gnu.linkonce.t.*) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ } =0x90909090 -+ .fini : { -+ KEEP (*(.fini)) -+ } =0x90909090 -+ -+ PROVIDE (__etext = .); -+ PROVIDE (_etext = .); -+ PROVIDE (etext = .); -+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } -+ .rodata1 : { *(.rodata1) } -+ .eh_frame_hdr : { *(.eh_frame_hdr) } -+ -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/kernel/common.ld.in') -+ -+ /* Ensure the __preinit_array_start label is properly aligned. We -+ could instead move the label definition inside the section, but -+ the linker would then create the section even if it turns out to -+ be empty, which isn't pretty. */ -+ . = ALIGN(32 / 8); -+ .preinit_array : { *(.preinit_array) } -+ .init_array : { *(.init_array) } -+ .fini_array : { *(.fini_array) } -+ .data : { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data .data.* .gnu.linkonce.d.*) -+ SORT(CONSTRUCTORS) -+ } -+ .data1 : { *(.data1) } -+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } -+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } -+ .eh_frame : { KEEP (*(.eh_frame)) } -+ .gcc_except_table : { *(.gcc_except_table) } -+ .dynamic : { *(.dynamic) } -+ .ctors : { -+ /* gcc uses crtbegin.o to find the start of -+ the constructors, so we make sure it is -+ first. Because this is a wildcard, it -+ doesn't matter if the user does not -+ actually link against crtbegin.o; the -+ linker won't look for a file to match a -+ wildcard. The wildcard also means that it -+ doesn't matter which directory crtbegin.o -+ is in. */ -+ KEEP (*crtbegin.o(.ctors)) -+ /* We don't want to include the .ctor section from -+ from the crtend.o file until after the sorted ctors. -+ The .ctor section from the crtend file contains the -+ end of ctors marker and it must be last */ -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors)) -+ KEEP (*(SORT(.ctors.*))) -+ KEEP (*(.ctors)) -+ } -+ .dtors : { -+ KEEP (*crtbegin.o(.dtors)) -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors)) -+ KEEP (*(SORT(.dtors.*))) -+ KEEP (*(.dtors)) -+ } -+ .jcr : { KEEP (*(.jcr)) } -+ .got : { *(.got.plt) *(.got) } -+ _edata = .; -+ PROVIDE (edata = .); -+ __bss_start = .; -+ .bss : { -+ *(.dynbss) -+ *(.bss .bss.* .gnu.linkonce.b.*) -+ *(COMMON) -+ /* Align here to ensure that the .bss section occupies space up to -+ _end. Align after .bss to ensure correct alignment even if the -+ .bss section disappears because there are no input sections. */ -+ . = ALIGN(32 / 8); -+ . = ALIGN(32 / 8); -+ } -+ _end = .; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+ /* DWARF debug sections. -+ Symbols in the DWARF debugging sections are relative to the beginning -+ of the section so we begin them at 0. */ -+ /* DWARF 1 */ -+ .debug 0 : { *(.debug) } -+ .line 0 : { *(.line) } -+ /* GNU DWARF 1 extensions */ -+ .debug_srcinfo 0 : { *(.debug_srcinfo) } -+ .debug_sfnames 0 : { *(.debug_sfnames) } -+ /* DWARF 1.1 and DWARF 2 */ -+ .debug_aranges 0 : { *(.debug_aranges) } -+ .debug_pubnames 0 : { *(.debug_pubnames) } -+ /* DWARF 2 */ -+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } -+ .debug_abbrev 0 : { *(.debug_abbrev) } -+ .debug_line 0 : { *(.debug_line) } -+ .debug_frame 0 : { *(.debug_frame) } -+ .debug_str 0 : { *(.debug_str) } -+ .debug_loc 0 : { *(.debug_loc) } -+ .debug_macinfo 0 : { *(.debug_macinfo) } -+ /* SGI/MIPS DWARF 2 extensions */ -+ .debug_weaknames 0 : { *(.debug_weaknames) } -+ .debug_funcnames 0 : { *(.debug_funcnames) } -+ .debug_typenames 0 : { *(.debug_typenames) } -+ .debug_varnames 0 : { *(.debug_varnames) } -+} -Index: linux-2.4.29/arch/um/kernel/exec_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/exec_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/exec_kern.c 2005-05-03 22:28:14.408419800 +0300 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/slab.h" -+#include "linux/smp_lock.h" -+#include "asm/ptrace.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "tlb.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "time_user.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_thread(void) -+{ -+ CHOOSE_MODE(flush_thread_tt(), flush_thread_skas()); -+} -+ -+void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) -+{ -+ CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); -+} -+ -+extern void log_exec(char **argv, void *tty); -+ -+static int execve1(char *file, char **argv, char **env) -+{ -+ int error; -+ -+#ifdef CONFIG_TTY_LOG -+ log_exec(argv, current->tty); -+#endif -+ error = do_execve(file, argv, env, ¤t->thread.regs); -+ if (error == 0){ -+ current->ptrace &= ~PT_DTRACE; -+ set_cmdline(current_cmd()); -+ } -+ return(error); -+} -+ -+int um_execve(char *file, char **argv, char **env) -+{ -+ int err; -+ -+ err = execve1(file, argv, env); -+ if(!err) -+ do_longjmp(current->thread.exec_buf, 1); -+ return(err); -+} -+ -+int sys_execve(char *file, char **argv, char **env) -+{ -+ int error; -+ char *filename; -+ -+ lock_kernel(); -+ filename = getname((char *) file); -+ error = PTR_ERR(filename); -+ if (IS_ERR(filename)) goto out; -+ error = execve1(filename, argv, env); -+ putname(filename); -+ out: -+ unlock_kernel(); -+ return(error); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/exitcode.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/exitcode.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/exitcode.c 2005-05-03 22:28:14.409419648 +0300 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/ctype.h" -+#include "linux/proc_fs.h" -+#include "asm/uaccess.h" -+ -+/* If read and write race, the read will still atomically read a valid -+ * value. -+ */ -+int uml_exitcode = 0; -+ -+static int read_proc_exitcode(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ len = sprintf(page, "%d\n", uml_exitcode); -+ len -= off; -+ if(len <= off+count) *eof = 1; -+ *start = page + off; -+ if(len > count) len = count; -+ if(len < 0) len = 0; -+ return(len); -+} -+ -+static int write_proc_exitcode(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *end, buf[sizeof("nnnnn\0")]; -+ int tmp; -+ -+ if(copy_from_user(buf, buffer, count)) -+ return(-EFAULT); -+ tmp = simple_strtol(buf, &end, 0); -+ if((*end != '\0') && !isspace(*end)) -+ return(-EINVAL); -+ uml_exitcode = tmp; -+ return(count); -+} -+ -+static int make_proc_exitcode(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("exitcode", 0600, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_exitcode : Failed to register " -+ "/proc/exitcode\n"); -+ return(0); -+ } -+ -+ ent->read_proc = read_proc_exitcode; -+ ent->write_proc = write_proc_exitcode; -+ -+ return(0); -+} -+ -+__initcall(make_proc_exitcode); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/filehandle.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/filehandle.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/filehandle.c 2005-05-03 22:28:14.410419496 +0300 -@@ -0,0 +1,250 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/slab.h" -+#include "linux/list.h" -+#include "linux/spinlock.h" -+#include "linux/fs.h" -+#include "linux/errno.h" -+#include "filehandle.h" -+#include "os.h" -+#include "kern_util.h" -+ -+static spinlock_t open_files_lock = SPIN_LOCK_UNLOCKED; -+static struct list_head open_files = LIST_HEAD_INIT(open_files); -+ -+#define NUM_RECLAIM 128 -+ -+static void reclaim_fds(void) -+{ -+ struct file_handle *victim; -+ int closed = NUM_RECLAIM; -+ -+ spin_lock(&open_files_lock); -+ while(!list_empty(&open_files) && closed--){ -+ victim = list_entry(open_files.prev, struct file_handle, list); -+ os_close_file(victim->fd); -+ victim->fd = -1; -+ list_del_init(&victim->list); -+ } -+ spin_unlock(&open_files_lock); -+} -+ -+int open_file(char *name, struct openflags flags, int mode) -+{ -+ int fd; -+ -+ fd = os_open_file(name, flags, mode); -+ if(fd != -EMFILE) -+ return(fd); -+ -+ reclaim_fds(); -+ fd = os_open_file(name, flags, mode); -+ -+ return(fd); -+} -+ -+void *open_dir(char *file) -+{ -+ void *dir; -+ int err; -+ -+ dir = os_open_dir(file, &err); -+ if(dir != NULL) -+ return(dir); -+ if(err != -EMFILE) -+ return(ERR_PTR(err)); -+ -+ reclaim_fds(); -+ -+ dir = os_open_dir(file, &err); -+ if(dir == NULL) -+ dir = ERR_PTR(err); -+ -+ return(dir); -+} -+ -+void not_reclaimable(struct file_handle *fh) -+{ -+ char *name; -+ -+ if(fh->get_name == NULL) -+ return; -+ -+ if(list_empty(&fh->list)){ -+ name = (*fh->get_name)(fh->inode); -+ if(name != NULL){ -+ fh->fd = open_file(name, fh->flags, 0); -+ kfree(name); -+ } -+ else printk("File descriptor %d has no name\n", fh->fd); -+ } -+ else { -+ spin_lock(&open_files_lock); -+ list_del_init(&fh->list); -+ spin_unlock(&open_files_lock); -+ } -+} -+ -+void is_reclaimable(struct file_handle *fh, char *(name_proc)(struct inode *), -+ struct inode *inode) -+{ -+ fh->get_name = name_proc; -+ fh->inode = inode; -+ -+ spin_lock(&open_files_lock); -+ list_add(&fh->list, &open_files); -+ spin_unlock(&open_files_lock); -+} -+ -+static int active_handle(struct file_handle *fh) -+{ -+ int fd; -+ char *name; -+ -+ if(!list_empty(&fh->list)) -+ list_move(&fh->list, &open_files); -+ -+ if(fh->fd != -1) -+ return(0); -+ -+ if(fh->inode == NULL) -+ return(-ENOENT); -+ -+ name = (*fh->get_name)(fh->inode); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ fd = open_file(name, fh->flags, 0); -+ kfree(name); -+ if(fd < 0) -+ return(fd); -+ -+ fh->fd = fd; -+ is_reclaimable(fh, fh->get_name, fh->inode); -+ -+ return(0); -+} -+ -+int filehandle_fd(struct file_handle *fh) -+{ -+ int err; -+ -+ err = active_handle(fh); -+ if(err) -+ return(err); -+ -+ return(fh->fd); -+} -+ -+static void init_fh(struct file_handle *fh, int fd, struct openflags flags) -+{ -+ flags.c = 0; -+ *fh = ((struct file_handle) { .list = LIST_HEAD_INIT(fh->list), -+ .fd = fd, -+ .get_name = NULL, -+ .inode = NULL, -+ .flags = flags }); -+} -+ -+int open_filehandle(char *name, struct openflags flags, int mode, -+ struct file_handle *fh) -+{ -+ int fd; -+ -+ fd = open_file(name, flags, mode); -+ if(fd < 0) -+ return(fd); -+ -+ init_fh(fh, fd, flags); -+ return(0); -+} -+ -+int close_file(struct file_handle *fh) -+{ -+ spin_lock(&open_files_lock); -+ list_del(&fh->list); -+ spin_unlock(&open_files_lock); -+ -+ os_close_file(fh->fd); -+ -+ fh->fd = -1; -+ return(0); -+} -+ -+int read_file(struct file_handle *fh, unsigned long long offset, char *buf, -+ int len) -+{ -+ int err; -+ -+ err = active_handle(fh); -+ if(err) -+ return(err); -+ -+ err = os_seek_file(fh->fd, offset); -+ if(err) -+ return(err); -+ -+ return(os_read_file(fh->fd, buf, len)); -+} -+ -+int write_file(struct file_handle *fh, unsigned long long offset, -+ const char *buf, int len) -+{ -+ int err; -+ -+ err = active_handle(fh); -+ if(err) -+ return(err); -+ -+ if(offset != -1) -+ err = os_seek_file(fh->fd, offset); -+ if(err) -+ return(err); -+ -+ return(os_write_file(fh->fd, buf, len)); -+} -+ -+int truncate_file(struct file_handle *fh, unsigned long long size) -+{ -+ int err; -+ -+ err = active_handle(fh); -+ if(err) -+ return(err); -+ -+ return(os_truncate_fd(fh->fd, size)); -+} -+ -+int make_pipe(struct file_handle *fhs) -+{ -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err && (err != -EMFILE)) -+ return(err); -+ -+ if(err){ -+ reclaim_fds(); -+ err = os_pipe(fds, 1, 1); -+ } -+ if(err) -+ return(err); -+ -+ init_fh(&fhs[0], fds[0], OPENFLAGS()); -+ init_fh(&fhs[1], fds[1], OPENFLAGS()); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/frame.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/frame.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/frame.c 2005-05-03 22:28:14.412419192 +0300 -@@ -0,0 +1,343 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <signal.h> -+#include <wait.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <sys/syscall.h> -+#include <sys/mman.h> -+#include <asm/page.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "frame_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "ptrace_user.h" -+#include "os.h" -+ -+static int capture_stack(int (*child)(void *arg), void *arg, void *sp, -+ unsigned long top, void **data_out) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ int pid, status, n, len; -+ -+ /* Start the child as a thread */ -+ pid = clone(child, sp, CLONE_VM | SIGCHLD, arg); -+ if(pid < 0){ -+ printf("capture_stack : clone failed - errno = %d\n", errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself and continue it with a SIGUSR1 to force -+ * it into the signal handler. -+ */ -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_CONT, pid, 0, SIGUSR1) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself again and grab its registers again. -+ * At this point, the handler has stuffed the addresses of -+ * sig, sc, and SA_RESTORER in raw. -+ */ -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0){ -+ printf("capture_stack : PTRACE_GETREGS failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* It has outlived its usefulness, so continue it so it can exit */ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != 9)){ -+ printf("capture_stack : Expected exit signal 9, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ -+ /* The frame that we want is the top of the signal stack */ -+ -+ len = top - PT_SP(regs); -+ *data_out = malloc(len); -+ if(*data_out == NULL){ -+ printf("capture_stack : malloc failed - errno = %d\n", errno); -+ exit(1); -+ } -+ memcpy(*data_out, (void *) PT_SP(regs), len); -+ -+ return(len); -+} -+ -+struct common_raw { -+ void *stack; -+ int size; -+ unsigned long sig; -+ unsigned long sr; -+ unsigned long sp; -+ struct arch_frame_data_raw arch; -+}; -+ -+#define SA_RESTORER (0x04000000) -+ -+typedef unsigned long old_sigset_t; -+ -+struct old_sigaction { -+ __sighandler_t handler; -+ old_sigset_t sa_mask; -+ unsigned long sa_flags; -+ void (*sa_restorer)(void); -+}; -+ -+static void child_common(struct common_raw *common, sighandler_t handler, -+ int restorer, int flags) -+{ -+ stack_t ss = ((stack_t) { .ss_sp = common->stack, -+ .ss_flags = 0, -+ .ss_size = common->size }); -+ int err; -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printf("PTRACE_TRACEME failed, errno = %d\n", errno); -+ } -+ if(sigaltstack(&ss, NULL) < 0){ -+ printf("sigaltstack failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ if(restorer){ -+ struct sigaction sa; -+ -+ sa.sa_handler = handler; -+ sigemptyset(&sa.sa_mask); -+ sa.sa_flags = SA_ONSTACK | flags; -+ err = sigaction(SIGUSR1, &sa, NULL); -+ } -+ else { -+ struct old_sigaction sa; -+ -+ sa.handler = handler; -+ sa.sa_mask = 0; -+ sa.sa_flags = (SA_ONSTACK | flags) & ~SA_RESTORER; -+ err = syscall(__NR_sigaction, SIGUSR1, &sa, NULL); -+ } -+ -+ if(err < 0){ -+ printf("sigaction failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ os_stop_process(os_getpid()); -+} -+ -+/* Changed only during early boot */ -+struct sc_frame signal_frame_sc; -+ -+struct sc_frame signal_frame_sc_sr; -+ -+struct sc_frame_raw { -+ struct common_raw common; -+ unsigned long sc; -+ int restorer; -+}; -+ -+/* Changed only during early boot */ -+static struct sc_frame_raw *raw_sc = NULL; -+ -+static void sc_handler(int sig, struct sigcontext sc) -+{ -+ raw_sc->common.sig = (unsigned long) &sig; -+ raw_sc->common.sr = frame_restorer(); -+ raw_sc->common.sp = frame_sp(); -+ raw_sc->sc = (unsigned long) ≻ -+ setup_arch_frame_raw(&raw_sc->common.arch, &sc + 1, raw_sc->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int sc_child(void *arg) -+{ -+ raw_sc = arg; -+ child_common(&raw_sc->common, (sighandler_t) sc_handler, -+ raw_sc->restorer, 0); -+ return(-1); -+} -+ -+/* Changed only during early boot */ -+struct si_frame signal_frame_si; -+ -+struct si_frame_raw { -+ struct common_raw common; -+ unsigned long sip; -+ unsigned long si; -+ unsigned long ucp; -+ unsigned long uc; -+}; -+ -+/* Changed only during early boot */ -+static struct si_frame_raw *raw_si = NULL; -+ -+static void si_handler(int sig, siginfo_t *si, struct ucontext *ucontext) -+{ -+ raw_si->common.sig = (unsigned long) &sig; -+ raw_si->common.sr = frame_restorer(); -+ raw_si->common.sp = frame_sp(); -+ raw_si->sip = (unsigned long) &si; -+ raw_si->si = (unsigned long) si; -+ raw_si->ucp = (unsigned long) &ucontext; -+ raw_si->uc = (unsigned long) ucontext; -+ setup_arch_frame_raw(&raw_si->common.arch, -+ ucontext->uc_mcontext.fpregs, raw_si->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int si_child(void *arg) -+{ -+ raw_si = arg; -+ child_common(&raw_si->common, (sighandler_t) si_handler, 1, -+ SA_SIGINFO); -+ return(-1); -+} -+ -+static int relative_sr(unsigned long sr, int sr_index, void *stack, -+ void *framep) -+{ -+ unsigned long *srp = (unsigned long *) sr; -+ unsigned long frame = (unsigned long) framep; -+ -+ if((*srp & PAGE_MASK) == (unsigned long) stack){ -+ *srp -= sr; -+ *((unsigned long *) (frame + sr_index)) = *srp; -+ return(1); -+ } -+ else return(0); -+} -+ -+static unsigned long capture_stack_common(int (*proc)(void *), void *arg, -+ struct common_raw *common_in, -+ void *top, void *sigstack, -+ int stack_len, -+ struct frame_common *common_out) -+{ -+ unsigned long sig_top = (unsigned long) sigstack + stack_len, base; -+ -+ common_in->stack = (void *) sigstack; -+ common_in->size = stack_len; -+ common_out->len = capture_stack(proc, arg, top, sig_top, -+ &common_out->data); -+ base = sig_top - common_out->len; -+ common_out->sig_index = common_in->sig - base; -+ common_out->sp_index = common_in->sp - base; -+ common_out->sr_index = common_in->sr - base; -+ common_out->sr_relative = relative_sr(common_in->sr, -+ common_out->sr_index, sigstack, -+ common_out->data); -+ return(base); -+} -+ -+void capture_signal_stack(void) -+{ -+ struct sc_frame_raw raw_sc; -+ struct si_frame_raw raw_si; -+ void *stack, *sigstack; -+ unsigned long top, base; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ sigstack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if((stack == MAP_FAILED) || (sigstack == MAP_FAILED)){ -+ printf("capture_signal_stack : mmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ -+ /* Get the sigcontext, no sigrestorer layout */ -+ raw_sc.restorer = 0; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc.common); -+ -+ signal_frame_sc.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc.common.arch); -+ -+ /* Ditto for the sigcontext, sigrestorer layout */ -+ raw_sc.restorer = 1; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc_sr.common); -+ signal_frame_sc_sr.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc_sr.common.arch); -+ -+ /* And the siginfo layout */ -+ -+ base = capture_stack_common(si_child, &raw_si, &raw_si.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_si.common); -+ signal_frame_si.sip_index = raw_si.sip - base; -+ signal_frame_si.si_index = raw_si.si - base; -+ signal_frame_si.ucp_index = raw_si.ucp - base; -+ signal_frame_si.uc_index = raw_si.uc - base; -+ setup_arch_frame(&raw_si.common.arch, &signal_frame_si.common.arch); -+ -+ if((munmap(stack, PAGE_SIZE) < 0) || -+ (munmap(sigstack, PAGE_SIZE) < 0)){ -+ printf("capture_signal_stack : munmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/frame_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/frame_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/frame_kern.c 2005-05-03 22:28:14.413419040 +0300 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/signal.h" -+#include "asm/ucontext.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -+{ -+ if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) -+ return -EFAULT; -+ if (from->si_code < 0) -+ return __copy_to_user(to, from, sizeof(siginfo_t)); -+ else { -+ int err; -+ -+ /* If you change siginfo_t structure, please be sure -+ this code is fixed accordingly. -+ It should never copy any pad contained in the structure -+ to avoid security leaks, but must copy the generic -+ 3 ints plus the relevant union member. */ -+ err = __put_user(from->si_signo, &to->si_signo); -+ err |= __put_user(from->si_errno, &to->si_errno); -+ err |= __put_user((short)from->si_code, &to->si_code); -+ /* First 32bits of unions are always present. */ -+ err |= __put_user(from->si_pid, &to->si_pid); -+ switch (from->si_code >> 16) { -+ case __SI_FAULT >> 16: -+ break; -+ case __SI_CHLD >> 16: -+ err |= __put_user(from->si_utime, &to->si_utime); -+ err |= __put_user(from->si_stime, &to->si_stime); -+ err |= __put_user(from->si_status, &to->si_status); -+ default: -+ err |= __put_user(from->si_uid, &to->si_uid); -+ break; -+ } -+ return err; -+ } -+} -+ -+static int copy_restorer(void (*restorer)(void), unsigned long start, -+ unsigned long sr_index, int sr_relative) -+{ -+ unsigned long sr; -+ -+ if(sr_relative){ -+ sr = (unsigned long) restorer; -+ sr += start + sr_index; -+ restorer = (void (*)(void)) sr; -+ } -+ -+ return(copy_to_user((void *) (start + sr_index), &restorer, -+ sizeof(restorer))); -+} -+ -+extern int userspace_pid[]; -+ -+static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, -+ struct arch_frame_data *arch) -+{ -+ return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), -+ arch), -+ copy_sc_to_user_skas(userspace_pid[0], to, fp, -+ &from->regs, -+ current->thread.cr2, -+ current->thread.err))); -+} -+ -+static int copy_ucontext_to_user(struct ucontext *uc, void *fp, sigset_t *set, -+ unsigned long sp) -+{ -+ int err = 0; -+ -+ err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); -+ err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); -+ err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); -+ err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, -+ &signal_frame_si.common.arch); -+ err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); -+ return(err); -+} -+ -+int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask) -+{ -+ unsigned long start; -+ void *sip, *ucp, *fp; -+ -+ start = stack_top - signal_frame_si.common.len; -+ sip = (void *) (start + signal_frame_si.si_index); -+ ucp = (void *) (start + signal_frame_si.uc_index); -+ fp = (void *) (((unsigned long) ucp) + sizeof(struct ucontext)); -+ -+ if(restorer == NULL) -+ panic("setup_signal_stack_si - no restorer"); -+ -+ if(copy_to_user((void *) start, signal_frame_si.common.data, -+ signal_frame_si.common.len) || -+ copy_to_user((void *) (start + signal_frame_si.common.sig_index), -+ &sig, sizeof(sig)) || -+ copy_siginfo_to_user(sip, info) || -+ copy_to_user((void *) (start + signal_frame_si.sip_index), &sip, -+ sizeof(sip)) || -+ copy_ucontext_to_user(ucp, fp, mask, PT_REGS_SP(regs)) || -+ copy_to_user((void *) (start + signal_frame_si.ucp_index), &ucp, -+ sizeof(ucp)) || -+ copy_restorer(restorer, start, signal_frame_si.common.sr_index, -+ signal_frame_si.common.sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) = handler; -+ PT_REGS_SP(regs) = start + signal_frame_si.common.sp_index; -+ return(0); -+} -+ -+int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, sigset_t *mask) -+{ -+ struct frame_common *frame = &signal_frame_sc_sr.common; -+ void *user_sc; -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ unsigned long sigs, sr; -+ unsigned long start = stack_top - frame->len - sig_size; -+ -+ user_sc = (void *) (start + signal_frame_sc_sr.sc_index); -+ if(restorer == NULL){ -+ frame = &signal_frame_sc.common; -+ user_sc = (void *) (start + signal_frame_sc.sc_index); -+ sr = (unsigned long) frame->data; -+ sr += frame->sr_index; -+ sr = *((unsigned long *) sr); -+ restorer = ((void (*)(void)) sr); -+ } -+ -+ sigs = start + frame->len; -+ if(copy_to_user((void *) start, frame->data, frame->len) || -+ copy_to_user((void *) (start + frame->sig_index), &sig, -+ sizeof(sig)) || -+ copy_sc_to_user(user_sc, NULL, regs, -+ &signal_frame_sc.common.arch) || -+ copy_to_user(sc_sigmask(user_sc), mask, sizeof(mask->sig[0])) || -+ copy_to_user((void *) sigs, &mask->sig[1], sig_size) || -+ copy_restorer(restorer, start, frame->sr_index, frame->sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) = handler; -+ PT_REGS_SP(regs) = start + frame->sp_index; -+ -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/gmon_syms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/gmon_syms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/gmon_syms.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void __bb_init_func(void *); -+EXPORT_SYMBOL(__bb_init_func); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/gprof_syms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/gprof_syms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/gprof_syms.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void mcount(void); -+EXPORT_SYMBOL(mcount); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/helper.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/helper.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/helper.c 2005-05-03 22:28:14.416418584 +0300 -@@ -0,0 +1,167 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <sched.h> -+#include <sys/signal.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+ -+struct helper_data { -+ void (*pre_exec)(void*); -+ void *pre_data; -+ char **argv; -+ int fd; -+}; -+ -+/* Debugging aid, changed only from gdb */ -+int helper_pause = 0; -+ -+static void helper_hup(int sig) -+{ -+} -+ -+static int helper_child(void *arg) -+{ -+ struct helper_data *data = arg; -+ char **argv = data->argv; -+ int errval; -+ -+ if(helper_pause){ -+ signal(SIGHUP, helper_hup); -+ pause(); -+ } -+ if(data->pre_exec != NULL) -+ (*data->pre_exec)(data->pre_data); -+ execvp(argv[0], argv); -+ errval = errno; -+ printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); -+ os_write_file(data->fd, &errval, sizeof(errval)); -+ os_kill_process(os_getpid(), 0); -+ return(0); -+} -+ -+/* XXX The alloc_stack here breaks if this is called in the tracing thread */ -+ -+int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out) -+{ -+ struct helper_data data; -+ unsigned long stack, sp; -+ int pid, fds[2], err, n; -+ -+ if((stack_out != NULL) && (*stack_out != 0)) -+ stack = *stack_out; -+ else stack = alloc_stack(0, um_in_interrupt()); -+ if(stack == 0) -+ return(-ENOMEM); -+ -+ err = os_pipe(fds, 1, 0); -+ if(err < 0){ -+ printk("run_helper : pipe failed, err = %d\n", -err); -+ goto out_free; -+ } -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0){ -+ printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", -+ -err); -+ goto out_close; -+ } -+ -+ sp = stack + page_size() - sizeof(void *); -+ data.pre_exec = pre_exec; -+ data.pre_data = pre_data; -+ data.argv = argv; -+ data.fd = fds[1]; -+ pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); -+ if(pid < 0){ -+ printk("run_helper : clone failed, errno = %d\n", errno); -+ err = -errno; -+ goto out_close; -+ } -+ -+ os_close_file(fds[1]); -+ n = os_read_file(fds[0], &err, sizeof(err)); -+ if(n < 0){ -+ printk("run_helper : read on pipe failed, err = %d\n", -n); -+ err = n; -+ os_kill_process(pid, 1); -+ } -+ else if(n != 0){ -+ CATCH_EINTR(n = waitpid(pid, NULL, 0)); -+ pid = -errno; -+ } -+ err = pid; -+ -+ out_close: -+ os_close_file(fds[0]); -+ out_free: -+ if(stack_out == NULL) -+ free_stack(stack, 0); -+ else *stack_out = stack; -+ return(err); -+} -+ -+int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, -+ unsigned long *stack_out, int stack_order) -+{ -+ unsigned long stack, sp; -+ int pid, status; -+ -+ stack = alloc_stack(stack_order, um_in_interrupt()); -+ if(stack == 0) return(-ENOMEM); -+ -+ sp = stack + (page_size() << stack_order) - sizeof(void *); -+ pid = clone(proc, (void *) sp, flags | SIGCHLD, arg); -+ if(pid < 0){ -+ printk("run_helper_thread : clone failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ if(stack_out == NULL){ -+ CATCH_EINTR(pid = waitpid(pid, &status, 0)); -+ if(pid < 0){ -+ printk("run_helper_thread - wait failed, errno = %d\n", -+ errno); -+ pid = -errno; -+ } -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) -+ printk("run_helper_thread - thread returned status " -+ "0x%x\n", status); -+ free_stack(stack, stack_order); -+ } -+ else *stack_out = stack; -+ return(pid); -+} -+ -+int helper_wait(int pid, int block) -+{ -+ int ret; -+ -+ CATCH_EINTR(ret = waitpid(pid, NULL, WNOHANG)); -+ if(ret < 0){ -+ printk("helper_wait : waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/initrd_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/initrd_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/initrd_kern.c 2005-05-03 22:28:14.417418432 +0300 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/blk.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+ -+/* Changed by uml_initrd_setup, which is a setup */ -+static char *initrd __initdata = NULL; -+ -+static int __init read_initrd(void) -+{ -+ void *area; -+ long long size; -+ int err; -+ -+ if(initrd == NULL) return 0; -+ err = os_file_size(initrd, &size); -+ if(err) return 0; -+ area = alloc_bootmem(size); -+ if(area == NULL) return 0; -+ if(load_initrd(initrd, area, size) == -1) return 0; -+ initrd_start = (unsigned long) area; -+ initrd_end = initrd_start + size; -+ return 0; -+} -+ -+__uml_postsetup(read_initrd); -+ -+static int __init uml_initrd_setup(char *line, int *add) -+{ -+ initrd = line; -+ return 0; -+} -+ -+__uml_setup("initrd=", uml_initrd_setup, -+"initrd=<initrd image>\n" -+" This is used to boot UML from an initrd image. The argument is the\n" -+" name of the file containing the image.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/initrd_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/initrd_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/initrd_user.c 2005-05-03 22:28:14.418418280 +0300 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <errno.h> -+ -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "initrd.h" -+#include "os.h" -+ -+int load_initrd(char *filename, void *buf, int size) -+{ -+ int fd, n; -+ -+ fd = os_open_file(filename, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Opening '%s' failed - err = %d\n", filename, -fd); -+ return(-1); -+ } -+ n = os_read_file(fd, buf, size); -+ if(n != size){ -+ printk("Read of %d bytes from '%s' failed, err = %d\n", size, -+ filename, -n); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/init_task.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/init_task.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/init_task.c 2005-05-03 22:28:14.419418128 +0300 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "linux/sched.h" -+#include "linux/version.h" -+#include "asm/uaccess.h" -+#include "asm/pgtable.h" -+#include "user_util.h" -+#include "mem_user.h" -+ -+static struct fs_struct init_fs = INIT_FS; -+static struct files_struct init_files = INIT_FILES; -+static struct signal_struct init_signals = INIT_SIGNALS; -+struct mm_struct init_mm = INIT_MM(init_mm); -+ -+/* -+ * Initial task structure. -+ * -+ * We need to make sure that this is 16384-byte aligned due to the -+ * way process stacks are handled. This is done by having a special -+ * "init_task" linker map entry.. -+ */ -+ -+union task_union init_task_union -+__attribute__((__section__(".data.init_task"))) = -+{ INIT_TASK(init_task_union.task) }; -+ -+struct task_struct *alloc_task_struct(void) -+{ -+ return((struct task_struct *) -+ __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); -+} -+ -+void unprotect_stack(unsigned long stack) -+{ -+ protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, -+ 1, 1, 0, 1); -+} -+ -+void free_task_struct(struct task_struct *task) -+{ -+ /* free_pages decrements the page counter and only actually frees -+ * the pages if they are now not accessed by anything. -+ */ -+ free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/irq.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/irq.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/irq.c 2005-05-03 22:28:14.422417672 +0300 -@@ -0,0 +1,840 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: -+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/irq.h" -+#include "linux/kernel_stat.h" -+#include "linux/interrupt.h" -+#include "linux/random.h" -+#include "linux/slab.h" -+#include "linux/file.h" -+#include "linux/proc_fs.h" -+#include "linux/init.h" -+#include "linux/seq_file.h" -+#include "asm/irq.h" -+#include "asm/hw_irq.h" -+#include "asm/hardirq.h" -+#include "asm/atomic.h" -+#include "asm/signal.h" -+#include "asm/system.h" -+#include "asm/errno.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+static void register_irq_proc (unsigned int irq); -+ -+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = -+ { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; -+ -+/* -+ * Generic no controller code -+ */ -+ -+static void enable_none(unsigned int irq) { } -+static unsigned int startup_none(unsigned int irq) { return 0; } -+static void disable_none(unsigned int irq) { } -+static void ack_none(unsigned int irq) -+{ -+/* -+ * 'what should we do if we get a hw irq event on an illegal vector'. -+ * each architecture has to answer this themselves, it doesnt deserve -+ * a generic callback i think. -+ */ -+#if CONFIG_X86 -+ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); -+#ifdef CONFIG_X86_LOCAL_APIC -+ /* -+ * Currently unexpected vectors happen only on SMP and APIC. -+ * We _must_ ack these because every local APIC has only N -+ * irq slots per priority level, and a 'hanging, unacked' IRQ -+ * holds up an irq slot - in excessive cases (when multiple -+ * unexpected vectors occur) that might lock up the APIC -+ * completely. -+ */ -+ ack_APIC_irq(); -+#endif -+#endif -+} -+ -+/* startup is the same as "enable", shutdown is same as "disable" */ -+#define shutdown_none disable_none -+#define end_none enable_none -+ -+struct hw_interrupt_type no_irq_type = { -+ "none", -+ startup_none, -+ shutdown_none, -+ enable_none, -+ disable_none, -+ ack_none, -+ end_none -+}; -+ -+/* -+ * Generic, controller-independent functions: -+ */ -+ -+int get_irq_list(char *buf) -+{ -+ int i, j; -+ unsigned long flags; -+ struct irqaction * action; -+ char *p = buf; -+ -+ p += sprintf(p, " "); -+ for (j=0; j<smp_num_cpus; j++) -+ p += sprintf(p, "CPU%d ",j); -+ *p++ = '\n'; -+ -+ for (i = 0 ; i < NR_IRQS ; i++) { -+ spin_lock_irqsave(&irq_desc[i].lock, flags); -+ action = irq_desc[i].action; -+ if (!action) -+ goto end; -+ p += sprintf(p, "%3d: ",i); -+#ifndef CONFIG_SMP -+ p += sprintf(p, "%10u ", kstat_irqs(i)); -+#else -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ kstat.irqs[cpu_logical_map(j)][i]); -+#endif -+ p += sprintf(p, " %14s", irq_desc[i].handler->typename); -+ p += sprintf(p, " %s", action->name); -+ -+ for (action=action->next; action; action = action->next) -+ p += sprintf(p, ", %s", action->name); -+ *p++ = '\n'; -+ end: -+ spin_unlock_irqrestore(&irq_desc[i].lock, flags); -+ } -+ p += sprintf(p, "\n"); -+#ifdef notdef -+#if CONFIG_SMP -+ p += sprintf(p, "LOC: "); -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ apic_timer_irqs[cpu_logical_map(j)]); -+ p += sprintf(p, "\n"); -+#endif -+#endif -+ p += sprintf(p, "ERR: %10lu\n", 0L); -+ return p - buf; -+} -+ -+/* -+ * This should really return information about whether -+ * we should do bottom half handling etc. Right now we -+ * end up _always_ checking the bottom half, which is a -+ * waste of time and is not what some drivers would -+ * prefer. -+ */ -+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, -+ struct irqaction * action) -+{ -+ int status; -+ int cpu = smp_processor_id(); -+ -+ irq_enter(cpu, irq); -+ -+ status = 1; /* Force the "do bottom halves" bit */ -+ -+ if (!(action->flags & SA_INTERRUPT)) -+ __sti(); -+ -+ do { -+ status |= action->flags; -+ action->handler(irq, action->dev_id, regs); -+ action = action->next; -+ } while (action); -+ if (status & SA_SAMPLE_RANDOM) -+ add_interrupt_randomness(irq); -+ __cli(); -+ -+ irq_exit(cpu, irq); -+ -+ return status; -+} -+ -+/* -+ * Generic enable/disable code: this just calls -+ * down into the PIC-specific version for the actual -+ * hardware disable after having gotten the irq -+ * controller lock. -+ */ -+ -+/** -+ * disable_irq_nosync - disable an irq without waiting -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. Unlike disable_irq(), this function does not ensure existing -+ * instances of the IRQ handler have completed before returning. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void inline disable_irq_nosync(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ if (!desc->depth++) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->disable(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/** -+ * disable_irq - disable an irq and wait for completion -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. That is for two disables you need two enables. This -+ * function waits for any pending IRQ handlers for this interrupt -+ * to complete before returning. If you use this function while -+ * holding a resource the IRQ handler may need you will deadlock. -+ * -+ * This function may be called - with care - from IRQ context. -+ */ -+ -+void disable_irq(unsigned int irq) -+{ -+ disable_irq_nosync(irq); -+ -+ if (!local_irq_count(smp_processor_id())) { -+ do { -+ barrier(); -+ } while (irq_desc[irq].status & IRQ_INPROGRESS); -+ } -+} -+ -+/** -+ * enable_irq - enable interrupt handling on an irq -+ * @irq: Interrupt to enable -+ * -+ * Re-enables the processing of interrupts on this IRQ line -+ * providing no disable_irq calls are now in effect. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void enable_irq(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ switch (desc->depth) { -+ case 1: { -+ unsigned int status = desc->status & ~IRQ_DISABLED; -+ desc->status = status; -+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { -+ desc->status = status | IRQ_REPLAY; -+ hw_resend_irq(desc->handler,irq); -+ } -+ desc->handler->enable(irq); -+ /* fall-through */ -+ } -+ default: -+ desc->depth--; -+ break; -+ case 0: -+ printk(KERN_ERR "enable_irq() unbalanced from %p\n", -+ __builtin_return_address(0)); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/* -+ * do_IRQ handles all normal device IRQ's (the special -+ * SMP cross-CPU interrupts have their own specific -+ * handlers). -+ */ -+unsigned int do_IRQ(int irq, union uml_pt_regs *regs) -+{ -+ /* -+ * 0 return value means that this irq is already being -+ * handled by some other CPU. (or is disabled) -+ */ -+ int cpu = smp_processor_id(); -+ irq_desc_t *desc = irq_desc + irq; -+ struct irqaction * action; -+ unsigned int status; -+ -+ kstat.irqs[cpu][irq]++; -+ spin_lock(&desc->lock); -+ desc->handler->ack(irq); -+ /* -+ REPLAY is when Linux resends an IRQ that was dropped earlier -+ WAITING is used by probe to mark irqs that are being tested -+ */ -+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); -+ status |= IRQ_PENDING; /* we _want_ to handle it */ -+ -+ /* -+ * If the IRQ is disabled for whatever reason, we cannot -+ * use the action we have. -+ */ -+ action = NULL; -+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { -+ action = desc->action; -+ status &= ~IRQ_PENDING; /* we commit to handling */ -+ status |= IRQ_INPROGRESS; /* we are handling it */ -+ } -+ desc->status = status; -+ -+ /* -+ * If there is no IRQ handler or it was disabled, exit early. -+ Since we set PENDING, if another processor is handling -+ a different instance of this same irq, the other processor -+ will take care of it. -+ */ -+ if (!action) -+ goto out; -+ -+ /* -+ * Edge triggered interrupts need to remember -+ * pending events. -+ * This applies to any hw interrupts that allow a second -+ * instance of the same irq to arrive while we are in do_IRQ -+ * or in the handler. But the code here only handles the _second_ -+ * instance of the irq, not the third or fourth. So it is mostly -+ * useful for irq hardware that does not mask cleanly in an -+ * SMP environment. -+ */ -+ for (;;) { -+ spin_unlock(&desc->lock); -+ handle_IRQ_event(irq, (struct pt_regs *) regs, action); -+ spin_lock(&desc->lock); -+ -+ if (!(desc->status & IRQ_PENDING)) -+ break; -+ desc->status &= ~IRQ_PENDING; -+ } -+ desc->status &= ~IRQ_INPROGRESS; -+out: -+ /* -+ * The ->end() handler has to deal with interrupts which got -+ * disabled while the handler was running. -+ */ -+ desc->handler->end(irq); -+ spin_unlock(&desc->lock); -+ -+ if (softirq_pending(cpu)) -+ do_softirq(); -+ return 1; -+} -+ -+/** -+ * request_irq - allocate an interrupt line -+ * @irq: Interrupt line to allocate -+ * @handler: Function to be called when the IRQ occurs -+ * @irqflags: Interrupt type flags -+ * @devname: An ascii name for the claiming device -+ * @dev_id: A cookie passed back to the handler function -+ * -+ * This call allocates interrupt resources and enables the -+ * interrupt line and IRQ handling. From the point this -+ * call is made your handler function may be invoked. Since -+ * your handler function must clear any interrupt the board -+ * raises, you must take care both to initialise your hardware -+ * and to set up the interrupt handler in the right order. -+ * -+ * Dev_id must be globally unique. Normally the address of the -+ * device data structure is used as the cookie. Since the handler -+ * receives this value it makes sense to use it. -+ * -+ * If your interrupt is shared you must pass a non NULL dev_id -+ * as this is required when freeing the interrupt. -+ * -+ * Flags: -+ * -+ * SA_SHIRQ Interrupt is shared -+ * -+ * SA_INTERRUPT Disable local interrupts while processing -+ * -+ * SA_SAMPLE_RANDOM The interrupt can be used for entropy -+ * -+ */ -+ -+int request_irq(unsigned int irq, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, -+ const char * devname, -+ void *dev_id) -+{ -+ int retval; -+ struct irqaction * action; -+ -+#if 1 -+ /* -+ * Sanity-check: shared interrupts should REALLY pass in -+ * a real dev-ID, otherwise we'll have trouble later trying -+ * to figure out which interrupt is which (messes up the -+ * interrupt freeing logic etc). -+ */ -+ if (irqflags & SA_SHIRQ) { -+ if (!dev_id) -+ printk(KERN_ERR "Bad boy: %s (at 0x%x) called us " -+ "without a dev_id!\n", devname, (&irq)[-1]); -+ } -+#endif -+ -+ if (irq >= NR_IRQS) -+ return -EINVAL; -+ if (!handler) -+ return -EINVAL; -+ -+ action = (struct irqaction *) -+ kmalloc(sizeof(struct irqaction), GFP_KERNEL); -+ if (!action) -+ return -ENOMEM; -+ -+ action->handler = handler; -+ action->flags = irqflags; -+ action->mask = 0; -+ action->name = devname; -+ action->next = NULL; -+ action->dev_id = dev_id; -+ -+ retval = setup_irq(irq, action); -+ if (retval) -+ kfree(action); -+ return retval; -+} -+ -+int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id) -+{ -+ int err; -+ -+ err = request_irq(irq, handler, irqflags, devname, dev_id); -+ if(err) -+ return(err); -+ -+ if(fd != -1) -+ err = activate_fd(irq, fd, type, dev_id); -+ return(err); -+} -+ -+/* this was setup_x86_irq but it seems pretty generic */ -+int setup_irq(unsigned int irq, struct irqaction * new) -+{ -+ int shared = 0; -+ unsigned long flags; -+ struct irqaction *old, **p; -+ irq_desc_t *desc = irq_desc + irq; -+ -+ /* -+ * Some drivers like serial.c use request_irq() heavily, -+ * so we have to be careful not to interfere with a -+ * running system. -+ */ -+ if (new->flags & SA_SAMPLE_RANDOM) { -+ /* -+ * This function might sleep, we want to call it first, -+ * outside of the atomic block. -+ * Yes, this might clear the entropy pool if the wrong -+ * driver is attempted to be loaded, without actually -+ * installing a new handler, but is this really a problem, -+ * only the sysadmin is able to do this. -+ */ -+ rand_initialize_irq(irq); -+ } -+ -+ /* -+ * The following block of code has to be executed atomically -+ */ -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ old = *p; -+ if (old != NULL) { -+ /* Can't share interrupts unless both agree to */ -+ if (!(old->flags & new->flags & SA_SHIRQ)) { -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return -EBUSY; -+ } -+ -+ /* add new interrupt at end of irq queue */ -+ do { -+ p = &old->next; -+ old = *p; -+ } while (old); -+ shared = 1; -+ } -+ -+ *p = new; -+ -+ if (!shared) { -+ desc->depth = 0; -+ desc->status &= ~IRQ_DISABLED; -+ desc->handler->startup(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+ register_irq_proc(irq); -+ return 0; -+} -+ -+/** -+ * free_irq - free an interrupt -+ * @irq: Interrupt line to free -+ * @dev_id: Device identity to free -+ * -+ * Remove an interrupt handler. The handler is removed and if the -+ * interrupt line is no longer in use by any driver it is disabled. -+ * On a shared IRQ the caller must ensure the interrupt is disabled -+ * on the card it drives before calling this function. The function -+ * does not return until any executing interrupts for this IRQ -+ * have completed. -+ * -+ * This function may be called from interrupt context. -+ * -+ * Bugs: Attempting to free an irq in a handler for the same irq hangs -+ * the machine. -+ */ -+ -+void free_irq(unsigned int irq, void *dev_id) -+{ -+ irq_desc_t *desc; -+ struct irqaction **p; -+ unsigned long flags; -+ -+ if (irq >= NR_IRQS) -+ return; -+ -+ desc = irq_desc + irq; -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ for (;;) { -+ struct irqaction * action = *p; -+ if (action) { -+ struct irqaction **pp = p; -+ p = &action->next; -+ if (action->dev_id != dev_id) -+ continue; -+ -+ /* Found it - now remove it from the list of entries */ -+ *pp = action->next; -+ if (!desc->action) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->shutdown(irq); -+ } -+ free_irq_by_irq_and_dev(irq, dev_id); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+#ifdef CONFIG_SMP -+ /* Wait to make sure it's not being used on another CPU */ -+ while (desc->status & IRQ_INPROGRESS) -+ barrier(); -+#endif -+ kfree(action); -+ return; -+ } -+ printk(KERN_ERR "Trying to free free IRQ%d\n",irq); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return; -+ } -+} -+ -+/* These are initialized by sysctl_init, which is called from init/main.c */ -+static struct proc_dir_entry * root_irq_dir; -+static struct proc_dir_entry * irq_dir [NR_IRQS]; -+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; -+ -+/* These are read and written as longs, so a read won't see a partial write -+ * even during a race. -+ */ -+static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; -+ -+#define HEX_DIGITS 8 -+ -+static int irq_affinity_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -+} -+ -+static unsigned int parse_hex_value (const char *buffer, -+ unsigned long count, unsigned long *ret) -+{ -+ unsigned char hexnum [HEX_DIGITS]; -+ unsigned long value; -+ int i; -+ -+ if (!count) -+ return -EINVAL; -+ if (count > HEX_DIGITS) -+ count = HEX_DIGITS; -+ if (copy_from_user(hexnum, buffer, count)) -+ return -EFAULT; -+ -+ /* -+ * Parse the first HEX_DIGITS characters as a hex string, any non-hex -+ * char is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. -+ */ -+ value = 0; -+ -+ for (i = 0; i < count; i++) { -+ unsigned int c = hexnum[i]; -+ -+ switch (c) { -+ case '0' ... '9': c -= '0'; break; -+ case 'a' ... 'f': c -= 'a'-10; break; -+ case 'A' ... 'F': c -= 'A'-10; break; -+ default: -+ goto out; -+ } -+ value = (value << 4) | c; -+ } -+out: -+ *ret = value; -+ return 0; -+} -+ -+static int irq_affinity_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ int irq = (long) data, full_count = count, err; -+ unsigned long new_value; -+ -+ if (!irq_desc[irq].handler->set_affinity) -+ return -EIO; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ -+#if CONFIG_SMP -+ /* -+ * Do not allow disabling IRQs completely - it's a too easy -+ * way to make the system unusable accidentally :-) At least -+ * one online CPU still has to be targeted. -+ */ -+ if (!(new_value & cpu_online_map)) -+ return -EINVAL; -+#endif -+ -+ irq_affinity[irq] = new_value; -+ irq_desc[irq].handler->set_affinity(irq, new_value); -+ -+ return full_count; -+} -+ -+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data; -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", *mask); -+} -+ -+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data, full_count = count, err; -+ unsigned long new_value; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ if (err) -+ return err; -+ -+ *mask = new_value; -+ return full_count; -+} -+ -+#define MAX_NAMELEN 10 -+ -+static void register_irq_proc (unsigned int irq) -+{ -+ struct proc_dir_entry *entry; -+ char name [MAX_NAMELEN]; -+ -+ if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || -+ irq_dir[irq]) -+ return; -+ -+ memset(name, 0, MAX_NAMELEN); -+ sprintf(name, "%d", irq); -+ -+ /* create /proc/irq/1234 */ -+ irq_dir[irq] = proc_mkdir(name, root_irq_dir); -+ -+ /* create /proc/irq/1234/smp_affinity */ -+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); -+ -+ entry->nlink = 1; -+ entry->data = (void *)(long)irq; -+ entry->read_proc = irq_affinity_read_proc; -+ entry->write_proc = irq_affinity_write_proc; -+ -+ smp_affinity_entry[irq] = entry; -+} -+ -+/* Read and written as a long */ -+unsigned long prof_cpu_mask = -1; -+ -+void __init init_irq_proc (void) -+{ -+ struct proc_dir_entry *entry; -+ int i; -+ -+ /* create /proc/irq */ -+ root_irq_dir = proc_mkdir("irq", 0); -+ -+ /* create /proc/irq/prof_cpu_mask */ -+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); -+ -+ entry->nlink = 1; -+ entry->data = (void *)&prof_cpu_mask; -+ entry->read_proc = prof_cpu_mask_read_proc; -+ entry->write_proc = prof_cpu_mask_write_proc; -+ -+ /* -+ * Create entries for all existing IRQs. -+ */ -+ for (i = 0; i < NR_IRQS; i++) -+ register_irq_proc(i); -+} -+ -+static spinlock_t irq_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long irq_lock(void) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&irq_spinlock, flags); -+ return(flags); -+} -+ -+void irq_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&irq_spinlock, flags); -+} -+ -+unsigned long probe_irq_on(void) -+{ -+ return(0); -+} -+ -+int probe_irq_off(unsigned long val) -+{ -+ return(0); -+} -+ -+static unsigned int startup_SIGIO_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGIO(unsigned int irq) -+{ -+} -+ -+static void end_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static unsigned int startup_SIGVTALRM_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGVTALRM(unsigned int irq) -+{ -+} -+ -+static void end_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static struct hw_interrupt_type SIGIO_irq_type = { -+ "SIGIO", -+ startup_SIGIO_irq, -+ shutdown_SIGIO_irq, -+ enable_SIGIO_irq, -+ disable_SIGIO_irq, -+ mask_and_ack_SIGIO, -+ end_SIGIO_irq, -+ NULL -+}; -+ -+static struct hw_interrupt_type SIGVTALRM_irq_type = { -+ "SIGVTALRM", -+ startup_SIGVTALRM_irq, -+ shutdown_SIGVTALRM_irq, -+ enable_SIGVTALRM_irq, -+ disable_SIGVTALRM_irq, -+ mask_and_ack_SIGVTALRM, -+ end_SIGVTALRM_irq, -+ NULL -+}; -+ -+void __init init_IRQ(void) -+{ -+ int i; -+ -+ irq_desc[TIMER_IRQ].status = IRQ_DISABLED; -+ irq_desc[TIMER_IRQ].action = 0; -+ irq_desc[TIMER_IRQ].depth = 1; -+ irq_desc[TIMER_IRQ].handler = &SIGVTALRM_irq_type; -+ enable_irq(TIMER_IRQ); -+ for(i=1;i<NR_IRQS;i++){ -+ irq_desc[i].status = IRQ_DISABLED; -+ irq_desc[i].action = 0; -+ irq_desc[i].depth = 1; -+ irq_desc[i].handler = &SIGIO_irq_type; -+ enable_irq(i); -+ } -+ init_irq_signals(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/irq_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/irq_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/irq_user.c 2005-05-03 22:28:14.424417368 +0300 -@@ -0,0 +1,438 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <signal.h> -+#include <string.h> -+#include <sys/poll.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "sigio.h" -+#include "irq_user.h" -+#include "os.h" -+ -+struct irq_fd { -+ struct irq_fd *next; -+ void *id; -+ int fd; -+ int type; -+ int irq; -+ int pid; -+ int events; -+ int current_events; -+ int freed; -+}; -+ -+static struct irq_fd *active_fds = NULL; -+static struct irq_fd **last_irq_ptr = &active_fds; -+ -+static struct pollfd *pollfds = NULL; -+static int pollfds_num = 0; -+static int pollfds_size = 0; -+ -+extern int io_count, intr_count; -+ -+void sigio_handler(int sig, union uml_pt_regs *regs) -+{ -+ struct irq_fd *irq_fd, *next; -+ int i, n; -+ -+ if(smp_sigio_handler()) return; -+ while(1){ -+ n = poll(pollfds, pollfds_num, 0); -+ if(n < 0){ -+ if(errno == EINTR) continue; -+ printk("sigio_handler : poll returned %d, " -+ "errno = %d\n", n, errno); -+ break; -+ } -+ if(n == 0) break; -+ -+ irq_fd = active_fds; -+ for(i = 0; i < pollfds_num; i++){ -+ if(pollfds[i].revents != 0){ -+ irq_fd->current_events = pollfds[i].revents; -+ pollfds[i].fd = -1; -+ } -+ irq_fd = irq_fd->next; -+ } -+ -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = next){ -+ next = irq_fd->next; -+ if(irq_fd->current_events != 0){ -+ irq_fd->current_events = 0; -+ do_IRQ(irq_fd->irq, regs); -+ -+ /* This is here because the next irq may be -+ * freed in the handler. If a console goes -+ * away, both the read and write irqs will be -+ * freed. After do_IRQ, ->next will point to -+ * a good IRQ. -+ * Irqs can't be freed inside their handlers, -+ * so the next best thing is to have them -+ * marked as needing freeing, so that they -+ * can be freed here. -+ */ -+ next = irq_fd->next; -+ if(irq_fd->freed) -+ free_irq(irq_fd->irq, irq_fd->id); -+ } -+ } -+ } -+} -+ -+int activate_ipi(int fd, int pid) -+{ -+ return(os_set_fd_async(fd, pid)); -+} -+ -+static void maybe_sigio_broken(int fd, int type) -+{ -+ if(isatty(fd)){ -+ if((type == IRQ_WRITE) && !pty_output_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 0); -+ } -+ else if((type == IRQ_READ) && !pty_close_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 1); -+ } -+ } -+} -+ -+int activate_fd(int irq, int fd, int type, void *dev_id) -+{ -+ struct pollfd *tmp_pfd; -+ struct irq_fd *new_fd, *irq_fd; -+ unsigned long flags; -+ int pid, events, err, n, size; -+ -+ pid = os_getpid(); -+ err = os_set_fd_async(fd, pid); -+ if(err < 0) -+ goto out; -+ -+ new_fd = um_kmalloc(sizeof(*new_fd)); -+ err = -ENOMEM; -+ if(new_fd == NULL) -+ goto out; -+ -+ if(type == IRQ_READ) events = POLLIN | POLLPRI; -+ else events = POLLOUT; -+ *new_fd = ((struct irq_fd) { .next = NULL, -+ .id = dev_id, -+ .fd = fd, -+ .type = type, -+ .irq = irq, -+ .pid = pid, -+ .events = events, -+ .current_events = 0, -+ .freed = 0 } ); -+ -+ /* Critical section - locked by a spinlock because this stuff can -+ * be changed from interrupt handlers. The stuff above is done -+ * outside the lock because it allocates memory. -+ */ -+ -+ /* Actually, it only looks like it can be called from interrupt -+ * context. The culprit is reactivate_fd, which calls -+ * maybe_sigio_broken, which calls write_sigio_workaround, -+ * which calls activate_fd. However, write_sigio_workaround should -+ * only be called once, at boot time. That would make it clear that -+ * this is called only from process context, and can be locked with -+ * a semaphore. -+ */ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->fd == fd) && (irq_fd->type == type)){ -+ printk("Registering fd %d twice\n", fd); -+ printk("Irqs : %d, %d\n", irq_fd->irq, irq); -+ printk("Ids : 0x%x, 0x%x\n", irq_fd->id, dev_id); -+ goto out_unlock; -+ } -+ } -+ -+ n = pollfds_num; -+ if(n == pollfds_size){ -+ while(1){ -+ /* Here we have to drop the lock in order to call -+ * kmalloc, which might sleep. If something else -+ * came in and changed the pollfds array, we free -+ * the buffer and try again. -+ */ -+ irq_unlock(flags); -+ size = (pollfds_num + 1) * sizeof(pollfds[0]); -+ tmp_pfd = um_kmalloc(size); -+ flags = irq_lock(); -+ if(tmp_pfd == NULL) -+ goto out_unlock; -+ if(n == pollfds_size) -+ break; -+ kfree(tmp_pfd); -+ } -+ if(pollfds != NULL){ -+ memcpy(tmp_pfd, pollfds, -+ sizeof(pollfds[0]) * pollfds_size); -+ kfree(pollfds); -+ } -+ pollfds = tmp_pfd; -+ pollfds_size++; -+ } -+ -+ if(type == IRQ_WRITE) -+ fd = -1; -+ -+ pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ pollfds_num++; -+ -+ *last_irq_ptr = new_fd; -+ last_irq_ptr = &new_fd->next; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. -+ */ -+ maybe_sigio_broken(fd, type); -+ -+ return(0); -+ -+ out_unlock: -+ irq_unlock(flags); -+ kfree(new_fd); -+ out: -+ return(err); -+} -+ -+static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) -+{ -+ struct irq_fd **prev; -+ unsigned long flags; -+ int i = 0; -+ -+ flags = irq_lock(); -+ prev = &active_fds; -+ while(*prev != NULL){ -+ if((*test)(*prev, arg)){ -+ struct irq_fd *old_fd = *prev; -+ if((pollfds[i].fd != -1) && -+ (pollfds[i].fd != (*prev)->fd)){ -+ printk("free_irq_by_cb - mismatch between " -+ "active_fds and pollfds, fd %d vs %d\n", -+ (*prev)->fd, pollfds[i].fd); -+ goto out; -+ } -+ memcpy(&pollfds[i], &pollfds[i + 1], -+ (pollfds_num - i - 1) * sizeof(pollfds[0])); -+ pollfds_num--; -+ if(last_irq_ptr == &old_fd->next) -+ last_irq_ptr = prev; -+ *prev = (*prev)->next; -+ if(old_fd->type == IRQ_WRITE) -+ ignore_sigio_fd(old_fd->fd); -+ kfree(old_fd); -+ continue; -+ } -+ prev = &(*prev)->next; -+ i++; -+ } -+ out: -+ irq_unlock(flags); -+} -+ -+struct irq_and_dev { -+ int irq; -+ void *dev; -+}; -+ -+static int same_irq_and_dev(struct irq_fd *irq, void *d) -+{ -+ struct irq_and_dev *data = d; -+ -+ return((irq->irq == data->irq) && (irq->id == data->dev)); -+} -+ -+void free_irq_by_irq_and_dev(int irq, void *dev) -+{ -+ struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, -+ .dev = dev }); -+ -+ free_irq_by_cb(same_irq_and_dev, &data); -+} -+ -+static int same_fd(struct irq_fd *irq, void *fd) -+{ -+ return(irq->fd == *((int *) fd)); -+} -+ -+void free_irq_by_fd(int fd) -+{ -+ free_irq_by_cb(same_fd, &fd); -+} -+ -+static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) -+{ -+ struct irq_fd *irq; -+ int i = 0; -+ -+ for(irq=active_fds; irq != NULL; irq = irq->next){ -+ if((irq->fd == fd) && (irq->irq == irqnum)) break; -+ i++; -+ } -+ if(irq == NULL){ -+ printk("find_irq_by_fd doesn't have descriptor %d\n", fd); -+ goto out; -+ } -+ if((pollfds[i].fd != -1) && (pollfds[i].fd != fd)){ -+ printk("find_irq_by_fd - mismatch between active_fds and " -+ "pollfds, fd %d vs %d, need %d\n", irq->fd, -+ pollfds[i].fd, fd); -+ irq = NULL; -+ goto out; -+ } -+ *index_out = i; -+ out: -+ return(irq); -+} -+ -+void free_irq_later(int irq, void *dev_id) -+{ -+ struct irq_fd *irq_fd; -+ unsigned long flags; -+ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->irq == irq) && (irq_fd->id == dev_id)) -+ break; -+ } -+ if(irq_fd == NULL){ -+ printk("free_irq_later found no irq, irq = %d, " -+ "dev_id = 0x%p\n", irq, dev_id); -+ goto out; -+ } -+ irq_fd->freed = 1; -+ out: -+ irq_unlock(flags); -+} -+ -+void reactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL){ -+ irq_unlock(flags); -+ return; -+ } -+ -+ pollfds[i].fd = irq->fd; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. -+ */ -+ maybe_sigio_broken(fd, irq->type); -+} -+ -+void deactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL) -+ goto out; -+ pollfds[i].fd = -1; -+ out: -+ irq_unlock(flags); -+} -+ -+int deactivate_all_fds(void) -+{ -+ struct irq_fd *irq; -+ int err; -+ -+ for(irq=active_fds;irq != NULL;irq = irq->next){ -+ err = os_clear_fd_async(irq->fd); -+ if(err) -+ return(err); -+ } -+ -+ return(0); -+} -+ -+void forward_ipi(int fd, int pid) -+{ -+ int err; -+ -+ err = os_set_owner(fd, pid); -+ if(err < 0) -+ printk("forward_ipi: set_owner failed, fd = %d, me = %d, " -+ "target = %d, err = %d\n", fd, os_getpid(), pid, -err); -+} -+ -+void forward_interrupts(int pid) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int err; -+ -+ flags = irq_lock(); -+ for(irq=active_fds;irq != NULL;irq = irq->next){ -+ err = os_set_owner(irq->fd, pid); -+ if(err < 0){ -+ /* XXX Just remove the irq rather than -+ * print out an infinite stream of these -+ */ -+ printk("Failed to forward %d to pid %d, err = %d\n", -+ irq->fd, pid, -err); -+ } -+ -+ irq->pid = pid; -+ } -+ irq_unlock(flags); -+} -+ -+void init_irq_signals(int on_sigstack) -+{ -+ __sighandler_t h; -+ int flags; -+ -+ flags = on_sigstack ? SA_ONSTACK : 0; -+ if(timer_irq_inited) h = (__sighandler_t) alarm_handler; -+ else h = boot_timer_handler; -+ -+ set_handler(SIGVTALRM, h, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1); -+ set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ signal(SIGWINCH, SIG_IGN); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/ksyms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/ksyms.c 2005-05-03 23:56:02.752509760 +0300 -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2001 - 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/string.h" -+#include "linux/smp_lock.h" -+#include "linux/spinlock.h" -+#include "asm/current.h" -+#include "asm/delay.h" -+#include "asm/processor.h" -+#include "asm/unistd.h" -+#include "asm/pgalloc.h" -+#include "asm/pgtable.h" -+#include "asm/page.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "os.h" -+#include "helper.h" -+ -+EXPORT_SYMBOL(stop); -+EXPORT_SYMBOL(strtok); -+EXPORT_SYMBOL(uml_physmem); -+EXPORT_SYMBOL(set_signals); -+EXPORT_SYMBOL(get_signals); -+EXPORT_SYMBOL(kernel_thread); -+EXPORT_SYMBOL(__const_udelay); -+EXPORT_SYMBOL(__udelay); -+EXPORT_SYMBOL(sys_waitpid); -+EXPORT_SYMBOL(task_size); -+EXPORT_SYMBOL(flush_tlb_range); -+EXPORT_SYMBOL(host_task_size); -+EXPORT_SYMBOL(arch_validate); -+EXPORT_SYMBOL(get_kmem_end); -+ -+EXPORT_SYMBOL(high_physmem); -+EXPORT_SYMBOL(empty_zero_page); -+EXPORT_SYMBOL(um_virt_to_phys); -+EXPORT_SYMBOL(__virt_to_page); -+EXPORT_SYMBOL(to_phys); -+EXPORT_SYMBOL(to_virt); -+EXPORT_SYMBOL(mode_tt); -+EXPORT_SYMBOL(handle_page_fault); -+EXPORT_SYMBOL(find_iomem); -+ -+#ifdef CONFIG_MODE_TT -+EXPORT_SYMBOL(strncpy_from_user_tt); -+EXPORT_SYMBOL(copy_from_user_tt); -+EXPORT_SYMBOL(copy_to_user_tt); -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+EXPORT_SYMBOL(strncpy_from_user_skas); -+EXPORT_SYMBOL(copy_to_user_skas); -+EXPORT_SYMBOL(copy_from_user_skas); -+#endif -+ -+EXPORT_SYMBOL(os_stat_fd); -+EXPORT_SYMBOL(os_stat_file); -+EXPORT_SYMBOL(os_access); -+EXPORT_SYMBOL(os_print_error); -+EXPORT_SYMBOL(os_get_exec_close); -+EXPORT_SYMBOL(os_set_exec_close); -+EXPORT_SYMBOL(os_getpid); -+EXPORT_SYMBOL(os_open_file); -+EXPORT_SYMBOL(os_read_file); -+EXPORT_SYMBOL(os_write_file); -+EXPORT_SYMBOL(os_seek_file); -+EXPORT_SYMBOL(os_lock_file); -+EXPORT_SYMBOL(os_pipe); -+EXPORT_SYMBOL(os_file_type); -+EXPORT_SYMBOL(os_file_mode); -+EXPORT_SYMBOL(os_file_size); -+EXPORT_SYMBOL(os_flush_stdout); -+EXPORT_SYMBOL(os_close_file); -+EXPORT_SYMBOL(os_set_fd_async); -+EXPORT_SYMBOL(os_set_fd_block); -+EXPORT_SYMBOL(helper_wait); -+EXPORT_SYMBOL(os_shutdown_socket); -+EXPORT_SYMBOL(os_create_unix_socket); -+EXPORT_SYMBOL(os_connect_socket); -+EXPORT_SYMBOL(os_accept_connection); -+EXPORT_SYMBOL(os_ioctl_generic); -+EXPORT_SYMBOL(os_rcv_fd); -+EXPORT_SYMBOL(run_helper); -+EXPORT_SYMBOL(start_thread); -+EXPORT_SYMBOL(dump_thread); -+ -+/* This is here because UML expands open to sys_open, not to a system -+ * call instruction. -+ */ -+EXPORT_SYMBOL(sys_open); -+EXPORT_SYMBOL(sys_lseek); -+EXPORT_SYMBOL(sys_read); -+EXPORT_SYMBOL(sys_wait4); -+ -+#ifdef CONFIG_SMP -+ -+/* required for SMP */ -+ -+extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__write_lock_failed); -+ -+extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__read_lock_failed); -+ -+EXPORT_SYMBOL(kernel_flag_cacheline); -+EXPORT_SYMBOL(smp_num_cpus); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/link.ld -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/link.ld 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/link.ld 2005-05-03 22:43:46.000000000 +0300 -@@ -0,0 +1,147 @@ -+OUTPUT_FORMAT("elf32-i386") -+OUTPUT_ARCH(i386) -+ENTRY(_start) -+ -+SECTIONS -+{ -+ . = 2684354560 + SIZEOF_HEADERS; -+ -+ __binary_start = .; -+ -+ .thread_private : { -+ __start_thread_private = .; -+ errno = .; -+ . += 4; -+ arch/um/kernel/tt/unmap_fin.o (.data) -+ __end_thread_private = .; -+ } -+ . = ALIGN(4096); -+ .remap : { arch/um/kernel/tt/unmap_fin.o (.text) } -+ -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ . = ALIGN(4096); -+ .text : -+ { -+ *(.text) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ *(.gnu.linkonce.t*) -+ } -+ .fini : { *(.fini) } =0x9090 -+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) } -+ .rodata1 : { *(.rodata1) } -+ _etext = .; -+ PROVIDE (etext = .); -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+ .kstrtab : { *(.kstrtab) } -+ -+ . = ALIGN(16); /* Exception table */ -+ __start___ex_table = .; -+ __ex_table : { *(__ex_table) } -+ __stop___ex_table = .; -+ -+ __start___ksymtab = .; /* Kernel symbol table */ -+ __ksymtab : { *(__ksymtab) } -+ __stop___ksymtab = .; -+ -+ .unprotected : { *(.unprotected) } -+ . = ALIGN(4096); -+ PROVIDE (_unprotected_end = .); -+ -+ . = ALIGN(4096); -+ __uml_setup_start = .; -+ .uml.setup.init : { *(.uml.setup.init) } -+ __uml_setup_end = .; -+ __uml_help_start = .; -+ .uml.help.init : { *(.uml.help.init) } -+ __uml_help_end = .; -+ __uml_postsetup_start = .; -+ .uml.postsetup.init : { *(.uml.postsetup.init) } -+ __uml_postsetup_end = .; -+ __setup_start = .; -+ .setup.init : { *(.setup.init) } -+ __setup_end = .; -+ __initcall_start = .; -+ .initcall.init : { *(.initcall.init) } -+ __initcall_end = .; -+ __uml_initcall_start = .; -+ .uml.initcall.init : { *(.uml.initcall.init) } -+ __uml_initcall_end = .; -+ __init_end = .; -+ __exitcall_begin = .; -+ .exitcall : { *(.exitcall.exit) } -+ __exitcall_end = .; -+ __uml_exitcall_begin = .; -+ .uml.exitcall : { *(.uml.exitcall.exit) } -+ __uml_exitcall_end = .; -+ -+ __preinit_array_start = .; -+ .preinit_array : { *(.preinit_array) } -+ __preinit_array_end = .; -+ __init_array_start = .; -+ .init_array : { *(.init_array) } -+ __init_array_end = .; -+ __fini_array_start = .; -+ .fini_array : { *(.fini_array) } -+ __fini_array_end = .; -+ -+ .data.init : { *(.data.init) } -+ -+ -+ .data : -+ { -+ . = ALIGN(32768); /* init_task */ -+ *(.data.init_task) -+ *(.data) -+ *(.gnu.linkonce.d*) -+ CONSTRUCTORS -+ } -+ .data1 : { *(.data1) } -+ .ctors : -+ { -+ *(.ctors) -+ } -+ .dtors : -+ { -+ *(.dtors) -+ } -+ -+ .got : { *(.got.plt) *(.got) } -+ .dynamic : { *(.dynamic) } -+ /* We want the small data sections together, so single-instruction offsets -+ can access them all, and initialized data all before uninitialized, so -+ we can shorten the on-disk segment size. */ -+ .sdata : { *(.sdata) } -+ _edata = .; -+ PROVIDE (edata = .); -+ . = ALIGN(0x1000); -+ .sbss : -+ { -+ __bss_start = .; -+ PROVIDE(_bss_start = .); -+ *(.sbss) -+ *(.scommon) -+ } -+ .bss : -+ { -+ *(.dynbss) -+ *(.bss) -+ *(COMMON) -+ } -+ _end = . ; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+} -Index: linux-2.4.29/arch/um/kernel/link.ld.in -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/link.ld.in 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/link.ld.in 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,94 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ -+ __binary_start = .; -+ifdef(`MODE_TT', ` -+ .thread_private : { -+ __start_thread_private = .; -+ errno = .; -+ . += 4; -+ arch/um/kernel/tt/unmap_fin.o (.data) -+ __end_thread_private = .; -+ } -+ . = ALIGN(4096); -+ .remap : { arch/um/kernel/tt/unmap_fin.o (.text) } -+') -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ . = ALIGN(4096); -+ .text : -+ { -+ *(.text) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ *(.gnu.linkonce.t*) -+ } -+ .fini : { *(.fini) } =0x9090 -+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) } -+ .rodata1 : { *(.rodata1) } -+ _etext = .; -+ PROVIDE (etext = .); -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/kernel/common.ld.in') -+ -+ .data : -+ { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data) -+ *(.gnu.linkonce.d*) -+ CONSTRUCTORS -+ } -+ .data1 : { *(.data1) } -+ .ctors : -+ { -+ *(.ctors) -+ } -+ .dtors : -+ { -+ *(.dtors) -+ } -+ -+ .got : { *(.got.plt) *(.got) } -+ .dynamic : { *(.dynamic) } -+ /* We want the small data sections together, so single-instruction offsets -+ can access them all, and initialized data all before uninitialized, so -+ we can shorten the on-disk segment size. */ -+ .sdata : { *(.sdata) } -+ _edata = .; -+ PROVIDE (edata = .); -+ . = ALIGN(0x1000); -+ .sbss : -+ { -+ __bss_start = .; -+ PROVIDE(_bss_start = .); -+ *(.sbss) -+ *(.scommon) -+ } -+ .bss : -+ { -+ *(.dynbss) -+ *(.bss) -+ *(COMMON) -+ } -+ _end = . ; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+} -Index: linux-2.4.29/arch/um/kernel/main.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/main.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/main.c 2005-05-03 22:28:14.429416608 +0300 -@@ -0,0 +1,250 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/resource.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/page.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "signal_user.h" -+#include "time_user.h" -+#include "irq_user.h" -+#include "user.h" -+#include "init.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "uml-config.h" -+ -+/* Set in set_stklim, which is called from main and __wrap_malloc. -+ * __wrap_malloc only calls it if main hasn't started. -+ */ -+unsigned long stacksizelim; -+ -+/* Set in main */ -+char *linux_prog; -+ -+#define PGD_BOUND (4 * 1024 * 1024) -+#define STACKSIZE (8 * 1024 * 1024) -+#define THREAD_NAME_LEN (256) -+ -+static void set_stklim(void) -+{ -+ struct rlimit lim; -+ -+ if(getrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("getrlimit"); -+ exit(1); -+ } -+ if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ -+ lim.rlim_cur = STACKSIZE; -+ if(setrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("setrlimit"); -+ exit(1); -+ } -+ } -+ stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); -+} -+ -+static __init void do_uml_initcalls(void) -+{ -+ initcall_t *call; -+ -+ call = &__uml_initcall_start; -+ while (call < &__uml_initcall_end){; -+ (*call)(); -+ call++; -+ } -+} -+ -+static void last_ditch_exit(int sig) -+{ -+ CHOOSE_MODE(kmalloc_ok = 0, (void) 0); -+ signal(SIGINT, SIG_DFL); -+ signal(SIGTERM, SIG_DFL); -+ signal(SIGHUP, SIG_DFL); -+ uml_cleanup(); -+ exit(1); -+} -+ -+extern int uml_exitcode; -+ -+int main(int argc, char **argv, char **envp) -+{ -+ char **new_argv; -+ sigset_t mask; -+ int ret, i; -+ -+ /* Enable all signals except SIGIO - in some environments, we can -+ * enter with some signals blocked -+ */ -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGIO); -+ if(sigprocmask(SIG_SETMASK, &mask, NULL) < 0){ -+ perror("sigprocmask"); -+ exit(1); -+ } -+ -+#ifdef UML_CONFIG_MODE_TT -+ /* Allocate memory for thread command lines */ -+ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ -+ -+ char padding[THREAD_NAME_LEN] = { -+ [ 0 ... THREAD_NAME_LEN - 2] = ' ', '\0' -+ }; -+ -+ new_argv = malloc((argc + 2) * sizeof(char*)); -+ if(!new_argv) { -+ perror("Allocating extended argv"); -+ exit(1); -+ } -+ -+ new_argv[0] = argv[0]; -+ new_argv[1] = padding; -+ -+ for(i = 2; i <= argc; i++) -+ new_argv[i] = argv[i - 1]; -+ new_argv[argc + 1] = NULL; -+ -+ execvp(new_argv[0], new_argv); -+ perror("execing with extended args"); -+ exit(1); -+ } -+#endif -+ -+ linux_prog = argv[0]; -+ -+ set_stklim(); -+ -+ new_argv = malloc((argc + 1) * sizeof(char *)); -+ if(new_argv == NULL){ -+ perror("Mallocing argv"); -+ exit(1); -+ } -+ for(i=0;i<argc;i++){ -+ new_argv[i] = strdup(argv[i]); -+ if(new_argv[i] == NULL){ -+ perror("Mallocing an arg"); -+ exit(1); -+ } -+ } -+ new_argv[argc] = NULL; -+ -+ set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ -+ do_uml_initcalls(); -+ ret = linux_main(argc, argv); -+ -+ /* Reboot */ -+ if(ret){ -+ int err; -+ -+ printf("\n"); -+ -+ /* Let any pending signals fire, then disable them. This -+ * ensures that they won't be delivered after the exec, when -+ * they are definitely not expected. -+ */ -+ unblock_signals(); -+ disable_timer(); -+ err = deactivate_all_fds(); -+ if(err) -+ printf("deactivate_all_fds failed, errno = %d\n", -err); -+ -+ execvp(new_argv[0], new_argv); -+ perror("Failed to exec kernel"); -+ ret = 1; -+ } -+ printf("\n"); -+ return(uml_exitcode); -+} -+ -+#define CAN_KMALLOC() \ -+ (kmalloc_ok && CHOOSE_MODE((getpid() != tracing_pid), 1)) -+ -+extern void *__real_malloc(int); -+ -+void *__wrap_malloc(int size) -+{ -+ void *ret; -+ -+ if(!CAN_KMALLOC()) -+ return(__real_malloc(size)); -+ else if(size <= PAGE_SIZE) /* finding contiguos pages is hard */ -+ ret = um_kmalloc(size); -+ else ret = um_vmalloc(size); -+ -+ /* glibc people insist that if malloc fails, errno should be -+ * set by malloc as well. So we do. -+ */ -+ if(ret == NULL) -+ errno = ENOMEM; -+ -+ return(ret); -+} -+ -+void *__wrap_calloc(int n, int size) -+{ -+ void *ptr = __wrap_malloc(n * size); -+ -+ if(ptr == NULL) return(NULL); -+ memset(ptr, 0, n * size); -+ return(ptr); -+} -+ -+extern void __real_free(void *); -+ -+extern unsigned long high_physmem; -+ -+void __wrap_free(void *ptr) -+{ -+ unsigned long addr = (unsigned long) ptr; -+ -+ /* We need to know how the allocation happened, so it can be correctly -+ * freed. This is done by seeing what region of memory the pointer is -+ * in - -+ * physical memory - kmalloc/kfree -+ * kernel virtual memory - vmalloc/vfree -+ * anywhere else - malloc/free -+ * If kmalloc is not yet possible, then the kernel memory regions -+ * may not be set up yet, and the variables not set up. So, -+ * free is called. -+ * -+ * CAN_KMALLOC is checked because it would be bad to free a buffer -+ * with kmalloc/vmalloc after they have been turned off during -+ * shutdown. -+ */ -+ -+ if((addr >= uml_physmem) && (addr < high_physmem)){ -+ if(CAN_KMALLOC()) -+ kfree(ptr); -+ } -+ else if((addr >= start_vm) && (addr < end_vm)){ -+ if(CAN_KMALLOC()) -+ vfree(ptr); -+ } -+ else __real_free(ptr); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/Makefile 2005-05-03 22:28:14.430416456 +0300 -@@ -0,0 +1,73 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = config.o checksum.o exec_kern.o exitcode.o filehandle.o frame_kern.o \ -+ frame.o helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o \ -+ mem_user.o physmem.o process.o process_kern.o ptrace.o reboot.o \ -+ resource.o sigio_user.o sigio_kern.o signal_kern.o signal_user.o \ -+ smp.o syscall_kern.o syscall_user.o sysrq.o sys_call_table.o \ -+ tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \ -+ uaccess_user.o um_arch.o umid.o user_syms.o user_util.o -+ -+obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o -+obj-$(CONFIG_GPROF) += gprof_syms.o -+obj-$(CONFIG_GCOV) += gmon_syms.o -+obj-$(CONFIG_TTY_LOG) += tty_log.o -+ -+subdir-$(CONFIG_MODE_TT) += tt -+subdir-$(CONFIG_MODE_SKAS) += skas -+ -+user-objs-$(CONFIG_TTY_LOG) += tty_log.o -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+# user_syms.o not included here because Rules.make has its own ideas about -+# building anything in export-objs -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ -+ main.o process.o tempfile.o time.o umid.o user_util.o -+ -+DMODULES-$(CONFIG_MODULES) = -D__CONFIG_MODULES__ -+DMODVERSIONS-$(CONFIG_MODVERSIONS) = -D__CONFIG_MODVERSIONS__ -+ -+export-objs-$(CONFIG_GPROF) += gprof_syms.o -+export-objs-$(CONFIG_GCOV) += gmon_syms.o -+ -+export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y) -+ -+CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \ -+ -I/usr/include -I../include -+ -+CFLAGS_frame.o := $(patsubst -fomit-frame-pointer,,$(USER_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+# This has to be separate because it needs be compiled with frame pointers -+# regardless of how the rest of the kernel is built. -+ -+frame.o: frame.c -+ $(CC) $(CFLAGS_$@) -c -o $@ $< -+ -+QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }' -+ -+config.c : config.c.in $(TOPDIR)/.config -+ $(PERL) -e $(QUOTE) < config.c.in > $@ -+ -+clean: -+ $(RM) config.c -+ for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: clean -Index: linux-2.4.29/arch/um/kernel/mem.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/mem.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/mem.c 2005-05-03 22:28:14.431416304 +0300 -@@ -0,0 +1,336 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/mm.h" -+#include "linux/bootmem.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/fixmap.h" -+#include "asm/pgalloc.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mem_user.h" -+#include "uml_uaccess.h" -+#include "os.h" -+ -+extern char __binary_start; -+ -+/* Changed during early boot */ -+unsigned long *empty_zero_page = NULL; -+unsigned long *empty_bad_page = NULL; -+pgd_t swapper_pg_dir[1024]; -+unsigned long highmem; -+int kmalloc_ok = 0; -+ -+static unsigned long brk_end; -+static unsigned long totalram_pages = 0; -+ -+void unmap_physmem(void) -+{ -+ os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -+} -+ -+static void map_cb(void *unused) -+{ -+ map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); -+} -+ -+#ifdef CONFIG_HIGHMEM -+static void setup_highmem(unsigned long highmem_start, -+ unsigned long highmem_len) -+{ -+ struct page *page; -+ unsigned long highmem_pfn; -+ int i; -+ -+ highmem_start_page = virt_to_page(highmem_start); -+ -+ highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; -+ for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ -+ page = &mem_map[highmem_pfn + i]; -+ ClearPageReserved(page); -+ set_bit(PG_highmem, &page->flags); -+ atomic_set(&page->count, 1); -+ __free_page(page); -+ } -+} -+#endif -+ -+void mem_init(void) -+{ -+ unsigned long start; -+ -+ /* clear the zero-page */ -+ memset((void *) empty_zero_page, 0, PAGE_SIZE); -+ -+ /* Map in the area just after the brk now that kmalloc is about -+ * to be turned on. -+ */ -+ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); -+ map_cb(NULL); -+ initial_thread_cb(map_cb, NULL); -+ free_bootmem(__pa(brk_end), uml_reserved - brk_end); -+ uml_reserved = brk_end; -+ -+ /* Fill in any hole at the start of the binary */ -+ start = (unsigned long) &__binary_start; -+ if(uml_physmem != start){ -+ map_memory(uml_physmem, __pa(uml_physmem), start - uml_physmem, -+ 1, 1, 0); -+ } -+ -+ /* this will put all low memory onto the freelists */ -+ totalram_pages = free_all_bootmem(); -+ totalram_pages += highmem >> PAGE_SHIFT; -+ num_physpages = totalram_pages; -+ printk(KERN_INFO "Memory: %luk available\n", -+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); -+ kmalloc_ok = 1; -+ -+#ifdef CONFIG_HIGHMEM -+ setup_highmem(end_iomem, highmem); -+#endif -+} -+ -+static void __init fixrange_init(unsigned long start, unsigned long end, -+ pgd_t *pgd_base) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ int i, j; -+ unsigned long vaddr; -+ -+ vaddr = start; -+ i = __pgd_offset(vaddr); -+ j = __pmd_offset(vaddr); -+ pgd = pgd_base + i; -+ -+ for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { -+ pmd = (pmd_t *)pgd; -+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { -+ if (pmd_none(*pmd)) { -+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); -+ set_pmd(pmd, __pmd(_KERNPG_TABLE + -+ (unsigned long) __pa(pte))); -+ if (pte != pte_offset(pmd, 0)) -+ BUG(); -+ } -+ vaddr += PMD_SIZE; -+ } -+ j = 0; -+ } -+} -+ -+#ifdef CONFIG_HIGHMEM -+pte_t *kmap_pte; -+pgprot_t kmap_prot; -+ -+#define kmap_get_fixmap_pte(vaddr) \ -+ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -+ -+void __init kmap_init(void) -+{ -+ unsigned long kmap_vstart; -+ -+ /* cache the first kmap pte */ -+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -+ -+ kmap_prot = PAGE_KERNEL; -+} -+ -+static void init_highmem(void) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long vaddr; -+ -+ /* -+ * Permanent kmaps: -+ */ -+ vaddr = PKMAP_BASE; -+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); -+ -+ pgd = swapper_pg_dir + __pgd_offset(vaddr); -+ pmd = pmd_offset(pgd, vaddr); -+ pte = pte_offset(pmd, vaddr); -+ pkmap_page_table = pte; -+ -+ kmap_init(); -+} -+ -+#endif /* CONFIG_HIGHMEM */ -+ -+void paging_init(void) -+{ -+ unsigned long zones_size[MAX_NR_ZONES], vaddr; -+ int i; -+ -+ empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) -+ zones_size[i] = 0; -+ zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); -+ zones_size[2] = highmem >> PAGE_SHIFT; -+ free_area_init(zones_size); -+ -+ /* -+ * Fixed mappings, only the page table structure has to be -+ * created - mappings will be set by set_fixmap(): -+ */ -+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -+ fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); -+ -+#if CONFIG_HIGHMEM -+ init_highmem(); -+#endif -+} -+ -+struct page *arch_validate(struct page *page, int mask, int order) -+{ -+ unsigned long addr, zero = 0; -+ int i; -+ -+ again: -+ if(page == NULL) return(page); -+ if(PageHighMem(page)) return(page); -+ -+ addr = (unsigned long) page_address(page); -+ for(i = 0; i < (1 << order); i++){ -+ current->thread.fault_addr = (void *) addr; -+ if(__do_copy_to_user((void *) addr, &zero, -+ sizeof(zero), -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)){ -+ if(!(mask & __GFP_WAIT)) return(NULL); -+ else break; -+ } -+ addr += PAGE_SIZE; -+ } -+ if(i == (1 << order)) return(page); -+ page = _alloc_pages(mask, order); -+ goto again; -+} -+ -+/* This can't do anything because nothing in the kernel image can be freed -+ * since it's not in kernel physical memory. -+ */ -+ -+void free_initmem(void) -+{ -+} -+ -+#ifdef CONFIG_BLK_DEV_INITRD -+ -+void free_initrd_mem(unsigned long start, unsigned long end) -+{ -+ if (start < end) -+ printk ("Freeing initrd memory: %ldk freed\n", -+ (end - start) >> 10); -+ for (; start < end; start += PAGE_SIZE) { -+ ClearPageReserved(virt_to_page(start)); -+ set_page_count(virt_to_page(start), 1); -+ free_page(start); -+ totalram_pages++; -+ } -+} -+ -+#endif -+ -+int do_check_pgt_cache(int low, int high) -+{ -+ int freed = 0; -+ if(pgtable_cache_size > high) { -+ do { -+ if (pgd_quicklist) { -+ free_pgd_slow(get_pgd_fast()); -+ freed++; -+ } -+ if (pmd_quicklist) { -+ pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ if (pte_quicklist) { -+ pte_free_slow(pte_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ } while(pgtable_cache_size > low); -+ } -+ return freed; -+} -+ -+void show_mem(void) -+{ -+ int i, total = 0, reserved = 0; -+ int shared = 0, cached = 0; -+ int highmem = 0; -+ -+ printk("Mem-info:\n"); -+ show_free_areas(); -+ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); -+ i = max_mapnr; -+ while(i-- > 0) { -+ total++; -+ if(PageHighMem(mem_map + i)) -+ highmem++; -+ if(PageReserved(mem_map + i)) -+ reserved++; -+ else if(PageSwapCache(mem_map + i)) -+ cached++; -+ else if(page_count(mem_map + i)) -+ shared += page_count(mem_map + i) - 1; -+ } -+ printk("%d pages of RAM\n", total); -+ printk("%d pages of HIGHMEM\n", highmem); -+ printk("%d reserved pages\n", reserved); -+ printk("%d pages shared\n", shared); -+ printk("%d pages swap cached\n", cached); -+ printk("%ld pages in page table cache\n", pgtable_cache_size); -+ show_buffers(); -+} -+ -+/* Changed by meminfo_compat, which is a setup */ -+static int meminfo_22 = 0; -+ -+static int meminfo_compat(char *str) -+{ -+ meminfo_22 = 1; -+ return(1); -+} -+ -+__setup("22_meminfo", meminfo_compat); -+ -+void si_meminfo(struct sysinfo *val) -+{ -+ val->totalram = totalram_pages; -+ val->sharedram = 0; -+ val->freeram = nr_free_pages(); -+ val->bufferram = atomic_read(&buffermem_pages); -+ val->totalhigh = highmem >> PAGE_SHIFT; -+ val->freehigh = nr_free_highpages(); -+ val->mem_unit = PAGE_SIZE; -+ if(meminfo_22){ -+ val->freeram <<= PAGE_SHIFT; -+ val->bufferram <<= PAGE_SHIFT; -+ val->totalram <<= PAGE_SHIFT; -+ val->sharedram <<= PAGE_SHIFT; -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/mem_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/mem_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/mem_user.c 2005-05-03 22:28:14.433416000 +0300 -@@ -0,0 +1,271 @@ -+/* -+ * arch/um/kernel/mem_user.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory routines for supporting IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stddef.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <fcntl.h> -+#include <sys/types.h> -+#include <sys/mman.h> -+#include "kern_util.h" -+#include "user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "init.h" -+#include "os.h" -+#include "tempfile.h" -+#include "kern_constants.h" -+ -+extern struct mem_region physmem_region; -+ -+#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" -+ -+static int create_tmp_file(unsigned long len) -+{ -+ int fd, err; -+ char zero; -+ -+ fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); -+ if(fd < 0) { -+ os_print_error(fd, "make_tempfile"); -+ exit(1); -+ } -+ -+ err = os_mode_fd(fd, 0777); -+ if(err < 0){ -+ os_print_error(err, "os_mode_fd"); -+ exit(1); -+ } -+ err = os_seek_file(fd, len); -+ if(err < 0){ -+ os_print_error(err, "os_seek_file"); -+ exit(1); -+ } -+ zero = 0; -+ err = os_write_file(fd, &zero, 1); -+ if(err != 1){ -+ os_print_error(err, "os_write_file"); -+ exit(1); -+ } -+ -+ return(fd); -+} -+ -+void check_tmpexec(void) -+{ -+ void *addr; -+ int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); -+ -+ addr = mmap(NULL, UM_KERN_PAGE_SIZE, -+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); -+ printf("Checking PROT_EXEC mmap in /tmp..."); -+ fflush(stdout); -+ if(addr == MAP_FAILED){ -+ err = errno; -+ perror("failed"); -+ if(err == EPERM) -+ printf("/tmp must be not mounted noexec\n"); -+ exit(1); -+ } -+ printf("OK\n"); -+ munmap(addr, UM_KERN_PAGE_SIZE); -+} -+ -+static int have_devanon(void) -+{ -+ int fd; -+ -+ printk("Checking for /dev/anon on the host..."); -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0){ -+ printk("Not available (open failed with errno %d)\n", errno); -+ return(0); -+ } -+ -+ printk("OK\n"); -+ return(1); -+} -+ -+static int create_anon_file(unsigned long len) -+{ -+ void *addr; -+ int fd; -+ -+ fd = open("/dev/anon", O_RDWR); -+ if(fd < 0) { -+ os_print_error(fd, "opening /dev/anon"); -+ exit(1); -+ } -+ -+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); -+ if(addr == MAP_FAILED){ -+ os_print_error((int) addr, "mapping physmem file"); -+ exit(1); -+ } -+ munmap(addr, len); -+ -+ return(fd); -+} -+ -+int create_mem_file(unsigned long len) -+{ -+ int err, fd; -+ -+ if(have_devanon()) -+ fd = create_anon_file(len); -+ else fd = create_tmp_file(len); -+ -+ err = os_set_exec_close(fd, 1); -+ if(err < 0) -+ os_print_error(err, "exec_close"); -+ return(fd); -+} -+ -+struct iomem_region *iomem_regions = NULL; -+int iomem_size = 0; -+ -+static int __init parse_iomem(char *str, int *add) -+{ -+ struct iomem_region *new; -+ struct uml_stat buf; -+ char *file, *driver; -+ int fd, err, size; -+ -+ driver = str; -+ file = strchr(str,','); -+ if(file == NULL){ -+ printf("parse_iomem : failed to parse iomem\n"); -+ goto out; -+ } -+ *file = '\0'; -+ file++; -+ fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ os_print_error(fd, "parse_iomem - Couldn't open io file"); -+ goto out; -+ } -+ -+ err = os_stat_fd(fd, &buf); -+ if(err < 0){ -+ os_print_error(err, "parse_iomem - cannot stat_fd file"); -+ goto out_close; -+ } -+ -+ new = malloc(sizeof(*new)); -+ if(new == NULL){ -+ perror("Couldn't allocate iomem_region struct"); -+ goto out_close; -+ } -+ -+ size = (buf.ust_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); -+ -+ *new = ((struct iomem_region) { .next = iomem_regions, -+ .driver = driver, -+ .fd = fd, -+ .size = size, -+ .phys = 0, -+ .virt = 0 }); -+ iomem_regions = new; -+ iomem_size += new->size + UM_KERN_PAGE_SIZE; -+ -+ return(0); -+ out_close: -+ os_close_file(fd); -+ out: -+ return(1); -+} -+ -+__uml_setup("iomem=", parse_iomem, -+"iomem=<name>,<file>\n" -+" Configure <file> as an IO memory region named <name>.\n\n" -+); -+ -+int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, -+ int must_succeed) -+{ -+ int err; -+ -+ err = os_protect_memory((void *) addr, len, r, w, x); -+ if(err < 0){ -+ if(must_succeed) -+ panic("protect failed, err = %d", -err); -+ else return(err); -+ } -+ return(0); -+} -+ -+#if 0 -+/* Debugging facility for dumping stuff out to the host, avoiding the timing -+ * problems that come with printf and breakpoints. -+ * Enable in case of emergency. -+ */ -+ -+int logging = 1; -+int logging_fd = -1; -+ -+int logging_line = 0; -+char logging_buf[512]; -+ -+void log(char *fmt, ...) -+{ -+ va_list ap; -+ struct timeval tv; -+ struct openflags flags; -+ -+ if(logging == 0) return; -+ if(logging_fd < 0){ -+ flags = of_create(of_trunc(of_rdwr(OPENFLAGS()))); -+ logging_fd = os_open_file("log", flags, 0644); -+ } -+ gettimeofday(&tv, NULL); -+ sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, -+ tv.tv_usec); -+ va_start(ap, fmt); -+ vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); -+ va_end(ap); -+ write(logging_fd, logging_buf, strlen(logging_buf)); -+} -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/physmem.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/physmem.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/physmem.c 2005-05-03 22:28:14.436415544 +0300 -@@ -0,0 +1,480 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "linux/rbtree.h" -+#include "linux/slab.h" -+#include "linux/vmalloc.h" -+#include "linux/bootmem.h" -+#include "asm/types.h" -+#include "asm/pgtable.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "mode_kern.h" -+#include "mem.h" -+#include "mem_user.h" -+#include "os.h" -+#include "kern.h" -+#include "init.h" -+ -+struct phys_desc { -+ struct rb_node_s rb; -+ int fd; -+ __u64 offset; -+ void *virt; -+ unsigned long phys; -+ struct list_head list; -+}; -+ -+static struct rb_root_s phys_mappings = RB_ROOT; -+ -+static struct rb_node_s **find_rb(void *virt) -+{ -+ struct rb_node_s **n = &phys_mappings.rb_node; -+ struct phys_desc *d; -+ -+ while(*n != NULL){ -+ d = rb_entry(n, struct phys_desc, rb); -+ if(d->virt == virt) -+ return(n); -+ -+ if(d->virt > virt) -+ n = &(*n)->rb_left; -+ else -+ n = &(*n)->rb_right; -+ } -+ -+ return(n); -+} -+ -+static struct phys_desc *find_phys_mapping(void *virt) -+{ -+ struct rb_node_s **n = find_rb(virt); -+ -+ if(*n == NULL) -+ return(NULL); -+ -+ return(rb_entry(n, struct phys_desc, rb)); -+} -+ -+static void insert_phys_mapping(struct phys_desc *desc) -+{ -+ struct rb_node_s **n = find_rb(desc->virt); -+ -+ if(*n != NULL) -+ panic("Physical remapping for %p already present", -+ desc->virt); -+ -+ rb_link_node(&desc->rb, (*n)->rb_parent, n); -+ rb_insert_color(&desc->rb, &phys_mappings); -+} -+ -+LIST_HEAD(descriptor_mappings); -+ -+struct desc_mapping { -+ int fd; -+ struct list_head list; -+ struct list_head pages; -+}; -+ -+static struct desc_mapping *find_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ struct list_head *ele; -+ -+ list_for_each(ele, &descriptor_mappings){ -+ desc = list_entry(ele, struct desc_mapping, list); -+ if(desc->fd == fd) -+ return(desc); -+ } -+ -+ return(NULL); -+} -+ -+static struct desc_mapping *descriptor_mapping(int fd) -+{ -+ struct desc_mapping *desc; -+ -+ desc = find_mapping(fd); -+ if(desc != NULL) -+ return(desc); -+ -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ return(NULL); -+ -+ *desc = ((struct desc_mapping) -+ { .fd = fd, -+ .list = LIST_HEAD_INIT(desc->list), -+ .pages = LIST_HEAD_INIT(desc->pages) }); -+ list_add(&desc->list, &descriptor_mappings); -+ -+ return(desc); -+} -+ -+int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) -+{ -+ struct desc_mapping *fd_maps; -+ struct phys_desc *desc; -+ unsigned long phys; -+ int err; -+ -+ phys = __pa(virt); -+ desc = find_phys_mapping(virt); -+ if(desc != NULL){ -+ if((virt != desc->virt) || (fd != desc->fd) || -+ (offset != desc->offset)) -+ panic("Address 0x%p is already substituted\n", virt); -+ return(0); -+ } -+ -+ fd_maps = descriptor_mapping(fd); -+ if(fd_maps == NULL) -+ return(-ENOMEM); -+ -+ err = -ENOMEM; -+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); -+ if(desc == NULL) -+ goto out; -+ -+ *desc = ((struct phys_desc) -+ { .fd = fd, -+ .offset = offset, -+ .virt = virt, -+ .phys = __pa(virt), -+ .list = LIST_HEAD_INIT(desc->list) }); -+ insert_phys_mapping(desc); -+ -+ list_add(&desc->list, &fd_maps->pages); -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); -+ if(!err) -+ goto out; -+ -+ rb_erase(&desc->rb, &phys_mappings); -+ kfree(desc); -+ out: -+ return(err); -+} -+ -+static int physmem_fd = -1; -+ -+static void remove_mapping(struct phys_desc *desc) -+{ -+ void *virt = desc->virt; -+ int err; -+ -+ rb_erase(&desc->rb, &phys_mappings); -+ list_del(&desc->list); -+ kfree(desc); -+ -+ err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); -+ if(err) -+ panic("Failed to unmap block device page from physical memory, " -+ "errno = %d", -err); -+} -+ -+int physmem_remove_mapping(void *virt) -+{ -+ struct phys_desc *desc; -+ -+ virt = (void *) ((unsigned long) virt & PAGE_MASK); -+ desc = find_phys_mapping(virt); -+ if(desc == NULL) -+ return(0); -+ -+ remove_mapping(desc); -+ return(1); -+} -+ -+void physmem_forget_descriptor(int fd) -+{ -+ struct desc_mapping *desc; -+ struct phys_desc *page; -+ struct list_head *ele, *next; -+ __u64 offset; -+ void *addr; -+ int err; -+ -+ desc = find_mapping(fd); -+ if(desc == NULL) -+ return; -+ -+ if(!list_empty(&desc->pages)) -+ printk("Still have mapped pages on fd %d\n", fd); -+ -+ list_for_each_safe(ele, next, &desc->pages){ -+ page = list_entry(ele, struct phys_desc, list); -+ offset = page->offset; -+ addr = page->virt; -+ remove_mapping(page); -+ err = os_seek_file(fd, offset); -+ if(err) -+ panic("physmem_forget_descriptor - failed to seek " -+ "to %lld in fd %d, error = %d\n", -+ offset, fd, -err); -+ err = os_read_file(fd, addr, PAGE_SIZE); -+ if(err < 0) -+ panic("physmem_forget_descriptor - failed to read " -+ "from fd %d to 0x%p, error = %d\n", -+ fd, addr, -err); -+ } -+ -+ list_del(&desc->list); -+ kfree(desc); -+} -+ -+void arch_free_page(struct page *page, int order) -+{ -+ void *virt; -+ int i; -+ -+ for(i = 0; i < (1 << order); i++){ -+ virt = __va(page_to_phys(page + i)); -+ physmem_remove_mapping(virt); -+ } -+} -+ -+int is_remapped(const void *virt, int fd, __u64 offset) -+{ -+ struct phys_desc *desc; -+ -+ desc = find_phys_mapping((void *) virt); -+ if(desc == NULL) -+ return(0); -+ if(offset != desc->offset) -+ printk("offset mismatch\n"); -+ return(find_phys_mapping((void *) virt) != NULL); -+} -+ -+/* Changed during early boot */ -+unsigned long high_physmem; -+ -+extern unsigned long physmem_size; -+ -+void *to_virt(unsigned long phys) -+{ -+ return((void *) uml_physmem + phys); -+} -+ -+unsigned long to_phys(void *virt) -+{ -+ return(((unsigned long) virt) - uml_physmem); -+} -+ -+int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) -+{ -+ struct page *p, *map; -+ unsigned long phys_len, phys_pages, highmem_len, highmem_pages; -+ unsigned long iomem_len, iomem_pages, total_len, total_pages; -+ int i; -+ -+ phys_pages = physmem >> PAGE_SHIFT; -+ phys_len = phys_pages * sizeof(struct page); -+ -+ iomem_pages = iomem >> PAGE_SHIFT; -+ iomem_len = iomem_pages * sizeof(struct page); -+ -+ highmem_pages = highmem >> PAGE_SHIFT; -+ highmem_len = highmem_pages * sizeof(struct page); -+ -+ total_pages = phys_pages + iomem_pages + highmem_pages; -+ total_len = phys_len + iomem_pages + highmem_len; -+ -+ if(kmalloc_ok){ -+ map = kmalloc(total_len, GFP_KERNEL); -+ if(map == NULL) -+ map = vmalloc(total_len); -+ } -+ else map = alloc_bootmem_low_pages(total_len); -+ -+ if(map == NULL) -+ return(-ENOMEM); -+ -+ for(i = 0; i < total_pages; i++){ -+ p = &map[i]; -+ set_page_count(p, 0); -+ SetPageReserved(p); -+ INIT_LIST_HEAD(&p->list); -+ } -+ -+ mem_map = map; -+ max_mapnr = total_pages; -+ return(0); -+} -+ -+struct page *phys_to_page(const unsigned long phys) -+{ -+ return(&mem_map[phys >> PAGE_SHIFT]); -+} -+ -+struct page *__virt_to_page(const unsigned long virt) -+{ -+ return(&mem_map[__pa(virt) >> PAGE_SHIFT]); -+} -+ -+unsigned long page_to_phys(struct page *page) -+{ -+ return((page - mem_map) << PAGE_SHIFT); -+} -+ -+pte_t mk_pte(struct page *page, pgprot_t pgprot) -+{ -+ pte_t pte; -+ -+ pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); -+ if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); -+ return(pte); -+} -+ -+/* Changed during early boot */ -+static unsigned long kmem_top = 0; -+ -+unsigned long get_kmem_end(void) -+{ -+ if(kmem_top == 0) -+ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); -+ return(kmem_top); -+} -+ -+void map_memory(unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ __u64 offset; -+ int fd, err; -+ -+ fd = phys_mapping(phys, &offset); -+ err = os_map_memory((void *) virt, fd, offset, len, r, w, x); -+ if(err) -+ panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " -+ "err = %d\n", virt, fd, offset, len, r, w, x, err); -+} -+ -+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -+ -+void setup_physmem(unsigned long start, unsigned long reserve_end, -+ unsigned long len, unsigned long highmem) -+{ -+ unsigned long reserve = reserve_end - start; -+ int pfn = PFN_UP(__pa(reserve_end)); -+ int delta = (len - reserve) >> PAGE_SHIFT; -+ int err, offset, bootmap_size; -+ -+ physmem_fd = create_mem_file(len + highmem); -+ -+ offset = uml_reserved - uml_physmem; -+ err = os_map_memory((void *) uml_reserved, physmem_fd, offset, -+ len - offset, 1, 1, 0); -+ if(err < 0){ -+ os_print_error(err, "Mapping memory"); -+ exit(1); -+ } -+ -+ bootmap_size = init_bootmem(pfn, pfn + delta); -+ free_bootmem(__pa(reserve_end) + bootmap_size, -+ len - bootmap_size - reserve); -+} -+ -+int phys_mapping(unsigned long phys, __u64 *offset_out) -+{ -+ struct phys_desc *desc = find_phys_mapping(__va(phys & PAGE_MASK)); -+ int fd = -1; -+ -+ if(desc != NULL){ -+ fd = desc->fd; -+ *offset_out = desc->offset; -+ } -+ else if(phys < physmem_size){ -+ fd = physmem_fd; -+ *offset_out = phys; -+ } -+ else if(phys < __pa(end_iomem)){ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if((phys >= region->phys) && -+ (phys < region->phys + region->size)){ -+ fd = region->fd; -+ *offset_out = phys - region->phys; -+ break; -+ } -+ region = region->next; -+ } -+ } -+ else if(phys < __pa(end_iomem) + highmem){ -+ fd = physmem_fd; -+ *offset_out = phys - iomem_size; -+ } -+ -+ return(fd); -+} -+ -+static int __init uml_mem_setup(char *line, int *add) -+{ -+ char *retptr; -+ physmem_size = memparse(line,&retptr); -+ return 0; -+} -+__uml_setup("mem=", uml_mem_setup, -+"mem=<Amount of desired ram>\n" -+" This controls how much \"physical\" memory the kernel allocates\n" -+" for the system. The size is specified as a number followed by\n" -+" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -+" This is not related to the amount of memory in the host. It can\n" -+" be more, and the excess, if it's ever used, will just be swapped out.\n" -+" Example: mem=64M\n\n" -+); -+ -+unsigned long find_iomem(char *driver, unsigned long *len_out) -+{ -+ struct iomem_region *region = iomem_regions; -+ -+ while(region != NULL){ -+ if(!strcmp(region->driver, driver)){ -+ *len_out = region->size; -+ return(region->virt); -+ } -+ } -+ -+ return(0); -+} -+ -+int setup_iomem(void) -+{ -+ struct iomem_region *region = iomem_regions; -+ unsigned long iomem_start = high_physmem + PAGE_SIZE; -+ int err; -+ -+ while(region != NULL){ -+ err = os_map_memory((void *) iomem_start, region->fd, 0, -+ region->size, 1, 1, 0); -+ if(err) -+ printk("Mapping iomem region for driver '%s' failed, " -+ "errno = %d\n", region->driver, -err); -+ else { -+ region->virt = iomem_start; -+ region->phys = __pa(region->virt); -+ } -+ -+ iomem_start += region->size + PAGE_SIZE; -+ region = region->next; -+ } -+ -+ return(0); -+} -+ -+__initcall(setup_iomem); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/process.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/process.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/process.c 2005-05-03 22:28:14.437415392 +0300 -@@ -0,0 +1,310 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sched.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <stdlib.h> -+#include <setjmp.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <sys/mman.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include <asm/unistd.h> -+#include <asm/page.h> -+#include <asm/user.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "init.h" -+#include "os.h" -+#include "uml-config.h" -+#include "choose-mode.h" -+#include "mode.h" -+#ifdef UML_CONFIG_MODE_SKAS -+#include "skas.h" -+#include "skas_ptrace.h" -+#endif -+ -+void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -+{ -+ int flags = 0, pages; -+ -+ if(sig_stack != NULL){ -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); -+ set_sigstack(sig_stack, pages * page_size()); -+ flags = SA_ONSTACK; -+ } -+ if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -+} -+ -+void init_new_thread_signals(int altstack) -+{ -+ int flags = altstack ? SA_ONSTACK : 0; -+ -+ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGILL, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGUSR2, (__sighandler_t) sig_handler, -+ flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ signal(SIGHUP, SIG_IGN); -+ -+ init_irq_signals(altstack); -+} -+ -+struct tramp { -+ int (*tramp)(void *); -+ void *tramp_data; -+ unsigned long temp_stack; -+ int flags; -+ int pid; -+}; -+ -+/* See above for why sigkill is here */ -+ -+int sigkill = SIGKILL; -+ -+int outer_tramp(void *arg) -+{ -+ struct tramp *t; -+ int sig = sigkill; -+ -+ t = arg; -+ t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, -+ t->flags, t->tramp_data); -+ if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); -+ kill(os_getpid(), sig); -+ _exit(0); -+} -+ -+int start_fork_tramp(void *thread_arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)) -+{ -+ struct tramp arg; -+ unsigned long sp; -+ int new_pid, status, err; -+ -+ /* The trampoline will run on the temporary stack */ -+ sp = stack_sp(temp_stack); -+ -+ clone_flags |= CLONE_FILES | SIGCHLD; -+ -+ arg.tramp = tramp; -+ arg.tramp_data = thread_arg; -+ arg.temp_stack = temp_stack; -+ arg.flags = clone_flags; -+ -+ /* Start the process and wait for it to kill itself */ -+ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); -+ if(new_pid < 0) -+ return(new_pid); -+ -+ CATCH_EINTR(err = waitpid(new_pid, &status, 0)); -+ if(err < 0) -+ panic("Waiting for outer trampoline failed - errno = %d", -+ errno); -+ -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) -+ panic("outer trampoline didn't exit with SIGKILL, " -+ "status = %d", status); -+ -+ return(arg.pid); -+} -+ -+static int ptrace_child(void *arg) -+{ -+ int pid = os_getpid(); -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ perror("ptrace"); -+ os_kill_process(pid, 0); -+ } -+ os_stop_process(pid); -+ _exit(os_getpid() == pid); -+} -+ -+static int start_ptraced_child(void **stack_out) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, n, status; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("check_ptrace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); -+ if(pid < 0) -+ panic("check_ptrace : clone failed, errno = %d", errno); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("check_ptrace : expected SIGSTOP, got status = %d", -+ status); -+ -+ *stack_out = stack; -+ return(pid); -+} -+ -+static void stop_ptraced_child(int pid, void *stack, int exitcode) -+{ -+ int status, n; -+ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", errno); -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) -+ panic("check_ptrace : child exited with status 0x%x", status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("check_ptrace : munmap failed, errno = %d", errno); -+} -+ -+int use_sysemu = 0; -+ -+void __init check_ptrace(void) -+{ -+ void *stack; -+ int pid, syscall, n, status; -+ -+ printk("Checking that ptrace can change system call numbers..."); -+ pid = start_ptraced_child(&stack); -+ -+ while(1){ -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", -+ errno); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("check_ptrace : expected SIGTRAP, " -+ "got status = %d", status); -+ -+ syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, -+ 0); -+ if(syscall == __NR_getpid){ -+ n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getppid); -+ if(n < 0) -+ panic("check_ptrace : failed to modify system " -+ "call, errno = %d", errno); -+ break; -+ } -+ } -+ stop_ptraced_child(pid, stack, 0); -+ printk("OK\n"); -+ -+ printk("Checking syscall emulation patch for ptrace..."); -+ pid = start_ptraced_child(&stack); -+ if(ptrace(PTRACE_SYSEMU, pid, 0, 0) >= 0) { -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("check_ptrace : expected SIGTRAP, " -+ "got status = %d", status); -+ -+ -+ n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, -+ os_getpid()); -+ if(n < 0) -+ panic("check_ptrace : failed to modify system " -+ "call return, errno = %d", errno); -+ -+ stop_ptraced_child(pid, stack, 0); -+ -+ printk("OK\n"); -+ use_sysemu = 1; -+ } -+ else { -+ printk("missing\n"); -+ stop_ptraced_child(pid, stack, 1); -+ } -+} -+ -+int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -+{ -+ sigjmp_buf buf; -+ int n; -+ -+ *jmp_ptr = &buf; -+ n = sigsetjmp(buf, 1); -+ if(n != 0) -+ return(n); -+ (*fn)(arg); -+ return(0); -+} -+ -+int can_do_skas(void) -+{ -+#ifdef UML_CONFIG_MODE_SKAS -+ struct ptrace_faultinfo fi; -+ void *stack; -+ int pid, n, ret = 1; -+ -+ printf("Checking for the skas3 patch in the host..."); -+ pid = start_ptraced_child(&stack); -+ -+ n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); -+ if(n < 0){ -+ if(errno == EIO) -+ printf("not found\n"); -+ else printf("No (unexpected errno - %d)\n", errno); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ init_registers(pid); -+ stop_ptraced_child(pid, stack, 1); -+ -+ printf("Checking for /proc/mm..."); -+ if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ -+ printf("not found\n"); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ return(ret); -+#else -+ return(0); -+#endif -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/process_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/process_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/process_kern.c 2005-05-03 22:28:14.439415088 +0300 -@@ -0,0 +1,413 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/mm.h" -+#include "linux/slab.h" -+#include "linux/utsname.h" -+#include "linux/fs.h" -+#include "linux/utime.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/capability.h" -+#include "linux/vmalloc.h" -+#include "linux/ptrace.h" -+#include "asm/unistd.h" -+#include "asm/mman.h" -+#include "asm/segment.h" -+#include "asm/stat.h" -+#include "asm/pgtable.h" -+#include "asm/processor.h" -+#include "asm/pgalloc.h" -+#include "asm/spinlock.h" -+#include "asm/uaccess.h" -+#include "asm/user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mem_user.h" -+#include "time_user.h" -+#include "tlb.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "mode.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* This is a per-cpu array. A processor only modifies its entry and it only -+ * cares about its entry, so it's OK if another processor is modifying its -+ * entry. -+ */ -+struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -+ -+struct task_struct *get_task(int pid, int require) -+{ -+ struct task_struct *ret; -+ -+ read_lock(&tasklist_lock); -+ ret = find_task_by_pid(pid); -+ read_unlock(&tasklist_lock); -+ -+ if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); -+ return(ret); -+} -+ -+int external_pid(void *t) -+{ -+ struct task_struct *task = t ? t : current; -+ -+ return(CHOOSE_MODE_PROC(external_pid_tt, external_pid_skas, task)); -+} -+ -+int pid_to_processor_id(int pid) -+{ -+ int i; -+ -+ for(i = 0; i < smp_num_cpus; i++){ -+ if(cpu_tasks[i].pid == pid) return(i); -+ } -+ return(-1); -+} -+ -+void free_stack(unsigned long stack, int order) -+{ -+ free_pages(stack, order); -+} -+ -+unsigned long alloc_stack(int order, int atomic) -+{ -+ unsigned long page; -+ int flags = GFP_KERNEL; -+ -+ if(atomic) flags |= GFP_ATOMIC; -+ page = __get_free_pages(flags, order); -+ if(page == 0) -+ return(0); -+ stack_protections(page); -+ return(page); -+} -+ -+int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -+{ -+ int pid; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = arg; -+ pid = do_fork(CLONE_VM | flags, 0, NULL, 0); -+#if 0 /* CLONE_UNTRACED for 2.6 */ -+ pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0); -+#endif -+ if(pid < 0) -+ panic("do_fork failed in kernel_thread, errno = %d", pid); -+ return(pid); -+} -+ -+void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if (prev != next) -+ clear_bit(cpu, &prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+} -+ -+void set_current(void *t) -+{ -+ struct task_struct *task = t; -+ -+ cpu_tasks[task->processor] = ((struct cpu_task) -+ { external_pid(task), task }); -+} -+ -+void *_switch_to(void *prev, void *next) -+{ -+ return(CHOOSE_MODE(_switch_to_tt(prev, next), -+ _switch_to_skas(prev, next))); -+} -+ -+void interrupt_end(void) -+{ -+ if(current->need_resched) schedule(); -+ if(current->sigpending != 0) do_signal(0); -+} -+ -+void release_thread(struct task_struct *task) -+{ -+ CHOOSE_MODE(release_thread_tt(task), release_thread_skas(task)); -+} -+ -+void exit_thread(void) -+{ -+ CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); -+ unprotect_stack((unsigned long) current); -+} -+ -+void *get_current(void) -+{ -+ return(current); -+} -+ -+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ p->thread = (struct thread_struct) INIT_THREAD; -+ return(CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr, -+ clone_flags, sp, stack_top, p, regs)); -+} -+ -+void initial_thread_cb(void (*proc)(void *), void *arg) -+{ -+ int save_kmalloc_ok = kmalloc_ok; -+ -+ kmalloc_ok = 0; -+ CHOOSE_MODE_PROC(initial_thread_cb_tt, initial_thread_cb_skas, proc, -+ arg); -+ kmalloc_ok = save_kmalloc_ok; -+} -+ -+unsigned long stack_sp(unsigned long page) -+{ -+ return(page + PAGE_SIZE - sizeof(void *)); -+} -+ -+int current_pid(void) -+{ -+ return(current->pid); -+} -+ -+void cpu_idle(void) -+{ -+ CHOOSE_MODE(init_idle_tt(), init_idle_skas()); -+ -+ atomic_inc(&init_mm.mm_count); -+ current->mm = &init_mm; -+ current->active_mm = &init_mm; -+ -+ while(1){ -+ /* endless idle loop with no priority at all */ -+ SET_PRI(current); -+ -+ /* -+ * although we are an idle CPU, we do not want to -+ * get into the scheduler unnecessarily. -+ */ -+ if (current->need_resched) { -+ schedule(); -+ check_pgt_cache(); -+ } -+ idle_sleep(10); -+ } -+} -+ -+int page_size(void) -+{ -+ return(PAGE_SIZE); -+} -+ -+int page_mask(void) -+{ -+ return(PAGE_MASK); -+} -+ -+void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ if(task->mm == NULL) -+ return(ERR_PTR(-EINVAL)); -+ pgd = pgd_offset(task->mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(!pmd_present(*pmd)) -+ return(ERR_PTR(-EINVAL)); -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte)) -+ return(ERR_PTR(-EINVAL)); -+ if(pte_out != NULL) -+ *pte_out = *pte; -+ return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK)); -+} -+ -+char *current_cmd(void) -+{ -+#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM) -+ return("(Unknown)"); -+#else -+ void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL); -+ return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr); -+#endif -+} -+ -+void force_sigbus(void) -+{ -+ printk(KERN_ERR "Killing pid %d because of a lack of memory\n", -+ current->pid); -+ lock_kernel(); -+ sigaddset(¤t->pending.signal, SIGBUS); -+ recalc_sigpending(current); -+ current->flags |= PF_SIGNALED; -+ do_exit(SIGBUS | 0x80); -+} -+ -+void dump_thread(struct pt_regs *regs, struct user *u) -+{ -+} -+ -+void enable_hlt(void) -+{ -+ panic("enable_hlt"); -+} -+ -+void disable_hlt(void) -+{ -+ panic("disable_hlt"); -+} -+ -+extern int signal_frame_size; -+ -+void *um_kmalloc(int size) -+{ -+ return(kmalloc(size, GFP_KERNEL)); -+} -+ -+void *um_kmalloc_atomic(int size) -+{ -+ return(kmalloc(size, GFP_ATOMIC)); -+} -+ -+void *um_vmalloc(int size) -+{ -+ return(vmalloc(size)); -+} -+ -+unsigned long get_fault_addr(void) -+{ -+ return((unsigned long) current->thread.fault_addr); -+} -+ -+EXPORT_SYMBOL(get_fault_addr); -+ -+void not_implemented(void) -+{ -+ printk(KERN_DEBUG "Something isn't implemented in here\n"); -+} -+ -+EXPORT_SYMBOL(not_implemented); -+ -+int user_context(unsigned long sp) -+{ -+ unsigned long stack; -+ -+ stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); -+ return(stack != current); -+} -+ -+extern void remove_umid_dir(void); -+ -+__uml_exitcall(remove_umid_dir); -+ -+extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; -+ -+void do_uml_exitcalls(void) -+{ -+ exitcall_t *call; -+ -+ call = &__uml_exitcall_end; -+ while (--call >= &__uml_exitcall_begin) -+ (*call)(); -+} -+ -+char *uml_strdup(char *string) -+{ -+ char *new; -+ -+ new = kmalloc(strlen(string) + 1, GFP_KERNEL); -+ if(new == NULL) return(NULL); -+ strcpy(new, string); -+ return(new); -+} -+ -+void *get_init_task(void) -+{ -+ return(&init_task_union.task); -+} -+ -+int copy_to_user_proc(void *to, void *from, int size) -+{ -+ return(copy_to_user(to, from, size)); -+} -+ -+int copy_from_user_proc(void *to, void *from, int size) -+{ -+ return(copy_from_user(to, from, size)); -+} -+ -+int clear_user_proc(void *buf, int size) -+{ -+ return(clear_user(buf, size)); -+} -+ -+int strlen_user_proc(char *str) -+{ -+ return(strlen_user(str)); -+} -+ -+int smp_sigio_handler(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu = current->processor; -+ -+ IPI_handler(cpu); -+ if(cpu != 0) -+ return(1); -+#endif -+ return(0); -+} -+ -+int um_in_interrupt(void) -+{ -+ return(in_interrupt()); -+} -+ -+int cpu(void) -+{ -+ return(current->processor); -+} -+ -+int singlestepping(void * t) -+{ -+ struct task_struct *task = t ? t : current; -+ -+ if ( ! (task->ptrace & PT_DTRACE) ) -+ return(0); -+ -+ if (task->thread.singlestep_syscall) -+ return(0); -+ -+ return 1; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/ptrace.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/ptrace.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/ptrace.c 2005-05-03 22:28:14.441414784 +0300 -@@ -0,0 +1,341 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/errno.h" -+#include "linux/smp_lock.h" -+#ifdef CONFIG_PROC_MM -+#include "linux/proc_mm.h" -+#endif -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+ -+/* -+ * Called by kernel/ptrace.c when detaching.. -+ */ -+void ptrace_disable(struct task_struct *child) -+{ -+ child->ptrace &= ~PT_DTRACE; -+ child->thread.singlestep_syscall = 0; -+} -+ -+extern long do_mmap2(struct task_struct *task, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+int sys_ptrace(long request, long pid, long addr, long data) -+{ -+ struct task_struct *child; -+ int i, ret; -+ -+ lock_kernel(); -+ ret = -EPERM; -+ if (request == PTRACE_TRACEME) { -+ /* are we already being traced? */ -+ if (current->ptrace & PT_PTRACED) -+ goto out; -+ /* set the ptrace bit in the process flags. */ -+ current->ptrace |= PT_PTRACED; -+ ret = 0; -+ goto out; -+ } -+ ret = -ESRCH; -+ read_lock(&tasklist_lock); -+ child = find_task_by_pid(pid); -+ if (child) -+ get_task_struct(child); -+ read_unlock(&tasklist_lock); -+ if (!child) -+ goto out; -+ -+ ret = -EPERM; -+ if (pid == 1) /* you may not mess with init */ -+ goto out_tsk; -+ -+ if (request == PTRACE_ATTACH) { -+ ret = ptrace_attach(child); -+ goto out_tsk; -+ } -+ -+ ret = ptrace_check_attach(child, request == PTRACE_KILL); -+ if (ret < 0) -+ goto out_tsk; -+ -+ switch (request) { -+ /* when I and D space are separate, these will need to be fixed. */ -+ case PTRACE_PEEKTEXT: /* read word at location addr. */ -+ case PTRACE_PEEKDATA: { -+ unsigned long tmp; -+ int copied; -+ -+ ret = -EIO; -+ copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); -+ if (copied != sizeof(tmp)) -+ break; -+ ret = put_user(tmp,(unsigned long *) data); -+ break; -+ } -+ -+ /* read the word at location addr in the USER area. */ -+ case PTRACE_PEEKUSR: { -+ unsigned long tmp; -+ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ tmp = 0; /* Default return condition */ -+ if(addr < FRAME_SIZE_OFFSET){ -+ tmp = getreg(child, addr); -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ tmp = child->thread.arch.debugregs[addr]; -+ } -+ ret = put_user(tmp, (unsigned long *) data); -+ break; -+ } -+ -+ /* when I and D space are separate, this will have to be fixed. */ -+ case PTRACE_POKETEXT: /* write the word at location addr. */ -+ case PTRACE_POKEDATA: -+ ret = -EIO; -+ if (access_process_vm(child, addr, &data, sizeof(data), -+ 1) != sizeof(data)) -+ break; -+ ret = 0; -+ break; -+ -+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ if (addr < FRAME_SIZE_OFFSET) { -+ ret = putreg(child, addr, data); -+ break; -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if((addr == 4) || (addr == 5)) break; -+ child->thread.arch.debugregs[addr] = data; -+ ret = 0; -+ } -+ -+ break; -+ -+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ -+ case PTRACE_CONT: { /* restart after signal. */ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ -+ child->ptrace &= ~PT_DTRACE; -+ child->thread.singlestep_syscall = 0; -+ -+ if (request == PTRACE_SYSCALL) -+ child->ptrace |= PT_TRACESYS; -+ else -+ child->ptrace &= ~PT_TRACESYS; -+ child->exit_code = data; -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+/* -+ * make the child exit. Best I can do is send it a sigkill. -+ * perhaps it should be put in the status that it wants to -+ * exit. -+ */ -+ case PTRACE_KILL: { -+ ret = 0; -+ if (child->state == TASK_ZOMBIE) /* already dead */ -+ break; -+ -+ child->ptrace &= ~PT_DTRACE; -+ child->thread.singlestep_syscall = 0; -+ child->exit_code = SIGKILL; -+ wake_up_process(child); -+ break; -+ } -+ -+ case PTRACE_SINGLESTEP: { /* set the trap flag. */ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ child->ptrace &= ~PT_TRACESYS; -+ child->ptrace |= PT_DTRACE; -+ child->thread.singlestep_syscall = 0; -+ child->exit_code = data; -+ /* give it a chance to run. */ -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+ case PTRACE_DETACH: -+ /* detach a process that was attached. */ -+ ret = ptrace_detach(child, data); -+ break; -+ -+#ifdef PTRACE_GETREGS -+ case PTRACE_GETREGS: { /* Get all gp regs from the child. */ -+ if (!access_ok(VERIFY_WRITE, (unsigned long *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __put_user(getreg(child, i), (unsigned long *) data); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_SETREGS -+ case PTRACE_SETREGS: { /* Set all gp regs in the child. */ -+ unsigned long tmp = 0; -+ if (!access_ok(VERIFY_READ, (unsigned *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __get_user(tmp, (unsigned long *) data); -+ putreg(child, i, tmp); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_GETFPREGS -+ case PTRACE_GETFPREGS: /* Get the child FPU state. */ -+ ret = get_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPREGS -+ case PTRACE_SETFPREGS: /* Set the child FPU state. */ -+ ret = set_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_GETFPXREGS -+ case PTRACE_GETFPXREGS: /* Get the child FPU state. */ -+ ret = get_fpxregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPXREGS -+ case PTRACE_SETFPXREGS: /* Set the child FPU state. */ -+ ret = set_fpxregs(data, child); -+ break; -+#endif -+ case PTRACE_FAULTINFO: { -+ struct ptrace_faultinfo fault; -+ -+ fault = ((struct ptrace_faultinfo) -+ { .is_write = child->thread.err, -+ .addr = child->thread.cr2 }); -+ ret = copy_to_user((unsigned long *) data, &fault, -+ sizeof(fault)); -+ if(ret) -+ break; -+ break; -+ } -+ case PTRACE_SIGPENDING: -+ ret = copy_to_user((unsigned long *) data, -+ &child->pending.signal, -+ sizeof(child->pending.signal)); -+ break; -+ -+ case PTRACE_LDT: { -+ struct ptrace_ldt ldt; -+ -+ if(copy_from_user(&ldt, (unsigned long *) data, -+ sizeof(ldt))){ -+ ret = -EIO; -+ break; -+ } -+ -+ /* This one is confusing, so just punt and return -EIO for -+ * now -+ */ -+ ret = -EIO; -+ break; -+ } -+#ifdef CONFIG_PROC_MM -+ case PTRACE_SWITCH_MM: { -+ struct mm_struct *old = child->mm; -+ struct mm_struct *new = proc_mm_get_mm(data); -+ -+ if(IS_ERR(new)){ -+ ret = PTR_ERR(new); -+ break; -+ } -+ -+ atomic_inc(&new->mm_users); -+ child->mm = new; -+ child->active_mm = new; -+ mmput(old); -+ ret = 0; -+ break; -+ } -+#endif -+ default: -+ ret = -EIO; -+ break; -+ } -+ out_tsk: -+ free_task_struct(child); -+ out: -+ unlock_kernel(); -+ return ret; -+} -+ -+void syscall_trace(void) -+{ -+ int is_singlestep = (current->ptrace & PT_DTRACE); -+ -+ if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) -+ != (PT_PTRACED|PT_TRACESYS) && !is_singlestep) -+ return; -+ current->exit_code = SIGTRAP; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ -+ schedule(); -+ /* -+ * this isn't the same as continuing with a signal, but it will do -+ * for normal use. strace only continues with a signal if the -+ * stopping signal is not SIGTRAP. -brl -+ */ -+ if (current->exit_code) { -+ send_sig(current->exit_code, current, 1); -+ current->exit_code = 0; -+ } -+ -+ if(is_syscall(PT_REGS_IP(¤t->thread.regs))) -+ current->thread.singlestep_syscall = 1; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/reboot.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/reboot.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/reboot.c 2005-05-03 22:28:14.441414784 +0300 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "os.h" -+#include "mode.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_SMP -+static void kill_idlers(int me) -+{ -+#ifdef CONFIG_MODE_TT -+ struct task_struct *p; -+ int i; -+ -+ for(i = 0; i < sizeof(init_tasks)/sizeof(init_tasks[0]); i++){ -+ p = init_tasks[i]; -+ if((p != NULL) && (p->thread.mode.tt.extern_pid != me) && -+ (p->thread.mode.tt.extern_pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+#endif -+} -+#endif -+ -+static void kill_off_processes(void) -+{ -+ CHOOSE_MODE(kill_off_processes_tt(), kill_off_processes_skas()); -+#ifdef CONFIG_SMP -+ kill_idlers(os_getpid()); -+#endif -+} -+ -+void uml_cleanup(void) -+{ -+ kill_off_processes(); -+ do_uml_exitcalls(); -+} -+ -+void machine_restart(char * __unused) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(reboot_tt(), reboot_skas()); -+} -+ -+void machine_power_off(void) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(halt_tt(), halt_skas()); -+} -+ -+void machine_halt(void) -+{ -+ machine_power_off(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/resource.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/resource.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/resource.c 2005-05-03 22:28:14.442414632 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/pci.h" -+ -+unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, -+ unsigned long start, unsigned long size) -+{ -+ return start; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/sigio_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/sigio_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/sigio_kern.c 2005-05-03 22:28:14.443414480 +0300 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/list.h" -+#include "linux/slab.h" -+#include "asm/irq.h" -+#include "init.h" -+#include "sigio.h" -+#include "irq_user.h" -+#include "irq_kern.h" -+ -+/* Protected by sigio_lock() called from write_sigio_workaround */ -+static int sigio_irq_fd = -1; -+ -+static void sigio_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ read_sigio_fd(sigio_irq_fd); -+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); -+} -+ -+int write_sigio_irq(int fd) -+{ -+ int err; -+ -+ err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, -+ SA_INTERRUPT | SA_SAMPLE_RANDOM, "write sigio", -+ NULL); -+ if(err){ -+ printk("write_sigio_irq : um_request_irq failed, err = %d\n", -+ err); -+ return(-1); -+ } -+ sigio_irq_fd = fd; -+ return(0); -+} -+ -+static spinlock_t sigio_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void sigio_lock(void) -+{ -+ spin_lock(&sigio_spinlock); -+} -+ -+void sigio_unlock(void) -+{ -+ spin_unlock(&sigio_spinlock); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/sigio_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/sigio_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/sigio_user.c 2005-05-03 22:28:14.445414176 +0300 -@@ -0,0 +1,436 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdlib.h> -+#include <termios.h> -+#include <pty.h> -+#include <signal.h> -+#include <errno.h> -+#include <string.h> -+#include <sched.h> -+#include <sys/socket.h> -+#include <sys/poll.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "sigio.h" -+#include "helper.h" -+#include "os.h" -+ -+/* Changed during early boot */ -+int pty_output_sigio = 0; -+int pty_close_sigio = 0; -+ -+/* Used as a flag during SIGIO testing early in boot */ -+static volatile int got_sigio = 0; -+ -+void __init handler(int sig) -+{ -+ got_sigio = 1; -+} -+ -+struct openpty_arg { -+ int master; -+ int slave; -+ int err; -+}; -+ -+static void openpty_cb(void *arg) -+{ -+ struct openpty_arg *info = arg; -+ -+ info->err = 0; -+ if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) -+ info->err = -errno; -+} -+ -+void __init check_one_sigio(void (*proc)(int, int)) -+{ -+ struct sigaction old, new; -+ struct openpty_arg pty = { .master = -1, .slave = -1 }; -+ int master, slave, err; -+ -+ initial_thread_cb(openpty_cb, &pty); -+ if(pty.err){ -+ printk("openpty failed, errno = %d\n", -pty.err); -+ return; -+ } -+ -+ master = pty.master; -+ slave = pty.slave; -+ -+ if((master == -1) || (slave == -1)){ -+ printk("openpty failed to allocate a pty\n"); -+ return; -+ } -+ -+ /* Not now, but complain so we now where we failed. */ -+ err = raw(master); -+ if (err < 0) -+ panic("check_sigio : __raw failed, errno = %d\n", -err); -+ -+ err = os_sigio_async(master, slave); -+ if(err < 0) -+ panic("tty_fds : sigio_async failed, err = %d\n", -err); -+ -+ if(sigaction(SIGIO, NULL, &old) < 0) -+ panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); -+ new = old; -+ new.sa_handler = handler; -+ if(sigaction(SIGIO, &new, NULL) < 0) -+ panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); -+ -+ got_sigio = 0; -+ (*proc)(master, slave); -+ -+ os_close_file(master); -+ os_close_file(slave); -+ -+ if(sigaction(SIGIO, &old, NULL) < 0) -+ panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); -+} -+ -+static void tty_output(int master, int slave) -+{ -+ int n; -+ char buf[512]; -+ -+ printk("Checking that host ptys support output SIGIO..."); -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ while(os_write_file(master, buf, sizeof(buf)) > 0) ; -+ if(errno != EAGAIN) -+ panic("check_sigio : write failed, errno = %d\n", errno); -+ while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; -+ -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_output_sigio = 1; -+ } -+ else if(n == -EAGAIN) printk("No, enabling workaround\n"); -+ else panic("check_sigio : read failed, err = %d\n", n); -+} -+ -+static void tty_close(int master, int slave) -+{ -+ printk("Checking that host ptys support SIGIO on close..."); -+ -+ os_close_file(slave); -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_close_sigio = 1; -+ } -+ else printk("No, enabling workaround\n"); -+} -+ -+void __init check_sigio(void) -+{ -+ if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && -+ (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ -+ printk("No pseudo-terminals available - skipping pty SIGIO " -+ "check\n"); -+ return; -+ } -+ check_one_sigio(tty_output); -+ check_one_sigio(tty_close); -+} -+ -+/* Protected by sigio_lock(), also used by sigio_cleanup, which is an -+ * exitcall. -+ */ -+static int write_sigio_pid = -1; -+ -+/* These arrays are initialized before the sigio thread is started, and -+ * the descriptors closed after it is killed. So, it can't see them change. -+ * On the UML side, they are changed under the sigio_lock. -+ */ -+static int write_sigio_fds[2] = { -1, -1 }; -+static int sigio_private[2] = { -1, -1 }; -+ -+struct pollfds { -+ struct pollfd *poll; -+ int size; -+ int used; -+}; -+ -+/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread -+ * synchronizes with it. -+ */ -+struct pollfds current_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+struct pollfds next_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+static int write_sigio_thread(void *unused) -+{ -+ struct pollfds *fds, tmp; -+ struct pollfd *p; -+ int i, n, respond_fd; -+ char c; -+ -+ fds = ¤t_poll; -+ while(1){ -+ n = poll(fds->poll, fds->used, -1); -+ if(n < 0){ -+ if(errno == EINTR) continue; -+ printk("write_sigio_thread : poll returned %d, " -+ "errno = %d\n", n, errno); -+ } -+ for(i = 0; i < fds->used; i++){ -+ p = &fds->poll[i]; -+ if(p->revents == 0) continue; -+ if(p->fd == sigio_private[1]){ -+ n = os_read_file(sigio_private[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : " -+ "read failed, err = %d\n", -n); -+ tmp = current_poll; -+ current_poll = next_poll; -+ next_poll = tmp; -+ respond_fd = sigio_private[1]; -+ } -+ else { -+ respond_fd = write_sigio_fds[1]; -+ fds->used--; -+ memmove(&fds->poll[i], &fds->poll[i + 1], -+ (fds->used - i) * sizeof(*fds->poll)); -+ } -+ -+ n = os_write_file(respond_fd, &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : write failed, " -+ "err = %d\n", -n); -+ } -+ } -+} -+ -+static int need_poll(int n) -+{ -+ if(n <= next_poll.size){ -+ next_poll.used = n; -+ return(0); -+ } -+ if(next_poll.poll != NULL) kfree(next_poll.poll); -+ next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); -+ if(next_poll.poll == NULL){ -+ printk("need_poll : failed to allocate new pollfds\n"); -+ next_poll.size = 0; -+ next_poll.used = 0; -+ return(-1); -+ } -+ next_poll.size = n; -+ next_poll.used = n; -+ return(0); -+} -+ -+static void update_thread(void) -+{ -+ unsigned long flags; -+ int n; -+ char c; -+ -+ flags = set_signals(0); -+ n = os_write_file(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : write failed, err = %d\n", -n); -+ goto fail; -+ } -+ -+ n = os_read_file(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : read failed, err = %d\n", -n); -+ goto fail; -+ } -+ -+ set_signals(flags); -+ return; -+ fail: -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); -+ sigio_unlock(); -+ set_signals(flags); -+} -+ -+int add_sigio_fd(int fd, int read) -+{ -+ int err = 0, i, n, events; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) -+ goto out; -+ } -+ -+ n = current_poll.used + 1; -+ err = need_poll(n); -+ if(err) -+ goto out; -+ -+ for(i = 0; i < current_poll.used; i++) -+ next_poll.poll[i] = current_poll.poll[i]; -+ -+ if(read) events = POLLIN; -+ else events = POLLOUT; -+ -+ next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+int ignore_sigio_fd(int fd) -+{ -+ struct pollfd *p; -+ int err = 0, i, n = 0; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) break; -+ } -+ if(i == current_poll.used) -+ goto out; -+ -+ err = need_poll(current_poll.used - 1); -+ if(err) -+ goto out; -+ -+ for(i = 0; i < current_poll.used; i++){ -+ p = ¤t_poll.poll[i]; -+ if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; -+ } -+ if(n == i){ -+ printk("ignore_sigio_fd : fd %d not found\n", fd); -+ err = -1; -+ goto out; -+ } -+ -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+static int setup_initial_poll(int fd) -+{ -+ struct pollfd *p; -+ -+ p = um_kmalloc(sizeof(struct pollfd)); -+ if(p == NULL){ -+ printk("setup_initial_poll : failed to allocate poll\n"); -+ return(-1); -+ } -+ *p = ((struct pollfd) { .fd = fd, -+ .events = POLLIN, -+ .revents = 0 }); -+ current_poll = ((struct pollfds) { .poll = p, -+ .used = 1, -+ .size = 1 }); -+ return(0); -+} -+ -+void write_sigio_workaround(void) -+{ -+ unsigned long stack; -+ int err; -+ -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ goto out; -+ -+ err = os_pipe(write_sigio_fds, 1, 1); -+ if(err < 0){ -+ printk("write_sigio_workaround - os_pipe 1 failed, " -+ "err = %d\n", -err); -+ goto out; -+ } -+ err = os_pipe(sigio_private, 1, 1); -+ if(err < 0){ -+ printk("write_sigio_workaround - os_pipe 2 failed, " -+ "err = %d\n", -err); -+ goto out_close1; -+ } -+ if(setup_initial_poll(sigio_private[1])) -+ goto out_close2; -+ -+ write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, -+ CLONE_FILES | CLONE_VM, &stack, 0); -+ -+ if(write_sigio_pid < 0) goto out_close2; -+ -+ if(write_sigio_irq(write_sigio_fds[0])) -+ goto out_kill; -+ -+ out: -+ sigio_unlock(); -+ return; -+ -+ out_kill: -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ out_close2: -+ os_close_file(sigio_private[0]); -+ os_close_file(sigio_private[1]); -+ out_close1: -+ os_close_file(write_sigio_fds[0]); -+ os_close_file(write_sigio_fds[1]); -+ sigio_unlock(); -+} -+ -+int read_sigio_fd(int fd) -+{ -+ int n; -+ char c; -+ -+ n = os_read_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ if(n < 0) { -+ printk("read_sigio_fd - read failed, err = %d\n", -n); -+ return(n); -+ } -+ else { -+ printk("read_sigio_fd - short read, bytes = %d\n", n); -+ return(-EIO); -+ } -+ } -+ return(n); -+} -+ -+static void sigio_cleanup(void) -+{ -+ if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+} -+ -+__uml_exitcall(sigio_cleanup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/signal_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/signal_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/signal_kern.c 2005-05-03 22:28:14.447413872 +0300 -@@ -0,0 +1,368 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sys.h" -+#include "linux/sched.h" -+#include "linux/wait.h" -+#include "linux/kernel.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/slab.h" -+#include "asm/signal.h" -+#include "asm/uaccess.h" -+#include "asm/ucontext.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "kern.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(block_signals); -+EXPORT_SYMBOL(unblock_signals); -+ -+static void force_segv(int sig) -+{ -+ if(sig == SIGSEGV){ -+ struct k_sigaction *ka; -+ -+ ka = ¤t->sig->action[SIGSEGV - 1]; -+ ka->sa.sa_handler = SIG_DFL; -+ } -+ force_sig(SIGSEGV, current); -+} -+ -+#define _S(nr) (1<<((nr)-1)) -+ -+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) -+ -+/* -+ * OK, we're invoking a handler -+ */ -+static void handle_signal(struct pt_regs *regs, unsigned long signr, -+ struct k_sigaction *ka, siginfo_t *info, -+ sigset_t *oldset, int error) -+{ -+ __sighandler_t handler; -+ void (*restorer)(void); -+ unsigned long sp; -+ sigset_t save; -+ int err, ret; -+ -+ err = PT_REGS_SYSCALL_RET(¤t->thread.regs); -+ ret = 0; -+ switch(err){ -+ case -ERESTARTNOHAND: -+ ret = -EINTR; -+ break; -+ -+ case -ERESTARTSYS: -+ if (!(ka->sa.sa_flags & SA_RESTART)) { -+ ret = -EINTR; -+ break; -+ } -+ /* fallthrough */ -+ case -ERESTARTNOINTR: -+ PT_REGS_RESTART_SYSCALL(regs); -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ -+ /* This is because of the UM_SET_SYSCALL_RETURN and the fact -+ * that on i386 the system call number and return value are -+ * in the same register. When the system call restarts, %eax -+ * had better have the system call number in it. Since the -+ * return value doesn't matter (except that it shouldn't be -+ * -ERESTART*), we'll stick the system call number there. -+ */ -+ ret = PT_REGS_SYSCALL_NR(regs); -+ break; -+ } -+ -+ handler = ka->sa.sa_handler; -+ save = *oldset; -+ -+ if (ka->sa.sa_flags & SA_ONESHOT) -+ ka->sa.sa_handler = SIG_DFL; -+ -+ if (!(ka->sa.sa_flags & SA_NODEFER)) { -+ spin_lock_irq(¤t->sigmask_lock); -+ sigorsets(¤t->blocked, ¤t->blocked, -+ &ka->sa.sa_mask); -+ sigaddset(¤t->blocked, signr); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ } -+ -+ sp = PT_REGS_SP(regs); -+ -+ if((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) -+ sp = current->sas_ss_sp + current->sas_ss_size; -+ -+ if(error != 0) -+ PT_REGS_SET_SYSCALL_RETURN(regs, ret); -+ -+ if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; -+ else restorer = NULL; -+ -+ if(ka->sa.sa_flags & SA_SIGINFO) -+ err = setup_signal_stack_si(sp, signr, (unsigned long) handler, -+ restorer, regs, info, &save); -+ else -+ err = setup_signal_stack_sc(sp, signr, (unsigned long) handler, -+ restorer, regs, &save); -+ if(err) -+ force_segv(signr); -+} -+ -+/* -+ * Note that 'init' is a special process: it doesn't get signals it doesn't -+ * want to handle. Thus you cannot kill init even with a SIGKILL even by -+ * mistake. -+ */ -+ -+static int kern_do_signal(struct pt_regs *regs, sigset_t *oldset, int error) -+{ -+ siginfo_t info; -+ struct k_sigaction *ka; -+ -+ if (!oldset) -+ oldset = ¤t->blocked; -+ -+ for (;;) { -+ unsigned long signr; -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ signr = dequeue_signal(¤t->blocked, &info); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ if (!signr) -+ break; -+ -+ if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { -+ /* Let the debugger run. */ -+ current->exit_code = signr; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ -+ /* We're back. Did the debugger cancel the sig? */ -+ signr = current->exit_code; -+ if (!signr) -+ continue; -+ current->exit_code = 0; -+ -+ /* The debugger continued. Ignore SIGSTOP. */ -+ if (signr == SIGSTOP) -+ continue; -+ -+ /* Update the siginfo structure. Is this good? */ -+ if (signr != info.si_signo) { -+ info.si_signo = signr; -+ info.si_errno = 0; -+ info.si_code = SI_USER; -+ info.si_pid = current->p_pptr->pid; -+ info.si_uid = current->p_pptr->uid; -+ } -+ -+ /* If the (new) signal is now blocked, requeue it. */ -+ if (sigismember(¤t->blocked, signr)) { -+ send_sig_info(signr, &info, current); -+ continue; -+ } -+ } -+ -+ ka = ¤t->sig->action[signr-1]; -+ if (ka->sa.sa_handler == SIG_IGN) { -+ if (signr != SIGCHLD) -+ continue; -+ /* Check for SIGCHLD: it's special. */ -+ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) -+ /* nothing */; -+ continue; -+ } -+ -+ if (ka->sa.sa_handler == SIG_DFL) { -+ int exit_code = signr; -+ -+ /* Init gets no signals it doesn't want. */ -+ if (current->pid == 1) -+ continue; -+ -+ switch (signr) { -+ case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: -+ continue; -+ -+ case SIGTSTP: case SIGTTIN: case SIGTTOU: -+ if (is_orphaned_pgrp(current->pgrp)) -+ continue; -+ /* FALLTHRU */ -+ -+ case SIGSTOP: { -+ struct signal_struct *sig; -+ current->state = TASK_STOPPED; -+ current->exit_code = signr; -+ sig = current->p_pptr->sig; -+ if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ continue; -+ } -+ case SIGQUIT: case SIGILL: case SIGTRAP: -+ case SIGABRT: case SIGFPE: case SIGSEGV: -+ case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: -+ if (do_coredump(signr, ¤t->thread.regs)) -+ exit_code |= 0x80; -+ /* FALLTHRU */ -+ -+ default: -+ sig_exit(signr, exit_code, &info); -+ /* NOTREACHED */ -+ } -+ } -+ -+ /* Whee! Actually deliver the signal. */ -+ handle_signal(regs, signr, ka, &info, oldset, error); -+ return(1); -+ } -+ -+ /* Did we come from a system call? */ -+ if(PT_REGS_SYSCALL_NR(regs) >= 0){ -+ /* Restart the system call - no handlers present */ -+ if(PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOHAND || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTSYS || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOINTR){ -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ PT_REGS_RESTART_SYSCALL(regs); -+ } -+ } -+ -+ /* This closes a way to execute a system call on the host. If -+ * you set a breakpoint on a system call instruction and singlestep -+ * from it, the tracing thread used to PTRACE_SINGLESTEP the process -+ * rather than PTRACE_SYSCALL it, allowing the system call to execute -+ * on the host. The tracing thread will check this flag and -+ * PTRACE_SYSCALL if necessary. -+ */ -+ if((current->ptrace & PT_DTRACE) && -+ is_syscall(PT_REGS_IP(¤t->thread.regs))) -+ current->thread.singlestep_syscall = 1; -+ -+ return(0); -+} -+ -+int do_signal(int error) -+{ -+ return(kern_do_signal(¤t->thread.regs, NULL, error)); -+} -+ -+/* -+ * Atomically swap in the new signal mask, and wait for a signal. -+ */ -+int sys_sigsuspend(int history0, int history1, old_sigset_t mask) -+{ -+ sigset_t saveset; -+ -+ mask &= _BLOCKABLE; -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ siginitset(¤t->blocked, mask); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ PT_REGS_SYSCALL_RET(¤t->thread.regs) = -EINTR; -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if(kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -+{ -+ sigset_t saveset, newset; -+ -+ /* XXX: Don't preclude handling different sized sigset_t's. */ -+ if (sigsetsize != sizeof(sigset_t)) -+ return -EINVAL; -+ -+ if (copy_from_user(&newset, unewset, sizeof(newset))) -+ return -EFAULT; -+ sigdelsetmask(&newset, ~_BLOCKABLE); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ current->blocked = newset; -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ PT_REGS_SYSCALL_RET(¤t->thread.regs) = -EINTR; -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if (kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+extern int userspace_pid[]; -+ -+static int copy_sc_from_user(struct pt_regs *to, void *from, -+ struct arch_frame_data *arch) -+{ -+ int ret; -+ -+ ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), -+ copy_sc_from_user_skas(userspace_pid[0], -+ &to->regs, from)); -+ return(ret); -+} -+ -+int sys_sigreturn(struct pt_regs regs) -+{ -+ void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); -+ void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ copy_from_user(¤t->blocked.sig[0], sc_sigmask(sc), -+ sizeof(current->blocked.sig[0])); -+ copy_from_user(¤t->blocked.sig[1], mask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ copy_sc_from_user(¤t->thread.regs, sc, -+ &signal_frame_sc.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+int sys_rt_sigreturn(struct pt_regs regs) -+{ -+ struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); -+ int sig_size = _NSIG_WORDS * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ copy_from_user(¤t->blocked, &uc->uc_sigmask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, -+ &signal_frame_si.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/signal_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/signal_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/signal_user.c 2005-05-03 22:28:14.448413720 +0300 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <string.h> -+#include <sys/mman.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "signal_user.h" -+#include "signal_kern.h" -+#include "sysdep/sigcontext.h" -+#include "sigcontext.h" -+ -+void set_sigstack(void *sig_stack, int size) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = 0, -+ .ss_sp = (__ptr_t) sig_stack, -+ .ss_size = size - sizeof(void *) }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("enabling signal stack failed, errno = %d\n", errno); -+} -+ -+void set_handler(int sig, void (*handler)(int), int flags, ...) -+{ -+ struct sigaction action; -+ va_list ap; -+ int mask; -+ -+ va_start(ap, flags); -+ action.sa_handler = handler; -+ sigemptyset(&action.sa_mask); -+ while((mask = va_arg(ap, int)) != -1){ -+ sigaddset(&action.sa_mask, mask); -+ } -+ action.sa_flags = flags; -+ action.sa_restorer = NULL; -+ if(sigaction(sig, &action, NULL) < 0) -+ panic("sigaction failed"); -+} -+ -+int change_sig(int signal, int on) -+{ -+ sigset_t sigset, old; -+ -+ sigemptyset(&sigset); -+ sigaddset(&sigset, signal); -+ sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); -+ return(!sigismember(&old, signal)); -+} -+ -+static void change_signals(int type) -+{ -+ sigset_t mask; -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ sigaddset(&mask, SIGIO); -+ sigaddset(&mask, SIGPROF); -+ if(sigprocmask(type, &mask, NULL) < 0) -+ panic("Failed to change signal mask - errno = %d", errno); -+} -+ -+void block_signals(void) -+{ -+ change_signals(SIG_BLOCK); -+} -+ -+void unblock_signals(void) -+{ -+ change_signals(SIG_UNBLOCK); -+} -+ -+#define SIGIO_BIT 0 -+#define SIGVTALRM_BIT 1 -+ -+static int enable_mask(sigset_t *mask) -+{ -+ int sigs; -+ -+ sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT; -+ sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ return(sigs); -+} -+ -+int get_signals(void) -+{ -+ sigset_t mask; -+ -+ if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) -+ panic("Failed to get signal mask"); -+ return(enable_mask(&mask)); -+} -+ -+int set_signals(int enable) -+{ -+ sigset_t mask; -+ int ret; -+ -+ sigemptyset(&mask); -+ if(enable & (1 << SIGIO_BIT)) -+ sigaddset(&mask, SIGIO); -+ if(enable & (1 << SIGVTALRM_BIT)){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0) -+ panic("Failed to enable signals"); -+ ret = enable_mask(&mask); -+ sigemptyset(&mask); -+ if((enable & (1 << SIGIO_BIT)) == 0) -+ sigaddset(&mask, SIGIO); -+ if((enable & (1 << SIGVTALRM_BIT)) == 0){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0) -+ panic("Failed to block signals"); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/exec_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/exec_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/exec_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "asm/current.h" -+#include "asm/page.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+#include "tlb.h" -+#include "skas.h" -+#include "mmu.h" -+#include "os.h" -+ -+void flush_thread_skas(void) -+{ -+ force_flush_all(); -+ switch_mm_skas(current->mm->context.skas.mm_fd); -+} -+ -+void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/exec_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/exec_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/exec_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,63 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include "user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "time_user.h" -+ -+static int user_thread_tramp(void *arg) -+{ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("user_thread_tramp - PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ enable_timer(); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+int user_thread(unsigned long stack, int flags) -+{ -+ int pid, status, err; -+ -+ pid = clone(user_thread_tramp, (void *) stack_sp(stack), -+ flags | CLONE_FILES | SIGCHLD, NULL); -+ if(pid < 0){ -+ printk("user_thread - clone failed, errno = %d\n", errno); -+ return(pid); -+ } -+ -+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); -+ if(err < 0){ -+ printk("user_thread - waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ printk("user_thread - trampoline didn't stop, status = %d\n", -+ status); -+ return(-EINVAL); -+ } -+ -+ return(pid); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/mmu.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/mmu.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/mmu.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MMU_H -+#define __SKAS_MMU_H -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+ -+struct mmu_context_skas { -+ int mm_fd; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/mode.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/mode.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/mode.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_SKAS_H__ -+#define __MODE_SKAS_H__ -+ -+#include <sysdep/ptrace.h> -+ -+extern unsigned long exec_regs[]; -+extern unsigned long exec_fp_regs[]; -+extern unsigned long exec_fpx_regs[]; -+extern int have_fpx_regs; -+ -+extern void user_time_init_skas(void); -+extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, -+ void *from_ptr); -+extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, -+ union uml_pt_regs *regs, -+ unsigned long fault_addr, int fault_type); -+extern void sig_handler_common_skas(int sig, void *sc_ptr); -+extern void halt_skas(void); -+extern void reboot_skas(void); -+extern void kill_off_processes_skas(void); -+extern int is_skas_winch(int pid, int fd, void *data); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/mode_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/mode_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/mode_kern.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MODE_KERN_H__ -+#define __SKAS_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+ -+extern void flush_thread_skas(void); -+extern void *_switch_to_skas(void *prev, void *next); -+extern void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_skas(int nr, unsigned long clone_flags, -+ unsigned long sp, unsigned long stack_top, -+ struct task_struct *p, struct pt_regs *regs); -+extern void release_thread_skas(struct task_struct *task); -+extern void exit_thread_skas(void); -+extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); -+extern void init_idle_skas(void); -+extern void flush_tlb_kernel_vm_skas(void); -+extern void __flush_tlb_one_skas(unsigned long addr); -+extern void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_skas(struct mm_struct *mm); -+extern void force_flush_all_skas(void); -+extern long execute_syscall_skas(void *r); -+extern void before_mem_skas(unsigned long unused); -+extern unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_skas(void); -+extern int external_pid_skas(struct task_struct *task); -+extern int thread_pid_skas(struct thread_struct *thread); -+ -+#define kmem_end_skas (host_task_size - 1024 * 1024) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/proc_mm.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/proc_mm.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/proc_mm.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PROC_MM_H -+#define __SKAS_PROC_MM_H -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/ptrace-skas.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/ptrace-skas.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/ptrace-skas.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_SKAS_H -+#define __PTRACE_SKAS_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_SKAS -+ -+#include "skas_ptregs.h" -+ -+#define HOST_FRAME_SIZE 17 -+ -+#define REGS_IP(r) ((r)[HOST_IP]) -+#define REGS_SP(r) ((r)[HOST_SP]) -+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) -+#define REGS_EAX(r) ((r)[HOST_EAX]) -+#define REGS_EBX(r) ((r)[HOST_EBX]) -+#define REGS_ECX(r) ((r)[HOST_ECX]) -+#define REGS_EDX(r) ((r)[HOST_EDX]) -+#define REGS_ESI(r) ((r)[HOST_ESI]) -+#define REGS_EDI(r) ((r)[HOST_EDI]) -+#define REGS_EBP(r) ((r)[HOST_EBP]) -+#define REGS_CS(r) ((r)[HOST_CS]) -+#define REGS_SS(r) ((r)[HOST_SS]) -+#define REGS_DS(r) ((r)[HOST_DS]) -+#define REGS_ES(r) ((r)[HOST_ES]) -+#define REGS_FS(r) ((r)[HOST_FS]) -+#define REGS_GS(r) ((r)[HOST_GS]) -+ -+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) -+ -+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) -+ -+#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) -+ -+#define REGS_FAULT_ADDR(r) ((r)->fault_addr) -+ -+#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/skas.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/skas.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/skas.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_H -+#define __SKAS_H -+ -+#include "sysdep/ptrace.h" -+ -+extern int userspace_pid[]; -+ -+extern void switch_threads(void *me, void *next); -+extern void thread_wait(void *sw, void *fb); -+extern void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)); -+extern int start_idle_thread(void *stack, void *switch_buf_ptr, -+ void **fork_buf_ptr); -+extern int user_thread(unsigned long stack, int flags); -+extern void userspace(union uml_pt_regs *regs); -+extern void new_thread_proc(void *stack, void (*handler)(int sig)); -+extern void remove_sigstack(void); -+extern void new_thread_handler(int sig); -+extern void handle_syscall(union uml_pt_regs *regs); -+extern void map(int fd, unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int unmap(int fd, void *addr, int len); -+extern int protect(int fd, unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern void user_signal(int sig, union uml_pt_regs *regs); -+extern int new_mm(int from); -+extern void save_registers(union uml_pt_regs *regs); -+extern void restore_registers(union uml_pt_regs *regs); -+extern void start_userspace(int cpu); -+extern void init_registers(int pid); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/include/uaccess.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/include/uaccess.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/include/uaccess.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_UACCESS_H -+#define __SKAS_UACCESS_H -+ -+#include "asm/errno.h" -+ -+#define access_ok_skas(type, addr, size) \ -+ ((segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ ((unsigned long) (addr) + (size) <= TASK_SIZE))) -+ -+static inline int verify_area_skas(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); -+} -+ -+extern int copy_from_user_skas(void *to, const void *from, int n); -+extern int copy_to_user_skas(void *to, const void *from, int n); -+extern int strncpy_from_user_skas(char *dst, const char *src, int count); -+extern int __clear_user_skas(void *mem, int len); -+extern int clear_user_skas(void *mem, int len); -+extern int strnlen_user_skas(const void *str, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,31 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = skas.o -+ -+obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ -+ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ -+ uaccess.o -+ -+subdir-y = sys-$(SUBARCH) -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o -+ -+include $(TOPDIR)/Rules.make -+ -+include/skas_ptregs.h : util/mk_ptregs -+ util/mk_ptregs > $@ -+ -+util/mk_ptregs : -+ $(MAKE) -C util -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ $(MAKE) -C util clean -+ $(RM) -f include/skas_ptregs.h -Index: linux-2.4.29/arch/um/kernel/skas/mem.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/mem.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/mem.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "mem_user.h" -+ -+unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ unsigned long top = ROUND_4M((unsigned long) &arg); -+ -+ *host_size_out = top; -+ *task_size_out = top; -+ return(((unsigned long) set_task_sizes_skas) & ~0xffffff); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/mem_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/mem_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/mem_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include "mem_user.h" -+#include "mem.h" -+#include "user.h" -+#include "os.h" -+#include "proc_mm.h" -+ -+void map(int fd, unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ struct proc_mm_op map; -+ __u64 offset; -+ int prot, n, phys_fd; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ phys_fd = phys_mapping(phys, &offset); -+ -+ map = ((struct proc_mm_op) { .op = MM_MMAP, -+ .u = -+ { .mmap = -+ { .addr = virt, -+ .len = len, -+ .prot = prot, -+ .flags = MAP_SHARED | -+ MAP_FIXED, -+ .fd = phys_fd, -+ .offset = offset -+ } } } ); -+ n = os_write_file(fd, &map, sizeof(map)); -+ if(n != sizeof(map)) -+ printk("map : /proc/mm map failed, err = %d\n", -n); -+} -+ -+int unmap(int fd, void *addr, int len) -+{ -+ struct proc_mm_op unmap; -+ int n; -+ -+ unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, -+ .u = -+ { .munmap = -+ { .addr = (unsigned long) addr, -+ .len = len } } } ); -+ n = os_write_file(fd, &unmap, sizeof(unmap)); -+ if(n != sizeof(unmap)) { -+ if(n < 0) -+ return(n); -+ else if(n > 0) -+ return(-EIO); -+ } -+ -+ return(0); -+} -+ -+int protect(int fd, unsigned long addr, unsigned long len, int r, int w, -+ int x, int must_succeed) -+{ -+ struct proc_mm_op protect; -+ int prot, n; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ -+ protect = ((struct proc_mm_op) { .op = MM_MPROTECT, -+ .u = -+ { .mprotect = -+ { .addr = (unsigned long) addr, -+ .len = len, -+ .prot = prot } } } ); -+ -+ n = os_write_file(fd, &protect, sizeof(protect)); -+ if(n != sizeof(protect)) { -+ if(n == 0) return(0); -+ -+ if(must_succeed) -+ panic("protect failed, err = %d", -n); -+ -+ return(-EIO); -+ } -+ -+ return(0); -+} -+ -+void before_mem_skas(unsigned long unused) -+{ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/mmu.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/mmu.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/mmu.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+#include "linux/slab.h" -+#include "asm/segment.h" -+#include "asm/mmu.h" -+#include "os.h" -+#include "skas.h" -+ -+int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) -+{ -+ int from; -+ -+ if((current->mm != NULL) && (current->mm != &init_mm)) -+ from = current->mm->context.skas.mm_fd; -+ else from = -1; -+ -+ mm->context.skas.mm_fd = new_mm(from); -+ if(mm->context.skas.mm_fd < 0){ -+ printk("init_new_context_skas - new_mm failed, errno = %d\n", -+ mm->context.skas.mm_fd); -+ return(mm->context.skas.mm_fd); -+ } -+ -+ return(0); -+} -+ -+void destroy_context_skas(struct mm_struct *mm) -+{ -+ os_close_file(mm->context.skas.mm_fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/process.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/process.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/process.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,400 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <signal.h> -+#include <setjmp.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/unistd.h> -+#include "user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "skas.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "proc_mm.h" -+#include "skas_ptrace.h" -+#include "chan_user.h" -+ -+int is_skas_winch(int pid, int fd, void *data) -+{ -+ if(pid != getpid()) -+ return(0); -+ -+ register_winch_irq(-1, fd, -1, data); -+ return(1); -+} -+ -+/* These are set once at boot time and not changed thereafter */ -+ -+unsigned long exec_regs[FRAME_SIZE]; -+unsigned long exec_fp_regs[HOST_FP_SIZE]; -+unsigned long exec_fpx_regs[HOST_XFP_SIZE]; -+int have_fpx_regs = 1; -+ -+static void handle_segv(int pid) -+{ -+ struct ptrace_faultinfo fault; -+ int err; -+ -+ err = ptrace(PTRACE_FAULTINFO, pid, 0, &fault); -+ if(err) -+ panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", -+ errno); -+ -+ segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); -+} -+ -+static void handle_trap(int pid, union uml_pt_regs *regs) -+{ -+ int err, syscall_nr, status; -+ -+ syscall_nr = PT_SYSCALL_NR(regs->skas.regs); -+ UPT_SYSCALL_NR(regs) = syscall_nr; -+ if(syscall_nr < 1){ -+ relay_signal(SIGTRAP, regs); -+ return; -+ } -+ -+ if(!use_sysemu){ -+ err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid); -+ if(err < 0) -+ panic("handle_trap - nullifying syscall failed, " -+ "errno = %d\n", errno); -+ -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ if(err < 0) -+ panic("handle_trap - continuing to end of syscall " -+ "failed, errno = %d\n", errno); -+ -+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); -+ if((err < 0) || !WIFSTOPPED(status) || -+ (WSTOPSIG(status) != SIGTRAP)) -+ panic("handle_trap - failed to wait at end of " -+ "syscall, errno = %d, status = %d\n", errno, -+ status); -+ } -+ -+ handle_syscall(regs); -+} -+ -+static int userspace_tramp(void *arg) -+{ -+ init_new_thread_signals(0); -+ enable_timer(); -+ ptrace(PTRACE_TRACEME, 0, 0, 0); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+/* Each element set once, and only accessed by a single processor anyway */ -+#define NR_CPUS 1 -+int userspace_pid[NR_CPUS]; -+ -+void start_userspace(int cpu) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, status, n; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("start_userspace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ -+ pid = clone(userspace_tramp, (void *) sp, -+ CLONE_FILES | CLONE_VM | SIGCHLD, NULL); -+ if(pid < 0) -+ panic("start_userspace : clone failed, errno = %d", errno); -+ -+ do { -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0) -+ panic("start_userspace : wait failed, errno = %d", -+ errno); -+ } while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("start_userspace : expected SIGSTOP, got status = %d", -+ status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("start_userspace : munmap failed, errno = %d\n", errno); -+ -+ userspace_pid[cpu] = pid; -+} -+ -+void userspace(union uml_pt_regs *regs) -+{ -+ int err, status, op, do_syscall, pid = userspace_pid[0]; -+ -+ do_syscall = use_sysemu ? PTRACE_SYSEMU : PTRACE_SYSCALL; -+ restore_registers(regs); -+ -+ err = ptrace(do_syscall, pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", -+ errno); -+ while(1){ -+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); -+ if(err < 0) -+ panic("userspace - waitpid failed, errno = %d\n", -+ errno); -+ -+ regs->skas.is_user = 1; -+ save_registers(regs); -+ -+ if(WIFSTOPPED(status)){ -+ switch(WSTOPSIG(status)){ -+ case SIGSEGV: -+ handle_segv(pid); -+ break; -+ case SIGTRAP: -+ handle_trap(pid, regs); -+ break; -+ case SIGIO: -+ case SIGVTALRM: -+ case SIGILL: -+ case SIGBUS: -+ case SIGFPE: -+ case SIGWINCH: -+ user_signal(WSTOPSIG(status), regs); -+ break; -+ default: -+ printk("userspace - child stopped with signal " -+ "%d\n", WSTOPSIG(status)); -+ } -+ interrupt_end(); -+ } -+ -+ restore_registers(regs); -+ -+ op = singlestepping(NULL) ? PTRACE_SINGLESTEP : do_syscall; -+ err = ptrace(op, pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, " -+ "errno = %d\n", errno); -+ } -+} -+ -+void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)) -+{ -+ sigjmp_buf switch_buf, fork_buf; -+ -+ *switch_buf_ptr = &switch_buf; -+ *fork_buf_ptr = &fork_buf; -+ -+ if(sigsetjmp(fork_buf, 1) == 0) -+ new_thread_proc(stack, handler); -+ -+ remove_sigstack(); -+} -+ -+void thread_wait(void *sw, void *fb) -+{ -+ sigjmp_buf buf, **switch_buf = sw, *fork_buf; -+ -+ *switch_buf = &buf; -+ fork_buf = fb; -+ if(sigsetjmp(buf, 1) == 0) -+ siglongjmp(*fork_buf, 1); -+} -+ -+static int move_registers(int pid, int int_op, int fp_op, -+ union uml_pt_regs *regs, unsigned long *fp_regs) -+{ -+ if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) -+ return(-errno); -+ if(ptrace(fp_op, pid, 0, fp_regs) < 0) -+ return(-errno); -+ return(0); -+} -+ -+void save_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_GETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ fp_op = PTRACE_GETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, -+ fp_regs); -+ if(err) -+ panic("save_registers - saving registers failed, errno = %d\n", -+ -err); -+} -+ -+void restore_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_SETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ fp_op = PTRACE_SETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, -+ fp_regs); -+ if(err) -+ panic("restore_registers - saving registers failed, " -+ "errno = %d\n", -err); -+} -+ -+void switch_threads(void *me, void *next) -+{ -+ sigjmp_buf my_buf, **me_ptr = me, *next_buf = next; -+ -+ *me_ptr = &my_buf; -+ if(sigsetjmp(my_buf, 1) == 0) -+ siglongjmp(*next_buf, 1); -+} -+ -+static sigjmp_buf initial_jmpbuf; -+ -+/* XXX Make these percpu */ -+static void (*cb_proc)(void *arg); -+static void *cb_arg; -+static sigjmp_buf *cb_back; -+ -+int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) -+{ -+ sigjmp_buf **switch_buf = switch_buf_ptr; -+ int n; -+ -+ *fork_buf_ptr = &initial_jmpbuf; -+ n = sigsetjmp(initial_jmpbuf, 1); -+ if(n == 0) -+ new_thread_proc((void *) stack, new_thread_handler); -+ else if(n == 1) -+ remove_sigstack(); -+ else if(n == 2){ -+ (*cb_proc)(cb_arg); -+ siglongjmp(*cb_back, 1); -+ } -+ else if(n == 3){ -+ kmalloc_ok = 0; -+ return(0); -+ } -+ else if(n == 4){ -+ kmalloc_ok = 0; -+ return(1); -+ } -+ siglongjmp(**switch_buf, 1); -+} -+ -+void remove_sigstack(void) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, -+ .ss_sp = NULL, -+ .ss_size = 0 }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("disabling signal stack failed, errno = %d\n", errno); -+} -+ -+void initial_thread_cb_skas(void (*proc)(void *), void *arg) -+{ -+ sigjmp_buf here; -+ -+ cb_proc = proc; -+ cb_arg = arg; -+ cb_back = &here; -+ -+ block_signals(); -+ if(sigsetjmp(here, 1) == 0) -+ siglongjmp(initial_jmpbuf, 2); -+ unblock_signals(); -+ -+ cb_proc = NULL; -+ cb_arg = NULL; -+ cb_back = NULL; -+} -+ -+void halt_skas(void) -+{ -+ block_signals(); -+ siglongjmp(initial_jmpbuf, 3); -+} -+ -+void reboot_skas(void) -+{ -+ block_signals(); -+ siglongjmp(initial_jmpbuf, 4); -+} -+ -+void switch_mm_skas(int mm_fd) -+{ -+ int err; -+ -+#warning need cpu pid in switch_mm_skas -+ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); -+ if(err) -+ panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", -+ errno); -+} -+ -+void kill_off_processes_skas(void) -+{ -+#warning need to loop over userspace_pids in kill_off_processes_skas -+ os_kill_process(userspace_pid[0], 1); -+} -+ -+void init_registers(int pid) -+{ -+ int err; -+ -+ if(ptrace(PTRACE_GETREGS, pid, 0, exec_regs) < 0) -+ panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs); -+ if(!err) -+ return; -+ -+ have_fpx_regs = 0; -+ if(errno != EIO) -+ panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); -+ if(err) -+ panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/process_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/process_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/process_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,211 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "kern_util.h" -+#include "time_user.h" -+#include "signal_user.h" -+#include "skas.h" -+#include "os.h" -+#include "user_util.h" -+#include "tlb.h" -+#include "frame.h" -+#include "kern.h" -+#include "mode.h" -+#include "filehandle.h" -+#include "proc_mm.h" -+ -+void *_switch_to_skas(void *prev, void *next) -+{ -+ struct task_struct *from, *to; -+ -+ from = prev; -+ to = next; -+ -+ /* XXX need to check runqueues[cpu].idle */ -+ if(current->pid == 0) -+ switch_timers(0); -+ -+ to->thread.prev_sched = from; -+ set_current(to); -+ -+ switch_threads(&from->thread.mode.skas.switch_buf, -+ to->thread.mode.skas.switch_buf); -+ -+ if(current->pid == 0) -+ switch_timers(1); -+ -+ return(current->thread.prev_sched); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+void new_thread_handler(int sig) -+{ -+ int (*fn)(void *), n; -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ change_sig(SIGUSR1, 1); -+ thread_wait(¤t->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ /* The return value is 1 if the kernel thread execs a process, -+ * 0 if it just exits -+ */ -+ n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); -+ if(n == 1) -+ userspace(¤t->thread.regs.regs); -+ else do_exit(0); -+} -+ -+void new_thread_proc(void *stack, void (*handler)(int sig)) -+{ -+ init_new_thread_stack(stack, handler); -+ os_usr1_process(os_getpid()); -+} -+ -+void release_thread_skas(struct task_struct *task) -+{ -+} -+ -+void exit_thread_skas(void) -+{ -+} -+ -+void fork_handler(int sig) -+{ -+ change_sig(SIGUSR1, 1); -+ thread_wait(¤t->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ force_flush_all(); -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ unblock_signals(); -+ -+ userspace(¤t->thread.regs.regs); -+} -+ -+int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ void (*handler)(int); -+ -+ if(current->thread.forking){ -+ memcpy(&p->thread.regs.regs.skas, -+ ¤t->thread.regs.regs.skas, -+ sizeof(p->thread.regs.regs.skas)); -+ REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); -+ if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; -+ -+ handler = fork_handler; -+ } -+ else { -+ memcpy(p->thread.regs.regs.skas.regs, exec_regs, -+ sizeof(p->thread.regs.regs.skas.regs)); -+ memcpy(p->thread.regs.regs.skas.fp, exec_fp_regs, -+ sizeof(p->thread.regs.regs.skas.fp)); -+ memcpy(p->thread.regs.regs.skas.xfp, exec_fpx_regs, -+ sizeof(p->thread.regs.regs.skas.xfp)); -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ handler = new_thread_handler; -+ } -+ -+ new_thread(p, &p->thread.mode.skas.switch_buf, -+ &p->thread.mode.skas.fork_buf, handler); -+ return(0); -+} -+ -+int new_mm(int from) -+{ -+ struct proc_mm_op copy; -+ int n; -+ int fd = open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0); -+ -+ if(fd < 0) -+ return(fd); -+ -+ if(from != -1){ -+ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, -+ .u = -+ { .copy_segments = from } } ); -+ n = os_write_file(fd, ©, sizeof(copy)); -+ if(n != sizeof(copy)) -+ printk("new_mm : /proc/mm copy_segments failed, " -+ "err = %d\n", -n); -+ } -+ -+ return(fd); -+} -+ -+void init_idle_skas(void) -+{ -+ cpu_tasks[current->processor].pid = os_getpid(); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif -+ start_kernel(); -+ return(0); -+} -+ -+int start_uml_skas(void) -+{ -+ start_userspace(0); -+ capture_signal_stack(); -+ -+ init_new_thread_signals(1); -+ idle_timer(); -+ -+ init_task.thread.request.u.thread.proc = start_kernel_proc; -+ init_task.thread.request.u.thread.arg = NULL; -+ return(start_idle_thread(&init_task, -+ &init_task.thread.mode.skas.switch_buf, -+ &init_task.thread.mode.skas.fork_buf)); -+} -+ -+int external_pid_skas(struct task_struct *task) -+{ -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); -+} -+ -+int thread_pid_skas(struct thread_struct *thread) -+{ -+#warning Need to look up userspace_pid by cpu -+ return(userspace_pid[0]); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/syscall_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/syscall_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/syscall_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sys.h" -+#include "linux/ptrace.h" -+#include "asm/errno.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/current.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_skas(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(®s->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 1)) -+ res = -ENOSYS; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/syscall_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/syscall_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/syscall_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <signal.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void handle_syscall(union uml_pt_regs *regs) -+{ -+ long result; -+ int index; -+ -+ index = record_syscall_start(UPT_SYSCALL_NR(regs)); -+ -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/sys-i386/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/sys-i386/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/sys-i386/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -Index: linux-2.4.29/arch/um/kernel/skas/sys-i386/sigcontext.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/sys-i386/sigcontext.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/sys-i386/sigcontext.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <asm/sigcontext.h> -+#include <sys/ptrace.h> -+#include <linux/ptrace.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "kern_util.h" -+#include "user.h" -+#include "sigcontext.h" -+#include "mode.h" -+ -+int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) -+{ -+ struct sigcontext sc, *from = from_ptr; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ err = copy_from_user_proc(&sc, from, sizeof(sc)); -+ err |= copy_from_user_proc(fpregs, sc.fpstate, sizeof(fpregs)); -+ if(err) -+ return(err); -+ -+ regs->skas.regs[GS] = sc.gs; -+ regs->skas.regs[FS] = sc.fs; -+ regs->skas.regs[ES] = sc.es; -+ regs->skas.regs[DS] = sc.ds; -+ regs->skas.regs[EDI] = sc.edi; -+ regs->skas.regs[ESI] = sc.esi; -+ regs->skas.regs[EBP] = sc.ebp; -+ regs->skas.regs[UESP] = sc.esp; -+ regs->skas.regs[EBX] = sc.ebx; -+ regs->skas.regs[EDX] = sc.edx; -+ regs->skas.regs[ECX] = sc.ecx; -+ regs->skas.regs[EAX] = sc.eax; -+ regs->skas.regs[EIP] = sc.eip; -+ regs->skas.regs[CS] = sc.cs; -+ regs->skas.regs[EFL] = sc.eflags; -+ regs->skas.regs[SS] = sc.ss; -+ regs->skas.fault_addr = sc.cr2; -+ regs->skas.fault_type = FAULT_WRITE(sc.err); -+ regs->skas.trap_type = sc.trapno; -+ -+ err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, -+ union uml_pt_regs *regs, unsigned long fault_addr, -+ int fault_type) -+{ -+ struct sigcontext sc, *to = to_ptr; -+ struct _fpstate *to_fp; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ sc.gs = regs->skas.regs[GS]; -+ sc.fs = regs->skas.regs[FS]; -+ sc.es = regs->skas.regs[ES]; -+ sc.ds = regs->skas.regs[DS]; -+ sc.edi = regs->skas.regs[EDI]; -+ sc.esi = regs->skas.regs[ESI]; -+ sc.ebp = regs->skas.regs[EBP]; -+ sc.esp = regs->skas.regs[UESP]; -+ sc.ebx = regs->skas.regs[EBX]; -+ sc.edx = regs->skas.regs[EDX]; -+ sc.ecx = regs->skas.regs[ECX]; -+ sc.eax = regs->skas.regs[EAX]; -+ sc.eip = regs->skas.regs[EIP]; -+ sc.cs = regs->skas.regs[CS]; -+ sc.eflags = regs->skas.regs[EFL]; -+ sc.esp_at_signal = regs->skas.regs[UESP]; -+ sc.ss = regs->skas.regs[SS]; -+ sc.cr2 = fault_addr; -+ sc.err = TO_SC_ERR(fault_type); -+ sc.trapno = regs->skas.trap_type; -+ -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ sc.fpstate = to_fp; -+ -+ if(err) -+ return(err); -+ -+ return(copy_to_user_proc(to, &sc, sizeof(sc)) || -+ copy_to_user_proc(to_fp, fpregs, sizeof(fpregs))); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/time.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/time.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/time.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/signal.h> -+#include <sys/time.h> -+#include "time_user.h" -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_skas(void) -+{ -+ if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGALRM handler"); -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/tlb.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/tlb.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/tlb.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/mmu.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "skas.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err, fd; -+ -+ if(mm == NULL) return; -+ fd = mm->context.skas.mm_fd; -+ for(addr = start_addr; addr < end_addr;){ -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = unmap(fd, (void *) addr, PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map(fd, addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect(fd, addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = unmap(fd, (void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start; addr < end;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+void flush_tlb_kernel_vm_skas(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm); -+} -+ -+void __flush_tlb_one_skas(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE); -+} -+ -+void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm == NULL) -+ flush_kernel_vm_range(start, end); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_skas(struct mm_struct *mm) -+{ -+ flush_tlb_kernel_vm_skas(); -+ fix_range(mm, 0, host_task_size, 0); -+} -+ -+void force_flush_all_skas(void) -+{ -+ fix_range(current->mm, 0, host_task_size, 1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/trap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/trap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/trap_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <errno.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "sigcontext.h" -+ -+void sig_handler_common_skas(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct skas_regs *r; -+ struct signal_info *info; -+ int save_errno = errno; -+ int save_user; -+ -+ r = &TASK_REGS(get_current())->skas; -+ save_user = r->is_user; -+ r->is_user = 0; -+ r->fault_addr = SC_FAULT_ADDR(sc); -+ r->fault_type = SC_FAULT_TYPE(sc); -+ r->trap_type = SC_TRAP_TYPE(sc); -+ -+ change_sig(SIGUSR1, 1); -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ errno = save_errno; -+ r->is_user = save_user; -+} -+ -+extern int missed_ticks[]; -+ -+void user_signal(int sig, union uml_pt_regs *regs) -+{ -+ struct signal_info *info; -+ -+ regs->skas.is_user = 1; -+ regs->skas.fault_addr = 0; -+ regs->skas.fault_type = 0; -+ regs->skas.trap_type = 0; -+ info = &sig_info[sig]; -+ (*info->handler)(sig, regs); -+ -+ unblock_signals(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/uaccess.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/uaccess.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/uaccess.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/string.h" -+#include "linux/fs.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "user_util.h" -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out); -+ -+static unsigned long maybe_map(unsigned long virt, int is_write) -+{ -+ pte_t pte; -+ -+ void *phys = um_virt_to_phys(current, virt, &pte); -+ int dummy_code; -+ -+ if(IS_ERR(phys) || (is_write && !pte_write(pte))){ -+ if(!handle_page_fault(virt, 0, is_write, 1, &dummy_code)) -+ return(0); -+ phys = um_virt_to_phys(current, virt, NULL); -+ } -+ return((unsigned long) phys); -+} -+ -+static int do_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), void *arg) -+{ -+ struct page *page; -+ int n; -+ -+ addr = maybe_map(addr, is_write); -+ if(addr == -1) -+ return(-1); -+ -+ page = phys_to_page(addr); -+ addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); -+ n = (*op)(addr, len, arg); -+ kunmap(page); -+ -+ return(n); -+} -+ -+static void do_buffer_op(void *jmpbuf, void *arg_ptr) -+{ -+ va_list args = *((va_list *) arg_ptr); -+ unsigned long addr = va_arg(args, unsigned long); -+ int len = va_arg(args, int); -+ int is_write = va_arg(args, int); -+ int (*op)(unsigned long, int, void *) = va_arg(args, void *); -+ void *arg = va_arg(args, void *); -+ int *res = va_arg(args, int *); -+ int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); -+ int remain = len, n; -+ -+ current->thread.fault_catcher = jmpbuf; -+ n = do_op(addr, size, is_write, op, arg); -+ if(n != 0){ -+ *res = (n < 0 ? remain : 0); -+ goto out; -+ } -+ -+ addr += size; -+ remain -= size; -+ if(remain == 0){ -+ *res = 0; -+ goto out; -+ } -+ -+ while(addr < ((addr + remain) & PAGE_MASK)){ -+ n = do_op(addr, PAGE_SIZE, is_write, op, arg); -+ if(n != 0){ -+ *res = (n < 0 ? remain : 0); -+ goto out; -+ } -+ -+ addr += PAGE_SIZE; -+ remain -= PAGE_SIZE; -+ } -+ if(remain == 0){ -+ *res = 0; -+ goto out; -+ } -+ -+ n = do_op(addr, remain, is_write, op, arg); -+ if(n != 0) -+ *res = (n < 0 ? remain : 0); -+ else *res = 0; -+ out: -+ current->thread.fault_catcher = NULL; -+} -+ -+static int buffer_op(unsigned long addr, int len, int is_write, -+ int (*op)(unsigned long addr, int len, void *arg), -+ void *arg) -+{ -+ int faulted, res; -+ -+ faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg, -+ &res); -+ if(!faulted) -+ return(res); -+ -+ return(addr + len - (unsigned long) current->thread.fault_addr); -+} -+ -+static int copy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ unsigned long *to_ptr = arg, to = *to_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *to_ptr += len; -+ return(0); -+} -+ -+int copy_from_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_READ, from, n) ? -+ buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): -+ n); -+} -+ -+static int copy_chunk_to_user(unsigned long to, int len, void *arg) -+{ -+ unsigned long *from_ptr = arg, from = *from_ptr; -+ -+ memcpy((void *) to, (void *) from, len); -+ *from_ptr += len; -+ return(0); -+} -+ -+int copy_to_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, to, n) ? -+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : -+ n); -+} -+ -+static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ char **to_ptr = arg, *to = *to_ptr; -+ int n; -+ -+ strncpy(to, (void *) from, len); -+ n = strnlen(to, len); -+ *to_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strncpy_from_user_skas(char *dst, const char *src, int count) -+{ -+ int n; -+ char *ptr = dst; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ strncpy(dst, src, count); -+ return(strnlen(dst, count)); -+ } -+ -+ if(!access_ok_skas(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, -+ &ptr); -+ if(n != 0) -+ return(-EFAULT); -+ return(strnlen(dst, count)); -+} -+ -+static int clear_chunk(unsigned long addr, int len, void *unused) -+{ -+ memset((void *) addr, 0, len); -+ return(0); -+} -+ -+int __clear_user_skas(void *mem, int len) -+{ -+ return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); -+} -+ -+int clear_user_skas(void *mem, int len) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memset(mem, 0, len); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, mem, len) ? -+ buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); -+} -+ -+static int strnlen_chunk(unsigned long str, int len, void *arg) -+{ -+ int *len_ptr = arg, n; -+ -+ n = strnlen((void *) str, len); -+ *len_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+int strnlen_user_skas(const void *str, int len) -+{ -+ int count = 0, n; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)) -+ return(strnlen(str, len) + 1); -+ -+ n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); -+ if(n == 0) -+ return(count + 1); -+ return(-EFAULT); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/skas/util/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/util/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/util/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,10 @@ -+all: mk_ptregs -+ -+mk_ptregs : mk_ptregs.o -+ $(HOSTCC) -o mk_ptregs mk_ptregs.o -+ -+mk_ptregs.o : mk_ptregs.c -+ $(HOSTCC) -c $< -+ -+clean : -+ $(RM) -f mk_ptregs *.o *~ -Index: linux-2.4.29/arch/um/kernel/skas/util/mk_ptregs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/skas/util/mk_ptregs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/skas/util/mk_ptregs.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,51 @@ -+#include <stdio.h> -+#include <asm/ptrace.h> -+#include <asm/user.h> -+ -+#define PRINT_REG(name, val) printf("#define HOST_%s %d\n", (name), (val)) -+ -+int main(int argc, char **argv) -+{ -+ printf("/* Automatically generated by " -+ "arch/um/kernel/skas/util/mk_ptregs */\n"); -+ printf("\n"); -+ printf("#ifndef __SKAS_PT_REGS_\n"); -+ printf("#define __SKAS_PT_REGS_\n"); -+ printf("\n"); -+ printf("#define HOST_FRAME_SIZE %d\n", FRAME_SIZE); -+ printf("#define HOST_FP_SIZE %d\n", -+ sizeof(struct user_i387_struct) / sizeof(unsigned long)); -+ printf("#define HOST_XFP_SIZE %d\n", -+ sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); -+ -+ PRINT_REG("IP", EIP); -+ PRINT_REG("SP", UESP); -+ PRINT_REG("EFLAGS", EFL); -+ PRINT_REG("EAX", EAX); -+ PRINT_REG("EBX", EBX); -+ PRINT_REG("ECX", ECX); -+ PRINT_REG("EDX", EDX); -+ PRINT_REG("ESI", ESI); -+ PRINT_REG("EDI", EDI); -+ PRINT_REG("EBP", EBP); -+ PRINT_REG("CS", CS); -+ PRINT_REG("SS", SS); -+ PRINT_REG("DS", DS); -+ PRINT_REG("FS", FS); -+ PRINT_REG("ES", ES); -+ PRINT_REG("GS", GS); -+ printf("\n"); -+ printf("#endif\n"); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/smp.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/smp.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/smp.c 2005-05-03 22:28:14.476409464 +0300 -@@ -0,0 +1,329 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/sched.h" -+#include "linux/threads.h" -+#include "linux/interrupt.h" -+#include "asm/smp.h" -+#include "asm/processor.h" -+#include "asm/spinlock.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "asm/tlb.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "kern.h" -+#include "os.h" -+ -+/* Total count of live CPUs, set by smp_boot_cpus */ -+int smp_num_cpus = 1; -+ -+/* The 'big kernel lock' */ -+spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED}; -+ -+/* Per CPU bogomips and other parameters */ -+ -+/* The only piece used here is the ipi pipe, which is set before SMP is -+ * started and never changed. -+ */ -+struct cpuinfo_um cpu_data[NR_CPUS]; -+ -+/* CPU online map, set by smp_boot_cpus */ -+unsigned long cpu_online_map; -+ -+atomic_t global_bh_count; -+ -+/* Set when the idlers are all forked */ -+int smp_threads_ready = 0; -+ -+/* Not used by UML */ -+unsigned char global_irq_holder = 0; -+unsigned volatile long global_irq_lock; -+ -+/* A statistic, can be a little off */ -+static int num_reschedules_sent = 0; -+ -+mmu_gather_t mmu_gathers[NR_CPUS]; -+ -+void smp_send_reschedule(int cpu) -+{ -+ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); -+ num_reschedules_sent++; -+} -+ -+static void show(char * str) -+{ -+ int cpu = smp_processor_id(); -+ -+ printk(KERN_INFO "\n%s, CPU %d:\n", str, cpu); -+} -+ -+#define MAXCOUNT 100000000 -+ -+static inline void wait_on_bh(void) -+{ -+ int count = MAXCOUNT; -+ do { -+ if (!--count) { -+ show("wait_on_bh"); -+ count = ~0; -+ } -+ /* nothing .. wait for the other bh's to go away */ -+ } while (atomic_read(&global_bh_count) != 0); -+} -+ -+/* -+ * This is called when we want to synchronize with -+ * bottom half handlers. We need to wait until -+ * no other CPU is executing any bottom half handler. -+ * -+ * Don't wait if we're already running in an interrupt -+ * context or are inside a bh handler. -+ */ -+void synchronize_bh(void) -+{ -+ if (atomic_read(&global_bh_count) && !in_interrupt()) -+ wait_on_bh(); -+} -+ -+void smp_send_stop(void) -+{ -+ int i; -+ -+ printk(KERN_INFO "Stopping all CPUs..."); -+ for(i = 0; i < ncpus; i++){ -+ if(i == current->processor) -+ continue; -+ os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); -+ } -+ printk("done\n"); -+} -+ -+ -+static atomic_t smp_commenced = ATOMIC_INIT(0); -+static volatile unsigned long smp_callin_map = 0; -+ -+void smp_commence(void) -+{ -+ printk("All CPUs are go!\n"); -+ -+ wmb(); -+ atomic_set(&smp_commenced, 1); -+} -+ -+static int idle_proc(void *unused) -+{ -+ int cpu, err; -+ -+ set_current(current); -+ del_from_runqueue(current); -+ unhash_process(current); -+ -+ cpu = current->processor; -+ err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); -+ if(err < 0) -+ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); -+ -+ activate_ipi(cpu_data[cpu].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ wmb(); -+ if (test_and_set_bit(current->processor, &smp_callin_map)) { -+ printk("huh, CPU#%d already present??\n", current->processor); -+ BUG(); -+ } -+ -+ while (!atomic_read(&smp_commenced)) -+ cpu_relax(); -+ -+ init_idle(); -+ cpu_idle(); -+ return(0); -+} -+ -+static int idle_thread(int (*fn)(void *), int cpu) -+{ -+ struct task_struct *new_task; -+ int pid; -+ unsigned char c; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = NULL; -+ pid = do_fork(CLONE_VM | CLONE_PID, 0, NULL, 0); -+ if(pid < 0) -+ panic("do_fork failed in idle_thread"); -+ new_task = get_task(pid, 1); -+ -+ cpu_tasks[cpu] = ((struct cpu_task) -+ { .pid = new_task->thread.mode.tt.extern_pid, -+ .task = new_task } ); -+ init_tasks[cpu] = new_task; -+ new_task->processor = cpu; -+ new_task->cpus_allowed = 1 << cpu; -+ new_task->cpus_runnable = new_task->cpus_allowed; -+ CHOOSE_MODE(({ struct file_handle *pipe; -+ pipe = new_task->thread.mode.tt.switch_pipe; -+ write_file(&pipe[1], -1, &c, sizeof(c)); }), -+ ({ panic("skas mode doesn't support SMP"); })); -+ return(new_task->thread.mode.tt.extern_pid); -+} -+ -+void smp_boot_cpus(void) -+{ -+ int err; -+ -+ set_bit(0, &cpu_online_map); -+ set_bit(0, &smp_callin_map); -+ -+ err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); -+ if(err < 0) -+ panic("CPU#0 failed to create IPI pipe, err = %d", -err); -+ -+ activate_ipi(cpu_data[0].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ if(ncpus < 1){ -+ printk(KERN_INFO "ncpus set to 1\n"); -+ ncpus = 1; -+ } -+ else if(ncpus > NR_CPUS){ -+ printk(KERN_INFO -+ "ncpus can't be greater than NR_CPUS, set to %d\n", -+ NR_CPUS); -+ ncpus = NR_CPUS; -+ } -+ -+ if(ncpus > 1){ -+ int i, pid; -+ -+ printk(KERN_INFO "Starting up other processors:\n"); -+ for(i=1;i<ncpus;i++){ -+ int waittime; -+ -+ /* Do this early, for hard_smp_processor_id() */ -+ cpu_tasks[i].pid = -1; -+ set_bit(i, &cpu_online_map); -+ smp_num_cpus++; -+ -+ pid = idle_thread(idle_proc, i); -+ printk(KERN_INFO "\t#%d - idle thread pid = %d.. ", -+ i, pid); -+ -+ waittime = 200000000; -+ while (waittime-- && !test_bit(i, &smp_callin_map)) -+ cpu_relax(); -+ -+ if (test_bit(i, &smp_callin_map)) -+ printk("online\n"); -+ else { -+ printk("failed\n"); -+ clear_bit(i, &cpu_online_map); -+ } -+ } -+ } -+} -+ -+int setup_profiling_timer(unsigned int multiplier) -+{ -+ printk(KERN_INFO "setup_profiling_timer\n"); -+ return(0); -+} -+ -+void smp_call_function_slave(int cpu); -+ -+void IPI_handler(int cpu) -+{ -+ unsigned char c; -+ int fd; -+ -+ fd = cpu_data[cpu].ipi_pipe[0]; -+ while (os_read_file(fd, &c, 1) == 1) { -+ switch (c) { -+ case 'C': -+ smp_call_function_slave(cpu); -+ break; -+ -+ case 'R': -+ current->need_resched = 1; -+ break; -+ -+ case 'S': -+ printk("CPU#%d stopping\n", cpu); -+ while(1) -+ pause(); -+ break; -+ -+ default: -+ printk("CPU#%d received unknown IPI [%c]!\n", cpu, c); -+ break; -+ } -+ } -+} -+ -+int hard_smp_processor_id(void) -+{ -+ return(pid_to_processor_id(os_getpid())); -+} -+ -+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; -+static atomic_t scf_started; -+static atomic_t scf_finished; -+static void (*func)(void *info); -+static void *info; -+ -+void smp_call_function_slave(int cpu) -+{ -+ atomic_inc(&scf_started); -+ (*func)(info); -+ atomic_inc(&scf_finished); -+} -+ -+int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, -+ int wait) -+{ -+ int cpus = smp_num_cpus - 1; -+ int i; -+ -+ if (!cpus) -+ return 0; -+ -+ spin_lock_bh(&call_lock); -+ atomic_set(&scf_started, 0); -+ atomic_set(&scf_finished, 0); -+ func = _func; -+ info = _info; -+ -+ for (i=0;i<NR_CPUS;i++) -+ if (i != current->processor && test_bit(i, &cpu_online_map)) -+ os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); -+ -+ while (atomic_read(&scf_started) != cpus) -+ barrier(); -+ -+ if (wait) -+ while (atomic_read(&scf_finished) != cpus) -+ barrier(); -+ -+ spin_unlock_bh(&call_lock); -+ return 0; -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/syscall_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/syscall_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/syscall_kern.c 2005-05-03 22:28:14.477409312 +0300 -@@ -0,0 +1,343 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/file.h" -+#include "linux/smp_lock.h" -+#include "linux/mm.h" -+#include "linux/utsname.h" -+#include "linux/msg.h" -+#include "linux/shm.h" -+#include "linux/sys.h" -+#include "linux/unistd.h" -+#include "linux/slab.h" -+#include "linux/utime.h" -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/ipc.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "sysdep/syscalls.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* Unlocked, I don't care if this is a bit off */ -+int nsyscalls = 0; -+ -+long um_mount(char * dev_name, char * dir_name, char * type, -+ unsigned long new_flags, void * data) -+{ -+ if(type == NULL) type = ""; -+ return(sys_mount(dev_name, dir_name, type, new_flags, data)); -+} -+ -+long sys_fork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_clone(unsigned long clone_flags, unsigned long newsp) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(clone_flags, newsp, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_vfork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+/* common code for old and new mmaps */ -+long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, unsigned long fd, -+ unsigned long pgoff) -+{ -+ int error = -EBADF; -+ struct file * file = NULL; -+ -+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); -+ if (!(flags & MAP_ANONYMOUS)) { -+ file = fget(fd); -+ if (!file) -+ goto out; -+ } -+ -+ down_write(&mm->mmap_sem); -+ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); -+ up_write(&mm->mmap_sem); -+ -+ if (file) -+ fput(file); -+ out: -+ return error; -+} -+ -+long sys_mmap2(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long pgoff) -+{ -+ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); -+} -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. -+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset) -+{ -+ int err = -EINVAL; -+ if (offset & ~PAGE_MASK) -+ goto out; -+ -+ err = do_mmap2(current->mm, addr, len, prot, flags, fd, -+ offset >> PAGE_SHIFT); -+ out: -+ return err; -+} -+/* -+ * sys_pipe() is the normal C calling standard for creating -+ * a pipe. It's not the way unix traditionally does this, though. -+ */ -+int sys_pipe(unsigned long * fildes) -+{ -+ int fd[2]; -+ int error; -+ -+ error = do_pipe(fd); -+ if (!error) { -+ if (copy_to_user(fildes, fd, sizeof(fd))) -+ error = -EFAULT; -+ } -+ return error; -+} -+ -+int sys_pause(void) -+{ -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ return -ERESTARTNOHAND; -+} -+ -+int sys_sigaction(int sig, const struct old_sigaction *act, -+ struct old_sigaction *oact) -+{ -+ struct k_sigaction new_ka, old_ka; -+ int ret; -+ -+ if (act) { -+ old_sigset_t mask; -+ if (verify_area(VERIFY_READ, act, sizeof(*act)) || -+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || -+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) -+ return -EFAULT; -+ __get_user(new_ka.sa.sa_flags, &act->sa_flags); -+ __get_user(mask, &act->sa_mask); -+ siginitset(&new_ka.sa.sa_mask, mask); -+ } -+ -+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); -+ -+ if (!ret && oact) { -+ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || -+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || -+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) -+ return -EFAULT; -+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags); -+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); -+ } -+ -+ return ret; -+} -+ -+/* -+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.. -+ * -+ * This is really horribly ugly. -+ */ -+int sys_ipc (uint call, int first, int second, -+ int third, void *ptr, long fifth) -+{ -+ int version, ret; -+ -+ version = call >> 16; /* hack for backward compatibility */ -+ call &= 0xffff; -+ -+ switch (call) { -+ case SEMOP: -+ return sys_semop (first, (struct sembuf *)ptr, second); -+ case SEMGET: -+ return sys_semget (first, second, third); -+ case SEMCTL: { -+ union semun fourth; -+ if (!ptr) -+ return -EINVAL; -+ if (get_user(fourth.__pad, (void **) ptr)) -+ return -EFAULT; -+ return sys_semctl (first, second, third, fourth); -+ } -+ -+ case MSGSND: -+ return sys_msgsnd (first, (struct msgbuf *) ptr, -+ second, third); -+ case MSGRCV: -+ switch (version) { -+ case 0: { -+ struct ipc_kludge tmp; -+ if (!ptr) -+ return -EINVAL; -+ -+ if (copy_from_user(&tmp, -+ (struct ipc_kludge *) ptr, -+ sizeof (tmp))) -+ return -EFAULT; -+ return sys_msgrcv (first, tmp.msgp, second, -+ tmp.msgtyp, third); -+ } -+ default: -+ panic("msgrcv with version != 0"); -+ return sys_msgrcv (first, -+ (struct msgbuf *) ptr, -+ second, fifth, third); -+ } -+ case MSGGET: -+ return sys_msgget ((key_t) first, second); -+ case MSGCTL: -+ return sys_msgctl (first, second, (struct msqid_ds *) ptr); -+ -+ case SHMAT: -+ switch (version) { -+ default: { -+ ulong raddr; -+ ret = sys_shmat (first, (char *) ptr, second, &raddr); -+ if (ret) -+ return ret; -+ return put_user (raddr, (ulong *) third); -+ } -+ case 1: /* iBCS2 emulator entry point */ -+ if (!segment_eq(get_fs(), get_ds())) -+ return -EINVAL; -+ return sys_shmat (first, (char *) ptr, second, (ulong *) third); -+ } -+ case SHMDT: -+ return sys_shmdt ((char *)ptr); -+ case SHMGET: -+ return sys_shmget (first, second, third); -+ case SHMCTL: -+ return sys_shmctl (first, second, -+ (struct shmid_ds *) ptr); -+ default: -+ return -EINVAL; -+ } -+} -+ -+int sys_uname(struct old_utsname * name) -+{ -+ int err; -+ if (!name) -+ return -EFAULT; -+ down_read(&uts_sem); -+ err=copy_to_user(name, &system_utsname, sizeof (*name)); -+ up_read(&uts_sem); -+ return err?-EFAULT:0; -+} -+ -+int sys_olduname(struct oldold_utsname * name) -+{ -+ int error; -+ -+ if (!name) -+ return -EFAULT; -+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) -+ return -EFAULT; -+ -+ down_read(&uts_sem); -+ -+ error = __copy_to_user(&name->sysname,&system_utsname.sysname, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->sysname+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->nodename,&system_utsname.nodename, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->nodename+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->release,&system_utsname.release, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->release+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->version,&system_utsname.version, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->version+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->machine,&system_utsname.machine, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->machine+__OLD_UTS_LEN); -+ -+ up_read(&uts_sem); -+ -+ error = error ? -EFAULT : 0; -+ -+ return error; -+} -+ -+int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -+{ -+ return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); -+} -+ -+long execute_syscall(void *r) -+{ -+ return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); -+} -+ -+spinlock_t syscall_lock = SPIN_LOCK_UNLOCKED; -+ -+static int syscall_index = 0; -+ -+int next_syscall_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&syscall_lock); -+ ret = syscall_index; -+ if(++syscall_index == limit) -+ syscall_index = 0; -+ spin_unlock(&syscall_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/sys_call_table.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/sys_call_table.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/sys_call_table.c 2005-05-03 22:28:14.480408856 +0300 -@@ -0,0 +1,496 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/unistd.h" -+#include "linux/version.h" -+#include "linux/sys.h" -+#include "asm/signal.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_exit; -+extern syscall_handler_t sys_fork; -+extern syscall_handler_t sys_creat; -+extern syscall_handler_t sys_link; -+extern syscall_handler_t sys_unlink; -+extern syscall_handler_t sys_chdir; -+extern syscall_handler_t sys_mknod; -+extern syscall_handler_t sys_chmod; -+extern syscall_handler_t sys_lchown16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_stat; -+extern syscall_handler_t sys_getpid; -+extern syscall_handler_t sys_oldumount; -+extern syscall_handler_t sys_setuid16; -+extern syscall_handler_t sys_getuid16; -+extern syscall_handler_t sys_ptrace; -+extern syscall_handler_t sys_alarm; -+extern syscall_handler_t sys_fstat; -+extern syscall_handler_t sys_pause; -+extern syscall_handler_t sys_utime; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_access; -+extern syscall_handler_t sys_nice; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_sync; -+extern syscall_handler_t sys_kill; -+extern syscall_handler_t sys_rename; -+extern syscall_handler_t sys_mkdir; -+extern syscall_handler_t sys_rmdir; -+extern syscall_handler_t sys_pipe; -+extern syscall_handler_t sys_times; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_brk; -+extern syscall_handler_t sys_setgid16; -+extern syscall_handler_t sys_getgid16; -+extern syscall_handler_t sys_signal; -+extern syscall_handler_t sys_geteuid16; -+extern syscall_handler_t sys_getegid16; -+extern syscall_handler_t sys_acct; -+extern syscall_handler_t sys_umount; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ioctl; -+extern syscall_handler_t sys_fcntl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setpgid; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_olduname; -+extern syscall_handler_t sys_umask; -+extern syscall_handler_t sys_chroot; -+extern syscall_handler_t sys_ustat; -+extern syscall_handler_t sys_dup2; -+extern syscall_handler_t sys_getppid; -+extern syscall_handler_t sys_getpgrp; -+extern syscall_handler_t sys_sigaction; -+extern syscall_handler_t sys_sgetmask; -+extern syscall_handler_t sys_ssetmask; -+extern syscall_handler_t sys_setreuid16; -+extern syscall_handler_t sys_setregid16; -+extern syscall_handler_t sys_sigsuspend; -+extern syscall_handler_t sys_sigpending; -+extern syscall_handler_t sys_sethostname; -+extern syscall_handler_t sys_setrlimit; -+extern syscall_handler_t sys_old_getrlimit; -+extern syscall_handler_t sys_getrusage; -+extern syscall_handler_t sys_gettimeofday; -+extern syscall_handler_t sys_settimeofday; -+extern syscall_handler_t sys_getgroups16; -+extern syscall_handler_t sys_setgroups16; -+extern syscall_handler_t sys_symlink; -+extern syscall_handler_t sys_lstat; -+extern syscall_handler_t sys_readlink; -+extern syscall_handler_t sys_uselib; -+extern syscall_handler_t sys_swapon; -+extern syscall_handler_t sys_reboot; -+extern syscall_handler_t old_readdir; -+extern syscall_handler_t sys_munmap; -+extern syscall_handler_t sys_truncate; -+extern syscall_handler_t sys_ftruncate; -+extern syscall_handler_t sys_fchmod; -+extern syscall_handler_t sys_fchown16; -+extern syscall_handler_t sys_getpriority; -+extern syscall_handler_t sys_setpriority; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_statfs; -+extern syscall_handler_t sys_fstatfs; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_socketcall; -+extern syscall_handler_t sys_syslog; -+extern syscall_handler_t sys_setitimer; -+extern syscall_handler_t sys_getitimer; -+extern syscall_handler_t sys_newstat; -+extern syscall_handler_t sys_newlstat; -+extern syscall_handler_t sys_newfstat; -+extern syscall_handler_t sys_uname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vhangup; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_swapoff; -+extern syscall_handler_t sys_sysinfo; -+extern syscall_handler_t sys_ipc; -+extern syscall_handler_t sys_fsync; -+extern syscall_handler_t sys_sigreturn; -+extern syscall_handler_t sys_rt_sigreturn; -+extern syscall_handler_t sys_clone; -+extern syscall_handler_t sys_setdomainname; -+extern syscall_handler_t sys_newuname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_adjtimex; -+extern syscall_handler_t sys_mprotect; -+extern syscall_handler_t sys_sigprocmask; -+extern syscall_handler_t sys_create_module; -+extern syscall_handler_t sys_init_module; -+extern syscall_handler_t sys_delete_module; -+extern syscall_handler_t sys_get_kernel_syms; -+extern syscall_handler_t sys_quotactl; -+extern syscall_handler_t sys_getpgid; -+extern syscall_handler_t sys_fchdir; -+extern syscall_handler_t sys_bdflush; -+extern syscall_handler_t sys_sysfs; -+extern syscall_handler_t sys_personality; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setfsuid16; -+extern syscall_handler_t sys_setfsgid16; -+extern syscall_handler_t sys_llseek; -+extern syscall_handler_t sys_getdents; -+extern syscall_handler_t sys_flock; -+extern syscall_handler_t sys_msync; -+extern syscall_handler_t sys_readv; -+extern syscall_handler_t sys_writev; -+extern syscall_handler_t sys_getsid; -+extern syscall_handler_t sys_fdatasync; -+extern syscall_handler_t sys_sysctl; -+extern syscall_handler_t sys_mlock; -+extern syscall_handler_t sys_munlock; -+extern syscall_handler_t sys_mlockall; -+extern syscall_handler_t sys_munlockall; -+extern syscall_handler_t sys_sched_setparam; -+extern syscall_handler_t sys_sched_getparam; -+extern syscall_handler_t sys_sched_setscheduler; -+extern syscall_handler_t sys_sched_getscheduler; -+extern syscall_handler_t sys_sched_get_priority_max; -+extern syscall_handler_t sys_sched_get_priority_min; -+extern syscall_handler_t sys_sched_rr_get_interval; -+extern syscall_handler_t sys_nanosleep; -+extern syscall_handler_t sys_mremap; -+extern syscall_handler_t sys_setresuid16; -+extern syscall_handler_t sys_getresuid16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_query_module; -+extern syscall_handler_t sys_poll; -+extern syscall_handler_t sys_nfsservctl; -+extern syscall_handler_t sys_setresgid16; -+extern syscall_handler_t sys_getresgid16; -+extern syscall_handler_t sys_prctl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_rt_sigaction; -+extern syscall_handler_t sys_rt_sigprocmask; -+extern syscall_handler_t sys_rt_sigpending; -+extern syscall_handler_t sys_rt_sigtimedwait; -+extern syscall_handler_t sys_rt_sigqueueinfo; -+extern syscall_handler_t sys_rt_sigsuspend; -+extern syscall_handler_t sys_pread; -+extern syscall_handler_t sys_pwrite; -+extern syscall_handler_t sys_chown16; -+extern syscall_handler_t sys_getcwd; -+extern syscall_handler_t sys_capget; -+extern syscall_handler_t sys_capset; -+extern syscall_handler_t sys_sigaltstack; -+extern syscall_handler_t sys_sendfile; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vfork; -+extern syscall_handler_t sys_getrlimit; -+extern syscall_handler_t sys_mmap2; -+extern syscall_handler_t sys_truncate64; -+extern syscall_handler_t sys_ftruncate64; -+extern syscall_handler_t sys_stat64; -+extern syscall_handler_t sys_lstat64; -+extern syscall_handler_t sys_fstat64; -+extern syscall_handler_t sys_lchown; -+extern syscall_handler_t sys_getuid; -+extern syscall_handler_t sys_getgid; -+extern syscall_handler_t sys_geteuid; -+extern syscall_handler_t sys_getegid; -+extern syscall_handler_t sys_setreuid; -+extern syscall_handler_t sys_setregid; -+extern syscall_handler_t sys_getgroups; -+extern syscall_handler_t sys_setgroups; -+extern syscall_handler_t sys_fchown; -+extern syscall_handler_t sys_setresuid; -+extern syscall_handler_t sys_getresuid; -+extern syscall_handler_t sys_setresgid; -+extern syscall_handler_t sys_getresgid; -+extern syscall_handler_t sys_chown; -+extern syscall_handler_t sys_setuid; -+extern syscall_handler_t sys_setgid; -+extern syscall_handler_t sys_setfsuid; -+extern syscall_handler_t sys_setfsgid; -+extern syscall_handler_t sys_pivot_root; -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+extern syscall_handler_t sys_fcntl64; -+extern syscall_handler_t sys_getdents64; -+extern syscall_handler_t sys_gettid; -+extern syscall_handler_t sys_readahead; -+extern syscall_handler_t sys_tkill; -+extern syscall_handler_t sys_setxattr; -+extern syscall_handler_t sys_lsetxattr; -+extern syscall_handler_t sys_fsetxattr; -+extern syscall_handler_t sys_getxattr; -+extern syscall_handler_t sys_lgetxattr; -+extern syscall_handler_t sys_fgetxattr; -+extern syscall_handler_t sys_listxattr; -+extern syscall_handler_t sys_llistxattr; -+extern syscall_handler_t sys_flistxattr; -+extern syscall_handler_t sys_removexattr; -+extern syscall_handler_t sys_lremovexattr; -+extern syscall_handler_t sys_fremovexattr; -+extern syscall_handler_t sys_sendfile64; -+ -+extern syscall_handler_t um_mount; -+extern syscall_handler_t um_time; -+extern syscall_handler_t um_stime; -+ -+#define LAST_GENERIC_SYSCALL __NR_exit_group -+ -+#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL -+#define LAST_SYSCALL LAST_GENERIC_SYSCALL -+#else -+#define LAST_SYSCALL LAST_ARCH_SYSCALL -+#endif -+ -+syscall_handler_t *sys_call_table[] = { -+ [ 0 ] = sys_ni_syscall, -+ [ __NR_exit ] = sys_exit, -+ [ __NR_fork ] = sys_fork, -+ [ __NR_read ] = (syscall_handler_t *) sys_read, -+ [ __NR_write ] = (syscall_handler_t *) sys_write, -+ -+ /* These three are declared differently in asm/unistd.h */ -+ [ __NR_open ] = (syscall_handler_t *) sys_open, -+ [ __NR_close ] = (syscall_handler_t *) sys_close, -+ [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, -+ [ __NR_creat ] = sys_creat, -+ [ __NR_link ] = sys_link, -+ [ __NR_unlink ] = sys_unlink, -+ -+ /* declared differently in kern_util.h */ -+ [ __NR_execve ] = (syscall_handler_t *) sys_execve, -+ [ __NR_chdir ] = sys_chdir, -+ [ __NR_time ] = um_time, -+ [ __NR_mknod ] = sys_mknod, -+ [ __NR_chmod ] = sys_chmod, -+ [ __NR_lchown ] = sys_lchown16, -+ [ __NR_break ] = sys_ni_syscall, -+ [ __NR_oldstat ] = sys_stat, -+ [ __NR_lseek ] = (syscall_handler_t *) sys_lseek, -+ [ __NR_getpid ] = sys_getpid, -+ [ __NR_mount ] = um_mount, -+ [ __NR_umount ] = sys_oldumount, -+ [ __NR_setuid ] = sys_setuid16, -+ [ __NR_getuid ] = sys_getuid16, -+ [ __NR_stime ] = um_stime, -+ [ __NR_ptrace ] = sys_ptrace, -+ [ __NR_alarm ] = sys_alarm, -+ [ __NR_oldfstat ] = sys_fstat, -+ [ __NR_pause ] = sys_pause, -+ [ __NR_utime ] = sys_utime, -+ [ __NR_stty ] = sys_ni_syscall, -+ [ __NR_gtty ] = sys_ni_syscall, -+ [ __NR_access ] = sys_access, -+ [ __NR_nice ] = sys_nice, -+ [ __NR_ftime ] = sys_ni_syscall, -+ [ __NR_sync ] = sys_sync, -+ [ __NR_kill ] = sys_kill, -+ [ __NR_rename ] = sys_rename, -+ [ __NR_mkdir ] = sys_mkdir, -+ [ __NR_rmdir ] = sys_rmdir, -+ -+ /* Declared differently in asm/unistd.h */ -+ [ __NR_dup ] = (syscall_handler_t *) sys_dup, -+ [ __NR_pipe ] = sys_pipe, -+ [ __NR_times ] = sys_times, -+ [ __NR_prof ] = sys_ni_syscall, -+ [ __NR_brk ] = sys_brk, -+ [ __NR_setgid ] = sys_setgid16, -+ [ __NR_getgid ] = sys_getgid16, -+ [ __NR_signal ] = sys_signal, -+ [ __NR_geteuid ] = sys_geteuid16, -+ [ __NR_getegid ] = sys_getegid16, -+ [ __NR_acct ] = sys_acct, -+ [ __NR_umount2 ] = sys_umount, -+ [ __NR_lock ] = sys_ni_syscall, -+ [ __NR_ioctl ] = sys_ioctl, -+ [ __NR_fcntl ] = sys_fcntl, -+ [ __NR_mpx ] = sys_ni_syscall, -+ [ __NR_setpgid ] = sys_setpgid, -+ [ __NR_ulimit ] = sys_ni_syscall, -+ [ __NR_oldolduname ] = sys_olduname, -+ [ __NR_umask ] = sys_umask, -+ [ __NR_chroot ] = sys_chroot, -+ [ __NR_ustat ] = sys_ustat, -+ [ __NR_dup2 ] = sys_dup2, -+ [ __NR_getppid ] = sys_getppid, -+ [ __NR_getpgrp ] = sys_getpgrp, -+ [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, -+ [ __NR_sigaction ] = sys_sigaction, -+ [ __NR_sgetmask ] = sys_sgetmask, -+ [ __NR_ssetmask ] = sys_ssetmask, -+ [ __NR_setreuid ] = sys_setreuid16, -+ [ __NR_setregid ] = sys_setregid16, -+ [ __NR_sigsuspend ] = sys_sigsuspend, -+ [ __NR_sigpending ] = sys_sigpending, -+ [ __NR_sethostname ] = sys_sethostname, -+ [ __NR_setrlimit ] = sys_setrlimit, -+ [ __NR_getrlimit ] = sys_old_getrlimit, -+ [ __NR_getrusage ] = sys_getrusage, -+ [ __NR_gettimeofday ] = sys_gettimeofday, -+ [ __NR_settimeofday ] = sys_settimeofday, -+ [ __NR_getgroups ] = sys_getgroups16, -+ [ __NR_setgroups ] = sys_setgroups16, -+ [ __NR_symlink ] = sys_symlink, -+ [ __NR_oldlstat ] = sys_lstat, -+ [ __NR_readlink ] = sys_readlink, -+ [ __NR_uselib ] = sys_uselib, -+ [ __NR_swapon ] = sys_swapon, -+ [ __NR_reboot ] = sys_reboot, -+ [ __NR_readdir ] = old_readdir, -+ [ __NR_munmap ] = sys_munmap, -+ [ __NR_truncate ] = sys_truncate, -+ [ __NR_ftruncate ] = sys_ftruncate, -+ [ __NR_fchmod ] = sys_fchmod, -+ [ __NR_fchown ] = sys_fchown16, -+ [ __NR_getpriority ] = sys_getpriority, -+ [ __NR_setpriority ] = sys_setpriority, -+ [ __NR_profil ] = sys_ni_syscall, -+ [ __NR_statfs ] = sys_statfs, -+ [ __NR_fstatfs ] = sys_fstatfs, -+ [ __NR_ioperm ] = sys_ni_syscall, -+ [ __NR_socketcall ] = sys_socketcall, -+ [ __NR_syslog ] = sys_syslog, -+ [ __NR_setitimer ] = sys_setitimer, -+ [ __NR_getitimer ] = sys_getitimer, -+ [ __NR_stat ] = sys_newstat, -+ [ __NR_lstat ] = sys_newlstat, -+ [ __NR_fstat ] = sys_newfstat, -+ [ __NR_olduname ] = sys_uname, -+ [ __NR_iopl ] = sys_ni_syscall, -+ [ __NR_vhangup ] = sys_vhangup, -+ [ __NR_idle ] = sys_ni_syscall, -+ [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, -+ [ __NR_swapoff ] = sys_swapoff, -+ [ __NR_sysinfo ] = sys_sysinfo, -+ [ __NR_ipc ] = sys_ipc, -+ [ __NR_fsync ] = sys_fsync, -+ [ __NR_sigreturn ] = sys_sigreturn, -+ [ __NR_clone ] = sys_clone, -+ [ __NR_setdomainname ] = sys_setdomainname, -+ [ __NR_uname ] = sys_newuname, -+ [ __NR_adjtimex ] = sys_adjtimex, -+ [ __NR_mprotect ] = sys_mprotect, -+ [ __NR_sigprocmask ] = sys_sigprocmask, -+ [ __NR_create_module ] = sys_create_module, -+ [ __NR_init_module ] = sys_init_module, -+ [ __NR_delete_module ] = sys_delete_module, -+ [ __NR_get_kernel_syms ] = sys_get_kernel_syms, -+ [ __NR_quotactl ] = sys_quotactl, -+ [ __NR_getpgid ] = sys_getpgid, -+ [ __NR_fchdir ] = sys_fchdir, -+ [ __NR_bdflush ] = sys_bdflush, -+ [ __NR_sysfs ] = sys_sysfs, -+ [ __NR_personality ] = sys_personality, -+ [ __NR_afs_syscall ] = sys_ni_syscall, -+ [ __NR_setfsuid ] = sys_setfsuid16, -+ [ __NR_setfsgid ] = sys_setfsgid16, -+ [ __NR__llseek ] = sys_llseek, -+ [ __NR_getdents ] = sys_getdents, -+ [ __NR__newselect ] = (syscall_handler_t *) sys_select, -+ [ __NR_flock ] = sys_flock, -+ [ __NR_msync ] = sys_msync, -+ [ __NR_readv ] = sys_readv, -+ [ __NR_writev ] = sys_writev, -+ [ __NR_getsid ] = sys_getsid, -+ [ __NR_fdatasync ] = sys_fdatasync, -+ [ __NR__sysctl ] = sys_sysctl, -+ [ __NR_mlock ] = sys_mlock, -+ [ __NR_munlock ] = sys_munlock, -+ [ __NR_mlockall ] = sys_mlockall, -+ [ __NR_munlockall ] = sys_munlockall, -+ [ __NR_sched_setparam ] = sys_sched_setparam, -+ [ __NR_sched_getparam ] = sys_sched_getparam, -+ [ __NR_sched_setscheduler ] = sys_sched_setscheduler, -+ [ __NR_sched_getscheduler ] = sys_sched_getscheduler, -+ [ __NR_sched_yield ] = (syscall_handler_t *) yield, -+ [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, -+ [ __NR_sched_get_priority_min ] = sys_sched_get_priority_min, -+ [ __NR_sched_rr_get_interval ] = sys_sched_rr_get_interval, -+ [ __NR_nanosleep ] = sys_nanosleep, -+ [ __NR_mremap ] = sys_mremap, -+ [ __NR_setresuid ] = sys_setresuid16, -+ [ __NR_getresuid ] = sys_getresuid16, -+ [ __NR_vm86 ] = sys_ni_syscall, -+ [ __NR_query_module ] = sys_query_module, -+ [ __NR_poll ] = sys_poll, -+ [ __NR_nfsservctl ] = sys_nfsservctl, -+ [ __NR_setresgid ] = sys_setresgid16, -+ [ __NR_getresgid ] = sys_getresgid16, -+ [ __NR_prctl ] = sys_prctl, -+ [ __NR_rt_sigreturn ] = sys_rt_sigreturn, -+ [ __NR_rt_sigaction ] = sys_rt_sigaction, -+ [ __NR_rt_sigprocmask ] = sys_rt_sigprocmask, -+ [ __NR_rt_sigpending ] = sys_rt_sigpending, -+ [ __NR_rt_sigtimedwait ] = sys_rt_sigtimedwait, -+ [ __NR_rt_sigqueueinfo ] = sys_rt_sigqueueinfo, -+ [ __NR_rt_sigsuspend ] = sys_rt_sigsuspend, -+ [ __NR_pread ] = sys_pread, -+ [ __NR_pwrite ] = sys_pwrite, -+ [ __NR_chown ] = sys_chown16, -+ [ __NR_getcwd ] = sys_getcwd, -+ [ __NR_capget ] = sys_capget, -+ [ __NR_capset ] = sys_capset, -+ [ __NR_sigaltstack ] = sys_sigaltstack, -+ [ __NR_sendfile ] = sys_sendfile, -+ [ __NR_getpmsg ] = sys_ni_syscall, -+ [ __NR_putpmsg ] = sys_ni_syscall, -+ [ __NR_vfork ] = sys_vfork, -+ [ __NR_ugetrlimit ] = sys_getrlimit, -+ [ __NR_mmap2 ] = sys_mmap2, -+ [ __NR_truncate64 ] = sys_truncate64, -+ [ __NR_ftruncate64 ] = sys_ftruncate64, -+ [ __NR_stat64 ] = sys_stat64, -+ [ __NR_lstat64 ] = sys_lstat64, -+ [ __NR_fstat64 ] = sys_fstat64, -+ [ __NR_fcntl64 ] = sys_fcntl64, -+ [ __NR_getdents64 ] = sys_getdents64, -+ [ __NR_security ] = sys_ni_syscall, -+ [ __NR_gettid ] = sys_gettid, -+ [ __NR_readahead ] = sys_readahead, -+ [ __NR_setxattr ] = sys_setxattr, -+ [ __NR_lsetxattr ] = sys_lsetxattr, -+ [ __NR_fsetxattr ] = sys_fsetxattr, -+ [ __NR_getxattr ] = sys_getxattr, -+ [ __NR_lgetxattr ] = sys_lgetxattr, -+ [ __NR_fgetxattr ] = sys_fgetxattr, -+ [ __NR_listxattr ] = sys_listxattr, -+ [ __NR_llistxattr ] = sys_llistxattr, -+ [ __NR_flistxattr ] = sys_flistxattr, -+ [ __NR_removexattr ] = sys_removexattr, -+ [ __NR_lremovexattr ] = sys_lremovexattr, -+ [ __NR_fremovexattr ] = sys_fremovexattr, -+ [ __NR_tkill ] = sys_tkill, -+ [ __NR_sendfile64 ] = sys_sendfile64, -+ [ __NR_futex ] = sys_ni_syscall, -+ [ __NR_sched_setaffinity ] = sys_ni_syscall, -+ [ __NR_sched_getaffinity ] = sys_ni_syscall, -+ [ __NR_set_thread_area ] = sys_ni_syscall, -+ [ __NR_get_thread_area ] = sys_ni_syscall, -+ [ __NR_io_setup ] = sys_ni_syscall, -+ [ __NR_io_destroy ] = sys_ni_syscall, -+ [ __NR_io_getevents ] = sys_ni_syscall, -+ [ __NR_io_submit ] = sys_ni_syscall, -+ [ __NR_io_cancel ] = sys_ni_syscall, -+ [ __NR_alloc_hugepages ] = sys_ni_syscall, -+ [ __NR_free_hugepages ] = sys_ni_syscall, -+ [ __NR_exit_group ] = sys_ni_syscall, -+ -+ ARCH_SYSCALLS -+ [ LAST_SYSCALL + 1 ... NR_syscalls ] = -+ (syscall_handler_t *) sys_ni_syscall -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/syscall_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/syscall_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/syscall_user.c 2005-05-03 22:28:14.481408704 +0300 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <sys/time.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+ -+struct { -+ int syscall; -+ int pid; -+ int result; -+ struct timeval start; -+ struct timeval end; -+} syscall_record[1024]; -+ -+int record_syscall_start(int syscall) -+{ -+ int max, index; -+ -+ max = sizeof(syscall_record)/sizeof(syscall_record[0]); -+ index = next_syscall_index(max); -+ -+ syscall_record[index].syscall = syscall; -+ syscall_record[index].pid = current_pid(); -+ syscall_record[index].result = 0xdeadbeef; -+ gettimeofday(&syscall_record[index].start, NULL); -+ return(index); -+} -+ -+void record_syscall_end(int index, int result) -+{ -+ syscall_record[index].result = result; -+ gettimeofday(&syscall_record[index].end, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/sysrq.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/sysrq.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/sysrq.c 2005-05-03 22:28:14.482408552 +0300 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/kernel.h" -+#include "linux/module.h" -+#include "asm/page.h" -+#include "asm/processor.h" -+#include "sysrq.h" -+#include "user_util.h" -+ -+ /* -+ * If the address is either in the .text section of the -+ * kernel, or in the vmalloc'ed module regions, it *may* -+ * be the address of a calling routine -+ */ -+ -+#ifdef CONFIG_MODULES -+ -+extern struct module *module_list; -+extern struct module kernel_module; -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+ struct module *mod; -+ -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext) -+ return 1; -+ -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... */ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+ -+ return retval; -+} -+ -+#else -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ return (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+} -+ -+#endif -+ -+void show_trace(unsigned long * stack) -+{ -+ int i; -+ unsigned long addr; -+ -+ if (!stack) -+ stack = (unsigned long*) &stack; -+ -+ printk("Call Trace: "); -+ i = 1; -+ while (((long) stack & (THREAD_SIZE-1)) != 0) { -+ addr = *stack++; -+ if (kernel_text_address(addr)) { -+ if (i && ((i % 6) == 0)) -+ printk("\n "); -+ printk("[<%08lx>] ", addr); -+ i++; -+ } -+ } -+ printk("\n"); -+} -+ -+void show_trace_task(struct task_struct *tsk) -+{ -+ unsigned long esp = PT_REGS_SP(&tsk->thread.regs); -+ -+ /* User space on another CPU? */ -+ if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ return; -+ show_trace((unsigned long *)esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tempfile.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tempfile.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tempfile.c 2005-05-03 22:28:14.483408400 +0300 -@@ -0,0 +1,82 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/param.h> -+#include "init.h" -+ -+/* Modified from create_mem_file and start_debugger */ -+static char *tempdir = NULL; -+ -+static void __init find_tempdir(void) -+{ -+ char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; -+ int i; -+ char *dir = NULL; -+ -+ if(tempdir != NULL) return; /* We've already been called */ -+ for(i = 0; dirs[i]; i++){ -+ dir = getenv(dirs[i]); -+ if((dir != NULL) && (*dir != '\0')) -+ break; -+ } -+ if((dir == NULL) || (*dir == '\0')) -+ dir = "/tmp"; -+ -+ tempdir = malloc(strlen(dir) + 2); -+ if(tempdir == NULL){ -+ fprintf(stderr, "Failed to malloc tempdir, " -+ "errno = %d\n", errno); -+ return; -+ } -+ strcpy(tempdir, dir); -+ strcat(tempdir, "/"); -+} -+ -+int make_tempfile(const char *template, char **out_tempname, int do_unlink) -+{ -+ char tempname[MAXPATHLEN]; -+ int fd; -+ -+ find_tempdir(); -+ if (*template != '/') -+ strcpy(tempname, tempdir); -+ else -+ *tempname = 0; -+ strcat(tempname, template); -+ fd = mkstemp(tempname); -+ if(fd < 0){ -+ fprintf(stderr, "open - cannot create %s: %s\n", tempname, -+ strerror(errno)); -+ return -1; -+ } -+ if(do_unlink && (unlink(tempname) < 0)){ -+ perror("unlink"); -+ return -1; -+ } -+ if(out_tempname){ -+ *out_tempname = strdup(tempname); -+ if(*out_tempname == NULL){ -+ perror("strdup"); -+ return -1; -+ } -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/time.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/time.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/time.c 2005-05-03 22:28:14.484408248 +0300 -@@ -0,0 +1,144 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <time.h> -+#include <sys/time.h> -+#include <signal.h> -+#include <errno.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "time_user.h" -+ -+extern struct timeval xtime; -+ -+struct timeval local_offset = { 0, 0 }; -+ -+void timer(void) -+{ -+ gettimeofday(&xtime, NULL); -+ timeradd(&xtime, &local_offset, &xtime); -+} -+ -+void set_interval(int timer_type) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval interval = ((struct itimerval) { { 0, usec }, -+ { 0, usec } }); -+ -+ if(setitimer(timer_type, &interval, NULL) == -1) -+ panic("setitimer failed - errno = %d\n", errno); -+} -+ -+void enable_timer(void) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval enable = ((struct itimerval) { { 0, usec }, -+ { 0, usec }}); -+ if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) -+ printk("enable_timer - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void disable_timer(void) -+{ -+ struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -+ if((setitimer(ITIMER_VIRTUAL, &disable, NULL) < 0) || -+ (setitimer(ITIMER_REAL, &disable, NULL) < 0)) -+ printk("disnable_timer - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void switch_timers(int to_real) -+{ -+ struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -+ struct itimerval enable = ((struct itimerval) { { 0, 1000000/hz() }, -+ { 0, 1000000/hz() }}); -+ int old, new; -+ -+ if(to_real){ -+ old = ITIMER_VIRTUAL; -+ new = ITIMER_REAL; -+ } -+ else { -+ old = ITIMER_REAL; -+ new = ITIMER_VIRTUAL; -+ } -+ -+ if((setitimer(old, &disable, NULL) < 0) || -+ (setitimer(new, &enable, NULL))) -+ printk("switch_timers - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void idle_timer(void) -+{ -+ if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) -+ panic("Couldn't unset SIGVTALRM handler"); -+ -+ set_handler(SIGALRM, (__sighandler_t) alarm_handler, -+ SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); -+ set_interval(ITIMER_REAL); -+} -+ -+void time_init(void) -+{ -+ /* XXX This is to fill xtime with something real - otherwise by the -+ * time /proc is mounted, no timers have fired, and xtime is still 0, -+ * meaning it shows times of Jan 1 1970. The real fix is to figure -+ * out why no timers have happened by then. -+ */ -+ timer(); -+ -+ if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+void do_gettimeofday(struct timeval *tv) -+{ -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(tv, NULL); -+ timeradd(tv, &local_offset, tv); -+ time_unlock(flags); -+} -+ -+void do_settimeofday(struct timeval *tv) -+{ -+ struct timeval now; -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(&now, NULL); -+ timersub(tv, &now, &local_offset); -+ time_unlock(flags); -+} -+ -+void idle_sleep(int secs) -+{ -+ struct timespec ts; -+ -+ ts.tv_sec = secs; -+ ts.tv_nsec = 0; -+ nanosleep(&ts, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/time_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/time_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/time_kern.c 2005-05-03 22:28:14.485408096 +0300 -@@ -0,0 +1,209 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/unistd.h" -+#include "linux/stddef.h" -+#include "linux/spinlock.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/init.h" -+#include "linux/delay.h" -+#include "asm/irq.h" -+#include "asm/param.h" -+#include "asm/current.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "time_user.h" -+#include "mode.h" -+#include "os.h" -+ -+extern rwlock_t xtime_lock; -+ -+int hz(void) -+{ -+ return(HZ); -+} -+ -+/* Changed at early boot */ -+int timer_irq_inited = 0; -+ -+/* missed_ticks will be modified after kernel memory has been -+ * write-protected, so this puts it in a section which will be left -+ * write-enabled. -+ */ -+int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; -+ -+static int first_tick; -+static unsigned long long prev_usecs; -+static long long delta; /* Deviation per interval */ -+ -+#define MILLION 1000000 -+ -+void timer_irq(union uml_pt_regs *regs) -+{ -+ unsigned long long ticks = 0; -+ -+ if(!timer_irq_inited){ -+ /* This is to ensure that ticks don't pile up when -+ * the timer handler is suspended */ -+ first_tick = 0; -+ return; -+ } -+ -+ if(first_tick){ -+#if defined(CONFIG_UML_REAL_TIME_CLOCK) -+ /* We've had 1 tick */ -+ unsigned long long usecs = os_usecs(); -+ -+ delta += usecs - prev_usecs; -+ prev_usecs = usecs; -+ -+ /* Protect against the host clock being set backwards */ -+ if(delta < 0) -+ delta = 0; -+ -+ ticks += (delta * HZ) / MILLION; -+ delta -= (ticks * MILLION) / HZ; -+#else -+ ticks = 1; -+#endif -+ } -+ else { -+ prev_usecs = os_usecs(); -+ first_tick = 1; -+ } -+ -+ while(ticks > 0){ -+ do_IRQ(TIMER_IRQ, regs); -+ ticks--; -+ } -+} -+ -+void boot_timer_handler(int sig) -+{ -+ struct pt_regs regs; -+ -+ CHOOSE_MODE((void) -+ (UPT_SC(®s.regs) = (struct sigcontext *) (&sig + 1)), -+ (void) (regs.regs.skas.is_user = 0)); -+ do_timer(®s); -+} -+ -+void um_timer(int irq, void *dev, struct pt_regs *regs) -+{ -+ do_timer(regs); -+ write_lock(&xtime_lock); -+ vxtime_lock(); -+ timer(); -+ vxtime_unlock(); -+ write_unlock(&xtime_lock); -+} -+ -+long um_time(int * tloc) -+{ -+ struct timeval now; -+ -+ do_gettimeofday(&now); -+ if (tloc) { -+ if (put_user(now.tv_sec,tloc)) -+ now.tv_sec = -EFAULT; -+ } -+ return now.tv_sec; -+} -+ -+long um_stime(int * tptr) -+{ -+ int value; -+ struct timeval new; -+ -+ if (get_user(value, tptr)) -+ return -EFAULT; -+ new.tv_sec = value; -+ new.tv_usec = 0; -+ do_settimeofday(&new); -+ return 0; -+} -+ -+/* XXX Needs to be moved under sys-i386 */ -+void __delay(um_udelay_t time) -+{ -+ /* Stolen from the i386 __loop_delay */ -+ int d0; -+ __asm__ __volatile__( -+ "\tjmp 1f\n" -+ ".align 16\n" -+ "1:\tjmp 2f\n" -+ ".align 16\n" -+ "2:\tdecl %0\n\tjns 2b" -+ :"=&a" (d0) -+ :"0" (time)); -+} -+ -+void __udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / MILLION; -+ for(i=0;i<n;i++) ; -+} -+ -+void __const_udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / MILLION; -+ for(i=0;i<n;i++) ; -+} -+ -+void timer_handler(int sig, union uml_pt_regs *regs) -+{ -+#ifdef CONFIG_SMP -+ update_process_times(user_context(UPT_SP(regs))); -+#endif -+ if(current->processor == 0) -+ timer_irq(regs); -+} -+ -+static spinlock_t timer_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long time_lock(void) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&timer_spinlock, flags); -+ return(flags); -+} -+ -+void time_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&timer_spinlock, flags); -+} -+ -+int __init timer_init(void) -+{ -+ int err; -+ -+ CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); -+ err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); -+ if(err != 0) -+ printk(KERN_ERR "timer_init : request_irq failed - " -+ "errno = %d\n", -err); -+ timer_irq_inited = 1; -+ return(0); -+} -+ -+__initcall(timer_init); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tlb.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tlb.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tlb.c 2005-05-03 22:28:14.486407944 +0300 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "asm/page.h" -+#include "asm/pgalloc.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -+{ -+ address &= PAGE_MASK; -+ flush_tlb_range(vma->vm_mm, address, address + PAGE_SIZE); -+} -+ -+void flush_tlb_all(void) -+{ -+ flush_tlb_mm(current->mm); -+} -+ -+void flush_tlb_kernel_vm(void) -+{ -+ CHOOSE_MODE(flush_tlb_kernel_vm_tt(), flush_tlb_kernel_vm_skas()); -+} -+ -+void __flush_tlb_one(unsigned long addr) -+{ -+ CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -+} -+ -+void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, mm, start, -+ end); -+} -+ -+void flush_tlb_mm(struct mm_struct *mm) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -+} -+ -+void force_flush_all(void) -+{ -+ CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -+} -+ -+ -+pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) -+{ -+ return(pgd_offset(mm, address)); -+} -+ -+pmd_t *pmd_offset_proc(pgd_t *pgd, unsigned long address) -+{ -+ return(pmd_offset(pgd, address)); -+} -+ -+pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) -+{ -+ return(pte_offset(pmd, address)); -+} -+ -+pte_t *addr_pte(struct task_struct *task, unsigned long addr) -+{ -+ return(pte_offset(pmd_offset(pgd_offset(task->mm, addr), addr), addr)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/trap_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/trap_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/trap_kern.c 2005-05-03 22:28:14.487407792 +0300 -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/spinlock.h" -+#include "linux/config.h" -+#include "linux/init.h" -+#include "asm/semaphore.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/a.out.h" -+#include "asm/current.h" -+#include "asm/irq.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "chan_kern.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+#include "mem.h" -+#include "mem_kern.h" -+ -+unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int is_write, int is_user, int *code_out) -+{ -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long page; -+ int handled = 0; -+ -+ *code_out = SEGV_MAPERR; -+ down_read(&mm->mmap_sem); -+ vma = find_vma(mm, address); -+ if(!vma) -+ goto out; -+ else if(vma->vm_start <= address) -+ goto good_area; -+ else if(!(vma->vm_flags & VM_GROWSDOWN)) -+ goto out; -+ else if(expand_stack(vma, address)) -+ goto out; -+ -+ good_area: -+ *code_out = SEGV_ACCERR; -+ if(is_write && !(vma->vm_flags & VM_WRITE)) -+ goto out; -+ page = address & PAGE_MASK; -+ if(page == (unsigned long) current + PAGE_SIZE) -+ panic("Kernel stack overflow"); -+ pgd = pgd_offset(mm, page); -+ pmd = pmd_offset(pgd, page); -+ do { -+ survive: -+ switch (handle_mm_fault(mm, vma, address, is_write)) { -+ case 1: -+ current->min_flt++; -+ break; -+ case 2: -+ current->maj_flt++; -+ break; -+ default: -+ if (current->pid == 1) { -+ up_read(&mm->mmap_sem); -+ yield(); -+ down_read(&mm->mmap_sem); -+ goto survive; -+ } -+ /* Fall through to bad area case */ -+ case 0: -+ goto out; -+ } -+ pte = pte_offset(pmd, page); -+ } while(!pte_present(*pte)); -+ handled = 1; -+ *pte = pte_mkyoung(*pte); -+ if(pte_write(*pte)) *pte = pte_mkdirty(*pte); -+ flush_tlb_page(vma, page); -+ out: -+ up_read(&mm->mmap_sem); -+ return(handled); -+} -+ -+LIST_HEAD(physmem_remappers); -+ -+void register_remapper(struct remapper *info) -+{ -+ list_add(&info->list, &physmem_remappers); -+} -+ -+static int check_remapped_addr(unsigned long address, int is_write, int is_user) -+{ -+ struct remapper *remapper; -+ struct list_head *ele; -+ __u64 offset; -+ int fd; -+ -+ fd = phys_mapping(__pa(address), &offset); -+ if(fd == -1) -+ return(0); -+ -+ list_for_each(ele, &physmem_remappers){ -+ remapper = list_entry(ele, struct remapper, list); -+ if((*remapper->proc)(fd, address, is_write, offset, is_user)) -+ return(1); -+ } -+ -+ return(0); -+} -+ -+unsigned long segv(unsigned long address, unsigned long ip, int is_write, -+ int is_user, void *sc) -+{ -+ struct siginfo si; -+ void *catcher; -+ int handled; -+ -+ if(!is_user && (address >= start_vm) && (address < end_vm)){ -+ flush_tlb_kernel_vm(); -+ return(0); -+ } -+ else if(check_remapped_addr(address & PAGE_MASK, is_write, is_user)) -+ return(0); -+ else if(current->mm == NULL) -+ panic("Segfault with no mm"); -+ -+ handled = handle_page_fault(address, ip, is_write, is_user, -+ &si.si_code); -+ -+ catcher = current->thread.fault_catcher; -+ if(handled) -+ return(0); -+ else if(catcher != NULL){ -+ current->thread.fault_addr = (void *) address; -+ do_longjmp(catcher, 1); -+ } -+ else if(current->thread.fault_addr != NULL) -+ panic("fault_addr set but no fault catcher"); -+ else if(arch_fixup(ip, sc)) -+ return(0); -+ -+ if(!is_user) -+ panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", -+ address, ip); -+ si.si_signo = SIGSEGV; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+ return(0); -+} -+ -+void bad_segv(unsigned long address, unsigned long ip, int is_write) -+{ -+ struct siginfo si; -+ -+ si.si_signo = SIGSEGV; -+ si.si_code = SEGV_ACCERR; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+} -+ -+void relay_signal(int sig, union uml_pt_regs *regs) -+{ -+ if(arch_handle_signal(sig, regs)) return; -+ if(!UPT_IS_USER(regs)) -+ panic("Kernel mode signal %d", sig); -+ force_sig(sig, current); -+} -+ -+void bus_handler(int sig, union uml_pt_regs *regs) -+{ -+ if(current->thread.fault_catcher != NULL) -+ do_longjmp(current->thread.fault_catcher, 1); -+ else relay_signal(sig, regs); -+} -+ -+void winch(int sig, union uml_pt_regs *regs) -+{ -+ do_IRQ(WINCH_IRQ, regs); -+} -+ -+void trap_init(void) -+{ -+} -+ -+spinlock_t trap_lock = SPIN_LOCK_UNLOCKED; -+ -+static int trap_index = 0; -+ -+int next_trap_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&trap_lock); -+ ret = trap_index; -+ if(++trap_index == limit) -+ trap_index = 0; -+ spin_unlock(&trap_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/trap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/trap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/trap_user.c 2005-05-03 22:28:14.489407488 +0300 -@@ -0,0 +1,145 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <setjmp.h> -+#include <signal.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/page.h> -+#include <asm/unistd.h> -+#include <asm/ptrace.h> -+#include "init.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "frame_user.h" -+#include "signal_user.h" -+#include "time_user.h" -+#include "task.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+ -+void kill_child_dead(int pid) -+{ -+ kill(pid, SIGKILL); -+ kill(pid, SIGCONT); -+ do { -+ int n; -+ CATCH_EINTR(n = waitpid(pid, NULL, 0)); -+ if (n > 0) -+ kill(pid, SIGCONT); -+ else -+ break; -+ } while(1); -+} -+ -+/* Unlocked - don't care if this is a bit off */ -+int nsegfaults = 0; -+ -+struct { -+ unsigned long address; -+ int is_write; -+ int pid; -+ unsigned long sp; -+ int is_user; -+} segfault_record[1024]; -+ -+void segv_handler(int sig, union uml_pt_regs *regs) -+{ -+ int index, max; -+ -+ if(UPT_IS_USER(regs) && !UPT_SEGV_IS_FIXABLE(regs)){ -+ bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), -+ UPT_FAULT_WRITE(regs)); -+ return; -+ } -+ max = sizeof(segfault_record)/sizeof(segfault_record[0]); -+ index = next_trap_index(max); -+ -+ nsegfaults++; -+ segfault_record[index].address = UPT_FAULT_ADDR(regs); -+ segfault_record[index].pid = os_getpid(); -+ segfault_record[index].is_write = UPT_FAULT_WRITE(regs); -+ segfault_record[index].sp = UPT_SP(regs); -+ segfault_record[index].is_user = UPT_IS_USER(regs); -+ segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs), -+ UPT_IS_USER(regs), regs); -+} -+ -+void usr2_handler(int sig, union uml_pt_regs *regs) -+{ -+ CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0); -+} -+ -+struct signal_info sig_info[] = { -+ [ SIGTRAP ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGFPE ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGILL ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGWINCH ] { .handler = winch, -+ .is_irq = 1 }, -+ [ SIGBUS ] { .handler = bus_handler, -+ .is_irq = 0 }, -+ [ SIGSEGV] { .handler = segv_handler, -+ .is_irq = 0 }, -+ [ SIGIO ] { .handler = sigio_handler, -+ .is_irq = 1 }, -+ [ SIGVTALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGUSR2 ] { .handler = usr2_handler, -+ .is_irq = 0 }, -+}; -+ -+void sig_handler(int sig, struct sigcontext sc) -+{ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+} -+ -+extern int timer_irq_inited, missed_ticks[]; -+ -+void alarm_handler(int sig, struct sigcontext sc) -+{ -+ if(!timer_irq_inited) return; -+ missed_ticks[cpu()]++; -+ -+ if(sig == SIGALRM) -+ switch_timers(0); -+ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+ -+ if(sig == SIGALRM) -+ switch_timers(1); -+} -+ -+void do_longjmp(void *b, int val) -+{ -+ sigjmp_buf *buf = b; -+ -+ siglongjmp(*buf, val); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/exec_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/exec_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/exec_kern.c 2005-05-03 22:28:14.490407336 +0300 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/mm.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/pgalloc.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "signal_user.h" -+#include "os.h" -+#include "tlb.h" -+#include "mode.h" -+ -+static int exec_tramp(void *sig_stack) -+{ -+ init_new_thread_stack(sig_stack, NULL); -+ init_new_thread_signals(1); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+void flush_thread_tt(void) -+{ -+ unsigned long stack; -+ int new_pid; -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR -+ "flush_thread : failed to allocate temporary stack\n"); -+ do_exit(SIGKILL); -+ } -+ -+ new_pid = start_fork_tramp(current, stack, 0, exec_tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR -+ "flush_thread : new thread failed, errno = %d\n", -+ -new_pid); -+ do_exit(SIGKILL); -+ } -+ -+ if(current->processor == 0) -+ forward_interrupts(new_pid); -+ current->thread.request.op = OP_EXEC; -+ current->thread.request.u.exec.pid = new_pid; -+ unprotect_stack((unsigned long) current); -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+ -+ change_sig(SIGUSR1, 0); -+ enable_timer(); -+ free_page(stack); -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); -+ task_protections((unsigned long) current); -+ force_flush_all(); -+ unblock_signals(); -+} -+ -+void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ flush_tlb_mm(current->mm); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+ PT_FIX_EXEC_STACK(esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/exec_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/exec_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/exec_user.c 2005-05-03 22:28:14.491407184 +0300 -@@ -0,0 +1,54 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <signal.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ptrace_user.h" -+ -+void do_exec(int old_pid, int new_pid) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ int err; -+ -+ if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0)) -+ tracer_panic("do_exec failed to attach proc - errno = %d", -+ errno); -+ -+ CATCH_EINTR(err = waitpid(new_pid, 0, WUNTRACED)); -+ if (err < 0) -+ tracer_panic("do_exec failed to attach proc in waitpid - errno = %d", -+ errno); -+ -+ if(ptrace_getregs(old_pid, regs) < 0) -+ tracer_panic("do_exec failed to get registers - errno = %d", -+ errno); -+ -+ kill(old_pid, SIGKILL); -+ -+ if(ptrace_setregs(new_pid, regs) < 0) -+ tracer_panic("do_exec failed to start new proc - errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/gdb.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/gdb.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/gdb.c 2005-05-03 22:28:14.492407032 +0300 -@@ -0,0 +1,278 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <errno.h> -+#include <string.h> -+#include <signal.h> -+#include <sys/ptrace.h> -+#include <sys/types.h> -+#include "uml-config.h" -+#include "kern_constants.h" -+#include "chan_user.h" -+#include "init.h" -+#include "user.h" -+#include "debug.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "tt.h" -+#include "sysdep/thread.h" -+ -+extern int debugger_pid; -+extern int debugger_fd; -+extern int debugger_parent; -+ -+int detach(int pid, int sig) -+{ -+ return(ptrace(PTRACE_DETACH, pid, 0, sig)); -+} -+ -+int attach(int pid) -+{ -+ int err; -+ -+ err = ptrace(PTRACE_ATTACH, pid, 0, 0); -+ if(err < 0) return(-errno); -+ else return(err); -+} -+ -+int cont(int pid) -+{ -+ return(ptrace(PTRACE_CONT, pid, 0, 0)); -+} -+ -+#ifdef UML_CONFIG_PT_PROXY -+ -+int debugger_signal(int status, pid_t pid) -+{ -+ return(debugger_proxy(status, pid)); -+} -+ -+void child_signal(pid_t pid, int status) -+{ -+ child_proxy(pid, status); -+} -+ -+static void gdb_announce(char *dev_name, int dev) -+{ -+ printf("gdb assigned device '%s'\n", dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = gdb_announce, -+ .xterm_title = "UML kernel debugger", -+ .raw = 0, -+ .tramp_stack = 0, -+ .in_kernel = 0, -+}; -+ -+/* Accessed by the tracing thread, which automatically serializes access */ -+static void *xterm_data; -+static int xterm_fd; -+ -+extern void *xterm_init(char *, int, struct chan_opts *); -+extern int xterm_open(int, int, int, void *, char **); -+extern void xterm_close(int, void *); -+ -+int open_gdb_chan(void) -+{ -+ char stack[UM_KERN_PAGE_SIZE], *dummy; -+ -+ opts.tramp_stack = (unsigned long) stack; -+ xterm_data = xterm_init("", 0, &opts); -+ xterm_fd = xterm_open(1, 1, 1, xterm_data, &dummy); -+ return(xterm_fd); -+} -+ -+static void exit_debugger_cb(void *unused) -+{ -+ if(debugger_pid != -1){ -+ if(gdb_pid != -1){ -+ fake_child_exit(); -+ gdb_pid = -1; -+ } -+ else kill_child_dead(debugger_pid); -+ debugger_pid = -1; -+ if(debugger_parent != -1) -+ detach(debugger_parent, SIGINT); -+ } -+ if(xterm_data != NULL) xterm_close(xterm_fd, xterm_data); -+} -+ -+static void exit_debugger(void) -+{ -+ initial_thread_cb(exit_debugger_cb, NULL); -+} -+ -+__uml_exitcall(exit_debugger); -+ -+struct gdb_data { -+ char *str; -+ int err; -+}; -+ -+static void config_gdb_cb(void *arg) -+{ -+ struct gdb_data *data = arg; -+ void *task; -+ int pid; -+ -+ data->err = -1; -+ if(debugger_pid != -1) exit_debugger_cb(NULL); -+ if(!strncmp(data->str, "pid,", strlen("pid,"))){ -+ data->str += strlen("pid,"); -+ pid = strtoul(data->str, NULL, 0); -+ task = cpu_tasks[0].task; -+ debugger_pid = attach_debugger(TASK_EXTERN_PID(task), pid, 0); -+ if(debugger_pid != -1){ -+ data->err = 0; -+ gdb_pid = pid; -+ } -+ return; -+ } -+ data->err = 0; -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 0); -+} -+ -+int gdb_config(char *str) -+{ -+ struct gdb_data data; -+ -+ if(*str++ != '=') return(-1); -+ data.str = str; -+ initial_thread_cb(config_gdb_cb, &data); -+ return(data.err); -+} -+ -+void remove_gdb_cb(void *unused) -+{ -+ exit_debugger_cb(NULL); -+} -+ -+int gdb_remove(char *unused) -+{ -+ initial_thread_cb(remove_gdb_cb, NULL); -+ return(0); -+} -+ -+void signal_usr1(int sig) -+{ -+ if(debugger_pid != -1){ -+ printk(UM_KERN_ERR "The debugger is already running\n"); -+ return; -+ } -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 0); -+} -+ -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ int pid, status; -+ -+ pid = start_debugger(linux_prog, startup, stop, &debugger_fd); -+ status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(pid < 0){ -+ cont(idle_pid); -+ return(-1); -+ } -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ int status = 0, err; -+ -+ err = attach(pid); -+ if(err < 0){ -+ printf("Failed to attach pid %d, errno = %d\n", pid, -err); -+ return(-1); -+ } -+ if(stop) status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+#ifdef notdef /* Put this back in when it does something useful */ -+static int __init uml_gdb_init_setup(char *line, int *add) -+{ -+ gdb_init = uml_strdup(line); -+ return 0; -+} -+ -+__uml_setup("gdb=", uml_gdb_init_setup, -+"gdb=<channel description>\n\n" -+); -+#endif -+ -+static int __init uml_gdb_pid_setup(char *line, int *add) -+{ -+ gdb_pid = strtoul(line, NULL, 0); -+ *add = 0; -+ return 0; -+} -+ -+__uml_setup("gdb-pid=", uml_gdb_pid_setup, -+"gdb-pid=<pid>\n" -+" gdb-pid is used to attach an external debugger to UML. This may be\n" -+" an already-running gdb or a debugger-like process like strace.\n\n" -+); -+ -+#else -+ -+int debugger_signal(int status, pid_t pid){ return(0); } -+void child_signal(pid_t pid, int status){ } -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+ kill_child_dead(idle_pid); -+ exit(1); -+} -+ -+void signal_usr1(int sig) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ printk(UM_KERN_ERR "attach_debugger called when CONFIG_PT_PROXY " -+ "is off\n"); -+ return(-1); -+} -+ -+int config_gdb(char *str) -+{ -+ return(-1); -+} -+ -+int remove_gdb(void) -+{ -+ return(-1); -+} -+ -+int init_parent_proxy(int pid) -+{ -+ return(-1); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/gdb_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/gdb_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/gdb_kern.c 2005-05-03 22:28:14.493406880 +0300 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/config.h" -+#include "mconsole_kern.h" -+ -+#ifdef CONFIG_MCONSOLE -+ -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+static struct mc_device gdb_mc = { -+ .name = "gdb", -+ .config = gdb_config, -+ .remove = gdb_remove, -+}; -+ -+int gdb_mc_init(void) -+{ -+ mconsole_register_dev(&gdb_mc); -+ return(0); -+} -+ -+__initcall(gdb_mc_init); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/debug.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/debug.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/debug.h 2005-05-03 22:28:14.494406728 +0300 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) and -+ * Lars Brinkhoff. -+ * Licensed under the GPL -+ */ -+ -+#ifndef __DEBUG_H -+#define __DEBUG_H -+ -+extern int debugger_proxy(int status, pid_t pid); -+extern void child_proxy(pid_t pid, int status); -+extern void init_proxy (pid_t pid, int waiting, int status); -+extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); -+extern void fake_child_exit(void); -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/mmu.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/mmu.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/mmu.h 2005-05-03 22:28:14.495406576 +0300 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MMU_H -+#define __TT_MMU_H -+ -+struct mmu_context_tt { -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/mode.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/mode.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/mode.h 2005-05-03 22:28:14.496406424 +0300 -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_TT_H__ -+#define __MODE_TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; -+ -+extern int tracing_pid; -+ -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void user_time_init_tt(void); -+extern int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data); -+extern int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, -+ void *data); -+extern void sig_handler_common_tt(int sig, void *sc); -+extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); -+extern void reboot_tt(void); -+extern void halt_tt(void); -+extern int is_tracer_winch(int pid, int fd, void *data); -+extern void kill_off_processes_tt(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/mode_kern.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/mode_kern.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/mode_kern.h 2005-05-03 22:28:14.496406424 +0300 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MODE_KERN_H__ -+#define __TT_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+ -+extern void *_switch_to_tt(void *prev, void *next); -+extern void flush_thread_tt(void); -+extern void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct *p, -+ struct pt_regs *regs); -+extern void release_thread_tt(struct task_struct *task); -+extern void exit_thread_tt(void); -+extern void initial_thread_cb_tt(void (*proc)(void *), void *arg); -+extern void init_idle_tt(void); -+extern void flush_tlb_kernel_vm_tt(void); -+extern void __flush_tlb_one_tt(unsigned long addr); -+extern void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_tt(struct mm_struct *mm); -+extern void force_flush_all_tt(void); -+extern long execute_syscall_tt(void *r); -+extern void before_mem_tt(unsigned long brk_start); -+extern unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_tt(void); -+extern int external_pid_tt(struct task_struct *task); -+extern int thread_pid_tt(struct thread_struct *thread); -+ -+#define kmem_end_tt (host_task_size - ABOVE_KMEM) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/ptrace-tt.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/ptrace-tt.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/ptrace-tt.h 2005-05-03 22:28:14.497406272 +0300 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_TT_H -+#define __PTRACE_TT_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "sysdep/sc.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/tt.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/tt.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/tt.h 2005-05-03 22:28:14.498406120 +0300 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_H__ -+#define __TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int gdb_pid; -+extern int debug; -+extern int debug_stop; -+extern int debug_trace; -+ -+extern int honeypot; -+ -+extern int fork_tramp(void *sig_stack); -+extern int do_proc_op(void *t, int proc_id); -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void attach_process(int pid); -+extern void tracer_panic(char *format, ...); -+extern void set_init_pid(int pid); -+extern int set_user_mode(void *task); -+extern void set_tracing(void *t, int tracing); -+extern int is_tracing(void *task); -+extern void syscall_handler(int sig, union uml_pt_regs *regs); -+extern void exit_kernel(int pid, void *task); -+extern int do_syscall(void *task, int pid); -+extern int is_valid_pid(int pid); -+extern void remap_data(void *segment_start, void *segment_end, int w); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/include/uaccess.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/include/uaccess.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/include/uaccess.h 2005-05-03 22:28:14.499405968 +0300 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_UACCESS_H -+#define __TT_UACCESS_H -+ -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/errno.h" -+#include "asm/current.h" -+#include "asm/a.out.h" -+#include "uml_uaccess.h" -+ -+#define ABOVE_KMEM (16 * 1024 * 1024) -+ -+extern unsigned long end_vm; -+extern unsigned long uml_physmem; -+ -+#define under_task_size(addr, size) \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -+ -+#define is_stack(addr, size) \ -+ (((unsigned long) (addr) < STACK_TOP) && \ -+ ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \ -+ (((unsigned long) (addr) + (size)) <= STACK_TOP)) -+ -+#define access_ok_tt(type, addr, size) \ -+ ((type == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ -+ (under_task_size(addr, size) || is_stack(addr, size)))) -+ -+static inline int verify_area_tt(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); -+} -+ -+extern unsigned long get_fault_addr(void); -+ -+extern int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, -+ void **fault_addr, void **fault_catcher); -+extern int __do_clear_user(void *mem, size_t len, void **fault_addr, -+ void **fault_catcher); -+extern int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher); -+ -+extern int copy_from_user_tt(void *to, const void *from, int n); -+extern int copy_to_user_tt(void *to, const void *from, int n); -+extern int strncpy_from_user_tt(char *dst, const char *src, int count); -+extern int __clear_user_tt(void *mem, int len); -+extern int clear_user_tt(void *mem, int len); -+extern int strnlen_user_tt(const void *str, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ksyms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ksyms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ksyms.c 2005-05-03 22:28:14.500405816 +0300 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+#include "asm/uaccess.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(__do_copy_from_user); -+EXPORT_SYMBOL(__do_copy_to_user); -+EXPORT_SYMBOL(__do_strncpy_from_user); -+EXPORT_SYMBOL(__do_strnlen_user); -+EXPORT_SYMBOL(__do_clear_user); -+ -+EXPORT_SYMBOL(tracing_pid); -+EXPORT_SYMBOL(honeypot); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/Makefile 2005-05-03 22:28:14.501405664 +0300 -@@ -0,0 +1,39 @@ -+# -+# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = tt.o -+ -+obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ -+ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ -+ uaccess.o uaccess_user.o -+ -+obj-$(CONFIG_PT_PROXY) += gdb_kern.o -+ -+subdir-y = sys-$(SUBARCH) -+subdir-$(CONFIG_PT_PROXY) += ptproxy -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+export-objs = ksyms.o -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) gdb.o time.o tracer.o -+ -+UNMAP_CFLAGS := $(patsubst -pg -DPROFILING,,$(USER_CFLAGS)) -+UNMAP_CFLAGS := $(patsubst -fprofile-arcs -ftest-coverage,,$(UNMAP_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+$(O_TARGET) : unmap_fin.o -+ -+unmap.o: unmap.c -+ $(CC) $(UNMAP_CFLAGS) -c -o $@ $< -+ -+unmap_fin.o : unmap.o -+ ld -r -o $@ $< -lc -L/usr/lib -+ -+clean : -Index: linux-2.4.29/arch/um/kernel/tt/mem.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/mem.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/mem.c 2005-05-03 22:28:14.502405512 +0300 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 - 2004 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/uaccess.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "kern.h" -+#include "tt.h" -+ -+void before_mem_tt(unsigned long brk_start) -+{ -+ if(!jail || debug) -+ remap_data(UML_ROUND_DOWN(&_stext), UML_ROUND_UP(&_etext), 1); -+ remap_data(UML_ROUND_DOWN(&_sdata), UML_ROUND_UP(&_edata), 1); -+ remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(&_end), 1); -+} -+ -+#ifdef CONFIG_HOST_2G_2G -+#define TOP 0x80000000 -+#else -+#define TOP 0xc0000000 -+#endif -+ -+#define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000) -+#define START (TOP - SIZE) -+ -+unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ *host_size_out = ROUND_4M((unsigned long) &arg); -+ *task_size_out = START; -+ return(START); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/mem_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/mem_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/mem_user.c 2005-05-03 22:28:14.502405512 +0300 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <stdio.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/mman.h> -+#include "tt.h" -+#include "mem_user.h" -+#include "user_util.h" -+ -+void remap_data(void *segment_start, void *segment_end, int w) -+{ -+ void *addr; -+ unsigned long size; -+ int data, prot; -+ -+ if(w) prot = PROT_WRITE; -+ else prot = 0; -+ prot |= PROT_READ | PROT_EXEC; -+ size = (unsigned long) segment_end - -+ (unsigned long) segment_start; -+ data = create_mem_file(size); -+ addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); -+ if(addr == MAP_FAILED){ -+ perror("mapping new data segment"); -+ exit(1); -+ } -+ memcpy(addr, segment_start, size); -+ if(switcheroo(data, prot, addr, segment_start, size) < 0){ -+ printf("switcheroo failed\n"); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/process_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/process_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/process_kern.c 2005-05-03 22:28:14.526401864 +0300 -@@ -0,0 +1,615 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/signal.h" -+#include "linux/kernel.h" -+#include "linux/slab.h" -+#include "asm/system.h" -+#include "asm/pgalloc.h" -+#include "asm/ptrace.h" -+#include "irq_user.h" -+#include "signal_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "kern.h" -+#include "sigcontext.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "tlb.h" -+#include "mode.h" -+#include "init.h" -+#include "tt.h" -+#include "filehandle.h" -+ -+void *_switch_to_tt(void *prev, void *next) -+{ -+ struct task_struct *from, *to, *prev_sched; -+ struct file_handle *pipe; -+ unsigned long flags; -+ int err, vtalrm, alrm, prof, cpu; -+ char c; -+ /* jailing and SMP are incompatible, so this doesn't need to be -+ * made per-cpu -+ */ -+ static int reading; -+ -+ from = prev; -+ to = next; -+ -+ to->thread.prev_sched = from; -+ -+ cpu = from->processor; -+ if(cpu == 0) -+ forward_interrupts(to->thread.mode.tt.extern_pid); -+#ifdef CONFIG_SMP -+ forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid); -+#endif -+ local_irq_save(flags); -+ -+ vtalrm = change_sig(SIGVTALRM, 0); -+ alrm = change_sig(SIGALRM, 0); -+ prof = change_sig(SIGPROF, 0); -+ -+ c = 0; -+ set_current(to); -+ -+ reading = 0; -+ pipe = to->thread.mode.tt.switch_pipe; -+ err = write_file(&pipe[1], -1, &c, sizeof(c)); -+ if(err != sizeof(c)) -+ panic("write of switch_pipe failed, err = %d", -err); -+ -+ reading = 1; -+ if(from->state == TASK_ZOMBIE) -+ os_kill_process(os_getpid(), 0); -+ -+ pipe = from->thread.mode.tt.switch_pipe; -+ err = read_file(&pipe[0], -1, &c, sizeof(c)); -+ if(err != sizeof(c)) -+ panic("read of switch_pipe failed, errno = %d", -err); -+ -+ /* If the process that we have just scheduled away from has exited, -+ * then it needs to be killed here. The reason is that, even though -+ * it will kill itself when it next runs, that may be too late. Its -+ * stack will be freed, possibly before then, and if that happens, -+ * we have a use-after-free situation. So, it gets killed here -+ * in case it has not already killed itself. -+ */ -+ prev_sched = current->thread.prev_sched; -+ if(prev_sched->state == TASK_ZOMBIE) -+ os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1); -+ -+ /* This works around a nasty race with 'jail'. If we are switching -+ * between two threads of a threaded app and the incoming process -+ * runs before the outgoing process reaches the read, and it makes -+ * it all the way out to userspace, then it will have write-protected -+ * the outgoing process stack. Then, when the outgoing process -+ * returns from the write, it will segfault because it can no longer -+ * write its own stack. So, in order to avoid that, the incoming -+ * thread sits in a loop yielding until 'reading' is set. This -+ * isn't entirely safe, since there may be a reschedule from a timer -+ * happening between setting 'reading' and sleeping in read. But, -+ * it should get a whole quantum in which to reach the read and sleep, -+ * which should be enough. -+ */ -+ -+ if(jail){ -+ while(!reading) sched_yield(); -+ } -+ -+ change_sig(SIGVTALRM, vtalrm); -+ change_sig(SIGALRM, alrm); -+ change_sig(SIGPROF, prof); -+ -+ arch_switch(); -+ -+ flush_tlb_all(); -+ local_irq_restore(flags); -+ -+ return(current->thread.prev_sched); -+} -+ -+void release_thread_tt(struct task_struct *task) -+{ -+ os_kill_process(task->thread.mode.tt.extern_pid, 0); -+} -+ -+void exit_thread_tt(void) -+{ -+ struct file_handle *pipe = current->thread.mode.tt.switch_pipe; -+ -+ close_file(&pipe[0]); -+ close_file(&pipe[1]); -+ kfree(pipe); -+} -+ -+static void suspend_new_thread(struct file_handle *fh) -+{ -+ char c; -+ -+ os_stop_process(os_getpid()); -+ -+ if(read_file(fh, -1, &c, sizeof(c)) != sizeof(c)) -+ panic("read failed in suspend_new_thread"); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+static void new_thread_handler(int sig) -+{ -+ struct file_handle *pipe; -+ unsigned long disable; -+ int (*fn)(void *); -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | -+ (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); -+ SC_SIGMASK(UPT_SC(¤t->thread.regs.regs)) &= ~disable; -+ -+ pipe = current->thread.mode.tt.switch_pipe; -+ suspend_new_thread(&pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ free_page(current->thread.temp_stack); -+ set_cmdline("(kernel thread)"); -+ force_flush_all(); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ change_sig(SIGUSR1, 1); -+ change_sig(SIGVTALRM, 1); -+ change_sig(SIGPROF, 1); -+ sti(); -+ if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) -+ do_exit(0); -+ -+ /* XXX No set_user_mode here because a newly execed process will -+ * immediately segfault on its non-existent IP, coming straight back -+ * to the signal handler, which will call set_user_mode on its way -+ * out. This should probably change since it's confusing. -+ */ -+} -+ -+static int new_thread_proc(void *stack) -+{ -+ /* cli is needed to block out signals until this thread is properly -+ * scheduled. Otherwise, the tracing thread will get mighty upset -+ * about any signals that arrive before that. -+ * This has the complication that it sets the saved signal mask in -+ * the sigcontext to block signals. This gets restored when this -+ * thread (or a descendant, since they get a copy of this sigcontext) -+ * returns to userspace. -+ * So, this is compensated for elsewhere. -+ * XXX There is still a small window until cli() actually finishes -+ * where signals are possible - shouldn't be a problem in practice -+ * since SIGIO hasn't been forwarded here yet, and the cli should -+ * finish before a SIGVTALRM has time to be delivered. -+ */ -+ cli(); -+ init_new_thread_stack(stack, new_thread_handler); -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+ return(0); -+} -+ -+/* Signal masking - signals are blocked at the start of fork_tramp. They -+ * are re-enabled when finish_fork_handler is entered by fork_tramp hitting -+ * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, -+ * so it is blocked before it's called. They are re-enabled on sigreturn -+ * despite the fact that they were blocked when the SIGUSR1 was issued because -+ * copy_thread copies the parent's sigcontext, including the signal mask -+ * onto the signal frame. -+ */ -+ -+static void finish_fork_handler(int sig) -+{ -+ struct file_handle *pipe = current->thread.mode.tt.switch_pipe; -+ -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ suspend_new_thread(&pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ sti(); -+ force_flush_all(); -+ if(current->mm != current->p_pptr->mm) -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, -+ 1, 0, 1); -+ task_protections((unsigned long) current); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ free_page(current->thread.temp_stack); -+ cli(); -+ change_sig(SIGUSR1, 0); -+ set_user_mode(current); -+} -+ -+int fork_tramp(void *stack) -+{ -+ cli(); -+ arch_init_thread(); -+ init_new_thread_stack(stack, finish_fork_handler); -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+ return(0); -+} -+ -+struct file_handle *make_switch_pipe(void) -+{ -+ struct file_handle *pipe; -+ int err; -+ -+ pipe = kmalloc(sizeof(struct file_handle [2]), GFP_KERNEL); -+ if(pipe == NULL){ -+ pipe = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ -+ err = make_pipe(pipe); -+ if(err) -+ goto out_free; -+ -+ out: -+ return(pipe); -+ -+ out_free: -+ kfree(pipe); -+ pipe = ERR_PTR(err); -+ goto out; -+} -+ -+int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ int (*tramp)(void *); -+ int new_pid, err; -+ unsigned long stack; -+ -+ if(current->thread.forking) -+ tramp = fork_tramp; -+ else { -+ tramp = new_thread_proc; -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ } -+ -+ p->thread.mode.tt.switch_pipe = make_switch_pipe(); -+ if(IS_ERR(p->thread.mode.tt.switch_pipe)){ -+ err = PTR_ERR(p->thread.mode.tt.switch_pipe); -+ goto out; -+ } -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR "copy_thread : failed to allocate " -+ "temporary stack\n"); -+ err = -ENOMEM; -+ goto out_close; -+ } -+ -+ clone_flags &= CLONE_VM; -+ p->thread.temp_stack = stack; -+ new_pid = start_fork_tramp(p, stack, clone_flags, tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", -+ -new_pid); -+ err = new_pid; -+ goto out_stack; -+ } -+ -+ if(current->thread.forking){ -+ sc_to_sc(UPT_SC(&p->thread.regs.regs), -+ UPT_SC(¤t->thread.regs.regs)); -+ SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); -+ if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; -+ } -+ p->thread.mode.tt.extern_pid = new_pid; -+ -+ current->thread.request.op = OP_FORK; -+ current->thread.request.u.fork.pid = new_pid; -+ os_usr1_process(os_getpid()); -+ -+ /* Enable the signal and then disable it to ensure that it is handled -+ * here, and nowhere else. -+ */ -+ change_sig(SIGUSR1, 1); -+ -+ change_sig(SIGUSR1, 0); -+ err = 0; -+ -+ out: -+ return(err); -+ -+ out_stack: -+ free_stack(stack, 0); -+ out_close: -+ close_file(&((struct file_handle *) p->thread.mode.tt.switch_pipe)[0]); -+ close_file(&((struct file_handle *) p->thread.mode.tt.switch_pipe)[1]); -+ kfree(p->thread.mode.tt.switch_pipe); -+ goto out; -+} -+ -+void reboot_tt(void) -+{ -+ current->thread.request.op = OP_REBOOT; -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+} -+ -+void halt_tt(void) -+{ -+ current->thread.request.op = OP_HALT; -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+} -+ -+void kill_off_processes_tt(void) -+{ -+ struct task_struct *p; -+ int me; -+ -+ me = os_getpid(); -+ for_each_task(p){ -+ int pid = p->thread.mode.tt.extern_pid; -+ if((pid != me) && (pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+ if((init_task.thread.mode.tt.extern_pid != me) && -+ (init_task.thread.mode.tt.extern_pid != -1)) -+ os_kill_process(init_task.thread.mode.tt.extern_pid, 0); -+} -+ -+void initial_thread_cb_tt(void (*proc)(void *), void *arg) -+{ -+ if(os_getpid() == tracing_pid){ -+ (*proc)(arg); -+ } -+ else { -+ current->thread.request.op = OP_CB; -+ current->thread.request.u.cb.proc = proc; -+ current->thread.request.u.cb.arg = arg; -+ os_usr1_process(os_getpid()); -+ change_sig(SIGUSR1, 1); -+ -+ change_sig(SIGUSR1, 0); -+ } -+} -+ -+int do_proc_op(void *t, int proc_id) -+{ -+ struct task_struct *task; -+ struct thread_struct *thread; -+ int op, pid; -+ -+ task = t; -+ thread = &task->thread; -+ op = thread->request.op; -+ switch(op){ -+ case OP_NONE: -+ case OP_TRACE_ON: -+ break; -+ case OP_EXEC: -+ pid = thread->request.u.exec.pid; -+ do_exec(thread->mode.tt.extern_pid, pid); -+ thread->mode.tt.extern_pid = pid; -+ cpu_tasks[task->processor].pid = pid; -+ break; -+ case OP_FORK: -+ attach_process(thread->request.u.fork.pid); -+ break; -+ case OP_CB: -+ (*thread->request.u.cb.proc)(thread->request.u.cb.arg); -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ break; -+ default: -+ tracer_panic("Bad op in do_proc_op"); -+ break; -+ } -+ thread->request.op = OP_NONE; -+ return(op); -+} -+ -+void init_idle_tt(void) -+{ -+ idle_timer(); -+} -+ -+/* Changed by jail_setup, which is a setup */ -+int jail = 0; -+ -+int __init jail_setup(char *line, int *add) -+{ -+ int ok = 1; -+ -+ if(jail) return(0); -+#ifdef CONFIG_SMP -+ printf("'jail' may not used used in a kernel with CONFIG_SMP " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_HOSTFS -+ printf("'jail' may not used used in a kernel with CONFIG_HOSTFS " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_MODULES -+ printf("'jail' may not used used in a kernel with CONFIG_MODULES " -+ "enabled\n"); -+ ok = 0; -+#endif -+ if(!ok) exit(1); -+ -+ /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem. -+ * Removing it from the bounding set eliminates the ability of anything -+ * to acquire it, and thus read or write kernel memory. -+ */ -+ cap_lower(cap_bset, CAP_SYS_RAWIO); -+ jail = 1; -+ return(0); -+} -+ -+__uml_setup("jail", jail_setup, -+"jail\n" -+" Enables the protection of kernel memory from processes.\n\n" -+); -+ -+static void mprotect_kernel_mem(int w) -+{ -+ unsigned long start, end; -+ int pages; -+ -+ if(!jail || (current == &init_task)) return; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER); -+ -+ start = (unsigned long) current + PAGE_SIZE; -+ end = (unsigned long) current + PAGE_SIZE * pages; -+ protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); -+ protect_memory(end, high_physmem - end, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_stext); -+ end = (unsigned long) UML_ROUND_UP(&_etext); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end); -+ end = (unsigned long) UML_ROUND_UP(&_edata); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&__bss_start); -+ end = (unsigned long) UML_ROUND_UP(&_end); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ mprotect_kernel_vm(w); -+} -+ -+void unprotect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(1); -+} -+ -+void protect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(0); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif -+ if(debug) os_stop_process(pid); -+ start_kernel(); -+ return(0); -+} -+ -+void set_tracing(void *task, int tracing) -+{ -+ ((struct task_struct *) task)->thread.mode.tt.tracing = tracing; -+} -+ -+int is_tracing(void *t) -+{ -+ return (((struct task_struct *) t)->thread.mode.tt.tracing); -+} -+ -+int set_user_mode(void *t) -+{ -+ struct task_struct *task; -+ -+ task = t ? t : current; -+ if(task->thread.mode.tt.tracing) -+ return(1); -+ task->thread.request.op = OP_TRACE_ON; -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+/* This is static rather than kmalloced because this happens before kmalloc -+ * is initialized. Also, it is always needed, so might as well be static on -+ * this ground. -+ */ -+static struct file_handle init_switch_pipe[2]; -+ -+void set_init_pid(int pid) -+{ -+ int err; -+ -+ init_task.thread.mode.tt.extern_pid = pid; -+ -+ err = make_pipe(init_switch_pipe); -+ if(err) -+ panic("set_init_pid - make_pipe failed, errno = %d", err); -+ init_task.thread.mode.tt.switch_pipe = init_switch_pipe; -+} -+ -+int start_uml_tt(void) -+{ -+ void *sp; -+ int pages; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER); -+ sp = (void *) ((unsigned long) &init_task) + pages * PAGE_SIZE - -+ sizeof(unsigned long); -+ return(tracer(start_kernel_proc, sp)); -+} -+ -+int external_pid_tt(struct task_struct *task) -+{ -+ return(task->thread.mode.tt.extern_pid); -+} -+ -+int thread_pid_tt(struct thread_struct *thread) -+{ -+ return(thread->mode.tt.extern_pid); -+} -+ -+int is_valid_pid(int pid) -+{ -+ struct task_struct *task; -+ -+ read_lock(&tasklist_lock); -+ for_each_task(task){ -+ if(task->thread.mode.tt.extern_pid == pid){ -+ read_unlock(&tasklist_lock); -+ return(1); -+ } -+ } -+ read_unlock(&tasklist_lock); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/Makefile 2005-05-03 22:28:14.526401864 +0300 -@@ -0,0 +1,12 @@ -+O_TARGET = ptproxy.o -+ -+obj-y = proxy.o ptrace.o sysdep.o wait.o -+ -+USER_OBJS = $(obj-y) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/proxy.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/proxy.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/proxy.c 2005-05-03 22:28:14.529401408 +0300 -@@ -0,0 +1,371 @@ -+/********************************************************************** -+proxy.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+/* XXX This file shouldn't refer to CONFIG_* */ -+ -+#include <errno.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <signal.h> -+#include <string.h> -+#include <termios.h> -+#include <sys/wait.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <sys/ioctl.h> -+#include <asm/unistd.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+ -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "tempfile.h" -+ -+static int debugger_wait(debugger_state *debugger, int *status, int options, -+ int (*syscall)(debugger_state *debugger, pid_t child), -+ int (*normal_return)(debugger_state *debugger, -+ pid_t unused), -+ int (*wait_return)(debugger_state *debugger, -+ pid_t unused)) -+{ -+ if(debugger->real_wait){ -+ debugger->handle_trace = normal_return; -+ syscall_continue(debugger->pid); -+ debugger->real_wait = 0; -+ return(1); -+ } -+ debugger->wait_status_ptr = status; -+ debugger->wait_options = options; -+ if((debugger->debugee != NULL) && debugger->debugee->event){ -+ syscall_continue(debugger->pid); -+ wait_for_stop(debugger->pid, SIGTRAP, PTRACE_SYSCALL, -+ NULL); -+ (*wait_return)(debugger, -1); -+ return(0); -+ } -+ else if(debugger->wait_options & WNOHANG){ -+ syscall_cancel(debugger->pid, 0); -+ debugger->handle_trace = syscall; -+ return(0); -+ } -+ else { -+ syscall_pause(debugger->pid); -+ debugger->handle_trace = wait_return; -+ debugger->waiting = 1; -+ } -+ return(1); -+} -+ -+/* -+ * Handle debugger trap, i.e. syscall. -+ */ -+ -+int debugger_syscall(debugger_state *debugger, pid_t child) -+{ -+ long arg1, arg2, arg3, arg4, arg5, result; -+ int syscall, ret = 0; -+ -+ syscall = get_syscall(debugger->pid, &arg1, &arg2, &arg3, &arg4, -+ &arg5); -+ -+ switch(syscall){ -+ case __NR_execve: -+ /* execve never returns */ -+ debugger->handle_trace = debugger_syscall; -+ break; -+ -+ case __NR_ptrace: -+ if(debugger->debugee->pid != 0) arg2 = debugger->debugee->pid; -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ result = proxy_ptrace(debugger, arg1, arg2, arg3, arg4, child, -+ &ret); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(ret); -+ -+ case __NR_waitpid: -+ case __NR_wait4: -+ if(!debugger_wait(debugger, (int *) arg2, arg3, -+ debugger_syscall, debugger_normal_return, -+ proxy_wait_return)) -+ return(0); -+ break; -+ -+ case __NR_kill: -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ if(arg1 == debugger->debugee->pid){ -+ result = kill(child, arg2); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(0); -+ } -+ else debugger->handle_trace = debugger_normal_return; -+ break; -+ -+ default: -+ debugger->handle_trace = debugger_normal_return; -+ } -+ -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state parent; -+static int parent_syscall(debugger_state *debugger, int pid); -+ -+int init_parent_proxy(int pid) -+{ -+ parent = ((debugger_state) { .pid = pid, -+ .wait_options = 0, -+ .wait_status_ptr = NULL, -+ .waiting = 0, -+ .real_wait = 0, -+ .expecting_child = 0, -+ .handle_trace = parent_syscall, -+ .debugee = NULL } ); -+ return(0); -+} -+ -+int parent_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = parent_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+static int parent_syscall(debugger_state *debugger, int pid) -+{ -+ long arg1, arg2, arg3, arg4, arg5; -+ int syscall; -+ -+ syscall = get_syscall(pid, &arg1, &arg2, &arg3, &arg4, &arg5); -+ -+ if((syscall == __NR_waitpid) || (syscall == __NR_wait4)){ -+ debugger_wait(&parent, (int *) arg2, arg3, parent_syscall, -+ parent_normal_return, parent_wait_return); -+ } -+ else ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ return(0); -+} -+ -+int debugger_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+void debugger_cancelled_return(debugger_state *debugger, int result) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_set_result(debugger->pid, result); -+ syscall_continue(debugger->pid); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state debugger; -+static debugee_state debugee; -+ -+void init_proxy (pid_t debugger_pid, int stopped, int status) -+{ -+ debugger.pid = debugger_pid; -+ debugger.handle_trace = debugger_syscall; -+ debugger.debugee = &debugee; -+ debugger.waiting = 0; -+ debugger.real_wait = 0; -+ debugger.expecting_child = 0; -+ -+ debugee.pid = 0; -+ debugee.traced = 0; -+ debugee.stopped = stopped; -+ debugee.event = 0; -+ debugee.zombie = 0; -+ debugee.died = 0; -+ debugee.wait_status = status; -+ debugee.in_context = 1; -+} -+ -+int debugger_proxy(int status, int pid) -+{ -+ int ret = 0, sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if (sig == SIGTRAP) -+ ret = (*debugger.handle_trace)(&debugger, pid); -+ -+ else if(sig == SIGCHLD){ -+ if(debugger.expecting_child){ -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.expecting_child = 0; -+ } -+ else if(debugger.waiting) -+ real_wait_return(&debugger); -+ else { -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.real_wait = 1; -+ } -+ } -+ else ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ } -+ else if(WIFEXITED(status)){ -+ tracer_panic("debugger (pid %d) exited with status %d", -+ debugger.pid, WEXITSTATUS(status)); -+ } -+ else if(WIFSIGNALED(status)){ -+ tracer_panic("debugger (pid %d) exited with signal %d", -+ debugger.pid, WTERMSIG(status)); -+ } -+ else { -+ tracer_panic("proxy got unknown status (0x%x) on debugger " -+ "(pid %d)", status, debugger.pid); -+ } -+ return(ret); -+} -+ -+void child_proxy(pid_t pid, int status) -+{ -+ debugee.event = 1; -+ debugee.wait_status = status; -+ -+ if(WIFSTOPPED(status)){ -+ debugee.stopped = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else if(WIFEXITED(status) || WIFSIGNALED(status)){ -+ debugee.zombie = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else panic("proxy got unknown status (0x%x) on child (pid %d)", -+ status, pid); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+ int sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if(sig == SIGTRAP) (*parent.handle_trace)(&parent, pid); -+ else ptrace(PTRACE_SYSCALL, pid, 0, sig); -+ } -+} -+ -+void fake_child_exit(void) -+{ -+ int status, pid; -+ -+ child_proxy(1, W_EXITCODE(0, 0)); -+ while(debugger.waiting == 1){ -+ CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED)); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ debugger_proxy(status, debugger.pid); -+ } -+ CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED)); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ if(ptrace(PTRACE_DETACH, debugger.pid, 0, SIGCONT) < 0) -+ printk("fake_child_exit - PTRACE_DETACH failed, errno = %d\n", -+ errno); -+} -+ -+char gdb_init_string[] = -+"att 1 \n\ -+b panic \n\ -+b stop \n\ -+handle SIGWINCH nostop noprint pass \n\ -+"; -+ -+int start_debugger(char *prog, int startup, int stop, int *fd_out) -+{ -+ int slave, child; -+ -+ slave = open_gdb_chan(); -+ child = fork(); -+ if(child == 0){ -+ char *tempname = NULL; -+ int fd; -+ -+ if(setsid() < 0) perror("setsid"); -+ if((dup2(slave, 0) < 0) || (dup2(slave, 1) < 0) || -+ (dup2(slave, 2) < 0)){ -+ printk("start_debugger : dup2 failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ if(ioctl(0, TIOCSCTTY, 0) < 0){ -+ printk("start_debugger : TIOCSCTTY failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ if(tcsetpgrp (1, os_getpid()) < 0){ -+ printk("start_debugger : tcsetpgrp failed, " -+ "errno = %d\n", errno); -+#ifdef notdef -+ exit(1); -+#endif -+ } -+ fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); -+ if(fd < 0){ -+ printk("start_debugger : make_tempfile failed," -+ "err = %d\n", -fd); -+ exit(1); -+ } -+ os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); -+ if(startup){ -+ if(stop){ -+ os_write_file(fd, "b start_kernel\n", -+ strlen("b start_kernel\n")); -+ } -+ os_write_file(fd, "c\n", strlen("c\n")); -+ } -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printk("start_debugger : PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ execlp("gdb", "gdb", "--command", tempname, prog, NULL); -+ printk("start_debugger : exec of gdb failed, errno = %d\n", -+ errno); -+ } -+ if(child < 0){ -+ printk("start_debugger : fork for gdb failed, errno = %d\n", -+ errno); -+ return(-1); -+ } -+ *fd_out = slave; -+ return(child); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/ptproxy.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/ptproxy.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/ptproxy.h 2005-05-03 22:28:14.529401408 +0300 -@@ -0,0 +1,61 @@ -+/********************************************************************** -+ptproxy.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#ifndef __PTPROXY_H -+#define __PTPROXY_H -+ -+#include <sys/types.h> -+ -+typedef struct debugger debugger_state; -+typedef struct debugee debugee_state; -+ -+struct debugger -+{ -+ pid_t pid; -+ int wait_options; -+ int *wait_status_ptr; -+ unsigned int waiting : 1; -+ unsigned int real_wait : 1; -+ unsigned int expecting_child : 1; -+ int (*handle_trace) (debugger_state *, pid_t); -+ -+ debugee_state *debugee; -+}; -+ -+struct debugee -+{ -+ pid_t pid; -+ int wait_status; -+ unsigned int died : 1; -+ unsigned int event : 1; -+ unsigned int stopped : 1; -+ unsigned int trace_singlestep : 1; -+ unsigned int trace_syscall : 1; -+ unsigned int traced : 1; -+ unsigned int zombie : 1; -+ unsigned int in_context : 1; -+}; -+ -+extern int debugger_syscall(debugger_state *debugger, pid_t pid); -+extern int debugger_normal_return (debugger_state *debugger, pid_t unused); -+ -+extern long proxy_ptrace (struct debugger *, int, pid_t, long, long, pid_t, -+ int *strace_out); -+extern void debugger_cancelled_return(debugger_state *debugger, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/ptrace.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/ptrace.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/ptrace.c 2005-05-03 22:28:14.531401104 +0300 -@@ -0,0 +1,239 @@ -+/********************************************************************** -+ptrace.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+#include <errno.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "debug.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+#include "tt.h" -+ -+long proxy_ptrace(struct debugger *debugger, int arg1, pid_t arg2, -+ long arg3, long arg4, pid_t child, int *ret) -+{ -+ sigset_t relay; -+ long result; -+ int status; -+ -+ *ret = 0; -+ if(debugger->debugee->died) return(-ESRCH); -+ -+ switch(arg1){ -+ case PTRACE_ATTACH: -+ if(debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->pid = arg2; -+ debugger->debugee->traced = 1; -+ -+ if(is_valid_pid(arg2) && (arg2 != child)){ -+ debugger->debugee->in_context = 0; -+ kill(arg2, SIGSTOP); -+ debugger->debugee->event = 1; -+ debugger->debugee->wait_status = W_STOPCODE(SIGSTOP); -+ } -+ else { -+ debugger->debugee->in_context = 1; -+ if(debugger->debugee->stopped) -+ child_proxy(child, W_STOPCODE(SIGSTOP)); -+ else kill(child, SIGSTOP); -+ } -+ -+ return(0); -+ -+ case PTRACE_DETACH: -+ if(!debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->traced = 0; -+ debugger->debugee->pid = 0; -+ if(!debugger->debugee->in_context) -+ kill(child, SIGCONT); -+ -+ return(0); -+ -+ case PTRACE_CONT: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ *ret = PTRACE_CONT; -+ return(ptrace(PTRACE_CONT, child, arg3, arg4)); -+ -+#ifdef UM_HAVE_GETFPREGS -+ case PTRACE_GETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETFPXREGS -+ case PTRACE_GETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETREGS -+ case PTRACE_GETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace (PTRACE_POKEDATA, debugger->pid, -+ arg4 + 4 * i, regs[i]); -+ return(result); -+ } -+ break; -+#endif -+ -+ case PTRACE_KILL: -+ result = ptrace(PTRACE_KILL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_PEEKDATA: -+ case PTRACE_PEEKTEXT: -+ case PTRACE_PEEKUSER: -+ /* The value being read out could be -1, so we have to -+ * check errno to see if there's an error, and zero it -+ * beforehand so we're not faked out by an old error -+ */ -+ -+ errno = 0; -+ result = ptrace(arg1, child, arg3, 0); -+ if((result == -1) && (errno != 0)) return(-errno); -+ -+ result = ptrace(PTRACE_POKEDATA, debugger->pid, arg4, result); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_POKEDATA: -+ case PTRACE_POKETEXT: -+ case PTRACE_POKEUSER: -+ result = ptrace(arg1, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ if(arg1 == PTRACE_POKEUSER) ptrace_pokeuser(arg3, arg4); -+ return(result); -+ -+#ifdef UM_HAVE_SETFPREGS -+ case PTRACE_SETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETFPXREGS -+ case PTRACE_SETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETREGS -+ case PTRACE_SETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace(PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+ case PTRACE_SINGLESTEP: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ sigemptyset(&relay); -+ sigaddset(&relay, SIGSEGV); -+ sigaddset(&relay, SIGILL); -+ sigaddset(&relay, SIGBUS); -+ result = ptrace(PTRACE_SINGLESTEP, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ status = wait_for_stop(child, SIGTRAP, PTRACE_SINGLESTEP, -+ &relay); -+ child_proxy(child, status); -+ return(result); -+ -+ case PTRACE_SYSCALL: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ result = ptrace(PTRACE_SYSCALL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ *ret = PTRACE_SYSCALL; -+ return(result); -+ -+ case PTRACE_TRACEME: -+ default: -+ return(-EINVAL); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/sysdep.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/sysdep.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/sysdep.c 2005-05-03 22:28:14.532400952 +0300 -@@ -0,0 +1,72 @@ -+/********************************************************************** -+sysdep.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#include <stdio.h> -+#include <string.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <linux/unistd.h> -+#include "ptrace_user.h" -+#include "user_util.h" -+#include "user.h" -+ -+int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, long *arg4, -+ long *arg5) -+{ -+ *arg1 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG1_OFFSET, 0); -+ *arg2 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG2_OFFSET, 0); -+ *arg3 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG3_OFFSET, 0); -+ *arg4 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG4_OFFSET, 0); -+ *arg5 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG5_OFFSET, 0); -+ return(ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, 0)); -+} -+ -+void syscall_cancel(pid_t pid, int result) -+{ -+ if((ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) || -+ (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) || -+ (wait_for_stop(pid, SIGTRAP, PTRACE_SYSCALL, NULL) < 0) || -+ (ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result) < 0) || -+ (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)) -+ printk("ptproxy: couldn't cancel syscall: errno = %d\n", -+ errno); -+} -+ -+void syscall_set_result(pid_t pid, long result) -+{ -+ ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result); -+} -+ -+void syscall_continue(pid_t pid) -+{ -+ ptrace(PTRACE_SYSCALL, pid, 0, 0); -+} -+ -+int syscall_pause(pid_t pid) -+{ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_pause) < 0){ -+ printk("syscall_change - ptrace failed, errno = %d\n", errno); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/sysdep.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/sysdep.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/sysdep.h 2005-05-03 22:28:14.533400800 +0300 -@@ -0,0 +1,25 @@ -+/********************************************************************** -+sysdep.h -+ -+Copyright (C) 1999 Lars Brinkhoff. -+Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+See the file COPYING for licensing terms and conditions. -+**********************************************************************/ -+ -+extern int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, -+ long *arg4, long *arg5); -+extern void syscall_cancel (pid_t pid, long result); -+extern void syscall_set_result (pid_t pid, long result); -+extern void syscall_continue (pid_t pid); -+extern int syscall_pause(pid_t pid); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/wait.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/wait.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/wait.c 2005-05-03 22:28:14.534400648 +0300 -@@ -0,0 +1,88 @@ -+/********************************************************************** -+wait.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+**********************************************************************/ -+ -+#include <errno.h> -+#include <signal.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+#include "user_util.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "sysdep/sigcontext.h" -+ -+int proxy_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ debugger->waiting = 0; -+ -+ if(debugger->debugee->died || (debugger->wait_options & __WCLONE)){ -+ debugger_cancelled_return(debugger, -ECHILD); -+ return(0); -+ } -+ -+ if(debugger->debugee->zombie && debugger->debugee->event) -+ debugger->debugee->died = 1; -+ -+ if(debugger->debugee->event){ -+ debugger->debugee->event = 0; -+ ptrace(PTRACE_POKEDATA, debugger->pid, -+ debugger->wait_status_ptr, -+ debugger->debugee->wait_status); -+ /* if (wait4) -+ ptrace (PTRACE_POKEDATA, pid, rusage_ptr, ...); */ -+ debugger_cancelled_return(debugger, debugger->debugee->pid); -+ return(0); -+ } -+ -+ /* pause will return -EINTR, which happens to be right for wait */ -+ debugger_normal_return(debugger, -1); -+ return(0); -+} -+ -+int parent_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ return(debugger_normal_return(debugger, -1)); -+} -+ -+int real_wait_return(struct debugger *debugger) -+{ -+ unsigned long ip; -+ int pid; -+ -+ pid = debugger->pid; -+ -+ ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ IP_RESTART_SYSCALL(ip); -+ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) -+ tracer_panic("real_wait_return : Failed to restart system " -+ "call, errno = %d\n", errno); -+ -+ if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ debugger_normal_return(debugger, -1)) -+ tracer_panic("real_wait_return : gdb failed to wait, " -+ "errno = %d\n", errno); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/ptproxy/wait.h -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/ptproxy/wait.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/ptproxy/wait.h 2005-05-03 22:28:14.534400648 +0300 -@@ -0,0 +1,15 @@ -+/********************************************************************** -+wait.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#ifndef __PTPROXY_WAIT_H -+#define __PTPROXY_WAIT_H -+ -+extern int proxy_wait_return(struct debugger *debugger, pid_t unused); -+extern int real_wait_return(struct debugger *debugger); -+extern int parent_wait_return(struct debugger *debugger, pid_t unused); -+ -+#endif -Index: linux-2.4.29/arch/um/kernel/tt/syscall_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/syscall_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/syscall_kern.c 2005-05-03 22:28:14.536400344 +0300 -@@ -0,0 +1,136 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/types.h" -+#include "linux/utime.h" -+#include "linux/sys.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+static inline int check_area(void *ptr, int size) -+{ -+ return(verify_area(VERIFY_WRITE, ptr, size)); -+} -+ -+static int check_readlink(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ UPT_SYSCALL_ARG2(®s->regs))); -+} -+ -+static int check_utime(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct utimbuf))); -+} -+ -+static int check_oldstat(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct __old_kernel_stat))); -+} -+ -+static int check_stat(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat))); -+} -+ -+static int check_stat64(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat64))); -+} -+ -+struct bogus { -+ int kernel_ds; -+ int (*check_params)(struct pt_regs *); -+}; -+ -+struct bogus this_is_bogus[256] = { -+ [ __NR_mknod ] = { 1, NULL }, -+ [ __NR_mkdir ] = { 1, NULL }, -+ [ __NR_rmdir ] = { 1, NULL }, -+ [ __NR_unlink ] = { 1, NULL }, -+ [ __NR_symlink ] = { 1, NULL }, -+ [ __NR_link ] = { 1, NULL }, -+ [ __NR_rename ] = { 1, NULL }, -+ [ __NR_umount ] = { 1, NULL }, -+ [ __NR_mount ] = { 1, NULL }, -+ [ __NR_pivot_root ] = { 1, NULL }, -+ [ __NR_chdir ] = { 1, NULL }, -+ [ __NR_chroot ] = { 1, NULL }, -+ [ __NR_open ] = { 1, NULL }, -+ [ __NR_quotactl ] = { 1, NULL }, -+ [ __NR_sysfs ] = { 1, NULL }, -+ [ __NR_readlink ] = { 1, check_readlink }, -+ [ __NR_acct ] = { 1, NULL }, -+ [ __NR_execve ] = { 1, NULL }, -+ [ __NR_uselib ] = { 1, NULL }, -+ [ __NR_statfs ] = { 1, NULL }, -+ [ __NR_truncate ] = { 1, NULL }, -+ [ __NR_access ] = { 1, NULL }, -+ [ __NR_chmod ] = { 1, NULL }, -+ [ __NR_chown ] = { 1, NULL }, -+ [ __NR_lchown ] = { 1, NULL }, -+ [ __NR_utime ] = { 1, check_utime }, -+ [ __NR_oldlstat ] = { 1, check_oldstat }, -+ [ __NR_oldstat ] = { 1, check_oldstat }, -+ [ __NR_stat ] = { 1, check_stat }, -+ [ __NR_lstat ] = { 1, check_stat }, -+ [ __NR_stat64 ] = { 1, check_stat64 }, -+ [ __NR_lstat64 ] = { 1, check_stat64 }, -+ [ __NR_chown32 ] = { 1, NULL }, -+}; -+ -+/* sys_utimes */ -+ -+static int check_bogosity(struct pt_regs *regs) -+{ -+ struct bogus *bogon = &this_is_bogus[UPT_SYSCALL_NR(®s->regs)]; -+ -+ if(!bogon->kernel_ds) return(0); -+ if(bogon->check_params && (*bogon->check_params)(regs)) -+ return(-EFAULT); -+ set_fs(KERNEL_DS); -+ return(0); -+} -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_tt(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(®s->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 0)) -+ res = -ENOSYS; -+ else if(honeypot && check_bogosity(regs)) -+ res = -EFAULT; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ set_fs(USER_DS); -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/syscall_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/syscall_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/syscall_user.c 2005-05-03 22:28:14.537400192 +0300 -@@ -0,0 +1,92 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <asm/unistd.h> -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "ptrace_user.h" -+#include "task.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "tt.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void syscall_handler_tt(int sig, union uml_pt_regs *regs) -+{ -+ void *sc; -+ long result; -+ int index, syscall; -+ -+ syscall = UPT_SYSCALL_NR(regs); -+ sc = UPT_SC(regs); -+ SC_START_SYSCALL(sc); -+ -+ index = record_syscall_start(syscall); -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ /* regs->sc may have changed while the system call ran (there may -+ * have been an interrupt or segfault), so it needs to be refreshed. -+ */ -+ UPT_SC(regs) = sc; -+ -+ SC_SET_SYSCALL_RETURN(sc, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+int do_syscall(void *task, int pid) -+{ -+ unsigned long proc_regs[FRAME_SIZE]; -+ union uml_pt_regs *regs; -+ int syscall; -+ -+ if(ptrace_getregs(pid, proc_regs) < 0) -+ tracer_panic("Couldn't read registers"); -+ syscall = PT_SYSCALL_NR(proc_regs); -+ -+ regs = TASK_REGS(task); -+ UPT_SYSCALL_NR(regs) = syscall; -+ -+ if(syscall < 1) return(0); -+ -+ if((syscall != __NR_sigreturn) && -+ ((unsigned long *) PT_IP(proc_regs) >= &_stext) && -+ ((unsigned long *) PT_IP(proc_regs) <= &_etext)) -+ tracer_panic("I'm tracing myself and I can't get out"); -+ -+ if(use_sysemu) -+ return(1); -+ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) -+ tracer_panic("do_syscall : Nullifying syscall failed, " -+ "errno = %d", errno); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/sys-i386/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/sys-i386/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/sys-i386/Makefile 2005-05-03 22:28:14.538400040 +0300 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -Index: linux-2.4.29/arch/um/kernel/tt/sys-i386/sigcontext.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/sys-i386/sigcontext.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/sys-i386/sigcontext.c 2005-05-03 22:28:14.539399888 +0300 -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <asm/sigcontext.h> -+#include "kern_util.h" -+#include "sysdep/frame.h" -+ -+int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ unsigned long sigs; -+ int err; -+ -+ to_fp = to->fpstate; -+ from_fp = from->fpstate; -+ sigs = to->oldmask; -+ err = copy_from_user_proc(to, from, sizeof(*to)); -+ to->oldmask = sigs; -+ if(to_fp != NULL){ -+ err |= copy_from_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_from_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ int err; -+ -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ from_fp = from->fpstate; -+ err = copy_to_user_proc(to, from, sizeof(*to)); -+ if(from_fp != NULL){ -+ err |= copy_to_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_to_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/time.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/time.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/time.c 2005-05-03 22:28:14.540399736 +0300 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <sys/time.h> -+#include <time_user.h> -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_tt(void) -+{ -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/tlb.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/tlb.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/tlb.c 2005-05-03 22:28:14.541399584 +0300 -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err; -+ -+ if((current->thread.mode.tt.extern_pid != -1) && -+ (current->thread.mode.tt.extern_pid != os_getpid())) -+ panic("fix_range fixing wrong address space, current = 0x%p", -+ current); -+ if(mm == NULL) return; -+ for(addr=start_addr;addr<end_addr;){ -+ if(addr == TASK_SIZE){ -+ /* Skip over kernel text, kernel data, and physical -+ * memory, which don't have ptes, plus kernel virtual -+ * memory, which is flushed separately, and remap -+ * the process stack. The only way to get here is -+ * if (end_addr == STACK_TOP) > TASK_SIZE, which is -+ * only true in the honeypot case. -+ */ -+ addr = STACK_TOP - ABOVE_KMEM; -+ continue; -+ } -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map_memory(addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect_memory(addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+atomic_t vmchange_seq = ATOMIC_INIT(1); -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end, -+ int update_seq) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start; addr < end;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+ if(updated && update_seq) atomic_inc(&vmchange_seq); -+} -+ -+static void protect_vm_page(unsigned long addr, int w, int must_succeed) -+{ -+ int err; -+ -+ err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed); -+ if(err == 0) return; -+ else if((err == -EFAULT) || (err == -ENOMEM)){ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+ protect_vm_page(addr, w, 1); -+ } -+ else panic("protect_vm_page : protect failed, errno = %d\n", err); -+} -+ -+void mprotect_kernel_vm(int w) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ -+ mm = &init_mm; -+ for(addr = start_vm; addr < end_vm;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(pte_present(*pte)) protect_vm_page(addr, w, 0); -+ addr += PAGE_SIZE; -+ } -+ else addr += PMD_SIZE; -+ } -+} -+ -+void flush_tlb_kernel_vm_tt(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm, 1); -+} -+ -+void __flush_tlb_one_tt(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+} -+ -+void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm != current->mm) return; -+ -+ /* Assumes that the range start ... end is entirely within -+ * either process memory or kernel vm -+ */ -+ if((start >= start_vm) && (start < end_vm)) -+ flush_kernel_vm_range(start, end, 1); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_tt(struct mm_struct *mm) -+{ -+ unsigned long seq; -+ -+ if(mm != current->mm) return; -+ -+ fix_range(mm, 0, STACK_TOP, 0); -+ -+ seq = atomic_read(&vmchange_seq); -+ if(current->thread.mode.tt.vm_seq == seq) return; -+ current->thread.mode.tt.vm_seq = seq; -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+void force_flush_all_tt(void) -+{ -+ fix_range(current->mm, 0, STACK_TOP, 1); -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/tracer.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/tracer.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/tracer.c 2005-05-03 22:28:14.543399280 +0300 -@@ -0,0 +1,457 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sched.h> -+#include <string.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include <sys/time.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "process.h" -+#include "kern_util.h" -+#include "frame.h" -+#include "chan_user.h" -+#include "ptrace_user.h" -+#include "mode.h" -+#include "tt.h" -+ -+static int tracer_winch[2]; -+ -+int is_tracer_winch(int pid, int fd, void *data) -+{ -+ if(pid != tracing_pid) -+ return(0); -+ -+ register_winch_irq(tracer_winch[0], fd, -1, data); -+ return(1); -+} -+ -+static void tracer_winch_handler(int sig) -+{ -+ int n; -+ char c = 1; -+ -+ n = os_write_file(tracer_winch[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("tracer_winch_handler - write failed, err = %d\n", -n); -+} -+ -+/* Called only by the tracing thread during initialization */ -+ -+static void setup_tracer_winch(void) -+{ -+ int err; -+ -+ err = os_pipe(tracer_winch, 1, 1); -+ if(err < 0){ -+ printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); -+ return; -+ } -+ signal(SIGWINCH, tracer_winch_handler); -+} -+ -+void attach_process(int pid) -+{ -+ if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, pid, 0, 0) < 0)) -+ tracer_panic("OP_FORK failed to attach pid"); -+ wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ tracer_panic("OP_FORK failed to continue process"); -+} -+ -+void tracer_panic(char *format, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, format); -+ vprintf(format, ap); -+ printf("\n"); -+ while(1) pause(); -+} -+ -+static void tracer_segv(int sig, struct sigcontext sc) -+{ -+ printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n", -+ SC_FAULT_ADDR(&sc), SC_IP(&sc)); -+ while(1) -+ pause(); -+} -+ -+/* Changed early in boot, and then only read */ -+int debug = 0; -+int debug_stop = 1; -+int debug_parent = 0; -+int honeypot = 0; -+ -+static int signal_tramp(void *arg) -+{ -+ int (*proc)(void *); -+ -+ if(honeypot && munmap((void *) (host_task_size - 0x10000000), -+ 0x10000000)) -+ panic("Unmapping stack failed"); -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("ptrace PTRACE_TRACEME failed"); -+ os_stop_process(os_getpid()); -+ change_sig(SIGWINCH, 0); -+ signal(SIGUSR1, SIG_IGN); -+ change_sig(SIGCHLD, 0); -+ signal(SIGSEGV, (__sighandler_t) sig_handler); -+ set_cmdline("(idle thread)"); -+ set_init_pid(os_getpid()); -+ proc = arg; -+ return((*proc)(NULL)); -+} -+ -+static void sleeping_process_signal(int pid, int sig) -+{ -+ switch(sig){ -+ /* These two result from UML being ^Z-ed and bg-ed. PTRACE_CONT is -+ * right because the process must be in the kernel already. -+ */ -+ case SIGCONT: -+ case SIGTSTP: -+ if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "continue pid %d, signal = %d, " -+ "errno = %d\n", pid, sig, errno); -+ break; -+ -+ /* This happens when the debugger (e.g. strace) is doing system call -+ * tracing on the kernel. During a context switch, the current task -+ * will be set to the incoming process and the outgoing process will -+ * hop into write and then read. Since it's not the current process -+ * any more, the trace of those will land here. So, we need to just -+ * PTRACE_SYSCALL it. -+ */ -+ case SIGTRAP: -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "PTRACE_SYSCALL pid %d, errno = %d\n", -+ pid, errno); -+ break; -+ case SIGSTOP: -+ break; -+ default: -+ tracer_panic("sleeping process %d got unexpected " -+ "signal : %d\n", pid, sig); -+ break; -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+int debugger_pid = -1; -+int debugger_parent = -1; -+int debugger_fd = -1; -+int gdb_pid = -1; -+ -+struct { -+ int pid; -+ int signal; -+ unsigned long addr; -+ struct timeval time; -+} signal_record[1024][32]; -+ -+int signal_index[32]; -+int nsignals = 0; -+int debug_trace = 0; -+extern int io_nsignals, io_count, intr_count; -+ -+extern void signal_usr1(int sig); -+ -+int tracing_pid = -1; -+ -+int tracer(int (*init_proc)(void *), void *sp) -+{ -+ void *task = NULL; -+ unsigned long eip = 0; -+ int status, pid = 0, sig = 0, cont_type, tracing = 0, op = 0; -+ int last_index, proc_id = 0, n, err, old_tracing = 0, strace = 0; -+ int cont_syscall; -+ -+ capture_signal_stack(); -+ signal(SIGPIPE, SIG_IGN); -+ setup_tracer_winch(); -+ tracing_pid = os_getpid(); -+ printf("tracing thread pid = %d\n", tracing_pid); -+ -+ pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc); -+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); -+ if(n < 0){ -+ printf("waitpid on idle thread failed, errno = %d\n", errno); -+ exit(1); -+ } -+ if((ptrace(PTRACE_CONT, pid, 0, 0) < 0)){ -+ printf("Failed to continue idle thread, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ signal(SIGSEGV, (sighandler_t) tracer_segv); -+ signal(SIGUSR1, signal_usr1); -+ if(debug_trace){ -+ printf("Tracing thread pausing to be attached\n"); -+ stop(); -+ } -+ if(debug){ -+ if(gdb_pid != -1) -+ debugger_pid = attach_debugger(pid, gdb_pid, 1); -+ else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop); -+ if(debug_parent){ -+ debugger_parent = os_process_parent(debugger_pid); -+ init_parent_proxy(debugger_parent); -+ err = attach(debugger_parent); -+ if(err){ -+ printf("Failed to attach debugger parent %d, " -+ "errno = %d\n", debugger_parent, -err); -+ debugger_parent = -1; -+ } -+ else { -+ if(ptrace(PTRACE_SYSCALL, debugger_parent, -+ 0, 0) < 0){ -+ printf("Failed to continue debugger " -+ "parent, errno = %d\n", errno); -+ debugger_parent = -1; -+ } -+ } -+ } -+ } -+ set_cmdline("(tracing thread)"); -+ while(1){ -+ CATCH_EINTR(pid = waitpid(-1, &status, WUNTRACED)); -+ if(pid <= 0){ -+ if(errno != ECHILD){ -+ printf("wait failed - errno = %d\n", errno); -+ } -+ continue; -+ } -+ if(pid == debugger_pid){ -+ int cont = 0; -+ -+ if(WIFEXITED(status) || WIFSIGNALED(status)) -+ debugger_pid = -1; -+ /* XXX Figure out how to deal with gdb and SMP */ -+ else cont = debugger_signal(status, cpu_tasks[0].pid); -+ if(cont == PTRACE_SYSCALL) strace = 1; -+ continue; -+ } -+ else if(pid == debugger_parent){ -+ debugger_parent_signal(status, pid); -+ continue; -+ } -+ nsignals++; -+ if(WIFEXITED(status)) ; -+#ifdef notdef -+ { -+ printf("Child %d exited with status %d\n", pid, -+ WEXITSTATUS(status)); -+ } -+#endif -+ else if(WIFSIGNALED(status)){ -+ sig = WTERMSIG(status); -+ if(sig != 9){ -+ printf("Child %d exited with signal %d\n", pid, -+ sig); -+ } -+ } -+ else if(WIFSTOPPED(status)){ -+ proc_id = pid_to_processor_id(pid); -+ sig = WSTOPSIG(status); -+ if(signal_index[proc_id] == 1024){ -+ signal_index[proc_id] = 0; -+ last_index = 1023; -+ } -+ else last_index = signal_index[proc_id] - 1; -+ if(((sig == SIGPROF) || (sig == SIGVTALRM) || -+ (sig == SIGALRM)) && -+ (signal_record[proc_id][last_index].signal == sig)&& -+ (signal_record[proc_id][last_index].pid == pid)) -+ signal_index[proc_id] = last_index; -+ signal_record[proc_id][signal_index[proc_id]].pid = pid; -+ gettimeofday(&signal_record[proc_id][signal_index[proc_id]].time, NULL); -+ eip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ signal_record[proc_id][signal_index[proc_id]].addr = eip; -+ signal_record[proc_id][signal_index[proc_id]++].signal = sig; -+ -+ if(proc_id == -1){ -+ sleeping_process_signal(pid, sig); -+ continue; -+ } -+ -+ task = cpu_tasks[proc_id].task; -+ tracing = is_tracing(task); -+ old_tracing = tracing; -+ -+ cont_syscall = use_sysemu ? PTRACE_SYSEMU : -+ PTRACE_SYSCALL; -+ switch(sig){ -+ case SIGUSR1: -+ sig = 0; -+ op = do_proc_op(task, proc_id); -+ switch(op){ -+ case OP_TRACE_ON: -+ arch_leave_kernel(task, pid); -+ tracing = 1; -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ unmap_physmem(); -+ kmalloc_ok = 0; -+ ptrace(PTRACE_KILL, pid, 0, 0); -+ return(op == OP_REBOOT); -+ case OP_NONE: -+ printf("Detaching pid %d\n", pid); -+ detach(pid, SIGSTOP); -+ continue; -+ default: -+ break; -+ } -+ /* OP_EXEC switches host processes on us, -+ * we want to continue the new one. -+ */ -+ pid = cpu_tasks[proc_id].pid; -+ break; -+ case SIGTRAP: -+ if(!tracing && (debugger_pid != -1)){ -+ child_signal(pid, status); -+ continue; -+ } -+ tracing = 0; -+ if(do_syscall(task, pid)) -+ sig = SIGUSR2; -+ break; -+ case SIGPROF: -+ if(tracing) sig = 0; -+ break; -+ case SIGCHLD: -+ case SIGHUP: -+ sig = 0; -+ break; -+ case SIGSEGV: -+ case SIGIO: -+ case SIGALRM: -+ case SIGVTALRM: -+ case SIGFPE: -+ case SIGBUS: -+ case SIGILL: -+ case SIGWINCH: -+ default: -+ tracing = 0; -+ break; -+ } -+ set_tracing(task, tracing); -+ -+ if(!tracing && old_tracing) -+ arch_enter_kernel(task, pid); -+ -+ if(!tracing && (debugger_pid != -1) && (sig != 0) && -+ (sig != SIGALRM) && (sig != SIGVTALRM) && -+ (sig != SIGSEGV) && (sig != SIGTRAP) && -+ (sig != SIGUSR2) && (sig != SIGIO) && -+ (sig != SIGFPE)){ -+ child_signal(pid, status); -+ continue; -+ } -+ -+ if(tracing){ -+ if(singlestepping(task)) -+ cont_type = PTRACE_SINGLESTEP; -+ else cont_type = cont_syscall; -+ } -+ else cont_type = PTRACE_CONT; -+ -+ if((cont_type == PTRACE_CONT) && -+ (debugger_pid != -1) && strace) -+ cont_type = PTRACE_SYSCALL; -+ -+ if(ptrace(cont_type, pid, 0, sig) != 0){ -+ tracer_panic("ptrace failed to continue " -+ "process - errno = %d\n", -+ errno); -+ } -+ } -+ } -+ return(0); -+} -+ -+static int __init uml_debug_setup(char *line, int *add) -+{ -+ char *next; -+ -+ debug = 1; -+ *add = 0; -+ if(*line != '=') return(0); -+ line++; -+ -+ while(line != NULL){ -+ next = strchr(line, ','); -+ if(next) *next++ = '\0'; -+ -+ if(!strcmp(line, "go")) debug_stop = 0; -+ else if(!strcmp(line, "parent")) debug_parent = 1; -+ else printf("Unknown debug option : '%s'\n", line); -+ -+ line = next; -+ } -+ return(0); -+} -+ -+__uml_setup("debug", uml_debug_setup, -+"debug\n" -+" Starts up the kernel under the control of gdb. See the \n" -+" kernel debugging tutorial and the debugging session pages\n" -+" at http://user-mode-linux.sourceforge.net/ for more information.\n\n" -+); -+ -+static int __init uml_debugtrace_setup(char *line, int *add) -+{ -+ debug_trace = 1; -+ return 0; -+} -+__uml_setup("debugtrace", uml_debugtrace_setup, -+"debugtrace\n" -+" Causes the tracing thread to pause until it is attached by a\n" -+" debugger and continued. This is mostly for debugging crashes\n" -+" early during boot, and should be pretty much obsoleted by\n" -+" the debug switch.\n\n" -+); -+ -+static int __init uml_honeypot_setup(char *line, int *add) -+{ -+ jail_setup("", add); -+ honeypot = 1; -+ return 0; -+} -+__uml_setup("honeypot", uml_honeypot_setup, -+"honeypot\n" -+" This makes UML put process stacks in the same location as they are\n" -+" on the host, allowing expoits such as stack smashes to work against\n" -+" UML. This implies 'jail'.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/trap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/trap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/trap_user.c 2005-05-03 22:28:14.544399128 +0300 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "tt.h" -+ -+void sig_handler_common_tt(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct tt_regs save_regs, *r; -+ struct signal_info *info; -+ int save_errno = errno, is_user; -+ -+ unprotect_kernel_mem(); -+ -+ /* This is done because to allow SIGSEGV to be delivered inside a SEGV -+ * handler. This can happen in copy_user, and if SEGV is disabled, -+ * the process will die. -+ */ -+ if(sig == SIGSEGV) -+ change_sig(SIGSEGV, 1); -+ -+ r = &TASK_REGS(get_current())->tt; -+ save_regs = *r; -+ is_user = user_context(SC_SP(sc)); -+ r->sc = sc; -+ if(sig != SIGUSR2) -+ r->syscall = -1; -+ -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ if(is_user){ -+ interrupt_end(); -+ block_signals(); -+ set_user_mode(NULL); -+ } -+ *r = save_regs; -+ errno = save_errno; -+ if(is_user) protect_kernel_mem(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/uaccess.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/uaccess.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/uaccess.c 2005-05-03 22:28:14.545398976 +0300 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/uaccess.h" -+ -+int copy_from_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_READ, from, n)) -+ return(n); -+ -+ return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int copy_to_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, to, n)) -+ return(n); -+ -+ return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int strncpy_from_user_tt(char *dst, const char *src, int count) -+{ -+ int n; -+ -+ if(!access_ok_tt(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = __do_strncpy_from_user(dst, src, count, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher); -+ if(n < 0) return(-EFAULT); -+ return(n); -+} -+ -+int __clear_user_tt(void *mem, int len) -+{ -+ return(__do_clear_user(mem, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int clear_user_tt(void *mem, int len) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, mem, len)) -+ return(len); -+ -+ return(__do_clear_user(mem, len, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+int strnlen_user_tt(const void *str, int len) -+{ -+ return(__do_strnlen_user(str, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/uaccess_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/uaccess_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/uaccess_user.c 2005-05-03 22:28:14.546398824 +0300 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+#include "user_util.h" -+#include "uml_uaccess.h" -+#include "task.h" -+#include "kern_util.h" -+ -+int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) from)); -+} -+ -+static void __do_strncpy(void *dst, const void *src, int count) -+{ -+ strncpy(dst, src, count); -+} -+ -+int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, -+ __do_strncpy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(strlen(dst)); -+ else return(-1); -+} -+ -+static void __do_clear(void *to, const void *from, int n) -+{ -+ memset(to, 0, n); -+} -+ -+int __do_clear_user(void *mem, unsigned long len, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, -+ __do_clear, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(len - (fault - (unsigned long) mem)); -+} -+ -+int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ int ret; -+ unsigned long *faddrp = (unsigned long *)fault_addr; -+ sigjmp_buf jbuf; -+ -+ *fault_catcher = &jbuf; -+ if(sigsetjmp(jbuf, 1) == 0) -+ ret = strlen(str) + 1; -+ else ret = *faddrp - (unsigned long) str; -+ -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ -+ TASK_REGS(get_current())->tt = save; -+ return ret; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tt/unmap.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tt/unmap.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tt/unmap.c 2005-05-03 22:28:14.547398672 +0300 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/mman.h> -+ -+int switcheroo(int fd, int prot, void *from, void *to, int size) -+{ -+ if(munmap(to, size) < 0){ -+ return(-1); -+ } -+ if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){ -+ return(-1); -+ } -+ if(munmap(from, size) < 0){ -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/tty_log.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/tty_log.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/tty_log.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,230 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and -+ * geoffrey hing <ghing@net.ohio-state.edu> -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <string.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <sys/time.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+ -+#define TTY_LOG_DIR "./" -+ -+/* Set early in boot and then unchanged */ -+static char *tty_log_dir = TTY_LOG_DIR; -+static int tty_log_fd = -1; -+ -+#define TTY_LOG_OPEN 1 -+#define TTY_LOG_CLOSE 2 -+#define TTY_LOG_WRITE 3 -+#define TTY_LOG_EXEC 4 -+ -+#define TTY_READ 1 -+#define TTY_WRITE 2 -+ -+struct tty_log_buf { -+ int what; -+ unsigned long tty; -+ int len; -+ int direction; -+ unsigned long sec; -+ unsigned long usec; -+}; -+ -+int open_tty_log(void *tty, void *current_tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; -+ int fd; -+ -+ gettimeofday(&tv, NULL); -+ if(tty_log_fd != -1){ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, -+ .tty = (unsigned long) tty, -+ .len = sizeof(current_tty), -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ os_write_file(tty_log_fd, ¤t_tty, data.len); -+ return(tty_log_fd); -+ } -+ -+ sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, -+ (unsigned int) tv.tv_usec); -+ -+ fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printk("open_tty_log : couldn't open '%s', errno = %d\n", -+ buf, -fd); -+ } -+ return(fd); -+} -+ -+void close_tty_log(int fd, void *tty) -+{ -+ struct tty_log_buf data; -+ struct timeval tv; -+ -+ if(tty_log_fd != -1){ -+ gettimeofday(&tv, NULL); -+ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, -+ .tty = (unsigned long) tty, -+ .len = 0, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ return; -+ } -+ os_close_file(fd); -+} -+ -+static int log_chunk(int fd, const char *buf, int len) -+{ -+ int total = 0, try, missed, n; -+ char chunk[64]; -+ -+ while(len > 0){ -+ try = (len > sizeof(chunk)) ? sizeof(chunk) : len; -+ missed = copy_from_user_proc(chunk, (char *) buf, try); -+ try -= missed; -+ n = os_write_file(fd, chunk, try); -+ if(n != try) { -+ if(n < 0) -+ return(n); -+ return(-EIO); -+ } -+ if(missed != 0) -+ return(-EFAULT); -+ -+ len -= try; -+ total += try; -+ buf += try; -+ } -+ -+ return(total); -+} -+ -+int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ int direction; -+ -+ if(fd == tty_log_fd){ -+ gettimeofday(&tv, NULL); -+ direction = is_read ? TTY_READ : TTY_WRITE; -+ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = direction, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ } -+ -+ return(log_chunk(fd, buf, len)); -+} -+ -+void log_exec(char **argv, void *tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char **ptr,*arg; -+ int len; -+ -+ if(tty_log_fd == -1) return; -+ -+ gettimeofday(&tv, NULL); -+ -+ len = 0; -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ len += strlen_user_proc(arg); -+ } -+ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ os_write_file(tty_log_fd, &data, sizeof(data)); -+ -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); -+ } -+} -+ -+extern void register_tty_logger(int (*opener)(void *, void *), -+ int (*writer)(int, const char *, int, -+ void *, int), -+ void (*closer)(int, void *)); -+ -+static int register_logger(void) -+{ -+ register_tty_logger(open_tty_log, write_tty_log, close_tty_log); -+ return(0); -+} -+ -+__uml_initcall(register_logger); -+ -+static int __init set_tty_log_dir(char *name, int *add) -+{ -+ tty_log_dir = name; -+ return 0; -+} -+ -+__uml_setup("tty_log_dir=", set_tty_log_dir, -+"tty_log_dir=<directory>\n" -+" This is used to specify the directory where the logs of all pty\n" -+" data from this UML machine will be written.\n\n" -+); -+ -+static int __init set_tty_log_fd(char *name, int *add) -+{ -+ char *end; -+ -+ tty_log_fd = strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ printf("set_tty_log_fd - strtoul failed on '%s'\n", name); -+ tty_log_fd = -1; -+ } -+ -+ *add = 0; -+ return 0; -+} -+ -+__uml_setup("tty_log_fd=", set_tty_log_fd, -+"tty_log_fd=<fd>\n" -+" This is used to specify a preconfigured file descriptor to which all\n" -+" tty data will be written. Preconfigure the descriptor with something\n" -+" like '10>tty_log tty_log_fd=10'.\n\n" -+); -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/uaccess_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/uaccess_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/uaccess_user.c 2005-05-03 22:28:14.549398368 +0300 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+ -+/* These are here rather than tt/uaccess.c because skas mode needs them in -+ * order to do SIGBUS recovery when a tmpfs mount runs out of room. -+ */ -+ -+unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out) -+{ -+ unsigned long *faddrp = (unsigned long *) fault_addr, ret; -+ -+ sigjmp_buf jbuf; -+ *fault_catcher = &jbuf; -+ if(sigsetjmp(jbuf, 1) == 0){ -+ (*op)(to, from, n); -+ ret = 0; -+ *faulted_out = 0; -+ } -+ else { -+ ret = *faddrp; -+ *faulted_out = 1; -+ } -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ return ret; -+} -+ -+void __do_copy(void *to, const void *from, int n) -+{ -+ memcpy(to, from, n); -+} -+ -+ -+int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) to)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/um_arch.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/um_arch.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/um_arch.c 2005-05-03 22:28:14.552397912 +0300 -@@ -0,0 +1,445 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/notifier.h" -+#include "linux/mm.h" -+#include "linux/types.h" -+#include "linux/tty.h" -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/spinlock.h" -+#include "linux/utsname.h" -+#include "linux/sysrq.h" -+#include "linux/seq_file.h" -+#include "linux/delay.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/ptrace.h" -+#include "asm/elf.h" -+#include "asm/user.h" -+#include "ubd_user.h" -+#include "asm/current.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mem_user.h" -+#include "mem.h" -+#include "umid.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+#include "mode.h" -+ -+#define DEFAULT_COMMAND_LINE "root=/dev/ubd0" -+ -+struct cpuinfo_um boot_cpu_data = { -+ .loops_per_jiffy = 0, -+ .pgd_quick = NULL, -+ .pmd_quick = NULL, -+ .pte_quick = NULL, -+ .pgtable_cache_sz = 0, -+ .ipi_pipe = { -1, -1 } -+}; -+ -+unsigned long thread_saved_pc(struct thread_struct *thread) -+{ -+ return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, -+ thread))); -+} -+ -+static int show_cpuinfo(struct seq_file *m, void *v) -+{ -+ int index = 0; -+ -+#ifdef CONFIG_SMP -+ index = (struct cpuinfo_um *)v - cpu_data; -+ if (!(cpu_online_map & (1 << index))) -+ return 0; -+#endif -+ -+ seq_printf(m, "processor\t: %d\n", index); -+ seq_printf(m, "vendor_id\t: User Mode Linux\n"); -+ seq_printf(m, "model name\t: UML\n"); -+ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); -+ seq_printf(m, "host\t\t: %s\n", host_info); -+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n", -+ loops_per_jiffy/(500000/HZ), -+ (loops_per_jiffy/(5000/HZ)) % 100); -+ -+ return(0); -+} -+ -+static void *c_start(struct seq_file *m, loff_t *pos) -+{ -+ return *pos < NR_CPUS ? cpu_data + *pos : NULL; -+} -+ -+static void *c_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return c_start(m, pos); -+} -+ -+static void c_stop(struct seq_file *m, void *v) -+{ -+} -+ -+struct seq_operations cpuinfo_op = { -+ .start = c_start, -+ .next = c_next, -+ .stop = c_stop, -+ .show = show_cpuinfo, -+}; -+ -+pte_t * __bad_pagetable(void) -+{ -+ panic("Someone should implement __bad_pagetable"); -+ return(NULL); -+} -+ -+/* Set in linux_main */ -+unsigned long host_task_size; -+unsigned long task_size; -+unsigned long uml_start; -+ -+/* Set in early boot */ -+unsigned long uml_physmem; -+unsigned long uml_reserved; -+unsigned long start_vm; -+unsigned long end_vm; -+int ncpus = 1; -+ -+#ifdef CONFIG_MODE_TT -+/* Pointer set in linux_main, the array itself is private to each thread, -+ * and changed at address space creation time so this poses no concurrency -+ * problems. -+ */ -+static char *argv1_begin = NULL; -+static char *argv1_end = NULL; -+#endif -+ -+/* Set in early boot */ -+static int have_root __initdata = 0; -+long physmem_size = 32 * 1024 * 1024; -+ -+void set_cmdline(char *cmd) -+{ -+#ifdef CONFIG_MODE_TT -+ char *umid, *ptr; -+ -+ if(CHOOSE_MODE(honeypot, 0)) return; -+ -+ umid = get_umid(1); -+ if(umid != NULL){ -+ snprintf(argv1_begin, -+ (argv1_end - argv1_begin) * sizeof(*ptr), -+ "(%s) ", umid); -+ ptr = &argv1_begin[strlen(argv1_begin)]; -+ } -+ else ptr = argv1_begin; -+ -+ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); -+ memset(argv1_begin + strlen(argv1_begin), '\0', -+ argv1_end - argv1_begin - strlen(argv1_begin)); -+#endif -+} -+ -+static char *usage_string = -+"User Mode Linux v%s\n" -+" available at http://user-mode-linux.sourceforge.net/\n\n"; -+ -+static int __init uml_version_setup(char *line, int *add) -+{ -+ printf("%s\n", system_utsname.release); -+ exit(0); -+} -+ -+__uml_setup("--version", uml_version_setup, -+"--version\n" -+" Prints the version number of the kernel.\n\n" -+); -+ -+static int __init uml_root_setup(char *line, int *add) -+{ -+ have_root = 1; -+ return 0; -+} -+ -+__uml_setup("root=", uml_root_setup, -+"root=<file containing the root fs>\n" -+" This is actually used by the generic kernel in exactly the same\n" -+" way as in any other kernel. If you configure a number of block\n" -+" devices and want to boot off something other than ubd0, you \n" -+" would use something like:\n" -+" root=/dev/ubd5\n\n" -+); -+ -+#ifdef CONFIG_SMP -+static int __init uml_ncpus_setup(char *line, int *add) -+{ -+ if (!sscanf(line, "%d", &ncpus)) { -+ printf("Couldn't parse [%s]\n", line); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+__uml_setup("ncpus=", uml_ncpus_setup, -+"ncpus=<# of desired CPUs>\n" -+" This tells an SMP kernel how many virtual processors to start.\n\n" -+); -+#endif -+ -+int force_tt = 0; -+ -+#if defined(CONFIG_MODE_TT) && defined(CONFIG_MODE_SKAS) -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ force_tt = 1; -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_SKAS -+ -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_TT -+ -+#define DEFAULT_TT 1 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); -+ return(0); -+} -+ -+#else -+ -+#error Either CONFIG_MODE_TT or CONFIG_MODE_SKAS must be enabled -+ -+#endif -+#endif -+#endif -+ -+__uml_setup("mode=tt", mode_tt_setup, -+"mode=tt\n" -+" When both CONFIG_MODE_TT and CONFIG_MODE_SKAS are enabled, this option\n" -+" forces UML to run in tt (tracing thread) mode. It is not the default\n" -+" because it's slower and less secure than skas mode.\n\n" -+); -+ -+int mode_tt = DEFAULT_TT; -+ -+static int __init Usage(char *line, int *add) -+{ -+ const char **p; -+ -+ printf(usage_string, system_utsname.release); -+ p = &__uml_help_start; -+ while (p < &__uml_help_end) { -+ printf("%s", *p); -+ p++; -+ } -+ exit(0); -+} -+ -+__uml_setup("--help", Usage, -+"--help\n" -+" Prints this message.\n\n" -+); -+ -+static int __init uml_checksetup(char *line, int *add) -+{ -+ struct uml_param *p; -+ -+ p = &__uml_setup_start; -+ while(p < &__uml_setup_end) { -+ int n; -+ -+ n = strlen(p->str); -+ if(!strncmp(line, p->str, n)){ -+ if (p->setup_func(line + n, add)) return 1; -+ } -+ p++; -+ } -+ return 0; -+} -+ -+static void __init uml_postsetup(void) -+{ -+ initcall_t *p; -+ -+ p = &__uml_postsetup_start; -+ while(p < &__uml_postsetup_end){ -+ (*p)(); -+ p++; -+ } -+ return; -+} -+ -+/* Set during early boot */ -+unsigned long brk_start; -+unsigned long end_iomem; -+ -+#define MIN_VMALLOC (32 * 1024 * 1024) -+ -+int linux_main(int argc, char **argv) -+{ -+ unsigned long avail, diff; -+ unsigned long virtmem_size, max_physmem; -+ unsigned int i, add; -+ -+ for (i = 1; i < argc; i++){ -+ if((i == 1) && (argv[i][0] == ' ')) continue; -+ add = 1; -+ uml_checksetup(argv[i], &add); -+ if(add) add_arg(saved_command_line, argv[i]); -+ } -+ if(have_root == 0) add_arg(saved_command_line, DEFAULT_COMMAND_LINE); -+ -+ mode_tt = force_tt ? 1 : !can_do_skas(); -+ uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, -+ &host_task_size, &task_size); -+ -+ /* Need to check this early because mmapping happens before the -+ * kernel is running. -+ */ -+ check_tmpexec(); -+ -+ brk_start = (unsigned long) sbrk(0); -+ CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start); -+ /* Increase physical memory size for exec-shield users -+ so they actually get what they asked for. This should -+ add zero for non-exec shield users */ -+ -+ diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); -+ if(diff > 1024 * 1024){ -+ printf("Adding %ld bytes to physical memory to account for " -+ "exec-shield gap\n", diff); -+ physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); -+ } -+ -+ uml_physmem = uml_start; -+ -+ /* Reserve up to 4M after the current brk */ -+ uml_reserved = ROUND_4M(brk_start) + (1 << 22); -+ -+ setup_machinename(system_utsname.machine); -+ -+#ifdef CONFIG_MODE_TT -+ argv1_begin = argv[1]; -+ argv1_end = &argv[1][strlen(argv[1])]; -+#endif -+ -+ highmem = 0; -+ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; -+ max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; -+ -+ /* Zones have to begin on a 1 << MAX_ORDER page boundary, -+ * so this makes sure that's true for highmem -+ */ -+ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); -+ if(physmem_size + iomem_size > max_physmem){ -+ highmem = physmem_size + iomem_size - max_physmem; -+ physmem_size -= highmem; -+#ifndef CONFIG_HIGHMEM -+ highmem = 0; -+ printf("CONFIG_HIGHMEM not enabled - physical memory shrunk " -+ "to %ld bytes\n", physmem_size); -+#endif -+ } -+ -+ high_physmem = uml_physmem + physmem_size; -+ end_iomem = high_physmem + iomem_size; -+ high_memory = (void *) end_iomem; -+ -+ start_vm = VMALLOC_START; -+ -+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); -+ if(init_maps(physmem_size, iomem_size, highmem)){ -+ printf("Failed to allocate mem_map for %ld bytes of physical " -+ "memory and %ld bytes of highmem\n", physmem_size, -+ highmem); -+ exit(1); -+ } -+ -+ virtmem_size = physmem_size; -+ avail = get_kmem_end() - start_vm; -+ if(physmem_size > avail) virtmem_size = avail; -+ end_vm = start_vm + virtmem_size; -+ -+ if(virtmem_size < physmem_size) -+ printf("Kernel virtual memory size shrunk to %ld bytes\n", -+ virtmem_size); -+ -+ uml_postsetup(); -+ -+ task_protections((unsigned long) &init_task); -+ os_flush_stdout(); -+ -+ return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); -+} -+ -+extern int uml_exitcode; -+ -+static int panic_exit(struct notifier_block *self, unsigned long unused1, -+ void *unused2) -+{ -+#ifdef CONFIG_MAGIC_SYSRQ -+ handle_sysrq('p', ¤t->thread.regs, NULL, NULL); -+#endif -+ uml_exitcode = 1; -+ machine_halt(); -+ return(0); -+} -+ -+static struct notifier_block panic_exit_notifier = { -+ .notifier_call = panic_exit, -+ .next = NULL, -+ .priority = 0 -+}; -+ -+void __init setup_arch(char **cmdline_p) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ paging_init(); -+ strcpy(command_line, saved_command_line); -+ *cmdline_p = command_line; -+ setup_hostinfo(); -+} -+ -+void __init check_bugs(void) -+{ -+ arch_check_bugs(); -+ check_ptrace(); -+ check_sigio(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/umid.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/umid.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/umid.c 2005-05-03 22:28:14.554397608 +0300 -@@ -0,0 +1,328 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <stdlib.h> -+#include <dirent.h> -+#include <signal.h> -+#include <sys/stat.h> -+#include <sys/param.h> -+#include "user.h" -+#include "umid.h" -+#include "init.h" -+#include "os.h" -+#include "user_util.h" -+#include "choose-mode.h" -+ -+#define UMID_LEN 64 -+#define UML_DIR "~/.uml/" -+ -+/* Changed by set_umid and make_umid, which are run early in boot */ -+static char umid[UMID_LEN] = { 0 }; -+ -+/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ -+static char *uml_dir = UML_DIR; -+ -+/* Changed by set_umid */ -+static int umid_is_random = 1; -+static int umid_inited = 0; -+ -+static int make_umid(int (*printer)(const char *fmt, ...)); -+ -+static int __init set_umid(char *name, int is_random, -+ int (*printer)(const char *fmt, ...)) -+{ -+ if(umid_inited){ -+ (*printer)("Unique machine name can't be set twice\n"); -+ return(-1); -+ } -+ -+ if(strlen(name) > UMID_LEN - 1) -+ (*printer)("Unique machine name is being truncated to %d " -+ "characters\n", UMID_LEN); -+ strncpy(umid, name, UMID_LEN - 1); -+ umid[UMID_LEN - 1] = '\0'; -+ -+ umid_is_random = is_random; -+ umid_inited = 1; -+ return 0; -+} -+ -+static int __init set_umid_arg(char *name, int *add) -+{ -+ *add = 0; -+ return(set_umid(name, 0, printf)); -+} -+ -+__uml_setup("umid=", set_umid_arg, -+"umid=<name>\n" -+" This is used to assign a unique identity to this UML machine and\n" -+" is used for naming the pid file and management console socket.\n\n" -+); -+ -+int __init umid_file_name(char *name, char *buf, int len) -+{ -+ int n; -+ -+ if(!umid_inited && make_umid(printk)) return(-1); -+ -+ n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; -+ if(n > len){ -+ printk("umid_file_name : buffer too short\n"); -+ return(-1); -+ } -+ -+ sprintf(buf, "%s%s/%s", uml_dir, umid, name); -+ return(0); -+} -+ -+extern int tracing_pid; -+ -+static int __init create_pid_file(void) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")]; -+ int fd, n; -+ -+ if(umid_file_name("pid", file, sizeof(file))) return 0; -+ -+ fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printf("Open of machine pid file \"%s\" failed - " -+ "err = %d\n", file, -fd); -+ return 0; -+ } -+ -+ sprintf(pid, "%d\n", os_getpid()); -+ n = os_write_file(fd, pid, strlen(pid)); -+ if(n != strlen(pid)) -+ printf("Write of pid file failed - err = %d\n", -n); -+ os_close_file(fd); -+ return 0; -+} -+ -+static int actually_do_remove(char *dir) -+{ -+ DIR *directory; -+ struct dirent *ent; -+ int len; -+ char file[256]; -+ -+ directory = opendir(dir); -+ if(directory == NULL){ -+ printk("actually_do_remove : couldn't open directory '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ while((ent = readdir(directory)) != NULL){ -+ if(!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) -+ continue; -+ len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; -+ if(len > sizeof(file)){ -+ printk("Not deleting '%s' from '%s' - name too long\n", -+ ent->d_name, dir); -+ continue; -+ } -+ sprintf(file, "%s/%s", dir, ent->d_name); -+ if(unlink(file) < 0){ -+ printk("actually_do_remove : couldn't remove '%s' " -+ "from '%s', errno = %d\n", ent->d_name, dir, -+ errno); -+ return(1); -+ } -+ } -+ if(rmdir(dir) < 0){ -+ printk("actually_do_remove : couldn't rmdir '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ return(0); -+} -+ -+void remove_umid_dir(void) -+{ -+ char dir[strlen(uml_dir) + UMID_LEN + 1]; -+ if(!umid_inited) return; -+ -+ sprintf(dir, "%s%s", uml_dir, umid); -+ actually_do_remove(dir); -+} -+ -+char *get_umid(int only_if_set) -+{ -+ if(only_if_set && umid_is_random) return(NULL); -+ return(umid); -+} -+ -+int not_dead_yet(char *dir) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")], *end; -+ int dead, fd, p, n; -+ -+ sprintf(file, "%s/pid", dir); -+ dead = 0; -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ if(fd != -ENOENT){ -+ printk("not_dead_yet : couldn't open pid file '%s', " -+ "err = %d\n", file, -fd); -+ return(1); -+ } -+ dead = 1; -+ } -+ if(fd > 0){ -+ n = os_read_file(fd, pid, sizeof(pid)); -+ if(n < 0){ -+ printk("not_dead_yet : couldn't read pid file '%s', " -+ "err = %d\n", file, -n); -+ return(1); -+ } -+ p = strtoul(pid, &end, 0); -+ if(end == pid){ -+ printk("not_dead_yet : couldn't parse pid file '%s', " -+ "errno = %d\n", file, errno); -+ dead = 1; -+ } -+ if(((kill(p, 0) < 0) && (errno == ESRCH)) || -+ (p == CHOOSE_MODE(tracing_pid, os_getpid()))) -+ dead = 1; -+ } -+ if(!dead) return(1); -+ return(actually_do_remove(dir)); -+} -+ -+static int __init set_uml_dir(char *name, int *add) -+{ -+ if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ -+ uml_dir = malloc(strlen(name) + 2); -+ if(uml_dir == NULL){ -+ printf("Failed to malloc uml_dir - error = %d\n", -+ errno); -+ uml_dir = name; -+ /* Return 0 here because do_initcalls doesn't look at -+ * the return value. -+ */ -+ return(0); -+ } -+ sprintf(uml_dir, "%s/", name); -+ } -+ else uml_dir = name; -+ return(0); -+} -+ -+static int __init make_uml_dir(void) -+{ -+ char dir[MAXPATHLEN + 1] = { '\0' }; -+ int len; -+ -+ if(*uml_dir == '~'){ -+ char *home = getenv("HOME"); -+ -+ if(home == NULL){ -+ printf("make_uml_dir : no value in environment for " -+ "$HOME\n"); -+ exit(1); -+ } -+ strncpy(dir, home, sizeof(dir)); -+ uml_dir++; -+ } -+ len = strlen(dir); -+ strncat(dir, uml_dir, sizeof(dir) - len); -+ len = strlen(dir); -+ if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){ -+ dir[len] = '/'; -+ dir[len + 1] = '\0'; -+ } -+ -+ uml_dir = malloc(strlen(dir) + 1); -+ if(uml_dir == NULL){ -+ printf("make_uml_dir : malloc failed, errno = %d\n", errno); -+ exit(1); -+ } -+ strcpy(uml_dir, dir); -+ -+ if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ -+ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); -+ return(-1); -+ } -+ return 0; -+} -+ -+static int __init make_umid(int (*printer)(const char *fmt, ...)) -+{ -+ int fd, err; -+ char tmp[strlen(uml_dir) + UMID_LEN + 1]; -+ -+ strncpy(tmp, uml_dir, sizeof(tmp) - 1); -+ tmp[sizeof(tmp) - 1] = '\0'; -+ -+ if(!umid_inited){ -+ strcat(tmp, "XXXXXX"); -+ fd = mkstemp(tmp); -+ if(fd < 0){ -+ (*printer)("make_umid - mkstemp failed, errno = %d\n", -+ errno); -+ return(1); -+ } -+ -+ os_close_file(fd); -+ /* There's a nice tiny little race between this unlink and -+ * the mkdir below. It'd be nice if there were a mkstemp -+ * for directories. -+ */ -+ unlink(tmp); -+ set_umid(&tmp[strlen(uml_dir)], 1, printer); -+ } -+ -+ sprintf(tmp, "%s%s", uml_dir, umid); -+ -+ err = mkdir(tmp, 0777); -+ if(err < 0){ -+ if(errno == EEXIST){ -+ if(not_dead_yet(tmp)){ -+ (*printer)("umid '%s' is in use\n", umid); -+ return(-1); -+ } -+ err = mkdir(tmp, 0777); -+ } -+ } -+ if(err < 0){ -+ (*printer)("Failed to create %s - errno = %d\n", umid, errno); -+ return(-1); -+ } -+ -+ return(0); -+} -+ -+__uml_setup("uml_dir=", set_uml_dir, -+"uml_dir=<directory>\n" -+" The location to place the pid and umid files.\n\n" -+); -+ -+__uml_postsetup(make_uml_dir); -+ -+static int __init make_umid_setup(void) -+{ -+ return(make_umid(printf)); -+} -+ -+__uml_postsetup(make_umid_setup); -+__uml_postsetup(create_pid_file); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/kernel/user_syms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/user_syms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/user_syms.c 2005-05-03 23:38:38.888201256 +0300 -@@ -0,0 +1,85 @@ -+#include "linux/types.h" -+#include "linux/module.h" -+ -+/* XXX Deleted a number of symbols because they clashed strangely with headers -+ * Add them back later. -+ */ -+ -+#if 1 -+/* Some of this are builtin function (some are not but could in the future), -+ * so I *must* declare good prototypes for them and then EXPORT them. -+ * The kernel code uses the macro defined by include/linux/string.h, -+ * so I undef macros; the userspace code does not include that and I -+ * add an EXPORT for the glibc one.*/ -+ -+#undef strlen -+#undef memcpy -+#undef memset -+ -+//extern size_t strlen(const char *); -+extern void *memcpy(void *, const void *, size_t); -+extern void *memset(void *, int, size_t); -+//extern int printf(const char *, ...); -+ -+//EXPORT_SYMBOL(strlen); -+EXPORT_SYMBOL(memset); -+EXPORT_SYMBOL(memcpy); -+#undef strstr -+ -+EXPORT_SYMBOL(strstr); -+ -+#endif -+ -+/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms. -+ * However, the modules will use the CRC defined *here*, no matter if it is -+ * good; so the versions of these symbols will always match -+ */ -+#define EXPORT_SYMBOL_PROTO(sym) \ -+ int sym(void); \ -+ EXPORT_SYMBOL(sym); -+ -+EXPORT_SYMBOL_PROTO(__errno_location); -+ -+EXPORT_SYMBOL_PROTO(access); -+EXPORT_SYMBOL_PROTO(open); -+EXPORT_SYMBOL_PROTO(open64); -+EXPORT_SYMBOL_PROTO(close); -+EXPORT_SYMBOL_PROTO(read); -+EXPORT_SYMBOL_PROTO(write); -+EXPORT_SYMBOL_PROTO(dup2); -+EXPORT_SYMBOL_PROTO(__xstat); -+EXPORT_SYMBOL_PROTO(__lxstat); -+EXPORT_SYMBOL_PROTO(__lxstat64); -+EXPORT_SYMBOL_PROTO(lseek); -+EXPORT_SYMBOL_PROTO(lseek64); -+EXPORT_SYMBOL_PROTO(chown); -+EXPORT_SYMBOL_PROTO(truncate); -+EXPORT_SYMBOL_PROTO(utime); -+EXPORT_SYMBOL_PROTO(chmod); -+EXPORT_SYMBOL_PROTO(rename); -+EXPORT_SYMBOL_PROTO(__xmknod); -+ -+EXPORT_SYMBOL_PROTO(symlink); -+EXPORT_SYMBOL_PROTO(link); -+EXPORT_SYMBOL_PROTO(unlink); -+EXPORT_SYMBOL_PROTO(readlink); -+ -+EXPORT_SYMBOL_PROTO(mkdir); -+EXPORT_SYMBOL_PROTO(rmdir); -+EXPORT_SYMBOL_PROTO(opendir); -+EXPORT_SYMBOL_PROTO(readdir); -+EXPORT_SYMBOL_PROTO(closedir); -+EXPORT_SYMBOL_PROTO(seekdir); -+EXPORT_SYMBOL_PROTO(telldir); -+ -+EXPORT_SYMBOL_PROTO(ioctl); -+ -+EXPORT_SYMBOL_PROTO(pread64); -+EXPORT_SYMBOL_PROTO(pwrite64); -+ -+EXPORT_SYMBOL_PROTO(statfs); -+EXPORT_SYMBOL_PROTO(statfs64); -+ -+EXPORT_SYMBOL_PROTO(getuid); -+ -+EXPORT_SYMBOL_PROTO(printf); -Index: linux-2.4.29/arch/um/kernel/user_util.c -=================================================================== ---- linux-2.4.29.orig/arch/um/kernel/user_util.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/kernel/user_util.c 2005-05-03 22:28:14.556397304 +0300 -@@ -0,0 +1,188 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <limits.h> -+#include <setjmp.h> -+#include <sys/mman.h> -+#include <sys/stat.h> -+#include <sys/ptrace.h> -+#include <sys/utsname.h> -+#include <sys/param.h> -+#include <sys/time.h> -+#include "asm/types.h" -+#include <ctype.h> -+#include <signal.h> -+#include <wait.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <sched.h> -+#include <termios.h> -+#include <string.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "mem_user.h" -+#include "init.h" -+#include "helper.h" -+#include "uml-config.h" -+ -+#define COMMAND_LINE_SIZE _POSIX_ARG_MAX -+ -+/* Changed in linux_main and setup_arch, which run before SMP is started */ -+char saved_command_line[COMMAND_LINE_SIZE] = { 0 }; -+char command_line[COMMAND_LINE_SIZE] = { 0 }; -+ -+void add_arg(char *cmd_line, char *arg) -+{ -+ if (strlen(cmd_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { -+ printf("add_arg: Too much command line!\n"); -+ exit(1); -+ } -+ if(strlen(cmd_line) > 0) strcat(cmd_line, " "); -+ strcat(cmd_line, arg); -+} -+ -+void stop(void) -+{ -+ while(1) sleep(1000000); -+} -+ -+void stack_protections(unsigned long address) -+{ -+ int prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ -+ if(mprotect((void *) address, page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+void task_protections(unsigned long address) -+{ -+ unsigned long guard = address + page_size(); -+ unsigned long stack = guard + page_size(); -+ int prot = 0, pages; -+#ifdef notdef -+ if(mprotect((void *) guard, page_size(), prot) < 0) -+ panic("protecting guard page failed, errno = %d", errno); -+#endif -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER) - 2; -+ prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ if(mprotect((void *) stack, pages * page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+int wait_for_stop(int pid, int sig, int cont_type, void *relay) -+{ -+ sigset_t *relay_signals = relay; -+ int status, ret; -+ -+ while(1){ -+ CATCH_EINTR(ret = waitpid(pid, &status, WUNTRACED)); -+ if((ret < 0) || -+ !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ -+ if(ret < 0){ -+ printk("wait failed, errno = %d\n", -+ errno); -+ } -+ else if(WIFEXITED(status)) -+ printk("process %d exited with status %d\n", -+ pid, WEXITSTATUS(status)); -+ else if(WIFSIGNALED(status)) -+ printk("process %d exited with signal %d\n", -+ pid, WTERMSIG(status)); -+ else if((WSTOPSIG(status) == SIGVTALRM) || -+ (WSTOPSIG(status) == SIGALRM) || -+ (WSTOPSIG(status) == SIGIO) || -+ (WSTOPSIG(status) == SIGPROF) || -+ (WSTOPSIG(status) == SIGCHLD) || -+ (WSTOPSIG(status) == SIGWINCH) || -+ (WSTOPSIG(status) == SIGINT)){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else if((relay_signals != NULL) && -+ sigismember(relay_signals, WSTOPSIG(status))){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else printk("process %d stopped with signal %d\n", -+ pid, WSTOPSIG(status)); -+ panic("wait_for_stop failed to wait for %d to stop " -+ "with %d\n", pid, sig); -+ } -+ return(status); -+ } -+} -+ -+int raw(int fd) -+{ -+ struct termios tt; -+ int err; -+ -+ CATCH_EINTR(err = tcgetattr(fd, &tt)); -+ if (err < 0) { -+ printk("tcgetattr failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ cfmakeraw(&tt); -+ -+ CATCH_EINTR(err = tcsetattr(fd, TCSADRAIN, &tt)); -+ if (err < 0) { -+ printk("tcsetattr failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ /* XXX tcsetattr could have applied only some changes -+ * (and cfmakeraw() is a set of changes) */ -+ return(0); -+} -+ -+void setup_machinename(char *machine_out) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ strcpy(machine_out, host.machine); -+} -+ -+char host_info[(_UTSNAME_LENGTH + 1) * 4 + _UTSNAME_NODENAME_LENGTH + 1]; -+ -+void setup_hostinfo(void) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ sprintf(host_info, "%s %s %s %s %s", host.sysname, host.nodename, -+ host.release, host.version, host.machine); -+} -+ -+int setjmp_wrapper(void (*proc)(void *, void *), ...) -+{ -+ va_list args; -+ sigjmp_buf buf; -+ int n; -+ -+ n = sigsetjmp(buf, 1); -+ if(n == 0){ -+ va_start(args, proc); -+ (*proc)(&buf, &args); -+ } -+ va_end(args); -+ return(n); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,191 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+OS := $(shell uname -s) -+ -+ARCH_DIR = arch/um -+ -+core-y := kernel sys-$(SUBARCH) os-$(OS) -+drivers-y := fs drivers -+subdir-y := $(core-y) $(drivers-y) -+SUBDIRS += $(foreach dir,$(subdir-y),$(ARCH_DIR)/$(dir)) -+ -+CORE_FILES += $(foreach dir,$(core-y),$(ARCH_DIR)/$(dir)/built-in.o) -+DRIVERS += $(foreach dir,$(drivers-y),$(ARCH_DIR)/$(dir)/built-in.o) -+ -+include $(ARCH_DIR)/Makefile-$(SUBARCH) -+include $(ARCH_DIR)/Makefile-os-$(OS) -+ -+MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt -+MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas -+ -+ifneq ($(MAKEFILE-y),) -+ include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y)) -+endif -+ -+EXTRAVERSION := $(EXTRAVERSION)-1um -+ -+include/linux/version.h: arch/$(ARCH)/Makefile -+ -+# We require bash because the vmlinux link and loader script cpp use bash -+# features. -+SHELL := /bin/bash -+ -+# Recalculate MODLIB to reflect the EXTRAVERSION changes (via KERNELRELEASE) -+# The way the toplevel Makefile is written EXTRAVERSION is not supposed -+# to be changed outside the toplevel Makefile, but recalculating MODLIB is -+# a sufficient workaround until we no longer need architecture dependent -+# EXTRAVERSION... -+MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -+ -+ifeq ($(CONFIG_DEBUGSYM),y) -+CFLAGS := $(subst -fomit-frame-pointer,,$(CFLAGS)) -+endif -+ -+CFLAGS-$(CONFIG_DEBUGSYM) += -g -+ -+ARCH_INCLUDE = -I$(TOPDIR)/$(ARCH_DIR)/include -+ -+# -Derrno=kernel_errno - This turns all kernel references to errno into -+# kernel_errno to separate them from the libc errno. This allows -fno-common -+# in CFLAGS. Otherwise, it would cause ld to complain about the two different -+# errnos. -+ -+CFLAGS += $(ARCH_CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ -+ -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \ -+ $(MODE_INCLUDE) -+ -+check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) -+ -+CFLAGS += $(call check_gcc,-fno-unit-at-a-time,) -+ -+LINKFLAGS += -r -+ -+LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -+ -+# These are needed for clean and mrproper, since in that case .config is not -+# included; the values here are meaningless -+ -+CONFIG_NEST_LEVEL ?= 0 -+CONFIG_KERNEL_HALF_GIGS ?= 0 -+ -+SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000) -+ -+# These aren't in Makefile-tt because they are needed in the !CONFIG_MODE_TT + -+# CONFIG_MODE_SKAS + CONFIG_STATIC_LINK case. -+ -+LINK_TT = -static -+LD_SCRIPT_TT := link.ld -+ -+ifeq ($(CONFIG_STATIC_LINK),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_TT),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_SKAS),y) -+ LINK-y += $(LINK_SKAS) -+ LD_SCRIPT-y := $(LD_SCRIPT_SKAS) -+endif -+endif -+endif -+ -+LD_SCRIPT-y := $(ARCH_DIR)/kernel/$(LD_SCRIPT-y) -+M4_MODE_TT := $(shell [ "$(CONFIG_MODE_TT)" = "y" ] && echo -DMODE_TT) -+ -+ifndef START -+ START = $$(($(TOP_ADDR) - $(SIZE))) -+endif -+ -+$(LD_SCRIPT-y): $(LD_SCRIPT-y).in -+ pages=$$(( 1 << $(CONFIG_KERNEL_STACK_ORDER) )) ; \ -+ m4 -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ -+ -DELF_FORMAT=$(ELF_FORMAT) $(M4_MODE_TT) \ -+ -DKERNEL_STACK_SIZE=$$(( 4096 * $$pages )) $< > $@ -+ -+SYMLINK_HEADERS = archparam.h system.h sigcontext.h processor.h ptrace.h \ -+ arch-signal.h -+SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) -+ -+ARCH_SYMLINKS = include/asm-um/arch arch/um/include/sysdep arch/um/os \ -+ $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h -+ -+ifeq ($(CONFIG_MODE_SKAS), y) -+$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+endif -+ -+GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h -+ -+setup: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) -+ -+linux: setup vmlinux $(LD_SCRIPT-y) -+ mv vmlinux vmlinux.o -+ $(CC) -Wl,-T,$(LD_SCRIPT-y) $(LINK-y) $(LINK_WRAPS) \ -+ -o linux vmlinux.o -L/usr/lib -lutil -+ -+USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) -+USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS)) -+USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \ -+ $(MODE_INCLUDE) -+ -+# To get a definition of F_SETSIG -+USER_CFLAGS += -D_GNU_SOURCE -+ -+CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/link.ld $(ARCH_DIR)/dyn_link.ld \ -+ $(GEN_HEADERS) -+# $(ARCH_DIR)/include/uml-config.h removed temporarily because this causes -+# make to fail after make clean -+ -+archmrproper: -+ rm -f $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \ -+ $(LD_SCRIPT) $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) -+ -+archclean: sysclean -+ find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -+ -o -name '*.gcov' \) -type f -print | xargs rm -f -+ cd $(ARCH_DIR) ; \ -+ for dir in $(subdir-y) util ; do $(MAKE) -C $$dir clean; done -+ -+archdep: -+ -+$(SYMLINK_HEADERS): -+ cd $(TOPDIR)/$(dir $@) ; \ -+ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) -+ -+include/asm-um/arch: -+ cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch -+ -+arch/um/include/sysdep: -+ cd $(TOPDIR)/arch/um/include && ln -sf sysdep-$(SUBARCH) sysdep -+ -+arch/um/os: -+ cd $(ARCH_DIR) && ln -sf os-$(OS) os -+ -+$(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task -+ $< > $@ -+ -+$(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants -+ $< > $@ -+ -+$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h -+ sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@ -+ -+$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/util/mk_task_user.c \ -+ $(ARCH_DIR)/util/mk_task_kern.c $(SYS_HEADERS) -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util mk_task -+ -+$(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util/mk_constants_user.c \ -+ $(ARCH_DIR)/util/mk_constants_kern.c -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util mk_constants -+ -+export SUBARCH USER_CFLAGS OS -+ -+all: linux -+ -+define archhelp -+ echo '* linux - Binary kernel image (./linux)' -+endef -Index: linux-2.4.29/arch/um/Makefile-i386 -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-i386 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-i386 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,46 @@ -+ifeq ($(CONFIG_HOST_2G_2G),y) -+TOP_ADDR = 0x80000000 -+else -+TOP_ADDR = 0xc0000000 -+endif -+ -+ifeq ($(CONFIG_MODE_SKAS),y) -+ ifneq ($(CONFIG_MODE_TT),y) -+ START = 0x8048000 -+ endif -+endif -+ -+ARCH_CFLAGS = -U__$(SUBARCH)__ -U$(SUBARCH) -+ -+ifneq ($(CONFIG_GPROF),y) -+ARCH_CFLAGS += -DUM_FASTCALL -+endif -+ -+ELF_ARCH = $(SUBARCH) -+ELF_FORMAT = elf32-$(SUBARCH) -+ -+I386_H = $(ARCH_DIR)/include/sysdep-i386 -+SYS = $(ARCH_DIR)/sys-i386 -+UTIL = $(SYS)/util -+SUBDIRS += $(UTIL) -+ -+SYS_HEADERS = $(I386_H)/sc.h $(I386_H)/thread.h -+ -+$(I386_H)/sc.h : $(UTIL)/mk_sc -+ $(UTIL)/mk_sc > $@ -+ -+$(I386_H)/thread.h : $(UTIL)/mk_thread -+ $(UTIL)/mk_thread > $@ -+ -+$(UTIL)/mk_sc : $(UTIL)/mk_sc.c -+ $(MAKE) -C $(UTIL) mk_sc -+ -+$(UTIL)/mk_thread : $(UTIL)/mk_thread_user.c $(UTIL)/mk_thread_kern.c \ -+ $(I386_H)/sc.h -+ $(MAKE) -C $(UTIL) mk_thread -+ -+sysclean : -+ rm -f $(SYS_HEADERS) -+ $(MAKE) -C $(UTIL) clean -+ $(MAKE) -C $(SYS) clean -+ -Index: linux-2.4.29/arch/um/Makefile-ia64 -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-ia64 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-ia64 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1 @@ -+START_ADDR = 0x1000000000000000 -Index: linux-2.4.29/arch/um/Makefile-os-Linux -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-os-Linux 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-os-Linux 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+SUBDIRS += $(ARCH_DIR)/os-$(OS)/drivers -+DRIVERS += $(ARCH_DIR)/os-$(OS)/drivers/drivers.o -Index: linux-2.4.29/arch/um/Makefile-ppc -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-ppc 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-ppc 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,9 @@ -+ifeq ($(CONFIG_HOST_2G_2G), y) -+START_ADDR = 0x80000000 -+else -+START_ADDR = 0xc0000000 -+endif -+ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__ -+ -+# The arch is ppc, but the elf32 name is powerpc -+ELF_SUBARCH = powerpc -Index: linux-2.4.29/arch/um/Makefile-skas -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-skas 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-skas 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,20 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+PROFILE += -pg -+ -+CFLAGS-$(CONFIG_GCOV) += -fprofile-arcs -ftest-coverage -+CFLAGS-$(CONFIG_GPROF) += $(PROFILE) -+LINK-$(CONFIG_GPROF) += $(PROFILE) -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/skas/include -+ -+LINK_SKAS = -Wl,-rpath,/lib -+LD_SCRIPT_SKAS = dyn_link.ld -+ -+GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+ -+$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : -+ $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h -Index: linux-2.4.29/arch/um/Makefile-tt -=================================================================== ---- linux-2.4.29.orig/arch/um/Makefile-tt 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/Makefile-tt 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/tt/include -+ -Index: linux-2.4.29/arch/um/os-Linux/aio.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/aio.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/aio.c 2005-05-03 22:28:14.563396240 +0300 -@@ -0,0 +1,404 @@ -+/* -+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sched.h> -+#include <sys/syscall.h> -+#include "os.h" -+#include "helper.h" -+#include "aio.h" -+#include "init.h" -+#include "user.h" -+#include "mode.h" -+ -+struct aio_thread_req { -+ enum aio_type type; -+ int io_fd; -+ unsigned long long offset; -+ char *buf; -+ int len; -+ int reply_fd; -+ void *data; -+}; -+ -+static int aio_req_fd_r = -1; -+static int aio_req_fd_w = -1; -+ -+#if defined(HAVE_AIO_ABI) -+#include <linux/aio_abi.h> -+ -+/* If we have the headers, we are going to build with AIO enabled. -+ * If we don't have aio in libc, we define the necessary stubs here. -+ */ -+ -+#if !defined(HAVE_AIO_LIBC) -+ -+#define __NR_io_setup 245 -+#define __NR_io_getevents 247 -+#define __NR_io_submit 248 -+ -+static long io_setup(int n, aio_context_t *ctxp) -+{ -+ return(syscall(__NR_io_setup, n, ctxp)); -+} -+ -+static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) -+{ -+ return(syscall(__NR_io_submit, ctx, nr, iocbpp)); -+} -+ -+static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, -+ struct io_event *events, struct timespec *timeout) -+{ -+ return(syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout)); -+} -+ -+#endif -+ -+/* The AIO_MMAP cases force the mmapped page into memory here -+ * rather than in whatever place first touches the data. I used -+ * to do this by touching the page, but that's delicate because -+ * gcc is prone to optimizing that away. So, what's done here -+ * is we read from the descriptor from which the page was -+ * mapped. The caller is required to pass an offset which is -+ * inside the page that was mapped. Thus, when the read -+ * returns, we know that the page is in the page cache, and -+ * that it now backs the mmapped area. -+ */ -+ -+static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, -+ int len, unsigned long long offset, void *data) -+{ -+ struct iocb iocb, *iocbp = &iocb; -+ char c; -+ int err; -+ -+ iocb = ((struct iocb) { .aio_data = (unsigned long) data, -+ .aio_reqprio = 0, -+ .aio_fildes = fd, -+ .aio_buf = (unsigned long) buf, -+ .aio_nbytes = len, -+ .aio_offset = offset, -+ .aio_reserved1 = 0, -+ .aio_reserved2 = 0, -+ .aio_reserved3 = 0 }); -+ -+ switch(type){ -+ case AIO_READ: -+ iocb.aio_lio_opcode = IOCB_CMD_PREAD; -+ err = io_submit(ctx, 1, &iocbp); -+ break; -+ case AIO_WRITE: -+ iocb.aio_lio_opcode = IOCB_CMD_PWRITE; -+ err = io_submit(ctx, 1, &iocbp); -+ break; -+ case AIO_MMAP: -+ iocb.aio_lio_opcode = IOCB_CMD_PREAD; -+ iocb.aio_buf = (unsigned long) &c; -+ iocb.aio_nbytes = sizeof(c); -+ err = io_submit(ctx, 1, &iocbp); -+ break; -+ default: -+ printk("Bogus op in do_aio - %d\n", type); -+ err = -EINVAL; -+ break; -+ } -+ if(err > 0) -+ err = 0; -+ -+ return(err); -+} -+ -+static aio_context_t ctx = 0; -+ -+static int aio_thread(void *arg) -+{ -+ struct aio_thread_reply reply; -+ struct io_event event; -+ int err, n, reply_fd; -+ -+ signal(SIGWINCH, SIG_IGN); -+ -+ while(1){ -+ n = io_getevents(ctx, 1, 1, &event, NULL); -+ if(n < 0){ -+ if(errno == EINTR) -+ continue; -+ printk("aio_thread - io_getevents failed, " -+ "errno = %d\n", errno); -+ } -+ else { -+ reply = ((struct aio_thread_reply) -+ { .data = (void *) event.data, -+ .err = event.res }); -+ reply_fd = -+ ((struct aio_context *) event.data)->reply_fd; -+ err = os_write_file(reply_fd, &reply, sizeof(reply)); -+ if(err != sizeof(reply)) -+ printk("not_aio_thread - write failed, " -+ "fd = %d, err = %d\n", -+ aio_req_fd_r, -err); -+ } -+ } -+ return(0); -+} -+ -+#endif -+ -+static int do_not_aio(struct aio_thread_req *req) -+{ -+ char c; -+ int err; -+ -+ switch(req->type){ -+ case AIO_READ: -+ err = os_seek_file(req->io_fd, req->offset); -+ if(err) -+ goto out; -+ -+ err = os_read_file(req->io_fd, req->buf, req->len); -+ break; -+ case AIO_WRITE: -+ err = os_seek_file(req->io_fd, req->offset); -+ if(err) -+ goto out; -+ -+ err = os_write_file(req->io_fd, req->buf, req->len); -+ break; -+ case AIO_MMAP: -+ err = os_seek_file(req->io_fd, req->offset); -+ if(err) -+ goto out; -+ -+ err = os_read_file(req->io_fd, &c, sizeof(c)); -+ break; -+ default: -+ printk("do_not_aio - bad request type : %d\n", req->type); -+ err = -EINVAL; -+ break; -+ } -+ -+ out: -+ return(err); -+} -+ -+static int not_aio_thread(void *arg) -+{ -+ struct aio_thread_req req; -+ struct aio_thread_reply reply; -+ int err; -+ -+ signal(SIGWINCH, SIG_IGN); -+ while(1){ -+ err = os_read_file(aio_req_fd_r, &req, sizeof(req)); -+ if(err != sizeof(req)){ -+ if(err < 0) -+ printk("not_aio_thread - read failed, fd = %d, " -+ "err = %d\n", aio_req_fd_r, -err); -+ else { -+ printk("not_aio_thread - short read, fd = %d, " -+ "length = %d\n", aio_req_fd_r, err); -+ } -+ continue; -+ } -+ err = do_not_aio(&req); -+ reply = ((struct aio_thread_reply) { .data = req.data, -+ .err = err }); -+ err = os_write_file(req.reply_fd, &reply, sizeof(reply)); -+ if(err != sizeof(reply)) -+ printk("not_aio_thread - write failed, fd = %d, " -+ "err = %d\n", aio_req_fd_r, -err); -+ } -+} -+ -+static int aio_pid = -1; -+ -+static int init_aio_24(void) -+{ -+ unsigned long stack; -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err) -+ goto out; -+ -+ aio_req_fd_w = fds[0]; -+ aio_req_fd_r = fds[1]; -+ err = run_helper_thread(not_aio_thread, NULL, -+ CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); -+ if(err < 0) -+ goto out_close_pipe; -+ -+ aio_pid = err; -+ goto out; -+ -+ out_close_pipe: -+ os_close_file(fds[0]); -+ os_close_file(fds[1]); -+ aio_req_fd_w = -1; -+ aio_req_fd_r = -1; -+ out: -+ return(0); -+} -+ -+#ifdef HAVE_AIO_ABI -+#define DEFAULT_24_AIO 0 -+static int init_aio_26(void) -+{ -+ unsigned long stack; -+ int err; -+ -+ if(io_setup(256, &ctx)){ -+ printk("aio_thread failed to initialize context, err = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ err = run_helper_thread(aio_thread, NULL, -+ CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); -+ if(err < 0) -+ return(-errno); -+ -+ aio_pid = err; -+ err = 0; -+ out: -+ return(err); -+} -+ -+int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, -+ unsigned long long offset, int reply_fd, void *data) -+{ -+ struct aio_thread_reply reply; -+ int err; -+ -+ ((struct aio_context *) data)->reply_fd = reply_fd; -+ -+ err = do_aio(ctx, type, io_fd, buf, len, offset, data); -+ if(err){ -+ reply = ((struct aio_thread_reply) { .data = data, -+ .err = err }); -+ err = os_write_file(reply_fd, &reply, sizeof(reply)); -+ if(err != sizeof(reply)) -+ printk("submit_aio_26 - write failed, " -+ "fd = %d, err = %d\n", reply_fd, -err); -+ else err = 0; -+ } -+ -+ return(err); -+} -+ -+#else -+#define DEFAULT_24_AIO 1 -+static int init_aio_26(void) -+{ -+ return(-ENOSYS); -+} -+ -+int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, -+ unsigned long long offset, int reply_fd, void *data) -+{ -+ return(-ENOSYS); -+} -+#endif -+ -+static int aio_24 = DEFAULT_24_AIO; -+ -+static int __init set_aio_24(char *name, int *add) -+{ -+ aio_24 = 1; -+ return(0); -+} -+ -+__uml_setup("aio=2.4", set_aio_24, -+"aio=2.4\n" -+" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" -+" available. 2.4 AIO is a single thread that handles one request at a\n" -+" time, synchronously. 2.6 AIO is a thread which uses 2.5 AIO interface\n" -+" to handle an arbitrary number of pending requests. 2.6 AIO is not\n" -+" available in tt mode, on 2.4 hosts, or when UML is built with\n" -+" /usr/include/linux/aio_abi no available.\n\n" -+); -+ -+static int init_aio(void) -+{ -+ int err; -+ -+ CHOOSE_MODE(({ -+ if(!aio_24){ -+ printk("Disabling 2.6 AIO in tt mode\n"); -+ aio_24 = 1; -+ } }), (void) 0); -+ -+ if(!aio_24){ -+ err = init_aio_26(); -+ if(err && (errno == ENOSYS)){ -+ printk("2.6 AIO not supported on the host - " -+ "reverting to 2.4 AIO\n"); -+ aio_24 = 1; -+ } -+ else return(err); -+ } -+ -+ if(aio_24) -+ return(init_aio_24()); -+ -+ return(0); -+} -+ -+__initcall(init_aio); -+ -+static void exit_aio(void) -+{ -+ if(aio_pid != -1) -+ os_kill_process(aio_pid, 1); -+} -+ -+__uml_exitcall(exit_aio); -+ -+int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, -+ unsigned long long offset, int reply_fd, void *data) -+{ -+ struct aio_thread_req req = { .type = type, -+ .io_fd = io_fd, -+ .offset = offset, -+ .buf = buf, -+ .len = len, -+ .reply_fd = reply_fd, -+ .data = data, -+ }; -+ int err; -+ -+ err = os_write_file(aio_req_fd_w, &req, sizeof(req)); -+ if(err == sizeof(req)) -+ err = 0; -+ -+ return(err); -+} -+ -+int submit_aio(enum aio_type type, int io_fd, char *buf, int len, -+ unsigned long long offset, int reply_fd, void *data) -+{ -+ if(aio_24) -+ return(submit_aio_24(type, io_fd, buf, len, offset, reply_fd, -+ data)); -+ else { -+ return(submit_aio_26(type, io_fd, buf, len, offset, reply_fd, -+ data)); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/etap.h -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/etap.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/etap.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct ethertap_data { -+ char *dev_name; -+ char *gate_addr; -+ int data_fd; -+ int control_fd; -+ void *dev; -+}; -+ -+extern struct net_user_info ethertap_user_info; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/ethertap_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/ethertap_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/ethertap_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "etap.h" -+ -+struct ethertap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void etap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct ethertap_data *epri; -+ struct ethertap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ epri = (struct ethertap_data *) pri->user; -+ *epri = ((struct ethertap_data) -+ { .dev_name = init->dev_name, -+ .gate_addr = init->gate_addr, -+ .data_fd = -1, -+ .control_fd = -1, -+ .dev = dev }); -+ -+ printk("ethertap backend - %s", epri->dev_name); -+ if(epri->gate_addr != NULL) -+ printk(", IP = %s", epri->gate_addr); -+ printk("\n"); -+} -+ -+static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ int len; -+ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP); -+ if(*skb == NULL) return(-ENOMEM); -+ len = net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP); -+ if(len <= 0) return(len); -+ skb_pull(*skb, 2); -+ len -= 2; -+ return(len); -+} -+ -+static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ if(skb_headroom(*skb) < 2){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_realloc_headroom(*skb, 2); -+ dev_kfree_skb(*skb); -+ if (skb2 == NULL) return(-ENOMEM); -+ *skb = skb2; -+ } -+ skb_push(*skb, 2); -+ return(net_send(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info ethertap_kern_info = { -+ .init = etap_init, -+ .protocol = eth_protocol, -+ .read = etap_read, -+ .write = etap_write, -+}; -+ -+int ethertap_setup(char *str, char **mac_out, void *data) -+{ -+ struct ethertap_init *init = data; -+ -+ *init = ((struct ethertap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "ethertap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); -+ if(init->dev_name == NULL){ -+ printk("ethertap_setup : Missing tap device name\n"); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+static struct transport ethertap_transport = { -+ .list = LIST_HEAD_INIT(ethertap_transport.list), -+ .name = "ethertap", -+ .setup = ethertap_setup, -+ .user = ðertap_user_info, -+ .kern = ðertap_kern_info, -+ .private_size = sizeof(struct ethertap_data), -+}; -+ -+static int register_ethertap(void) -+{ -+ register_transport(ðertap_transport); -+ return(1); -+} -+ -+__initcall(register_ethertap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/ethertap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/ethertap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/ethertap_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,240 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <sys/errno.h> -+#include <sys/socket.h> -+#include <sys/wait.h> -+#include <sys/un.h> -+#include <net/if.h> -+#include "user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "net_user.h" -+#include "etap.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void etap_user_init(void *data, void *dev) -+{ -+ struct ethertap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct addr_change { -+ enum { ADD_ADDR, DEL_ADDR } what; -+ unsigned char addr[4]; -+ unsigned char netmask[4]; -+}; -+ -+static void etap_change(int op, unsigned char *addr, unsigned char *netmask, -+ int fd) -+{ -+ struct addr_change change; -+ void *output; -+ int n; -+ -+ change.what = op; -+ memcpy(change.addr, addr, sizeof(change.addr)); -+ memcpy(change.netmask, netmask, sizeof(change.netmask)); -+ n = os_write_file(fd, &change, sizeof(change)); -+ if(n != sizeof(change)) -+ printk("etap_change - request failed, err = %d\n", -n); -+ output = um_kmalloc(page_size()); -+ if(output == NULL) -+ printk("etap_change : Failed to allocate output buffer\n"); -+ read_output(fd, output, page_size()); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+static void etap_open_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+static void etap_close_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+struct etap_pre_exec_data { -+ int control_remote; -+ int control_me; -+ int data_me; -+}; -+ -+static void etap_pre_exec(void *arg) -+{ -+ struct etap_pre_exec_data *data = arg; -+ -+ dup2(data->control_remote, 1); -+ os_close_file(data->data_me); -+ os_close_file(data->control_me); -+} -+ -+static int etap_tramp(char *dev, char *gate, int control_me, -+ int control_remote, int data_me, int data_remote) -+{ -+ struct etap_pre_exec_data pe_data; -+ int pid, status, err, n; -+ char version_buf[sizeof("nnnnn\0")]; -+ char data_fd_buf[sizeof("nnnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, -+ data_fd_buf, gate_buf, NULL }; -+ char *nosetup_args[] = { "uml_net", version_buf, "ethertap", -+ dev, data_fd_buf, NULL }; -+ char **args, c; -+ -+ sprintf(data_fd_buf, "%d", data_remote); -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ if(gate != NULL){ -+ strcpy(gate_buf, gate); -+ args = setup_args; -+ } -+ else args = nosetup_args; -+ -+ err = 0; -+ pe_data.control_remote = control_remote; -+ pe_data.control_me = control_me; -+ pe_data.data_me = data_me; -+ pid = run_helper(etap_pre_exec, &pe_data, args, NULL); -+ -+ if(pid < 0) err = pid; -+ os_close_file(data_remote); -+ os_close_file(control_remote); -+ n = os_read_file(control_me, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("etap_tramp : read of status failed, err = %d\n", -n); -+ return(-EINVAL); -+ } -+ if(c != 1){ -+ printk("etap_tramp : uml_net failed\n"); -+ err = -EINVAL; -+ CATCH_EINTR(n = waitpid(pid, &status, 0)); -+ if(n < 0) -+ err = -errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) -+ printk("uml_net didn't exit with status 1\n"); -+ } -+ return(err); -+} -+ -+static int etap_open(void *data) -+{ -+ struct ethertap_data *pri = data; -+ char *output; -+ int data_fds[2], control_fds[2], err, output_len; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err) return(err); -+ -+ err = os_pipe(data_fds, 0, 0); -+ if(err < 0){ -+ printk("data os_pipe failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ err = os_pipe(control_fds, 1, 0); -+ if(err < 0){ -+ printk("control os_pipe failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], -+ control_fds[1], data_fds[0], data_fds[1]); -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ read_output(control_fds[0], output, output_len); -+ -+ if(output == NULL) -+ printk("etap_open : failed to allocate output buffer\n"); -+ else { -+ printk("%s", output); -+ kfree(output); -+ } -+ -+ if(err < 0){ -+ printk("etap_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ pri->data_fd = data_fds[0]; -+ pri->control_fd = control_fds[0]; -+ iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); -+ return(data_fds[0]); -+} -+ -+static void etap_close(int fd, void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); -+ os_close_file(fd); -+ os_shutdown_socket(pri->data_fd, 1, 1); -+ os_close_file(pri->data_fd); -+ pri->data_fd = -1; -+ os_close_file(pri->control_fd); -+ pri->control_fd = -1; -+} -+ -+static int etap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void etap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if(pri->control_fd == -1) return; -+ etap_open_addr(addr, netmask, &pri->control_fd); -+} -+ -+static void etap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ if(pri->control_fd == -1) return; -+ etap_close_addr(addr, netmask, &pri->control_fd); -+} -+ -+struct net_user_info ethertap_user_info = { -+ .init = etap_user_init, -+ .open = etap_open, -+ .close = etap_close, -+ .remove = NULL, -+ .set_mtu = etap_set_mtu, -+ .add_address = etap_add_addr, -+ .delete_address = etap_del_addr, -+ .max_packet = MAX_PACKET - ETH_HEADER_ETHERTAP -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/Makefile 2005-05-03 22:28:14.568395480 +0300 -@@ -0,0 +1,31 @@ -+# -+# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := drivers.o -+ -+list-multi := tuntap.o ethertap.o -+ -+ethertap-objs := ethertap_kern.o ethertap_user.o -+tuntap-objs := tuntap_kern.o tuntap_user.o -+ -+obj-y = -+obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -+obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(USER_SINGLE_OBJS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+ethertap.o : $(ethertap-objs) -+ -+tuntap.o : $(tuntap-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) $(LD_RFLAG) -r -o $@ $($(patsubst %.o,%,$@)-objs) -Index: linux-2.4.29/arch/um/os-Linux/drivers/tuntap.h -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/tuntap.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/tuntap.h 2005-05-03 22:28:14.568395480 +0300 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_TUNTAP_H -+#define __UM_TUNTAP_H -+ -+#include "net_user.h" -+ -+struct tuntap_data { -+ char *dev_name; -+ int fixed_config; -+ char *gate_addr; -+ int fd; -+ void *dev; -+}; -+ -+extern struct net_user_info tuntap_user_info; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/tuntap_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/tuntap_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/tuntap_kern.c 2005-05-03 22:28:14.569395328 +0300 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/skbuff.h" -+#include "linux/init.h" -+#include "asm/errno.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "tuntap.h" -+ -+struct tuntap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void tuntap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct tuntap_data *tpri; -+ struct tuntap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ tpri = (struct tuntap_data *) pri->user; -+ *tpri = ((struct tuntap_data) -+ { .dev_name = init->dev_name, -+ .fixed_config = (init->dev_name != NULL), -+ .gate_addr = init->gate_addr, -+ .fd = -1, -+ .dev = dev }); -+ printk("TUN/TAP backend - "); -+ if(tpri->gate_addr != NULL) -+ printk("IP = %s", tpri->gate_addr); -+ printk("\n"); -+} -+ -+static int tuntap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int tuntap_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(net_write(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info tuntap_kern_info = { -+ .init = tuntap_init, -+ .protocol = eth_protocol, -+ .read = tuntap_read, -+ .write = tuntap_write, -+}; -+ -+int tuntap_setup(char *str, char **mac_out, void *data) -+{ -+ struct tuntap_init *init = data; -+ -+ *init = ((struct tuntap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "tuntap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); -+ -+ return(1); -+} -+ -+static struct transport tuntap_transport = { -+ .list = LIST_HEAD_INIT(tuntap_transport.list), -+ .name = "tuntap", -+ .setup = tuntap_setup, -+ .user = &tuntap_user_info, -+ .kern = &tuntap_kern_info, -+ .private_size = sizeof(struct tuntap_data), -+ .setup_size = sizeof(struct tuntap_init), -+}; -+ -+static int register_tuntap(void) -+{ -+ register_transport(&tuntap_transport); -+ return(1); -+} -+ -+__initcall(register_tuntap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/drivers/tuntap_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/drivers/tuntap_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/drivers/tuntap_user.c 2005-05-03 22:28:14.571395024 +0300 -@@ -0,0 +1,225 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/uio.h> -+#include <sys/ioctl.h> -+#include <net/if.h> -+#include <linux/if_tun.h> -+#include "net_user.h" -+#include "tuntap.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void tuntap_user_init(void *data, void *dev) -+{ -+ struct tuntap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if((pri->fd == -1) || pri->fixed_config) return; -+ open_addr(addr, netmask, pri->dev_name); -+} -+ -+static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if((pri->fd == -1) || pri->fixed_config) return; -+ close_addr(addr, netmask, pri->dev_name); -+} -+ -+struct tuntap_pre_exec_data { -+ int stdout; -+ int close_me; -+}; -+ -+static void tuntap_pre_exec(void *arg) -+{ -+ struct tuntap_pre_exec_data *data = arg; -+ -+ dup2(data->stdout, 1); -+ os_close_file(data->close_me); -+} -+ -+static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, -+ char *buffer, int buffer_len, int *used_out) -+{ -+ struct tuntap_pre_exec_data data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, -+ NULL }; -+ char buf[CMSG_SPACE(sizeof(*fd_out))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ int pid, n; -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ data.stdout = remote; -+ data.close_me = me; -+ -+ pid = run_helper(tuntap_pre_exec, &data, argv, NULL); -+ -+ if(pid < 0) return(-pid); -+ -+ os_close_file(remote); -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ if(buffer != NULL){ -+ iov = ((struct iovec) { buffer, buffer_len }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ } -+ else { -+ msg.msg_iov = NULL; -+ msg.msg_iovlen = 0; -+ } -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ n = recvmsg(me, &msg, 0); -+ *used_out = n; -+ if(n < 0){ -+ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ CATCH_EINTR(waitpid(pid, NULL, 0)); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ printk("tuntap_open_tramp : didn't receive a message\n"); -+ return(-EINVAL); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("tuntap_open_tramp : didn't receive a descriptor\n"); -+ return(-EINVAL); -+ } -+ *fd_out = ((int *) CMSG_DATA(cmsg))[0]; -+ return(0); -+} -+ -+static int tuntap_open(void *data) -+{ -+ struct ifreq ifr; -+ struct tuntap_data *pri = data; -+ char *output, *buffer; -+ int err, fds[2], len, used; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err < 0) -+ return(err); -+ -+ if(pri->fixed_config){ -+ pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); -+ if(pri->fd < 0){ -+ printk("Failed to open /dev/net/tun, err = %d\n", -+ -pri->fd); -+ return(pri->fd); -+ } -+ memset(&ifr, 0, sizeof(ifr)); -+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; -+ strncpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name) - 1); -+ if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ -+ printk("TUNSETIFF failed, errno = %d\n", errno); -+ os_close_file(pri->fd); -+ return(-errno); -+ } -+ } -+ else { -+ err = os_pipe(fds, 0, 0); -+ if(err < 0){ -+ printk("tuntap_open : os_pipe failed - err = %d\n", -+ -err); -+ return(err); -+ } -+ -+ buffer = get_output_buffer(&len); -+ if(buffer != NULL) len--; -+ used = 0; -+ -+ err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], -+ fds[1], buffer, len, &used); -+ -+ output = buffer; -+ if(err < 0) { -+ printk("%s", output); -+ free_output_buffer(buffer); -+ printk("tuntap_open_tramp failed - err = %d\n", -err); -+ return(err); -+ } -+ -+ pri->dev_name = uml_strdup(buffer); -+ output += IFNAMSIZ; -+ printk("%s", output); -+ free_output_buffer(buffer); -+ -+ os_close_file(fds[0]); -+ iter_addresses(pri->dev, open_addr, pri->dev_name); -+ } -+ -+ return(pri->fd); -+} -+ -+static void tuntap_close(int fd, void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if(!pri->fixed_config) -+ iter_addresses(pri->dev, close_addr, pri->dev_name); -+ os_close_file(fd); -+ pri->fd = -1; -+} -+ -+static int tuntap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info tuntap_user_info = { -+ .init = tuntap_user_init, -+ .open = tuntap_open, -+ .close = tuntap_close, -+ .remove = NULL, -+ .set_mtu = tuntap_set_mtu, -+ .add_address = tuntap_add_addr, -+ .delete_address = tuntap_del_addr, -+ .max_packet = MAX_PACKET -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/file.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/file.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/file.c 2005-05-03 22:28:14.574394568 +0300 -@@ -0,0 +1,942 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <signal.h> -+#include <utime.h> -+#include <dirent.h> -+#include <linux/kdev_t.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/ioctl.h> -+#include <sys/mount.h> -+#include <sys/uio.h> -+#include <sys/utsname.h> -+#include <sys/vfs.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+static void copy_stat(struct uml_stat *dst, struct stat64 *src) -+{ -+ *dst = ((struct uml_stat) { -+ .ust_major = MAJOR(src->st_dev), /* device */ -+ .ust_minor = MINOR(src->st_dev), -+ .ust_ino = src->st_ino, /* inode */ -+ .ust_mode = src->st_mode, /* protection */ -+ .ust_nlink = src->st_nlink, /* number of hard links */ -+ .ust_uid = src->st_uid, /* user ID of owner */ -+ .ust_gid = src->st_gid, /* group ID of owner */ -+ .ust_size = src->st_size, /* total size, in bytes */ -+ .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ -+ .ust_blocks = src->st_blocks, /* number of blocks allocated */ -+ .ust_atime = src->st_atime, /* time of last access */ -+ .ust_mtime = src->st_mtime, /* time of last modification */ -+ .ust_ctime = src->st_ctime, /* time of last change */ -+ .ust_rmajor = MAJOR(src->st_rdev), -+ .ust_rminor = MINOR(src->st_rdev), -+ }); -+} -+ -+int os_stat_fd(const int fd, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = fstat64(fd, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_stat_file(const char *file_name, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = stat64(file_name, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_lstat_file(const char *file_name, struct uml_stat *ubuf) -+{ -+ struct stat64 sbuf; -+ int err; -+ -+ do { -+ err = lstat64(file_name, &sbuf); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ if(ubuf != NULL) -+ copy_stat(ubuf, &sbuf); -+ return(err); -+} -+ -+int os_access(const char *file, int mode) -+{ -+ int amode, err; -+ -+ amode=(mode& OS_ACC_R_OK ? R_OK : 0) | (mode& OS_ACC_W_OK ? W_OK : 0) | -+ (mode& OS_ACC_X_OK ? X_OK : 0) | (mode& OS_ACC_F_OK ? F_OK : 0) ; -+ -+ err = access(file, amode); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_file_time(const char *file, unsigned long access, unsigned long mod) -+{ -+ struct utimbuf buf = ((struct utimbuf){ .actime = access, -+ .modtime = mod }); -+ int err; -+ -+ err = utime(file, &buf); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_file_perms(const char *file, int mode) -+{ -+ int err; -+ -+ err = chmod(file, mode); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_file_owner(const char *file, int owner, int group) -+{ -+ int err; -+ -+ err = chown(file, owner, group); -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+void os_print_error(int error, const char* str) -+{ -+ errno = error < 0 ? -error : error; -+ -+ perror(str); -+} -+ -+/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ -+int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ err = ioctl(fd, cmd, arg); -+ if(err < 0) -+ return(-errno); -+ -+ return(err); -+} -+ -+int os_window_size(int fd, int *rows, int *cols) -+{ -+ struct winsize size; -+ -+ if(ioctl(fd, TIOCGWINSZ, &size) < 0) -+ return(-errno); -+ -+ *rows = size.ws_row; -+ *cols = size.ws_col; -+ -+ return(0); -+} -+ -+int os_new_tty_pgrp(int fd, int pid) -+{ -+ if(ioctl(fd, TIOCSCTTY, 0) < 0){ -+ printk("TIOCSCTTY failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(tcsetpgrp(fd, pid) < 0){ -+ printk("tcsetpgrp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME: ensure namebuf in os_get_if_name is big enough */ -+int os_get_ifname(int fd, char* namebuf) -+{ -+ if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_set_slip(int fd) -+{ -+ int disc, sencap; -+ -+ disc = N_SLIP; -+ if(ioctl(fd, TIOCSETD, &disc) < 0){ -+ printk("Failed to set slip line discipline - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ sencap = 0; -+ if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ -+ printk("Failed to set slip encapsulation - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_set_owner(int fd, int pid) -+{ -+ if(fcntl(fd, F_SETOWN, pid) < 0){ -+ int save_errno = errno; -+ -+ if(fcntl(fd, F_GETOWN, 0) != pid) -+ return(-save_errno); -+ } -+ -+ return(0); -+} -+ -+/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ -+int os_sigio_async(int master, int slave) -+{ -+ int flags; -+ -+ flags = fcntl(master, F_GETFL); -+ if(flags < 0) { -+ printk("fcntl F_GETFL failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || -+ (fcntl(master, F_SETOWN, os_getpid()) < 0)){ -+ printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ -+ printk("fcntl F_SETFL failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_mode_fd(int fd, int mode) -+{ -+ int err; -+ -+ do { -+ err = fchmod(fd, mode); -+ } while((err < 0) && (errno==EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_file_type(char *file) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_lstat_file(file, &buf); -+ if(err < 0) -+ return(err); -+ -+ if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int os_file_mode(char *file, struct openflags *mode_out) -+{ -+ int err; -+ -+ *mode_out = OPENFLAGS(); -+ -+ err = os_access(file, OS_ACC_W_OK); -+ if((err < 0) && (err != -EACCES)) -+ return(err); -+ -+ *mode_out = of_write(*mode_out); -+ -+ err = os_access(file, OS_ACC_R_OK); -+ if((err < 0) && (err != -EACCES)) -+ return(err); -+ -+ *mode_out = of_read(*mode_out); -+ -+ return(0); -+} -+ -+int os_open_file(char *file, struct openflags flags, int mode) -+{ -+ int fd, f = 0; -+ -+ if(flags.r && flags.w) f = O_RDWR; -+ else if(flags.r) f = O_RDONLY; -+ else if(flags.w) f = O_WRONLY; -+ else f = 0; -+ -+ if(flags.s) f |= O_SYNC; -+ if(flags.c) f |= O_CREAT; -+ if(flags.t) f |= O_TRUNC; -+ if(flags.e) f |= O_EXCL; -+ if(flags.d) f |= O_DIRECT; -+ -+ fd = open64(file, f, mode); -+ if(fd < 0) -+ return(-errno); -+ -+ if(flags.cl && fcntl(fd, F_SETFD, 1)){ -+ os_close_file(fd); -+ return(-errno); -+ } -+ -+ return(fd); -+} -+ -+void *os_open_dir(char *path, int *err_out) -+{ -+ void *dir; -+ -+ dir = opendir(path); -+ *err_out = -errno; -+ return(dir); -+} -+ -+int os_seek_dir(void *stream, unsigned long long pos) -+{ -+ seekdir(stream, pos); -+ return(0); -+} -+ -+int os_read_dir(void *stream, unsigned long long *ino_out, char **name_out) -+{ -+ struct dirent *ent; -+ -+ errno = 0; -+ ent = readdir(stream); -+ if(ent == NULL){ -+ if(errno != 0) -+ return(-errno); -+ *name_out = NULL; -+ return(0); -+ } -+ -+ *ino_out = ent->d_ino; -+ *name_out = ent->d_name; -+ return(0); -+} -+ -+int os_tell_dir(void *stream) -+{ -+ return(telldir(stream)); -+} -+ -+int os_close_dir(void *stream) -+{ -+ int err; -+ -+ err = closedir(stream); -+ if(err < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_remove_file(const char *file) -+{ -+ int err; -+ -+ err = unlink(file); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_move_file(const char *from, const char *to) -+{ -+ int err; -+ -+ err = rename(from, to); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_truncate_fd(int fd, unsigned long long len) -+{ -+ int err; -+ -+ err = ftruncate64(fd, len); -+ if(err) -+ return(-errno); -+ return(0); -+} -+ -+int os_truncate_file(const char *file, unsigned long long len) -+{ -+ int err; -+ -+ err = truncate64(file, len); -+ if(err) -+ return(-errno); -+ return(0); -+} -+ -+int os_connect_socket(char *name) -+{ -+ struct sockaddr_un sock; -+ int fd, err; -+ -+ sock.sun_family = AF_UNIX; -+ snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); -+ -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(fd < 0) -+ return(fd); -+ -+ err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); -+ if(err) -+ return(-errno); -+ -+ return(fd); -+} -+ -+void os_close_file(int fd) -+{ -+ close(fd); -+} -+ -+int os_seek_file(int fd, __u64 offset) -+{ -+ __u64 actual; -+ -+ actual = lseek64(fd, offset, SEEK_SET); -+ if(actual != offset) -+ return(-errno); -+ return(0); -+} -+ -+static int fault_buffer(void *start, int len, -+ int (*copy_proc)(void *addr, void *buf, int len)) -+{ -+ int page = getpagesize(), i; -+ char c; -+ -+ for(i = 0; i < len; i += page){ -+ if((*copy_proc)(start + i, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ if((len % page) != 0){ -+ if((*copy_proc)(start + len - 1, &c, sizeof(c))) -+ return(-EFAULT); -+ } -+ return(0); -+} -+ -+static int file_io(int fd, void *buf, int len, -+ int (*io_proc)(int fd, void *buf, int len), -+ int (*copy_user_proc)(void *addr, void *buf, int len)) -+{ -+ int n, err; -+ -+ do { -+ n = (*io_proc)(fd, buf, len); -+ if((n < 0) && (errno == EFAULT)){ -+ err = fault_buffer(buf, len, copy_user_proc); -+ if(err) -+ return(err); -+ n = (*io_proc)(fd, buf, len); -+ } -+ } while((n < 0) && (errno == EINTR)); -+ -+ if(n < 0) -+ return(-errno); -+ return(n); -+} -+ -+int os_read_file(int fd, void *buf, int len) -+{ -+ return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, -+ copy_from_user_proc)); -+} -+ -+int os_write_file(int fd, const void *buf, int len) -+{ -+ return(file_io(fd, (void *) buf, len, -+ (int (*)(int, void *, int)) write, copy_to_user_proc)); -+} -+ -+int os_file_size(char *file, long long *size_out) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); -+ } -+ -+ if(S_ISBLK(buf.ust_mode)){ -+ int fd, blocks; -+ -+ fd = os_open_file(file, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open \"%s\", errno = %d\n", file, -fd); -+ return(fd); -+ } -+ if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ -+ printk("Couldn't get the block size of \"%s\", " -+ "errno = %d\n", file, errno); -+ err = -errno; -+ os_close_file(fd); -+ return(err); -+ } -+ *size_out = ((long long) blocks) * 512; -+ os_close_file(fd); -+ return(0); -+ } -+ *size_out = buf.ust_size; -+ return(0); -+} -+ -+int os_fd_size(int fd, long long *size_out) -+{ -+ struct stat buf; -+ int err; -+ -+ err = fstat(fd, &buf); -+ if(err) -+ return(-errno); -+ -+ *size_out = buf.st_size; -+ return(0); -+} -+ -+int os_file_modtime(char *file, unsigned long *modtime) -+{ -+ struct uml_stat buf; -+ int err; -+ -+ err = os_stat_file(file, &buf); -+ if(err < 0){ -+ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); -+ return(err); -+ } -+ -+ *modtime = buf.ust_mtime; -+ return(0); -+} -+ -+int os_get_exec_close(int fd, int* close_on_exec) -+{ -+ int ret; -+ -+ do { -+ ret = fcntl(fd, F_GETFD); -+ } while((ret < 0) && (errno == EINTR)) ; -+ -+ if(ret < 0) -+ return(-errno); -+ -+ *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; -+ return(ret); -+} -+ -+int os_set_exec_close(int fd, int close_on_exec) -+{ -+ int flag, err; -+ -+ if(close_on_exec) flag = FD_CLOEXEC; -+ else flag = 0; -+ -+ do { -+ err = fcntl(fd, F_SETFD, flag); -+ } while((err < 0) && (errno == EINTR)) ; -+ -+ if(err < 0) -+ return(-errno); -+ return(err); -+} -+ -+int os_pipe(int *fds, int stream, int close_on_exec) -+{ -+ int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; -+ -+ err = socketpair(AF_UNIX, type, 0, fds); -+ if(err < 0) -+ return(-errno); -+ -+ if(!close_on_exec) -+ return(0); -+ -+ err = os_set_exec_close(fds[0], 1); -+ if(err < 0) -+ goto error; -+ -+ err = os_set_exec_close(fds[1], 1); -+ if(err < 0) -+ goto error; -+ -+ return(0); -+ -+ error: -+ printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); -+ os_close_file(fds[1]); -+ os_close_file(fds[0]); -+ return(err); -+} -+ -+int os_set_fd_async(int fd, int owner) -+{ -+ /* XXX This should do F_GETFL first */ -+ if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){ -+ printk("os_set_fd_async : failed to set O_ASYNC and " -+ "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); -+ return(-errno); -+ } -+#ifdef notdef -+ if(fcntl(fd, F_SETFD, 1) < 0){ -+ printk("os_set_fd_async : Setting FD_CLOEXEC failed, " -+ "errno = %d\n", errno); -+ } -+#endif -+ -+ if((fcntl(fd, F_SETSIG, SIGIO) < 0) || -+ (fcntl(fd, F_SETOWN, owner) < 0)){ -+ printk("os_set_fd_async : Failed to fcntl F_SETOWN " -+ "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd, -+ owner, errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int os_clear_fd_async(int fd) -+{ -+ int flags = fcntl(fd, F_GETFL); -+ -+ flags &= ~(O_ASYNC | O_NONBLOCK); -+ if(fcntl(fd, F_SETFL, flags) < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_set_fd_block(int fd, int blocking) -+{ -+ int flags; -+ -+ flags = fcntl(fd, F_GETFL); -+ -+ if(blocking) flags &= ~O_NONBLOCK; -+ else flags |= O_NONBLOCK; -+ -+ if(fcntl(fd, F_SETFL, flags) < 0){ -+ printk("Failed to change blocking on fd # %d, errno = %d\n", -+ fd, errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+int os_accept_connection(int fd) -+{ -+ int new; -+ -+ new = accept(fd, NULL, 0); -+ if(new < 0) -+ return(-errno); -+ return(new); -+} -+ -+#ifndef SHUT_RD -+#define SHUT_RD 0 -+#endif -+ -+#ifndef SHUT_WR -+#define SHUT_WR 1 -+#endif -+ -+#ifndef SHUT_RDWR -+#define SHUT_RDWR 2 -+#endif -+ -+int os_shutdown_socket(int fd, int r, int w) -+{ -+ int what, err; -+ -+ if(r && w) what = SHUT_RDWR; -+ else if(r) what = SHUT_RD; -+ else if(w) what = SHUT_WR; -+ else { -+ printk("os_shutdown_socket : neither r or w was set\n"); -+ return(-EINVAL); -+ } -+ err = shutdown(fd, what); -+ if(err < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_rcv_fd(int fd, int *helper_pid_out) -+{ -+ int new, n; -+ char buf[CMSG_SPACE(sizeof(new))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ iov = ((struct iovec) { .iov_base = helper_pid_out, -+ .iov_len = sizeof(*helper_pid_out) }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ -+ n = recvmsg(fd, &msg, 0); -+ if(n < 0) -+ return(-errno); -+ -+ else if(n != sizeof(iov.iov_len)) -+ *helper_pid_out = -1; -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ printk("rcv_fd didn't receive anything, error = %d\n", errno); -+ return(-1); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("rcv_fd didn't receive a descriptor\n"); -+ return(-1); -+ } -+ -+ new = ((int *) CMSG_DATA(cmsg))[0]; -+ return(new); -+} -+ -+int os_create_unix_socket(char *file, int len, int close_on_exec) -+{ -+ struct sockaddr_un addr; -+ int sock, err; -+ -+ sock = socket(PF_UNIX, SOCK_DGRAM, 0); -+ if (sock < 0){ -+ printk("create_unix_socket - socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if(close_on_exec) { -+ err = os_set_exec_close(sock, 1); -+ if(err < 0) -+ printk("create_unix_socket : close_on_exec failed, " -+ "err = %d", -err); -+ } -+ -+ addr.sun_family = AF_UNIX; -+ -+ /* XXX Be more careful about overflow */ -+ snprintf(addr.sun_path, len, "%s", file); -+ -+ err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); -+ if (err < 0){ -+ printk("create_listening_socket at '%s' - bind failed, " -+ "errno = %d\n", file, errno); -+ return(-errno); -+ } -+ -+ return(sock); -+} -+ -+int os_make_symlink(const char *to, const char *from) -+{ -+ int err; -+ -+ err = symlink(to, from); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_read_symlink(const char *file, char *buf, int size) -+{ -+ int err; -+ -+ err = readlink(file, buf, size); -+ if(err < 0) -+ return(-errno); -+ -+ return(err); -+} -+ -+int os_link_file(const char *to, const char *from) -+{ -+ int err; -+ -+ err = link(to, from); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_make_dir(const char *dir, int mode) -+{ -+ int err; -+ -+ err = mkdir(dir, mode); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_make_dev(const char *name, int mode, int major, int minor) -+{ -+ int err; -+ -+ err = mknod(name, mode, MKDEV(major, minor)); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_remove_dir(const char *dir) -+{ -+ int err; -+ -+ err = rmdir(dir); -+ if(err) -+ return(-errno); -+ -+ return(0); -+} -+ -+void os_flush_stdout(void) -+{ -+ fflush(stdout); -+} -+ -+int os_lock_file(int fd, int excl) -+{ -+ int type = excl ? F_WRLCK : F_RDLCK; -+ struct flock lock = ((struct flock) { .l_type = type, -+ .l_whence = SEEK_SET, -+ .l_start = 0, -+ .l_len = 0 } ); -+ int err, save; -+ -+ err = fcntl(fd, F_SETLK, &lock); -+ if(!err) -+ goto out; -+ -+ save = -errno; -+ err = fcntl(fd, F_GETLK, &lock); -+ if(err){ -+ err = -errno; -+ goto out; -+ } -+ -+ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); -+ err = save; -+ out: -+ return(err); -+} -+ -+int os_stat_filesystem(char *path, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out) -+{ -+ struct statfs64 buf; -+ int err; -+ -+ err = statfs64(path, &buf); -+ if(err < 0) -+ return(-errno); -+ -+ *bsize_out = buf.f_bsize; -+ *blocks_out = buf.f_blocks; -+ *bfree_out = buf.f_bfree; -+ *bavail_out = buf.f_bavail; -+ *files_out = buf.f_files; -+ *ffree_out = buf.f_ffree; -+ memcpy(fsid_out, &buf.f_fsid, -+ sizeof(buf.f_fsid) > fsid_size ? fsid_size : -+ sizeof(buf.f_fsid)); -+ *namelen_out = buf.f_namelen; -+ spare_out[0] = buf.f_spare[0]; -+ spare_out[1] = buf.f_spare[1]; -+ spare_out[2] = buf.f_spare[2]; -+ spare_out[3] = buf.f_spare[3]; -+ spare_out[4] = buf.f_spare[4]; -+ spare_out[5] = buf.f_spare[5]; -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/include/file.h -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/include/file.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/include/file.h 2005-05-03 22:28:14.575394416 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_FILE_H__ -+#define __OS_FILE_H__ -+ -+#define DEV_NULL "/dev/null" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/Makefile 2005-05-03 22:28:14.575394416 +0300 -@@ -0,0 +1,23 @@ -+# -+# Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = aio.o file.o process.o time.o tty.o -+ -+HAVE_AIO_ABI = $(shell [ -e /usr/include/linux/aio_abi.h ] && \ -+ echo -DHAVE_AIO_ABI) -+HAVE_AIO_LIBC = $(shell objdump -T /lib/libc-*.so | grep io_submit && \ -+ echo -DHAVE_AIO_LIBC) -+CFLAGS_aio.o = $(HAVE_AIO_ABI) $(HAVE_AIO_LIBC) -+ -+include $(TOPDIR)/Rules.make -+ -+$(obj-y) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ -+archmrproper: -Index: linux-2.4.29/arch/um/os-Linux/process.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/process.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/process.c 2005-05-03 22:28:14.577394112 +0300 -@@ -0,0 +1,151 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <errno.h> -+#include <signal.h> -+#include <sys/mman.h> -+#include <sys/wait.h> -+#include "os.h" -+#include "user.h" -+#include "user_util.h" -+ -+#define ARBITRARY_ADDR -1 -+#define FAILURE_PID -1 -+ -+#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") -+#define COMM_SCANF "%*[^)])" -+ -+unsigned long os_process_pc(int pid) -+{ -+ char proc_stat[STAT_PATH_LEN], buf[256]; -+ unsigned long pc; -+ int fd, err; -+ -+ sprintf(proc_stat, "/proc/%d/stat", pid); -+ fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("os_process_pc - couldn't open '%s', err = %d\n", -+ proc_stat, -fd); -+ return(ARBITRARY_ADDR); -+ } -+ err = os_read_file(fd, buf, sizeof(buf)); -+ if(err < 0){ -+ printk("os_process_pc - couldn't read '%s', err = %d\n", -+ proc_stat, -err); -+ os_close_file(fd); -+ return(ARBITRARY_ADDR); -+ } -+ os_close_file(fd); -+ pc = ARBITRARY_ADDR; -+ if(sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %*d %lu", &pc) != 1){ -+ printk("os_process_pc - couldn't find pc in '%s'\n", buf); -+ } -+ return(pc); -+} -+ -+int os_process_parent(int pid) -+{ -+ char stat[STAT_PATH_LEN]; -+ char data[256]; -+ int parent, n, fd; -+ -+ if(pid == -1) return(-1); -+ -+ snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); -+ fd = os_open_file(stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open '%s', err = %d\n", stat, -fd); -+ return(FAILURE_PID); -+ } -+ -+ n = os_read_file(fd, data, sizeof(data)); -+ os_close_file(fd); -+ -+ if(n < 0){ -+ printk("Couldn't read '%s', err = %d\n", stat, -n); -+ return(FAILURE_PID); -+ } -+ -+ parent = FAILURE_PID; -+ n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); -+ if(n != 1) -+ printk("Failed to scan '%s'\n", data); -+ -+ return(parent); -+} -+ -+void os_stop_process(int pid) -+{ -+ kill(pid, SIGSTOP); -+} -+ -+void os_kill_process(int pid, int reap_child) -+{ -+ kill(pid, SIGKILL); -+ if(reap_child) -+ CATCH_EINTR(waitpid(pid, NULL, 0)); -+ -+} -+ -+void os_usr1_process(int pid) -+{ -+ kill(pid, SIGUSR1); -+} -+ -+int os_getpid(void) -+{ -+ return(getpid()); -+} -+ -+int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, -+ int r, int w, int x) -+{ -+ void *loc; -+ int prot; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ -+ loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, -+ fd, off); -+ if(loc == MAP_FAILED) -+ return(-errno); -+ return(0); -+} -+ -+int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) -+{ -+ int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0)); -+ -+ if(mprotect(addr, len, prot) < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_unmap_memory(void *addr, int len) -+{ -+ int err; -+ -+ err = munmap(addr, len); -+ if(err < 0) -+ return(-errno); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/time.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/time.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/time.c 2005-05-03 22:28:14.578393960 +0300 -@@ -0,0 +1,21 @@ -+#include <stdlib.h> -+#include <sys/time.h> -+ -+unsigned long long os_usecs(void) -+{ -+ struct timeval tv; -+ -+ gettimeofday(&tv, NULL); -+ return((unsigned long long) tv.tv_sec * 1000000 + tv.tv_usec); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/os-Linux/tty.c -=================================================================== ---- linux-2.4.29.orig/arch/um/os-Linux/tty.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/os-Linux/tty.c 2005-05-03 22:28:14.579393808 +0300 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+struct grantpt_info { -+ int fd; -+ int res; -+ int err; -+}; -+ -+static void grantpt_cb(void *arg) -+{ -+ struct grantpt_info *info = arg; -+ -+ info->res = grantpt(info->fd); -+ info->err = errno; -+} -+ -+int get_pty(void) -+{ -+ struct grantpt_info info; -+ int fd; -+ -+ fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); -+ return(fd); -+ } -+ -+ info.fd = fd; -+ initial_thread_cb(grantpt_cb, &info); -+ -+ if(info.res < 0){ -+ printk("get_pty : Couldn't grant pty - errno = %d\n", -+ -info.err); -+ return(-1); -+ } -+ if(unlockpt(fd) < 0){ -+ printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); -+ return(-1); -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/bugs.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/bugs.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/bugs.c 2005-05-03 22:28:14.580393656 +0300 -@@ -0,0 +1,222 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <sys/signal.h> -+#include <asm/ldt.h> -+#include "kern_util.h" -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "task.h" -+#include "os.h" -+ -+#define MAXTOKEN 64 -+ -+/* Set during early boot */ -+int host_has_cmov = 1; -+int host_has_xmm = 0; -+ -+static char token(int fd, char *buf, int len, char stop) -+{ -+ int n; -+ char *ptr, *end, c; -+ -+ ptr = buf; -+ end = &buf[len]; -+ do { -+ n = os_read_file(fd, ptr, sizeof(*ptr)); -+ c = *ptr++; -+ if(n != sizeof(*ptr)){ -+ if(n == 0) return(0); -+ printk("Reading /proc/cpuinfo failed, err = %d\n", -n); -+ if(n < 0) -+ return(n); -+ else -+ return(-EIO); -+ } -+ } while((c != '\n') && (c != stop) && (ptr < end)); -+ -+ if(ptr == end){ -+ printk("Failed to find '%c' in /proc/cpuinfo\n", stop); -+ return(-1); -+ } -+ *(ptr - 1) = '\0'; -+ return(c); -+} -+ -+static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) -+{ -+ int n; -+ char c; -+ -+ scratch[len - 1] = '\0'; -+ while(1){ -+ c = token(fd, scratch, len - 1, ':'); -+ if(c <= 0) -+ return(0); -+ else if(c != ':'){ -+ printk("Failed to find ':' in /proc/cpuinfo\n"); -+ return(0); -+ } -+ -+ if(!strncmp(scratch, key, strlen(key))) -+ return(1); -+ -+ do { -+ n = os_read_file(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("Failed to find newline in " -+ "/proc/cpuinfo, err = %d\n", -n); -+ return(0); -+ } -+ } while(c != '\n'); -+ } -+ return(0); -+} -+ -+int cpu_feature(char *what, char *buf, int len) -+{ -+ int fd, ret = 0; -+ -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ if(!find_cpuinfo_line(fd, what, buf, len)){ -+ printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); -+ goto out_close; -+ } -+ -+ token(fd, buf, len, '\n'); -+ ret = 1; -+ -+ out_close: -+ os_close_file(fd); -+ return(ret); -+} -+ -+static int check_cpu_flag(char *feature, int *have_it) -+{ -+ char buf[MAXTOKEN], c; -+ int fd, len = sizeof(buf)/sizeof(buf[0]); -+ -+ printk("Checking for host processor %s support...", feature); -+ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); -+ return(0); -+ } -+ -+ *have_it = 0; -+ if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) -+ goto out; -+ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c != ' '){ -+ printk("Failed to find ' ' in /proc/cpuinfo\n"); -+ goto out; -+ } -+ -+ while(1){ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c == '\n') break; -+ -+ if(!strcmp(buf, feature)){ -+ *have_it = 1; -+ goto out; -+ } -+ } -+ out: -+ if(*have_it == 0) printk("No\n"); -+ else if(*have_it == 1) printk("Yes\n"); -+ os_close_file(fd); -+ return(1); -+} -+ -+#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems -+ * for some people. -+ */ -+static void disable_lcall(void) -+{ -+ struct modify_ldt_ldt_s ldt; -+ int err; -+ -+ bzero(&ldt, sizeof(ldt)); -+ ldt.entry_number = 7; -+ ldt.base_addr = 0; -+ ldt.limit = 0; -+ err = modify_ldt(1, &ldt, sizeof(ldt)); -+ if(err) -+ printk("Failed to disable lcall7 - errno = %d\n", errno); -+} -+#endif -+ -+void arch_init_thread(void) -+{ -+#if 0 -+ disable_lcall(); -+#endif -+} -+ -+void arch_check_bugs(void) -+{ -+ int have_it; -+ -+ if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ -+ printk("/proc/cpuinfo not available - skipping CPU capability " -+ "checks\n"); -+ return; -+ } -+ if(check_cpu_flag("cmov", &have_it)) -+ host_has_cmov = have_it; -+ if(check_cpu_flag("xmm", &have_it)) -+ host_has_xmm = have_it; -+} -+ -+int arch_handle_signal(int sig, union uml_pt_regs *regs) -+{ -+ unsigned char tmp[2]; -+ -+ /* This is testing for a cmov (0x0f 0x4x) instruction causing a -+ * SIGILL in init. -+ */ -+ if((sig != SIGILL) || (TASK_PID(get_current()) != 1)) return(0); -+ -+ if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) -+ panic("SIGILL in init, could not read instructions!\n"); -+ if((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) -+ return(0); -+ -+ if(host_has_cmov == 0) -+ panic("SIGILL caused by cmov, which this processor doesn't " -+ "implement, boot a filesystem compiled for older " -+ "processors"); -+ else if(host_has_cmov == 1) -+ panic("SIGILL caused by cmov, which this processor claims to " -+ "implement"); -+ else if(host_has_cmov == -1) -+ panic("SIGILL caused by cmov, couldn't tell if this processor " -+ "implements it, boot a filesystem compiled for older " -+ "processors"); -+ else panic("Bad value for host_has_cmov (%d)", host_has_cmov); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/checksum.S -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/checksum.S 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/checksum.S 2005-05-03 22:28:14.582393352 +0300 -@@ -0,0 +1,460 @@ -+/* -+ * INET An implementation of the TCP/IP protocol suite for the LINUX -+ * operating system. INET is implemented using the BSD Socket -+ * interface as the means of communication with the user level. -+ * -+ * IP/TCP/UDP checksumming routines -+ * -+ * Authors: Jorge Cwik, <jorge@laser.satlink.net> -+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no> -+ * Tom May, <ftom@netcom.com> -+ * Pentium Pro/II routines: -+ * Alexander Kjeldaas <astor@guardian.no> -+ * Finn Arne Gangstad <finnag@guardian.no> -+ * Lots of code moved from tcp.c and ip.c; see those files -+ * for more names. -+ * -+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception -+ * handling. -+ * Andi Kleen, add zeroing on error -+ * converted to pure assembler -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include <linux/config.h> -+#include <asm/errno.h> -+ -+/* -+ * computes a partial checksum, e.g. for TCP/UDP fragments -+ */ -+ -+/* -+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) -+ */ -+ -+.text -+.align 4 -+.globl arch_csum_partial -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+ /* -+ * Experiments with Ethernet and SLIP connections show that buff -+ * is aligned on either a 2-byte or 4-byte boundary. We get at -+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. -+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte -+ * alignment for the unrolled loop. -+ */ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: unsigned char *buff -+ testl $2, %esi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. -+ jmp 4f -+1: movw (%esi), %bx -+ addl $2, %esi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, %edx -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+1: movl (%esi), %ebx -+ adcl %ebx, %eax -+ movl 4(%esi), %ebx -+ adcl %ebx, %eax -+ movl 8(%esi), %ebx -+ adcl %ebx, %eax -+ movl 12(%esi), %ebx -+ adcl %ebx, %eax -+ movl 16(%esi), %ebx -+ adcl %ebx, %eax -+ movl 20(%esi), %ebx -+ adcl %ebx, %eax -+ movl 24(%esi), %ebx -+ adcl %ebx, %eax -+ movl 28(%esi), %ebx -+ adcl %ebx, %eax -+ lea 32(%esi), %esi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+3: adcl (%esi), %eax -+ lea 4(%esi), %esi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+ movw (%esi),%cx -+ leal 2(%esi),%esi -+ je 6f -+ shll $16,%ecx -+5: movb (%esi),%cl -+6: addl %ecx,%eax -+ adcl $0, %eax -+7: -+ popl %ebx -+ popl %esi -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: const unsigned char *buf -+ -+ testl $2, %esi -+ jnz 30f -+10: -+ movl %ecx, %edx -+ movl %ecx, %ebx -+ andl $0x7c, %ebx -+ shrl $7, %ecx -+ addl %ebx,%esi -+ shrl $2, %ebx -+ negl %ebx -+ lea 45f(%ebx,%ebx,2), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+ -+ # Handle 2-byte-aligned regions -+20: addw (%esi), %ax -+ lea 2(%esi), %esi -+ adcl $0, %eax -+ jmp 10b -+ -+30: subl $2, %ecx -+ ja 20b -+ je 32f -+ movzbl (%esi),%ebx # csumming 1 byte, 2-aligned -+ addl %ebx, %eax -+ adcl $0, %eax -+ jmp 80f -+32: -+ addw (%esi), %ax # csumming 2 bytes, 2-aligned -+ adcl $0, %eax -+ jmp 80f -+ -+40: -+ addl -128(%esi), %eax -+ adcl -124(%esi), %eax -+ adcl -120(%esi), %eax -+ adcl -116(%esi), %eax -+ adcl -112(%esi), %eax -+ adcl -108(%esi), %eax -+ adcl -104(%esi), %eax -+ adcl -100(%esi), %eax -+ adcl -96(%esi), %eax -+ adcl -92(%esi), %eax -+ adcl -88(%esi), %eax -+ adcl -84(%esi), %eax -+ adcl -80(%esi), %eax -+ adcl -76(%esi), %eax -+ adcl -72(%esi), %eax -+ adcl -68(%esi), %eax -+ adcl -64(%esi), %eax -+ adcl -60(%esi), %eax -+ adcl -56(%esi), %eax -+ adcl -52(%esi), %eax -+ adcl -48(%esi), %eax -+ adcl -44(%esi), %eax -+ adcl -40(%esi), %eax -+ adcl -36(%esi), %eax -+ adcl -32(%esi), %eax -+ adcl -28(%esi), %eax -+ adcl -24(%esi), %eax -+ adcl -20(%esi), %eax -+ adcl -16(%esi), %eax -+ adcl -12(%esi), %eax -+ adcl -8(%esi), %eax -+ adcl -4(%esi), %eax -+45: -+ lea 128(%esi), %esi -+ adcl $0, %eax -+ dec %ecx -+ jge 40b -+ movl %edx, %ecx -+50: andl $3, %ecx -+ jz 80f -+ -+ # Handle the last 1-3 bytes without jumping -+ notl %ecx # 1->2, 2->1, 3->0, higher bits are masked -+ movl $0xffffff,%ebx # by the shll and shrl instructions -+ shll $3,%ecx -+ shrl %cl,%ebx -+ andl -128(%esi),%ebx # esi is 4-aligned so should be ok -+ addl %ebx,%eax -+ adcl $0,%eax -+80: -+ popl %ebx -+ popl %esi -+ ret -+ -+#endif -+ -+/* -+unsigned int csum_partial_copy_generic (const char *src, char *dst, -+ int len, int sum, int *src_err_ptr, int *dst_err_ptr) -+ */ -+ -+/* -+ * Copy from ds while checksumming, otherwise like csum_partial -+ * -+ * The macros SRC and DST specify the type of access for the instruction. -+ * thus we can call a custom exception handler for all access types. -+ * -+ * FIXME: could someone double-check whether I haven't mixed up some SRC and -+ * DST definitions? It's damn hard to trigger all cases. I hope I got -+ * them all but there's no guarantee. -+ */ -+ -+#define SRC(y...) \ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6001f ; \ -+ .previous -+ -+#define DST(y...) \ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6002f ; \ -+ .previous -+ -+.align 4 -+.globl csum_partial_copy_generic_i386 -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+#define ARGBASE 16 -+#define FP 12 -+ -+csum_partial_copy_generic_i386: -+ subl $4,%esp -+ pushl %edi -+ pushl %esi -+ pushl %ebx -+ movl ARGBASE+16(%esp),%eax # sum -+ movl ARGBASE+12(%esp),%ecx # len -+ movl ARGBASE+4(%esp),%esi # src -+ movl ARGBASE+8(%esp),%edi # dst -+ -+ testl $2, %edi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. -+ jmp 4f -+SRC(1: movw (%esi), %bx ) -+ addl $2, %esi -+DST( movw %bx, (%edi) ) -+ addl $2, %edi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, FP(%esp) -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+SRC(1: movl (%esi), %ebx ) -+SRC( movl 4(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 4(%edi) ) -+ -+SRC( movl 8(%esi), %ebx ) -+SRC( movl 12(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 8(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 12(%edi) ) -+ -+SRC( movl 16(%esi), %ebx ) -+SRC( movl 20(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 16(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 20(%edi) ) -+ -+SRC( movl 24(%esi), %ebx ) -+SRC( movl 28(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 24(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 28(%edi) ) -+ -+ lea 32(%esi), %esi -+ lea 32(%edi), %edi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl FP(%esp), %edx -+ movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+SRC(3: movl (%esi), %ebx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ lea 4(%esi), %esi -+ lea 4(%edi), %edi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+SRC( movw (%esi), %cx ) -+ leal 2(%esi), %esi -+DST( movw %cx, (%edi) ) -+ leal 2(%edi), %edi -+ je 6f -+ shll $16,%ecx -+SRC(5: movb (%esi), %cl ) -+DST( movb %cl, (%edi) ) -+6: addl %ecx, %eax -+ adcl $0, %eax -+7: -+5000: -+ -+# Exception handler: -+.section .fixup, "ax" -+ -+6001: -+ movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ -+ # zero the complete destination - computing the rest -+ # is too much work -+ movl ARGBASE+8(%esp), %edi # dst -+ movl ARGBASE+12(%esp), %ecx # len -+ xorl %eax,%eax -+ rep ; stosb -+ -+ jmp 5000b -+ -+6002: -+ movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT,(%ebx) -+ jmp 5000b -+ -+.previous -+ -+ popl %ebx -+ popl %esi -+ popl %edi -+ popl %ecx # equivalent to addl $4,%esp -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+#define ROUND1(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ addl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ROUND(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ adcl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ARGBASE 12 -+ -+csum_partial_copy_generic_i386: -+ pushl %ebx -+ pushl %edi -+ pushl %esi -+ movl ARGBASE+4(%esp),%esi #src -+ movl ARGBASE+8(%esp),%edi #dst -+ movl ARGBASE+12(%esp),%ecx #len -+ movl ARGBASE+16(%esp),%eax #sum -+# movl %ecx, %edx -+ movl %ecx, %ebx -+ movl %esi, %edx -+ shrl $6, %ecx -+ andl $0x3c, %ebx -+ negl %ebx -+ subl %ebx, %esi -+ subl %ebx, %edi -+ lea -1(%esi),%edx -+ andl $-32,%edx -+ lea 3f(%ebx,%ebx), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+1: addl $64,%esi -+ addl $64,%edi -+ SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -+ ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) -+ ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) -+ ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) -+ ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) -+3: adcl $0,%eax -+ addl $64, %edx -+ dec %ecx -+ jge 1b -+4: movl ARGBASE+12(%esp),%edx #len -+ andl $3, %edx -+ jz 7f -+ cmpl $2, %edx -+ jb 5f -+SRC( movw (%esi), %dx ) -+ leal 2(%esi), %esi -+DST( movw %dx, (%edi) ) -+ leal 2(%edi), %edi -+ je 6f -+ shll $16,%edx -+5: -+SRC( movb (%esi), %dl ) -+DST( movb %dl, (%edi) ) -+6: addl %edx, %eax -+ adcl $0, %eax -+7: -+.section .fixup, "ax" -+6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ # zero the complete destination (computing the rest is too much work) -+ movl ARGBASE+8(%esp),%edi # dst -+ movl ARGBASE+12(%esp),%ecx # len -+ xorl %eax,%eax -+ rep; stosb -+ jmp 7b -+6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT, (%ebx) -+ jmp 7b -+.previous -+ -+ popl %esi -+ popl %edi -+ popl %ebx -+ ret -+ -+#undef ROUND -+#undef ROUND1 -+ -+#endif -Index: linux-2.4.29/arch/um/sys-i386/fault.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/fault.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/fault.c 2005-05-03 22:28:14.583393200 +0300 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+extern unsigned long search_exception_table(unsigned long addr); -+ -+int arch_fixup(unsigned long address, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ unsigned long fixup; -+ -+ fixup = search_exception_table(address); -+ if(fixup != 0){ -+ sc->eip = fixup; -+ return(1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/ksyms.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/ksyms.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/ksyms.c 2005-05-03 22:28:14.584393048 +0300 -@@ -0,0 +1,17 @@ -+#include "linux/module.h" -+#include "linux/in6.h" -+#include "linux/rwsem.h" -+#include "asm/byteorder.h" -+#include "asm/semaphore.h" -+#include "asm/uaccess.h" -+#include "asm/checksum.h" -+#include "asm/errno.h" -+ -+EXPORT_SYMBOL(__down_failed); -+EXPORT_SYMBOL(__down_failed_interruptible); -+EXPORT_SYMBOL(__down_failed_trylock); -+EXPORT_SYMBOL(__up_wakeup); -+ -+/* Networking helper routines. */ -+EXPORT_SYMBOL(csum_partial_copy_from); -+EXPORT_SYMBOL(csum_partial_copy_to); -Index: linux-2.4.29/arch/um/sys-i386/ldt.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/ldt.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/ldt.c 2005-05-03 22:28:14.585392896 +0300 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/slab.h" -+#include "asm/uaccess.h" -+#include "asm/ptrace.h" -+#include "choose-mode.h" -+#include "kern.h" -+ -+#ifdef CONFIG_MODE_TT -+extern int modify_ldt(int func, void *ptr, unsigned long bytecount); -+ -+/* XXX this needs copy_to_user and copy_from_user */ -+ -+int sys_modify_ldt_tt(int func, void *ptr, unsigned long bytecount) -+{ -+ if(verify_area(VERIFY_READ, ptr, bytecount)) return(-EFAULT); -+ return(modify_ldt(func, ptr, bytecount)); -+} -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+extern int userspace_pid; -+ -+int sys_modify_ldt_skas(int func, void *ptr, unsigned long bytecount) -+{ -+ struct ptrace_ldt ldt; -+ void *buf; -+ int res, n; -+ -+ buf = kmalloc(bytecount, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ res = 0; -+ -+ switch(func){ -+ case 1: -+ case 0x11: -+ res = copy_from_user(buf, ptr, bytecount); -+ break; -+ } -+ -+ if(res != 0){ -+ res = -EFAULT; -+ goto out; -+ } -+ -+ ldt = ((struct ptrace_ldt) { .func = func, -+ .ptr = buf, -+ .bytecount = bytecount }); -+ res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); -+ if(res < 0) -+ goto out; -+ -+ switch(func){ -+ case 0: -+ case 2: -+ n = res; -+ res = copy_to_user(ptr, buf, n); -+ if(res != 0) -+ res = -EFAULT; -+ else -+ res = n; -+ break; -+ } -+ -+ out: -+ kfree(buf); -+ return(res); -+} -+#endif -+ -+int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -+{ -+ return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func, -+ ptr, bytecount)); -+} -+ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/Makefile 2005-05-03 22:28:14.586392744 +0300 -@@ -0,0 +1,46 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o ptrace.o \ -+ ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o -+export-objs = ksyms.o -+ -+USER_OBJS = bugs.o ptrace_user.o sigcontext.o fault.o -+ -+SYMLINKS = semaphore.c extable.c -+ -+semaphore.c-dir = kernel -+extable.c-dir = mm -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+define make_link -+ -rm -f $1 -+ ln -sf $(TOPDIR)/arch/i386/$($1-dir)/$1 $1 -+endef -+ -+$(SYMLINKS): -+ $(call make_link,$@) -+ -+clean: -+ $(MAKE) -C util clean -+ rm -f $(SYMLINKS) -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+archclean: -+ -+archdep: -+ -+modules: -Index: linux-2.4.29/arch/um/sys-i386/ptrace.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/ptrace.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/ptrace.c 2005-05-03 22:28:14.588392440 +0300 -@@ -0,0 +1,367 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/elf.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/unistd.h" -+#include "ptrace_user.h" -+#include "sysdep/sigcontext.h" -+#include "sysdep/sc.h" -+ -+void arch_switch(void) -+{ -+ update_debugregs(current->thread.arch.debugregs_seq); -+} -+ -+int is_syscall(unsigned long addr) -+{ -+ unsigned short instr; -+ int n; -+ -+ n = copy_from_user(&instr, (void *) addr, sizeof(instr)); -+ if(n){ -+ printk("is_syscall : failed to read instruction from 0x%lx\n", -+ addr); -+ return(0); -+ } -+ /* int 0x80 or sysenter */ -+ return((instr == 0x80cd) || (instr == 0x340f)); -+} -+ -+/* determines which flags the user has access to. */ -+/* 1 = access 0 = no access */ -+#define FLAG_MASK 0x00044dd5 -+ -+int putreg(struct task_struct *child, int regno, unsigned long value) -+{ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_FS(&child->thread.regs) = value; -+ return 0; -+ case GS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_GS(&child->thread.regs) = value; -+ return 0; -+ case DS: -+ case ES: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case SS: -+ case CS: -+ if ((value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case EFL: -+ value &= FLAG_MASK; -+ value |= PT_REGS_EFLAGS(&child->thread.regs); -+ break; -+ } -+ PT_REGS_SET(&child->thread.regs, regno, value); -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, int regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ case GS: -+ case DS: -+ case ES: -+ case SS: -+ case CS: -+ retval = 0xffff; -+ /* fall through */ -+ default: -+ retval &= PT_REG(&child->thread.regs, regno); -+ } -+ return retval; -+} -+ -+struct i387_fxsave_struct { -+ unsigned short cwd; -+ unsigned short swd; -+ unsigned short twd; -+ unsigned short fop; -+ long fip; -+ long fcs; -+ long foo; -+ long fos; -+ long mxcsr; -+ long reserved; -+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ -+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ -+ long padding[56]; -+}; -+ -+/* -+ * FPU tag word conversions. -+ */ -+ -+static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) -+{ -+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */ -+ -+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ -+ tmp = ~twd; -+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ -+ /* and move the valid bits to the lower byte. */ -+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ -+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ -+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ -+ return tmp; -+} -+ -+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) -+{ -+ struct _fpxreg *st = NULL; -+ unsigned long twd = (unsigned long) fxsave->twd; -+ unsigned long tag; -+ unsigned long ret = 0xffff0000; -+ int i; -+ -+#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); -+ -+ for ( i = 0 ; i < 8 ; i++ ) { -+ if ( twd & 0x1 ) { -+ st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); -+ -+ switch ( st->exponent & 0x7fff ) { -+ case 0x7fff: -+ tag = 2; /* Special */ -+ break; -+ case 0x0000: -+ if ( !st->significand[0] && -+ !st->significand[1] && -+ !st->significand[2] && -+ !st->significand[3] ) { -+ tag = 1; /* Zero */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ default: -+ if ( st->significand[3] & 0x8000 ) { -+ tag = 0; /* Valid */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ } -+ } else { -+ tag = 3; /* Empty */ -+ } -+ ret |= (tag << (2 * i)); -+ twd = twd >> 1; -+ } -+ return ret; -+} -+ -+/* -+ * FXSR floating point environment conversions. -+ */ -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_to_user_tt(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpreg *to; -+ struct _fpxreg *from; -+ int i; -+ -+ env[0] = (unsigned long)fxsave->cwd | 0xffff0000; -+ env[1] = (unsigned long)fxsave->swd | 0xffff0000; -+ env[2] = twd_fxsr_to_i387(fxsave); -+ env[3] = fxsave->fip; -+ env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); -+ env[5] = fxsave->foo; -+ env[6] = fxsave->fos; -+ -+ if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) -+ return 1; -+ -+ to = &buf->_st[0]; -+ from = (struct _fpxreg *) &fxsave->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_to_user( to, from, sizeof(*to) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_to_user(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ return(CHOOSE_MODE(convert_fxsr_to_user_tt(buf, regs), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_from_user_tt(struct pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpxreg *to; -+ struct _fpreg *from; -+ int i; -+ -+ if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) -+ return 1; -+ -+ fxsave->cwd = (unsigned short)(env[0] & 0xffff); -+ fxsave->swd = (unsigned short)(env[1] & 0xffff); -+ fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); -+ fxsave->fip = env[3]; -+ fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); -+ fxsave->fcs = (env[4] & 0xffff); -+ fxsave->foo = env[5]; -+ fxsave->fos = env[6]; -+ -+ to = (struct _fpxreg *) &fxsave->st_space[0]; -+ from = &buf->_st[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_from_user( to, from, sizeof(*from) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_from_user(struct pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ return(CHOOSE_MODE(convert_fxsr_from_user_tt(regs, buf), 0)); -+} -+ -+int get_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_to_user((struct _fpstate *) buf, -+ &child->thread.regs); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+int set_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_from_user(&child->thread.regs, -+ (struct _fpstate *) buf); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+#ifdef CONFIG_MODE_TT -+int get_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_to_user((void *) buf, fxsave, -+ sizeof(struct user_fxsr_struct)); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int get_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(get_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+int set_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_from_user(fxsave, (void *) buf, -+ sizeof(struct user_fxsr_struct) ); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int set_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(set_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef notdef -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) -+{ -+ fpu->cwd = (((SC_FP_CW(PT_REGS_SC(regs)) & 0xffff) << 16) | -+ (SC_FP_SW(PT_REGS_SC(regs)) & 0xffff)); -+ fpu->swd = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->twd = SC_FP_IPOFF(PT_REGS_SC(regs)); -+ fpu->fip = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->fcs = SC_FP_DATAOFF(PT_REGS_SC(regs)); -+ fpu->foo = SC_FP_DATASEL(PT_REGS_SC(regs)); -+ fpu->fos = 0; -+ memcpy(fpu->st_space, (void *) SC_FP_ST(PT_REGS_SC(regs)), -+ sizeof(fpu->st_space)); -+ return(1); -+} -+#endif -+ -+#ifdef CONFIG_MODE_TT -+static inline void copy_fpu_fxsave_tt(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ struct i387_fxsave_struct *fpu = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned short *to; -+ unsigned short *from; -+ int i; -+ -+ memcpy( buf, fpu, 7 * sizeof(long) ); -+ -+ to = (unsigned short *) &buf->st_space[0]; -+ from = (unsigned short *) &fpu->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { -+ memcpy( to, from, 5 * sizeof(unsigned short) ); -+ } -+} -+#endif -+ -+static inline void copy_fpu_fxsave(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ (void) CHOOSE_MODE(copy_fpu_fxsave_tt(regs, buf), 0); -+} -+ -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu ) -+{ -+ copy_fpu_fxsave(regs, (struct user_i387_struct *) fpu); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/ptrace_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/ptrace_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/ptrace_user.c 2005-05-03 22:28:14.589392288 +0300 -@@ -0,0 +1,118 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <errno.h> -+#include <unistd.h> -+#include <linux/stddef.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <asm/user.h> -+#include "kern_util.h" -+#include "sysdep/thread.h" -+#include "user.h" -+#include "os.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ return(ptrace(PTRACE_GETREGS, pid, 0, regs_out)); -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_SETREGS, pid, 0, regs)); -+} -+ -+int ptrace_getfpregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_GETFPREGS, pid, 0, regs)); -+} -+ -+static void write_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ if((i == 4) || (i == 5)) continue; -+ if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i], -+ regs[i]) < 0) -+ printk("write_debugregs - ptrace failed on " -+ "register %d, value = 0x%x, errno = %d\n", i, -+ regs[i], errno); -+ } -+} -+ -+static void read_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ regs[i] = ptrace(PTRACE_PEEKUSER, pid, -+ &dummy->u_debugreg[i], 0); -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+static unsigned long kernel_debugregs[8] = { [ 0 ... 7 ] = 0 }; -+static int debugregs_seq = 0; -+ -+void arch_enter_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, TASK_DEBUGREGS(task)); -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void arch_leave_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, kernel_debugregs); -+ write_debugregs(pid, TASK_DEBUGREGS(task)); -+} -+ -+void ptrace_pokeuser(unsigned long addr, unsigned long data) -+{ -+ if((addr < offsetof(struct user, u_debugreg[0])) || -+ (addr > offsetof(struct user, u_debugreg[7]))) -+ return; -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if(kernel_debugregs[addr] == data) return; -+ -+ kernel_debugregs[addr] = data; -+ debugregs_seq++; -+} -+ -+static void update_debugregs_cb(void *arg) -+{ -+ int pid = *((int *) arg); -+ -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void update_debugregs(int seq) -+{ -+ int me; -+ -+ if(seq == debugregs_seq) return; -+ -+ me = os_getpid(); -+ initial_thread_cb(update_debugregs_cb, &me); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/sigcontext.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/sigcontext.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/sigcontext.c 2005-05-03 22:28:14.590392136 +0300 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stddef.h> -+#include <string.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "kern_util.h" -+#include "frame_user.h" -+ -+int sc_size(void *data) -+{ -+ struct arch_frame_data *arch = data; -+ -+ return(sizeof(struct sigcontext) + arch->fpstate_size); -+} -+ -+void sc_to_sc(void *to_ptr, void *from_ptr) -+{ -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ int size = sizeof(*to) + signal_frame_sc.common.arch.fpstate_size; -+ -+ memcpy(to, from, size); -+ if(from->fpstate != NULL) to->fpstate = (struct _fpstate *) (to + 1); -+} -+ -+unsigned long *sc_sigmask(void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ -+ return(&sc->oldmask); -+} -+ -+int sc_get_fpregs(unsigned long buf, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct _fpstate *from = sc->fpstate, *to = (struct _fpstate *) buf; -+ int err = 0; -+ -+ if(from == NULL){ -+ err |= clear_user_proc(&to->cw, sizeof(to->cw)); -+ err |= clear_user_proc(&to->sw, sizeof(to->sw)); -+ err |= clear_user_proc(&to->tag, sizeof(to->tag)); -+ err |= clear_user_proc(&to->ipoff, sizeof(to->ipoff)); -+ err |= clear_user_proc(&to->cssel, sizeof(to->cssel)); -+ err |= clear_user_proc(&to->dataoff, sizeof(to->dataoff)); -+ err |= clear_user_proc(&to->datasel, sizeof(to->datasel)); -+ err |= clear_user_proc(&to->_st, sizeof(to->_st)); -+ } -+ else { -+ err |= copy_to_user_proc(&to->cw, &from->cw, sizeof(to->cw)); -+ err |= copy_to_user_proc(&to->sw, &from->sw, sizeof(to->sw)); -+ err |= copy_to_user_proc(&to->tag, &from->tag, -+ sizeof(to->tag)); -+ err |= copy_to_user_proc(&to->ipoff, &from->ipoff, -+ sizeof(to->ipoff)); -+ err |= copy_to_user_proc(&to->cssel,& from->cssel, -+ sizeof(to->cssel)); -+ err |= copy_to_user_proc(&to->dataoff, &from->dataoff, -+ sizeof(to->dataoff)); -+ err |= copy_to_user_proc(&to->datasel, &from->datasel, -+ sizeof(to->datasel)); -+ err |= copy_to_user_proc(to->_st, from->_st, sizeof(to->_st)); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/syscalls.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/syscalls.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/syscalls.c 2005-05-03 22:28:14.590392136 +0300 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/unistd.h" -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. -+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+extern int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+int old_mmap_i386(struct mmap_arg_struct *arg) -+{ -+ struct mmap_arg_struct a; -+ int err = -EFAULT; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ goto out; -+ -+ err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -+ out: -+ return err; -+} -+ -+struct sel_arg_struct { -+ unsigned long n; -+ fd_set *inp, *outp, *exp; -+ struct timeval *tvp; -+}; -+ -+int old_select(struct sel_arg_struct *arg) -+{ -+ struct sel_arg_struct a; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ return -EFAULT; -+ /* sys_select() does the appropriate kernel locking */ -+ return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-i386/sysrq.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/sysrq.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/sysrq.c 2005-05-03 22:28:14.591391984 +0300 -@@ -0,0 +1,30 @@ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs *regs) -+{ -+ printk("\n"); -+ printk("EIP: %04lx:[<%08lx>] CPU: %d %s", -+ 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), -+ smp_processor_id(), print_tainted()); -+ if (PT_REGS_CS(regs) & 3) -+ printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), -+ PT_REGS_SP(regs)); -+ printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), -+ print_tainted()); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ PT_REGS_EAX(regs), PT_REGS_EBX(regs), -+ PT_REGS_ECX(regs), -+ PT_REGS_EDX(regs)); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ PT_REGS_ESI(regs), PT_REGS_EDI(regs), -+ PT_REGS_EBP(regs)); -+ printk(" DS: %04lx ES: %04lx\n", -+ 0xffff & PT_REGS_DS(regs), -+ 0xffff & PT_REGS_ES(regs)); -+ -+ show_trace((unsigned long *) ®s); -+} -Index: linux-2.4.29/arch/um/sys-i386/util/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/util/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/util/Makefile 2005-05-03 22:28:14.592391832 +0300 -@@ -0,0 +1,28 @@ -+EXE = mk_sc mk_thread -+ -+include $(TOPDIR)/Rules.make -+ -+all : $(EXE) -+ -+mk_sc : mk_sc.o -+ $(HOSTCC) -o mk_sc mk_sc.o -+ -+mk_sc.o : mk_sc.c -+ $(HOSTCC) -c $< -+ -+mk_thread : mk_thread_user.o mk_thread_kern.o -+ $(HOSTCC) -o mk_thread mk_thread_user.o mk_thread_kern.o -+ -+mk_thread_user.o : mk_thread_user.c -+ $(HOSTCC) -c $< -+ -+mk_thread_kern.o : mk_thread_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(EXE) *.o -+ -+archmrproper : clean -+ -+fastdep : -+ -Index: linux-2.4.29/arch/um/sys-i386/util/mk_sc.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/util/mk_sc.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/util/mk_sc.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,52 @@ -+#include <stdio.h> -+#include <signal.h> -+#include <linux/stddef.h> -+ -+#define SC_OFFSET(name, field) \ -+ printf("#define " name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ -+ offsetof(struct sigcontext, field)) -+ -+#define SC_FP_OFFSET(name, field) \ -+ printf("#define " name \ -+ "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+#define SC_FP_OFFSET_PTR(name, field, type) \ -+ printf("#define " name \ -+ "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+int main(int argc, char **argv) -+{ -+ SC_OFFSET("SC_IP", eip); -+ SC_OFFSET("SC_SP", esp); -+ SC_OFFSET("SC_FS", fs); -+ SC_OFFSET("SC_GS", gs); -+ SC_OFFSET("SC_DS", ds); -+ SC_OFFSET("SC_ES", es); -+ SC_OFFSET("SC_SS", ss); -+ SC_OFFSET("SC_CS", cs); -+ SC_OFFSET("SC_EFLAGS", eflags); -+ SC_OFFSET("SC_EAX", eax); -+ SC_OFFSET("SC_EBX", ebx); -+ SC_OFFSET("SC_ECX", ecx); -+ SC_OFFSET("SC_EDX", edx); -+ SC_OFFSET("SC_EDI", edi); -+ SC_OFFSET("SC_ESI", esi); -+ SC_OFFSET("SC_EBP", ebp); -+ SC_OFFSET("SC_TRAPNO", trapno); -+ SC_OFFSET("SC_ERR", err); -+ SC_OFFSET("SC_CR2", cr2); -+ SC_OFFSET("SC_FPSTATE", fpstate); -+ SC_OFFSET("SC_SIGMASK", oldmask); -+ SC_FP_OFFSET("SC_FP_CW", cw); -+ SC_FP_OFFSET("SC_FP_SW", sw); -+ SC_FP_OFFSET("SC_FP_TAG", tag); -+ SC_FP_OFFSET("SC_FP_IPOFF", ipoff); -+ SC_FP_OFFSET("SC_FP_CSSEL", cssel); -+ SC_FP_OFFSET("SC_FP_DATAOFF", dataoff); -+ SC_FP_OFFSET("SC_FP_DATASEL", datasel); -+ SC_FP_OFFSET_PTR("SC_FP_ST", _st, "struct _fpstate"); -+ SC_FP_OFFSET_PTR("SC_FXSR_ENV", _fxsr_env, "void"); -+ return(0); -+} -Index: linux-2.4.29/arch/um/sys-i386/util/mk_thread_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/util/mk_thread_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/util/mk_thread_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,22 @@ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sched.h" -+ -+extern void print_head(void); -+extern void print_constant_ptr(char *name, int value); -+extern void print_constant(char *name, char *type, int value); -+extern void print_tail(void); -+ -+#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_ptr("TASK_DEBUGREGS", THREAD_OFFSET(arch.debugregs)); -+#ifdef CONFIG_MODE_TT -+ print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -+#endif -+ print_tail(); -+ return(0); -+} -+ -Index: linux-2.4.29/arch/um/sys-i386/util/mk_thread_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-i386/util/mk_thread_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-i386/util/mk_thread_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_thread\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_THREAD_H\n"); -+ printf("#define __UM_THREAD_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_ptr(char *name, int value) -+{ -+ printf("#define %s(task) ((unsigned long *) " -+ "&(((char *) (task))[%d]))\n", name, value); -+} -+ -+void print_constant(char *name, char *type, int value) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -Index: linux-2.4.29/arch/um/sys-ia64/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ia64/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ia64/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,26 @@ -+OBJ = sys.o -+ -+OBJS = -+ -+all: $(OBJ) -+ -+$(OBJ): $(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+clean: -+ rm -f $(OBJS) -+ -+fastdep: -+ -+archmrproper: -+ -+archclean: -+ rm -f link.ld -+ @$(MAKEBOOT) clean -+ -+archdep: -+ @$(MAKEBOOT) dep -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -Index: linux-2.4.29/arch/um/sys-ppc/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,80 @@ -+OBJ = sys.o -+ -+.S.o: -+ $(CC) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ -+OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \ -+ ptrace_user.o sysrq.o -+ -+EXTRA_AFLAGS := -DCONFIG_ALL_PPC -I. -I$(TOPDIR)/arch/ppc/kernel -+ -+all: $(OBJ) -+ -+$(OBJ): $(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+ -+ptrace_user.o: ptrace_user.c -+ $(CC) -D__KERNEL__ $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+sigcontext.o: sigcontext.c -+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+semaphore.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+checksum.S: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/lib/$@ $@ -+ -+mk_defs.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.head: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.h: mk_defs.c ppc_defs.head \ -+ $(TOPDIR)/include/asm-ppc/mmu.h \ -+ $(TOPDIR)/include/asm-ppc/processor.h \ -+ $(TOPDIR)/include/asm-ppc/pgtable.h \ -+ $(TOPDIR)/include/asm-ppc/ptrace.h -+# $(CC) $(CFLAGS) -S mk_defs.c -+ cp ppc_defs.head ppc_defs.h -+# for bk, this way we can write to the file even if it's not checked out -+ echo '#define THREAD 608' >> ppc_defs.h -+ echo '#define PT_REGS 8' >> ppc_defs.h -+ echo '#define CLONE_VM 256' >> ppc_defs.h -+# chmod u+w ppc_defs.h -+# grep '^#define' mk_defs.s >> ppc_defs.h -+# rm mk_defs.s -+ -+# the asm link is horrible, and breaks the other targets. This is also -+# not going to work with parallel makes. -+ -+checksum.o: checksum.S -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+misc.o: misc.S ppc_defs.h -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+clean: -+ rm -f $(OBJS) -+ rm -f ppc_defs.h -+ rm -f checksum.S semaphore.c mk_defs.c -+ -+fastdep: -+ -+dep: -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -Index: linux-2.4.29/arch/um/sys-ppc/misc.S -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/misc.S 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/misc.S 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,116 @@ -+/* -+ * This file contains miscellaneous low-level functions. -+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) -+ * -+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) -+ * and Paul Mackerras. -+ * -+ * A couple of functions stolen from arch/ppc/kernel/misc.S for UML -+ * by Chris Emerson. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ */ -+ -+#include <linux/config.h> -+#include <asm/processor.h> -+#include "ppc_asm.h" -+ -+#if defined(CONFIG_4xx) || defined(CONFIG_8xx) -+#define CACHE_LINE_SIZE 16 -+#define LG_CACHE_LINE_SIZE 4 -+#define MAX_COPY_PREFETCH 1 -+#elif !defined(CONFIG_PPC64BRIDGE) -+#define CACHE_LINE_SIZE 32 -+#define LG_CACHE_LINE_SIZE 5 -+#define MAX_COPY_PREFETCH 4 -+#else -+#define CACHE_LINE_SIZE 128 -+#define LG_CACHE_LINE_SIZE 7 -+#define MAX_COPY_PREFETCH 1 -+#endif /* CONFIG_4xx || CONFIG_8xx */ -+ -+ .text -+ -+/* -+ * Clear a page using the dcbz instruction, which doesn't cause any -+ * memory traffic (except to write out any cache lines which get -+ * displaced). This only works on cacheable memory. -+ */ -+_GLOBAL(clear_page) -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+#ifdef CONFIG_8xx -+ li r4, 0 -+1: stw r4, 0(r3) -+ stw r4, 4(r3) -+ stw r4, 8(r3) -+ stw r4, 12(r3) -+#else -+1: dcbz 0,r3 -+#endif -+ addi r3,r3,CACHE_LINE_SIZE -+ bdnz 1b -+ blr -+ -+/* -+ * Copy a whole page. We use the dcbz instruction on the destination -+ * to reduce memory traffic (it eliminates the unnecessary reads of -+ * the destination into cache). This requires that the destination -+ * is cacheable. -+ */ -+#define COPY_16_BYTES \ -+ lwz r6,4(r4); \ -+ lwz r7,8(r4); \ -+ lwz r8,12(r4); \ -+ lwzu r9,16(r4); \ -+ stw r6,4(r3); \ -+ stw r7,8(r3); \ -+ stw r8,12(r3); \ -+ stwu r9,16(r3) -+ -+_GLOBAL(copy_page) -+ addi r3,r3,-4 -+ addi r4,r4,-4 -+ li r5,4 -+ -+#ifndef CONFIG_8xx -+#if MAX_COPY_PREFETCH > 1 -+ li r0,MAX_COPY_PREFETCH -+ li r11,4 -+ mtctr r0 -+11: dcbt r11,r4 -+ addi r11,r11,CACHE_LINE_SIZE -+ bdnz 11b -+#else /* MAX_COPY_PREFETCH == 1 */ -+ dcbt r5,r4 -+ li r11,CACHE_LINE_SIZE+4 -+#endif /* MAX_COPY_PREFETCH */ -+#endif /* CONFIG_8xx */ -+ -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+1: -+#ifndef CONFIG_8xx -+ dcbt r11,r4 -+ dcbz r5,r3 -+#endif -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 32 -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 64 -+ COPY_16_BYTES -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 128 -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+#endif -+#endif -+#endif -+ bdnz 1b -+ blr -Index: linux-2.4.29/arch/um/sys-ppc/miscthings.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/miscthings.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/miscthings.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,53 @@ -+#include "linux/threads.h" -+#include "linux/stddef.h" // for NULL -+#include "linux/elf.h" // for AT_NULL -+ -+/* The following function nicked from arch/ppc/kernel/process.c and -+ * adapted slightly */ -+/* -+ * XXX ld.so expects the auxiliary table to start on -+ * a 16-byte boundary, so we have to find it and -+ * move it up. :-( -+ */ -+void shove_aux_table(unsigned long sp) -+{ -+ int argc; -+ char *p; -+ unsigned long e; -+ unsigned long aux_start, offset; -+ -+ argc = *(int *)sp; -+ sp += sizeof(int) + (argc + 1) * sizeof(char *); -+ /* skip over the environment pointers */ -+ do { -+ p = *(char **)sp; -+ sp += sizeof(char *); -+ } while (p != NULL); -+ aux_start = sp; -+ /* skip to the end of the auxiliary table */ -+ do { -+ e = *(unsigned long *)sp; -+ sp += 2 * sizeof(unsigned long); -+ } while (e != AT_NULL); -+ offset = ((aux_start + 15) & ~15) - aux_start; -+ if (offset != 0) { -+ do { -+ sp -= sizeof(unsigned long); -+ e = *(unsigned long *)sp; -+ *(unsigned long *)(sp + offset) = e; -+ } while (sp > aux_start); -+ } -+} -+/* END stuff taken from arch/ppc/kernel/process.c */ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-ppc/ptrace.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/ptrace.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/ptrace.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,28 @@ -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+ -+int putreg(struct task_struct *child, unsigned long regno, -+ unsigned long value) -+{ -+ child->thread.process_regs.regs[regno >> 2] = value; -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, unsigned long regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ retval &= child->thread.process_regs.regs[regno >> 2]; -+ return retval; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-ppc/ptrace_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/ptrace_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/ptrace_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,40 @@ -+#include <sys/ptrace.h> -+#include <errno.h> -+#include <asm/ptrace.h> -+#include "sysdep/ptrace.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ errno = 0; -+ regs_out->regs[i] = ptrace(PTRACE_PEEKUSER, pid, i*4, 0); -+ if (errno) { -+ return -errno; -+ } -+ } -+ return 0; -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs_in) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) { -+ if (ptrace(PTRACE_POKEUSER, pid, i*4, regs_in->regs[i]) < 0) { -+ return -errno; -+ } -+ } -+ } -+ return 0; -+} -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-ppc/sigcontext.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/sigcontext.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/sigcontext.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,15 @@ -+#include "asm/ptrace.h" -+#include "asm/sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/sys-ppc/sysrq.c -=================================================================== ---- linux-2.4.29.orig/arch/um/sys-ppc/sysrq.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/sys-ppc/sysrq.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs_subarch *regs) -+{ -+ printk("\n"); -+ printk("show_regs(): insert regs here.\n"); -+#if 0 -+ printk("\n"); -+ printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip, -+ smp_processor_id()); -+ if (regs->xcs & 3) -+ printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp); -+ printk(" EFLAGS: %08lx\n", regs->eflags); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ regs->eax, regs->ebx, regs->ecx, regs->edx); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ regs->esi, regs->edi, regs->ebp); -+ printk(" DS: %04x ES: %04x\n", -+ 0xffff & regs->xds, 0xffff & regs->xes); -+#endif -+ -+ show_trace(®s->gpr[1]); -+} -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/arch/um/util/Makefile -=================================================================== ---- linux-2.4.29.orig/arch/um/util/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/util/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,26 @@ -+ALL = mk_task mk_constants -+ -+all : $(ALL) -+ -+mk_task : mk_task_user.o mk_task_kern.o -+ $(HOSTCC) -o mk_task mk_task_user.o mk_task_kern.o -+ -+mk_task_user.o : mk_task_user.c -+ $(HOSTCC) -c $< -+ -+mk_task_kern.o : mk_task_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+mk_constants : mk_constants_user.o mk_constants_kern.o -+ $(HOSTCC) -o mk_constants mk_constants_user.o mk_constants_kern.o -+ -+mk_constants_user.o : mk_constants_user.c -+ $(HOSTCC) -c $< -+ -+mk_constants_kern.o : mk_constants_kern.c -+ $(HOSTCC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(ALL) *.o *~ -+ -+archmrproper : clean -Index: linux-2.4.29/arch/um/util/mk_constants_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/util/mk_constants_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/util/mk_constants_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,25 @@ -+#include "linux/kernel.h" -+#include "linux/stringify.h" -+#include "asm/page.h" -+ -+extern void print_head(void); -+extern void print_constant_str(char *name, char *value); -+extern void print_constant_int(char *name, int value); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); -+ -+ print_constant_str("UM_KERN_EMERG", KERN_EMERG); -+ print_constant_str("UM_KERN_ALERT", KERN_ALERT); -+ print_constant_str("UM_KERN_CRIT", KERN_CRIT); -+ print_constant_str("UM_KERN_ERR", KERN_ERR); -+ print_constant_str("UM_KERN_WARNING", KERN_WARNING); -+ print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); -+ print_constant_str("UM_KERN_INFO", KERN_INFO); -+ print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); -+ print_tail(); -+ return(0); -+} -Index: linux-2.4.29/arch/um/util/mk_constants_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/util/mk_constants_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/util/mk_constants_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,28 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_constants\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_CONSTANTS_H\n"); -+ printf("#define __UM_CONSTANTS_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_str(char *name, char *value) -+{ -+ printf("#define %s \"%s\"\n", name, value); -+} -+ -+void print_constant_int(char *name, int value) -+{ -+ printf("#define %s %d\n", name, value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -Index: linux-2.4.29/arch/um/util/mk_task_kern.c -=================================================================== ---- linux-2.4.29.orig/arch/um/util/mk_task_kern.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/util/mk_task_kern.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,17 @@ -+#include "linux/sched.h" -+#include "linux/stddef.h" -+ -+extern void print(char *name, char *type, int offset); -+extern void print_ptr(char *name, char *type, int offset); -+extern void print_head(void); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_ptr("TASK_REGS", "union uml_pt_regs", -+ offsetof(struct task_struct, thread.regs)); -+ print("TASK_PID", "int", offsetof(struct task_struct, pid)); -+ print_tail(); -+ return(0); -+} -Index: linux-2.4.29/arch/um/util/mk_task_user.c -=================================================================== ---- linux-2.4.29.orig/arch/um/util/mk_task_user.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/arch/um/util/mk_task_user.c 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print(char *name, char *type, int offset) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_ptr(char *name, char *type, int offset) -+{ -+ printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_task\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __TASK_H\n"); -+ printf("#define __TASK_H\n"); -+ printf("\n"); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -Index: linux-2.4.29/CREDITS -=================================================================== ---- linux-2.4.29.orig/CREDITS 2005-05-03 21:08:24.000000000 +0300 -+++ linux-2.4.29/CREDITS 2005-05-03 22:28:14.000000000 +0300 -@@ -434,6 +434,7 @@ - E: lars@nocrew.org - W: http://lars.nocrew.org/ - D: dsp56k device driver -+D: ptrace proxy in user mode kernel port - S: Kopmansg 2 - S: 411 13 Goteborg - S: Sweden -@@ -727,7 +728,7 @@ - E: jdike@karaya.com - W: http://user-mode-linux.sourceforge.net - D: User mode kernel port --S: RR1 Box 67C -+S: 375 Tubbs Hill Rd - S: Deering NH 03244 - S: USA - -Index: linux-2.4.29/Documentation/Configure.help -=================================================================== ---- linux-2.4.29.orig/Documentation/Configure.help 2005-05-03 21:09:27.000000000 +0300 -+++ linux-2.4.29/Documentation/Configure.help 2005-05-03 23:55:57.615290736 +0300 -@@ -16184,6 +16184,63 @@ - The module will be called speedtch.o. If you want to compile it as - a module, say M here and read <file:Documentation/modules.txt>. - -+Support for /proc/mm -+CONFIG_PROC_MM -+ Enables support for address space separation through /proc/mm. -+ A host kernel needs to have this enabled in order for UML to -+ run in skas mode. UML kernels do not need to have this option -+ unless they will host sub-UMLs. -+ -+ If you don't know what this does just say Y. -+ -+Separate Kernel Address Space support -+CONFIG_MODE_SKAS -+ This option controls whether skas (separate kernel address space) -+ support is compiled in. If you have applied the skas patch to the -+ host and enabled support for /proc/mm in the host kernel, then you -+ certainly want to say Y here (and consider saying N to -+ CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this -+ option will shrink the UML binary slightly. -+ -+Tracing thread support -+CONFIG_MODE_TT -+ This option controls whether tracing thread support is compiled -+ into UML. Normally, this should be set to Y. If you intend to -+ use only skas mode (and the host has the skas patch applied to it), -+ then it is OK to say N here. -+ -+Force a static link -+CONFIG_STATIC_LINK -+ If CONFIG_MODE_TT is disabled, then this option gives you the ability -+ to force a static link of UML. Normally, if only skas mode is built -+ in to UML, it will be linked as a shared binary. This is inconvenient -+ for use in a chroot jail. So, if you intend to run UML inside a -+ chroot, and you disable CONFIG_MODE_TT, you probably want to say Y -+ here. -+ -+2G/2G host address space split -+CONFIG_HOST_2G_2G -+ Most Linux machines are configured so that the kernel occupies the -+ upper 1G of the 4G address space and processes use the lower 3G. -+ However, some machine are configured with a 2G/2G split, with the -+ kernel occupying the upper 2G and processes using the lower 2G. -+ -+ To allow UML to run on a such host you have to say Y here. N should be -+ a safe choice most of the time. -+ -+Kernel stack size order -+CONFIG_KERNEL_STACK_ORDER -+ This option determines the size of UML kernel stacks. They will -+ be 1 << order pages. The default is OK unless you're running Valgrind -+ on UML, in which case, set this to 3. -+ -+UML ubd block driver -+CONFIG_BLK_DEV_UBD -+ The User-Mode Linux port includes a driver called UBD which will let -+ you access arbitrary files on the host computer as block devices. -+ Unless you know that you do not need such virtual block devices say -+ Y here. -+ - CONFIG_USB_GADGET - USB is a master/slave protocol, organized with one master - host (such as a PC) controlling up to 127 peripheral devices. -@@ -16289,17 +16346,15 @@ - - Always do synchronous disk IO for UBD - CONFIG_BLK_DEV_UBD_SYNC -- The User-Mode Linux port includes a driver called UBD which will let -- you access arbitrary files on the host computer as block devices. -- Writes to such a block device are not immediately written to the -- host's disk; this may cause problems if, for example, the User-Mode -- Linux 'Virtual Machine' uses a journalling file system and the host -- computer crashes. -+ Writes to the virtual block device are not immediately written to the host's -+ disk; this may cause problems if, for example, the User-Mode Linux -+ 'Virtual Machine' uses a journalling filesystem and the host computer -+ crashes. - - Synchronous operation (i.e. always writing data to the host's disk - immediately) is configurable on a per-UBD basis by using a special - kernel command line option. Alternatively, you can say Y here to -- turn on synchronous operation by default for all block. -+ turn on synchronous operation by default for all block devices. - - If you're running a journalling file system (like reiserfs, for - example) in your virtual machine, you will want to say Y here. If -@@ -16311,6 +16366,7 @@ - CONFIG_PT_PROXY - This option enables a debugging interface which allows gdb to debug - the kernel without needing to actually attach to kernel threads. -+ CONFIG_XTERM_CHAN must be enabled in order to enable CONFIG_PT_PROXY. - If you want to do kernel debugging, say Y here; otherwise say N. - - Management console -@@ -16357,6 +16413,9 @@ - See <http://user-mode-linux.sourceforge.net/gprof.html> for more - details. - -+ This option requires that CONFIG_MODE_TT be disabled, as UML will -+ not build with both enabled. -+ - If you're involved in UML kernel development and want to use gprof, - say Y. If you're unsure, say N. - -@@ -16380,6 +16439,19 @@ - If you'd like to be able to work with files stored on the host, - say Y or M here; otherwise say N. - -+HoneyPot ProcFS -+CONFIG_HPPFS -+ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc -+ entries to be overridden, removed, or fabricated from the host. -+ Its purpose is to allow a UML to appear to be a physical machine -+ by removing or changing anything in /proc which gives away the -+ identity of a UML. -+ -+ See <http://user-mode-linux.sf.net/hppfs.html> for more information. -+ -+ You only need this if you are setting up a UML honeypot. Otherwise, -+ it is safe to say 'N' here. -+ - Example IO Memory driver - CONFIG_MMAPPER - The User-Mode Linux port can provide support for IO Memory -@@ -16395,6 +16467,21 @@ - If you'd like to be able to provide a simulated IO port space for - User-Mode Linux processes, say Y. If unsure, say N. - -+Anonymous Memory support -+CONFIG_DEV_ANON -+ Don't ask. Just say Y. -+ -+Support for software watchdog inside UML -+CONFIG_UML_WATCHDOG -+ Support for a virtual hardware watchdog. It's safe to say N here. -+ -+COW block device -+CONFIG_COW -+ This is a layered driver which sits above two other block devices. -+ One is read-only, and the other is a read-write layer which stores -+ all changes. This provides the illusion that the read-only layer -+ can be mounted read-write and changed. -+ - Virtual Serial Line - CONFIG_SSL - The User-Mode Linux environment allows you to create virtual serial -@@ -16505,26 +16592,197 @@ - - SLIP transport - CONFIG_UML_NET_SLIP -- The Slip User-Mode Linux network transport allows a running UML to -+ The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), -- the Slip transport can only carry IP packets. -+ the slip transport can only carry IP packets. - -- To use this, your host must support Slip devices. -+ To use this, your host must support slip devices. - - For more information, see - <http://user-mode-linux.sourceforge.net/networking.html>. That site -- has examples of the UML command line to use to enable Slip -+ has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - -- The Ethertap Transport is preferred over Slip because of its -- limitation. If you prefer Slip, however, say Y here. Otherwise -+ The Ethertap Transport is preferred over slip because of its -+ limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. - -+SLiRP transport -+CONFIG_UML_NET_SLIRP -+ The SLiRP User-Mode Linux network transport allows a running UML -+ to network by invoking a program that can handle SLIP encapsulated -+ packets. This is commonly (but not limited to) the application -+ known as SLiRP, a program that can re-socket IP packets back onto -+ the host on which it is run. Only IP packets are supported, -+ unlike other network transports that can handle all Ethernet -+ frames. In general, slirp allows the UML the same IP connectivity -+ to the outside world that the host user is permitted, and unlike -+ other transports, SLiRP works without the need of root level -+ privleges, setuid binaries, or SLIP devices on the host. This -+ also means not every type of connection is possible, but most -+ situations can be accomodated with carefully crafted slirp -+ commands that can be passed along as part of the network device's -+ setup string. The effect of this transport on the UML is similar -+ that of a host behind a firewall that masquerades all network -+ connections passing through it (but is less secure). -+ -+ To use this you should first have slirp compiled somewhere -+ accessible on the host, and have read its documentation. If you -+ don't need UML networking, say N. -+ -+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" -+ -+pcap transport -+CONFIG_UML_NET_PCAP -+ The pcap transport makes a pcap packet stream on the host look -+ like an ethernet device inside UML. This is useful for making -+ UML act as a network monitor for the host. You must have libcap -+ installed in order to build the pcap transport into UML. -+ -+ For more information, see -+ <http://user-mode-linux.sourceforge.net/networking.html> That site -+ has examples of the UML command line to use to enable this option. -+ -+ If you intend to use UML as a network monitor for the host, say -+ Y here. Otherwise, say N. -+ -+Default main console channel initialization -+CONFIG_CON_ZERO_CHAN -+ This is the string describing the channel to which the main console -+ will be attached by default. This value can be overridden from the -+ command line. The default value is "fd:0,fd:1", which attaches the -+ main console to stdin and stdout. -+ It is safe to leave this unchanged. -+ -+Default console channel initialization -+CONFIG_CON_CHAN -+ This is the string describing the channel to which all consoles -+ except the main console will be attached by default. This value can -+ be overridden from the command line. The default value is "xterm", -+ which brings them up in xterms. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have X or xterm available. -+ -+Default serial line channel initialization -+CONFIG_SSL_CHAN -+ This is the string describing the channel to which the serial lines -+ will be attached by default. This value can be overridden from the -+ command line. The default value is "pty", which attaches them to -+ traditional pseudo-terminals. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have a set of /dev/pty* devices. -+ -+Nesting level -+CONFIG_NEST_LEVEL -+ This is set to the number of layers of UMLs that this UML will be run -+ in. Normally, this is zero, meaning that it will run directly on the -+ host. Setting it to one will build a UML that can run inside a UML -+ that is running on the host. Generally, if you intend this UML to run -+ inside another UML, set CONFIG_NEST_LEVEL to one more than the host UML. -+ Note that if the hosting UML has its CONFIG_KERNEL_HALF_GIGS set to -+ greater than one, then the guest UML should have its CONFIG_NEST_LEVEL -+ set to the host's CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS. -+ Only change this if you are running nested UMLs. -+ -+Kernel address space size (in .5G units) -+CONFIG_KERNEL_HALF_GIGS -+ This determines the amount of address space that UML will allocate for -+ its own, measured in half Gigabyte units. The default is 1. -+ Change this only if you need to boot UML with an unusually large amount -+ of physical memory. -+ -+UML sound support -+CONFIG_UML_SOUND -+ This option enables UML sound support. If enabled, it will pull in -+ soundcore and the UML hostaudio relay, which acts as a intermediary -+ between the host's dsp and mixer devices and the UML sound system. -+ It is safe to say 'Y' here. -+ -+UML SMP support -+CONFIG_UML_SMP -+ This option enables UML SMP support. UML implements virtual SMP by -+ allowing as many processes to run simultaneously on the host as -+ there are virtual processors configured. Obviously, if the host is -+ a uniprocessor, those processes will timeshare, but, inside UML, -+ will appear to be running simultaneously. If the host is a -+ multiprocessor, then UML processes may run simultaneously, depending -+ on the host scheduler. -+ CONFIG_SMP will be set to whatever this option is set to. -+ It is safe to leave this unchanged. -+ -+file descriptor channel support -+CONFIG_FD_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to already set up file descriptors. Generally, the main -+ console is attached to file descriptors 0 and 1 (stdin and stdout), -+ so it would be wise to leave this enabled unless you intend to -+ attach it to some other host device. -+ -+null device channel support -+CONFIG_NULL_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to a device similar to /dev/null. Data written to it disappears -+ and there is never any data to be read. -+ -+port channel support -+CONFIG_PORT_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host portals. They may be accessed with 'telnet <host> -+ <port number>'. Any number of consoles and serial lines may be -+ attached to a single portal, although what UML device you get when -+ you telnet to that portal will be unpredictable. -+ It is safe to say 'Y' here. -+ -+pty channel support -+CONFIG_PTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host pseudo-terminals. Access to both traditional -+ pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled -+ with this option. The assignment of UML devices to host devices -+ will be announced in the kernel message log. -+ It is safe to say 'Y' here. -+ -+tty channel support -+CONFIG_TTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host terminals. Access to both virtual consoles -+ (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and -+ /dev/pts/*) are controlled by this option. -+ It is safe to say 'Y' here. -+ -+xterm channel support -+CONFIG_XTERM_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to xterms. Each UML device so assigned will be brought up in -+ its own xterm. -+ If you disable this option, then CONFIG_PT_PROXY will be disabled as -+ well, since UML's gdb currently requires an xterm. -+ It is safe to say 'Y' here. -+ -+tty logging -+CONFIG_TTY_LOG -+ This option enables logging of all data going through pseudo-terminals -+ to the host. This is primarily useful for honeypots, where you want -+ secure keystroke logging that can't be detected or disabled by root. -+ Say 'N' unless you are setting up a UML honeypot or otherwise know that -+ you want this option. -+ -+UML real-time clock support -+CONFIG_UML_REAL_TIME_CLOCK -+ This option ties the UML clock to the host clock, so that time passes at -+ the same rate as on the host, regardless of how much CPU time the UML is -+ getting. This should normally be enabled. The exception would be if you're -+ debugging UML. In this case, time spent staring at the debugger with UML -+ stopped will cause lots of timer ticks to be backed up, and UML will spent -+ lots of time calling the timer when it is finally continued. -+ - Microtek USB scanner support - CONFIG_USB_MICROTEK - Say Y here if you want support for the Microtek X6USB and -Index: linux-2.4.29/drivers/char/Makefile -=================================================================== ---- linux-2.4.29.orig/drivers/char/Makefile 2005-05-03 21:09:35.000000000 +0300 -+++ linux-2.4.29/drivers/char/Makefile 2005-05-03 22:28:14.000000000 +0300 -@@ -114,6 +114,12 @@ - endif - endif - -+ifeq ($(ARCH),um) -+ KEYMAP = -+ KEYBD = -+ CONSOLE = -+endif -+ - ifeq ($(ARCH),sh) - KEYMAP = - KEYBD = -Index: linux-2.4.29/drivers/char/mem.c -=================================================================== ---- linux-2.4.29.orig/drivers/char/mem.c 2005-05-03 21:07:25.000000000 +0300 -+++ linux-2.4.29/drivers/char/mem.c 2005-05-03 22:28:14.000000000 +0300 -@@ -220,7 +220,7 @@ - ssize_t read = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ -- -+ - if (p < (unsigned long) high_memory) { - read = count; - if (count > (unsigned long) high_memory - p) -@@ -292,6 +292,8 @@ - wrote = (unsigned long) high_memory - p; - - wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos); -+ if(wrote < 0) -+ return(wrote); - - p += wrote; - buf += wrote; -@@ -664,6 +666,8 @@ - write: write_full, - }; - -+extern struct file_operations anon_file_operations; -+ - static int memory_open(struct inode * inode, struct file * filp) - { - switch (MINOR(inode->i_rdev)) { -@@ -693,6 +697,9 @@ - case 9: - filp->f_op = &urandom_fops; - break; -+ case 10: -+ filp->f_op = &anon_file_operations; -+ break; - default: - return -ENXIO; - } -@@ -719,7 +726,8 @@ - {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, - {7, "full", S_IRUGO | S_IWUGO, &full_fops}, - {8, "random", S_IRUGO | S_IWUSR, &random_fops}, -- {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops} -+ {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, -+ {10, "anon", S_IRUGO | S_IWUSR, &anon_file_operations}, - }; - int i; - -Index: linux-2.4.29/drivers/char/n_tty.c -=================================================================== ---- linux-2.4.29.orig/drivers/char/n_tty.c 2005-05-03 21:05:08.000000000 +0300 -+++ linux-2.4.29/drivers/char/n_tty.c 2005-05-03 22:28:14.000000000 +0300 -@@ -25,9 +25,11 @@ - * who actually finally proved there really was a race. - * - * 2002/03/18 Implemented n_tty_wakeup to send SIGIO POLL_OUTs to -- * waiting writing processes-Sapan Bhatia <sapan@corewars.org>. -- * Also fixed a bug in BLOCKING mode where write_chan returns -- * EAGAIN -+ * waiting writing processes-Sapan Bhatia <sapan@corewars.org> -+ * -+ * 2002/03/19 Fixed write_chan to stay put if console driver returns -+ * EAGAIN and not return since it returns an EAGAIN in a -+ * non-blocking operation-Sapan Bhatia <sapan@corewars.org> - */ - - #include <linux/types.h> -@@ -1393,9 +1395,9 @@ - if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) { - while (nr > 0) { - ssize_t num = opost_block(tty, b, nr); -- if (num < 0) { -- if (num == -EAGAIN) -- break; -+ if (num < 0){ -+ if(num == -EAGAIN) -+ break; - retval = num; - goto break_out; - } -Index: linux-2.4.29/drivers/char/tty_io.c -=================================================================== ---- linux-2.4.29.orig/drivers/char/tty_io.c 2005-05-03 21:07:50.000000000 +0300 -+++ linux-2.4.29/drivers/char/tty_io.c 2005-05-03 22:28:14.000000000 +0300 -@@ -967,6 +967,23 @@ - tty_wakeup(tty); - } - -+#ifdef CONFIG_TTY_LOG -+ -+int (*open_log)(void *, void *) = NULL; -+int (*write_log)(int, const char *, int, void *, int) = NULL; -+void (*close_log)(int, void *) = NULL; -+ -+void register_tty_logger(int (*opener)(void *, void *), -+ int (*writer)(int, const char *, int, void *, int), -+ void (*closer)(int, void *)) -+{ -+ open_log = opener; -+ write_log = writer; -+ close_log = closer; -+} -+ -+#endif -+ - static ssize_t tty_read(struct file * file, char * buf, size_t count, - loff_t *ppos) - { -@@ -1012,8 +1029,13 @@ - i = -EIO; - tty_ldisc_deref(ld); - unlock_kernel(); -- if (i > 0) -+ if (i > 0){ - inode->i_atime = CURRENT_TIME; -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (write_log != NULL)) -+ (*write_log)(tty->log_fd, buf, i, tty, 1); -+#endif -+ } - return i; - } - -@@ -1067,6 +1089,10 @@ - if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; - ret = written; -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (write_log != NULL)) -+ (*write_log)(tty->log_fd, buf - ret, ret, tty, 0); -+#endif - } - up(&tty->atomic_write); - return ret; -@@ -1662,6 +1688,11 @@ - tty_set_termios_ldisc(o_tty,N_TTY); - } - -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd >= 0) && (close_log != NULL)) -+ (*close_log)(tty->log_fd, tty); -+#endif -+ - /* - * The release_mem function takes care of the details of clearing - * the slots and preserving the termios structure. -@@ -1820,6 +1851,11 @@ - nr_warns++; - } - } -+ -+#ifdef CONFIG_TTY_LOG -+ if((tty->log_fd < 0) && (open_log != NULL)) -+ tty->log_fd = (*open_log)(tty, current->tty); -+#endif - return 0; - } - -@@ -2467,6 +2503,9 @@ - spin_lock_init(&tty->read_lock); - INIT_LIST_HEAD(&tty->tty_files); - INIT_TQUEUE(&tty->SAK_tq, 0, 0); -+#ifdef CONFIG_TTY_LOG -+ tty->log_fd = -1; -+#endif - } - - /* -Index: linux-2.4.29/drivers/net/setup.c -=================================================================== ---- linux-2.4.29.orig/drivers/net/setup.c 2005-05-03 21:09:27.000000000 +0300 -+++ linux-2.4.29/drivers/net/setup.c 2005-05-03 22:28:14.000000000 +0300 -@@ -28,7 +28,6 @@ - extern int lmc_setup(void); - - extern int madgemc_probe(void); --extern int uml_net_probe(void); - - /* Pad device name to IFNAMSIZ=16. F.e. __PAD6 is string of 9 zeros. */ - #define __PAD6 "\0\0\0\0\0\0\0\0\0" -@@ -103,9 +102,6 @@ - #ifdef CONFIG_MADGEMC - {madgemc_probe, 0}, - #endif --#ifdef CONFIG_UML_NET -- {uml_net_probe, 0}, --#endif - - {NULL, 0}, - }; -Index: linux-2.4.29/fs/bad_inode.c -=================================================================== ---- linux-2.4.29.orig/fs/bad_inode.c 2005-05-03 21:05:44.000000000 +0300 -+++ linux-2.4.29/fs/bad_inode.c 2005-05-03 22:28:14.000000000 +0300 -@@ -83,6 +83,7 @@ - - void make_bad_inode(struct inode * inode) - { -+ inode->i_state = 0; - inode->i_mode = S_IFREG; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &bad_inode_ops; -Index: linux-2.4.29/include/asm-i386/hardirq.h -=================================================================== ---- linux-2.4.29.orig/include/asm-i386/hardirq.h 2005-05-03 21:08:39.000000000 +0300 -+++ linux-2.4.29/include/asm-i386/hardirq.h 2005-05-03 23:41:09.908242720 +0300 -@@ -4,6 +4,7 @@ - #include <linux/config.h> - #include <linux/threads.h> - #include <linux/irq.h> -+#include <asm/processor.h> /* for cpu_relax */ - - /* assembly code in softirq.h is sensitive to the offsets of these fields */ - typedef struct { -Index: linux-2.4.29/include/asm-um/a.out.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/a.out.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/a.out.h 2005-05-03 22:28:14.909343648 +0300 -@@ -0,0 +1,20 @@ -+#ifndef __UM_A_OUT_H -+#define __UM_A_OUT_H -+ -+#include "linux/config.h" -+#include "asm/arch/a.out.h" -+#include "choose-mode.h" -+ -+#undef STACK_TOP -+ -+extern unsigned long stacksizelim; -+ -+extern unsigned long host_task_size; -+ -+#define STACK_ROOM (stacksizelim) -+ -+extern int honeypot; -+#define STACK_TOP \ -+ CHOOSE_MODE((honeypot ? host_task_size : task_size), task_size) -+ -+#endif -Index: linux-2.4.29/include/asm-um/archparam-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/archparam-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/archparam-i386.h 2005-05-03 22:28:14.911343344 +0300 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCHPARAM_I386_H -+#define __UM_ARCHPARAM_I386_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#include "user.h" -+ -+#define ELF_PLATFORM "i586" -+ -+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) -+ -+typedef struct user_i387_struct elf_fpregset_t; -+typedef unsigned long elf_greg_t; -+ -+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+#define ELF_DATA ELFDATA2LSB -+#define ELF_ARCH EM_386 -+ -+#define ELF_PLAT_INIT(regs, load_addr) do { \ -+ PT_REGS_EBX(regs) = 0; \ -+ PT_REGS_ECX(regs) = 0; \ -+ PT_REGS_EDX(regs) = 0; \ -+ PT_REGS_ESI(regs) = 0; \ -+ PT_REGS_EDI(regs) = 0; \ -+ PT_REGS_EBP(regs) = 0; \ -+ PT_REGS_EAX(regs) = 0; \ -+} while(0) -+ -+/* Shamelessly stolen from include/asm-i386/elf.h */ -+ -+#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ -+ pr_reg[0] = PT_REGS_EBX(regs); \ -+ pr_reg[1] = PT_REGS_ECX(regs); \ -+ pr_reg[2] = PT_REGS_EDX(regs); \ -+ pr_reg[3] = PT_REGS_ESI(regs); \ -+ pr_reg[4] = PT_REGS_EDI(regs); \ -+ pr_reg[5] = PT_REGS_EBP(regs); \ -+ pr_reg[6] = PT_REGS_EAX(regs); \ -+ pr_reg[7] = PT_REGS_DS(regs); \ -+ pr_reg[8] = PT_REGS_ES(regs); \ -+ /* fake once used fs and gs selectors? */ \ -+ pr_reg[9] = PT_REGS_DS(regs); \ -+ pr_reg[10] = PT_REGS_DS(regs); \ -+ pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ -+ pr_reg[12] = PT_REGS_IP(regs); \ -+ pr_reg[13] = PT_REGS_CS(regs); \ -+ pr_reg[14] = PT_REGS_EFLAGS(regs); \ -+ pr_reg[15] = PT_REGS_SP(regs); \ -+ pr_reg[16] = PT_REGS_SS(regs); \ -+} while(0); -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned long um_udelay_t; -+ -+/********* Nothing for asm-um/hardirq.h **********/ -+ -+/********* Nothing for asm-um/hw_irq.h **********/ -+ -+/********* Nothing for asm-um/string.h **********/ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/archparam-ppc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/archparam-ppc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/archparam-ppc.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,41 @@ -+#ifndef __UM_ARCHPARAM_PPC_H -+#define __UM_ARCHPARAM_PPC_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#define ELF_PLATFORM (0) -+ -+#define ELF_ET_DYN_BASE (0x08000000) -+ -+/* the following stolen from asm-ppc/elf.h */ -+#define ELF_NGREG 48 /* includes nip, msr, lr, etc. */ -+#define ELF_NFPREG 33 /* includes fpscr */ -+/* General registers */ -+typedef unsigned long elf_greg_t; -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+/* Floating point registers */ -+typedef double elf_fpreg_t; -+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; -+ -+#define ELF_DATA ELFDATA2MSB -+#define ELF_ARCH EM_PPC -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned int um_udelay_t; -+ -+/********* Bits for asm-um/hw_irq.h **********/ -+ -+struct hw_interrupt_type; -+ -+/********* Bits for asm-um/hardirq.h **********/ -+ -+#define irq_enter(cpu, irq) hardirq_enter(cpu) -+#define irq_exit(cpu, irq) hardirq_exit(cpu) -+ -+/********* Bits for asm-um/string.h **********/ -+ -+#define __HAVE_ARCH_STRRCHR -+ -+#endif -Index: linux-2.4.29/include/asm-um/arch-signal-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/arch-signal-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/arch-signal-i386.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCH_SIGNAL_I386_H -+#define __UM_ARCH_SIGNAL_I386_H -+ -+struct arch_signal_context { -+ unsigned long extrasigs[_NSIG_WORDS]; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/atomic.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/atomic.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/atomic.h 2005-05-03 23:41:08.099517688 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ATOMIC_H -+#define __UM_ATOMIC_H -+ -+#include "asm/arch/atomic.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/bitops.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/bitops.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/bitops.h 2005-05-03 23:41:08.093518600 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BITOPS_H -+#define __UM_BITOPS_H -+ -+#include "asm/arch/bitops.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/boot.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/boot.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/boot.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BOOT_H -+#define __UM_BOOT_H -+ -+#include "asm/arch/boot.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/bugs.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/bugs.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/bugs.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BUGS_H -+#define __UM_BUGS_H -+ -+void check_bugs(void); -+ -+#endif -Index: linux-2.4.29/include/asm-um/byteorder.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/byteorder.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/byteorder.h 2005-05-03 22:37:45.347623848 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BYTEORDER_H -+#define __UM_BYTEORDER_H -+ -+#include "asm/arch/byteorder.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/cache.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/cache.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/cache.h 2005-05-03 22:28:14.917342432 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CACHE_H -+#define __UM_CACHE_H -+ -+#define L1_CACHE_BYTES 32 -+ -+#endif -Index: linux-2.4.29/include/asm-um/checksum.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/checksum.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/checksum.h 2005-05-03 22:28:14.917342432 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CHECKSUM_H -+#define __UM_CHECKSUM_H -+ -+#include "sysdep/checksum.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/cobalt.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/cobalt.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/cobalt.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_COBALT_H -+#define __UM_COBALT_H -+ -+#include "asm/arch/cobalt.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/current.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/current.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/current.h 2005-05-03 23:41:08.083520120 +0300 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_CURRENT_H -+#define __UM_CURRENT_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+#include "asm/page.h" -+ -+struct task_struct; -+ -+#define CURRENT_TASK(dummy) (((unsigned long) &dummy) & \ -+ (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER)) -+ -+#define current ({ int dummy; (struct task_struct *) CURRENT_TASK(dummy); }) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/delay.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/delay.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/delay.h 2005-05-03 22:28:14.919342128 +0300 -@@ -0,0 +1,7 @@ -+#ifndef __UM_DELAY_H -+#define __UM_DELAY_H -+ -+#include "asm/arch/delay.h" -+#include "asm/archparam.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/desc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/desc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/desc.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_DESC_H -+#define __UM_DESC_H -+ -+#include "asm/arch/desc.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/div64.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/div64.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/div64.h 2005-05-03 22:28:14.941338784 +0300 -@@ -0,0 +1,6 @@ -+#ifndef _UM_DIV64_H -+#define _UM_DIV64_H -+ -+#include "asm/arch/div64.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/dma.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/dma.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/dma.h 2005-05-03 22:37:48.244183504 +0300 -@@ -0,0 +1,10 @@ -+#ifndef __UM_DMA_H -+#define __UM_DMA_H -+ -+#include "asm/io.h" -+ -+extern unsigned long uml_physmem; -+ -+#define MAX_DMA_ADDRESS (uml_physmem) -+ -+#endif -Index: linux-2.4.29/include/asm-um/elf.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/elf.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/elf.h 2005-05-03 22:28:14.943338480 +0300 -@@ -0,0 +1,18 @@ -+#ifndef __UM_ELF_H -+#define __UM_ELF_H -+ -+#include "asm/archparam.h" -+ -+#define ELF_HWCAP (0) -+ -+#define SET_PERSONALITY(ex, ibcs2) do ; while(0) -+ -+#define ELF_EXEC_PAGESIZE 4096 -+ -+#define elf_check_arch(x) (1) -+ -+#define ELF_CLASS ELFCLASS32 -+ -+#define USE_ELF_CORE_DUMP -+ -+#endif -Index: linux-2.4.29/include/asm-um/errno.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/errno.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/errno.h 2005-05-03 22:28:14.944338328 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ERRNO_H -+#define __UM_ERRNO_H -+ -+#include "asm/arch/errno.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/fcntl.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/fcntl.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/fcntl.h 2005-05-03 22:28:14.945338176 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FCNTL_H -+#define __UM_FCNTL_H -+ -+#include "asm/arch/fcntl.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/fixmap.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/fixmap.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/fixmap.h 2005-05-03 23:41:11.208045120 +0300 -@@ -0,0 +1,89 @@ -+#ifndef __UM_FIXMAP_H -+#define __UM_FIXMAP_H -+ -+#include <linux/config.h> -+#include <asm/kmap_types.h> -+ -+/* -+ * Here we define all the compile-time 'special' virtual -+ * addresses. The point is to have a constant address at -+ * compile time, but to set the physical address only -+ * in the boot process. We allocate these special addresses -+ * from the end of virtual memory (0xfffff000) backwards. -+ * Also this lets us do fail-safe vmalloc(), we -+ * can guarantee that these special addresses and -+ * vmalloc()-ed addresses never overlap. -+ * -+ * these 'compile-time allocated' memory buffers are -+ * fixed-size 4k pages. (or larger if used with an increment -+ * highger than 1) use fixmap_set(idx,phys) to associate -+ * physical memory with fixmap indices. -+ * -+ * TLB entries of such buffers will not be flushed across -+ * task switches. -+ */ -+ -+/* -+ * on UP currently we will have no trace of the fixmap mechanizm, -+ * no page table allocations, etc. This might change in the -+ * future, say framebuffers for the console driver(s) could be -+ * fix-mapped? -+ */ -+enum fixed_addresses { -+#ifdef CONFIG_HIGHMEM -+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ -+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -+#endif -+ __end_of_fixed_addresses -+}; -+ -+extern void __set_fixmap (enum fixed_addresses idx, -+ unsigned long phys, pgprot_t flags); -+ -+#define set_fixmap(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL) -+/* -+ * Some hardware wants to get fixmapped without caching. -+ */ -+#define set_fixmap_nocache(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) -+/* -+ * used by vmalloc.c. -+ * -+ * Leave one empty page between vmalloc'ed areas and -+ * the start of the fixmap, and leave one page empty -+ * at the top of mem.. -+ */ -+extern unsigned long get_kmem_end(void); -+ -+#define FIXADDR_TOP (get_kmem_end() - 0x2000) -+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -+ -+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -+ -+extern void __this_fixmap_does_not_exist(void); -+ -+/* -+ * 'index to address' translation. If anyone tries to use the idx -+ * directly without tranlation, we catch the bug with a NULL-deference -+ * kernel oops. Illegal ranges of incoming indices are caught too. -+ */ -+static inline unsigned long fix_to_virt(const unsigned int idx) -+{ -+ /* -+ * this branch gets completely eliminated after inlining, -+ * except when someone tries to use fixaddr indices in an -+ * illegal way. (such as mixing up address types or using -+ * out-of-range indices). -+ * -+ * If it doesn't get removed, the linker will complain -+ * loudly with a reasonably clear error message.. -+ */ -+ if (idx >= __end_of_fixed_addresses) -+ __this_fixmap_does_not_exist(); -+ -+ return __fix_to_virt(idx); -+} -+ -+#endif -Index: linux-2.4.29/include/asm-um/floppy.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/floppy.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/floppy.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FLOPPY_H -+#define __UM_FLOPPY_H -+ -+#include "asm/arch/floppy.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/hardirq.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/hardirq.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/hardirq.h 2005-05-03 23:41:09.909242568 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HARDIRQ_H -+#define __UM_HARDIRQ_H -+ -+#include "asm/arch/hardirq.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/hdreg.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/hdreg.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/hdreg.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HDREG_H -+#define __UM_HDREG_H -+ -+#include "asm/arch/hdreg.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/highmem.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/highmem.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/highmem.h 2005-05-03 23:41:11.210044816 +0300 -@@ -0,0 +1,12 @@ -+#ifndef __UM_HIGHMEM_H -+#define __UM_HIGHMEM_H -+ -+#include "asm/page.h" -+#include "asm/fixmap.h" -+#include "asm/arch/highmem.h" -+ -+#undef PKMAP_BASE -+ -+#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -+ -+#endif -Index: linux-2.4.29/include/asm-um/hw_irq.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/hw_irq.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/hw_irq.h 2005-05-03 22:37:48.105204632 +0300 -@@ -0,0 +1,10 @@ -+#ifndef _ASM_UM_HW_IRQ_H -+#define _ASM_UM_HW_IRQ_H -+ -+#include "asm/irq.h" -+#include "asm/archparam.h" -+ -+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) -+{} -+ -+#endif -Index: linux-2.4.29/include/asm-um/ide.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ide.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ide.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IDE_H -+#define __UM_IDE_H -+ -+#include "asm/arch/ide.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/init.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/init.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/init.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,11 @@ -+#ifndef _UM_INIT_H -+#define _UM_INIT_H -+ -+#ifdef notdef -+#define __init -+#define __initdata -+#define __initfunc(__arginit) __arginit -+#define __cacheline_aligned -+#endif -+ -+#endif -Index: linux-2.4.29/include/asm-um/ioctl.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ioctl.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ioctl.h 2005-05-03 22:28:14.952337112 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTL_H -+#define __UM_IOCTL_H -+ -+#include "asm/arch/ioctl.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/ioctls.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ioctls.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ioctls.h 2005-05-03 22:37:45.509599224 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTLS_H -+#define __UM_IOCTLS_H -+ -+#include "asm/arch/ioctls.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/io.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/io.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/io.h 2005-05-03 22:37:48.176193840 +0300 -@@ -0,0 +1,25 @@ -+#ifndef __UM_IO_H -+#define __UM_IO_H -+ -+#include "asm/page.h" -+ -+#define IO_SPACE_LIMIT 0xdeadbeef /* Sure hope nothing uses this */ -+ -+static inline int inb(unsigned long i) { return(0); } -+static inline void outb(char c, unsigned long i) { } -+ -+/* -+ * Change virtual addresses to physical addresses and vv. -+ * These are pretty trivial -+ */ -+static inline unsigned long virt_to_phys(volatile void * address) -+{ -+ return __pa((void *) address); -+} -+ -+static inline void * phys_to_virt(unsigned long address) -+{ -+ return __va(address); -+} -+ -+#endif -Index: linux-2.4.29/include/asm-um/ipcbuf.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ipcbuf.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ipcbuf.h 2005-05-03 22:28:14.954336808 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPCBUF_H -+#define __UM_IPCBUF_H -+ -+#include "asm/arch/ipcbuf.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/ipc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ipc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ipc.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPC_H -+#define __UM_IPC_H -+ -+#include "asm/arch/ipc.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/irq.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/irq.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/irq.h 2005-05-03 22:28:14.956336504 +0300 -@@ -0,0 +1,23 @@ -+#ifndef __UM_IRQ_H -+#define __UM_IRQ_H -+ -+#define TIMER_IRQ 0 -+#define UMN_IRQ 1 -+#define CONSOLE_IRQ 2 -+#define CONSOLE_WRITE_IRQ 3 -+#define UBD_IRQ 4 -+#define UM_ETH_IRQ 5 -+#define SSL_IRQ 6 -+#define SSL_WRITE_IRQ 7 -+#define ACCEPT_IRQ 8 -+#define MCONSOLE_IRQ 9 -+#define WINCH_IRQ 10 -+#define SIGIO_WRITE_IRQ 11 -+#define TELNETD_IRQ 12 -+#define XTERM_IRQ 13 -+#define HUMFS_IRQ 14 -+ -+#define LAST_IRQ HUMFS_IRQ -+#define NR_IRQS (LAST_IRQ + 1) -+ -+#endif -Index: linux-2.4.29/include/asm-um/keyboard.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/keyboard.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/keyboard.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_KEYBOARD_H -+#define __UM_KEYBOARD_H -+ -+#include "asm/arch/keyboard.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/kmap_types.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/kmap_types.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/kmap_types.h 2005-05-03 22:28:14.957336352 +0300 -@@ -0,0 +1,11 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_KMAP_TYPES_H -+#define __UM_KMAP_TYPES_H -+ -+#include "asm/arch/kmap_types.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/linux_logo.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/linux_logo.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/linux_logo.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LINUX_LOGO_H -+#define __UM_LINUX_LOGO_H -+ -+#include "asm/arch/linux_logo.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/locks.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/locks.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/locks.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LOCKS_H -+#define __UM_LOCKS_H -+ -+#include "asm/arch/locks.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/mca_dma.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/mca_dma.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/mca_dma.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef mca___UM_DMA_H -+#define mca___UM_DMA_H -+ -+#include "asm/arch/mca_dma.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/mman.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/mman.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/mman.h 2005-05-03 22:28:14.961335744 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MMAN_H -+#define __UM_MMAN_H -+ -+#include "asm/arch/mman.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/mmu_context.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/mmu_context.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/mmu_context.h 2005-05-03 23:41:09.000000000 +0300 -@@ -0,0 +1,72 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_MMU_CONTEXT_H -+#define __UM_MMU_CONTEXT_H -+ -+#include "linux/sched.h" -+#include "choose-mode.h" -+ -+#define get_mmu_context(task) do ; while(0) -+#define activate_context(tsk) do ; while(0) -+ -+static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) -+{ -+} -+ -+extern void switch_mm_skas(int mm_fd); -+ -+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if(prev != next){ -+ clear_bit(cpu, &prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+ if(next != &init_mm) -+ CHOOSE_MODE((void) 0, -+ switch_mm_skas(next->context.skas.mm_fd)); -+ } -+} -+ -+static inline void enter_lazy_tlb(struct mm_struct *mm, -+ struct task_struct *tsk, unsigned cpu) -+{ -+} -+ -+extern int init_new_context_skas(struct task_struct *task, -+ struct mm_struct *mm); -+ -+static inline int init_new_context_tt(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(0); -+} -+ -+static inline int init_new_context(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(CHOOSE_MODE_PROC(init_new_context_tt, init_new_context_skas, -+ task, mm)); -+} -+ -+extern void destroy_context_skas(struct mm_struct *mm); -+ -+static inline void destroy_context(struct mm_struct *mm) -+{ -+ CHOOSE_MODE((void) 0, destroy_context_skas(mm)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/mmu.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/mmu.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/mmu.h 2005-05-03 22:28:14.962335592 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MMU_H -+#define __MMU_H -+ -+#include "um_mmu.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/module.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/module.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/module.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MODULE_H -+#define __UM_MODULE_H -+ -+#include "asm/arch/module.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/msgbuf.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/msgbuf.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/msgbuf.h 2005-05-03 22:28:14.964335288 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MSGBUF_H -+#define __UM_MSGBUF_H -+ -+#include "asm/arch/msgbuf.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/mtrr.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/mtrr.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/mtrr.h 2005-05-03 22:37:48.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MTRR_H -+#define __UM_MTRR_H -+ -+#include "asm/arch/mtrr.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/namei.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/namei.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/namei.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_NAMEI_H -+#define __UM_NAMEI_H -+ -+#include "asm/arch/namei.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/page.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/page.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/page.h 2005-05-03 22:37:45.335625672 +0300 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PAGE_H -+#define __UM_PAGE_H -+ -+struct page; -+ -+#include "asm/arch/page.h" -+ -+#undef BUG -+#undef PAGE_BUG -+#undef __pa -+#undef __va -+#undef virt_to_page -+#undef VALID_PAGE -+#undef PAGE_OFFSET -+#undef KERNELBASE -+ -+extern unsigned long uml_physmem; -+ -+#define PAGE_OFFSET (uml_physmem) -+#define KERNELBASE PAGE_OFFSET -+ -+#ifndef __ASSEMBLY__ -+ -+extern void stop(void); -+ -+#define BUG() do { \ -+ panic("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ -+} while (0) -+ -+#define PAGE_BUG(page) do { \ -+ BUG(); \ -+} while (0) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#define __va_space (8*1024*1024) -+ -+extern unsigned long to_phys(void *virt); -+extern void *to_virt(unsigned long phys); -+ -+#define __pa(virt) to_phys((void *) virt) -+#define __va(phys) to_virt((unsigned long) phys) -+ -+#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) -+ -+extern struct page *arch_validate(struct page *page, int mask, int order); -+#define HAVE_ARCH_VALIDATE -+ -+extern void arch_free_page(struct page *page, int order); -+#define HAVE_ARCH_FREE_PAGE -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/page_offset.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/page_offset.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/page_offset.h 2005-05-03 22:28:14.967334832 +0300 -@@ -0,0 +1 @@ -+#define PAGE_OFFSET_RAW (uml_physmem) -Index: linux-2.4.29/include/asm-um/param.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/param.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/param.h 2005-05-03 22:28:14.968334680 +0300 -@@ -0,0 +1,22 @@ -+#ifndef _UM_PARAM_H -+#define _UM_PARAM_H -+ -+#define HZ 100 -+ -+#define EXEC_PAGESIZE 4096 -+ -+#ifndef NGROUPS -+#define NGROUPS 32 -+#endif -+ -+#ifndef NOGROUP -+#define NOGROUP (-1) -+#endif -+ -+#define MAXHOSTNAMELEN 64 /* max length of hostname */ -+ -+#ifdef __KERNEL__ -+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ -+#endif -+ -+#endif -Index: linux-2.4.29/include/asm-um/pci.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/pci.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/pci.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_PCI_H -+#define __UM_PCI_H -+ -+#define PCI_DMA_BUS_IS_PHYS (1) -+ -+#endif -Index: linux-2.4.29/include/asm-um/pgalloc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/pgalloc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/pgalloc.h 2005-05-03 23:41:11.209044968 +0300 -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGALLOC_H -+#define __UM_PGALLOC_H -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/fixmap.h" -+#include "choose-mode.h" -+ -+#define pgd_quicklist (current_cpu_data.pgd_quick) -+#define pmd_quicklist (current_cpu_data.pmd_quick) -+#define pte_quicklist (current_cpu_data.pte_quick) -+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -+ -+#define pmd_populate(mm, pmd, pte) set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) -+ -+/* -+ * Allocate and free page tables. -+ */ -+ -+static inline pgd_t *get_pgd_slow_tt(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) { -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ memcpy(pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); -+ } -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow_skas(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow(void) -+{ -+ return(CHOOSE_MODE(get_pgd_slow_tt(), get_pgd_slow_skas())); -+} -+ -+static inline pgd_t *get_pgd_fast(void) -+{ -+ unsigned long *ret; -+ -+ ret = pgd_quicklist; -+ if (ret != NULL) { -+ pgd_quicklist = (unsigned long *)(*ret); -+ ret[0] = 0; -+ pgtable_cache_size--; -+ } else -+ ret = (unsigned long *)get_pgd_slow(); -+ return (pgd_t *)ret; -+} -+ -+static inline void free_pgd_fast(pgd_t *pgd) -+{ -+ *(unsigned long *)pgd = (unsigned long) pgd_quicklist; -+ pgd_quicklist = (unsigned long *) pgd; -+ pgtable_cache_size++; -+} -+ -+static inline void free_pgd_slow(pgd_t *pgd) -+{ -+ free_page((unsigned long)pgd); -+} -+ -+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) -+{ -+ pte_t *pte; -+ -+ pte = (pte_t *) __get_free_page(GFP_KERNEL); -+ if (pte) -+ clear_page(pte); -+ return pte; -+} -+ -+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -+{ -+ unsigned long *ret; -+ -+ ret = (unsigned long *)pte_quicklist; -+ if (ret != NULL) { -+ pte_quicklist = (unsigned long *)(*ret); -+ ret[0] = ret[1]; -+ pgtable_cache_size--; -+ } -+ return (pte_t *)ret; -+} -+ -+static inline void pte_free_fast(pte_t *pte) -+{ -+ *(unsigned long *)pte = (unsigned long) pte_quicklist; -+ pte_quicklist = (unsigned long *) pte; -+ pgtable_cache_size++; -+} -+ -+static inline void pte_free_slow(pte_t *pte) -+{ -+ free_page((unsigned long)pte); -+} -+ -+#define pte_free(pte) pte_free_fast(pte) -+#define pgd_free(pgd) free_pgd_slow(pgd) -+#define pgd_alloc(mm) get_pgd_fast() -+ -+/* -+ * allocating and freeing a pmd is trivial: the 1-entry pmd is -+ * inside the pgd, so has no extra memory associated with it. -+ */ -+ -+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -+#define pmd_free_slow(x) do { } while (0) -+#define pmd_free_fast(x) do { } while (0) -+#define pmd_free(x) do { } while (0) -+#define pgd_populate(mm, pmd, pte) BUG() -+ -+/* -+ * TLB flushing: -+ * -+ * - flush_tlb() flushes the current mm struct TLBs -+ * - flush_tlb_all() flushes all processes TLBs -+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's -+ * - flush_tlb_page(vma, vmaddr) flushes one page -+ * - flush_tlb_kernel_vm() flushes the kernel vm area -+ * - flush_tlb_range(mm, start, end) flushes a range of pages -+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables -+ */ -+ -+extern void flush_tlb_all(void); -+extern void flush_tlb_mm(struct mm_struct *mm); -+extern void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -+extern void flush_tlb_kernel_vm(void); -+ -+static inline void flush_tlb_pgtables(struct mm_struct *mm, -+ unsigned long start, unsigned long end) -+{ -+} -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/pgtable.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/pgtable.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/pgtable.h 2005-05-03 23:41:09.906243024 +0300 -@@ -0,0 +1,413 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGTABLE_H -+#define __UM_PGTABLE_H -+ -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/page.h" -+ -+extern pgd_t swapper_pg_dir[1024]; -+ -+#define flush_cache_all() do ; while (0) -+#define flush_cache_mm(mm) do ; while (0) -+#define flush_cache_range(vma, start, end) do ; while (0) -+#define flush_cache_page(vma, vmaddr) do ; while (0) -+#define flush_page_to_ram(page) do ; while (0) -+#define flush_dcache_page(page) do ; while (0) -+#define flush_icache_range(from, to) do ; while (0) -+#define flush_icache_page(vma,pg) do ; while (0) -+#define flush_icache_user_range(vma,pg,adr,len) do ; while (0) -+ -+extern void __flush_tlb_one(unsigned long addr); -+ -+extern void pte_free(pte_t *pte); -+ -+extern void pgd_free(pgd_t *pgd); -+ -+extern int do_check_pgt_cache(int, int); -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, -+ pte_t *pte_out); -+ -+/* zero page used for uninitialized stuff */ -+extern unsigned long *empty_zero_page; -+ -+#define pgtable_cache_init() do ; while (0) -+ -+/* PMD_SHIFT determines the size of the area a second-level page table can map */ -+#define PMD_SHIFT 22 -+#define PMD_SIZE (1UL << PMD_SHIFT) -+#define PMD_MASK (~(PMD_SIZE-1)) -+ -+/* PGDIR_SHIFT determines what a third-level page table entry can map */ -+#define PGDIR_SHIFT 22 -+#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -+#define PGDIR_MASK (~(PGDIR_SIZE-1)) -+ -+/* -+ * entries per page directory level: the i386 is two-level, so -+ * we don't really have any PMD directory physically. -+ */ -+#define PTRS_PER_PTE 1024 -+#define PTRS_PER_PMD 1 -+#define PTRS_PER_PGD 1024 -+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -+#define FIRST_USER_PGD_NR 0 -+ -+#define pte_ERROR(e) \ -+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -+#define pmd_ERROR(e) \ -+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -+#define pgd_ERROR(e) \ -+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -+ -+/* -+ * pgd entries used up by user/kernel: -+ */ -+ -+#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT) -+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -+ -+#ifndef __ASSEMBLY__ -+/* Just any arbitrary offset to the start of the vmalloc VM area: the -+ * current 8MB value just means that there will be a 8MB "hole" after the -+ * physical memory until the kernel virtual memory starts. That means that -+ * any out-of-bounds memory accesses will hopefully be caught. -+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced -+ * area for the same reason. ;) -+ */ -+ -+extern unsigned long end_iomem; -+ -+#define VMALLOC_OFFSET (__va_space) -+#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -+#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -+ -+#if CONFIG_HIGHMEM -+# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -+#else -+# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -+#endif -+ -+#define _PAGE_PRESENT 0x001 -+#define _PAGE_NEWPAGE 0x002 -+#define _PAGE_PROTNONE 0x004 /* If not present */ -+#define _PAGE_RW 0x008 -+#define _PAGE_USER 0x010 -+#define _PAGE_ACCESSED 0x020 -+#define _PAGE_DIRTY 0x040 -+#define _PAGE_NEWPROT 0x080 -+ -+#define REGION_MASK 0xf0000000 -+#define REGION_SHIFT 28 -+ -+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -+ -+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) -+ -+/* -+ * The i386 can't do page protection for execute, and considers that the same are read. -+ * Also, write permissions imply read permissions. This is the closest we can get.. -+ */ -+#define __P000 PAGE_NONE -+#define __P001 PAGE_READONLY -+#define __P010 PAGE_COPY -+#define __P011 PAGE_COPY -+#define __P100 PAGE_READONLY -+#define __P101 PAGE_READONLY -+#define __P110 PAGE_COPY -+#define __P111 PAGE_COPY -+ -+#define __S000 PAGE_NONE -+#define __S001 PAGE_READONLY -+#define __S010 PAGE_SHARED -+#define __S011 PAGE_SHARED -+#define __S100 PAGE_READONLY -+#define __S101 PAGE_READONLY -+#define __S110 PAGE_SHARED -+#define __S111 PAGE_SHARED -+ -+/* -+ * Define this if things work differently on an i386 and an i486: -+ * it will (on an i486) warn about kernel memory accesses that are -+ * done without a 'verify_area(VERIFY_WRITE,..)' -+ */ -+#undef TEST_VERIFY_AREA -+ -+/* page table for 0-4MB for everybody */ -+extern unsigned long pg0[1024]; -+ -+/* -+ * BAD_PAGETABLE is used when we need a bogus page-table, while -+ * BAD_PAGE is used for a bogus page. -+ * -+ * ZERO_PAGE is a global shared page that is always zero: used -+ * for zero-mapped memory areas etc.. -+ */ -+extern pte_t __bad_page(void); -+extern pte_t * __bad_pagetable(void); -+ -+#define BAD_PAGETABLE __bad_pagetable() -+#define BAD_PAGE __bad_page() -+ -+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -+ -+/* number of bits that fit into a memory pointer */ -+#define BITS_PER_PTR (8*sizeof(unsigned long)) -+ -+/* to align the pointer to a pointer address */ -+#define PTR_MASK (~(sizeof(void*)-1)) -+ -+/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -+/* 64-bit machines, beware! SRB. */ -+#define SIZEOF_PTR_LOG2 2 -+ -+/* to find an entry in a page-table */ -+#define PAGE_PTR(address) \ -+((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) -+ -+#define pte_none(x) !(pte_val(x) & ~_PAGE_NEWPAGE) -+#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -+ -+#define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) -+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) -+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -+#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -+#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) -+ -+/* -+ * The "pgd_xxx()" functions here are trivial for a folded two-level -+ * setup: the pgd is never bad, and a pmd always exists (as it's folded -+ * into the pgd entry) -+ */ -+static inline int pgd_none(pgd_t pgd) { return 0; } -+static inline int pgd_bad(pgd_t pgd) { return 0; } -+static inline int pgd_present(pgd_t pgd) { return 1; } -+static inline void pgd_clear(pgd_t * pgdp) { } -+ -+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -+ -+#define pte_page(pte) virt_to_page(__va(pte_val(pte))) -+#define pmd_page(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) -+ -+extern struct page *phys_to_page(const unsigned long phys); -+extern struct page *__virt_to_page(const unsigned long virt); -+#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) -+ -+static inline pte_t pte_mknewprot(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_NEWPROT; -+ return(pte); -+} -+ -+static inline pte_t pte_mknewpage(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_NEWPAGE; -+ return(pte); -+} -+ -+static inline void set_pte(pte_t *pteptr, pte_t pteval) -+{ -+ /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so -+ * fix_range knows to unmap it. _PAGE_NEWPROT is specific to -+ * mapped pages. -+ */ -+ *pteptr = pte_mknewpage(pteval); -+ if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); -+} -+ -+/* -+ * (pmds are folded into pgds so this doesnt get actually called, -+ * but the define is needed for a generic inline function.) -+ */ -+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) -+#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -+ -+/* -+ * The following only work if pte_present() is true. -+ * Undefined behaviour if not.. -+ */ -+static inline int pte_read(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_exec(pte_t pte){ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_write(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_RW) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -+static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } -+static inline int pte_newprot(pte_t pte) -+{ -+ return(pte_present(pte) && (pte_val(pte) & _PAGE_NEWPROT)); -+} -+ -+static inline pte_t pte_rdprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_exprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkclean(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkold(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_wrprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkread(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkexec(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkdirty(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkyoung(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_mkwrite(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkuptodate(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_NEWPAGE; -+ if(pte_present(pte)) pte_val(pte) &= ~_PAGE_NEWPROT; -+ return(pte); -+} -+ -+extern unsigned long page_to_phys(struct page *page); -+ -+/* -+ * Conversion functions: convert a page and protection to a page entry, -+ * and a page entry and page directory to the page they refer to. -+ */ -+ -+extern pte_t mk_pte(struct page *page, pgprot_t pgprot); -+ -+/* This takes a physical page address that is used by the remapping -+ * functions -+ */ -+#define mk_pte_phys(phys, pgprot) \ -+ (pte_mknewpage(mk_pte(virt_to_page(__va(phys)), pgprot))) -+ -+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -+{ -+ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); -+ if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte)); -+ return pte; -+} -+ -+/* to find an entry in a page-table-directory. */ -+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -+#define __pgd_offset(address) pgd_index(address) -+ -+/* to find an entry in a page-table-directory */ -+#define pgd_offset(mm, address) \ -+((mm)->pgd + ((address) >> PGDIR_SHIFT)) -+ -+/* to find an entry in a kernel page-table-directory */ -+#define pgd_offset_k(address) pgd_offset(&init_mm, address) -+ -+#define __pmd_offset(address) \ -+ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -+ -+/* Find an entry in the second-level page table.. */ -+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -+{ -+ return (pmd_t *) dir; -+} -+ -+/* Find an entry in the third-level page table.. */ -+#define pte_offset(pmd, address) \ -+ ((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2)))) -+ -+#define update_mmu_cache(vma,address,pte) do ; while (0) -+ -+/* Encode and de-code a swap entry */ -+#define SWP_TYPE(x) (((x).val >> 3) & 0x7f) -+#define SWP_OFFSET(x) ((x).val >> 10) -+ -+#define SWP_ENTRY(type, offset) \ -+ ((swp_entry_t) { ((type) << 3) | ((offset) << 10) }) -+#define pte_to_swp_entry(pte) \ -+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) -+#define swp_entry_to_pte(x) ((pte_t) { (x).val }) -+ -+#define PageSkip(x) (0) -+#define kern_addr_valid(addr) (1) -+ -+#include <asm-generic/pgtable.h> -+ -+#endif -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/poll.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/poll.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/poll.h 2005-05-03 22:28:14.973333920 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POLL_H -+#define __UM_POLL_H -+ -+#include "asm/arch/poll.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/posix_types.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/posix_types.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/posix_types.h 2005-05-03 22:28:14.974333768 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POSIX_TYPES_H -+#define __UM_POSIX_TYPES_H -+ -+#include "asm/arch/posix_types.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/processor-generic.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/processor-generic.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/processor-generic.h 2005-05-03 23:41:08.094518448 +0300 -@@ -0,0 +1,183 @@ -+/* -+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_GENERIC_H -+#define __UM_PROCESSOR_GENERIC_H -+ -+struct pt_regs; -+ -+struct task_struct; -+ -+#include "linux/config.h" -+#include "linux/signal.h" -+#include "asm/ptrace.h" -+#include "asm/siginfo.h" -+#include "choose-mode.h" -+ -+struct mm_struct; -+ -+#define current_text_addr() ((void *) 0) -+ -+#define cpu_relax() do ; while (0) -+ -+struct thread_struct { -+ int forking; -+ int nsyscalls; -+ struct pt_regs regs; -+ unsigned long cr2; -+ int err; -+ unsigned long trap_no; -+ int singlestep_syscall; -+ void *fault_addr; -+ void *fault_catcher; -+ struct task_struct *prev_sched; -+ unsigned long temp_stack; -+ void *exec_buf; -+ struct arch_thread arch; -+ union { -+#ifdef CONFIG_MODE_TT -+ struct { -+ int extern_pid; -+ int tracing; -+ /* XXX This is really two filehandles, but they contain -+ * lists, and list.h includes processor.h through -+ * prefetch.h before defining struct list, so this -+ * makes the lists' sizes unknown at this point. -+ * So, this is a void *, and allocated separately. -+ * Check to see if this is fixed in 2.6. -+ */ -+ void *switch_pipe; -+ int singlestep_syscall; -+ int vm_seq; -+ } tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct { -+ void *switch_buf; -+ void *fork_buf; -+ int mm_count; -+ } skas; -+#endif -+ } mode; -+ struct { -+ int op; -+ union { -+ struct { -+ int pid; -+ } fork, exec; -+ struct { -+ int (*proc)(void *); -+ void *arg; -+ } thread; -+ struct { -+ void (*proc)(void *); -+ void *arg; -+ } cb; -+ } u; -+ } request; -+}; -+ -+#define INIT_THREAD \ -+{ \ -+ .forking = 0, \ -+ .nsyscalls = 0, \ -+ .regs = EMPTY_REGS, \ -+ .cr2 = 0, \ -+ .err = 0, \ -+ .fault_addr = NULL, \ -+ .prev_sched = NULL, \ -+ .temp_stack = 0, \ -+ .exec_buf = NULL, \ -+ .arch = INIT_ARCH_THREAD, \ -+ .request = { 0 } \ -+} -+ -+#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+typedef struct { -+ unsigned long seg; -+} mm_segment_t; -+ -+extern struct task_struct *alloc_task_struct(void); -+extern void free_task_struct(struct task_struct *task); -+ -+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) -+ -+extern void release_thread(struct task_struct *); -+extern int arch_kernel_thread(int (*fn)(void *), void * arg, -+ unsigned long flags); -+extern void dump_thread(struct pt_regs *regs, struct user *u); -+ -+extern unsigned long thread_saved_pc(struct thread_struct *t); -+ -+static inline void mm_copy_segments(struct mm_struct *from_mm, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void copy_segments(struct task_struct *p, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void release_segments(struct mm_struct *mm) -+{ -+} -+ -+#define init_task (init_task_union.task) -+#define init_stack (init_task_union.stack) -+ -+/* -+ * User space process size: 3GB (default). -+ */ -+extern unsigned long task_size; -+ -+#define TASK_SIZE (task_size) -+ -+/* This decides where the kernel will search for a free chunk of vm -+ * space during mmap's. -+ */ -+#define TASK_UNMAPPED_BASE (0x40000000) -+ -+extern void start_thread(struct pt_regs *regs, unsigned long entry, -+ unsigned long stack); -+ -+struct cpuinfo_um { -+ unsigned long loops_per_jiffy; -+ unsigned long *pgd_quick; -+ unsigned long *pmd_quick; -+ unsigned long *pte_quick; -+ unsigned long pgtable_cache_sz; -+ int ipi_pipe[2]; -+}; -+ -+extern struct cpuinfo_um boot_cpu_data; -+ -+#define my_cpu_data cpu_data[smp_processor_id()] -+ -+#ifdef CONFIG_SMP -+extern struct cpuinfo_um cpu_data[]; -+#define current_cpu_data cpu_data[smp_processor_id()] -+#else -+#define cpu_data (&boot_cpu_data) -+#define current_cpu_data boot_cpu_data -+#endif -+ -+#define KSTK_EIP(tsk) (PT_REGS_IP(&tsk->thread.regs)) -+#define KSTK_ESP(tsk) (PT_REGS_SP(&tsk->thread.regs)) -+#define get_wchan(p) (0) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/processor-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/processor-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/processor-i386.h 2005-05-03 23:41:08.095518296 +0300 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_I386_H -+#define __UM_PROCESSOR_I386_H -+ -+extern int cpu_has_xmm; -+extern int cpu_has_cmov; -+ -+struct arch_thread { -+ unsigned long debugregs[8]; -+ int debugregs_seq; -+}; -+ -+#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ -+ .debugregs_seq = 0 } -+ -+#include "asm/arch/user.h" -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/processor-ppc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/processor-ppc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/processor-ppc.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,15 @@ -+#ifndef __UM_PROCESSOR_PPC_H -+#define __UM_PROCESSOR_PPC_H -+ -+#if defined(__ASSEMBLY__) -+ -+#define CONFIG_ALL_PPC -+#include "arch/processor.h" -+ -+#else -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+#endif -Index: linux-2.4.29/include/asm-um/ptrace-generic.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ptrace-generic.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ptrace-generic.h 2005-05-03 23:41:08.084519968 +0300 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_GENERIC_H -+#define __UM_PTRACE_GENERIC_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+ -+#include "asm/current.h" -+ -+#define pt_regs pt_regs_subarch -+#define show_regs show_regs_subarch -+ -+#include "asm/arch/ptrace.h" -+ -+#undef pt_regs -+#undef show_regs -+#undef user_mode -+#undef instruction_pointer -+ -+#include "sysdep/ptrace.h" -+#include "skas_ptrace.h" -+ -+struct pt_regs { -+ union uml_pt_regs regs; -+}; -+ -+#define EMPTY_REGS { regs : EMPTY_UML_PT_REGS } -+ -+#define PT_REGS_IP(r) UPT_IP(&(r)->regs) -+#define PT_REGS_SP(r) UPT_SP(&(r)->regs) -+ -+#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg) -+#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val) -+ -+#define PT_REGS_SET_SYSCALL_RETURN(r, res) \ -+ UPT_SET_SYSCALL_RETURN(&(r)->regs, res) -+#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) -+ -+#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) -+ -+#define PT_REGS_SC(r) UPT_SC(&(r)->regs) -+ -+struct task_struct; -+ -+extern unsigned long getreg(struct task_struct *child, int regno); -+extern int putreg(struct task_struct *child, int regno, unsigned long value); -+extern int get_fpregs(unsigned long buf, struct task_struct *child); -+extern int set_fpregs(unsigned long buf, struct task_struct *child); -+extern int get_fpxregs(unsigned long buf, struct task_struct *child); -+extern int set_fpxregs(unsigned long buf, struct task_struct *tsk); -+ -+extern void show_regs(struct pt_regs *regs); -+ -+#define INIT_TASK_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/ptrace-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ptrace-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ptrace-i386.h 2005-05-03 23:41:08.085519816 +0300 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_I386_H -+#define __UM_PTRACE_I386_H -+ -+#include "sysdep/ptrace.h" -+#include "asm/ptrace-generic.h" -+ -+#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) -+#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) -+#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) -+#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) -+#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) -+#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) -+#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) -+ -+#define PT_REGS_CS(r) UPT_CS(&(r)->regs) -+#define PT_REGS_SS(r) UPT_SS(&(r)->regs) -+#define PT_REGS_DS(r) UPT_DS(&(r)->regs) -+#define PT_REGS_ES(r) UPT_ES(&(r)->regs) -+#define PT_REGS_FS(r) UPT_FS(&(r)->regs) -+#define PT_REGS_GS(r) UPT_GS(&(r)->regs) -+ -+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) -+ -+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) -+#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) -+#define PT_FIX_EXEC_STACK(sp) do ; while(0) -+ -+#define user_mode(r) UPT_IS_USER(&(r)->regs) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/resource.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/resource.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/resource.h 2005-05-03 22:28:14.980332856 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RESOURCE_H -+#define __UM_RESOURCE_H -+ -+#include "asm/arch/resource.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/rwlock.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/rwlock.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/rwlock.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RWLOCK_H -+#define __UM_RWLOCK_H -+ -+#include "asm/arch/rwlock.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/rwsem.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/rwsem.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/rwsem.h 2005-05-03 23:41:08.109516168 +0300 -@@ -0,0 +1,10 @@ -+#ifndef __UM_RWSEM_H__ -+#define __UM_RWSEM_H__ -+ -+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -+#define __builtin_expect(exp,c) (exp) -+#endif -+ -+#include "asm/arch/rwsem.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/scatterlist.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/scatterlist.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/scatterlist.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SCATTERLIST_H -+#define __UM_SCATTERLIST_H -+ -+#include "asm/arch/scatterlist.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/segment.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/segment.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/segment.h 2005-05-03 22:28:14.983332400 +0300 -@@ -0,0 +1,4 @@ -+#ifndef __UM_SEGMENT_H -+#define __UM_SEGMENT_H -+ -+#endif -Index: linux-2.4.29/include/asm-um/semaphore.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/semaphore.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/semaphore.h 2005-05-03 23:41:08.110516016 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMAPHORE_H -+#define __UM_SEMAPHORE_H -+ -+#include "asm/arch/semaphore.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/sembuf.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/sembuf.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/sembuf.h 2005-05-03 22:28:14.984332248 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMBUF_H -+#define __UM_SEMBUF_H -+ -+#include "asm/arch/sembuf.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/serial.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/serial.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/serial.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SERIAL_H -+#define __UM_SERIAL_H -+ -+#include "asm/arch/serial.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/shmbuf.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/shmbuf.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/shmbuf.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMBUF_H -+#define __UM_SHMBUF_H -+ -+#include "asm/arch/shmbuf.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/shmparam.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/shmparam.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/shmparam.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMPARAM_H -+#define __UM_SHMPARAM_H -+ -+#include "asm/arch/shmparam.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/sigcontext-generic.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/sigcontext-generic.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/sigcontext-generic.h 2005-05-03 22:28:14.987331792 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_GENERIC_H -+#define __UM_SIGCONTEXT_GENERIC_H -+ -+#include "asm/arch/sigcontext.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/sigcontext-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/sigcontext-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/sigcontext-i386.h 2005-05-03 22:28:14.988331640 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_I386_H -+#define __UM_SIGCONTEXT_I386_H -+ -+#include "asm/sigcontext-generic.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/sigcontext-ppc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/sigcontext-ppc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/sigcontext-ppc.h 2005-05-03 22:28:14.000000000 +0300 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SIGCONTEXT_PPC_H -+#define __UM_SIGCONTEXT_PPC_H -+ -+#define pt_regs sys_pt_regs -+ -+#include "asm/sigcontext-generic.h" -+ -+#undef pt_regs -+ -+#endif -Index: linux-2.4.29/include/asm-um/siginfo.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/siginfo.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/siginfo.h 2005-05-03 23:41:08.092518752 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGINFO_H -+#define __UM_SIGINFO_H -+ -+#include "asm/arch/siginfo.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/signal.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/signal.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/signal.h 2005-05-03 23:41:08.090519056 +0300 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SIGNAL_H -+#define __UM_SIGNAL_H -+ -+#include "asm/arch/signal.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/smp.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/smp.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/smp.h 2005-05-03 23:41:09.801258984 +0300 -@@ -0,0 +1,19 @@ -+#ifndef __UM_SMP_H -+#define __UM_SMP_H -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/config.h" -+#include "asm/current.h" -+ -+#define smp_processor_id() (current->processor) -+#define cpu_logical_map(n) (n) -+#define cpu_number_map(n) (n) -+#define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ -+extern int hard_smp_processor_id(void); -+extern unsigned long cpu_online_map; -+#define NO_PROC_ID -1 -+ -+#endif -+ -+#endif -Index: linux-2.4.29/include/asm-um/smplock.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/smplock.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/smplock.h 2005-05-03 23:41:09.912242112 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SMPLOCK_H -+#define __UM_SMPLOCK_H -+ -+#include "asm/arch/smplock.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/socket.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/socket.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/socket.h 2005-05-03 22:37:45.384618224 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKET_H -+#define __UM_SOCKET_H -+ -+#include "asm/arch/socket.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/sockios.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/sockios.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/sockios.h 2005-05-03 22:28:14.994330728 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKIOS_H -+#define __UM_SOCKIOS_H -+ -+#include "asm/arch/sockios.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/softirq.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/softirq.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/softirq.h 2005-05-03 23:41:09.910242416 +0300 -@@ -0,0 +1,13 @@ -+#ifndef __UM_SOFTIRQ_H -+#define __UM_SOFTIRQ_H -+ -+#include "linux/smp.h" -+#include "asm/system.h" -+#include "asm/processor.h" -+ -+/* A gratuitous name change */ -+#define i386_bh_lock um_bh_lock -+#include "asm/arch/softirq.h" -+#undef i386_bh_lock -+ -+#endif -Index: linux-2.4.29/include/asm-um/spinlock.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/spinlock.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/spinlock.h 2005-05-03 23:41:08.101517384 +0300 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SPINLOCK_H -+#define __UM_SPINLOCK_H -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_SMP -+#include "asm/arch/spinlock.h" -+#endif -+ -+#endif -Index: linux-2.4.29/include/asm-um/statfs.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/statfs.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/statfs.h 2005-05-03 23:41:08.104516928 +0300 -@@ -0,0 +1,6 @@ -+#ifndef _UM_STATFS_H -+#define _UM_STATFS_H -+ -+#include "asm/arch/statfs.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/stat.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/stat.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/stat.h 2005-05-03 22:28:14.997330272 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_STAT_H -+#define __UM_STAT_H -+ -+#include "asm/arch/stat.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/string.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/string.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/string.h 2005-05-03 22:28:14.998330120 +0300 -@@ -0,0 +1,7 @@ -+#ifndef __UM_STRING_H -+#define __UM_STRING_H -+ -+#include "asm/arch/string.h" -+#include "asm/archparam.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/system-generic.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/system-generic.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/system-generic.h 2005-05-03 23:41:08.098517840 +0300 -@@ -0,0 +1,50 @@ -+#ifndef __UM_SYSTEM_GENERIC_H -+#define __UM_SYSTEM_GENERIC_H -+ -+#include "asm/arch/system.h" -+ -+#undef prepare_to_switch -+#undef switch_to -+#undef __save_flags -+#undef save_flags -+#undef __restore_flags -+#undef restore_flags -+#undef __cli -+#undef __sti -+#undef cli -+#undef sti -+#undef local_irq_save -+#undef local_irq_restore -+#undef local_irq_disable -+#undef local_irq_enable -+ -+#define prepare_to_switch() do ; while(0) -+ -+void *_switch_to(void *prev, void *next); -+ -+#define switch_to(prev, next, last) prev = _switch_to(prev, next) -+ -+extern int get_signals(void); -+extern int set_signals(int enable); -+extern void block_signals(void); -+extern void unblock_signals(void); -+ -+#define local_irq_save(flags) do { (flags) = set_signals(0); } while(0) -+ -+#define local_irq_restore(flags) do { set_signals(flags); } while(0) -+ -+#define local_irq_enable() unblock_signals() -+#define local_irq_disable() block_signals() -+ -+#define __sti() unblock_signals() -+#define sti() unblock_signals() -+#define __cli() block_signals() -+#define cli() block_signals() -+ -+#define __save_flags(x) do { (x) = get_signals(); } while(0) -+#define save_flags(x) __save_flags(x) -+ -+#define __restore_flags(x) local_irq_restore(x) -+#define restore_flags(x) __restore_flags(x) -+ -+#endif -Index: linux-2.4.29/include/asm-um/system-i386.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/system-i386.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/system-i386.h 2005-05-03 23:41:08.098517840 +0300 -@@ -0,0 +1,8 @@ -+#ifndef __UM_SYSTEM_I386_H -+#define __UM_SYSTEM_I386_H -+ -+#include "asm/system-generic.h" -+ -+#define __HAVE_ARCH_CMPXCHG 1 -+ -+#endif -Index: linux-2.4.29/include/asm-um/system-ppc.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/system-ppc.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/system-ppc.h 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1,12 @@ -+#ifndef __UM_SYSTEM_PPC_H -+#define __UM_SYSTEM_PPC_H -+ -+#define _switch_to _ppc_switch_to -+ -+#include "asm/arch/system.h" -+ -+#undef _switch_to -+ -+#include "asm/system-generic.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/termbits.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/termbits.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/termbits.h 2005-05-03 22:37:45.506599680 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMBITS_H -+#define __UM_TERMBITS_H -+ -+#include "asm/arch/termbits.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/termios.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/termios.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/termios.h 2005-05-03 22:37:45.512598768 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMIOS_H -+#define __UM_TERMIOS_H -+ -+#include "asm/arch/termios.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/timex.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/timex.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/timex.h 2005-05-03 23:41:09.799259288 +0300 -@@ -0,0 +1,18 @@ -+#ifndef __UM_TIMEX_H -+#define __UM_TIMEX_H -+ -+#include "linux/time.h" -+ -+typedef unsigned long cycles_t; -+ -+#define cacheflush_time (0) -+ -+static inline cycles_t get_cycles (void) -+{ -+ return 0; -+} -+ -+#define vxtime_lock() do ; while (0) -+#define vxtime_unlock() do ; while (0) -+ -+#endif -Index: linux-2.4.29/include/asm-um/tlb.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/tlb.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/tlb.h 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1 @@ -+#include <asm-generic/tlb.h> -Index: linux-2.4.29/include/asm-um/types.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/types.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/types.h 2005-05-03 22:28:15.006328904 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TYPES_H -+#define __UM_TYPES_H -+ -+#include "asm/arch/types.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/uaccess.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/uaccess.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/uaccess.h 2005-05-03 23:41:09.913241960 +0300 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UACCESS_H -+#define __UM_UACCESS_H -+ -+#include "linux/sched.h" -+ -+#define VERIFY_READ 0 -+#define VERIFY_WRITE 1 -+ -+/* -+ * The fs value determines whether argument validity checking should be -+ * performed or not. If get_fs() == USER_DS, checking is performed, with -+ * get_fs() == KERNEL_DS, checking is bypassed. -+ * -+ * For historical reasons, these macros are grossly misnamed. -+ */ -+ -+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -+ -+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -+#define USER_DS MAKE_MM_SEG(TASK_SIZE) -+ -+#define get_ds() (KERNEL_DS) -+#define get_fs() (current->addr_limit) -+#define set_fs(x) (current->addr_limit = (x)) -+ -+#define segment_eq(a, b) ((a).seg == (b).seg) -+ -+#include "um_uaccess.h" -+ -+#define __copy_from_user(to, from, n) copy_from_user(to, from, n) -+ -+#define __copy_to_user(to, from, n) copy_to_user(to, from, n) -+ -+#define __get_user(x, ptr) \ -+({ \ -+ const __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ (x) = 0; \ -+ if (__copy_from_user(&__private_val, (__private_ptr), \ -+ sizeof(*(__private_ptr))) == 0) {\ -+ (x) = (__typeof__(*(__private_ptr))) __private_val; \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define get_user(x, ptr) \ -+({ \ -+ const __typeof__((*ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ -+ __get_user(x, private_ptr) : ((x) = 0, -EFAULT)); \ -+}) -+ -+#define __put_user(x, ptr) \ -+({ \ -+ __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ __private_val = (__typeof__(*(__private_ptr))) (x); \ -+ if (__copy_to_user((__private_ptr), &__private_val, \ -+ sizeof(*(__private_ptr))) == 0) { \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define put_user(x, ptr) \ -+({ \ -+ __typeof__(*(ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \ -+ __put_user(x, private_ptr) : -EFAULT); \ -+}) -+ -+#define strlen_user(str) strnlen_user(str, ~0UL >> 1) -+ -+struct exception_table_entry -+{ -+ unsigned long insn; -+ unsigned long fixup; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/ucontext.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/ucontext.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/ucontext.h 2005-05-03 22:28:15.008328600 +0300 -@@ -0,0 +1,6 @@ -+#ifndef _ASM_UM_UCONTEXT_H -+#define _ASM_UM_UCONTEXT_H -+ -+#include "asm/arch/ucontext.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/unaligned.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/unaligned.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/unaligned.h 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_UNALIGNED_H -+#define __UM_UNALIGNED_H -+ -+#include "asm/arch/unaligned.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/unistd.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/unistd.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/unistd.h 2005-05-03 23:41:11.206045424 +0300 -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef _UM_UNISTD_H_ -+#define _UM_UNISTD_H_ -+ -+#include "linux/resource.h" -+#include "asm/uaccess.h" -+ -+extern long sys_open(const char *filename, int flags, int mode); -+extern long sys_dup(unsigned int fildes); -+extern long sys_close(unsigned int fd); -+extern int um_execve(const char *file, char *const argv[], char *const env[]); -+extern long sys_setsid(void); -+extern long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); -+extern long sys_wait4(pid_t pid,unsigned int *stat_addr, int options, -+ struct rusage *ru); -+extern long sys_mount(char *dev_name, char *dir_name, char *type, -+ unsigned long flags, void *data); -+extern long sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, -+ struct timeval *tvp); -+extern long sys_lseek(unsigned int fildes, unsigned long offset, int whence); -+extern long sys_read(unsigned int fildes, char *buf, int len); -+extern long sys_write(unsigned int fildes, char *buf, int len); -+ -+#ifdef __KERNEL_SYSCALLS__ -+ -+#define KERNEL_CALL(ret_t, sys, args...) \ -+ mm_segment_t fs = get_fs(); \ -+ ret_t ret; \ -+ set_fs(KERNEL_DS); \ -+ ret = sys(args); \ -+ set_fs(fs); \ -+ if (ret >= 0) \ -+ return ret; \ -+ errno = -(long)ret; \ -+ return -1; -+ -+static inline long open(const char *pathname, int flags, int mode) -+{ -+ KERNEL_CALL(int, sys_open, pathname, flags, mode) -+} -+ -+static inline long dup(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_dup, fd); -+} -+ -+static inline long close(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_close, fd); -+} -+ -+static inline int execve(const char *filename, char *const argv[], -+ char *const envp[]) -+{ -+ KERNEL_CALL(int, um_execve, filename, argv, envp); -+} -+ -+static inline long waitpid(pid_t pid, unsigned int *status, int options) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, pid, status, options, NULL) -+} -+ -+static inline pid_t wait(int *status) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, -1, status, 0, NULL) -+} -+ -+static inline pid_t setsid(void) -+{ -+ KERNEL_CALL(pid_t, sys_setsid) -+} -+ -+static inline long lseek(unsigned int fd, off_t offset, unsigned int whence) -+{ -+ KERNEL_CALL(long, sys_lseek, fd, offset, whence) -+} -+ -+static inline int read(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_read, fd, buf, len) -+} -+ -+static inline int write(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_write, fd, buf, len) -+} -+ -+#endif -+ -+/* Save the value of __KERNEL_SYSCALLS__, undefine it, include the underlying -+ * arch's unistd.h for the system call numbers, and restore the old -+ * __KERNEL_SYSCALLS__. -+ */ -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __SAVE_KERNEL_SYSCALLS__ __KERNEL_SYSCALLS__ -+#endif -+ -+#undef __KERNEL_SYSCALLS__ -+#include "asm/arch/unistd.h" -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __KERNEL_SYSCALLS__ __SAVE_KERNEL_SYSCALLS__ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/asm-um/user.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/user.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/user.h 2005-05-03 23:43:26.305507184 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_USER_H -+#define __UM_USER_H -+ -+#include "asm/arch/user.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/vga.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/vga.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/vga.h 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_VGA_H -+#define __UM_VGA_H -+ -+#include "asm/arch/vga.h" -+ -+#endif -Index: linux-2.4.29/include/asm-um/xor.h -=================================================================== ---- linux-2.4.29.orig/include/asm-um/xor.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/asm-um/xor.h 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1,6 @@ -+#ifndef __UM_XOR_H -+#define __UM_XOR_H -+ -+#include "asm-generic/xor.h" -+ -+#endif -Index: linux-2.4.29/include/linux/blk.h -=================================================================== ---- linux-2.4.29.orig/include/linux/blk.h 2005-05-03 21:09:03.000000000 +0300 -+++ linux-2.4.29/include/linux/blk.h 2005-05-03 23:41:26.547713136 +0300 -@@ -320,6 +320,24 @@ - #define DEVICE_REQUEST do_ida_request - #define DEVICE_NR(device) (MINOR(device) >> 4) - -+#elif (MAJOR_NR == UBD_MAJOR) -+ -+#define DEVICE_NAME "User-mode block device" -+#define DEVICE_INTR do_ubd -+#define DEVICE_REQUEST do_ubd_request -+#define DEVICE_NR(device) (MINOR(device) >> UBD_SHIFT) -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ -+#elif (MAJOR_NR == COW_MAJOR) -+ -+#define DEVICE_NAME "COW device" -+#define DEVICE_INTR do_cow -+#define DEVICE_REQUEST do_cow_request -+#define DEVICE_NR(device) (MINOR(device) >> COW_SHIFT) -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ - #endif /* MAJOR_NR == whatever */ - - /* provide DEVICE_xxx defaults, if not explicitly defined -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-05-03 21:06:01.000000000 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-05-03 23:56:00.359873496 +0300 -@@ -322,6 +322,8 @@ - #include <linux/ncp_fs_i.h> - #include <linux/proc_fs_i.h> - #include <linux/usbdev_fs_i.h> -+#include <linux/hostfs_fs_i.h> -+#include <linux/hppfs_fs_i.h> - #include <linux/jffs2_fs_i.h> - #include <linux/cramfs_fs_sb.h> - -@@ -518,7 +520,9 @@ - struct proc_inode_info proc_i; - struct socket socket_i; - struct usbdev_inode_info usbdev_i; -- struct jffs2_inode_info jffs2_i; -+ struct hostfs_inode_info hostfs_i; -+ struct hppfs_inode_info hppfs_i; -+ struct jffs2_inode_info jffs2_i; - void *generic_ip; - } u; - }; -@@ -866,6 +870,8 @@ - unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); - int (*mmap) (struct file *, struct vm_area_struct *); -+ void (*munmap) (struct file *, struct vm_area_struct *, -+ unsigned long start, unsigned long len); - int (*open) (struct inode *, struct file *); - int (*flush) (struct file *); - int (*release) (struct inode *, struct file *); -Index: linux-2.4.29/include/linux/ghash.h -=================================================================== ---- linux-2.4.29.orig/include/linux/ghash.h 2005-05-03 21:09:50.000000000 +0300 -+++ linux-2.4.29/include/linux/ghash.h 2005-05-03 22:28:15.000000000 +0300 -@@ -153,6 +153,26 @@ - return NULL;\ - } - -+/* LINKAGE - empty or "static", depending on whether you want the definitions to -+ * be public or not -+ * NAME - a string to stick in names to make this hash table type distinct from -+ * any others -+ * HASHSIZE - number of buckets -+ * TYPE - type of data contained in the buckets - must be a structure, one -+ * field is of type NAME_ptrs, another is the hash key -+ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that -+ * field -+ * KEYTYPE - type of the key field within TYPE -+ * KEY - name of the key field within TYPE -+ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the -+ * prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal, -+ * non-zero for not equal -+ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE), -+ * it returns a number in the range 0 ... HASHSIZE - 1 -+ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call -+ * DEF_HASH. -+ */ -+ - #define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \ - \ - struct NAME##_table {\ -@@ -165,7 +185,7 @@ - TYPE * prev_hash;\ - }; - --#define DEF_HASH(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\ -+#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\ - \ - LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ - {\ -@@ -206,12 +226,10 @@ - \ - LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ - {\ -- int ix = hashfn(pos);\ -+ int ix = HASHFN(pos);\ - TYPE * ptr = tbl->hashtable[ix];\ - while(ptr && KEYCMP(ptr->KEY, pos))\ - ptr = ptr->PTRS.next_hash;\ -- if(ptr && !KEYEQ(ptr->KEY, pos))\ -- ptr = NULL;\ - return ptr;\ - } - -Index: linux-2.4.29/include/linux/hostfs_fs_i.h -=================================================================== ---- linux-2.4.29.orig/include/linux/hostfs_fs_i.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/linux/hostfs_fs_i.h 2005-05-03 22:28:15.053321760 +0300 -@@ -0,0 +1,25 @@ -+#ifndef _HOSTFS_FS_I -+#define _HOSTFS_FS_I -+ -+#include "filehandle.h" -+ -+struct externfs_file_ops; -+ -+struct hostfs_inode_info { -+ struct externfs_file_ops *ops; -+ struct file_handle *fh; -+ int mode; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/linux/hppfs_fs_i.h -=================================================================== ---- linux-2.4.29.orig/include/linux/hppfs_fs_i.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/linux/hppfs_fs_i.h 2005-05-03 22:28:15.054321608 +0300 -@@ -0,0 +1,19 @@ -+#ifndef _HPPFS_FS_I -+#define _HPPFS_FS_I -+ -+struct hppfs_inode_info { -+ struct dentry *proc_dentry; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/include/linux/kernel.h -=================================================================== ---- linux-2.4.29.orig/include/linux/kernel.h 2005-05-03 21:04:57.000000000 +0300 -+++ linux-2.4.29/include/linux/kernel.h 2005-05-03 23:41:08.088519360 +0300 -@@ -49,7 +49,7 @@ - # define ATTRIB_NORET __attribute__((noreturn)) - # define NORET_AND noreturn, - --#ifdef __i386__ -+#if defined(__i386__) || defined(UM_FASTCALL) - #define FASTCALL(x) x __attribute__((regparm(3))) - #define fastcall __attribute__((regparm(3))) - #else -Index: linux-2.4.29/include/linux/kernel_stat.h -=================================================================== ---- linux-2.4.29.orig/include/linux/kernel_stat.h 2005-05-03 21:09:36.000000000 +0300 -+++ linux-2.4.29/include/linux/kernel_stat.h 2005-05-03 23:41:09.913241960 +0300 -@@ -12,7 +12,7 @@ - * used by rstatd/perfmeter - */ - --#define DK_MAX_MAJOR 16 -+#define DK_MAX_MAJOR 99 - #define DK_MAX_DISK 16 - - struct kernel_stat { -Index: linux-2.4.29/include/linux/mm.h -=================================================================== ---- linux-2.4.29.orig/include/linux/mm.h 2005-05-03 21:05:15.000000000 +0300 -+++ linux-2.4.29/include/linux/mm.h 2005-05-03 23:55:59.724970016 +0300 -@@ -441,6 +441,18 @@ - extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); - extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); - -+#ifndef HAVE_ARCH_VALIDATE -+static inline struct page *arch_validate(struct page *page, -+ unsigned int gfp_mask, int order) -+{ -+ return(page); -+} -+#endif -+ -+#ifndef HAVE_ARCH_FREE_PAGE -+static inline void arch_free_page(struct page *page, int order) { } -+#endif -+ - static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) - { - /* -@@ -448,7 +460,7 @@ - */ - if (order >= MAX_ORDER) - return NULL; -- return _alloc_pages(gfp_mask, order); -+ return arch_validate(_alloc_pages(gfp_mask, order), gfp_mask, order); - } - - #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -@@ -508,6 +520,9 @@ - int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -+extern long do_mprotect(struct mm_struct *mm, unsigned long start, -+ size_t len, unsigned long prot); -+ - /* - * On a two-level page table, this ends up being trivial. Thus the - * inlining and the symmetry break with pte_alloc() that does all -@@ -555,9 +570,10 @@ - - extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - --extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, -- unsigned long len, unsigned long prot, -- unsigned long flag, unsigned long pgoff); -+extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flag, -+ unsigned long pgoff); - - static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, -@@ -567,7 +583,8 @@ - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) -- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -+ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, -+ offset >> PAGE_SHIFT); - out: - return ret; - } -Index: linux-2.4.29/include/linux/proc_mm.h -=================================================================== ---- linux-2.4.29.orig/include/linux/proc_mm.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/include/linux/proc_mm.h 2005-05-03 23:46:00.225107848 +0300 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROC_MM_H -+#define __PROC_MM_H -+ -+#include "linux/sched.h" -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+extern struct mm_struct *proc_mm_get_mm(int fd); -+ -+#endif -Index: linux-2.4.29/include/linux/shmem_fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/shmem_fs.h 2005-05-03 21:09:04.000000000 +0300 -+++ linux-2.4.29/include/linux/shmem_fs.h 2005-05-03 22:28:15.082317352 +0300 -@@ -22,6 +22,8 @@ - unsigned long next_index; - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */ - void **i_indirect; /* indirect blocks */ -+ unsigned long map_direct[SHMEM_NR_DIRECT]; -+ void **map_indirect; - unsigned long swapped; /* data pages assigned to swap */ - unsigned long flags; - struct list_head list; -Index: linux-2.4.29/include/linux/tty.h -=================================================================== ---- linux-2.4.29.orig/include/linux/tty.h 2005-05-03 21:07:23.000000000 +0300 -+++ linux-2.4.29/include/linux/tty.h 2005-05-03 23:41:09.901243784 +0300 -@@ -310,6 +310,9 @@ - spinlock_t read_lock; - /* If the tty has a pending do_SAK, queue it here - akpm */ - struct tq_struct SAK_tq; -+#ifdef CONFIG_TTY_LOG -+ int log_fd; -+#endif - }; - - /* tty magic number */ -@@ -368,6 +371,7 @@ - extern int specialix_init(void); - extern int espserial_init(void); - extern int macserial_init(void); -+extern int stdio_init(void); - extern int a2232board_init(void); - - extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device, -@@ -434,5 +438,7 @@ - extern int vt_ioctl(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg); - -+extern void stdio_console_init(void); -+ - #endif /* __KERNEL__ */ - #endif -Index: linux-2.4.29/init/do_mounts.c -=================================================================== ---- linux-2.4.29.orig/init/do_mounts.c 2005-05-03 21:09:10.000000000 +0300 -+++ linux-2.4.29/init/do_mounts.c 2005-05-03 22:28:15.000000000 +0300 -@@ -154,6 +154,22 @@ - { "pf", 0x2f00 }, - { "apblock", APBLOCK_MAJOR << 8}, - { "ddv", DDV_MAJOR << 8}, -+ { "ubd0", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubda", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubd1", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubdb", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubd2", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubdc", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubd3", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubdd", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubd4", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubde", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubd5", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubdf", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubd6", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubdg", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubd7", UBD_MAJOR << 8 | 7 << 4}, -+ { "ubdh", UBD_MAJOR << 8 | 7 << 4}, - { "jsfd", JSFD_MAJOR << 8}, - #if defined(CONFIG_ARCH_S390) - { "dasda", (DASD_MAJOR << MINORBITS) }, -Index: linux-2.4.29/kernel/panic.c -=================================================================== ---- linux-2.4.29.orig/kernel/panic.c 2005-05-03 21:09:35.000000000 +0300 -+++ linux-2.4.29/kernel/panic.c 2005-05-03 22:28:15.000000000 +0300 -@@ -74,7 +74,7 @@ - smp_send_stop(); - #endif - -- notifier_call_chain(&panic_notifier_list, 0, NULL); -+ notifier_call_chain(&panic_notifier_list, 0, buf); - - if (panic_timeout > 0) - { -Index: linux-2.4.29/MAINTAINERS -=================================================================== ---- linux-2.4.29.orig/MAINTAINERS 2005-05-03 21:07:57.000000000 +0300 -+++ linux-2.4.29/MAINTAINERS 2005-05-03 22:28:15.000000000 +0300 -@@ -2120,6 +2120,14 @@ - L: linux-usb-devel@lists.sourceforge.net - W: http://usb.in.tum.de - S: Maintained -+ -+USER-MODE PORT -+P: Jeff Dike -+M: jdike@karaya.com -+L: user-mode-linux-devel@lists.sourceforge.net -+L: user-mode-linux-user@lists.sourceforge.net -+W: http://user-mode-linux.sourceforge.net -+S: Maintained - - USB "USBNET" DRIVER - P: David Brownell -Index: linux-2.4.29/Makefile -=================================================================== ---- linux-2.4.29.orig/Makefile 2005-05-03 21:08:14.000000000 +0300 -+++ linux-2.4.29/Makefile 2005-05-03 22:28:15.000000000 +0300 -@@ -5,7 +5,15 @@ - - KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - --ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+# SUBARCH tells the usermode build what the underlying arch is. That is set -+# first, and if a usermode build is happening, the "ARCH=um" on the command -+# line overrides the setting of ARCH below. If a native build is happening, -+# then ARCH is assigned, getting whatever value it gets normally, and -+# SUBARCH is subsequently ignored. -+ -+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+ARCH := $(SUBARCH) -+ - KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") - - CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ -Index: linux-2.4.29/mm/Makefile -=================================================================== ---- linux-2.4.29.orig/mm/Makefile 2005-05-03 21:08:06.000000000 +0300 -+++ linux-2.4.29/mm/Makefile 2005-05-03 22:28:15.000000000 +0300 -@@ -17,5 +17,6 @@ - shmem.o - - obj-$(CONFIG_HIGHMEM) += highmem.o -+obj-$(CONFIG_PROC_MM) += proc_mm.o - - include $(TOPDIR)/Rules.make -Index: linux-2.4.29/mm/mmap.c -=================================================================== ---- linux-2.4.29.orig/mm/mmap.c 2005-05-03 21:07:59.000000000 +0300 -+++ linux-2.4.29/mm/mmap.c 2005-05-03 22:28:15.000000000 +0300 -@@ -391,10 +391,11 @@ - return 0; - } - --unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, -- unsigned long prot, unsigned long flags, unsigned long pgoff) -+unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long pgoff) - { -- struct mm_struct * mm = current->mm; - struct vm_area_struct * vma, * prev; - unsigned int vm_flags; - int correct_wcount = 0; -@@ -1000,6 +1001,11 @@ - remove_shared_vm_struct(mpnt); - mm->map_count--; - -+ if((mpnt->vm_file != NULL) && (mpnt->vm_file->f_op != NULL) && -+ (mpnt->vm_file->f_op->munmap != NULL)) -+ mpnt->vm_file->f_op->munmap(mpnt->vm_file, mpnt, st, -+ size); -+ - zap_page_range(mm, st, size); - - /* -Index: linux-2.4.29/mm/mprotect.c -=================================================================== ---- linux-2.4.29.orig/mm/mprotect.c 2005-05-03 21:06:44.000000000 +0300 -+++ linux-2.4.29/mm/mprotect.c 2005-05-03 22:28:15.000000000 +0300 -@@ -264,7 +264,8 @@ - return 0; - } - --asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+long do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, -+ unsigned long prot) - { - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * next, * prev; -@@ -281,9 +282,9 @@ - if (end == start) - return 0; - -- down_write(¤t->mm->mmap_sem); -+ down_write(&mm->mmap_sem); - -- vma = find_vma_prev(current->mm, start, &prev); -+ vma = find_vma_prev(mm, start, &prev); - error = -ENOMEM; - if (!vma || vma->vm_start > start) - goto out; -@@ -332,6 +333,11 @@ - prev->vm_mm->map_count--; - } - out: -- up_write(¤t->mm->mmap_sem); -+ up_write(&mm->mmap_sem); - return error; - } -+ -+asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+{ -+ return(do_mprotect(current->mm, start, len, prot)); -+} -Index: linux-2.4.29/mm/page_alloc.c -=================================================================== ---- linux-2.4.29.orig/mm/page_alloc.c 2005-05-03 21:05:16.000000000 +0300 -+++ linux-2.4.29/mm/page_alloc.c 2005-05-03 22:28:15.000000000 +0300 -@@ -116,6 +116,7 @@ - struct page *base; - zone_t *zone; - -+ arch_free_page(page, order); - /* - * Yes, think what happens when other parts of the kernel take - * a reference to a page in order to pin it for io. -ben -Index: linux-2.4.29/mm/proc_mm.c -=================================================================== ---- linux-2.4.29.orig/mm/proc_mm.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.4.29/mm/proc_mm.c 2005-05-03 22:28:15.000000000 +0300 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/proc_fs.h" -+#include "linux/proc_mm.h" -+#include "linux/file.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+ -+static struct file_operations proc_mm_fops; -+ -+struct mm_struct *proc_mm_get_mm(int fd) -+{ -+ struct mm_struct *ret = ERR_PTR(-EBADF); -+ struct file *file; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ ret = ERR_PTR(-EINVAL); -+ if(file->f_op != &proc_mm_fops) -+ goto out_fput; -+ -+ ret = file->private_data; -+ out_fput: -+ fput(file); -+ out: -+ return(ret); -+} -+ -+extern long do_mmap2(struct mm_struct *mm, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+static ssize_t write_proc_mm(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct mm_struct *mm = file->private_data; -+ struct proc_mm_op req; -+ int n, ret; -+ -+ if(count > sizeof(req)) -+ return(-EINVAL); -+ -+ n = copy_from_user(&req, buffer, count); -+ if(n != 0) -+ return(-EFAULT); -+ -+ ret = count; -+ switch(req.op){ -+ case MM_MMAP: { -+ struct mm_mmap *map = &req.u.mmap; -+ -+ ret = do_mmap2(mm, map->addr, map->len, map->prot, -+ map->flags, map->fd, map->offset >> PAGE_SHIFT); -+ if((ret & ~PAGE_MASK) == 0) -+ ret = count; -+ -+ break; -+ } -+ case MM_MUNMAP: { -+ struct mm_munmap *unmap = &req.u.munmap; -+ -+ down_write(&mm->mmap_sem); -+ ret = do_munmap(mm, unmap->addr, unmap->len); -+ up_write(&mm->mmap_sem); -+ -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ case MM_MPROTECT: { -+ struct mm_mprotect *protect = &req.u.mprotect; -+ -+ ret = do_mprotect(mm, protect->addr, protect->len, -+ protect->prot); -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ -+ case MM_COPY_SEGMENTS: { -+ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments); -+ -+ if(IS_ERR(from)){ -+ ret = PTR_ERR(from); -+ break; -+ } -+ -+ mm_copy_segments(from, mm); -+ break; -+ } -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ return(ret); -+} -+ -+static int open_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = mm_alloc(); -+ int ret; -+ -+ ret = -ENOMEM; -+ if(mm == NULL) -+ goto out_mem; -+ -+ ret = init_new_context(current, mm); -+ if(ret) -+ goto out_free; -+ -+ spin_lock(&mmlist_lock); -+ list_add(&mm->mmlist, ¤t->mm->mmlist); -+ mmlist_nr++; -+ spin_unlock(&mmlist_lock); -+ -+ file->private_data = mm; -+ -+ return(0); -+ -+ out_free: -+ mmput(mm); -+ out_mem: -+ return(ret); -+} -+ -+static int release_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = file->private_data; -+ -+ mmput(mm); -+ return(0); -+} -+ -+static struct file_operations proc_mm_fops = { -+ .open = open_proc_mm, -+ .release = release_proc_mm, -+ .write = write_proc_mm, -+}; -+ -+static int make_proc_mm(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("mm", 0222, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_mm : Failed to register /proc/mm\n"); -+ return(0); -+ } -+ ent->proc_fops = &proc_mm_fops; -+ -+ return(0); -+} -+ -+__initcall(make_proc_mm); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -Index: linux-2.4.29/mm/shmem.c -=================================================================== ---- linux-2.4.29.orig/mm/shmem.c 2005-05-03 21:06:51.000000000 +0300 -+++ linux-2.4.29/mm/shmem.c 2005-05-03 22:28:15.000000000 +0300 -@@ -128,16 +128,17 @@ - * +-> 48-51 - * +-> 52-55 - */ --static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page) -+static void *shmem_block(unsigned long index, unsigned long *page, -+ unsigned long *direct, void ***indirect) - { - unsigned long offset; - void **dir; - - if (index < SHMEM_NR_DIRECT) -- return info->i_direct+index; -- if (!info->i_indirect) { -+ return direct+index; -+ if (!*indirect) { - if (page) { -- info->i_indirect = (void **) *page; -+ *indirect = (void **) *page; - *page = 0; - } - return NULL; /* need another page */ -@@ -146,7 +147,7 @@ - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; -- dir = info->i_indirect; -+ dir = *indirect; - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; -@@ -169,7 +170,21 @@ - *dir = (void *) *page; - *page = 0; - } -- return (swp_entry_t *) *dir + offset; -+ return (unsigned long **) *dir + offset; -+} -+ -+static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page) -+{ -+ return((swp_entry_t *) shmem_block(index, page, -+ (unsigned long *) info->i_direct, -+ &info->i_indirect)); -+} -+ -+static unsigned long *shmem_map_count(struct shmem_inode_info *info, -+ unsigned long index, unsigned long *page) -+{ -+ return((unsigned long *) shmem_block(index, page, info->map_direct, -+ &info->map_indirect)); - } - - /* -@@ -847,6 +862,7 @@ - ops = &shmem_vm_ops; - if (!S_ISREG(inode->i_mode)) - return -EACCES; -+ - UPDATE_ATIME(inode); - vma->vm_ops = ops; - return 0; -@@ -1750,4 +1766,125 @@ - return 0; - } - -+static int adjust_map_counts(struct shmem_inode_info *info, -+ unsigned long offset, unsigned long len, -+ int adjust) -+{ -+ unsigned long idx, i, *count, page = 0; -+ -+ spin_lock(&info->lock); -+ offset >>= PAGE_SHIFT; -+ len >>= PAGE_SHIFT; -+ for(i = 0; i < len; i++){ -+ idx = (i + offset) >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); -+ -+ while((count = shmem_map_count(info, idx, &page)) == NULL){ -+ spin_unlock(&info->lock); -+ page = get_zeroed_page(GFP_KERNEL); -+ if(page == 0) -+ return(-ENOMEM); -+ spin_lock(&info->lock); -+ } -+ -+ if(page != 0) -+ free_page(page); -+ -+ *count += adjust; -+ } -+ spin_unlock(&info->lock); -+ return(0); -+} -+ - EXPORT_SYMBOL(shmem_file_setup); -+ -+struct file_operations anon_file_operations; -+ -+static int anon_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ struct file *new; -+ struct inode *inode; -+ loff_t size = vma->vm_end - vma->vm_start; -+ int err; -+ -+ if(file->private_data == NULL){ -+ new = shmem_file_setup("dev/anon", size); -+ if(IS_ERR(new)) -+ return(PTR_ERR(new)); -+ -+ new->f_op = &anon_file_operations; -+ file->private_data = new; -+ } -+ -+ if (vma->vm_file) -+ fput(vma->vm_file); -+ vma->vm_file = file->private_data; -+ get_file(vma->vm_file); -+ -+ inode = vma->vm_file->f_dentry->d_inode; -+ err = adjust_map_counts(SHMEM_I(inode), vma->vm_pgoff, size, 1); -+ if(err) -+ return(err); -+ -+ vma->vm_ops = &shmem_vm_ops; -+ return 0; -+} -+ -+static void anon_munmap(struct file *file, struct vm_area_struct *vma, -+ unsigned long start, unsigned long len) -+{ -+ struct inode *inode = file->f_dentry->d_inode; -+ struct shmem_inode_info *info = SHMEM_I(inode); -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ struct page *page; -+ unsigned long addr, idx, *count; -+ -+ for(addr = start; addr < start + len; addr += PAGE_SIZE){ -+ idx = (addr - vma->vm_start + vma->vm_pgoff); -+ idx >>= PAGE_CACHE_SHIFT; -+ -+ count = shmem_map_count(info, idx, NULL); -+ BUG_ON(count == NULL); -+ -+ (*count)--; -+ if(*count > 0) -+ continue; -+ -+ pgd = pgd_offset(vma->vm_mm, addr); -+ if(pgd_none(*pgd)) -+ continue; -+ -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_none(*pmd)) -+ continue; -+ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte)) /* XXX need to handle swapped pages */ -+ continue; -+ -+ *pte = pte_mkclean(*pte); -+ -+ page = pte_page(*pte); -+ LockPage(page); -+ lru_cache_del(page); -+ ClearPageDirty(page); -+ remove_inode_page(page); -+ UnlockPage(page); -+ -+ page_cache_release(page); -+ } -+} -+ -+int anon_release(struct inode *inode, struct file *file) -+{ -+ if(file->private_data != NULL) -+ fput(file->private_data); -+ return(0); -+} -+ -+struct file_operations anon_file_operations = { -+ .mmap = anon_mmap, -+ .munmap = anon_munmap, -+ .release = anon_release, -+}; diff --git a/lustre/kernel_patches/patches/uml-sigusr1-2.4-vanilla.patch b/lustre/kernel_patches/patches/uml-sigusr1-2.4-vanilla.patch deleted file mode 100644 index fa4ccae..0000000 --- a/lustre/kernel_patches/patches/uml-sigusr1-2.4-vanilla.patch +++ /dev/null @@ -1,22 +0,0 @@ -Index: linux-2.4.24/arch/um/os-Linux/process.c -=================================================================== ---- linux-2.4.24.orig/arch/um/os-Linux/process.c 2005-02-14 14:25:32.784128506 -0500 -+++ linux-2.4.24/arch/um/os-Linux/process.c 2005-02-14 14:26:24.990396165 -0500 -@@ -7,6 +7,7 @@ - #include <stdio.h> - #include <errno.h> - #include <signal.h> -+#include <linux/unistd.h> - #include <sys/mman.h> - #include <sys/wait.h> - #include "os.h" -@@ -92,7 +93,8 @@ - - void os_usr1_process(int pid) - { -- kill(pid, SIGUSR1); -+ syscall(__NR_tkill, pid, SIGUSR1); -+/* tkill(pid, SIGUSR1);*/ - } - - int os_getpid(void) diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch deleted file mode 100644 index 6778eec..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-hp.patch +++ /dev/null @@ -1,1948 +0,0 @@ - fs/dcache.c | 19 ++ - fs/exec.c | 17 +- - fs/namei.c | 330 +++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 28 ++- - fs/open.c | 172 +++++++++++++++++------ - fs/proc/base.c | 3 - fs/stat.c | 52 ++++--- - include/linux/dcache.h | 60 ++++++++ - include/linux/fs.h | 32 ++++ - include/linux/fs_struct.h | 4 - kernel/exit.c | 3 - kernel/fork.c | 3 - kernel/ksyms.c | 1 - 13 files changed, 591 insertions(+), 133 deletions(-) - -Index: linux/fs/dcache.c -=================================================================== ---- linux.orig/fs/dcache.c Thu Nov 28 18:53:15 2002 -+++ linux/fs/dcache.c Wed Mar 17 13:11:25 2004 -@@ -181,6 +181,13 @@ - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -830,13 +837,19 @@ - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux/fs/exec.c -=================================================================== ---- linux.orig/fs/exec.c Wed Mar 17 13:00:38 2004 -+++ linux/fs/exec.c Wed Mar 17 13:11:25 2004 -@@ -115,8 +115,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -128,7 +130,8 @@ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -371,8 +374,10 @@ - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -384,7 +389,8 @@ - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -396,6 +402,7 @@ - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1120,7 +1127,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux/fs/namei.c -=================================================================== ---- linux.orig/fs/namei.c Wed Mar 17 13:00:37 2004 -+++ linux/fs/namei.c Wed Mar 17 13:12:31 2004 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -332,7 +364,8 @@ - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 5) -@@ -346,10 +379,18 @@ - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (!err && it != NULL && !(it->d.lustre.it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(it); -+ path_release(nd); -+ err = -ENOLINK; -+ } - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -379,15 +420,26 @@ - return __follow_up(mnt, dentry); - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_create_mode; -+ } -+ intent_release(it); -+ if (it) { -+ it->it_op = opc; -+ it->it_create_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -399,7 +451,7 @@ - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt, dentry, NULL); - } - - static inline void follow_dotdot(struct nameidata *nd) -@@ -435,7 +487,7 @@ - mntput(nd->mnt); - nd->mnt = parent; - } -- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) -+ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL)) - ; - } - -@@ -447,7 +499,8 @@ - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -520,15 +573,15 @@ - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } - /* Check mountpoints.. */ -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; - - err = -ENOENT; -@@ -540,7 +593,7 @@ - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -556,7 +609,7 @@ - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -583,19 +636,19 @@ - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) - ; - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -609,7 +662,8 @@ - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -633,6 +687,34 @@ - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -646,15 +721,28 @@ - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -739,6 +827,17 @@ - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -753,6 +852,7 @@ - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -767,7 +867,8 @@ - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -790,13 +891,16 @@ - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -808,6 +912,12 @@ - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -829,7 +939,7 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -860,6 +970,23 @@ - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -955,7 +1082,8 @@ - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -968,12 +1096,15 @@ - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -982,6 +1113,11 @@ - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -996,7 +1132,8 @@ - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1006,11 +1143,14 @@ - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1020,6 +1160,10 @@ - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1035,7 +1179,7 @@ - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1044,11 +1188,12 @@ - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; -- error = vfs_create(dir->d_inode, dentry, mode); -+ error = vfs_create_it(dir->d_inode, dentry, mode, it); - up(&dir->d_inode->i_sem); - dput(nd->dentry); - nd->dentry = dentry; -@@ -1073,7 +1218,7 @@ - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd->mnt,&dentry,it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) -@@ -1152,7 +1297,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1164,8 +1309,10 @@ - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1184,7 +1331,16 @@ - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) { -+ intent_release(it); -+ } else if (it != NULL && !(it->d.lustre.it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(it); -+ path_release(nd); -+ error = -ENOLINK; -+ } - dput(dentry); - if (error) - return error; -@@ -1206,13 +1362,20 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1220,7 +1383,7 @@ - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1276,7 +1439,20 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1298,6 +1474,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1345,7 +1522,18 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1354,6 +1542,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1454,8 +1643,16 @@ - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1513,8 +1710,15 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1581,15 +1785,27 @@ - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1665,7 +1881,18 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1709,7 +1936,7 @@ - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1788,7 +2015,7 @@ - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1876,9 +2103,18 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1894,16 +2130,16 @@ - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1954,20 +2190,28 @@ - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ if (it != NULL) -+ it->d.lustre.it_int_flags |= IT_FL_FOLLOWED; -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1989,7 +2233,13 @@ - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2031,7 +2281,7 @@ - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux/fs/namespace.c -=================================================================== ---- linux.orig/fs/namespace.c Thu Nov 28 18:53:15 2002 -+++ linux/fs/namespace.c Wed Mar 17 13:11:25 2004 -@@ -99,6 +99,7 @@ - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -110,6 +111,7 @@ - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -485,14 +487,17 @@ - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -515,6 +520,7 @@ - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -698,6 +704,7 @@ - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -722,10 +729,11 @@ - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -- -+ } - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); -@@ -736,6 +744,8 @@ - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -901,6 +911,8 @@ - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -908,14 +920,14 @@ - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -970,8 +982,10 @@ - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux/fs/open.c -=================================================================== ---- linux.orig/fs/open.c Thu Nov 28 18:53:15 2002 -+++ linux/fs/open.c Wed Mar 17 13:11:25 2004 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -108,7 +111,13 @@ - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - return error; - } -@@ -118,12 +127,13 @@ - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -163,11 +173,13 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -215,7 +227,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -260,11 +272,13 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -279,11 +293,25 @@ - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -304,12 +332,14 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -324,7 +354,20 @@ - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -347,6 +390,7 @@ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -364,13 +408,14 @@ - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -385,8 +430,9 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -397,6 +443,7 @@ - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -436,9 +483,10 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -454,39 +502,56 @@ - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -495,30 +560,14 @@ - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -+ error = chmod_common(nd.dentry, mode); - -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- --dput_and_out: - path_release(&nd); - out: - return error; -@@ -538,6 +587,20 @@ - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -642,6 +705,7 @@ - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -649,14 +713,15 @@ - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -693,12 +758,15 @@ - } - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -+ intent_release(it); - return f; - - cleanup_all: -@@ -713,11 +781,17 @@ - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux/fs/proc/base.c -=================================================================== ---- linux.orig/fs/proc/base.c Wed Mar 17 13:00:35 2004 -+++ linux/fs/proc/base.c Wed Mar 17 13:11:25 2004 -@@ -481,6 +481,9 @@ - - error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); - nd->last_type = LAST_BIND; -+ -+ if (nd->intent != NULL) -+ nd->intent->d.lustre.it_int_flags |= IT_FL_FOLLOWED; - out: - return error; - } -Index: linux/fs/stat.c -=================================================================== ---- linux.orig/fs/stat.c Thu Sep 13 19:04:43 2001 -+++ linux/fs/stat.c Wed Mar 17 13:12:31 2004 -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -135,13 +137,15 @@ - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -151,13 +155,15 @@ - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -172,13 +178,15 @@ - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -189,13 +197,15 @@ - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -216,7 +226,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -235,7 +245,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -257,7 +267,7 @@ - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -333,12 +343,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -348,12 +360,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -368,7 +382,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux/include/linux/dcache.h -=================================================================== ---- linux.orig/include/linux/dcache.h Thu Nov 28 18:53:15 2002 -+++ linux/include/linux/dcache.h Wed Mar 17 13:11:25 2004 -@@ -6,6 +6,51 @@ - #include <asm/atomic.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -91,8 +136,22 @@ - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +183,7 @@ - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux/include/linux/fs.h -=================================================================== ---- linux.orig/include/linux/fs.h Wed Mar 17 13:11:23 2004 -+++ linux/include/linux/fs.h Wed Mar 17 13:11:31 2004 -@@ -73,6 +73,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -340,6 +341,9 @@ - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -474,6 +478,7 @@ - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -576,6 +581,7 @@ - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -697,6 +703,7 @@ - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -817,7 +824,8 @@ - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -877,21 +885,32 @@ - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1088,10 +1107,14 @@ - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1354,6 +1377,7 @@ - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1365,6 +1389,8 @@ - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void _inode_init_once(struct inode *); -@@ -1503,6 +1529,8 @@ - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux/include/linux/fs_struct.h -=================================================================== ---- linux.orig/include/linux/fs_struct.h Fri Jul 13 18:10:44 2001 -+++ linux/include/linux/fs_struct.h Wed Mar 17 13:11:25 2004 -@@ -34,10 +34,12 @@ - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -57,10 +59,12 @@ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux/kernel/exit.c -=================================================================== ---- linux.orig/kernel/exit.c Wed Mar 17 13:00:38 2004 -+++ linux/kernel/exit.c Wed Mar 17 13:11:25 2004 -@@ -239,11 +239,14 @@ - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux/kernel/fork.c -=================================================================== ---- linux.orig/kernel/fork.c Wed Mar 17 13:00:38 2004 -+++ linux/kernel/fork.c Wed Mar 17 13:11:25 2004 -@@ -387,10 +387,13 @@ - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux/kernel/ksyms.c -=================================================================== ---- linux.orig/kernel/ksyms.c Wed Mar 17 13:11:23 2004 -+++ linux/kernel/ksyms.c Wed Mar 17 13:11:25 2004 -@@ -315,6 +315,7 @@ - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch deleted file mode 100644 index 3205465..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch +++ /dev/null @@ -1,1854 +0,0 @@ - fs/dcache.c | 19 ++ - fs/exec.c | 17 +- - fs/namei.c | 295 +++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 28 +++- - fs/open.c | 172 +++++++++++++++++++------- - fs/stat.c | 52 +++++--- - include/linux/dcache.h | 60 +++++++++ - include/linux/fs.h | 32 ++++ - include/linux/fs_struct.h | 4 - kernel/exit.c | 3 - kernel/fork.c | 3 - kernel/ksyms.c | 1 - 12 files changed, 558 insertions(+), 128 deletions(-) - -Index: linux-2.4.24/fs/dcache.c -=================================================================== ---- linux-2.4.24.orig/fs/dcache.c Fri Jun 13 07:51:37 2003 -+++ linux-2.4.24/fs/dcache.c Wed Mar 17 17:36:14 2004 -@@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry) - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.24/fs/exec.c -=================================================================== ---- linux-2.4.24.orig/fs/exec.c Fri Nov 28 10:26:21 2003 -+++ linux-2.4.24/fs/exec.c Wed Mar 17 17:36:14 2004 -@@ -112,8 +112,10 @@ asmlinkage long sys_uselib(const char * - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -125,7 +127,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -374,8 +377,10 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -387,7 +392,8 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -399,6 +405,7 @@ out: - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1132,7 +1139,7 @@ int do_coredump(long signr, struct pt_re - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.24/fs/namei.c -=================================================================== ---- linux-2.4.24.orig/fs/namei.c Mon Aug 25 04:44:43 2003 -+++ linux-2.4.24/fs/namei.c Wed Mar 17 17:36:52 2004 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -332,7 +364,8 @@ static struct dentry * real_lookup(struc - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 5) -@@ -346,10 +379,12 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -447,7 +482,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -520,9 +556,9 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -540,7 +576,7 @@ int link_path_walk(const char * name, st - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -556,7 +592,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -583,9 +619,9 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -595,7 +631,7 @@ last_component: - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -609,7 +645,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -635,6 +672,34 @@ return_reval: - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -648,15 +706,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -741,6 +812,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -755,6 +837,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -769,7 +852,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -792,13 +876,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -810,6 +897,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -831,7 +924,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -862,6 +955,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -957,7 +1067,8 @@ static inline int lookup_flags(unsigned - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -970,12 +1081,15 @@ int vfs_create(struct inode *dir, struct - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -984,6 +1098,11 @@ exit_lock: - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -998,7 +1117,8 @@ exit_lock: - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1008,11 +1128,14 @@ int open_namei(const char * pathname, in - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1022,6 +1145,10 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1037,7 +1164,7 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1046,10 +1173,11 @@ do_last: - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { -- error = vfs_create(dir->d_inode, dentry, -- mode & ~current->fs->umask); -+ error = vfs_create_it(dir->d_inode, dentry, -+ mode & ~current->fs->umask, it); - up(&dir->d_inode->i_sem); - dput(nd->dentry); - nd->dentry = dentry; -@@ -1153,7 +1281,7 @@ ok: - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1165,8 +1293,10 @@ ok: - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1185,7 +1315,10 @@ do_link: - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(it); - dput(dentry); - if (error) - return error; -@@ -1207,13 +1340,20 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1221,7 +1361,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1277,7 +1417,20 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - mode &= ~current->fs->umask; -@@ -1298,6 +1451,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1345,7 +1499,18 @@ asmlinkage long sys_mkdir(const char * p - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1353,6 +1518,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1453,8 +1619,16 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1512,8 +1686,15 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1580,15 +1761,27 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1664,7 +1857,18 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1708,7 +1912,7 @@ exit: - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1787,7 +1991,7 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1875,9 +2079,18 @@ static inline int do_rename(const char * - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1893,16 +2106,16 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1953,20 +2166,26 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1990,7 +2209,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2032,7 +2257,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.24/fs/namespace.c -=================================================================== ---- linux-2.4.24.orig/fs/namespace.c Fri Nov 28 10:26:21 2003 -+++ linux-2.4.24/fs/namespace.c Wed Mar 17 17:36:14 2004 -@@ -98,6 +98,7 @@ static void detach_mnt(struct vfsmount * - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -109,6 +110,7 @@ static void attach_mnt(struct vfsmount * - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -488,14 +490,17 @@ static int do_loopback(struct nameidata - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -518,6 +523,7 @@ static int do_loopback(struct nameidata - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -701,6 +707,7 @@ long do_mount(char * dev_name, char * di - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -725,10 +732,11 @@ long do_mount(char * dev_name, char * di - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -- -+ } - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); -@@ -739,6 +747,8 @@ long do_mount(char * dev_name, char * di - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -904,6 +914,8 @@ asmlinkage long sys_pivot_root(const cha - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -911,14 +923,14 @@ asmlinkage long sys_pivot_root(const cha - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -973,8 +985,10 @@ out2: - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.24/fs/open.c -=================================================================== ---- linux-2.4.24.orig/fs/open.c Mon Aug 25 04:44:43 2003 -+++ linux-2.4.24/fs/open.c Wed Mar 17 17:36:14 2004 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -109,7 +112,13 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - up_write(&inode->i_alloc_sem); - return error; -@@ -120,12 +129,13 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -165,11 +175,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -217,7 +229,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -262,11 +274,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -281,11 +295,25 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -306,12 +334,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -326,7 +356,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -349,6 +392,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -366,13 +410,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -387,8 +432,9 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -399,6 +445,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -438,9 +485,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -456,39 +504,56 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -497,30 +562,14 @@ out: - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -+ error = chmod_common(nd.dentry, mode); - -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- --dput_and_out: - path_release(&nd); - out: - return error; -@@ -540,6 +589,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -644,6 +707,7 @@ struct file *filp_open(const char * file - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -651,14 +715,15 @@ struct file *filp_open(const char * file - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -695,12 +760,15 @@ struct file *dentry_open(struct dentry * - } - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -+ intent_release(it); - return f; - - cleanup_all: -@@ -715,11 +783,17 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.4.24/fs/stat.c -=================================================================== ---- linux-2.4.24.orig/fs/stat.c Mon Aug 25 04:44:43 2003 -+++ linux-2.4.24/fs/stat.c Wed Mar 17 17:36:52 2004 -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -137,13 +139,15 @@ static int cp_new_stat(struct inode * in - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -153,13 +157,15 @@ asmlinkage long sys_stat(char * filename - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -174,13 +180,15 @@ asmlinkage long sys_newstat(char * filen - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -191,13 +199,15 @@ asmlinkage long sys_lstat(char * filenam - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -218,7 +228,7 @@ asmlinkage long sys_fstat(unsigned int f - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -237,7 +247,7 @@ asmlinkage long sys_newfstat(unsigned in - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -259,7 +269,7 @@ asmlinkage long sys_readlink(const char - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -335,12 +345,14 @@ asmlinkage long sys_stat64(char * filena - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -350,12 +362,14 @@ asmlinkage long sys_lstat64(char * filen - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -370,7 +384,7 @@ asmlinkage long sys_fstat64(unsigned lon - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux-2.4.24/include/linux/dcache.h -=================================================================== ---- linux-2.4.24.orig/include/linux/dcache.h Thu Nov 28 15:53:15 2002 -+++ linux-2.4.24/include/linux/dcache.h Wed Mar 17 17:36:14 2004 -@@ -6,6 +6,51 @@ - #include <asm/atomic.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -91,8 +136,22 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +183,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.24/include/linux/fs.h -=================================================================== ---- linux-2.4.24.orig/include/linux/fs.h Wed Mar 17 17:36:14 2004 -+++ linux-2.4.24/include/linux/fs.h Wed Mar 17 17:36:14 2004 -@@ -73,6 +73,7 @@ extern int leases_enable, dir_notify_ena - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -340,6 +341,9 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -478,6 +482,7 @@ struct inode { - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -582,6 +587,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -702,6 +708,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -822,7 +829,8 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -884,21 +892,32 @@ struct file_operations { - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1094,10 +1113,14 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1386,6 +1409,7 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1397,6 +1421,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void iput(struct inode *); -@@ -1500,6 +1526,8 @@ extern struct file_operations generic_ro - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.24/include/linux/fs_struct.h -=================================================================== ---- linux-2.4.24.orig/include/linux/fs_struct.h Fri Jul 13 15:10:44 2001 -+++ linux-2.4.24/include/linux/fs_struct.h Wed Mar 17 17:36:14 2004 -@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux-2.4.24/kernel/exit.c -=================================================================== ---- linux-2.4.24.orig/kernel/exit.c Thu Nov 28 15:53:15 2002 -+++ linux-2.4.24/kernel/exit.c Wed Mar 17 17:36:14 2004 -@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.24/kernel/fork.c -=================================================================== ---- linux-2.4.24.orig/kernel/fork.c Fri Nov 28 10:26:21 2003 -+++ linux-2.4.24/kernel/fork.c Wed Mar 17 17:36:14 2004 -@@ -386,10 +386,13 @@ static inline struct fs_struct *__copy_f - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.24/kernel/ksyms.c -=================================================================== ---- linux-2.4.24.orig/kernel/ksyms.c Wed Mar 17 17:36:14 2004 -+++ linux-2.4.24/kernel/ksyms.c Wed Mar 17 17:36:14 2004 -@@ -275,6 +275,7 @@ EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(mark_page_accessed); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-rhel.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-rhel.patch deleted file mode 100644 index 83e1f2d..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-rhel.patch +++ /dev/null @@ -1,1920 +0,0 @@ -Index: linux-2.4.21-40.EL/fs/dcache.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/dcache.c -+++ linux-2.4.21-40.EL/fs/dcache.c -@@ -187,6 +187,13 @@ int d_invalidate(struct dentry * dentry) - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -848,13 +855,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.21-40.EL/fs/exec.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/exec.c -+++ linux-2.4.21-40.EL/fs/exec.c -@@ -116,8 +116,10 @@ asmlinkage long sys_uselib(const char * - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -129,7 +131,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -405,8 +408,10 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -418,7 +423,8 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -430,6 +436,7 @@ out: - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1383,7 +1390,7 @@ int do_coredump(long signr, int exit_cod - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.21-40.EL/fs/namei.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/namei.c -+++ linux-2.4.21-40.EL/fs/namei.c -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -332,7 +364,8 @@ static struct dentry * real_lookup(struc - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 8) -@@ -346,10 +379,12 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -447,7 +482,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -524,12 +560,12 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -548,7 +584,7 @@ int link_path_walk(const char * name, st - - if (inode->i_op->follow_link) { - struct vfsmount *mnt = mntget(nd->mnt); -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - mntput(mnt); - if (err) -@@ -565,7 +601,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -592,12 +628,12 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, nd->flags); -+ dentry = cached_lookup(nd->dentry, &this, nd->flags, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, nd->flags); -+ dentry = real_lookup(nd->dentry, &this, nd->flags, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -608,7 +644,7 @@ last_component: - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { - struct vfsmount *mnt = mntget(nd->mnt); -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - mntput(mnt); - if (err) -@@ -623,7 +659,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -641,12 +678,42 @@ lookup_parent: - nd->last_type = LAST_DOT; - else if (this.len == 2 && this.name[1] == '.') - nd->last_type = LAST_DOTDOT; -+ else -+ goto return_base; - return_reval: - /* - * We bypassed the ordinary revalidation routines. - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -660,15 +727,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -753,6 +833,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -767,6 +858,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -781,7 +873,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -804,13 +897,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -822,6 +918,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -843,7 +945,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -874,6 +976,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -971,7 +1090,8 @@ static inline int lookup_flags(unsigned - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -984,12 +1104,15 @@ int vfs_create(struct inode *dir, struct - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -998,6 +1121,11 @@ exit_lock: - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -1012,7 +1140,8 @@ exit_lock: - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1023,11 +1152,14 @@ int open_namei(const char * pathname, in - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1037,6 +1169,10 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1052,7 +1188,7 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1061,11 +1197,12 @@ do_last: - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; -- error = vfs_create(dir->d_inode, dentry, mode); -+ error = vfs_create_it(dir->d_inode, dentry, mode, it); - up(&dir->d_inode->i_sem); - dput(nd->dentry); - nd->dentry = dentry; -@@ -1169,7 +1306,7 @@ ok: - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1181,8 +1318,10 @@ ok: - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1202,7 +1341,10 @@ do_link: - */ - UPDATE_ATIME(dentry->d_inode); - mnt = mntget(nd->mnt); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(it); - dput(dentry); - mntput(mnt); - if (error) -@@ -1225,13 +1367,20 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1239,7 +1388,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1295,7 +1444,20 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1317,6 +1479,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1364,7 +1527,18 @@ asmlinkage long sys_mkdir(const char * p - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1373,6 +1547,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1473,8 +1648,16 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1532,8 +1715,15 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1600,15 +1790,27 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1684,7 +1886,18 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1728,7 +1941,7 @@ exit: - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1807,7 +2020,7 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1895,9 +2108,18 @@ static inline int do_rename(const char * - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1913,16 +2135,16 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1973,20 +2195,26 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -2010,7 +2238,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2052,7 +2286,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.21-40.EL/fs/namespace.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/namespace.c -+++ linux-2.4.21-40.EL/fs/namespace.c -@@ -98,6 +98,7 @@ static void detach_mnt(struct vfsmount * - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -109,6 +110,7 @@ static void attach_mnt(struct vfsmount * - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -488,14 +490,17 @@ static int do_loopback(struct nameidata - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -518,6 +523,7 @@ static int do_loopback(struct nameidata - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -701,6 +707,7 @@ long do_mount(char * dev_name, char * di - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -725,9 +732,11 @@ long do_mount(char * dev_name, char * di - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -+ } - - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, -@@ -739,6 +748,8 @@ long do_mount(char * dev_name, char * di - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -904,6 +915,8 @@ asmlinkage long sys_pivot_root(const cha - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -911,14 +924,14 @@ asmlinkage long sys_pivot_root(const cha - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -973,8 +986,10 @@ out2: - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.21-40.EL/fs/open.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/open.c -+++ linux-2.4.21-40.EL/fs/open.c -@@ -20,6 +20,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -170,9 +172,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -184,7 +187,13 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - up_write(&inode->i_alloc_sem); - return error; -@@ -195,12 +204,13 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -240,11 +250,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -292,7 +304,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -337,11 +349,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -356,11 +370,25 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -381,12 +409,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -401,7 +431,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -424,6 +467,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -441,13 +485,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -462,8 +507,9 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -474,6 +520,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -513,9 +560,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -531,39 +579,56 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -572,30 +637,14 @@ out: - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; - -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; -+ error = chmod_common(nd.dentry, mode); - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -- -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- --dput_and_out: - path_release(&nd); - out: - return error; -@@ -615,6 +664,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -701,7 +764,7 @@ asmlinkage long sys_fchown(unsigned int - return error; - } - --static struct file *__dentry_open(struct dentry *, struct vfsmount *, int, struct file *); -+static struct file *__dentry_open(struct dentry *, struct vfsmount *, int, struct file *, struct lookup_intent *); - - /* - * Note that while the flag value (low two bits) for sys_open means: -@@ -721,7 +784,7 @@ struct file *filp_open(const char * file - { - int namei_flags, error; - struct nameidata nd; -- struct file *f; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -730,19 +793,16 @@ struct file *filp_open(const char * file - namei_flags |= 2; - - error = -ENFILE; -- f = get_empty_filp(); -- if (f == NULL) -- return ERR_PTR(error); - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return __dentry_open(nd.dentry, nd.mnt, flags, f); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- put_filp(f); -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file *f; - -@@ -753,10 +813,10 @@ struct file *dentry_open(struct dentry * - return ERR_PTR(-ENFILE); - } - -- return __dentry_open(dentry, mnt, flags, f); -+ return __dentry_open(dentry, mnt, flags, f, it); - } - --static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f) -+static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f, struct lookup_intent *it) - { - struct inode *inode; - static LIST_HEAD(kill_list); -@@ -788,7 +848,9 @@ static struct file *__dentry_open(struct - } - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } -@@ -804,6 +866,7 @@ static struct file *__dentry_open(struct - } - } - -+ intent_release(it); - return f; - - cleanup_all: -@@ -817,11 +880,17 @@ cleanup_all: - f->f_vfsmnt = NULL; - cleanup_file: - put_filp(f); -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.4.21-40.EL/fs/stat.c -=================================================================== ---- linux-2.4.21-40.EL.orig/fs/stat.c -+++ linux-2.4.21-40.EL/fs/stat.c -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -143,14 +145,16 @@ static int cp_new_stat(struct inode * in - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - if (error == -ESTALE && !errcnt) { -@@ -164,14 +168,16 @@ again: - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - if (error == -ESTALE && !errcnt) { -@@ -191,14 +197,16 @@ again: - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - if (error == -ESTALE && !errcnt) { -@@ -214,14 +222,16 @@ again: - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - -@@ -248,7 +258,7 @@ asmlinkage long sys_fstat(unsigned int f - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -267,7 +277,7 @@ asmlinkage long sys_newfstat(unsigned in - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -289,7 +299,7 @@ asmlinkage long sys_readlink(const char - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -364,14 +374,16 @@ static long cp_new_stat64(struct inode * - asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - if (error == -ESTALE && !errcnt) { -@@ -385,14 +397,16 @@ again: - asmlinkage long sys_lstat64(char * filename, struct stat64 * statbuf, long flags) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error, errcnt = 0; - - again: -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - if (error == -ESTALE && !errcnt) { -@@ -412,7 +426,7 @@ asmlinkage long sys_fstat64(unsigned lon - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux-2.4.21-40.EL/include/linux/dcache.h -=================================================================== ---- linux-2.4.21-40.EL.orig/include/linux/dcache.h -+++ linux-2.4.21-40.EL/include/linux/dcache.h -@@ -6,6 +6,51 @@ - #include <asm/atomic.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -96,8 +141,22 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -129,6 +188,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.21-40.EL/include/linux/fs.h -=================================================================== ---- linux-2.4.21-40.EL.orig/include/linux/fs.h -+++ linux-2.4.21-40.EL/include/linux/fs.h -@@ -73,6 +73,7 @@ extern int leases_enable, dir_notify_ena - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -365,6 +366,9 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -502,6 +506,7 @@ struct inode { - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -604,6 +609,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -736,6 +742,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -856,7 +863,8 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -935,21 +943,32 @@ struct file_operations_ext { - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1151,10 +1170,14 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1446,6 +1469,7 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1457,6 +1481,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void iput(struct inode *); -@@ -1512,6 +1538,7 @@ static inline struct inode *iget_locked( - return iget4_locked(sb, ino, NULL, NULL); - } - -+extern void __iget(struct inode * inode); - extern void clear_inode(struct inode *); - extern struct inode *new_inode(struct super_block *sb); - extern void remove_suid(struct inode *inode); -@@ -1629,6 +1656,8 @@ extern struct file_operations generic_ro - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.21-40.EL/include/linux/fs_struct.h -=================================================================== ---- linux-2.4.21-40.EL.orig/include/linux/fs_struct.h -+++ linux-2.4.21-40.EL/include/linux/fs_struct.h -@@ -37,10 +37,12 @@ static inline void set_fs_root(struct fs - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -60,10 +62,12 @@ static inline void set_fs_pwd(struct fs_ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux-2.4.21-40.EL/kernel/exit.c -=================================================================== ---- linux-2.4.21-40.EL.orig/kernel/exit.c -+++ linux-2.4.21-40.EL/kernel/exit.c -@@ -367,11 +367,14 @@ static inline void __put_fs_struct(struc - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.21-40.EL/kernel/fork.c -=================================================================== ---- linux-2.4.21-40.EL.orig/kernel/fork.c -+++ linux-2.4.21-40.EL/kernel/fork.c -@@ -473,10 +473,13 @@ static inline struct fs_struct *__copy_f - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.21-40.EL/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-40.EL.orig/kernel/ksyms.c -+++ linux-2.4.21-40.EL/kernel/ksyms.c -@@ -319,6 +319,7 @@ EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); -@@ -592,6 +593,7 @@ EXPORT_SYMBOL(si_meminfo); - EXPORT_SYMBOL(sys_tz); - EXPORT_SYMBOL(file_fsync); - EXPORT_SYMBOL(fsync_buffers_list); -+EXPORT_SYMBOL(__iget); - EXPORT_SYMBOL(clear_inode); - EXPORT_SYMBOL(___strtok); - EXPORT_SYMBOL(init_special_inode); -Index: linux-2.4.21-47.0.1.EL/fs/inode.c -=================================================================== ---- linux-2.4.21-47.0.1.EL.orig/fs/inode.c -+++ linux-2.4.21-47.0.1.EL/fs/inode.c -@@ -278,7 +278,7 @@ static inline void write_inode(struct in - inode->i_sb->s_op->write_inode(inode, sync); - } - --static inline void __iget(struct inode * inode) -+void __iget(struct inode * inode) - { - if (atomic_read(&inode->i_count)) { - atomic_inc(&inode->i_count); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse-171.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse-171.patch deleted file mode 100644 index c2609a0..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse-171.patch +++ /dev/null @@ -1,1877 +0,0 @@ - fs/dcache.c | 19 ++ - fs/exec.c | 17 +- - fs/namei.c | 295 +++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 28 +++- - fs/open.c | 172 +++++++++++++++++++------- - fs/stat.c | 52 +++++--- - include/linux/dcache.h | 60 +++++++++ - include/linux/fs.h | 32 ++++ - include/linux/fs_struct.h | 4 - kernel/exit.c | 3 - kernel/fork.c | 3 - kernel/ksyms.c | 1 - 12 files changed, 558 insertions(+), 128 deletions(-) - -Index: linux-2.4.21-273/fs/dcache.c -=================================================================== ---- linux-2.4.21-273.orig/fs/dcache.c 2005-01-17 07:08:10.000000000 -0500 -+++ linux-2.4.21-273/fs/dcache.c 2005-04-05 19:59:00.687028845 -0400 -@@ -186,6 +186,13 @@ - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -838,13 +845,19 @@ - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.21-273/fs/exec.c -=================================================================== ---- linux-2.4.21-273.orig/fs/exec.c 2005-01-17 07:08:11.000000000 -0500 -+++ linux-2.4.21-273/fs/exec.c 2005-04-05 19:59:00.688028700 -0400 -@@ -113,8 +113,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -126,7 +128,8 @@ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -390,8 +393,10 @@ - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -403,7 +408,8 @@ - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -415,6 +421,7 @@ - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1154,7 +1161,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.21-273/fs/namei.c -=================================================================== ---- linux-2.4.21-273.orig/fs/namei.c 2005-01-17 07:08:10.000000000 -0500 -+++ linux-2.4.21-273/fs/namei.c 2005-04-05 19:59:46.773351909 -0400 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -274,10 +281,19 @@ - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -295,11 +311,15 @@ - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -314,6 +334,9 @@ - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -335,6 +358,15 @@ - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -346,7 +378,8 @@ - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 8) -@@ -360,10 +393,12 @@ - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -462,7 +497,8 @@ - * We expect 'base' to be positive and a directory. - */ - static inline int __attribute__((always_inline)) --__link_path_walk(const char * name, struct nameidata *nd) -+__link_path_walk_it(const char * name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -539,12 +575,12 @@ - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -562,7 +598,7 @@ - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -578,7 +614,7 @@ - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -605,12 +641,12 @@ - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -620,7 +656,7 @@ - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -634,7 +670,8 @@ - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -652,12 +689,42 @@ - nd->last_type = LAST_DOT; - else if (this.len == 2 && this.name[1] == '.') - nd->last_type = LAST_DOTDOT; -+ else -+ goto return_base; - return_reval: - /* - * We bypassed the ordinary revalidation routines. - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, lookup_flags & LOOKUP_PARENT)) { -@@ -671,6 +738,8 @@ - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; -@@ -678,13 +747,13 @@ - - int link_path_walk(const char * name, struct nameidata *nd) - { -- return __link_path_walk(name,nd); -+ return __link_path_walk_it(name, nd, NULL); - } - - static inline int __path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return __link_path_walk(name, nd); -+ return __link_path_walk_it(name, nd, NULL); - } - - int path_walk(const char * name, struct nameidata *nd) -@@ -692,6 +761,12 @@ - return __path_walk(name, nd); - } - -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return __link_path_walk_it(name, nd, it); -+} -+ - /* SMP-safe */ - /* returns 1 if everything is done */ - static int __emul_lookup_dentry(const char *name, struct nameidata *nd) -@@ -774,6 +849,16 @@ - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -788,6 +873,7 @@ - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -802,7 +888,8 @@ - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -825,13 +912,16 @@ - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -843,6 +933,12 @@ - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -864,7 +960,7 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -895,6 +991,23 @@ - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -992,7 +1105,8 @@ - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -1005,12 +1119,15 @@ - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -1019,6 +1136,11 @@ - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -1033,7 +1155,8 @@ - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1043,11 +1166,14 @@ - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1057,6 +1183,10 @@ - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1072,7 +1202,7 @@ - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1081,11 +1211,12 @@ - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; -- error = vfs_create(dir->d_inode, dentry, mode); -+ error = vfs_create_it(dir->d_inode, dentry, mode, it); - up(&dir->d_inode->i_sem); - #ifndef DENTRY_WASTE_RAM - if (error) -@@ -1193,7 +1324,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1205,8 +1336,10 @@ - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1225,7 +1358,10 @@ - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(it); - dput(dentry); - if (error) - return error; -@@ -1247,13 +1383,20 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1261,7 +1404,7 @@ - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1317,7 +1460,20 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1339,6 +1495,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1386,7 +1543,18 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1395,6 +1563,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1495,8 +1664,16 @@ - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1554,8 +1731,15 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1622,15 +1806,27 @@ - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1706,7 +1902,18 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1750,7 +1957,7 @@ - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1829,7 +2036,7 @@ - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1917,9 +2124,18 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1935,16 +2151,16 @@ - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1995,20 +2211,26 @@ - } - - static inline int __attribute__((always_inline)) --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = __link_path_walk(link, nd); -+ res = __link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -2032,7 +2254,13 @@ - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2074,7 +2302,7 @@ - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.21-273/fs/namespace.c -=================================================================== ---- linux-2.4.21-273.orig/fs/namespace.c 2005-01-17 07:08:11.000000000 -0500 -+++ linux-2.4.21-273/fs/namespace.c 2005-04-05 19:59:00.692028120 -0400 -@@ -98,6 +98,7 @@ - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -109,6 +110,7 @@ - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -488,14 +490,17 @@ - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -518,6 +523,7 @@ - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -701,6 +707,7 @@ - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -725,10 +732,11 @@ - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -- -+ } - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); -@@ -739,6 +747,8 @@ - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -904,6 +914,8 @@ - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -911,14 +923,14 @@ - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -973,8 +985,10 @@ - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.21-273/fs/open.c -=================================================================== ---- linux-2.4.21-273.orig/fs/open.c 2005-01-17 07:08:11.000000000 -0500 -+++ linux-2.4.21-273/fs/open.c 2005-04-05 19:59:00.693027975 -0400 -@@ -20,6 +20,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -96,9 +98,10 @@ - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -110,7 +113,13 @@ - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - up_write(&inode->i_alloc_sem); - return error; -@@ -121,12 +130,13 @@ - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -166,11 +176,13 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -218,7 +230,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -263,11 +275,13 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -282,11 +296,25 @@ - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -307,12 +335,14 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -327,7 +357,20 @@ - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -350,6 +393,7 @@ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -367,13 +411,14 @@ - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -388,8 +433,9 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -400,6 +446,7 @@ - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -439,9 +486,10 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -457,39 +505,56 @@ - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -498,30 +563,14 @@ - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -+ error = chmod_common(nd.dentry, mode); - -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- --dput_and_out: - path_release(&nd); - out: - return error; -@@ -541,6 +590,20 @@ - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -645,6 +708,7 @@ - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -652,14 +716,15 @@ - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -687,7 +752,9 @@ - file_move(f, &inode->i_sb->s_files); - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } -@@ -699,6 +766,7 @@ - !inode->i_mapping->a_ops->direct_IO)) - goto cleanup_all; - -+ intent_release(it); - return f; - - cleanup_all: -@@ -711,11 +779,17 @@ - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.4.21-273/fs/stat.c -=================================================================== ---- linux-2.4.21-273.orig/fs/stat.c 2005-01-17 07:08:11.000000000 -0500 -+++ linux-2.4.21-273/fs/stat.c 2005-04-05 19:59:00.694027831 -0400 -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -141,13 +143,15 @@ - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -157,13 +161,15 @@ - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -178,13 +184,15 @@ - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -195,13 +203,15 @@ - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -222,7 +232,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -241,7 +251,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -263,7 +273,7 @@ - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -339,12 +349,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -354,12 +366,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -374,7 +388,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux-2.4.21-273/include/linux/dcache.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/dcache.h 2005-01-17 07:09:07.000000000 -0500 -+++ linux-2.4.21-273/include/linux/dcache.h 2005-04-05 19:59:00.695027686 -0400 -@@ -7,6 +7,51 @@ - #include <linux/gdb.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -94,8 +139,22 @@ - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -127,6 +186,7 @@ - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.21-273/include/linux/fs.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/fs.h 2005-04-05 19:58:52.741180030 -0400 -+++ linux-2.4.21-273/include/linux/fs.h 2005-04-05 19:59:00.696027541 -0400 -@@ -74,6 +74,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -360,6 +361,9 @@ - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -504,6 +508,7 @@ - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -666,6 +671,7 @@ - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - struct list_head f_ep_links; - spinlock_t f_ep_lock; - }; -@@ -795,6 +801,7 @@ - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -916,7 +923,8 @@ - extern int __vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -991,21 +999,32 @@ - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1204,10 +1223,14 @@ - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1503,6 +1526,7 @@ - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1515,6 +1539,8 @@ - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void _inode_init_once(struct inode *); -@@ -1666,6 +1692,8 @@ - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.21-273/include/linux/fs_struct.h -=================================================================== ---- linux-2.4.21-273.orig/include/linux/fs_struct.h 2005-01-17 07:09:10.000000000 -0500 -+++ linux-2.4.21-273/include/linux/fs_struct.h 2005-04-05 19:59:00.697027396 -0400 -@@ -34,10 +34,12 @@ - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -57,10 +59,12 @@ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux-2.4.21-273/kernel/exit.c -=================================================================== ---- linux-2.4.21-273.orig/kernel/exit.c 2005-01-17 07:08:59.000000000 -0500 -+++ linux-2.4.21-273/kernel/exit.c 2005-04-05 19:59:00.698027251 -0400 -@@ -292,11 +292,14 @@ - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.21-273/kernel/fork.c -=================================================================== ---- linux-2.4.21-273.orig/kernel/fork.c 2005-01-17 07:09:00.000000000 -0500 -+++ linux-2.4.21-273/kernel/fork.c 2005-04-05 19:59:00.699027106 -0400 -@@ -466,10 +466,13 @@ - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.21-273/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-273.orig/kernel/ksyms.c 2005-04-05 19:58:52.779174526 -0400 -+++ linux-2.4.21-273/kernel/ksyms.c 2005-04-05 19:59:00.699027106 -0400 -@@ -330,6 +330,7 @@ - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch deleted file mode 100644 index d283bd1..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch +++ /dev/null @@ -1,1878 +0,0 @@ - fs/dcache.c | 19 ++ - fs/exec.c | 17 +- - fs/namei.c | 295 +++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 28 +++- - fs/open.c | 172 +++++++++++++++++++------- - fs/stat.c | 52 +++++--- - include/linux/dcache.h | 60 +++++++++ - include/linux/fs.h | 32 ++++ - include/linux/fs_struct.h | 4 - kernel/exit.c | 3 - kernel/fork.c | 3 - kernel/ksyms.c | 1 - 12 files changed, 558 insertions(+), 128 deletions(-) - -Index: linux-2.4.21-x86_64/fs/dcache.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/dcache.c 2003-10-28 10:33:59.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/dcache.c 2004-04-12 19:57:36.000000000 -0700 -@@ -186,6 +186,13 @@ - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -838,13 +845,19 @@ - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.21-x86_64/fs/exec.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/exec.c 2003-10-28 10:34:17.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/exec.c 2004-04-12 19:57:36.000000000 -0700 -@@ -112,8 +112,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -125,7 +127,8 @@ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -382,8 +385,10 @@ - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -395,7 +400,8 @@ - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -407,6 +413,7 @@ - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1150,7 +1157,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.21-x86_64/fs/namei.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/namei.c 2003-10-28 10:34:18.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/namei.c 2004-04-12 19:58:44.000000000 -0700 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -332,7 +364,8 @@ - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 8) -@@ -346,10 +379,12 @@ - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -447,7 +482,8 @@ - * - * We expect 'base' to be positive and a directory. - */ --static inline int __link_path_walk(const char * name, struct nameidata *nd) -+static inline int __link_path_walk_it(const char * name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -524,12 +560,12 @@ - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -547,7 +583,7 @@ - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -563,7 +599,7 @@ - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -590,12 +626,12 @@ - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -605,7 +641,7 @@ - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -619,7 +655,8 @@ - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -637,12 +672,42 @@ - nd->last_type = LAST_DOT; - else if (this.len == 2 && this.name[1] == '.') - nd->last_type = LAST_DOTDOT; -+ else -+ goto return_base; - return_reval: - /* - * We bypassed the ordinary revalidation routines. - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, lookup_flags & LOOKUP_PARENT)) { -@@ -656,6 +714,8 @@ - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; -@@ -663,13 +723,13 @@ - - int link_path_walk(const char * name, struct nameidata *nd) - { -- return __link_path_walk(name,nd); -+ return __link_path_walk_it(name, nd, NULL); - } - - static inline int __path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return __link_path_walk(name, nd); -+ return __link_path_walk_it(name, nd, NULL); - } - - int path_walk(const char * name, struct nameidata *nd) -@@ -677,6 +737,12 @@ - return __path_walk(name, nd); - } - -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return __link_path_walk_it(name, nd, it); -+} -+ - /* SMP-safe */ - /* returns 1 if everything is done */ - static int __emul_lookup_dentry(const char *name, struct nameidata *nd) -@@ -759,6 +825,17 @@ - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -773,6 +850,7 @@ - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -787,7 +865,8 @@ - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -810,13 +889,16 @@ - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -828,6 +910,12 @@ - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -849,7 +937,7 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -880,6 +968,23 @@ - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -977,7 +1082,8 @@ - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -990,12 +1096,15 @@ - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -1004,6 +1113,11 @@ - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -1018,7 +1132,8 @@ - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1028,11 +1143,14 @@ - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1042,6 +1160,10 @@ - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1057,7 +1179,7 @@ - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1066,11 +1188,12 @@ - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; -- error = vfs_create(dir->d_inode, dentry, mode); -+ error = vfs_create_it(dir->d_inode, dentry, mode, it); - up(&dir->d_inode->i_sem); - #ifndef DENTRY_WASTE_RAM - if (error) -@@ -1178,7 +1301,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1190,8 +1313,10 @@ - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1210,7 +1335,10 @@ - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(it); - dput(dentry); - if (error) - return error; -@@ -1232,13 +1360,20 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1246,7 +1381,7 @@ - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1302,7 +1437,20 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1324,6 +1472,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1371,7 +1520,18 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - if (!IS_POSIXACL(nd.dentry->d_inode)) -@@ -1380,6 +1540,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1480,8 +1641,16 @@ - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1539,8 +1708,15 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1607,15 +1783,27 @@ - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1691,7 +1879,18 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1735,7 +1934,7 @@ - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1814,7 +2013,7 @@ - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1902,9 +2101,18 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1920,16 +2128,16 @@ - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1980,20 +2188,26 @@ - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = __link_path_walk(link, nd); -+ res = __link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -2017,7 +2231,13 @@ - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2059,7 +2279,7 @@ - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.21-x86_64/fs/namespace.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/namespace.c 2003-10-28 10:34:12.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/namespace.c 2004-04-12 19:57:36.000000000 -0700 -@@ -98,6 +98,7 @@ - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -109,6 +110,7 @@ - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -488,14 +490,17 @@ - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -518,6 +523,7 @@ - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -701,6 +707,7 @@ - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -725,10 +732,11 @@ - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -- -+ } - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); -@@ -739,6 +747,8 @@ - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -904,6 +914,8 @@ - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -911,14 +923,14 @@ - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -973,8 +985,10 @@ - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.21-x86_64/fs/open.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/open.c 2003-10-28 10:33:59.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/open.c 2004-04-12 19:57:36.000000000 -0700 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -109,7 +112,13 @@ - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - up_write(&inode->i_alloc_sem); - return error; -@@ -120,12 +129,13 @@ - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -165,11 +175,13 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -217,7 +229,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -262,11 +274,13 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -281,11 +295,25 @@ - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -306,12 +334,14 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -326,7 +356,20 @@ - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -349,6 +392,7 @@ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -366,13 +410,14 @@ - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -387,8 +432,9 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -399,6 +445,7 @@ - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -438,9 +485,10 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -456,39 +504,56 @@ - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -497,30 +562,14 @@ - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; - -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -+ error = chmod_common(nd.dentry, mode); - -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- --dput_and_out: - path_release(&nd); - out: - return error; -@@ -540,6 +589,20 @@ - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -644,6 +707,7 @@ - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -651,14 +715,15 @@ - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -686,7 +751,9 @@ - file_move(f, &inode->i_sb->s_files); - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } -@@ -698,6 +765,7 @@ - !inode->i_mapping->a_ops->direct_IO)) - goto cleanup_all; - -+ intent_release(it); - return f; - - cleanup_all: -@@ -710,11 +778,17 @@ - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.4.21-x86_64/fs/stat.c -=================================================================== ---- linux-2.4.21-x86_64.orig/fs/stat.c 2003-10-28 10:33:58.000000000 -0800 -+++ linux-2.4.21-x86_64/fs/stat.c 2004-04-12 19:58:44.000000000 -0700 -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -141,13 +143,15 @@ - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -157,13 +161,15 @@ - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -178,13 +184,15 @@ - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -195,13 +203,15 @@ - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -222,7 +232,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -241,7 +251,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -263,7 +273,7 @@ - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -339,12 +349,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -354,12 +366,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -374,7 +388,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux-2.4.21-x86_64/include/linux/dcache.h -=================================================================== ---- linux-2.4.21-x86_64.orig/include/linux/dcache.h 2003-11-10 16:44:28.000000000 -0800 -+++ linux-2.4.21-x86_64/include/linux/dcache.h 2004-04-12 19:57:36.000000000 -0700 -@@ -7,6 +7,51 @@ - #include <linux/gdb.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -94,8 +139,22 @@ - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -127,6 +186,7 @@ - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.21-x86_64/include/linux/fs.h -=================================================================== ---- linux-2.4.21-x86_64.orig/include/linux/fs.h 2004-04-12 19:57:33.000000000 -0700 -+++ linux-2.4.21-x86_64/include/linux/fs.h 2004-04-12 19:57:36.000000000 -0700 -@@ -74,6 +74,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -361,6 +362,9 @@ - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -505,6 +509,7 @@ - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -667,6 +672,7 @@ - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - struct list_head f_ep_links; - spinlock_t f_ep_lock; - }; -@@ -796,6 +802,7 @@ - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -917,7 +924,8 @@ - extern int __vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -992,21 +1000,32 @@ - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1205,10 +1224,14 @@ - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1504,6 +1527,7 @@ - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1516,6 +1540,8 @@ - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void _inode_init_once(struct inode *); -@@ -1667,6 +1693,8 @@ - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.21-x86_64/include/linux/fs_struct.h -=================================================================== ---- linux-2.4.21-x86_64.orig/include/linux/fs_struct.h 2001-07-13 15:10:44.000000000 -0700 -+++ linux-2.4.21-x86_64/include/linux/fs_struct.h 2004-04-12 19:57:36.000000000 -0700 -@@ -34,10 +34,12 @@ - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -57,10 +59,12 @@ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux-2.4.21-x86_64/kernel/exit.c -=================================================================== ---- linux-2.4.21-x86_64.orig/kernel/exit.c 2003-10-28 10:34:13.000000000 -0800 -+++ linux-2.4.21-x86_64/kernel/exit.c 2004-04-12 19:57:36.000000000 -0700 -@@ -288,11 +288,14 @@ - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.21-x86_64/kernel/fork.c -=================================================================== ---- linux-2.4.21-x86_64.orig/kernel/fork.c 2003-10-28 10:34:17.000000000 -0800 -+++ linux-2.4.21-x86_64/kernel/fork.c 2004-04-12 19:57:36.000000000 -0700 -@@ -461,10 +461,13 @@ - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.21-x86_64/kernel/ksyms.c -=================================================================== ---- linux-2.4.21-x86_64.orig/kernel/ksyms.c 2004-04-12 19:57:33.000000000 -0700 -+++ linux-2.4.21-x86_64/kernel/ksyms.c 2004-04-12 19:57:36.000000000 -0700 -@@ -327,6 +327,7 @@ - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.29-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.29-vanilla.patch deleted file mode 100644 index f19fbd4..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.29-vanilla.patch +++ /dev/null @@ -1,1833 +0,0 @@ -Index: linux-2.4.29/fs/dcache.c -=================================================================== ---- linux-2.4.29.orig/fs/dcache.c 2005-04-07 18:52:37.000000000 +0300 -+++ linux-2.4.29/fs/dcache.c 2005-04-07 19:14:06.000000000 +0300 -@@ -184,6 +184,13 @@ - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -836,13 +843,19 @@ - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ -Index: linux-2.4.29/fs/exec.c -=================================================================== ---- linux-2.4.29.orig/fs/exec.c 2005-04-07 18:53:19.000000000 +0300 -+++ linux-2.4.29/fs/exec.c 2005-04-07 19:14:06.000000000 +0300 -@@ -112,8 +112,10 @@ - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -125,7 +127,8 @@ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -378,8 +381,10 @@ - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, -+ .it_flags = FMODE_READ|FMODE_EXEC }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -391,7 +396,8 @@ - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(&it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -403,6 +409,7 @@ - return file; - } - } -+ intent_release(&it); - path_release(&nd); - } - goto out; -@@ -1163,7 +1170,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.4.29/fs/namei.c -=================================================================== ---- linux-2.4.29.orig/fs/namei.c 2005-04-07 18:53:14.000000000 +0300 -+++ linux-2.4.29/fs/namei.c 2005-05-03 17:23:44.139922792 +0300 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct lookup_intent *it) -+{ -+ if (it && it->it_op_release) -+ it->it_op_release(it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,15 @@ - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +320,9 @@ - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup_it) -+ result = dir->i_op->lookup_it(dir, dentry, it, flags); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +344,15 @@ - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate_it) { -+ if (!result->d_op->d_revalidate_it(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; -+ } - } - return result; - } -@@ -332,7 +364,8 @@ - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 5) -@@ -346,10 +379,12 @@ - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - err = dentry->d_inode->i_op->follow_link(dentry, nd); - current->link_count--; - return err; - loop: -+ intent_release(it); - path_release(nd); - return -ELOOP; - } -@@ -447,7 +482,8 @@ - * - * We expect 'base' to be positive and a directory. - */ --int fastcall link_path_walk(const char * name, struct nameidata *nd) -+int fastcall link_path_walk_it(const char * name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -520,9 +556,9 @@ - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -540,7 +576,7 @@ - goto out_dput; - - if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -556,7 +592,7 @@ - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup_it) - break; - continue; - /* here ends the main loop */ -@@ -583,9 +619,9 @@ - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, nd->flags); -+ dentry = cached_lookup(nd->dentry, &this, nd->flags, it); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, nd->flags); -+ dentry = real_lookup(nd->dentry, &this, nd->flags, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; -@@ -595,7 +631,7 @@ - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -609,7 +645,8 @@ - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup_it)) - break; - } - goto return_base; -@@ -635,6 +672,34 @@ - * Check the cached dentry for staleness. - */ - dentry = nd->dentry; -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, it); -+ if (IS_ERR(new)) { -+ err = PTR_ERR(new); -+ break; -+ } -+ d_invalidate(dentry); -+ dput(dentry); -+ nd->dentry = new; -+ } -+ if (!nd->dentry->d_inode) -+ goto no_inode; -+ if (lookup_flags & LOOKUP_DIRECTORY) { -+ err = -ENOTDIR; -+ if (!nd->dentry->d_inode->i_op || -+ (!nd->dentry->d_inode->i_op->lookup && -+ !nd->dentry->d_inode->i_op->lookup_it)) -+ break; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -648,15 +713,28 @@ - dput(dentry); - break; - } -+ if (err) -+ intent_release(it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int fastcall path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -741,6 +819,16 @@ - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+/* SMP-safe */ - int fastcall path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -755,6 +843,7 @@ - { - nd->last_type = LAST_ROOT; /* if there are only slashes... */ - nd->flags = flags; -+ nd->intent = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -769,7 +858,8 @@ - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -792,13 +882,16 @@ - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup_it) -+ dentry = inode->i_op->lookup_it(inode, new, it, 0); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -810,6 +903,12 @@ - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -831,7 +930,7 @@ - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -862,6 +961,23 @@ - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -957,7 +1073,8 @@ - return retval; - } - --int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, -+ struct lookup_intent *it) - { - int error; - -@@ -970,12 +1087,15 @@ - goto exit_lock; - - error = -EACCES; /* shouldn't it be ENOSYS? */ -- if (!dir->i_op || !dir->i_op->create) -+ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) - goto exit_lock; - - DQUOT_INIT(dir); - lock_kernel(); -- error = dir->i_op->create(dir, dentry, mode); -+ if (dir->i_op->create_it) -+ error = dir->i_op->create_it(dir, dentry, mode, it); -+ else -+ error = dir->i_op->create(dir, dentry, mode); - unlock_kernel(); - exit_lock: - up(&dir->i_zombie); -@@ -984,6 +1104,11 @@ - return error; - } - -+int vfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ return vfs_create_it(dir, dentry, mode, NULL); -+} -+ - /* - * open_namei() - * -@@ -998,7 +1123,8 @@ - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1008,11 +1134,14 @@ - - acc_mode = ACC_MODE(flag); - -+ if (it) -+ it->it_flags = flag; -+ - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1022,6 +1151,10 @@ - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_create_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1037,7 +1170,7 @@ - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1046,10 +1179,11 @@ - goto exit; - } - -+ it->it_create_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { -- error = vfs_create(dir->d_inode, dentry, -- mode & ~current->fs->umask); -+ error = vfs_create_it(dir->d_inode, dentry, -+ mode & ~current->fs->umask, it); - up(&dir->d_inode->i_sem); - dput(nd->dentry); - nd->dentry = dentry; -@@ -1153,7 +1287,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1165,8 +1299,10 @@ - return 0; - - exit_dput: -+ intent_release(it); - dput(dentry); - exit: -+ intent_release(it); - path_release(nd); - return error; - -@@ -1185,7 +1321,10 @@ - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -+ nd->intent = it; - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(it); - dput(dentry); - if (error) - return error; -@@ -1207,13 +1346,20 @@ - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1221,7 +1367,7 @@ - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1277,7 +1423,20 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - mode &= ~current->fs->umask; -@@ -1298,6 +1457,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1345,7 +1505,18 @@ - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1353,6 +1524,7 @@ - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1453,8 +1625,16 @@ - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1512,8 +1692,15 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1580,15 +1767,27 @@ - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out2; -+ } -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1664,7 +1863,18 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.last_type != LAST_NORM) { -+ error = -EEXIST; -+ goto out_release; -+ } -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1708,7 +1918,7 @@ - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - struct inode *target; -@@ -1787,7 +1997,7 @@ - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error; - -@@ -1875,9 +2085,18 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ lock_kernel(); -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1893,16 +2112,16 @@ - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); - unlock_kernel(); -- - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1953,20 +2172,26 @@ - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->intent; -+ else if (it != nd->intent) -+ printk("it != nd->intent: tell phil@clusterfs.com\n"); -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1990,7 +2215,13 @@ - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2032,7 +2263,7 @@ - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); -Index: linux-2.4.29/fs/namespace.c -=================================================================== ---- linux-2.4.29.orig/fs/namespace.c 2005-04-07 18:54:11.000000000 +0300 -+++ linux-2.4.29/fs/namespace.c 2005-04-07 19:14:06.000000000 +0300 -@@ -98,6 +98,7 @@ - { - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; -+ UNPIN(old_nd->dentry, old_nd->mnt, 1); - mnt->mnt_parent = mnt; - mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); -@@ -109,6 +110,7 @@ - { - mnt->mnt_parent = mntget(nd->mnt); - mnt->mnt_mountpoint = dget(nd->dentry); -+ PIN(nd->dentry, nd->mnt, 1); - list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); - list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); - nd->dentry->d_mounted++; -@@ -488,14 +490,17 @@ - { - struct nameidata old_nd; - struct vfsmount *mnt = NULL; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int err = mount_is_safe(nd); - if (err) - return err; - if (!old_name || !*old_name) - return -EINVAL; -- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); -- if (err) -+ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); -+ if (err) { -+ intent_release(&it); - return err; -+ } - - down_write(¤t->namespace->sem); - err = -EINVAL; -@@ -518,6 +523,7 @@ - } - - up_write(¤t->namespace->sem); -+ intent_release(&it); - path_release(&old_nd); - return err; - } -@@ -701,6 +707,7 @@ - unsigned long flags, void *data_page) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int retval = 0; - int mnt_flags = 0; - -@@ -728,10 +735,11 @@ - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); - - /* ... and get the mountpoint */ -- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -- if (retval) -+ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); -+ if (retval) { -+ intent_release(&it); - return retval; -- -+ } - if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, - data_page); -@@ -742,6 +750,8 @@ - else - retval = do_add_mount(&nd, type_page, flags, mnt_flags, - dev_name, data_page); -+ -+ intent_release(&it); - path_release(&nd); - return retval; - } -@@ -907,6 +917,8 @@ - { - struct vfsmount *tmp; - struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; -+ struct lookup_intent new_it = { .it_op = IT_GETATTR }; -+ struct lookup_intent old_it = { .it_op = IT_GETATTR }; - int error; - - if (!capable(CAP_SYS_ADMIN)) -@@ -914,14 +926,14 @@ - - lock_kernel(); - -- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); -+ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); - if (error) - goto out0; - error = -EINVAL; - if (!check_mnt(new_nd.mnt)) - goto out1; - -- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); -+ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); - if (error) - goto out1; - -@@ -976,8 +988,10 @@ - up(&old_nd.dentry->d_inode->i_zombie); - up_write(¤t->namespace->sem); - path_release(&user_nd); -+ intent_release(&old_it); - path_release(&old_nd); - out1: -+ intent_release(&new_it); - path_release(&new_nd); - out0: - unlock_kernel(); -Index: linux-2.4.29/fs/open.c -=================================================================== ---- linux-2.4.29.orig/fs/open.c 2005-04-07 18:52:27.000000000 +0300 -+++ linux-2.4.29/fs/open.c 2005-04-07 19:14:06.000000000 +0300 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -109,7 +112,13 @@ - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - up_write(&inode->i_alloc_sem); - return error; -@@ -120,12 +129,13 @@ - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -165,11 +175,13 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(&it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -217,7 +229,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -262,11 +274,13 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -284,7 +298,20 @@ - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ if (!times) { - error = -EACCES; - if (IS_IMMUTABLE(inode)) - goto dput_and_out; -@@ -312,12 +339,14 @@ - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -335,7 +364,19 @@ - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ if (!utimes) { - error = -EACCES; - if (IS_IMMUTABLE(inode)) - goto dput_and_out; -@@ -362,6 +403,7 @@ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -379,13 +421,14 @@ - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(&it); - path_release(&nd); - } - -@@ -400,8 +443,9 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -412,6 +456,7 @@ - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; -@@ -451,9 +496,10 @@ - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -469,39 +515,56 @@ - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(&it); - path_release(&nd); - out: - return error; - } - --asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+int chmod_common(struct dentry *dentry, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; -- struct file * file; -- int err = -EBADF; -+ struct inode *inode = dentry->d_inode; - struct iattr newattrs; -+ int err = -EROFS; - -- file = fget(fd); -- if (!file) -+ if (IS_RDONLY(inode)) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -+ if (inode->i_op->setattr_raw) { -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ err = inode->i_op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (err != -EOPNOTSUPP) -+ goto out; -+ } - -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -+ goto out; -+ - if (mode == (mode_t) -1) - mode = inode->i_mode; - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); - --out_putf: -+out: -+ return err; -+} -+ -+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) -+{ -+ struct file * file; -+ int err = -EBADF; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ err = chmod_common(file->f_dentry, mode); -+ - fput(file); - out: - return err; -@@ -510,30 +573,14 @@ - asmlinkage long sys_chmod(const char * filename, mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = user_path_walk(filename, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; -- -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; - -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -+ error = chmod_common(nd.dentry, mode); - --dput_and_out: - path_release(&nd); - out: - return error; -@@ -553,6 +600,20 @@ - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -657,6 +718,7 @@ - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -664,14 +726,15 @@ - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -708,12 +771,15 @@ - } - - if (f->f_op && f->f_op->open) { -+ f->f_it = it; - error = f->f_op->open(inode,f); -+ f->f_it = NULL; - if (error) - goto cleanup_all; - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -+ intent_release(it); - return f; - - cleanup_all: -@@ -728,11 +794,17 @@ - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ -Index: linux-2.4.29/fs/stat.c -=================================================================== ---- linux-2.4.29.orig/fs/stat.c 2005-04-07 18:52:47.000000000 +0300 -+++ linux-2.4.29/fs/stat.c 2005-04-07 19:14:06.000000000 +0300 -@@ -17,10 +17,12 @@ - * Revalidate the inode. This is required for proper NFS attribute caching. - */ - static __inline__ int --do_revalidate(struct dentry *dentry) -+do_revalidate(struct dentry *dentry, struct lookup_intent *it) - { - struct inode * inode = dentry->d_inode; -- if (inode->i_op && inode->i_op->revalidate) -+ if (inode->i_op && inode->i_op->revalidate_it) -+ return inode->i_op->revalidate_it(dentry, it); -+ else if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; - } -@@ -141,13 +143,15 @@ - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -157,13 +161,15 @@ - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -178,13 +184,15 @@ - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -195,13 +203,15 @@ - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -222,7 +232,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_old_stat(dentry->d_inode, statbuf); - fput(f); -@@ -241,7 +251,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat(dentry->d_inode, statbuf); - fput(f); -@@ -263,7 +273,7 @@ - - error = -EINVAL; - if (inode->i_op && inode->i_op->readlink && -- !(error = do_revalidate(nd.dentry))) { -+ !(error = do_revalidate(nd.dentry, NULL))) { - UPDATE_ATIME(inode); - error = inode->i_op->readlink(nd.dentry, buf, bufsiz); - } -@@ -339,12 +349,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -354,12 +366,14 @@ - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { -- error = do_revalidate(nd.dentry); -+ error = do_revalidate(nd.dentry, &it); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(&it); - path_release(&nd); - } - return error; -@@ -374,7 +388,7 @@ - if (f) { - struct dentry * dentry = f->f_dentry; - -- err = do_revalidate(dentry); -+ err = do_revalidate(dentry, NULL); - if (!err) - err = cp_new_stat64(dentry->d_inode, statbuf); - fput(f); -Index: linux-2.4.29/include/linux/dcache.h -=================================================================== ---- linux-2.4.29.orig/include/linux/dcache.h 2005-04-07 18:55:17.000000000 +0300 -+++ linux-2.4.29/include/linux/dcache.h 2005-04-07 19:14:06.000000000 +0300 -@@ -6,6 +6,51 @@ - #include <asm/atomic.h> - #include <linux/mount.h> - #include <linux/kernel.h> -+#include <linux/string.h> -+ -+#define IT_OPEN 0x0001 -+#define IT_CREAT 0x0002 -+#define IT_READDIR 0x0004 -+#define IT_GETATTR 0x0008 -+#define IT_LOOKUP 0x0010 -+#define IT_UNLINK 0x0020 -+#define IT_GETXATTR 0x0040 -+#define IT_EXEC 0x0080 -+#define IT_PIN 0x0100 -+ -+#define IT_FL_LOCKED 0x0001 -+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ -+ -+#define INTENT_MAGIC 0x19620323 -+ -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; -+ int it_int_flags; -+}; -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op, int flags) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+ it->it_flags = flags; -+} -+ - - /* - * linux/include/linux/dcache.h -@@ -91,8 +136,22 @@ - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); -+ void (*d_pin)(struct dentry *, struct vfsmount * , int); -+ void (*d_unpin)(struct dentry *, struct vfsmount *, int); - }; - -+#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ -+ de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ -+ de->d_op->d_unpin(de, mnt, flag); -+ -+ -+/* defined in fs/namei.c */ -+extern void intent_release(struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +183,7 @@ - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - -Index: linux-2.4.29/include/linux/fs.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs.h 2005-04-07 18:59:19.000000000 +0300 -+++ linux-2.4.29/include/linux/fs.h 2005-05-03 17:06:23.738087912 +0300 -@@ -73,6 +73,7 @@ - - #define FMODE_READ 1 - #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 - - #define READ 0 - #define WRITE 1 -@@ -340,6 +341,9 @@ - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -478,6 +482,7 @@ - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct char_device *i_cdev; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -580,6 +585,7 @@ - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_it; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -700,6 +706,7 @@ - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *intent; - }; - - /* -@@ -820,7 +827,8 @@ - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry); - - /* - * File types -@@ -880,21 +888,32 @@ - - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); -+ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); -+ int (*revalidate_it) (struct dentry *, struct lookup_intent *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1091,10 +1110,14 @@ - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1385,6 +1408,7 @@ - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1396,6 +1420,8 @@ - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void __inode_init_once(struct inode *); -@@ -1539,6 +1565,8 @@ - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; -Index: linux-2.4.29/include/linux/fs_struct.h -=================================================================== ---- linux-2.4.29.orig/include/linux/fs_struct.h 2005-04-07 18:54:22.000000000 +0300 -+++ linux-2.4.29/include/linux/fs_struct.h 2005-04-07 19:14:06.000000000 +0300 -@@ -34,10 +34,12 @@ - write_lock(&fs->lock); - old_root = fs->root; - old_rootmnt = fs->rootmnt; -+ PIN(dentry, mnt, 1); - fs->rootmnt = mntget(mnt); - fs->root = dget(dentry); - write_unlock(&fs->lock); - if (old_root) { -+ UNPIN(old_root, old_rootmnt, 1); - dput(old_root); - mntput(old_rootmnt); - } -@@ -57,10 +59,12 @@ - write_lock(&fs->lock); - old_pwd = fs->pwd; - old_pwdmnt = fs->pwdmnt; -+ PIN(dentry, mnt, 0); - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); - write_unlock(&fs->lock); - if (old_pwd) { -+ UNPIN(old_pwd, old_pwdmnt, 0); - dput(old_pwd); - mntput(old_pwdmnt); - } -Index: linux-2.4.29/kernel/exit.c -=================================================================== ---- linux-2.4.29.orig/kernel/exit.c 2005-04-07 18:53:09.000000000 +0300 -+++ linux-2.4.29/kernel/exit.c 2005-04-07 19:14:06.000000000 +0300 -@@ -238,11 +238,14 @@ - { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { -+ UNPIN(fs->pwd, fs->pwdmnt, 0); -+ UNPIN(fs->root, fs->rootmnt, 1); - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { -+ UNPIN(fs->altroot, fs->altrootmnt, 1); - dput(fs->altroot); - mntput(fs->altrootmnt); - } -Index: linux-2.4.29/kernel/fork.c -=================================================================== ---- linux-2.4.29.orig/kernel/fork.c 2005-04-07 18:52:37.000000000 +0300 -+++ linux-2.4.29/kernel/fork.c 2005-04-07 19:14:06.000000000 +0300 -@@ -388,10 +388,13 @@ - fs->umask = old->umask; - read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); -+ PIN(old->pwd, old->pwdmnt, 0); -+ PIN(old->root, old->rootmnt, 1); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { -+ PIN(old->altroot, old->altrootmnt, 1); - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); - } else { -Index: linux-2.4.29/kernel/ksyms.c -=================================================================== ---- linux-2.4.29.orig/kernel/ksyms.c 2005-04-07 18:59:19.000000000 +0300 -+++ linux-2.4.29/kernel/ksyms.c 2005-04-07 19:14:06.000000000 +0300 -@@ -284,6 +284,7 @@ - EXPORT_SYMBOL(mark_page_accessed); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch index c0f6a0d..234e6ca 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch @@ -1,19 +1,6 @@ ---- - fs/exec.c | 14 ++++--- - fs/inode.c | 1 - fs/namei.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++--- - fs/namespace.c | 10 +++++ - fs/open.c | 40 ++++++++++++++------ - fs/stat.c | 24 +++++++++--- - include/linux/dcache.h | 3 + - include/linux/fs.h | 10 +++++ - include/linux/mount.h | 2 + - include/linux/namei.h | 61 ++++++++++++++++++++++++++---- - 10 files changed, 226 insertions(+), 37 deletions(-) - --- linux-2.6.10.orig/fs/exec.c +++ linux-2.6.10/fs/exec.c -@@ -124,9 +124,10 @@ asmlinkage long sys_uselib(const char __ +@@ -124,9 +124,10 @@ struct file * file; struct nameidata nd; int error; @@ -26,7 +13,7 @@ if (error) goto out; -@@ -138,7 +139,7 @@ asmlinkage long sys_uselib(const char __ +@@ -138,7 +139,7 @@ if (error) goto exit; @@ -35,7 +22,7 @@ error = PTR_ERR(file); if (IS_ERR(file)) goto out; -@@ -485,8 +486,9 @@ struct file *open_exec(const char *name) +@@ -485,8 +486,9 @@ int err; struct file *file; @@ -47,7 +34,7 @@ file = ERR_PTR(err); if (!err) { -@@ -499,7 +501,7 @@ struct file *open_exec(const char *name) +@@ -499,7 +501,7 @@ err = -EACCES; file = ERR_PTR(err); if (!err) { @@ -58,7 +45,7 @@ if (err) { --- linux-2.6.10.orig/fs/inode.c +++ linux-2.6.10/fs/inode.c -@@ -233,6 +233,7 @@ void __iget(struct inode * inode) +@@ -233,6 +233,7 @@ inodes_stat.nr_unused--; } @@ -68,7 +55,7 @@ * @inode: inode to clear --- linux-2.6.10.orig/fs/namei.c +++ linux-2.6.10/fs/namei.c -@@ -288,8 +288,19 @@ int deny_write_access(struct file * file +@@ -288,8 +288,19 @@ return 0; } @@ -88,7 +75,7 @@ dput(nd->dentry); mntput(nd->mnt); } -@@ -379,7 +390,10 @@ static struct dentry * real_lookup(struc +@@ -379,7 +390,10 @@ { struct dentry * result; struct inode *dir = parent->d_inode; @@ -99,7 +86,7 @@ down(&dir->i_sem); /* * First re-do the cached lookup just in case it was created -@@ -418,7 +432,10 @@ static struct dentry * real_lookup(struc +@@ -418,7 +432,10 @@ if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); @@ -111,7 +98,7 @@ } } return result; -@@ -449,6 +466,7 @@ static inline int __vfs_follow_link(stru +@@ -449,6 +466,7 @@ { int res = 0; char *name; @@ -119,7 +106,7 @@ if (IS_ERR(link)) goto fail; -@@ -458,6 +476,7 @@ static inline int __vfs_follow_link(stru +@@ -458,6 +476,7 @@ /* weird __emul_prefix() stuff did it */ goto out; } @@ -127,7 +114,7 @@ res = link_path_walk(link, nd); out: if (nd->depth || res || nd->last_type!=LAST_NORM) -@@ -666,6 +685,33 @@ fail: +@@ -666,6 +685,33 @@ return PTR_ERR(dentry); } @@ -161,7 +148,7 @@ /* * Name resolution. * -@@ -767,8 +813,12 @@ int fastcall link_path_walk(const char * +@@ -767,8 +813,12 @@ goto out_dput; if (inode->i_op->follow_link) { @@ -174,7 +161,7 @@ dput(next.dentry); mntput(next.mnt); if (err) -@@ -807,14 +857,34 @@ last_component: +@@ -807,14 +857,34 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -209,7 +196,7 @@ if (err) break; follow_mount(&next.mnt, &next.dentry); -@@ -1032,7 +1102,7 @@ struct dentry * lookup_hash(struct qstr +@@ -1032,7 +1102,7 @@ } /* SMP-safe */ @@ -218,7 +205,7 @@ { unsigned long hash; struct qstr this; -@@ -1052,11 +1122,16 @@ struct dentry * lookup_one_len(const cha +@@ -1052,11 +1122,16 @@ } this.hash = end_name_hash(hash); @@ -236,7 +223,7 @@ /* * namei() * -@@ -1068,7 +1143,7 @@ access: +@@ -1068,7 +1143,7 @@ * that namei follows links, while lnamei does not. * SMP-safe */ @@ -245,7 +232,7 @@ { char *tmp = getname(name); int err = PTR_ERR(tmp); -@@ -1080,6 +1155,12 @@ int fastcall __user_walk(const char __us +@@ -1080,6 +1155,12 @@ return err; } @@ -258,7 +245,7 @@ /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. -@@ -1363,8 +1444,8 @@ int open_namei(const char * pathname, in +@@ -1363,8 +1444,8 @@ acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ @@ -269,7 +256,7 @@ /* * The simplest case - just a plain lookup. -@@ -1379,6 +1460,7 @@ int open_namei(const char * pathname, in +@@ -1379,6 +1460,7 @@ /* * Create - we need to know the parent. */ @@ -277,7 +264,7 @@ error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; -@@ -1395,7 +1477,9 @@ int open_namei(const char * pathname, in +@@ -1395,7 +1477,9 @@ dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); @@ -287,7 +274,7 @@ do_last: error = PTR_ERR(dentry); -@@ -1508,7 +1592,9 @@ do_link: +@@ -1508,7 +1592,9 @@ } dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -299,7 +286,7 @@ } --- linux-2.6.10.orig/fs/namespace.c +++ linux-2.6.10/fs/namespace.c -@@ -62,6 +62,7 @@ struct vfsmount *alloc_vfsmnt(const char +@@ -62,6 +62,7 @@ INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_fslink); @@ -307,7 +294,7 @@ if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); -@@ -113,6 +114,7 @@ static inline int check_mnt(struct vfsmo +@@ -113,6 +114,7 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) { @@ -315,7 +302,7 @@ old_nd->dentry = mnt->mnt_mountpoint; old_nd->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; -@@ -176,6 +178,9 @@ void __mntput(struct vfsmount *mnt) +@@ -176,6 +178,9 @@ { struct super_block *sb = mnt->mnt_sb; dput(mnt->mnt_root); @@ -325,7 +312,7 @@ free_vfsmnt(mnt); deactivate_super(sb); } -@@ -402,6 +407,8 @@ static int do_umount(struct vfsmount *mn +@@ -402,6 +407,8 @@ */ lock_kernel(); @@ -334,7 +321,7 @@ if( (flags&MNT_FORCE) && sb->s_op->umount_begin) sb->s_op->umount_begin(sb); unlock_kernel(); -@@ -627,6 +634,7 @@ static int do_loopback(struct nameidata +@@ -627,6 +634,7 @@ return err; if (!old_name || !*old_name) return -EINVAL; @@ -342,7 +329,7 @@ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat +@@ -701,6 +709,7 @@ return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -350,7 +337,7 @@ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di +@@ -1012,6 +1021,7 @@ int retval = 0; int mnt_flags = 0; @@ -360,7 +347,7 @@ flags &= ~MS_MGC_MSK; --- linux-2.6.10.orig/fs/open.c +++ linux-2.6.10/fs/open.c -@@ -216,12 +216,12 @@ static inline long do_sys_truncate(const +@@ -216,12 +216,12 @@ struct nameidata nd; struct inode * inode; int error; @@ -375,7 +362,7 @@ if (error) goto out; inode = nd.dentry->d_inode; -@@ -475,6 +475,7 @@ asmlinkage long sys_access(const char __ +@@ -475,6 +475,7 @@ int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; @@ -383,7 +370,7 @@ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; -@@ -499,13 +500,14 @@ asmlinkage long sys_access(const char __ +@@ -499,13 +500,14 @@ else current->cap_effective = current->cap_permitted; @@ -399,7 +386,7 @@ path_release(&nd); } -@@ -520,8 +522,9 @@ asmlinkage long sys_chdir(const char __u +@@ -520,8 +522,9 @@ { struct nameidata nd; int error; @@ -410,7 +397,7 @@ if (error) goto out; -@@ -573,8 +576,9 @@ asmlinkage long sys_chroot(const char __ +@@ -573,8 +576,9 @@ { struct nameidata nd; int error; @@ -421,7 +408,7 @@ if (error) goto out; -@@ -758,8 +762,10 @@ asmlinkage long sys_fchown(unsigned int +@@ -758,8 +762,10 @@ struct file *filp_open(const char * filename, int flags, int mode) { int namei_flags, error; @@ -432,7 +419,7 @@ namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) namei_flags++; -@@ -767,15 +773,26 @@ struct file *filp_open(const char * file +@@ -767,15 +773,26 @@ namei_flags |= 2; error = open_namei(filename, namei_flags, mode, &nd); @@ -463,7 +450,7 @@ { struct file * f; struct inode *inode; -@@ -787,6 +804,7 @@ struct file *dentry_open(struct dentry * +@@ -787,6 +804,7 @@ goto cleanup_dentry; f->f_flags = flags; f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; @@ -471,7 +458,7 @@ inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); -@@ -805,6 +823,7 @@ struct file *dentry_open(struct dentry * +@@ -805,6 +823,7 @@ error = f->f_op->open(inode,f); if (error) goto cleanup_all; @@ -479,7 +466,7 @@ } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -@@ -830,13 +849,12 @@ cleanup_all: +@@ -830,13 +849,12 @@ cleanup_file: put_filp(f); cleanup_dentry: @@ -496,7 +483,7 @@ */ --- linux-2.6.10.orig/fs/stat.c +++ linux-2.6.10/fs/stat.c -@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod +@@ -38,7 +38,7 @@ EXPORT_SYMBOL(generic_fillattr); @@ -505,7 +492,7 @@ { struct inode *inode = dentry->d_inode; int retval; -@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st +@@ -47,6 +47,8 @@ if (retval) return retval; @@ -514,7 +501,7 @@ if (inode->i_op->getattr) return inode->i_op->getattr(mnt, dentry, stat); -@@ -63,14 +65,20 @@ int vfs_getattr(struct vfsmount *mnt, st +@@ -63,14 +65,20 @@ EXPORT_SYMBOL(vfs_getattr); @@ -537,7 +524,7 @@ path_release(&nd); } return error; -@@ -82,10 +90,11 @@ int vfs_lstat(char __user *name, struct +@@ -82,10 +90,11 @@ { struct nameidata nd; int error; @@ -551,7 +538,7 @@ path_release(&nd); } return error; -@@ -97,9 +106,12 @@ int vfs_fstat(unsigned int fd, struct ks +@@ -97,9 +106,12 @@ { struct file *f = fget(fd); int error = -EBADF; @@ -575,7 +562,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/cache.h> -@@ -37,6 +38,8 @@ struct qstr { +@@ -37,6 +38,8 @@ const unsigned char *name; }; @@ -586,7 +573,7 @@ int nr_unused; --- linux-2.6.10.orig/include/linux/fs.h +++ linux-2.6.10/include/linux/fs.h -@@ -78,6 +78,7 @@ extern int dir_notify_enable; +@@ -78,6 +78,7 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 @@ -594,7 +581,7 @@ /* Internal kernel extensions */ #define FMODE_LSEEK 4 -@@ -262,6 +263,8 @@ typedef void (dio_iodone_t)(struct inode +@@ -262,6 +263,8 @@ #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 @@ -603,7 +590,7 @@ /* * This is the Inode Attributes structure, used for notify_change(). It -@@ -465,6 +468,7 @@ struct inode { +@@ -465,6 +468,7 @@ struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; @@ -611,7 +598,7 @@ __u32 i_generation; -@@ -600,6 +604,7 @@ struct file { +@@ -600,6 +604,7 @@ spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; @@ -619,7 +606,7 @@ }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); -@@ -950,7 +955,9 @@ struct inode_operations { +@@ -950,7 +955,9 @@ void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -629,7 +616,7 @@ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -990,6 +997,7 @@ struct super_operations { +@@ -990,6 +997,7 @@ int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); @@ -637,7 +624,7 @@ int (*show_options)(struct seq_file *, struct vfsmount *); }; -@@ -1181,6 +1189,7 @@ extern int unregister_filesystem(struct +@@ -1181,6 +1189,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); @@ -645,7 +632,7 @@ extern long do_mount(char *, char *, char *, unsigned long, void *); extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1245,6 +1254,7 @@ static inline int break_lease(struct ino +@@ -1245,6 +1254,7 @@ extern int do_truncate(struct dentry *, loff_t start); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); @@ -655,7 +642,7 @@ --- linux-2.6.10.orig/include/linux/mount.h +++ linux-2.6.10/include/linux/mount.h -@@ -36,6 +36,8 @@ struct vfsmount +@@ -36,6 +36,8 @@ struct list_head mnt_list; struct list_head mnt_fslink; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ @@ -725,7 +712,7 @@ enum { MAX_NESTED_LINKS = 8 }; struct nameidata { -@@ -21,10 +62,7 @@ struct nameidata { +@@ -21,10 +62,7 @@ unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; @@ -737,7 +724,7 @@ }; /* -@@ -47,6 +85,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -47,6 +85,8 @@ #define LOOKUP_NOALT 32 #define LOOKUP_ATOMIC 64 #define LOOKUP_REVAL 128 @@ -746,7 +733,7 @@ /* * Intent data -@@ -56,6 +96,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -56,6 +96,12 @@ #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); @@ -759,7 +746,7 @@ #define user_path_walk(name,nd) \ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ -@@ -68,7 +114,6 @@ extern void path_release_on_umount(struc +@@ -68,7 +114,6 @@ extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch index c29bc5c..1e659f5 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch @@ -1,26 +1,30 @@ ---- - fs/exec.c | 14 ++++--- - fs/inode.c | 1 - fs/namei.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++--- - fs/namespace.c | 10 +++++ - fs/open.c | 57 ++++++++++++++-------------- - fs/stat.c | 24 +++++++++--- - include/linux/dcache.h | 3 + - include/linux/fs.h | 10 +++++ - include/linux/mount.h | 2 + - include/linux/namei.h | 61 ++++++++++++++++++++++++++---- - 10 files changed, 225 insertions(+), 55 deletions(-) +diff -rup RH_2_6_9_55.orig/fs/cifs/dir.c RH_2_6_9_55/fs/cifs/dir.c +--- RH_2_6_9_55.orig/fs/cifs/dir.c ++++ RH_2_6_9_55/fs/cifs/dir.c +@@ -157,11 +157,7 @@ cifs_create(struct inode *inode, struct ---- linux-2.6.9.orig/fs/exec.c -+++ linux-2.6.9/fs/exec.c + #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 5, 0) + if(nd && (nd->flags & LOOKUP_OPEN)) { +-#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,5) /* SUSE included Lustre patch */ + int oflags = nd->intent.it_flags; +-#else +- int oflags = nd->intent.open.flags; +-#endif + + desiredAccess = 0; + if (oflags & FMODE_READ) +diff -rup RH_2_6_9_55.orig/fs/exec.c RH_2_6_9_55/fs/exec.c +--- RH_2_6_9_55.orig/fs/exec.c ++++ RH_2_6_9_55/fs/exec.c @@ -126,9 +126,10 @@ asmlinkage long sys_uselib(const char __ struct file * file; struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_OPEN); - -- nd.intent.open.flags = FMODE_READ; +- +- nd.intent.open.flags = FMODE_READ|FMODE_EXEC; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ intent_init(&nd.intent, IT_OPEN); ++ + nd.intent.it_flags = FMODE_READ|FMODE_EXEC; + error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) @@ -39,16 +43,16 @@ int err; struct file *file; -- nd.intent.open.flags = FMODE_READ; +- nd.intent.open.flags = FMODE_READ|FMODE_EXEC; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + intent_init(&nd.intent, IT_OPEN); + nd.intent.it_flags = FMODE_READ|FMODE_EXEC; -+ err = path_lookup(name, LOOKUP_FOLLOW, &nd); ++ err = path_lookup_it(name, LOOKUP_FOLLOW, &nd); file = ERR_PTR(err); if (!err) { -@@ -503,7 +505,7 @@ struct file *open_exec(const char *name) - err = -EACCES; +@@ -501,7 +503,7 @@ struct file *open_exec(const char *name) + int err = permission(inode, MAY_EXEC, &nd); file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); @@ -56,8 +60,9 @@ if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { ---- linux-2.6.9.orig/fs/inode.c -+++ linux-2.6.9/fs/inode.c +diff -rup RH_2_6_9_55.orig/fs/inode.c RH_2_6_9_55/fs/inode.c +--- RH_2_6_9_55.orig/fs/inode.c ++++ RH_2_6_9_55/fs/inode.c @@ -235,6 +235,7 @@ void __iget(struct inode * inode) inodes_stat.nr_unused--; } @@ -66,9 +71,10 @@ /** * clear_inode - clear an inode * @inode: inode to clear ---- linux-2.6.9.orig/fs/namei.c -+++ linux-2.6.9/fs/namei.c -@@ -274,8 +274,19 @@ int deny_write_access(struct file * file +diff -rup RH_2_6_9_55.orig/fs/namei.c RH_2_6_9_55/fs/namei.c +--- RH_2_6_9_55.orig/fs/namei.c ++++ RH_2_6_9_55/fs/namei.c +@@ -282,8 +282,19 @@ int deny_write_access(struct file * file return 0; } @@ -88,30 +94,36 @@ dput(nd->dentry); mntput(nd->mnt); } -@@ -367,7 +378,10 @@ static struct dentry * real_lookup(struc +@@ -395,8 +406,12 @@ static struct dentry * real_lookup(struc { struct dentry * result; struct inode *dir = parent->d_inode; +- + int counter = 0; - ++ +again: -+ counter++; down(&dir->i_sem); ++ counter++; ++ /* * First re-do the cached lookup just in case it was created -@@ -406,7 +420,10 @@ static struct dentry * real_lookup(struc + * while we waited for the directory semaphore.. +@@ -433,8 +448,12 @@ static struct dentry * real_lookup(struc + up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { - dput(result); + result = do_revalidate(result, nd); +- if (!result) - result = ERR_PTR(-ENOENT); ++ if (!result) { + if (counter > 10) + result = ERR_PTR(-ESTALE); + if (!IS_ERR(result)) + goto again; - } ++ } } return result; -@@ -437,6 +454,7 @@ static inline int __vfs_follow_link(stru + } +@@ -464,6 +483,7 @@ static inline int __vfs_follow_link(stru { int res = 0; char *name; @@ -119,7 +131,7 @@ if (IS_ERR(link)) goto fail; -@@ -446,6 +464,7 @@ static inline int __vfs_follow_link(stru +@@ -473,6 +493,7 @@ static inline int __vfs_follow_link(stru /* weird __emul_prefix() stuff did it */ goto out; } @@ -127,7 +139,7 @@ res = link_path_walk(link, nd); out: if (nd->depth || res || nd->last_type!=LAST_NORM) -@@ -654,6 +673,33 @@ fail: +@@ -681,6 +702,33 @@ fail: return PTR_ERR(dentry); } @@ -161,12 +173,17 @@ /* * Name resolution. * This is the basic name resolution function, turning a pathname into -@@ -755,8 +801,12 @@ static fastcall int __link_path_walk(con +@@ -782,13 +830,17 @@ static fastcall int __link_path_walk(con goto out_dput; if (inode->i_op->follow_link) { + int save_flags = nd->flags; mntget(next.mnt); + if (next.mnt != nd->mnt) { + dput(nd->dentry); + nd->mnt = next.mnt; + nd->dentry = dget(next.dentry); + } + nd->flags |= LOOKUP_LINK_NOTLAST; err = do_follow_link(next.dentry, nd); + if (!(save_flags & LOOKUP_LINK_NOTLAST)) @@ -174,17 +191,17 @@ dput(next.dentry); mntput(next.mnt); if (err) -@@ -795,14 +845,34 @@ last_component: +@@ -828,14 +880,34 @@ last_component: inode = nd->dentry->d_inode; /* fallthrough */ case 1: -+ nd->flags |= LOOKUP_LAST; -+ err = revalidate_special(nd); -+ nd->flags &= ~LOOKUP_LAST; ++ nd->flags |= LOOKUP_LAST; ++ err = revalidate_special(nd); ++ nd->flags &= ~LOOKUP_LAST; + if (!nd->dentry->d_inode) + err = -ENOENT; + if (err) { -+ path_release(nd); ++ path_release(nd); + goto return_err; + } + if (lookup_flags & LOOKUP_DIRECTORY) { @@ -197,7 +214,7 @@ + } goto return_reval; } -+ ++ if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); if (err < 0) @@ -209,7 +226,29 @@ if (err) break; follow_mount(&next.mnt, &next.dentry); -@@ -1053,7 +1123,7 @@ struct dentry * lookup_hash(struct qstr +@@ -1007,7 +1079,7 @@ set_it: + } + + /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ +-int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) ++int fastcall path_lookup_it(const char *name, unsigned int flags, struct nameidata *nd) + { + int retval = 0; + +@@ -1041,6 +1113,12 @@ out: + return retval; + } + ++int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_GETATTR); ++ return path_lookup_it(name, flags, nd); ++} ++ + /* + * Restricted form of lookup. Doesn't follow links, single-component only, + * needs parent already locked. Doesn't follow mounts. +@@ -1091,7 +1169,7 @@ struct dentry * lookup_hash(struct qstr } /* SMP-safe */ @@ -218,7 +257,7 @@ { unsigned long hash; struct qstr this; -@@ -1073,11 +1143,16 @@ struct dentry * lookup_one_len(const cha +@@ -1111,11 +1189,16 @@ struct dentry * lookup_one_len(const cha } this.hash = end_name_hash(hash); @@ -236,7 +275,7 @@ /* * namei() * -@@ -1089,7 +1164,7 @@ access: +@@ -1127,18 +1210,24 @@ access: * that namei follows links, while lnamei does not. * SMP-safe */ @@ -245,7 +284,12 @@ { char *tmp = getname(name); int err = PTR_ERR(tmp); -@@ -1101,6 +1176,12 @@ int fastcall __user_walk(const char __us + + if (!IS_ERR(tmp)) { +- err = path_lookup(tmp, flags, nd); ++ err = path_lookup_it(tmp, flags, nd); + putname(tmp); + } return err; } @@ -258,7 +302,16 @@ /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. -@@ -1387,8 +1468,8 @@ int open_namei(const char * pathname, in +@@ -1384,7 +1473,7 @@ int may_open(struct nameidata *nd, int a + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME); ++ error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_FROM_OPEN); + } + put_write_access(inode); + if (error) +@@ -1425,14 +1514,14 @@ int open_namei(const char * pathname, in acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ @@ -269,15 +322,24 @@ /* * The simplest case - just a plain lookup. -@@ -1403,6 +1484,7 @@ int open_namei(const char * pathname, in + */ + if (!(flag & O_CREAT)) { +- error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); ++ error = path_lookup_it(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); + if (error) + return error; + goto ok; +@@ -1441,7 +1530,8 @@ int open_namei(const char * pathname, in /* * Create - we need to know the parent. */ +- error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + nd->intent.it_op |= IT_CREAT; - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); ++ error = path_lookup_it(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; -@@ -1419,7 +1501,9 @@ int open_namei(const char * pathname, in + +@@ -1457,7 +1547,9 @@ int open_namei(const char * pathname, in dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); @@ -287,7 +349,7 @@ do_last: error = PTR_ERR(dentry); -@@ -1532,7 +1616,9 @@ do_link: +@@ -1570,7 +1662,9 @@ do_link: } dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -297,8 +359,228 @@ __putname(nd->last.name); goto do_last; } ---- linux-2.6.9.orig/fs/namespace.c -+++ linux-2.6.9/fs/namespace.c +@@ -1644,10 +1738,20 @@ asmlinkage long sys_mknod(const char __u + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); +- +- error = path_lookup(tmp, LOOKUP_PARENT, &nd); ++ ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = path_lookup_it(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mknod_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mknod_raw(&nd, mode, dev); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1674,6 +1778,7 @@ asmlinkage long sys_mknod(const char __u + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1716,10 +1821,20 @@ asmlinkage long sys_mkdir(const char __u + if (!IS_ERR(tmp)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + +- error = path_lookup(tmp, LOOKUP_PARENT, &nd); ++ error = path_lookup_it(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1729,6 +1844,7 @@ asmlinkage long sys_mkdir(const char __u + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1814,7 +1930,8 @@ asmlinkage long sys_rmdir(const char __u + if(IS_ERR(name)) + return PTR_ERR(name); + +- error = path_lookup(name, LOOKUP_PARENT, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = path_lookup_it(name, LOOKUP_PARENT, &nd); + if (error) + goto exit; + +@@ -1829,6 +1946,16 @@ asmlinkage long sys_rmdir(const char __u + error = -EBUSY; + goto exit1; + } ++ ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1892,12 +2019,22 @@ asmlinkage long sys_unlink(const char __ + if(IS_ERR(name)) + return PTR_ERR(name); + +- error = path_lookup(name, LOOKUP_PARENT, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = path_lookup_it(name, LOOKUP_PARENT, &nd); + if (error) + goto exit; + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; ++ ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1965,10 +2102,20 @@ asmlinkage long sys_symlink(const char _ + if (!IS_ERR(to)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + +- error = path_lookup(to, LOOKUP_PARENT, &nd); ++ error = path_lookup_it(to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->symlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->symlink_raw(&nd, from); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1976,6 +2123,7 @@ asmlinkage long sys_symlink(const char _ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(to); +@@ -2045,15 +2193,26 @@ asmlinkage long sys_link(const char __us + if (IS_ERR(to)) + return PTR_ERR(to); + +- error = __user_walk(oldname, 0, &old_nd); ++ intent_init(&old_nd.intent, IT_LOOKUP); ++ error = __user_walk_it(oldname, 0, &old_nd); + if (error) + goto exit; +- error = path_lookup(to, LOOKUP_PARENT, &nd); ++ ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = path_lookup_it(to, LOOKUP_PARENT, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ if (nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } ++ + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { +@@ -2229,11 +2388,13 @@ static inline int do_rename(const char * + struct dentry * trap; + struct nameidata oldnd, newnd; + +- error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ error = path_lookup_it(oldname, LOOKUP_PARENT, &oldnd); + if (error) + goto exit; + +- error = path_lookup(newname, LOOKUP_PARENT, &newnd); ++ intent_init(&newnd.intent, IT_LOOKUP); ++ error = path_lookup_it(newname, LOOKUP_PARENT, &newnd); + if (error) + goto exit1; + +@@ -2250,6 +2411,13 @@ static inline int do_rename(const char * + if (newnd.last_type != LAST_NORM) + goto exit2; + ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ + trap = lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); +@@ -2281,8 +2449,7 @@ static inline int do_rename(const char * + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + exit5: + dput(new_dentry); + exit4: +@@ -2473,6 +2640,7 @@ EXPORT_SYMBOL(page_readlink); + EXPORT_SYMBOL(page_symlink); + EXPORT_SYMBOL(page_symlink_inode_operations); + EXPORT_SYMBOL(path_lookup); ++EXPORT_SYMBOL(path_lookup_it); + EXPORT_SYMBOL(path_release); + EXPORT_SYMBOL(path_walk); + EXPORT_SYMBOL(permission); +diff -urNp RH_2_6_9_42_0_3.orig/fs/namespace.c RH_2_6_9_42_0_3/fs/namespace.c +--- RH_2_6_9_42_0_3.orig/fs/namespace.c ++++ RH_2_6_9_42_0_3/fs/namespace.c @@ -61,6 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); @@ -315,17 +597,7 @@ old_nd->dentry = mnt->mnt_mountpoint; old_nd->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; -@@ -177,6 +179,9 @@ void __mntput(struct vfsmount *mnt) - { - struct super_block *sb = mnt->mnt_sb; - dput(mnt->mnt_root); -+ spin_lock(&dcache_lock); -+ list_del(&mnt->mnt_lustre_list); -+ spin_unlock(&dcache_lock); - free_vfsmnt(mnt); - deactivate_super(sb); - } -@@ -403,6 +408,8 @@ static int do_umount(struct vfsmount *mn +@@ -441,6 +442,8 @@ static int do_umount(struct vfsmount *mn */ lock_kernel(); @@ -334,48 +606,220 @@ if( (flags&MNT_FORCE) && sb->s_op->umount_begin) sb->s_op->umount_begin(sb); unlock_kernel(); -@@ -627,6 +634,7 @@ static int do_loopback(struct nameidata +@@ -665,7 +668,8 @@ static int do_loopback(struct nameidata return err; if (!old_name || !*old_name) return -EINVAL; +- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); ++ err = path_lookup_it(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat + +@@ -739,7 +743,8 @@ static int do_move_mount(struct nameidat return -EPERM; if (!old_name || !*old_name) return -EINVAL; +- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); ++ err = path_lookup_it(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di - int retval = 0; - int mnt_flags = 0; -+ intent_init(&nd.intent, IT_LOOKUP); - /* Discard magic */ - if ((flags & MS_MGC_MSK) == MS_MGC_VAL) - flags &= ~MS_MGC_MSK; ---- linux-2.6.9.orig/fs/open.c -+++ linux-2.6.9/fs/open.c -@@ -216,12 +216,12 @@ static inline long do_sys_truncate(const +@@ -1074,7 +1079,8 @@ long do_mount(char * dev_name, char * di + flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + + /* ... and get the mountpoint */ +- retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW, &nd); + if (retval) + return retval; + +diff -rup RH_2_6_9_55.orig/fs/nfs/dir.c RH_2_6_9_55/fs/nfs/dir.c +--- RH_2_6_9_55.orig/fs/nfs/dir.c ++++ RH_2_6_9_55/fs/nfs/dir.c +@@ -839,7 +839,7 @@ int nfs_is_exclusive_create(struct inode + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; +- return (nd->intent.open.flags & O_EXCL) != 0; ++ return (nd->intent.it_flags & O_EXCL) != 0; + } + + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +@@ -915,7 +915,7 @@ static int is_atomic_open(struct inode * + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -936,7 +936,7 @@ static struct dentry *nfs_atomic_lookup( + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -948,7 +948,7 @@ static struct dentry *nfs_atomic_lookup( + goto out; + } + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -967,7 +967,7 @@ static struct dentry *nfs_atomic_lookup( + case -ENOTDIR: + goto no_open; + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EINVAL: */ + default: +@@ -1005,7 +1005,7 @@ static int nfs_open_revalidate(struct de + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; +@@ -1213,7 +1213,7 @@ static int nfs_create(struct inode *dir, + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) +- open_flags = nd->intent.open.flags; ++ open_flags = nd->intent.it_flags; + + /* + * The 0 argument passed into the create function should one day +diff -rup RH_2_6_9_55.orig/fs/nfs/nfs4proc.c RH_2_6_9_55/fs/nfs/nfs4proc.c +--- RH_2_6_9_55.orig/fs/nfs/nfs4proc.c ++++ RH_2_6_9_55/fs/nfs/nfs4proc.c +@@ -770,17 +770,17 @@ nfs4_atomic_open(struct inode *dir, stru + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; +diff -rup RH_2_6_9_55.orig/fs/open.c RH_2_6_9_55/fs/open.c +--- RH_2_6_9_55.orig/fs/open.c ++++ RH_2_6_9_55/fs/open.c +@@ -195,6 +195,7 @@ out: + int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs) + { + int err; ++ struct inode_operations *op = dentry->d_inode->i_op; + struct iattr newattrs; + + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ +@@ -204,8 +205,16 @@ int do_truncate(struct dentry *dentry, l + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | time_attrs; + down(&dentry->d_inode->i_sem); +- err = notify_change(dentry, &newattrs); +- up(&dentry->d_inode->i_sem); ++ if (op->setattr_raw) { ++ newattrs.ia_valid |= ATTR_RAW; ++ newattrs.ia_ctime = CURRENT_TIME; ++ down_write(&dentry->d_inode->i_alloc_sem); ++ err = op->setattr_raw(dentry->d_inode, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); ++ } else ++ err = notify_change(dentry, &newattrs); ++ up(&dentry->d_inode->i_sem); ++ + return err; + } + +@@ -214,12 +223,13 @@ static inline long do_sys_truncate(const struct nameidata nd; struct inode * inode; int error; - -+ intent_init(&nd.intent, IT_GETATTR); ++ error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; - +- - error = user_path_walk(path, &nd); ++ ++ intent_init(&nd.intent, IT_GETATTR); + error = user_path_walk_it(path, &nd); if (error) goto out; inode = nd.dentry->d_inode; -@@ -475,6 +475,7 @@ asmlinkage long sys_access(const char __ +@@ -390,9 +400,19 @@ asmlinkage long sys_utime(char __user * + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -443,9 +463,19 @@ long do_utimes(char __user * filename, s + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -473,6 +503,7 @@ asmlinkage long sys_access(const char __ int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; @@ -383,7 +827,7 @@ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; -@@ -499,13 +500,14 @@ asmlinkage long sys_access(const char __ +@@ -497,13 +528,14 @@ asmlinkage long sys_access(const char __ else current->cap_effective = current->cap_permitted; @@ -399,7 +843,7 @@ path_release(&nd); } -@@ -520,8 +522,9 @@ asmlinkage long sys_chdir(const char __u +@@ -518,8 +550,9 @@ asmlinkage long sys_chdir(const char __u { struct nameidata nd; int error; @@ -410,7 +854,7 @@ if (error) goto out; -@@ -573,8 +576,9 @@ asmlinkage long sys_chroot(const char __ +@@ -571,8 +604,9 @@ asmlinkage long sys_chroot(const char __ { struct nameidata nd; int error; @@ -421,7 +865,137 @@ if (error) goto out; -@@ -741,8 +745,6 @@ asmlinkage long sys_fchown(unsigned int +@@ -595,36 +629,52 @@ out: + + EXPORT_SYMBOL_GPL(sys_chroot); + +-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++int chmod_common(struct dentry *dentry, mode_t mode) + { +- struct inode * inode; +- struct dentry * dentry; +- struct file * file; +- int err = -EBADF; ++ struct inode * inode = dentry->d_inode; + struct iattr newattrs; ++ int error = -EROFS; + +- file = fget(fd); +- if (!file) ++ if (IS_RDONLY(inode)) + goto out; ++ ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; + +- dentry = file->f_dentry; +- inode = dentry->d_inode; ++ newattrs.ia_mode = mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use the normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out; ++ } + +- err = -EROFS; +- if (IS_RDONLY(inode)) +- goto out_putf; +- err = -EPERM; ++ error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto out_putf; ++ goto out; ++ + down(&inode->i_sem); + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ error = notify_change(dentry, &newattrs); + up(&inode->i_sem); ++out: ++ return error; ++} + +-out_putf: ++asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++{ ++ struct file * file; ++ int err = -EBADF; ++ ++ file = fget(fd); ++ if (!file) ++ goto out; ++ ++ err = chmod_common(file->f_dentry, mode); + fput(file); + out: + return err; +@@ -633,32 +683,13 @@ out: + asmlinkage long sys_chmod(const char __user * filename, mode_t mode) + { + struct nameidata nd; +- struct inode * inode; + int error; +- struct iattr newattrs; + + error = user_path_walk(filename, &nd); + if (error) + goto out; +- inode = nd.dentry->d_inode; + +- error = -EROFS; +- if (IS_RDONLY(inode)) +- goto dput_and_out; +- +- error = -EPERM; +- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto dput_and_out; +- +- down(&inode->i_sem); +- if (mode == (mode_t) -1) +- mode = inode->i_mode; +- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); +- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); +- +-dput_and_out: ++ error = chmod_common(nd.dentry, mode); + path_release(&nd); + out: + return error; +@@ -679,6 +710,18 @@ static int chown_common(struct dentry * + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; ++ ++ newattrs.ia_uid = user; ++ newattrs.ia_gid = group; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ return error; ++ } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + newattrs.ia_valid = ATTR_CTIME; +@@ -692,6 +735,7 @@ static int chown_common(struct dentry * + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; ++ + down(&inode->i_sem); + error = notify_change(dentry, &newattrs); + up(&inode->i_sem); +@@ -739,8 +783,6 @@ asmlinkage long sys_fchown(unsigned int return error; } @@ -430,7 +1004,7 @@ /* * Note that while the flag value (low two bits) for sys_open means: * 00 - read-only -@@ -760,8 +762,9 @@ static struct file *__dentry_open(struct +@@ -758,8 +800,9 @@ static struct file *__dentry_open(struct struct file *filp_open(const char * filename, int flags, int mode) { int namei_flags, error; @@ -441,7 +1015,7 @@ namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) -@@ -769,16 +772,11 @@ struct file *filp_open(const char * file +@@ -767,16 +810,11 @@ struct file *filp_open(const char * file if (namei_flags & O_TRUNC) namei_flags |= 2; @@ -458,17 +1032,19 @@ + if (!error) { + temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); + return temp_filp; -+ } ++ } return ERR_PTR(error); } -@@ -786,29 +784,27 @@ EXPORT_SYMBOL(filp_open); +@@ -784,29 +822,27 @@ EXPORT_SYMBOL(filp_open); struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { - int error; - struct file *f; -- ++ struct lookup_intent it; ++ intent_init(&it, IT_LOOKUP); + - error = -ENFILE; - f = get_empty_filp(); - if (f == NULL) { @@ -476,9 +1052,7 @@ - mntput(mnt); - return ERR_PTR(error); - } -+ struct lookup_intent it; -+ intent_init(&it, IT_LOOKUP); - +- - return __dentry_open(dentry, mnt, flags, f); + return dentry_open_it(dentry, mnt, flags, &it); } @@ -502,7 +1076,7 @@ inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); -@@ -827,6 +823,7 @@ static struct file *__dentry_open(struct +@@ -825,6 +861,7 @@ static struct file *__dentry_open(struct error = f->f_op->open(inode,f); if (error) goto cleanup_all; @@ -510,7 +1084,7 @@ } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -@@ -851,6 +848,8 @@ cleanup_all: +@@ -849,6 +886,8 @@ cleanup_all: f->f_vfsmnt = NULL; cleanup_file: put_filp(f); @@ -519,8 +1093,9 @@ dput(dentry); mntput(mnt); return ERR_PTR(error); ---- linux-2.6.9.orig/fs/stat.c -+++ linux-2.6.9/fs/stat.c +diff -rup RH_2_6_9_55.orig/fs/stat.c RH_2_6_9_55/fs/stat.c +--- RH_2_6_9_55.orig/fs/stat.c ++++ RH_2_6_9_55/fs/stat.c @@ -37,7 +37,7 @@ void generic_fillattr(struct inode *inod EXPORT_SYMBOL(generic_fillattr); @@ -539,15 +1114,46 @@ if (inode->i_op->getattr) return inode->i_op->getattr(mnt, dentry, stat); -@@ -62,14 +64,20 @@ int vfs_getattr(struct vfsmount *mnt, st +@@ -62,7 +64,7 @@ int vfs_getattr(struct vfsmount *mnt, st EXPORT_SYMBOL(vfs_getattr); +-int vfs_getattr64(struct vfsmount *mnt, struct dentry *dentry, struct kstat64 *stat) ++int vfs_getattr64_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat64 *stat) + { + struct inode *inode = dentry->d_inode; + int retval; +@@ -79,6 +81,13 @@ int vfs_getattr64(struct vfsmount *mnt, + return ixop->getattr64(mnt, dentry, stat); + } + ++ if (inode->i_op->getattr_it) { ++ retval = inode->i_op->getattr_it(mnt, dentry, it, (struct kstat *) stat); ++ if (retval == 0) ++ stat->ino64 = stat->ino; ++ return retval; ++ } ++ + if (inode->i_op->getattr) { + retval = inode->i_op->getattr(mnt, dentry, (struct kstat *) stat); + if (retval == 0) +@@ -98,16 +107,28 @@ int vfs_getattr64(struct vfsmount *mnt, + return 0; + } + ++ + EXPORT_SYMBOL(vfs_getattr64); + +int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + return vfs_getattr_it(mnt, dentry, NULL, stat); +} + ++int vfs_getattr64(struct vfsmount *mnt, struct dentry *dentry, struct kstat64 *stat) ++{ ++ return vfs_getattr64_it(mnt, dentry, NULL, stat); ++} ++ int vfs_stat(char __user *name, struct kstat *stat) { struct nameidata nd; @@ -562,7 +1168,7 @@ path_release(&nd); } return error; -@@ -81,10 +89,11 @@ int vfs_lstat(char __user *name, struct +@@ -119,10 +140,11 @@ int vfs_lstat(char __user *name, struct { struct nameidata nd; int error; @@ -576,7 +1182,7 @@ path_release(&nd); } return error; -@@ -96,9 +105,12 @@ int vfs_fstat(unsigned int fd, struct ks +@@ -134,9 +156,12 @@ int vfs_fstat(unsigned int fd, struct ks { struct file *f = fget(fd); int error = -EBADF; @@ -590,8 +1196,48 @@ fput(f); } return error; ---- linux-2.6.9.orig/include/linux/dcache.h -+++ linux-2.6.9/include/linux/dcache.h +@@ -148,10 +173,11 @@ int vfs_stat64(char __user *name, struct + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + + error = user_path_walk(name, &nd); + if (!error) { +- error = vfs_getattr64(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr64_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -163,10 +189,11 @@ int vfs_lstat64(char __user *name, struc + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + + error = user_path_walk_link(name, &nd); + if (!error) { +- error = vfs_getattr64(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr64_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -178,9 +205,11 @@ int vfs_fstat64(unsigned int fd, struct + { + struct file *f = fget(fd); + int error = -EBADF; ++ struct nameidata nd; ++ intent_init(&nd.intent, IT_GETATTR); + + if (f) { +- error = vfs_getattr64(f->f_vfsmnt, f->f_dentry, stat); ++ error = vfs_getattr64_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); + fput(f); + } + return error; +diff -rup RH_2_6_9_55.orig/include/linux/dcache.h RH_2_6_9_55/include/linux/dcache.h +--- RH_2_6_9_55.orig/include/linux/dcache.h ++++ RH_2_6_9_55/include/linux/dcache.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ @@ -609,21 +1255,10 @@ struct dentry_stat_t { int nr_dentry; int nr_unused; ---- linux-2.6.9.orig/include/linux/fs.h -+++ linux-2.6.9/include/linux/fs.h -@@ -81,6 +81,11 @@ extern int leases_enable, dir_notify_ena - #define FMODE_PREAD 8 - #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -+/* File is being opened for execution. Primary users of this flag are -+ distributed filesystems that can use it to achieve correct ETXTBUSY -+ behavior for cross-node execution/opening_for_writing of files */ -+#define FMODE_EXEC 16 -+ - #define RW_MASK 1 - #define RWA_MASK 2 - #define READ 0 -@@ -259,6 +260,8 @@ typedef void (dio_iodone_t)(struct inode +diff -rup RH_2_6_9_55.orig/include/linux/fs.h RH_2_6_9_55/include/linux/fs.h +--- RH_2_6_9_55.orig/include/linux/fs.h ++++ RH_2_6_9_55/include/linux/fs.h +@@ -266,6 +266,8 @@ typedef void (dio_iodone_t)(struct inode #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 @@ -632,7 +1267,7 @@ /* * This is the Inode Attributes structure, used for notify_change(). It -@@ -457,6 +460,7 @@ struct inode { +@@ -464,6 +466,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; @@ -640,7 +1275,7 @@ __u32 i_generation; -@@ -590,6 +594,7 @@ struct file { +@@ -597,6 +600,7 @@ struct file { spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; @@ -648,7 +1283,27 @@ }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); -@@ -962,7 +967,9 @@ struct inode_operations { +@@ -967,20 +971,29 @@ struct inode_operations { + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*link_raw) (struct nameidata *,struct nameidata *); + int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink_raw) (struct nameidata *); + int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir_raw) (struct nameidata *,int); + int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir_raw) (struct nameidata *); + int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); + int (*readlink) (struct dentry *, char __user *,int); + int (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -658,7 +1313,7 @@ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -1002,6 +1009,7 @@ struct super_operations { +@@ -1025,6 +1038,7 @@ struct super_operations { int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); @@ -666,7 +1321,7 @@ int (*show_options)(struct seq_file *, struct vfsmount *); }; -@@ -1194,6 +1202,7 @@ extern int unregister_filesystem(struct +@@ -1217,6 +1231,7 @@ extern int unregister_filesystem(struct extern struct vfsmount *kern_mount(struct file_system_type *); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); @@ -674,16 +1329,21 @@ extern long do_mount(char *, char *, char *, unsigned long, void *); extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1258,6 +1267,7 @@ static inline int break_lease(struct ino - extern int do_truncate(struct dentry *, loff_t start); +@@ -1277,10 +1292,10 @@ static inline int break_lease(struct ino + } + + /* fs/open.c */ +- + extern int do_truncate(struct dentry *, loff_t start, unsigned int); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); ---- linux-2.6.9.orig/include/linux/mount.h -+++ linux-2.6.9/include/linux/mount.h +diff -rup RH_2_6_9_55.orig/include/linux/mount.h RH_2_6_9_55/include/linux/mount.h +--- RH_2_6_9_55.orig/include/linux/mount.h ++++ RH_2_6_9_55/include/linux/mount.h @@ -34,6 +34,8 @@ struct vfsmount struct list_head mnt_list; struct list_head mnt_fslink; /* link in fs-specific expiry list */ @@ -693,8 +1353,9 @@ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) ---- linux-2.6.9.orig/include/linux/namei.h -+++ linux-2.6.9/include/linux/namei.h +diff -rup RH_2_6_9_55.orig/include/linux/namei.h RH_2_6_9_55/include/linux/namei.h +--- RH_2_6_9_55.orig/include/linux/namei.h ++++ RH_2_6_9_55/include/linux/namei.h @@ -2,14 +2,55 @@ #define _LINUX_NAMEI_H @@ -775,7 +1436,7 @@ /* * Intent data -@@ -56,6 +96,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -56,11 +96,18 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); @@ -788,11 +1449,9 @@ #define user_path_walk(name,nd) \ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ -@@ -68,7 +114,6 @@ extern void path_release_on_umount(struc - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - + __user_walk(name, 0, nd) + extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); ++extern int FASTCALL(path_lookup_it(const char *, unsigned, struct nameidata *)); + extern int FASTCALL(path_walk(const char *, struct nameidata *)); + extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); + extern void path_release(struct nameidata *); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch index 8cd62d5..9071314 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch @@ -1,157 +1,96 @@ ---- - fs/9p/vfs_inode.c | 2 - fs/exec.c | 8 ++ - fs/fuse/dir.c | 2 - fs/inode.c | 1 - fs/namei.c | 137 ++++++++++++++++++++++++++++++++++++++++++------- - fs/namespace.c | 10 +++ - fs/nfsctl.c | 1 - fs/open.c | 31 ++++++----- - fs/stat.c | 24 ++++++-- - include/linux/dcache.h | 3 + - include/linux/fs.h | 10 +++ - include/linux/mount.h | 2 - include/linux/namei.h | 65 ++++++++++++++++++++--- - 13 files changed, 248 insertions(+), 48 deletions(-) - ---- linux-2.6.16.21-0.8.orig/fs/inode.c -+++ linux-2.6.16.21-0.8/fs/inode.c -@@ -236,6 +236,7 @@ void __iget(struct inode * inode) - inodes_stat.nr_unused--; - } - -+EXPORT_SYMBOL(__iget); - /** - * clear_inode - clear an inode - * @inode: inode to clear ---- linux-2.6.16.21-0.8.orig/fs/open.c -+++ linux-2.6.16.21-0.8/fs/open.c -@@ -224,12 +224,12 @@ static long do_sys_truncate(const char _ - struct nameidata nd; - struct inode * inode; - int error; -- -+ intent_init(&nd.intent, IT_GETATTR); - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -494,6 +494,7 @@ asmlinkage long sys_faccessat(int dfd, c - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ intent_init(&nd.intent, IT_GETATTR); +Index: LINUX-SRC-TREE/fs/9p/vfs_inode.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/9p/vfs_inode.c ++++ LINUX-SRC-TREE/fs/9p/vfs_inode.c +@@ -469,7 +469,7 @@ v9fs_vfs_create(struct inode *dir, struc + perm = unixmode2p9mode(v9ses, mode); - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -518,7 +519,7 @@ asmlinkage long sys_faccessat(int dfd, c + if (nd && nd->flags & LOOKUP_OPEN) +- flags = nd->intent.open.flags - 1; ++ flags = nd->intent.flags - 1; else - current->cap_effective = current->cap_permitted; + flags = O_RDWR; -- res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); -+ res = __user_walk_fd_it(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); - if (!res) { - res = vfs_permission(&nd, mode); - /* SuS v2 requires we report a read only fs too */ -@@ -544,8 +545,9 @@ asmlinkage long sys_chdir(const char __u - { +Index: LINUX-SRC-TREE/fs/cifs/dir.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/cifs/dir.c ++++ LINUX-SRC-TREE/fs/cifs/dir.c +@@ -157,11 +157,7 @@ cifs_create(struct inode *inode, struct + + #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 5, 0) + if(nd && (nd->flags & LOOKUP_OPEN)) { +-#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,5) /* SUSE included Lustre patch */ + int oflags = nd->intent.it_flags; +-#else +- int oflags = nd->intent.open.flags; +-#endif + + desiredAccess = 0; + if (oflags & FMODE_READ) +Index: LINUX-SRC-TREE/fs/exec.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/exec.c ++++ LINUX-SRC-TREE/fs/exec.c +@@ -129,7 +129,9 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_GETATTR); -- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); -+ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); +- error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); ++ intent_init(&nd.intent, IT_OPEN); ++ error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, ++ FMODE_READ | FMODE_EXEC); if (error) goto out; -@@ -595,8 +597,9 @@ asmlinkage long sys_chroot(const char __ - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - if (error) - goto out; +@@ -481,7 +483,9 @@ struct file *open_exec(const char *name) + int err; + struct file *file; -@@ -817,6 +820,7 @@ static struct file *__dentry_open(struct - error = open(inode, f); - if (error) - goto cleanup_all; -+ intent_release(f->f_it); - } +- err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ); ++ intent_init(&nd.intent, IT_OPEN); ++ err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, ++ FMODE_READ | FMODE_EXEC, 0); + file = ERR_PTR(err); - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -@@ -843,6 +847,7 @@ cleanup_all: - f->f_dentry = NULL; - f->f_vfsmnt = NULL; - cleanup_file: -+ intent_release(f->f_it); - put_filp(f); - dput(dentry); - mntput(mnt); -@@ -868,6 +873,7 @@ static struct file *do_filp_open(int dfd - { - int namei_flags, error; - struct nameidata nd; -+ intent_init(&nd.intent, IT_OPEN); + if (!err) { +@@ -1543,7 +1547,7 @@ int do_coredump(long signr, int exit_cod + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (do_truncate(file->f_dentry, 0, 0, file) != 0) ++ if (do_truncate(file->f_dentry, 0, 0, file, 0) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file); +Index: LINUX-SRC-TREE/fs/fuse/dir.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/fuse/dir.c ++++ LINUX-SRC-TREE/fs/fuse/dir.c +@@ -242,7 +242,7 @@ static int fuse_create_open(struct inode + struct fuse_entry_out outentry; + struct fuse_file *ff; + struct file *file; +- int flags = nd->intent.open.flags - 1; ++ int flags = nd->intent.flags - 1; - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -904,19 +910,19 @@ EXPORT_SYMBOL(filp_open); - struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) - { -- if (IS_ERR(nd->intent.open.file)) -+ if (IS_ERR(nd->intent.file)) - goto out; - if (IS_ERR(dentry)) - goto out_err; -- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), -- nd->intent.open.flags - 1, -- nd->intent.open.file, -+ nd->intent.file = __dentry_open(dget(dentry), mntget(nd->mnt), -+ nd->intent.flags - 1, -+ nd->intent.file, - open); - out: -- return nd->intent.open.file; -+ return nd->intent.file; - out_err: - release_open_intent(nd); -- nd->intent.open.file = (struct file *)dentry; -+ nd->intent.file = (struct file *)dentry; - goto out; + err = -ENOSYS; + if (fc->no_create) +Index: LINUX-SRC-TREE/fs/inode.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/inode.c ++++ LINUX-SRC-TREE/fs/inode.c +@@ -236,6 +236,7 @@ void __iget(struct inode * inode) + inodes_stat.nr_unused--; } - EXPORT_SYMBOL_GPL(lookup_instantiate_filp); -@@ -933,7 +939,8 @@ struct file *nameidata_to_filp(struct na - struct file *filp; - - /* Pick up the filp from the open intent */ -- filp = nd->intent.open.file; -+ filp = nd->intent.file; -+ filp->f_it = &nd->intent; - /* Has the filesystem initialised the file for us? */ - if (filp->f_dentry == NULL) - filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); ---- linux-2.6.16.21-0.8.orig/fs/nfsctl.c -+++ linux-2.6.16.21-0.8/fs/nfsctl.c -@@ -26,6 +26,7 @@ static struct file *do_open(char *name, - struct nameidata nd; - int error; - -+ intent_init(&nd.intent, IT_OPEN); - nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); - if (IS_ERR(nd.mnt)) ---- linux-2.6.16.21-0.8.orig/fs/namei.c -+++ linux-2.6.16.21-0.8/fs/namei.c ++EXPORT_SYMBOL(__iget); + /** + * clear_inode - clear an inode + * @inode: inode to clear +Index: LINUX-SRC-TREE/fs/namei.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/namei.c ++++ LINUX-SRC-TREE/fs/namei.c @@ -337,8 +337,19 @@ int deny_write_access(struct file * file return 0; } @@ -314,18 +253,54 @@ if (err) break; inode = next.dentry->d_inode; -@@ -1148,13 +1218,13 @@ static int __path_lookup_intent_open(int +@@ -1066,7 +1136,7 @@ set_it: + } + + /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ +-static int fastcall do_path_lookup(int dfd, const char *name, ++static int fastcall do_path_lookup_it(int dfd, const char *name, + unsigned int flags, struct nameidata *nd) + { + int retval = 0; +@@ -1134,10 +1204,23 @@ fput_fail: + goto out_fail; + } + +-int fastcall path_lookup(const char *name, unsigned int flags, ++static int fastcall do_path_lookup(int dfd, const char *name, ++ unsigned int flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_GETATTR); ++ return do_path_lookup_it(dfd, name, flags, nd); ++} ++ ++int fastcall path_lookup_it(const char *name, unsigned int flags, + struct nameidata *nd) + { +- return do_path_lookup(AT_FDCWD, name, flags, nd); ++ return do_path_lookup_it(AT_FDCWD, name, flags, nd); ++} ++ ++int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_GETATTR); ++ return path_lookup_it(name, flags, nd); + } + + static int __path_lookup_intent_open(int dfd, const char *name, +@@ -1149,13 +1232,13 @@ static int __path_lookup_intent_open(int if (filp == NULL) return -ENFILE; - nd->intent.open.file = filp; - nd->intent.open.flags = open_flags; - nd->intent.open.create_mode = create_mode; +- err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); +- if (IS_ERR(nd->intent.open.file)) { + nd->intent.file = filp; + nd->intent.flags = open_flags; + nd->intent.create_mode = create_mode; - err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); -- if (IS_ERR(nd->intent.open.file)) { ++ err = do_path_lookup_it(dfd, name, lookup_flags|LOOKUP_OPEN, nd); + if (IS_ERR(nd->intent.file)) { if (err == 0) { - err = PTR_ERR(nd->intent.open.file); @@ -333,7 +308,20 @@ path_release(nd); } } else if (err != 0) -@@ -1257,7 +1327,7 @@ struct dentry * lookup_hash(struct namei +@@ -1172,10 +1255,10 @@ static int __path_lookup_intent_open(int + * @open_flags: open intent flags + */ + int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, +- struct nameidata *nd, int open_flags) ++ struct nameidata *nd, int open_flags, int create_mode) + { + return __path_lookup_intent_open(dfd, name, lookup_flags, nd, +- open_flags, 0); ++ open_flags, create_mode); + } + + /** +@@ -1258,7 +1341,7 @@ struct dentry * lookup_hash(struct namei } /* SMP-safe */ @@ -342,7 +330,7 @@ { unsigned long hash; struct qstr this; -@@ -1277,11 +1347,17 @@ struct dentry * lookup_one_len(const cha +@@ -1278,11 +1361,17 @@ struct dentry * lookup_one_len(const cha } this.hash = end_name_hash(hash); @@ -361,7 +349,7 @@ /* * namei() * -@@ -1293,8 +1369,9 @@ access: +@@ -1294,22 +1383,36 @@ access: * that namei follows links, while lnamei does not. * SMP-safe */ @@ -373,7 +361,12 @@ { char *tmp = getname(name); int err = PTR_ERR(tmp); -@@ -1306,9 +1383,22 @@ int fastcall __user_walk_fd(int dfd, con + + if (!IS_ERR(tmp)) { +- err = do_path_lookup(dfd, tmp, flags, nd); ++ err = do_path_lookup_it(dfd, tmp, flags, nd); + putname(tmp); + } return err; } @@ -397,16 +390,25 @@ } /* -@@ -1588,6 +1678,8 @@ int open_namei(int dfd, const char *path - if (flag & O_APPEND) - acc_mode |= MAY_APPEND; - -+ nd->intent.it_flags = flag; -+ nd->intent.it_create_mode = mode; - /* - * The simplest case - just a plain lookup. +@@ -1545,7 +1648,7 @@ int may_open(struct nameidata *nd, int a + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); ++ error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL, 1); + } + put_write_access(inode); + if (error) +@@ -1595,7 +1698,7 @@ int open_namei(int dfd, const char *path */ -@@ -1602,6 +1694,7 @@ int open_namei(int dfd, const char *path + if (!(flag & O_CREAT)) { + error = path_lookup_open(dfd, pathname, lookup_flags(flag), +- nd, flag); ++ nd, flag, mode); + if (error) + return error; + goto ok; +@@ -1604,6 +1707,7 @@ int open_namei(int dfd, const char *path /* * Create - we need to know the parent. */ @@ -414,7 +416,7 @@ error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); if (error) return error; -@@ -1618,7 +1711,9 @@ int open_namei(int dfd, const char *path +@@ -1620,7 +1724,9 @@ int open_namei(int dfd, const char *path dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); @@ -424,7 +426,7 @@ path.mnt = nd->mnt; do_last: -@@ -1628,9 +1723,9 @@ do_last: +@@ -1630,9 +1736,9 @@ do_last: goto exit; } @@ -436,7 +438,7 @@ goto exit_dput; } -@@ -1683,7 +1778,7 @@ ok: +@@ -1687,7 +1793,7 @@ ok: exit_dput: dput_path(&path, nd); exit: @@ -445,7 +447,7 @@ release_open_intent(nd); path_release(nd); return error; -@@ -1726,7 +1821,9 @@ do_link: +@@ -1736,7 +1842,9 @@ do_link: } dir = nd->dentry; mutex_lock(&dir->d_inode->i_mutex); @@ -455,49 +457,761 @@ path.mnt = nd->mnt; __putname(nd->last.name); goto do_last; -@@ -2238,6 +2335,8 @@ asmlinkage long sys_linkat(int olddfd, c - int error; - char * to; +@@ -1821,15 +1929,26 @@ asmlinkage long sys_mknodat(int dfd, con + struct dentry * dentry; + struct nameidata nd; ++ + if (S_ISDIR(mode)) + return -EPERM; + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + +- error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); + intent_init(&nd.intent, IT_LOOKUP); -+ intent_init(&old_nd.intent, IT_LOOKUP); - if (flags != 0) - return -EINVAL; ++ error = do_path_lookup_it(dfd, tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mknod_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mknod_raw(&nd, mode, dev); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1856,6 +1975,7 @@ asmlinkage long sys_mknodat(int dfd, con + dput(dentry); + } + mutex_unlock(&nd.dentry->d_inode->i_mutex); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1901,9 +2021,18 @@ asmlinkage long sys_mkdirat(int dfd, con + struct dentry *dentry; + struct nameidata nd; + +- error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(dfd, tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1913,6 +2042,7 @@ asmlinkage long sys_mkdirat(int dfd, con + dput(dentry); + } + mutex_unlock(&nd.dentry->d_inode->i_mutex); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1997,8 +2127,9 @@ static long do_rmdir(int dfd, const char + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); +- +- error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); ++ ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(dfd, name, LOOKUP_PARENT, &nd); + if (error) + goto exit; -@@ -2245,7 +2344,7 @@ asmlinkage long sys_linkat(int olddfd, c +@@ -2013,6 +2144,14 @@ static long do_rmdir(int dfd, const char + error = -EBUSY; + goto exit1; + } ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } + mutex_lock(&nd.dentry->d_inode->i_mutex); + dentry = lookup_hash(&nd); + error = PTR_ERR(dentry); +@@ -2081,12 +2220,20 @@ static long do_unlinkat(int dfd, const c + if(IS_ERR(name)) + return PTR_ERR(name); + +- error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(dfd, name, LOOKUP_PARENT, &nd); + if (error) + goto exit; + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } + mutex_lock(&nd.dentry->d_inode->i_mutex); + dentry = lookup_hash(&nd); + error = PTR_ERR(dentry); +@@ -2169,9 +2316,17 @@ asmlinkage long sys_symlinkat(const char + struct dentry *dentry; + struct nameidata nd; + +- error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(newdfd, to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->symlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->symlink_raw(&nd, from); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -2179,6 +2334,7 @@ asmlinkage long sys_symlinkat(const char + dput(dentry); + } + mutex_unlock(&nd.dentry->d_inode->i_mutex); ++out2: + path_release(&nd); + out: + putname(to); +@@ -2255,15 +2411,25 @@ asmlinkage long sys_linkat(int olddfd, c if (IS_ERR(to)) return PTR_ERR(to); - error = __user_walk_fd(olddfd, oldname, 0, &old_nd); ++ intent_init(&old_nd.intent, IT_LOOKUP); + error = __user_walk_fd_it(olddfd, oldname, 0, &old_nd); if (error) goto exit; - error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); ---- linux-2.6.16.21-0.8.orig/fs/stat.c -+++ linux-2.6.16.21-0.8/fs/stat.c -@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod - - EXPORT_SYMBOL(generic_fillattr); +- error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); ++ ++ intent_init(&nd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(newdfd, to, LOOKUP_PARENT, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ if (nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { +@@ -2440,12 +2606,14 @@ static int do_rename(int olddfd, const c + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; +- +- error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); ++ ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(olddfd, oldname, LOOKUP_PARENT, &oldnd); + if (error) + goto exit; +- +- error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); ++ ++ intent_init(&newnd.intent, IT_LOOKUP); ++ error = do_path_lookup_it(newdfd, newname, LOOKUP_PARENT, &newnd); + if (error) + goto exit1; --int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) - { - struct inode *inode = dentry->d_inode; - int retval; -@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st - if (retval) - return retval; +@@ -2462,6 +2630,13 @@ static int do_rename(int olddfd, const c + if (newnd.last_type != LAST_NORM) + goto exit2; -+ if (inode->i_op->getattr_it) -+ return inode->i_op->getattr_it(mnt, dentry, it, stat); - if (inode->i_op->getattr) - return inode->i_op->getattr(mnt, dentry, stat); ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ + trap = lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd); +@@ -2493,8 +2668,7 @@ static int do_rename(int olddfd, const c + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + exit5: + dput(new_dentry); + exit4: +@@ -2700,6 +2874,7 @@ EXPORT_SYMBOL(__page_symlink); + EXPORT_SYMBOL(page_symlink); + EXPORT_SYMBOL(page_symlink_inode_operations); + EXPORT_SYMBOL(path_lookup); ++EXPORT_SYMBOL(path_lookup_it); + EXPORT_SYMBOL(path_release); + EXPORT_SYMBOL(path_walk); + EXPORT_SYMBOL(permission); +Index: LINUX-SRC-TREE/fs/namespace.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/namespace.c ++++ LINUX-SRC-TREE/fs/namespace.c +@@ -75,6 +75,7 @@ struct vfsmount *alloc_vfsmnt(const char + INIT_LIST_HEAD(&mnt->mnt_share); + INIT_LIST_HEAD(&mnt->mnt_slave_list); + INIT_LIST_HEAD(&mnt->mnt_slave); ++ INIT_LIST_HEAD(&mnt->mnt_lustre_list); + if (name) { + int size = strlen(name) + 1; + char *newname = kmalloc(size, GFP_KERNEL); +@@ -155,6 +156,7 @@ static void __touch_namespace(struct nam -@@ -61,6 +63,11 @@ int vfs_getattr(struct vfsmount *mnt, st - return 0; + static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) + { ++ memset(old_nd, 0, sizeof(*old_nd)); + old_nd->dentry = mnt->mnt_mountpoint; + old_nd->mnt = mnt->mnt_parent; + mnt->mnt_parent = mnt; +@@ -273,6 +275,9 @@ static inline void __mntput(struct vfsmo + { + struct super_block *sb = mnt->mnt_sb; + dput(mnt->mnt_root); ++ spin_lock(&dcache_lock); ++ list_del(&mnt->mnt_lustre_list); ++ spin_unlock(&dcache_lock); + free_vfsmnt(mnt); + deactivate_super(sb); } +@@ -539,6 +544,8 @@ static int do_umount(struct vfsmount *mn + */ -+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) + lock_kernel(); ++ if (sb->s_op->umount_lustre) ++ sb->s_op->umount_lustre(sb); + if ((flags & MNT_FORCE) && sb->s_op->umount_begin) + sb->s_op->umount_begin(sb); + unlock_kernel(); +@@ -871,7 +878,8 @@ static int do_loopback(struct nameidata + return err; + if (!old_name || !*old_name) + return -EINVAL; +- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); ++ intent_init(&old_nd.intent, IT_LOOKUP); ++ err = path_lookup_it(old_name, LOOKUP_FOLLOW, &old_nd); + if (err) + return err; + +@@ -956,7 +964,8 @@ static int do_move_mount(struct nameidat + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; +- err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); ++ intent_init(&old_nd.intent, IT_LOOKUP); ++ err = path_lookup_it(old_name, LOOKUP_FOLLOW, &old_nd); + if (err) + return err; + +@@ -1271,6 +1280,7 @@ long do_mount(char *dev_name, char *dir_ + int retval = 0; + int mnt_flags = 0; + ++ + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; +@@ -1301,7 +1311,8 @@ long do_mount(char *dev_name, char *dir_ + MS_NOATIME | MS_NODIRATIME); + + /* ... and get the mountpoint */ +- retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); ++ intent_init(&nd.intent, IT_LOOKUP); ++ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW, &nd); + if (retval) + return retval; + +Index: LINUX-SRC-TREE/fs/nfs/dir.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/nfs/dir.c ++++ LINUX-SRC-TREE/fs/nfs/dir.c +@@ -834,7 +834,7 @@ int nfs_is_exclusive_create(struct inode + return 0; + if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) + return 0; +- return (nd->intent.open.flags & O_EXCL) != 0; ++ return (nd->intent.it_flags & O_EXCL) != 0; + } + + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +@@ -911,7 +911,7 @@ static int is_atomic_open(struct inode * + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -932,7 +932,7 @@ static struct dentry *nfs_atomic_lookup( + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) { ++ if (nd->intent.it_flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } +@@ -947,7 +947,7 @@ static struct dentry *nfs_atomic_lookup( + goto out; + } + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + res = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -966,7 +966,7 @@ static struct dentry *nfs_atomic_lookup( + case -ENOTDIR: + goto no_open; + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EINVAL: */ + default: +@@ -1002,7 +1002,7 @@ static int nfs_open_revalidate(struct de + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; +@@ -1138,7 +1138,7 @@ static int nfs_create(struct inode *dir, + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) +- open_flags = nd->intent.open.flags; ++ open_flags = nd->intent.it_flags; + + lock_kernel(); + nfs_begin_data_update(dir); +Index: LINUX-SRC-TREE/fs/nfs/nfs4proc.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/nfs/nfs4proc.c ++++ LINUX-SRC-TREE/fs/nfs/nfs4proc.c +@@ -1220,7 +1220,7 @@ static void nfs4_intent_set_file(struct + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + } else +- nfs4_close_state(state, nd->intent.open.flags); ++ nfs4_close_state(state, nd->intent.flags); + } + + struct dentry * +@@ -1232,19 +1232,19 @@ nfs4_atomic_open(struct inode *dir, stru + struct dentry *res; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return (struct dentry *)cred; +- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, dentry, nd->intent.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) +Index: LINUX-SRC-TREE/fs/nfsctl.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/nfsctl.c ++++ LINUX-SRC-TREE/fs/nfsctl.c +@@ -26,6 +26,7 @@ static struct file *do_open(char *name, + struct nameidata nd; + int error; + ++ intent_init(&nd.intent, IT_OPEN); + nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); + + if (IS_ERR(nd.mnt)) +Index: LINUX-SRC-TREE/fs/open.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/open.c ++++ LINUX-SRC-TREE/fs/open.c +@@ -198,9 +198,10 @@ out: + } + + int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, +- struct file *filp) ++ struct file *filp, int called_from_open) + { + int err; ++ struct inode_operations *op = dentry->d_inode->i_op; + struct iattr newattrs; + + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ +@@ -215,7 +216,17 @@ int do_truncate(struct dentry *dentry, l + } + + mutex_lock(&dentry->d_inode->i_mutex); +- err = notify_change(dentry, &newattrs); ++ if (called_from_open) ++ newattrs.ia_valid |= ATTR_FROM_OPEN; ++ if (op->setattr_raw) { ++ newattrs.ia_valid |= ATTR_RAW; ++ newattrs.ia_ctime = CURRENT_TIME; ++ down_write(&dentry->d_inode->i_alloc_sem); ++ err = op->setattr_raw(dentry->d_inode, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); ++ } else ++ err = notify_change(dentry, &newattrs); ++ + mutex_unlock(&dentry->d_inode->i_mutex); + return err; + } +@@ -225,12 +236,12 @@ static long do_sys_truncate(const char _ + struct nameidata nd; + struct inode * inode; + int error; +- ++ intent_init(&nd.intent, IT_GETATTR); + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; + +- error = user_path_walk(path, &nd); ++ error = user_path_walk_it(path, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; +@@ -270,7 +281,7 @@ static long do_sys_truncate(const char _ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(nd.dentry, length, 0, NULL); ++ error = do_truncate(nd.dentry, length, 0, NULL, 0); + } + put_write_access(inode); + +@@ -322,7 +333,7 @@ static long do_sys_ftruncate(unsigned in + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length, 0, file); ++ error = do_truncate(dentry, length, 0, file, 0); + out_putf: + fput(file); + out: +@@ -407,9 +418,20 @@ asmlinkage long sys_utime(char __user * + (error = vfs_permission(&nd, MAY_WRITE)) != 0) + goto dput_and_out; + } +- mutex_lock(&inode->i_mutex); +- error = notify_change(nd.dentry, &newattrs); +- mutex_unlock(&inode->i_mutex); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ mutex_lock(&inode->i_mutex); ++ error = notify_change(nd.dentry, &newattrs); ++ mutex_unlock(&inode->i_mutex); ++ } ++ + dput_and_out: + path_release(&nd); + out: +@@ -495,6 +517,7 @@ asmlinkage long sys_faccessat(int dfd, c + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; ++ intent_init(&nd.intent, IT_GETATTR); + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ + return -EINVAL; +@@ -519,7 +542,7 @@ asmlinkage long sys_faccessat(int dfd, c + else + current->cap_effective = current->cap_permitted; + +- res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); ++ res = __user_walk_fd_it(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + if (!res) { + res = vfs_permission(&nd, mode); + /* SuS v2 requires we report a read only fs too */ +@@ -545,8 +568,9 @@ asmlinkage long sys_chdir(const char __u + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + if (error) + goto out; + +@@ -596,8 +620,9 @@ asmlinkage long sys_chroot(const char __ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + if (error) + goto out; + +@@ -618,38 +643,55 @@ out: + return error; + } + +-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++int chmod_common(struct dentry *dentry, mode_t mode) + { +- struct inode * inode; +- struct dentry * dentry; +- struct file * file; +- int err = -EBADF; ++ struct inode * inode = dentry->d_inode; + struct iattr newattrs; ++ int error = -EROFS; + +- file = fget(fd); +- if (!file) ++ if (IS_RDONLY(inode)) + goto out; + +- dentry = file->f_dentry; +- inode = dentry->d_inode; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; + +- audit_inode(NULL, inode); ++ newattrs.ia_mode = mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out; ++ } + +- err = -EROFS; +- if (IS_RDONLY(inode)) +- goto out_putf; +- err = -EPERM; ++ error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto out_putf; ++ goto out; ++ + mutex_lock(&inode->i_mutex); + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ error = notify_change(dentry, &newattrs); + mutex_unlock(&inode->i_mutex); ++out: ++ return error; ++} ++ ++asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++{ ++ struct file * file; ++ int err = -EBADF; ++ ++ file = fget(fd); ++ if (!file) ++ goto out; ++ ++ audit_inode(NULL, file->f_dentry->d_inode); ++ ++ err = chmod_common(file->f_dentry, mode); + +-out_putf: + fput(file); + out: + return err; +@@ -659,32 +701,12 @@ asmlinkage long sys_fchmodat(int dfd, co + mode_t mode) + { + struct nameidata nd; +- struct inode * inode; + int error; +- struct iattr newattrs; + + error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); + if (error) + goto out; +- inode = nd.dentry->d_inode; +- +- error = -EROFS; +- if (IS_RDONLY(inode)) +- goto dput_and_out; +- +- error = -EPERM; +- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto dput_and_out; +- +- mutex_lock(&inode->i_mutex); +- if (mode == (mode_t) -1) +- mode = inode->i_mode; +- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); +- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- error = notify_change(nd.dentry, &newattrs); +- mutex_unlock(&inode->i_mutex); +- +-dput_and_out: ++ error = chmod_common(nd.dentry, mode); + path_release(&nd); + out: + return error; +@@ -710,6 +732,18 @@ static int chown_common(struct dentry * + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; ++ ++ newattrs.ia_uid = user; ++ newattrs.ia_gid = group; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ return error; ++ } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + newattrs.ia_valid = ATTR_CTIME; +@@ -823,6 +857,7 @@ static struct file *__dentry_open(struct + error = open(inode, f); + if (error) + goto cleanup_all; ++ intent_release(f->f_it); + } + + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); +@@ -849,6 +884,7 @@ cleanup_all: + f->f_dentry = NULL; + f->f_vfsmnt = NULL; + cleanup_file: ++ intent_release(f->f_it); + put_filp(f); + dput(dentry); + mntput(mnt); +@@ -874,6 +910,7 @@ static struct file *do_filp_open(int dfd + { + int namei_flags, error; + struct nameidata nd; ++ intent_init(&nd.intent, IT_OPEN); + + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) +@@ -910,19 +947,19 @@ EXPORT_SYMBOL(filp_open); + struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) + { +- if (IS_ERR(nd->intent.open.file)) ++ if (IS_ERR(nd->intent.file)) + goto out; + if (IS_ERR(dentry)) + goto out_err; +- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), +- nd->intent.open.flags - 1, +- nd->intent.open.file, ++ nd->intent.file = __dentry_open(dget(dentry), mntget(nd->mnt), ++ nd->intent.flags - 1, ++ nd->intent.file, + open); + out: +- return nd->intent.open.file; ++ return nd->intent.file; + out_err: + release_open_intent(nd); +- nd->intent.open.file = (struct file *)dentry; ++ nd->intent.file = (struct file *)dentry; + goto out; + } + EXPORT_SYMBOL_GPL(lookup_instantiate_filp); +@@ -939,7 +976,8 @@ struct file *nameidata_to_filp(struct na + struct file *filp; + + /* Pick up the filp from the open intent */ +- filp = nd->intent.open.file; ++ filp = nd->intent.file; ++ filp->f_it = &nd->intent; + /* Has the filesystem initialised the file for us? */ + if (filp->f_dentry == NULL) + filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); +Index: LINUX-SRC-TREE/fs/stat.c +=================================================================== +--- LINUX-SRC-TREE.orig/fs/stat.c ++++ LINUX-SRC-TREE/fs/stat.c +@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod + + EXPORT_SYMBOL(generic_fillattr); + +-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; + int retval; +@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st + if (retval) + return retval; + ++ if (inode->i_op->getattr_it) ++ return inode->i_op->getattr_it(mnt, dentry, it, stat); + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +@@ -61,6 +63,11 @@ int vfs_getattr(struct vfsmount *mnt, st + return 0; + } + ++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + return vfs_getattr_it(mnt, dentry, NULL, stat); +} @@ -545,93 +1259,10 @@ fput(f); } return error; ---- linux-2.6.16.21-0.8.orig/fs/namespace.c -+++ linux-2.6.16.21-0.8/fs/namespace.c -@@ -75,6 +75,7 @@ struct vfsmount *alloc_vfsmnt(const char - INIT_LIST_HEAD(&mnt->mnt_share); - INIT_LIST_HEAD(&mnt->mnt_slave_list); - INIT_LIST_HEAD(&mnt->mnt_slave); -+ INIT_LIST_HEAD(&mnt->mnt_lustre_list); - if (name) { - int size = strlen(name) + 1; - char *newname = kmalloc(size, GFP_KERNEL); -@@ -155,6 +156,7 @@ static void __touch_namespace(struct nam - - static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) - { -+ memset(old_nd, 0, sizeof(*old_nd)); - old_nd->dentry = mnt->mnt_mountpoint; - old_nd->mnt = mnt->mnt_parent; - mnt->mnt_parent = mnt; -@@ -273,6 +275,9 @@ static inline void __mntput(struct vfsmo - { - struct super_block *sb = mnt->mnt_sb; - dput(mnt->mnt_root); -+ spin_lock(&dcache_lock); -+ list_del(&mnt->mnt_lustre_list); -+ spin_unlock(&dcache_lock); - free_vfsmnt(mnt); - deactivate_super(sb); - } -@@ -539,6 +544,8 @@ static int do_umount(struct vfsmount *mn - */ - - lock_kernel(); -+ if (sb->s_op->umount_lustre) -+ sb->s_op->umount_lustre(sb); - if ((flags & MNT_FORCE) && sb->s_op->umount_begin) - sb->s_op->umount_begin(sb); - unlock_kernel(); -@@ -871,6 +878,7 @@ static int do_loopback(struct nameidata - return err; - if (!old_name || !*old_name) - return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); - if (err) - return err; -@@ -956,6 +964,7 @@ static int do_move_mount(struct nameidat - return -EPERM; - if (!old_name || !*old_name) - return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); - if (err) - return err; -@@ -1271,6 +1280,7 @@ long do_mount(char *dev_name, char *dir_ - int retval = 0; - int mnt_flags = 0; - -+ intent_init(&nd.intent, IT_LOOKUP); - /* Discard magic */ - if ((flags & MS_MGC_MSK) == MS_MGC_VAL) - flags &= ~MS_MGC_MSK; ---- linux-2.6.16.21-0.8.orig/fs/exec.c -+++ linux-2.6.16.21-0.8/fs/exec.c -@@ -128,7 +128,9 @@ asmlinkage long sys_uselib(const char __ - struct nameidata nd; - int error; - -- error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); -+ intent_init(&nd.intent, IT_OPEN); -+ error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, -+ FMODE_READ | FMODE_EXEC); - if (error) - goto out; - -@@ -478,7 +480,9 @@ struct file *open_exec(const char *name) - int err; - struct file *file; - -- err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ); -+ intent_init(&nd.intent, IT_OPEN); -+ err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, -+ FMODE_READ | FMODE_EXEC); - file = ERR_PTR(err); - - if (!err) { ---- linux-2.6.16.21-0.8.orig/include/linux/dcache.h -+++ linux-2.6.16.21-0.8/include/linux/dcache.h +Index: LINUX-SRC-TREE/include/linux/dcache.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/dcache.h ++++ LINUX-SRC-TREE/include/linux/dcache.h @@ -36,6 +36,9 @@ struct qstr { const unsigned char *name; }; @@ -642,21 +1273,19 @@ struct dentry_stat_t { int nr_dentry; int nr_unused; ---- linux-2.6.16.21-0.8.orig/include/linux/fs.h -+++ linux-2.6.16.21-0.8/include/linux/fs.h -@@ -81,6 +81,11 @@ extern int leases_enable, dir_notify_ena - #define FMODE_PREAD 8 - #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -+/* File is being opened for execution. Primary users of this flag are -+ distributed filesystems that can use it to achieve correct ETXTBUSY -+ behavior for cross-node execution/opening_for_writing of files */ -+#define FMODE_EXEC 16 -+ - #define RW_MASK 1 - #define RWA_MASK 2 - #define READ 0 -@@ -265,6 +266,8 @@ typedef void (dio_iodone_t)(struct kiocb +Index: LINUX-SRC-TREE/include/linux/fs.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/fs.h ++++ LINUX-SRC-TREE/include/linux/fs.h +@@ -61,6 +61,7 @@ extern int dir_notify_enable; + + #define FMODE_READ 1 + #define FMODE_WRITE 2 ++#define FMODE_EXEC 16 + + /* Internal kernel extensions */ + #define FMODE_LSEEK 4 +@@ -272,6 +273,8 @@ typedef void (dio_iodone_t)(struct kiocb #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 #define ATTR_FILE 8192 @@ -665,7 +1294,7 @@ #define ATTR_NO_BLOCK 32768 /* Return EAGAIN and don't block on long truncates */ /* -@@ -500,6 +503,7 @@ struct inode { +@@ -517,6 +520,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; @@ -673,7 +1302,7 @@ __u32 i_generation; -@@ -647,6 +651,7 @@ struct file { +@@ -664,6 +668,7 @@ struct file { spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; @@ -681,7 +1310,27 @@ }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); -@@ -1049,7 +1054,9 @@ struct inode_operations { +@@ -1059,20 +1064,29 @@ struct inode_operations { + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*link_raw) (struct nameidata *,struct nameidata *); + int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink_raw) (struct nameidata *); + int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir_raw) (struct nameidata *,int); + int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir_raw) (struct nameidata *); + int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); + int (*readlink) (struct dentry *, char __user *,int); + void * (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -691,7 +1340,7 @@ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -1090,6 +1097,7 @@ struct super_operations { +@@ -1113,6 +1127,7 @@ struct super_operations { int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); @@ -699,7 +1348,7 @@ int (*show_options)(struct seq_file *, struct vfsmount *); -@@ -1292,6 +1300,7 @@ extern int may_umount_tree(struct vfsmou +@@ -1322,6 +1337,7 @@ extern int may_umount_tree(struct vfsmou extern int may_umount(struct vfsmount *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern void release_mounts(struct list_head *); @@ -707,7 +1356,13 @@ extern long do_mount(char *, char *, char *, unsigned long, void *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, -@@ -1353,6 +1362,7 @@ extern long do_sys_open(int fdf, const c +@@ -1378,11 +1394,12 @@ static inline int break_lease(struct ino + /* fs/open.c */ + + extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, +- struct file *filp); ++ struct file *filp, int called_from_open); + extern long do_sys_open(int fdf, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); @@ -715,8 +1370,23 @@ extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); ---- linux-2.6.16.21-0.8.orig/include/linux/namei.h -+++ linux-2.6.16.21-0.8/include/linux/namei.h +Index: LINUX-SRC-TREE/include/linux/mount.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/mount.h ++++ LINUX-SRC-TREE/include/linux/mount.h +@@ -46,6 +46,8 @@ struct vfsmount { + struct list_head mnt_slave; /* slave list entry */ + struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ + struct namespace *mnt_namespace; /* containing namespace */ ++ struct list_head mnt_lustre_list; /* GNS mount list */ ++ unsigned long mnt_last_used; /* for GNS auto-umount (jiffies) */ + int mnt_pinned; + }; + +Index: LINUX-SRC-TREE/include/linux/namei.h +=================================================================== +--- LINUX-SRC-TREE.orig/include/linux/namei.h ++++ LINUX-SRC-TREE/include/linux/namei.h @@ -5,10 +5,39 @@ struct vfsmount; @@ -797,7 +1467,7 @@ /* * Intent data */ -@@ -57,10 +99,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -57,18 +99,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); @@ -815,38 +1485,16 @@ +extern void intent_release(struct lookup_intent *); + extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); ++extern int FASTCALL(path_lookup_it(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); ---- linux-2.6.16.21-0.8.orig/include/linux/mount.h -+++ linux-2.6.16.21-0.8/include/linux/mount.h -@@ -46,6 +46,8 @@ struct vfsmount { - struct list_head mnt_slave; /* slave list entry */ - struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ - struct namespace *mnt_namespace; /* containing namespace */ -+ struct list_head mnt_lustre_list; /* GNS mount list */ -+ unsigned long mnt_last_used; /* for GNS auto-umount (jiffies) */ - int mnt_pinned; - }; - ---- linux-2.6.16.21-0.8.orig/fs/9p/vfs_inode.c -+++ linux-2.6.16.21-0.8/fs/9p/vfs_inode.c -@@ -469,7 +469,7 @@ v9fs_vfs_create(struct inode *dir, struc - perm = unixmode2p9mode(v9ses, mode); - - if (nd && nd->flags & LOOKUP_OPEN) -- flags = nd->intent.open.flags - 1; -+ flags = nd->intent.flags - 1; - else - flags = O_RDWR; - ---- linux-2.6.16.21-0.8.orig/fs/fuse/dir.c -+++ linux-2.6.16.21-0.8/fs/fuse/dir.c -@@ -242,7 +242,7 @@ static int fuse_create_open(struct inode - struct fuse_entry_out outentry; - struct fuse_file *ff; - struct file *file; -- int flags = nd->intent.open.flags - 1; -+ int flags = nd->intent.flags - 1; - - err = -ENOSYS; - if (fc->no_create) + extern void path_release(struct nameidata *); + extern void path_release_on_umount(struct nameidata *); + + extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); +-extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); ++extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, ++ int open_flags, int create_mode); + extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); + extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6.12.patch b/lustre/kernel_patches/patches/vfs_intent-2.6.12.patch index 966bf23..2e700ef 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6.12.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6.12.patch @@ -1,19 +1,6 @@ ---- - fs/exec.c | 14 +++--- - fs/inode.c | 1 - fs/namei.c | 99 +++++++++++++++++++++++++++++++++++++++++++++---- - fs/namespace.c | 10 ++++ - fs/open.c | 70 +++++++++++++++++++++------------- - fs/stat.c | 24 ++++++++--- - include/linux/dcache.h | 3 + - include/linux/fs.h | 10 ++++ - include/linux/mount.h | 2 - include/linux/namei.h | 61 ++++++++++++++++++++++++++---- - 10 files changed, 241 insertions(+), 53 deletions(-) - --- linux-2.6.12.6.orig/fs/exec.c +++ linux-2.6.12.6/fs/exec.c -@@ -122,9 +122,10 @@ asmlinkage long sys_uselib(const char __ +@@ -122,9 +122,10 @@ struct file * file; struct nameidata nd; int error; @@ -26,7 +13,7 @@ if (error) goto out; -@@ -136,7 +137,7 @@ asmlinkage long sys_uselib(const char __ +@@ -136,7 +137,7 @@ if (error) goto exit; @@ -35,7 +22,7 @@ error = PTR_ERR(file); if (IS_ERR(file)) goto out; -@@ -492,8 +493,9 @@ struct file *open_exec(const char *name) +@@ -492,8 +493,9 @@ int err; struct file *file; @@ -47,7 +34,7 @@ file = ERR_PTR(err); if (!err) { -@@ -506,7 +508,7 @@ struct file *open_exec(const char *name) +@@ -506,7 +508,7 @@ err = -EACCES; file = ERR_PTR(err); if (!err) { @@ -58,7 +45,7 @@ if (err) { --- linux-2.6.12.6.orig/fs/namei.c +++ linux-2.6.12.6/fs/namei.c -@@ -301,8 +301,19 @@ int deny_write_access(struct file * file +@@ -301,8 +301,19 @@ return 0; } @@ -78,7 +65,7 @@ dput(nd->dentry); mntput(nd->mnt); } -@@ -392,8 +403,11 @@ static struct dentry * real_lookup(struc +@@ -392,8 +403,11 @@ { struct dentry * result; struct inode *dir = parent->d_inode; @@ -90,7 +77,7 @@ /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. -@@ -427,13 +441,16 @@ static struct dentry * real_lookup(struc +@@ -427,13 +441,16 @@ * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ @@ -109,7 +96,7 @@ return result; } -@@ -462,6 +479,7 @@ static inline int __vfs_follow_link(stru +@@ -462,6 +479,7 @@ { int res = 0; char *name; @@ -117,7 +104,7 @@ if (IS_ERR(link)) goto fail; -@@ -471,6 +489,7 @@ static inline int __vfs_follow_link(stru +@@ -471,6 +489,7 @@ /* weird __emul_prefix() stuff did it */ goto out; } @@ -125,7 +112,7 @@ res = link_path_walk(link, nd); out: if (nd->depth || res || nd->last_type!=LAST_NORM) -@@ -703,6 +722,33 @@ fail: +@@ -703,6 +722,33 @@ return PTR_ERR(dentry); } @@ -159,7 +146,7 @@ /* * Name resolution. * This is the basic name resolution function, turning a pathname into -@@ -800,7 +846,11 @@ static fastcall int __link_path_walk(con +@@ -800,7 +846,11 @@ goto out_dput; if (inode->i_op->follow_link) { @@ -171,7 +158,7 @@ if (err) goto return_err; err = -ENOENT; -@@ -839,6 +889,23 @@ last_component: +@@ -839,6 +889,23 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -195,7 +182,7 @@ goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { -@@ -846,7 +913,9 @@ last_component: +@@ -846,7 +913,9 @@ if (err < 0) break; } @@ -205,7 +192,7 @@ if (err) break; inode = next.dentry->d_inode; -@@ -1097,7 +1166,7 @@ struct dentry * lookup_hash(struct qstr +@@ -1097,7 +1166,7 @@ } /* SMP-safe */ @@ -214,7 +201,7 @@ { unsigned long hash; struct qstr this; -@@ -1117,11 +1186,16 @@ struct dentry * lookup_one_len(const cha +@@ -1117,11 +1186,16 @@ } this.hash = end_name_hash(hash); @@ -232,7 +219,7 @@ /* * namei() * -@@ -1133,7 +1207,7 @@ access: +@@ -1133,7 +1207,7 @@ * that namei follows links, while lnamei does not. * SMP-safe */ @@ -241,7 +228,7 @@ { char *tmp = getname(name); int err = PTR_ERR(tmp); -@@ -1145,6 +1219,12 @@ int fastcall __user_walk(const char __us +@@ -1145,6 +1219,12 @@ return err; } @@ -254,7 +241,7 @@ /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. -@@ -1426,8 +1506,8 @@ int open_namei(const char * pathname, in +@@ -1426,8 +1506,8 @@ acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ @@ -265,7 +252,7 @@ /* * The simplest case - just a plain lookup. -@@ -1442,6 +1522,7 @@ int open_namei(const char * pathname, in +@@ -1442,6 +1522,7 @@ /* * Create - we need to know the parent. */ @@ -273,7 +260,7 @@ error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; -@@ -1458,7 +1539,9 @@ int open_namei(const char * pathname, in +@@ -1458,7 +1539,9 @@ dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); @@ -283,7 +270,7 @@ path.mnt = nd->mnt; do_last: -@@ -1564,7 +1647,9 @@ do_link: +@@ -1564,7 +1647,9 @@ } dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -295,7 +282,7 @@ goto do_last; --- linux-2.6.12.6.orig/fs/namespace.c +++ linux-2.6.12.6/fs/namespace.c -@@ -62,6 +62,7 @@ struct vfsmount *alloc_vfsmnt(const char +@@ -62,6 +62,7 @@ INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_fslink); @@ -303,7 +290,7 @@ if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); -@@ -113,6 +114,7 @@ static inline int check_mnt(struct vfsmo +@@ -113,6 +114,7 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) { @@ -311,7 +298,7 @@ old_nd->dentry = mnt->mnt_mountpoint; old_nd->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; -@@ -176,6 +178,9 @@ void __mntput(struct vfsmount *mnt) +@@ -176,6 +178,9 @@ { struct super_block *sb = mnt->mnt_sb; dput(mnt->mnt_root); @@ -321,7 +308,7 @@ free_vfsmnt(mnt); deactivate_super(sb); } -@@ -402,6 +407,8 @@ static int do_umount(struct vfsmount *mn +@@ -402,6 +407,8 @@ */ lock_kernel(); @@ -330,7 +317,7 @@ if( (flags&MNT_FORCE) && sb->s_op->umount_begin) sb->s_op->umount_begin(sb); unlock_kernel(); -@@ -627,6 +634,7 @@ static int do_loopback(struct nameidata +@@ -627,6 +634,7 @@ return err; if (!old_name || !*old_name) return -EINVAL; @@ -338,7 +325,7 @@ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat +@@ -701,6 +709,7 @@ return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -346,7 +333,7 @@ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; -@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di +@@ -1012,6 +1021,7 @@ int retval = 0; int mnt_flags = 0; @@ -356,7 +343,7 @@ flags &= ~MS_MGC_MSK; --- linux-2.6.12.6.orig/fs/open.c +++ linux-2.6.12.6/fs/open.c -@@ -215,12 +215,12 @@ static inline long do_sys_truncate(const +@@ -215,12 +215,12 @@ struct nameidata nd; struct inode * inode; int error; @@ -371,7 +358,7 @@ if (error) goto out; inode = nd.dentry->d_inode; -@@ -474,6 +474,7 @@ asmlinkage long sys_access(const char __ +@@ -474,6 +474,7 @@ int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; @@ -379,7 +366,7 @@ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; -@@ -498,13 +499,14 @@ asmlinkage long sys_access(const char __ +@@ -498,13 +499,14 @@ else current->cap_effective = current->cap_permitted; @@ -395,7 +382,7 @@ path_release(&nd); } -@@ -519,8 +521,9 @@ asmlinkage long sys_chdir(const char __u +@@ -519,8 +521,9 @@ { struct nameidata nd; int error; @@ -406,7 +393,7 @@ if (error) goto out; -@@ -570,8 +573,9 @@ asmlinkage long sys_chroot(const char __ +@@ -570,8 +573,9 @@ { struct nameidata nd; int error; @@ -417,7 +404,7 @@ if (error) goto out; -@@ -750,27 +754,8 @@ asmlinkage long sys_fchown(unsigned int +@@ -750,27 +754,8 @@ * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. */ @@ -447,7 +434,7 @@ { struct file * f; struct inode *inode; -@@ -782,6 +767,7 @@ struct file *dentry_open(struct dentry * +@@ -782,6 +767,7 @@ goto cleanup_dentry; f->f_flags = flags; f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; @@ -455,7 +442,7 @@ inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); -@@ -800,6 +786,7 @@ struct file *dentry_open(struct dentry * +@@ -800,6 +786,7 @@ error = f->f_op->open(inode,f); if (error) goto cleanup_all; @@ -463,7 +450,7 @@ } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -@@ -825,6 +812,7 @@ cleanup_all: +@@ -825,6 +812,7 @@ cleanup_file: put_filp(f); cleanup_dentry: @@ -471,7 +458,7 @@ dput(dentry); mntput(mnt); return ERR_PTR(error); -@@ -832,6 +820,36 @@ cleanup_dentry: +@@ -832,6 +820,36 @@ EXPORT_SYMBOL(dentry_open); @@ -510,7 +497,7 @@ */ --- linux-2.6.12.6.orig/fs/stat.c +++ linux-2.6.12.6/fs/stat.c -@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod +@@ -38,7 +38,7 @@ EXPORT_SYMBOL(generic_fillattr); @@ -519,7 +506,7 @@ { struct inode *inode = dentry->d_inode; int retval; -@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st +@@ -47,6 +47,8 @@ if (retval) return retval; @@ -528,7 +515,7 @@ if (inode->i_op->getattr) return inode->i_op->getattr(mnt, dentry, stat); -@@ -63,14 +65,20 @@ int vfs_getattr(struct vfsmount *mnt, st +@@ -63,14 +65,20 @@ EXPORT_SYMBOL(vfs_getattr); @@ -551,7 +538,7 @@ path_release(&nd); } return error; -@@ -82,10 +90,11 @@ int vfs_lstat(char __user *name, struct +@@ -82,10 +90,11 @@ { struct nameidata nd; int error; @@ -565,7 +552,7 @@ path_release(&nd); } return error; -@@ -97,9 +106,12 @@ int vfs_fstat(unsigned int fd, struct ks +@@ -97,9 +106,12 @@ { struct file *f = fget(fd); int error = -EBADF; @@ -581,7 +568,7 @@ return error; --- linux-2.6.12.6.orig/fs/inode.c +++ linux-2.6.12.6/fs/inode.c -@@ -230,6 +230,7 @@ void __iget(struct inode * inode) +@@ -230,6 +230,7 @@ inodes_stat.nr_unused--; } @@ -599,7 +586,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/cache.h> -@@ -37,6 +38,8 @@ struct qstr { +@@ -37,6 +38,8 @@ const unsigned char *name; }; @@ -610,7 +597,7 @@ int nr_unused; --- linux-2.6.12.6.orig/include/linux/fs.h +++ linux-2.6.12.6/include/linux/fs.h -@@ -58,6 +58,7 @@ extern int dir_notify_enable; +@@ -58,6 +58,7 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 @@ -618,7 +605,7 @@ /* Internal kernel extensions */ #define FMODE_LSEEK 4 -@@ -260,6 +261,8 @@ typedef void (dio_iodone_t)(struct inode +@@ -260,6 +261,8 @@ #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 @@ -627,7 +614,7 @@ /* * This is the Inode Attributes structure, used for notify_change(). It -@@ -463,6 +466,7 @@ struct inode { +@@ -463,6 +466,7 @@ struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; @@ -635,7 +622,7 @@ __u32 i_generation; -@@ -600,6 +604,7 @@ struct file { +@@ -600,6 +604,7 @@ spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; @@ -643,7 +630,7 @@ }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); -@@ -968,7 +973,9 @@ struct inode_operations { +@@ -968,7 +973,9 @@ void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -653,7 +640,7 @@ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -1008,6 +1015,7 @@ struct super_operations { +@@ -1008,6 +1015,7 @@ int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); @@ -661,7 +648,7 @@ int (*show_options)(struct seq_file *, struct vfsmount *); -@@ -1210,6 +1218,7 @@ extern int unregister_filesystem(struct +@@ -1210,6 +1218,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); @@ -669,7 +656,7 @@ extern long do_mount(char *, char *, char *, unsigned long, void *); extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1262,6 +1271,7 @@ static inline int break_lease(struct ino +@@ -1262,6 +1271,7 @@ extern int do_truncate(struct dentry *, loff_t start); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); @@ -738,7 +725,7 @@ enum { MAX_NESTED_LINKS = 5 }; struct nameidata { -@@ -21,10 +62,7 @@ struct nameidata { +@@ -21,10 +62,7 @@ unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; @@ -750,7 +737,7 @@ }; /* -@@ -47,6 +85,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -47,6 +85,8 @@ #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 #define LOOKUP_REVAL 64 @@ -759,7 +746,7 @@ /* * Intent data */ -@@ -55,6 +95,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -55,6 +95,12 @@ #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); @@ -772,7 +759,7 @@ #define user_path_walk(name,nd) \ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ -@@ -67,7 +113,6 @@ extern void path_release_on_umount(struc +@@ -67,7 +113,6 @@ extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); @@ -782,7 +769,7 @@ --- linux-2.6.12.6.orig/include/linux/mount.h +++ linux-2.6.12.6/include/linux/mount.h -@@ -36,6 +36,8 @@ struct vfsmount +@@ -36,6 +36,8 @@ struct list_head mnt_list; struct list_head mnt_fslink; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ diff --git a/lustre/kernel_patches/patches/vfs_races-2.6-rhel4.patch b/lustre/kernel_patches/patches/vfs_races-2.6-rhel4.patch index ccca64d..3632a41 100644 --- a/lustre/kernel_patches/patches/vfs_races-2.6-rhel4.patch +++ b/lustre/kernel_patches/patches/vfs_races-2.6-rhel4.patch @@ -1,8 +1,7 @@ -Index: linux-2.6.7-vanilla/fs/dcache.c -=================================================================== ---- linux-2.6.7-vanilla.orig/fs/dcache.c 2004-07-01 12:09:19.000000000 +0300 -+++ linux-2.6.7-vanilla/fs/dcache.c 2004-07-01 12:29:12.510193264 +0300 -@@ -219,7 +219,14 @@ +diff -urp RH_2_6_9_42_0_3.orig/fs/dcache.c RH_2_6_9_42_0_3/fs/dcache.c +--- RH_2_6_9_42_0_3.orig/fs/dcache.c ++++ RH_2_6_9_42_0_3/fs/dcache.c +@@ -229,7 +229,14 @@ int d_invalidate(struct dentry * dentry) spin_unlock(&dcache_lock); return 0; } @@ -18,7 +17,7 @@ Index: linux-2.6.7-vanilla/fs/dcache.c * Check whether to do a partial shrink_dcache * to get rid of unused child entries. */ -@@ -1114,19 +1121,28 @@ +@@ -1187,19 +1194,28 @@ void d_delete(struct dentry * dentry) * Adds a dentry to the hash according to its name. */ @@ -50,11 +49,10 @@ Index: linux-2.6.7-vanilla/fs/dcache.c #define do_switch(x,y) do { \ __typeof__ (x) __tmp = x; \ x = y; y = __tmp; } while (0) -Index: linux-2.6.7-vanilla/include/linux/dcache.h -=================================================================== ---- linux-2.6.7-vanilla.orig/include/linux/dcache.h 2004-07-01 12:24:53.602553208 +0300 -+++ linux-2.6.7-vanilla/include/linux/dcache.h 2004-07-01 12:27:29.757814000 +0300 -@@ -159,6 +159,8 @@ +diff -urp RH_2_6_9_42_0_3.orig/include/linux/dcache.h RH_2_6_9_42_0_3/include/linux/dcache.h +--- RH_2_6_9_42_0_3.orig/include/linux/dcache.h ++++ RH_2_6_9_42_0_3/include/linux/dcache.h +@@ -157,6 +157,8 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 diff --git a/lustre/kernel_patches/prepare_tree.sh b/lustre/kernel_patches/prepare_tree.sh new file mode 100755 index 0000000..7d688db --- /dev/null +++ b/lustre/kernel_patches/prepare_tree.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +die() { + echo -e $* >&2 + echo aborting.. >&2 + exit 1 +} + +canon() { + cd $1 + CANON=$PWD + cd - +} + +canon $(dirname $0) +MYDIR=$CANON + +while [ ${#*} -gt 1 ]; do + case "$1" in + -t) + shift; + TREE=$1 + ;; + -s) + shift; + SERIES=$1 + ;; + *) + die "unknown argument $1" + break; + ;; + esac + shift; +done + +[ -z "$TREE" -o -z "$SERIES" ] && die "I need a tree and series:\n\t$0 -t kernel_dir -s series_name" +[ ! -d $TREE ] && die "kernel tree '$TREE' isn't a directory" +SERIES=$(basename $SERIES) +[ ! -f $MYDIR/series/$SERIES ] && die "no series file '$SERIES'" + +canon $TREE +TREE=$CANON + +# patch scripts wants a relative path from the linux tree to +# its patch pile :( + +MY=$(echo $MYDIR | sed -e 's_^/__') +TR=$(echo $TREE | sed -e 's_^/__') + +while true ; do + M=$(echo $MY | cut -d/ -f 1) + T=$(echo $TR | cut -d/ -f 1) + + if [ $M != $T ]; then + break; + fi + + MY=$(echo $MY | cut -d/ -f 2-) + TR=$(echo $TR | cut -d/ -f 2-) +done + +[ $MY == $MYDIR ] && die "bad! $MY == $MYDIR" + +REVERSE=$(revpath $TR)${MY} +ABSINO=$(stat $MYDIR | awk '($3 == "Inode:") {print $4}') +REVINO=`(cd $TREE ; stat $REVERSE | awk '($3 == "Inode:") {print $4}')` + +[ $ABSINO != $REVINO ] && die "inodes differ, my reverse path is bad?" + +echo export PATCHSCRIPTS_LIBDIR=$REVERSE + +cd $TREE +ln -sf $REVERSE/series/$SERIES series + +PATH_ELEMENTS=$(echo $PATH | sed -e 's/:/ /g') + +NEW_PATH=$MYDIR/scripts + +for p in $PATH_ELEMENTS; do + if echo $p | grep kernel_patches/scripts > /dev/null 2>&1 ; then + continue; + fi + NEW_PATH="$NEW_PATH:$p" +done + +echo export PATH=$NEW_PATH + +echo "'$TREE' successfully setup" >&2 diff --git a/lustre/kernel_patches/scripts/added-by-patch b/lustre/kernel_patches/scripts/added-by-patch new file mode 100755 index 0000000..e9ccef6 --- /dev/null +++ b/lustre/kernel_patches/scripts/added-by-patch @@ -0,0 +1,14 @@ +#!/bin/sh +# Extract names of new files from a patch, print them out + +PATCHFILE=$1 +case "$PATCHFILE" in +*.gz) CMD="gzip -d < $PATCHFILE";; +*) CMD="cat $PATCHFILE";; +esac + +TMP=$(mktemp /tmp/abp.XXXXXX) + +eval $CMD | egrep '^--- .*1969|^--- .*1970' > $TMP +sed -e 's@[^/]*/\([^ ]*\).*@\1@' < $TMP | sed -e 's@^linux/@@' | sort +rm -f $TMP diff --git a/lustre/kernel_patches/scripts/apatch b/lustre/kernel_patches/scripts/apatch new file mode 100755 index 0000000..be1c68e --- /dev/null +++ b/lustre/kernel_patches/scripts/apatch @@ -0,0 +1,97 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} +do_apply() +{ + FILES=$(cat $P/pc/$PATCH_NAME.pc) + for file in $FILES + do + copy_file_to_bup $file $PATCH_NAME + done + + silent=-s + if [ $opt_force != 0 ] + then + silent= + fi + + if patch -p1 $silent -i "$1" || [ $opt_force != 0 ] + then + true + else + echo SOMETHING WENT WRONG + exit 1 + fi +} + +add_to_db() +{ + basename "$1" >> "$DB" +} + +usage() +{ + echo "Usage: apatch patchname" + exit 1 +} + +opt_force=0 +PATCH_NAMES="" + +for i in $* +do + case "$i" in + -f) + opt_force=1;; + *) + PATCH_NAMES="$PATCH_NAMES $i" + esac +done + +if [ x"$PATCH_NAMES" == x ] +then + usage +fi + +apatch() +{ + PATCH_NAME=$(stripit $1) + + need_file_there $P/pc/$PATCH_NAME.pc + + if is_applied "$PATCH_NAME" + then + echo "$PATCH_NAME" is already applied + exit 1 + fi + + if [ $opt_force != 0 ] + then + echo FORCING PATCH + fi + + if [ $opt_force != 0 ] || can_apply $P/patches/"$PATCH_NAME".patch + then + check_pc_match $P/patches/"$PATCH_NAME".patch $P/pc/"$PATCH_NAME".pc + do_apply $P/patches/"$PATCH_NAME".patch + add_to_db "$PATCH_NAME" + echo applied $PATCH_NAME + echo + else + echo "$PATCH_NAME" does not apply + exit 1 + fi +} + +for i in $PATCH_NAMES +do + if ! apatch $i + then + exit 1 + fi +done + diff --git a/lustre/kernel_patches/scripts/cat-series b/lustre/kernel_patches/scripts/cat-series new file mode 100755 index 0000000..c38b1a8 --- /dev/null +++ b/lustre/kernel_patches/scripts/cat-series @@ -0,0 +1,17 @@ +#!/bin/sh + +. patchfns 2>/dev/null || +. /usr/lib/patch-scripts/patchfns 2>/dev/null || +. $PATCHSCRIPTS_LIBDIR/patchfns 2>/dev/null || +{ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +if [ $# -eq 0 ] +then + cat_series +else + __cat_series $1 +fi diff --git a/lustre/kernel_patches/scripts/combine-applied b/lustre/kernel_patches/scripts/combine-applied new file mode 100755 index 0000000..60ab7e9 --- /dev/null +++ b/lustre/kernel_patches/scripts/combine-applied @@ -0,0 +1,45 @@ +#!/bin/sh + +# +# Make superpatch from currently applied patches using combinediff. +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: combine-applied output-file" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +need_file_there applied-patches +CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) +APPLY_FILE=$(mktemp /tmp/cmbd-XXXXXXXX) +for FILE in `cat applied-patches` +do + if [ -f $P/pc/$FILE.pc ] + then + cat $P/pc/$FILE.pc >> $CURRENT + elif [ -f $P/pc/$FILE ] + then + cat $P/pc/$FILE >> $CURRENT + fi +done +cat $CURRENT | sort -u > $APPLY_FILE +echo > $1 +for FILE in `cat $APPLY_FILE` +do + diff -uNp $FILE~orig $FILE >> $1 +done +rm -rf $APPLY_FILE +rm -rf $CURRENT + diff --git a/lustre/kernel_patches/scripts/combine-series b/lustre/kernel_patches/scripts/combine-series new file mode 100755 index 0000000..d00ba36 --- /dev/null +++ b/lustre/kernel_patches/scripts/combine-series @@ -0,0 +1,43 @@ +#!/bin/sh + +# +# Make superpatch from current series using combinediff. +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: combine-series output-file" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +need_file_there series +CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) +for FILE in $(cat series) +do + NEXT=$(mktemp /tmp/cmbd-XXXXXXXX) + if [ -f $P/patches/$FILE ] + then + combinediff $CURRENT $P/patches/$FILE > $NEXT + elif [ -f $P/patches/$FILE.patch ] + then + combinediff $CURRENT $P/patches/$FILE.patch > $NEXT + elif [ -f $FILE ] + then + combinediff $CURRENT $FILE > $NEXT + fi + rm $CURRENT + CURRENT=$NEXT +done + +mv $NEXT "$1" diff --git a/lustre/kernel_patches/scripts/cvs-take-patch b/lustre/kernel_patches/scripts/cvs-take-patch new file mode 100755 index 0000000..c6a6a2a --- /dev/null +++ b/lustre/kernel_patches/scripts/cvs-take-patch @@ -0,0 +1,78 @@ +#!/bin/sh + +doit() +{ + echo $* + $* +} + +usage() +{ + echo "Usage: cvs-take-patch patch_file_name" + exit 1 +} + +# +# Find the highest level directory in $1 which does not +# contain the directory $2. Return it in $MISSING +# +highest_missing() +{ + START_DIR="$1" + NAME="$2" + MISSING="" + WHERE=$(dirname "$START_DIR") + PREV_WHERE=$START_DIR + while [ x"$WHERE" != x"$PREV_WHERE" ] + do + WHERE="$PREV_WHERE" + if [ ! -d "$WHERE"/"$NAME" ] + then + MISSING="$WHERE" + fi + PREV_WHERE=$(dirname "$WHERE") + done + echo highest_missing returns $MISSING +} + +# +# Add all new directries to CVS, top-down +# $1: name of a directory +# $2: name of the CVS directory +# +add_cvs_dirs() +{ + MISSING=foo + while [ "$MISSING" != "" ] + do + highest_missing $1 $2 + if [ x"$MISSING" != "x" ] + then + if [ ! -d "$MISSING"/"$2" ] + then + doit cvs add $MISSING + fi + fi + done +} + +PATCHFILE=$1 + +REMOVEDFILES=$(removed-by-patch $PATCHFILE) +if [ "$REMOVEDFILES" != "" ] +then + doit cvs remove $REMOVEDFILES +fi + +NEWFILES=$(added-by-patch $PATCHFILE) +for i in $NEWFILES +do + DIRNAME=$(dirname $i) + echo "Looking at $DIRNAME" + add_cvs_dirs $DIRNAME CVS +done + +if [ "$NEWFILES" != "" ] +then + doit cvs add $NEWFILES +fi diff --git a/lustre/kernel_patches/scripts/export_patch b/lustre/kernel_patches/scripts/export_patch new file mode 100755 index 0000000..d378417 --- /dev/null +++ b/lustre/kernel_patches/scripts/export_patch @@ -0,0 +1,55 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "export_patch: export the patches listed in ./series" 1>&2 + echo "usage: export_patch destination-directory [prefix] " 1>&2 + exit 1 +} + +DIR="$1" +PREFIX="$2""_" + +if [ "$DIR" = "" ] +then + usage +fi + +if [ -e "$DIR" -a ! -d "$DIR" ] +then + echo "$DIR exists already, but is not a directory." 1>&2 + exit 1 +fi + +if [ ! -r ./series ] +then + echo "./series is not readable." 1>&2 + exit 1 +fi + +mkdir -p "$DIR" || exit 1 + +count=1 +for x in `cat ./series` +do + fname=`echo "$count" "$PREFIX" "$x" |\ + awk '{ if ( $2 != "_" ) + printf("p%05d_%s%s\n", $1, $2, $3); + else + printf("p%05d_%s\n", $1, $3); + }'` + if [ ! -r $P/patches/"$x" ] + then + echo "$P/patches/"$x" is not readable. skipping." 1>&2 + continue; + fi + cp -f $P/patches/"$x" "$DIR"/"$fname" || continue; + count=`expr $count + 1` +done + diff --git a/lustre/kernel_patches/scripts/extract_description b/lustre/kernel_patches/scripts/extract_description new file mode 100755 index 0000000..6fa0e68 --- /dev/null +++ b/lustre/kernel_patches/scripts/extract_description @@ -0,0 +1,87 @@ +#!/bin/sh + +insert_line() +{ + PATTERN="$1" + LINE="$2" + FILE="$3" + awk ' BEGIN { found=0; } + /'"$PATTERN"'/ { + print; + if (!found) + printf("%s\n", "'$LINE'"); + found=1; + next; + } + { print; } + ' < "$FILE" +} + +# extract the description from the top of a patch +# filter stdin +# collapse adjacent blank lines to a single blank line +# remove any lines that look like diffstat output +# stop output on encountering a line beginning with '---' (beginning of patch) + + TMPFILE=`mktemp /tmp/xdtmp.XXXXXX` || exit 1 + formail -kfcb -X 'From:' -X 'Subject:' |\ + awk ' + BEGIN { found_end=0; lastone="x"; } + /^ .* [|] +[0-9]+ [+-]+$/ { + #/* we found something like diffstat output... */ + if (found_end == 1) { + /* we are past end of diffstat, let it pass */ + print; + } + next; + } + /^ [1-9][0-9]* files changed/ { + #/* end of diffstat output, stop filtering diffstat */ + found_end=1; + next; + } + /^--- / { exit; } + { + #/* collapse adjacent blank lines to 1 blank line */ + if ( $0 == "" && lastone == "" ) + next; + else + print; + lastone=$0; + } + ' | awk '{ if ($0 == "" && FNR == 1) next; print; }' > "$TMPFILE" + + descs=`head -10 $TMPFILE | grep -c '^[ ]*DESC[ ]*$'` + if [ "$descs" = "0" ] + then + # DESC is not 1st non blank line in the file + echo "DESC" + descs=0 + fi + edescs=`grep -c '^EDESC$' "$TMPFILE"` + subjects=`grep -c '^[ ]*Subject[:]' "$TMPFILE"` + froms=`grep -c '^[ ]*From[:]' "$TMPFILE"` + if [ "$edescs" = "0" ] + then + if [ "$subjects" != "0" ] + then + insert_line '^Subject[:]' 'EDESC' "$TMPFILE" + else + if [ "$froms" != "0" ] + then + insert_line '^From[:]' 'EDESC' "$TMPFILE" + else + if [ "$descs" = "0" ] + then + # blank DESC line... + echo '(undescribed patch)' + echo EDESC + cat "$TMPFILE" + else + insert_line '^DESC$' "EDESC" "$TMPFILE" + fi + fi + fi + else + cat $TMPFILE + fi diff --git a/lustre/kernel_patches/scripts/forkpatch b/lustre/kernel_patches/scripts/forkpatch new file mode 100755 index 0000000..cef297c --- /dev/null +++ b/lustre/kernel_patches/scripts/forkpatch @@ -0,0 +1,76 @@ +#!/bin/sh + +# +# Fork the next patch in the series +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: forkpatch <newname>" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +NEW=$1 +BASE=`stripit $NEW` +SERIES=series + +if [ ! -e $SERIES ] +then + echo 'File "series" not found' + exit 1 +fi + +if [ -f $P/$BASE.patch ] ; then + echo "Patch $NEW already exists as a file" + exit 1 +fi + +if grep $BASE $SERIES >& /dev/null ; then + echo "Patch $NEW already exists in series" + exit 1 +fi + +TMPSERIES=$(mktemp /tmp/series-XXXXXXXX) +top=$(toppatch) +if [ x"$top" == x ] +then + todo=$(head -1 $SERIES) +else + last_in_series=$(stripit $(tail -1 $SERIES)) + if [ $last_in_series == $top ] + then + echo "Series fully applied. Ends at $top" + exit 0 + fi + todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) + if [ x$todo = x ] + then + todo=$(head -1 $SERIES) + fi +fi + +basetodo=`stripit $todo` + +sed "s/$todo/$BASE.patch/" < $SERIES > $TMPSERIES +cat $TMPSERIES > $SERIES +rm -f $TMPSERIES +cp -f $P/patches/$todo $P/patches/$BASE.patch +cp -f $P/pc/$basetodo.pc $P/pc/$BASE.pc +if [ -f $P/txt/$basetodo.txt ]; then + cp -f $P/txt/$basetodo.txt $P/txt/$BASE.txt +else + echo "Warning no documentation for $BASE" +fi + +echo "Cloned $todo to $BASE" diff --git a/lustre/kernel_patches/scripts/fpatch b/lustre/kernel_patches/scripts/fpatch new file mode 100755 index 0000000..0cafa65 --- /dev/null +++ b/lustre/kernel_patches/scripts/fpatch @@ -0,0 +1,53 @@ +#!/bin/sh + +# +# Add a file to a patch. +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: fpatch patchname filename" + echo " fpatch filename" + exit 1 +} + +if [ $# == 1 ] +then + PATCH_NAME=$(top_patch) + FILENAME=$1 +elif [ $# == 2 ] +then + PATCH_NAME=$(stripit $1) + FILENAME=$2 +else + usage +fi + + +if is_applied_last $PATCH_NAME +then + true +else + if is_applied $PATCH_NAME + then + echo $PATCH_NAME is not the last-applied patch + exit 1 + else + echo $PATCH_NAME >> $DB + fi +fi + +if file_in_patch $FILENAME $PATCH_NAME +then + echo File $FILENAME is already in patch $PATCH_NAME + exit 1 +fi + +install_file_in_patch $FILENAME $PATCH_NAME + diff --git a/lustre/kernel_patches/scripts/import_patch b/lustre/kernel_patches/scripts/import_patch new file mode 100755 index 0000000..f818f19 --- /dev/null +++ b/lustre/kernel_patches/scripts/import_patch @@ -0,0 +1,102 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "usage: import_patch [ -p prefix-pattern ] patchfile [...]" 1>&2 + exit 1 +} + +XPATTERN="" +if [ "$1" = "-p" ] +then + XPATTERN="$2" + shift; + shift; +fi + +if [ "$1" = "" ] +then + usage +fi + +if [ ! -e applied-patches ] +then + touch applied-patches +fi + +mkdir -p patches || exit 1 +mkdir -p txt || exit 1 +mkdir -p pc || exit 1 + +if [ ! -e ./series ] +then + touch ./series + if [ "$?" != "0" ] + then + echo "Cannot create ./series" 1>&2 + exit 1 + fi +fi + +if [ ! -w ./series ] +then + echo "./series is not writable." 1>&2 + exit 1 +fi + +PATTERN='s/^'"$XPATTERN"'//' +for x in $* +do + if [ ! -r "$x" ] + then + echo "$x does not exist, skipping." 1>&2 + continue + fi + patchname=`basename $x .bz2` + patchname=`basename $patchname .gz` + patchname=`basename $patchname .Z` + patchname=`basename $patchname .patch` + if is_applied $patchname + then + echo $patchname is currently applied + exit 1 + fi + if [ "$XPATTERN" != "" ] + then + patchname=`echo $patchname | sed -e "$PATTERN"` + fi + pname=$P/patches/"$patchname".patch + if [ -r "$pname" ] + then + echo "$pname exists already, skipping." 1>&2 + continue + fi + case "$x" in + *.bz2) + bunzip2 < "$x" > "$pname" + ;; + *.gz) + gunzip < "$x" > "$pname" + ;; + *.Z) zcat < "$z" > "$pname" + ;; + *) + cat "$x" > "$pname" || continue + ;; + esac + echo "$patchname".patch >> series + pcpatch "$pname" + extract_description < "$pname" >$P/txt/"$patchname".txt + grep '^[(]undescribed patch[)]$' < $P/txt/"$patchname".txt > /dev/null + if [ "$?" = "0" ] + then + echo "Warning: $patchname has no description." 1>&2 + fi +done + diff --git a/lustre/kernel_patches/scripts/inpatch b/lustre/kernel_patches/scripts/inpatch new file mode 100755 index 0000000..edb2c20 --- /dev/null +++ b/lustre/kernel_patches/scripts/inpatch @@ -0,0 +1,27 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: inpatch" + exit 1 +} + +if [ $# != 0 ] +then + usage +fi + +if [ -e $DB ] +then + TOP_PATCH=$(top_patch) + if [ x$TOP_PATCH != x ] + then + cat $P/pc/$TOP_PATCH.pc + fi +fi diff --git a/lustre/kernel_patches/scripts/join-patch b/lustre/kernel_patches/scripts/join-patch new file mode 100755 index 0000000..065ea73 --- /dev/null +++ b/lustre/kernel_patches/scripts/join-patch @@ -0,0 +1,28 @@ +#!/bin/sh + +usage() +{ + echo "Usage: join-patch patchname" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +PATCHNAME=$(stripit $1) + +if ! can_apply $PATCHNAME +then + echo Patch $PATCHNAME does not apply + exit 1 +fi + +pcpatch $PATCHNAME +for i in $(cat $P/pc/$PATCHNAME.pc) +do + fpatch $i +done + +patch -p1 -i "$P/patches/$PATCHNAME.patch" -f diff --git a/lustre/kernel_patches/scripts/linus-patch b/lustre/kernel_patches/scripts/linus-patch new file mode 100755 index 0000000..290b9cf --- /dev/null +++ b/lustre/kernel_patches/scripts/linus-patch @@ -0,0 +1,26 @@ +#!/bin/sh +# +# Grab a patch frmo kernel.org, install it. +# +# Usage: linus-patch http://www.kernel.org/pub/linux/kernel/people/dwmw2/bk-2.5/cset-1.786.152.7-to-1.798.txt.gz +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +poppatch 999 || die poppatch +wget $1 || die wget +FILE=$(basename $1) +gzip -d < $FILE > $P/patches/linus.patch +pcpatch linus || die pcpatch +( + echo DESC + echo $FILE + echo EDESC + echo + echo $FILE +) > $P/txt/linus.txt +rm $FILE diff --git a/lustre/kernel_patches/scripts/mpatch b/lustre/kernel_patches/scripts/mpatch new file mode 100755 index 0000000..16d4eb7 --- /dev/null +++ b/lustre/kernel_patches/scripts/mpatch @@ -0,0 +1,101 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: mpatch patchname [output_dir]" + exit 1 +} + +doit() +{ + echo $* 1>&2 + $* || { + echo oops + exit 1 + } +} + +epoch() +{ +# doit touch -t 7001011000.00 $1 + doit touch -t 7001010000.00 $1 +} + +dirfor() +{ + dir=$(dirname $1) + if [ ! -d $dir ] + then + doit mkdir -p $dir + RMDIRS="$RMDIRS $dir" + fi +} + +if [ $# == 0 ] +then + usage +fi + +PATCH_NAME=$(stripit $1) +OUTPUT_DIR=$2 + +FILES=$(cat $P/pc/$PATCH_NAME.pc) +OUT=$P/patches/$PATCH_NAME.patch +TMPOUT=$(mktemp /tmp/patch-$PATCH_NAME-XXXXXX) +TXT=$P/txt/$PATCH_NAME.txt +OLDDIR=$(basename $(/bin/pwd)) +NEWDIR=$OLDDIR-$LOGNAME + +if is_applied_last $PATCH_NAME +then + true +else + echo $PATCH_NAME is not the last-applied patch + exit 1 +fi + +doit rm -f $OUT +echo "Placing patch in " $OUT + +if [ -e $TXT -a -s $TXT ] +then + echo >> $OUT + body $TXT >> $OUT + echo >> $OUT + echo >> $OUT +else + echo "**** No patch description for $PATCH_NAME ****" +fi + +rm -f $TMPOUT + +for file in $FILES +do + OLD_FILE="$file"~"$PATCH_NAME" + if [ ! -e $OLD_FILE ] + then + OLD_FILE=/dev/null + fi + NEW_FILE=$file + XDIFF_OPTS="" + if [ ! -e $NEW_FILE ] + then + NEW_FILE=/dev/null + XDIFF_OPTS="-L $file" + fi + + echo diff -puN $XDIFF_OPTS $DIFF_OPTS $OLD_FILE $NEW_FILE + diff -puN $XDIFF_OPTS $DIFF_OPTS $OLD_FILE $NEW_FILE | p0-2-p1 $OLDDIR $NEWDIR >> $TMPOUT +done +diffstat -p1 $TMPOUT >> $OUT 2>/dev/null +echo >> $OUT +cat $TMPOUT >> $OUT +echo >> $OUT +echo "_" >> $OUT +rm -f $TMPOUT diff --git a/lustre/kernel_patches/scripts/new-kernel b/lustre/kernel_patches/scripts/new-kernel new file mode 100755 index 0000000..2b065a6 --- /dev/null +++ b/lustre/kernel_patches/scripts/new-kernel @@ -0,0 +1,82 @@ +#!/bin/sh + +usage() +{ + echo "Usage: new-kernel linux-2.4.2-pre2 linux-2.4.3-pre3 linux-2.4.3 patch.gz cvs-dir" + exit 1 +} + +wantdir() +{ + if [ x$1 = x ] + then + usage + fi + if [ ! -d $1 ] + then + echo "directory $1 does not exist" + usage + fi +} + +wantfile() +{ + if [ x$1 = x ] + then + usage + fi + if [ ! -f $1 ] + then + echo "file $1 does not exist" + usage + fi +} + +doit() +{ + echo $* 1>&2 + $* || { + echo oops + exit 1 + } +} + + +CURRENT_KERNEL=$1 +NEXT_KERNEL=$2 +BASE_KERNEL=$3 +PATCH_FILE=$4 +CVS_DIR=$5 + +TEMP_PATCH=$(mktemp /tmp/patch-XXXXXX) +MY_DIFF="$CURRENT_KERNEL"--"$NEXT_KERNEL" + +wantdir $CURRENT_KERNEL +wantdir $BASE_KERNEL +wantdir $CVS_DIR +wantfile $PATCH_FILE + +doit rm -rf $NEXT_KERNEL +doit cp -a $BASE_KERNEL $NEXT_KERNEL +doit rm -f $TEMP_PATCH +doit gunzip < $PATCH_FILE > $TEMP_PATCH +cd $NEXT_KERNEL +doit patch -p1 --dry-run -i $TEMP_PATCH +doit patch -p1 -s -i $TEMP_PATCH +echo cd .. +cd .. + +echo diff -uNrp $CURRENT_KERNEL $NEXT_KERNEL +diff -uNrp $CURRENT_KERNEL $NEXT_KERNEL > $MY_DIFF + +echo cd $CVS_DIR +cd $CVS_DIR +doit patch -p1 --dry-run -s -i ../$MY_DIFF +doit patch -p1 -s -i ../$MY_DIFF +cvs-take-patch ../$MY_DIFF +cvs commit -m "'doing $NEXT_KERNEL'" +cvs update -ko -d -P + +TAG=$(echo $NEXT_KERNEL | sed -e 's@\.@_@g') +cvs tag $TAG +rm -f $TEMP_PATCH diff --git a/lustre/kernel_patches/scripts/p0-2-p1 b/lustre/kernel_patches/scripts/p0-2-p1 new file mode 100755 index 0000000..266c698 --- /dev/null +++ b/lustre/kernel_patches/scripts/p0-2-p1 @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Usage: p0-2-p1 olddir newdir +# +OLDDIR=$1 +NEWDIR=$2 + +sed -e "s/^--- \([^\/].*\)/--- $OLDDIR\/\1/" | +sed -e "s/^+++ \([^\/].*\)/+++ $NEWDIR\/\1/" + diff --git a/lustre/kernel_patches/scripts/p_diff b/lustre/kernel_patches/scripts/p_diff new file mode 100755 index 0000000..1ad3e09 --- /dev/null +++ b/lustre/kernel_patches/scripts/p_diff @@ -0,0 +1,60 @@ +#!/bin/sh + +# +# Bring up a patched file in diff. We show the diffs +# in the topmost patch, unless it was specified +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: pdiff [patchname] filename" + echo " pdiff [patchname] -" + exit 1 +} + +if [ $# == 1 ] +then + PATCH_NAME=$(top_patch) + FILENAME=$1 +elif [ $# == 2 ] +then + PATCH_NAME=$(stripit $1) + FILENAME=$2 +else + usage +fi + +if ! is_applied $PATCH_NAME +then + echo $PATCH_NAME is not applied + exit 1 +fi + +doit() +{ + filename=$1 + unpatched_file=$filename"~"$PATCH_NAME + need_file_there $filename + if [ -e $unpatched_file ] + then + diff -u $unpatched_file $filename + else + echo pdiff: $filename appears to not be in $PATCH_NAME + fi +} + +if [ x"$FILENAME" = "x-" ] +then + FILENAME=$(cat $P/pc/$PATCH_NAME.pc) +fi + +for i in $FILENAME +do + doit $i +done diff --git a/lustre/kernel_patches/scripts/patchdesc b/lustre/kernel_patches/scripts/patchdesc new file mode 100755 index 0000000..9a886fd --- /dev/null +++ b/lustre/kernel_patches/scripts/patchdesc @@ -0,0 +1,21 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +desc1() +{ + PATCH=$(stripit $1) + TXT=$P/txt/$PATCH.txt + echo $PATCH.patch + desc < $TXT + echo +} + +for i in $* +do + desc1 $i +done diff --git a/lustre/kernel_patches/scripts/patchfns b/lustre/kernel_patches/scripts/patchfns new file mode 100644 index 0000000..8d3d4f0 --- /dev/null +++ b/lustre/kernel_patches/scripts/patchfns @@ -0,0 +1,256 @@ +DB=applied-patches + +# +# Work out where the user's pc/, patch/ and txt/ directories live. +# +# If the user specified PATCHSCRIPTS in environment then use that (it's +# probably a relative path) +# +# If there is a directory ./patch-scripts then use that +# +# Otherwise use "." +# + +if [ x$PATCHSCRIPTS_LIBDIR != x ] +then + P=$PATCHSCRIPTS_LIBDIR +elif [ -d ./patch-scripts ] +then + P=./patch-scripts +elif [ -d ./patches ] +then + P=. +else + echo "could not locate your pc/ and patches/ directories" + exit 1 +fi + +top_patch() +{ + tail -1 $DB +} + +die() +{ + echo error: $* + exit 1 +} + +is_numeric() +{ + if echo $1 | egrep '^[0-9]*$' > /dev/null + then + return 0 + fi + return 1 +} + +is_applied_last() +{ + name="$(stripit $1)" + top_patch >$DB.1 + if grep "^$name$" "$DB.1" > /dev/null 2>&1 + then + rm $DB.1 + return 0 + else + rm $DB.1 + return 1 + fi +} + +is_applied() +{ + name=$(stripit "$1") + if grep "^$name$" "$DB" > /dev/null 2>&1 + then + return 0 + else + return 1 + fi +} +check_pc_match() +{ + if [ -f /usr/bin/lsdiff ]; then + tmpfile=$(mktemp /tmp/p_XXXXXX) || exit 1 + lsdiff --strip=1 $1 > $tmpfile + diff $2 $tmpfile > /dev/null + if [ $? != 0 ]; then + echo " $1 do not match with $2 " + echo " $2 will be changed to match $2" + # cat $tmpfile > $P/pc/$PATCH_NAME.pc + fi + rm -rf $tmpfile + fi +} +can_apply() +{ + if patch -p1 --dry-run -i "$1" -f + then + return 0 + else + return 1 + fi +} + +can_remove() +{ + if patch -R -p1 --dry-run -i $P/patches/"$1".patch -f + then + return 0 + else + return 1 + fi +} + +remove_from_db() +{ + tmpfile=$(mktemp /tmp/p_XXXXXX) + name="$1" + sed -e "/^$name$/d" < "$DB" > $tmpfile + mv $tmpfile "$DB" +} + +stripit() +{ + ret=$(basename $1) + ret=$(echo $ret | sed -e 's/\.patch$//') + ret=$(echo $ret | sed -e 's/\.pc$//') + ret=$(echo $ret | sed -e 's/\.txt$//') + echo $ret +} + +top_is_current() +{ + patch_name=$(top_patch) + if [ x$patch_name == x ] + then + return 1 + else + patch_file=$P/patches/"$patch_name".patch + files=$(cat $P/pc/$patch_name.pc) + for file in $files + do + if [ $file -nt $patch_file ] + then + echo $file newer than $patch_file + return 0 + fi + done + fi + return 1 +} + +need_top_current() +{ + if top_is_current + then + echo "Error: Top patch is not up-to-date" + exit 1 + fi +} + +warn_top_current() +{ + if top_is_current + then + echo "Warning: Top patch is not up-to-date" + fi +} + +file_in_patch() +{ + file=$1 + patch=$2 + + if [ -e $P/pc/$patch.pc ] + then + if grep "^"$file"$" $P/pc/$patch.pc > /dev/null + then + return 0 + fi + fi + return 1 +} + +# copy_file_to_bup filename patchname +copy_file_to_bup() +{ + file=$1 + patch=$2 + bup="$file"~"$patch" + orig="$file"~"orig" + src_dir=`pwd` + + if [ -e $bup ] + then + echo "Cannot install file $file in patch $patch: backup $bup exists" + exit 1 + fi + if [ -e $file ] + then + cp -p $file "$file"~"$patch" + else + echo "file $file appears to be newly added" + fi + if [ ! -L "$orig" ]; then + ln -s "$src_dir/$bup" $orig + fi +} + +install_file_in_patch() +{ + file=$1 + patch=$2 + + copy_file_to_bup $file $patch + echo $file >> $P/pc/$patch.pc +# touch $P/txt/$patch.txt +} + +need_file_there() +{ + if [ ! -e $1 ] + then + echo "File $1 does not exist" + exit 1 + fi +} + +desc() +{ + state=0 + while read x + do + if [ x"$x" = xDESC ] + then + state=1 + elif [ x"$x" = xEDESC ] + then + state=0 + elif [ $state = 1 ] + then + echo " $x" + fi + done +} + +body() +{ + file=$1 + + did_stuff=0 + while read x + do + if [ x"$x" = xEDESC ] + then + cat + did_stuff=1 + fi + done < $file + + if [ $did_stuff = 0 ] + then + cat $file + fi +} diff --git a/lustre/kernel_patches/scripts/pcpatch b/lustre/kernel_patches/scripts/pcpatch new file mode 100755 index 0000000..fa53385 --- /dev/null +++ b/lustre/kernel_patches/scripts/pcpatch @@ -0,0 +1,45 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "pcpatch: recreate the pc file from patches/{patchname}.patch" + exit 1 +} + +doit() +{ + echo $* 1>&2 + $* || { + echo oops + exit 1 + } +} + +if [ $# != 1 -o "$1" = "help" ] +then + usage +fi +PATCH=$1 +PATCH_NAME=$(stripit $PATCH) +PC=$P/pc/$PATCH_NAME.pc + +if [ ! -e $P/patches/$PATCH_NAME.patch ] +then + echo "$P/patches/$PATCH_NAME.patch does not exist" + exit 1 +fi + +if is_applied "$PATCH" +then + echo $PATCH is applied! + exit 1 +fi + +touched-by-patch $P/patches/$PATCH_NAME.patch > $PC +echo Recreated $PC diff --git a/lustre/kernel_patches/scripts/poppatch b/lustre/kernel_patches/scripts/poppatch new file mode 100755 index 0000000..70055d6 --- /dev/null +++ b/lustre/kernel_patches/scripts/poppatch @@ -0,0 +1,72 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: poppatch [npatches]" + exit 1 +} + +doit() +{ + echo $* 1>&2 + $* || { + echo oops + exit 1 + } +} + +if [ $# -gt 1 ] +then + usage +fi + +NR=1 +STOP_AT="" +if [ $# -eq 1 ] +then + if is_numeric $1 + then + NR=$1 + else + NR=1000 + STOP_AT=$(stripit $1) + fi +fi + +pop_one() +{ + TOP_PATCH=$(top_patch) + if [ x$TOP_PATCH == x ] + then + echo "no patches applied" + exit 0 + else + popped_patch="$(top_patch)" + if ! rpatch $(top_patch) + then + echo still at $(top_patch) + exit 1 + fi + echo + fi +} + +for i in $(seq 1 $NR) +do + pop_one + if [ x$STOP_AT != "x" ] + then + if [ $STOP_AT == $(toppatch) ] + then + sum-series applied-patch + exit 0 + fi + fi +done +sum-series applied-patch diff --git a/lustre/kernel_patches/scripts/prep-patch b/lustre/kernel_patches/scripts/prep-patch new file mode 100755 index 0000000..1d60ea9 --- /dev/null +++ b/lustre/kernel_patches/scripts/prep-patch @@ -0,0 +1,18 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +if [ $# -ne 1 ] +then + echo "Usage prep-patch patchname" + exit 1 +fi + +PATCHNAME=$(stripit $1) + +xcb -s 2 < $P/patches/$PATCHNAME.patch +head -2 $P/txt/$PATCHNAME.txt | tail -1 | tr -d '\n' | xcb -s 1 diff --git a/lustre/kernel_patches/scripts/pstatus b/lustre/kernel_patches/scripts/pstatus new file mode 100755 index 0000000..f735d8d --- /dev/null +++ b/lustre/kernel_patches/scripts/pstatus @@ -0,0 +1,156 @@ +#!/bin/sh + +# print out patch status. Usage: pstatus [ patchfile ... ] +# +# Stephen Cameron <steve.cameron@hp.com> +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +if [ ! -f ./series ] +then + echo "./series does not exist." 1>&2 + exit 1 +fi + +if [ ! -d ./patches ] +then + echo "Directory ./patches does not exist." 1>&2 + exit 1 +fi + + +PATCHLIST="$*" +if [ "$PATCHLIST" = "" ] +then + series_optimize=yes + PATCHLIST=`cat series | sed -e 's/[.]patch[ ]*$//'` + SORTSERIES=`mktemp /tmp/ser.XXXXXX` || exit 1 + SORTPATCHES=`mktemp /tmp/pat.XXXXXX` || exit 1 + sed -e 's/^[ ]//' -e 's/[.]patch[ ]*$//' < series | \ + sort > $SORTSERIES + exists="`echo $P/patches/*.patch 2>/dev/null`" + if [ "$exists" != "$P/patches/*.patch" ] + then + ls -1 $P/patches/*.patch | sed -e 's/^.*\/patches\///' \ + -e 's/[.]patch[ ]*$//' | sort > $SORTPATCHES + PATCHLIST="$PATCHLIST"" `comm -1 -3 $SORTSERIES $SORTPATCHES`" + fi + rm -f $SORTPATCHES $SORTSERIES +else + series_optimize=no +fi + +NSERIES=`wc -l series | awk '{ print $1; }'` +series=1 +for PATCH_NAME in $PATCHLIST +do + PATCH_NAME=$(stripit $PATCH_NAME) + # see if this patch even exists + if [ ! -f $P/patches/"$PATCH_NAME".patch ] + then + echo "$PATCH_NAME does not exist." + continue + fi + # see if this patch is applied + applied="-" + if [ -f applied-patches ] + then + grep '^'"$PATCH_NAME"'$' applied-patches > /dev/null + if [ "$?" = "0" ] + then + applied="a" + fi + fi + + # figure the status of this patch, that is, + # if it needs changelog, pcpatch, refpatch + + stat="" + if [ ! -f $P/txt/"$PATCH_NAME".txt ] + then + stat="changelog " + fi + if [ ! -f $P/pc/"$PATCH_NAME".pc ] + then + stat="$stat""pcpatch " + elif [ "$applied" != '-' ] + then + rpatch=n + + # for each file this patch touches + for y in `cat $P/pc/"$PATCH_NAME".pc` + do + # is the patch adding the file? + if [ ! -e "$y"'~'"$PATCH_NAME" -a -f "$y" ] + then + # file is newer than the patch? + if [ "$y" -nt $P/patches/"$PATCH_NAME".patch ] + then + rpatch=y + stat="$stat""refpatch " + break + fi + else + # modified file is newer than the patch? + if [ "$y"'~'"$PATCH_NAME" -nt \ + $P/patches/"$PATCH_NAME".patch ] + then + rpatch=y + stat="$stat""refpatch " + break + fi + if [ "`toppatch`" = "$PATCH_NAME" -a \ + "$y" -nt $P/patches/"$PATCH_NAME".patch ] + then + # toppatch, so check if the file + # is newer than the patch? + rpatch=y + stat="$stat""refpatch " + break + fi + fi + done + fi + # check if they changed the changelog recently + if [ "$rpatch" = "n" -a -f $P/txt/"$PATCH_NAME".txt \ + -a $P/txt/"$PATCH_NAME".txt -nt \ + $P/patches/"$PATCH_NAME".patch ] + then + rpatch=y + stat="$stat""refpatch " + fi + if [ "$stat" != "" ] + then + stat="Needs ""$stat" + fi + + if [ "$series_optimize" != "yes" ] + then + # have to find the series number the hard way. + series=`grep -n '^'"$PATCH_NAME"'\.patch$' series |\ + awk -F: '{ printf "%d", $1}' ` + if [ "$series" = "" ] + then + series="?" + fi + fi + + echo "$series":"$applied":"$PATCH_NAME $stat" + + if [ "$series_optimize" = "yes" ] + then + if [ "$series" != "?" ] + then + series=`expr $series + 1` + if [ $series -gt $NSERIES ] + then + series="?" + fi + fi + fi +done diff --git a/lustre/kernel_patches/scripts/ptkdiff b/lustre/kernel_patches/scripts/ptkdiff new file mode 100755 index 0000000..97c9982 --- /dev/null +++ b/lustre/kernel_patches/scripts/ptkdiff @@ -0,0 +1,46 @@ +#!/bin/sh + +# +# Bring up a patched file in tkdiff. We show the diffs +# in the topmost patch, unless it was specified +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: ptkdiff filename ..." + echo " ptkdiff -" + exit 1 +} + +PATCH_NAME=$(top_patch) + +doit() +{ + filename=$1 + unpatched_file=$filename"~"$PATCH_NAME + need_file_there $filename + if [ -e $unpatched_file ] + then + tkdiff $unpatched_file $filename + else + echo ptkdiff: $filename appears to not be in $PATCH_NAME + fi +} + +if [ x"$1" = "x-" ] +then + FILENAME=$(cat $P/pc/$PATCH_NAME.pc) +else + FILENAME="$*" +fi + +for i in $FILENAME +do + doit $i & +done diff --git a/lustre/kernel_patches/scripts/pushpatch b/lustre/kernel_patches/scripts/pushpatch new file mode 100755 index 0000000..6702e63 --- /dev/null +++ b/lustre/kernel_patches/scripts/pushpatch @@ -0,0 +1,86 @@ +#!/bin/sh + +# +# Add next patch in series +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: pushpatch [npatches]" + exit 1 +} + +opt_force=0 + +for i in $* +do + case "$i" in + -f) + opt_force=1;; + *) + if [ -n "$NR" -o -n "$STOP_AT" ] + then + usage + fi + if is_numeric $i + then + NR=$i + else + NR=1000 + STOP_AT=$(stripit $i) + fi;; + esac +done + +[ $opt_force = 1 ] && force="-f" + +SERIES=series + +if [ ! -e $SERIES ] +then + echo 'File "series" not found' + exit 1 +fi + +push_one() +{ + top=$(toppatch) + if [ x"$top" == x ] + then + todo=$(head -1 $SERIES) + else + last_in_series=$(stripit $(tail -1 $SERIES)) + if [ $last_in_series == $top ] + then + echo "Series fully applied. Ends at $top" + exit 0 + fi + todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) + if [ x$todo = x ] + then + todo=$(head -1 $SERIES) + fi + fi + + apatch $force $todo +} + +for i in $(seq 1 $NR) +do + push_one + if [ x$STOP_AT != "x" ] + then + if [ $STOP_AT == $(toppatch) ] + then + sum-series applied-patch + exit 0 + fi + fi +done +sum-series applied-patch diff --git a/lustre/kernel_patches/scripts/refpatch b/lustre/kernel_patches/scripts/refpatch new file mode 100755 index 0000000..3195a57 --- /dev/null +++ b/lustre/kernel_patches/scripts/refpatch @@ -0,0 +1,32 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: refpatch" + exit 1 +} + +doit() +{ + echo $* 1>&2 + $* || { + echo oops + exit 1 + } +} + +if [ $# != 0 ] +then + usage +fi + +TOP_PATCH=$(top_patch) +mpatch $* $(top_patch) +sum-series applied-patch +echo "Refreshed $TOP_PATCH" diff --git a/lustre/kernel_patches/scripts/removed-by-patch b/lustre/kernel_patches/scripts/removed-by-patch new file mode 100755 index 0000000..ff12970 --- /dev/null +++ b/lustre/kernel_patches/scripts/removed-by-patch @@ -0,0 +1,14 @@ +#!/bin/sh +# Extract names of new files from a patch, print them out + +PATCHFILE=$1 +case "$PATCHFILE" in +*.gz) CMD="gzip -d < $PATCHFILE";; +*) CMD="cat $PATCHFILE";; +esac + +TMP=$(mktemp /tmp/rbp-XXXXXX) + +eval $CMD | egrep '^\+\+\+.*1970|\+\+\+.*1969' > $TMP +sed -e 's@[^/]*/\([^ ]*\).*@\1@' < $TMP | sed -e 's@^linux/@@' | sort +rm -f $TMP diff --git a/lustre/kernel_patches/scripts/rename-patch b/lustre/kernel_patches/scripts/rename-patch new file mode 100755 index 0000000..8334f1e --- /dev/null +++ b/lustre/kernel_patches/scripts/rename-patch @@ -0,0 +1,20 @@ +#!/bin/sh +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} +OLD=$(stripit $1) +NEW=$(stripit $2) + +mv $P/pc/$OLD.pc $P/pc/$NEW.pc +mv $P/patches/$OLD.patch $P/patches/$NEW.patch +mv $P/txt/$OLD.txt $P/txt/$NEW.txt + +cvs remove $P/pc/$OLD.pc +cvs remove $P/patches/$OLD.patch +cvs remove $P/txt/$OLD.txt + +cvs add $P/pc/$NEW.pc +cvs add $P/patches/$NEW.patch +cvs add $P/txt/$NEW.txt diff --git a/lustre/kernel_patches/scripts/rolled-up-patch b/lustre/kernel_patches/scripts/rolled-up-patch new file mode 100755 index 0000000..52676dc --- /dev/null +++ b/lustre/kernel_patches/scripts/rolled-up-patch @@ -0,0 +1,30 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: rolled-up-patch" + exit 1 +} + +if [ $# != 0 ] +then + usage +fi + +RUP=$(mktemp /tmp/rup-XXXXXX) +rm -f $RUP + +for i in $(cat applied-patches) +do + patch_name=$(stripit $i) + cat $P/pc/$patch_name.pc +done | sort | uniq > $RUP + +kdiff $(cat $RUP) +rm -f $RUP diff --git a/lustre/kernel_patches/scripts/rpatch b/lustre/kernel_patches/scripts/rpatch new file mode 100755 index 0000000..5a8da38 --- /dev/null +++ b/lustre/kernel_patches/scripts/rpatch @@ -0,0 +1,90 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +# do_remove() +# { +# if patch -R -p1 -s -i $P/patches/"$1".patch +# then +# true +# else +# echo SOMETHING WENT WRONG +# exit 1 +# fi +# } + +do_remove() +{ + FILES=$(cat $P/pc/$1.pc) + for file in $FILES ; do + base_dir=`pwd` + if [ -L "$file"~"orig" ]; then + if [ `readlink "$file"~"orig"` = "$base_dir/""$file"~"$1" ]; then + rm -rf "$file"~"orig" + fi + fi + if [ -f "$file"~"$1" ]; then + mv -f "$file"~"$1" "$file" + else + rm -f "$file" + fi + done + true +} + +kill_old_ones() +{ + FILES=$(cat $P/pc/$1.pc) + for file in $FILES + do + rm -f "$file"~"$1" + done +} + +usage() +{ + echo "Usage: rpatch patchname" + exit 1 +} + +if [ $# == 0 ] +then + usage +fi + +PATCH_NAME=$(stripit $1) + +warn_top_current +if is_applied "$PATCH_NAME" +then +# if can_remove "$PATCH_NAME" +# then + if [ ! -f $P/pc/$PATCH_NAME.pc ]; then + exit 1 + fi + do_remove "$PATCH_NAME" + kill_old_ones "$PATCH_NAME" + remove_from_db "$PATCH_NAME" +# else +# echo "$PATCH_NAME" does not remove cleanly +# exit 1 +# fi +else + echo "$PATCH_NAME" is not applied + exit 1 +fi + +top=$(top_patch) +if [ x"$top" == x ] +then + msg="no patches applied" +else + msg="now at $top" +fi + +echo Removed $PATCH_NAME, $msg + diff --git a/lustre/kernel_patches/scripts/split-patch b/lustre/kernel_patches/scripts/split-patch new file mode 100755 index 0000000..08ce431 --- /dev/null +++ b/lustre/kernel_patches/scripts/split-patch @@ -0,0 +1,29 @@ +#!/usr/bin/perl -w +$out = ""; +while (<>) { + next if (/^Only/); + next if (/^Binary/); + if (/^diff/ || /^Index/) { + if ($out) { + close OUT; + } + (@out) = split(' ', $_); + shift(@out) if (/^diff/); + $out = pop(@out); + $out =~ s:/*usr/:/:; + $out =~ s:/*src/:/:; + $out =~ s:^/*linux[^/]*::; + $out =~ s:\(w\)::; + next if ($out eq ""); + $out = "/var/tmp/patches/$out"; + $dir = $out; + $dir =~ s:/[^/]*$::; + print STDERR "$out\n"; + system("mkdir -p $dir"); + open(OUT, ">$out") || die("cannot open $out"); + } + if ($out) { + print OUT $_; + } +} + diff --git a/lustre/kernel_patches/scripts/sum-series b/lustre/kernel_patches/scripts/sum-series new file mode 100755 index 0000000..5b628fb --- /dev/null +++ b/lustre/kernel_patches/scripts/sum-series @@ -0,0 +1,41 @@ +#!/bin/sh + +# +# Make superpatch from current series using combinediff. +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: sum-series output-file" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +need_file_there applied-patches +CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) +for FILE in $(cat applied-patches) +do +# echo "Adding patch $FILE...." + if [ -f $P/patches/$FILE ] + then + cat $P/patches/$FILE >> $CURRENT + elif [ -f $P/patches/$FILE.patch ] + then + cat $P/patches/$FILE.patch >> $CURRENT + elif [ -f $FILE ] + then + cat $FILE >> $CURRENT + fi +done + +mv $CURRENT "$1" diff --git a/lustre/kernel_patches/scripts/tag-series b/lustre/kernel_patches/scripts/tag-series new file mode 100755 index 0000000..17f3dfe --- /dev/null +++ b/lustre/kernel_patches/scripts/tag-series @@ -0,0 +1,41 @@ +#!/bin/sh + +# tag-series tagname series-file-name +# +# Does a `cvs tag tagname' of all the .pc, .txt and .patch files mentioned +# in series-file-name. Also tags series-file-name. +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +# tag_one tag patchname +# +tag_one() +{ + PN=$(stripit $2) + if [ -r $P/txt/$PN.txt ] + then + cvs tag $1 $P/pc/$PN.pc $P/patches/$PN.patch $P/txt/$PN.txt + else + cvs tag $1 $P/pc/$PN.pc $P/patches/$PN.patch + fi +} + +if [ $# -ne 2 ] +then + echo Usage: tag-series tagname series-file-name + exit 1 +fi + +TAG=$1 +SERIES=$2 + +for p in $(cat $SERIES) +do + tag_one $TAG $p +done +cvs tag $TAG $SERIES diff --git a/lustre/kernel_patches/scripts/toppatch b/lustre/kernel_patches/scripts/toppatch new file mode 100755 index 0000000..6df239d --- /dev/null +++ b/lustre/kernel_patches/scripts/toppatch @@ -0,0 +1,27 @@ +#!/bin/sh + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: toppatch" + exit 1 +} + +if [ $# != 0 ] +then + usage +fi + +if [ -e $DB ] +then + TOP_PATCH=$(top_patch) + if [ x$TOP_PATCH != x ] + then + echo $TOP_PATCH + fi +fi diff --git a/lustre/kernel_patches/scripts/touched-by-patch b/lustre/kernel_patches/scripts/touched-by-patch new file mode 100755 index 0000000..df5b387 --- /dev/null +++ b/lustre/kernel_patches/scripts/touched-by-patch @@ -0,0 +1,32 @@ +#!/bin/sh +# Extract names of new files from a patch, print them out + +PATCHFILE=$1 +case "$PATCHFILE" in +*.gz) CMD="gzip -d < $PATCHFILE";; +*) CMD="cat $PATCHFILE";; +esac + +TMP=$(mktemp /tmp/tbp-XXXXXX) || exit 1 +TMP2=$(mktemp /tmp/tbp2-XXXXXX) || exit 1 + +eval $CMD | egrep '^\+\+\+ |^\-\-\- ' > $TMP + +cat $TMP | sed -e 's@[^/]*/\([^ ]*\).*@\1@' \ + | grep -v '^dev\/null$' \ + | sort \ + | uniq \ + > $TMP2 + +rm -f $TMP +grep < $TMP2 '^[+][+][+]' > /dev/null +if [ "$?" = "0" ] +then + echo "WARNING: $PATCHFILE appears to be -p0 form rather than -p1." 1>&2 + echo " Use "\'"p0-2-p1 . . < $PATCHFILE"\'" to fix" 1>&2 + awk '{ print $2 }' < $TMP2 +else + cat $TMP2 +fi | grep -v '~' + +rm -f $TMP2 diff --git a/lustre/kernel_patches/scripts/trypatch b/lustre/kernel_patches/scripts/trypatch new file mode 100755 index 0000000..2e3cd15 --- /dev/null +++ b/lustre/kernel_patches/scripts/trypatch @@ -0,0 +1,72 @@ +#!/bin/sh + +# +# Fork the next patch in the series +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: trypatch <newname>" + exit 1 +} + +if [ $# -ne 1 ] +then + usage +fi + +NEW=$1 +BASE=`stripit $NEW` +SERIES=series + +if [ ! -e $SERIES ] +then + echo 'File "series" not found' + exit 1 +fi + +if grep $BASE $SERIES >& /dev/null ; then + echo "Patch $NEW already exists in series" + exit 1 +fi + +if [ ! -f $P/patches/$BASE.patch ] ; then + echo "Patch $NEW doesn't exist as a file" + exit 1 +fi + +$TMPSERIES=$(mktemp /tmp/series-XXXXXXXX) +top=$(toppatch) +if [ x"$top" == x ] +then + todo=$(head -1 $SERIES) +else + last_in_series=$(stripit $(tail -1 $SERIES)) + if [ $last_in_series == $top ] + then + echo "Series fully applied. Ends at $top" + exit 0 + fi + todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) + if [ x$todo = x ] + then + todo=$(head -1 $SERIES) + fi +fi + +if patch -p1 -i $P/patches/$BASE.patch ; then + patch -R -p1 -i $P/patches/$BASE.patch + + $basetodo=$(basename $todo) + sed "s/$todo/$BASE/" < $SERIES > $TMPSERIES + mv -f $TMPSERIES $SERIES + echo "Replaced $todo with $BASE" +else + echo "Failed to replace $todo with $BASE" +fi diff --git a/lustre/kernel_patches/scripts/unitdiff.py b/lustre/kernel_patches/scripts/unitdiff.py new file mode 100755 index 0000000..d19d5e7 --- /dev/null +++ b/lustre/kernel_patches/scripts/unitdiff.py @@ -0,0 +1,223 @@ +#!/usr/bin/python + +import sys +import re +import string + +#TODO +# clean up rest/file +# clean up +6 and like (assumptions). should be turned into 'find' +# make regession tests for all cases (Only in, etc) + +try: + filename = sys.argv[1] +except: + print 'requires a file name' + sys.exit(1) + +filefd = open(filename) +file = filefd.read() +filefd.close() + +rest = file +pat = "(^(?:diff .*\n)?--- .*\n\+\+\+ .*)?\n@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@|^(Only in .*)" +startpat = re.compile(pat, re.M) + +pos = 0 +oldpos = 0 +filelen = len(rest) +oldrest = "" +while(1): + rexp = startpat.search(rest) + if not rexp: + break + + if rexp.group(6): + print rexp.group(6) + rest = rest[rexp.end(6)+1:] + continue + + header = rexp.group(1) + orgfile_start = string.atoi(rexp.group(2)) + if rexp.group(3): + orgfile_len = string.atoi(rexp.group(3)) + else: + orgfile_len = -1 + newfile_start = string.atoi(rexp.group(4)) + if rexp.group(5): + newfile_len = string.atoi(rexp.group(5)) + else: + newfile_len = -1 + rest = rest[rexp.start(2):] + rest = rest[string.find(rest, "\n")+1:] + + rexp2 = startpat.search(rest) + if rexp2: + if rexp2.start(6) != -1: + oldrest = rest[rexp2.start(6)-1:] + rest = rest[:rexp2.start(6)] + elif rexp2.start(1) == -1: + oldrest = rest[rexp2.start(2)-5:] + rest = rest[:rexp2.start(2)-4] + else: + oldrest = rest[rexp2.start(1)-1:] + rest = rest[:rexp2.start(1)] + else: + oldrest = rest + +# pos = filelen - len(oldrest) +# if pos - oldpos > 100: +# sys.stderr.write(`pos`+'/'+`filelen`+'\n') +# oldpos = pos + + first = 1 + oldminuses = 0 + oldplusses = 0 + oldoffset = 0 + while(1): + #erstat early line stuff med lookbehind paa {1,2}-dims + #nedenfor RAA + linepat = "^([^-+\n]*)\n?(((^[-+].*\n)|^(.*\n){1,2}(?=^[-+].*\n))+)(.*)\n?" + compat = re.compile(linepat, re.M) + rexp = compat.search(rest) + if not rexp: + break + + prematch = rexp.group(1) + match = rexp.group(2) + muddle = len(match) + +# print rest +# print 'prematch ', rexp.start(1), rexp.end(1), prematch +# print 'match ---------' +# print match +# print 'match --------' + + # dump unwanted early lines... + if match[0] != "+" and match[0] != "-": + while(1): + next = string.find(match, '\n') + if next == -1: + break + if match[next+1] == "+" or match[next+1] == "-": + prematch = match[:next] + match = match[next+1:] + break + match = match[next+1:] + + +# print 'prematch ', rexp.start(1), rexp.end(1), len(prematch) +# print '('+prematch+')' +# if prematch == ' ': +# print 'space' + muddle = muddle - len(match) + + lines = string.count(match, "\n") + compat = re.compile("^-", re.M) + minuses = len(compat.findall(match)) + compat = re.compile("^\+", re.M) + plusses = len(compat.findall(match)) + orgsize = minuses + 2 + (lines - minuses - plusses) + newsize = plusses + 2 + (lines - minuses - plusses) + + noeol = "^(\\\ No newline at end of file)$" + compnoeol = re.compile(noeol, re.M) + if compnoeol.search(match) or compnoeol.search(rexp.group(6)): + orgsize = orgsize - 1 + newsize = newsize - 1 + + coherent = 0 + if lines - plusses == 0: + coherent = 1 + elif lines - minuses == 0: + coherent = 1 + + # RAA FIXME + if not len(prematch):#or len(prematch) == 1 and prematch == ' ': + orgsize = orgsize -1 + newsize = newsize -1 + if rexp.start(6) == rexp.end(6): + orgsize = orgsize -1 + newsize = newsize -1 + +# print "lines in match: ", lines +# print "number of minuses: ", minuses +# print "number of plusses: ", plusses + + matchpos = rexp.start(2) + muddle + offset = string.count(rest[:matchpos], "\n") + +# print 'offset/oldoffset: ', offset,oldoffset +# print 'oldplusses/oldminuses: ', oldplusses, oldminuses +# print 'orgfile_start/newfile_start: ', orgfile_start, newfile_start + + orgstart = orgfile_start + offset + oldoffset - oldplusses + newstart = newfile_start + offset - oldminuses + oldoffset + + # RAA: Bwadr. Fix antagelse om prematch paa en anden + # maade + orgstartmod = 0 + newstartmod = 0 + if orgfile_start == 1 and not len(prematch): + orgstartmod = 1 + if newfile_start == 1 and not len(prematch): + newstartmod = 1 + if orgfile_start == 0 and orgfile_len == 0: + orgstartmod = 1 + # RAA Hack! + plusses = plusses + 1 + minuses = minuses +1 + if newfile_start == 0 and newfile_len == 0: + newstartmod = 1 + # RAA Hack! + plusses = plusses + 1 + minuses = minuses +1 + + if header and first: + print header + first = 0 + + # should the start(1) == 0 be orgstart == 1? RAA + if orgstart == 1 and newstart == 1 and plusses == 0 and coherent: + print "@@ -"+`orgstart`+","+`orgsize`+" +"+`newstart`+" @@" + print match[:string.rfind(match, "\n")] + print rexp.group(6) + elif rexp.start(6) == rexp.end(6) and plusses == 0 and coherent: + if orgstartmod: + orgstart = orgstart + 1 + if newstartmod: + newstart = newstart + 1 + print "@@ -"+`orgstart-1`+","+`orgsize`+" +"+`newstart-1`+" @@" + print prematch + print match[:string.rfind(match, "\n")] + elif orgstart == 1 and orgstart == 1 and minuses == 0 and coherent: + print "@@ -"+`orgstart`+" +"+`newstart`+","+`newsize`+" @@" + print match[:string.rfind(match, "\n")] + print rexp.group(6) + elif rexp.start(6) == rexp.end(6) and minuses == 0 and coherent: + if orgstartmod: + orgstart = orgstart + 1 + if newstartmod: + newstart = newstart + 1 + print "@@ -"+`orgstart-1`+" +"+`newstart-1`+","+`newsize`+" @@" + print prematch + print match[:string.rfind(match, "\n")] + else: + if orgstartmod: + orgstart = orgstart + 1 + if newstartmod: + newstart = newstart + 1 + print "@@ -"+`orgstart-1`+","+`orgsize`+" +"+`newstart-1`+","+`newsize`+" @@" + if len(prematch): + print prematch + print match[:string.rfind(match, "\n")] + if rexp.start(6) != rexp.end(6): + print rexp.group(6) + + rest = rest[rexp.end(6):] + oldminuses = minuses + oldminuses + oldplusses = plusses + oldplusses + oldoffset = oldoffset + offset + lines #include match()-lines + + + rest = oldrest diff --git a/lustre/kernel_patches/scripts/unused-patches b/lustre/kernel_patches/scripts/unused-patches new file mode 100755 index 0000000..2f3a70a --- /dev/null +++ b/lustre/kernel_patches/scripts/unused-patches @@ -0,0 +1,39 @@ +#!/bin/sh + +# +# List unused patches +# + +. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ + echo "Impossible to find my library 'patchfns'." + echo "Check your install, or go to the right directory" + exit 1 +} + +usage() +{ + echo "Usage: unused-patches" + exit 1 +} + +if [ $# -ne 0 ] +then + usage +fi + +for FILE in $(ls $P/patches) +do + BASE=`stripit $FILE` +# echo checking $BASE in $P/patches + if grep $FILE $P/series/* >& /dev/null ; then + true +# echo $FILE found in $P/series + else + if [ $BASE != CVS ]; then + echo patches/$FILE + echo txt/$BASE.txt + echo pc/$BASE.pc + fi + fi +done + diff --git a/lustre/kernel_patches/series/2.6-fc3.series b/lustre/kernel_patches/series/2.6-fc3.series index b2090f0..6536dce 100644 --- a/lustre/kernel_patches/series/2.6-fc3.series +++ b/lustre/kernel_patches/series/2.6-fc3.series @@ -4,7 +4,6 @@ fc3_to_rhel4_updates.patch vfs_intent-2.6-fc3.patch vfs_nointent-2.6-rhel4.patch vfs_races-2.6-fc3.patch -ext3-wantedi-misc-2.6-suse.patch nfs-cifs-intent-2.6-fc3.patch iopen-misc-2.6-fc3.patch export-truncate-2.6-suse.patch @@ -22,3 +21,4 @@ uml-exprt-clearuser.patch fsprivate-2.6.patch linux-2.6.9-ext3-sub-second-timestamp.patch bitops_ext2_find_next_le_bit-2.6.patch +dynamic-locks-2.6-fc3.patch diff --git a/lustre/kernel_patches/series/2.6-fc5.series b/lustre/kernel_patches/series/2.6-fc5.series index 1835748..01bc69f 100644 --- a/lustre/kernel_patches/series/2.6-fc5.series +++ b/lustre/kernel_patches/series/2.6-fc5.series @@ -2,7 +2,6 @@ lustre_version.patch vfs_intent-2.6-fc5.patch vfs_nointent-2.6-fc5.patch vfs_races-2.6.12.patch -ext3-wantedi-misc-2.6-suse.patch jbd-2.6.10-jcberr.patch nfs-cifs-intent-2.6-fc5.patch iopen-misc-2.6.12.patch @@ -17,3 +16,4 @@ export-show_task-2.6-fc5.patch sd_iostats-2.6-rhel4.patch export_symbol_numa-2.6-fc5.patch vfs_intent-2.6-fc5-fix.patch +dynamic-locks-2.6.9.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4-cmd3.series b/lustre/kernel_patches/series/2.6-rhel4-cmd3.series new file mode 100644 index 0000000..c34052e --- /dev/null +++ b/lustre/kernel_patches/series/2.6-rhel4-cmd3.series @@ -0,0 +1,42 @@ +lustre_version.patch +vfs_intent-2.6-rhel4.patch +vfs_races-2.6-rhel4.patch +iopen-misc-2.6-suse.patch +export-truncate-2.6-suse.patch +export_symbols-2.6-rhel4.patch +dev_read_only-2.6-suse.patch +export-log-2.6-rhel4.patch +lookup_bdev_init_intent.patch +remove-suid-2.6-suse.patch +export-show_task-2.6-vanilla.patch +sd_iostats-2.6-rhel4.patch +fsprivate-2.6.patch +export_symbol_numa.patch +qsnet-rhel4-2.6.patch +linux-2.6-binutils-2.16.patch +vm-tunables-rhel4.patch +tcp-rto_proc-2.6.9.patch +2.6-rhel4-kgdb-ga.patch +iallocsem_consistency.patch +raid5-stats.patch +raid5-configurable-cachesize.patch +raid5-large-io.patch +raid5-stripe-by-stripe-handling.patch +raid5-merge-ios.patch +raid5-serialize-ovelapping-reqs.patch +raid5-zerocopy.patch +jbd-stats-2.6.9.patch +bitops_ext2_find_next_le_bit-2.6.patch +quota-deadlock-on-pagelock-core.patch +quota-umount-race-fix.patch +quota-deadlock-on-pagelock-ext3.patch +inode-nr_unused-2.6.9-rhel4.patch +proc-sleep-2.6.9.patch +dynamic-locks-2.6.9.patch +jbd-checkpoint-on-commit.patch +jbd-copy-out-everything.patch +export-nr_free_buffer_pages.patch +__find_get_block_slow-scale.patch +debugging-fields-in-current.patch +increase-BH_LRU_SIZE.patch +ipoib_tcpdump.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4-titech.series b/lustre/kernel_patches/series/2.6-rhel4-titech.series index 6447260..029abb9 100644 --- a/lustre/kernel_patches/series/2.6-rhel4-titech.series +++ b/lustre/kernel_patches/series/2.6-rhel4-titech.series @@ -1,10 +1,7 @@ lustre_version.patch vfs_intent-2.6-rhel4.patch -vfs_nointent-2.6-rhel4.patch vfs_races-2.6-rhel4.patch -nfs-cifs-intent-2.6-rhel4.patch iopen-misc-2.6-suse.patch -export-truncate-2.6-suse.patch export_symbols-2.6-rhel4.patch dev_read_only-2.6-suse.patch export-log-2.6-rhel4.patch @@ -17,9 +14,9 @@ fsprivate-2.6.patch export_symbol_numa.patch qsnet-rhel4-2.6.patch linux-2.6-binutils-2.16.patch +compile-fixes-2.6.9-rhel4-22.patch vm-tunables-rhel4.patch tcp-rto_proc-2.6.9.patch -iallocsem_consistency.patch raid5-stats.patch raid5-configurable-cachesize.patch raid5-large-io.patch @@ -28,7 +25,7 @@ raid5-optimize-memcpy.patch raid5-merge-ios.patch raid5-serialize-ovelapping-reqs.patch jbd-stats-2.6.9.patch -bitops_ext2_find_next_le_bit-2.6.patch +bitops_ext2_find_next_le_bit-2.6.patch quota-deadlock-on-pagelock-core.patch quota-umount-race-fix.patch quota-deadlock-on-pagelock-ext3.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4.series b/lustre/kernel_patches/series/2.6-rhel4.series index 683de88..e1b835a 100644 --- a/lustre/kernel_patches/series/2.6-rhel4.series +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -1,34 +1,32 @@ lustre_version.patch vfs_intent-2.6-rhel4.patch -vfs_nointent-2.6-rhel4.patch vfs_races-2.6-rhel4.patch -nfs-cifs-intent-2.6-rhel4.patch iopen-misc-2.6-suse.patch -export-truncate-2.6-suse.patch export_symbols-2.6-rhel4.patch dev_read_only-2.6-suse.patch -export-log-2.6-rhel4.patch +export-log-2.6-rhel4.patch lookup_bdev_init_intent.patch remove-suid-2.6-suse.patch export-show_task-2.6-vanilla.patch sd_iostats-2.6-rhel4.patch -blkdev_tunables-2.6-suse.patch fsprivate-2.6.patch export_symbol_numa.patch qsnet-rhel4-2.6.patch linux-2.6-binutils-2.16.patch vm-tunables-rhel4.patch tcp-rto_proc-2.6.9.patch -iallocsem_consistency.patch raid5-stats.patch raid5-configurable-cachesize.patch raid5-large-io.patch raid5-stripe-by-stripe-handling.patch raid5-merge-ios.patch raid5-serialize-ovelapping-reqs.patch +raid5-zerocopy.patch jbd-stats-2.6.9.patch bitops_ext2_find_next_le_bit-2.6.patch quota-deadlock-on-pagelock-core.patch quota-umount-race-fix.patch quota-deadlock-on-pagelock-ext3.patch inode-nr_unused-2.6.9-rhel4.patch +dynamic-locks-2.6.9.patch +export-nr_free_buffer_pages.patch diff --git a/lustre/kernel_patches/series/2.6-sles10.series b/lustre/kernel_patches/series/2.6-sles10.series index 5e1cd88..378f7e1 100644 --- a/lustre/kernel_patches/series/2.6-sles10.series +++ b/lustre/kernel_patches/series/2.6-sles10.series @@ -1,9 +1,8 @@ lustre_version.patch vfs_intent-2.6-sles10.patch -vfs_nointent-2.6-sles10.patch vfs_races-2.6.18-vanilla.patch +ext3-wantedi-misc-2.6-suse.patch jbd-2.6.10-jcberr.patch -nfs-cifs-intent-2.6-fc5.patch iopen-misc-2.6.12.patch export-truncate-2.6-suse.patch export_symbols-2.6.12.patch @@ -14,3 +13,5 @@ remove-suid-2.6-suse.patch export-show_task-2.6-fc5.patch sd_iostats-2.6-rhel4.patch export_symbol_numa-2.6-fc5.patch +blkdev_tunables-2.6-sles10.patch + diff --git a/lustre/kernel_patches/series/2.6-suse-newer.series b/lustre/kernel_patches/series/2.6-suse-newer.series index c284949..1e56ba5 100644 --- a/lustre/kernel_patches/series/2.6-suse-newer.series +++ b/lustre/kernel_patches/series/2.6-suse-newer.series @@ -1,7 +1,7 @@ lustre-version-revert_suse.patch lustre_version.patch dev_read_only-2.6-lnxi.patch -sd_iostats-2.6-suse.patch +sd_iostats-2.6-rhel4.patch blkdev_tunables-2.6-suse.patch uml-exprt-clearuser.patch qsnet-suse-2.6.patch diff --git a/lustre/kernel_patches/series/2.6-suse.series b/lustre/kernel_patches/series/2.6-suse.series index 8bc1f6e..7a39b32 100644 --- a/lustre/kernel_patches/series/2.6-suse.series +++ b/lustre/kernel_patches/series/2.6-suse.series @@ -2,6 +2,7 @@ lustre_version.patch vfs_intent-2.6-suse.patch vfs_nointent-2.6-suse.patch vfs_races-2.6-suse.patch +ext3-wantedi-misc-2.6-suse.patch nfs-cifs-intent-2.6-suse.patch iopen-misc-2.6-suse.patch export-truncate-2.6-suse.patch diff --git a/lustre/kernel_patches/series/2.6.12-vanilla.series b/lustre/kernel_patches/series/2.6.12-vanilla.series index 05bf944..c164ffb 100644 --- a/lustre/kernel_patches/series/2.6.12-vanilla.series +++ b/lustre/kernel_patches/series/2.6.12-vanilla.series @@ -1,9 +1,8 @@ lustre_version.patch vfs_intent-2.6.12.patch -vfs_nointent-2.6.12.patch vfs_races-2.6.12.patch +ext3-wantedi-misc-2.6-suse.patch jbd-2.6.10-jcberr.patch -nfs-cifs-intent-2.6.12.patch iopen-misc-2.6.12.patch export-truncate-2.6-suse.patch export_symbols-2.6.12.patch diff --git a/lustre/kernel_patches/series/2.6.18-vanilla.series b/lustre/kernel_patches/series/2.6.18-vanilla.series index 05b76c4..a973855 100644 --- a/lustre/kernel_patches/series/2.6.18-vanilla.series +++ b/lustre/kernel_patches/series/2.6.18-vanilla.series @@ -1,17 +1,13 @@ lustre_version.patch -vfs_intent-2.6.18-vanilla.patch -vfs_nointent-2.6.18-vanilla.patch vfs_races-2.6.18-vanilla.patch +i_filter_data.patch jbd-jcberr-2.6.18-vanilla.patch iopen-misc-2.6.18-vanilla.patch export-truncate-2.6.18-vanilla.patch export_symbols-2.6.18-vanilla.patch dev_read_only-2.6.18-vanilla.patch export-2.6.18-vanilla.patch -lookup_bdev_init_intent-2.6.18-vanilla.patch 8kstack-2.6.12.patch -remove-suid-2.6-suse.patch export-show_task-2.6.18-vanilla.patch sd_iostats-2.6-rhel4.patch -export_symbol_numa-2.6-fc5.patch -export-do_kern_mount.patch +export_symbol_numa-2.6.18.patch diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20 deleted file mode 100644 index 11a7c93..0000000 --- a/lustre/kernel_patches/series/hp-pnnl-2.4.20 +++ /dev/null @@ -1,49 +0,0 @@ -configurable-x86-stack-2.4.20.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.20-hp.patch -invalidate_show-2.4.20-hp.patch -export-truncate.patch -iod-stock-24-exports_hp.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54-hp.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.20-hp.patch -ext3-noread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -add_page_private.patch -socket-exports-vanilla.patch -removepage-2.4.20.patch -jbd-ctx_switch.patch -jbd-flushtime.patch -jbd-get_write_access.patch -nfs_export_kernel-2.4.20-hp.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.20.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -ext3-xattr-ptr-arith-fix.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -dcache_refcount_debug.patch -ext3-extents-2.4.24.patch -ext3-extents-asyncdel-2.4.24.patch -ext3-nlinks-2.4.20-hp_pnnl.patch -export-zap-page-range.patch -ext3-sector_t-overflow-2.4.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series b/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series deleted file mode 100644 index 3661023..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series +++ /dev/null @@ -1,13 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-rhel4.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-rhel4.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.9-rhel4.patch -ext3-mballoc2-2.6.9-rhel4.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-fc5.series b/lustre/kernel_patches/series/ldiskfs-2.6-fc5.series deleted file mode 100644 index 1c853bd..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-fc5.series +++ /dev/null @@ -1,12 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.15.patch -ext3-mballoc2-2.6-fc5.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series b/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series deleted file mode 100644 index ee07d11..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series +++ /dev/null @@ -1,17 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-rhel4.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-rhel4.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.9-rhel4.patch -ext3-mballoc2-2.6.9-rhel4.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.9-rhel4.patch -ext3-check-jbd-errors-2.6.9.patch -ext3-nanosecond-2.6-rhel4.patch -ext3-extents-bug11324.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series b/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series deleted file mode 100644 index 11f62b0..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-sles10.series +++ /dev/null @@ -1,14 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.16-sles10.patch -ext3-mballoc2-2.6-fc5.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch -ext3-disable-write-bar-by-default-2.6-sles10.patch -ext3-nanosecond-2.6-sles10.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series b/lustre/kernel_patches/series/ldiskfs-2.6-suse.series deleted file mode 100644 index e27e861..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series +++ /dev/null @@ -1,17 +0,0 @@ -ext3-wantedi-2.6-suse.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-suse.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-suse.patch -export-ext3-2.6-suse.patch -ext3-include-fixes-2.6-suse.patch -ext3-extents-2.6.5.patch -ext3-mballoc2-2.6-suse.patch -ext3-nlinks-2.6.7.patch -ext3-rename-reserve-2.6-suse.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.5-suse.patch -ext3-check-jbd-errors-2.6.5.patch -ext3-nanosecond-2.6-suse.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series deleted file mode 100644 index 53c060b..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series +++ /dev/null @@ -1,15 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6.12.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.12.patch -ext3-mballoc2-2.6.12.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-htree-dot-2.6.patch -ext3-external-journal-2.6.12.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.12.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series deleted file mode 100644 index d5a8733..0000000 --- a/lustre/kernel_patches/series/ldiskfs-2.6.18-vanilla.series +++ /dev/null @@ -1,13 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.18-vanilla.patch -ext3-mballoc2-2.6.18-vanilla.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch -ext3-nanosecond-2.6.18-vanilla.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-fc3.series b/lustre/kernel_patches/series/ldiskfs2-2.6-fc3.series deleted file mode 100644 index 3661023..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6-fc3.series +++ /dev/null @@ -1,13 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-rhel4.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-rhel4.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.9-rhel4.patch -ext3-mballoc2-2.6.9-rhel4.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-fc5.series b/lustre/kernel_patches/series/ldiskfs2-2.6-fc5.series deleted file mode 100644 index 1c853bd..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6-fc5.series +++ /dev/null @@ -1,12 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.15.patch -ext3-mballoc2-2.6-fc5.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-rhel4.series b/lustre/kernel_patches/series/ldiskfs2-2.6-rhel4.series deleted file mode 100644 index 27aed1d..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6-rhel4.series +++ /dev/null @@ -1,16 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-rhel4.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-rhel4.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.9-rhel4.patch -ext3-mballoc2-2.6.9-rhel4.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.9-rhel4.patch -ext3-check-jbd-errors-2.6.9.patch -ext3-nanosecond-2.6-rhel4.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series b/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series deleted file mode 100644 index 11f62b0..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6-sles10.series +++ /dev/null @@ -1,14 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.16-sles10.patch -ext3-mballoc2-2.6-fc5.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch -ext3-disable-write-bar-by-default-2.6-sles10.patch -ext3-nanosecond-2.6-sles10.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6-suse.series b/lustre/kernel_patches/series/ldiskfs2-2.6-suse.series deleted file mode 100644 index e27e861..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6-suse.series +++ /dev/null @@ -1,17 +0,0 @@ -ext3-wantedi-2.6-suse.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-suse.patch -export_symbols-ext3-2.6-suse.patch -ext3-map_inode_page-2.6-suse.patch -ext3-ea-in-inode-2.6-suse.patch -export-ext3-2.6-suse.patch -ext3-include-fixes-2.6-suse.patch -ext3-extents-2.6.5.patch -ext3-mballoc2-2.6-suse.patch -ext3-nlinks-2.6.7.patch -ext3-rename-reserve-2.6-suse.patch -ext3-ialloc-2.6.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.5-suse.patch -ext3-check-jbd-errors-2.6.5.patch -ext3-nanosecond-2.6-suse.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6.12-vanilla.series b/lustre/kernel_patches/series/ldiskfs2-2.6.12-vanilla.series deleted file mode 100644 index 53c060b..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6.12-vanilla.series +++ /dev/null @@ -1,15 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6.12.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.12.patch -ext3-mballoc2-2.6.12.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-htree-dot-2.6.patch -ext3-external-journal-2.6.12.patch -ext3-lookup-dotdot-2.6.9.patch -ext3-sector_t-overflow-2.6.12.patch diff --git a/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series b/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series deleted file mode 100644 index d5a8733..0000000 --- a/lustre/kernel_patches/series/ldiskfs2-2.6.18-vanilla.series +++ /dev/null @@ -1,13 +0,0 @@ -ext3-wantedi-2.6-rhel4.patch -ext3-san-jdike-2.6-suse.patch -iopen-2.6-fc5.patch -ext3-map_inode_page-2.6-suse.patch -export-ext3-2.6-rhel4.patch -ext3-include-fixes-2.6-rhel4.patch -ext3-extents-2.6.18-vanilla.patch -ext3-mballoc2-2.6.18-vanilla.patch -ext3-nlinks-2.6.9.patch -ext3-ialloc-2.6.patch -ext3-remove-cond_resched-calls-2.6.12.patch -ext3-filterdata-2.6.15.patch -ext3-nanosecond-2.6.18-vanilla.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 deleted file mode 100644 index 981707d..0000000 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ /dev/null @@ -1,54 +0,0 @@ -configurable-x86-stack-2.4.21-chaos.patch -dev_read_only_2.4.21-chaos.patch -exports_2.4.19-suse.patch -lustre_version.patch -vfs_intent-2.4.21-rhel.patch -invalidate_show-2.4.20-rh.patch -iod-rmap-exports-2.4.21-chaos.patch -export-truncate.patch -ext3-htree-2.4.21-rhel.patch -linux-2.4.21-xattr-0.8.54-chaos.patch -ext3-ino_sb_macro-2.4.21-chaos.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-delete_thread-2.4.21-chaos.patch -extN-misc-fixup.patch -ext3-noread-2.4.21-chaos.patch -extN-wantedi-2.4.21-chaos.patch -ext3-san-2.4.20.patch -extN-2.4.18-ino_sb_fixup.patch -ext3-map_inode_page_2.4.18.patch -ext3-error-export.patch -iopen-2.4.21-chaos.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks-rhel3.patch -ext3-o_direct-2.4.21-chaos.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -ext3-raw-lookup.patch -nfs_export_kernel-2.4.21-chaos.patch -ext3-ea-in-inode-2.4.21-chaos.patch -listman-2.4.21-chaos.patch -pagecache-lock-2.4.21-chaos.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -dcache_refcount_debug.patch -ext3-extents-2.4.21-chaos.patch -ext3-extents-asyncdel-2.4.21-chaos.patch -blkdev_tunables-2.4.21-chaos.patch -ext3-nlinks-2.4.21-chaos.patch -sd_iostats-2.4.21-chaos.patch -llnl-frame-pointer-walk-2.4.21-rhel.patch -llnl-frame-pointer-walk-fix-2.4.21-rhel.patch -export-show_task-2.4-rhel.patch -compile-fixes-2.4.21-rhel.patch -grab_cache_page_nowait_gfp-rh-2.4.patch -remove-suid-2.4-rhel.patch -qsnet-rhel-2.4.patch -nfs_statfs-toomanyfiles-rhel-2.4.patch -statfs64-cast-unsigned-2.4-rhel.patch -fsprivate-2.4.patch -nfsd_iallocsem.patch -linux-2.4.24-jbd-handle-EIO-rhel3.patch -ext3-lookup-dotdot-2.4.20.patch -ext3-sector_t-overflow-2.4.patch -nfs_export_kernel-getattr_on_lookup-2.4.patch diff --git a/lustre/kernel_patches/series/suse-2.4.21-cray b/lustre/kernel_patches/series/suse-2.4.21-cray deleted file mode 100644 index b3b06ff..0000000 --- a/lustre/kernel_patches/series/suse-2.4.21-cray +++ /dev/null @@ -1,43 +0,0 @@ -configurable-x86-stack-2.4.21-suse2.patch -configurable-x86_64-2.4.21.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.21-suse2.patch -invalidate_show.patch -export-truncate.patch -iod-stock-24-exports_hp.patch -ext3-htree-2.4.21-chaos.patch -linux-2.4.21-xattr-0.8.54-suse2.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-noread-2.4.21-suse2.patch -ext3-delete_thread-2.4.21-chaos.patch -extN-wantedi-2.4.21-suse2.patch -ext3-san-2.4.20.patch -ext3-map_inode_page-2.4.21-suse2.patch -ext3-error-export.patch -iopen-2.4.21-chaos.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -nfs_export_kernel-2.4.21-suse2.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.21-suse2.patch -listman-2.4.20.patch -ext3-xattr-ptr-arith-fix.patch -procfs-ndynamic-2.4.21-suse2.patch -ext3-truncate-buffer-head.patch -loop-sync-2.4.21-suse.patch -inode-max-readahead-2.4.24.patch -ext3-extents-2.4.21-suse2.patch -ext3-extents-asyncdel-2.4.24.patch -ext3-nlinks-2.4.21-chaos.patch -export-show_task-2.4-cray.patch -grab_cache_page_nowait_gfp-2.4.21-suse2.patch -remove-suid-2.4-rhel.patch -fsprivate-2.4-suse.patch -nfsd_iallocsem.patch -linux-2.4.24-jbd-handle-EIO.patch -ext3-ialloc-2.4.21-suse2.patch -ext3-sector_t-overflow-2.4.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.24 b/lustre/kernel_patches/series/vanilla-2.4.24 deleted file mode 100644 index 02b1603..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.24 +++ /dev/null @@ -1,50 +0,0 @@ -uml-patch-2.4.24-1.patch -uml-2.4.20-do_mmap_pgoff-fix.patch -uml-export-end_iomem.patch -configurable-x86-stack-2.4.20.patch -configurable-x86_64-2.4.21.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch -iod-stock-exports-2.4.22.patch -ext3-htree-2.4.22-rh.patch -linux-2.4.24-xattr-0.8.54.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.24.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -nfs_export_kernel-2.4.22.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.22-rh.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -ext3-xattr-ptr-arith-fix.patch -3.5G-address-space-2.4.22-vanilla.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -ext3-extents-2.4.24.patch -ext3-extents-asyncdel-2.4.24.patch -export_num_siblings.patch -ext3-nlinks-2.4.24.patch -export-show_task-2.4-vanilla.patch -export-zap-page-range.patch -uml-sigusr1-2.4-vanilla.patch -remove-suid-2.4-rhel.patch -uml-exprt-clearuser.patch -fsprivate-2.4.patch -nfsd_iallocsem.patch -linux-2.4.24-jbd-handle-EIO.patch -ext3-sector_t-overflow-2.4.patch -nfs_export_kernel-getattr_on_lookup-2.4.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.29 b/lustre/kernel_patches/series/vanilla-2.4.29 deleted file mode 100644 index 88983ad..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.29 +++ /dev/null @@ -1,45 +0,0 @@ -configurable-x86-stack-2.4.20.patch -configurable-x86_64-2.4.21.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.29-vanilla.patch -invalidate_show-2.4.29.patch -export-truncate.patch -iod-stock-exports-2.4.22.patch -ext3-htree-2.4.29.patch -linux-2.4.29-xattr-0.8.54.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.29.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -nfs_export_kernel-2.4.29.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.29.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -ext3-xattr-ptr-arith-fix.patch -3.5G-address-space-2.4.22-vanilla.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -ext3-extents-2.4.29.patch -ext3-extents-asyncdel-2.4.24.patch -ext3-nlinks-2.4.24.patch -ext3-ialloc-2.4.24.patch -export-show_task-2.4-vanilla.patch -export-zap-page-range.patch -remove-suid-2.4-rhel.patch -kallsyms-2.4.29.patch -fsprivate-2.4.patch -nfsd_iallocsem.patch -linux-2.4.24-jbd-handle-EIO.patch -ext3-sector_t-overflow-2.4.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.29-uml b/lustre/kernel_patches/series/vanilla-2.4.29-uml deleted file mode 100644 index 9740148..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.29-uml +++ /dev/null @@ -1,47 +0,0 @@ -uml-patch-2.4.29-1.patch -uml-2.4.20-do_mmap_pgoff-fix.patch -uml-export-end_iomem.patch -uml-exprt-clearuser.patch -configurable-x86-stack-2.4.20.patch -configurable-x86_64-2.4.21.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -lustre_version.patch -vfs_intent-2.4.29-vanilla.patch -invalidate_show-2.4.29.patch -export-truncate.patch -iod-stock-exports-2.4.22.patch -ext3-htree-2.4.29.patch -linux-2.4.29-xattr-0.8.54.patch -ext3-orphan_lock-2.4.22-rh.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.29.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -ext3-map_inode_page.patch -ext3-error-export.patch -iopen-2.4.20.patch -jbd-dont-account-blocks-twice.patch -jbd-commit-tricks.patch -ext3-no-write-super-chaos.patch -add_page_private.patch -nfs_export_kernel-2.4.29.patch -ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.29.patch -listman-2.4.20.patch -ext3-trusted_ea-2.4.20.patch -ext3-xattr-ptr-arith-fix.patch -3.5G-address-space-2.4.22-vanilla.patch -procfs-ndynamic-2.4.patch -ext3-truncate-buffer-head.patch -inode-max-readahead-2.4.24.patch -ext3-extents-2.4.29.patch -ext3-extents-asyncdel-2.4.24.patch -ext3-nlinks-2.4.24.patch -export-show_task-2.4-vanilla.patch -export-zap-page-range.patch -remove-suid-2.4-rhel.patch -kallsyms-2.4.29.patch -fsprivate-2.4.patch -nfsd_iallocsem.patch -ext3-sector_t-overflow-2.4.patch diff --git a/lustre/kernel_patches/targets/2.6-rhel4.target.in b/lustre/kernel_patches/targets/2.6-rhel4.target.in index 0133814..e744d92 100644 --- a/lustre/kernel_patches/targets/2.6-rhel4.target.in +++ b/lustre/kernel_patches/targets/2.6-rhel4.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.9" -lnxrel="42.0.10.EL" +lnxrel="55.EL" KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 SERIES=2.6-rhel4.series diff --git a/lustre/kernel_patches/targets/2.6-sles10.target.in b/lustre/kernel_patches/targets/2.6-sles10.target.in index cff6a5f..4dc2ba4 100644 --- a/lustre/kernel_patches/targets/2.6-sles10.target.in +++ b/lustre/kernel_patches/targets/2.6-sles10.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.16" -lnxrel="27-0.9" +lnxrel="46-0.14" KERNEL=linux-$lnxmaj.$lnxrel.tar.bz2 SERIES=2.6-sles10.series diff --git a/lustre/kernel_patches/targets/2.6-suse.target.in b/lustre/kernel_patches/targets/2.6-suse.target.in index 757ed4d..22a8b5c 100644 --- a/lustre/kernel_patches/targets/2.6-suse.target.in +++ b/lustre/kernel_patches/targets/2.6-suse.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.5" -lnxrel="7.283" +lnxrel="7.276" KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2 # they include our patches diff --git a/lustre/kernel_patches/targets/hp_pnnl-2.4.target.in b/lustre/kernel_patches/targets/hp_pnnl-2.4.target.in deleted file mode 100644 index 28c1be3..0000000 --- a/lustre/kernel_patches/targets/hp_pnnl-2.4.target.in +++ /dev/null @@ -1,17 +0,0 @@ -lnxmaj=2.4.20 -lnxrel=hp_pnnl - -KERNEL=linux-$lnxmaj-$lnxrel.tar.gz -SERIES=hp-pnnl-2.4.20 -VERSION=$lnxmaj -EXTRA_VERSION=$lnxrel_lustre.@VERSION@ -LUSTRE_VERSION=@VERSION@ -RHBUILD=0 - -BASE_ARCHS="ia64" -BIGMEM_ARCHS="" -BOOT_ARCHS="" -JENSEN_ARCHS="" -SMP_ARCHS="ia64" -UP_ARCHS="" -SRC_ARCHS="ia64" diff --git a/lustre/kernel_patches/targets/rh-2.4.target.in b/lustre/kernel_patches/targets/rh-2.4.target.in deleted file mode 100644 index d27ed40..0000000 --- a/lustre/kernel_patches/targets/rh-2.4.target.in +++ /dev/null @@ -1,24 +0,0 @@ -lnxmaj="2.4.20" -lnxrel="31.9" - -KERNEL=linux-${lnxmaj}-${lnxrel}.tar.gz -SERIES=rh-2.4.20 -VERSION=$lnxmaj -EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ -LUSTRE_VERSION=@VERSION@ -RHBUILD=1 - -BASE_ARCHS="i686" -BIGMEM_ARCHS="" -BOOT_ARCHS="" -JENSEN_ARCHS="" -SMP_ARCHS="i686" -UP_ARCHS="" - -# the modules in this kernel do not build with gcc 3 -for cc in i386-redhat-linux-gcc-2.96 gcc296 gcc ; do - if which $cc >/dev/null 2>/dev/null ; then - CC=$cc - break - fi -done diff --git a/lustre/kernel_patches/targets/rhel-2.4.target.in b/lustre/kernel_patches/targets/rhel-2.4.target.in deleted file mode 100644 index cea2b02..0000000 --- a/lustre/kernel_patches/targets/rhel-2.4.target.in +++ /dev/null @@ -1,24 +0,0 @@ -lnxmaj="2.4.21" -lnxrel="47.0.1.EL" - -KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 -SERIES=rhel-2.4.21 -VERSION=${lnxmaj} -LUSTRE_VERSION=@VERSION@ -EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ -RHBUILD=1 - -BASE_ARCHS="i686 x86_64 ia64" -BIGMEM_ARCHS="" -BOOT_ARCHS="" -JENSEN_ARCHS="" -SMP_ARCHS="i686 x86_64 ia64" -UP_ARCHS="" - -# the modules in this kernel do not build with gcc 4, 3.4, or 2.96 -for cc in gcc32 gcc33 ; do - if which $cc >/dev/null 2>/dev/null ; then - export CC=$cc - break - fi -done diff --git a/lustre/kernel_patches/targets/sles-2.4.target.in b/lustre/kernel_patches/targets/sles-2.4.target.in deleted file mode 100644 index badeaeb..0000000 --- a/lustre/kernel_patches/targets/sles-2.4.target.in +++ /dev/null @@ -1,26 +0,0 @@ -lnxmaj="2.4.21" -lnxrel="273" - -KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 -SERIES=suse-2.4.21-jvn -VERSION=${lnxmaj} -EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ -LUSTRE_VERSION=@VERSION@ -RHBUILD=0 -LINUX26=0 -SUSEBUILD=1 - -BASE_ARCHS="i686" -BIGMEM_ARCHS="" -BOOT_ARCHS="" -JENSEN_ARCHS="" -SMP_ARCHS="i686" -UP_ARCHS="" - -# the modules in this kernel do not build with gcc 4 or 2.96 -for cc in gcc32 gcc33 ; do - if which $cc >/dev/null 2>/dev/null ; then - export CC=$cc - break - fi -done diff --git a/lustre/kernel_patches/targets/suse-2.4.21-2.target.in b/lustre/kernel_patches/targets/suse-2.4.21-2.target.in deleted file mode 100644 index d00ca78..0000000 --- a/lustre/kernel_patches/targets/suse-2.4.21-2.target.in +++ /dev/null @@ -1,15 +0,0 @@ -KERNEL=linux-2.4.21-x86_64.tar.gz -SERIES=suse-2.4.21-2 -VERSION=2.4.21 -EXTRA_VERSION=lustre.@VERSION@ -LUSTRE_VERSION=@VERSION@ -RHBUILD=0 - -BASE_ARCHS="x86_64" -BIGMEM_ARCHS="" -BOOT_ARCH="" -JENSEN_ARCHS="" -SMP_ARCHS="" -UP_ARCHS="x86_64" - -ARCH="x86_64" diff --git a/lustre/kernel_patches/txt/dev_read_only.txt b/lustre/kernel_patches/txt/dev_read_only.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/dev_read_only.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/exports.txt b/lustre/kernel_patches/txt/exports.txt new file mode 100644 index 0000000..00b991e --- /dev/null +++ b/lustre/kernel_patches/txt/exports.txt @@ -0,0 +1,3 @@ +DESC +Required kernel function exports for Lustre. +EDESC diff --git a/lustre/kernel_patches/txt/exports_hp.txt b/lustre/kernel_patches/txt/exports_hp.txt new file mode 100644 index 0000000..00b991e --- /dev/null +++ b/lustre/kernel_patches/txt/exports_hp.txt @@ -0,0 +1,3 @@ +DESC +Required kernel function exports for Lustre. +EDESC diff --git a/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt b/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt new file mode 100644 index 0000000..b890cbd --- /dev/null +++ b/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt @@ -0,0 +1,3 @@ +DESC +Fix for block allocation errors if block bitmap or inode block list is corrupt. +EDESC diff --git a/lustre/kernel_patches/txt/ext3-map_inode_page.txt b/lustre/kernel_patches/txt/ext3-map_inode_page.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/ext3-map_inode_page.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/ext3-map_inode_page_2.4.18.txt b/lustre/kernel_patches/txt/ext3-map_inode_page_2.4.18.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/ext3-map_inode_page_2.4.18.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/invalidate_show.txt b/lustre/kernel_patches/txt/invalidate_show.txt new file mode 100644 index 0000000..88f093a --- /dev/null +++ b/lustre/kernel_patches/txt/invalidate_show.txt @@ -0,0 +1,3 @@ +DESC +Prints which inodes are busy at filesystem unmount time. +EDESC diff --git a/lustre/kernel_patches/txt/kmem_cache_validate.txt b/lustre/kernel_patches/txt/kmem_cache_validate.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/kmem_cache_validate.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/lustre_version.txt b/lustre/kernel_patches/txt/lustre_version.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/lustre_version.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/uml_check_get_page.txt b/lustre/kernel_patches/txt/uml_check_get_page.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/uml_check_get_page.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/txt/uml_no_panic.txt b/lustre/kernel_patches/txt/uml_no_panic.txt new file mode 100644 index 0000000..010cdb7 --- /dev/null +++ b/lustre/kernel_patches/txt/uml_no_panic.txt @@ -0,0 +1,3 @@ +DESC +(undescribed patch) +EDESC diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index e9de5ee..fdbf77e 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -1,12 +1,11 @@ -SERIES VERSION COMMENT +SERIES VERSION COMMENT SUPPORTED KERNELS: -rhel-2.4.21 RHEL3: 2.4.21-47.0.1.EL -2.6-suse SLES9 before SP1 already in SLES9 SP1 kernel -2.6-suse-newer SLES9: 2.6.5-7.282 extra patches for SLES9 after SP1 -2.6-rhel4 RHEL4: 2.6.9-42.0.8.EL -2.6.12-vanilla kernel.org: 2.6.12.6 -2.6-sles10 SLES10: 2.6.16.21-0.8 +2.6-suse SLES9 before SP1 already in SLES9 SP1 kernel +2.6-suse-newer SLES9: 2.6.5-7.282 extra patches for SLES9 after SP1 +2.6-rhel4 RHEL4: 2.6.9-55.EL +2.6-sles10 SLES10: 2.6.16.46-0.14 +2.6.18-vanilla.series kernel.org: 2.6.18.8 CLIENT SUPPORT FOR UNPATCHED KERNELS: kernel.org 2.6.16-2.6.19 @@ -16,9 +15,3 @@ NB - The patches in the 2.6-suse series should already be in the SLES9 SP1 kernel. The patches in the 2.6-suse-newer series are patches that have been created since the SP1 kernel was released and should be applied to the already-patched SP1 kernel. - -NB - The patches in the ldiskfs series should not be applied to the kernel. - They are instead applied by the lustre build process to create the - ldiskfs kernel module instead of modifying the core ext3 code. - - diff --git a/lustre/ldiskfs/Makefile.in b/lustre/ldiskfs/Makefile.in deleted file mode 100644 index eeb1bed..0000000 --- a/lustre/ldiskfs/Makefile.in +++ /dev/null @@ -1,21 +0,0 @@ -default: all - -MODULES := ldiskfs - -# copy makefile over to not break patches -ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile) - -ext3_headers := $(wildcard @LINUX@/fs/ext3/*.h) -linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h) - -ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c)) -new_sources := iopen.c iopen.h extents.c mballoc.c -new_headers := ext3_extents.h -ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) -ldiskfs_sources := $(ldiskfs_patched_sources) - -ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o)) - -EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs - -@INCLUDE_RULES@ diff --git a/lustre/ldiskfs/autoMakefile.am b/lustre/ldiskfs/autoMakefile.am deleted file mode 100644 index 8ac1b87..0000000 --- a/lustre/ldiskfs/autoMakefile.am +++ /dev/null @@ -1,80 +0,0 @@ -if MODULES -if LDISKFS -modulefs_DATA = ldiskfs$(KMODEXT) -endif -endif - -ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers)))) - -$(filter %.c,$(ldiskfs_patched_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_patched_sources)) - -ldiskfs_sed_flags = \ - -e "s/dx_hash_info/ext3_dx_hash_info/g" \ - -e "s/dir_private_info/ext3_dir_private_info/g" \ - -e "s/DX_HASH/EXT3_DX_HASH/g" \ - -e "s/reserve_window/ext3_reserve_window/g" \ - -e "s/rsv_window_add/ext3_rsv_window_add/g" \ - -e "s/EXT3/LDISKFS/g" -e "s/ext3/ldiskfs/g" - -%.c: linux-stage/fs/ext3/%.c - sed $(strip $(ldiskfs_sed_flags)) $< > $@ - -%.h: linux-stage/fs/ext3/%.h - sed $(strip $(ldiskfs_sed_flags)) $< > $@ - -linux/ldiskfs%.h: linux-stage/include/linux/ext3%.h - sed $(strip $(ldiskfs_sed_flags)) $< > $@ - -# -# FIXME: we need to grab the series in configure somehow -# (see bug 1679) -# -series := @top_srcdir@/lustre/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES) -patches := @top_srcdir@/lustre/kernel_patches/patches - -sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series) - rm -rf linux-stage linux sources $(ldiskfs_SOURCES) - mkdir -p linux-stage/fs/ext3 linux-stage/include/linux - cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3 - cp $(linux_headers) linux-stage/include/linux -if USE_QUILT - ln -s ../$(patches) linux-stage/patches - ln -s ../$(series) linux-stage/series - cd linux-stage && quilt push -a -q -else - @echo -n "Applying ext3 patches:" - @cd linux-stage && for i in $$(<../$(series)) ; do \ - echo -n " $$i" ; \ - patch -s -p1 < ../$(patches)/$$i || exit 1 ; \ - done - @echo -endif - mkdir linux - @echo -n "Replacing 'ext3' with 'ldiskfs':" - @for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \ - echo -n " $$i" ; \ - sed $(strip $(ldiskfs_sed_flags)) \ - linux-stage/fs/ext3/$$i > $$i ; \ - done - @for i in $(subst ext3,,$(notdir $(linux_headers) $(new_headers))) ; do \ - echo -n " ext3$$i" ; \ - sed $(strip $(ldiskfs_sed_flags)) \ - linux-stage/include/linux/ext3$$i \ - > linux/ldiskfs$$i ; \ - done - @echo - touch sources - -foo-check: - @echo "ldiskfs_sources: $(ldiskfs_sources)" - @echo "ldiskfs_SOURCES: $(ldiskfs_SOURCES)" - @echo "ldiskfs_headers: $(ldiskfs_headers)" - @echo "ldiskfs_objects: $(ldiskfs_objects)" - @echo "ldiskfs_OBJECTS: $(ldiskfs_OBJECTS)" - @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)" - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ -CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers)) - -clean: clean-am - rm -rf linux linux-stage diff --git a/lustre/ldiskfs2/Makefile.in b/lustre/ldiskfs2/Makefile.in deleted file mode 100644 index d7739d8..0000000 --- a/lustre/ldiskfs2/Makefile.in +++ /dev/null @@ -1,21 +0,0 @@ -default: all - -MODULES := ldiskfs2 - -# copy makefile over to not break patches -ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile) - -ext3_headers := $(wildcard @LINUX@/fs/ext3/*.h) -linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h) - -ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c)) -new_sources := iopen.c iopen.h extents.c mballoc.c -new_headers := ext3_extents.h -ldiskfs2_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers) -ldiskfs2_sources := $(ldiskfs2_patched_sources) - -ldiskfs2-objs := $(filter %.o,$(ldiskfs2_sources:.c=.o)) - -EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs2 - -@INCLUDE_RULES@ diff --git a/lustre/ldiskfs2/autoMakefile.am b/lustre/ldiskfs2/autoMakefile.am deleted file mode 100644 index 5b7ac7c..0000000 --- a/lustre/ldiskfs2/autoMakefile.am +++ /dev/null @@ -1,80 +0,0 @@ -if MODULES -if LDISKFS -modulefs_DATA = ldiskfs2$(KMODEXT) -endif -endif - -ldiskfs2_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs2,$(notdir $(linux_headers)))) - -$(filter %.c,$(ldiskfs2_patched_sources)): sources $(ldiskfs2_linux_headers) $(filter %.h,$(ldiskfs2_patched_sources)) - -ldiskfs2_sed_flags = \ - -e "s/dx_hash_info/ext3_dx_hash_info/g" \ - -e "s/dir_private_info/ext3_dir_private_info/g" \ - -e "s/DX_HASH/EXT3_DX_HASH/g" \ - -e "s/reserve_window/ext3_reserve_window/g" \ - -e "s/rsv_window_add/ext3_rsv_window_add/g" \ - -e "s/EXT3/LDISKFS2/g" -e "s/ext3/ldiskfs2/g" - -%.c: linux-stage/fs/ext3/%.c - sed $(strip $(ldiskfs2_sed_flags)) $< > $@ - -%.h: linux-stage/fs/ext3/%.h - sed $(strip $(ldiskfs2_sed_flags)) $< > $@ - -linux/ldiskfs2%.h: linux-stage/include/linux/ext3%.h - sed $(strip $(ldiskfs2_sed_flags)) $< > $@ - -# -# FIXME: we need to grab the series in configure somehow -# (see bug 1679) -# -series := @top_srcdir@/lustre/kernel_patches/series/ldiskfs2-$(LDISKFS_SERIES) -patches := @top_srcdir@/lustre/kernel_patches/patches - -sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series) - rm -rf linux-stage linux sources $(ldiskfs2_SOURCES) - mkdir -p linux-stage/fs/ext3 linux-stage/include/linux - cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3 - cp $(linux_headers) linux-stage/include/linux -if USE_QUILT - ln -s ../$(patches) linux-stage/patches - ln -s ../$(series) linux-stage/series - cd linux-stage && quilt push -a -q -else - @echo -n "Applying ext3 patches:" - @cd linux-stage && for i in $$(<../$(series)) ; do \ - echo -n " $$i" ; \ - patch -s -p1 < ../$(patches)/$$i || exit 1 ; \ - done - @echo -endif - mkdir linux - @echo -n "Replacing 'ext3' with 'ldiskfs2':" - @for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \ - echo -n " $$i" ; \ - sed $(strip $(ldiskfs2_sed_flags)) \ - linux-stage/fs/ext3/$$i > $$i ; \ - done - @for i in $(subst ext3,,$(notdir $(linux_headers) $(new_headers))) ; do \ - echo -n " ext3$$i" ; \ - sed $(strip $(ldiskfs2_sed_flags)) \ - linux-stage/include/linux/ext3$$i \ - > linux/ldiskfs2$$i ; \ - done - @echo - touch sources - -foo-check: - @echo "ldiskfs2_sources: $(ldiskfs2_sources)" - @echo "ldiskfs2_SOURCES: $(ldiskfs2_SOURCES)" - @echo "ldiskfs2_headers: $(ldiskfs2_headers)" - @echo "ldiskfs2_objects: $(ldiskfs2_objects)" - @echo "ldiskfs2_OBJECTS: $(ldiskfs2_OBJECTS)" - @echo "ldiskfs2_LDADD: $(ldiskfs2_LDADD)" - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ -CLEANFILES = sources $(notdir $(linux_headers) $(ext3_headers) $(ext3_sources) $(new_sources) $(new_headers)) - -clean: clean-am - rm -rf linux linux-stage diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index ab4e8a4..d84da7f 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -231,9 +231,9 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req, lock in the waiting queue or if there is not any, then in front of first non-GROUP lock */ if (lock->l_req_mode != LCK_GROUP) { - /* Ok, we hit non-GROUP lock, there should be no - more GROUP locks later on, queue in front of - first non-GROUP lock */ + /* Ok, we hit non-GROUP lock, there should + * be no more GROUP locks later on, queue in + * front of first non-GROUP lock */ ldlm_resource_insert_lock_after(lock, req); list_del_init(&lock->l_res_link); @@ -291,13 +291,13 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req, } if (unlikely(req_mode == LCK_GROUP && - (lock->l_req_mode != lock->l_granted_mode))) { + (lock->l_req_mode != lock->l_granted_mode))) { scan = 1; compat = 0; if (lock->l_req_mode != LCK_GROUP) { - /* Ok, we hit non-GROUP lock, there should - * be no more GROUP locks later on, queue in - * front of first non-GROUP lock */ + /* Ok, we hit non-GROUP lock, there should be no + more GROUP locks later on, queue in front of + first non-GROUP lock */ ldlm_resource_insert_lock_after(lock, req); list_del_init(&lock->l_res_link); diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index ec2e76f..0db3d41 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -334,7 +334,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, /* XXX - if ldlm_lock_new() can sleep we should * release the ns_lock, allocate the new lock, * and restart processing this lock. */ - new2 = ldlm_lock_create(ns, NULL, res->lr_name, LDLM_FLOCK, + new2 = ldlm_lock_create(ns, NULL, &res->lr_name, LDLM_FLOCK, lock->l_granted_mode, NULL, NULL, NULL, NULL, 0); if (!new2) { @@ -382,7 +382,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, if (*flags != LDLM_FL_WAIT_NOREPROC) { if (first_enq) { /* If this is an unlock, reprocess the waitq and - * send completions ASTs for locks that can now be + * send completions ASTs for locks that can now be * granted. The only problem with doing this * reprocessing here is that the completion ASTs for * newly granted locks will be sent before the unlock @@ -443,7 +443,6 @@ ldlm_flock_interrupted_wait(void *data) ldlm_lock_decref_internal(lock, lock->l_req_mode); ldlm_lock2handle(lock, &lockh); - /* coverity[check_return] */ ldlm_cli_cancel(&lockh); EXIT; } @@ -495,7 +494,7 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue waking up: rc = %d", rc); RETURN(rc); - + granted: LDLM_DEBUG(lock, "client-side enqueue granted"); diff --git a/lustre/ldlm/ldlm_inodebits.c b/lustre/ldlm/ldlm_inodebits.c index d8e7c3b..ac82ab0 100644 --- a/lustre/ldlm/ldlm_inodebits.c +++ b/lustre/ldlm/ldlm_inodebits.c @@ -62,7 +62,7 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req, l_sl_mode)->l_res_link; continue; } - + tmp_tail = tmp; if (LDLM_SL_HEAD(&lock->l_sl_mode)) tmp_tail = &list_entry(lock->l_sl_mode.next, @@ -74,7 +74,7 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req, /* conflicting policy */ if (!work_list) RETURN(0); - + compat = 0; if (lock->l_blocking_ast) ldlm_add_ast_work_item(lock, req, diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index fbc9c18..4a900d9 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -19,7 +19,8 @@ void ldlm_resource_insert_lock_after(struct ldlm_lock *original, void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list); struct ldlm_lock * ldlm_lock_create(struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, struct ldlm_res_id, + const struct lustre_handle *parent_lock_handle, + const struct ldlm_res_id *, ldlm_type_t type, ldlm_mode_t, ldlm_blocking_callback, ldlm_completion_callback, ldlm_glimpse_callback, void *data, __u32 lvb_len); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 29eb851..f6b025d 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -36,6 +36,7 @@ #include <lustre_mds.h> #include <lustre_dlm.h> #include <lustre_net.h> +#include <lustre_sec.h> /* @priority: if non-zero, move the selected to the list head * @create: if zero, only search in existed connections @@ -96,7 +97,6 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, } else { spin_unlock(&imp->imp_lock); GOTO(out_free, rc = -ENOENT); - } spin_unlock(&imp->imp_lock); @@ -175,6 +175,17 @@ out: RETURN(rc); } +static void destroy_import(struct obd_import *imp) +{ + /* drop security policy instance after all rpc finished/aborted + * to let all busy credentials be released. + */ + class_import_get(imp); + class_destroy_import(imp); + sptlrpc_import_put_sec(imp); + class_import_put(imp); +} + /* configure an RPC client OBD device * * lcfg parameters: @@ -182,9 +193,8 @@ out: * 2 - server UUID * 3 - inactive-on-startup */ -int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) +int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) { - struct lustre_cfg* lcfg = buf; struct client_obd *cli = &obddev->u.cli; struct obd_import *imp; struct obd_uuid server_uuid; @@ -196,12 +206,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) /* In a more perfect world, we would hang a ptlrpc_client off of * obd_type and just use the values from there. */ if (!strcmp(name, LUSTRE_OSC_NAME)) { -#ifdef __KERNEL__ - /* Can be removed in Lustre 1.8, for compatibility only */ - rq_portal = OST_IO_PORTAL; -#else rq_portal = OST_REQUEST_PORTAL; -#endif rp_portal = OSC_REPLY_PORTAL; connect_op = OST_CONNECT; } else if (!strcmp(name, LUSTRE_MDC_NAME)) { @@ -240,6 +245,10 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) sema_init(&cli->cl_sem, 1); sema_init(&cli->cl_mgc_sem, 1); + cli->cl_sec_conf.sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + cli->cl_sec_conf.sfc_bulk_csum = BULK_CSUM_ALG_NULL; + cli->cl_sec_conf.sfc_bulk_priv = BULK_PRIV_ALG_NULL; + cli->cl_sec_conf.sfc_flags = 0; cli->cl_conn_count = 0; memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2), min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), @@ -258,6 +267,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) client_obd_list_lock_init(&cli->cl_loi_list_lock); cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; + spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); spin_lock_init(&cli->cl_read_page_hist.oh_lock); @@ -281,6 +291,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) } else { cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT; } + rc = ldlm_get_ref(); if (rc) { CERROR("ldlm_get_ref failed: %d\n", rc); @@ -318,7 +329,6 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) CDEBUG(D_HA, "marking %s %s->%s as inactive\n", name, obddev->obd_name, cli->cl_target_uuid.uuid); - spin_lock(&imp->imp_lock); imp->imp_invalid = 1; spin_unlock(&imp->imp_lock); @@ -347,7 +357,8 @@ int client_obd_cleanup(struct obd_device *obddev) } /* ->o_connect() method for client side (OSC and MDC and MGC) */ -int client_connect_import(struct lustre_handle *dlm_handle, +int client_connect_import(const struct lu_env *env, + struct lustre_handle *dlm_handle, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { @@ -380,6 +391,11 @@ int client_connect_import(struct lustre_handle *dlm_handle, if (rc != 0) GOTO(out_ldlm, rc); + rc = sptlrpc_import_get_sec(imp, NULL, cli->cl_sec_conf.sfc_rpc_flavor, + cli->cl_sec_conf.sfc_flags); + if (rc) + GOTO(out_ldlm, rc); + ocd = &imp->imp_connect_data; if (data) { *ocd = *data; @@ -400,6 +416,7 @@ int client_connect_import(struct lustre_handle *dlm_handle, } ptlrpc_pinger_add_import(imp); + EXIT; if (rc) { @@ -451,28 +468,27 @@ int client_disconnect_export(struct obd_export *exp) spin_lock(&imp->imp_lock); imp->imp_deactive = 1; spin_unlock(&imp->imp_lock); - + /* Some non-replayable imports (MDS's OSCs) are pinged, so just * delete it regardless. (It's safe to delete an import that was * never added.) */ (void)ptlrpc_pinger_del_import(imp); if (obd->obd_namespace != NULL) { - /* obd_no_recov == local only */ + /* obd_force == local only */ ldlm_cli_cancel_unused(obd->obd_namespace, NULL, - obd->obd_no_recov ? LDLM_FL_LOCAL_ONLY:0, + obd->obd_force ? LDLM_FL_LOCAL_ONLY:0, NULL); - ldlm_namespace_free(obd->obd_namespace, obd->obd_no_recov); + ldlm_namespace_free(obd->obd_namespace, obd->obd_force); obd->obd_namespace = NULL; } - /* Yeah, obd_no_recov also (mainly) means "forced shutdown". */ - if (!obd->obd_no_recov) + if (!obd->obd_force) rc = ptlrpc_disconnect_import(imp, 0); ptlrpc_invalidate_import(imp); ptlrpc_free_rq_pool(imp->imp_rq_pool); - class_destroy_import(imp); + destroy_import(imp); cli->cl_import = NULL; EXIT; @@ -490,10 +506,10 @@ int client_disconnect_export(struct obd_export *exp) * -------------------------------------------------------------------------- */ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid) + struct obd_uuid *cluuid, int initial_conn) { ENTRY; - if (exp->exp_connection && exp->exp_imp_reverse) { + if (exp->exp_connection && exp->exp_imp_reverse && !initial_conn) { struct lustre_handle *hdl; hdl = &exp->exp_imp_reverse->imp_remote_handle; /* Might be a re-connect after a partition. */ @@ -539,7 +555,7 @@ void target_client_add_cb(struct obd_device *obd, __u64 transno, void *cb_data, } EXPORT_SYMBOL(target_client_add_cb); -int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) +int target_handle_connect(struct ptlrpc_request *req) { struct obd_device *target, *targref = NULL; struct obd_export *export = NULL; @@ -550,7 +566,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) struct obd_uuid remote_uuid; struct list_head *p; char *str, *tmp; - int rc = 0, abort_recovery; + int rc = 0; + int initial_conn = 0; struct obd_connect_data *data; int size[2] = { sizeof(struct ptlrpc_body), sizeof(*data) }; ENTRY; @@ -560,29 +577,21 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) LASSERT_REQSWAB(req, REQ_REC_OFF); str = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF, sizeof(tgtuuid)-1); if (str == NULL) { - DEBUG_REQ(D_ERROR, req, "bad target UUID for connect\n"); + DEBUG_REQ(D_ERROR, req, "bad target UUID for connect"); GOTO(out, rc = -EINVAL); } obd_str2uuid (&tgtuuid, str); target = class_uuid2obd(&tgtuuid); - /* COMPAT_146 */ - /* old (pre 1.6) lustre_process_log tries to connect to mdsname - (eg. mdsA) instead of uuid. */ - if (!target) { - snprintf((char *)tgtuuid.uuid, sizeof(tgtuuid), "%s_UUID", str); - target = class_uuid2obd(&tgtuuid); - } if (!target) target = class_name2obd(str); - /* end COMPAT_146 */ if (!target || target->obd_stopping || !target->obd_set_up) { - LCONSOLE_ERROR("UUID '%s' is not available " - " for connect (%s)\n", str, - !target ? "no target" : - (target->obd_stopping ? "stopping" : - "not set up")); + LCONSOLE_ERROR_MSG(0x137, "UUID '%s' is not available " + " for connect (%s)\n", str, + !target ? "no target" : + (target->obd_stopping ? "stopping" : + "not set up")); GOTO(out, rc = -ENODEV); } @@ -593,8 +602,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) GOTO(out, rc = -EAGAIN); } - /* Make sure the target isn't cleaned up while we're here. Yes, - there's still a race between the above check and our incref here. + /* Make sure the target isn't cleaned up while we're here. Yes, + there's still a race between the above check and our incref here. Really, class_uuid2obd should take the ref. */ targref = class_incref(target); @@ -602,7 +611,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) str = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF + 1, sizeof(cluuid) - 1); if (str == NULL) { - DEBUG_REQ(D_ERROR, req, "bad client UUID for connect\n"); + DEBUG_REQ(D_ERROR, req, "bad client UUID for connect"); GOTO(out, rc = -EINVAL); } @@ -623,12 +632,6 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) LBUG(); } - spin_lock_bh(&target->obd_processing_task_lock); - abort_recovery = target->obd_abort_recovery; - spin_unlock_bh(&target->obd_processing_task_lock); - if (abort_recovery) - target_abort_recovery(target); - tmp = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, sizeof conn); if (tmp == NULL) GOTO(out, rc = -EPROTO); @@ -637,6 +640,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) data = lustre_swab_reqbuf(req, REQ_REC_OFF + 3, sizeof(*data), lustre_swab_connect); + + if (!data) + GOTO(out, rc = -EPROTO); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) GOTO(out, rc); @@ -644,14 +651,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) { if (!data) { DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) " - "libclient connection attempt\n"); + "libclient connection attempt"); GOTO(out, rc = -EPROTO); } else if (data->ocd_version < LUSTRE_VERSION_CODE - LUSTRE_VERSION_ALLOWED_OFFSET || data->ocd_version > LUSTRE_VERSION_CODE + LUSTRE_VERSION_ALLOWED_OFFSET) { DEBUG_REQ(D_WARNING, req, "Refusing %s (%d.%d.%d.%d) " - "libclient connection attempt\n", + "libclient connection attempt", data->ocd_version < LUSTRE_VERSION_CODE ? "old" : "new", OBD_OCD_VERSION_MAJOR(data->ocd_version), @@ -670,6 +677,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) } } + if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) + initial_conn = 1; + /* lctl gets a backstage, all-access pass. */ if (obd_uuid_equals(&cluuid, &target->obd_uuid)) goto dont_check_exports; @@ -689,16 +699,16 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) /* make darn sure this is coming from the same peer * if the UUIDs matched */ if ((export->exp_connection != NULL) && - (strcmp(libcfs_nid2str(req->rq_peer.nid), - libcfs_nid2str(export->exp_connection->c_peer.nid)))) { - CWARN("%s: cookie %s seen on new NID %s when " - "existing NID %s is already connected\n", - target->obd_name, cluuid.uuid, - libcfs_nid2str(req->rq_peer.nid), - libcfs_nid2str(export->exp_connection->c_peer.nid)); - export = NULL; - rc = -EALREADY; - break; + (strcmp(libcfs_nid2str(req->rq_peer.nid), + libcfs_nid2str(export->exp_connection->c_peer.nid)))) { + CWARN("%s: cookie %s seen on new NID %s when " + "existing NID %s is already connected\n", + target->obd_name, cluuid.uuid, + libcfs_nid2str(req->rq_peer.nid), + libcfs_nid2str(export->exp_connection->c_peer.nid)); + export = NULL; + rc = -EALREADY; + break; } spin_lock(&export->exp_lock); @@ -707,11 +717,13 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) spin_unlock(&target->obd_dev_lock); LASSERT(export->exp_obd == target); - rc = target_handle_reconnect(&conn, export, &cluuid); + rc = target_handle_reconnect(&conn, export, &cluuid, + initial_conn); break; } export = NULL; } + /* If we found an export, we already unlocked. */ if (!export) { spin_unlock(&target->obd_dev_lock); @@ -724,19 +736,24 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) export, atomic_read(&export->exp_refcount)); GOTO(out, rc = -EBUSY); } else if (req->rq_export != NULL && - atomic_read(&export->exp_rpc_count) > 1) { + (atomic_read(&export->exp_rpc_count) > 1)) { CWARN("%s: refuse reconnection from %s@%s to 0x%p/%d\n", target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), export, atomic_read(&export->exp_rpc_count)); GOTO(out, rc = -EBUSY); - } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) { + } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 && + !initial_conn) { CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; " "cookies not random?\n", target->obd_name, libcfs_nid2str(req->rq_peer.nid), cluuid.uuid); GOTO(out, rc = -EALREADY); } else { OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout); + if (req->rq_export == NULL && initial_conn) + export->exp_last_request_time = + max(export->exp_last_request_time, + (time_t)CURRENT_SECONDS); } /* We want to handle EALREADY but *not* -EALREADY from @@ -747,12 +764,18 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) } else if (rc) { GOTO(out, rc); } - /* Tell the client if we're in recovery. */ /* If this is the first client, start the recovery timer */ + CWARN("%s: connection from %s@%s %st"LPU64" exp %p cur %ld last %ld\n", + target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), + target->obd_recovering ? "recovering/" : "", data->ocd_transno, + export, (long)CURRENT_SECONDS, + export ? (long)export->exp_last_request_time : 0); + + if (target->obd_recovering) { lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING); - target_start_recovery_timer(target, handler); + target_start_recovery_timer(target); } /* Tell the client if we support replayable requests */ @@ -770,16 +793,15 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) cfs_time_current()))); rc = -EBUSY; } else { - dont_check_exports: - rc = obd_connect(&conn, target, &cluuid, data); +dont_check_exports: + rc = obd_connect(req->rq_svc_thread->t_env, + &conn, target, &cluuid, data); } } else { rc = obd_reconnect(export, target, &cluuid, data); } - if (rc) GOTO(out, rc); - /* Return only the parts of obd_connect_data that we understand, so the * client knows that we don't understand the rest. */ if (data) @@ -797,7 +819,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) * that to go to zero before we get our new export reference. */ export = class_conn2export(&conn); if (!export) { - DEBUG_REQ(D_ERROR, req, "Missing export!\n"); + DEBUG_REQ(D_ERROR, req, "Missing export!"); GOTO(out, rc = -ENODEV); } @@ -814,13 +836,15 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) req->rq_export = export; spin_lock(&export->exp_lock); - if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) { + if (initial_conn) { + lustre_msg_set_conn_cnt(req->rq_repmsg, export->exp_conn_cnt + 1); + } else if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) { + spin_unlock(&export->exp_lock); CERROR("%s: %s already connected at higher conn_cnt: %d > %d\n", cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), export->exp_conn_cnt, lustre_msg_get_conn_cnt(req->rq_reqmsg)); - - spin_unlock(&export->exp_lock); + GOTO(out, rc = -EALREADY); } export->exp_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg); @@ -829,7 +853,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) { export->exp_libclient = 1; spin_unlock(&export->exp_lock); - + spin_lock(&target->obd_dev_lock); list_del_init(&export->exp_obd_chain_timed); spin_unlock(&target->obd_dev_lock); @@ -843,19 +867,53 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) req->rq_self, &remote_uuid); - if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) + if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) { + LASSERT(export->exp_imp_reverse); + sptlrpc_svc_install_rvs_ctx(export->exp_imp_reverse, + req->rq_svc_ctx); GOTO(out, rc = 0); + } - if (target->obd_recovering) + spin_lock_bh(&target->obd_processing_task_lock); + if (target->obd_recovering && !export->exp_in_recovery) { + spin_lock(&export->exp_lock); + export->exp_in_recovery = 1; + export->exp_req_replay_needed = 1; + export->exp_lock_replay_needed = 1; + spin_unlock(&export->exp_lock); + if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_TRANSNO) + && data->ocd_transno < target->obd_next_recovery_transno) + target->obd_next_recovery_transno = data->ocd_transno; target->obd_connected_clients++; - + /* each connected client is counted as recoverable */ + target->obd_recoverable_clients++; + atomic_inc(&target->obd_req_replay_clients); + atomic_inc(&target->obd_lock_replay_clients); + if (target->obd_connected_clients == + target->obd_max_recoverable_clients) + wake_up(&target->obd_next_transno_waitq); + } + spin_unlock_bh(&target->obd_processing_task_lock); memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, sizeof conn), sizeof conn); - if (export->exp_imp_reverse != NULL) - class_destroy_import(export->exp_imp_reverse); + if (export->exp_imp_reverse != NULL) { + /* destroyed import can be still referenced in ctxt */ + obd_set_info_async(export, strlen(KEY_REVIMP_UPD), + KEY_REVIMP_UPD, 0, NULL, NULL); + destroy_import(export->exp_imp_reverse); + } + + /* for the rest part, we return -ENOTCONN in case of errors + * in order to let client initialize connection again. + */ revimp = export->exp_imp_reverse = class_new_import(target); + if (!revimp) { + CERROR("fail to alloc new reverse import.\n"); + GOTO(out, rc = -ENOTCONN); + } + revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection); revimp->imp_client = &export->exp_obd->obd_ldlm_client; revimp->imp_remote_handle = conn; @@ -867,14 +925,22 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_NEXT_VER); } + rc = sptlrpc_import_get_sec(revimp, req->rq_svc_ctx, + req->rq_sec_flavor, 0); + if (rc) { + CERROR("Failed to get sec for reverse import: %d\n", rc); + export->exp_imp_reverse = NULL; + class_destroy_import(revimp); + } + class_import_put(revimp); out: if (export) { spin_lock(&export->exp_lock); export->exp_connecting = 0; spin_unlock(&export->exp_lock); - } - if (targref) + } + if (targref) class_decref(targref); if (rc) req->rq_status = rc; @@ -892,6 +958,7 @@ int target_handle_disconnect(struct ptlrpc_request *req) /* keep the rq_export around so we can send the reply */ req->rq_status = obd_disconnect(class_export_get(req->rq_export)); + RETURN(0); } @@ -900,7 +967,7 @@ void target_destroy_export(struct obd_export *exp) /* exports created from last_rcvd data, and "fake" exports created by lctl don't have an import */ if (exp->exp_imp_reverse != NULL) - class_destroy_import(exp->exp_imp_reverse); + destroy_import(exp->exp_imp_reverse); /* We cancel locks at disconnect time, but this will catch any locks * granted in a race with recovery-induced disconnect. */ @@ -912,23 +979,76 @@ void target_destroy_export(struct obd_export *exp) * Recovery functions */ +struct ptlrpc_request *ptlrpc_clone_req( struct ptlrpc_request *orig_req) +{ + struct ptlrpc_request *copy_req; + struct lustre_msg *copy_reqmsg; + struct ptlrpc_user_desc *udesc = NULL; + + OBD_ALLOC_PTR(copy_req); + if (!copy_req) + return NULL; + OBD_ALLOC(copy_reqmsg, orig_req->rq_reqlen); + if (!copy_reqmsg){ + OBD_FREE_PTR(copy_req); + return NULL; + } + + if (orig_req->rq_user_desc) { + int ngroups = orig_req->rq_user_desc->pud_ngroups; + + OBD_ALLOC(udesc, sptlrpc_user_desc_size(ngroups)); + if (!udesc) { + OBD_FREE(copy_reqmsg, orig_req->rq_reqlen); + OBD_FREE_PTR(copy_req); + return NULL; + } + memcpy(udesc, orig_req->rq_user_desc, + sptlrpc_user_desc_size(ngroups)); + } + + *copy_req = *orig_req; + memcpy(copy_reqmsg, orig_req->rq_reqmsg, orig_req->rq_reqlen); + copy_req->rq_reqmsg = copy_reqmsg; + copy_req->rq_user_desc = udesc; + + class_export_get(copy_req->rq_export); + CFS_INIT_LIST_HEAD(©_req->rq_list); + sptlrpc_svc_ctx_addref(copy_req); + + if (copy_req->rq_reply_state) { + /* the copied req takes over the reply state */ + orig_req->rq_reply_state = NULL; + /* to catch further access */ + orig_req->rq_repmsg = NULL; + orig_req->rq_replen = 0; + } + + return copy_req; +} -static void target_release_saved_req(struct ptlrpc_request *req) +void ptlrpc_free_clone( struct ptlrpc_request *req) { - if (req->rq_reply_state != NULL) { + if (req->rq_reply_state) { ptlrpc_rs_decref(req->rq_reply_state); - /* req->rq_reply_state = NULL; */ + req->rq_reply_state = NULL; } + sptlrpc_svc_ctx_decref(req); class_export_put(req->rq_export); + list_del(&req->rq_list); + + if (req->rq_user_desc) { + int ngroups = req->rq_user_desc->pud_ngroups; + OBD_FREE(req->rq_user_desc, sptlrpc_user_desc_size(ngroups)); + } OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); + OBD_FREE_PTR(req); } static void target_finish_recovery(struct obd_device *obd) { - struct list_head *tmp, *n; - + ENTRY; LCONSOLE_INFO("%s: sending delayed replies to recovered clients\n", obd->obd_name); @@ -941,37 +1061,37 @@ static void target_finish_recovery(struct obd_device *obd) rc < 0 ? "failed" : "complete", rc); } - list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { - struct ptlrpc_request *req; - req = list_entry(tmp, struct ptlrpc_request, rq_list); - list_del(&req->rq_list); - DEBUG_REQ(D_HA, req, "delayed:"); - ptlrpc_reply(req); - target_release_saved_req(req); - } obd->obd_recovery_end = CURRENT_SECONDS; + EXIT; } -static void abort_recovery_queue(struct obd_device *obd) +static void abort_req_replay_queue(struct obd_device *obd) { - struct ptlrpc_request *req; - struct list_head *tmp, *n; - int rc; + struct ptlrpc_request *req, *n; + + list_for_each_entry_safe(req, n, &obd->obd_req_replay_queue, rq_list) { + DEBUG_REQ(D_WARNING, req, "aborted:"); + req->rq_status = -ENOTCONN; + if (ptlrpc_error(req)) { + DEBUG_REQ(D_ERROR, req, + "failed abort_req_reply; skipping"); + } + ptlrpc_free_clone(req); + } +} + +static void abort_lock_replay_queue(struct obd_device *obd) +{ + struct ptlrpc_request *req, *n; - list_for_each_safe(tmp, n, &obd->obd_recovery_queue) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - list_del(&req->rq_list); + list_for_each_entry_safe(req, n, &obd->obd_lock_replay_queue, rq_list){ DEBUG_REQ(D_ERROR, req, "aborted:"); req->rq_status = -ENOTCONN; - req->rq_type = PTL_RPC_MSG_ERR; - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc == 0) { - ptlrpc_reply(req); - } else { + if (ptlrpc_error(req)) { DEBUG_REQ(D_ERROR, req, - "packing failed for abort-reply; skipping"); + "failed abort_lock_reply; skipping"); } - target_release_saved_req(req); + ptlrpc_free_clone(req); } } @@ -986,8 +1106,7 @@ static void abort_recovery_queue(struct obd_device *obd) */ void target_cleanup_recovery(struct obd_device *obd) { - struct list_head *tmp, *n; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *n; ENTRY; LASSERT(obd->obd_stopping); @@ -1002,43 +1121,19 @@ void target_cleanup_recovery(struct obd_device *obd) target_cancel_recovery_timer(obd); spin_unlock_bh(&obd->obd_processing_task_lock); - list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - list_del(&req->rq_list); - target_release_saved_req(req); + list_for_each_entry_safe(req, n, &obd->obd_req_replay_queue, rq_list) { + LASSERT (req->rq_reply_state == 0); + ptlrpc_free_clone(req); } - - list_for_each_safe(tmp, n, &obd->obd_recovery_queue) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - list_del(&req->rq_list); - target_release_saved_req(req); + list_for_each_entry_safe(req, n, &obd->obd_lock_replay_queue, rq_list){ + LASSERT (req->rq_reply_state == 0); + ptlrpc_free_clone(req); } - EXIT; -} - -void target_abort_recovery(void *data) -{ - struct obd_device *obd = data; - - ENTRY; - spin_lock_bh(&obd->obd_processing_task_lock); - if (!obd->obd_recovering) { - spin_unlock_bh(&obd->obd_processing_task_lock); - EXIT; - return; + list_for_each_entry_safe(req, n, &obd->obd_final_req_queue, rq_list) { + LASSERT (req->rq_reply_state == 0); + ptlrpc_free_clone(req); } - obd->obd_recovering = obd->obd_abort_recovery = 0; - obd->obd_recoverable_clients = 0; - target_cancel_recovery_timer(obd); - spin_unlock_bh(&obd->obd_processing_task_lock); - LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished " - "clients.\n", obd->obd_name); - class_disconnect_stale_exports(obd); - abort_recovery_queue(obd); - - target_finish_recovery(obd); - CDEBUG(D_HA, "%s: recovery complete\n", obd_uuid2str(&obd->obd_uuid)); EXIT; } @@ -1068,7 +1163,7 @@ static void reset_recovery_timer(struct obd_device *obd) spin_unlock_bh(&obd->obd_processing_task_lock); return; } - cfs_timer_arm(&obd->obd_recovery_timer, + cfs_timer_arm(&obd->obd_recovery_timer, cfs_time_shift(OBD_RECOVERY_TIMEOUT)); spin_unlock_bh(&obd->obd_processing_task_lock); CDEBUG(D_HA, "%s: timer will expire in %u seconds\n", obd->obd_name, @@ -1079,121 +1174,476 @@ static void reset_recovery_timer(struct obd_device *obd) /* Only start it the first time called */ -void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler) +void target_start_recovery_timer(struct obd_device *obd) { spin_lock_bh(&obd->obd_processing_task_lock); - if (obd->obd_recovery_handler) { + if (obd->obd_recovery_handler + || timer_pending(&obd->obd_recovery_timer)) { spin_unlock_bh(&obd->obd_processing_task_lock); return; } CWARN("%s: starting recovery timer (%us)\n", obd->obd_name, OBD_RECOVERY_TIMEOUT); - obd->obd_recovery_handler = handler; cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd); spin_unlock_bh(&obd->obd_processing_task_lock); reset_recovery_timer(obd); } +#ifdef __KERNEL__ static int check_for_next_transno(struct obd_device *obd) { - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; int wake_up = 0, connected, completed, queue_len, max; __u64 next_transno, req_transno; - + ENTRY; spin_lock_bh(&obd->obd_processing_task_lock); - req = list_entry(obd->obd_recovery_queue.next, - struct ptlrpc_request, rq_list); + + if (!list_empty(&obd->obd_req_replay_queue)) { + req = list_entry(obd->obd_req_replay_queue.next, + struct ptlrpc_request, rq_list); + req_transno = lustre_msg_get_transno(req->rq_reqmsg); + } else { + req_transno = 0; + } + max = obd->obd_max_recoverable_clients; - req_transno = lustre_msg_get_transno(req->rq_reqmsg); connected = obd->obd_connected_clients; - completed = max - obd->obd_recoverable_clients; + completed = connected - obd->obd_recoverable_clients; queue_len = obd->obd_requests_queued_for_recovery; next_transno = obd->obd_next_recovery_transno; - CDEBUG(D_HA,"max: %d, connected: %d, completed: %d, queue_len: %d, " + CDEBUG(D_HA, "max: %d, connected: %d, completed: %d, queue_len: %d, " "req_transno: "LPU64", next_transno: "LPU64"\n", max, connected, completed, queue_len, req_transno, next_transno); + if (obd->obd_abort_recovery) { CDEBUG(D_HA, "waking for aborted recovery\n"); wake_up = 1; - } else if (!obd->obd_recovering) { - CDEBUG(D_HA, "waking for completed recovery (?)\n"); + } else if (atomic_read(&obd->obd_req_replay_clients) == 0) { + CDEBUG(D_HA, "waking for completed recovery\n"); wake_up = 1; } else if (req_transno == next_transno) { CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno); wake_up = 1; } else if (queue_len + completed == max) { - CDEBUG(D_ERROR, + /* handle gaps occured due to lost reply. It is allowed gaps + * because all clients are connected and there will be resend + * for missed transaction */ + LASSERTF(req_transno >= next_transno, + "req_transno: "LPU64", next_transno: "LPU64"\n", + req_transno, next_transno); + + CDEBUG(req_transno > obd->obd_last_committed ? D_ERROR : D_HA, "waking for skipped transno (skip: "LPD64 ", ql: %d, comp: %d, conn: %d, next: "LPD64")\n", - next_transno, queue_len, completed, max, req_transno); + next_transno, queue_len, completed, connected, req_transno); obd->obd_next_recovery_transno = req_transno; wake_up = 1; + } else if (queue_len == atomic_read(&obd->obd_req_replay_clients)) { + /* some clients haven't connected in time, but we can try + * to replay requests that demand on already committed ones + * also, we can replay first non-committed transation */ + LASSERT(req_transno != 0); + if (req_transno == obd->obd_last_committed + 1) { + obd->obd_next_recovery_transno = req_transno; + } else if (req_transno > obd->obd_last_committed) { + /* can't continue recovery: have no needed transno */ + obd->obd_abort_recovery = 1; + CDEBUG(D_ERROR, "abort due to missed clients. max: %d, " + "connected: %d, completed: %d, queue_len: %d, " + "req_transno: "LPU64", next_transno: "LPU64"\n", + max, connected, completed, queue_len, + req_transno, next_transno); + } + wake_up = 1; } + spin_unlock_bh(&obd->obd_processing_task_lock); - LASSERT(lustre_msg_get_transno(req->rq_reqmsg) >= next_transno); return wake_up; } -static void process_recovery_queue(struct obd_device *obd) +static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd) { + struct l_wait_info lwi = { 0 }; struct ptlrpc_request *req; - int abort_recovery = 0; + + CDEBUG(D_HA, "Waiting for transno "LPD64"\n", + obd->obd_next_recovery_transno); + l_wait_event(obd->obd_next_transno_waitq, + check_for_next_transno(obd), &lwi); + + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_abort_recovery) { + req = NULL; + } else if (!list_empty(&obd->obd_req_replay_queue)) { + req = list_entry(obd->obd_req_replay_queue.next, + struct ptlrpc_request, rq_list); + list_del_init(&req->rq_list); + obd->obd_requests_queued_for_recovery--; + } else { + req = NULL; + } + spin_unlock_bh(&obd->obd_processing_task_lock); + RETURN(req); +} + +static int check_for_next_lock(struct obd_device *obd) +{ + struct ptlrpc_request *req = NULL; + int wake_up = 0; + + spin_lock_bh(&obd->obd_processing_task_lock); + if (!list_empty(&obd->obd_lock_replay_queue)) { + req = list_entry(obd->obd_lock_replay_queue.next, + struct ptlrpc_request, rq_list); + CDEBUG(D_HA, "waking for next lock\n"); + wake_up = 1; + } else if (atomic_read(&obd->obd_lock_replay_clients) == 0) { + CDEBUG(D_HA, "waking for completed lock replay\n"); + wake_up = 1; + } else if (obd->obd_abort_recovery) { + CDEBUG(D_HA, "waking for aborted recovery\n"); + wake_up = 1; + } + spin_unlock_bh(&obd->obd_processing_task_lock); + + return wake_up; +} + +static struct ptlrpc_request *target_next_replay_lock(struct obd_device *obd) +{ struct l_wait_info lwi = { 0 }; - ENTRY; + struct ptlrpc_request *req; - for (;;) { - spin_lock_bh(&obd->obd_processing_task_lock); - LASSERT(obd->obd_processing_task == cfs_curproc_pid()); - req = list_entry(obd->obd_recovery_queue.next, + CDEBUG(D_HA, "Waiting for lock\n"); + l_wait_event(obd->obd_next_transno_waitq, + check_for_next_lock(obd), &lwi); + + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_abort_recovery) { + req = NULL; + } else if (!list_empty(&obd->obd_lock_replay_queue)) { + req = list_entry(obd->obd_lock_replay_queue.next, struct ptlrpc_request, rq_list); + list_del_init(&req->rq_list); + } else { + req = NULL; + } + spin_unlock_bh(&obd->obd_processing_task_lock); + return req; +} - if (lustre_msg_get_transno(req->rq_reqmsg) != - obd->obd_next_recovery_transno) { - spin_unlock_bh(&obd->obd_processing_task_lock); - CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is " - LPD64")\n", - obd->obd_next_recovery_transno, - lustre_msg_get_transno(req->rq_reqmsg)); - l_wait_event(obd->obd_next_transno_waitq, - check_for_next_transno(obd), &lwi); - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - return; - } - continue; - } +static struct ptlrpc_request *target_next_final_ping(struct obd_device *obd) +{ + struct ptlrpc_request *req; + + spin_lock_bh(&obd->obd_processing_task_lock); + if (!list_empty(&obd->obd_final_req_queue)) { + req = list_entry(obd->obd_final_req_queue.next, + struct ptlrpc_request, rq_list); list_del_init(&req->rq_list); - obd->obd_requests_queued_for_recovery--; - spin_unlock_bh(&obd->obd_processing_task_lock); + } else { + req = NULL; + } + spin_unlock_bh(&obd->obd_processing_task_lock); + return req; +} + +static inline int req_replay_done(struct obd_export *exp) +{ + return (exp->exp_req_replay_needed == 0); +} + +static inline int lock_replay_done(struct obd_export *exp) +{ + return (exp->exp_lock_replay_needed == 0); +} + +static inline int connect_done(struct obd_export *exp) +{ + return (exp->exp_in_recovery != 0); +} + +static int check_for_clients(struct obd_device *obd) +{ + if (obd->obd_abort_recovery) + return 1; + LASSERT(obd->obd_connected_clients <= obd->obd_max_recoverable_clients); + if (obd->obd_no_conn == 0 && + obd->obd_connected_clients == obd->obd_max_recoverable_clients) + return 1; + return 0; +} + +static int handle_recovery_req(struct ptlrpc_thread *thread, + struct ptlrpc_request *req, + svc_handler_t handler) +{ + int rc; + ENTRY; + + rc = lu_context_init(&req->rq_session, LCT_SESSION); + if (rc) { + CERROR("Failure to initialize session: %d\n", rc); + return rc; + } + req->rq_session.lc_thread = thread; + lu_context_enter(&req->rq_session); + req->rq_svc_thread = thread; + req->rq_svc_thread->t_env->le_ses = &req->rq_session; + + (void)handler(req); + + lu_context_exit(&req->rq_session); + lu_context_fini(&req->rq_session); + /* don't reset timer for final stage */ + if (!req_replay_done(req->rq_export) || + !lock_replay_done(req->rq_export)) + reset_recovery_timer(class_exp2obd(req->rq_export)); + ptlrpc_free_clone(req); + RETURN(0); +} + +static int target_recovery_thread(void *arg) +{ + struct obd_device *obd = arg; + struct ptlrpc_request *req; + struct target_recovery_data *trd = &obd->obd_recovery_data; + struct l_wait_info lwi = { 0 }; + unsigned long delta; + unsigned long flags; + struct lu_env env; + struct ptlrpc_thread fake_svc_thread, *thread = &fake_svc_thread; + __u32 recov_ctx_tags = LCT_MD_THREAD; + int rc = 0; + ENTRY; + + cfs_daemonize("tgt_recov"); - DEBUG_REQ(D_HA, req, "processing: "); - (void)obd->obd_recovery_handler(req); + SIGNAL_MASK_LOCK(current, flags); + sigfillset(¤t->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + rc = lu_context_init(&env.le_ctx, recov_ctx_tags); + if (rc) + RETURN(rc); + + thread->t_env = &env; + env.le_ctx.lc_thread = thread; + + CERROR("%s: started recovery thread pid %d\n", obd->obd_name, + current->pid); + trd->trd_processing_task = current->pid; + + obd->obd_recovering = 1; + complete(&trd->trd_starting); + + /* first of all, we have to know the first transno to replay */ + obd->obd_abort_recovery = 0; + l_wait_event(obd->obd_next_transno_waitq, + check_for_clients(obd), &lwi); + + spin_lock_bh(&obd->obd_processing_task_lock); + target_cancel_recovery_timer(obd); + spin_unlock_bh(&obd->obd_processing_task_lock); + + /* If some clients haven't connected in time, evict them */ + if (obd->obd_abort_recovery) { + CWARN("Some clients haven't connect in time (%d/%d)," + "evict them\n", obd->obd_connected_clients, + obd->obd_max_recoverable_clients); + obd->obd_abort_recovery = obd->obd_stopping; + class_disconnect_stale_exports(obd, connect_done); + } + /* next stage: replay requests */ + delta = jiffies; + obd->obd_req_replaying = 1; + CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n", + atomic_read(&obd->obd_req_replay_clients), + obd->obd_next_recovery_transno); + while ((req = target_next_replay_req(obd))) { + LASSERT(trd->trd_processing_task == current->pid); + DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s", + lustre_msg_get_transno(req->rq_reqmsg), + libcfs_nid2str(req->rq_peer.nid)); + + handle_recovery_req(thread, req, + trd->trd_recovery_handler); obd->obd_replayed_requests++; - reset_recovery_timer(obd); - /* bug 1580: decide how to properly sync() in recovery */ - //mds_fsync_super(obd->u.obt.obt_sb); - class_export_put(req->rq_export); - if (req->rq_reply_state != NULL) { - ptlrpc_rs_decref(req->rq_reply_state); - /* req->rq_reply_state = NULL; */ - } - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); spin_lock_bh(&obd->obd_processing_task_lock); obd->obd_next_recovery_transno++; - if (list_empty(&obd->obd_recovery_queue)) { - obd->obd_processing_task = 0; - spin_unlock_bh(&obd->obd_processing_task_lock); - break; + spin_unlock_bh(&obd->obd_processing_task_lock); + } + + spin_lock_bh(&obd->obd_processing_task_lock); + target_cancel_recovery_timer(obd); + spin_unlock_bh(&obd->obd_processing_task_lock); + + /* If some clients haven't replayed requests in time, evict them */ + if (obd->obd_abort_recovery) { + CDEBUG(D_ERROR, "req replay timed out, aborting ...\n"); + obd->obd_abort_recovery = obd->obd_stopping; + class_disconnect_stale_exports(obd, req_replay_done); + abort_req_replay_queue(obd); + } + /* The second stage: replay locks */ + CDEBUG(D_INFO, "2: lock replay stage - %d clients\n", + atomic_read(&obd->obd_lock_replay_clients)); + while ((req = target_next_replay_lock(obd))) { + LASSERT(trd->trd_processing_task == current->pid); + DEBUG_REQ(D_HA|D_WARNING, req, "processing lock from %s: ", + libcfs_nid2str(req->rq_peer.nid)); + handle_recovery_req(thread, req, + trd->trd_recovery_handler); + obd->obd_replayed_locks++; + } + + spin_lock_bh(&obd->obd_processing_task_lock); + target_cancel_recovery_timer(obd); + spin_unlock_bh(&obd->obd_processing_task_lock); + /* If some clients haven't replayed requests in time, evict them */ + if (obd->obd_abort_recovery) { + int stale; + CERROR("lock replay timed out, aborting ...\n"); + obd->obd_abort_recovery = obd->obd_stopping; + stale = class_disconnect_stale_exports(obd, lock_replay_done); + abort_lock_replay_queue(obd); + } + + /* We drop recoverying flag to forward all new requests + * to regular mds_handle() since now */ + spin_lock_bh(&obd->obd_processing_task_lock); + obd->obd_recovering = obd->obd_abort_recovery = 0; + spin_unlock_bh(&obd->obd_processing_task_lock); + /* The third stage: reply on final pings */ + CDEBUG(D_INFO, "3: final stage - process recovery completion pings\n"); + while ((req = target_next_final_ping(obd))) { + LASSERT(trd->trd_processing_task == current->pid); + DEBUG_REQ(D_HA, req, "processing final ping from %s: ", + libcfs_nid2str(req->rq_peer.nid)); + handle_recovery_req(thread, req, + trd->trd_recovery_handler); + } + + delta = (jiffies - delta) / HZ; + CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n", + delta, obd->obd_replayed_requests, obd->obd_replayed_locks); + LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0); + LASSERT(atomic_read(&obd->obd_lock_replay_clients) == 0); + if (delta > obd_timeout * 2) { + CWARN("too long recovery - read logs\n"); + libcfs_debug_dumplog(); + } + + target_finish_recovery(obd); + + lu_env_fini(&env); + trd->trd_processing_task = 0; + complete(&trd->trd_finishing); + RETURN(rc); +} + +int target_start_recovery_thread(struct obd_device *obd, svc_handler_t handler) +{ + int rc = 0; + struct target_recovery_data *trd = &obd->obd_recovery_data; + + memset(trd, 0, sizeof(*trd)); + init_completion(&trd->trd_starting); + init_completion(&trd->trd_finishing); + trd->trd_recovery_handler = handler; + + if (kernel_thread(target_recovery_thread, obd, 0) > 0) { + wait_for_completion(&trd->trd_starting); + LASSERT(obd->obd_recovering != 0); + } else + rc = -ECHILD; + + return rc; +} + +void target_stop_recovery_thread(struct obd_device *obd) +{ + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_recovery_data.trd_processing_task > 0) { + struct target_recovery_data *trd = &obd->obd_recovery_data; + CERROR("%s: Aborting recovery\n", obd->obd_name); + obd->obd_abort_recovery = 1; + wake_up(&obd->obd_next_transno_waitq); + spin_unlock_bh(&obd->obd_processing_task_lock); + wait_for_completion(&trd->trd_finishing); + } else { + spin_unlock_bh(&obd->obd_processing_task_lock); + } +} + +void target_recovery_fini(struct obd_device *obd) +{ + class_disconnect_exports(obd); + target_stop_recovery_thread(obd); + target_cleanup_recovery(obd); +} +EXPORT_SYMBOL(target_recovery_fini); + +void target_recovery_init(struct obd_device *obd, svc_handler_t handler) +{ + if (obd->obd_max_recoverable_clients == 0) + return; + + CWARN("RECOVERY: service %s, %d recoverable clients, " + "last_transno "LPU64"\n", obd->obd_name, + obd->obd_max_recoverable_clients, obd->obd_last_committed); + obd->obd_next_recovery_transno = obd->obd_last_committed + 1; + target_start_recovery_thread(obd, handler); + obd->obd_recovery_start = CURRENT_SECONDS; + /* Only used for lprocfs_status */ + obd->obd_recovery_end = obd->obd_recovery_start + OBD_RECOVERY_TIMEOUT; +} +EXPORT_SYMBOL(target_recovery_init); + +#endif + +int target_process_req_flags(struct obd_device *obd, struct ptlrpc_request *req) +{ + struct obd_export *exp = req->rq_export; + LASSERT(exp != NULL); + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REQ_REPLAY_DONE) { + /* client declares he's ready to replay locks */ + spin_lock_bh(&obd->obd_processing_task_lock); + if (exp->exp_req_replay_needed) { + LASSERT(atomic_read(&obd->obd_req_replay_clients) > 0); + spin_lock(&exp->exp_lock); + exp->exp_req_replay_needed = 0; + spin_unlock(&exp->exp_lock); + atomic_dec(&obd->obd_req_replay_clients); + LASSERT(obd->obd_recoverable_clients > 0); + obd->obd_recoverable_clients--; + if (atomic_read(&obd->obd_req_replay_clients) == 0) + CDEBUG(D_HA, "all clients have replayed reqs\n"); + wake_up(&obd->obd_next_transno_waitq); } spin_unlock_bh(&obd->obd_processing_task_lock); } - EXIT; + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) { + /* client declares he's ready to complete recovery + * so, we put the request on th final queue */ + spin_lock_bh(&obd->obd_processing_task_lock); + if (exp->exp_lock_replay_needed) { + LASSERT(atomic_read(&obd->obd_lock_replay_clients) > 0); + spin_lock(&exp->exp_lock); + exp->exp_lock_replay_needed = 0; + spin_unlock(&exp->exp_lock); + atomic_dec(&obd->obd_lock_replay_clients); + if (atomic_read(&obd->obd_lock_replay_clients) == 0) + CDEBUG(D_HA, "all clients have replayed locks\n"); + wake_up(&obd->obd_next_transno_waitq); + } + spin_unlock_bh(&obd->obd_processing_task_lock); + } + + return 0; } int target_queue_recovery_request(struct ptlrpc_request *req, @@ -1202,8 +1652,58 @@ int target_queue_recovery_request(struct ptlrpc_request *req, struct list_head *tmp; int inserted = 0; __u64 transno = lustre_msg_get_transno(req->rq_reqmsg); - struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; + + ENTRY; + + if (obd->obd_recovery_data.trd_processing_task == cfs_curproc_pid()) { + /* Processing the queue right now, don't re-add. */ + RETURN(1); + } + + target_process_req_flags(obd, req); + + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) { + /* client declares he's ready to complete recovery + * so, we put the request on th final queue */ + req = ptlrpc_clone_req(req); + if (req == NULL) + RETURN(-ENOMEM); + DEBUG_REQ(D_HA, req, "queue final req"); + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_recovering) + list_add_tail(&req->rq_list, &obd->obd_final_req_queue); + else { + spin_unlock_bh(&obd->obd_processing_task_lock); + ptlrpc_free_clone(req); + if (obd->obd_stopping) { + RETURN(-ENOTCONN); + } else { + RETURN(1); + } + } + spin_unlock_bh(&obd->obd_processing_task_lock); + RETURN(0); + } + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REQ_REPLAY_DONE) { + /* client declares he's ready to replay locks */ + req = ptlrpc_clone_req(req); + if (req == NULL) + RETURN(-ENOMEM); + DEBUG_REQ(D_HA, req, "queue lock replay req"); + spin_lock_bh(&obd->obd_processing_task_lock); + LASSERT(obd->obd_recovering); + /* usually due to recovery abort */ + if (!req->rq_export->exp_in_recovery) { + spin_unlock_bh(&obd->obd_processing_task_lock); + ptlrpc_free_clone(req); + RETURN(-ENOTCONN); + } + LASSERT(req->rq_export->exp_lock_replay_needed); + list_add_tail(&req->rq_list, &obd->obd_lock_replay_queue); + spin_unlock_bh(&obd->obd_processing_task_lock); + wake_up(&obd->obd_next_transno_waitq); + RETURN(0); + } /* CAVEAT EMPTOR: The incoming request message has been swabbed * (i.e. buflens etc are in my own byte order), but type-dependent @@ -1212,18 +1712,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req, if (!transno) { CFS_INIT_LIST_HEAD(&req->rq_list); DEBUG_REQ(D_HA, req, "not queueing"); - return 1; + RETURN(1); } - /* XXX If I were a real man, these LBUGs would be sane cleanups. */ - /* XXX just like the request-dup code in queue_final_reply */ - OBD_ALLOC(saved_req, sizeof *saved_req); - if (!saved_req) - LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); - spin_lock_bh(&obd->obd_processing_task_lock); /* If we're processing the queue, we want don't want to queue this @@ -1236,36 +1727,39 @@ int target_queue_recovery_request(struct ptlrpc_request *req, * Also, a resent, replayed request that has already been * handled will pass through here and be processed immediately. */ - if (obd->obd_processing_task == cfs_curproc_pid() || - transno < obd->obd_next_recovery_transno) { + CWARN("Next recovery transno: "LPU64", current: "LPU64", replaying: %i\n", + obd->obd_next_recovery_transno, transno, obd->obd_req_replaying); + if (transno < obd->obd_next_recovery_transno && obd->obd_req_replaying) { /* Processing the queue right now, don't re-add. */ LASSERT(list_empty(&req->rq_list)); spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); - return 1; + RETURN(1); } + spin_unlock_bh(&obd->obd_processing_task_lock); /* A resent, replayed request that is still on the queue; just drop it. The queued request will handle this. */ if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT|MSG_REPLAY)) == (MSG_RESENT | MSG_REPLAY)) { DEBUG_REQ(D_ERROR, req, "dropping resent queued req"); - spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); - return 0; + RETURN(0); } - memcpy(saved_req, req, sizeof *req); - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); - req = saved_req; - req->rq_reqmsg = reqmsg; - class_export_get(req->rq_export); - CFS_INIT_LIST_HEAD(&req->rq_list); + req = ptlrpc_clone_req(req); + if (req == NULL) + RETURN(-ENOMEM); + + spin_lock_bh(&obd->obd_processing_task_lock); + LASSERT(obd->obd_recovering); + if (!req->rq_export->exp_in_recovery) { + spin_unlock_bh(&obd->obd_processing_task_lock); + ptlrpc_free_clone(req); + RETURN(-ENOTCONN); + } + LASSERT(req->rq_export->exp_req_replay_needed); /* XXX O(n^2) */ - list_for_each(tmp, &obd->obd_recovery_queue) { + list_for_each(tmp, &obd->obd_req_replay_queue) { struct ptlrpc_request *reqiter = list_entry(tmp, struct ptlrpc_request, rq_list); @@ -1276,29 +1770,14 @@ int target_queue_recovery_request(struct ptlrpc_request *req, } } - if (!inserted) { - list_add_tail(&req->rq_list, &obd->obd_recovery_queue); - } + if (!inserted) + list_add_tail(&req->rq_list, &obd->obd_req_replay_queue); obd->obd_requests_queued_for_recovery++; - - if (obd->obd_processing_task != 0) { - /* Someone else is processing this queue, we'll leave it to - * them. - */ - cfs_waitq_signal(&obd->obd_next_transno_waitq); - spin_unlock_bh(&obd->obd_processing_task_lock); - return 0; - } - - /* Nobody is processing, and we know there's (at least) one to process - * now, so we'll do the honours. - */ - obd->obd_processing_task = cfs_curproc_pid(); + wake_up(&obd->obd_next_transno_waitq); spin_unlock_bh(&obd->obd_processing_task_lock); + RETURN(0); - process_recovery_queue(obd); - return 0; } struct obd_device * target_req2obd(struct ptlrpc_request *req) @@ -1306,83 +1785,7 @@ struct obd_device * target_req2obd(struct ptlrpc_request *req) return req->rq_export->exp_obd; } -int target_queue_final_reply(struct ptlrpc_request *req, int rc) -{ - struct obd_device *obd = target_req2obd(req); - struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; - int recovery_done = 0; - - LASSERT ((rc == 0) == (req->rq_reply_state != NULL)); - - if (rc) { - /* Just like ptlrpc_error, but without the sending. */ - rc = lustre_pack_reply(req, 1, NULL, NULL); - LASSERT(rc == 0); /* XXX handle this */ - req->rq_type = PTL_RPC_MSG_ERR; - } - - LASSERT (!req->rq_reply_state->rs_difficult); - LASSERT(list_empty(&req->rq_list)); - /* XXX a bit like the request-dup code in queue_recovery_request */ - OBD_ALLOC(saved_req, sizeof *saved_req); - if (!saved_req) - LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); - *saved_req = *req; - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); - - /* Don't race cleanup */ - spin_lock_bh(&obd->obd_processing_task_lock); - if (obd->obd_stopping) { - spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *req); - req->rq_status = -ENOTCONN; - /* rv is ignored anyhow */ - return -ENOTCONN; - } - ptlrpc_rs_addref(req->rq_reply_state); /* +1 ref for saved reply */ - req = saved_req; - req->rq_reqmsg = reqmsg; - class_export_get(req->rq_export); - list_add(&req->rq_list, &obd->obd_delayed_reply_queue); - - /* only count the first "replay over" request from each - export */ - if (req->rq_export->exp_replay_needed) { - --obd->obd_recoverable_clients; - - spin_lock(&req->rq_export->exp_lock); - req->rq_export->exp_replay_needed = 0; - spin_unlock(&req->rq_export->exp_lock); - } - recovery_done = (obd->obd_recoverable_clients == 0); - spin_unlock_bh(&obd->obd_processing_task_lock); - - OBD_RACE(OBD_FAIL_LDLM_RECOV_CLIENTS); - if (recovery_done) { - spin_lock_bh(&obd->obd_processing_task_lock); - obd->obd_recovering = obd->obd_abort_recovery = 0; - target_cancel_recovery_timer(obd); - spin_unlock_bh(&obd->obd_processing_task_lock); - - target_finish_recovery(obd); - CDEBUG(D_HA, "%s: recovery complete\n", - obd_uuid2str(&obd->obd_uuid)); - } else { - CWARN("%s: %d recoverable clients remain\n", - obd->obd_name, obd->obd_recoverable_clients); - cfs_waitq_signal(&obd->obd_next_transno_waitq); - } - - return 1; -} - -int -target_send_reply_msg (struct ptlrpc_request *req, int rc, int fail_id) +int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id) { if (OBD_FAIL_CHECK(fail_id | OBD_FAIL_ONCE)) { obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED; @@ -1390,7 +1793,7 @@ target_send_reply_msg (struct ptlrpc_request *req, int rc, int fail_id) return (-ECOMM); } - if (rc) { + if (unlikely(rc)) { DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc); req->rq_status = rc; return (ptlrpc_error(req)); @@ -1401,8 +1804,7 @@ target_send_reply_msg (struct ptlrpc_request *req, int rc, int fail_id) return (ptlrpc_send_reply(req, 1)); } -void -target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) +void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) { int netrc; struct ptlrpc_reply_state *rs; @@ -1495,7 +1897,14 @@ int target_handle_ping(struct ptlrpc_request *req) void target_committed_to_req(struct ptlrpc_request *req) { - struct obd_device *obd = req->rq_export->exp_obd; + struct obd_device *obd; + + if (req == NULL || req->rq_export == NULL) + return; + + obd = req->rq_export->exp_obd; + if (obd == NULL) + return; if (!obd->obd_no_transno && req->rq_repmsg != NULL) lustre_msg_set_last_committed(req->rq_repmsg, @@ -1503,8 +1912,8 @@ void target_committed_to_req(struct ptlrpc_request *req) else DEBUG_REQ(D_IOCTL, req, "not sending last_committed update"); - CDEBUG(D_INFO, "last_committed "LPU64", xid "LPU64"\n", - obd->obd_last_committed, req->rq_xid); + CDEBUG(D_INFO, "last_committed "LPU64", transno "LPU64", xid "LPU64"\n", + obd->obd_last_committed, req->rq_transno, req->rq_xid); } EXPORT_SYMBOL(target_committed_to_req); @@ -1540,7 +1949,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) int repsize[2] = { sizeof(struct ptlrpc_body), sizeof(struct qunit_data) }; ENTRY; - + rc = lustre_pack_reply(req, 2, repsize, NULL); if (rc) { CERROR("packing reply failed!: rc = %d\n", rc); @@ -1552,17 +1961,17 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(struct qunit_data)); LASSERT(rep); - qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), + qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), lustre_swab_qdata); } else { CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(struct qunit_data_old)); LASSERT(rep); - qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old), + qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old), lustre_swab_qdata_old); qdata = lustre_quota_old_to_new(qdata_old); } @@ -1576,14 +1985,14 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) LASSERT(obd->obd_observer && obd->obd_observer->obd_observer); master_obd = obd->obd_observer->obd_observer; qctxt = &master_obd->u.obt.obt_qctxt; - + LASSERT(qctxt->lqc_handler); rc = qctxt->lqc_handler(master_obd, qdata, lustre_msg_get_opc(req->rq_reqmsg)); if (rc && rc != -EDQUOT) - CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, + CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, "dqacq failed! (rc:%d)\n", rc); - + /* the qd_count might be changed in lqc_handler */ if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { @@ -1594,8 +2003,8 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) } req->rq_status = rc; rc = ptlrpc_reply(req); - - RETURN(rc); + + RETURN(rc); #else return 0; #endif /* !__KERNEL__ */ diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index dda01b7..13f26ab 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -37,8 +37,6 @@ #include <obd_class.h> #include "ldlm_internal.h" -//struct lustre_lock ldlm_everything_lock; - /* lock's skip list pointers fix mode */ #define LDLM_JOIN_NONE 0 #define LDLM_MODE_JOIN_RIGHT 1 @@ -128,6 +126,12 @@ struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock) return lock; } +static void ldlm_lock_free(struct ldlm_lock *lock, size_t size) +{ + LASSERT(size == sizeof(*lock)); + OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock)); +} + void ldlm_lock_put(struct ldlm_lock *lock) { ENTRY; @@ -137,7 +141,8 @@ void ldlm_lock_put(struct ldlm_lock *lock) if (atomic_dec_and_test(&lock->l_refc)) { struct ldlm_resource *res; - LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing it."); + LDLM_DEBUG(lock, + "final lock_put on destroyed lock, freeing it."); res = lock->l_resource; LASSERT(lock->l_destroyed); @@ -150,13 +155,16 @@ void ldlm_lock_put(struct ldlm_lock *lock) atomic_dec(&res->lr_namespace->ns_locks); ldlm_resource_putref(res); lock->l_resource = NULL; - if (lock->l_export) + if (lock->l_export) { class_export_put(lock->l_export); + lock->l_export = NULL; + } if (lock->l_lvb_data != NULL) OBD_FREE(lock->l_lvb_data, lock->l_lvb_len); - OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock)); + OBD_FREE_RCU_CB(lock, sizeof(*lock), &lock->l_handle, + ldlm_lock_free); } EXIT; @@ -328,7 +336,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, } int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, - struct ldlm_res_id new_resid) + const struct ldlm_res_id *new_resid) { struct ldlm_resource *oldres = lock->l_resource; struct ldlm_resource *newres; @@ -338,14 +346,14 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, LASSERT(ns->ns_client != 0); lock_res_and_lock(lock); - if (memcmp(&new_resid, &lock->l_resource->lr_name, + if (memcmp(new_resid, &lock->l_resource->lr_name, sizeof(lock->l_resource->lr_name)) == 0) { /* Nothing to do */ unlock_res_and_lock(lock); RETURN(0); } - LASSERT(new_resid.name[0] != 0); + LASSERT(new_resid->name[0] != 0); /* This function assumes that the lock isn't on any lists */ LASSERT(list_empty(&lock->l_res_link)); @@ -360,7 +368,7 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, } lock_res_and_lock(lock); - LASSERT(memcmp(&new_resid, &lock->l_resource->lr_name, + LASSERT(memcmp(new_resid, &lock->l_resource->lr_name, sizeof(lock->l_resource->lr_name)) != 0); lock_res(newres); lock->l_resource = newres; @@ -378,7 +386,7 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, * HANDLES */ -void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh) +void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh) { lockh->cookie = lock->l_handle.h_cookie; } @@ -387,10 +395,11 @@ void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh) * Return NULL if flag already set */ -struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags) +struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, + int flags) { struct ldlm_namespace *ns; - struct ldlm_lock *lock = NULL, *retval = NULL; + struct ldlm_lock *lock, *retval = NULL; ENTRY; LASSERT(handle); @@ -431,7 +440,7 @@ struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags) } struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns, - struct lustre_handle *handle) + const struct lustre_handle *handle) { struct ldlm_lock *retval = NULL; retval = __ldlm_handle2lock(handle, 0); @@ -521,7 +530,7 @@ void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, check_res_locked(lock->l_resource); if (new) ldlm_add_bl_work_item(lock, new, work_list); - else + else ldlm_add_cp_work_item(lock, work_list); EXIT; } @@ -790,7 +799,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, } } else if (join & LDLM_MODE_JOIN_LEFT) { LASSERT(! LDLM_SL_HEAD(&lockp->l_sl_mode)); - if (LDLM_SL_EMPTY(&lockp->l_sl_mode)) { + if (LDLM_SL_EMPTY(&lockp->l_sl_mode)) { lock->l_sl_mode.prev = &lockp->l_sl_mode; lockp->l_sl_mode.next = &lock->l_sl_mode; } else if (LDLM_SL_TAIL(&lockp->l_sl_mode)) { @@ -961,9 +970,13 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock) * * Returns 1 if it finds an already-existing lock that is compatible; in this * case, lockh is filled in with a addref()ed lock + * + * we also check security context, if that failed we simply return 0 (to keep + * caller code unchanged), the context failure will be discovered by caller + * sometime later. */ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, - struct ldlm_res_id *res_id, ldlm_type_t type, + const struct ldlm_res_id *res_id, ldlm_type_t type, ldlm_policy_data_t *policy, ldlm_mode_t mode, struct lustre_handle *lockh) { @@ -982,7 +995,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, mode = old_lock->l_req_mode; } - res = ldlm_resource_get(ns, NULL, *res_id, type, 0); + res = ldlm_resource_get(ns, NULL, res_id, type, 0); if (res == NULL) { LASSERT(old_lock == NULL); RETURN(0); @@ -1009,7 +1022,8 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, if (lock) { ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && (!(lock->l_flags & LDLM_FL_LVB_READY))) { + if ((flags & LDLM_FL_LVB_READY) && + (!(lock->l_flags & LDLM_FL_LVB_READY))) { struct l_wait_info lwi; if (lock->l_completion_ast) { int err = lock->l_completion_ast(lock, @@ -1040,6 +1054,19 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, res_id->name[2] : policy->l_extent.start, (type == LDLM_PLAIN || type == LDLM_IBITS) ? res_id->name[3] : policy->l_extent.end); + + /* check user's security context */ + if (lock->l_conn_export && + sptlrpc_import_check_ctx( + class_exp2cliimp(lock->l_conn_export))) { + if (!(flags & LDLM_FL_TEST_LOCK)) + ldlm_lock_decref_internal(lock, mode); + rc = 0; + } + + if (flags & LDLM_FL_TEST_LOCK) + LDLM_LOCK_PUT(lock); + } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " LPU64"/"LPU64" ("LPU64" "LPU64")", ns, @@ -1051,16 +1078,15 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, } if (old_lock) LDLM_LOCK_PUT(old_lock); - if (flags & LDLM_FL_TEST_LOCK && rc) - LDLM_LOCK_PUT(lock); return rc; } /* Returns a referenced lock */ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id res_id, ldlm_type_t type, + const struct lustre_handle *parent_lock_handle, + const struct ldlm_res_id *res_id, + ldlm_type_t type, ldlm_mode_t mode, ldlm_blocking_callback blocking, ldlm_completion_callback completion, @@ -1283,7 +1309,7 @@ int ldlm_run_cp_ast_work(struct list_head *rpc_list) * will never call the local blocking_ast until we drop our * reader/writer reference, which we won't do until we get the * reply and finish enqueueing. */ - + list_for_each_safe(tmp, pos, rpc_list) { struct ldlm_lock *lock = list_entry(tmp, struct ldlm_lock, l_cp_ast); @@ -1318,6 +1344,9 @@ void ldlm_reprocess_all_ns(struct ldlm_namespace *ns) struct list_head *tmp; int i, rc; + if (ns == NULL) + return; + ENTRY; spin_lock(&ns->ns_hash_lock); for (i = 0; i < RES_HASH_SIZE; i++) { @@ -1388,9 +1417,13 @@ void ldlm_cancel_callback(struct ldlm_lock *lock) } } -static void ldlm_unlink_lock_skiplist(struct ldlm_lock *req) +void ldlm_unlink_lock_skiplist(struct ldlm_lock *req) { struct ldlm_lock *lock; + + if (req->l_resource->lr_type != LDLM_PLAIN && + req->l_resource->lr_type != LDLM_IBITS) + return; if (LDLM_SL_HEAD(&req->l_sl_mode)) { lock = list_entry(req->l_res_link.next, struct ldlm_lock, @@ -1457,17 +1490,12 @@ void ldlm_lock_cancel(struct ldlm_lock *lock) ldlm_del_waiting_lock(lock); - /* Releases res lock */ + /* Releases cancel callback. */ ldlm_cancel_callback(lock); /* Yes, second time, just in case it was added again while we were running with no res lock in ldlm_cancel_callback */ ldlm_del_waiting_lock(lock); - if (!(LDLM_SL_EMPTY(&lock->l_sl_mode) && - LDLM_SL_EMPTY(&lock->l_sl_policy)) && - (lock->l_resource->lr_type == LDLM_PLAIN || - lock->l_resource->lr_type == LDLM_IBITS)) - ldlm_unlink_lock_skiplist(lock); ldlm_resource_unlink_lock(lock); ldlm_lock_destroy_nolock(lock); unlock_res_and_lock(lock); @@ -1521,7 +1549,7 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, int granted = 0; int old_mode, rc; struct ldlm_lock *mark_lock = NULL; - int join= LDLM_JOIN_NONE; + int join = LDLM_JOIN_NONE; ldlm_error_t err; ENTRY; @@ -1565,8 +1593,6 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, else if (lock->l_res_link.next != &res->lr_granted) mark_lock = list_entry(lock->l_res_link.next, struct ldlm_lock, l_res_link); - if (join != LDLM_JOIN_NONE) - ldlm_unlink_lock_skiplist(lock); } ldlm_resource_unlink_lock(lock); @@ -1637,9 +1663,11 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) libcfs_nid2str(imp->imp_connection->c_peer.nid), lock->l_remote_handle.cookie); } - CDEBUG(level, " Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource, - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1]); + CDEBUG(level, " Resource: %p ("LPU64"/"LPU64"/"LPU64")\n", + lock->l_resource, + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + lock->l_resource->lr_name.name[2]); CDEBUG(level, " Req mode: %s, grant mode: %s, rc: %u, read: %d, " "write: %d flags: %#x\n", ldlm_lockname[lock->l_req_mode], ldlm_lockname[lock->l_granted_mode], @@ -1683,23 +1711,24 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, { va_list args; cfs_debug_limit_state_t *cdls = data->msg_cdls; + + va_start(args, fmt); - va_start(args, fmt); if (lock->l_resource == NULL) { libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " - LPX64" expref: %d pid: %u\n", lock, - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - atomic_read(&lock->l_export->exp_refcount) : -99, - lock->l_pid); - va_end(args); + " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " + LPX64" expref: %d pid: %u\n", lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + va_end(args); return; } @@ -1707,94 +1736,97 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, case LDLM_EXTENT: libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 - "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 - " expref: %d pid: %u\n", - lock->l_resource->lr_namespace->ns_name, lock, - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_policy_data.l_extent.start, - lock->l_policy_data.l_extent.end, - lock->l_req_extent.start, lock->l_req_extent.end, - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - atomic_read(&lock->l_export->exp_refcount) : -99, - lock->l_pid); + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 + "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 + " expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_policy_data.l_extent.start, + lock->l_policy_data.l_extent.end, + lock->l_req_extent.start, lock->l_req_extent.end, + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); break; + case LDLM_FLOCK: libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " - "["LPU64"->"LPU64"] flags: %x remote: "LPX64 - " expref: %d pid: %u\n", - lock->l_resource->lr_namespace->ns_name, lock, - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_policy_data.l_flock.pid, - lock->l_policy_data.l_flock.start, - lock->l_policy_data.l_flock.end, - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - atomic_read(&lock->l_export->exp_refcount) : -99, - lock->l_pid); + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " + "["LPU64"->"LPU64"] flags: %x remote: "LPX64 + " expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_policy_data.l_flock.pid, + lock->l_policy_data.l_flock.start, + lock->l_policy_data.l_flock.end, + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); break; + case LDLM_IBITS: libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " - "flags: %x remote: "LPX64" expref: %d " - "pid %u\n", - lock->l_resource->lr_namespace->ns_name, - lock, lock->l_handle.h_cookie, - atomic_read (&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - lock->l_policy_data.l_inodebits.bits, - atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - atomic_read(&lock->l_export->exp_refcount) : -99, - lock->l_pid); + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " + "flags: %x remote: "LPX64" expref: %d " + "pid %u\n", + lock->l_resource->lr_namespace->ns_name, + lock, lock->l_handle.h_cookie, + atomic_read (&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + lock->l_policy_data.l_inodebits.bits, + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); break; + default: libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " - "remote: "LPX64" expref: %d pid: %u\n", - lock->l_resource->lr_namespace->ns_name, - lock, lock->l_handle.h_cookie, - atomic_read (&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - atomic_read(&lock->l_export->exp_refcount) : -99, - lock->l_pid); + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " + "remote: "LPX64" expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, + lock, lock->l_handle.h_cookie, + atomic_read (&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); break; } va_end(args); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 9f2b1d5..59c7cfa 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -42,7 +42,6 @@ extern cfs_mem_cache_t *ldlm_resource_slab; extern cfs_mem_cache_t *ldlm_lock_slab; -extern struct lustre_lock ldlm_handle_lock; extern struct list_head ldlm_namespace_list; extern struct semaphore ldlm_namespace_lock; @@ -193,11 +192,14 @@ static int expired_lock_main(void *arg) RETURN(0); } +static int ldlm_add_waiting_lock(struct ldlm_lock *lock); + /* This is called from within a timer interrupt and cannot schedule */ static void waiting_locks_callback(unsigned long unused) { struct ldlm_lock *lock, *last = NULL; +repeat: spin_lock_bh(&waiting_locks_spinlock); while (!list_empty(&waiting_locks_list)) { lock = list_entry(waiting_locks_list.next, struct ldlm_lock, @@ -207,30 +209,45 @@ static void waiting_locks_callback(unsigned long unused) (lock->l_req_mode == LCK_GROUP)) break; - LDLM_ERROR(lock, "lock callback timer expired: evicting client " - "%s@%s nid %s ",lock->l_export->exp_client_uuid.uuid, - lock->l_export->exp_connection->c_remote_uuid.uuid, - libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); + if (ptlrpc_check_suspend()) { + /* there is a case when we talk to one mds, holding + * lock from another mds. this way we easily can get + * here, if second mds is being recovered. so, we + * suspend timeouts. bug 6019 */ - if (lock == last) { - LDLM_ERROR(lock, "waiting on lock multiple times"); - CERROR("wll %p n/p %p/%p, l_pending %p n/p %p/%p\n", - &waiting_locks_list, - waiting_locks_list.next, waiting_locks_list.prev, - &lock->l_pending_chain, - lock->l_pending_chain.next, - lock->l_pending_chain.prev); + LDLM_ERROR(lock, "recharge timeout: %s@%s nid %s ", + lock->l_export->exp_client_uuid.uuid, + lock->l_export->exp_connection->c_remote_uuid.uuid, + libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); - CFS_INIT_LIST_HEAD(&waiting_locks_list); /* HACK */ - expired_lock_thread.elt_dump = __LINE__; + list_del_init(&lock->l_pending_chain); + spin_unlock_bh(&waiting_locks_spinlock); + ldlm_add_waiting_lock(lock); + goto repeat; + } - /* LBUG(); */ - CEMERG("would be an LBUG, but isn't (bug 5653)\n"); - libcfs_debug_dumpstack(NULL); - /*blocks* libcfs_debug_dumplog(); */ - /*blocks* libcfs_run_lbug_upcall(file, func, line); */ - break; + /* if timeout overlaps the activation time of suspended timeouts + * then extend it to give a chance for client to reconnect */ + if (cfs_time_before(cfs_time_sub(lock->l_callback_timeout, + cfs_time_seconds(obd_timeout)/2), + ptlrpc_suspend_wakeup_time())) { + LDLM_ERROR(lock, "extend timeout due to recovery: %s@%s nid %s ", + lock->l_export->exp_client_uuid.uuid, + lock->l_export->exp_connection->c_remote_uuid.uuid, + libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); + + list_del_init(&lock->l_pending_chain); + spin_unlock_bh(&waiting_locks_spinlock); + ldlm_add_waiting_lock(lock); + goto repeat; } + + LDLM_ERROR(lock, "lock callback timer expired: evicting client " + "%s@%s nid %s\n", + lock->l_export->exp_client_uuid.uuid, + lock->l_export->exp_connection->c_remote_uuid.uuid, + libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); + last = lock; list_del(&lock->l_pending_chain); @@ -367,7 +384,7 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock) /* * Prolong the lock - * + * * Called with namespace lock held. */ int ldlm_refresh_waiting_lock(struct ldlm_lock *lock) @@ -419,13 +436,13 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, struct ptlrpc_connection *conn = lock->l_export->exp_connection; char *str = libcfs_nid2str(conn->c_peer.nid); - LCONSOLE_ERROR("A client on nid %s was evicted from service %s.\n", - str, lock->l_export->exp_obd->obd_name); + LCONSOLE_ERROR_MSG(0x138, "A client on nid %s was evicted from " + "service %s.\n", str, + lock->l_export->exp_obd->obd_name); - LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID %s" - " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid, - conn->c_remote_uuid.uuid, libcfs_nid2str(conn->c_peer.nid), - str); + LCONSOLE_ERROR_MSG(0x012, "Lock %s callback to %s timed out for " + "resource %d\n", ast_type, + obd_export_nid2str(lock->l_export), rc); if (obd_dump_on_timeout) libcfs_debug_dumplog(); @@ -459,10 +476,11 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, } else if (rc) { if (rc == -EINVAL) LDLM_DEBUG(lock, "client (nid %s) returned %d" - " from %s AST - normal race", - libcfs_nid2str(peer.nid), - lustre_msg_get_status(req->rq_repmsg), - ast_type); + " from %s AST - normal race", + libcfs_nid2str(peer.nid), + req->rq_repmsg ? + lustre_msg_get_status(req->rq_repmsg) : -1, + ast_type); else LDLM_ERROR(lock, "client (nid %s) returned %d " "from %s AST", libcfs_nid2str(peer.nid), @@ -495,6 +513,10 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, } LASSERT(lock); + if (lock->l_export->exp_obd->obd_recovering != 0) { + LDLM_ERROR(lock, "BUG 6063: lock collide during recovery"); + ldlm_lock_dump(D_ERROR, lock, 0); + } req = ptlrpc_prep_req(lock->l_export->exp_imp_reverse, LUSTRE_DLM_VERSION, LDLM_BL_CALLBACK, 2, size, @@ -519,15 +541,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, RETURN(0); } -#if 0 - if (CURRENT_SECONDS - lock->l_export->exp_last_request_time > 30){ - unlock_res(lock->l_resource); - ptlrpc_req_finished(req); - ldlm_failed_ast(lock, -ETIMEDOUT, "Not-attempted blocking"); - RETURN(-ETIMEDOUT); - } -#endif - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) instant_cancel = 1; @@ -560,6 +573,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, rc = ptlrpc_queue_wait(req); OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2); } + if (rc != 0) rc = ldlm_handle_ast_error(lock, req, rc, "blocking"); @@ -624,7 +638,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)", total_enqueue_wait); - + ptlrpc_req_set_repsize(req, 1, NULL); req->rq_send_state = LUSTRE_IMP_FULL; @@ -647,8 +661,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) instant_cancel = 1; lock_res_and_lock(lock); } else { - ldlm_add_waiting_lock(lock); /* start the lock-timeout - clock */ + /* start the lock-timeout clock */ + ldlm_add_waiting_lock(lock); } } unlock_res_and_lock(lock); @@ -656,7 +670,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) if (lock->l_export && lock->l_export->exp_ldlm_stats) lprocfs_counter_incr(lock->l_export->exp_ldlm_stats, LDLM_CP_CALLBACK - LDLM_FIRST_OPC); - + rc = ptlrpc_queue_wait(req); if (rc != 0) rc = ldlm_handle_ast_error(lock, req, rc, "completion"); @@ -718,7 +732,8 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) } static struct ldlm_lock * -find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl) +find_existing_lock(struct obd_export *exp, + const struct lustre_handle *remote_hdl) { struct list_head *iter; @@ -736,19 +751,22 @@ find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl) return NULL; } +#ifdef __KERNEL__ +extern unsigned long long lu_time_stamp_get(void); +#else +#define lu_time_stamp_get() time(NULL) +#endif /* * Main server-side entry point into LDLM. This is called by ptlrpc service * threads to carry out client lock enqueueing requests. */ -int ldlm_handle_enqueue(struct ptlrpc_request *req, - ldlm_completion_callback completion_callback, - ldlm_blocking_callback blocking_callback, - ldlm_glimpse_callback glimpse_callback) +int ldlm_handle_enqueue0(struct ldlm_namespace *ns, + struct ptlrpc_request *req, + const struct ldlm_request *dlm_req, + const struct ldlm_callback_suite *cbs) { - struct obd_device *obddev = req->rq_export->exp_obd; struct ldlm_reply *dlm_rep; - struct ldlm_request *dlm_req; int size[3] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREPLY_OFF] = sizeof(*dlm_rep) }; int rc = 0; @@ -760,50 +778,46 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, LDLM_DEBUG_NOLOCK("server-side enqueue handler START"); - dlm_req = lustre_swab_reqbuf(req, DLM_LOCKREQ_OFF, sizeof(*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR ("Can't unpack dlm_req\n"); - GOTO(out, rc = -EFAULT); - } - flags = dlm_req->lock_flags; LASSERT(req->rq_export); - + if (req->rq_export->exp_ldlm_stats) lprocfs_counter_incr(req->rq_export->exp_ldlm_stats, LDLM_ENQUEUE - LDLM_FIRST_OPC); - if (dlm_req->lock_desc.l_resource.lr_type < LDLM_MIN_TYPE || - dlm_req->lock_desc.l_resource.lr_type >= LDLM_MAX_TYPE) { - DEBUG_REQ(D_ERROR, req, "invalid lock request type %d\n", + if (unlikely(dlm_req->lock_desc.l_resource.lr_type < LDLM_MIN_TYPE || + dlm_req->lock_desc.l_resource.lr_type >= LDLM_MAX_TYPE)) { + DEBUG_REQ(D_ERROR, req, "invalid lock request type %d", dlm_req->lock_desc.l_resource.lr_type); GOTO(out, rc = -EFAULT); } - if (dlm_req->lock_desc.l_req_mode <= LCK_MINMODE || - dlm_req->lock_desc.l_req_mode >= LCK_MAXMODE || - dlm_req->lock_desc.l_req_mode & (dlm_req->lock_desc.l_req_mode-1)) { - DEBUG_REQ(D_ERROR, req, "invalid lock request mode %d\n", + if (unlikely(dlm_req->lock_desc.l_req_mode <= LCK_MINMODE || + dlm_req->lock_desc.l_req_mode >= LCK_MAXMODE || + dlm_req->lock_desc.l_req_mode & + (dlm_req->lock_desc.l_req_mode-1))) { + DEBUG_REQ(D_ERROR, req, "invalid lock request mode %d", dlm_req->lock_desc.l_req_mode); GOTO(out, rc = -EFAULT); } if (req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) { - if (dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN) { + if (unlikely(dlm_req->lock_desc.l_resource.lr_type == + LDLM_PLAIN)) { DEBUG_REQ(D_ERROR, req, - "PLAIN lock request from IBITS client?\n"); + "PLAIN lock request from IBITS client?"); GOTO(out, rc = -EPROTO); } - } else if (dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) { + } else if (unlikely(dlm_req->lock_desc.l_resource.lr_type == + LDLM_IBITS)) { DEBUG_REQ(D_ERROR, req, - "IBITS lock request from unaware client?\n"); + "IBITS lock request from unaware client?"); GOTO(out, rc = -EPROTO); } #if 0 - /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check + /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check against server's _CONNECT_SUPPORTED flags? (I don't want to use ibits for mgc/mgs) */ @@ -819,7 +833,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, } #endif - if (flags & LDLM_FL_REPLAY) { + if (unlikely(flags & LDLM_FL_REPLAY)) { lock = find_existing_lock(req->rq_export, &dlm_req->lock_handle1); if (lock != NULL) { @@ -830,12 +844,13 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, } /* The lock's callback data might be set in the policy function */ - lock = ldlm_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2, - dlm_req->lock_desc.l_resource.lr_name, + lock = ldlm_lock_create(ns, &dlm_req->lock_handle2, + &dlm_req->lock_desc.l_resource.lr_name, dlm_req->lock_desc.l_resource.lr_type, dlm_req->lock_desc.l_req_mode, - blocking_callback, completion_callback, - glimpse_callback, NULL, 0); + cbs->lcs_blocking, cbs->lcs_completion, + cbs->lcs_glimpse, NULL, 0); + if (!lock) GOTO(out, rc = -ENOMEM); @@ -885,7 +900,7 @@ existing_lock: if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) lock->l_req_extent = lock->l_policy_data.l_extent; - err = ldlm_lock_enqueue(obddev->obd_namespace, &lock, cookie, &flags); + err = ldlm_lock_enqueue(ns, &lock, cookie, &flags); if (err) GOTO(out, err); @@ -907,17 +922,18 @@ existing_lock: /* Don't move a pending lock onto the export if it has already * been evicted. Cancel it now instead. (bug 5683) */ - if (req->rq_export->exp_failed || - OBD_FAIL_CHECK_ONCE(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT)) { + if (unlikely(req->rq_export->exp_failed || + OBD_FAIL_CHECK_ONCE(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT))) { LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export); rc = -ENOTCONN; } else if (lock->l_flags & LDLM_FL_AST_SENT) { dlm_rep->lock_flags |= LDLM_FL_AST_SENT; if (lock->l_granted_mode == lock->l_req_mode) { - /* Only cancel lock if it was granted, because it - * would be destroyed immediatelly and would never - * be granted in the future, causing timeouts on client. - * Not granted lock will be cancelled immediatelly after + /* + * Only cancel lock if it was granted, because it would + * be destroyed immediatelly and would never be granted + * in the future, causing timeouts on client. Not + * granted lock will be cancelled immediatelly after * sending completion AST. */ if (dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK) { @@ -933,8 +949,8 @@ existing_lock: if ((dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN || dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) && req->rq_export->exp_libclient) { - if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) || - !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK)) { + if (unlikely(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) || + !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK))){ CERROR("Granting sync lock to libclient. " "req fl %d, rep fl %d, lock fl %d\n", dlm_req->lock_flags, dlm_rep->lock_flags, @@ -1004,9 +1020,35 @@ existing_lock: return rc; } -int ldlm_handle_convert(struct ptlrpc_request *req) +int ldlm_handle_enqueue(struct ptlrpc_request *req, + ldlm_completion_callback completion_callback, + ldlm_blocking_callback blocking_callback, + ldlm_glimpse_callback glimpse_callback) { + int rc; struct ldlm_request *dlm_req; + struct ldlm_callback_suite cbs = { + .lcs_completion = completion_callback, + .lcs_blocking = blocking_callback, + .lcs_glimpse = glimpse_callback + }; + + + dlm_req = lustre_swab_reqbuf(req, DLM_LOCKREQ_OFF, + sizeof *dlm_req, lustre_swab_ldlm_request); + if (dlm_req != NULL) { + rc = ldlm_handle_enqueue0(req->rq_export->exp_obd->obd_namespace, + req, dlm_req, &cbs); + } else { + CERROR ("Can't unpack dlm_req\n"); + rc = -EFAULT; + } + return rc; +} + +int ldlm_handle_convert0(struct ptlrpc_request *req, + const struct ldlm_request *dlm_req) +{ struct ldlm_reply *dlm_rep; struct ldlm_lock *lock; int rc; @@ -1014,13 +1056,6 @@ int ldlm_handle_convert(struct ptlrpc_request *req) [DLM_LOCKREPLY_OFF] = sizeof(*dlm_rep) }; ENTRY; - dlm_req = lustre_swab_reqbuf(req, DLM_LOCKREQ_OFF, sizeof(*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR ("Can't unpack dlm_req\n"); - RETURN (-EFAULT); - } - if (req->rq_export && req->rq_export->exp_ldlm_stats) lprocfs_counter_incr(req->rq_export->exp_ldlm_stats, LDLM_CONVERT - LDLM_FIRST_OPC); @@ -1065,6 +1100,22 @@ int ldlm_handle_convert(struct ptlrpc_request *req) RETURN(0); } +int ldlm_handle_convert(struct ptlrpc_request *req) +{ + int rc; + struct ldlm_request *dlm_req; + + dlm_req = lustre_swab_reqbuf(req, DLM_LOCKREQ_OFF, sizeof *dlm_req, + lustre_swab_ldlm_request); + if (dlm_req != NULL) { + rc = ldlm_handle_convert0(req, dlm_req); + } else { + CERROR ("Can't unpack dlm_req\n"); + rc = -EFAULT; + } + return rc; +} + int ldlm_handle_cancel(struct ptlrpc_request *req) { struct ldlm_request *dlm_req; @@ -1107,11 +1158,12 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) if (res && res->lr_namespace->ns_lvbo && res->lr_namespace->ns_lvbo->lvbo_update) { (void)res->lr_namespace->ns_lvbo->lvbo_update - (res, NULL, 0, 1); - //(res, req->rq_reqmsg, 1, 1); + (res, NULL, 0, 0); } ldlm_lock_cancel(lock); + if (ldlm_del_waiting_lock(lock)) + CDEBUG(D_DLMTRACE, "cancelled waiting lock %p\n", lock); req->rq_status = rc; } @@ -1190,7 +1242,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, sizeof(lock->l_resource->lr_name)) != 0) { unlock_res_and_lock(lock); ldlm_lock_change_resource(ns, lock, - dlm_req->lock_desc.l_resource.lr_name); + &dlm_req->lock_desc.l_resource.lr_name); LDLM_DEBUG(lock, "completion AST, new resource"); CERROR("change resource!\n"); lock_res_and_lock(lock); @@ -1249,7 +1301,7 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req, lock_res_and_lock(lock); if (lock->l_granted_mode == LCK_PW && !lock->l_readers && !lock->l_writers && - cfs_time_after(cfs_time_current(), + cfs_time_after(cfs_time_current(), cfs_time_add(lock->l_last_used, cfs_time_seconds(10)))) { unlock_res_and_lock(lock); if (ldlm_bl_to_thread(ns, NULL, lock)) @@ -1500,6 +1552,65 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) RETURN(0); } +void ldlm_revoke_export_locks(struct obd_export *exp) +{ + struct list_head *locklist = &exp->exp_ldlm_data.led_held_locks; + struct list_head rpc_list; + struct ldlm_lock *lock, *next; + struct ldlm_lock_desc desc; + + ENTRY; + INIT_LIST_HEAD(&rpc_list); + + spin_lock(&exp->exp_ldlm_data.led_lock); + list_for_each_entry_safe(lock, next, locklist, l_export_chain) { + lock_res_and_lock(lock); + + if (lock->l_req_mode != lock->l_granted_mode) { + unlock_res_and_lock(lock); + continue; + } + + LASSERT(lock->l_resource); + if (lock->l_resource->lr_type != LDLM_IBITS && + lock->l_resource->lr_type != LDLM_PLAIN) { + unlock_res_and_lock(lock); + continue; + } + + if (lock->l_flags & LDLM_FL_AST_SENT) { + unlock_res_and_lock(lock); + continue; + } + + LASSERT(lock->l_blocking_ast); + LASSERT(!lock->l_blocking_lock); + + lock->l_flags |= LDLM_FL_AST_SENT; + list_move(&lock->l_export_chain, &rpc_list); + + unlock_res_and_lock(lock); + } + spin_unlock(&exp->exp_ldlm_data.led_lock); + + while (!list_empty(&rpc_list)) { + lock = list_entry(rpc_list.next, struct ldlm_lock, + l_export_chain); + list_del_init(&lock->l_export_chain); + + /* the desc just pretend to exclusive */ + ldlm_lock2desc(lock, &desc); + desc.l_req_mode = LCK_EX; + desc.l_granted_mode = 0; + + LDLM_LOCK_GET(lock); + lock->l_blocking_ast(lock, &desc, lock->l_ast_data, + LDLM_CB_BLOCKING); + LDLM_LOCK_PUT(lock); + } + EXIT; +} + #ifdef __KERNEL__ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp) { @@ -1625,7 +1736,8 @@ static int ldlm_setup(void) ldlm_callback_handler, "ldlm_cbd", ldlm_svc_proc_dir, NULL, LDLM_THREADS_AUTO_MIN, LDLM_THREADS_AUTO_MAX, - "ldlm_cb"); + "ldlm_cb", + LCT_MD_THREAD|LCT_DT_THREAD); if (!ldlm_state->ldlm_cb_service) { CERROR("failed to start service\n"); @@ -1638,8 +1750,9 @@ static int ldlm_setup(void) LDLM_CANCEL_REPLY_PORTAL, ldlm_timeout * 6000, ldlm_cancel_handler, "ldlm_canceld", ldlm_svc_proc_dir, NULL, - LDLM_THREADS_AUTO_MIN, LDLM_THREADS_AUTO_MAX, - "ldlm_cn"); + LDLM_THREADS_AUTO_MIN, LDLM_THREADS_AUTO_MAX, + "ldlm_cn", + LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD); if (!ldlm_state->ldlm_cancel_service) { CERROR("failed to start service\n"); @@ -1845,12 +1958,15 @@ EXPORT_SYMBOL(ldlm_server_blocking_ast); EXPORT_SYMBOL(ldlm_server_completion_ast); EXPORT_SYMBOL(ldlm_server_glimpse_ast); EXPORT_SYMBOL(ldlm_handle_enqueue); +EXPORT_SYMBOL(ldlm_handle_enqueue0); EXPORT_SYMBOL(ldlm_handle_cancel); EXPORT_SYMBOL(ldlm_handle_convert); +EXPORT_SYMBOL(ldlm_handle_convert0); EXPORT_SYMBOL(ldlm_del_waiting_lock); EXPORT_SYMBOL(ldlm_get_ref); EXPORT_SYMBOL(ldlm_put_ref); EXPORT_SYMBOL(ldlm_refresh_waiting_lock); +EXPORT_SYMBOL(ldlm_revoke_export_locks); /* ldlm_resource.c */ EXPORT_SYMBOL(ldlm_namespace_new); @@ -1869,16 +1985,16 @@ EXPORT_SYMBOL(client_obd_setup); EXPORT_SYMBOL(client_obd_cleanup); EXPORT_SYMBOL(client_connect_import); EXPORT_SYMBOL(client_disconnect_export); -EXPORT_SYMBOL(target_abort_recovery); -EXPORT_SYMBOL(target_cleanup_recovery); +EXPORT_SYMBOL(target_start_recovery_thread); +EXPORT_SYMBOL(target_stop_recovery_thread); EXPORT_SYMBOL(target_handle_connect); +EXPORT_SYMBOL(target_cleanup_recovery); EXPORT_SYMBOL(target_destroy_export); EXPORT_SYMBOL(target_cancel_recovery_timer); EXPORT_SYMBOL(target_send_reply); EXPORT_SYMBOL(target_queue_recovery_request); EXPORT_SYMBOL(target_handle_ping); EXPORT_SYMBOL(target_handle_disconnect); -EXPORT_SYMBOL(target_queue_final_reply); /* l_lock.c */ EXPORT_SYMBOL(lock_res_and_lock); diff --git a/lustre/ldlm/ldlm_plain.c b/lustre/ldlm/ldlm_plain.c index b28d89e..71351d2 100644 --- a/lustre/ldlm/ldlm_plain.c +++ b/lustre/ldlm/ldlm_plain.c @@ -69,6 +69,7 @@ ldlm_plain_compat_queue(struct list_head *queue, struct ldlm_lock *req, compat = 0; if (lock->l_blocking_ast) ldlm_add_ast_work_item(lock, req, work_list); + if (LDLM_SL_HEAD(&lock->l_sl_mode)) { /* add all members of the mode group */ do { diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index a24ce4f..a9bd553 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -54,6 +54,9 @@ int ldlm_expired_completion_wait(void *data) if (lock->l_conn_export == NULL) { static cfs_time_t next_dump = 0, last_dump = 0; + if (ptlrpc_check_suspend()) + RETURN(0); + LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago); " "not entering recovery in server code, just going " "back to sleep", lock->l_enqueued_time.tv_sec, @@ -223,7 +226,8 @@ int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp) return -ELDLM_NO_LOCK_DATA; } -int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, struct ldlm_res_id res_id, +int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, + const struct ldlm_res_id *res_id, ldlm_type_t type, ldlm_policy_data_t *policy, ldlm_mode_t mode, int *flags, ldlm_blocking_callback blocking, @@ -237,14 +241,14 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, struct ldlm_res_id res_id, ENTRY; LASSERT(!(*flags & LDLM_FL_REPLAY)); - if (ns->ns_client) { + if (unlikely(ns->ns_client)) { CERROR("Trying to enqueue local lock in a shadow namespace\n"); LBUG(); } lock = ldlm_lock_create(ns, NULL, res_id, type, mode, blocking, completion, glimpse, data, lvb_len); - if (!lock) + if (unlikely(!lock)) GOTO(out_nolock, err = -ENOMEM); LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created"); @@ -262,13 +266,11 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, struct ldlm_res_id res_id, lock->l_req_extent = policy->l_extent; err = ldlm_lock_enqueue(ns, &lock, policy, flags); - if (err != ELDLM_OK) + if (unlikely(err != ELDLM_OK)) GOTO(out, err); if (policy != NULL) *policy = lock->l_policy_data; - if ((*flags) & LDLM_FL_LOCK_CHANGED) - res_id = lock->l_resource->lr_name; LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)", lock); @@ -380,15 +382,21 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, lock->l_req_mode = newmode; } - if (reply->lock_desc.l_resource.lr_name.name[0] != - lock->l_resource->lr_name.name[0]) { - CDEBUG(D_INFO, "remote intent success, locking %ld " - "instead of %ld\n", + if (memcmp(reply->lock_desc.l_resource.lr_name.name, + lock->l_resource->lr_name.name, + sizeof(struct ldlm_res_id))) { + CDEBUG(D_INFO, "remote intent success, locking " + "(%ld,%ld,%ld) instead of " + "(%ld,%ld,%ld)\n", (long)reply->lock_desc.l_resource.lr_name.name[0], - (long)lock->l_resource->lr_name.name[0]); + (long)reply->lock_desc.l_resource.lr_name.name[1], + (long)reply->lock_desc.l_resource.lr_name.name[2], + (long)lock->l_resource->lr_name.name[0], + (long)lock->l_resource->lr_name.name[1], + (long)lock->l_resource->lr_name.name[2]); ldlm_lock_change_resource(ns, lock, - reply->lock_desc.l_resource.lr_name); + &reply->lock_desc.l_resource.lr_name); if (lock->l_resource == NULL) { LBUG(); GOTO(cleanup, rc = -ENOMEM); @@ -461,8 +469,9 @@ cleanup: * request was created in ldlm_cli_enqueue and it is the async request, * pass it to the caller in @reqp. */ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_res_id res_id, ldlm_type_t type, - ldlm_policy_data_t *policy, ldlm_mode_t mode, int *flags, + const struct ldlm_res_id *res_id, + ldlm_type_t type, ldlm_policy_data_t *policy, + ldlm_mode_t mode, int *flags, ldlm_blocking_callback blocking, ldlm_completion_callback completion, ldlm_glimpse_callback glimpse, @@ -700,7 +709,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) lock = __ldlm_handle2lock(lockh, LDLM_FL_CANCELING); if (lock == NULL) RETURN(0); - + if (lock->l_conn_export) { int local_only; struct obd_import *imp; @@ -713,7 +722,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK)); ldlm_cancel_callback(lock); unlock_res_and_lock(lock); - + if (local_only) { CDEBUG(D_INFO, "not sending request (at caller's " "instruction)\n"); @@ -866,8 +875,8 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) } static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - struct ldlm_res_id res_id, int flags, - void *opaque) + const struct ldlm_res_id *res_id, + int flags, void *opaque) { struct list_head *tmp, *next, list = CFS_LIST_HEAD_INIT(list); struct ldlm_resource *res; @@ -877,7 +886,7 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, res = ldlm_resource_get(ns, NULL, res_id, 0, 0); if (res == NULL) { /* This is not a problem. */ - CDEBUG(D_INFO, "No resource "LPU64"\n", res_id.name[0]); + CDEBUG(D_INFO, "No resource "LPU64"\n", res_id->name[0]); RETURN(0); } @@ -950,7 +959,8 @@ static inline int have_no_nsresource(struct ldlm_namespace *ns) * to notify the server. * If flags & LDLM_FL_WARN, print a warning if some locks are still in use. */ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, - struct ldlm_res_id *res_id, int flags, void *opaque) + const struct ldlm_res_id *res_id, + int flags, void *opaque) { int i; ENTRY; @@ -959,7 +969,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, RETURN(ELDLM_OK); if (res_id) - RETURN(ldlm_cli_cancel_unused_resource(ns, *res_id, flags, + RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, flags, opaque)); spin_lock(&ns->ns_hash_lock); @@ -974,7 +984,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, ldlm_resource_getref(res); spin_unlock(&ns->ns_hash_lock); - rc = ldlm_cli_cancel_unused_resource(ns, res->lr_name, + rc = ldlm_cli_cancel_unused_resource(ns, &res->lr_name, flags, opaque); if (rc) @@ -993,7 +1003,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, /* join/split resource locks to/from lru list */ int ldlm_cli_join_lru(struct ldlm_namespace *ns, - struct ldlm_res_id *res_id, int join) + const struct ldlm_res_id *res_id, int join) { struct ldlm_resource *res; struct ldlm_lock *lock, *n; @@ -1002,7 +1012,7 @@ int ldlm_cli_join_lru(struct ldlm_namespace *ns, LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT); - res = ldlm_resource_get(ns, NULL, *res_id, LDLM_EXTENT, 0); + res = ldlm_resource_get(ns, NULL, res_id, LDLM_EXTENT, 0); if (res == NULL) RETURN(count); LASSERT(res->lr_type == LDLM_EXTENT); @@ -1138,7 +1148,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, } /* non-blocking function to manipulate a lock whose cb_data is being put away.*/ -void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, +void ldlm_resource_iterate(struct ldlm_namespace *ns, + const struct ldlm_res_id *res_id, ldlm_iterator_t iter, void *data) { struct ldlm_resource *res; @@ -1149,7 +1160,7 @@ void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id LBUG(); } - res = ldlm_resource_get(ns, NULL, *res_id, 0, 0); + res = ldlm_resource_get(ns, NULL, res_id, 0, 0); if (res == NULL) { EXIT; return; @@ -1268,6 +1279,11 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) size[DLM_REPLY_REC_OFF] = lock->l_lvb_len; } ptlrpc_req_set_repsize(req, buffers, size); + /* notify the server we've replayed all requests. + * also, we mark the request to be put on a dedicated + * queue to be processed after all request replayes. + * bug 6063 */ + lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE); LDLM_DEBUG(lock, "replaying lock:"); diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 8362688..0bce589 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -101,10 +101,10 @@ err: void ldlm_proc_cleanup(void) { - if (ldlm_svc_proc_dir) + if (ldlm_svc_proc_dir) lprocfs_remove(&ldlm_svc_proc_dir); - if (ldlm_ns_proc_dir) + if (ldlm_ns_proc_dir) lprocfs_remove(&ldlm_ns_proc_dir); if (ldlm_type_proc_dir) @@ -122,8 +122,7 @@ static int lprocfs_read_lru_size(char *page, char **start, off_t off, int count, int *eof, void *data) { struct ldlm_namespace *ns = data; - return lprocfs_uint_rd(page, start, off, count, eof, - &ns->ns_max_unused); + return snprintf(page, count, "%u\n", ns->ns_max_unused); } #define MAX_STRING_SIZE 128 @@ -282,10 +281,10 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, int local_only = (flags & LDLM_FL_LOCAL_ONLY); ENTRY; - + do { struct ldlm_lock *lock = NULL; - + /* first, we look for non-cleaned-yet lock * all cleaned locks are marked by CLEANED flag */ lock_res(res); @@ -299,7 +298,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, lock->l_flags |= LDLM_FL_CLEANED; break; } - + if (lock == NULL) { unlock_res(res); break; @@ -431,12 +430,13 @@ int ldlm_namespace_free(struct ldlm_namespace *ns, int force) rc = l_wait_event(ns->ns_waitq, ns->ns_refcount == 0, &lwi); if (ns->ns_refcount) - LCONSOLE_ERROR("Lock manager: wait for %s namespace " - "cleanup aborted with %d resources in " - "use. (%d)\nI'm going to try to clean " - "up anyway, but I might need a reboot " - "of this node.\n", ns->ns_name, - (int) ns->ns_refcount, rc); + LCONSOLE_ERROR_MSG(0x139, "Lock manager: wait for %s " + "namespace cleanup aborted with %d " + "resources in use. (%d)\nI'm going " + "to try to clean up anyway, but I " + "might need a reboot of this node.\n", + ns->ns_name, (int) ns->ns_refcount, + rc); CDEBUG(D_DLMTRACE, "dlm namespace %s free done waiting\n", ns->ns_name); } @@ -448,16 +448,17 @@ int ldlm_namespace_free(struct ldlm_namespace *ns, int force) ldlm_put_ref(force); - return ELDLM_OK; + RETURN(ELDLM_OK); } -static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name) +static __u32 ldlm_hash_fn(struct ldlm_resource *parent, + const struct ldlm_res_id *name) { __u32 hash = 0; int i; for (i = 0; i < RES_NAME_SIZE; i++) - hash += name.name[i]; + hash += name->name[i]; hash += (__u32)((unsigned long)parent >> 4); @@ -491,7 +492,8 @@ static struct ldlm_resource *ldlm_resource_new(void) /* must be called with hash lock held */ static struct ldlm_resource * -ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 hash) +ldlm_resource_find(struct ldlm_namespace *ns, const struct ldlm_res_id *name, + __u32 hash) { struct list_head *bucket, *tmp; struct ldlm_resource *res; @@ -501,7 +503,7 @@ ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 has list_for_each(tmp, bucket) { res = list_entry(tmp, struct ldlm_resource, lr_hash); - if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0) + if (memcmp(&res->lr_name, name, sizeof(res->lr_name)) == 0) return res; } @@ -512,7 +514,7 @@ ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 has * Returns: newly-allocated, referenced, unlocked resource */ static struct ldlm_resource * ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id name, __u32 hash, ldlm_type_t type) + const struct ldlm_res_id *name, __u32 hash, ldlm_type_t type) { struct list_head *bucket; struct ldlm_resource *res, *old_res; @@ -525,7 +527,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, if (!res) RETURN(NULL); - res->lr_name = name; + res->lr_name = *name; res->lr_namespace = ns; res->lr_type = type; res->lr_most_restr = LCK_NL; @@ -566,7 +568,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, rc = ns->ns_lvbo->lvbo_init(res); if (rc) CERROR("lvbo_init failed for resource " - LPU64": rc %d\n", name.name[0], rc); + LPU64": rc %d\n", name->name[0], rc); /* we create resource with locked lr_lvb_sem */ up(&res->lr_lvb_sem); } @@ -579,7 +581,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, * Returns: referenced, unlocked ldlm_resource or NULL */ struct ldlm_resource * ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id name, ldlm_type_t type, int create) + const struct ldlm_res_id *name, ldlm_type_t type, int create) { __u32 hash = ldlm_hash_fn(parent, name); struct ldlm_resource *res = NULL; @@ -587,7 +589,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, LASSERT(ns != NULL); LASSERT(ns->ns_hash != NULL); - LASSERT(name.name[0] != 0); + LASSERT(name->name[0] != 0); spin_lock(&ns->ns_hash_lock); res = ldlm_resource_find(ns, name, hash); @@ -748,6 +750,7 @@ void ldlm_resource_insert_lock_after(struct ldlm_lock *original, void ldlm_resource_unlink_lock(struct ldlm_lock *lock) { check_res_locked(lock->l_resource); + ldlm_unlink_lock_skiplist(lock); list_del_init(&lock->l_res_link); } @@ -782,8 +785,8 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) if (!((libcfs_debug | D_ERROR) & level)) return; - CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n", - ns->ns_name, ns->ns_refcount, ns->ns_client); + CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n", + ns->ns_name, ns->ns_refcount, ns->ns_client); if (cfs_time_before(cfs_time_current(), ns->ns_next_dump)) return; @@ -800,7 +803,7 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) lock_res(res); ldlm_resource_dump(level, res); unlock_res(res); - + spin_lock(&ns->ns_hash_lock); tmp = tmp->next; ldlm_resource_putref_locked(res); diff --git a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am index dff5989..de73497 100644 --- a/lustre/liblustre/Makefile.am +++ b/lustre/liblustre/Makefile.am @@ -13,6 +13,9 @@ LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/lov/liblov.a \ $(top_builddir)/lustre/obdecho/libobdecho.a \ $(top_builddir)/lustre/osc/libosc.a \ + $(top_builddir)/lustre/fid/libfid.a \ + $(top_builddir)/lustre/fld/libfld.a \ + $(top_builddir)/lustre/lmv/liblmv.a \ $(top_builddir)/lustre/mdc/libmdc.a \ $(top_builddir)/lustre/mgc/libmgc.a \ $(top_builddir)/lustre/ptlrpc/libptlrpc.a \ @@ -54,11 +57,11 @@ else install-exec-hook: endif -libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ +libllite_a_SOURCES = llite_lib.c llite_fid.c super.c namei.c rw.c file.c dir.c \ lutil.c lutil.h llite_lib.h # for make rpms -- need cleanup -liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ +liblustre_a_SOURCES = llite_lib.c llite_fid.c super.c namei.c rw.c file.c dir.c \ llite_lib.h liblustre.a : $(LUSTRE_LIBS) $(LND_LIBS) $(LNET_LIBS) $(SYSIO_LIBS) $(QUOTA_LIBS) diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index 87b7536..8498d80 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -67,29 +67,31 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) struct llu_inode_info *lli = llu_i2info(inode); struct intnl_stat *st = llu_i2stat(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); - struct ll_fid mdc_fid; __u64 offset; int rc = 0; struct ptlrpc_request *request; struct lustre_handle lockh; - struct mds_body *body; + struct mdt_body *body; struct lookup_intent it = { .it_op = IT_READDIR }; - struct mdc_op_data data; - struct obd_device *obddev = class_exp2obd(sbi->ll_mdc_exp); + struct md_op_data op_data; + struct obd_device *obddev = class_exp2obd(sbi->ll_md_exp); struct ldlm_res_id res_id = - { .name = {st->st_ino, (__u64)lli->lli_st_generation} }; + { .name = {fid_seq(&lli->lli_fid), + fid_oid(&lli->lli_fid), + fid_ver(&lli->lli_fid)} }; ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_UPDATE } }; ENTRY; rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh); if (!rc) { - llu_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0); + llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); - rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, &it, LCK_CR, - &data, &lockh, NULL, 0, - ldlm_completion_ast, llu_mdc_blocking_ast, - inode, LDLM_FL_CANCEL_ON_BLOCK); + rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, &it, LCK_CR, + &op_data, &lockh, NULL, 0, + ldlm_completion_ast, llu_md_blocking_ast, + inode, LDLM_FL_CANCEL_ON_BLOCK); request = (struct ptlrpc_request *)it.d.lustre.it_data; if (request) ptlrpc_req_finished(request); @@ -100,16 +102,14 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) } ldlm_lock_dump_handle(D_OTHER, &lockh); - mdc_pack_fid(&mdc_fid, st->st_ino, lli->lli_st_generation, S_IFDIR); - offset = (__u64)page->index << CFS_PAGE_SHIFT; - rc = mdc_readpage(sbi->ll_mdc_exp, &mdc_fid, - offset, page, &request); + rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL, + offset, page, &request); if (!rc) { body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); - LASSERT(body != NULL); /* checked by mdc_readpage() */ - /* swabbed by mdc_readpage() */ + LASSERT(body != NULL); /* checked by md_readpage() */ + /* swabbed by md_readpage() */ LASSERT_REPSWABBED(request, REPLY_REC_OFF); st->st_size = body->size; @@ -202,8 +202,8 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, struct llu_inode_info *lli = llu_i2info(ino); struct intnl_stat *st = llu_i2stat(ino); loff_t pos = *basep, offset; + unsigned long maxpages, pgidx; int filled = 0; - unsigned long pgidx, maxpages; ENTRY; liblustre_wait_event(0); @@ -229,7 +229,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, if (IS_ERR(page)) continue; - /* size might have been updated by mdc_readpage */ + /* size might have been updated by md_readpage */ maxpages = (st->st_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; /* fill in buffer */ @@ -247,8 +247,8 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, offset = (char*) de - addr; over = filldir(buf, nbytes, de->name, de->name_len, - (((__u64)pgidx << CFS_PAGE_SHIFT) | offset) - + le16_to_cpu(de->rec_len), + (((__u64)pgidx << PAGE_SHIFT) | offset) + + le16_to_cpu(de->rec_len), le32_to_cpu(de->inode), d_type, &filled); if (over) { free_page(page); @@ -259,7 +259,6 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, */ if (filled == 0) RETURN(-EINVAL); - GOTO(done, 0); } } @@ -268,7 +267,7 @@ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, free_page(page); } done: - lli->lli_dir_pos = (__u64)pgidx << CFS_PAGE_SHIFT | offset; + lli->lli_dir_pos = pgidx << CFS_PAGE_SHIFT | offset; *basep = lli->lli_dir_pos; liblustre_wait_event(0); RETURN(filled); diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 4069827..1d73aeb 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -71,32 +71,37 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) } } -void llu_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode) +void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode, __u32 opc) { LASSERT(i1 != NULL || i2 != NULL); + LASSERT(op_data); + memset(op_data, 0, sizeof(*op_data)); if (i1) { - ll_i2gids(data->suppgids, i1, i2); - ll_inode2fid(&data->fid1, i1); + ll_i2gids(op_data->op_suppgids, i1, i2); + op_data->op_fid1 = *ll_inode2fid(i1); }else { - ll_i2gids(data->suppgids, i2, i1); - ll_inode2fid(&data->fid1, i2); + ll_i2gids(op_data->op_suppgids, i2, i1); + op_data->op_fid1 = *ll_inode2fid(i2); } if (i2) - ll_inode2fid(&data->fid2, i2); + op_data->op_fid2 = *ll_inode2fid(i2); else - memset(&data->fid2, 0, sizeof(data->fid2)); + fid_zero(&op_data->op_fid2); - data->name = name; - data->namelen = namelen; - data->create_mode = mode; - data->mod_time = CURRENT_TIME; + op_data->op_opc = opc; + op_data->op_name = name; + op_data->op_mode = mode; + op_data->op_namelen = namelen; + op_data->op_mod_time = CURRENT_TIME; +} + +void llu_finish_md_op_data(struct md_op_data *op_data) +{ + OBD_FREE_PTR(op_data); } void obdo_refresh_inode(struct inode *dst, @@ -138,7 +143,7 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) { struct ptlrpc_request *req = it->d.lustre.it_data; struct ll_file_data *fd; - struct mds_body *body; + struct mdt_body *body; ENTRY; body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); @@ -153,14 +158,16 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) OBD_ALLOC(fd, sizeof(*fd)); /* We can't handle this well without reorganizing ll_file_open and - * ll_mdc_close, so don't even try right now. */ + * ll_md_close, so don't even try right now. */ LASSERT(fd != NULL); memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; + fd->fd_mds_och.och_fid = lli->lli_fid; lli->lli_file_data = fd; - mdc_set_open_replay_data(&fd->fd_mds_och, it->d.lustre.it_data); + md_set_open_replay_data(lli->lli_sbi->ll_md_exp, + &fd->fd_mds_och, it->d.lustre.it_data); RETURN(0); } @@ -240,7 +247,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) { - struct mds_body *body; + struct mdt_body *body; struct lov_mds_md *eadata; struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; @@ -278,13 +285,14 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } LASSERT(rc >= sizeof(*lsm)); - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(out_free_memmd, rc = -ENOMEM); oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; oa->o_mode = body->mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; if (body->valid & OBD_MD_FLCOOKIE) { oa->o_valid |= OBD_MD_FLCOOKIE; @@ -299,7 +307,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL); - obdo_free(oa); + OBDO_FREE(oa); if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", lsm->lsm_object_id, rc); @@ -309,15 +317,46 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) return rc; } -int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) +int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, + __u64 ioepoch) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct md_op_data op_data; + struct obdo oa; + int rc; + ENTRY; + + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM); + + rc = llu_inode_getattr(inode, &oa); + if (rc) { + CERROR("inode_getattr failed (%d): unable to send a " + "Size-on-MDS attribute update for inode %llu/%lu\n", + rc, (long long)llu_i2stat(inode)->st_ino, + lli->lli_st_generation); + RETURN(rc); + } + + md_from_obdo(&op_data, &oa, oa.o_valid); + memcpy(&op_data.op_handle, fh, sizeof(*fh)); + op_data.op_ioepoch = ioepoch; + op_data.op_flags |= MF_SOM_CHANGE; + + rc = llu_md_setattr(inode, &op_data); + RETURN(rc); +} + +int llu_md_close(struct obd_export *md_exp, struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); - struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd = lli->lli_file_data; struct ptlrpc_request *req = NULL; struct obd_client_handle *och = &fd->fd_mds_och; - struct obdo obdo; - int rc, valid; + struct intnl_stat *st = llu_i2stat(inode); + struct md_op_data op_data = { { 0 } }; + int rc; ENTRY; /* clear group lock, if present */ @@ -328,25 +367,51 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) &fd->fd_cwlockh); } - obdo.o_id = st->st_ino; - obdo.o_valid = OBD_MD_FLID; - valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE |OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; - if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags)) - valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - - obdo_from_inode(&obdo, inode, valid); - - if (0 /* ll_is_inode_dirty(inode) */) { - obdo.o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; - obdo.o_valid |= OBD_MD_FLFLAGS; + op_data.op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET | + ATTR_MTIME_SET | ATTR_CTIME_SET; + + if (fd->fd_flags & FMODE_WRITE) { + struct llu_sb_info *sbi = llu_i2sbi(inode); + if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM) || + !S_ISREG(llu_i2stat(inode)->st_mode)) { + op_data.op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + } else { + /* Inode cannot be dirty. Close the epoch. */ + op_data.op_flags |= MF_EPOCH_CLOSE; + /* XXX: Send CHANGE flag only if Size-on-MDS inode attributes + * are really changed. */ + op_data.op_flags |= MF_SOM_CHANGE; + + /* Pack Size-on-MDS attributes if we are in IO epoch and + * attributes are valid. */ + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + if (!llu_local_size(inode)) + op_data.op_attr.ia_valid |= + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + } } - rc = mdc_close(mdc_exp, &obdo, och, &req); - if (rc == EAGAIN) { + op_data.op_fid1 = lli->lli_fid; + op_data.op_attr.ia_atime = st->st_atime; + op_data.op_attr.ia_mtime = st->st_mtime; + op_data.op_attr.ia_ctime = st->st_ctime; + op_data.op_attr.ia_size = st->st_size; + op_data.op_attr_blocks = st->st_blocks; + op_data.op_attr.ia_attr_flags = lli->lli_st_flags; + op_data.op_ioepoch = lli->lli_ioepoch; + memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle)); + + rc = md_close(md_exp, &op_data, och, &req); + if (rc == -EAGAIN) { /* We are the last writer, so the MDS has instructed us to get * the file size and any write cookies, then close again. */ - //ll_queue_done_writing(inode); - rc = 0; + LASSERT(fd->fd_flags & FMODE_WRITE); + rc = llu_sizeonmds_update(inode, &och->och_fh, + op_data.op_ioepoch); + if (rc) { + CERROR("inode %llu mdc Size-on-MDS update failed: " + "rc = %d\n", (long long)st->st_ino, rc); + rc = 0; + } } else if (rc) { CERROR("inode %llu close failed: rc %d\n", (long long)st->st_ino, rc); @@ -357,7 +422,7 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) (long long)st->st_ino, rc); } - mdc_clear_open_replay_data(och); + md_clear_open_replay_data(md_exp, och); ptlrpc_req_finished(req); och->och_fh.cookie = DEAD_HANDLE_MAGIC; lli->lli_file_data = NULL; @@ -388,7 +453,7 @@ int llu_file_release(struct inode *inode) if (!fd) /* no process opened the file after an mcreate */ RETURN(0); - rc2 = llu_mdc_close(sbi->ll_mdc_exp, inode); + rc2 = llu_md_close(sbi->ll_md_exp, inode); if (rc2 && !rc) rc = rc2; diff --git a/lustre/liblustre/genlib.sh b/lustre/liblustre/genlib.sh index eb6112e..8635478 100755 --- a/lustre/liblustre/genlib.sh +++ b/lustre/liblustre/genlib.sh @@ -63,7 +63,10 @@ build_obj_list . libllite.a build_obj_list ../lov liblov.a build_obj_list ../obdecho libobdecho.a build_obj_list ../osc libosc.a +build_obj_list ../lmv liblmv.a build_obj_list ../mdc libmdc.a +build_obj_list ../fid libfid.a +build_obj_list ../fld libfld.a build_obj_list ../mgc libmgc.a build_obj_list ../ptlrpc libptlrpc.a build_obj_list ../obdclass liblustreclass.a diff --git a/lustre/liblustre/llite_fid.c b/lustre/liblustre/llite_fid.c new file mode 100644 index 0000000..af1d887 --- /dev/null +++ b/lustre/liblustre/llite_fid.c @@ -0,0 +1,62 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/queue.h> + +#ifdef HAVE_XTIO_H +#include <xtio.h> +#endif +#include <sysio.h> +#include <fs.h> +#include <mount.h> +#include <inode.h> +#ifdef HAVE_FILE_H +#include <file.h> +#endif + +/* both sys/queue.h (libsysio require it) and portals/lists.h have definition + * of 'LIST_HEAD'. undef it to suppress warnings + */ +#undef LIST_HEAD +#include <lnet/lnetctl.h> /* needed for parse_dump */ + +#include "lutil.h" +#include "llite_lib.h" +#include <lustre_ver.h> +#include <lustre_fid.h> + +/* build inode number on passed @fid */ +unsigned long llu_fid_build_ino(struct llu_sb_info *sbi, + struct lu_fid *fid) +{ + unsigned long ino; + ENTRY; + ino = fid_flatten(fid); + RETURN(ino & 0x7fffffff); +} diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index d179c10..1667b5d 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -56,6 +56,7 @@ static int lllib_init(void) init_obdclass() || ptlrpc_init() || mgc_init() || + lmv_init() || mdc_init() || lov_init() || osc_init()) @@ -158,7 +159,7 @@ int liblustre_process_log(struct config_llog_instance *cfg, ocd->ocd_connect_flags = OBD_CONNECT_VERSION; ocd->ocd_version = LUSTRE_VERSION_CODE; - rc = obd_connect(&mgc_conn, obd, &mgc_uuid, ocd); + rc = obd_connect(NULL, &mgc_conn, obd, &mgc_uuid, ocd); if (rc) { CERROR("cannot connect to %s at %s: rc = %d\n", LUSTRE_MGS_OBDNAME, mgsnid, rc); @@ -189,7 +190,7 @@ out_cleanup: err = class_process_config(lcfg); lustre_cfg_free(lcfg); if (err) - CERROR("mdc_cleanup failed: rc = %d\n", err); + CERROR("md_cleanup failed: rc = %d\n", err); out_detach: lustre_cfg_bufs_reset(&bufs, name); @@ -197,7 +198,7 @@ out_detach: err = class_process_config(lcfg); lustre_cfg_free(lcfg); if (err) - CERROR("mdc_detach failed: rc = %d\n", err); + CERROR("md_detach failed: rc = %d\n", err); out_del_uuid: lustre_cfg_bufs_reset(&bufs, name); diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 3df6cad..f242b45 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -8,7 +8,7 @@ #include <liblustre.h> #include <obd.h> #include <obd_class.h> -#include <lustre_mds.h> +#include <lustre_mdc.h> #include <lustre_lite.h> #include <lustre_ver.h> @@ -26,12 +26,11 @@ struct ll_file_data { unsigned long fd_gid; }; -struct llu_sb_info -{ +struct llu_sb_info { struct obd_uuid ll_sb_uuid; - struct obd_export *ll_mdc_exp; - struct obd_export *ll_osc_exp; - obd_id ll_rootino; + struct obd_export *ll_md_exp; + struct obd_export *ll_dt_exp; + struct lu_fid ll_root_fid; int ll_flags; struct lustre_client_ocd ll_lco; struct list_head ll_conn_chain; @@ -43,18 +42,21 @@ struct llu_sb_info #define LL_SBI_NOLCK 0x1 -#define LLI_F_HAVE_OST_SIZE_LOCK 0 -#define LLI_F_HAVE_MDS_SIZE_LOCK 1 +enum lli_flags { + /* MDS has an authority for the Size-on-MDS attributes. */ + LLIF_MDS_SIZE_LOCK = (1 << 0), +}; struct llu_inode_info { struct llu_sb_info *lli_sbi; - struct ll_fid lli_fid; + struct lu_fid lli_fid; struct lov_stripe_md *lli_smd; char *lli_symlink_name; struct semaphore lli_open_sem; __u64 lli_maxbytes; unsigned long lli_flags; + __u64 lli_ioepoch; /* for libsysio */ struct file_identifier lli_sysio_fid; @@ -99,18 +101,20 @@ static inline struct llu_sb_info *llu_i2sbi(struct inode *inode) static inline struct obd_export *llu_i2obdexp(struct inode *inode) { - return llu_i2info(inode)->lli_sbi->ll_osc_exp; + return llu_i2info(inode)->lli_sbi->ll_dt_exp; } static inline struct obd_export *llu_i2mdcexp(struct inode *inode) { - return llu_i2info(inode)->lli_sbi->ll_mdc_exp; + return llu_i2info(inode)->lli_sbi->ll_md_exp; } static inline int llu_is_root_inode(struct inode *inode) { - return (llu_i2info(inode)->lli_fid.id == - llu_i2info(inode)->lli_sbi->ll_rootino); + return (fid_seq(&llu_i2info(inode)->lli_fid) == + fid_seq(&llu_i2info(inode)->lli_sbi->ll_root_fid) && + fid_oid(&llu_i2info(inode)->lli_fid) == + fid_oid(&llu_i2info(inode)->lli_sbi->ll_root_fid)); } #define LL_SAVE_INTENT(inode, it) \ @@ -139,9 +143,10 @@ do { \ #define LL_LOOKUP_POSITIVE 1 #define LL_LOOKUP_NEGATIVE 2 -static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode) +static inline struct lu_fid *ll_inode2fid(struct inode *inode) { - *fid = llu_i2info(inode)->lli_fid; + LASSERT(inode != NULL); + return &llu_i2info(inode)->lli_fid; } struct it_cb_data { @@ -165,8 +170,8 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) struct mount_option_s { - char *mdc_uuid; - char *osc_uuid; + char *md_uuid; + char *dt_uuid; }; #define IS_BAD_PTR(ptr) \ @@ -181,29 +186,30 @@ int ll_parse_mount_target(const char *target, char **mgsnid, extern struct mount_option_s mount_option; /* super.c */ -void llu_update_inode(struct inode *inode, struct mds_body *body, +void llu_update_inode(struct inode *inode, struct mdt_body *body, struct lov_stripe_md *lmm); void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); int ll_it_open_error(int phase, struct lookup_intent *it); struct inode *llu_iget(struct filesys *fs, struct lustre_md *md); -int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm); +int llu_inode_getattr(struct inode *inode, struct obdo *obdo); +int llu_md_setattr(struct inode *inode, struct md_op_data *op_data); int llu_setattr_raw(struct inode *inode, struct iattr *attr); extern struct fssw_ops llu_fssw_ops; /* file.c */ -void llu_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode); +void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode, __u32 opc); +void llu_finish_md_op_data(struct md_op_data *op_data); int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it); int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); -int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode); +int llu_md_close(struct obd_export *md_exp, struct inode *inode); int llu_file_release(struct inode *inode); +int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, + __u64 ioepoch); int llu_iop_close(struct inode *inode); _SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off); int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags); @@ -214,6 +220,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir); int llu_iop_read(struct inode *ino, struct ioctx *ioctxp); int llu_iop_write(struct inode *ino, struct ioctx *ioctxp); int llu_iop_iodone(struct ioctx *ioctxp); +int llu_local_size(struct inode *inode); int llu_glimpse_size(struct inode *inode); int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, @@ -230,14 +237,18 @@ int llu_iop_lookup(struct pnode *pnode, const char *path); void unhook_stale_inode(struct pnode *pno); struct inode *llu_inode_from_lock(struct ldlm_lock *lock); -int llu_mdc_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag); +int llu_md_blocking_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, + void *data, int flag); /* dir.c */ ssize_t llu_iop_filldirentries(struct inode *ino, _SYSIO_OFF_T *basep, char *buf, size_t nbytes); +/* liblustre/llite_fid.c*/ +unsigned long llu_fid_build_ino(struct llu_sb_info *sbi, + struct lu_fid *fid); + /* ext2 related */ #define EXT2_NAME_LEN (255) diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index 250c70e..17ec86a 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -191,6 +191,7 @@ int liblustre_init_current(char *comm) strncpy(current->comm, comm, sizeof(current->comm)); current->pid = getpid(); + current->gid = getgid(); current->fsuid = geteuid(); current->fsgid = getegid(); memset(¤t->pending, 0, sizeof(current->pending)); diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index f01f448..ccedc5f 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -75,33 +75,6 @@ void ll_intent_release(struct lookup_intent *it) EXIT; } -#if 0 -/* - * remove the stale inode from pnode - */ -void unhook_stale_inode(struct pnode *pno) -{ - struct inode *inode = pno->p_base->pb_ino; - ENTRY; - - LASSERT(inode); - LASSERT(llu_i2info(inode)->lli_stale_flag); - - pno->p_base->pb_ino = NULL; - I_RELE(inode); - - if (!llu_i2info(inode)->lli_open_count) { - CDEBUG(D_INODE, "unhook inode %p (ino %lu) from pno %p\n", - inode, llu_i2info(inode)->lli_st_ino, pno); - if (!inode->i_ref) - _sysio_i_gone(inode); - } - - EXIT; - return; -} -#endif - void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode) { struct inode *inode; @@ -110,14 +83,19 @@ void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode) inode = pnode->p_base->pb_ino; if (it->d.lustre.it_lock_mode && inode != NULL) { + struct llu_sb_info *sbi; + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%llu/%lu)\n", inode, (long long)llu_i2stat(inode)->st_ino, llu_i2info(inode)->lli_st_generation); - mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode); + + sbi = llu_i2sbi(inode); + md_set_lock_data(sbi->ll_md_exp, + &it->d.lustre.it_lock_handle, inode); } /* drop lookup/getattr locks */ - if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) + if (it->it_op & (IT_LOOKUP | IT_GETATTR)) ll_intent_release(it); } @@ -127,12 +105,12 @@ static inline void llu_invalidate_inode_pages(struct inode * inode) /* do nothing */ } -int llu_mdc_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag) +int llu_md_blocking_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, + void *data, int flag) { - int rc; struct lustre_handle lockh; + int rc; ENTRY; @@ -150,6 +128,7 @@ int llu_mdc_blocking_ast(struct ldlm_lock *lock, struct llu_inode_info *lli; struct intnl_stat *st; __u64 bits = lock->l_policy_data.l_inodebits.bits; + struct lu_fid *fid; /* Invalidate all dentries associated with this inode */ if (inode == NULL) @@ -159,12 +138,16 @@ int llu_mdc_blocking_ast(struct ldlm_lock *lock, st = llu_i2stat(inode); if (bits & MDS_INODELOCK_UPDATE) - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags); - - if (lock->l_resource->lr_name.name[0] != st->st_ino || - lock->l_resource->lr_name.name[1] !=lli->lli_st_generation){ - LDLM_ERROR(lock, "data mismatch with ino %llu/%lu", - (long long)st->st_ino,lli->lli_st_generation); + lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + + fid = &lli->lli_fid; + if (lock->l_resource->lr_name.name[0] != fid_seq(fid) || + lock->l_resource->lr_name.name[1] != fid_oid(fid) || + lock->l_resource->lr_name.name[2] != fid_ver(fid)) { + LDLM_ERROR(lock,"data mismatch with ino %llu/%llu/%llu", + (long long)fid_seq(fid), + (long long)fid_oid(fid), + (long long)fid_ver(fid)); } if (S_ISDIR(st->st_mode) && (bits & MDS_INODELOCK_UPDATE)) { @@ -207,7 +190,9 @@ static int pnode_revalidate_finish(struct ptlrpc_request *req, if (it_disposition(it, DISP_LOOKUP_NEG)) RETURN(-ENOENT); - rc = mdc_req2lustre_md(req, offset, llu_i2sbi(inode)->ll_osc_exp, &md); + rc = md_get_lustre_md(llu_i2sbi(inode)->ll_md_exp, req, + offset, llu_i2sbi(inode)->ll_dt_exp, + llu_i2sbi(inode)->ll_md_exp, &md); if (rc) RETURN(rc); @@ -221,7 +206,7 @@ static int llu_pb_revalidate(struct pnode *pnode, int flags, { struct pnode_base *pb = pnode->p_base; struct it_cb_data icbd; - struct mdc_op_data op_data; + struct md_op_data op_data; struct ptlrpc_request *req = NULL; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; struct obd_export *exp; @@ -265,13 +250,14 @@ static int llu_pb_revalidate(struct pnode *pnode, int flags, it->it_op_release = ll_intent_release; } - llu_prepare_mdc_op_data(&op_data, pnode->p_parent->p_base->pb_ino, - pb->pb_ino, pb->pb_name.name,pb->pb_name.len,0); + llu_prep_md_op_data(&op_data, pnode->p_parent->p_base->pb_ino, + pb->pb_ino, pb->pb_name.name, pb->pb_name.len, + 0, LUSTRE_OPC_ANY); - rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, flags, - &req, llu_mdc_blocking_ast, - LDLM_FL_CANCEL_ON_BLOCK); - /* If req is NULL, then mdc_intent_lock only tried to do a lock match; + rc = md_intent_lock(exp, &op_data, NULL, 0, it, flags, + &req, llu_md_blocking_ast, + LDLM_FL_CANCEL_ON_BLOCK); + /* If req is NULL, then md_intent_lock only tried to do a lock match; * if all was well, it will return 1 if it found locks, 0 otherwise. */ if (req == NULL && rc >= 0) GOTO(out, rc); @@ -326,7 +312,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, if (it_disposition(it, DISP_OPEN_OPEN) && it_open_error(DISP_OPEN_OPEN, it)) { CDEBUG(D_INODE, "detect mds open error\n"); - /* undo which did by mdc_intent_lock */ + /* undo which did by md_intent_lock */ if (it_disposition(it, DISP_OPEN_CREATE) && !it_open_error(DISP_OPEN_CREATE, it)) { LASSERT(request); @@ -349,7 +335,8 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, if (it_disposition(it, DISP_OPEN_CREATE)) ptlrpc_req_finished(request); - rc = mdc_req2lustre_md(request, offset, sbi->ll_osc_exp, &md); + rc = md_get_lustre_md(sbi->ll_md_exp, request, offset, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (rc) RETURN(rc); @@ -357,11 +344,11 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, if (!inode || IS_ERR(inode)) { /* free the lsm if we allocated one above */ if (md.lsm != NULL) - obd_free_memmd(sbi->ll_osc_exp, &md.lsm); + obd_free_memmd(sbi->ll_dt_exp, &md.lsm); RETURN(inode ? PTR_ERR(inode) : -ENOMEM); } else if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm) { - obd_free_memmd(sbi->ll_osc_exp, &md.lsm); + obd_free_memmd(sbi->ll_dt_exp, &md.lsm); } lli = llu_i2info(inode); @@ -415,10 +402,11 @@ struct inode *llu_inode_from_lock(struct ldlm_lock *lock) static int llu_lookup_it(struct inode *parent, struct pnode *pnode, struct lookup_intent *it, int flags) { - struct mdc_op_data op_data; + struct md_op_data op_data; struct it_cb_data icbd; struct ptlrpc_request *req = NULL; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; + __u32 opc; int rc; ENTRY; @@ -433,13 +421,20 @@ static int llu_lookup_it(struct inode *parent, struct pnode *pnode, icbd.icbd_child = pnode; icbd.icbd_parent = parent; - llu_prepare_mdc_op_data(&op_data, parent, NULL, - pnode->p_base->pb_name.name, - pnode->p_base->pb_name.len, flags); - - rc = mdc_intent_lock(llu_i2mdcexp(parent), &op_data, NULL, 0, it, - flags, &req, llu_mdc_blocking_ast, - LDLM_FL_CANCEL_ON_BLOCK); + if (it->it_op & IT_CREAT || + (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT)) { + opc = LUSTRE_OPC_CREATE; + } else { + opc = LUSTRE_OPC_ANY; + } + + llu_prep_md_op_data(&op_data, parent, NULL, + pnode->p_base->pb_name.name, + pnode->p_base->pb_name.len, flags, opc); + + rc = md_intent_lock(llu_i2mdcexp(parent), &op_data, NULL, 0, it, + flags, &req, llu_md_blocking_ast, + LDLM_FL_CANCEL_ON_BLOCK); if (rc < 0) GOTO(out, rc); @@ -530,7 +525,7 @@ translate_lookup_intent(struct intent *intent, const char *path) /* conform to kernel code, if only IT_LOOKUP was set, don't * pass down it */ - if (!it->it_op || it->it_op == IT_LOOKUP) { + if (!it->it_op || it->it_op & IT_LOOKUP) { OBD_FREE(it, sizeof(*it)); it = NULL; } diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index eb22812..b716544 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -218,6 +218,49 @@ static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp) return rc; } +static void llu_merge_lvb(struct inode *inode) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct intnl_stat *st = llu_i2stat(inode); + struct ost_lvb lvb; + ENTRY; + + inode_init_lvb(inode, &lvb); + obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0); + st->st_size = lvb.lvb_size; + st->st_blocks = lvb.lvb_blocks; + st->st_mtime = lvb.lvb_mtime; + st->st_atime = lvb.lvb_atime; + st->st_ctime = lvb.lvb_ctime; + EXIT; +} + +int llu_local_size(struct inode *inode) +{ + ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct lustre_handle lockh = { 0 }; + int flags = 0; + int rc; + ENTRY; + + if (lli->lli_smd->lsm_stripe_count == 0) + RETURN(0); + + rc = obd_match(sbi->ll_dt_exp, lli->lli_smd, LDLM_EXTENT, + &policy, LCK_PR | LCK_PW, &flags, inode, &lockh); + if (rc < 0) + RETURN(rc); + else if (rc == 0) + RETURN(-ENODATA); + + llu_merge_lvb(inode); + obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR, &lockh); + RETURN(0); +} + /* NB: lov_merge_size will prefer locally cached writes if they extend the * file (because it prefers KMS over RSS when larger) */ int llu_glimpse_size(struct inode *inode) @@ -228,10 +271,13 @@ int llu_glimpse_size(struct inode *inode) struct lustre_handle lockh = { 0 }; struct obd_enqueue_info einfo = { 0 }; struct obd_info oinfo = { { { 0 } } }; - struct ost_lvb lvb; int rc; ENTRY; + /* If size is cached on the mds, skip glimpse. */ + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + RETURN(0); + CDEBUG(D_DLMTRACE, "Glimpsing inode %llu\n", (long long)st->st_ino); if (!lli->lli_smd) { @@ -252,20 +298,13 @@ int llu_glimpse_size(struct inode *inode) oinfo.oi_lockh = &lockh; oinfo.oi_md = lli->lli_smd; - rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo); + rc = obd_enqueue_rqset(sbi->ll_dt_exp, &oinfo, &einfo); if (rc) { CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc); RETURN(rc > 0 ? -EIO : rc); } - inode_init_lvb(inode, &lvb); - obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0); - st->st_size = lvb.lvb_size; - st->st_blocks = lvb.lvb_blocks; - st->st_mtime = lvb.lvb_mtime; - st->st_atime = lvb.lvb_atime; - st->st_ctime = lvb.lvb_ctime; - + llu_merge_lvb(inode); CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n", (long long)st->st_size, (long long)st->st_blocks); @@ -309,13 +348,13 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, oinfo.oi_lockh = lockh; oinfo.oi_md = lsm; - rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo); + rc = obd_enqueue(sbi->ll_dt_exp, &oinfo, &einfo); *policy = oinfo.oi_policy; if (rc > 0) rc = -EIO; inode_init_lvb(inode, &lvb); - obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1); + obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 1); if (policy->l_extent.start == 0 && policy->l_extent.end == OBD_OBJECT_EOF) st->st_size = lvb.lvb_size; @@ -344,7 +383,7 @@ int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode, (sbi->ll_flags & LL_SBI_NOLCK) || mode == LCK_NL) RETURN(0); - rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh); + rc = obd_cancel(sbi->ll_dt_exp, lsm, mode, lockh); RETURN(rc); } @@ -581,7 +620,7 @@ struct llu_io_group * get_io_group(struct inode *inode, int maxpages, static int max_io_pages(ssize_t len, int iovlen) { - return (((len + CFS_PAGE_SIZE -1) >> CFS_PAGE_SHIFT) + 2 + iovlen - 1); + return (((len + CFS_PAGE_SIZE -1) / CFS_PAGE_SIZE) + 2 + iovlen - 1); } static diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 64fa37a..60c109b 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -90,13 +90,13 @@ static int ll_permission(struct inode *inode, int mask) static void llu_fsop_gone(struct filesys *fs) { struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private; - struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp); + struct obd_device *obd = class_exp2obd(sbi->ll_md_exp); int next = 0; ENTRY; list_del(&sbi->ll_conn_chain); - obd_disconnect(sbi->ll_osc_exp); - obd_disconnect(sbi->ll_mdc_exp); + obd_disconnect(sbi->ll_dt_exp); + obd_disconnect(sbi->ll_md_exp); while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) class_manual_cleanup(obd); @@ -109,7 +109,7 @@ static void llu_fsop_gone(struct filesys *fs) static struct inode_ops llu_inode_ops; -void llu_update_inode(struct inode *inode, struct mds_body *body, +void llu_update_inode(struct inode *inode, struct mdt_body *body, struct lov_stripe_md *lsm) { struct llu_inode_info *lli = llu_i2info(inode); @@ -131,15 +131,16 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, } } - if (body->valid & OBD_MD_FLID) - st->st_ino = body->ino; + if (body->valid & OBD_MD_FLMTIME && + body->mtime > LTIME_S(st->st_mtime)) + LTIME_S(st->st_mtime) = body->mtime; if (body->valid & OBD_MD_FLATIME && body->atime > LTIME_S(st->st_atime)) LTIME_S(st->st_atime) = body->atime; - + /* mtime is always updated with ctime, but can be set in past. As write and utime(2) may happen within 1 second, and utime's - mtime has a priority over write's one, so take mtime from mds + mtime has a priority over write's one, so take mtime from mds for the same ctimes. */ if (body->valid & OBD_MD_FLCTIME && body->ctime >= LTIME_S(st->st_ctime)) { @@ -169,16 +170,6 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, st->st_blocks = body->blocks; if (body->valid & OBD_MD_FLFLAGS) lli->lli_st_flags = body->flags; - if (body->valid & OBD_MD_FLGENER) - lli->lli_st_generation = body->generation; - - /* fillin fid */ - if (body->valid & OBD_MD_FLID) - lli->lli_fid.id = body->ino; - if (body->valid & OBD_MD_FLGENER) - lli->lli_fid.generation = body->generation; - if (body->valid & OBD_MD_FLTYPE) - lli->lli_fid.f_type = body->mode & S_IFMT; } void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) @@ -216,8 +207,6 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) st->st_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) lli->lli_st_flags = src->o_flags; - if (valid & OBD_MD_FLGENER) - lli->lli_st_generation = src->o_generation; } #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) @@ -293,34 +282,33 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) /* * really does the getattr on the inode and updates its fields */ -int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) +int llu_inode_getattr(struct inode *inode, struct obdo *obdo) { struct llu_inode_info *lli = llu_i2info(inode); - struct obd_export *exp = llu_i2obdexp(inode); struct ptlrpc_request_set *set; + struct lov_stripe_md *lsm = lli->lli_smd; struct obd_info oinfo = { { { 0 } } }; - struct obdo oa = { 0 }; - obd_flag refresh_valid; int rc; ENTRY; LASSERT(lsm); - LASSERT(lli); oinfo.oi_md = lsm; - oinfo.oi_oa = &oa; - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME; + oinfo.oi_oa = obdo; + oinfo.oi_oa->o_id = lsm->lsm_object_id; + oinfo.oi_oa->o_gr = lsm->lsm_object_gr; + oinfo.oi_oa->o_mode = S_IFREG; + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | + OBD_MD_FLCTIME; set = ptlrpc_prep_set(); if (set == NULL) { CERROR ("ENOMEM allocing request set\n"); rc = -ENOMEM; } else { - rc = obd_getattr_async(exp, &oinfo, set); + rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set); if (rc == 0) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); @@ -328,26 +316,36 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) if (rc) RETURN(rc); - refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE; - - obdo_refresh_inode(inode, &oa, refresh_valid); + oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLSIZE; + obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid); + CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %Lu, " + "blksize %Lu\n", lli->lli_smd->lsm_object_id, + (long long unsigned)llu_i2stat(inode)->st_size, + (long long unsigned)llu_i2stat(inode)->st_blocks, + (long long unsigned)llu_i2stat(inode)->st_blksize); RETURN(0); } static struct inode* llu_new_inode(struct filesys *fs, - struct ll_fid *fid) + struct lu_fid *fid) { struct inode *inode; struct llu_inode_info *lli; struct intnl_stat st = { .st_dev = 0, +#if 0 #ifndef AUTOMOUNT_FILE_NAME .st_mode = fid->f_type & S_IFMT, #else .st_mode = fid->f_type /* all of the bits! */ #endif +#endif + /* FIXME: fix this later */ + .st_mode = 0, + .st_uid = geteuid(), .st_gid = getegid(), }; @@ -391,9 +389,10 @@ static int llu_have_md_lock(struct inode *inode, __u64 lockpart) LASSERT(inode); - obddev = sbi->ll_mdc_exp->exp_obd; - res_id.name[0] = llu_i2stat(inode)->st_ino; - res_id.name[1] = lli->lli_st_generation; + obddev = sbi->ll_md_exp->exp_obd; + res_id.name[0] = fid_seq(&lli->lli_fid); + res_id.name[1] = fid_oid(&lli->lli_fid); + res_id.name[2] = fid_ver(&lli->lli_fid); CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); @@ -419,24 +418,24 @@ static int llu_inode_revalidate(struct inode *inode) struct lustre_md md; struct ptlrpc_request *req = NULL; struct llu_sb_info *sbi = llu_i2sbi(inode); - struct ll_fid fid; unsigned long valid = OBD_MD_FLGETATTR; int rc, ealen = 0; /* Why don't we update all valid MDS fields here, if we're * doing an RPC anyways? -phil */ if (S_ISREG(llu_i2stat(inode)->st_mode)) { - ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL); + ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL); valid |= OBD_MD_FLEASIZE; } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + NULL, valid, ealen, &req); if (rc) { CERROR("failure %d inode %llu\n", rc, (long long)llu_i2stat(inode)->st_ino); RETURN(-abs(rc)); } - rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md); + rc = md_get_lustre_md(sbi->ll_md_exp, req, REPLY_REC_OFF, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); /* XXX Too paranoid? */ if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) && @@ -455,11 +454,10 @@ static int llu_inode_revalidate(struct inode *inode) llu_update_inode(inode, md.body, md.lsm); if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm) - obd_free_memmd(sbi->ll_osc_exp, &md.lsm); - - if (md.body->valid & OBD_MD_FLSIZE) - set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, - &llu_i2info(inode)->lli_flags); + obd_free_memmd(sbi->ll_dt_exp, &md.lsm); + if (md.body->valid & OBD_MD_FLSIZE && + sbi->ll_lco.lco_flags & OBD_CONNECT_SOM) + llu_i2info(inode)->lli_flags |= LLIF_MDS_SIZE_LOCK; ptlrpc_req_finished(req); } @@ -521,7 +519,6 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) void llu_clear_inode(struct inode *inode) { - struct ll_fid fid; struct llu_inode_info *lli = llu_i2info(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); ENTRY; @@ -530,16 +527,16 @@ void llu_clear_inode(struct inode *inode) (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation, inode); - ll_inode2fid(&fid, inode); - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags)); - mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode); + lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), + null_if_equal, inode); if (lli->lli_smd) - obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd, + obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd, null_if_equal, inode); if (lli->lli_smd) { - obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd); + obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd); lli->lli_smd = NULL; } @@ -597,6 +594,74 @@ static int inode_setattr(struct inode * inode, struct iattr * attr) return error; } +int llu_md_setattr(struct inode *inode, struct md_op_data *op_data) +{ + struct lustre_md md; + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct ptlrpc_request *request = NULL; + int rc; + ENTRY; + + llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request); + + if (rc) { + ptlrpc_req_finished(request); + if (rc != -EPERM && rc != -EACCES) + CERROR("md_setattr fails: rc = %d\n", rc); + RETURN(rc); + } + + rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); + if (rc) { + ptlrpc_req_finished(request); + RETURN(rc); + } + + /* We call inode_setattr to adjust timestamps. + * If there is at least some data in file, we cleared ATTR_SIZE + * above to avoid invoking vmtruncate, otherwise it is important + * to call vmtruncate in inode_setattr to update inode->i_size + * (bug 6196) */ + inode_setattr(inode, &op_data->op_attr); + llu_update_inode(inode, md.body, md.lsm); + ptlrpc_req_finished(request); + + RETURN(rc); +} + +/* Close IO epoch and send Size-on-MDS attribute update. */ +static int llu_setattr_done_writing(struct inode *inode, + struct md_op_data *op_data) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); + int rc = 0; + ENTRY; + + LASSERT(op_data != NULL); + if (!S_ISREG(st->st_mode)) + RETURN(0); + + /* XXX: pass och here for the recovery purpose. */ + CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n", + op_data->op_ioepoch, PFID(&lli->lli_fid)); + + op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE; + rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, NULL); + if (rc == -EAGAIN) { + /* MDS has instructed us to obtain Size-on-MDS attribute + * from OSTs and send setattr to back to MDS. */ + rc = llu_sizeonmds_update(inode, &op_data->op_handle, + op_data->op_ioepoch); + } else if (rc) { + CERROR("inode %llu mdc truncate failed: rc = %d\n", + st->st_ino, rc); + } + RETURN(rc); +} + /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -615,9 +680,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd; struct llu_sb_info *sbi = llu_i2sbi(inode); struct intnl_stat *st = llu_i2stat(inode); - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; int ia_valid = attr->ia_valid; + struct md_op_data op_data = { { 0 } }; int rc = 0; ENTRY; @@ -648,12 +712,12 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) attr->ia_valid |= ATTR_MTIME_SET; } if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) { - /* To avoid stale mtime on mds, obtain it from ost and send + /* To avoid stale mtime on mds, obtain it from ost and send to mds. */ rc = llu_glimpse_size(inode); - if (rc) + if (rc) RETURN(rc); - + attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME; attr->ia_mtime = inode->i_stbuf.st_mtime; } @@ -662,45 +726,30 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n", LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), LTIME_S(CURRENT_TIME)); + + /* NB: ATTR_SIZE will only be set after this point if the size + * resides on the MDS, ie, this file has no objects. */ if (lsm) attr->ia_valid &= ~ATTR_SIZE; /* If only OST attributes being set on objects, don't do MDS RPC. * In that case, we need to check permissions and update the local * inode ourselves so we can call obdo_from_inode() always. */ - if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { - struct lustre_md md; - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - attr, NULL, 0, NULL, 0, &request); + if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { + memcpy(&op_data.op_attr, attr, sizeof(*attr)); - if (rc) { - ptlrpc_req_finished(request); - if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); - RETURN(rc); - } - - rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, - &md); - if (rc) { - ptlrpc_req_finished(request); + /* Open epoch for truncate. */ + if (ia_valid & ATTR_SIZE) + op_data.op_flags = MF_EPOCH_OPEN; + rc = llu_md_setattr(inode, &op_data); + if (rc) RETURN(rc); - } - - /* We call inode_setattr to adjust timestamps. - * If there is at least some data in file, we cleared ATTR_SIZE - * above to avoid invoking vmtruncate, otherwise it is important - * to call vmtruncate in inode_setattr to update inode->i_size - * (bug 6196) */ - inode_setattr(inode, attr); - llu_update_inode(inode, md.body, md.lsm); - ptlrpc_req_finished(request); if (!lsm || !S_ISREG(st->st_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - RETURN(0); + if (op_data.op_ioepoch) + rc = llu_setattr_done_writing(inode, &op_data); + RETURN(rc); } } else { /* The OST doesn't check permissions, but the alternative is @@ -721,6 +770,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } } + /* Won't invoke llu_vmtruncate(), as we already cleared * ATTR_SIZE */ inode_setattr(inode, attr); @@ -738,7 +788,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) obd_flag obd_flags; /* check that there are no matching locks */ - LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy, + LASSERT(obd_match(sbi->ll_dt_exp, lsm, LDLM_EXTENT, &policy, LCK_PW, &flags, inode, &match_lockh) <= 0); /* XXX when we fix the AST intents to pass the discard-range @@ -774,6 +824,9 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) if (!rc) rc = err; } + + if (op_data.op_ioepoch) + rc = llu_setattr_done_writing(inode, &op_data); } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { struct obd_info oinfo = { { { 0 } } }; struct obdo oa; @@ -789,7 +842,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) oinfo.oi_oa = &oa; oinfo.oi_md = lsm; - rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL); + rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); if (rc) CERROR("obd_setattr_async fails: rc=%d\n", rc); } @@ -858,7 +911,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) int len = qstr->len; struct ptlrpc_request *request = NULL; struct llu_sb_info *sbi = llu_i2sbi(dir); - struct mdc_op_data op_data; + struct md_op_data op_data; int err = -EMLINK; ENTRY; @@ -866,11 +919,13 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX) RETURN(err); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(sbi->ll_mdc_exp, &op_data, - tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, - current->fsuid, current->fsgid, current->cap_effective, - 0, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_SYMLINK); + + err = md_create(sbi->ll_md_exp, &op_data, + tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, + current->fsuid, current->fsgid, current->cap_effective, + 0, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); RETURN(err); @@ -882,8 +937,7 @@ static int llu_readlink_internal(struct inode *inode, { struct llu_inode_info *lli = llu_i2info(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; + struct mdt_body *body; struct intnl_stat *st = llu_i2stat(inode); int rc, symlen = st->st_size + 1; ENTRY; @@ -896,9 +950,8 @@ static int llu_readlink_internal(struct inode *inode, RETURN(0); } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, - OBD_MD_LINKNAME, symlen, request); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL, + OBD_MD_LINKNAME, symlen, request); if (rc) { CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc); RETURN(rc); @@ -973,7 +1026,7 @@ static int llu_iop_mknod_raw(struct pnode *pno, struct ptlrpc_request *request = NULL; struct inode *dir = pno->p_parent->p_base->pb_ino; struct llu_sb_info *sbi = llu_i2sbi(dir); - struct mdc_op_data op_data; + struct md_op_data op_data; int err = -EMLINK; ENTRY; @@ -993,13 +1046,14 @@ static int llu_iop_mknod_raw(struct pnode *pno, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - llu_prepare_mdc_op_data(&op_data, dir, NULL, - pno->p_base->pb_name.name, - pno->p_base->pb_name.len, - 0); - err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - current->cap_effective, dev, &request); + llu_prep_md_op_data(&op_data, dir, NULL, + pno->p_base->pb_name.name, + pno->p_base->pb_name.len, 0, + LUSTRE_OPC_MKNOD); + + err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, + current->fsuid, current->fsgid, + current->cap_effective, dev, &request); ptlrpc_req_finished(request); break; case S_IFDIR: @@ -1019,7 +1073,7 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) const char *name = new->p_base->pb_name.name; int namelen = new->p_base->pb_name.len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data; int rc; ENTRY; @@ -1027,8 +1081,9 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) LASSERT(dir); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0); - rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0, + LUSTRE_OPC_ANY); + rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); @@ -1046,15 +1101,16 @@ static int llu_iop_unlink_raw(struct pnode *pno) int len = qstr->len; struct inode *target = pno->p_base->pb_ino; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data; int rc; ENTRY; LASSERT(target); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_ANY); + rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); if (!rc) rc = llu_objects_destroy(request, dir); ptlrpc_req_finished(request); @@ -1072,7 +1128,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) const char *newname = new->p_base->pb_name.name; int newnamelen = new->p_base->pb_name.len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data; int rc; ENTRY; @@ -1080,10 +1136,11 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) LASSERT(tgt); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); - rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data, - oldname, oldnamelen, newname, newnamelen, - &request); + llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0, + LUSTRE_OPC_ANY); + rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data, + oldname, oldnamelen, newname, newnamelen, + &request); if (!rc) { rc = llu_objects_destroy(request, src); } @@ -1102,17 +1159,17 @@ static int llu_statfs_internal(struct llu_sb_info *sbi, int rc; ENTRY; - rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age); + rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age); if (rc) { - CERROR("mdc_statfs fails: rc = %d\n", rc); + CERROR("md_statfs fails: rc = %d\n", rc); RETURN(rc); } CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files); - rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp), - &obd_osfs, max_age); + rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp), + &obd_statfs, max_age); if (rc) { CERROR("obd_statfs fails: rc = %d\n", rc); RETURN(rc); @@ -1215,7 +1272,7 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) int len = qstr->len; struct ptlrpc_request *request = NULL; struct intnl_stat *st = llu_i2stat(dir); - struct mdc_op_data op_data; + struct md_op_data op_data; int err = -EMLINK; ENTRY; @@ -1226,10 +1283,12 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) if (st->st_nlink >= EXT2_LINK_MAX) RETURN(err); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode | S_IFDIR, - current->fsuid, current->fsgid, current->cap_effective, - 0, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_MKDIR); + + err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0, + mode | S_IFDIR, current->fsuid, current->fsgid, + current->cap_effective, 0, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); RETURN(err); @@ -1242,7 +1301,7 @@ static int llu_iop_rmdir_raw(struct pnode *pno) const char *name = qstr->name; int len = qstr->len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data; int rc; ENTRY; @@ -1251,8 +1310,9 @@ static int llu_iop_rmdir_raw(struct pnode *pno) (long long)llu_i2stat(dir)->st_ino, llu_i2info(dir)->lli_st_generation, dir); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR); - rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR, + LUSTRE_OPC_ANY); + rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); @@ -1274,8 +1334,10 @@ static int llu_file_flock(struct inode *ino, struct llu_inode_info *lli = llu_i2info(ino); struct intnl_stat *st = llu_i2stat(ino); struct ldlm_res_id res_id = - { .name = {st->st_ino, - lli->lli_st_generation, LDLM_FLOCK} }; + { .name = {fid_seq(&lli->lli_fid), + fid_oid(&lli->lli_fid), + fid_ver(&lli->lli_fid), + LDLM_FLOCK} }; struct lustre_handle lockh = {0}; ldlm_policy_data_t flock; ldlm_mode_t mode = 0; @@ -1283,7 +1345,7 @@ static int llu_file_flock(struct inode *ino, int rc; CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n", - (unsigned long long) st->st_ino, file_lock); + (unsigned long long)st->st_ino, file_lock); flock.l_flock.pid = file_lock->fl_pid; flock.l_flock.start = file_lock->fl_start; @@ -1336,13 +1398,13 @@ static int llu_file_flock(struct inode *ino, } CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, " - "start="LPU64", end="LPU64"\n", - (unsigned long long) st->st_ino, flock.l_flock.pid, - flags, mode, flock.l_flock.start, flock.l_flock.end); + "start="LPU64", end="LPU64"\n", (unsigned long long)st->st_ino, + flock.l_flock.pid, flags, mode, flock.l_flock.start, + flock.l_flock.end); - rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, res_id, - LDLM_FLOCK, &flock, mode, &flags, NULL, - ldlm_flock_completion_ast, NULL, + rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, &res_id, + LDLM_FLOCK, &flock, mode, &flags, NULL, + ldlm_flock_completion_ast, NULL, file_lock, NULL, 0, NULL, &lockh, 0); RETURN(rc); } @@ -1477,8 +1539,9 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) flags = va_arg(ap, long); flags &= FCNTL_FLMASK; if (flags & FCNTL_FLMASK_INVALID) { - CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, " - "and O_DIRECT on file descriptor\n"); + LCONSOLE_ERROR_MSG(0x010, "liblustre does not support " + "the O_NONBLOCK or O_ASYNC flags. " + "Please fix your application.\n"); *rtn = -EINVAL; err = EINVAL; break; @@ -1584,14 +1647,14 @@ static int llu_put_grouplock(struct inode *inode, unsigned long arg) static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) { - struct llu_sb_info *sbi = llu_i2sbi(ino); + struct llu_sb_info *sbi = llu_i2sbi(ino); struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - struct iattr attr = { 0 }; + struct md_op_data op_data; struct lov_user_md lum, *lump = (struct lov_user_md *)arg; int rc = 0; - llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0); + llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); LASSERT(sizeof(lum) == sizeof(*lump)); LASSERT(sizeof(lum.lmm_objects[0]) == @@ -1607,12 +1670,12 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) lustre_swab_lov_user_md(&lum); /* swabbing is done in lov_setstripe() on server side */ - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - &attr, &lum, sizeof(lum), NULL, 0, &request); + rc = md_setattr(sbi->ll_md_exp, &op_data, &lum, + sizeof(lum), NULL, 0, &request); if (rc) { ptlrpc_req_finished(request); if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); + CERROR("md_setattr fails: rc = %d\n", rc); return rc; } ptlrpc_req_finished(request); @@ -1623,30 +1686,29 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) static int llu_lov_setstripe_ea_info(struct inode *ino, int flags, struct lov_user_md *lum, int lum_size) { - struct llu_sb_info *sbi = llu_i2sbi(ino); - struct obd_export *exp = llu_i2obdexp(ino); + struct llu_sb_info *sbi = llu_i2sbi(ino); struct llu_inode_info *lli = llu_i2info(ino); struct llu_inode_info *lli2 = NULL; struct lov_stripe_md *lsm; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; struct ptlrpc_request *req = NULL; struct lustre_md md; - struct mdc_op_data data; + struct md_op_data data; struct lustre_handle lockh; int rc = 0; ENTRY; lsm = lli->lli_smd; if (lsm) { - CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n", - lli->lli_fid.id); + CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n", + PFID(&lli->lli_fid)); return -EEXIST; } OBD_ALLOC(lli2, sizeof(struct llu_inode_info)); if (!lli2) return -ENOMEM; - + memcpy(lli2, lli, sizeof(struct llu_inode_info)); lli2->lli_open_count = 0; lli2->lli_it = NULL; @@ -1655,44 +1717,46 @@ static int llu_lov_setstripe_ea_info(struct inode *ino, int flags, lli2->lli_symlink_name = NULL; ino->i_private = lli2; - llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR); + llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR, + LUSTRE_OPC_ANY); - rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, &oit, LCK_CR, &data, - &lockh, lum, lum_size, ldlm_completion_ast, - llu_mdc_blocking_ast, NULL, LDLM_FL_INTENT_ONLY); + rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, &oit, LCK_CR, &data, + &lockh, lum, lum_size, ldlm_completion_ast, + llu_md_blocking_ast, NULL, LDLM_FL_INTENT_ONLY); if (rc) GOTO(out, rc); - + req = oit.d.lustre.it_data; rc = it_open_error(DISP_IT_EXECD, &oit); if (rc) { req->rq_replay = 0; GOTO(out, rc); } - + rc = it_open_error(DISP_OPEN_OPEN, &oit); if (rc) { req->rq_replay = 0; GOTO(out, rc); } - - rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md); + + rc = md_get_lustre_md(sbi->ll_md_exp, req, + DLM_REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (rc) GOTO(out, rc); - + llu_update_inode(ino, md.body, md.lsm); lli->lli_smd = lli2->lli_smd; lli2->lli_smd = NULL; llu_local_open(lli2, &oit); - + /* release intent */ if (lustre_handle_is_used(&lockh)) ldlm_lock_decref(&lockh, LCK_CR); ptlrpc_req_finished(req); req = NULL; - + rc = llu_file_release(ino); out: ino->i_private = lli; @@ -1727,8 +1791,8 @@ static int llu_lov_setstripe(struct inode *ino, unsigned long arg) return llu_lov_file_setstripe(ino, arg); if (S_ISDIR(st->st_mode)) return llu_lov_dir_setstripe(ino, arg); - - return -EINVAL; + + return -EINVAL; } static int llu_lov_getstripe(struct inode *ino, unsigned long arg) @@ -1800,28 +1864,23 @@ struct filesys_ops llu_filesys_ops = struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) { struct inode *inode; - struct ll_fid fid; + struct lu_fid fid; struct file_identifier fileid = {&fid, sizeof(fid)}; - if ((md->body->valid & - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) != - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) { + if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) != + (OBD_MD_FLID | OBD_MD_FLTYPE)) { CERROR("bad md body valid mask "LPX64"\n", md->body->valid); LBUG(); return ERR_PTR(-EPERM); } /* try to find existing inode */ - fid.id = md->body->ino; - fid.generation = md->body->generation; - fid.f_type = md->body->mode & S_IFMT; + fid = md->body->fid1; inode = _sysio_i_find(fs, &fileid); if (inode) { - struct llu_inode_info *lli = llu_i2info(inode); - - if (inode->i_zombie || - lli->lli_st_generation != md->body->generation) { + if (inode->i_zombie/* || + lli->lli_st_generation != md->body->generation*/) { I_RELE(inode); } else { @@ -1837,7 +1896,36 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) return inode; } -extern struct list_head lustre_profile_list; +static int +llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) +{ + struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC }; + __u32 valsize = sizeof(struct lov_desc); + int rc, easize, def_easize, cookiesize; + struct lov_desc desc; + __u32 stripes; + ENTRY; + + rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC, + &valsize, &desc); + if (rc) + RETURN(rc); + + stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = stripes; + easize = obd_size_diskmd(dt_exp, &lsm); + + lsm.lsm_stripe_count = desc.ld_default_stripe_count; + def_easize = obd_size_diskmd(dt_exp, &lsm); + + cookiesize = stripes * sizeof(struct llog_cookie); + + CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", + easize, cookiesize); + + rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize); + RETURN(rc); +} static int llu_fsswop_mount(const char *source, @@ -1850,13 +1938,13 @@ llu_fsswop_mount(const char *source, struct inode *root; struct pnode_base *rootpb; struct obd_device *obd; - struct ll_fid rootfid; + struct lu_fid rootfid; struct llu_sb_info *sbi; struct obd_statfs osfs; static struct qstr noname = { NULL, 0, 0 }; struct ptlrpc_request *request = NULL; - struct lustre_handle mdc_conn = {0, }; - struct lustre_handle osc_conn = {0, }; + struct lustre_handle md_conn = {0, }; + struct lustre_handle dt_conn = {0, }; struct lustre_md md; class_uuid_t uuid; struct config_llog_instance cfg = {0, }; @@ -1906,11 +1994,11 @@ llu_fsswop_mount(const char *source, CERROR("No profile found: %s\n", zconf_profile); GOTO(out_free, err = -EINVAL); } - OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2); - sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance); + OBD_ALLOC(osc, strlen(lprof->lp_dt) + strlen(ll_instance) + 2); + sprintf(osc, "%s-%s", lprof->lp_dt, ll_instance); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2); - sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance); + OBD_ALLOC(mdc, strlen(lprof->lp_md) + strlen(ll_instance) + 2); + sprintf(mdc, "%s-%s", lprof->lp_md, ll_instance); if (!osc) { CERROR("no osc\n"); @@ -1940,16 +2028,16 @@ llu_fsswop_mount(const char *source, ocd.ocd_version = LUSTRE_VERSION_CODE; /* setup mdc */ - err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd); + err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, &ocd); if (err) { CERROR("cannot connect to %s: rc = %d\n", mdc, err); GOTO(out_free, err); } - sbi->ll_mdc_exp = class_conn2export(&mdc_conn); + sbi->ll_md_exp = class_conn2export(&md_conn); err = obd_statfs(obd, &osfs, 100000000); if (err) - GOTO(out_mdc, err); + GOTO(out_md, err); /* * FIXME fill fs stat data into sbi here!!! FIXME @@ -1959,7 +2047,7 @@ llu_fsswop_mount(const char *source, obd = class_name2obd(osc); if (!obd) { CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, err = -EINVAL); + GOTO(out_md, err = -EINVAL); } obd_set_info_async(obd->obd_self_export, strlen("async"), "async", sizeof(async), &async, NULL); @@ -1970,39 +2058,40 @@ llu_fsswop_mount(const char *source, ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK; ocd.ocd_version = LUSTRE_VERSION_CODE; - err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd); + err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, &ocd); if (err) { CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, err); + GOTO(out_md, err); } - sbi->ll_osc_exp = class_conn2export(&osc_conn); + sbi->ll_dt_exp = class_conn2export(&dt_conn); sbi->ll_lco.lco_flags = ocd.ocd_connect_flags; - mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp); + llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp); - err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid); + err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, err); + GOTO(out_dt, err); } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; + CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&rootfid)); + sbi->ll_root_fid = rootfid; /* fetch attr of root inode */ - err = mdc_getattr(sbi->ll_mdc_exp, &rootfid, - OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request); + err = md_getattr(sbi->ll_md_exp, &rootfid, NULL, + OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request); if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, err); + CERROR("md_getattr failed for root: rc = %d\n", err); + GOTO(out_dt, err); } - err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md); + err = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (err) { CERROR("failed to understand root inode md: rc = %d\n",err); GOTO(out_request, err); } - LASSERT(sbi->ll_rootino != 0); + LASSERT(fid_is_sane(&sbi->ll_root_fid)); root = llu_iget(fs, &md); if (!root || IS_ERR(root)) { @@ -2036,17 +2125,16 @@ out_inode: _sysio_i_gone(root); out_request: ptlrpc_req_finished(request); -out_osc: - obd_disconnect(sbi->ll_osc_exp); -out_mdc: - obd_disconnect(sbi->ll_mdc_exp); +out_dt: + obd_disconnect(sbi->ll_dt_exp); +out_md: + obd_disconnect(sbi->ll_md_exp); out_free: if (osc) OBD_FREE(osc, strlen(osc) + 1); if (mdc) OBD_FREE(mdc, strlen(mdc) + 1); OBD_FREE(sbi, sizeof(*sbi)); - liblustre_wait_idle(); return err; } diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am index 75943ff..d1fd8cd 100644 --- a/lustre/liblustre/tests/Makefile.am +++ b/lustre/liblustre/tests/Makefile.am @@ -20,6 +20,10 @@ if MPITESTS noinst_PROGRAMS += test_lock_cancel endif # MPITESTS +liblustre_testdir=$(libdir)/lustre/liblustre/tests +liblustre_test_PROGRAMS = $(noinst_PROGRAMS) +liblustre_test_LIBRARIES = $(noinst_LIBRARIES) + endif # LIBLUSTRE_TESTS endif # LIBLUSTRE diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 62a11de..db08cbd 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -277,6 +277,7 @@ int main(int argc, char **argv) if (liblustre_init_current(argv[0]) || init_obdclass() || init_lib_portals() || ptlrpc_init() || + lmv_init() || mdc_init() || lov_init() || osc_init() || diff --git a/lustre/liblustre/tests/recovery_small.c b/lustre/liblustre/tests/recovery_small.c index 6af93f1..79e950b 100644 --- a/lustre/liblustre/tests/recovery_small.c +++ b/lustre/liblustre/tests/recovery_small.c @@ -132,7 +132,7 @@ void cleanup_dir(const char *path) sprintf(cmd, \ "%s %s \"echo %lu > /proc/sys/lustre/fail_loc\"", \ ssh_cmd, mds_server, drop_arr[drop_index].code); \ - if (system(cmd)) { \ + if ((rc = system(cmd))) { \ printf("error excuting remote command: %d\n", rc); \ exit(rc); \ } \ diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 9835644..6822189 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -554,7 +554,7 @@ int t18b(char *name) LEAVE(); } -static int check_file_size(char *file, off_t size) +static int check_file_size(char *file, long long size) { struct stat statbuf; @@ -563,7 +563,7 @@ static int check_file_size(char *file, off_t size) return(1); } if (statbuf.st_size != size) { - printf("size of %s: %ld != %lld\n", file, statbuf.st_size, (unsigned long long )size); + printf("size of %s: %ld != %lld\n", file, statbuf.st_size, size); return(-1); } return 0; @@ -778,7 +778,7 @@ int t23(char *name) char path[MAX_PATH_LENGTH]; int fd; long long ret; - loff_t off; + long long off; ENTRY("handle seek > 2GB"); snprintf(path, MAX_PATH_LENGTH, "%s/f%s", lustre_path, name); @@ -827,7 +827,7 @@ int t23(char *name) ret = lseek(fd, -buf_size + 2, SEEK_CUR); if (ret != off) { printf("relative seek error for %d %llu != %llu\n", - -buf_size + 2, ret, (unsigned long long) off); + -buf_size + 2, ret, off); if (ret == -1) perror("relative seek"); return -1; @@ -853,7 +853,7 @@ int t23(char *name) off = 2048ULL * 1024 * 1024, SEEK_SET; ret = lseek(fd, off, SEEK_SET); if (ret != off) { - printf("seek 2GB error for %llu != %llu\n", ret, (unsigned long long) off); + printf("seek 2GB error for %llu != %llu\n", ret, off); if (ret == -1) perror("seek 2GB"); return -1; @@ -986,13 +986,13 @@ int t50b(char *name) loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191, 1024*1024*1024*1024ULL}; int i; - loff_t offset; + long long offset; ENTRY("4k un-aligned i/o sanity"); for (i = 0; i < sizeof(off_array)/sizeof(loff_t); i++) { offset = off_array[i]; printf("16 per xfer(total %d), offset %10lld...\t", - _npages, (unsigned long long) offset); + _npages, offset); if (pages_io(16, offset) != 0) return 1; } @@ -1012,7 +1012,7 @@ int t51(char *name) { char file[MAX_PATH_LENGTH] = ""; int fd; - off_t size; + long long size; int result; ENTRY("truncate() should truncate file to proper length"); @@ -1170,7 +1170,9 @@ int t54(char *name) } /* for O_DIRECTORY */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #define STRIPE_SIZE (2048 * 2048) #define STRIPE_OFFSET 0 @@ -1372,7 +1374,6 @@ int t56(char *name) LEAVE(); } - extern void __liblustre_setup_(void); extern void __liblustre_cleanup_(void); diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index dfa273b..8a622d3 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -1,10 +1,7 @@ MODULES := lustre -lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o - -ifeq ($(PATCHLEVEL),4) -lustre-objs += rw24.o super.o -else +lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o +lustre-objs += llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o +lustre-objs += xattr.o remote_perm.o llite_capa.o lustre-objs += rw26.o super25.o -endif @INCLUDE_RULES@ diff --git a/lustre/llite/autoMakefile.am b/lustre/llite/autoMakefile.am index 102674f..2683107 100644 --- a/lustre/llite/autoMakefile.am +++ b/lustre/llite/autoMakefile.am @@ -7,5 +7,5 @@ if MODULES modulefs_DATA = lustre$(KMODEXT) endif -DIST_SOURCES := $(lustre-objs:.o=.c) llite_internal.h rw24.c super.c rw26.c super25.c +DIST_SOURCES := $(lustre-objs:.o=.c) llite_internal.h rw26.c super25.c MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 6959df5..4cc8bf5 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -30,7 +30,9 @@ #include <lustre_lite.h> #include <lustre/lustre_idl.h> #include <lustre_dlm.h> -#include <linux/lustre_version.h> +#include <lustre_mdc.h> +//#include <lustre_ver.h> +//#include <lustre_version.h> #include "llite_internal.h" @@ -147,6 +149,7 @@ void ll_intent_release(struct lookup_intent *it) { ENTRY; + CDEBUG(D_INFO, "intent %p released\n", it); ll_intent_drop_lock(it); #ifdef LUSTRE_KERNEL_VERSION it->it_magic = 0; @@ -185,12 +188,12 @@ int ll_drop_dentry(struct dentry *dentry) spin_lock(&dcache_lock); return 1; } - /* disconected dentry can not be find without lookup, because we - * not need his to unhash or mark invalid. */ - if (dentry->d_flags & DCACHE_DISCONNECTED) { - unlock_dentry(dentry); - RETURN (0); - } + /* disconected dentry can not be find without lookup, because we + * not need his to unhash or mark invalid. */ + if (dentry->d_flags & DCACHE_DISCONNECTED) { + unlock_dentry(dentry); + RETURN (0); + } #ifdef LUSTRE_KERNEL_VERSION if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { @@ -213,6 +216,9 @@ int ll_drop_dentry(struct dentry *dentry) list_add(&dentry->d_hash, &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list); } +#else + if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) + __d_drop(dentry); #endif #else if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) @@ -244,6 +250,11 @@ restart: while ((tmp = tmp->next) != head) { struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); + CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p " + "inode %p flags %d\n", dentry->d_name.len, + dentry->d_name.name, dentry, dentry->d_parent, + dentry->d_inode, dentry->d_flags); + if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') { CERROR("called on root (?) dentry=%p, inode=%p " "ino=%lu\n", dentry, inode, inode->i_ino); @@ -269,8 +280,9 @@ restart: EXIT; } -int revalidate_it_finish(struct ptlrpc_request *request, int offset, - struct lookup_intent *it, struct dentry *de) +int ll_revalidate_it_finish(struct ptlrpc_request *request, + int offset, struct lookup_intent *it, + struct dentry *de) { int rc = 0; ENTRY; @@ -278,10 +290,10 @@ int revalidate_it_finish(struct ptlrpc_request *request, int offset, if (!request) RETURN(0); - if (it_disposition(it, DISP_LOOKUP_NEG)) + if (it_disposition(it, DISP_LOOKUP_NEG)) RETURN(-ENOENT); - rc = ll_prep_inode(ll_i2sbi(de->d_inode)->ll_osc_exp, &de->d_inode, + rc = ll_prep_inode(&de->d_inode, request, offset, NULL); RETURN(rc); @@ -294,9 +306,12 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry) if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) { struct inode *inode = dentry->d_inode; + struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", inode, inode->i_ino, inode->i_generation); - mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode); + md_set_lock_data(sbi->ll_md_exp, &it->d.lustre.it_lock_handle, + inode); } /* drop lookup or getattr locks immediately */ @@ -317,8 +332,9 @@ void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft) struct lookup_intent *it = *itp; #if defined(LUSTRE_KERNEL_VERSION)&&(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) if (it) { - LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n", - it->it_magic); + LASSERTF(it->it_magic == INTENT_MAGIC, + "%p has bad intent magic: %x\n", + it, it->it_magic); } #endif @@ -334,10 +350,11 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, struct lookup_intent *it) { int rc; - struct mdc_op_data op_data; + struct md_op_data *op_data; struct ptlrpc_request *req = NULL; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; struct obd_export *exp; + struct inode *parent; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name, @@ -363,7 +380,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, RETURN(rc); } - exp = ll_i2mdcexp(de->d_inode); + exp = ll_i2mdexp(de->d_inode); /* Never execute intents for mount points. * Attributes will be fixed up in ll_inode_revalidate_it */ @@ -379,21 +396,36 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, ll_frob_intent(&it, &lookup_it); LASSERT(it); - ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode, - de->d_name.name, de->d_name.len, 0); + parent = de->d_parent->d_inode; + + if (it->it_op & IT_CREAT) { + op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name, + de->d_name.len, 0, LUSTRE_OPC_CREATE); + } else { + op_data = ll_prep_md_op_data(NULL, parent, de->d_inode, + de->d_name.name, de->d_name.len, + 0, LUSTRE_OPC_ANY); + } + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + if ((it->it_op == IT_OPEN) && de->d_inode) { struct inode *inode = de->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct obd_client_handle **och_p; __u64 *och_usecount; - /* We used to check for MDS_INODELOCK_OPEN here, but in fact + + /* + * We used to check for MDS_INODELOCK_OPEN here, but in fact * just having LOOKUP lock is enough to justify inode is the * same. And if inode is the same and we have suitable * openhandle, then there is no point in doing another OPEN RPC - * just to throw away newly received openhandle. - * There are no security implications too, if file owner or - * access mode is change, LOOKUP lock is revoked */ + * just to throw away newly received openhandle. There are no + * security implications too, if file owner or access mode is + * change, LOOKUP lock is revoked. + */ + if (it->it_flags & FMODE_WRITE) { och_p = &lli->lli_mds_write_och; @@ -401,7 +433,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, } else if (it->it_flags & FMODE_EXEC) { och_p = &lli->lli_mds_exec_och; och_usecount = &lli->lli_open_fd_exec_count; - } else { + } else { och_p = &lli->lli_mds_read_och; och_usecount = &lli->lli_open_fd_read_count; } @@ -411,7 +443,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, down(&lli->lli_och_sem); if (*och_p) { /* Everything is open already, do nothing */ /*(*och_usecount)++; Do not let them steal our open - handle from under us */ + handle from under us */ /* XXX The code above was my original idea, but in case we have the handle, but we cannot use it due to later checks (e.g. O_CREAT|O_EXCL flags set), nobody @@ -420,6 +452,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, if it would be, we'll reopen the open request to MDS later during file open path */ up(&lli->lli_och_sem); + ll_finish_md_op_data(op_data); RETURN(1); } else { up(&lli->lli_och_sem); @@ -428,10 +461,13 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, do_lock: it->it_create_mode &= ~current->fs->umask; - - rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags, - &req, ll_mdc_blocking_ast, 0); - /* If req is NULL, then mdc_intent_lock only tried to do a lock match; + it->it_flags |= O_CHECK_STALE; + rc = md_intent_lock(exp, op_data, NULL, 0, it, + lookup_flags, + &req, ll_md_blocking_ast, 0); + it->it_flags &= ~O_CHECK_STALE; + ll_finish_md_op_data(op_data); + /* If req is NULL, then md_intent_lock only tried to do a lock match; * if all was well, it will return 1 if it found locks, 0 otherwise. */ if (req == NULL && rc >= 0) { if (!rc) @@ -448,11 +484,13 @@ do_lock: } revalidate_finish: - rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de); + rc = ll_revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de); if (rc != 0) { - ll_intent_release(it); + if (rc != -ESTALE && rc != -ENOENT) + ll_intent_release(it); GOTO(out, rc = 0); } + if ((it->it_op & IT_OPEN) && de->d_inode && !S_ISREG(de->d_inode->i_mode) && !S_ISDIR(de->d_inode->i_mode)) { @@ -469,7 +507,7 @@ revalidate_finish: __d_rehash(de, 0); spin_unlock(&dcache_lock); - out: +out: /* We do not free request as it may be reused during following lookup * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will * be freed in ll_lookup_it or in ll_intent_release. But if @@ -480,16 +518,16 @@ revalidate_finish: #ifdef LUSTRE_KERNEL_VERSION ll_unhash_aliases(de->d_inode); /* done in ll_unhash_aliases() - dentry->d_flags |= DCACHE_LUSTRE_INVALID; */ + dentry->d_flags |= DCACHE_LUSTRE_INVALID; */ #else /* We do not want d_invalidate to kill all child dentries too */ d_drop(de); #endif } else { CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p " - "inode %p refc %d\n", de->d_name.len, - de->d_name.name, de, de->d_parent, de->d_inode, - atomic_read(&de->d_count)); + "inode %p refc %d\n", de->d_name.len, + de->d_name.name, de, de->d_parent, de->d_inode, + atomic_read(&de->d_count)); ll_lookup_finish_locks(it, de); #ifdef LUSTRE_KERNEL_VERSION lock_dentry(de); @@ -498,38 +536,59 @@ revalidate_finish: #endif } RETURN(rc); -/* This part is here to combat evil-evil race in real_lookup on 2.6 kernels. - * The race details are: We enter do_lookup() looking for some name, - * there is nothing in dcache for this name yet and d_lookup() returns NULL. - * We proceed to real_lookup(), and while we do this, another process does - * open on the same file we looking up (most simple reproducer), open succeeds - * and the dentry is added. Now back to us. In real_lookup() we do d_lookup() - * again and suddenly find the dentry, so we call d_revalidate on it, but there - * is no lock, so without this code we would return 0, but unpatched - * real_lookup just returns -ENOENT in such a case instead of retrying the - * lookup. Once this is dealt with in real_lookup(), all of this ugly mess - * can go and we can just check locks in ->d_revalidate without doing any - * RPCs ever. */ + + /* + * This part is here to combat evil-evil race in real_lookup on 2.6 + * kernels. The race details are: We enter do_lookup() looking for some + * name, there is nothing in dcache for this name yet and d_lookup() + * returns NULL. We proceed to real_lookup(), and while we do this, + * another process does open on the same file we looking up (most simple + * reproducer), open succeeds and the dentry is added. Now back to + * us. In real_lookup() we do d_lookup() again and suddenly find the + * dentry, so we call d_revalidate on it, but there is no lock, so + * without this code we would return 0, but unpatched real_lookup just + * returns -ENOENT in such a case instead of retrying the lookup. Once + * this is dealt with in real_lookup(), all of this ugly mess can go and + * we can just check locks in ->d_revalidate without doing any RPCs + * ever. + */ do_lookup: if (it != &lookup_it) { + /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */ + if (it->it_op == IT_GETATTR) + lookup_it.it_op = IT_GETATTR; ll_lookup_finish_locks(it, de); it = &lookup_it; } - /*do real lookup here */ - ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, NULL, - de->d_name.name, de->d_name.len, 0); - rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, 0, &req, - ll_mdc_blocking_ast, 0); + + /* Do real lookup here. */ + op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name, + de->d_name.len, 0, (it->it_op & IT_CREAT ? + LUSTRE_OPC_CREATE : + LUSTRE_OPC_ANY)); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + rc = md_intent_lock(exp, op_data, NULL, 0, it, 0, &req, + ll_md_blocking_ast, 0); if (rc >= 0) { - struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, + struct mdt_body *mdt_body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, - sizeof(*mds_body)); + sizeof(*mdt_body)); + struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0}; + + if (de->d_inode) + fid = *ll_inode2fid(de->d_inode); + /* see if we got same inode, if not - return error */ - if(!memcmp(&op_data.fid2, &mds_body->fid1, - sizeof(op_data.fid2))) + if (lu_fid_eq(&fid, &mdt_body->fid1)) { + ll_finish_md_op_data(op_data); + op_data = NULL; goto revalidate_finish; + } ll_intent_release(it); } + ll_finish_md_op_data(op_data); GOTO(out, rc = 0); } @@ -539,6 +598,7 @@ do_lookup: struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_dentry_data *ldd = ll_d2d(de); struct obd_client_handle *handle; + struct obd_capa *oc; int rc = 0; ENTRY; LASSERT(ldd); @@ -562,9 +622,9 @@ do_lookup: unlock_kernel(); handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och; - rc = obd_pin(sbi->ll_mdc_exp, inode->i_ino, inode->i_generation, - inode->i_mode & S_IFMT, handle, flag); - + oc = ll_mdscapa_get(inode); + rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag); + capa_put(oc); if (rc) { lock_kernel(); memset(handle, 0, sizeof(*handle)); @@ -612,7 +672,7 @@ do_lookup: return; } - rc = obd_unpin(sbi->ll_mdc_exp, &handle, flag); + rc = obd_unpin(sbi->ll_md_exp, &handle, flag); EXIT; return; } diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index df73032..701398e 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -47,40 +47,131 @@ #include <lustre/lustre_idl.h> #include <lustre_lite.h> #include <lustre_dlm.h> +#include <lustre_fid.h> #include "llite_internal.h" -typedef struct ext2_dir_entry_2 ext2_dirent; +#ifdef HAVE_PG_FS_MISC +#define PageChecked(page) test_bit(PG_fs_misc, &(page)->flags) +#define SetPageChecked(page) set_bit(PG_fs_misc, &(page)->flags) +#endif -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) +/* + * (new) readdir implementation overview. + * + * Original lustre readdir implementation cached exact copy of raw directory + * pages on the client. These pages were indexed in client page cache by + * logical offset in the directory file. This design, while very simple and + * intuitive had some inherent problems: + * + * . it implies that byte offset to the directory entry serves as a + * telldir(3)/seekdir(3) cookie, but that offset is not stable: in + * ext3/htree directory entries may move due to splits, and more + * importantly, + * + * . it is incompatible with the design of split directories for cmd3, + * that assumes that names are distributed across nodes based on their + * hash, and so readdir should be done in hash order. + * + * New readdir implementation does readdir in hash order, and uses hash of a + * file name as a telldir/seekdir cookie. This led to number of complications: + * + * . hash is not unique, so it cannot be used to index cached directory + * pages on the client (note, that it requires a whole pageful of hash + * collided entries to cause two pages to have identical hashes); + * + * . hash is not unique, so it cannot, strictly speaking, be used as an + * entry cookie. ext3/htree has the same problem and lustre implementation + * mimics their solution: seekdir(hash) positions directory at the first + * entry with the given hash. + * + * Client side. + * + * 0. caching + * + * Client caches directory pages using hash of the first entry as an index. As + * noted above hash is not unique, so this solution doesn't work as is: + * special processing is needed for "page hash chains" (i.e., sequences of + * pages filled with entries all having the same hash value). + * + * First, such chains have to be detected. To this end, server returns to the + * client the hash of the first entry on the page next to one returned. When + * client detects that this hash is the same as hash of the first entry on the + * returned page, page hash collision has to be handled. Pages in the + * hash chain, except first one, are termed "overflow pages". + * + * Solution to index uniqueness problem is to not cache overflow + * pages. Instead, when page hash collision is detected, all overflow pages + * from emerging chain are immediately requested from the server and placed in + * a special data structure (struct ll_dir_chain). This data structure is used + * by ll_readdir() to process entries from overflow pages. When readdir + * invocation finishes, overflow pages are discarded. If page hash collision + * chain weren't completely processed, next call to readdir will again detect + * page hash collision, again read overflow pages in, process next portion of + * entries and again discard the pages. This is not as wasteful as it looks, + * because, given reasonable hash, page hash collisions are extremely rare. + * + * 1. directory positioning + * + * When seekdir(hash) is called, original + * + * + * + * + * + * + * + * + * Server. + * + * identification of and access to overflow pages + * + * page format + * + * + * + * + * + */ + +static __u32 hash_x_index(__u32 value) +{ + return ((__u32)~0) - value; +} +#ifdef HAVE_PG_FS_MISC +#define PageChecked(page) test_bit(PG_fs_misc, &(page)->flags) +#define SetPageChecked(page) set_bit(PG_fs_misc, &(page)->flags) +#endif /* returns the page unlocked, but with a reference */ static int ll_dir_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - struct ll_fid mdc_fid; - __u64 offset; struct ptlrpc_request *request; - struct mds_body *body; - int rc = 0; + struct mdt_body *body; + struct obd_capa *oc; + __u64 hash; + int rc; ENTRY; - offset = (__u64)page->index << CFS_PAGE_SHIFT; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n", - inode->i_ino, inode->i_generation, inode, offset); - - mdc_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR); + hash = hash_x_index(page->index); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n", + inode->i_ino, inode->i_generation, inode, (unsigned long)hash); - rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid, - offset, page, &request); + oc = ll_mdscapa_get(inode); + rc = md_readpage(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), + oc, hash, page, &request); + capa_put(oc); if (!rc) { body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); - LASSERT(body != NULL); /* checked by mdc_readpage() */ - /* swabbed by mdc_readpage() */ + /* Checked by mdc_readpage() */ + LASSERT(body != NULL); + + /* Swabbed by mdc_readpage() */ LASSERT_REPSWABBED(request, REPLY_REC_OFF); - inode->i_size = body->size; + if (body->valid & OBD_MD_FLSIZE) + inode->i_size = body->size; SetPageUptodate(page); } ptlrpc_req_finished(request); @@ -94,139 +185,133 @@ struct address_space_operations ll_dir_aops = { .readpage = ll_dir_readpage, }; -/* - * ext2 uses block-sized chunks. Arguably, sector-sized ones would be - * more robust, but we have what we have - */ -static inline unsigned ext2_chunk_size(struct inode *inode) +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; +} + +static inline unsigned ll_chunk_size(struct inode *inode) { return inode->i_sb->s_blocksize; } -static inline void ext2_put_page(struct page *page) +static void ll_check_page(struct inode *dir, struct page *page) +{ + /* XXX: check page format later */ + SetPageChecked(page); +} + +static inline void ll_put_page(struct page *page) { kunmap(page); page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) +/* + * Find, kmap and return page that contains given hash. + */ +static struct page *ll_dir_page_locate(struct inode *dir, unsigned long hash, + __u32 *start, __u32 *end) { - return (inode->i_size+CFS_PAGE_SIZE-1) >> CFS_PAGE_SHIFT; + struct address_space *mapping = dir->i_mapping; + /* + * Complement of hash is used as an index so that + * radix_tree_gang_lookup() can be used to find a page with starting + * hash _smaller_ than one we are looking for. + */ + unsigned long offset = hash_x_index(hash); + struct page *page; + int found; + + spin_lock_irq(&mapping->tree_lock); + found = radix_tree_gang_lookup(&mapping->page_tree, + (void **)&page, offset, 1); + if (found > 0) { + struct lu_dirpage *dp; + + page_cache_get(page); + spin_unlock_irq(&mapping->tree_lock); + /* + * In contrast to find_lock_page() we are sure that directory + * page cannot be truncated (while DLM lock is held) and, + * hence, can avoid restart. + * + * In fact, page cannot be locked here at all, because + * ll_dir_readpage() does synchronous io. + */ + wait_on_page(page); + if (PageUptodate(page)) { + dp = kmap(page); + *start = le32_to_cpu(dp->ldp_hash_start); + *end = le32_to_cpu(dp->ldp_hash_end); + LASSERT(*start <= hash); + if (hash > *end || (*end != *start && hash == *end)) { + kunmap(page); + lock_page(page); + ll_truncate_complete_page(page); + unlock_page(page); + page_cache_release(page); + page = NULL; + } + } else { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + + } else { + spin_unlock_irq(&mapping->tree_lock); + page = NULL; + } + return page; } +/* + * Chain of hash overflow pages. + */ +struct ll_dir_chain { + /* XXX something. Later */ +}; -static void ext2_check_page(struct inode *dir, struct page *page) +static void ll_dir_chain_init(struct ll_dir_chain *chain) { - unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); - // u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count); - unsigned rec_len; - __u64 offs, limit = CFS_PAGE_SIZE; - ext2_dirent *p; - char *error; - - if ((dir->i_size >> CFS_PAGE_SHIFT) == (__u64)page->index) { - limit = dir->i_size & ~CFS_PAGE_MASK; - if (limit & (chunk_size - 1)) { - CERROR("limit "LPU64" dir size %lld index "LPU64"\n", - limit, dir->i_size, (__u64)page->index); - goto Ebadsize; - } - for (offs = limit; offs < CFS_PAGE_SIZE; offs += chunk_size) { - ext2_dirent *p = (ext2_dirent*)(kaddr + offs); - p->rec_len = cpu_to_le16(chunk_size); - p->name_len = 0; - p->inode = 0; - } - if (!limit) - goto out; - } - for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { - p = (ext2_dirent *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); - - if (rec_len < EXT2_DIR_REC_LEN(1)) - goto Eshort; - if (rec_len & 3) - goto Ealign; - if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) - goto Enamelen; - if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) - goto Espan; - // if (le32_to_cpu(p->inode) > max_inumber) - //goto Einumber; - } - if (offs != limit) - goto Eend; -out: - SetPageChecked(page); - return; - - /* Too bad, we had an error */ - -Ebadsize: - CERROR("%s: directory %lu/%u size %llu is not a multiple of %u\n", - ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino, - dir->i_generation, dir->i_size, chunk_size); - goto fail; -Eshort: - error = "rec_len is smaller than minimal"; - goto bad_entry; -Ealign: - error = "unaligned directory entry"; - goto bad_entry; -Enamelen: - error = "rec_len is too small for name_len"; - goto bad_entry; -Espan: - error = "directory entry across blocks"; - goto bad_entry; - //Einumber: - // error = "inode out of bounds"; -bad_entry: - CERROR("%s: bad entry in directory %lu/%u: %s - " - "offset="LPU64"+"LPU64", inode=%lu, rec_len=%d, name_len=%d\n", - ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino, - dir->i_generation, error, (__u64)page->index << CFS_PAGE_SHIFT, - offs, (unsigned long)le32_to_cpu(p->inode), - rec_len, p->name_len); - goto fail; -Eend: - p = (ext2_dirent *)(kaddr + offs); - CERROR("%s: entry in directory %lu/%u spans the page boundary " - "offset="LPU64"+"LPU64", inode=%lu\n",ll_i2mdcexp(dir)->exp_obd->obd_name, - dir->i_ino, dir->i_generation, - (__u64)page->index << CFS_PAGE_SHIFT, - offs, (unsigned long)le32_to_cpu(p->inode)); -fail: - SetPageChecked(page); - SetPageError(page); } -static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) +static void ll_dir_chain_fini(struct ll_dir_chain *chain) { - struct ldlm_res_id res_id = - { .name = { dir->i_ino, (__u64)dir->i_generation} }; - struct lustre_handle lockh; - struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp); +} + +static struct page *ll_get_dir_page(struct inode *dir, __u32 hash, int exact, + struct ll_dir_chain *chain) +{ + ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} }; struct address_space *mapping = dir->i_mapping; + struct lustre_handle lockh; + struct lu_dirpage *dp; struct page *page; - ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} }; + ldlm_mode_t mode; int rc; + __u32 start; + __u32 end; - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, - &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh); + mode = LCK_PR; + rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED, + ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh); if (!rc) { struct lookup_intent it = { .it_op = IT_READDIR }; struct ptlrpc_request *request; - struct mdc_op_data data; + struct md_op_data *op_data; - ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0); + op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + return (void *)op_data; - rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, LDLM_IBITS, &it, - LCK_CR, &data, &lockh, NULL, 0, - ldlm_completion_ast, ll_mdc_blocking_ast, dir, - 0); + rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, LDLM_IBITS, &it, + mode, op_data, &lockh, NULL, 0, + ldlm_completion_ast, ll_md_blocking_ast, dir, + 0); + + ll_finish_md_op_data(op_data); request = (struct ptlrpc_request *)it.d.lustre.it_data; if (request) @@ -235,10 +320,50 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) CERROR("lock enqueue: rc: %d\n", rc); return ERR_PTR(rc); } + } else { + /* for cross-ref object, l_ast_data of the lock may not be set, + * we reset it here */ + md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie, dir); } ldlm_lock_dump_handle(D_OTHER, &lockh); - page = read_cache_page(mapping, n, + page = ll_dir_page_locate(dir, hash, &start, &end); + if (IS_ERR(page)) + GOTO(out_unlock, page); + + if (page != NULL) { + /* + * XXX nikita: not entirely correct handling of a corner case: + * suppose hash chain of entries with hash value HASH crosses + * border between pages P0 and P1. First both P0 and P1 are + * cached, seekdir() is called for some entry from the P0 part + * of the chain. Later P0 goes out of cache. telldir(HASH) + * happens and finds P1, as it starts with matching hash + * value. Remaining entries from P0 part of the chain are + * skipped. (Is that really a bug?) + * + * Possible solutions: 0. don't cache P1 is such case, handle + * it as an "overflow" page. 1. invalidate all pages at + * once. 2. use HASH|1 as an index for P1. + */ + if (exact && hash != start) { + /* + * readdir asked for a page starting _exactly_ from + * given hash, but cache contains stale page, with + * entries with smaller hash values. Stale page should + * be invalidated, and new one fetched. + */ + CWARN("Stale readpage page %p: %#lx != %#lx\n", page, + (unsigned long)hash, (unsigned long)start); + lock_page(page); + ll_truncate_complete_page(page); + unlock_page(page); + page_cache_release(page); + } else + GOTO(hash_collision, page); + } + + page = read_cache_page(mapping, hash_x_index(hash), (filler_t*)mapping->a_ops->readpage, NULL); if (IS_ERR(page)) GOTO(out_unlock, page); @@ -248,125 +373,148 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) if (!PageUptodate(page)) goto fail; if (!PageChecked(page)) - ext2_check_page(dir, page); + ll_check_page(dir, page); if (PageError(page)) goto fail; - +hash_collision: + dp = page_address(page); + + start = le32_to_cpu(dp->ldp_hash_start); + end = le32_to_cpu(dp->ldp_hash_end); + if (end == start) { + LASSERT(start == hash); + CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end); + /* + * Fetch whole overflow chain... + * + * XXX not yet. + */ + goto fail; + } out_unlock: - ldlm_lock_decref(&lockh, LCK_CR); + ldlm_lock_decref(&lockh, mode); return page; fail: - ext2_put_page(page); + ll_put_page(page); page = ERR_PTR(-EIO); goto out_unlock; } -/* - * p is at least 6 bytes before the end of page - */ -static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) +int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) { - return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); -} - -static inline unsigned -ext2_validate_entry(char *base, unsigned offset, unsigned mask) -{ - ext2_dirent *de = (ext2_dirent*)(base + offset); - ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); - while ((char*)p < (char*)de) - p = ext2_next_entry(p); - return (char *)p - base; -} - -static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { - [EXT2_FT_UNKNOWN] DT_UNKNOWN, - [EXT2_FT_REG_FILE] DT_REG, - [EXT2_FT_DIR] DT_DIR, - [EXT2_FT_CHRDEV] DT_CHR, - [EXT2_FT_BLKDEV] DT_BLK, - [EXT2_FT_FIFO] DT_FIFO, - [EXT2_FT_SOCK] DT_SOCK, - [EXT2_FT_SYMLINK] DT_LNK, -}; - - -int ll_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - struct inode *inode = filp->f_dentry->d_inode; - loff_t pos = filp->f_pos; - // XXX struct super_block *sb = inode->i_sb; - __u64 offset = pos & ~CFS_PAGE_MASK; - __u64 n = pos >> CFS_PAGE_SHIFT; - unsigned long npages = dir_pages(inode); - unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - unsigned char *types = ext2_filetype_table; - int need_revalidate = (filp->f_version != inode->i_version); - int rc = 0; + struct inode *inode = filp->f_dentry->d_inode; + struct ll_inode_info *info = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + __u32 pos = filp->f_pos; + struct page *page; + struct ll_dir_chain chain; + int rc; + int done; + int shift; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n", - inode->i_ino, inode->i_generation, inode, pos, inode->i_size); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu\n", + inode->i_ino, inode->i_generation, inode, + (unsigned long)pos, inode->i_size); - if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) + if (pos == DIR_END_OFF) + /* + * end-of-file. + */ RETURN(0); - for ( ; n < npages; n++, offset = 0) { - char *kaddr, *limit; - ext2_dirent *de; - struct page *page; - - CDEBUG(D_EXT2,"read %lu of dir %lu/%u page "LPU64"/%lu " - "size %llu\n", - CFS_PAGE_SIZE, inode->i_ino, inode->i_generation, - n, npages, inode->i_size); - page = ll_get_dir_page(inode, n); - - /* size might have been updated by mdc_readpage */ - npages = dir_pages(inode); - - if (IS_ERR(page)) { - rc = PTR_ERR(page); - CERROR("error reading dir %lu/%u page "LPU64": rc %d\n", - inode->i_ino, inode->i_generation, n, rc); - continue; - } - - kaddr = page_address(page); - if (need_revalidate) { - /* page already checked from ll_get_dir_page() */ - offset = ext2_validate_entry(kaddr, offset, chunk_mask); - need_revalidate = 0; - } - de = (ext2_dirent *)(kaddr+offset); - limit = kaddr + CFS_PAGE_SIZE - EXT2_DIR_REC_LEN(1); - for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { - if (de->inode) { - int over; - - rc = 0; /* no error if we return something */ - - offset = (char *)de - kaddr; - over = filldir(dirent, de->name, de->name_len, - (n << CFS_PAGE_SHIFT) | offset, - le32_to_cpu(de->inode), - types[de->file_type & - (EXT2_FT_MAX - 1)]); - if (over) { - ext2_put_page(page); - GOTO(done, rc); - } + rc = 0; + done = 0; + shift = 0; + ll_dir_chain_init(&chain); + + page = ll_get_dir_page(inode, pos, 0, &chain); + + while (rc == 0 && !done) { + struct lu_dirpage *dp; + struct lu_dirent *ent; + + if (!IS_ERR(page)) { + /* + * If page is empty (end of directoryis reached), + * use this value. + */ + __u32 hash = DIR_END_OFF; + __u32 next; + + dp = page_address(page); + for (ent = lu_dirent_start(dp); ent != NULL && !done; + ent = lu_dirent_next(ent)) { + char *name; + int namelen; + struct lu_fid fid; + ino_t ino; + + /* + * XXX: implement correct swabbing here. + */ + + hash = le32_to_cpu(ent->lde_hash); + namelen = le16_to_cpu(ent->lde_namelen); + + if (hash < pos) + /* + * Skip until we find target hash + * value. + */ + continue; + + if (namelen == 0) + /* + * Skip dummy record. + */ + continue; + + fid = ent->lde_fid; + name = ent->lde_name; + fid_le_to_cpu(&fid, &fid); + ino = ll_fid_build_ino(sbi, &fid); + + done = filldir(cookie, name, namelen, + (loff_t)hash, ino, DT_UNKNOWN); } + next = le32_to_cpu(dp->ldp_hash_end); + ll_put_page(page); + if (!done) { + pos = next; + if (pos == DIR_END_OFF) + /* + * End of directory reached. + */ + done = 1; + else if (1 /* chain is exhausted*/) + /* + * Normal case: continue to the next + * page. + */ + page = ll_get_dir_page(inode, pos, 1, + &chain); + else { + /* + * go into overflow page. + */ + } + } else + pos = hash; + } else { + rc = PTR_ERR(page); + CERROR("error reading dir "DFID" at %lu: rc %d\n", + PFID(&info->lli_fid), (unsigned long)pos, rc); } - ext2_put_page(page); } -done: - filp->f_pos = (n << CFS_PAGE_SHIFT) | offset; + filp->f_pos = (loff_t)(__s32)pos; filp->f_version = inode->i_version; touch_atime(filp->f_vfsmnt, filp->f_dentry); + ll_dir_chain_fini(&chain); + RETURN(rc); } @@ -383,10 +531,8 @@ do { \ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdc_op_data data; + struct md_op_data *op_data; struct ptlrpc_request *req = NULL; - - struct iattr attr = { 0 }; int rc = 0; /* @@ -400,19 +546,20 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump) if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC)) lustre_swab_lov_user_md(lump); - ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); /* swabbing is done in lov_setstripe() on server side */ - rc = mdc_setattr(sbi->ll_mdc_exp, &data, - &attr, lump, sizeof(*lump), NULL, 0, &req); + rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump), + NULL, 0, &req); + ll_finish_md_op_data(op_data); + ptlrpc_req_finished(req); if (rc) { - ptlrpc_req_finished(req); if (rc != -EPERM && rc != -EACCES) CERROR("mdc_setattr fails: rc = %d\n", rc); - return rc; } - ptlrpc_req_finished(req); - return rc; } @@ -421,30 +568,30 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, int *lmm_size, struct ptlrpc_request **request) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; + struct mdt_body *body; struct lov_mds_md *lmm = NULL; struct ptlrpc_request *req = NULL; int rc, lmmsize; - - ll_inode2fid(&fid, inode); - + struct obd_capa *oc; + rc = ll_get_max_mdsize(sbi, &lmmsize); if (rc) RETURN(rc); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, - OBD_MD_FLEASIZE|OBD_MD_FLDIREA, + oc = ll_mdscapa_get(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + oc, OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize, &req); + capa_put(oc); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr failed on inode " + CDEBUG(D_INFO, "md_getattr failed on inode " "%lu/%u: rc %d\n", inode->i_ino, inode->i_generation, rc); GOTO(out, rc); } - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); - LASSERT(body != NULL); /* checked by mdc_getattr_name */ + + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); /* checked by md_getattr_name */ /* swabbed by mdc_getattr_name */ LASSERT_REPSWABBED(req, REPLY_REC_OFF); @@ -489,7 +636,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ return -ENOTTY; - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); switch(cmd) { case EXT3_IOC_GETFLAGS: case EXT3_IOC_SETFLAGS: @@ -505,10 +652,10 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, */ case IOC_MDC_LOOKUP: { struct ptlrpc_request *request = NULL; - struct ll_fid fid; + int namelen, rc, len = 0; char *buf = NULL; char *filename; - int namelen, rc, len = 0; + struct obd_capa *oc; rc = obd_ioctl_getdata(&buf, &len, (void *)arg); if (rc) @@ -523,11 +670,13 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, GOTO(out, rc = -EINVAL); } - ll_inode2fid(&fid, inode); - rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen, - OBD_MD_FLID, 0, &request); + oc = ll_mdscapa_get(inode); + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), oc, + filename, namelen, OBD_MD_FLID, 0, + &request); + capa_put(oc); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc); + CDEBUG(D_INFO, "md_getattr_name: %d\n", rc); GOTO(out, rc); } @@ -547,11 +696,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, sizeof(lump->lmm_objects[0])); rc = copy_from_user(&lum, lump, sizeof(lum)); if (rc) - return(-EFAULT); + RETURN(-EFAULT); rc = ll_dir_setstripe(inode, &lum); - return rc; + RETURN(rc); } case LL_IOC_OBD_STATFS: RETURN(ll_obd_statfs(inode, (void *)arg)); @@ -560,9 +709,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, case IOC_MDC_GETFILEINFO: case IOC_MDC_GETFILESTRIPE: { struct ptlrpc_request *request = NULL; - struct mds_body *body; struct lov_user_md *lump; struct lov_mds_md *lmm = NULL; + struct mdt_body *body; char *filename = NULL; int rc, lmmsize; @@ -579,10 +728,10 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, } if (request) { - body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); - LASSERT(body != NULL); /* checked by mdc_getattr_name */ - /* swabbed by mdc_getattr_name */ + body = lustre_msg_buf(request->rq_repmsg, + REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); /* checked by md_getattr_name */ + /* swabbed by md_getattr_name */ LASSERT_REPSWABBED(request, REPLY_REC_OFF); } else { GOTO(out_req, rc); @@ -624,7 +773,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, st.st_atime = body->atime; st.st_mtime = body->mtime; st.st_ctime = body->ctime; - st.st_ino = body->ino; + st.st_ino = inode->i_ino; lmdp = (struct lov_user_mds_data *)arg; rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st)); @@ -663,11 +812,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (rc) GOTO(free_lmm, rc = -EFAULT); - rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize); + rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize); if (rc < 0) GOTO(free_lmm, rc = -ENOMEM); - rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm); + rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm); if (rc) GOTO(free_lsm, rc); @@ -684,7 +833,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, EXIT; free_lsm: - obd_free_memmd(sbi->ll_osc_exp, &lsm); + obd_free_memmd(sbi->ll_dt_exp, &lsm); free_lmm: OBD_FREE(lmm, lmmsize); return rc; @@ -727,10 +876,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); - str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF, - data->ioc_plen1); - if (!rc) - rc = copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1); + if (!rc) { + str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF, + data->ioc_plen1); + rc = copy_to_user(data->ioc_pbuf1, str, data->ioc_plen1); + } ptlrpc_req_finished(req); out_catinfo: obd_ioctl_freedata(buf, len); @@ -747,15 +897,15 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (!oqctl) RETURN(-ENOMEM); oqctl->qc_type = arg; - rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl); + rc = obd_quotacheck(sbi->ll_md_exp, oqctl); if (rc < 0) { - CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc); + CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc); error = rc; } - rc = obd_quotacheck(sbi->ll_osc_exp, oqctl); + rc = obd_quotacheck(sbi->ll_dt_exp, oqctl); if (rc < 0) - CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc); + CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc); OBD_FREE_PTR(oqctl); return error ?: rc; @@ -771,7 +921,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (!check) RETURN(-ENOMEM); - rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check, + rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check, NULL); if (rc) { CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc); @@ -780,7 +930,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, GOTO(out_poll, rc); } - rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check, + rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check, NULL); if (rc) { CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc); @@ -831,14 +981,14 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, /* XXX: dqb_valid is borrowed as a flag to mark that * only mds quota is wanted */ if (qctl->qc_dqblk.dqb_valid) - qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd-> + qctl->obd_uuid = sbi->ll_md_exp->exp_obd-> u.cli.cl_target_uuid; break; case Q_GETINFO: break; default: CERROR("unsupported quotactl op: %#x\n", cmd); - GOTO(out_quotactl, -ENOTTY); + GOTO(out_quotactl, rc = -ENOTTY); } QCTL_COPY(oqctl, qctl); @@ -848,7 +998,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, struct obd_uuid *uuid = &qctl->obd_uuid; obd = class_find_client_notype(uuid, - &sbi->ll_osc_exp->exp_obd->obd_uuid); + &sbi->ll_dt_exp->exp_obd->obd_uuid); if (!obd) GOTO(out_quotactl, rc = -ENOENT); @@ -859,12 +1009,12 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, else GOTO(out_quotactl, rc = -EINVAL); - if (sbi->ll_mdc_exp->exp_obd == obd) { - rc = obd_quotactl(sbi->ll_mdc_exp, oqctl); + if (sbi->ll_md_exp->exp_obd == obd) { + rc = obd_quotactl(sbi->ll_md_exp, oqctl); } else { int i; struct obd_export *exp; - struct lov_obd *lov = &sbi->ll_osc_exp-> + struct lov_obd *lov = &sbi->ll_dt_exp-> exp_obd->u.lov; for (i = 0; i < lov->desc.ld_tgt_count; i++) { @@ -888,10 +1038,10 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, GOTO(out_quotactl, rc); } - rc = obd_quotactl(sbi->ll_mdc_exp, oqctl); + rc = obd_quotactl(sbi->ll_md_exp, oqctl); if (rc && rc != -EBUSY && cmd == Q_QUOTAON) { oqctl->qc_cmd = Q_QUOTAOFF; - obd_quotactl(sbi->ll_mdc_exp, oqctl); + obd_quotactl(sbi->ll_md_exp, oqctl); } QCTL_COPY(qctl, oqctl); @@ -905,7 +1055,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, } #endif /* HAVE_QUOTA_SUPPORT */ case OBD_IOC_GETNAME: { - struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp); + struct obd_device *obd = class_exp2obd(sbi->ll_dt_exp); if (!obd) RETURN(-EFAULT); if (copy_to_user((void *)arg, obd->obd_name, @@ -913,8 +1063,26 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, RETURN (-EFAULT); RETURN(0); } + case LL_IOC_FLUSHCTX: + RETURN(ll_flush_ctx(inode)); + case LL_IOC_GETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_getfacl(inode, &ioc)); + } + case LL_IOC_SETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_setfacl(inode, &ioc)); + } default: - RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg)); + RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); } } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 08f55aa..1a7c6f6 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -25,6 +25,7 @@ #define DEBUG_SUBSYSTEM S_LLITE #include <lustre_dlm.h> #include <lustre_lite.h> +#include <lustre_mdc.h> #include <linux/pagemap.h> #include <linux/file.h> #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) @@ -37,7 +38,7 @@ struct ll_file_data *ll_file_data_get(void) { struct ll_file_data *fd; - OBD_SLAB_ALLOC(fd, ll_file_data_slab, SLAB_KERNEL, sizeof *fd); + OBD_SLAB_ALLOC(fd, ll_file_data_slab, GFP_KERNEL, sizeof *fd); return fd; } @@ -47,57 +48,100 @@ static void ll_file_data_put(struct ll_file_data *fd) OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd); } -static int ll_close_inode_openhandle(struct inode *inode, +void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, + struct lustre_handle *fh) +{ + op_data->op_fid1 = ll_i2info(inode)->lli_fid; + op_data->op_attr.ia_mode = inode->i_mode; + op_data->op_attr.ia_atime = inode->i_atime; + op_data->op_attr.ia_mtime = inode->i_mtime; + op_data->op_attr.ia_ctime = inode->i_ctime; + op_data->op_attr.ia_size = inode->i_size; + op_data->op_attr_blocks = inode->i_blocks; + ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags; + op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; + memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle)); + op_data->op_capa1 = ll_mdscapa_get(inode); +} + +static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, + struct obd_client_handle *och) +{ + ENTRY; + + op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET | + ATTR_MTIME_SET | ATTR_CTIME_SET; + + if (!(och->och_flags & FMODE_WRITE)) + goto out; + + if (!(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) || + !S_ISREG(inode->i_mode)) + op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + else + ll_epoch_close(inode, op_data, &och, 0); + +out: + ll_pack_inode2opdata(inode, op_data, &och->och_fh); + EXIT; +} + +static int ll_close_inode_openhandle(struct obd_export *md_exp, + struct inode *inode, struct obd_client_handle *och) { + struct obd_export *exp = ll_i2mdexp(inode); + struct md_op_data *op_data; struct ptlrpc_request *req = NULL; - struct obd_device *obd; - struct obdo *oa; + struct obd_device *obd = class_exp2obd(exp); + int epoch_close = 1; int rc; ENTRY; - obd = class_exp2obd(ll_i2mdcexp(inode)); if (obd == NULL) { + /* + * XXX: in case of LMV, is this correct to access + * ->exp_handle? + */ CERROR("Invalid MDC connection handle "LPX64"\n", - ll_i2mdcexp(inode)->exp_handle.h_cookie); + ll_i2mdexp(inode)->exp_handle.h_cookie); GOTO(out, rc = 0); } /* * here we check if this is forced umount. If so this is called on - * canceling "open lock" and we do not call mdc_close() in this case, as + * canceling "open lock" and we do not call md_close() in this case, as * it will not be successful, as import is already deactivated. */ - if (obd->obd_no_recov) + if (obd->obd_force) GOTO(out, rc = 0); - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); // XXX We leak openhandle and request here. - - oa->o_id = inode->i_ino; - oa->o_valid = OBD_MD_FLID; - obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME); - if (0 /* ll_is_inode_dirty(inode) */) { - oa->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; - oa->o_valid |= OBD_MD_FLFLAGS; - } - - rc = mdc_close(ll_i2mdcexp(inode), oa, och, &req); - if (rc == EAGAIN) { - /* We are the last writer, so the MDS has instructed us to get - * the file size and any write cookies, then close again. */ - //ll_queue_done_writing(inode); - rc = 0; + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here. + + ll_prepare_close(inode, op_data, och); + epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); + rc = md_close(md_exp, op_data, och, &req); + + if (rc == -EAGAIN) { + /* This close must have the epoch closed. */ + LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM); + LASSERT(epoch_close); + /* MDS has instructed us to obtain Size-on-MDS attribute from + * OSTs and send setattr to back to MDS. */ + rc = ll_sizeonmds_update(inode, &och->och_fh, + op_data->op_ioepoch); + if (rc) { + CERROR("inode %lu mdc Size-on-MDS update failed: " + "rc = %d\n", inode->i_ino, rc); + rc = 0; + } } else if (rc) { CERROR("inode %lu mdc close failed: rc = %d\n", inode->i_ino, rc); } - - obdo_free(oa); + ll_finish_md_op_data(op_data); if (rc == 0) { rc = ll_objects_destroy(req, inode); @@ -109,19 +153,27 @@ static int ll_close_inode_openhandle(struct inode *inode, ptlrpc_req_finished(req); /* This is close request */ EXIT; out: - mdc_clear_open_replay_data(och); - + + if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close && + S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { + ll_queue_done_writing(inode, LLIF_DONE_WRITING); + } else { + md_clear_open_replay_data(md_exp, och); + /* Free @och if it is not waiting for DONE_WRITING. */ + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + OBD_FREE_PTR(och); + } + return rc; } -int ll_mdc_real_close(struct inode *inode, int flags) +int ll_md_real_close(struct inode *inode, int flags) { struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; struct obd_client_handle **och_p; struct obd_client_handle *och; __u64 *och_usecount; - + int rc = 0; ENTRY; if (flags & FMODE_WRITE) { @@ -130,7 +182,7 @@ int ll_mdc_real_close(struct inode *inode, int flags) } else if (flags & FMODE_EXEC) { och_p = &lli->lli_mds_exec_och; och_usecount = &lli->lli_open_fd_exec_count; - } else { + } else { LASSERT(flags & FMODE_READ); och_p = &lli->lli_mds_read_och; och_usecount = &lli->lli_open_fd_read_count; @@ -148,16 +200,15 @@ int ll_mdc_real_close(struct inode *inode, int flags) if (och) { /* There might be a race and somebody have freed this och already */ - rc = ll_close_inode_openhandle(inode, och); - och->och_fh.cookie = DEAD_HANDLE_MAGIC; - OBD_FREE(och, sizeof *och); + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, + inode, och); } RETURN(rc); } -int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, - struct file *file) +int ll_md_close(struct obd_export *md_exp, struct inode *inode, + struct file *file) { struct ll_file_data *fd = LUSTRE_FPRIVATE(file); struct ll_inode_info *lli = ll_i2info(inode); @@ -179,8 +230,6 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; struct lustre_handle lockh; struct inode *inode = file->f_dentry->d_inode; - struct ldlm_res_id file_res_id = {.name={inode->i_ino, - inode->i_generation}}; ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}}; down(&lli->lli_och_sem); @@ -199,11 +248,11 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, } up(&lli->lli_och_sem); - if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags, - &file_res_id, LDLM_IBITS, &policy,lockmode, - &lockh)) { - rc = ll_mdc_real_close(file->f_dentry->d_inode, - fd->fd_omode); + if (!md_lock_match(md_exp, flags, ll_inode2fid(inode), + LDLM_IBITS, &policy, lockmode, + &lockh)) { + rc = ll_md_real_close(file->f_dentry->d_inode, + fd->fd_omode); } } else { CERROR("Releasing a file %p with negative dentry %p. Name %s", @@ -212,6 +261,7 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); + ll_capa_close(inode); RETURN(rc); } @@ -234,22 +284,27 @@ int ll_file_release(struct inode *inode, struct file *file) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); - ll_vfs_ops_tally(sbi, VFS_OPS_RELEASE); /* don't do anything for / */ if (inode->i_sb->s_root == file->f_dentry) RETURN(0); - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_RELEASE); - + ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1); fd = LUSTRE_FPRIVATE(file); LASSERT(fd != NULL); + /* don't do anything for / */ + if (inode->i_sb->s_root == file->f_dentry) { + LUSTRE_FPRIVATE(file) = NULL; + ll_file_data_put(fd); + RETURN(0); + } + if (lsm) lov_test_and_clear_async_rc(lsm); lli->lli_async_rc = 0; - rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file); + rc = ll_md_close(sbi->ll_md_exp, inode, file); RETURN(rc); } @@ -257,19 +312,16 @@ static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize, struct lookup_intent *itp) { struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode); - struct mdc_op_data data; struct dentry *parent = file->f_dentry->d_parent; const char *name = file->f_dentry->d_name.name; const int len = file->f_dentry->d_name.len; - struct inode *inode = file->f_dentry->d_inode; + struct md_op_data *op_data; struct ptlrpc_request *req; int rc; if (!parent) RETURN(-ENOENT); - ll_prepare_mdc_op_data(&data, parent->d_inode, inode, name, len, O_RDWR); - /* Usually we come here only for NFSD, and we want open lock. But we can also get here with pre 2.6.15 patchless kernels, and in that case that lock is also ok */ @@ -282,29 +334,39 @@ static int ll_intent_file_open(struct file *file, void *lmm, if (!lmm && !lmmsize) itp->it_flags |= MDS_OPEN_LOCK; - rc = mdc_intent_lock(sbi->ll_mdc_exp, &data, lmm, lmmsize, itp, - 0 /*unused */, &req, ll_mdc_blocking_ast, 0); + op_data = ll_prep_md_op_data(NULL, parent->d_inode, + file->f_dentry->d_inode, name, len, + O_RDWR, LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp, + 0 /*unused */, &req, ll_md_blocking_ast, 0); + ll_finish_md_op_data(op_data); if (rc == -ESTALE) { /* reason for keep own exit path - don`t flood log * with messages with -ESTALE errors. */ - if (!it_disposition(itp, DISP_OPEN_OPEN)) + if (!it_disposition(itp, DISP_OPEN_OPEN) || + it_open_error(DISP_OPEN_OPEN, itp)) GOTO(out, rc); ll_release_openhandle(file->f_dentry, itp); GOTO(out_stale, rc); } - if (rc != 0) { - CERROR("lock enqueue: err: %d\n", rc); - GOTO(out, rc); + if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) { + rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp); + CERROR("lock enqueue: err: %d\n", rc); + GOTO(out, rc); } if (itp->d.lustre.it_lock_mode) - mdc_set_lock_data(&itp->d.lustre.it_lock_handle, - inode); + md_set_lock_data(sbi->ll_md_exp, + &itp->d.lustre.it_lock_handle, + file->f_dentry->d_inode); - rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode, - req, DLM_REPLY_REC_OFF, NULL); + rc = ll_prep_inode(&file->f_dentry->d_inode, req, DLM_REPLY_REC_OFF, + NULL); out: ptlrpc_req_finished(itp->d.lustre.it_data); @@ -315,41 +377,61 @@ out_stale: RETURN(rc); } - -static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it, - struct obd_client_handle *och) +static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli, + struct lookup_intent *it, struct obd_client_handle *och) { struct ptlrpc_request *req = it->d.lustre.it_data; - struct mds_body *body; + struct mdt_body *body; LASSERT(och); body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); - LASSERT(body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); /* and swabbed in mdc_enqueue */ + LASSERT(body != NULL); /* reply already checked out */ + LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); /* and swabbed in md_enqueue */ memcpy(&och->och_fh, &body->handle, sizeof(body->handle)); och->och_magic = OBD_CLIENT_HANDLE_MAGIC; - lli->lli_io_epoch = body->io_epoch; + och->och_fid = lli->lli_fid; + och->och_flags = it->it_flags; + lli->lli_ioepoch = body->ioepoch; - mdc_set_open_replay_data(och, it->d.lustre.it_data); + return md_set_open_replay_data(md_exp, och, req); } int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd, struct obd_client_handle *och) { + struct inode *inode = file->f_dentry->d_inode; + struct ll_inode_info *lli = ll_i2info(inode); ENTRY; LASSERT(!LUSTRE_FPRIVATE(file)); LASSERT(fd != NULL); - if (och) - ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och); + if (och) { + struct ptlrpc_request *req = it->d.lustre.it_data; + struct mdt_body *body; + int rc; + + rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och); + if (rc) + RETURN(rc); + + body = lustre_msg_buf(req->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + + if ((it->it_flags & FMODE_WRITE) && + (body->valid & OBD_MD_FLSIZE)) + { + CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n", + lli->lli_ioepoch, PFID(&lli->lli_fid)); + } + } + LUSTRE_FPRIVATE(file) = fd; - ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras); + ll_readahead_init(inode, &fd->fd_ras); fd->fd_omode = it->it_flags; - RETURN(0); } @@ -360,7 +442,7 @@ int ll_local_open(struct file *file, struct lookup_intent *it, * stripe MD to the MDS, or try to destroy the objects if that fails. * * If we already have the stripe MD locally then we don't request it in - * mdc_open(), by passing a lmm_size = 0. + * md_open(), by passing a lmm_size = 0. * * It is up to the application to ensure no other processes open this file * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be @@ -383,7 +465,6 @@ int ll_file_open(struct inode *inode, struct file *file) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, inode->i_generation, inode, file->f_flags); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_OPEN); /* don't do anything for / */ if (inode->i_sb->s_root == file->f_dentry) @@ -400,9 +481,16 @@ int ll_file_open(struct inode *inode, struct file *file) if (fd == NULL) RETURN(-ENOMEM); + /* don't do anything for / */ + if (inode->i_sb->s_root == file->f_dentry) { + LUSTRE_FPRIVATE(file) = fd; + RETURN(0); + } + if (!it || !it->d.lustre.it_disposition) { /* Convert f_flags into access mode. We cannot use file->f_mode, - * because everything but O_ACCMODE mask was stripped from it */ + * because everything but O_ACCMODE mask was stripped from + * there */ if ((oit.it_flags + 1) & O_ACCMODE) oit.it_flags++; if (file->f_flags & O_TRUNC) @@ -433,22 +521,29 @@ int ll_file_open(struct inode *inode, struct file *file) och_p = &lli->lli_mds_read_och; och_usecount = &lli->lli_open_fd_read_count; } - - LASSERTF(it->it_flags != 0, "it %p dist %d \n", it, - it->d.lustre.it_disposition); - + down(&lli->lli_och_sem); if (*och_p) { /* Open handle is present */ if (it_disposition(it, DISP_OPEN_OPEN)) { /* Well, there's extra open request that we do not need, let's close it somehow. This will decref request. */ + rc = it_open_error(DISP_OPEN_OPEN, it); + if (rc) { + ll_file_data_put(fd); + GOTO(out_och_free, rc); + } ll_release_openhandle(file->f_dentry, it); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, + LPROC_LL_OPEN); } (*och_usecount)++; rc = ll_local_open(file, it, fd, NULL); - - LASSERTF(rc == 0, "rc = %d\n", rc); + if (rc) { + up(&lli->lli_och_sem); + ll_file_data_put(fd); + RETURN(rc); + } } else { LASSERT(*och_usecount == 0); OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); @@ -458,7 +553,9 @@ int ll_file_open(struct inode *inode, struct file *file) } (*och_usecount)++; if (!it->d.lustre.it_disposition) { + it->it_flags |= O_CHECK_STALE; rc = ll_intent_file_open(file, NULL, 0, it); + it->it_flags &= ~O_CHECK_STALE; if (rc) { ll_file_data_put(fd); GOTO(out_och_free, rc); @@ -469,12 +566,13 @@ int ll_file_open(struct inode *inode, struct file *file) req = it->d.lustre.it_data; ptlrpc_req_finished(req); } - mdc_set_lock_data(&it->d.lustre.it_lock_handle, - file->f_dentry->d_inode); + md_set_lock_data(ll_i2sbi(inode)->ll_md_exp, + &it->d.lustre.it_lock_handle, + file->f_dentry->d_inode); } req = it->d.lustre.it_data; - /* mdc_intent_lock() didn't get a request ref if there was an + /* md_intent_lock() didn't get a request ref if there was an * open error, so don't do cleanup on the request here * (bug 3430) */ /* XXX (green): Should not we bail out on any error here, not @@ -485,9 +583,13 @@ int ll_file_open(struct inode *inode, struct file *file) GOTO(out_och_free, rc); } - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1); rc = ll_local_open(file, it, fd, *och_p); - LASSERTF(rc == 0, "rc = %d\n", rc); + if (rc) { + up(&lli->lli_och_sem); + ll_file_data_put(fd); + GOTO(out_och_free, rc); + } } up(&lli->lli_och_sem); @@ -497,6 +599,8 @@ int ll_file_open(struct inode *inode, struct file *file) if (!S_ISREG(inode->i_mode)) GOTO(out, rc); + ll_capa_open(inode); + lsm = lli->lli_smd; if (lsm == NULL) { if (file->f_flags & O_LOV_DELAY_CREATE || @@ -507,14 +611,12 @@ int ll_file_open(struct inode *inode, struct file *file) } file->f_flags &= ~O_LOV_DELAY_CREATE; GOTO(out, rc); - out: +out: ptlrpc_req_finished(req); if (req) it_clear_disposition(it, DISP_ENQ_OPEN_REF); - if (rc == 0) { - ll_open_complete(inode); - } else { out_och_free: + if (rc) { if (*och_p) { OBD_FREE(*och_p, sizeof (struct obd_client_handle)); *och_p = NULL; /* OBD_FREE writes some magic there */ @@ -522,43 +624,57 @@ out_och_free: } up(&lli->lli_och_sem); } + return rc; } /* Fills the obdo with the attributes for the inode defined by lsm */ -int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm, - struct obdo *oa) +int ll_inode_getattr(struct inode *inode, struct obdo *obdo) { struct ptlrpc_request_set *set; + struct ll_inode_info *lli = ll_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct obd_info oinfo = { { { 0 } } }; int rc; ENTRY; LASSERT(lsm != NULL); - memset(oa, 0, sizeof *oa); oinfo.oi_md = lsm; - oinfo.oi_oa = oa; - oa->o_id = lsm->lsm_object_id; - oa->o_mode = S_IFREG; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME; + oinfo.oi_oa = obdo; + oinfo.oi_oa->o_id = lsm->lsm_object_id; + oinfo.oi_oa->o_gr = lsm->lsm_object_gr; + oinfo.oi_oa->o_mode = S_IFREG; + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLBLKSZ | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLGROUP; + oinfo.oi_capa = ll_mdscapa_get(inode); set = ptlrpc_prep_set(); if (set == NULL) { + CERROR("can't allocate ptlrpc set\n"); rc = -ENOMEM; } else { - rc = obd_getattr_async(exp, &oinfo, set); + rc = obd_getattr_async(ll_i2dtexp(inode), &oinfo, set); if (rc == 0) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } + capa_put(oinfo.oi_capa); if (rc) RETURN(rc); - oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE); + oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME | OBD_MD_FLSIZE); + + obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid); + CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n", + lli->lli_smd->lsm_object_id, inode->i_size, inode->i_blocks, + inode->i_blksize); RETURN(0); } @@ -581,7 +697,7 @@ static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) { struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct obd_export *exp = ll_i2obdexp(inode); + struct obd_export *exp = ll_i2dtexp(inode); struct { char name[16]; struct ldlm_lock *lock; @@ -604,7 +720,7 @@ static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) check: if (lsm->lsm_oinfo[stripe]->loi_id != lock->l_resource->lr_name.name[0]|| - lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[1]){ + lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[2]){ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64, lsm->lsm_oinfo[stripe]->loi_id, lsm->lsm_oinfo[stripe]->loi_gr); @@ -642,7 +758,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, * whole page. */ if ((tmpex.l_extent.start & ~CFS_PAGE_MASK) != 0 || ((tmpex.l_extent.end + 1) & ~CFS_PAGE_MASK) != 0) - LDLM_ERROR(lock, "lock not aligned on CFS_PAGE_SIZE %lu", CFS_PAGE_SIZE); + LDLM_ERROR(lock, "lock not aligned on PAGE_SIZE %lu", + CFS_PAGE_SIZE); LASSERT((tmpex.l_extent.start & ~CFS_PAGE_MASK) == 0); LASSERT(((tmpex.l_extent.end + 1) & ~CFS_PAGE_MASK) == 0); @@ -660,7 +777,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, if (end < tmpex.l_extent.end >> CFS_PAGE_SHIFT) end = ~0; - i = inode->i_size ? (inode->i_size - 1) >> CFS_PAGE_SHIFT : 0; + i = inode->i_size ? (__u64)(inode->i_size - 1) >> CFS_PAGE_SHIFT : 0; if (i < end) end = i; @@ -728,15 +845,17 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, } tmpex.l_extent.end = tmpex.l_extent.start + CFS_PAGE_SIZE - 1; - /* check to see if another DLM lock covers this page b=2765 */ + /* check to see if another DLM lock covers this page b=2765 */ rc2 = ldlm_lock_match(lock->l_resource->lr_namespace, LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK, &lock->l_resource->lr_name, LDLM_EXTENT, &tmpex, LCK_PR | LCK_PW, &lockh); - if (rc2 == 0 && page->mapping != NULL) { + + if (rc2 <= 0 && page->mapping != NULL) { struct ll_async_page *llap = llap_cast_private(page); - // checking again to account for writeback's lock_page() + /* checking again to account for writeback's + * lock_page() */ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n"); if (llap) ll_ra_accounting(llap, inode->i_mapping); @@ -812,7 +931,6 @@ static int ll_extent_lock_callback(struct ldlm_lock *lock, lsm->lsm_oinfo[stripe]->loi_kms = kms; unlock_res_and_lock(lock); lov_stripe_unlock(lsm); - //ll_try_done_writing(inode); iput: iput(inode); break; @@ -857,16 +975,16 @@ int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data) lvb = lock->l_lvb_data; lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size; - LOCK_INODE_MUTEX(inode); lock_res_and_lock(lock); + ll_inode_size_lock(inode, 1); kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size); kms = ldlm_extent_shift_kms(NULL, kms); if (lsm->lsm_oinfo[stripe].loi_kms != kms) LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64, lsm->lsm_oinfo[stripe].loi_kms, kms); lsm->lsm_oinfo[stripe].loi_kms = kms; + ll_inode_size_unlock(inode, 1); unlock_res_and_lock(lock); - UNLOCK_INODE_MUTEX(inode); } iput: @@ -933,6 +1051,50 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) return rc; } +static void ll_merge_lvb(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ost_lvb lvb; + ENTRY; + + ll_inode_size_lock(inode, 1); + inode_init_lvb(inode, &lvb); + obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0); + inode->i_size = lvb.lvb_size; + inode->i_blocks = lvb.lvb_blocks; + LTIME_S(inode->i_mtime) = lvb.lvb_mtime; + LTIME_S(inode->i_atime) = lvb.lvb_atime; + LTIME_S(inode->i_ctime) = lvb.lvb_ctime; + ll_inode_size_unlock(inode, 1); + EXIT; +} + +int ll_local_size(struct inode *inode) +{ + ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct lustre_handle lockh = { 0 }; + int flags = 0; + int rc; + ENTRY; + + if (lli->lli_smd->lsm_stripe_count == 0) + RETURN(0); + + rc = obd_match(sbi->ll_dt_exp, lli->lli_smd, LDLM_EXTENT, + &policy, LCK_PR | LCK_PW, &flags, inode, &lockh); + if (rc < 0) + RETURN(rc); + else if (rc == 0) + RETURN(-ENODATA); + + ll_merge_lvb(inode); + obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR | LCK_PW, &lockh); + RETURN(0); +} + int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, lstat_t *st) { @@ -941,9 +1103,9 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, struct obd_info oinfo = { { { 0 } } }; struct ost_lvb lvb; int rc; - + ENTRY; - + einfo.ei_type = LDLM_EXTENT; einfo.ei_mode = LCK_PR; einfo.ei_flags = LDLM_FL_HAS_INTENT; @@ -956,7 +1118,7 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, oinfo.oi_lockh = &lockh; oinfo.oi_md = lsm; - rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo); + rc = obd_enqueue_rqset(sbi->ll_dt_exp, &oinfo, &einfo); if (rc == -ENOENT) RETURN(rc); if (rc != 0) { @@ -964,17 +1126,17 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, "returning -EIO\n", rc); RETURN(rc > 0 ? -EIO : rc); } - + lov_stripe_lock(lsm); memset(&lvb, 0, sizeof(lvb)); - obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0); + obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 0); st->st_size = lvb.lvb_size; st->st_blocks = lvb.lvb_blocks; st->st_mtime = lvb.lvb_mtime; st->st_atime = lvb.lvb_atime; st->st_ctime = lvb.lvb_ctime; lov_stripe_unlock(lsm); - + RETURN(rc); } @@ -987,10 +1149,12 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) struct lustre_handle lockh = { 0 }; struct obd_enqueue_info einfo = { 0 }; struct obd_info oinfo = { { { 0 } } }; - struct ost_lvb lvb; int rc; ENTRY; + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + RETURN(0); + CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino); if (!lli->lli_smd) { @@ -1017,7 +1181,7 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) oinfo.oi_lockh = &lockh; oinfo.oi_md = lli->lli_smd; - rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo); + rc = obd_enqueue_rqset(sbi->ll_dt_exp, &oinfo, &einfo); if (rc == -ENOENT) RETURN(rc); if (rc != 0) { @@ -1025,18 +1189,10 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) RETURN(rc > 0 ? -EIO : rc); } - ll_inode_size_lock(inode, 1); - inode_init_lvb(inode, &lvb); - obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0); - inode->i_size = lvb.lvb_size; - inode->i_blocks = lvb.lvb_blocks; - LTIME_S(inode->i_mtime) = lvb.lvb_mtime; - LTIME_S(inode->i_atime) = lvb.lvb_atime; - LTIME_S(inode->i_ctime) = lvb.lvb_ctime; - ll_inode_size_unlock(inode, 1); + ll_merge_lvb(inode); - CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n", - inode->i_size, (long long)inode->i_blocks); + CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n", + inode->i_size, inode->i_blocks); RETURN(rc); } @@ -1080,14 +1236,14 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, oinfo.oi_lockh = lockh; oinfo.oi_md = lsm; - rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo); + rc = obd_enqueue(sbi->ll_dt_exp, &oinfo, &einfo); *policy = oinfo.oi_policy; if (rc > 0) rc = -EIO; ll_inode_size_lock(inode, 1); inode_init_lvb(inode, &lvb); - obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1); + obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 1); if (policy->l_extent.start == 0 && policy->l_extent.end == OBD_OBJECT_EOF) { @@ -1102,7 +1258,7 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, * ll_inode_size_lock() after the enqueue maintains the DLM * -> ll_inode_size_lock() acquiring order. */ inode->i_size = lvb.lvb_size; - CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n", + CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n", inode->i_ino, inode->i_size); } @@ -1129,7 +1285,7 @@ int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, (sbi->ll_flags & LL_SBI_NOLCK)) RETURN(0); - rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh); + rc = obd_cancel(sbi->ll_dt_exp, lsm, mode, lockh); RETURN(rc); } @@ -1153,14 +1309,12 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", inode->i_ino, inode->i_generation, inode, count, *ppos); - ll_vfs_ops_tally(sbi, VFS_OPS_READ); - /* "If nbyte is 0, read() will return 0 and have no other results." * -- Single Unix Spec */ if (count == 0) RETURN(0); - lprocfs_counter_add(sbi->ll_stats, LPROC_LL_READ_BYTES, count); + ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count); if (!lsm) { /* Read on file with no objects should return zero-filled @@ -1192,7 +1346,7 @@ repeat: if (sbi->ll_max_rw_chunk != 0) { /* first, let's know the end of the current stripe */ end = *ppos; - obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END, + obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END, (obd_off *)&end); /* correct, the end is beyond the request */ @@ -1205,7 +1359,7 @@ repeat: } else { end = *ppos + count - 1; } - + node = ll_node_from_inode(inode, *ppos, end, LCK_PR); tree.lt_fd = LUSTRE_FPRIVATE(file); rc = ll_tree_lock(&tree, node, buf, count, @@ -1234,7 +1388,7 @@ repeat: * correctly in the face of concurrent writes and truncates. */ inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1); + obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 1); kms = lvb.lvb_size; if (*ppos + count - 1 > kms) { /* A glimpse is necessary to determine whether we return a @@ -1258,8 +1412,8 @@ repeat: } chunk = end - *ppos + 1; - CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", - inode->i_ino, chunk, *ppos, inode->i_size); + CDEBUG(D_INODE, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", + inode->i_ino, chunk, *ppos, inode->i_size); /* turn off the kernel's read-ahead */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) @@ -1278,7 +1432,7 @@ repeat: /* BUG: 5972 */ file_accessed(file); retval = generic_file_read(file, buf, chunk, ppos); - ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 0); + ll_rw_stats_tally(sbi, current->pid, file, count, 0); ll_tree_unlock(&tree); @@ -1316,8 +1470,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", inode->i_ino, inode->i_generation, inode, count, *ppos); - ll_vfs_ops_tally(sbi, VFS_OPS_WRITE); - + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ /* POSIX, but surprised the VFS doesn't check this already */ @@ -1344,7 +1497,7 @@ repeat: } else if (sbi->ll_max_rw_chunk != 0) { /* first, let's know the end of the current stripe */ end = *ppos; - obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END, + obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END, (obd_off *)&end); /* correct, the end is beyond the request */ @@ -1408,8 +1561,8 @@ out: up(&ll_i2info(inode)->lli_write_sem); retval = (sum > 0) ? sum : retval; - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, - retval > 0 ? retval : 0); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES, + retval > 0 ? retval : 0); RETURN(retval); } @@ -1439,9 +1592,7 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, if (count == 0) RETURN(0); - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, - count); - + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count); /* turn off the kernel's read-ahead */ in_file->f_ra.ra_pages = 0; @@ -1477,7 +1628,7 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, * correctly in the face of concurrent writes and truncates. */ inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1); + obd_merge_lvb(ll_i2sbi(inode)->ll_dt_exp, lsm, &lvb, 1); kms = lvb.lvb_size; if (*ppos + count - 1 > kms) { /* A glimpse is necessary to determine whether we return a @@ -1513,7 +1664,7 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file, unsigned long arg) { struct ll_inode_info *lli = ll_i2info(inode); - struct obd_export *exp = ll_i2obdexp(inode); + struct obd_export *exp = ll_i2dtexp(inode); struct ll_recreate_obj ucreatp; struct obd_trans_info oti = { 0 }; struct obdo *oa = NULL; @@ -1530,11 +1681,11 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file, if (rc) { RETURN(-EFAULT); } - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) RETURN(-ENOMEM); - down(&lli->lli_open_sem); + down(&lli->lli_size_sem); lsm = lli->lli_smd; if (lsm == NULL) GOTO(out, rc = -ENOENT); @@ -1546,9 +1697,10 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file, GOTO(out, rc = -ENOMEM); oa->o_id = ucreatp.lrc_id; + oa->o_gr = ucreatp.lrc_group; oa->o_nlink = ucreatp.lrc_ost_idx; oa->o_flags |= OBD_FL_RECREATE_OBJS; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP; obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); @@ -1559,14 +1711,13 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file, OBD_FREE(lsm2, lsm_size); GOTO(out, rc); out: - up(&lli->lli_open_sem); - obdo_free(oa); + up(&lli->lli_size_sem); + OBDO_FREE(oa); return rc; } int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, - int flags, struct lov_user_md *lum, - int lum_size) + int flags, struct lov_user_md *lum, int lum_size) { struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm; @@ -1574,10 +1725,10 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, int rc = 0; ENTRY; - down(&lli->lli_open_sem); + down(&lli->lli_size_sem); lsm = lli->lli_smd; if (lsm) { - up(&lli->lli_open_sem); + up(&lli->lli_size_sem); CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", inode->i_ino); RETURN(-EEXIST); @@ -1595,7 +1746,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, ll_release_openhandle(file->f_dentry, &oit); out: - up(&lli->lli_open_sem); + up(&lli->lli_size_sem); ll_intent_release(&oit); RETURN(rc); out_req_free: @@ -1608,30 +1759,28 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct ptlrpc_request **request) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; + struct mdt_body *body; struct lov_mds_md *lmm = NULL; struct ptlrpc_request *req = NULL; + struct obd_capa *oc; int rc, lmmsize; - ll_inode2fid(&fid, inode); - rc = ll_get_max_mdsize(sbi, &lmmsize); if (rc) RETURN(rc); - rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, - filename, strlen(filename) + 1, - OBD_MD_FLEASIZE | OBD_MD_FLDIREA, - lmmsize, &req); + oc = ll_mdscapa_get(inode); + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), + oc, filename, strlen(filename) + 1, + OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize, &req); + capa_put(oc); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr_name failed " - "on %s: rc %d\n", filename, rc); + CDEBUG(D_INFO, "md_getattr_name failed " + "on %s: rc %d\n", filename, rc); GOTO(out, rc); } - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); LASSERT(body != NULL); /* checked by mdc_getattr_name */ /* swabbed by mdc_getattr_name */ LASSERT_REPSWABBED(req, REPLY_REC_OFF); @@ -1643,8 +1792,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, GOTO(out, rc = -ENODATA); } - lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, - lmmsize); + lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize); LASSERT(lmm != NULL); LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1); @@ -1665,16 +1813,16 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct lov_user_md_join *lmj; int lmj_size, i, aindex = 0; - rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize); + rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize); if (rc < 0) GOTO(out, rc = -ENOMEM); - rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm); + rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm); if (rc) GOTO(out_free_memmd, rc); lmj_size = sizeof(struct lov_user_md_join) + - lsm->lsm_stripe_count * - sizeof(struct lov_user_ost_data_join); + lsm->lsm_stripe_count * + sizeof(struct lov_user_ost_data_join); OBD_ALLOC(lmj, lmj_size); if (!lmj) GOTO(out_free_memmd, rc = -ENOMEM); @@ -1709,7 +1857,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, lmm = (struct lov_mds_md *)lmj; lmmsize = lmj_size; out_free_memmd: - obd_free_memmd(sbi->ll_osc_exp, &lsm); + obd_free_memmd(sbi->ll_dt_exp, &lsm); } out: *lmmp = lmm; @@ -1717,6 +1865,7 @@ out: *request = req; return rc; } + static int ll_lov_setea(struct inode *inode, struct file *file, unsigned long arg) { @@ -1764,7 +1913,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum)); if (rc == 0) { put_user(0, &lump->lmm_stripe_count); - rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), + rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, ll_i2info(inode)->lli_smd, lump); } RETURN(rc); @@ -1777,7 +1926,7 @@ static int ll_lov_getstripe(struct inode *inode, unsigned long arg) if (!lsm) RETURN(-ENODATA); - return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm, + return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm, (void *)arg); } @@ -1872,7 +2021,7 @@ static int join_file(struct inode *head_inode, struct file *head_filp, struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = head_filp->f_flags|O_JOIN_FILE}; struct lustre_handle lockh; - struct mdc_op_data *op_data; + struct md_op_data *op_data; __u32 hsize = head_inode->i_size >> 32; __u32 tsize = head_inode->i_size; int rc; @@ -1882,26 +2031,27 @@ static int join_file(struct inode *head_inode, struct file *head_filp, tail_inode = tail_dentry->d_inode; tail_parent = tail_dentry->d_parent->d_inode; - OBD_ALLOC_PTR(op_data); - if (op_data == NULL) { - RETURN(-ENOMEM); - } + op_data = ll_prep_md_op_data(NULL, head_inode, tail_parent, + tail_dentry->d_name.name, + tail_dentry->d_name.len, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); - ll_prepare_mdc_op_data(op_data, head_inode, tail_parent, - tail_dentry->d_name.name, - tail_dentry->d_name.len, 0); - rc = mdc_enqueue(ll_i2mdcexp(head_inode), LDLM_IBITS, &oit, LCK_PW, - op_data, &lockh, &tsize, 0, ldlm_completion_ast, - ll_mdc_blocking_ast, &hsize, 0); + rc = md_enqueue(ll_i2mdexp(head_inode), LDLM_IBITS, &oit, LCK_CW, + op_data, &lockh, &tsize, 0, ldlm_completion_ast, + ll_md_blocking_ast, &hsize, 0); + ll_finish_md_op_data(op_data); if (rc < 0) GOTO(out, rc); rc = oit.d.lustre.it_status; - if (rc < 0) { + if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) { + rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit); ptlrpc_req_finished((struct ptlrpc_request *) - oit.d.lustre.it_data); + oit.d.lustre.it_data); GOTO(out, rc); } @@ -1912,8 +2062,6 @@ static int join_file(struct inode *head_inode, struct file *head_filp, } ll_release_openhandle(head_filp->f_dentry, &oit); out: - if (op_data) - OBD_FREE_PTR(op_data); ll_intent_release(&oit); RETURN(rc); } @@ -1987,18 +2135,18 @@ cleanup: switch (cleanup_phase) { case 3: ll_tree_unlock(&second_tree); - obd_cancel_unused(ll_i2obdexp(second), + obd_cancel_unused(ll_i2dtexp(second), ll_i2info(second)->lli_smd, 0, NULL); case 2: ll_tree_unlock(&first_tree); - obd_cancel_unused(ll_i2obdexp(first), + obd_cancel_unused(ll_i2dtexp(first), ll_i2info(first)->lli_smd, 0, NULL); case 1: filp_close(tail_filp, 0); if (tail) iput(tail); if (head && rc == 0) { - obd_free_memmd(ll_i2sbi(head)->ll_osc_exp, + obd_free_memmd(ll_i2sbi(head)->ll_dt_exp, &hlli->lli_smd); hlli->lli_smd = NULL; } @@ -2028,15 +2176,17 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) if (!it_disposition(it, DISP_OPEN_OPEN)) RETURN(0); + LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0); + OBD_ALLOC(och, sizeof(*och)); if (!och) GOTO(out, rc = -ENOMEM); - ll_och_fill(ll_i2info(inode), it, och); - - rc = ll_close_inode_openhandle(inode, och); + ll_och_fill(ll_i2sbi(inode)->ll_md_exp, + ll_i2info(inode), it, och); - OBD_FREE(och, sizeof(*och)); + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, + inode, och); out: /* this one is in place of ll_file_open */ ptlrpc_req_finished(it->d.lustre.it_data); @@ -2053,13 +2203,12 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, inode->i_generation, inode, cmd); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_IOCTL); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ RETURN(-ENOTTY); - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); switch(cmd) { case LL_IOC_GETFLAGS: /* Get the current value of the file flags */ @@ -2124,8 +2273,26 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, case EXT3_IOC_SETVERSION_OLD: case EXT3_IOC_SETVERSION: */ + case LL_IOC_FLUSHCTX: + RETURN(ll_flush_ctx(inode)); + case LL_IOC_GETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_getfacl(inode, &ioc)); + } + case LL_IOC_SETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_setfacl(inode, &ioc)); + } default: - RETURN(obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL, + RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, (void *)arg)); } } @@ -2142,8 +2309,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n", inode->i_ino, inode->i_generation, inode, retval, retval, origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET"); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_SEEK); - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); if (origin == 2) { /* SEEK_END */ int nonblock = 0, rc; @@ -2175,7 +2341,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) } retval = offset; } - + RETURN(retval); } @@ -2184,14 +2350,13 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct ll_fid fid; struct ptlrpc_request *req; + struct obd_capa *oc; int rc, err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_FSYNC); - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_FSYNC); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); /* fsync's caller has already called _fdata{sync,write}, we want * that IO to finish before calling the osc and mdc sync methods */ @@ -2209,29 +2374,36 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) rc = err; } - ll_inode2fid(&fid, inode); - err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req); + oc = ll_mdscapa_get(inode); + err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, + &req); + capa_put(oc); if (!rc) rc = err; if (!err) ptlrpc_req_finished(req); if (data && lsm) { - struct obdo *oa = obdo_alloc(); - + struct obdo *oa; + + OBDO_ALLOC(oa); if (!oa) RETURN(rc ? rc : -ENOMEM); oa->o_id = lsm->lsm_object_id; - oa->o_valid = OBD_MD_FLID; + oa->o_gr = lsm->lsm_object_gr; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); + OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLGROUP); - err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm, - 0, OBD_OBJECT_EOF); + oc = ll_osscapa_get(inode, 0, CAPA_OPC_OSS_WRITE); + err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm, + 0, OBD_OBJECT_EOF, oc); + capa_put(oc); if (!rc) rc = err; - obdo_free(oa); + OBDO_FREE(oa); } RETURN(rc); @@ -2242,7 +2414,10 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) struct inode *inode = file->f_dentry->d_inode; struct ll_sb_info *sbi = ll_i2sbi(inode); struct ldlm_res_id res_id = - { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} }; + { .name = { fid_seq(ll_inode2fid(inode)), + fid_oid(ll_inode2fid(inode)), + fid_ver(ll_inode2fid(inode)), + LDLM_FLOCK} }; struct lustre_handle lockh = {0}; ldlm_policy_data_t flock; ldlm_mode_t mode = 0; @@ -2252,8 +2427,9 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", inode->i_ino, file_lock); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_FLOCK); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); + if (file_lock->fl_flags & FL_FLOCK) { LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK)); /* set missing params for flock() calls */ @@ -2318,7 +2494,7 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid, flags, mode, flock.l_flock.start, flock.l_flock.end); - rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, res_id, + rc = ldlm_cli_enqueue(sbi->ll_md_exp, NULL, &res_id, LDLM_FLOCK, &flock, mode, &flags, NULL, ldlm_flock_completion_ast, NULL, file_lock, NULL, 0, NULL, &lockh, 0); @@ -2343,24 +2519,20 @@ int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) int ll_have_md_lock(struct inode *inode, __u64 bits) { struct lustre_handle lockh; - struct ldlm_res_id res_id = { .name = {0} }; - struct obd_device *obddev; ldlm_policy_data_t policy = { .l_inodebits = {bits}}; + struct lu_fid *fid; int flags; ENTRY; if (!inode) RETURN(0); - obddev = ll_i2mdcexp(inode)->exp_obd; - res_id.name[0] = inode->i_ino; - res_id.name[1] = inode->i_generation; - - CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); + fid = &ll_i2info(inode)->lli_fid; + CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS, - &policy, LCK_CR|LCK_CW|LCK_PR, &lockh)) { + if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy, + LCK_CR|LCK_CW|LCK_PR, &lockh)) { RETURN(1); } @@ -2392,6 +2564,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) { struct inode *inode = dentry->d_inode; struct ptlrpc_request *req = NULL; + struct ll_sb_info *sbi; struct obd_export *exp; int rc; ENTRY; @@ -2400,32 +2573,41 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) CERROR("REPORT THIS LINE TO PETER\n"); RETURN(0); } + sbi = ll_i2sbi(inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", inode->i_ino, inode->i_generation, inode, dentry->d_name.name); #if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REVALIDATE, 1); #endif - exp = ll_i2mdcexp(inode); + exp = ll_i2mdexp(inode); if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) { struct lookup_intent oit = { .it_op = IT_GETATTR }; - struct mdc_op_data op_data; + struct md_op_data *op_data; /* Call getattr by fid, so do not provide name at all. */ - ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode, - dentry->d_inode, NULL, 0, 0); - rc = mdc_intent_lock(exp, &op_data, NULL, 0, - /* we are not interested in name - based lookup */ - &oit, 0, &req, - ll_mdc_blocking_ast, 0); + op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode, + dentry->d_inode, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + oit.it_flags |= O_CHECK_STALE; + rc = md_intent_lock(exp, op_data, NULL, 0, + /* we are not interested in name + based lookup */ + &oit, 0, &req, + ll_md_blocking_ast, 0); + ll_finish_md_op_data(op_data); + oit.it_flags &= ~O_CHECK_STALE; if (rc < 0) { rc = ll_inode_revalidate_fini(inode, rc); GOTO (out, rc); } - - rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry); + + rc = ll_revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry); if (rc != 0) { ll_intent_release(&oit); GOTO(out, rc); @@ -2443,42 +2625,47 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) ll_lookup_finish_locks(&oit, dentry); } else if (!ll_have_md_lock(dentry->d_inode, - MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) { + MDS_INODELOCK_UPDATE)) { struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); - struct ll_fid fid; obd_valid valid = OBD_MD_FLGETATTR; + struct obd_capa *oc; int ealen = 0; if (S_ISREG(inode->i_mode)) { rc = ll_get_max_mdsize(sbi, &ealen); - if (rc) - RETURN(rc); + if (rc) + RETURN(rc); valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); + /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one + * capa for this inode. Because we only keep capas of dirs + * fresh. */ + oc = ll_mdscapa_get(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, + ealen, &req); + capa_put(oc); if (rc) { rc = ll_inode_revalidate_fini(inode, rc); RETURN(rc); } - rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF, + rc = ll_prep_inode(&inode, req, REPLY_REC_OFF, NULL); if (rc) GOTO(out, rc); } /* if object not yet allocated, don't validate size */ - if (ll_i2info(inode)->lli_smd == NULL) + if (ll_i2info(inode)->lli_smd == NULL) GOTO(out, rc = 0); /* ll_glimpse_size will prefer locally cached writes if they extend * the file */ rc = ll_glimpse_size(inode, 0); - + EXIT; out: ptlrpc_req_finished(req); - RETURN(rc); + return rc; } #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) @@ -2489,7 +2676,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, int res = 0; res = ll_inode_revalidate_it(de, it); - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1); if (res) return res; @@ -2507,7 +2694,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, #ifdef HAVE_INODE_BLKSIZE stat->blksize = inode->i_blksize; #else - stat->blksize = 1<<inode->i_blkbits; + stat->blksize = 1 << inode->i_blkbits; #endif ll_inode_size_lock(inode, 0); @@ -2521,7 +2708,6 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) { struct lookup_intent it = { .it_op = IT_GETATTR }; - ll_vfs_ops_tally(ll_i2sbi(de->d_inode), VFS_OPS_GETATTR); return ll_getattr_it(mnt, de, &it, stat); } #endif @@ -2556,8 +2742,10 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) { CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n", inode->i_ino, inode->i_generation, inode, mask); - - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_INODE_PERMISSION); + if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) + return lustre_check_remote_perm(inode, mask); + + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); return generic_permission(inode, mask, lustre_check_acl); } #else @@ -2572,7 +2760,11 @@ int ll_inode_permission(struct inode *inode, int mask) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n", inode->i_ino, inode->i_generation, inode, mask); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_INODE_PERMISSION); + + if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) + return lustre_check_remote_perm(inode, mask); + + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); if ((mask & MAY_WRITE) && IS_RDONLY(inode) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) @@ -2607,11 +2799,12 @@ check_capabilities: if (capable(CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))) return 0; - + return -EACCES; } #endif +/* -o localflock - only provides locally consistent flock locks */ struct file_operations ll_file_operations = { .read = ll_file_read, .write = ll_file_write, @@ -2624,10 +2817,6 @@ struct file_operations ll_file_operations = { .sendfile = ll_file_sendfile, #endif .fsync = ll_fsync, -#ifdef HAVE_F_OP_FLOCK - .flock = ll_file_noflock, -#endif - .lock = ll_file_noflock }; struct file_operations ll_file_operations_flock = { @@ -2648,6 +2837,24 @@ struct file_operations ll_file_operations_flock = { .lock = ll_file_flock }; +/* These are for -o noflock - to return ENOSYS on flock calls */ +struct file_operations ll_file_operations_noflock = { + .read = ll_file_read, + .write = ll_file_write, + .ioctl = ll_file_ioctl, + .open = ll_file_open, + .release = ll_file_release, + .mmap = ll_file_mmap, + .llseek = ll_file_seek, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + .sendfile = ll_file_sendfile, +#endif + .fsync = ll_fsync, +#ifdef HAVE_F_OP_FLOCK + .flock = ll_file_noflock, +#endif + .lock = ll_file_noflock +}; struct inode_operations ll_file_inode_operations = { #ifdef LUSTRE_KERNEL_VERSION diff --git a/lustre/llite/llite_capa.c b/lustre/llite/llite_capa.c new file mode 100644 index 0000000..536644d --- /dev/null +++ b/lustre/llite/llite_capa.c @@ -0,0 +1,638 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * Author: Lai Siyao <lsy@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include <linux/fs.h> +#include <linux/version.h> +#include <asm/uaccess.h> +#include <linux/file.h> +#include <linux/kmod.h> + +#include <lustre_lite.h> +#include "llite_internal.h" + +/* for obd_capa.c_list, client capa might stay in three places: + * 1. ll_capa_list. + * 2. ll_idle_capas. + * 3. stand alone: just allocated. + */ + +/* capas for oss writeback and those failed to renew */ +static LIST_HEAD(ll_idle_capas); +static struct ptlrpc_thread ll_capa_thread; +static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT]; + +/* llite capa renewal timer */ +struct timer_list ll_capa_timer; +/* for debug: indicate whether capa on llite is enabled or not */ +static atomic_t ll_capa_debug = ATOMIC_INIT(0); +static unsigned long long ll_capa_renewed = 0; +static unsigned long long ll_capa_renewal_noent = 0; +static unsigned long long ll_capa_renewal_failed = 0; +static unsigned long long ll_capa_renewal_retries = 0; + +static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry) +{ + if (time_before(expiry, ll_capa_timer.expires) || + !timer_pending(&ll_capa_timer)) { + mod_timer(&ll_capa_timer, expiry); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "ll_capa_timer update: %lu/%lu by", expiry, jiffies); + } +} + +static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa) +{ + return cfs_time_sub(ocapa->c_expiry, + cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2); +} + +static inline int capa_is_to_expire(struct obd_capa *ocapa) +{ + return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current()); +} + +static inline int have_expired_capa(void) +{ + struct obd_capa *ocapa = NULL; + int expired = 0; + + /* if ll_capa_list has client capa to expire or ll_idle_capas has + * expired capa, return 1. + */ + spin_lock(&capa_lock); + if (!list_empty(ll_capa_list)) { + ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list); + expired = capa_is_to_expire(ocapa); + if (!expired) + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } else if (!list_empty(&ll_idle_capas)) { + ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list); + expired = capa_is_expired(ocapa); + if (!expired) + update_capa_timer(ocapa, ocapa->c_expiry); + } + spin_unlock(&capa_lock); + + if (expired) + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired"); + return expired; +} + +static inline int ll_capa_check_stop(void) +{ + return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0; +} + +static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head) +{ + struct obd_capa *tmp; + struct list_head *before = NULL; + + /* TODO: client capa is sorted by expiry, this could be optimized */ + list_for_each_entry_reverse(tmp, head, c_list) { + if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) { + before = &tmp->c_list; + break; + } + } + + LASSERT(&ocapa->c_list != before); + list_add(&ocapa->c_list, before ?: head); +} + +static inline int obd_capa_open_count(struct obd_capa *oc) +{ + struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode); + return atomic_read(&lli->lli_open_count); +} + +static void ll_delete_capa(struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode); + + if (capa_for_mds(&ocapa->c_capa)) { + LASSERT(lli->lli_mds_capa == ocapa); + lli->lli_mds_capa = NULL; + } else if (capa_for_oss(&ocapa->c_capa)) { + list_del_init(&ocapa->u.cli.lli_list); + } + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client"); + list_del(&ocapa->c_list); + capa_count[CAPA_SITE_CLIENT]--; + free_capa(ocapa); +} + +/* three places where client capa is deleted: + * 1. capa_thread_main(), main place to delete expired capa. + * 2. ll_clear_inode_capas() in ll_clear_inode(). + * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate(). + */ +static int capa_thread_main(void *unused) +{ + struct obd_capa *ocapa, *tmp, *next; + struct inode *inode = NULL; + struct l_wait_info lwi = { 0 }; + int rc; + ENTRY; + + cfs_daemonize("ll_capa"); + + ll_capa_thread.t_flags = SVC_RUNNING; + wake_up(&ll_capa_thread.t_ctl_waitq); + + while (1) { + l_wait_event(ll_capa_thread.t_ctl_waitq, + (ll_capa_check_stop() || have_expired_capa()), + &lwi); + + if (ll_capa_check_stop()) + break; + + next = NULL; + + spin_lock(&capa_lock); + list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) { + LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC); + + if (!capa_is_to_expire(ocapa)) { + next = ocapa; + break; + } + + list_del_init(&ocapa->c_list); + + /* for MDS capability, only renew those which belong to + * dir, or its inode is opened, or client holds LOOKUP + * lock. + */ + if (capa_for_mds(&ocapa->c_capa) && + !S_ISDIR(ocapa->u.cli.inode->i_mode) && + obd_capa_open_count(ocapa) == 0 && + !ll_have_md_lock(ocapa->u.cli.inode, + MDS_INODELOCK_LOOKUP)) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "skip renewal for"); + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + /* for OSS capability, only renew those whose inode is + * opened. + */ + if (capa_for_oss(&ocapa->c_capa) && + obd_capa_open_count(ocapa) == 0) { + /* oss capa with open count == 0 won't renew, + * move to idle list */ + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + /* NB iput() is in ll_update_capa() */ + inode = igrab(ocapa->u.cli.inode); + if (inode == NULL) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "igrab failed for"); + continue; + } + + capa_get(ocapa); + ll_capa_renewed++; + spin_unlock(&capa_lock); + + rc = md_renew_capa(ll_i2mdexp(inode), ocapa, + ll_update_capa); + spin_lock(&capa_lock); + if (rc) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renew failed: %d", rc); + ll_capa_renewal_failed++; + } + } + + if (next) + update_capa_timer(next, capa_renewal_time(next)); + + list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) { + if (!capa_is_expired(ocapa)) { + if (!next) + update_capa_timer(ocapa, ocapa->c_expiry); + break; + } + + if (atomic_read(&ocapa->c_refc)) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "expired(c_refc %d), don't release", + atomic_read(&ocapa->c_refc)); + /* don't try to renew any more */ + list_del_init(&ocapa->c_list); + continue; + } + + /* expired capa is released. */ + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired"); + ll_delete_capa(ocapa); + } + + spin_unlock(&capa_lock); + } + + ll_capa_thread.t_flags = SVC_STOPPED; + wake_up(&ll_capa_thread.t_ctl_waitq); + RETURN(0); +} + +void ll_capa_timer_callback(unsigned long unused) +{ + wake_up(&ll_capa_thread.t_ctl_waitq); +} + +int ll_capa_thread_start(void) +{ + int rc; + ENTRY; + + init_waitqueue_head(&ll_capa_thread.t_ctl_waitq); + + rc = kernel_thread(capa_thread_main, NULL, 0); + if (rc < 0) { + CERROR("cannot start expired capa thread: rc %d\n", rc); + RETURN(rc); + } + wait_event(ll_capa_thread.t_ctl_waitq, + ll_capa_thread.t_flags & SVC_RUNNING); + + RETURN(0); +} + +void ll_capa_thread_stop(void) +{ + ll_capa_thread.t_flags = SVC_STOPPING; + wake_up(&ll_capa_thread.t_ctl_waitq); + wait_event(ll_capa_thread.t_ctl_waitq, + ll_capa_thread.t_flags & SVC_STOPPED); +} + +static struct obd_capa *do_lookup_oss_capa(struct inode *inode, uid_t uid, + int opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + + /* inside capa_lock */ + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (uid != capa_uid(&ocapa->c_capa)) + continue; + if ((capa_opc(&ocapa->c_capa) & opc) != opc) + continue; + + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + return ocapa; + } + + return NULL; +} + +/* FIXME: once uid is 0, this is mmaped IO, or fsync, truncate. */ +struct obd_capa *ll_osscapa_get(struct inode *inode, uid_t uid, __u64 opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + int found = 0; + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) + return NULL; + ENTRY; + + LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW || + opc == CAPA_OPC_OSS_TRUNC); + + spin_lock(&capa_lock); + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (capa_is_expired(ocapa)) + continue; + if (uid != 0 && uid != capa_uid(&ocapa->c_capa)) + continue; + if ((opc & CAPA_OPC_OSS_WRITE) && + capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) { + found = 1; break; + } else if ((opc & CAPA_OPC_OSS_READ) && + capa_opc_supported(&ocapa->c_capa, + CAPA_OPC_OSS_READ)) { + found = 1; break; + } else if ((opc & CAPA_OPC_OSS_TRUNC) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; break; + } + } + + if (found) { + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + capa_get(ocapa); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + } else { + ocapa = NULL; + + if (atomic_read(&ll_capa_debug)) { + CERROR("no capability for "DFID" opc "LPX64"\n", + PFID(&lli->lli_fid), opc); + atomic_set(&ll_capa_debug, 0); + } + } + spin_unlock(&capa_lock); + + RETURN(ocapa); +} + +struct obd_capa *ll_mdscapa_get(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + ENTRY; + + LASSERT(inode != NULL); + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0) + RETURN(NULL); + + spin_lock(&capa_lock); + ocapa = capa_get(lli->lli_mds_capa); + spin_unlock(&capa_lock); + if (!ocapa && atomic_read(&ll_capa_debug)) { + CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid)); + atomic_set(&ll_capa_debug, 0); + } + + RETURN(ocapa); +} + +static struct obd_capa *do_add_mds_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *old = lli->lli_mds_capa; + struct lustre_capa *capa = &ocapa->c_capa; + + if (!old) { + ocapa->u.cli.inode = inode; + lli->lli_mds_capa = ocapa; + capa_count[CAPA_SITE_CLIENT]++; + + DEBUG_CAPA(D_SEC, capa, "add MDS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update MDS"); + + free_capa(ocapa); + ocapa = old; + } + return ocapa; +} + +static inline void inode_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *tmp; + struct list_head *next = NULL; + + /* capa is sorted in lli_oss_capas so lookup can always find the + * latest one */ + list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) { + if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) { + next = &tmp->u.cli.lli_list; + break; + } + } + LASSERT(&ocapa->u.cli.lli_list != next); + list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas); +} + +static struct obd_capa *do_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct obd_capa *old; + struct lustre_capa *capa = &ocapa->c_capa; + + LASSERTF(S_ISREG(inode->i_mode), + "inode has oss capa, but not regular file, mode: %d\n", + inode->i_mode); + + /* FIXME: can't replace it so easily with fine-grained opc */ + old = do_lookup_oss_capa(inode, capa_uid(capa), + capa_opc(capa) & CAPA_OPC_OSS_ONLY); + if (!old) { + ocapa->u.cli.inode = inode; + INIT_LIST_HEAD(&ocapa->u.cli.lli_list); + capa_count[CAPA_SITE_CLIENT]++; + + DEBUG_CAPA(D_SEC, capa, "add OSS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update OSS"); + + free_capa(ocapa); + ocapa = old; + } + + inode_add_oss_capa(inode, ocapa); + return ocapa; +} + +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa) +{ + spin_lock(&capa_lock); + ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) : + do_add_oss_capa(inode, ocapa); + + /* truncate capa won't renew */ + if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) { + set_capa_expiry(ocapa); + list_del(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } + + spin_unlock(&capa_lock); + + atomic_set(&ll_capa_debug, 1); + return ocapa; +} + +static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay) +{ + /* NB: set a fake expiry for this capa to prevent it renew too soon */ + oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay)); +} + +int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa) +{ + struct inode *inode = ocapa->u.cli.inode; + int rc = 0; + ENTRY; + + LASSERT(ocapa); + + if (IS_ERR(capa)) { + /* set error code */ + rc = PTR_ERR(capa); + spin_lock(&capa_lock); + if (rc == -ENOENT) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "renewal canceled because object removed"); + ll_capa_renewal_noent++; + } else { + ll_capa_renewal_failed++; + + /* failed capa won't be renewed any longer, but if -EIO, + * client might be doing recovery, retry in 2 min. */ + if (rc == -EIO && !capa_is_expired(ocapa)) { + delay_capa_renew(ocapa, 120); + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed: -EIO, retry in 2 mins"); + ll_capa_renewal_retries++; + GOTO(retry, rc); + } else { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed(rc: %d) for", rc); + } + } + + list_del(&ocapa->c_list); + sort_add_capa(ocapa, &ll_idle_capas); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + return rc; + } + + spin_lock(&ocapa->c_lock); + LASSERT(!memcmp(&ocapa->c_capa, capa, + offsetof(struct lustre_capa, lc_flags))); + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + spin_unlock(&ocapa->c_lock); + + spin_lock(&capa_lock); + if (capa_for_oss(capa)) + inode_add_oss_capa(inode, ocapa); + DEBUG_CAPA(D_SEC, capa, "renew"); + EXIT; +retry: + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + return rc; +} + +void ll_capa_open(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + atomic_inc(&lli->lli_open_count); +} + +void ll_capa_close(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + atomic_dec(&lli->lli_open_count); +} + +/* delete CAPA_OPC_OSS_TRUNC only */ +void ll_truncate_free_capa(struct obd_capa *ocapa) +{ + if (!ocapa) + return; + + LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate"); + + capa_put(ocapa); + spin_lock(&capa_lock); + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} + +void ll_clear_inode_capas(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa, *tmp; + + spin_lock(&capa_lock); + ocapa = lli->lli_mds_capa; + if (ocapa) + ll_delete_capa(ocapa); + + list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas, + u.cli.lli_list) + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} + +void ll_print_capa_stat(struct ll_sb_info *sbi) +{ + if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + LCONSOLE_INFO("Fid capabilities renewed: %llu\n" + "Fid capabilities renewal ENOENT: %llu\n" + "Fid capabilities failed to renew: %llu\n" + "Fid capabilities renewal retries: %llu\n", + ll_capa_renewed, ll_capa_renewal_noent, + ll_capa_renewal_failed, ll_capa_renewal_retries); +} diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index f310bf1..ffddd6e 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -25,6 +25,7 @@ #define DEBUG_SUBSYSTEM S_LLITE +//#include <lustre_mdc.h> #include <lustre_lite.h> #include "llite_internal.h" @@ -32,144 +33,246 @@ void llap_write_pending(struct inode *inode, struct ll_async_page *llap) { struct ll_inode_info *lli = ll_i2info(inode); + + ENTRY; spin_lock(&lli->lli_lock); - list_add(&llap->llap_pending_write, &lli->lli_pending_write_llaps); + lli->lli_flags |= LLIF_SOM_DIRTY; + if (llap && list_empty(&llap->llap_pending_write)) + list_add(&llap->llap_pending_write, + &lli->lli_pending_write_llaps); spin_unlock(&lli->lli_lock); + EXIT; } /* record that a write has completed */ -void llap_write_complete(struct inode *inode, struct ll_async_page *llap) -{ - struct ll_inode_info *lli = ll_i2info(inode); - spin_lock(&lli->lli_lock); - list_del_init(&llap->llap_pending_write); - spin_unlock(&lli->lli_lock); -} - -void ll_open_complete(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - spin_lock(&lli->lli_lock); - lli->lli_send_done_writing = 0; - spin_unlock(&lli->lli_lock); -} - -/* if we close with writes in flight then we want the completion or cancelation - * of those writes to send a DONE_WRITING rpc to the MDS */ -int ll_is_inode_dirty(struct inode *inode) +int llap_write_complete(struct inode *inode, struct ll_async_page *llap) { struct ll_inode_info *lli = ll_i2info(inode); int rc = 0; + ENTRY; - spin_lock(&lli->lli_lock); - if (!list_empty(&lli->lli_pending_write_llaps)) + if (llap && !list_empty(&llap->llap_pending_write)) { + list_del_init(&llap->llap_pending_write); rc = 1; + } spin_unlock(&lli->lli_lock); RETURN(rc); } -void ll_try_done_writing(struct inode *inode) +/* Queue DONE_WRITING if + * - done writing is allowed; + * - inode has no no dirty pages; */ +void ll_queue_done_writing(struct inode *inode, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; spin_lock(&lli->lli_lock); + lli->lli_flags |= flags; - if (lli->lli_send_done_writing && + if ((lli->lli_flags & LLIF_DONE_WRITING) && list_empty(&lli->lli_pending_write_llaps)) { - + struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; + + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + CWARN("ino %lu/%u(flags %lu) som valid it just after " + "recovery\n", + inode->i_ino, inode->i_generation, + lli->lli_flags); + /* DONE_WRITING is allowed and inode has no dirty page. */ spin_lock(&lcq->lcq_lock); - if (list_empty(&lli->lli_close_item)) { - CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", - inode->i_ino, inode->i_generation); - igrab(inode); - list_add_tail(&lli->lli_close_item, &lcq->lcq_list); - wake_up(&lcq->lcq_waitq); - } + + LASSERT(list_empty(&lli->lli_close_list)); + CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", + inode->i_ino, inode->i_generation); + list_add_tail(&lli->lli_close_list, &lcq->lcq_head); + + /* Avoid a concurrent insertion into the close thread queue: + * an inode is already in the close thread, open(), write(), + * close() happen, epoch is closed as the inode is marked as + * LLIF_EPOCH_PENDING. When pages are written inode should not + * be inserted into the queue again, clear this flag to avoid + * it. */ + lli->lli_flags &= ~LLIF_DONE_WRITING; + + wake_up(&lcq->lcq_waitq); spin_unlock(&lcq->lcq_lock); } - spin_unlock(&lli->lli_lock); } -/* The MDS needs us to get the real file attributes, then send a DONE_WRITING */ -void ll_queue_done_writing(struct inode *inode) +/* Close epoch and send Size-on-MDS attribute update if possible. + * Call this under @lli->lli_lock spinlock. */ +void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, + struct obd_client_handle **och, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); ENTRY; spin_lock(&lli->lli_lock); - lli->lli_send_done_writing = 1; - spin_unlock(&lli->lli_lock); + if (!(list_empty(&lli->lli_pending_write_llaps))) { + if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { + LASSERT(*och != NULL); + LASSERT(lli->lli_pending_och == NULL); + /* Inode is dirty and there is no pending write done + * request yet, DONE_WRITE is to be sent later. */ + lli->lli_flags |= LLIF_EPOCH_PENDING; + lli->lli_pending_och = *och; + spin_unlock(&lli->lli_lock); + + inode = igrab(inode); + LASSERT(inode); + GOTO(out, 0); + } + if (flags & LLIF_DONE_WRITING) { + /* Some pages are still dirty, it is early to send + * DONE_WRITE. Wait untill all pages will be flushed + * and try DONE_WRITE again later. */ + LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING)); + lli->lli_flags |= LLIF_DONE_WRITING; + spin_unlock(&lli->lli_lock); + + inode = igrab(inode); + LASSERT(inode); + GOTO(out, 0); + } + } + CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID"\n", + ll_i2info(inode)->lli_ioepoch, PFID(&lli->lli_fid)); + op_data->op_flags |= MF_EPOCH_CLOSE; + + if (flags & LLIF_DONE_WRITING) { + LASSERT(lli->lli_flags & LLIF_SOM_DIRTY); + LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING)); + *och = lli->lli_pending_och; + lli->lli_pending_och = NULL; + lli->lli_flags &= ~LLIF_EPOCH_PENDING; + } else { + /* Pack Size-on-MDS inode attributes only if they has changed */ + if (!(lli->lli_flags & LLIF_SOM_DIRTY)) { + spin_unlock(&lli->lli_lock); + GOTO(out, 0); + } + + /* There is a pending DONE_WRITE -- close epoch with no + * attribute change. */ + if (lli->lli_flags & LLIF_EPOCH_PENDING) { + spin_unlock(&lli->lli_lock); + GOTO(out, 0); + } + } - ll_try_done_writing(inode); + LASSERT(list_empty(&lli->lli_pending_write_llaps)); + lli->lli_flags &= ~LLIF_SOM_DIRTY; + spin_unlock(&lli->lli_lock); + op_data->op_flags |= MF_SOM_CHANGE; + + /* Check if Size-on-MDS attributes are valid. */ + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + CWARN("ino %lu/%u(flags %lu) som valid it just after " + "recovery\n", + inode->i_ino, inode->i_generation, lli->lli_flags); + + if (!ll_local_size(inode)) { + /* Send Size-on-MDS Attributes if valid. Atime is sent along + * with all the attributes. */ + op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET | + ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS; + } EXIT; +out: + return; } -#if 0 -/* If we know the file size and have the cookies: - * - send a DONE_WRITING rpc - * - * Otherwise: - * - get a whole-file lock - * - get the authoritative size and all cookies with GETATTRs - * - send a DONE_WRITING rpc - */ -static void ll_close_done_writing(struct inode *inode) +int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, + __u64 ioepoch) { struct ll_inode_info *lli = ll_i2info(inode); - ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF } }; - struct lustre_handle lockh = { 0 }; - struct obdo obdo; - obd_flag valid; - int rc, ast_flags = 0; + struct md_op_data *op_data; + struct obdo *oa; + int rc; ENTRY; - - memset(&obdo, 0, sizeof(obdo)); - if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags)) - goto rpc; - - rc = ll_extent_lock(NULL, inode, lli->lli_smd, LCK_PW, &policy, &lockh, - ast_flags); - if (rc != 0) { - CERROR("lock acquisition failed (%d): unable to send " - "DONE_WRITING for inode %lu/%u\n", rc, inode->i_ino, - inode->i_generation); - GOTO(out, rc); + + /* LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); */ + /* After recovery that can be valid. */ + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + CWARN("ino %lu/%u(flags %lu) som valid it just after " + "recovery\n", inode->i_ino, inode->i_generation, + lli->lli_flags); + + OBDO_ALLOC(oa); + OBD_ALLOC_PTR(op_data); + if (!oa || !op_data) { + CERROR("can't allocate memory for Size-on-MDS update.\n"); + RETURN(-ENOMEM); } - - rc = ll_lsm_getattr(ll_i2obdexp(inode), lli->lli_smd, &obdo); + rc = ll_inode_getattr(inode, oa); if (rc) { - CERROR("inode_getattr failed (%d): unable to send DONE_WRITING " - "for inode %lu/%u\n", rc, inode->i_ino, - inode->i_generation); - ll_extent_unlock(NULL, inode, lli->lli_smd, LCK_PW, &lockh); + CERROR("inode_getattr failed (%d): unable to send a " + "Size-on-MDS attribute update for inode %lu/%u\n", + rc, inode->i_ino, inode->i_generation); GOTO(out, rc); } + CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid)); + + md_from_obdo(op_data, oa, oa->o_valid); + memcpy(&op_data->op_handle, fh, sizeof(*fh)); + + op_data->op_ioepoch = ioepoch; + op_data->op_flags |= MF_SOM_CHANGE; + + rc = ll_md_setattr(inode, op_data); + EXIT; +out: + if (oa) + OBDO_FREE(oa); + if (op_data) + ll_finish_md_op_data(op_data); + return rc; +} - obdo_refresh_inode(inode, &obdo, valid); - - CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n", - lli->lli_smd->lsm_object_id, inode->i_size, inode->i_blocks, - 1<<inode->i_blkbits); - - set_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags); - - rc = ll_extent_unlock(NULL, inode, lli->lli_smd, LCK_PW, &lockh); - if (rc != ELDLM_OK) - CERROR("unlock failed (%d)? proceeding anyways...\n", rc); +/* Send a DONE_WRITING rpc, pack Size-on-MDS attributes into it, if possible */ +static void ll_done_writing(struct inode *inode) +{ + struct obd_client_handle *och = NULL; + struct md_op_data *op_data; + int rc; + ENTRY; - rpc: - obdo.o_id = inode->i_ino; - obdo.o_size = inode->i_size; - obdo.o_blocks = inode->i_blocks; - obdo.o_valid = OBD_MD_FLID | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + LASSERT(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM); + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) { + CERROR("can't allocate op_data\n"); + EXIT; + return; + } - rc = mdc_done_writing(ll_i2sbi(inode)->ll_mdc_exp, &obdo); - out: + ll_epoch_close(inode, op_data, &och, LLIF_DONE_WRITING); + /* If there is no @och, we do not do D_W yet. */ + if (och == NULL) + GOTO(out, 0); + + ll_pack_inode2opdata(inode, op_data, &och->och_fh); + + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och); + if (rc == -EAGAIN) { + /* MDS has instructed us to obtain Size-on-MDS attribute from + * OSTs and send setattr to back to MDS. */ + rc = ll_sizeonmds_update(inode, &och->och_fh, + op_data->op_ioepoch); + } else if (rc) { + CERROR("inode %lu mdc done_writing failed: rc = %d\n", + inode->i_ino, rc); + } +out: + ll_finish_md_op_data(op_data); + if (och) { + md_clear_open_replay_data(ll_i2sbi(inode)->ll_md_exp, och); + OBD_FREE_PTR(och); + } + EXIT; } -#endif static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) { @@ -177,13 +280,12 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) spin_lock(&lcq->lcq_lock); - if (lcq->lcq_list.next == NULL) - lli = ERR_PTR(-1); - else if (!list_empty(&lcq->lcq_list)) { - lli = list_entry(lcq->lcq_list.next, struct ll_inode_info, - lli_close_item); - list_del(&lli->lli_close_item); - } + if (!list_empty(&lcq->lcq_head)) { + lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, + lli_close_list); + list_del_init(&lli->lli_close_list); + } else if (atomic_read(&lcq->lcq_stop)) + lli = ERR_PTR(-EALREADY); spin_unlock(&lcq->lcq_lock); return lli; @@ -205,7 +307,7 @@ static int ll_close_thread(void *arg) while (1) { struct l_wait_info lwi = { 0 }; struct ll_inode_info *lli; - //struct inode *inode; + struct inode *inode; l_wait_event_exclusive(lcq->lcq_waitq, (lli = ll_close_next_lli(lcq)) != NULL, @@ -213,11 +315,14 @@ static int ll_close_thread(void *arg) if (IS_ERR(lli)) break; - //inode = ll_info2i(lli); - //ll_close_done_writing(inode); - //iput(inode); + inode = ll_info2i(lli); + CDEBUG(D_INFO, "done_writting for inode %lu/%u\n", + inode->i_ino, inode->i_generation); + ll_done_writing(inode); + iput(inode); } + CDEBUG(D_INFO, "ll_close exiting\n"); complete(&lcq->lcq_comp); RETURN(0); } @@ -232,7 +337,7 @@ int ll_close_thread_start(struct ll_close_queue **lcq_ret) return -ENOMEM; spin_lock_init(&lcq->lcq_lock); - INIT_LIST_HEAD(&lcq->lcq_list); + INIT_LIST_HEAD(&lcq->lcq_head); init_waitqueue_head(&lcq->lcq_waitq); init_completion(&lcq->lcq_comp); @@ -250,8 +355,10 @@ int ll_close_thread_start(struct ll_close_queue **lcq_ret) void ll_close_thread_shutdown(struct ll_close_queue *lcq) { init_completion(&lcq->lcq_comp); - lcq->lcq_list.next = NULL; + atomic_inc(&lcq->lcq_stop); wake_up(&lcq->lcq_waitq); wait_for_completion(&lcq->lcq_comp); OBD_FREE(lcq, sizeof(*lcq)); } + + diff --git a/lustre/llite/llite_fid.c b/lustre/llite/llite_fid.c new file mode 100644 index 0000000..85ad4f3 --- /dev/null +++ b/lustre/llite/llite_fid.c @@ -0,0 +1,52 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/random.h> +#include <linux/version.h> + +#include <lustre_fid.h> +#include <lustre_lite.h> +#include <lustre_ha.h> +#include <lustre_ver.h> +#include <lustre_dlm.h> +#include <lustre_disk.h> +#include "llite_internal.h" + +/* Build inode number on passed @fid */ +ino_t ll_fid_build_ino(struct ll_sb_info *sbi, + struct lu_fid *fid) +{ + ino_t ino; + ENTRY; + + /* + * Very stupid and having many downsides inode allocation algorithm + * based on fid. + */ + ino = fid_flatten(fid); + RETURN(ino & 0x7fffffff); +} diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 0d3e52b..9b1f5bd 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -5,6 +5,8 @@ #ifndef LLITE_INTERNAL_H #define LLITE_INTERNAL_H +# include <linux/lustre_acl.h> + #ifdef CONFIG_FS_POSIX_ACL # include <linux/fs.h> #ifdef HAVE_XATTR_ACL @@ -17,21 +19,7 @@ #include <lustre_debug.h> #include <lustre_ver.h> -#include <linux/lustre_version.h> #include <lustre_disk.h> /* for s2sbi */ - -/* -struct lustre_intent_data { - __u64 it_lock_handle[2]; - __u32 it_disposition; - __u32 it_status; - __u32 it_lock_mode; - }; */ - -/* If there is no FMODE_EXEC defined, make it to match nothing */ -#ifndef FMODE_EXEC -#define FMODE_EXEC 0 -#endif #define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0") #define LUSTRE_FPRIVATE(file) ((file)->private_data) @@ -63,34 +51,64 @@ extern struct file_operations ll_pgcache_seq_fops; #define LLI_INODE_MAGIC 0x111d0de5 #define LLI_INODE_DEAD 0xdeadd00d -#define LLI_F_HAVE_OST_SIZE_LOCK 0 -#define LLI_F_HAVE_MDS_SIZE_LOCK 1 + +/* remote client permission cache */ +#define REMOTE_PERM_HASHSIZE 16 + +/* llite setxid/access permission for user on remote client */ +struct ll_remote_perm { + struct hlist_node lrp_list; + uid_t lrp_uid; + gid_t lrp_gid; + uid_t lrp_fsuid; + gid_t lrp_fsgid; + int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this + is access permission with + lrp_fsuid/lrp_fsgid. */ +}; + +enum lli_flags { + /* MDS has an authority for the Size-on-MDS attributes. */ + LLIF_MDS_SIZE_LOCK = (1 << 0), + /* Epoch close is postponed. */ + LLIF_EPOCH_PENDING = (1 << 1), + /* DONE WRITING is allowed. */ + LLIF_DONE_WRITING = (1 << 2), + /* Sizeon-on-MDS attributes are changed. An attribute update needs to + * be sent to MDS. */ + LLIF_SOM_DIRTY = (1 << 3), +}; struct ll_inode_info { int lli_inode_magic; - struct semaphore lli_size_sem; + struct semaphore lli_size_sem; /* protect open and change size */ void *lli_size_sem_owner; - struct semaphore lli_open_sem; struct semaphore lli_write_sem; - struct lov_stripe_md *lli_smd; char *lli_symlink_name; __u64 lli_maxbytes; - __u64 lli_io_epoch; + __u64 lli_ioepoch; unsigned long lli_flags; - /* this lock protects s_d_w and p_w_ll and mmap_cnt */ + /* this lock protects posix_acl, pending_write_llaps, mmap_cnt */ spinlock_t lli_lock; struct list_head lli_pending_write_llaps; - int lli_send_done_writing; + struct list_head lli_close_list; + /* handle is to be sent to MDS later on done_writing and setattr. + * Open handle data are needed for the recovery to reconstruct + * the inode state on the MDS. XXX: recovery is not ready yet. */ + struct obd_client_handle *lli_pending_och; atomic_t lli_mmap_cnt; - struct list_head lli_close_item; - /* for writepage() only to communicate to fsync */ int lli_async_rc; struct posix_acl *lli_posix_acl; + /* remote permission hash */ + struct hlist_head *lli_remote_perms; + unsigned long lli_rmtperm_utime; + struct semaphore lli_rmtperm_sem; + struct list_head lli_dead_list; struct semaphore lli_och_sem; /* Protects access to och pointers @@ -107,6 +125,17 @@ struct ll_inode_info { #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif + + /* identifying fields for both metadata and data stacks. */ + struct lu_fid lli_fid; + struct lov_stripe_md *lli_smd; + + /* fid capability */ + /* open count currently used by capability only, indicate whether + * capability needs renewal */ + atomic_t lli_open_count; + struct obd_capa *lli_mds_capa; + struct list_head lli_oss_capas; }; /* @@ -127,6 +156,7 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) return container_of(inode, struct ll_inode_info, lli_vfs_inode); #else + CLASSERT(sizeof(inode->u) >= sizeof(struct ll_inode_info)); return (struct ll_inode_info *)&(inode->u.generic_ip); #endif } @@ -189,53 +219,38 @@ struct ll_rw_process_info { struct file *rw_last_file; }; -enum vfs_ops_list { - VFS_OPS_READ = 0, - VFS_OPS_WRITE, - VFS_OPS_IOCTL, - VFS_OPS_OPEN, - VFS_OPS_RELEASE, - VFS_OPS_MMAP, - VFS_OPS_SEEK, - VFS_OPS_FSYNC, - VFS_OPS_FLOCK, - VFS_OPS_SETATTR, - VFS_OPS_GETATTR, - VFS_OPS_SETXATTR, - VFS_OPS_GETXATTR, - VFS_OPS_LISTXATTR, - VFS_OPS_REMOVEXATTR, - VFS_OPS_TRUNCATE, - VFS_OPS_INODE_PERMISSION, - VFS_OPS_LAST, -}; - -enum vfs_track_type { - VFS_TRACK_ALL = 0, /* track all processes */ - VFS_TRACK_PID, /* track process with this pid */ - VFS_TRACK_PPID, /* track processes with this ppid */ - VFS_TRACK_GID, /* track processes with this gid */ - VFS_TRACK_LAST, +enum stats_track_type { + STATS_TRACK_ALL = 0, /* track all processes */ + STATS_TRACK_PID, /* track process with this pid */ + STATS_TRACK_PPID, /* track processes with this ppid */ + STATS_TRACK_GID, /* track processes with this gid */ + STATS_TRACK_LAST, }; /* flags for sbi->ll_flags */ -#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ -#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ -#define LL_SBI_FLOCK 0x04 -#define LL_SBI_USER_XATTR 0x08 /* support user xattr */ -#define LL_SBI_ACL 0x10 /* support ACL */ -#define LL_SBI_JOIN 0x20 /* support JOIN */ +#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ +#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ +#define LL_SBI_FLOCK 0x04 +#define LL_SBI_USER_XATTR 0x08 /* support user xattr */ +#define LL_SBI_ACL 0x10 /* support ACL */ +#define LL_SBI_JOIN 0x20 /* support JOIN */ +#define LL_SBI_RMT_CLIENT 0x40 /* remote client */ +#define LL_SBI_MDS_CAPA 0x80 /* support mds capa */ +#define LL_SBI_OSS_CAPA 0x100 /* support oss capa */ +#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */ struct ll_sb_info { struct list_head ll_list; /* this protects pglist and ra_info. It isn't safe to * grab from interrupt contexts */ spinlock_t ll_lock; + spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */ + spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */ struct obd_uuid ll_sb_uuid; - struct obd_export *ll_mdc_exp; - struct obd_export *ll_osc_exp; - struct proc_dir_entry *ll_proc_root; - obd_id ll_rootino; /* number of root inode */ + struct obd_export *ll_md_exp; + struct obd_export *ll_dt_exp; + struct proc_dir_entry* ll_proc_root; + struct lu_fid ll_root_fid; /* root object fid */ int ll_flags; struct list_head ll_conn_chain; /* per-conn chain of SBs */ @@ -270,9 +285,9 @@ struct ll_sb_info { unsigned int ll_offset_process_count; struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX]; unsigned int ll_rw_offset_entry_count; - struct lprocfs_stats *ll_vfs_ops_stats; - enum vfs_track_type ll_vfs_track_type; - int ll_vfs_track_id; + enum stats_track_type ll_stats_track_type; + int ll_stats_track_id; + int ll_rw_stats_on; }; #define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024) @@ -310,7 +325,7 @@ struct ll_readahead_state { unsigned long ras_consecutive_pages; /* * number of read requests after the last read-ahead window reset - * As window is reset on each seek, this is effectively the number + * As window is reset on each seek, this is effectively the number * on consecutive read request and is used to trigger read-ahead. */ unsigned long ras_consecutive_requests; @@ -337,7 +352,7 @@ struct ll_readahead_state { */ unsigned long ras_requests; /* - * Page index with respect to the current request, these value + * Page index with respect to the current request, these value * will not be accurate when dealing with reads issued via mmap. */ unsigned long ras_request_index; @@ -349,13 +364,17 @@ struct ll_readahead_state { struct list_head ras_read_beads; }; -extern kmem_cache_t *ll_file_data_slab; +struct ll_file_dir { +}; + +extern cfs_mem_cache_t *ll_file_data_slab; struct lustre_handle; struct ll_file_data { struct ll_readahead_state fd_ras; int fd_omode; struct lustre_handle fd_cwlockh; unsigned long fd_gid; + struct ll_file_dir fd_dir; __u32 fd_flags; }; @@ -384,7 +403,7 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2); #define LLAP_MAGIC 98764321 -extern kmem_cache_t *ll_async_page_slab; +extern cfs_mem_cache_t *ll_async_page_slab; extern size_t ll_async_page_slab_size; struct ll_async_page { int llap_magic; @@ -399,7 +418,9 @@ struct ll_async_page { struct list_head llap_pending_write; struct list_head llap_pglist_item; /* checksum for paranoid I/O debugging */ - __u32 llap_checksum; + __u32 llap_checksum; + /* uid who operate on this page, used to lookup fid capability only */ + uid_t llap_fsuid; }; /* @@ -434,12 +455,12 @@ struct ll_ra_read *ll_ra_read_get(struct file *f); int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc); void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); -void ll_vfs_ops_tally(struct ll_sb_info *sbi, int op); +void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count); #else static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc){return 0;} static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {} -static void ll_vfs_ops_tally(struct ll_sb_info *sbi, int op) {} +static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {} #endif @@ -452,13 +473,10 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir); struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *lic); struct dentry *ll_find_alias(struct inode *, struct dentry *); -int ll_mdc_cancel_unused(struct lustre_handle *, struct inode *, int flags, - void *opaque); -int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); -int ll_prepare_mdc_op_data(struct mdc_op_data *, - struct inode *i1, struct inode *i2, - const char *name, int namelen, int mode); +int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, + void *data, int flag); +int ll_md_cancel_unused(struct lustre_handle *, struct inode *, int flags, + void *opaque); #ifndef LUSTRE_KERNEL_VERSION struct lookup_intent *ll_convert_intent(struct open_intent *oit, int lookup_flags); @@ -471,10 +489,10 @@ int ll_writepage(struct page *page); void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction); +struct ll_async_page *llap_from_page(struct page *page, unsigned origin); extern struct cache_definition ll_cache_definition; void ll_removepage(struct page *page); int ll_readpage(struct file *file, struct page *page); -struct ll_async_page *llap_from_cookie(void *cookie); struct ll_async_page *llap_cast_private(struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); void ll_ra_accounting(struct ll_async_page *llap,struct address_space *mapping); @@ -484,6 +502,7 @@ int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t); /* llite/file.c */ extern struct file_operations ll_file_operations; extern struct file_operations ll_file_operations_flock; +extern struct file_operations ll_file_operations_noflock; extern struct inode_operations ll_file_inode_operations; extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *); extern int ll_have_md_lock(struct inode *inode, __u64 bits); @@ -495,16 +514,25 @@ int ll_extent_unlock(struct ll_file_data *, struct inode *, int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *); -int ll_glimpse_ioctl(struct ll_sb_info *sbi, +int ll_local_size(struct inode *inode); +int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, lstat_t *st); int ll_glimpse_size(struct inode *inode, int ast_flags); int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd, struct obd_client_handle *och); int ll_release_openhandle(struct dentry *, struct lookup_intent *); -int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, - struct file *file); -int ll_mdc_real_close(struct inode *inode, int flags); +int ll_md_close(struct obd_export *md_exp, struct inode *inode, + struct file *file); +int ll_md_real_close(struct inode *inode, int flags); +void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, + struct obd_client_handle **och, unsigned long flags); +int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, + __u64 ioepoch); +int ll_inode_getattr(struct inode *inode, struct obdo *obdo); +int ll_md_setattr(struct inode *inode, struct md_op_data *op_data); +void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, + struct lustre_handle *fh); extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file *file, size_t count, int rw); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) @@ -531,14 +559,15 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm, /* llite/dcache.c */ void ll_intent_drop_lock(struct lookup_intent *); void ll_intent_release(struct lookup_intent *); +int ll_drop_dentry(struct dentry *dentry); extern void ll_set_dd(struct dentry *de); int ll_drop_dentry(struct dentry *dentry); void ll_unhash_aliases(struct inode *); void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft); void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry); int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name); -int revalidate_it_finish(struct ptlrpc_request *request, int offset, - struct lookup_intent *it, struct dentry *de); +int ll_revalidate_it_finish(struct ptlrpc_request *request, int offset, + struct lookup_intent *it, struct dentry *de); /* llite/llite_lib.c */ extern struct super_operations lustre_super_operations; @@ -560,16 +589,18 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, __u64 max_age); void ll_update_inode(struct inode *inode, struct lustre_md *md); void ll_read_inode2(struct inode *inode, void *opaque); +void ll_delete_inode(struct inode *inode); int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +int ll_flush_ctx(struct inode *inode); #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT void ll_umount_begin(struct vfsmount *vfsmnt, int flags); #else void ll_umount_begin(struct super_block *sb); #endif int ll_remount_fs(struct super_block *sb, int *flags, char *data); -int ll_prep_inode(struct obd_export *exp, struct inode **inode, - struct ptlrpc_request *req, int offset, struct super_block *); +int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, + int offset, struct super_block *); void lustre_dump_dentry(struct dentry *, int recur); void lustre_dump_inode(struct inode *); struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, @@ -577,6 +608,13 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_process_config(struct lustre_cfg *lcfg); +int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); +int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); +struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, + struct inode *i1, struct inode *i2, + const char *name, int namelen, + int mode, __u32 opc); +void ll_finish_md_op_data(struct md_op_data *op_data); /* llite/llite_nfs.c */ extern struct export_operations lustre_export_operations; @@ -600,17 +638,16 @@ extern struct inode_operations ll_fast_symlink_inode_operations; /* llite/llite_close.c */ struct ll_close_queue { spinlock_t lcq_lock; - struct list_head lcq_list; + struct list_head lcq_head; wait_queue_head_t lcq_waitq; struct completion lcq_comp; + atomic_t lcq_stop; }; void llap_write_pending(struct inode *inode, struct ll_async_page *llap); -void llap_write_complete(struct inode *inode, struct ll_async_page *llap); -void ll_open_complete(struct inode *inode); -int ll_is_inode_dirty(struct inode *inode); -void ll_try_done_writing(struct inode *inode); -void ll_queue_done_writing(struct inode *inode); +int llap_write_complete(struct inode *inode, struct ll_async_page *llap); +int ll_inode_dirty(struct inode *inode, unsigned long flags); +void ll_queue_done_writing(struct inode *inode, unsigned long flags); void ll_close_thread_shutdown(struct ll_close_queue *lcq); int ll_close_thread_start(struct ll_close_queue **lcq_ret); @@ -653,20 +690,20 @@ static inline __u64 ll_ts2u64(time_t *time) #endif /* don't need an addref as the sb_info should be holding one */ -static inline struct obd_export *ll_s2obdexp(struct super_block *sb) +static inline struct obd_export *ll_s2dtexp(struct super_block *sb) { - return ll_s2sbi(sb)->ll_osc_exp; + return ll_s2sbi(sb)->ll_dt_exp; } /* don't need an addref as the sb_info should be holding one */ -static inline struct obd_export *ll_s2mdcexp(struct super_block *sb) +static inline struct obd_export *ll_s2mdexp(struct super_block *sb) { - return ll_s2sbi(sb)->ll_mdc_exp; + return ll_s2sbi(sb)->ll_md_exp; } static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi) { - struct obd_device *obd = sbi->ll_mdc_exp->exp_obd; + struct obd_device *obd = sbi->ll_md_exp->exp_obd; if (obd == NULL) LBUG(); return &obd->u.cli; @@ -678,20 +715,23 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode) return ll_s2sbi(inode->i_sb); } -static inline struct obd_export *ll_i2obdexp(struct inode *inode) +static inline struct obd_export *ll_i2dtexp(struct inode *inode) { - return ll_s2obdexp(inode->i_sb); + return ll_s2dtexp(inode->i_sb); } -static inline struct obd_export *ll_i2mdcexp(struct inode *inode) +static inline struct obd_export *ll_i2mdexp(struct inode *inode) { - return ll_s2mdcexp(inode->i_sb); + return ll_s2mdexp(inode->i_sb); } -static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode) +static inline struct lu_fid *ll_inode2fid(struct inode *inode) { - mdc_pack_fid(fid, inode->i_ino, inode->i_generation, - inode->i_mode & S_IFMT); + struct lu_fid *fid; + LASSERT(inode != NULL); + fid = &ll_i2info(inode)->lli_fid; + LASSERT(fid_is_igif(fid) || fid_ver(fid) == 0); + return fid; } static inline int ll_mds_max_easize(struct super_block *sb) @@ -712,4 +752,36 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name, ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); +/* llite/remote_perm.c */ +extern cfs_mem_cache_t *ll_remote_perm_cachep; +extern cfs_mem_cache_t *ll_rmtperm_hash_cachep; + +struct hlist_head *alloc_rmtperm_hash(void); +void free_rmtperm_hash(struct hlist_head *hash); +int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm); +int lustre_check_remote_perm(struct inode *inode, int mask); + +/* llite/llite_fid.c */ +ino_t ll_fid_build_ino(struct ll_sb_info *sbi, struct lu_fid *fid); + +/* llite/llite_capa.c */ +extern cfs_timer_t ll_capa_timer; + +int ll_capa_thread_start(void); +void ll_capa_thread_stop(void); +void ll_capa_timer_callback(unsigned long unused); + +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa); +int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa); + +void ll_capa_open(struct inode *inode); +void ll_capa_close(struct inode *inode); + +struct obd_capa *ll_mdscapa_get(struct inode *inode); +struct obd_capa *ll_osscapa_get(struct inode *inode, uid_t fsuid, __u64 opc); + +void ll_truncate_free_capa(struct obd_capa *ocapa); +void ll_clear_inode_capas(struct inode *inode); +void ll_print_capa_stat(struct ll_sb_info *sbi); + #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 872fafd..cf4b503 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -34,9 +34,10 @@ #include <lprocfs_status.h> #include <lustre_disk.h> #include <lustre_param.h> +#include <lustre_log.h> #include "llite_internal.h" -kmem_cache_t *ll_file_data_slab; +cfs_mem_cache_t *ll_file_data_slab; LIST_HEAD(ll_super_blocks); spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED; @@ -62,6 +63,9 @@ static struct ll_sb_info *ll_init_sbi(void) spin_lock_init(&sbi->ll_lock); spin_lock_init(&sbi->ll_lco.lco_lock); + spin_lock_init(&sbi->ll_pp_extent_lock); + spin_lock_init(&sbi->ll_process_lock); + sbi->ll_rw_stats_on = 0; INIT_LIST_HEAD(&sbi->ll_pglist); if (num_physpages >> (20 - CFS_PAGE_SHIFT) < 512) sbi->ll_async_page_max = num_physpages / 2; @@ -87,7 +91,7 @@ static struct ll_sb_info *ll_init_sbi(void) INIT_LIST_HEAD(&sbi->ll_deathrow); spin_lock_init(&sbi->ll_deathrow_lock); #endif - for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { + for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock); spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock); } @@ -115,35 +119,71 @@ static struct dentry_operations ll_d_root_ops = { #endif }; -static int client_common_fill_super(struct super_block *sb, - char *mdc, char *osc) +/* Initialize the default and maximum LOV EA and cookie sizes. This allows + * us to make MDS RPCs with large enough reply buffers to hold the + * maximum-sized (= maximum striped) EA and cookie without having to + * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */ +static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) +{ + struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC }; + __u32 valsize = sizeof(struct lov_desc); + int rc, easize, def_easize, cookiesize; + struct lov_desc desc; + __u32 stripes; + ENTRY; + + rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC, + &valsize, &desc); + if (rc) + RETURN(rc); + + stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = stripes; + easize = obd_size_diskmd(dt_exp, &lsm); + + lsm.lsm_stripe_count = desc.ld_default_stripe_count; + def_easize = obd_size_diskmd(dt_exp, &lsm); + + cookiesize = stripes * sizeof(struct llog_cookie); + + CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", + easize, cookiesize); + + rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize); + RETURN(rc); +} + +static int client_common_fill_super(struct super_block *sb, char *md, char *dt, + uid_t nllu, gid_t nllg) { struct inode *root = 0; struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; - struct ll_fid rootfid; + struct lu_fid rootfid; + struct obd_capa *oc = NULL; struct obd_statfs osfs; struct ptlrpc_request *request = NULL; - struct lustre_handle osc_conn = {0, }; - struct lustre_handle mdc_conn = {0, }; - struct lustre_md md; + struct lustre_handle dt_conn = {0, }; + struct lustre_handle md_conn = {0, }; struct obd_connect_data *data = NULL; - int err; + struct lustre_md lmd; + obd_valid valid; + int size, err; ENTRY; - obd = class_name2obd(mdc); + obd = class_name2obd(md); if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); + CERROR("MD %s: not setup or attached\n", md); RETURN(-EINVAL); } - OBD_ALLOC(data, sizeof(*data)); + OBD_ALLOC_PTR(data); if (data == NULL) RETURN(-ENOMEM); if (proc_lustre_fs_root) { err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - osc, mdc); + dt, md); if (err < 0) CERROR("could not register mount in /proc/lustre"); } @@ -151,7 +191,8 @@ static int client_common_fill_super(struct super_block *sb, /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_JOIN | - OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION; + OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION | + OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA; #ifdef CONFIG_FS_POSIX_ACL data->ocd_connect_flags |= OBD_CONNECT_ACL; #endif @@ -170,29 +211,54 @@ static int client_common_fill_super(struct super_block *sb, if (sbi->ll_flags & LL_SBI_FLOCK) sbi->ll_fop = &ll_file_operations_flock; - else + else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) sbi->ll_fop = &ll_file_operations; + else + sbi->ll_fop = &ll_file_operations_noflock; + + /* real client */ + data->ocd_connect_flags |= OBD_CONNECT_REAL; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + data->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; + } else { + data->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; + data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; + } + data->ocd_nllu = nllu; + data->ocd_nllg = nllg; - err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data); + err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { - LCONSOLE_ERROR("An MDT (mdc %s) is performing recovery, of " - "which this client is not a part. Please wait " - "for recovery to complete, abort, or " - "time out.\n", mdc); + LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing " + "recovery, of which this client is not a " + "part. Please wait for recovery to complete," + " abort, or time out.\n", md); GOTO(out, err); } else if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); + CERROR("cannot connect to %s: rc = %d\n", md, err); GOTO(out, err); } - sbi->ll_mdc_exp = class_conn2export(&mdc_conn); + sbi->ll_md_exp = class_conn2export(&md_conn); + + err = obd_fid_init(sbi->ll_md_exp); + if (err) { + CERROR("Can't init metadata layer FID infrastructure, " + "rc %d\n", err); + GOTO(out_md, err); + } err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ); if (err) - GOTO(out_mdc, err); + GOTO(out_md_fid, err); - /* MDC connect is surely finished by now because we actually sent - * a statfs RPC, otherwise obd_connect() is asynchronous. */ - *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data; + size = sizeof(*data); + err = obd_get_info(sbi->ll_md_exp, strlen(KEY_CONN_DATA), + KEY_CONN_DATA, &size, data); + if (err) { + CERROR("Get connect data failed: %d \n", err); + GOTO(out_md, err); + } LASSERT(osfs.os_bsize); sb->s_blocksize = osfs.os_bsize; @@ -227,30 +293,67 @@ static int client_common_fill_super(struct super_block *sb, sb->s_flags |= MS_POSIXACL; #endif sbi->ll_flags |= LL_SBI_ACL; - } else + } else { + LCONSOLE_INFO("client wants to enable acl, but mdt not!\n"); +#ifdef MS_POSIXACL + sb->s_flags &= ~MS_POSIXACL; +#endif sbi->ll_flags &= ~LL_SBI_ACL; + } if (data->ocd_connect_flags & OBD_CONNECT_JOIN) sbi->ll_flags |= LL_SBI_JOIN; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + if (!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) { + /* sometimes local client claims to be remote, but mdt + * will disagree when client gss not applied. */ + LCONSOLE_INFO("client claims to be remote, but server " + "rejected, forced to be local.\n"); + sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + } + } else { + if (!(data->ocd_connect_flags & OBD_CONNECT_LCL_CLIENT)) { + /* with gss applied, remote client can not claim to be + * local, so mdt maybe force client to be remote. */ + LCONSOLE_INFO("client claims to be local, but server " + "rejected, forced to be remote.\n"); + sbi->ll_flags |= LL_SBI_RMT_CLIENT; + } + } + + if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) { + LCONSOLE_INFO("client enabled MDS capability!\n"); + sbi->ll_flags |= LL_SBI_MDS_CAPA; + } + + if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) { + LCONSOLE_INFO("client enabled OSS capability!\n"); + sbi->ll_flags |= LL_SBI_OSS_CAPA; + } + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) /* We set sb->s_dev equal on all lustre clients in order to support * NFS export clustering. NFSD requires that the FSID be the same * on all clients. */ /* s_dev is also used in lt_compare() to compare two fs, but that is * only a node-local comparison. */ + + /* XXX: this will not work with LMV */ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid, strlen(sbi2mdc(sbi)->cl_target_uuid.uuid)); #endif - obd = class_name2obd(osc); + obd = class_name2obd(dt); if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, err = -ENODEV); + CERROR("DT %s: not setup or attached\n", dt); + GOTO(out_md_fid, err = -ENODEV); } data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE; + if (sbi->ll_flags & LL_SBI_OSS_CAPA) + data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA; CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " "ocd_grant: %d\n", data->ocd_connect_flags, @@ -260,51 +363,59 @@ static int client_common_fill_super(struct super_block *sb, obd->obd_upcall.onu_upcall = ll_ocd_update; data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT; - - err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data); - if (err == -EBUSY) { - LCONSOLE_ERROR("An OST (osc %s) is performing recovery, of " - "which this client is not a part. Please wait " - "for recovery to complete, abort, or " - "time out.\n", osc); - GOTO(out, err); + err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data); + if (err == -EBUSY) { + LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing " + "recovery, of which this client is not a " + "part. Please wait for recovery to " + "complete, abort, or time out.\n", dt); + GOTO(out_md_fid, err); } else if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, err); + CERROR("Cannot connect to %s: rc = %d\n", dt, err); + GOTO(out_md_fid, err); + } + + sbi->ll_dt_exp = class_conn2export(&dt_conn); + + err = obd_fid_init(sbi->ll_dt_exp); + if (err) { + CERROR("Can't init data layer FID infrastructure, " + "rc %d\n", err); + GOTO(out_dt, err); } - sbi->ll_osc_exp = class_conn2export(&osc_conn); + spin_lock(&sbi->ll_lco.lco_lock); sbi->ll_lco.lco_flags = data->ocd_connect_flags; spin_unlock(&sbi->ll_lco.lco_lock); - mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp); + ll_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp); - err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL, + err = obd_prep_async_page(sbi->ll_dt_exp, NULL, NULL, NULL, 0, NULL, NULL, NULL); if (err < 0) { - LCONSOLE_ERROR("There are no OST's in this filesystem. " - "There must be at least one active OST for " - "a client to start.\n"); - GOTO(out_osc, err); + LCONSOLE_ERROR_MSG(0x151, "There are no OST's in this " + "filesystem. There must be at least one " + "active OST for a client to start.\n"); + GOTO(out_dt_fid, err); } if (!ll_async_page_slab) { ll_async_page_slab_size = size_round(sizeof(struct ll_async_page)) + err; - ll_async_page_slab = kmem_cache_create("ll_async_page", - ll_async_page_slab_size, - 0, 0, NULL, NULL); + ll_async_page_slab = cfs_mem_cache_create("ll_async_page", + ll_async_page_slab_size, + 0, 0); if (!ll_async_page_slab) - GOTO(out_osc, -ENOMEM); + GOTO(out_dt_fid, err = -ENOMEM); } - err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid); + err = md_getstatus(sbi->ll_md_exp, &rootfid, &oc); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, err); + GOTO(out_dt_fid, err); } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; + CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&rootfid)); + sbi->ll_root_fid = rootfid; sb->s_op = &lustre_super_operations; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) @@ -313,29 +424,43 @@ static int client_common_fill_super(struct super_block *sb, /* make root inode * XXX: move this to after cbd setup? */ - err = mdc_getattr(sbi->ll_mdc_exp, &rootfid, - OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | - (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0), - 0, &request); + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + valid |= OBD_MD_FLRMTPERM; + else if (sbi->ll_flags & LL_SBI_ACL) + valid |= OBD_MD_FLACL; + + err = md_getattr(sbi->ll_md_exp, &rootfid, oc, valid, 0, &request); + if (oc) + free_capa(oc); if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, err); + CERROR("md_getattr failed for root: rc = %d\n", err); + GOTO(out_dt_fid, err); } - - err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md); + memset(&lmd, 0, sizeof(lmd)); + err = md_get_lustre_md(sbi->ll_md_exp, request, + REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp, + &lmd); if (err) { - CERROR("failed to understand root inode md: rc = %d\n",err); + CERROR("failed to understand root inode md: rc = %d\n", err); ptlrpc_req_finished (request); - GOTO(out_osc, err); + GOTO(out_dt_fid, err); } - LASSERT(sbi->ll_rootino != 0); - root = ll_iget(sb, sbi->ll_rootino, &md); - + LASSERT(fid_is_sane(&sbi->ll_root_fid)); + root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd); + md_free_lustre_md(sbi->ll_md_exp, &lmd); ptlrpc_req_finished(request); if (root == NULL || is_bad_inode(root)) { - mdc_free_lustre_md(sbi->ll_osc_exp, &md); + if (lmd.lsm) + obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); +#ifdef CONFIG_FS_POSIX_ACL + if (lmd.posix_acl) { + posix_acl_release(lmd.posix_acl); + lmd.posix_acl = NULL; + } +#endif CERROR("lustre_lite: bad iget4 for root\n"); GOTO(out_root, err = -EBADF); } @@ -360,30 +485,33 @@ static int client_common_fill_super(struct super_block *sb, OBD_FREE(data, sizeof(*data)); sb->s_root->d_op = &ll_d_root_ops; RETURN(err); - out_root: if (root) iput(root); -out_osc: - obd_disconnect(sbi->ll_osc_exp); - sbi->ll_osc_exp = NULL; -out_mdc: - obd_disconnect(sbi->ll_mdc_exp); - sbi->ll_mdc_exp = NULL; +out_dt_fid: + obd_fid_fini(sbi->ll_dt_exp); +out_dt: + obd_disconnect(sbi->ll_dt_exp); + sbi->ll_dt_exp = NULL; +out_md_fid: + obd_fid_fini(sbi->ll_md_exp); +out_md: + obd_disconnect(sbi->ll_md_exp); + sbi->ll_md_exp = NULL; out: if (data != NULL) - OBD_FREE(data, sizeof(*data)); + OBD_FREE_PTR(data); lprocfs_unregister_mountpoint(sbi); - RETURN(err); + return err; } int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) { int size, rc; - *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL); + *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); size = sizeof(int); - rc = obd_get_info(sbi->ll_mdc_exp, strlen("max_easize"), "max_easize", + rc = obd_get_info(sbi->ll_md_exp, strlen("max_easize"), "max_easize", &size, lmmsize); if (rc) CERROR("Get max mdsize error rc %d \n", rc); @@ -402,7 +530,7 @@ void ll_dump_inode(struct inode *inode) dentry_count++; CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n", - inode, ll_i2mdcexp(inode)->exp_obd->obd_name, inode->i_ino, + inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino, inode->i_mode, atomic_read(&inode->i_count), dentry_count); } @@ -557,19 +685,24 @@ void client_common_put_super(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; - ll_close_thread_shutdown(sbi->ll_lcq); + obd_cancel_unused(sbi->ll_dt_exp, NULL, 0, NULL); - lprocfs_unregister_mountpoint(sbi); + ll_close_thread_shutdown(sbi->ll_lcq); /* destroy inodes in deathrow */ prune_deathrow(sbi, 0); list_del(&sbi->ll_conn_chain); - obd_disconnect(sbi->ll_osc_exp); - sbi->ll_osc_exp = NULL; + + obd_fid_fini(sbi->ll_dt_exp); + obd_disconnect(sbi->ll_dt_exp); + sbi->ll_dt_exp = NULL; - obd_disconnect(sbi->ll_mdc_exp); - sbi->ll_mdc_exp = NULL; + lprocfs_unregister_mountpoint(sbi); + + obd_fid_fini(sbi->ll_md_exp); + obd_disconnect(sbi->ll_md_exp); + sbi->ll_md_exp = NULL; lustre_throw_orphan_dentries(sb); EXIT; @@ -631,7 +764,12 @@ static int ll_options(char *options, int *flags) *flags |= tmp; goto next; } - tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK); + tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK); + if (tmp) { + *flags |= tmp; + goto next; + } + tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK); if (tmp) { *flags &= ~tmp; goto next; @@ -657,29 +795,36 @@ static int ll_options(char *options, int *flags) if (tmp) { goto next; } + tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); + if (tmp) { + *flags |= tmp; + goto next; + } - LCONSOLE_ERROR("Unknown option '%s', won't mount.\n", s1); + LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", + s1); RETURN(-EINVAL); next: /* Find next opt */ s2 = strchr(s1, ','); - if (s2 == NULL) + if (s2 == NULL) break; s1 = s2 + 1; } RETURN(0); } - + void ll_lli_init(struct ll_inode_info *lli) { - sema_init(&lli->lli_open_sem, 1); + lli->lli_inode_magic = LLI_INODE_MAGIC; sema_init(&lli->lli_size_sem, 1); sema_init(&lli->lli_write_sem, 1); lli->lli_flags = 0; lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; spin_lock_init(&lli->lli_lock); INIT_LIST_HEAD(&lli->lli_pending_write_llaps); + INIT_LIST_HEAD(&lli->lli_close_list); lli->lli_inode_magic = LLI_INODE_MAGIC; sema_init(&lli->lli_och_sem, 1); lli->lli_mds_read_och = lli->lli_mds_write_och = NULL; @@ -687,190 +832,18 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0; lli->lli_open_fd_exec_count = 0; INIT_LIST_HEAD(&lli->lli_dead_list); + lli->lli_remote_perms = NULL; + lli->lli_rmtperm_utime = 0; + sema_init(&lli->lli_rmtperm_sem, 1); + INIT_LIST_HEAD(&lli->lli_oss_capas); } -/* COMPAT_146 */ -#define MDCDEV "mdc_dev" -static int old_lustre_process_log(struct super_block *sb, char *newprofile, - struct config_llog_instance *cfg) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct obd_device *obd; - struct lustre_handle mdc_conn = {0, }; - struct obd_export *exp; - char *ptr, *mdt, *profile; - char niduuid[10] = "mdtnid0"; - class_uuid_t uuid; - struct obd_uuid mdc_uuid; - struct llog_ctxt *ctxt; - struct obd_connect_data ocd = { 0 }; - lnet_nid_t nid; - int i, rc = 0, recov_bk = 1, failnodes = 0; - ENTRY; - - ll_generate_random_uuid(uuid); - class_uuid_unparse(uuid, &mdc_uuid); - CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid); - - /* Figure out the old mdt and profile name from new-style profile - ("lustre" from "mds/lustre-client") */ - mdt = newprofile; - profile = strchr(mdt, '/'); - if (profile == NULL) { - CDEBUG(D_CONFIG, "Can't find MDT name in %s\n", newprofile); - GOTO(out, rc = -EINVAL); - } - *profile = '\0'; - profile++; - ptr = strrchr(profile, '-'); - if (ptr == NULL) { - CDEBUG(D_CONFIG, "Can't find client name in %s\n", newprofile); - GOTO(out, rc = -EINVAL); - } - *ptr = '\0'; - - LCONSOLE_WARN("This looks like an old mount command; I will try to " - "contact MDT '%s' for profile '%s'\n", mdt, profile); - - /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ - i = 0; - ptr = lsi->lsi_lmd->lmd_dev; - while (class_parse_nid(ptr, &nid, &ptr) == 0) { - rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid, 0,0,0); - i++; - /* Stop at the first failover nid */ - if (*ptr == ':') - break; - } - if (i == 0) { - CERROR("No valid MDT nids found.\n"); - GOTO(out, rc = -EINVAL); - } - failnodes++; - - rc = do_lcfg(MDCDEV, 0, LCFG_ATTACH, LUSTRE_MDC_NAME,mdc_uuid.uuid,0,0); - if (rc < 0) - GOTO(out_del_uuid, rc); - - rc = do_lcfg(MDCDEV, 0, LCFG_SETUP, mdt, niduuid, 0, 0); - if (rc < 0) { - LCONSOLE_ERROR("I couldn't establish a connection with the MDT." - " Check that the MDT host NID is correct and the" - " networks are up.\n"); - GOTO(out_detach, rc); - } - - obd = class_name2obd(MDCDEV); - if (obd == NULL) - GOTO(out_cleanup, rc = -EINVAL); - - /* Add any failover nids */ - while (*ptr == ':') { - /* New failover node */ - sprintf(niduuid, "mdtnid%d", failnodes); - i = 0; - while (class_parse_nid(ptr, &nid, &ptr) == 0) { - i++; - rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid,0,0,0); - if (rc) - CERROR("Add uuid for %s failed %d\n", - libcfs_nid2str(nid), rc); - if (*ptr == ':') - break; - } - if (i > 0) { - rc = do_lcfg(MDCDEV, 0, LCFG_ADD_CONN, niduuid, 0, 0,0); - if (rc) - CERROR("Add conn for %s failed %d\n", - libcfs_nid2str(nid), rc); - failnodes++; - } else { - /* at ":/fsname" */ - break; - } - } - - /* Try all connections, but only once. */ - rc = obd_set_info_async(obd->obd_self_export, - strlen("init_recov_bk"), "init_recov_bk", - sizeof(recov_bk), &recov_bk, NULL); - if (rc) - GOTO(out_cleanup, rc); - - /* If we don't have this then an ACL MDS will refuse the connection */ - ocd.ocd_connect_flags = OBD_CONNECT_ACL; - - rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd); - if (rc) { - CERROR("cannot connect to %s: rc = %d\n", mdt, rc); - GOTO(out_cleanup, rc); - } - - exp = class_conn2export(&mdc_conn); - - ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); - - cfg->cfg_flags |= CFG_F_COMPAT146; - -#if 1 - rc = class_config_parse_llog(ctxt, profile, cfg); -#else - /* - * For debugging, it's useful to just dump the log - */ - rc = class_config_dump_llog(ctxt, profile, cfg); -#endif - switch (rc) { - case 0: { - /* Set the caller's profile name to the old-style */ - memcpy(newprofile, profile, strlen(profile) + 1); - break; - } - case -EINVAL: - LCONSOLE_ERROR("%s: The configuration '%s' could not be read " - "from the MDT '%s'. Make sure this client and " - "the MDT are running compatible versions of " - "Lustre.\n", - obd->obd_name, profile, mdt); - /* fall through */ - default: - LCONSOLE_ERROR("%s: The configuration '%s' could not be read " - "from the MDT '%s'. This may be the result of " - "communication errors between the client and " - "the MDT, or if the MDT is not running.\n", - obd->obd_name, profile, mdt); - break; - } - - /* We don't so much care about errors in cleaning up the config llog - * connection, as we have already read the config by this point. */ - obd_disconnect(exp); - -out_cleanup: - do_lcfg(MDCDEV, 0, LCFG_CLEANUP, 0, 0, 0, 0); - -out_detach: - do_lcfg(MDCDEV, 0, LCFG_DETACH, 0, 0, 0, 0); - -out_del_uuid: - /* class_add_uuid adds a nid even if the same uuid exists; we might - delete any copy here. So they all better match. */ - for (i = 0; i < failnodes; i++) { - sprintf(niduuid, "mdtnid%d", i); - do_lcfg(MDCDEV, 0, LCFG_DEL_UUID, niduuid, 0, 0, 0); - } - /* class_import_put will get rid of the additional connections */ -out: - RETURN(rc); -} -/* end COMPAT_146 */ - int ll_fill_super(struct super_block *sb) { struct lustre_profile *lprof; struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi; - char *osc = NULL, *mdc = NULL; + char *dt = NULL, *md = NULL; char *profilenm = get_profile_name(sb); struct config_llog_instance cfg = {0, }; char ll_instance[sizeof(sb) * 2 + 1]; @@ -881,6 +854,7 @@ int ll_fill_super(struct super_block *sb) cfs_module_get(); + sb->s_type->fs_flags |= FS_ODD_RENAME; /* client additional sb info */ lsi->lsi_llsbi = sbi = ll_init_sbi(); if (!sbi) { @@ -893,7 +867,7 @@ int ll_fill_super(struct super_block *sb) GOTO(out_free, err); /* Generate a string unique to this super, in case some joker tries - to mount the same fs at two mount points. + to mount the same fs at two mount points. Use the address of the super itself.*/ sprintf(ll_instance, "%p", sb); cfg.cfg_instance = ll_instance; @@ -901,37 +875,6 @@ int ll_fill_super(struct super_block *sb) /* set up client obds */ err = lustre_process_log(sb, profilenm, &cfg); - /* COMPAT_146 */ - if (err < 0) { - char *oldname; - int rc, oldnamelen; - oldnamelen = strlen(profilenm) + 1; - /* Temp storage for 1.4.6 profile name */ - OBD_ALLOC(oldname, oldnamelen); - if (oldname) { - memcpy(oldname, profilenm, oldnamelen); - rc = old_lustre_process_log(sb, oldname, &cfg); - if (rc >= 0) { - /* That worked - update the profile name - permanently */ - err = rc; - OBD_FREE(lsi->lsi_lmd->lmd_profile, - strlen(lsi->lsi_lmd->lmd_profile) + 1); - OBD_ALLOC(lsi->lsi_lmd->lmd_profile, - strlen(oldname) + 1); - if (!lsi->lsi_lmd->lmd_profile) { - OBD_FREE(oldname, oldnamelen); - GOTO(out_free, err = -ENOMEM); - } - memcpy(lsi->lsi_lmd->lmd_profile, oldname, - strlen(oldname) + 1); - profilenm = get_profile_name(sb); - cfg.cfg_flags |= CFG_F_SERVER146; - } - OBD_FREE(oldname, oldnamelen); - } - } - /* end COMPAT_146 */ if (err < 0) { CERROR("Unable to process log: %d\n", err); GOTO(out_free, err); @@ -939,34 +882,36 @@ int ll_fill_super(struct super_block *sb) lprof = class_get_profile(profilenm); if (lprof == NULL) { - LCONSOLE_ERROR("The client profile '%s' could not be read " - "from the MGS. Does that filesystem exist?\n", - profilenm); + LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be" + " read from the MGS. Does that filesystem " + "exist?\n", profilenm); GOTO(out_free, err = -EINVAL); } - CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm, - lprof->lp_mdc, lprof->lp_osc); + CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm, + lprof->lp_md, lprof->lp_dt); - OBD_ALLOC(osc, strlen(lprof->lp_osc) + + OBD_ALLOC(dt, strlen(lprof->lp_dt) + strlen(ll_instance) + 2); - if (!osc) + if (!dt) GOTO(out_free, err = -ENOMEM); - sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance); + sprintf(dt, "%s-%s", lprof->lp_dt, ll_instance); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + + OBD_ALLOC(md, strlen(lprof->lp_md) + strlen(ll_instance) + 2); - if (!mdc) + if (!md) GOTO(out_free, err = -ENOMEM); - sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance); - + sprintf(md, "%s-%s", lprof->lp_md, ll_instance); + /* connections, registrations, sb setup */ - err = client_common_fill_super(sb, mdc, osc); - + err = client_common_fill_super(sb, md, dt, + lsi->lsi_lmd->lmd_nllu, + lsi->lsi_lmd->lmd_nllg); + out_free: - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - if (osc) - OBD_FREE(osc, strlen(osc) + 1); + if (md) + OBD_FREE(md, strlen(md) + 1); + if (dt) + OBD_FREE(dt, strlen(dt) + 1); if (err) ll_put_super(sb); else @@ -988,15 +933,17 @@ void ll_put_super(struct super_block *sb) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); - + + ll_print_capa_stat(sbi); + sprintf(ll_instance, "%p", sb); cfg.cfg_instance = ll_instance; lustre_end_log(sb, NULL, &cfg); - if (sbi->ll_mdc_exp) { - obd = class_exp2obd(sbi->ll_mdc_exp); + if (sbi->ll_md_exp) { + obd = class_exp2obd(sbi->ll_md_exp); if (obd) - force = obd->obd_no_recov; + force = obd->obd_force; } /* We need to set force before the lov_disconnect in @@ -1013,13 +960,12 @@ void ll_put_super(struct super_block *sb) /* Only if client_common_fill_super succeeded */ client_common_put_super(sb); } - next = 0; while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { class_manual_cleanup(obd); - } - - if (profilenm) + } + + if (profilenm) class_del_profile(profilenm); ll_free_sbi(sb); @@ -1097,7 +1043,6 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) void ll_clear_inode(struct inode *inode) { - struct ll_fid fid; struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); ENTRY; @@ -1105,31 +1050,26 @@ void ll_clear_inode(struct inode *inode) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); - ll_inode2fid(&fid, inode); - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags); - mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode); + ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), + null_if_equal, inode); LASSERT(!lli->lli_open_fd_write_count); LASSERT(!lli->lli_open_fd_read_count); LASSERT(!lli->lli_open_fd_exec_count); if (lli->lli_mds_write_och) - ll_mdc_real_close(inode, FMODE_WRITE); - if (lli->lli_mds_exec_och) { - if (!FMODE_EXEC) - CERROR("No FMODE exec, bug exec och is present for " - "inode %ld\n", inode->i_ino); - ll_mdc_real_close(inode, FMODE_EXEC); - } + ll_md_real_close(inode, FMODE_WRITE); + if (lli->lli_mds_exec_och) + ll_md_real_close(inode, FMODE_EXEC); if (lli->lli_mds_read_och) - ll_mdc_real_close(inode, FMODE_READ); - + ll_md_real_close(inode, FMODE_READ); if (lli->lli_smd) { - obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd, + obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd, null_if_equal, inode); - obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd); + obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd); lli->lli_smd = NULL; } @@ -1139,14 +1079,21 @@ void ll_clear_inode(struct inode *inode) lli->lli_symlink_name = NULL; } + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + LASSERT(lli->lli_posix_acl == NULL); + if (lli->lli_remote_perms) { + free_rmtperm_hash(lli->lli_remote_perms); + lli->lli_remote_perms = NULL; + } + } #ifdef CONFIG_FS_POSIX_ACL - if (lli->lli_posix_acl) { + else if (lli->lli_posix_acl) { LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); + LASSERT(lli->lli_remote_perms == NULL); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; } #endif - lli->lli_inode_magic = LLI_INODE_DEAD; #ifdef HAVE_EXPORT___IGET @@ -1154,10 +1101,95 @@ void ll_clear_inode(struct inode *inode) list_del_init(&lli->lli_dead_list); spin_unlock(&sbi->ll_deathrow_lock); #endif + ll_clear_inode_capas(inode); EXIT; } +int ll_md_setattr(struct inode *inode, struct md_op_data *op_data) +{ + struct lustre_md md; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *request = NULL; + int rc; + ENTRY; + + op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, + &request); + if (rc) { + ptlrpc_req_finished(request); + if (rc == -ENOENT) { + inode->i_nlink = 0; + /* Unlinked special device node? Or just a race? + * Pretend we done everything. */ + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) + rc = inode_setattr(inode, &op_data->op_attr); + } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) { + CERROR("md_setattr fails: rc = %d\n", rc); + } + RETURN(rc); + } + + rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); + if (rc) { + ptlrpc_req_finished(request); + RETURN(rc); + } + + /* We call inode_setattr to adjust timestamps. + * If there is at least some data in file, we cleared ATTR_SIZE + * above to avoid invoking vmtruncate, otherwise it is important + * to call vmtruncate in inode_setattr to update inode->i_size + * (bug 6196) */ + rc = inode_setattr(inode, &op_data->op_attr); + + /* Extract epoch data if obtained. */ + memcpy(&op_data->op_handle, &md.body->handle, sizeof(op_data->op_handle)); + op_data->op_ioepoch = md.body->ioepoch; + + ll_update_inode(inode, &md); + ptlrpc_req_finished(request); + + RETURN(rc); +} + +/* Close IO epoch and send Size-on-MDS attribute update. */ +static int ll_setattr_done_writing(struct inode *inode, + struct md_op_data *op_data) +{ + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; + ENTRY; + + LASSERT(op_data != NULL); + if (!S_ISREG(inode->i_mode)) + RETURN(0); + + CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n", + op_data->op_ioepoch, PFID(&lli->lli_fid)); + + op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE; + /* XXX: pass och here for the recovery purpose. */ + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL); + if (rc == -EAGAIN) { + /* MDS has instructed us to obtain Size-on-MDS attribute + * from OSTs and send setattr to back to MDS. */ + rc = ll_sizeonmds_update(inode, &op_data->op_handle, + op_data->op_ioepoch); + } else if (rc) { + CERROR("inode %lu mdc truncate failed: rc = %d\n", + inode->i_ino, rc); + } + RETURN(rc); +} + /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -1176,16 +1208,14 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - struct lustre_md md; + struct md_op_data *op_data = NULL; int ia_valid = attr->ia_valid; - int rc = 0; + int rc = 0, rc1 = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino, attr->ia_valid); - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_SETATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETATTR, 1); if (ia_valid & ATTR_SIZE) { if (attr->ia_size > ll_file_maxbytes(inode)) { @@ -1220,9 +1250,9 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) /* To avoid stale mtime on mds, obtain it from ost and send to mds. */ rc = ll_glimpse_size(inode, 0); - if (rc) + if (rc) RETURN(rc); - + attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME; attr->ia_mtime = inode->i_mtime; } @@ -1239,43 +1269,28 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - attr, NULL, 0, NULL, 0, &request); - - if (rc) { - ptlrpc_req_finished(request); - if (rc == -ENOENT) { - inode->i_nlink = 0; - /* Unlinked special device node? Or just a race? - * Pretend we done everything. */ - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode)) - rc = inode_setattr(inode, attr); - } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) - CERROR("mdc_setattr fails: rc = %d\n", rc); - RETURN(rc); - } - rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md); - if (rc) { - ptlrpc_req_finished(request); - RETURN(rc); - } - - /* We call inode_setattr to adjust timestamps. - * If there is at least some data in file, we cleared ATTR_SIZE above to - * avoid invoking vmtruncate, otherwise it is important to call - * vmtruncate in inode_setattr to update inode->i_size (bug 6196) */ - rc = inode_setattr(inode, attr); + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + RETURN(-ENOMEM); + + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (ia_valid & ATTR_SIZE) + op_data->op_flags = MF_EPOCH_OPEN; - ll_update_inode(inode, &md); - ptlrpc_req_finished(request); + rc = ll_md_setattr(inode, op_data); + if (rc) + GOTO(out, rc); + + if (op_data->op_ioepoch) + CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for " + "truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid)); if (!lsm || !S_ISREG(inode->i_mode)) { - CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - RETURN(rc); + CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); + GOTO(out, rc = 0); } /* We really need to get our PW lock before we change inode->i_size. @@ -1307,7 +1322,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) DOWN_WRITE_I_ALLOC_SEM(inode); #endif if (rc != 0) - RETURN(rc); + GOTO(out, rc); /* Only ll_inode_size_lock is taken at this level. * lov_stripe_lock() is grabbed by ll_truncate() only over @@ -1329,39 +1344,51 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { obd_flag flags; struct obd_info oinfo = { { { 0 } } }; - struct obdo *oa = obdo_alloc(); + struct obdo *oa; CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", inode->i_ino, LTIME_S(attr->ia_mtime)); + OBDO_ALLOC(oa); if (oa) { oa->o_id = lsm->lsm_object_id; - oa->o_valid = OBD_MD_FLID; + oa->o_gr = lsm->lsm_object_gr; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; flags = OBD_MD_FLTYPE | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFID | OBD_MD_FLGENER; + OBD_MD_FLFID | OBD_MD_FLGENER | + OBD_MD_FLGROUP; obdo_from_inode(oa, inode, flags); oinfo.oi_oa = oa; oinfo.oi_md = lsm; + oinfo.oi_capa = ll_mdscapa_get(inode); - rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL); + /* XXX: this looks unnecessary now. */ + rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); + capa_put(oinfo.oi_capa); if (rc) CERROR("obd_setattr_async fails: rc=%d\n", rc); - obdo_free(oa); + OBDO_FREE(oa); } else { rc = -ENOMEM; } } - RETURN(rc); + EXIT; +out: + if (op_data) { + if (op_data->op_ioepoch) { + rc1 = ll_setattr_done_writing(inode, op_data); + } + ll_finish_md_op_data(op_data); + } + return rc ? rc : rc1; } int ll_setattr(struct dentry *de, struct iattr *attr) { - ll_vfs_ops_tally(ll_i2sbi(de->d_inode), VFS_OPS_SETATTR); - if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) == (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; @@ -1377,9 +1404,9 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, int rc; ENTRY; - rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age); + rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age); if (rc) { - CERROR("mdc_statfs fails: rc = %d\n", rc); + CERROR("md_statfs fails: rc = %d\n", rc); RETURN(rc); } @@ -1388,7 +1415,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files); - rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp), + rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp), &obd_osfs, max_age); if (rc) { CERROR("obd_statfs fails: rc = %d\n", rc); @@ -1427,7 +1454,7 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs) int rc; CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64()); - lprocfs_counter_incr(ll_s2sbi(sb)->ll_stats, LPROC_LL_STAFS); + ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1); /* For now we will always get up-to-date statfs values, but in the * future we may allow some amount of caching on the client (e.g. @@ -1500,7 +1527,7 @@ static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm) dump_lsm(D_INODE, lli->lli_smd); LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN, "lsm must be joined lsm %p\n", lsm); - obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd); + obd_free_memmd(ll_i2dtexp(inode), &lli->lli_smd); CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n", lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode); lli->lli_smd = lsm; @@ -1512,8 +1539,9 @@ static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm) void ll_update_inode(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); - struct mds_body *body = md->body; + struct mdt_body *body = md->body; struct lov_stripe_md *lsm = md->lsm; + struct ll_sb_info *sbi = ll_i2sbi(inode); LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { @@ -1549,12 +1577,15 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) ll_replace_lsm(inode, lsm); } if (lli->lli_smd != lsm) - obd_free_memmd(ll_i2obdexp(inode), &lsm); + obd_free_memmd(ll_i2dtexp(inode), &lsm); } + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + if (body->valid & OBD_MD_FLRMTPERM) + ll_update_remote_perm(inode, md->remote_perm); + } #ifdef CONFIG_FS_POSIX_ACL - LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL)); - if (body->valid & OBD_MD_FLACL) { + else if (body->valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); @@ -1562,9 +1593,6 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) spin_unlock(&lli->lli_lock); } #endif - - if (body->valid & OBD_MD_FLID) - inode->i_ino = body->ino; if (body->valid & OBD_MD_FLATIME && body->atime > LTIME_S(inode->i_atime)) LTIME_S(inode->i_atime) = body->atime; @@ -1588,7 +1616,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->valid & OBD_MD_FLTYPE) inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); if (S_ISREG(inode->i_mode)) { - inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS); + inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS); } else { inode->i_blkbits = inode->i_sb->s_blocksize_bits; } @@ -1603,21 +1631,54 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) inode->i_flags = ll_ext_to_inode_flags(body->flags); if (body->valid & OBD_MD_FLNLINK) inode->i_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - inode->i_generation = body->generation; if (body->valid & OBD_MD_FLRDEV) #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) inode->i_rdev = body->rdev; #else inode->i_rdev = old_decode_dev(body->rdev); #endif - if (body->valid & OBD_MD_FLSIZE) - inode->i_size = body->size; - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; + if (body->valid & OBD_MD_FLSIZE) { + if (ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) { + if (lli->lli_flags & (LLIF_DONE_WRITING | + LLIF_EPOCH_PENDING | + LLIF_SOM_DIRTY)) + CWARN("ino %lu flags %lu still has size authority!" + "do not trust the size got from MDS\n", + inode->i_ino, lli->lli_flags); + else { + inode->i_size = body->size; + lli->lli_flags |= LLIF_MDS_SIZE_LOCK; + } + } else { + inode->i_size = body->size; + } - if (body->valid & OBD_MD_FLSIZE) - set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags); + if (body->valid & OBD_MD_FLBLOCKS) + inode->i_blocks = body->blocks; + } + + if (body->valid & OBD_MD_FLID) { + /* FID shouldn't be changed! */ + if (fid_is_sane(&lli->lli_fid)) { + LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), + "Trying to change FID "DFID + " to the "DFID", inode %lu/%u(%p)\n", + PFID(&lli->lli_fid), PFID(&body->fid1), + inode->i_ino, inode->i_generation, inode); + } else + lli->lli_fid = body->fid1; + } + + LASSERT(fid_seq(&lli->lli_fid) != 0); + + if (body->valid & OBD_MD_FLMDSCAPA) { + LASSERT(md->mds_capa); + ll_add_capa(inode, md->mds_capa); + } + if (body->valid & OBD_MD_FLOSSCAPA) { + LASSERT(md->oss_capa); + ll_add_capa(inode, md->oss_capa); + } } #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) @@ -1637,8 +1698,8 @@ void ll_read_inode2(struct inode *inode, void *opaque) struct ll_inode_info *lli = ll_i2info(inode); ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", + inode->i_ino, inode->i_generation, inode); ll_lli_init(lli); @@ -1686,6 +1747,21 @@ void ll_read_inode2(struct inode *inode, void *opaque) } } +void ll_delete_inode(struct inode *inode) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + ENTRY; + + rc = obd_fid_delete(sbi->ll_md_exp, ll_inode2fid(inode)); + if (rc) { + CERROR("fid_delete() failed, rc %d\n", rc); + } + clear_inode(inode); + + EXIT; +} + int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -1696,11 +1772,13 @@ int ll_iocontrol(struct inode *inode, struct file *file, switch(cmd) { case EXT3_IOC_GETFLAGS: { - struct ll_fid fid; - struct mds_body *body; + struct mdt_body *body; + struct obd_capa *oc; - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLFLAGS,0,&req); + oc = ll_mdscapa_get(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + OBD_MD_FLFLAGS, 0, &req); + capa_put(oc); if (rc) { CERROR("failure %d inode %lu\n", rc, inode->i_ino); RETURN(-abs(rc)); @@ -1709,52 +1787,58 @@ int ll_iocontrol(struct inode *inode, struct file *file, body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); - /* We want to return EXT3_*_FL flags to the caller via this - * ioctl. An older MDS may be sending S_* flags, fix it up. */ - flags = ll_inode_to_ext_flags(body->flags, body->flags); + /*Now the ext3 will be packed directly back to client, + *no need convert here*/ + flags = body->flags; + ptlrpc_req_finished (req); RETURN(put_user(flags, (int *)arg)); } case EXT3_IOC_SETFLAGS: { - struct mdc_op_data op_data; - struct ll_iattr_struct attr; - struct obd_info oinfo = { { { 0 } } }; struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + struct obd_info oinfo = { { { 0 } } }; + struct md_op_data *op_data; if (get_user(flags, (int *)arg)) RETURN(-EFAULT); oinfo.oi_md = lsm; - oinfo.oi_oa = obdo_alloc(); + OBDO_ALLOC(oinfo.oi_oa); if (!oinfo.oi_oa) RETURN(-ENOMEM); - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - memset(&attr, 0, sizeof(attr)); - attr.ia_attr_flags = flags; - ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG; + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - (struct iattr *)&attr, NULL, 0, NULL, 0, &req); + ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; + rc = md_setattr(sbi->ll_md_exp, op_data, + NULL, 0, NULL, 0, &req); + ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc || lsm == NULL) { - obdo_free(oinfo.oi_oa); + OBDO_FREE(oinfo.oi_oa); RETURN(rc); } oinfo.oi_oa->o_id = lsm->lsm_object_id; + oinfo.oi_oa->o_gr = lsm->lsm_object_gr; oinfo.oi_oa->o_flags = flags; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | + OBD_MD_FLGROUP; + oinfo.oi_capa = ll_mdscapa_get(inode); obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER); - rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL); - obdo_free(oinfo.oi_oa); + rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); + capa_put(oinfo.oi_capa); + OBDO_FREE(oinfo.oi_oa); if (rc) { if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr_async fails: rc = %d\n", rc); + CERROR("md_setattr_async fails: rc = %d\n", rc); RETURN(rc); } @@ -1769,6 +1853,21 @@ int ll_iocontrol(struct inode *inode, struct file *file, RETURN(0); } +int ll_flush_ctx(struct inode *inode) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + + CDEBUG(D_SEC, "flush context for user %d\n", current->uid); + + obd_set_info_async(sbi->ll_md_exp, + sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + 0, NULL, NULL); + obd_set_info_async(sbi->ll_dt_exp, + sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + 0, NULL, NULL); + return 0; +} + /* umount -f client means force down, don't save state */ #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT void ll_umount_begin(struct vfsmount *vfsmnt, int flags) @@ -1797,27 +1896,27 @@ void ll_umount_begin(struct super_block *sb) CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, sb->s_count, atomic_read(&sb->s_active)); - obd = class_exp2obd(sbi->ll_mdc_exp); + obd = class_exp2obd(sbi->ll_md_exp); if (obd == NULL) { CERROR("Invalid MDC connection handle "LPX64"\n", - sbi->ll_mdc_exp->exp_handle.h_cookie); + sbi->ll_md_exp->exp_handle.h_cookie); EXIT; return; } - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data, + obd->obd_force = 1; + obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp, sizeof ioc_data, &ioc_data, NULL); - obd = class_exp2obd(sbi->ll_osc_exp); + obd = class_exp2obd(sbi->ll_dt_exp); if (obd == NULL) { CERROR("Invalid LOV connection handle "LPX64"\n", - sbi->ll_osc_exp->exp_handle.h_cookie); + sbi->ll_dt_exp->exp_handle.h_cookie); EXIT; return; } - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data, + obd->obd_force = 1; + obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, sizeof ioc_data, &ioc_data, NULL); /* Really, we'd like to wait until there are no requests outstanding, @@ -1834,18 +1933,19 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) struct ll_sb_info *sbi = ll_s2sbi(sb); int err; __u32 read_only; - + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { read_only = *flags & MS_RDONLY; - err = obd_set_info_async(sbi->ll_mdc_exp, strlen("read-only"), - "read-only", sizeof(read_only), + err = obd_set_info_async(sbi->ll_md_exp, + sizeof(KEY_READ_ONLY) - 1, + KEY_READ_ONLY, sizeof(read_only), &read_only, NULL); if (err) { CERROR("Failed to change the read-only flag during " "remount: %d\n", err); return err; } - + if (read_only) sb->s_flags |= MS_RDONLY; else @@ -1854,38 +1954,55 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -int ll_prep_inode(struct obd_export *exp, struct inode **inode, - struct ptlrpc_request *req, int offset,struct super_block *sb) +int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, + int offset, struct super_block *sb) { - struct lustre_md md; struct ll_sb_info *sbi = NULL; + struct lustre_md md; int rc = 0; ENTRY; LASSERT(*inode || sb); sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); prune_deathrow(sbi, 1); + memset(&md, 0, sizeof(struct lustre_md)); - rc = mdc_req2lustre_md(req, offset, exp, &md); + rc = md_get_lustre_md(sbi->ll_md_exp, req, offset, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (rc) RETURN(rc); if (*inode) { ll_update_inode(*inode, &md); } else { - LASSERT(sb); - *inode = ll_iget(sb, md.body->ino, &md); + LASSERT(sb != NULL); + + /* + * At this point server returns to client's same fid as client + * generated for creating. So using ->fid1 is okay here. + */ + LASSERT(fid_is_sane(&md.body->fid1)); + + *inode = ll_iget(sb, ll_fid_build_ino(sbi, &md.body->fid1), &md); if (*inode == NULL || is_bad_inode(*inode)) { - mdc_free_lustre_md(exp, &md); + if (md.lsm) + obd_free_memmd(sbi->ll_dt_exp, &md.lsm); +#ifdef CONFIG_FS_POSIX_ACL + if (md.posix_acl) { + posix_acl_release(md.posix_acl); + md.posix_acl = NULL; + } +#endif rc = -ENOMEM; CERROR("new_inode -fatal: rc %d\n", rc); GOTO(out, rc); } } - rc = obd_checkmd(exp, ll_i2mdcexp(*inode), + rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, ll_i2info(*inode)->lli_smd); out: + md_free_lustre_md(sbi->ll_md_exp, &md); RETURN(rc); } @@ -1944,14 +2061,13 @@ int ll_obd_statfs(struct inode *inode, void *arg) if (type == LL_STATFS_MDC) { if (index > 0) GOTO(out_statfs, rc = -ENODEV); - client_obd = class_exp2obd(sbi->ll_mdc_exp); + client_obd = class_exp2obd(sbi->ll_md_exp); } else if (type == LL_STATFS_LOV) { - lov_obd = class_exp2obd(sbi->ll_osc_exp); + lov_obd = class_exp2obd(sbi->ll_dt_exp); lov = &lov_obd->u.lov; - if (index >= lov->desc.ld_tgt_count) + if ((index >= lov->desc.ld_tgt_count)) GOTO(out_statfs, rc = -ENODEV); - if (!lov->lov_tgts[index]) /* Try again with the next index */ GOTO(out_statfs, rc = -EAGAIN); @@ -2009,3 +2125,148 @@ int ll_process_config(struct lustre_cfg *lcfg) return(rc); } +/* this function prepares md_op_data hint for passing ot down to MD stack. */ +struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, + struct inode *i1, struct inode *i2, + const char *name, int namelen, + int mode, __u32 opc) +{ + LASSERT(i1 != NULL); + + if (namelen > ll_i2sbi(i1)->ll_namelen) + return ERR_PTR(-ENAMETOOLONG); + + if (op_data == NULL) + OBD_ALLOC_PTR(op_data); + + if (op_data == NULL) + return ERR_PTR(-ENOMEM); + + ll_i2gids(op_data->op_suppgids, i1, i2); + op_data->op_fid1 = *ll_inode2fid(i1); + op_data->op_capa1 = ll_mdscapa_get(i1); + + if (i2) { + op_data->op_fid2 = *ll_inode2fid(i2); + op_data->op_capa2 = ll_mdscapa_get(i2); + } else { + fid_zero(&op_data->op_fid2); + } + + op_data->op_name = name; + op_data->op_namelen = namelen; + op_data->op_mode = mode; + op_data->op_mod_time = CURRENT_SECONDS; + op_data->op_fsuid = current->fsuid; + op_data->op_fsgid = current->fsgid; + op_data->op_cap = current->cap_effective; + op_data->op_bias = MDS_CHECK_SPLIT; + op_data->op_opc = opc; + op_data->op_mds = 0; + + return op_data; +} + +void ll_finish_md_op_data(struct md_op_data *op_data) +{ + capa_put(op_data->op_capa1); + capa_put(op_data->op_capa2); + OBD_FREE_PTR(op_data); +} + +int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + struct mdt_body *body; + char *cmd, *buf; + struct obd_capa *oc; + int rc, buflen; + ENTRY; + + if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) + RETURN(-EBADE); + + LASSERT(ioc->cmd && ioc->cmd_len && ioc->res && ioc->res_len); + + OBD_ALLOC(cmd, ioc->cmd_len); + if (!cmd) + RETURN(-ENOMEM); + if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) + GOTO(out, rc = -EFAULT); + + oc = ll_mdscapa_get(inode); + rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, + OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, + ioc->cmd_len, ioc->res_len, 0, &req); + capa_put(oc); + if (rc < 0) { + CERROR("mdc_getxattr %s [%s] failed: %d\n", + XATTR_NAME_LUSTRE_ACL, cmd, rc); + GOTO(out, rc); + } + + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); + LASSERT(body); + + buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF); + LASSERT(buflen <= ioc->res_len); + buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF + 1, ioc->res_len); + LASSERT(buf); + if (copy_to_user(ioc->res, buf, buflen)) + GOTO(out, rc = -EFAULT); + EXIT; +out: + if (req) + ptlrpc_req_finished(req); + OBD_FREE(cmd, ioc->cmd_len); + return rc; +} + +int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + char *cmd, *buf; + struct obd_capa *oc; + int buflen, rc; + ENTRY; + + if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) + RETURN(-EBADE); + + if (!(sbi->ll_flags & LL_SBI_ACL)) + RETURN(-EOPNOTSUPP); + + LASSERT(ioc->cmd && ioc->cmd_len && ioc->res && ioc->res_len); + + OBD_ALLOC(cmd, ioc->cmd_len); + if (!cmd) + RETURN(-ENOMEM); + if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) + GOTO(out, rc = -EFAULT); + + oc = ll_mdscapa_get(inode); + rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, + OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, + ioc->cmd_len, ioc->res_len, 0, &req); + capa_put(oc); + if (rc) { + CERROR("mdc_setxattr %s [%s] failed: %d\n", + XATTR_NAME_LUSTRE_ACL, cmd, rc); + GOTO(out, rc); + } + + buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF); + LASSERT(buflen <= ioc->res_len); + buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF, ioc->res_len); + LASSERT(buf); + if (copy_to_user(ioc->res, buf, buflen)) + GOTO(out, rc = -EFAULT); + EXIT; +out: + if (req) + ptlrpc_req_finished(req); + OBD_FREE(cmd, ioc->cmd_len); + return rc; +} diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index f1ff91a..f966cfd 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -18,6 +18,7 @@ * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + #ifdef HAVE_KERNEL_CONFIG_H #include <linux/config.h> #endif @@ -45,6 +46,7 @@ #define DEBUG_SUBSYSTEM S_LLITE +//#include <lustre_mdc.h> #include <lustre_lite.h> #include "llite_internal.h" #include <linux/lustre_compat25.h> @@ -270,7 +272,7 @@ static void policy_from_vma(ldlm_policy_data_t *policy, size_t count) { policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + - ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT); + (vma->vm_pgoff << CFS_PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | ~CFS_PAGE_MASK; } @@ -399,7 +401,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, lov_stripe_lock(lsm); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); + obd_merge_lvb(ll_i2dtexp(inode), lsm, &lvb, 1); kms = lvb.lvb_size; pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff; @@ -420,7 +422,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, * will always be >= the kms value here. b=11081 */ if (inode->i_size < kms) { inode->i_size = kms; - CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n", + CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n", inode->i_ino, inode->i_size); } lov_stripe_unlock(lsm); @@ -432,8 +434,8 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, * bug 10919 */ lov_stripe_lock(lsm); if (mode == LCK_PW) - obd_adjust_kms(ll_i2obdexp(inode), lsm, - min_t(loff_t, policy.l_extent.end,inode->i_size), + obd_adjust_kms(ll_i2dtexp(inode), lsm, + min_t(loff_t, policy.l_extent.end, inode->i_size), 0); lov_stripe_unlock(lsm); @@ -484,7 +486,7 @@ static void ll_vm_open(struct vm_area_struct * vma) if (!lsm) return; - count = obd_join_lru(sbi->ll_osc_exp, lsm, 0); + count = obd_join_lru(sbi->ll_dt_exp, lsm, 0); VMA_DEBUG(vma, "split %d unused locks from lru\n", count); } else { spin_unlock(&lli->lli_lock); @@ -513,7 +515,7 @@ static void ll_vm_close(struct vm_area_struct *vma) if (!lsm) return; - count = obd_join_lru(sbi->ll_osc_exp, lsm, 1); + count = obd_join_lru(sbi->ll_dt_exp, lsm, 1); VMA_DEBUG(vma, "join %d unused locks to lru\n", count); } else { spin_unlock(&lli->lli_lock); @@ -628,7 +630,7 @@ int ll_file_mmap(struct file * file, struct vm_area_struct * vma) int rc; ENTRY; - ll_vfs_ops_tally(ll_i2sbi(file->f_dentry->d_inode), VFS_OPS_MMAP); + ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1); rc = generic_file_mmap(file, vma); if (rc == 0) { #if !defined(HAVE_FILEMAP_POPULATE) && \ diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 123fef6..a568f0a 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -3,7 +3,10 @@ * * NFS export of Lustre Light File System * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002, 2006 Cluster File Systems, Inc. + * + * Author: Yury Umanets <umka@clusterfs.com> + * Huang Hua <huanghua@clusterfs.com> * * This file is part of Lustre, http://www.lustre.org. * @@ -25,236 +28,209 @@ #include <lustre_lite.h> #include "llite_internal.h" -__u32 get_uuid2int(const char *name, int len) -{ - __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9; - while (len--) { - __u32 key = key1 + (key0 ^ (*name++ * 7152373)); - if (key & 0x80000000) key -= 0x7fffffff; - key1 = key0; - key0 = key; - } - return (key0 << 1); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, void *opaque) -#else static int ll_nfs_test_inode(struct inode *inode, void *opaque) -#endif { - struct ll_fid *iid = opaque; - - if (inode->i_ino == iid->id && inode->i_generation == iid->generation) - return 1; - - return 0; + return lu_fid_eq(&ll_i2info(inode)->lli_fid, + (struct lu_fid *)opaque); } -static struct inode * search_inode_for_lustre(struct super_block *sb, - unsigned long ino, - unsigned long generation, - int mode) +static struct inode *search_inode_for_lustre(struct super_block *sb, + struct lu_fid *fid, + int mode) { + struct ll_sb_info *sbi = ll_s2sbi(sb); struct ptlrpc_request *req = NULL; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct ll_fid fid; - unsigned long valid = 0; - int eadatalen = 0, rc; - struct inode *inode = NULL; - struct ll_fid iid = { .id = ino, .generation = generation }; + struct inode *inode = NULL; + unsigned long valid = 0; + int eadatalen = 0; + ino_t ino = ll_fid_build_ino(sbi, fid); + int rc; + ENTRY; - inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid); + CDEBUG(D_INFO, "searching inode for:(%lu,"DFID")\n", ino, PFID(fid)); + inode = ILOOKUP(sb, ino, ll_nfs_test_inode, fid); if (inode) - return inode; + RETURN(inode); + if (S_ISREG(mode)) { rc = ll_get_max_mdsize(sbi, &eadatalen); if (rc) - return ERR_PTR(rc); + RETURN(ERR_PTR(rc)); valid |= OBD_MD_FLEASIZE; } - fid.id = (__u64)ino; - fid.generation = generation; - fid.f_type = mode; - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req); + rc = md_getattr(sbi->ll_md_exp, fid, NULL, valid, eadatalen, &req); if (rc) { - CERROR("failure %d inode %lu\n", rc, ino); - return ERR_PTR(rc); + CERROR("can't get object attrs, fid "DFID", rc %d\n", + PFID(fid), rc); + RETURN(ERR_PTR(rc)); } - rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF, sb); - if (rc) { - ptlrpc_req_finished(req); - return ERR_PTR(rc); - } + rc = ll_prep_inode(&inode, req, REPLY_REC_OFF, sb); ptlrpc_req_finished(req); + if (rc) + RETURN(ERR_PTR(rc)); - return inode; + RETURN(inode); } extern struct dentry_operations ll_d_ops; -static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino, - __u32 generation, umode_t mode) +static struct dentry *ll_iget_for_nfs(struct super_block *sb, + struct lu_fid *fid, + umode_t mode) { - struct inode *inode; + struct inode *inode; struct dentry *result; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct list_head *lp; -#endif + ENTRY; - if (ino == 0) - return ERR_PTR(-ESTALE); + CDEBUG(D_INFO, "Get dentry for fid: "DFID"\n", PFID(fid)); + if (!fid_is_sane(fid)) + RETURN(ERR_PTR(-ESTALE)); - inode = search_inode_for_lustre(sb, ino, generation, mode); - if (IS_ERR(inode)) { - return ERR_PTR(PTR_ERR(inode)); - } - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)){ + inode = search_inode_for_lustre(sb, fid, mode); + if (IS_ERR(inode)) + RETURN(ERR_PTR(PTR_ERR(inode))); + + if (is_bad_inode(inode)) { /* we didn't find the right inode.. */ - CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n", - inode->i_ino, (unsigned long)inode->i_nlink, - atomic_read(&inode->i_count), inode->i_generation, - generation); + CERROR("can't get inode by fid "DFID"\n", + PFID(fid)); iput(inode); - return ERR_PTR(-ESTALE); + RETURN(ERR_PTR(-ESTALE)); } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) result = d_alloc_anon(inode); if (!result) { iput(inode); - return ERR_PTR(-ENOMEM); - } -#else - /* now to find a dentry. - * If possible, get a well-connected one - */ - spin_lock(&dcache_lock); - for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) { - result = list_entry(lp,struct dentry, d_alias); - lock_dentry(result); - if (!(result->d_flags & DCACHE_DISCONNECTED)) { - dget_locked(result); - ll_set_dflags(result, DCACHE_REFERENCED); - unlock_dentry(result); - spin_unlock(&dcache_lock); - iput(inode); - return result; - } - unlock_dentry(result); - } - spin_unlock(&dcache_lock); - result = d_alloc_root(inode); - if (result == NULL) { - iput(inode); - return ERR_PTR(-ENOMEM); + RETURN(ERR_PTR(-ENOMEM)); } - result->d_flags |= DCACHE_DISCONNECTED; - -#endif ll_set_dd(result); result->d_op = &ll_d_ops; - return result; + RETURN(result); } -struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, - int fhtype, int parent) +/* + * This length is counted as amount of __u32, + * It is composed of a fid and a mode + */ +#define ONE_FH_LEN (sizeof(struct lu_fid)/4 + 1) + +static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, + int fh_type, + int (*acceptable)(void *, struct dentry *), + void *context) { - switch (fhtype) { - case 2: - if (len < 5) - break; - if (parent) - return ll_iget_for_nfs(sb, data[3], 0, data[4]); - case 1: - if (len < 3) - break; - if (parent) - break; - return ll_iget_for_nfs(sb, data[0], data[1], data[2]); - default: break; - } - return ERR_PTR(-EINVAL); + struct lu_fid *parent = NULL; + struct lu_fid *child; + struct dentry *entry; + ENTRY; + + CDEBUG(D_INFO, "decoding for "DFID" fh_len=%d fh_type=%d\n", + PFID((struct lu_fid*)fh), fh_len, fh_type); + + if (fh_type != 1 && fh_type != 2) + RETURN(ERR_PTR(-ESTALE)); + if (fh_len < ONE_FH_LEN * fh_type) + RETURN(ERR_PTR(-ESTALE)); + + child = (struct lu_fid*)fh; + if (fh_type == 2) + parent = (struct lu_fid*)(fh + ONE_FH_LEN); + + entry = sb->s_export_op->find_exported_dentry(sb, child, parent, + acceptable, context); + RETURN(entry); } -int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp, - int need_parent) +/* The return value is file handle type: + * 1 -- contains child file handle; + * 2 -- contains child file handle and parent file handle; + * 255 -- error. + */ +static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, int connectable) { - if (*lenp < 3) - return 255; - *datap++ = dentry->d_inode->i_ino; - *datap++ = dentry->d_inode->i_generation; - *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode); - - if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) { - *lenp = 3; - return 1; - } - if (dentry->d_parent) { - *datap++ = dentry->d_parent->d_inode->i_ino; - *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode); + struct inode *inode = de->d_inode; + struct lu_fid *fid = ll_inode2fid(inode); + ENTRY; - *lenp = 5; - return 2; + CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%d\n", + inode->i_ino, PFID(fid), *plen, ONE_FH_LEN); + + if (*plen < ONE_FH_LEN) + RETURN(255); + + memcpy((char*)fh, fid, sizeof(*fid)); + *(fh + ONE_FH_LEN - 1) = (__u32)(S_IFMT & inode->i_mode); + + if (de->d_parent && *plen >= ONE_FH_LEN * 2) { + struct inode *parent = de->d_parent->d_inode; + fh += ONE_FH_LEN; + memcpy((char*)fh, &ll_i2info(parent)->lli_fid, sizeof(*fid)); + *(fh + ONE_FH_LEN - 1) = (__u32)(S_IFMT & parent->i_mode); + *plen = ONE_FH_LEN * 2; + RETURN(2); + } else { + *plen = ONE_FH_LEN; + RETURN(1); } - *lenp = 3; - return 1; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -struct dentry *ll_get_dentry(struct super_block *sb, void *data) +static struct dentry *ll_get_dentry(struct super_block *sb, void *data) { - __u32 *inump = (__u32*)data; - return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG); + struct lu_fid *fid; + struct dentry *entry; + __u32 mode; + ENTRY; + + fid = (struct lu_fid *)data; + mode = *((__u32*)data + ONE_FH_LEN - 1); + + entry = ll_iget_for_nfs(sb, fid, mode); + RETURN(entry); } -struct dentry *ll_get_parent(struct dentry *dchild) +static struct dentry *ll_get_parent(struct dentry *dchild) { struct ptlrpc_request *req = NULL; - struct inode *dir = dchild->d_inode; - struct ll_sb_info *sbi; - struct dentry *result = NULL; - struct ll_fid fid; - struct mds_body *body; - char dotdot[] = ".."; - int rc = 0; + struct inode *dir = dchild->d_inode; + struct ll_sb_info *sbi; + struct dentry *result = NULL; + struct mdt_body *body; + static char dotdot[] = ".."; + int rc; ENTRY; LASSERT(dir && S_ISDIR(dir->i_mode)); - sbi = ll_s2sbi(dir->i_sb); + sbi = ll_s2sbi(dir->i_sb); - fid.id = (__u64)dir->i_ino; - fid.generation = dir->i_generation; - fid.f_type = S_IFDIR; + CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n", + dir->i_ino, PFID(ll_inode2fid(dir))); - rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, dotdot, strlen(dotdot) + 1, - 0, 0, &req); + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), NULL, + dotdot, strlen(dotdot) + 1, 0, 0, &req); if (rc) { CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); - return ERR_PTR(rc); + RETURN(ERR_PTR(rc)); } - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body)); + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); - LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID)); + LASSERT(body->valid & OBD_MD_FLID); - result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR); + CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n", + PFID(ll_inode2fid(dir)), PFID(&body->fid1)); - if (IS_ERR(result)) - rc = PTR_ERR(result); + result = ll_iget_for_nfs(dir->i_sb, &body->fid1, S_IFDIR); ptlrpc_req_finished(req); - if (rc) - return ERR_PTR(rc); RETURN(result); } struct export_operations lustre_export_operations = { .get_parent = ll_get_parent, - .get_dentry = ll_get_dentry, + .get_dentry = ll_get_dentry, + .encode_fh = ll_encode_fh, + .decode_fh = ll_decode_fh, }; -#endif diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 23841d6..d8334ef 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -156,6 +156,23 @@ static int ll_rd_filesfree(char *page, char **start, off_t off, int count, } +static int ll_rd_client_type(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct ll_sb_info *sbi = ll_s2sbi((struct super_block *)data); + int rc; + + LASSERT(sbi != NULL); + + *eof = 1; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + rc = snprintf(page, count, "remote client\n"); + else + rc = snprintf(page, count, "local client\n"); + + return rc; +} + static int ll_rd_fstype(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -188,7 +205,7 @@ static int ll_rd_max_readahead_mb(char *page, char **start, off_t off, pages_number = sbi->ll_ra_info.ra_max_pages; spin_unlock(&sbi->ll_lock); - mult = 1 << (20 - CFS_PAGE_SHIFT); + mult = 1 << (20 - PAGE_CACHE_SHIFT); return lprocfs_read_frac_helper(page, count, pages_number, mult); } @@ -299,7 +316,7 @@ static int ll_wr_max_cached_mb(struct file *file, const char *buffer, sbi->ll_async_page_max = pages_number ; spin_unlock(&sbi->ll_lock); - if (!sbi->ll_osc_exp) + if (!sbi->ll_dt_exp) /* Not set up yet, don't call llap_shrink_cache */ return count; @@ -326,7 +343,7 @@ static int ll_wr_checksum(struct file *file, const char *buffer, struct ll_sb_info *sbi = ll_s2sbi(sb); int val, rc; - if (!sbi->ll_osc_exp) + if (!sbi->ll_dt_exp) /* Not set up yet */ return -EAGAIN; @@ -338,7 +355,7 @@ static int ll_wr_checksum(struct file *file, const char *buffer, else sbi->ll_flags &= ~LL_SBI_CHECKSUM; - rc = obd_set_info_async(sbi->ll_osc_exp, strlen("checksum"), "checksum", + rc = obd_set_info_async(sbi->ll_dt_exp, strlen("checksum"), "checksum", sizeof(val), &val, NULL); if (rc) CWARN("Failed to set OSC checksum flags: %d\n", rc); @@ -368,15 +385,15 @@ static int ll_wr_max_rw_chunk(struct file *file, const char *buffer, } static int ll_rd_track_id(char *page, int count, void *data, - enum vfs_track_type type) + enum stats_track_type type) { struct super_block *sb = data; - if (ll_s2sbi(sb)->ll_vfs_track_type == type) { + if (ll_s2sbi(sb)->ll_stats_track_type == type) { return snprintf(page, count, "%d\n", - ll_s2sbi(sb)->ll_vfs_track_id); + ll_s2sbi(sb)->ll_stats_track_id); - } else if (ll_s2sbi(sb)->ll_vfs_track_type == VFS_TRACK_ALL) { + } else if (ll_s2sbi(sb)->ll_stats_track_type == STATS_TRACK_ALL) { return snprintf(page, count, "0 (all)\n"); } else { return snprintf(page, count, "untracked\n"); @@ -384,7 +401,7 @@ static int ll_rd_track_id(char *page, int count, void *data, } static int ll_wr_track_id(const char *buffer, unsigned long count, void *data, - enum vfs_track_type type) + enum stats_track_type type) { struct super_block *sb = data; int rc, pid; @@ -392,49 +409,49 @@ static int ll_wr_track_id(const char *buffer, unsigned long count, void *data, rc = lprocfs_write_helper(buffer, count, &pid); if (rc) return rc; - ll_s2sbi(sb)->ll_vfs_track_id = pid; + ll_s2sbi(sb)->ll_stats_track_id = pid; if (pid == 0) - ll_s2sbi(sb)->ll_vfs_track_type = VFS_TRACK_ALL; + ll_s2sbi(sb)->ll_stats_track_type = STATS_TRACK_ALL; else - ll_s2sbi(sb)->ll_vfs_track_type = type; - lprocfs_clear_stats(ll_s2sbi(sb)->ll_vfs_ops_stats); + ll_s2sbi(sb)->ll_stats_track_type = type; + lprocfs_clear_stats(ll_s2sbi(sb)->ll_stats); return count; } static int ll_rd_track_pid(char *page, char **start, off_t off, int count, int *eof, void *data) { - return (ll_rd_track_id(page, count, data, VFS_TRACK_PID)); + return (ll_rd_track_id(page, count, data, STATS_TRACK_PID)); } static int ll_wr_track_pid(struct file *file, const char *buffer, unsigned long count, void *data) { - return (ll_wr_track_id(buffer, count, data, VFS_TRACK_PID)); + return (ll_wr_track_id(buffer, count, data, STATS_TRACK_PID)); } static int ll_rd_track_ppid(char *page, char **start, off_t off, int count, int *eof, void *data) { - return (ll_rd_track_id(page, count, data, VFS_TRACK_PPID)); + return (ll_rd_track_id(page, count, data, STATS_TRACK_PPID)); } static int ll_wr_track_ppid(struct file *file, const char *buffer, unsigned long count, void *data) { - return (ll_wr_track_id(buffer, count, data, VFS_TRACK_PPID)); + return (ll_wr_track_id(buffer, count, data, STATS_TRACK_PPID)); } static int ll_rd_track_gid(char *page, char **start, off_t off, int count, int *eof, void *data) { - return (ll_rd_track_id(page, count, data, VFS_TRACK_GID)); + return (ll_rd_track_id(page, count, data, STATS_TRACK_GID)); } static int ll_wr_track_gid(struct file *file, const char *buffer, unsigned long count, void *data) { - return (ll_wr_track_id(buffer, count, data, VFS_TRACK_GID)); + return (ll_wr_track_id(buffer, count, data, STATS_TRACK_GID)); } static struct lprocfs_vars lprocfs_obd_vars[] = { @@ -447,17 +464,18 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "kbytesavail", ll_rd_kbytesavail, 0, 0 }, { "filestotal", ll_rd_filestotal, 0, 0 }, { "filesfree", ll_rd_filesfree, 0, 0 }, + { "client_type", ll_rd_client_type, 0, 0 }, //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, { "max_read_ahead_mb", ll_rd_max_readahead_mb, ll_wr_max_readahead_mb, 0 }, { "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb, ll_wr_max_read_ahead_whole_mb, 0 }, - { "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 }, - { "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 }, - { "max_rw_chunk", ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 }, - { "vfs_track_pid", ll_rd_track_pid, ll_wr_track_pid, 0 }, - { "vfs_track_ppid", ll_rd_track_ppid, ll_wr_track_ppid, 0 }, - { "vfs_track_gid", ll_rd_track_gid, ll_wr_track_gid, 0 }, + { "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 }, + { "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 }, + { "max_rw_chunk", ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 }, + { "stats_track_pid", ll_rd_track_pid, ll_wr_track_pid, 0 }, + { "stats_track_ppid", ll_rd_track_ppid, ll_wr_track_ppid, 0 }, + { "stats_track_gid", ll_rd_track_gid, ll_wr_track_gid, 0 }, { 0 } }; @@ -496,7 +514,8 @@ struct llite_file_opcode { { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" }, /* inode operation */ { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" }, - { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "punch" }, + { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" }, + { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" }, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" }, #else @@ -507,6 +526,9 @@ struct llite_file_opcode { { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" }, { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" }, { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" }, + { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" }, + { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" }, + { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" }, { LPROC_LL_DIRECT_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, "direct_read" }, { LPROC_LL_DIRECT_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, @@ -514,19 +536,21 @@ struct llite_file_opcode { }; -void ll_vfs_ops_tally(struct ll_sb_info *sbi, int op) +void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) { - if (sbi->ll_vfs_ops_stats && sbi->ll_vfs_track_type == VFS_TRACK_ALL) - lprocfs_counter_incr(sbi->ll_vfs_ops_stats, op); - else if (sbi->ll_vfs_track_type == VFS_TRACK_PID && - sbi->ll_vfs_track_id == current->pid) - lprocfs_counter_incr(sbi->ll_vfs_ops_stats, op); - else if (sbi->ll_vfs_track_type == VFS_TRACK_PPID && - sbi->ll_vfs_track_id == current->p_pptr->pid) - lprocfs_counter_incr(sbi->ll_vfs_ops_stats, op); - else if (sbi->ll_vfs_track_type == VFS_TRACK_GID && - sbi->ll_vfs_track_id == current->gid) - lprocfs_counter_incr(sbi->ll_vfs_ops_stats, op); + if (!sbi->ll_stats) + return; + if (sbi->ll_stats_track_type == STATS_TRACK_ALL) + lprocfs_counter_add(sbi->ll_stats, op, count); + else if (sbi->ll_stats_track_type == STATS_TRACK_PID && + sbi->ll_stats_track_id == current->pid) + lprocfs_counter_add(sbi->ll_stats, op, count); + else if (sbi->ll_stats_track_type == STATS_TRACK_PPID && + sbi->ll_stats_track_id == current->p_pptr->pid) + lprocfs_counter_add(sbi->ll_stats, op, count); + else if (sbi->ll_stats_track_type == STATS_TRACK_GID && + sbi->ll_stats_track_id == current->gid) + lprocfs_counter_add(sbi->ll_stats, op, count); } int lprocfs_register_mountpoint(struct proc_dir_entry *parent, @@ -538,7 +562,6 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct obd_device *obd; char name[MAX_STRING_SIZE + 1], *ptr; int err, id, len; - struct lprocfs_stats *vfs_ops_stats = NULL; struct proc_dir_entry *entry; ENTRY; @@ -626,44 +649,6 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, if (err) GOTO(out, err); - /* VFS operations stats */ - vfs_ops_stats = sbi->ll_vfs_ops_stats = - lprocfs_alloc_stats(VFS_OPS_LAST); - if (vfs_ops_stats == NULL) - GOTO(out, err = -ENOMEM); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_READ, 0, "read", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_WRITE, 0, "write", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_IOCTL, 0, "ioctl", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_OPEN, 0, "open", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_RELEASE, 0, "release", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_MMAP, 0, "mmap", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_SEEK, 0, "seek", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_FSYNC, 0, "fsync", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_FLOCK, 0, "flock", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_SETATTR, 0, "setattr", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_GETATTR, 0, "getattr", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_SETXATTR, 0, "setxattr", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_GETXATTR, 0, "getxattr", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_LISTXATTR, 0, "listxattr", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_REMOVEXATTR, 0, - "removexattr", "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_TRUNCATE, 0, "truncate", - "reqs"); - lprocfs_counter_init(vfs_ops_stats, VFS_OPS_INODE_PERMISSION, 0, - "inode_permission", "reqs"); - - err = lprocfs_register_stats(sbi->ll_proc_root, "vfs_ops_stats", - vfs_ops_stats); - if (err) - GOTO(out, err); - - /* Static configuration info */ err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb); if (err) GOTO(out, err); @@ -709,7 +694,6 @@ out: if (err) { lprocfs_remove(&sbi->ll_proc_root); lprocfs_free_stats(&sbi->ll_stats); - lprocfs_free_stats(&sbi->ll_vfs_ops_stats); } RETURN(err); } @@ -719,7 +703,6 @@ void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) if (sbi->ll_proc_root) { lprocfs_remove(&sbi->ll_proc_root); lprocfs_free_stats(&sbi->ll_stats); - lprocfs_free_stats(&sbi->ll_vfs_ops_stats); } } #undef MAX_STRING_SIZE @@ -861,7 +844,6 @@ static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file) OBD_ALLOC_GFP(dummy_llap, sizeof(*dummy_llap), GFP_KERNEL); if (dummy_llap == NULL) GOTO(out, rc); - dummy_llap->llap_page = NULL; dummy_llap->llap_cookie = sbi; dummy_llap->llap_magic = 0; @@ -1013,14 +995,18 @@ static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) do_gettimeofday(&now); + if (!sbi->ll_rw_stats_on) { + seq_printf(seq, "Disabled\n" + "Write anything in this file to activate\n"); + return 0; + } seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", "extents", "calls", "%", "cum%", "calls", "%", "cum%"); - - spin_lock(&sbi->ll_lock); + spin_lock(&sbi->ll_pp_extent_lock); for(k = 0; k < LL_PROCESS_HIST_MAX; k++) { if(io_extents->pp_extents[k].pid != 0) { seq_printf(seq, "\nPID: %d\n", @@ -1028,8 +1014,7 @@ static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) ll_display_extents_info(io_extents, seq, k); } } - spin_unlock(&sbi->ll_lock); - + spin_unlock(&sbi->ll_pp_extent_lock); return 0; } @@ -1042,13 +1027,14 @@ static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file, struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; int i; - spin_lock(&sbi->ll_lock); + sbi->ll_rw_stats_on = 1; + spin_lock(&sbi->ll_pp_extent_lock); for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { io_extents->pp_extents[i].pid = 0; lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); } - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_pp_extent_lock); return len; } @@ -1062,6 +1048,11 @@ static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) do_gettimeofday(&now); + if (!sbi->ll_rw_stats_on) { + seq_printf(seq, "Disabled\n" + "Write anything in this file to activate\n"); + return 0; + } seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); @@ -1069,7 +1060,6 @@ static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", "extents", "calls", "%", "cum%", "calls", "%", "cum%"); - spin_lock(&sbi->ll_lock); ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX); spin_unlock(&sbi->ll_lock); @@ -1083,9 +1073,17 @@ static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf, struct seq_file *seq = file->private_data; struct ll_sb_info *sbi = seq->private; struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + int i; - lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist); + sbi->ll_rw_stats_on = 1; + spin_lock(&sbi->ll_pp_extent_lock); + for(i = 0; i <= LL_PROCESS_HIST_MAX; i++) + { + io_extents->pp_extents[i].pid = 0; + lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); + lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); + } + spin_unlock(&sbi->ll_pp_extent_lock); return len; } @@ -1102,10 +1100,12 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file int *process_count = &sbi->ll_offset_process_count; struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + if(!sbi->ll_rw_stats_on) + return; process = sbi->ll_rw_process_info; offset = sbi->ll_rw_offset_info; - spin_lock(&sbi->ll_lock); + spin_lock(&sbi->ll_pp_extent_lock); /* Extent statistics */ for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { if(io_extents->pp_extents[i].pid == pid) { @@ -1133,7 +1133,9 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; } + spin_unlock(&sbi->ll_pp_extent_lock); + spin_lock(&sbi->ll_process_lock); /* Offset statistics */ for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { if (process[i].rw_pid == pid) { @@ -1145,7 +1147,7 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file process[i].rw_largest_extent = count; process[i].rw_offset = 0; process[i].rw_last_file = file; - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_process_lock); return; } if (process[i].rw_last_file_pos != file->f_pos) { @@ -1175,7 +1177,7 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file if(process[i].rw_largest_extent < count) process[i].rw_largest_extent = count; process[i].rw_last_file_pos = file->f_pos + count; - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_process_lock); return; } } @@ -1188,7 +1190,7 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file process[*process_count].rw_largest_extent = count; process[*process_count].rw_offset = 0; process[*process_count].rw_last_file = file; - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_process_lock); } char lpszt[] = LPSZ; @@ -1204,14 +1206,19 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) do_gettimeofday(&now); - spin_lock(&sbi->ll_lock); + if (!sbi->ll_rw_stats_on) { + seq_printf(seq, "Disabled\n" + "Write anything in this file to activate\n"); + return 0; + } + spin_lock(&sbi->ll_process_lock); seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n", "R/W", "PID", "RANGE START", "RANGE END", "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET"); - sprintf(format, "%s%s%s%s%s\n", + sprintf(format, "%s%s%s%s%s\n", "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1, " %14Ld"); /* We stored the discontiguous offsets here; print them first */ for(i = 0; i < LL_OFFSET_HIST_MAX; i++) { @@ -1238,7 +1245,7 @@ static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) process[i].rw_largest_extent, process[i].rw_offset); } - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_process_lock); return 0; } @@ -1251,14 +1258,16 @@ static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf, struct ll_rw_process_info *process_info = sbi->ll_rw_process_info; struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info; - spin_lock(&sbi->ll_lock); + sbi->ll_rw_stats_on = 1; + + spin_lock(&sbi->ll_process_lock); sbi->ll_offset_process_count = 0; sbi->ll_rw_offset_entry_count = 0; memset(process_info, 0, sizeof(struct ll_rw_process_info) * LL_PROCESS_HIST_MAX); memset(offset_info, 0, sizeof(struct ll_rw_process_info) * LL_OFFSET_HIST_MAX); - spin_unlock(&sbi->ll_lock); + spin_unlock(&sbi->ll_process_lock); return len; } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 3ec9b6a..83e10cb 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -30,76 +30,37 @@ #define DEBUG_SUBSYSTEM S_LLITE #include <obd_support.h> +#include <lustre_fid.h> #include <lustre_lite.h> #include <lustre_dlm.h> -#include <linux/lustre_version.h> +#include <lustre_ver.h> +#include <lustre_mdc.h> #include "llite_internal.h" /* methods */ +extern struct dentry_operations ll_d_ops; -/* called from iget{4,5_locked}->find_inode() under inode_lock spinlock */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque) -#else -static int ll_test_inode(struct inode *inode, void *opaque) -#endif +/* + * Check if we have something mounted at the named dchild. + * In such a case there would always be dentry present. + */ +static int ll_d_mountpoint(struct dentry *dparent, struct dentry *dchild, + struct qstr *name) { - static int last_ino, last_gen, last_count; - struct lustre_md *md = opaque; - - if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) { - CERROR("MDS body missing inum or generation\n"); - return 0; - } - - if (last_ino == md->body->ino && last_gen == md->body->generation && - last_count < 500) { - last_count++; - } else { - if (last_count > 1) - CDEBUG(D_VFSTRACE, "compared %u/%u %u times\n", - last_ino, last_gen, last_count); - last_count = 0; - last_ino = md->body->ino; - last_gen = md->body->generation; - CDEBUG(D_VFSTRACE, - "comparing inode %p ino %lu/%u to body "LPU64"/%u\n", - inode, inode->i_ino, inode->i_generation, - md->body->ino, md->body->generation); - } - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - if (inode->i_ino != md->body->ino) - return 0; -#endif - if (inode->i_generation != md->body->generation) { -#ifdef HAVE_EXPORT___IGET - if (inode->i_state & (I_FREEING | I_CLEAR)) - return 0; - if (inode->i_nlink == 0) - return 0; - - /* add "duplicate" inode into deathrow for destroy */ - spin_lock(&ll_i2sbi(inode)->ll_deathrow_lock); - if (list_empty(&ll_i2info(inode)->lli_dead_list)) { - __iget(inode); - list_add(&ll_i2info(inode)->lli_dead_list, - &ll_i2sbi(inode)->ll_deathrow); + int mounted = 0; + + if (unlikely(dchild)) { + mounted = d_mountpoint(dchild); + } else if (dparent) { + dchild = d_lookup(dparent, name); + if (dchild) { + mounted = d_mountpoint(dchild); + dput(dchild); } - spin_unlock(&ll_i2sbi(inode)->ll_deathrow_lock); -#endif - - return 0; } - - /* Apply the attributes in 'opaque' to this inode */ - if (!(inode->i_state & (I_FREEING | I_CLEAR))) - ll_update_inode(inode, md); - return 1; + return mounted; } -extern struct dentry_operations ll_d_ops; - int ll_unlock(__u32 mode, struct lustre_handle *lockh) { ENTRY; @@ -109,29 +70,30 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh) RETURN(0); } -/* Get an inode by inode number (already instantiated by the intent lookup). +/* + * Get an inode by inode number (already instantiated by the intent lookup). * Returns inode or NULL */ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -int ll_set_inode(struct inode *inode, void *opaque) -{ - ll_read_inode2(inode, opaque); - return 0; -} - struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *md) { + struct ll_inode_info *lli; struct inode *inode; - LASSERT(hash != 0); - inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md); + inode = iget_locked(sb, hash); if (inode) { - if (inode->i_state & I_NEW) + if (inode->i_state & I_NEW) { + lli = ll_i2info(inode); + ll_read_inode2(inode, md); unlock_new_inode(inode); - CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + } else { + if (!(inode->i_state & (I_FREEING | I_CLEAR))) + ll_update_inode(inode, md); + } + CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", + inode->i_ino, inode->i_generation, inode); } return inode; @@ -142,16 +104,48 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, { struct inode *inode; LASSERT(hash != 0); - inode = iget4(sb, hash, ll_test_inode, md); - if (inode) - CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + + inode = iget4(sb, hash, NULL, md); + if (inode) { + if (!(inode->i_state & (I_FREEING | I_CLEAR))) + ll_update_inode(inode, md); + + CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", + inode->i_ino, inode->i_generation, inode); + } return inode; } #endif -int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) +static void ll_drop_negative_dentry(struct inode *dir) +{ + struct dentry *dentry, *tmp_alias, *tmp_subdir; + + spin_lock(&dcache_lock); +restart: + list_for_each_entry_safe(dentry, tmp_alias, + &dir->i_dentry,d_alias) { + if (!list_empty(&dentry->d_subdirs)) { + struct dentry *child; + list_for_each_entry_safe(child, tmp_subdir, + &dentry->d_subdirs, + d_child) { + /* XXX Print some debug here? */ + if (!child->d_inode) + /* Negative dentry. If we were + dropping dcache lock, go + throught the list again */ + if (ll_drop_dentry(child)) + goto restart; + } + } + } + spin_unlock(&dcache_lock); +} + + +int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag) { int rc; struct lustre_handle lockh; @@ -169,15 +163,18 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, case LDLM_CB_CANCELING: { struct inode *inode = ll_inode_from_lock(lock); __u64 bits = lock->l_policy_data.l_inodebits.bits; + struct lu_fid *fid; /* Invalidate all dentries associated with this inode */ if (inode == NULL) break; - if (lock->l_resource->lr_name.name[0] != inode->i_ino || - lock->l_resource->lr_name.name[1] != inode->i_generation) { - LDLM_ERROR(lock, "data mismatch with ino %lu/%u (%p)", - inode->i_ino, inode->i_generation, inode); + fid = ll_inode2fid(inode); + if (lock->l_resource->lr_name.name[0] != fid_seq(fid) || + lock->l_resource->lr_name.name[1] != fid_oid(fid) || + lock->l_resource->lr_name.name[2] != fid_ver(fid)) { + LDLM_ERROR(lock, "data mismatch with object " + DFID" (%p)", PFID(fid), inode); } if (bits & MDS_INODELOCK_OPEN) { @@ -188,8 +185,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, break; case LCK_PR: flags = FMODE_EXEC; - if (!FMODE_EXEC) - CERROR("open PR lock without FMODE_EXEC\n"); break; case LCK_CR: flags = FMODE_READ; @@ -199,55 +194,18 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, "%d, inode %ld\n", lock->l_req_mode, inode->i_ino); } - ll_mdc_real_close(inode, flags); + ll_md_real_close(inode, flags); } if (bits & MDS_INODELOCK_UPDATE) - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, - &(ll_i2info(inode)->lli_flags)); + ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; if (S_ISDIR(inode->i_mode) && (bits & MDS_INODELOCK_UPDATE)) { - struct dentry *dentry, *tmp, *dir; - int alias_counter = 0; - CDEBUG(D_INODE, "invalidating inode %lu\n", inode->i_ino); truncate_inode_pages(inode->i_mapping, 0); - - /* Drop possible cached negative dentries */ - dir = NULL; - spin_lock(&dcache_lock); - - /* It is possible to have several dentries (with - racer?) */ - list_for_each_entry_safe(dentry, tmp, - &inode->i_dentry,d_alias) { - if (!list_empty(&dentry->d_subdirs)) - dir = dentry; - alias_counter ++; - } - - if (alias_counter > 1) - CWARN("More than 1 alias dir %lu alias %d\n", - inode->i_ino, alias_counter); - - if (dir) { -restart: - list_for_each_entry_safe(dentry, tmp, - &dir->d_subdirs, - d_child) - { - /* XXX Print some debug here? */ - if (!dentry->d_inode) - /* Negative dentry. If we were - dropping dcache lock, go - throught the list again */ - if (ll_drop_dentry(dentry)) - goto restart; - } - } - spin_unlock(&dcache_lock); + ll_drop_negative_dentry(inode); } if (inode->i_sb->s_root && @@ -264,18 +222,6 @@ restart: RETURN(0); } -int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, - int flags, void *opaque) -{ - struct ldlm_res_id res_id = - { .name = {inode->i_ino, inode->i_generation} }; - struct obd_device *obddev = class_conn2obd(conn); - ENTRY; - - RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags, - opaque)); -} - /* Pack the required supplementary groups into the supplied groups array. * If we don't need to use the groups from the target inode(s) then we * instead pack one or more groups from the user's supplementary group @@ -316,30 +262,6 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) } } -int ll_prepare_mdc_op_data(struct mdc_op_data *data, struct inode *i1, - struct inode *i2, const char *name, int namelen, - int mode) -{ - LASSERT(i1); - - if (namelen > ll_i2sbi(i1)->ll_namelen) - return -ENAMETOOLONG; - ll_i2gids(data->suppgids, i1, i2); - ll_inode2fid(&data->fid1, i1); - - if (i2) - ll_inode2fid(&data->fid2, i2); - else - memset(&data->fid2, 0, sizeof(data->fid2)); - - data->name = name; - data->namelen = namelen; - data->create_mode = mode; - data->mod_time = CURRENT_SECONDS; - - return 0; -} - static void ll_d_add(struct dentry *de, struct inode *inode) { CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de); @@ -376,7 +298,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) struct list_head *tmp; struct dentry *dentry; struct dentry *last_discon = NULL; - + spin_lock(&dcache_lock); list_for_each(tmp, &inode->i_dentry) { dentry = list_entry(tmp, struct dentry, d_alias); @@ -421,18 +343,20 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) de->d_parent, de->d_inode, atomic_read(&de->d_count)); return dentry; } + if (last_discon) { - CDEBUG(D_DENTRY, "Reuse disconnected dentry %p inode %p " + CDEBUG(D_DENTRY, "Reuse disconnected dentry %p inode %p " "refc %d\n", last_discon, last_discon->d_inode, atomic_read(&last_discon->d_count)); - dget_locked(last_discon); - spin_unlock(&dcache_lock); - d_rehash(de); - d_move(last_discon, de); - iput(inode); - return last_discon; + __d_rehash(de, 0); + dget_locked(last_discon); + __d_move(last_discon, de); + spin_unlock(&dcache_lock); + d_rehash(de); + iput(inode); + return last_discon; } - + ll_d_add(de, inode); spin_unlock(&dcache_lock); @@ -455,14 +379,15 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, if (!it_disposition(it, DISP_LOOKUP_NEG)) { ENTRY; - rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, offset, + rc = ll_prep_inode(&inode, request, offset, (*de)->d_sb); if (rc) RETURN(rc); CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", inode, inode->i_ino, inode->i_generation); - mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode); + md_set_lock_data(sbi->ll_md_exp, + &it->d.lustre.it_lock_handle, inode); /* We used to query real size from OSTs here, but actually this is not needed. For stat() calls size would be updated @@ -472,6 +397,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, Everybody else who needs correct file size would call ll_glimpse_size or some equivalent themselves anyway. Also see bug 7198. */ + *de = ll_find_alias(inode, *de); } else { ENTRY; @@ -483,6 +409,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, ll_d_add(*de, inode); spin_unlock(&dcache_lock); } else { + (*de)->d_inode = NULL; /* We do not want to hash the dentry if don`t have a * lock, but if this dentry is later used in d_move, * we'd hit uninitialised list head d_hash, so we just @@ -502,14 +429,18 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, struct lookup_intent *it, int lookup_flags) { + struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; struct dentry *save = dentry, *retval; - struct mdc_op_data op_data; - struct it_cb_data icbd; struct ptlrpc_request *req = NULL; - struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; + struct md_op_data *op_data; + struct it_cb_data icbd; + __u32 opc; int rc; ENTRY; + if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) + RETURN(ERR_PTR(-ENAMETOOLONG)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n", dentry->d_name.len, dentry->d_name.name, parent->i_ino, parent->i_generation, parent, LL_IT2STR(it)); @@ -531,16 +462,22 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, icbd.icbd_childp = &dentry; icbd.icbd_parent = parent; - rc = ll_prepare_mdc_op_data(&op_data, parent, NULL, dentry->d_name.name, - dentry->d_name.len, lookup_flags); - if (rc) - RETURN(ERR_PTR(rc)); + if (it->it_op & IT_CREAT || + (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT)) + opc = LUSTRE_OPC_CREATE; + else + opc = LUSTRE_OPC_ANY; - it->it_create_mode &= ~current->fs->umask; + op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name, + dentry->d_name.len, lookup_flags, opc); + if (IS_ERR(op_data)) + RETURN((void *)op_data); - rc = mdc_intent_lock(ll_i2mdcexp(parent), &op_data, NULL, 0, it, - lookup_flags, &req, ll_mdc_blocking_ast, 0); + it->it_create_mode &= ~current->fs->umask; + rc = md_intent_lock(ll_i2mdexp(parent), op_data, NULL, 0, it, + lookup_flags, &req, ll_md_blocking_ast, 0); + ll_finish_md_op_data(op_data); if (rc < 0) GOTO(out, retval = ERR_PTR(rc)); @@ -715,8 +652,7 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); request = it->d.lustre.it_data; it_clear_disposition(it, DISP_ENQ_CREATE_REF); - rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, DLM_REPLY_REC_OFF, - dir->i_sb); + rc = ll_prep_inode(&inode, request, DLM_REPLY_REC_OFF, dir->i_sb); if (rc) GOTO(out, inode = ERR_PTR(rc)); @@ -727,7 +663,8 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, * stuff it in the lock. */ CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n", inode, inode->i_ino, inode->i_generation); - mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode); + md_set_lock_data(sbi->ll_md_exp, + &it->d.lustre.it_lock_handle, inode); EXIT; out: ptlrpc_req_finished(request); @@ -752,7 +689,6 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it) { struct inode *inode; - struct ptlrpc_request *request = it->d.lustre.it_data; int rc = 0; ENTRY; @@ -764,8 +700,6 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, if (rc) RETURN(rc); - mdc_store_inode_generation(request, DLM_INTENT_REC_OFF, - DLM_REPLY_REC_OFF); inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, NULL, 0, mode, 0, it); if (IS_ERR(inode)) { @@ -786,7 +720,7 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, static void ll_update_times(struct ptlrpc_request *request, int offset, struct inode *inode) { - struct mds_body *body = lustre_msg_buf(request->rq_repmsg, offset, + struct mdt_body *body = lustre_msg_buf(request->rq_repmsg, offset, sizeof(*body)); LASSERT(body); @@ -808,36 +742,37 @@ static void ll_update_times(struct ptlrpc_request *request, int offset, } static int ll_new_node(struct inode *dir, struct qstr *name, - const char *tgt, int mode, - int rdev, struct dentry *dchild) + const char *tgt, int mode, int rdev, + struct dentry *dchild, __u32 opc) { struct ptlrpc_request *request = NULL; + struct md_op_data *op_data; struct inode *inode = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); - struct mdc_op_data op_data; int tgt_len = 0; int err; ENTRY; if (unlikely(tgt != NULL)) - tgt_len = strlen(tgt)+1; + tgt_len = strlen(tgt) + 1; - err = ll_prepare_mdc_op_data(&op_data, dir, NULL, name->name, - name->len, 0); - if (err) - GOTO(err_exit, err); + op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, + name->len, 0, opc); + if (IS_ERR(op_data)) + GOTO(err_exit, err = PTR_ERR(op_data)); - err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, tgt_len, - mode, current->fsuid, current->fsgid, - current->cap_effective, rdev, &request); + err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode, + current->fsuid, current->fsgid, + current->cap_effective, rdev, &request); + ll_finish_md_op_data(op_data); if (err) GOTO(err_exit, err); ll_update_times(request, REPLY_REC_OFF, dir); if (dchild) { - err = ll_prep_inode(sbi->ll_osc_exp, &inode, request, - REPLY_REC_OFF, dchild->d_sb); + err = ll_prep_inode(&inode, request, REPLY_REC_OFF, + dchild->d_sb); if (err) GOTO(err_exit, err); @@ -851,7 +786,6 @@ err_exit: return err; } - static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode, unsigned rdev, struct dentry *dchild) { @@ -872,7 +806,8 @@ static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - err = ll_new_node(dir, name, NULL, mode, rdev, dchild); + err = ll_new_node(dir, name, NULL, mode, rdev, dchild, + LUSTRE_OPC_MKNOD); break; case S_IFDIR: err = -EPERM; @@ -885,7 +820,8 @@ static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) #ifndef LUSTRE_KERNEL_VERSION -static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +static int ll_create_nd(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { struct lookup_intent *it = ll_d2d(dentry)->lld_it; int rc; @@ -918,9 +854,9 @@ out: return rc; } #else -static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +static int ll_create_nd(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { - if (!nd || !nd->intent.d.lustre.it_disposition) /* No saved request? Just mknod the file */ return ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry); @@ -941,17 +877,17 @@ static int ll_symlink_generic(struct inode *dir, struct qstr *name, dir, 3000, tgt); err = ll_new_node(dir, name, (char *)tgt, S_IFLNK | S_IRWXUGO, - 0, dchild); + 0, dchild, LUSTRE_OPC_SYMLINK); RETURN(err); } static int ll_link_generic(struct inode *src, struct inode *dir, struct qstr *name, struct dentry *dchild) { + struct ll_sb_info *sbi = ll_i2sbi(dir); struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data *op_data; int err; - struct ll_sb_info *sbi = ll_i2sbi(dir); ENTRY; CDEBUG(D_VFSTRACE, @@ -959,82 +895,71 @@ static int ll_link_generic(struct inode *src, struct inode *dir, src->i_ino, src->i_generation, src, dir->i_ino, dir->i_generation, dir, name->len, name->name); - err = ll_prepare_mdc_op_data(&op_data, src, dir, name->name, - name->len, 0); + op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len, + 0, LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + err = md_link(sbi->ll_md_exp, op_data, &request); + ll_finish_md_op_data(op_data); if (err) GOTO(out, err); - err = mdc_link(sbi->ll_mdc_exp, &op_data, &request); - if (err) - GOTO(out, err); - - if (dchild) { + if (dchild) d_drop(dchild); - } - ll_update_times(request, REPLY_REC_OFF, dir); + ll_update_times(request, REPLY_REC_OFF, dir); EXIT; out: ptlrpc_req_finished(request); RETURN(err); } -static int ll_mkdir_generic(struct inode *dir, struct qstr *name, int mode, - struct dentry *dchild) +static int ll_mkdir_generic(struct inode *dir, struct qstr *name, + int mode, struct dentry *dchild) { int err; - ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", name->len, name->name, dir->i_ino, dir->i_generation, dir); mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; - err = ll_new_node(dir, name, NULL, mode, 0, dchild); + err = ll_new_node(dir, name, NULL, mode, 0, dchild, LUSTRE_OPC_MKDIR); RETURN(err); } static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent, - struct qstr *name) + struct dentry *dchild, struct qstr *name) { struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - struct dentry *dentry; + struct md_op_data *op_data; int rc; ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", name->len, name->name, dir->i_ino, dir->i_generation, dir); - /* Check if we have something mounted at the dir we are going to delete - * In such a case there would always be dentry present. */ - if (dparent) { - dentry = d_lookup(dparent, name); - if (dentry) { - int mounted = d_mountpoint(dentry); - dput(dentry); - if (mounted) - GOTO(out, rc = -EBUSY); - } - } + if (unlikely(ll_d_mountpoint(dparent, dchild, name))) + RETURN(-EBUSY); - rc = ll_prepare_mdc_op_data(&op_data, dir, NULL, name->name, - name->len, S_IFDIR); - if (rc) - GOTO(out, rc); - rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, &op_data, &request); - if (rc) - GOTO(out, rc); - ll_update_times(request, REPLY_REC_OFF, dir); + op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, + S_IFDIR, LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); - EXIT; -out: + rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); + ll_finish_md_op_data(op_data); + if (rc == 0) + ll_update_times(request, REPLY_REC_OFF, dir); ptlrpc_req_finished(request); - return(rc); + RETURN(rc); } int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) { - struct mds_body *body; + struct mdt_body *body; struct lov_mds_md *eadata; struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; @@ -1065,24 +990,25 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) GOTO(out, rc = -EPROTO); } - rc = obd_unpackmd(ll_i2obdexp(dir), &lsm, eadata, body->eadatasize); + rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize); if (rc < 0) { CERROR("obd_unpackmd: %d\n", rc); GOTO(out, rc); } LASSERT(rc >= sizeof(*lsm)); - rc = obd_checkmd(ll_i2obdexp(dir), ll_i2mdcexp(dir), lsm); + rc = obd_checkmd(ll_i2dtexp(dir), ll_i2mdexp(dir), lsm); if (rc) GOTO(out_free_memmd, rc); - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(out_free_memmd, rc = -ENOMEM); oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; oa->o_mode = body->mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; if (body->valid & OBD_MD_FLCOOKIE) { oa->o_valid |= OBD_MD_FLCOOKIE; @@ -1096,79 +1022,90 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } } - rc = obd_destroy(ll_i2obdexp(dir), oa, lsm, &oti, ll_i2mdcexp(dir)); - obdo_free(oa); + rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir)); + OBDO_FREE(oa); if (rc) CERROR("obd destroy objid "LPX64" error %d\n", lsm->lsm_object_id, rc); out_free_memmd: - obd_free_memmd(ll_i2obdexp(dir), &lsm); + obd_free_memmd(ll_i2dtexp(dir), &lsm); out: return rc; } -static int ll_unlink_generic(struct inode * dir, struct qstr *name) +static int ll_unlink_generic(struct inode *dir, struct dentry *dparent, + struct dentry *dchild, struct qstr *name) { struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data *op_data; int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", name->len, name->name, dir->i_ino, dir->i_generation, dir); - rc = ll_prepare_mdc_op_data(&op_data, dir, NULL, name->name, - name->len, 0); - if (rc) - GOTO(out, rc); - rc = mdc_unlink(ll_i2sbi(dir)->ll_mdc_exp, &op_data, &request); + /* + * XXX: unlink bind mountpoint maybe call to here, + * just check it as vfs_unlink does. + */ + if (unlikely(ll_d_mountpoint(dparent, dchild, name))) + RETURN(-EBUSY); + + op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, + name->len, 0, LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); + ll_finish_md_op_data(op_data); + if (rc) GOTO(out, rc); ll_update_times(request, REPLY_REC_OFF, dir); rc = ll_objects_destroy(request, dir); - if (rc) - GOTO(out, rc); - EXIT; out: ptlrpc_req_finished(request); - return(rc); + RETURN(rc); } -static int ll_rename_generic(struct inode *src, struct qstr *src_name, - struct inode *tgt, struct qstr *tgt_name) +static int ll_rename_generic(struct inode *src, struct dentry *src_dparent, + struct dentry *src_dchild, struct qstr *src_name, + struct inode *tgt, struct dentry *tgt_dparent, + struct dentry *tgt_dchild, struct qstr *tgt_name) { struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(src); - struct mdc_op_data op_data; + struct md_op_data *op_data; int err; - ENTRY; CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s," "tgt_dir=%lu/%u(%p)\n", src_name->len, src_name->name, src->i_ino, src->i_generation, src, tgt_name->len, tgt_name->name, tgt->i_ino, tgt->i_generation, tgt); - err = ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); - if (err) - GOTO(out, err); - err = mdc_rename(sbi->ll_mdc_exp, &op_data, - src_name->name, src_name->len, - tgt_name->name, tgt_name->len, &request); - if (err) - GOTO(out, err); - ll_update_times(request, REPLY_REC_OFF, src); - ll_update_times(request, REPLY_REC_OFF, tgt); - err = ll_objects_destroy(request, src); - if (err) - GOTO(out, err); + if (unlikely(ll_d_mountpoint(src_dparent, src_dchild, src_name) || + ll_d_mountpoint(tgt_dparent, tgt_dchild, tgt_name))) + RETURN(-EBUSY); + + op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, + LUSTRE_OPC_ANY); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); + + err = md_rename(sbi->ll_md_exp, op_data, + src_name->name, src_name->len, + tgt_name->name, tgt_name->len, &request); + ll_finish_md_op_data(op_data); + if (!err) { + ll_update_times(request, REPLY_REC_OFF, src); + ll_update_times(request, REPLY_REC_OFF, tgt); + err = ll_objects_destroy(request, src); + } - EXIT; -out: ptlrpc_req_finished(request); - return(err); + RETURN(err); } #ifdef LUSTRE_KERNEL_VERSION @@ -1178,8 +1115,10 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) } static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { - return ll_rename_generic(srcnd->dentry->d_inode, &srcnd->last, - tgtnd->dentry->d_inode, &tgtnd->last); + return ll_rename_generic(srcnd->dentry->d_inode, srcnd->dentry, + NULL, &srcnd->last, + tgtnd->dentry->d_inode, tgtnd->dentry, + NULL, &tgtnd->last); } static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { @@ -1192,7 +1131,8 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt) } static int ll_rmdir_raw(struct nameidata *nd) { - return ll_rmdir_generic(nd->dentry->d_inode, nd->dentry, &nd->last); + return ll_rmdir_generic(nd->dentry->d_inode, nd->dentry, NULL, + &nd->last); } static int ll_mkdir_raw(struct nameidata *nd, int mode) { @@ -1200,7 +1140,8 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode) } static int ll_unlink_raw(struct nameidata *nd) { - return ll_unlink_generic(nd->dentry->d_inode, &nd->last); + return ll_unlink_generic(nd->dentry->d_inode, nd->dentry, NULL, + &nd->last); } #endif @@ -1214,7 +1155,7 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) static int ll_unlink(struct inode * dir, struct dentry *dentry) { - return ll_unlink_generic(dir, &dentry->d_name); + return ll_unlink_generic(dir, NULL, dentry, &dentry->d_name); } static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode) { @@ -1222,7 +1163,7 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode) } static int ll_rmdir(struct inode *dir, struct dentry *dentry) { - return ll_rmdir_generic(dir, NULL, &dentry->d_name); + return ll_rmdir_generic(dir, NULL, dentry, &dentry->d_name); } static int ll_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) @@ -1232,13 +1173,15 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, static int ll_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { - return ll_link_generic(old_dentry->d_inode, dir, - &new_dentry->d_name, new_dentry); + return ll_link_generic(old_dentry->d_inode, dir, &new_dentry->d_name, + new_dentry); } static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - return ll_rename_generic(old_dir, &old_dentry->d_name, new_dir, + return ll_rename_generic(old_dir, NULL, + old_dentry, &old_dentry->d_name, + new_dir, NULL, new_dentry, &new_dentry->d_name); } #endif diff --git a/lustre/llite/remote_perm.c b/lustre/llite/remote_perm.c new file mode 100644 index 0000000..2207bf8 --- /dev/null +++ b/lustre/llite/remote_perm.c @@ -0,0 +1,310 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Permission Cache for Remote Client + * Author: Lai Siyao <lsy@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * Copyright (c) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/random.h> +#include <linux/version.h> + +#include <lustre_lite.h> +#include <lustre_ha.h> +#include <lustre_dlm.h> +#include <lprocfs_status.h> +#include <lustre_disk.h> +#include <lustre_param.h> +#include "llite_internal.h" + +cfs_mem_cache_t *ll_remote_perm_cachep = NULL; +cfs_mem_cache_t *ll_rmtperm_hash_cachep = NULL; + +static inline struct ll_remote_perm *alloc_ll_remote_perm(void) +{ + struct ll_remote_perm *lrp; + + OBD_SLAB_ALLOC(lrp, ll_remote_perm_cachep, SLAB_KERNEL, sizeof(*lrp)); + if (lrp) + INIT_HLIST_NODE(&lrp->lrp_list); + return lrp; +} + +static inline void free_ll_remote_perm(struct ll_remote_perm *lrp) +{ + if (!lrp) + return; + + if (!hlist_unhashed(&lrp->lrp_list)) + hlist_del(&lrp->lrp_list); + OBD_SLAB_FREE(lrp, ll_remote_perm_cachep, sizeof(*lrp)); +} + +struct hlist_head *alloc_rmtperm_hash(void) +{ + struct hlist_head *hash; + int i; + + OBD_SLAB_ALLOC(hash, ll_rmtperm_hash_cachep, SLAB_KERNEL, + REMOTE_PERM_HASHSIZE * sizeof(*hash)); + + if (!hash) + return NULL; + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) + INIT_HLIST_HEAD(hash + i); + + return hash; +} + +void free_rmtperm_hash(struct hlist_head *hash) +{ + int i; + struct ll_remote_perm *lrp; + struct hlist_node *node, *next; + + if(!hash) + return; + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) + hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list) + free_ll_remote_perm(lrp); + OBD_SLAB_FREE(hash, ll_rmtperm_hash_cachep, + REMOTE_PERM_HASHSIZE * sizeof(*hash)); +} + +static inline int remote_perm_hashfunc(uid_t uid) +{ + return uid & (REMOTE_PERM_HASHSIZE - 1); +} + +/* NB: setxid permission is not checked here, instead it's done on + * MDT when client get remote permission. (lookup/mdc_get_remote_perm). */ +static int do_check_remote_perm(struct ll_inode_info *lli, int mask) +{ + struct hlist_head *head; + struct ll_remote_perm *lrp; + struct hlist_node *node; + int found = 0, rc; + ENTRY; + + if (!lli->lli_remote_perms) + RETURN(-ENOENT); + + head = lli->lli_remote_perms + remote_perm_hashfunc(current->uid); + + spin_lock(&lli->lli_lock); + hlist_for_each_entry(lrp, node, head, lrp_list) { + if (lrp->lrp_uid != current->uid) + continue; + if (lrp->lrp_gid != current->gid) + continue; + if (lrp->lrp_fsuid != current->fsuid) + continue; + if (lrp->lrp_fsgid != current->fsgid) + continue; + found = 1; + break; + } + + if (!found) + GOTO(out, rc = -ENOENT); + + CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n", + lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, + lrp->lrp_access_perm); + rc = ((lrp->lrp_access_perm & mask) == mask) ? 0 : -EACCES; + +out: + spin_unlock(&lli->lli_lock); + return rc; +} + +int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_remote_perm *lrp = NULL, *tmp = NULL; + struct hlist_head *head, *perm_hash = NULL; + struct hlist_node *node; + ENTRY; + + LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT); + +#if 0 + if (perm->rp_uid != current->uid || + perm->rp_gid != current->gid || + perm->rp_fsuid != current->fsuid || + perm->rp_fsgid != current->fsgid) { + /* user might setxid in this small period */ + CDEBUG(D_SEC, + "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n", + perm->rp_uid, perm->rp_gid, perm->rp_fsuid, + perm->rp_fsgid, current->uid, current->gid, + current->fsuid, current->fsgid); + RETURN(-EAGAIN); + } +#endif + + if (!lli->lli_remote_perms) { + perm_hash = alloc_rmtperm_hash(); + if (perm_hash == NULL) { + CERROR("alloc lli_remote_perms failed!\n"); + RETURN(-ENOMEM); + } + } + + spin_lock(&lli->lli_lock); + + if (!lli->lli_remote_perms) + lli->lli_remote_perms = perm_hash; + else if (perm_hash) + free_rmtperm_hash(perm_hash); + + head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid); + +again: + hlist_for_each_entry(tmp, node, head, lrp_list) { + if (tmp->lrp_uid != perm->rp_uid) + continue; + if (tmp->lrp_gid != perm->rp_gid) + continue; + if (tmp->lrp_fsuid != perm->rp_fsuid) + continue; + if (tmp->lrp_fsgid != perm->rp_fsgid) + continue; + if (lrp) + free_ll_remote_perm(lrp); + lrp = tmp; + break; + } + + if (!lrp) { + spin_unlock(&lli->lli_lock); + lrp = alloc_ll_remote_perm(); + if (!lrp) { + CERROR("alloc memory for ll_remote_perm failed!\n"); + RETURN(-ENOMEM); + } + spin_lock(&lli->lli_lock); + goto again; + } + + lrp->lrp_access_perm = perm->rp_access_perm; + if (lrp != tmp) { + lrp->lrp_uid = perm->rp_uid; + lrp->lrp_gid = perm->rp_gid; + lrp->lrp_fsuid = perm->rp_fsuid; + lrp->lrp_fsgid = perm->rp_fsgid; + hlist_add_head(&lrp->lrp_list, head); + } + lli->lli_rmtperm_utime = jiffies; + spin_unlock(&lli->lli_lock); + + CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n", + lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, + lrp->lrp_access_perm); + + RETURN(0); +} + +int lustre_check_remote_perm(struct inode *inode, int mask) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + struct mdt_remote_perm *perm; + struct obd_capa *oc; + unsigned long utime; + int i = 0, rc; + ENTRY; + +check: + utime = lli->lli_rmtperm_utime; + rc = do_check_remote_perm(lli, mask); + if (!rc || ((rc != -ENOENT) && i)) + RETURN(rc); + + might_sleep(); + + down(&lli->lli_rmtperm_sem); + /* check again */ + if (utime != lli->lli_rmtperm_utime) { + rc = do_check_remote_perm(lli, mask); + if (!rc || ((rc != -ENOENT) && i)) { + up(&lli->lli_rmtperm_sem); + RETURN(rc); + } + } + + if (i++ > 5) { + CERROR("check remote perm falls in dead loop!\n"); + LBUG(); + } + + oc = ll_mdscapa_get(inode); + rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc, &req); + capa_put(oc); + if (rc) { + up(&lli->lli_rmtperm_sem); + RETURN(rc); + } + + perm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, sizeof(*perm)); + LASSERT(perm); + LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1); + + rc = ll_update_remote_perm(inode, perm); + up(&lli->lli_rmtperm_sem); + + ptlrpc_req_finished(req); + + if (rc == -ENOMEM) + RETURN(rc); + + goto check; +} + +#if 0 /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock, + * because it will fail sanity test 48. + */ +void ll_free_remote_perms(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct hlist_head *hash = lli->lli_remote_perms; + struct ll_remote_perm *lrp; + struct hlist_node *node, *next; + int i; + + LASSERT(hash); + + spin_lock(&lli->lli_lock); + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) { + hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list) + free_ll_remote_perm(lrp); + } + + spin_unlock(&lli->lli_lock); +} +#endif diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 588bfeb..435b145 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -20,6 +20,7 @@ * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + #ifdef HAVE_KERNEL_CONFIG_H #include <linux/config.h> #endif @@ -44,6 +45,7 @@ #define DEBUG_SUBSYSTEM S_LLITE +//#include <lustre_mdc.h> #include <lustre_lite.h> #include "llite_internal.h" #include <linux/lustre_compat25.h> @@ -54,7 +56,7 @@ pos = n, n = pos->prev ) #endif -kmem_cache_t *ll_async_page_slab = NULL; +cfs_mem_cache_t *ll_async_page_slab = NULL; size_t ll_async_page_slab_size = 0; /* SYNCHRONOUS I/O to object storage for an inode */ @@ -65,7 +67,7 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, struct lov_stripe_md *lsm = lli->lli_smd; struct obd_info oinfo = { { { 0 } } }; struct brw_page pg; - int rc; + int opc, rc; ENTRY; pg.pg = page; @@ -89,14 +91,18 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, pg.flag = flags; if (cmd & OBD_BRW_WRITE) - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_BRW_WRITE, pg.count); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, + pg.count); else - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_BRW_READ, pg.count); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, + pg.count); oinfo.oi_oa = oa; oinfo.oi_md = lsm; - rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL); + /* NB partial write, so we might not have CAPA_OPC_OSS_READ capa */ + opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW; + oinfo.oi_capa = ll_osscapa_get(inode, current->fsuid, opc); + rc = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, 1, &pg, NULL); + capa_put(oinfo.oi_capa); if (rc == 0) obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS); else if (rc != -EIO) @@ -121,7 +127,7 @@ void ll_truncate(struct inode *inode) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino, inode->i_generation, inode, inode->i_size, inode->i_size); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_TRUNCATE); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1); if (lli->lli_size_sem_owner != current) { EXIT; return; @@ -139,15 +145,15 @@ void ll_truncate(struct inode *inode) * race condition. */ lov_stripe_lock(lli->lli_smd); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0); + obd_merge_lvb(ll_i2dtexp(inode), lli->lli_smd, &lvb, 0); if (lvb.lvb_size == inode->i_size) { CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n", - lli->lli_smd->lsm_object_id,inode->i_size,inode->i_size); + lli->lli_smd->lsm_object_id, inode->i_size, inode->i_size); lov_stripe_unlock(lli->lli_smd); GOTO(out_unlock, 0); } - obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd, inode->i_size, 1); + obd_adjust_kms(ll_i2dtexp(inode), lli->lli_smd, inode->i_size, 1); lov_stripe_unlock(lli->lli_smd); if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_CHECKSUM) && @@ -175,16 +181,18 @@ void ll_truncate(struct inode *inode) oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF; oinfo.oi_oa = &oa; oa.o_id = lli->lli_smd->lsm_object_id; - oa.o_valid = OBD_MD_FLID; + oa.o_gr = lli->lli_smd->lsm_object_gr; + oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID| + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER | - OBD_MD_FLBLOCKS); + OBD_MD_FLFID | OBD_MD_FLGENER); ll_inode_size_unlock(inode, 0); - rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL); + oinfo.oi_capa = ll_osscapa_get(inode, 0, CAPA_OPC_OSS_TRUNC); + rc = obd_punch_rqset(ll_i2dtexp(inode), &oinfo, NULL); + ll_truncate_free_capa(oinfo.oi_capa); if (rc) CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino); else @@ -222,12 +230,14 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, oa.o_mode = inode->i_mode; oa.o_id = lsm->lsm_object_id; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; + oa.o_gr = lsm->lsm_object_gr; + oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | + OBD_MD_FLTYPE | OBD_MD_FLGROUP; obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER); oinfo.oi_oa = &oa; oinfo.oi_md = lsm; - rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL); + rc = obd_brw(OBD_BRW_CHECK, ll_i2dtexp(inode), &oinfo, 1, &pga, NULL); if (rc) RETURN(rc); @@ -249,7 +259,7 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, * treat it like i_size. */ lov_stripe_lock(lsm); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); + obd_merge_lvb(ll_i2dtexp(inode), lsm, &lvb, 1); lov_stripe_unlock(lsm); if (lvb.lvb_size <= offset) { LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n", @@ -354,7 +364,7 @@ static int ll_ap_refresh_count(void *data, int cmd) lov_stripe_lock(lsm); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); + obd_merge_lvb(ll_i2dtexp(inode), lsm, &lvb, 1); kms = lvb.lvb_size; lov_stripe_unlock(lsm); @@ -377,11 +387,12 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa) lsm = ll_i2info(inode)->lli_smd; oa->o_id = lsm->lsm_object_id; - oa->o_valid = OBD_MD_FLID; + oa->o_gr = lsm->lsm_object_gr; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME; if (cmd & OBD_BRW_WRITE) { oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_easize = ll_i2info(inode)->lli_io_epoch; + oa->o_easize = ll_i2info(inode)->lli_ioepoch; valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID | @@ -414,12 +425,22 @@ static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa, EXIT; } +static struct obd_capa *ll_ap_lookup_capa(void *data, int cmd) +{ + struct ll_async_page *llap = LLAP_FROM_COOKIE(data); + int opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW; + + return ll_osscapa_get(llap->llap_page->mapping->host, llap->llap_fsuid, + opc); +} + static struct obd_async_page_ops ll_async_page_ops = { .ap_make_ready = ll_ap_make_ready, .ap_refresh_count = ll_ap_refresh_count, .ap_fill_obdo = ll_ap_fill_obdo, .ap_update_obdo = ll_ap_update_obdo, .ap_completion = ll_ap_completion, + .ap_lookup_capa = ll_ap_lookup_capa, }; struct ll_async_page *llap_cast_private(struct page *page) @@ -547,7 +568,7 @@ int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction) return count; } -static struct ll_async_page *llap_from_page(struct page *page, unsigned origin) +struct ll_async_page *llap_from_page(struct page *page, unsigned origin) { struct ll_async_page *llap; struct obd_export *exp; @@ -585,7 +606,7 @@ static struct ll_async_page *llap_from_page(struct page *page, unsigned origin) GOTO(out, llap); } - exp = ll_i2obdexp(page->mapping->host); + exp = ll_i2dtexp(page->mapping->host); if (exp == NULL) RETURN(ERR_PTR(-EINVAL)); @@ -593,7 +614,7 @@ static struct ll_async_page *llap_from_page(struct page *page, unsigned origin) if (sbi->ll_async_page_count >= sbi->ll_async_page_max) llap_shrink_cache(sbi, 0); - OBD_SLAB_ALLOC(llap, ll_async_page_slab, SLAB_KERNEL, + OBD_SLAB_ALLOC(llap, ll_async_page_slab, GFP_KERNEL, ll_async_page_slab_size); if (llap == NULL) RETURN(ERR_PTR(-ENOMEM)); @@ -614,11 +635,11 @@ static struct ll_async_page *llap_from_page(struct page *page, unsigned origin) /* also zeroing the PRIVBITS low order bitflags */ __set_page_ll_data(page, llap); llap->llap_page = page; - spin_lock(&sbi->ll_lock); sbi->ll_pglist_gen++; sbi->ll_async_page_count++; list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist); + INIT_LIST_HEAD(&llap->llap_pending_write); spin_unlock(&sbi->ll_lock); out: @@ -665,12 +686,13 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode, 0, 0, 0, async_flags); if (rc == 0) { LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n"); - //llap_write_pending(inode, llap); GOTO(out, 0); } llap->llap_write_queued = 0; - + /* Do not pass llap here as it is sync write. */ + llap_write_pending(inode, NULL); + rc = oig_init(&oig); if (rc) GOTO(out, rc); @@ -726,6 +748,9 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode, } } + if (rc == 0 && llap_write_complete(inode, llap)) + ll_queue_done_writing(inode, 0); + LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc); free_oig: @@ -762,24 +787,40 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, if (IS_ERR(llap)) RETURN(PTR_ERR(llap)); - exp = ll_i2obdexp(inode); + exp = ll_i2dtexp(inode); if (exp == NULL) RETURN(-EINVAL); llap->llap_ignore_quota = capable(CAP_SYS_RESOURCE); - /* queue a write for some time in the future the first time we - * dirty the page */ + /* + * queue a write for some time in the future the first time we + * dirty the page. + * + * This is different from what other file systems do: they usually + * just mark page (and some of its buffers) dirty and rely on + * balance_dirty_pages() to start a write-back. Lustre wants write-back + * to be started earlier for the following reasons: + * + * (1) with a large number of clients we need to limit the amount + * of cached data on the clients a lot; + * + * (2) large compute jobs generally want compute-only then io-only + * and the IO should complete as quickly as possible; + * + * (3) IO is batched up to the RPC size and is async until the + * client max cache is hit + * (/proc/fs/lustre/osc/OSC.../max_dirty_mb) + * + */ if (!PageDirty(page)) { - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRTY_MISSES); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_MISSES, 1); rc = queue_or_sync_write(exp, inode, llap, to, 0); if (rc) GOTO(out, rc); } else { - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRTY_HITS); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_HITS, 1); } /* put the page in the page cache, from now on ll_removepage is @@ -844,7 +885,7 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) page = llap->llap_page; LASSERT(PageLocked(page)); LASSERT(CheckWriteback(page,cmd)); - + LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc); if (cmd & OBD_BRW_READ && llap->llap_defer_uptodate) @@ -870,9 +911,12 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) unlock_page(page); - if (0 && cmd & OBD_BRW_WRITE) { - llap_write_complete(page->mapping->host, llap); - ll_try_done_writing(page->mapping->host); + if (cmd & OBD_BRW_WRITE) { + /* Only rc == 0, write succeed, then this page could be deleted + * from the pending_writing list + */ + if (rc == 0 && llap_write_complete(page->mapping->host, llap)) + ll_queue_done_writing(page->mapping->host, 0); } if (PageWriteback(page)) { @@ -907,14 +951,14 @@ void ll_removepage(struct page *page) LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n"); - exp = ll_i2obdexp(inode); + exp = ll_i2dtexp(inode); if (exp == NULL) { CERROR("page %p ind %lu gave null export\n", page, page->index); EXIT; return; } - llap = llap_from_page(page, 0); + llap = llap_from_page(page, LLAP_ORIGIN_REMOVEPAGE); if (IS_ERR(llap)) { CERROR("page %p ind %lu couldn't find llap: %ld\n", page, page->index, PTR_ERR(llap)); @@ -922,7 +966,9 @@ void ll_removepage(struct page *page) return; } - //llap_write_complete(inode, llap); + if (llap_write_complete(inode, llap)) + ll_queue_done_writing(inode, 0); + rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL, llap->llap_cookie); if (rc != 0) @@ -959,7 +1005,7 @@ static int ll_page_matches(struct page *page, int fd_flags) flags = LDLM_FL_TEST_LOCK | LDLM_FL_BLOCK_GRANTED; if (!(fd_flags & LL_FILE_READAHEAD)) flags |= LDLM_FL_CBPENDING; - matches = obd_match(ll_i2sbi(inode)->ll_osc_exp, + matches = obd_match(ll_i2sbi(inode)->ll_dt_exp, ll_i2info(inode)->lli_smd, LDLM_EXTENT, &page_extent, LCK_PR | LCK_PW, &flags, inode, &match_lockh); @@ -978,8 +1024,8 @@ static int ll_issue_page_read(struct obd_export *exp, llap->llap_ra_used = 0; rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd, NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0, - CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY | - ASYNC_URGENT); + CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | + ASYNC_READY | ASYNC_URGENT); if (rc) { LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc); page_cache_release(page); @@ -1112,7 +1158,7 @@ static int ll_readahead(struct ll_readahead_state *ras, lov_stripe_lock(lsm); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); + obd_merge_lvb(ll_i2dtexp(inode), lsm, &lvb, 1); kms = lvb.lvb_size; lov_stripe_unlock(lsm); if (kms == 0) { @@ -1359,7 +1405,7 @@ int ll_writepage(struct page *page) LASSERT(!PageDirty(page)); LASSERT(PageLocked(page)); - exp = ll_i2obdexp(inode); + exp = ll_i2dtexp(inode); if (exp == NULL) GOTO(out, rc = -EINVAL); @@ -1439,7 +1485,7 @@ int ll_readpage(struct file *filp, struct page *page) if (rc < 0) GOTO(out, rc); - exp = ll_i2obdexp(inode); + exp = ll_i2dtexp(inode); if (exp == NULL) GOTO(out, rc = -EINVAL); @@ -1447,6 +1493,8 @@ int ll_readpage(struct file *filp, struct page *page) if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); + llap->llap_fsuid = current->fsuid; + if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages) ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index, llap->llap_defer_uptodate); diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c deleted file mode 100644 index 508636b..0000000 --- a/lustre/llite/rw24.c +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Lite I/O page cache for the 2.4 kernel version - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/version.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/iobuf.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/smp_lock.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <lustre_lite.h> -#include "llite_internal.h" -#include <linux/lustre_compat25.h> - -static int ll_direct_IO_24(int rw, -#ifdef HAVE_DIO_FILE - struct file *file, -#else - struct inode *inode, -#endif - struct kiobuf *iobuf, unsigned long blocknr, - int blocksize) -{ -#ifdef HAVE_DIO_FILE - struct inode *inode = file->f_dentry->d_inode; -#endif - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct brw_page *pga; - struct obdo oa; - int length, i, flags, rc = 0; - loff_t offset; - ENTRY; - - if (!lsm || !lsm->lsm_object_id) - RETURN(-EBADF); - - offset = ((obd_off)blocknr << inode->i_blkbits); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), size="LPSZ - ", offset=%lld=%llx, pages %u\n", - inode->i_ino, inode->i_generation, inode, iobuf->length, - offset, offset, iobuf->nr_pages); - - /* FIXME: io smaller than CFS_PAGE_SIZE is broken on ia64 */ - if ((iobuf->offset & (~CFS_PAGE_MASK)) || - (iobuf->length & (~CFS_PAGE_MASK))) - RETURN(-EINVAL); - - OBD_ALLOC(pga, sizeof(*pga) * iobuf->nr_pages); - if (!pga) - RETURN(-ENOMEM); - - flags = 0 /* | OBD_BRW_DIRECTIO */; - length = iobuf->length; - rw = rw ? OBD_BRW_WRITE : OBD_BRW_READ; - - for (i = 0, length = iobuf->length; length > 0; - length -= pga[i].count, offset += pga[i].count, i++) { /*i last!*/ - pga[i].pg = iobuf->maplist[i]; - pga[i].off = offset; - /* To the end of the page, or the length, whatever is less */ - pga[i].count = min_t(int, CFS_PAGE_SIZE - (offset & ~CFS_PAGE_MASK), - length); - pga[i].flag = flags; - if (rw == OBD_BRW_READ) - POISON_PAGE(iobuf->maplist[i], 0x0d); - } - - ll_inode_fill_obdo(inode, rw, &oa); - - if (rw == OBD_BRW_WRITE) - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_WRITE, iobuf->length); - else - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_READ, iobuf->length); - rc = obd_brw_rqset(rw, ll_i2obdexp(inode), &oa, lsm, iobuf->nr_pages, - pga, NULL); - if (rc == 0) { - rc = iobuf->length; - if (rw == OBD_BRW_WRITE) { - lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2obdexp(inode), lsm, offset, 0); - lov_stripe_unlock(lsm); - } - } - - OBD_FREE(pga, sizeof(*pga) * iobuf->nr_pages); - RETURN(rc); -} - -#ifdef KERNEL_HAS_AS_MAX_READAHEAD -static int ll_max_readahead(struct inode *inode) -{ - return 0; -} -#endif - -struct address_space_operations ll_aops = { - .readpage = ll_readpage, - .direct_IO = ll_direct_IO_24, - .writepage = ll_writepage, - .prepare_write = ll_prepare_write, - .commit_write = ll_commit_write, - .removepage = ll_removepage, - .sync_page = NULL, - .bmap = NULL, -#ifdef KERNEL_HAS_AS_MAX_READAHEAD - .max_readahead = ll_max_readahead, -#endif -}; - diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 4fb767f..8454d47 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -20,6 +20,7 @@ * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + #ifdef HAVE_KERNEL_CONFIG_H #include <linux/config.h> #endif @@ -47,6 +48,7 @@ #define DEBUG_SUBSYSTEM S_LLITE +//#include <lustre_mdc.h> #include <lustre_lite.h> #include "llite_internal.h" #include <linux/lustre_compat25.h> @@ -71,13 +73,10 @@ static int ll_invalidatepage(struct page *page, unsigned long offset) #else static void ll_invalidatepage(struct page *page, unsigned long offset) { - if (offset) - return; - if (PagePrivate(page)) + if (offset == 0 && PagePrivate(page)) ll_removepage(page); } #endif - static int ll_releasepage(struct page *page, gfp_t gfp_mask) { if (PagePrivate(page)) @@ -85,6 +84,19 @@ static int ll_releasepage(struct page *page, gfp_t gfp_mask) return 1; } +static int ll_set_page_dirty(struct page *page) +{ + struct ll_async_page *llap; + ENTRY; + + llap = llap_from_page(page, LLAP_ORIGIN_UNKNOWN); + if (IS_ERR(llap)) + RETURN(PTR_ERR(llap)); + + llap_write_pending(page->mapping->host, llap); + RETURN(__set_page_dirty_nobuffers(page)); +} + #define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL static inline int ll_get_user_pages(int rw, unsigned long user_addr, @@ -99,8 +111,8 @@ static inline int ll_get_user_pages(int rw, unsigned long user_addr, return -EFBIG; } - page_count = ((user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT)- - (user_addr >> CFS_PAGE_SHIFT); + page_count = (user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + page_count -= user_addr >> CFS_PAGE_SHIFT; OBD_ALLOC_GFP(*pages, page_count * sizeof(**pages), GFP_KERNEL); if (*pages) { @@ -139,14 +151,16 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct inode *inode, { struct brw_page *pga; struct obdo oa; - int i, rc = 0; + int opc, i, rc = 0; size_t length; + struct obd_capa *ocapa; + loff_t file_offset_orig = file_offset; ENTRY; OBD_ALLOC(pga, sizeof(*pga) * page_count); if (!pga) { CDEBUG(D_VFSTRACE, "sizeof(*pga) = %u page_count = %u\n", - (int)sizeof(*pga), page_count); + (int)sizeof(*pga), page_count); RETURN(-ENOMEM); } @@ -155,7 +169,8 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct inode *inode, pga[i].pg = pages[i]; pga[i].off = file_offset; /* To the end of the page, or the length, whatever is less */ - pga[i].count = min_t(int, CFS_PAGE_SIZE -(file_offset & ~CFS_PAGE_MASK), + pga[i].count = min_t(int, CFS_PAGE_SIZE - + (file_offset & ~CFS_PAGE_MASK), length); pga[i].flag = 0; if (rw == READ) @@ -164,15 +179,25 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct inode *inode, ll_inode_fill_obdo(inode, rw, &oa); + if (rw == WRITE) { + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRECT_WRITE, size); + opc = CAPA_OPC_OSS_WRITE; + llap_write_pending(inode, NULL); + } else { + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_DIRECT_READ, size); + opc = CAPA_OPC_OSS_RW; + } + ocapa = ll_osscapa_get(inode, current->fsuid, opc); rc = obd_brw_rqset(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, - ll_i2obdexp(inode), &oa, lsm, page_count, pga, NULL); - if (rc == 0) { - rc = size; - if (rw == WRITE) { - lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2obdexp(inode), lsm, file_offset, 0); - lov_stripe_unlock(lsm); - } + ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL, + ocapa); + capa_put(ocapa); + if ((rc > 0) && (rw == WRITE)) { + lov_stripe_lock(lsm); + obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset_orig + rc, 0); + lov_stripe_unlock(lsm); } OBD_FREE(pga, sizeof(*pga) * page_count); @@ -193,15 +218,15 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, struct inode *inode = file->f_mapping->host; ssize_t count = iov_length(iov, nr_segs), tot_bytes = 0; struct ll_inode_info *lli = ll_i2info(inode); - unsigned long seg; + unsigned long seg = 0; size_t size = MAX_DIO_SIZE; ENTRY; if (!lli->lli_smd || !lli->lli_smd->lsm_object_id) RETURN(-EBADF); - /* FIXME: io smaller than CFS_PAGE_SIZE is broken on ia64 ??? */ - if ((file_offset & (~CFS_PAGE_MASK)) || (count & ~CFS_PAGE_MASK)) + /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ + if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK)) RETURN(-EINVAL); CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), size="LPSZ" (max %lu), " @@ -211,11 +236,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, MAX_DIO_SIZE >> CFS_PAGE_SHIFT); if (rw == WRITE) - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_WRITE, count); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRECT_WRITE, count); else - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_READ, count); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRECT_READ, count); /* Check that all user buffers are aligned as well */ for (seg = 0; seg < nr_segs; seg++) { @@ -264,6 +287,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, (int)size); continue; } + if (tot_bytes > 0) RETURN(tot_bytes); RETURN(page_count < 0 ? page_count : result); @@ -284,7 +308,7 @@ struct address_space_operations ll_aops = { .direct_IO = ll_direct_IO_26, .writepage = ll_writepage_26, .writepages = generic_writepages, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = ll_set_page_dirty, .sync_page = NULL, .prepare_write = ll_prepare_write, .commit_write = ll_commit_write, diff --git a/lustre/llite/super.c b/lustre/llite/super.c deleted file mode 100644 index 4543b52..0000000 --- a/lustre/llite/super.c +++ /dev/null @@ -1,127 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Client Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/random.h> -#include <linux/version.h> -#include <lustre_lite.h> -#include <lustre_ha.h> -#include <lustre_dlm.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/random.h> -#include <linux/cache_def.h> -#include <lprocfs_status.h> -#include "llite_internal.h" -#include <lustre/lustre_user.h> - -extern struct address_space_operations ll_aops; -extern struct address_space_operations ll_dir_aops; - - -/* exported operations */ -struct super_operations lustre_super_operations = -{ - .read_inode2 = ll_read_inode2, - .clear_inode = ll_clear_inode, - .put_super = ll_put_super, - .statfs = ll_statfs, - .umount_begin = ll_umount_begin, - .fh_to_dentry = ll_fh_to_dentry, - .dentry_to_fh = ll_dentry_to_fh, - .remount_fs = ll_remount_fs, -}; - - -void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)); - -static int __init init_lustre_lite(void) -{ - int i, seed[2]; - struct timeval tv; - lnet_process_id_t lnet_id; - - printk(KERN_INFO "Lustre: Lustre Client File System; " - "info@clusterfs.com\n"); - ll_file_data_slab = kmem_cache_create("ll_file_data", - sizeof(struct ll_file_data), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ll_file_data_slab == NULL) - return -ENOMEM; - - if (proc_lustre_root) - proc_lustre_fs_root = proc_mkdir("llite", proc_lustre_root); - - ll_register_cache(&ll_cache_definition); - - lustre_register_client_fill_super(ll_fill_super); - lustre_register_client_process_config(ll_process_config); - - get_random_bytes(seed, sizeof(seed)); - - /* Nodes with small feet have little entropy - * the NID for this node gives the most entropy in the low bits */ - for (i = 0; ; i++) { - if (LNetGetId(i, &lnet_id) == -ENOENT) { - break; - } - if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) { - seed[0] ^= LNET_NIDADDR(lnet_id.nid); - } - } - - do_gettimeofday(&tv); - ll_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]); - - return 0; -} - -static void __exit exit_lustre_lite(void) -{ - int rc; - - lustre_register_client_fill_super(NULL); - lustre_register_client_process_config(NULL); - - ll_unregister_cache(&ll_cache_definition); - - rc = kmem_cache_destroy(ll_file_data_slab); - LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); - if (ll_async_page_slab) { - rc = kmem_cache_destroy(ll_async_page_slab); - LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n"); - } - - if (proc_lustre_fs_root) - lprocfs_remove(&proc_lustre_fs_root); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Lite Client File System"); -MODULE_LICENSE("GPL"); - -module_init(init_lustre_lite); -module_exit(exit_lustre_lite); diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index e56d773..376a106 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -35,13 +35,13 @@ #include <lprocfs_status.h> #include "llite_internal.h" -static kmem_cache_t *ll_inode_cachep; +static cfs_mem_cache_t *ll_inode_cachep; static struct inode *ll_alloc_inode(struct super_block *sb) { struct ll_inode_info *lli; - lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE); - OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli); + ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1); + OBD_SLAB_ALLOC(lli, ll_inode_cachep, GFP_KERNEL, sizeof *lli); if (lli == NULL) return NULL; @@ -57,21 +57,11 @@ static void ll_destroy_inode(struct inode *inode) OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) -{ - struct ll_inode_info *lli = foo; - - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&lli->lli_vfs_inode); -} - int ll_init_inodecache(void) { - ll_inode_cachep = kmem_cache_create("lustre_inode_cache", - sizeof(struct ll_inode_info), - 0, SLAB_HWCACHE_ALIGN, - init_once, NULL); + ll_inode_cachep = cfs_mem_cache_create("lustre_inode_cache", + sizeof(struct ll_inode_info), + 0, SLAB_HWCACHE_ALIGN); if (ll_inode_cachep == NULL) return -ENOMEM; return 0; @@ -79,14 +69,10 @@ int ll_init_inodecache(void) void ll_destroy_inodecache(void) { -#ifdef HAVE_KMEM_CACHE_DESTROY_INT int rc; - - rc = kmem_cache_destroy(ll_inode_cachep); + + rc = cfs_mem_cache_destroy(ll_inode_cachep); LASSERTF(rc == 0, "ll_inode_cache: not all structures were freed\n"); -#else - kmem_cache_destroy(ll_inode_cachep); -#endif } /* exported operations */ @@ -95,6 +81,7 @@ struct super_operations lustre_super_operations = .alloc_inode = ll_alloc_inode, .destroy_inode = ll_destroy_inode, .clear_inode = ll_clear_inode, + .delete_inode = ll_delete_inode, .put_super = ll_put_super, .statfs = ll_statfs, .umount_begin = ll_umount_begin, @@ -109,20 +96,42 @@ static int __init init_lustre_lite(void) int i, rc, seed[2]; struct timeval tv; lnet_process_id_t lnet_id; - - printk(KERN_INFO "Lustre: Lustre Client File System; " - "info@clusterfs.com\n"); + rc = ll_init_inodecache(); if (rc) return -ENOMEM; - ll_file_data_slab = kmem_cache_create("ll_file_data", + + ll_file_data_slab = cfs_mem_cache_create("ll_file_data", sizeof(struct ll_file_data), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN); if (ll_file_data_slab == NULL) { ll_destroy_inodecache(); return -ENOMEM; } + ll_remote_perm_cachep = cfs_mem_cache_create("ll_remote_perm_cache", + sizeof(struct ll_remote_perm), + 0, 0); + if (ll_remote_perm_cachep == NULL) { + cfs_mem_cache_destroy(ll_file_data_slab); + ll_file_data_slab = NULL; + ll_destroy_inodecache(); + return -ENOMEM; + } + + ll_rmtperm_hash_cachep = cfs_mem_cache_create("ll_rmtperm_hash_cache", + REMOTE_PERM_HASHSIZE * + sizeof(struct list_head), + 0, 0); + if (ll_rmtperm_hash_cachep == NULL) { + cfs_mem_cache_destroy(ll_remote_perm_cachep); + ll_remote_perm_cachep = NULL; + cfs_mem_cache_destroy(ll_file_data_slab); + ll_file_data_slab = NULL; + ll_destroy_inodecache(); + return -ENOMEM; + } + proc_lustre_fs_root = proc_lustre_root ? proc_mkdir("llite", proc_lustre_root) : NULL; @@ -147,6 +156,9 @@ static int __init init_lustre_lite(void) do_gettimeofday(&tv); ll_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]); + init_timer(&ll_capa_timer); + ll_capa_timer.function = ll_capa_timer_callback; + rc = ll_capa_thread_start(); return rc; } @@ -155,6 +167,11 @@ static void __exit exit_lustre_lite(void) #ifdef HAVE_KMEM_CACHE_DESTROY_INT int rc; #endif + del_timer(&ll_capa_timer); + ll_capa_thread_stop(); + LASSERTF(capa_count[CAPA_SITE_CLIENT] == 0, + "client remaining capa count %d\n", + capa_count[CAPA_SITE_CLIENT]); lustre_register_client_fill_super(NULL); lustre_register_client_process_config(NULL); @@ -162,23 +179,25 @@ static void __exit exit_lustre_lite(void) ll_unregister_cache(&ll_cache_definition); ll_destroy_inodecache(); -#ifdef HAVE_KMEM_CACHE_DESTROY_INT - rc = kmem_cache_destroy(ll_file_data_slab); + + rc = cfs_mem_cache_destroy(ll_rmtperm_hash_cachep); + LASSERTF(rc == 0, "couldn't destroy ll_rmtperm_hash_cachep\n"); + ll_rmtperm_hash_cachep = NULL; + + rc = cfs_mem_cache_destroy(ll_remote_perm_cachep); + LASSERTF(rc == 0, "couldn't destroy ll_remote_perm_cachep\n"); + ll_remote_perm_cachep = NULL; + + rc = cfs_mem_cache_destroy(ll_file_data_slab); LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); -#else - kmem_cache_destroy(ll_file_data_slab); -#endif if (ll_async_page_slab) { -#ifdef HAVE_KMEM_CACHE_DESTROY_INT - rc = kmem_cache_destroy(ll_async_page_slab); + rc = cfs_mem_cache_destroy(ll_async_page_slab); LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n"); -#else - kmem_cache_destroy(ll_async_page_slab); -#endif } - if (proc_lustre_fs_root) + if (proc_lustre_fs_root) { lprocfs_remove(&proc_lustre_fs_root); + } } MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 683f36f..3edbb1f 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -34,9 +34,9 @@ static int ll_readlink_internal(struct inode *inode, { struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; int rc, symlen = inode->i_size + 1; + struct mdt_body *body; + struct obd_capa *oc; ENTRY; *request = NULL; @@ -47,9 +47,10 @@ static int ll_readlink_internal(struct inode *inode, RETURN(0); } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, - OBD_MD_LINKNAME, symlen, request); + oc = ll_mdscapa_get(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + OBD_MD_LINKNAME, symlen, request); + capa_put(oc); if (rc) { if (rc != -ENOENT) CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); @@ -110,7 +111,7 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) CDEBUG(D_VFSTRACE, "VFS Op\n"); /* on symlinks lli_open_sem protects lli_symlink_name allocation/data */ - down(&lli->lli_open_sem); + down(&lli->lli_size_sem); rc = ll_readlink_internal(inode, &request, &symname); if (rc) GOTO(out, rc); @@ -118,7 +119,7 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) rc = vfs_readlink(dentry, buffer, buflen, symname); ptlrpc_req_finished(request); out: - up(&lli->lli_open_sem); + up(&lli->lli_size_sem); RETURN(rc); } @@ -128,7 +129,8 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) # define LL_FOLLOW_LINK_RETURN_TYPE int #endif -static LL_FOLLOW_LINK_RETURN_TYPE ll_follow_link(struct dentry *dentry, struct nameidata *nd) +static LL_FOLLOW_LINK_RETURN_TYPE ll_follow_link(struct dentry *dentry, + struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); @@ -152,9 +154,9 @@ static LL_FOLLOW_LINK_RETURN_TYPE ll_follow_link(struct dentry *dentry, struct n #endif CDEBUG(D_VFSTRACE, "VFS Op\n"); - down(&lli->lli_open_sem); + down(&lli->lli_size_sem); rc = ll_readlink_internal(inode, &request, &symname); - up(&lli->lli_open_sem); + up(&lli->lli_size_sem); if (rc) { path_release(nd); /* Kernel assumes that ->follow_link() releases nameidata on error */ @@ -164,21 +166,21 @@ static LL_FOLLOW_LINK_RETURN_TYPE ll_follow_link(struct dentry *dentry, struct n #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)) rc = vfs_follow_link(nd, symname); #else -# ifdef HAVE_COOKIE_FOLLOW_LINK +#ifdef HAVE_COOKIE_FOLLOW_LINK nd_set_link(nd, symname); /* @symname may contain a pointer to the request message buffer, we delay request releasing until ll_put_link then. */ RETURN(request); -# else +#else if (request != NULL) { /* falling back to recursive follow link if the request * needs to be cleaned up still. */ - rc = vfs_follow_link(nd, symname); + rc = vfs_follow_link(nd, symname); GOTO(out, rc); } nd_set_link(nd, symname); RETURN(0); -# endif +#endif #endif out: ptlrpc_req_finished(request); diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 63322d6..5450038 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -29,20 +29,9 @@ #include <obd_support.h> #include <lustre_lite.h> #include <lustre_dlm.h> -#include <linux/lustre_version.h> - -#ifndef POSIX_ACL_XATTR_ACCESS -#ifndef XATTR_NAME_ACL_ACCESS -#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" -#endif -#define POSIX_ACL_XATTR_ACCESS XATTR_NAME_ACL_ACCESS -#endif -#ifndef POSIX_ACL_XATTR_DEFAULT -#ifndef XATTR_NAME_ACL_DEFAULT -#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" -#endif -#define POSIX_ACL_XATTR_DEFAULT XATTR_NAME_ACL_DEFAULT -#endif +#include <lustre_ver.h> +//#include <lustre_mdc.h> +#include <linux/lustre_acl.h> #include "llite_internal.h" @@ -84,9 +73,10 @@ int get_xattr_type(const char *name) static int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type) { - if ((xattr_type == XATTR_ACL_ACCESS_T || - xattr_type == XATTR_ACL_DEFAULT_T) && - !(sbi->ll_flags & LL_SBI_ACL)) + if (((xattr_type == XATTR_ACL_ACCESS_T) || + (xattr_type == XATTR_ACL_DEFAULT_T)) && + (!(sbi->ll_flags & LL_SBI_ACL) || + (sbi->ll_flags & LL_SBI_RMT_CLIENT))) return -EOPNOTSUPP; if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR)) @@ -106,12 +96,10 @@ int ll_setxattr_common(struct inode *inode, const char *name, { struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req; - struct ll_fid fid; int xattr_type, rc; + struct obd_capa *oc; ENTRY; - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_SETXATTR); - xattr_type = get_xattr_type(name); rc = xattr_type_filter(sbi, xattr_type); if (rc) @@ -121,9 +109,10 @@ int ll_setxattr_common(struct inode *inode, const char *name, if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) RETURN(0); - ll_inode2fid(&fid, inode); - rc = mdc_setxattr(sbi->ll_mdc_exp, &fid, valid, - name, value, size, 0, flags, &req); + oc = ll_mdscapa_get(inode); + rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name, + value, size, 0, flags, &req); + capa_put(oc); if (rc) { if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because " @@ -148,7 +137,7 @@ int ll_setxattr(struct dentry *dentry, const char *name, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", inode->i_ino, inode->i_generation, inode, name); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_SETXATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1); if (strncmp(name, XATTR_TRUSTED_PREFIX, 8) == 0 && strcmp(name + 8, "lov") == 0) { @@ -185,7 +174,7 @@ int ll_removexattr(struct dentry *dentry, const char *name) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", inode->i_ino, inode->i_generation, inode, name); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_REMOVEXATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); return ll_setxattr_common(inode, name, NULL, 0, 0, OBD_MD_FLXATTRRM); } @@ -196,17 +185,15 @@ int ll_getxattr_common(struct inode *inode, const char *name, { struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req = NULL; - struct mds_body *body; - struct ll_fid fid; - void *xdata; + struct mdt_body *body; int xattr_type, rc; + void *xdata; + struct obd_capa *oc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_GETXATTR); - /* listxattr have slightly different behavior from of ext3: * without 'user_xattr' ext3 will list all xattr names but * filtered out "^user..*"; we list them all for simplicity. @@ -244,9 +231,10 @@ int ll_getxattr_common(struct inode *inode, const char *name, #endif do_getxattr: - ll_inode2fid(&fid, inode); - rc = mdc_getxattr(sbi->ll_mdc_exp, &fid, valid, name, NULL, 0, size, - &req); + oc = ll_mdscapa_get(inode); + rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name, + NULL, 0, size, 0, &req); + capa_put(oc); if (rc) { if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because " @@ -305,7 +293,7 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", inode->i_ino, inode->i_generation, inode, name); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_GETXATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); if (strncmp(name, XATTR_TRUSTED_PREFIX, 8) == 0 && strcmp(name + 8, "lov") == 0) { @@ -349,13 +337,13 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct inode *inode = dentry->d_inode; int rc = 0, rc2 = 0; - + LASSERT(inode); CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); - ll_vfs_ops_tally(ll_i2sbi(inode), VFS_OPS_LISTXATTR); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1); rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS); @@ -377,7 +365,7 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) if (rc2 < 0) { GOTO(out, rc2 = 0); } else { - const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; const size_t name_len = sizeof("lov") - 1; const size_t total_len = prefix_len + name_len + 1; diff --git a/lustre/lmv/.cvsignore b/lustre/lmv/.cvsignore new file mode 100644 index 0000000..5d26f00 --- /dev/null +++ b/lustre/lmv/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lustre/lmv/Makefile.in b/lustre/lmv/Makefile.in new file mode 100644 index 0000000..2f77e68 --- /dev/null +++ b/lustre/lmv/Makefile.in @@ -0,0 +1,4 @@ +MODULES := lmv +lmv-objs := lmv_obd.o lmv_intent.o lmv_fld.o lmv_object.o lproc_lmv.o + +@INCLUDE_RULES@ diff --git a/lustre/lmv/autoMakefile.am b/lustre/lmv/autoMakefile.am new file mode 100644 index 0000000..6a9913c --- /dev/null +++ b/lustre/lmv/autoMakefile.am @@ -0,0 +1,18 @@ +# Copyright (C) 2002 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if LIBLUSTRE +noinst_LIBRARIES = liblmv.a +liblmv_a_SOURCES = lmv_obd.c lmv_intent.c lmv_object.c lmv_fld.c +liblmv_a_CPPFLAGS = $(LLCPPFLAGS) +liblmv_a_CFLAGS = $(LLCFLAGS) +endif + +if MODULES +modulefs_DATA = lmv$(KMODEXT) +endif # MODULES + +DIST_SOURCES = $(lmv-objs:.o=.c) lmv_internal.h +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ diff --git a/lustre/lmv/lmv_fld.c b/lustre/lmv/lmv_fld.c new file mode 100644 index 0000000..432210f --- /dev/null +++ b/lustre/lmv/lmv_fld.c @@ -0,0 +1,72 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LMV +#ifdef __KERNEL__ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <asm/div64.h> +#include <linux/seq_file.h> +#else +#include <liblustre.h> +#endif + +#include <lustre/lustre_idl.h> +#include <obd_support.h> +#include <lustre_fid.h> +#include <lustre_lib.h> +#include <lustre_net.h> +#include <lustre_dlm.h> +#include <obd_class.h> +#include <lprocfs_status.h> +#include "lmv_internal.h" + +int lmv_fld_lookup(struct lmv_obd *lmv, + const struct lu_fid *fid, + mdsno_t *mds) +{ + int rc; + ENTRY; + + LASSERT(fid_is_sane(fid)); + rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds, NULL); + if (rc) { + CERROR("Error while looking for mds number. Seq "LPU64 + ", err = %d\n", fid_seq(fid), rc); + RETURN(rc); + } + + CDEBUG(D_INFO, "Got mds "LPU64" for sequence: "LPU64"\n", + *mds, fid_seq(fid)); + + if (*mds >= lmv->desc.ld_tgt_count) { + CERROR("Got invalid mds: "LPU64" (max: %d)\n", + *mds, lmv->desc.ld_tgt_count); + rc = -EINVAL; + } + RETURN(rc); +} diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c new file mode 100644 index 0000000..ccc6d1b --- /dev/null +++ b/lustre/lmv/lmv_intent.c @@ -0,0 +1,1043 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LMV +#ifdef __KERNEL__ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <asm/div64.h> +#include <linux/seq_file.h> +#include <linux/namei.h> +#else +#include <liblustre.h> +#endif + +#include <lustre/lustre_idl.h> +#include <obd_support.h> +#include <lustre_lib.h> +#include <lustre_net.h> +#include <lustre_dlm.h> +#include <obd_class.h> +#include <lprocfs_status.h> +#include "lmv_internal.h" + +static inline void lmv_drop_intent_lock(struct lookup_intent *it) +{ + if (it->d.lustre.it_lock_mode != 0) { + ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle, + it->d.lustre.it_lock_mode); + it->d.lustre.it_lock_mode = 0; + } +} + +int lmv_intent_remote(struct obd_export *exp, void *lmm, + int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct lustre_handle plock; + struct md_op_data *op_data; + struct obd_export *tgt_exp; + struct mdt_body *body; + int pmode, rc = 0; + ENTRY; + + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + + if (!(body->valid & OBD_MD_MDS)) + RETURN(0); + + /* + * oh, MDS reports that this is remote inode case i.e. we have to ask + * for real attrs on another MDS. + */ + if (it->it_op & IT_LOOKUP) { + /* + * unfortunately, we have to lie to MDC/MDS to retrieve + * attributes llite needs. + */ + it->it_op = IT_GETATTR; + } + + /* we got LOOKUP lock, but we really need attrs */ + pmode = it->d.lustre.it_lock_mode; + if (pmode) { + plock.cookie = it->d.lustre.it_lock_handle; + it->d.lustre.it_lock_mode = 0; + it->d.lustre.it_data = 0; + } + + LASSERT(fid_is_sane(&body->fid1)); + + it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE; + + tgt_exp = lmv_find_export(lmv, &body->fid1); + if (IS_ERR(tgt_exp)) + GOTO(out, rc = PTR_ERR(tgt_exp)); + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out, rc = -ENOMEM); + + op_data->op_fid1 = body->fid1; + op_data->op_bias = MDS_CROSS_REF; + + rc = md_intent_lock(tgt_exp, op_data, lmm, lmmsize, it, flags, + &req, cb_blocking, extra_lock_flags); + + /* + * llite needs LOOKUP lock to track dentry revocation in order to + * maintain dcache consistency. Thus drop UPDATE lock here and put + * LOOKUP in request. + */ + if (rc == 0) { + lmv_drop_intent_lock(it); + it->d.lustre.it_lock_handle = plock.cookie; + it->d.lustre.it_lock_mode = pmode; + } + + OBD_FREE_PTR(op_data); + EXIT; +out: + if (rc && pmode) + ldlm_lock_decref(&plock, pmode); + + ptlrpc_req_finished(*reqp); + *reqp = req; + return rc; +} + +int lmv_alloc_slave_fids(struct obd_device *obd, struct lu_fid *pid, + struct md_op_data *op, struct lu_fid *fid) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_obj *obj; + mdsno_t mds; + int mea_idx; + int rc; + ENTRY; + + obj = lmv_obj_grab(obd, pid); + if (!obj) { + CERROR("Object "DFID" should be split\n", + PFID(pid)); + RETURN(0); + } + + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)op->op_name, op->op_namelen); + mds = obj->lo_inodes[mea_idx].li_mds; + lmv_obj_put(obj); + + rc = __lmv_fid_alloc(lmv, fid, mds); + if (rc) { + CERROR("Can't allocate new fid, rc %d\n", + rc); + RETURN(rc); + } + + CDEBUG(D_INFO, "Allocate new fid "DFID" for split " + "obj\n", PFID(fid)); + + RETURN(rc); +} + +/* + * IT_OPEN is intended to open (and create, possible) an object. Parent (pid) + * may be split dir. + */ +int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct lu_fid rpid = op_data->op_fid1; + struct lmv_obd *lmv = &obd->u.lmv; + struct md_op_data *sop_data; + struct obd_export *tgt_exp; + struct lmv_stripe_md *mea; + struct mdt_body *body; + struct lmv_obj *obj; + int rc, loop = 0; + ENTRY; + + OBD_ALLOC_PTR(sop_data); + if (sop_data == NULL) + RETURN(-ENOMEM); + + /* save op_data fro repeat case */ + *sop_data = *op_data; + +repeat: + + ++loop; + LASSERT(loop <= 2); + obj = lmv_obj_grab(obd, &rpid); + if (obj) { + int mea_idx; + + /* + * Directory is already split, so we have to forward request to + * the right MDS. + */ + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + + rpid = obj->lo_inodes[mea_idx].li_fid; + + sop_data->op_mds = obj->lo_inodes[mea_idx].li_mds; + tgt_exp = lmv_get_export(lmv, sop_data->op_mds); + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + lmv_obj_put(obj); + CDEBUG(D_OTHER, "Choose slave dir ("DFID")\n", PFID(&rpid)); + } else { + struct lmv_tgt_desc *tgt; + + sop_data->op_bias |= MDS_CHECK_SPLIT; + tgt = lmv_find_target(lmv, &rpid); + sop_data->op_mds = tgt->ltd_idx; + tgt_exp = tgt->ltd_exp; + } + if (IS_ERR(tgt_exp)) + GOTO(out_free_sop_data, rc = PTR_ERR(tgt_exp)); + + sop_data->op_fid1 = rpid; + + if (it->it_op & IT_CREAT) { + /* + * For open with IT_CREATE and for IT_CREATE cases allocate new + * fid and setup FLD for it. + */ + rc = lmv_fid_alloc(exp, &sop_data->op_fid2, sop_data); + if (rc) + GOTO(out_free_sop_data, rc); + + if (rc == -ERESTART) + goto repeat; + else if (rc) + GOTO(out_free_sop_data, rc); + } + + rc = md_intent_lock(tgt_exp, sop_data, lmm, lmmsize, it, flags, + reqp, cb_blocking, extra_lock_flags); + + if (rc == -ERESTART) { + LASSERT(*reqp != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, + "Got -ERESTART during open!\n"); + ptlrpc_req_finished(*reqp); + *reqp = NULL; + it->d.lustre.it_data = 0; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + LASSERT(lu_fid_eq(&op_data->op_fid1, &rpid)); + rc = lmv_handle_split(exp, &rpid); + if (rc == 0) { + /* We should reallocate child FID. */ + rc = lmv_alloc_slave_fids(obd, &rpid, op_data, + &sop_data->op_fid2); + if (rc == 0) + goto repeat; + } + } + + if (rc != 0) + GOTO(out_free_sop_data, rc); + + /* + * Okay, MDS has returned success. Probably name has been resolved in + * remote inode. + */ + rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp, + cb_blocking, extra_lock_flags); + if (rc != 0) { + LASSERT(rc < 0); + /* + * This is possible, that some userspace application will try to + * open file as directory and we will have -ENOTDIR here. As + * this is normal situation, we should not print error here, + * only debug info. + */ + CDEBUG(D_OTHER, "can't handle remote %s: dir "DFID"("DFID"):" + "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2), + PFID(&rpid), op_data->op_namelen, op_data->op_name, rc); + GOTO(out_free_sop_data, rc); + } + + /* + * Nothing is found, do not access body->fid1 as it is zero and thus + * pointless. + */ + if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) && + !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) && + !(it->d.lustre.it_disposition & DISP_OPEN_OPEN)) + GOTO(out_free_sop_data, rc = 0); + + /* caller may use attrs MDS returns on IT_OPEN lock request so, we have + * to update them for split dir */ + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + + /* could not find object, FID is not present in response. */ + if (!(body->valid & OBD_MD_FLID)) + GOTO(out_free_sop_data, rc = 0); + + obj = lmv_obj_grab(obd, &body->fid1); + if (!obj && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) { + /* FIXME: capability for remote! */ + /* wow! this is split dir, we'd like to handle it */ + obj = lmv_obj_create(exp, &body->fid1, mea); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); + } + + if (obj) { + /* This is split dir and we'd want to get attrs. */ + CDEBUG(D_OTHER, "attrs from slaves for "DFID"\n", + PFID(&body->fid1)); + + rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, + cb_blocking, extra_lock_flags); + } else if (S_ISDIR(body->mode)) { + CDEBUG(D_OTHER, "object "DFID" has not lmv obj?\n", + PFID(&body->fid1)); + } + + if (obj) + lmv_obj_put(obj); + + EXIT; +out_free_sop_data: + OBD_FREE_PTR(sop_data); + return rc; +} + +int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct lmv_obj *obj = NULL, *obj2 = NULL; + struct obd_device *obd = exp->exp_obd; + struct lu_fid rpid = op_data->op_fid1; + struct lmv_obd *lmv = &obd->u.lmv; + struct md_op_data *sop_data; + struct lmv_stripe_md *mea; + struct mdt_body *body; + mdsno_t mds; + int rc = 0; + ENTRY; + + OBD_ALLOC_PTR(sop_data); + if (sop_data == NULL) + RETURN(-ENOMEM); + + /* save op_data fro repeat case */ + *sop_data = *op_data; + + if (fid_is_sane(&op_data->op_fid2)) { + /* + * Caller wants to revalidate attrs of obj we have to revalidate + * slaves if requested object is split directory. + */ + CDEBUG(D_OTHER, "revalidate attrs for "DFID"\n", + PFID(&op_data->op_fid2)); + + rc = lmv_fld_lookup(lmv, &op_data->op_fid2, &mds); + if (rc) + GOTO(out_free_sop_data, rc); +#if 0 + /* + * In fact, we do not need this with current intent_lock(), but + * it may change some day. + */ + obj = lmv_obj_grab(obd, &op_data->op_fid2); + if (obj) { + if (!lu_fid_eq(&op_data->op_fid1, &op_data->op_fid2)){ + rpid = obj->lo_inodes[mds].li_fid; + mds = obj->lo_inodes[mds].li_mds; + } + lmv_obj_put(obj); + } +#endif + } else { + CDEBUG(D_OTHER, "INTENT getattr for %*s on "DFID"\n", + op_data->op_namelen, op_data->op_name, + PFID(&op_data->op_fid1)); + + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds); + if (rc) + GOTO(out_free_sop_data, rc); + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj && op_data->op_namelen) { + int mea_idx; + + /* directory is already split. calculate mds */ + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + rpid = obj->lo_inodes[mea_idx].li_fid; + mds = obj->lo_inodes[mea_idx].li_mds; + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + lmv_obj_put(obj); + + CDEBUG(D_OTHER, "forward to MDS #"LPU64" (slave "DFID")\n", + mds, PFID(&rpid)); + } else { + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds); + if (rc) + GOTO(out_free_sop_data, rc); + sop_data->op_bias |= MDS_CHECK_SPLIT; + } + } + + sop_data->op_fid1 = rpid; + + rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, + lmmsize, it, flags, reqp, cb_blocking, + extra_lock_flags); + + LASSERTF(rc != -ERESTART, "GETATTR: Got unhandled -ERESTART!\n"); + if (rc < 0) + GOTO(out_free_sop_data, rc); + + if (obj && rc > 0) { + /* + * This is split dir. In order to optimize things a bit, we + * consider obj valid updating missing parts. + + * FIXME: do we need to return any lock here? It would be fine + * if we don't. This means that nobody should use UPDATE lock to + * notify about object * removal. + */ + CDEBUG(D_OTHER, + "revalidate slaves for "DFID", rc %d\n", + PFID(&op_data->op_fid2), rc); + + LASSERT(fid_is_sane(&op_data->op_fid2)); + rc = lmv_revalidate_slaves(exp, reqp, &op_data->op_fid2, it, rc, + cb_blocking, extra_lock_flags); + GOTO(out_free_sop_data, rc); + } + + if (*reqp == NULL) + GOTO(out_free_sop_data, rc); + + /* + * okay, MDS has returned success. Probably name has been resolved in + * remote inode. + */ + rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, + reqp, cb_blocking, extra_lock_flags); + if (rc < 0) + GOTO(out_free_sop_data, rc); + + /* + * Nothing is found, do not access body->fid1 as it is zero and thus + * pointless. + */ + if (it->d.lustre.it_disposition & DISP_LOOKUP_NEG) + GOTO(out_free_sop_data, rc = 0); + + LASSERT(*reqp); + LASSERT((*reqp)->rq_repmsg); + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + + /* could not find object, FID is not present in response. */ + if (!(body->valid & OBD_MD_FLID)) + GOTO(out_free_sop_data, rc = 0); + + obj2 = lmv_obj_grab(obd, &body->fid1); + + if (!obj2 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) { + + /* FIXME remote capability! */ + /* wow! this is split dir, we'd like to handle it. */ + obj2 = lmv_obj_create(exp, &body->fid1, mea); + if (IS_ERR(obj2)) + GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj2)); + } + + if (obj2) { + /* this is split dir and we'd want to get attrs */ + CDEBUG(D_OTHER, "attrs from slaves for "DFID", rc %d\n", + PFID(&body->fid1), rc); + + rc = lmv_revalidate_slaves(exp, reqp, &body->fid1, it, 1, + cb_blocking, extra_lock_flags); + lmv_obj_put(obj2); + } + + EXIT; +out_free_sop_data: + OBD_FREE_PTR(sop_data); + return rc; +} + +/* this is not used currently */ +int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lustre_handle *lockh; + struct md_op_data *op_data; + struct ldlm_lock *lock; + struct mdt_body *body2; + struct mdt_body *body; + struct lmv_obj *obj; + int i, rc = 0; + ENTRY; + + LASSERT(reqp); + LASSERT(*reqp); + + /* + * Master is locked. we'd like to take locks on slaves and update + * attributes to be returned from the slaves it's important that lookup + * is called in two cases: + + * - for first time (dcache has no such a resolving yet). - + * ->d_revalidate() returned false. + + * Last case possible only if all the objs (master and all slaves aren't + * valid. + */ + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + RETURN(-ENOMEM); + + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + + LASSERT((body->valid & OBD_MD_FLID) != 0); + obj = lmv_obj_grab(obd, &body->fid1); + LASSERT(obj != NULL); + + CDEBUG(D_OTHER, "lookup slaves for "DFID"\n", + PFID(&body->fid1)); + + lmv_obj_lock(obj); + + for (i = 0; i < obj->lo_objcount; i++) { + struct lu_fid fid = obj->lo_inodes[i].li_fid; + struct ptlrpc_request *req = NULL; + struct obd_export *tgt_exp; + struct lookup_intent it; + + if (lu_fid_eq(&fid, &obj->lo_fid)) + /* skip master obj */ + continue; + + CDEBUG(D_OTHER, "lookup slave "DFID"\n", PFID(&fid)); + + /* is obj valid? */ + memset(&it, 0, sizeof(it)); + it.it_op = IT_GETATTR; + + memset(op_data, 0, sizeof(*op_data)); + op_data->op_fid1 = fid; + op_data->op_fid2 = fid; + op_data->op_bias = MDS_CROSS_REF; + + tgt_exp = lmv_get_export(lmv, obj->lo_inodes[i].li_mds); + if (IS_ERR(tgt_exp)) + GOTO(cleanup, rc = PTR_ERR(tgt_exp)); + + rc = md_intent_lock(tgt_exp, op_data, NULL, 0, &it, 0, + &req, lmv_blocking_ast, 0); + + lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle; + if (rc > 0 && req == NULL) { + /* nice, this slave is valid */ + LASSERT(req == NULL); + CDEBUG(D_OTHER, "cached\n"); + goto release_lock; + } + + if (rc < 0) { + /* error during lookup */ + GOTO(cleanup, rc); + } + lock = ldlm_handle2lock(lockh); + LASSERT(lock); + + lock->l_ast_data = lmv_obj_get(obj); + + body2 = lustre_msg_buf(req->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body2)); + LASSERT(body2 != NULL); + LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); + + obj->lo_inodes[i].li_size = body2->size; + + CDEBUG(D_OTHER, "fresh: %lu\n", + (unsigned long)obj->lo_inodes[i].li_size); + + LDLM_LOCK_PUT(lock); + + if (req) + ptlrpc_req_finished(req); +release_lock: + lmv_update_body(body, obj->lo_inodes + i); + + if (it.d.lustre.it_lock_mode) { + ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode); + it.d.lustre.it_lock_mode = 0; + } + } + + EXIT; +cleanup: + lmv_obj_unlock(obj); + lmv_obj_put(obj); + OBD_FREE_PTR(op_data); + return rc; +} + +int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct lu_fid rpid = op_data->op_fid1; + struct lmv_obd *lmv = &obd->u.lmv; + struct md_op_data *sop_data; + struct lmv_stripe_md *mea; + struct mdt_body *body; + struct lmv_obj *obj; + int rc, loop = 0; + int mea_idx; + mdsno_t mds; + ENTRY; + + OBD_ALLOC_PTR(sop_data); + if (sop_data == NULL) + RETURN(-ENOMEM); + + /* save op_data fro repeat case */ + *sop_data = *op_data; + + /* + * IT_LOOKUP is intended to produce name -> fid resolving (let's call + * this lookup below) or to confirm requested resolving is still valid + * (let's call this revalidation) fid_is_sane(&sop_data->op_fid2) specifies + * revalidation. + */ + if (fid_is_sane(&op_data->op_fid2)) { + /* + * This is revalidate: we have to check is LOOKUP lock still + * valid for given fid. Very important part is that we have to + * choose right mds because namespace is per mds. + */ + rpid = op_data->op_fid1; + obj = lmv_obj_grab(obd, &rpid); + if (obj) { + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + rpid = obj->lo_inodes[mea_idx].li_fid; + mds = obj->lo_inodes[mea_idx].li_mds; + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + lmv_obj_put(obj); + } else { + rc = lmv_fld_lookup(lmv, &rpid, &mds); + if (rc) + GOTO(out_free_sop_data, rc); + sop_data->op_bias |= MDS_CHECK_SPLIT; + } + + CDEBUG(D_OTHER, "revalidate lookup for "DFID" to #"LPU64" MDS\n", + PFID(&op_data->op_fid2), mds); + } else { +repeat: + ++loop; + LASSERT(loop <= 2); + + /* + * This is lookup. During lookup we have to update all the + * attributes, because returned values will be put in struct + * inode. + */ + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj) { + if (op_data->op_namelen) { + /* directory is already split. calculate mds */ + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + rpid = obj->lo_inodes[mea_idx].li_fid; + mds = obj->lo_inodes[mea_idx].li_mds; + } + sop_data->op_bias &= ~MDS_CHECK_SPLIT; + lmv_obj_put(obj); + } else { + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds); + if (rc) + GOTO(out_free_sop_data, rc); + sop_data->op_bias |= MDS_CHECK_SPLIT; + } + fid_zero(&sop_data->op_fid2); + } + + sop_data->op_bias &= ~MDS_CROSS_REF; + sop_data->op_fid1 = rpid; + + rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize, + it, flags, reqp, cb_blocking, extra_lock_flags); + if (rc > 0) { + LASSERT(fid_is_sane(&op_data->op_fid2)); + /* + * Very interesting. it seems object is still valid but for some + * reason llite calls lookup, not revalidate. + */ + CDEBUG(D_OTHER, "lookup for "DFID" and data should be uptodate\n", + PFID(&rpid)); + LASSERT(*reqp == NULL); + GOTO(out_free_sop_data, rc); + } + + if (rc == 0 && *reqp == NULL) { + /* once again, we're asked for lookup, not revalidate */ + CDEBUG(D_OTHER, "lookup for "DFID" and data should be uptodate\n", + PFID(&rpid)); + GOTO(out_free_sop_data, rc); + } + + if (rc == -ERESTART) { + LASSERT(*reqp != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *reqp, + "Got -ERESTART during lookup!\n"); + ptlrpc_req_finished(*reqp); + *reqp = NULL; + it->d.lustre.it_data = 0; + /* + * Directory got split since last update. This shouldn't be + * because splitting causes lock revocation, so revalidate had + * to fail and lookup on dir had to return mea. + */ + CWARN("we haven't knew about directory splitting!\n"); + LASSERT(obj == NULL); + + obj = lmv_obj_create(exp, &rpid, NULL); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = PTR_ERR(obj)); + lmv_obj_put(obj); + goto repeat; + } + + if (rc < 0) + GOTO(out_free_sop_data, rc); + + /* + * Okay, MDS has returned success. Probably name has been resolved in + * remote inode. + */ + rc = lmv_intent_remote(exp, lmm, lmmsize, it, flags, reqp, + cb_blocking, extra_lock_flags); + + if (rc == 0 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) { + /* Wow! This is split dir, we'd like to handle it. */ + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + LASSERT((body->valid & OBD_MD_FLID) != 0); + + obj = lmv_obj_grab(obd, &body->fid1); + if (!obj) { + obj = lmv_obj_create(exp, &body->fid1, mea); + if (IS_ERR(obj)) + GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); + } + lmv_obj_put(obj); + } + + EXIT; +out_free_sop_data: + OBD_FREE_PTR(sop_data); + return rc; +} + +int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + int rc; + ENTRY; + + LASSERT(it != NULL); + LASSERT(fid_is_sane(&op_data->op_fid1)); + + CDEBUG(D_OTHER, "INTENT LOCK '%s' for '%*s' on "DFID"\n", + LL_IT2STR(it), op_data->op_namelen, op_data->op_name, + PFID(&op_data->op_fid1)); + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + if (it->it_op & IT_LOOKUP) + rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it, + flags, reqp, cb_blocking, + extra_lock_flags); + else if (it->it_op & IT_OPEN) + rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it, + flags, reqp, cb_blocking, + extra_lock_flags); + else if (it->it_op & IT_GETATTR) + rc = lmv_intent_getattr(exp, op_data,lmm, lmmsize, it, + flags, reqp, cb_blocking, + extra_lock_flags); + else + LBUG(); + RETURN(rc); +} + +int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp, + const struct lu_fid *mid, struct lookup_intent *oit, + int master_valid, ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct ptlrpc_request *mreq = *reqp; + struct lmv_obd *lmv = &obd->u.lmv; + struct lustre_handle master_lockh; + struct obd_export *tgt_exp; + struct md_op_data *op_data; + struct ldlm_lock *lock; + unsigned long size = 0; + struct mdt_body *body; + struct lmv_obj *obj; + int master_lock_mode; + int i, rc = 0; + ENTRY; + + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + RETURN(-ENOMEM); + + /* + * We have to loop over the subobjects, check validity and update them + * from MDSs if needed. it's very useful that we need not to update all + * the fields. say, common fields (that are equal on all the subojects + * need not to be update, another fields (i_size, for example) are + * cached all the time. + */ + obj = lmv_obj_grab(obd, mid); + LASSERT(obj != NULL); + + master_lock_mode = 0; + + lmv_obj_lock(obj); + + for (i = 0; i < obj->lo_objcount; i++) { + struct lu_fid fid = obj->lo_inodes[i].li_fid; + struct lustre_handle *lockh = NULL; + struct ptlrpc_request *req = NULL; + ldlm_blocking_callback cb; + struct lookup_intent it; + int master = 0; + + CDEBUG(D_OTHER, "revalidate subobj "DFID"\n", + PFID(&fid)); + + memset(op_data, 0, sizeof(*op_data)); + memset(&it, 0, sizeof(it)); + it.it_op = IT_GETATTR; + + cb = lmv_blocking_ast; + + if (lu_fid_eq(&fid, &obj->lo_fid)) { + if (master_valid) { + /* + * lmv_intent_getattr() already checked + * validness and took the lock. + */ + if (mreq) { + /* + * It even got the reply refresh attrs + * from that reply. + */ + body = lustre_msg_buf(mreq->rq_repmsg, + DLM_REPLY_REC_OFF, + sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED(mreq, DLM_REPLY_REC_OFF); + goto update; + } + /* take already cached attrs into account */ + CDEBUG(D_OTHER, + "master is locked and cached\n"); + goto release_lock; + } + master = 1; + cb = cb_blocking; + } + + op_data->op_fid1 = fid; + op_data->op_fid2 = fid; + op_data->op_bias = MDS_CROSS_REF; + + /* Is obj valid? */ + tgt_exp = lmv_get_export(lmv, obj->lo_inodes[i].li_mds); + if (IS_ERR(tgt_exp)) + GOTO(cleanup, rc = PTR_ERR(tgt_exp)); + + rc = md_intent_lock(tgt_exp, op_data, NULL, 0, &it, 0, &req, cb, + extra_lock_flags); + + lockh = (struct lustre_handle *)&it.d.lustre.it_lock_handle; + if (rc > 0 && req == NULL) { + /* Nice, this slave is valid */ + LASSERT(req == NULL); + CDEBUG(D_OTHER, "cached\n"); + goto release_lock; + } + + if (rc < 0) + GOTO(cleanup, rc); + + if (master) { + LASSERT(master_valid == 0); + /* Save lock on master to be returned to the caller. */ + CDEBUG(D_OTHER, "no lock on master yet\n"); + memcpy(&master_lockh, lockh, sizeof(master_lockh)); + master_lock_mode = it.d.lustre.it_lock_mode; + it.d.lustre.it_lock_mode = 0; + } else { + /* This is slave. We want to control it. */ + lock = ldlm_handle2lock(lockh); + LASSERT(lock != NULL); + lock->l_ast_data = lmv_obj_get(obj); + LDLM_LOCK_PUT(lock); + } + + if (*reqp == NULL) { + /* + * This is first reply, we'll use it to return updated + * data back to the caller. + */ + LASSERT(req); + ptlrpc_request_addref(req); + *reqp = req; + } + + body = lustre_msg_buf(req->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); + +update: + obj->lo_inodes[i].li_size = body->size; + + CDEBUG(D_OTHER, "fresh: %lu\n", + (unsigned long)obj->lo_inodes[i].li_size); + + if (req) + ptlrpc_req_finished(req); +release_lock: + size += obj->lo_inodes[i].li_size; + + if (it.d.lustre.it_lock_mode) { + ldlm_lock_decref(lockh, it.d.lustre.it_lock_mode); + it.d.lustre.it_lock_mode = 0; + } + } + + if (*reqp) { + /* + * Some attrs got refreshed, we have reply and it's time to put + * fresh attrs to it. + */ + CDEBUG(D_OTHER, "return refreshed attrs: size = %lu\n", + (unsigned long)size); + + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*reqp), DLM_REPLY_REC_OFF); + + body->size = size; + + if (mreq == NULL) { + /* + * Very important to maintain mds num the same because + * of revalidation. mreq == NULL means that caller has + * no reply and the only attr we can return is size. + */ + body->valid = OBD_MD_FLSIZE; + } + if (master_valid == 0) { + memcpy(&oit->d.lustre.it_lock_handle, + &master_lockh, sizeof(master_lockh)); + oit->d.lustre.it_lock_mode = master_lock_mode; + } + rc = 0; + } else { + /* It seems all the attrs are fresh and we did no request */ + CDEBUG(D_OTHER, "all the attrs were fresh\n"); + if (master_valid == 0) + oit->d.lustre.it_lock_mode = master_lock_mode; + rc = 1; + } + + EXIT; +cleanup: + OBD_FREE_PTR(op_data); + lmv_obj_unlock(obj); + lmv_obj_put(obj); + return rc; +} diff --git a/lustre/lmv/lmv_internal.h b/lustre/lmv/lmv_internal.h new file mode 100644 index 0000000..7e8510f --- /dev/null +++ b/lustre/lmv/lmv_internal.h @@ -0,0 +1,225 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LMV_INTERNAL_H_ +#define _LMV_INTERNAL_H_ + +#include <lustre/lustre_idl.h> +#include <obd.h> + +#ifndef __KERNEL__ +/* XXX: dirty hack, needs to be fixed more clever way. */ +struct qstr { + const char *name; + size_t len; + unsigned hashval; +}; +#endif + +#define LMV_MAX_TGT_COUNT 128 + +#define lmv_init_lock(lmv) down(&lmv->init_sem); +#define lmv_init_unlock(lmv) up(&lmv->init_sem); + +#define LL_IT2STR(it) \ + ((it) ? ldlm_it2str((it)->it_op) : "0") + +struct lmv_inode { + struct lu_fid li_fid; /* id of dirobj */ + mdsno_t li_mds; /* cached mdsno where @li_fid lives */ + unsigned long li_size; /* slave size value */ + int li_flags; +}; + +#define O_FREEING (1 << 0) + +struct lmv_obj { + struct list_head lo_list; + struct semaphore lo_guard; + int lo_state; /* object state. */ + atomic_t lo_count; /* ref counter. */ + struct lu_fid lo_fid; /* master id of dir */ + void *lo_update; /* bitmap of status (up-to-date) */ + __u32 lo_hashtype; + int lo_objcount; /* number of slaves */ + struct lmv_inode *lo_inodes; /* array of sub-objs */ + struct obd_device *lo_obd; /* pointer to LMV itself */ +}; + +int lmv_obj_setup(struct obd_device *obd); +void lmv_obj_cleanup(struct obd_device *obd); + +static inline void +lmv_obj_lock(struct lmv_obj *obj) +{ + LASSERT(obj); + down(&obj->lo_guard); +} + +static inline void +lmv_obj_unlock(struct lmv_obj *obj) +{ + LASSERT(obj); + up(&obj->lo_guard); +} + +void lmv_obj_add(struct lmv_obj *obj); +void lmv_obj_del(struct lmv_obj *obj); + +void lmv_obj_put(struct lmv_obj *obj); +void lmv_obj_free(struct lmv_obj *obj); + +struct lmv_obj *lmv_obj_get(struct lmv_obj *obj); + +struct lmv_obj *lmv_obj_grab(struct obd_device *obd, + const struct lu_fid *fid); + +struct lmv_obj *lmv_obj_alloc(struct obd_device *obd, + const struct lu_fid *fid, + struct lmv_stripe_md *mea); + +struct lmv_obj *lmv_obj_create(struct obd_export *exp, + const struct lu_fid *fid, + struct lmv_stripe_md *mea); + +int lmv_obj_delete(struct obd_export *exp, + const struct lu_fid *fid); + +int lmv_check_connect(struct obd_device *obd); + +int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags); + +int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags); + +int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags); + +int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data, + void *lmm, int lmmsize, struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags); + +int lmv_revalidate_slaves(struct obd_export *, struct ptlrpc_request **, + const struct lu_fid *, struct lookup_intent *, int, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags); + +int lmv_handle_split(struct obd_export *, const struct lu_fid *); +int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, + void *, int); +int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, + mdsno_t *mds); +int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, + mdsno_t mds); +int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data); +int lmv_alloc_slave_fids(struct obd_device *obd, struct lu_fid *pid, + struct md_op_data *op, struct lu_fid *fid); + +static inline struct lmv_stripe_md * +lmv_get_mea(struct ptlrpc_request *req, int offset) +{ + struct mdt_body *body; + struct lmv_stripe_md *mea; + + LASSERT(req); + + body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); + LASSERT_REPSWABBED(req, offset); + + if (!body || !S_ISDIR(body->mode) || !body->eadatasize) + return NULL; + + mea = lustre_msg_buf(req->rq_repmsg, offset + 1, + body->eadatasize); + LASSERT(mea != NULL); + + if (mea->mea_count == 0) + return NULL; + + return mea; +} + +static inline int lmv_get_easize(struct lmv_obd *lmv) +{ + return sizeof(struct lmv_stripe_md) + + lmv->desc.ld_tgt_count * + sizeof(struct lu_fid); +} + +static inline struct lmv_tgt_desc * +lmv_get_target(struct lmv_obd *lmv, mdsno_t mds) +{ + return &lmv->tgts[mds]; +} + +static inline struct obd_export * +lmv_get_export(struct lmv_obd *lmv, mdsno_t mds) +{ + return lmv_get_target(lmv, mds)->ltd_exp; +} + +static inline struct lmv_tgt_desc * +lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid) +{ + mdsno_t mds; + int rc; + + rc = lmv_fld_lookup(lmv, fid, &mds); + if (rc) + return ERR_PTR(rc); + + return lmv_get_target(lmv, mds); +} + +static inline struct obd_export * +lmv_find_export(struct lmv_obd *lmv, const struct lu_fid *fid) +{ + struct lmv_tgt_desc *tgt = lmv_find_target(lmv, fid); + if (IS_ERR(tgt)) + return (struct obd_export *)tgt; + return tgt->ltd_exp; +} + +static inline void lmv_update_body(struct mdt_body *body, + struct lmv_inode *lino) +{ + /* update object size */ + body->size += lino->li_size; +} + +/* lproc_lmv.c */ +extern struct file_operations lmv_proc_target_fops; + +#endif + diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c new file mode 100644 index 0000000..333104d --- /dev/null +++ b/lustre/lmv/lmv_obd.c @@ -0,0 +1,2792 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LMV +#ifdef __KERNEL__ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/mm.h> +#include <asm/div64.h> +#include <linux/seq_file.h> +#include <linux/namei.h> +#else +#include <liblustre.h> +#endif + +#include <linux/ext2_fs.h> + +#include <lustre/lustre_idl.h> +#include <lustre_log.h> +#include <obd_support.h> +#include <lustre_lib.h> +#include <lustre_net.h> +#include <obd_class.h> +#include <lprocfs_status.h> +#include <lustre_lite.h> +#include <lustre_fid.h> +#include "lmv_internal.h" + +/* not defined for liblustre building */ +#if !defined(ATOMIC_INIT) +#define ATOMIC_INIT(val) { (val) } +#endif + +/* object cache. */ +cfs_mem_cache_t *obj_cache; +atomic_t obj_cache_count = ATOMIC_INIT(0); + +static void lmv_activate_target(struct lmv_obd *lmv, + struct lmv_tgt_desc *tgt, + int activate) +{ + if (tgt->ltd_active == activate) + return; + + tgt->ltd_active = activate; + lmv->desc.ld_active_tgt_count += (activate ? 1 : -1); +} + +/* Error codes: + * + * -EINVAL : UUID can't be found in the LMV's target list + * -ENOTCONN: The UUID is found, but the target connection is bad (!) + * -EBADF : The UUID is found, but the OBD of the wrong type (!) + */ +static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid, + int activate) +{ + struct lmv_tgt_desc *tgt; + struct obd_device *obd; + int i, rc = 0; + ENTRY; + + CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n", + lmv, uuid->uuid, activate); + + spin_lock(&lmv->lmv_lock); + for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) { + if (tgt->ltd_exp == NULL) + continue; + + CDEBUG(D_INFO, "lmv idx %d is %s conn "LPX64"\n", + i, tgt->ltd_uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie); + + if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) + break; + } + + if (i == lmv->desc.ld_tgt_count) + GOTO(out_lmv_lock, rc = -EINVAL); + + obd = class_exp2obd(tgt->ltd_exp); + if (obd == NULL) + GOTO(out_lmv_lock, rc = -ENOTCONN); + + CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n", + obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd, + obd->obd_type->typ_name, i); + LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0); + + if (tgt->ltd_active == activate) { + CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd, + activate ? "" : "in"); + GOTO(out_lmv_lock, rc); + } + + CDEBUG(D_INFO, "Marking OBD %p %sactive\n", + obd, activate ? "" : "in"); + + lmv_activate_target(lmv, tgt, activate); + + EXIT; + + out_lmv_lock: + spin_unlock(&lmv->lmv_lock); + return rc; +} + +static int lmv_set_mdc_data(struct lmv_obd *lmv, struct obd_uuid *uuid, + struct obd_connect_data *data) +{ + struct lmv_tgt_desc *tgt; + int i; + ENTRY; + + LASSERT(data != NULL); + + spin_lock(&lmv->lmv_lock); + for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) { + if (tgt->ltd_exp == NULL) + continue; + + if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) { + lmv->datas[tgt->ltd_idx] = *data; + break; + } + } + spin_unlock(&lmv->lmv_lock); + RETURN(0); +} + +static int lmv_notify(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev, void *data) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_uuid *uuid; + int rc = 0; + ENTRY; + + if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) { + CERROR("unexpected notification of %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + RETURN(-EINVAL); + } + + uuid = &watched->u.cli.cl_target_uuid; + if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { + /* + * Set MDC as active before notifying the observer, so the + * observer can use the MDC normally. + */ + rc = lmv_set_mdc_active(lmv, uuid, + ev == OBD_NOTIFY_ACTIVE); + if (rc) { + CERROR("%sactivation of %s failed: %d\n", + ev == OBD_NOTIFY_ACTIVE ? "" : "de", + uuid->uuid, rc); + RETURN(rc); + } + } else if (ev == OBD_NOTIFY_OCD) { + struct obd_connect_data *conn_data = + &watched->u.cli.cl_import->imp_connect_data; + + /* Set connect data to desired target, update + * exp_connect_flags. */ + rc = lmv_set_mdc_data(lmv, uuid, conn_data); + if (rc) { + CERROR("can't set connect data to target %s, rc %d\n", + uuid->uuid, rc); + RETURN(rc); + } + + /* + * XXX: Make sure that ocd_connect_flags from all targets are + * the same. Otherwise one of MDTs runs wrong version or + * something like this. --umka + */ + obd->obd_self_export->exp_connect_flags = + conn_data->ocd_connect_flags; + } +#if 0 + else if (ev == OBD_NOTIFY_DISCON) { + /* For disconnect event, flush fld cache for failout MDS case. */ + fld_client_flush(&lmv->lmv_fld); + } +#endif + /* Pass the notification up the chain. */ + if (obd->obd_observer) + rc = obd_notify(obd->obd_observer, watched, ev, data); + + RETURN(rc); +} + +/* this is fake connect function. Its purpose is to initialize lmv and say + * caller that everything is okay. Real connection will be performed later. */ +static int lmv_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid, struct obd_connect_data *data) +{ +#ifdef __KERNEL__ + struct proc_dir_entry *lmv_proc_dir; +#endif + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *exp; + int rc = 0; + ENTRY; + + rc = class_connect(conn, obd, cluuid); + if (rc) { + CERROR("class_connection() returned %d\n", rc); + RETURN(rc); + } + + exp = class_conn2export(conn); + + /* we don't want to actually do the underlying connections more than + * once, so keep track. */ + lmv->refcount++; + if (lmv->refcount > 1) { + class_export_put(exp); + RETURN(0); + } + + lmv->exp = exp; + lmv->connected = 0; + lmv->cluuid = *cluuid; + + if (data) + lmv->conn_data = *data; + +#ifdef __KERNEL__ + lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry, + NULL, NULL); + if (IS_ERR(lmv_proc_dir)) { + CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.", + obd->obd_type->typ_name, obd->obd_name); + lmv_proc_dir = NULL; + } +#endif + + /* all real clients should perform actual connection right away, because + * it is possible, that LMV will not have opportunity to connect targets + * and MDC stuff will be called directly, for instance while reading + * ../mdc/../kbytesfree procfs file, etc. */ + if (data->ocd_connect_flags & OBD_CONNECT_REAL) + rc = lmv_check_connect(obd); + +#ifdef __KERNEL__ + if (rc) { + if (lmv_proc_dir) + lprocfs_remove(&lmv_proc_dir); + } +#endif + + RETURN(rc); +} + +static void lmv_set_timeouts(struct obd_device *obd) +{ + struct lmv_tgt_desc *tgts; + struct lmv_obd *lmv; + int i; + + lmv = &obd->u.lmv; + if (lmv->server_timeout == 0) + return; + + if (lmv->connected == 0) + return; + + for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgts++) { + if (tgts->ltd_exp == NULL) + continue; + + obd_set_info_async(tgts->ltd_exp, strlen("inter_mds"), + "inter_mds", 0, NULL, NULL); + } +} + +static int lmv_init_ea_size(struct obd_export *exp, int easize, + int def_easize, int cookiesize) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int i, rc = 0, change = 0; + ENTRY; + + if (lmv->max_easize < easize) { + lmv->max_easize = easize; + change = 1; + } + if (lmv->max_def_easize < def_easize) { + lmv->max_def_easize = def_easize; + change = 1; + } + if (lmv->max_cookiesize < cookiesize) { + lmv->max_cookiesize = cookiesize; + change = 1; + } + if (change == 0) + RETURN(0); + + if (lmv->connected == 0) + RETURN(0); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", obd->obd_name, i); + continue; + } + + rc = md_init_ea_size(lmv->tgts[i].ltd_exp, easize, def_easize, + cookiesize); + if (rc) { + CERROR("obd_init_ea_size() failed on MDT target %d, " + "error %d.\n", i, rc); + break; + } + } + RETURN(rc); +} + +#define MAX_STRING_SIZE 128 + +int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_uuid *cluuid = &lmv->cluuid; + struct obd_connect_data *mdc_data = NULL; + struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" }; + struct lustre_handle conn = {0, }; + struct obd_device *mdc_obd; + struct obd_export *mdc_exp; + struct lu_fld_target target; + int rc; +#ifdef __KERNEL__ + struct proc_dir_entry *lmv_proc_dir; +#endif + ENTRY; + + /* for MDS: don't connect to yourself */ + if (obd_uuid_equals(&tgt->ltd_uuid, cluuid)) { + CDEBUG(D_CONFIG, "don't connect back to %s\n", cluuid->uuid); + /* XXX - the old code didn't increment active tgt count. + * should we ? */ + RETURN(0); + } + + mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME, + &obd->obd_uuid); + if (!mdc_obd) { + CERROR("target %s not attached\n", tgt->ltd_uuid.uuid); + RETURN(-EINVAL); + } + + CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n", + mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, + tgt->ltd_uuid.uuid, obd->obd_uuid.uuid, + cluuid->uuid); + + if (!mdc_obd->obd_set_up) { + CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid); + RETURN(-EINVAL); + } + + rc = obd_connect(NULL, &conn, mdc_obd, &lmv_mdc_uuid, + &lmv->conn_data); + if (rc) { + CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc); + RETURN(rc); + } + + mdc_exp = class_conn2export(&conn); + + /* Init fid sequence client for this mdc. */ + rc = obd_fid_init(mdc_exp); + if (rc) + RETURN(rc); + + /* Add new FLD target. */ + target.ft_srv = NULL; + target.ft_exp = mdc_exp; + target.ft_idx = tgt->ltd_idx; + + fld_client_add_target(&lmv->lmv_fld, &target); + + mdc_data = &class_exp2cliimp(mdc_exp)->imp_connect_data; + + rc = obd_register_observer(mdc_obd, obd); + if (rc) { + obd_disconnect(mdc_exp); + CERROR("target %s register_observer error %d\n", + tgt->ltd_uuid.uuid, rc); + RETURN(rc); + } + + if (obd->obd_observer) { + /* tell the mds_lmv about the new target */ + rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd, + OBD_NOTIFY_ACTIVE, (void *)(tgt - lmv->tgts)); + if (rc) { + obd_disconnect(mdc_exp); + RETURN(rc); + } + } + + tgt->ltd_active = 1; + tgt->ltd_exp = mdc_exp; + lmv->desc.ld_active_tgt_count++; + + /* copy connect data, it may be used later */ + lmv->datas[tgt->ltd_idx] = *mdc_data; + + md_init_ea_size(tgt->ltd_exp, lmv->max_easize, + lmv->max_def_easize, lmv->max_cookiesize); + + CDEBUG(D_CONFIG, "connected to %s(%s) successfully (%d)\n", + mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, + atomic_read(&obd->obd_refcount)); + +#ifdef __KERNEL__ + lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); + if (lmv_proc_dir) { + struct proc_dir_entry *mdc_symlink; + char name[MAX_STRING_SIZE + 1]; + + LASSERT(mdc_obd->obd_type != NULL); + LASSERT(mdc_obd->obd_type->typ_name != NULL); + name[MAX_STRING_SIZE] = '\0'; + snprintf(name, MAX_STRING_SIZE, "../../../%s/%s", + mdc_obd->obd_type->typ_name, + mdc_obd->obd_name); + mdc_symlink = proc_symlink(mdc_obd->obd_name, + lmv_proc_dir, name); + if (mdc_symlink == NULL) { + CERROR("could not register LMV target " + "/proc/fs/lustre/%s/%s/target_obds/%s.", + obd->obd_type->typ_name, obd->obd_name, + mdc_obd->obd_name); + lprocfs_remove(&lmv_proc_dir); + lmv_proc_dir = NULL; + } + } +#endif + RETURN(0); +} + +int lmv_add_target(struct obd_device *obd, struct obd_uuid *tgt_uuid) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + int rc = 0; + ENTRY; + + CDEBUG(D_CONFIG, "tgt_uuid: %s.\n", tgt_uuid->uuid); + + lmv_init_lock(lmv); + + if (lmv->desc.ld_active_tgt_count >= LMV_MAX_TGT_COUNT) { + lmv_init_unlock(lmv); + CERROR("can't add %s, LMV module compiled for %d MDCs. " + "That many MDCs already configured.\n", + tgt_uuid->uuid, LMV_MAX_TGT_COUNT); + RETURN(-EINVAL); + } + if (lmv->desc.ld_tgt_count == 0) { + struct obd_device *mdc_obd; + + mdc_obd = class_find_client_obd(tgt_uuid, LUSTRE_MDC_NAME, + &obd->obd_uuid); + if (!mdc_obd) { + lmv_init_unlock(lmv); + CERROR("Target %s not attached\n", tgt_uuid->uuid); + RETURN(-EINVAL); + } + + rc = obd_llog_init(obd, NULL, mdc_obd, 0, NULL, tgt_uuid); + if (rc) { + lmv_init_unlock(lmv); + CERROR("lmv failed to setup llogging subsystems\n"); + } + } + spin_lock(&lmv->lmv_lock); + tgt = lmv->tgts + lmv->desc.ld_tgt_count++; + tgt->ltd_uuid = *tgt_uuid; + spin_unlock(&lmv->lmv_lock); + + if (lmv->connected) { + rc = lmv_connect_mdc(obd, tgt); + if (rc) { + spin_lock(&lmv->lmv_lock); + lmv->desc.ld_tgt_count--; + memset(tgt, 0, sizeof(*tgt)); + spin_unlock(&lmv->lmv_lock); + } else { + int easize = sizeof(struct lmv_stripe_md) + + lmv->desc.ld_tgt_count * + sizeof(struct lu_fid); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); + } + } + + lmv_init_unlock(lmv); + RETURN(rc); +} + +/* performs a check if passed obd is connected. If no - connect it. */ +int lmv_check_connect(struct obd_device *obd) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + int i, rc, easize; + ENTRY; + + if (lmv->connected) + RETURN(0); + + lmv_init_lock(lmv); + if (lmv->connected) { + lmv_init_unlock(lmv); + RETURN(0); + } + + if (lmv->desc.ld_tgt_count == 0) { + CERROR("%s: no targets configured.\n", obd->obd_name); + RETURN(-EINVAL); + } + + CDEBUG(D_CONFIG, "time to connect %s to %s\n", + lmv->cluuid.uuid, obd->obd_name); + + LASSERT(lmv->tgts != NULL); + + for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) { + rc = lmv_connect_mdc(obd, tgt); + if (rc) + GOTO(out_disc, rc); + } + + lmv_set_timeouts(obd); + class_export_put(lmv->exp); + lmv->connected = 1; + easize = lmv_get_easize(lmv); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); + lmv_init_unlock(lmv); + RETURN(0); + + out_disc: + while (i-- > 0) { + int rc2; + --tgt; + tgt->ltd_active = 0; + if (tgt->ltd_exp) { + --lmv->desc.ld_active_tgt_count; + rc2 = obd_disconnect(tgt->ltd_exp); + if (rc2) { + CERROR("error: LMV target %s disconnect on " + "MDC idx %d: error %d\n", + tgt->ltd_uuid.uuid, i, rc2); + } + } + } + class_disconnect(lmv->exp); + lmv_init_unlock(lmv); + RETURN(rc); +} + +static int lmv_disconnect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) +{ +#ifdef __KERNEL__ + struct proc_dir_entry *lmv_proc_dir; +#endif + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_device *mdc_obd; + int rc; + ENTRY; + + LASSERT(tgt != NULL); + LASSERT(obd != NULL); + + mdc_obd = class_exp2obd(tgt->ltd_exp); + + if (mdc_obd) + mdc_obd->obd_no_recov = obd->obd_no_recov; + +#ifdef __KERNEL__ + lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); + if (lmv_proc_dir) { + struct proc_dir_entry *mdc_symlink; + + mdc_symlink = lprocfs_srch(lmv_proc_dir, mdc_obd->obd_name); + if (mdc_symlink) { + lprocfs_remove(&mdc_symlink); + } else { + CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing\n", + obd->obd_type->typ_name, obd->obd_name, + mdc_obd->obd_name); + } + } +#endif + rc = obd_fid_fini(tgt->ltd_exp); + if (rc) + CERROR("Can't finanize fids factory\n"); + + CDEBUG(D_OTHER, "Disconnected from %s(%s) successfully\n", + tgt->ltd_exp->exp_obd->obd_name, + tgt->ltd_exp->exp_obd->obd_uuid.uuid); + + obd_register_observer(tgt->ltd_exp->exp_obd, NULL); + rc = obd_disconnect(tgt->ltd_exp); + if (rc) { + if (tgt->ltd_active) { + CERROR("Target %s disconnect error %d\n", + tgt->ltd_uuid.uuid, rc); + } + } + + lmv_activate_target(lmv, tgt, 0); + tgt->ltd_exp = NULL; + RETURN(0); +} + +static int lmv_disconnect(struct obd_export *exp) +{ + struct obd_device *obd = class_exp2obd(exp); +#ifdef __KERNEL__ + struct proc_dir_entry *lmv_proc_dir; +#endif + struct lmv_obd *lmv = &obd->u.lmv; + int rc, i; + ENTRY; + + if (!lmv->tgts) + goto out_local; + + /* Only disconnect the underlying layers on the final disconnect. */ + lmv->refcount--; + if (lmv->refcount != 0) + goto out_local; + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (lmv->tgts[i].ltd_exp == NULL) + continue; + lmv_disconnect_mdc(obd, &lmv->tgts[i]); + } + +#ifdef __KERNEL__ + lmv_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); + if (lmv_proc_dir) { + lprocfs_remove(&lmv_proc_dir); + } else { + CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n", + obd->obd_type->typ_name, obd->obd_name); + } +#endif + +out_local: + /* + * This is the case when no real connection is established by + * lmv_check_connect(). + */ + if (!lmv->connected) + class_export_put(exp); + rc = class_disconnect(exp); + if (lmv->refcount == 0) + lmv->connected = 0; + RETURN(rc); +} + +static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, + int len, void *karg, void *uarg) +{ + struct obd_device *obddev = class_exp2obd(exp); + struct lmv_obd *lmv = &obddev->u.lmv; + int i, rc = 0, set = 0; + ENTRY; + + if (lmv->desc.ld_tgt_count == 0) + RETURN(-ENOTTY); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + int err; + + if (lmv->tgts[i].ltd_exp == NULL) + continue; + + err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg); + if (err) { + if (lmv->tgts[i].ltd_active) { + CERROR("error: iocontrol MDC %s on MDT" + "idx %d: err = %d\n", + lmv->tgts[i].ltd_uuid.uuid, i, err); + if (!rc) + rc = err; + } + } else + set = 1; + } + if (!set && !rc) + rc = -EIO; + + RETURN(rc); +} + +enum MDS_POLICY { + CHAR_TYPE, + NID_TYPE +}; + +static int lmv_all_chars_policy(int count, const char *name, + int len) +{ + unsigned int c = 0; + + while (len > 0) + c += name[--len]; + c = c % count; + return c; +} + +static int lmv_nid_policy(struct lmv_obd *lmv) +{ + struct obd_import *imp = class_exp2cliimp(lmv->tgts[0].ltd_exp); + __u32 id; + /* + * XXX Hack: to get nid we assume that underlying obd device is mdc. + */ + id = imp->imp_connection->c_self ^ (imp->imp_connection->c_self >> 32); + return id % lmv->desc.ld_tgt_count; +} + +static int lmv_choose_mds(struct lmv_obd *lmv, struct md_op_data *op_data, + int type) +{ + switch (type) { + case CHAR_TYPE: + return lmv_all_chars_policy(lmv->desc.ld_tgt_count, + op_data->op_name, + op_data->op_namelen); + case NID_TYPE: + return lmv_nid_policy(lmv); + + default: + break; + } + + CERROR("unsupport type %d \n", type); + return -EINVAL; +} + +/* This is _inode_ placement policy function (not name). */ +static int lmv_placement_policy(struct obd_device *obd, + struct md_op_data *op_data, + mdsno_t *mds) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_obj *obj; + int rc; + ENTRY; + + LASSERT(mds != NULL); + + /* + * Allocate new fid on target according to operation type and parent + * home mds. + */ + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj != NULL || op_data->op_name == NULL || + op_data->op_opc != LUSTRE_OPC_MKDIR) { + /* + * Allocate fid for non-dir or for null name or for case parent + * dir is split. + */ + if (obj) { + lmv_obj_put(obj); + + /* + * If we have this flag turned on, and we see that + * parent dir is split, this means, that caller did not + * notice split yet. This is race and we would like to + * let caller know that. + */ + if (op_data->op_bias & MDS_CHECK_SPLIT) + RETURN(-ERESTART); + } + + /* + * Allocate new fid on same mds where parent fid is located and + * where operation will be sent. In case of split dir, ->op_fid1 + * and ->op_mds here will contain fid and mds of slave directory + * object (assigned by caller). + */ + *mds = op_data->op_mds; + rc = 0; + +#if 0 + /* XXX: This should be removed later wehn we sure it is not + * needed. */ + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, mds); + if (rc) + GOTO(out, rc); +#endif + } else { + /* + * Parent directory is not split and we want to create a + * directory in it. Let's calculate where to place it according + * to name. + */ + *mds = lmv_choose_mds(lmv, op_data, NID_TYPE); + rc = 0; + } + EXIT; +#if 0 +out: +#endif + if (rc) { + CERROR("Can't choose MDS, err = %d\n", rc); + } else { + LASSERT(*mds < lmv->desc.ld_tgt_count); + } + + return rc; +} + +int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, + mdsno_t mds) +{ + struct lmv_tgt_desc *tgt = &lmv->tgts[mds]; + int rc; + ENTRY; + + /* New seq alloc and FLD setup should be atomic. */ + down(&tgt->ltd_fid_sem); + + /* Asking underlaying tgt layer to allocate new fid. */ + rc = obd_fid_alloc(tgt->ltd_exp, fid, NULL); + if (rc > 0) { + LASSERT(fid_is_sane(fid)); + + /* Client switches to new sequence, setup FLD. */ + rc = fld_client_create(&lmv->lmv_fld, fid_seq(fid), + mds, NULL); + if (rc) { + CERROR("Can't create fld entry, rc %d\n", rc); + /* Delete just allocated fid sequence */ + obd_fid_delete(tgt->ltd_exp, NULL); + } + } + up(&tgt->ltd_fid_sem); + RETURN(rc); +} + +int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lmv_obd *lmv = &obd->u.lmv; + mdsno_t mds; + int rc; + ENTRY; + + LASSERT(op_data != NULL); + LASSERT(fid != NULL); + + rc = lmv_placement_policy(obd, op_data, &mds); + if (rc) { + CERROR("Can't get target for allocating fid, " + "rc %d\n", rc); + RETURN(rc); + } + + rc = __lmv_fid_alloc(lmv, fid, mds); + if (rc) { + CERROR("Can't alloc new fid, rc %d\n", rc); + RETURN(rc); + } + + RETURN(rc); +} + +static int lmv_fid_delete(struct obd_export *exp, const struct lu_fid *fid) +{ + ENTRY; + + LASSERT(exp && fid); + if (lmv_obj_delete(exp, fid)) { + CDEBUG(D_OTHER, "lmv object "DFID" is destroyed.\n", + PFID(fid)); + } + RETURN(0); +} + +static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct lprocfs_static_vars lvars; + struct lmv_desc *desc; + int rc, i = 0; + ENTRY; + + if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { + CERROR("LMV setup requires a descriptor\n"); + RETURN(-EINVAL); + } + + desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1); + if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) { + CERROR("descriptor size wrong: %d > %d\n", + (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1)); + RETURN(-EINVAL); + } + + lmv->tgts_size = LMV_MAX_TGT_COUNT * sizeof(struct lmv_tgt_desc); + + OBD_ALLOC(lmv->tgts, lmv->tgts_size); + if (lmv->tgts == NULL) + RETURN(-ENOMEM); + + for (i = 0; i < LMV_MAX_TGT_COUNT; i++) { + sema_init(&lmv->tgts[i].ltd_fid_sem, 1); + lmv->tgts[i].ltd_idx = i; + } + + lmv->datas_size = LMV_MAX_TGT_COUNT * sizeof(struct obd_connect_data); + + OBD_ALLOC(lmv->datas, lmv->datas_size); + if (lmv->datas == NULL) + GOTO(out_free_tgts, rc = -ENOMEM); + + obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid); + lmv->desc.ld_tgt_count = 0; + lmv->desc.ld_active_tgt_count = 0; + lmv->max_cookiesize = 0; + lmv->max_def_easize = 0; + lmv->max_easize = 0; + + spin_lock_init(&lmv->lmv_lock); + sema_init(&lmv->init_sem, 1); + + rc = lmv_obj_setup(obd); + if (rc) { + CERROR("Can't setup LMV object manager, " + "error %d.\n", rc); + GOTO(out_free_datas, rc); + } + + lprocfs_init_vars(lmv, &lvars); + lprocfs_obd_setup(obd, lvars.obd_vars); +#ifdef LPROCFS + { + struct proc_dir_entry *entry; + + entry = create_proc_entry("target_obd_status", 0444, + obd->obd_proc_entry); + if (entry != NULL) { + entry->proc_fops = &lmv_proc_target_fops; + entry->data = obd; + } + } +#endif + rc = fld_client_init(&lmv->lmv_fld, obd->obd_name, + LUSTRE_CLI_FLD_HASH_DHT); + if (rc) { + CERROR("can't init FLD, err %d\n", + rc); + GOTO(out_free_datas, rc); + } + + RETURN(0); + +out_free_datas: + OBD_FREE(lmv->datas, lmv->datas_size); + lmv->datas = NULL; +out_free_tgts: + OBD_FREE(lmv->tgts, lmv->tgts_size); + lmv->tgts = NULL; + return rc; +} + +static int lmv_cleanup(struct obd_device *obd) +{ + struct lmv_obd *lmv = &obd->u.lmv; + ENTRY; + + fld_client_fini(&lmv->lmv_fld); + lprocfs_obd_cleanup(obd); + lmv_obj_cleanup(obd); + OBD_FREE(lmv->datas, lmv->datas_size); + OBD_FREE(lmv->tgts, lmv->tgts_size); + + RETURN(0); +} + +static int lmv_process_config(struct obd_device *obd, obd_count len, void *buf) +{ + struct lustre_cfg *lcfg = buf; + struct obd_uuid tgt_uuid; + int rc; + ENTRY; + + switch(lcfg->lcfg_command) { + case LCFG_ADD_MDC: + if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(tgt_uuid.uuid)) + GOTO(out, rc = -EINVAL); + + obd_str2uuid(&tgt_uuid, lustre_cfg_string(lcfg, 1)); + rc = lmv_add_target(obd, &tgt_uuid); + GOTO(out, rc); + default: { + CERROR("Unknown command: %d\n", lcfg->lcfg_command); + GOTO(out, rc = -EINVAL); + } + } +out: + RETURN(rc); +} + +static int lmv_statfs(struct obd_device *obd, struct obd_statfs *osfs, + __u64 max_age) +{ + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_statfs *temp; + int rc = 0, i; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + OBD_ALLOC(temp, sizeof(*temp)); + if (temp == NULL) + RETURN(-ENOMEM); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (lmv->tgts[i].ltd_exp == NULL) + continue; + + rc = obd_statfs(lmv->tgts[i].ltd_exp->exp_obd, temp, max_age); + if (rc) { + CERROR("can't stat MDS #%d (%s), error %d\n", i, + lmv->tgts[i].ltd_exp->exp_obd->obd_name, + rc); + GOTO(out_free_temp, rc); + } + if (i == 0) { + *osfs = *temp; + } else { + osfs->os_bavail += temp->os_bavail; + osfs->os_blocks += temp->os_blocks; + osfs->os_ffree += temp->os_ffree; + osfs->os_files += temp->os_files; + } + } + + EXIT; +out_free_temp: + OBD_FREE(temp, sizeof(*temp)); + return rc; +} + +static int lmv_getstatus(struct obd_export *exp, + struct lu_fid *fid, + struct obd_capa **pc) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + rc = md_getstatus(lmv->tgts[0].ltd_exp, fid, pc); + + RETURN(rc); +} + +static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, const char *name, + const char *input, int input_size, int output_size, + int flags, struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_getxattr(tgt_exp, fid, oc, valid, name, input, input_size, + output_size, flags, request); + + RETURN(rc); +} + +static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, const char *name, + const char *input, int input_size, int output_size, + int flags, struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_setxattr(tgt_exp, fid, oc, valid, name, + input, input_size, output_size, flags, request); + + RETURN(rc); +} + +static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, int ea_size, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + struct lmv_obj *obj; + int rc, i; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_getattr(tgt_exp, fid, oc, valid, ea_size, request); + if (rc) + RETURN(rc); + + obj = lmv_obj_grab(obd, fid); + + CDEBUG(D_OTHER, "GETATTR for "DFID" %s\n", PFID(fid), + obj ? "(split)" : ""); + + /* + * If object is split, then we loop over all the slaves and gather size + * attribute. In ideal world we would have to gather also mds field from + * all slaves, as object is spread over the cluster and this is + * definitely interesting information and it is not good to loss it, + * but... + */ + if (obj) { + struct mdt_body *body; + + if (*request == NULL) { + lmv_obj_put(obj); + RETURN(rc); + } + + body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF, + sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*request), REPLY_REC_OFF); + + lmv_obj_lock(obj); + + for (i = 0; i < obj->lo_objcount; i++) { + if (lmv->tgts[i].ltd_exp == NULL) { + CWARN("%s: NULL export for %d\n", + obd->obd_name, i); + continue; + } + + /* skip master obj. */ + if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) + continue; + + lmv_update_body(body, &obj->lo_inodes[i]); + } + + lmv_obj_unlock(obj); + lmv_obj_put(obj); + } + + RETURN(rc); +} + +static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, + ldlm_iterator_t it, void *data) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int i, rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + CDEBUG(D_OTHER, "CBDATA for "DFID"\n", PFID(fid)); + + /* + * With CMD every object can have two locks in different namespaces: + * lookup lock in space of mds storing direntry and update/open lock in + * space of mds storing inode. + */ + for (i = 0; i < lmv->desc.ld_tgt_count; i++) + md_change_cbdata(lmv->tgts[i].ltd_exp, fid, it, data); + + RETURN(0); +} + +static int lmv_close(struct obd_export *exp, + struct md_op_data *op_data, + struct obd_client_handle *och, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, &op_data->op_fid1); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + CDEBUG(D_OTHER, "CLOSE "DFID"\n", PFID(&op_data->op_fid1)); + rc = md_close(tgt_exp, op_data, och, request); + RETURN(rc); +} + +/* + * Called in the case MDS returns -ERESTART on create on open, what means that + * directory is split and its LMV presentation object has to be updated. + */ +int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct obd_export *tgt_exp; + struct lmv_obj *obj; + struct lustre_md md; + int mealen, rc; + __u64 valid; + ENTRY; + + md.mea = NULL; + mealen = lmv_get_easize(lmv); + + valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA; + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + /* time to update mea of parent fid */ + rc = md_getattr(tgt_exp, fid, NULL, valid, mealen, &req); + if (rc) { + CERROR("md_getattr() failed, error %d\n", rc); + GOTO(cleanup, rc); + } + + rc = md_get_lustre_md(tgt_exp, req, 1, NULL, exp, &md); + if (rc) { + CERROR("mdc_get_lustre_md() failed, error %d\n", rc); + GOTO(cleanup, rc); + } + + if (md.mea == NULL) + GOTO(cleanup, rc = -ENODATA); + + obj = lmv_obj_create(exp, fid, md.mea); + if (IS_ERR(obj)) + rc = PTR_ERR(obj); + else + lmv_obj_put(obj); + + obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea); + + EXIT; +cleanup: + if (req) + ptlrpc_req_finished(req); + return rc; +} + +int lmv_create(struct obd_export *exp, struct md_op_data *op_data, + const void *data, int datalen, int mode, __u32 uid, + __u32 gid, __u32 cap_effective, __u64 rdev, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + struct lmv_obj *obj; + int rc, loop = 0; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + if (!lmv->desc.ld_active_tgt_count) + RETURN(-EIO); +repeat: + ++loop; + LASSERT(loop <= 2); + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj) { + int mea_idx; + + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + op_data->op_name, op_data->op_namelen); + op_data->op_fid1 = obj->lo_inodes[mea_idx].li_fid; + op_data->op_bias &= ~MDS_CHECK_SPLIT; + op_data->op_mds = obj->lo_inodes[mea_idx].li_mds; + tgt_exp = lmv_get_export(lmv, op_data->op_mds); + lmv_obj_put(obj); + } else { + struct lmv_tgt_desc *tgt; + + tgt = lmv_find_target(lmv, &op_data->op_fid1); + op_data->op_bias |= MDS_CHECK_SPLIT; + op_data->op_mds = tgt->ltd_idx; + tgt_exp = tgt->ltd_exp; + } + + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data); + if (rc == -ERESTART) + goto repeat; + else if (rc) + RETURN(rc); + + CDEBUG(D_OTHER, "CREATE '%*s' on "DFID"\n", op_data->op_namelen, + op_data->op_name, PFID(&op_data->op_fid1)); + + rc = md_create(tgt_exp, op_data, data, datalen, mode, uid, gid, + cap_effective, rdev, request); + if (rc == 0) { + if (*request == NULL) + RETURN(rc); + CDEBUG(D_OTHER, "created - "DFID"\n", PFID(&op_data->op_fid1)); + } else if (rc == -ERESTART) { + LASSERT(*request != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *request, + "Got -ERESTART during create!\n"); + ptlrpc_req_finished(*request); + *request = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + rc = lmv_handle_split(exp, &op_data->op_fid1); + if (rc == 0) { + rc = lmv_alloc_slave_fids(obd, &op_data->op_fid1, + op_data, &op_data->op_fid2); + if (rc) + RETURN(rc); + goto repeat; + } + } + RETURN(rc); +} + +static int lmv_done_writing(struct obd_export *exp, + struct md_op_data *op_data, + struct obd_client_handle *och) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, &op_data->op_fid1); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_done_writing(tgt_exp, op_data, och); + RETURN(rc); +} + +static int +lmv_enqueue_slaves(struct obd_export *exp, int locktype, + struct lookup_intent *it, int lockmode, + struct md_op_data *op_data, struct lustre_handle *lockh, + void *lmm, int lmmsize, ldlm_completion_callback cb_compl, + ldlm_blocking_callback cb_blocking, void *cb_data) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_stripe_md *mea = op_data->op_mea1; + struct md_op_data *op_data2; + struct obd_export *tgt_exp; + int i, rc = 0; + ENTRY; + + OBD_ALLOC_PTR(op_data2); + if (op_data2 == NULL) + RETURN(-ENOMEM); + + LASSERT(mea != NULL); + for (i = 0; i < mea->mea_count; i++) { + memset(op_data2, 0, sizeof(*op_data2)); + op_data2->op_fid1 = mea->mea_ids[i]; + op_data2->op_bias = 0; + + tgt_exp = lmv_find_export(lmv, &op_data2->op_fid1); + if (IS_ERR(tgt_exp)) + GOTO(cleanup, rc = PTR_ERR(tgt_exp)); + + if (tgt_exp == NULL) + continue; + + rc = md_enqueue(tgt_exp, locktype, it, lockmode, op_data2, + lockh + i, lmm, lmmsize, cb_compl, cb_blocking, + cb_data, 0); + + CDEBUG(D_OTHER, "take lock on slave "DFID" -> %d/%d\n", + PFID(&mea->mea_ids[i]), rc, it->d.lustre.it_status); + + if (rc) + GOTO(cleanup, rc); + + if (it->d.lustre.it_data) { + struct ptlrpc_request *req; + req = (struct ptlrpc_request *)it->d.lustre.it_data; + ptlrpc_req_finished(req); + } + + if (it->d.lustre.it_status) + GOTO(cleanup, rc = it->d.lustre.it_status); + } + + EXIT; +cleanup: + OBD_FREE_PTR(op_data2); + + if (rc != 0) { + /* drop all taken locks */ + while (--i >= 0) { + if (lockh[i].cookie) + ldlm_lock_decref(lockh + i, lockmode); + lockh[i].cookie = 0; + } + } + return rc; +} + +static int +lmv_enqueue_remote(struct obd_export *exp, int lock_type, + struct lookup_intent *it, int lock_mode, + struct md_op_data *op_data, struct lustre_handle *lockh, + void *lmm, int lmmsize, ldlm_completion_callback cb_compl, + ldlm_blocking_callback cb_blocking, void *cb_data, + int extra_lock_flags) +{ + struct ptlrpc_request *req = it->d.lustre.it_data; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lustre_handle plock; + struct obd_export *tgt_exp; + struct md_op_data *rdata; + struct lu_fid fid_copy; + struct mdt_body *body; + int rc = 0, pmode; + ENTRY; + + body = lustre_msg_buf(req->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); + + if (!(body->valid & OBD_MD_MDS)) + RETURN(0); + + CDEBUG(D_OTHER, "ENQUEUE '%s' on "DFID" -> "DFID"\n", + LL_IT2STR(it), PFID(&op_data->op_fid1), PFID(&body->fid1)); + + /* We got LOOKUP lock, but we really need attrs */ + pmode = it->d.lustre.it_lock_mode; + LASSERT(pmode != 0); + memcpy(&plock, lockh, sizeof(plock)); + it->d.lustre.it_lock_mode = 0; + it->d.lustre.it_data = NULL; + fid_copy = body->fid1; + + it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE; + ptlrpc_req_finished(req); + + tgt_exp = lmv_find_export(lmv, &fid_copy); + if (IS_ERR(tgt_exp)) + GOTO(out, rc = PTR_ERR(tgt_exp)); + + OBD_ALLOC_PTR(rdata); + if (rdata == NULL) + GOTO(out, rc = -ENOMEM); + + rdata->op_fid1 = fid_copy; + rdata->op_bias = MDS_CROSS_REF; + + rc = md_enqueue(tgt_exp, lock_type, it, lock_mode, rdata, + lockh, lmm, lmmsize, cb_compl, cb_blocking, + cb_data, extra_lock_flags); + OBD_FREE_PTR(rdata); + EXIT; +out: + ldlm_lock_decref(&plock, pmode); + return rc; +} + +static int +lmv_enqueue(struct obd_export *exp, int lock_type, + struct lookup_intent *it, int lock_mode, + struct md_op_data *op_data, struct lustre_handle *lockh, + void *lmm, int lmmsize, ldlm_completion_callback cb_compl, + ldlm_blocking_callback cb_blocking, void *cb_data, + int extra_lock_flags) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp = NULL; + struct lmv_obj *obj; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + if (op_data->op_mea1 && it->it_op == IT_UNLINK) { + rc = lmv_enqueue_slaves(exp, lock_type, it, lock_mode, + op_data, lockh, lmm, lmmsize, + cb_compl, cb_blocking, cb_data); + RETURN(rc); + } + + if (op_data->op_namelen) { + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj) { + int mea_idx; + + /* directory is split. look for right mds for this + * name */ + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + (char *)op_data->op_name, + op_data->op_namelen); + op_data->op_fid1 = obj->lo_inodes[mea_idx].li_fid; + tgt_exp = lmv_get_export(lmv, obj->lo_inodes[mea_idx].li_mds); + lmv_obj_put(obj); + } + } + + if (tgt_exp == NULL) + tgt_exp = lmv_find_export(lmv, &op_data->op_fid1); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + CDEBUG(D_OTHER, "ENQUEUE '%s' on "DFID"\n", LL_IT2STR(it), + PFID(&op_data->op_fid1)); + + rc = md_enqueue(tgt_exp, lock_type, it, lock_mode, op_data, lockh, + lmm, lmmsize, cb_compl, cb_blocking, cb_data, + extra_lock_flags); + + if (rc == 0 && it->it_op == IT_OPEN) + rc = lmv_enqueue_remote(exp, lock_type, it, lock_mode, + op_data, lockh, lmm, lmmsize, + cb_compl, cb_blocking, cb_data, + extra_lock_flags); + RETURN(rc); +} + +static int +lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, const char *filename, int namelen, + obd_valid valid, int ea_size, struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lu_fid rid = *fid; + struct obd_export *tgt_exp; + struct mdt_body *body; + struct lmv_obj *obj; + int rc, loop = 0; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + +repeat: + ++loop; + LASSERT(loop <= 2); + obj = lmv_obj_grab(obd, &rid); + if (obj) { + int mea_idx; + + /* Directory is split. Look for right mds for this name */ + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + filename, namelen - 1); + rid = obj->lo_inodes[mea_idx].li_fid; + tgt_exp = lmv_get_export(lmv, obj->lo_inodes[mea_idx].li_mds); + lmv_obj_put(obj); + valid &= ~OBD_MD_FLCKSPLIT; + } else { + tgt_exp = lmv_find_export(lmv, &rid); + valid |= OBD_MD_FLCKSPLIT; + } + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + CDEBUG(D_OTHER, "getattr_name for %*s on "DFID" -> "DFID"\n", + namelen, filename, PFID(fid), PFID(&rid)); + + rc = md_getattr_name(tgt_exp, &rid, oc, filename, namelen, valid, + ea_size, request); + if (rc == 0) { + body = lustre_msg_buf((*request)->rq_repmsg, + REQ_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT_REPSWABBED((*request), REQ_REC_OFF); + + if (body->valid & OBD_MD_MDS) { + struct ptlrpc_request *req = NULL; + + rid = body->fid1; + CDEBUG(D_OTHER, "request attrs for "DFID"\n", + PFID(&rid)); + + tgt_exp = lmv_find_export(lmv, &rid); + if (IS_ERR(tgt_exp)) { + ptlrpc_req_finished(*request); + RETURN(PTR_ERR(tgt_exp)); + } + + rc = md_getattr_name(tgt_exp, &rid, NULL, NULL, 1, + valid | OBD_MD_FLCROSSREF, + ea_size, &req); + ptlrpc_req_finished(*request); + *request = req; + } + } else if (rc == -ERESTART) { + LASSERT(*request != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *request, + "Got -ERESTART during getattr!\n"); + ptlrpc_req_finished(*request); + *request = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + rc = lmv_handle_split(exp, &rid); + if (rc == 0) + goto repeat; + } + RETURN(rc); +} + +/* + * llite passes fid of an target inode in op_data->op_fid1 and id of directory in + * op_data->op_fid2 + */ +static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_obj *obj; + int rc, loop = 0; + mdsno_t mds; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + +repeat: + ++loop; + LASSERT(loop <= 2); + if (op_data->op_namelen != 0) { + int mea_idx; + + /* Usual link request */ + obj = lmv_obj_grab(obd, &op_data->op_fid2); + if (obj) { + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + op_data->op_name, + op_data->op_namelen); + op_data->op_fid2 = obj->lo_inodes[mea_idx].li_fid; + mds = obj->lo_inodes[mea_idx].li_mds; + lmv_obj_put(obj); + } else { + rc = lmv_fld_lookup(lmv, &op_data->op_fid2, &mds); + if (rc) + RETURN(rc); + } + + CDEBUG(D_OTHER,"link "DFID":%*s to "DFID"\n", + PFID(&op_data->op_fid2), op_data->op_namelen, + op_data->op_name, PFID(&op_data->op_fid1)); + } else { + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds); + if (rc) + RETURN(rc); + + /* request from MDS to acquire i_links for inode by fid1 */ + CDEBUG(D_OTHER, "inc i_nlinks for "DFID"\n", + PFID(&op_data->op_fid1)); + } + + CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", + mds, PFID(&op_data->op_fid1)); + + op_data->op_fsuid = current->fsuid; + op_data->op_fsgid = current->fsgid; + op_data->op_cap = current->cap_effective; + + rc = md_link(lmv->tgts[mds].ltd_exp, op_data, request); + if (rc == -ERESTART) { + LASSERT(*request != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *request, + "Got -ERESTART during link!\n"); + ptlrpc_req_finished(*request); + *request = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + rc = lmv_handle_split(exp, &op_data->op_fid2); + if (rc == 0) + goto repeat; + } + + RETURN(rc); +} + +static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, + const char *old, int oldlen, const char *new, int newlen, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int rc, mea_idx, loop = 0; + struct lmv_obj *obj; + mdsno_t mds; + ENTRY; + + CDEBUG(D_OTHER, "rename %*s in "DFID" to %*s in "DFID"\n", + oldlen, old, PFID(&op_data->op_fid1), + newlen, new, PFID(&op_data->op_fid2)); + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + if (oldlen == 0) { + /* + * MDS with old dir entry is asking another MDS to create name + * there. + */ + CDEBUG(D_OTHER, + "create %*s(%d/%d) in "DFID" pointing " + "to "DFID"\n", newlen, new, oldlen, newlen, + PFID(&op_data->op_fid2), PFID(&op_data->op_fid1)); + + rc = lmv_fld_lookup(lmv, &op_data->op_fid2, &mds); + if (rc) + RETURN(rc); + + /* + * Target directory can be split, sowe should forward request to + * the right MDS. + */ + obj = lmv_obj_grab(obd, &op_data->op_fid2); + if (obj) { + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + (char *)new, newlen); + op_data->op_fid2 = obj->lo_inodes[mea_idx].li_fid; + CDEBUG(D_OTHER, "Parent obj "DFID"\n", + PFID(&op_data->op_fid2)); + lmv_obj_put(obj); + } + goto request; + } + +repeat: + ++loop; + LASSERT(loop <= 2); + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj) { + /* + * directory is already split, so we have to forward request to + * the right MDS. + */ + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)old, oldlen); + op_data->op_fid1 = obj->lo_inodes[mea_idx].li_fid; + mds = obj->lo_inodes[mea_idx].li_mds; + CDEBUG(D_OTHER, "Parent obj "DFID"\n", PFID(&op_data->op_fid1)); + lmv_obj_put(obj); + } else { + rc = lmv_fld_lookup(lmv, &op_data->op_fid1, &mds); + if (rc) + RETURN(rc); + } + + obj = lmv_obj_grab(obd, &op_data->op_fid2); + if (obj) { + /* + * Directory is already split, so we have to forward request to + * the right MDS. + */ + mea_idx = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, + (char *)new, newlen); + + op_data->op_fid2 = obj->lo_inodes[mea_idx].li_fid; + CDEBUG(D_OTHER, "Parent obj "DFID"\n", PFID(&op_data->op_fid2)); + lmv_obj_put(obj); + } + +request: + op_data->op_fsuid = current->fsuid; + op_data->op_fsgid = current->fsgid; + op_data->op_cap = current->cap_effective; + + rc = md_rename(lmv->tgts[mds].ltd_exp, op_data, old, oldlen, + new, newlen, request); + if (rc == -ERESTART) { + LASSERT(*request != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *request, + "Got -ERESTART during rename!\n"); + ptlrpc_req_finished(*request); + *request = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + rc = lmv_handle_split(exp, &op_data->op_fid1); + if (rc == 0) + goto repeat; + } + RETURN(rc); +} + +static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req; + struct obd_export *tgt_exp; + struct lmv_obj *obj; + int rc = 0, i; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + obj = lmv_obj_grab(obd, &op_data->op_fid1); + + CDEBUG(D_OTHER, "SETATTR for "DFID", valid 0x%x%s\n", + PFID(&op_data->op_fid1), op_data->op_attr.ia_valid, + obj ? ", split" : ""); + + if (obj) { + for (i = 0; i < obj->lo_objcount; i++) { + op_data->op_fid1 = obj->lo_inodes[i].li_fid; + + tgt_exp = lmv_get_export(lmv, obj->lo_inodes[i].li_mds); + if (IS_ERR(tgt_exp)) { + rc = PTR_ERR(tgt_exp); + break; + } + + rc = md_setattr(tgt_exp, op_data, ea, ealen, + ea2, ea2len, &req); + + if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) { + /* + * this is master object and this request should + * be returned back to llite. + */ + *request = req; + } else { + ptlrpc_req_finished(req); + } + + if (rc) + break; + } + lmv_obj_put(obj); + } else { + tgt_exp = lmv_find_export(lmv, &op_data->op_fid1); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_setattr(tgt_exp, op_data, ea, ealen, ea2, + ea2len, request); + } + RETURN(rc); +} + +static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_sync(tgt_exp, fid, oc, request); + RETURN(rc); +} + +/* main purpose of LMV blocking ast is to remove split directory LMV + * presentation object (struct lmv_obj) attached to the lock being revoked. */ +int lmv_blocking_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, + void *data, int flag) +{ + struct lustre_handle lockh; + struct lmv_obj *obj; + int rc; + ENTRY; + + switch (flag) { + case LDLM_CB_BLOCKING: + ldlm_lock2handle(lock, &lockh); + rc = ldlm_cli_cancel(&lockh); + if (rc < 0) { + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + RETURN(rc); + } + break; + case LDLM_CB_CANCELING: + /* time to drop cached attrs for dirobj */ + obj = lock->l_ast_data; + if (obj) { + CDEBUG(D_OTHER, "cancel %s on "LPU64"/"LPU64 + ", master "DFID"\n", + lock->l_resource->lr_name.name[3] == 1 ? + "LOOKUP" : "UPDATE", + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + PFID(&obj->lo_fid)); + lmv_obj_put(obj); + } + break; + default: + LBUG(); + } + RETURN(0); +} + +static void lmv_hash_adjust(__u32 *hash, __u32 hash_adj) +{ + __u32 val; + + val = le32_to_cpu(*hash); + if (val < hash_adj) + val += MAX_HASH_SIZE; + if (val != DIR_END_OFF) + *hash = cpu_to_le32(val - hash_adj); +} + +static __u32 lmv_node_rank(struct obd_export *exp, const struct lu_fid *fid) +{ + __u64 id; + struct obd_import *imp; + + /* + * XXX Hack: to get nid we assume that underlying obd device is mdc. + */ + imp = class_exp2cliimp(exp); + id = imp->imp_connection->c_self + fid_flatten(fid); + + CDEBUG(D_INFO, "node rank: %llx "DFID" %llx %llx\n", + imp->imp_connection->c_self, PFID(fid), id, id ^ (id >> 32)); + + return id ^ (id >> 32); +} + +static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, __u64 offset64, struct page *page, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + struct lu_fid rid = *fid; + struct lmv_obj *obj; + __u32 offset0; + __u32 offset; + __u32 hash_adj = 0; + __u32 rank = 0; + __u32 seg_size = 0; + int tgt = 0; + int tgt0 = 0; + int rc; + int nr = 0; + ENTRY; + + offset0 = offset = offset64; + /* + * Check that offset is representable by 32bit number. + */ + LASSERT((__u64)offset == offset64); + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + CDEBUG(D_INFO, "READPAGE at %x from "DFID"\n", offset, PFID(&rid)); + + obj = lmv_obj_grab(obd, fid); + if (obj) { + struct lmv_inode *loi; + + lmv_obj_lock(obj); + + nr = obj->lo_objcount; + LASSERT(nr > 0); + seg_size = MAX_HASH_SIZE / nr; + loi = obj->lo_inodes; + rank = lmv_node_rank(lmv_get_export(lmv, loi[0].li_mds), + fid) % nr; + tgt0 = (offset / seg_size) % nr; + tgt = (tgt0 + rank) % nr; + + if (tgt < tgt0) + /* + * Wrap around. + * + * Last segment has unusual length due to division + * rounding. + */ + hash_adj = MAX_HASH_SIZE - seg_size * nr; + else + hash_adj = 0; + + hash_adj += rank * seg_size; + + CDEBUG(D_INFO, "hash_adj: %x %x %x/%x -> %x/%x\n", + rank, hash_adj, offset, tgt0, offset + hash_adj, tgt); + + offset = (offset + hash_adj) % MAX_HASH_SIZE; + rid = obj->lo_inodes[tgt].li_fid; + tgt_exp = lmv_get_export(lmv, loi[tgt].li_mds); + + CDEBUG(D_INFO, "forward to "DFID" with offset %lu i %d\n", + PFID(&rid), (unsigned long)offset, tgt); + } else + tgt_exp = lmv_find_export(lmv, &rid); + + if (IS_ERR(tgt_exp)) + GOTO(cleanup, rc = PTR_ERR(tgt_exp)); + + rc = md_readpage(tgt_exp, &rid, oc, offset, page, request); + if (rc) + GOTO(cleanup, rc); + if (obj) { + struct lu_dirpage *dp; + struct lu_dirent *ent; + + dp = cfs_kmap(page); + + lmv_hash_adjust(&dp->ldp_hash_start, hash_adj); + lmv_hash_adjust(&dp->ldp_hash_end, hash_adj); + LASSERT(cpu_to_le32(dp->ldp_hash_start) <= offset0); + + for (ent = lu_dirent_start(dp); ent != NULL; + ent = lu_dirent_next(ent)) + lmv_hash_adjust(&ent->lde_hash, hash_adj); + + if (tgt0 != nr - 1) { + __u32 end; + + end = le32_to_cpu(dp->ldp_hash_end); + if (end == DIR_END_OFF) { + dp->ldp_hash_end = cpu_to_le32(seg_size * + (tgt0 + 1)); + CDEBUG(D_INFO, ""DFID" reset end %x tgt %d\n", + PFID(&rid), + le32_to_cpu(dp->ldp_hash_end), tgt); + } + } + cfs_kunmap(page); + } + /* + * Here we could remove "." and ".." from all pages which at not from + * master. But MDS has only "." and ".." for master dir. + */ + EXIT; +cleanup: + if (obj) { + lmv_obj_unlock(obj); + lmv_obj_put(obj); + } + return rc; +} + +static int lmv_unlink_slaves(struct obd_export *exp, + struct md_op_data *op_data, + struct ptlrpc_request **req) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_stripe_md *mea = op_data->op_mea1; + struct md_op_data *op_data2; + struct obd_export *tgt_exp; + int i, rc = 0; + ENTRY; + + OBD_ALLOC_PTR(op_data2); + if (op_data2 == NULL) + RETURN(-ENOMEM); + + op_data2->op_mode = S_IFDIR; + op_data2->op_fsuid = current->fsuid; + op_data2->op_fsgid = current->fsgid; + op_data2->op_bias = 0; + + LASSERT(mea != NULL); + for (i = 0; i < mea->mea_count; i++) { + memset(op_data2, 0, sizeof(*op_data2)); + op_data2->op_fid1 = mea->mea_ids[i]; + tgt_exp = lmv_find_export(lmv, &op_data2->op_fid1); + if (IS_ERR(tgt_exp)) + GOTO(out_free_op_data2, rc = PTR_ERR(tgt_exp)); + + if (tgt_exp == NULL) + continue; + + rc = md_unlink(tgt_exp, op_data2, req); + + CDEBUG(D_OTHER, "unlink slave "DFID" -> %d\n", + PFID(&mea->mea_ids[i]), rc); + + if (*req) { + ptlrpc_req_finished(*req); + *req = NULL; + } + if (rc) + GOTO(out_free_op_data2, rc); + } + + EXIT; +out_free_op_data2: + OBD_FREE_PTR(op_data2); + return rc; +} + +static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp = NULL; + int rc, loop = 0; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + if (op_data->op_namelen == 0 && op_data->op_mea1 != NULL) { + /* mds asks to remove slave objects */ + rc = lmv_unlink_slaves(exp, op_data, request); + RETURN(rc); + } + +repeat: + ++loop; + LASSERT(loop <= 2); + if (op_data->op_namelen != 0) { + struct lmv_obj *obj; + int mea_idx; + + obj = lmv_obj_grab(obd, &op_data->op_fid1); + if (obj) { + mea_idx = raw_name2idx(obj->lo_hashtype, + obj->lo_objcount, + op_data->op_name, + op_data->op_namelen); + op_data->op_bias &= ~MDS_CHECK_SPLIT; + op_data->op_fid1 = obj->lo_inodes[mea_idx].li_fid; + tgt_exp = lmv_get_export(lmv, + obj->lo_inodes[mea_idx].li_mds); + lmv_obj_put(obj); + CDEBUG(D_OTHER, "unlink '%*s' in "DFID" -> %u\n", + op_data->op_namelen, op_data->op_name, + PFID(&op_data->op_fid1), mea_idx); + } + } else { + CDEBUG(D_OTHER, "drop i_nlink on "DFID"\n", + PFID(&op_data->op_fid1)); + } + if (tgt_exp == NULL) { + tgt_exp = lmv_find_export(lmv, &op_data->op_fid1); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + op_data->op_bias |= MDS_CHECK_SPLIT; + } + + op_data->op_fsuid = current->fsuid; + op_data->op_fsgid = current->fsgid; + op_data->op_cap = current->cap_effective; + + rc = md_unlink(tgt_exp, op_data, request); + if (rc == -ERESTART) { + LASSERT(*request != NULL); + DEBUG_REQ(D_WARNING|D_RPCTRACE, *request, + "Got -ERESTART during unlink!\n"); + ptlrpc_req_finished(*request); + *request = NULL; + + /* + * Directory got split. Time to update local object and repeat + * the request with proper MDS. + */ + rc = lmv_handle_split(exp, &op_data->op_fid1); + if (rc == 0) + goto repeat; + } + RETURN(rc); +} + +static int lmv_llog_init(struct obd_device *obd, struct obd_llogs* llogs, + struct obd_device *tgt, int count, + struct llog_catid *logid, struct obd_uuid *uuid) +{ + struct llog_ctxt *ctxt; + int rc; + ENTRY; + + rc = llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, + &llog_client_ops); + if (rc == 0) { + ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); + ctxt->loc_imp = tgt->u.cli.cl_import; + } + + RETURN(rc); +} + +static int lmv_llog_finish(struct obd_device *obd, int count) +{ + int rc; + ENTRY; + + rc = llog_cleanup(llog_get_context(obd, LLOG_CONFIG_REPL_CTXT)); + RETURN(rc); +} + +static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +{ + int rc = 0; + + switch (stage) { + case OBD_CLEANUP_EARLY: + /* XXX: here should be calling obd_precleanup() down to + * stack. */ + break; + case OBD_CLEANUP_SELF_EXP: + rc = obd_llog_finish(obd, 0); + if (rc != 0) + CERROR("failed to cleanup llogging subsystems\n"); + break; + default: + break; + } + RETURN(rc); +} + +static int lmv_get_info(struct obd_export *exp, __u32 keylen, + void *key, __u32 *vallen, void *val) +{ + struct obd_device *obd; + struct lmv_obd *lmv; + int rc = 0; + ENTRY; + + obd = class_exp2obd(exp); + if (obd == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + RETURN(-EINVAL); + } + + lmv = &obd->u.lmv; + if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) { + struct lmv_tgt_desc *tgts; + int i; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + LASSERT(*vallen == sizeof(__u32)); + for (i = 0, tgts = lmv->tgts; i < lmv->desc.ld_tgt_count; + i++, tgts++) { + + /* all tgts should be connected when this get called. */ + if (!tgts || !tgts->ltd_exp) { + CERROR("target not setup?\n"); + continue; + } + + if (!obd_get_info(tgts->ltd_exp, keylen, key, + vallen, val)) + RETURN(0); + } + RETURN(-EINVAL); + } else if (KEY_IS(KEY_MAX_EASIZE) || KEY_IS(KEY_CONN_DATA)) { + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + /* forwarding this request to first MDS, it should know LOV + * desc. */ + rc = obd_get_info(lmv->tgts[0].ltd_exp, keylen, key, + vallen, val); + if (!rc && KEY_IS(KEY_CONN_DATA)) { + exp->exp_connect_flags = + ((struct obd_connect_data *)val)->ocd_connect_flags; + } + RETURN(rc); + } + + CDEBUG(D_IOCTL, "invalid key\n"); + RETURN(-EINVAL); +} + +int lmv_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) +{ + struct lmv_tgt_desc *tgt; + struct obd_device *obd; + struct lmv_obd *lmv; + int rc = 0; + ENTRY; + + obd = class_exp2obd(exp); + if (obd == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + RETURN(-EINVAL); + } + lmv = &obd->u.lmv; + + if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) || + KEY_IS(KEY_INIT_RECOV_BACKUP)) { + int i, err = 0; + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + tgt = &lmv->tgts[i]; + + if (!tgt->ltd_exp) + continue; + + err = obd_set_info_async(tgt->ltd_exp, + keylen, key, vallen, val, set); + if (err && rc == 0) + rc = err; + } + + RETURN(rc); + } + + RETURN(-EINVAL); +} + +int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, + struct lov_stripe_md *lsm) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_stripe_md *meap, *lsmp; + int mea_size, i; + ENTRY; + + mea_size = lmv_get_easize(lmv); + if (!lmmp) + RETURN(mea_size); + + if (*lmmp && !lsm) { + OBD_FREE(*lmmp, mea_size); + *lmmp = NULL; + RETURN(0); + } + + if (*lmmp == NULL) { + OBD_ALLOC(*lmmp, mea_size); + if (*lmmp == NULL) + RETURN(-ENOMEM); + } + + if (!lsm) + RETURN(mea_size); + + lsmp = (struct lmv_stripe_md *)lsm; + meap = (struct lmv_stripe_md *)*lmmp; + + if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR && + lsmp->mea_magic != MEA_MAGIC_ALL_CHARS) + RETURN(-EINVAL); + + meap->mea_magic = cpu_to_le32(lsmp->mea_magic); + meap->mea_count = cpu_to_le32(lsmp->mea_count); + meap->mea_master = cpu_to_le32(lsmp->mea_master); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + meap->mea_ids[i] = meap->mea_ids[i]; + fid_cpu_to_le(&meap->mea_ids[i], &meap->mea_ids[i]); + } + + RETURN(mea_size); +} + +int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_mds_md *lmm, int lmm_size) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lmv_stripe_md **tmea = (struct lmv_stripe_md **)lsmp; + struct lmv_stripe_md *mea = (struct lmv_stripe_md *)lmm; + struct lmv_obd *lmv = &obd->u.lmv; + int mea_size, i; + __u32 magic; + ENTRY; + + mea_size = lmv_get_easize(lmv); + if (lsmp == NULL) + return mea_size; + + if (*lsmp != NULL && lmm == NULL) { + OBD_FREE(*tmea, mea_size); + *lsmp = NULL; + RETURN(0); + } + + LASSERT(mea_size == lmm_size); + + OBD_ALLOC(*tmea, mea_size); + if (*tmea == NULL) + RETURN(-ENOMEM); + + if (!lmm) + RETURN(mea_size); + + if (mea->mea_magic == MEA_MAGIC_LAST_CHAR || + mea->mea_magic == MEA_MAGIC_ALL_CHARS || + mea->mea_magic == MEA_MAGIC_HASH_SEGMENT) + { + magic = le32_to_cpu(mea->mea_magic); + } else { + /* old mea is not handled here */ + LBUG(); + } + + (*tmea)->mea_magic = magic; + (*tmea)->mea_count = le32_to_cpu(mea->mea_count); + (*tmea)->mea_master = le32_to_cpu(mea->mea_master); + + for (i = 0; i < (*tmea)->mea_count; i++) { + (*tmea)->mea_ids[i] = mea->mea_ids[i]; + fid_le_to_cpu(&(*tmea)->mea_ids[i], &(*tmea)->mea_ids[i]); + } + RETURN(mea_size); +} + +static int lmv_cancel_unused(struct obd_export *exp, + const struct lu_fid *fid, + int flags, void *opaque) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int rc = 0, err, i; + ENTRY; + + LASSERT(fid != NULL); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + if (!lmv->tgts[i].ltd_exp || !lmv->tgts[i].ltd_active) + continue; + + err = md_cancel_unused(lmv->tgts[i].ltd_exp, + fid, flags, opaque); + if (!rc) + rc = err; + } + RETURN(rc); +} + +int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + + ENTRY; + RETURN(md_set_lock_data(lmv->tgts[0].ltd_exp, lockh, data)); +} + +int lmv_lock_match(struct obd_export *exp, int flags, + const struct lu_fid *fid, ldlm_type_t type, + ldlm_policy_data_t *policy, ldlm_mode_t mode, + struct lustre_handle *lockh) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int i, rc = 0; + ENTRY; + + CDEBUG(D_OTHER, "lock match for "DFID"\n", PFID(fid)); + + /* with CMD every object can have two locks in different namespaces: + * lookup lock in space of mds storing direntry and update/open lock in + * space of mds storing inode. Thus we check all targets, not only that + * one fid was created in. */ + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + rc = md_lock_match(lmv->tgts[i].ltd_exp, flags, fid, + type, policy, mode, lockh); + if (rc) + RETURN(1); + } + + RETURN(rc); +} + +int lmv_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, + int offset, struct obd_export *dt_exp, + struct obd_export *md_exp, struct lustre_md *md) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int rc; + + ENTRY; + rc = md_get_lustre_md(lmv->tgts[0].ltd_exp, req, offset, dt_exp, md_exp, + md); + RETURN(rc); +} + +int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + + ENTRY; + if (md->mea) + obd_free_memmd(exp, (struct lov_stripe_md**)&md->mea); + RETURN(md_free_lustre_md(lmv->tgts[0].ltd_exp, md)); +} + +int lmv_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + + ENTRY; + + tgt_exp = lmv_find_export(lmv, &och->och_fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + RETURN(md_set_open_replay_data(tgt_exp, och, open_req)); +} + +int lmv_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + ENTRY; + + tgt_exp = lmv_find_export(lmv, &och->och_fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + RETURN(md_clear_open_replay_data(tgt_exp, och)); +} + +static int lmv_get_remote_perm(struct obd_export *exp, + const struct lu_fid *fid, + struct obd_capa *oc, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_get_remote_perm(tgt_exp, fid, oc, request); + + RETURN(rc); +} + +static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *oc, + renew_capa_cb_t cb) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_find_export(lmv, &oc->c_capa.lc_fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + rc = md_renew_capa(tgt_exp, oc, cb); + RETURN(rc); +} + +struct obd_ops lmv_obd_ops = { + .o_owner = THIS_MODULE, + .o_setup = lmv_setup, + .o_cleanup = lmv_cleanup, + .o_precleanup = lmv_precleanup, + .o_process_config = lmv_process_config, + .o_connect = lmv_connect, + .o_disconnect = lmv_disconnect, + .o_statfs = lmv_statfs, + .o_llog_init = lmv_llog_init, + .o_llog_finish = lmv_llog_finish, + .o_get_info = lmv_get_info, + .o_set_info_async = lmv_set_info_async, + .o_packmd = lmv_packmd, + .o_unpackmd = lmv_unpackmd, + .o_notify = lmv_notify, + .o_iocontrol = lmv_iocontrol, + .o_fid_delete = lmv_fid_delete +}; + +struct md_ops lmv_md_ops = { + .m_getstatus = lmv_getstatus, + .m_change_cbdata = lmv_change_cbdata, + .m_close = lmv_close, + .m_create = lmv_create, + .m_done_writing = lmv_done_writing, + .m_enqueue = lmv_enqueue, + .m_getattr = lmv_getattr, + .m_getxattr = lmv_getxattr, + .m_getattr_name = lmv_getattr_name, + .m_intent_lock = lmv_intent_lock, + .m_link = lmv_link, + .m_rename = lmv_rename, + .m_setattr = lmv_setattr, + .m_setxattr = lmv_setxattr, + .m_sync = lmv_sync, + .m_readpage = lmv_readpage, + .m_unlink = lmv_unlink, + .m_init_ea_size = lmv_init_ea_size, + .m_cancel_unused = lmv_cancel_unused, + .m_set_lock_data = lmv_set_lock_data, + .m_lock_match = lmv_lock_match, + .m_get_lustre_md = lmv_get_lustre_md, + .m_free_lustre_md = lmv_free_lustre_md, + .m_set_open_replay_data = lmv_set_open_replay_data, + .m_clear_open_replay_data = lmv_clear_open_replay_data, + .m_get_remote_perm = lmv_get_remote_perm, + .m_renew_capa = lmv_renew_capa +}; + +int __init lmv_init(void) +{ + struct lprocfs_static_vars lvars; + int rc; + + obj_cache = cfs_mem_cache_create("lmv_objects", + sizeof(struct lmv_obj), + 0, 0); + if (!obj_cache) { + CERROR("error allocating lmv objects cache\n"); + return -ENOMEM; + } + + lprocfs_init_vars(lmv, &lvars); + rc = class_register_type(&lmv_obd_ops, &lmv_md_ops, + lvars.module_vars, LUSTRE_LMV_NAME, NULL); + if (rc) + cfs_mem_cache_destroy(obj_cache); + + return rc; +} + +#ifdef __KERNEL__ +static void lmv_exit(void) +{ + int rc; + + class_unregister_type(LUSTRE_LMV_NAME); + + rc = cfs_mem_cache_destroy(obj_cache); + LASSERTF(rc == 0, + "can't free lmv objects cache, %d object(s)" + "still in use\n", atomic_read(&obj_cache_count)); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); +MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver"); +MODULE_LICENSE("GPL"); + +module_init(lmv_init); +module_exit(lmv_exit); +#endif diff --git a/lustre/lmv/lmv_object.c b/lustre/lmv/lmv_object.c new file mode 100644 index 0000000..68c4bca --- /dev/null +++ b/lustre/lmv/lmv_object.c @@ -0,0 +1,426 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LMV +#ifdef __KERNEL__ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <asm/div64.h> +#include <linux/seq_file.h> +#else +#include <liblustre.h> +#endif + +#include <lustre/lustre_idl.h> +#include <obd_support.h> +#include <lustre_lib.h> +#include <lustre_net.h> +#include <lustre_dlm.h> +#include <obd_class.h> +#include <lprocfs_status.h> +#include "lmv_internal.h" + +/* objects cache. */ +extern cfs_mem_cache_t *obj_cache; +extern atomic_t obj_cache_count; + +/* object list and its guard. */ +static LIST_HEAD(obj_list); +static spinlock_t obj_list_lock = SPIN_LOCK_UNLOCKED; + +/* creates new obj on passed @fid and @mea. */ +struct lmv_obj * +lmv_obj_alloc(struct obd_device *obd, + const struct lu_fid *fid, + struct lmv_stripe_md *mea) +{ + int i; + struct lmv_obj *obj; + unsigned int obj_size; + struct lmv_obd *lmv = &obd->u.lmv; + + LASSERT(mea->mea_magic == MEA_MAGIC_LAST_CHAR + || mea->mea_magic == MEA_MAGIC_ALL_CHARS + || mea->mea_magic == MEA_MAGIC_HASH_SEGMENT); + + OBD_SLAB_ALLOC(obj, obj_cache, CFS_ALLOC_STD, + sizeof(*obj)); + if (!obj) + return NULL; + + atomic_inc(&obj_cache_count); + + obj->lo_fid = *fid; + obj->lo_obd = obd; + obj->lo_state = 0; + obj->lo_hashtype = mea->mea_magic; + + init_MUTEX(&obj->lo_guard); + atomic_set(&obj->lo_count, 0); + obj->lo_objcount = mea->mea_count; + + obj_size = sizeof(struct lmv_inode) * + lmv->desc.ld_tgt_count; + + OBD_ALLOC(obj->lo_inodes, obj_size); + if (!obj->lo_inodes) + goto err_obj; + + memset(obj->lo_inodes, 0, obj_size); + + /* put all ids in */ + for (i = 0; i < mea->mea_count; i++) { + int rc; + + CDEBUG(D_OTHER, "subobj "DFID"\n", + PFID(&mea->mea_ids[i])); + obj->lo_inodes[i].li_fid = mea->mea_ids[i]; + LASSERT(fid_is_sane(&obj->lo_inodes[i].li_fid)); + + /* + * Cache slave mds number to use it in all cases it is needed + * instead of constant lookup. + */ + rc = lmv_fld_lookup(lmv, &obj->lo_inodes[i].li_fid, + &obj->lo_inodes[i].li_mds); + if (rc) + goto err_obj; + } + + return obj; + +err_obj: + OBD_FREE(obj, sizeof(*obj)); + return NULL; +} + +/* destroy passed @obj. */ +void +lmv_obj_free(struct lmv_obj *obj) +{ + struct lmv_obd *lmv = &obj->lo_obd->u.lmv; + unsigned int obj_size; + + LASSERT(!atomic_read(&obj->lo_count)); + + obj_size = sizeof(struct lmv_inode) * + lmv->desc.ld_tgt_count; + + OBD_FREE(obj->lo_inodes, obj_size); + OBD_SLAB_FREE(obj, obj_cache, sizeof(*obj)); + atomic_dec(&obj_cache_count); +} + +static void +__lmv_obj_add(struct lmv_obj *obj) +{ + atomic_inc(&obj->lo_count); + list_add(&obj->lo_list, &obj_list); +} + +void +lmv_obj_add(struct lmv_obj *obj) +{ + spin_lock(&obj_list_lock); + __lmv_obj_add(obj); + spin_unlock(&obj_list_lock); +} + +static void +__lmv_obj_del(struct lmv_obj *obj) +{ + list_del(&obj->lo_list); + lmv_obj_free(obj); +} + +void +lmv_obj_del(struct lmv_obj *obj) +{ + spin_lock(&obj_list_lock); + __lmv_obj_del(obj); + spin_unlock(&obj_list_lock); +} + +static struct lmv_obj * +__lmv_obj_get(struct lmv_obj *obj) +{ + LASSERT(obj != NULL); + atomic_inc(&obj->lo_count); + return obj; +} + +struct lmv_obj * +lmv_obj_get(struct lmv_obj *obj) +{ + spin_lock(&obj_list_lock); + __lmv_obj_get(obj); + spin_unlock(&obj_list_lock); + return obj; +} + +static void +__lmv_obj_put(struct lmv_obj *obj) +{ + LASSERT(obj); + + if (atomic_dec_and_test(&obj->lo_count)) { + CDEBUG(D_OTHER, "last reference to "DFID" - " + "destroying\n", PFID(&obj->lo_fid)); + __lmv_obj_del(obj); + } +} + +void +lmv_obj_put(struct lmv_obj *obj) +{ + spin_lock(&obj_list_lock); + __lmv_obj_put(obj); + spin_unlock(&obj_list_lock); +} + +static struct lmv_obj * +__lmv_obj_grab(struct obd_device *obd, const struct lu_fid *fid) +{ + struct lmv_obj *obj; + struct list_head *cur; + + list_for_each(cur, &obj_list) { + obj = list_entry(cur, struct lmv_obj, lo_list); + + /* check if object is in progress of destroying. If so - skip + * it. */ + if (obj->lo_state & O_FREEING) + continue; + + /* + * we should make sure, that we have found object belong to + * passed obd. It is possible that, object manager will have two + * objects with the same fid belong to different obds, if client + * and mds runs on the same host. May be it is good idea to have + * objects list associated with obd. + */ + if (obj->lo_obd != obd) + continue; + + /* check if this is what we're looking for. */ + if (lu_fid_eq(&obj->lo_fid, fid)) + return __lmv_obj_get(obj); + } + + return NULL; +} + +struct lmv_obj * +lmv_obj_grab(struct obd_device *obd, const struct lu_fid *fid) +{ + struct lmv_obj *obj; + ENTRY; + + spin_lock(&obj_list_lock); + obj = __lmv_obj_grab(obd, fid); + spin_unlock(&obj_list_lock); + + RETURN(obj); +} + +/* looks in objects list for an object that matches passed @fid. If it is not + * found -- creates it using passed @mea and puts onto list. */ +static struct lmv_obj * +__lmv_obj_create(struct obd_device *obd, const struct lu_fid *fid, + struct lmv_stripe_md *mea) +{ + struct lmv_obj *new, *obj; + ENTRY; + + obj = lmv_obj_grab(obd, fid); + if (obj) + RETURN(obj); + + /* no such object yet, allocate and initialize it. */ + new = lmv_obj_alloc(obd, fid, mea); + if (!new) + RETURN(NULL); + + /* check if someone create it already while we were dealing with + * allocating @obj. */ + spin_lock(&obj_list_lock); + obj = __lmv_obj_grab(obd, fid); + if (obj) { + /* someone created it already - put @obj and getting out. */ + spin_unlock(&obj_list_lock); + lmv_obj_free(new); + RETURN(obj); + } + + __lmv_obj_add(new); + __lmv_obj_get(new); + + spin_unlock(&obj_list_lock); + + CDEBUG(D_OTHER, "new obj in lmv cache: "DFID"\n", + PFID(fid)); + + RETURN(new); + +} + +/* creates object from passed @fid and @mea. If @mea is NULL, it will be + * obtained from correct MDT and used for constructing the object. */ +struct lmv_obj * +lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid, + struct lmv_stripe_md *mea) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct ptlrpc_request *req = NULL; + struct obd_export *tgt_exp; + struct lmv_obj *obj; + struct lustre_md md; + int mealen, rc; + ENTRY; + + CDEBUG(D_OTHER, "get mea for "DFID" and create lmv obj\n", + PFID(fid)); + + md.mea = NULL; + + if (mea == NULL) { + __u64 valid; + + CDEBUG(D_OTHER, "mea isn't passed in, get it now\n"); + mealen = lmv_get_easize(lmv); + + /* time to update mea of parent fid */ + md.mea = NULL; + valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA; + + tgt_exp = lmv_find_export(lmv, fid); + if (IS_ERR(tgt_exp)) + GOTO(cleanup, obj = (void *)tgt_exp); + + rc = md_getattr(tgt_exp, fid, NULL, valid, mealen, &req); + if (rc) { + CERROR("md_getattr() failed, error %d\n", rc); + GOTO(cleanup, obj = ERR_PTR(rc)); + } + + rc = md_get_lustre_md(exp, req, 0, NULL, exp, &md); + if (rc) { + CERROR("mdc_get_lustre_md() failed, error %d\n", rc); + GOTO(cleanup, obj = ERR_PTR(rc)); + } + + if (md.mea == NULL) + GOTO(cleanup, obj = ERR_PTR(-ENODATA)); + + mea = md.mea; + } + + /* got mea, now create obj for it. */ + obj = __lmv_obj_create(obd, fid, mea); + if (!obj) { + CERROR("Can't create new object "DFID"\n", + PFID(fid)); + GOTO(cleanup, obj = ERR_PTR(-ENOMEM)); + } + + if (md.mea != NULL) + obd_free_memmd(exp, (struct lov_stripe_md **)&md.mea); + + EXIT; +cleanup: + if (req) + ptlrpc_req_finished(req); + return obj; +} + +/* + * looks for object with @fid and orders to destroy it. It is possible the object + * will not be destroyed right now, because it is still using by someone. In + * this case it will be marked as "freeing" and will not be accessible anymore + * for subsequent callers of lmv_obj_grab(). + */ +int +lmv_obj_delete(struct obd_export *exp, const struct lu_fid *fid) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obj *obj; + int rc = 0; + ENTRY; + + spin_lock(&obj_list_lock); + obj = __lmv_obj_grab(obd, fid); + if (obj) { + obj->lo_state |= O_FREEING; + __lmv_obj_put(obj); + __lmv_obj_put(obj); + rc = 1; + } + spin_unlock(&obj_list_lock); + + RETURN(rc); +} + +int +lmv_obj_setup(struct obd_device *obd) +{ + ENTRY; + LASSERT(obd != NULL); + + CDEBUG(D_INFO, "LMV object manager setup (%s)\n", + obd->obd_uuid.uuid); + + RETURN(0); +} + +void +lmv_obj_cleanup(struct obd_device *obd) +{ + struct list_head *cur, *tmp; + struct lmv_obj *obj; + ENTRY; + + CDEBUG(D_INFO, "LMV object manager cleanup (%s)\n", + obd->obd_uuid.uuid); + + spin_lock(&obj_list_lock); + list_for_each_safe(cur, tmp, &obj_list) { + obj = list_entry(cur, struct lmv_obj, lo_list); + + if (obj->lo_obd != obd) + continue; + + obj->lo_state |= O_FREEING; + if (atomic_read(&obj->lo_count) > 1) { + CERROR("obj "DFID" has count > 1 (%d)\n", + PFID(&obj->lo_fid), atomic_read(&obj->lo_count)); + } + __lmv_obj_put(obj); + } + spin_unlock(&obj_list_lock); + EXIT; +} diff --git a/lustre/lmv/lproc_lmv.c b/lustre/lmv/lproc_lmv.c new file mode 100644 index 0000000..9b1615a --- /dev/null +++ b/lustre/lmv/lproc_lmv.c @@ -0,0 +1,151 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_CLASS + +#include <linux/version.h> +#include <linux/seq_file.h> +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +#include <asm/statfs.h> +#endif +#include <lprocfs_status.h> +#include <obd_class.h> + +#ifndef LPROCFS +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +#else +static int lmv_rd_numobd(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *dev = (struct obd_device*)data; + struct lmv_desc *desc; + + LASSERT(dev != NULL); + desc = &dev->u.lmv.desc; + *eof = 1; + return snprintf(page, count, "%u\n", desc->ld_tgt_count); + +} + +static int lmv_rd_activeobd(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device* dev = (struct obd_device*)data; + struct lmv_desc *desc; + + LASSERT(dev != NULL); + desc = &dev->u.lmv.desc; + *eof = 1; + return snprintf(page, count, "%u\n", desc->ld_active_tgt_count); +} + +static int lmv_rd_desc_uuid(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *dev = (struct obd_device*) data; + struct lmv_obd *lmv; + + LASSERT(dev != NULL); + lmv = &dev->u.lmv; + *eof = 1; + return snprintf(page, count, "%s\n", lmv->desc.ld_uuid.uuid); +} + +static void *lmv_tgt_seq_start(struct seq_file *p, loff_t *pos) +{ + struct obd_device *dev = p->private; + struct lmv_obd *lmv = &dev->u.lmv; + + return (*pos >= lmv->desc.ld_tgt_count) ? NULL : &(lmv->tgts[*pos]); + +} + +static void lmv_tgt_seq_stop(struct seq_file *p, void *v) +{ + return; +} + +static void *lmv_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct obd_device *dev = p->private; + struct lmv_obd *lmv = &dev->u.lmv; + + ++*pos; + return (*pos >=lmv->desc.ld_tgt_count) ? NULL : &(lmv->tgts[*pos]); +} + +static int lmv_tgt_seq_show(struct seq_file *p, void *v) +{ + struct lmv_tgt_desc *tgt = v; + struct obd_device *dev = p->private; + struct lmv_obd *lmv = &dev->u.lmv; + int idx = tgt - &(lmv->tgts[0]); + + return seq_printf(p, "%d: %s %sACTIVE\n", idx, tgt->ltd_uuid.uuid, + tgt->ltd_active ? "" : "IN"); +} + +struct seq_operations lmv_tgt_sops = { + .start = lmv_tgt_seq_start, + .stop = lmv_tgt_seq_stop, + .next = lmv_tgt_seq_next, + .show = lmv_tgt_seq_show, +}; + +static int lmv_target_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc = seq_open(file, &lmv_tgt_sops); + + if (rc) + return rc; + + seq = file->private_data; + seq->private = dp->data; + + return 0; +} + +struct lprocfs_vars lprocfs_obd_vars[] = { + { "numobd", lmv_rd_numobd, 0, 0 }, + { "activeobd", lmv_rd_activeobd, 0, 0 }, + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "desc_uuid", lmv_rd_desc_uuid, 0, 0 }, + { 0 } +}; + +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, + { 0 } +}; + +struct file_operations lmv_proc_target_fops = { + .owner = THIS_MODULE, + .open = lmv_target_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* LPROCFS */ +LPROCFS_INIT_VARS(lmv, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 07936e3..51258b3 100755 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -56,13 +56,13 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes, lov_dump_lmm_v1(D_WARNING, lmm); return -EINVAL; } - + if (lmm->lmm_object_id == 0) { CERROR("zero object id\n"); lov_dump_lmm_v1(D_WARNING, lmm); return -EINVAL; } - + if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) { CERROR("bad striping pattern\n"); lov_dump_lmm_v1(D_WARNING, lmm); @@ -95,7 +95,7 @@ struct lov_stripe_md *lsm_alloc_plain(int stripe_count, int *size) return NULL;; for (i = 0; i < stripe_count; i++) { - OBD_SLAB_ALLOC(loi, lov_oinfo_slab, SLAB_NOFS, sizeof(*loi)); + OBD_SLAB_ALLOC(loi, lov_oinfo_slab, GFP_NOFS, sizeof(*loi)); if (loi == NULL) goto err; lsm->lsm_oinfo[i] = loi; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 7099226..5ded7e1 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -62,6 +62,7 @@ struct lov_async_page { int lap_stripe; obd_off lap_sub_offset; obd_id lap_loi_id; + obd_gr lap_loi_gr; void *lap_sub_cookie; struct obd_async_page_ops *lap_caller_ops; void *lap_caller_data; @@ -111,9 +112,14 @@ static inline void lov_llh_put(struct lov_lock_handles *llh) atomic_read(&llh->llh_refcount) < 0x5a5a); if (atomic_dec_and_test(&llh->llh_refcount)) { class_handle_unhash(&llh->llh_handle); - LASSERT(list_empty(&llh->llh_handle.h_link)); - OBD_FREE(llh, sizeof *llh + - sizeof(*llh->llh_handles) * llh->llh_stripe_count); + /* The structure may be held by other threads because RCU. + * -jxiong */ + if (atomic_read(&llh->llh_refcount)) + return; + + OBD_FREE_RCU(llh, sizeof *llh + + sizeof(*llh->llh_handles) * llh->llh_stripe_count, + &llh->llh_handle); } } @@ -223,8 +229,9 @@ void lov_getref(struct obd_device *obd); void lov_putref(struct obd_device *obd); /* lov_log.c */ -int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, struct obd_uuid *uuid); +int lov_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, struct llog_catid *logid, + struct obd_uuid *uuid); int lov_llog_finish(struct obd_device *obd, int count); /* lov_pack.c */ diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 232df7a..90603db 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -59,7 +59,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; - int i, rc = 0; + int i, rc = 0, rc1; ENTRY; LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, @@ -89,9 +89,12 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, default: break; } - - rc += llog_add(cctxt, rec, NULL, logcookies + rc, - numcookies - rc); + LASSERT(lsm->lsm_object_gr == loi->loi_gr); + rc1 = llog_add(cctxt, rec, NULL, logcookies + rc, + numcookies - rc); + if (rc1 < 0) + RETURN(rc1); + rc += rc1; } RETURN(rc); @@ -175,20 +178,21 @@ static struct llog_operations lov_size_repl_logops = { lop_cancel: lov_llog_repl_cancel }; -int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, struct obd_uuid *uuid) +int lov_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, struct llog_catid *logid, + struct obd_uuid *uuid) { struct lov_obd *lov = &obd->u.lov; struct obd_device *child; int i, rc = 0, err = 0; ENTRY; - rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, &lov_mds_ost_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, &lov_size_repl_logops); if (rc) RETURN(rc); @@ -203,7 +207,7 @@ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, CDEBUG(D_CONFIG, "init %d/%d\n", i, count); LASSERT(lov->lov_tgts[i]->ltd_exp); child = lov->lov_tgts[i]->ltd_exp->exp_obd; - rc = obd_llog_init(child, tgt, 1, logid + i, uuid); + rc = obd_llog_init(child, llogs, tgt, 1, logid + i, uuid); if (rc) { CERROR("error osc_llog_init idx %d osc '%s' tgt '%s' " "(rc=%d)\n", i, child->obd_name, tgt->obd_name, diff --git a/lustre/lov/lov_merge.c b/lustre/lov/lov_merge.c index 984a8ee..f13c389 100644 --- a/lustre/lov/lov_merge.c +++ b/lustre/lov/lov_merge.c @@ -50,7 +50,6 @@ int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm, struct ost_lvb *lvb, int kms_only) { - struct lov_oinfo *loi; __u64 size = 0; __u64 blocks = 0; __u64 current_mtime = lvb->lvb_mtime; @@ -64,9 +63,9 @@ int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm, #endif for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; obd_size lov_size, tmpsize; - loi = lsm->lsm_oinfo[i]; tmpsize = loi->loi_kms; if (kms_only == 0 && loi->loi_lvb.lvb_size > tmpsize) tmpsize = loi->loi_lvb.lvb_size; @@ -112,14 +111,12 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, #endif if (shrink) { - struct lov_oinfo *loi; for (; stripe < lsm->lsm_stripe_count; stripe++) { - loi = lsm->lsm_oinfo[stripe]; + struct lov_oinfo *loi = lsm->lsm_oinfo[stripe]; kms = lov_size_to_stripe(lsm, size, stripe); CDEBUG(D_INODE, "stripe %d KMS %sing "LPU64"->"LPU64"\n", - stripe, kms > loi->loi_kms ? "increas" : - kms < loi->loi_kms ? "shrink" : "leav", + stripe, kms > loi->loi_kms ? "increas":"shrink", loi->loi_kms, kms); loi->loi_kms = loi->loi_lvb.lvb_size = kms; } diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 5b9112f..5e6e1bb 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -74,7 +74,7 @@ void lov_putref(struct obd_device *obd) /* ok to dec to 0 more than once -- ltd_exp's will be null */ if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { int i; - CDEBUG(D_CONFIG, "destroying %d lov targets\n", + CDEBUG(D_CONFIG, "destroying %d lov targets\n", lov->lov_death_row); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_reap) @@ -88,11 +88,11 @@ void lov_putref(struct obd_device *obd) } #define MAX_STRING_SIZE 128 -static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, +static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { struct lov_obd *lov = &obd->u.lov; - struct obd_uuid tgt_uuid; + struct obd_uuid tgt_uuid = lov->lov_tgts[index]->ltd_uuid; struct obd_device *tgt_obd; struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; struct lustre_handle conn = {0, }; @@ -106,7 +106,6 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, if (!lov->lov_tgts[index]) RETURN(-EINVAL); - tgt_uuid = lov->lov_tgts[index]->ltd_uuid; tgt_obd = class_find_client_obd(&tgt_uuid, LUSTRE_OSC_NAME, &obd->obd_uuid); @@ -114,7 +113,6 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, CERROR("Target %s not attached\n", obd_uuid2str(&tgt_uuid)); RETURN(-EINVAL); } - if (!tgt_obd->obd_set_up) { CERROR("Target %s not set up\n", obd_uuid2str(&tgt_uuid)); RETURN(-EINVAL); @@ -147,7 +145,7 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, RETURN(0); } - rc = obd_connect(&conn, tgt_obd, &lov_osc_uuid, data); + rc = obd_connect(NULL, &conn, tgt_obd, &lov_osc_uuid, data); if (rc) { CERROR("Target %s connect error %d\n", obd_uuid2str(&tgt_uuid), rc); @@ -202,13 +200,14 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, #endif rc = qos_add_tgt(obd, index); - if (rc) + if (rc) CERROR("qos_add_tgt failed %d\n", rc); RETURN(0); } -static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, +static int lov_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { struct lov_obd *lov = &obd->u.lov; @@ -225,7 +224,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, /* Why should there ever be more than 1 connect? */ lov->lov_connects++; LASSERT(lov->lov_connects == 1); - + memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd)); if (data) lov->lov_ocd = *data; @@ -236,16 +235,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, if (!tgt || obd_uuid_empty(&tgt->ltd_uuid)) continue; /* Flags will be lowest common denominator */ - rc = lov_connect_obd(obd, i, lov->lov_tgts[i]->ltd_activate, - &lov->lov_ocd); + rc = lov_connect_obd(obd, i, tgt->ltd_activate, &lov->lov_ocd); if (rc) { - CERROR("%s: lov connect tgt %d failed: %d\n", + CERROR("%s: lov connect tgt %d failed: %d\n", obd->obd_name, i, rc); continue; } } lov_putref(obd); - + RETURN(0); } @@ -258,7 +256,7 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) int rc; ENTRY; - CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", + CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", obd->obd_name, osc_obd->obd_name); if (lov->lov_tgts[index]->ltd_active) { @@ -280,13 +278,14 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) } } - if (obd->obd_no_recov) { + if (osc_obd) { /* Pass it on to our clients. * XXX This should be an argument to disconnect, * XXX not a back-door flag on the OBD. Ah well. */ - if (osc_obd) - osc_obd->obd_no_recov = 1; + osc_obd->obd_force = obd->obd_force; + osc_obd->obd_fail = obd->obd_fail; + osc_obd->obd_no_recov = obd->obd_no_recov; } obd_register_observer(osc_obd, NULL); @@ -304,7 +303,7 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) RETURN(0); } -static int lov_del_target(struct obd_device *obd, __u32 index, +static int lov_del_target(struct obd_device *obd, __u32 index, struct obd_uuid *uuidp, int gen); static int lov_disconnect(struct obd_export *exp) @@ -406,7 +405,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, struct obd_uuid *uuid; LASSERT(watched); - + if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, @@ -443,7 +442,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, rc = obd_notify_observer(obd, tgt_obd, ev, data); if (rc) { CERROR("%s: notify %s of %s failed %d\n", - obd->obd_name, + obd->obd_name, obd->obd_observer->obd_name, tgt_obd->obd_name, rc); break; @@ -488,7 +487,7 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, __u32 newsize, oldsize = 0; newsize = max(lov->lov_tgt_size, (__u32)2); - while (newsize < index + 1) + while (newsize < index + 1) newsize = newsize << 1; OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize); if (newtgts == NULL) { @@ -497,7 +496,7 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, } if (lov->lov_tgt_size) { - memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) * + memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) * lov->lov_tgt_size); old = lov->lov_tgts; oldsize = lov->lov_tgt_size; @@ -513,7 +512,7 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lov->lov_tgts, lov->lov_tgt_size); - } + } OBD_ALLOC_PTR(tgt); @@ -535,8 +534,8 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", index, tgt->ltd_gen, lov->desc.ld_tgt_count); - - if (lov->lov_connects == 0) { + + if (lov->lov_connects == 0) { /* lov_connect hasn't been called yet. We'll do the lov_connect_obd on this target when that fn first runs, because we don't know the connect flags yet. */ @@ -549,13 +548,13 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (rc) GOTO(out, rc); - rc = lov_notify(obd, tgt->ltd_exp->exp_obd, + rc = lov_notify(obd, tgt->ltd_exp->exp_obd, active ? OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE, (void *)&index); out: if (rc) { - CERROR("add failed (%d), deleting %s\n", rc, + CERROR("add failed (%d), deleting %s\n", rc, obd_uuid2str(&tgt->ltd_uuid)); lov_del_target(obd, index, 0, 0); } @@ -564,7 +563,7 @@ out: } /* Schedule a target for deletion */ -static int lov_del_target(struct obd_device *obd, __u32 index, +static int lov_del_target(struct obd_device *obd, __u32 index, struct obd_uuid *uuidp, int gen) { struct lov_obd *lov = &obd->u.lov; @@ -594,7 +593,7 @@ static int lov_del_target(struct obd_device *obd, __u32 index, CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n", lov_uuid2str(lov, index), index, - lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp, + lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp, lov->lov_tgts[index]->ltd_active); lov->lov_tgts[index]->ltd_reap = 1; @@ -619,7 +618,7 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index) osc_obd = class_exp2obd(tgt->ltd_exp); CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", - lov_uuid2str(lov, index), + lov_uuid2str(lov, index), osc_obd ? osc_obd->obd_name : "<no obd>"); if (tgt->ltd_exp) @@ -630,17 +629,13 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index) * shrink it. */ lov->lov_tgts[index] = NULL; - OBD_FREE_PTR(tgt); + OBD_FREE_PTR(tgt); /* Manual cleanup - no cleanup logs to clean up the osc's. We must do it ourselves. And we can't do it from lov_cleanup, because we just lost our only reference to it. */ - if (osc_obd) { - /* Use lov's force/fail flags. */ - osc_obd->obd_force = obd->obd_force; - osc_obd->obd_fail = obd->obd_fail; + if (osc_obd) class_manual_cleanup(osc_obd); - } } void lov_fix_desc(struct lov_desc *desc) @@ -660,20 +655,19 @@ void lov_fix_desc(struct lov_desc *desc) desc->ld_default_stripe_count = 1; /* from lov_setstripe */ - if ((desc->ld_pattern != 0) && + if ((desc->ld_pattern != 0) && (desc->ld_pattern != LOV_PATTERN_RAID0)) { LCONSOLE_WARN("Unknown stripe pattern: %#x\n",desc->ld_pattern); desc->ld_pattern = 0; } - /* fix qos_maxage */ + if (desc->ld_qos_maxage == 0) desc->ld_qos_maxage = QOS_DEFAULT_MAXAGE; } -static int lov_setup(struct obd_device *obd, obd_count len, void *buf) +static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg *lcfg = buf; struct lov_desc *desc; struct lov_obd *lov = &obd->u.lov; int count; @@ -795,17 +789,17 @@ static int lov_cleanup(struct obd_device *obd) disconnect. */ CERROR("lov tgt %d not cleaned!" " deathrow=%d, lovrc=%d\n", - i, lov->lov_death_row, + i, lov->lov_death_row, atomic_read(&lov->lov_refcount)); lov_del_target(obd, i, 0, 0); } } - OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * + OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * lov->lov_tgt_size); lov->lov_tgt_size = 0; } - - if (lov->lov_qos.lq_rr_size) + + if (lov->lov_qos.lq_rr_size) OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size); RETURN(0); @@ -846,12 +840,12 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf) case LCFG_PARAM: { struct lprocfs_static_vars lvars; struct lov_desc *desc = &(obd->u.lov.desc); - + if (!desc) GOTO(out, rc = -EINVAL); - + lprocfs_init_vars(lov, &lvars); - + rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, lcfg, obd); GOTO(out, rc); @@ -885,7 +879,7 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, lov = &export->exp_obd->u.lov; - tmp_oa = obdo_alloc(); + OBDO_ALLOC(tmp_oa); if (tmp_oa == NULL) RETURN(-ENOMEM); @@ -916,14 +910,14 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, if (ost_uuid && !obd_uuid_equals(ost_uuid, &tgt->ltd_uuid)) continue; - CDEBUG(D_CONFIG,"Clear orphans for %d:%s\n", i, + CDEBUG(D_CONFIG,"Clear orphans for %d:%s\n", i, obd_uuid2str(ost_uuid)); memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); LASSERT(lov->lov_tgts[i]->ltd_exp); /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - err = obd_create(lov->lov_tgts[i]->ltd_exp, + err = obd_create(lov->lov_tgts[i]->ltd_exp, tmp_oa, &obj_mdp, oti); if (err) /* This export will be disabled until it is recovered, @@ -936,7 +930,7 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, } lov_putref(export->exp_obd); - obdo_free(tmp_oa); + OBDO_FREE(tmp_oa); RETURN(rc); } @@ -1119,7 +1113,7 @@ static int lov_getattr(struct obd_export *exp, struct obd_info *oinfo) req = list_entry(pos, struct lov_request, rq_link); CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, + "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, req->rq_oi.oi_oa->o_id, req->rq_idx); rc = obd_getattr(lov->lov_tgts[req->rq_idx]->ltd_exp, @@ -1140,7 +1134,7 @@ static int lov_getattr(struct obd_export *exp, struct obd_info *oinfo) RETURN(rc); } -static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, +static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) { struct lov_request_set *lovset = (struct lov_request_set *)data; @@ -1177,14 +1171,14 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, RETURN(rc); CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", - oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, + oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, oinfo->oi_md->lsm_stripe_size); list_for_each (pos, &lovset->set_list) { req = list_entry(pos, struct lov_request, rq_link); CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, + "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, req->rq_oi.oi_oa->o_id, req->rq_idx); rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, rqset); @@ -1228,11 +1222,11 @@ static int lov_setattr(struct obd_export *exp, struct obd_info *oinfo, RETURN(-ENODEV); /* for now, we only expect the following updates here */ - LASSERT(!(oinfo->oi_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLTYPE | - OBD_MD_FLMODE | OBD_MD_FLATIME | + LASSERT(!(oinfo->oi_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLTYPE | + OBD_MD_FLMODE | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFLAGS | OBD_MD_FLSIZE | - OBD_MD_FLGROUP | OBD_MD_FLUID | + OBD_MD_FLFLAGS | OBD_MD_FLSIZE | + OBD_MD_FLGROUP | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLINLINE | OBD_MD_FLFID | OBD_MD_FLGENER))); lov = &exp->exp_obd->u.lov; @@ -1243,13 +1237,13 @@ static int lov_setattr(struct obd_export *exp, struct obd_info *oinfo, list_for_each (pos, &set->set_list) { req = list_entry(pos, struct lov_request, rq_link); - rc = obd_setattr(lov->lov_tgts[req->rq_idx]->ltd_exp, + rc = obd_setattr(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, NULL); err = lov_update_setattr_set(set, req, rc); if (err) { CERROR("error: setattr objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", - set->set_oi->oi_oa->o_id, + set->set_oi->oi_oa->o_id, req->rq_oi.oi_oa->o_id, req->rq_idx, err); if (!rc) rc = err; @@ -1410,7 +1404,8 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, } static int lov_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end) + struct lov_stripe_md *lsm, obd_off start, obd_off end, + void *capa) { struct lov_request_set *set; struct obd_info oinfo; @@ -1433,10 +1428,10 @@ static int lov_sync(struct obd_export *exp, struct obdo *oa, list_for_each (pos, &set->set_list) { req = list_entry(pos, struct lov_request, rq_link); - rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, + rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, + req->rq_oi.oi_oa, NULL, req->rq_oi.oi_policy.l_extent.start, - req->rq_oi.oi_policy.l_extent.end); + req->rq_oi.oi_policy.l_extent.end, capa); err = lov_update_common_set(set, req, rc); if (err) { CERROR("error: fsync objid "LPX64" subobj "LPX64 @@ -1614,6 +1609,8 @@ static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa) lap->lap_caller_ops->ap_fill_obdo(lap->lap_caller_data, cmd, oa); /* XXX woah, shouldn't we be altering more here? size? */ oa->o_id = lap->lap_loi_id; + oa->o_gr = lap->lap_loi_gr; + oa->o_valid |= OBD_MD_FLGROUP; oa->o_stripe_idx = lap->lap_stripe; } @@ -1636,12 +1633,19 @@ static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) return rc; } +static struct obd_capa *lov_ap_lookup_capa(void *data, int cmd) +{ + struct lov_async_page *lap = LAP_FROM_COOKIE(data); + return lap->lap_caller_ops->ap_lookup_capa(lap->lap_caller_data, cmd); +} + static struct obd_async_page_ops lov_async_page_ops = { .ap_make_ready = lov_ap_make_ready, .ap_refresh_count = lov_ap_refresh_count, .ap_fill_obdo = lov_ap_fill_obdo, .ap_update_obdo = lov_ap_update_obdo, .ap_completion = lov_ap_completion, + .ap_lookup_capa = lov_ap_lookup_capa, }; int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, @@ -1657,12 +1661,12 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, if (!page) { int i = 0; /* Find an existing osc so we can get it's stupid sizeof(*oap). - Only because of this layering limitation will a client + Only because of this layering limitation will a client mount with no osts fail */ - while (!lov->lov_tgts || !lov->lov_tgts[i] || + while (!lov->lov_tgts || !lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) { i++; - if (i >= lov->desc.ld_tgt_count) + if (i >= lov->desc.ld_tgt_count) RETURN(-ENOMEDIUM); } rc = size_round(sizeof(*lap)) + @@ -1685,7 +1689,9 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, /* so the callback doesn't need the lsm */ lap->lap_loi_id = loi->loi_id; - + lap->lap_loi_gr = lsm->lsm_object_gr; + LASSERT(lsm->lsm_object_gr > 0); + lap->lap_sub_cookie = (void *)lap + size_round(sizeof(*lap)); rc = obd_prep_async_page(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, @@ -1787,7 +1793,7 @@ static int lov_trigger_group_io(struct obd_export *exp, for (i = 0; i < lsm->lsm_stripe_count; i++) { loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx] || + if (!lov->lov_tgts[loi->loi_ost_idx] || !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); continue; @@ -1929,7 +1935,6 @@ static int lov_change_cbdata(struct obd_export *exp, void *data) { struct lov_obd *lov; - struct lov_oinfo *loi; int rc = 0, i; ENTRY; @@ -1938,12 +1943,20 @@ static int lov_change_cbdata(struct obd_export *exp, if (!exp || !exp->exp_obd) RETURN(-ENODEV); + LASSERT(lsm->lsm_object_gr > 0); + lov = &exp->exp_obd->u.lov; for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_stripe_md submd; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - loi = lsm->lsm_oinfo[i]; + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); + continue; + } + submd.lsm_object_id = loi->loi_id; + submd.lsm_object_gr = lsm->lsm_object_gr; submd.lsm_stripe_count = 0; rc = obd_change_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, &submd, it, data); @@ -1968,6 +1981,7 @@ static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, if (!exp || !exp->exp_obd) RETURN(-ENODEV); + LASSERT(lsm->lsm_object_gr > 0); LASSERT(lockh); lov = &exp->exp_obd->u.lov; rc = lov_prep_cancel_set(exp, &oinfo, lsm, mode, lockh, &set); @@ -1996,10 +2010,10 @@ static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, } static int lov_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *lsm, int flags, void *opaque) + struct lov_stripe_md *lsm, + int flags, void *opaque) { struct lov_obd *lov; - struct lov_oinfo *loi; int rc = 0, i; ENTRY; @@ -2023,16 +2037,22 @@ static int lov_cancel_unused(struct obd_export *exp, ASSERT_LSM_MAGIC(lsm); + LASSERT(lsm->lsm_object_gr > 0); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_stripe_md submd; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; int err; - loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx] || - !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx); + continue; + } + + if (!lov->lov_tgts[loi->loi_ost_idx]->ltd_active) CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); submd.lsm_object_id = loi->loi_id; + submd.lsm_object_gr = lsm->lsm_object_gr; submd.lsm_stripe_count = 0; err = obd_cancel_unused(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, &submd, flags, opaque); @@ -2051,7 +2071,6 @@ static int lov_join_lru(struct obd_export *exp, struct lov_stripe_md *lsm, int join) { struct lov_obd *lov; - struct lov_oinfo *loi; int i, count = 0; ENTRY; @@ -2062,14 +2081,19 @@ static int lov_join_lru(struct obd_export *exp, lov = &exp->exp_obd->u.lov; for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_stripe_md submd; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; int rc = 0; - loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx] || - !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx); + continue; + } + + if (!lov->lov_tgts[loi->loi_ost_idx]->ltd_active) CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); submd.lsm_object_id = loi->loi_id; + submd.lsm_object_gr = lsm->lsm_object_gr; submd.lsm_stripe_count = 0; rc = obd_join_lru(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, &submd, join); @@ -2108,7 +2132,7 @@ static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, struct lov_obd *lov; int rc = 0; ENTRY; - + LASSERT(oinfo != NULL); LASSERT(oinfo->oi_osfs != NULL); @@ -2159,7 +2183,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, continue; } - err = obd_statfs(class_exp2obd(lov->lov_tgts[i]->ltd_exp), + err = obd_statfs(class_exp2obd(lov->lov_tgts[i]->ltd_exp), &lov_sfs, max_age); if (err) { if (lov->lov_tgts[i]->ltd_active && !rc) @@ -2221,7 +2245,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, genp = (__u32 *)data->ioc_inlbuf3; /* the uuid will be empty for deleted OSTs */ for (i = 0; i < count; i++, uuidp++, genp++) { - if (!lov->lov_tgts[i]) + if (!lov->lov_tgts[i]) continue; *uuidp = lov->lov_tgts[i]->ltd_uuid; *genp = lov->lov_tgts[i]->ltd_gen; @@ -2323,7 +2347,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == data->lock->l_conn_export && loi->loi_id == res_id->name[0] && - loi->loi_gr == res_id->name[1]) { + loi->loi_gr == res_id->name[2]) { *stripe = i; GOTO(out, rc = 0); } @@ -2362,8 +2386,9 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; - int i, rc = 0, err, incr = 0, check_uuid = 0, do_inactive = 0; + int i, rc = 0, err; int no_set = !set; + int incr = 0, check_uuid = 0, do_inactive = 0; ENTRY; if (no_set) { @@ -2380,12 +2405,10 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, do_inactive = 1; } else if (KEY_IS("checksum")) { do_inactive = 1; - } else if (KEY_IS("mds_conn") || KEY_IS("unlinked")) { + } else if (KEY_IS("unlinked")) { check_uuid = val ? 1 : 0; } else if (KEY_IS("evict_by_nid")) { - /* use defaults: - do_inactive = incr = 0; - */ + /* use defaults: do_inactive = incr = 0; */ } lov_getref(obddev); @@ -2399,16 +2422,34 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, if (!lov->lov_tgts[i]->ltd_active && !do_inactive) continue; - /* Only want a specific OSC */ - if (check_uuid && - !obd_uuid_equals(val, &lov->lov_tgts[i]->ltd_uuid)) - continue; + if (KEY_IS(KEY_MDS_CONN)) { + struct mds_group_info *mgi; - err = obd_set_info_async(lov->lov_tgts[i]->ltd_exp, + LASSERT(vallen == sizeof(*mgi)); + mgi = (struct mds_group_info *)val; + + /* Only want a specific OSC */ + if (mgi->uuid && !obd_uuid_equals(mgi->uuid, + &lov->lov_tgts[i]->ltd_uuid)) + continue; + + err = obd_set_info_async(lov->lov_tgts[i]->ltd_exp, + keylen, key, sizeof(int), + &mgi->group, set); + } else { + /* Only want a specific OSC */ + if (check_uuid && + !obd_uuid_equals(val, &lov->lov_tgts[i]->ltd_uuid)) + continue; + + err = obd_set_info_async(lov->lov_tgts[i]->ltd_exp, keylen, key, vallen, val, set); + } + if (!rc) rc = err; } + lov_putref(obddev); if (no_set) { err = ptlrpc_set_wait(set); @@ -2436,12 +2477,11 @@ static int lov_checkmd(struct obd_export *exp, struct obd_export *md_exp, int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm) { - struct lov_oinfo *loi; int i, rc = 0; ENTRY; for (i = 0; i < lsm->lsm_stripe_count; i++) { - loi = lsm->lsm_oinfo[i]; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; if (loi->loi_ar.ar_rc && !rc) rc = loi->loi_ar.ar_rc; loi->loi_ar.ar_rc = 0; @@ -2454,15 +2494,16 @@ EXPORT_SYMBOL(lov_test_and_clear_async_rc); static int lov_extent_calc(struct obd_export *exp, struct lov_stripe_md *lsm, int cmd, __u64 *offset) { + __u32 ssize = lsm->lsm_stripe_size; __u64 start; - __u32 ssize = lsm->lsm_stripe_size; start = *offset; do_div(start, ssize); start = start * ssize; CDEBUG(D_DLMTRACE, "offset "LPU64", stripe %u, start "LPU64 - ", end "LPU64"\n", *offset, ssize, start, start + ssize - 1); + ", end "LPU64"\n", *offset, ssize, start, + start + ssize - 1); if (cmd == OBD_CALC_STRIPE_END) { *offset = start + ssize - 1; } else if (cmd == OBD_CALC_STRIPE_START) { @@ -2648,8 +2689,8 @@ int __init lov_init(void) quota_interface = PORTAL_SYMBOL_GET(lov_quota_interface); init_obd_quota_ops(quota_interface, &lov_obd_ops); - rc = class_register_type(&lov_obd_ops, lvars.module_vars, - LUSTRE_LOV_NAME); + rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars, + LUSTRE_LOV_NAME, NULL); if (rc) { if (quota_interface) PORTAL_SYMBOL_PUT(lov_quota_interface); @@ -2664,7 +2705,7 @@ int __init lov_init(void) static void /*__exit*/ lov_exit(void) { int rc; - + if (quota_interface) PORTAL_SYMBOL_PUT(lov_quota_interface); diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index f9b5ee3..f0c84bb 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -41,7 +41,7 @@ void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm) { struct lov_ost_data_v1 *lod; int i; - + CDEBUG(level, "objid "LPX64", magic 0x%08x, pattern %#x\n", le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic), le32_to_cpu(lmm->lmm_pattern)); @@ -60,12 +60,12 @@ void lov_dump_lmm_join(int level, struct lov_mds_md_join *lmmj) { CDEBUG(level, "objid "LPX64", magic 0x%08X, pattern %#X\n", - le64_to_cpu(lmmj->lmmj_md.lmm_object_id), + le64_to_cpu(lmmj->lmmj_md.lmm_object_id), le32_to_cpu(lmmj->lmmj_md.lmm_magic), le32_to_cpu(lmmj->lmmj_md.lmm_pattern)); CDEBUG(level,"stripe_size %u, stripe_count %u extent_count %u \n", le32_to_cpu(lmmj->lmmj_md.lmm_stripe_size), - le32_to_cpu(lmmj->lmmj_md.lmm_stripe_count), + le32_to_cpu(lmmj->lmmj_md.lmm_stripe_count), le32_to_cpu(lmmj->lmmj_extent_count)); } @@ -88,7 +88,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; - struct lov_oinfo *loi; struct lov_mds_md *lmm; int stripe_count = lov->desc.ld_tgt_count; int lmm_size; @@ -136,7 +135,8 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, lmm->lmm_pattern = cpu_to_le32(lsm->lsm_pattern); for (i = 0; i < stripe_count; i++) { - loi = lsm->lsm_oinfo[i]; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; + /* XXX LOV STACKING call down to osc_packmd() to do packing */ LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n", lmm->lmm_object_id, i, stripe_count, loi->loi_ost_idx); @@ -183,7 +183,7 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, int *stripe_count) return rc; } -int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, +int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern, int magic) { int i, lsm_size; @@ -193,7 +193,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, *lsmp = lsm_alloc_plain(stripe_count, &lsm_size); if (!*lsmp) { - CERROR("can't allocate lsmp, stripe_count %d\n", stripe_count); + CERROR("can't allocate lsmp stripe_count %d\n", stripe_count); RETURN(-ENOMEM); } @@ -213,10 +213,10 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, void lov_free_memmd(struct lov_stripe_md **lsmp) { struct lov_stripe_md *lsm = *lsmp; - + LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); lsm_op_find(lsm->lsm_magic)->lsm_free(lsm); - + *lsmp = NULL; } @@ -224,7 +224,7 @@ void lov_free_memmd(struct lov_stripe_md **lsmp) /* Unpack LOV object metadata from disk storage. It is packed in LE byte * order and is opaque to the networking layer. */ -int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, +int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_mds_md *lmm, int lmm_bytes) { struct obd_device *obd = class_exp2obd(exp); @@ -250,14 +250,13 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, LBUG(); RETURN(lov_stripe_md_size(stripe_count)); } - /* If we are passed an allocated struct but nothing to unpack, free */ if (*lsmp && !lmm) { lov_free_memmd(lsmp); RETURN(0); } - lsm_size = lov_alloc_memmd(lsmp, stripe_count, LOV_PATTERN_RAID0, + lsm_size = lov_alloc_memmd(lsmp, stripe_count, LOV_PATTERN_RAID0, magic); if (lsm_size < 0) RETURN(lsm_size); @@ -276,15 +275,8 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(lsm_size); } -/* Configure object striping information on a new file. - * - * @lmmu is a pointer to a user struct with one or more of the fields set to - * indicate the application preference: lmm_stripe_count, lmm_stripe_size, - * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC. - * @lsmp is a pointer to an in-core stripe MD that needs to be filled in. - */ -int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_user_md *lump) +static int __lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_user_md *lump) { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; @@ -335,23 +327,44 @@ int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, } stripe_count = lov_get_stripecnt(lov, lum.lmm_stripe_count); - if ((__u64)lum.lmm_stripe_size * stripe_count > ~0U) { - CDEBUG(D_IOCTL, "stripe width %ux%u exceeds %u bytes\n", - lum.lmm_stripe_size, (int)lum.lmm_stripe_count, ~0U); + if ((__u64)lum.lmm_stripe_size * stripe_count > ~0UL) { + CDEBUG(D_IOCTL, "stripe width %ux%i exeeds %lu bytes\n", + lum.lmm_stripe_size, (int)lum.lmm_stripe_count, ~0UL); RETURN(-EINVAL); } rc = lov_alloc_memmd(lsmp, stripe_count, lum.lmm_pattern, LOV_MAGIC); - if (rc < 0) - RETURN(rc); - - (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lum.lmm_stripe_offset; - (*lsmp)->lsm_stripe_size = lum.lmm_stripe_size; + if (rc >= 0) { + (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lum.lmm_stripe_offset; + (*lsmp)->lsm_stripe_size = lum.lmm_stripe_size; + rc = 0; + } RETURN(0); } +/* Configure object striping information on a new file. + * + * @lmmu is a pointer to a user struct with one or more of the fields set to + * indicate the application preference: lmm_stripe_count, lmm_stripe_size, + * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC. + * @lsmp is a pointer to an in-core stripe MD that needs to be filled in. + */ +int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_user_md *lump) +{ + int rc; + mm_segment_t seg; + + seg = get_fs(); + set_fs(KERNEL_DS); + + rc = __lov_setstripe(exp, lsmp, lump); + set_fs(seg); + RETURN(rc); +} + int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_user_md *lump) { @@ -401,42 +414,52 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_user_md *lump) { + /* + * XXX huge struct allocated on stack. + */ struct lov_user_md lum; struct lov_mds_md *lmmk = NULL; int rc, lmm_size; + mm_segment_t seg; ENTRY; if (!lsm) RETURN(-ENODATA); + /* + * "Switch to kernel segment" to allow copying from kernel space by + * copy_{to,from}_user(). + */ + seg = get_fs(); + set_fs(KERNEL_DS); rc = copy_from_user(&lum, lump, sizeof(lum)); if (rc) - RETURN(-EFAULT); - - if (lum.lmm_magic != LOV_USER_MAGIC) - RETURN(-EINVAL); - - rc = lov_packmd(exp, &lmmk, lsm); - if (rc < 0) - RETURN(rc); - lmm_size = rc; - rc = 0; - - /* FIXME: Bug 1185 - copy fields properly when structs change */ - LASSERT(sizeof(lum) == sizeof(*lmmk)); - LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0])); - - /* User wasn't expecting this many OST entries */ - if (lum.lmm_stripe_count == 0) { - if (copy_to_user(lump, lmmk, sizeof(lum))) - rc = -EFAULT; - } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { - rc = -EOVERFLOW; - } else if (copy_to_user(lump, lmmk, lmm_size)) { rc = -EFAULT; - } - - obd_free_diskmd(exp, &lmmk); + else if (lum.lmm_magic != LOV_USER_MAGIC) + rc = -EINVAL; + else { + rc = lov_packmd(exp, &lmmk, lsm); + if (rc < 0) + RETURN(rc); + lmm_size = rc; + rc = 0; + + /* FIXME: Bug 1185 - copy fields properly when structs change */ + CLASSERT(sizeof lum == sizeof *lmmk); + CLASSERT(sizeof lum.lmm_objects[0] == + sizeof lmmk->lmm_objects[0]); + + /* User wasn't expecting this many OST entries */ + if (lum.lmm_stripe_count == 0) { + if (copy_to_user(lump, lmmk, sizeof lum)) + rc = -EFAULT; + } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { + rc = -EOVERFLOW; + } else if (copy_to_user(lump, lmmk, sizeof lum)) + rc = -EFAULT; + obd_free_diskmd(exp, &lmmk); + } + set_fs(seg); RETURN(rc); } diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index 8bcc7e2..9ab0f3b 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -37,12 +37,13 @@ #include <obd_lov.h> #include "lov_internal.h" + /* #define QOS_DEBUG 1 */ #define D_QOS D_OTHER -#define TGT_BAVAIL(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bavail*\ - lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bsize) -#define TGT_FFREE(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_ffree) +#define TGT_BAVAIL(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bavail * \ + lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bsize) +#define TGT_FFREE(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_ffree) int qos_add_tgt(struct obd_device *obd, __u32 index) @@ -54,7 +55,7 @@ int qos_add_tgt(struct obd_device *obd, __u32 index) ENTRY; /* We only need this QOS struct on MDT, not clients - but we may not - * have registered the LOV's observer yet, so there's no way to know */ + have registered the LOV's observer yet, so there's no way to know */ if (!exp || !exp->exp_connection) { CERROR("Missing connection\n"); RETURN(-ENOTCONN); @@ -63,7 +64,7 @@ int qos_add_tgt(struct obd_device *obd, __u32 index) down_write(&lov->lov_qos.lq_rw_sem); mutex_down(&lov->lov_lock); list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { - if (obd_uuid_equals(&oss->lqo_uuid, + if (obd_uuid_equals(&oss->lqo_uuid, &exp->exp_connection->c_remote_uuid)) { found++; break; @@ -72,7 +73,7 @@ int qos_add_tgt(struct obd_device *obd, __u32 index) if (!found) { OBD_ALLOC_PTR(oss); - if (!oss) + if (!oss) GOTO(out, rc = -ENOMEM); memcpy(&oss->lqo_uuid, &exp->exp_connection->c_remote_uuid, @@ -81,14 +82,14 @@ int qos_add_tgt(struct obd_device *obd, __u32 index) /* Assume we have to move this one */ list_del(&oss->lqo_oss_list); } - + oss->lqo_ost_count++; lov->lov_tgts[index]->ltd_qos.ltq_oss = oss; /* Add sorted by # of OSTs. Find the first entry that we're bigger than... */ list_for_each_entry(temposs, &lov->lov_qos.lq_oss_list, lqo_oss_list) { - if (oss->lqo_ost_count > temposs->lqo_ost_count) + if (oss->lqo_ost_count > temposs->lqo_ost_count) break; } /* ...and add before it. If we're the first or smallest, temposs @@ -98,7 +99,7 @@ int qos_add_tgt(struct obd_device *obd, __u32 index) lov->lov_qos.lq_dirty = 1; lov->lov_qos.lq_dirty_rr = 1; - CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n", + CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n", obd_uuid2str(&lov->lov_tgts[index]->ltd_uuid), obd_uuid2str(&oss->lqo_uuid), oss->lqo_ost_count); @@ -127,12 +128,12 @@ int qos_del_tgt(struct obd_device *obd, __u32 index) oss->lqo_ost_count--; if (oss->lqo_ost_count == 0) { - CDEBUG(D_QOS, "removing OSS %s\n", + CDEBUG(D_QOS, "removing OSS %s\n", obd_uuid2str(&oss->lqo_uuid)); list_del(&oss->lqo_oss_list); OBD_FREE_PTR(oss); } - + lov->lov_qos.lq_dirty = 1; lov->lov_qos.lq_dirty_rr = 1; out: @@ -140,8 +141,8 @@ out: RETURN(rc); } -/* Recalculate per-object penalties for OSSs and OSTs, - depends on size of each ost in an oss */ +/* Recalculate per-object penalties for OSSs and OSTs, + depends on size of each ost in an oss */ static int qos_calc_ppo(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; @@ -151,10 +152,10 @@ static int qos_calc_ppo(struct obd_device *obd) int rc, i, prio_wide; ENTRY; - if (!lov->lov_qos.lq_dirty) + if (!lov->lov_qos.lq_dirty) GOTO(out, rc = 0); - num_active = lov->desc.ld_active_tgt_count - 1; + num_active = lov->desc.ld_active_tgt_count - 1; if (num_active < 1) GOTO(out, rc = -EAGAIN); @@ -181,7 +182,7 @@ static int qos_calc_ppo(struct obd_device *obd) continue; ba_min = min(temp, ba_min); ba_max = max(temp, ba_max); - + /* Count the number of usable OSS's */ if (lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_bavail == 0) lov->lov_qos.lq_active_oss_count++; @@ -190,10 +191,10 @@ static int qos_calc_ppo(struct obd_device *obd) /* per-OST penalty is prio * TGT_bavail / (num_ost - 1) / 2 */ temp >>= 1; do_div(temp, num_active); - lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj = + lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj = (temp * prio_wide) >> 8; - if (lov->lov_qos.lq_reset == 0) + if (lov->lov_qos.lq_reset == 0) lov->lov_tgts[i]->ltd_qos.ltq_penalty = 0; } @@ -202,30 +203,32 @@ static int qos_calc_ppo(struct obd_device *obd) /* If there's only 1 OSS, we can't penalize it, so instead we have to double the OST penalty */ num_active = 1; - for (i = 0; i < lov->desc.ld_tgt_count; i++) - if (lov->lov_tgts[i]) - lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj <<= 1; + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + if (lov->lov_tgts[i] == NULL) + continue; + lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj <<= 1; + } } - + /* Per-OSS penalty is prio * oss_avail / oss_osts / (num_oss - 1) / 2 */ list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { temp = oss->lqo_bavail >> 1; do_div(temp, oss->lqo_ost_count * num_active); oss->lqo_penalty_per_obj = (temp * prio_wide) >> 8; - if (lov->lov_qos.lq_reset == 0) + if (lov->lov_qos.lq_reset == 0) oss->lqo_penalty = 0; } lov->lov_qos.lq_dirty = 0; lov->lov_qos.lq_reset = 0; - /* If each ost has almost same free space, + /* If each ost has almost same free space, * do rr allocation for better creation performance */ lov->lov_qos.lq_same_space = 0; temp = ba_max - ba_min; - ba_min = (ba_min * 51) >> 8; /* 51/256 = .20 */ + ba_min = (ba_min * 51) >> 8; /* 51/256 = .20 */ if (temp < ba_min) { - /* Difference is less than 20% */ + /* Difference is less than 20% */ lov->lov_qos.lq_same_space = 1; /* Reset weights for the next time we enter qos mode */ lov->lov_qos.lq_reset = 0; @@ -241,12 +244,12 @@ out: static int qos_calc_weight(struct lov_obd *lov, int i) { __u64 temp, temp2; - + /* Final ost weight = TGT_BAVAIL - ost_penalty - oss_penalty */ temp = TGT_BAVAIL(i); - temp2 = lov->lov_tgts[i]->ltd_qos.ltq_penalty + + temp2 = lov->lov_tgts[i]->ltd_qos.ltq_penalty + lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty; - if (temp < temp2) + if (temp < temp2) lov->lov_tgts[i]->ltd_qos.ltq_weight = 0; else lov->lov_tgts[i]->ltd_qos.ltq_weight = temp - temp2; @@ -264,7 +267,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) lov->lov_tgts[index]->ltd_qos.ltq_usable = 0; oss = lov->lov_tgts[index]->ltd_qos.ltq_oss; - + /* Decay old penalty by half (we're adding max penalty, and don't want it to run away.) */ lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1; @@ -274,12 +277,12 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) lov->lov_tgts[index]->ltd_qos.ltq_penalty += lov->lov_tgts[index]->ltd_qos.ltq_penalty_per_obj * lov->desc.ld_active_tgt_count; - oss->lqo_penalty += oss->lqo_penalty_per_obj * + oss->lqo_penalty += oss->lqo_penalty_per_obj * lov->lov_qos.lq_active_oss_count; - + /* Decrease all OSS penalties */ list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { - if (oss->lqo_penalty < oss->lqo_penalty_per_obj) + if (oss->lqo_penalty < oss->lqo_penalty_per_obj) oss->lqo_penalty = 0; else oss->lqo_penalty -= oss->lqo_penalty_per_obj; @@ -288,7 +291,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) *total_wt = 0; /* Decrease all OST penalties */ for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; if (lov->lov_tgts[i]->ltd_qos.ltq_penalty < lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj) @@ -296,7 +299,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) else lov->lov_tgts[i]->ltd_qos.ltq_penalty -= lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj; - + qos_calc_weight(lov, i); /* Recalc the total weight of usable osts */ @@ -307,12 +310,12 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) CDEBUG(D_QOS, "recalc tgt %d avail="LPU64 " ostppo="LPU64" ostp="LPU64" ossppo="LPU64 " ossp="LPU64" wt="LPU64"\n", - i, TGT_BAVAIL(i) >> 10, + i, TGT_BAVAIL(i) >> 10, lov->lov_tgts[i]->ltd_qos.ltq_penalty_per_obj >> 10, - lov->lov_tgts[i]->ltd_qos.ltq_penalty >> 10, - lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty_per_obj>>10, - lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty>>10, - lov->lov_tgts[i]->ltd_qos.ltq_weight>>10); + lov->lov_tgts[i]->ltd_qos.ltq_penalty >> 10, + lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty_per_obj >> 10, + lov->lov_tgts[i]->ltd_qos.ltq_oss->lqo_penalty >> 10, + lov->lov_tgts[i]->ltd_qos.ltq_weight >> 10); #endif } @@ -333,12 +336,24 @@ static int qos_calc_rr(struct lov_obd *lov) RETURN(0); } + /* Do actuall allocation. */ down_write(&lov->lov_qos.lq_rw_sem); + + /* + * Check again. While we were sleeping on @lq_rw_sem something could + * change. + */ + if (!lov->lov_qos.lq_dirty_rr) { + LASSERT(lov->lov_qos.lq_rr_size); + up_write(&lov->lov_qos.lq_rw_sem); + RETURN(0); + } + ost_count = lov->desc.ld_tgt_count; - if (lov->lov_qos.lq_rr_size) + if (lov->lov_qos.lq_rr_size) OBD_FREE(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size); - lov->lov_qos.lq_rr_size = ost_count * + lov->lov_qos.lq_rr_size = ost_count * sizeof(lov->lov_qos.lq_rr_array[0]); OBD_ALLOC(lov->lov_qos.lq_rr_array, lov->lov_qos.lq_rr_size); if (!lov->lov_qos.lq_rr_array) { @@ -359,10 +374,11 @@ static int qos_calc_rr(struct lov_obd *lov) list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { int j = 0; for (i = 0; i < ost_count; i++) { - if (lov->lov_tgts[i] && - (lov->lov_tgts[i]->ltd_qos.ltq_oss == oss)) { + LASSERT(lov->lov_tgts[i] != NULL); + if (lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) { /* Evenly space these OSTs across arrayspace */ int next = j * ost_count / oss->lqo_ost_count; + LASSERT(next < ost_count); while (lov->lov_qos.lq_rr_array[next] != LOV_QOS_EMPTY) next = (next + 1) % ost_count; @@ -379,8 +395,9 @@ static int qos_calc_rr(struct lov_obd *lov) if (placed != real_count) { /* This should never happen */ - LCONSOLE_ERROR("Failed to place all OSTs in the round-robin " - "list (%d of %d).\n", placed, real_count); + LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the " + "round-robin list (%d of %d).\n", + placed, real_count); for (i = 0; i < ost_count; i++) { LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i, lov->lov_qos.lq_rr_array[i]); @@ -395,7 +412,7 @@ static int qos_calc_rr(struct lov_obd *lov) lov->lov_qos.lq_rr_array[i]); } #endif - + RETURN(0); } @@ -422,7 +439,7 @@ void qos_shrink_lsm(struct lov_request_set *set) } } - CWARN("using fewer stripes for object "LPU64": old %u new %u\n", + CWARN("using fewer stripes for object "LPX64": old %u new %u\n", lsm->lsm_object_id, lsm->lsm_stripe_count, set->set_count); LASSERT(lsm->lsm_stripe_count >= set->set_count); @@ -458,8 +475,8 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) ost_idx = (req->rq_idx + lsm->lsm_stripe_count) % ost_count; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - if (!lov->lov_tgts[ost_idx] || - !lov->lov_tgts[ost_idx]->ltd_active) + if (!lov->lov_tgts[ost_idx] || + !lov->lov_tgts[ost_idx]->ltd_active) continue; /* check if objects has been created on this ost */ for (stripe = 0; stripe < lsm->lsm_stripe_count; stripe++) { @@ -493,7 +510,7 @@ static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt) ENTRY; i = qos_calc_rr(lov); - if (i) + if (i) RETURN(i); if (--lov->lov_start_count <= 0) { @@ -501,7 +518,7 @@ static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt) lov->lov_start_count = (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); - } else if (*stripe_cnt >= ost_active_count || + } else if (*stripe_cnt >= ost_active_count || lov->lov_start_idx > ost_count) { /* If we have allocated from all of the OSTs, slowly precess the next start */ @@ -521,18 +538,18 @@ static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt) ost_idx = lov->lov_qos.lq_rr_array[array_idx]; #ifdef QOS_DEBUG CDEBUG(D_QOS, "#%d strt %d act %d strp %d ary %d idx %d\n", - i, lov->lov_start_idx, + i, lov->lov_start_idx, ((ost_idx != LOV_QOS_EMPTY) && lov->lov_tgts[ost_idx]) ? lov->lov_tgts[ost_idx]->ltd_active : 0, idx_pos - idx_arr, array_idx, ost_idx); #endif if ((ost_idx == LOV_QOS_EMPTY) || !lov->lov_tgts[ost_idx] || - !lov->lov_tgts[ost_idx]->ltd_active) + !lov->lov_tgts[ost_idx]->ltd_active) continue; *idx_pos = ost_idx; idx_pos++; /* We have enough stripes */ - if (idx_pos - idx_arr == *stripe_cnt) + if (idx_pos - idx_arr == *stripe_cnt) break; } up_read(&lov->lov_qos.lq_rw_sem); @@ -551,7 +568,7 @@ static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm, ost_idx = lsm->lsm_oinfo[0]->loi_ost_idx; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - if (!lov->lov_tgts[ost_idx] || + if (!lov->lov_tgts[ost_idx] || !lov->lov_tgts[ost_idx]->ltd_active) { continue; } @@ -585,19 +602,33 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) __u32 ost_count; int nfound, good_osts, i, warn = 0, rc = 0; ENTRY; - + lov_getref(exp->exp_obd); + + /* Detect -EAGAIN early, before expensive lock is taken. */ + if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space) + GOTO(out, rc = -EAGAIN); + + /* Do actuall allocation, use write lock here. */ down_write(&lov->lov_qos.lq_rw_sem); + /* + * Check again, while we were sleeping on @lq_rw_sem things could + * change. + */ + if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space) { + up_write(&lov->lov_qos.lq_rw_sem); + GOTO(out, rc = -EAGAIN); + } ost_count = lov->desc.ld_tgt_count; - if (lov->desc.ld_active_tgt_count < 2) - GOTO(out, rc = -EAGAIN); + if (lov->desc.ld_active_tgt_count < 2) + GOTO(out_up_write, rc = -EAGAIN); rc = qos_calc_ppo(exp->exp_obd); - if (rc) - GOTO(out, rc); - + if (rc) + GOTO(out_up_write, rc); + total_bavail = 0; good_osts = 0; /* Warn users about zero available space/inode every 30 min */ @@ -606,22 +637,22 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) /* Find all the OSTs that are valid stripe candidates */ for (i = 0; i < ost_count; i++) { __u64 bavail; - + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; bavail = TGT_BAVAIL(i); if (!bavail) { if (warn) { - CDEBUG(D_QOS, "no free space on %s\n", - obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); + CDEBUG(D_QOS, "no free space on %s\n", + obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); last_warn = now; } continue; } if (!TGT_FFREE(i)) { if (warn) { - CDEBUG(D_QOS, "no free inodes on %s\n", - obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); + CDEBUG(D_QOS, "no free inodes on %s\n", + obd_uuid2str(&lov->lov_tgts[i]->ltd_uuid)); last_warn = now; } continue; @@ -634,23 +665,22 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) good_osts++; } - + if (!total_bavail) - GOTO(out, rc = -ENOSPC); - + GOTO(out_up_write, rc = -ENOSPC); + /* if we don't have enough good OSTs, we reduce the stripe count. */ if (good_osts < *stripe_cnt) *stripe_cnt = good_osts; - if (!*stripe_cnt) - GOTO(out, rc = -EAGAIN); - + if (!*stripe_cnt) + GOTO(out_up_write, rc = -EAGAIN); + /* Find enough OSTs with weighted random allocation. */ nfound = 0; while (nfound < *stripe_cnt) { - __u64 rand, cur_weight; + __u64 rand, cur_weight = 0; - cur_weight = 0; rc = -ENODEV; if (total_weight) { @@ -669,7 +699,6 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) rand |= ll_rand() % (unsigned)total_weight; else rand |= ll_rand(); - #else rand = ((__u64)ll_rand() << 32 | ll_rand()) % total_weight; @@ -703,19 +732,20 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) } } LASSERT(nfound == *stripe_cnt); - -out: + +out_up_write: up_write(&lov->lov_qos.lq_rw_sem); - + +out: if (rc == -EAGAIN) rc = alloc_rr(lov, idx_arr, stripe_cnt); - + lov_putref(exp->exp_obd); RETURN(rc); } /* return new alloced stripe count on success */ -static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, +static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, int newea, int **idx_arr, int *arr_cnt) { struct lov_obd *lov = &exp->exp_obd->u.lov; @@ -731,8 +761,8 @@ static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, for (i = 0; i < *arr_cnt; i++) tmp_arr[i] = -1; - if (newea || - lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count) + if (newea || + lsm->lsm_oinfo[0]->loi_ost_idx >= lov->desc.ld_tgt_count) rc = alloc_qos(exp, tmp_arr, &stripe_cnt); else rc = alloc_specific(lov, lsm, tmp_arr); @@ -765,22 +795,23 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) ENTRY; LASSERT(src_oa->o_valid & OBD_MD_FLID); - + LASSERT(src_oa->o_valid & OBD_MD_FLGROUP); + if (set->set_oi->oi_md == NULL) { int stripe_cnt = lov_get_stripecnt(lov, 0); /* If the MDS file was truncated up to some size, stripe over - * enough OSTs to allow the file to be created at that size. + * enough OSTs to allow the file to be created at that size. * This may mean we use more than the default # of stripes. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { obd_size min_bavail = LUSTRE_STRIPE_MAXBYTES; - - /* Find a small number of stripes we can use + + /* Find a small number of stripes we can use (up to # of active osts). */ stripes = 1; lov_getref(exp->exp_obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; min_bavail = min(min_bavail, TGT_BAVAIL(i)); @@ -796,7 +827,7 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) stripes = stripe_cnt; } - rc = lov_alloc_memmd(&set->set_oi->oi_md, stripes, + rc = lov_alloc_memmd(&set->set_oi->oi_md, stripes, lov->desc.ld_pattern ? lov->desc.ld_pattern : LOV_PATTERN_RAID0, LOV_MAGIC); @@ -808,6 +839,8 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) lsm = set->set_oi->oi_md; lsm->lsm_object_id = src_oa->o_id; + lsm->lsm_object_gr = src_oa->o_gr; + if (!lsm->lsm_stripe_size) lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; if (!lsm->lsm_pattern) { @@ -820,7 +853,7 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) GOTO(out_err, rc = stripes ? stripes : -EIO); LASSERTF(stripes <= lsm->lsm_stripe_count,"requested %d allocated %d\n", lsm->lsm_stripe_count, stripes); - + for (i = 0; i < stripes; i++) { struct lov_request *req; int ost_idx = idx_arr[i]; @@ -836,7 +869,7 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) if (req->rq_oi.oi_md == NULL) GOTO(out_err, rc = -ENOMEM); - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) GOTO(out_err, rc = -ENOMEM); @@ -850,13 +883,12 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) * stripe which holds the existing file size. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { - req->rq_oi.oi_oa->o_size = + req->rq_oi.oi_oa->o_size = lov_size_to_stripe(lsm, src_oa->o_size, i); CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", i, req->rq_oi.oi_oa->o_size, src_oa->o_size); } - } LASSERT(set->set_count == stripes); diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index c7d96a9..fbdefef 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -61,7 +61,7 @@ static void lov_finish_set(struct lov_request_set *set) list_del_init(&req->rq_link); if (req->rq_oi.oi_oa) - obdo_free(req->rq_oi.oi_oa); + OBDO_FREE(req->rq_oi.oi_oa); if (req->rq_oi.oi_md) OBD_FREE(req->rq_oi.oi_md, req->rq_buflen); if (req->rq_oi.oi_osfs) @@ -174,7 +174,7 @@ int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc) struct lov_obd *lov = &exp->exp_obd->u.lov; memset(lov_lockhp, 0, sizeof(*lov_lockhp)); - if (lov->lov_tgts[req->rq_idx] && + if (lov->lov_tgts[req->rq_idx] && lov->lov_tgts[req->rq_idx]->ltd_active) { if (rc != -EINTR) CERROR("enqueue objid "LPX64" subobj " @@ -267,7 +267,6 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_request_set *set; int i, rc = 0; - struct lov_oinfo *loi; ENTRY; OBD_ALLOC(set, sizeof(*set)); @@ -284,6 +283,7 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo, oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie; for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { + struct lov_oinfo *loi; struct lov_request *req; obd_off start, end; @@ -334,6 +334,7 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo, /* XXX LOV STACKING: submd should be from the subobj */ req->rq_oi.oi_md->lsm_object_id = loi->loi_id; + req->rq_oi.oi_md->lsm_object_gr = oinfo->oi_md->lsm_object_gr; req->rq_oi.oi_md->lsm_stripe_count = 0; req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid = loi->loi_kms_valid; @@ -392,7 +393,6 @@ int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_request_set *set; int i, rc = 0; - struct lov_oinfo *loi; ENTRY; OBD_ALLOC(set, sizeof(*set)); @@ -409,6 +409,7 @@ int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo, lockh->cookie = set->set_lockh->llh_handle.h_cookie; for (i = 0; i < lsm->lsm_stripe_count; i++){ + struct lov_oinfo *loi; struct lov_request *req; obd_off start, end; @@ -444,6 +445,7 @@ int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo, /* XXX LOV STACKING: submd should be from the subobj */ req->rq_oi.oi_md->lsm_object_id = loi->loi_id; + req->rq_oi.oi_md->lsm_object_gr = lsm->lsm_object_gr; req->rq_oi.oi_md->lsm_stripe_count = 0; lov_set_add_req(req, set); @@ -482,7 +484,6 @@ int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *set; int i, rc = 0; - struct lov_oinfo *loi; ENTRY; OBD_ALLOC(set, sizeof(*set)); @@ -503,8 +504,8 @@ int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo, for (i = 0; i < lsm->lsm_stripe_count; i++){ struct lov_request *req; struct lustre_handle *lov_lockhp; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - loi = lsm->lsm_oinfo[i]; lov_lockhp = set->set_lockh->llh_handles + i; if (!lustre_handle_is_used(lov_lockhp)) { CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", @@ -528,6 +529,7 @@ int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo, /* XXX LOV STACKING: submd should be from the subobj */ req->rq_oi.oi_md->lsm_object_id = loi->loi_id; + req->rq_oi.oi_md->lsm_object_gr = lsm->lsm_object_gr; req->rq_oi.oi_md->lsm_stripe_count = 0; lov_set_add_req(req, set); @@ -585,7 +587,7 @@ static int create_done(struct obd_export *exp, struct lov_request_set *set, qos_shrink_lsm(set); } - ret_oa = obdo_alloc(); + OBDO_ALLOC(ret_oa); if (ret_oa == NULL) GOTO(cleanup, rc = -ENOMEM); @@ -603,8 +605,10 @@ static int create_done(struct obd_export *exp, struct lov_request_set *set, LBUG(); } ret_oa->o_id = src_oa->o_id; + ret_oa->o_gr = src_oa->o_gr; + ret_oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; memcpy(src_oa, ret_oa, sizeof(*src_oa)); - obdo_free(ret_oa); + OBDO_FREE(ret_oa); *lsmp = set->set_oi->oi_md; GOTO(done, rc = 0); @@ -688,6 +692,7 @@ int lov_update_create_set(struct lov_request_set *set, oti->oti_objid[req->rq_idx] = req->rq_oi.oi_oa->o_id; loi->loi_id = req->rq_oi.oi_oa->o_id; + loi->loi_gr = req->rq_oi.oi_oa->o_gr; loi->loi_ost_idx = req->rq_idx; CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n", lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx); @@ -745,7 +750,7 @@ static int common_attr_done(struct lov_request_set *set) if (!set->set_success) RETURN(-EIO); - tmp_oa = obdo_alloc(); + OBDO_ALLOC(tmp_oa); if (tmp_oa == NULL) GOTO(out, rc = -ENOMEM); @@ -768,7 +773,7 @@ static int common_attr_done(struct lov_request_set *set) memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa)); out: if (tmp_oa) - obdo_free(tmp_oa); + OBDO_FREE(tmp_oa); RETURN(rc); } @@ -825,7 +830,6 @@ int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo, obd_count off; } *info = NULL; struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i, shift; ENTRY; @@ -855,12 +859,13 @@ int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo, /* alloc and initialize lov request */ shift = 0; - for (i = 0 ; i < oinfo->oi_md->lsm_stripe_count; i++){ + for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){ + struct lov_oinfo *loi = NULL; struct lov_request *req; if (info[i].count == 0) continue; - + loi = oinfo->oi_md->lsm_oinfo[i]; if (!lov->lov_tgts[loi->loi_ost_idx] || !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { @@ -872,7 +877,7 @@ int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo, if (req == NULL) GOTO(out, rc = -ENOMEM); - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out, rc = -ENOMEM); @@ -888,7 +893,7 @@ int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_buflen = sizeof(*req->rq_oi.oi_md); OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen); if (req->rq_oi.oi_md == NULL) { - obdo_free(req->rq_oi.oi_oa); + OBDO_FREE(req->rq_oi.oi_oa); OBD_FREE(req, sizeof(*req)); GOTO(out, rc = -ENOMEM); } @@ -906,6 +911,8 @@ int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo, /* remember the index for sort brw_page array */ info[i].index = req->rq_pgaidx; + req->rq_oi.oi_capa = oinfo->oi_capa; + lov_set_add_req(req, set); } if (!set->set_count) @@ -964,7 +971,6 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset) { struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i; ENTRY; @@ -978,6 +984,7 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, set->set_oi = oinfo; for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { + struct lov_oinfo *loi; struct lov_request *req; loi = oinfo->oi_md->lsm_oinfo[i]; @@ -994,7 +1001,7 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_stripe = i; req->rq_idx = loi->loi_ost_idx; - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); @@ -1003,6 +1010,8 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, sizeof(*req->rq_oi.oi_oa)); req->rq_oi.oi_oa->o_id = loi->loi_id; req->rq_oi.oi_cb_up = cb_getattr_update; + req->rq_oi.oi_capa = oinfo->oi_capa; + req->rq_rqset = set; lov_set_add_req(req, set); } @@ -1038,7 +1047,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset) { struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i; ENTRY; @@ -1057,6 +1065,7 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, set->set_cookies = oti->oti_logcookies; for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_oinfo *loi; struct lov_request *req; loi = lsm->lsm_oinfo[i]; @@ -1073,7 +1082,7 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_stripe = i; req->rq_idx = loi->loi_ost_idx; - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); @@ -1152,7 +1161,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset) { struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i; ENTRY; @@ -1169,9 +1177,9 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, set->set_cookies = oti->oti_logcookies; for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { + struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i]; struct lov_request *req; - loi = oinfo->oi_md->lsm_oinfo[i]; if (!lov->lov_tgts[loi->loi_ost_idx] || !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); @@ -1184,7 +1192,7 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_stripe = i; req->rq_idx = loi->loi_ost_idx; - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); @@ -1192,8 +1200,11 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, memcpy(req->rq_oi.oi_oa, oinfo->oi_oa, sizeof(*req->rq_oi.oi_oa)); req->rq_oi.oi_oa->o_id = loi->loi_id; + LASSERT(!(req->rq_oi.oi_oa->o_valid & OBD_MD_FLGROUP) + || req->rq_oi.oi_oa->o_gr>0); req->rq_oi.oi_oa->o_stripe_idx = i; req->rq_oi.oi_cb_up = cb_setattr_update; + req->rq_oi.oi_capa = oinfo->oi_capa; req->rq_rqset = set; if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) { @@ -1241,7 +1252,7 @@ int lov_fini_punch_set(struct lov_request_set *set) } int lov_update_punch_set(struct lov_request_set *set, - struct lov_request *req, int rc) + struct lov_request *req, int rc) { struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov; struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md; @@ -1282,7 +1293,6 @@ int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset) { struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i; ENTRY; @@ -1296,10 +1306,10 @@ int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo, set->set_exp = exp; for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { + struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i]; struct lov_request *req; obd_off rs, re; - loi = oinfo->oi_md->lsm_oinfo[i]; if (!lov->lov_tgts[loi->loi_ost_idx] || !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); @@ -1318,7 +1328,7 @@ int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_stripe = i; req->rq_idx = loi->loi_ost_idx; - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); @@ -1326,6 +1336,9 @@ int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo, memcpy(req->rq_oi.oi_oa, oinfo->oi_oa, sizeof(*req->rq_oi.oi_oa)); req->rq_oi.oi_oa->o_id = loi->loi_id; + req->rq_oi.oi_oa->o_gr = loi->loi_gr; + req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP; + req->rq_oi.oi_oa->o_stripe_idx = i; req->rq_oi.oi_cb_up = cb_update_punch; req->rq_rqset = set; @@ -1334,6 +1347,8 @@ int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_oi.oi_policy.l_extent.end = re; req->rq_oi.oi_policy.l_extent.gid = -1; + req->rq_oi.oi_capa = oinfo->oi_capa; + lov_set_add_req(req, set); } if (!set->set_count) @@ -1371,7 +1386,6 @@ int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset) { struct lov_request_set *set; - struct lov_oinfo *loi = NULL; struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i; ENTRY; @@ -1387,10 +1401,10 @@ int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo, set->set_oi->oi_oa = src_oa; for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; struct lov_request *req; obd_off rs, re; - loi = lsm->lsm_oinfo[i]; if (!lov->lov_tgts[loi->loi_ost_idx] || !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); @@ -1406,7 +1420,7 @@ int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo, req->rq_stripe = i; req->rq_idx = loi->loi_ost_idx; - req->rq_oi.oi_oa = obdo_alloc(); + OBDO_ALLOC(req->rq_oi.oi_oa); if (req->rq_oi.oi_oa == NULL) { OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 874798c..6e41caf 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -304,7 +304,7 @@ static int lov_target_seq_open(struct inode *inode, struct file *file) struct proc_dir_entry *dp = PDE(inode); struct seq_file *seq; int rc; - + LPROCFS_ENTRY_AND_CHECK(dp); rc = seq_open(file, &lov_tgt_sops); if (rc) { diff --git a/lustre/lvfs/.cvsignore b/lustre/lvfs/.cvsignore index fe3989f..d18b255 100644 --- a/lustre/lvfs/.cvsignore +++ b/lustre/lvfs/.cvsignore @@ -15,5 +15,5 @@ autoMakefile .tmp_versions .depend sources -fsfilt_ldiskfs* -fsfilt-ldiskfs* +fsfilt_ldiskfs.* +fsfilt-ldiskfs.* diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in index 6a8feb5..afa2511 100644 --- a/lustre/lvfs/Makefile.in +++ b/lustre/lvfs/Makefile.in @@ -1,6 +1,5 @@ MODULES := lvfs @SERVER_TRUE@MODULES += fsfilt_@BACKINGFS@ -@LDISKFS_TRUE@MODULES += fsfilt_ldiskfs2 @QUOTA_TRUE@MODULES += quotafmt_test lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o @@ -10,13 +9,12 @@ lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o upcall_cache.o ifeq ($(PATCHLEVEL),6) fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o -fsfilt_ldiskfs2-objs := fsfilt-ldiskfs2.o $(obj)/fsfilt-%.c: $(obj)/fsfilt_%.c ln -s $< $@ endif # for <ext3/xattr.h> on 2.6 -EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs -I@LUSTRE@/ldiskfs2 +EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs @INCLUDE_RULES@ diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index 77faaf2..e923452 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -22,23 +22,13 @@ modulefs_DATA := lvfs$(KMODEXT) if SERVER modulefs_DATA += fsfilt_$(BACKINGFS)$(KMODEXT) -if LDISKFS - -modulefs_DATA += fsfilt_ldiskfs2$(KMODEXT) -sources: fsfilt_$(BACKINGFS).c fsfilt_ldiskfs2.c - touch sources - -else #LDISKFS - sources: fsfilt_$(BACKINGFS).c touch sources -endif #LDISKFS - else #SERVER sources: -endif #SERVER +endif fsfilt_extN.c: fsfilt_ext3.c sed -e "s/EXT3/EXTN/g" -e "s/ext3/extN/g" $< > $@ @@ -51,18 +41,8 @@ ldiskfs_sed_flags = \ -e "s/rsv_window_add/ext3_rsv_window_add/g" \ -e "s/EXT3/LDISKFS/g" -e "s/ext3/ldiskfs/g" -ldiskfs2_sed_flags = \ - -e "s/dx_hash_info/ext3_dx_hash_info/g" \ - -e "s/dir_private_info/ext3_dir_private_info/g" \ - -e "s/DX_HASH/EXT3_DX_HASH/g" \ - -e "s/reserve_window/ext3_reserve_window/g" \ - -e "s/rsv_window_add/ext3_rsv_window_add/g" \ - -e "s/EXT3/LDISKFS2/g" -e "s/ext3/ldiskfs2/g" - fsfilt_ldiskfs.c: fsfilt_ext3.c sed $(strip $(ldiskfs_sed_flags)) $< > $@ -fsfilt_ldiskfs2.c: fsfilt_ext3.c - sed $(strip $(ldiskfs2_sed_flags)) $< > $@ fsfilt_ldiskfs_quota.h: fsfilt_ext3_quota.h sed $(strip $(ldiskfs_sed_flags)) $< > $@ diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index ad0d2e4..6f84809 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -39,20 +39,13 @@ #include <linux/quota.h> #include <linux/quotaio_v1.h> #include <linux/quotaio_v2.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/ext3_xattr.h> -#else #include <ext3/xattr.h> -#endif #include <libcfs/kp30.h> #include <lustre_fsfilt.h> #include <obd.h> #include <obd_class.h> #include <lustre_quota.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/iobuf.h> -#endif #include <linux/lustre_compat25.h> #include <linux/lprocfs_status.h> @@ -68,7 +61,10 @@ #define FSFILT_DELETE_TRANS_BLOCKS(sb) EXT3_DELETE_TRANS_BLOCKS(sb) #endif -static kmem_cache_t *fcb_cache; +#define fsfilt_ext3_journal_start(inode, nblocks) ext3_journal_start(inode, nblocks) +#define fsfilt_ext3_journal_stop(handle) ext3_journal_stop(handle) + +static cfs_mem_cache_t *fcb_cache; struct fsfilt_cb_data { struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ @@ -125,6 +121,28 @@ static char *fsfilt_ext3_uuid(struct super_block *sb) return EXT3_SB(sb)->s_es->s_uuid; } +#ifdef HAVE_DISK_INODE_VERSION +/* + * Get the 64-bit version for an inode. + */ +static __u64 fsfilt_ext3_get_version(struct inode *inode) +{ + return EXT3_I(inode)->i_fs_version; +} + +/* + * Set the 64-bit version and return the old version. + */ +static __u64 fsfilt_ext3_set_version(struct inode *inode, __u64 new_version) +{ + __u64 old_version = EXT3_I(inode)->i_fs_version; + + (EXT3_I(inode))->i_fs_version = new_version; + return old_version; +} + +#endif + /* * We don't currently need any additional blocks for rmdir and * unlink transactions because we are storing the OST oa_id inside @@ -207,11 +225,11 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs; break; case FSFILT_OP_JOIN: - /* delete 2 file(file + array id) + create 1 file (array id) + /* delete 2 file(file + array id) + create 1 file (array id) * create/update logs for each stripe */ nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb); - - /*create array log for head file*/ + + /*create array log for head file*/ nblocks += 3; nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + EXT3_SINGLEDATA_TRANS_BLOCKS); @@ -227,14 +245,14 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, journal = EXT3_SB(inode->i_sb)->s_journal; if (nblocks > journal->j_max_transaction_buffers) { CWARN("too many credits %d for op %ux%u using %d instead\n", - nblocks, op, logs, journal->j_max_transaction_buffers); + nblocks, op, logs, journal->j_max_transaction_buffers); nblocks = journal->j_max_transaction_buffers; } journal_start: LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks); lock_24kernel(); - handle = journal_start(EXT3_JOURNAL(inode), nblocks); + handle = fsfilt_ext3_journal_start(inode, nblocks); unlock_24kernel(); if (!IS_ERR(handle)) @@ -373,7 +391,7 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, LASSERTF(needed > 0, "can't start %d credit transaction\n", needed); lock_24kernel(); - handle = journal_start(journal, needed); + handle = fsfilt_ext3_journal_start(fso->fso_dentry->d_inode, needed); unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't get handle for %d credits: rc = %ld\n", needed, @@ -415,7 +433,7 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync) handle->h_sync = 1; /* recovery likes this */ lock_24kernel(); - rc = journal_stop(handle); + rc = fsfilt_ext3_journal_stop(handle); unlock_24kernel(); return rc; @@ -426,9 +444,6 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h, { unsigned long tid; transaction_t *transaction; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - unsigned long rtid; -#endif handle_t *handle = h; journal_t *journal; int rc; @@ -441,7 +456,7 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h, tid = transaction->t_tid; /* we don't want to be blocked */ handle->h_sync = 0; - rc = journal_stop(handle); + rc = fsfilt_ext3_journal_stop(handle); if (rc) { CERROR("error while stopping transaction: %d\n", rc); unlock_24kernel(); @@ -606,43 +621,11 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size, return rc; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio) { submit_bio(rw, bio); return 0; } -#else -static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio) -{ - int rc, blk_per_page; - - rc = brw_kiovec(rw, 1, &bio, inode->i_dev, - KIOBUF_GET_BLOCKS(bio), 1 << inode->i_blkbits); - /* - * brw_kiovec() returns number of bytes actually written. If error - * occurred after something was written, error code is returned though - * kiobuf->errno. (See bug 6854.) - */ - - blk_per_page = CFS_PAGE_SIZE >> inode->i_blkbits; - - if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page) { - CERROR("short write? expected %d, wrote %d (%d)\n", - (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page, - rc, bio->errno); - } - if (bio->errno != 0) { - CERROR("IO error. Wrote %d of %d (%d)\n", - rc, - (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page, - bio->errno); - rc = bio->errno; - } - - return rc; -} -#endif static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, loff_t *off) @@ -746,7 +729,8 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) int rc; memset(&sfs, 0, sizeof(sfs)); - rc = ll_do_statfs(sb,&sfs); + + rc = ll_do_statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; @@ -771,10 +755,7 @@ static int fsfilt_ext3_sync(struct super_block *sb) #endif #ifdef EXT3_MULTIBLOCK_ALLOCATOR -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define ext3_up_truncate_sem(inode) up_write(&EXT3_I(inode)->truncate_sem); -#define ext3_down_truncate_sem(inode) down_write(&EXT3_I(inode)->truncate_sem); -#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)) #define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem); #define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem); #else @@ -842,24 +823,55 @@ static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, return bg_start + colour + block; } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/locks.h> -static void ll_unmap_underlying_metadata(struct super_block *sb, - unsigned long blocknr) +#define ll_unmap_underlying_metadata(sb, blocknr) \ + unmap_underlying_metadata((sb)->s_bdev, blocknr) + +#ifndef EXT3_MB_HINT_GROUP_ALLOC +static unsigned long new_blocks(handle_t *handle, struct ext3_extents_tree *tree, + struct ext3_ext_path *path, unsigned long block, + int *count, int *err) { - struct buffer_head *old_bh; - - old_bh = get_hash_table(sb->s_dev, blocknr, sb->s_blocksize); - if (old_bh) { - mark_buffer_clean(old_bh); - wait_on_buffer(old_bh); - clear_bit(BH_Req, &old_bh->b_state); - __brelse(old_bh); - } + unsigned long pblock, goal; + int aflags = 0; + + goal = ext3_ext_find_goal(tree->inode, path, block, &aflags); + aflags |= 2; /* block have been already reserved */ + lock_24kernel(); + pblock = ext3_mb_new_blocks(handle, tree->inode, goal, count, aflags, err); + unlock_24kernel(); + return pblock; + } #else -#define ll_unmap_underlying_metadata(sb, blocknr) \ - unmap_underlying_metadata((sb)->s_bdev, blocknr) +static unsigned long new_blocks(handle_t *handle, struct ext3_extents_tree *tree, + struct ext3_ext_path *path, unsigned long block, + int *count, int *err) +{ + struct ext3_allocation_request ar; + unsigned long pblock; + int aflags; + + /* find neighbour allocated blocks */ + ar.lleft = block; + *err = ext3_ext_search_left(tree, path, &ar.lleft, &ar.pleft); + if (*err) + return 0; + ar.lright = block; + *err = ext3_ext_search_right(tree, path, &ar.lright, &ar.pright); + if (*err) + return 0; + + /* allocate new block */ + ar.goal = ext3_ext_find_goal(tree->inode, path, block, &aflags); + ar.inode = tree->inode; + ar.logical = block; + ar.len = *count; + ar.flags = EXT3_MB_HINT_DATA; + pblock = ext3_mb_new_blocks(handle, &ar, err); + *count = ar.len; + return pblock; + +} #endif static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, @@ -869,11 +881,10 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, struct inode *inode = tree->inode; struct bpointers *bp = tree->private; struct ext3_extent nex; - int count, err, goal; unsigned long pblock; unsigned long tgen; + int count, err, i; handle_t *handle; - int i, aflags = 0; i = EXT_DEPTH(tree); EXT_ASSERT(i == path->p_depth); @@ -908,7 +919,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_up_truncate_sem(inode); lock_24kernel(); - handle = journal_start(EXT3_JOURNAL(inode), count+EXT3_ALLOC_NEEDED+1); + handle = fsfilt_ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1); unlock_24kernel(); if (IS_ERR(handle)) { ext3_down_truncate_sem(inode); @@ -919,17 +930,13 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, if (tgen != EXT_GENERATION(tree)) { /* the tree has changed. so path can be invalid at moment */ lock_24kernel(); - journal_stop(handle); + fsfilt_ext3_journal_stop(handle); unlock_24kernel(); return EXT_REPEAT; } count = cex->ec_len; - goal = ext3_ext_find_goal(inode, path, cex->ec_block, &aflags); - aflags |= 2; /* block have been already reserved */ - lock_24kernel(); - pblock = ext3_mb_new_blocks(handle, inode, goal, &count, aflags, &err); - unlock_24kernel(); + pblock = new_blocks(handle, tree, path, cex->ec_block, &count, &err); if (!pblock) goto out; EXT_ASSERT(count <= cex->ec_len); @@ -939,8 +946,12 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, nex.ee_start = pblock; nex.ee_len = count; err = ext3_ext_insert_extent(handle, tree, path, &nex); - if (err) + if (err) { + CERROR("can't insert extent: %d\n", err); + /* XXX: export ext3_free_blocks() */ + /*ext3_free_blocks(handle, inode, nex.ee_start, nex.ee_len, 0);*/ goto out; + } /* * Putting len of the actual extent we just inserted, @@ -954,7 +965,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, out: lock_24kernel(); - journal_stop(handle); + fsfilt_ext3_journal_stop(handle); unlock_24kernel(); map: if (err >= 0) { @@ -1118,21 +1129,11 @@ int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page, return rc; } -extern int ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize) +int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs) { - return ext3_prep_san_write(inode, blocks, nblocks, newsize); -} - -static int fsfilt_ext3_read_record(struct file * file, void *buf, - int size, loff_t *offs) -{ - struct inode *inode = file->f_dentry->d_inode; unsigned long block; struct buffer_head *bh; - int err, blocksize, csize, boffs; + int err, blocksize, csize, boffs, osize = size; /* prevent reading after eof */ lock_kernel(); @@ -1169,36 +1170,28 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf, buf += csize; size -= csize; } - return 0; + return osize; } +EXPORT_SYMBOL(fsfilt_ext3_read); -static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, - loff_t *offs, int force_sync) +static int fsfilt_ext3_read_record(struct file * file, void *buf, + int size, loff_t *offs) +{ + int rc; + rc = fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs); + if (rc > 0) + rc = 0; + return rc; +} + +int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize, + loff_t *offs, handle_t *handle) { struct buffer_head *bh = NULL; - unsigned long block; - struct inode *inode = file->f_dentry->d_inode; loff_t old_size = inode->i_size, offset = *offs; loff_t new_size = inode->i_size; - journal_t *journal; - handle_t *handle; - int err = 0, block_count = 0, blocksize, size, boffs; - - /* Determine how many transaction credits are needed */ - blocksize = 1 << inode->i_blkbits; - block_count = (*offs & (blocksize - 1)) + bufsize; - block_count = (block_count + blocksize - 1) >> inode->i_blkbits; - - journal = EXT3_SB(inode->i_sb)->s_journal; - lock_24kernel(); - handle = journal_start(journal, - block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2); - unlock_24kernel(); - if (IS_ERR(handle)) { - CERROR("can't start transaction for %d blocks (%d bytes)\n", - block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize); - return PTR_ERR(handle); - } + unsigned long block; + int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs; while (bufsize > 0) { if (bh != NULL) @@ -1210,14 +1203,14 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, bh = ext3_bread(handle, inode, block, 1, &err); if (!bh) { CERROR("can't read/create block: %d\n", err); - goto out; + break; } err = ext3_journal_get_write_access(handle, bh); if (err) { CERROR("journal_get_write_access() returned error %d\n", err); - goto out; + break; } LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size); memcpy(bh->b_data + boffs, buf, size); @@ -1225,7 +1218,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, if (err) { CERROR("journal_dirty_metadata() returned error %d\n", err); - goto out; + break; } if (offset + size > new_size) new_size = offset + size; @@ -1233,10 +1226,6 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, bufsize -= size; buf += size; } - - if (force_sync) - handle->h_sync = 1; /* recovery likes this */ -out: if (bh) brelse(bh); @@ -1252,12 +1241,43 @@ out: unlock_kernel(); } + if (err == 0) + *offs = offset; + return err; +} +EXPORT_SYMBOL(fsfilt_ext3_write_handle); + +static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, + loff_t *offs, int force_sync) +{ + struct inode *inode = file->f_dentry->d_inode; + handle_t *handle; + int err, block_count = 0, blocksize; + + /* Determine how many transaction credits are needed */ + blocksize = 1 << inode->i_blkbits; + block_count = (*offs & (blocksize - 1)) + bufsize; + block_count = (block_count + blocksize - 1) >> inode->i_blkbits; + lock_24kernel(); - journal_stop(handle); + handle = fsfilt_ext3_journal_start(inode, + block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2); + unlock_24kernel(); + if (IS_ERR(handle)) { + CERROR("can't start transaction for %d blocks (%d bytes)\n", + block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize); + return PTR_ERR(handle); + } + + err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle); + + if (!err && force_sync) + handle->h_sync = 1; /* recovery likes this */ + + lock_24kernel(); + fsfilt_ext3_journal_stop(handle); unlock_24kernel(); - if (err == 0) - *offs = offset; return err; } @@ -1274,6 +1294,9 @@ static int fsfilt_ext3_setup(struct super_block *sb) #endif if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) CWARN("filesystem doesn't have dir_index feature enabled\n"); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)) && HAVE_QUOTA_SUPPORT + set_opt(EXT3_SB(sb)->s_mount_opt, QUOTA); +#endif return 0; } @@ -1331,7 +1354,7 @@ do { \ Q_COPY(out, in, dqb_valid); \ } while (0) - + static int fsfilt_ext3_quotactl(struct super_block *sb, struct obd_quotactl *oqc) @@ -1749,13 +1772,13 @@ static int commit_chkquot(struct super_block *sb, struct qchk_ctxt *qctxt, if (cdqb->dqb_bsoftlimit && toqb(cdqb->dqb_curspace) >= cdqb->dqb_bsoftlimit && !cdqb->dqb_btime) - cdqb->dqb_btime = + cdqb->dqb_btime = now + qctxt->qckt_dqinfo[cdqb->dqb_type].dqi_bgrace; if (cdqb->dqb_isoftlimit && cdqb->dqb_curinodes >= cdqb->dqb_isoftlimit && !cdqb->dqb_itime) - cdqb->dqb_itime = + cdqb->dqb_itime = now + qctxt->qckt_dqinfo[cdqb->dqb_type].dqi_igrace; cdqb->dqb_valid = QIF_ALL; @@ -1835,7 +1858,7 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb, bitmap_bh = read_inode_bitmap(sb, group); if (!bitmap_bh) { CERROR("read_inode_bitmap group %d failed", group); - GOTO(out, -EIO); + GOTO(out, rc = -EIO); } for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) { @@ -1915,7 +1938,7 @@ out: } #ifdef HAVE_QUOTA_SUPPORT -static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, +static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, int cmd) { int rc = 0; @@ -1989,14 +2012,11 @@ static int fsfilt_ext3_dquot(struct lustre_dquot *dquot, int cmd) } #endif -static lvfs_sbdev_type fsfilt_ext3_journal_sbdev(struct super_block *sb) +lvfs_sbdev_type fsfilt_ext3_journal_sbdev(struct super_block *sb) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) return (EXT3_SB(sb)->journal_bdev); -#else - return kdev_t_to_nr(EXT3_SB(sb)->s_journal->j_dev); -#endif } +EXPORT_SYMBOL(fsfilt_ext3_journal_sbdev); static struct fsfilt_operations fsfilt_ext3_ops = { .fs_type = "ext3", @@ -2019,7 +2039,6 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_statfs = fsfilt_ext3_statfs, .fs_sync = fsfilt_ext3_sync, .fs_map_inode_pages = fsfilt_ext3_map_inode_pages, - .fs_prep_san_write = fsfilt_ext3_prep_san_write, .fs_write_record = fsfilt_ext3_write_record, .fs_read_record = fsfilt_ext3_read_record, .fs_setup = fsfilt_ext3_setup, @@ -2027,6 +2046,10 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_get_op_len = fsfilt_ext3_get_op_len, .fs_quotactl = fsfilt_ext3_quotactl, .fs_quotacheck = fsfilt_ext3_quotacheck, +#ifdef HAVE_DISK_INODE_VERSION + .fs_get_version = fsfilt_ext3_get_version, + .fs_set_version = fsfilt_ext3_set_version, +#endif #ifdef HAVE_QUOTA_SUPPORT .fs_quotainfo = fsfilt_ext3_quotainfo, .fs_qids = fsfilt_ext3_qids, @@ -2039,9 +2062,8 @@ static int __init fsfilt_ext3_init(void) { int rc; - fcb_cache = kmem_cache_create("fsfilt_ext3_fcb", - sizeof(struct fsfilt_cb_data), 0, - 0, NULL, NULL); + fcb_cache = cfs_mem_cache_create("fsfilt_ext3_fcb", + sizeof(struct fsfilt_cb_data), 0, 0); if (!fcb_cache) { CERROR("error allocating fsfilt journal callback cache\n"); GOTO(out, rc = -ENOMEM); @@ -2050,7 +2072,7 @@ static int __init fsfilt_ext3_init(void) rc = fsfilt_register_ops(&fsfilt_ext3_ops); if (rc) { - int err = kmem_cache_destroy(fcb_cache); + int err = cfs_mem_cache_destroy(fcb_cache); LASSERTF(err == 0, "error destroying new cache: rc %d\n", err); } out: @@ -2062,7 +2084,7 @@ static void __exit fsfilt_ext3_exit(void) int rc; fsfilt_unregister_ops(&fsfilt_ext3_ops); - rc = kmem_cache_destroy(fcb_cache); + rc = cfs_mem_cache_destroy(fcb_cache); LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n"); } diff --git a/lustre/lvfs/fsfilt_reiserfs.c b/lustre/lvfs/fsfilt_reiserfs.c index 3441c96..684afe9 100644 --- a/lustre/lvfs/fsfilt_reiserfs.c +++ b/lustre/lvfs/fsfilt_reiserfs.c @@ -172,7 +172,9 @@ static int fsfilt_reiserfs_statfs(struct super_block *sb, int rc; memset(&sfs, 0, sizeof(sfs)); + rc = ll_do_statfs(sb, &sfs); + statfs_pack(osfs, &sfs); return rc; } diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 53147a4..442d3fb 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -66,10 +66,8 @@ int obd_memmax; #endif static void push_group_info(struct lvfs_run_ctxt *save, - struct upcall_cache_entry *uce) + struct group_info *ginfo) { - struct group_info *ginfo = uce ? uce->ue_group_info : NULL; - if (!ginfo) { save->ngroups = current_ngroups; current_ngroups = 0; @@ -97,10 +95,8 @@ static void push_group_info(struct lvfs_run_ctxt *save, } static void pop_group_info(struct lvfs_run_ctxt *save, - struct upcall_cache_entry *uce) + struct group_info *ginfo) { - struct group_info *ginfo = uce ? uce->ue_group_info : NULL; - if (!ginfo) { current_ngroups = save->ngroups; } else { @@ -142,6 +138,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); save->luc.luc_umask = current->fs->umask; + save->ngroups = current->group_info->ngroups; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -149,14 +146,22 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { + save->luc.luc_uid = current->uid; + save->luc.luc_gid = current->gid; save->luc.luc_fsuid = current->fsuid; save->luc.luc_fsgid = current->fsgid; save->luc.luc_cap = current->cap_effective; + current->uid = uc->luc_uid; + current->gid = uc->luc_gid; current->fsuid = uc->luc_fsuid; current->fsgid = uc->luc_fsgid; current->cap_effective = uc->luc_cap; - push_group_info(save, uc->luc_uce); + + push_group_info(save, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); @@ -206,10 +211,15 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, mntput(saved->pwdmnt); current->fs->umask = saved->luc.luc_umask; if (uc) { + current->uid = saved->luc.luc_uid; + current->gid = saved->luc.luc_gid; current->fsuid = saved->luc.luc_fsuid; current->fsgid = saved->luc.luc_fsgid; current->cap_effective = saved->luc.luc_cap; - pop_group_info(saved, uc->luc_uce); + pop_group_info(saved, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } /* @@ -232,7 +242,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix) int err = 0; ENTRY; - ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); + // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); @@ -277,7 +287,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) int err = 0; ENTRY; - ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); + // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) @@ -445,7 +455,7 @@ long l_readdir(struct file *file, struct list_head *dentry_list) int error; buf.lrc_dirent = NULL; - buf.lrc_list = dentry_list; + buf.lrc_list = dentry_list; error = vfs_readdir(file, l_filldir, &buf); if (error < 0) @@ -455,17 +465,206 @@ long l_readdir(struct file *file, struct list_head *dentry_list) if (lastdirent) lastdirent->lld_off = file->f_pos; - return 0; + return 0; } EXPORT_SYMBOL(l_readdir); EXPORT_SYMBOL(obd_memory); EXPORT_SYMBOL(obd_memmax); +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) +static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED; +static struct hlist_head *obd_memtable = NULL; +static unsigned long obd_memtable_size = 0; + +static int lvfs_memdbg_init(int size) +{ + struct hlist_head *head; + int i; + + LASSERT(size > sizeof(sizeof(struct hlist_head))); + obd_memtable_size = size / sizeof(struct hlist_head); + + CWARN("Allocating %lu memdbg entries.\n", + (unsigned long)obd_memtable_size); + + LASSERT(obd_memtable == NULL); + obd_memtable = kmalloc(size, GFP_KERNEL); + if (!obd_memtable) + return -ENOMEM; + + i = obd_memtable_size; + head = obd_memtable; + do { + INIT_HLIST_HEAD(head); + head++; + i--; + } while(i); + + return 0; +} + +static int lvfs_memdbg_cleanup(void) +{ + struct hlist_node *node = NULL, *tmp = NULL; + struct hlist_head *head; + struct obd_mem_track *mt; + int i; + + spin_lock(&obd_memlist_lock); + for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each_safe(node, tmp, head) { + mt = hlist_entry(node, struct obd_mem_track, mt_hash); + hlist_del_init(&mt->mt_hash); + kfree(mt); + } + } + spin_unlock(&obd_memlist_lock); + kfree(obd_memtable); + return 0; +} + +static inline unsigned long const hashfn(void *ptr) +{ + return (unsigned long)ptr & + (obd_memtable_size - 1); +} + +static void __lvfs_memdbg_insert(struct obd_mem_track *mt) +{ + struct hlist_head *head = obd_memtable + + hashfn(mt->mt_ptr); + hlist_add_head(&mt->mt_hash, head); +} + +void lvfs_memdbg_insert(struct obd_mem_track *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_insert); + +static void __lvfs_memdbg_remove(struct obd_mem_track *mt) +{ + hlist_del_init(&mt->mt_hash); +} + +void lvfs_memdbg_remove(struct obd_mem_track *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_remove); + +static struct obd_mem_track *__lvfs_memdbg_find(void *ptr) +{ + struct hlist_node *node = NULL; + struct obd_mem_track *mt = NULL; + struct hlist_head *head; + + head = obd_memtable + hashfn(ptr); + + hlist_for_each(node, head) { + mt = hlist_entry(node, struct obd_mem_track, mt_hash); + if ((unsigned long)mt->mt_ptr == (unsigned long)ptr) + break; + mt = NULL; + } + return mt; +} + +struct obd_mem_track *lvfs_memdbg_find(void *ptr) +{ + struct obd_mem_track *mt; + + spin_lock(&obd_memlist_lock); + mt = __lvfs_memdbg_find(ptr); + spin_unlock(&obd_memlist_lock); + + return mt; +} +EXPORT_SYMBOL(lvfs_memdbg_find); + +int lvfs_memdbg_check_insert(struct obd_mem_track *mt) +{ + struct obd_mem_track *tmp; + + spin_lock(&obd_memlist_lock); + tmp = __lvfs_memdbg_find(mt->mt_ptr); + if (tmp == NULL) { + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); + return 1; + } + spin_unlock(&obd_memlist_lock); + return 0; +} +EXPORT_SYMBOL(lvfs_memdbg_check_insert); + +struct obd_mem_track * +lvfs_memdbg_check_remove(void *ptr) +{ + struct obd_mem_track *mt; + + spin_lock(&obd_memlist_lock); + mt = __lvfs_memdbg_find(ptr); + if (mt) { + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); + return mt; + } + spin_unlock(&obd_memlist_lock); + return NULL; +} +EXPORT_SYMBOL(lvfs_memdbg_check_remove); +#endif + +void lvfs_memdbg_show(void) +{ +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + struct hlist_node *node = NULL; + struct hlist_head *head; + struct obd_mem_track *mt; + int header = 0; +#endif + int leaked; + +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + int i; +#endif + + leaked = atomic_read(&obd_memory); + + if (leaked > 0) { + CWARN("Memory leaks detected (max %d, leaked %d)\n", + obd_memmax, leaked); + } + +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + spin_lock(&obd_memlist_lock); + for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each(node, head) { + if (header == 0) { + CWARN("Abnormal memory activities:\n"); + header = 1; + } + mt = hlist_entry(node, struct obd_mem_track, mt_hash); + CWARN(" [%s] ptr: 0x%p, size: %d, src at %s\n", + ((mt->mt_flags & OBD_MT_WRONG_SIZE) ? + "wrong size" : "leaked memory"), + mt->mt_ptr, mt->mt_size, mt->mt_loc); + } + } + spin_unlock(&obd_memlist_lock); +#endif +} +EXPORT_SYMBOL(lvfs_memdbg_show); + #ifdef LUSTRE_KERNEL_VERSION #ifndef HAVE_CLEAR_RDONLY_ON_PUT #error rdonly patchset must be updated [cfs bz11248] #endif - void dev_set_rdonly(lvfs_sbdev_type dev); int dev_check_rdonly(lvfs_sbdev_type dev); @@ -488,7 +687,6 @@ int lvfs_check_rdonly(lvfs_sbdev_type dev) EXPORT_SYMBOL(__lvfs_set_rdonly); EXPORT_SYMBOL(lvfs_check_rdonly); -#endif /* LUSTRE_KERNEL_VERSION */ int lvfs_check_io_health(struct obd_device *obd, struct file *file) { @@ -500,32 +698,36 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file) OBD_ALLOC(write_page, CFS_PAGE_SIZE); if (!write_page) RETURN(-ENOMEM); - + rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1); - + OBD_FREE(write_page, CFS_PAGE_SIZE); CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc); - RETURN(rc); + RETURN(rc); } EXPORT_SYMBOL(lvfs_check_io_health); +#endif /* LUSTRE_KERNEL_VERSION */ static int __init lvfs_linux_init(void) { + ENTRY; +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_init(PAGE_SIZE); +#endif RETURN(0); } static void __exit lvfs_linux_exit(void) { - int leaked; ENTRY; - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); - + lvfs_memdbg_show(); + +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_cleanup(); +#endif EXIT; - return; } MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); diff --git a/lustre/lvfs/quotafmt_test.c b/lustre/lvfs/quotafmt_test.c index 09e10af..9c901fd 100644 --- a/lustre/lvfs/quotafmt_test.c +++ b/lustre/lvfs/quotafmt_test.c @@ -440,10 +440,9 @@ static int quotfmt_test_cleanup(struct obd_device *obd) RETURN(0); } -static int quotfmt_test_setup(struct obd_device *obd, obd_count len, void *buf) +static int quotfmt_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg *lcfg = buf; struct obd_device *tgt; int rc; ENTRY; @@ -487,8 +486,8 @@ static int __init quotfmt_test_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(quotfmt_test, &lvars); - return class_register_type("fmt_obd_ops, lvars.module_vars, - "quotfmt_test"); + return class_register_type("fmt_obd_ops, NULL, lvars.module_vars, + "quotfmt_test", NULL); } static void __exit quotfmt_test_exit(void) diff --git a/lustre/lvfs/upcall_cache.c b/lustre/lvfs/upcall_cache.c index 2db5d90..71a0022 100644 --- a/lustre/lvfs/upcall_cache.c +++ b/lustre/lvfs/upcall_cache.c @@ -76,11 +76,12 @@ void groups_free(struct group_info *ginfo) } #endif -static struct upcall_cache_entry *alloc_entry(__u64 key) +static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache, + __u64 key, void *args) { struct upcall_cache_entry *entry; - OBD_ALLOC(entry, sizeof(*entry)); + OBD_ALLOC_PTR(entry); if (!entry) return NULL; @@ -89,34 +90,66 @@ static struct upcall_cache_entry *alloc_entry(__u64 key) entry->ue_key = key; atomic_set(&entry->ue_refcount, 0); init_waitqueue_head(&entry->ue_waitq); + if (cache->uc_ops->init_entry) + cache->uc_ops->init_entry(entry, args); return entry; } -/* protected by hash lock */ -static void free_entry(struct upcall_cache_entry *entry) +/* protected by cache lock */ +static void free_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { - if (entry->ue_group_info) - groups_free(entry->ue_group_info); + if (cache->uc_ops->free_entry) + cache->uc_ops->free_entry(cache, entry); + list_del(&entry->ue_hash); CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n", entry, entry->ue_key); - OBD_FREE(entry, sizeof(*entry)); + OBD_FREE_PTR(entry); +} + +static inline int upcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + if (entry->ue_key != key) + return -1; + + if (cache->uc_ops->upcall_compare) + return cache->uc_ops->upcall_compare(cache, entry, key, args); + + return 0; +} + +static inline int downcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + if (entry->ue_key != key) + return -1; + + if (cache->uc_ops->downcall_compare) + return cache->uc_ops->downcall_compare(cache, entry, key, args); + + return 0; } -static void get_entry(struct upcall_cache_entry *entry) +static inline void get_entry(struct upcall_cache_entry *entry) { atomic_inc(&entry->ue_refcount); } -static void put_entry(struct upcall_cache_entry *entry) +static inline void put_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { if (atomic_dec_and_test(&entry->ue_refcount) && (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) { - free_entry(entry); + free_entry(cache, entry); } } -static int check_unlink_entry(struct upcall_cache_entry *entry) +static int check_unlink_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { if (UC_CACHE_IS_VALID(entry) && time_before(jiffies, entry->ue_expire)) @@ -134,86 +167,19 @@ static int check_unlink_entry(struct upcall_cache_entry *entry) list_del_init(&entry->ue_hash); if (!atomic_read(&entry->ue_refcount)) - free_entry(entry); + free_entry(cache, entry); return 1; } -static int refresh_entry(struct upcall_cache *hash, +static inline int refresh_entry(struct upcall_cache *cache, struct upcall_cache_entry *entry) { - char *argv[4]; - char *envp[3]; - char keystr[16]; - int rc; - ENTRY; - - snprintf(keystr, 16, LPU64, entry->ue_key); - - CDEBUG(D_INFO, "The groups upcall is: %s \n", hash->uc_upcall); - argv[0] = hash->uc_upcall; - argv[1] = hash->uc_name; - argv[2] = keystr; - argv[3] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/usr/sbin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; " - "check /proc/fs/lustre/mds/%s/group_upcall\n", - hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]); - } else { - CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name, - argv[0], argv[1], argv[2]); - rc = 0; - } - RETURN(rc); + LASSERT(cache->uc_ops->do_upcall); + return cache->uc_ops->do_upcall(cache, entry); } -static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary, - __u32 ngroups, __u32 *groups) -{ - struct group_info *ginfo; - int i, j; - ENTRY; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) - if (ngroups > NGROUPS) - ngroups = NGROUPS; -#endif - - if (ngroups > NGROUPS_MAX) { - CERROR("using first %d supplementary groups for uid "LPU64"\n", - NGROUPS_MAX, entry->ue_key); - ngroups = NGROUPS_MAX; - } - - ginfo = groups_alloc(ngroups); - if (!ginfo) { - CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n", - entry->ue_key, ngroups); - RETURN(-ENOMEM); - } - entry->ue_group_info = ginfo; - entry->ue_primary = primary; - - for (i = 0; i < ginfo->nblocks; i++) { - int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups); - int off = i * NGROUPS_PER_BLOCK; - - for (j = 0; j < cp_count; j++) - ginfo->blocks[i][j] = groups[off + j]; - - ngroups -= cp_count; - } - RETURN(0); -} - -struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, - __u64 key, __u32 primary, - __u32 ngroups, __u32 *groups) +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache, + __u64 key, void *args) { struct upcall_cache_entry *entry = NULL, *new = NULL, *next; struct list_head *head; @@ -221,49 +187,17 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, int rc, found; ENTRY; - LASSERT(hash); + LASSERT(cache); - if (strcmp(hash->uc_upcall, "NONE") == 0) { - new = alloc_entry(key); - if (!new) { - CERROR("fail to alloc entry\n"); - RETURN(NULL); - } - get_entry(new); - - /* We have to sort the groups for 2.6 kernels */ - LASSERT(ngroups <= 2); - if (ngroups == 2 && groups[1] == -1) - ngroups--; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - /* 2.6 needs groups array sorted */ - if (ngroups == 2 && groups[0] > groups[1]) { - __u32 tmp = groups[1]; - groups[1] = groups[0]; - groups[0] = tmp; - } -#endif - if (ngroups > 0 && groups[0] == -1) { - groups[0] = groups[1]; - ngroups--; - } - - rc = entry_set_group_info(new, primary, ngroups, groups); - - /* We can't cache this entry as it only has a subset of - * the user's groups, as sent in suppgid1, suppgid2. */ - UC_CACHE_SET_EXPIRED(new); - RETURN(new); - } - head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; find_again: found = 0; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); list_for_each_entry_safe(entry, next, head, ue_hash) { /* check invalid & expired items */ - if (check_unlink_entry(entry)) + if (check_unlink_entry(cache, entry)) continue; - if (entry->ue_key == key) { + if (upcall_compare(cache, entry, key, args) == 0) { found = 1; break; } @@ -271,8 +205,8 @@ find_again: if (!found) { /* didn't find it */ if (!new) { - spin_unlock(&hash->uc_lock); - new = alloc_entry(key); + spin_unlock(&cache->uc_lock); + new = alloc_entry(cache, key, args); if (!new) { CERROR("fail to alloc entry\n"); RETURN(ERR_PTR(-ENOMEM)); @@ -284,7 +218,7 @@ find_again: } } else { if (new) { - free_entry(new); + free_entry(cache, new); new = NULL; } list_move(&entry->ue_hash, head); @@ -295,10 +229,10 @@ find_again: if (UC_CACHE_IS_NEW(entry)) { UC_CACHE_SET_ACQUIRING(entry); UC_CACHE_CLEAR_NEW(entry); - entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire; - spin_unlock(&hash->uc_lock); - rc = refresh_entry(hash, entry); - spin_lock(&hash->uc_lock); + entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire; + spin_unlock(&cache->uc_lock); + rc = refresh_entry(cache, entry); + spin_lock(&cache->uc_lock); if (rc < 0) { UC_CACHE_CLEAR_ACQUIRING(entry); UC_CACHE_SET_INVALID(entry); @@ -309,32 +243,31 @@ find_again: * this item, just wait it complete */ if (UC_CACHE_IS_ACQUIRING(entry)) { + unsigned long expiry = jiffies + cache->uc_acquire_expire; + init_waitqueue_entry(&wait, current); add_wait_queue(&entry->ue_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); - schedule_timeout(hash->uc_acquire_expire); + schedule_timeout(cache->uc_acquire_expire); - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); remove_wait_queue(&entry->ue_waitq, &wait); if (UC_CACHE_IS_ACQUIRING(entry)) { - static unsigned long next; /* we're interrupted or upcall failed in the middle */ - if (time_after(jiffies, next)) { - CERROR("acquire timeout exceeded for key "LPU64 - "\n", entry->ue_key); - next = jiffies + 1800; - } - put_entry(entry); - GOTO(out, entry = ERR_PTR(-EIDRM)); + rc = time_before(jiffies, expiry) ? -EINTR : -ETIMEDOUT; + put_entry(cache, entry); + CERROR("acquire timeout exceeded for key "LPU64 + "\n", entry->ue_key); + GOTO(out, entry = ERR_PTR(rc)); } /* fall through */ } /* invalid means error, don't need to try again */ if (UC_CACHE_IS_INVALID(entry)) { - put_entry(entry); + put_entry(cache, entry); GOTO(out, entry = ERR_PTR(-EIDRM)); } @@ -342,15 +275,15 @@ find_again: * We can't refresh the existing one because some * memory might be shared by multiple processes. */ - if (check_unlink_entry(entry)) { + if (check_unlink_entry(cache, entry)) { /* if expired, try again. but if this entry is * created by me but too quickly turn to expired * without any error, should at least give a * chance to use it once. */ if (entry != new) { - put_entry(entry); - spin_unlock(&hash->uc_lock); + put_entry(cache, entry); + spin_unlock(&cache->uc_lock); new = NULL; goto find_again; } @@ -358,12 +291,12 @@ find_again: /* Now we know it's good */ out: - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); RETURN(entry); } EXPORT_SYMBOL(upcall_cache_get_entry); -void upcall_cache_put_entry(struct upcall_cache *hash, +void upcall_cache_put_entry(struct upcall_cache *cache, struct upcall_cache_entry *entry) { ENTRY; @@ -374,28 +307,28 @@ void upcall_cache_put_entry(struct upcall_cache *hash, } LASSERT(atomic_read(&entry->ue_refcount) > 0); - spin_lock(&hash->uc_lock); - put_entry(entry); - spin_unlock(&hash->uc_lock); + spin_lock(&cache->uc_lock); + put_entry(cache, entry); + spin_unlock(&cache->uc_lock); EXIT; } EXPORT_SYMBOL(upcall_cache_put_entry); -int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key, - __u32 primary, __u32 ngroups, __u32 *groups) +int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, + void *args) { struct upcall_cache_entry *entry = NULL; struct list_head *head; int found = 0, rc = 0; ENTRY; - LASSERT(hash); + LASSERT(cache); - head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); list_for_each_entry(entry, head, ue_hash) { - if (entry->ue_key == key) { + if (downcall_compare(cache, entry, key, args) == 0) { found = 1; get_entry(entry); break; @@ -404,73 +337,74 @@ int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key, if (!found) { CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n", - hash->uc_name, entry->ue_key); + cache->uc_name, key); /* haven't found, it's possible */ - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); RETURN(-EINVAL); } if (err) { CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n", - hash->uc_name, entry->ue_key, err); + cache->uc_name, entry->ue_key, err); GOTO(out, rc = -EINVAL); } if (!UC_CACHE_IS_ACQUIRING(entry)) { CDEBUG(D_HA, "%s: found uptodate entry %p (key "LPU64")\n", - hash->uc_name, entry, entry->ue_key); + cache->uc_name, entry, entry->ue_key); GOTO(out, rc = 0); } if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) { CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n", - hash->uc_name, entry, entry->ue_key); + cache->uc_name, entry, entry->ue_key); GOTO(out, rc = -EINVAL); } - spin_unlock(&hash->uc_lock); - rc = entry_set_group_info(entry, primary, ngroups, groups); - spin_lock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); + if (cache->uc_ops->parse_downcall) + rc = cache->uc_ops->parse_downcall(cache, entry, args); + spin_lock(&cache->uc_lock); if (rc) GOTO(out, rc); - entry->ue_expire = jiffies + hash->uc_entry_expire; + entry->ue_expire = jiffies + cache->uc_entry_expire; UC_CACHE_SET_VALID(entry); CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n", - hash->uc_name, entry, entry->ue_key); + cache->uc_name, entry, entry->ue_key); out: if (rc) { UC_CACHE_SET_INVALID(entry); list_del_init(&entry->ue_hash); } UC_CACHE_CLEAR_ACQUIRING(entry); - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); wake_up_all(&entry->ue_waitq); - put_entry(entry); + put_entry(cache, entry); RETURN(rc); } EXPORT_SYMBOL(upcall_cache_downcall); -static void cache_flush(struct upcall_cache *hash, int force) +static void cache_flush(struct upcall_cache *cache, int force) { struct upcall_cache_entry *entry, *next; int i; ENTRY; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); for (i = 0; i < UC_CACHE_HASH_SIZE; i++) { list_for_each_entry_safe(entry, next, - &hash->uc_hashtable[i], ue_hash) { + &cache->uc_hashtable[i], ue_hash) { if (!force && atomic_read(&entry->ue_refcount)) { UC_CACHE_SET_EXPIRED(entry); continue; } LASSERT(!atomic_read(&entry->ue_refcount)); - free_entry(entry); + free_entry(cache, entry); } } - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); EXIT; } @@ -486,34 +420,68 @@ void upcall_cache_flush_all(struct upcall_cache *cache) } EXPORT_SYMBOL(upcall_cache_flush_all); -struct upcall_cache *upcall_cache_init(const char *name) +void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args) +{ + struct list_head *head; + struct upcall_cache_entry *entry; + int found = 0; + ENTRY; + + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + + spin_lock(&cache->uc_lock); + list_for_each_entry(entry, head, ue_hash) { + if (upcall_compare(cache, entry, key, args) == 0) { + found = 1; + break; + } + } + + if (found) { + CWARN("%s: flush entry %p: key "LPU64", ref %d, fl %x, " + "cur %lu, ex %ld/%ld\n", + cache->uc_name, entry, entry->ue_key, + atomic_read(&entry->ue_refcount), entry->ue_flags, + get_seconds(), entry->ue_acquire_expire, + entry->ue_expire); + UC_CACHE_SET_EXPIRED(entry); + if (!atomic_read(&entry->ue_refcount)) + free_entry(cache, entry); + } + spin_unlock(&cache->uc_lock); +} +EXPORT_SYMBOL(upcall_cache_flush_one); + +struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, + struct upcall_cache_ops *ops) { - struct upcall_cache *hash; + struct upcall_cache *cache; int i; ENTRY; - OBD_ALLOC(hash, sizeof(*hash)); - if (!hash) + OBD_ALLOC(cache, sizeof(*cache)); + if (!cache) RETURN(ERR_PTR(-ENOMEM)); - spin_lock_init(&hash->uc_lock); + spin_lock_init(&cache->uc_lock); for (i = 0; i < UC_CACHE_HASH_SIZE; i++) - INIT_LIST_HEAD(&hash->uc_hashtable[i]); - strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1); - /* set default value, proc tunable */ - strcpy(hash->uc_upcall, "NONE"); - hash->uc_entry_expire = 10 * 60 * HZ; - hash->uc_acquire_expire = 15 * HZ; - - RETURN(hash); + INIT_LIST_HEAD(&cache->uc_hashtable[i]); + strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1); + /* upcall pathname proc tunable */ + strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1); + cache->uc_entry_expire = 10 * 60 * HZ; + cache->uc_acquire_expire = 15 * HZ; + cache->uc_ops = ops; + + RETURN(cache); } EXPORT_SYMBOL(upcall_cache_init); -void upcall_cache_cleanup(struct upcall_cache *hash) +void upcall_cache_cleanup(struct upcall_cache *cache) { - if (!hash) + if (!cache) return; - upcall_cache_flush_all(hash); - OBD_FREE(hash, sizeof(*hash)); + upcall_cache_flush_all(cache); + OBD_FREE(cache, sizeof(*cache)); } EXPORT_SYMBOL(upcall_cache_cleanup); diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 27107cd..d7b00dc 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -79,6 +79,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, + { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 }, { 0 } }; diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 70b6d00..f390e3c 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -1,41 +1,66 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * This file is part of Lustre, http://www.lustre.org + * Copyright (c) 2003 Cluster File Systems, Inc. * - * MDC internal definitions. + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. */ -#include <lustre_mds.h> +#ifndef _MDC_INTERNAL_H +#define _MDC_INTERNAL_H + +#include <lustre_mdc.h> + void mdc_pack_req_body(struct ptlrpc_request *req, int offset, - __u64 valid, struct ll_fid *fid, int ea_size, int flags); + __u64 valid, const struct lu_fid *fid, + struct obd_capa *oc, int ea_size, int flags); +void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc); void mdc_pack_rep_body(struct ptlrpc_request *); -void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off, - __u32 size, struct ll_fid *mdc_fid); -void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid, - int flags, struct mdc_op_data *data); +void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset, + const struct lu_fid *pfid, const struct lu_fid *cfid, + int flags); +void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset, + __u32 size, const struct lu_fid *fid, + struct obd_capa *oc); +void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid, + int flags, struct md_op_data *data); void mdc_setattr_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, - void *ea2, int ea2len); + struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len); void mdc_create_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, const void *data, int datalen, - __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, - __u64 rdev); + struct md_op_data *op_data, const void *data, int datalen, + __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, + __u64 rdev); void mdc_open_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, __u32 mode, __u64 rdev, + struct md_op_data *op_data, __u32 mode, __u64 rdev, __u32 flags, const void *data, int datalen); -void mdc_join_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, __u64 head_size); +void mdc_join_pack(struct ptlrpc_request *req, int offset, + struct md_op_data *op_data, __u64 head_size); void mdc_unlink_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data); + struct md_op_data *op_data); void mdc_link_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data); + struct md_op_data *op_data); void mdc_rename_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, + struct md_op_data *op_data, const char *old, int oldlen, const char *new, int newlen); -void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, - int valid, struct obd_client_handle *och); +void mdc_close_pack(struct ptlrpc_request *req, int offset, + struct md_op_data *op_data); void mdc_exit_request(struct client_obd *cli); void mdc_enter_request(struct client_obd *cli); @@ -45,35 +70,97 @@ struct mdc_open_data { struct ptlrpc_request *mod_close_req; }; -struct mdc_rpc_lock { - struct semaphore rpcl_sem; - struct lookup_intent *rpcl_it; -}; - -static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) +static inline int client_is_remote(struct obd_export *exp) { - sema_init(&lck->rpcl_sem, 1); - lck->rpcl_it = NULL; -} + struct obd_import *imp = class_exp2cliimp(exp); -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - ENTRY; - if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { - down(&lck->rpcl_sem); - LASSERT(lck->rpcl_it == NULL); - lck->rpcl_it = it; + if (imp->imp_connect_flags_orig & OBD_CONNECT_RMT_CLIENT) { + if (!(imp->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_RMT_CLIENT)) + return 0; + else + return 1; + } else { + if (!(imp->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_LCL_CLIENT)) + return 1; + else + return 0; } } -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { - LASSERT(it == lck->rpcl_it); - lck->rpcl_it = NULL; - up(&lck->rpcl_sem); - } - EXIT; -} +/* Quota stuff */ +extern quota_interface_t *quota_interface; + +/* mdc/mdc_locks.c */ +int mdc_set_lock_data(struct obd_export *exp, + __u64 *lockh, void *data); + +int mdc_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, + ldlm_iterator_t it, void *data); + +int mdc_intent_lock(struct obd_export *exp, + struct md_op_data *, + void *lmm, int lmmsize, + struct lookup_intent *, int, + struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, int extra_lock_flags); +int mdc_enqueue(struct obd_export *exp, + int lock_type, + struct lookup_intent *it, + int lock_mode, + struct md_op_data *op_data, + struct lustre_handle *lockh, + void *lmm, + int lmmlen, + ldlm_completion_callback cb_completion, + ldlm_blocking_callback cb_blocking, + void *cb_data, int extra_lock_flags); + +/* mdc/mdc_request.c */ +int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data); + +int mdc_init_ea_size(struct obd_export *exp, int easize, int def_easzie, + int cookiesize); + +int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags, + struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, + struct ptlrpc_request **); + +struct obd_client_handle; + +int mdc_get_lustre_md(struct obd_export *md_exp, struct ptlrpc_request *req, + int offset, struct obd_export *dt_exp, + struct obd_export *lmv_exp, + struct lustre_md *md); + +int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md); + +int mdc_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req); + +int mdc_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och); + +int mdc_create(struct obd_export *exp, struct md_op_data *op_data, + const void *data, int datalen, int mode, __u32 uid, __u32 gid, + __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request); +int mdc_link(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request); +int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, + const char *old, int oldlen, const char *new, int newlen, + struct ptlrpc_request **request); +int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len, + struct ptlrpc_request **request); +int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request); +int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, + int flags, void *opaque); +int mdc_lock_match(struct obd_export *exp, int flags, + const struct lu_fid *fid, ldlm_type_t type, + ldlm_policy_data_t *policy, ldlm_mode_t mode, + struct lustre_handle *lockh); +#endif diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 7f1a4bc..8f36d2c 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -38,70 +38,107 @@ #endif #endif -void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off, - __u32 size, struct ll_fid *fid) +static void mdc_pack_body(struct mdt_body *b) { - struct mds_body *b; + LASSERT (b != NULL); - b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); b->fsuid = current->fsuid; b->fsgid = current->fsgid; b->capability = current->cap_effective; - b->fid1 = *fid; - b->size = pg_off; /* !! */ - b->suppgid = -1; - b->nlink = size; /* !! */ } -static void mdc_pack_body(struct mds_body *b) +void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc) { - LASSERT (b != NULL); + struct lustre_capa *c; - b->fsuid = current->fsuid; - b->fsgid = current->fsgid; - b->capability = current->cap_effective; + if (oc == NULL) { + LASSERT(lustre_msg_buflen(req->rq_reqmsg, offset) == 0); + return; + } + + c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c)); + LASSERT(c != NULL); + capa_cpy(c, oc); + DEBUG_CAPA(D_SEC, c, "pack"); +} + +void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset, + const struct lu_fid *pfid, + const struct lu_fid *cfid, int flags) +{ + struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + + if (pfid) + b->fid1 = *pfid; + if (cfid) + b->fid2 = *cfid; + b->valid = OBD_MD_FLID; + b->flags = flags; } void mdc_pack_req_body(struct ptlrpc_request *req, int offset, - __u64 valid, struct ll_fid *fid, int ea_size, int flags) + __u64 valid, const struct lu_fid *fid, + struct obd_capa *oc, int ea_size, int flags) { - struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); - if (fid) - b->fid1 = *fid; b->valid = valid; b->eadatasize = ea_size; b->flags = flags; mdc_pack_body(b); + if (fid) { + b->fid1 = *fid; + mdc_pack_capa(req, offset + 1, oc); + } +} + +void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pgoff, + __u32 size, const struct lu_fid *fid, struct obd_capa *oc) +{ + struct mdt_body *b; + + b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + b->fid1 = *fid; + b->size = pgoff; /* !! */ + b->suppgid = -1; + b->nlink = size; /* !! */ + mdc_pack_body(b); + mdc_pack_capa(req, offset + 1, oc); } /* packing of MDS records */ void mdc_create_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, const void *data, int datalen, + struct md_op_data *op_data, const void *data, int datalen, __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev) { - struct mds_rec_create *rec; + struct mdt_rec_create *rec; char *tmp; - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec)); rec->cr_opcode = REINT_CREATE; rec->cr_fsuid = uid; rec->cr_fsgid = gid; rec->cr_cap = cap_effective; - rec->cr_fid = op_data->fid1; - memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); + rec->cr_fid1 = op_data->op_fid1; + rec->cr_fid2 = op_data->op_fid2; rec->cr_mode = mode; rec->cr_rdev = rdev; - rec->cr_time = op_data->mod_time; - rec->cr_suppgid = op_data->suppgids[0]; + rec->cr_time = op_data->op_mod_time; + rec->cr_suppgid1 = op_data->op_suppgids[0]; + rec->cr_suppgid2 = op_data->op_suppgids[1]; + rec->cr_flags = op_data->op_flags; + rec->cr_bias = op_data->op_bias; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); - LOGL0(op_data->name, op_data->namelen, tmp); + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->op_namelen + 1); + LOGL0(op_data->op_name, op_data->op_namelen, tmp); if (data) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen); - memcpy (tmp, data, datalen); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, datalen); + memcpy(tmp, data, datalen); } } @@ -127,21 +164,21 @@ static __u32 mds_pack_open_flags(__u32 flags) /* packing of MDS records */ void mdc_join_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, __u64 head_size) + struct md_op_data *op_data, __u64 head_size) { - struct mds_rec_join *rec; + struct mdt_rec_join *rec; rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec)); LASSERT(rec != NULL); - rec->jr_fid = op_data->fid2; + rec->jr_fid = op_data->op_fid2; rec->jr_headsize = head_size; } void mdc_open_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, __u32 mode, __u64 rdev, + struct md_op_data *op_data, __u32 mode, __u64 rdev, __u32 flags, const void *lmm, int lmmlen) { - struct mds_rec_create *rec; + struct mdt_rec_create *rec; char *tmp; rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); @@ -150,210 +187,233 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset, rec->cr_fsuid = current->fsuid; rec->cr_fsgid = current->fsgid; rec->cr_cap = current->cap_effective; - rec->cr_fid = op_data->fid1; - memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); + if (op_data != NULL) { + rec->cr_fid1 = op_data->op_fid1; + rec->cr_fid2 = op_data->op_fid2; + } rec->cr_mode = mode; rec->cr_flags = mds_pack_open_flags(flags); + rec->cr_time = op_data->op_mod_time; rec->cr_rdev = rdev; - rec->cr_time = op_data->mod_time; - rec->cr_suppgid = op_data->suppgids[0]; - - if (op_data->name) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, - op_data->namelen + 1); - LOGL0(op_data->name, op_data->namelen, tmp); + rec->cr_suppgid1 = op_data->op_suppgids[0]; + rec->cr_suppgid2 = op_data->op_suppgids[1]; + rec->cr_bias = op_data->op_bias; + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + /* the next buffer is child capa, which is used for replay, + * will be packed from the data in reply message. */ + + if (op_data->op_name) { + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, + op_data->op_namelen + 1); + LOGL0(op_data->op_name, op_data->op_namelen, tmp); } if (lmm) { rec->cr_flags |= MDS_OPEN_HAS_EA; #ifndef __KERNEL__ /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */ - rec->cr_replayfid = op_data->fid2; + rec->cr_fid2 = op_data->op_fid2; #endif - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, lmmlen); memcpy (tmp, lmm, lmmlen); } } -void mdc_setattr_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, struct iattr *iattr, void *ea, - int ealen, void *ea2, int ea2len) +static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec, + struct md_op_data *op_data) { - struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset, - sizeof(*rec)); rec->sa_opcode = REINT_SETATTR; rec->sa_fsuid = current->fsuid; rec->sa_fsgid = current->fsgid; rec->sa_cap = current->cap_effective; - rec->sa_fid = data->fid1; rec->sa_suppgid = -1; - if (iattr) { - rec->sa_valid = iattr->ia_valid; - rec->sa_mode = iattr->ia_mode; - rec->sa_uid = iattr->ia_uid; - rec->sa_gid = iattr->ia_gid; - rec->sa_size = iattr->ia_size; - rec->sa_atime = LTIME_S(iattr->ia_atime); - rec->sa_mtime = LTIME_S(iattr->ia_mtime); - rec->sa_ctime = LTIME_S(iattr->ia_ctime); - rec->sa_attr_flags = - ((struct ll_iattr_struct *)iattr)->ia_attr_flags; - if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid)) - rec->sa_suppgid = iattr->ia_gid; - else - rec->sa_suppgid = data->suppgids[0]; + rec->sa_fid = op_data->op_fid1; + rec->sa_valid = op_data->op_attr.ia_valid; + rec->sa_mode = op_data->op_attr.ia_mode; + rec->sa_uid = op_data->op_attr.ia_uid; + rec->sa_gid = op_data->op_attr.ia_gid; + rec->sa_size = op_data->op_attr.ia_size; + rec->sa_blocks = op_data->op_attr_blocks; + rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime); + rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime); + rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime); + rec->sa_attr_flags = ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags; + if ((op_data->op_attr.ia_valid & ATTR_GID) && + in_group_p(op_data->op_attr.ia_gid)) + rec->sa_suppgid = op_data->op_attr.ia_gid; + else + rec->sa_suppgid = op_data->op_suppgids[0]; +} + +static void mdc_epoch_pack(struct mdt_epoch *epoch, struct md_op_data *op_data) +{ + memcpy(&epoch->handle, &op_data->op_handle, sizeof(epoch->handle)); + epoch->ioepoch = op_data->op_ioepoch; + epoch->flags = op_data->op_flags; +} + +void mdc_setattr_pack(struct ptlrpc_request *req, int offset, + struct md_op_data *op_data, void *ea, + int ealen, void *ea2, int ea2len) +{ + struct mdt_rec_setattr *rec; + struct mdt_epoch *epoch; + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + mdc_setattr_pack_rec(rec, op_data); + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + + if (op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) { + epoch = lustre_msg_buf(req->rq_reqmsg, offset + 2, + sizeof(*epoch)); + mdc_epoch_pack(epoch, op_data); } if (ealen == 0) return; - memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen), ea, ealen); + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ealen), ea, ealen); if (ea2len == 0) return; - memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ea2len), ea2, ea2len); + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 4, ea2len), ea2, ea2len); } void mdc_unlink_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data) + struct md_op_data *op_data) { - struct mds_rec_unlink *rec; + struct mdt_rec_unlink *rec; char *tmp; rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); LASSERT (rec != NULL); rec->ul_opcode = REINT_UNLINK; - rec->ul_fsuid = current->fsuid; - rec->ul_fsgid = current->fsgid; - rec->ul_cap = current->cap_effective; - rec->ul_mode = data->create_mode; - rec->ul_suppgid = data->suppgids[0]; - rec->ul_fid1 = data->fid1; - rec->ul_fid2 = data->fid2; - rec->ul_time = data->mod_time; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); - LASSERT (tmp != NULL); - LOGL0(data->name, data->namelen, tmp); + rec->ul_fsuid = op_data->op_fsuid;//current->fsuid; + rec->ul_fsgid = op_data->op_fsgid;//current->fsgid; + rec->ul_cap = op_data->op_cap;//current->cap_effective; + rec->ul_mode = op_data->op_mode; + rec->ul_suppgid = op_data->op_suppgids[0]; + rec->ul_fid1 = op_data->op_fid1; + rec->ul_fid2 = op_data->op_fid2; + rec->ul_time = op_data->op_mod_time; + rec->ul_bias = op_data->op_bias; + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->op_namelen + 1); + LASSERT(tmp != NULL); + LOGL0(op_data->op_name, op_data->op_namelen, tmp); } void mdc_link_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data) + struct md_op_data *op_data) { - struct mds_rec_link *rec; + struct mdt_rec_link *rec; char *tmp; rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); rec->lk_opcode = REINT_LINK; - rec->lk_fsuid = current->fsuid; - rec->lk_fsgid = current->fsgid; - rec->lk_cap = current->cap_effective; - rec->lk_suppgid1 = data->suppgids[0]; - rec->lk_suppgid2 = data->suppgids[1]; - rec->lk_fid1 = data->fid1; - rec->lk_fid2 = data->fid2; - rec->lk_time = data->mod_time; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); - LOGL0(data->name, data->namelen, tmp); + rec->lk_fsuid = op_data->op_fsuid;//current->fsuid; + rec->lk_fsgid = op_data->op_fsgid;//current->fsgid; + rec->lk_cap = op_data->op_cap;//current->cap_effective; + rec->lk_suppgid1 = op_data->op_suppgids[0]; + rec->lk_suppgid2 = op_data->op_suppgids[1]; + rec->lk_fid1 = op_data->op_fid1; + rec->lk_fid2 = op_data->op_fid2; + rec->lk_time = op_data->op_mod_time; + rec->lk_bias = op_data->op_bias; + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + mdc_pack_capa(req, offset + 2, op_data->op_capa2); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, op_data->op_namelen + 1); + LOGL0(op_data->op_name, op_data->op_namelen, tmp); } void mdc_rename_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, + struct md_op_data *op_data, const char *old, int oldlen, const char *new, int newlen) { - struct mds_rec_rename *rec; + struct mdt_rec_rename *rec; char *tmp; rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); /* XXX do something about time, uid, gid */ rec->rn_opcode = REINT_RENAME; - rec->rn_fsuid = current->fsuid; - rec->rn_fsgid = current->fsgid; - rec->rn_cap = current->cap_effective; - rec->rn_suppgid1 = data->suppgids[0]; - rec->rn_suppgid2 = data->suppgids[1]; - rec->rn_fid1 = data->fid1; - rec->rn_fid2 = data->fid2; - rec->rn_time = data->mod_time; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1); + rec->rn_fsuid = op_data->op_fsuid;//current->fsuid; + rec->rn_fsgid = op_data->op_fsgid;//current->fsgid; + rec->rn_cap = op_data->op_cap;//current->cap_effective; + rec->rn_suppgid1 = op_data->op_suppgids[0]; + rec->rn_suppgid2 = op_data->op_suppgids[1]; + rec->rn_fid1 = op_data->op_fid1; + rec->rn_fid2 = op_data->op_fid2; + rec->rn_time = op_data->op_mod_time; + rec->rn_mode = op_data->op_mode; + rec->rn_bias = op_data->op_bias; + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + mdc_pack_capa(req, offset + 2, op_data->op_capa2); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, oldlen + 1); LOGL0(old, oldlen, tmp); if (new) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, newlen + 1); LOGL0(new, newlen, tmp); } } -void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid, - int flags, struct mdc_op_data *data) +void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid, + int flags, struct md_op_data *op_data) { - struct mds_body *b; - b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + struct mdt_body *b; + b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*b)); b->fsuid = current->fsuid; b->fsgid = current->fsgid; b->capability = current->cap_effective; b->valid = valid; + if (op_data->op_bias & MDS_CHECK_SPLIT) + b->valid |= OBD_MD_FLCKSPLIT; + if (op_data->op_bias & MDS_CROSS_REF) + b->valid |= OBD_MD_FLCROSSREF; b->flags = flags | MDS_BFLAG_EXT_FLAGS; - b->suppgid = data->suppgids[0]; + b->suppgid = op_data->op_suppgids[0]; - b->fid1 = data->fid1; - b->fid2 = data->fid2; - if (data->name) { + b->fid1 = op_data->op_fid1; + b->fid2 = op_data->op_fid2; + + mdc_pack_capa(req, offset + 1, op_data->op_capa1); + + if (op_data->op_name) { char *tmp; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, - data->namelen + 1); - LOGL0(data->name, data->namelen, tmp); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, + op_data->op_namelen + 1); + LOGL0(op_data->op_name, op_data->op_namelen, tmp); } } -void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, - int valid, struct obd_client_handle *och) +void mdc_close_pack(struct ptlrpc_request *req, int offset, + struct md_op_data *op_data) { - struct mds_body *body; + struct mdt_epoch *epoch; + struct mdt_rec_setattr *rec; - body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); + epoch = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*epoch)); + rec = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*rec)); - mdc_pack_fid(&body->fid1, oa->o_id, 0, oa->o_mode); - memcpy(&body->handle, &och->och_fh, sizeof(body->handle)); - if (oa->o_valid & OBD_MD_FLATIME) { - body->atime = oa->o_atime; - body->valid |= OBD_MD_FLATIME; - } - if (oa->o_valid & OBD_MD_FLMTIME) { - body->mtime = oa->o_mtime; - body->valid |= OBD_MD_FLMTIME; - } - if (oa->o_valid & OBD_MD_FLCTIME) { - body->ctime = oa->o_ctime; - body->valid |= OBD_MD_FLCTIME; - } - if (oa->o_valid & OBD_MD_FLSIZE) { - body->size = oa->o_size; - body->valid |= OBD_MD_FLSIZE; - } - if (oa->o_valid & OBD_MD_FLBLOCKS) { - body->blocks = oa->o_blocks; - body->valid |= OBD_MD_FLBLOCKS; - } - if (oa->o_valid & OBD_MD_FLFLAGS) { - body->flags = oa->o_flags; - body->valid |= OBD_MD_FLFLAGS; - } + mdc_setattr_pack_rec(rec, op_data); + mdc_pack_capa(req, offset + 2, op_data->op_capa1); + mdc_epoch_pack(epoch, op_data); } -struct mdc_cache_waiter { - struct list_head mcw_entry; - wait_queue_head_t mcw_waitq; -}; - static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) { int rc; diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 618f430..797485f 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -36,8 +36,11 @@ # include <liblustre.h> #endif +#include <linux/lustre_acl.h> #include <obd_class.h> #include <lustre_dlm.h> +/* fid_res_name_eq() */ +#include <lustre_fid.h> #include <lprocfs_status.h> #include "mdc_internal.h" @@ -61,11 +64,13 @@ EXPORT_SYMBOL(it_clear_disposition); static int it_to_lock_mode(struct lookup_intent *it) { + ENTRY; + /* CREAT needs to be tested before open (both could be set) */ if (it->it_op & IT_CREAT) - return LCK_CW; + return LCK_PW; else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP)) - return LCK_CR; + return LCK_PR; LBUG(); RETURN(-EINVAL); @@ -108,18 +113,17 @@ int it_open_error(int phase, struct lookup_intent *it) EXPORT_SYMBOL(it_open_error); /* this must be called on a lockh that is known to have a referenced lock */ -void mdc_set_lock_data(__u64 *l, void *data) +int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data) { struct ldlm_lock *lock; - struct lustre_handle *lockh = (struct lustre_handle *)l; ENTRY; - if (!*l) { + if (!*lockh) { EXIT; - return; + RETURN(0); } - lock = ldlm_handle2lock(lockh); + lock = ldlm_handle2lock((struct lustre_handle *)lockh); LASSERT(lock != NULL); lock_res_and_lock(lock); @@ -139,21 +143,59 @@ void mdc_set_lock_data(__u64 *l, void *data) unlock_res_and_lock(lock); LDLM_LOCK_PUT(lock); - EXIT; + RETURN(0); +} + +int mdc_lock_match(struct obd_export *exp, int flags, + const struct lu_fid *fid, ldlm_type_t type, + ldlm_policy_data_t *policy, ldlm_mode_t mode, + struct lustre_handle *lockh) +{ + struct ldlm_res_id res_id = + { .name = {fid_seq(fid), + fid_oid(fid), + fid_ver(fid)} }; + struct obd_device *obd = class_exp2obd(exp); + int rc; + ENTRY; + + rc = ldlm_lock_match(obd->obd_namespace, flags, + &res_id, type, policy, mode, lockh); + + RETURN(rc); +} + +int mdc_cancel_unused(struct obd_export *exp, + const struct lu_fid *fid, + int flags, void *opaque) +{ + struct ldlm_res_id res_id = + { .name = {fid_seq(fid), + fid_oid(fid), + fid_ver(fid)} }; + struct obd_device *obd = class_exp2obd(exp); + int rc; + + ENTRY; + + rc = ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, + flags, opaque); + RETURN(rc); } -EXPORT_SYMBOL(mdc_set_lock_data); -int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, +int mdc_change_cbdata(struct obd_export *exp, + const struct lu_fid *fid, ldlm_iterator_t it, void *data) { struct ldlm_res_id res_id = { .name = {0} }; ENTRY; - res_id.name[0] = fid->id; - res_id.name[1] = fid->generation; + res_id.name[0] = fid_seq(fid); + res_id.name[1] = fid_oid(fid); + res_id.name[2] = fid_ver(fid); - ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id, - it, data); + ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, + &res_id, it, data); EXIT; return 0; @@ -173,16 +215,6 @@ static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) } } -static int round_up(int val) -{ - int ret = 1; - while (val) { - val >>= 1; - ret <<= 1; - } - return ret; -} - /* Save a large LOV EA into the request buffer so that it is available * for replay. We don't do this in the initial request because the * original request doesn't need this buffer (at most it sends just the @@ -195,36 +227,20 @@ static int round_up(int val) * but this is incredibly unlikely, and questionable whether the client * could do MDS recovery under OOM anyways... */ static void mdc_realloc_openmsg(struct ptlrpc_request *req, - struct mds_body *body, int size[6]) + struct mdt_body *body, int size[9]) { - int new_size, old_size; - struct lustre_msg *new_msg; - - /* save old size */ - old_size = lustre_msg_size(lustre_request_magic(req), 6, size); - - size[DLM_INTENT_REC_OFF + 2] = body->eadatasize; - new_size = lustre_msg_size(lustre_request_magic(req), 6, size); - OBD_ALLOC(new_msg, new_size); - if (new_msg != NULL) { - struct lustre_msg *old_msg = req->rq_reqmsg; - - DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n", - body->eadatasize); - memcpy(new_msg, old_msg, old_size); - lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 2, - body->eadatasize); - - spin_lock(&req->rq_lock); - req->rq_reqmsg = new_msg; - req->rq_reqlen = new_size; - spin_unlock(&req->rq_lock); + int rc; + ENTRY; - OBD_FREE(old_msg, old_size); - } else { + rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4, + body->eadatasize); + if (rc) { + CERROR("Can't enlarge segment %d size to %d\n", + DLM_INTENT_REC_OFF + 4, body->eadatasize); body->valid &= ~OBD_MD_FLEASIZE; body->eadatasize = 0; } + EXIT; } /* We always reserve enough space in the reply packet for a stripe MD, because @@ -233,7 +249,7 @@ int mdc_enqueue(struct obd_export *exp, int lock_type, struct lookup_intent *it, int lock_mode, - struct mdc_op_data *data, + struct md_op_data *op_data, struct lustre_handle *lockh, void *lmm, int lmmsize, @@ -244,22 +260,23 @@ int mdc_enqueue(struct obd_export *exp, struct ptlrpc_request *req; struct obd_device *obddev = class_exp2obd(exp); struct ldlm_res_id res_id = - { .name = {data->fid1.id, data->fid1.generation} }; + { .name = {fid_seq(&op_data->op_fid1), + fid_oid(&op_data->op_fid1), + fid_ver(&op_data->op_fid1)} }; ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } }; struct ldlm_request *lockreq; struct ldlm_intent *lit; struct ldlm_reply *lockrep; - int size[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), + int size[9] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREQ_OFF] = sizeof(*lockreq), [DLM_INTENT_IT_OFF] = sizeof(*lit) }; - int repsize[5] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), + int repsize[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREPLY_OFF] = sizeof(*lockrep), - [DLM_REPLY_REC_OFF] = sizeof(struct mds_body), + [DLM_REPLY_REC_OFF] = sizeof(struct mdt_body), [DLM_REPLY_REC_OFF+1] = obddev->u.cli. cl_max_mds_easize }; int flags = extra_lock_flags | LDLM_FL_HAS_INTENT; int repbufcnt = 4, rc; - void *eadata; ENTRY; LASSERTF(lock_type == LDLM_IBITS, "lock type %d\n", lock_type); @@ -267,51 +284,46 @@ int mdc_enqueue(struct obd_export *exp, // ldlm_it2str(it->it_op), it_name, it_inode->i_ino); if (it->it_op & IT_OPEN) { - it->it_create_mode |= S_IFREG; + int do_join = !!(it->it_flags & O_JOIN_FILE); - size[DLM_INTENT_REC_OFF] = sizeof(struct mds_rec_create); - size[DLM_INTENT_REC_OFF + 1] = data->namelen + 1; + it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG; + + size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_create); + /* parent capability */ + size[DLM_INTENT_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + /* child capability, used for replay only */ + size[DLM_INTENT_REC_OFF + 2] = sizeof(struct lustre_capa); + size[DLM_INTENT_REC_OFF + 3] = op_data->op_namelen + 1; /* As an optimization, we allocate an RPC request buffer for * at least a default-sized LOV EA even if we aren't sending - * one. We grow the whole request to the next power-of-two - * size since we get that much from a slab allocation anyways. - * This avoids an allocation below in the common case where - * we need to save a default-sized LOV EA for open replay. */ - size[DLM_INTENT_REC_OFF + 2] = max(lmmsize, - obddev->u.cli.cl_default_mds_easize); - rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, 6, - size); - if (rc & (rc - 1)) - size[DLM_INTENT_REC_OFF + 2] = - min(size[DLM_INTENT_REC_OFF+2]+round_up(rc)-rc, - obddev->u.cli.cl_max_mds_easize); - - if (it->it_flags & O_JOIN_FILE) { + * one. + */ + size[DLM_INTENT_REC_OFF + 4] = max(lmmsize, + obddev->u.cli.cl_default_mds_easize); + if (do_join) + size[DLM_INTENT_REC_OFF + 5] = + sizeof(struct mdt_rec_join); + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, + LDLM_ENQUEUE, 8 + do_join, size, NULL); + if (!req) + RETURN(-ENOMEM); + + if (do_join) { __u64 head_size = *(__u32*)cb_data; __u32 tsize = *(__u32*)lmm; /* join is like an unlink of the tail */ policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; - size[DLM_INTENT_REC_OFF + 3] = - sizeof(struct mds_rec_join); - req = ptlrpc_prep_req(class_exp2cliimp(exp), - LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - 7, size, NULL); /* when joining file, cb_data and lmm args together * indicate the head file size*/ - mdc_join_pack(req, DLM_INTENT_REC_OFF + 3, data, + mdc_join_pack(req, DLM_INTENT_REC_OFF + 5, op_data, (head_size << 32) | tsize); cb_data = NULL; lmm = NULL; - } else { - req = ptlrpc_prep_req(class_exp2cliimp(exp), - LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - 6, size, NULL); } - if (!req) - RETURN(-ENOMEM); - spin_lock(&req->rq_lock); req->rq_replay = 1; spin_unlock(&req->rq_lock); @@ -322,16 +334,24 @@ int mdc_enqueue(struct obd_export *exp, lit->opc = (__u64)it->it_op; /* pack the intended request */ - mdc_open_pack(req, DLM_INTENT_REC_OFF, data, it->it_create_mode, - 0, it->it_flags, lmm, lmmsize); - - repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE; + mdc_open_pack(req, DLM_INTENT_REC_OFF, op_data, + it->it_create_mode, 0, it->it_flags, + lmm, lmmsize); + + /* for remote client, fetch remote perm for current user */ + repsize[repbufcnt++] = client_is_remote(exp) ? + sizeof(struct mdt_remote_perm) : + LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = sizeof(struct lustre_capa); + repsize[repbufcnt++] = sizeof(struct lustre_capa); } else if (it->it_op & IT_UNLINK) { - size[DLM_INTENT_REC_OFF] = sizeof(struct mds_rec_unlink); - size[DLM_INTENT_REC_OFF + 1] = data->namelen + 1; + size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink); + size[DLM_INTENT_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + size[DLM_INTENT_REC_OFF + 2] = op_data->op_namelen + 1; policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, - LDLM_ENQUEUE, 5, size, NULL); + LDLM_ENQUEUE, 6, size, NULL); if (!req) RETURN(-ENOMEM); @@ -341,21 +361,25 @@ int mdc_enqueue(struct obd_export *exp, lit->opc = (__u64)it->it_op; /* pack the intended request */ - mdc_unlink_pack(req, DLM_INTENT_REC_OFF, data); + mdc_unlink_pack(req, DLM_INTENT_REC_OFF, op_data); repsize[repbufcnt++] = obddev->u.cli.cl_max_mds_cookiesize; } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | - OBD_MD_FLACL | OBD_MD_FLMODEASIZE | - OBD_MD_FLDIREA; - size[DLM_INTENT_REC_OFF] = sizeof(struct mds_body); - size[DLM_INTENT_REC_OFF + 1] = data->namelen + 1; + OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA | + OBD_MD_FLMDSCAPA | OBD_MD_MEA; + valid |= client_is_remote(exp) ? OBD_MD_FLRMTPERM : + OBD_MD_FLACL; + size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body); + size[DLM_INTENT_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + size[DLM_INTENT_REC_OFF + 2] = op_data->op_namelen + 1; if (it->it_op & IT_GETATTR) policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, - LDLM_ENQUEUE, 5, size, NULL); + LDLM_ENQUEUE, 6, size, NULL); if (!req) RETURN(-ENOMEM); @@ -366,9 +390,12 @@ int mdc_enqueue(struct obd_export *exp, /* pack the intended request */ mdc_getattr_pack(req, DLM_INTENT_REC_OFF, valid, - it->it_flags, data); + it->it_flags, op_data); - repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = client_is_remote(exp) ? + sizeof(struct mdt_remote_perm) : + LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = sizeof(struct lustre_capa); } else if (it->it_op == IT_READDIR) { policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, @@ -390,7 +417,7 @@ int mdc_enqueue(struct obd_export *exp, * rpcs in flight counter */ mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it); mdc_enter_request(&obddev->u.cli); - rc = ldlm_cli_enqueue(exp, &req, res_id, lock_type, &policy, + rc = ldlm_cli_enqueue(exp, &req, &res_id, lock_type, &policy, lock_mode, &flags, cb_blocking, cb_completion, NULL, cb_data, NULL, 0, NULL, lockh, 0); mdc_exit_request(&obddev->u.cli); @@ -458,74 +485,127 @@ int mdc_enqueue(struct obd_export *exp, it->it_op,it->d.lustre.it_disposition,it->d.lustre.it_status); /* We know what to expect, so we do any byte flipping required here */ - LASSERT(repbufcnt == 5 || repbufcnt == 2); - if (repbufcnt == 5) { - struct mds_body *body; + LASSERT(repbufcnt == 7 || repbufcnt == 6 || repbufcnt == 2); + if (repbufcnt >= 6) { + int reply_off = DLM_REPLY_REC_OFF; + struct mdt_body *body; - body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); + body = lustre_swab_repbuf(req, reply_off++, sizeof(*body), + lustre_swab_mdt_body); if (body == NULL) { - CERROR ("Can't swab mds_body\n"); + CERROR ("Can't swab mdt_body\n"); RETURN (-EPROTO); } - /* If this is a successful OPEN request, we need to set - replay handler and data early, so that if replay happens - immediately after swabbing below, new reply is swabbed - by that handler correctly */ - if (it_disposition(it, DISP_OPEN_OPEN) && - !it_open_error(DISP_OPEN_OPEN, it)) - mdc_set_open_replay_data(NULL, req); - - if ((body->valid & OBD_MD_FLEASIZE) != 0) { - /* The eadata is opaque; just check that it is there. - * Eventually, obd_unpackmd() will check the contents */ - eadata = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF + 1, + if (req->rq_replay && it_disposition(it, DISP_OPEN_OPEN) && + !it_open_error(DISP_OPEN_OPEN, it)) { + /* + * If this is a successful OPEN request, we need to set + * replay handler and data early, so that if replay + * happens immediately after swabbing below, new reply + * is swabbed by that handler correctly. + */ + mdc_set_open_replay_data(NULL, NULL, req); + } + + if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) { + void *eadata; + + /* + * The eadata is opaque; just check that it is there. + * Eventually, obd_unpackmd() will check the contents. + */ + eadata = lustre_swab_repbuf(req, reply_off++, body->eadatasize, NULL); if (eadata == NULL) { - CERROR ("Missing/short eadata\n"); - RETURN (-EPROTO); + CERROR("Missing/short eadata\n"); + RETURN(-EPROTO); } if (body->valid & OBD_MD_FLMODEASIZE) { - if (obddev->u.cli.cl_max_mds_easize < - body->max_mdsize) { - obddev->u.cli.cl_max_mds_easize = + if (obddev->u.cli.cl_max_mds_easize < + body->max_mdsize) { + obddev->u.cli.cl_max_mds_easize = body->max_mdsize; CDEBUG(D_INFO, "maxeasize become %d\n", body->max_mdsize); } if (obddev->u.cli.cl_max_mds_cookiesize < - body->max_cookiesize) { + body->max_cookiesize) { obddev->u.cli.cl_max_mds_cookiesize = body->max_cookiesize; CDEBUG(D_INFO, "cookiesize become %d\n", body->max_cookiesize); } } - /* We save the reply LOV EA in case we have to replay - * a create for recovery. If we didn't allocate a - * large enough request buffer above we need to - * reallocate it here to hold the actual LOV EA. */ - if (it->it_op & IT_OPEN) { - int offset = DLM_INTENT_REC_OFF + 2; - if (lustre_msg_buflen(req->rq_reqmsg, offset) < + /* + * We save the reply LOV EA in case we have to replay a + * create for recovery. If we didn't allocate a large + * enough request buffer above we need to reallocate it + * here to hold the actual LOV EA. + * + * To not save LOV EA if request is not going to replay + * (for example error one). + */ + if ((it->it_op & IT_OPEN) && req->rq_replay) { + if (lustre_msg_buflen(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 4) < body->eadatasize) mdc_realloc_openmsg(req, body, size); - lmm = lustre_msg_buf(req->rq_reqmsg, offset, + lmm = lustre_msg_buf(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 4, body->eadatasize); if (lmm) memcpy(lmm, eadata, body->eadatasize); } } + if (body->valid & OBD_MD_FLRMTPERM) { + struct mdt_remote_perm *perm; + + LASSERT(client_is_remote(exp)); + perm = lustre_swab_repbuf(req, reply_off++, + sizeof(*perm), + lustre_swab_mdt_remote_perm); + if (perm == NULL) { + CERROR("missing remote permission!\n"); + RETURN(-EPROTO); + } + } else if ((body->valid & OBD_MD_FLACL) && body->aclsize) { + reply_off++; + } + if (body->valid & OBD_MD_FLMDSCAPA) { + struct lustre_capa *capa, *p; + + capa = lustre_unpack_capa(req->rq_repmsg, reply_off++); + if (capa == NULL) { + CERROR("Missing/short MDS capability\n"); + RETURN(-EPROTO); + } + + if (it->it_op & IT_OPEN) { + /* client fid capa will be checked in replay */ + p = lustre_msg_buf(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 2, + sizeof(*p)); + LASSERT(p); + *p = *capa; + } + } + if (body->valid & OBD_MD_FLOSSCAPA) { + struct lustre_capa *capa; + + capa = lustre_unpack_capa(req->rq_repmsg, reply_off++); + if (capa == NULL) { + CERROR("Missing/short OSS capability\n"); + RETURN(-EPROTO); + } + } } RETURN(rc); } -EXPORT_SYMBOL(mdc_enqueue); - -/* +/* * This long block is all about fixing up the lock and request state * so that it is correct as of the moment _before_ the operation was * applied; that way, the VFS will think that everything is normal and @@ -552,57 +632,72 @@ EXPORT_SYMBOL(mdc_enqueue); * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the * child lookup. */ -int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, +int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, void *lmm, int lmmsize, struct lookup_intent *it, int lookup_flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, int extra_lock_flags) + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) { - struct lustre_handle lockh; struct ptlrpc_request *request; - int rc = 0; - struct mds_body *mds_body; struct lustre_handle old_lock; + struct lustre_handle lockh; + struct mdt_body *mdt_body; struct ldlm_lock *lock; + int rc = 0; ENTRY; LASSERT(it); - CDEBUG(D_DLMTRACE,"name: %.*s in inode "LPU64", intent: %s flags %#o\n", - op_data->namelen, op_data->name, op_data->fid1.id, - ldlm_it2str(it->it_op), it->it_flags); + CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID + ", intent: %s flags %#o\n", op_data->op_namelen, + op_data->op_name, PFID(&op_data->op_fid2), + PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), + it->it_flags); - if (op_data->fid2.id && - (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) { + if (fid_is_sane(&op_data->op_fid2) && + (it->it_op & (IT_LOOKUP | IT_GETATTR))) { /* We could just return 1 immediately, but since we should only * be called in revalidate_it if we already have a lock, let's * verify that. */ - struct ldlm_res_id res_id = {.name ={op_data->fid2.id, - op_data->fid2.generation}}; - struct lustre_handle lockh; + struct ldlm_res_id res_id = { .name = { fid_seq(&op_data->op_fid2), + fid_oid(&op_data->op_fid2), + fid_ver(&op_data->op_fid2) } }; ldlm_policy_data_t policy; - int mode = LCK_CR; + ldlm_mode_t mode = LCK_CR; - /* As not all attributes are kept under update lock, e.g. - owner/group/acls are under lookup lock, we need both + /* As not all attributes are kept under update lock, e.g. + owner/group/acls are under lookup lock, we need both ibits for GETATTR. */ + + /* For CMD, UPDATE lock and LOOKUP lock can not be got + * at the same for cross-object, so we can not match + * the 2 lock at the same time FIXME: but how to handle + * the above situation */ policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ? - MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP : - MDS_INODELOCK_LOOKUP; + MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP; rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_IBITS, &policy, LCK_CR, &lockh); + LDLM_IBITS, &policy, mode, &lockh); if (!rc) { mode = LCK_CW; rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_IBITS, &policy,LCK_CW,&lockh); + LDLM_IBITS, &policy, mode, &lockh); } if (!rc) { mode = LCK_PR; rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_IBITS, &policy,LCK_PR,&lockh); + LDLM_IBITS, &policy, mode, &lockh); + } + + if (!rc) { + mode = LCK_PW; + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED, &res_id, + LDLM_IBITS, &policy, mode, &lockh); } + if (rc) { memcpy(&it->d.lustre.it_lock_handle, &lockh, sizeof(lockh)); @@ -611,7 +706,7 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, /* Only return failure if it was not GETATTR by cfid (from inode_revalidate) */ - if (rc || op_data->namelen != 0) + if (rc || op_data->op_namelen != 0) RETURN(rc); } @@ -624,7 +719,15 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, * this and use the request from revalidate. In this case, revalidate * never dropped its reference, so the refcounts are all OK */ if (!it_disposition(it, DISP_ENQ_COMPLETE)) { - + /* For case if upper layer did not alloc fid, do it now. */ + if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) { + rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data); + if (rc < 0) { + CERROR("Can't alloc new fid, rc %d\n", rc); + RETURN(rc); + } + } + rc = mdc_enqueue(exp, LDLM_IBITS, it, it_to_lock_mode(it), op_data, &lockh, lmm, lmmsize, ldlm_completion_ast, cb_blocking, NULL, @@ -632,7 +735,8 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, if (rc < 0) RETURN(rc); memcpy(&it->d.lustre.it_lock_handle, &lockh, sizeof(lockh)); - } else if (!op_data->fid2.id) { + } else if (!fid_is_sane(&op_data->op_fid2) || + !(it->it_flags & O_CHECK_STALE)) { /* DISP_ENQ_COMPLETE set means there is extra reference on * request referenced from this intent, saved for subsequent * lookup. This path is executed when we proceed to this @@ -654,19 +758,20 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, if (rc) RETURN(rc); - mds_body = lustre_msg_buf(request->rq_repmsg, DLM_REPLY_REC_OFF, - sizeof(*mds_body)); - LASSERT(mds_body != NULL); /* mdc_enqueue checked */ + mdt_body = lustre_msg_buf(request->rq_repmsg, DLM_REPLY_REC_OFF, + sizeof(*mdt_body)); + LASSERT(mdt_body != NULL); /* mdc_enqueue checked */ LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */ /* If we were revalidating a fid/name pair, mark the intent in * case we fail and get called again from lookup */ - if (op_data->fid2.id && (it->it_op != IT_GETATTR)) { + if (fid_is_sane(&op_data->op_fid2) && (it->it_flags & O_CHECK_STALE) && + (it->it_op != IT_GETATTR)) { it_set_disposition(it, DISP_ENQ_COMPLETE); + /* Also: did we find the same inode? */ - if (memcmp(&op_data->fid2, &mds_body->fid1, - sizeof(op_data->fid2))) - RETURN (-ESTALE); + if (!lu_fid_eq(&op_data->op_fid2, &mdt_body->fid1)) + RETURN(-ESTALE); } rc = it_open_error(DISP_LOOKUP_EXECD, it); @@ -708,7 +813,18 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, if (lock) { ldlm_policy_data_t policy = lock->l_policy_data; LDLM_DEBUG(lock, "matching against this"); + + LASSERTF(fid_res_name_eq(&mdt_body->fid1, + &lock->l_resource->lr_name), + "Lock res_id: %lu/%lu/%lu, fid: %lu/%lu/%lu.\n", + (unsigned long)lock->l_resource->lr_name.name[0], + (unsigned long)lock->l_resource->lr_name.name[1], + (unsigned long)lock->l_resource->lr_name.name[2], + (unsigned long)fid_seq(&mdt_body->fid1), + (unsigned long)fid_oid(&mdt_body->fid1), + (unsigned long)fid_ver(&mdt_body->fid1)); LDLM_LOCK_PUT(lock); + memcpy(&old_lock, &lockh, sizeof(lockh)); if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL, LDLM_IBITS, &policy, LCK_NL, &old_lock)) { @@ -720,9 +836,8 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, } } CDEBUG(D_DENTRY,"D_IT dentry %.*s intent: %s status %d disp %x rc %d\n", - op_data->namelen, op_data->name, ldlm_it2str(it->it_op), + op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op), it->d.lustre.it_status, it->d.lustre.it_disposition, rc); RETURN(rc); } -EXPORT_SYMBOL(mdc_intent_lock); diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index e00a369..63bbb0c 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -54,9 +54,9 @@ static int mdc_reint(struct ptlrpc_request *request, if (rc) CDEBUG(D_INFO, "error in handling %d\n", rc); else if (!lustre_swab_repbuf(request, REPLY_REC_OFF, - sizeof(struct mds_body), - lustre_swab_mds_body)) { - CERROR ("Can't unpack mds_body\n"); + sizeof(struct mdt_body), + lustre_swab_mdt_body)) { + CERROR ("Can't unpack mdt_body\n"); rc = -EPROTO; } return rc; @@ -68,20 +68,26 @@ static int mdc_reint(struct ptlrpc_request *request, * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a * magic open-path setattr that should take the setattr semaphore and * go to the setattr portal. */ -int mdc_setattr(struct obd_export *exp, struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len, +int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len, struct ptlrpc_request **request) { struct ptlrpc_request *req; - struct mds_rec_setattr *rec; + struct mdt_rec_setattr *rec; struct mdc_rpc_lock *rpc_lock; struct obd_device *obd = exp->exp_obd; - int size[4] = { sizeof(struct ptlrpc_body), - sizeof(*rec), ealen, ea2len }; - int bufcount = 2, rc; + int size[6] = { sizeof(struct ptlrpc_body), + sizeof(*rec), 0, 0, ealen, ea2len }; + int bufcount = 4, rc; ENTRY; - LASSERT(iattr != NULL); + LASSERT(op_data != NULL); + + size[REQ_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + + if (op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) + size[REQ_REC_OFF + 2] = sizeof(struct mdt_epoch); if (ealen > 0) { bufcount++; @@ -94,20 +100,22 @@ int mdc_setattr(struct obd_export *exp, struct mdc_op_data *data, if (req == NULL) RETURN(-ENOMEM); - if (iattr->ia_valid & ATTR_FROM_OPEN) { + if (op_data->op_attr.ia_valid & ATTR_FROM_OPEN) { req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249 rpc_lock = obd->u.cli.cl_setattr_lock; } else { rpc_lock = obd->u.cli.cl_rpc_lock; } - if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) + if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME)) CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n", - LTIME_S(iattr->ia_mtime), LTIME_S(iattr->ia_ctime)); - mdc_setattr_pack(req, REQ_REC_OFF, data, iattr, ea, ealen, ea2, ea2len); + LTIME_S(op_data->op_attr.ia_mtime), + LTIME_S(op_data->op_attr.ia_ctime)); + mdc_setattr_pack(req, REQ_REC_OFF, op_data, ea, ealen, ea2, ea2len); - size[REPLY_REC_OFF] = sizeof(struct mds_body); - ptlrpc_req_set_repsize(req, 2, size); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); + size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa); + ptlrpc_req_set_repsize(req, 3, size); rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL); *request = req; @@ -117,18 +125,34 @@ int mdc_setattr(struct obd_export *exp, struct mdc_op_data *data, RETURN(rc); } -int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data, +int mdc_create(struct obd_export *exp, struct md_op_data *op_data, const void *data, int datalen, int mode, __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request) { + int size[5] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_rec_create), + 0, op_data->op_namelen + 1 }; struct obd_device *obd = exp->exp_obd; + int level, bufcount = 4, rc; struct ptlrpc_request *req; - int level, bufcount = 3, rc; - int size[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_rec_create), - op_data->namelen + 1 }; ENTRY; + /* For case if upper layer did not alloc fid, do it now. */ + if (!fid_is_sane(&op_data->op_fid2)) { + /* + * mdc_fid_alloc() may return errno 1 in case of switch to new + * sequence, handle this. + */ + rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data); + if (rc < 0) { + CERROR("Can't alloc new fid, rc %d\n", rc); + RETURN(rc); + } + } + + size[REQ_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + if (data && datalen) { size[bufcount] = datalen; bufcount++; @@ -139,54 +163,74 @@ int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data, if (req == NULL) RETURN(-ENOMEM); - /* mdc_create_pack fills msg->bufs[1] with name - * and msg->bufs[2] with tgt, for symlinks or lov MD data */ + /* + * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with + * tgt, for symlinks or lov MD data. + */ mdc_create_pack(req, REQ_REC_OFF, op_data, data, datalen, mode, uid, gid, cap_effective, rdev); - size[REPLY_REC_OFF] = sizeof(struct mds_body); - ptlrpc_req_set_repsize(req, 2, size); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); + size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa); + ptlrpc_req_set_repsize(req, 3, size); level = LUSTRE_IMP_FULL; resend: rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, level); + /* Resend if we were told to. */ if (rc == -ERESTARTSYS) { level = LUSTRE_IMP_RECOVER; goto resend; + } else if (rc == 0) { + struct mdt_body *body; + struct lustre_capa *capa; + + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(*body)); + LASSERT(body); + if (body->valid & OBD_MD_FLMDSCAPA) { + capa = lustre_unpack_capa(req->rq_repmsg, + REPLY_REC_OFF + 1); + if (capa == NULL) { + CERROR("Missing/short MDS capability\n"); + rc = -EPROTO; + } + } } - if (!rc) - mdc_store_inode_generation(req, REQ_REC_OFF, REPLY_REC_OFF); - *request = req; RETURN(rc); } -int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data, +int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req = *request; int size[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_rec_unlink), - data->namelen + 1 }; + sizeof(struct mdt_rec_unlink), + 0, op_data->op_namelen + 1 }; int rc; ENTRY; LASSERT(req == NULL); + + size[REQ_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 3, size, NULL); + MDS_REINT, 4, size, NULL); if (req == NULL) RETURN(-ENOMEM); *request = req; - size[REPLY_REC_OFF] = sizeof(struct mds_body); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); size[REPLY_REC_OFF + 1] = obd->u.cli.cl_max_mds_easize; size[REPLY_REC_OFF + 2] = obd->u.cli.cl_max_mds_cookiesize; ptlrpc_req_set_repsize(req, 4, size); - mdc_unlink_pack(req, REQ_REC_OFF, data); + mdc_unlink_pack(req, REQ_REC_OFF, op_data); rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); if (rc == -ERESTARTSYS) @@ -194,25 +238,30 @@ int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data, RETURN(rc); } -int mdc_link(struct obd_export *exp, struct mdc_op_data *data, +int mdc_link(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; - int size[3] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_rec_link), - data->namelen + 1 }; + int size[5] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_rec_link), + 0, 0, op_data->op_namelen + 1 }; int rc; ENTRY; + size[REQ_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + size[REQ_REC_OFF + 2] = op_data->op_capa2 ? + sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 3, size, NULL); + MDS_REINT, 5, size, NULL); if (req == NULL) RETURN(-ENOMEM); - mdc_link_pack(req, REQ_REC_OFF, data); + mdc_link_pack(req, REQ_REC_OFF, op_data); - size[REPLY_REC_OFF] = sizeof(struct mds_body); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); ptlrpc_req_set_repsize(req, 2, size); rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); @@ -223,27 +272,31 @@ int mdc_link(struct obd_export *exp, struct mdc_op_data *data, RETURN(rc); } -int mdc_rename(struct obd_export *exp, struct mdc_op_data *data, +int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, const char *old, int oldlen, const char *new, int newlen, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; - int size[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_rec_rename), - oldlen + 1, - newlen + 1 }; + int size[6] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_rec_rename), + 0, 0, oldlen + 1, newlen + 1 }; int rc; ENTRY; + size[REQ_REC_OFF + 1] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; + size[REQ_REC_OFF + 2] = op_data->op_capa2 ? + sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 4, size, NULL); + MDS_REINT, 6, size, NULL); if (req == NULL) RETURN(-ENOMEM); - mdc_rename_pack(req, REQ_REC_OFF, data, old, oldlen, new, newlen); + mdc_rename_pack(req, REQ_REC_OFF, op_data, old, oldlen, new, newlen); - size[REPLY_REC_OFF] = sizeof(struct mds_body); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); size[REPLY_REC_OFF + 1] = obd->u.cli.cl_max_mds_easize; size[REPLY_REC_OFF + 2] = obd->u.cli.cl_max_mds_cookiesize; ptlrpc_req_set_repsize(req, 4, size); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index bb148d7..20605d8 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -36,8 +36,11 @@ # include <liblustre.h> #endif +#include <linux/lustre_acl.h> #include <obd_class.h> #include <lustre_dlm.h> +#include <lustre_fid.h> +#include <md_object.h> #include <lprocfs_status.h> #include <lustre_param.h> #include "mdc_internal.h" @@ -51,15 +54,39 @@ extern quota_interface_t mdc_quota_interface; static int mdc_cleanup(struct obd_device *obd); -extern int mds_queue_req(struct ptlrpc_request *); +static struct obd_capa *mdc_unpack_capa(struct ptlrpc_request *req, + unsigned int offset) +{ + struct lustre_capa *capa; + struct obd_capa *oc; + + /* swabbed already in mdc_enqueue */ + capa = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*capa)); + if (capa == NULL) { + CERROR("missing capa at offset %d failed!\n", offset); + return ERR_PTR(-EFAULT); + } + + oc = alloc_capa(CAPA_SITE_CLIENT); + if (!oc) { + CERROR("alloc capa failed!\n"); + return ERR_PTR(-ENOMEM); + } + oc->c_capa = *capa; + + return oc; +} + /* Helper that implements most of mdc_getstatus and signal_completed_replay. */ /* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */ -static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, - int level, int msg_flags) +static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, + struct obd_capa **pc, int level, int msg_flags) { struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + struct mdt_body *body; + int rc, size[3] = { sizeof(struct ptlrpc_body), + sizeof(*body), + sizeof(struct lustre_capa) }; ENTRY; req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS, 2, size, @@ -68,25 +95,34 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, GOTO(out, rc = -ENOMEM); req->rq_send_state = level; - ptlrpc_req_set_repsize(req, 2, size); + ptlrpc_req_set_repsize(req, 3, size); - mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0); + mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, NULL, 0, 0); lustre_msg_add_flags(req->rq_reqmsg, msg_flags); rc = ptlrpc_queue_wait(req); if (!rc) { body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); + lustre_swab_mdt_body); if (body == NULL) { - CERROR ("Can't extract mds_body\n"); + CERROR ("Can't extract mdt_body\n"); GOTO (out, rc = -EPROTO); } - memcpy(rootfid, &body->fid1, sizeof(*rootfid)); + *rootfid = body->fid1; + + if (body->valid & OBD_MD_FLMDSCAPA) { + struct obd_capa *oc; - CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64 + oc = mdc_unpack_capa(req, REPLY_REC_OFF + 1); + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + *pc = oc; + } + + CDEBUG(D_NET, "root fid="DFID", last_committed="LPU64 ", last_xid="LPU64"\n", - rootfid->id, + PFID(rootfid), lustre_msg_get_last_committed(req->rq_repmsg), lustre_msg_get_last_xid(req->rq_repmsg)); } @@ -98,98 +134,128 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, } /* This should be mdc_get_info("rootfid") */ -int mdc_getstatus(struct obd_export *exp, struct ll_fid *rootfid) +int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid, + struct obd_capa **pc) { - return send_getstatus(class_exp2cliimp(exp), rootfid, LUSTRE_IMP_FULL, - 0); + return send_getstatus(class_exp2cliimp(exp), rootfid, pc, + LUSTRE_IMP_FULL, 0); } -static -int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, - unsigned int acl_size, struct ptlrpc_request *req) +/* + * This function now is known to always saying that it will receive 4 buffers + * from server. Even for cases when acl_size and md_size is zero, RPC header + * willcontain 4 fields and RPC itself will contain zero size fields. This is + * because mdt_getattr*() _always_ returns 4 fields, but if acl is not needed + * and thus zero, it shirinks it, making zero size. The same story about + * md_size. And this is course of problem when client waits for smaller number + * of fields. This issue will be fixed later when client gets awar of RPC + * layouts. --umka + */ +static int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, + unsigned int acl_size, int mdscapa, + struct ptlrpc_request *req) { - struct mds_body *body; + struct mdt_body *body; void *eadata; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) }; - int bufcount = 2, rc; + int size[5] = { sizeof(struct ptlrpc_body), + sizeof(*body), + ea_size, + acl_size, + sizeof(struct lustre_capa) }; + int offset, rc; ENTRY; - /* request message already built */ - - if (ea_size != 0) { - size[bufcount++] = ea_size; + /* Request message already built. */ + if (ea_size) CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n", ea_size); - } - if (acl_size) { - size[bufcount++] = acl_size; + if (acl_size) CDEBUG(D_INODE, "reserved %u bytes for ACL\n", acl_size); - } - ptlrpc_req_set_repsize(req, bufcount, size); + ptlrpc_req_set_repsize(req, 5, size); rc = ptlrpc_queue_wait(req); if (rc != 0) RETURN (rc); body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); + lustre_swab_mdt_body); if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); + CERROR ("Can't unpack mdt_body\n"); RETURN (-EPROTO); } CDEBUG(D_NET, "mode: %o\n", body->mode); - LASSERT_REPSWAB(req, REPLY_REC_OFF + 1); + offset = REPLY_REC_OFF + 1; + LASSERT_REPSWAB(req, offset); if (body->eadatasize != 0) { /* reply indicates presence of eadata; check it's there... */ - eadata = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, + eadata = lustre_msg_buf(req->rq_repmsg, offset++, body->eadatasize); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); RETURN (-EPROTO); } } - + if (body->valid & OBD_MD_FLMODEASIZE) { - if (exp->exp_obd->u.cli.cl_max_mds_easize < body->max_mdsize) - exp->exp_obd->u.cli.cl_max_mds_easize = - body->max_mdsize; - if (exp->exp_obd->u.cli.cl_max_mds_cookiesize < - body->max_cookiesize) - exp->exp_obd->u.cli.cl_max_mds_cookiesize = - body->max_cookiesize; + struct client_obd *cli = &exp->exp_obd->u.cli; + + if (cli->cl_max_mds_easize < body->max_mdsize) + cli->cl_max_mds_easize = body->max_mdsize; + if (cli->cl_max_mds_cookiesize < body->max_cookiesize) + cli->cl_max_mds_cookiesize = body->max_cookiesize; + } + + offset += !!body->aclsize; + + if (body->valid & OBD_MD_FLMDSCAPA) { + struct lustre_capa *capa; + + LASSERT(mdscapa); + capa = lustre_unpack_capa(req->rq_repmsg, offset++); + if (capa == NULL) { + CERROR("Missing/short client MDS capability\n"); + RETURN(-EPROTO); + } } RETURN (0); } -int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, unsigned int ea_size, +int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, int ea_size, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mds_body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; int acl_size = 0, rc; ENTRY; - /* XXX do we need to make another request here? We just did a getattr - * to do the lookup in the first place. + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; + + /* + * XXX: Do we need to make another request here? We just did a getattr + * to do the lookup in the first place. */ req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_GETATTR, 2, size, NULL); + MDS_GETATTR, 3, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size, + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size, MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/); - /* currently only root inode will call us with FLACL */ - if (valid & OBD_MD_FLACL) + if (valid & OBD_MD_FLRMTPERM) + acl_size = sizeof(struct mdt_remote_perm); + + /* Currently only root inode will call us with FLACL */ + else if (valid & OBD_MD_FLACL) acl_size = LUSTRE_POSIX_ACL_MAX_SIZE; - rc = mdc_getattr_common(exp, ea_size, acl_size, req); + rc = mdc_getattr_common(exp, ea_size, acl_size, + !!(valid & OBD_MD_FLMDSCAPA), req); if (rc != 0) { ptlrpc_req_finished (req); req = NULL; @@ -199,28 +265,35 @@ int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, RETURN (rc); } -int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid, - const char *filename, int namelen, unsigned long valid, - unsigned int ea_size, struct ptlrpc_request **request) +int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, const char *filename, int namelen, + obd_valid valid, int ea_size, + struct ptlrpc_request **request) { struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), namelen}; + struct mdt_body *body; + int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body), 0, namelen}; + int rc; ENTRY; + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_GETATTR_NAME, 3, size, NULL); + MDS_GETATTR_NAME, 4, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size, + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size, MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/); - - LASSERT(strnlen(filename, namelen) == namelen - 1); - memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen), - filename, namelen); - rc = mdc_getattr_common(exp, ea_size, 0, req); + if (filename) { + LASSERT(strnlen(filename, namelen) == namelen - 1); + memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, namelen), + filename, namelen); + } + + rc = mdc_getattr_common(exp, ea_size, 0, !!(valid & OBD_MD_FLMDSCAPA), + req); if (rc != 0) { ptlrpc_req_finished (req); req = NULL; @@ -230,19 +303,55 @@ int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid, RETURN(rc); } +static int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid, + const struct lu_fid *cfid, struct ptlrpc_request **request) +{ + int size[2] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body) }; + struct ptlrpc_request *req; + struct mdt_body *body; + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_IS_SUBDIR, 2, size, NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, 0); + + ptlrpc_req_set_repsize(req, 2, size); + rc = ptlrpc_queue_wait(req); + if (rc != 0 && rc != -EREMOTE) + GOTO(out, rc); + + body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), + lustre_swab_mdt_body); + if (body == NULL) { + CERROR ("Can't unpack mdt_body\n"); + GOTO(out, rc = -EPROTO); + } + EXIT; + out: + *request = req; + return rc; +} + static -int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid, +int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, int opcode, obd_valid valid, const char *xattr_name, const char *input, int input_size, int output_size, int flags, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mds_body) }; - // int size[3] = {sizeof(struct mds_body)}, bufcnt = 1; - int rc, xattr_namelen = 0, bufcnt = 2, offset; + int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + int bufcnt = 3, offset = REQ_REC_OFF + 2; + int rc, xattr_namelen = 0, remote_acl = 0; void *tmp; ENTRY; + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; if (xattr_name) { xattr_namelen = strlen(xattr_name) + 1; size[bufcnt++] = xattr_namelen; @@ -258,13 +367,14 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid, GOTO(out, rc = -ENOMEM); /* request data */ - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags); + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, output_size, flags); - offset = REQ_REC_OFF + 1; if (xattr_name) { tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen); memcpy(tmp, xattr_name, xattr_namelen); + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + remote_acl = 1; } if (input_size) { tmp = lustre_msg_buf(req->rq_reqmsg, offset++, input_size); @@ -273,7 +383,7 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid, /* reply buffers */ if (opcode == MDS_GETXATTR) { - size[REPLY_REC_OFF] = sizeof(struct mds_body); + size[REPLY_REC_OFF] = sizeof(struct mdt_body); bufcnt = 2; } else { bufcnt = 1; @@ -281,27 +391,28 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid, /* we do this even output_size is 0, because server is doing that */ size[bufcnt++] = output_size; - ptlrpc_req_set_repsize(req, bufcnt, size); /* make rpc */ - if (opcode == MDS_SETXATTR) + /* NB: set remote acl doesn't need hold rpc lock, because it just + * send command to MDS, and when it's executed on mountpoint on MDS, + * another mdc_xattr_common() will be invoked there. */ + if (opcode == MDS_SETXATTR && !remote_acl) mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - if (opcode == MDS_SETXATTR) + if (opcode == MDS_SETXATTR && !remote_acl) mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc != 0) GOTO(err_out, rc); if (opcode == MDS_GETXATTR) { - struct mds_body * body = lustre_swab_repbuf(req, REPLY_REC_OFF, - sizeof(*body), - lustre_swab_mds_body); + struct mdt_body * body = lustre_swab_repbuf(req, REPLY_REC_OFF, + sizeof(*body), lustre_swab_mdt_body); if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); + CERROR ("Can't unpack mdt_body\n"); GOTO(err_out, rc = -EPROTO); } } @@ -314,46 +425,22 @@ err_out: goto out; } -int mdc_setxattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, +int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, const char *xattr_name, + const char *input, int input_size, int output_size, int flags, struct ptlrpc_request **request) { - return mdc_xattr_common(exp, fid, MDS_SETXATTR, valid, xattr_name, + return mdc_xattr_common(exp, fid, oc, MDS_SETXATTR, valid, xattr_name, input, input_size, output_size, flags, request); } -int mdc_getxattr(struct obd_export *exp, struct ll_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, struct ptlrpc_request **request) -{ - return mdc_xattr_common(exp, fid, MDS_GETXATTR, valid, xattr_name, - input, input_size, output_size, 0, request); -} - -/* This should be called with both the request and the reply still packed. */ -void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, - int repoff) +int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, obd_valid valid, const char *xattr_name, + const char *input, int input_size, int output_size, int flags, + struct ptlrpc_request **request) { - struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff, - sizeof(*rec)); - struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff, - sizeof(*body)); - - LASSERT (rec != NULL); - LASSERT (body != NULL); - - memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); - if (body->fid1.id == 0) { - DEBUG_REQ(D_ERROR, req, "saving replay request with id = 0 " - "gen = %u", body->fid1.generation); - LBUG(); - } - - DEBUG_REQ(D_HA, req, "storing generation %u for ino "LPU64, - rec->cr_replayfid.generation, rec->cr_replayfid.id); + return mdc_xattr_common(exp, fid, oc, MDS_GETXATTR, valid, xattr_name, + input, input_size, output_size, flags, request); } #ifdef CONFIG_FS_POSIX_ACL @@ -361,7 +448,7 @@ static int mdc_unpack_acl(struct obd_export *exp, struct ptlrpc_request *req, struct lustre_md *md, unsigned int offset) { - struct mds_body *body = md->body; + struct mdt_body *body = md->body; struct posix_acl *acl; void *buf; int rc; @@ -399,11 +486,12 @@ int mdc_unpack_acl(struct obd_export *exp, struct ptlrpc_request *req, #define mdc_unpack_acl(exp, req, md, offset) 0 #endif -int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, - struct obd_export *exp, +int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, + int offset, struct obd_export *dt_exp, + struct obd_export *md_exp, struct lustre_md *md) { - int rc = 0; + int rc; ENTRY; LASSERT(md); @@ -421,7 +509,7 @@ int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, LASSERT(S_ISREG(md->body->mode)); if (md->body->eadatasize == 0) { - CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n"); + CERROR("OBD_MD_FLEASIZE set, but eadatasize 0\n"); RETURN(-EPROTO); } lmmsize = md->body->eadatasize; @@ -429,46 +517,97 @@ int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, LASSERT (lmm != NULL); LASSERT_REPSWABBED(req, offset); - rc = obd_unpackmd(exp, &md->lsm, lmm, lmmsize); + rc = obd_unpackmd(dt_exp, &md->lsm, lmm, lmmsize); if (rc < 0) RETURN(rc); LASSERT (rc >= sizeof (*md->lsm)); - rc = 0; + offset++; + } else if (md->body->valid & OBD_MD_FLDIREA) { + int lmvsize; + struct lov_mds_md *lmv; + + LASSERT(S_ISDIR(md->body->mode)); + if (md->body->eadatasize == 0) { + CERROR("OBD_MD_FLEASIZE is set, but eadatasize 0\n"); + RETURN(-EPROTO); + } + if (md->body->valid & OBD_MD_MEA) { + lmvsize = md->body->eadatasize; + lmv = lustre_msg_buf(req->rq_repmsg, offset, lmvsize); + LASSERT (lmv != NULL); + LASSERT_REPSWABBED(req, offset); + + rc = obd_unpackmd(md_exp, (void *)&md->mea, lmv, + lmvsize); + if (rc < 0) + RETURN(rc); + + LASSERT (rc >= sizeof (*md->mea)); + } offset++; } + rc = 0; - /* for ACL, it's possible that FLACL is set but aclsize is zero. - * only when aclsize != 0 there's an actual segment for ACL in - * reply buffer. - */ - if ((md->body->valid & OBD_MD_FLACL) && md->body->aclsize) { - rc = mdc_unpack_acl(exp, req, md, offset); - if (rc) - GOTO(err_out, rc); - offset++; + /* remote permission */ + if (md->body->valid & OBD_MD_FLRMTPERM) { + md->remote_perm = lustre_msg_buf(req->rq_repmsg, offset++, + sizeof(struct mdt_remote_perm)); + LASSERT(md->remote_perm); } -out: - RETURN(rc); -err_out: - if (md->lsm) - obd_free_memmd(exp, &md->lsm); - goto out; -} + /* for ACL, it's possible that FLACL is set but aclsize is zero. only + * when aclsize != 0 there's an actual segment for ACL in reply + * buffer. */ + else if (md->body->valid & OBD_MD_FLACL) { + if (md->body->aclsize) { + rc = mdc_unpack_acl(dt_exp, req, md, offset++); + if (rc) + GOTO(out, rc); +#ifdef CONFIG_FS_POSIX_ACL + } else { + md->posix_acl = NULL; +#endif + } + } -void mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) -{ - if (md->lsm) - obd_free_memmd(exp, &md->lsm); + if (md->body->valid & OBD_MD_FLMDSCAPA) { + struct obd_capa *oc = mdc_unpack_capa(req, offset++); + + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + md->mds_capa = oc; + } + + if (md->body->valid & OBD_MD_FLOSSCAPA) { + struct obd_capa *oc = mdc_unpack_capa(req, offset++); + + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + md->oss_capa = oc; + } + EXIT; +out: + if (rc) { + if (md->oss_capa) + free_capa(md->oss_capa); + if (md->mds_capa) + free_capa(md->mds_capa); #ifdef CONFIG_FS_POSIX_ACL - if (md->posix_acl) { posix_acl_release(md->posix_acl); - md->posix_acl = NULL; - } #endif + if (md->lsm) + obd_free_memmd(dt_exp, &md->lsm); + } + return rc; +} + +int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) +{ + ENTRY; + RETURN(0); } static void mdc_commit_open(struct ptlrpc_request *req) @@ -490,112 +629,131 @@ static void mdc_commit_open(struct ptlrpc_request *req) static void mdc_replay_open(struct ptlrpc_request *req) { struct mdc_open_data *mod = req->rq_cb_data; - struct obd_client_handle *och; struct ptlrpc_request *close_req; + struct obd_client_handle *och; struct lustre_handle old; - struct mds_body *body; + struct mdt_body *body; ENTRY; - body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); - LASSERT (body != NULL); - if (mod == NULL) { DEBUG_REQ(D_ERROR, req, - "can't properly replay without open data"); + "Can't properly replay without open data."); EXIT; return; } + body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body), + lustre_swab_mdt_body); + och = mod->mod_och; if (och != NULL) { struct lustre_handle *file_fh; + LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); + LASSERT(body != NULL); + file_fh = &och->och_fh; CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n", file_fh->cookie, body->handle.cookie); memcpy(&old, file_fh, sizeof(old)); memcpy(file_fh, &body->handle, sizeof(*file_fh)); } - close_req = mod->mod_close_req; if (close_req != NULL) { - struct mds_body *close_body; + struct mdt_epoch *epoch; + LASSERT(lustre_msg_get_opc(close_req->rq_reqmsg) == MDS_CLOSE); - close_body = lustre_msg_buf(close_req->rq_reqmsg, REQ_REC_OFF, - sizeof(*close_body)); + LASSERT(body != NULL); + + epoch = lustre_msg_buf(close_req->rq_reqmsg, REQ_REC_OFF, + sizeof(*epoch)); + LASSERT(epoch); if (och != NULL) - LASSERT(!memcmp(&old, &close_body->handle, sizeof old)); + LASSERT(!memcmp(&old, &epoch->handle, sizeof(old))); DEBUG_REQ(D_HA, close_req, "updating close body with new fh"); - memcpy(&close_body->handle, &body->handle, - sizeof(close_body->handle)); + memcpy(&epoch->handle, &body->handle, sizeof(epoch->handle)); } - EXIT; } -void mdc_set_open_replay_data(struct obd_client_handle *och, - struct ptlrpc_request *open_req) +int mdc_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) { struct mdc_open_data *mod; - struct mds_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg, + struct mdt_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg, DLM_INTENT_REC_OFF, sizeof(*rec)); - struct mds_body *body = lustre_msg_buf(open_req->rq_repmsg, + struct mdt_body *body = lustre_msg_buf(open_req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); + struct obd_import *imp = open_req->rq_import; + ENTRY; - /* incoming message in my byte order (it's been swabbed) */ LASSERT(rec != NULL); + + /* Incoming message in my byte order (it's been swabbed). */ LASSERT_REPSWABBED(open_req, DLM_REPLY_REC_OFF); - /* outgoing messages always in my byte order */ + + /* Outgoing messages always in my byte order. */ LASSERT(body != NULL); - if (och) { + /*Only the import is replayable, we set replay_open data */ + if (och && imp->imp_replayable) { OBD_ALLOC(mod, sizeof(*mod)); if (mod == NULL) { - DEBUG_REQ(D_ERROR, open_req, "can't allocate mdc_open_data"); - return; + DEBUG_REQ(D_ERROR, open_req, + "Can't allocate mdc_open_data"); + RETURN(0); } spin_lock(&open_req->rq_lock); if (!open_req->rq_replay) { OBD_FREE(mod, sizeof(*mod)); spin_unlock(&open_req->rq_lock); - return; + RETURN(0); } och->och_mod = mod; mod->mod_och = och; - mod->mod_open_req = open_req; open_req->rq_cb_data = mod; + mod->mod_open_req = open_req; open_req->rq_commit_cb = mdc_commit_open; spin_unlock(&open_req->rq_lock); } - memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); + rec->cr_fid2 = body->fid1; + rec->cr_ioepoch = body->ioepoch; + rec->cr_old_handle.cookie = body->handle.cookie; open_req->rq_replay_cb = mdc_replay_open; - if (body->fid1.id == 0) { - DEBUG_REQ(D_ERROR, open_req, "saving replay request with " - "id = 0 gen = %u", body->fid1.generation); + if (!fid_is_sane(&body->fid1)) { + DEBUG_REQ(D_ERROR, open_req, "Saving replay request with " + "insane fid"); LBUG(); } - DEBUG_REQ(D_HA, open_req, "set up replay data"); + DEBUG_REQ(D_HA, open_req, "Set up open replay data"); + RETURN(0); } -void mdc_clear_open_replay_data(struct obd_client_handle *och) +int mdc_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) { struct mdc_open_data *mod = och->och_mod; + ENTRY; - /* Don't free the structure now (it happens in mdc_commit_open, after + /* + * Don't free the structure now (it happens in mdc_commit_open(), after * we're sure we won't need to fix up the close request in the future), * but make sure that replay doesn't poke at the och, which is about to - * be freed. */ + * be freed. + */ LASSERT(mod != LP_POISON); if (mod != NULL) mod->mod_och = NULL; + och->och_mod = NULL; + RETURN(0); } static void mdc_commit_close(struct ptlrpc_request *req) @@ -621,29 +779,35 @@ static void mdc_commit_close(struct ptlrpc_request *req) LASSERT(open_req->rq_transno != 0); LASSERT(open_req->rq_import == imp); - /* We no longer want to preserve this for transno-unconditional - * replay. */ + /* + * We no longer want to preserve this for transno-unconditional + * replay. Decref open req here as well. + */ spin_lock(&open_req->rq_lock); open_req->rq_replay = 0; spin_unlock(&open_req->rq_lock); } -int mdc_close(struct obd_export *exp, struct obdo *oa, +int mdc_close(struct obd_export *exp, struct md_op_data *op_data, struct obd_client_handle *och, struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); - int reqsize[2] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_body) }; - int rc, repsize[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_body), - obd->u.cli.cl_max_mds_easize, - obd->u.cli.cl_max_mds_cookiesize }; + int reqsize[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_epoch), + sizeof(struct mdt_rec_setattr)}; + int repsize[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body), + obd->u.cli.cl_max_mds_easize, + obd->u.cli.cl_max_mds_cookiesize }; struct ptlrpc_request *req; struct mdc_open_data *mod; + int rc; ENTRY; + reqsize[REQ_REC_OFF + 2] = op_data->op_capa1 ? + sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_CLOSE, 2, reqsize, NULL); + MDS_CLOSE, 4, reqsize, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); @@ -671,8 +835,7 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); } - mdc_close_pack(req, REQ_REC_OFF, oa, oa->o_valid, och); - + mdc_close_pack(req, REQ_REC_OFF, op_data); ptlrpc_req_set_repsize(req, 4, repsize); req->rq_commit_cb = mdc_commit_close; LASSERT(req->rq_cb_data == NULL); @@ -687,7 +850,7 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, req->rq_status); if (rc == 0) rc = req->rq_status ? req->rq_status : -EIO; - } else if (rc == 0) { + } else if (rc == 0 || rc == -EAGAIN) { rc = lustre_msg_get_status(req->rq_repmsg); if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) { DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err " @@ -695,13 +858,15 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, if (rc > 0) rc = -rc; } else if (mod == NULL) { - CERROR("Unexpected: can't find mdc_open_data, but the " - "close succeeded. Please tell CFS.\n"); + if (req->rq_import->imp_replayable) + CERROR("Unexpected: can't find mdc_open_data," + "but close succeeded with replayable imp" + "Please tell CFS.\n"); } if (!lustre_swab_repbuf(req, REPLY_REC_OFF, - sizeof(struct mds_body), - lustre_swab_mds_body)) { - CERROR("Error unpacking mds_body\n"); + sizeof(struct mdt_body), + lustre_swab_mdt_body)) { + CERROR("Error unpacking mdt_body\n"); rc = -EPROTO; } } @@ -709,52 +874,97 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, EXIT; *request = req; out: - if (rc != 0 && req && req->rq_commit_cb) + if (rc != 0 && rc != -EAGAIN && req && req->rq_commit_cb) req->rq_commit_cb(req); return rc; } -int mdc_done_writing(struct obd_export *exp, struct obdo *obdo) +int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, + struct obd_client_handle *och) { + struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_epoch), + sizeof(struct mdt_rec_setattr)}; + int repsize[2] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body)}; + int rc; ENTRY; + if (op_data->op_capa1) + size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_DONE_WRITING, 2, size, NULL); + MDS_DONE_WRITING, 4, size, NULL); if (req == NULL) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - mdc_pack_fid(&body->fid1, obdo->o_id, 0, obdo->o_mode); - body->size = obdo->o_size; - body->blocks = obdo->o_blocks; - body->flags = obdo->o_flags; - body->valid = obdo->o_valid; -// memcpy(&body->handle, &och->och_fh, sizeof(body->handle)); - - ptlrpc_req_set_repsize(req, 2, size); + /* XXX: add DONE_WRITING request to och -- when Size-on-MDS + * recovery will be ready. */ + mdc_close_pack(req, REQ_REC_OFF, op_data); + ptlrpc_req_set_repsize(req, 2, repsize); + mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); rc = ptlrpc_queue_wait(req); + mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL); ptlrpc_req_finished(req); RETURN(rc); } -int mdc_readpage(struct obd_export *exp, struct ll_fid *fid, __u64 offset, - struct page *page, struct ptlrpc_request **request) +#ifdef HAVE_SPLIT_SUPPORT +int mdc_sendpage(struct obd_export *exp, const struct lu_fid *fid, + const struct page *page, int offset) { + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; struct obd_import *imp = class_exp2cliimp(exp); + struct ptlrpc_bulk_desc *desc = NULL; struct ptlrpc_request *req = NULL; + ENTRY; + + CDEBUG(D_INODE, "object: "DFID"\n", PFID(fid)); + + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_WRITEPAGE, 3, + size, NULL); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_request_portal = MDS_READPAGE_PORTAL; + + desc = ptlrpc_prep_bulk_imp(req, 1, BULK_GET_SOURCE, MDS_BULK_PORTAL); + if (desc == NULL) + GOTO(out, rc = -ENOMEM); + + /* NB req now owns desc and will free it when it gets freed. */ + ptlrpc_prep_bulk_page(desc, (struct page *)page, 0, offset); + mdc_readdir_pack(req, REQ_REC_OFF, 0, offset, fid, NULL); + + ptlrpc_req_set_repsize(req, 2, size); + rc = ptlrpc_queue_wait(req); + EXIT; +out: + if (req != NULL) + ptlrpc_req_finished(req); + return rc; +} +EXPORT_SYMBOL(mdc_sendpage); +#endif + +int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, __u64 offset, struct page *page, + struct ptlrpc_request **request) +{ + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + struct obd_import *imp = class_exp2cliimp(exp); struct ptlrpc_bulk_desc *desc = NULL; - struct mds_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + struct ptlrpc_request *req = NULL; + struct mdt_body *body; ENTRY; - CDEBUG(D_INODE, "inode: "LPU64"\n", fid->id); + CDEBUG(D_INODE, "object: "DFID"\n", PFID(fid)); - req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 2, size, + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 3, size, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); @@ -765,29 +975,28 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *fid, __u64 offset, desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL); if (desc == NULL) GOTO(out, rc = -ENOMEM); - /* NB req now owns desc and will free it when it gets freed */ + /* NB req now owns desc and will free it when it gets freed */ ptlrpc_prep_bulk_page(desc, page, 0, CFS_PAGE_SIZE); - - mdc_readdir_pack(req, REQ_REC_OFF, offset, CFS_PAGE_SIZE, fid); + mdc_readdir_pack(req, REQ_REC_OFF, offset, CFS_PAGE_SIZE, fid, oc); ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); if (rc == 0) { body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); + lustre_swab_mdt_body); if (body == NULL) { - CERROR("Can't unpack mds_body\n"); + CERROR("Can't unpack mdt_body\n"); GOTO(out, rc = -EPROTO); } if (req->rq_bulk->bd_nob_transferred != CFS_PAGE_SIZE) { CERROR ("Unexpected # bytes transferred: %d" - " (%lu expected)\n", + " (%ld expected)\n", req->rq_bulk->bd_nob_transferred, CFS_PAGE_SIZE); - GOTO (out, rc = -EPROTO); + GOTO(out, rc = -EPROTO); } } @@ -797,7 +1006,6 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *fid, __u64 offset, return rc; } - static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -870,7 +1078,6 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, spin_lock(&imp->imp_lock); imp->imp_initial_recov = *(int *)val; spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov); RETURN(0); @@ -879,18 +1086,16 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { if (vallen != sizeof(int)) RETURN(-EINVAL); - spin_lock(&imp->imp_lock); imp->imp_initial_recov_bk = *(int *)val; if (imp->imp_initial_recov_bk) imp->imp_initial_recov = 1; spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov_bk); RETURN(0); } - if (KEY_IS("read-only")) { + if (KEY_IS(KEY_READ_ONLY)) { struct ptlrpc_request *req; int size[3] = { sizeof(struct ptlrpc_body), keylen, vallen }; char *bufs[3] = { NULL, key, val }; @@ -898,6 +1103,7 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, if (vallen != sizeof(int)) RETURN(-EINVAL); + spin_lock(&imp->imp_lock); if (*((int *)val)) { imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; imp->imp_connect_data.ocd_connect_flags |= @@ -907,6 +1113,7 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, imp->imp_connect_data.ocd_connect_flags &= ~OBD_CONNECT_RDONLY; } + spin_unlock(&imp->imp_lock); req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_SET_INFO, 3, size, bufs); @@ -925,6 +1132,21 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(rc); } + if (KEY_IS(KEY_FLUSH_CTX)) { + sptlrpc_import_flush_my_ctx(imp); + RETURN(0); + } + if (KEY_IS(KEY_MDS_CONN)) { + struct obd_import *imp = class_exp2cliimp(exp); + + /* mds-mds import */ + spin_lock(&imp->imp_lock); + imp->imp_server_timeout = 1; + spin_unlock(&imp->imp_lock); + imp->imp_client->cli_request_portal = MDS_MDS_PORTAL; + CDEBUG(D_OTHER|D_WARNING, "%s: timeout / 2\n", exp->exp_obd->obd_name); + RETURN(0); + } RETURN(rc); } @@ -934,8 +1156,7 @@ int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, { int rc = -EINVAL; - if (keylen == strlen("max_easize") && - memcmp(key, "max_easize", strlen("max_easize")) == 0) { + if (KEY_IS(KEY_MAX_EASIZE)) { int mdsize, *max_easize; if (*vallen != sizeof(int)) @@ -947,6 +1168,17 @@ int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; RETURN(0); } + if (KEY_IS(KEY_CONN_DATA)) { + struct obd_import *imp = class_exp2cliimp(exp); + struct obd_connect_data *data = val; + + if (*vallen != sizeof(*data)) + RETURN(-EINVAL); + + *data = imp->imp_connect_data; + RETURN(0); + } + RETURN(rc); } @@ -973,8 +1205,13 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, rc = ptlrpc_queue_wait(req); - if (rc) + if (rc) { + /* check connection error first */ + if (obd->u.cli.cl_import->imp_connect_error) + rc = obd->u.cli.cl_import->imp_connect_error; + GOTO(out, rc); + } msfs = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*msfs), lustre_swab_obd_statfs); @@ -991,22 +1228,25 @@ out: return rc; } -static int mdc_pin(struct obd_export *exp, obd_id ino, __u32 gen, int type, +static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct obd_client_handle *handle, int flag) { struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + struct mdt_body *body; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_PIN, 2, size, NULL); + MDS_PIN, 3, size, NULL); if (req == NULL) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - mdc_pack_fid(&body->fid1, ino, gen, type); + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof (*body)); + body->fid1 = *fid; body->flags = flag; + mdc_pack_capa(req, REQ_REC_OFF + 1, oc); ptlrpc_req_set_repsize(req, 2, size); @@ -1020,7 +1260,7 @@ static int mdc_pin(struct obd_export *exp, obd_id ino, __u32 gen, int type, } body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), - lustre_swab_mds_body); + lustre_swab_mdt_body); if (body == NULL) { ptlrpc_req_finished(req); RETURN(rc); @@ -1043,7 +1283,7 @@ static int mdc_unpin(struct obd_export *exp, struct obd_client_handle *handle, int flag) { struct ptlrpc_request *req; - struct mds_body *body; + struct mdt_body *body; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; @@ -1073,20 +1313,21 @@ static int mdc_unpin(struct obd_export *exp, RETURN(rc); } -int mdc_sync(struct obd_export *exp, struct ll_fid *fid, - struct ptlrpc_request **request) +int mdc_sync(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mds_body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; int rc; ENTRY; + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_SYNC, 2, size, NULL); + MDS_SYNC, 3, size, NULL); if (!req) RETURN(rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0); + mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, oc, 0, 0); ptlrpc_req_set_repsize(req, 2, size); @@ -1108,9 +1349,24 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, switch (event) { case IMP_EVENT_DISCON: { +#if 0 + /* XXX Pass event up to OBDs stack. used only for FLD now */ + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DISCON, NULL); +#endif break; } case IMP_EVENT_INACTIVE: { + struct client_obd *cli = &obd->u.cli; + /* + * Flush current sequence to make client obtain new one + * from server in case of disconnect/reconnect. + * If range is already empty then no need to flush it. + */ + if (cli->cl_seq != NULL && + !range_is_exhausted(&cli->cl_seq->lcs_space)) { + seq_client_flush(cli->cl_seq); + } + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL); break; } @@ -1126,6 +1382,7 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, break; } case IMP_EVENT_OCD: + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL); break; default: @@ -1135,7 +1392,73 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, RETURN(rc); } -static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) +static int mdc_fid_init(struct obd_export *exp) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + char *prefix; + int rc; + ENTRY; + + OBD_ALLOC_PTR(cli->cl_seq); + if (cli->cl_seq == NULL) + RETURN(-ENOMEM); + + OBD_ALLOC(prefix, MAX_OBD_NAME + 5); + if (prefix == NULL) + GOTO(out_free_seq, rc = -ENOMEM); + + snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", + exp->exp_obd->obd_name); + + /* Init client side sequence-manager */ + rc = seq_client_init(cli->cl_seq, exp, + LUSTRE_SEQ_METADATA, + prefix, NULL); + OBD_FREE(prefix, MAX_OBD_NAME + 5); + if (rc) + GOTO(out_free_seq, rc); + + RETURN(rc); +out_free_seq: + OBD_FREE_PTR(cli->cl_seq); + cli->cl_seq = NULL; + return rc; +} + +static int mdc_fid_fini(struct obd_export *exp) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + ENTRY; + + if (cli->cl_seq != NULL) { + seq_client_fini(cli->cl_seq); + OBD_FREE_PTR(cli->cl_seq); + cli->cl_seq = NULL; + } + + RETURN(0); +} + +int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + struct lu_client_seq *seq = cli->cl_seq; + ENTRY; + RETURN(seq_client_alloc_fid(seq, fid)); +} + +/* XXX This method is used only to clear current fid seq + * once fld/mds insert failed */ +static int mdc_fid_delete(struct obd_export *exp, const struct lu_fid *fid) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + + seq_client_flush(cli->cl_seq); + return 0; +} + +static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) { struct client_obd *cli = &obd->u.cli; struct lprocfs_static_vars lvars; @@ -1159,13 +1482,13 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_setattr_lock, rc = -ENOMEM); mdc_init_rpc_lock(cli->cl_close_lock); - rc = client_obd_setup(obd, len, buf); + rc = client_obd_setup(obd, cfg); if (rc) GOTO(err_close_lock, rc); lprocfs_init_vars(mdc, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); - rc = obd_llog_init(obd, obd, 0, NULL, NULL); + rc = obd_llog_init(obd, NULL, obd, 0, NULL, NULL); if (rc) { mdc_cleanup(obd); CERROR("failed to setup llogging subsystems\n"); @@ -1187,42 +1510,22 @@ err_rpc_lock: * us to make MDS RPCs with large enough reply buffers to hold the * maximum-sized (= maximum striped) EA and cookie without having to * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */ -int mdc_init_ea_size(struct obd_export *mdc_exp, struct obd_export *lov_exp) +int mdc_init_ea_size(struct obd_export *exp, int easize, + int def_easize, int cookiesize) { - struct obd_device *obd = mdc_exp->exp_obd; + struct obd_device *obd = exp->exp_obd; struct client_obd *cli = &obd->u.cli; - struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC }; - struct lov_desc desc; - __u32 valsize = sizeof(desc); - __u32 stripes; - int rc, size; ENTRY; - rc = obd_get_info(lov_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC, - &valsize, &desc); - if (rc) - RETURN(rc); - - stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = stripes; - size = obd_size_diskmd(lov_exp, &lsm); - - if (cli->cl_max_mds_easize < size) - cli->cl_max_mds_easize = size; - - lsm.lsm_stripe_count = desc.ld_default_stripe_count; - size = obd_size_diskmd(lov_exp, &lsm); + if (cli->cl_max_mds_easize < easize) + cli->cl_max_mds_easize = easize; - if (cli->cl_default_mds_easize < size) - cli->cl_default_mds_easize = size; + if (cli->cl_default_mds_easize < def_easize) + cli->cl_default_mds_easize = def_easize; - size = stripes * sizeof(struct llog_cookie); - if (cli->cl_max_mds_cookiesize < size) - cli->cl_max_mds_cookiesize = size; + if (cli->cl_max_mds_cookiesize < cookiesize) + cli->cl_max_mds_cookiesize = cookiesize; - CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", - cli->cl_max_mds_easize, cli->cl_max_mds_cookiesize); - RETURN(0); } @@ -1232,7 +1535,7 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ENTRY; switch (stage) { - case OBD_CLEANUP_EARLY: + case OBD_CLEANUP_EARLY: case OBD_CLEANUP_EXPORTS: /* If we set up but never connected, the client import will not have been cleaned. */ @@ -1271,7 +1574,8 @@ static int mdc_cleanup(struct obd_device *obd) } -static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, +static int mdc_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, struct llog_catid *logid, struct obd_uuid *uuid) { @@ -1279,14 +1583,14 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - rc = llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; } - rc = llog_setup(obd, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); @@ -1321,38 +1625,192 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) return(rc); } +/* get remote permission for current user on fid */ +int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) +{ + struct ptlrpc_request *req; + struct mdt_body *body; + struct mdt_remote_perm *perm; + int size[5] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc; + ENTRY; + + size[REQ_REC_OFF + 1] = oc ? sizeof(struct lustre_capa) : 0; + + *request = NULL; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_GETATTR, 3, size, NULL); + if (!req) + RETURN(-ENOMEM); + + mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, oc, 0, 0); + + size[REPLY_REC_OFF + 1] = sizeof(*perm); + ptlrpc_req_set_repsize(req, 5, size); + rc = ptlrpc_queue_wait(req); + if (rc) { + ptlrpc_req_finished(req); + RETURN(rc); + } + + body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), + lustre_swab_mdt_body); + LASSERT(body); + LASSERT(body->valid & OBD_MD_FLRMTPERM); + + perm = lustre_swab_repbuf(req, REPLY_REC_OFF + 1, sizeof(*perm), + lustre_swab_mdt_remote_perm); + LASSERT(perm); + + *request = req; + RETURN(0); +} + +static int mdc_interpret_renew_capa(struct ptlrpc_request *req, void *unused, + int status) +{ + struct obd_capa *oc = req->rq_async_args.pointer_arg[0]; + renew_capa_cb_t cb = req->rq_async_args.pointer_arg[1]; + struct mdt_body *body = NULL; + struct lustre_capa *capa; + ENTRY; + + if (status) + GOTO(out, capa = ERR_PTR(status)); + + body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), + lustre_swab_mdt_body); + if (body == NULL) + GOTO(out, capa = ERR_PTR(-EFAULT)); + + if ((body->valid & OBD_MD_FLOSSCAPA) == 0) + GOTO(out, capa = ERR_PTR(-ENOENT)); + + capa = lustre_unpack_capa(req->rq_repmsg, REPLY_REC_OFF + 1); + if (!capa) + GOTO(out, capa = ERR_PTR(-EFAULT)); + EXIT; +out: + cb(oc, capa); + return 0; +} + +static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc, + renew_capa_cb_t cb) +{ + struct ptlrpc_request *req; + int size[5] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body), + sizeof(struct lustre_capa) }; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_GETATTR, 3, size, NULL); + if (!req) + RETURN(-ENOMEM); + + mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLOSSCAPA, + &oc->c_capa.lc_fid, oc, 0, 0); + + ptlrpc_req_set_repsize(req, 5, size); + req->rq_async_args.pointer_arg[0] = oc; + req->rq_async_args.pointer_arg[1] = cb; + req->rq_interpret_reply = mdc_interpret_renew_capa; + ptlrpcd_add_req(req); + + RETURN(0); +} + +static int mdc_connect(const struct lu_env *env, + struct lustre_handle *dlm_handle, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data) { + struct obd_import *imp = obd->u.cli.cl_import; + + /* mds-mds import features */ + if (data && (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS)) { + spin_lock(&imp->imp_lock); + imp->imp_server_timeout = 1; + spin_unlock(&imp->imp_lock); + imp->imp_client->cli_request_portal = MDS_MDS_PORTAL; + CDEBUG(D_OTHER, "%s: Set 'mds' portal and timeout\n", + obd->obd_name); + } + + return client_connect_import(env, dlm_handle, obd, cluuid, data); +} + struct obd_ops mdc_obd_ops = { - .o_owner = THIS_MODULE, - .o_setup = mdc_setup, - .o_precleanup = mdc_precleanup, - .o_cleanup = mdc_cleanup, - .o_add_conn = client_import_add_conn, - .o_del_conn = client_import_del_conn, - .o_connect = client_connect_import, - .o_disconnect = client_disconnect_export, - .o_iocontrol = mdc_iocontrol, - .o_set_info_async = mdc_set_info_async, - .o_get_info = mdc_get_info, - .o_statfs = mdc_statfs, - .o_pin = mdc_pin, - .o_unpin = mdc_unpin, - .o_import_event = mdc_import_event, - .o_llog_init = mdc_llog_init, - .o_llog_finish = mdc_llog_finish, - .o_process_config = mdc_process_config, + .o_owner = THIS_MODULE, + .o_setup = mdc_setup, + .o_precleanup = mdc_precleanup, + .o_cleanup = mdc_cleanup, + .o_add_conn = client_import_add_conn, + .o_del_conn = client_import_del_conn, + .o_connect = mdc_connect, + .o_disconnect = client_disconnect_export, + .o_iocontrol = mdc_iocontrol, + .o_set_info_async = mdc_set_info_async, + .o_statfs = mdc_statfs, + .o_pin = mdc_pin, + .o_unpin = mdc_unpin, + .o_fid_init = mdc_fid_init, + .o_fid_fini = mdc_fid_fini, + .o_fid_alloc = mdc_fid_alloc, + .o_fid_delete = mdc_fid_delete, + .o_import_event = mdc_import_event, + .o_llog_init = mdc_llog_init, + .o_llog_finish = mdc_llog_finish, + .o_get_info = mdc_get_info, + .o_process_config = mdc_process_config, +}; + +struct md_ops mdc_md_ops = { + .m_getstatus = mdc_getstatus, + .m_change_cbdata = mdc_change_cbdata, + .m_close = mdc_close, + .m_create = mdc_create, + .m_done_writing = mdc_done_writing, + .m_enqueue = mdc_enqueue, + .m_getattr = mdc_getattr, + .m_getattr_name = mdc_getattr_name, + .m_intent_lock = mdc_intent_lock, + .m_link = mdc_link, + .m_is_subdir = mdc_is_subdir, + .m_rename = mdc_rename, + .m_setattr = mdc_setattr, + .m_setxattr = mdc_setxattr, + .m_getxattr = mdc_getxattr, + .m_sync = mdc_sync, + .m_readpage = mdc_readpage, + .m_unlink = mdc_unlink, + .m_cancel_unused = mdc_cancel_unused, + .m_init_ea_size = mdc_init_ea_size, + .m_set_lock_data = mdc_set_lock_data, + .m_lock_match = mdc_lock_match, + .m_get_lustre_md = mdc_get_lustre_md, + .m_free_lustre_md = mdc_free_lustre_md, + .m_set_open_replay_data = mdc_set_open_replay_data, + .m_clear_open_replay_data = mdc_clear_open_replay_data, + .m_get_remote_perm = mdc_get_remote_perm, + .m_renew_capa = mdc_renew_capa }; +extern quota_interface_t mdc_quota_interface; + int __init mdc_init(void) { int rc; struct lprocfs_static_vars lvars; lprocfs_init_vars(mdc, &lvars); + request_module("lquota"); quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface); init_obd_quota_ops(quota_interface, &mdc_obd_ops); - rc = class_register_type(&mdc_obd_ops, lvars.module_vars, - LUSTRE_MDC_NAME); + rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, lvars.module_vars, + LUSTRE_MDC_NAME, NULL); if (rc && quota_interface) PORTAL_SYMBOL_PUT(mdc_quota_interface); @@ -1372,28 +1830,6 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Lustre Metadata Client"); MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(mdc_req2lustre_md); -EXPORT_SYMBOL(mdc_free_lustre_md); -EXPORT_SYMBOL(mdc_change_cbdata); -EXPORT_SYMBOL(mdc_getstatus); -EXPORT_SYMBOL(mdc_getattr); -EXPORT_SYMBOL(mdc_getattr_name); -EXPORT_SYMBOL(mdc_create); -EXPORT_SYMBOL(mdc_unlink); -EXPORT_SYMBOL(mdc_rename); -EXPORT_SYMBOL(mdc_link); -EXPORT_SYMBOL(mdc_readpage); -EXPORT_SYMBOL(mdc_setattr); -EXPORT_SYMBOL(mdc_close); -EXPORT_SYMBOL(mdc_done_writing); -EXPORT_SYMBOL(mdc_sync); -EXPORT_SYMBOL(mdc_set_open_replay_data); -EXPORT_SYMBOL(mdc_clear_open_replay_data); -EXPORT_SYMBOL(mdc_store_inode_generation); -EXPORT_SYMBOL(mdc_init_ea_size); -EXPORT_SYMBOL(mdc_getxattr); -EXPORT_SYMBOL(mdc_setxattr); - module_init(mdc_init); module_exit(mdc_exit); #endif diff --git a/lustre/mdd/.cvsignore b/lustre/mdd/.cvsignore new file mode 100644 index 0000000..5d26f00 --- /dev/null +++ b/lustre/mdd/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lustre/mdd/Makefile.in b/lustre/mdd/Makefile.in new file mode 100644 index 0000000..f1568ea --- /dev/null +++ b/lustre/mdd/Makefile.in @@ -0,0 +1,7 @@ +MODULES := mdd +mdd-objs := mdd_object.o mdd_lov.o mdd_orphans.o mdd_lproc.o mdd_dir.o +mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o + +EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs + +@INCLUDE_RULES@ diff --git a/lustre/mdd/autoMakefile.am b/lustre/mdd/autoMakefile.am new file mode 100644 index 0000000..3a7413f --- /dev/null +++ b/lustre/mdd/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +modulefs_DATA = mdd$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES := $(mdd-objs:%.o=%.c) mdd_internal.h diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c new file mode 100644 index 0000000..0ad9650 --- /dev/null +++ b/lustre/mdd/mdd_device.c @@ -0,0 +1,503 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <linux/jbd.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> + +#include <linux/ldiskfs_fs.h> +#include <lustre_mds.h> +#include <lustre/lustre_idl.h> + +#include "mdd_internal.h" + +struct md_device_operations mdd_ops; + +static const char *mdd_root_dir_name = "root"; +static int mdd_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + struct mdd_device *mdd = lu2mdd_dev(d); + int rc; + ENTRY; + + mdd->mdd_child = lu2dt_dev(next); + + /* Prepare transactions callbacks. */ + mdd->mdd_txn_cb.dtc_txn_start = mdd_txn_start_cb; + mdd->mdd_txn_cb.dtc_txn_stop = mdd_txn_stop_cb; + mdd->mdd_txn_cb.dtc_txn_commit = mdd_txn_commit_cb; + mdd->mdd_txn_cb.dtc_cookie = mdd; + INIT_LIST_HEAD(&mdd->mdd_txn_cb.dtc_linkage); + rc = mdd_procfs_init(mdd, name); + RETURN(rc); +} + +static struct lu_device *mdd_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + struct mdd_device *mdd = lu2mdd_dev(d); + struct lu_device *next = &mdd->mdd_child->dd_lu_dev; + int rc; + + rc = mdd_procfs_fini(mdd); + if (rc) { + CERROR("proc fini error %d \n", rc); + return ERR_PTR(rc); + } + return next; +} + +static int mdd_mount(const struct lu_env *env, struct mdd_device *mdd) +{ + int rc; + struct dt_object *root; + ENTRY; + + dt_txn_callback_add(mdd->mdd_child, &mdd->mdd_txn_cb); + root = dt_store_open(env, mdd->mdd_child, mdd_root_dir_name, + &mdd->mdd_root_fid); + if (!IS_ERR(root)) { + LASSERT(root != NULL); + lu_object_put(env, &root->do_lu); + rc = orph_index_init(env, mdd); + } else + rc = PTR_ERR(root); + + RETURN(rc); +} + +static void mdd_device_shutdown(const struct lu_env *env, + struct mdd_device *m, struct lustre_cfg *cfg) +{ + ENTRY; + dt_txn_callback_del(m->mdd_child, &m->mdd_txn_cb); + if (m->mdd_obd_dev) + mdd_fini_obd(env, m, cfg); + orph_index_fini(env, m); + /* remove upcall device*/ + md_upcall_fini(&m->mdd_md_dev); + EXIT; +} + +static int mdd_process_config(const struct lu_env *env, + struct lu_device *d, struct lustre_cfg *cfg) +{ + struct mdd_device *m = lu2mdd_dev(d); + struct dt_device *dt = m->mdd_child; + struct lu_device *next = &dt->dd_lu_dev; + int rc; + ENTRY; + + switch (cfg->lcfg_command) { + case LCFG_SETUP: + rc = next->ld_ops->ldo_process_config(env, next, cfg); + if (rc) + GOTO(out, rc); + dt->dd_ops->dt_conf_get(env, dt, &m->mdd_dt_conf); + + rc = mdd_init_obd(env, m, cfg); + if (rc) { + CERROR("lov init error %d \n", rc); + GOTO(out, rc); + } + rc = mdd_mount(env, m); + if (rc) + GOTO(out, rc); + rc = mdd_txn_init_credits(env, m); + break; + case LCFG_CLEANUP: + mdd_device_shutdown(env, m, cfg); + default: + rc = next->ld_ops->ldo_process_config(env, next, cfg); + break; + } +out: + RETURN(rc); +} +#if 0 +static int mdd_lov_set_nextid(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct mds_obd *mds = &mdd->mdd_obd_dev->u.mds; + int rc; + ENTRY; + + LASSERT(mds->mds_lov_objids != NULL); + rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_NEXT_ID), + KEY_NEXT_ID, mds->mds_lov_desc.ld_tgt_count, + mds->mds_lov_objids, NULL); + + RETURN(rc); +} + +static int mdd_cleanup_unlink_llog(const struct lu_env *env, + struct mdd_device *mdd) +{ + /* XXX: to be implemented! */ + return 0; +} +#endif + +static int mdd_recovery_complete(const struct lu_env *env, + struct lu_device *d) +{ + struct mdd_device *mdd = lu2mdd_dev(d); + struct lu_device *next = &mdd->mdd_child->dd_lu_dev; + struct obd_device *obd = mdd2obd_dev(mdd); + int rc; + ENTRY; + + LASSERT(mdd != NULL); + LASSERT(obd != NULL); +#if 0 + /* XXX: Do we need this in new stack? */ + rc = mdd_lov_set_nextid(env, mdd); + if (rc) { + CERROR("mdd_lov_set_nextid() failed %d\n", + rc); + RETURN(rc); + } + + /* XXX: cleanup unlink. */ + rc = mdd_cleanup_unlink_llog(env, mdd); + if (rc) { + CERROR("mdd_cleanup_unlink_llog() failed %d\n", + rc); + RETURN(rc); + } +#endif + /* Call that with obd_recovering = 1 just to update objids */ + obd_notify(obd->u.mds.mds_osc_obd, NULL, (obd->obd_async_recov ? + OBD_NOTIFY_SYNC_NONBLOCK : OBD_NOTIFY_SYNC), NULL); + + /* Drop obd_recovering to 0 and call o_postrecov to recover mds_lov */ + obd->obd_recovering = 0; + obd->obd_type->typ_dt_ops->o_postrecov(obd); + + /* XXX: orphans handling. */ + __mdd_orphan_cleanup(env, mdd); + rc = next->ld_ops->ldo_recovery_complete(env, next); + + RETURN(rc); +} + +struct lu_device_operations mdd_lu_ops = { + .ldo_object_alloc = mdd_object_alloc, + .ldo_process_config = mdd_process_config, + .ldo_recovery_complete = mdd_recovery_complete +}; + +/* + * No permission check is needed. + */ +static int mdd_root_get(const struct lu_env *env, + struct md_device *m, struct lu_fid *f) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + + ENTRY; + *f = mdd->mdd_root_fid; + RETURN(0); +} + +/* + * No permission check is needed. + */ +static int mdd_statfs(const struct lu_env *env, struct md_device *m, + struct kstatfs *sfs) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + int rc; + + ENTRY; + + rc = mdd_child_ops(mdd)->dt_statfs(env, mdd->mdd_child, sfs); + + RETURN(rc); +} + +/* + * No permission check is needed. + */ +static int mdd_maxsize_get(const struct lu_env *env, struct md_device *m, + int *md_size, int *cookie_size) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + ENTRY; + + *md_size = mdd_lov_mdsize(env, mdd); + *cookie_size = mdd_lov_cookiesize(env, mdd); + + RETURN(0); +} + +static int mdd_init_capa_ctxt(const struct lu_env *env, struct md_device *m, + int mode, unsigned long timeout, __u32 alg, + struct lustre_capa_key *keys) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct mds_obd *mds = &mdd2obd_dev(mdd)->u.mds; + int rc; + ENTRY; + + mds->mds_capa_keys = keys; + rc = mdd_child_ops(mdd)->dt_init_capa_ctxt(env, mdd->mdd_child, mode, + timeout, alg, keys); + RETURN(rc); +} + +static int mdd_update_capa_key(const struct lu_env *env, + struct md_device *m, + struct lustre_capa_key *key) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_export *lov_exp = mdd2obd_dev(mdd)->u.mds.mds_osc_exp; + int rc; + ENTRY; + + rc = obd_set_info_async(lov_exp, strlen(KEY_CAPA_KEY), KEY_CAPA_KEY, + sizeof(*key), key, NULL); + RETURN(rc); +} + +static struct lu_device *mdd_device_alloc(const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *lcfg) +{ + struct lu_device *l; + struct mdd_device *m; + + OBD_ALLOC_PTR(m); + if (m == NULL) { + l = ERR_PTR(-ENOMEM); + } else { + md_device_init(&m->mdd_md_dev, t); + l = mdd2lu_dev(m); + l->ld_ops = &mdd_lu_ops; + m->mdd_md_dev.md_ops = &mdd_ops; + md_upcall_init(&m->mdd_md_dev, NULL); + } + + return l; +} + +static void mdd_device_free(const struct lu_env *env, + struct lu_device *lu) +{ + struct mdd_device *m = lu2mdd_dev(lu); + + LASSERT(atomic_read(&lu->ld_ref) == 0); + md_device_fini(&m->mdd_md_dev); + OBD_FREE_PTR(m); +} + +static struct obd_ops mdd_obd_device_ops = { + .o_owner = THIS_MODULE +}; + +static void *mdd_ucred_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct md_ucred *uc; + + OBD_ALLOC_PTR(uc); + if (uc == NULL) + uc = ERR_PTR(-ENOMEM); + return uc; +} + +static void mdd_ucred_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct md_ucred *uc = data; + if (!IS_ERR(uc)) + OBD_FREE_PTR(uc); +} + +static struct lu_context_key mdd_ucred_key = { + .lct_tags = LCT_SESSION, + .lct_init = mdd_ucred_key_init, + .lct_fini = mdd_ucred_key_fini +}; + +struct md_ucred *md_ucred(const struct lu_env *env) +{ + LASSERT(env->le_ses != NULL); + return lu_context_key_get(env->le_ses, &mdd_ucred_key); +} +EXPORT_SYMBOL(md_ucred); + +static void *mdd_capainfo_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct md_capainfo *ci; + + OBD_ALLOC_PTR(ci); + if (ci == NULL) + ci = ERR_PTR(-ENOMEM); + return ci; +} + +static void mdd_capainfo_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct md_capainfo *ci = data; + if (!IS_ERR(ci)) + OBD_FREE_PTR(ci); +} + +struct lu_context_key mdd_capainfo_key = { + .lct_tags = LCT_SESSION, + .lct_init = mdd_capainfo_key_init, + .lct_fini = mdd_capainfo_key_fini +}; + +struct md_capainfo *md_capainfo(const struct lu_env *env) +{ + /* NB, in mdt_init0 */ + if (env->le_ses == NULL) + return NULL; + return lu_context_key_get(env->le_ses, &mdd_capainfo_key); +} +EXPORT_SYMBOL(md_capainfo); + +static int mdd_type_init(struct lu_device_type *t) +{ + int result; + + LU_CONTEXT_KEY_INIT(&mdd_thread_key); + result = lu_context_key_register(&mdd_thread_key); + if (result == 0) { + LU_CONTEXT_KEY_INIT(&mdd_ucred_key); + result = lu_context_key_register(&mdd_ucred_key); + } + if (result == 0) { + LU_CONTEXT_KEY_INIT(&mdd_capainfo_key); + result = lu_context_key_register(&mdd_capainfo_key); + } + return result; +} + +static void mdd_type_fini(struct lu_device_type *t) +{ + lu_context_key_degister(&mdd_capainfo_key); + lu_context_key_degister(&mdd_ucred_key); + lu_context_key_degister(&mdd_thread_key); +} + +struct md_device_operations mdd_ops = { + .mdo_statfs = mdd_statfs, + .mdo_root_get = mdd_root_get, + .mdo_maxsize_get = mdd_maxsize_get, + .mdo_init_capa_ctxt = mdd_init_capa_ctxt, + .mdo_update_capa_key= mdd_update_capa_key, +}; + +static struct lu_device_type_operations mdd_device_type_ops = { + .ldto_init = mdd_type_init, + .ldto_fini = mdd_type_fini, + + .ldto_device_alloc = mdd_device_alloc, + .ldto_device_free = mdd_device_free, + + .ldto_device_init = mdd_device_init, + .ldto_device_fini = mdd_device_fini +}; + +static struct lu_device_type mdd_device_type = { + .ldt_tags = LU_DEVICE_MD, + .ldt_name = LUSTRE_MDD_NAME, + .ldt_ops = &mdd_device_type_ops, + .ldt_ctx_tags = LCT_MD_THREAD +}; + +static void *mdd_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct mdd_thread_info *info; + + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + return info; +} + +static void mdd_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct mdd_thread_info *info = data; + if (info->mti_max_lmm != NULL) + OBD_FREE(info->mti_max_lmm, info->mti_max_lmm_size); + if (info->mti_max_cookie != NULL) + OBD_FREE(info->mti_max_cookie, info->mti_max_cookie_size); + OBD_FREE_PTR(info); +} + +struct lu_context_key mdd_thread_key = { + .lct_tags = LCT_MD_THREAD, + .lct_init = mdd_key_init, + .lct_fini = mdd_key_fini +}; + +struct lprocfs_vars lprocfs_mdd_obd_vars[] = { + { 0 } +}; + +struct lprocfs_vars lprocfs_mdd_module_vars[] = { + { 0 } +}; + +LPROCFS_INIT_VARS(mdd, lprocfs_mdd_module_vars, lprocfs_mdd_obd_vars); + +static int __init mdd_mod_init(void) +{ + struct lprocfs_static_vars lvars; + lprocfs_init_vars(mdd, &lvars); + return class_register_type(&mdd_obd_device_ops, NULL, lvars.module_vars, + LUSTRE_MDD_NAME, &mdd_device_type); +} + +static void __exit mdd_mod_exit(void) +{ + class_unregister_type(LUSTRE_MDD_NAME); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); +MODULE_DESCRIPTION("Lustre Meta-data Device Prototype ("LUSTRE_MDD_NAME")"); +MODULE_LICENSE("GPL"); + +cfs_module(mdd, "0.1.0", mdd_mod_init, mdd_mod_exit); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c new file mode 100644 index 0000000..b20bb9c --- /dev/null +++ b/lustre/mdd/mdd_dir.c @@ -0,0 +1,1636 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <linux/jbd.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> + +#include <linux/ldiskfs_fs.h> +#include <lustre_mds.h> +#include <lustre/lustre_idl.h> +#include <lustre_fid.h> + +#include "mdd_internal.h" + +static const char dot[] = "."; +static const char dotdot[] = ".."; + +static struct lu_name lname_dotdot = { + (char *) dotdot, + sizeof(dotdot) - 1 +}; + +static int __mdd_lookup(const struct lu_env *env, struct md_object *pobj, + const struct lu_name *lname, struct lu_fid* fid, + int mask); +static int +__mdd_lookup_locked(const struct lu_env *env, struct md_object *pobj, + const struct lu_name *lname, struct lu_fid* fid, int mask) +{ + char *name = lname->ln_name; + struct mdd_object *mdd_obj = md2mdd_obj(pobj); + struct dynlock_handle *dlh; + int rc; + + dlh = mdd_pdo_read_lock(env, mdd_obj, name); + if (unlikely(dlh == NULL)) + return -ENOMEM; + rc = __mdd_lookup(env, pobj, lname, fid, mask); + mdd_pdo_read_unlock(env, mdd_obj, dlh); + + return rc; +} + +static int mdd_lookup(const struct lu_env *env, + struct md_object *pobj, const struct lu_name *lname, + struct lu_fid* fid, struct md_op_spec *spec) +{ + int rc; + ENTRY; + rc = __mdd_lookup_locked(env, pobj, lname, fid, MAY_EXEC); + RETURN(rc); +} + + +static int mdd_parent_fid(const struct lu_env *env, struct mdd_object *obj, + struct lu_fid *fid) +{ + return __mdd_lookup_locked(env, &obj->mod_obj, &lname_dotdot, fid, 0); +} + +/* + * For root fid use special function, whcih does not compare version component + * of fid. Vresion component is different for root fids on all MDTs. + */ +static int mdd_is_root(struct mdd_device *mdd, const struct lu_fid *fid) +{ + return fid_seq(&mdd->mdd_root_fid) == fid_seq(fid) && + fid_oid(&mdd->mdd_root_fid) == fid_oid(fid); +} + +/* + * return 1: if lf is the fid of the ancestor of p1; + * return 0: if not; + * + * return -EREMOTE: if remote object is found, in this + * case fid of remote object is saved to @pf; + * + * otherwise: values < 0, errors. + */ +static int mdd_is_parent(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *p1, + const struct lu_fid *lf, + struct lu_fid *pf) +{ + struct mdd_object *parent = NULL; + struct lu_fid *pfid; + int rc; + ENTRY; + + LASSERT(!lu_fid_eq(mdo2fid(p1), lf)); + pfid = &mdd_env_info(env)->mti_fid; + + /* Check for root first. */ + if (mdd_is_root(mdd, mdo2fid(p1))) + RETURN(0); + + for(;;) { + /* this is done recursively, bypass capa for each obj */ + mdd_set_capainfo(env, 4, p1, BYPASS_CAPA); + rc = mdd_parent_fid(env, p1, pfid); + if (rc) + GOTO(out, rc); + if (mdd_is_root(mdd, pfid)) + GOTO(out, rc = 0); + if (lu_fid_eq(pfid, lf)) + GOTO(out, rc = 1); + if (parent) + mdd_object_put(env, parent); + parent = mdd_object_find(env, mdd, pfid); + + /* cross-ref parent */ + if (parent == NULL) { + if (pf != NULL) + *pf = *pfid; + GOTO(out, rc = -EREMOTE); + } else if (IS_ERR(parent)) + GOTO(out, rc = PTR_ERR(parent)); + p1 = parent; + } + EXIT; +out: + if (parent && !IS_ERR(parent)) + mdd_object_put(env, parent); + return rc; +} + +/* + * No permission check is needed. + * + * returns 1: if fid is ancestor of @mo; + * returns 0: if fid is not a ancestor of @mo; + * + * returns EREMOTE if remote object is found, fid of remote object is saved to + * @fid; + * + * returns < 0: if error + */ +static int mdd_is_subdir(const struct lu_env *env, + struct md_object *mo, const struct lu_fid *fid, + struct lu_fid *sfid) +{ + struct mdd_device *mdd = mdo2mdd(mo); + int rc; + ENTRY; + + if (!S_ISDIR(mdd_object_type(md2mdd_obj(mo)))) + RETURN(0); + + rc = mdd_is_parent(env, mdd, md2mdd_obj(mo), fid, sfid); + if (rc == 0) { + /* found root */ + fid_zero(sfid); + } else if (rc == 1) { + /* found @fid is parent */ + *sfid = *fid; + rc = 0; + } + RETURN(rc); +} + +/* + * Check that @dir contains no entries except (possibly) dot and dotdot. + * + * Returns: + * + * 0 empty + * -ENOTDIR not a directory object + * -ENOTEMPTY not empty + * -ve other error + * + */ +static int mdd_dir_is_empty(const struct lu_env *env, + struct mdd_object *dir) +{ + struct dt_it *it; + struct dt_object *obj; + struct dt_it_ops *iops; + int result; + ENTRY; + + obj = mdd_object_child(dir); + if (!dt_try_as_dir(env, obj)) + RETURN(-ENOTDIR); + + iops = &obj->do_index_ops->dio_it; + it = iops->init(env, obj, 0, BYPASS_CAPA); + if (it != NULL) { + result = iops->get(env, it, (const void *)""); + if (result > 0) { + int i; + for (result = 0, i = 0; result == 0 && i < 3; ++i) + result = iops->next(env, it); + if (result == 0) + result = -ENOTEMPTY; + else if (result == +1) + result = 0; + } else if (result == 0) + /* + * Huh? Index contains no zero key? + */ + result = -EIO; + + iops->put(env, it); + iops->fini(env, it); + } else + result = -ENOMEM; + RETURN(result); +} + +static int __mdd_may_link(const struct lu_env *env, struct mdd_object *obj) +{ + struct mdd_device *m = mdd_obj2mdd_dev(obj); + struct lu_attr *la = &mdd_env_info(env)->mti_la; + int rc; + ENTRY; + + rc = mdd_la_get(env, obj, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + if (la->la_nlink >= m->mdd_dt_conf.ddp_max_nlink) + RETURN(-EMLINK); + else + RETURN(0); +} + +/* + * Check whether it may create the cobj under the pobj. + * cobj maybe NULL + */ +int mdd_may_create(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, int check_perm, int check_nlink) +{ + int rc = 0; + ENTRY; + + if (cobj && mdd_object_exists(cobj)) + RETURN(-EEXIST); + + if (mdd_is_dead_obj(pobj)) + RETURN(-ENOENT); + + if (check_perm) + rc = mdd_permission_internal_locked(env, pobj, NULL, + MAY_WRITE | MAY_EXEC); + + if (!rc && check_nlink) + rc = __mdd_may_link(env, pobj); + + RETURN(rc); +} + +/* + * Check whether can unlink from the pobj in the case of "cobj == NULL". + */ +int mdd_may_unlink(const struct lu_env *env, struct mdd_object *pobj, + const struct md_attr *ma) +{ + int rc; + ENTRY; + + if (mdd_is_dead_obj(pobj)) + RETURN(-ENOENT); + + if ((ma->ma_attr.la_valid & LA_FLAGS) && + (ma->ma_attr.la_flags & (LUSTRE_APPEND_FL | LUSTRE_IMMUTABLE_FL))) + RETURN(-EPERM); + + rc = mdd_permission_internal_locked(env, pobj, NULL, + MAY_WRITE | MAY_EXEC); + if (rc) + RETURN(rc); + + if (mdd_is_append(pobj)) + RETURN(-EPERM); + + RETURN(rc); +} + +/* + * pobj == NULL is remote ops case, under such case, pobj's + * VTX feature has been checked already, no need check again. + */ +static inline int mdd_is_sticky(const struct lu_env *env, + struct mdd_object *pobj, + struct mdd_object *cobj) +{ + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + struct md_ucred *uc = md_ucred(env); + int rc; + + if (pobj) { + rc = mdd_la_get(env, pobj, tmp_la, BYPASS_CAPA); + if (rc) + return rc; + + if (!(tmp_la->la_mode & S_ISVTX) || + (tmp_la->la_uid == uc->mu_fsuid)) + return 0; + } + + rc = mdd_la_get(env, cobj, tmp_la, BYPASS_CAPA); + if (rc) + return rc; + + if (tmp_la->la_uid == uc->mu_fsuid) + return 0; + + return !mdd_capable(uc, CAP_FOWNER); +} + +/* + * Check whether it may delete the cobj from the pobj. + * pobj maybe NULL + */ +int mdd_may_delete(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, struct md_attr *ma, + int check_perm, int check_empty) +{ + int rc = 0; + ENTRY; + + LASSERT(cobj); + if (!mdd_object_exists(cobj)) + RETURN(-ENOENT); + + if (pobj) { + if (mdd_is_dead_obj(pobj)) + RETURN(-ENOENT); + + if (check_perm) { + rc = mdd_permission_internal_locked(env, pobj, NULL, + MAY_WRITE | MAY_EXEC); + if (rc) + RETURN(rc); + } + + if (mdd_is_append(pobj)) + RETURN(-EPERM); + } + + if (!(ma->ma_attr_flags & MDS_VTX_BYPASS) && + mdd_is_sticky(env, pobj, cobj)) + RETURN(-EPERM); + + if (mdd_is_immutable(cobj) || mdd_is_append(cobj)) + RETURN(-EPERM); + + if ((ma->ma_attr.la_valid & LA_FLAGS) && + (ma->ma_attr.la_flags & (LUSTRE_APPEND_FL | LUSTRE_IMMUTABLE_FL))) + RETURN(-EPERM); + + if (S_ISDIR(ma->ma_attr.la_mode)) { + struct mdd_device *mdd = mdo2mdd(&cobj->mod_obj); + + if (!S_ISDIR(mdd_object_type(cobj))) + RETURN(-ENOTDIR); + + if (lu_fid_eq(mdo2fid(cobj), &mdd->mdd_root_fid)) + RETURN(-EBUSY); + } else if (S_ISDIR(mdd_object_type(cobj))) + RETURN(-EISDIR); + + if (S_ISDIR(ma->ma_attr.la_mode) && check_empty) + rc = mdd_dir_is_empty(env, cobj); + + RETURN(rc); +} + +/* + * tgt maybe NULL + * has mdd_write_lock on src already, but not on tgt yet + */ +int mdd_link_sanity_check(const struct lu_env *env, + struct mdd_object *tgt_obj, + const struct lu_name *lname, + struct mdd_object *src_obj) +{ + struct mdd_device *m = mdd_obj2mdd_dev(src_obj); + int rc = 0; + ENTRY; + + /* Local ops, no lookup before link, check filename length here. */ + if (lname && (lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) + RETURN(-ENAMETOOLONG); + + if (mdd_is_immutable(src_obj) || mdd_is_append(src_obj)) + RETURN(-EPERM); + + if (S_ISDIR(mdd_object_type(src_obj))) + RETURN(-EPERM); + + LASSERT(src_obj != tgt_obj); + if (tgt_obj) { + rc = mdd_may_create(env, tgt_obj, NULL, 1, 0); + if (rc) + RETURN(rc); + } + + rc = __mdd_may_link(env, src_obj); + + RETURN(rc); +} + +const struct dt_rec *__mdd_fid_rec(const struct lu_env *env, + const struct lu_fid *fid) +{ + struct lu_fid_pack *pack = &mdd_env_info(env)->mti_pack; + + fid_pack(pack, fid, &mdd_env_info(env)->mti_fid2); + return (const struct dt_rec *)pack; +} + + +/* insert named index, add reference if isdir */ +static int __mdd_index_insert(const struct lu_env *env, struct mdd_object *pobj, + const struct lu_fid *lf, const char *name, int is_dir, + struct thandle *handle, struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(pobj); + int rc; + ENTRY; + + if (dt_try_as_dir(env, next)) { + rc = next->do_index_ops->dio_insert(env, next, + __mdd_fid_rec(env, lf), + (const struct dt_key *)name, + handle, capa); + } else { + rc = -ENOTDIR; + } + + if (rc == 0) { + if (is_dir) { + mdd_write_lock(env, pobj); + mdo_ref_add(env, pobj, handle); + mdd_write_unlock(env, pobj); + } + } + RETURN(rc); +} + +/* delete named index, drop reference if isdir */ +static int __mdd_index_delete(const struct lu_env *env, struct mdd_object *pobj, + const char *name, int is_dir, struct thandle *handle, + struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(pobj); + int rc; + ENTRY; + + if (dt_try_as_dir(env, next)) { + rc = next->do_index_ops->dio_delete(env, next, + (struct dt_key *)name, + handle, capa); + if (rc == 0 && is_dir) { + mdd_write_lock(env, pobj); + mdo_ref_del(env, pobj, handle); + mdd_write_unlock(env, pobj); + } + } else + rc = -ENOTDIR; + + RETURN(rc); +} + +static int +__mdd_index_insert_only(const struct lu_env *env, struct mdd_object *pobj, + const struct lu_fid *lf, const char *name, + struct thandle *handle, struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(pobj); + int rc; + ENTRY; + + if (dt_try_as_dir(env, next)) { + rc = next->do_index_ops->dio_insert(env, next, + __mdd_fid_rec(env, lf), + (const struct dt_key *)name, + handle, capa); + } else { + rc = -ENOTDIR; + } + RETURN(rc); +} + +static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, + struct md_object *src_obj, const struct lu_name *lname, + struct md_attr *ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_tobj = md2mdd_obj(tgt_obj); + struct mdd_object *mdd_sobj = md2mdd_obj(src_obj); + struct mdd_device *mdd = mdo2mdd(src_obj); + struct dynlock_handle *dlh; + struct thandle *handle; + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_LINK_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + dlh = mdd_pdo_write_lock(env, mdd_tobj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + mdd_write_lock(env, mdd_sobj); + + rc = mdd_link_sanity_check(env, mdd_tobj, lname, mdd_sobj); + if (rc) + GOTO(out_unlock, rc); + + rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj), + name, handle, + mdd_object_capa(env, mdd_tobj)); + if (rc) + GOTO(out_unlock, rc); + + mdo_ref_add(env, mdd_sobj, handle); + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_tobj, la, handle, 0); + if (rc) + GOTO(out_unlock, rc); + + la->la_valid = LA_CTIME; + rc = mdd_attr_set_internal(env, mdd_sobj, la, handle, 0); + EXIT; +out_unlock: + mdd_write_unlock(env, mdd_sobj); + mdd_pdo_write_unlock(env, mdd_tobj, dlh); +out_trans: + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* caller should take a lock before calling */ +int mdd_finish_unlink(const struct lu_env *env, + struct mdd_object *obj, struct md_attr *ma, + struct thandle *th) +{ + int rc; + ENTRY; + + rc = mdd_iattr_get(env, obj, ma); + if (rc == 0 && ma->ma_attr.la_nlink == 0) { + /* add new orphan and the object + * will be deleted during the object_put() */ + if (__mdd_orphan_add(env, obj, th) == 0) + obj->mod_flags |= ORPHAN_OBJ; + + obj->mod_flags |= DEAD_OBJ; + if (obj->mod_count == 0) + rc = mdd_object_kill(env, obj, ma); + else + /* clear MA_LOV | MA_COOKIE, if we do not + * unlink it in case we get it somewhere */ + ma->ma_valid &= ~(MA_LOV | MA_COOKIE); + } else + ma->ma_valid &= ~(MA_LOV | MA_COOKIE); + + RETURN(rc); +} + +/* + * pobj maybe NULL + * has mdd_write_lock on cobj already, but not on pobj yet + */ +int mdd_unlink_sanity_check(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, struct md_attr *ma) +{ + int rc; + ENTRY; + + rc = mdd_may_delete(env, pobj, cobj, ma, 1, 1); + + RETURN(rc); +} + +static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, + struct md_object *cobj, const struct lu_name *lname, + struct md_attr *ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_pobj = md2mdd_obj(pobj); + struct mdd_object *mdd_cobj = md2mdd_obj(cobj); + struct mdd_device *mdd = mdo2mdd(pobj); + struct dynlock_handle *dlh; + struct thandle *handle; + int rc, is_dir; + ENTRY; + + LASSERTF(mdd_object_exists(mdd_cobj) > 0, "FID is "DFID"\n", + PFID(mdd_object_fid(mdd_cobj))); + + rc = mdd_log_txn_param_build(env, cobj, ma, MDD_TXN_UNLINK_OP); + if (rc) + RETURN(rc); + + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + + dlh = mdd_pdo_write_lock(env, mdd_pobj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + mdd_write_lock(env, mdd_cobj); + + is_dir = S_ISDIR(ma->ma_attr.la_mode); + rc = mdd_unlink_sanity_check(env, mdd_pobj, mdd_cobj, ma); + if (rc) + GOTO(cleanup, rc); + + rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle, + mdd_object_capa(env, mdd_pobj)); + if (rc) + GOTO(cleanup, rc); + + mdo_ref_del(env, mdd_cobj, handle); + if (is_dir) + /* unlink dot */ + mdo_ref_del(env, mdd_cobj, handle); + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_pobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + la->la_valid = LA_CTIME; + rc = mdd_attr_set_internal(env, mdd_cobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + rc = mdd_finish_unlink(env, mdd_cobj, ma, handle); + + if (rc == 0) + obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp, + strlen("unlinked"), "unlinked", 0, + NULL, NULL); + EXIT; +cleanup: + mdd_write_unlock(env, mdd_cobj); + mdd_pdo_write_unlock(env, mdd_pobj, dlh); +out_trans: + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* has not lock on pobj yet */ +static int mdd_ni_sanity_check(const struct lu_env *env, + struct md_object *pobj, + const struct md_attr *ma) +{ + struct mdd_object *obj = md2mdd_obj(pobj); + int rc; + ENTRY; + + if (ma->ma_attr_flags & MDS_PERM_BYPASS) + RETURN(0); + + rc = mdd_may_create(env, obj, NULL, 1, S_ISDIR(ma->ma_attr.la_mode)); + + RETURN(rc); +} + +/* + * Partial operation. + */ +static int mdd_name_insert(const struct lu_env *env, + struct md_object *pobj, + const struct lu_name *lname, + const struct lu_fid *fid, + const struct md_attr *ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(pobj); + struct mdd_device *mdd = mdo2mdd(pobj); + struct dynlock_handle *dlh; + struct thandle *handle; + int is_dir = S_ISDIR(ma->ma_attr.la_mode); + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_INSERT_OP); + handle = mdd_trans_start(env, mdo2mdd(pobj)); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + dlh = mdd_pdo_write_lock(env, mdd_obj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + + rc = mdd_ni_sanity_check(env, pobj, ma); + if (rc) + GOTO(out_unlock, rc); + + rc = __mdd_index_insert(env, mdd_obj, fid, name, is_dir, + handle, BYPASS_CAPA); + if (rc) + GOTO(out_unlock, rc); + + /* + * For some case, no need update obj's ctime (LA_CTIME is not set), + * e.g. split_dir. + * For other cases, update obj's ctime (LA_CTIME is set), + * e.g. cmr_link. + */ + if (ma->ma_attr.la_valid & LA_CTIME) { + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_obj, la, handle, 0); + } + EXIT; +out_unlock: + mdd_pdo_write_unlock(env, mdd_obj, dlh); +out_trans: + mdd_trans_stop(env, mdo2mdd(pobj), rc, handle); + return rc; +} + +/* has not lock on pobj yet */ +static int mdd_nr_sanity_check(const struct lu_env *env, + struct md_object *pobj, + const struct md_attr *ma) +{ + struct mdd_object *obj = md2mdd_obj(pobj); + int rc; + ENTRY; + + if (ma->ma_attr_flags & MDS_PERM_BYPASS) + RETURN(0); + + rc = mdd_may_unlink(env, obj, ma); + + RETURN(rc); +} + +/* + * Partial operation. + */ +static int mdd_name_remove(const struct lu_env *env, + struct md_object *pobj, + const struct lu_name *lname, + const struct md_attr *ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(pobj); + struct mdd_device *mdd = mdo2mdd(pobj); + struct dynlock_handle *dlh; + struct thandle *handle; + int is_dir = S_ISDIR(ma->ma_attr.la_mode); + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_DELETE_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + dlh = mdd_pdo_write_lock(env, mdd_obj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + + rc = mdd_nr_sanity_check(env, pobj, ma); + if (rc) + GOTO(out_unlock, rc); + + rc = __mdd_index_delete(env, mdd_obj, name, is_dir, + handle, BYPASS_CAPA); + if (rc) + GOTO(out_unlock, rc); + + /* + * For some case, no need update obj's ctime (LA_CTIME is not set), + * e.g. split_dir. + * For other cases, update obj's ctime (LA_CTIME is set), + * e.g. cmr_unlink. + */ + if (ma->ma_attr.la_valid & LA_CTIME) { + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_obj, la, handle, 0); + } + EXIT; +out_unlock: + mdd_pdo_write_unlock(env, mdd_obj, dlh); +out_trans: + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* + * tobj maybe NULL + * has mdd_write_lock on tobj alreay, but not on tgt_pobj yet + */ +static int mdd_rt_sanity_check(const struct lu_env *env, + struct mdd_object *tgt_pobj, + struct mdd_object *tobj, + struct md_attr *ma) +{ + int rc; + ENTRY; + + if (unlikely(ma->ma_attr_flags & MDS_PERM_BYPASS)) + RETURN(0); + + /* XXX: for mdd_rename_tgt, "tobj == NULL" does not mean tobj not + * exist. In fact, tobj must exist, otherwise the call trace will be: + * mdt_reint_rename_tgt -> mdo_name_insert -> ... -> mdd_name_insert. + * When get here, tobj must be NOT NULL, the other case has been + * processed in cmr_rename_tgt before mdd_rename_tgt and enable + * MDS_PERM_BYPASS. + * So check may_delete, but not check nlink of tgt_pobj. */ + LASSERT(tobj); + rc = mdd_may_delete(env, tgt_pobj, tobj, ma, 1, 1); + + RETURN(rc); +} + +static int mdd_rename_tgt(const struct lu_env *env, + struct md_object *pobj, struct md_object *tobj, + const struct lu_fid *lf, const struct lu_name *lname, + struct md_attr *ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_tpobj = md2mdd_obj(pobj); + struct mdd_object *mdd_tobj = md2mdd_obj(tobj); + struct mdd_device *mdd = mdo2mdd(pobj); + struct dynlock_handle *dlh; + struct thandle *handle; + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_TGT_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + dlh = mdd_pdo_write_lock(env, mdd_tpobj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + if (tobj) + mdd_write_lock(env, mdd_tobj); + + rc = mdd_rt_sanity_check(env, mdd_tpobj, mdd_tobj, ma); + if (rc) + GOTO(cleanup, rc); + + /* + * If rename_tgt is called then we should just re-insert name with + * correct fid, no need to dec/inc parent nlink if obj is dir. + */ + rc = __mdd_index_delete(env, mdd_tpobj, name, 0, handle, BYPASS_CAPA); + if (rc) + GOTO(cleanup, rc); + + rc = __mdd_index_insert_only(env, mdd_tpobj, lf, name, handle, + BYPASS_CAPA); + if (rc) + GOTO(cleanup, rc); + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_tpobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + /* + * For tobj is remote case cmm layer has processed + * and pass NULL tobj to here. So when tobj is NOT NULL, + * it must be local one. + */ + if (tobj && mdd_object_exists(mdd_tobj)) { + mdo_ref_del(env, mdd_tobj, handle); + + /* Remove dot reference. */ + if (S_ISDIR(ma->ma_attr.la_mode)) + mdo_ref_del(env, mdd_tobj, handle); + + la->la_valid = LA_CTIME; + rc = mdd_attr_set_internal(env, mdd_tobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + rc = mdd_finish_unlink(env, mdd_tobj, ma, handle); + if (rc) + GOTO(cleanup, rc); + } + EXIT; +cleanup: + if (tobj) + mdd_write_unlock(env, mdd_tobj); + mdd_pdo_write_unlock(env, mdd_tpobj, dlh); +out_trans: + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* + * The permission has been checked when obj created, no need check again. + */ +static int mdd_cd_sanity_check(const struct lu_env *env, + struct mdd_object *obj) +{ + ENTRY; + + /* EEXIST check */ + if (!obj || mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + RETURN(0); + +} + +static int mdd_create_data(const struct lu_env *env, struct md_object *pobj, + struct md_object *cobj, const struct md_op_spec *spec, + struct md_attr *ma) +{ + struct mdd_device *mdd = mdo2mdd(cobj); + struct mdd_object *mdd_pobj = md2mdd_obj(pobj); + struct mdd_object *son = md2mdd_obj(cobj); + struct lu_attr *attr = &ma->ma_attr; + struct lov_mds_md *lmm = NULL; + int lmm_size = 0; + struct thandle *handle; + int rc; + ENTRY; + + rc = mdd_cd_sanity_check(env, son); + if (rc) + RETURN(rc); + + if (spec->sp_cr_flags & MDS_OPEN_DELAY_CREATE || + !(spec->sp_cr_flags & FMODE_WRITE)) + RETURN(0); + + rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size, + spec, attr); + if (rc) + RETURN(rc); + + mdd_txn_param_build(env, mdd, MDD_TXN_CREATE_DATA_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + GOTO(out_free, rc = PTR_ERR(handle)); + + /* + * XXX: Setting the lov ea is not locked but setting the attr is locked? + * Should this be fixed? + */ + + /* Replay creates has objects already */ +#if 0 + if (spec->u.sp_ea.no_lov_create) { + CDEBUG(D_INFO, "we already have lov ea\n"); + rc = mdd_lov_set_md(env, mdd_pobj, son, + (struct lov_mds_md *)spec->u.sp_ea.eadata, + spec->u.sp_ea.eadatalen, handle, 0); + } else +#endif + /* No need mdd_lsm_sanity_check here */ + rc = mdd_lov_set_md(env, mdd_pobj, son, lmm, + lmm_size, handle, 0); + + if (rc == 0) + rc = mdd_attr_get_internal_locked(env, son, ma); + + /* update lov_objid data, must be before transaction stop! */ + if (rc == 0) + mdd_lov_objid_update(env, mdd); + + mdd_trans_stop(env, mdd, rc, handle); +out_free: + /* Finish mdd_lov_create() stuff. */ + mdd_lov_create_finish(env, mdd, lmm, lmm_size, spec); + RETURN(rc); +} + +static int +__mdd_lookup(const struct lu_env *env, struct md_object *pobj, + const struct lu_name *lname, struct lu_fid* fid, int mask) +{ + char *name = lname->ln_name; + const struct dt_key *key = (const struct dt_key *)name; + struct mdd_object *mdd_obj = md2mdd_obj(pobj); + struct mdd_device *m = mdo2mdd(pobj); + struct dt_object *dir = mdd_object_child(mdd_obj); + struct lu_fid_pack *pack = &mdd_env_info(env)->mti_pack; + int rc; + ENTRY; + + if (unlikely(mdd_is_dead_obj(mdd_obj))) + RETURN(-ESTALE); + + rc = mdd_object_exists(mdd_obj); + if (unlikely(rc == 0)) + RETURN(-ESTALE); + else if (unlikely(rc < 0)) { + CERROR("Object "DFID" locates on remote server\n", + PFID(mdo2fid(mdd_obj))); + LBUG(); + } + + /* The common filename length check. */ + if (unlikely(lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) + RETURN(-ENAMETOOLONG); + + rc = mdd_permission_internal_locked(env, mdd_obj, NULL, mask); + if (rc) + RETURN(rc); + + if (likely(S_ISDIR(mdd_object_type(mdd_obj)) && + dt_try_as_dir(env, dir))) { + rc = dir->do_index_ops->dio_lookup(env, dir, + (struct dt_rec *)pack, key, + mdd_object_capa(env, mdd_obj)); + if (rc == 0) + fid_unpack(pack, fid); + } else + rc = -ENOTDIR; + + RETURN(rc); +} + +int mdd_object_initialize(const struct lu_env *env, const struct lu_fid *pfid, + struct mdd_object *child, struct md_attr *ma, + struct thandle *handle) +{ + int rc; + ENTRY; + + /* + * Update attributes for child. + * + * FIXME: + * (1) the valid bits should be converted between Lustre and Linux; + * (2) maybe, the child attributes should be set in OSD when creation. + */ + + rc = mdd_attr_set_internal(env, child, &ma->ma_attr, handle, 0); + if (rc != 0) + RETURN(rc); + + if (S_ISDIR(ma->ma_attr.la_mode)) { + /* Add "." and ".." for newly created dir */ + mdo_ref_add(env, child, handle); + rc = __mdd_index_insert_only(env, child, mdo2fid(child), + dot, handle, BYPASS_CAPA); + if (rc == 0) { + rc = __mdd_index_insert_only(env, child, pfid, + dotdot, handle, + BYPASS_CAPA); + if (rc != 0) { + int rc2; + + rc2 = __mdd_index_delete(env, child, dot, 0, + handle, BYPASS_CAPA); + if (rc2 != 0) + CERROR("Failure to cleanup after dotdot" + " creation: %d (%d)\n", rc2, rc); + else + mdo_ref_del(env, child, handle); + } + } + } + RETURN(rc); +} + +/* has not lock on pobj yet */ +static int mdd_create_sanity_check(const struct lu_env *env, + struct md_object *pobj, + const struct lu_name *lname, + struct md_attr *ma, + struct md_op_spec *spec) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la; + struct lu_fid *fid = &info->mti_fid; + struct mdd_object *obj = md2mdd_obj(pobj); + struct mdd_device *m = mdo2mdd(pobj); + int lookup = spec->sp_cr_lookup; + int rc; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + /* + * In some cases this lookup is not needed - we know before if name + * exists or not because MDT performs lookup for it. + * name length check is done in lookup. + */ + if (lookup) { + /* + * Check if the name already exist, though it will be checked in + * _index_insert also, for avoiding rolling back if exists + * _index_insert. + */ + rc = __mdd_lookup_locked(env, pobj, lname, fid, + MAY_WRITE | MAY_EXEC); + if (rc != -ENOENT) + RETURN(rc ? : -EEXIST); + } else { + /* + * Check WRITE permission for the parent. + * EXEC permission have been checked + * when lookup before create already. + */ + rc = mdd_permission_internal_locked(env, obj, NULL, MAY_WRITE); + if (rc) + RETURN(rc); + } + + /* sgid check */ + rc = mdd_la_get(env, obj, la, BYPASS_CAPA); + if (rc != 0) + RETURN(rc); + + if (la->la_mode & S_ISGID) { + ma->ma_attr.la_gid = la->la_gid; + if (S_ISDIR(ma->ma_attr.la_mode)) { + ma->ma_attr.la_mode |= S_ISGID; + ma->ma_attr.la_valid |= LA_MODE; + } + } + + switch (ma->ma_attr.la_mode & S_IFMT) { + case S_IFDIR: { + if (la->la_nlink >= m->mdd_dt_conf.ddp_max_nlink) + RETURN(-EMLINK); + else + RETURN(0); + } + case S_IFLNK: { + unsigned int symlen = strlen(spec->u.sp_symname) + 1; + + if (symlen > (1 << m->mdd_dt_conf.ddp_block_shift)) + RETURN(-ENAMETOOLONG); + else + RETURN(0); + } + case S_IFREG: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + rc = 0; + break; + default: + rc = -EINVAL; + break; + } + RETURN(rc); +} + +/* + * Create object and insert it into namespace. + */ +static int mdd_create(const struct lu_env *env, + struct md_object *pobj, + const struct lu_name *lname, + struct md_object *child, + struct md_op_spec *spec, + struct md_attr* ma) +{ + char *name = lname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_pobj = md2mdd_obj(pobj); + struct mdd_object *son = md2mdd_obj(child); + struct mdd_device *mdd = mdo2mdd(pobj); + struct lu_attr *attr = &ma->ma_attr; + struct lov_mds_md *lmm = NULL; + struct thandle *handle; + int rc, created = 0, inserted = 0, lmm_size = 0; + struct dynlock_handle *dlh; + ENTRY; + + /* + * Two operations have to be performed: + * + * - allocation of new object (->do_create()), and + * + * - insertion into parent index (->dio_insert()). + * + * Due to locking, operation order is not important, when both are + * successful, *but* error handling cases are quite different: + * + * - if insertion is done first, and following object creation fails, + * insertion has to be rolled back, but this operation might fail + * also leaving us with dangling index entry. + * + * - if creation is done first, is has to be undone if insertion + * fails, leaving us with leaked space, which is neither good, nor + * fatal. + * + * It seems that creation-first is simplest solution, but it is + * sub-optimal in the frequent + * + * $ mkdir foo + * $ mkdir foo + * + * case, because second mkdir is bound to create object, only to + * destroy it immediately. + * + * To avoid this follow local file systems that do double lookup: + * + * 0. lookup -> -EEXIST (mdd_create_sanity_check()) + * + * 1. create (mdd_object_create_internal()) + * + * 2. insert (__mdd_index_insert(), lookup again) + */ + + /* Sanity checks before big job. */ + rc = mdd_create_sanity_check(env, pobj, lname, ma, spec); + if (rc) + RETURN(rc); + + /* + * No RPC inside the transaction, so OST objects should be created at + * first. + */ + if (S_ISREG(attr->la_mode)) { + rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size, + spec, attr); + if (rc) + RETURN(rc); + } + + mdd_txn_param_build(env, mdd, MDD_TXN_MKDIR_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + GOTO(out_free, rc = PTR_ERR(handle)); + + dlh = mdd_pdo_write_lock(env, mdd_pobj, name); + if (dlh == NULL) + GOTO(out_trans, rc = -ENOMEM); + + /* + * XXX: Check that link can be added to the parent in mkdir case. + */ + + mdd_write_lock(env, son); + rc = mdd_object_create_internal(env, mdd_pobj, son, ma, handle); + if (rc) { + mdd_write_unlock(env, son); + GOTO(cleanup, rc); + } + + created = 1; + +#ifdef CONFIG_FS_POSIX_ACL + mdd_read_lock(env, mdd_pobj); + rc = mdd_acl_init(env, mdd_pobj, son, &ma->ma_attr.la_mode, handle); + mdd_read_unlock(env, mdd_pobj); + if (rc) { + mdd_write_unlock(env, son); + GOTO(cleanup, rc); + } else { + ma->ma_attr.la_valid |= LA_MODE; + } +#endif + + rc = mdd_object_initialize(env, mdo2fid(mdd_pobj), + son, ma, handle); + mdd_write_unlock(env, son); + if (rc) + /* + * Object has no links, so it will be destroyed when last + * reference is released. (XXX not now.) + */ + GOTO(cleanup, rc); + + rc = __mdd_index_insert(env, mdd_pobj, mdo2fid(son), + name, S_ISDIR(attr->la_mode), handle, + mdd_object_capa(env, mdd_pobj)); + + if (rc) + GOTO(cleanup, rc); + + inserted = 1; + + /* No need mdd_lsm_sanity_check here */ + rc = mdd_lov_set_md(env, mdd_pobj, son, lmm, lmm_size, handle, 0); + if (rc) { + CERROR("error on stripe info copy %d \n", rc); + GOTO(cleanup, rc); + } + if (lmm && lmm_size > 0) { + /* Set Lov here, do not get lmm again later */ + memcpy(ma->ma_lmm, lmm, lmm_size); + ma->ma_lmm_size = lmm_size; + ma->ma_valid |= MA_LOV; + } + + if (S_ISLNK(attr->la_mode)) { + struct dt_object *dt = mdd_object_child(son); + const char *target_name = spec->u.sp_symname; + int sym_len = strlen(target_name); + const struct lu_buf *buf; + loff_t pos = 0; + + buf = mdd_buf_get_const(env, target_name, sym_len); + rc = dt->do_body_ops->dbo_write(env, dt, buf, &pos, handle, + mdd_object_capa(env, son)); + + if (rc == sym_len) + rc = 0; + else + GOTO(cleanup, rc = -EFAULT); + } + + *la = ma->ma_attr; + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_pobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + /* Return attr back. */ + rc = mdd_attr_get_internal_locked(env, son, ma); + EXIT; +cleanup: + if (rc && created) { + int rc2 = 0; + + if (inserted) { + rc2 = __mdd_index_delete(env, mdd_pobj, name, + S_ISDIR(attr->la_mode), + handle, BYPASS_CAPA); + if (rc2) + CERROR("error can not cleanup destroy %d\n", + rc2); + } + if (rc2 == 0) { + mdd_write_lock(env, son); + mdo_ref_del(env, son, handle); + mdd_write_unlock(env, son); + } + } + + /* update lov_objid data, must be before transaction stop! */ + if (rc == 0) + mdd_lov_objid_update(env, mdd); + + mdd_pdo_write_unlock(env, mdd_pobj, dlh); +out_trans: + mdd_trans_stop(env, mdd, rc, handle); +out_free: + /* finis lov_create stuff, free all temporary data */ + mdd_lov_create_finish(env, mdd, lmm, lmm_size, spec); + return rc; +} + +/* + * Get locks on parents in proper order + * RETURN: < 0 - error, rename_order if successful + */ +enum rename_order { + MDD_RN_SAME, + MDD_RN_SRCTGT, + MDD_RN_TGTSRC +}; + +static int mdd_rename_order(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *src_pobj, + struct mdd_object *tgt_pobj) +{ + /* order of locking, 1 - tgt-src, 0 - src-tgt*/ + int rc; + ENTRY; + + if (src_pobj == tgt_pobj) + RETURN(MDD_RN_SAME); + + /* compared the parent child relationship of src_p&tgt_p */ + if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(src_pobj))){ + rc = MDD_RN_SRCTGT; + } else if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(tgt_pobj))) { + rc = MDD_RN_TGTSRC; + } else { + rc = mdd_is_parent(env, mdd, src_pobj, mdo2fid(tgt_pobj), NULL); + if (rc == -EREMOTE) + rc = 0; + + if (rc == 1) + rc = MDD_RN_TGTSRC; + else if (rc == 0) + rc = MDD_RN_SRCTGT; + } + + RETURN(rc); +} + +/* has not mdd_write{read}_lock on any obj yet. */ +static int mdd_rename_sanity_check(const struct lu_env *env, + struct mdd_object *src_pobj, + struct mdd_object *tgt_pobj, + struct mdd_object *sobj, + struct mdd_object *tobj, + struct md_attr *ma) +{ + int rc = 0; + ENTRY; + + if (unlikely(ma->ma_attr_flags & MDS_PERM_BYPASS)) + RETURN(0); + + /* XXX: when get here, sobj must NOT be NULL, + * the other case has been processed in cml_rename + * before mdd_rename and enable MDS_PERM_BYPASS. */ + LASSERT(sobj); + rc = mdd_may_delete(env, src_pobj, sobj, ma, 1, 0); + if (rc) + RETURN(rc); + + /* XXX: when get here, "tobj == NULL" means tobj must + * NOT exist (neither on remote MDS, such case has been + * processed in cml_rename before mdd_rename and enable + * MDS_PERM_BYPASS). + * So check may_create, but not check may_unlink. */ + if (!tobj) + rc = mdd_may_create(env, tgt_pobj, NULL, + (src_pobj != tgt_pobj), 0); + else + rc = mdd_may_delete(env, tgt_pobj, tobj, ma, + (src_pobj != tgt_pobj), 1); + + if (!rc && !tobj && (src_pobj != tgt_pobj) && + S_ISDIR(ma->ma_attr.la_mode)) + rc = __mdd_may_link(env, tgt_pobj); + + RETURN(rc); +} + +/* src object can be remote that is why we use only fid and type of object */ +static int mdd_rename(const struct lu_env *env, + struct md_object *src_pobj, struct md_object *tgt_pobj, + const struct lu_fid *lf, const struct lu_name *lsname, + struct md_object *tobj, const struct lu_name *ltname, + struct md_attr *ma) +{ + char *sname = lsname->ln_name; + char *tname = ltname->ln_name; + struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj); + struct mdd_object *mdd_tpobj = md2mdd_obj(tgt_pobj); + struct mdd_device *mdd = mdo2mdd(src_pobj); + struct mdd_object *mdd_sobj = NULL; + struct mdd_object *mdd_tobj = NULL; + struct dynlock_handle *sdlh, *tdlh; + struct thandle *handle; + int is_dir; + int rc; + ENTRY; + + LASSERT(ma->ma_attr.la_mode & S_IFMT); + is_dir = S_ISDIR(ma->ma_attr.la_mode); + + if (tobj) + mdd_tobj = md2mdd_obj(tobj); + + mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + /* FIXME: Should consider tobj and sobj too in rename_lock. */ + rc = mdd_rename_order(env, mdd, mdd_spobj, mdd_tpobj); + if (rc < 0) + GOTO(cleanup_unlocked, rc); + + /* Get locks in determined order */ + if (rc == MDD_RN_SAME) { + sdlh = mdd_pdo_write_lock(env, mdd_spobj, sname); + /* check hashes to determine do we need one lock or two */ + if (mdd_name2hash(sname) != mdd_name2hash(tname)) + tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname); + else + tdlh = sdlh; + } else if (rc == MDD_RN_SRCTGT) { + sdlh = mdd_pdo_write_lock(env, mdd_spobj, sname); + tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname); + } else { + tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname); + sdlh = mdd_pdo_write_lock(env, mdd_spobj, sname); + } + if (sdlh == NULL || tdlh == NULL) + GOTO(cleanup, rc = -ENOMEM); + + mdd_sobj = mdd_object_find(env, mdd, lf); + rc = mdd_rename_sanity_check(env, mdd_spobj, mdd_tpobj, + mdd_sobj, mdd_tobj, ma); + if (rc) + GOTO(cleanup, rc); + + rc = __mdd_index_delete(env, mdd_spobj, sname, is_dir, handle, + mdd_object_capa(env, mdd_spobj)); + if (rc) + GOTO(cleanup, rc); + + /* + * Here tobj can be remote one, so we do index_delete unconditionally + * and -ENOENT is allowed. + */ + rc = __mdd_index_delete(env, mdd_tpobj, tname, is_dir, handle, + mdd_object_capa(env, mdd_tpobj)); + if (rc != 0 && rc != -ENOENT) + GOTO(cleanup, rc); + + rc = __mdd_index_insert(env, mdd_tpobj, lf, tname, is_dir, handle, + mdd_object_capa(env, mdd_tpobj)); + if (rc) + GOTO(cleanup, rc); + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + + /* XXX: mdd_sobj must be local one if it is NOT NULL. */ + if (mdd_sobj) { + la->la_valid = LA_CTIME; + rc = mdd_attr_set_internal_locked(env, mdd_sobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + } + + /* + * For tobj is remote case cmm layer has processed + * and set tobj to NULL then. So when tobj is NOT NULL, + * it must be local one. + */ + if (tobj && mdd_object_exists(mdd_tobj)) { + mdd_write_lock(env, mdd_tobj); + mdo_ref_del(env, mdd_tobj, handle); + + /* Remove dot reference. */ + if (is_dir) + mdo_ref_del(env, mdd_tobj, handle); + + la->la_valid = LA_CTIME; + rc = mdd_attr_set_internal(env, mdd_tobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + rc = mdd_finish_unlink(env, mdd_tobj, ma, handle); + mdd_write_unlock(env, mdd_tobj); + if (rc) + GOTO(cleanup, rc); + } + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_spobj, la, handle, 0); + if (rc) + GOTO(cleanup, rc); + + if (mdd_spobj != mdd_tpobj) { + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdd_attr_set_internal_locked(env, mdd_tpobj, la, + handle, 0); + } + + EXIT; +cleanup: + if (likely(tdlh) && sdlh != tdlh) + mdd_pdo_write_unlock(env, mdd_tpobj, tdlh); + if (likely(sdlh)) + mdd_pdo_write_unlock(env, mdd_spobj, sdlh); +cleanup_unlocked: + mdd_trans_stop(env, mdd, rc, handle); + if (mdd_sobj) + mdd_object_put(env, mdd_sobj); + return rc; +} + +struct md_dir_operations mdd_dir_ops = { + .mdo_is_subdir = mdd_is_subdir, + .mdo_lookup = mdd_lookup, + .mdo_create = mdd_create, + .mdo_rename = mdd_rename, + .mdo_link = mdd_link, + .mdo_unlink = mdd_unlink, + .mdo_name_insert = mdd_name_insert, + .mdo_name_remove = mdd_name_remove, + .mdo_rename_tgt = mdd_rename_tgt, + .mdo_create_data = mdd_create_data +}; diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h new file mode 100644 index 0000000..8e2f8d3 --- /dev/null +++ b/lustre/mdd/mdd_internal.h @@ -0,0 +1,575 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * mdd/mdd_internel.c + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _MDD_INTERNAL_H +#define _MDD_INTERNAL_H + +#include <asm/semaphore.h> + +#include <linux/lustre_acl.h> +#include <obd.h> +#include <md_object.h> +#include <dt_object.h> +#include <linux/sched.h> +#include <linux/capability.h> +#include <linux/dynlocks.h> + +enum mdd_txn_op { + MDD_TXN_OBJECT_DESTROY_OP = 0, + MDD_TXN_OBJECT_CREATE_OP, + MDD_TXN_ATTR_SET_OP, + MDD_TXN_XATTR_SET_OP, + MDD_TXN_INDEX_INSERT_OP, + MDD_TXN_INDEX_DELETE_OP, + MDD_TXN_LINK_OP, + MDD_TXN_UNLINK_OP, + MDD_TXN_RENAME_OP, + MDD_TXN_RENAME_TGT_OP, + MDD_TXN_CREATE_DATA_OP, + MDD_TXN_MKDIR_OP, + MDD_TXN_LAST_OP +}; + +struct mdd_txn_op_descr { + enum mdd_txn_op mod_op; + unsigned int mod_credits; +}; + +struct mdd_device { + struct md_device mdd_md_dev; + struct dt_device *mdd_child; + struct obd_device *mdd_obd_dev; + struct lu_fid mdd_root_fid; + struct dt_device_param mdd_dt_conf; + struct dt_object *mdd_orphans; + struct dt_txn_callback mdd_txn_cb; + cfs_proc_dir_entry_t *mdd_proc_entry; + struct lprocfs_stats *mdd_stats; + struct mdd_txn_op_descr mdd_tod[MDD_TXN_LAST_OP]; +}; + +enum mod_flags { + /* The dir object has been unlinked */ + DEAD_OBJ = 1 << 0, + APPEND_OBJ = 1 << 1, + IMMUTE_OBJ = 1 << 2, + ORPHAN_OBJ = 1 << 3 +}; + +#define LUSTRE_APPEND_FL LDISKFS_APPEND_FL +#define LUSTRE_IMMUTABLE_FL LDISKFS_IMMUTABLE_FL +#define LUSTRE_DIRSYNC_FL LDISKFS_DIRSYNC_FL + +struct mdd_object { + struct md_object mod_obj; + /* open count */ + __u32 mod_count; + __u32 mod_valid; + unsigned long mod_flags; + struct dynlock mod_pdlock; +}; + +struct orph_key { + /* fid of the object*/ + struct lu_fid ok_fid; + /* type of operation: unlink, truncate */ + __u32 ok_op; +} __attribute__((packed)); + +struct mdd_thread_info { + struct txn_param mti_param; + struct lu_fid mti_fid; + struct lu_attr mti_la; + struct md_attr mti_ma; + struct lu_attr mti_la_for_fix; + struct obd_info mti_oi; + struct orph_key mti_orph_key; + struct obd_trans_info mti_oti; + struct lu_buf mti_buf; + struct obdo mti_oa; + char mti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE]; + struct lu_fid mti_fid2; /* used for be & cpu converting */ + struct lu_fid_pack mti_pack; + struct dt_allocation_hint mti_hint; + struct lov_mds_md *mti_max_lmm; + int mti_max_lmm_size; + struct llog_cookie *mti_max_cookie; + int mti_max_cookie_size; +}; + +struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env, + struct mdd_device *mdd); + +struct llog_cookie *mdd_max_cookie_get(const struct lu_env *env, + struct mdd_device *mdd); + +int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd, + struct lustre_cfg *cfg); +int mdd_fini_obd(const struct lu_env *env, struct mdd_device *mdd, + struct lustre_cfg *lcfg); +int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj, + const struct lu_buf *buf, const char *name, int fl, + struct thandle *txn); +int mdd_lsm_sanity_check(const struct lu_env *env, struct mdd_object *obj); +int mdd_lov_set_md(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *child, struct lov_mds_md *lmm, + int lmm_size, struct thandle *handle, int set_stripe); +int mdd_lov_create(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *parent, struct mdd_object *child, + struct lov_mds_md **lmm, int *lmm_size, + const struct md_op_spec *spec, struct lu_attr *la); +void mdd_lov_objid_update(const struct lu_env *env, struct mdd_device *mdd); +void mdd_lov_create_finish(const struct lu_env *env, struct mdd_device *mdd, + struct lov_mds_md *lmm, int lmm_size, + const struct md_op_spec *spec); +int mdd_get_md(const struct lu_env *env, struct mdd_object *obj, + void *md, int *md_size, const char *name); +int mdd_get_md_locked(const struct lu_env *env, struct mdd_object *obj, + void *md, int *md_size, const char *name); +int mdd_la_get(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, struct lustre_capa *capa); +int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *o, + const struct lu_attr *attr, struct thandle *handle, + const int needacl); +int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj, + struct md_attr *ma); +int mdd_iattr_get(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma); +int mdd_attr_get_internal_locked(const struct lu_env *env, + struct mdd_object *mdd_obj, + struct md_attr *ma); +int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p, + struct mdd_object *c, struct md_attr *ma, + struct thandle *handle); +int mdd_attr_set_internal_locked(const struct lu_env *env, + struct mdd_object *o, + const struct lu_attr *attr, + struct thandle *handle, const int needacl); +int mdd_lmm_get_locked(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma); +/* mdd_lock.c */ +void mdd_write_lock(const struct lu_env *env, struct mdd_object *obj); +void mdd_read_lock(const struct lu_env *env, struct mdd_object *obj); +void mdd_write_unlock(const struct lu_env *env, struct mdd_object *obj); +void mdd_read_unlock(const struct lu_env *env, struct mdd_object *obj); + +void mdd_pdlock_init(struct mdd_object *obj); +unsigned long mdd_name2hash(const char *name); +struct dynlock_handle *mdd_pdo_write_lock(const struct lu_env *env, + struct mdd_object *obj, + const char *name); +struct dynlock_handle *mdd_pdo_read_lock(const struct lu_env *env, + struct mdd_object *obj, + const char *name); +void mdd_pdo_write_unlock(const struct lu_env *env, struct mdd_object *obj, + struct dynlock_handle *dlh); +void mdd_pdo_read_unlock(const struct lu_env *env, struct mdd_object *obj, + struct dynlock_handle *dlh); +/* mdd_dir.c */ +int mdd_may_create(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, int check_perm, int check_nlink); +int mdd_may_unlink(const struct lu_env *env, struct mdd_object *pobj, + const struct md_attr *ma); +int mdd_may_delete(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, struct md_attr *ma, + int check_perm, int check_empty); +int mdd_unlink_sanity_check(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, struct md_attr *ma); +int mdd_finish_unlink(const struct lu_env *env, struct mdd_object *obj, + struct md_attr *ma, struct thandle *th); +int mdd_object_initialize(const struct lu_env *env, const struct lu_fid *pfid, + struct mdd_object *child, struct md_attr *ma, + struct thandle *handle); +int mdd_link_sanity_check(const struct lu_env *env, struct mdd_object *tgt_obj, + const struct lu_name *lname, struct mdd_object *src_obj); +/* mdd_lov.c */ +int mdd_unlink_log(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *mdd_cobj, struct md_attr *ma); + +int mdd_setattr_log(const struct lu_env *env, struct mdd_device *mdd, + const struct md_attr *ma, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size); + +int mdd_get_cookie_size(const struct lu_env *env, struct mdd_device *mdd, + struct lov_mds_md *lmm); + +int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies); + +struct mdd_thread_info *mdd_env_info(const struct lu_env *env); + +struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len); +const struct lu_buf *mdd_buf_get_const(const struct lu_env *env, + const void *area, ssize_t len); + +int __mdd_orphan_cleanup(const struct lu_env *env, struct mdd_device *d); +int __mdd_orphan_add(const struct lu_env *, struct mdd_object *, + struct thandle *); +int __mdd_orphan_del(const struct lu_env *, struct mdd_object *, + struct thandle *); +int orph_index_init(const struct lu_env *env, struct mdd_device *mdd); +void orph_index_fini(const struct lu_env *env, struct mdd_device *mdd); +int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd); + +int mdd_procfs_init(struct mdd_device *mdd, const char *name); +int mdd_procfs_fini(struct mdd_device *mdd); +void mdd_lprocfs_time_start(const struct lu_env *env); +void mdd_lprocfs_time_end(const struct lu_env *env, + struct mdd_device *mdd, int op); + +int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj); + +extern struct md_dir_operations mdd_dir_ops; +extern struct md_object_operations mdd_obj_ops; + +/* mdd_trans.c */ +void mdd_txn_param_build(const struct lu_env *env, struct mdd_device *mdd, + enum mdd_txn_op); +int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma, enum mdd_txn_op); + +static inline void mdd_object_put(const struct lu_env *env, + struct mdd_object *o) +{ + lu_object_put(env, &o->mod_obj.mo_lu); +} + +struct thandle* mdd_trans_start(const struct lu_env *env, + struct mdd_device *); + +void mdd_trans_stop(const struct lu_env *env, struct mdd_device *mdd, + int rc, struct thandle *handle); + +int mdd_txn_start_cb(const struct lu_env *env, struct txn_param *param, + void *cookie); + +int mdd_txn_stop_cb(const struct lu_env *env, struct thandle *txn, + void *cookie); + +int mdd_txn_commit_cb(const struct lu_env *env, struct thandle *txn, + void *cookie); +/* mdd_device.c */ +struct lu_object *mdd_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d); + +/* mdd_object.c */ +int accmode(const struct lu_env *env, struct lu_attr *la, int flags); +extern struct lu_context_key mdd_thread_key; +extern struct lu_device_operations mdd_lu_ops; + +struct mdd_object *mdd_object_find(const struct lu_env *env, + struct mdd_device *d, + const struct lu_fid *f); +/* mdd_permission.c */ +#define mdd_cap_t(x) (x) + +#define MDD_CAP_TO_MASK(x) (1 << (x)) + +#define mdd_cap_raised(c, flag) (mdd_cap_t(c) & MDD_CAP_TO_MASK(flag)) + +/* capable() is copied from linux kernel! */ +static inline int mdd_capable(struct md_ucred *uc, int cap) +{ + if (mdd_cap_raised(uc->mu_cap, cap)) + return 1; + return 0; +} + +int mdd_in_group_p(struct md_ucred *uc, gid_t grp); +int mdd_acl_def_get(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma); +int mdd_acl_chmod(const struct lu_env *env, struct mdd_object *o, __u32 mode, + struct thandle *handle); +int __mdd_acl_init(const struct lu_env *env, struct mdd_object *obj, + struct lu_buf *buf, __u32 *mode, struct thandle *handle); +int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, __u32 *mode, struct thandle *handle); +int __mdd_permission_internal(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, int mask, int needlock); +int mdd_permission(const struct lu_env *env, + struct md_object *pobj, struct md_object *cobj, + struct md_attr *ma, int mask); +int mdd_capa_get(const struct lu_env *env, struct md_object *obj, + struct lustre_capa *capa, int renewal); + +static inline int lu_device_is_mdd(struct lu_device *d) +{ + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &mdd_lu_ops); +} + +static inline struct mdd_device* lu2mdd_dev(struct lu_device *d) +{ + LASSERT(lu_device_is_mdd(d)); + return container_of0(d, struct mdd_device, mdd_md_dev.md_lu_dev); +} + +static inline struct lu_device *mdd2lu_dev(struct mdd_device *d) +{ + return (&d->mdd_md_dev.md_lu_dev); +} + +static inline struct mdd_object *lu2mdd_obj(struct lu_object *o) +{ + LASSERT(ergo(o != NULL, lu_device_is_mdd(o->lo_dev))); + return container_of0(o, struct mdd_object, mod_obj.mo_lu); +} + +static inline struct mdd_device* mdo2mdd(struct md_object *mdo) +{ + return lu2mdd_dev(mdo->mo_lu.lo_dev); +} + +static inline struct mdd_object* md2mdd_obj(struct md_object *mdo) +{ + return container_of0(mdo, struct mdd_object, mod_obj); +} + +static inline struct dt_device_operations *mdd_child_ops(struct mdd_device *d) +{ + return d->mdd_child->dd_ops; +} + +static inline struct lu_object *mdd2lu_obj(struct mdd_object *obj) +{ + return &obj->mod_obj.mo_lu; +} + +static inline struct dt_object* mdd_object_child(struct mdd_object *o) +{ + return container_of0(lu_object_next(mdd2lu_obj(o)), + struct dt_object, do_lu); +} + +static inline struct obd_device *mdd2obd_dev(struct mdd_device *mdd) +{ + return mdd->mdd_obd_dev; +} + +static inline struct mdd_device *mdd_obj2mdd_dev(struct mdd_object *obj) +{ + return mdo2mdd(&obj->mod_obj); +} + +static inline const struct lu_fid *mdo2fid(const struct mdd_object *obj) +{ + return lu_object_fid(&obj->mod_obj.mo_lu); +} + +static inline umode_t mdd_object_type(const struct mdd_object *obj) +{ + return lu_object_attr(&obj->mod_obj.mo_lu); +} + +static inline int mdd_lov_mdsize(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + return obd->u.mds.mds_max_mdsize; +} + +static inline int mdd_lov_cookiesize(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + return obd->u.mds.mds_max_cookiesize; +} + +static inline int mdd_is_immutable(struct mdd_object *obj) +{ + return obj->mod_flags & IMMUTE_OBJ; +} + +static inline int mdd_is_dead_obj(struct mdd_object *obj) +{ + return obj && obj->mod_flags & DEAD_OBJ; +} + +static inline int mdd_is_append(struct mdd_object *obj) +{ + return obj->mod_flags & APPEND_OBJ; +} + +static inline int mdd_object_exists(struct mdd_object *obj) +{ + return lu_object_exists(mdd2lu_obj(obj)); +} + +static inline const struct lu_fid *mdd_object_fid(struct mdd_object *obj) +{ + return lu_object_fid(mdd2lu_obj(obj)); +} + +static inline struct lustre_capa *mdd_object_capa(const struct lu_env *env, + const struct mdd_object *obj) +{ + struct md_capainfo *ci = md_capainfo(env); + const struct lu_fid *fid = mdo2fid(obj); + int i; + + /* NB: in mdt_init0 */ + if (!ci) + return BYPASS_CAPA; + for (i = 0; i < MD_CAPAINFO_MAX; i++) + if (ci->mc_fid[i] && lu_fid_eq(ci->mc_fid[i], fid)) + return ci->mc_capa[i]; + return NULL; +} + +static inline void mdd_set_capainfo(const struct lu_env *env, int offset, + const struct mdd_object *obj, + struct lustre_capa *capa) +{ + struct md_capainfo *ci = md_capainfo(env); + const struct lu_fid *fid = mdo2fid(obj); + + LASSERT(offset >= 0 && offset <= MD_CAPAINFO_MAX); + /* NB: in mdt_init0 */ + if (!ci) + return; + ci->mc_fid[offset] = fid; + ci->mc_capa[offset] = capa; +} + +enum { + LPROC_MDD_NR +}; + +static inline int mdd_permission_internal(const struct lu_env *env, + struct mdd_object *obj, + struct lu_attr *la, int mask) +{ + return __mdd_permission_internal(env, obj, la, mask, 0); +} + +static inline int mdd_permission_internal_locked(const struct lu_env *env, + struct mdd_object *obj, + struct lu_attr *la, int mask) +{ + return __mdd_permission_internal(env, obj, la, mask, 1); +} + +/* mdd inline func for calling osd_dt_object ops */ +static inline int mdo_attr_get(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + return next->do_ops->do_attr_get(env, next, la, capa); +} + +static inline int mdo_attr_set(const struct lu_env *env, struct mdd_object *obj, + const struct lu_attr *la, struct thandle *handle, + struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_attr_set(env, next, la, handle, capa); +} + +static inline int mdo_xattr_get(const struct lu_env *env,struct mdd_object *obj, + struct lu_buf *buf, const char *name, + struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + return next->do_ops->do_xattr_get(env, next, buf, name, capa); +} + +static inline int mdo_xattr_set(const struct lu_env *env,struct mdd_object *obj, + const struct lu_buf *buf, const char *name, + int fl, struct thandle *handle, + struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_xattr_set(env, next, buf, name, fl, handle, + capa); +} + +static inline int mdo_xattr_del(const struct lu_env *env,struct mdd_object *obj, + const char *name, struct thandle *handle, + struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_xattr_del(env, next, name, handle, capa); +} + +static inline +int mdo_xattr_list(const struct lu_env *env, struct mdd_object *obj, + struct lu_buf *buf, struct lustre_capa *capa) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_xattr_list(env, next, buf, capa); +} + +static inline +int mdo_index_try(const struct lu_env *env, struct mdd_object *obj, + const struct dt_index_features *feat) +{ + struct dt_object *next = mdd_object_child(obj); + return next->do_ops->do_index_try(env, next, feat); +} + +static inline void mdo_ref_add(const struct lu_env *env, struct mdd_object *obj, + struct thandle *handle) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_ref_add(env, next, handle); +} + +static inline void mdo_ref_del(const struct lu_env *env, struct mdd_object *obj, + struct thandle *handle) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_ref_del(env, next, handle); +} + +static inline +int mdo_create_obj(const struct lu_env *env, struct mdd_object *o, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *handle) +{ + struct dt_object *next = mdd_object_child(o); + return next->do_ops->do_create(env, next, attr, hint, handle); +} + +static inline struct obd_capa *mdo_capa_get(const struct lu_env *env, + struct mdd_object *obj, + struct lustre_capa *old, + __u32 uid, __u64 opc) +{ + struct dt_object *next = mdd_object_child(obj); + LASSERT(mdd_object_exists(obj)); + return next->do_ops->do_capa_get(env, next, old, uid, opc); +} + +#endif diff --git a/lustre/mdd/mdd_lock.c b/lustre/mdd/mdd_lock.c new file mode 100644 index 0000000..2bbbbda --- /dev/null +++ b/lustre/mdd/mdd_lock.c @@ -0,0 +1,105 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin <tappro@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <lustre_ver.h> +#include "mdd_internal.h" + +void mdd_write_lock(const struct lu_env *env, struct mdd_object *obj) +{ + struct dt_object *next = mdd_object_child(obj); + + next->do_ops->do_write_lock(env, next); +} + +void mdd_read_lock(const struct lu_env *env, struct mdd_object *obj) +{ + struct dt_object *next = mdd_object_child(obj); + next->do_ops->do_read_lock(env, next); +} + +void mdd_write_unlock(const struct lu_env *env, struct mdd_object *obj) +{ + struct dt_object *next = mdd_object_child(obj); + + next->do_ops->do_write_unlock(env, next); +} + +void mdd_read_unlock(const struct lu_env *env, struct mdd_object *obj) +{ + struct dt_object *next = mdd_object_child(obj); + + next->do_ops->do_read_unlock(env, next); +} + + +/* Methods for parallel directory locking */ + +void mdd_pdlock_init(struct mdd_object *obj) +{ + dynlock_init(&obj->mod_pdlock); + +} + +unsigned long mdd_name2hash(const char *name) +{ + return full_name_hash((unsigned char*)name, strlen(name)); +} + +struct dynlock_handle *mdd_pdo_write_lock(const struct lu_env *env, + struct mdd_object *obj, + const char *name) +{ + unsigned long value = mdd_name2hash(name); + return dynlock_lock(&obj->mod_pdlock, value, DLT_WRITE, GFP_NOFS); +} + +struct dynlock_handle *mdd_pdo_read_lock(const struct lu_env *env, + struct mdd_object *obj, + const char *name) +{ + unsigned long value = mdd_name2hash(name); + return dynlock_lock(&obj->mod_pdlock, value, DLT_READ, GFP_NOFS); +} + +void mdd_pdo_write_unlock(const struct lu_env *env, struct mdd_object *obj, + struct dynlock_handle *dlh) +{ + return dynlock_unlock(&obj->mod_pdlock, dlh); +} + +void mdd_pdo_read_unlock(const struct lu_env *env, struct mdd_object *obj, + struct dynlock_handle *dlh) +{ + return dynlock_unlock(&obj->mod_pdlock, dlh); +} + diff --git a/lustre/mdd/mdd_lov.c b/lustre/mdd/mdd_lov.c new file mode 100644 index 0000000..ec21996 --- /dev/null +++ b/lustre/mdd/mdd_lov.c @@ -0,0 +1,717 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdd/mdd_lov.c + * Lustre Metadata Server (mds) handling of striped file data + * + * Copyright (C) 2001-2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * wangdi <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <obd_lov.h> +#include <lprocfs_status.h> +#include <lustre_mds.h> +#include <lustre_fid.h> +#include <lustre/lustre_idl.h> + +#include "mdd_internal.h" + +static int mdd_lov_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner) +{ + struct mdd_device *mdd = owner; + int rc = 0; + ENTRY; + + LASSERT(owner != NULL); + + rc = md_do_upcall(NULL, &mdd->mdd_md_dev, MD_LOV_SYNC); + + RETURN(rc); +} + +/* The obd is created for handling data stack for mdd */ +int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd, + struct lustre_cfg *cfg) +{ + char *dev = lustre_cfg_string(cfg, 0); + int rc, name_size, uuid_size; + char *name, *uuid; + __u32 mds_id; + struct lustre_cfg_bufs *bufs; + struct lustre_cfg *lcfg; + struct obd_device *obd; + ENTRY; + + mds_id = mdd2lu_dev(mdd)->ld_site->ls_node_id; + name_size = strlen(MDD_OBD_NAME) + 35; + uuid_size = strlen(MDD_OBD_UUID) + 35; + + OBD_ALLOC(name, name_size); + OBD_ALLOC(uuid, uuid_size); + if (name == NULL || uuid == NULL) + GOTO(cleanup_mem, rc = -ENOMEM); + + OBD_ALLOC_PTR(bufs); + if (!bufs) + GOTO(cleanup_mem, rc = -ENOMEM); + + snprintf(name, strlen(MDD_OBD_NAME) + 35, "%s-%s-%d", + MDD_OBD_NAME, dev, mds_id); + + snprintf(uuid, strlen(MDD_OBD_UUID) + 35, "%s-%s-%d", + MDD_OBD_UUID, dev, mds_id); + + lustre_cfg_bufs_reset(bufs, name); + lustre_cfg_bufs_set_string(bufs, 1, MDD_OBD_TYPE); + lustre_cfg_bufs_set_string(bufs, 2, uuid); + lustre_cfg_bufs_set_string(bufs, 3, (char*)dev/* MDD_OBD_PROFILE */); + lustre_cfg_bufs_set_string(bufs, 4, (char*)dev); + + lcfg = lustre_cfg_new(LCFG_ATTACH, bufs); + OBD_FREE_PTR(bufs); + if (!lcfg) + GOTO(cleanup_mem, rc = -ENOMEM); + + rc = class_attach(lcfg); + if (rc) + GOTO(lcfg_cleanup, rc); + + obd = class_name2obd(name); + if (!obd) { + CERROR("Can not find obd %s\n", MDD_OBD_NAME); + LBUG(); + } + + obd->obd_recovering = 1; + obd->u.mds.mds_id = mds_id; + rc = class_setup(obd, lcfg); + if (rc) + GOTO(class_detach, rc); + + /* + * Add here for obd notify mechanism, when adding a new ost, the mds + * will notify this mdd. + */ + obd->obd_upcall.onu_upcall = mdd_lov_update; + obd->obd_upcall.onu_owner = mdd; + mdd->mdd_obd_dev = obd; + + EXIT; +class_detach: + if (rc) + class_detach(obd, lcfg); +lcfg_cleanup: + lustre_cfg_free(lcfg); +cleanup_mem: + if (name) + OBD_FREE(name, name_size); + if (uuid) + OBD_FREE(uuid, uuid_size); + return rc; +} + +int mdd_fini_obd(const struct lu_env *env, struct mdd_device *mdd, + struct lustre_cfg *lcfg) +{ + struct obd_device *obd; + int rc; + ENTRY; + + obd = mdd2obd_dev(mdd); + LASSERT(obd); + + rc = class_cleanup(obd, lcfg); + if (rc) + GOTO(lcfg_cleanup, rc); + + obd->obd_upcall.onu_upcall = NULL; + obd->obd_upcall.onu_owner = NULL; + rc = class_detach(obd, lcfg); + if (rc) + GOTO(lcfg_cleanup, rc); + mdd->mdd_obd_dev = NULL; + + EXIT; +lcfg_cleanup: + return rc; +} + +int mdd_get_md(const struct lu_env *env, struct mdd_object *obj, + void *md, int *md_size, const char *name) +{ + int rc; + ENTRY; + + rc = mdo_xattr_get(env, obj, mdd_buf_get(env, md, *md_size), name, + mdd_object_capa(env, obj)); + /* + * XXX: Handling of -ENODATA, the right way is to have ->do_md_get() + * exported by dt layer. + */ + if (rc == 0 || rc == -ENODATA) { + *md_size = 0; + rc = 0; + } else if (rc < 0) { + CERROR("Error %d reading eadata \n", rc); + } else { + /* XXX: Convert lov EA but fixed after verification test. */ + *md_size = rc; + } + + RETURN(rc); +} + +int mdd_get_md_locked(const struct lu_env *env, struct mdd_object *obj, + void *md, int *md_size, const char *name) +{ + int rc = 0; + mdd_read_lock(env, obj); + rc = mdd_get_md(env, obj, md, md_size, name); + mdd_read_unlock(env, obj); + return rc; +} + +static int mdd_lov_set_stripe_md(const struct lu_env *env, + struct mdd_object *obj, struct lu_buf *buf, + struct thandle *handle) +{ + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct obd_device *obd = mdd2obd_dev(mdd); + struct obd_export *lov_exp = obd->u.mds.mds_osc_exp; + struct lov_stripe_md *lsm = NULL; + int rc; + ENTRY; + + LASSERT(S_ISDIR(mdd_object_type(obj)) || S_ISREG(mdd_object_type(obj))); + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, lov_exp, 0, + &lsm, buf->lb_buf); + if (rc) + RETURN(rc); + obd_free_memmd(lov_exp, &lsm); + + rc = mdd_xattr_set_txn(env, obj, buf, MDS_LOV_MD_NAME, 0, handle); + + CDEBUG(D_INFO, "set lov ea of "DFID" rc %d \n", PFID(mdo2fid(obj)), rc); + RETURN(rc); +} + +/* + * Permission check is done before call it, + * no need check again. + */ +static int mdd_lov_set_dir_md(const struct lu_env *env, + struct mdd_object *obj, struct lu_buf *buf, + struct thandle *handle) +{ + struct lov_user_md *lum = NULL; + int rc = 0; + ENTRY; + + LASSERT(S_ISDIR(mdd_object_type(obj))); + lum = (struct lov_user_md*)buf->lb_buf; + + /* if { size, offset, count } = { 0, -1, 0 } (i.e. all default + * values specified) then delete default striping from dir. */ + if ((lum->lmm_stripe_size == 0 && lum->lmm_stripe_count == 0 && + lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1)) || + /* lmm_stripe_size == -1 is deprecated in 1.4.6 */ + lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1)){ + rc = mdd_xattr_set_txn(env, obj, &LU_BUF_NULL, + MDS_LOV_MD_NAME, 0, handle); + if (rc == -ENODATA) + rc = 0; + CDEBUG(D_INFO, "delete lov ea of "DFID" rc %d \n", + PFID(mdo2fid(obj)), rc); + } else { + rc = mdd_lov_set_stripe_md(env, obj, buf, handle); + } + RETURN(rc); +} + +int mdd_lsm_sanity_check(const struct lu_env *env, struct mdd_object *obj) +{ + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + if ((uc->mu_fsuid != tmp_la->la_uid) && !mdd_capable(uc, CAP_FOWNER)) + rc = mdd_permission_internal_locked(env, obj, tmp_la, + MAY_WRITE); + + RETURN(rc); +} + +int mdd_lov_set_md(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *child, struct lov_mds_md *lmmp, + int lmm_size, struct thandle *handle, int set_stripe) +{ + struct lu_buf *buf; + umode_t mode; + int rc = 0; + ENTRY; + + buf = mdd_buf_get(env, lmmp, lmm_size); + mode = mdd_object_type(child); + if (S_ISREG(mode) && lmm_size > 0) { + if (set_stripe) { + rc = mdd_lov_set_stripe_md(env, child, buf, handle); + } else { + rc = mdd_xattr_set_txn(env, child, buf, + MDS_LOV_MD_NAME, 0, handle); + } + } else if (S_ISDIR(mode)) { + if (lmmp == NULL && lmm_size == 0) { + struct mdd_device *mdd = mdd_obj2mdd_dev(child); + struct lov_mds_md *lmm = mdd_max_lmm_get(env, mdd); + int size = sizeof(*lmm); + + /* Get parent dir stripe and set */ + if (pobj != NULL) + rc = mdd_get_md_locked(env, pobj, lmm, &size, + MDS_LOV_MD_NAME); + if (rc > 0) { + buf = mdd_buf_get(env, lmm, size); + rc = mdd_xattr_set_txn(env, child, buf, + MDS_LOV_MD_NAME, 0, handle); + if (rc) + CERROR("error on copy stripe info: rc " + "= %d\n", rc); + } + } else { + LASSERT(lmmp != NULL && lmm_size > 0); + rc = mdd_lov_set_dir_md(env, child, buf, handle); + } + } + CDEBUG(D_INFO, "Set lov md %p size %d for fid "DFID" rc %d\n", + lmmp, lmm_size, PFID(mdo2fid(child)), rc); + RETURN(rc); +} + +/* + * XXX: this is for create lsm object id, which should identify the lsm object + * unique in the whole mds, as I see. But it seems, we still not need it + * now. Right? So just borrow the ll_fid_build_ino(). + */ +static obd_id mdd_lov_create_id(const struct lu_fid *fid) +{ + return fid_flatten(fid); +} + +static int mdd_lov_objid_alloc(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct mds_obd *mds = &mdd->mdd_obd_dev->u.mds; + + OBD_ALLOC(info->mti_oti.oti_objid, + mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id)); + return (info->mti_oti.oti_objid == NULL ? -ENOMEM : 0); +} + +void mdd_lov_objid_update(const struct lu_env *env, struct mdd_device *mdd) +{ + struct mdd_thread_info *info = mdd_env_info(env); + if (info->mti_oti.oti_objid != NULL) + mds_lov_update_objids(mdd->mdd_obd_dev, + info->mti_oti.oti_objid); +} + +static void mdd_lov_objid_from_lmm(const struct lu_env *env, + struct mdd_device *mdd, + struct lov_mds_md *lmm) +{ + struct mds_obd *mds = &mdd->mdd_obd_dev->u.mds; + struct mdd_thread_info *info = mdd_env_info(env); + mds_objids_from_lmm(info->mti_oti.oti_objid, lmm, &mds->mds_lov_desc); +} + +static void mdd_lov_objid_free(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct mds_obd *mds = &mdd->mdd_obd_dev->u.mds; + + OBD_FREE(info->mti_oti.oti_objid, + mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id)); + info->mti_oti.oti_objid = NULL; +} + +void mdd_lov_create_finish(const struct lu_env *env, struct mdd_device *mdd, + struct lov_mds_md *lmm, int lmm_size, + const struct md_op_spec *spec) +{ + struct mdd_thread_info *info = mdd_env_info(env); + + if (lmm && !spec->u.sp_ea.no_lov_create) + OBD_FREE(lmm, lmm_size); + + if (info->mti_oti.oti_objid != NULL) + mdd_lov_objid_free(env, mdd); +} + +int mdd_lov_create(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *parent, struct mdd_object *child, + struct lov_mds_md **lmm, int *lmm_size, + const struct md_op_spec *spec, struct lu_attr *la) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + struct obd_export *lov_exp = obd->u.mds.mds_osc_exp; + struct obdo *oa; + struct lov_stripe_md *lsm = NULL; + const void *eadata = spec->u.sp_ea.eadata; + __u32 create_flags = spec->sp_cr_flags; + struct obd_trans_info *oti = &mdd_env_info(env)->mti_oti; + int rc = 0; + ENTRY; + + if (create_flags & MDS_OPEN_DELAY_CREATE || + !(create_flags & FMODE_WRITE)) + RETURN(0); + + oti_init(oti, NULL); + rc = mdd_lov_objid_alloc(env, mdd); + if (rc != 0) + RETURN(rc); + + /* replay case, has objects already, only get lov from eadata */ + if (spec->u.sp_ea.no_lov_create != 0) { + *lmm = (struct lov_mds_md *)spec->u.sp_ea.eadata; + *lmm_size = spec->u.sp_ea.eadatalen; + mdd_lov_objid_from_lmm(env, mdd, *lmm); + RETURN(0); + } + + if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_ALLOC_OBDO)) + GOTO(out_ids, rc = -ENOMEM); + + LASSERT(lov_exp != NULL); + oa = &mdd_env_info(env)->mti_oa; + + oa->o_uid = 0; /* must have 0 uid / gid on OST */ + oa->o_gid = 0; + oa->o_gr = FILTER_GROUP_MDS0 + mdd2lu_dev(mdd)->ld_site->ls_node_id; + oa->o_mode = S_IFREG | 0600; + oa->o_id = mdd_lov_create_id(mdd_object_fid(child)); + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLFLAGS | + OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGROUP; + oa->o_size = 0; + + if (!(create_flags & MDS_OPEN_HAS_OBJS)) { + if (create_flags & MDS_OPEN_HAS_EA) { + LASSERT(eadata != NULL); + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, lov_exp, + 0, &lsm, (void*)eadata); + if (rc) + GOTO(out_oti, rc); + lsm->lsm_object_id = oa->o_id; + lsm->lsm_object_gr = oa->o_gr; + } else if (parent != NULL) { + /* get lov ea from parent and set to lov */ + struct lov_mds_md *_lmm; + int _lmm_size; + + _lmm_size = mdd_lov_mdsize(env, mdd); + _lmm = mdd_max_lmm_get(env, mdd); + + if (_lmm == NULL) + GOTO(out_oti, rc = -ENOMEM); + + rc = mdd_get_md_locked(env, parent, _lmm, + &_lmm_size, + MDS_LOV_MD_NAME); + if (rc > 0) + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, + lov_exp, 0, &lsm, _lmm); + if (rc) + GOTO(out_oti, rc); + } + + rc = obd_create(lov_exp, oa, &lsm, oti); + if (rc) { + if (rc > 0) { + CERROR("Create error for "DFID": %d\n", + PFID(mdo2fid(child)), rc); + rc = -EIO; + } + GOTO(out_oti, rc); + } + LASSERT(lsm->lsm_object_gr >= FILTER_GROUP_MDS0); + } else { + LASSERT(eadata != NULL); + rc = obd_iocontrol(OBD_IOC_LOV_SETEA, lov_exp, 0, &lsm, + (void*)eadata); + if (rc) + GOTO(out_oti, rc); + lsm->lsm_object_id = oa->o_id; + lsm->lsm_object_gr = oa->o_gr; + } + + /* + * Sometimes, we may truncate some object(without lsm) then open it + * (with write flags), so creating lsm above. The Nonzero(truncated) + * size should tell ost, since size attr is in charge by OST. + */ + if (la->la_size && la->la_valid & LA_SIZE) { + struct obd_info *oinfo = &mdd_env_info(env)->mti_oi; + + memset(oinfo, 0, sizeof(*oinfo)); + + oa->o_size = la->la_size; + + /* When setting attr to ost, FLBKSZ is not needed. */ + oa->o_valid &= ~OBD_MD_FLBLKSZ; + obdo_from_la(oa, la, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE); + + /* + * XXX: Pack lustre id to OST, in OST, it will be packed by + * filter_fid, but can not see what is the usages. So just pack + * o_seq o_ver here, maybe fix it after this cycle. + */ + oa->o_fid = fid_seq(mdd_object_fid(child)); + oa->o_generation = fid_oid(mdd_object_fid(child)); + oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER; + oinfo->oi_oa = oa; + oinfo->oi_md = lsm; + oinfo->oi_capa = mdo_capa_get(env, child, NULL, 0, + CAPA_OPC_MDS_DEFAULT); + if (IS_ERR(oinfo->oi_capa)) + oinfo->oi_capa = NULL; + + rc = obd_setattr(lov_exp, oinfo, oti); + capa_put(oinfo->oi_capa); + if (rc) { + CERROR("Error setting attrs for "DFID": rc %d\n", + PFID(mdo2fid(child)), rc); + if (rc > 0) { + CERROR("obd_setattr for "DFID" rc %d\n", + PFID(mdo2fid(child)), rc); + rc = -EIO; + } + GOTO(out_oti, rc); + } + } + + /* blksize should be changed after create data object */ + la->la_valid |= LA_BLKSIZE; + la->la_blksize = oa->o_blksize; + *lmm = NULL; + rc = obd_packmd(lov_exp, lmm, lsm); + if (rc < 0) { + CERROR("Cannot pack lsm, err = %d\n", rc); + GOTO(out_oti, rc); + } + *lmm_size = rc; + rc = 0; + EXIT; +out_oti: + oti_free_cookies(oti); +out_ids: + if (lsm) + obd_free_memmd(lov_exp, &lsm); + if (rc != 0) + mdd_lov_objid_free(env, mdd); + + return rc; +} + + +/* + * used when destroying orphans and from mds_reint_unlink() when MDS wants to + * destroy objects on OSS. + */ +static +int mdd_lovobj_unlink(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *obj, struct lu_attr *la, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, + int log_unlink) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + struct obd_export *lov_exp = obd->u.mds.mds_osc_exp; + struct lov_stripe_md *lsm = NULL; + struct obd_trans_info *oti = &mdd_env_info(env)->mti_oti; + struct obdo *oa = &mdd_env_info(env)->mti_oa; + int rc; + ENTRY; + + if (lmm_size == 0) + RETURN(0); + + rc = obd_unpackmd(lov_exp, &lsm, lmm, lmm_size); + if (rc < 0) { + CERROR("Error unpack md %p\n", lmm); + RETURN(rc); + } else { + LASSERT(rc >= sizeof(*lsm)); + rc = 0; + } + + oa->o_id = lsm->lsm_object_id; + oa->o_gr = FILTER_GROUP_MDS0 + mdd2lu_dev(mdd)->ld_site->ls_node_id; + oa->o_mode = la->la_mode & S_IFMT; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; + + oti_init(oti, NULL); + if (log_unlink && logcookies) { + oa->o_valid |= OBD_MD_FLCOOKIE; + oti->oti_logcookies = logcookies; + } + + CDEBUG(D_INFO, "destroying OSS object %d/%d\n", + (int)oa->o_id, (int)oa->o_gr); + + rc = obd_destroy(lov_exp, oa, lsm, oti, NULL); + + obd_free_memmd(lov_exp, &lsm); + RETURN(rc); +} + + +/* + * called with obj not locked. + */ +int mdd_lov_destroy(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *obj, struct lu_attr *la) +{ + struct md_attr *ma = &mdd_env_info(env)->mti_ma; + int rc; + ENTRY; + + if (unlikely(la->la_nlink != 0)) { + CWARN("Attempt to destroy OSS object when nlink == %d\n", + la->la_nlink); + RETURN(0); + } + + ma->ma_lmm_size = mdd_lov_mdsize(env, mdd); + ma->ma_lmm = mdd_max_lmm_get(env, mdd); + ma->ma_cookie_size = mdd_lov_cookiesize(env, mdd); + ma->ma_cookie = mdd_max_cookie_get(env, mdd); + if (ma->ma_lmm == NULL || ma->ma_cookie == NULL) + RETURN(rc = -ENOMEM); + + /* get lov ea */ + rc = mdd_get_md_locked(env, obj, ma->ma_lmm, &ma->ma_lmm_size, + MDS_LOV_MD_NAME); + if (rc) { + CWARN("Get lov ea failed for "DFID"\n", PFID(mdo2fid(obj))); + RETURN(rc); + } + ma->ma_valid = MA_LOV; + + rc = mdd_unlink_log(env, mdd, obj, ma); + if (rc) { + CWARN("mds unlink log for "DFID" failed: %d\n", + PFID(mdo2fid(obj)), rc); + RETURN(rc); + } + if (ma->ma_valid | MA_COOKIE) + rc = mdd_lovobj_unlink(env, mdd, obj, la, + ma->ma_lmm, ma->ma_lmm_size, + ma->ma_cookie, 1); + RETURN(rc); +} + + +int mdd_unlink_log(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *mdd_cobj, struct md_attr *ma) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + + LASSERT(ma->ma_valid & MA_LOV); + + if ((ma->ma_cookie_size > 0) && + (mds_log_op_unlink(obd, ma->ma_lmm, ma->ma_lmm_size, + ma->ma_cookie, ma->ma_cookie_size) > 0)) { + ma->ma_valid |= MA_COOKIE; + } + return 0; +} + +int mdd_setattr_log(const struct lu_env *env, struct mdd_device *mdd, + const struct md_attr *ma, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies, int cookies_size) +{ + struct obd_device *obd = mdd2obd_dev(mdd); + + /* journal chown/chgrp in llog, just like unlink */ + if (lmm_size > 0) { + CDEBUG(D_INFO, "setattr llog for uid/gid=%lu/%lu\n", + (unsigned long)ma->ma_attr.la_uid, + (unsigned long)ma->ma_attr.la_gid); + return mds_log_op_setattr(obd, ma->ma_attr.la_uid, + ma->ma_attr.la_gid, lmm, + lmm_size, logcookies, + cookies_size); + } else + return 0; +} + +int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj, + struct lov_mds_md *lmm, int lmm_size, + struct llog_cookie *logcookies) +{ + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct obd_device *obd = mdd2obd_dev(mdd); + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + const struct lu_fid *fid = mdd_object_fid(obj); + struct obd_capa *oc; + int rc = 0; + ENTRY; + + mdd_read_lock(env, obj); + rc = mdo_attr_get(env, obj, tmp_la, mdd_object_capa(env, obj)); + mdd_read_unlock(env, obj); + if (rc) + RETURN(rc); + + oc = mdo_capa_get(env, obj, NULL, 0, CAPA_OPC_MDS_DEFAULT); + if (IS_ERR(oc)) + oc = NULL; + + rc = mds_osc_setattr_async(obd, tmp_la->la_uid, tmp_la->la_gid, lmm, + lmm_size, logcookies, fid_seq(fid), + fid_oid(fid), oc); + + capa_put(oc); + + RETURN(rc); +} + diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c new file mode 100644 index 0000000..c4987dc --- /dev/null +++ b/lustre/mdd/mdd_lproc.c @@ -0,0 +1,102 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_lproc.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> +#include <lu_time.h> + +#include <lustre/lustre_idl.h> + +#include "mdd_internal.h" + +static const char *mdd_counter_names[LPROC_MDD_NR] = { +}; + +int mdd_procfs_init(struct mdd_device *mdd, const char *name) +{ + struct lu_device *ld = &mdd->mdd_md_dev.md_lu_dev; + struct obd_type *type; + int rc; + ENTRY; + + type = ld->ld_type->ldt_obd_type; + + LASSERT(name != NULL); + LASSERT(type != NULL); + + /* Find the type procroot and add the proc entry for this device */ + mdd->mdd_proc_entry = lprocfs_register(name, type->typ_procroot, + NULL, NULL); + if (IS_ERR(mdd->mdd_proc_entry)) { + rc = PTR_ERR(mdd->mdd_proc_entry); + CERROR("Error %d setting up lprocfs for %s\n", + rc, name); + mdd->mdd_proc_entry = NULL; + GOTO(out, rc); + } + + rc = lu_time_init(&mdd->mdd_stats, + mdd->mdd_proc_entry, + mdd_counter_names, ARRAY_SIZE(mdd_counter_names)); + EXIT; +out: + if (rc) + mdd_procfs_fini(mdd); + return rc; +} + +int mdd_procfs_fini(struct mdd_device *mdd) +{ + if (mdd->mdd_stats) + lu_time_fini(&mdd->mdd_stats); + + if (mdd->mdd_proc_entry) { + lprocfs_remove(&mdd->mdd_proc_entry); + mdd->mdd_proc_entry = NULL; + } + RETURN(0); +} + +void mdd_lprocfs_time_start(const struct lu_env *env) +{ + lu_lprocfs_time_start(env); +} + +void mdd_lprocfs_time_end(const struct lu_env *env, struct mdd_device *mdd, + int idx) +{ + lu_lprocfs_time_end(env, mdd->mdd_stats, idx); +} diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c new file mode 100644 index 0000000..b45d37b --- /dev/null +++ b/lustre/mdd/mdd_object.c @@ -0,0 +1,1509 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <linux/jbd.h> +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lprocfs_status.h> +/* fid_be_cpu(), fid_cpu_to_be(). */ +#include <lustre_fid.h> + +#include <linux/ldiskfs_fs.h> +#include <lustre_mds.h> +#include <lustre/lustre_idl.h> + +#include "mdd_internal.h" + +static struct lu_object_operations mdd_lu_obj_ops; + +int mdd_la_get(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, struct lustre_capa *capa) +{ + LASSERTF(mdd_object_exists(obj), "FID is "DFID"\n", + PFID(mdd_object_fid(obj))); + return mdo_attr_get(env, obj, la, capa); +} + +static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags) +{ + obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ); + + if (flags & LUSTRE_APPEND_FL) + obj->mod_flags |= APPEND_OBJ; + + if (flags & LUSTRE_IMMUTABLE_FL) + obj->mod_flags |= IMMUTE_OBJ; +} + +struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len) +{ + struct lu_buf *buf; + + buf = &mdd_env_info(env)->mti_buf; + buf->lb_buf = area; + buf->lb_len = len; + return buf; +} + +struct llog_cookie *mdd_max_cookie_get(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct mdd_thread_info *mti = mdd_env_info(env); + int max_cookie_size; + + max_cookie_size = mdd_lov_cookiesize(env, mdd); + if (unlikely(mti->mti_max_cookie_size < max_cookie_size)) { + if (mti->mti_max_cookie) + OBD_FREE(mti->mti_max_cookie, mti->mti_max_cookie_size); + mti->mti_max_cookie = NULL; + mti->mti_max_cookie_size = 0; + } + if (unlikely(mti->mti_max_cookie == NULL)) { + OBD_ALLOC(mti->mti_max_cookie, max_cookie_size); + if (unlikely(mti->mti_max_cookie != NULL)) + mti->mti_max_cookie_size = max_cookie_size; + } + return mti->mti_max_cookie; +} + +struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct mdd_thread_info *mti = mdd_env_info(env); + int max_lmm_size; + + max_lmm_size = mdd_lov_mdsize(env, mdd); + if (unlikely(mti->mti_max_lmm_size < max_lmm_size)) { + if (mti->mti_max_lmm) + OBD_FREE(mti->mti_max_lmm, mti->mti_max_lmm_size); + mti->mti_max_lmm = NULL; + mti->mti_max_lmm_size = 0; + } + if (unlikely(mti->mti_max_lmm == NULL)) { + OBD_ALLOC(mti->mti_max_lmm, max_lmm_size); + if (unlikely(mti->mti_max_lmm != NULL)) + mti->mti_max_lmm_size = max_lmm_size; + } + return mti->mti_max_lmm; +} + +const struct lu_buf *mdd_buf_get_const(const struct lu_env *env, + const void *area, ssize_t len) +{ + struct lu_buf *buf; + + buf = &mdd_env_info(env)->mti_buf; + buf->lb_buf = (void *)area; + buf->lb_len = len; + return buf; +} + +struct mdd_thread_info *mdd_env_info(const struct lu_env *env) +{ + struct mdd_thread_info *info; + + info = lu_context_key_get(&env->le_ctx, &mdd_thread_key); + LASSERT(info != NULL); + return info; +} + +struct lu_object *mdd_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d) +{ + struct mdd_object *mdd_obj; + + OBD_ALLOC_PTR(mdd_obj); + if (mdd_obj != NULL) { + struct lu_object *o; + + o = mdd2lu_obj(mdd_obj); + lu_object_init(o, NULL, d); + mdd_obj->mod_obj.mo_ops = &mdd_obj_ops; + mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops; + mdd_obj->mod_count = 0; + o->lo_ops = &mdd_lu_obj_ops; + return o; + } else { + return NULL; + } +} + +static int mdd_object_init(const struct lu_env *env, struct lu_object *o) +{ + struct mdd_device *d = lu2mdd_dev(o->lo_dev); + struct lu_object *below; + struct lu_device *under; + ENTRY; + + under = &d->mdd_child->dd_lu_dev; + below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under); + mdd_pdlock_init(lu2mdd_obj(o)); + if (below == NULL) + RETURN(-ENOMEM); + + lu_object_add(o, below); + RETURN(0); +} + +static int mdd_object_start(const struct lu_env *env, struct lu_object *o) +{ + if (lu_object_exists(o)) + return mdd_get_flags(env, lu2mdd_obj(o)); + else + return 0; +} + +static void mdd_object_free(const struct lu_env *env, struct lu_object *o) +{ + struct mdd_object *mdd = lu2mdd_obj(o); + + lu_object_fini(o); + OBD_FREE_PTR(mdd); +} + +static int mdd_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *o) +{ + return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p", o); +} + +/* orphan handling is here */ +static void mdd_object_delete(const struct lu_env *env, + struct lu_object *o) +{ + struct mdd_object *mdd_obj = lu2mdd_obj(o); + struct thandle *handle = NULL; + ENTRY; + + if (lu2mdd_dev(o->lo_dev)->mdd_orphans == NULL) + return; + + if (mdd_obj->mod_flags & ORPHAN_OBJ) { + mdd_txn_param_build(env, lu2mdd_dev(o->lo_dev), + MDD_TXN_INDEX_DELETE_OP); + handle = mdd_trans_start(env, lu2mdd_dev(o->lo_dev)); + if (IS_ERR(handle)) + CERROR("Cannot get thandle\n"); + else { + mdd_write_lock(env, mdd_obj); + /* let's remove obj from the orphan list */ + __mdd_orphan_del(env, mdd_obj, handle); + mdd_write_unlock(env, mdd_obj); + mdd_trans_stop(env, lu2mdd_dev(o->lo_dev), + 0, handle); + } + } +} + +static struct lu_object_operations mdd_lu_obj_ops = { + .loo_object_init = mdd_object_init, + .loo_object_start = mdd_object_start, + .loo_object_free = mdd_object_free, + .loo_object_print = mdd_object_print, + .loo_object_delete = mdd_object_delete +}; + +struct mdd_object *mdd_object_find(const struct lu_env *env, + struct mdd_device *d, + const struct lu_fid *f) +{ + struct lu_object *o, *lo; + struct mdd_object *m; + ENTRY; + + o = lu_object_find(env, mdd2lu_dev(d)->ld_site, f); + if (IS_ERR(o)) + m = (struct mdd_object *)o; + else { + lo = lu_object_locate(o->lo_header, mdd2lu_dev(d)->ld_type); + /* remote object can't be located and should be put then */ + if (lo == NULL) + lu_object_put(env, o); + m = lu2mdd_obj(lo); + } + RETURN(m); +} + +int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj) +{ + struct lu_attr *la = &mdd_env_info(env)->mti_la; + int rc; + + ENTRY; + rc = mdd_la_get(env, obj, la, BYPASS_CAPA); + if (rc == 0) + mdd_flags_xlate(obj, la->la_flags); + RETURN(rc); +} + +/* get only inode attributes */ +int mdd_iattr_get(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma) +{ + int rc = 0; + ENTRY; + + if (ma->ma_valid & MA_INODE) + RETURN(0); + + rc = mdd_la_get(env, mdd_obj, &ma->ma_attr, + mdd_object_capa(env, mdd_obj)); + if (rc == 0) + ma->ma_valid |= MA_INODE; + RETURN(rc); +} + +/* get lov EA only */ +static int __mdd_lmm_get(const struct lu_env *env, + struct mdd_object *mdd_obj, struct md_attr *ma) +{ + int rc; + ENTRY; + + if (ma->ma_valid & MA_LOV) + RETURN(0); + + rc = mdd_get_md(env, mdd_obj, ma->ma_lmm, &ma->ma_lmm_size, + MDS_LOV_MD_NAME); + if (rc > 0) { + ma->ma_valid |= MA_LOV; + rc = 0; + } + RETURN(rc); +} + +int mdd_lmm_get_locked(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma) +{ + int rc; + ENTRY; + + mdd_read_lock(env, mdd_obj); + rc = __mdd_lmm_get(env, mdd_obj, ma); + mdd_read_unlock(env, mdd_obj); + RETURN(rc); +} + +/* get lmv EA only*/ +static int __mdd_lmv_get(const struct lu_env *env, + struct mdd_object *mdd_obj, struct md_attr *ma) +{ + int rc; + ENTRY; + + if (ma->ma_valid & MA_LMV) + RETURN(0); + + rc = mdd_get_md(env, mdd_obj, ma->ma_lmv, &ma->ma_lmv_size, + MDS_LMV_MD_NAME); + if (rc > 0) { + ma->ma_valid |= MA_LMV; + rc = 0; + } + RETURN(rc); +} + +static int mdd_attr_get_internal(const struct lu_env *env, + struct mdd_object *mdd_obj, + struct md_attr *ma) +{ + int rc = 0; + ENTRY; + + if (ma->ma_need & MA_INODE) + rc = mdd_iattr_get(env, mdd_obj, ma); + + if (rc == 0 && ma->ma_need & MA_LOV) { + if (S_ISREG(mdd_object_type(mdd_obj)) || + S_ISDIR(mdd_object_type(mdd_obj))) + rc = __mdd_lmm_get(env, mdd_obj, ma); + } + if (rc == 0 && ma->ma_need & MA_LMV) { + if (S_ISDIR(mdd_object_type(mdd_obj))) + rc = __mdd_lmv_get(env, mdd_obj, ma); + } +#ifdef CONFIG_FS_POSIX_ACL + if (rc == 0 && ma->ma_need & MA_ACL_DEF) { + if (S_ISDIR(mdd_object_type(mdd_obj))) + rc = mdd_acl_def_get(env, mdd_obj, ma); + } +#endif + CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64"\n", + rc, ma->ma_valid); + RETURN(rc); +} + +int mdd_attr_get_internal_locked(const struct lu_env *env, + struct mdd_object *mdd_obj, struct md_attr *ma) +{ + int rc; + int needlock = ma->ma_need & (MA_LOV | MA_LMV | MA_ACL_DEF); + + if (needlock) + mdd_read_lock(env, mdd_obj); + rc = mdd_attr_get_internal(env, mdd_obj, ma); + if (needlock) + mdd_read_unlock(env, mdd_obj); + return rc; +} + +/* + * No permission check is needed. + */ +static int mdd_attr_get(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc; + + ENTRY; + rc = mdd_attr_get_internal_locked(env, mdd_obj, ma); + RETURN(rc); +} + +/* + * No permission check is needed. + */ +static int mdd_xattr_get(const struct lu_env *env, + struct md_object *obj, struct lu_buf *buf, + const char *name) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc; + + ENTRY; + + LASSERT(mdd_object_exists(mdd_obj)); + + mdd_read_lock(env, mdd_obj); + rc = mdo_xattr_get(env, mdd_obj, buf, name, + mdd_object_capa(env, mdd_obj)); + mdd_read_unlock(env, mdd_obj); + + RETURN(rc); +} + +/* + * Permission check is done when open, + * no need check again. + */ +static int mdd_readlink(const struct lu_env *env, struct md_object *obj, + struct lu_buf *buf) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct dt_object *next; + loff_t pos = 0; + int rc; + ENTRY; + + LASSERT(mdd_object_exists(mdd_obj)); + + next = mdd_object_child(mdd_obj); + mdd_read_lock(env, mdd_obj); + rc = next->do_body_ops->dbo_read(env, next, buf, &pos, + mdd_object_capa(env, mdd_obj)); + mdd_read_unlock(env, mdd_obj); + RETURN(rc); +} + +/* + * No permission check is needed. + */ +static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj, + struct lu_buf *buf) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc; + + ENTRY; + + mdd_read_lock(env, mdd_obj); + rc = mdo_xattr_list(env, mdd_obj, buf, mdd_object_capa(env, mdd_obj)); + mdd_read_unlock(env, mdd_obj); + + RETURN(rc); +} + +int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p, + struct mdd_object *c, struct md_attr *ma, + struct thandle *handle) +{ + struct lu_attr *attr = &ma->ma_attr; + struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint; + int rc; + ENTRY; + + if (!mdd_object_exists(c)) { + struct dt_object *next = mdd_object_child(c); + LASSERT(next); + + /* @hint will be initialized by underlying device. */ + next->do_ops->do_ah_init(env, hint, + p ? mdd_object_child(p) : NULL, + attr->la_mode & S_IFMT); + rc = mdo_create_obj(env, c, attr, hint, handle); + LASSERT(ergo(rc == 0, mdd_object_exists(c))); + } else + rc = -EEXIST; + + RETURN(rc); +} + + +int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj, + const struct lu_attr *attr, struct thandle *handle, + const int needacl) +{ + int rc; + ENTRY; + + rc = mdo_attr_set(env, obj, attr, handle, mdd_object_capa(env, obj)); +#ifdef CONFIG_FS_POSIX_ACL + if (!rc && (attr->la_valid & LA_MODE) && needacl) + rc = mdd_acl_chmod(env, obj, attr->la_mode, handle); +#endif + RETURN(rc); +} + +int mdd_attr_set_internal_locked(const struct lu_env *env, + struct mdd_object *o, + const struct lu_attr *attr, + struct thandle *handle, int needacl) +{ + int rc; + ENTRY; + + needacl = needacl && (attr->la_valid & LA_MODE); + + if (needacl) + mdd_write_lock(env, o); + + rc = mdd_attr_set_internal(env, o, attr, handle, needacl); + + if (needacl) + mdd_write_unlock(env, o); + RETURN(rc); +} + +static int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj, + const struct lu_buf *buf, const char *name, + int fl, struct thandle *handle) +{ + struct lustre_capa *capa = mdd_object_capa(env, obj); + int rc = -EINVAL; + ENTRY; + + if (buf->lb_buf && buf->lb_len > 0) + rc = mdo_xattr_set(env, obj, buf, name, 0, handle, capa); + else if (buf->lb_buf == NULL && buf->lb_len == 0) + rc = mdo_xattr_del(env, obj, name, handle, capa); + + RETURN(rc); +} + +/* + * This gives the same functionality as the code between + * sys_chmod and inode_setattr + * chown_common and inode_setattr + * utimes and inode_setattr + * This API is ported from mds_fix_attr but remove some unnecesssary stuff. + */ +static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, const struct md_attr *ma) +{ + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + if (!la->la_valid) + RETURN(0); + + /* Do not permit change file type */ + if (la->la_valid & LA_TYPE) + RETURN(-EPERM); + + /* They should not be processed by setattr */ + if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE)) + RETURN(-EPERM); + + /* This is only for set ctime when rename's source is on remote MDS. */ + if (unlikely(la->la_valid == LA_CTIME)) { + rc = mdd_may_delete(env, NULL, obj, (struct md_attr *)ma, 1, 0); + RETURN(rc); + } + + rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + if (la->la_valid == LA_ATIME) { + /* This is atime only set for read atime update on close. */ + if (la->la_atime <= tmp_la->la_atime + 0/*XXX:mds_atime_diff*/) + la->la_valid &= ~LA_ATIME; + RETURN(0); + } + + /* Check if flags change. */ + if (la->la_valid & LA_FLAGS) { + unsigned int oldflags = 0; + unsigned int newflags = la->la_flags & + (LUSTRE_IMMUTABLE_FL | LUSTRE_APPEND_FL); + + if ((uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + /* XXX: the IMMUTABLE and APPEND_ONLY flags can + * only be changed by the relevant capability. */ + if (mdd_is_immutable(obj)) + oldflags |= LUSTRE_IMMUTABLE_FL; + if (mdd_is_append(obj)) + oldflags |= LUSTRE_APPEND_FL; + if ((oldflags ^ newflags) && + !mdd_capable(uc, CAP_LINUX_IMMUTABLE)) + RETURN(-EPERM); + + if (!S_ISDIR(tmp_la->la_mode)) + la->la_flags &= ~LUSTRE_DIRSYNC_FL; + } + + if ((mdd_is_immutable(obj) || mdd_is_append(obj)) && + (la->la_valid & ~LA_FLAGS) && + !(ma->ma_attr_flags & MDS_PERM_BYPASS)) + RETURN(-EPERM); + + /* Check for setting the obj time. */ + if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) && + !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) { + if ((uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) { + rc = mdd_permission_internal_locked(env, obj, tmp_la, + MAY_WRITE); + if (rc) + RETURN(rc); + } + } + + /* Make sure a caller can chmod. */ + if (la->la_valid & LA_MODE) { + /* + * Bypass la_vaild == LA_MODE, + * this is for changing file with SUID or SGID. + */ + if ((la->la_valid & ~LA_MODE) && + (uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + if (la->la_mode == (umode_t) -1) + la->la_mode = tmp_la->la_mode; + else + la->la_mode = (la->la_mode & S_IALLUGO) | + (tmp_la->la_mode & ~S_IALLUGO); + + /* Also check the setgid bit! */ + if (!mdd_in_group_p(uc, (la->la_valid & LA_GID) ? la->la_gid : + tmp_la->la_gid) && !mdd_capable(uc, CAP_FSETID)) + la->la_mode &= ~S_ISGID; + } else { + la->la_mode = tmp_la->la_mode; + } + + /* Make sure a caller can chown. */ + if (la->la_valid & LA_UID) { + if (la->la_uid == (uid_t) -1) + la->la_uid = tmp_la->la_uid; + if (((uc->mu_fsuid != tmp_la->la_uid) || + (la->la_uid != tmp_la->la_uid)) && + !mdd_capable(uc, CAP_CHOWN)) + RETURN(-EPERM); + + /* + * If the user or group of a non-directory has been + * changed by a non-root user, remove the setuid bit. + * 19981026 David C Niemi <niemi@tux.org> + * + * Changed this to apply to all users, including root, + * to avoid some races. This is the behavior we had in + * 2.0. The check for non-root was definitely wrong + * for 2.2 anyway, as it should have been using + * CAP_FSETID rather than fsuid -- 19990830 SD. + */ + if (((tmp_la->la_mode & S_ISUID) == S_ISUID) && + !S_ISDIR(tmp_la->la_mode)) { + la->la_mode &= ~S_ISUID; + la->la_valid |= LA_MODE; + } + } + + /* Make sure caller can chgrp. */ + if (la->la_valid & LA_GID) { + if (la->la_gid == (gid_t) -1) + la->la_gid = tmp_la->la_gid; + if (((uc->mu_fsuid != tmp_la->la_uid) || + ((la->la_gid != tmp_la->la_gid) && + !mdd_in_group_p(uc, la->la_gid))) && + !mdd_capable(uc, CAP_CHOWN)) + RETURN(-EPERM); + + /* + * Likewise, if the user or group of a non-directory + * has been changed by a non-root user, remove the + * setgid bit UNLESS there is no group execute bit + * (this would be a file marked for mandatory + * locking). 19981026 David C Niemi <niemi@tux.org> + * + * Removed the fsuid check (see the comment above) -- + * 19990830 SD. + */ + if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) == + (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) { + la->la_mode &= ~S_ISGID; + la->la_valid |= LA_MODE; + } + } + + /* For truncate (or setsize), we should have MAY_WRITE perm */ + if (la->la_valid & (LA_SIZE | LA_BLOCKS)) { + if (!((la->la_valid & MDS_OPEN_OWNEROVERRIDE) && + (uc->mu_fsuid == tmp_la->la_uid)) && + !(ma->ma_attr_flags & MDS_PERM_BYPASS)) { + rc = mdd_permission_internal_locked(env, obj, tmp_la, + MAY_WRITE); + if (rc) + RETURN(rc); + } + + /* For the "Size-on-MDS" setattr update, merge coming + * attributes with the set in the inode. BUG 10641 */ + if ((la->la_valid & LA_ATIME) && + (la->la_atime <= tmp_la->la_atime)) + la->la_valid &= ~LA_ATIME; + + /* OST attributes do not have a priority over MDS attributes, + * so drop times if ctime is equal. */ + if ((la->la_valid & LA_CTIME) && + (la->la_ctime <= tmp_la->la_ctime)) + la->la_valid &= ~(LA_MTIME | LA_CTIME); + } else if (la->la_valid & LA_CTIME) { + /* The pure setattr, it has the priority over what is already + * set, do not drop it if ctime is equal. */ + if (la->la_ctime < tmp_la->la_ctime) + la->la_valid &= ~(LA_ATIME | LA_MTIME | LA_CTIME); + } + + RETURN(0); +} + +/* set attr and LOV EA at once, return updated attr */ +static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, + const struct md_attr *ma) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct mdd_device *mdd = mdo2mdd(obj); + struct thandle *handle; + struct lov_mds_md *lmm = NULL; + struct llog_cookie *logcookies = NULL; + int rc, lmm_size = 0, cookie_size = 0; + struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_ATTR_SET_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + /*TODO: add lock here*/ + /* start a log jounal handle if needed */ + if (S_ISREG(mdd_object_type(mdd_obj)) && + ma->ma_attr.la_valid & (LA_UID | LA_GID)) { + lmm_size = mdd_lov_mdsize(env, mdd); + lmm = mdd_max_lmm_get(env, mdd); + if (lmm == NULL) + GOTO(cleanup, rc = -ENOMEM); + + rc = mdd_get_md_locked(env, mdd_obj, lmm, &lmm_size, + MDS_LOV_MD_NAME); + + if (rc < 0) + GOTO(cleanup, rc); + } + + if (ma->ma_attr.la_valid & (ATTR_MTIME | ATTR_CTIME)) + CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n", + ma->ma_attr.la_mtime, ma->ma_attr.la_ctime); + + *la_copy = ma->ma_attr; + rc = mdd_fix_attr(env, mdd_obj, la_copy, ma); + if (rc) + GOTO(cleanup, rc); + + if (la_copy->la_valid & LA_FLAGS) { + rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, + handle, 1); + if (rc == 0) + mdd_flags_xlate(mdd_obj, la_copy->la_flags); + } else if (la_copy->la_valid) { /* setattr */ + rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, + handle, 1); + /* journal chown/chgrp in llog, just like unlink */ + if (rc == 0 && lmm_size){ + cookie_size = mdd_lov_cookiesize(env, mdd); + logcookies = mdd_max_cookie_get(env, mdd); + if (logcookies == NULL) + GOTO(cleanup, rc = -ENOMEM); + + if (mdd_setattr_log(env, mdd, ma, lmm, lmm_size, + logcookies, cookie_size) <= 0) + logcookies = NULL; + } + } + + if (rc == 0 && ma->ma_valid & MA_LOV) { + umode_t mode; + + mode = mdd_object_type(mdd_obj); + if (S_ISREG(mode) || S_ISDIR(mode)) { + rc = mdd_lsm_sanity_check(env, mdd_obj); + if (rc) + GOTO(cleanup, rc); + + rc = mdd_lov_set_md(env, NULL, mdd_obj, ma->ma_lmm, + ma->ma_lmm_size, handle, 1); + } + + } +cleanup: + mdd_trans_stop(env, mdd, rc, handle); + if (rc == 0 && (lmm != NULL && lmm_size > 0 )) { + /*set obd attr, if needed*/ + rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size, + logcookies); + } + RETURN(rc); +} + +int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj, + const struct lu_buf *buf, const char *name, int fl, + struct thandle *handle) +{ + int rc; + ENTRY; + + mdd_write_lock(env, obj); + rc = __mdd_xattr_set(env, obj, buf, name, fl, handle); + mdd_write_unlock(env, obj); + + RETURN(rc); +} + +static int mdd_xattr_sanity_check(const struct lu_env *env, + struct mdd_object *obj) +{ + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + struct md_ucred *uc = md_ucred(env); + int rc; + ENTRY; + + if (mdd_is_immutable(obj) || mdd_is_append(obj)) + RETURN(-EPERM); + + rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + if ((uc->mu_fsuid != tmp_la->la_uid) && !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + RETURN(rc); +} + +static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj, + const struct lu_buf *buf, const char *name, int fl) +{ + struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct mdd_device *mdd = mdo2mdd(obj); + struct thandle *handle; + int rc; + ENTRY; + + rc = mdd_xattr_sanity_check(env, mdd_obj); + if (rc) + RETURN(rc); + + mdd_txn_param_build(env, mdd, MDD_TXN_XATTR_SET_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + rc = mdd_xattr_set_txn(env, md2mdd_obj(obj), buf, name, + fl, handle); + if (rc == 0) { + la_copy->la_ctime = CURRENT_SECONDS; + la_copy->la_valid = LA_CTIME; + rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, + handle, 0); + } + mdd_trans_stop(env, mdd, rc, handle); + + RETURN(rc); +} + +int mdd_xattr_del(const struct lu_env *env, struct md_object *obj, + const char *name) +{ + struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct mdd_device *mdd = mdo2mdd(obj); + struct thandle *handle; + int rc; + ENTRY; + + rc = mdd_xattr_sanity_check(env, mdd_obj); + if (rc) + RETURN(rc); + + mdd_txn_param_build(env, mdd, MDD_TXN_XATTR_SET_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + mdd_write_lock(env, mdd_obj); + rc = mdo_xattr_del(env, md2mdd_obj(obj), name, handle, + mdd_object_capa(env, mdd_obj)); + mdd_write_unlock(env, mdd_obj); + if (rc == 0) { + la_copy->la_ctime = CURRENT_SECONDS; + la_copy->la_valid = LA_CTIME; + rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, + handle, 0); + } + + mdd_trans_stop(env, mdd, rc, handle); + + RETURN(rc); +} + +/* partial unlink */ +static int mdd_ref_del(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma) +{ + struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct mdd_device *mdd = mdo2mdd(obj); + struct thandle *handle; + int rc; + ENTRY; + + /* + * Check -ENOENT early here because we need to get object type + * to calculate credits before transaction start + */ + if (!mdd_object_exists(mdd_obj)) + RETURN(-ENOENT); + + LASSERT(mdd_object_exists(mdd_obj) > 0); + + rc = mdd_log_txn_param_build(env, obj, ma, MDD_TXN_UNLINK_OP); + if (rc) + RETURN(rc); + + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(-ENOMEM); + + mdd_write_lock(env, mdd_obj); + + rc = mdd_unlink_sanity_check(env, NULL, mdd_obj, ma); + if (rc) + GOTO(cleanup, rc); + + mdo_ref_del(env, mdd_obj, handle); + + if (S_ISDIR(lu_object_attr(&obj->mo_lu))) { + /* unlink dot */ + mdo_ref_del(env, mdd_obj, handle); + } + + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la_copy->la_ctime = ma->ma_attr.la_ctime; + + la_copy->la_valid = LA_CTIME; + rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 0); + if (rc) + GOTO(cleanup, rc); + + rc = mdd_finish_unlink(env, mdd_obj, ma, handle); + + EXIT; +cleanup: + mdd_write_unlock(env, mdd_obj); + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* partial operation */ +static int mdd_oc_sanity_check(const struct lu_env *env, + struct mdd_object *obj, + struct md_attr *ma) +{ + int rc; + ENTRY; + + switch (ma->ma_attr.la_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + rc = 0; + break; + default: + rc = -EINVAL; + break; + } + RETURN(rc); +} + +static int mdd_object_create(const struct lu_env *env, + struct md_object *obj, + const struct md_op_spec *spec, + struct md_attr *ma) +{ + + struct mdd_device *mdd = mdo2mdd(obj); + struct mdd_object *mdd_obj = md2mdd_obj(obj); + const struct lu_fid *pfid = spec->u.sp_pfid; + struct thandle *handle; + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_OBJECT_CREATE_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + mdd_write_lock(env, mdd_obj); + rc = mdd_oc_sanity_check(env, mdd_obj, ma); + if (rc) + GOTO(unlock, rc); + + rc = mdd_object_create_internal(env, NULL, mdd_obj, ma, handle); + if (rc) + GOTO(unlock, rc); + + if (spec->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { + /* If creating the slave object, set slave EA here. */ + int lmv_size = spec->u.sp_ea.eadatalen; + struct lmv_stripe_md *lmv; + + lmv = (struct lmv_stripe_md *)spec->u.sp_ea.eadata; + LASSERT(lmv != NULL && lmv_size > 0); + + rc = __mdd_xattr_set(env, mdd_obj, + mdd_buf_get_const(env, lmv, lmv_size), + MDS_LMV_MD_NAME, 0, handle); + if (rc) + GOTO(unlock, rc); + + rc = mdd_attr_set_internal(env, mdd_obj, &ma->ma_attr, handle, 0); + } else { +#ifdef CONFIG_FS_POSIX_ACL + if (spec->sp_cr_flags & MDS_CREATE_RMT_ACL) { + struct lu_buf *buf = &mdd_env_info(env)->mti_buf; + + buf->lb_buf = (void *)spec->u.sp_ea.eadata; + buf->lb_len = spec->u.sp_ea.eadatalen; + if ((buf->lb_len > 0) && (buf->lb_buf != NULL)) { + rc = __mdd_acl_init(env, mdd_obj, buf, + &ma->ma_attr.la_mode, + handle); + if (rc) + GOTO(unlock, rc); + else + ma->ma_attr.la_valid |= LA_MODE; + } + + pfid = spec->u.sp_ea.fid; + } +#endif + rc = mdd_object_initialize(env, pfid, mdd_obj, ma, handle); + } + EXIT; +unlock: + mdd_write_unlock(env, mdd_obj); + if (rc == 0) + rc = mdd_attr_get_internal_locked(env, mdd_obj, ma); + + mdd_trans_stop(env, mdd, rc, handle); + return rc; +} + +/* partial link */ +static int mdd_ref_add(const struct lu_env *env, struct md_object *obj, + const struct md_attr *ma) +{ + struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct mdd_device *mdd = mdo2mdd(obj); + struct thandle *handle; + int rc; + ENTRY; + + mdd_txn_param_build(env, mdd, MDD_TXN_XATTR_SET_OP); + handle = mdd_trans_start(env, mdd); + if (IS_ERR(handle)) + RETURN(-ENOMEM); + + mdd_write_lock(env, mdd_obj); + rc = mdd_link_sanity_check(env, NULL, NULL, mdd_obj); + if (rc == 0) + mdo_ref_add(env, mdd_obj, handle); + mdd_write_unlock(env, mdd_obj); + if (rc == 0) { + LASSERT(ma->ma_attr.la_valid & LA_CTIME); + la_copy->la_ctime = ma->ma_attr.la_ctime; + + la_copy->la_valid = LA_CTIME; + rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, + handle, 0); + } + mdd_trans_stop(env, mdd, 0, handle); + + RETURN(rc); +} + +/* + * do NOT or the MAY_*'s, you'll get the weakest + */ +int accmode(const struct lu_env *env, struct lu_attr *la, int flags) +{ + int res = 0; + + /* Sadly, NFSD reopens a file repeatedly during operation, so the + * "acc_mode = 0" allowance for newly-created files isn't honoured. + * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file + * owner can write to a file even if it is marked readonly to hide + * its brokenness. (bug 5781) */ + if (flags & MDS_OPEN_OWNEROVERRIDE) { + struct md_ucred *uc = md_ucred(env); + + if ((uc == NULL) || (uc->mu_valid == UCRED_INIT) || + (la->la_uid == uc->mu_fsuid)) + return 0; + } + + if (flags & FMODE_READ) + res |= MAY_READ; + if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND)) + res |= MAY_WRITE; + if (flags & MDS_FMODE_EXEC) + res |= MAY_EXEC; + return res; +} + +static int mdd_open_sanity_check(const struct lu_env *env, + struct mdd_object *obj, int flag) +{ + struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; + int mode, rc; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + if (S_ISLNK(tmp_la->la_mode)) + RETURN(-ELOOP); + + mode = accmode(env, tmp_la, flag); + + if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE)) + RETURN(-EISDIR); + + if (!(flag & MDS_OPEN_CREATED)) { + rc = mdd_permission_internal(env, obj, tmp_la, mode); + if (rc) + RETURN(rc); + } + + if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) || + S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode)) + flag &= ~MDS_OPEN_TRUNC; + + /* For writing append-only file must open it with append mode. */ + if (mdd_is_append(obj)) { + if ((flag & FMODE_WRITE) && !(flag & MDS_OPEN_APPEND)) + RETURN(-EPERM); + if (flag & MDS_OPEN_TRUNC) + RETURN(-EPERM); + } + +#if 0 + /* + * Now, flag -- O_NOATIME does not be packed by client. + */ + if (flag & O_NOATIME) { + struct md_ucred *uc = md_ucred(env); + + if (uc && ((uc->mu_valid == UCRED_OLD) || + (uc->mu_valid == UCRED_NEW)) && + (uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + } +#endif + + RETURN(0); +} + +static int mdd_open(const struct lu_env *env, struct md_object *obj, + int flags) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc = 0; + + mdd_write_lock(env, mdd_obj); + + rc = mdd_open_sanity_check(env, mdd_obj, flags); + if (rc == 0) + mdd_obj->mod_count++; + + mdd_write_unlock(env, mdd_obj); + return rc; +} + +/* return md_attr back, + * if it is last unlink then return lov ea + llog cookie*/ +int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj, + struct md_attr *ma) +{ + int rc = 0; + ENTRY; + + if (S_ISREG(mdd_object_type(obj))) { + /* Return LOV & COOKIES unconditionally here. We clean evth up. + * Caller must be ready for that. */ + rc = __mdd_lmm_get(env, obj, ma); + if ((ma->ma_valid & MA_LOV)) + rc = mdd_unlink_log(env, mdo2mdd(&obj->mod_obj), + obj, ma); + } + RETURN(rc); +} + +/* + * No permission check is needed. + */ +static int mdd_close(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma) +{ + int rc; + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct thandle *handle; + ENTRY; + + rc = mdd_log_txn_param_build(env, obj, ma, MDD_TXN_UNLINK_OP); + if (rc) + RETURN(rc); + handle = mdd_trans_start(env, mdo2mdd(obj)); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + mdd_write_lock(env, mdd_obj); + /* release open count */ + mdd_obj->mod_count --; + + rc = mdd_iattr_get(env, mdd_obj, ma); + if (rc == 0 && mdd_obj->mod_count == 0 && ma->ma_attr.la_nlink == 0) + rc = mdd_object_kill(env, mdd_obj, ma); + else + ma->ma_valid &= ~(MA_LOV | MA_COOKIE); + + mdd_write_unlock(env, mdd_obj); + mdd_trans_stop(env, mdo2mdd(obj), rc, handle); + RETURN(rc); +} + +/* + * Permission check is done when open, + * no need check again. + */ +static int mdd_readpage_sanity_check(const struct lu_env *env, + struct mdd_object *obj) +{ + struct dt_object *next = mdd_object_child(obj); + int rc; + ENTRY; + + if (S_ISDIR(mdd_object_type(obj)) && dt_try_as_dir(env, next)) + rc = 0; + else + rc = -ENOTDIR; + + RETURN(rc); +} + +static int mdd_dir_page_build(const struct lu_env *env, int first, + void *area, int nob, struct dt_it_ops *iops, + struct dt_it *it, __u32 *start, __u32 *end, + struct lu_dirent **last) +{ + struct lu_fid *fid = &mdd_env_info(env)->mti_fid2; + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_fid_pack *pack = &info->mti_pack; + int result; + struct lu_dirent *ent; + + if (first) { + memset(area, 0, sizeof (struct lu_dirpage)); + area += sizeof (struct lu_dirpage); + nob -= sizeof (struct lu_dirpage); + } + + LASSERT(nob > sizeof *ent); + + ent = area; + result = 0; + do { + char *name; + int len; + int recsize; + __u32 hash; + + name = (char *)iops->key(env, it); + len = iops->key_size(env, it); + + pack = (struct lu_fid_pack *)iops->rec(env, it); + fid_unpack(pack, fid); + + recsize = (sizeof(*ent) + len + 3) & ~3; + hash = iops->store(env, it); + *end = hash; + + CDEBUG(D_INFO, "%p %p %d "DFID": %#8.8x (%d) \"%*.*s\"\n", + name, ent, nob, PFID(fid), hash, len, len, len, name); + + if (nob >= recsize) { + ent->lde_fid = *fid; + fid_cpu_to_le(&ent->lde_fid, &ent->lde_fid); + ent->lde_hash = hash; + ent->lde_namelen = cpu_to_le16(len); + ent->lde_reclen = cpu_to_le16(recsize); + memcpy(ent->lde_name, name, len); + if (first && ent == area) + *start = hash; + *last = ent; + ent = (void *)ent + recsize; + nob -= recsize; + result = iops->next(env, it); + } else { + /* + * record doesn't fit into page, enlarge previous one. + */ + LASSERT(*last != NULL); + (*last)->lde_reclen = + cpu_to_le16(le16_to_cpu((*last)->lde_reclen) + + nob); + break; + } + } while (result == 0); + + return result; +} + +static int __mdd_readpage(const struct lu_env *env, struct mdd_object *obj, + const struct lu_rdpg *rdpg) +{ + struct dt_it *it; + struct dt_object *next = mdd_object_child(obj); + struct dt_it_ops *iops; + struct page *pg; + struct lu_dirent *last; + int i; + int rc; + int nob; + __u32 hash_start; + __u32 hash_end; + + LASSERT(rdpg->rp_pages != NULL); + LASSERT(next->do_index_ops != NULL); + + if (rdpg->rp_count <= 0) + return -EFAULT; + + /* + * iterate through directory and fill pages from @rdpg + */ + iops = &next->do_index_ops->dio_it; + it = iops->init(env, next, 0, mdd_object_capa(env, obj)); + if (it == NULL) + return -ENOMEM; + + rc = iops->load(env, it, rdpg->rp_hash); + + if (rc == 0) + /* + * Iterator didn't find record with exactly the key requested. + * + * It is currently either + * + * - positioned above record with key less than + * requested---skip it. + * + * - or not positioned at all (is in IAM_IT_SKEWED + * state)---position it on the next item. + */ + rc = iops->next(env, it); + else if (rc > 0) + rc = 0; + + /* + * At this point and across for-loop: + * + * rc == 0 -> ok, proceed. + * rc > 0 -> end of directory. + * rc < 0 -> error. + */ + for (i = 0, nob = rdpg->rp_count; rc == 0 && nob > 0; + i++, nob -= CFS_PAGE_SIZE) { + LASSERT(i < rdpg->rp_npages); + pg = rdpg->rp_pages[i]; + rc = mdd_dir_page_build(env, !i, kmap(pg), + min_t(int, nob, CFS_PAGE_SIZE), iops, + it, &hash_start, &hash_end, &last); + if (rc != 0 || i == rdpg->rp_npages - 1) + last->lde_reclen = 0; + kunmap(pg); + } + if (rc > 0) { + /* + * end of directory. + */ + hash_end = DIR_END_OFF; + rc = 0; + } + if (rc == 0) { + struct lu_dirpage *dp; + + dp = kmap(rdpg->rp_pages[0]); + dp->ldp_hash_start = rdpg->rp_hash; + dp->ldp_hash_end = hash_end; + if (i == 0) + /* + * No pages were processed, mark this. + */ + dp->ldp_flags |= LDF_EMPTY; + dp->ldp_flags = cpu_to_le16(dp->ldp_flags); + kunmap(rdpg->rp_pages[0]); + } + iops->put(env, it); + iops->fini(env, it); + + return rc; +} + +static int mdd_readpage(const struct lu_env *env, struct md_object *obj, + const struct lu_rdpg *rdpg) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc; + ENTRY; + + LASSERT(mdd_object_exists(mdd_obj)); + + mdd_read_lock(env, mdd_obj); + rc = mdd_readpage_sanity_check(env, mdd_obj); + if (rc) + GOTO(out_unlock, rc); + + if (mdd_is_dead_obj(mdd_obj)) { + struct page *pg; + struct lu_dirpage *dp; + + /* + * According to POSIX, please do not return any entry to client: + * even dot and dotdot should not be returned. + */ + CWARN("readdir from dead object: "DFID"\n", + PFID(mdd_object_fid(mdd_obj))); + + if (rdpg->rp_count <= 0) + GOTO(out_unlock, rc = -EFAULT); + LASSERT(rdpg->rp_pages != NULL); + + pg = rdpg->rp_pages[0]; + dp = (struct lu_dirpage*)kmap(pg); + memset(dp, 0 , sizeof(struct lu_dirpage)); + dp->ldp_hash_start = rdpg->rp_hash; + dp->ldp_hash_end = DIR_END_OFF; + dp->ldp_flags |= LDF_EMPTY; + dp->ldp_flags = cpu_to_le16(dp->ldp_flags); + kunmap(pg); + GOTO(out_unlock, rc = 0); + } + + rc = __mdd_readpage(env, mdd_obj, rdpg); + + EXIT; +out_unlock: + mdd_read_unlock(env, mdd_obj); + return rc; +} + +struct md_object_operations mdd_obj_ops = { + .moo_permission = mdd_permission, + .moo_attr_get = mdd_attr_get, + .moo_attr_set = mdd_attr_set, + .moo_xattr_get = mdd_xattr_get, + .moo_xattr_set = mdd_xattr_set, + .moo_xattr_list = mdd_xattr_list, + .moo_xattr_del = mdd_xattr_del, + .moo_object_create = mdd_object_create, + .moo_ref_add = mdd_ref_add, + .moo_ref_del = mdd_ref_del, + .moo_open = mdd_open, + .moo_close = mdd_close, + .moo_readpage = mdd_readpage, + .moo_readlink = mdd_readlink, + .moo_capa_get = mdd_capa_get +}; diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c new file mode 100644 index 0000000..268be97 --- /dev/null +++ b/lustre/mdd/mdd_orphans.c @@ -0,0 +1,219 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_orphans.c + * + * Orphan handling code + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Mike Pershin <tappro@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lustre_fid.h> +#include "mdd_internal.h" + +const char orph_index_name[] = "orphans"; + +static const struct dt_index_features orph_index_features = { + .dif_flags = DT_IND_UPDATE, + .dif_keysize_min = sizeof(struct orph_key), + .dif_keysize_max = sizeof(struct orph_key), + .dif_recsize_min = sizeof(loff_t), + .dif_recsize_max = sizeof(loff_t) +}; + +enum { + ORPH_OP_UNLINK, + ORPH_OP_TRUNCATE +}; + +static struct orph_key *orph_key_fill(const struct lu_env *env, + const struct lu_fid *lf, __u32 op) +{ + struct orph_key *key = &mdd_env_info(env)->mti_orph_key; + LASSERT(key); + fid_cpu_to_be(&key->ok_fid, lf); + key->ok_op = cpu_to_be32(op); + return key; +} + +static int orph_index_insert(const struct lu_env *env, + struct mdd_object *obj, __u32 op, + loff_t *offset, struct thandle *th) +{ + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_object *dor = mdd->mdd_orphans; + struct orph_key *key = orph_key_fill(env, mdo2fid(obj), op); + int rc; + ENTRY; + + rc = dor->do_index_ops->dio_insert(env, dor, (struct dt_rec *)offset, + (struct dt_key *)key, th, + BYPASS_CAPA); + RETURN(rc); +} + +static int orph_index_delete(const struct lu_env *env, + struct mdd_object *obj, __u32 op, + struct thandle *th) +{ + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_object *dor = mdd->mdd_orphans; + struct orph_key *key = orph_key_fill(env, mdo2fid(obj), op); + int rc; + ENTRY; + LASSERT(dor); + rc = dor->do_index_ops->dio_delete(env, dor, + (struct dt_key *)key, th, + BYPASS_CAPA); + RETURN(rc); + +} + +static inline struct orph_key *orph_key_empty(const struct lu_env *env, + __u32 op) +{ + struct orph_key *key = &mdd_env_info(env)->mti_orph_key; + LASSERT(key); + fid_zero(&key->ok_fid); + key->ok_op = cpu_to_be32(op); + return key; +} + +static void orph_key_test_and_del(const struct lu_env *env, + struct mdd_device *mdd, + const struct orph_key *key) +{ + struct mdd_object *mdo; + + mdo = mdd_object_find(env, mdd, &key->ok_fid); + if (IS_ERR(mdo)) + CERROR("Invalid orphan!\n"); + else { + mdd_write_lock(env, mdo); + if (mdo->mod_count == 0) { + /* non-opened orphan, let's delete it */ + struct md_attr *ma = &mdd_env_info(env)->mti_ma; + CWARN("Found orphan!\n"); + mdd_object_kill(env, mdo, ma); + /* TODO: now handle OST objects */ + //mdd_ost_objects_destroy(env, ma); + /* TODO: destroy index entry */ + } + mdd_write_unlock(env, mdo); + mdd_object_put(env, mdo); + } +} + +static int orph_index_iterate(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct dt_object *dt_obj = mdd->mdd_orphans; + struct dt_it *it; + struct dt_it_ops *iops; + struct orph_key *key = orph_key_empty(env, 0); + int result; + ENTRY; + + iops = &dt_obj->do_index_ops->dio_it; + it = iops->init(env, dt_obj, 1, BYPASS_CAPA); + if (it != NULL) { + result = iops->get(env, it, (const void *)key); + if (result > 0) { + int i; + /* main cycle */ + for (result = 0, i = 0; result == +1; ++i) { + key = (void *)iops->key(env, it); + fid_be_to_cpu(&key->ok_fid, &key->ok_fid); + orph_key_test_and_del(env, mdd, key); + result = iops->next(env, it); + } + } else if (result == 0) + /* Index contains no zero key? */ + result = -EIO; + + iops->put(env, it); + iops->fini(env, it); + } else + result = -ENOMEM; + + RETURN(result); +} + +int orph_index_init(const struct lu_env *env, struct mdd_device *mdd) +{ + struct lu_fid fid; + struct dt_object *d; + int rc; + ENTRY; + + d = dt_store_open(env, mdd->mdd_child, orph_index_name, &fid); + if (!IS_ERR(d)) { + mdd->mdd_orphans = d; + rc = d->do_ops->do_index_try(env, d, &orph_index_features); + if (rc == 0) + LASSERT(d->do_index_ops != NULL); + else + CERROR("\"%s\" is not an index!\n", orph_index_name); + } else { + CERROR("cannot find \"%s\" obj %d\n", + orph_index_name, (int)PTR_ERR(d)); + rc = PTR_ERR(d); + } + + RETURN(rc); +} + +void orph_index_fini(const struct lu_env *env, struct mdd_device *mdd) +{ + ENTRY; + if (mdd->mdd_orphans != NULL) { + lu_object_put(env, &mdd->mdd_orphans->do_lu); + mdd->mdd_orphans = NULL; + } + EXIT; +} + +int __mdd_orphan_cleanup(const struct lu_env *env, struct mdd_device *d) +{ + return orph_index_iterate(env, d); +} + +int __mdd_orphan_add(const struct lu_env *env, + struct mdd_object *obj, struct thandle *th) +{ + loff_t offset = 0; + return orph_index_insert(env, obj, ORPH_OP_UNLINK, &offset, th); +} + +int __mdd_orphan_del(const struct lu_env *env, + struct mdd_object *obj, struct thandle *th) +{ + return orph_index_delete(env, obj, ORPH_OP_UNLINK, th); +} diff --git a/lustre/mdd/mdd_permission.c b/lustre/mdd/mdd_permission.c new file mode 100644 index 0000000..1195593 --- /dev/null +++ b/lustre/mdd/mdd_permission.c @@ -0,0 +1,654 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: fangyong@clusterfs.com + * lsy@clusterfs.com + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <linux/jbd.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> + +#include <linux/ldiskfs_fs.h> +#include <lustre_mds.h> +#include <lustre/lustre_idl.h> + +#ifdef CONFIG_FS_POSIX_ACL +# include <linux/posix_acl_xattr.h> +# include <linux/posix_acl.h> +#endif + +#include "mdd_internal.h" + +#define mdd_get_group_info(group_info) do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +#define mdd_put_group_info(group_info) do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +#define MDD_NGROUPS_PER_BLOCK ((int)(CFS_PAGE_SIZE / sizeof(gid_t))) + +#define MDD_GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / MDD_NGROUPS_PER_BLOCK][(i) % MDD_NGROUPS_PER_BLOCK]) + +/* + * groups_search() is copied from linux kernel! + * A simple bsearch. + */ +static int mdd_groups_search(struct group_info *group_info, gid_t grp) +{ + int left, right; + + if (!group_info) + return 0; + + left = 0; + right = group_info->ngroups; + while (left < right) { + int mid = (left + right) / 2; + int cmp = grp - MDD_GROUP_AT(group_info, mid); + + if (cmp > 0) + left = mid + 1; + else if (cmp < 0) + right = mid; + else + return 1; + } + return 0; +} + +int mdd_in_group_p(struct md_ucred *uc, gid_t grp) +{ + int rc = 1; + + if (grp != uc->mu_fsgid) { + struct group_info *group_info = NULL; + + if (uc->mu_ginfo || !uc->mu_identity || + uc->mu_valid == UCRED_OLD) + if (grp == uc->mu_suppgids[0] || + grp == uc->mu_suppgids[1]) + return 1; + + if (uc->mu_ginfo) + group_info = uc->mu_ginfo; + else if (uc->mu_identity) + group_info = uc->mu_identity->mi_ginfo; + + if (!group_info) + return 0; + + mdd_get_group_info(group_info); + rc = mdd_groups_search(group_info, grp); + mdd_put_group_info(group_info); + } + return rc; +} + +#ifdef CONFIG_FS_POSIX_ACL +static inline void mdd_acl_le_to_cpu(posix_acl_xattr_entry *p) +{ + p->e_tag = le16_to_cpu(p->e_tag); + p->e_perm = le16_to_cpu(p->e_perm); + p->e_id = le32_to_cpu(p->e_id); +} + +static inline void mdd_acl_cpu_to_le(posix_acl_xattr_entry *p) +{ + p->e_tag = cpu_to_le16(p->e_tag); + p->e_perm = cpu_to_le16(p->e_perm); + p->e_id = cpu_to_le32(p->e_id); +} + +/* + * Check permission based on POSIX ACL. + */ +static int mdd_posix_acl_permission(struct md_ucred *uc, struct lu_attr *la, + int want, posix_acl_xattr_entry *entry, + int count) +{ + posix_acl_xattr_entry *pa, *pe, *mask_obj; + int found = 0; + ENTRY; + + if (count <= 0) + RETURN(-EACCES); + + for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) { + mdd_acl_le_to_cpu(pa); + switch(pa->e_tag) { + case ACL_USER_OBJ: + /* (May have been checked already) */ + if (la->la_uid == uc->mu_fsuid) + goto check_perm; + break; + case ACL_USER: + if (pa->e_id == uc->mu_fsuid) + goto mask; + break; + case ACL_GROUP_OBJ: + if (mdd_in_group_p(uc, la->la_gid)) { + found = 1; + if ((pa->e_perm & want) == want) + goto mask; + } + break; + case ACL_GROUP: + if (mdd_in_group_p(uc, pa->e_id)) { + found = 1; + if ((pa->e_perm & want) == want) + goto mask; + } + break; + case ACL_MASK: + break; + case ACL_OTHER: + if (found) + RETURN(-EACCES); + else + goto check_perm; + default: + RETURN(-EIO); + } + } + RETURN(-EIO); + +mask: + for (mask_obj = pa + 1; mask_obj <= pe; mask_obj++) { + mdd_acl_le_to_cpu(mask_obj); + if (mask_obj->e_tag == ACL_MASK) { + if ((pa->e_perm & mask_obj->e_perm & want) == want) + RETURN(0); + + RETURN(-EACCES); + } + } + +check_perm: + if ((pa->e_perm & want) == want) + RETURN(0); + + RETURN(-EACCES); +} + +/* + * Get default acl EA only. + * Hold read_lock for mdd_obj. + */ +int mdd_acl_def_get(const struct lu_env *env, struct mdd_object *mdd_obj, + struct md_attr *ma) +{ + struct lu_buf *buf; + int rc; + ENTRY; + + if (ma->ma_valid & MA_ACL_DEF) + RETURN(0); + + buf = mdd_buf_get(env, ma->ma_acl, ma->ma_acl_size); + rc = mdo_xattr_get(env, mdd_obj, buf, XATTR_NAME_ACL_DEFAULT, + BYPASS_CAPA); + if (rc > 0) { + ma->ma_acl_size = rc; + ma->ma_valid |= MA_ACL_DEF; + rc = 0; + } else if ((rc == -EOPNOTSUPP) || (rc == -ENODATA)) { + rc = 0; + } + RETURN(rc); +} + +/* + * Modify the ACL for the chmod. + */ +static int mdd_posix_acl_chmod_masq(posix_acl_xattr_entry *entry, + __u32 mode, int count) +{ + posix_acl_xattr_entry *group_obj = NULL, *mask_obj = NULL, *pa, *pe; + + for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) { + mdd_acl_le_to_cpu(pa); + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm = (mode & S_IRWXU) >> 6; + break; + + case ACL_USER: + case ACL_GROUP: + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_MASK: + mask_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm = (mode & S_IRWXO); + break; + + default: + return -EIO; + } + mdd_acl_cpu_to_le(pa); + } + + if (mask_obj) { + mask_obj->e_perm = cpu_to_le16((mode & S_IRWXG) >> 3); + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm = cpu_to_le16((mode & S_IRWXG) >> 3); + } + + return 0; +} + +/* + * Hold write_lock for o. + */ +int mdd_acl_chmod(const struct lu_env *env, struct mdd_object *o, __u32 mode, + struct thandle *handle) +{ + struct lu_buf *buf; + posix_acl_xattr_header *head; + posix_acl_xattr_entry *entry; + int entry_count; + int rc; + + ENTRY; + + buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf, + sizeof(mdd_env_info(env)->mti_xattr_buf)); + + rc = mdo_xattr_get(env, o, buf, XATTR_NAME_ACL_ACCESS, BYPASS_CAPA); + if ((rc == -EOPNOTSUPP) || (rc == -ENODATA)) + RETURN(0); + else if (rc <= 0) + RETURN(rc); + + buf->lb_len = rc; + head = (posix_acl_xattr_header *)(buf->lb_buf); + entry = head->a_entries; + entry_count = (buf->lb_len - sizeof(head->a_version)) / + sizeof(posix_acl_xattr_entry); + if (entry_count <= 0) + RETURN(0); + + rc = mdd_posix_acl_chmod_masq(entry, mode, entry_count); + if (rc) + RETURN(rc); + + rc = mdo_xattr_set(env, o, buf, XATTR_NAME_ACL_ACCESS, + 0, handle, BYPASS_CAPA); + RETURN(rc); +} + +/* + * Modify acl when creating a new obj. + */ +static int mdd_posix_acl_create_masq(posix_acl_xattr_entry *entry, + __u32 *mode_p, int count) +{ + posix_acl_xattr_entry *group_obj = NULL, *mask_obj = NULL, *pa, *pe; + __u32 mode = *mode_p; + int not_equiv = 0; + + for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) { + mdd_acl_le_to_cpu(pa); + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm &= (mode >> 6) | ~S_IRWXO; + mode &= (pa->e_perm << 6) | ~S_IRWXU; + break; + + case ACL_USER: + case ACL_GROUP: + not_equiv = 1; + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm &= mode | ~S_IRWXO; + mode &= pa->e_perm | ~S_IRWXO; + break; + + case ACL_MASK: + mask_obj = pa; + not_equiv = 1; + break; + + default: + return -EIO; + } + mdd_acl_cpu_to_le(pa); + } + + if (mask_obj) { + mask_obj->e_perm = le16_to_cpu(mask_obj->e_perm) & + ((mode >> 3) | ~S_IRWXO); + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; + mask_obj->e_perm = cpu_to_le16(mask_obj->e_perm); + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm = le16_to_cpu(group_obj->e_perm) & + ((mode >> 3) | ~S_IRWXO); + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; + group_obj->e_perm = cpu_to_le16(group_obj->e_perm); + } + + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; + return not_equiv; +} + +/* + * Hold write_lock for obj. + */ +int __mdd_acl_init(const struct lu_env *env, struct mdd_object *obj, + struct lu_buf *buf, __u32 *mode, struct thandle *handle) +{ + posix_acl_xattr_header *head; + posix_acl_xattr_entry *entry; + int entry_count; + int rc; + + ENTRY; + + head = (posix_acl_xattr_header *)(buf->lb_buf); + entry = head->a_entries; + entry_count = (buf->lb_len - sizeof(head->a_version)) / + sizeof(posix_acl_xattr_entry); + if (entry_count <= 0) + RETURN(0); + + if (S_ISDIR(*mode)) { + rc = mdo_xattr_set(env, obj, buf, XATTR_NAME_ACL_DEFAULT, 0, + handle, BYPASS_CAPA); + if (rc) + RETURN(rc); + } + + rc = mdd_posix_acl_create_masq(entry, mode, entry_count); + if (rc <= 0) + RETURN(rc); + + rc = mdo_xattr_set(env, obj, buf, XATTR_NAME_ACL_ACCESS, 0, handle, + BYPASS_CAPA); + RETURN(rc); +} + +/* + * Hold read_lock for pobj. + * Hold write_lock for cobj. + */ +int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj, + struct mdd_object *cobj, __u32 *mode, struct thandle *handle) +{ + struct lu_buf *buf; + int rc; + ENTRY; + + if (S_ISLNK(*mode)) + RETURN(0); + + buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf, + sizeof(mdd_env_info(env)->mti_xattr_buf)); + rc = mdo_xattr_get(env, pobj, buf, XATTR_NAME_ACL_DEFAULT, BYPASS_CAPA); + if ((rc == -EOPNOTSUPP) || (rc == -ENODATA)) + RETURN(0); + else if (rc <= 0) + RETURN(rc); + + buf->lb_len = rc; + rc = __mdd_acl_init(env, cobj, buf, mode, handle); + RETURN(rc); +} +#endif + +/* + * Hold read_lock for obj. + */ +static int mdd_check_acl(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, int mask) +{ +#ifdef CONFIG_FS_POSIX_ACL + struct md_ucred *uc = md_ucred(env); + posix_acl_xattr_header *head; + posix_acl_xattr_entry *entry; + struct lu_buf *buf; + int entry_count; + int rc; + ENTRY; + + buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf, + sizeof(mdd_env_info(env)->mti_xattr_buf)); + rc = mdo_xattr_get(env, obj, buf, XATTR_NAME_ACL_ACCESS, + mdd_object_capa(env, obj)); + if (rc <= 0) + RETURN(rc ? : -EACCES); + + buf->lb_len = rc; + head = (posix_acl_xattr_header *)(buf->lb_buf); + entry = head->a_entries; + entry_count = (buf->lb_len - sizeof(head->a_version)) / + sizeof(posix_acl_xattr_entry); + + rc = mdd_posix_acl_permission(uc, la, mask, entry, entry_count); + RETURN(rc); +#else + ENTRY; + RETURN(-EAGAIN); +#endif +} + +int __mdd_permission_internal(const struct lu_env *env, struct mdd_object *obj, + struct lu_attr *la, int mask, int needlock) +{ + struct md_ucred *uc = md_ucred(env); + __u32 mode; + int rc; + ENTRY; + + if (mask == 0) + RETURN(0); + + /* These means unnecessary for permission check */ + if ((uc == NULL) || (uc->mu_valid == UCRED_INIT)) + RETURN(0); + + /* Invalid user credit */ + if (uc->mu_valid == UCRED_INVALID) + RETURN(-EACCES); + + /* + * Nobody gets write access to an immutable file. + */ + if ((mask & MAY_WRITE) && mdd_is_immutable(obj)) + RETURN(-EACCES); + + if (la == NULL) { + la = &mdd_env_info(env)->mti_la; + rc = mdd_la_get(env, obj, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + } + + mode = la->la_mode; + if (uc->mu_fsuid == la->la_uid) { + mode >>= 6; + } else { + if (mode & S_IRWXG) { + if (needlock) + mdd_read_lock(env, obj); + rc = mdd_check_acl(env, obj, la, mask); + if (needlock) + mdd_read_unlock(env, obj); + if (rc == -EACCES) + goto check_capabilities; + else if ((rc != -EAGAIN) && (rc != -EOPNOTSUPP) && + (rc != -ENODATA)) + RETURN(rc); + } + if (mdd_in_group_p(uc, la->la_gid)) + mode >>= 3; + } + + if (((mode & mask & S_IRWXO) == mask)) + RETURN(0); + +check_capabilities: + if (!(mask & MAY_EXEC) || + (la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode)) + if (mdd_capable(uc, CAP_DAC_OVERRIDE)) + RETURN(0); + + if ((mask == MAY_READ) || + (S_ISDIR(la->la_mode) && !(mask & MAY_WRITE))) + if (mdd_capable(uc, CAP_DAC_READ_SEARCH)) + RETURN(0); + + RETURN(-EACCES); +} + +int mdd_permission(const struct lu_env *env, + struct md_object *pobj, struct md_object *cobj, + struct md_attr *ma, int mask) +{ + struct mdd_object *mdd_pobj, *mdd_cobj; + struct lu_attr *la = NULL; + int check_create, check_link; + int check_unlink; + int check_rename_src, check_rename_tar; + int check_vtx_part, check_vtx_full; + int rc = 0; + ENTRY; + + LASSERT(cobj); + mdd_cobj = md2mdd_obj(cobj); + + /* For cross_open case, the "mask" is open flags, + * so convert it to permission mask first. + * XXX: MDS_OPEN_CROSS must be NOT equal to permission mask MAY_*. */ + if (unlikely(mask & MDS_OPEN_CROSS)) { + la = &mdd_env_info(env)->mti_la; + rc = mdd_la_get(env, mdd_cobj, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + mask = accmode(env, la, mask & ~MDS_OPEN_CROSS); + } + + check_create = mask & MAY_CREATE; + check_link = mask & MAY_LINK; + check_unlink = mask & MAY_UNLINK; + check_rename_src = mask & MAY_RENAME_SRC; + check_rename_tar = mask & MAY_RENAME_TAR; + check_vtx_part = mask & MAY_VTX_PART; + check_vtx_full = mask & MAY_VTX_FULL; + + mask &= ~(MAY_CREATE | MAY_LINK | + MAY_UNLINK | + MAY_RENAME_SRC | MAY_RENAME_TAR | + MAY_VTX_PART | MAY_VTX_FULL); + + rc = mdd_permission_internal_locked(env, mdd_cobj, NULL, mask); + + if (!rc && (check_create || check_link)) + rc = mdd_may_create(env, mdd_cobj, NULL, 1, check_link); + + if (!rc && check_unlink) { + LASSERT(ma); + rc = mdd_may_unlink(env, mdd_cobj, ma); + } + + if (!rc && (check_rename_src || check_rename_tar)) { + LASSERT(pobj); + LASSERT(ma); + mdd_pobj = md2mdd_obj(pobj); + rc = mdd_may_delete(env, mdd_pobj, mdd_cobj, ma, 1, + check_rename_tar); + } + + if (!rc && (check_vtx_part || check_vtx_full)) { + struct md_ucred *uc = md_ucred(env); + + LASSERT(ma); + if (likely(!la)) { + la = &mdd_env_info(env)->mti_la; + rc = mdd_la_get(env, mdd_cobj, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + } + + if (!(la->la_mode & S_ISVTX) || (la->la_uid == uc->mu_fsuid) || + (check_vtx_full && (ma->ma_attr.la_valid & LA_UID) && + (ma->ma_attr.la_uid == uc->mu_fsuid))) { + ma->ma_attr_flags |= MDS_VTX_BYPASS; + } else { + ma->ma_attr_flags &= ~MDS_VTX_BYPASS; + if (check_vtx_full) + rc = -EPERM; + } + } + + RETURN(rc); +} + +int mdd_capa_get(const struct lu_env *env, struct md_object *obj, + struct lustre_capa *capa, int renewal) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + struct obd_capa *oc; + int rc = 0; + ENTRY; + + oc = mdo_capa_get(env, mdd_obj, renewal ? capa : NULL, capa->lc_uid, + capa->lc_opc); + if (IS_ERR(oc)) { + rc = PTR_ERR(oc); + } else { + capa_cpy(capa, oc); + capa_put(oc); + } + + RETURN(rc); +} diff --git a/lustre/mdd/mdd_trans.c b/lustre/mdd/mdd_trans.c new file mode 100644 index 0000000..3c84ee7 --- /dev/null +++ b/lustre/mdd/mdd_trans.c @@ -0,0 +1,206 @@ +/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * mdd/mdd_handler.c + * Lustre Metadata Server (mdd) routines + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +#include <linux/jbd.h> +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> + +#include <linux/ldiskfs_fs.h> +#include <lustre_mds.h> +#include <lustre/lustre_idl.h> + +#include "mdd_internal.h" + +int mdd_txn_start_cb(const struct lu_env *env, struct txn_param *param, + void *cookie) +{ + return 0; +} + +int mdd_txn_stop_cb(const struct lu_env *env, struct thandle *txn, + void *cookie) +{ + struct mdd_device *mdd = cookie; + struct obd_device *obd = mdd2obd_dev(mdd); + + LASSERT(obd); + return mds_lov_write_objids(obd); +} + +int mdd_txn_commit_cb(const struct lu_env *env, struct thandle *txn, + void *cookie) +{ + return 0; +} + +static int dto_txn_credits[DTO_NR]; +void mdd_txn_param_build(const struct lu_env *env, struct mdd_device *mdd, + enum mdd_txn_op op) +{ + LASSERT(0 <= op && op < MDD_TXN_LAST_OP); + + txn_param_init(&mdd_env_info(env)->mti_param, + mdd->mdd_tod[op].mod_credits); +} + +int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma, enum mdd_txn_op op) +{ + struct mdd_device *mdd = mdo2mdd(&md2mdd_obj(obj)->mod_obj); + int rc, log_credits, stripe; + ENTRY; + + mdd_txn_param_build(env, mdd, op); + + if (S_ISDIR(lu_object_attr(&obj->mo_lu))) + RETURN(0); + + LASSERT(op == MDD_TXN_UNLINK_OP || op == MDD_TXN_RENAME_OP); + rc = mdd_lmm_get_locked(env, md2mdd_obj(obj), ma); + if (rc || !(ma->ma_valid & MA_LOV)) + RETURN(rc); + + LASSERT(le32_to_cpu(ma->ma_lmm->lmm_magic) == LOV_MAGIC); + if ((int)le32_to_cpu(ma->ma_lmm->lmm_stripe_count) < 0) + stripe = mdd2obd_dev(mdd)->u.mds.mds_lov_desc.ld_tgt_count; + else + stripe = le32_to_cpu(ma->ma_lmm->lmm_stripe_count); + + log_credits = stripe * dto_txn_credits[DTO_LOG_REC]; + mdd_env_info(env)->mti_param.tp_credits += log_credits; + RETURN(rc); +} + +static void mdd_txn_init_dto_credits(const struct lu_env *env, + struct mdd_device *mdd, int *dto_credits) +{ + int op, credits; + for (op = 0; op < DTO_NR; op++) { + credits = mdd_child_ops(mdd)->dt_credit_get(env, mdd->mdd_child, + op); + LASSERT(credits > 0); + dto_txn_credits[op] = credits; + } +} + +int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd) +{ + int op; + + /* Init credits for each ops. */ + mdd_txn_init_dto_credits(env, mdd, dto_txn_credits); + + /* Calculate the mdd credits. */ + for (op = MDD_TXN_OBJECT_DESTROY_OP; op < MDD_TXN_LAST_OP; op++) { + int *c = &mdd->mdd_tod[op].mod_credits; + int *dt = dto_txn_credits; + mdd->mdd_tod[op].mod_op = op; + switch(op) { + case MDD_TXN_OBJECT_DESTROY_OP: + *c = dt[DTO_OBJECT_DELETE]; + break; + case MDD_TXN_OBJECT_CREATE_OP: + /* OI_INSERT + CREATE OBJECT */ + *c = dt[DTO_INDEX_INSERT] + + dt[DTO_OBJECT_CREATE]; + break; + case MDD_TXN_ATTR_SET_OP: + /* ATTR set + XATTR(lsm, lmv) set */ + *c = dt[DTO_ATTR_SET] + dt[DTO_XATTR_SET]; + break; + case MDD_TXN_XATTR_SET_OP: + *c = dt[DTO_XATTR_SET]; + break; + case MDD_TXN_INDEX_INSERT_OP: + *c = dt[DTO_INDEX_INSERT]; + break; + case MDD_TXN_INDEX_DELETE_OP: + *c = dt[DTO_INDEX_DELETE]; + break; + case MDD_TXN_LINK_OP: + *c = dt[DTO_INDEX_INSERT]; + break; + case MDD_TXN_UNLINK_OP: + /* delete index + Unlink log */ + *c = dt[DTO_INDEX_DELETE]; + break; + case MDD_TXN_RENAME_OP: + /* 2 delete index + 1 insert + Unlink log */ + *c = 2 * dt[DTO_INDEX_DELETE] + + dt[DTO_INDEX_INSERT]; + break; + case MDD_TXN_RENAME_TGT_OP: + /* index insert + index delete */ + *c = dt[DTO_INDEX_DELETE] + + dt[DTO_INDEX_INSERT]; + break; + case MDD_TXN_CREATE_DATA_OP: + /* same as set xattr(lsm) */ + *c = dt[DTO_XATTR_SET]; + break; + case MDD_TXN_MKDIR_OP: + /* INDEX INSERT + OI INSERT + + * CREATE_OBJECT_CREDITS + * SET_MD CREDITS is already counted in + * CREATE_OBJECT CREDITS + */ + *c = 2 * dt[DTO_INDEX_INSERT] + + dt[DTO_OBJECT_CREATE]; + break; + default: + CERROR("Invalid op %d init its credit\n", op); + LBUG(); + } + } + RETURN(0); +} + +struct thandle* mdd_trans_start(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct txn_param *p = &mdd_env_info(env)->mti_param; + struct thandle *th; + + th = mdd_child_ops(mdd)->dt_trans_start(env, mdd->mdd_child, p); + return th; +} + +void mdd_trans_stop(const struct lu_env *env, struct mdd_device *mdd, + int result, struct thandle *handle) +{ + handle->th_result = result; + mdd_child_ops(mdd)->dt_trans_stop(env, handle); +} diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 3a99194..6d1a6e7 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -50,6 +50,7 @@ # include <linux/locks.h> #endif +#include <linux/lustre_acl.h> #include <obd_class.h> #include <lustre_dlm.h> #include <obd_lov.h> @@ -142,7 +143,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, rc = -ETIMEDOUT; /* XXX should this be a different errno? */ } - DEBUG_REQ(D_ERROR, req, "bulk failed: %s %d(%d), evicting %s@%s\n", + DEBUG_REQ(D_ERROR, req, "bulk failed: %s %d(%d), evicting %s@%s", (rc == -ETIMEDOUT) ? "timeout" : "network error", desc->bd_nob_transferred, count, req->rq_export->exp_client_uuid.uuid, @@ -169,13 +170,13 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, struct vfsmount **mnt, int lock_mode, struct lustre_handle *lockh, - char *name, int namelen, __u64 lockpart) + __u64 lockpart) { struct mds_obd *mds = &obd->u.mds; struct dentry *de = mds_fid2dentry(mds, fid, mnt), *retval = de; struct ldlm_res_id res_id = { .name = {0} }; int flags = LDLM_FL_ATOMIC_CB, rc; - ldlm_policy_data_t policy = { .l_inodebits = { lockpart} }; + ldlm_policy_data_t policy = { .l_inodebits = { lockpart} }; ENTRY; if (IS_ERR(de)) @@ -183,8 +184,8 @@ struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, res_id.name[0] = de->d_inode->i_ino; res_id.name[1] = de->d_inode->i_generation; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id, - LDLM_IBITS, &policy, lock_mode, &flags, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, + LDLM_IBITS, &policy, lock_mode, &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, lockh); if (rc != ELDLM_OK) { @@ -254,12 +255,12 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, RETURN(result); } -static int mds_connect_internal(struct obd_export *exp, +static int mds_connect_internal(struct obd_export *exp, struct obd_connect_data *data) { struct obd_device *obd = exp->exp_obd; if (data != NULL) { - data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED; + data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; data->ocd_ibits_known &= MDS_INODELOCK_FULL; /* If no known bits (which should not happen, probably, @@ -310,25 +311,19 @@ static int mds_reconnect(struct obd_export *exp, struct obd_device *obd, * about that client, like open files, the last operation number it did * on the server, etc. */ -static int mds_connect(struct lustre_handle *conn, struct obd_device *obd, +static int mds_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { struct obd_export *exp; struct mds_export_data *med; struct mds_client_data *mcd = NULL; - int rc, abort_recovery; + int rc; ENTRY; if (!conn || !obd || !cluuid) RETURN(-EINVAL); - /* Check for aborted recovery. */ - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) - target_abort_recovery(obd); - /* XXX There is a small race between checking the list and adding a * new connection for the same UUID, but the real threat (list * corruption when multiple different clients connect) is solved. @@ -380,7 +375,7 @@ int mds_init_export(struct obd_export *exp) INIT_LIST_HEAD(&med->med_open_head); spin_lock_init(&med->med_open_lock); - + spin_lock(&exp->exp_lock); exp->exp_connecting = 1; spin_unlock(&exp->exp_lock); @@ -420,7 +415,7 @@ static int mds_destroy_export(struct obd_export *export) CWARN("%s: allocation failure during cleanup; can not force " "close file handles on this service.\n", obd->obd_name); OBD_FREE(lmm, mds->mds_max_mdsize); - GOTO(out, rc = -ENOMEM); + GOTO(out_lmm, rc = -ENOMEM); } spin_lock(&med->med_open_lock); @@ -444,7 +439,7 @@ static int mds_destroy_export(struct obd_export *export) mfd->mfd_dentry->d_name.len,mfd->mfd_dentry->d_name.name, mfd->mfd_dentry->d_inode->i_ino); - rc = mds_get_md(obd, mfd->mfd_dentry->d_inode, lmm,&lmm_size,1); + rc = mds_get_md(obd, mfd->mfd_dentry->d_inode, lmm, &lmm_size, 1); if (rc < 0) CWARN("mds_get_md failure, rc=%d\n", rc); else @@ -453,7 +448,6 @@ static int mds_destroy_export(struct obd_export *export) /* child orphan sem protects orphan_dec_test and * is_orphan race, mds_mfd_close drops it */ MDS_DOWN_WRITE_ORPHAN_SEM(mfd->mfd_dentry->d_inode); - rc = mds_mfd_close(NULL, REQ_REC_OFF, obd, mfd, !(export->exp_flags & OBD_OPT_FAILOVER), lmm, lmm_size, logcookies, @@ -476,16 +470,14 @@ static int mds_destroy_export(struct obd_export *export) spin_lock(&med->med_open_lock); } + spin_unlock(&med->med_open_lock); OBD_FREE(logcookies, mds->mds_max_cookiesize); +out_lmm: OBD_FREE(lmm, mds->mds_max_mdsize); - - spin_unlock(&med->med_open_lock); - +out: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); mds_client_free(export); - - out: RETURN(rc); } @@ -546,7 +538,7 @@ static int mds_getstatus(struct ptlrpc_request *req) /* get the LOV EA from @inode and store it into @md. It can be at most * @size bytes, and @size is updated with the actual EA size. - * The EA size is also returned on success, and -ve errno on failure. + * The EA size is also returned on success, and -ve errno on failure. * If there is no EA then 0 is returned. */ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, int *size, int lock) @@ -864,7 +856,7 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset, struct lvfs_run_ctxt saved; struct mds_body *body; struct dentry *dparent = NULL, *dchild = NULL; - struct lvfs_ucred uc = {NULL,}; + struct lvfs_ucred uc = {0,}; struct lustre_handle parent_lockh; int namesize; int rc = 0, cleanup_phase = 0, resent_req = 0; @@ -939,10 +931,10 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset, if (resent_req == 0) { if (name) { - OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout*2); - rc = mds_get_parent_child_locked(obd, &obd->u.mds, + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout * 2); + rc = mds_get_parent_child_locked(obd, &obd->u.mds, &body->fid1, - &parent_lockh, + &parent_lockh, &dparent, LCK_CR, MDS_INODELOCK_UPDATE, name, namesize, @@ -952,11 +944,11 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset, /* For revalidate by fid we always take UPDATE lock */ dchild = mds_fid2locked_dentry(obd, &body->fid2, NULL, LCK_CR, child_lockh, - NULL, 0, child_part); + child_part); LASSERT(dchild); if (IS_ERR(dchild)) rc = PTR_ERR(dchild); - } + } if (rc) GOTO(cleanup, rc); } else { @@ -1031,7 +1023,7 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc = 0; ENTRY; @@ -1059,7 +1051,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) GOTO(out_pop, rc); } - req->rq_status = mds_getattr_internal(obd, de, req, body,REPLY_REC_OFF); + req->rq_status = mds_getattr_internal(obd, de, req, body, + REPLY_REC_OFF); l_dput(de); GOTO(out_pop, rc); @@ -1184,7 +1177,7 @@ static int mds_readpage(struct ptlrpc_request *req, int offset) struct mds_body *body, *repbody; struct lvfs_run_ctxt saved; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) }; - struct lvfs_ucred uc = {NULL,}; + struct lvfs_ucred uc = {0,}; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) @@ -1275,8 +1268,8 @@ int mds_reint(struct ptlrpc_request *req, int offset, return rc; } -static int mds_filter_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd, int *process) +int mds_filter_recovery_request(struct ptlrpc_request *req, + struct obd_device *obd, int *process) { switch (lustre_msg_get_opc(req->rq_reqmsg)) { case MDS_CONNECT: /* This will never get here, but for completeness. */ @@ -1287,21 +1280,23 @@ static int mds_filter_recovery_request(struct ptlrpc_request *req, RETURN(0); case MDS_CLOSE: + case MDS_DONE_WRITING: case MDS_SYNC: /* used in unmounting */ case OBD_PING: case MDS_REINT: + case SEQ_QUERY: + case FLD_QUERY: case LDLM_ENQUEUE: *process = target_queue_recovery_request(req, obd); RETURN(0); default: DEBUG_REQ(D_ERROR, req, "not permitted during recovery"); - *process = 0; - /* XXX what should we set rq_status to here? */ - req->rq_status = -EAGAIN; - RETURN(ptlrpc_error(req)); + *process = -EAGAIN; + RETURN(0); } } +EXPORT_SYMBOL(mds_filter_recovery_request); static char *reint_names[] = { [REINT_SETATTR] "setattr", @@ -1392,7 +1387,7 @@ static int mds_handle_quotactl(struct ptlrpc_request *req) RETURN(0); } -static int mds_msg_check_version(struct lustre_msg *msg) +int mds_msg_check_version(struct lustre_msg *msg) { int rc; @@ -1400,6 +1395,9 @@ static int mds_msg_check_version(struct lustre_msg *msg) case MDS_CONNECT: case MDS_DISCONNECT: case OBD_PING: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1412,6 +1410,8 @@ static int mds_msg_check_version(struct lustre_msg *msg) case MDS_GETATTR_NAME: case MDS_STATFS: case MDS_READPAGE: + case MDS_WRITEPAGE: + case MDS_IS_SUBDIR: case MDS_REINT: case MDS_CLOSE: case MDS_DONE_WRITING: @@ -1424,6 +1424,8 @@ static int mds_msg_check_version(struct lustre_msg *msg) case MDS_QUOTACTL: case QUOTA_DQACQ: case QUOTA_DQREL: + case SEQ_QUERY: + case FLD_QUERY: rc = lustre_msg_check_version(msg, LUSTRE_MDS_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1463,11 +1465,12 @@ static int mds_msg_check_version(struct lustre_msg *msg) } return rc; } +EXPORT_SYMBOL(mds_msg_check_version); int mds_handle(struct ptlrpc_request *req) { int should_process, fail = OBD_FAIL_MDS_ALL_REPLY_NET; - int rc = 0; + int rc; struct mds_obd *mds = NULL; /* quell gcc overwarning */ struct obd_device *obd = NULL; ENTRY; @@ -1485,7 +1488,7 @@ int mds_handle(struct ptlrpc_request *req) /* XXX identical to OST */ if (lustre_msg_get_opc(req->rq_reqmsg) != MDS_CONNECT) { struct mds_export_data *med; - int recovering, abort_recovery; + int recovering; if (req->rq_export == NULL) { CERROR("operation %d on unconnected MDS from %s\n", @@ -1497,7 +1500,7 @@ int mds_handle(struct ptlrpc_request *req) med = &req->rq_export->exp_mds_data; obd = req->rq_export->exp_obd; - mds = &obd->u.mds; + mds = mds_req2mds(req); /* sanity check: if the xid matches, the request must * be marked as a resent or replayed */ @@ -1520,16 +1523,18 @@ int mds_handle(struct ptlrpc_request *req) /* Check for aborted recovery. */ spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; recovering = obd->obd_recovering; spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - } else if (recovering) { + if (recovering) { rc = mds_filter_recovery_request(req, obd, &should_process); if (rc || !should_process) RETURN(rc); + else if (should_process < 0) { + req->rq_status = should_process; + rc = ptlrpc_error(req); + RETURN(rc); + } } } @@ -1537,9 +1542,15 @@ int mds_handle(struct ptlrpc_request *req) case MDS_CONNECT: DEBUG_REQ(D_INODE, req, "connect"); OBD_FAIL_RETURN(OBD_FAIL_MDS_CONNECT_NET, 0); - rc = target_handle_connect(req, mds_handle); + rc = target_handle_connect(req); if (!rc) { /* Now that we have an export, set mds. */ + /* + * XXX nikita: these assignments are useless: mds is + * never used below, and obd is only used for + * MSG_LAST_REPLAY case, which never happens for + * MDS_CONNECT. + */ obd = req->rq_export->exp_obd; mds = mds_req2mds(req); } @@ -1770,7 +1781,7 @@ int mds_handle(struct ptlrpc_request *req) /* If we're DISCONNECTing, the mds_export_data is already freed */ if (!rc && lustre_msg_get_opc(req->rq_reqmsg) != MDS_DISCONNECT) { struct mds_export_data *med = &req->rq_export->exp_mds_data; - + /* I don't think last_xid is used for anyway, so I'm not sure if we need to care about last_close_xid here.*/ lustre_msg_set_last_xid(req->rq_repmsg, @@ -1782,15 +1793,6 @@ int mds_handle(struct ptlrpc_request *req) EXIT; out: - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { - if (obd && obd->obd_recovering) { - DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); - return target_queue_final_reply(req, rc); - } - /* Lost a race with recovery; let the error path DTRT. */ - rc = req->rq_status = -ENOTCONN; - } - target_send_reply(req, rc, fail); return 0; } @@ -1824,7 +1826,6 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) CERROR("error writing MDS server data: rc = %d\n", rc); - RETURN(rc); } @@ -1869,6 +1870,30 @@ static void fsoptions_to_mds_flags(struct mds_obd *mds, char *options) options = ++p; } } +static int mds_lov_presetup (struct mds_obd *mds, struct lustre_cfg *lcfg) +{ + int rc; + ENTRY; + + rc = llog_start_commit_thread(); + if (rc < 0) + RETURN(rc); + + if (lcfg->lcfg_bufcount >= 4 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { + class_uuid_t uuid; + + ll_generate_random_uuid(uuid); + class_uuid_unparse(uuid, &mds->mds_lov_uuid); + + OBD_ALLOC(mds->mds_profile, LUSTRE_CFG_BUFLEN(lcfg, 3)); + if (mds->mds_profile == NULL) + RETURN(-ENOMEM); + + strncpy(mds->mds_profile, lustre_cfg_string(lcfg, 3), + LUSTRE_CFG_BUFLEN(lcfg, 3)); + } + RETURN(rc); +} /* mount the file system (secretly). lustre_cfg parameters are: * 1 = device @@ -1876,14 +1901,13 @@ static void fsoptions_to_mds_flags(struct mds_obd *mds, char *options) * 3 = config name * 4 = mount options */ -static int mds_setup(struct obd_device *obd, obd_count len, void *buf) +static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg* lcfg = buf; struct mds_obd *mds = &obd->u.mds; - struct lustre_sb_info *lsi; struct lustre_mount_info *lmi; struct vfsmount *mnt; + struct lustre_sb_info *lsi; struct obd_uuid uuid; __u8 *uuid_ptr; char *str, *label; @@ -1910,6 +1934,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) /* We mounted in lustre_fill_super. lcfg bufs 1, 2, 4 (device, fstype, mount opts) are ignored.*/ + lsi = s2lsi(lmi->lmi_sb); fsoptions_to_mds_flags(mds, lsi->lsi_ldd->ldd_mount_opts); fsoptions_to_mds_flags(mds, lsi->lsi_lmd->lmd_opts); @@ -1936,6 +1961,15 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) } ldlm_register_intent(obd->obd_namespace, mds_intent_policy); + lprocfs_init_vars(mds, &lvars); + if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && + lprocfs_alloc_obd_stats(obd, LPROC_MDS_LAST) == 0) { + /* Init private stats here */ + mds_stats_counter_init(obd->obd_stats); + obd->obd_proc_exports = proc_mkdir("exports", + obd->obd_proc_entry); + } + rc = mds_fs_setup(obd, mnt); if (rc) { CERROR("%s: MDS filesystem method init failed: rc = %d\n", @@ -1943,24 +1977,10 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_ns, rc); } - rc = llog_start_commit_thread(); + rc = mds_lov_presetup(mds, lcfg); if (rc < 0) GOTO(err_fs, rc); - if (lcfg->lcfg_bufcount >= 4 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { - class_uuid_t uuid; - - ll_generate_random_uuid(uuid); - class_uuid_unparse(uuid, &mds->mds_lov_uuid); - - OBD_ALLOC(mds->mds_profile, LUSTRE_CFG_BUFLEN(lcfg, 3)); - if (mds->mds_profile == NULL) - GOTO(err_fs, rc = -ENOMEM); - - strncpy(mds->mds_profile, lustre_cfg_string(lcfg, 3), - LUSTRE_CFG_BUFLEN(lcfg, 3)); - } - ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "mds_ldlm_client", &obd->obd_ldlm_client); obd->obd_replayable = 1; @@ -1969,12 +1989,14 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) if (rc) GOTO(err_fs, rc); +#if 0 mds->mds_group_hash = upcall_cache_init(obd->obd_name); if (IS_ERR(mds->mds_group_hash)) { rc = PTR_ERR(mds->mds_group_hash); mds->mds_group_hash = NULL; GOTO(err_qctxt, rc); } +#endif /* Don't wait for mds_postrecov trying to clear orphans */ obd->obd_async_recov = 1; @@ -1986,15 +2008,6 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) if (rc) GOTO(err_qctxt, rc); - lprocfs_init_vars(mds, &lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && - lprocfs_alloc_obd_stats(obd, LPROC_MDS_LAST) == 0) { - /* Init private stats here */ - mds_stats_counter_init(obd->obd_stats); - obd->obd_proc_exports = proc_mkdir("exports", - obd->obd_proc_entry); - } - uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb); if (uuid_ptr != NULL) { class_uuid_unparse(uuid_ptr, &uuid); @@ -2013,8 +2026,8 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) "/proc/fs/lustre/mds/%s/recovery_status.\n", obd->obd_name, lustre_cfg_string(lcfg, 1), label ?: "", label ? "/" : "", str, - obd->obd_recoverable_clients, - (obd->obd_recoverable_clients == 1) ? + obd->obd_max_recoverable_clients, + (obd->obd_max_recoverable_clients == 1) ? "client" : "clients", (int)(OBD_RECOVERY_TIMEOUT) / 60, (int)(OBD_RECOVERY_TIMEOUT) % 60, @@ -2036,9 +2049,13 @@ err_qctxt: err_fs: /* No extra cleanup needed for llog_init_commit_thread() */ mds_fs_cleanup(obd); +#if 0 upcall_cache_cleanup(mds->mds_group_hash); mds->mds_group_hash = NULL; +#endif err_ns: + lprocfs_obd_cleanup(obd); + lprocfs_free_obd_stats(obd); ldlm_namespace_free(obd->obd_namespace, 0); obd->obd_namespace = NULL; err_ops: @@ -2087,29 +2104,29 @@ static int mds_postsetup(struct obd_device *obd) int rc = 0; ENTRY; - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, + rc = llog_setup(obd, NULL, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, &llog_lvfs_ops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_LOVEA_ORIG_CTXT, obd, 0, NULL, + rc = llog_setup(obd, NULL, LLOG_LOVEA_ORIG_CTXT, obd, 0, NULL, &llog_lvfs_ops); if (rc) RETURN(rc); if (mds->mds_profile) { struct lustre_profile *lprof; - /* The profile defines which osc and mdc to connect to, for a + /* The profile defines which osc and mdc to connect to, for a client. We reuse that here to figure out the name of the - lov to use (and ignore lprof->lp_mdc). - The profile was set in the config log with + lov to use (and ignore lprof->lp_md). + The profile was set in the config log with LCFG_MOUNTOPT profilenm oscnm mdcnm */ lprof = class_get_profile(mds->mds_profile); if (lprof == NULL) { CERROR("No profile found: %s\n", mds->mds_profile); GOTO(err_cleanup, rc = -ENOENT); } - rc = mds_lov_connect(obd, lprof->lp_osc); + rc = mds_lov_connect(obd, lprof->lp_dt); if (rc) GOTO(err_cleanup, rc); } @@ -2144,15 +2161,16 @@ int mds_postrecov(struct obd_device *obd) } /* clean PENDING dir */ - rc = mds_cleanup_pending(obd); - if (rc < 0) - GOTO(out, rc); + if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME))) + rc = mds_cleanup_pending(obd); + if (rc < 0) + GOTO(out, rc); /* FIXME Does target_finish_recovery really need this to block? */ /* Notify the LOV, which will in turn call mds_notify for each tgt */ /* This means that we have to hack obd_notify to think we're obd_set_up during mds_lov_connect. */ - obd_notify(obd->u.mds.mds_osc_obd, NULL, + obd_notify(obd->u.mds.mds_osc_obd, NULL, obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK : OBD_NOTIFY_SYNC, NULL); @@ -2185,7 +2203,11 @@ static int mds_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) case OBD_CLEANUP_EARLY: break; case OBD_CLEANUP_EXPORTS: - target_cleanup_recovery(obd); + /*XXX Use this for mdd mds cleanup, so comment out + *this target_cleanup_recovery for this tmp MDD MDS + *Wangdi*/ + if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME))) + target_cleanup_recovery(obd); mds_lov_early_clean(obd); break; case OBD_CLEANUP_SELF_EXP: @@ -2222,12 +2244,14 @@ static int mds_cleanup(struct obd_device *obd) lquota_cleanup(mds_quota_interface_ref, obd); mds_update_server_data(obd, 1); - if (mds->mds_lov_objids != NULL) + if (mds->mds_lov_objids != NULL) OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size); mds_fs_cleanup(obd); +#if 0 upcall_cache_cleanup(mds->mds_group_hash); mds->mds_group_hash = NULL; +#endif server_put_mount(obd->obd_name, mds->mds_vfsmnt); obd->u.obt.obt_sb = NULL; @@ -2494,7 +2518,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, RETURN(ELDLM_LOCK_REPLACED); } -static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) +static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct mds_obd *mds = &obd->u.mds; struct lprocfs_static_vars lvars; @@ -2533,7 +2557,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) MDC_REPLY_PORTAL, MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, LUSTRE_MDS_NAME, obd->obd_proc_entry, NULL, - mds_min_threads, mds_max_threads, "ll_mdt"); + mds_min_threads, mds_max_threads, "ll_mdt", 0); if (!mds->mds_service) { CERROR("failed to start service\n"); @@ -2551,7 +2575,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds_handle, "mds_setattr", obd->obd_proc_entry, NULL, mds_min_threads, mds_max_threads, - "ll_mdt_attr"); + "ll_mdt_attr", 0); if (!mds->mds_setattr_service) { CERROR("failed to start getattr service\n"); GOTO(err_thread, rc = -ENOMEM); @@ -2568,7 +2592,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds_handle, "mds_readpage", obd->obd_proc_entry, NULL, MDS_THREADS_MIN_READPAGE, mds_max_threads, - "ll_mdt_rdpg"); + "ll_mdt_rdpg", 0); if (!mds->mds_readpage_service) { CERROR("failed to start readpage service\n"); GOTO(err_thread2, rc = -ENOMEM); @@ -2580,7 +2604,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_thread3, rc); ping_evictor_start(); - + RETURN(0); err_thread3: @@ -2664,7 +2688,6 @@ static int mds_health_check(struct obd_device *obd) LASSERT(mds->mds_health_check_filp != NULL); rc |= !!lvfs_check_io_health(obd, mds->mds_health_check_filp); #endif - return rc; } @@ -2675,9 +2698,8 @@ static int mds_process_config(struct obd_device *obd, obd_count len, void *buf) int rc; lprocfs_init_vars(mds, &lvars); - + rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars, lcfg, obd); - return(rc); } @@ -2718,7 +2740,7 @@ static struct obd_ops mdt_obd_ops = { quota_interface_t *mds_quota_interface_ref; extern quota_interface_t mds_quota_interface; -static int __init mds_init(void) +static __attribute__((unused)) int __init mds_init(void) { int rc; struct lprocfs_static_vars lvars; @@ -2732,28 +2754,211 @@ static int __init mds_init(void) return rc; } init_obd_quota_ops(mds_quota_interface_ref, &mds_obd_ops); - + lprocfs_init_vars(mds, &lvars); - class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME); + class_register_type(&mds_obd_ops, NULL, + lvars.module_vars, LUSTRE_MDS_NAME, NULL); lprocfs_init_vars(mdt, &lvars); - class_register_type(&mdt_obd_ops, lvars.module_vars, LUSTRE_MDT_NAME); + mdt_obd_ops = mdt_obd_ops; //make compiler happy +// class_register_type(&mdt_obd_ops, NULL, +// lvars.module_vars, LUSTRE_MDT_NAME, NULL); return 0; } -static void /*__exit*/ mds_exit(void) +static __attribute__((unused)) void /*__exit*/ mds_exit(void) { lquota_exit(mds_quota_interface_ref); if (mds_quota_interface_ref) PORTAL_SYMBOL_PUT(mds_quota_interface); class_unregister_type(LUSTRE_MDS_NAME); - class_unregister_type(LUSTRE_MDT_NAME); +// class_unregister_type(LUSTRE_MDT_NAME); +} +/*mds still need lov setup here*/ +static int mds_cmd_setup(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct mds_obd *mds = &obd->u.mds; + struct lvfs_run_ctxt saved; + const char *dev; + struct vfsmount *mnt; + struct lustre_sb_info *lsi; + struct lustre_mount_info *lmi; + struct dentry *dentry; + struct file *file; + int rc = 0; + ENTRY; + + CDEBUG(D_INFO, "obd %s setup \n", obd->obd_name); + if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME))) + RETURN(0); + + if (lcfg->lcfg_bufcount < 5) { + CERROR("invalid arg for setup %s\n", MDD_OBD_NAME); + RETURN(-EINVAL); + } + dev = lustre_cfg_string(lcfg, 4); + lmi = server_get_mount(dev); + LASSERT(lmi != NULL); + + lsi = s2lsi(lmi->lmi_sb); + mnt = lmi->lmi_mnt; + /* FIXME: MDD LOV initialize objects. + * we need only lmi here but not get mount + * OSD did mount already, so put mount back + */ + atomic_dec(&lsi->lsi_mounts); + mntput(mnt); + + obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd)); + mds_init_ctxt(obd, mnt); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create OBJECTS directory: rc = %d\n", rc); + GOTO(err_putfs, rc); + } + mds->mds_objects_dir = dentry; + + dentry = lookup_one_len("__iopen__", current->fs->pwd, + strlen("__iopen__")); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc); + GOTO(err_objects, rc); + } + + mds->mds_fid_de = dentry; + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { + rc = -ENOENT; + CERROR("__iopen__ directory has no inode? rc = %d\n", rc); + GOTO(err_fid, rc); + } + + /* open and test the lov objd file */ + file = filp_open(LOV_OBJID, O_RDWR | O_CREAT, 0644); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + CERROR("cannot open/create %s file: rc = %d\n", LOV_OBJID, rc); + GOTO(err_fid, rc = PTR_ERR(file)); + } + mds->mds_lov_objid_filp = file; + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + CERROR("%s is not a regular file!: mode = %o\n", LOV_OBJID, + file->f_dentry->d_inode->i_mode); + GOTO(err_lov_objid, rc = -ENOENT); + } + + rc = mds_lov_presetup(mds, lcfg); + if (rc < 0) + GOTO(err_objects, rc); + + /* Don't wait for mds_postrecov trying to clear orphans */ + obd->obd_async_recov = 1; + rc = mds_postsetup(obd); + /* Bug 11557 - allow async abort_recov start + FIXME can remove most of this obd_async_recov plumbing + obd->obd_async_recov = 0; + */ + + if (rc) + GOTO(err_objects, rc); + + mds->mds_max_mdsize = sizeof(struct lov_mds_md); + mds->mds_max_cookiesize = sizeof(struct llog_cookie); + +err_pop: + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + RETURN(rc); +err_lov_objid: + if (mds->mds_lov_objid_filp && + filp_close((struct file *)mds->mds_lov_objid_filp, 0)) + CERROR("can't close %s after error\n", LOV_OBJID); +err_fid: + dput(mds->mds_fid_de); +err_objects: + dput(mds->mds_objects_dir); +err_putfs: + fsfilt_put_ops(obd->obd_fsops); + goto err_pop; +} + +static int mds_cmd_cleanup(struct obd_device *obd) +{ + struct mds_obd *mds = &obd->u.mds; + struct lvfs_run_ctxt saved; + int rc = 0; + ENTRY; + + if (obd->obd_fail) + LCONSOLE_WARN("%s: shutting down for failover; client state " + "will be preserved.\n", obd->obd_name); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (mds->mds_lov_objid_filp) { + rc = filp_close((struct file *)mds->mds_lov_objid_filp, 0); + mds->mds_lov_objid_filp = NULL; + if (rc) + CERROR("%s file won't close, rc=%d\n", LOV_OBJID, rc); + } + if (mds->mds_objects_dir != NULL) { + l_dput(mds->mds_objects_dir); + mds->mds_objects_dir = NULL; + } + + if (mds->mds_lov_objids != NULL) + OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size); + + shrink_dcache_parent(mds->mds_fid_de); + dput(mds->mds_fid_de); + LL_DQUOT_OFF(obd->u.obt.obt_sb); + fsfilt_put_ops(obd->obd_fsops); + + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + RETURN(rc); +} + +#if 0 +static int mds_cmd_health_check(struct obd_device *obd) +{ + return 0; +} +#endif +static struct obd_ops mds_cmd_obd_ops = { + .o_owner = THIS_MODULE, + .o_setup = mds_cmd_setup, + .o_cleanup = mds_cmd_cleanup, + .o_precleanup = mds_precleanup, + .o_create = mds_obd_create, + .o_destroy = mds_obd_destroy, + .o_llog_init = mds_llog_init, + .o_llog_finish = mds_llog_finish, + .o_notify = mds_notify, + .o_postrecov = mds_postrecov, + // .o_health_check = mds_cmd_health_check, +}; + +static int __init mds_cmd_init(void) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(mds, &lvars); + class_register_type(&mds_cmd_obd_ops, NULL, lvars.module_vars, + LUSTRE_MDS_NAME, NULL); + + return 0; +} + +static void /*__exit*/ mds_cmd_exit(void) +{ + class_unregister_type(LUSTRE_MDS_NAME); } MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Lustre Metadata Server (MDS)"); MODULE_LICENSE("GPL"); -module_init(mds_init); -module_exit(mds_exit); +module_init(mds_cmd_init); +module_exit(mds_cmd_exit); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index e93b5a5..f273d0e 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -91,6 +91,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, return count; } +#if 0 static int lprocfs_wr_group_info(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -244,6 +245,7 @@ static int lprocfs_wr_group_flush(struct file *file, const char *buffer, upcall_cache_flush_idle(obd->u.mds.mds_group_hash); return count; } +#endif static int lprocfs_wr_atime_diff(struct file *file, const char *buffer, unsigned long count, void *data) @@ -299,6 +301,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0 }, { "quota_type", lprocfs_rd_type, lprocfs_wr_type, 0 }, #endif +#if 0 { "group_expire_interval", lprocfs_rd_group_expire, lprocfs_wr_group_expire, 0}, { "group_acquire_expire", lprocfs_rd_group_acquire_expire, @@ -307,6 +310,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { lprocfs_wr_group_upcall, 0}, { "group_flush", 0, lprocfs_wr_group_flush, 0}, { "group_info", 0, lprocfs_wr_group_info, 0 }, +#endif { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, { 0 } }; diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 3c386d4..b1e80ec 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -55,10 +55,10 @@ static int mds_export_stats_init(struct obd_device *obd, struct obd_export *exp) int rc, num_stats; rc = lprocfs_exp_setup(exp); - if (rc) + if (rc) return rc; - num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) + - LPROC_MDS_LAST - 1; + num_stats = (sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) + + LPROC_MDS_LAST - 1; exp->exp_ops_stats = lprocfs_alloc_stats(num_stats); if (exp->exp_ops_stats == NULL) return -ENOMEM; @@ -83,7 +83,6 @@ int mds_client_add(struct obd_device *obd, struct obd_export *exp, struct mds_export_data *med = &exp->exp_mds_data; unsigned long *bitmap = mds->mds_client_bitmap; int new_client = (cl_idx == -1); - int rc; ENTRY; LASSERT(bitmap != NULL); @@ -101,7 +100,7 @@ int mds_client_add(struct obd_device *obd, struct obd_export *exp, repeat: if (cl_idx >= LR_MAX_CLIENTS || OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_CLIENT_ADD)) { - CERROR("no room for %u clients - fix LR_MAX_CLIENTS\n", + CERROR("no room for %u client - fix LR_MAX_CLIENTS\n", cl_idx); return -EOVERFLOW; } @@ -132,7 +131,8 @@ int mds_client_add(struct obd_device *obd, struct obd_export *exp, loff_t off = med->med_lr_off; struct file *file = mds->mds_rcvd_filp; void *handle; - + int rc; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); handle = fsfilt_start(obd, file->f_dentry->d_inode, FSFILT_OP_SETATTR, NULL); @@ -212,7 +212,8 @@ int mds_client_free(struct obd_export *exp) push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd, sizeof(zero_mcd), &off, - (!exp->exp_libclient || exp->exp_need_sync)); + (!exp->exp_libclient || + exp->exp_need_sync)); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); CDEBUG(rc == 0 ? D_INFO : D_ERROR, @@ -296,18 +297,12 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) GOTO(err_msd, rc); } if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) { - LCONSOLE_ERROR("Trying to start OBD %s using the wrong" - " disk %s. Were the /dev/ assignments " - "rearranged?\n", - obd->obd_uuid.uuid, lsd->lsd_uuid); + LCONSOLE_ERROR_MSG(0x157, "Trying to start OBD %s " + "using the wrong disk %s. Were the " + "/dev/ assignments rearranged?\n", + obd->obd_uuid.uuid, lsd->lsd_uuid); GOTO(err_msd, rc = -EINVAL); } - /* COMPAT_146 */ - /* Assume old last_rcvd format unless I_C_LR is set */ - if (!(lsd->lsd_feature_incompat & - cpu_to_le32(OBD_INCOMPAT_COMMON_LR))) - lsd->lsd_mount_count = lsd->lsd_compat14; - /* end COMPAT_146 */ mount_count = le64_to_cpu(lsd->lsd_mount_count); } @@ -324,9 +319,9 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) /* Do something like remount filesystem read-only */ GOTO(err_msd, rc = -EINVAL); } - - lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT); + lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT); + mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno); CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n", @@ -399,26 +394,28 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) le64_to_cpu(mcd->mcd_last_xid)); exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid); - if (IS_ERR(exp)) - GOTO(err_client, rc = PTR_ERR(exp)); - - med = &exp->exp_mds_data; - med->med_mcd = mcd; - rc = mds_client_add(obd, exp, cl_idx); - LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */ - - - mcd = NULL; - - spin_lock(&exp->exp_lock); - exp->exp_replay_needed = 1; - exp->exp_connecting = 0; - spin_unlock(&exp->exp_lock); - - obd->obd_recoverable_clients++; - obd->obd_max_recoverable_clients++; - class_export_put(exp); - + if (IS_ERR(exp)) { + if (PTR_ERR(exp) == -EALREADY) { + /* export already exists, zero out this one */ + mcd->mcd_uuid[0] = '\0'; + } else { + GOTO(err_client, rc = PTR_ERR(exp)); + } + } else { + med = &exp->exp_mds_data; + med->med_mcd = mcd; + rc = mds_client_add(obd, exp, cl_idx); + LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */ + + mcd = NULL; + spin_lock(&exp->exp_lock); + exp->exp_req_replay_needed = 1; + exp->exp_connecting = 0; + spin_unlock(&exp->exp_lock); + obd->obd_max_recoverable_clients++; + class_export_put(exp); + } + /* Need to check last_rcvd even for duplicated exports. */ CDEBUG(D_OTHER, "client at idx %d has last_transno = "LPU64"\n", cl_idx, last_transno); @@ -431,10 +428,12 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) obd->obd_last_committed = mds->mds_last_transno; - if (obd->obd_recoverable_clients) { + if (obd->obd_max_recoverable_clients) { + /* shouldn't happen in b_new_cmd */ + LBUG(); CWARN("RECOVERY: service %s, %d recoverable clients, " "last_transno "LPU64"\n", obd->obd_name, - obd->obd_recoverable_clients, mds->mds_last_transno); + obd->obd_max_recoverable_clients, mds->mds_last_transno); obd->obd_next_recovery_transno = obd->obd_last_committed + 1; obd->obd_recovering = 1; obd->obd_recovery_start = CURRENT_SECONDS; @@ -444,7 +443,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) } mds->mds_mount_count = mount_count + 1; - lsd->lsd_mount_count = lsd->lsd_compat14 = + lsd->lsd_mount_count = lsd->lsd_compat14 = cpu_to_le64(mds->mds_mount_count); /* save it, so mount count and last_transno is current */ @@ -461,33 +460,41 @@ err_msd: RETURN(rc); } -int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) +void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt) { struct mds_obd *mds = &obd->u.mds; - struct lvfs_run_ctxt saved; - struct dentry *dentry; - struct file *file; - int rc; - ENTRY; - - OBD_FAIL_RETURN(OBD_FAIL_MDS_FS_SETUP, -ENOENT); - - rc = cleanup_group_info(); - if (rc) - RETURN(rc); mds->mds_vfsmnt = mnt; /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? */ obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb; fsfilt_setup(obd, obd->u.obt.obt_sb); - + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); obd->obd_lvfs_ctxt.pwdmnt = mnt; obd->obd_lvfs_ctxt.pwd = mnt->mnt_root; obd->obd_lvfs_ctxt.fs = get_ds(); obd->obd_lvfs_ctxt.cb_ops = mds_lvfs_ops; + return; +} +int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) +{ + struct mds_obd *mds = &obd->u.mds; + struct lvfs_run_ctxt saved; + struct dentry *dentry; + struct file *file; + int rc; + ENTRY; + + OBD_FAIL_RETURN(OBD_FAIL_MDS_FS_SETUP, -ENOENT); + + rc = cleanup_group_info(); + if (rc) + RETURN(rc); + + mds_init_ctxt(obd, mnt); + /* setup the directory tree */ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); dentry = simple_mkdir(current->fs->pwd, "ROOT", 0755, 0); @@ -583,7 +590,8 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) file = filp_open(HEALTH_CHECK, O_RDWR | O_CREAT, 0644); if (IS_ERR(file)) { rc = PTR_ERR(file); - CERROR("cannot open/create %s file: rc = %d\n", HEALTH_CHECK, rc); + CERROR("cannot open/create %s file: rc = %d\n", HEALTH_CHECK, + rc); GOTO(err_lov_objid, rc = PTR_ERR(file)); } mds->mds_health_check_filp = file; @@ -601,11 +609,12 @@ err_pop: return rc; err_health_check: - if (mds->mds_health_check_filp && + if (mds->mds_health_check_filp && filp_close(mds->mds_health_check_filp, 0)) CERROR("can't close %s after error\n", HEALTH_CHECK); err_lov_objid: - if (mds->mds_lov_objid_filp && filp_close(mds->mds_lov_objid_filp, 0)) + if (mds->mds_lov_objid_filp && + filp_close((struct file *)mds->mds_lov_objid_filp, 0)) CERROR("can't close %s after error\n", LOV_OBJID); err_client: class_disconnect_exports(obd); @@ -645,7 +654,7 @@ int mds_fs_cleanup(struct obd_device *obd) CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc); } if (mds->mds_lov_objid_filp) { - rc = filp_close(mds->mds_lov_objid_filp, 0); + rc = filp_close((struct file *)mds->mds_lov_objid_filp, 0); mds->mds_lov_objid_filp = NULL; if (rc) CERROR("%s file won't close, rc=%d\n", LOV_OBJID, rc); @@ -654,7 +663,8 @@ int mds_fs_cleanup(struct obd_device *obd) rc = filp_close(mds->mds_health_check_filp, 0); mds->mds_health_check_filp = NULL; if (rc) - CERROR("%s file won't close, rc=%d\n", HEALTH_CHECK, rc); + CERROR("%s file won't close, rc=%d\n", HEALTH_CHECK, + rc); } if (mds->mds_objects_dir != NULL) { l_dput(mds->mds_objects_dir); @@ -700,6 +710,11 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, /* the owner of object file should always be root */ ucred.luc_cap = current->cap_effective | CAP_SYS_RESOURCE; + if (strncmp(exp->exp_obd->obd_name, MDD_OBD_NAME, + strlen(MDD_OBD_NAME))) { + RETURN(0); + } + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, &ucred); sprintf(fidname, "OBJECTS/%u.%u", tmpname, current->pid); @@ -749,9 +764,10 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, err = fsfilt_commit(exp->exp_obd, mds->mds_objects_dir->d_inode, handle, 0); - if (!err) - oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGENER; - else if (!rc) + if (!err) { + oa->o_gr = FILTER_GROUP_MDS0 + mds->mds_id; + oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLGROUP; + } else if (!rc) rc = err; out_dput: dput(new_child); diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index af32d2f..35e312d 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -122,17 +122,19 @@ static inline void mds_inode_unset_orphan(struct inode *inode) } /* mds/mds_reint.c */ -int res_gt(struct ldlm_res_id *res1, struct ldlm_res_id *res2, +int res_gt(const struct ldlm_res_id *res1, const struct ldlm_res_id *res2, ldlm_policy_data_t *p1, ldlm_policy_data_t *p2); -int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, +int enqueue_ordered_locks(struct obd_device *obd, + const struct ldlm_res_id *p1_res_id, struct lustre_handle *p1_lockh, int p1_lock_mode, ldlm_policy_data_t *p1_policy, - struct ldlm_res_id *p2_res_id, + const struct ldlm_res_id *p2_res_id, struct lustre_handle *p2_lockh, int p2_lock_mode, ldlm_policy_data_t *p2_policy); void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error); int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data); + struct ptlrpc_request *req, int rc, __u32 op_data, + int force_sync); void mds_reconstruct_generic(struct ptlrpc_request *req); void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd); int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, @@ -146,9 +148,6 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, __u64 child_lockpart); int mds_lock_new_child(struct obd_device *obd, struct inode *inode, struct lustre_handle *child_lockh); -int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, - struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, struct ll_fid *fid); int mds_get_parents_children_locked(struct obd_device *obd, struct mds_obd *mds, @@ -180,14 +179,10 @@ int mds_osc_destroy_orphan(struct obd_device *obd, umode_t mode, struct llog_cookie *logcookies, int log_unlink); int mds_cleanup_pending(struct obd_device *obd); + /* mds/mds_log.c */ -int mds_log_op_unlink(struct obd_device *obd, - struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, int cookies_size); -int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, - struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, int cookies_size); -int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, +int mds_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, struct llog_catid *logid, struct obd_uuid *uuid); int mds_llog_finish(struct obd_device *obd, int count); @@ -195,10 +190,10 @@ int mds_llog_finish(struct obd_device *obd, int count); int mds_lov_connect(struct obd_device *obd, char * lov_name); int mds_lov_disconnect(struct obd_device *obd); int mds_lov_write_objids(struct obd_device *obd); -void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); +void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); int mds_lov_set_nextid(struct obd_device *obd); -int mds_lov_start_synchronize(struct obd_device *obd, +int mds_lov_start_synchronize(struct obd_device *obd, struct obd_device *watched, void *data, int nonblock); int mds_post_mds_lovconf(struct obd_device *obd); @@ -225,7 +220,7 @@ int mds_close(struct ptlrpc_request *req, int offset); int mds_done_writing(struct ptlrpc_request *req, int offset); /*mds/mds_join.c*/ -int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, +int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, struct dentry *dchild, struct lustre_handle *lockh); /* mds/mds_fs.c */ @@ -236,6 +231,7 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, struct obd_export *md_exp); +void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt); /* mds/handler.c */ extern struct lvfs_callback_ops mds_lvfs_ops; diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index d0bb7f2..ba069c6 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -221,7 +221,7 @@ static void mds_finish_join(struct mds_obd *mds, struct ptlrpc_request *req, CDEBUG(D_INFO, "change the max md size from %d to "LPSZ"\n", mds->mds_max_mdsize, sizeof(*lmmj)); - if (mds->mds_max_mdsize < max_easize || + if (mds->mds_max_mdsize < max_easize || mds->mds_max_cookiesize < max_cookiesize) { body->max_mdsize = mds->mds_max_mdsize > max_easize ? mds->mds_max_mdsize : max_easize; @@ -260,8 +260,10 @@ static int mds_join_unlink_tail_inode(struct mds_update_record *rec, ldlm_lock_decref(lockh, LCK_EX); head_inode = dchild->d_inode; - mdc_pack_fid(&head_fid, head_inode->i_ino, head_inode->i_generation, - head_inode->i_mode & S_IFMT); + + head_fid.id = head_inode->i_ino; + head_fid.generation = head_inode->i_generation; + head_fid.f_type = head_inode->i_mode & S_IFMT; rc = mds_get_parents_children_locked(obd, mds, &join_rec->jr_fid, &de_tailparent, &head_fid, @@ -477,7 +479,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, sizeof(struct lov_mds_md_join), "lov"); mds_finish_join(mds, req, head_inode, head_lmmj); cleanup: - rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0); + rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0, 0); switch(cleanup_phase){ case 3: llog_close(llh_head); diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 2a68fb5..0678fe9 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -106,8 +106,10 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->sa_fsuid; r->ur_uc.luc_fsgid = rec->sa_fsgid; r->ur_uc.luc_cap = rec->sa_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->sa_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->sa_fid; attr->ia_valid = rec->sa_valid; attr->ia_mode = rec->sa_mode; @@ -152,8 +154,10 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->cr_fsuid; r->ur_uc.luc_fsgid = rec->cr_fsgid; r->ur_uc.luc_cap = rec->cr_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->cr_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->cr_fid; r->ur_fid2 = &rec->cr_replayfid; r->ur_mode = rec->cr_mode; @@ -198,8 +202,10 @@ static int mds_link_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->lk_fsuid; r->ur_uc.luc_fsgid = rec->lk_fsgid; r->ur_uc.luc_cap = rec->lk_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->lk_suppgid1; r->ur_uc.luc_suppgid2 = rec->lk_suppgid2; +#endif r->ur_fid1 = &rec->lk_fid1; r->ur_fid2 = &rec->lk_fid2; r->ur_time = rec->lk_time; @@ -226,8 +232,10 @@ static int mds_unlink_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->ul_fsuid; r->ur_uc.luc_fsgid = rec->ul_fsgid; r->ur_uc.luc_cap = rec->ul_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->ul_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_mode = rec->ul_mode; r->ur_fid1 = &rec->ul_fid1; r->ur_fid2 = &rec->ul_fid2; @@ -255,8 +263,10 @@ static int mds_rename_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->rn_fsuid; r->ur_uc.luc_fsgid = rec->rn_fsgid; r->ur_uc.luc_cap = rec->rn_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->rn_suppgid1; r->ur_uc.luc_suppgid2 = rec->rn_suppgid2; +#endif r->ur_fid1 = &rec->rn_fid1; r->ur_fid2 = &rec->rn_fid2; r->ur_time = rec->rn_time; @@ -289,8 +299,10 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->cr_fsuid; r->ur_uc.luc_fsgid = rec->cr_fsgid; r->ur_uc.luc_cap = rec->cr_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->cr_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->cr_fid; r->ur_fid2 = &rec->cr_replayfid; r->ur_mode = rec->cr_mode; @@ -360,8 +372,10 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, { struct mds_body *body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); +#if 0 struct mds_obd *mds = mds_req2mds(req); int rc; +#endif LASSERT(body != NULL); /* previously verified & swabbed by caller */ @@ -378,6 +392,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, ucred->luc_cap = body->capability; } +#if 0 ucred->luc_uce = upcall_cache_get_entry(mds->mds_group_hash, ucred->luc_fsuid, ucred->luc_fsgid, 1, @@ -392,11 +407,14 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, if (ucred->luc_uce) ucred->luc_fsgid = ucred->luc_uce->ue_primary; #endif +#endif return 0; } void mds_exit_ucred(struct lvfs_ucred *ucred, struct mds_obd *mds) { +#if 0 upcall_cache_put_entry(mds->mds_group_hash, ucred->luc_uce); +#endif } diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index 5878a2d..887d6ef 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -125,8 +125,8 @@ out: obd_free_memmd(mds->mds_osc_exp, &lsm); RETURN(rc); } - -int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, +EXPORT_SYMBOL(mds_log_op_unlink); +int mds_log_op_setattr(struct obd_device *obd, __u32 uid, __u32 gid, struct lov_mds_md *lmm, int lmm_size, struct llog_cookie *logcookies, int cookies_size) { @@ -155,8 +155,8 @@ int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, /* prepare setattr log record */ lsr->lsr_hdr.lrh_len = lsr->lsr_tail.lrt_len = sizeof(*lsr); lsr->lsr_hdr.lrh_type = MDS_SETATTR_REC; - lsr->lsr_uid = inode->i_uid; - lsr->lsr_gid = inode->i_gid; + lsr->lsr_uid = uid; + lsr->lsr_gid = gid; /* write setattr log */ ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); @@ -168,6 +168,7 @@ int mds_log_op_setattr(struct obd_device *obd, struct inode *inode, obd_free_memmd(mds->mds_osc_exp, &lsm); RETURN(rc); } +EXPORT_SYMBOL(mds_log_op_setattr); static struct llog_operations mds_ost_orig_logops = { lop_add: mds_llog_origin_add, @@ -178,24 +179,25 @@ static struct llog_operations mds_size_repl_logops = { lop_cancel: mds_llog_repl_cancel, }; -int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, struct obd_uuid *uuid) +int mds_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, struct llog_catid *logid, + struct obd_uuid *uuid) { struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; int rc; ENTRY; - rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_MDS_OST_ORIG_CTXT, tgt, 0, NULL, &mds_ost_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, &mds_size_repl_logops); if (rc) RETURN(rc); - rc = obd_llog_init(lov_obd, tgt, count, logid, uuid); + rc = obd_llog_init(lov_obd, llogs, tgt, count, logid, uuid); if (rc) CERROR("lov_llog_init err %d\n", rc); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index a411b89..0206299 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -56,6 +56,7 @@ void mds_lov_update_objids(struct obd_device *obd, obd_id *ids) unlock_kernel(); EXIT; } +EXPORT_SYMBOL(mds_lov_update_objids); static int mds_lov_read_objids(struct obd_device *obd) { @@ -125,6 +126,7 @@ int mds_lov_write_objids(struct obd_device *obd) RETURN(rc); } +EXPORT_SYMBOL(mds_lov_write_objids); int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid) { @@ -140,8 +142,9 @@ int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid) * missing objects below this ID, they will be created. If it finds * objects above this ID, they will be removed. */ memset(&oa, 0, sizeof(oa)); - oa.o_valid = OBD_MD_FLFLAGS; oa.o_flags = OBD_FL_DELORPHAN; + oa.o_gr = FILTER_GROUP_MDS0 + mds->mds_id; + oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP; if (ost_uuid != NULL) { memcpy(&oa.o_inline, ost_uuid, sizeof(*ost_uuid)); oa.o_valid |= OBD_MD_FLINLINE; @@ -235,6 +238,19 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) "%d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize, stripes); + /* If we added a target we have to reconnect the llogs */ + /* We only _need_ to do this at first add (idx), or the first time + after recovery. However, it should now be safe to call anytime. */ + mutex_down(&obd->obd_dev_sem); + llog_cat_initialize(obd, NULL, mds->mds_lov_desc.ld_tgt_count, NULL); + mutex_up(&obd->obd_dev_sem); + + /*XXX this notifies the MDD until lov handling use old mds code */ + if (obd->obd_upcall.onu_owner) { + LASSERT(obd->obd_upcall.onu_upcall != NULL); + rc = obd->obd_upcall.onu_upcall(NULL, NULL, 0, + obd->obd_upcall.onu_owner); + } out: OBD_FREE(ld, sizeof(*ld)); RETURN(rc); @@ -292,14 +308,6 @@ static int mds_lov_update_mds(struct obd_device *obd, mds->mds_lov_objids[idx], idx); } - /* If we added a target we have to reconnect the llogs */ - /* We only _need_ to do this at first add (idx), or the first time - after recovery. However, it should now be safe to call anytime. */ - CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx); - mutex_down(&obd->obd_dev_sem); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, uuid); - mutex_up(&obd->obd_dev_sem); - RETURN(rc); } @@ -329,10 +337,12 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) if (data == NULL) RETURN(-ENOMEM); data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX | - OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64; + OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 | + OBD_CONNECT_OSS_CAPA; data->ocd_version = LUSTRE_VERSION_CODE; + data->ocd_group = mds->mds_id + FILTER_GROUP_MDS0; /* NB: lov_connect() needs to fill in .ocd_index for each OST */ - rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid, data); + rc = obd_connect(NULL, &conn, mds->mds_osc_obd, &obd->obd_uuid, data); OBD_FREE(data, sizeof(*data)); if (rc) { CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc); @@ -347,7 +357,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) lov_name, rc); GOTO(err_discon, rc); } - + /* Deny new client connections until we are sure we have some OSTs */ obd->obd_no_conn = 1; @@ -362,7 +372,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) GOTO(err_reg, rc); /* tgt_count may be 0! */ - rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); + rc = llog_cat_initialize(obd, NULL, mds->mds_lov_desc.ld_tgt_count, NULL); if (rc) { CERROR("failed to initialize catalog %d\n", rc); GOTO(err_reg, rc); @@ -391,8 +401,10 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) * set_nextid(). The class driver can help us here, because * it can use the obd_recovering flag to determine when the * the OBD is full available. */ + /* MDD device will care about that if (!obd->obd_recovering) rc = mds_postrecov(obd); + */ RETURN(rc); err_reg: @@ -583,15 +595,17 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); int rc2; + __u32 group; obd_llog_finish(obd, mds->mds_lov_desc.ld_tgt_count); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); + llog_cat_initialize(obd, NULL, mds->mds_lov_desc.ld_tgt_count, NULL); + group = FILTER_GROUP_MDS0 + mds->mds_id; rc2 = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN), KEY_MDS_CONN, - 0, NULL, NULL); + sizeof(group), &group, NULL); if (!rc) rc = rc2; RETURN(rc); @@ -610,7 +624,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_ABORT_RECOVERY: CERROR("aborting recovery for device %s\n", obd->obd_name); - target_abort_recovery(obd); + target_stop_recovery_thread(obd); RETURN(0); default: @@ -627,6 +641,32 @@ struct mds_lov_sync_info { __u32 mlsi_index; /* index of target */ }; +static int mds_propagate_capa_keys(struct mds_obd *mds) +{ + struct lustre_capa_key *key; + int i, rc = 0; + + ENTRY; + + if (!mds->mds_capa_keys) + RETURN(0); + + for (i = 0; i < 2; i++) { + key = &mds->mds_capa_keys[i]; + DEBUG_CAPA_KEY(D_SEC, key, "propagate"); + + rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_CAPA_KEY), + KEY_CAPA_KEY, sizeof(*key), key, NULL); + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, key, + "propagate failed (rc = %d) for", rc); + RETURN(rc); + } + } + + RETURN(0); +} + /* We only sync one osc at a time, so that we don't have to hold any kind of lock on the whole mds_lov_desc, which may change (grow) as a result of mds_lov_add_ost. This also avoids any @@ -640,6 +680,7 @@ static int __mds_lov_synchronize(void *data) struct mds_obd *mds = &obd->u.mds; struct obd_uuid *uuid; __u32 idx = mlsi->mlsi_index; + struct mds_group_info mgi; int rc = 0; ENTRY; @@ -653,12 +694,18 @@ static int __mds_lov_synchronize(void *data) rc = mds_lov_update_mds(obd, watched, idx, uuid); if (rc != 0) GOTO(out, rc); - + mgi.group = FILTER_GROUP_MDS0 + mds->mds_id; + mgi.uuid = uuid; rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN), - KEY_MDS_CONN, 0, uuid, NULL); + KEY_MDS_CONN, sizeof(mgi), &mgi, NULL); if (rc != 0) GOTO(out, rc); + /* propagate capability keys */ + rc = mds_propagate_capa_keys(mds); + if (rc) + GOTO(out, rc); + rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), mds->mds_lov_desc.ld_tgt_count, NULL, NULL, uuid); @@ -671,7 +718,10 @@ static int __mds_lov_synchronize(void *data) LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n", obd->obd_name, obd_uuid2str(uuid)); - + /* + * FIXME: this obd_stopping was useless, + * since obd in mdt layer was set + */ if (obd->obd_stopping) GOTO(out, rc = -ENODEV); @@ -681,7 +731,16 @@ static int __mds_lov_synchronize(void *data) obd->obd_name, rc); GOTO(out, rc); } - + + if (obd->obd_upcall.onu_owner) { + /* + * This is a hack for mds_notify->mdd_notify. When the mds obd + * in mdd is removed, This hack should be removed. + */ + LASSERT(obd->obd_upcall.onu_upcall != NULL); + rc = obd->obd_upcall.onu_upcall(NULL, NULL, 0, + obd->obd_upcall.onu_owner); + } EXIT; out: class_decref(obd); @@ -777,7 +836,6 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, } CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); - if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); @@ -792,6 +850,15 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, after the mdt in the config log. They didn't make it into mds_lov_connect. */ rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp); + if (rc) + RETURN(rc); + /* We should update init llog here too for replay unlink and + * possiable llog init race when recovery complete */ + mutex_down(&obd->obd_dev_sem); + llog_cat_initialize(obd, NULL, + obd->u.mds.mds_lov_desc.ld_tgt_count, + &watched->u.cli.cl_target_uuid); + mutex_up(&obd->obd_dev_sem); RETURN(rc); } @@ -868,4 +935,4 @@ void mds_objids_from_lmm(obd_id *ids, struct lov_mds_md *lmm, le64_to_cpu(lmm->lmm_objects[i].l_object_id); } } - +EXPORT_SYMBOL(mds_objids_from_lmm); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index d12865b..58e4bce 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -110,8 +110,7 @@ static void mds_mfd_put(struct mds_file_data *mfd) LASSERT(atomic_read(&mfd->mfd_refcount) > 0 && atomic_read(&mfd->mfd_refcount) < 0x5a5a); if (atomic_dec_and_test(&mfd->mfd_refcount)) { - LASSERT(list_empty(&mfd->mfd_handle.h_link)); - OBD_FREE(mfd, sizeof *mfd); + OBD_FREE_RCU(mfd, sizeof *mfd, &mfd->mfd_handle); } } @@ -371,7 +370,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_ALLOC_OBDO)) GOTO(out_ids, rc = -ENOMEM); - oinfo.oi_oa = obdo_alloc(); + OBDO_ALLOC(oinfo.oi_oa); if (oinfo.oi_oa == NULL) GOTO(out_ids, rc = -ENOMEM); oinfo.oi_oa->o_uid = 0; /* must have 0 uid / gid on OST */ @@ -409,7 +408,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, if (rc) GOTO(out_oa, rc); } - rc = obd_create(mds->mds_osc_exp, oinfo.oi_oa, + rc = obd_create(mds->mds_osc_exp, oinfo.oi_oa, &oinfo.oi_md, &oti); if (rc) { int level = D_ERROR; @@ -432,11 +431,12 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, GOTO(out_oa, rc); } oinfo.oi_md->lsm_object_id = oinfo.oi_oa->o_id; + oinfo.oi_md->lsm_object_gr = oinfo.oi_oa->o_gr; } if (inode->i_size) { oinfo.oi_oa->o_size = inode->i_size; - obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLTYPE | - OBD_MD_FLATIME | OBD_MD_FLMTIME | + obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLTYPE | + OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE); /* pack lustre id to OST */ @@ -475,18 +475,19 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, if (IS_ERR(*handle)) { rc = PTR_ERR(*handle); *handle = NULL; - GOTO(out_oa, rc); + GOTO(free_diskmd, rc); } rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); LASSERT(lmm_buf); memcpy(lmm_buf, lmm, lmm_size); +free_diskmd: obd_free_diskmd(mds->mds_osc_exp, &lmm); - out_oa: +out_oa: oti_free_cookies(&oti); - obdo_free(oinfo.oi_oa); - out_ids: + OBDO_FREE(oinfo.oi_oa); +out_ids: if (rc) { OBD_FREE(*ids, mds->mds_lov_desc.ld_tgt_count * sizeof(**ids)); *ids = NULL; @@ -797,7 +798,7 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL); rc = mds_finish_transno(mds, dchild->d_inode, handle, - req, rc, rep ? rep->lock_policy_res1 : 0); + req, rc, rep ? rep->lock_policy_res1 : 0, 0); /* XXX what do we do here if mds_finish_transno itself failed? */ l_dput(dchild); @@ -843,7 +844,7 @@ int mds_lock_new_child(struct obd_device *obd, struct inode *inode, if (child_lockh == NULL) child_lockh = &lockh; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, child_res_id, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &child_res_id, LDLM_PLAIN, NULL, LCK_EX, &lock_flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, child_lockh); @@ -954,9 +955,7 @@ int mds_open(struct mds_update_record *rec, int offset, if (rec->ur_flags & (MDS_OPEN_CREAT | MDS_OPEN_JOIN_FILE)) parent_mode = LCK_EX; dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode, - &parent_lockh, rec->ur_name, - rec->ur_namelen - 1, - MDS_INODELOCK_UPDATE); + &parent_lockh, MDS_INODELOCK_UPDATE); if (IS_ERR(dparent)) { rc = PTR_ERR(dparent); if (rc != -ENOENT) { @@ -1144,16 +1143,16 @@ found_child: else child_mode = LCK_CR; - if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && + if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && (rec->ur_flags & MDS_OPEN_LOCK)) { /* In case of replay we do not get a lock assuming that the caller has it already */ child_res_id.name[0] = dchild->d_inode->i_ino; child_res_id.name[1] = dchild->d_inode->i_generation; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, child_res_id, - LDLM_IBITS, &policy, child_mode, - &lock_flags, ldlm_blocking_ast, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &child_res_id, + LDLM_IBITS, &policy, child_mode, + &lock_flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, child_lockh); if (rc != ELDLM_OK) @@ -1179,7 +1178,7 @@ found_child: cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, - req, rc, rep ? rep->lock_policy_res1 : 0); + req, rc, rep ? rep->lock_policy_res1 : 0, 0); cleanup_no_trans: switch (cleanup_phase) { @@ -1216,6 +1215,13 @@ found_child: /* trigger dqacq on the owner of child and parent */ lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, FSFILT_OP_CREATE); + + /* If we have not taken the "open" lock, we may not return 0 here, + because caller expects 0 to mean "lock is taken", and it needs + nonzero return here for caller to return EDLM_LOCK_ABORTED to + client. Later caller should rewrite the return value back to zero + if it to be used any further + */ RETURN(rc); } @@ -1228,9 +1234,9 @@ found_child: * (it will not even _have_ an entry in last_rcvd anymore). * * Returns EAGAIN if the client needs to get more data and re-close. */ -int mds_mfd_close(struct ptlrpc_request *req, int offset, - struct obd_device *obd, struct mds_file_data *mfd, - int unlink_orphan, struct lov_mds_md *lmm, int lmm_size, +int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd, + struct mds_file_data *mfd, int unlink_orphan, + struct lov_mds_md *lmm, int lmm_size, struct llog_cookie *logcookies, int cookies_size, __u64 *valid) { @@ -1313,6 +1319,7 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset, handle = NULL; GOTO(cleanup, rc); } + if (lmm != NULL && (*valid & OBD_MD_FLEASIZE) && mds_log_op_unlink(obd, lmm, lmm_size, logcookies, cookies_size) > 0) { @@ -1390,7 +1397,7 @@ out: cleanup: if (req != NULL && reply_body != NULL) { - rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0); + rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0, 0); } else if (handle) { int err = fsfilt_commit(obd, pending_dir, handle, 0); if (err) { @@ -1450,9 +1457,6 @@ int mds_close(struct ptlrpc_request *req, int offset) RETURN(-EFAULT); } - if (body->flags & MDS_BFLAG_UNCOMMITTED_WRITES) - /* do some stuff */ ; - spin_lock(&med->med_open_lock); mfd = mds_handle2mfd(&body->handle); if (mfd == NULL) { diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 75b9dbe..d30103c 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -74,7 +74,7 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, CDEBUG(D_HA, "cancelling %d cookies\n", (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies))); - rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, mlcd->mlcd_lmm, + rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, mlcd->mlcd_lmm, mlcd->mlcd_eadatalen); if (rc < 0) { CERROR("bad LSM cancelling %d log cookies: rc %d\n", @@ -102,13 +102,14 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, /* Assumes caller has already pushed us into the kernel context. */ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data) + struct ptlrpc_request *req, int rc, __u32 op_data, + int force_sync) { struct mds_export_data *med = &req->rq_export->exp_mds_data; struct mds_client_data *mcd = med->med_mcd; struct obd_device *obd = req->rq_export->exp_obd; - __u64 transno, prev_transno; int err; + __u64 transno, prev_transno; loff_t off; int log_pri = D_HA; ENTRY; @@ -119,7 +120,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, } /* if the export has already been failed, we have no last_rcvd slot */ - if (req->rq_export->exp_failed) { + if (req->rq_export->exp_failed || obd->obd_fail) { CWARN("commit transaction for disconnected client %s: rc %d\n", req->rq_export->exp_client_uuid.uuid, rc); if (rc == 0) @@ -184,11 +185,18 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off); err = -EINVAL; } else { - fsfilt_add_journal_cb(req->rq_export->exp_obd, transno, handle, - mds_commit_cb, NULL); + struct obd_export *exp = req->rq_export; + + if (!force_sync) + force_sync = fsfilt_add_journal_cb(exp->exp_obd,transno, + handle, mds_commit_cb, + NULL); + err = fsfilt_write_record(obd, mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off, - req->rq_export->exp_need_sync); + force_sync | exp->exp_need_sync); + if (force_sync) + mds_commit_cb(obd, transno, NULL, err); } if (err) { @@ -199,7 +207,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, DEBUG_REQ(log_pri, req, "wrote trans #"LPU64" rc %d client %s at idx %u: err = %d", - transno, rc, mcd->mcd_uuid, med->med_lr_idx, err); + transno, rc, mcd->mcd_uuid, med->med_lr_idx, err); err = mds_lov_write_objids(obd); if (err) { @@ -365,7 +373,7 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) } spin_unlock(&exp->exp_lock); } - +EXPORT_SYMBOL(mds_steal_ack_locks); void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd) { if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) { @@ -416,9 +424,10 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec, l_dput(de); } -int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, +int mds_osc_setattr_async(struct obd_device *obd, __u32 uid, __u32 gid, struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, struct ll_fid *fid) + struct llog_cookie *logcookies, __u64 id, __u32 gen, + struct obd_capa *oc) { struct mds_obd *mds = &obd->u.mds; struct obd_trans_info oti = { 0 }; @@ -430,7 +439,7 @@ int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, RETURN(0); /* first get memory EA */ - oinfo.oi_oa = obdo_alloc(); + OBDO_ALLOC(oinfo.oi_oa); if (!oinfo.oi_oa) RETURN(-ENOMEM); @@ -438,7 +447,7 @@ int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, rc = obd_unpackmd(mds->mds_osc_exp, &oinfo.oi_md, lmm, lmm_size); if (rc < 0) { - CERROR("Error unpack md %p for inode %lu\n", lmm, inode->i_ino); + CERROR("Error unpack md %p for inode "LPU64"\n", lmm, id); GOTO(out, rc); } @@ -449,18 +458,21 @@ int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, } /* then fill oa */ - obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLUID | OBD_MD_FLGID); - oinfo.oi_oa->o_valid |= OBD_MD_FLID; + oinfo.oi_oa->o_uid = uid; + oinfo.oi_oa->o_gid = gid; oinfo.oi_oa->o_id = oinfo.oi_md->lsm_object_id; + oinfo.oi_oa->o_gr = oinfo.oi_md->lsm_object_gr; + oinfo.oi_oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | + OBD_MD_FLUID | OBD_MD_FLGID; if (logcookies) { oinfo.oi_oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = logcookies; } - LASSERT(fid != NULL); - oinfo.oi_oa->o_fid = fid->id; - oinfo.oi_oa->o_generation = fid->generation; + oinfo.oi_oa->o_fid = id; + oinfo.oi_oa->o_generation = gen; oinfo.oi_oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER; + oinfo.oi_capa = oc; /* do async setattr from mds to ost not waiting for responses. */ rc = obd_setattr_async(mds->mds_osc_exp, &oinfo, &oti, NULL); @@ -470,9 +482,10 @@ int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, out: if (oinfo.oi_md) obd_free_memmd(mds->mds_osc_exp, &oinfo.oi_md); - obdo_free(oinfo.oi_oa); + OBDO_FREE(oinfo.oi_oa); RETURN(rc); } +EXPORT_SYMBOL(mds_osc_setattr_async); /* In the raw-setattr case, we lock the child inode. * In the write-back case or if being called from open, the client holds a lock @@ -495,9 +508,9 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, struct lov_mds_md *lmm = NULL; struct llog_cookie *logcookies = NULL; int lmm_size = 0, need_lock = 1, cookie_size = 0; - int rc = 0, cleanup_phase = 0, err, locked = 0; + int rc = 0, cleanup_phase = 0, err, locked = 0, sync = 0; unsigned int qcids[MAXQUOTAS] = { 0, 0 }; - unsigned int qpids[MAXQUOTAS] = { rec->ur_iattr.ia_uid, + unsigned int qpids[MAXQUOTAS] = { rec->ur_iattr.ia_uid, rec->ur_iattr.ia_gid }; ENTRY; @@ -523,7 +536,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, lockpart |= MDS_INODELOCK_LOOKUP; de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_EX, - &lockh, NULL, 0, lockpart); + &lockh, lockpart); if (IS_ERR(de)) GOTO(cleanup, rc = PTR_ERR(de)); locked = 1; @@ -550,7 +563,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, need_lock = 0; } - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb); /* start a log jounal handle if needed */ if (S_ISREG(inode->i_mode) && @@ -594,7 +607,8 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, if (logcookies == NULL) GOTO(cleanup, rc = -ENOMEM); - if (mds_log_op_setattr(obd, inode, lmm, lmm_size, + if (mds_log_op_setattr(obd, inode->i_uid, inode->i_gid, + lmm, lmm_size, logcookies, cookie_size) <= 0) { OBD_FREE(logcookies, cookie_size); logcookies = NULL; @@ -674,13 +688,14 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, EXIT; cleanup: if (mlcd != NULL) - fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, - mds_cancel_cookies_cb, mlcd); - err = mds_finish_transno(mds, inode, handle, req, rc, 0); + sync = fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, + mds_cancel_cookies_cb, mlcd); + err = mds_finish_transno(mds, inode, handle, req, rc, 0, sync); /* do mds to ost setattr if needed */ if (!rc && !err && lmm_size) - mds_osc_setattr_async(obd, inode, lmm, lmm_size, - logcookies, rec->ur_fid1); + mds_osc_setattr_async(obd, inode->i_ino, inode->i_generation, lmm, + lmm_size, logcookies, rec->ur_fid1->id, + rec->ur_fid1->generation, NULL); switch (cleanup_phase) { case 2: @@ -777,7 +792,6 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, GOTO(cleanup, rc = -ESTALE); dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_EX, &lockh, - rec->ur_name, rec->ur_namelen - 1, MDS_INODELOCK_UPDATE); if (IS_ERR(dparent)) { rc = PTR_ERR(dparent); @@ -802,7 +816,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, cleanup_phase = 2; /* child dentry */ - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb); if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) { if (dchild->d_inode) @@ -933,7 +947,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, EXIT; cleanup: - err = mds_finish_transno(mds, dir, handle, req, rc, 0); + err = mds_finish_transno(mds, dir, handle, req, rc, 0, 0); if (rc && created) { /* Destroy the file we just created. This should not need @@ -992,7 +1006,7 @@ cleanup: return 0; } -int res_gt(struct ldlm_res_id *res1, struct ldlm_res_id *res2, +int res_gt(const struct ldlm_res_id *res1, const struct ldlm_res_id *res2, ldlm_policy_data_t *p1, ldlm_policy_data_t *p2) { int i; @@ -1023,14 +1037,15 @@ int res_gt(struct ldlm_res_id *res1, struct ldlm_res_id *res2, * * One or two locks are taken in numerical order. A res_id->name[0] of 0 means * no lock is taken for that res_id. Must be at least one non-zero res_id. */ -int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, +int enqueue_ordered_locks(struct obd_device *obd, + const struct ldlm_res_id *p1_res_id, struct lustre_handle *p1_lockh, int p1_lock_mode, ldlm_policy_data_t *p1_policy, - struct ldlm_res_id *p2_res_id, + const struct ldlm_res_id *p2_res_id, struct lustre_handle *p2_lockh, int p2_lock_mode, ldlm_policy_data_t *p2_policy) { - struct ldlm_res_id *res_id[2] = { p1_res_id, p2_res_id }; + const struct ldlm_res_id *res_id[2] = { p1_res_id, p2_res_id }; struct lustre_handle *handles[2] = { p1_lockh, p2_lockh }; int lock_modes[2] = { p1_lock_mode, p2_lock_mode }; ldlm_policy_data_t *policies[2] = {p1_policy, p2_policy}; @@ -1057,10 +1072,10 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, res_id[0]->name[0], res_id[1]->name[0]); flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, *res_id[0], + rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id[0], LDLM_IBITS, policies[0], lock_modes[0], - &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, + &flags, ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, 0, NULL, handles[0]); if (rc != ELDLM_OK) RETURN(-EIO); @@ -1072,9 +1087,9 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, ldlm_lock_addref(handles[1], lock_modes[1]); } else if (res_id[1]->name[0] != 0) { flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, *res_id[1], + rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id[1], LDLM_IBITS, policies[1], - lock_modes[1], &flags, + lock_modes[1], &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, handles[1]); @@ -1088,14 +1103,15 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, RETURN(0); } -static inline int res_eq(struct ldlm_res_id *res1, struct ldlm_res_id *res2) +static inline int res_eq(const struct ldlm_res_id *res1, + const struct ldlm_res_id *res2) { return !memcmp(res1, res2, sizeof(*res1)); } static inline void -try_to_aggregate_locks(struct ldlm_res_id *res1, ldlm_policy_data_t *p1, - struct ldlm_res_id *res2, ldlm_policy_data_t *p2) +try_to_aggregate_locks(const struct ldlm_res_id *res1, ldlm_policy_data_t *p1, + const struct ldlm_res_id *res2, ldlm_policy_data_t *p2) { if (!res_eq(res1, res2)) return; @@ -1104,21 +1120,22 @@ try_to_aggregate_locks(struct ldlm_res_id *res1, ldlm_policy_data_t *p1, p1->l_inodebits.bits |= p2->l_inodebits.bits; } -int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id, +int enqueue_4ordered_locks(struct obd_device *obd, + const struct ldlm_res_id *p1_res_id, struct lustre_handle *p1_lockh, int p1_lock_mode, - ldlm_policy_data_t *p1_policy, - struct ldlm_res_id *p2_res_id, + ldlm_policy_data_t *p1_policy, + const struct ldlm_res_id *p2_res_id, struct lustre_handle *p2_lockh, int p2_lock_mode, - ldlm_policy_data_t *p2_policy, - struct ldlm_res_id *c1_res_id, + ldlm_policy_data_t *p2_policy, + const struct ldlm_res_id *c1_res_id, struct lustre_handle *c1_lockh, int c1_lock_mode, - ldlm_policy_data_t *c1_policy, - struct ldlm_res_id *c2_res_id, + ldlm_policy_data_t *c1_policy, + const struct ldlm_res_id *c2_res_id, struct lustre_handle *c2_lockh, int c2_lock_mode, ldlm_policy_data_t *c2_policy) { - struct ldlm_res_id *res_id[5] = { p1_res_id, p2_res_id, - c1_res_id, c2_res_id }; + const struct ldlm_res_id *res_id[5] = { p1_res_id, p2_res_id, + c1_res_id, c2_res_id }; struct lustre_handle *dlm_handles[5] = { p1_lockh, p2_lockh, c1_lockh, c2_lockh }; int lock_modes[5] = { p1_lock_mode, p2_lock_mode, @@ -1183,11 +1200,11 @@ int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id, try_to_aggregate_locks(res_id[i], policies[i], res_id[i+1], policies[i+1]); rc = ldlm_cli_enqueue_local(obd->obd_namespace, - *res_id[i], LDLM_IBITS, + res_id[i], LDLM_IBITS, policies[i], lock_modes[i], &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, - NULL, 0, NULL, + ldlm_completion_ast, NULL, + NULL, 0, NULL, dlm_handles[i]); if (rc != ELDLM_OK) GOTO(out_err, rc = -EIO); @@ -1216,7 +1233,7 @@ out_err: * Returns 1 if the child changed and we need to re-lock (no locks held). * Returns -ve error with a valid dchild (no locks held). */ static int mds_verify_child(struct obd_device *obd, - struct ldlm_res_id *parent_res_id, + const struct ldlm_res_id *parent_res_id, struct lustre_handle *parent_lockh, struct dentry *dparent, int parent_mode, struct ldlm_res_id *child_res_id, @@ -1224,7 +1241,7 @@ static int mds_verify_child(struct obd_device *obd, struct dentry **dchildp, int child_mode, ldlm_policy_data_t *child_policy, const char *name, int namelen, - struct ldlm_res_id *maxres) + const struct ldlm_res_id *maxres) { struct dentry *vchild, *dchild = *dchildp; int rc = 0, cleanup_phase = 2; /* parent, child locks */ @@ -1273,11 +1290,11 @@ static int mds_verify_child(struct obd_device *obd, GOTO(cleanup, rc = 1); } - rc = ldlm_cli_enqueue_local(obd->obd_namespace, *child_res_id, - LDLM_IBITS, child_policy, - child_mode, &flags, - ldlm_blocking_ast, - ldlm_completion_ast, NULL, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, child_res_id, + LDLM_IBITS, child_policy, + child_mode, &flags, + ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, 0, NULL, child_lockh); if (rc != ELDLM_OK) GOTO(cleanup, rc = -EIO); @@ -1362,7 +1379,7 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, child_res_id.name[1] = inode->i_generation; /* If we want a LCK_CR for a directory, and this directory has not been - changed for some time, we return not only a LOOKUP lock, but also an + changed for some time, we return not only a LOOKUP lock, but also an UPDATE lock to have negative dentry starts working for this dir. Also we apply same logic to non-directories. If the file is rarely changed - we return both locks and this might save us RPC on @@ -1507,7 +1524,7 @@ out_dput: int mds_get_cookie_size(struct obd_device *obd, struct lov_mds_md *lmm) { int count = le32_to_cpu(lmm->lmm_stripe_count); - int real_csize = count * sizeof(struct llog_cookie); + int real_csize = count * sizeof(struct llog_cookie); return real_csize; } @@ -1527,10 +1544,10 @@ void mds_shrink_reply(struct obd_device *obd, struct ptlrpc_request *req, CDEBUG(D_INFO, "Shrink to md_size %d cookie_size %d \n", md_size, cookie_size); - + lustre_shrink_reply(req, md_off, md_size, 1); - - lustre_shrink_reply(req, md_off + (md_size > 0), cookie_size, 0); + + lustre_shrink_reply(req, md_off + (md_size > 0), cookie_size, 0); } static int mds_reint_unlink(struct mds_update_record *rec, int offset, @@ -1562,9 +1579,9 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, rc = mds_get_parent_child_locked(obd, mds, rec->ur_fid1, &parent_lockh, &dparent, LCK_EX, - MDS_INODELOCK_UPDATE, + MDS_INODELOCK_UPDATE, rec->ur_name, rec->ur_namelen, - &child_lockh, &dchild, LCK_EX, + &child_lockh, &dchild, LCK_EX, MDS_INODELOCK_FULL); if (rc) GOTO(cleanup, rc); @@ -1610,7 +1627,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, cleanup_phase = 3; /* child inum lock */ - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_REINT_UNLINK_WRITE, dparent->d_inode->i_sb); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dparent->d_inode->i_sb); /* ldlm_reply in buf[0] if called via intent */ if (offset == DLM_INTENT_REC_OFF) @@ -1724,7 +1741,7 @@ cleanup: } rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); if (!rc) (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"), "unlinked", 0, NULL, NULL); @@ -1763,8 +1780,7 @@ cleanup: mds_shrink_reply(obd, req, body, REPLY_REC_OFF + 1); /* trigger dqrel on the owner of child and parent */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, - FSFILT_OP_UNLINK); + lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, FSFILT_OP_UNLINK); return 0; } @@ -1791,7 +1807,6 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, DEBUG_REQ(D_INODE, req, "original "LPU64"/%u to "LPU64"/%u %s", rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_name); - mds_counter_incr(req->rq_export, LPROC_MDS_LINK); MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); @@ -1860,7 +1875,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, } /* Step 4: Do it. */ - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_REINT_LINK_WRITE, de_src->d_inode->i_sb); + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, de_src->d_inode->i_sb); if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) GOTO(cleanup, rc = -EROFS); @@ -1874,7 +1889,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, CERROR("vfs_link error %d\n", rc); cleanup: rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); EXIT; switch (cleanup_phase) { @@ -1984,7 +1999,7 @@ int mds_get_parents_children_locked(struct obd_device *obd, *de_oldp = ll_lookup_one_len(old_name, *de_srcdirp, old_len - 1); if (IS_ERR(*de_oldp)) { rc = PTR_ERR(*de_oldp); - CERROR("old child lookup error (%.*s): %d\n", + CDEBUG(D_INODE, "old child lookup error (%.*s): %d\n", old_len - 1, old_name, rc); GOTO(cleanup, rc); } @@ -2136,9 +2151,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u %s to "LPU64"/%u %s", rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name, rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_tgt); - mds_counter_incr(req->rq_export, LPROC_MDS_RENAME); - + MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); rc = mds_get_parents_children_locked(obd, mds, rec->ur_fid1, &de_srcdir, @@ -2212,7 +2226,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } no_unlink: - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_REINT_RENAME_WRITE, + OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, de_srcdir->d_inode->i_sb); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) @@ -2266,7 +2280,7 @@ no_unlink: GOTO(cleanup, rc); cleanup: rc = mds_finish_transno(mds, de_tgtdir ? de_tgtdir->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0, 0); switch (cleanup_phase) { case 4: @@ -2302,8 +2316,7 @@ cleanup: req->rq_status = rc; /* acquire/release qunit */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, - FSFILT_OP_RENAME); + lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, FSFILT_OP_RENAME); return 0; } @@ -2323,7 +2336,9 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *lockh) { struct obd_device *obd = req->rq_export->exp_obd; +#if 0 struct mds_obd *mds = &obd->u.mds; +#endif struct lvfs_run_ctxt saved; int rc; ENTRY; @@ -2338,6 +2353,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, } #endif +#if 0 /* get group info of this user */ rec->ur_uc.luc_uce = upcall_cache_get_entry(mds->mds_group_hash, rec->ur_uc.luc_fsuid, @@ -2357,11 +2373,14 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, if (rec->ur_uc.luc_uce) rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary; #endif +#endif push_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); rc = reinters[rec->ur_opcode] (rec, offset, req, lockh); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); +#if 0 upcall_cache_put_entry(mds->mds_group_hash, rec->ur_uc.luc_uce); +#endif RETURN(rc); } diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 5de98b2..2857244 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -47,11 +47,11 @@ #include "mds_internal.h" int mds_osc_destroy_orphan(struct obd_device *obd, - umode_t mode, - struct lov_mds_md *lmm, - int lmm_size, - struct llog_cookie *logcookies, - int log_unlink) + umode_t mode, + struct lov_mds_md *lmm, + int lmm_size, + struct llog_cookie *logcookies, + int log_unlink) { struct mds_obd *mds = &obd->u.mds; struct lov_stripe_md *lsm = NULL; @@ -76,19 +76,20 @@ int mds_osc_destroy_orphan(struct obd_device *obd, if (rc) GOTO(out_free_memmd, rc); - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(out_free_memmd, rc = -ENOMEM); oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; oa->o_mode = mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; if (log_unlink && logcookies) { oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = logcookies; } rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export); - obdo_free(oa); + OBDO_FREE(oa); if (rc) CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error " "%d\n", lsm->lsm_object_id, rc); diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index 5b16864..52e9a5f 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -129,7 +129,7 @@ static int mds_getxattr_internal(struct obd_device *obd, if (reqbody->valid & OBD_MD_FLXATTR) { xattr_name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF+1,0); - DEBUG_REQ(D_INODE, req, "getxattr %s\n", xattr_name); + DEBUG_REQ(D_INODE, req, "getxattr %s", xattr_name); if (inode->i_op && inode->i_op->getxattr) { lock_24kernel(); @@ -142,7 +142,7 @@ static int mds_getxattr_internal(struct obd_device *obd, rc != -ERANGE) CDEBUG(D_OTHER, "getxattr failed: %d\n", rc); } else if (reqbody->valid & OBD_MD_FLXATTRLS) { - DEBUG_REQ(D_INODE, req, "listxattr\n"); + DEBUG_REQ(D_INODE, req, "listxattr"); if (inode->i_op && inode->i_op->listxattr) { lock_24kernel(); @@ -170,7 +170,7 @@ int mds_getxattr(struct ptlrpc_request *req) struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc = 0; ENTRY; @@ -243,7 +243,7 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) GOTO(out, rc = -EPROTO); } - DEBUG_REQ(D_INODE, req, "%sxattr %s\n", + DEBUG_REQ(D_INODE, req, "%sxattr %s", body->valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name); @@ -261,14 +261,14 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) lockpart |= MDS_INODELOCK_LOOKUP; de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX, - &lockh, NULL, 0, lockpart); + &lockh, lockpart); if (IS_ERR(de)) GOTO(out, rc = PTR_ERR(de)); inode = de->d_inode; LASSERT(inode); - OBD_FAIL_WRITE(obd, OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb); + OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb); /* filter_op simply use setattr one */ handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); @@ -310,7 +310,7 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) LASSERT(rc <= 0); out_trans: - err = mds_finish_transno(mds, inode, handle, req, rc, 0); + err = mds_finish_transno(mds, inode, handle, req, rc, 0, 0); out_dput: l_dput(de); @@ -332,7 +332,7 @@ int mds_setxattr(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct lvfs_run_ctxt saved; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc; ENTRY; diff --git a/lustre/mdt/.cvsignore b/lustre/mdt/.cvsignore new file mode 100644 index 0000000..5d26f00 --- /dev/null +++ b/lustre/mdt/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lustre/mdt/Makefile.in b/lustre/mdt/Makefile.in new file mode 100644 index 0000000..cc854bb --- /dev/null +++ b/lustre/mdt/Makefile.in @@ -0,0 +1,5 @@ +MODULES := mdt +mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o +mdt-objs += mdt_open.o mdt_idmap.o mdt_identity.o mdt_rmtacl.o mdt_capa.o mdt_lproc.o + +@INCLUDE_RULES@ diff --git a/lustre/mdt/autoMakefile.am b/lustre/mdt/autoMakefile.am new file mode 100644 index 0000000..3052be5 --- /dev/null +++ b/lustre/mdt/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +modulefs_DATA = mdt$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES := $(mdt-objs:%.o=%.c) mdt_internal.h diff --git a/lustre/mdt/mdt_capa.c b/lustre/mdt/mdt_capa.c new file mode 100644 index 0000000..5e967b1 --- /dev/null +++ b/lustre/mdt/mdt_capa.c @@ -0,0 +1,302 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdt/mdt_capa.c + * Lustre Metadata Target (mdt) capability key read/write/update. + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * Author: Lai Siyao <lsy@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include "mdt_internal.h" + +static inline void set_capa_key_expiry(struct mdt_device *mdt) +{ + mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * HZ; +} + +static void make_capa_key(struct lustre_capa_key *key, + mdsno_t mdsnum, int keyid) +{ + key->lk_mdsid = mdsnum; + key->lk_keyid = keyid + 1; + get_random_bytes(key->lk_key, sizeof(key->lk_key)); +} + +enum { + MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 +}; + +static inline void lck_cpu_to_le(struct lustre_capa_key *tgt, + struct lustre_capa_key *src) +{ + tgt->lk_mdsid = cpu_to_le64(src->lk_mdsid); + tgt->lk_keyid = cpu_to_le32(src->lk_keyid); + tgt->lk_padding = cpu_to_le32(src->lk_padding); + memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key)); +} + +static inline void lck_le_to_cpu(struct lustre_capa_key *tgt, + struct lustre_capa_key *src) +{ + tgt->lk_mdsid = le64_to_cpu(src->lk_mdsid); + tgt->lk_keyid = le32_to_cpu(src->lk_keyid); + tgt->lk_padding = le32_to_cpu(src->lk_padding); + memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key)); +} + +static int write_capa_keys(const struct lu_env *env, + struct mdt_device *mdt, + struct lustre_capa_key *keys) +{ + struct mdt_thread_info *mti; + struct lustre_capa_key *tmp; + struct thandle *th; + loff_t off = 0; + int i, rc; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + + th = mdt_trans_start(env, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + tmp = &mti->mti_capa_key; + + for (i = 0; i < 2; i++) { + lck_cpu_to_le(tmp, &keys[i]); + + rc = mdt_record_write(env, mdt->mdt_ck_obj, + mdt_buf_const(env, tmp, sizeof(*tmp)), + &off, th); + if (rc) + break; + } + + mdt_trans_stop(env, mdt, th); + + CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc); + return rc; +} + +static int read_capa_keys(const struct lu_env *env, + struct mdt_device *mdt, + struct lustre_capa_key *keys) +{ + struct mdt_thread_info *mti; + struct lustre_capa_key *tmp; + loff_t off = 0; + int i, rc; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + tmp = &mti->mti_capa_key; + + for (i = 0; i < 2; i++) { + rc = mdt_record_read(env, mdt->mdt_ck_obj, + mdt_buf(env, tmp, sizeof(*tmp)), &off); + if (rc) + return rc; + + lck_le_to_cpu(&keys[i], tmp); + DEBUG_CAPA_KEY(D_SEC, &keys[i], "read"); + } + + return 0; +} + +int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt) +{ + struct lustre_capa_key *keys = mdt->mdt_capa_keys; + struct mdt_thread_info *mti; + struct dt_object *obj; + struct lu_attr *la; + mdsno_t mdsnum; + unsigned long size; + int rc; + ENTRY; + + mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + la = &mti->mti_attr.ma_attr; + + obj = mdt->mdt_ck_obj; + rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + size = (unsigned long)la->la_size; + if (size == 0) { + int i; + + for (i = 0; i < 2; i++) { + make_capa_key(&keys[i], mdsnum, i); + DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing"); + } + + rc = write_capa_keys(env, mdt, keys); + if (rc) { + CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc); + RETURN(rc); + } + } else { + rc = read_capa_keys(env, mdt, keys); + if (rc) { + CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc); + RETURN(rc); + } + } + set_capa_key_expiry(mdt); + mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry); + CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry); + RETURN(0); +} + +void mdt_ck_timer_callback(unsigned long castmeharder) +{ + struct mdt_device *mdt = (struct mdt_device *)castmeharder; + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + + ENTRY; + thread->t_flags |= SVC_EVENT; + wake_up(&thread->t_ctl_waitq); + EXIT; +} + +static int mdt_ck_thread_main(void *args) +{ + struct mdt_device *mdt = args; + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0], + *rkey = &mdt->mdt_capa_keys[1]; + struct lustre_capa_key *tmp; + struct lu_env env; + struct mdt_thread_info *info; + struct md_device *next; + struct l_wait_info lwi = { 0 }; + mdsno_t mdsnum; + int rc; + ENTRY; + + ptlrpc_daemonize("mdt_ck"); + cfs_block_allsigs(); + + thread->t_flags = SVC_RUNNING; + cfs_waitq_signal(&thread->t_ctl_waitq); + + rc = lu_env_init(&env, NULL, LCT_MD_THREAD); + if (rc) + RETURN(rc); + + thread->t_env = &env; + env.le_ctx.lc_thread = thread; + + info = lu_context_key_get(&env.le_ctx, &mdt_thread_key); + LASSERT(info != NULL); + + tmp = &info->mti_capa_key; + mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id; + while (1) { + l_wait_event(thread->t_ctl_waitq, + thread->t_flags & (SVC_STOPPING | SVC_EVENT), + &lwi); + + if (thread->t_flags & SVC_STOPPING) + break; + thread->t_flags &= ~SVC_EVENT; + + if (time_after(mdt->mdt_ck_expiry, jiffies)) + break; + + *tmp = *rkey; + make_capa_key(tmp, mdsnum, rkey->lk_keyid); + + next = mdt->mdt_child; + rc = next->md_ops->mdo_update_capa_key(&env, next, tmp); + if (!rc) { + spin_lock(&capa_lock); + *bkey = *rkey; + *rkey = *tmp; + spin_unlock(&capa_lock); + + rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys); + if (rc) { + spin_lock(&capa_lock); + *rkey = *bkey; + memset(bkey, 0, sizeof(*bkey)); + spin_unlock(&capa_lock); + } else { + set_capa_key_expiry(mdt); + DEBUG_CAPA_KEY(D_SEC, rkey, "new"); + } + } + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for"); + /* next retry is in 300 sec */ + mdt->mdt_ck_expiry = jiffies + 300 * HZ; + } + + mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry); + CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry); + } + lu_env_fini(&env); + + thread->t_flags = SVC_STOPPED; + cfs_waitq_signal(&thread->t_ctl_waitq); + RETURN(0); +} + +int mdt_ck_thread_start(struct mdt_device *mdt) +{ + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + int rc; + + cfs_waitq_init(&thread->t_ctl_waitq); + rc = kernel_thread(mdt_ck_thread_main, mdt, + (CLONE_VM | CLONE_FILES)); + if (rc < 0) { + CERROR("cannot start mdt_ck thread, rc = %d\n", rc); + return rc; + } + + wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING); + return 0; +} + +void mdt_ck_thread_stop(struct mdt_device *mdt) +{ + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + + if (!(thread->t_flags & SVC_RUNNING)) + return; + + thread->t_flags = SVC_STOPPING; + cfs_waitq_signal(&thread->t_ctl_waitq); + wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); +} + + diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c new file mode 100644 index 0000000..4aabd13 --- /dev/null +++ b/lustre/mdt/mdt_handler.c @@ -0,0 +1,4860 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdt/mdt_handler.c + * Lustre Metadata Target (mdt) request handler + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * Author: Andreas Dilger <adilger@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Mike Shaver <shaver@clusterfs.com> + * Author: Nikita Danilov <nikita@clusterfs.com> + * Author: Huang Hua <huanghua@clusterfs.com> + * Author: Yury Umanets <umka@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> +/* + * struct OBD_{ALLOC,FREE}*() + * MDT_FAIL_CHECK + */ +#include <obd_support.h> +/* struct ptlrpc_request */ +#include <lustre_net.h> +/* struct obd_export */ +#include <lustre_export.h> +/* struct obd_device */ +#include <obd.h> +/* lu2dt_dev() */ +#include <dt_object.h> +#include <lustre_mds.h> +#include <lustre_mdt.h> +#include "mdt_internal.h" +#include <linux/lustre_acl.h> +#include <lustre_param.h> + +mdl_mode_t mdt_mdl_lock_modes[] = { + [LCK_MINMODE] = MDL_MINMODE, + [LCK_EX] = MDL_EX, + [LCK_PW] = MDL_PW, + [LCK_PR] = MDL_PR, + [LCK_CW] = MDL_CW, + [LCK_CR] = MDL_CR, + [LCK_NL] = MDL_NL, + [LCK_GROUP] = MDL_GROUP +}; + +ldlm_mode_t mdt_dlm_lock_modes[] = { + [MDL_MINMODE] = LCK_MINMODE, + [MDL_EX] = LCK_EX, + [MDL_PW] = LCK_PW, + [MDL_PR] = LCK_PR, + [MDL_CW] = LCK_CW, + [MDL_CR] = LCK_CR, + [MDL_NL] = LCK_NL, + [MDL_GROUP] = LCK_GROUP +}; + +/* + * Initialized in mdt_mod_init(). + */ +unsigned long mdt_num_threads; + +/* ptlrpc request handler for MDT. All handlers are + * grouped into several slices - struct mdt_opc_slice, + * and stored in an array - mdt_handlers[]. + */ +struct mdt_handler { + /* The name of this handler. */ + const char *mh_name; + /* Fail id for this handler, checked at the beginning of this handler*/ + int mh_fail_id; + /* Operation code for this handler */ + __u32 mh_opc; + /* flags are listed in enum mdt_handler_flags below. */ + __u32 mh_flags; + /* The actual handler function to execute. */ + int (*mh_act)(struct mdt_thread_info *info); + /* Request format for this request. */ + const struct req_format *mh_fmt; +}; + +enum mdt_handler_flags { + /* + * struct mdt_body is passed in the incoming message, and object + * identified by this fid exists on disk. + * + * "habeo corpus" == "I have a body" + */ + HABEO_CORPUS = (1 << 0), + /* + * struct ldlm_request is passed in the incoming message. + * + * "habeo clavis" == "I have a key" + */ + HABEO_CLAVIS = (1 << 1), + /* + * this request has fixed reply format, so that reply message can be + * packed by generic code. + * + * "habeo refero" == "I have a reply" + */ + HABEO_REFERO = (1 << 2), + /* + * this request will modify something, so check whether the filesystem + * is readonly or not, then return -EROFS to client asap if necessary. + * + * "mutabor" == "I shall modify" + */ + MUTABOR = (1 << 3) +}; + +struct mdt_opc_slice { + __u32 mos_opc_start; + int mos_opc_end; + struct mdt_handler *mos_hs; +}; + +static struct mdt_opc_slice mdt_regular_handlers[]; +static struct mdt_opc_slice mdt_readpage_handlers[]; +static struct mdt_opc_slice mdt_xmds_handlers[]; +static struct mdt_opc_slice mdt_seq_handlers[]; +static struct mdt_opc_slice mdt_fld_handlers[]; + +static struct mdt_device *mdt_dev(struct lu_device *d); +static int mdt_regular_handle(struct ptlrpc_request *req); +static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags); + +static struct lu_object_operations mdt_obj_ops; + +int mdt_get_disposition(struct ldlm_reply *rep, int flag) +{ + if (!rep) + return 0; + return (rep->lock_policy_res1 & flag); +} + +void mdt_clear_disposition(struct mdt_thread_info *info, + struct ldlm_reply *rep, int flag) +{ + if (info) + info->mti_opdata &= ~flag; + if (rep) + rep->lock_policy_res1 &= ~flag; +} + +void mdt_set_disposition(struct mdt_thread_info *info, + struct ldlm_reply *rep, int flag) +{ + if (info) + info->mti_opdata |= flag; + if (rep) + rep->lock_policy_res1 |= flag; +} + +void mdt_lock_reg_init(struct mdt_lock_handle *lh, ldlm_mode_t lm) +{ + lh->mlh_pdo_hash = 0; + lh->mlh_reg_mode = lm; + lh->mlh_type = MDT_REG_LOCK; +} + +void mdt_lock_pdo_init(struct mdt_lock_handle *lh, ldlm_mode_t lm, + const char *name, int namelen) +{ + lh->mlh_reg_mode = lm; + lh->mlh_type = MDT_PDO_LOCK; + + if (name != NULL) { + LASSERT(namelen > 0); + lh->mlh_pdo_hash = full_name_hash(name, namelen); + } else { + LASSERT(namelen == 0); + lh->mlh_pdo_hash = 0ull; + } +} + +static void mdt_lock_pdo_mode(struct mdt_thread_info *info, struct mdt_object *o, + struct mdt_lock_handle *lh) +{ + mdl_mode_t mode; + ENTRY; + + /* + * Any dir access needs couple of locks: + * + * 1) on part of dir we gonna take lookup/modify; + * + * 2) on whole dir to protect it from concurrent splitting and/or to + * flush client's cache for readdir(). + * + * so, for a given mode and object this routine decides what lock mode + * to use for lock #2: + * + * 1) if caller's gonna lookup in dir then we need to protect dir from + * being splitted only - LCK_CR + * + * 2) if caller's gonna modify dir then we need to protect dir from + * being splitted and to flush cache - LCK_CW + * + * 3) if caller's gonna modify dir and that dir seems ready for + * splitting then we need to protect it from any type of access + * (lookup/modify/split) - LCK_EX --bzzz + */ + + LASSERT(lh->mlh_reg_mode != LCK_MINMODE); + LASSERT(lh->mlh_pdo_mode == LCK_MINMODE); + + /* + * Ask underlaying level its opinion about preferable PDO lock mode + * having access type passed as regular lock mode: + * + * - MDL_MINMODE means that lower layer does not want to specify lock + * mode; + * + * - MDL_NL means that no PDO lock should be taken. This is used in some + * cases. Say, for non-splittable directories no need to use PDO locks + * at all. + */ + mode = mdo_lock_mode(info->mti_env, mdt_object_child(o), + mdt_dlm_mode2mdl_mode(lh->mlh_reg_mode)); + + if (mode != MDL_MINMODE) { + lh->mlh_pdo_mode = mdt_mdl_mode2dlm_mode(mode); + } else { + /* + * Lower layer does not want to specify locking mode. We do it + * our selves. No special protection is needed, just flush + * client's cache on modification and allow concurrent + * mondification. + */ + switch (lh->mlh_reg_mode) { + case LCK_EX: + lh->mlh_pdo_mode = LCK_EX; + break; + case LCK_PR: + lh->mlh_pdo_mode = LCK_CR; + break; + case LCK_PW: + lh->mlh_pdo_mode = LCK_CW; + break; + default: + CERROR("Not expected lock type (0x%x)\n", + (int)lh->mlh_reg_mode); + LBUG(); + } + } + + LASSERT(lh->mlh_pdo_mode != LCK_MINMODE); + EXIT; +} + +static int mdt_getstatus(struct mdt_thread_info *info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct md_device *next = mdt->mdt_child; + struct mdt_body *repbody; + int rc; + + ENTRY; + + rc = mdt_check_ucred(info); + if (rc) + RETURN(err_serious(rc)); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) + RETURN(err_serious(-ENOMEM)); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + rc = next->md_ops->mdo_root_get(info->mti_env, next, &repbody->fid1); + if (rc != 0) + RETURN(rc); + + repbody->valid |= OBD_MD_FLID; + + if (mdt->mdt_opts.mo_mds_capa) { + struct mdt_object *root; + struct lustre_capa *capa; + + root = mdt_object_find(info->mti_env, mdt, &repbody->fid1); + if (IS_ERR(root)) + RETURN(PTR_ERR(root)); + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + + rc = mo_capa_get(info->mti_env, mdt_object_child(root), capa, + 0); + mdt_object_put(info->mti_env, root); + if (rc == 0) + repbody->valid |= OBD_MD_FLMDSCAPA; + } + + RETURN(rc); +} + +static int mdt_statfs(struct mdt_thread_info *info) +{ + struct md_device *next = info->mti_mdt->mdt_child; + struct obd_statfs *osfs; + int rc; + + ENTRY; + + /* This will trigger a watchdog timeout */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP, + (MDT_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1); + + rc = mdt_check_ucred(info); + if (rc) + RETURN(err_serious(rc)); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { + rc = err_serious(-ENOMEM); + } else { + osfs = req_capsule_server_get(&info->mti_pill,&RMF_OBD_STATFS); + rc = next->md_ops->mdo_statfs(info->mti_env, next, + &info->mti_u.ksfs); + statfs_pack(osfs, &info->mti_u.ksfs); + } + RETURN(rc); +} + +void mdt_pack_size2body(struct mdt_thread_info *info, struct mdt_object *o) +{ + struct mdt_body *b; + struct lu_attr *attr = &info->mti_attr.ma_attr; + + b = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + /* Check if Size-on-MDS is enabled. */ + if ((mdt_conn_flags(info) & OBD_CONNECT_SOM) && + S_ISREG(attr->la_mode) && mdt_sizeonmds_enabled(o)) { + b->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); + b->size = attr->la_size; + b->blocks = attr->la_blocks; + } +} + +void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, + const struct lu_attr *attr, const struct lu_fid *fid) +{ + /*XXX should pack the reply body according to lu_valid*/ + b->valid |= OBD_MD_FLCTIME | OBD_MD_FLUID | + OBD_MD_FLGID | OBD_MD_FLTYPE | + OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLFLAGS | + OBD_MD_FLATIME | OBD_MD_FLMTIME ; + + if (!S_ISREG(attr->la_mode)) + b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLRDEV; + + b->atime = attr->la_atime; + b->mtime = attr->la_mtime; + b->ctime = attr->la_ctime; + b->mode = attr->la_mode; + b->size = attr->la_size; + b->blocks = attr->la_blocks; + b->uid = attr->la_uid; + b->gid = attr->la_gid; + b->flags = attr->la_flags; + b->nlink = attr->la_nlink; + b->rdev = attr->la_rdev; + + if (fid) { + b->fid1 = *fid; + b->valid |= OBD_MD_FLID; + CDEBUG(D_INODE, ""DFID": nlink=%d, mode=%o, size="LPU64"\n", + PFID(fid), b->nlink, b->mode, b->size); + } + + if (info) + mdt_body_reverse_idmap(info, b); +} + +static inline int mdt_body_has_lov(const struct lu_attr *la, + const struct mdt_body *body) +{ + return ((S_ISREG(la->la_mode) && (body->valid & OBD_MD_FLEASIZE)) || + (S_ISDIR(la->la_mode) && (body->valid & OBD_MD_FLDIREA )) ); +} + +static int mdt_getattr_internal(struct mdt_thread_info *info, + struct mdt_object *o) +{ + struct md_object *next = mdt_object_child(o); + const struct mdt_body *reqbody = info->mti_body; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct md_attr *ma = &info->mti_attr; + struct lu_attr *la = &ma->ma_attr; + struct req_capsule *pill = &info->mti_pill; + const struct lu_env *env = info->mti_env; + struct mdt_body *repbody; + struct lu_buf *buffer = &info->mti_buf; + int rc; + ENTRY; + + if (unlikely(MDT_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK))) + RETURN(err_serious(-ENOMEM)); + + repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); + + ma->ma_valid = 0; + + rc = mdt_object_exists(o); + if (rc < 0) { + /* This object is located on remote node.*/ + repbody->fid1 = *mdt_object_fid(o); + repbody->valid = OBD_MD_FLID | OBD_MD_MDS; + RETURN(0); + } + + buffer->lb_buf = req_capsule_server_get(pill, &RMF_MDT_MD); + buffer->lb_len = req_capsule_get_size(pill, &RMF_MDT_MD, RCL_SERVER); + + /* If it is dir object and client require MEA, then we got MEA */ + if (S_ISDIR(lu_object_attr(&next->mo_lu)) && + reqbody->valid & OBD_MD_MEA) { + /* Assumption: MDT_MD size is enough for lmv size. */ + ma->ma_lmv = buffer->lb_buf; + ma->ma_lmv_size = buffer->lb_len; + ma->ma_need = MA_LMV | MA_INODE; + } else { + ma->ma_lmm = buffer->lb_buf; + ma->ma_lmm_size = buffer->lb_len; + ma->ma_need = MA_LOV | MA_INODE; + } + + rc = mo_attr_get(env, next, ma); + if (unlikely(rc)) { + CERROR("getattr error for "DFID": %d\n", + PFID(mdt_object_fid(o)), rc); + RETURN(rc); + } + + if (likely(ma->ma_valid & MA_INODE)) + mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o)); + else + RETURN(-EFAULT); + + if (mdt_body_has_lov(la, reqbody)) { + if (ma->ma_valid & MA_LOV) { + LASSERT(ma->ma_lmm_size); + mdt_dump_lmm(D_INFO, ma->ma_lmm); + repbody->eadatasize = ma->ma_lmm_size; + if (S_ISDIR(la->la_mode)) + repbody->valid |= OBD_MD_FLDIREA; + else + repbody->valid |= OBD_MD_FLEASIZE; + } + if (ma->ma_valid & MA_LMV) { + LASSERT(S_ISDIR(la->la_mode)); + repbody->eadatasize = ma->ma_lmv_size; + repbody->valid |= (OBD_MD_FLDIREA|OBD_MD_MEA); + } + if (!(ma->ma_valid & MA_LOV) && !(ma->ma_valid & MA_LMV)) { + repbody->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + } + } else if (S_ISLNK(la->la_mode) && + reqbody->valid & OBD_MD_LINKNAME) { + buffer->lb_buf = ma->ma_lmm; + buffer->lb_len = reqbody->eadatasize; + rc = mo_readlink(env, next, buffer); + if (unlikely(rc <= 0)) { + CERROR("readlink failed: %d\n", rc); + rc = -EFAULT; + } else { + repbody->valid |= OBD_MD_LINKNAME; + repbody->eadatasize = rc; + /* NULL terminate */ + ((char*)ma->ma_lmm)[rc - 1] = 0; + CDEBUG(D_INODE, "symlink dest %s, len = %d\n", + (char*)ma->ma_lmm, rc); + rc = 0; + } + } + + if (reqbody->valid & OBD_MD_FLMODEASIZE) { + repbody->max_cookiesize = info->mti_mdt->mdt_max_cookiesize; + repbody->max_mdsize = info->mti_mdt->mdt_max_mdsize; + repbody->valid |= OBD_MD_FLMODEASIZE; + CDEBUG(D_INODE, "I am going to change the MAX_MD_SIZE & " + "MAX_COOKIE to : %d:%d\n", repbody->max_mdsize, + repbody->max_cookiesize); + } + + if (med->med_rmtclient && (reqbody->valid & OBD_MD_FLRMTPERM)) { + void *buf = req_capsule_server_get(pill, &RMF_ACL); + + /* mdt_getattr_lock only */ + rc = mdt_pack_remote_perm(info, o, buf); + if (rc) { + repbody->valid &= ~OBD_MD_FLRMTPERM; + repbody->aclsize = 0; + RETURN(rc); + } else { + repbody->valid |= OBD_MD_FLRMTPERM; + repbody->aclsize = sizeof(struct mdt_remote_perm); + } + } +#ifdef CONFIG_FS_POSIX_ACL + else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && + (reqbody->valid & OBD_MD_FLACL)) { + buffer->lb_buf = req_capsule_server_get(pill, &RMF_ACL); + buffer->lb_len = req_capsule_get_size(pill, + &RMF_ACL, RCL_SERVER); + if (buffer->lb_len > 0) { + rc = mo_xattr_get(env, next, buffer, + XATTR_NAME_ACL_ACCESS); + if (rc < 0) { + if (rc == -ENODATA) { + repbody->aclsize = 0; + repbody->valid |= OBD_MD_FLACL; + rc = 0; + } else if (rc == -EOPNOTSUPP) { + rc = 0; + } else { + CERROR("got acl size: %d\n", rc); + } + } else { + repbody->aclsize = rc; + repbody->valid |= OBD_MD_FLACL; + rc = 0; + } + } + } +#endif + + if ((reqbody->valid & OBD_MD_FLMDSCAPA) && + info->mti_mdt->mdt_opts.mo_mds_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + rc = mo_capa_get(env, next, capa, 0); + if (rc) + RETURN(rc); + repbody->valid |= OBD_MD_FLMDSCAPA; + } + RETURN(rc); +} + +static int mdt_renew_capa(struct mdt_thread_info *info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct mdt_object *obj = info->mti_object; + struct mdt_body *body; + struct lustre_capa *capa, *c; + int rc; + ENTRY; + + /* if object doesn't exist, or server has disabled capability, + * return directly, client will find body->valid OBD_MD_FLOSSCAPA + * flag not set. + */ + if (!obj || !mdt->mdt_opts.mo_mds_capa) + RETURN(0); + + body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); + + c = req_capsule_client_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(c); + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + + *capa = *c; + rc = mo_capa_get(info->mti_env, mdt_object_child(obj), capa, 1); + if (rc == 0) + body->valid |= OBD_MD_FLOSSCAPA; + + RETURN(rc); +} + +static int mdt_getattr(struct mdt_thread_info *info) +{ + struct mdt_object *obj = info->mti_object; + struct req_capsule *pill = &info->mti_pill; + struct mdt_body *reqbody; + struct mdt_body *repbody; + mode_t mode; + int md_size; + int rc; + ENTRY; + + reqbody = req_capsule_client_get(pill, &RMF_MDT_BODY); + LASSERT(reqbody); + + if (reqbody->valid & OBD_MD_FLOSSCAPA) { + rc = req_capsule_pack(pill); + if (unlikely(rc)) + rc = err_serious(rc); + else { + rc = mdt_renew_capa(info); + mdt_shrink_reply(info); + } + GOTO(out, rc); + } + + LASSERT(obj != NULL); + LASSERT(lu_object_assert_exists(&obj->mot_obj.mo_lu)); + + mode = lu_object_attr(&obj->mot_obj.mo_lu); + if (S_ISLNK(mode) && (reqbody->valid & OBD_MD_LINKNAME) && + (reqbody->eadatasize > info->mti_mdt->mdt_max_mdsize)) + md_size = reqbody->eadatasize; + else + md_size = info->mti_mdt->mdt_max_mdsize; + + req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, md_size); + + rc = req_capsule_pack(pill); + if (unlikely(rc != 0)) + GOTO(out, rc = err_serious(rc)); + + repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); + LASSERT(repbody != NULL); + repbody->eadatasize = 0; + repbody->aclsize = 0; + + if (reqbody->valid & OBD_MD_FLRMTPERM) + rc = mdt_init_ucred(info, reqbody); + else + rc = mdt_check_ucred(info); + if (unlikely(rc)) + GOTO(out_shrink, rc); + + info->mti_spec.sp_ck_split = !!(reqbody->valid & OBD_MD_FLCKSPLIT); + info->mti_cross_ref = !!(reqbody->valid & OBD_MD_FLCROSSREF); + + /* + * Don't check capability at all, because rename might getattr for + * remote obj, and at that time no capability is available. + */ + mdt_set_capainfo(info, 1, &reqbody->fid1, BYPASS_CAPA); + rc = mdt_getattr_internal(info, obj); + if (reqbody->valid & OBD_MD_FLRMTPERM) + mdt_exit_ucred(info); + EXIT; +out_shrink: + mdt_shrink_reply(info); +out: + return rc; +} + +static int mdt_is_subdir(struct mdt_thread_info *info) +{ + struct mdt_object *o = info->mti_object; + struct req_capsule *pill = &info->mti_pill; + const struct mdt_body *body = info->mti_body; + struct mdt_body *repbody; + int rc; + ENTRY; + + LASSERT(o != NULL); + + repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); + + /* + * We save last checked parent fid to @repbody->fid1 for remote + * directory case. + */ + LASSERT(fid_is_sane(&body->fid2)); + LASSERT(mdt_object_exists(o) > 0); + rc = mdo_is_subdir(info->mti_env, mdt_object_child(o), + &body->fid2, &repbody->fid1); + if (rc == 0 || rc == -EREMOTE) + repbody->valid |= OBD_MD_FLID; + + RETURN(rc); +} + +static int mdt_raw_lookup(struct mdt_thread_info *info, + struct mdt_object *parent, + const struct lu_name *lname, + struct ldlm_reply *ldlm_rep) +{ + struct md_object *next = mdt_object_child(info->mti_object); + const struct mdt_body *reqbody = info->mti_body; + struct lu_fid *child_fid = &info->mti_tmp_fid1; + struct mdt_body *repbody; + int rc; + ENTRY; + + if (reqbody->valid != OBD_MD_FLID) + RETURN(0); + + LASSERT(!info->mti_cross_ref); + + /* Only got the fid of this obj by name */ + rc = mdo_lookup(info->mti_env, next, lname, child_fid, + &info->mti_spec); +#if 0 + /* XXX is raw_lookup possible as intent operation? */ + if (rc != 0) { + if (rc == -ENOENT) + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG); + RETURN(rc); + } else + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); +#endif + if (rc == 0) { + repbody = req_capsule_server_get(&info->mti_pill, + &RMF_MDT_BODY); + repbody->fid1 = *child_fid; + repbody->valid = OBD_MD_FLID; + } + RETURN(1); +} + +/* + * UPDATE lock should be taken against parent, and be release before exit; + * child_bits lock should be taken against child, and be returned back: + * (1)normal request should release the child lock; + * (2)intent request will grant the lock to client. + */ +static int mdt_getattr_name_lock(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc, + __u64 child_bits, + struct ldlm_reply *ldlm_rep) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_body *reqbody = NULL; + struct mdt_object *parent = info->mti_object; + struct mdt_object *child; + struct md_object *next = mdt_object_child(parent); + struct lu_fid *child_fid = &info->mti_tmp_fid1; + struct lu_name *lname = NULL; + const char *name; + int namelen = 0; + struct mdt_lock_handle *lhp; + struct ldlm_lock *lock; + struct ldlm_res_id *res_id; + int is_resent; + int rc; + + ENTRY; + + is_resent = lustre_handle_is_used(&lhc->mlh_reg_lh); + LASSERT(ergo(is_resent, + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)); + + LASSERT(parent != NULL); + name = req_capsule_client_get(&info->mti_pill, &RMF_NAME); + if (name == NULL) + RETURN(err_serious(-EFAULT)); + + namelen = req_capsule_get_size(&info->mti_pill, &RMF_NAME, + RCL_CLIENT) - 1; + LASSERT(namelen >= 0); + + /* XXX: "namelen == 0" is for getattr by fid (OBD_CONNECT_ATTRFID), + * otherwise do not allow empty name, that is the name must contain + * at least one character and the terminating '\0'*/ + if (namelen == 0) { + reqbody =req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(fid_is_sane(&reqbody->fid2)); + name = NULL; + + CDEBUG(D_INODE, "getattr with lock for "DFID"/"DFID", " + "ldlm_rep = %p\n", + PFID(mdt_object_fid(parent)), PFID(&reqbody->fid2), + ldlm_rep); + } else { + lname = mdt_name(info->mti_env, (char *)name, namelen); + CDEBUG(D_INODE, "getattr with lock for "DFID"/%s, " + "ldlm_rep = %p\n", + PFID(mdt_object_fid(parent)), name, ldlm_rep); + } + + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_EXECD); + + rc = mdt_object_exists(parent); + if (unlikely(rc == 0)) { + LU_OBJECT_DEBUG(D_WARNING, info->mti_env, + &parent->mot_obj.mo_lu, + "Parent doesn't exist!\n"); + RETURN(-ESTALE); + } else + LASSERTF(rc > 0, "Parent "DFID" is on remote server\n", + PFID(mdt_object_fid(parent))); + + if (lname) { + rc = mdt_raw_lookup(info, parent, lname, ldlm_rep); + if (rc != 0) { + if (rc > 0) + rc = 0; + RETURN(rc); + } + } + + if (info->mti_cross_ref) { + /* Only getattr on the child. Parent is on another node. */ + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + child = parent; + CDEBUG(D_INODE, "partial getattr_name child_fid = "DFID", " + "ldlm_rep=%p\n", PFID(mdt_object_fid(child)), ldlm_rep); + + if (is_resent) { + /* Do not take lock for resent case. */ + lock = ldlm_handle2lock(&lhc->mlh_reg_lh); + LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n", + lhc->mlh_reg_lh.cookie); + LASSERT(fid_res_name_eq(mdt_object_fid(child), + &lock->l_resource->lr_name)); + LDLM_LOCK_PUT(lock); + rc = 0; + } else { + mdt_lock_handle_init(lhc); + mdt_lock_reg_init(lhc, LCK_PR); + + /* + * Object's name is on another MDS, no lookup lock is + * needed here but update is. + */ + child_bits &= ~MDS_INODELOCK_LOOKUP; + child_bits |= MDS_INODELOCK_UPDATE; + + rc = mdt_object_lock(info, child, lhc, child_bits, + MDT_LOCAL_LOCK); + } + if (rc == 0) { + /* Finally, we can get attr for child. */ + mdt_set_capainfo(info, 0, mdt_object_fid(child), + BYPASS_CAPA); + rc = mdt_getattr_internal(info, child); + if (unlikely(rc != 0)) + mdt_object_unlock(info, child, lhc, 1); + } + GOTO(out, rc); + } + + /* step 1: lock parent */ + lhp = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lhp, LCK_PR, name, namelen); + rc = mdt_object_lock(info, parent, lhp, MDS_INODELOCK_UPDATE, + MDT_LOCAL_LOCK); + + if (unlikely(rc != 0)) + RETURN(rc); + + if (lname) { + /* step 2: lookup child's fid by name */ + rc = mdo_lookup(info->mti_env, next, lname, child_fid, + &info->mti_spec); + + if (rc != 0) { + if (rc == -ENOENT) + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG); + GOTO(out_parent, rc); + } else + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + } else { + *child_fid = reqbody->fid2; + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + } + + /* + *step 3: find the child object by fid & lock it. + * regardless if it is local or remote. + */ + child = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); + + if (unlikely(IS_ERR(child))) + GOTO(out_parent, rc = PTR_ERR(child)); + if (is_resent) { + /* Do not take lock for resent case. */ + lock = ldlm_handle2lock(&lhc->mlh_reg_lh); + LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n", + lhc->mlh_reg_lh.cookie); + + res_id = &lock->l_resource->lr_name; + if (!fid_res_name_eq(mdt_object_fid(child), + &lock->l_resource->lr_name)) { + LASSERTF(fid_res_name_eq(mdt_object_fid(parent), + &lock->l_resource->lr_name), + "Lock res_id: %lu/%lu/%lu, Fid: "DFID".\n", + (unsigned long)res_id->name[0], + (unsigned long)res_id->name[1], + (unsigned long)res_id->name[2], + PFID(mdt_object_fid(parent))); + CWARN("Although resent, but still not get child lock" + "parent:"DFID" child:"DFID"\n", + PFID(mdt_object_fid(parent)), + PFID(mdt_object_fid(child))); + lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT); + LDLM_LOCK_PUT(lock); + GOTO(relock, 0); + } + LDLM_LOCK_PUT(lock); + rc = 0; + } else { +relock: + mdt_lock_handle_init(lhc); + mdt_lock_reg_init(lhc, LCK_PR); + + if (mdt_object_exists(child) == 0) { + LU_OBJECT_DEBUG(D_WARNING, info->mti_env, + &child->mot_obj.mo_lu, + "Object doesn't exist!\n"); + } + rc = mdt_object_lock(info, child, lhc, child_bits, + MDT_CROSS_LOCK); + + if (unlikely(rc != 0)) + GOTO(out_child, rc); + } + + /* finally, we can get attr for child. */ + mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); + rc = mdt_getattr_internal(info, child); + if (unlikely(rc != 0)) { + mdt_object_unlock(info, child, lhc, 1); + } else { + lock = ldlm_handle2lock(&lhc->mlh_reg_lh); + if (lock) { + struct mdt_body *repbody; + struct lu_attr *ma; + + /* Debugging code. */ + res_id = &lock->l_resource->lr_name; + LDLM_DEBUG(lock, "Returning lock to client\n"); + LASSERTF(fid_res_name_eq(mdt_object_fid(child), + &lock->l_resource->lr_name), + "Lock res_id: %lu/%lu/%lu, Fid: "DFID".\n", + (unsigned long)res_id->name[0], + (unsigned long)res_id->name[1], + (unsigned long)res_id->name[2], + PFID(mdt_object_fid(child))); + /* + * Pack Size-on-MDS inode attributes to the body if + * update lock is given. + */ + repbody = req_capsule_server_get(&info->mti_pill, + &RMF_MDT_BODY); + ma = &info->mti_attr.ma_attr; + if (lock->l_policy_data.l_inodebits.bits & + MDS_INODELOCK_UPDATE) + mdt_pack_size2body(info, child); + LDLM_LOCK_PUT(lock); + } + } + EXIT; +out_child: + mdt_object_put(info->mti_env, child); +out_parent: + mdt_object_unlock(info, parent, lhp, 1); +out: + return rc; +} + +/* normal handler: should release the child lock */ +static int mdt_getattr_name(struct mdt_thread_info *info) +{ + struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_CHILD]; + struct mdt_body *reqbody; + struct mdt_body *repbody; + int rc; + ENTRY; + + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(reqbody != NULL); + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(repbody != NULL); + + info->mti_spec.sp_ck_split = !!(reqbody->valid & OBD_MD_FLCKSPLIT); + info->mti_cross_ref = !!(reqbody->valid & OBD_MD_FLCROSSREF); + repbody->eadatasize = 0; + repbody->aclsize = 0; + + rc = mdt_init_ucred(info, reqbody); + if (unlikely(rc)) + GOTO(out, rc); + + rc = mdt_getattr_name_lock(info, lhc, MDS_INODELOCK_UPDATE, NULL); + if (lustre_handle_is_used(&lhc->mlh_reg_lh)) { + ldlm_lock_decref(&lhc->mlh_reg_lh, lhc->mlh_reg_mode); + lhc->mlh_reg_lh.cookie = 0; + } + mdt_exit_ucred(info); + EXIT; +out: + mdt_shrink_reply(info); + return rc; +} + +static struct lu_device_operations mdt_lu_ops; + +static int lu_device_is_mdt(struct lu_device *d) +{ + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &mdt_lu_ops); +} + +static int mdt_set_info(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *key; + __u32 *val; + int keylen, rc = 0; + ENTRY; + + rc = lustre_pack_reply(req, 1, NULL, NULL); + if (rc) + RETURN(rc); + + key = req_capsule_client_get(&info->mti_pill, &RMF_SETINFO_KEY); + if (key == NULL) { + DEBUG_REQ(D_HA, req, "no set_info key"); + RETURN(-EFAULT); + } + + keylen = req_capsule_get_size(&info->mti_pill, &RMF_SETINFO_KEY, + RCL_CLIENT); + + val = req_capsule_client_get(&info->mti_pill, &RMF_SETINFO_VAL); + if (val == NULL) { + DEBUG_REQ(D_HA, req, "no set_info val"); + RETURN(-EFAULT); + } + + if (keylen != (sizeof(KEY_READ_ONLY) - 1) || + memcmp(key, KEY_READ_ONLY, keylen) != 0) + RETURN(-EINVAL); + + req->rq_status = 0; + lustre_msg_set_status(req->rq_repmsg, 0); + + spin_lock(&req->rq_export->exp_lock); + if (*val) + req->rq_export->exp_connect_flags |= OBD_CONNECT_RDONLY; + else + req->rq_export->exp_connect_flags &= ~OBD_CONNECT_RDONLY; + spin_unlock(&req->rq_export->exp_lock); + + RETURN(0); +} + +static int mdt_connect(struct mdt_thread_info *info) +{ + int rc; + struct ptlrpc_request *req; + + req = mdt_info_req(info); + rc = target_handle_connect(req); + if (rc == 0) { + LASSERT(req->rq_export != NULL); + info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev); + rc = mdt_init_idmap(info); + if (rc != 0) + /* if mdt_init_idmap failed, revocation for connect */ + obd_disconnect(class_export_get(req->rq_export)); + } else + rc = err_serious(rc); + return rc; +} + +static int mdt_disconnect(struct mdt_thread_info *info) +{ + int rc; + ENTRY; + + rc = target_handle_disconnect(mdt_info_req(info)); + if (rc) + rc = err_serious(rc); + RETURN(rc); +} + +static int mdt_sendpage(struct mdt_thread_info *info, + struct lu_rdpg *rdpg) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct ptlrpc_bulk_desc *desc; + struct l_wait_info *lwi = &info->mti_u.rdpg.mti_wait_info; + int tmpcount; + int tmpsize; + int i; + int rc; + ENTRY; + + desc = ptlrpc_prep_bulk_exp(req, rdpg->rp_npages, BULK_PUT_SOURCE, + MDS_BULK_PORTAL); + if (desc == NULL) + GOTO(out, rc = -ENOMEM); + + for (i = 0, tmpcount = rdpg->rp_count; + i < rdpg->rp_npages; i++, tmpcount -= tmpsize) { + tmpsize = min_t(int, tmpcount, CFS_PAGE_SIZE); + ptlrpc_prep_bulk_page(desc, rdpg->rp_pages[i], 0, tmpsize); + } + + LASSERT(desc->bd_nob == rdpg->rp_count); + rc = ptlrpc_start_bulk_transfer(desc); + if (rc) + GOTO(free_desc, rc); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) + GOTO(abort_bulk, rc); + + *lwi = LWI_TIMEOUT(obd_timeout * HZ / 4, NULL, NULL); + rc = l_wait_event(desc->bd_waitq, !ptlrpc_bulk_active(desc), lwi); + LASSERT (rc == 0 || rc == -ETIMEDOUT); + + if (rc == 0) { + if (desc->bd_success && + desc->bd_nob_transferred == rdpg->rp_count) + GOTO(free_desc, rc); + + rc = -ETIMEDOUT; /* XXX should this be a different errno? */ + } + + DEBUG_REQ(D_ERROR, req, "bulk failed: %s %d(%d), evicting %s@%s", + (rc == -ETIMEDOUT) ? "timeout" : "network error", + desc->bd_nob_transferred, rdpg->rp_count, + req->rq_export->exp_client_uuid.uuid, + req->rq_export->exp_connection->c_remote_uuid.uuid); + + class_fail_export(req->rq_export); + + EXIT; +abort_bulk: + ptlrpc_abort_bulk(desc); +free_desc: + ptlrpc_free_bulk(desc); +out: + return rc; +} + +#ifdef HAVE_SPLIT_SUPPORT +/* + * Retrieve dir entry from the page and insert it to the slave object, actually, + * this should be in osd layer, but since it will not in the final product, so + * just do it here and do not define more moo api anymore for this. + */ +static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page, + int size) +{ + struct mdt_object *object = info->mti_object; + struct lu_fid *lf = &info->mti_tmp_fid2; + struct md_attr *ma = &info->mti_attr; + struct lu_dirpage *dp; + struct lu_dirent *ent; + int rc = 0, offset = 0; + ENTRY; + + /* Make sure we have at least one entry. */ + if (size == 0) + RETURN(-EINVAL); + + /* + * Disable trans for this name insert, since it will include many trans + * for this. + */ + info->mti_no_need_trans = 1; + /* + * When write_dir_page, no need update parent's ctime, + * and no permission check for name_insert. + */ + ma->ma_attr.la_ctime = 0; + ma->ma_attr.la_valid = LA_MODE; + ma->ma_valid = MA_INODE; + + kmap(page); + dp = page_address(page); + offset = (int)((__u32)lu_dirent_start(dp) - (__u32)dp); + + for (ent = lu_dirent_start(dp); ent != NULL; + ent = lu_dirent_next(ent)) { + struct lu_name *lname; + char *name; + + if (le16_to_cpu(ent->lde_namelen) == 0) + continue; + + fid_le_to_cpu(lf, &ent->lde_fid); + if (le32_to_cpu(ent->lde_hash) & MAX_HASH_HIGHEST_BIT) + ma->ma_attr.la_mode = S_IFDIR; + else + ma->ma_attr.la_mode = 0; + OBD_ALLOC(name, le16_to_cpu(ent->lde_namelen) + 1); + if (name == NULL) + GOTO(out, rc = -ENOMEM); + + memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen)); + lname = mdt_name(info->mti_env, name, + le16_to_cpu(ent->lde_namelen) + 1); + ma->ma_attr_flags |= MDS_PERM_BYPASS; + rc = mdo_name_insert(info->mti_env, + md_object_next(&object->mot_obj), + lname, lf, ma); + OBD_FREE(name, le16_to_cpu(ent->lde_namelen) + 1); + if (rc) { + CERROR("Can't insert %*.*s, rc %d\n", + le16_to_cpu(ent->lde_namelen), + le16_to_cpu(ent->lde_namelen), + ent->lde_name, rc); + GOTO(out, rc); + } + + offset += lu_dirent_size(ent); + if (offset >= size) + break; + } + EXIT; +out: + kunmap(page); + return rc; +} + +static int mdt_bulk_timeout(void *data) +{ + ENTRY; + + CERROR("mdt bulk transfer timeout \n"); + + RETURN(1); +} + +static int mdt_writepage(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_body *reqbody; + struct l_wait_info *lwi; + struct ptlrpc_bulk_desc *desc; + struct page *page; + int rc; + ENTRY; + + + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(err_serious(-EFAULT)); + + desc = ptlrpc_prep_bulk_exp(req, 1, BULK_GET_SINK, MDS_BULK_PORTAL); + if (desc == NULL) + RETURN(err_serious(-ENOMEM)); + + /* allocate the page for the desc */ + page = alloc_pages(GFP_KERNEL, 0); + if (page == NULL) + GOTO(desc_cleanup, rc = -ENOMEM); + + CDEBUG(D_INFO, "Received page offset %d size %d \n", + (int)reqbody->size, (int)reqbody->nlink); + + ptlrpc_prep_bulk_page(desc, page, (int)reqbody->size, + (int)reqbody->nlink); + + /* + * Check if client was evicted while we were doing i/o before touching + * network. + */ + OBD_ALLOC_PTR(lwi); + if (!lwi) + GOTO(cleanup_page, rc = -ENOMEM); + + if (desc->bd_export->exp_failed) + rc = -ENOTCONN; + else + rc = ptlrpc_start_bulk_transfer (desc); + if (rc == 0) { + *lwi = LWI_TIMEOUT_INTERVAL(obd_timeout * HZ / 4, HZ, + mdt_bulk_timeout, desc); + rc = l_wait_event(desc->bd_waitq, !ptlrpc_bulk_active(desc) || + desc->bd_export->exp_failed, lwi); + LASSERT(rc == 0 || rc == -ETIMEDOUT); + if (rc == -ETIMEDOUT) { + DEBUG_REQ(D_ERROR, req, "timeout on bulk GET"); + ptlrpc_abort_bulk(desc); + } else if (desc->bd_export->exp_failed) { + DEBUG_REQ(D_ERROR, req, "Eviction on bulk GET"); + rc = -ENOTCONN; + ptlrpc_abort_bulk(desc); + } else if (!desc->bd_success || + desc->bd_nob_transferred != desc->bd_nob) { + DEBUG_REQ(D_ERROR, req, "%s bulk GET %d(%d)", + desc->bd_success ? + "truncated" : "network error on", + desc->bd_nob_transferred, desc->bd_nob); + /* XXX should this be a different errno? */ + rc = -ETIMEDOUT; + } + } else { + DEBUG_REQ(D_ERROR, req, "ptlrpc_bulk_get failed: rc %d", rc); + } + if (rc) + GOTO(cleanup_lwi, rc); + rc = mdt_write_dir_page(info, page, reqbody->nlink); + +cleanup_lwi: + OBD_FREE_PTR(lwi); +cleanup_page: + __free_pages(page, 0); +desc_cleanup: + ptlrpc_free_bulk(desc); + RETURN(rc); +} +#endif + +static int mdt_readpage(struct mdt_thread_info *info) +{ + struct mdt_object *object = info->mti_object; + struct lu_rdpg *rdpg = &info->mti_u.rdpg.mti_rdpg; + struct mdt_body *reqbody; + struct mdt_body *repbody; + int rc; + int i; + ENTRY; + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) + RETURN(err_serious(-ENOMEM)); + + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL || repbody == NULL) + RETURN(err_serious(-EFAULT)); + + rc = mdt_check_ucred(info); + if (rc) + RETURN(err_serious(rc)); + + /* + * prepare @rdpg before calling lower layers and transfer itself. Here + * reqbody->size contains offset of where to start to read and + * reqbody->nlink contains number bytes to read. + */ + rdpg->rp_hash = reqbody->size; + if ((__u64)rdpg->rp_hash != reqbody->size) { + CERROR("Invalid hash: %#llx != %#llx\n", + (__u64)rdpg->rp_hash, reqbody->size); + RETURN(-EFAULT); + } + rdpg->rp_count = reqbody->nlink; + rdpg->rp_npages = (rdpg->rp_count + CFS_PAGE_SIZE - 1)>>CFS_PAGE_SHIFT; + OBD_ALLOC(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]); + if (rdpg->rp_pages == NULL) + RETURN(-ENOMEM); + + for (i = 0; i < rdpg->rp_npages; ++i) { + rdpg->rp_pages[i] = alloc_pages(GFP_KERNEL, 0); + if (rdpg->rp_pages[i] == NULL) + GOTO(free_rdpg, rc = -ENOMEM); + } + + /* call lower layers to fill allocated pages with directory data */ + rc = mo_readpage(info->mti_env, mdt_object_child(object), rdpg); + if (rc) + GOTO(free_rdpg, rc); + + /* send pages to client */ + rc = mdt_sendpage(info, rdpg); + + EXIT; +free_rdpg: + + for (i = 0; i < rdpg->rp_npages; i++) + if (rdpg->rp_pages[i] != NULL) + __free_pages(rdpg->rp_pages[i], 0); + OBD_FREE(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]); + + MDT_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, 0); + + return rc; +} + +static int mdt_reint_internal(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc, + __u32 op) +{ + struct req_capsule *pill = &info->mti_pill; + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_body *repbody; + int need_shrink = 0; + int rc; + ENTRY; + + /* pack reply */ + if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) { + req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, + mdt->mdt_max_mdsize); + need_shrink = 1; + } + if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) { + req_capsule_set_size(pill, &RMF_LOGCOOKIES, RCL_SERVER, + mdt->mdt_max_cookiesize); + need_shrink = 1; + } + rc = req_capsule_pack(pill); + if (rc != 0) { + CERROR("Can't pack response, rc %d\n", rc); + RETURN(err_serious(rc)); + } + + if (req_capsule_has_field(pill, &RMF_MDT_BODY, RCL_SERVER)) { + repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); + LASSERT(repbody); + repbody->eadatasize = 0; + repbody->aclsize = 0; + } + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNPACK)) + GOTO(out_shrink, rc = err_serious(-EFAULT)); + + rc = mdt_reint_unpack(info, op); + if (rc != 0) { + CERROR("Can't unpack reint, rc %d\n", rc); + GOTO(out_shrink, rc = err_serious(rc)); + } + + rc = mdt_init_ucred_reint(info); + if (rc) + GOTO(out_shrink, rc); + + rc = mdt_fix_attr_ucred(info, op); + if (rc != 0) + GOTO(out_ucred, rc = err_serious(rc)); + + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + struct mdt_client_data *mcd; + + mcd = req->rq_export->exp_mdt_data.med_mcd; + if (req_xid_is_last(req)) { + need_shrink = 0; + mdt_reconstruct(info, lhc); + rc = lustre_msg_get_status(req->rq_repmsg); + GOTO(out_ucred, rc); + } + DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")", + mcd->mcd_last_xid); + } + + need_shrink = 0; + rc = mdt_reint_rec(info, lhc); + EXIT; +out_ucred: + mdt_exit_ucred(info); +out_shrink: + if (need_shrink) + mdt_shrink_reply(info); + return rc; +} + +static long mdt_reint_opcode(struct mdt_thread_info *info, + const struct req_format **fmt) +{ + __u32 *ptr; + long opc; + + opc = err_serious(-EFAULT); + ptr = req_capsule_client_get(&info->mti_pill, &RMF_REINT_OPC); + if (ptr != NULL) { + opc = *ptr; + DEBUG_REQ(D_INODE, mdt_info_req(info), "reint opt = %ld", opc); + if (opc < REINT_MAX && fmt[opc] != NULL) + req_capsule_extend(&info->mti_pill, fmt[opc]); + else { + CERROR("Unsupported opc: %ld\n", opc); + opc = err_serious(opc); + } + } + return opc; +} + +static int mdt_reint(struct mdt_thread_info *info) +{ + long opc; + int rc; + + static const struct req_format *reint_fmts[REINT_MAX] = { + [REINT_SETATTR] = &RQF_MDS_REINT_SETATTR, + [REINT_CREATE] = &RQF_MDS_REINT_CREATE, + [REINT_LINK] = &RQF_MDS_REINT_LINK, + [REINT_UNLINK] = &RQF_MDS_REINT_UNLINK, + [REINT_RENAME] = &RQF_MDS_REINT_RENAME, + [REINT_OPEN] = &RQF_MDS_REINT_OPEN + }; + + ENTRY; + + opc = mdt_reint_opcode(info, reint_fmts); + if (opc >= 0) { + /* + * No lock possible here from client to pass it to reint code + * path. + */ + rc = mdt_reint_internal(info, NULL, opc); + } else { + rc = opc; + } + + info->mti_fail_id = OBD_FAIL_MDS_REINT_NET_REP; + RETURN(rc); +} + +/* TODO these two methods not available now. */ + +/* this should sync the whole device */ +static int mdt_device_sync(struct mdt_thread_info *info) +{ + return 0; +} + +/* this should sync this object */ +static int mdt_object_sync(struct mdt_thread_info *info) +{ + return 0; +} + +static int mdt_sync(struct mdt_thread_info *info) +{ + struct req_capsule *pill = &info->mti_pill; + struct mdt_body *body; + int rc; + ENTRY; + + /* The fid may be zero, so we req_capsule_set manually */ + req_capsule_set(pill, &RQF_MDS_SYNC); + + body = req_capsule_client_get(pill, &RMF_MDT_BODY); + if (body == NULL) + RETURN(err_serious(-EINVAL)); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SYNC_PACK)) + RETURN(err_serious(-ENOMEM)); + + if (fid_seq(&body->fid1) == 0) { + /* sync the whole device */ + rc = req_capsule_pack(pill); + if (rc == 0) + rc = mdt_device_sync(info); + else + rc = err_serious(rc); + } else { + /* sync an object */ + rc = mdt_unpack_req_pack_rep(info, HABEO_CORPUS|HABEO_REFERO); + if (rc == 0) { + rc = mdt_object_sync(info); + if (rc == 0) { + struct md_object *next; + const struct lu_fid *fid; + struct lu_attr *la = &info->mti_attr.ma_attr; + + next = mdt_object_child(info->mti_object); + info->mti_attr.ma_need = MA_INODE; + info->mti_attr.ma_valid = 0; + rc = mo_attr_get(info->mti_env, next, + &info->mti_attr); + if (rc == 0) { + body = req_capsule_server_get(pill, + &RMF_MDT_BODY); + fid = mdt_object_fid(info->mti_object); + mdt_pack_attr2body(info, body, la, fid); + } + } + } else + rc = err_serious(rc); + } + RETURN(rc); +} + +static int mdt_quotacheck_handle(struct mdt_thread_info *info) +{ + return err_serious(-EOPNOTSUPP); +} + +static int mdt_quotactl_handle(struct mdt_thread_info *info) +{ + return err_serious(-EOPNOTSUPP); +} + +/* + * OBD PING and other handlers. + */ +static int mdt_obd_ping(struct mdt_thread_info *info) +{ + int rc; + ENTRY; + rc = target_handle_ping(mdt_info_req(info)); + if (rc < 0) + rc = err_serious(rc); + RETURN(rc); +} + +static int mdt_obd_log_cancel(struct mdt_thread_info *info) +{ + return err_serious(-EOPNOTSUPP); +} + +static int mdt_obd_qc_callback(struct mdt_thread_info *info) +{ + return err_serious(-EOPNOTSUPP); +} + + +/* + * DLM handlers. + */ +static struct ldlm_callback_suite cbs = { + .lcs_completion = ldlm_server_completion_ast, + .lcs_blocking = ldlm_server_blocking_ast, + .lcs_glimpse = NULL +}; + +static int mdt_enqueue(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req; + __u64 req_bits; + int rc; + + /* + * info->mti_dlm_req already contains swapped and (if necessary) + * converted dlm request. + */ + LASSERT(info->mti_dlm_req != NULL); + + if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE)) { + info->mti_fail_id = OBD_FAIL_LDLM_ENQUEUE; + return 0; + } + + req = mdt_info_req(info); + + /* + * Lock without inodebits makes no sense and will oops later in + * ldlm. Let's check it now to see if we have wrong lock from client or + * bits get corrupted somewhere in mdt_intent_policy(). + */ + req_bits = info->mti_dlm_req->lock_desc.l_policy_data.l_inodebits.bits; + LASSERT(req_bits != 0); + + rc = ldlm_handle_enqueue0(info->mti_mdt->mdt_namespace, + req, info->mti_dlm_req, &cbs); + info->mti_fail_id = OBD_FAIL_LDLM_REPLY; + return rc ? err_serious(rc) : req->rq_status; +} + +static int mdt_convert(struct mdt_thread_info *info) +{ + int rc; + struct ptlrpc_request *req; + + LASSERT(info->mti_dlm_req); + req = mdt_info_req(info); + rc = ldlm_handle_convert0(req, info->mti_dlm_req); + return rc ? err_serious(rc) : req->rq_status; +} + +static int mdt_bl_callback(struct mdt_thread_info *info) +{ + CERROR("bl callbacks should not happen on MDS\n"); + LBUG(); + return err_serious(-EOPNOTSUPP); +} + +static int mdt_cp_callback(struct mdt_thread_info *info) +{ + CERROR("cp callbacks should not happen on MDS\n"); + LBUG(); + return err_serious(-EOPNOTSUPP); +} + +/* + * sec context handlers + */ +static int mdt_sec_ctx_handle(struct mdt_thread_info *info) +{ + int rc; + + rc = mdt_handle_idmap(info); + + if (unlikely(rc)) { + struct ptlrpc_request *req = mdt_info_req(info); + __u32 opc; + + opc = lustre_msg_get_opc(req->rq_reqmsg); + if (opc == SEC_CTX_INIT || opc == SEC_CTX_INIT_CONT) + sptlrpc_svc_ctx_invalidate(req); + } + + return rc; +} + +static struct mdt_object *mdt_obj(struct lu_object *o) +{ + LASSERT(lu_device_is_mdt(o->lo_dev)); + return container_of0(o, struct mdt_object, mot_obj.mo_lu); +} + +struct mdt_object *mdt_object_find(const struct lu_env *env, + struct mdt_device *d, + const struct lu_fid *f) +{ + struct lu_object *o; + struct mdt_object *m; + ENTRY; + + o = lu_object_find(env, d->mdt_md_dev.md_lu_dev.ld_site, f); + if (unlikely(IS_ERR(o))) + m = (struct mdt_object *)o; + else + m = mdt_obj(o); + RETURN(m); +} + +int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *o, + struct mdt_lock_handle *lh, __u64 ibits, int locality) +{ + struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace; + ldlm_policy_data_t *policy = &info->mti_policy; + struct ldlm_res_id *res_id = &info->mti_res_id; + int rc; + ENTRY; + + LASSERT(!lustre_handle_is_used(&lh->mlh_reg_lh)); + LASSERT(!lustre_handle_is_used(&lh->mlh_pdo_lh)); + LASSERT(lh->mlh_reg_mode != LCK_MINMODE); + LASSERT(lh->mlh_type != MDT_NUL_LOCK); + + if (mdt_object_exists(o) < 0) { + if (locality == MDT_CROSS_LOCK) { + /* cross-ref object fix */ + ibits &= ~MDS_INODELOCK_UPDATE; + ibits |= MDS_INODELOCK_LOOKUP; + } else { + LASSERT(!(ibits & MDS_INODELOCK_UPDATE)); + LASSERT(ibits & MDS_INODELOCK_LOOKUP); + } + /* No PDO lock on remote object */ + LASSERT(lh->mlh_type != MDT_PDO_LOCK); + } + + memset(policy, 0, sizeof(*policy)); + fid_build_reg_res_name(mdt_object_fid(o), res_id); + + /* + * Take PDO lock on whole directory and build correct @res_id for lock + * on part of directory. + */ + if (lh->mlh_pdo_hash != 0) { + LASSERT(lh->mlh_type == MDT_PDO_LOCK); + mdt_lock_pdo_mode(info, o, lh); + if (lh->mlh_pdo_mode != LCK_NL) { + /* + * Do not use LDLM_FL_LOCAL_ONLY for parallel lock, it + * is never going to be sent to client and we do not + * want it slowed down due to possible cancels. + */ + policy->l_inodebits.bits = MDS_INODELOCK_UPDATE; + rc = mdt_fid_lock(ns, &lh->mlh_pdo_lh, lh->mlh_pdo_mode, + policy, res_id, LDLM_FL_ATOMIC_CB); + if (unlikely(rc)) + RETURN(rc); + } + + /* + * Finish res_id initializing by name hash marking patr of + * directory which is taking modification. + */ + res_id->name[LUSTRE_RES_ID_HSH_OFF] = lh->mlh_pdo_hash; + } + + policy->l_inodebits.bits = ibits; + + /* + * Use LDLM_FL_LOCAL_ONLY for this lock. We do not know yet if it is + * going to be sent to client. If it is - mdt_intent_policy() path will + * fix it up and turns FL_LOCAL flag off. + */ + rc = mdt_fid_lock(ns, &lh->mlh_reg_lh, lh->mlh_reg_mode, policy, + res_id, LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB); + + if (rc) + GOTO(out, rc); + + if (lh->mlh_type == MDT_PDO_LOCK) { + /* check for exists after object is locked */ + if (mdt_object_exists(o) == 0) { + /* Non-existent object shouldn't have PDO lock */ + rc = -ESTALE; + } else { + /* Non-dir object shouldn't have PDO lock */ + LASSERT(S_ISDIR(lu_object_attr(&o->mot_obj.mo_lu))); + } + } +out: + if (rc) + mdt_object_unlock(info, o, lh, 1); + + + RETURN(rc); +} + +/* + * Just call ldlm_lock_decref() if decref, else we only call ptlrpc_save_lock() + * to save this lock in req. when transaction committed, req will be released, + * and lock will, too. + */ +void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *o, + struct mdt_lock_handle *lh, int decref) +{ + struct ptlrpc_request *req = mdt_info_req(info); + ENTRY; + + if (lustre_handle_is_used(&lh->mlh_pdo_lh)) { + /* Do not save PDO locks to request, just decref. */ + mdt_fid_unlock(&lh->mlh_pdo_lh, + lh->mlh_pdo_mode); + lh->mlh_pdo_lh.cookie = 0ull; + } + + if (lustre_handle_is_used(&lh->mlh_reg_lh)) { + if (decref) { + mdt_fid_unlock(&lh->mlh_reg_lh, + lh->mlh_reg_mode); + } else { + ptlrpc_save_lock(req, &lh->mlh_reg_lh, + lh->mlh_reg_mode); + } + lh->mlh_reg_lh.cookie = 0ull; + } + + EXIT; +} + +struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info, + const struct lu_fid *f, + struct mdt_lock_handle *lh, + __u64 ibits) +{ + struct mdt_object *o; + + o = mdt_object_find(info->mti_env, info->mti_mdt, f); + if (!IS_ERR(o)) { + int rc; + + rc = mdt_object_lock(info, o, lh, ibits, + MDT_LOCAL_LOCK); + if (rc != 0) { + mdt_object_put(info->mti_env, o); + o = ERR_PTR(rc); + } + } + return o; +} + +void mdt_object_unlock_put(struct mdt_thread_info * info, + struct mdt_object * o, + struct mdt_lock_handle *lh, + int decref) +{ + mdt_object_unlock(info, o, lh, decref); + mdt_object_put(info->mti_env, o); +} + +static struct mdt_handler *mdt_handler_find(__u32 opc, + struct mdt_opc_slice *supported) +{ + struct mdt_opc_slice *s; + struct mdt_handler *h; + + h = NULL; + for (s = supported; s->mos_hs != NULL; s++) { + if (s->mos_opc_start <= opc && opc < s->mos_opc_end) { + h = s->mos_hs + (opc - s->mos_opc_start); + if (likely(h->mh_opc != 0)) + LASSERT(h->mh_opc == opc); + else + h = NULL; /* unsupported opc */ + break; + } + } + return h; +} + +static int mdt_lock_resname_compat(struct mdt_device *m, + struct ldlm_request *req) +{ + /* XXX something... later. */ + return 0; +} + +static int mdt_lock_reply_compat(struct mdt_device *m, struct ldlm_reply *rep) +{ + /* XXX something... later. */ + return 0; +} + +/* + * Generic code handling requests that have struct mdt_body passed in: + * + * - extract mdt_body from request and save it in @info, if present; + * + * - create lu_object, corresponding to the fid in mdt_body, and save it in + * @info; + * + * - if HABEO_CORPUS flag is set for this request type check whether object + * actually exists on storage (lu_object_exists()). + * + */ +static int mdt_body_unpack(struct mdt_thread_info *info, __u32 flags) +{ + const struct mdt_body *body; + struct mdt_object *obj; + const struct lu_env *env; + struct req_capsule *pill; + int rc; + + env = info->mti_env; + pill = &info->mti_pill; + + body = info->mti_body = req_capsule_client_get(pill, &RMF_MDT_BODY); + if (body == NULL) + return -EFAULT; + + if (!fid_is_sane(&body->fid1)) { + CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1)); + return -EINVAL; + } + + /* + * Do not get size or any capa fields before we check that request + * contains capa actually. There are some requests which do not, for + * instance MDS_IS_SUBDIR. + */ + if (req_capsule_has_field(pill, &RMF_CAPA1, RCL_CLIENT) && + req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, &body->fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + + obj = mdt_object_find(env, info->mti_mdt, &body->fid1); + if (!IS_ERR(obj)) { + if ((flags & HABEO_CORPUS) && + !mdt_object_exists(obj)) { + mdt_object_put(env, obj); + /* for capability renew ENOENT will be handled in + * mdt_renew_capa */ + if (body->valid & OBD_MD_FLOSSCAPA) + rc = 0; + else + rc = -ENOENT; + } else { + info->mti_object = obj; + rc = 0; + } + } else + rc = PTR_ERR(obj); + + return rc; +} + +static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags) +{ + struct req_capsule *pill; + int rc; + + ENTRY; + pill = &info->mti_pill; + + if (req_capsule_has_field(pill, &RMF_MDT_BODY, RCL_CLIENT)) + rc = mdt_body_unpack(info, flags); + else + rc = 0; + + if (rc == 0 && (flags & HABEO_REFERO)) { + struct mdt_device *mdt = info->mti_mdt; + + /* Pack reply. */ + if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) + req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, + mdt->mdt_max_mdsize); + if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) + req_capsule_set_size(pill, &RMF_LOGCOOKIES, RCL_SERVER, + mdt->mdt_max_cookiesize); + + rc = req_capsule_pack(pill); + } + RETURN(rc); +} + +static int mdt_init_capa_ctxt(const struct lu_env *env, struct mdt_device *m) +{ + struct md_device *next = m->mdt_child; + + return next->md_ops->mdo_init_capa_ctxt(env, next, + m->mdt_opts.mo_mds_capa, + m->mdt_capa_timeout, + m->mdt_capa_alg, + m->mdt_capa_keys); +} + +/* + * Invoke handler for this request opc. Also do necessary preprocessing + * (according to handler ->mh_flags), and post-processing (setting of + * ->last_{xid,committed}). + */ +static int mdt_req_handle(struct mdt_thread_info *info, + struct mdt_handler *h, struct ptlrpc_request *req) +{ + int rc, serious = 0; + __u32 flags; + + ENTRY; + + LASSERT(h->mh_act != NULL); + LASSERT(h->mh_opc == lustre_msg_get_opc(req->rq_reqmsg)); + LASSERT(current->journal_info == NULL); + + /* + * Do not use *_FAIL_CHECK_ONCE() macros, because they will stop + * correct handling of failed req later in ldlm due to doing + * obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED without actually + * correct actions like it is done in target_send_reply_msg(). + */ + if (h->mh_fail_id != 0) { + /* + * Set to info->mti_fail_id to handler fail_id, it will be used + * later, and better than use default fail_id. + */ + if (OBD_FAIL_CHECK(h->mh_fail_id)) { + info->mti_fail_id = h->mh_fail_id; + RETURN(0); + } + } + + rc = 0; + flags = h->mh_flags; + LASSERT(ergo(flags & (HABEO_CORPUS|HABEO_REFERO), h->mh_fmt != NULL)); + + if (h->mh_fmt != NULL) { + req_capsule_set(&info->mti_pill, h->mh_fmt); + rc = mdt_unpack_req_pack_rep(info, flags); + } + + if (rc == 0 && flags & MUTABOR && + req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) + /* should it be rq_status? */ + rc = -EROFS; + + if (rc == 0 && flags & HABEO_CLAVIS) { + struct ldlm_request *dlm_req; + + LASSERT(h->mh_fmt != NULL); + + dlm_req = req_capsule_client_get(&info->mti_pill, &RMF_DLM_REQ); + if (dlm_req != NULL) { + if (info->mti_mdt->mdt_opts.mo_compat_resname) + rc = mdt_lock_resname_compat(info->mti_mdt, + dlm_req); + info->mti_dlm_req = dlm_req; + } else { + CERROR("Can't unpack dlm request\n"); + rc = -EFAULT; + } + } + + /* capability setting changed via /proc, needs reinitialize ctxt */ + if (info->mti_mdt && info->mti_mdt->mdt_capa_conf) { + mdt_init_capa_ctxt(info->mti_env, info->mti_mdt); + info->mti_mdt->mdt_capa_conf = 0; + } + + if (likely(rc == 0)) { + /* + * Process request, there can be two types of rc: + * 1) errors with msg unpack/pack, other failures outside the + * operation itself. This is counted as serious errors; + * 2) errors during fs operation, should be placed in rq_status + * only + */ + rc = h->mh_act(info); + serious = is_serious(rc); + rc = clear_serious(rc); + } else + serious = 1; + + req->rq_status = rc; + + /* + * ELDLM_* codes which > 0 should be in rq_status only as well as + * all non-serious errors. + */ + if (rc > 0 || !serious) + rc = 0; + + LASSERT(current->journal_info == NULL); + + if (rc == 0 && (flags & HABEO_CLAVIS) && + info->mti_mdt->mdt_opts.mo_compat_resname) { + struct ldlm_reply *dlmrep; + + dlmrep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); + if (dlmrep != NULL) + rc = mdt_lock_reply_compat(info->mti_mdt, dlmrep); + } + + /* If we're DISCONNECTing, the mdt_export_data is already freed */ + if (likely(rc == 0 && h->mh_opc != MDS_DISCONNECT)) + target_committed_to_req(req); + + if (unlikely((lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && + lustre_msg_get_transno(req->rq_reqmsg) == 0)) { + DEBUG_REQ(D_ERROR, req, "transno is 0 during REPLAY"); + LBUG(); + } + + RETURN(rc); +} + +void mdt_lock_handle_init(struct mdt_lock_handle *lh) +{ + lh->mlh_type = MDT_NUL_LOCK; + lh->mlh_reg_lh.cookie = 0ull; + lh->mlh_reg_mode = LCK_MINMODE; + lh->mlh_pdo_lh.cookie = 0ull; + lh->mlh_pdo_mode = LCK_MINMODE; +} + +void mdt_lock_handle_fini(struct mdt_lock_handle *lh) +{ + LASSERT(!lustre_handle_is_used(&lh->mlh_reg_lh)); + LASSERT(!lustre_handle_is_used(&lh->mlh_pdo_lh)); +} + +/* + * Initialize fields of struct mdt_thread_info. Other fields are left in + * uninitialized state, because it's too expensive to zero out whole + * mdt_thread_info (> 1K) on each request arrival. + */ +static void mdt_thread_info_init(struct ptlrpc_request *req, + struct mdt_thread_info *info) +{ + int i; + struct md_capainfo *ci; + + info->mti_rep_buf_nr = ARRAY_SIZE(info->mti_rep_buf_size); + for (i = 0; i < ARRAY_SIZE(info->mti_rep_buf_size); i++) + info->mti_rep_buf_size[i] = -1; + req_capsule_init(&info->mti_pill, req, RCL_SERVER, + info->mti_rep_buf_size); + + /* lock handle */ + for (i = 0; i < ARRAY_SIZE(info->mti_lh); i++) + mdt_lock_handle_init(&info->mti_lh[i]); + + /* mdt device: it can be NULL while CONNECT */ + if (req->rq_export) { + info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev); + info->mti_exp = req->rq_export; + } else + info->mti_mdt = NULL; + info->mti_env = req->rq_svc_thread->t_env; + ci = md_capainfo(info->mti_env); + memset(ci, 0, sizeof *ci); + + info->mti_fail_id = OBD_FAIL_MDS_ALL_REPLY_NET; + info->mti_transno = lustre_msg_get_transno(req->rq_reqmsg); + + memset(&info->mti_attr, 0, sizeof(info->mti_attr)); + info->mti_body = NULL; + info->mti_object = NULL; + info->mti_dlm_req = NULL; + info->mti_has_trans = 0; + info->mti_no_need_trans = 0; + info->mti_cross_ref = 0; + info->mti_opdata = 0; + + /* To not check for split by default. */ + info->mti_spec.sp_ck_split = 0; +} + +static void mdt_thread_info_fini(struct mdt_thread_info *info) +{ + int i; + + req_capsule_fini(&info->mti_pill); + if (info->mti_object != NULL) { + mdt_object_put(info->mti_env, info->mti_object); + info->mti_object = NULL; + } + for (i = 0; i < ARRAY_SIZE(info->mti_lh); i++) + mdt_lock_handle_fini(&info->mti_lh[i]); + info->mti_env = NULL; +} + +/* mds/handler.c */ +extern int mds_filter_recovery_request(struct ptlrpc_request *req, + struct obd_device *obd, int *process); +/* + * Handle recovery. Return: + * +1: continue request processing; + * -ve: abort immediately with the given error code; + * 0: send reply with error code in req->rq_status; + */ +static int mdt_recovery(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + int recovering; + struct obd_device *obd; + + ENTRY; + + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case MDS_CONNECT: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: + { +#if 0 + int rc; + + rc = mdt_handle_idmap(info); + if (rc) + RETURN(rc); + else +#endif + RETURN(+1); + } + } + + if (unlikely(req->rq_export == NULL)) { + CERROR("operation %d on unconnected MDS from %s\n", + lustre_msg_get_opc(req->rq_reqmsg), + libcfs_id2str(req->rq_peer)); + /* FIXME: For CMD cleanup, when mds_B stop, the req from + * mds_A will get -ENOTCONN(especially for ping req), + * which will cause that mds_A deactive timeout, then when + * mds_A cleanup, the cleanup process will be suspended since + * deactive timeout is not zero. + */ + req->rq_status = -ENOTCONN; + target_send_reply(req, -ENOTCONN, info->mti_fail_id); + RETURN(0); + } + + /* sanity check: if the xid matches, the request must be marked as a + * resent or replayed */ + if (req_xid_is_last(req)) { + if (!(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY))) { + DEBUG_REQ(D_WARNING, req, "rq_xid "LPU64" matches last_xid, " + "expected REPLAY or RESENT flag (%x)", req->rq_xid, + lustre_msg_get_flags(req->rq_reqmsg)); + LBUG(); + req->rq_status = -ENOTCONN; + RETURN(-ENOTCONN); + } + } + + /* else: note the opposite is not always true; a RESENT req after a + * failover will usually not match the last_xid, since it was likely + * never committed. A REPLAYed request will almost never match the + * last xid, however it could for a committed, but still retained, + * open. */ + + obd = req->rq_export->exp_obd; + + /* Check for aborted recovery... */ + spin_lock_bh(&obd->obd_processing_task_lock); + recovering = obd->obd_recovering; + spin_unlock_bh(&obd->obd_processing_task_lock); + if (unlikely(recovering)) { + int rc; + int should_process; + DEBUG_REQ(D_INFO, req, "Got new replay"); + rc = mds_filter_recovery_request(req, obd, &should_process); + if (rc != 0 || !should_process) + RETURN(rc); + else if (should_process < 0) { + req->rq_status = should_process; + rc = ptlrpc_error(req); + RETURN(rc); + } + } + RETURN(+1); +} + +static int mdt_reply(struct ptlrpc_request *req, int rc, + struct mdt_thread_info *info) +{ + ENTRY; + +#if 0 + if (req->rq_reply_state == NULL && rc == 0) { + req->rq_status = rc; + lustre_pack_reply(req, 1, NULL, NULL); + } +#endif + target_send_reply(req, rc, info->mti_fail_id); + RETURN(0); +} + +/* mds/handler.c */ +extern int mds_msg_check_version(struct lustre_msg *msg); + +static int mdt_handle0(struct ptlrpc_request *req, + struct mdt_thread_info *info, + struct mdt_opc_slice *supported) +{ + struct mdt_handler *h; + struct lustre_msg *msg; + int rc; + + ENTRY; + + MDT_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); + + LASSERT(current->journal_info == NULL); + + msg = req->rq_reqmsg; + rc = mds_msg_check_version(msg); + if (likely(rc == 0)) { + rc = mdt_recovery(info); + if (likely(rc == +1)) { + h = mdt_handler_find(lustre_msg_get_opc(msg), + supported); + if (likely(h != NULL)) { + rc = mdt_req_handle(info, h, req); + rc = mdt_reply(req, rc, info); + } else { + CERROR("The unsupported opc: 0x%x\n", lustre_msg_get_opc(msg) ); + req->rq_status = -ENOTSUPP; + rc = ptlrpc_error(req); + RETURN(rc); + } + } + } else + CERROR(LUSTRE_MDT_NAME" drops mal-formed request\n"); + RETURN(rc); +} + +/* + * MDT handler function called by ptlrpc service thread when request comes. + * + * XXX common "target" functionality should be factored into separate module + * shared by mdt, ost and stand-alone services like fld. + */ +static int mdt_handle_common(struct ptlrpc_request *req, + struct mdt_opc_slice *supported) +{ + struct lu_env *env; + struct mdt_thread_info *info; + int rc; + ENTRY; + + env = req->rq_svc_thread->t_env; + LASSERT(env != NULL); + LASSERT(env->le_ses != NULL); + LASSERT(env->le_ctx.lc_thread == req->rq_svc_thread); + info = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(info != NULL); + + mdt_thread_info_init(req, info); + + rc = mdt_handle0(req, info, supported); + + mdt_thread_info_fini(info); + RETURN(rc); +} + +/* + * This is called from recovery code as handler of _all_ RPC types, FLD and SEQ + * as well. + */ +int mdt_recovery_handle(struct ptlrpc_request *req) +{ + int rc; + ENTRY; + + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case FLD_QUERY: + rc = mdt_handle_common(req, mdt_fld_handlers); + break; + case SEQ_QUERY: + rc = mdt_handle_common(req, mdt_seq_handlers); + break; + default: + rc = mdt_handle_common(req, mdt_regular_handlers); + break; + } + + RETURN(rc); +} + +static int mdt_regular_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_regular_handlers); +} + +static int mdt_readpage_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_readpage_handlers); +} + +static int mdt_xmds_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_xmds_handlers); +} + +static int mdt_mdsc_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_seq_handlers); +} + +static int mdt_mdss_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_seq_handlers); +} + +static int mdt_dtss_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_seq_handlers); +} + +static int mdt_fld_handle(struct ptlrpc_request *req) +{ + return mdt_handle_common(req, mdt_fld_handlers); +} + +enum mdt_it_code { + MDT_IT_OPEN, + MDT_IT_OCREAT, + MDT_IT_CREATE, + MDT_IT_GETATTR, + MDT_IT_READDIR, + MDT_IT_LOOKUP, + MDT_IT_UNLINK, + MDT_IT_TRUNC, + MDT_IT_GETXATTR, + MDT_IT_NR +}; + +static int mdt_intent_getattr(enum mdt_it_code opcode, + struct mdt_thread_info *info, + struct ldlm_lock **, + int); +static int mdt_intent_reint(enum mdt_it_code opcode, + struct mdt_thread_info *info, + struct ldlm_lock **, + int); + +static struct mdt_it_flavor { + const struct req_format *it_fmt; + __u32 it_flags; + int (*it_act)(enum mdt_it_code , + struct mdt_thread_info *, + struct ldlm_lock **, + int); + long it_reint; +} mdt_it_flavor[] = { + [MDT_IT_OPEN] = { + .it_fmt = &RQF_LDLM_INTENT, + /*.it_flags = HABEO_REFERO,*/ + .it_flags = 0, + .it_act = mdt_intent_reint, + .it_reint = REINT_OPEN + }, + [MDT_IT_OCREAT] = { + .it_fmt = &RQF_LDLM_INTENT, + .it_flags = MUTABOR, + .it_act = mdt_intent_reint, + .it_reint = REINT_OPEN + }, + [MDT_IT_CREATE] = { + .it_fmt = &RQF_LDLM_INTENT, + .it_flags = MUTABOR, + .it_act = mdt_intent_reint, + .it_reint = REINT_CREATE + }, + [MDT_IT_GETATTR] = { + .it_fmt = &RQF_LDLM_INTENT_GETATTR, + .it_flags = HABEO_REFERO, + .it_act = mdt_intent_getattr + }, + [MDT_IT_READDIR] = { + .it_fmt = NULL, + .it_flags = 0, + .it_act = NULL + }, + [MDT_IT_LOOKUP] = { + .it_fmt = &RQF_LDLM_INTENT_GETATTR, + .it_flags = HABEO_REFERO, + .it_act = mdt_intent_getattr + }, + [MDT_IT_UNLINK] = { + .it_fmt = &RQF_LDLM_INTENT_UNLINK, + .it_flags = MUTABOR, + .it_act = NULL, + .it_reint = REINT_UNLINK + }, + [MDT_IT_TRUNC] = { + .it_fmt = NULL, + .it_flags = MUTABOR, + .it_act = NULL + }, + [MDT_IT_GETXATTR] = { + .it_fmt = NULL, + .it_flags = 0, + .it_act = NULL + } +}; + +int mdt_intent_lock_replace(struct mdt_thread_info *info, + struct ldlm_lock **lockp, + struct ldlm_lock *new_lock, + struct mdt_lock_handle *lh, + int flags) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct ldlm_lock *lock = *lockp; + + /* + * Get new lock only for cases when possible resent did not find any + * lock. + */ + if (new_lock == NULL) + new_lock = ldlm_handle2lock(&lh->mlh_reg_lh); + + if (new_lock == NULL && (flags & LDLM_FL_INTENT_ONLY)) { + lh->mlh_reg_lh.cookie = 0; + RETURN(0); + } + + LASSERTF(new_lock != NULL, + "lockh "LPX64"\n", lh->mlh_reg_lh.cookie); + + /* + * If we've already given this lock to a client once, then we should + * have no readers or writers. Otherwise, we should have one reader + * _or_ writer ref (which will be zeroed below) before returning the + * lock to a client. + */ + if (new_lock->l_export == req->rq_export) { + LASSERT(new_lock->l_readers + new_lock->l_writers == 0); + } else { + LASSERT(new_lock->l_export == NULL); + LASSERT(new_lock->l_readers + new_lock->l_writers == 1); + } + + *lockp = new_lock; + + if (new_lock->l_export == req->rq_export) { + /* + * Already gave this to the client, which means that we + * reconstructed a reply. + */ + LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & + MSG_RESENT); + lh->mlh_reg_lh.cookie = 0; + RETURN(ELDLM_LOCK_REPLACED); + } + + /* This lock might already be given to the client by an resent req, + * in this case we should return ELDLM_LOCK_ABORTED, + * so we should check led_held_locks here, but it will affect + * performance, FIXME + */ + /* Fixup the lock to be given to the client */ + lock_res_and_lock(new_lock); + new_lock->l_readers = 0; + new_lock->l_writers = 0; + + new_lock->l_export = class_export_get(req->rq_export); + spin_lock(&req->rq_export->exp_ldlm_data.led_lock); + list_add(&new_lock->l_export_chain, + &new_lock->l_export->exp_ldlm_data.led_held_locks); + spin_unlock(&req->rq_export->exp_ldlm_data.led_lock); + + new_lock->l_blocking_ast = lock->l_blocking_ast; + new_lock->l_completion_ast = lock->l_completion_ast; + new_lock->l_remote_handle = lock->l_remote_handle; + new_lock->l_flags &= ~LDLM_FL_LOCAL; + + unlock_res_and_lock(new_lock); + LDLM_LOCK_PUT(new_lock); + lh->mlh_reg_lh.cookie = 0; + + RETURN(ELDLM_LOCK_REPLACED); +} + +static void mdt_intent_fixup_resent(struct mdt_thread_info *info, + struct ldlm_lock *new_lock, + struct ldlm_lock **old_lock, + struct mdt_lock_handle *lh) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct obd_export *exp = req->rq_export; + struct lustre_handle remote_hdl; + struct ldlm_request *dlmreq; + struct list_head *iter; + + if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)) + return; + + dlmreq = req_capsule_client_get(&info->mti_pill, &RMF_DLM_REQ); + remote_hdl = dlmreq->lock_handle1; + + spin_lock(&exp->exp_ldlm_data.led_lock); + list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) { + struct ldlm_lock *lock; + lock = list_entry(iter, struct ldlm_lock, l_export_chain); + if (lock == new_lock) + continue; + if (lock->l_remote_handle.cookie == remote_hdl.cookie) { + lh->mlh_reg_lh.cookie = lock->l_handle.h_cookie; + lh->mlh_reg_mode = lock->l_granted_mode; + + LDLM_DEBUG(lock, "restoring lock cookie"); + DEBUG_REQ(D_HA, req, "restoring lock cookie "LPX64, + lh->mlh_reg_lh.cookie); + if (old_lock) + *old_lock = LDLM_LOCK_GET(lock); + spin_unlock(&exp->exp_ldlm_data.led_lock); + return; + } + } + spin_unlock(&exp->exp_ldlm_data.led_lock); + + /* + * If the xid matches, then we know this is a resent request, and allow + * it. (It's probably an OPEN, for which we don't send a lock. + */ + if (req_xid_is_last(req)) + return; + + /* + * This remote handle isn't enqueued, so we never received or processed + * this request. Clear MSG_RESENT, because it can be handled like any + * normal request now. + */ + lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT); + + DEBUG_REQ(D_HA, req, "no existing lock with rhandle "LPX64, + remote_hdl.cookie); +} + +static int mdt_intent_getattr(enum mdt_it_code opcode, + struct mdt_thread_info *info, + struct ldlm_lock **lockp, + int flags) +{ + struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_RMT]; + struct ldlm_lock *new_lock = NULL; + __u64 child_bits; + struct ldlm_reply *ldlm_rep; + struct ptlrpc_request *req; + struct mdt_body *reqbody; + struct mdt_body *repbody; + int rc; + ENTRY; + + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(reqbody); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(repbody); + + info->mti_spec.sp_ck_split = !!(reqbody->valid & OBD_MD_FLCKSPLIT); + info->mti_cross_ref = !!(reqbody->valid & OBD_MD_FLCROSSREF); + repbody->eadatasize = 0; + repbody->aclsize = 0; + + switch (opcode) { + case MDT_IT_LOOKUP: + child_bits = MDS_INODELOCK_LOOKUP; + break; + case MDT_IT_GETATTR: + child_bits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE; + break; + default: + CERROR("Unhandled till now"); + GOTO(out, rc = -EINVAL); + } + + rc = mdt_init_ucred(info, reqbody); + if (rc) + GOTO(out, rc); + + req = info->mti_pill.rc_req; + ldlm_rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); + mdt_set_disposition(info, ldlm_rep, DISP_IT_EXECD); + + /* Get lock from request for possible resent case. */ + mdt_intent_fixup_resent(info, *lockp, &new_lock, lhc); + + ldlm_rep->lock_policy_res2 = + mdt_getattr_name_lock(info, lhc, child_bits, ldlm_rep); + + if (mdt_get_disposition(ldlm_rep, DISP_LOOKUP_NEG)) + ldlm_rep->lock_policy_res2 = 0; + if (!mdt_get_disposition(ldlm_rep, DISP_LOOKUP_POS) || + ldlm_rep->lock_policy_res2) { + lhc->mlh_reg_lh.cookie = 0ull; + GOTO(out_ucred, rc = ELDLM_LOCK_ABORTED); + } + + rc = mdt_intent_lock_replace(info, lockp, new_lock, lhc, flags); + EXIT; +out_ucred: + mdt_exit_ucred(info); +out: + mdt_shrink_reply(info); + return rc; +} + +static int mdt_intent_reint(enum mdt_it_code opcode, + struct mdt_thread_info *info, + struct ldlm_lock **lockp, + int flags) +{ + struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_RMT]; + struct ldlm_reply *rep = NULL; + long opc; + int rc; + + static const struct req_format *intent_fmts[REINT_MAX] = { + [REINT_CREATE] = &RQF_LDLM_INTENT_CREATE, + [REINT_OPEN] = &RQF_LDLM_INTENT_OPEN + }; + + ENTRY; + + opc = mdt_reint_opcode(info, intent_fmts); + if (opc < 0) + GOTO(out, rc = opc); + + if (mdt_it_flavor[opcode].it_reint != opc) { + CERROR("Reint code %ld doesn't match intent: %d\n", + opc, opcode); + GOTO(out, rc = err_serious(-EPROTO)); + } + + /* Get lock from request for possible resent case. */ + mdt_intent_fixup_resent(info, *lockp, NULL, lhc); + + rc = mdt_reint_internal(info, lhc, opc); + + /* Check whether the reply has been packed successfully. */ + if (mdt_info_req(info)->rq_repmsg != NULL) + rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); + if (rep == NULL) + GOTO(out, rc = err_serious(-EFAULT)); + + /* MDC expects this in any case */ + if (rc != 0) + mdt_set_disposition(info, rep, DISP_LOOKUP_EXECD); + + /* Cross-ref case, the lock should be returned to the client */ + if (rc == -EREMOTE) { + LASSERT(lustre_handle_is_used(&lhc->mlh_reg_lh)); + rep->lock_policy_res2 = 0; + rc = mdt_intent_lock_replace(info, lockp, NULL, lhc, flags); + GOTO(out, rc); + } + rep->lock_policy_res2 = clear_serious(rc); + + lhc->mlh_reg_lh.cookie = 0ull; + rc = ELDLM_LOCK_ABORTED; + EXIT; +out: + return rc; +} + +static int mdt_intent_code(long itcode) +{ + int rc; + + switch(itcode) { + case IT_OPEN: + rc = MDT_IT_OPEN; + break; + case IT_OPEN|IT_CREAT: + rc = MDT_IT_OCREAT; + break; + case IT_CREAT: + rc = MDT_IT_CREATE; + break; + case IT_READDIR: + rc = MDT_IT_READDIR; + break; + case IT_GETATTR: + rc = MDT_IT_GETATTR; + break; + case IT_LOOKUP: + rc = MDT_IT_LOOKUP; + break; + case IT_UNLINK: + rc = MDT_IT_UNLINK; + break; + case IT_TRUNC: + rc = MDT_IT_TRUNC; + break; + case IT_GETXATTR: + rc = MDT_IT_GETXATTR; + break; + default: + CERROR("Unknown intent opcode: %ld\n", itcode); + rc = -EINVAL; + break; + } + return rc; +} + +static int mdt_intent_opc(long itopc, struct mdt_thread_info *info, + struct ldlm_lock **lockp, int flags) +{ + struct req_capsule *pill; + struct mdt_it_flavor *flv; + int opc; + int rc; + ENTRY; + + opc = mdt_intent_code(itopc); + if (opc < 0) + RETURN(-EINVAL); + + pill = &info->mti_pill; + flv = &mdt_it_flavor[opc]; + + if (flv->it_fmt != NULL) + req_capsule_extend(pill, flv->it_fmt); + + rc = mdt_unpack_req_pack_rep(info, flv->it_flags); + if (rc == 0) { + struct ptlrpc_request *req = mdt_info_req(info); + if (flv->it_flags & MUTABOR && + req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) + rc = -EROFS; + } + if (rc == 0 && flv->it_act != NULL) { + /* execute policy */ + rc = flv->it_act(opc, info, lockp, flags); + } else + rc = -EOPNOTSUPP; + RETURN(rc); +} + +static int mdt_intent_policy(struct ldlm_namespace *ns, + struct ldlm_lock **lockp, void *req_cookie, + ldlm_mode_t mode, int flags, void *data) +{ + struct mdt_thread_info *info; + struct ptlrpc_request *req = req_cookie; + struct ldlm_intent *it; + struct req_capsule *pill; + int rc; + + ENTRY; + + LASSERT(req != NULL); + + info = lu_context_key_get(&req->rq_svc_thread->t_env->le_ctx, + &mdt_thread_key); + LASSERT(info != NULL); + pill = &info->mti_pill; + LASSERT(pill->rc_req == req); + + if (req->rq_reqmsg->lm_bufcount > DLM_INTENT_IT_OFF) { + req_capsule_extend(pill, &RQF_LDLM_INTENT); + it = req_capsule_client_get(pill, &RMF_LDLM_INTENT); + if (it != NULL) { + const struct ldlm_request *dlmreq; + __u64 req_bits; +#if 0 + struct ldlm_lock *lock = *lockp; + + LDLM_DEBUG(lock, "intent policy opc: %s\n", + ldlm_it2str(it->opc)); +#endif + + rc = mdt_intent_opc(it->opc, info, lockp, flags); + if (rc == 0) + rc = ELDLM_OK; + + /* + * Lock without inodebits makes no sense and will oops + * later in ldlm. Let's check it now to see if we have + * wrong lock from client or bits get corrupted + * somewhere in mdt_intent_opc(). + */ + dlmreq = info->mti_dlm_req; + req_bits = dlmreq->lock_desc.l_policy_data.l_inodebits.bits; + LASSERT(req_bits != 0); + + } else + rc = err_serious(-EFAULT); + } else { + /* No intent was provided */ + LASSERT(pill->rc_fmt == &RQF_LDLM_ENQUEUE); + rc = req_capsule_pack(pill); + if (rc) + rc = err_serious(rc); + } + RETURN(rc); +} + +/* + * Seq wrappers + */ +static void mdt_seq_adjust(const struct lu_env *env, + struct mdt_device *m, int lost) +{ + struct lu_site *ls = m->mdt_md_dev.md_lu_dev.ld_site; + struct lu_range out; + ENTRY; + + LASSERT(ls && ls->ls_server_seq); + LASSERT(lost >= 0); + /* get extra seq from seq_server, moving it's range up */ + while (lost-- > 0) { + seq_server_alloc_meta(ls->ls_server_seq, NULL, &out, env); + } + EXIT; +} + +static int mdt_seq_fini(const struct lu_env *env, + struct mdt_device *m) +{ + struct lu_site *ls = m->mdt_md_dev.md_lu_dev.ld_site; + ENTRY; + + if (ls && ls->ls_server_seq) { + seq_server_fini(ls->ls_server_seq, env); + OBD_FREE_PTR(ls->ls_server_seq); + ls->ls_server_seq = NULL; + } + + if (ls && ls->ls_control_seq) { + seq_server_fini(ls->ls_control_seq, env); + OBD_FREE_PTR(ls->ls_control_seq); + ls->ls_control_seq = NULL; + } + + if (ls && ls->ls_client_seq) { + seq_client_fini(ls->ls_client_seq); + OBD_FREE_PTR(ls->ls_client_seq); + ls->ls_client_seq = NULL; + } + + RETURN(0); +} + +static int mdt_seq_init(const struct lu_env *env, + const char *uuid, + struct mdt_device *m) +{ + struct lu_site *ls; + char *prefix; + int rc; + ENTRY; + + ls = m->mdt_md_dev.md_lu_dev.ld_site; + + /* + * This is sequence-controller node. Init seq-controller server on local + * MDT. + */ + if (ls->ls_node_id == 0) { + LASSERT(ls->ls_control_seq == NULL); + + OBD_ALLOC_PTR(ls->ls_control_seq); + if (ls->ls_control_seq == NULL) + RETURN(-ENOMEM); + + rc = seq_server_init(ls->ls_control_seq, + m->mdt_bottom, uuid, + LUSTRE_SEQ_CONTROLLER, + env); + + if (rc) + GOTO(out_seq_fini, rc); + + OBD_ALLOC_PTR(ls->ls_client_seq); + if (ls->ls_client_seq == NULL) + GOTO(out_seq_fini, rc = -ENOMEM); + + OBD_ALLOC(prefix, MAX_OBD_NAME + 5); + if (prefix == NULL) { + OBD_FREE_PTR(ls->ls_client_seq); + GOTO(out_seq_fini, rc = -ENOMEM); + } + + snprintf(prefix, MAX_OBD_NAME + 5, "ctl-%s", + uuid); + + /* + * Init seq-controller client after seq-controller server is + * ready. Pass ls->ls_control_seq to it for direct talking. + */ + rc = seq_client_init(ls->ls_client_seq, NULL, + LUSTRE_SEQ_METADATA, prefix, + ls->ls_control_seq); + OBD_FREE(prefix, MAX_OBD_NAME + 5); + + if (rc) + GOTO(out_seq_fini, rc); + } + + /* Init seq-server on local MDT */ + LASSERT(ls->ls_server_seq == NULL); + + OBD_ALLOC_PTR(ls->ls_server_seq); + if (ls->ls_server_seq == NULL) + GOTO(out_seq_fini, rc = -ENOMEM); + + rc = seq_server_init(ls->ls_server_seq, + m->mdt_bottom, uuid, + LUSTRE_SEQ_SERVER, + env); + if (rc) + GOTO(out_seq_fini, rc = -ENOMEM); + + /* Assign seq-controller client to local seq-server. */ + if (ls->ls_node_id == 0) { + LASSERT(ls->ls_client_seq != NULL); + + rc = seq_server_set_cli(ls->ls_server_seq, + ls->ls_client_seq, + env); + } + + EXIT; +out_seq_fini: + if (rc) + mdt_seq_fini(env, m); + + return rc; +} +/* + * Init client sequence manager which is used by local MDS to talk to sequence + * controller on remote node. + */ +static int mdt_seq_init_cli(const struct lu_env *env, + struct mdt_device *m, + struct lustre_cfg *cfg) +{ + struct lu_site *ls = m->mdt_md_dev.md_lu_dev.ld_site; + struct obd_device *mdc; + struct obd_uuid *uuidp, *mdcuuidp; + char *uuid_str, *mdc_uuid_str; + int rc; + int index; + struct mdt_thread_info *info; + char *p, *index_string = lustre_cfg_string(cfg, 2); + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + uuidp = &info->mti_u.uuid[0]; + mdcuuidp = &info->mti_u.uuid[1]; + + LASSERT(index_string); + + index = simple_strtol(index_string, &p, 10); + if (*p) { + CERROR("Invalid index in lustre_cgf, offset 2\n"); + RETURN(-EINVAL); + } + + /* check if this is adding the first MDC and controller is not yet + * initialized. */ + if (index != 0 || ls->ls_client_seq) + RETURN(0); + + uuid_str = lustre_cfg_string(cfg, 1); + mdc_uuid_str = lustre_cfg_string(cfg, 4); + obd_str2uuid(uuidp, uuid_str); + obd_str2uuid(mdcuuidp, mdc_uuid_str); + + mdc = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, mdcuuidp); + if (!mdc) { + CERROR("can't find controller MDC by uuid %s\n", + uuid_str); + rc = -ENOENT; + } else if (!mdc->obd_set_up) { + CERROR("target %s not set up\n", mdc->obd_name); + rc = -EINVAL; + } else { + LASSERT(ls->ls_control_exp); + OBD_ALLOC_PTR(ls->ls_client_seq); + if (ls->ls_client_seq != NULL) { + char *prefix; + + OBD_ALLOC(prefix, MAX_OBD_NAME + 5); + if (!prefix) + RETURN(-ENOMEM); + + snprintf(prefix, MAX_OBD_NAME + 5, "ctl-%s", + mdc->obd_name); + + rc = seq_client_init(ls->ls_client_seq, + ls->ls_control_exp, + LUSTRE_SEQ_METADATA, + prefix, NULL); + OBD_FREE(prefix, MAX_OBD_NAME + 5); + } else + rc = -ENOMEM; + + if (rc) + RETURN(rc); + + LASSERT(ls->ls_server_seq != NULL); + rc = seq_server_set_cli(ls->ls_server_seq, ls->ls_client_seq, + env); + } + + RETURN(rc); +} + +static void mdt_seq_fini_cli(struct mdt_device *m) +{ + struct lu_site *ls; + + ENTRY; + + ls = m->mdt_md_dev.md_lu_dev.ld_site; + + if (ls && ls->ls_server_seq) + seq_server_set_cli(ls->ls_server_seq, + NULL, NULL); + + if (ls && ls->ls_control_exp) { + class_export_put(ls->ls_control_exp); + ls->ls_control_exp = NULL; + } + EXIT; +} + +/* + * FLD wrappers + */ +static int mdt_fld_fini(const struct lu_env *env, + struct mdt_device *m) +{ + struct lu_site *ls = m->mdt_md_dev.md_lu_dev.ld_site; + ENTRY; + + if (ls && ls->ls_server_fld) { + fld_server_fini(ls->ls_server_fld, env); + OBD_FREE_PTR(ls->ls_server_fld); + ls->ls_server_fld = NULL; + } + + RETURN(0); +} + +static int mdt_fld_init(const struct lu_env *env, + const char *uuid, + struct mdt_device *m) +{ + struct lu_site *ls; + int rc; + ENTRY; + + ls = m->mdt_md_dev.md_lu_dev.ld_site; + + OBD_ALLOC_PTR(ls->ls_server_fld); + if (ls->ls_server_fld == NULL) + RETURN(rc = -ENOMEM); + + rc = fld_server_init(ls->ls_server_fld, + m->mdt_bottom, uuid, env); + if (rc) { + OBD_FREE_PTR(ls->ls_server_fld); + ls->ls_server_fld = NULL; + RETURN(rc); + } + + RETURN(0); +} + +/* device init/fini methods */ +static void mdt_stop_ptlrpc_service(struct mdt_device *m) +{ + ENTRY; + if (m->mdt_regular_service != NULL) { + ptlrpc_unregister_service(m->mdt_regular_service); + m->mdt_regular_service = NULL; + } + if (m->mdt_readpage_service != NULL) { + ptlrpc_unregister_service(m->mdt_readpage_service); + m->mdt_readpage_service = NULL; + } + if (m->mdt_xmds_service != NULL) { + ptlrpc_unregister_service(m->mdt_xmds_service); + m->mdt_xmds_service = NULL; + } + if (m->mdt_setattr_service != NULL) { + ptlrpc_unregister_service(m->mdt_setattr_service); + m->mdt_setattr_service = NULL; + } + if (m->mdt_mdsc_service != NULL) { + ptlrpc_unregister_service(m->mdt_mdsc_service); + m->mdt_mdsc_service = NULL; + } + if (m->mdt_mdss_service != NULL) { + ptlrpc_unregister_service(m->mdt_mdss_service); + m->mdt_mdss_service = NULL; + } + if (m->mdt_dtss_service != NULL) { + ptlrpc_unregister_service(m->mdt_dtss_service); + m->mdt_dtss_service = NULL; + } + if (m->mdt_fld_service != NULL) { + ptlrpc_unregister_service(m->mdt_fld_service); + m->mdt_fld_service = NULL; + } + ENTRY; +} + +static int mdt_start_ptlrpc_service(struct mdt_device *m) +{ + int rc; + static struct ptlrpc_service_conf conf; + cfs_proc_dir_entry_t *procfs_entry; + ENTRY; + + procfs_entry = m->mdt_md_dev.md_lu_dev.ld_obd->obd_proc_entry; + + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = MDS_MAXREQSIZE, + .psc_max_reply_size = MDS_MAXREPSIZE, + .psc_req_portal = MDS_REQUEST_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + /* + * We'd like to have a mechanism to set this on a per-device + * basis, but alas... + */ + .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS), + MDT_MAX_THREADS), + .psc_max_threads = MDT_MAX_THREADS, + .psc_ctx_tags = LCT_MD_THREAD + }; + + m->mdt_ldlm_client = &m->mdt_md_dev.md_lu_dev.ld_obd->obd_ldlm_client; + ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, + "mdt_ldlm_client", m->mdt_ldlm_client); + + m->mdt_regular_service = + ptlrpc_init_svc_conf(&conf, mdt_regular_handle, LUSTRE_MDT_NAME, + procfs_entry, NULL, LUSTRE_MDT_NAME); + if (m->mdt_regular_service == NULL) + RETURN(-ENOMEM); + + rc = ptlrpc_start_threads(NULL, m->mdt_regular_service); + if (rc) + GOTO(err_mdt_svc, rc); + + /* + * readpage service configuration. Parameters have to be adjusted, + * ideally. + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = MDS_MAXREQSIZE, + .psc_max_reply_size = MDS_MAXREPSIZE, + .psc_req_portal = MDS_READPAGE_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS), + MDT_MAX_THREADS), + .psc_max_threads = MDT_MAX_THREADS, + .psc_ctx_tags = LCT_MD_THREAD + }; + m->mdt_readpage_service = + ptlrpc_init_svc_conf(&conf, mdt_readpage_handle, + LUSTRE_MDT_NAME "_readpage", + procfs_entry, NULL, "mdt_rdpg"); + + if (m->mdt_readpage_service == NULL) { + CERROR("failed to start readpage service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_readpage_service); + + /* + * setattr service configuration. + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = MDS_MAXREQSIZE, + .psc_max_reply_size = MDS_MAXREPSIZE, + .psc_req_portal = MDS_SETATTR_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS), + MDT_MAX_THREADS), + .psc_max_threads = MDT_MAX_THREADS, + .psc_ctx_tags = LCT_MD_THREAD + }; + + m->mdt_setattr_service = + ptlrpc_init_svc_conf(&conf, mdt_regular_handle, + LUSTRE_MDT_NAME "_setattr", + procfs_entry, NULL, "mdt_attr"); + + if (!m->mdt_setattr_service) { + CERROR("failed to start setattr service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_setattr_service); + if (rc) + GOTO(err_mdt_svc, rc); + + /* + * sequence controller service configuration + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = SEQ_MAXREQSIZE, + .psc_max_reply_size = SEQ_MAXREPSIZE, + .psc_req_portal = SEQ_CONTROLLER_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = SEQ_NUM_THREADS, + .psc_max_threads = SEQ_NUM_THREADS, + .psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD + }; + + m->mdt_mdsc_service = + ptlrpc_init_svc_conf(&conf, mdt_mdsc_handle, + LUSTRE_MDT_NAME"_mdsc", + procfs_entry, NULL, "mdt_mdsc"); + if (!m->mdt_mdsc_service) { + CERROR("failed to start seq controller service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_mdsc_service); + if (rc) + GOTO(err_mdt_svc, rc); + + /* + * metadata sequence server service configuration + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = SEQ_MAXREQSIZE, + .psc_max_reply_size = SEQ_MAXREPSIZE, + .psc_req_portal = SEQ_METADATA_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = SEQ_NUM_THREADS, + .psc_max_threads = SEQ_NUM_THREADS, + .psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD + }; + + m->mdt_mdss_service = + ptlrpc_init_svc_conf(&conf, mdt_mdss_handle, + LUSTRE_MDT_NAME"_mdss", + procfs_entry, NULL, "mdt_mdss"); + if (!m->mdt_mdss_service) { + CERROR("failed to start metadata seq server service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_mdss_service); + if (rc) + GOTO(err_mdt_svc, rc); + + + /* + * Data sequence server service configuration. We want to have really + * cluster-wide sequences space. This is why we start only one sequence + * controller which manages space. + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = SEQ_MAXREQSIZE, + .psc_max_reply_size = SEQ_MAXREPSIZE, + .psc_req_portal = SEQ_DATA_PORTAL, + .psc_rep_portal = OSC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = SEQ_NUM_THREADS, + .psc_max_threads = SEQ_NUM_THREADS, + .psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD + }; + + m->mdt_dtss_service = + ptlrpc_init_svc_conf(&conf, mdt_dtss_handle, + LUSTRE_MDT_NAME"_dtss", + procfs_entry, NULL, "mdt_dtss"); + if (!m->mdt_dtss_service) { + CERROR("failed to start data seq server service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_dtss_service); + if (rc) + GOTO(err_mdt_svc, rc); + + /* FLD service start */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = FLD_MAXREQSIZE, + .psc_max_reply_size = FLD_MAXREPSIZE, + .psc_req_portal = FLD_REQUEST_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = FLD_NUM_THREADS, + .psc_max_threads = FLD_NUM_THREADS, + .psc_ctx_tags = LCT_DT_THREAD|LCT_MD_THREAD + }; + + m->mdt_fld_service = + ptlrpc_init_svc_conf(&conf, mdt_fld_handle, + LUSTRE_MDT_NAME"_fld", + procfs_entry, NULL, "mdt_fld"); + if (!m->mdt_fld_service) { + CERROR("failed to start fld service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_fld_service); + if (rc) + GOTO(err_mdt_svc, rc); + + /* + * mds-mds service configuration. Separate portal is used to allow + * mds-mds requests be not blocked during recovery. + */ + conf = (typeof(conf)) { + .psc_nbufs = MDS_NBUFS, + .psc_bufsize = MDS_BUFSIZE, + .psc_max_req_size = MDS_MAXREQSIZE, + .psc_max_reply_size = MDS_MAXREPSIZE, + .psc_req_portal = MDS_MDS_PORTAL, + .psc_rep_portal = MDC_REPLY_PORTAL, + .psc_watchdog_timeout = MDT_SERVICE_WATCHDOG_TIMEOUT, + .psc_min_threads = min(max(mdt_num_threads, MDT_MIN_THREADS), + MDT_MAX_THREADS), + .psc_max_threads = MDT_MAX_THREADS, + .psc_ctx_tags = LCT_MD_THREAD + }; + m->mdt_xmds_service = ptlrpc_init_svc_conf(&conf, mdt_xmds_handle, + LUSTRE_MDT_NAME "_mds", + procfs_entry, NULL, "mdt_xmds"); + + if (m->mdt_xmds_service == NULL) { + CERROR("failed to start readpage service\n"); + GOTO(err_mdt_svc, rc = -ENOMEM); + } + + rc = ptlrpc_start_threads(NULL, m->mdt_xmds_service); + if (rc) + GOTO(err_mdt_svc, rc); + + EXIT; +err_mdt_svc: + if (rc) + mdt_stop_ptlrpc_service(m); + + return rc; +} + +static void mdt_stack_fini(const struct lu_env *env, + struct mdt_device *m, struct lu_device *top) +{ + struct lu_device *d = top, *n; + struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd; + struct lustre_cfg_bufs *bufs; + struct lustre_cfg *lcfg; + struct mdt_thread_info *info; + char flags[3]=""; + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(info != NULL); + + bufs = &info->mti_u.bufs; + /* process cleanup, pass mdt obd name to get obd umount flags */ + lustre_cfg_bufs_reset(bufs, obd->obd_name); + if (obd->obd_force) + strcat(flags, "F"); + if (obd->obd_fail) + strcat(flags, "A"); + lustre_cfg_bufs_set_string(bufs, 1, flags); + lcfg = lustre_cfg_new(LCFG_CLEANUP, bufs); + if (!lcfg) { + CERROR("Cannot alloc lcfg!\n"); + return; + } + + LASSERT(top); + top->ld_ops->ldo_process_config(env, top, lcfg); + lustre_cfg_free(lcfg); + + lu_site_purge(env, top->ld_site, ~0); + while (d != NULL) { + struct obd_type *type; + struct lu_device_type *ldt = d->ld_type; + + /* each fini() returns next device in stack of layers + * * so we can avoid the recursion */ + n = ldt->ldt_ops->ldto_device_fini(env, d); + lu_device_put(d); + ldt->ldt_ops->ldto_device_free(env, d); + type = ldt->ldt_obd_type; + type->typ_refcnt--; + class_put_type(type); + + /* switch to the next device in the layer */ + d = n; + } + m->mdt_child = NULL; + m->mdt_bottom = NULL; +} + +static struct lu_device *mdt_layer_setup(const struct lu_env *env, + const char *typename, + struct lu_device *child, + struct lustre_cfg *cfg) +{ + const char *dev = lustre_cfg_string(cfg, 0); + struct obd_type *type; + struct lu_device_type *ldt; + struct lu_device *d; + int rc; + ENTRY; + + /* find the type */ + type = class_get_type(typename); + if (!type) { + CERROR("Unknown type: '%s'\n", typename); + GOTO(out, rc = -ENODEV); + } + + rc = lu_context_refill(&env->le_ctx); + if (rc != 0) { + CERROR("Failure to refill context: '%d'\n", rc); + GOTO(out_type, rc); + } + + if (env->le_ses != NULL) { + rc = lu_context_refill(env->le_ses); + if (rc != 0) { + CERROR("Failure to refill session: '%d'\n", rc); + GOTO(out_type, rc); + } + } + + ldt = type->typ_lu; + if (ldt == NULL) { + CERROR("type: '%s'\n", typename); + GOTO(out_type, rc = -EINVAL); + } + + ldt->ldt_obd_type = type; + d = ldt->ldt_ops->ldto_device_alloc(env, ldt, cfg); + if (IS_ERR(d)) { + CERROR("Cannot allocate device: '%s'\n", typename); + GOTO(out_type, rc = -ENODEV); + } + + LASSERT(child->ld_site); + d->ld_site = child->ld_site; + + type->typ_refcnt++; + rc = ldt->ldt_ops->ldto_device_init(env, d, dev, child); + if (rc) { + CERROR("can't init device '%s', rc %d\n", typename, rc); + GOTO(out_alloc, rc); + } + lu_device_get(d); + + RETURN(d); + +out_alloc: + ldt->ldt_ops->ldto_device_free(env, d); + type->typ_refcnt--; +out_type: + class_put_type(type); +out: + return ERR_PTR(rc); +} + +static int mdt_stack_init(const struct lu_env *env, + struct mdt_device *m, struct lustre_cfg *cfg) +{ + struct lu_device *d = &m->mdt_md_dev.md_lu_dev; + struct lu_device *tmp; + struct md_device *md; + int rc; + ENTRY; + + /* init the stack */ + tmp = mdt_layer_setup(env, LUSTRE_OSD_NAME, d, cfg); + if (IS_ERR(tmp)) { + RETURN(PTR_ERR(tmp)); + } + m->mdt_bottom = lu2dt_dev(tmp); + d = tmp; + tmp = mdt_layer_setup(env, LUSTRE_MDD_NAME, d, cfg); + if (IS_ERR(tmp)) { + GOTO(out, rc = PTR_ERR(tmp)); + } + d = tmp; + md = lu2md_dev(d); + + tmp = mdt_layer_setup(env, LUSTRE_CMM_NAME, d, cfg); + if (IS_ERR(tmp)) { + GOTO(out, rc = PTR_ERR(tmp)); + } + d = tmp; + /*set mdd upcall device*/ + md_upcall_dev_set(md, lu2md_dev(d)); + + md = lu2md_dev(d); + /*set cmm upcall device*/ + md_upcall_dev_set(md, &m->mdt_md_dev); + + m->mdt_child = lu2md_dev(d); + + /* process setup config */ + tmp = &m->mdt_md_dev.md_lu_dev; + rc = tmp->ld_ops->ldo_process_config(env, tmp, cfg); + GOTO(out, rc); +out: + /* fini from last known good lu_device */ + if (rc) + mdt_stack_fini(env, m, d); + + return rc; +} + +static void mdt_fini(const struct lu_env *env, struct mdt_device *m) +{ + struct md_device *next = m->mdt_child; + struct lu_device *d = &m->mdt_md_dev.md_lu_dev; + struct lu_site *ls = d->ld_site; + struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd; + ENTRY; + + ping_evictor_stop(); + + target_recovery_fini(obd); + mdt_stop_ptlrpc_service(m); + + mdt_fs_cleanup(env, m); + + upcall_cache_cleanup(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + + upcall_cache_cleanup(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + + if (m->mdt_namespace != NULL) { + ldlm_namespace_free(m->mdt_namespace, d->ld_obd->obd_force); + d->ld_obd->obd_namespace = m->mdt_namespace = NULL; + } + + mdt_seq_fini(env, m); + mdt_seq_fini_cli(m); + mdt_fld_fini(env, m); + mdt_procfs_fini(m); + ptlrpc_lprocfs_unregister_obd(d->ld_obd); + lprocfs_obd_cleanup(d->ld_obd); + + if (m->mdt_rootsquash_info) { + OBD_FREE_PTR(m->mdt_rootsquash_info); + m->mdt_rootsquash_info = NULL; + } + + next->md_ops->mdo_init_capa_ctxt(env, next, 0, 0, 0, NULL); + del_timer(&m->mdt_ck_timer); + mdt_ck_thread_stop(m); + + /* finish the stack */ + mdt_stack_fini(env, m, md2lu_dev(m->mdt_child)); + + if (ls) { + if (!list_empty(&ls->ls_lru) || ls->ls_total != 0) { + /* + * Uh-oh, objects still exist. + */ + static DECLARE_LU_CDEBUG_PRINT_INFO(cookie, D_ERROR); + + lu_site_print(env, ls, &cookie, lu_cdebug_printer); + } + + lu_site_fini(ls); + OBD_FREE_PTR(ls); + d->ld_site = NULL; + } + LASSERT(atomic_read(&d->ld_ref) == 0); + md_device_fini(&m->mdt_md_dev); + + EXIT; +} + +static void fsoptions_to_mdt_flags(struct mdt_device *m, char *options) +{ + char *p = options; + + if (!options) + return; + + while (*options) { + int len; + + while (*p && *p != ',') + p++; + + len = p - options; + if ((len == sizeof("user_xattr") - 1) && + (memcmp(options, "user_xattr", len) == 0)) { + m->mdt_opts.mo_user_xattr = 1; + LCONSOLE_INFO("Enabling user_xattr\n"); + } else if ((len == sizeof("nouser_xattr") - 1) && + (memcmp(options, "nouser_xattr", len) == 0)) { + m->mdt_opts.mo_user_xattr = 0; + LCONSOLE_INFO("Disabling user_xattr\n"); + } else if ((len == sizeof("acl") - 1) && + (memcmp(options, "acl", len) == 0)) { +#ifdef CONFIG_FS_POSIX_ACL + m->mdt_opts.mo_acl = 1; + LCONSOLE_INFO("Enabling ACL\n"); +#else + m->mdt_opts.mo_acl = 0; + CWARN("ignoring unsupported acl mount option\n"); + LCONSOLE_INFO("Disabling ACL\n"); +#endif + } else if ((len == sizeof("noacl") - 1) && + (memcmp(options, "noacl", len) == 0)) { + m->mdt_opts.mo_acl = 0; + LCONSOLE_INFO("Disabling ACL\n"); + } + + options = ++p; + } +} + +int mdt_postrecov(const struct lu_env *, struct mdt_device *); + +static int mdt_init0(const struct lu_env *env, struct mdt_device *m, + struct lu_device_type *ldt, struct lustre_cfg *cfg) +{ + struct lprocfs_static_vars lvars; + struct mdt_thread_info *info; + struct obd_device *obd; + const char *dev = lustre_cfg_string(cfg, 0); + const char *num = lustre_cfg_string(cfg, 2); + struct lustre_mount_info *lmi; + struct lustre_sb_info *lsi; + struct lu_site *s; + int rc; + ENTRY; + + info = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(info != NULL); + + obd = class_name2obd(dev); + LASSERT(obd != NULL); + + spin_lock_init(&m->mdt_transno_lock); + + m->mdt_max_mdsize = MAX_MD_SIZE; + m->mdt_max_cookiesize = sizeof(struct llog_cookie); + + m->mdt_opts.mo_user_xattr = 0; + m->mdt_opts.mo_acl = 0; + lmi = server_get_mount_2(dev); + if (lmi == NULL) { + CERROR("Cannot get mount info for %s!\n", dev); + RETURN(-EFAULT); + } else { + lsi = s2lsi(lmi->lmi_sb); + fsoptions_to_mdt_flags(m, lsi->lsi_lmd->lmd_opts); + server_put_mount_2(dev, lmi->lmi_mnt); + } + + spin_lock_init(&m->mdt_ioepoch_lock); + m->mdt_opts.mo_compat_resname = 0; + m->mdt_capa_timeout = CAPA_TIMEOUT; + m->mdt_capa_alg = CAPA_HMAC_ALG_SHA1; + m->mdt_ck_timeout = CAPA_KEY_TIMEOUT; + + spin_lock_init(&m->mdt_client_bitmap_lock); + + OBD_ALLOC_PTR(s); + if (s == NULL) + RETURN(-ENOMEM); + + md_device_init(&m->mdt_md_dev, ldt); + m->mdt_md_dev.md_lu_dev.ld_ops = &mdt_lu_ops; + m->mdt_md_dev.md_lu_dev.ld_obd = obd; + /* set this lu_device to obd, because error handling need it */ + obd->obd_lu_dev = &m->mdt_md_dev.md_lu_dev; + + rc = lu_site_init(s, &m->mdt_md_dev.md_lu_dev); + if (rc) { + CERROR("Can't init lu_site, rc %d\n", rc); + GOTO(err_free_site, rc); + } + + lprocfs_init_vars(mdt, &lvars); + rc = lprocfs_obd_setup(obd, lvars.obd_vars); + if (rc) { + CERROR("Can't init lprocfs, rc %d\n", rc); + GOTO(err_fini_site, rc); + } + ptlrpc_lprocfs_register_obd(obd); + + rc = mdt_procfs_init(m, dev); + if (rc) { + CERROR("Can't init MDT lprocfs, rc %d\n", rc); + GOTO(err_fini_proc, rc); + } + + /* set server index */ + LASSERT(num); + s->ls_node_id = simple_strtol(num, NULL, 10); + + /* failover is the default + * FIXME: we do not failout mds0/mgs, which may cause some problems. + * assumed whose ls_node_id == 0 XXX + * */ + obd->obd_replayable = 1; + /* No connection accepted until configurations will finish */ + obd->obd_no_conn = 1; + + if (cfg->lcfg_bufcount > 4 && LUSTRE_CFG_BUFLEN(cfg, 4) > 0) { + char *str = lustre_cfg_string(cfg, 4); + if (strchr(str, 'n')) { + CWARN("%s: recovery disabled\n", obd->obd_name); + obd->obd_replayable = 0; + } + } + + /* init the stack */ + rc = mdt_stack_init(env, m, cfg); + if (rc) { + CERROR("Can't init device stack, rc %d\n", rc); + GOTO(err_fini_proc, rc); + } + + rc = mdt_fld_init(env, obd->obd_name, m); + if (rc) + GOTO(err_fini_stack, rc); + + rc = mdt_seq_init(env, obd->obd_name, m); + if (rc) + GOTO(err_fini_fld, rc); + + snprintf(info->mti_u.ns_name, sizeof info->mti_u.ns_name, + LUSTRE_MDT_NAME"-%p", m); + m->mdt_namespace = ldlm_namespace_new(info->mti_u.ns_name, + LDLM_NAMESPACE_SERVER); + if (m->mdt_namespace == NULL) + GOTO(err_fini_seq, rc = -ENOMEM); + + ldlm_register_intent(m->mdt_namespace, mdt_intent_policy); + /* set obd_namespace for compatibility with old code */ + obd->obd_namespace = m->mdt_namespace; + + m->mdt_identity_cache = upcall_cache_init(obd->obd_name, + "NONE", + &mdt_identity_upcall_cache_ops); + if (IS_ERR(m->mdt_identity_cache)) { + rc = PTR_ERR(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + GOTO(err_free_ns, rc); + } + + m->mdt_rmtacl_cache = upcall_cache_init(obd->obd_name, + MDT_RMTACL_UPCALL_PATH, + &mdt_rmtacl_upcall_cache_ops); + if (IS_ERR(m->mdt_rmtacl_cache)) { + rc = PTR_ERR(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + GOTO(err_free_ns, rc); + } + + m->mdt_ck_timer.function = mdt_ck_timer_callback; + m->mdt_ck_timer.data = (unsigned long)m; + init_timer(&m->mdt_ck_timer); + rc = mdt_ck_thread_start(m); + if (rc) + GOTO(err_free_ns, rc); + + rc = mdt_fs_setup(env, m, obd); + if (rc) + GOTO(err_capa, rc); + + target_recovery_init(obd, mdt_recovery_handle); + + rc = mdt_start_ptlrpc_service(m); + if (rc) + GOTO(err_fs_cleanup, rc); + + ping_evictor_start(); + + rc = lu_site_init_finish(s); + if (rc) + GOTO(err_stop_service, rc); + + if (obd->obd_recovering == 0) + mdt_postrecov(env, m); + + mdt_init_capa_ctxt(env, m); + + if (ldlm_timeout == LDLM_TIMEOUT_DEFAULT) + ldlm_timeout = 6; + + RETURN(0); + +err_stop_service: + ping_evictor_stop(); + mdt_stop_ptlrpc_service(m); +err_fs_cleanup: + target_recovery_fini(obd); + mdt_fs_cleanup(env, m); +err_capa: + del_timer(&m->mdt_ck_timer); + mdt_ck_thread_stop(m); +err_free_ns: + upcall_cache_cleanup(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + upcall_cache_cleanup(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + ldlm_namespace_free(m->mdt_namespace, 0); + obd->obd_namespace = m->mdt_namespace = NULL; +err_fini_seq: + mdt_seq_fini(env, m); +err_fini_fld: + mdt_fld_fini(env, m); +err_fini_stack: + mdt_stack_fini(env, m, md2lu_dev(m->mdt_child)); +err_fini_proc: + mdt_procfs_fini(m); + lprocfs_obd_cleanup(obd); +err_fini_site: + lu_site_fini(s); +err_free_site: + OBD_FREE_PTR(s); + + md_device_fini(&m->mdt_md_dev); + return (rc); +} + +/* used by MGS to process specific configurations */ +static int mdt_process_config(const struct lu_env *env, + struct lu_device *d, struct lustre_cfg *cfg) +{ + struct mdt_device *m = mdt_dev(d); + struct md_device *md_next = m->mdt_child; + struct lu_device *next = md2lu_dev(md_next); + int rc = 0; + ENTRY; + + switch (cfg->lcfg_command) { + case LCFG_PARAM: { + struct lprocfs_static_vars lvars; + struct obd_device *obd = d->ld_obd; + + lprocfs_init_vars(mdt, &lvars); + rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars, cfg, obd); + if (rc) + /* others are passed further */ + rc = next->ld_ops->ldo_process_config(env, next, cfg); + break; + } + case LCFG_ADD_MDC: + /* + * Add mdc hook to get first MDT uuid and connect it to + * ls->controller to use for seq manager. + */ + rc = next->ld_ops->ldo_process_config(env, next, cfg); + if (rc) + CERROR("Can't add mdc, rc %d\n", rc); + else + rc = mdt_seq_init_cli(env, mdt_dev(d), cfg); + break; + default: + /* others are passed further */ + rc = next->ld_ops->ldo_process_config(env, next, cfg); + break; + } + RETURN(rc); +} + +static struct lu_object *mdt_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d) +{ + struct mdt_object *mo; + + ENTRY; + + OBD_ALLOC_PTR(mo); + if (mo != NULL) { + struct lu_object *o; + struct lu_object_header *h; + + o = &mo->mot_obj.mo_lu; + h = &mo->mot_header; + lu_object_header_init(h); + lu_object_init(o, h, d); + lu_object_add_top(h, o); + o->lo_ops = &mdt_obj_ops; + RETURN(o); + } else + RETURN(NULL); +} + +static int mdt_object_init(const struct lu_env *env, struct lu_object *o) +{ + struct mdt_device *d = mdt_dev(o->lo_dev); + struct lu_device *under; + struct lu_object *below; + int rc = 0; + ENTRY; + + CDEBUG(D_INFO, "object init, fid = "DFID"\n", + PFID(lu_object_fid(o))); + + under = &d->mdt_child->md_lu_dev; + below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under); + if (below != NULL) { + lu_object_add(o, below); + } else + rc = -ENOMEM; + + RETURN(rc); +} + +static void mdt_object_free(const struct lu_env *env, struct lu_object *o) +{ + struct mdt_object *mo = mdt_obj(o); + struct lu_object_header *h; + ENTRY; + + h = o->lo_header; + CDEBUG(D_INFO, "object free, fid = "DFID"\n", + PFID(lu_object_fid(o))); + + lu_object_fini(o); + lu_object_header_fini(h); + OBD_FREE_PTR(mo); + EXIT; +} + +static int mdt_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *o) +{ + return (*p)(env, cookie, LUSTRE_MDT_NAME"-object@%p", o); +} + +static struct lu_device_operations mdt_lu_ops = { + .ldo_object_alloc = mdt_object_alloc, + .ldo_process_config = mdt_process_config +}; + +static struct lu_object_operations mdt_obj_ops = { + .loo_object_init = mdt_object_init, + .loo_object_free = mdt_object_free, + .loo_object_print = mdt_object_print +}; + +/* mds_connect_internal */ +static int mdt_connect_internal(struct obd_export *exp, + struct mdt_device *mdt, + struct obd_connect_data *data) +{ + __u64 flags; + + if (data != NULL) { + data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; + data->ocd_ibits_known &= MDS_INODELOCK_FULL; + + /* If no known bits (which should not happen, probably, + as everybody should support LOOKUP and UPDATE bits at least) + revert to compat mode with plain locks. */ + if (!data->ocd_ibits_known && + data->ocd_connect_flags & OBD_CONNECT_IBITS) + data->ocd_connect_flags &= ~OBD_CONNECT_IBITS; + + if (!mdt->mdt_opts.mo_acl) + data->ocd_connect_flags &= ~OBD_CONNECT_ACL; + + if (!mdt->mdt_opts.mo_user_xattr) + data->ocd_connect_flags &= ~OBD_CONNECT_XATTR; + + if (!mdt->mdt_opts.mo_mds_capa) + data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA; + + if (!mdt->mdt_opts.mo_oss_capa) + data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA; + + spin_lock(&exp->exp_lock); + exp->exp_connect_flags = data->ocd_connect_flags; + spin_unlock(&exp->exp_lock); + data->ocd_version = LUSTRE_VERSION_CODE; + exp->exp_mdt_data.med_ibits_known = data->ocd_ibits_known; + } + +#if 0 + if (mdt->mdt_opts.mo_acl && + ((exp->exp_connect_flags & OBD_CONNECT_ACL) == 0)) { + CWARN("%s: MDS requires ACL support but client does not\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } +#endif + + flags = OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT; + if ((exp->exp_connect_flags & flags) == flags) { + CWARN("%s: both local and remote client flags are set\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + + if (mdt->mdt_opts.mo_mds_capa && + ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) { + CWARN("%s: MDS requires capability support, but client not\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + + if (mdt->mdt_opts.mo_oss_capa && + ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) { + CWARN("%s: MDS requires OSS capability support, " + "but client not\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + + return 0; +} + +/* mds_connect copy */ +static int mdt_obd_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + struct mdt_client_data *mcd; + struct obd_export *exp; + struct mdt_device *mdt; + int rc; + ENTRY; + + LASSERT(env != NULL); + if (!conn || !obd || !cluuid) + RETURN(-EINVAL); + + mdt = mdt_dev(obd->obd_lu_dev); + + rc = class_connect(conn, obd, cluuid); + if (rc) + RETURN(rc); + + exp = class_conn2export(conn); + LASSERT(exp != NULL); + + rc = mdt_connect_internal(exp, mdt, data); + if (rc == 0) { + OBD_ALLOC_PTR(mcd); + if (mcd != NULL) { + struct mdt_thread_info *mti; + mti = lu_context_key_get(&env->le_ctx, + &mdt_thread_key); + LASSERT(mti != NULL); + mti->mti_exp = exp; + memcpy(mcd->mcd_uuid, cluuid, sizeof mcd->mcd_uuid); + exp->exp_mdt_data.med_mcd = mcd; + rc = mdt_client_new(env, mdt); + if (rc != 0) { + OBD_FREE_PTR(mcd); + exp->exp_mdt_data.med_mcd = NULL; + } + } else + rc = -ENOMEM; + } + + if (rc != 0) + class_disconnect(exp); + else + class_export_put(exp); + + RETURN(rc); +} + +static int mdt_obd_reconnect(struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + int rc; + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + rc = mdt_connect_internal(exp, mdt_dev(obd->obd_lu_dev), data); + + RETURN(rc); +} + +static int mdt_obd_disconnect(struct obd_export *exp) +{ + struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); + int rc; + ENTRY; + + LASSERT(exp); + class_export_get(exp); + + /* Disconnect early so that clients can't keep using export */ + rc = class_disconnect(exp); + if (mdt->mdt_namespace != NULL || exp->exp_obd->obd_namespace != NULL) + ldlm_cancel_locks_for_export(exp); + + /* complete all outstanding replies */ + spin_lock(&exp->exp_lock); + while (!list_empty(&exp->exp_outstanding_replies)) { + struct ptlrpc_reply_state *rs = + list_entry(exp->exp_outstanding_replies.next, + struct ptlrpc_reply_state, rs_exp_list); + struct ptlrpc_service *svc = rs->rs_service; + + spin_lock(&svc->srv_lock); + list_del_init(&rs->rs_exp_list); + ptlrpc_schedule_difficult_reply(rs); + spin_unlock(&svc->srv_lock); + } + spin_unlock(&exp->exp_lock); + + class_export_put(exp); + RETURN(rc); +} + +/* FIXME: Can we avoid using these two interfaces? */ +static int mdt_init_export(struct obd_export *exp) +{ + struct mdt_export_data *med = &exp->exp_mdt_data; + ENTRY; + + INIT_LIST_HEAD(&med->med_open_head); + spin_lock_init(&med->med_open_lock); + spin_lock(&exp->exp_lock); + exp->exp_connecting = 1; + spin_unlock(&exp->exp_lock); + RETURN(0); +} + +static int mdt_destroy_export(struct obd_export *export) +{ + struct mdt_export_data *med; + struct obd_device *obd = export->exp_obd; + struct mdt_device *mdt; + struct mdt_thread_info *info; + struct lu_env env; + struct md_attr *ma; + int lmm_size; + int cookie_size; + int rc = 0; + ENTRY; + + med = &export->exp_mdt_data; + if (med->med_rmtclient) + mdt_cleanup_idmap(med); + + target_destroy_export(export); + + if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid)) + RETURN(0); + + mdt = mdt_dev(obd->obd_lu_dev); + LASSERT(mdt != NULL); + + rc = lu_env_init(&env, NULL, LCT_MD_THREAD); + if (rc) + RETURN(rc); + + info = lu_context_key_get(&env.le_ctx, &mdt_thread_key); + LASSERT(info != NULL); + memset(info, 0, sizeof *info); + info->mti_env = &env; + info->mti_mdt = mdt; + info->mti_exp = export; + + ma = &info->mti_attr; + lmm_size = ma->ma_lmm_size = mdt->mdt_max_mdsize; + cookie_size = ma->ma_cookie_size = mdt->mdt_max_cookiesize; + OBD_ALLOC(ma->ma_lmm, lmm_size); + OBD_ALLOC(ma->ma_cookie, cookie_size); + + if (ma->ma_lmm == NULL || ma->ma_cookie == NULL) + GOTO(out, rc = -ENOMEM); + ma->ma_need = MA_LOV | MA_COOKIE; + ma->ma_valid = 0; + /* Close any open files (which may also cause orphan unlinking). */ + spin_lock(&med->med_open_lock); + while (!list_empty(&med->med_open_head)) { + struct list_head *tmp = med->med_open_head.next; + struct mdt_file_data *mfd = + list_entry(tmp, struct mdt_file_data, mfd_list); + + /* Remove mfd handle so it can't be found again. + * We are consuming the mfd_list reference here. */ + class_handle_unhash(&mfd->mfd_handle); + list_del_init(&mfd->mfd_list); + spin_unlock(&med->med_open_lock); + mdt_mfd_close(info, mfd); + /* TODO: if we close the unlinked file, + * we need to remove it's objects from OST */ + memset(&ma->ma_attr, 0, sizeof(ma->ma_attr)); + spin_lock(&med->med_open_lock); + ma->ma_lmm_size = lmm_size; + ma->ma_cookie_size = cookie_size; + ma->ma_need = MA_LOV | MA_COOKIE; + ma->ma_valid = 0; + } + spin_unlock(&med->med_open_lock); + info->mti_mdt = NULL; + mdt_client_del(&env, mdt); + + EXIT; +out: + if (lmm_size) { + OBD_FREE(ma->ma_lmm, lmm_size); + ma->ma_lmm = NULL; + } + if (cookie_size) { + OBD_FREE(ma->ma_cookie, cookie_size); + ma->ma_cookie = NULL; + } + lu_env_fini(&env); + + return rc; +} + +static int mdt_upcall(const struct lu_env *env, struct md_device *md, + enum md_upcall_event ev) +{ + struct mdt_device *m = mdt_dev(&md->md_lu_dev); + struct md_device *next = m->mdt_child; + struct mdt_thread_info *mti; + int rc = 0; + ENTRY; + + switch (ev) { + case MD_LOV_SYNC: + rc = next->md_ops->mdo_maxsize_get(env, next, + &m->mdt_max_mdsize, + &m->mdt_max_cookiesize); + CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n", + m->mdt_max_mdsize, m->mdt_max_cookiesize); + break; + case MD_NO_TRANS: + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + mti->mti_no_need_trans = 1; + CDEBUG(D_INFO, "disable mdt trans for this thread\n"); + break; + default: + CERROR("invalid event\n"); + rc = -EINVAL; + break; + } + RETURN(rc); +} + +static int mdt_obd_notify(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner) +{ + ENTRY; + + switch (ev) { + case OBD_NOTIFY_CONFIG: + host->obd_no_conn = 0; + default: + CDEBUG(D_INFO, "Notification 0x%x\n", ev); + } + RETURN(0); +} + +static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg) +{ + struct lu_env env; + struct obd_device *obd= exp->exp_obd; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct dt_device *dt = mdt->mdt_bottom; + int rc; + + ENTRY; + CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd); + rc = lu_env_init(&env, NULL, LCT_MD_THREAD); + if (rc) + RETURN(rc); + + switch (cmd) { + case OBD_IOC_SYNC: + rc = dt->dd_ops->dt_sync(&env, dt); + break; + case OBD_IOC_SET_READONLY: + rc = dt->dd_ops->dt_sync(&env, dt); + dt->dd_ops->dt_ro(&env, dt); + break; + case OBD_IOC_ABORT_RECOVERY: + CERROR("Aborting recovery for device %s\n", obd->obd_name); + target_stop_recovery_thread(obd); + rc = 0; + break; + default: + CERROR("Not supported cmd = %d for device %s\n", + cmd, obd->obd_name); + rc = -EOPNOTSUPP; + } + + lu_env_fini(&env); + RETURN(rc); +} + +int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) +{ + struct lu_device *ld = md2lu_dev(mdt->mdt_child); + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + int rc, lost; + ENTRY; + /* if some clients didn't participate in recovery then we can possibly + * lost sequence. Now we should increase sequence for safe value */ + lost = obd->obd_max_recoverable_clients - obd->obd_connected_clients; + mdt_seq_adjust(env, mdt, lost); + + rc = ld->ld_ops->ldo_recovery_complete(env, ld); + RETURN(rc); +} + +int mdt_obd_postrecov(struct obd_device *obd) +{ + struct lu_env env; + int rc; + + rc = lu_env_init(&env, NULL, LCT_MD_THREAD); + if (rc) + RETURN(rc); + rc = mdt_postrecov(&env, mdt_dev(obd->obd_lu_dev)); + lu_env_fini(&env); + return rc; +} + +static struct obd_ops mdt_obd_device_ops = { + .o_owner = THIS_MODULE, + .o_connect = mdt_obd_connect, + .o_reconnect = mdt_obd_reconnect, + .o_disconnect = mdt_obd_disconnect, + .o_init_export = mdt_init_export, + .o_destroy_export = mdt_destroy_export, + .o_iocontrol = mdt_iocontrol, + .o_postrecov = mdt_obd_postrecov, + .o_notify = mdt_obd_notify +}; + +static struct lu_device* mdt_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + struct mdt_device *m = mdt_dev(d); + ENTRY; + + mdt_fini(env, m); + RETURN(NULL); +} + +static void mdt_device_free(const struct lu_env *env, struct lu_device *d) +{ + struct mdt_device *m = mdt_dev(d); + + OBD_FREE_PTR(m); +} + +static struct lu_device *mdt_device_alloc(const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *cfg) +{ + struct lu_device *l; + struct mdt_device *m; + + OBD_ALLOC_PTR(m); + if (m != NULL) { + int rc; + + l = &m->mdt_md_dev.md_lu_dev; + rc = mdt_init0(env, m, t, cfg); + if (rc != 0) { + OBD_FREE_PTR(m); + l = ERR_PTR(rc); + return l; + } + md_upcall_init(&m->mdt_md_dev, mdt_upcall); + } else + l = ERR_PTR(-ENOMEM); + return l; +} + +/* + * context key constructor/destructor + */ +static void *mdt_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct mdt_thread_info *info; + + /* + * check that no high order allocations are incurred. + */ + CLASSERT(CFS_PAGE_SIZE >= sizeof *info); + OBD_ALLOC_PTR(info); + if (info == NULL) + info = ERR_PTR(-ENOMEM); + return info; +} + +static void mdt_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct mdt_thread_info *info = data; + OBD_FREE_PTR(info); +} + +struct lu_context_key mdt_thread_key = { + .lct_tags = LCT_MD_THREAD, + .lct_init = mdt_key_init, + .lct_fini = mdt_key_fini +}; + +static void *mdt_txn_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct mdt_txn_info *txi; + + /* + * check that no high order allocations are incurred. + */ + CLASSERT(CFS_PAGE_SIZE >= sizeof *txi); + OBD_ALLOC_PTR(txi); + if (txi == NULL) + txi = ERR_PTR(-ENOMEM); + memset(txi, 0, sizeof(*txi)); + return txi; +} + +static void mdt_txn_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct mdt_txn_info *txi = data; + OBD_FREE_PTR(txi); +} + +struct lu_context_key mdt_txn_key = { + .lct_tags = LCT_TX_HANDLE, + .lct_init = mdt_txn_key_init, + .lct_fini = mdt_txn_key_fini +}; + +struct md_ucred *mdt_ucred(const struct mdt_thread_info *info) +{ + return md_ucred(info->mti_env); +} + +static int mdt_type_init(struct lu_device_type *t) +{ + int rc; + + LU_CONTEXT_KEY_INIT(&mdt_thread_key); + rc = lu_context_key_register(&mdt_thread_key); + if (rc == 0) { + LU_CONTEXT_KEY_INIT(&mdt_txn_key); + rc = lu_context_key_register(&mdt_txn_key); + } + return rc; +} + +static void mdt_type_fini(struct lu_device_type *t) +{ + lu_context_key_degister(&mdt_thread_key); + lu_context_key_degister(&mdt_txn_key); +} + +static struct lu_device_type_operations mdt_device_type_ops = { + .ldto_init = mdt_type_init, + .ldto_fini = mdt_type_fini, + + .ldto_device_alloc = mdt_device_alloc, + .ldto_device_free = mdt_device_free, + .ldto_device_fini = mdt_device_fini +}; + +static struct lu_device_type mdt_device_type = { + .ldt_tags = LU_DEVICE_MD, + .ldt_name = LUSTRE_MDT_NAME, + .ldt_ops = &mdt_device_type_ops, + .ldt_ctx_tags = LCT_MD_THREAD +}; + +static int __init mdt_mod_init(void) +{ + struct lprocfs_static_vars lvars; + int rc; + + mdt_num_threads = MDT_NUM_THREADS; + lprocfs_init_vars(mdt, &lvars); + rc = class_register_type(&mdt_obd_device_ops, NULL, + lvars.module_vars, LUSTRE_MDT_NAME, + &mdt_device_type); + + return rc; +} + +static void __exit mdt_mod_exit(void) +{ + class_unregister_type(LUSTRE_MDT_NAME); +} + + +#define DEF_HNDL(prefix, base, suffix, flags, opc, fn, fmt) \ +[prefix ## _ ## opc - prefix ## _ ## base] = { \ + .mh_name = #opc, \ + .mh_fail_id = OBD_FAIL_ ## prefix ## _ ## opc ## suffix, \ + .mh_opc = prefix ## _ ## opc, \ + .mh_flags = flags, \ + .mh_act = fn, \ + .mh_fmt = fmt \ +} + +#define DEF_MDT_HNDL(flags, name, fn, fmt) \ + DEF_HNDL(MDS, GETATTR, _NET, flags, name, fn, fmt) + +#define DEF_SEQ_HNDL(flags, name, fn, fmt) \ + DEF_HNDL(SEQ, QUERY, _NET, flags, name, fn, fmt) + +#define DEF_FLD_HNDL(flags, name, fn, fmt) \ + DEF_HNDL(FLD, QUERY, _NET, flags, name, fn, fmt) +/* + * Request with a format known in advance + */ +#define DEF_MDT_HNDL_F(flags, name, fn) \ + DEF_HNDL(MDS, GETATTR, _NET, flags, name, fn, &RQF_MDS_ ## name) + +#define DEF_SEQ_HNDL_F(flags, name, fn) \ + DEF_HNDL(SEQ, QUERY, _NET, flags, name, fn, &RQF_SEQ_ ## name) + +#define DEF_FLD_HNDL_F(flags, name, fn) \ + DEF_HNDL(FLD, QUERY, _NET, flags, name, fn, &RQF_FLD_ ## name) +/* + * Request with a format we do not yet know + */ +#define DEF_MDT_HNDL_0(flags, name, fn) \ + DEF_HNDL(MDS, GETATTR, _NET, flags, name, fn, NULL) + +static struct mdt_handler mdt_mds_ops[] = { +DEF_MDT_HNDL_F(0, CONNECT, mdt_connect), +DEF_MDT_HNDL_F(0, DISCONNECT, mdt_disconnect), +DEF_MDT_HNDL_F(0, SET_INFO, mdt_set_info), +DEF_MDT_HNDL_F(0 |HABEO_REFERO, GETSTATUS, mdt_getstatus), +DEF_MDT_HNDL_F(HABEO_CORPUS, GETATTR, mdt_getattr), +DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, GETATTR_NAME, mdt_getattr_name), +DEF_MDT_HNDL_F(HABEO_CORPUS|MUTABOR, SETXATTR, mdt_setxattr), +DEF_MDT_HNDL_F(HABEO_CORPUS, GETXATTR, mdt_getxattr), +DEF_MDT_HNDL_F(0 |HABEO_REFERO, STATFS, mdt_statfs), +DEF_MDT_HNDL_F(0 |MUTABOR, REINT, mdt_reint), +DEF_MDT_HNDL_F(HABEO_CORPUS, CLOSE, mdt_close), +DEF_MDT_HNDL_F(HABEO_CORPUS, DONE_WRITING, mdt_done_writing), +DEF_MDT_HNDL_F(0 |HABEO_REFERO, PIN, mdt_pin), +DEF_MDT_HNDL_0(0, SYNC, mdt_sync), +DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR, mdt_is_subdir), +DEF_MDT_HNDL_0(0, QUOTACHECK, mdt_quotacheck_handle), +DEF_MDT_HNDL_0(0, QUOTACTL, mdt_quotactl_handle) +}; + +#define DEF_OBD_HNDL(flags, name, fn) \ + DEF_HNDL(OBD, PING, _NET, flags, name, fn, NULL) + + +static struct mdt_handler mdt_obd_ops[] = { + DEF_OBD_HNDL(0, PING, mdt_obd_ping), + DEF_OBD_HNDL(0, LOG_CANCEL, mdt_obd_log_cancel), + DEF_OBD_HNDL(0, QC_CALLBACK, mdt_obd_qc_callback) +}; + +#define DEF_DLM_HNDL_0(flags, name, fn) \ + DEF_HNDL(LDLM, ENQUEUE, , flags, name, fn, NULL) +#define DEF_DLM_HNDL_F(flags, name, fn) \ + DEF_HNDL(LDLM, ENQUEUE, , flags, name, fn, &RQF_LDLM_ ## name) + +static struct mdt_handler mdt_dlm_ops[] = { + DEF_DLM_HNDL_F(HABEO_CLAVIS, ENQUEUE, mdt_enqueue), + DEF_DLM_HNDL_0(HABEO_CLAVIS, CONVERT, mdt_convert), + DEF_DLM_HNDL_0(0, BL_CALLBACK, mdt_bl_callback), + DEF_DLM_HNDL_0(0, CP_CALLBACK, mdt_cp_callback) +}; + +static struct mdt_handler mdt_llog_ops[] = { +}; + +#define DEF_SEC_CTX_HNDL(name, fn) \ + DEF_HNDL(SEC_CTX, INIT, _NET, 0, name, fn, NULL) + +static struct mdt_handler mdt_sec_ctx_ops[] = { + DEF_SEC_CTX_HNDL(INIT, mdt_sec_ctx_handle), + DEF_SEC_CTX_HNDL(INIT_CONT, mdt_sec_ctx_handle), + DEF_SEC_CTX_HNDL(FINI, mdt_sec_ctx_handle) +}; + +static struct mdt_opc_slice mdt_regular_handlers[] = { + { + .mos_opc_start = MDS_GETATTR, + .mos_opc_end = MDS_LAST_OPC, + .mos_hs = mdt_mds_ops + }, + { + .mos_opc_start = OBD_PING, + .mos_opc_end = OBD_LAST_OPC, + .mos_hs = mdt_obd_ops + }, + { + .mos_opc_start = LDLM_ENQUEUE, + .mos_opc_end = LDLM_LAST_OPC, + .mos_hs = mdt_dlm_ops + }, + { + .mos_opc_start = LLOG_ORIGIN_HANDLE_CREATE, + .mos_opc_end = LLOG_LAST_OPC, + .mos_hs = mdt_llog_ops + }, + { + .mos_opc_start = SEC_CTX_INIT, + .mos_opc_end = SEC_LAST_OPC, + .mos_hs = mdt_sec_ctx_ops + }, + { + .mos_hs = NULL + } +}; + +static struct mdt_handler mdt_readpage_ops[] = { + DEF_MDT_HNDL_F(0, CONNECT, mdt_connect), + DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, READPAGE, mdt_readpage), +#ifdef HAVE_SPLIT_SUPPORT + DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, WRITEPAGE, mdt_writepage), +#endif + + /* + * XXX: this is ugly and should be fixed one day, see mdc_close() for + * detailed comments. --umka + */ + DEF_MDT_HNDL_F(HABEO_CORPUS, CLOSE, mdt_close), + DEF_MDT_HNDL_F(HABEO_CORPUS, DONE_WRITING, mdt_done_writing), +}; + +static struct mdt_opc_slice mdt_readpage_handlers[] = { + { + .mos_opc_start = MDS_GETATTR, + .mos_opc_end = MDS_LAST_OPC, + .mos_hs = mdt_readpage_ops + }, + { + .mos_hs = NULL + } +}; + +static struct mdt_handler mdt_xmds_ops[] = { + DEF_MDT_HNDL_F(0, CONNECT, mdt_connect), + DEF_MDT_HNDL_F(HABEO_CORPUS , GETATTR, mdt_getattr), + DEF_MDT_HNDL_F(0 | MUTABOR , REINT, mdt_reint), + DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR, mdt_is_subdir), +}; + +static struct mdt_opc_slice mdt_xmds_handlers[] = { + { + .mos_opc_start = MDS_GETATTR, + .mos_opc_end = MDS_LAST_OPC, + .mos_hs = mdt_xmds_ops + }, + { + .mos_opc_start = OBD_PING, + .mos_opc_end = OBD_LAST_OPC, + .mos_hs = mdt_obd_ops + }, + { + .mos_hs = NULL + } +}; + +static struct mdt_handler mdt_seq_ops[] = { + DEF_SEQ_HNDL_F(0, QUERY, (int (*)(struct mdt_thread_info *))seq_query) +}; + +static struct mdt_opc_slice mdt_seq_handlers[] = { + { + .mos_opc_start = SEQ_QUERY, + .mos_opc_end = SEQ_LAST_OPC, + .mos_hs = mdt_seq_ops + }, + { + .mos_hs = NULL + } +}; + +static struct mdt_handler mdt_fld_ops[] = { + DEF_FLD_HNDL_F(0, QUERY, (int (*)(struct mdt_thread_info *))fld_query) +}; + +static struct mdt_opc_slice mdt_fld_handlers[] = { + { + .mos_opc_start = FLD_QUERY, + .mos_opc_end = FLD_LAST_OPC, + .mos_hs = mdt_fld_ops + }, + { + .mos_hs = NULL + } +}; + +MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); +MODULE_DESCRIPTION("Lustre Meta-data Target ("LUSTRE_MDT_NAME")"); +MODULE_LICENSE("GPL"); + +CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444, + "number of mdt service threads to start"); + +cfs_module(mdt, "0.2.0", mdt_mod_init, mdt_mod_exit); diff --git a/lustre/mdt/mdt_identity.c b/lustre/mdt/mdt_identity.c new file mode 100644 index 0000000..9e94281 --- /dev/null +++ b/lustre/mdt/mdt_identity.c @@ -0,0 +1,272 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao <lsy@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#ifdef HAVE_KERNEL_CONFIG_H +#include <linux/config.h> +#endif +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/kmod.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <asm/segment.h> + +#include <libcfs/kp30.h> +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_lib.h> +#include <lustre_ucache.h> + +#include "mdt_internal.h" + +static void mdt_identity_entry_init(struct upcall_cache_entry *entry, + void *unused) +{ + entry->u.identity.mi_uc_entry = entry; +} + +static void mdt_identity_entry_free(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_identity *identity = &entry->u.identity; + + if (identity->mi_ginfo) + groups_free(identity->mi_ginfo); + + if (identity->mi_nperms) { + LASSERT(identity->mi_perms); + OBD_FREE(identity->mi_perms, + identity->mi_nperms * sizeof(struct mdt_setxid_perm)); + } +} + +static int mdt_identity_do_upcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + char keystr[16]; + char *argv[] = { + [0] = cache->uc_upcall, + [1] = cache->uc_name, + [2] = keystr, + [3] = NULL + }; + char *envp[] = { + [0] = "HOME=/", + [1] = "PATH=/sbin:/usr/sbin", + [2] = NULL + }; + int rc; + ENTRY; + + snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key); + + LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!"); + CDEBUG(D_INFO, "The upcall is: %s \n", cache->uc_upcall); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0) { + CERROR("%s: error invoking upcall %s %s %s: rc %d; " + "check /proc/fs/lustre/mdt/%s/identity_upcall\n", + cache->uc_name, argv[0], argv[1], argv[2], rc, + cache->uc_name); + } else { + CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", cache->uc_name, + argv[0], argv[1], argv[2]); + rc = 0; + } + RETURN(rc); +} + +static int mdt_identity_parse_downcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + void *args) +{ + struct mdt_identity *identity = &entry->u.identity; + struct identity_downcall_data *data = args; + struct group_info *ginfo; + struct mdt_setxid_perm *perms = NULL; + int size, i; + ENTRY; + + LASSERT(data); + if (data->idd_ngroups > NGROUPS_MAX) + RETURN(-E2BIG); + + ginfo = groups_alloc(data->idd_ngroups); + if (!ginfo) { + CERROR("failed to alloc %d groups\n", data->idd_ngroups); + RETURN(-ENOMEM); + } + + groups_from_list(ginfo, data->idd_groups); + groups_sort(ginfo); + identity->mi_ginfo = ginfo; + + if (data->idd_nperms) { + size = data->idd_nperms * sizeof(*perms); + OBD_ALLOC(perms, size); + if (!perms) { + CERROR("failed to alloc %d permissions\n", + data->idd_nperms); + put_group_info(ginfo); + RETURN(-ENOMEM); + } + for (i = 0; i < data->idd_nperms; i++) { + perms[i].mp_nid = data->idd_perms[i].pdd_nid; + perms[i].mp_perm = data->idd_perms[i].pdd_perm; + } + } + + identity->mi_uid = data->idd_uid; + identity->mi_gid = data->idd_gid; + identity->mi_ginfo = ginfo; + identity->mi_nperms = data->idd_nperms; + identity->mi_perms = perms; + + CDEBUG(D_OTHER, "parse mdt identity@%p: %d:%d, ngroups %u, nperms %u\n", + identity, identity->mi_uid, identity->mi_gid, + identity->mi_ginfo->ngroups, identity->mi_nperms); + + RETURN(0); +} + +struct mdt_identity *mdt_identity_get(struct upcall_cache *cache, __u32 uid) +{ + struct upcall_cache_entry *entry; + + if (!cache) + return NULL; + + entry = upcall_cache_get_entry(cache, (__u64)uid, NULL); + if (IS_ERR(entry)) { + CERROR("upcall_cache_get_entry failed: %ld\n", PTR_ERR(entry)); + return NULL; + } + + return &entry->u.identity; +} + +void mdt_identity_put(struct upcall_cache *cache, struct mdt_identity *identity) +{ + if (!cache) + return; + + LASSERT(identity); + upcall_cache_put_entry(cache, identity->mi_uc_entry); +} + +struct upcall_cache_ops mdt_identity_upcall_cache_ops = { + .init_entry = mdt_identity_entry_init, + .free_entry = mdt_identity_entry_free, + .do_upcall = mdt_identity_do_upcall, + .parse_downcall = mdt_identity_parse_downcall, +}; + +void mdt_flush_identity(struct upcall_cache *cache, int uid) +{ + if (uid < 0) + upcall_cache_flush_idle(cache); + else + upcall_cache_flush_one(cache, (__u64)uid, NULL); +} + +/* + * If there is LNET_NID_ANY in perm[i].mp_nid, + * it must be perm[0].mp_nid, and act as default perm. + */ +__u32 mdt_identity_get_setxid_perm(struct mdt_identity *identity, + __u32 is_rmtclient, lnet_nid_t nid) +{ + struct mdt_setxid_perm *perm = identity->mi_perms; + int i; + + /* check exactly matched nid first */ + for (i = identity->mi_nperms - 1; i > 0; i--) { + if (perm[i].mp_nid != nid) + continue; + return perm[i].mp_perm; + } + + /* check LNET_NID_ANY then */ + if ((identity->mi_nperms > 0) && + ((perm[0].mp_nid == nid) || (perm[0].mp_nid == LNET_NID_ANY))) + return perm[0].mp_perm; + + /* return default last */ + return is_rmtclient ? 0 : LUSTRE_SETGRP_PERM; +} + +int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o, + void *buf) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = mdt_ucred(info); + struct md_object *next = mdt_object_child(o); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_remote_perm *perm = buf; + + ENTRY; + + /* remote client request always pack ptlrpc_user_desc! */ + LASSERT(perm); + + if (!med->med_rmtclient) + RETURN(-EBADE); + + if ((uc->mu_valid != UCRED_OLD) && (uc->mu_valid != UCRED_NEW)) + RETURN(-EINVAL); + + perm->rp_uid = uc->mu_o_uid; + perm->rp_gid = uc->mu_o_gid; + perm->rp_fsuid = uc->mu_o_fsuid; + perm->rp_fsgid = uc->mu_o_fsgid; + + perm->rp_access_perm = 0; + if (mo_permission(info->mti_env, NULL, next, NULL, MAY_READ) == 0) + perm->rp_access_perm |= MAY_READ; + if (mo_permission(info->mti_env, NULL, next, NULL, MAY_WRITE) == 0) + perm->rp_access_perm |= MAY_WRITE; + if (mo_permission(info->mti_env, NULL, next, NULL, MAY_EXEC) == 0) + perm->rp_access_perm |= MAY_EXEC; + + RETURN(0); +} diff --git a/lustre/mdt/mdt_idmap.c b/lustre/mdt/mdt_idmap.c new file mode 100644 index 0000000..aaa6ee9 --- /dev/null +++ b/lustre/mdt/mdt_idmap.c @@ -0,0 +1,800 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao <lsy@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#ifdef HAVE_KERNEL_CONFIG_H +#include <linux/config.h> +#endif +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/kmod.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <asm/segment.h> + +#include <libcfs/kp30.h> +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_sec.h> +#include <lustre_lib.h> +#include <lustre_ucache.h> + +#include "mdt_internal.h" + +enum { + MDT_IDMAP_NOTFOUND = -1, +}; + +struct mdt_idmap_entry { + struct list_head mie_rmt_hash; /* hashed as mie_rmt_id; */ + struct list_head mie_lcl_hash; /* hashed as mie_lcl_id; */ + int mie_refcount; + uid_t mie_rmt_id; /* remote uid/gid */ + uid_t mie_lcl_id; /* local uid/gid */ +}; + +/* uid/gid mapping */ +static struct mdt_idmap_table *mdt_idmap_alloc(void) +{ + struct mdt_idmap_table *tbl; + int i, j; + + OBD_ALLOC_PTR(tbl); + if (!tbl) + return NULL; + + spin_lock_init(&tbl->mit_lock); + for (i = 0; i < ARRAY_SIZE(tbl->mit_idmaps); i++) + for (j = 0; j < ARRAY_SIZE(tbl->mit_idmaps[i]); j++) + INIT_LIST_HEAD(&tbl->mit_idmaps[i][j]); + + return tbl; +} + +static struct mdt_idmap_entry *idmap_entry_alloc(__u32 mie_rmt_id, + __u32 mie_lcl_id) +{ + struct mdt_idmap_entry *e; + + OBD_ALLOC_PTR(e); + if (!e) + return NULL; + + INIT_LIST_HEAD(&e->mie_rmt_hash); + INIT_LIST_HEAD(&e->mie_lcl_hash); + e->mie_refcount = 1; + e->mie_rmt_id = mie_rmt_id; + e->mie_lcl_id = mie_lcl_id; + + return e; +} + +static void idmap_entry_free(struct mdt_idmap_entry *e) +{ + if (!list_empty(&e->mie_rmt_hash)) + list_del(&e->mie_rmt_hash); + if (!list_empty(&e->mie_lcl_hash)) + list_del(&e->mie_lcl_hash); + OBD_FREE_PTR(e); +} + +int mdt_init_idmap(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *client = libcfs_nid2str(req->rq_peer.nid); + struct mdt_export_data *med = mdt_req2med(req); + struct obd_device *obd = req->rq_export->exp_obd; + struct obd_connect_data *data, *reply; + int rc = 0, remote; + ENTRY; + + data = req_capsule_client_get(&info->mti_pill, &RMF_CONNECT_DATA); + reply = req_capsule_server_get(&info->mti_pill, &RMF_CONNECT_DATA); + if (data == NULL || reply == NULL) + RETURN(-EFAULT); + + if (!req->rq_auth_gss || req->rq_auth_usr_mdt) { + med->med_rmtclient = 0; + reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; + //reply->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; + RETURN(0); + } + + remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT; + + if (remote) { + med->med_rmtclient = 1; + if (!req->rq_auth_remote) + CWARN("client (local realm) %s -> target %s asked " + "to be remote!\n", client, obd->obd_name); + } else if (req->rq_auth_remote) { + med->med_rmtclient = 1; + CWARN("client (remote realm) %s -> target %s forced " + "to be remote!\n", client, obd->obd_name); + } + + if (med->med_rmtclient) { + med->med_nllu = data->ocd_nllu; + med->med_nllg = data->ocd_nllg; + if (!med->med_idmap) + med->med_idmap = mdt_idmap_alloc(); + if (!med->med_idmap) { + CERROR("client %s -> target %s failed to alloc idmap!\n" + , client, obd->obd_name); + RETURN(-ENOMEM); + } + + reply->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; + //reply->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; + CDEBUG(D_SEC, "client %s -> target %s is remote.\n", + client, obd->obd_name); + + /* NB, MDT_CONNECT establish root idmap too! */ + rc = mdt_handle_idmap(info); + } else { + if (req->rq_auth_uid == INVALID_UID) { + CERROR("client %s -> target %s: user is not " + "authenticated!\n", client, obd->obd_name); + RETURN(-EACCES); + } + reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; + //reply->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; + } + + RETURN(rc); +} + +static void idmap_clear_mie_rmt_hash(struct list_head *list) +{ + struct mdt_idmap_entry *e; + int i; + + for (i = 0; i < MDT_IDMAP_HASHSIZE; i++) { + while (!list_empty(&list[i])) { + e = list_entry(list[i].next, struct mdt_idmap_entry, + mie_rmt_hash); + idmap_entry_free(e); + } + } +} + +void mdt_cleanup_idmap(struct mdt_export_data *med) +{ + struct mdt_idmap_table *tbl = med->med_idmap; + int i; + + LASSERT(med->med_rmtclient); + LASSERT(tbl); + + spin_lock(&tbl->mit_lock); + idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_UIDMAP_IDX]); + idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_GIDMAP_IDX]); + + /* paranoid checking */ + for (i = 0; i < MDT_IDMAP_HASHSIZE; i++) { + LASSERT(list_empty(&tbl->mit_idmaps[LCL_UIDMAP_IDX][i])); + LASSERT(list_empty(&tbl->mit_idmaps[LCL_GIDMAP_IDX][i])); + } + spin_unlock(&tbl->mit_lock); + + OBD_FREE_PTR(tbl); + med->med_idmap = NULL; +} + +static inline void mdt_revoke_export_locks(struct obd_export *exp) +{ + /* don't revoke locks during recovery */ + if (exp->exp_obd->obd_recovering) + return; + + ldlm_revoke_export_locks(exp); +} + +static +struct mdt_idmap_entry *idmap_lookup_entry(struct list_head *mie_rmt_hash, + uid_t mie_rmt_id, uid_t mie_lcl_id) +{ + struct list_head *rmt_head = + &mie_rmt_hash[MDT_IDMAP_HASHFUNC(mie_rmt_id)]; + struct mdt_idmap_entry *e; + + list_for_each_entry(e, rmt_head, mie_rmt_hash) { + if ((e->mie_rmt_id == mie_rmt_id) && + (e->mie_lcl_id == mie_lcl_id)) + return e; + } + return NULL; +} + +/* + * return value + * NULL: not found entry + * ERR_PTR(-EACCES): found multi->single mapped entry + * others: found normal entry + */ +static +struct mdt_idmap_entry *idmap_search_entry(struct list_head *mie_rmt_hash, + uid_t mie_rmt_id, + struct list_head *mie_lcl_hash, + uid_t mie_lcl_id, + const char *warn_msg) +{ + struct list_head *rmt_head = + &mie_rmt_hash[MDT_IDMAP_HASHFUNC(mie_rmt_id)]; + struct list_head *lcl_head = + &mie_lcl_hash[MDT_IDMAP_HASHFUNC(mie_lcl_id)]; + struct mdt_idmap_entry *e; + + list_for_each_entry(e, rmt_head, mie_rmt_hash) { + if (e->mie_rmt_id == mie_rmt_id) { + if (e->mie_lcl_id == mie_lcl_id) { + e->mie_refcount++; + return e; + } else { + CERROR("%s: rmt id %u already be mapped to %u" + " (new %u)\n", warn_msg, e->mie_rmt_id, + e->mie_lcl_id, mie_lcl_id); + return ERR_PTR(-EACCES); + } + } + } + + list_for_each_entry(e, lcl_head, mie_lcl_hash) { + if (e->mie_lcl_id == mie_lcl_id) { + if (e->mie_rmt_id == mie_rmt_id) { + e->mie_refcount++; + return e; + } else { + CERROR("%s: lcl id %u already be mapped from %u" + " (new %u)\n", warn_msg, e->mie_lcl_id, + e->mie_rmt_id, mie_rmt_id); + return ERR_PTR(-EACCES); + } + } + } + + return NULL; +} + +static +struct mdt_idmap_entry *idmap_insert_entry(struct list_head *mie_rmt_hash, + struct list_head *mie_lcl_hash, + struct mdt_idmap_entry *new, + const char *warn_msg) +{ + struct list_head *rmt_head = + &mie_rmt_hash[MDT_IDMAP_HASHFUNC(new->mie_rmt_id)]; + struct list_head *lcl_head = + &mie_lcl_hash[MDT_IDMAP_HASHFUNC(new->mie_lcl_id)]; + struct mdt_idmap_entry *e; + + e = idmap_search_entry(mie_rmt_hash, new->mie_rmt_id, + mie_lcl_hash, new->mie_lcl_id, + warn_msg); + if (e == NULL) { + list_add_tail(&new->mie_rmt_hash, rmt_head); + list_add_tail(&new->mie_lcl_hash, lcl_head); + } + return e; +} + +static int idmap_remove_entry(struct list_head *mie_rmt_hash, + struct list_head *mie_lcl_hash, + __u32 mie_rmt_id, __u32 mie_lcl_id) +{ + struct mdt_idmap_entry *e; + int rc = -ENOENT; + + e = idmap_lookup_entry(mie_rmt_hash, mie_rmt_id, mie_lcl_id); + if (e != NULL) { + e->mie_refcount--; + if ((rc = e->mie_refcount) <= 0) + idmap_entry_free(e); + } + return rc; +} + +static int mdt_idmap_add(struct mdt_idmap_table *tbl, + uid_t ruid, uid_t luid, + gid_t rgid, gid_t lgid) +{ + struct mdt_idmap_entry *ue0, *ue1, *ge0, *ge1; + ENTRY; + + LASSERT(tbl); + + spin_lock(&tbl->mit_lock); + ue0 = idmap_search_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], ruid, + tbl->mit_idmaps[LCL_UIDMAP_IDX], luid, + "UID mapping"); + spin_unlock(&tbl->mit_lock); + if (!ue0) { + ue0 = idmap_entry_alloc(ruid, luid); + if (!ue0) + RETURN(-ENOMEM); + + spin_lock(&tbl->mit_lock); + ue1 = idmap_insert_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], + tbl->mit_idmaps[LCL_UIDMAP_IDX], + ue0, "UID mapping"); + if (ue1 != NULL) { + idmap_entry_free(ue0); + ue0 = ue1; + } + spin_unlock(&tbl->mit_lock); + + if (IS_ERR(ue1)) + RETURN(PTR_ERR(ue1)); + } else if (IS_ERR(ue0)) { + RETURN(PTR_ERR(ue0)); + } + + spin_lock(&tbl->mit_lock); + ge0 = idmap_search_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], rgid, + tbl->mit_idmaps[LCL_GIDMAP_IDX], lgid, + "GID mapping"); + spin_unlock(&tbl->mit_lock); + if (!ge0) { + ge0 = idmap_entry_alloc(rgid, lgid); + spin_lock(&tbl->mit_lock); + if (!ge0) { + ue0->mie_refcount--; + if (ue0->mie_refcount <= 0) + idmap_entry_free(ue0); + spin_unlock(&tbl->mit_lock); + RETURN(-ENOMEM); + } + + ge1 = idmap_insert_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], + tbl->mit_idmaps[LCL_GIDMAP_IDX], + ge0, "GID mapping"); + if (ge1 != NULL) { + ue0->mie_refcount--; + if (ue0->mie_refcount <= 0) + idmap_entry_free(ue0); + idmap_entry_free(ge0); + } + spin_unlock(&tbl->mit_lock); + + if (IS_ERR(ge1)) + RETURN(PTR_ERR(ge1)); + } else if (IS_ERR(ge0)) { + spin_lock(&tbl->mit_lock); + ue0->mie_refcount--; + if (ue0->mie_refcount <= 0) + idmap_entry_free(ue0); + spin_unlock(&tbl->mit_lock); + RETURN(PTR_ERR(ge0)); + } + + RETURN(0); +} + +static int mdt_idmap_del(struct mdt_idmap_table *tbl, + uid_t ruid, uid_t luid, + gid_t rgid, gid_t lgid) +{ + ENTRY; + + if (!tbl) + RETURN(0); + + spin_lock(&tbl->mit_lock); + idmap_remove_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], + tbl->mit_idmaps[LCL_UIDMAP_IDX], + ruid, luid); + idmap_remove_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], + tbl->mit_idmaps[LCL_GIDMAP_IDX], + rgid, lgid); + spin_unlock(&tbl->mit_lock); + + RETURN(0); +} + +int mdt_handle_idmap(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_device *mdt = info->mti_mdt; + struct mdt_export_data *med; + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct mdt_identity *identity; + __u32 opc; + int rc = 0; + + ENTRY; + + if (!req->rq_export) + RETURN(0); + + med = mdt_req2med(req); + if (!med->med_rmtclient) + RETURN(0); + + opc = lustre_msg_get_opc(req->rq_reqmsg); + /* Bypass other opc */ + if ((opc != SEC_CTX_INIT) && (opc != SEC_CTX_INIT_CONT) && + (opc != SEC_CTX_FINI) && (opc != MDS_CONNECT)) + RETURN(0); + + LASSERT(pud); + LASSERT(med->med_idmap); + + if (req->rq_auth_mapped_uid == INVALID_UID) { + CERROR("invalid authorized mapped uid, please check " + "/etc/lustre/idmap.conf!\n"); + RETURN(-EACCES); + } + + if (is_identity_get_disabled(mdt->mdt_identity_cache)) { + CERROR("remote client must run with identity_get enabled!\n"); + RETURN(-EACCES); + } + + identity = mdt_identity_get(mdt->mdt_identity_cache, + req->rq_auth_mapped_uid); + if (!identity) { + CERROR("can't get mdt identity(%u), no mapping added\n", + req->rq_auth_mapped_uid); + RETURN(-EACCES); + } + + switch (opc) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case MDS_CONNECT: + rc = mdt_idmap_add(med->med_idmap, + pud->pud_uid, identity->mi_uid, + pud->pud_gid, identity->mi_gid); + break; + case SEC_CTX_FINI: + rc = mdt_idmap_del(med->med_idmap, + pud->pud_uid, identity->mi_uid, + pud->pud_gid, identity->mi_gid); + break; + } + + mdt_identity_put(mdt->mdt_identity_cache, identity); + + if (rc) + RETURN(rc); + + switch (opc) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: + mdt_revoke_export_locks(req->rq_export); + break; + } + RETURN(0); +} + +static __u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id) +{ + struct list_head *head = &hash[MDT_IDMAP_HASHFUNC(id)]; + struct mdt_idmap_entry *e; + + if (!reverse) { + list_for_each_entry(e, head, mie_rmt_hash) { + if (e->mie_rmt_id == id) + return e->mie_lcl_id; + } + } else { + list_for_each_entry(e, head, mie_lcl_hash) { + if (e->mie_lcl_id == id) + return e->mie_rmt_id; + } + } + return MDT_IDMAP_NOTFOUND; +} + +static int mdt_idmap_lookup_uid(struct mdt_idmap_table *tbl, int reverse, + uid_t uid) +{ + struct list_head *hash; + + if (!tbl) + return MDT_IDMAP_NOTFOUND; + + hash = tbl->mit_idmaps[reverse ? LCL_UIDMAP_IDX : RMT_UIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + uid = idmap_lookup_id(hash, reverse, uid); + spin_unlock(&tbl->mit_lock); + + return uid; +} + +static int mdt_idmap_lookup_gid(struct mdt_idmap_table *tbl, int reverse, + gid_t gid) +{ + struct list_head *hash; + + if (!tbl) + return MDT_IDMAP_NOTFOUND; + + hash = tbl->mit_idmaps[reverse ? LCL_GIDMAP_IDX : RMT_GIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + gid = idmap_lookup_id(hash, reverse, gid); + spin_unlock(&tbl->mit_lock); + + return gid; +} + +int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *req, + struct ptlrpc_user_desc *pud) +{ + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + uid_t uid, fsuid; + gid_t gid, fsgid; + + /* Only remote client need desc_to_idmap. */ + if (!med->med_rmtclient) + return 0; + + uid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_uid); + if (uid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for uid %u\n", pud->pud_uid); + return -EACCES; + } + + if (pud->pud_uid == pud->pud_fsuid) { + fsuid = uid; + } else { + fsuid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_fsuid); + if (fsuid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsuid %u\n", pud->pud_fsuid); + return -EACCES; + } + } + + gid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_gid); + if (gid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for gid %u\n", pud->pud_gid); + return -EACCES; + } + + if (pud->pud_gid == pud->pud_fsgid) { + fsgid = gid; + } else { + fsgid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_fsgid); + if (fsgid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsgid %u\n", pud->pud_fsgid); + return -EACCES; + } + } + + pud->pud_uid = uid; + pud->pud_gid = gid; + pud->pud_fsuid = fsuid; + pud->pud_fsgid = fsgid; + + return 0; +} + +/* + * Reverse map + * Do not ignore rootsquash. + */ +void mdt_body_reverse_idmap(struct mdt_thread_info *info, struct mdt_body *body) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = mdt_ucred(info); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + uid_t uid; + gid_t gid; + + if (!med->med_rmtclient) + return; + + if (body->valid & OBD_MD_FLUID) { + if (((uc->mu_valid == UCRED_OLD) || + (uc->mu_valid == UCRED_NEW)) && + !(uc->mu_squash & SQUASH_UID)) { + if (body->uid == uc->mu_uid) + uid = uc->mu_o_uid; + else if (body->uid == uc->mu_fsuid) + uid = uc->mu_o_fsuid; + else + uid = mdt_idmap_lookup_uid(idmap, 1, body->uid); + } else { + uid = mdt_idmap_lookup_uid(idmap, 1, body->uid); + } + + if (uid == MDT_IDMAP_NOTFOUND) { + uid = med->med_nllu; + if (body->valid & OBD_MD_FLMODE) + body->mode = (body->mode & ~S_IRWXU) | + ((body->mode & S_IRWXO) << 6); + } + + body->uid = uid; + } + + if (body->valid & OBD_MD_FLGID) { + if (((uc->mu_valid == UCRED_OLD) || + (uc->mu_valid == UCRED_NEW)) && + !(uc->mu_squash & SQUASH_GID)) { + if (body->gid == uc->mu_gid) + gid = uc->mu_o_gid; + else if (body->gid == uc->mu_fsgid) + gid = uc->mu_o_fsgid; + else + gid = mdt_idmap_lookup_gid(idmap, 1, body->gid); + } else { + gid = mdt_idmap_lookup_gid(idmap, 1, body->gid); + } + + if (gid == MDT_IDMAP_NOTFOUND) { + gid = med->med_nllg; + if (body->valid & OBD_MD_FLMODE) + body->mode = (body->mode & ~S_IRWXG) | + ((body->mode & S_IRWXO) << 3); + } + + body->gid = gid; + } +} + +/* NB: return error if no mapping, so this will look strange: + * if client hasn't kinit the to map xid for the mapped xid, client + * will always get -EPERM, and the same for rootsquash case. */ +int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *req, + struct mdt_remote_perm *perm) +{ + struct mdt_export_data *med = mdt_req2med(req); + uid_t uid, fsuid; + gid_t gid, fsgid; + + LASSERT(med->med_rmtclient); + + uid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_uid); + if (uid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for uid %u\n", perm->rp_uid); + return -EPERM; + } + + gid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_gid); + if (gid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for gid %u\n", perm->rp_gid); + return -EPERM; + } + + if (perm->rp_uid != perm->rp_fsuid) { + fsuid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_fsuid); + if (fsuid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsuid %u\n", perm->rp_fsuid); + return -EPERM; + } + } else { + fsuid = uid; + } + + if (perm->rp_gid != perm->rp_fsgid) { + fsgid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_fsgid); + if (fsgid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsgid %u\n", perm->rp_fsgid); + return -EPERM; + } + } else { + fsgid = gid; + } + + perm->rp_uid = uid; + perm->rp_gid = gid; + perm->rp_fsuid = fsuid; + perm->rp_fsgid = fsgid; + return 0; +} + +/* Process remote client and rootsquash */ +int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = mdt_ucred(info); + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + + ENTRY; + + if ((uc->mu_valid != UCRED_OLD) && (uc->mu_valid != UCRED_NEW)) + RETURN(-EINVAL); + + if (!med->med_rmtclient && (uc->mu_squash == SQUASH_NONE)) + RETURN(0); + + if (op != REINT_SETATTR) { + if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) + attr->la_uid = uc->mu_fsuid; + if (op != REINT_CREATE) { + if ((attr->la_valid & LA_GID) && (attr->la_gid != -1)) + attr->la_gid = uc->mu_fsgid; + } else { + /* for S_ISGID, inherit gid from his parent */ + if (!(attr->la_mode & S_ISGID) && (attr->la_gid != -1)) + attr->la_gid = uc->mu_fsgid; + } + } else if (med->med_rmtclient) { + /* NB: -1 case will be handled by mdt_fix_attr() later. */ + if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) { + uid_t uid; + + if (attr->la_uid == uc->mu_o_uid) + uid = uc->mu_uid; + else if (attr->la_uid == uc->mu_o_fsuid) + uid = uc->mu_fsuid; + else + uid = mdt_idmap_lookup_uid(idmap, 0, + attr->la_uid); + + if (uid == MDT_IDMAP_NOTFOUND) { + CWARN("Deny chown to uid %u\n", attr->la_uid); + RETURN(-EPERM); + } + + attr->la_uid = uid; + } + if ((attr->la_valid & LA_GID) && (attr->la_gid != -1)) { + gid_t gid; + + if (attr->la_gid == uc->mu_o_gid) + gid = uc->mu_gid; + else if (attr->la_gid == uc->mu_o_fsgid) + gid = uc->mu_fsgid; + else + gid = mdt_idmap_lookup_gid(idmap, 0, + attr->la_gid); + + if (gid == MDT_IDMAP_NOTFOUND) { + CWARN("Deny chown to gid %u\n", attr->la_gid); + RETURN(-EPERM); + } + + attr->la_gid = gid; + } + } + + RETURN(0); +} diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h new file mode 100644 index 0000000..2434c62 --- /dev/null +++ b/lustre/mdt/mdt_internal.h @@ -0,0 +1,835 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdt/mdt_internal.h + * Lustre Metadata Target (mdt) request handler + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * Author: Andreas Dilger <adilger@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Mike Shaver <shaver@clusterfs.com> + * Author: Nikita Danilov <nikita@clusterfs.com> + * Author: Huang Hua <huanghua@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _MDT_INTERNAL_H +#define _MDT_INTERNAL_H + +#if defined(__KERNEL__) + +/* + * struct ptlrpc_client + */ +#include <lustre_net.h> +#include <obd.h> +/* + * struct obd_connect_data + * struct lustre_handle + */ +#include <lustre/lustre_idl.h> +#include <md_object.h> +#include <dt_object.h> +#include <lustre_fid.h> +#include <lustre_fld.h> +#include <lustre_req_layout.h> +/* LR_CLIENT_SIZE, etc. */ +#include <lustre_disk.h> +#include <lustre_sec.h> +#include <lvfs.h> + + +/* Data stored per client in the last_rcvd file. In le32 order. */ +struct mdt_client_data { + __u8 mcd_uuid[40]; /* client UUID */ + __u64 mcd_last_transno; /* last completed transaction ID */ + __u64 mcd_last_xid; /* xid for the last transaction */ + __u32 mcd_last_result; /* result from last RPC */ + __u32 mcd_last_data; /* per-op data (disposition for open &c.) */ + /* for MDS_CLOSE and MDS_DONE_WRITTING requests */ + __u64 mcd_last_close_transno; /* last completed transaction ID */ + __u64 mcd_last_close_xid; /* xid for the last transaction */ + __u32 mcd_last_close_result; /* result from last RPC */ + __u8 mcd_padding[LR_CLIENT_SIZE - 84]; +}; + +static inline __u64 mcd_last_transno(struct mdt_client_data *mcd) +{ + return max(mcd->mcd_last_transno, mcd->mcd_last_close_transno); +} + +static inline __u64 mcd_last_xid(struct mdt_client_data *mcd) +{ + return max(mcd->mcd_last_xid, mcd->mcd_last_close_xid); +} + +/* check if request's xid is equal to last one or not*/ +static inline int req_xid_is_last(struct ptlrpc_request *req) +{ + struct mdt_client_data *mcd = req->rq_export->exp_mdt_data.med_mcd; + return (req->rq_xid == mcd->mcd_last_xid || + req->rq_xid == mcd->mcd_last_close_xid); +} + +/* copied from lr_server_data. + * mds data stored at the head of last_rcvd file. In le32 order. */ +struct mdt_server_data { + __u8 msd_uuid[40]; /* server UUID */ + __u64 msd_last_transno; /* last completed transaction ID */ + __u64 msd_mount_count; /* incarnation number */ + __u32 msd_feature_compat; /* compatible feature flags */ + __u32 msd_feature_rocompat;/* read-only compatible feature flags */ + __u32 msd_feature_incompat;/* incompatible feature flags */ + __u32 msd_server_size; /* size of server data area */ + __u32 msd_client_start; /* start of per-client data area */ + __u16 msd_client_size; /* size of per-client data area */ + //__u16 msd_subdir_count; /* number of subdirectories for objects */ + //__u64 msd_catalog_oid; /* recovery catalog object id */ + //__u32 msd_catalog_ogen; /* recovery catalog inode generation */ + //__u8 msd_peeruuid[40]; /* UUID of MDS associated with this OST */ + //__u32 msd_ost_index; /* index number of OST in LOV */ + //__u32 msd_mdt_index; /* index number of MDT in LMV */ + __u8 msd_padding[LR_SERVER_SIZE - 78]; +}; + +struct mdt_object; +/* file data for open files on MDS */ +struct mdt_file_data { + struct portals_handle mfd_handle; /* must be first */ + struct list_head mfd_list; /* protected by med_open_lock */ + __u64 mfd_xid; /* xid of the open request */ + struct lustre_handle mfd_old_handle; /* old handle in replay case */ + int mfd_mode; /* open mode provided by client */ + struct mdt_object *mfd_object; /* point to opened object */ +}; + +struct mdt_device { + /* super-class */ + struct md_device mdt_md_dev; + struct ptlrpc_service *mdt_regular_service; + struct ptlrpc_service *mdt_readpage_service; + struct ptlrpc_service *mdt_xmds_service; + struct ptlrpc_service *mdt_setattr_service; + struct ptlrpc_service *mdt_mdsc_service; + struct ptlrpc_service *mdt_mdss_service; + struct ptlrpc_service *mdt_dtss_service; + struct ptlrpc_service *mdt_fld_service; + /* DLM name-space for meta-data locks maintained by this server */ + struct ldlm_namespace *mdt_namespace; + /* ptlrpc handle for MDS->client connections (for lock ASTs). */ + struct ptlrpc_client *mdt_ldlm_client; + /* underlying device */ + struct md_device *mdt_child; + struct dt_device *mdt_bottom; + /* + * Options bit-fields. + */ + struct { + signed int mo_user_xattr :1, + mo_acl :1, + mo_compat_resname:1, + mo_mds_capa :1, + mo_oss_capa :1; + } mdt_opts; + + /* lock to pretect epoch and write count */ + spinlock_t mdt_ioepoch_lock; + __u64 mdt_ioepoch; + + /* Transaction related stuff here */ + spinlock_t mdt_transno_lock; + __u64 mdt_last_transno; + + /* transaction callbacks */ + struct dt_txn_callback mdt_txn_cb; + /* last_rcvd file */ + struct dt_object *mdt_last_rcvd; + + /* these values should be updated from lov if necessary. + * or should be placed somewhere else. */ + int mdt_max_mdsize; + int mdt_max_cookiesize; + __u64 mdt_mount_count; + + /* last_rcvd data */ + struct mdt_server_data mdt_msd; + spinlock_t mdt_client_bitmap_lock; + unsigned long mdt_client_bitmap[(LR_MAX_CLIENTS >> 3) / sizeof(long)]; + + struct upcall_cache *mdt_identity_cache; + struct upcall_cache *mdt_rmtacl_cache; + + /* root squash */ + struct rootsquash_info *mdt_rootsquash_info; + + /* capability keys */ + unsigned long mdt_capa_timeout; + __u32 mdt_capa_alg; + struct dt_object *mdt_ck_obj; + unsigned long mdt_ck_timeout; + unsigned long mdt_ck_expiry; + struct timer_list mdt_ck_timer; + struct ptlrpc_thread mdt_ck_thread; + struct lustre_capa_key mdt_capa_keys[2]; + unsigned int mdt_capa_conf:1; + + cfs_proc_dir_entry_t *mdt_proc_entry; + struct lprocfs_stats *mdt_stats; +}; + +/*XXX copied from mds_internal.h */ +#define MDT_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000) +#define MDT_ROCOMPAT_SUPP (OBD_ROCOMPAT_LOVOBJID) +#define MDT_INCOMPAT_SUPP (OBD_INCOMPAT_MDT | OBD_INCOMPAT_COMMON_LR) + +struct mdt_object { + struct lu_object_header mot_header; + struct md_object mot_obj; + __u64 mot_ioepoch; + __u64 mot_flags; + int mot_epochcount; + int mot_writecount; +}; + +struct mdt_lock_handle { + /* Lock type, reg for cross-ref use or pdo lock. */ + mdl_type_t mlh_type; + + /* Regular lock */ + struct lustre_handle mlh_reg_lh; + ldlm_mode_t mlh_reg_mode; + + /* Pdirops lock */ + struct lustre_handle mlh_pdo_lh; + ldlm_mode_t mlh_pdo_mode; + unsigned int mlh_pdo_hash; +}; + +enum { + MDT_LH_PARENT, /* parent lockh */ + MDT_LH_CHILD, /* child lockh */ + MDT_LH_OLD, /* old lockh for rename */ + MDT_LH_NEW, /* new lockh for rename */ + MDT_LH_RMT, /* used for return lh to caller */ + MDT_LH_NR +}; + +enum { + MDT_LOCAL_LOCK, + MDT_CROSS_LOCK +}; + +struct mdt_reint_record { + mdt_reint_t rr_opcode; + const struct lustre_handle *rr_handle; + const struct lu_fid *rr_fid1; + const struct lu_fid *rr_fid2; + const char *rr_name; + int rr_namelen; + const char *rr_tgt; + int rr_tgtlen; + const void *rr_eadata; + int rr_eadatalen; + int rr_logcookielen; + const struct llog_cookie *rr_logcookies; + __u32 rr_flags; +}; + +enum mdt_reint_flag { + MRF_SETATTR_LOCKED = 1 << 0, +}; + +/* + * Common data shared by mdt-level handlers. This is allocated per-thread to + * reduce stack consumption. + */ +struct mdt_thread_info { + /* + * XXX: Part One: + * The following members will be filled expilictly + * with specific data in mdt_thread_info_init(). + */ + + /* + * for req-layout interface. This field should be first to be compatible + * with "struct com_thread_info" in seq and fld. + */ + struct req_capsule mti_pill; + /* although we have export in req, there are cases when it is not + * available, e.g. closing files upon export destroy */ + struct obd_export *mti_exp; + /* + * number of buffers in reply message. + */ + int mti_rep_buf_nr; + /* + * sizes of reply buffers. + */ + int mti_rep_buf_size[REQ_MAX_FIELD_NR]; + /* + * A couple of lock handles. + */ + struct mdt_lock_handle mti_lh[MDT_LH_NR]; + + struct mdt_device *mti_mdt; + const struct lu_env *mti_env; + + /* + * Additional fail id that can be set by handler. Passed to + * target_send_reply(). + */ + int mti_fail_id; + + /* transaction number of current request */ + __u64 mti_transno; + + + /* + * XXX: Part Two: + * The following members will be filled expilictly + * with zero in mdt_thread_info_init(). These members may be used + * by all requests. + */ + + /* + * Object attributes. + */ + struct md_attr mti_attr; + /* + * Body for "habeo corpus" operations. + */ + const struct mdt_body *mti_body; + /* + * Host object. This is released at the end of mdt_handler(). + */ + struct mdt_object *mti_object; + /* + * Lock request for "habeo clavis" operations. + */ + const struct ldlm_request *mti_dlm_req; + + __u32 mti_has_trans:1, /* has txn already? */ + mti_no_need_trans:1, + mti_cross_ref:1; + + /* opdata for mdt_reint_open(), has the same as + * ldlm_reply:lock_policy_res1. mdt_update_last_rcvd() stores this + * value onto disk for recovery when mdt_trans_stop_cb() is called. + */ + __u64 mti_opdata; + + /* + * XXX: Part Three: + * The following members will be filled expilictly + * with zero in mdt_reint_unpack(), because they are only used + * by reint requests (including mdt_reint_open()). + */ + + /* + * reint record. contains information for reint operations. + */ + struct mdt_reint_record mti_rr; + + /* + * Operation specification (currently create and lookup) + */ + struct md_op_spec mti_spec; + + /* + * XXX: Part Four: + * The following members will _NOT_ be initialized at all. + * DO NOT expect them to contain any valid value. + * They should be initialized explicitly by the user themselves. + */ + + /* XXX: If something is in a union, make sure they do not conflict */ + + struct lu_fid mti_tmp_fid1; + struct lu_fid mti_tmp_fid2; + ldlm_policy_data_t mti_policy; /* for mdt_object_lock() and + * mdt_rename_lock() */ + struct ldlm_res_id mti_res_id; /* for mdt_object_lock() and + mdt_rename_lock() */ + union { + struct obd_uuid uuid[2]; /* for mdt_seq_init_cli() */ + char ns_name[48]; /* for mdt_init0() */ + struct lustre_cfg_bufs bufs; /* for mdt_stack_fini() */ + struct kstatfs ksfs; /* for mdt_statfs() */ + struct { + /* for mdt_readpage() */ + struct lu_rdpg mti_rdpg; + /* for mdt_sendpage() */ + struct l_wait_info mti_wait_info; + } rdpg; + } mti_u; + + /* IO epoch related stuff. */ + struct mdt_epoch *mti_epoch; + __u64 mti_replayepoch; + + /* server and client data buffers */ + struct mdt_server_data mti_msd; + struct mdt_client_data mti_mcd; + loff_t mti_off; + struct txn_param mti_txn_param; + struct lu_buf mti_buf; + struct lustre_capa_key mti_capa_key; + + /* Ops object filename */ + struct lu_name mti_name; +}; + +typedef void (*mdt_cb_t)(const struct mdt_device *mdt, __u64 transno, + void *data, int err); +struct mdt_commit_cb { + mdt_cb_t mdt_cb_func; + void *mdt_cb_data; +}; + +/* + * Info allocated per-transaction. + */ +#define MDT_MAX_COMMIT_CB 4 +struct mdt_txn_info { + __u64 txi_transno; + unsigned int txi_cb_count; + struct mdt_commit_cb txi_cb[MDT_MAX_COMMIT_CB]; +}; + +extern struct lu_context_key mdt_txn_key; + +static inline void mdt_trans_add_cb(const struct thandle *th, + mdt_cb_t cb_func, void *cb_data) +{ + struct mdt_txn_info *txi; + + txi = lu_context_key_get(&th->th_ctx, &mdt_txn_key); + LASSERT(txi->txi_cb_count < ARRAY_SIZE(txi->txi_cb)); + + /* add new callback */ + txi->txi_cb[txi->txi_cb_count].mdt_cb_func = cb_func; + txi->txi_cb[txi->txi_cb_count].mdt_cb_data = cb_data; + txi->txi_cb_count++; +} + +static inline struct md_device_operations *mdt_child_ops(struct mdt_device * m) +{ + LASSERT(m->mdt_child); + return m->mdt_child->md_ops; +} + +static inline struct md_object *mdt_object_child(struct mdt_object *o) +{ + return lu2md(lu_object_next(&o->mot_obj.mo_lu)); +} + +static inline struct ptlrpc_request *mdt_info_req(struct mdt_thread_info *info) +{ + return info->mti_pill.rc_req; +} + +static inline __u64 mdt_conn_flags(struct mdt_thread_info *info) +{ + LASSERT(info->mti_exp); + return info->mti_exp->exp_connect_flags; +} + +static inline void mdt_object_get(const struct lu_env *env, + struct mdt_object *o) +{ + ENTRY; + lu_object_get(&o->mot_obj.mo_lu); + EXIT; +} + +static inline void mdt_object_put(const struct lu_env *env, + struct mdt_object *o) +{ + ENTRY; + lu_object_put(env, &o->mot_obj.mo_lu); + EXIT; +} + +static inline int mdt_object_exists(const struct mdt_object *o) +{ + return lu_object_exists(&o->mot_obj.mo_lu); +} + +static inline const struct lu_fid *mdt_object_fid(struct mdt_object *o) +{ + return lu_object_fid(&o->mot_obj.mo_lu); +} + +int mdt_get_disposition(struct ldlm_reply *rep, int flag); +void mdt_set_disposition(struct mdt_thread_info *info, + struct ldlm_reply *rep, int flag); +void mdt_clear_disposition(struct mdt_thread_info *info, + struct ldlm_reply *rep, int flag); + +void mdt_lock_pdo_init(struct mdt_lock_handle *lh, + ldlm_mode_t lm, const char *name, + int namelen); + +void mdt_lock_reg_init(struct mdt_lock_handle *lh, + ldlm_mode_t lm); + +int mdt_lock_setup(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh); + +int mdt_object_lock(struct mdt_thread_info *, + struct mdt_object *, + struct mdt_lock_handle *, + __u64, int); + +void mdt_object_unlock(struct mdt_thread_info *, + struct mdt_object *, + struct mdt_lock_handle *, + int decref); + +struct mdt_object *mdt_object_find(const struct lu_env *, + struct mdt_device *, + const struct lu_fid *); +struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *, + const struct lu_fid *, + struct mdt_lock_handle *, + __u64); +void mdt_object_unlock_put(struct mdt_thread_info *, + struct mdt_object *, + struct mdt_lock_handle *, + int decref); + +int mdt_close_unpack(struct mdt_thread_info *info); +int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op); +int mdt_reint_rec(struct mdt_thread_info *, struct mdt_lock_handle *); +void mdt_pack_size2body(struct mdt_thread_info *info, + struct mdt_object *o); +void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b, + const struct lu_attr *attr, const struct lu_fid *fid); + +int mdt_getxattr(struct mdt_thread_info *info); +int mdt_setxattr(struct mdt_thread_info *info); + +void mdt_lock_handle_init(struct mdt_lock_handle *lh); +void mdt_lock_handle_fini(struct mdt_lock_handle *lh); + +void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *); + +extern void target_recovery_fini(struct obd_device *obd); +extern void target_recovery_init(struct obd_device *obd, + svc_handler_t handler); +int mdt_fs_setup(const struct lu_env *, struct mdt_device *, + struct obd_device *); +void mdt_fs_cleanup(const struct lu_env *, struct mdt_device *); + +int mdt_client_del(const struct lu_env *env, + struct mdt_device *mdt); +int mdt_client_add(const struct lu_env *env, + struct mdt_device *mdt, + int cl_idx); +int mdt_client_new(const struct lu_env *env, + struct mdt_device *mdt); + +int mdt_pin(struct mdt_thread_info* info); + +int mdt_lock_new_child(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *child_lockh); + +void mdt_mfd_set_mode(struct mdt_file_data *mfd, + int mode); + +int mdt_reint_open(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc); + +struct mdt_file_data *mdt_handle2mfd(struct mdt_thread_info *, + const struct lustre_handle *); +int mdt_epoch_open(struct mdt_thread_info *info, struct mdt_object *o); +void mdt_sizeonmds_enable(struct mdt_thread_info *info, struct mdt_object *mo); +int mdt_sizeonmds_enabled(struct mdt_object *mo); +int mdt_write_get(struct mdt_device *mdt, struct mdt_object *o); +int mdt_write_read(struct mdt_device *mdt, struct mdt_object *o); +struct mdt_file_data *mdt_mfd_new(void); +int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd); +void mdt_mfd_free(struct mdt_file_data *mfd); +int mdt_close(struct mdt_thread_info *info); +int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, + int flags); +int mdt_done_writing(struct mdt_thread_info *info); +void mdt_shrink_reply(struct mdt_thread_info *info); +int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *, + const struct md_attr *); +void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *); +struct thandle* mdt_trans_start(const struct lu_env *env, + struct mdt_device *mdt, int credits); +void mdt_trans_stop(const struct lu_env *env, + struct mdt_device *mdt, struct thandle *th); +int mdt_record_write(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *buf, + loff_t *pos, struct thandle *th); +int mdt_record_read(const struct lu_env *env, + struct dt_object *dt, struct lu_buf *buf, loff_t *pos); + +struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len); +const struct lu_buf *mdt_buf_const(const struct lu_env *env, + const void *area, ssize_t len); + +void mdt_dump_lmm(int level, const struct lov_mds_md *lmm); + +int mdt_check_ucred(struct mdt_thread_info *); + +int mdt_init_ucred(struct mdt_thread_info *, struct mdt_body *); + +int mdt_init_ucred_reint(struct mdt_thread_info *); + +void mdt_exit_ucred(struct mdt_thread_info *); + +int groups_from_list(struct group_info *, gid_t *); + +void groups_sort(struct group_info *); + +/* mdt_idmap.c */ +int mdt_init_idmap(struct mdt_thread_info *); + +void mdt_cleanup_idmap(struct mdt_export_data *); + +int mdt_handle_idmap(struct mdt_thread_info *); + +int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *, + struct ptlrpc_user_desc *); + +void mdt_body_reverse_idmap(struct mdt_thread_info *, + struct mdt_body *); + +int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *, + struct mdt_remote_perm *); + +int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32); + +static inline struct mdt_device *mdt_dev(struct lu_device *d) +{ +// LASSERT(lu_device_is_mdt(d)); + return container_of0(d, struct mdt_device, mdt_md_dev.md_lu_dev); +} + +/* mdt/mdt_identity.c */ +#define MDT_IDENTITY_UPCALL_PATH "/usr/sbin/l_getidentity" + +extern struct upcall_cache_ops mdt_identity_upcall_cache_ops; + +struct mdt_identity *mdt_identity_get(struct upcall_cache *, __u32); + +void mdt_identity_put(struct upcall_cache *, struct mdt_identity *); + +void mdt_flush_identity(struct upcall_cache *, int); + +__u32 mdt_identity_get_setxid_perm(struct mdt_identity *, __u32, lnet_nid_t); + +int mdt_pack_remote_perm(struct mdt_thread_info *, struct mdt_object *, void *); + +/* mdt/mdt_rmtacl.c */ +#define MDT_RMTACL_UPCALL_PATH "/usr/sbin/l_facl" + +extern struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops; + +int mdt_rmtacl_upcall(struct mdt_thread_info *, char *, struct lu_buf *); + +extern struct lu_context_key mdt_thread_key; +/* debug issues helper starts here*/ +static inline void mdt_fail_write(const struct lu_env *env, + struct dt_device *dd, int id) +{ + if (OBD_FAIL_CHECK(id)) { + CERROR(LUSTRE_MDT_NAME": obd_fail_loc=%x, fail write ops\n", + id); + dd->dd_ops->dt_ro(env, dd); + /* We set FAIL_ONCE because we never "un-fail" a device */ + obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; + } +} + +static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req) +{ + return &req->rq_export->exp_mdt_data; +} + +#define MDT_FAIL_CHECK(id) \ +({ \ + if (unlikely(OBD_FAIL_CHECK(id))) \ + CERROR(LUSTRE_MDT_NAME": " #id " test failed\n"); \ + OBD_FAIL_CHECK(id); \ +}) + +#define MDT_FAIL_CHECK_ONCE(id) \ +({ int _ret_ = 0; \ + if (unlikely(OBD_FAIL_CHECK(id))) { \ + CERROR(LUSTRE_MDT_NAME": *** obd_fail_loc=%x ***\n", id); \ + obd_fail_loc |= OBD_FAILED; \ + if ((id) & OBD_FAIL_ONCE) \ + obd_fail_loc |= OBD_FAIL_ONCE; \ + _ret_ = 1; \ + } \ + _ret_; \ +}) + +#define MDT_FAIL_RETURN(id, ret) \ +do { \ + if (unlikely(MDT_FAIL_CHECK_ONCE(id))) { \ + RETURN(ret); \ + } \ +} while(0) + +struct md_ucred *mdt_ucred(const struct mdt_thread_info *info); + +static inline int is_identity_get_disabled(struct upcall_cache *cache) +{ + return cache ? (strcmp(cache->uc_upcall, "NONE") == 0) : 1; +} + +/* Issues dlm lock on passed @ns, @f stores it lock handle into @lh. */ +static inline int mdt_fid_lock(struct ldlm_namespace *ns, + struct lustre_handle *lh, + ldlm_mode_t mode, + ldlm_policy_data_t *policy, + const struct ldlm_res_id *res_id, + int flags) +{ + int rc; + + LASSERT(ns != NULL); + LASSERT(lh != NULL); + + rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, policy, + mode, &flags, ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, + 0, NULL, lh); + return rc == ELDLM_OK ? 0 : -EIO; +} + +static inline void mdt_fid_unlock(struct lustre_handle *lh, + ldlm_mode_t mode) +{ + ldlm_lock_decref(lh, mode); +} + +extern mdl_mode_t mdt_mdl_lock_modes[]; +extern ldlm_mode_t mdt_dlm_lock_modes[]; + +static inline mdl_mode_t mdt_dlm_mode2mdl_mode(ldlm_mode_t mode) +{ + LASSERT(IS_PO2(mode)); + return mdt_mdl_lock_modes[mode]; +} + +static inline ldlm_mode_t mdt_mdl_mode2dlm_mode(mdl_mode_t mode) +{ + LASSERT(IS_PO2(mode)); + return mdt_dlm_lock_modes[mode]; +} + +static inline struct lu_name *mdt_name(const struct lu_env *env, + char *name, int namelen) +{ + struct lu_name *lname; + struct mdt_thread_info *mti; + + LASSERT(namelen > 0); + /* trailing '\0' in buffer */ + LASSERT(name[namelen] == '\0'); + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + lname = &mti->mti_name; + lname->ln_name = name; + lname->ln_namelen = namelen; + return lname; +} + +static inline struct lu_name *mdt_name_copy(struct lu_name *tlname, + struct lu_name *slname) +{ + LASSERT(tlname); + LASSERT(slname); + + tlname->ln_name = slname->ln_name; + tlname->ln_namelen = slname->ln_namelen; + return tlname; +} + +/* lprocfs stuff */ +int mdt_procfs_init(struct mdt_device *mdt, const char *name); +int mdt_procfs_fini(struct mdt_device *mdt); + +void mdt_time_start(const struct mdt_thread_info *info); +void mdt_time_end(const struct mdt_thread_info *info, int idx); + +enum { + LPROC_MDT_NR +}; + +/* Capability */ +int mdt_ck_thread_start(struct mdt_device *mdt); +void mdt_ck_thread_stop(struct mdt_device *mdt); +void mdt_ck_timer_callback(unsigned long castmeharder); +int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt); + +static inline void mdt_set_capainfo(struct mdt_thread_info *info, int offset, + const struct lu_fid *fid, + struct lustre_capa *capa) +{ + struct mdt_device *dev = info->mti_mdt; + struct md_capainfo *ci; + + LASSERT(offset >= 0 && offset <= MD_CAPAINFO_MAX); + if (!dev->mdt_opts.mo_mds_capa) + return; + + ci = md_capainfo(info->mti_env); + LASSERT(ci); + ci->mc_fid[offset] = fid; + ci->mc_capa[offset] = capa; +} + +static inline void mdt_dump_capainfo(struct mdt_thread_info *info) +{ + struct md_capainfo *ci = md_capainfo(info->mti_env); + int i; + + if (!ci) + return; + for (i = 0; i < MD_CAPAINFO_MAX; i++) { + if (!ci->mc_fid[i]) + continue; + if (!ci->mc_capa[i]) { + CERROR("no capa for index %d "DFID"\n", + i, PFID(ci->mc_fid[i])); + continue; + } + if (ci->mc_capa[i] == BYPASS_CAPA) { + CERROR("bypass for index %d "DFID"\n", + i, PFID(ci->mc_fid[i])); + continue; + } + DEBUG_CAPA(D_ERROR, ci->mc_capa[i], "index %d", i); + } +} + +#endif /* __KERNEL__ */ +#endif /* _MDT_H */ diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c new file mode 100644 index 0000000..01793a5 --- /dev/null +++ b/lustre/mdt/mdt_lib.c @@ -0,0 +1,1143 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdt/mdt_lib.c + * Lustre Metadata Target (mdt) request unpacking helper. + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * Author: Andreas Dilger <adilger@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Mike Shaver <shaver@clusterfs.com> + * Author: Nikita Danilov <nikita@clusterfs.com> + * Author: Huang Hua <huanghua@clusterfs.com> + * + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include "mdt_internal.h" + + +typedef enum ucred_init_type { + NONE_INIT = 0, + BODY_INIT = 1, + REC_INIT = 2 +} ucred_init_type_t; + +int groups_from_list(struct group_info *ginfo, gid_t *glist) +{ + int i; + int count = ginfo->ngroups; + + /* fill group_info from gid array */ + for (i = 0; i < ginfo->nblocks; i++) { + int cp_count = min(NGROUPS_PER_BLOCK, count); + int off = i * NGROUPS_PER_BLOCK; + int len = cp_count * sizeof(*glist); + + if (memcpy(ginfo->blocks[i], glist + off, len)) + return -EFAULT; + + count -= cp_count; + } + return 0; +} + +/* groups_sort() is copied from linux kernel! */ +/* a simple shell-metzner sort */ +void groups_sort(struct group_info *group_info) +{ + int base, max, stride; + int gidsetsize = group_info->ngroups; + + for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1) + ; /* nothing */ + stride /= 3; + + while (stride) { + max = gidsetsize - stride; + for (base = 0; base < max; base++) { + int left = base; + int right = left + stride; + gid_t tmp = GROUP_AT(group_info, right); + + while (left >= 0 && GROUP_AT(group_info, left) > tmp) { + GROUP_AT(group_info, right) = + GROUP_AT(group_info, left); + right = left; + left -= stride; + } + GROUP_AT(group_info, right) = tmp; + } + stride /= 3; + } +} + +void mdt_exit_ucred(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_device *mdt = info->mti_mdt; + + if (uc->mu_valid != UCRED_INIT) { + uc->mu_suppgids[0] = uc->mu_suppgids[1] = -1; + if (uc->mu_ginfo) { + groups_free(uc->mu_ginfo); + uc->mu_ginfo = NULL; + } + if (uc->mu_identity) { + mdt_identity_put(mdt->mdt_identity_cache, + uc->mu_identity); + uc->mu_identity = NULL; + } + uc->mu_valid = UCRED_INIT; + } +} + +static int old_init_ucred(struct mdt_thread_info *info, + struct mdt_body *body) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_device *mdt = info->mti_mdt; + struct mdt_identity *identity = NULL; + + ENTRY; + + uc->mu_valid = UCRED_INVALID; + + if (!is_identity_get_disabled(mdt->mdt_identity_cache)) { + /* get identity info of this user */ + identity = mdt_identity_get(mdt->mdt_identity_cache, + body->fsuid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + body->fsuid); + RETURN(-EACCES); + } + } + + uc->mu_valid = UCRED_OLD; + uc->mu_squash = SQUASH_NONE; + uc->mu_o_uid = uc->mu_uid = body->uid; + uc->mu_o_gid = uc->mu_gid = body->gid; + uc->mu_o_fsuid = uc->mu_fsuid = body->fsuid; + uc->mu_o_fsgid = uc->mu_fsgid = body->fsgid; + uc->mu_suppgids[0] = body->suppgid; + uc->mu_suppgids[1] = -1; + if (uc->mu_fsuid) + uc->mu_cap = body->capability & ~CAP_FS_MASK; + else + uc->mu_cap = body->capability; + uc->mu_ginfo = NULL; + uc->mu_identity = identity; + + RETURN(0); +} + +static int old_init_ucred_reint(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_device *mdt = info->mti_mdt; + struct mdt_identity *identity = NULL; + + ENTRY; + + uc->mu_valid = UCRED_INVALID; + + if (!is_identity_get_disabled(mdt->mdt_identity_cache)) { + /* get identity info of this user */ + identity = mdt_identity_get(mdt->mdt_identity_cache, + uc->mu_fsuid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + uc->mu_fsuid); + RETURN(-EACCES); + } + } + + uc->mu_valid = UCRED_OLD; + uc->mu_squash = SQUASH_NONE; + uc->mu_o_uid = uc->mu_o_fsuid = uc->mu_uid = uc->mu_fsuid; + uc->mu_o_gid = uc->mu_o_fsgid = uc->mu_gid = uc->mu_fsgid; + if (uc->mu_fsuid) + uc->mu_cap &= ~CAP_FS_MASK; + uc->mu_ginfo = NULL; + uc->mu_identity = identity; + + RETURN(0); +} + +static int nid_nosquash(struct mdt_device *mdt, lnet_nid_t nid) +{ + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + int i; + + for (i = 0; i < rsi->rsi_n_nosquash_nids; i++) + if ((rsi->rsi_nosquash_nids[i] == nid) || + (rsi->rsi_nosquash_nids[i] == LNET_NID_ANY)) + return 1; + + return 0; +} + +static int mdt_squash_root(struct mdt_device *mdt, struct md_ucred *ucred, + struct ptlrpc_user_desc *pud, lnet_nid_t peernid) +{ + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + + if (!rsi || (!rsi->rsi_uid && !rsi->rsi_gid) || + nid_nosquash(mdt, peernid)) + return 0; + + CDEBUG(D_SEC, "squash req from "LPX64":" + "(%u:%u-%u:%u/%x)=>(%u:%u-%u:%u/%x)\n", peernid, + pud->pud_uid, pud->pud_gid, + pud->pud_fsuid, pud->pud_fsgid, pud->pud_cap, + pud->pud_uid ? pud->pud_uid : rsi->rsi_uid, + pud->pud_uid ? pud->pud_gid : rsi->rsi_gid, + pud->pud_fsuid ? pud->pud_fsuid : rsi->rsi_uid, + pud->pud_fsuid ? pud->pud_fsgid : rsi->rsi_gid, + pud->pud_cap & ~CAP_FS_MASK); + + if (rsi->rsi_uid) { + if (!pud->pud_uid) { + ucred->mu_uid = rsi->rsi_uid; + ucred->mu_squash |= SQUASH_UID; + } else { + ucred->mu_uid = pud->pud_uid; + } + + if (!pud->pud_fsuid) { + ucred->mu_fsuid = rsi->rsi_uid; + ucred->mu_squash |= SQUASH_UID; + } else { + ucred->mu_fsuid = pud->pud_fsuid; + } + } else { + ucred->mu_uid = pud->pud_uid; + ucred->mu_fsuid = pud->pud_fsuid; + } + + if (rsi->rsi_gid) { + int i; + + if (!pud->pud_gid) { + ucred->mu_gid = rsi->rsi_gid; + ucred->mu_squash |= SQUASH_GID; + } else { + ucred->mu_gid = pud->pud_gid; + } + + if (!pud->pud_fsgid) { + ucred->mu_fsgid = rsi->rsi_gid; + ucred->mu_squash |= SQUASH_GID; + } else { + ucred->mu_fsgid = pud->pud_fsgid; + } + + for (i = 0; i < 2; i++) { + if (!ucred->mu_suppgids[i]) { + ucred->mu_suppgids[i] = rsi->rsi_gid; + ucred->mu_squash |= SQUASH_GID; + } + } + + for (i = 0; i < pud->pud_ngroups; i++) { + if (!pud->pud_groups[i]) { + pud->pud_groups[i] = rsi->rsi_gid; + ucred->mu_squash |= SQUASH_GID; + } + } + } else { + ucred->mu_gid = pud->pud_gid; + ucred->mu_fsgid = pud->pud_fsgid; + } + + return 1; +} + +static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, + void *buf) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct md_ucred *ucred = mdt_ucred(info); + struct mdt_identity *identity = NULL; + lnet_nid_t peernid = req->rq_peer.nid; + __u32 setxid_perm = 0; + int setuid; + int setgid; + int rc = 0; + + ENTRY; + + LASSERT(req->rq_auth_gss); + LASSERT(!req->rq_auth_usr_mdt); + LASSERT(req->rq_user_desc); + + ucred->mu_valid = UCRED_INVALID; + + ucred->mu_o_uid = pud->pud_uid; + ucred->mu_o_gid = pud->pud_gid; + ucred->mu_o_fsuid = pud->pud_fsuid; + ucred->mu_o_fsgid = pud->pud_fsgid; + + if (type == BODY_INIT) { + struct mdt_body *body = (struct mdt_body *)buf; + + ucred->mu_suppgids[0] = body->suppgid; + ucred->mu_suppgids[1] = -1; + } + + /* sanity check: we expect the uid which client claimed is true */ + if (med->med_rmtclient) { + if (req->rq_auth_mapped_uid == INVALID_UID) { + CWARN("remote user not mapped, deny access!\n"); + RETURN(-EACCES); + } + + if (ptlrpc_user_desc_do_idmap(req, pud)) + RETURN(-EACCES); + + if (req->rq_auth_mapped_uid != pud->pud_uid) { + CERROR("remote client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, pud->pud_fsgid); + RETURN(-EACCES); + } + } else { + if (req->rq_auth_uid != pud->pud_uid) { + CERROR("local client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, pud->pud_fsgid); + RETURN(-EACCES); + } + } + + if (is_identity_get_disabled(mdt->mdt_identity_cache)) { + if (med->med_rmtclient) { + CERROR("remote client must run with identity_get " + "enabled!\n"); + RETURN(-EACCES); + } else { + setxid_perm |= LUSTRE_SETGRP_PERM; + goto check_squash; + } + } + + identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", pud->pud_uid); + RETURN(-EACCES); + } + + setxid_perm = mdt_identity_get_setxid_perm(identity, + med->med_rmtclient, + peernid); + + /* find out the setuid/setgid attempt */ + setuid = (pud->pud_uid != pud->pud_fsuid); + setgid = (pud->pud_gid != pud->pud_fsgid || + pud->pud_gid != identity->mi_gid); + + /* check permission of setuid */ + if (setuid && !(setxid_perm & LUSTRE_SETUID_PERM)) { + CWARN("mdt blocked setuid attempt (%u -> %u) from " + LPX64"\n", pud->pud_uid, pud->pud_fsuid, peernid); + GOTO(out, rc = -EACCES); + } + + /* check permission of setgid */ + if (setgid && !(setxid_perm & LUSTRE_SETGID_PERM)) { + CWARN("mdt blocked setgid attempt (%u:%u/%u:%u -> %u) " + "from "LPX64"\n", pud->pud_uid, pud->pud_gid, + pud->pud_fsuid, pud->pud_fsgid, identity->mi_gid, + peernid); + GOTO(out, rc = -EACCES); + } + +check_squash: + /* FIXME: The exact behavior of root_squash is not defined. */ + ucred->mu_squash = SQUASH_NONE; + if (mdt_squash_root(mdt, ucred, pud, peernid) == 0) { + ucred->mu_uid = pud->pud_uid; + ucred->mu_gid = pud->pud_gid; + ucred->mu_fsuid = pud->pud_fsuid; + ucred->mu_fsgid = pud->pud_fsgid; + } + + /* remove fs privilege for non-root user */ + if (ucred->mu_fsuid) + ucred->mu_cap = pud->pud_cap & ~CAP_FS_MASK; + else + ucred->mu_cap = pud->pud_cap; + + /* + * NB: remote client not allowed to setgroups anyway. + */ + if (!med->med_rmtclient && pud->pud_ngroups && + (setxid_perm & LUSTRE_SETGRP_PERM)) { + struct group_info *ginfo; + + /* setgroups for local client */ + ginfo = groups_alloc(pud->pud_ngroups); + if (!ginfo) { + CERROR("failed to alloc %d groups\n", + pud->pud_ngroups); + GOTO(out, rc = -ENOMEM); + } + groups_from_list(ginfo, pud->pud_groups); + groups_sort(ginfo); + ucred->mu_ginfo = ginfo; + } else { + ucred->mu_ginfo = NULL; + } + + ucred->mu_identity = identity; + ucred->mu_valid = UCRED_NEW; + + EXIT; + +out: + if (rc && identity) + mdt_identity_put(mdt->mdt_identity_cache, identity); + + return rc; +} + +int mdt_check_ucred(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct md_ucred *ucred = mdt_ucred(info); + struct mdt_identity *identity; + lnet_nid_t peernid = req->rq_peer.nid; + + ENTRY; + + if ((ucred->mu_valid == UCRED_OLD) || (ucred->mu_valid == UCRED_NEW)) + RETURN(0); + + if (!req->rq_user_desc) + RETURN(0); + + /* sanity check: if we use strong authentication, we expect the + * uid which client claimed is true */ + if (req->rq_auth_gss) { + if (med->med_rmtclient) { + if (req->rq_auth_mapped_uid == INVALID_UID) { + CWARN("remote user not mapped, deny access!\n"); + RETURN(-EACCES); + } + + if (ptlrpc_user_desc_do_idmap(req, pud)) + RETURN(-EACCES); + + if (req->rq_auth_mapped_uid != pud->pud_uid) { + CERROR("remote client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, + pud->pud_fsgid); + RETURN(-EACCES); + } + } else { + if (req->rq_auth_uid != pud->pud_uid) { + CERROR("local client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, + pud->pud_fsgid); + RETURN(-EACCES); + } + } + } + + if (is_identity_get_disabled(mdt->mdt_identity_cache)) { + if (med->med_rmtclient) { + CERROR("remote client must run with " + "identity_get enabled!\n"); + RETURN(-EACCES); + } + } else { + identity = mdt_identity_get(mdt->mdt_identity_cache, + pud->pud_uid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + pud->pud_uid); + RETURN(-EACCES); + } + + mdt_identity_put(mdt->mdt_identity_cache, identity); + } + + RETURN(0); +} + +int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = mdt_ucred(info); + + if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW)) + return 0; + + mdt_exit_ucred(info); + + if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc) + return old_init_ucred(info, body); + else + return new_init_ucred(info, BODY_INIT, body); +} + +int mdt_init_ucred_reint(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = mdt_ucred(info); + + if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW)) + return 0; + + mdt_exit_ucred(info); + + if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc) + return old_init_ucred_reint(info); + else + return new_init_ucred(info, REC_INIT, NULL); +} + +/* copied from lov/lov_ea.c, just for debugging, will be removed later */ +void mdt_dump_lmm(int level, const struct lov_mds_md *lmm) +{ + const struct lov_ost_data_v1 *lod; + int i; + __s16 stripe_count = + le16_to_cpu(((struct lov_user_md*)lmm)->lmm_stripe_count); + + CDEBUG(level, "objid "LPX64", magic 0x%08X, pattern %#X\n", + le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic), + le32_to_cpu(lmm->lmm_pattern)); + CDEBUG(level,"stripe_size=0x%x, stripe_count=0x%x\n", + le32_to_cpu(lmm->lmm_stripe_size), + le32_to_cpu(lmm->lmm_stripe_count)); + LASSERT(stripe_count <= (__s16)LOV_MAX_STRIPE_COUNT); + for (i = 0, lod = lmm->lmm_objects; i < stripe_count; i++, lod++) { + CDEBUG(level, "stripe %u idx %u subobj "LPX64"/"LPX64"\n", + i, le32_to_cpu(lod->l_ost_idx), + le64_to_cpu(lod->l_object_gr), + le64_to_cpu(lod->l_object_id)); + } +} + +void mdt_shrink_reply(struct mdt_thread_info *info) +{ + struct req_capsule *pill = &info->mti_pill; + struct mdt_body *body; + int acl_size, md_size, adjust = 0; + ENTRY; + + body = req_capsule_server_get(pill, &RMF_MDT_BODY); + LASSERT(body != NULL); + + if (body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE | OBD_MD_LINKNAME)) + md_size = body->eadatasize; + else + md_size = 0; + + acl_size = body->aclsize; + + CDEBUG(D_INFO, "Shrink to md_size = %d cookie/acl_size = %d" + " MDSCAPA = %d, OSSCAPA = %d\n", + md_size, acl_size, + (int)(body->valid & OBD_MD_FLMDSCAPA), + (int)(body->valid & OBD_MD_FLOSSCAPA)); +/* + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, or &RMF_LOGCOOKIES +(optional) &RMF_CAPA1, +(optional) &RMF_CAPA2, +(optional) something else +*/ + adjust += req_capsule_shrink(pill, &RMF_MDT_MD, + md_size, adjust, 1); + + if (req_capsule_has_field(pill, &RMF_ACL, RCL_SERVER)) + adjust += req_capsule_shrink(pill, &RMF_ACL, + acl_size, adjust, 1); + else if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) + adjust += req_capsule_shrink(pill, &RMF_LOGCOOKIES, + acl_size, adjust, 1); + + if ((req_capsule_has_field(pill, &RMF_CAPA1, RCL_SERVER) && + !(body->valid & OBD_MD_FLMDSCAPA))) + adjust += req_capsule_shrink(pill, &RMF_CAPA1, 0, adjust, 1); + + if ((req_capsule_has_field(pill, &RMF_CAPA2, RCL_SERVER) && + !(body->valid & OBD_MD_FLOSSCAPA))) + adjust += req_capsule_shrink(pill, &RMF_CAPA2, 0, adjust, 0); + + /* + * Some more field should be shrinked if needed. + * This should be done by those who added fields to reply message. + */ + EXIT; +} + + +/* if object is dying, pack the lov/llog data, + * parameter info->mti_attr should be valid at this point! */ +int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, + const struct md_attr *ma) +{ + struct mdt_body *repbody; + const struct lu_attr *la = &ma->ma_attr; + ENTRY; + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(repbody != NULL); + + if (ma->ma_valid & MA_INODE) + mdt_pack_attr2body(info, repbody, la, mdt_object_fid(mo)); + + if (ma->ma_valid & MA_LOV) { + __u32 mode; + + if (mdt_object_exists(mo) < 0) + /* If it is a remote object, and we do not retrieve + * EA back unlink reg file*/ + mode = S_IFREG; + else + mode = lu_object_attr(&mo->mot_obj.mo_lu); + + LASSERT(ma->ma_lmm_size); + mdt_dump_lmm(D_INFO, ma->ma_lmm); + repbody->eadatasize = ma->ma_lmm_size; + if (S_ISREG(mode)) + repbody->valid |= OBD_MD_FLEASIZE; + else if (S_ISDIR(mode)) + repbody->valid |= OBD_MD_FLDIREA; + else + LBUG(); + } + + if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) { + repbody->aclsize = ma->ma_cookie_size; + repbody->valid |= OBD_MD_FLCOOKIE; + } + + RETURN(0); +} + +static __u64 mdt_attr_valid_xlate(__u64 in, struct mdt_reint_record *rr, + struct md_attr *ma) +{ + __u64 out; + + out = 0; + if (in & ATTR_MODE) + out |= LA_MODE; + if (in & ATTR_UID) + out |= LA_UID; + if (in & ATTR_GID) + out |= LA_GID; + if (in & ATTR_SIZE) + out |= LA_SIZE; + if (in & ATTR_BLOCKS) + out |= LA_BLOCKS; + + if (in & ATTR_FROM_OPEN) + rr->rr_flags |= MRF_SETATTR_LOCKED; + + if (in & ATTR_ATIME_SET) + out |= LA_ATIME; + + if (in & ATTR_CTIME_SET) + out |= LA_CTIME; + + if (in & ATTR_MTIME_SET) + out |= LA_MTIME; + + if (in & ATTR_ATTR_FLAG) + out |= LA_FLAGS; + + if (in & MDS_OPEN_OWNEROVERRIDE) + out |= MDS_OPEN_OWNEROVERRIDE; + + /*XXX need ATTR_RAW?*/ + in &= ~(ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_BLOCKS| + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME|ATTR_FROM_OPEN| + ATTR_ATIME_SET|ATTR_CTIME_SET|ATTR_MTIME_SET| + ATTR_ATTR_FLAG|ATTR_RAW|MDS_OPEN_OWNEROVERRIDE); + if (in != 0) + CERROR("Unknown attr bits: %#llx\n", in); + return out; +} +/* unpacking */ + +static int mdt_setattr_unpack_rec(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct md_attr *ma = &info->mti_attr; + struct lu_attr *la = &ma->ma_attr; + struct req_capsule *pill = &info->mti_pill; + struct mdt_reint_record *rr = &info->mti_rr; + struct mdt_rec_setattr *rec; + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_SETATTR); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->sa_fsuid; + uc->mu_fsgid = rec->sa_fsgid; + uc->mu_cap = rec->sa_cap; + uc->mu_suppgids[0] = rec->sa_suppgid; + uc->mu_suppgids[1] = -1; + + rr->rr_fid1 = &rec->sa_fid; + la->la_valid = mdt_attr_valid_xlate(rec->sa_valid, rr, ma); + la->la_mode = rec->sa_mode; + la->la_flags = rec->sa_attr_flags; + la->la_uid = rec->sa_uid; + la->la_gid = rec->sa_gid; + la->la_size = rec->sa_size; + la->la_blocks = rec->sa_blocks; + la->la_ctime = rec->sa_ctime; + la->la_atime = rec->sa_atime; + la->la_mtime = rec->sa_mtime; + ma->ma_valid = MA_INODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + + RETURN(0); +} + +static int mdt_epoch_unpack(struct mdt_thread_info *info) +{ + struct req_capsule *pill = &info->mti_pill; + ENTRY; + + if (req_capsule_get_size(pill, &RMF_MDT_EPOCH, RCL_CLIENT)) + info->mti_epoch = req_capsule_client_get(pill, &RMF_MDT_EPOCH); + else + info->mti_epoch = NULL; + RETURN(info->mti_epoch == NULL ? -EFAULT : 0); +} + +static int mdt_setattr_unpack(struct mdt_thread_info *info) +{ + struct md_attr *ma = &info->mti_attr; + struct req_capsule *pill = &info->mti_pill; + int rc; + ENTRY; + + rc = mdt_setattr_unpack_rec(info); + if (rc) + RETURN(rc); + + /* Epoch may be absent */ + mdt_epoch_unpack(info); + + if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) { + ma->ma_lmm = req_capsule_client_get(pill, &RMF_EADATA); + ma->ma_lmm_size = req_capsule_get_size(pill, &RMF_EADATA, + RCL_CLIENT); + ma->ma_valid |= MA_LOV; + } + + if (req_capsule_field_present(pill, &RMF_LOGCOOKIES, RCL_CLIENT)) { + ma->ma_cookie = req_capsule_client_get(pill, + &RMF_LOGCOOKIES); + ma->ma_cookie_size = req_capsule_get_size(pill, + &RMF_LOGCOOKIES, + RCL_CLIENT); + ma->ma_valid |= MA_COOKIE; + } + + RETURN(0); +} + +int mdt_close_unpack(struct mdt_thread_info *info) +{ + int rc; + ENTRY; + + rc = mdt_epoch_unpack(info); + if (rc) + RETURN(rc); + + RETURN(mdt_setattr_unpack_rec(info)); +} + +static int mdt_create_unpack(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_rec_create *rec; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct req_capsule *pill = &info->mti_pill; + struct md_op_spec *sp = &info->mti_spec; + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_CREATE); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + uc->mu_suppgids[0] = rec->cr_suppgid1; + uc->mu_suppgids[1] = -1; + + rr->rr_fid1 = &rec->cr_fid1; + rr->rr_fid2 = &rec->cr_fid2; + attr->la_mode = rec->cr_mode; + attr->la_rdev = rec->cr_rdev; + attr->la_uid = rec->cr_fsuid; + attr->la_gid = rec->cr_fsgid; + attr->la_ctime = rec->cr_time; + attr->la_mtime = rec->cr_time; + attr->la_atime = rec->cr_time; + attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | + LA_CTIME | LA_MTIME | LA_ATIME; + memset(&sp->u, 0, sizeof(sp->u)); + sp->sp_cr_flags = rec->cr_flags; + sp->sp_ck_split = !!(rec->cr_bias & MDS_CHECK_SPLIT); + info->mti_cross_ref = !!(rec->cr_bias & MDS_CROSS_REF); + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1; + LASSERT(rr->rr_namelen > 0); + +#ifdef CONFIG_FS_POSIX_ACL + if (sp->sp_cr_flags & MDS_CREATE_RMT_ACL) { + if (S_ISDIR(attr->la_mode)) + sp->u.sp_pfid = rr->rr_fid1; + req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_RMT_ACL); + LASSERT(req_capsule_field_present(pill, &RMF_EADATA, + RCL_CLIENT)); + sp->u.sp_ea.eadata = req_capsule_client_get(pill, &RMF_EADATA); + sp->u.sp_ea.eadatalen = req_capsule_get_size(pill, &RMF_EADATA, + RCL_CLIENT); + sp->u.sp_ea.fid = rr->rr_fid1; + RETURN(0); + } +#endif + if (S_ISDIR(attr->la_mode)) { + /* pass parent fid for cross-ref cases */ + sp->u.sp_pfid = rr->rr_fid1; + if (sp->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { + /* create salve object req, need + * unpack split ea here + */ + req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SLAVE); + LASSERT(req_capsule_field_present(pill, &RMF_EADATA, + RCL_CLIENT)); + sp->u.sp_ea.eadata = req_capsule_client_get(pill, + &RMF_EADATA); + sp->u.sp_ea.eadatalen = req_capsule_get_size(pill, + &RMF_EADATA, + RCL_CLIENT); + sp->u.sp_ea.fid = rr->rr_fid1; + } + } else if (S_ISLNK(attr->la_mode)) { + const char *tgt = NULL; + + req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM); + if (req_capsule_field_present(pill, &RMF_SYMTGT, RCL_CLIENT)) { + tgt = req_capsule_client_get(pill, &RMF_SYMTGT); + sp->u.sp_symname = tgt; + } + if (tgt == NULL) + RETURN(-EFAULT); + } + RETURN(0); +} + +static int mdt_link_unpack(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_rec_link *rec; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct req_capsule *pill = &info->mti_pill; + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_LINK); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->lk_fsuid; + uc->mu_fsgid = rec->lk_fsgid; + uc->mu_cap = rec->lk_cap; + uc->mu_suppgids[0] = rec->lk_suppgid1; + uc->mu_suppgids[1] = rec->lk_suppgid2; + + attr->la_uid = rec->lk_fsuid; + attr->la_gid = rec->lk_fsgid; + rr->rr_fid1 = &rec->lk_fid1; + rr->rr_fid2 = &rec->lk_fid2; + attr->la_ctime = rec->lk_time; + attr->la_mtime = rec->lk_time; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) + mdt_set_capainfo(info, 1, rr->rr_fid2, + req_capsule_client_get(pill, &RMF_CAPA2)); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); + rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1; + LASSERT(rr->rr_namelen > 0); + info->mti_spec.sp_ck_split = !!(rec->lk_bias & MDS_CHECK_SPLIT); + info->mti_cross_ref = !!(rec->lk_bias & MDS_CROSS_REF); + + RETURN(0); +} + +static int mdt_unlink_unpack(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_rec_unlink *rec; + struct md_attr *ma = &info->mti_attr; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct req_capsule *pill = &info->mti_pill; + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_UNLINK); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->ul_fsuid; + uc->mu_fsgid = rec->ul_fsgid; + uc->mu_cap = rec->ul_cap; + uc->mu_suppgids[0] = rec->ul_suppgid; + uc->mu_suppgids[1] = -1; + + attr->la_uid = rec->ul_fsuid; + attr->la_gid = rec->ul_fsgid; + rr->rr_fid1 = &rec->ul_fid1; + rr->rr_fid2 = &rec->ul_fid2; + attr->la_ctime = rec->ul_time; + attr->la_mtime = rec->ul_time; + attr->la_mode = rec->ul_mode; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); + rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1; + LASSERT(rr->rr_namelen > 0); + info->mti_spec.sp_ck_split = !!(rec->ul_bias & MDS_CHECK_SPLIT); + info->mti_cross_ref = !!(rec->ul_bias & MDS_CROSS_REF); + if (rec->ul_bias & MDS_VTX_BYPASS) + ma->ma_attr_flags |= MDS_VTX_BYPASS; + else + ma->ma_attr_flags &= ~MDS_VTX_BYPASS; + + RETURN(0); +} + +static int mdt_rename_unpack(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_rec_rename *rec; + struct md_attr *ma = &info->mti_attr; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct req_capsule *pill = &info->mti_pill; + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_RENAME); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->rn_fsuid; + uc->mu_fsgid = rec->rn_fsgid; + uc->mu_cap = rec->rn_cap; + uc->mu_suppgids[0] = rec->rn_suppgid1; + uc->mu_suppgids[1] = rec->rn_suppgid2; + + attr->la_uid = rec->rn_fsuid; + attr->la_gid = rec->rn_fsgid; + rr->rr_fid1 = &rec->rn_fid1; + rr->rr_fid2 = &rec->rn_fid2; + attr->la_ctime = rec->rn_time; + attr->la_mtime = rec->rn_time; + /* rename_tgt contains the mode already */ + attr->la_mode = rec->rn_mode; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) + mdt_set_capainfo(info, 1, rr->rr_fid2, + req_capsule_client_get(pill, &RMF_CAPA2)); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT); + if (rr->rr_name == NULL || rr->rr_tgt == NULL) + RETURN(-EFAULT); + rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1; + LASSERT(rr->rr_namelen > 0); + rr->rr_tgtlen = req_capsule_get_size(pill, &RMF_SYMTGT, RCL_CLIENT) - 1; + LASSERT(rr->rr_tgtlen > 0); + info->mti_spec.sp_ck_split = !!(rec->rn_bias & MDS_CHECK_SPLIT); + info->mti_cross_ref = !!(rec->rn_bias & MDS_CROSS_REF); + if (rec->rn_bias & MDS_VTX_BYPASS) + ma->ma_attr_flags |= MDS_VTX_BYPASS; + else + ma->ma_attr_flags &= ~MDS_VTX_BYPASS; + + RETURN(0); +} + +static int mdt_open_unpack(struct mdt_thread_info *info) +{ + struct md_ucred *uc = mdt_ucred(info); + struct mdt_rec_create *rec; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct req_capsule *pill = &info->mti_pill; + struct mdt_reint_record *rr = &info->mti_rr; + struct ptlrpc_request *req = mdt_info_req(info); + ENTRY; + + rec = req_capsule_client_get(pill, &RMF_REC_CREATE); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + uc->mu_suppgids[0] = rec->cr_suppgid1; + uc->mu_suppgids[1] = rec->cr_suppgid2; + + rr->rr_fid1 = &rec->cr_fid1; + rr->rr_fid2 = &rec->cr_fid2; + rr->rr_handle = &rec->cr_old_handle; + attr->la_mode = rec->cr_mode; + attr->la_rdev = rec->cr_rdev; + attr->la_uid = rec->cr_fsuid; + attr->la_gid = rec->cr_fsgid; + attr->la_ctime = rec->cr_time; + attr->la_mtime = rec->cr_time; + attr->la_atime = rec->cr_time; + attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | + LA_CTIME | LA_MTIME | LA_ATIME; + memset(&info->mti_spec.u, 0, sizeof(info->mti_spec.u)); + info->mti_spec.sp_cr_flags = rec->cr_flags; + info->mti_replayepoch = rec->cr_ioepoch; + + info->mti_spec.sp_ck_split = !!(rec->cr_bias & MDS_CHECK_SPLIT); + info->mti_cross_ref = !!(rec->cr_bias & MDS_CROSS_REF); + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + mdt_set_capainfo(info, 0, rr->rr_fid1, + req_capsule_client_get(pill, &RMF_CAPA1)); + if ((lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && + (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))) { +#if 0 + mdt_set_capainfo(info, 1, rr->rr_fid2, + req_capsule_client_get(pill, &RMF_CAPA2)); +#else + /* + * FIXME: capa in replay open request might have expired, + * bypass capa check. Security hole? + */ + mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA); + mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA); +#endif + } + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); + rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1; + LASSERT(rr->rr_namelen > 0); + + if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) { + struct md_op_spec *sp = &info->mti_spec; + sp->u.sp_ea.eadata = req_capsule_client_get(pill, + &RMF_EADATA); + sp->u.sp_ea.eadatalen = req_capsule_get_size(pill, + &RMF_EADATA, + RCL_CLIENT); + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) + sp->u.sp_ea.no_lov_create = 1; + } + + RETURN(0); +} + +typedef int (*reint_unpacker)(struct mdt_thread_info *info); + +static reint_unpacker mdt_reint_unpackers[REINT_MAX] = { + [REINT_SETATTR] = mdt_setattr_unpack, + [REINT_CREATE] = mdt_create_unpack, + [REINT_LINK] = mdt_link_unpack, + [REINT_UNLINK] = mdt_unlink_unpack, + [REINT_RENAME] = mdt_rename_unpack, + [REINT_OPEN] = mdt_open_unpack +}; + +int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op) +{ + int rc; + ENTRY; + + memset(&info->mti_rr, 0, sizeof(info->mti_rr)); + if (op < REINT_MAX && mdt_reint_unpackers[op] != NULL) { + info->mti_rr.rr_opcode = op; + rc = mdt_reint_unpackers[op](info); + } else { + CERROR("Unexpected opcode %d\n", op); + rc = -EFAULT; + } + RETURN(rc); +} diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c new file mode 100644 index 0000000..d2517d3 --- /dev/null +++ b/lustre/mdt/mdt_lproc.c @@ -0,0 +1,763 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao <lsy@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/version.h> +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +#include <asm/statfs.h> +#endif + +#include <linux/module.h> + +/* LUSTRE_VERSION_CODE */ +#include <lustre_ver.h> +/* + * struct OBD_{ALLOC,FREE}*() + * MDT_FAIL_CHECK + */ +#include <obd_support.h> +/* struct obd_export */ +#include <lustre_export.h> +/* struct obd_device */ +#include <obd.h> +#include <obd_class.h> +#include <lustre_mds.h> +#include <lustre_mdt.h> +#include <lprocfs_status.h> +#include <lu_time.h> +#include "mdt_internal.h" + +static const char *mdt_proc_names[LPROC_MDT_NR] = { +}; + +int mdt_procfs_init(struct mdt_device *mdt, const char *name) +{ + struct lu_device *ld = &mdt->mdt_md_dev.md_lu_dev; + int result; + ENTRY; + + LASSERT(name != NULL); + mdt->mdt_proc_entry = ld->ld_obd->obd_proc_entry; + LASSERT(mdt->mdt_proc_entry != NULL); + + result = lu_time_init(&mdt->mdt_stats, mdt->mdt_proc_entry, + mdt_proc_names, ARRAY_SIZE(mdt_proc_names)); + if (result == 0) + result = lu_time_named_init(&ld->ld_site->ls_time_stats, + "site_time", mdt->mdt_proc_entry, + lu_time_names, + ARRAY_SIZE(lu_time_names)); + RETURN(result); +} + +int mdt_procfs_fini(struct mdt_device *mdt) +{ + struct lu_device *ld = &mdt->mdt_md_dev.md_lu_dev; + lu_time_fini(&ld->ld_site->ls_time_stats); + lu_time_fini(&mdt->mdt_stats); + mdt->mdt_proc_entry = NULL; + RETURN(0); +} + +void mdt_time_start(const struct mdt_thread_info *info) +{ + lu_lprocfs_time_start(info->mti_env); +} + +void mdt_time_end(const struct mdt_thread_info *info, int idx) +{ + lu_lprocfs_time_end(info->mti_env, info->mti_mdt->mdt_stats, idx); +} + +static int lprocfs_rd_identity_expire(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%lu\n", + mdt->mdt_identity_cache->uc_entry_expire / HZ); +} + +static int lprocfs_wr_identity_expire(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_identity_cache->uc_entry_expire = val * HZ; + return count; +} + +static int lprocfs_rd_identity_acquire_expire(char *page, char **start, + off_t off, int count, int *eof, + void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%lu\n", + mdt->mdt_identity_cache->uc_acquire_expire / HZ); +} + +static int lprocfs_wr_identity_acquire_expire(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_identity_cache->uc_acquire_expire = val * HZ; + return count; +} + +static int lprocfs_rd_identity_upcall(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%s\n", + mdt->mdt_identity_cache->uc_upcall); +} + +static int lprocfs_wr_identity_upcall(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct upcall_cache *hash = mdt->mdt_identity_cache; + char kernbuf[UC_CACHE_UPCALL_MAXPATH] = { '\0' }; + + if (count >= UC_CACHE_UPCALL_MAXPATH) { + CERROR("%s: identity upcall too long\n", obd->obd_name); + return -EINVAL; + } + + if (copy_from_user(kernbuf, buffer, + min(count, UC_CACHE_UPCALL_MAXPATH - 1))) + return -EFAULT; + + /* Remove any extraneous bits from the upcall (e.g. linefeeds) */ + sscanf(kernbuf, "%s", hash->uc_upcall); + + if (strcmp(hash->uc_name, obd->obd_name) != 0) + CWARN("%s: write to upcall name %s\n", + obd->obd_name, hash->uc_upcall); + CWARN("%s: identity upcall set to %s\n", obd->obd_name, hash->uc_upcall); + + return count; +} + +static int lprocfs_wr_identity_flush(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int rc, uid; + + rc = lprocfs_write_helper(buffer, count, &uid); + if (rc) + return rc; + + mdt_flush_identity(mdt->mdt_identity_cache, uid); + return count; +} + +static int lprocfs_wr_identity_info(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct identity_downcall_data sparam, *param = &sparam; + int size = 0, rc = 0; + + if (count < sizeof(*param)) { + CERROR("%s: invalid data size %lu\n", obd->obd_name, count); + return count; + } + + if (copy_from_user(&sparam, buffer, sizeof(sparam))) { + CERROR("%s: bad identity data\n", obd->obd_name); + GOTO(out, rc = -EFAULT); + } + + if (sparam.idd_magic != IDENTITY_DOWNCALL_MAGIC) { + CERROR("%s: MDS identity downcall bad params\n", obd->obd_name); + GOTO(out, rc = -EINVAL); + } + + if (sparam.idd_nperms > N_SETXID_PERMS_MAX) { + CERROR("%s: perm count %d more than maximum %d\n", + obd->obd_name, sparam.idd_nperms, N_SETXID_PERMS_MAX); + GOTO(out, rc = -EINVAL); + } + + if (sparam.idd_ngroups > NGROUPS_MAX) { + CERROR("%s: group count %d more than maximum %d\n", + obd->obd_name, sparam.idd_ngroups, NGROUPS_MAX); + GOTO(out, rc = -EINVAL); + } + + if (sparam.idd_ngroups) { + size = offsetof(struct identity_downcall_data, + idd_groups[sparam.idd_ngroups]); + OBD_ALLOC(param, size); + if (!param) { + CERROR("%s: fail to alloc %d bytes for uid %u" + " with %d groups\n", obd->obd_name, size, + sparam.idd_uid, sparam.idd_ngroups); + param = &sparam; + param->idd_ngroups = 0; + } else if (copy_from_user(param, buffer, size)) { + CERROR("%s: uid %u bad supplementary group data\n", + obd->obd_name, sparam.idd_uid); + OBD_FREE(param, size); + param = &sparam; + param->idd_ngroups = 0; + } + } + + rc = upcall_cache_downcall(mdt->mdt_identity_cache, param->idd_err, + param->idd_uid, param); + +out: + if (param && (param != &sparam)) + OBD_FREE(param, size); + + return rc ?: count; +} + +static int lprocfs_rd_rmtacl_expire(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%lu\n", + mdt->mdt_rmtacl_cache->uc_entry_expire / HZ); +} + +static int lprocfs_wr_rmtacl_expire(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_rmtacl_cache->uc_entry_expire = val * HZ; + return count; +} + +static int lprocfs_rd_rmtacl_acquire_expire(char *page, char **start, + off_t off, int count, int *eof, + void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%lu\n", + mdt->mdt_rmtacl_cache->uc_acquire_expire / HZ); +} + +static int lprocfs_wr_rmtacl_acquire_expire(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_rmtacl_cache->uc_acquire_expire = val * HZ; + return count; +} + +static int lprocfs_rd_rmtacl_upcall(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + *eof = 1; + return snprintf(page, count, "%s\n", + mdt->mdt_rmtacl_cache->uc_upcall); +} + +static int lprocfs_wr_rmtacl_upcall(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct upcall_cache *hash = mdt->mdt_rmtacl_cache; + char kernbuf[UC_CACHE_UPCALL_MAXPATH] = { '\0' }; + + if (count >= UC_CACHE_UPCALL_MAXPATH) { + CERROR("%s: remote ACL upcall too long\n", obd->obd_name); + return -EINVAL; + } + + if (copy_from_user(kernbuf, buffer, + min(count, UC_CACHE_UPCALL_MAXPATH - 1))) + return -EFAULT; + + /* Remove any extraneous bits from the upcall (e.g. linefeeds) */ + sscanf(kernbuf, "%s", hash->uc_upcall); + + if (strcmp(hash->uc_name, obd->obd_name) != 0) + CWARN("%s: write to upcall name %s\n", + obd->obd_name, hash->uc_upcall); + CWARN("%s: remote ACL upcall set to %s\n", obd->obd_name, hash->uc_upcall); + + return count; +} + +static int lprocfs_wr_rmtacl_info(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct rmtacl_downcall_data sparam, *param = &sparam; + int size = 0, rc = 0; + + if (count < sizeof(*param)) { + CERROR("%s: invalid data size %lu\n", obd->obd_name, count); + return count; + } + + if (copy_from_user(&sparam, buffer, sizeof(sparam))) { + CERROR("%s: bad remote acl data\n", obd->obd_name); + GOTO(out, rc = -EFAULT); + } + + if (sparam.add_magic != RMTACL_DOWNCALL_MAGIC) { + CERROR("%s: MDT remote acl downcall bad params\n", obd->obd_name); + GOTO(out, rc = -EINVAL); + } + + if (sparam.add_buflen) { + size = offsetof(struct rmtacl_downcall_data, + add_buf[sparam.add_buflen]); + OBD_ALLOC(param, size); + if (!param) { + CERROR("%s: fail to alloc %d bytes for ino "LPU64"\n", + obd->obd_name, size, sparam.add_key); + param = &sparam; + param->add_buflen = 0; + } else if (copy_from_user(param, buffer, size)) { + CERROR("%s: ino "LPU64" bad remote acl data\n", + obd->obd_name, sparam.add_key); + OBD_FREE(param, size); + param = &sparam; + param->add_buflen = 0; + } + } + + rc = upcall_cache_downcall(mdt->mdt_rmtacl_cache, 0, param->add_key, + param); + +out: + if (param && (param != &sparam)) + OBD_FREE(param, size); + + return rc ?: count; +} + +static int lprocfs_rd_rootsquash_uid(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + + *eof = 1; + return snprintf(page, count, "%u\n", + rsi ? rsi->rsi_uid : 0); +} + +static int lprocfs_wr_rootsquash_uid(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (!mdt->mdt_rootsquash_info) + OBD_ALLOC_PTR(mdt->mdt_rootsquash_info); + if (!mdt->mdt_rootsquash_info) + return -ENOMEM; + + mdt->mdt_rootsquash_info->rsi_uid = val; + return count; +} + +static int lprocfs_rd_rootsquash_gid(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + + *eof = 1; + return snprintf(page, count, "%u\n", + rsi ? rsi->rsi_gid : 0); +} + +static int lprocfs_wr_rootsquash_gid(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (!mdt->mdt_rootsquash_info) + OBD_ALLOC_PTR(mdt->mdt_rootsquash_info); + if (!mdt->mdt_rootsquash_info) + return -ENOMEM; + + mdt->mdt_rootsquash_info->rsi_gid = val; + return count; +} + +static int lprocfs_rd_nosquash_nids(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + int i, ret; + + ret = snprintf(page, count, "rootsquash skip list:\n"); + for (i = 0; rsi && (i < rsi->rsi_n_nosquash_nids); i++) { + ret += snprintf(page + ret, count - ret, "%s\n", + libcfs_nid2str(rsi->rsi_nosquash_nids[i])); + } + + *eof = 1; + return ret; +} + +static inline void remove_newline(char *str) +{ + int len = strlen(str); + + if (str[len - 1] == '\n') + str[len - 1] = '\0'; +} + +/* XXX: This macro is copied from lnet/libcfs/nidstring.c */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +static void do_process_nosquash_nids(struct mdt_device *m, char *buf) +{ + struct rootsquash_info *rsi = m->mdt_rootsquash_info; + char str[LNET_NIDSTR_SIZE], *end; + lnet_nid_t nid; + + LASSERT(rsi); + rsi->rsi_n_nosquash_nids = 0; + while (rsi->rsi_n_nosquash_nids < N_NOSQUASH_NIDS) { + end = strchr(buf, ','); + memset(str, 0, sizeof(str)); + if (end) + strncpy(str, buf, min_t(int, sizeof(str), end - buf)); + else + strncpy(str, buf, min_t(int, sizeof(str), strlen(buf))); + + if (!strcmp(str, "*")) { + nid = LNET_NID_ANY; + } else { + nid = libcfs_str2nid(str); + if (nid == LNET_NID_ANY) + goto ignore; + } + rsi->rsi_nosquash_nids[rsi->rsi_n_nosquash_nids++] = nid; +ignore: + if (!end || (*(end + 1) == 0)) + return; + buf = end + 1; + } +} + +static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + char skips[LNET_NIDSTR_SIZE * N_NOSQUASH_NIDS] = ""; + unsigned long size = sizeof(skips); + + if (count > size) { + CERROR("parameter exceeds max limit %lu\n", size); + return -EINVAL; + } + + if (copy_from_user(skips, buffer, min(size, count))) + return -EFAULT; + + if (!mdt->mdt_rootsquash_info) + OBD_ALLOC_PTR(mdt->mdt_rootsquash_info); + if (!mdt->mdt_rootsquash_info) + return -ENOMEM; + + remove_newline(skips); + do_process_nosquash_nids(mdt, skips); + return count; +} + +/* for debug only */ +static int lprocfs_rd_capa(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return snprintf(page, count, "capability on: %s %s\n", + mdt->mdt_opts.mo_oss_capa ? "oss" : "", + mdt->mdt_opts.mo_mds_capa ? "mds" : ""); +} + +static int lprocfs_wr_capa(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 0 || val > 3) { + CERROR("invalid capability mode, only 0/2/3 is accepted.\n" + " 0: disable fid capability\n" + " 2: enable MDS fid capability\n" + " 3: enable both MDS and OSS fid capability\n"); + return -EINVAL; + } + + /* OSS fid capability needs enable both MDS and OSS fid capability on + * MDS */ + if (val == 1) { + CERROR("can't enable OSS fid capability only, you should use " + "'3' to enable both MDS and OSS fid capability.\n"); + return -EINVAL; + } + + mdt->mdt_opts.mo_oss_capa = (val & 0x1); + mdt->mdt_opts.mo_mds_capa = !!(val & 0x2); + mdt->mdt_capa_conf = 1; + LCONSOLE_INFO("MDS %s %s MDS fid capability.\n", + obd->obd_name, + mdt->mdt_opts.mo_mds_capa ? "enabled" : "disabled"); + LCONSOLE_INFO("MDS %s %s OSS fid capability.\n", + obd->obd_name, + mdt->mdt_opts.mo_oss_capa ? "enabled" : "disabled"); + return count; +} + +static int lprocfs_rd_capa_count(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + return snprintf(page, count, "%d %d\n", + capa_count[CAPA_SITE_CLIENT], + capa_count[CAPA_SITE_SERVER]); +} + +static int lprocfs_rd_site_stats(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + struct lu_site *s = mdt->mdt_md_dev.md_lu_dev.ld_site; + int i; + int populated; + + /* + * How many hash buckets are not-empty? Don't bother with locks: it's + * an estimation anyway. + */ + for (i = 0, populated = 0; i < s->ls_hash_size; i++) + populated += !hlist_empty(&s->ls_hash[i]); + + return snprintf(page, count, "%d %d %d/%d %d %d %d %d %d %d\n", + s->ls_total, + s->ls_busy, + populated, + s->ls_hash_size, + s->ls_stats.s_created, + s->ls_stats.s_cache_hit, + s->ls_stats.s_cache_miss, + s->ls_stats.s_cache_check, + s->ls_stats.s_cache_race, + s->ls_stats.s_lru_purged); +} + +static int lprocfs_rd_capa_timeout(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return snprintf(page, count, "%lu\n", mdt->mdt_capa_timeout); +} + +static int lprocfs_wr_capa_timeout(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_capa_timeout = (unsigned long)val; + mdt->mdt_capa_conf = 1; + return count; +} + +static int lprocfs_rd_ck_timeout(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return snprintf(page, count, "%lu\n", mdt->mdt_ck_timeout); +} + +static int lprocfs_wr_ck_timeout(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + mdt->mdt_ck_timeout = (unsigned long)val; + mdt->mdt_capa_conf = 1; + return count; +} + +static int lprocfs_mdt_wr_evict_client(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char tmpbuf[sizeof(struct obd_uuid)]; + + sscanf(buffer, "%40s", tmpbuf); + + if (strncmp(tmpbuf, "nid:", 4) != 0) + return lprocfs_wr_evict_client(file, buffer, count, data); + + CERROR("NOT implement evict client by nid %s\n", tmpbuf); + + return count; +} + +static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, + { "num_exports", lprocfs_rd_num_exports, 0, 0 }, + { "identity_expire", lprocfs_rd_identity_expire, + lprocfs_wr_identity_expire, 0 }, + { "identity_acquire_expire", lprocfs_rd_identity_acquire_expire, + lprocfs_wr_identity_acquire_expire, 0 }, + { "identity_upcall", lprocfs_rd_identity_upcall, + lprocfs_wr_identity_upcall, 0 }, + { "identity_flush", 0, lprocfs_wr_identity_flush, 0 }, + { "identity_info", 0, lprocfs_wr_identity_info, 0 }, + { "rmtacl_expire", lprocfs_rd_rmtacl_expire, + lprocfs_wr_rmtacl_expire, 0 }, + { "rmtacl_acquire_expire", lprocfs_rd_rmtacl_acquire_expire, + lprocfs_wr_rmtacl_acquire_expire, 0 }, + { "rmtacl_upcall", lprocfs_rd_rmtacl_upcall, + lprocfs_wr_rmtacl_upcall, 0 }, + { "rmtacl_info", 0, lprocfs_wr_rmtacl_info, 0 }, + { "rootsquash_uid", lprocfs_rd_rootsquash_uid, + lprocfs_wr_rootsquash_uid, 0 }, + { "rootsquash_gid", lprocfs_rd_rootsquash_gid, + lprocfs_wr_rootsquash_gid, 0 }, + { "nosquash_nids", lprocfs_rd_nosquash_nids, + lprocfs_wr_nosquash_nids, 0 }, + { "capa", lprocfs_rd_capa, + lprocfs_wr_capa, 0 }, + { "capa_timeout", lprocfs_rd_capa_timeout, + lprocfs_wr_capa_timeout, 0 }, + { "capa_key_timeout", lprocfs_rd_ck_timeout, + lprocfs_wr_ck_timeout, 0 }, + { "capa_count", lprocfs_rd_capa_count, 0, 0 }, + { "site_stats", lprocfs_rd_site_stats, 0, 0 }, + { "evict_client", 0, lprocfs_mdt_wr_evict_client, 0 }, + { 0 } +}; + +static struct lprocfs_vars lprocfs_mdt_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, + { 0 } +}; + +LPROCFS_INIT_VARS(mdt, lprocfs_mdt_module_vars, lprocfs_mdt_obd_vars); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c new file mode 100644 index 0000000..fe4b6f3 --- /dev/null +++ b/lustre/mdt/mdt_open.c @@ -0,0 +1,1277 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * linux/mdt/mdt_open.c + * Lustre Metadata Target (mdt) open/close file handling + * + * Copyright (C) 2002-2006 Cluster File Systems, Inc. + * Author: Huang Hua <huanghua@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/lustre_acl.h> +#include <lustre_mds.h> +#include "mdt_internal.h" + +/* we do nothing because we do not have refcount now */ +static void mdt_mfd_get(void *mfdp) +{ +} + +/* Create a new mdt_file_data struct, initialize it, + * and insert it to global hash table */ +struct mdt_file_data *mdt_mfd_new(void) +{ + struct mdt_file_data *mfd; + ENTRY; + + OBD_ALLOC_PTR(mfd); + if (mfd != NULL) { + INIT_LIST_HEAD(&mfd->mfd_handle.h_link); + INIT_LIST_HEAD(&mfd->mfd_list); + class_handle_hash(&mfd->mfd_handle, mdt_mfd_get); + } + RETURN(mfd); +} + +/* + * Find the mfd pointed to by handle in global hash table. + * In case of replay the handle is obsoleted + * but mfd can be found in mfd list by that handle + */ +struct mdt_file_data *mdt_handle2mfd(struct mdt_thread_info *info, + const struct lustre_handle *handle) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_file_data *mfd; + ENTRY; + + LASSERT(handle != NULL); + mfd = class_handle2object(handle->cookie); + /* during dw/setattr replay the mfd can be found by old handle */ + if (mfd == NULL && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + list_for_each_entry(mfd, &med->med_open_head, mfd_list) { + if (mfd->mfd_old_handle.cookie == handle->cookie) + RETURN (mfd); + } + mfd = NULL; + } + RETURN (mfd); +} + +/* free mfd */ +void mdt_mfd_free(struct mdt_file_data *mfd) +{ + LASSERT(list_empty(&mfd->mfd_list)); + OBD_FREE_RCU(mfd, sizeof *mfd, &mfd->mfd_handle); +} + +static int mdt_create_data(struct mdt_thread_info *info, + struct mdt_object *p, struct mdt_object *o) +{ + struct md_op_spec *spec = &info->mti_spec; + struct md_attr *ma = &info->mti_attr; + int rc; + ENTRY; + + if ((spec->sp_cr_flags & MDS_OPEN_DELAY_CREATE) || + !(spec->sp_cr_flags & FMODE_WRITE)) + RETURN(0); + + ma->ma_need = MA_INODE | MA_LOV; + ma->ma_valid = 0; + rc = mdo_create_data(info->mti_env, + p ? mdt_object_child(p) : NULL, + mdt_object_child(o), spec, ma); + RETURN(rc); +} + +static int mdt_epoch_opened(struct mdt_object *mo) +{ + return mo->mot_epochcount; +} + +int mdt_sizeonmds_enabled(struct mdt_object *mo) +{ + return !mo->mot_ioepoch; +} + +/* Re-enable Size-on-MDS. */ +void mdt_sizeonmds_enable(struct mdt_thread_info *info, + struct mdt_object *mo) +{ + spin_lock(&info->mti_mdt->mdt_ioepoch_lock); + if (info->mti_epoch->ioepoch == mo->mot_ioepoch) { + LASSERT(!mdt_epoch_opened(mo)); + mo->mot_ioepoch = 0; + mo->mot_flags = 0; + } + spin_unlock(&info->mti_mdt->mdt_ioepoch_lock); +} + +/* Open the epoch. Epoch open is allowed if @writecount is not negative. + * The epoch and writecount handling is performed under the mdt_ioepoch_lock. */ +int mdt_epoch_open(struct mdt_thread_info *info, struct mdt_object *o) +{ + struct mdt_device *mdt = info->mti_mdt; + int cancel = 0; + int rc = 0; + ENTRY; + + if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM) || + !S_ISREG(lu_object_attr(&o->mot_obj.mo_lu))) + RETURN(0); + + spin_lock(&mdt->mdt_ioepoch_lock); + if (mdt_epoch_opened(o)) { + /* Epoch continues even if there is no writers yet. */ + CDEBUG(D_INODE, "continue epoch "LPU64" for "DFID"\n", + o->mot_ioepoch, PFID(mdt_object_fid(o))); + } else { + if (info->mti_replayepoch > mdt->mdt_ioepoch) + mdt->mdt_ioepoch = info->mti_replayepoch; + else + mdt->mdt_ioepoch++; + o->mot_ioepoch = info->mti_replayepoch ? + info->mti_replayepoch : mdt->mdt_ioepoch; + CDEBUG(D_INODE, "starting epoch "LPU64" for "DFID"\n", + mdt->mdt_ioepoch, PFID(mdt_object_fid(o))); + cancel = 1; + } + o->mot_epochcount++; + spin_unlock(&mdt->mdt_ioepoch_lock); + + /* Cancel Size-on-MDS attributes on clients if not truncate. + * In the later case, mdt_reint_setattr will do it. */ + if (cancel && (info->mti_rr.rr_fid1 != NULL)) { + struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(lh, LCK_EX); + rc = mdt_object_lock(info, o, lh, MDS_INODELOCK_UPDATE, + MDT_LOCAL_LOCK); + if (rc == 0) + mdt_object_unlock(info, o, lh, 1); + } + RETURN(rc); +} + +/* Update the on-disk attributes if needed and re-enable Size-on-MDS caching. */ +static int mdt_sizeonmds_update(struct mdt_thread_info *info, + struct mdt_object *o) +{ + ENTRY; + + CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n", + o->mot_ioepoch, PFID(mdt_object_fid(o)), o->mot_epochcount); + + if (info->mti_attr.ma_attr.la_valid & LA_SIZE) { + /* Do Size-on-MDS attribute update. + * Size-on-MDS is re-enabled inside. */ + /* XXX: since we have opened the file, it is unnecessary + * to check permission when close it. Between the "open" + * and "close", maybe someone has changed the file mode + * or flags, or the file created mode do not permit wirte, + * and so on. Just set MDS_PERM_BYPASS for all the cases. */ + info->mti_attr.ma_attr_flags |= MDS_PERM_BYPASS; + info->mti_attr.ma_attr.la_valid &= LA_SIZE | LA_BLOCKS | + LA_ATIME | LA_MTIME | LA_CTIME; + RETURN(mdt_attr_set(info, o, 0)); + } else + mdt_sizeonmds_enable(info, o); + RETURN(0); +} + +/* Epoch closes. + * Returns 1 if epoch does not close. + * Returns 0 if epoch closes. + * Returns -EAGAIN if epoch closes but an Size-on-MDS Update is still needed + * from the client. */ +static int mdt_epoch_close(struct mdt_thread_info *info, struct mdt_object *o) +{ + int eviction = (mdt_info_req(info) == NULL ? 1 : 0); + struct lu_attr *la = &info->mti_attr.ma_attr; + int achange = 0; + int opened; + int rc = 1; + ENTRY; + + if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM) || + !S_ISREG(lu_object_attr(&o->mot_obj.mo_lu))) + RETURN(0); + + spin_lock(&info->mti_mdt->mdt_ioepoch_lock); + + /* Epoch closes only if client tells about it or eviction occures. */ + if (eviction || (info->mti_epoch->flags & MF_EPOCH_CLOSE)) { + LASSERT(o->mot_epochcount); + o->mot_epochcount--; + + CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n", + o->mot_ioepoch, PFID(mdt_object_fid(o)), + o->mot_epochcount); + + if (!eviction) + achange = (info->mti_epoch->flags & MF_SOM_CHANGE); + + rc = 0; + if (!eviction && !mdt_epoch_opened(o)) { + /* Epoch ends. Is an Size-on-MDS update needed? */ + if (o->mot_flags & MF_SOM_CHANGE) { + /* Some previous writer changed the attribute. + * Do not believe to the current Size-on-MDS + * update, re-ask client. */ + rc = -EAGAIN; + } else if (!(la->la_valid & LA_SIZE) && achange) { + /* Attributes were changed by the last writer + * only but no Size-on-MDS update is received.*/ + rc = -EAGAIN; + } + } + + if (achange || eviction) + o->mot_flags |= MF_SOM_CHANGE; + } + + opened = mdt_epoch_opened(o); + spin_unlock(&info->mti_mdt->mdt_ioepoch_lock); + + /* If eviction occurred, do nothing. */ + if ((rc == 0) && !opened && !eviction) { + /* Epoch ends and wanted Size-on-MDS update is obtained. */ + rc = mdt_sizeonmds_update(info, o); + /* Avoid the following setattrs of these attributes, e.g. + * for atime update. */ + info->mti_attr.ma_valid = 0; + } + RETURN(rc); +} + +int mdt_write_read(struct mdt_device *mdt, struct mdt_object *o) +{ + int rc = 0; + ENTRY; + spin_lock(&mdt->mdt_ioepoch_lock); + rc = o->mot_writecount; + spin_unlock(&mdt->mdt_ioepoch_lock); + RETURN(rc); +} + +int mdt_write_get(struct mdt_device *mdt, struct mdt_object *o) +{ + int rc = 0; + ENTRY; + spin_lock(&mdt->mdt_ioepoch_lock); + if (o->mot_writecount < 0) + rc = -ETXTBSY; + else + o->mot_writecount++; + spin_unlock(&mdt->mdt_ioepoch_lock); + RETURN(rc); +} + +static void mdt_write_put(struct mdt_device *mdt, struct mdt_object *o) +{ + ENTRY; + spin_lock(&mdt->mdt_ioepoch_lock); + o->mot_writecount--; + spin_unlock(&mdt->mdt_ioepoch_lock); + EXIT; +} + +static int mdt_write_deny(struct mdt_device *mdt, struct mdt_object *o) +{ + int rc = 0; + ENTRY; + spin_lock(&mdt->mdt_ioepoch_lock); + if (o->mot_writecount > 0) + rc = -ETXTBSY; + else + o->mot_writecount--; + spin_unlock(&mdt->mdt_ioepoch_lock); + RETURN(rc); +} + +static void mdt_write_allow(struct mdt_device *mdt, struct mdt_object *o) +{ + ENTRY; + spin_lock(&mdt->mdt_ioepoch_lock); + o->mot_writecount++; + spin_unlock(&mdt->mdt_ioepoch_lock); + EXIT; +} + +/* there can be no real transaction so prepare the fake one */ +static void mdt_empty_transno(struct mdt_thread_info* info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_request *req = mdt_info_req(info); + + ENTRY; + /* transaction is occured already */ + if (lustre_msg_get_transno(req->rq_repmsg) != 0) { + EXIT; + return; + } + + spin_lock(&mdt->mdt_transno_lock); + if (info->mti_transno == 0) { + info->mti_transno = ++ mdt->mdt_last_transno; + } else { + /* should be replay */ + if (info->mti_transno > mdt->mdt_last_transno) + mdt->mdt_last_transno = info->mti_transno; + } + spin_unlock(&mdt->mdt_transno_lock); + + CDEBUG(D_INODE, "transno = %llu, last_committed = %llu\n", + info->mti_transno, + req->rq_export->exp_obd->obd_last_committed); + + req->rq_transno = info->mti_transno; + lustre_msg_set_transno(req->rq_repmsg, info->mti_transno); + lustre_msg_set_last_xid(req->rq_repmsg, req->rq_xid); + EXIT; +} + +void mdt_mfd_set_mode(struct mdt_file_data *mfd, int mode) +{ + LASSERT(mfd != NULL); + + CDEBUG(D_HA, "Change mfd %p mode 0x%x->0x%x\n", + mfd, (unsigned int)mfd->mfd_mode, (unsigned int)mode); + + mfd->mfd_mode = mode; +} + +static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, + struct mdt_object *o, int flags, int created) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_file_data *mfd; + struct md_attr *ma = &info->mti_attr; + struct lu_attr *la = &ma->ma_attr; + struct mdt_body *repbody; + int rc = 0, isdir, isreg; + ENTRY; + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + isreg = S_ISREG(la->la_mode); + isdir = S_ISDIR(la->la_mode); + if ((isreg && !(ma->ma_valid & MA_LOV))) { + /* + * No EA, check whether it is will set regEA and dirEA since in + * above attr get, these size might be zero, so reset it, to + * retrieve the MD after create obj. + */ + ma->ma_lmm_size = req_capsule_get_size(&info->mti_pill, + &RMF_MDT_MD, + RCL_SERVER); + /* in replay case, p == NULL */ + rc = mdt_create_data(info, p, o); + if (rc) + RETURN(rc); + } + + CDEBUG(D_INODE, "after open, ma_valid bit = "LPX64" lmm_size = %d\n", + ma->ma_valid, ma->ma_lmm_size); + + if (ma->ma_valid & MA_LOV) { + LASSERT(ma->ma_lmm_size != 0); + repbody->eadatasize = ma->ma_lmm_size; + if (isdir) + repbody->valid |= OBD_MD_FLDIREA; + else + repbody->valid |= OBD_MD_FLEASIZE; + } + + if (flags & FMODE_WRITE) { + rc = mdt_write_get(info->mti_mdt, o); + if (rc == 0) { + mdt_epoch_open(info, o); + repbody->ioepoch = o->mot_ioepoch; + } + } else if (flags & MDS_FMODE_EXEC) { + rc = mdt_write_deny(info->mti_mdt, o); + } + if (rc) + RETURN(rc); + + rc = mo_open(info->mti_env, mdt_object_child(o), + created ? flags | MDS_OPEN_CREATED : flags); + if (rc) + RETURN(rc); + + mfd = mdt_mfd_new(); + if (mfd != NULL) { + /* + * Keep a reference on this object for this open, and is + * released by mdt_mfd_close(). + */ + mdt_object_get(info->mti_env, o); + + /* + * @flags is always not zero. At least it should be FMODE_READ, + * FMODE_WRITE or FMODE_EXEC. + */ + LASSERT(flags != 0); + + /* Open handling. */ + mdt_mfd_set_mode(mfd, flags); + + mfd->mfd_object = o; + mfd->mfd_xid = req->rq_xid; + + /* replay handle */ + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + struct mdt_file_data *old_mfd; + /* Check wheather old cookie already exist in + * the list, becasue when do recovery, client + * might be disconnected from server, and + * restart replay, so there maybe some orphan + * mfd here, we should remove them */ + LASSERT(info->mti_rr.rr_handle != NULL); + old_mfd = mdt_handle2mfd(info, info->mti_rr.rr_handle); + if (old_mfd) { + CDEBUG(D_HA, "del orph mfd %p cookie" LPX64"\n", + mfd, info->mti_rr.rr_handle->cookie); + spin_lock(&med->med_open_lock); + class_handle_unhash(&old_mfd->mfd_handle); + list_del_init(&old_mfd->mfd_list); + spin_unlock(&med->med_open_lock); + mdt_mfd_free(old_mfd); + } + CDEBUG(D_HA, "Store old cookie "LPX64" in new mfd\n", + info->mti_rr.rr_handle->cookie); + mfd->mfd_old_handle.cookie = + info->mti_rr.rr_handle->cookie; + } + spin_lock(&med->med_open_lock); + list_add(&mfd->mfd_list, &med->med_open_head); + spin_unlock(&med->med_open_lock); + + repbody->handle.cookie = mfd->mfd_handle.h_cookie; + mdt_empty_transno(info); + } else + rc = -ENOMEM; + + RETURN(rc); +} + + +static int mdt_finish_open(struct mdt_thread_info *info, + struct mdt_object *p, struct mdt_object *o, + int flags, int created, struct ldlm_reply *rep) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_device *mdt = info->mti_mdt; + struct md_attr *ma = &info->mti_attr; + struct lu_attr *la = &ma->ma_attr; + struct mdt_file_data *mfd; + struct mdt_body *repbody; + int rc = 0; + int isreg, isdir, islnk; + struct list_head *t; + ENTRY; + + LASSERT(ma->ma_valid & MA_INODE); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + isreg = S_ISREG(la->la_mode); + isdir = S_ISDIR(la->la_mode); + islnk = S_ISLNK(la->la_mode); + mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o)); + + if (med->med_rmtclient) { + void *buf = req_capsule_server_get(&info->mti_pill, &RMF_ACL); + + rc = mdt_pack_remote_perm(info, o, buf); + if (rc) { + repbody->valid &= ~OBD_MD_FLRMTPERM; + repbody->aclsize = 0; + } else { + repbody->valid |= OBD_MD_FLRMTPERM; + repbody->aclsize = sizeof(struct mdt_remote_perm); + } + } +#ifdef CONFIG_FS_POSIX_ACL + else if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) { + const struct lu_env *env = info->mti_env; + struct md_object *next = mdt_object_child(o); + struct lu_buf *buf = &info->mti_buf; + + buf->lb_buf = req_capsule_server_get(&info->mti_pill, &RMF_ACL); + buf->lb_len = req_capsule_get_size(&info->mti_pill, &RMF_ACL, + RCL_SERVER); + if (buf->lb_len > 0) { + rc = mo_xattr_get(env, next, buf, + XATTR_NAME_ACL_ACCESS); + if (rc < 0) { + if (rc == -ENODATA) { + repbody->aclsize = 0; + repbody->valid |= OBD_MD_FLACL; + rc = 0; + } else if (rc == -EOPNOTSUPP) { + rc = 0; + } else { + CERROR("got acl size: %d\n", rc); + } + } else { + repbody->aclsize = rc; + repbody->valid |= OBD_MD_FLACL; + rc = 0; + } + } + } +#endif + + if (mdt->mdt_opts.mo_mds_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + capa->lc_uid = 0; + rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0); + if (rc) + RETURN(rc); + repbody->valid |= OBD_MD_FLMDSCAPA; + } + if (mdt->mdt_opts.mo_oss_capa && + S_ISREG(lu_object_attr(&o->mot_obj.mo_lu))) { + struct lustre_capa *capa; + struct md_ucred *ucred = mdt_ucred(info); + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA2); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_OSS_DEFAULT | capa_open_opc(flags); + capa->lc_uid = ucred->mu_o_fsuid; + rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0); + if (rc) + RETURN(rc); + repbody->valid |= OBD_MD_FLOSSCAPA; + } + + /* + * If we are following a symlink, don't open; and do not return open + * handle for special nodes as client required. + */ + if (islnk || (!isreg && !isdir && + (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH))) { + lustre_msg_set_transno(req->rq_repmsg, 0); + RETURN(0); + } + + mdt_set_disposition(info, rep, DISP_OPEN_OPEN); + + /* + * We need to return the existing object's fid back, so it is done here, + * after preparing the reply. + */ + if (!created && (flags & MDS_OPEN_EXCL) && (flags & MDS_OPEN_CREAT)) + RETURN(-EEXIST); + + /* This can't be done earlier, we need to return reply body */ + if (isdir) { + if (flags & (MDS_OPEN_CREAT | FMODE_WRITE)) { + /* We are trying to create or write an existing dir. */ + RETURN(-EISDIR); + } + } else if (flags & MDS_OPEN_DIRECTORY) + RETURN(-ENOTDIR); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_OPEN_CREATE)) { + obd_fail_loc = OBD_FAIL_LDLM_REPLY | OBD_FAIL_ONCE; + RETURN(-EAGAIN); + } + + mfd = NULL; + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + spin_lock(&med->med_open_lock); + list_for_each(t, &med->med_open_head) { + mfd = list_entry(t, struct mdt_file_data, mfd_list); + if (mfd->mfd_xid == req->rq_xid) { + break; + } + mfd = NULL; + } + spin_unlock(&med->med_open_lock); + + if (mfd != NULL) { + repbody->handle.cookie = mfd->mfd_handle.h_cookie; + /*set repbody->ea_size for resent case*/ + if (ma->ma_valid & MA_LOV) { + LASSERT(ma->ma_lmm_size != 0); + repbody->eadatasize = ma->ma_lmm_size; + if (isdir) + repbody->valid |= OBD_MD_FLDIREA; + else + repbody->valid |= OBD_MD_FLEASIZE; + } + RETURN(0); + } + } + + rc = mdt_mfd_open(info, p, o, flags, created); + RETURN(rc); +} + +extern void mdt_req_from_mcd(struct ptlrpc_request *req, + struct mdt_client_data *mcd); + +void mdt_reconstruct_open(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + const struct lu_env *env = info->mti_env; + struct mdt_device *mdt = info->mti_mdt; + struct req_capsule *pill = &info->mti_pill; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_client_data *mcd = med->med_mcd; + struct md_attr *ma = &info->mti_attr; + struct mdt_reint_record *rr = &info->mti_rr; + __u32 flags = info->mti_spec.sp_cr_flags; + struct ldlm_reply *ldlm_rep; + struct mdt_object *parent; + struct mdt_object *child; + struct mdt_body *repbody; + int rc; + ENTRY; + + LASSERT(pill->rc_fmt == &RQF_LDLM_INTENT_OPEN); + ldlm_rep = req_capsule_server_get(pill, &RMF_DLM_REP); + repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); + + ma->ma_lmm = req_capsule_server_get(pill, &RMF_MDT_MD); + ma->ma_lmm_size = req_capsule_get_size(pill, &RMF_MDT_MD, + RCL_SERVER); + ma->ma_need = MA_INODE | MA_LOV; + ma->ma_valid = 0; + + mdt_req_from_mcd(req, med->med_mcd); + mdt_set_disposition(info, ldlm_rep, mcd->mcd_last_data); + + CERROR("This is reconstruct open: disp="LPX64", result=%d\n", + ldlm_rep->lock_policy_res1, req->rq_status); + + if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE) && + req->rq_status != 0) { + /* We did not create successfully, return error to client. */ + mdt_shrink_reply(info); + GOTO(out, rc = req->rq_status); + } + + if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE)) { + /* + * We failed after creation, but we do not know in which step + * we failed. So try to check the child object. + */ + parent = mdt_object_find(env, mdt, rr->rr_fid1); + LASSERT(!IS_ERR(parent)); + + child = mdt_object_find(env, mdt, rr->rr_fid2); + LASSERT(!IS_ERR(child)); + + rc = mdt_object_exists(child); + if (rc > 0) { + struct md_object *next; + + mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA); + next = mdt_object_child(child); + rc = mo_attr_get(env, next, ma); + if (rc == 0) + rc = mdt_finish_open(info, parent, child, + flags, 1, ldlm_rep); + } else if (rc < 0) { + /* the child object was created on remote server */ + repbody->fid1 = *rr->rr_fid2; + repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); + rc = 0; + } else if (rc == 0) { + /* the child does not exist, we should do regular open */ + mdt_object_put(env, parent); + mdt_object_put(env, child); + GOTO(regular_open, 0); + } + mdt_object_put(env, parent); + mdt_object_put(env, child); + mdt_shrink_reply(info); + GOTO(out, rc); + } else { +regular_open: + /* We did not try to create, so we are a pure open */ + rc = mdt_reint_open(info, lhc); + } + + EXIT; +out: + req->rq_status = rc; + lustre_msg_set_status(req->rq_repmsg, req->rq_status); + LASSERT(ergo(rc < 0, lustre_msg_get_transno(req->rq_repmsg) == 0)); +} + +static int mdt_open_by_fid(struct mdt_thread_info* info, + struct ldlm_reply *rep) +{ + const struct lu_env *env = info->mti_env; + __u32 flags = info->mti_spec.sp_cr_flags; + struct mdt_reint_record *rr = &info->mti_rr; + struct md_attr *ma = &info->mti_attr; + struct mdt_object *o; + int rc; + ENTRY; + + LASSERT(info->mti_spec.u.sp_ea.no_lov_create); + o = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid2); + if (IS_ERR(o)) + RETURN(rc = PTR_ERR(o)); + + rc = mdt_object_exists(o); + if (rc > 0) { + mdt_set_disposition(info, rep, (DISP_IT_EXECD | + DISP_LOOKUP_EXECD | + DISP_LOOKUP_POS)); + + rc = mo_attr_get(env, mdt_object_child(o), ma); + if (rc == 0) + rc = mdt_mfd_open(info, NULL, o, flags, 0); + } else if (rc == 0) { + rc = -ENOENT; + } else { + /* the child object was created on remote server */ + struct mdt_body *repbody; + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + repbody->fid1 = *rr->rr_fid2; + repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); + rc = 0; + } + + mdt_object_put(info->mti_env, o); + RETURN(rc); +} + +int mdt_pin(struct mdt_thread_info* info) +{ + ENTRY; + RETURN(err_serious(-EOPNOTSUPP)); +} + +/* Cross-ref request. Currently it can only be a pure open (w/o create) */ +static int mdt_cross_open(struct mdt_thread_info* info, + const struct lu_fid *fid, + struct ldlm_reply *rep, __u32 flags) +{ + struct md_attr *ma = &info->mti_attr; + struct mdt_object *o; + int rc; + ENTRY; + + o = mdt_object_find(info->mti_env, info->mti_mdt, fid); + if (IS_ERR(o)) + RETURN(rc = PTR_ERR(o)); + + rc = mdt_object_exists(o); + if (rc > 0) { + /* Do permission check for cross-open. */ + rc = mo_permission(info->mti_env, NULL, mdt_object_child(o), + NULL, flags | MDS_OPEN_CROSS); + if (rc) + goto out; + + mdt_set_capainfo(info, 0, fid, BYPASS_CAPA); + rc = mo_attr_get(info->mti_env, mdt_object_child(o), ma); + if (rc == 0) + rc = mdt_finish_open(info, NULL, o, flags, 0, rep); + } else if (rc == 0) { + /* + * Something is wrong here. lookup was positive but there is + * no object! + */ + CERROR("Cross-ref object doesn't exist!\n"); + rc = -EFAULT; + } else { + /* Something is wrong here, the object is on another MDS! */ + CERROR("The object isn't on this server! FLD error?\n"); + LU_OBJECT_DEBUG(D_WARNING, info->mti_env, + &o->mot_obj.mo_lu, + "Object isn't on this server! FLD error?\n"); + + rc = -EFAULT; + } + +out: + mdt_object_put(info->mti_env, o); + RETURN(rc); +} + +int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) +{ + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_object *parent; + struct mdt_object *child; + struct mdt_lock_handle *lh; + struct ldlm_reply *ldlm_rep; + struct mdt_body *repbody; + struct lu_fid *child_fid = &info->mti_tmp_fid1; + struct md_attr *ma = &info->mti_attr; + __u32 create_flags = info->mti_spec.sp_cr_flags; + struct mdt_reint_record *rr = &info->mti_rr; + struct lu_name *lname; + int result; + int created = 0; + ENTRY; + + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE, + (obd_timeout + 1) / 4); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + ma->ma_lmm = req_capsule_server_get(&info->mti_pill, &RMF_MDT_MD); + ma->ma_lmm_size = req_capsule_get_size(&info->mti_pill, &RMF_MDT_MD, + RCL_SERVER); + ma->ma_need = MA_INODE | MA_LOV; + ma->ma_valid = 0; + + LASSERT(info->mti_pill.rc_fmt == &RQF_LDLM_INTENT_OPEN); + ldlm_rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); + + /* TODO: JOIN file */ + if (create_flags & MDS_OPEN_JOIN_FILE) { + CERROR("JOIN file will be supported soon\n"); + GOTO(out, result = err_serious(-EOPNOTSUPP)); + } + + CDEBUG(D_INODE, "I am going to open "DFID"/(%s->"DFID") " + "cr_flag=0%o mode=0%06o msg_flag=0x%x\n", + PFID(rr->rr_fid1), rr->rr_name, + PFID(rr->rr_fid2), create_flags, + ma->ma_attr.la_mode, lustre_msg_get_flags(req->rq_reqmsg)); + + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + /* This is a replay request. */ + result = mdt_open_by_fid(info, ldlm_rep); + + if (result != -ENOENT) + GOTO(out, result); + + /* + * We didn't find the correct object, so we need to re-create it + * via a regular replay. + */ + if (!(create_flags & MDS_OPEN_CREAT)) { + DEBUG_REQ(D_ERROR, req,"OPEN & CREAT not in open replay."); + GOTO(out, result = -EFAULT); + } + CDEBUG(D_INFO, "Open replay did find object, continue as " + "regular open\n"); + } + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) + GOTO(out, result = err_serious(-ENOMEM)); + + mdt_set_disposition(info, ldlm_rep, + (DISP_IT_EXECD | DISP_LOOKUP_EXECD)); + + if (info->mti_cross_ref) { + /* This is cross-ref open */ + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep, + create_flags); + GOTO(out, result); + } + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lh, (create_flags & MDS_OPEN_CREAT) ? + LCK_PW : LCK_PR, rr->rr_name, rr->rr_namelen); + + parent = mdt_object_find_lock(info, rr->rr_fid1, lh, + MDS_INODELOCK_UPDATE); + if (IS_ERR(parent)) + GOTO(out, result = PTR_ERR(parent)); + + fid_zero(child_fid); + + lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); + + result = mdo_lookup(info->mti_env, mdt_object_child(parent), + lname, child_fid, &info->mti_spec); + LASSERTF(ergo(result == 0, fid_is_sane(child_fid)), + "looking for "DFID"/%s, result fid="DFID"\n", + PFID(mdt_object_fid(parent)), rr->rr_name, PFID(child_fid)); + + if (result != 0 && result != -ENOENT && result != -ESTALE) + GOTO(out_parent, result); + + if (result == -ENOENT || result == -ESTALE) { + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG); + if (result == -ESTALE) { + /* + * -ESTALE means the parent is a dead(unlinked) dir, so + * it should return -ENOENT to in accordance with the + * original mds implementaion. + */ + GOTO(out_parent, result = -ENOENT); + } + if (!(create_flags & MDS_OPEN_CREAT)) + GOTO(out_parent, result); + *child_fid = *info->mti_rr.rr_fid2; + LASSERTF(fid_is_sane(child_fid), "fid="DFID"\n", + PFID(child_fid)); + } else { + /* + * Check for O_EXCL is moved to the mdt_finish_open(), we need to + * return FID back in that case. + */ + mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); + } + + child = mdt_object_find(info->mti_env, mdt, child_fid); + if (IS_ERR(child)) + GOTO(out_parent, result = PTR_ERR(child)); + + mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); + if (result == -ENOENT) { + /* Not found and with MDS_OPEN_CREAT: let's create it. */ + mdt_set_disposition(info, ldlm_rep, DISP_OPEN_CREATE); + + /* Let lower layers know what is lock mode on directory. */ + info->mti_spec.sp_cr_mode = + mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode); + + /* + * Do not perform lookup sanity check. We know that name does + * not exist. + */ + info->mti_spec.sp_cr_lookup = 0; + + result = mdo_create(info->mti_env, + mdt_object_child(parent), + lname, + mdt_object_child(child), + &info->mti_spec, + &info->mti_attr); + if (result == -ERESTART) { + mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE); + GOTO(out_child, result); + } else { + if (result != 0) + GOTO(out_child, result); + } + created = 1; + } else { + /* We have to get attr & lov ea for this object */ + result = mo_attr_get(info->mti_env, mdt_object_child(child), + ma); + /* + * The object is on remote node, return its FID for remote open. + */ + if (result == -EREMOTE) { + int rc; + + /* + * Check if this lock already was sent to client and + * this is resent case. For resent case do not take lock + * again, use what is already granted. + */ + LASSERT(lhc != NULL); + + if (lustre_handle_is_used(&lhc->mlh_reg_lh)) { + struct ldlm_lock *lock; + + LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & + MSG_RESENT); + + lock = ldlm_handle2lock(&lhc->mlh_reg_lh); + if (!lock) { + CERROR("Invalid lock handle "LPX64"\n", + lhc->mlh_reg_lh.cookie); + LBUG(); + } + LASSERT(fid_res_name_eq(mdt_object_fid(child), + &lock->l_resource->lr_name)); + LDLM_LOCK_PUT(lock); + rc = 0; + } else { + mdt_lock_handle_init(lhc); + mdt_lock_reg_init(lhc, LCK_PR); + + rc = mdt_object_lock(info, child, lhc, + MDS_INODELOCK_LOOKUP, + MDT_CROSS_LOCK); + } + repbody->fid1 = *mdt_object_fid(child); + repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); + if (rc != 0) + result = rc; + GOTO(out_child, result); + } + } + + /* Try to open it now. */ + result = mdt_finish_open(info, parent, child, create_flags, + created, ldlm_rep); + + if (result != 0 && created) { + int rc2; + ma->ma_need = 0; + ma->ma_valid = 0; + ma->ma_cookie_size = 0; + info->mti_no_need_trans = 1; + rc2 = mdo_unlink(info->mti_env, + mdt_object_child(parent), + mdt_object_child(child), + lname, + &info->mti_attr); + if (rc2 != 0) + CERROR("Error in cleanup of open\n"); + } + EXIT; +out_child: + mdt_object_put(info->mti_env, child); +out_parent: + mdt_object_unlock_put(info, parent, lh, result); +out: + mdt_shrink_reply(info); + if (result) + lustre_msg_set_transno(req->rq_repmsg, 0); + return result; +} + +#define MFD_CLOSED(mode) (((mode) & ~(FMODE_EPOCH | FMODE_SOM | \ + FMODE_EPOCHLCK)) == FMODE_CLOSED) + +static int mdt_mfd_closed(struct mdt_file_data *mfd) +{ + return ((mfd == NULL) || MFD_CLOSED(mfd->mfd_mode)); +} + +int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) +{ + struct mdt_object *o = mfd->mfd_object; + struct md_object *next = mdt_object_child(o); + struct md_attr *ma = &info->mti_attr; + int rc = 0, ret = 0; + int mode; + ENTRY; + + mode = mfd->mfd_mode; + + if ((mode & FMODE_WRITE) || (mode & FMODE_EPOCHLCK)) { + mdt_write_put(info->mti_mdt, o); + ret = mdt_epoch_close(info, o); + } else if (mode & MDS_FMODE_EXEC) { + mdt_write_allow(info->mti_mdt, o); + } else if (mode & FMODE_EPOCH) { + ret = mdt_epoch_close(info, o); + } + + /* Update atime on close only. */ + if ((mode & MDS_FMODE_EXEC || mode & FMODE_READ || mode & FMODE_WRITE) + && (ma->ma_valid & MA_INODE) && (ma->ma_attr.la_valid & LA_ATIME)) { + /* Set the atime only. */ + ma->ma_attr.la_valid = LA_ATIME; + rc = mo_attr_set(info->mti_env, next, ma); + } + + ma->ma_need |= MA_INODE; + ma->ma_valid = 0; + + if (!MFD_CLOSED(mode)) + rc = mo_close(info->mti_env, next, ma); + else if (ret == -EAGAIN) + rc = mo_attr_get(info->mti_env, next, ma); + + /* If the object is unlinked, do not try to re-enable SIZEONMDS */ + if ((ret == -EAGAIN) && (ma->ma_valid & MA_INODE) && + (ma->ma_attr.la_nlink == 0)) { + ret = 0; + } + + if ((ret == -EAGAIN) || (ret == 1)) { + struct mdt_export_data *med; + + /* The epoch has not closed or Size-on-MDS update is needed. + * Put mfd back into the list. */ + LASSERT(mdt_conn_flags(info) & OBD_CONNECT_SOM); + mdt_mfd_set_mode(mfd, (ret == 1 ? FMODE_EPOCH : FMODE_SOM)); + + LASSERT(mdt_info_req(info)); + med = &mdt_info_req(info)->rq_export->exp_mdt_data; + spin_lock(&med->med_open_lock); + list_add(&mfd->mfd_list, &med->med_open_head); + class_handle_hash_back(&mfd->mfd_handle); + spin_unlock(&med->med_open_lock); + + if (ret == 1) { + ret = 0; + } else { + CDEBUG(D_INODE, "Size-on-MDS attribute update is " + "needed on "DFID"\n", PFID(mdt_object_fid(o))); + } + } else { + mdt_mfd_free(mfd); + mdt_object_put(info->mti_env, o); + } + + RETURN(rc ? rc : ret); +} + +int mdt_close(struct mdt_thread_info *info) +{ + struct mdt_export_data *med; + struct mdt_file_data *mfd; + struct mdt_object *o; + struct md_attr *ma = &info->mti_attr; + struct mdt_body *repbody = NULL; + int rc, ret = 0; + ENTRY; + + /* Close may come with the Size-on-MDS update. Unpack it. */ + rc = mdt_close_unpack(info); + if (rc) + RETURN(err_serious(rc)); + + LASSERT(info->mti_epoch); + + req_capsule_set_size(&info->mti_pill, &RMF_MDT_MD, RCL_SERVER, + info->mti_mdt->mdt_max_mdsize); + req_capsule_set_size(&info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER, + info->mti_mdt->mdt_max_cookiesize); + rc = req_capsule_pack(&info->mti_pill); + /* Continue to close handle even if we can not pack reply */ + if (rc == 0) { + repbody = req_capsule_server_get(&info->mti_pill, + &RMF_MDT_BODY); + ma->ma_lmm = req_capsule_server_get(&info->mti_pill, + &RMF_MDT_MD); + ma->ma_lmm_size = req_capsule_get_size(&info->mti_pill, + &RMF_MDT_MD, + RCL_SERVER); + ma->ma_cookie = req_capsule_server_get(&info->mti_pill, + &RMF_LOGCOOKIES); + ma->ma_cookie_size = req_capsule_get_size(&info->mti_pill, + &RMF_LOGCOOKIES, + RCL_SERVER); + ma->ma_need = MA_INODE | MA_LOV | MA_COOKIE; + repbody->eadatasize = 0; + repbody->aclsize = 0; + } else + rc = err_serious(rc); + + med = &mdt_info_req(info)->rq_export->exp_mdt_data; + spin_lock(&med->med_open_lock); + mfd = mdt_handle2mfd(info, &info->mti_epoch->handle); + if (mdt_mfd_closed(mfd)) { + spin_unlock(&med->med_open_lock); + CDEBUG(D_INODE, "no handle for file close: fid = "DFID + ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1), + info->mti_epoch->handle.cookie); + rc = err_serious(-ESTALE); + } else { + class_handle_unhash(&mfd->mfd_handle); + list_del_init(&mfd->mfd_list); + spin_unlock(&med->med_open_lock); + + /* Do not lose object before last unlink. */ + o = mfd->mfd_object; + mdt_object_get(info->mti_env, o); + ret = mdt_mfd_close(info, mfd); + if (repbody != NULL) + rc = mdt_handle_last_unlink(info, o, ma); + mdt_empty_transno(info); + mdt_object_put(info->mti_env, o); + } + if (repbody != NULL) + mdt_shrink_reply(info); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) + RETURN(err_serious(-ENOMEM)); + + RETURN(rc ? rc : ret); +} + +int mdt_done_writing(struct mdt_thread_info *info) +{ + struct mdt_body *repbody = NULL; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med; + struct mdt_file_data *mfd; + int rc; + ENTRY; + + rc = req_capsule_pack(&info->mti_pill); + if (rc) + RETURN(err_serious(rc)); + + repbody = req_capsule_server_get(&info->mti_pill, + &RMF_MDT_BODY); + repbody->eadatasize = 0; + repbody->aclsize = 0; + + /* Done Writing may come with the Size-on-MDS update. Unpack it. */ + rc = mdt_close_unpack(info); + if (rc) + RETURN(err_serious(rc)); + + med = &info->mti_exp->exp_mdt_data; + spin_lock(&med->med_open_lock); + mfd = mdt_handle2mfd(info, &info->mti_epoch->handle); + if (mfd == NULL) { + spin_unlock(&med->med_open_lock); + CDEBUG(D_INODE, "no handle for done write: fid = "DFID + ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1), + info->mti_epoch->handle.cookie); + RETURN(-ESTALE); + } + + if (!(mfd->mfd_mode == FMODE_EPOCH || + mfd->mfd_mode == FMODE_EPOCHLCK)) { + spin_unlock(&med->med_open_lock); + DEBUG_REQ(D_WARNING, req, "req should be resent req"); + LASSERT(mfd->mfd_mode == FMODE_SOM); + LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY)); + /*Since we did not bond this req with open/close, + *Why we should keep this req as replay req XXX*/ + GOTO(empty_transno, rc); + } + class_handle_unhash(&mfd->mfd_handle); + list_del_init(&mfd->mfd_list); + spin_unlock(&med->med_open_lock); + + /* Set EPOCH CLOSE flag if not set by client. */ + info->mti_epoch->flags |= MF_EPOCH_CLOSE; + info->mti_attr.ma_valid = 0; + rc = mdt_mfd_close(info, mfd); +empty_transno: + mdt_empty_transno(info); + RETURN(rc); +} + diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c new file mode 100644 index 0000000..61c2092 --- /dev/null +++ b/lustre/mdt/mdt_recovery.c @@ -0,0 +1,1131 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * linux/mdt/mdt_recovery.c + * Lustre Metadata Target (mdt) recovery-related methods + * + * Copyright (C) 2002-2006 Cluster File Systems, Inc. + * Author: Huang Hua <huanghua@clusterfs.com> + * Author: Pershin Mike <tappro@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include "mdt_internal.h" + +static int mdt_server_data_update(const struct lu_env *env, + struct mdt_device *mdt); + +struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len) +{ + struct lu_buf *buf; + struct mdt_thread_info *mti; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + buf = &mti->mti_buf; + buf->lb_buf = area; + buf->lb_len = len; + return buf; +} + +const struct lu_buf *mdt_buf_const(const struct lu_env *env, + const void *area, ssize_t len) +{ + struct lu_buf *buf; + struct mdt_thread_info *mti; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + buf = &mti->mti_buf; + + buf->lb_buf = (void *)area; + buf->lb_len = len; + return buf; +} + +int mdt_record_read(const struct lu_env *env, + struct dt_object *dt, struct lu_buf *buf, loff_t *pos) +{ + int rc; + + LASSERTF(dt != NULL, "dt is NULL when we want to read record\n"); + + rc = dt->do_body_ops->dbo_read(env, dt, buf, pos, BYPASS_CAPA); + + if (rc == buf->lb_len) + rc = 0; + else if (rc >= 0) + rc = -EFAULT; + return rc; +} + +int mdt_record_write(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *buf, + loff_t *pos, struct thandle *th) +{ + int rc; + + LASSERTF(dt != NULL, "dt is NULL when we want to write record\n"); + LASSERT(th != NULL); + rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA); + if (rc == buf->lb_len) + rc = 0; + else if (rc >= 0) + rc = -EFAULT; + return rc; +} +/* only one record write */ + +enum { + MDT_TXN_LAST_RCVD_WRITE_CREDITS = 3 +}; + +struct thandle* mdt_trans_start(const struct lu_env *env, + struct mdt_device *mdt, int credits) +{ + struct mdt_thread_info *mti; + struct txn_param *p; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + p = &mti->mti_txn_param; + txn_param_init(p, credits); + + /* export can require sync operations */ + if (mti->mti_exp != NULL) + p->tp_sync = mti->mti_exp->exp_need_sync; + + return mdt->mdt_bottom->dd_ops->dt_trans_start(env, mdt->mdt_bottom, p); +} + +void mdt_trans_stop(const struct lu_env *env, + struct mdt_device *mdt, struct thandle *th) +{ + mdt->mdt_bottom->dd_ops->dt_trans_stop(env, th); +} + +/* last_rcvd handling */ +static inline void msd_le_to_cpu(struct mdt_server_data *buf, + struct mdt_server_data *msd) +{ + memcpy(msd->msd_uuid, buf->msd_uuid, sizeof (msd->msd_uuid)); + msd->msd_last_transno = le64_to_cpu(buf->msd_last_transno); + msd->msd_mount_count = le64_to_cpu(buf->msd_mount_count); + msd->msd_feature_compat = le32_to_cpu(buf->msd_feature_compat); + msd->msd_feature_rocompat = le32_to_cpu(buf->msd_feature_rocompat); + msd->msd_feature_incompat = le32_to_cpu(buf->msd_feature_incompat); + msd->msd_server_size = le32_to_cpu(buf->msd_server_size); + msd->msd_client_start = le32_to_cpu(buf->msd_client_start); + msd->msd_client_size = le16_to_cpu(buf->msd_client_size); +} + +static inline void msd_cpu_to_le(struct mdt_server_data *msd, + struct mdt_server_data *buf) +{ + memcpy(buf->msd_uuid, msd->msd_uuid, sizeof (msd->msd_uuid)); + buf->msd_last_transno = cpu_to_le64(msd->msd_last_transno); + buf->msd_mount_count = cpu_to_le64(msd->msd_mount_count); + buf->msd_feature_compat = cpu_to_le32(msd->msd_feature_compat); + buf->msd_feature_rocompat = cpu_to_le32(msd->msd_feature_rocompat); + buf->msd_feature_incompat = cpu_to_le32(msd->msd_feature_incompat); + buf->msd_server_size = cpu_to_le32(msd->msd_server_size); + buf->msd_client_start = cpu_to_le32(msd->msd_client_start); + buf->msd_client_size = cpu_to_le16(msd->msd_client_size); +} + +static inline void mcd_le_to_cpu(struct mdt_client_data *buf, + struct mdt_client_data *mcd) +{ + memcpy(mcd->mcd_uuid, buf->mcd_uuid, sizeof (mcd->mcd_uuid)); + mcd->mcd_last_transno = le64_to_cpu(buf->mcd_last_transno); + mcd->mcd_last_xid = le64_to_cpu(buf->mcd_last_xid); + mcd->mcd_last_result = le32_to_cpu(buf->mcd_last_result); + mcd->mcd_last_data = le32_to_cpu(buf->mcd_last_data); + mcd->mcd_last_close_transno = le64_to_cpu(buf->mcd_last_close_transno); + mcd->mcd_last_close_xid = le64_to_cpu(buf->mcd_last_close_xid); + mcd->mcd_last_close_result = le32_to_cpu(buf->mcd_last_close_result); +} + +static inline void mcd_cpu_to_le(struct mdt_client_data *mcd, + struct mdt_client_data *buf) +{ + memcpy(buf->mcd_uuid, mcd->mcd_uuid, sizeof (mcd->mcd_uuid)); + buf->mcd_last_transno = cpu_to_le64(mcd->mcd_last_transno); + buf->mcd_last_xid = cpu_to_le64(mcd->mcd_last_xid); + buf->mcd_last_result = cpu_to_le32(mcd->mcd_last_result); + buf->mcd_last_data = cpu_to_le32(mcd->mcd_last_data); + buf->mcd_last_close_transno = cpu_to_le64(mcd->mcd_last_close_transno); + buf->mcd_last_close_xid = cpu_to_le64(mcd->mcd_last_close_xid); + buf->mcd_last_close_result = cpu_to_le32(mcd->mcd_last_close_result); +} + +static int mdt_last_rcvd_header_read(const struct lu_env *env, + struct mdt_device *mdt, + struct mdt_server_data *msd) +{ + struct mdt_thread_info *mti; + struct mdt_server_data *tmp; + loff_t *off; + int rc; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + /* temporary stuff for read */ + tmp = &mti->mti_msd; + off = &mti->mti_off; + *off = 0; + rc = mdt_record_read(env, mdt->mdt_last_rcvd, + mdt_buf(env, tmp, sizeof(*tmp)), off); + if (rc == 0) + msd_le_to_cpu(tmp, msd); + + CDEBUG(D_INFO, "read last_rcvd header rc = %d:\n" + "uuid = %s\n" + "last_transno = "LPU64"\n", + rc, + msd->msd_uuid, + msd->msd_last_transno); + return rc; +} + +static int mdt_last_rcvd_header_write(const struct lu_env *env, + struct mdt_device *mdt, + struct mdt_server_data *msd) +{ + struct mdt_thread_info *mti; + struct mdt_server_data *tmp; + struct thandle *th; + loff_t *off; + int rc; + ENTRY; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + + th = mdt_trans_start(env, mdt, MDT_TXN_LAST_RCVD_WRITE_CREDITS); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + /* temporary stuff for read */ + tmp = &mti->mti_msd; + off = &mti->mti_off; + *off = 0; + + msd_cpu_to_le(msd, tmp); + + rc = mdt_record_write(env, mdt->mdt_last_rcvd, + mdt_buf_const(env, tmp, sizeof(*tmp)), off, th); + + mdt_trans_stop(env, mdt, th); + + CDEBUG(D_INFO, "write last_rcvd header rc = %d:\n" + "uuid = %s\nlast_transno = "LPU64"\n", + rc, msd->msd_uuid, msd->msd_last_transno); + + RETURN(rc); +} + +static int mdt_last_rcvd_read(const struct lu_env *env, + struct mdt_device *mdt, + struct mdt_client_data *mcd, loff_t *off) +{ + struct mdt_thread_info *mti; + struct mdt_client_data *tmp; + int rc; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + tmp = &mti->mti_mcd; + rc = mdt_record_read(env, mdt->mdt_last_rcvd, + mdt_buf(env, tmp, sizeof(*tmp)), off); + if (rc == 0) + mcd_le_to_cpu(tmp, mcd); + + CDEBUG(D_INFO, "read mcd @%d rc = %d:\n" + "uuid = %s\n" + "last_transno = "LPU64"\n" + "last_xid = "LPU64"\n" + "last_result = %d\n" + "last_data = %d\n" + "last_close_transno = "LPU64"\n" + "last_close_xid = "LPU64"\n" + "last_close_result = %d\n", + (int)*off - sizeof(*tmp), + rc, + mcd->mcd_uuid, + mcd->mcd_last_transno, + mcd->mcd_last_xid, + mcd->mcd_last_result, + mcd->mcd_last_data, + mcd->mcd_last_close_transno, + mcd->mcd_last_close_xid, + mcd->mcd_last_close_result); + return rc; +} + +static int mdt_last_rcvd_write(const struct lu_env *env, + struct mdt_device *mdt, + struct mdt_client_data *mcd, + loff_t *off, struct thandle *th) +{ + struct mdt_thread_info *mti; + struct mdt_client_data *tmp; + int rc; + + LASSERT(th != NULL); + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + tmp = &mti->mti_mcd; + + mcd_cpu_to_le(mcd, tmp); + + rc = mdt_record_write(env, mdt->mdt_last_rcvd, + mdt_buf_const(env, tmp, sizeof(*tmp)), off, th); + + CDEBUG(D_INFO, "write mcd @%d rc = %d:\n" + "uuid = %s\n" + "last_transno = "LPU64"\n" + "last_xid = "LPU64"\n" + "last_result = %d\n" + "last_data = %d\n" + "last_close_transno = "LPU64"\n" + "last_close_xid = "LPU64"\n" + "last_close_result = %d\n", + (int)*off - sizeof(*tmp), + rc, + mcd->mcd_uuid, + mcd->mcd_last_transno, + mcd->mcd_last_xid, + mcd->mcd_last_result, + mcd->mcd_last_data, + mcd->mcd_last_close_transno, + mcd->mcd_last_close_xid, + mcd->mcd_last_close_result); + return rc; +} + + +static int mdt_clients_data_init(const struct lu_env *env, + struct mdt_device *mdt, + unsigned long last_size) +{ + struct mdt_server_data *msd = &mdt->mdt_msd; + struct mdt_client_data *mcd = NULL; + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + loff_t off; + int cl_idx; + int rc = 0; + ENTRY; + + /* When we do a clean MDS shutdown, we save the last_transno into + * the header. If we find clients with higher last_transno values + * then those clients may need recovery done. */ + LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0); + for (cl_idx = 0, off = msd->msd_client_start; + off < last_size; cl_idx++) { + __u64 last_transno; + struct obd_export *exp; + + if (!mcd) { + OBD_ALLOC_PTR(mcd); + if (!mcd) + RETURN(-ENOMEM); + } + + off = msd->msd_client_start + + cl_idx * msd->msd_client_size; + + rc = mdt_last_rcvd_read(env, mdt, mcd, &off); + if (rc) { + CERROR("error reading MDS %s idx %d, off %llu: rc %d\n", + LAST_RCVD, cl_idx, off, rc); + rc = 0; + break; /* read error shouldn't cause startup to fail */ + } + + if (mcd->mcd_uuid[0] == '\0') { + CDEBUG(D_INFO, "skipping zeroed client at offset %d\n", + cl_idx); + continue; + } + + last_transno = mcd_last_transno(mcd); + + /* These exports are cleaned up by mdt_obd_disconnect(), so + * they need to be set up like real exports as + * mdt_obd_connect() does. + */ + CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 + " srv lr: "LPU64" lx: "LPU64"\n", mcd->mcd_uuid, cl_idx, + last_transno, msd->msd_last_transno, + mcd_last_xid(mcd)); + + exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid); + if (IS_ERR(exp)) { + if (PTR_ERR(exp) == -EALREADY) { + /* export already exists, zero out this one */ + mcd->mcd_uuid[0] = '\0'; + } else + GOTO(err_client, rc = PTR_ERR(exp)); + } else { + struct mdt_thread_info *mti; + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + mti->mti_exp = exp; + exp->exp_mdt_data.med_mcd = mcd; + rc = mdt_client_add(env, mdt, cl_idx); + /* can't fail existing */ + LASSERTF(rc == 0, "rc = %d\n", rc); + mcd = NULL; + spin_lock(&exp->exp_lock); + exp->exp_connecting = 0; + exp->exp_in_recovery = 0; + spin_unlock(&exp->exp_lock); + obd->obd_max_recoverable_clients++; + class_export_put(exp); + } + + CDEBUG(D_OTHER, "client at idx %d has last_transno="LPU64"\n", + cl_idx, last_transno); + /* protect __u64 value update */ + spin_lock(&mdt->mdt_transno_lock); + mdt->mdt_last_transno = max(last_transno, + mdt->mdt_last_transno); + spin_unlock(&mdt->mdt_transno_lock); + } + +err_client: + if (mcd) + OBD_FREE_PTR(mcd); + RETURN(rc); +} + +static int mdt_server_data_init(const struct lu_env *env, + struct mdt_device *mdt) +{ + struct mdt_server_data *msd = &mdt->mdt_msd; + struct mdt_client_data *mcd = NULL; + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct mdt_thread_info *mti; + struct dt_object *obj; + struct lu_attr *la; + unsigned long last_rcvd_size; + __u64 mount_count; + int rc; + ENTRY; + + /* ensure padding in the struct is the correct size */ + CLASSERT(offsetof(struct mdt_server_data, msd_padding) + + sizeof(msd->msd_padding) == LR_SERVER_SIZE); + CLASSERT(offsetof(struct mdt_client_data, mcd_padding) + + sizeof(mcd->mcd_padding) == LR_CLIENT_SIZE); + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + la = &mti->mti_attr.ma_attr; + + obj = mdt->mdt_last_rcvd; + rc = obj->do_ops->do_attr_get(env, mdt->mdt_last_rcvd, la, BYPASS_CAPA); + if (rc) + RETURN(rc); + + last_rcvd_size = (unsigned long)la->la_size; + + if (last_rcvd_size == 0) { + LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name); + + memcpy(msd->msd_uuid, obd->obd_uuid.uuid, + sizeof(msd->msd_uuid)); + msd->msd_last_transno = 0; + msd->msd_mount_count = 0; + msd->msd_server_size = LR_SERVER_SIZE; + msd->msd_client_start = LR_CLIENT_START; + msd->msd_client_size = LR_CLIENT_SIZE; + msd->msd_feature_rocompat = OBD_ROCOMPAT_LOVOBJID; + msd->msd_feature_incompat = OBD_INCOMPAT_MDT | + OBD_INCOMPAT_COMMON_LR; + } else { + LCONSOLE_WARN("%s: used disk, loading\n", obd->obd_name); + rc = mdt_last_rcvd_header_read(env, mdt, msd); + if (rc) { + CERROR("error reading MDS %s: rc %d\n", LAST_RCVD, rc); + GOTO(out, rc); + } + if (strcmp(msd->msd_uuid, obd->obd_uuid.uuid) != 0) { + LCONSOLE_ERROR_MSG(0x157, "Trying to start OBD %s using" + "the wrong disk %s. Were the /dev/ " + "assignments rearranged?\n", + obd->obd_uuid.uuid, msd->msd_uuid); + GOTO(out, rc = -EINVAL); + } + } + mount_count = msd->msd_mount_count; + + msd->msd_feature_compat = OBD_COMPAT_MDT; + + spin_lock(&mdt->mdt_transno_lock); + mdt->mdt_last_transno = msd->msd_last_transno; + spin_unlock(&mdt->mdt_transno_lock); + + CDEBUG(D_INODE, "========BEGIN DUMPING LAST_RCVD========\n"); + CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n", + obd->obd_name, mdt->mdt_last_transno); + CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n", + obd->obd_name, mount_count + 1); + CDEBUG(D_INODE, "%s: server data size: %u\n", + obd->obd_name, msd->msd_server_size); + CDEBUG(D_INODE, "%s: per-client data start: %u\n", + obd->obd_name, msd->msd_client_start); + CDEBUG(D_INODE, "%s: per-client data size: %u\n", + obd->obd_name, msd->msd_client_size); + CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n", + obd->obd_name, last_rcvd_size); + CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name, + last_rcvd_size <= msd->msd_client_start ? 0 : + (last_rcvd_size - msd->msd_client_start) / + msd->msd_client_size); + CDEBUG(D_INODE, "========END DUMPING LAST_RCVD========\n"); + + if (!msd->msd_server_size || !msd->msd_client_start || + !msd->msd_client_size) { + CERROR("Bad last_rcvd contents!\n"); + GOTO(out, rc = -EINVAL); + } + + rc = mdt_clients_data_init(env, mdt, last_rcvd_size); + if (rc) + GOTO(err_client, rc); + + spin_lock(&mdt->mdt_transno_lock); + /* obd_last_committed is used for compatibility + * with other lustre recovery code */ + obd->obd_last_committed = mdt->mdt_last_transno; + spin_unlock(&mdt->mdt_transno_lock); + + mdt->mdt_mount_count++; + msd->msd_mount_count = mdt->mdt_mount_count; + + /* save it, so mount count and last_transno is current */ + rc = mdt_server_data_update(env, mdt); + if (rc) + GOTO(err_client, rc); + + RETURN(0); + +err_client: + target_recovery_fini(obd); +out: + return rc; +} + +static int mdt_server_data_update(const struct lu_env *env, + struct mdt_device *mdt) +{ + struct mdt_server_data *msd = &mdt->mdt_msd; + int rc = 0; + ENTRY; + + CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n", + mdt->mdt_mount_count, mdt->mdt_last_transno); + + spin_lock(&mdt->mdt_transno_lock); + msd->msd_last_transno = mdt->mdt_last_transno; + spin_unlock(&mdt->mdt_transno_lock); + + /* + * This may be called from difficult reply handler and + * mdt->mdt_last_rcvd may be NULL that time. + */ + if (mdt->mdt_last_rcvd != NULL) + rc = mdt_last_rcvd_header_write(env, mdt, msd); + RETURN(rc); +} + +void mdt_cb_new_client(const struct mdt_device *mdt, __u64 transno, + void *data, int err) +{ + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + + target_client_add_cb(obd, transno, data, err); +} + +int mdt_client_new(const struct lu_env *env, struct mdt_device *mdt) +{ + unsigned long *bitmap = mdt->mdt_client_bitmap; + struct mdt_thread_info *mti; + struct mdt_export_data *med; + struct mdt_client_data *mcd; + struct mdt_server_data *msd = &mdt->mdt_msd; + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct thandle *th; + loff_t off; + int rc; + int cl_idx; + ENTRY; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + + med = &mti->mti_exp->exp_mdt_data; + mcd = med->med_mcd; + + LASSERT(bitmap != NULL); + if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid)) + RETURN(0); + + /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so + * there's no need for extra complication here + */ + spin_lock(&mdt->mdt_client_bitmap_lock); + cl_idx = find_first_zero_bit(bitmap, LR_MAX_CLIENTS); + if (cl_idx >= LR_MAX_CLIENTS || + MDT_FAIL_CHECK_ONCE(OBD_FAIL_MDS_CLIENT_ADD)) { + CERROR("no room for %u clients - fix LR_MAX_CLIENTS\n", + cl_idx); + spin_unlock(&mdt->mdt_client_bitmap_lock); + RETURN(-EOVERFLOW); + } + set_bit(cl_idx, bitmap); + spin_unlock(&mdt->mdt_client_bitmap_lock); + + CDEBUG(D_INFO, "client at idx %d with UUID '%s' added\n", + cl_idx, med->med_mcd->mcd_uuid); + + med->med_lr_idx = cl_idx; + med->med_lr_off = msd->msd_client_start + + (cl_idx * msd->msd_client_size); + init_mutex(&med->med_mcd_lock); + + LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off); + /* write new client data */ + off = med->med_lr_off; + th = mdt_trans_start(env, mdt, MDT_TXN_LAST_RCVD_WRITE_CREDITS); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + /* until this operations will be committed the sync is needed for this + * export */ + mdt_trans_add_cb(th, mdt_cb_new_client, mti->mti_exp); + spin_lock(&mti->mti_exp->exp_lock); + mti->mti_exp->exp_need_sync = 1; + spin_unlock(&mti->mti_exp->exp_lock); + + rc = mdt_last_rcvd_write(env, mdt, mcd, &off, th); + CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n", + cl_idx, med->med_lr_off, sizeof(*mcd)); + mdt_trans_stop(env, mdt, th); + + RETURN(rc); +} + +/* Add client data to the MDS. We use a bitmap to locate a free space + * in the last_rcvd file if cl_off is -1 (i.e. a new client). + * Otherwise, we just have to read the data from the last_rcvd file and + * we know its offset. + * + * It should not be possible to fail adding an existing client - otherwise + * mdt_init_server_data() callsite needs to be fixed. + */ +int mdt_client_add(const struct lu_env *env, + struct mdt_device *mdt, int cl_idx) +{ + struct mdt_thread_info *mti; + struct mdt_export_data *med; + unsigned long *bitmap = mdt->mdt_client_bitmap; + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct mdt_server_data *msd = &mdt->mdt_msd; + int rc = 0; + ENTRY; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + + med = &mti->mti_exp->exp_mdt_data; + + LASSERT(bitmap != NULL); + LASSERTF(cl_idx >= 0, "%d\n", cl_idx); + + if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid)) + RETURN(0); + + spin_lock(&mdt->mdt_client_bitmap_lock); + if (test_and_set_bit(cl_idx, bitmap)) { + CERROR("MDS client %d: bit already set in bitmap!!\n", + cl_idx); + LBUG(); + } + spin_unlock(&mdt->mdt_client_bitmap_lock); + + CDEBUG(D_INFO, "client at idx %d with UUID '%s' added\n", + cl_idx, med->med_mcd->mcd_uuid); + + med->med_lr_idx = cl_idx; + med->med_lr_off = msd->msd_client_start + + (cl_idx * msd->msd_client_size); + init_mutex(&med->med_mcd_lock); + + LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off); + + RETURN(rc); +} + +int mdt_client_del(const struct lu_env *env, struct mdt_device *mdt) +{ + struct mdt_thread_info *mti; + struct mdt_export_data *med; + struct mdt_client_data *mcd; + struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct thandle *th; + loff_t off; + int rc = 0; + ENTRY; + + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + LASSERT(mti != NULL); + + med = &mti->mti_exp->exp_mdt_data; + mcd = med->med_mcd; + if (!mcd) + RETURN(0); + + /* XXX: If mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ + if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid)) + GOTO(free, 0); + + CDEBUG(D_INFO, "freeing client at idx %u, offset %lld\n", + med->med_lr_idx, med->med_lr_off); + + off = med->med_lr_off; + + /* + * Don't clear med_lr_idx here as it is likely also unset. At worst we + * leak a client slot that will be cleaned on the next recovery. + */ + if (off <= 0) { + CERROR("client idx %d has offset %lld\n", + med->med_lr_idx, off); + GOTO(free, rc = -EINVAL); + } + + /* + * Clear the bit _after_ zeroing out the client so we don't race with + * mdt_client_add and zero out new clients. + */ + if (!test_bit(med->med_lr_idx, mdt->mdt_client_bitmap)) { + CERROR("MDT client %u: bit already clear in bitmap!!\n", + med->med_lr_idx); + LBUG(); + } + + /* + * This may be called from difficult reply handler path and + * mdt->mdt_last_rcvd may be NULL that time. + */ + if (mdt->mdt_last_rcvd != NULL) { + th = mdt_trans_start(env, mdt, MDT_TXN_LAST_RCVD_WRITE_CREDITS); + if (IS_ERR(th)) + GOTO(free, rc = PTR_ERR(th)); + + mutex_down(&med->med_mcd_lock); + memset(mcd, 0, sizeof *mcd); + + rc = mdt_last_rcvd_write(env, mdt, mcd, &off, th); + mutex_up(&med->med_mcd_lock); + mdt_trans_stop(env, mdt, th); + } + + CDEBUG(rc == 0 ? D_INFO : D_ERROR, "Zeroing out client idx %u in " + "%s rc %d\n", med->med_lr_idx, LAST_RCVD, rc); + + spin_lock(&mdt->mdt_client_bitmap_lock); + clear_bit(med->med_lr_idx, mdt->mdt_client_bitmap); + spin_unlock(&mdt->mdt_client_bitmap_lock); + + /* + * Make sure the server's last_transno is up to date. Do this after the + * client is freed so we know all the client's transactions have been + * committed. + */ + mdt_server_data_update(env, mdt); + EXIT; +free: + OBD_FREE_PTR(mcd); + med->med_mcd = NULL; + return 0; +} + +/* + * last_rcvd & last_committed update callbacks + */ +static int mdt_last_rcvd_update(struct mdt_thread_info *mti, + struct thandle *th) +{ + struct mdt_device *mdt = mti->mti_mdt; + struct ptlrpc_request *req = mdt_info_req(mti); + struct mdt_export_data *med; + struct mdt_client_data *mcd; + loff_t off; + int err; + __s32 rc = th->th_result; + __u64 *transno_p; + + ENTRY; + LASSERT(req); + LASSERT(req->rq_export); + LASSERT(mdt); + med = &req->rq_export->exp_mdt_data; + LASSERT(med); + mcd = med->med_mcd; + /* if the export has already been failed, we have no last_rcvd slot */ + if (req->rq_export->exp_failed) { + CWARN("commit transaction for disconnected client %s: rc %d\n", + req->rq_export->exp_client_uuid.uuid, rc); + if (rc == 0) + rc = -ENOTCONN; + RETURN(rc); + } + + off = med->med_lr_off; + mutex_down(&med->med_mcd_lock); + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE || + lustre_msg_get_opc(req->rq_reqmsg) == MDS_DONE_WRITING) { + transno_p = &mcd->mcd_last_close_transno; + mcd->mcd_last_close_xid = req->rq_xid; + mcd->mcd_last_close_result = rc; + } else { + transno_p = &mcd->mcd_last_transno; + mcd->mcd_last_xid = req->rq_xid; + mcd->mcd_last_result = rc; + /*XXX: save intent_disposition in mdt_thread_info? + * also there is bug - intent_dispostion is __u64, + * see struct ldlm_reply->lock_policy_res1; */ + mcd->mcd_last_data = mti->mti_opdata; + } + + /* + * When we store zero transno in mcd we can lost last transno value + * because mcd contains 0, but msd is not yet written + * The server data should be updated also if the latest + * transno is rewritten by zero. See the bug 11125 for details. + */ + if (mti->mti_transno == 0 && + *transno_p == mdt->mdt_last_transno) + mdt_server_data_update(mti->mti_env, mdt); + + *transno_p = mti->mti_transno; + + if (off <= 0) { + CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off); + err = -EINVAL; + } else { + err = mdt_last_rcvd_write(mti->mti_env, mdt, mcd, &off, th); + } + mutex_up(&med->med_mcd_lock); + RETURN(err); +} + +extern struct lu_context_key mdt_thread_key; + +/* add credits for last_rcvd update */ +static int mdt_txn_start_cb(const struct lu_env *env, + struct txn_param *param, void *cookie) +{ + param->tp_credits += MDT_TXN_LAST_RCVD_WRITE_CREDITS; + return 0; +} + +/* Update last_rcvd records with latests transaction data */ +static int mdt_txn_stop_cb(const struct lu_env *env, + struct thandle *txn, void *cookie) +{ + struct mdt_device *mdt = cookie; + struct mdt_txn_info *txi; + struct mdt_thread_info *mti; + struct ptlrpc_request *req; + + /* transno in two contexts - for commit_cb and for thread */ + txi = lu_context_key_get(&txn->th_ctx, &mdt_txn_key); + mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); + req = mdt_info_req(mti); + + if (mti->mti_mdt == NULL || req == NULL || mti->mti_no_need_trans) { + txi->txi_transno = 0; + mti->mti_no_need_trans = 0; + return 0; + } + + if (mti->mti_has_trans) { + /* XXX: currently there are allowed cases, but the wrong cases + * are also possible, so better check is needed here */ + CDEBUG(D_INFO, "More than one transaction "LPU64"\n", mti->mti_transno); + return 0; + } + + mti->mti_has_trans = 1; + spin_lock(&mdt->mdt_transno_lock); + if (txn->th_result != 0) { + if (mti->mti_transno != 0) { + CERROR("Replay transno "LPU64" failed: rc %i\n", + mti->mti_transno, txn->th_result); + mti->mti_transno = 0; + } + } else if (mti->mti_transno == 0) { + mti->mti_transno = ++ mdt->mdt_last_transno; + } else { + /* should be replay */ + if (mti->mti_transno > mdt->mdt_last_transno) + mdt->mdt_last_transno = mti->mti_transno; + } + + /* sometimes the reply message has not been successfully packed */ + LASSERT(req != NULL && req->rq_repmsg != NULL); + + /* filling reply data */ + CDEBUG(D_INODE, "transno = %llu, last_committed = %llu\n", + mti->mti_transno, req->rq_export->exp_obd->obd_last_committed); + + req->rq_transno = mti->mti_transno; + lustre_msg_set_transno(req->rq_repmsg, mti->mti_transno); + lustre_msg_set_last_xid(req->rq_repmsg, + mcd_last_xid(req->rq_export->exp_mdt_data.med_mcd)); + /* save transno for the commit callback */ + txi->txi_transno = mti->mti_transno; + spin_unlock(&mdt->mdt_transno_lock); + + return mdt_last_rcvd_update(mti, txn); +} + +/* commit callback, need to update last_commited value */ +static int mdt_txn_commit_cb(const struct lu_env *env, + struct thandle *txn, void *cookie) +{ + struct mdt_device *mdt = cookie; + struct obd_device *obd = md2lu_dev(&mdt->mdt_md_dev)->ld_obd; + struct mdt_txn_info *txi; + int i; + + txi = lu_context_key_get(&txn->th_ctx, &mdt_txn_key); + + /* copy of obd_transno_commit_cb() but with locking */ + spin_lock(&mdt->mdt_transno_lock); + if (txi->txi_transno > obd->obd_last_committed) { + obd->obd_last_committed = txi->txi_transno; + spin_unlock(&mdt->mdt_transno_lock); + ptlrpc_commit_replies(obd); + } else + spin_unlock(&mdt->mdt_transno_lock); + + if (txi->txi_transno) + CDEBUG(D_HA, "%s: transno "LPD64" is committed\n", + obd->obd_name, txi->txi_transno); + + /* iterate through all additional callbacks */ + for (i = 0; i < txi->txi_cb_count; i++) { + txi->txi_cb[i].mdt_cb_func(mdt, txi->txi_transno, + txi->txi_cb[i].mdt_cb_data, 0); + } + return 0; +} + +int mdt_fs_setup(const struct lu_env *env, struct mdt_device *mdt, + struct obd_device *obd) +{ + struct lu_fid fid; + struct dt_object *o; + int rc = 0; + ENTRY; + + OBD_FAIL_RETURN(OBD_FAIL_MDS_FS_SETUP, -ENOENT); + + /* prepare transactions callbacks */ + mdt->mdt_txn_cb.dtc_txn_start = mdt_txn_start_cb; + mdt->mdt_txn_cb.dtc_txn_stop = mdt_txn_stop_cb; + mdt->mdt_txn_cb.dtc_txn_commit = mdt_txn_commit_cb; + mdt->mdt_txn_cb.dtc_cookie = mdt; + INIT_LIST_HEAD(&mdt->mdt_txn_cb.dtc_linkage); + + dt_txn_callback_add(mdt->mdt_bottom, &mdt->mdt_txn_cb); + + o = dt_store_open(env, mdt->mdt_bottom, LAST_RCVD, &fid); + if (!IS_ERR(o)) { + mdt->mdt_last_rcvd = o; + rc = mdt_server_data_init(env, mdt); + if (rc) + GOTO(put_last_rcvd, rc); + } else { + rc = PTR_ERR(o); + CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc); + RETURN(rc); + } + + o = dt_store_open(env, mdt->mdt_bottom, CAPA_KEYS, &fid); + if (!IS_ERR(o)) { + mdt->mdt_ck_obj = o; + rc = mdt_capa_keys_init(env, mdt); + if (rc) + GOTO(put_ck_object, rc); + } else { + rc = PTR_ERR(o); + CERROR("cannot open %s: rc = %d\n", CAPA_KEYS, rc); + GOTO(put_last_rcvd, rc); + } + RETURN(0); + +put_ck_object: + lu_object_put(env, &o->do_lu); + mdt->mdt_ck_obj = NULL; +put_last_rcvd: + lu_object_put(env, &mdt->mdt_last_rcvd->do_lu); + mdt->mdt_last_rcvd = NULL; + return rc; +} + + +void mdt_fs_cleanup(const struct lu_env *env, struct mdt_device *mdt) +{ + ENTRY; + + /* Remove transaction callback */ + dt_txn_callback_del(mdt->mdt_bottom, &mdt->mdt_txn_cb); + if (mdt->mdt_last_rcvd) + lu_object_put(env, &mdt->mdt_last_rcvd->do_lu); + mdt->mdt_last_rcvd = NULL; + if (mdt->mdt_ck_obj) + lu_object_put(env, &mdt->mdt_ck_obj->do_lu); + mdt->mdt_ck_obj = NULL; + EXIT; +} + +/* reconstruction code */ +extern void mds_steal_ack_locks(struct ptlrpc_request *req); +void mdt_req_from_mcd(struct ptlrpc_request *req, + struct mdt_client_data *mcd) +{ + DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", + mcd->mcd_last_transno, mcd->mcd_last_result); + + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE || + lustre_msg_get_opc(req->rq_repmsg) == MDS_DONE_WRITING) { + req->rq_transno = mcd->mcd_last_close_transno; + req->rq_status = mcd->mcd_last_close_result; + lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); + lustre_msg_set_status(req->rq_repmsg, req->rq_status); + } else { + req->rq_transno = mcd->mcd_last_transno; + req->rq_status = mcd->mcd_last_result; + lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); + lustre_msg_set_status(req->rq_repmsg, req->rq_status); + } + mds_steal_ack_locks(req); +} + +static void mdt_reconstruct_generic(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) +{ + struct ptlrpc_request *req = mdt_info_req(mti); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + + return mdt_req_from_mcd(req, med->med_mcd); +} + +static void mdt_reconstruct_create(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) +{ + struct ptlrpc_request *req = mdt_info_req(mti); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_device *mdt = mti->mti_mdt; + struct mdt_object *child; + struct mdt_body *body; + int rc; + + mdt_req_from_mcd(req, med->med_mcd); + if (req->rq_status) + return; + + /* if no error, so child was created with requested fid */ + child = mdt_object_find(mti->mti_env, mdt, mti->mti_rr.rr_fid2); + LASSERT(!IS_ERR(child)); + + body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); + rc = mo_attr_get(mti->mti_env, mdt_object_child(child), &mti->mti_attr); + if (rc == -EREMOTE) { + /* object was created on remote server */ + req->rq_status = rc; + body->valid |= OBD_MD_MDS; + } + mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, mdt_object_fid(child)); + mdt_object_put(mti->mti_env, child); +} + +static void mdt_reconstruct_setattr(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) +{ + struct ptlrpc_request *req = mdt_info_req(mti); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_device *mdt = mti->mti_mdt; + struct mdt_object *obj; + struct mdt_body *body; + + mdt_req_from_mcd(req, med->med_mcd); + if (req->rq_status) + return; + + body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); + obj = mdt_object_find(mti->mti_env, mdt, mti->mti_rr.rr_fid1); + LASSERT(!IS_ERR(obj)); + mo_attr_get(mti->mti_env, mdt_object_child(obj), &mti->mti_attr); + mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, mdt_object_fid(obj)); + + /* Don't return OST-specific attributes if we didn't just set them */ +/* + if (rec->ur_iattr.ia_valid & ATTR_SIZE) + body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) + body->valid |= OBD_MD_FLMTIME; + if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) + body->valid |= OBD_MD_FLATIME; +*/ + mdt_object_put(mti->mti_env, obj); +} + +static void mdt_reconstruct_with_shrink(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) +{ + mdt_reconstruct_generic(mti, lhc); + mdt_shrink_reply(mti); +} + +typedef void (*mdt_reconstructor)(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc); + +static mdt_reconstructor reconstructors[REINT_MAX] = { + [REINT_SETATTR] = mdt_reconstruct_setattr, + [REINT_CREATE] = mdt_reconstruct_create, + [REINT_LINK] = mdt_reconstruct_generic, + [REINT_UNLINK] = mdt_reconstruct_with_shrink, + [REINT_RENAME] = mdt_reconstruct_with_shrink, + [REINT_OPEN] = mdt_reconstruct_open +}; + +void mdt_reconstruct(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) +{ + ENTRY; + reconstructors[mti->mti_rr.rr_opcode](mti, lhc); + EXIT; +} + diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c new file mode 100644 index 0000000..ec8298e --- /dev/null +++ b/lustre/mdt/mdt_reint.c @@ -0,0 +1,983 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * linux/mdt/mdt_reint.c + * Lustre Metadata Target (mdt) reintegration routines + * + * Copyright (C) 2002-2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * Author: Andreas Dilger <adilger@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Huang Hua <huanghua@clusterfs.com> + * Author: Yury Umanets <umka@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include "mdt_internal.h" +#include "lu_time.h" + +static inline void mdt_reint_init_ma(struct mdt_thread_info *info, + struct md_attr *ma) +{ + ma->ma_lmm = req_capsule_server_get(&info->mti_pill, &RMF_MDT_MD); + ma->ma_lmm_size = req_capsule_get_size(&info->mti_pill, + &RMF_MDT_MD, RCL_SERVER); + + ma->ma_cookie = req_capsule_server_get(&info->mti_pill, + &RMF_LOGCOOKIES); + ma->ma_cookie_size = req_capsule_get_size(&info->mti_pill, + &RMF_LOGCOOKIES, + RCL_SERVER); + + ma->ma_need = MA_INODE | MA_LOV | MA_COOKIE; + ma->ma_valid = 0; +} + +static int mdt_create_pack_capa(struct mdt_thread_info *info, int rc, + struct mdt_object *object, + struct mdt_body *repbody) +{ + ENTRY; + + /* for cross-ref mkdir, mds capa has been fetched from remote obj, then + * we won't go to below*/ + if (repbody->valid & OBD_MD_FLMDSCAPA) + RETURN(rc); + + if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + rc = mo_capa_get(info->mti_env, mdt_object_child(object), capa, + 0); + if (rc == 0) + repbody->valid |= OBD_MD_FLMDSCAPA; + } + if (!(repbody->valid & OBD_MD_FLMDSCAPA)) + lustre_shrink_reply(mdt_info_req(info), REPLY_REC_OFF+1, 0, 1); + + RETURN(rc); +} + +static int mdt_md_create(struct mdt_thread_info *info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct mdt_object *parent; + struct mdt_object *child; + struct mdt_lock_handle *lh; + struct mdt_body *repbody; + struct md_attr *ma = &info->mti_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct lu_name *lname; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, mdt_info_req(info), "Create (%s->"DFID") in "DFID, + rr->rr_name, PFID(rr->rr_fid2), PFID(rr->rr_fid1)); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lh, LCK_PW, rr->rr_name, rr->rr_namelen); + + parent = mdt_object_find_lock(info, rr->rr_fid1, lh, + MDS_INODELOCK_UPDATE); + if (IS_ERR(parent)) + RETURN(PTR_ERR(parent)); + + child = mdt_object_find(info->mti_env, mdt, rr->rr_fid2); + if (likely(!IS_ERR(child))) { + struct md_object *next = mdt_object_child(parent); + + ma->ma_need = MA_INODE; + ma->ma_valid = 0; + /* capa for cross-ref will be stored here */ + ma->ma_capa = req_capsule_server_get(&info->mti_pill, + &RMF_CAPA1); + LASSERT(ma->ma_capa); + + mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_REINT_CREATE_WRITE); + + /* Let lower layer know current lock mode. */ + info->mti_spec.sp_cr_mode = + mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode); + + /* + * Do perform lookup sanity check. We do not know if name exists + * or not. + */ + info->mti_spec.sp_cr_lookup = 1; + + lname = mdt_name(info->mti_env, (char *)rr->rr_name, + rr->rr_namelen); + rc = mdo_create(info->mti_env, next, lname, + mdt_object_child(child), + &info->mti_spec, ma); + if (rc == 0) { + /* Return fid & attr to client. */ + if (ma->ma_valid & MA_INODE) + mdt_pack_attr2body(info, repbody, &ma->ma_attr, + mdt_object_fid(child)); + } + mdt_object_put(info->mti_env, child); + } else + rc = PTR_ERR(child); + + mdt_create_pack_capa(info, rc, child, repbody); + mdt_object_unlock_put(info, parent, lh, rc); + RETURN(rc); +} + +/* Partial request to create object only */ +static int mdt_md_mkobj(struct mdt_thread_info *info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct mdt_object *o; + struct mdt_body *repbody; + struct md_attr *ma = &info->mti_attr; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, mdt_info_req(info), "Partial create "DFID"", + PFID(info->mti_rr.rr_fid2)); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + + o = mdt_object_find(info->mti_env, mdt, info->mti_rr.rr_fid2); + if (!IS_ERR(o)) { + struct md_object *next = mdt_object_child(o); + + ma->ma_need = MA_INODE; + ma->ma_valid = 0; + + /* + * Cross-ref create can encounter already created obj in case of + * recovery, just get attr in that case. + */ + if (mdt_object_exists(o) == 1) { + rc = mo_attr_get(info->mti_env, next, ma); + } else { + /* + * Here, NO permission check for object_create, + * such check has been done on the original MDS. + */ + rc = mo_object_create(info->mti_env, next, + &info->mti_spec, ma); + } + if (rc == 0) { + /* Return fid & attr to client. */ + if (ma->ma_valid & MA_INODE) + mdt_pack_attr2body(info, repbody, &ma->ma_attr, + mdt_object_fid(o)); + } + mdt_object_put(info->mti_env, o); + } else + rc = PTR_ERR(o); + + mdt_create_pack_capa(info, rc, o, repbody); + RETURN(rc); +} + +/* In the raw-setattr case, we lock the child inode. + * In the write-back case or if being called from open, + * the client holds a lock already. + * We use the ATTR_FROM_OPEN (translated into MRF_SETATTR_LOCKED by + * mdt_setattr_unpack()) flag to tell these cases apart. */ +int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, int flags) +{ + struct md_attr *ma = &info->mti_attr; + struct mdt_lock_handle *lh; + int som_update = 0; + int rc; + ENTRY; + + /* attr shouldn't be set on remote object */ + LASSERT(mdt_object_exists(mo) >= 0); + + if (info->mti_epoch) + som_update = (info->mti_epoch->flags & MF_SOM_CHANGE); + + /* Try to avoid object_lock if another epoch has been started + * already. */ + if (som_update && (info->mti_epoch->ioepoch != mo->mot_ioepoch)) + RETURN(0); + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_reg_init(lh, LCK_PW); + + if (!(flags & MRF_SETATTR_LOCKED)) { + __u64 lockpart = MDS_INODELOCK_UPDATE; + if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID)) + lockpart |= MDS_INODELOCK_LOOKUP; + + rc = mdt_object_lock(info, mo, lh, lockpart, MDT_LOCAL_LOCK); + if (rc != 0) + RETURN(rc); + } + + /* Setattrs are syncronized through dlm lock taken above. If another + * epoch started, its attributes may be already flushed on disk, + * skip setattr. */ + if (som_update && (info->mti_epoch->ioepoch != mo->mot_ioepoch)) + GOTO(out_unlock, rc = 0); + + if (mdt_object_exists(mo) == 0) + GOTO(out_unlock, rc = -ENOENT); + + /* all attrs are packed into mti_attr in unpack_setattr */ + mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_REINT_SETATTR_WRITE); + + /* This is only for set ctime when rename's source is on remote MDS. */ + if (unlikely(ma->ma_attr.la_valid == LA_CTIME)) + ma->ma_attr_flags |= MDS_VTX_BYPASS; + + /* all attrs are packed into mti_attr in unpack_setattr */ + rc = mo_attr_set(info->mti_env, mdt_object_child(mo), ma); + if (rc != 0) + GOTO(out_unlock, rc); + + /* Re-enable SIZEONMDS. */ + if (som_update) { + CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n", + mo->mot_ioepoch, PFID(mdt_object_fid(mo)), + mo->mot_epochcount); + mdt_sizeonmds_enable(info, mo); + } + + EXIT; +out_unlock: + mdt_object_unlock(info, mo, lh, rc); + return rc; +} + +static int mdt_reint_setattr(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + struct mdt_device *mdt = info->mti_mdt; + struct md_attr *ma = &info->mti_attr; + struct mdt_reint_record *rr = &info->mti_rr; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_file_data *mfd; + struct mdt_object *mo; + struct md_object *next; + struct mdt_body *repbody; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, req, "setattr "DFID" %x", PFID(rr->rr_fid1), + (unsigned int)ma->ma_attr.la_valid); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + mo = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); + if (IS_ERR(mo)) + GOTO(out, rc = PTR_ERR(mo)); + + if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM)) { + if ((ma->ma_attr.la_valid & LA_SIZE) || + (rr->rr_flags & MRF_SETATTR_LOCKED)) { + /* Check write access for the O_TRUNC case */ + if (mdt_write_read(info->mti_mdt, mo) < 0) + GOTO(out_put, rc = -ETXTBSY); + } + } else if (info->mti_epoch && + (info->mti_epoch->flags & MF_EPOCH_OPEN)) { + /* Truncate case. */ + rc = mdt_write_get(info->mti_mdt, mo); + if (rc) + GOTO(out_put, rc); + + mfd = mdt_mfd_new(); + if (mfd == NULL) + GOTO(out_put, rc = -ENOMEM); + + mdt_epoch_open(info, mo); + repbody->ioepoch = mo->mot_ioepoch; + + mdt_object_get(info->mti_env, mo); + mdt_mfd_set_mode(mfd, FMODE_EPOCHLCK); + mfd->mfd_object = mo; + mfd->mfd_xid = req->rq_xid; + + spin_lock(&med->med_open_lock); + list_add(&mfd->mfd_list, &med->med_open_head); + spin_unlock(&med->med_open_lock); + repbody->handle.cookie = mfd->mfd_handle.h_cookie; + } + + rc = mdt_attr_set(info, mo, rr->rr_flags); + if (rc) + GOTO(out_put, rc); + + if (info->mti_epoch && (info->mti_epoch->flags & MF_SOM_CHANGE)) { + LASSERT(mdt_conn_flags(info) & OBD_CONNECT_SOM); + LASSERT(info->mti_epoch); + + spin_lock(&med->med_open_lock); + /* Size-on-MDS Update. Find and free mfd. */ + mfd = mdt_handle2mfd(info, &info->mti_epoch->handle); + if (mfd == NULL) { + spin_unlock(&med->med_open_lock); + CDEBUG(D_INODE | D_ERROR, "no handle for file close: " + "fid = "DFID": cookie = "LPX64"\n", + PFID(info->mti_rr.rr_fid1), + info->mti_epoch->handle.cookie); + GOTO(out_put, rc = -ESTALE); + } + if (mfd->mfd_mode != FMODE_SOM) { + CWARN("mfd_mode %d not right should be in replay\n", + mfd->mfd_mode); + LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & + MSG_REPLAY); + LASSERT(mfd->mfd_mode == FMODE_EPOCH); + mfd->mfd_mode = FMODE_SOM; + } + + LASSERT(ma->ma_attr.la_valid & LA_SIZE); + LASSERT(!(info->mti_epoch->flags & MF_EPOCH_CLOSE)); + + class_handle_unhash(&mfd->mfd_handle); + list_del_init(&mfd->mfd_list); + spin_unlock(&med->med_open_lock); + + mdt_mfd_close(info, mfd); + } + + ma->ma_need = MA_INODE; + ma->ma_valid = 0; + next = mdt_object_child(mo); + rc = mo_attr_get(info->mti_env, next, ma); + if (rc != 0) + GOTO(out_put, rc); + + mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo)); + + if (mdt->mdt_opts.mo_oss_capa && + S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) && + (ma->ma_attr.la_valid & LA_SIZE)) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC; + rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0); + if (rc) + GOTO(out_put, rc); + repbody->valid |= OBD_MD_FLOSSCAPA; + } + + EXIT; +out_put: + mdt_object_put(info->mti_env, mo); +out: + return rc; +} + +static int mdt_reint_create(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + int rc; + ENTRY; + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) + GOTO(out, rc = err_serious(-ESTALE)); + + switch (info->mti_attr.ma_attr.la_mode & S_IFMT) { + case S_IFDIR:{ + /* Cross-ref case. */ + if (info->mti_cross_ref) { + rc = mdt_md_mkobj(info); + break; + } + } + case S_IFREG: + case S_IFLNK: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK:{ + /* Special file should stay on the same node as parent. */ + LASSERT(info->mti_rr.rr_namelen > 0); + rc = mdt_md_create(info); + break; + } + default: + rc = err_serious(-EOPNOTSUPP); + } + EXIT; +out: + return rc; +} + +static int mdt_reint_unlink(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + struct mdt_reint_record *rr = &info->mti_rr; + struct ptlrpc_request *req = mdt_info_req(info); + struct md_attr *ma = &info->mti_attr; + struct lu_fid *child_fid = &info->mti_tmp_fid1; + struct mdt_object *mp; + struct mdt_object *mc; + struct mdt_lock_handle *parent_lh; + struct mdt_lock_handle *child_lh; + struct lu_name *lname; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), + rr->rr_name); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) + GOTO(out, rc = err_serious(-ENOENT)); + + /* step 1: lock the parent */ + parent_lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, + rr->rr_namelen); + + mp = mdt_object_find_lock(info, rr->rr_fid1, parent_lh, + MDS_INODELOCK_UPDATE); + if (IS_ERR(mp)) { + rc = PTR_ERR(mp); + /* errors are possible here in cross-ref cases, see below */ + if (info->mti_cross_ref) + rc = 0; + GOTO(out, rc); + } + + mdt_reint_init_ma(info, ma); + if (!ma->ma_lmm || !ma->ma_cookie) + GOTO(out_unlock_parent, rc = -EINVAL); + + if (info->mti_cross_ref) { + /* + * Remote partial operation. It is possible that replay may + * happen on parent MDT and this operation will be repeated. + * Therefore the object absense is allowed case and nothing + * should be done here. + */ + if (mdt_object_exists(mp) > 0) { + mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA); + rc = mo_ref_del(info->mti_env, + mdt_object_child(mp), ma); + mdt_handle_last_unlink(info, mp, ma); + } else + rc = 0; + GOTO(out_unlock_parent, rc); + } + + /* step 2: find & lock the child */ + lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); + rc = mdo_lookup(info->mti_env, mdt_object_child(mp), + lname, child_fid, &info->mti_spec); + if (rc != 0) + GOTO(out_unlock_parent, rc); + + /* We will lock the child regardless it is local or remote. No harm. */ + mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); + if (IS_ERR(mc)) + GOTO(out_unlock_parent, rc = PTR_ERR(mc)); + child_lh = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(child_lh, LCK_EX); + rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, + MDT_CROSS_LOCK); + if (rc != 0) { + mdt_object_put(info->mti_env, mc); + GOTO(out_unlock_parent, rc); + } + + mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_REINT_UNLINK_WRITE); + + /* + * Now we can only make sure we need MA_INODE, in mdd layer, will check + * whether need MA_LOV and MA_COOKIE. + */ + ma->ma_need = MA_INODE; + ma->ma_valid = 0; + mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); + rc = mdo_unlink(info->mti_env, mdt_object_child(mp), + mdt_object_child(mc), lname, ma); + if (rc == 0) + mdt_handle_last_unlink(info, mc, ma); + + EXIT; + mdt_object_unlock_put(info, mc, child_lh, rc); +out_unlock_parent: + mdt_object_unlock_put(info, mp, parent_lh, rc); +out: + mdt_shrink_reply(info); + return rc; +} + +static int mdt_reint_link(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + struct mdt_reint_record *rr = &info->mti_rr; + struct ptlrpc_request *req = mdt_info_req(info); + struct md_attr *ma = &info->mti_attr; + struct mdt_object *ms; + struct mdt_object *mp; + struct mdt_lock_handle *lhs; + struct mdt_lock_handle *lhp; + struct lu_name *lname; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, req, "link "DFID" to "DFID"/%s", + PFID(rr->rr_fid1), PFID(rr->rr_fid2), rr->rr_name); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) + GOTO(out, rc = err_serious(-ENOENT)); + + if (info->mti_cross_ref) { + /* MDT holding name ask us to add ref. */ + lhs = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(lhs, LCK_EX); + ms = mdt_object_find_lock(info, rr->rr_fid1, lhs, + MDS_INODELOCK_UPDATE); + if (IS_ERR(ms)) + GOTO(out, rc = PTR_ERR(ms)); + + mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA); + rc = mo_ref_add(info->mti_env, mdt_object_child(ms), ma); + mdt_object_unlock_put(info, ms, lhs, rc); + GOTO(out, rc); + } + + /* Invalid case so return error immediately instead of + * processing it */ + if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) + GOTO(out, rc = -EPERM); + + /* step 1: find & lock the target parent dir */ + lhp = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lhp, LCK_EX, rr->rr_name, + rr->rr_namelen); + mp = mdt_object_find_lock(info, rr->rr_fid2, lhp, + MDS_INODELOCK_UPDATE); + if (IS_ERR(mp)) + GOTO(out, rc = PTR_ERR(mp)); + + /* step 2: find & lock the source */ + lhs = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(lhs, LCK_EX); + + ms = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); + if (IS_ERR(ms)) + GOTO(out_unlock_parent, rc = PTR_ERR(ms)); + + rc = mdt_object_lock(info, ms, lhs, MDS_INODELOCK_UPDATE, + MDT_CROSS_LOCK); + if (rc != 0) { + mdt_object_put(info->mti_env, ms); + GOTO(out_unlock_parent, rc); + } + + /* step 3: link it */ + mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_REINT_LINK_WRITE); + + lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); + rc = mdo_link(info->mti_env, mdt_object_child(mp), + mdt_object_child(ms), lname, ma); + + EXIT; + mdt_object_unlock_put(info, ms, lhs, rc); +out_unlock_parent: + mdt_object_unlock_put(info, mp, lhp, rc); +out: + return rc; +} + +/* partial operation for rename */ +static int mdt_reint_rename_tgt(struct mdt_thread_info *info) +{ + struct mdt_reint_record *rr = &info->mti_rr; + struct ptlrpc_request *req = mdt_info_req(info); + struct md_attr *ma = &info->mti_attr; + struct mdt_object *mtgtdir; + struct mdt_object *mtgt = NULL; + struct mdt_lock_handle *lh_tgtdir; + struct mdt_lock_handle *lh_tgt = NULL; + struct lu_fid *tgt_fid = &info->mti_tmp_fid1; + struct lu_name *lname; + int rc; + ENTRY; + + DEBUG_REQ(D_INODE, req, "rename_tgt: insert (%s->"DFID") in "DFID, + rr->rr_tgt, PFID(rr->rr_fid2), PFID(rr->rr_fid1)); + + /* step 1: lookup & lock the tgt dir. */ + lh_tgtdir = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lh_tgtdir, LCK_PW, rr->rr_tgt, + rr->rr_tgtlen); + mtgtdir = mdt_object_find_lock(info, rr->rr_fid1, lh_tgtdir, + MDS_INODELOCK_UPDATE); + if (IS_ERR(mtgtdir)) + GOTO(out, rc = PTR_ERR(mtgtdir)); + + /* step 2: find & lock the target object if exists. */ + mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA); + lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen); + rc = mdo_lookup(info->mti_env, mdt_object_child(mtgtdir), + lname, tgt_fid, &info->mti_spec); + if (rc != 0 && rc != -ENOENT) { + GOTO(out_unlock_tgtdir, rc); + } else if (rc == 0) { + /* + * In case of replay that name can be already inserted, check + * that and do nothing if so. + */ + if (lu_fid_eq(tgt_fid, rr->rr_fid2)) + GOTO(out_unlock_tgtdir, rc); + + lh_tgt = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(lh_tgt, LCK_EX); + + mtgt = mdt_object_find_lock(info, tgt_fid, lh_tgt, + MDS_INODELOCK_LOOKUP); + if (IS_ERR(mtgt)) + GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt)); + + mdt_reint_init_ma(info, ma); + if (!ma->ma_lmm || !ma->ma_cookie) + GOTO(out_unlock_tgt, rc = -EINVAL); + + rc = mdo_rename_tgt(info->mti_env, mdt_object_child(mtgtdir), + mdt_object_child(mtgt), rr->rr_fid2, + lname, ma); + } else /* -ENOENT */ { + rc = mdo_name_insert(info->mti_env, mdt_object_child(mtgtdir), + lname, rr->rr_fid2, ma); + } + + /* handle last link of tgt object */ + if (rc == 0 && mtgt) + mdt_handle_last_unlink(info, mtgt, ma); + + EXIT; +out_unlock_tgt: + if (mtgt) + mdt_object_unlock_put(info, mtgt, lh_tgt, rc); +out_unlock_tgtdir: + mdt_object_unlock_put(info, mtgtdir, lh_tgtdir, rc); +out: + mdt_shrink_reply(info); + return rc; +} + +static int mdt_rename_lock(struct mdt_thread_info *info, + struct lustre_handle *lh) +{ + struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace; + ldlm_policy_data_t *policy = &info->mti_policy; + struct ldlm_res_id *res_id = &info->mti_res_id; + struct lu_site *ls; + int rc; + ENTRY; + + /* + * Disable global rename BFL lock temporarily because + * when a mds do rename recoverying, which might enqueue + * BFL lock to the controller mds. and this req might be + * replay req for controller mds. but we did not have + * such handling in controller mds. XXX + */ + RETURN(0); + ls = info->mti_mdt->mdt_md_dev.md_lu_dev.ld_site; + fid_build_reg_res_name(&LUSTRE_BFL_FID, res_id); + + memset(policy, 0, sizeof *policy); + policy->l_inodebits.bits = MDS_INODELOCK_UPDATE; + + if (ls->ls_control_exp == NULL) { + int flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB; + + /* + * Current node is controller, that is mdt0, where we should + * take BFL lock. + */ + rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, policy, + LCK_EX, &flags, ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, 0, + NULL, lh); + } else { + int flags = 0; + + /* + * This is the case mdt0 is remote node, issue DLM lock like + * other clients. + */ + rc = ldlm_cli_enqueue(ls->ls_control_exp, NULL, res_id, + LDLM_IBITS, policy, LCK_EX, &flags, + ldlm_blocking_ast, ldlm_completion_ast, + NULL, NULL, NULL, 0, NULL, lh, 0); + } + + RETURN(rc); +} + +static void mdt_rename_unlock(struct lustre_handle *lh) +{ + ENTRY; + /* Disable global rename BFL lock temporarily. see above XXX*/ + EXIT; + return; + LASSERT(lustre_handle_is_used(lh)); + ldlm_lock_decref(lh, LCK_EX); + EXIT; +} + +/* + * This is is_subdir() variant, it is CMD if cmm forwards it to correct + * target. Source should not be ancestor of target dir. May be other rename + * checks can be moved here later. + */ +static int mdt_rename_sanity(struct mdt_thread_info *info, struct lu_fid *fid) +{ + struct mdt_reint_record *rr = &info->mti_rr; + struct lu_fid dst_fid = *rr->rr_fid2; + struct mdt_object *dst; + int rc = 0; + ENTRY; + + do { + LASSERT(fid_is_sane(&dst_fid)); + dst = mdt_object_find(info->mti_env, info->mti_mdt, &dst_fid); + if (!IS_ERR(dst)) { + rc = mdo_is_subdir(info->mti_env, + mdt_object_child(dst), fid, + &dst_fid); + mdt_object_put(info->mti_env, dst); + if (rc != -EREMOTE && rc < 0) { + CERROR("Failed mdo_is_subdir(), rc %d\n", rc); + } else { + /* check the found fid */ + if (lu_fid_eq(&dst_fid, fid)) + rc = -EINVAL; + } + } else { + rc = PTR_ERR(dst); + } + } while (rc == -EREMOTE); + + RETURN(rc); +} + +static int mdt_reint_rename(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + struct mdt_reint_record *rr = &info->mti_rr; + struct md_attr *ma = &info->mti_attr; + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_object *msrcdir; + struct mdt_object *mtgtdir; + struct mdt_object *mold; + struct mdt_object *mnew = NULL; + struct mdt_lock_handle *lh_srcdirp; + struct mdt_lock_handle *lh_tgtdirp; + struct mdt_lock_handle *lh_oldp; + struct mdt_lock_handle *lh_newp; + struct lu_fid *old_fid = &info->mti_tmp_fid1; + struct lu_fid *new_fid = &info->mti_tmp_fid2; + struct lustre_handle rename_lh = { 0 }; + struct lu_name slname = { 0 }; + struct lu_name *lname; + int rc; + ENTRY; + + if (info->mti_cross_ref) { + rc = mdt_reint_rename_tgt(info); + RETURN(rc); + } + + DEBUG_REQ(D_INODE, req, "rename "DFID"/%s to "DFID"/%s", + PFID(rr->rr_fid1), rr->rr_name, + PFID(rr->rr_fid2), rr->rr_tgt); + + rc = mdt_rename_lock(info, &rename_lh); + if (rc) { + CERROR("Can't lock FS for rename, rc %d\n", rc); + GOTO(out, rc); + } + + lh_newp = &info->mti_lh[MDT_LH_NEW]; + + /* step 1: lock the source dir. */ + lh_srcdirp = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lh_srcdirp, LCK_PW, rr->rr_name, + rr->rr_namelen); + msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp, + MDS_INODELOCK_UPDATE); + if (IS_ERR(msrcdir)) + GOTO(out_rename_lock, rc = PTR_ERR(msrcdir)); + + /* step 2: find & lock the target dir. */ + lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, rr->rr_tgt, + rr->rr_tgtlen); + if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) { + mdt_object_get(info->mti_env, msrcdir); + mtgtdir = msrcdir; + } else { + mtgtdir = mdt_object_find(info->mti_env, info->mti_mdt, + rr->rr_fid2); + if (IS_ERR(mtgtdir)) + GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir)); + + rc = mdt_object_exists(mtgtdir); + if (rc == 0) + GOTO(out_unlock_target, rc = -ESTALE); + else if (rc > 0) { + /* we lock the target dir if it is local */ + rc = mdt_object_lock(info, mtgtdir, lh_tgtdirp, + MDS_INODELOCK_UPDATE, + MDT_LOCAL_LOCK); + if (rc != 0) + GOTO(out_unlock_target, rc); + } + } + + /* step 3: find & lock the old object. */ + lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); + mdt_name_copy(&slname, lname); + rc = mdo_lookup(info->mti_env, mdt_object_child(msrcdir), + &slname, old_fid, &info->mti_spec); + if (rc != 0) + GOTO(out_unlock_target, rc); + + if (lu_fid_eq(old_fid, rr->rr_fid1) || lu_fid_eq(old_fid, rr->rr_fid2)) + GOTO(out_unlock_target, rc = -EINVAL); + + mold = mdt_object_find(info->mti_env, info->mti_mdt, old_fid); + if (IS_ERR(mold)) + GOTO(out_unlock_target, rc = PTR_ERR(mold)); + + lh_oldp = &info->mti_lh[MDT_LH_OLD]; + mdt_lock_reg_init(lh_oldp, LCK_EX); + rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP, + MDT_CROSS_LOCK); + if (rc != 0) { + mdt_object_put(info->mti_env, mold); + GOTO(out_unlock_target, rc); + } + mdt_set_capainfo(info, 2, old_fid, BYPASS_CAPA); + + /* step 4: find & lock the new object. */ + /* new target object may not exist now */ + lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen); + rc = mdo_lookup(info->mti_env, mdt_object_child(mtgtdir), + lname, new_fid, &info->mti_spec); + if (rc == 0) { + /* the new_fid should have been filled at this moment */ + if (lu_fid_eq(old_fid, new_fid)) + GOTO(out_unlock_old, rc); + + if (lu_fid_eq(new_fid, rr->rr_fid1) || + lu_fid_eq(new_fid, rr->rr_fid2)) + GOTO(out_unlock_old, rc = -EINVAL); + + mdt_lock_reg_init(lh_newp, LCK_EX); + mnew = mdt_object_find(info->mti_env, info->mti_mdt, new_fid); + if (IS_ERR(mnew)) + GOTO(out_unlock_old, rc = PTR_ERR(mnew)); + + rc = mdt_object_lock(info, mnew, lh_newp, + MDS_INODELOCK_FULL, MDT_CROSS_LOCK); + if (rc != 0) { + mdt_object_put(info->mti_env, mnew); + GOTO(out_unlock_old, rc); + } + mdt_set_capainfo(info, 3, new_fid, BYPASS_CAPA); + } else if (rc != -EREMOTE && rc != -ENOENT) + GOTO(out_unlock_old, rc); + + /* step 5: rename it */ + mdt_reint_init_ma(info, ma); + if (!ma->ma_lmm || !ma->ma_cookie) + GOTO(out_unlock_new, rc = -EINVAL); + + mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_REINT_RENAME_WRITE); + + + /* Check if @dst is subdir of @src. */ + rc = mdt_rename_sanity(info, old_fid); + if (rc) + GOTO(out_unlock_new, rc); + + rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir), + mdt_object_child(mtgtdir), old_fid, &slname, + (mnew ? mdt_object_child(mnew) : NULL), + lname, ma); + + /* handle last link of tgt object */ + if (rc == 0 && mnew) + mdt_handle_last_unlink(info, mnew, ma); + + EXIT; +out_unlock_new: + if (mnew) + mdt_object_unlock_put(info, mnew, lh_newp, rc); +out_unlock_old: + mdt_object_unlock_put(info, mold, lh_oldp, rc); +out_unlock_target: + mdt_object_unlock_put(info, mtgtdir, lh_tgtdirp, rc); +out_unlock_source: + mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc); +out_rename_lock: + mdt_rename_unlock(&rename_lh); +out: + mdt_shrink_reply(info); + return rc; +} + +typedef int (*mdt_reinter)(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc); + +static mdt_reinter reinters[REINT_MAX] = { + [REINT_SETATTR] = mdt_reint_setattr, + [REINT_CREATE] = mdt_reint_create, + [REINT_LINK] = mdt_reint_link, + [REINT_UNLINK] = mdt_reint_unlink, + [REINT_RENAME] = mdt_reint_rename, + [REINT_OPEN] = mdt_reint_open +}; + +int mdt_reint_rec(struct mdt_thread_info *info, + struct mdt_lock_handle *lhc) +{ + int rc; + ENTRY; + + rc = reinters[info->mti_rr.rr_opcode](info, lhc); + + RETURN(rc); +} diff --git a/lustre/mdt/mdt_rmtacl.c b/lustre/mdt/mdt_rmtacl.c new file mode 100644 index 0000000..0f6008d --- /dev/null +++ b/lustre/mdt/mdt_rmtacl.c @@ -0,0 +1,260 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao <lsy@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#ifdef HAVE_KERNEL_CONFIG_H +#include <linux/config.h> +#endif +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/kmod.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/version.h> +#include <linux/unistd.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <asm/segment.h> + +#include <libcfs/kp30.h> +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_lib.h> +#include <lustre_ucache.h> + +#include "mdt_internal.h" + +#define MAX_CMD_LEN 256 + +static __u64 rmtacl_key = 0; +static spinlock_t rmtacl_key_lock = SPIN_LOCK_UNLOCKED; + +/* + * For remote acl operation, do NOT cache! + * Use different key for each remote acl operation. + */ +static __u64 mdt_rmtacl_getkey(void) +{ + __u64 key; + + spin_lock(&rmtacl_key_lock); + key = ++rmtacl_key; + spin_unlock(&rmtacl_key_lock); + + return key; +} + +static void mdt_rmtacl_entry_init(struct upcall_cache_entry *entry, void *args) +{ + struct rmtacl_upcall_data *data = args; + struct mdt_rmtacl *acl = &entry->u.acl; + char *cmd; + + acl->ra_uid = data->aud_uid; + acl->ra_gid = data->aud_gid; + /* we use address of this cache entry as handle */ + acl->ra_handle = (__u32)entry; + OBD_ALLOC(cmd, strlen(data->aud_cmd) + 1); + if (!cmd) + return; /* upcall will fail later! */ + + strcpy(cmd, data->aud_cmd); + acl->ra_cmd = cmd; +} + +static void mdt_rmtacl_entry_free(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + int len; + + if (acl->ra_cmd) { + len = strlen(acl->ra_cmd) + 1; + OBD_FREE(acl->ra_cmd, len); + } + + if (acl->ra_buf) { + len = strlen(acl->ra_buf) + 1; + OBD_FREE(acl->ra_buf, len); + } +} + +static int mdt_rmtacl_upcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + struct rmtacl_upcall_data *data = args; + + LASSERT(entry && data); + LASSERT(entry->u.acl.ra_cmd && data->aud_cmd); + return strncmp(entry->u.acl.ra_cmd, data->aud_cmd, MAX_CMD_LEN); +} + +static int mdt_rmtacl_downcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + struct rmtacl_downcall_data *data = args; + + return entry->u.acl.ra_handle - data->add_handle; +} + +static int mdt_rmtacl_do_upcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + char uidstr[8] = ""; + char gidstr[8] = ""; + char handle[20] = ""; + char keystr[20] = ""; + char *argv[] = { + [0] = cache->uc_upcall, + [1] = uidstr, + [2] = gidstr, + [3] = cache->uc_name, + [4] = keystr, + [5] = handle, + [6] = acl->ra_cmd, + [7] = NULL + }; + char *envp[] = { + [0] = "HOME=/", + [1] = "PATH=/bin:/usr/bin:/sbin:/usr/sbin", + [2] = NULL + }; + int rc; + ENTRY; + + if (!acl->ra_cmd) + RETURN(-ENOMEM); + + snprintf(uidstr, sizeof(uidstr), "%u", acl->ra_uid); + snprintf(gidstr, sizeof(gidstr), "%u", acl->ra_gid); + snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key); + snprintf(handle, sizeof(handle), "%u", acl->ra_handle); + + LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!"); + + CDEBUG(D_INFO, "%s: remote acl upcall %s %s %s %s %s %s %s\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6]); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0) { + CERROR("%s: error invoking upcall %s %s %s %s %s %s %s: rc %d; " + "check /proc/fs/lustre/mdt/%s/rmtacl_upcall\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], + argv[4], argv[5], argv[6], rc, cache->uc_name); + } else { + CDEBUG(D_HA, "%s: invoked upcall %s %s %s %s %s %s %s\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], + argv[4], argv[5], argv[6]); + rc = 0; + } + RETURN(rc); +} + +static int mdt_rmtacl_parse_downcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + void *args) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + struct rmtacl_downcall_data *data; + char *buf; + int len; + ENTRY; + + data = (struct rmtacl_downcall_data *)args; + LASSERT(data); + + len = strlen(data->add_buf) + 1; + OBD_ALLOC(buf, len); + if (!buf) + RETURN(-ENOMEM); + + memcpy(buf, data->add_buf, len); + acl->ra_buf = buf; + + CDEBUG(D_OTHER, "parse mdt acl@%p: %s %s\n", + acl, acl->ra_cmd, acl->ra_buf); + + RETURN(0); +} + +struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops = { + .init_entry = mdt_rmtacl_entry_init, + .free_entry = mdt_rmtacl_entry_free, + .upcall_compare = mdt_rmtacl_upcall_compare, + .downcall_compare = mdt_rmtacl_downcall_compare, + .do_upcall = mdt_rmtacl_do_upcall, + .parse_downcall = mdt_rmtacl_parse_downcall, +}; + +int mdt_rmtacl_upcall(struct mdt_thread_info *info, char *cmd, + struct lu_buf *buf) +{ + struct mdt_device *mdt = info->mti_mdt; + struct md_ucred *uc = mdt_ucred(info); + struct rmtacl_upcall_data data; + struct upcall_cache_entry *entry; + __u64 key; + int rc = 0; + ENTRY; + + data.aud_uid = uc->mu_fsuid; + data.aud_gid = uc->mu_fsgid; + data.aud_cmd = cmd; + + key = mdt_rmtacl_getkey(); + + entry = upcall_cache_get_entry(mdt->mdt_rmtacl_cache, key, &data); + if (IS_ERR(entry)) + GOTO(out, rc = PTR_ERR(entry)); + + if (buf->lb_len <= strlen(entry->u.acl.ra_buf)) + GOTO(out, rc = -EFAULT); + + memcpy(buf->lb_buf, entry->u.acl.ra_buf, strlen(entry->u.acl.ra_buf)); + /* remote acl operation expire at once! */ + UC_CACHE_SET_EXPIRED(entry); + upcall_cache_put_entry(mdt->mdt_rmtacl_cache, entry); + +out: + if (rc) + sprintf(buf->lb_buf, "server processing error: %d\n", rc); + RETURN(0); +} diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c new file mode 100644 index 0000000..96e875a --- /dev/null +++ b/lustre/mdt/mdt_xattr.c @@ -0,0 +1,363 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * linux/mdt/mdt_xattr.c + * Lustre Metadata Target (mdt) extended attributes management. + * + * Copyright (C) 2002-2006 Cluster File Systems, Inc. + * Author: Peter Braam <braam@clusterfs.com> + * Author: Andreas Dilger <adilger@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Huang Hua <huanghua@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +/* prerequisite for linux/xattr.h */ +#include <linux/types.h> +/* prerequisite for linux/xattr.h */ +#include <linux/fs.h> +/* XATTR_{REPLACE,CREATE} */ +#include <linux/xattr.h> + +#include "mdt_internal.h" + + +/* return EADATA length to the caller. negative value means error */ +static int mdt_getxattr_pack_reply(struct mdt_thread_info * info) +{ + struct req_capsule *pill = &info->mti_pill ; + struct ptlrpc_request *req = mdt_info_req(info); + char *xattr_name; + __u64 valid = info->mti_body->valid; + static const char user_string[] = "user."; + int size, rc; + ENTRY; + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK)) + RETURN(-ENOMEM); + + /* Determine how many bytes we need */ + if ((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) { + xattr_name = req_capsule_client_get(pill, &RMF_NAME); + if (!xattr_name) + RETURN(-EFAULT); + + if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && + !strncmp(xattr_name, user_string, sizeof(user_string) - 1)) + RETURN(-EOPNOTSUPP); + + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + size = RMTACL_SIZE_MAX; + else + size = mo_xattr_get(info->mti_env, + mdt_object_child(info->mti_object), + &LU_BUF_NULL, xattr_name); + } else if ((valid & OBD_MD_FLXATTRLS) == OBD_MD_FLXATTRLS) { + size = mo_xattr_list(info->mti_env, + mdt_object_child(info->mti_object), + &LU_BUF_NULL); + } else { + CERROR("Valid bits: "LPX64"\n", info->mti_body->valid); + RETURN(-EINVAL); + } + + if (size < 0) { + if (size == -ENODATA) + size = 0; + else if (size != -EOPNOTSUPP) { + CERROR("Error geting EA size: %d\n", size); + RETURN(size); + } + } + + if (info->mti_body->eadatasize != 0 && + info->mti_body->eadatasize < size) + RETURN(-ERANGE); + + req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, + min_t(int, size, info->mti_body->eadatasize)); + + rc = req_capsule_pack(pill); + if (rc) { + LASSERT(rc < 0); + RETURN(rc); + } + + RETURN(size); +} + +static int do_remote_getfacl(struct mdt_thread_info *info, struct lu_buf *buf) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *cmd; + int rc; + ENTRY; + + if (!buf->lb_buf || (buf->lb_len != RMTACL_SIZE_MAX)) + RETURN(-EINVAL); + + cmd = req_capsule_client_get(&info->mti_pill, &RMF_EADATA); + if (!cmd) { + CERROR("missing getfacl command!\n"); + RETURN(-EFAULT); + } + + rc = mdt_rmtacl_upcall(info, cmd, buf); + if (rc) + CERROR("remote acl upcall failed: %d\n", rc); + + lustre_shrink_reply(req, REPLY_REC_OFF + 1, strlen(buf->lb_buf) + 1, 0); + RETURN(rc ?: strlen(buf->lb_buf) + 1); +} + +int mdt_getxattr(struct mdt_thread_info *info) +{ + struct mdt_body *reqbody; + struct mdt_body *repbody = NULL; + struct md_object *next; + struct lu_buf *buf; + int easize, rc; + ENTRY; + + LASSERT(info->mti_object != NULL); + LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj.mo_lu)); + + CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->fid1)); + + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(err_serious(-EFAULT)); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(err_serious(rc)); + + easize = mdt_getxattr_pack_reply(info); + if (easize < 0) + GOTO(out, rc = err_serious(easize)); + + repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(repbody != NULL); + + /* No need further getxattr. */ + if (easize == 0 || reqbody->eadatasize == 0) + GOTO(out, rc = easize); + + buf = &info->mti_buf; + buf->lb_buf = req_capsule_server_get(&info->mti_pill, &RMF_EADATA); + buf->lb_len = easize; + next = mdt_object_child(info->mti_object); + + if (info->mti_body->valid & OBD_MD_FLXATTR) { + char *xattr_name = req_capsule_client_get(&info->mti_pill, + &RMF_NAME); + CDEBUG(D_INODE, "getxattr %s\n", xattr_name); + + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + rc = do_remote_getfacl(info, buf); + else + rc = mo_xattr_get(info->mti_env, next, buf, xattr_name); + + if (rc < 0) + CERROR("getxattr failed: %d\n", rc); + } else if (info->mti_body->valid & OBD_MD_FLXATTRLS) { + CDEBUG(D_INODE, "listxattr\n"); + + rc = mo_xattr_list(info->mti_env, next, buf); + if (rc < 0) + CERROR("listxattr failed: %d\n", rc); + } else + LBUG(); + + EXIT; +out: + if (rc >= 0) { + repbody->eadatasize = rc; + rc = 0; + } + mdt_exit_ucred(info); + return rc; +} + +/* return EADATA length to the caller. negative value means error */ +static int mdt_setxattr_pack_reply(struct mdt_thread_info * info) +{ + struct req_capsule *pill = &info->mti_pill ; + __u64 valid = info->mti_body->valid; + int rc = 0, rc1; + + if ((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) { + char *xattr_name; + + xattr_name = req_capsule_client_get(pill, &RMF_NAME); + if (!xattr_name) + return -EFAULT; + + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + rc = RMTACL_SIZE_MAX; + } + + req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, rc); + + rc1 = req_capsule_pack(pill); + + return rc = rc1 ? rc1 : rc; +} + +static int do_remote_setfacl(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct lu_buf *buf = &info->mti_buf; + char *cmd; + int rc; + ENTRY; + + cmd = req_capsule_client_get(&info->mti_pill, &RMF_EADATA); + if (!cmd) { + CERROR("missing setfacl command!\n"); + RETURN(-EFAULT); + } + + buf->lb_buf = req_capsule_server_get(&info->mti_pill, &RMF_EADATA); + LASSERT(buf->lb_buf); + buf->lb_len = RMTACL_SIZE_MAX; + + rc = mdt_rmtacl_upcall(info, cmd, buf); + if (rc) + CERROR("remote acl upcall failed: %d\n", rc); + + lustre_shrink_reply(req, REPLY_REC_OFF, strlen(buf->lb_buf) + 1, 0); + RETURN(rc); +} + +int mdt_setxattr(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_body *reqbody; + const char user_string[] = "user."; + const char trust_string[] = "trusted."; + struct mdt_lock_handle *lh; + struct req_capsule *pill = &info->mti_pill; + struct mdt_object *obj = info->mti_object; + struct mdt_body *body = (struct mdt_body *)info->mti_body; + const struct lu_env *env = info->mti_env; + struct md_object *child = mdt_object_child(obj); + struct lu_buf *buf = &info->mti_buf; + __u64 valid = body->valid; + char *xattr_name; + int xattr_len; + __u64 lockpart; + int rc; + ENTRY; + + CDEBUG(D_INODE, "setxattr "DFID"\n", PFID(&body->fid1)); + + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR)) + RETURN(err_serious(-ENOMEM)); + + reqbody = req_capsule_client_get(pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(err_serious(-EFAULT)); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(err_serious(rc)); + + rc = mdt_setxattr_pack_reply(info); + if (rc < 0) + GOTO(out, rc = err_serious(rc)); + + /* various sanity check for xattr name */ + xattr_name = req_capsule_client_get(pill, &RMF_NAME); + if (!xattr_name) + GOTO(out, rc = err_serious(-EFAULT)); + + CDEBUG(D_INODE, "%s xattr %s\n", + body->valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name); + + if (((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) && + (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL))) { + rc = do_remote_setfacl(info); + GOTO(out, rc); + } + + if (strncmp(xattr_name, trust_string, sizeof(trust_string) - 1) == 0) { + if (strcmp(xattr_name + 8, XATTR_NAME_LOV) == 0) + GOTO(out, rc = -EACCES); + } + + if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && + (strncmp(xattr_name, user_string, sizeof(user_string) - 1) == 0)) { + GOTO(out, rc = -EOPNOTSUPP); + } + + lockpart = MDS_INODELOCK_UPDATE; + if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS)) + lockpart |= MDS_INODELOCK_LOOKUP; + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_reg_init(lh, LCK_PW); + rc = mdt_object_lock(info, obj, lh, lockpart, MDT_LOCAL_LOCK); + if (rc != 0) + GOTO(out, rc); + + if ((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) { + char * xattr; + if (!req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) { + CERROR("no xattr data supplied\n"); + GOTO(out_unlock, rc = -EFAULT); + } + + xattr_len = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT); + if (xattr_len) { + int flags = 0; + xattr = req_capsule_client_get(pill, &RMF_EADATA); + + if (body->flags & XATTR_REPLACE) + flags |= LU_XATTR_REPLACE; + + if (body->flags & XATTR_CREATE) + flags |= LU_XATTR_CREATE; + + mdt_fail_write(env, info->mti_mdt->mdt_bottom, + OBD_FAIL_MDS_SETXATTR_WRITE); + + buf->lb_buf = xattr; + buf->lb_len = xattr_len; + rc = mo_xattr_set(env, child, buf, xattr_name, flags); + } + } else if ((valid & OBD_MD_FLXATTRRM) == OBD_MD_FLXATTRRM) { + rc = mo_xattr_del(env, child, xattr_name); + } else { + CERROR("valid bits: "LPX64"\n", body->valid); + rc = -EINVAL; + } + EXIT; +out_unlock: + mdt_object_unlock(info, obj, lh, rc); +out: + mdt_exit_ucred(info); + return rc; +} diff --git a/lustre/mgc/libmgc.c b/lustre/mgc/libmgc.c index 49a8260..88fe853 100644 --- a/lustre/mgc/libmgc.c +++ b/lustre/mgc/libmgc.c @@ -39,18 +39,19 @@ #include <lustre_fsfilt.h> #include <lustre_disk.h> -static int mgc_setup(struct obd_device *obd, obd_count len, void *buf) + +static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { int rc; ENTRY; ptlrpcd_addref(); - rc = client_obd_setup(obd, len, buf); + rc = client_obd_setup(obd, lcfg); if (rc) GOTO(err_decref, rc); - rc = obd_llog_init(obd, obd, 0, NULL, NULL); + rc = obd_llog_init(obd, NULL, obd, 0, NULL, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_cleanup, rc); @@ -99,15 +100,15 @@ static int mgc_cleanup(struct obd_device *obd) RETURN(rc); } -static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, - struct obd_uuid *uuid) +static int mgc_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, + struct llog_catid *logid, struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; ENTRY; - rc = llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); @@ -142,6 +143,7 @@ struct obd_ops mgc_obd_ops = { int __init mgc_init(void) { - return class_register_type(&mgc_obd_ops, NULL, LUSTRE_MGC_NAME); + return class_register_type(&mgc_obd_ops, NULL, + NULL, LUSTRE_MGC_NAME, NULL); } diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 4b8fd76..e251dde 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -23,7 +23,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ - + #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif @@ -45,13 +45,12 @@ #include <lustre_fsfilt.h> #include <lustre_disk.h> - int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id) { char *name_end; int len; __u64 resname = 0; - + /* fsname is at most 8 chars long at the beginning of the logname e.g. "lustre-MDT0001" or "lustre" */ name_end = strrchr(logname, '-'); @@ -70,6 +69,7 @@ int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id) memcpy(&resname, logname, len); memset(res_id, 0, sizeof(*res_id)); + /* Always use the same endianness for the resid */ res_id->name[0] = cpu_to_le64(resname); CDEBUG(D_MGC, "log %s to resid "LPX64"/"LPX64" (%.8s)\n", logname, @@ -94,7 +94,7 @@ static int config_log_get(struct config_llog_data *cld) RETURN(0); } -/* Drop a reference to a config log. When no longer referenced, +/* Drop a reference to a config log. When no longer referenced, we can free the config log data */ static void config_log_put(struct config_llog_data *cld) { @@ -109,7 +109,7 @@ static void config_log_put(struct config_llog_data *cld) spin_unlock(&config_list_lock); OBD_FREE(cld->cld_logname, strlen(cld->cld_logname) + 1); if (cld->cld_cfg.cfg_instance != NULL) - OBD_FREE(cld->cld_cfg.cfg_instance, + OBD_FREE(cld->cld_cfg.cfg_instance, strlen(cld->cld_cfg.cfg_instance) + 1); OBD_FREE(cld, sizeof(*cld)); } @@ -117,8 +117,8 @@ static void config_log_put(struct config_llog_data *cld) } /* Find a config log by name */ -static struct config_llog_data *config_log_find(char *logname, - struct config_llog_instance *cfg) +static struct config_llog_data *config_log_find(char *logname, + struct config_llog_instance *cfg) { struct list_head *tmp; struct config_llog_data *cld; @@ -138,16 +138,16 @@ static struct config_llog_data *config_log_find(char *logname, spin_lock(&config_list_lock); list_for_each(tmp, &config_llog_list) { cld = list_entry(tmp, struct config_llog_data, cld_list_chain); - if (match_instance && cld->cld_cfg.cfg_instance && + if (match_instance && cld->cld_cfg.cfg_instance && strcmp(logid, cld->cld_cfg.cfg_instance) == 0) goto out_found; - if (!match_instance && + if (!match_instance && strcmp(logid, cld->cld_logname) == 0) goto out_found; } spin_unlock(&config_list_lock); - CERROR("can't get log %s\n", logid); + CDEBUG(D_CONFIG, "can't get log %s\n", logid); RETURN(ERR_PTR(-ENOENT)); out_found: atomic_inc(&cld->cld_refcount); @@ -155,7 +155,7 @@ out_found: RETURN(cld); } -/* Add this log to our list of active logs. +/* Add this log to our list of active logs. We have one active log per "mount" - client instance or servername. Each instance may be at a different point in the log. */ static int config_log_add(char *logname, struct config_llog_instance *cfg, @@ -167,12 +167,12 @@ static int config_log_add(char *logname, struct config_llog_instance *cfg, ENTRY; CDEBUG(D_MGC, "adding config log %s:%s\n", logname, cfg->cfg_instance); - + OBD_ALLOC(cld, sizeof(*cld)); - if (!cld) + if (!cld) RETURN(-ENOMEM); OBD_ALLOC(cld->cld_logname, strlen(logname) + 1); - if (!cld->cld_logname) { + if (!cld->cld_logname) { OBD_FREE(cld, sizeof(*cld)); RETURN(-ENOMEM); } @@ -182,12 +182,12 @@ static int config_log_add(char *logname, struct config_llog_instance *cfg, cld->cld_cfg.cfg_flags = 0; cld->cld_cfg.cfg_sb = sb; atomic_set(&cld->cld_refcount, 1); - + /* Keep the mgc around until we are done */ cld->cld_mgcexp = class_export_get(lsi->lsi_mgc->obd_self_export); - + if (cfg->cfg_instance != NULL) { - OBD_ALLOC(cld->cld_cfg.cfg_instance, + OBD_ALLOC(cld->cld_cfg.cfg_instance, strlen(cfg->cfg_instance) + 1); strcpy(cld->cld_cfg.cfg_instance, cfg->cfg_instance); } @@ -200,19 +200,19 @@ static int config_log_add(char *logname, struct config_llog_instance *cfg, config_log_put(cld); RETURN(rc); } - + RETURN(rc); } /* Stop watching for updates on this log. */ static int config_log_end(char *logname, struct config_llog_instance *cfg) -{ +{ struct config_llog_data *cld; int rc = 0; ENTRY; - + cld = config_log_find(logname, cfg); - if (IS_ERR(cld)) + if (IS_ERR(cld)) RETURN(PTR_ERR(cld)); /* drop the ref from the find */ config_log_put(cld); @@ -243,7 +243,7 @@ static int mgc_requeue_thread(void *data) char name[] = "ll_cfg_requeue"; int rc = 0; ENTRY; - + ptlrpc_daemonize(name); CDEBUG(D_MGC, "Starting requeue thread\n"); @@ -268,8 +268,7 @@ static int mgc_requeue_thread(void *data) spin_lock(&config_list_lock); list_for_each_entry_safe(cld, n, &config_llog_list, cld_list_chain) { - spin_unlock(&config_list_lock); - + spin_unlock(&config_list_lock); if (cld->cld_lostlock) { CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname); @@ -278,10 +277,9 @@ static int mgc_requeue_thread(void *data) cld); /* Whether we enqueued again or not in mgc_process_log, we're done with the ref - from the old enqueue */ + from the old enqueue */ config_log_put(cld); } - spin_lock(&config_list_lock); } spin_unlock(&config_list_lock); @@ -345,7 +343,7 @@ static int mgc_requeue_add(struct config_llog_data *cld, int later) /********************** class fns **********************/ -static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, +static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, struct vfsmount *mnt) { struct lvfs_run_ctxt saved; @@ -367,7 +365,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd)); if (IS_ERR(obd->obd_fsops)) { up(&cli->cl_mgc_sem); - CERROR("No fstype %s rc=%ld\n", MT_STR(lsi->lsi_ldd), + CERROR("No fstype %s rc=%ld\n", MT_STR(lsi->lsi_ldd), PTR_ERR(obd->obd_fsops)); RETURN(PTR_ERR(obd->obd_fsops)); } @@ -386,7 +384,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); - CERROR("cannot lookup %s directory: rc = %d\n", + CERROR("cannot lookup %s directory: rc = %d\n", MOUNT_CONFIGS_DIR, err); GOTO(err_ops, err); } @@ -403,7 +401,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, /* We keep the cl_mgc_sem until mgc_fs_cleanup */ RETURN(0); -err_ops: +err_ops: fsfilt_put_ops(obd->obd_fsops); obd->obd_fsops = NULL; cli->cl_mgc_vfsmnt = NULL; @@ -423,15 +421,15 @@ static int mgc_fs_cleanup(struct obd_device *obd) struct lvfs_run_ctxt saved; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); l_dput(cli->cl_mgc_configs_dir); - cli->cl_mgc_configs_dir = NULL; + cli->cl_mgc_configs_dir = NULL; pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); class_decref(obd); } cli->cl_mgc_vfsmnt = NULL; - if (obd->obd_fsops) + if (obd->obd_fsops) fsfilt_put_ops(obd->obd_fsops); - + up(&cli->cl_mgc_sem); RETURN(rc); @@ -444,7 +442,7 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ENTRY; switch (stage) { - case OBD_CLEANUP_EARLY: + case OBD_CLEANUP_EARLY: break; case OBD_CLEANUP_EXPORTS: if (atomic_dec_and_test(&mgc_count)) { @@ -474,10 +472,10 @@ static int mgc_cleanup(struct obd_device *obd) ENTRY; LASSERT(cli->cl_mgc_vfsmnt == NULL); - - /* COMPAT_146 - old config logs may have added profiles we don't + + /* COMPAT_146 - old config logs may have added profiles we don't know about */ - if (obd->obd_type->typ_refcnt <= 1) + if (obd->obd_type->typ_refcnt <= 1) /* Only for the last mgc */ class_del_profiles(); @@ -487,18 +485,18 @@ static int mgc_cleanup(struct obd_device *obd) RETURN(rc); } -static int mgc_setup(struct obd_device *obd, obd_count len, void *buf) +static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { int rc; ENTRY; ptlrpcd_addref(); - rc = client_obd_setup(obd, len, buf); + rc = client_obd_setup(obd, lcfg); if (rc) GOTO(err_decref, rc); - rc = obd_llog_init(obd, obd, 0, NULL, NULL); + rc = obd_llog_init(obd, NULL, obd, 0, NULL, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_cleanup, rc); @@ -540,11 +538,11 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, case LDLM_CB_CANCELING: { /* We've given up the lock, prepare ourselves to update. */ LDLM_DEBUG(lock, "MGC cancel CB"); - + CDEBUG(D_MGC, "Lock res "LPX64" (%.8s)\n", - lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[0], (char *)&lock->l_resource->lr_name.name[0]); - + if (!cld) { CERROR("missing data, won't requeue\n"); break; @@ -583,7 +581,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, if (rc) { - CERROR("%s CB failed %d:\n", flag == LDLM_CB_BLOCKING ? + CERROR("%s CB failed %d:\n", flag == LDLM_CB_BLOCKING ? "blocking" : "cancel", rc); LDLM_ERROR(lock, "MGC ast"); } @@ -596,14 +594,14 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, int *flags, void *bl_cb, void *cp_cb, void *gl_cb, void *data, __u32 lvb_len, void *lvb_swabber, struct lustre_handle *lockh) -{ +{ struct config_llog_data *cld = (struct config_llog_data *)data; int rc; ENTRY; CDEBUG(D_MGC, "Enqueue for %s (res "LPX64")\n", cld->cld_logname, cld->cld_resid.name[0]); - + /* We can only drop this config log ref when we drop the lock */ if (config_log_get(cld)) RETURN(ELDLM_LOCK_ABORTED); @@ -611,8 +609,8 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, /* We need a callback for every lockholder, so don't try to ldlm_lock_match (see rev 1.1.2.11.2.47) */ - rc = ldlm_cli_enqueue(exp, NULL, cld->cld_resid, - type, NULL, mode, flags, + rc = ldlm_cli_enqueue(exp, NULL, &cld->cld_resid, + type, NULL, mode, flags, mgc_blocking_ast, ldlm_completion_ast, NULL, data, NULL, 0, NULL, lockh, 0); /* A failed enqueue should still call the mgc_blocking_ast, @@ -710,14 +708,14 @@ static int mgc_target_register(struct obd_export *exp, RETURN(-ENOMEM); req_mti = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*req_mti)); - if (!req_mti) + if (!req_mti) RETURN(-ENOMEM); memcpy(req_mti, mti, sizeof(*req_mti)); ptlrpc_req_set_repsize(req, 2, rep_size); CDEBUG(D_MGC, "register %s\n", mti->mti_svname); - + rc = ptlrpc_queue_wait(req); if (!rc) { rep_mti = lustre_swab_repbuf(req, REPLY_REC_OFF, @@ -733,7 +731,7 @@ static int mgc_target_register(struct obd_export *exp, } int mgc_set_info_async(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val, + void *key, obd_count vallen, void *val, struct ptlrpc_request_set *set) { struct obd_import *imp = class_exp2cliimp(exp); @@ -759,25 +757,25 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, value = *(int *)val; spin_lock(&imp->imp_lock); imp->imp_initial_recov_bk = value > 0; - /* Even after the initial connection, give up all comms if + /* Even after the initial connection, give up all comms if nobody answers the first time. */ imp->imp_recon_bk = 1; spin_unlock(&imp->imp_lock); - CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n", + CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n", imp->imp_obd->obd_name, value, imp->imp_initial_recov, - imp->imp_deactive, imp->imp_invalid, + imp->imp_deactive, imp->imp_invalid, imp->imp_replayable, imp->imp_obd->obd_replayable, ptlrpc_import_state_name(imp->imp_state)); /* Resurrect if we previously died */ if (imp->imp_invalid || value > 1) { - /* See client_disconnect_export */ - /* Allow reconnect attempts */ - imp->imp_obd->obd_no_recov = 0; /* Force a new connect attempt */ /* (can't put these in obdclass, module loop) */ ptlrpc_invalidate_import(imp); /* Do a fresh connect next time by zeroing the handle */ ptlrpc_disconnect_import(imp, 1); + /* See client_disconnect_export */ + /* Allow reconnect attempts */ + imp->imp_obd->obd_no_recov = 0; /* Remove 'invalid' flag */ ptlrpc_activate_import(imp); /* Attempt a new connect */ @@ -819,7 +817,7 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, } RETURN(rc); -} +} static int mgc_import_event(struct obd_device *obd, struct obd_import *imp, @@ -831,18 +829,18 @@ static int mgc_import_event(struct obd_device *obd, CDEBUG(D_MGC, "import event %#x\n", event); switch (event) { - case IMP_EVENT_DISCON: + case IMP_EVENT_DISCON: /* MGC imports should not wait for recovery */ ptlrpc_invalidate_import(imp); break; - case IMP_EVENT_INACTIVE: + case IMP_EVENT_INACTIVE: break; case IMP_EVENT_INVALIDATE: { struct ldlm_namespace *ns = obd->obd_namespace; ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); break; } - case IMP_EVENT_ACTIVE: + case IMP_EVENT_ACTIVE: LCONSOLE_WARN("%s: Reactivating import\n", obd->obd_name); break; case IMP_EVENT_OCD: @@ -854,20 +852,20 @@ static int mgc_import_event(struct obd_device *obd, RETURN(rc); } -static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, - struct obd_uuid *uuid) +static int mgc_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, + struct llog_catid *logid, struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; ENTRY; - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_CONFIG_ORIG_CTXT, tgt, 0, NULL, &llog_lvfs_ops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); @@ -908,7 +906,7 @@ static int mgc_llog_is_empty(struct obd_device *obd, struct llog_ctxt *ctxt, return(rc <= 1); } -static int mgc_copy_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, +static int mgc_copy_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, void *data) { struct llog_rec_hdr local_rec = *rec; @@ -920,12 +918,12 @@ static int mgc_copy_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, /* Append all records */ local_rec.lrh_len -= sizeof(*rec) + sizeof(struct llog_rec_tail); - rc = llog_write_rec(local_llh, &local_rec, NULL, 0, + rc = llog_write_rec(local_llh, &local_rec, NULL, 0, (void *)cfg_buf, -1); lcfg = (struct lustre_cfg *)cfg_buf; - CDEBUG(D_INFO, "idx=%d, rc=%d, len=%d, cmd %x %s %s\n", - rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command, + CDEBUG(D_INFO, "idx=%d, rc=%d, len=%d, cmd %x %s %s\n", + rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command, lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1)); RETURN(rc); @@ -1014,7 +1012,7 @@ DECLARE_MUTEX(llog_process_lock); /* Get a config log from the MGS and process it. This func is called for both clients and servers. */ -static int mgc_process_log(struct obd_device *mgc, +static int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) { struct llog_ctxt *ctxt, *lctxt; @@ -1030,19 +1028,14 @@ static int mgc_process_log(struct obd_device *mgc, CERROR("Missing cld, aborting log update\n"); RETURN(-EINVAL); } - if (cld->cld_stopping) - RETURN(0); - - if (cld->cld_cfg.cfg_flags & CFG_F_SERVER146) - /* If we started from an old MDT, don't bother trying to - get log updates from the MGS */ + if (cld->cld_stopping) RETURN(0); OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PROCESS_LOG, 20); lsi = s2lsi(cld->cld_cfg.cfg_sb); - CDEBUG(D_MGC, "Process log %s:%s from %d\n", cld->cld_logname, + CDEBUG(D_MGC, "Process log %s:%s from %d\n", cld->cld_logname, cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT); @@ -1051,69 +1044,69 @@ static int mgc_process_log(struct obd_device *mgc, RETURN(-EINVAL); } - /* I don't want mutliple processes running process_log at once -- - sounds like badness. It actually might be fine, as long as + /* I don't want mutliple processes running process_log at once -- + sounds like badness. It actually might be fine, as long as we're not trying to update from the same log simultaneously (in which case we should use a per-log sem.) */ down(&llog_process_lock); /* Get the cfg lock on the llog */ - rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL, - LCK_CR, &flags, NULL, NULL, NULL, + rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL, + LCK_CR, &flags, NULL, NULL, NULL, cld, 0, NULL, &lockh); - if (rcl) + if (rcl) CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl); - + lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT); - /* Copy the setup log locally if we can. Don't mess around if we're + /* Copy the setup log locally if we can. Don't mess around if we're running an MGS though (logs are already local). */ - if (lctxt && lsi && (lsi->lsi_flags & LSI_SERVER) && + if (lctxt && lsi && (lsi->lsi_flags & LSI_SERVER) && (lsi->lsi_srv_mnt == cli->cl_mgc_vfsmnt) && !IS_MGS(lsi->lsi_ldd)) { push_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL); must_pop++; - if (rcl == 0) + if (rcl == 0) /* Only try to copy log if we have the lock. */ rc = mgc_copy_llog(mgc, ctxt, lctxt, cld->cld_logname); if (rcl || rc) { if (mgc_llog_is_empty(mgc, lctxt, cld->cld_logname)) { - LCONSOLE_ERROR("Failed to get MGS log %s " - "and no local copy.\n", - cld->cld_logname); + LCONSOLE_ERROR_MSG(0x13a, "Failed to get MGS " + "log %s and no local copy." + "\n", cld->cld_logname); GOTO(out_pop, rc = -ENOTCONN); } LCONSOLE_WARN("Failed to get MGS log %s, using " "local copy.\n", cld->cld_logname); } /* Now, whether we copied or not, start using the local llog. - If we failed to copy, we'll start using whatever the old + If we failed to copy, we'll start using whatever the old log has. */ ctxt = lctxt; } - /* logname and instance info should be the same, so use our + /* logname and instance info should be the same, so use our copy of the instance for the update. The cfg_last_idx will be updated here. */ rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg); - + out_pop: - if (must_pop) + if (must_pop) pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL); - /* Now drop the lock so MGS can revoke it */ + /* Now drop the lock so MGS can revoke it */ if (!rcl) { - rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL, + rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL, LCK_CR, &lockh); - if (rcl) + if (rcl) CERROR("Can't drop cfg lock: %d\n", rcl); } - + CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n", mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc); up(&llog_process_lock); - + RETURN(rc); } @@ -1129,18 +1122,18 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf) /* Add any new target, not just osts */ struct mgs_target_info *mti; - if (LUSTRE_CFG_BUFLEN(lcfg, 1) != + if (LUSTRE_CFG_BUFLEN(lcfg, 1) != sizeof(struct mgs_target_info)) GOTO(out, rc = -EINVAL); mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1); - CDEBUG(D_MGC, "add_target %s %#x\n", + CDEBUG(D_MGC, "add_target %s %#x\n", mti->mti_svname, mti->mti_flags); rc = mgc_target_register(obd->u.cli.cl_mgc_mgsexp, mti); break; } case LCFG_LOV_DEL_OBD: - /* Remove target from the fs? */ + /* Remove target from the fs? */ /* FIXME */ CERROR("lov_del_obd unimplemented\n"); rc = -ENOSYS; @@ -1152,29 +1145,29 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf) char *logname = lustre_cfg_string(lcfg, 1); cfg = (struct config_llog_instance *)lustre_cfg_buf(lcfg, 2); sb = *(struct super_block **)lustre_cfg_buf(lcfg, 3); - - CDEBUG(D_MGC, "parse_log %s from %d\n", logname, + + CDEBUG(D_MGC, "parse_log %s from %d\n", logname, cfg->cfg_last_idx); /* We're only called through here on the initial mount */ rc = config_log_add(logname, cfg, sb); - if (rc) + if (rc) break; cld = config_log_find(logname, cfg); if (IS_ERR(cld)) { rc = PTR_ERR(cld); break; } - + /* COMPAT_146 */ /* FIXME only set this for old logs! Right now this forces us to always skip the "inside markers" check */ cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146; - + rc = mgc_process_log(obd, cld); config_log_put(cld); - - break; + + break; } case LCFG_LOG_END: { struct config_llog_instance *cfg = NULL; @@ -1216,7 +1209,8 @@ struct obd_ops mgc_obd_ops = { int __init mgc_init(void) { - return class_register_type(&mgc_obd_ops, NULL, LUSTRE_MGC_NAME); + return class_register_type(&mgc_obd_ops, NULL, NULL, + LUSTRE_MGC_NAME, NULL); } #ifdef __KERNEL__ diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c index 2453dc3..7320eba 100644 --- a/lustre/mgs/lproc_mgs.c +++ b/lustre/mgs/lproc_mgs.c @@ -33,7 +33,6 @@ #include <lprocfs_status.h> #include "mgs_internal.h" - #ifdef LPROCFS static int lprocfs_mgs_rd_mntdev(char *page, char **start, off_t off, int count, diff --git a/lustre/mgs/mgs_fs.c b/lustre/mgs/mgs_fs.c index 0794c67..aab8443d 100644 --- a/lustre/mgs/mgs_fs.c +++ b/lustre/mgs/mgs_fs.c @@ -46,24 +46,24 @@ /* Look up an entry by inode number. */ /* this function ONLY returns valid dget'd dentries with an initialized inode or errors */ -static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid) +static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, + __u64 ino, __u32 gen) { char fid_name[32]; - unsigned long ino = fid->id; - __u32 generation = fid->generation; struct inode *inode; struct dentry *result; + ENTRY; CDEBUG(D_DENTRY, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n", - ino, generation, mgs->mgs_sb); + (unsigned long)ino, gen, mgs->mgs_sb); if (ino == 0) RETURN(ERR_PTR(-ESTALE)); - snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); + snprintf(fid_name, sizeof(fid_name), "0x%lx", (unsigned long)ino); - /* under ext3 this is neither supposed to return bad inodes - nor NULL inodes. */ + /* under ext3 this is neither supposed to return bad inodes nor NULL + inodes. */ result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name)); if (IS_ERR(result)) RETURN(result); @@ -82,13 +82,12 @@ static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid) RETURN(ERR_PTR(-ENOENT)); } - if (generation && inode->i_generation != generation) { + if (gen && inode->i_generation != gen) { /* we didn't find the right inode.. */ CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, " "count: %d, generation %u/%u\n", inode->i_ino, - (unsigned long)inode->i_nlink, - atomic_read(&inode->i_count), inode->i_generation, - generation); + (unsigned long)inode->i_nlink, atomic_read(&inode->i_count), + inode->i_generation, gen); l_dput(result); RETURN(ERR_PTR(-ENOENT)); } @@ -96,14 +95,11 @@ static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid) RETURN(result); } -static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, - void *data) +static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, + __u64 gr, void *data) { struct obd_device *obd = data; - struct ll_fid fid; - fid.id = id; - fid.generation = gen; - return mgs_fid2dentry(&obd->u.mgs, &fid); + return mgs_fid2dentry(&obd->u.mgs, id, gen); } struct lvfs_callback_ops mgs_lvfs_ops = { diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index b53339a..9fb82c7 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -48,7 +48,8 @@ /* Establish a connection to the MGS.*/ -static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd, +static int mgs_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { struct obd_export *exp; @@ -114,7 +115,7 @@ static int mgs_cleanup(struct obd_device *obd); static int mgs_handle(struct ptlrpc_request *req); /* Start the MGS obd */ -static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) +static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; struct mgs_obd *mgs = &obd->u.mgs; @@ -128,7 +129,7 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) /* Find our disk */ lmi = server_get_mount(obd->obd_name); - if (!lmi) + if (!lmi) RETURN(rc = -EINVAL); mnt = lmi->lmi_mnt; @@ -161,7 +162,7 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) if (rc < 0) GOTO(err_fs, rc); - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, + rc = llog_setup(obd, NULL, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, &llog_lvfs_ops); if (rc) GOTO(err_fs, rc); @@ -181,7 +182,7 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) mgs_handle, LUSTRE_MGS_NAME, obd->obd_proc_entry, NULL, MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX, - "ll_mgs"); + "ll_mgs", LCT_MD_THREAD); if (!mgs->mgs_service) { CERROR("failed to start service\n"); @@ -259,7 +260,7 @@ static int mgs_cleanup(struct obd_device *obd) if (mgs->mgs_sb == NULL) RETURN(0); - + ptlrpc_unregister_service(mgs->mgs_service); mgs_cleanup_fsdb_list(obd); @@ -272,12 +273,13 @@ static int mgs_cleanup(struct obd_device *obd) server_put_mount(obd->obd_name, mgs->mgs_vfsmnt); mgs->mgs_sb = NULL; - /* Free the namespace in it's own thread, so that if the - ldlm_cancel_handler put the last mgs obd ref, we won't + /* Free the namespace in it's own thread, so that if the + ldlm_cancel_handler put the last mgs obd ref, we won't deadlock here. */ - cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, + cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, CLONE_VM | CLONE_FILES); + fsfilt_put_ops(obd->obd_fsops); LCONSOLE_INFO("%s has stopped.\n", obd->obd_name); @@ -293,15 +295,15 @@ static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname, ENTRY; rc = mgc_logname2resid(fsname, &res_id); - if (!rc) - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id, + if (!rc) + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN, NULL, LCK_EX, &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, fsname, 0, NULL, lockh); - if (rc) + if (rc) CERROR("can't take cfg lock for %s (%d)\n", fsname, rc); - + RETURN(rc); } @@ -322,21 +324,21 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti) rc = mgs_check_index(obd, mti); if (rc == 0) { - LCONSOLE_ERROR("%s claims to have registered, but this MGS " - "does not know about it. Assuming writeconf.\n", - mti->mti_svname); + LCONSOLE_ERROR_MSG(0x13b, "%s claims to have registered, but " + "this MGS does not know about it. Assuming" + " writeconf.\n", mti->mti_svname); mti->mti_flags |= LDD_F_WRITECONF; rc = 1; } else if (rc == -1) { - LCONSOLE_ERROR("Client log %s-client has disappeared! " - "Regenerating all logs.\n", - mti->mti_fsname); + LCONSOLE_ERROR_MSG(0x13c, "Client log %s-client has " + "disappeared! Regenerating all logs.\n", + mti->mti_fsname); mti->mti_flags |= LDD_F_WRITECONF; rc = 1; } else { /* Index is correctly marked as used */ - /* If the logs don't contain the mti_nids then add + /* If the logs don't contain the mti_nids then add them as failover nids */ rc = mgs_check_failnid(obd, mti); } @@ -346,7 +348,7 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti) /* Called whenever a target starts up. Flags indicate first connect, etc. */ static int mgs_handle_target_reg(struct ptlrpc_request *req) -{ +{ struct obd_device *obd = req->rq_export->exp_obd; struct lustre_handle lockh; struct mgs_target_info *mti, *rep_mti; @@ -356,7 +358,7 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) mti = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*mti), lustre_swab_mgs_target_info); - + if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 | LDD_F_UPDATE))) { /* We're just here as a startup ping. */ @@ -364,22 +366,23 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) mti->mti_svname, obd_export_nid2str(req->rq_export)); rc = mgs_check_target(obd, mti); /* above will set appropriate mti flags */ - if (rc <= 0) + if (rc <= 0) /* Nothing wrong, or fatal error */ GOTO(out_nolock, rc); } /* Revoke the config lock to make sure nobody is reading. */ /* Although actually I think it should be alright if - someone was reading while we were updating the logs - if we + someone was reading while we were updating the logs - if we revoke at the end they will just update from where they left off. */ lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh); if (lockrc != ELDLM_OK) { - LCONSOLE_ERROR("%s: Can't signal other nodes to update " - "their configuration (%d). Updating local logs " - "anyhow; you might have to manually restart " - "other nodes to get the latest configuration.\n", - obd->obd_name, lockrc); + LCONSOLE_ERROR_MSG(0x13d, "%s: Can't signal other nodes to " + "update their configuration (%d). Updating " + "local logs anyhow; you might have to " + "manually restart other nodes to get the " + "latest configuration.\n", + obd->obd_name, lockrc); } OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10); @@ -418,19 +421,19 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) /* end COMPAT_146 */ if (mti->mti_flags & LDD_F_UPDATE) { - CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, + CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); - - /* create or update the target log + + /* create or update the target log and update the client/mdt logs */ rc = mgs_write_log_target(obd, mti); if (rc) { - CERROR("Failed to write %s log (%d)\n", + CERROR("Failed to write %s log (%d)\n", mti->mti_svname, rc); GOTO(out, rc); } - mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | + mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | LDD_F_NEED_INDEX | LDD_F_WRITECONF | LDD_F_UPGRADE14); mti->mti_flags |= LDD_F_REWRITE_LDD; @@ -441,9 +444,9 @@ out: if (lockrc == ELDLM_OK) mgs_put_cfg_lock(&lockh); out_nolock: - CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, + CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, mti->mti_stripe_index, rc); - lustre_pack_reply(req, 2, rep_size, NULL); + lustre_pack_reply(req, 2, rep_size, NULL); /* send back the whole mti in the reply */ rep_mti = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*rep_mti)); @@ -461,7 +464,6 @@ int mgs_handle(struct ptlrpc_request *req) ENTRY; OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_REQUEST_NET, 2); - OBD_FAIL_RETURN(OBD_FAIL_MGS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); LASSERT(current->journal_info == NULL); opc = lustre_msg_get_opc(req->rq_reqmsg); @@ -477,7 +479,7 @@ int mgs_handle(struct ptlrpc_request *req) switch (opc) { case MGS_CONNECT: DEBUG_REQ(D_MGS, req, "connect"); - rc = target_handle_connect(req, mgs_handle); + rc = target_handle_connect(req); if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1)) /* Make clients trying to reconnect after a MGS restart happy; also requires obd_replayable */ @@ -490,11 +492,11 @@ int mgs_handle(struct ptlrpc_request *req) req->rq_status = rc; /* superfluous? */ break; case MGS_TARGET_REG: - DEBUG_REQ(D_MGS, req, "target add\n"); + DEBUG_REQ(D_MGS, req, "target add"); rc = mgs_handle_target_reg(req); break; case MGS_TARGET_DEL: - DEBUG_REQ(D_MGS, req, "target del\n"); + DEBUG_REQ(D_MGS, req, "target del"); //rc = mgs_handle_target_del(req); break; @@ -515,7 +517,7 @@ int mgs_handle(struct ptlrpc_request *req) rc = target_handle_ping(req); break; case OBD_LOG_CANCEL: - DEBUG_REQ(D_MGS, req, "log cancel\n"); + DEBUG_REQ(D_MGS, req, "log cancel"); rc = -ENOTSUPP; /* la la la */ break; @@ -546,11 +548,11 @@ int mgs_handle(struct ptlrpc_request *req) } LASSERT(current->journal_info == NULL); - - if (rc) + + if (rc) CERROR("MGS handle cmd=%d rc=%d\n", opc, rc); - out: +out: target_send_reply(req, rc, fail); RETURN(0); } @@ -598,7 +600,7 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (lcfg == NULL) RETURN(-ENOMEM); rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1); - if (rc) + if (rc) GOTO(out_free, rc); if (lcfg->lcfg_bufcount < 1) @@ -616,8 +618,8 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, writing (above). */ if (fsname[0]) { lockrc = mgs_get_cfg_lock(obd, fsname, &lockh); - if (lockrc != ELDLM_OK) - CERROR("lock error %d for fs %s\n", lockrc, + if (lockrc != ELDLM_OK) + CERROR("lock error %d for fs %s\n", lockrc, fsname); else mgs_put_cfg_lock(&lockh); @@ -677,7 +679,8 @@ static int __init mgs_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(mgs, &lvars); - class_register_type(&mgs_obd_ops, lvars.module_vars, LUSTRE_MGS_NAME); + class_register_type(&mgs_obd_ops, NULL, + lvars.module_vars, LUSTRE_MGS_NAME, NULL); return 0; } diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 81c94ad..9fd1250 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -15,7 +15,6 @@ #include <lustre_log.h> #include <lustre_export.h> - /* in ms */ #define MGS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000) @@ -23,11 +22,12 @@ int class_dentry_readdir(struct obd_device *obd, struct dentry *dir, struct vfsmount *inmnt, struct list_head *dentry_list); - -#define INDEX_MAP_SIZE 8192 /* covers indicies to FFFF */ + +#define INDEX_MAP_SIZE 8192 /* covers indicies to FFFF */ #define FSDB_LOG_EMPTY 0x0001 /* missing client log */ #define FSDB_OLDLOG14 0x0002 /* log starts in old (1.4) style */ + struct fs_db { char fsdb_name[9]; struct list_head fsdb_list; /* list of databases */ @@ -36,7 +36,9 @@ struct fs_db { void *fsdb_mdt_index_map; /* bitmap of used indicies */ /* COMPAT_146 these items must be recorded out of the old client log */ char *fsdb_clilov; /* COMPAT_146 client lov name */ + char *fsdb_clilmv; char *fsdb_mdtlov; /* COMPAT_146 mds lov name */ + char *fsdb_mdtlmv; char *fsdb_mdc; /* COMPAT_146 mdc name */ /* end COMPAT_146 */ __u32 fsdb_flags; @@ -58,7 +60,6 @@ int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt); int mgs_fs_cleanup(struct obd_device *obddev); #define strsuf(buf, suffix) (strcmp((buf)+strlen(buf)-strlen(suffix), (suffix))) - #ifdef LPROCFS int lproc_mgs_setup(struct obd_device *dev); int lproc_mgs_add_live(struct obd_device *obd, struct fs_db *fsdb); @@ -72,6 +73,6 @@ static int lproc_mgs_del_live(struct obd_device *obd, struct fs_db *fsdb) {return 0;} #endif - #endif /* _MGS_INTERNAL_H */ + diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 0cb5b69..5e4db6a 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -45,14 +45,15 @@ #include <lustre_fsfilt.h> #include <lustre_disk.h> #include <lustre_param.h> +#include <lustre_sec.h> #include "mgs_internal.h" -/******************** Class functions *********************/ +/********************** Class functions ********************/ /* Caller must list_del and OBD_FREE each dentry from the list */ int class_dentry_readdir(struct obd_device *obd, struct dentry *dir, - struct vfsmount *inmnt, - struct list_head *dentry_list){ + struct vfsmount *inmnt, + struct list_head *dentry_list){ /* see mds_cleanup_pending */ struct lvfs_run_ctxt saved; struct file *file; @@ -138,21 +139,21 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, lcfg = (struct lustre_cfg *)cfg_buf; - CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command, + CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command, lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1)); - /* Figure out ost indicies */ + /* Figure out ost indicies */ /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */ if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD || lcfg->lcfg_command == LCFG_LOV_DEL_OBD) { index = simple_strtoul(lustre_cfg_string(lcfg, 2), NULL, 10); CDEBUG(D_MGS, "OST index for %s is %u (%s)\n", - lustre_cfg_string(lcfg, 1), index, + lustre_cfg_string(lcfg, 1), index, lustre_cfg_string(lcfg, 2)); set_bit(index, fsdb->fsdb_ost_index_map); } - + /* Figure out mdt indicies */ /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */ if ((lcfg->lcfg_command == LCFG_ATTACH) && @@ -219,6 +220,7 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, RETURN(rc); } +/* fsdb->fsdb_sem is already held in mgs_find_or_make_fsdb*/ static int mgs_get_fsdb_from_llog(struct obd_device *obd, struct fs_db *fsdb) { char *logname; @@ -230,7 +232,7 @@ static int mgs_get_fsdb_from_llog(struct obd_device *obd, struct fs_db *fsdb) name_create(&logname, fsdb->fsdb_name, "-client"); down(&fsdb->fsdb_sem); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - + rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), &loghandle, NULL, logname); if (rc) @@ -279,9 +281,9 @@ static struct fs_db *mgs_new_fsdb(struct obd_device *obd, char *fsname) struct fs_db *fsdb; int rc; ENTRY; - + OBD_ALLOC_PTR(fsdb); - if (!fsdb) + if (!fsdb) RETURN(NULL); OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); @@ -290,28 +292,37 @@ static struct fs_db *mgs_new_fsdb(struct obd_device *obd, char *fsname) CERROR("No memory for index maps\n"); GOTO(err, 0); } - + strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name)); fsdb->fsdb_name[sizeof(fsdb->fsdb_name) - 1] = 0; rc = name_create(&fsdb->fsdb_mdtlov, fsname, "-mdtlov"); if (rc) GOTO(err, rc); + rc = name_create(&fsdb->fsdb_mdtlmv, fsname, "-mdtlmv"); + if (rc) + GOTO(err, rc); rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov"); if (rc) GOTO(err, rc); + rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv"); + if (rc) + GOTO(err, rc); + sema_init(&fsdb->fsdb_sem, 1); list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list); lproc_mgs_add_live(obd, fsdb); RETURN(fsdb); err: - if (fsdb->fsdb_ost_index_map) + if (fsdb->fsdb_ost_index_map) OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); - if (fsdb->fsdb_mdt_index_map) + if (fsdb->fsdb_mdt_index_map) OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); - name_destroy(&fsdb->fsdb_clilov); + name_destroy(&fsdb->fsdb_clilov); + name_destroy(&fsdb->fsdb_clilmv); name_destroy(&fsdb->fsdb_mdtlov); + name_destroy(&fsdb->fsdb_mdtlmv); OBD_FREE_PTR(fsdb); RETURN(NULL); } @@ -325,7 +336,9 @@ static void mgs_free_fsdb(struct obd_device *obd, struct fs_db *fsdb) OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); name_destroy(&fsdb->fsdb_clilov); + name_destroy(&fsdb->fsdb_clilmv); name_destroy(&fsdb->fsdb_mdtlov); + name_destroy(&fsdb->fsdb_mdtlmv); name_destroy(&fsdb->fsdb_mdc); OBD_FREE_PTR(fsdb); } @@ -369,7 +382,7 @@ static int mgs_find_or_make_fsdb(struct obd_device *obd, char *name, CDEBUG(D_MGS, "Creating new db\n"); fsdb = mgs_new_fsdb(obd, name); up(&mgs->mgs_sem); - if (!fsdb) + if (!fsdb) return -ENOMEM; /* populate the db from the client llog */ @@ -381,12 +394,12 @@ static int mgs_find_or_make_fsdb(struct obd_device *obd, char *name, } *dbh = fsdb; - + return 0; } /* 1 = index in use - 0 = index unused + 0 = index unused -1= empty client log */ int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti) { @@ -397,23 +410,23 @@ int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti) LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX)); - rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); + rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); if (rc) { CERROR("Can't get db for %s\n", mti->mti_fsname); RETURN(rc); } - if (fsdb->fsdb_flags & FSDB_LOG_EMPTY) + if (fsdb->fsdb_flags & FSDB_LOG_EMPTY) RETURN(-1); - if (mti->mti_flags & LDD_F_SV_TYPE_OST) + if (mti->mti_flags & LDD_F_SV_TYPE_OST) imap = fsdb->fsdb_ost_index_map; - else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) + else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) imap = fsdb->fsdb_mdt_index_map; else RETURN(-EINVAL); - if (test_bit(mti->mti_stripe_index, imap)) + if (test_bit(mti->mti_stripe_index, imap)) RETURN(1); RETURN(0); } @@ -440,15 +453,15 @@ int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti) int rc = 0; ENTRY; - rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); + rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); if (rc) { CERROR("Can't get db for %s\n", mti->mti_fsname); RETURN(rc); } - if (mti->mti_flags & LDD_F_SV_TYPE_OST) + if (mti->mti_flags & LDD_F_SV_TYPE_OST) imap = fsdb->fsdb_ost_index_map; - else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) + else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) imap = fsdb->fsdb_mdt_index_map; else RETURN(-EINVAL); @@ -460,27 +473,20 @@ int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti) mti->mti_stripe_index = rc; } - /* Remove after CMD */ - if ((mti->mti_flags & LDD_F_SV_TYPE_MDT) && - (mti->mti_stripe_index > 0)) { - LCONSOLE_ERROR("MDT index must = 0 (until Clustered MetaData " - "feature is ready.)\n"); - mti->mti_stripe_index = 0; - } - if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) { - LCONSOLE_ERROR("Server %s requested index %d, but the" - "max index is %d.\n", - mti->mti_svname, mti->mti_stripe_index, - INDEX_MAP_SIZE * 8); + LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, " + "but the max index is %d.\n", + mti->mti_svname, mti->mti_stripe_index, + INDEX_MAP_SIZE * 8); RETURN(-ERANGE); } - + if (test_bit(mti->mti_stripe_index, imap)) { if (mti->mti_flags & LDD_F_VIRGIN) { - LCONSOLE_ERROR("Server %s requested index %d, but that " - "index is already in use\n", - mti->mti_svname, mti->mti_stripe_index); + LCONSOLE_ERROR_MSG(0x140, "Server %s requested index " + "%d, but that index is already in " + "use\n", mti->mti_svname, + mti->mti_stripe_index); RETURN(-EADDRINUSE); } else { CDEBUG(D_MGS, "Server %s updating index %d\n", @@ -494,7 +500,7 @@ int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti) server_make_name(mti->mti_flags, mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname); - CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname, + CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname, mti->mti_stripe_index); RETURN(0); @@ -611,7 +617,6 @@ out_pop: RETURN(rc); } - /******************** config log recording functions *********************/ static int record_lcfg(struct obd_device *obd, struct llog_handle *llh, @@ -630,6 +635,7 @@ static int record_lcfg(struct obd_device *obd, struct llog_handle *llh, lcfg->lcfg_buflens); rec.lrh_len = llog_data_len(buflen); rec.lrh_type = OBD_CFG_REC; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); /* idx = -1 means append */ rc = llog_write_rec(llh, &rec, NULL, 0, (void *)lcfg, -1); @@ -646,18 +652,18 @@ static int record_base(struct obd_device *obd, struct llog_handle *llh, struct lustre_cfg_bufs bufs; struct lustre_cfg *lcfg; int rc; - + CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname, - cmd, s1, s2, s3, s4); + cmd, s1, s2, s3, s4); lustre_cfg_bufs_reset(&bufs, cfgname); - if (s1) + if (s1) lustre_cfg_bufs_set_string(&bufs, 1, s1); - if (s2) + if (s2) lustre_cfg_bufs_set_string(&bufs, 2, s2); - if (s3) + if (s3) lustre_cfg_bufs_set_string(&bufs, 3, s3); - if (s4) + if (s4) lustre_cfg_bufs_set_string(&bufs, 4, s4); lcfg = lustre_cfg_new(cmd, &bufs); @@ -666,26 +672,26 @@ static int record_base(struct obd_device *obd, struct llog_handle *llh, lcfg->lcfg_nid = nid; rc = record_lcfg(obd, llh, lcfg); - + lustre_cfg_free(lcfg); - + if (rc) { CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname, - cmd, s1, s2, s3, s4); + cmd, s1, s2, s3, s4); } return(rc); } -static inline int record_add_uuid(struct obd_device *obd, - struct llog_handle *llh, +static inline int record_add_uuid(struct obd_device *obd, + struct llog_handle *llh, uint64_t nid, char *uuid) { return record_base(obd,llh,NULL,nid,LCFG_ADD_UUID,uuid,0,0,0); } -static inline int record_add_conn(struct obd_device *obd, +static inline int record_add_conn(struct obd_device *obd, struct llog_handle *llh, char *devname, char *uuid) @@ -700,12 +706,30 @@ static inline int record_attach(struct obd_device *obd, struct llog_handle *llh, } static inline int record_setup(struct obd_device *obd, struct llog_handle *llh, - char *devname, + char *devname, char *s1, char *s2, char *s3, char *s4) { return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4); } +static inline int record_sec_flavor(struct obd_device *obd, + struct llog_handle *llh, char *devname, + struct sec_flavor_config *conf) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int rc; + + lustre_cfg_bufs_reset(&bufs, devname); + lustre_cfg_bufs_set(&bufs, 1, conf, sizeof(*conf)); + lcfg = lustre_cfg_new(LCFG_SEC_FLAVOR, &bufs); + + rc = record_lcfg(obd, llh, lcfg); + + lustre_cfg_free(lcfg); + return rc; +} + static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh, char *devname, struct lov_desc *desc) { @@ -724,6 +748,33 @@ static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh, return rc; } +static int record_lmv_setup(struct obd_device *obd, struct llog_handle *llh, + char *devname, struct lmv_desc *desc) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int rc; + + lustre_cfg_bufs_reset(&bufs, devname); + lustre_cfg_bufs_set(&bufs, 1, desc, sizeof(*desc)); + lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); + + rc = record_lcfg(obd, llh, lcfg); + + lustre_cfg_free(lcfg); + return rc; +} + +static inline int record_mdc_add(struct obd_device *obd, + struct llog_handle *llh, + char *logname, char *mdcuuid, + char *mdtuuid, char *index, + char *gen) +{ + return record_base(obd,llh,logname,0,LCFG_ADD_MDC, + mdtuuid,index,gen,mdcuuid); +} + static inline int record_lov_add(struct obd_device *obd, struct llog_handle *llh, char *lov_name, char *ost_uuid, @@ -731,16 +782,16 @@ static inline int record_lov_add(struct obd_device *obd, { return record_base(obd,llh,lov_name,0,LCFG_LOV_ADD_OBD, ost_uuid,index,gen,0); -} +} -static inline int record_mount_opt(struct obd_device *obd, +static inline int record_mount_opt(struct obd_device *obd, struct llog_handle *llh, char *profile, char *lov_name, char *mdc_name) { return record_base(obd,llh,NULL,0,LCFG_MOUNTOPT, profile,lov_name,mdc_name,0); -} +} static int record_marker(struct obd_device *obd, struct llog_handle *llh, struct fs_db *fsdb, __u32 flags, @@ -751,7 +802,7 @@ static int record_marker(struct obd_device *obd, struct llog_handle *llh, struct lustre_cfg *lcfg; int rc; - if (flags & CM_START) + if (flags & CM_START) fsdb->fsdb_gen++; marker.cm_step = fsdb->fsdb_gen; marker.cm_flags = flags; @@ -771,13 +822,13 @@ static int record_marker(struct obd_device *obd, struct llog_handle *llh, return rc; } -static int record_start_log(struct obd_device *obd, +static int record_start_log(struct obd_device *obd, struct llog_handle **llh, char *name) { static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" }; struct lvfs_run_ctxt saved; int rc = 0; - + if (*llh) { GOTO(out, rc = -EBUSY); } @@ -806,10 +857,10 @@ static int record_end_log(struct obd_device *obd, struct llog_handle **llh) int rc = 0; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - + rc = llog_close(*llh); *llh = NULL; - + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } @@ -850,15 +901,15 @@ static int mgs_write_log_direct(struct obd_device *obd, struct fs_db *fsdb, rc = record_start_log(obd, &llh, logname); if (rc) RETURN(rc); - + /* FIXME These should be a single journal transaction */ rc = record_marker(obd, llh, fsdb, CM_START, devname, comment); rc = record_lcfg(obd, llh, lcfg); - rc = record_marker(obd, llh, fsdb, CM_END, devname, comment); + rc = record_marker(obd, llh, fsdb, CM_END, devname, comment); rc = record_end_log(obd, &llh); - + RETURN(rc); } @@ -897,7 +948,7 @@ int mgs_write_log_direct_all(struct obd_device *obd, struct fs_db *fsdb, CERROR("Can't read %s dir\n", MOUNT_CONFIGS_DIR); RETURN(rc); } - + /* Could use fsdb index maps instead of directory listing */ list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { list_del(&dirent->lld_list); @@ -915,12 +966,228 @@ int mgs_write_log_direct_all(struct obd_device *obd, struct fs_db *fsdb, } OBD_FREE(dirent, sizeof(*dirent)); } + + RETURN(rc); +} + +struct temp_comp +{ + struct mgs_target_info *comp_tmti; + struct mgs_target_info *comp_mti; + struct fs_db *comp_fsdb; + struct obd_device *comp_obd; + struct sec_flavor_config comp_sec; +}; + +static int mgs_write_log_mdc_to_mdt(struct obd_device *, struct fs_db *, + struct mgs_target_info *, + struct sec_flavor_config *, char *); + +static int mgs_steal_llog_handler(struct llog_handle *llh, + struct llog_rec_hdr *rec, + void *data) +{ + struct obd_device * obd; + struct mgs_target_info *mti, *tmti; + struct fs_db *fsdb; + int cfg_len = rec->lrh_len; + char *cfg_buf = (char*) (rec + 1); + struct lustre_cfg *lcfg; + struct sec_flavor_config *sec_conf; + int rc = 0; + struct llog_handle *mdt_llh = NULL; + static int got_an_osc_or_mdc = 0; + /* 0: not found any osc/mdc; + 1: found osc; + 2: found mdc; + */ + static int last_step = -1; + + ENTRY; + + mti = ((struct temp_comp*)data)->comp_mti; + tmti = ((struct temp_comp*)data)->comp_tmti; + fsdb = ((struct temp_comp*)data)->comp_fsdb; + obd = ((struct temp_comp*)data)->comp_obd; + sec_conf = &((struct temp_comp*)data)->comp_sec; + + if (rec->lrh_type != OBD_CFG_REC) { + CERROR("unhandled lrh_type: %#x\n", rec->lrh_type); + RETURN(-EINVAL); + } + + rc = lustre_cfg_sanity_check(cfg_buf, cfg_len); + if (rc) { + CERROR("Insane cfg\n"); + RETURN(rc); + } + + lcfg = (struct lustre_cfg *)cfg_buf; + + if (lcfg->lcfg_command == LCFG_MARKER) { + struct cfg_marker *marker; + marker = lustre_cfg_buf(lcfg, 1); + if (!strncmp(marker->cm_comment,"add osc",7) && + (marker->cm_flags & CM_START)){ + got_an_osc_or_mdc = 1; + rc = record_start_log(obd, &mdt_llh, mti->mti_svname); + rc = record_marker(obd, mdt_llh, fsdb, CM_START, + mti->mti_svname,"add osc(copied)"); + rc = record_end_log(obd, &mdt_llh); + last_step = marker->cm_step; + RETURN(rc); + } + if (!strncmp(marker->cm_comment,"add osc",7) && + (marker->cm_flags & CM_END)){ + LASSERT(last_step == marker->cm_step); + last_step = -1; + got_an_osc_or_mdc = 0; + rc = record_start_log(obd, &mdt_llh, mti->mti_svname); + rc = record_marker(obd, mdt_llh, fsdb, CM_END, + mti->mti_svname,"add osc(copied)"); + rc = record_end_log(obd, &mdt_llh); + RETURN(rc); + } + if (!strncmp(marker->cm_comment,"add mdc",7) && + (marker->cm_flags & CM_START)){ + got_an_osc_or_mdc = 2; + last_step = marker->cm_step; + memcpy(tmti->mti_svname, marker->cm_tgtname, + strlen(marker->cm_tgtname)); + + RETURN(rc); + } + if (!strncmp(marker->cm_comment,"add mdc",7) && + (marker->cm_flags & CM_END)){ + LASSERT(last_step == marker->cm_step); + last_step = -1; + got_an_osc_or_mdc = 0; + RETURN(rc); + } + } + + if (got_an_osc_or_mdc == 0 || last_step < 0) + RETURN(rc); + if (lcfg->lcfg_command == LCFG_ADD_UUID) { + uint64_t nodenid; + nodenid = lcfg->lcfg_nid; + + tmti->mti_nids[tmti->mti_nid_count] = nodenid; + tmti->mti_nid_count++; + + RETURN(rc); + } + + if (lcfg->lcfg_command == LCFG_SETUP) { + char *target; + + target = lustre_cfg_string(lcfg, 1); + memcpy(tmti->mti_uuid, target, strlen(target)); + RETURN(rc); + } + + if (lcfg->lcfg_command == LCFG_SEC_FLAVOR) { + memcpy(sec_conf, lustre_cfg_buf(lcfg, 1), sizeof(*sec_conf)); + RETURN(rc); + } + + if (lcfg->lcfg_command == LCFG_ADD_MDC) { + int index; + + if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) + RETURN (-EINVAL); + + memcpy(tmti->mti_fsname, mti->mti_fsname, + strlen(mti->mti_fsname)); + tmti->mti_stripe_index = index; + + mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, sec_conf, + mti->mti_svname); + memset(tmti, 0, sizeof(*tmti)); + RETURN(rc); + } RETURN(rc); } +/* fsdb->fsdb_sem is already held in mgs_write_log_target*/ +/* stealed from mgs_get_fsdb_from_llog*/ +static int mgs_steal_llog_for_mdt_from_client(struct obd_device *obd, + char *client_name, + struct temp_comp* comp) +{ + struct llog_handle *loghandle; + struct lvfs_run_ctxt saved; + struct mgs_target_info *tmti; + int rc, rc2; + ENTRY; + + OBD_ALLOC_PTR(tmti); + if (tmti == NULL) + RETURN(-ENOMEM); + + comp->comp_tmti = tmti; + comp->comp_obd = obd; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), + &loghandle, NULL, client_name); + if (rc) + GOTO(out_pop, rc); + + rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL); + if (rc) + GOTO(out_close, rc); + + rc = llog_process(loghandle, mgs_steal_llog_handler, (void *)comp, NULL); + CDEBUG(D_MGS, "steal llog re = %d\n", rc); +out_close: + rc2 = llog_close(loghandle); + if (!rc) + rc = rc2; +out_pop: + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + OBD_FREE_PTR(tmti); + RETURN(rc); +} + +/* lmv is the second thing for client logs */ +/* copied from mgs_write_log_lov. Please refer to that. */ +static int mgs_write_log_lmv(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti, + char *logname, char *lmvname) +{ + struct llog_handle *llh = NULL; + struct lmv_desc *lmvdesc; + char *uuid; + int rc = 0; + ENTRY; + + CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname,logname); + + OBD_ALLOC(lmvdesc, sizeof(*lmvdesc)); + if (lmvdesc == NULL) + RETURN(-ENOMEM); + lmvdesc->ld_active_tgt_count = 0; + lmvdesc->ld_tgt_count = 0; + sprintf((char*)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname); + uuid = (char *)lmvdesc->ld_uuid.uuid; + + rc = record_start_log(obd, &llh, logname); + rc = record_marker(obd, llh, fsdb, CM_START, lmvname, "lmv setup"); + rc = record_attach(obd, llh, lmvname, "lmv", uuid); + rc = record_lmv_setup(obd, llh, lmvname, lmvdesc); + rc = record_marker(obd, llh, fsdb, CM_END, lmvname, "lmv setup"); + rc = record_end_log(obd, &llh); + + OBD_FREE(lmvdesc, sizeof(*lmvdesc)); + RETURN(rc); +} +/***************************************END PROTO**********************/ + /* lov is the first thing in the mdt and client logs */ -static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, +static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti, char *logname, char *lovname) { @@ -930,7 +1197,7 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, int rc = 0; ENTRY; - CDEBUG(D_MGS, "Writing log %s\n", logname); + CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname); /* #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1 @@ -944,7 +1211,7 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, RETURN(-ENOMEM); lovdesc->ld_magic = LOV_DESC_MAGIC; lovdesc->ld_tgt_count = 0; - /* Defaults. Can be changed later by lcfg config_param */ + /* Defaults. Can be changed later by lcfg config_param */ lovdesc->ld_default_stripe_count = 1; lovdesc->ld_pattern = LOV_PATTERN_RAID0; lovdesc->ld_default_stripe_size = 1024 * 1024; @@ -963,7 +1230,7 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, rc = record_marker(obd, llh, fsdb, CM_START, lovname, "lov setup"); rc = record_attach(obd, llh, lovname, "lov", uuid); rc = record_lov_setup(obd, llh, lovname, lovdesc); - rc = record_marker(obd, llh, fsdb, CM_END, lovname, "lov setup"); + rc = record_marker(obd, llh, fsdb, CM_END, lovname, "lov setup"); rc = record_end_log(obd, &llh); out: OBD_FREE(lovdesc, sizeof(*lovdesc)); @@ -994,7 +1261,7 @@ static int mgs_write_log_failnids(struct obd_device *obd, while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) { while (class_parse_nid(ptr, &nid, &ptr) == 0) { if (failnodeuuid == NULL) { - /* We don't know the failover node name, + /* We don't know the failover node name, so just use the first nid as the uuid */ rc = name_create(&failnodeuuid, libcfs_nid2str(nid), ""); @@ -1016,67 +1283,242 @@ static int mgs_write_log_failnids(struct obd_device *obd, return rc; } +static +void extract_sec_flavor(char *params, char *key, char **ptr) +{ + char *val = NULL, *tail; + int len; + + *ptr = NULL; + + if (class_find_param(params, key, &val)) + return; + + tail = strchr(val, ' '); + if (tail == NULL) + len = strlen(val); + else + len = tail - val; + + OBD_ALLOC(*ptr, len + 1); + if (*ptr == NULL) + return; + + memcpy(*ptr, val, len); + (*ptr)[len] = '\0'; +} + +static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti, + struct sec_flavor_config *sec_conf, + char *logname, char *lmvname) +{ + struct llog_handle *llh = NULL; + char *mdcname, *nodeuuid, *mdcuuid, *lmvuuid; + char index[5]; + int i, rc; + ENTRY; + + if (mgs_log_is_empty(obd, logname)) { + CERROR("log is empty! Logical error\n"); + RETURN(-EINVAL); + } + + CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n", + mti->mti_svname, logname, lmvname); + + name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); + name_create(&mdcname, mti->mti_svname, "-mdc"); + name_create(&mdcuuid, mdcname, "_UUID"); + name_create(&lmvuuid, lmvname, "_UUID"); + + rc = record_start_log(obd, &llh, logname); + rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname, + "add mdc"); + + for (i = 0; i < mti->mti_nid_count; i++) { + CDEBUG(D_MGS, "add nid %s for mdt\n", + libcfs_nid2str(mti->mti_nids[i])); + + rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid); + } + + rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid); + rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, mdcname, sec_conf); + rc = mgs_write_log_failnids(obd, mti, llh, mdcname); + snprintf(index, sizeof(index), "%d", mti->mti_stripe_index); + rc = record_mdc_add(obd, llh, lmvname, mdcuuid, mti->mti_uuid, + index, "1"); + rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, + "add mdc"); + rc = record_end_log(obd, &llh); + + name_destroy(&lmvuuid); + name_destroy(&mdcuuid); + name_destroy(&mdcname); + name_destroy(&nodeuuid); + RETURN(rc); +} + +/* add new mdc to already existent MDS */ +static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti, + struct sec_flavor_config *sec_conf, + char *logname) +{ + struct llog_handle *llh = NULL; + char *nodeuuid, *mdcname, *mdcuuid, *mdtuuid; + int idx = mti->mti_stripe_index; + char index[9]; + int i, rc; + + ENTRY; + if (mgs_log_is_empty(obd, mti->mti_svname)) { + CERROR("log is empty! Logical error\n"); + RETURN (-EINVAL); + } + + CDEBUG(D_MGS, "adding mdc index %d to %s\n", idx, logname); + + name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); + snprintf(index, sizeof(index), "-mdc%04x", idx); + name_create(&mdcname, logname, index); + name_create(&mdcuuid, mdcname, "_UUID"); + name_create(&mdtuuid, logname, "_UUID"); + + rc = record_start_log(obd, &llh, logname); + rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname, "add mdc"); + for (i = 0; i < mti->mti_nid_count; i++) { + CDEBUG(D_MGS, "add nid %s for mdt\n", + libcfs_nid2str(mti->mti_nids[i])); + rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid); + } + rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid); + rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, mdcname, sec_conf); + rc = mgs_write_log_failnids(obd, mti, llh, mdcname); + snprintf(index, sizeof(index), "%d", idx); + + rc = record_mdc_add(obd, llh, logname, mdcuuid, mti->mti_uuid, + index, "1"); + rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdc"); + rc = record_end_log(obd, &llh); + + name_destroy(&mdcuuid); + name_destroy(&mdcname); + name_destroy(&nodeuuid); + name_destroy(&mdtuuid); + RETURN(rc); +} + +static int mgs_write_log_mdt0(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti) +{ + char *log = mti->mti_svname; + struct llog_handle *llh = NULL; + char *uuid, *lovname; + char mdt_index[5]; + char *ptr = mti->mti_params; + int rc = 0, failout = 0; + ENTRY; + + OBD_ALLOC(uuid, sizeof(struct obd_uuid)); + if (uuid == NULL) + RETURN(-ENOMEM); + + if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0) + failout = (strncmp(ptr, "failout", 7) == 0); + + name_create(&lovname, log, "-mdtlov"); + if (mgs_log_is_empty(obd, log)) + rc = mgs_write_log_lov(obd, fsdb, mti, log, lovname); + + sprintf(uuid, "%s_UUID", log); + sprintf(mdt_index,"%d",mti->mti_stripe_index); + + /* add MDT itself */ + rc = record_start_log(obd, &llh, log); + if (rc) + GOTO(out, rc); + + /* FIXME this whole fn should be a single journal transaction */ + rc = record_marker(obd, llh, fsdb, CM_START, log, "add mdt"); + rc = record_attach(obd, llh, log, LUSTRE_MDT_NAME, uuid); + rc = record_mount_opt(obd, llh, log, lovname, NULL); + rc = record_setup(obd, llh, log, uuid, mdt_index, lovname, + failout ? "n" : "f"); + rc = record_marker(obd, llh, fsdb, CM_END, log, "add mdt"); + rc = record_end_log(obd, &llh); +out: + name_destroy(&lovname); + OBD_FREE(uuid, sizeof(struct obd_uuid)); + RETURN(rc); +} + +/* envelope method for all layers log */ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, - struct mgs_target_info *mti) + struct mgs_target_info *mti) { + char *cliname, *sec; struct llog_handle *llh = NULL; - char *cliname, *mdcname, *nodeuuid, *mdcuuid; - int rc, i, first_log = 0; + struct temp_comp comp = { 0 }; + struct sec_flavor_config sec_conf_mdt, sec_conf_cli; + char mdt_index[9]; + int rc, i = 0; ENTRY; CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname); - + +#if 0 + /* COMPAT_146 */ + if (mti->mti_flags & LDD_F_UPGRADE14) { + /* We're starting with an old uuid. Assume old name for lov + as well since the lov entry already exists in the log. */ + CDEBUG(D_MGS, "old mds uuid %s\n", mti->mti_uuid); + if (strncmp(mti->mti_uuid, fsdb->fsdb_mdtlov + 4, + strlen(fsdb->fsdb_mdtlov) - 4) != 0) { + CERROR("old mds uuid %s doesn't match log %s (%s)\n", + mti->mti_uuid, fsdb->fsdb_mdtlov, + fsdb->fsdb_mdtlov + 4); + RETURN(-EINVAL); + } + } + /* end COMPAT_146 */ +#endif if (mti->mti_uuid[0] == '\0') { /* Make up our own uuid */ snprintf(mti->mti_uuid, sizeof(mti->mti_uuid), "%s_UUID", mti->mti_svname); } - /* Append mdt info to mdt log */ - if (mgs_log_is_empty(obd, mti->mti_svname)) { - /* This is the first time for all logs for this fs, - since any ost should have already started the mdt log. */ - first_log++; - rc = mgs_write_log_lov(obd, fsdb, mti, mti->mti_svname, - fsdb->fsdb_mdtlov); - } - /* else there's already some ost entries in the mdt log. */ + /* security flavor */ + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_MDT, sec, &sec_conf_mdt); + name_destroy(&sec); + if (rc) + RETURN(rc); - /* We added the lov, maybe some osc's, now for the mdt. - We might add more ost's after this. Note that during the parsing - of this log, this is when the mdt will start. (This was not - formerly part of the old mds log, it was directly executed by - lconf.) */ - /* - mount_option 0: 1:mdsA 2:lov_mdsA - attach mds mdsA mdsA_UUID - setup /dev/loop2 ldiskfs mdsA errors=remount-ro,user_xattr - */ - rc = record_start_log(obd, &llh, mti->mti_svname); - if (rc) + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_MDT, sec, &sec_conf_cli); + name_destroy(&sec); + if (rc) RETURN(rc); - /* FIXME this whole fn should be a single journal transaction */ - rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add mdt"); - rc = record_mount_opt(obd, llh, mti->mti_svname, fsdb->fsdb_mdtlov, 0); - rc = record_attach(obd, llh, mti->mti_svname, LUSTRE_MDS_NAME, - mti->mti_uuid); - rc = record_setup(obd, llh, mti->mti_svname, - "dev"/*ignored*/, "type"/*ignored*/, - mti->mti_svname, 0/*options*/); - rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdt"); - rc = record_end_log(obd, &llh); + /* add mdt */ + rc = mgs_write_log_mdt0(obd, fsdb, mti); + /* Append the mdt info to the client log */ name_create(&cliname, mti->mti_fsname, "-client"); - if (first_log) { + + if (mgs_log_is_empty(obd, cliname)) { /* Start client log */ - rc = mgs_write_log_lov(obd, fsdb, mti, cliname, + rc = mgs_write_log_lov(obd, fsdb, mti, cliname, fsdb->fsdb_clilov); + rc = mgs_write_log_lmv(obd, fsdb, mti, cliname, + fsdb->fsdb_clilmv); } - name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]),/*"_UUID"*/""); - name_create(&mdcname, mti->mti_svname, "-mdc"); - name_create(&mdcuuid, mdcname, "_UUID"); /* #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f @@ -1085,12 +1527,17 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client */ - rc = record_start_log(obd, &llh, cliname); - if (rc) - GOTO(out, rc); - rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add mdc"); + +#if 0 /* COMPAT_146 */ - if (fsdb->fsdb_flags & FSDB_OLDLOG14) { + if (mti->mti_flags & LDD_F_UPGRADE14) { + rc = record_start_log(obd, &llh, cliname); + if (rc) + GOTO(out, rc); + + rc = record_marker(obd, llh, fsdb, CM_START, + mti->mti_svname,"add mdc"); + /* Old client log already has MDC entry, but needs mount opt for new client name (lustre-client) */ /* FIXME Old MDT log already has an old mount opt @@ -1098,55 +1545,81 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, class_del_profiles()) */ rc = record_mount_opt(obd, llh, cliname, fsdb->fsdb_clilov, fsdb->fsdb_mdc); - /* Only add failnids with --writeconf - rc = mgs_write_log_failnids(obd, mti, llh, fsdb->fsdb_mdc); - */ /* end COMPAT_146 */ - } else { - for (i = 0; i < mti->mti_nid_count; i++) { - CDEBUG(D_MGS, "add nid %s\n", - libcfs_nid2str(mti->mti_nids[i])); - rc = record_add_uuid(obd, llh, mti->mti_nids[i], - nodeuuid); - } - rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid); - rc = record_setup(obd, llh, mdcname, mti->mti_uuid,nodeuuid, - 0, 0); - rc = mgs_write_log_failnids(obd, mti, llh, mdcname); + + rc = record_marker(obd, llh, fsdb, CM_END, + mti->mti_svname, "add mdc"); + } else +#endif + { + /* copy client info about lov/lmv */ + comp.comp_mti = mti; + comp.comp_fsdb = fsdb; + + rc = mgs_steal_llog_for_mdt_from_client(obd, cliname, + &comp); + + rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, &sec_conf_cli, + cliname, fsdb->fsdb_clilmv); + /* add mountopts */ + rc = record_start_log(obd, &llh, cliname); + if (rc) + GOTO(out, rc); + + rc = record_marker(obd, llh, fsdb, CM_START, cliname, + "mount opts"); rc = record_mount_opt(obd, llh, cliname, fsdb->fsdb_clilov, - mdcname); + fsdb->fsdb_clilmv); + rc = record_marker(obd, llh, fsdb, CM_END, cliname, + "mount opts"); } - rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdc"); + rc = record_end_log(obd, &llh); out: - name_destroy(&mdcuuid); - name_destroy(&mdcname); - name_destroy(&nodeuuid); name_destroy(&cliname); + + // for_all_existing_mdt except current one + for (i = 0; i < INDEX_MAP_SIZE * 8; i++){ + char *mdtname; + if (i != mti->mti_stripe_index && + test_bit(i, fsdb->fsdb_mdt_index_map)) { + sprintf(mdt_index,"-MDT%04x",i); + + name_create(&mdtname, mti->mti_fsname, mdt_index); + rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti, + &sec_conf_mdt, mdtname); + name_destroy(&mdtname); + } + } + RETURN(rc); } /* Add the ost info to the client/mdt lov */ -static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, - struct mgs_target_info *mti, - char *logname, char *lovname, int flags) +static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb, + struct mgs_target_info *mti, + char *logname, char *suffix, char *lovname, + struct sec_flavor_config *sec_conf, + int flags) { struct llog_handle *llh = NULL; - char *nodeuuid, *oscname, *oscuuid, *lovuuid; + char *nodeuuid, *oscname, *oscuuid, *lovuuid, *svname; char index[5]; int i, rc; + ENTRY; + CDEBUG(D_INFO, "adding osc for %s to log %s\n", + mti->mti_svname, logname); + if (mgs_log_is_empty(obd, logname)) { /* The first item in the log must be the lov, so we have somewhere to add our osc. */ rc = mgs_write_log_lov(obd, fsdb, mti, logname, lovname); } - CDEBUG(D_MGS, "adding osc for %s to log %s\n", - mti->mti_svname, logname); - name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); - name_create(&oscname, mti->mti_svname, "-osc"); + name_create(&svname, mti->mti_svname, "-osc"); + name_create(&oscname, svname, suffix); name_create(&oscuuid, oscname, "_UUID"); name_create(&lovuuid, lovname, "_UUID"); @@ -1161,6 +1634,7 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */ + rc = record_start_log(obd, &llh, logname); if (rc) GOTO(out, rc); @@ -1173,6 +1647,7 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, } rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid); rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, oscname, sec_conf); rc = mgs_write_log_failnids(obd, mti, llh, oscname); snprintf(index, sizeof(index), "%d", mti->mti_stripe_index); rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1"); @@ -1183,17 +1658,20 @@ out: name_destroy(&lovuuid); name_destroy(&oscuuid); name_destroy(&oscname); + name_destroy(&svname); name_destroy(&nodeuuid); - return rc; + RETURN(rc); } static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti) { struct llog_handle *llh = NULL; - char *logname; + char *logname, *lovname, *sec; + char mdt_index[9]; char *ptr = mti->mti_params; - int rc, flags = 0, failout = 0; + struct sec_flavor_config sec_conf_mdt, sec_conf_cli; + int rc, flags = 0, failout = 0, i; ENTRY; CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname); @@ -1203,13 +1681,27 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, /* If the ost log already exists, that means that someone reformatted the ost and it called target_add again. */ if (!mgs_log_is_empty(obd, mti->mti_svname)) { - LCONSOLE_ERROR("The config log for %s already exists, yet the " - "server claims it never registered. It may have" - " been reformatted, or the index changed. " - "writeconf the MDT to regenerate all logs.\n", - mti->mti_svname); + LCONSOLE_ERROR_MSG(0x141, "The config log for %s already " + "exists, yet the server claims it never " + "registered. It may have been reformatted, " + "or the index changed. writeconf the MDT to " + "regenerate all logs.\n", mti->mti_svname); RETURN(-EALREADY); } + + /* security flavors */ + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_OST, sec, &sec_conf_mdt); + name_destroy(&sec); + if (rc) + RETURN(rc); + + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_OST, sec, &sec_conf_cli); + name_destroy(&sec); + if (rc) + RETURN(rc); + /* attach obdfilter ost1 ost1_UUID setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr @@ -1241,27 +1733,34 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, /* Note that we can't add any new failnids, since we don't know the old osc names. */ flags = CM_SKIP | CM_UPGRADE146; + } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) { - /* If the update flag isn't set, don't really update - client/mdt logs. */ + /* If the update flag isn't set, don't update client/mdt + logs. */ flags |= CM_SKIP; LCONSOLE_WARN("Client log for %s was not updated; writeconf " "the MDT first to regenerate it.\n", mti->mti_svname); } - - /* Append ost info to mdt log */ - /* FIXME add to all MDT logs for CMD */ - /* FIXME need real MDT name, but MDT may not have registered yet! */ - name_create(&logname, mti->mti_fsname, "-MDT0000"); - rc = mgs_write_log_osc(obd, fsdb, mti, logname, fsdb->fsdb_mdtlov, - flags); - name_destroy(&logname); - + + // for_all_existing_mdt + for (i = 0; i < INDEX_MAP_SIZE * 8; i++){ + if (test_bit(i, fsdb->fsdb_mdt_index_map)) { + sprintf(mdt_index,"-MDT%04x",i); + name_create(&logname, mti->mti_fsname, mdt_index); + name_create(&lovname, logname, "-mdtlov"); + mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, + mdt_index, lovname, + &sec_conf_mdt, flags); + name_destroy(&logname); + name_destroy(&lovname); + } + } + /* Append ost info to the client log */ name_create(&logname, mti->mti_fsname, "-client"); - rc = mgs_write_log_osc(obd, fsdb, mti, logname, fsdb->fsdb_clilov, - flags); + mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, "", + fsdb->fsdb_clilov, &sec_conf_cli, 0); name_destroy(&logname); RETURN(rc); @@ -1287,29 +1786,16 @@ static int mgs_write_log_add_failnid(struct obd_device *obd, struct fs_db *fsdb, /* Verify that we know about this target */ if (mgs_log_is_empty(obd, mti->mti_svname)) { - LCONSOLE_ERROR("The target %s has not registered yet. " - "It must be started before failnids can " - "be added.\n", mti->mti_svname); + LCONSOLE_ERROR_MSG(0x142, "The target %s has not registered " + "yet. It must be started before failnids " + "can be added.\n", mti->mti_svname); RETURN(-ENOENT); } /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { - /* COMPAT_146 */ - if (fsdb->fsdb_mdc) - name_create(&cliname, fsdb->fsdb_mdc, ""); - else - name_create(&cliname, mti->mti_svname, "-mdc"); + name_create(&cliname, mti->mti_svname, "-mdc"); } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { - /* COMPAT_146 */ - if (fsdb->fsdb_flags & FSDB_OLDLOG14) { - LCONSOLE_ERROR("Failover NIDs cannot be added to " - "upgraded client logs for %s. Consider " - "updating the configuration with " - "--writeconf.\n", - mti->mti_svname); - RETURN(-EINVAL); - } name_create(&cliname, mti->mti_svname, "-osc"); } else { RETURN(-EINVAL); @@ -1410,18 +1896,22 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, /* Processed in lustre_start_mgc */ if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0) - goto end_while; + GOTO(end_while, rc); /* Processed in mgs_write_log_ost */ if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) { if (mti->mti_flags & LDD_F_PARAM) { - LCONSOLE_ERROR("%s can only be changed with " - "tunefs.lustre and --writeconf\n", - ptr); + LCONSOLE_ERROR_MSG(0x169, "%s can only be " + "changed with tunefs.lustre" + "and --writeconf\n", ptr); rc = -EPERM; } - goto end_while; + GOTO(end_while, rc); } + /* Processed in mgs_write_log_mdt/mgs_write_log_ost */ + if (class_match_param(ptr, PARAM_SEC_RPC_MDT, NULL) == 0 || + class_match_param(ptr, PARAM_SEC_RPC_CLI, NULL) == 0) + GOTO(end_while, rc); if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) { /* Add a failover nidlist */ @@ -1432,7 +1922,7 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, CDEBUG(D_MGS, "Adding failnode\n"); rc = mgs_write_log_add_failnid(obd, fsdb, mti); } - goto end_while; + GOTO(end_while, rc); } if (class_match_param(ptr, PARAM_SYS_TIMEOUT, &tmp) == 0) { @@ -1441,7 +1931,6 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, timeout = simple_strtoul(tmp, NULL, 0); CDEBUG(D_MGS, "obd timeout %d\n", timeout); - lustre_cfg_bufs_reset(&bufs, NULL); lcfg = lustre_cfg_new(LCFG_SET_TIMEOUT, &bufs); lcfg->lcfg_num = timeout; @@ -1450,15 +1939,19 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, mti->mti_fsname, "timeout"); lustre_cfg_free(lcfg); - goto end_while; + GOTO(end_while, rc); } - if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0){ + if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0) { /* active=0 means off, anything else means on */ + char mdt_index[16]; int flag = (*tmp == '0') ? CM_EXCLUDE : 0; + int i; + if (!(mti->mti_flags & LDD_F_SV_TYPE_OST)) { - LCONSOLE_ERROR("%s: Only OSCs can be (de)activ" - "ated.\n", mti->mti_svname); + LCONSOLE_ERROR_MSG(0x144, "%s: Only OSCs can " + "be (de)activated.\n", + mti->mti_svname); rc = -EINVAL; goto end_while; } @@ -1473,59 +1966,78 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, goto active_err; /* Modify mdtlov */ /* FIXME add to all MDT logs for CMD */ - name_create(&logname, mti->mti_fsname, "-MDT0000"); - rc = mgs_modify(obd, fsdb, mti, logname, - mti->mti_svname, "add osc", flag); - name_destroy(&logname); + for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { + if (!test_bit(i, fsdb->fsdb_mdt_index_map)) + continue; + sprintf(mdt_index,"-MDT%04x", i); + name_create(&logname, mti->mti_fsname, mdt_index); + rc = mgs_modify(obd, fsdb, mti, logname, + mti->mti_svname, "add osc", flag); + name_destroy(&logname); + if (rc) + goto active_err; + } active_err: if (rc) { - LCONSOLE_ERROR("Couldn't find %s in log (%d). " - "No permanent changes were made to the " - "config log.\n", mti->mti_svname, rc); + LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in" + "log (%d). No permanent " + "changes were made to the " + "config log.\n", + mti->mti_svname, rc); if (fsdb->fsdb_flags & FSDB_OLDLOG14) - LCONSOLE_ERROR("This may be because the" - " log is in the old 1.4 style. Consider" - " --writeconf to update the logs.\n"); + LCONSOLE_ERROR_MSG(0x146, "This may be" + " because the log " + "is in the old 1.4" + "style. Consider " + " --writeconf to " + "update the logs.\n"); goto end_while; } /* Fall through to osc proc for deactivating live OSC on running MDT / clients. */ } - /* Below here, let obd's XXX_process_config methods handle it */ - + /* All lov. in proc */ if (class_match_param(ptr, PARAM_LOV, NULL) == 0) { + char mdt_index[16]; + char *mdtlovname; + CDEBUG(D_MGS, "lov param %s\n", ptr); if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) { - LCONSOLE_ERROR("LOV params must be set on the " - "MDT, not %s. Ignoring.\n", - mti->mti_svname); + LCONSOLE_ERROR_MSG(0x147, "LOV params must be " + "set on the MDT, not %s. " + "Ignoring.\n", + mti->mti_svname); rc = 0; goto end_while; } /* Modify mdtlov */ - if (mgs_log_is_empty(obd, mti->mti_svname)) { - rc = -ENODEV; - goto end_while; - } + if (mgs_log_is_empty(obd, mti->mti_svname)) + GOTO(end_while, rc = -ENODEV); + + sprintf(mdt_index,"-MDT%04x", mti->mti_stripe_index); + name_create(&logname, mti->mti_fsname, mdt_index); + name_create(&mdtlovname, logname, "-mdtlov"); rc = mgs_wlp_lcfg(obd, fsdb, mti, mti->mti_svname, - &bufs, fsdb->fsdb_mdtlov, ptr); - if (rc) - goto end_while; + &bufs, mdtlovname, ptr); + name_destroy(&logname); + name_destroy(&mdtlovname); + if (rc) + GOTO(end_while, rc); /* Modify clilov */ name_create(&logname, mti->mti_fsname, "-client"); rc = mgs_wlp_lcfg(obd, fsdb, mti, logname, &bufs, fsdb->fsdb_clilov, ptr); name_destroy(&logname); - goto end_while; + GOTO(end_while, rc); } /* All osc., mdc., llite. params in proc */ if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) || - (class_match_param(ptr, PARAM_MDC, NULL) == 0) || + (class_match_param(ptr, PARAM_MDC, NULL) == 0) || (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) { char *cname; if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) { @@ -1537,16 +2049,19 @@ active_err: if (fsdb->fsdb_mdc) name_create(&cname, fsdb->fsdb_mdc, ""); else - name_create(&cname, mti->mti_svname, + name_create(&cname, mti->mti_svname, "-mdc"); } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { /* COMPAT_146 */ if (fsdb->fsdb_flags & FSDB_OLDLOG14) { - LCONSOLE_ERROR("Upgraded client logs " - "for %s cannot be modified. " - "Consider updating the " - "configuration with --writeconf\n", - mti->mti_svname); + LCONSOLE_ERROR_MSG(0x148, "Upgraded " + "client logs for %s" + " cannot be " + "modified. Consider" + " updating the " + "configuration with" + " --writeconf\n", + mti->mti_svname); /* We don't know the names of all the old oscs*/ rc = -EINVAL; @@ -1562,23 +2077,37 @@ active_err: /* Modify client */ name_create(&logname, mti->mti_fsname, "-client"); - rc = mgs_wlp_lcfg(obd, fsdb, mti, logname, &bufs, + rc = mgs_wlp_lcfg(obd, fsdb, mti, logname, &bufs, cname, ptr); - name_destroy(&logname); - + /* osc params affect the MDT as well */ - if (mti->mti_flags & LDD_F_SV_TYPE_OST) { - /* FIXME add to all MDT logs for CMD */ - name_create(&logname, mti->mti_fsname, - "-MDT0000"); - if (!mgs_log_is_empty(obd, logname)) - rc = mgs_wlp_lcfg(obd, fsdb, mti, - logname, &bufs, - cname, ptr); - name_destroy(&logname); + if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) { + char mdt_index[16]; + int i; + + for (i = 0; i < INDEX_MAP_SIZE * 8; i++){ + if (!test_bit(i, fsdb->fsdb_mdt_index_map)) + continue; + name_destroy(&cname); + sprintf(mdt_index, "-osc-MDT%04x", i); + name_create(&cname, mti->mti_svname, + mdt_index); + name_destroy(&logname); + sprintf(mdt_index, "-MDT%04x", i); + name_create(&logname, mti->mti_fsname, + mdt_index); + if (!mgs_log_is_empty(obd, logname)) + rc = mgs_wlp_lcfg(obd, fsdb, + mti, logname, + &bufs, cname, + ptr); + if (rc) + break; + } } + name_destroy(&logname); name_destroy(&cname); - goto end_while; + GOTO(end_while, rc); } /* All mdt., ost. params in proc */ @@ -1591,7 +2120,7 @@ active_err: } rc = mgs_wlp_lcfg(obd, fsdb, mti, mti->mti_svname, &bufs, mti->mti_svname, ptr); - goto end_while; + GOTO(end_while, rc); } LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr); @@ -1613,10 +2142,9 @@ end_while: RETURN(rc); } +/* Not implementing automatic failover nid addition at this time. */ int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti) { - /* Not implementing automatic failover nid addition at this time. */ - return 0; #if 0 struct fs_db *fsdb; int rc; @@ -1642,6 +2170,7 @@ int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti) RETURN(rc); #endif + return 0; } int mgs_write_log_target(struct obd_device *obd, @@ -1657,6 +2186,7 @@ int mgs_write_log_target(struct obd_device *obd, CERROR("Can't get index (%d)\n", rc); RETURN(rc); } + /* COMPAT_146 */ if (mti->mti_flags & LDD_F_UPGRADE14) { if (rc == EALREADY) { @@ -1664,11 +2194,12 @@ int mgs_write_log_target(struct obd_device *obd, "upgrading\n", mti->mti_stripe_index, mti->mti_svname); } else { - LCONSOLE_ERROR("Failed to find %s in the old client " - "log. Apparently it is not part of this " - "filesystem, or the old log is wrong.\n" - "Use 'writeconf' on the MDT to force log" - " regeneration.\n", mti->mti_svname); + LCONSOLE_ERROR_MSG(0x149, "Failed to find %s in the old" + " client log. Apparently it is not " + "part of this filesystem, or the old" + " log is wrong.\nUse 'writeconf' on " + "the MDT to force log regeneration." + "\n", mti->mti_svname); /* Not in client log? Upgrade anyhow...*/ /* Argument against upgrading: reformat MDT, upgrade OST, then OST will start but will be SKIPped @@ -1678,10 +2209,6 @@ int mgs_write_log_target(struct obd_device *obd, /* end COMPAT_146 */ } else { if (rc == EALREADY) { - /* This might be a params update, or a - local writeconf. (For "full" writeconf, the client - log won't have an entry for this target, so we - won't get here.) */ LCONSOLE_WARN("Found index %d for %s, updating log\n", mti->mti_stripe_index, mti->mti_svname); /* We would like to mark old log sections as invalid @@ -1756,11 +2283,11 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); if (rc) RETURN(rc); - + if (fsdb->fsdb_flags & FSDB_LOG_EMPTY) { - LCONSOLE_ERROR("The old client log %s-client is missing. Was " - "tunefs.lustre successful?\n", - mti->mti_fsname); + LCONSOLE_ERROR_MSG(0x14a, "The old client log %s-client is " + "missing. Was tunefs.lustre successful?\n", + mti->mti_fsname); RETURN(-ENOENT); } @@ -1772,12 +2299,12 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { if (mgs_log_is_empty(obd, mti->mti_svname)) { - LCONSOLE_ERROR("The old MDT log %s is missing. Was " - "tunefs.lustre successful?\n", - mti->mti_svname); + LCONSOLE_ERROR_MSG(0x14b, "The old MDT log %s is " + "missing. Was tunefs.lustre " + "successful?\n", + mti->mti_svname); RETURN(-ENOENT); } - /* We're starting with an old uuid. Assume old name for lov as well since the lov entry already exists in the log. */ CDEBUG(D_MGS, "old mds uuid %s\n", mti->mti_uuid); @@ -1791,10 +2318,10 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) } if (!(fsdb->fsdb_flags & FSDB_OLDLOG14)) { - LCONSOLE_ERROR("%s-client is supposedly an old log, but no old " - "LOV or MDT was found. Consider updating the " - "configuration with --writeconf.\n", - mti->mti_fsname); + LCONSOLE_ERROR_MSG(0x14c, "%s-client is supposedly an old " + "log, but no old LOV or MDT was found. " + "Consider updating the configuration with" + " --writeconf.\n", mti->mti_fsname); } RETURN(rc); @@ -1912,7 +2439,7 @@ int mgs_setparam(struct obd_device *obd, struct lustre_cfg *lcfg, char *fsname) } } if (!devname) { - LCONSOLE_ERROR("No target specified: %s\n", param); + LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param); RETURN(-ENOSYS); } @@ -1966,7 +2493,6 @@ out: RETURN(rc); } - #if 0 /******************** unused *********************/ static int mgs_backup_llog(struct obd_device *obd, char* fsname) @@ -2026,6 +2552,4 @@ out: return rc; } - - #endif diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index a33afd2..ffd9cbf 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -23,10 +23,13 @@ obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o obdclass-all-objs += class_obd.o obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o -obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o +obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o +obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o lu_time.o obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs) +EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs + ifeq ($(PATCHLEVEL),6) llog_test-objs := llog-test.o diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index e0982ca..5f70443 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -7,12 +7,12 @@ DIST_SUBDIRS := $(SUBDIRS) if LIBLUSTRE noinst_LIBRARIES = liblustreclass.a -liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c uuid.c +liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c -liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c +liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c -liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\" +liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) liblustreclass_a_CFLAGS = $(LLCFLAGS) endif @@ -33,7 +33,8 @@ obdclass_SOURCES := \ class_obd.c genops.c lprocfs_status.c \ lustre_handles.c lustre_peer.c obd_config.c \ obdo.c debug.c llog_ioctl.c uuid.c prng.c \ - llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c + llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c \ + mea.c lu_object.c dt_object.c hash.c obdclass_CFLAGS := $(EXTRA_KCFLAGS) obdclass_LDFLAGS := $(EXTRA_KLDFLAGS) diff --git a/lustre/obdclass/capa.c b/lustre/obdclass/capa.c new file mode 100644 index 0000000..b47b58b --- /dev/null +++ b/lustre/obdclass/capa.c @@ -0,0 +1,290 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/obdclass/capa.c + * Lustre Capability Hash Management + * + * Copyright (c) 2005 Cluster File Systems, Inc. + * Author: Lai Siyao<lsy@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_SEC + +#ifdef __KERNEL__ +#include <linux/version.h> +#include <linux/fs.h> +#include <asm/unistd.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <obd_class.h> +#include <lustre_debug.h> +#include <lustre/lustre_idl.h> +#else +#include <liblustre.h> +#endif + +#include <libcfs/list.h> +#include <lustre_capa.h> + +#define NR_CAPAHASH 32 +#define CAPA_HASH_SIZE 3000 /* for MDS & OSS */ + +cfs_mem_cache_t *capa_cachep = NULL; + +#ifdef __KERNEL__ +/* lock for capa hash/capa_list/fo_capa_keys */ +spinlock_t capa_lock = SPIN_LOCK_UNLOCKED; + +struct list_head capa_list[CAPA_SITE_MAX]; +#endif +/* capa count */ +int capa_count[CAPA_SITE_MAX] = { 0, }; + +static struct capa_hmac_alg capa_hmac_algs[] = { + DEF_CAPA_HMAC_ALG("sha1", SHA1, 20, 20), +}; + +EXPORT_SYMBOL(capa_cachep); +EXPORT_SYMBOL(capa_list); +EXPORT_SYMBOL(capa_lock); +EXPORT_SYMBOL(capa_count); + +struct hlist_head *init_capa_hash(void) +{ + struct hlist_head *hash; + int nr_hash, i; + + OBD_ALLOC(hash, PAGE_SIZE); + if (!hash) + return NULL; + + nr_hash = PAGE_SIZE / sizeof(struct hlist_head); + LASSERT(nr_hash > NR_CAPAHASH); + + for (i = 0; i < NR_CAPAHASH; i++) + INIT_HLIST_HEAD(hash + i); + return hash; +} + +#ifdef __KERNEL__ +static inline int capa_on_server(struct obd_capa *ocapa) +{ + return ocapa->c_site == CAPA_SITE_SERVER; +} + +static inline void capa_delete(struct obd_capa *ocapa) +{ + LASSERT(capa_on_server(ocapa)); + hlist_del(&ocapa->u.tgt.c_hash); + list_del(&ocapa->c_list); + capa_count[ocapa->c_site]--; + free_capa(ocapa); +} + +void cleanup_capa_hash(struct hlist_head *hash) +{ + int i; + struct hlist_node *pos, *next; + struct obd_capa *oc; + + spin_lock(&capa_lock); + for (i = 0; i < NR_CAPAHASH; i++) { + hlist_for_each_entry_safe(oc, pos, next, hash + i, u.tgt.c_hash) + capa_delete(oc); + } + spin_unlock(&capa_lock); + + OBD_FREE(hash, PAGE_SIZE); +} + +static inline int const capa_hashfn(struct lu_fid *fid) +{ + return (fid_oid(fid) ^ fid_ver(fid)) * + (unsigned long)(fid_seq(fid) + 1) % NR_CAPAHASH; +} + +/* capa renewal time check is earlier than that on client, which is to prevent + * client renew right after obtaining it. */ +static inline int capa_is_to_expire(struct obd_capa *oc) +{ + return cfs_time_before(cfs_time_sub(oc->c_expiry, + cfs_time_seconds(oc->c_capa.lc_timeout)*2/3), + cfs_time_current()); +} + +static struct obd_capa *find_capa(struct lustre_capa *capa, + struct hlist_head *head, int alive) +{ + struct hlist_node *pos; + struct obd_capa *ocapa; + int len = alive ? offsetof(struct lustre_capa, lc_keyid):sizeof(*capa); + + hlist_for_each_entry(ocapa, pos, head, u.tgt.c_hash) { + if (memcmp(&ocapa->c_capa, capa, len)) + continue; + /* don't return one that will expire soon in this case */ + if (alive && capa_is_to_expire(ocapa)) + continue; + + LASSERT(capa_on_server(ocapa)); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found"); + return ocapa; + } + + return NULL; +} + +#define LRU_CAPA_DELETE_COUNT 12 +static inline void capa_delete_lru(struct list_head *head) +{ + struct obd_capa *ocapa; + struct list_head *node = head->next; + int count = 0; + + /* free LRU_CAPA_DELETE_COUNT unused capa from head */ + while (count++ < LRU_CAPA_DELETE_COUNT) { + ocapa = list_entry(node, struct obd_capa, c_list); + node = node->next; + if (atomic_read(&ocapa->c_refc)) + continue; + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free lru"); + capa_delete(ocapa); + } +} + +/* add or update */ +struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa) +{ + struct hlist_head *head = hash + capa_hashfn(&capa->lc_fid); + struct obd_capa *ocapa, *old = NULL; + struct list_head *list = &capa_list[CAPA_SITE_SERVER]; + + ocapa = alloc_capa(CAPA_SITE_SERVER); + if (!ocapa) + return NULL; + + spin_lock(&capa_lock); + old = find_capa(capa, head, 0); + if (!old) { + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + hlist_add_head(&ocapa->u.tgt.c_hash, head); + list_add_tail(&ocapa->c_list, list); + capa_count[CAPA_SITE_SERVER]++; + capa_get(ocapa); + + if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE) + capa_delete_lru(list); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "new"); + + spin_unlock(&capa_lock); + return ocapa; + } + + capa_get(old); + spin_unlock(&capa_lock); + + DEBUG_CAPA(D_SEC, &old->c_capa, "update"); + + free_capa(ocapa); + return old; +} + +struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa, + int alive) +{ + struct obd_capa *ocapa; + + spin_lock(&capa_lock); + ocapa = find_capa(capa, hash + capa_hashfn(&capa->lc_fid), alive); + if (ocapa) { + list_move_tail(&ocapa->c_list, &capa_list[CAPA_SITE_SERVER]); + capa_get(ocapa); + } + spin_unlock(&capa_lock); + + return ocapa; +} + +int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key) +{ + struct crypto_tfm *tfm; + struct capa_hmac_alg *alg; + int keylen; + struct scatterlist sl = { + .page = virt_to_page(capa), + .offset = (unsigned long)(capa) % PAGE_SIZE, + .length = offsetof(struct lustre_capa, lc_hmac), + }; + + if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1) { + CERROR("unknown capability hmac algorithm!\n"); + return -EFAULT; + } + + alg = &capa_hmac_algs[capa_alg(capa)]; + + tfm = crypto_alloc_tfm(alg->ha_name, 0); + if (!tfm) { + CERROR("crypto_alloc_tfm failed, check whether your kernel" + "has crypto support!\n"); + return -ENOMEM; + } + keylen = alg->ha_keylen; + + crypto_hmac(tfm, key, &keylen, &sl, 1, hmac); + crypto_free_tfm(tfm); + + return 0; +} +#endif + +void capa_cpy(void *capa, struct obd_capa *ocapa) +{ + spin_lock(&ocapa->c_lock); + *(struct lustre_capa *)capa = ocapa->c_capa; + spin_unlock(&ocapa->c_lock); +} + +char *dump_capa_content(char *buf, char *key, int len) +{ + int i, n = 0; + + for (i = 0; i < len; i++) + n += sprintf(buf + n, "%02x", (unsigned char) key[i]); + return buf; +} + +EXPORT_SYMBOL(init_capa_hash); +EXPORT_SYMBOL(cleanup_capa_hash); + +EXPORT_SYMBOL(capa_add); +EXPORT_SYMBOL(capa_lookup); + +EXPORT_SYMBOL(capa_hmac); +EXPORT_SYMBOL(capa_cpy); + +EXPORT_SYMBOL(dump_capa_content); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 1684e728..1c0c4c7 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -39,9 +39,7 @@ #include <obd_class.h> #include <lustre_debug.h> #include <lprocfs_status.h> -#ifdef __KERNEL__ -#include <linux/lustre_build_version.h> -#endif +#include <lustre/lustre_build_version.h> #include <libcfs/list.h> #include "llog_internal.h" @@ -185,6 +183,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1); if (!err) err = class_process_config(lcfg); + OBD_FREE(lcfg, data->ioc_plen1); GOTO(out, err); } @@ -273,7 +272,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) if (!data->ioc_inlbuf1) { CERROR("No buffer passed in ioctl\n"); GOTO(out, err = -EINVAL); - } + } if (data->ioc_inllen1 < 128) { CERROR("ioctl buffer too small to hold version\n"); GOTO(out, err = -EINVAL); @@ -282,7 +281,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) obd = class_num2obd(index); if (!obd) GOTO(out, err = -ENOENT); - + if (obd->obd_stopping) status = "ST"; else if (obd->obd_set_up) @@ -290,7 +289,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) else if (obd->obd_attached) status = "AT"; else - status = "--"; + status = "--"; str = (char *)data->ioc_bulk; snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", (int)index, status, obd->obd_type->typ_name, @@ -417,7 +416,9 @@ EXPORT_SYMBOL(lustre_uuid_to_peer); EXPORT_SYMBOL(class_handle_hash); EXPORT_SYMBOL(class_handle_unhash); +EXPORT_SYMBOL(class_handle_hash_back); EXPORT_SYMBOL(class_handle2object); +EXPORT_SYMBOL(class_handle_free_cb); /* obd_config.c */ EXPORT_SYMBOL(class_incref); @@ -435,6 +436,10 @@ EXPORT_SYMBOL(class_cleanup); EXPORT_SYMBOL(class_detach); EXPORT_SYMBOL(class_manual_cleanup); +/* mea.c */ +EXPORT_SYMBOL(mea_name2idx); +EXPORT_SYMBOL(raw_name2idx); + #define OBD_INIT_CHECK #ifdef OBD_INIT_CHECK int obd_init_checks(void) @@ -498,7 +503,7 @@ int obd_init_checks(void) ret = -EINVAL; } if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) { - CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val, + CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val, CFS_PAGE_SIZE); ret = -EINVAL; } @@ -510,7 +515,6 @@ int obd_init_checks(void) #endif extern spinlock_t obd_types_lock; -extern spinlock_t handle_lock; extern int class_procfs_init(void); extern int class_procfs_clean(void); @@ -527,6 +531,9 @@ int init_obdclass(void) printk(KERN_INFO "Lustre: OBD class driver, info@clusterfs.com\n"); printk(KERN_INFO " Lustre Version: "LUSTRE_VERSION_STRING"\n"); printk(KERN_INFO " Build Version: "BUILD_VERSION"\n"); + + for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++) + INIT_LIST_HEAD(&capa_list[i]); #else CDEBUG(D_INFO, "Lustre: OBD class driver, info@clusterfs.com\n"); CDEBUG(D_INFO, " Lustre Version: "LUSTRE_VERSION_STRING"\n"); @@ -534,7 +541,6 @@ int init_obdclass(void) #endif spin_lock_init(&obd_types_lock); - spin_lock_init(&handle_lock); cfs_waitq_init(&obd_race_waitq); obd_zombie_impexp_init(); @@ -567,6 +573,9 @@ int init_obdclass(void) if (err) return err; #ifdef __KERNEL__ + err = lu_global_init(); + if (err) + return err; err = class_procfs_init(); if (err) return err; @@ -597,6 +606,7 @@ static void cleanup_obdclass(void) OBP(obd, detach)(obd); } } + lu_global_fini(); obd_cleanup_caches(); obd_sysctl_clean(); diff --git a/lustre/obdclass/darwin/.cvsignore b/lustre/obdclass/darwin/.cvsignore deleted file mode 100644 index 70845e0..0000000 --- a/lustre/obdclass/darwin/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -Makefile.in diff --git a/lustre/obdclass/darwin/darwin-sysctl.c b/lustre/obdclass/darwin/darwin-sysctl.c index b12156a..7f69593 100644 --- a/lustre/obdclass/darwin/darwin-sysctl.c +++ b/lustre/obdclass/darwin/darwin-sysctl.c @@ -6,7 +6,7 @@ #include <sys/proc.h> #include <sys/unistd.h> #include <mach/mach_types.h> -#include <linux/lustre_build_version.h> +#include <lustre/lustre_build_version.h> #define DEBUG_SUBSYSTEM S_CLASS @@ -64,7 +64,7 @@ static cfs_sysctl_table_t parent_table[] = { &sysctl__lustre_fail_loc, &sysctl__lustre_timeout, &sysctl__lustre_dump_on_timeout, - &sysctl__lustre_debug_peer_on_timeout, + &sysctl__lustre_debug_peer_on_timeout, &sysctl__lustre_upcall, &sysctl__lustre_memused, &sysctl__lustre_filter_sync_on_commit, diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c new file mode 100644 index 0000000..07ea54b --- /dev/null +++ b/lustre/obdclass/dt_object.c @@ -0,0 +1,214 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Dt Object. + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + * Generic functions from dt_object.h + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#include <obd.h> +#include <dt_object.h> +#include <libcfs/list.h> +/* fid_be_to_cpu() */ +#include <lustre_fid.h> + +/* no lock is necessary to protect the list, because call-backs + * are added during system startup. Please refer to "struct dt_device". + */ +void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb) +{ + list_add(&cb->dtc_linkage, &dev->dd_txn_callbacks); +} +EXPORT_SYMBOL(dt_txn_callback_add); + +void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb) +{ + list_del_init(&cb->dtc_linkage); +} +EXPORT_SYMBOL(dt_txn_callback_del); + +int dt_txn_hook_start(const struct lu_env *env, + struct dt_device *dev, struct txn_param *param) +{ + int result; + struct dt_txn_callback *cb; + + result = 0; + list_for_each_entry(cb, &dev->dd_txn_callbacks, dtc_linkage) { + if (cb->dtc_txn_start == NULL) + continue; + result = cb->dtc_txn_start(env, param, cb->dtc_cookie); + if (result < 0) + break; + } + return result; +} +EXPORT_SYMBOL(dt_txn_hook_start); + +int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn) +{ + struct dt_device *dev = txn->th_dev; + struct dt_txn_callback *cb; + int result; + + result = 0; + list_for_each_entry(cb, &dev->dd_txn_callbacks, dtc_linkage) { + if (cb->dtc_txn_stop == NULL) + continue; + result = cb->dtc_txn_stop(env, txn, cb->dtc_cookie); + if (result < 0) + break; + } + return result; +} +EXPORT_SYMBOL(dt_txn_hook_stop); + +int dt_txn_hook_commit(const struct lu_env *env, struct thandle *txn) +{ + struct dt_device *dev = txn->th_dev; + struct dt_txn_callback *cb; + int result; + + result = 0; + list_for_each_entry(cb, &dev->dd_txn_callbacks, dtc_linkage) { + if (cb->dtc_txn_commit == NULL) + continue; + result = cb->dtc_txn_commit(env, txn, cb->dtc_cookie); + if (result < 0) + break; + } + return result; +} +EXPORT_SYMBOL(dt_txn_hook_commit); + +int dt_device_init(struct dt_device *dev, struct lu_device_type *t) +{ + + CFS_INIT_LIST_HEAD(&dev->dd_txn_callbacks); + return lu_device_init(&dev->dd_lu_dev, t); +} +EXPORT_SYMBOL(dt_device_init); + +void dt_device_fini(struct dt_device *dev) +{ + lu_device_fini(&dev->dd_lu_dev); +} +EXPORT_SYMBOL(dt_device_fini); + +int dt_object_init(struct dt_object *obj, + struct lu_object_header *h, struct lu_device *d) + +{ + return lu_object_init(&obj->do_lu, h, d); +} +EXPORT_SYMBOL(dt_object_init); + +void dt_object_fini(struct dt_object *obj) +{ + lu_object_fini(&obj->do_lu); +} +EXPORT_SYMBOL(dt_object_fini); + +int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj) +{ + if (obj->do_index_ops == NULL) + obj->do_ops->do_index_try(env, obj, &dt_directory_features); + return obj->do_index_ops != NULL; +} +EXPORT_SYMBOL(dt_try_as_dir); + +extern struct lu_context_key lu_global_key; + +static int dt_lookup(const struct lu_env *env, struct dt_object *dir, + const char *name, struct lu_fid *fid) +{ + struct lu_fid_pack *pack = lu_context_key_get(&env->le_ctx, + &lu_global_key); + struct dt_rec *rec = (struct dt_rec *)pack; + const struct dt_key *key = (const struct dt_key *)name; + int result; + + if (dt_try_as_dir(env, dir)) { + result = dir->do_index_ops->dio_lookup(env, dir, rec, key, + BYPASS_CAPA); + fid_unpack(pack, fid); + } else + result = -ENOTDIR; + return result; +} + +static struct dt_object *dt_locate(const struct lu_env *env, + struct dt_device *dev, + const struct lu_fid *fid) +{ + struct lu_object *obj; + struct dt_object *dt; + + obj = lu_object_find(env, dev->dd_lu_dev.ld_site, fid); + if (!IS_ERR(obj)) { + obj = lu_object_locate(obj->lo_header, dev->dd_lu_dev.ld_type); + LASSERT(obj != NULL); + dt = container_of(obj, struct dt_object, do_lu); + } else + dt = (void *)obj; + return dt; +} + +struct dt_object *dt_store_open(const struct lu_env *env, + struct dt_device *dt, const char *name, + struct lu_fid *fid) +{ + int result; + + struct dt_object *root; + struct dt_object *child; + + result = dt->dd_ops->dt_root_get(env, dt, fid); + if (result == 0) { + root = dt_locate(env, dt, fid); + if (!IS_ERR(root)) { + result = dt_lookup(env, root, name, fid); + if (result == 0) + child = dt_locate(env, dt, fid); + else + child = ERR_PTR(result); + lu_object_put(env, &root->do_lu); + } else { + CERROR("No root\n"); + child = (void *)root; + } + } else + child = ERR_PTR(result); + return child; +} +EXPORT_SYMBOL(dt_store_open); + +const struct dt_index_features dt_directory_features; +EXPORT_SYMBOL(dt_directory_features); + diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 34b137c..c005804 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -58,7 +58,7 @@ static struct obd_device *obd_device_alloc(void) { struct obd_device *obd; - OBD_SLAB_ALLOC(obd, obd_device_cachep, SLAB_KERNEL, sizeof(*obd)); + OBD_SLAB_ALLOC(obd, obd_device_cachep, GFP_KERNEL, sizeof(*obd)); if (obd != NULL) { obd->obd_magic = OBD_DEVICE_MAGIC; } @@ -99,20 +99,19 @@ struct obd_type *class_get_type(const char *name) #ifdef CONFIG_KMOD if (!type) { const char *modname = name; - if (strcmp(modname, LUSTRE_MDT_NAME) == 0) - modname = LUSTRE_MDS_NAME; if (!request_module(modname)) { CDEBUG(D_INFO, "Loaded module '%s'\n", modname); type = class_search_type(name); } else { - LCONSOLE_ERROR("Can't load module '%s'\n", modname); + LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n", + modname); } } #endif if (type) { spin_lock(&type->obd_type_lock); type->typ_refcnt++; - try_module_get(type->typ_ops->o_owner); + try_module_get(type->typ_dt_ops->o_owner); spin_unlock(&type->obd_type_lock); } return type; @@ -123,18 +122,22 @@ void class_put_type(struct obd_type *type) LASSERT(type); spin_lock(&type->obd_type_lock); type->typ_refcnt--; - module_put(type->typ_ops->o_owner); + module_put(type->typ_dt_ops->o_owner); spin_unlock(&type->obd_type_lock); } -int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, - const char *name) +#define CLASS_MAX_NAME 1024 + +int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, + struct lprocfs_vars *vars, const char *name, + struct lu_device_type *ldt) { struct obd_type *type; int rc = 0; ENTRY; - LASSERT(strnlen(name, 1024) < 1024); /* sanity check */ + /* sanity check */ + LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME); if (class_search_type(name)) { CDEBUG(D_IOCTL, "Type %s already registered\n", name); @@ -146,12 +149,19 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, if (type == NULL) RETURN(rc); - OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops)); + OBD_ALLOC_PTR(type->typ_dt_ops); + OBD_ALLOC_PTR(type->typ_md_ops); OBD_ALLOC(type->typ_name, strlen(name) + 1); - if (type->typ_ops == NULL || type->typ_name == NULL) + + if (type->typ_dt_ops == NULL || + type->typ_md_ops == NULL || + type->typ_name == NULL) GOTO (failed, rc); - *(type->typ_ops) = *ops; + *(type->typ_dt_ops) = *dt_ops; + /* md_ops is optional */ + if (md_ops) + *(type->typ_md_ops) = *md_ops; strcpy(type->typ_name, name); spin_lock_init(&type->obd_type_lock); @@ -164,6 +174,12 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, GOTO (failed, rc); } #endif + if (ldt != NULL) { + type->typ_lu = ldt; + rc = ldt->ldt_ops->ldto_init(ldt); + if (rc != 0) + GOTO (failed, rc); + } spin_lock(&obd_types_lock); list_add(&type->typ_chain, &obd_types); @@ -174,8 +190,10 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, failed: if (type->typ_name != NULL) OBD_FREE(type->typ_name, strlen(name) + 1); - if (type->typ_ops != NULL) - OBD_FREE (type->typ_ops, sizeof (*type->typ_ops)); + if (type->typ_md_ops != NULL) + OBD_FREE_PTR(type->typ_md_ops); + if (type->typ_dt_ops != NULL) + OBD_FREE_PTR(type->typ_dt_ops); OBD_FREE(type, sizeof(*type)); RETURN(rc); } @@ -194,19 +212,26 @@ int class_unregister_type(const char *name) CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt); /* This is a bad situation, let's make the best of it */ /* Remove ops, but leave the name for debugging */ - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); + OBD_FREE_PTR(type->typ_dt_ops); + OBD_FREE_PTR(type->typ_md_ops); RETURN(-EBUSY); } - if (type->typ_procroot) + if (type->typ_procroot) { lprocfs_remove(&type->typ_procroot); + } + + if (type->typ_lu) + type->typ_lu->ldt_ops->ldto_fini(type->typ_lu); spin_lock(&obd_types_lock); list_del(&type->typ_chain); spin_unlock(&obd_types_lock); OBD_FREE(type->typ_name, strlen(name) + 1); - if (type->typ_ops != NULL) - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); + if (type->typ_dt_ops != NULL) + OBD_FREE_PTR(type->typ_dt_ops); + if (type->typ_md_ops != NULL) + OBD_FREE_PTR(type->typ_md_ops); OBD_FREE(type, sizeof(*type)); RETURN(0); } /* class_unregister_type */ @@ -224,7 +249,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name) RETURN(ERR_PTR(-EINVAL)); } - type = class_get_type(type_name); + type = class_get_type(type_name); if (type == NULL){ CERROR("OBD: unknown type: %s\n", type_name); RETURN(ERR_PTR(-ENODEV)); @@ -240,7 +265,8 @@ struct obd_device *class_newdev(const char *type_name, const char *name) spin_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); - if (obd && obd->obd_name && (strcmp(name, obd->obd_name) == 0)){ + if (obd && obd->obd_name && + (strcmp(name, obd->obd_name) == 0)) { CERROR("Device %s already exists, won't add\n", name); if (result) { LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC, @@ -252,7 +278,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name) obd_devs[result->obd_minor] = NULL; result->obd_name[0]='\0'; - } + } result = ERR_PTR(-EEXIST); break; } @@ -508,6 +534,11 @@ void obd_cleanup_caches(void) LASSERTF(rc == 0, "Cannot destory ll_import_cache\n"); import_cachep = NULL; } + if (capa_cachep) { + rc = cfs_mem_cache_destroy(capa_cachep); + LASSERTF(rc == 0, "Cannot destory capa_cache\n"); + capa_cachep = NULL; + } EXIT; } @@ -534,6 +565,12 @@ int obd_init_caches(void) if (!import_cachep) GOTO(out, -ENOMEM); + LASSERT(capa_cachep == NULL); + capa_cachep = cfs_mem_cache_create("capa_cache", + sizeof(struct obd_capa), 0, 0); + if (!capa_cachep) + GOTO(out, -ENOMEM); + RETURN(0); out: obd_cleanup_caches(); @@ -624,11 +661,12 @@ EXPORT_SYMBOL(__class_export_put); void class_export_destroy(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; + ENTRY; LASSERT (atomic_read(&exp->exp_refcount) == 0); - CDEBUG(D_IOCTL, "destroying export %p/%s\n", exp, - exp->exp_client_uuid.uuid); + CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, + exp->exp_client_uuid.uuid, obd->obd_name); LASSERT(obd != NULL); @@ -637,11 +675,11 @@ void class_export_destroy(struct obd_export *exp) ptlrpc_put_connection_superhack(exp->exp_connection); LASSERT(list_empty(&exp->exp_outstanding_replies)); - LASSERT(list_empty(&exp->exp_handle.h_link)); obd_destroy_export(exp); - - OBD_FREE(exp, sizeof(*exp)); + + OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); class_decref(obd); + EXIT; } /* Creates a new export, adds it to the hash table, and returns a @@ -759,7 +797,8 @@ void class_import_destroy(struct obd_import *import) { ENTRY; - CDEBUG(D_IOCTL, "destroying import %p\n", import); + CDEBUG(D_IOCTL, "destroying import %p for %s\n", import, + import->imp_obd->obd_name); LASSERT(atomic_read(&import->imp_refcount) == 0); @@ -775,10 +814,9 @@ void class_import_destroy(struct obd_import *import) OBD_FREE(imp_conn, sizeof(*imp_conn)); } - LASSERT(list_empty(&import->imp_handle.h_link)); + LASSERT(import->imp_sec == NULL); class_decref(import->imp_obd); - OBD_FREE(import, sizeof(*import)); - + OBD_FREE_RCU(import, sizeof(*import), &import->imp_handle); EXIT; } EXPORT_SYMBOL(class_import_put); @@ -826,7 +864,6 @@ void class_destroy_import(struct obd_import *import) spin_lock(&import->imp_lock); import->imp_generation++; spin_unlock(&import->imp_lock); - class_import_put(import); } EXPORT_SYMBOL(class_destroy_import); @@ -857,6 +894,37 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd, } EXPORT_SYMBOL(class_connect); +/* if export is involved in recovery then clean up related things */ +void class_export_recovery_cleanup(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + + spin_lock_bh(&obd->obd_processing_task_lock); + if (obd->obd_recovering && exp->exp_in_recovery) { + spin_lock(&exp->exp_lock); + exp->exp_in_recovery = 0; + spin_unlock(&exp->exp_lock); + obd->obd_connected_clients--; + /* each connected client is counted as recoverable */ + obd->obd_recoverable_clients--; + if (exp->exp_req_replay_needed) { + spin_lock(&exp->exp_lock); + exp->exp_req_replay_needed = 0; + spin_unlock(&exp->exp_lock); + LASSERT(atomic_read(&obd->obd_req_replay_clients)); + atomic_dec(&obd->obd_req_replay_clients); + } + if (exp->exp_lock_replay_needed) { + spin_lock(&exp->exp_lock); + exp->exp_lock_replay_needed = 0; + spin_unlock(&exp->exp_lock); + LASSERT(atomic_read(&obd->obd_lock_replay_clients)); + atomic_dec(&obd->obd_lock_replay_clients); + } + } + spin_unlock_bh(&obd->obd_processing_task_lock); +} + /* This function removes two references from the export: one for the * hash entry and one for the export pointer passed in. The export * pointer passed to this function is destroyed should not be used @@ -886,6 +954,7 @@ int class_disconnect(struct obd_export *export) CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", export->exp_handle.h_cookie); + class_export_recovery_cleanup(export); class_unlink_export(export); class_export_put(export); RETURN(0); @@ -959,17 +1028,23 @@ void class_disconnect_exports(struct obd_device *obd) list_add(&work_list, &obd->obd_exports); list_del_init(&obd->obd_exports); spin_unlock(&obd->obd_dev_lock); - - CDEBUG(D_HA, "OBD device %d (%p) has exports, " - "disconnecting them\n", obd->obd_minor, obd); - class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd)); + + if (!list_empty(&work_list)) { + CDEBUG(D_HA, "OBD device %d (%p) has exports, " + "disconnecting them\n", obd->obd_minor, obd); + class_disconnect_export_list(&work_list, + get_exp_flags_from_obd(obd)); + } else + CDEBUG(D_HA, "OBD device %d (%p) has no exports\n", + obd->obd_minor, obd); EXIT; } EXPORT_SYMBOL(class_disconnect_exports); /* Remove exports that have not completed recovery. */ -void class_disconnect_stale_exports(struct obd_device *obd) +int class_disconnect_stale_exports(struct obd_device *obd, + int (*test_export)(struct obd_export *)) { struct list_head work_list; struct list_head *pos, *n; @@ -981,18 +1056,28 @@ void class_disconnect_stale_exports(struct obd_device *obd) spin_lock(&obd->obd_dev_lock); list_for_each_safe(pos, n, &obd->obd_exports) { exp = list_entry(pos, struct obd_export, exp_obd_chain); - if (exp->exp_replay_needed) { - list_del(&exp->exp_obd_chain); - list_add(&exp->exp_obd_chain, &work_list); - cnt++; - } + if (test_export(exp)) + continue; + + list_del(&exp->exp_obd_chain); + list_add(&exp->exp_obd_chain, &work_list); + /* don't count self-export as client */ + if (obd_uuid_equals(&exp->exp_client_uuid, + &exp->exp_obd->obd_uuid)) + continue; + + cnt++; + CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n", + obd->obd_name, exp->exp_client_uuid.uuid, + exp->exp_connection == NULL ? "<unknown>" : + libcfs_nid2str(exp->exp_connection->c_peer.nid)); } spin_unlock(&obd->obd_dev_lock); CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n", obd->obd_name, cnt); class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd)); - EXIT; + RETURN(cnt); } EXPORT_SYMBOL(class_disconnect_stale_exports); @@ -1174,13 +1259,13 @@ char *obd_export_nid2str(struct obd_export *exp) { if (exp->exp_connection != NULL) return libcfs_nid2str(exp->exp_connection->c_peer.nid); - + return "(no nid)"; } EXPORT_SYMBOL(obd_export_nid2str); #define EVICT_BATCH 32 -int obd_export_evict_by_nid(struct obd_device *obd, char *nid) +int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) { struct obd_export *doomed_exp[EVICT_BATCH] = { NULL }; struct list_head *p; @@ -1220,7 +1305,7 @@ search_again: } EXPORT_SYMBOL(obd_export_evict_by_nid); -int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid) +int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) { struct obd_export *doomed_exp = NULL; struct list_head *p; @@ -1260,6 +1345,7 @@ void obd_zombie_impexp_cull(void) { struct obd_import *import; struct obd_export *export; + ENTRY; do { spin_lock (&obd_zombie_impexp_lock); @@ -1289,6 +1375,7 @@ void obd_zombie_impexp_cull(void) class_export_destroy(export); } while (import != NULL || export != NULL); + EXIT; } EXPORT_SYMBOL(obd_zombie_impexp_cull); @@ -1298,3 +1385,4 @@ void obd_zombie_impexp_init(void) INIT_LIST_HEAD(&obd_zombie_exports); spin_lock_init(&obd_zombie_impexp_lock); } + diff --git a/lustre/obdclass/hash.c b/lustre/obdclass/hash.c new file mode 100644 index 0000000..4775939 --- /dev/null +++ b/lustre/obdclass/hash.c @@ -0,0 +1,241 @@ +/* + * linux/fs/ldiskfs/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This file is released under the GPL v2. + * + * This file may be redistributed under the terms of the GNU Public + * License. + */ + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/sched.h> +#include <linux/ldiskfs_fs.h> + +#define DELTA 0x9E3779B9 + +static void TEA_transform(__u32 buf[4], __u32 const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while(--n); + + buf[0] += b0; + buf[1] += b1; +} + +/* F, G and H are basic MD4 functions: selection, majority, parity */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +/* + * The generic round function. The application is so specific that + * we don't bother protecting all the arguments with parens, as is generally + * good macro practice, in favor of extra legibility. + * Rotation is separate from addition to prevent recomputation + */ +#define ROUND(f, a, b, c, d, x, s) \ + (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) +#define K1 0 +#define K2 013240474631UL +#define K3 015666365641UL + +/* + * Basic cut-down MD4 transform. Returns only 32 bits of result. + */ +static void halfMD4Transform (__u32 buf[4], __u32 const in[]) +{ + __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ + ROUND(F, a, b, c, d, in[0] + K1, 3); + ROUND(F, d, a, b, c, in[1] + K1, 7); + ROUND(F, c, d, a, b, in[2] + K1, 11); + ROUND(F, b, c, d, a, in[3] + K1, 19); + ROUND(F, a, b, c, d, in[4] + K1, 3); + ROUND(F, d, a, b, c, in[5] + K1, 7); + ROUND(F, c, d, a, b, in[6] + K1, 11); + ROUND(F, b, c, d, a, in[7] + K1, 19); + + /* Round 2 */ + ROUND(G, a, b, c, d, in[1] + K2, 3); + ROUND(G, d, a, b, c, in[3] + K2, 5); + ROUND(G, c, d, a, b, in[5] + K2, 9); + ROUND(G, b, c, d, a, in[7] + K2, 13); + ROUND(G, a, b, c, d, in[0] + K2, 3); + ROUND(G, d, a, b, c, in[2] + K2, 5); + ROUND(G, c, d, a, b, in[4] + K2, 9); + ROUND(G, b, c, d, a, in[6] + K2, 13); + + /* Round 3 */ + ROUND(H, a, b, c, d, in[3] + K3, 3); + ROUND(H, d, a, b, c, in[7] + K3, 9); + ROUND(H, c, d, a, b, in[2] + K3, 11); + ROUND(H, b, c, d, a, in[6] + K3, 15); + ROUND(H, a, b, c, d, in[1] + K3, 3); + ROUND(H, d, a, b, c, in[5] + K3, 9); + ROUND(H, c, d, a, b, in[0] + K3, 11); + ROUND(H, b, c, d, a, in[4] + K3, 15); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +#undef ROUND +#undef F +#undef G +#undef H +#undef K1 +#undef K2 +#undef K3 + +/* The old legacy hash */ +static __u32 dx_hack_hash (const char *name, int len) +{ + __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + while (len--) { + __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); + + if (hash & 0x80000000) hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return (hash0 << 1); +} + +static __u32 dx_r5_hash(const signed char *msg, int len) +{ + __u32 a = 0; + while (len--) { + a += *msg << 4; + a += *msg >> 4; + a *= 11; + msg++; + } + return a; +} + +static __u32 dx_same_hash(const signed char *msg, int len) +{ + return 0xcafebabeUL; +} + +static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i=0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +/* + * Returns the hash of a filename. If len is 0 and name is NULL, then + * this function can be used to test whether or not a hash version is + * supported. + * + * The seed is an 4 longword (32 bits) "secret" which can be used to + * uniquify a hash. If the seed is all zero's, then some default seed + * may be used. + * + * A particular hash version specifies whether or not the seed is + * represented, and whether or not the returned hash is 32 bits or 64 + * bits. 32 bit hashes will return 0 for the minor hash. + */ +int ldiskfsfs_dirhash(const char *name, int len, struct ldiskfs_dx_hash_info *hinfo) +{ + __u32 hash; + __u32 minor_hash = 0; + const char *p; + int i; + __u32 in[8], buf[4]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + /* Check to see if the seed is all zero's */ + if (hinfo->seed) { + for (i=0; i < 4; i++) { + if (hinfo->seed[i]) + break; + } + if (i < 4) + memcpy(buf, hinfo->seed, sizeof(buf)); + } + + switch (hinfo->hash_version) { + case LDISKFS_DX_HASH_LEGACY: + hash = dx_hack_hash(name, len); + break; + case LDISKFS_DX_HASH_HALF_MD4: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 8); + halfMD4Transform(buf, in); + len -= 32; + p += 32; + } + minor_hash = buf[2]; + hash = buf[1]; + break; + case LDISKFS_DX_HASH_TEA: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + break; + case LDISKFS_DX_HASH_R5: + hash = dx_r5_hash(name, len); + break; + case LDISKFS_DX_HASH_SAME: + hash = dx_same_hash(name, len); + break; + default: + hinfo->hash = 0; + return -1; + } + hash = hash & ~1; + if (hash == (LDISKFS_HTREE_EOF << 1)) + hash = (LDISKFS_HTREE_EOF-1) << 1; + hinfo->hash = hash; + hinfo->minor_hash = minor_hash; + return 0; +} diff --git a/lustre/obdclass/linux/.cvsignore b/lustre/obdclass/linux/.cvsignore deleted file mode 100644 index 13e3fc4..0000000 --- a/lustre/obdclass/linux/.cvsignore +++ /dev/null @@ -1,5 +0,0 @@ -Makefile -Makefile.in -.linux-module.o.cmd -.linux-obdo.o.cmd -.linux-sysctl.o.cmd diff --git a/lustre/obdclass/linux/linux-module.c b/lustre/obdclass/linux/linux-module.c index 3c8b2b4..57da966 100644 --- a/lustre/obdclass/linux/linux-module.c +++ b/lustre/obdclass/linux/linux-module.c @@ -66,8 +66,8 @@ #include <obd_class.h> #include <lprocfs_status.h> #include <lustre_ver.h> +#include <lustre/lustre_build_version.h> #ifdef __KERNEL__ -#include <linux/lustre_build_version.h> #include <linux/lustre_version.h> int proc_version; @@ -323,7 +323,7 @@ struct lprocfs_vars lprocfs_base[] = { #endif /* LPROCFS */ #ifdef __KERNEL__ -static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos) +static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos) { if (*pos >= class_devno_max()) return NULL; @@ -346,7 +346,7 @@ static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos) static int obd_device_list_seq_show(struct seq_file *p, void *v) { - int index = *(int*)v; + int index = *(loff_t *)v; struct obd_device *obd = class_num2obd(index); char *status; @@ -431,8 +431,9 @@ int class_procfs_init(void) int class_procfs_clean(void) { ENTRY; - if (proc_lustre_root) + if (proc_lustre_root) { lprocfs_remove(&proc_lustre_root); + } RETURN(0); } diff --git a/lustre/obdclass/linux/linux-obdo.c b/lustre/obdclass/linux/linux-obdo.c index d7f37dc..d6bc3c3 100644 --- a/lustre/obdclass/linux/linux-obdo.c +++ b/lustre/obdclass/linux/linux-obdo.c @@ -44,89 +44,6 @@ #include <linux/fs.h> #include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */ -void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid) -{ - if (ia_valid & ATTR_ATIME) { - oa->o_atime = LTIME_S(attr->ia_atime); - oa->o_valid |= OBD_MD_FLATIME; - } - if (ia_valid & ATTR_MTIME) { - oa->o_mtime = LTIME_S(attr->ia_mtime); - oa->o_valid |= OBD_MD_FLMTIME; - } - if (ia_valid & ATTR_CTIME) { - oa->o_ctime = LTIME_S(attr->ia_ctime); - oa->o_valid |= OBD_MD_FLCTIME; - } - if (ia_valid & ATTR_SIZE) { - oa->o_size = attr->ia_size; - oa->o_valid |= OBD_MD_FLSIZE; - } - if (ia_valid & ATTR_MODE) { - oa->o_mode = attr->ia_mode; - oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - oa->o_mode &= ~S_ISGID; - } - if (ia_valid & ATTR_UID) { - oa->o_uid = attr->ia_uid; - oa->o_valid |= OBD_MD_FLUID; - } - if (ia_valid & ATTR_GID) { - oa->o_gid = attr->ia_gid; - oa->o_valid |= OBD_MD_FLGID; - } -} -EXPORT_SYMBOL(obdo_from_iattr); - -void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid) -{ - valid &= oa->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n", - oa->o_valid, oa->o_mtime, oa->o_ctime); - - attr->ia_valid = 0; - if (valid & OBD_MD_FLATIME) { - LTIME_S(attr->ia_atime) = oa->o_atime; - attr->ia_valid |= ATTR_ATIME; - } - if (valid & OBD_MD_FLMTIME) { - LTIME_S(attr->ia_mtime) = oa->o_mtime; - attr->ia_valid |= ATTR_MTIME; - } - if (valid & OBD_MD_FLCTIME) { - LTIME_S(attr->ia_ctime) = oa->o_ctime; - attr->ia_valid |= ATTR_CTIME; - } - if (valid & OBD_MD_FLSIZE) { - attr->ia_size = oa->o_size; - attr->ia_valid |= ATTR_SIZE; - } -#if 0 /* you shouldn't be able to change a file's type with setattr */ - if (valid & OBD_MD_FLTYPE) { - attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); - attr->ia_valid |= ATTR_MODE; - } -#endif - if (valid & OBD_MD_FLMODE) { - attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); - attr->ia_valid |= ATTR_MODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - attr->ia_mode &= ~S_ISGID; - } - if (valid & OBD_MD_FLUID) { - attr->ia_uid = oa->o_uid; - attr->ia_valid |= ATTR_UID; - } - if (valid & OBD_MD_FLGID) { - attr->ia_gid = oa->o_gid; - attr->ia_valid |= ATTR_GID; - } -} -EXPORT_SYMBOL(iattr_from_obdo); - /* WARNING: the file systems must take care not to tinker with attributes they don't manage (such as blocks). */ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) @@ -159,7 +76,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) newvalid |= OBD_MD_FLBLOCKS; } if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */ - dst->o_blksize = 1<<src->i_blkbits; + dst->o_blksize = 1 << src->i_blkbits; newvalid |= OBD_MD_FLBLKSZ; } if (valid & OBD_MD_FLTYPE) { @@ -195,6 +112,51 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) } EXPORT_SYMBOL(obdo_from_inode); +/*FIXME: Just copy from obdo_from_inode*/ +void obdo_from_la(struct obdo *dst, struct lu_attr *la, obd_flag valid) +{ + obd_flag newvalid = 0; + + if (valid & OBD_MD_FLATIME) { + dst->o_atime = la->la_atime; + newvalid |= OBD_MD_FLATIME; + } + if (valid & OBD_MD_FLMTIME) { + dst->o_mtime = la->la_mtime; + newvalid |= OBD_MD_FLMTIME; + } + if (valid & OBD_MD_FLCTIME) { + dst->o_ctime = la->la_ctime; + newvalid |= OBD_MD_FLCTIME; + } + if (valid & OBD_MD_FLSIZE) { + dst->o_size = la->la_size; + newvalid |= OBD_MD_FLSIZE; + } + if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */ + dst->o_blocks = la->la_blocks; + newvalid |= OBD_MD_FLBLOCKS; + } + if (valid & OBD_MD_FLTYPE) { + dst->o_mode = (dst->o_mode & S_IALLUGO)|(la->la_mode & S_IFMT); + newvalid |= OBD_MD_FLTYPE; + } + if (valid & OBD_MD_FLMODE) { + dst->o_mode = (dst->o_mode & S_IFMT)|(la->la_mode & S_IALLUGO); + newvalid |= OBD_MD_FLMODE; + } + if (valid & OBD_MD_FLUID) { + dst->o_uid = la->la_uid; + newvalid |= OBD_MD_FLUID; + } + if (valid & OBD_MD_FLGID) { + dst->o_gid = la->la_gid; + newvalid |= OBD_MD_FLGID; + } + dst->o_valid |= newvalid; +} +EXPORT_SYMBOL(obdo_from_la); + void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) { valid &= src->o_valid; @@ -220,10 +182,10 @@ void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) if (valid & OBD_MD_FLSIZE) dst->i_size = src->o_size; /* optimum IO size */ - if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1<<dst->i_blkbits)) { - dst->i_blkbits = ffs(src->o_blksize)-1; + if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits)) { + dst->i_blkbits = ffs(src->o_blksize) - 1; #ifdef HAVE_INODE_BLKSIZE - dst->i_blksize = src->o_blksize; + dst->i_blkbits = src->o_blksize; #endif } @@ -285,4 +247,3 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) } EXPORT_SYMBOL(obdo_to_inode); #endif - diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index f8e4800..fcbd13a 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -110,7 +110,7 @@ void obd_sysctl_init (void) { #ifdef CONFIG_SYSCTL if ( !obd_table_header ) - obd_table_header = register_sysctl_table(parent_table, 0); + obd_table_header = cfs_register_sysctl_table(parent_table, 0); #endif } @@ -118,7 +118,7 @@ void obd_sysctl_clean (void) { #ifdef CONFIG_SYSCTL if ( obd_table_header ) - unregister_sysctl_table(obd_table_header); + cfs_unregister_sysctl_table(obd_table_header); obd_table_header = NULL; #endif } diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 217b3f8..b83a74c 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -246,6 +246,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, index, last_index); /* get the buf with our target record; avoid old garbage */ + memset(buf, 0, LLOG_CHUNK_SIZE); last_offset = cur_offset; rc = llog_next_block(loghandle, &saved_index, index, &cur_offset, buf, LLOG_CHUNK_SIZE); @@ -267,7 +268,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n", rec->lrh_type, rec->lrh_index); - + if (rec->lrh_index == 0) GOTO(out, 0); /* no more records */ @@ -284,7 +285,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, continue; } - CDEBUG(D_OTHER, + CDEBUG(D_OTHER, "lrh_index: %d lrh_len: %d (%d remains)\n", rec->lrh_index, rec->lrh_len, (int)(buf + LLOG_CHUNK_SIZE - (char *)rec)); diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 9fe257a..a757e68 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -65,29 +65,32 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) if (llh->llh_cat_idx == index) { CERROR("no free catalog slots for log...\n"); RETURN(ERR_PTR(-ENOSPC)); - } else { - if (index == 0) - index = 1; - if (ext2_set_bit(index, llh->llh_bitmap)) { - CERROR("argh, index %u already set in log bitmap?\n", - index); - LBUG(); /* should never happen */ - } - cathandle->lgh_last_idx = index; - llh->llh_count++; - llh->llh_tail.lrt_index = index; } + if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_LLOG_CREATE_FAILED)) + RETURN(ERR_PTR(-ENOSPC)); + rc = llog_create(cathandle->lgh_ctxt, &loghandle, NULL, NULL); - if (rc) + if (rc) RETURN(ERR_PTR(rc)); - + rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY, &cathandle->lgh_hdr->llh_tgtuuid); if (rc) GOTO(out_destroy, rc); + if (index == 0) + index = 1; + if (ext2_set_bit(index, llh->llh_bitmap)) { + CERROR("argh, index %u already set in log bitmap?\n", + index); + LBUG(); /* should never happen */ + } + cathandle->lgh_last_idx = index; + llh->llh_count++; + llh->llh_tail.lrt_index = index; + CDEBUG(D_HA, "new recovery log "LPX64":%x for index %u of catalog " LPX64"\n", loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen, index, cathandle->lgh_id.lgl_oid); diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 53260c5..6e0309b 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -89,6 +89,7 @@ static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file, struct llog_rec_tail end; loff_t saved_off = file->f_pos; int buflen = rec->lrh_len; + ENTRY; file->f_pos = off; @@ -231,7 +232,7 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, if (buf) /* write_blob adds header and tail to lrh_len. */ reclen = sizeof(*rec) + rec->lrh_len + - sizeof(struct llog_rec_tail); + sizeof(struct llog_rec_tail); if (idx != -1) { loff_t saved_offset; @@ -241,7 +242,7 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, CERROR("idx != -1 in empty log\n"); LBUG(); } - + if (idx && llh->llh_size && llh->llh_size != rec->lrh_len) RETURN(-EINVAL); @@ -421,7 +422,7 @@ static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx, /* put number of bytes read into rc to make code simpler */ rc = ppos - *cur_offset; *cur_offset = ppos; - + if (rc < len) { /* signal the end of the valid buffer to llog_process */ memset(buf + rc, 0, len - rc); @@ -638,7 +639,7 @@ static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, handle->lgh_id.lgl_ogen = handle->lgh_file->f_dentry->d_inode->i_generation; } else { - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(cleanup, rc = -ENOMEM); @@ -668,7 +669,7 @@ static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, handle->lgh_ctxt = ctxt; finish: if (oa) - obdo_free(oa); + OBDO_FREE(oa); RETURN(rc); cleanup: switch (cleanup_phase) { @@ -727,7 +728,7 @@ static int llog_lvfs_destroy(struct llog_handle *handle) RETURN(rc); } - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) RETURN(-ENOMEM); @@ -742,7 +743,7 @@ static int llog_lvfs_destroy(struct llog_handle *handle) rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL); out: - obdo_free(oa); + OBDO_FREE(oa); RETURN(rc); } diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 6cf90d1..8508670 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -51,8 +51,8 @@ int llog_cleanup(struct llog_ctxt *ctxt) if (CTXTP(ctxt, cleanup)) rc = CTXTP(ctxt, cleanup)(ctxt); - ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL; + if (ctxt->loc_exp) class_export_put(ctxt->loc_exp); OBD_FREE(ctxt, sizeof(*ctxt)); @@ -61,8 +61,9 @@ int llog_cleanup(struct llog_ctxt *ctxt) } EXPORT_SYMBOL(llog_cleanup); -int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, - int count, struct llog_logid *logid, struct llog_operations *op) +int llog_setup(struct obd_device *obd, struct obd_llogs *llogs, int index, + struct obd_device *disk_obd, int count, struct llog_logid *logid, + struct llog_operations *op) { int rc = 0; struct llog_ctxt *ctxt; @@ -71,7 +72,10 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, if (index < 0 || index >= LLOG_MAX_CTXTS) RETURN(-EFAULT); - if (obd->obd_llog_ctxt[index]) { + /* in some recovery cases, obd_llog_ctxt might already be set, + * but llogs might still be zero, for example in obd_filter recovery */ + if (obd->obd_llog_ctxt[index] && + (!llogs || (llogs && llogs->llog_ctxt[index]))) { /* mds_lov_update_mds might call here multiple times. So if the llog is already set up then don't to do it again. */ CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n", @@ -82,12 +86,17 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, LASSERT(ctxt->loc_logops == op); GOTO(out, rc = 0); } - + OBD_ALLOC(ctxt, sizeof(*ctxt)); if (!ctxt) GOTO(out, rc = -ENOMEM); - obd->obd_llog_ctxt[index] = ctxt; + if (llogs) + llogs->llog_ctxt[index] = ctxt; + + if (!obd->obd_llog_ctxt[index]) + obd->obd_llog_ctxt[index] = ctxt; + ctxt->loc_obd = obd; ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); ctxt->loc_idx = index; @@ -95,14 +104,14 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, sema_init(&ctxt->loc_sem, 1); if (op->lop_setup) - rc = op->lop_setup(obd, index, disk_obd, count, logid); - + rc = op->lop_setup(obd, llogs, index, disk_obd, count, logid); + if (rc) { obd->obd_llog_ctxt[index] = NULL; class_export_put(ctxt->loc_exp); OBD_FREE(ctxt, sizeof(*ctxt)); } - + out: RETURN(rc); } @@ -136,6 +145,7 @@ int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, } CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP); + rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies); RETURN(rc); } @@ -197,10 +207,10 @@ static int cat_cancel_cb(struct llog_handle *cathandle, llog_cat_set_first_idx(cathandle, index); rc = llog_cancel_rec(cathandle, index); if (rc == 0) - CDEBUG(D_HA, "cancel log "LPX64":%x at index %u of " - "catalog "LPX64"\n", lir->lid_id.lgl_oid, - lir->lid_id.lgl_ogen, rec->lrh_index, - cathandle->lgh_id.lgl_oid); + CDEBUG(D_HA, "cancel log "LPX64":%x at index %u of catalog " + LPX64"\n", lir->lid_id.lgl_oid, + lir->lid_id.lgl_ogen, rec->lrh_index, + cathandle->lgh_id.lgl_oid); } RETURN(rc); @@ -208,8 +218,8 @@ static int cat_cancel_cb(struct llog_handle *cathandle, /* lop_setup method for filter/osc */ // XXX how to set exports -int llog_obd_origin_setup(struct obd_device *obd, int index, - struct obd_device *disk_obd, int count, +int llog_obd_origin_setup(struct obd_device *obd, struct obd_llogs *llogs, + int index, struct obd_device *disk_obd, int count, struct llog_logid *logid) { struct llog_ctxt *ctxt; @@ -223,7 +233,11 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, LASSERT(count == 1); - ctxt = llog_get_context(obd, index); + if (!llogs) + ctxt = llog_get_context(obd, index); + else + ctxt = llog_get_context_from_llogs(llogs, index); + LASSERT(ctxt); llog_gen_init(ctxt); @@ -312,8 +326,8 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt, } EXPORT_SYMBOL(llog_obd_origin_add); -int llog_cat_initialize(struct obd_device *obd, int count, - struct obd_uuid *uuid) +int llog_cat_initialize(struct obd_device *obd, struct obd_llogs *llogs, + int count, struct obd_uuid *uuid) { char name[32] = CATLIST; struct llog_catid *idarray; @@ -331,7 +345,7 @@ int llog_cat_initialize(struct obd_device *obd, int count, GOTO(out, rc); } - rc = obd_llog_init(obd, obd, count, idarray, uuid); + rc = obd_llog_init(obd, llogs, obd, count, idarray, uuid); if (rc) { CERROR("rc: %d\n", rc); GOTO(out, rc); @@ -349,15 +363,17 @@ int llog_cat_initialize(struct obd_device *obd, int count, } EXPORT_SYMBOL(llog_cat_initialize); -int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid, struct obd_uuid *uuid) +int obd_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *disk_obd, int count, + struct llog_catid *logid, struct obd_uuid *uuid) { int rc; ENTRY; - OBD_CHECK_OP(obd, llog_init, 0); + OBD_CHECK_DT_OP(obd, llog_init, 0); OBD_COUNTER_INCREMENT(obd, llog_init); - rc = OBP(obd, llog_init)(obd, disk_obd, count, logid, uuid); + rc = OBP(obd, llog_init)(obd, llogs, disk_obd, count, logid, + uuid); RETURN(rc); } EXPORT_SYMBOL(obd_llog_init); @@ -366,10 +382,11 @@ int obd_llog_finish(struct obd_device *obd, int count) { int rc; ENTRY; - OBD_CHECK_OP(obd, llog_finish, 0); + OBD_CHECK_DT_OP(obd, llog_finish, 0); OBD_COUNTER_INCREMENT(obd, llog_finish); rc = OBP(obd, llog_finish)(obd, count); RETURN(rc); } EXPORT_SYMBOL(obd_llog_finish); + diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 4f45df0..47d1ddd 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -85,6 +85,21 @@ void lustre_swab_ll_fid(struct ll_fid *fid) } EXPORT_SYMBOL(lustre_swab_ll_fid); +void lustre_swab_lu_fid(struct lu_fid *fid) +{ + __swab64s (&fid->f_seq); + __swab32s (&fid->f_oid); + __swab32s (&fid->f_ver); +} +EXPORT_SYMBOL(lustre_swab_lu_fid); + +void lustre_swab_lu_range(struct lu_range *range) +{ + __swab64s (&range->lr_start); + __swab64s (&range->lr_end); +} +EXPORT_SYMBOL(lustre_swab_lu_range); + void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) { __swab32s(&rec->lrh_len); @@ -97,7 +112,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) (struct llog_size_change_rec *)rec; lustre_swab_ll_fid(&lsc->lsc_fid); - __swab32s(&lsc->lsc_io_epoch); + __swab32s(&lsc->lsc_ioepoch); break; } diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index d991f51..a70eed2 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -461,7 +461,8 @@ static int llog_test_6(struct obd_device *obd, char *name) RETURN(-ENOENT); } - rc = obd_connect(&exph, mdc_obd, &uuid, NULL /* obd_connect_data */); + rc = obd_connect(NULL, + &exph, mdc_obd, &uuid, NULL /* obd_connect_data */); if (rc) { CERROR("6: failed to connect to MDC: %s\n", mdc_obd->obd_name); RETURN(rc); @@ -529,10 +530,10 @@ static int llog_test_7(struct obd_device *obd) } rc = llog_destroy(llh); - if (rc) + if (rc) CERROR("7: llog_destroy failed: %d\n", rc); else - llog_free_handle(llh); + llog_free_handle(llh); RETURN(rc); } @@ -596,14 +597,15 @@ static int llog_run_tests(struct obd_device *obd) } -static int llog_test_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid, - struct obd_uuid *uuid) +static int llog_test_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, + struct llog_catid *logid, struct obd_uuid *uuid) { int rc; ENTRY; - rc = llog_setup(obd, LLOG_TEST_ORIG_CTXT, tgt, 0, NULL, &llog_lvfs_ops); + rc = llog_setup(obd, llogs, LLOG_TEST_ORIG_CTXT, tgt, 0, NULL, + &llog_lvfs_ops); RETURN(rc); } @@ -627,10 +629,9 @@ static int llog_test_cleanup(struct obd_device *obd) return rc; } -static int llog_test_setup(struct obd_device *obd, obd_count len, void *buf) +static int llog_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg *lcfg = buf; struct obd_device *tgt; int rc; ENTRY; @@ -652,7 +653,7 @@ static int llog_test_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(-EINVAL); } - rc = obd_llog_init(obd, tgt, 0, NULL, NULL); + rc = obd_llog_init(obd, NULL, tgt, 0, NULL, NULL); if (rc) RETURN(rc); @@ -687,7 +688,8 @@ static int __init llog_test_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(llog_test, &lvars); - return class_register_type(&llog_obd_ops,lvars.module_vars,"llog_test"); + return class_register_type(&llog_obd_ops, NULL, + lvars.module_vars,"llog_test", NULL); } static void __exit llog_test_exit(void) diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index af5b102..6f9d903 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -114,14 +114,14 @@ int lprocfs_add_simple(struct proc_dir_entry *root, char *name, return 0; } - -static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size, loff_t *ppos) +static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, + size_t size, loff_t *ppos) { struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode); char *page, *start = NULL; int rc = 0, eof = 1, count; - if (*ppos >= PAGE_SIZE) + if (*ppos >= CFS_PAGE_SIZE) return 0; page = (char *)__get_free_page(GFP_KERNEL); @@ -131,7 +131,7 @@ static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size, LPROCFS_ENTRY(); OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10); if (!dp->deleted && dp->read_proc) - rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE, + rc = dp->read_proc(page, &start, *ppos, CFS_PAGE_SIZE, &eof, dp->data); LPROCFS_EXIT(); if (rc <= 0) @@ -181,7 +181,6 @@ static struct file_operations lprocfs_generic_fops = { .write = lprocfs_fops_write, }; - int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, void *data) { @@ -465,8 +464,8 @@ int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, imp_state_name = ptlrpc_import_state_name(imp->imp_state); *eof = 1; rc = snprintf(page, count, "%s\t%s%s\n", - obd2cli_tgt(obd), imp_state_name, - imp->imp_deactive ? "\tDEACTIVATED" : ""); + obd2cli_tgt(obd), imp_state_name, + imp->imp_deactive ? "\tDEACTIVATED" : ""); LPROCFS_CLIMP_EXIT(obd); return rc; @@ -479,12 +478,18 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, struct ptlrpc_connection *conn; int rc = 0; - LASSERT(obd != NULL); + LASSERT(obd != NULL); + LPROCFS_CLIMP_CHECK(obd); conn = obd->u.cli.cl_import->imp_connection; LASSERT(conn != NULL); *eof = 1; - rc = snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid); + if (obd->u.cli.cl_import) { + rc = snprintf(page, count, "%s\n", + conn->c_remote_uuid.uuid); + } else { + rc = snprintf(page, count, "%s\n", "<none>"); + } LPROCFS_CLIMP_EXIT(obd); return rc; @@ -500,19 +505,21 @@ static const char *obd_connect_names[] = { "request_portal", "acl", "xattr", - "create_on_write", + "real_conn", "truncate_lock", - "initial_transno", + "obsoleted", "inode_bit_locks", "join_file", "getattr_by_fid", "no_oh_for_devices", - "local_1.8_client", - "remote_1.8_client", + "local_client", + "remote_client", "max_byte_per_rpc", "64bit_qdata", - "fid_capability", + "mds_capability", "oss_capability", + "mds_mds_connection", + "size_on_mds", NULL }; @@ -626,8 +633,8 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num) void lprocfs_free_stats(struct lprocfs_stats **statsh) { struct lprocfs_stats *stats = *statsh; - - if (!stats || (stats->ls_num == 0)) + + if (stats == NULL || stats->ls_num == 0) return; *statsh = NULL; @@ -765,7 +772,6 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) LPROCFS_EXIT(); return rc; } - seq = file->private_data; seq->private = dp->data; return 0; @@ -838,6 +844,10 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect); LPROCFS_OBD_OP_INIT(num_private_stats, stats, reconnect); LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_init); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_fini); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_alloc); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_delete); LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs); LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs_async); LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd); @@ -877,6 +887,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export); LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc); LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_connect); LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish); LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin); LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin); @@ -898,7 +909,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_cntr_base == 0); - num_stats = ((int)sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) + + num_stats = ((int)sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) + num_private_stats - 1 /* o_owner */; stats = lprocfs_alloc_stats(num_stats); if (stats == NULL) @@ -932,6 +943,87 @@ void lprocfs_free_obd_stats(struct obd_device *obd) lprocfs_free_stats(&obd->obd_stats); } +#define LPROCFS_MD_OP_INIT(base, stats, op) \ +do { \ + unsigned int coffset = base + MD_COUNTER_OFFSET(op); \ + LASSERT(coffset < stats->ls_num); \ + lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \ +} while (0) + +int lprocfs_alloc_md_stats(struct obd_device *obd, + unsigned num_private_stats) +{ + struct lprocfs_stats *stats; + unsigned int num_stats; + int rc, i; + + LASSERT(obd->md_stats == NULL); + LASSERT(obd->obd_proc_entry != NULL); + LASSERT(obd->md_cntr_base == 0); + + num_stats = 1 + MD_COUNTER_OFFSET(get_remote_perm) + + num_private_stats; + stats = lprocfs_alloc_stats(num_stats); + if (stats == NULL) + return -ENOMEM; + + LPROCFS_MD_OP_INIT(num_private_stats, stats, getstatus); + LPROCFS_MD_OP_INIT(num_private_stats, stats, change_cbdata); + LPROCFS_MD_OP_INIT(num_private_stats, stats, close); + LPROCFS_MD_OP_INIT(num_private_stats, stats, create); + LPROCFS_MD_OP_INIT(num_private_stats, stats, done_writing); + LPROCFS_MD_OP_INIT(num_private_stats, stats, enqueue); + LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr); + LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr_name); + LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_lock); + LPROCFS_MD_OP_INIT(num_private_stats, stats, link); + LPROCFS_MD_OP_INIT(num_private_stats, stats, rename); + LPROCFS_MD_OP_INIT(num_private_stats, stats, is_subdir); + LPROCFS_MD_OP_INIT(num_private_stats, stats, setattr); + LPROCFS_MD_OP_INIT(num_private_stats, stats, sync); + LPROCFS_MD_OP_INIT(num_private_stats, stats, readpage); + LPROCFS_MD_OP_INIT(num_private_stats, stats, unlink); + LPROCFS_MD_OP_INIT(num_private_stats, stats, setxattr); + LPROCFS_MD_OP_INIT(num_private_stats, stats, getxattr); + LPROCFS_MD_OP_INIT(num_private_stats, stats, init_ea_size); + LPROCFS_MD_OP_INIT(num_private_stats, stats, get_lustre_md); + LPROCFS_MD_OP_INIT(num_private_stats, stats, free_lustre_md); + LPROCFS_MD_OP_INIT(num_private_stats, stats, set_open_replay_data); + LPROCFS_MD_OP_INIT(num_private_stats, stats, clear_open_replay_data); + LPROCFS_MD_OP_INIT(num_private_stats, stats, set_lock_data); + LPROCFS_MD_OP_INIT(num_private_stats, stats, lock_match); + LPROCFS_MD_OP_INIT(num_private_stats, stats, cancel_unused); + LPROCFS_MD_OP_INIT(num_private_stats, stats, renew_capa); + LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm); + + for (i = num_private_stats; i < num_stats; i++) { + if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) { + CERROR("Missing md_stat initializer md_op " + "operation at offset %d. Aborting.\n", + i - num_private_stats); + LBUG(); + } + } + rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats); + if (rc < 0) { + lprocfs_free_stats(&stats); + } else { + obd->md_stats = stats; + obd->md_cntr_base = num_private_stats; + } + return rc; +} + +void lprocfs_free_md_stats(struct obd_device *obd) +{ + struct lprocfs_stats *stats = obd->md_stats; + + if (stats != NULL) { + obd->md_stats = NULL; + lprocfs_free_stats(&stats); + } +} + int lprocfs_exp_rd_nid(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -950,16 +1042,14 @@ int lprocfs_exp_rd_uuid(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n", obd_uuid2str(&exp->exp_client_uuid)); } - + int lprocfs_exp_setup(struct obd_export *exp) { char name[sizeof (exp->exp_client_uuid.uuid) + 3]; int i = 1, rc; ENTRY; - if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports) RETURN(-EINVAL); - mutex_down(&exp->exp_obd->obd_proc_exp_sem); sprintf(name, "%s", (char *)exp->exp_client_uuid.uuid); while (lprocfs_srch(exp->exp_obd->obd_proc_exports, name)) { @@ -987,7 +1077,6 @@ int lprocfs_exp_setup(struct obd_export *exp) lprocfs_exp_rd_uuid, NULL, exp); if (rc) GOTO(out, rc); - /* Always add ldlm stats */ exp->exp_ldlm_stats = lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC); @@ -1187,7 +1276,7 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count, units <<= 10; } /* Specified units override the multiplier */ - if (units) + if (units) mult = mult < 0 ? -units : units; frac *= mult; diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c new file mode 100644 index 0000000..ba17f7d --- /dev/null +++ b/lustre/obdclass/lu_object.c @@ -0,0 +1,1214 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Object. + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + * These are the only exported functions, they provide some generic + * infrastructure for managing object devices + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#include <linux/seq_file.h> +#include <linux/module.h> +/* nr_free_pages() */ +#include <linux/swap.h> +/* hash_long() */ +#include <linux/hash.h> +#include <obd_support.h> +#include <lustre_disk.h> +#include <lustre_fid.h> +#include <lu_object.h> +#include <libcfs/list.h> +/* lu_time_global_{init,fini}() */ +#include <lu_time.h> + +static void lu_object_free(const struct lu_env *env, struct lu_object *o); + +/* + * Decrease reference counter on object. If last reference is freed, return + * object to the cache, unless lu_object_is_dying(o) holds. In the latter + * case, free object immediately. + */ +void lu_object_put(const struct lu_env *env, struct lu_object *o) +{ + struct lu_object_header *top; + struct lu_site *site; + struct lu_object *orig; + int kill_it; + + top = o->lo_header; + site = o->lo_dev->ld_site; + orig = o; + kill_it = 0; + write_lock(&site->ls_guard); + if (atomic_dec_and_test(&top->loh_ref)) { + /* + * When last reference is released, iterate over object + * layers, and notify them that object is no longer busy. + */ + list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) { + if (o->lo_ops->loo_object_release != NULL) + o->lo_ops->loo_object_release(env, o); + } + -- site->ls_busy; + if (lu_object_is_dying(top)) { + /* + * If object is dying (will not be cached), removed it + * from hash table and LRU. + * + * This is done with hash table and LRU lists + * locked. As the only way to acquire first reference + * to previously unreferenced object is through + * hash-table lookup (lu_object_find()), or LRU + * scanning (lu_site_purge()), that are done under + * hash-table and LRU lock, no race with concurrent + * object lookup is possible and we can safely destroy + * object below. + */ + hlist_del_init(&top->loh_hash); + list_del_init(&top->loh_lru); + -- site->ls_total; + kill_it = 1; + } + } + write_unlock(&site->ls_guard); + if (kill_it) + /* + * Object was already removed from hash and lru above, can + * kill it. + */ + lu_object_free(env, orig); +} +EXPORT_SYMBOL(lu_object_put); + +/* + * Allocate new object. + * + * This follows object creation protocol, described in the comment within + * struct lu_device_operations definition. + */ +static struct lu_object *lu_object_alloc(const struct lu_env *env, + struct lu_site *s, + const struct lu_fid *f) +{ + struct lu_object *scan; + struct lu_object *top; + struct list_head *layers; + int clean; + int result; + + /* + * Create top-level object slice. This will also create + * lu_object_header. + */ + top = s->ls_top_dev->ld_ops->ldo_object_alloc(env, + NULL, s->ls_top_dev); + if (IS_ERR(top)) + RETURN(top); + /* + * This is the only place where object fid is assigned. It's constant + * after this point. + */ + LASSERT(fid_is_igif(f) || fid_ver(f) == 0); + top->lo_header->loh_fid = *f; + layers = &top->lo_header->loh_layers; + do { + /* + * Call ->loo_object_init() repeatedly, until no more new + * object slices are created. + */ + clean = 1; + list_for_each_entry(scan, layers, lo_linkage) { + if (scan->lo_flags & LU_OBJECT_ALLOCATED) + continue; + clean = 0; + scan->lo_header = top->lo_header; + result = scan->lo_ops->loo_object_init(env, scan); + if (result != 0) { + lu_object_free(env, top); + RETURN(ERR_PTR(result)); + } + scan->lo_flags |= LU_OBJECT_ALLOCATED; + } + } while (!clean); + + list_for_each_entry_reverse(scan, layers, lo_linkage) { + if (scan->lo_ops->loo_object_start != NULL) { + result = scan->lo_ops->loo_object_start(env, scan); + if (result != 0) { + lu_object_free(env, top); + RETURN(ERR_PTR(result)); + } + } + } + + s->ls_stats.s_created ++; + RETURN(top); +} + +/* + * Free object. + */ +static void lu_object_free(const struct lu_env *env, struct lu_object *o) +{ + struct list_head splice; + struct lu_object *scan; + + /* + * First call ->loo_object_delete() method to release all resources. + */ + list_for_each_entry_reverse(scan, + &o->lo_header->loh_layers, lo_linkage) { + if (scan->lo_ops->loo_object_delete != NULL) + scan->lo_ops->loo_object_delete(env, scan); + } + + /* + * Then, splice object layers into stand-alone list, and call + * ->loo_object_free() on all layers to free memory. Splice is + * necessary, because lu_object_header is freed together with the + * top-level slice. + */ + INIT_LIST_HEAD(&splice); + list_splice_init(&o->lo_header->loh_layers, &splice); + while (!list_empty(&splice)) { + o = container_of0(splice.next, struct lu_object, lo_linkage); + list_del_init(&o->lo_linkage); + LASSERT(o->lo_ops->loo_object_free != NULL); + o->lo_ops->loo_object_free(env, o); + } +} + +/* + * Free @nr objects from the cold end of the site LRU list. + */ +int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) +{ + struct list_head dispose; + struct lu_object_header *h; + struct lu_object_header *temp; + + INIT_LIST_HEAD(&dispose); + /* + * Under LRU list lock, scan LRU list and move unreferenced objects to + * the dispose list, removing them from LRU and hash table. + */ + write_lock(&s->ls_guard); + list_for_each_entry_safe(h, temp, &s->ls_lru, loh_lru) { + /* + * Objects are sorted in lru order, and "busy" objects (ones + * with h->loh_ref > 0) naturally tend to live near hot end + * that we scan last. Unfortunately, sites usually have small + * (less then ten) number of busy yet rarely accessed objects + * (some global objects, accessed directly through pointers, + * bypassing hash table). Currently algorithm scans them over + * and over again. Probably we should move busy objects out of + * LRU, or we can live with that. + */ + if (nr-- == 0) + break; + if (atomic_read(&h->loh_ref) > 0) + continue; + hlist_del_init(&h->loh_hash); + list_move(&h->loh_lru, &dispose); + s->ls_total --; + } + write_unlock(&s->ls_guard); + /* + * Free everything on the dispose list. This is safe against races due + * to the reasons described in lu_object_put(). + */ + while (!list_empty(&dispose)) { + h = container_of0(dispose.next, + struct lu_object_header, loh_lru); + list_del_init(&h->loh_lru); + lu_object_free(env, lu_object_top(h)); + s->ls_stats.s_lru_purged ++; + } + return nr; +} +EXPORT_SYMBOL(lu_site_purge); + +/* + * Object printing. + * + * Code below has to jump through certain loops to output object description + * into libcfs_debug_msg-based log. The problem is that lu_object_print() + * composes object description from strings that are parts of _lines_ of + * output (i.e., strings that are not terminated by newline). This doesn't fit + * very well into libcfs_debug_msg() interface that assumes that each message + * supplied to it is a self-contained output line. + * + * To work around this, strings are collected in a temporary buffer + * (implemented as a value of lu_cdebug_key key), until terminating newline + * character is detected. + * + */ + +enum { + /* + * Maximal line size. + * + * XXX overflow is not handled correctly. + */ + LU_CDEBUG_LINE = 256 +}; + +struct lu_cdebug_data { + /* + * Temporary buffer. + */ + char lck_area[LU_CDEBUG_LINE]; + /* + * fid staging area used by dt_store_open(). + */ + struct lu_fid_pack lck_pack; +}; + +static void *lu_global_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct lu_cdebug_data *value; + + OBD_ALLOC_PTR(value); + if (value == NULL) + value = ERR_PTR(-ENOMEM); + return value; +} + +static void lu_global_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct lu_cdebug_data *value = data; + OBD_FREE_PTR(value); +} + +/* + * Key, holding temporary buffer. This key is registered very early by + * lu_global_init(). + */ +struct lu_context_key lu_global_key = { + .lct_tags = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD, + .lct_init = lu_global_key_init, + .lct_fini = lu_global_key_fini +}; + +/* + * Printer function emitting messages through libcfs_debug_msg(). + */ +int lu_cdebug_printer(const struct lu_env *env, + void *cookie, const char *format, ...) +{ + struct lu_cdebug_print_info *info = cookie; + struct lu_cdebug_data *key; + int used; + int complete; + va_list args; + + va_start(args, format); + + key = lu_context_key_get(&env->le_ctx, &lu_global_key); + LASSERT(key != NULL); + + used = strlen(key->lck_area); + complete = format[strlen(format) - 1] == '\n'; + /* + * Append new chunk to the buffer. + */ + vsnprintf(key->lck_area + used, + ARRAY_SIZE(key->lck_area) - used, format, args); + if (complete) { + libcfs_debug_msg(NULL, info->lpi_subsys, info->lpi_mask, + (char *)info->lpi_file, info->lpi_fn, + info->lpi_line, "%s", key->lck_area); + key->lck_area[0] = 0; + } + va_end(args); + return 0; +} +EXPORT_SYMBOL(lu_cdebug_printer); + +/* + * Print object header. + */ +static void lu_object_header_print(const struct lu_env *env, + void *cookie, lu_printer_t printer, + const struct lu_object_header *hdr) +{ + (*printer)(env, cookie, "header@%p[%#lx, %d, "DFID"%s%s%s]", + hdr, hdr->loh_flags, atomic_read(&hdr->loh_ref), + PFID(&hdr->loh_fid), + hlist_unhashed(&hdr->loh_hash) ? "" : " hash", + list_empty(&hdr->loh_lru) ? "" : " lru", + hdr->loh_attr & LOHA_EXISTS ? " exist":""); +} + +/* + * Print human readable representation of the @o to the @printer. + */ +void lu_object_print(const struct lu_env *env, void *cookie, + lu_printer_t printer, const struct lu_object *o) +{ + static const char ruler[] = "........................................"; + struct lu_object_header *top; + int depth; + + top = o->lo_header; + lu_object_header_print(env, cookie, printer, top); + (*printer)(env, cookie, "\n"); + list_for_each_entry(o, &top->loh_layers, lo_linkage) { + depth = o->lo_depth + 4; + LASSERT(o->lo_ops->loo_object_print != NULL); + /* + * print `.' @depth times. + */ + (*printer)(env, cookie, "%*.*s", depth, depth, ruler); + o->lo_ops->loo_object_print(env, cookie, printer, o); + (*printer)(env, cookie, "\n"); + } +} +EXPORT_SYMBOL(lu_object_print); + +/* + * Check object consistency. + */ +int lu_object_invariant(const struct lu_object *o) +{ + struct lu_object_header *top; + + top = o->lo_header; + list_for_each_entry(o, &top->loh_layers, lo_linkage) { + if (o->lo_ops->loo_object_invariant != NULL && + !o->lo_ops->loo_object_invariant(o)) + return 0; + } + return 1; +} +EXPORT_SYMBOL(lu_object_invariant); + +static struct lu_object *htable_lookup(struct lu_site *s, + const struct hlist_head *bucket, + const struct lu_fid *f) +{ + struct lu_object_header *h; + struct hlist_node *scan; + + hlist_for_each_entry(h, scan, bucket, loh_hash) { + s->ls_stats.s_cache_check ++; + if (likely(lu_fid_eq(&h->loh_fid, f) && + !lu_object_is_dying(h))) { + /* bump reference count... */ + if (atomic_add_return(1, &h->loh_ref) == 1) + ++ s->ls_busy; + /* and move to the head of the LRU */ + /* + * XXX temporary disable this to measure effects of + * read-write locking. + */ + /* list_move_tail(&h->loh_lru, &s->ls_lru); */ + s->ls_stats.s_cache_hit ++; + return lu_object_top(h); + } + } + s->ls_stats.s_cache_miss ++; + return NULL; +} + +static __u32 fid_hash(const struct lu_fid *f, int bits) +{ + /* all objects with same id and different versions will belong to same + * collisions list. */ + return hash_long(fid_flatten(f), bits); +} + +/* + * Search cache for an object with the fid @f. If such object is found, return + * it. Otherwise, create new object, insert it into cache and return it. In + * any case, additional reference is acquired on the returned object. + */ +struct lu_object *lu_object_find(const struct lu_env *env, + struct lu_site *s, const struct lu_fid *f) +{ + struct lu_object *o; + struct lu_object *shadow; + struct hlist_head *bucket; + + /* + * This uses standard index maintenance protocol: + * + * - search index under lock, and return object if found; + * - otherwise, unlock index, allocate new object; + * - lock index and search again; + * - if nothing is found (usual case), insert newly created + * object into index; + * - otherwise (race: other thread inserted object), free + * object just allocated. + * - unlock index; + * - return object. + */ + + bucket = s->ls_hash + fid_hash(f, s->ls_hash_bits); + + read_lock(&s->ls_guard); + o = htable_lookup(s, bucket, f); + read_unlock(&s->ls_guard); + + if (o != NULL) + return o; + + /* + * Allocate new object. This may result in rather complicated + * operations, including fld queries, inode loading, etc. + */ + o = lu_object_alloc(env, s, f); + if (unlikely(IS_ERR(o))) + return o; + + LASSERT(lu_fid_eq(lu_object_fid(o), f)); + + write_lock(&s->ls_guard); + shadow = htable_lookup(s, bucket, f); + if (likely(shadow == NULL)) { + hlist_add_head(&o->lo_header->loh_hash, bucket); + list_add_tail(&o->lo_header->loh_lru, &s->ls_lru); + ++ s->ls_busy; + ++ s->ls_total; + shadow = o; + o = NULL; + } else + s->ls_stats.s_cache_race ++; + write_unlock(&s->ls_guard); + if (o != NULL) + lu_object_free(env, o); + return shadow; +} +EXPORT_SYMBOL(lu_object_find); + +/* + * Global list of all sites on this node + */ +static LIST_HEAD(lu_sites); +static DECLARE_MUTEX(lu_sites_guard); + +/* + * Global environment used by site shrinker. + */ +static struct lu_env lu_shrink_env; + +/* + * Print all objects in @s. + */ +void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, + lu_printer_t printer) +{ + int i; + + for (i = 0; i < s->ls_hash_size; ++i) { + struct lu_object_header *h; + struct hlist_node *scan; + + read_lock(&s->ls_guard); + hlist_for_each_entry(h, scan, &s->ls_hash[i], loh_hash) { + + if (!list_empty(&h->loh_layers)) { + const struct lu_object *obj; + + obj = lu_object_top(h); + lu_object_print(env, cookie, printer, obj); + } else + lu_object_header_print(env, cookie, printer, h); + } + read_unlock(&s->ls_guard); + } +} +EXPORT_SYMBOL(lu_site_print); + +enum { + LU_CACHE_PERCENT = 30, +}; + +/* + * Return desired hash table order. + */ +static int lu_htable_order(void) +{ + int bits; + unsigned long cache_size; + + /* + * Calculate hash table size, assuming that we want reasonable + * performance when 30% of available memory is occupied by cache of + * lu_objects. + * + * Size of lu_object is (arbitrary) taken as 1K (together with inode). + */ + cache_size = nr_free_buffer_pages() / 100 * + LU_CACHE_PERCENT * (CFS_PAGE_SIZE / 1024); + + for (bits = 1; (1 << bits) < cache_size; ++bits) { + ; + } + return bits; +} + +/* + * Initialize site @s, with @d as the top level device. + */ +int lu_site_init(struct lu_site *s, struct lu_device *top) +{ + int bits; + int size; + int i; + ENTRY; + + memset(s, 0, sizeof *s); + rwlock_init(&s->ls_guard); + CFS_INIT_LIST_HEAD(&s->ls_lru); + CFS_INIT_LIST_HEAD(&s->ls_linkage); + s->ls_top_dev = top; + top->ld_site = s; + lu_device_get(top); + + for (bits = lu_htable_order(), size = 1 << bits; + (s->ls_hash = + cfs_alloc_large(size * sizeof s->ls_hash[0])) == NULL; + --bits, size >>= 1) { + /* + * Scale hash table down, until allocation succeeds. + */ + ; + } + + s->ls_hash_size = size; + s->ls_hash_bits = bits; + s->ls_hash_mask = size - 1; + + for (i = 0; i < size; i++) + INIT_HLIST_HEAD(&s->ls_hash[i]); + + RETURN(0); +} +EXPORT_SYMBOL(lu_site_init); + +/* + * Finalize @s and release its resources. + */ +void lu_site_fini(struct lu_site *s) +{ + LASSERT(list_empty(&s->ls_lru)); + LASSERT(s->ls_total == 0); + + down(&lu_sites_guard); + list_del_init(&s->ls_linkage); + up(&lu_sites_guard); + + if (s->ls_hash != NULL) { + int i; + for (i = 0; i < s->ls_hash_size; i++) + LASSERT(hlist_empty(&s->ls_hash[i])); + cfs_free_large(s->ls_hash); + s->ls_hash = NULL; + } + if (s->ls_top_dev != NULL) { + s->ls_top_dev->ld_site = NULL; + lu_device_put(s->ls_top_dev); + s->ls_top_dev = NULL; + } +} +EXPORT_SYMBOL(lu_site_fini); + +/* + * Called when initialization of stack for this site is completed. + */ +int lu_site_init_finish(struct lu_site *s) +{ + int result; + down(&lu_sites_guard); + result = lu_context_refill(&lu_shrink_env.le_ctx); + if (result == 0) + list_add(&s->ls_linkage, &lu_sites); + up(&lu_sites_guard); + return result; +} +EXPORT_SYMBOL(lu_site_init_finish); + +/* + * Acquire additional reference on device @d + */ +void lu_device_get(struct lu_device *d) +{ + atomic_inc(&d->ld_ref); +} +EXPORT_SYMBOL(lu_device_get); + +/* + * Release reference on device @d. + */ +void lu_device_put(struct lu_device *d) +{ + atomic_dec(&d->ld_ref); +} +EXPORT_SYMBOL(lu_device_put); + +/* + * Initialize device @d of type @t. + */ +int lu_device_init(struct lu_device *d, struct lu_device_type *t) +{ + memset(d, 0, sizeof *d); + atomic_set(&d->ld_ref, 0); + d->ld_type = t; + return 0; +} +EXPORT_SYMBOL(lu_device_init); + +/* + * Finalize device @d. + */ +void lu_device_fini(struct lu_device *d) +{ + if (d->ld_obd != NULL) + /* finish lprocfs */ + lprocfs_obd_cleanup(d->ld_obd); + + LASSERTF(atomic_read(&d->ld_ref) == 0, + "Refcount is %u\n", atomic_read(&d->ld_ref)); +} +EXPORT_SYMBOL(lu_device_fini); + +/* + * Initialize object @o that is part of compound object @h and was created by + * device @d. + */ +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d) +{ + memset(o, 0, sizeof *o); + o->lo_header = h; + o->lo_dev = d; + lu_device_get(d); + CFS_INIT_LIST_HEAD(&o->lo_linkage); + return 0; +} +EXPORT_SYMBOL(lu_object_init); + +/* + * Finalize object and release its resources. + */ +void lu_object_fini(struct lu_object *o) +{ + LASSERT(list_empty(&o->lo_linkage)); + + if (o->lo_dev != NULL) { + lu_device_put(o->lo_dev); + o->lo_dev = NULL; + } +} +EXPORT_SYMBOL(lu_object_fini); + +/* + * Add object @o as first layer of compound object @h + * + * This is typically called by the ->ldo_object_alloc() method of top-level + * device. + */ +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o) +{ + list_move(&o->lo_linkage, &h->loh_layers); +} +EXPORT_SYMBOL(lu_object_add_top); + +/* + * Add object @o as a layer of compound object, going after @before.1 + * + * This is typically called by the ->ldo_object_alloc() method of + * @before->lo_dev. + */ +void lu_object_add(struct lu_object *before, struct lu_object *o) +{ + list_move(&o->lo_linkage, &before->lo_linkage); +} +EXPORT_SYMBOL(lu_object_add); + +/* + * Initialize compound object. + */ +int lu_object_header_init(struct lu_object_header *h) +{ + memset(h, 0, sizeof *h); + atomic_set(&h->loh_ref, 1); + INIT_HLIST_NODE(&h->loh_hash); + CFS_INIT_LIST_HEAD(&h->loh_lru); + CFS_INIT_LIST_HEAD(&h->loh_layers); + return 0; +} +EXPORT_SYMBOL(lu_object_header_init); + +/* + * Finalize compound object. + */ +void lu_object_header_fini(struct lu_object_header *h) +{ + LASSERT(list_empty(&h->loh_layers)); + LASSERT(list_empty(&h->loh_lru)); + LASSERT(hlist_unhashed(&h->loh_hash)); +} +EXPORT_SYMBOL(lu_object_header_fini); + +/* + * Given a compound object, find its slice, corresponding to the device type + * @dtype. + */ +struct lu_object *lu_object_locate(struct lu_object_header *h, + struct lu_device_type *dtype) +{ + struct lu_object *o; + + list_for_each_entry(o, &h->loh_layers, lo_linkage) { + if (o->lo_dev->ld_type == dtype) + return o; + } + return NULL; +} +EXPORT_SYMBOL(lu_object_locate); + +enum { + /* + * Maximal number of tld slots. + */ + LU_CONTEXT_KEY_NR = 16 +}; + +static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, }; + +static spinlock_t lu_keys_guard = SPIN_LOCK_UNLOCKED; + +/* + * Register new key. + */ +int lu_context_key_register(struct lu_context_key *key) +{ + int result; + int i; + + LASSERT(key->lct_init != NULL); + LASSERT(key->lct_fini != NULL); + LASSERT(key->lct_tags != 0); + LASSERT(key->lct_owner != NULL); + + result = -ENFILE; + spin_lock(&lu_keys_guard); + for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { + if (lu_keys[i] == NULL) { + key->lct_index = i; + atomic_set(&key->lct_used, 1); + lu_keys[i] = key; + result = 0; + break; + } + } + spin_unlock(&lu_keys_guard); + return result; +} +EXPORT_SYMBOL(lu_context_key_register); + +static void key_fini(struct lu_context *ctx, int index) +{ + if (ctx->lc_value[index] != NULL) { + struct lu_context_key *key; + + key = lu_keys[index]; + LASSERT(key != NULL); + LASSERT(key->lct_fini != NULL); + LASSERT(atomic_read(&key->lct_used) > 1); + + key->lct_fini(ctx, key, ctx->lc_value[index]); + atomic_dec(&key->lct_used); + LASSERT(key->lct_owner != NULL); + if (!(ctx->lc_tags & LCT_NOREF)) { + LASSERT(module_refcount(key->lct_owner) > 0); + module_put(key->lct_owner); + } + ctx->lc_value[index] = NULL; + } +} + +/* + * Deregister key. + */ +void lu_context_key_degister(struct lu_context_key *key) +{ + LASSERT(atomic_read(&key->lct_used) >= 1); + LASSERT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys)); + + key_fini(&lu_shrink_env.le_ctx, key->lct_index); + + if (atomic_read(&key->lct_used) > 1) + CERROR("key has instances.\n"); + spin_lock(&lu_keys_guard); + lu_keys[key->lct_index] = NULL; + spin_unlock(&lu_keys_guard); +} +EXPORT_SYMBOL(lu_context_key_degister); + +/* + * Return value associated with key @key in context @ctx. + */ +void *lu_context_key_get(const struct lu_context *ctx, + struct lu_context_key *key) +{ + LASSERT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys)); + return ctx->lc_value[key->lct_index]; +} +EXPORT_SYMBOL(lu_context_key_get); + +static void keys_fini(struct lu_context *ctx) +{ + int i; + + if (ctx->lc_value != NULL) { + for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) + key_fini(ctx, i); + OBD_FREE(ctx->lc_value, + ARRAY_SIZE(lu_keys) * sizeof ctx->lc_value[0]); + ctx->lc_value = NULL; + } +} + +static int keys_fill(const struct lu_context *ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { + struct lu_context_key *key; + + key = lu_keys[i]; + if (ctx->lc_value[i] == NULL && + key != NULL && key->lct_tags & ctx->lc_tags) { + void *value; + + LASSERT(key->lct_init != NULL); + LASSERT(key->lct_index == i); + + value = key->lct_init(ctx, key); + if (unlikely(IS_ERR(value))) + return PTR_ERR(value); + LASSERT(key->lct_owner != NULL); + if (!(ctx->lc_tags & LCT_NOREF)) + try_module_get(key->lct_owner); + atomic_inc(&key->lct_used); + ctx->lc_value[i] = value; + } + } + return 0; +} + +static int keys_init(struct lu_context *ctx) +{ + int result; + + OBD_ALLOC(ctx->lc_value, ARRAY_SIZE(lu_keys) * sizeof ctx->lc_value[0]); + if (likely(ctx->lc_value != NULL)) + result = keys_fill(ctx); + else + result = -ENOMEM; + + if (result != 0) + keys_fini(ctx); + return result; +} + +/* + * Initialize context data-structure. Create values for all keys. + */ +int lu_context_init(struct lu_context *ctx, __u32 tags) +{ + memset(ctx, 0, sizeof *ctx); + ctx->lc_tags = tags; + return keys_init(ctx); +} +EXPORT_SYMBOL(lu_context_init); + +/* + * Finalize context data-structure. Destroy key values. + */ +void lu_context_fini(struct lu_context *ctx) +{ + keys_fini(ctx); +} +EXPORT_SYMBOL(lu_context_fini); + +/* + * Called before entering context. + */ +void lu_context_enter(struct lu_context *ctx) +{ +} +EXPORT_SYMBOL(lu_context_enter); + +/* + * Called after exiting from @ctx + */ +void lu_context_exit(struct lu_context *ctx) +{ + int i; + + if (ctx->lc_value != NULL) { + for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { + if (ctx->lc_value[i] != NULL) { + struct lu_context_key *key; + + key = lu_keys[i]; + LASSERT(key != NULL); + if (key->lct_exit != NULL) + key->lct_exit(ctx, + key, ctx->lc_value[i]); + } + } + } +} +EXPORT_SYMBOL(lu_context_exit); + +/* + * Allocate for context all missing keys that were registered after context + * creation. + */ +int lu_context_refill(const struct lu_context *ctx) +{ + LASSERT(ctx->lc_value != NULL); + return keys_fill(ctx); +} +EXPORT_SYMBOL(lu_context_refill); + +static int lu_env_setup(struct lu_env *env, struct lu_context *ses, + __u32 tags, int noref) +{ + int result; + + LASSERT(ergo(!noref, !(tags & LCT_NOREF))); + + env->le_ses = ses; + result = lu_context_init(&env->le_ctx, tags); + if (likely(result == 0)) + lu_context_enter(&env->le_ctx); + return result; +} + +static int lu_env_init_noref(struct lu_env *env, struct lu_context *ses, + __u32 tags) +{ + return lu_env_setup(env, ses, tags, 1); +} + +int lu_env_init(struct lu_env *env, struct lu_context *ses, __u32 tags) +{ + return lu_env_setup(env, ses, tags, 0); +} +EXPORT_SYMBOL(lu_env_init); + +void lu_env_fini(struct lu_env *env) +{ + lu_context_exit(&env->le_ctx); + lu_context_fini(&env->le_ctx); + env->le_ses = NULL; +} +EXPORT_SYMBOL(lu_env_fini); + +static int lu_cache_shrink(int nr, unsigned int gfp_mask) +{ + struct lu_site *s; + struct lu_site *tmp; + int cached = 0; + int remain = nr; + LIST_HEAD(splice); + + if (nr != 0 && !(gfp_mask & __GFP_FS)) + return -1; + + down(&lu_sites_guard); + list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { + if (nr != 0) { + remain = lu_site_purge(&lu_shrink_env, s, remain); + /* + * Move just shrunk site to the tail of site list to + * assure shrinking fairness. + */ + list_move_tail(&s->ls_linkage, &splice); + } + read_lock(&s->ls_guard); + cached += s->ls_total - s->ls_busy; + read_unlock(&s->ls_guard); + if (remain <= 0) + break; + } + list_splice(&splice, lu_sites.prev); + up(&lu_sites_guard); + return cached; +} + +static struct shrinker *lu_site_shrinker = NULL; + +/* + * Initialization of global lu_* data. + */ +int lu_global_init(void) +{ + int result; + + LU_CONTEXT_KEY_INIT(&lu_global_key); + result = lu_context_key_register(&lu_global_key); + if (result == 0) { + /* + * At this level, we don't know what tags are needed, so + * allocate them conservatively. This should not be too bad, + * because this environment is global. + */ + down(&lu_sites_guard); + result = lu_env_init_noref(&lu_shrink_env, NULL, LCT_SHRINKER); + up(&lu_sites_guard); + if (result == 0) { + /* + * seeks estimation: 3 seeks to read a record from oi, + * one to read inode, one for ea. Unfortunately + * setting this high value results in lu_object/inode + * cache consuming all the memory. + */ + lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, + lu_cache_shrink); + if (result == 0) + result = lu_time_global_init(); + } + } + return result; +} + +/* + * Dual to lu_global_init(). + */ +void lu_global_fini(void) +{ + lu_time_global_fini(); + if (lu_site_shrinker != NULL) { + remove_shrinker(lu_site_shrinker); + lu_site_shrinker = NULL; + } + + lu_context_key_degister(&lu_global_key); + + /* + * Tear shrinker environment down _after_ de-registering + * lu_global_key, because the latter has a value in the former. + */ + down(&lu_sites_guard); + lu_env_fini(&lu_shrink_env); + up(&lu_sites_guard); +} + +struct lu_buf LU_BUF_NULL = { + .lb_buf = NULL, + .lb_len = 0 +}; +EXPORT_SYMBOL(LU_BUF_NULL); + +/* + * XXX: Functions below logically belong to fid module, but they are used by + * dt_store_open(). Put them here until better place is found. + */ + +void fid_pack(struct lu_fid_pack *pack, const struct lu_fid *fid, + struct lu_fid *befider) +{ + int recsize; + __u64 seq; + __u32 oid; + + seq = fid_seq(fid); + oid = fid_oid(fid); + + /* + * Two cases: compact 6 bytes representation for a common case, and + * full 17 byte representation for "unusual" fid. + */ + + /* + * Check that usual case is really usual. + */ + CLASSERT(LUSTRE_SEQ_MAX_WIDTH < 0xffffull); + + if (fid_is_igif(fid) || + seq > 0xffffffull || oid > 0xffff || fid_ver(fid) != 0) { + fid_cpu_to_be(befider, fid); + recsize = sizeof *befider; + } else { + unsigned char *small_befider; + + small_befider = (char *)befider; + + small_befider[0] = seq >> 16; + small_befider[1] = seq >> 8; + small_befider[2] = seq; + + small_befider[3] = oid >> 8; + small_befider[4] = oid; + + recsize = 5; + } + memcpy(pack->fp_area, befider, recsize); + pack->fp_len = recsize + 1; +} +EXPORT_SYMBOL(fid_pack); + +void fid_unpack(const struct lu_fid_pack *pack, struct lu_fid *fid) +{ + switch (pack->fp_len) { + case sizeof *fid + 1: + memcpy(fid, pack->fp_area, sizeof *fid); + fid_be_to_cpu(fid, fid); + break; + case 6: { + const unsigned char *area; + + area = pack->fp_area; + fid->f_seq = (area[0] << 16) | (area[1] << 8) | area[2]; + fid->f_oid = (area[3] << 8) | area[4]; + fid->f_ver = 0; + break; + } + default: + CERROR("Unexpected packed fid size: %d\n", pack->fp_len); + LBUG(); + } +} +EXPORT_SYMBOL(fid_unpack); + +const char *lu_time_names[LU_TIME_NR] = { + [LU_TIME_FIND_LOOKUP] = "find_lookup", + [LU_TIME_FIND_ALLOC] = "find_alloc", + [LU_TIME_FIND_INSERT] = "find_insert" +}; +EXPORT_SYMBOL(lu_time_names); diff --git a/lustre/obdclass/lu_time.c b/lustre/obdclass/lu_time.c new file mode 100644 index 0000000..e51b7af --- /dev/null +++ b/lustre/obdclass/lu_time.c @@ -0,0 +1,214 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Time Tracking. + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + * These are the only exported functions, they provide some generic + * infrastructure for managing object devices + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +/* OBD_{ALLOC,FREE}_PTR() */ +#include <obd_support.h> +#include <lprocfs_status.h> +#include <lu_object.h> +#include <lu_time.h> + +enum { + LU_TIME_DEPTH_MAX = 16 +}; + +struct lu_time_data { + int ltd_tos; /* top of the stack */ + unsigned long long ltd_timestamp[LU_TIME_DEPTH_MAX]; +}; + +static void *lu_time_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct lu_time_data *value; + + OBD_ALLOC_PTR(value); + if (value == NULL) + value = ERR_PTR(-ENOMEM); + return value; +} + +static void lu_time_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct lu_time_data *value = data; + OBD_FREE_PTR(value); +} + +void lu_time_key_exit(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct lu_time_data *value = data; + LASSERT(value->ltd_tos == 0); +} + +/* + * Key, holding temporary buffer. This key is registered very early by + * lu_global_init(). + */ +static struct lu_context_key lu_time_key = { + .lct_tags = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD, + .lct_init = lu_time_key_init, + .lct_fini = lu_time_key_fini, + .lct_exit = lu_time_key_exit +}; + +int lu_time_global_init(void) +{ + LU_CONTEXT_KEY_INIT(&lu_time_key); + return lu_context_key_register(&lu_time_key); +} + +void lu_time_global_fini(void) +{ + lu_context_key_degister(&lu_time_key); +} + +int lu_time_named_init(struct lprocfs_stats **stats, const char *name, + cfs_proc_dir_entry_t *entry, + const char **names, int nr) +{ + int result; + int i; + + ENTRY; + + *stats = NULL; + if (nr == 0) + RETURN(0); + + *stats = lprocfs_alloc_stats(nr); + if (*stats != NULL) { + result = lprocfs_register_stats(entry, name, *stats); + if (result == 0) { + for (i = 0; i < nr; ++i) { + lprocfs_counter_init(*stats, i, + LPROCFS_CNTR_AVGMINMAX, + names[i], "usec"); + } + } + } else + result = -ENOMEM; + + if (result != 0) + lu_time_fini(stats); + + RETURN(result); +} +EXPORT_SYMBOL(lu_time_named_init); + +int lu_time_init(struct lprocfs_stats **stats, cfs_proc_dir_entry_t *entry, + const char **names, int nr) +{ + return lu_time_named_init(stats, "stats", entry, names, nr); +} +EXPORT_SYMBOL(lu_time_init); + +void lu_time_fini(struct lprocfs_stats **stats) +{ + if (*stats != NULL) { + lprocfs_free_stats(stats); + *stats = NULL; + } +} +EXPORT_SYMBOL(lu_time_fini); + +static inline struct lu_time_data *lu_time_data_get(const struct lu_env *env) +{ + return lu_context_key_get(&env->le_ctx, &lu_time_key); +} + +int lu_time_is_clean(const struct lu_env *env) +{ + return lu_time_data_get(env)->ltd_tos == 0; +} +EXPORT_SYMBOL(lu_time_is_clean); + +/* from sleepometer by Andrew Morton */ +unsigned long long lu_time_stamp_get(void) +{ + /* + * Return timestamp with microsecond precision. This has to be cheap. + */ +//#ifdef CONFIG_X86 +#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) + /* + * do_gettimeofday() goes backwards sometimes :(. Usethe TSC + */ + unsigned long long ret; + extern unsigned long cpu_khz; + + rdtscll(ret); + do_div(ret, cpu_khz / 1000); + return ret; +#else + struct timeval now; + unsigned long long ret; + + do_gettimeofday(&now); + ret = now.tv_sec; + ret *= 1000000; + ret += now.tv_usec; + return ret; +#endif +} +/* + * Export it, but do not advertise in headers. This is limited use only. + */ +EXPORT_SYMBOL(lu_time_stamp_get); + +void lu_lprocfs_time_start(const struct lu_env *env) +{ + struct lu_time_data *ltd = lu_time_data_get(env); + + LASSERT(0 <= ltd->ltd_tos); + LASSERT(ltd->ltd_tos < ARRAY_SIZE(ltd->ltd_timestamp)); + ltd->ltd_timestamp[ltd->ltd_tos++] = lu_time_stamp_get(); +} +EXPORT_SYMBOL(lu_lprocfs_time_start); + +void lu_lprocfs_time_end(const struct lu_env *env, + struct lprocfs_stats *stats, int idx) +{ + struct lu_time_data *ltd = lu_time_data_get(env); + long long diff; + + --ltd->ltd_tos; + LASSERT(0 <= ltd->ltd_tos); + LASSERT(ltd->ltd_tos < ARRAY_SIZE(ltd->ltd_timestamp)); + diff = lu_time_stamp_get() - ltd->ltd_timestamp[ltd->ltd_tos]; + if (diff >= 0 && stats != NULL) + lprocfs_counter_add(stats, idx, diff); +} +EXPORT_SYMBOL(lu_lprocfs_time_end); diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c index 9d4d1ec..0a08a3d 100644 --- a/lustre/obdclass/lustre_handles.c +++ b/lustre/obdclass/lustre_handles.c @@ -32,11 +32,25 @@ #include <lustre_handles.h> #include <lustre_lib.h> -spinlock_t handle_lock; +#if !defined(HAVE_RCU) || !defined(__KERNEL__) +# define list_add_rcu list_add +# define list_del_rcu list_del +# define list_for_each_rcu list_for_each +# define list_for_each_safe_rcu list_for_each_safe +# define rcu_read_lock() spin_lock(&bucket->lock) +# define rcu_read_unlock() spin_unlock(&bucket->lock) +#endif /* ifndef HAVE_RCU */ + static __u64 handle_base; #define HANDLE_INCR 7 -static struct list_head *handle_hash = NULL; -static int handle_count = 0; +static spinlock_t handle_base_lock; + +static struct handle_bucket { + spinlock_t lock; + struct list_head head; +} *handle_hash; + +static atomic_t handle_count = ATOMIC_INIT(0); #define HANDLE_HASH_SIZE (1 << 14) #define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1) @@ -47,25 +61,20 @@ static int handle_count = 0; */ void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb) { - struct list_head *bucket; + struct handle_bucket *bucket; ENTRY; LASSERT(h != NULL); LASSERT(list_empty(&h->h_link)); - spin_lock(&handle_lock); - /* * This is fast, but simplistic cookie generation algorithm, it will * need a re-do at some point in the future for security. */ - h->h_cookie = handle_base; + spin_lock(&handle_base_lock); handle_base += HANDLE_INCR; - bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); - list_add(&h->h_link, bucket); - handle_count++; - + h->h_cookie = handle_base; if (unlikely(handle_base == 0)) { /* * Cookie of zero is "dangerous", because in many places it's @@ -75,10 +84,17 @@ void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb) CWARN("The universe has been exhausted: cookie wrap-around.\n"); handle_base += HANDLE_INCR; } + spin_unlock(&handle_base_lock); + + atomic_inc(&handle_count); + h->h_addref = cb; + spin_lock_init(&h->h_lock); - spin_unlock(&handle_lock); + bucket = &handle_hash[h->h_cookie & HANDLE_HASH_MASK]; + spin_lock(&bucket->lock); + list_add_rcu(&h->h_link, &bucket->head); + spin_unlock(&bucket->lock); - h->h_addref = cb; CDEBUG(D_INFO, "added object %p with handle "LPX64" to hash\n", h, h->h_cookie); EXIT; @@ -95,57 +111,104 @@ static void class_handle_unhash_nolock(struct portals_handle *h) CDEBUG(D_INFO, "removing object %p with handle "LPX64" from hash\n", h, h->h_cookie); - handle_count--; - list_del_init(&h->h_link); + spin_lock(&h->h_lock); + if (h->h_cookie == 0) { + spin_unlock(&h->h_lock); + return; + } + h->h_cookie = 0; + spin_unlock(&h->h_lock); + list_del_rcu(&h->h_link); } void class_handle_unhash(struct portals_handle *h) { - spin_lock(&handle_lock); + struct handle_bucket *bucket; + bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); + + spin_lock(&bucket->lock); class_handle_unhash_nolock(h); - spin_unlock(&handle_lock); + spin_unlock(&bucket->lock); + + atomic_dec(&handle_count); +} + +void class_handle_hash_back(struct portals_handle *h) +{ + struct handle_bucket *bucket; + ENTRY; + + bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); + + atomic_inc(&handle_count); + spin_lock(&bucket->lock); + list_add_rcu(&h->h_link, &bucket->head); + spin_unlock(&bucket->lock); + + EXIT; } void *class_handle2object(__u64 cookie) { - struct list_head *bucket, *tmp; + struct handle_bucket *bucket; + struct list_head *tmp; void *retval = NULL; ENTRY; LASSERT(handle_hash != NULL); + /* Be careful when you want to change this code. See the + * rcu_read_lock() definition on top this file. - jxiong */ bucket = handle_hash + (cookie & HANDLE_HASH_MASK); - spin_lock(&handle_lock); - list_for_each(tmp, bucket) { + rcu_read_lock(); + list_for_each_rcu(tmp, &bucket->head) { struct portals_handle *h; h = list_entry(tmp, struct portals_handle, h_link); + if (h->h_cookie != cookie) + continue; - if (h->h_cookie == cookie) { + spin_lock(&h->h_lock); + if (likely(h->h_cookie != 0)) { h->h_addref(h); retval = h; - break; } + spin_unlock(&h->h_lock); + break; } - spin_unlock(&handle_lock); + rcu_read_unlock(); RETURN(retval); } +void class_handle_free_cb(struct rcu_head *rcu) +{ + struct portals_handle *h = RCU2HANDLE(rcu); + if (h->h_free_cb) { + h->h_free_cb(h->h_ptr, h->h_size); + } else { + void *ptr = h->h_ptr; + unsigned int size = h->h_size; + OBD_FREE(ptr, size); + } +} + int class_handle_init(void) { - struct list_head *bucket; + struct handle_bucket *bucket; LASSERT(handle_hash == NULL); - OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); + OBD_VMALLOC(handle_hash, sizeof(*bucket) * HANDLE_HASH_SIZE); if (handle_hash == NULL) return -ENOMEM; + spin_lock_init(&handle_base_lock); for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash; - bucket--) - CFS_INIT_LIST_HEAD(bucket); - + bucket--) { + CFS_INIT_LIST_HEAD(&bucket->head); + spin_lock_init(&bucket->lock); + } ll_get_random_bytes(&handle_base, sizeof(handle_base)); LASSERT(handle_base != 0ULL); @@ -156,10 +219,10 @@ static void cleanup_all_handles(void) { int i; - spin_lock(&handle_lock); for (i = 0; i < HANDLE_HASH_SIZE; i++) { struct list_head *tmp, *pos; - list_for_each_safe(tmp, pos, &(handle_hash[i])) { + spin_lock(&handle_hash[i].lock); + list_for_each_safe_rcu(tmp, pos, &(handle_hash[i].head)) { struct portals_handle *h; h = list_entry(tmp, struct portals_handle, h_link); @@ -168,22 +231,24 @@ static void cleanup_all_handles(void) class_handle_unhash_nolock(h); } + spin_unlock(&handle_hash[i].lock); } - spin_unlock(&handle_lock); } void class_handle_cleanup(void) { + int count; LASSERT(handle_hash != NULL); - if (handle_count != 0) { - CERROR("handle_count at cleanup: %d\n", handle_count); + count = atomic_read(&handle_count); + if (count != 0) { + CERROR("handle_count at cleanup: %d\n", count); cleanup_all_handles(); } OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); handle_hash = NULL; - if (handle_count) - CERROR("leaked %d handles\n", handle_count); + if (atomic_read(&handle_count)) + CERROR("leaked %d handles\n", atomic_read(&handle_count)); } diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index 8728c7d..59dfd96 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -59,7 +59,7 @@ void class_exit_uuidlist(void) class_del_uuid(NULL); } -int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index) +int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index) { struct list_head *tmp; @@ -82,9 +82,9 @@ int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index) return -ENOENT; } -/* Add a nid to a niduuid. Multiple nids can be added to a single uuid; +/* Add a nid to a niduuid. Multiple nids can be added to a single uuid; LNET will choose the best one. */ -int class_add_uuid(char *uuid, __u64 nid) +int class_add_uuid(const char *uuid, __u64 nid) { struct uuid_nid_data *data, *entry; int nob = strnlen (uuid, CFS_PAGE_SIZE) + 1; @@ -110,6 +110,7 @@ int class_add_uuid(char *uuid, __u64 nid) data->un_count = 1; spin_lock (&g_uuid_lock); + list_for_each_entry(entry, &g_uuid_list, un_list) { if (entry->un_nid == nid && (strcmp(entry->un_uuid, uuid) == 0)) { @@ -134,7 +135,7 @@ int class_add_uuid(char *uuid, __u64 nid) } /* Delete the nids for one uuid if specified, otherwise delete all */ -int class_del_uuid (char *uuid) +int class_del_uuid(const char *uuid) { struct list_head deathrow; struct uuid_nid_data *data, *n; @@ -142,6 +143,7 @@ int class_del_uuid (char *uuid) CFS_INIT_LIST_HEAD (&deathrow); spin_lock (&g_uuid_lock); + list_for_each_entry_safe(data, n, &g_uuid_list, un_list) { if (uuid == NULL) { list_del (&data->un_list); diff --git a/lustre/obdclass/mea.c b/lustre/obdclass/mea.c new file mode 100644 index 0000000..10ab6ca --- /dev/null +++ b/lustre/obdclass/mea.c @@ -0,0 +1,180 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifdef __KERNEL__ +#include <linux/kmod.h> /* for request_module() */ +#include <linux/module.h> +#include <obd_class.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#else +#include <liblustre.h> +#include <obd_class.h> +#include <obd.h> +#endif +#include <lprocfs_status.h> +#include <lustre/lustre_idl.h> + +#ifdef __KERNEL__ +#include <linux/jbd.h> +/* LDISKFS_SB() */ +#include <linux/ldiskfs_fs.h> +#endif +static int mea_last_char_hash(int count, char *name, int namelen) +{ + unsigned int c; + + c = name[namelen - 1]; + if (c == 0) + CWARN("looks like wrong len is passed\n"); + c = c % count; + return c; +} + +static int mea_all_chars_hash(int count, char *name, int namelen) +{ + unsigned int c = 0; + + while (--namelen >= 0) + c += name[namelen]; + c = c % count; + return c; +} + +#ifdef __KERNEL__ +/* This hash calculate method must be same as the lvar hash method */ + +#define LVAR_HASH_SANDWICH (0) +#define LVAR_HASH_TEA (1) +#define LVAR_HASH_R5 (0) +#define LVAR_HASH_PREFIX (0) + +static __u32 hash_build0(const char *name, int namelen) +{ + __u32 result; + + if (namelen == 0) + return 0; + if (strncmp(name, ".", 1) == 0 && namelen == 1) + return 1; + if (strncmp(name, "..", 2) == 0 && namelen == 2) + return 2; + + if (LVAR_HASH_PREFIX) { + result = 0; + strncpy((void *)&result, + name, min(namelen, (int)sizeof result)); + } else { + struct ldiskfs_dx_hash_info hinfo; + + if (LVAR_HASH_TEA) + hinfo.hash_version = LDISKFS_DX_HASH_TEA; + else + hinfo.hash_version = LDISKFS_DX_HASH_R5; + hinfo.seed = 0; + ldiskfsfs_dirhash(name, namelen, &hinfo); + result = hinfo.hash; + if (LVAR_HASH_SANDWICH) { + __u32 result2; + + hinfo.hash_version = LDISKFS_DX_HASH_TEA; + hinfo.seed = 0; + ldiskfsfs_dirhash(name, namelen, &hinfo); + result2 = hinfo.hash; + result = (0xfc000000 & result2) | (0x03ffffff & result); + } + } + + return result; +} + +enum { + HASH_GRAY_AREA = 1024 +}; + +static __u32 hash_build(const char *name, int namelen) +{ + __u32 hash; + + hash = (hash_build0(name, namelen) << 1) & MAX_HASH_SIZE; + if (hash > MAX_HASH_SIZE - HASH_GRAY_AREA) + hash &= HASH_GRAY_AREA - 1; + return hash; +} + +static int mea_hash_segment(int count, const char *name, int namelen) +{ + __u32 hash; + + LASSERT(IS_PO2(MAX_HASH_SIZE + 1)); + + hash = hash_build(name, namelen) / (MAX_HASH_SIZE / count); + LASSERTF(hash < count, "hash %x count %d \n", hash, count); + + return hash; +} +#else +static int mea_hash_segment(int count, char *name, int namelen) +{ +#warning "fix for liblustre" + return 0; +} +#endif +int raw_name2idx(int hashtype, int count, const char *name, int namelen) +{ + unsigned int c = 0; + + LASSERT(namelen > 0); + if (count <= 1) + return 0; + + switch (hashtype) { + case MEA_MAGIC_LAST_CHAR: + c = mea_last_char_hash(count, (char *)name, namelen); + break; + case MEA_MAGIC_ALL_CHARS: + c = mea_all_chars_hash(count, (char *)name, namelen); + break; + case MEA_MAGIC_HASH_SEGMENT: + c = mea_hash_segment(count, (char *)name, namelen); + break; + default: + CERROR("Unknown hash type 0x%x\n", hashtype); + } + + LASSERT(c < count); + return c; +} + +int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen) +{ + unsigned int c; + + LASSERT(mea && mea->mea_count); + + c = raw_name2idx(mea->mea_magic, mea->mea_count, name, namelen); + + LASSERT(c < mea->mea_count); + return c; +} + diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index ca283d1..676a7ad 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -98,7 +98,7 @@ int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh) *endp = '\0'; *nid = libcfs_str2nid(buf); if (*nid == LNET_NID_ANY) { - LCONSOLE_ERROR("Can't parse NID '%s'\n", buf); + LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", buf); *endp = tmp; return -EINVAL; } @@ -147,26 +147,6 @@ int class_attach(struct lustre_cfg *lcfg) CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n", MKSTR(typename), MKSTR(name), MKSTR(uuid)); - /* Mountconf transitional hack, should go away after 1.6. - 1.4.7 uses the old names, so translate back if the - mountconf flag is set. - 1.6 should set this flag, and translate the other way here - if not set. */ - if (lcfg->lcfg_flags & LCFG_FLG_MOUNTCONF){ - char *tmp = NULL; - if (strcmp(typename, "mds") == 0) - tmp = "mdt"; - if (strcmp(typename, "mdt") == 0) - tmp = "mds"; - if (strcmp(typename, "osd") == 0) - tmp = "obdfilter"; - if (tmp) { - LCONSOLE_WARN("Using type %s for %s %s\n", tmp, - MKSTR(typename), MKSTR(name)); - typename = tmp; - } - } - obd = class_newdev(typename, name); if (IS_ERR(obd)) { /* Already exists or out of obds */ @@ -199,8 +179,9 @@ int class_attach(struct lustre_cfg *lcfg) cfs_init_timer(&obd->obd_recovery_timer); spin_lock_init(&obd->obd_processing_task_lock); cfs_waitq_init(&obd->obd_next_transno_waitq); - CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue); - CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); + CFS_INIT_LIST_HEAD(&obd->obd_req_replay_queue); + CFS_INIT_LIST_HEAD(&obd->obd_lock_replay_queue); + CFS_INIT_LIST_HEAD(&obd->obd_final_req_queue); spin_lock_init(&obd->obd_uncommitted_replies_lock); CFS_INIT_LIST_HEAD(&obd->obd_uncommitted_replies); @@ -280,11 +261,12 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg) list_del_init(&exp->exp_obd_chain_timed); class_export_put(exp); - err = obd_setup(obd, sizeof(*lcfg), lcfg); + err = obd_setup(obd, lcfg); if (err) GOTO(err_exp, err); obd->obd_set_up = 1; + spin_lock(&obd->obd_dev_lock); /* cleanup drops this */ class_incref(obd); @@ -387,7 +369,7 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) obd->obd_force = 1; break; case 'A': - LCONSOLE_WARN("Failing over %s\n", + LCONSOLE_WARN("Failing over %s\n", obd->obd_name); obd->obd_fail = 1; obd->obd_no_transno = 1; @@ -416,7 +398,6 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) dump_exports(obd); /* Allow a failed cleanup to try again. */ obd->obd_stopping = 0; - RETURN(-EBUSY); } #endif /* refcounf - 3 might be the number of real exports @@ -427,7 +408,6 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) dump_exports(obd); class_disconnect_exports(obd); } - LASSERT(obd->obd_self_export); /* Precleanup stage 1, we must make sure all exports (other than the @@ -439,7 +419,6 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) class_decref(obd); obd->obd_set_up = 0; - RETURN(0); } @@ -475,7 +454,7 @@ void class_decref(struct obd_device *obd) if (err) CERROR("Precleanup %s returned %d\n", obd->obd_name, err); - + spin_lock(&obd->obd_self_export->exp_lock); obd->obd_self_export->exp_flags |= (obd->obd_fail ? OBD_OPT_FAILOVER : 0) | @@ -520,7 +499,7 @@ int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(-EINVAL); } if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) && + strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) && strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) { CERROR("can't add connection on non-client dev\n"); RETURN(-EINVAL); @@ -568,9 +547,39 @@ int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(rc); } +int class_sec_flavor(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct sec_flavor_config *conf; + ENTRY; + + if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) && + strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) { + CERROR("Can't set security flavor on obd %s\n", + obd->obd_type->typ_name); + RETURN(-EINVAL); + } + + if (LUSTRE_CFG_BUFLEN(lcfg, 1) != sizeof(*conf)) { + CERROR("invalid data\n"); + RETURN(-EINVAL); + } + + conf = &obd->u.cli.cl_sec_conf; + memcpy(conf, lustre_cfg_buf(lcfg, 1), sizeof(*conf)); + +#ifdef __BIG_ENDIAN + __swab32s(&conf->sfc_rpc_flavor); + __swab32s(&conf->sfc_bulk_csum); + __swab32s(&conf->sfc_bulk_priv); + __swab32s(&conf->sfc_flags); +#endif + + RETURN(0); +} + CFS_LIST_HEAD(lustre_profile_list); -struct lustre_profile *class_get_profile(char * prof) +struct lustre_profile *class_get_profile(const char * prof) { struct lustre_profile *lprof; @@ -604,34 +613,34 @@ int class_add_profile(int proflen, char *prof, int osclen, char *osc, memcpy(lprof->lp_profile, prof, proflen); LASSERT(osclen == (strlen(osc) + 1)); - OBD_ALLOC(lprof->lp_osc, osclen); - if (lprof->lp_osc == NULL) + OBD_ALLOC(lprof->lp_dt, osclen); + if (lprof->lp_dt == NULL) GOTO(out, err = -ENOMEM); - memcpy(lprof->lp_osc, osc, osclen); + memcpy(lprof->lp_dt, osc, osclen); if (mdclen > 0) { LASSERT(mdclen == (strlen(mdc) + 1)); - OBD_ALLOC(lprof->lp_mdc, mdclen); - if (lprof->lp_mdc == NULL) + OBD_ALLOC(lprof->lp_md, mdclen); + if (lprof->lp_md == NULL) GOTO(out, err = -ENOMEM); - memcpy(lprof->lp_mdc, mdc, mdclen); + memcpy(lprof->lp_md, mdc, mdclen); } list_add(&lprof->lp_list, &lustre_profile_list); RETURN(err); out: - if (lprof->lp_mdc) - OBD_FREE(lprof->lp_mdc, mdclen); - if (lprof->lp_osc) - OBD_FREE(lprof->lp_osc, osclen); + if (lprof->lp_md) + OBD_FREE(lprof->lp_md, mdclen); + if (lprof->lp_dt) + OBD_FREE(lprof->lp_dt, osclen); if (lprof->lp_profile) OBD_FREE(lprof->lp_profile, proflen); OBD_FREE(lprof, sizeof(*lprof)); RETURN(err); } -void class_del_profile(char *prof) +void class_del_profile(const char *prof) { struct lustre_profile *lprof; ENTRY; @@ -642,9 +651,9 @@ void class_del_profile(char *prof) if (lprof) { list_del(&lprof->lp_list); OBD_FREE(lprof->lp_profile, strlen(lprof->lp_profile) + 1); - OBD_FREE(lprof->lp_osc, strlen(lprof->lp_osc) + 1); - if (lprof->lp_mdc) - OBD_FREE(lprof->lp_mdc, strlen(lprof->lp_mdc) + 1); + OBD_FREE(lprof->lp_dt, strlen(lprof->lp_dt) + 1); + if (lprof->lp_md) + OBD_FREE(lprof->lp_md, strlen(lprof->lp_md) + 1); OBD_FREE(lprof, sizeof *lprof); } EXIT; @@ -659,9 +668,9 @@ void class_del_profiles(void) list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) { list_del(&lprof->lp_list); OBD_FREE(lprof->lp_profile, strlen(lprof->lp_profile) + 1); - OBD_FREE(lprof->lp_osc, strlen(lprof->lp_osc) + 1); - if (lprof->lp_mdc) - OBD_FREE(lprof->lp_mdc, strlen(lprof->lp_mdc) + 1); + OBD_FREE(lprof->lp_dt, strlen(lprof->lp_dt) + 1); + if (lprof->lp_md) + OBD_FREE(lprof->lp_md, strlen(lprof->lp_md) + 1); OBD_FREE(lprof, sizeof *lprof); } EXIT; @@ -735,7 +744,7 @@ int class_process_config(struct lustre_cfg *lcfg) GOTO(out, err = 0); } case LCFG_SET_UPCALL: { - LCONSOLE_ERROR("recovery upcall is deprecated\n"); + LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n"); /* COMPAT_146 Don't fail on old configs */ GOTO(out, err = 0); } @@ -743,7 +752,7 @@ int class_process_config(struct lustre_cfg *lcfg) struct cfg_marker *marker; marker = lustre_cfg_buf(lcfg, 1); CDEBUG(D_IOCTL, "marker %d (%#x) %.16s %s\n", marker->cm_step, - marker->cm_flags, marker->cm_tgtname, marker->cm_comment); + marker->cm_flags, marker->cm_tgtname, marker->cm_comment); GOTO(out, err = 0); } case LCFG_PARAM: { @@ -792,6 +801,10 @@ int class_process_config(struct lustre_cfg *lcfg) err = class_del_conn(obd, lcfg); GOTO(out, err = 0); } + case LCFG_SEC_FLAVOR: { + err = class_sec_flavor(obd, lcfg); + GOTO(out, err = 0); + } default: { err = obd_process_config(obd, sizeof(*lcfg), lcfg); GOTO(out, err); @@ -813,7 +826,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, #ifdef __KERNEL__ struct lprocfs_vars *var; char *key, *sval; - int i, vallen; + int i, keylen, vallen; int matched = 0, j = 0; int rc = 0; ENTRY; @@ -837,6 +850,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, /* continue parsing other params */ continue; } + keylen = sval - key; sval++; vallen = strlen(sval); matched = 0; @@ -844,7 +858,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, /* Search proc entries */ while (lvars[j].name) { var = &lvars[j]; - if (class_match_param(key, (char *)var->name, 0) == 0) { + if (class_match_param(key, (char *)var->name, 0) == 0 && + keylen == strlen(var->name)) { matched++; rc = -EROFS; if (var->write_fptr) { @@ -902,7 +917,7 @@ static int class_config_llog_handler(struct llog_handle * handle, char *cfg_buf = (char*) (rec + 1); int rc = 0; ENTRY; - + //class_config_dump_handler(handle, rec, data); switch (rec->lrh_type) { @@ -949,11 +964,12 @@ static int class_config_llog_handler(struct llog_handle * handle, if (!(clli->cfg_flags & CFG_F_COMPAT146) && !(clli->cfg_flags & CFG_F_MARKER) && (lcfg->lcfg_command != LCFG_MARKER)) { - CWARN("Config not inside markers, ignoring! (%#x)\n", - clli->cfg_flags); + CWARN("Config not inside markers, ignoring! " + "(inst: %s, uuid: %s, flags: %#x)\n", + clli->cfg_instance ? clli->cfg_instance : "<null>", + clli->cfg_uuid.uuid, clli->cfg_flags); clli->cfg_flags |= CFG_F_SKIP; } - if (clli->cfg_flags & CFG_F_SKIP) { CDEBUG(D_CONFIG, "skipping %#x\n", clli->cfg_flags); @@ -981,7 +997,7 @@ static int class_config_llog_handler(struct llog_handle * handle, lustre_cfg_string(lcfg, 0), clli->cfg_instance); lustre_cfg_bufs_set_string(&bufs, 0, inst_name); - CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n", + CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n", lcfg->lcfg_command, inst_name); } @@ -1061,6 +1077,7 @@ int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, CDEBUG(D_CONFIG, "Processed log %s gen %d-%d (rc=%d)\n", name, cd.first_idx + 1, cd.last_idx, rc); + if (cfg) cfg->cfg_last_idx = cd.last_idx; @@ -1114,7 +1131,7 @@ int class_config_dump_handler(struct llog_handle * handle, if (lcfg->lcfg_command == LCFG_MARKER) { struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'", - marker->cm_step, marker->cm_flags, + marker->cm_step, marker->cm_flags, marker->cm_tgtname, marker->cm_comment); } else { for (i = 0; i < lcfg->lcfg_bufcount; i++) { diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 0dd655e..cfd04ed 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -46,7 +46,7 @@ static int (*client_fill_super)(struct super_block *sb) = NULL; DECLARE_MUTEX(lustre_mount_info_lock); struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list); -static struct lustre_mount_info *server_find_mount(char *name) +static struct lustre_mount_info *server_find_mount(const char *name) { struct list_head *tmp; struct lustre_mount_info *lmi; @@ -63,7 +63,7 @@ static struct lustre_mount_info *server_find_mount(char *name) /* we must register an obd for a mount before we call the setup routine. *_setup will call lustre_get_mount to get the mnt struct by obd_name, since we can't pass the pointer to setup. */ -static int server_register_mount(char *name, struct super_block *sb, +static int server_register_mount(const char *name, struct super_block *sb, struct vfsmount *mnt) { struct lustre_mount_info *lmi; @@ -106,7 +106,7 @@ static int server_register_mount(char *name, struct super_block *sb, } /* when an obd no longer needs a mount */ -static int server_deregister_mount(char *name) +static int server_deregister_mount(const char *name) { struct lustre_mount_info *lmi; ENTRY; @@ -133,7 +133,7 @@ static int server_deregister_mount(char *name) /* obd's look up a registered mount using their obdname. This is just for initial obd setup to find the mount struct. It should not be called every time you want to mntget. */ -struct lustre_mount_info *server_get_mount(char *name) +struct lustre_mount_info *server_get_mount(const char *name) { struct lustre_mount_info *lmi; struct lustre_sb_info *lsi; @@ -157,6 +157,25 @@ struct lustre_mount_info *server_get_mount(char *name) RETURN(lmi); } +/* + * Used by mdt to get mount_info from obdname. + * There are no blocking when using the mount_info. + * Do not use server_get_mount for this purpose. + */ +struct lustre_mount_info *server_get_mount_2(const char *name) +{ + struct lustre_mount_info *lmi; + ENTRY; + + down(&lustre_mount_info_lock); + lmi = server_find_mount(name); + up(&lustre_mount_info_lock); + if (!lmi) + CERROR("Can't find mount for %s\n", name); + + RETURN(lmi); +} + static void unlock_mntput(struct vfsmount *mnt) { if (kernel_locked()) { @@ -171,7 +190,7 @@ static void unlock_mntput(struct vfsmount *mnt) static int lustre_put_lsi(struct super_block *sb); /* to be called from obd_cleanup methods */ -int server_put_mount(char *name, struct vfsmount *mnt) +int server_put_mount(const char *name, struct vfsmount *mnt) { struct lustre_mount_info *lmi; struct lustre_sb_info *lsi; @@ -180,7 +199,7 @@ int server_put_mount(char *name, struct vfsmount *mnt) /* This might be the last one, can't deref after this */ unlock_mntput(mnt); - + down(&lustre_mount_info_lock); lmi = server_find_mount(name); up(&lustre_mount_info_lock); @@ -208,9 +227,16 @@ int server_put_mount(char *name, struct vfsmount *mnt) RETURN(0); } +/* Corresponding to server_get_mount_2 */ +int server_put_mount_2(const char *name, struct vfsmount *mnt) +{ + ENTRY; + RETURN(0); +} /******* mount helper utilities *********/ +#if 0 static void ldd_print(struct lustre_disk_data *ldd) { PRINT_CMD(PRINT_MASK, " disk data:\n"); @@ -225,6 +251,7 @@ static void ldd_print(struct lustre_disk_data *ldd) PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params); PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata); } +#endif static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt, struct lustre_disk_data *ldd) @@ -282,8 +309,6 @@ static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt, GOTO(out_close, rc = -EINVAL); } - ldd_print(ldd); - out_close: filp_close(file, 0); out: @@ -322,7 +347,6 @@ static int ldd_write(struct lvfs_run_ctxt *mount_ctxt, } rc = 0; - ldd_print(ldd); out_close: filp_close(file, 0); @@ -362,19 +386,20 @@ int lustre_process_log(struct super_block *sb, char *logname, lustre_cfg_free(lcfg); if (rc == -EINVAL) - LCONSOLE_ERROR("%s: The configuration from log '%s' failed " - "(%d). Make sure this client and " - "the MGS are running compatible versions of " - "Lustre.\n", - mgc->obd_name, logname, rc); + LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'" + "failed from the MGS (%d). Make sure this " + "client and the MGS are running compatible " + "versions of Lustre.\n", + mgc->obd_name, logname, rc); if (rc) - LCONSOLE_ERROR("%s: The configuration from log '%s' failed " - "(%d). This may be the result of " - "communication errors between this node and " - "the MGS, a bad configuration, or other errors." - " See the syslog for more information.\n", - mgc->obd_name, logname, rc); + LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' " + "failed (%d). This may be the result of " + "communication errors between this node and " + "the MGS, a bad configuration, or other " + "errors. See the syslog for more " + "information.\n", mgc->obd_name, logname, + rc); /* class_obd_list(); */ RETURN(rc); @@ -468,8 +493,9 @@ static int server_start_mgs(struct super_block *sb) lmi = server_find_mount(LUSTRE_MGS_OBDNAME); if (lmi) { lsi = s2lsi(lmi->lmi_sb); - LCONSOLE_ERROR("The MGS service was already started from " - "server %s\n", lsi->lsi_ldd->ldd_svname); + LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started" + " from server %s\n", + lsi->lsi_ldd->ldd_svname); RETURN(-EALREADY); } @@ -483,9 +509,9 @@ static int server_start_mgs(struct super_block *sb) server_deregister_mount(LUSTRE_MGS_OBDNAME); if (rc) - LCONSOLE_ERROR("Failed to start MGS '%s' (%d). Is the 'mgs' " - "module loaded?\n", LUSTRE_MGS_OBDNAME, rc); - + LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). " + "Is the 'mgs' module loaded?\n", + LUSTRE_MGS_OBDNAME, rc); RETURN(rc); } @@ -695,9 +721,8 @@ static int lustre_start_mgc(struct super_block *sb) if (rc) /* nonfatal */ CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc); - /* We connect to the MGS at setup, and don't disconnect until cleanup */ - rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), &ocd); + rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), &ocd); if (rc) { CERROR("connect failed %d\n", rc); GOTO(out, rc); @@ -870,11 +895,82 @@ int server_mti_print(char *title, struct mgs_target_info *mti) return(0); } +static +int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd) +{ + char *s1, *s2; + + if (lmd->lmd_sec_mdt == NULL && lmd->lmd_sec_cli == NULL) { + /* just let on-disk params do its work. but we have an + * assumption that any changes of on-disk data by tune2fs + * should lead to server rewrite log. + */ + return 0; + } + + /* filter out existing sec options */ + s1 = mti->mti_params; + while (*s1) { + int clear; + + while (*s1 == ' ') + s1++; + + if (strncmp(s1, PARAM_SEC_RPC_MDT, + sizeof(PARAM_SEC_RPC_MDT) - 1) == 0 || + strncmp(s1, PARAM_SEC_RPC_CLI, + sizeof(PARAM_SEC_RPC_CLI) - 1) == 0) + clear = 1; + else + clear = 0; + + s2 = strchr(s1, ' '); + if (s2 == NULL) { + if (clear) + *s1 = '\0'; + break; + } + s2++; + if (clear) + memmove(s1, s2, strlen(s2) + 1); + else + s1 = s2; + } + + /* append sec options from lmd */ + /* FIXME add flag LDD_F_UPDATE after mountconf start supporting + * log updating. + */ + if (lmd->lmd_sec_mdt) { + if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_mdt) + + sizeof(PARAM_SEC_RPC_MDT) + 1 >= sizeof(mti->mti_params)) { + CERROR("security params too big for mti\n"); + return -ENOMEM; + } + strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT); + strcat(mti->mti_params, lmd->lmd_sec_mdt); + //mti->mti_flags |= LDD_F_UPDATE; + } + if (lmd->lmd_sec_cli) { + if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_cli) + + sizeof(PARAM_SEC_RPC_CLI) + 2 > sizeof(mti->mti_params)) { + CERROR("security params too big for mti\n"); + return -ENOMEM; + } + strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI); + strcat(mti->mti_params, lmd->lmd_sec_cli); + //mti->mti_flags |= LDD_F_UPDATE; + } + + return 0; +} + static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) { - struct lustre_sb_info *lsi = s2lsi(sb); - struct lustre_disk_data *ldd = lsi->lsi_ldd; - lnet_process_id_t id; + struct lustre_sb_info *lsi = s2lsi(sb); + struct lustre_disk_data *ldd = lsi->lsi_ldd; + struct lustre_mount_data *lmd = lsi->lsi_lmd; + lnet_process_id_t id; int i = 0; ENTRY; @@ -909,7 +1005,8 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) RETURN(-ENOMEM); } memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params)); - RETURN(0); + + RETURN(mti_set_sec_opts(mti, lmd)); } /* Register an old or new target with the MGS. If needed MGS will construct @@ -966,7 +1063,6 @@ int server_register_target(struct super_block *sb) sizeof(ldd->ldd_svname)); /* or ldd_make_sv_name(ldd); */ ldd_write(&mgc->obd_lvfs_ctxt, ldd); - err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb, mti->mti_svname); if (err) @@ -996,6 +1092,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname); +#if 0 /* If we're an MDT, make sure the global MDS is running */ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) { /* make sure the MDS is started */ @@ -1015,6 +1112,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) } mutex_up(&server_start_lock); } +#endif /* If we're an OST, make sure the global OSS is running */ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) { @@ -1048,15 +1146,16 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) CERROR("Required registration failed for %s: %d\n", lsi->lsi_ldd->ldd_svname, rc); if (rc == -EIO) { - LCONSOLE_ERROR("Communication error with the MGS. Is " - "the MGS running?\n"); + LCONSOLE_ERROR_MSG(0x15f, "Communication error with " + "the MGS. Is the MGS running?\n"); } GOTO(out_mgc, rc); } if (rc == -EINVAL) { - LCONSOLE_ERROR("The MGS is refusing to allow this server (%s) " - "to start. Please see messages on the MGS node." - "\n", lsi->lsi_ldd->ldd_svname); + LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this " + "server (%s) to start. Please see messages" + " on the MGS node.\n", + lsi->lsi_ldd->ldd_svname); GOTO(out_mgc, rc); } @@ -1123,6 +1222,9 @@ struct lustre_sb_info *lustre_init_lsi(struct super_block *sb) /* Default umount style */ lsi->lsi_flags = LSI_UMOUNT_FAILOVER; + + lsi->lsi_lmd->lmd_nllu = NOBODY_UID; + lsi->lsi_lmd->lmd_nllg = NOBODY_GID; RETURN(lsi); } @@ -1149,6 +1251,12 @@ static int lustre_free_lsi(struct super_block *sb) if (lsi->lsi_lmd->lmd_profile != NULL) OBD_FREE(lsi->lsi_lmd->lmd_profile, strlen(lsi->lsi_lmd->lmd_profile) + 1); + if (lsi->lsi_lmd->lmd_sec_mdt != NULL) + OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt, + strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1); + if (lsi->lsi_lmd->lmd_sec_cli != NULL) + OBD_FREE(lsi->lsi_lmd->lmd_sec_cli, + strlen(lsi->lsi_lmd->lmd_sec_cli) + 1); if (lsi->lsi_lmd->lmd_opts != NULL) OBD_FREE(lsi->lsi_lmd->lmd_opts, strlen(lsi->lsi_lmd->lmd_opts) + 1); @@ -1160,7 +1268,6 @@ static int lustre_free_lsi(struct super_block *sb) } LASSERT(lsi->lsi_llsbi == NULL); - OBD_FREE(lsi, sizeof(*lsi)); s2lsi_nocast(sb) = NULL; @@ -1345,9 +1452,9 @@ static void server_put_super(struct super_block *sb) If there are any setup/cleanup errors, save the lov name for safety cleanup later. */ lprof = class_get_profile(lsi->lsi_ldd->ldd_svname); - if (lprof && lprof->lp_osc) { - OBD_ALLOC(extraname, strlen(lprof->lp_osc) + 1); - strcpy(extraname, lprof->lp_osc); + if (lprof && lprof->lp_dt) { + OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1); + strcpy(extraname, lprof->lp_dt); } obd = class_name2obd(lsi->lsi_ldd->ldd_svname); @@ -1365,7 +1472,6 @@ static void server_put_super(struct super_block *sb) CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname); server_deregister_mount(lsi->lsi_ldd->ldd_svname); } - } /* If they wanted the mgs to stop separately from the mdt, they @@ -1408,6 +1514,7 @@ static void server_put_super(struct super_block *sb) OBD_FREE(tmpname, tmpname_sz); EXIT; } + #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT static void server_umount_begin(struct vfsmount *vfsmnt, int flags) { @@ -1539,9 +1646,10 @@ static int server_fill_super(struct super_block *sb) lsi->lsi_lmd->lmd_dev); if (class_name2obd(lsi->lsi_ldd->ldd_svname)) { - LCONSOLE_ERROR("The target named %s is already running. " - "Double-mount may have compromised the disk " - "journal.\n", lsi->lsi_ldd->ldd_svname); + LCONSOLE_ERROR_MSG(0x161, "The target named %s is already " + "running. Double-mount may have compromised" + " the disk journal.\n", + lsi->lsi_ldd->ldd_svname); unlock_mntput(mnt); lustre_put_lsi(sb); GOTO(out, rc = -EALREADY); @@ -1642,6 +1750,7 @@ int lustre_common_put_super(struct super_block *sb) RETURN(rc); } +#if 0 static void lmd_print(struct lustre_mount_data *lmd) { int i; @@ -1651,6 +1760,10 @@ static void lmd_print(struct lustre_mount_data *lmd) PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile); PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev); PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags); + if (lmd->lmd_sec_mdt) + PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt); + if (lmd->lmd_sec_cli) + PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli); if (lmd->lmd_opts) PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts); for (i = 0; i < lmd->lmd_exclude_count; i++) { @@ -1658,6 +1771,7 @@ static void lmd_print(struct lustre_mount_data *lmd) lmd->lmd_exclude[i]); } } +#endif /* Is this server on the exclusion list */ int lustre_check_exclusion(struct super_block *sb, char *svname) @@ -1718,7 +1832,6 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) s1 = s2; /* now we are pointing at ':' (next exclude) or ',' (end of excludes) */ - if (lmd->lmd_exclude_count >= devmax) break; } @@ -1741,6 +1854,66 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) RETURN(rc); } +static +int lmd_set_sec_opts(char **set, char *opts, int length) +{ + if (*set) + OBD_FREE(*set, strlen(*set) + 1); + + OBD_ALLOC(*set, length + 1); + if (*set == NULL) + return -ENOMEM; + + memcpy(*set, opts, length); + (*set)[length] = '\0'; + + return 0; +} + +static +int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr) +{ + char *tail; + char **set = NULL; + int length; + + /* check peer name */ + if (strncmp(ptr, "sec_mdt=", 8) == 0) { + set = &lmd->lmd_sec_mdt; + ptr += 8; + } else if (strncmp(ptr, "sec_cli=", 8) == 0) { + set = &lmd->lmd_sec_cli; + ptr += 8; + } else if (strncmp(ptr, "sec=", 4) == 0) { + /* leave 'set' be null */ + ptr += 4; + } else { + CERROR("invalid security options: %s\n", ptr); + return -EINVAL; + } + + tail = strchr(ptr, ','); + if (tail == NULL) + length = strlen(ptr); + else + length = tail - ptr; + + if (set) { + if (lmd_set_sec_opts(set, ptr, length)) + return -EINVAL; + } else { + if (lmd->lmd_sec_mdt == NULL && + lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length)) + return -EINVAL; + + if (lmd->lmd_sec_cli == NULL && + lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length)) + return -EINVAL; + } + + return 0; +} + /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */ static int lmd_parse(char *options, struct lustre_mount_data *lmd) { @@ -1751,16 +1924,16 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) LASSERT(lmd); if (!options) { - LCONSOLE_ERROR("Missing mount data: check that " - "/sbin/mount.lustre is installed.\n"); + LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that " + "/sbin/mount.lustre is installed.\n"); RETURN(-EINVAL); } /* Options should be a string - try to detect old lmd data */ if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) { - LCONSOLE_ERROR("You're using an old version of " - "/sbin/mount.lustre. Please install version " - "%s\n", LUSTRE_VERSION_STRING); + LCONSOLE_ERROR_MSG(0x163, "You're using an old version of " + "/sbin/mount.lustre. Please install " + "version %s\n", LUSTRE_VERSION_STRING); RETURN(-EINVAL); } lmd->lmd_magic = LMD_MAGIC; @@ -1791,8 +1964,18 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) if (rc) goto invalid; clear++; + } else if (strncmp(s1, "nllu=", 5) == 0) { + lmd->lmd_nllu = simple_strtoul(s1 + 5, NULL, 10); + clear++; + } else if (strncmp(s1, "nllg=", 5) == 0) { + lmd->lmd_nllg = simple_strtoul(s1 + 5, NULL, 10); + clear++; + } else if (strncmp(s1, "sec", 3) == 0) { + rc = lmd_parse_sec_opts(lmd, s1); + if (rc) + goto invalid; + clear++; } - /* Linux 2.4 doesn't pass the device, so we stuck it at the end of the options. */ else if (strncmp(s1, "device=", 7) == 0) { @@ -1818,8 +2001,8 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) } if (!devname) { - LCONSOLE_ERROR("Can't find the device name " - "(need mount option 'device=...')\n"); + LCONSOLE_ERROR_MSG(0x164, "Can't find the device name " + "(need mount option 'device=...')\n"); goto invalid; } @@ -1855,7 +2038,6 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) lmd->lmd_magic = LMD_MAGIC; - lmd_print(lmd); RETURN(rc); invalid: @@ -1888,8 +2070,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) if (lmd_is_client(lmd)) { CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile); if (!client_fill_super) { - LCONSOLE_ERROR("Nothing registered for client mount!" - " Is the 'lustre' module loaded?\n"); + LCONSOLE_ERROR_MSG(0x165, "Nothing registered for " + "client mount! Is the 'lustre' " + "module loaded?\n"); rc = -ENODEV; } else { rc = lustre_start_mgc(sb); @@ -1901,7 +2084,6 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) /* (should always be ll_fill_super) */ rc = (*client_fill_super)(sb); /* c_f_s will call lustre_common_put_super on failure */ - } } else { CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev); @@ -2001,7 +2183,9 @@ EXPORT_SYMBOL(lustre_common_put_super); EXPORT_SYMBOL(lustre_process_log); EXPORT_SYMBOL(lustre_end_log); EXPORT_SYMBOL(server_get_mount); +EXPORT_SYMBOL(server_get_mount_2); EXPORT_SYMBOL(server_put_mount); +EXPORT_SYMBOL(server_put_mount_2); EXPORT_SYMBOL(server_register_target); EXPORT_SYMBOL(server_name2index); EXPORT_SYMBOL(server_mti_print); diff --git a/lustre/obdclass/obdo.c b/lustre/obdclass/obdo.c index 6b61657..d5a65bc 100644 --- a/lustre/obdclass/obdo.c +++ b/lustre/obdclass/obdo.c @@ -34,6 +34,7 @@ #ifndef __KERNEL__ #include <liblustre.h> +#include <obd.h> #else #include <obd_class.h> #include <lustre/lustre_idl.h> @@ -79,6 +80,45 @@ void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid) } EXPORT_SYMBOL(obdo_cpy_md); +/* returns FALSE if comparison (by flags) is same, TRUE if changed */ +int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare) +{ + int res = 0; + + if ( compare & OBD_MD_FLATIME ) + res = (res || (dst->o_atime != src->o_atime)); + if ( compare & OBD_MD_FLMTIME ) + res = (res || (dst->o_mtime != src->o_mtime)); + if ( compare & OBD_MD_FLCTIME ) + res = (res || (dst->o_ctime != src->o_ctime)); + if ( compare & OBD_MD_FLSIZE ) + res = (res || (dst->o_size != src->o_size)); + if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */ + res = (res || (dst->o_blocks != src->o_blocks)); + if ( compare & OBD_MD_FLBLKSZ ) + res = (res || (dst->o_blksize != src->o_blksize)); + if ( compare & OBD_MD_FLTYPE ) + res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0)); + if ( compare & OBD_MD_FLMODE ) + res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0)); + if ( compare & OBD_MD_FLUID ) + res = (res || (dst->o_uid != src->o_uid)); + if ( compare & OBD_MD_FLGID ) + res = (res || (dst->o_gid != src->o_gid)); + if ( compare & OBD_MD_FLFLAGS ) + res = (res || (dst->o_flags != src->o_flags)); + if ( compare & OBD_MD_FLNLINK ) + res = (res || (dst->o_nlink != src->o_nlink)); + if ( compare & OBD_MD_FLGENER ) + res = (res || (dst->o_generation != src->o_generation)); + /* XXX Don't know if thses should be included here - wasn't previously + if ( compare & OBD_MD_FLINLINE ) + res = (res || memcmp(dst->o_inline, src->o_inline)); + */ + return res; +} +EXPORT_SYMBOL(obdo_cmp_md); + void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj) { ioobj->ioo_id = oa->o_id; @@ -89,3 +129,117 @@ void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj) ioobj->ioo_type = oa->o_mode; } EXPORT_SYMBOL(obdo_to_ioobj); + +void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid) +{ + if (ia_valid & ATTR_ATIME) { + oa->o_atime = LTIME_S(attr->ia_atime); + oa->o_valid |= OBD_MD_FLATIME; + } + if (ia_valid & ATTR_MTIME) { + oa->o_mtime = LTIME_S(attr->ia_mtime); + oa->o_valid |= OBD_MD_FLMTIME; + } + if (ia_valid & ATTR_CTIME) { + oa->o_ctime = LTIME_S(attr->ia_ctime); + oa->o_valid |= OBD_MD_FLCTIME; + } + if (ia_valid & ATTR_SIZE) { + oa->o_size = attr->ia_size; + oa->o_valid |= OBD_MD_FLSIZE; + } + if (ia_valid & ATTR_MODE) { + oa->o_mode = attr->ia_mode; + oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; + if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) + oa->o_mode &= ~S_ISGID; + } + if (ia_valid & ATTR_UID) { + oa->o_uid = attr->ia_uid; + oa->o_valid |= OBD_MD_FLUID; + } + if (ia_valid & ATTR_GID) { + oa->o_gid = attr->ia_gid; + oa->o_valid |= OBD_MD_FLGID; + } +} +EXPORT_SYMBOL(obdo_from_iattr); + +void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid) +{ + valid &= oa->o_valid; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n", + oa->o_valid, oa->o_mtime, oa->o_ctime); + + attr->ia_valid = 0; + if (valid & OBD_MD_FLATIME) { + LTIME_S(attr->ia_atime) = oa->o_atime; + attr->ia_valid |= ATTR_ATIME; + } + if (valid & OBD_MD_FLMTIME) { + LTIME_S(attr->ia_mtime) = oa->o_mtime; + attr->ia_valid |= ATTR_MTIME; + } + if (valid & OBD_MD_FLCTIME) { + LTIME_S(attr->ia_ctime) = oa->o_ctime; + attr->ia_valid |= ATTR_CTIME; + } + if (valid & OBD_MD_FLSIZE) { + attr->ia_size = oa->o_size; + attr->ia_valid |= ATTR_SIZE; + } +#if 0 /* you shouldn't be able to change a file's type with setattr */ + if (valid & OBD_MD_FLTYPE) { + attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); + attr->ia_valid |= ATTR_MODE; + } +#endif + if (valid & OBD_MD_FLMODE) { + attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); + attr->ia_valid |= ATTR_MODE; + if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) + attr->ia_mode &= ~S_ISGID; + } + if (valid & OBD_MD_FLUID) { + attr->ia_uid = oa->o_uid; + attr->ia_valid |= ATTR_UID; + } + if (valid & OBD_MD_FLGID) { + attr->ia_gid = oa->o_gid; + attr->ia_valid |= ATTR_GID; + } +} +EXPORT_SYMBOL(iattr_from_obdo); + +void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid) +{ + iattr_from_obdo(&op_data->op_attr, oa, valid); + if (valid & OBD_MD_FLBLOCKS) { + op_data->op_attr_blocks = oa->o_blocks; + op_data->op_attr.ia_valid |= ATTR_BLOCKS; + } + if (valid & OBD_MD_FLFLAGS) { + ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = + oa->o_flags; + op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; + } +} +EXPORT_SYMBOL(md_from_obdo); + +void obdo_from_md(struct obdo *oa, struct md_op_data *op_data, + unsigned int valid) +{ + obdo_from_iattr(oa, &op_data->op_attr, valid); + if (valid & ATTR_BLOCKS) { + oa->o_blocks = op_data->op_attr_blocks; + oa->o_valid |= OBD_MD_FLBLOCKS; + } + if (valid & ATTR_ATTR_FLAG) { + oa->o_flags = + ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags; + oa->o_valid |= OBD_MD_FLFLAGS; + } +} +EXPORT_SYMBOL(obdo_from_md); diff --git a/lustre/obdclass/prng.c b/lustre/obdclass/prng.c index 4922c04..b067338 100644 --- a/lustre/obdclass/prng.c +++ b/lustre/obdclass/prng.c @@ -105,3 +105,4 @@ void ll_generate_random_uuid(class_uuid_t uuid_out) ll_get_random_bytes(uuid_out, sizeof(class_uuid_t)); } EXPORT_SYMBOL(ll_generate_random_uuid); + diff --git a/lustre/obdclass/uuid.c b/lustre/obdclass/uuid.c index ad072ff..cbcebb2a 100644 --- a/lustre/obdclass/uuid.c +++ b/lustre/obdclass/uuid.c @@ -53,76 +53,6 @@ static void uuid_unpack(class_uuid_t in, struct uuid *uu) memcpy(uu->node, ptr, 6); } -#if 0 -static void uuid_pack(struct uuid *uu, class_uuid_t ptr) -{ - __u32 tmp; - unsigned char *out = ptr; - - tmp = uu->time_low; - out[3] = (unsigned char) tmp; - tmp >>= 8; - out[2] = (unsigned char) tmp; - tmp >>= 8; - out[1] = (unsigned char) tmp; - tmp >>= 8; - out[0] = (unsigned char) tmp; - - tmp = uu->time_mid; - out[5] = (unsigned char) tmp; - tmp >>= 8; - out[4] = (unsigned char) tmp; - - tmp = uu->time_hi_and_version; - out[7] = (unsigned char) tmp; - tmp >>= 8; - out[6] = (unsigned char) tmp; - - tmp = uu->clock_seq; - out[9] = (unsigned char) tmp; - tmp >>= 8; - out[8] = (unsigned char) tmp; - - memcpy(out+10, uu->node, 6); -} - -int class_uuid_parse(struct obd_uuid in, class_uuid_t uu) -{ - struct uuid uuid; - int i; - char *cp, buf[3]; - - if (strlen(in) != 36) - return -1; - for (i=0, cp = in; i <= 36; i++,cp++) { - if ((i == 8) || (i == 13) || (i == 18) || - (i == 23)) - if (*cp == '-') - continue; - if (i== 36) - if (*cp == 0) - continue; - if (!isxdigit(*cp)) - return -1; - } - uuid.time_low = simple_strtoul(in, NULL, 16); - uuid.time_mid = simple_strtoul(in+9, NULL, 16); - uuid.time_hi_and_version = simple_strtoul(in+14, NULL, 16); - uuid.clock_seq = simple_strtoul(in+19, NULL, 16); - cp = in+24; - buf[2] = 0; - for (i=0; i < 6; i++) { - buf[0] = *cp++; - buf[1] = *cp++; - uuid.node[i] = simple_strtoul(buf, NULL, 16); - } - - uuid_pack(&uuid, uu); - return 0; -} -#endif - - void generate_random_uuid(unsigned char uuid_out[16]); void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out) diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 541cf5d..3ba4c3c 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -49,7 +49,8 @@ enum { LPROC_ECHO_LAST = LPROC_ECHO_WRITE_BYTES +1 }; -static int echo_connect(struct lustre_handle *conn, struct obd_device *obd, +static int echo_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { data->ocd_connect_flags &= ECHO_CONNECT_SUPPORTED; @@ -128,7 +129,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa, } int echo_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti, + struct lov_stripe_md *ea, struct obd_trans_info *oti, struct obd_export *md_exp) { struct obd_device *obd = class_exp2obd(exp); @@ -166,7 +167,7 @@ static int echo_getattr(struct obd_export *exp, struct obd_info *oinfo) } if (!(oinfo->oi_oa->o_valid & OBD_MD_FLID)) { - CERROR("obdo missing FLID valid flag: "LPX64"\n", + CERROR("obdo missing FLID valid flag: "LPX64"\n", oinfo->oi_oa->o_valid); RETURN(-EINVAL); } @@ -190,7 +191,7 @@ static int echo_setattr(struct obd_export *exp, struct obd_info *oinfo, } if (!(oinfo->oi_oa->o_valid & OBD_MD_FLID)) { - CERROR("obdo missing FLID valid flag: "LPX64"\n", + CERROR("obdo missing FLID valid flag: "LPX64"\n", oinfo->oi_oa->o_valid); RETURN(-EINVAL); } @@ -269,7 +270,7 @@ echo_page_debug_check(cfs_page_t *page, obd_id id, int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, struct lustre_capa *unused) { struct obd_device *obd; struct niobuf_local *r = res; @@ -453,7 +454,7 @@ commitrw_cleanup: return rc; } -static int echo_setup(struct obd_device *obd, obd_count len, void *buf) +static int echo_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; int rc; @@ -471,9 +472,9 @@ static int echo_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(-ENOMEM); } - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id, LDLM_PLAIN, - NULL, LCK_NL, &lock_flags, NULL, - ldlm_completion_ast, NULL, NULL, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN, + NULL, LCK_NL, &lock_flags, NULL, + ldlm_completion_ast, NULL, NULL, 0, NULL, &obd->u.echo.eo_nl_lock); LASSERT (rc == ELDLM_OK); @@ -588,8 +589,8 @@ static int __init obdecho_init(void) if (rc != 0) goto failed_0; - rc = class_register_type(&echo_obd_ops, lvars.module_vars, - LUSTRE_ECHO_NAME); + rc = class_register_type(&echo_obd_ops, NULL, lvars.module_vars, + LUSTRE_ECHO_NAME, NULL); if (rc != 0) goto failed_1; diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 83f4ab9..1dcb3e0 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -92,7 +92,7 @@ echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) sizeof(lsm->lsm_oinfo[0]))) return (-EFAULT); } - return (0); + return 0; } static int @@ -114,13 +114,13 @@ echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm, ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL)) return (-EINVAL); + for (i = 0; i < lsm->lsm_stripe_count; i++) { if (copy_from_user(lsm->lsm_oinfo[i], ((struct lov_stripe_md *)ulsm)->lsm_oinfo[i], sizeof(lsm->lsm_oinfo[0]))) return (-EFAULT); } - return (0); } @@ -157,9 +157,9 @@ echo_free_object (struct ec_object *eco) struct echo_client_obd *ec = &obd->u.echo_client; LASSERT (eco->eco_refcount == 0); - if (!eco->eco_lsm) + if (!eco->eco_lsm) CERROR("No object %s\n", obd->obd_name); - else + else obd_free_memmd(ec->ec_exp, &eco->eco_lsm); OBD_FREE (eco, sizeof (*eco)); } @@ -184,7 +184,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, if (ulsm != NULL) { eco = echo_allocate_object (obd); - if (eco == NULL) + if (eco == NULL) return (-ENOMEM); lsm = eco->eco_lsm; @@ -435,9 +435,9 @@ echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp) *offp = offset * stripe_size + woffset % stripe_size; } -static void -echo_client_page_debug_setup(struct lov_stripe_md *lsm, - cfs_page_t *page, int rw, obd_id id, +static void +echo_client_page_debug_setup(struct lov_stripe_md *lsm, + cfs_page_t *page, int rw, obd_id id, obd_off offset, obd_off count) { char *addr; @@ -459,17 +459,16 @@ echo_client_page_debug_setup(struct lov_stripe_md *lsm, stripe_off = 0xdeadbeef00c0ffeeULL; stripe_id = 0xdeadbeef00c0ffeeULL; } - block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE, + block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE, stripe_off, stripe_id); } cfs_kunmap(page); } -static int -echo_client_page_debug_check(struct lov_stripe_md *lsm, - cfs_page_t *page, obd_id id, - obd_off offset, obd_off count) +static int echo_client_page_debug_check(struct lov_stripe_md *lsm, + cfs_page_t *page, obd_id id, + obd_off offset, obd_off count) { obd_off stripe_off; obd_id stripe_id; @@ -488,8 +487,8 @@ echo_client_page_debug_check(struct lov_stripe_md *lsm, stripe_id = id; echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id); - rc2 = block_debug_check("test_brw", - addr + delta, OBD_ECHO_BLOCK_SIZE, + rc2 = block_debug_check("test_brw", + addr + delta, OBD_ECHO_BLOCK_SIZE, stripe_off, stripe_id); if (rc2 != 0) { CERROR ("Error in echo object "LPX64"\n", id); @@ -659,7 +658,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw, obd_off offset, obd_size count, char *buffer, struct obd_trans_info *oti) { - /* echo_client_ubrw() needs to be ported on 2.6 yet */ +#warning "echo_client_ubrw() needs to be ported on 2.6 yet" LBUG(); return 0; } @@ -735,7 +734,7 @@ static int ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) eas->eas_oa.o_id != ECHO_PERSISTENT_OBJID && (eas->eas_oa.o_valid & OBD_MD_FLFLAGS) != 0 && (eas->eas_oa.o_flags & OBD_FL_DEBUG_CHECK) != 0) - echo_client_page_debug_check(eas->eas_lsm, eap->eap_page, + echo_client_page_debug_check(eas->eas_lsm, eap->eap_page, eas->eas_oa.o_id, eap->eap_off, CFS_PAGE_SIZE); @@ -831,7 +830,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw, /* sleep until we have a page to send */ spin_unlock(&eas.eas_lock); - rc = wait_event_interruptible(eas.eas_waitq, + rc = wait_event_interruptible(eas.eas_waitq, eas_should_wake(&eas)); spin_lock(&eas.eas_lock); if (rc && !eas.eas_rc) @@ -847,7 +846,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw, /* unbind the eap from its old page offset */ if (eap->eap_cookie != NULL) { - obd_teardown_async_page(exp, lsm, NULL, + obd_teardown_async_page(exp, lsm, NULL, eap->eap_cookie); eap->eap_cookie = NULL; } @@ -867,8 +866,8 @@ static int echo_client_async_page(struct obd_export *exp, int rw, if (oa->o_id != ECHO_PERSISTENT_OBJID && (oa->o_valid & OBD_MD_FLFLAGS) != 0 && (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0) - echo_client_page_debug_setup(lsm, eap->eap_page, rw, - oa->o_id, + echo_client_page_debug_setup(lsm, eap->eap_page, rw, + oa->o_id, eap->eap_off, CFS_PAGE_SIZE); /* always asserts urgent, which isn't quite right */ @@ -884,14 +883,14 @@ static int echo_client_async_page(struct obd_export *exp, int rw, eas.eas_in_flight++; if (eas.eas_next_offset == eas.eas_end_offset) break; - } + } /* still hold the eas_lock here.. */ /* now we just spin waiting for all the rpcs to complete */ while(eas.eas_in_flight) { spin_unlock(&eas.eas_lock); - wait_event_interruptible(eas.eas_waitq, + wait_event_interruptible(eas.eas_waitq, eas.eas_in_flight == 0); spin_lock(&eas.eas_lock); } @@ -918,7 +917,7 @@ out: static int echo_client_prep_commit(struct obd_export *exp, int rw, struct obdo *oa, struct lov_stripe_md *lsm, - obd_off offset, obd_size count, + obd_off offset, obd_size count, obd_size batch, struct obd_trans_info *oti) { struct obd_ioobj ioo; @@ -958,7 +957,8 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, ioo.ioo_bufcnt = npages; oti->oti_transno = 0; - ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti); + ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti, + NULL); if (ret != 0) GOTO(out, ret); @@ -1239,6 +1239,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, case OBD_IOC_DESTROY: if (!capable (CAP_SYS_ADMIN)) GOTO (out, rc = -EPERM); + rc = echo_get_object (&eco, obd, &data->ioc_obdo1); if (rc == 0) { oa = &data->ioc_obdo1; @@ -1336,7 +1337,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, out: /* XXX this should be in a helper also called by target_send_reply */ - for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4; + for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4; i++, ack_lock++) { if (!ack_lock->mode) break; @@ -1348,10 +1349,8 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, return rc; } -static int -echo_client_setup(struct obd_device *obddev, obd_count len, void *buf) +static int echo_client_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) { - struct lustre_cfg* lcfg = buf; struct echo_client_obd *ec = &obddev->u.echo_client; struct obd_device *tgt; struct lustre_handle conn = {0, }; @@ -1383,10 +1382,11 @@ echo_client_setup(struct obd_device *obddev, obd_count len, void *buf) return -ENOMEM; } - ocd->ocd_connect_flags = OBD_CONNECT_VERSION; + ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL; ocd->ocd_version = LUSTRE_VERSION_CODE; + ocd->ocd_group = FILTER_GROUP_ECHO; - rc = obd_connect(&conn, tgt, &echo_uuid, ocd); + rc = obd_connect(NULL, &conn, tgt, &echo_uuid, ocd); OBD_FREE(ocd, sizeof(*ocd)); @@ -1431,7 +1431,8 @@ static int echo_client_cleanup(struct obd_device *obddev) RETURN(rc); } -static int echo_client_connect(struct lustre_handle *conn, +static int echo_client_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *data) { @@ -1499,8 +1500,8 @@ int echo_client_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(echo, &lvars); - return class_register_type(&echo_obd_ops, lvars.module_vars, - LUSTRE_ECHO_CLIENT_NAME); + return class_register_type(&echo_obd_ops, NULL, lvars.module_vars, + LUSTRE_ECHO_CLIENT_NAME, NULL); } void echo_client_exit(void) diff --git a/lustre/obdfilter/Makefile.in b/lustre/obdfilter/Makefile.in index 8305eb5..2a15c71 100644 --- a/lustre/obdfilter/Makefile.in +++ b/lustre/obdfilter/Makefile.in @@ -1,12 +1,7 @@ MODULES := obdfilter obdfilter-objs := filter.o filter_io.o filter_log.o -obdfilter-objs += lproc_obdfilter.o filter_lvb.o - -ifeq ($(PATCHLEVEL),4) -obdfilter-objs += filter_io_24.o -else +obdfilter-objs += lproc_obdfilter.o filter_lvb.o filter_capa.o obdfilter-objs += filter_io_26.o -endif # PATCHLEVEL @INCLUDE_RULES@ diff --git a/lustre/obdfilter/autoMakefile.am b/lustre/obdfilter/autoMakefile.am index 5f90afb..ac0c6d1 100644 --- a/lustre/obdfilter/autoMakefile.am +++ b/lustre/obdfilter/autoMakefile.am @@ -8,4 +8,4 @@ modulefs_DATA = obdfilter$(KMODEXT) endif MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ -DIST_SOURCES = $(obdfilter-objs:%.o=%.c) filter_io_24.c filter_io_26.c filter_internal.h +DIST_SOURCES = $(obdfilter-objs:%.o=%.c) filter_io_26.c filter_internal.h diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 6974d87..3e8c928 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -35,7 +35,9 @@ #define DEBUG_SUBSYSTEM S_FILTER +#ifdef HAVE_KERNEL_CONFIG_H #include <linux/config.h> +#endif #include <linux/module.h> #include <linux/fs.h> #include <linux/dcache.h> @@ -62,8 +64,10 @@ #include "filter_internal.h" +/* Group 0 is no longer a legal group, to catch uninitialized IDs */ +#define FILTER_MIN_GROUPS 3 static struct lvfs_callback_ops filter_lvfs_ops; -kmem_cache_t *ll_fmd_cachep; +cfs_mem_cache_t *ll_fmd_cachep; static void filter_commit_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error) @@ -73,7 +77,7 @@ static void filter_commit_cb(struct obd_device *obd, __u64 transno, /* Assumes caller has already pushed us into the kernel context. */ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, - int rc) + int rc, int force_sync) { struct filter_obd *filter = &exp->exp_obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; @@ -115,11 +119,18 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, fed->fed_lr_idx, fed->fed_lr_off); err = -EINVAL; } else { - fsfilt_add_journal_cb(exp->exp_obd, last_rcvd, oti->oti_handle, - filter_commit_cb, NULL); + if (!force_sync) + force_sync = fsfilt_add_journal_cb(exp->exp_obd, + last_rcvd, + oti->oti_handle, + filter_commit_cb, + NULL); + err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp, fcd, sizeof(*fcd), &off, - exp->exp_need_sync); + force_sync | exp->exp_need_sync); + if (force_sync) + filter_commit_cb(exp->exp_obd, last_rcvd, NULL, err); } if (err) { log_pri = D_ERROR; @@ -165,9 +176,9 @@ static int filter_export_stats_init(struct obd_device *obd, if (obd_uuid_equals(&exp->exp_client_uuid, &obd->obd_uuid)) /* Self-export gets no proc entry */ RETURN(0); - + rc = lprocfs_exp_setup(exp); - if (rc) + if (rc) RETURN(rc); /* Create a per export proc entry for brw_stats */ @@ -178,7 +189,7 @@ static int filter_export_stats_init(struct obd_device *obd, brw_entry->data = fed; /* Create a per export proc entry for ops stats */ - num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) + + num_stats = (sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) + LPROC_FILTER_LAST - 1; exp->exp_ops_stats = lprocfs_alloc_stats(num_stats); if (exp->exp_ops_stats == NULL) @@ -203,14 +214,14 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp, struct filter_export_data *fed = &exp->exp_filter_data; unsigned long *bitmap = filter->fo_last_rcvd_slots; int new_client = (cl_idx == -1); - + ENTRY; LASSERT(bitmap != NULL); LASSERTF(cl_idx > -2, "%d\n", cl_idx); /* Self-export */ - if (strcmp(fed->fed_fcd->fcd_uuid, obd->obd_uuid.uuid) == 0) + if (strcmp(fed->fed_fcd->fcd_uuid, obd->obd_uuid.uuid) == 0) RETURN(0); /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so @@ -220,7 +231,7 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp, cl_idx = find_first_zero_bit(bitmap, LR_MAX_CLIENTS); repeat: if (cl_idx >= LR_MAX_CLIENTS) { - CERROR("no room for %u clients - fix LR_MAX_CLIENTS\n", + CERROR("no room for %u client - fix LR_MAX_CLIENTS\n", cl_idx); RETURN(-EOVERFLOW); } @@ -248,8 +259,8 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp, if (new_client) { struct lvfs_run_ctxt saved; loff_t off = fed->fed_lr_off; - void *handle; int rc; + void *handle; CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n", fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd)); @@ -308,7 +319,7 @@ static int filter_client_free(struct obd_export *exp) GOTO(free, 0); CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n", - fed->fed_lr_idx, off, fed->fed_fcd->fcd_uuid); + fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid); LASSERT(filter->fo_last_rcvd_slots != NULL); @@ -335,8 +346,8 @@ static int filter_client_free(struct obd_export *exp) push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd, sizeof(zero_fcd), &off, - (!exp->exp_libclient || exp->exp_need_sync)); - + (!exp->exp_libclient || + exp->exp_need_sync)); if (rc == 0) /* update server's transno */ filter_update_server_data(obd, filter->fo_rcvd_filp, @@ -473,7 +484,7 @@ struct filter_mod_data *filter_fmd_get(struct obd_export *exp, struct filter_export_data *fed = &exp->exp_filter_data; struct filter_mod_data *found = NULL, *fmd_new = NULL; - OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, SLAB_NOFS, sizeof(*fmd_new)); + OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, GFP_NOFS, sizeof(*fmd_new)); spin_lock(&fed->fed_lock); found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group); @@ -571,7 +582,7 @@ int filter_update_server_data(struct obd_device *obd, struct file *filp, le64_to_cpu(fsd->lsd_mount_count)); fsd->lsd_compat14 = fsd->lsd_last_transno; - rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off,force_sync); + rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off, force_sync); if (rc) CERROR("error writing lr_server_data: rc = %d\n", rc); @@ -604,7 +615,7 @@ int filter_update_last_objid(struct obd_device *obd, obd_gr group, group, rc); RETURN(rc); } - +extern int ost_handle(struct ptlrpc_request *req); /* assumes caller has already in kernel ctxt */ static int filter_init_server_data(struct obd_device *obd, struct file * filp) { @@ -655,17 +666,17 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) GOTO(err_fsd, rc); } if (strcmp(fsd->lsd_uuid, obd->obd_uuid.uuid) != 0) { - LCONSOLE_ERROR("Trying to start OBD %s using the wrong" - " disk %s. Were the /dev/ assignments " - "rearranged?\n", - obd->obd_uuid.uuid, fsd->lsd_uuid); + LCONSOLE_ERROR_MSG(0x134, "Trying to start OBD %s " + "using the wrong disk %s. Were the " + "/dev/ assignments rearranged?\n", + obd->obd_uuid.uuid, fsd->lsd_uuid); GOTO(err_fsd, rc = -EINVAL); } mount_count = le64_to_cpu(fsd->lsd_mount_count); filter->fo_subdir_count = le16_to_cpu(fsd->lsd_subdir_count); /* COMPAT_146 */ /* Assume old last_rcvd format unless I_C_LR is set */ - if (!(fsd->lsd_feature_incompat & + if (!(fsd->lsd_feature_incompat & cpu_to_le32(OBD_INCOMPAT_COMMON_LR))) fsd->lsd_last_transno = fsd->lsd_compat14; /* end COMPAT_146 */ @@ -743,9 +754,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) * need to be set up like real exports as filter_connect() does. */ exp = class_new_export(obd, (struct obd_uuid *)fcd->fcd_uuid); + CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 - " srv lr: "LPU64"\n", fcd->fcd_uuid, cl_idx, - last_rcvd, le64_to_cpu(fsd->lsd_last_transno)); + " srv lr: "LPU64" fcd_group %d\n", fcd->fcd_uuid, cl_idx, + last_rcvd, le64_to_cpu(fsd->lsd_last_transno), + le32_to_cpu(fcd->fcd_group)); if (IS_ERR(exp)) { if (PTR_ERR(exp) == -EALREADY) { /* export already exists, zero out this one */ @@ -758,18 +771,17 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) } else { fed = &exp->exp_filter_data; fed->fed_fcd = fcd; + fed->fed_group = le32_to_cpu(fcd->fcd_group); filter_export_stats_init(obd, exp); rc = filter_client_add(obd, exp, cl_idx); - LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */ + /* can't fail for existing client */ + LASSERTF(rc == 0, "rc = %d\n", rc); fcd = NULL; - spin_lock(&exp->exp_lock); - exp->exp_replay_needed = 1; exp->exp_connecting = 0; + exp->exp_in_recovery = 0; spin_unlock(&exp->exp_lock); - - obd->obd_recoverable_clients++; obd->obd_max_recoverable_clients++; class_export_put(exp); } @@ -787,18 +799,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) obd->obd_last_committed = le64_to_cpu(fsd->lsd_last_transno); - if (obd->obd_recoverable_clients) { - CWARN("RECOVERY: service %s, %d recoverable clients, " - "last_rcvd "LPU64"\n", obd->obd_name, - obd->obd_recoverable_clients, - le64_to_cpu(fsd->lsd_last_transno)); - obd->obd_next_recovery_transno = obd->obd_last_committed + 1; - obd->obd_recovering = 1; - obd->obd_recovery_start = CURRENT_SECONDS; - /* Only used for lprocfs_status */ - obd->obd_recovery_end = obd->obd_recovery_start + - OBD_RECOVERY_TIMEOUT; - } + target_recovery_init(obd, ost_handle); out: filter->fo_mount_count = mount_count + 1; @@ -812,7 +813,7 @@ out: RETURN(0); err_client: - class_disconnect_exports(obd); + target_recovery_fini(obd); err_fsd: filter_free_server_data(filter); RETURN(rc); @@ -823,43 +824,48 @@ static int filter_cleanup_groups(struct obd_device *obd) struct filter_obd *filter = &obd->u.filter; struct file *filp; struct dentry *dentry; - int i; + int i, j; ENTRY; if (filter->fo_dentry_O_groups != NULL) { - for (i = 0; i < FILTER_GROUPS; i++) { + for (i = 0; i < filter->fo_group_count; i++) { dentry = filter->fo_dentry_O_groups[i]; if (dentry != NULL) f_dput(dentry); } OBD_FREE(filter->fo_dentry_O_groups, - FILTER_GROUPS * sizeof(*filter->fo_dentry_O_groups)); + filter->fo_group_count * + sizeof(*filter->fo_dentry_O_groups)); filter->fo_dentry_O_groups = NULL; } if (filter->fo_last_objid_files != NULL) { - for (i = 0; i < FILTER_GROUPS; i++) { + for (i = 0; i < filter->fo_group_count; i++) { filp = filter->fo_last_objid_files[i]; if (filp != NULL) filp_close(filp, 0); } OBD_FREE(filter->fo_last_objid_files, - FILTER_GROUPS * sizeof(*filter->fo_last_objid_files)); + filter->fo_group_count * + sizeof(*filter->fo_last_objid_files)); filter->fo_last_objid_files = NULL; } if (filter->fo_dentry_O_sub != NULL) { - for (i = 0; i < filter->fo_subdir_count; i++) { - dentry = filter->fo_dentry_O_sub[i]; - if (dentry != NULL) - f_dput(dentry); + for (i = 0; i < filter->fo_group_count; i++) { + for (j = 0; j < filter->fo_subdir_count; j++) { + dentry = filter->fo_dentry_O_sub[i].dentry[j]; + if (dentry != NULL) + f_dput(dentry); + } } OBD_FREE(filter->fo_dentry_O_sub, - filter->fo_subdir_count * + filter->fo_group_count * sizeof(*filter->fo_dentry_O_sub)); filter->fo_dentry_O_sub = NULL; } if (filter->fo_last_objids != NULL) { OBD_FREE(filter->fo_last_objids, - FILTER_GROUPS * sizeof(*filter->fo_last_objids)); + filter->fo_group_count * + sizeof(*filter->fo_last_objids)); filter->fo_last_objids = NULL; } if (filter->fo_dentry_O != NULL) { @@ -869,13 +875,236 @@ static int filter_cleanup_groups(struct obd_device *obd) RETURN(0); } +static int filter_update_last_group(struct obd_device *obd, int group) +{ + struct filter_obd *filter = &obd->u.filter; + struct file *filp = NULL; + int last_group = 0, rc; + loff_t off = 0; + ENTRY; + + if (group <= filter->fo_committed_group) + RETURN(0); + + filp = filp_open("LAST_GROUP", O_RDWR, 0700); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + filp = NULL; + CERROR("cannot open LAST_GROUP: rc = %d\n", rc); + GOTO(cleanup, rc); + } + + rc = fsfilt_read_record(obd, filp, &last_group, sizeof(__u32), &off); + if (rc) { + CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n",rc); + GOTO(cleanup, rc); + } + LASSERT(off == 0 || last_group >= FILTER_MIN_GROUPS); + CDEBUG(D_INODE, "%s: previous %d, new %d\n", + obd->obd_name, last_group, group); + + off = 0; + last_group = group; + /* must be sync: bXXXX */ + rc = fsfilt_write_record(obd, filp, &last_group, sizeof(__u32), &off, 1); + if (rc) { + CDEBUG(D_INODE, "error updating LAST_GROUP: rc %d\n", rc); + GOTO(cleanup, rc); + } + + filter->fo_committed_group = group; +cleanup: + if (filp) + filp_close(filp, 0); + RETURN(rc); +} + +static int filter_read_group_internal(struct obd_device *obd, int group, + int create) +{ + struct filter_obd *filter = &obd->u.filter; + __u64 *new_objids = NULL; + struct filter_subdirs *new_subdirs = NULL, *tmp_subdirs = NULL; + struct dentry **new_groups = NULL; + struct file **new_files = NULL; + struct dentry *dentry; + struct file *filp; + int old_count = filter->fo_group_count, rc, stage = 0, i; + char name[25]; + __u64 last_objid; + loff_t off = 0; + int len = group + 1; + + snprintf(name, 24, "%d", group); + name[24] = '\0'; + + if (!create) { + dentry = ll_lookup_one_len(name, filter->fo_dentry_O, + strlen(name)); + if (IS_ERR(dentry)) { + CERROR("Cannot lookup expected object group %d: %ld\n", + group, PTR_ERR(dentry)); + RETURN(PTR_ERR(dentry)); + } + } else { + dentry = simple_mkdir(filter->fo_dentry_O, name, 0700, 1); + if (IS_ERR(dentry)) { + CERROR("cannot lookup/create O/%s: rc = %ld\n", name, + PTR_ERR(dentry)); + RETURN(PTR_ERR(dentry)); + } + } + stage = 1; + + snprintf(name, 24, "O/%d/LAST_ID", group); + name[24] = '\0'; + filp = filp_open(name, O_CREAT | O_RDWR, 0700); + if (IS_ERR(filp)) { + CERROR("cannot create %s: rc = %ld\n", name, PTR_ERR(filp)); + GOTO(cleanup, rc = PTR_ERR(filp)); + } + stage = 2; + + rc = fsfilt_read_record(obd, filp, &last_objid, sizeof(__u64), &off); + if (rc) { + CDEBUG(D_INODE, "error reading %s: rc %d\n", name, rc); + GOTO(cleanup, rc); + } + + if (filter->fo_subdir_count) { + OBD_ALLOC(tmp_subdirs, sizeof(*tmp_subdirs)); + if (tmp_subdirs == NULL) + GOTO(cleanup, rc = -ENOMEM); + stage = 3; + + for (i = 0; i < filter->fo_subdir_count; i++) { + char dir[20]; + snprintf(dir, sizeof(dir), "d%u", i); + + tmp_subdirs->dentry[i] = simple_mkdir(dentry, dir, 0700, 1); + if (IS_ERR(tmp_subdirs->dentry[i])) { + rc = PTR_ERR(tmp_subdirs->dentry[i]); + CERROR("can't lookup/create O/%d/%s: rc = %d\n", + group, dir, rc); + GOTO(cleanup, rc); + } + + CDEBUG(D_INODE, "got/created O/%d/%s: %p\n", group, dir, + tmp_subdirs->dentry[i]); + } + } + + /* 'group' is an index; we need an array of length 'group + 1' */ + if (group + 1 > old_count) { + OBD_ALLOC(new_objids, len * sizeof(*new_objids)); + OBD_ALLOC(new_subdirs, len * sizeof(*new_subdirs)); + OBD_ALLOC(new_groups, len * sizeof(*new_groups)); + OBD_ALLOC(new_files, len * sizeof(*new_files)); + stage = 4; + if (new_objids == NULL || new_subdirs == NULL || + new_groups == NULL || new_files == NULL) + GOTO(cleanup, rc = -ENOMEM); + + if (old_count) { + memcpy(new_objids, filter->fo_last_objids, + old_count * sizeof(*new_objids)); + memcpy(new_subdirs, filter->fo_dentry_O_sub, + old_count * sizeof(*new_subdirs)); + memcpy(new_groups, filter->fo_dentry_O_groups, + old_count * sizeof(*new_groups)); + memcpy(new_files, filter->fo_last_objid_files, + old_count * sizeof(*new_files)); + + OBD_FREE(filter->fo_last_objids, + old_count * sizeof(*new_objids)); + OBD_FREE(filter->fo_dentry_O_sub, + old_count * sizeof(*new_subdirs)); + OBD_FREE(filter->fo_dentry_O_groups, + old_count * sizeof(*new_groups)); + OBD_FREE(filter->fo_last_objid_files, + old_count * sizeof(*new_files)); + } + filter->fo_last_objids = new_objids; + filter->fo_dentry_O_sub = new_subdirs; + filter->fo_dentry_O_groups = new_groups; + filter->fo_last_objid_files = new_files; + filter->fo_group_count = len; + } + + filter->fo_dentry_O_groups[group] = dentry; + filter->fo_last_objid_files[group] = filp; + if (filter->fo_subdir_count) { + filter->fo_dentry_O_sub[group] = *tmp_subdirs; + OBD_FREE(tmp_subdirs, sizeof(*tmp_subdirs)); + } + + filter_update_last_group(obd, group); + + if (filp->f_dentry->d_inode->i_size == 0) { + filter->fo_last_objids[group] = FILTER_INIT_OBJID; + rc = filter_update_last_objid(obd, group, 1); + RETURN(rc); + } + + filter->fo_last_objids[group] = le64_to_cpu(last_objid); + CDEBUG(D_INODE, "%s: server last_objid group %d: "LPU64"\n", + obd->obd_name, group, last_objid); + RETURN(0); + cleanup: + switch (stage) { + case 4: + if (new_objids != NULL) + OBD_FREE(new_objids, len * sizeof(*new_objids)); + if (new_subdirs != NULL) + OBD_FREE(new_subdirs, len * sizeof(*new_subdirs)); + if (new_groups != NULL) + OBD_FREE(new_groups, len * sizeof(*new_groups)); + if (new_files != NULL) + OBD_FREE(new_files, len * sizeof(*new_files)); + case 3: + if (filter->fo_subdir_count) { + for (i = 0; i < filter->fo_subdir_count; i++) { + if (tmp_subdirs->dentry[i] != NULL) + dput(tmp_subdirs->dentry[i]); + } + OBD_FREE(tmp_subdirs, sizeof(*tmp_subdirs)); + } + case 2: + filp_close(filp, 0); + case 1: + dput(dentry); + } + RETURN(rc); +} + +static int filter_read_groups(struct obd_device *obd, int last_group, + int create) +{ + struct filter_obd *filter = &obd->u.filter; + int old_count, group, rc = 0; + + down(&filter->fo_init_lock); + old_count = filter->fo_group_count; + for (group = old_count; group <= last_group; group++) { + if (group == 0) + continue; /* no group zero */ + + rc = filter_read_group_internal(obd, group, create); + if (rc != 0) + break; + } + up(&filter->fo_init_lock); + return rc; +} + /* FIXME: object groups */ static int filter_prep_groups(struct obd_device *obd) { struct filter_obd *filter = &obd->u.filter; struct dentry *dentry, *O_dentry; struct file *filp; - int i, rc = 0, cleanup_phase = 0; + int last_group, rc = 0, cleanup_phase = 0; + loff_t off = 0; ENTRY; O_dentry = simple_mkdir(current->fs->pwd, "O", 0700, 1); @@ -887,91 +1116,101 @@ static int filter_prep_groups(struct obd_device *obd) } filter->fo_dentry_O = O_dentry; cleanup_phase = 1; /* O_dentry */ - - OBD_ALLOC(filter->fo_last_objids, FILTER_GROUPS * sizeof(__u64)); - if (filter->fo_last_objids == NULL) - GOTO(cleanup, rc = -ENOMEM); - cleanup_phase = 2; /* groups */ - - OBD_ALLOC(filter->fo_dentry_O_groups, FILTER_GROUPS * sizeof(dentry)); - if (filter->fo_dentry_O_groups == NULL) - GOTO(cleanup, rc = -ENOMEM); - OBD_ALLOC(filter->fo_last_objid_files, FILTER_GROUPS * sizeof(filp)); - if (filter->fo_last_objid_files == NULL) - GOTO(cleanup, rc = -ENOMEM); - - for (i = 0; i < FILTER_GROUPS; i++) { - char name[25]; - loff_t off = 0; - sprintf(name, "%d", i); - dentry = simple_mkdir(O_dentry, name, 0700, 1); - CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("cannot lookup/create O/%s: rc = %d\n", - name, rc); - GOTO(cleanup, rc); + /* Lookup "R" to tell if we're on an old OST FS and need to convert + * from O/R/<dir>/<objid> to O/0/<dir>/<objid>. This can be removed + * some time post 1.0 when all old-style OSTs have converted along + * with the init_objid hack. */ + dentry = ll_lookup_one_len("R", O_dentry, 1); + if (IS_ERR(dentry)) + GOTO(cleanup, rc = PTR_ERR(dentry)); + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { + struct dentry *O0_dentry = lookup_one_len("0", O_dentry, 1); + ENTRY; + + CWARN("converting OST to new object layout\n"); + if (IS_ERR(O0_dentry)) { + rc = PTR_ERR(O0_dentry); + CERROR("error looking up O/0: rc %d\n", rc); + GOTO(cleanup_R, rc); } - filter->fo_dentry_O_groups[i] = dentry; - sprintf(name, "O/%d/LAST_ID", i); - filp = filp_open(name, O_CREAT | O_RDWR, 0700); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - CERROR("cannot create %s: rc = %d\n", name, rc); - GOTO(cleanup, rc); + if (O0_dentry->d_inode) { + CERROR("Both O/R and O/0 exist. Fix manually.\n"); + GOTO(cleanup_O0, rc = -EEXIST); } - filter->fo_last_objid_files[i] = filp; - if (filp->f_dentry->d_inode->i_size == 0) { - filter->fo_last_objids[i] = FILTER_INIT_OBJID; - rc = filter_update_last_objid(obd, i, 1); - if (rc) - GOTO(cleanup, rc); - continue; - } + LOCK_INODE_MUTEX(O_dentry->d_inode); + rc = vfs_rename(O_dentry->d_inode, dentry, + O_dentry->d_inode, O0_dentry); + UNLOCK_INODE_MUTEX(O_dentry->d_inode); - rc = fsfilt_read_record(obd, filp, &filter->fo_last_objids[i], - sizeof(__u64), &off); if (rc) { - CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", - name, rc); - GOTO(cleanup, rc); + CERROR("error renaming O/R to O/0: rc %d\n", rc); + GOTO(cleanup_O0, rc); } - filter->fo_last_objids[i] = - le64_to_cpu(filter->fo_last_objids[i]); - CDEBUG(D_HA, "%s: server last_objid group %d: "LPU64"\n", - obd->obd_name, i, filter->fo_last_objids[i]); + filter->fo_fsd->lsd_feature_incompat |= + cpu_to_le32(OBD_INCOMPAT_GROUPS); + rc = filter_update_server_data(obd, filter->fo_rcvd_filp, + filter->fo_fsd, 1); + GOTO(cleanup_O0, rc); + + cleanup_O0: + f_dput(O0_dentry); + cleanup_R: + f_dput(dentry); + if (rc) + GOTO(cleanup, rc); + } else { + f_dput(dentry); } - if (filter->fo_subdir_count) { - O_dentry = filter->fo_dentry_O_groups[0]; - OBD_ALLOC(filter->fo_dentry_O_sub, - filter->fo_subdir_count * sizeof(dentry)); - if (filter->fo_dentry_O_sub == NULL) - GOTO(cleanup, rc = -ENOMEM); + cleanup_phase = 2; /* groups */ - for (i = 0; i < filter->fo_subdir_count; i++) { - char dir[20]; - snprintf(dir, sizeof(dir), "d%u", i); + /* we have to initialize all groups before first connections from + * clients because they may send create/destroy for any group -bzzz */ + filp = filp_open("LAST_GROUP", O_CREAT | O_RDWR, 0700); + if (IS_ERR(filp)) { + CERROR("cannot create LAST_GROUP: rc = %ld\n", PTR_ERR(filp)); + GOTO(cleanup, rc = PTR_ERR(filp)); + } + cleanup_phase = 3; /* filp */ - dentry = simple_mkdir(O_dentry, dir, 0700, 1); - CDEBUG(D_INODE, "got/created O/0/%s: %p\n", dir,dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("can't lookup/create O/0/%s: rc = %d\n", - dir, rc); - GOTO(cleanup, rc); - } - filter->fo_dentry_O_sub[i] = dentry; - } + rc = fsfilt_read_record(obd, filp, &last_group, sizeof(__u32), &off); + if (rc) { + CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n", rc); + GOTO(cleanup, rc); + } + if (off == 0) { + last_group = FILTER_MIN_GROUPS; + } else { + LASSERT(last_group >= FILTER_MIN_GROUPS); } + + CWARN("%s: initialize groups [%d,%d]\n", obd->obd_name, + FILTER_MIN_GROUPS, last_group); + filter->fo_committed_group = last_group; + rc = filter_read_groups(obd, last_group, 1); + if (rc) + GOTO(cleanup, rc); + + filp_close(filp, 0); RETURN(0); cleanup: - filter_cleanup_groups(obd); + switch (cleanup_phase) { + case 3: + filp_close(filp, 0); + case 2: + filter_cleanup_groups(obd); + case 1: + f_dput(filter->fo_dentry_O); + filter->fo_dentry_O = NULL; + default: + break; + } return rc; + } /* setup the object store with correct subdirectories */ @@ -1018,7 +1257,7 @@ static int filter_prep(struct obd_device *obd) rc = PTR_ERR(file); CERROR("OBD filter: cannot open/create %s rc = %d\n", HEALTH_CHECK, rc); - GOTO(err_filp, rc); + GOTO(err_server_data, rc); } filter->fo_health_check_filp = file; if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { @@ -1032,20 +1271,20 @@ static int filter_prep(struct obd_device *obd) rc = filter_prep_groups(obd); if (rc) - GOTO(err_server_data, rc); - out: + GOTO(err_health_check, rc); +out: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); return(rc); - err_server_data: - //class_disconnect_exports(obd, 0); - filter_free_server_data(filter); - err_health_check: +err_health_check: if (filp_close(filter->fo_health_check_filp, 0)) CERROR("can't close %s after error\n", HEALTH_CHECK); filter->fo_health_check_filp = NULL; - err_filp: +err_server_data: + target_recovery_fini(obd); + filter_free_server_data(filter); +err_filp: if (filp_close(filter->fo_rcvd_filp, 0)) CERROR("can't close %s after error\n", LAST_RCVD); filter->fo_rcvd_filp = NULL; @@ -1069,8 +1308,9 @@ static void filter_post(struct obd_device *obd) if (rc) CERROR("error writing server data: rc = %d\n", rc); - for (i = 0; i < FILTER_GROUPS; i++) { - rc = filter_update_last_objid(obd, i, (i == FILTER_GROUPS - 1)); + for (i = 1; i < filter->fo_group_count; i++) { + rc = filter_update_last_objid(obd, i, + (i == filter->fo_group_count - 1)); if (rc) CERROR("error writing group %d lastobjid: rc = %d\n", i, rc); @@ -1089,13 +1329,16 @@ static void filter_post(struct obd_device *obd) filter_cleanup_groups(obd); filter_free_server_data(filter); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + filter_free_capa_keys(filter); + cleanup_capa_hash(filter->fo_capa_hash); } static void filter_set_last_id(struct filter_obd *filter, obd_id id, obd_gr group) { LASSERT(filter->fo_fsd != NULL); - LASSERT(group <= FILTER_GROUPS); + LASSERT(group <= filter->fo_group_count); spin_lock(&filter->fo_objidlock); filter->fo_last_objids[group] = id; @@ -1106,7 +1349,7 @@ obd_id filter_last_id(struct filter_obd *filter, obd_gr group) { obd_id id; LASSERT(filter->fo_fsd != NULL); - LASSERT(group <= FILTER_GROUPS); + LASSERT(group <= filter->fo_group_count); /* FIXME: object groups */ spin_lock(&filter->fo_objidlock); @@ -1126,12 +1369,14 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *dparent) struct dentry *filter_parent(struct obd_device *obd, obd_gr group, obd_id objid) { struct filter_obd *filter = &obd->u.filter; - LASSERT(group < FILTER_GROUPS); /* FIXME: object groups */ + struct filter_subdirs *subdirs; + LASSERT(group < filter->fo_group_count); /* FIXME: object groups */ if (group > 0 || filter->fo_subdir_count == 0) return filter->fo_dentry_O_groups[group]; - return filter->fo_dentry_O_sub[objid & (filter->fo_subdir_count - 1)]; + subdirs = &filter->fo_dentry_O_sub[group]; + return subdirs->dentry[objid & (filter->fo_subdir_count - 1)]; } /* We never dget the object parent, so DON'T dput it either */ @@ -1174,13 +1419,12 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT) && - !obd->u.filter.fo_destroy_in_progress) { + obd->u.filter.fo_destroys_in_progress == 0) { /* don't fail lookups for orphan recovery, it causes * later LBUGs when objects still exist during precreate */ CDEBUG(D_INFO, "*** obd_fail_loc=%x ***\n",OBD_FAIL_OST_ENOENT); RETURN(ERR_PTR(-ENOENT)); } - if (id == 0) { CERROR("fatal: invalid object id 0\n"); RETURN(ERR_PTR(-ESTALE)); @@ -1222,17 +1466,18 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, RETURN(dchild); } -static int filter_prepare_destroy(struct obd_device *obd, obd_id objid) +static int filter_prepare_destroy(struct obd_device *obd, obd_id objid, + obd_id group) { struct lustre_handle lockh; int flags = LDLM_AST_DISCARD_DATA, rc; - struct ldlm_res_id res_id = { .name = { objid } }; + struct ldlm_res_id res_id = { .name = { objid, 0, group, 0} }; ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; ENTRY; /* Tell the clients that the object is gone now and that they should * throw away any cached pages. */ - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id, LDLM_EXTENT, + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_EXTENT, &policy, LCK_PW, &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, &lockh); @@ -1561,6 +1806,7 @@ static int filter_iobuf_pool_init(struct filter_obd *filter) ENTRY; + OBD_ALLOC_GFP(filter->fo_iobuf_pool, OSS_THREADS_MAX * sizeof(*pool), GFP_KERNEL); if (filter->fo_iobuf_pool == NULL) @@ -1602,10 +1848,9 @@ void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti) * 3 = flags: failover=f, failout=n * 4 = mount options */ -int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, +int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, void *option) { - struct lustre_cfg* lcfg = buf; struct filter_obd *filter = &obd->u.filter; struct vfsmount *mnt; struct lustre_mount_info *lmi; @@ -1613,7 +1858,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, __u8 *uuid_ptr; char *str, *label; char ns_name[48]; - int rc; + int rc, i; ENTRY; if (lcfg->lcfg_bufcount < 3 || @@ -1633,11 +1878,11 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, CERROR("Using old MDS mount method\n"); mnt = ll_kern_mount(lustre_cfg_string(lcfg, 2), MS_NOATIME|MS_NODIRATIME, - lustre_cfg_string(lcfg, 1), option); + lustre_cfg_string(lcfg, 1), option); if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); - LCONSOLE_ERROR("Can't mount disk %s (%d)\n", - lustre_cfg_string(lcfg, 1), rc); + LCONSOLE_ERROR_MSG(0x135, "Can't mount disk %s (%d)\n", + lustre_cfg_string(lcfg, 1), rc); RETURN(rc); } @@ -1668,18 +1913,25 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, filter->fo_fstype = mnt->mnt_sb->s_type->name; CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt); + fsfilt_setup(obd, obd->u.obt.obt_sb); + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); obd->obd_lvfs_ctxt.pwdmnt = mnt; obd->obd_lvfs_ctxt.pwd = mnt->mnt_root; obd->obd_lvfs_ctxt.fs = get_ds(); obd->obd_lvfs_ctxt.cb_ops = filter_lvfs_ops; + sema_init(&filter->fo_init_lock, 1); + filter->fo_committed_group = 0; + rc = filter_prep(obd); if (rc) GOTO(err_ops, rc); - filter->fo_destroy_in_progress = 0; - sema_init(&filter->fo_create_lock, 1); + filter->fo_destroys_in_progress = 0; + for (i = 0; i < 32; i++) + sema_init(&filter->fo_create_locks[i], 1); + spin_lock_init(&filter->fo_translock); spin_lock_init(&filter->fo_objidlock); INIT_LIST_HEAD(&filter->fo_export_list); @@ -1689,6 +1941,15 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT; filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT; + INIT_LIST_HEAD(&filter->fo_llog_list); + spin_lock_init(&filter->fo_llog_list_lock); + + filter->fo_fl_oss_capa = 0; + INIT_LIST_HEAD(&filter->fo_capa_keys); + filter->fo_capa_hash = init_capa_hash(); + if (filter->fo_capa_hash == NULL) + GOTO(err_ops, rc = -ENOMEM); + sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid); obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER); if (obd->obd_namespace == NULL) @@ -1700,7 +1961,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); - rc = llog_cat_initialize(obd, 1, NULL); + rc = llog_cat_initialize(obd, NULL, 1, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_post, rc); @@ -1717,7 +1978,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, } else { str = "no UUID"; } - + label = fsfilt_get_label(obd, obd->u.obt.obt_sb); if (obd->obd_recovering) { @@ -1729,8 +1990,8 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, "/proc/fs/lustre/obdfilter/%s/recovery_status.\n", obd->obd_name, lustre_cfg_string(lcfg, 1), label ?: "", label ? "/" : "", str, - obd->obd_recoverable_clients, - (obd->obd_recoverable_clients == 1) + obd->obd_max_recoverable_clients, + (obd->obd_max_recoverable_clients == 1) ? "client" : "clients", (int)(OBD_RECOVERY_TIMEOUT) / 60, (int)(OBD_RECOVERY_TIMEOUT) % 60, @@ -1755,10 +2016,9 @@ err_mntput: return rc; } -static int filter_setup(struct obd_device *obd, obd_count len, void *buf) +static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg* lcfg = buf; unsigned long page; int rc; @@ -1773,13 +2033,10 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) if (!page) RETURN(-ENOMEM); - memcpy((void *)page, lustre_cfg_buf(lcfg, 4), - LUSTRE_CFG_BUFLEN(lcfg, 4)); - rc = filter_common_setup(obd, len, buf, (void *)page); - free_page(page); - + /* lprocfs must be setup before the filter so state can be safely added + * to /proc incrementally as the filter is setup */ lprocfs_init_vars(filter, &lvars); - if (rc == 0 && lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && + if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST) == 0) { /* Init obdfilter private stats here */ lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES, @@ -1788,11 +2045,22 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes"); + lproc_filter_attach_seqstat(obd); obd->obd_proc_exports = proc_mkdir("exports", obd->obd_proc_entry); } + memcpy((void *)page, lustre_cfg_buf(lcfg, 4), + LUSTRE_CFG_BUFLEN(lcfg, 4)); + rc = filter_common_setup(obd, lcfg, (void *)page); + free_page(page); + + if (rc) { + lprocfs_obd_cleanup(obd); + lprocfs_free_obd_stats(obd); + } + return rc; } @@ -1803,8 +2071,9 @@ static struct llog_operations filter_size_orig_logops = { lop_add: llog_obd_origin_add }; -static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *catid, +static int filter_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, + struct llog_catid *catid, struct obd_uuid *uuid) { struct llog_ctxt *ctxt; @@ -1816,20 +2085,59 @@ static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, filter_mds_ost_repl_logops.lop_connect = llog_repl_connect; filter_mds_ost_repl_logops.lop_sync = llog_obd_repl_sync; - rc = llog_setup(obd, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_MDS_OST_REPL_CTXT, tgt, 0, NULL, &filter_mds_ost_repl_logops); if (rc) RETURN(rc); /* FIXME - assign unlink_cb for filter's recovery */ - ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); + if (!llogs) + ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); + else + ctxt = llog_get_context_from_llogs(llogs, LLOG_MDS_OST_REPL_CTXT); + + LASSERT(ctxt != NULL); ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb; - rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL, + rc = llog_setup(obd, llogs, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL, &filter_size_orig_logops); RETURN(rc); } +static int filter_group_llog_cleanup(struct llog_ctxt *ctxt) +{ + int rc = 0; + ENTRY; + + if (CTXTP(ctxt, cleanup)) + rc = CTXTP(ctxt, cleanup)(ctxt); + + if (ctxt->loc_exp) + class_export_put(ctxt->loc_exp); + OBD_FREE(ctxt, sizeof(*ctxt)); + + RETURN(rc); +} + +static int filter_group_llog_finish(struct obd_llogs *llogs) +{ + struct llog_ctxt *ctxt; + int rc = 0, rc2 = 0; + ENTRY; + + ctxt = llog_get_context_from_llogs(llogs, LLOG_MDS_OST_REPL_CTXT); + if (ctxt) + rc = filter_group_llog_cleanup(ctxt); + + ctxt = llog_get_context_from_llogs(llogs, LLOG_SIZE_ORIG_CTXT); + if (ctxt) + rc2 = filter_group_llog_cleanup(ctxt); + if (!rc) + rc = rc2; + + RETURN(rc); +} + static int filter_llog_finish(struct obd_device *obd, int count) { struct llog_ctxt *ctxt; @@ -1849,6 +2157,134 @@ static int filter_llog_finish(struct obd_device *obd, int count) RETURN(rc); } +struct obd_llogs *filter_grab_llog_for_group(struct obd_device *obd, int group, + struct obd_export *export) +{ + struct filter_group_llog *fglog, *nlog; + struct filter_obd *filter; + struct llog_ctxt *ctxt; + struct list_head *cur; + int rc; + + filter = &obd->u.filter; + + spin_lock(&filter->fo_llog_list_lock); + list_for_each(cur, &filter->fo_llog_list) { + fglog = list_entry(cur, struct filter_group_llog, list); + if (fglog->group == group) { + if (!(fglog->exp == NULL || fglog->exp == export || export == NULL)) + CWARN("%s: export for group %d changes: 0x%p -> 0x%p\n", + obd->obd_name, group, fglog->exp, export); + spin_unlock(&filter->fo_llog_list_lock); + goto init; + } + } + spin_unlock(&filter->fo_llog_list_lock); + + if (export == NULL) + RETURN(NULL); + + OBD_ALLOC_PTR(fglog); + if (fglog == NULL) + RETURN(NULL); + fglog->group = group; + + OBD_ALLOC_PTR(fglog->llogs); + if (fglog->llogs == NULL) { + OBD_FREE_PTR(fglog); + RETURN(NULL); + } + + spin_lock(&filter->fo_llog_list_lock); + list_for_each(cur, &filter->fo_llog_list) { + nlog = list_entry(cur, struct filter_group_llog, list); + LASSERT(nlog->group != group); + } + list_add(&fglog->list, &filter->fo_llog_list); + spin_unlock(&filter->fo_llog_list_lock); + + rc = llog_cat_initialize(obd, fglog->llogs, 1, NULL); + if (rc) { + OBD_FREE_PTR(fglog->llogs); + OBD_FREE_PTR(fglog); + RETURN(NULL); + } + +init: + if (export) { + fglog->exp = export; + ctxt = llog_get_context_from_llogs(fglog->llogs, + LLOG_MDS_OST_REPL_CTXT); + LASSERT(ctxt != NULL); + + llog_receptor_accept(ctxt, export->exp_imp_reverse); + } + CDEBUG(D_OTHER, "%s: new llog 0x%p for group %u\n", + obd->obd_name, fglog->llogs, group); + + RETURN(fglog->llogs); +} + +static int filter_llog_connect(struct obd_export *exp, + struct llogd_conn_body *body) +{ + struct obd_device *obd = exp->exp_obd; + struct llog_ctxt *ctxt; + struct obd_llogs *llog; + int rc; + ENTRY; + + CDEBUG(D_OTHER, "handle connect for %s: %u/%u/%u\n", obd->obd_name, + (unsigned) body->lgdc_logid.lgl_ogr, + (unsigned) body->lgdc_logid.lgl_oid, + (unsigned) body->lgdc_logid.lgl_ogen); + + llog = filter_grab_llog_for_group(obd, body->lgdc_logid.lgl_ogr, exp); + LASSERT(llog != NULL); + ctxt = llog_get_context_from_llogs(llog, body->lgdc_ctxt_idx); + LASSERTF(ctxt != NULL, "ctxt is not null, ctxt idx %d \n", + body->lgdc_ctxt_idx); + rc = llog_connect(ctxt, 1, &body->lgdc_logid, + &body->lgdc_gen, NULL); + if (rc != 0) + CERROR("failed to connect rc %d idx %d\n", rc, + body->lgdc_ctxt_idx); + + RETURN(rc); +} + +static int filter_llog_preclean (struct obd_device *obd) +{ + struct filter_group_llog *log; + struct filter_obd *filter; + int rc = 0; + ENTRY; + + filter = &obd->u.filter; + spin_lock(&filter->fo_llog_list_lock); + while (!list_empty(&filter->fo_llog_list)) { + log = list_entry(filter->fo_llog_list.next, + struct filter_group_llog, list); + list_del(&log->list); + spin_unlock(&filter->fo_llog_list_lock); + + rc = filter_group_llog_finish(log->llogs); + if (rc) + CERROR("failed to cleanup llogging subsystem for %u\n", + log->group); + OBD_FREE_PTR(log->llogs); + OBD_FREE_PTR(log); + spin_lock(&filter->fo_llog_list_lock); + } + spin_unlock(&filter->fo_llog_list_lock); + + rc = obd_llog_finish(obd, 0); + if (rc) + CERROR("failed to cleanup llogging subsystem\n"); + + RETURN(rc); +} + static int filter_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) { @@ -1862,7 +2298,7 @@ static int filter_precleanup(struct obd_device *obd, target_cleanup_recovery(obd); break; case OBD_CLEANUP_SELF_EXP: - rc = filter_llog_finish(obd, 0); + rc = filter_llog_preclean(obd); break; case OBD_CLEANUP_OBD: break; @@ -1890,9 +2326,12 @@ static int filter_cleanup(struct obd_device *obd) lprocfs_obd_cleanup(obd); lprocfs_free_obd_stats(obd); - lquota_cleanup(filter_quota_interface_ref, obd); + /* Stop recovery before namespace cleanup. */ + target_stop_recovery_thread(obd); + target_cleanup_recovery(obd); + ldlm_namespace_free(obd->obd_namespace, obd->obd_force); if (obd->u.obt.obt_sb == NULL) @@ -1933,12 +2372,14 @@ static int filter_connect_internal(struct obd_export *exp, data->ocd_version = LUSTRE_VERSION_CODE; if (exp->exp_connect_flags & OBD_CONNECT_GRANT) { + struct filter_export_data *fed = &exp->exp_filter_data; obd_size left, want; spin_lock(&exp->exp_obd->obd_osfs_lock); left = filter_grant_space_left(exp); want = data->ocd_grant; - data->ocd_grant = filter_grant(exp, 0, want, left); + filter_grant(exp, fed->fed_grant, want, left); + data->ocd_grant = fed->fed_grant; spin_unlock(&exp->exp_obd->obd_osfs_lock); CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: " @@ -1951,7 +2392,7 @@ static int filter_connect_internal(struct obd_export *exp, struct filter_obd *filter = &exp->exp_obd->u.filter; struct lr_server_data *lsd = filter->fo_fsd; int index = le32_to_cpu(lsd->lsd_ost_index); - + if (!(lsd->lsd_feature_compat & cpu_to_le32(OBD_COMPAT_OST))) { /* this will only happen on the first connect */ @@ -1960,11 +2401,12 @@ static int filter_connect_internal(struct obd_export *exp, filter_update_server_data(exp->exp_obd, filter->fo_rcvd_filp, lsd, 1); } else if (index != data->ocd_index) { - LCONSOLE_ERROR("Connection from %s to index %u doesn't " - "match actual OST index %u in last_rcvd " - "file, bad configuration?\n", - obd_export_nid2str(exp), index, - data->ocd_index); + LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index" + " %u doesn't match actual OST index" + " %u in last_rcvd file, bad " + "configuration?\n", + obd_export_nid2str(exp), index, + data->ocd_index); RETURN(-EBADF); } } @@ -1973,8 +2415,8 @@ static int filter_connect_internal(struct obd_export *exp, data->ocd_brw_size = 65536; } else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) { data->ocd_brw_size = min(data->ocd_brw_size, - (__u32)(PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT)); - LASSERT(data->ocd_brw_size); + (__u32)(PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT)); + LASSERT(data->ocd_brw_size); } /* FIXME: Do the same with the MDS UUID and fsd_peeruuid. @@ -2001,13 +2443,16 @@ static int filter_reconnect(struct obd_export *exp, struct obd_device *obd, } /* nearly identical to mds_connect */ -static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, +static int filter_connect(const struct lu_env *env, + struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { + struct lvfs_run_ctxt saved; struct obd_export *exp; struct filter_export_data *fed; struct filter_client_data *fcd = NULL; + __u32 group; int rc; ENTRY; @@ -2027,20 +2472,42 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, GOTO(cleanup, rc); filter_export_stats_init(obd, exp); + group = data->ocd_group; + if (obd->obd_replayable) { + OBD_ALLOC(fcd, sizeof(*fcd)); + if (!fcd) { + CERROR("filter: out of memory for client data\n"); + GOTO(cleanup, rc = -ENOMEM); + } - if (!obd->obd_replayable) - GOTO(cleanup, rc = 0); - - OBD_ALLOC(fcd, sizeof(*fcd)); - if (!fcd) { - CERROR("filter: out of memory for client data\n"); - GOTO(cleanup, rc = -ENOMEM); + memcpy(fcd->fcd_uuid, cluuid, sizeof(fcd->fcd_uuid)); + fed->fed_fcd = fcd; + fed->fed_fcd->fcd_group = group; + rc = filter_client_add(obd, exp, -1); + if (rc) + GOTO(cleanup, rc); } + CWARN("%s: Received MDS connection ("LPX64"); group %d\n", + obd->obd_name, exp->exp_handle.h_cookie, group); + if (group == 0) + GOTO(cleanup, rc); - memcpy(fcd->fcd_uuid, cluuid, sizeof(fcd->fcd_uuid)); - fed->fed_fcd = fcd; + if (fed->fed_group != 0 && fed->fed_group != group) { + CERROR("!!! This export (nid %s) used object group %d " + "earlier; now it's trying to use group %d! This could " + "be a bug in the MDS. Tell CFS.\n", + obd_export_nid2str(exp), fed->fed_group, group); + GOTO(cleanup, rc = -EPROTO); + } + fed->fed_group = group; - rc = filter_client_add(obd, exp, -1); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = filter_read_groups(obd, group, 1); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (rc != 0) { + CERROR("can't read group %u\n", group); + GOTO(cleanup, rc); + } GOTO(cleanup, rc); @@ -2072,6 +2539,11 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) if (list_empty(&obd->obd_exports)) return; + /* We don't want to do this for large machines that do lots of + mounts or unmounts. It burns... */ + if (obd->obd_num_exports > 100) + return; + spin_lock(&obd->obd_osfs_lock); spin_lock(&obd->obd_dev_lock); list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { @@ -2195,12 +2667,57 @@ static int filter_destroy_export(struct obd_export *exp) RETURN(0); } +static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp) +{ + struct filter_group_llog *fglog, *nlog; + struct filter_obd *filter; + int worked = 0, group; + struct llog_ctxt *ctxt; + ENTRY; + + filter = &obd->u.filter; + + /* we can't sync log holding spinlock. also, we do not want to get + * into livelock. so we do following: loop over MDS's exports in + * group order and skip already synced llogs -bzzz */ + do { + /* look for group with min. number, but > worked */ + fglog = NULL; + group = 1 << 30; + spin_lock(&filter->fo_llog_list_lock); + list_for_each_entry(nlog, &filter->fo_llog_list, list) { + if (nlog->group <= worked) { + /* this group is already synced */ + continue; + } + if (group < nlog->group) { + /* we have group with smaller number to sync */ + continue; + } + /* store current minimal group */ + fglog = nlog; + group = nlog->group; + } + spin_unlock(&filter->fo_llog_list_lock); + + if (fglog == NULL) + break; + + worked = fglog->group; + if (fglog->exp && (dexp == fglog->exp || dexp == NULL)) { + ctxt = llog_get_context_from_llogs(fglog->llogs, + LLOG_MDS_OST_REPL_CTXT); + LASSERT(ctxt != NULL); + llog_sync(ctxt, fglog->exp); + } + } while (fglog != NULL); +} + /* also incredibly similar to mds_disconnect */ static int filter_disconnect(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; - struct llog_ctxt *ctxt; - int rc, err; + int rc; ENTRY; LASSERT(exp); @@ -2215,16 +2732,28 @@ static int filter_disconnect(struct obd_export *exp) if (exp->exp_obd->obd_namespace != NULL) ldlm_cancel_locks_for_export(exp); - /* flush any remaining cancel messages out to the target */ - ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); - err = llog_sync(ctxt, exp); - if (err) - CERROR("error flushing logs to MDS: rc %d\n", err); + fsfilt_sync(obd, obd->u.obt.obt_sb); + /* flush any remaining cancel messages out to the target */ + filter_sync_llogs(obd, exp); class_export_put(exp); RETURN(rc); } +/* reverse import is changed, sync all cancels */ +static void filter_revimp_update(struct obd_export *exp) +{ + ENTRY; + + LASSERT(exp); + class_export_get(exp); + + /* flush any remaining cancel messages out to the target */ + filter_sync_llogs(exp->exp_obd, exp); + class_export_put(exp); + EXIT; +} + static int filter_ping(struct obd_export *exp) { filter_fmd_expire(exp); @@ -2236,11 +2765,12 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa, const char *what, int quiet) { struct dentry *dchild = NULL; + obd_gr group = 0; - if (!(oa->o_valid & OBD_MD_FLGROUP)) - oa->o_gr = 0; + if (oa->o_valid & OBD_MD_FLGROUP) + group = oa->o_gr; - dchild = filter_fid2dentry(obd, NULL, oa->o_gr, oa->o_id); + dchild = filter_fid2dentry(obd, NULL, group, oa->o_id); if (IS_ERR(dchild)) { CERROR("%s error looking up object: "LPU64"\n", @@ -2266,6 +2796,11 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo) int rc = 0; ENTRY; + rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_META_READ); + if (rc) + RETURN(rc); + obd = class_exp2obd(exp); if (obd == NULL) { CDEBUG(D_IOCTL, "invalid client export %p\n", exp); @@ -2298,7 +2833,6 @@ int filter_update_fidea(struct obd_export *exp, struct inode *inode, if (!(oa->o_valid & OBD_MD_FLGROUP)) oa->o_gr = 0; - /* packing fid and converting it to LE for storing into EA. * Here ->o_stripe_idx should be filled by LOV and rest of * fields - by client. */ @@ -2329,7 +2863,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, unsigned int orig_ids[MAXQUOTAS] = {0, 0}; struct llog_cookie *fcc = NULL; struct filter_obd *filter; - int rc, err, locked = 0; + int rc, err, locked = 0, sync = 0; unsigned int ia_valid; struct inode *inode; struct iattr iattr; @@ -2387,7 +2921,6 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, orig_ids[GRPQUOTA] = inode->i_gid; handle = fsfilt_start_log(exp->exp_obd, inode, FSFILT_OP_SETATTR, oti, 1); - if (IS_ERR(handle)) GOTO(out_unlock, rc = PTR_ERR(handle)); @@ -2399,28 +2932,19 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, } else { handle = fsfilt_start(exp->exp_obd, inode, FSFILT_OP_SETATTR, oti); - if (IS_ERR(handle)) GOTO(out_unlock, rc = PTR_ERR(handle)); } - if (oa->o_valid & OBD_MD_FLFLAGS) { rc = fsfilt_iocontrol(exp->exp_obd, inode, NULL, EXT3_IOC_SETFLAGS, (long)&oa->o_flags); } else { rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); - if (fcc != NULL) { + if (fcc != NULL) /* set cancel cookie callback function */ - if (fsfilt_add_journal_cb(exp->exp_obd, 0, handle, - filter_cancel_cookies_cb, - fcc)) { - spin_lock(&exp->exp_lock); - exp->exp_need_sync = 1; - spin_unlock(&exp->exp_lock); - } else { - fcc = NULL; - } - } + sync = fsfilt_add_journal_cb(exp->exp_obd, 0, handle, + filter_cancel_cookies_cb, + fcc); } if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS)) @@ -2429,13 +2953,20 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, /* The truncate might have used up our transaction credits. Make * sure we have one left for the last_rcvd update. */ err = fsfilt_extend(exp->exp_obd, inode, 1, handle); - rc = filter_finish_transno(exp, oti, rc); + + rc = filter_finish_transno(exp, oti, rc, sync); + if (sync) { + filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc); + fcc = NULL; + } err = fsfilt_commit(exp->exp_obd, inode, handle, 0); if (err) { CERROR("error on commit, err = %d\n", err); if (!rc) rc = err; + } else { + fcc = NULL; } if (locked) { @@ -2461,7 +2992,8 @@ out_unlock: /* trigger quota release */ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { unsigned int cur_ids[MAXQUOTAS] = {oa->o_uid, oa->o_gid}; - int rc2 = lquota_adjust(filter_quota_interface_ref, exp->exp_obd, cur_ids, + int rc2 = lquota_adjust(filter_quota_interface_ref, + exp->exp_obd, cur_ids, orig_ids, rc, FSFILT_OP_SETATTR); CDEBUG(rc2 ? D_ERROR : D_QUOTA, "filter adjust qunit. (rc:%d)\n", rc2); @@ -2473,7 +3005,8 @@ out_unlock: int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti) { - struct ldlm_res_id res_id = { .name = { oinfo->oi_oa->o_id } }; + struct ldlm_res_id res_id = { .name = { oinfo->oi_oa->o_id, 0, + oinfo->oi_oa->o_gr, 0 } }; struct ldlm_valblock_ops *ns_lvbo; struct filter_mod_data *fmd; struct lvfs_run_ctxt saved; @@ -2483,6 +3016,11 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; + rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_META_WRITE); + if (rc) + RETURN(rc); + dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa, __FUNCTION__, 1); if (IS_ERR(dentry)) @@ -2506,7 +3044,7 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, GOTO(out_unlock, rc); res = ldlm_resource_get(exp->exp_obd->obd_namespace, NULL, - res_id, LDLM_EXTENT, 0); + &res_id, LDLM_EXTENT, 0); if (res != NULL) { ns_lvbo = res->lr_namespace->ns_lvbo; @@ -2565,6 +3103,7 @@ static int filter_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, OBD_ALLOC(*lsmp, lsm_size); if (*lsmp == NULL) RETURN(-ENOMEM); + OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo)); if ((*lsmp)->lsm_oinfo[0] == NULL) { OBD_FREE(*lsmp, lsm_size); @@ -2584,7 +3123,7 @@ static int filter_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(lsm_size); } -/* caller must hold fo_create_lock */ +/* caller must hold fo_create_locks[oa->o_gr] */ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, struct filter_obd *filter) { @@ -2594,26 +3133,27 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, ENTRY; LASSERT(oa); - LASSERT(down_trylock(&filter->fo_create_lock) != 0); + LASSERT(oa->o_gr != 0); + LASSERT(oa->o_valid & OBD_MD_FLGROUP); + LASSERT(down_trylock(&filter->fo_create_locks[oa->o_gr]) != 0); memset(&doa, 0, sizeof(doa)); - if (oa->o_valid & OBD_MD_FLGROUP) { - doa.o_valid |= OBD_MD_FLGROUP; - doa.o_gr = oa->o_gr; - } else { - doa.o_gr = 0; - } + + doa.o_valid |= OBD_MD_FLGROUP; + doa.o_gr = oa->o_gr; doa.o_mode = S_IFREG; - if (!filter->fo_destroy_in_progress) { - CERROR("%s: destroy_in_progress already cleared\n", - exp->exp_obd->obd_name); + if (!test_bit(doa.o_gr, &filter->fo_destroys_in_progress)) { + CERROR("%s:["LPU64"] destroys_in_progress already cleared\n", + exp->exp_obd->obd_name, doa.o_gr); RETURN(0); } last = filter_last_id(filter, doa.o_gr); + CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n", exp->exp_obd->obd_name, oa->o_id + 1, last); + for (id = last; id > oa->o_id; id--) { doa.o_id = id; rc = filter_destroy(exp, &doa, NULL, NULL, NULL); @@ -2631,7 +3171,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, exp->exp_obd->obd_name, doa.o_gr, oa->o_id); rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1); - filter->fo_destroy_in_progress = 0; + clear_bit(doa.o_gr, &filter->fo_destroys_in_progress); RETURN(rc); } @@ -2654,10 +3194,15 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa, obd->obd_name); RETURN(0); } - /* This causes inflight precreates to abort and drop lock */ - filter->fo_destroy_in_progress = 1; - down(&filter->fo_create_lock); + set_bit(group, &filter->fo_destroys_in_progress); + down(&filter->fo_create_locks[group]); + if (!test_bit(group, &filter->fo_destroys_in_progress)) { + CERROR("%s:["LPU64"] destroys_in_progress already cleared\n", + exp->exp_obd->obd_name, group); + up(&filter->fo_create_locks[group]); + RETURN(0); + } diff = oa->o_id - filter_last_id(filter, group); CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n", filter_last_id(filter, group), diff); @@ -2676,18 +3221,18 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa, "orphans were deleted\n", obd->obd_name); GOTO(out, rc); } else { - /*XXX used by MDS for the first time! */ - filter->fo_destroy_in_progress = 0; + /* XXX: Used by MDS for the first time! */ + clear_bit(group, &filter->fo_destroys_in_progress); } } else { - down(&filter->fo_create_lock); + down(&filter->fo_create_locks[group]); if (oti->oti_conn_cnt < exp->exp_conn_cnt) { CERROR("%s: dropping old precreate request\n", obd->obd_name); GOTO(out, rc = 0); } /* only precreate if group == 0 and o_id is specfied */ - if (group != 0 || oa->o_id == 0) + if (group < FILTER_GROUP_MDS0 || oa->o_id == 0) diff = 1; else diff = oa->o_id - filter_last_id(filter, group); @@ -2702,13 +3247,14 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa, oa->o_id = filter_last_id(&obd->u.filter, group); rc = filter_precreate(obd, oa, group, &diff); oa->o_id = filter_last_id(&obd->u.filter, group); - oa->o_valid = OBD_MD_FLID; + oa->o_gr = group; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; GOTO(out, rc); } /* else diff == 0 */ GOTO(out, rc = 0); out: - up(&filter->fo_create_lock); + up(&filter->fo_create_locks[group]); return rc; } @@ -2757,13 +3303,14 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, * thread 2: creates object (x + 1) * thread 1: tries to create object x, gets -ENOSPC * - * Caller must hold fo_create_lock + * Caller must hold fo_create_locks[group] */ static int filter_precreate(struct obd_device *obd, struct obdo *oa, obd_gr group, int *num) { struct dentry *dchild = NULL, *dparent = NULL; struct filter_obd *filter; + struct obd_statfs *osfs; int err = 0, rc = 0, recreate_obj = 0, i; unsigned long enough_time = jiffies + min(obd_timeout * HZ / 4, 10U*HZ); obd_id next_id; @@ -2772,14 +3319,12 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, filter = &obd->u.filter; - LASSERT(down_trylock(&filter->fo_create_lock) != 0); + LASSERT(down_trylock(&filter->fo_create_locks[group]) != 0); if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { recreate_obj = 1; } else { - struct obd_statfs *osfs; - OBD_ALLOC(osfs, sizeof(*osfs)); if (osfs == NULL) RETURN(-ENOMEM); @@ -2802,13 +3347,13 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, for (i = 0; i < *num && err == 0; i++) { int cleanup_phase = 0; - if (filter->fo_destroy_in_progress) { - CWARN("%s: precreate aborted by destroy\n", + if (test_bit(group, &filter->fo_destroys_in_progress)) { + CWARN("%s: create aborted by destroy\n", obd->obd_name); rc = -EAGAIN; break; } - + if (recreate_obj) { __u64 last_id; next_id = oa->o_id; @@ -2827,12 +3372,12 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, dparent = filter_parent_lock(obd, group, next_id); if (IS_ERR(dparent)) GOTO(cleanup, rc = PTR_ERR(dparent)); - cleanup_phase = 1; /* filter_parent_unlock(dparent) */ + cleanup_phase = 1; /* filter_parent_unlock(dparent) */ dchild = filter_fid2dentry(obd, dparent, group, next_id); if (IS_ERR(dchild)) GOTO(cleanup, rc = PTR_ERR(dchild)); - cleanup_phase = 2; /* f_dput(dchild) */ + cleanup_phase = 2; /* f_dput(dchild) */ if (dchild->d_inode != NULL) { /* This would only happen if lastobjid was bad on disk*/ @@ -2904,8 +3449,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, } *num = i; - CDEBUG(D_HA, "%s: created %d objects for group "LPU64": "LPU64" rc %d\n", - obd->obd_name, i, group, filter->fo_last_objids[group], rc); + CDEBUG(D_HA, "%s: created %d objects for group "LPU64": "LPU64"\n", + obd->obd_name, i, group, filter->fo_last_objids[group]); RETURN(rc); } @@ -2913,14 +3458,31 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, static int filter_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { + struct filter_export_data *fed; struct obd_device *obd = NULL; + struct filter_obd *filter; struct lvfs_run_ctxt saved; struct lov_stripe_md *lsm = NULL; - int rc = 0; + int rc = 0, diff, group = oa->o_gr; ENTRY; - if (!(oa->o_valid & OBD_MD_FLGROUP)) - oa->o_gr = 0; + if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) { + CERROR("!!! nid %s sent invalid object group %d\n", + obd_export_nid2str(exp), group); + RETURN(-EINVAL); + } + + obd = exp->exp_obd; + fed = &exp->exp_filter_data; + filter = &obd->u.filter; + + if (fed->fed_group != group) { + CERROR("!!! this export (nid %s) used object group %d " + "earlier; now it's trying to use group %d! This could " + "be a bug in the MDS. Tell CFS.\n", + obd_export_nid2str(exp), fed->fed_group, group); + RETURN(-ENOTUNIQ); + } CDEBUG(D_INFO, "filter_create(od->o_gr="LPU64",od->o_id="LPU64")\n", oa->o_gr, oa->o_id); @@ -2938,18 +3500,16 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { - if (oa->o_id > filter_last_id(&obd->u.filter, oa->o_gr)) { + if (oa->o_id > filter_last_id(filter, oa->o_gr)) { CERROR("recreate objid "LPU64" > last id "LPU64"\n", - oa->o_id, filter_last_id(&obd->u.filter, + oa->o_id, filter_last_id(filter, oa->o_gr)); rc = -EINVAL; } else { - struct filter_obd *filter = &obd->u.filter; - int diff = 1; - - down(&filter->fo_create_lock); + diff = 1; + down(&filter->fo_create_locks[oa->o_gr]); rc = filter_precreate(obd, oa, oa->o_gr, &diff); - up(&filter->fo_create_lock); + up(&filter->fo_create_locks[oa->o_gr]); } } else { rc = filter_handle_precreate(exp, oa, oa->o_gr, oti); @@ -2981,12 +3541,11 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; - int rc, rc2, cleanup_phase = 0; + int rc, rc2, cleanup_phase = 0, sync = 0; struct iattr iattr; ENTRY; - if (!(oa->o_valid & OBD_MD_FLGROUP)) - oa->o_gr = 0; + LASSERT(oa->o_valid & OBD_MD_FLGROUP); obd = exp->exp_obd; filter = &obd->u.filter; @@ -3012,7 +3571,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc = -ENOENT); } - filter_prepare_destroy(obd, oa->o_id); + filter_prepare_destroy(obd, oa->o_id, oa->o_gr); /* Our MDC connection is established by the MDS to us */ if (oa->o_valid & OBD_MD_FLCOOKIE) { @@ -3076,19 +3635,28 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, cleanup: switch(cleanup_phase) { case 4: - if (fcc != NULL) { - if (fsfilt_add_journal_cb(obd, 0, oti ? - oti->oti_handle : handle, - filter_cancel_cookies_cb, - fcc) == 0) - fcc = NULL; + if (fcc != NULL) + sync = fsfilt_add_journal_cb(obd, 0, oti ? + oti->oti_handle : handle, + filter_cancel_cookies_cb, + fcc); + /* If add_journal_cb failed, then filter_finish_transno + * will commit the handle and we will do a sync + * on commit. then we call callback directly to free + * the fcc. + */ + rc = filter_finish_transno(exp, oti, rc, sync); + if (sync) { + filter_cancel_cookies_cb(obd, 0, fcc, rc); + fcc = NULL; } - rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) rc = rc2; + } else { + fcc = NULL; } case 3: filter_parent_unlock(dparent); @@ -3109,8 +3677,9 @@ cleanup: qcids[GRPQUOTA] = oa->o_gid; rc2 = lquota_adjust(filter_quota_interface_ref, obd, qcids, NULL, rc, FSFILT_OP_UNLINK); - if (rc2) - CERROR("filter adjust qunit! (rc:%d)\n", rc2); + + CDEBUG(rc ? D_ERROR : D_QUOTA, + "filter adjust qunit! (rc:%d)\n", rc? rc : rc2); return rc; } @@ -3132,13 +3701,19 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo, ", o_size = "LPD64"\n", oinfo->oi_oa->o_id, oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start); + rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC); + if (rc) + RETURN(rc); + oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start; rc = filter_setattr(exp, oinfo, oti); RETURN(rc); } static int filter_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end) + struct lov_stripe_md *lsm, obd_off start, obd_off end, + void *capa) { struct lvfs_run_ctxt saved; struct filter_obd *filter; @@ -3147,6 +3722,11 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, int rc, rc2; ENTRY; + rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa), + (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE); + if (rc) + RETURN(rc); + filter = &exp->exp_obd->u.filter; /* an objid of zero is taken to mean "sync whole filesystem" */ @@ -3232,8 +3812,9 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, struct ptlrpc_request_set *set) { struct obd_device *obd; + struct obd_llogs *llog; struct llog_ctxt *ctxt; - int rc = 0; + int rc = 0, group; ENTRY; obd = exp->exp_obd; @@ -3242,6 +3823,18 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, RETURN(-EINVAL); } + if (KEY_IS(KEY_CAPA_KEY)) { + rc = filter_update_capa_key(obd, (struct lustre_capa_key *)val); + if (rc) + CERROR("filter update capability key failed: %d\n", rc); + RETURN(rc); + } + + if (KEY_IS(KEY_REVIMP_UPD)) { + filter_revimp_update(exp); + RETURN(0); + } + if (keylen < strlen(KEY_MDS_CONN) || memcmp(key, KEY_MDS_CONN, keylen) != 0) RETURN(-EINVAL); @@ -3251,7 +3844,15 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie; /* setup llog imports */ - ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT); + LASSERT(val != NULL); + group = (int)(*(__u32 *)val); + LASSERT(group >= FILTER_GROUP_MDS0); + + llog = filter_grab_llog_for_group(obd, group, exp); + LASSERT(llog != NULL); + ctxt = llog_get_context_from_llogs(llog, LLOG_MDS_OST_REPL_CTXT); + LASSERTF(ctxt != NULL, "ctxt is not null\n"), + rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); lquota_setinfo(filter_quota_interface_ref, exp, obd); @@ -3269,7 +3870,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, switch (cmd) { case OBD_IOC_ABORT_RECOVERY: { CERROR("aborting recovery for device %s\n", obd->obd_name); - target_abort_recovery(obd); + target_stop_recovery_thread(obd); RETURN(0); } @@ -3345,7 +3946,6 @@ static int filter_health_check(struct obd_device *obd) LASSERT(filter->fo_health_check_filp != NULL); rc |= !!lvfs_check_io_health(obd, filter->fo_health_check_filp); #endif - return rc; } @@ -3355,17 +3955,17 @@ static struct dentry *filter_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, return filter_fid2dentry(data, NULL, gr, id); } -static int filter_process_config(struct obd_device *obd, obd_count len, void *buf) +static int filter_process_config(struct obd_device *obd, obd_count len, + void *buf) { struct lustre_cfg *lcfg = buf; struct lprocfs_static_vars lvars; int rc = 0; lprocfs_init_vars(filter, &lvars); - - rc = class_process_proc_param(PARAM_OST, lvars.obd_vars, lcfg, obd); - return(rc); + rc = class_process_proc_param(PARAM_OST, lvars.obd_vars, lcfg, obd); + return rc; } static struct lvfs_callback_ops filter_lvfs_ops = { @@ -3397,6 +3997,7 @@ static struct obd_ops filter_obd_ops = { .o_preprw = filter_preprw, .o_commitrw = filter_commitrw, .o_llog_init = filter_llog_init, + .o_llog_connect = filter_llog_connect, .o_llog_finish = filter_llog_finish, .o_iocontrol = filter_iocontrol, .o_health_check = filter_health_check, @@ -3411,8 +4012,6 @@ static int __init obdfilter_init(void) struct lprocfs_static_vars lvars; int rc; - printk(KERN_INFO "Lustre: Filtering OBD driver; info@clusterfs.com\n"); - lprocfs_init_vars(filter, &lvars); request_module("lquota"); @@ -3422,21 +4021,21 @@ static int __init obdfilter_init(void) if (obdfilter_created_scratchpad == NULL) return -ENOMEM; - ll_fmd_cachep = kmem_cache_create("ll_fmd_cache", + ll_fmd_cachep = cfs_mem_cache_create("ll_fmd_cache", sizeof(struct filter_mod_data), - 0, 0, NULL, NULL); + 0, 0); if (!ll_fmd_cachep) GOTO(out, rc = -ENOMEM); filter_quota_interface_ref = PORTAL_SYMBOL_GET(filter_quota_interface); init_obd_quota_ops(filter_quota_interface_ref, &filter_obd_ops); - rc = class_register_type(&filter_obd_ops, lvars.module_vars, - LUSTRE_OST_NAME); + rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars, + LUSTRE_OST_NAME, NULL); if (rc) { int err; - err = kmem_cache_destroy(ll_fmd_cachep); + err = cfs_mem_cache_destroy(ll_fmd_cachep); LASSERTF(err == 0, "Cannot destroy ll_fmd_cachep: rc %d\n",err); ll_fmd_cachep = NULL; out: @@ -3457,7 +4056,7 @@ static void __exit obdfilter_exit(void) PORTAL_SYMBOL_PUT(filter_quota_interface); if (ll_fmd_cachep) { - int rc = kmem_cache_destroy(ll_fmd_cachep); + int rc = cfs_mem_cache_destroy(ll_fmd_cachep); LASSERTF(rc == 0, "Cannot destroy ll_fmd_cachep: rc %d\n", rc); ll_fmd_cachep = NULL; } diff --git a/lustre/obdfilter/filter_capa.c b/lustre/obdfilter/filter_capa.c new file mode 100644 index 0000000..83ccbed --- /dev/null +++ b/lustre/obdfilter/filter_capa.c @@ -0,0 +1,303 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * Author: Lai Siyao <lsy@clusterfs.com> + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include <linux/fs.h> +#include <linux/version.h> +#include <asm/uaccess.h> +#include <linux/file.h> +#include <linux/kmod.h> + +#include <lustre_fsfilt.h> +#include <lustre_capa.h> + +#include "filter_internal.h" + +/* + * FIXME + * keep this as simple as possible. we suppose the blacklist usually + * be empry or very short (<5), since long term blacklist should be + * done on MDS side. A more sophisticated blacklist will be implemented + * later. + * + * note blacklist didn't take effect when OSS capability disabled. this + * looks reasonable to me. + */ +#define BLACKLIST_MAX (32) +static int nblacklist = 0; +static uid_t blacklist[BLACKLIST_MAX]; +static spinlock_t blacklist_lock = SPIN_LOCK_UNLOCKED; + +int blacklist_display(char *buf, int bufsize) +{ + char one[16]; + int i; + LASSERT(buf); + + buf[0] = '\0'; + spin_lock(&blacklist_lock); + for (i = 0; i < nblacklist; i++) { + snprintf(one, 16, "%u\n", blacklist[i]); + strncat(buf, one, bufsize); + } + spin_unlock(&blacklist_lock); + return strnlen(buf, bufsize); +} + +void blacklist_add(uid_t uid) +{ + int i; + + spin_lock(&blacklist_lock); + if (nblacklist == BLACKLIST_MAX) { + CERROR("can't add more in blacklist\n"); + spin_unlock(&blacklist_lock); + return; + } + + for (i = 0; i < nblacklist; i++) { + if (blacklist[i] == uid) { + spin_unlock(&blacklist_lock); + return; + } + } + + blacklist[nblacklist++] = uid; + spin_unlock(&blacklist_lock); +} + +void blacklist_del(uid_t uid) +{ + int i; + + spin_lock(&blacklist_lock); + for (i = 0; i < nblacklist; i++) { + if (blacklist[i] == uid) { + nblacklist--; + while (i < nblacklist) { + blacklist[i] = blacklist[i+1]; + i++; + } + spin_unlock(&blacklist_lock); + return; + } + } + spin_unlock(&blacklist_lock); +} + +static int blacklist_check(uid_t uid) +{ + int i, rc = 0; + + spin_lock(&blacklist_lock); + for (i = 0; i < nblacklist; i++) { + if (blacklist[i] == uid) { + rc = 1; + break; + } + } + spin_unlock(&blacklist_lock); + return rc; +} + +static inline __u32 filter_ck_keyid(struct filter_capa_key *key) +{ + return key->k_key.lk_keyid; +} + +int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *new) +{ + struct filter_obd *filter = &obd->u.filter; + struct filter_capa_key *k, *keys[2] = { NULL, NULL }; + int i; + + spin_lock(&capa_lock); + list_for_each_entry(k, &filter->fo_capa_keys, k_list) { + if (k->k_key.lk_mdsid != new->lk_mdsid) + continue; + + if (keys[0]) { + keys[1] = k; + if (filter_ck_keyid(keys[1]) > filter_ck_keyid(keys[0])) + keys[1] = keys[0], keys[0] = k; + } else { + keys[0] = k; + } + } + spin_unlock(&capa_lock); + + for (i = 0; i < 2; i++) { + if (!keys[i]) + continue; + if (filter_ck_keyid(keys[i]) != new->lk_keyid) + continue; + /* maybe because of recovery or other reasons, MDS sent the + * the old capability key again. + */ + spin_lock(&capa_lock); + keys[i]->k_key = *new; + spin_unlock(&capa_lock); + + RETURN(0); + } + + if (keys[1]) { + /* if OSS already have two keys, update the old one */ + k = keys[1]; + } else { + OBD_ALLOC_PTR(k); + if (!k) + RETURN(-ENOMEM); + INIT_LIST_HEAD(&k->k_list); + } + + spin_lock(&capa_lock); + k->k_key = *new; + if (list_empty(&k->k_list)) + list_add(&k->k_list, &filter->fo_capa_keys); + spin_unlock(&capa_lock); + + DEBUG_CAPA_KEY(D_SEC, new, "new"); + RETURN(0); +} + +int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, + struct lustre_capa *capa, __u64 opc) +{ + struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = &obd->u.filter; + struct filter_capa_key *k; + struct lustre_capa_key key; + struct obd_capa *oc; + __u8 *hmac; + int keys_ready = 0, key_found = 0, rc = 0; + ENTRY; + + /* capability is disabled */ + if (!filter->fo_fl_oss_capa) + RETURN(0); + + if (capa == NULL) { + if (fid) + CERROR("mdsno/fid/opc "LPU64"/"DFID"/"LPX64 + ": no capability has been passed\n", + mdsid, PFID(fid), opc); + else + CERROR("mdsno/opc "LPU64"/"LPX64 + ": no capability has been passed\n", + mdsid, opc); + RETURN(-EACCES); + } + + if (blacklist_check(capa->lc_uid)) { + DEBUG_CAPA(D_ERROR, capa, "uid %u found in blacklist,", + capa->lc_uid); + RETURN(-EACCES); + } + +#warning "enable fid check in filter_auth_capa() when fid stored in OSS object" + + if (opc == CAPA_OPC_OSS_READ) { + if (!(capa->lc_opc & CAPA_OPC_OSS_RW)) + rc = -EACCES; + } else if (!capa_opc_supported(capa, opc)) { + rc = -EACCES; + } + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc); + RETURN(rc); + } + + oc = capa_lookup(filter->fo_capa_hash, capa, 0); + if (oc) { + spin_lock(&oc->c_lock); + if (capa_is_expired(oc)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + rc = -ESTALE; + } + spin_unlock(&oc->c_lock); + + capa_put(oc); + RETURN(rc); + } + + spin_lock(&capa_lock); + list_for_each_entry(k, &filter->fo_capa_keys, k_list) + if (k->k_key.lk_mdsid == mdsid) { + keys_ready = 1; + if (k->k_key.lk_keyid == capa_keyid(capa)) { + key = k->k_key; + key_found = 1; + break; + } + } + spin_unlock(&capa_lock); + + if (!keys_ready) { + CDEBUG(D_SEC, "MDS hasn't propagated capability keys yet, " + "ignore check!\n"); + RETURN(0); + } + + if (!key_found) { + DEBUG_CAPA(D_ERROR, capa, "no matched capability key for"); + RETURN(-ESTALE); + } + + OBD_ALLOC(hmac, CAPA_HMAC_MAX_LEN); + if (hmac == NULL) + RETURN(-ENOMEM); + + rc = capa_hmac(hmac, capa, key.lk_key); + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "HMAC failed: rc %d", rc); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + RETURN(rc); + } + + rc = memcmp(hmac, capa->lc_hmac, CAPA_HMAC_MAX_LEN); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, &key, "calculate HMAC with "); + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + RETURN(-EACCES); + } + + /* store in capa hash */ + oc = capa_add(filter->fo_capa_hash, capa); + capa_put(oc); + RETURN(0); +} + +void filter_free_capa_keys(struct filter_obd *filter) +{ + struct filter_capa_key *key, *n; + + spin_lock(&capa_lock); + list_for_each_entry_safe(key, n, &filter->fo_capa_keys, k_list) { + list_del_init(&key->k_list); + OBD_FREE(key, sizeof(*key)); + } + spin_unlock(&capa_lock); +} diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 1c5d0f5..444e0a6 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -18,8 +18,8 @@ #define FILTER_INIT_OBJID 0 -#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ -#define FILTER_GROUPS 3 /* must be at least 3; not dynamic yet */ +#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ +#define FILTER_GROUPS 3 /* must be at least 3; not dynamic yet */ #define FILTER_ROCOMPAT_SUPP (0) @@ -38,7 +38,8 @@ struct filter_client_data { __u8 fcd_uuid[40]; /* client UUID */ __u64 fcd_last_rcvd; /* last completed transaction ID */ __u64 fcd_last_xid; /* client RPC xid for the last transaction */ - __u8 fcd_padding[LR_CLIENT_SIZE - 56]; + __u32 fcd_group; /* mds group */ + __u8 fcd_padding[LR_CLIENT_SIZE - 60]; }; /* Limit the returned fields marked valid to those that we actually might set */ @@ -90,6 +91,10 @@ enum { #define OBDFILTER_CREATED_SCRATCHPAD_ENTRIES 1024 extern int *obdfilter_created_scratchpad; +extern void target_recovery_fini(struct obd_device *obd); +extern void target_recovery_init(struct obd_device *obd, + svc_handler_t handler); + /* filter.c */ void f_dput(struct dentry *); struct dentry *filter_fid2dentry(struct obd_device *, struct dentry *dir, @@ -98,7 +103,8 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa, const char *what, int quiet); #define filter_oa2dentry(obd, oa) __filter_oa2dentry(obd, oa, __FUNCTION__, 0) -int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc); +int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc, + int force_sync); __u64 filter_next_id(struct filter_obd *, struct obdo *); __u64 filter_last_id(struct filter_obd *, obd_gr group); int filter_update_fidea(struct obd_export *exp, struct inode *inode, @@ -106,7 +112,7 @@ int filter_update_fidea(struct obd_export *exp, struct inode *inode, int filter_update_server_data(struct obd_device *, struct file *, struct lr_server_data *, int force_sync); int filter_update_last_objid(struct obd_device *, obd_gr, int force_sync); -int filter_common_setup(struct obd_device *, obd_count len, void *buf, +int filter_common_setup(struct obd_device *, struct lustre_cfg *lcfg, void *option); int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *, @@ -118,6 +124,9 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, struct dentry *filter_create_object(struct obd_device *obd, struct obdo *oa); +struct obd_llogs *filter_grab_llog_for_group(struct obd_device *obd, int group, + struct obd_export *export); + /* filter_lvb.c */ extern struct ldlm_valblock_ops filter_lvbo; @@ -125,7 +134,8 @@ extern struct ldlm_valblock_ops filter_lvbo; /* filter_io.c */ int filter_preprw(int cmd, struct obd_export *, struct obdo *, int objcount, struct obd_ioobj *, int niocount, struct niobuf_remote *, - struct niobuf_local *, struct obd_trans_info *); + struct niobuf_local *, struct obd_trans_info *, + struct lustre_capa *); int filter_commitrw(int cmd, struct obd_export *, struct obdo *, int objcount, struct obd_ioobj *, int niocount, struct niobuf_local *, struct obd_trans_info *, int rc); @@ -163,7 +173,7 @@ struct ost_filterdata { }; int filter_log_sz_change(struct llog_handle *cathandle, struct ll_fid *mds_fid, - __u32 io_epoch, + __u32 ioepoch, struct llog_cookie *logcookie, struct inode *inode); //int filter_get_catalog(struct obd_device *); @@ -173,28 +183,32 @@ int filter_recov_log_mds_ost_cb(struct llog_handle *llh, struct llog_rec_hdr *rec, void *data); #ifdef LPROCFS -void filter_tally_write(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, - int blocks_per_page); -void filter_tally_read(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, - int blocks_per_page); +void filter_tally(struct obd_export *exp, struct page **pages, int nr_pages, + unsigned long *blocks, int blocks_per_page, int wr); int lproc_filter_attach_seqstat(struct obd_device *dev); #else -static inline void filter_tally_write(struct obd_export *exp, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} -static inline void filter_tally_read(struct obd_export *exp, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} -static inline void filter_tally_read(struct filter_obd *filter, - struct page **pages, int nr_pages, - unsigned long *blocks, int blocks_per_page) {} +static inline void filter_tally(struct obd_export *exp, struct page **pages, + int nr_pages, unsigned long *blocks, + int blocks_per_page, int wr) {} static inline int lproc_filter_attach_seqstat(struct obd_device *dev) {} #endif /* Quota stuff */ extern quota_interface_t *filter_quota_interface_ref; +/* Capability */ +static inline __u64 obdo_mdsno(struct obdo *oa) +{ + return oa->o_gr - FILTER_GROUP_MDS0; +} + +int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key); +int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, + struct lustre_capa *capa, __u64 opc); +void filter_free_capa_keys(struct filter_obd *filter); + +void blacklist_add(uid_t uid); +void blacklist_del(uid_t uid); +int blacklist_display(char *buf, int bufsize); #endif /* _FILTER_INTERNAL_H */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 3948593..2a6a326 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -258,8 +258,10 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, } } - CDEBUG(D_CACHE,"%s: cli %s/%p wants: "LPU64" granting: "LPU64"\n", - obd->obd_name, exp->exp_client_uuid.uuid, exp, want, grant); + CDEBUG(D_CACHE, + "%s: cli %s/%p wants: "LPU64" current grant "LPU64 + " granting: "LPU64"\n", obd->obd_name, exp->exp_client_uuid.uuid, + exp, want, current_grant, grant); CDEBUG(D_CACHE, "%s: cli %s/%p tot cached:"LPU64" granted:"LPU64 " num_exports: %d\n", obd->obd_name, exp->exp_client_uuid.uuid, @@ -273,7 +275,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { struct obd_device *obd = exp->exp_obd; struct lvfs_run_ctxt saved; @@ -292,7 +295,12 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, LASSERTF(objcount == 1, "%d\n", objcount); LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt); - if (oa->o_valid & OBD_MD_FLGRANT) { + rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa), capa, + CAPA_OPC_OSS_READ); + if (rc) + RETURN(rc); + + if (oa && oa->o_valid & OBD_MD_FLGRANT) { spin_lock(&obd->obd_osfs_lock); filter_grant_incoming(exp, oa); @@ -356,6 +364,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc); lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); + lprocfs_counter_add(exp->exp_ops_stats, LPROC_FILTER_READ_BYTES, tot_bytes); @@ -386,9 +395,9 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, * right on through. * * Caller must hold obd_osfs_lock. */ -static int filter_grant_check(struct obd_export *exp, int objcount, - struct fsfilt_objinfo *fso, int niocount, - struct niobuf_remote *rnb, +static int filter_grant_check(struct obd_export *exp, struct obdo *oa, + int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_remote *rnb, struct niobuf_local *lnb, obd_size *left, struct inode *inode) { @@ -410,7 +419,8 @@ static int filter_grant_check(struct obd_export *exp, int objcount, if (tmp) bytes += blocksize - tmp; - if (rnb[n].flags & OBD_BRW_FROM_GRANT) { + if ((rnb[n].flags & OBD_BRW_FROM_GRANT) && + (oa->o_valid & OBD_MD_FLGRANT)) { if (fed->fed_grant < used + bytes) { CDEBUG(D_CACHE, "%s: cli %s/%p claims %ld+%d " @@ -432,6 +442,7 @@ static int filter_grant_check(struct obd_export *exp, int objcount, /* if enough space, pretend it was granted */ ungranted += bytes; rnb[n].flags |= OBD_BRW_GRANTED; + lnb[n].lnb_grant_used = bytes; CDEBUG(0, "idx %d ungranted=%lu\n",n,ungranted); rc = 0; continue; @@ -457,8 +468,9 @@ static int filter_grant_check(struct obd_export *exp, int objcount, * happens in filter_grant_commit() after the writes are done. */ *left -= ungranted; fed->fed_grant -= used; - fed->fed_pending += used; - exp->exp_obd->u.filter.fo_tot_pending += used; + fed->fed_pending += used + ungranted; + exp->exp_obd->u.filter.fo_tot_granted += ungranted; + exp->exp_obd->u.filter.fo_tot_pending += used + ungranted; CDEBUG(mask, "%s: cli %s/%p used: %lu ungranted: %lu grant: %lu dirty: %lu\n", @@ -506,7 +518,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { struct lvfs_run_ctxt saved; struct niobuf_remote *rnb; @@ -522,6 +535,11 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); + rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa), capa, + CAPA_OPC_OSS_WRITE); + if (rc) + RETURN(rc); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti); if (IS_ERR(iobuf)) @@ -552,26 +570,25 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, * already exist so we can store the reservation handle there. */ fmd = filter_fmd_find(exp, obj->ioo_id, obj->ioo_gr); + LASSERT(oa != NULL); spin_lock(&exp->exp_obd->obd_osfs_lock); - if (oa) { - filter_grant_incoming(exp, oa); - if (fmd && fmd->fmd_mactime_xid > oti->oti_xid) - oa->o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLATIME); - else - obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - } + filter_grant_incoming(exp, oa); + if (fmd && fmd->fmd_mactime_xid > oti->oti_xid) + oa->o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLATIME); + else + obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); cleanup_phase = 3; left = filter_grant_space_left(exp); - rc = filter_grant_check(exp, objcount, &fso, niocount, nb, res, + rc = filter_grant_check(exp, oa, objcount, &fso, niocount, nb, res, &left, dentry->d_inode); /* do not zero out oa->o_valid as it is used in filter_commitrw_write() * for setting UID/GID and fid EA in first write time. */ - if (oa && oa->o_valid & OBD_MD_FLGRANT) { + if (oa->o_valid & OBD_MD_FLGRANT) { oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left); oa->o_valid |= OBD_MD_FLGRANT; } @@ -648,9 +665,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, fsfilt_check_slow(exp->exp_obd, now, obd_timeout, "start_page_write"); - lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES, - tot_bytes); - lprocfs_counter_add(exp->exp_ops_stats, LPROC_FILTER_WRITE_BYTES, + lprocfs_counter_add(exp->exp_ops_stats, LPROC_FILTER_WRITE_BYTES, tot_bytes); EXIT; cleanup: @@ -680,14 +695,14 @@ cleanup: int filter_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, struct lustre_capa *capa) { if (cmd == OBD_BRW_WRITE) return filter_preprw_write(cmd, exp, oa, objcount, obj, - niocount, nb, res, oti); + niocount, nb, res, oti, capa); if (cmd == OBD_BRW_READ) return filter_preprw_read(cmd, exp, oa, objcount, obj, - niocount, nb, res, oti); + niocount, nb, res, oti, capa); LBUG(); return -EPROTO; } @@ -716,7 +731,8 @@ static int filter_commitrw_read(struct obd_export *exp, struct obdo *oa, struct obd_trans_info *oti, int rc) { struct inode *inode = NULL; - struct ldlm_res_id res_id = { .name = { obj->ioo_id } }; + struct ldlm_res_id res_id = { .name = { obj->ioo_id, 0, + obj->ioo_gr, 0} }; struct ldlm_resource *resource = NULL; struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; ENTRY; @@ -725,7 +741,7 @@ static int filter_commitrw_read(struct obd_export *exp, struct obdo *oa, * and we should update the lvb so that other glimpses will also * get the updated value. bug 5972 */ if (oa && ns && ns->ns_lvbo && ns->ns_lvbo->lvbo_update) { - resource = ldlm_resource_get(ns, NULL, res_id, LDLM_EXTENT, 0); + resource = ldlm_resource_get(ns, NULL, &res_id, LDLM_EXTENT, 0); if (resource != NULL) { ns->ns_lvbo->lvbo_update(resource, NULL, 0, 1); @@ -855,7 +871,7 @@ int filter_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, ioo.ioo_bufcnt = oa_bufs; ret = filter_preprw(cmd, exp, oinfo->oi_oa, 1, &ioo, - oa_bufs, rnb, lnb, oti); + oa_bufs, rnb, lnb, oti, oinfo_capa(oinfo)); if (ret != 0) GOTO(out, ret); diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c deleted file mode 100644 index 49ca0fe..0000000 --- a/lustre/obdfilter/filter_io_24.c +++ /dev/null @@ -1,544 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/fs/obdfilter/filter_io.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/pagemap.h> // XXX kill me soon -#include <linux/version.h> - -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/iobuf.h> -#include <linux/locks.h> - -#include <obd_class.h> -#include <lustre_fsfilt.h> -#include "filter_internal.h" - -/* Bug 2254 -- this is better done in ext3_map_inode_page, but this - * workaround will suffice until everyone has upgraded their kernels */ -static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev, - int size) -{ -#if (LUSTRE_KERNEL_VERSION < 32) - struct buffer_head *bh; - int i; - - for (i = 0; i < nr_pages; i++) { - bh = get_hash_table(dev, blocks[i], size); - if (bh == NULL) - continue; - if (!buffer_dirty(bh)) { - put_bh(bh); - continue; - } - mark_buffer_clean(bh); - wait_on_buffer(bh); - clear_bit(BH_Req, &bh->b_state); - __brelse(bh); - } -#endif -} - -/* when brw_kiovec() is asked to read from block -1UL it just zeros - * the page. this gives us a chance to verify the write mappings - * as well */ -static int filter_cleanup_mappings(int rw, struct kiobuf *iobuf, - struct inode *inode) -{ - int i, blocks_per_page_bits = CFS_PAGE_SHIFT - inode->i_blkbits; - ENTRY; - - for (i = 0 ; i < iobuf->nr_pages << blocks_per_page_bits; i++) { - if (KIOBUF_GET_BLOCKS(iobuf)[i] > 0) - continue; - - if (rw == OBD_BRW_WRITE) - RETURN(-EINVAL); - - KIOBUF_GET_BLOCKS(iobuf)[i] = -1UL; - } - RETURN(0); -} - -#if 0 -static void dump_page(int rw, unsigned long block, struct page *page) -{ - char *blah = kmap(page); - CDEBUG(D_PAGE, "rw %d block %lu: %02x %02x %02x %02x\n", rw, block, - blah[0], blah[1], blah[2], blah[3]); - kunmap(page); -} -#endif - -/* These are our hacks to keep our directio/bh IO coherent with ext3's - * page cache use. Most notably ext3 reads file data into the page - * cache when it is zeroing the tail of partial-block truncates and - * leaves it there, sometimes generating io from it at later truncates. - * This removes the partial page and its buffers from the page cache, - * so it should only ever cause a wait in rare cases, as otherwise we - * always do full-page IO to the OST. - * - * The call to truncate_complete_page() will call journal_flushpage() to - * free the buffers and drop the page from cache. The buffers should not - * be dirty, because we already called fdatasync/fdatawait on them. - */ -static int filter_sync_inode_data(struct inode *inode) -{ - int rc, rc2; - - /* This is nearly generic_osync_inode, without the waiting on the inode - rc = generic_osync_inode(inode, inode->i_mapping, - OSYNC_DATA|OSYNC_METADATA); - */ - rc = filemap_fdatasync(inode->i_mapping); - rc2 = fsync_inode_data_buffers(inode); - if (rc == 0) - rc = rc2; - rc2 = filemap_fdatawait(inode->i_mapping); - if (rc == 0) - rc = rc2; - - return rc; -} - -static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) -{ - struct page *page; - int i, rc; - - check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages, - inode->i_dev, 1 << inode->i_blkbits); - - rc = filter_sync_inode_data(inode); - if (rc != 0) - RETURN(rc); - - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - for (i = 0; i < iobuf->nr_pages ; i++) { - page = find_lock_page(inode->i_mapping, - iobuf->maplist[i]->index); - if (page == NULL) - continue; - if (page->mapping != NULL) { - /* Now that the only source of such pages in truncate - * path flushes these pages to disk and and then - * discards, this is error condition */ - CERROR("Data page in page cache during write!\n"); - ll_truncate_complete_page(page); - } - - unlock_page(page); - page_cache_release(page); - } - - return 0; -} - -int filter_clear_truncated_page(struct inode *inode) -{ - struct page *page; - int rc; - - /* Truncate on page boundary, so nothing to flush? */ - if (!(inode->i_size & ~CFS_PAGE_MASK)) - return 0; - - rc = filter_sync_inode_data(inode); - if (rc != 0) - RETURN(rc); - - /* be careful to call this after fsync_inode_data_buffers has waited - * for IO to complete before we evict it from the cache */ - page = find_lock_page(inode->i_mapping, - inode->i_size >> CFS_PAGE_SHIFT); - if (page) { - if (page->mapping != NULL) - ll_truncate_complete_page(page); - - unlock_page(page); - page_cache_release(page); - } - - return 0; -} - -/* Must be called with i_sem taken for writes; this will drop it */ -int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf, - struct obd_export *exp, struct iattr *attr, - struct obd_trans_info *oti, void **wait_handle) -{ - struct obd_device *obd = exp->exp_obd; - struct inode *inode = dchild->d_inode; - struct kiobuf *iobuf = (void *)buf; - int rc, create = (rw == OBD_BRW_WRITE), committed = 0; - int blocks_per_page = CFS_PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0; - struct semaphore *sem = NULL; - ENTRY; - - LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); - - if (iobuf->nr_pages == 0) - GOTO(cleanup, rc = 0); - - if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS) - GOTO(cleanup, rc = -EINVAL); - - if (iobuf->nr_pages * blocks_per_page > - OBDFILTER_CREATED_SCRATCHPAD_ENTRIES) - GOTO(cleanup, rc = -EINVAL); - - cleanup_phase = 1; - - rc = lock_kiovec(1, &iobuf, 1); - if (rc < 0) - GOTO(cleanup, rc); - cleanup_phase = 2; - - if (rw == OBD_BRW_WRITE) { - create = 1; - sem = &obd->u.filter.fo_alloc_lock; - } - rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist, - iobuf->nr_pages, KIOBUF_GET_BLOCKS(iobuf), - obdfilter_created_scratchpad, create, sem); - if (rc) - GOTO(cleanup, rc); - - rc = filter_cleanup_mappings(rw, iobuf, inode); - if (rc) - GOTO(cleanup, rc); - - if (rw == OBD_BRW_WRITE) { - if (rc == 0) { - filter_tally_write(exp, iobuf->maplist, iobuf->nr_pages, - KIOBUF_GET_BLOCKS(iobuf), - blocks_per_page); - - if (attr->ia_size > inode->i_size) - attr->ia_valid |= ATTR_SIZE; - rc = fsfilt_setattr(obd, dchild, - oti->oti_handle, attr, 0); - if (rc) - GOTO(cleanup, rc); - } - - up(&inode->i_sem); - cleanup_phase = 3; - - rc = filter_finish_transno(exp, oti, 0); - if (rc) - GOTO(cleanup, rc); - - rc = fsfilt_commit_async(obd,inode,oti->oti_handle,wait_handle); - committed = 1; - if (rc) - GOTO(cleanup, rc); - } else { - filter_tally_read(exp, iobuf->maplist, iobuf->nr_pages, - KIOBUF_GET_BLOCKS(iobuf), blocks_per_page); - } - - rc = filter_clear_page_cache(inode, iobuf); - if (rc < 0) - GOTO(cleanup, rc); - - rc = fsfilt_send_bio(rw, obd, inode, iobuf); - - CDEBUG(D_INFO, "tried to %s %d pages, rc = %d\n", - rw & OBD_BRW_WRITE ? "write" : "read", iobuf->nr_pages, rc); - - if (rc > 0) - rc = 0; - - EXIT; -cleanup: - if (!committed && (rw == OBD_BRW_WRITE)) { - int err = fsfilt_commit_async(obd, inode, - oti->oti_handle, wait_handle); - if (err) - CERROR("can't close transaction: %d\n", err); - /* - * this is error path, so we prefer to return - * original error, not this one - */ - } - - switch(cleanup_phase) { - case 3: - case 2: - unlock_kiovec(1, &iobuf); - case 1: - case 0: - if (cleanup_phase != 3 && rw == OBD_BRW_WRITE) - up(&inode->i_sem); - break; - default: - CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase); - LBUG(); - break; - } - return rc; -} - -/* See if there are unallocated parts in given file region */ -int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) -{ - int (*fs_bmap)(struct address_space *, long) = - inode->i_mapping->a_ops->bmap; - int j; - - /* We can't know if the range is mapped already or not */ - if (fs_bmap == NULL) - return 0; - - offset >>= inode->i_blkbits; - len >>= inode->i_blkbits; - - for (j = 0; j < len; j++) - if (fs_bmap(inode->i_mapping, offset + j) == 0) - return 0; - - return 1; -} - -/* some kernels require alloc_kiovec callers to zero members through the use of - * map_user_kiobuf and unmap_.. we don't use those, so we have a little helper - * that makes sure we don't break the rules. */ -static void clear_kiobuf(struct kiobuf *iobuf) -{ - int i; - - for (i = 0; i < iobuf->array_len; i++) - iobuf->maplist[i] = NULL; - - iobuf->nr_pages = 0; - iobuf->offset = 0; - iobuf->length = 0; -} - -struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *filter, - int rw, int num_pages) -{ - struct kiobuf *iobuf; - int rc; - ENTRY; - - LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); - - rc = alloc_kiovec(1, &iobuf); - if (rc) - RETURN(ERR_PTR(rc)); - - rc = expand_kiobuf(iobuf, num_pages); - if (rc) { - free_kiovec(1, &iobuf); - RETURN(ERR_PTR(rc)); - } - -#ifdef HAVE_KIOBUF_DOVARY - iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */ -#endif - clear_kiobuf(iobuf); - RETURN((void *)iobuf); -} - -void filter_free_iobuf(struct filter_iobuf *buf) -{ - struct kiobuf *iobuf = (void *)buf; - - clear_kiobuf(iobuf); - free_kiovec(1, &iobuf); -} - -void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf, - struct obd_trans_info *oti) -{ - int thread_id = oti ? oti->oti_thread_id : -1; - - if (unlikely(thread_id < 0)) { - filter_free_iobuf(iobuf); - return; - } - - LASSERTF(filter->fo_iobuf_pool[thread_id] == iobuf, - "iobuf mismatch for thread %d: pool %p iobuf %p\n", - thread_id, filter->fo_iobuf_pool[thread_id], iobuf); - clear_kiobuf((void *)iobuf); -} - -int filter_iobuf_add_page(struct obd_device *obd, struct filter_iobuf *buf, - struct inode *inode, struct page *page) -{ - struct kiobuf *iobuf = (void *)buf; - - iobuf->maplist[iobuf->nr_pages++] = page; - iobuf->length += CFS_PAGE_SIZE; - - return 0; -} - -int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, - struct obd_ioobj *obj, int niocount, - struct niobuf_local *res, struct obd_trans_info *oti, - int rc) -{ - struct obd_device *obd = exp->exp_obd; - struct lvfs_run_ctxt saved; - struct niobuf_local *lnb; - struct fsfilt_objinfo fso; - struct iattr iattr = { 0 }; - void *iobuf = NULL; - struct inode *inode = NULL; - int i, n, cleanup_phase = 0, err; - unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ - void *wait_handle; - ENTRY; - LASSERT(oti != NULL); - LASSERT(objcount == 1); - LASSERT(current->journal_info == NULL); - - if (rc != 0) - GOTO(cleanup, rc); - - iobuf = filter_iobuf_get(&obd->u.filter, oti); - if (IS_ERR(iobuf)) - GOTO(cleanup, rc = PTR_ERR(iobuf)); - cleanup_phase = 1; - - fso.fso_dentry = res->dentry; - fso.fso_bufcnt = obj->ioo_bufcnt; - inode = res->dentry->d_inode; - - for (i = 0, lnb = res, n = 0; i < obj->ioo_bufcnt; i++, lnb++) { - loff_t this_size; - - /* If overwriting an existing block, we don't need a grant */ - if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && - filter_range_is_mapped(inode, lnb->offset, lnb->len)) - lnb->rc = 0; - - if (lnb->rc) /* ENOSPC, network RPC error */ - continue; - - filter_iobuf_add_page(obd, iobuf, inode, lnb->page); - - /* We expect these pages to be in offset order, but we'll - * be forgiving */ - this_size = lnb->offset + lnb->len; - if (this_size > iattr.ia_size) - iattr.ia_size = this_size; - } - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - cleanup_phase = 2; - - down(&inode->i_sem); - oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res, - oti); - if (IS_ERR(oti->oti_handle)) { - up(&inode->i_sem); - rc = PTR_ERR(oti->oti_handle); - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "error starting transaction: rc = %d\n", rc); - oti->oti_handle = NULL; - GOTO(cleanup, rc); - } - - fsfilt_check_slow(obd, now, obd_timeout, "brw_start"); - - i = OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; - - /* If the inode still has SUID+SGID bits set (see filter_precreate()) - * then we will accept the UID+GID if sent by the client for - * initializing the ownership of this inode. We only allow this to - * happen once (so clear these bits) and later only allow setattr. */ - if (inode->i_mode & S_ISUID) - i |= OBD_MD_FLUID; - if (inode->i_mode & S_ISGID) - i |= OBD_MD_FLGID; - - iattr_from_obdo(&iattr, oa, i); - if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) { - CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n", - (unsigned long)oa->o_uid, (unsigned long)oa->o_gid); - - cap_raise(current->cap_effective, CAP_SYS_RESOURCE); - - iattr.ia_valid |= ATTR_MODE; - iattr.ia_mode = inode->i_mode; - if (iattr.ia_valid & ATTR_UID) - iattr.ia_mode &= ~S_ISUID; - if (iattr.ia_valid & ATTR_GID) - iattr.ia_mode &= ~S_ISGID; - - rc = filter_update_fidea(exp, inode, oti->oti_handle, oa); - } - - /* filter_direct_io drops i_sem */ - rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, iobuf, exp, &iattr, - oti, &wait_handle); - if (rc == 0) - obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); - - fsfilt_check_slow(obd, now, obd_timeout, "direct_io"); - - err = fsfilt_commit_wait(obd, inode, wait_handle); - if (err) { - CERROR("Failure to commit OST transaction (%d)?\n", err); - rc = err; - } - if (obd->obd_replayable && !rc) - LASSERTF(oti->oti_transno <= obd->obd_last_committed, - "oti_transno "LPU64" last_committed "LPU64"\n", - oti->oti_transno, obd->obd_last_committed); - fsfilt_check_slow(obd, now, obd_timeout, "commitrw commit"); - -cleanup: - filter_grant_commit(exp, niocount, res); - - switch (cleanup_phase) { - case 2: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - LASSERT(current->journal_info == NULL); - case 1: - filter_iobuf_put(&obd->u.filter, iobuf, oti); - case 0: - /* - * lnb->page automatically returns back into per-thread page - * pool (bug 5137) - */ - f_dput(res->dentry); - } - - RETURN(rc); -} diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 73bb316..3371bdd 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -68,7 +68,8 @@ static void record_start_io(struct filter_iobuf *iobuf, int rw, int size, atomic_inc(&filter->fo_r_in_flight); lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_RPC_HIST], atomic_read(&filter->fo_r_in_flight)); - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_DISK_IOSIZE], size); + lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_DISK_IOSIZE], + size); lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_RPC_HIST], atomic_read(&filter->fo_r_in_flight)); lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DISK_IOSIZE], size); @@ -76,7 +77,8 @@ static void record_start_io(struct filter_iobuf *iobuf, int rw, int size, atomic_inc(&filter->fo_w_in_flight); lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_RPC_HIST], atomic_read(&filter->fo_w_in_flight)); - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_W_DISK_IOSIZE], size); + lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_W_DISK_IOSIZE], + size); lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_RPC_HIST], atomic_read(&filter->fo_w_in_flight)); lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_DISK_IOSIZE], size); @@ -182,6 +184,7 @@ struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *filter, spin_lock_init(&iobuf->dr_lock); iobuf->dr_max_pages = num_pages; iobuf->dr_npages = 0; + iobuf->dr_error = 0; RETURN(iobuf); @@ -197,6 +200,7 @@ struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *filter, static void filter_clear_iobuf(struct filter_iobuf *iobuf) { iobuf->dr_npages = 0; + iobuf->dr_error = 0; atomic_set(&iobuf->dr_numreqs, 0); } @@ -362,21 +366,21 @@ int filter_do_bio(struct obd_export *exp, struct inode *inode, wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0); if (rw == OBD_BRW_READ) { - lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_R_DIO_FRAGS], frags); + lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_R_DIO_FRAGS], + frags); lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DIO_FRAGS], frags); lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_R_IO_TIME], jiffies - start_time); - lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_IO_TIME], - jiffies - start_time); + lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_IO_TIME], jiffies - start_time); } else { - lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_W_DIO_FRAGS], frags); + lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_W_DIO_FRAGS], + frags); lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_DIO_FRAGS], frags); lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_W_IO_TIME], jiffies - start_time); - lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_IO_TIME], - jiffies - start_time); + lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_IO_TIME], jiffies - start_time); } if (rc == 0) @@ -407,18 +411,21 @@ static int filter_sync_inode_data(struct inode *inode, int locked) if (!locked) LOCK_INODE_MUTEX(inode); if (inode->i_mapping->nrpages) { +#ifdef PF_SYNCWRITE current->flags |= PF_SYNCWRITE; +#endif rc = filemap_fdatawrite(inode->i_mapping); if (rc == 0) rc = filemap_fdatawait(inode->i_mapping); +#ifdef PF_SYNCWRITE current->flags &= ~PF_SYNCWRITE; +#endif } if (!locked) UNLOCK_INODE_MUTEX(inode); return rc; } - /* Clear pages from the mapping before we do direct IO to that offset. * Now that the only source of such pages in the truncate path flushes * these pages to disk and then discards them, this is error condition. @@ -511,8 +518,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, create = 1; sem = &obd->u.filter.fo_alloc_lock; - lquota_enforce(filter_quota_interface_ref, obd, - iobuf->dr_ignore_quota); + lquota_enforce(filter_quota_interface_ref, obd, iobuf->dr_ignore_quota); } rc = fsfilt_map_inode_pages(obd, inode, iobuf->dr_pages, @@ -521,9 +527,9 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, if (rw == OBD_BRW_WRITE) { if (rc == 0) { - filter_tally_write(exp, iobuf->dr_pages, - iobuf->dr_npages, iobuf->dr_blocks, - blocks_per_page); + filter_tally(exp, iobuf->dr_pages, + iobuf->dr_npages, iobuf->dr_blocks, + blocks_per_page, 1); if (attr->ia_size > inode->i_size) attr->ia_valid |= ATTR_SIZE; rc = fsfilt_setattr(obd, dchild, @@ -532,22 +538,22 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, UNLOCK_INODE_MUTEX(inode); - rc2 = filter_finish_transno(exp, oti, 0); + rc2 = filter_finish_transno(exp, oti, 0, 0); if (rc2 != 0) { CERROR("can't close transaction: %d\n", rc2); if (rc == 0) rc = rc2; } - rc2 =fsfilt_commit_async(obd,inode,oti->oti_handle,wait_handle); + rc2 = fsfilt_commit_async(obd,inode,oti->oti_handle, + wait_handle); if (rc == 0) rc = rc2; if (rc != 0) RETURN(rc); } else if (rc == 0) { - filter_tally_read(exp, iobuf->dr_pages, - iobuf->dr_npages, iobuf->dr_blocks, - blocks_per_page); + filter_tally(exp, iobuf->dr_pages, iobuf->dr_npages, + iobuf->dr_blocks, blocks_per_page, 0); } rc = filter_clear_page_cache(inode, iobuf); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index 8cda048..b18d80d 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -44,7 +44,7 @@ int filter_log_sz_change(struct llog_handle *cathandle, struct ll_fid *mds_fid, - __u32 io_epoch, + __u32 ioepoch, struct llog_cookie *logcookie, struct inode *inode) { @@ -56,23 +56,23 @@ int filter_log_sz_change(struct llog_handle *cathandle, LOCK_INODE_MUTEX(inode); ofd = inode->i_filterdata; - if (ofd && ofd->ofd_epoch >= io_epoch) { - if (ofd->ofd_epoch > io_epoch) + if (ofd && ofd->ofd_epoch >= ioepoch) { + if (ofd->ofd_epoch > ioepoch) CERROR("client sent old epoch %d for obj ino %ld\n", - io_epoch, inode->i_ino); + ioepoch, inode->i_ino); UNLOCK_INODE_MUTEX(inode); RETURN(0); } - if (ofd && ofd->ofd_epoch < io_epoch) { - ofd->ofd_epoch = io_epoch; + if (ofd && ofd->ofd_epoch < ioepoch) { + ofd->ofd_epoch = ioepoch; } else if (!ofd) { OBD_ALLOC(ofd, sizeof(*ofd)); if (!ofd) GOTO(out, rc = -ENOMEM); igrab(inode); inode->i_filterdata = ofd; - ofd->ofd_epoch = io_epoch; + ofd->ofd_epoch = ioepoch; } /* the decision to write a record is now made, unlock */ UNLOCK_INODE_MUTEX(inode); @@ -83,7 +83,7 @@ int filter_log_sz_change(struct llog_handle *cathandle, lsc->lsc_hdr.lrh_len = lsc->lsc_tail.lrt_len = sizeof(*lsc); lsc->lsc_hdr.lrh_type = OST_SZ_REC; lsc->lsc_fid = *mds_fid; - lsc->lsc_io_epoch = io_epoch; + lsc->lsc_ioepoch = ioepoch; rc = llog_cat_add_rec(cathandle, &lsc->lsc_hdr, logcookie, NULL); OBD_FREE(lsc, sizeof(*lsc)); @@ -102,19 +102,29 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error) { struct llog_cookie *cookie = cb_data; - int rc; - + struct obd_llogs *llogs; + struct llog_ctxt *ctxt; + + /* we have to find context for right group */ if (error != 0) { CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n", error); - return; + goto out; } - - rc = llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1), - NULL, 1, cookie, 0); - if (rc) - CERROR("error cancelling log cookies: rc = %d\n", rc); - OBD_FREE(cookie, sizeof(*cookie)); + llogs = filter_grab_llog_for_group(obd, cookie->lgc_lgl.lgl_ogr, NULL); + + if (llogs) { + ctxt = llog_get_context_from_llogs(llogs, cookie->lgc_subsys + 1); + if (ctxt) { + llog_cancel(ctxt, NULL, 1, cookie, 0); + } else + CERROR("no valid context for group "LPU64"\n", + cookie->lgc_lgl.lgl_ogr); + } else { + CDEBUG(D_HA, "unknown group "LPU64"!\n", cookie->lgc_lgl.lgl_ogr); + } +out: + OBD_FREE(cookie, sizeof(struct llog_cookie)); } /* Callback for processing the unlink log record received from MDS by @@ -132,17 +142,18 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt, ENTRY; lur = (struct llog_unlink_rec *)rec; - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) RETURN(-ENOMEM); oa->o_valid |= OBD_MD_FLCOOKIE; oa->o_id = lur->lur_oid; oa->o_gr = lur->lur_ogen; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie)); oid = oa->o_id; rc = filter_destroy(exp, oa, NULL, NULL, NULL); - obdo_free(oa); + OBDO_FREE(oa); if (rc == -ENOENT) { CDEBUG(D_HA, "object already removed, send cookie\n"); llog_cancel(ctxt, NULL, 1, cookie, 0); @@ -170,19 +181,22 @@ static int filter_recov_log_setattr_cb(struct llog_ctxt *ctxt, ENTRY; lsr = (struct llog_setattr_rec *)rec; - oinfo.oi_oa = obdo_alloc(); + OBDO_ALLOC(oinfo.oi_oa); + if (oinfo.oi_oa == NULL) + RETURN(-ENOMEM); oinfo.oi_oa->o_valid |= (OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLCOOKIE); oinfo.oi_oa->o_id = lsr->lsr_oid; oinfo.oi_oa->o_gr = lsr->lsr_ogen; + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; oinfo.oi_oa->o_uid = lsr->lsr_uid; oinfo.oi_oa->o_gid = lsr->lsr_gid; memcpy(obdo_logcookie(oinfo.oi_oa), cookie, sizeof(*cookie)); oid = oinfo.oi_oa->o_id; rc = filter_setattr(exp, &oinfo, NULL); - obdo_free(oinfo.oi_oa); + OBDO_FREE(oinfo.oi_oa); if (rc == -ENOENT) { CDEBUG(D_HA, "object already removed, send cookie\n"); diff --git a/lustre/obdfilter/filter_lvb.c b/lustre/obdfilter/filter_lvb.c index 3dfa30e..aaccf0f 100644 --- a/lustre/obdfilter/filter_lvb.c +++ b/lustre/obdfilter/filter_lvb.c @@ -71,7 +71,8 @@ static int filter_lvbo_init(struct ldlm_resource *res) obd = res->lr_namespace->ns_lvbp; LASSERT(obd != NULL); - dentry = filter_fid2dentry(obd, NULL, 0, res->lr_name.name[0]); + dentry = filter_fid2dentry(obd, NULL, res->lr_name.name[2], + res->lr_name.name[0]); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); CERROR("%s: bad object "LPU64"/"LPU64": rc %d\n", obd->obd_name, @@ -168,8 +169,9 @@ static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m, /* Update the LVB from the disk inode */ obd = res->lr_namespace->ns_lvbp; LASSERT(obd); - - dentry = filter_fid2dentry(obd, NULL, 0, res->lr_name.name[0]); + + dentry = filter_fid2dentry(obd, NULL, res->lr_name.name[2], + res->lr_name.name[0]); if (IS_ERR(dentry)) GOTO(out, rc = PTR_ERR(dentry)); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index a56bcff..a206d81 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -36,8 +36,9 @@ static int lprocfs_filter_rd_groups(char *page, char **start, off_t off, int count, int *eof, void *data) { + struct obd_device *obd = (struct obd_device *)data; *eof = 1; - return snprintf(page, count, "%u\n", FILTER_GROUPS); + return snprintf(page, count, "%u\n", obd->u.filter.fo_group_count); } static int lprocfs_filter_rd_tot_dirty(char *page, char **start, off_t off, @@ -86,12 +87,23 @@ static int lprocfs_filter_rd_last_id(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; + struct filter_obd *filter = &obd->u.filter; + int retval = 0, rc, i; if (obd == NULL) return 0; - return snprintf(page, count, LPU64"\n", - filter_last_id(&obd->u.filter, 0)); + for (i = FILTER_GROUP_MDS0; i < filter->fo_group_count; i++) { + rc = snprintf(page, count, LPU64"\n",filter_last_id(filter, i)); + if (rc < 0) { + retval = rc; + break; + } + page += rc; + count -= rc; + retval += rc; + } + return retval; } int lprocfs_filter_rd_readcache(char *page, char **start, off_t off, int count, @@ -120,6 +132,7 @@ int lprocfs_filter_wr_readcache(struct file *file, const char *buffer, return count; } + int lprocfs_filter_rd_fmd_max_num(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -176,6 +189,83 @@ int lprocfs_filter_wr_fmd_max_age(struct file *file, const char *buffer, return count; } +static int lprocfs_filter_rd_capa(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + int rc; + + rc = snprintf(page, count, "capability on: %s\n", + obd->u.filter.fo_fl_oss_capa ? "oss" : ""); + return rc; +} + +static int lprocfs_filter_wr_capa(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val & ~0x1) { + CERROR("invalid capability mode, only 0/1 are accepted.\n" + " 1: enable oss fid capability\n" + " 0: disable oss fid capability\n"); + return -EINVAL; + } + + obd->u.filter.fo_fl_oss_capa = val; + LCONSOLE_INFO("OSS %s %s fid capability.\n", obd->obd_name, + val ? "enabled" : "disabled"); + return count; +} + +static int lprocfs_filter_rd_capa_count(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + return snprintf(page, count, "%d %d\n", + capa_count[CAPA_SITE_CLIENT], + capa_count[CAPA_SITE_SERVER]); +} + +static +int lprocfs_filter_rd_blacklist(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + + rc = blacklist_display(page, count); + *eof = 1; + return rc; +} + +static +int lprocfs_filter_wr_blacklist(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int add; + uid_t uid = -1; + + if (count < 2) + return count; + if (buffer[0] == '+') + add = 1; + else if (buffer[0] == '-') + add = 0; + else + return count; + + sscanf(buffer + 1, "%u", &uid); + if (add) + blacklist_add(uid); + else + blacklist_del(uid); + return count; +} + static struct lprocfs_vars lprocfs_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "blocksize", lprocfs_rd_blksize, 0, 0 }, @@ -208,6 +298,11 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { lprocfs_filter_wr_fmd_max_num, 0 }, { "client_cache_seconds", lprocfs_filter_rd_fmd_max_age, lprocfs_filter_wr_fmd_max_age, 0 }, + { "capa", lprocfs_filter_rd_capa, + lprocfs_filter_wr_capa, 0 }, + { "capa_count", lprocfs_filter_rd_capa_count, 0, 0 }, + { "blacklist", lprocfs_filter_rd_blacklist, + lprocfs_filter_wr_blacklist, 0 }, { 0 } }; @@ -216,8 +311,8 @@ static struct lprocfs_vars lprocfs_module_vars[] = { { 0 } }; -void filter_tally_write(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks,int blocks_per_page) +void filter_tally(struct obd_export *exp, struct page **pages, int nr_pages, + unsigned long *blocks, int blocks_per_page, int wr) { struct filter_obd *filter = &exp->exp_obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; @@ -230,9 +325,9 @@ void filter_tally_write(struct obd_export *exp, struct page **pages, if (nr_pages == 0) return; - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_W_PAGES], + lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES + wr], nr_pages); - lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_W_PAGES], + lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_R_PAGES + wr], nr_pages); while (nr_pages-- > 0) { @@ -247,79 +342,51 @@ void filter_tally_write(struct obd_export *exp, struct page **pages, } } - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_DISCONT_PAGES], + lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_PAGES +wr], discont_pages); - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_DISCONT_BLOCKS], - discont_blocks); - - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_W_DISCONT_PAGES], + lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_PAGES + wr], discont_pages); - lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_W_DISCONT_BLOCKS], + lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_BLOCKS+wr], discont_blocks); -} - -void filter_tally_read(struct obd_export *exp, struct page **pages, - int nr_pages, unsigned long *blocks, int blocks_per_page) -{ - struct filter_obd *filter = &exp->exp_obd->u.filter; - struct page *last_page = NULL; - unsigned long *last_block = NULL; - unsigned long discont_pages = 0; - unsigned long discont_blocks = 0; - int i; - - if (nr_pages == 0) - return; - - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES], nr_pages); - - while (nr_pages-- > 0) { - if (last_page && (*pages)->index != (last_page->index + 1)) - discont_pages++; - last_page = *pages; - pages++; - for (i = 0; i < blocks_per_page; i++) { - if (last_block && *blocks != (*last_block + 1)) - discont_blocks++; - last_block = blocks++; - } - } - - lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES], nr_pages); - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_PAGES], discont_pages); - lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_BLOCKS], discont_blocks); - - lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_PAGES], - nr_pages); - lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DISCONT_PAGES], - discont_pages); - lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DISCONT_BLOCKS], + lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_BLOCKS + wr], discont_blocks); } #define pct(a,b) (b ? a * 100 / b : 0) -static void display_brw_stats(struct seq_file *seq, struct obd_histogram *read, - struct obd_histogram *write) +static void display_brw_stats(struct seq_file *seq, char *name, char *units, + struct obd_histogram *read, struct obd_histogram *write, int log2) { - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; + unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0; int i; + seq_printf(seq, "\n%26s read | write\n", " "); + seq_printf(seq, "%-22s %-5s %% cum %% | %-5s %% cum %%\n", + name, units, units); + read_tot = lprocfs_oh_sum(read); write_tot = lprocfs_oh_sum(write); - - read_cum = 0; - write_cum = 0; for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = read->oh_buckets[i]; - unsigned long w = write->oh_buckets[i]; + r = read->oh_buckets[i]; + w = write->oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%u:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - 1 << i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, - pct(w, write_tot), - pct(write_cum, write_tot)); + if (read_cum == 0 && write_cum == 0) + continue; + + if (!log2) + seq_printf(seq, "%u", i); + else if (i < 10) + seq_printf(seq, "%u", 1<<i); + else if (i < 20) + seq_printf(seq, "%uK", 1<<(i-10)); + else + seq_printf(seq, "%uM", 1<<(i-20)); + + seq_printf(seq, ":\t\t%10lu %3lu %3lu | %4lu %3lu %3lu\n", + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); + if (read_cum == read_tot && write_cum == write_tot) break; } @@ -328,91 +395,44 @@ static void display_brw_stats(struct seq_file *seq, struct obd_histogram *read, static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) { struct timeval now; -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - int i; -#endif - - do_gettimeofday(&now); /* this sampling races with updates */ - + do_gettimeofday(&now); seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "pages per brw brws %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_PAGES], &brw_stats->hist[BRW_W_PAGES]); + display_brw_stats(seq, "pages per bulk r/w", "rpcs", + &brw_stats->hist[BRW_R_PAGES], + &brw_stats->hist[BRW_W_PAGES], 1); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "discont pages rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); + display_brw_stats(seq, "discontiguous pages", "rpcs", + &brw_stats->hist[BRW_R_DISCONT_PAGES], + &brw_stats->hist[BRW_W_DISCONT_PAGES], 0); - display_brw_stats(seq, &brw_stats->hist[BRW_R_DISCONT_PAGES], - &brw_stats->hist[BRW_W_DISCONT_PAGES]); + display_brw_stats(seq, "discontiguous blocks", "rpcs", + &brw_stats->hist[BRW_R_DISCONT_BLOCKS], + &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "discont blocks rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_DISCONT_BLOCKS], - &brw_stats->hist[BRW_W_DISCONT_BLOCKS]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "dio frags rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_DIO_FRAGS], - &brw_stats->hist[BRW_W_DIO_FRAGS]); + display_brw_stats(seq, "disk fragmented I/Os", "ios", + &brw_stats->hist[BRW_R_DIO_FRAGS], + &brw_stats->hist[BRW_W_DIO_FRAGS], 0); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "disk ios in flight ios %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_RPC_HIST], - &brw_stats->hist[BRW_W_RPC_HIST]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "io time (1/%ds) rpcs %% cum %% |", HZ); - seq_printf(seq, " rpcs %% cum %%\n"); - - display_brw_stats(seq, &brw_stats->hist[BRW_R_IO_TIME], - &brw_stats->hist[BRW_W_IO_TIME]); - - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "disk I/O size count %% cum %% |"); - seq_printf(seq, " count %% cum %%\n"); - - read_tot = lprocfs_oh_sum(&brw_stats->hist[BRW_R_DISK_IOSIZE]); - write_tot = lprocfs_oh_sum(&brw_stats->hist[BRW_W_DISK_IOSIZE]); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = brw_stats->hist[BRW_R_DISK_IOSIZE].oh_buckets[i]; - unsigned long w = brw_stats->hist[BRW_W_DISK_IOSIZE].oh_buckets[i]; - - read_cum += r; - write_cum += w; - if (read_cum == 0 && write_cum == 0) - continue; - - if (i < 10) - seq_printf(seq, "%u", 1<<i); - else if (i < 20) - seq_printf(seq, "%uK", 1<<(i-10)); - else - seq_printf(seq, "%uM", 1<<(i-20)); - - seq_printf(seq, ":\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; + display_brw_stats(seq, "disk I/Os in flight", "ios", + &brw_stats->hist[BRW_R_RPC_HIST], + &brw_stats->hist[BRW_W_RPC_HIST], 0); + + { + char title[24]; + sprintf(title, "I/O time (1/%ds)", HZ); + display_brw_stats(seq, title, "ios", + &brw_stats->hist[BRW_R_IO_TIME], + &brw_stats->hist[BRW_W_IO_TIME], 1); } + + display_brw_stats(seq, "disk I/O size", "ios", + &brw_stats->hist[BRW_R_DISK_IOSIZE], + &brw_stats->hist[BRW_W_DISK_IOSIZE], 1); #endif } diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 4a0f183..eabdc3f 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -69,7 +69,6 @@ static int osc_wr_active(struct file *file, const char *buffer, return count; } - static int osc_rd_max_pages_per_rpc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -88,7 +87,7 @@ static int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer, { struct obd_device *dev = data; struct client_obd *cli = &dev->u.cli; - struct obd_connect_data *ocd; + struct obd_connect_data *ocd = &cli->cl_import->imp_connect_data; int val, rc; rc = lprocfs_write_helper(buffer, count, &val); @@ -96,7 +95,6 @@ static int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer, return rc; LPROCFS_CLIMP_CHECK(dev); - ocd = &cli->cl_import->imp_connect_data; if (val < 1 || val > ocd->ocd_brw_size >> CFS_PAGE_SHIFT) { LPROCFS_CLIMP_EXIT(dev); return -ERANGE; @@ -104,6 +102,7 @@ static int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer, client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_max_pages_per_rpc = val; client_obd_list_unlock(&cli->cl_loi_list_lock); + LPROCFS_CLIMP_EXIT(dev); return count; } @@ -126,24 +125,24 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, { struct obd_device *dev = data; struct client_obd *cli = &dev->u.cli; - struct ptlrpc_request_pool *pool; + struct ptlrpc_request_pool *pool = cli->cl_import->imp_rq_pool; int val, rc; rc = lprocfs_write_helper(buffer, count, &val); if (rc) return rc; + if (val < 1 || val > OSC_MAX_RIF_MAX) return -ERANGE; LPROCFS_CLIMP_CHECK(dev); - pool = cli->cl_import->imp_rq_pool; if (pool && val > cli->cl_max_rpcs_in_flight) pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight); client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; client_obd_list_unlock(&cli->cl_loi_list_lock); - + LPROCFS_CLIMP_EXIT(dev); return count; } @@ -176,7 +175,8 @@ static int osc_wr_max_dirty_mb(struct file *file, const char *buffer, if (rc) return rc; - if (pages_number < 0 || pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - CFS_PAGE_SHIFT) || + if (pages_number < 0 || + pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - CFS_PAGE_SHIFT) || pages_number > num_physpages / 4) /* 1/4 of RAM */ return -ERANGE; @@ -329,6 +329,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 }, { "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 }, { "checksums", osc_rd_checksum, osc_wr_checksum, 0 }, + { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 }, { 0 } }; diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 251483e..cd0d373 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -79,12 +79,12 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) oscc->oscc_grow_count = OST_MIN_PRECREATE; spin_unlock(&oscc->oscc_lock); DEBUG_REQ(D_ERROR, req, - "unknown rc %d from async create: failing oscc", rc); + "Unknown rc %d from async create: failing oscc", rc); ptlrpc_fail_import(req->rq_import, lustre_msg_get_conn_cnt(req->rq_reqmsg)); } else { if (rc == 0) { - if (body) { + if (body) { int diff = body->oa.o_id - oscc->oscc_last_id; if (diff < oscc->oscc_grow_count) @@ -153,7 +153,9 @@ static int oscc_internal_create(struct osc_creator *oscc) spin_lock(&oscc->oscc_lock); body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count; - body->oa.o_valid |= OBD_MD_FLID; + body->oa.o_gr = oscc->oscc_oa.o_gr; + LASSERT(body->oa.o_gr > 0); + body->oa.o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; spin_unlock(&oscc->oscc_lock); CDEBUG(D_HA, "preallocating through id "LPU64" (last seen "LPU64")\n", body->oa.o_id, oscc->oscc_last_id); @@ -235,21 +237,24 @@ int oscc_recovering(struct osc_creator *oscc) int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct lov_stripe_md *lsm; struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct lov_stripe_md *lsm; int try_again = 1, rc = 0; ENTRY; + LASSERT(oa); LASSERT(ea); - - if ((oa->o_valid & OBD_MD_FLGROUP) && (oa->o_gr != 0)) - RETURN(osc_real_create(exp, oa, ea, oti)); + LASSERT(oa->o_gr > 0); + LASSERT(oa->o_valid & OBD_MD_FLGROUP); if ((oa->o_valid & OBD_MD_FLFLAGS) && oa->o_flags == OBD_FL_RECREATE_OBJS) { RETURN(osc_real_create(exp, oa, ea, oti)); } + if (oa->o_gr == FILTER_GROUP_LLOG || oa->o_gr == FILTER_GROUP_ECHO) + RETURN(osc_real_create(exp, oa, ea, oti)); + /* this is the special case where create removes orphans */ if ((oa->o_valid & OBD_MD_FLFLAGS) && oa->o_flags == OBD_FL_DELORPHAN) { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index e7aa732..6ff5ab8 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -40,7 +40,7 @@ # include <liblustre.h> #endif -# include <lustre_dlm.h> +#include <lustre_dlm.h> #include <libcfs/kp30.h> #include <lustre_net.h> #include <lustre/lustre_user.h> @@ -63,9 +63,6 @@ extern quota_interface_t osc_quota_interface; static void osc_release_ppga(struct brw_page **ppga, obd_count count); -static quota_interface_t *quota_interface; -extern quota_interface_t osc_quota_interface; - /* Pack OSC object metadata for disk storage (LE byte order). */ static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, struct lov_stripe_md *lsm) @@ -91,7 +88,9 @@ static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, if (lsm) { LASSERT(lsm->lsm_object_id); + LASSERT(lsm->lsm_object_gr); (*lmmp)->lmm_object_id = cpu_to_le64(lsm->lsm_object_id); + (*lmmp)->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr); } RETURN(lmm_size); @@ -144,7 +143,9 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, if (lmm != NULL) { /* XXX zero *lsmp? */ (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); + (*lsmp)->lsm_object_gr = le64_to_cpu (lmm->lmm_object_gr); LASSERT((*lsmp)->lsm_object_id); + LASSERT((*lsmp)->lsm_object_gr); } (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; @@ -152,6 +153,32 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(lsm_size); } +static inline void osc_pack_capa(struct ptlrpc_request *req, int offset, + struct ost_body *body, void *capa) +{ + struct obd_capa *oc = (struct obd_capa *)capa; + struct lustre_capa *c; + + if (!capa) + return; + + c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c)); + LASSERT(c); + capa_cpy(c, oc); + body->oa.o_valid |= OBD_MD_FLOSSCAPA; + DEBUG_CAPA(D_SEC, c, "pack"); +} + +static inline void osc_pack_req_body(struct ptlrpc_request *req, int offset, + struct obd_info *oinfo) +{ + struct ost_body *body; + + body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); + body->oa = *oinfo->oi_oa; + osc_pack_capa(req, offset + 1, body, oinfo->oi_capa); +} + static int osc_getattr_interpret(struct ptlrpc_request *req, struct osc_async_args *aa, int rc) { @@ -185,17 +212,17 @@ static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo, { struct ptlrpc_request *req; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; struct osc_async_args *aa; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_GETATTR, 2, size,NULL); + OST_GETATTR, 3, size,NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); req->rq_interpret_reply = osc_getattr_interpret; @@ -212,16 +239,16 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo) { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_GETATTR, 2, size, NULL); + OST_GETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); @@ -239,7 +266,7 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo) } CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - memcpy(oinfo->oi_oa, &body->oa, sizeof(*oinfo->oi_oa)); + *oinfo->oi_oa = body->oa; /* This should really be sent by the OST */ oinfo->oi_oa->o_blksize = PTLRPC_MAX_BRW_SIZE; @@ -256,16 +283,18 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo, { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; + LASSERT(!(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP) || + oinfo->oi_oa->o_gr > 0); + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SETATTR, 2, size, NULL); + OST_SETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); @@ -278,7 +307,7 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo, if (body == NULL) GOTO(out, rc = -EPROTO); - memcpy(oinfo->oi_oa, &body->oa, sizeof(*oinfo->oi_oa)); + *oinfo->oi_oa = body->oa; EXIT; out: @@ -302,7 +331,7 @@ static int osc_setattr_interpret(struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - memcpy(aa->aa_oi->oi_oa, &body->oa, sizeof(*aa->aa_oi->oi_oa)); + *aa->aa_oi->oi_oa = body->oa; out: rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); RETURN(rc); @@ -313,25 +342,22 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo, struct ptlrpc_request_set *rqset) { struct ptlrpc_request *req; - struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct ost_body) }; struct osc_async_args *aa; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SETATTR, 2, size, NULL); + OST_SETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - + osc_pack_req_body(req, REQ_REC_OFF, oinfo); if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) { LASSERT(oti); - memcpy(obdo_logcookie(oinfo->oi_oa), oti->oti_logcookies, - sizeof(*oti->oti_logcookies)); + *obdo_logcookie(oinfo->oi_oa) = *oti->oti_logcookies; } - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); ptlrpc_req_set_repsize(req, 2, size); /* do mds to ost setattr asynchronouly */ if (!rqset) { @@ -340,7 +366,7 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo, } else { req->rq_interpret_reply = osc_setattr_interpret; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); aa = (struct osc_async_args *)&req->rq_async_args; aa->aa_oi = oinfo; @@ -375,7 +401,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oa, sizeof(body->oa)); + body->oa = *oa; ptlrpc_req_set_repsize(req, 2, size); if (oa->o_valid & OBD_MD_FLINLINE) { @@ -398,7 +424,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, GOTO (out_req, rc = -EPROTO); } - memcpy(oa, &body->oa, sizeof(*oa)); + *oa = body->oa; /* This should really be sent by the OST */ oa->o_blksize = PTLRPC_MAX_BRW_SIZE; @@ -409,6 +435,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, * This needs to be fixed in a big way. */ lsm->lsm_object_id = oa->o_id; + lsm->lsm_object_gr = oa->o_gr; *ea = lsm; if (oti != NULL) { @@ -417,8 +444,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, if (oa->o_valid & OBD_MD_FLCOOKIE) { if (!oti->oti_logcookies) oti_alloc_cookies(oti, 1); - memcpy(oti->oti_logcookies, obdo_logcookie(oa), - sizeof(oti->oti_onecookie)); + *oti->oti_logcookies = *obdo_logcookie(oa); } } @@ -449,7 +475,7 @@ static int osc_punch_interpret(struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - memcpy(aa->aa_oi->oi_oa, &body->oa, sizeof(*aa->aa_oi->oi_oa)); + *aa->aa_oi->oi_oa = body->oa; out: rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); RETURN(rc); @@ -462,7 +488,7 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, struct ptlrpc_request *req; struct osc_async_args *aa; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; if (!oinfo->oi_oa) { @@ -470,20 +496,17 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, RETURN(-EINVAL); } + size[REQ_REC_OFF + 1] = oinfo->oi_capa? sizeof(struct lustre_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_PUNCH, 2, size, NULL); + OST_PUNCH, 3, size, NULL); if (!req) RETURN(-ENOMEM); - /* FIXME bug 249. Also see bug 7198 */ - if (class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_REQPORTAL) - req->rq_request_portal = OST_IO_PORTAL; - - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ + osc_pack_req_body(req, REQ_REC_OFF, oinfo); /* overload the size and blocks fields in the oa with start/end */ + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); body->oa.o_size = oinfo->oi_policy.l_extent.start; body->oa.o_blocks = oinfo->oi_policy.l_extent.end; body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); @@ -491,7 +514,7 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, ptlrpc_req_set_repsize(req, 2, size); req->rq_interpret_reply = osc_punch_interpret; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); aa = (struct osc_async_args *)&req->rq_async_args; aa->aa_oi = oinfo; ptlrpc_set_add_req(rqset, req); @@ -500,11 +523,12 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, } static int osc_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, obd_size start, obd_size end) + struct lov_stripe_md *md, obd_size start, obd_size end, + void *capa) { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; if (!oa) { @@ -512,19 +536,22 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } + size[REQ_REC_OFF + 1] = capa ? sizeof(struct lustre_capa) : 0; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SYNC, 2, size, NULL); + OST_SYNC, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - /* overload the size and blocks fields in the oa with start/end */ + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); + body->oa = *oa; body->oa.o_size = start; body->oa.o_blocks = end; body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); + osc_pack_capa(req, REQ_REC_OFF + 1, body, capa); + ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); @@ -538,7 +565,7 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, GOTO (out, rc = -EPROTO); } - memcpy(oa, &body->oa, sizeof(*oa)); + *oa = body->oa; EXIT; out: @@ -575,19 +602,14 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, if (!req) RETURN(-ENOMEM); - /* FIXME bug 249. Also see bug 7198 */ - if (class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_REQPORTAL) - req->rq_request_portal = OST_IO_PORTAL; + req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - - if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) { + if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) memcpy(obdo_logcookie(oa), oti->oti_logcookies, sizeof(*oti->oti_logcookies)); - } + body->oa = *oa; - memcpy(&body->oa, oa, sizeof(*oa)); ptlrpc_req_set_repsize(req, 2, size); ptlrpcd_add_req(req); @@ -630,7 +652,8 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, } /* caller must hold loi_list_lock */ -static void osc_consume_write_grant(struct client_obd *cli,struct brw_page *pga) +static void osc_consume_write_grant(struct client_obd *cli, + struct brw_page *pga) { atomic_inc(&obd_dirty_pages); cli->cl_dirty += CFS_PAGE_SIZE; @@ -695,7 +718,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { /* if we can't dirty more, we must wait until some is written */ if ((cli->cl_dirty + CFS_PAGE_SIZE > cli->cl_dirty_max) || - ((atomic_read(&obd_dirty_pages)+1)>(obd_max_dirty_pages))) { + (atomic_read(&obd_dirty_pages) + 1 > obd_max_dirty_pages)) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld " "osc max %ld, sys max %d\n", cli->cl_dirty, cli->cl_dirty_max, obd_max_dirty_pages); @@ -763,7 +786,7 @@ static void handle_short_read(int nob_read, obd_count page_count, if (pga[i]->count > nob_read) { /* EOF inside this page */ - ptr = cfs_kmap(pga[i]->pg) + + ptr = cfs_kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); memset(ptr + nob_read, 0, pga[i]->count - nob_read); cfs_kunmap(pga[i]->pg); @@ -853,7 +876,8 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, /* corrupt the data before we compute the checksum, to * simulate an OST->client data error */ - if (i == 0 &&OBD_FAIL_CHECK_ONCE(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) + if (i == 0 && + OBD_FAIL_CHECK_ONCE(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) memcpy(ptr + off, "bad1", min(4, nob)); cksum = crc32_le(cksum, ptr + off, count); cfs_kunmap(pga[i]->pg); @@ -874,22 +898,29 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page **pga, - struct ptlrpc_request **reqp) + struct brw_page **pga, + struct ptlrpc_request **reqp, + struct obd_capa *ocapa) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; struct ost_body *body; struct obd_ioobj *ioobj; struct niobuf_remote *niobuf; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[5] = { sizeof(struct ptlrpc_body), sizeof(*body) }; int niocount, i, requested_nob, opc, rc; struct ptlrpc_request_pool *pool; + struct lustre_capa *capa; struct osc_brw_async_args *aa; ENTRY; - opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ; - pool = ((cmd & OBD_BRW_WRITE) != 0) ? cli->cl_import->imp_rq_pool :NULL; + if ((cmd & OBD_BRW_WRITE) != 0) { + opc = OST_WRITE; + pool = cli->cl_import->imp_rq_pool; + } else { + opc = OST_READ; + pool = NULL; + } for (niocount = i = 1; i < page_count; i++) { if (!can_merge_pages(pga[i - 1], pga[i])) @@ -898,17 +929,16 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, size[REQ_REC_OFF + 1] = sizeof(*ioobj); size[REQ_REC_OFF + 2] = niocount * sizeof(*niobuf); + if (ocapa) + size[REQ_REC_OFF + 3] = sizeof(*capa); OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM); - req = ptlrpc_prep_req_pool(cli->cl_import, LUSTRE_OST_VERSION, opc, 4, size, - NULL, pool); + req = ptlrpc_prep_req_pool(cli->cl_import, LUSTRE_OST_VERSION, opc, 5, + size, NULL, pool, NULL); if (req == NULL) RETURN (-ENOMEM); - /* FIXME bug 249. Also see bug 7198 */ - if (cli->cl_import->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_REQPORTAL) - req->rq_request_portal = OST_IO_PORTAL; + req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ if (opc == OST_WRITE) desc = ptlrpc_prep_bulk_imp (req, page_count, @@ -925,10 +955,16 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, niobuf = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, niocount * sizeof(*niobuf)); - memcpy(&body->oa, oa, sizeof(*oa)); + body->oa = *oa; obdo_to_ioobj(oa, ioobj); ioobj->ioo_bufcnt = niocount; + if (ocapa) { + capa = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 3, + sizeof(*capa)); + capa_cpy(capa, ocapa); + body->oa.o_valid |= OBD_MD_FLOSSCAPA; + } LASSERT (page_count > 0); for (requested_nob = i = 0; i < page_count; i++, niobuf++) { @@ -1019,8 +1055,9 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, } static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer, - __u32 client_cksum, __u32 server_cksum, int nob, - obd_count page_count, struct brw_page **pga) + __u32 client_cksum, __u32 server_cksum, + int nob, obd_count page_count, + struct brw_page **pga) { __u32 new_cksum; char *msg; @@ -1033,25 +1070,28 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer, new_cksum = osc_checksum_bulk(nob, page_count, pga); if (new_cksum == server_cksum) - msg = "changed on the client after we checksummed it"; + msg = "changed on the client after we checksummed it - " + "likely false positive due to mmap IO (bug 11742)"; else if (new_cksum == client_cksum) msg = "changed in transit before arrival at OST"; else - msg = "changed in transit AND doesn't match the original"; - - LCONSOLE_ERROR("BAD WRITE CHECKSUM: %s: from %s inum "LPU64"/"LPU64 - " object "LPU64"/"LPU64" extent ["LPU64"-"LPU64"]\n", - msg, libcfs_nid2str(peer->nid), - oa->o_valid & OBD_MD_FLFID ? oa->o_fid : (__u64)0, - oa->o_valid & OBD_MD_FLFID ? oa->o_generation : (__u64)0, - oa->o_id, - oa->o_valid & OBD_MD_FLGROUP ? oa->o_gr : (__u64)0, - pga[0]->off, - pga[page_count-1]->off + pga[page_count-1]->count - 1); + msg = "changed in transit AND doesn't match the original - " + "likely false positive due to mmap IO (bug 11742)"; + + LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inum " + LPU64"/"LPU64" object "LPU64"/"LPU64" extent " + "["LPU64"-"LPU64"]\n", + msg, libcfs_nid2str(peer->nid), + oa->o_valid & OBD_MD_FLFID ? oa->o_fid : (__u64)0, + oa->o_valid & OBD_MD_FLFID ? oa->o_generation : + (__u64)0, + oa->o_id, + oa->o_valid & OBD_MD_FLGROUP ? oa->o_gr : (__u64)0, + pga[0]->off, + pga[page_count-1]->off + pga[page_count-1]->count - 1); CERROR("original client csum %x, server csum %x, client csum now %x\n", client_cksum, server_cksum, new_cksum); - - return 1; + return 1; } /* Note rc enters this function as number of bytes transferred */ @@ -1101,12 +1141,14 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) if (unlikely((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum && check_write_checksum(&body->oa, peer, client_cksum, - body->oa.o_cksum, - aa->aa_requested_nob, - aa->aa_page_count, - aa->aa_ppga))) + body->oa.o_cksum, + aa->aa_requested_nob, + aa->aa_page_count, + aa->aa_ppga))) RETURN(-EAGAIN); + sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk); + rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count, aa->aa_page_count, aa->aa_ppga); GOTO(out, rc); @@ -1143,29 +1185,30 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) via = " via "; router = libcfs_nid2str(req->rq_bulk->bd_sender); } - + if (server_cksum == ~0 && rc > 0) { CERROR("Protocol error: server %s set the 'checksum' " "bit, but didn't send a checksum. Not fatal, " "but please tell CFS.\n", libcfs_nid2str(peer->nid)); } else if (server_cksum != client_cksum) { - LCONSOLE_ERROR("%s: BAD READ CHECKSUM: from %s%s%s inum " - LPU64"/"LPU64" object "LPU64"/"LPU64 - " extent ["LPU64"-"LPU64"]\n", - req->rq_import->imp_obd->obd_name, - libcfs_nid2str(peer->nid), - via, router, - body->oa.o_valid & OBD_MD_FLFID ? + LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from " + "%s%s%s inum "LPU64"/"LPU64" object " + LPU64"/"LPU64" extent " + "["LPU64"-"LPU64"]\n", + req->rq_import->imp_obd->obd_name, + libcfs_nid2str(peer->nid), + via, router, + body->oa.o_valid & OBD_MD_FLFID ? body->oa.o_fid : (__u64)0, - body->oa.o_valid & OBD_MD_FLFID ? + body->oa.o_valid & OBD_MD_FLFID ? body->oa.o_generation :(__u64)0, - body->oa.o_id, - body->oa.o_valid & OBD_MD_FLGROUP ? + body->oa.o_id, + body->oa.o_valid & OBD_MD_FLGROUP ? body->oa.o_gr : (__u64)0, - aa->aa_ppga[0]->off, - aa->aa_ppga[aa->aa_page_count-1]->off + - aa->aa_ppga[aa->aa_page_count-1]->count - + aa->aa_ppga[0]->off, + aa->aa_ppga[aa->aa_page_count-1]->off + + aa->aa_ppga[aa->aa_page_count-1]->count - 1); CERROR("client %x, server %x\n", client_cksum, server_cksum); @@ -1189,49 +1232,52 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) } out: if (rc >= 0) - memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa)); + *aa->aa_oa = body->oa; + + sptlrpc_cli_unwrap_bulk_read(req, rc, aa->aa_page_count, aa->aa_ppga); RETURN(rc); } -static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa, +static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page **pga) + obd_count page_count, struct brw_page **pga, + struct obd_capa *ocapa) { - struct ptlrpc_request *request; + struct ptlrpc_request *req; int rc, retries = 5; /* lprocfs? */ ENTRY; restart_bulk: rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, - page_count, pga, &request); + page_count, pga, &req, ocapa); if (rc != 0) return (rc); - rc = ptlrpc_queue_wait(request); + rc = ptlrpc_queue_wait(req); - if (rc == -ETIMEDOUT && request->rq_resend) { - DEBUG_REQ(D_HA, request, "BULK TIMEOUT"); - ptlrpc_req_finished(request); + if (rc == -ETIMEDOUT && req->rq_resend) { + DEBUG_REQ(D_HA, req, "BULK TIMEOUT"); + ptlrpc_req_finished(req); goto restart_bulk; } - rc = osc_brw_fini_request(request, rc); + rc = osc_brw_fini_request(req, rc); - ptlrpc_req_finished(request); + ptlrpc_req_finished(req); if (rc == -EAGAIN) { if (retries-- > 0) goto restart_bulk; rc = -EIO; } - RETURN(rc); + RETURN (rc); } -int osc_brw_redo_request(struct ptlrpc_request *request, +int osc_brw_redo_request(struct ptlrpc_request *req, struct osc_brw_async_args *aa) { struct ptlrpc_request *new_req; - struct ptlrpc_request_set *set = request->rq_set; + struct ptlrpc_request_set *set = req->rq_set; struct osc_brw_async_args *new_aa; struct osc_async_page *oap; int rc = 0; @@ -1242,12 +1288,12 @@ int osc_brw_redo_request(struct ptlrpc_request *request, RETURN(-EIO); } - DEBUG_REQ(D_ERROR, request, "redo for checksum error"); + DEBUG_REQ(D_ERROR, req, "redo for checksum error"); list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { if (oap->oap_request != NULL) { - LASSERTF(request == oap->oap_request, + LASSERTF(req == oap->oap_request, "request %p != oap_request %p\n", - request, oap->oap_request); + req, oap->oap_request); if (oap->oap_interrupted) { ptlrpc_mark_interrupted(oap->oap_request); rc = -EINTR; @@ -1257,19 +1303,20 @@ int osc_brw_redo_request(struct ptlrpc_request *request, } if (rc) RETURN(rc); - - rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == + /* TODO-MERGE: and where to get ocapa?? */ + rc = osc_brw_prep_request(lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ, aa->aa_cli, aa->aa_oa, NULL /* lsm unused by osc currently */, - aa->aa_page_count, aa->aa_ppga, &new_req); + aa->aa_page_count, aa->aa_ppga, &new_req, + NULL /* ocapa */); if (rc) RETURN(rc); /* New request takes over pga and oaps from old request. * Note that copying a list_head doesn't work, need to move it... */ - new_req->rq_interpret_reply = request->rq_interpret_reply; - new_req->rq_async_args = request->rq_async_args; + new_req->rq_interpret_reply = req->rq_interpret_reply; + new_req->rq_async_args = req->rq_async_args; new_aa = (struct osc_brw_async_args *)&new_req->rq_async_args; INIT_LIST_HEAD(&new_aa->aa_oaps); list_splice(&aa->aa_oaps, &new_aa->aa_oaps); @@ -1287,18 +1334,21 @@ int osc_brw_redo_request(struct ptlrpc_request *request, RETURN(0); } -static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) +static int brw_interpret(struct ptlrpc_request *req, void *data, int rc) { struct osc_brw_async_args *aa = data; int i; + int nob = rc; ENTRY; - rc = osc_brw_fini_request(request, rc); + rc = osc_brw_fini_request(req, rc); if (rc == -EAGAIN) { - rc = osc_brw_redo_request(request, aa); + rc = osc_brw_redo_request(req, aa); if (rc == 0) RETURN(0); } + if ((rc >= 0) && req->rq_set && req->rq_set->set_countp) + atomic_add(nob, (atomic_t *)req->rq_set->set_countp); spin_lock(&aa->aa_cli->cl_loi_list_lock); for (i = 0; i < aa->aa_page_count; i++) @@ -1312,9 +1362,10 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page **pga, struct ptlrpc_request_set *set) + struct brw_page **pga, struct ptlrpc_request_set *set, + struct obd_capa *ocapa) { - struct ptlrpc_request *request; + struct ptlrpc_request *req; struct client_obd *cli = &exp->exp_obd->u.cli; int rc, i; ENTRY; @@ -1330,19 +1381,17 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, spin_unlock(&cli->cl_loi_list_lock); } - rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, - page_count, pga, &request); - + rc = osc_brw_prep_request(cmd, cli, oa, lsm, page_count, pga, + &req, ocapa); if (rc == 0) { - request->rq_interpret_reply = brw_interpret; - ptlrpc_set_add_req(set, request); + req->rq_interpret_reply = brw_interpret; + ptlrpc_set_add_req(set, req); } else if (cmd == OBD_BRW_WRITE) { spin_lock(&cli->cl_loi_list_lock); for (i = 0; i < page_count; i++) osc_release_write_grant(cli, pga[i], 0); spin_unlock(&cli->cl_loi_list_lock); } - RETURN (rc); } @@ -1368,7 +1417,7 @@ static void sort_brw_pages(struct brw_page **array, int num) for (i = stride ; i < num ; i++) { tmp = array[i]; j = i; - while (j >= stride && array[j-stride]->off > tmp->off) { + while (j >= stride && array[j - stride]->off > tmp->off) { array[j] = array[j - stride]; j -= stride; } @@ -1384,7 +1433,7 @@ static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages) int i = 0; LASSERT (pages > 0); - offset = pg[i]->off & (~CFS_PAGE_MASK); + offset = pg[i]->off & ~CFS_PAGE_MASK; for (;;) { pages--; @@ -1395,7 +1444,7 @@ static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages) return count; /* doesn't end on page boundary */ i++; - offset = pg[i]->off & (~CFS_PAGE_MASK); + offset = pg[i]->off & ~CFS_PAGE_MASK; if (offset != 0) /* doesn't start on page boundary */ return count; @@ -1469,14 +1518,14 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, *oinfo->oi_oa = *saved_oa; } else if (page_count > pages_per_brw) { /* save a copy of oa (brw will clobber it) */ - saved_oa = obdo_alloc(); + OBDO_ALLOC(saved_oa); if (saved_oa == NULL) GOTO(out, rc = -ENOMEM); *saved_oa = *oinfo->oi_oa; } rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md, - pages_per_brw, ppga); + pages_per_brw, ppga, oinfo->oi_capa); if (rc != 0) break; @@ -1489,7 +1538,7 @@ out: osc_release_ppga(orig, page_count_orig); if (saved_oa != NULL) - obdo_free(saved_oa); + OBDO_FREE(saved_oa); RETURN(rc); } @@ -1500,14 +1549,15 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct ptlrpc_request_set *set) { struct brw_page **ppga, **orig; + struct client_obd *cli = &exp->exp_obd->u.cli; int page_count_orig; int rc = 0; ENTRY; if (cmd & OBD_BRW_CHECK) { + struct obd_import *imp = class_exp2cliimp(exp); /* The caller just wants to know if there's a chance that this * I/O can succeed */ - struct obd_import *imp = class_exp2cliimp(exp); if (imp == NULL || imp->imp_invalid) RETURN(-EIO); @@ -1525,28 +1575,27 @@ static int osc_brw_async(int cmd, struct obd_export *exp, obd_count pages_per_brw; pages_per_brw = min_t(obd_count, page_count, - class_exp2cliimp(exp)->imp_obd->u.cli.cl_max_pages_per_rpc); + cli->cl_max_pages_per_rpc); pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw); /* use ppga only if single RPC is going to fly */ if (pages_per_brw != page_count_orig || ppga != orig) { - OBD_ALLOC(copy, pages_per_brw * sizeof(*copy)); + OBD_ALLOC(copy, sizeof(*copy) * pages_per_brw); if (copy == NULL) GOTO(out, rc = -ENOMEM); - memcpy(copy, ppga, pages_per_brw * sizeof(*copy)); + memcpy(copy, ppga, sizeof(*copy) * pages_per_brw); } else copy = ppga; rc = async_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md, - pages_per_brw, copy, set); + pages_per_brw, copy, set, oinfo->oi_capa); if (rc != 0) { if (copy != ppga) - OBD_FREE(copy, pages_per_brw * sizeof(*copy)); + OBD_FREE(copy, sizeof(*copy) * pages_per_brw); break; } - if (copy == orig) { /* we passed it to async_internal() which is * now responsible for releasing memory */ @@ -1573,6 +1622,7 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, osc_release_write_grant(cli, &oap->oap_brw_page, sent); } + /* This maintains the lists of pending pages to read/write for a given object * (lop). This is used by osc_check_rpcs->osc_next_loi() and loi_list_maint() * to quickly find objects that are ready to send an RPC. */ @@ -1600,7 +1650,6 @@ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop, CDEBUG(D_CACHE, "urgent request forcing RPC\n"); RETURN(1); } - /* fire off rpcs when we have 'optimal' rpcs as tuned for the wire. */ optimal = cli->cl_max_pages_per_rpc; if (cmd & OBD_BRW_WRITE) { @@ -1611,7 +1660,6 @@ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop, CDEBUG(D_CACHE, "cache waiters forcing RPC\n"); RETURN(1); } - /* +16 to avoid triggering rpcs that would want to include pages * that are being queued but which can't be made ready until * the queuer finishes with the page. this is a wart for @@ -1793,17 +1841,17 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, EXIT; } -static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc) +static int brw_interpret_oap(struct ptlrpc_request *req, void *data, int rc) { - struct osc_brw_async_args *aa = data; struct osc_async_page *oap, *tmp; + struct osc_brw_async_args *aa = data; struct client_obd *cli; ENTRY; - rc = osc_brw_fini_request(request, rc); - CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc); + rc = osc_brw_fini_request(req, rc); + CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); if (rc == -EAGAIN) { - rc = osc_brw_redo_request(request, aa); + rc = osc_brw_redo_request(req, aa); if (rc == 0) RETURN(0); GOTO(out, rc); @@ -1816,7 +1864,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc) /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters * is called so we know whether to go to sync BRWs or wait for more * RPCs to complete */ - if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE) + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) cli->cl_w_in_flight--; else cli->cl_r_in_flight--; @@ -1833,8 +1881,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc) client_obd_list_unlock(&cli->cl_loi_list_lock); - obdo_free(aa->aa_oa); - + OBDO_FREE(aa->aa_oa); rc = 0; out: osc_release_ppga(aa->aa_ppga, aa->aa_page_count); @@ -1851,6 +1898,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, struct obdo *oa = NULL; struct obd_async_page_ops *ops = NULL; void *caller_data = NULL; + struct obd_capa *ocapa; struct osc_async_page *oap; int i, rc; @@ -1861,7 +1909,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, if (pga == NULL) RETURN(ERR_PTR(-ENOMEM)); - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(out, req = ERR_PTR(-ENOMEM)); @@ -1881,9 +1929,12 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, /* always get the data for the obdo for the rpc */ LASSERT(ops != NULL); ops->ap_fill_obdo(caller_data, cmd, oa); + ocapa = ops->ap_lookup_capa(caller_data, cmd); sort_brw_pages(pga, page_count); - rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, pga, &req); + rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, + pga, &req, ocapa); + capa_put(ocapa); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, req = ERR_PTR(rc)); @@ -1906,7 +1957,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, out: if (IS_ERR(req)) { if (oa) - obdo_free(oa); + OBDO_FREE(oa); if (pga) OBD_FREE(pga, sizeof(*pga) * page_count); } @@ -1929,7 +1980,8 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, ENTRY; /* first we find the pages we're allowed to work with */ - list_for_each_entry_safe(oap, tmp, &lop->lop_pending, oap_pending_item){ + list_for_each_entry_safe(oap, tmp, &lop->lop_pending, + oap_pending_item) { ops = oap->oap_caller_ops; LASSERT(oap->oap_magic == OAP_MAGIC); @@ -2066,6 +2118,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, } aa = (struct osc_brw_async_args *)&req->rq_async_args; + if (cmd == OBD_BRW_READ) { lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight); @@ -2241,6 +2294,7 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi, { struct osc_cache_waiter ocw; struct l_wait_info lwi = { 0 }; + ENTRY; CDEBUG(D_CACHE, "dirty: %ld/%d dirty_max: %ld/%d dropped: %lu " @@ -2358,7 +2412,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, struct obd_async_page_ops *ops; struct obdo *oa; - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) RETURN(-ENOMEM); @@ -2368,7 +2422,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, NO_QUOTA) rc = -EDQUOT; - obdo_free(oa); + OBDO_FREE(oa); if (rc) RETURN(rc); } @@ -2645,9 +2699,12 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data, static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, ldlm_iterator_t replace, void *data) { - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; + struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obd = class_exp2obd(exp); + res_id.name[0] = lsm->lsm_object_id; + res_id.name[2] = lsm->lsm_object_gr; + ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data); return 0; } @@ -2727,7 +2784,7 @@ static int osc_enqueue_interpret(struct ptlrpc_request *req, static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, struct obd_enqueue_info *einfo) { - struct ldlm_res_id res_id = { .name = {oinfo->oi_md->lsm_object_id} }; + struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obd = exp->exp_obd; struct ldlm_reply *rep; struct ptlrpc_request *req = NULL; @@ -2735,6 +2792,9 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; + res_id.name[0] = oinfo->oi_md->lsm_object_id; + res_id.name[2] = oinfo->oi_md->lsm_object_gr; + /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother. */ oinfo->oi_policy.l_extent.start -= @@ -2745,7 +2805,8 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, goto no_match; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obd->obd_namespace, einfo->ei_flags | LDLM_FL_LVB_READY, &res_id, + rc = ldlm_lock_match(obd->obd_namespace, + einfo->ei_flags | LDLM_FL_LVB_READY, &res_id, einfo->ei_type, &oinfo->oi_policy, einfo->ei_mode, oinfo->oi_lockh); if (rc == 1) { @@ -2780,7 +2841,8 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, * locks out from other users right now, too. */ if (einfo->ei_mode == LCK_PR) { - rc = ldlm_lock_match(obd->obd_namespace, einfo->ei_flags | LDLM_FL_LVB_READY, + rc = ldlm_lock_match(obd->obd_namespace, + einfo->ei_flags | LDLM_FL_LVB_READY, &res_id, einfo->ei_type, &oinfo->oi_policy, LCK_PW, oinfo->oi_lockh); if (rc == 1) { @@ -2811,7 +2873,7 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, RETURN(-ENOMEM); size[DLM_LOCKREPLY_OFF] = sizeof(*rep); - size[DLM_REPLY_REC_OFF] = + size[DLM_REPLY_REC_OFF] = sizeof(oinfo->oi_md->lsm_oinfo[0]->loi_lvb); ptlrpc_req_set_repsize(req, 3, size); } @@ -2819,7 +2881,7 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */ einfo->ei_flags &= ~LDLM_FL_BLOCK_GRANTED; - rc = ldlm_cli_enqueue(exp, &req, res_id, einfo->ei_type, + rc = ldlm_cli_enqueue(exp, &req, &res_id, einfo->ei_type, &oinfo->oi_policy, einfo->ei_mode, &einfo->ei_flags, einfo->ei_cb_bl, einfo->ei_cb_cp, einfo->ei_cb_gl, @@ -2831,7 +2893,7 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, if (einfo->ei_rqset) { if (!rc) { struct osc_enqueue_args *aa; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); aa = (struct osc_enqueue_args *)&req->rq_async_args; aa->oa_oi = oinfo; aa->oa_ei = einfo; @@ -2856,12 +2918,15 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm, __u32 type, ldlm_policy_data_t *policy, __u32 mode, int *flags, void *data, struct lustre_handle *lockh) { - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; + struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obd = exp->exp_obd; int rc; int lflags = *flags; ENTRY; + res_id.name[0] = lsm->lsm_object_id; + res_id.name[2] = lsm->lsm_object_gr; + OBD_FAIL_RETURN(OBD_FAIL_OSC_MATCH, -EIO); /* Filesystem lock extents are extended to page boundaries so that @@ -2870,8 +2935,8 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm, policy->l_extent.end |= ~CFS_PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obd->obd_namespace, lflags | LDLM_FL_LVB_READY, &res_id, type, - policy, mode, lockh); + rc = ldlm_lock_match(obd->obd_namespace, lflags | LDLM_FL_LVB_READY, + &res_id, type, policy, mode, lockh); if (rc) { //if (!(*flags & LDLM_FL_TEST_LOCK)) osc_set_data_with_check(lockh, data, lflags); @@ -2881,9 +2946,9 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm, * VFS and page cache already protect us locally, so lots of readers/ * writers can share a single PW lock. */ if (mode == LCK_PR) { - rc = ldlm_lock_match(obd->obd_namespace, lflags | LDLM_FL_LVB_READY, - &res_id, type, - policy, LCK_PW, lockh); + rc = ldlm_lock_match(obd->obd_namespace, + lflags | LDLM_FL_LVB_READY, &res_id, + type, policy, LCK_PW, lockh); if (rc == 1 && !(lflags & LDLM_FL_TEST_LOCK)) { /* FIXME: This is not incredibly elegant, but it might * be more elegant than adding another parameter to @@ -2910,12 +2975,19 @@ static int osc_cancel(struct obd_export *exp, struct lov_stripe_md *md, } static int osc_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *lsm, int flags, void *opaque) + struct lov_stripe_md *lsm, int flags, + void *opaque) { struct obd_device *obd = class_exp2obd(exp); - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; + struct ldlm_res_id res_id = { .name = {0} }, *resp = NULL; - return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags, + if (lsm != NULL) { + res_id.name[0] = lsm->lsm_object_id; + res_id.name[2] = lsm->lsm_object_gr; + resp = &res_id; + } + + return ldlm_cli_cancel_unused(obd->obd_namespace, resp, flags, opaque); } @@ -2923,9 +2995,15 @@ static int osc_join_lru(struct obd_export *exp, struct lov_stripe_md *lsm, int join) { struct obd_device *obd = class_exp2obd(exp); - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; + struct ldlm_res_id res_id = { .name = {0} }, *resp = NULL; + + if (lsm != NULL) { + res_id.name[0] = lsm->lsm_object_id; + res_id.name[2] = lsm->lsm_object_gr; + resp = &res_id; + } - return ldlm_cli_join_lru(obd->obd_namespace, &res_id, join); + return ldlm_cli_join_lru(obd->obd_namespace, resp, join); } static int osc_statfs_interpret(struct ptlrpc_request *req, @@ -2973,7 +3051,7 @@ static int osc_statfs_async(struct obd_device *obd, struct obd_info *oinfo, req->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249 req->rq_interpret_reply = osc_statfs_interpret; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); aa = (struct osc_async_args *)&req->rq_async_args; aa->aa_oi = oinfo; @@ -3050,12 +3128,14 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump) RETURN(-ENOMEM); lumk->lmm_objects[0].l_object_id = lsm->lsm_object_id; + lumk->lmm_objects[0].l_object_gr = lsm->lsm_object_gr; } else { lum_size = sizeof(lum); lumk = &lum; } lumk->lmm_object_id = lsm->lsm_object_id; + lumk->lmm_object_gr = lsm->lsm_object_gr; lumk->lmm_stripe_count = 1; if (copy_to_user(lump, lumk, lum_size)) @@ -3283,6 +3363,11 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(0); } + if (KEY_IS(KEY_FLUSH_CTX)) { + sptlrpc_import_flush_my_ctx(imp); + RETURN(0); + } + if (!set) RETURN(-EINVAL); @@ -3298,8 +3383,14 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, if (req == NULL) RETURN(-ENOMEM); - if (KEY_IS("mds_conn")) + if (KEY_IS("mds_conn")) { + struct osc_creator *oscc = &obd->u.cli.cl_oscc; + + oscc->oscc_oa.o_gr = (*(__u32 *)val); + oscc->oscc_oa.o_valid |= OBD_MD_FLGROUP; + LASSERT(oscc->oscc_oa.o_gr > 0); req->rq_interpret_reply = osc_setinfo_mds_conn_interpret; + } ptlrpc_req_set_repsize(req, 1, NULL); ptlrpc_set_add_req(set, req); @@ -3314,9 +3405,9 @@ static struct llog_operations osc_size_repl_logops = { }; static struct llog_operations osc_mds_ost_orig_logops; -static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *catid, - struct obd_uuid *uuid) +static int osc_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *tgt, int count, + struct llog_catid *catid, struct obd_uuid *uuid) { int rc; ENTRY; @@ -3331,20 +3422,20 @@ static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, } spin_unlock(&obd->obd_dev_lock); - rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count, + rc = llog_setup(obd, llogs, LLOG_MDS_OST_ORIG_CTXT, tgt, count, &catid->lci_logid, &osc_mds_ost_orig_logops); if (rc) { CERROR("failed LLOG_MDS_OST_ORIG_CTXT\n"); GOTO (out, rc); } - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, count, NULL, + rc = llog_setup(obd, llogs, LLOG_SIZE_REPL_CTXT, tgt, count, NULL, &osc_size_repl_logops); - if (rc) + if (rc) CERROR("failed LLOG_SIZE_REPL_CTXT\n"); out: if (rc) { - CERROR("osc '%s' tgt '%s' cnt %d catid %p rc=%d\n", + CERROR("osc '%s' tgt '%s' cnt %d catid %p rc=%d\n", obd->obd_name, tgt->obd_name, count, catid, rc); CERROR("logid "LPX64":0x%x\n", catid->lci_logid.lgl_oid, catid->lci_logid.lgl_ogen); @@ -3432,7 +3523,11 @@ static int osc_import_event(struct obd_device *obd, oscc->oscc_flags |= OSCC_FLAG_RECOVERING; spin_unlock(&oscc->oscc_lock); } - + cli = &obd->u.cli; + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_avail_grant = 0; + cli->cl_lost_grant = 0; + client_obd_list_unlock(&cli->cl_loi_list_lock); break; } case IMP_EVENT_INACTIVE: { @@ -3445,8 +3540,6 @@ static int osc_import_event(struct obd_device *obd, /* Reset grants */ cli = &obd->u.cli; client_obd_list_lock(&cli->cl_loi_list_lock); - cli->cl_avail_grant = 0; - cli->cl_lost_grant = 0; /* all pages go to failing rpcs due to the invalid import */ osc_check_rpcs(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -3487,7 +3580,7 @@ static int osc_import_event(struct obd_device *obd, RETURN(rc); } -int osc_setup(struct obd_device *obd, obd_count len, void *buf) +int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { int rc; ENTRY; @@ -3497,7 +3590,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf) if (rc) RETURN(rc); - rc = client_obd_setup(obd, len, buf); + rc = client_obd_setup(obd, lcfg); if (rc) { ptlrpcd_decref(); } else { @@ -3537,6 +3630,9 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name); /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */ ptlrpc_deactivate_import(imp); + spin_lock(&imp->imp_lock); + imp->imp_pingable = 0; + spin_unlock(&imp->imp_lock); break; } case OBD_CLEANUP_EXPORTS: { @@ -3658,8 +3754,8 @@ int __init osc_init(void) lquota_init(quota_interface); init_obd_quota_ops(quota_interface, &osc_obd_ops); - rc = class_register_type(&osc_obd_ops, lvars.module_vars, - LUSTRE_OSC_NAME); + rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars, + LUSTRE_OSC_NAME, NULL); if (rc) { if (quota_interface) PORTAL_SYMBOL_PUT(osc_quota_interface); diff --git a/lustre/osd/.cvsignore b/lustre/osd/.cvsignore new file mode 100644 index 0000000..5d26f00 --- /dev/null +++ b/lustre/osd/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lustre/osd/Makefile.in b/lustre/osd/Makefile.in new file mode 100644 index 0000000..54150fde --- /dev/null +++ b/lustre/osd/Makefile.in @@ -0,0 +1,6 @@ +MODULES := osd +osd-objs := osd_handler.o osd_oi.o osd_igif.o + +EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs + +@INCLUDE_RULES@ diff --git a/lustre/osd/autoMakefile.am b/lustre/osd/autoMakefile.am new file mode 100644 index 0000000..1a14ef4 --- /dev/null +++ b/lustre/osd/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +modulefs_DATA = osd$(KMODEXT) +endif + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ +DIST_SOURCES := $(osd-objs:%.o=%.c) osd_internal.h osd_oi.h osd_igif.h diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c new file mode 100644 index 0000000..ccf33ee --- /dev/null +++ b/lustre/osd/osd_handler.c @@ -0,0 +1,2630 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_handler.c + * Top-level entry points into osd module + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> + +/* LUSTRE_VERSION_CODE */ +#include <lustre_ver.h> +/* prerequisite for linux/xattr.h */ +#include <linux/types.h> +/* prerequisite for linux/xattr.h */ +#include <linux/fs.h> +/* XATTR_{REPLACE,CREATE} */ +#include <linux/xattr.h> +/* + * XXX temporary stuff: direct access to ldiskfs/jdb. Interface between osd + * and file system is not yet specified. + */ +/* handle_t, journal_start(), journal_stop() */ +#include <linux/jbd.h> +/* LDISKFS_SB() */ +#include <linux/ldiskfs_fs.h> +#include <linux/ldiskfs_jbd.h> +/* simple_mkdir() */ +#include <lvfs.h> + +/* + * struct OBD_{ALLOC,FREE}*() + * OBD_FAIL_CHECK + */ +#include <obd_support.h> +/* struct ptlrpc_thread */ +#include <lustre_net.h> +/* LUSTRE_OSD_NAME */ +#include <obd.h> +/* class_register_type(), class_unregister_type(), class_get_type() */ +#include <obd_class.h> +#include <lustre_disk.h> + +/* fid_is_local() */ +#include <lustre_fid.h> +#include <linux/lustre_iam.h> + +#include "osd_internal.h" +#include "osd_igif.h" + +struct osd_directory { + struct iam_container od_container; + struct iam_descr od_descr; + struct semaphore od_sem; +}; + +struct osd_object { + struct dt_object oo_dt; + /* + * Inode for file system object represented by this osd_object. This + * inode is pinned for the whole duration of lu_object life. + * + * Not modified concurrently (either setup early during object + * creation, or assigned by osd_object_create() under write lock). + */ + struct inode *oo_inode; + struct rw_semaphore oo_sem; + struct osd_directory *oo_dir; + /* protects inode attributes. */ + spinlock_t oo_guard; +#if OSD_COUNTERS + const struct lu_env *oo_owner; +#endif +}; + +/* + * osd device. + */ +struct osd_device { + /* super-class */ + struct dt_device od_dt_dev; + /* information about underlying file system */ + struct lustre_mount_info *od_mount; + /* object index */ + struct osd_oi od_oi; + /* + * XXX temporary stuff for object index: directory where every object + * is named by its fid. + */ + struct dentry *od_obj_area; + + /* Environment for transaction commit callback. + * Currently, OSD is based on ext3/JBD. Transaction commit in ext3/JBD + * is serialized, that is there is no more than one transaction commit + * at a time (JBD journal_commit_transaction() is serialized). + * This means that it's enough to have _one_ lu_context. + */ + struct lu_env od_env_for_commit; + + /* + * Fid Capability + */ + unsigned int od_fl_capa:1; + unsigned long od_capa_timeout; + __u32 od_capa_alg; + struct lustre_capa_key *od_capa_keys; + struct hlist_head *od_capa_hash; + + /* + * statfs optimization: we cache a bit. + */ + cfs_time_t od_osfs_age; + struct kstatfs od_kstatfs; + spinlock_t od_osfs_lock; +}; + +static int osd_root_get (const struct lu_env *env, + struct dt_device *dev, struct lu_fid *f); +static int osd_statfs (const struct lu_env *env, + struct dt_device *dev, struct kstatfs *sfs); + +static int lu_device_is_osd (const struct lu_device *d); +static void osd_mod_exit (void) __exit; +static int osd_mod_init (void) __init; +static int osd_type_init (struct lu_device_type *t); +static void osd_type_fini (struct lu_device_type *t); +static int osd_object_init (const struct lu_env *env, + struct lu_object *l); +static void osd_object_release(const struct lu_env *env, + struct lu_object *l); +static int osd_object_print (const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *o); +static void osd_device_free (const struct lu_env *env, + struct lu_device *m); +static void *osd_key_init (const struct lu_context *ctx, + struct lu_context_key *key); +static void osd_key_fini (const struct lu_context *ctx, + struct lu_context_key *key, void *data); +static void osd_key_exit (const struct lu_context *ctx, + struct lu_context_key *key, void *data); +static int osd_has_index (const struct osd_object *obj); +static void osd_object_init0 (struct osd_object *obj); +static int osd_device_init (const struct lu_env *env, + struct lu_device *d, const char *, + struct lu_device *); +static int osd_fid_lookup (const struct lu_env *env, + struct osd_object *obj, + const struct lu_fid *fid); +static void osd_inode_getattr (const struct lu_env *env, + struct inode *inode, struct lu_attr *attr); +static void osd_inode_setattr (const struct lu_env *env, + struct inode *inode, const struct lu_attr *attr); +static int osd_param_is_sane (const struct osd_device *dev, + const struct txn_param *param); +static int osd_index_lookup (const struct lu_env *env, + struct dt_object *dt, + struct dt_rec *rec, const struct dt_key *key, + struct lustre_capa *capa); +static int osd_index_insert (const struct lu_env *env, + struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, + struct thandle *handle, + struct lustre_capa *capa); +static int osd_index_delete (const struct lu_env *env, + struct dt_object *dt, const struct dt_key *key, + struct thandle *handle, + struct lustre_capa *capa); +static int osd_index_probe (const struct lu_env *env, + struct osd_object *o, + const struct dt_index_features *feat); +static int osd_index_try (const struct lu_env *env, + struct dt_object *dt, + const struct dt_index_features *feat); +static void osd_index_fini (struct osd_object *o); + +static void osd_it_fini (const struct lu_env *env, struct dt_it *di); +static int osd_it_get (const struct lu_env *env, + struct dt_it *di, const struct dt_key *key); +static void osd_it_put (const struct lu_env *env, struct dt_it *di); +static int osd_it_next (const struct lu_env *env, struct dt_it *di); +static int osd_it_del (const struct lu_env *env, struct dt_it *di, + struct thandle *th); +static int osd_it_key_size (const struct lu_env *env, + const struct dt_it *di); +static void osd_conf_get (const struct lu_env *env, + const struct dt_device *dev, + struct dt_device_param *param); +static void osd_trans_stop (const struct lu_env *env, + struct thandle *th); +static int osd_object_is_root(const struct osd_object *obj); + +static struct osd_object *osd_obj (const struct lu_object *o); +static struct osd_device *osd_dev (const struct lu_device *d); +static struct osd_device *osd_dt_dev (const struct dt_device *d); +static struct osd_object *osd_dt_obj (const struct dt_object *d); +static struct osd_device *osd_obj2dev (const struct osd_object *o); +static struct lu_device *osd2lu_dev (struct osd_device *osd); +static struct lu_device *osd_device_fini (const struct lu_env *env, + struct lu_device *d); +static struct lu_device *osd_device_alloc (const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *cfg); +static struct lu_object *osd_object_alloc (const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d); +static struct inode *osd_iget (struct osd_thread_info *info, + struct osd_device *dev, + const struct osd_inode_id *id); +static struct super_block *osd_sb (const struct osd_device *dev); +static struct dt_it *osd_it_init (const struct lu_env *env, + struct dt_object *dt, int wable, + struct lustre_capa *capa); +static struct dt_key *osd_it_key (const struct lu_env *env, + const struct dt_it *di); +static struct dt_rec *osd_it_rec (const struct lu_env *env, + const struct dt_it *di); +static struct timespec *osd_inode_time (const struct lu_env *env, + struct inode *inode, + __u64 seconds); +static struct thandle *osd_trans_start (const struct lu_env *env, + struct dt_device *d, + struct txn_param *p); +static journal_t *osd_journal (const struct osd_device *dev); + +static struct lu_device_type_operations osd_device_type_ops; +static struct lu_device_type osd_device_type; +static struct lu_object_operations osd_lu_obj_ops; +static struct obd_ops osd_obd_device_ops; +static struct lprocfs_vars lprocfs_osd_module_vars[]; +static struct lprocfs_vars lprocfs_osd_obd_vars[]; +static struct lu_device_operations osd_lu_ops; +static struct lu_context_key osd_key; +static struct dt_object_operations osd_obj_ops; +static struct dt_body_operations osd_body_ops; +static struct dt_index_operations osd_index_ops; +static struct dt_index_operations osd_index_compat_ops; + +struct osd_thandle { + struct thandle ot_super; + handle_t *ot_handle; + struct journal_callback ot_jcb; +}; + +/* + * Invariants, assertions. + */ + +/* + * XXX: do not enable this, until invariant checking code is made thread safe + * in the face of pdirops locking. + */ +#define OSD_INVARIANT_CHECKS (0) + +#if OSD_INVARIANT_CHECKS +static int osd_invariant(const struct osd_object *obj) +{ + return + obj != NULL && + ergo(obj->oo_inode != NULL, + obj->oo_inode->i_sb == osd_sb(osd_obj2dev(obj)) && + atomic_read(&obj->oo_inode->i_count) > 0) && + ergo(obj->oo_dir != NULL && + obj->oo_dir->od_conationer.ic_object != NULL, + obj->oo_dir->od_conationer.ic_object == obj->oo_inode); +} +#else +#define osd_invariant(obj) (1) +#endif + +static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env) +{ + return lu_context_key_get(&env->le_ctx, &osd_key); +} + +#if OSD_COUNTERS +/* + * Concurrency: doesn't matter + */ +static int osd_read_locked(const struct lu_env *env, struct osd_object *o) +{ + return osd_oti_get(env)->oti_r_locks > 0; +} + +/* + * Concurrency: doesn't matter + */ +static int osd_write_locked(const struct lu_env *env, struct osd_object *o) +{ + struct osd_thread_info *oti = osd_oti_get(env); + return oti->oti_w_locks > 0 && o->oo_owner == env; +} + +#define OSD_COUNTERS_DO(exp) exp +#else + + +#define osd_read_locked(env, o) (1) +#define osd_write_locked(env, o) (1) +#define OSD_COUNTERS_DO(exp) ((void)0) +#endif + +/* + * Concurrency: doesn't access mutable data + */ +static int osd_root_get(const struct lu_env *env, + struct dt_device *dev, struct lu_fid *f) +{ + struct inode *inode; + + inode = osd_sb(osd_dt_dev(dev))->s_root->d_inode; + lu_igif_build(f, inode->i_ino, inode->i_generation); + return 0; +} + +/* + * OSD object methods. + */ + +/* + * Concurrency: no concurrent access is possible that early in object + * life-cycle. + */ +static struct lu_object *osd_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d) +{ + struct osd_object *mo; + + OBD_ALLOC_PTR(mo); + if (mo != NULL) { + struct lu_object *l; + + l = &mo->oo_dt.do_lu; + dt_object_init(&mo->oo_dt, NULL, d); + mo->oo_dt.do_ops = &osd_obj_ops; + l->lo_ops = &osd_lu_obj_ops; + init_rwsem(&mo->oo_sem); + spin_lock_init(&mo->oo_guard); + return l; + } else + return NULL; +} + +/* + * Concurrency: shouldn't matter. + */ +static void osd_object_init0(struct osd_object *obj) +{ + LASSERT(obj->oo_inode != NULL); + obj->oo_dt.do_body_ops = &osd_body_ops; + obj->oo_dt.do_lu.lo_header->loh_attr |= + (LOHA_EXISTS | (obj->oo_inode->i_mode & S_IFMT)); +} + +/* + * Concurrency: no concurrent access is possible that early in object + * life-cycle. + */ +static int osd_object_init(const struct lu_env *env, struct lu_object *l) +{ + struct osd_object *obj = osd_obj(l); + int result; + + LASSERT(osd_invariant(obj)); + + result = osd_fid_lookup(env, obj, lu_object_fid(l)); + if (result == 0) { + if (obj->oo_inode != NULL) + osd_object_init0(obj); + } + LASSERT(osd_invariant(obj)); + return result; +} + +/* + * Concurrency: no concurrent access is possible that late in object + * life-cycle. + */ +static void osd_object_free(const struct lu_env *env, struct lu_object *l) +{ + struct osd_object *obj = osd_obj(l); + + LASSERT(osd_invariant(obj)); + + dt_object_fini(&obj->oo_dt); + OBD_FREE_PTR(obj); +} + +static struct iam_path_descr *osd_ipd_get(const struct lu_env *env, + const struct iam_container *bag) +{ + return bag->ic_descr->id_ops->id_ipd_alloc(bag, + osd_oti_get(env)->oti_ipd); +} + +static void osd_ipd_put(const struct lu_env *env, + const struct iam_container *bag, + struct iam_path_descr *ipd) +{ + bag->ic_descr->id_ops->id_ipd_free(ipd); +} + +/* + * Concurrency: no concurrent access is possible that late in object + * life-cycle. + */ +static void osd_index_fini(struct osd_object *o) +{ + struct iam_container *bag; + + if (o->oo_dir != NULL) { + bag = &o->oo_dir->od_container; + if (o->oo_inode != NULL) { + if (bag->ic_object == o->oo_inode) + iam_container_fini(bag); + } + OBD_FREE_PTR(o->oo_dir); + o->oo_dir = NULL; + } +} + +/* + * Concurrency: no concurrent access is possible that late in object + * life-cycle (for all existing callers, that is. New callers have to provide + * their own locking.) + */ +static int osd_inode_unlinked(const struct inode *inode) +{ + return inode->i_nlink == 0; +} + +enum { + OSD_TXN_OI_DELETE_CREDITS = 20, + OSD_TXN_INODE_DELETE_CREDITS = 20 +}; + +/* + * Concurrency: no concurrent access is possible that late in object + * life-cycle. + */ +static int osd_inode_remove(const struct lu_env *env, struct osd_object *obj) +{ + const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu); + struct osd_device *osd = osd_obj2dev(obj); + struct osd_thread_info *oti = osd_oti_get(env); + struct txn_param *prm = &oti->oti_txn; + struct thandle *th; + int result; + + txn_param_init(prm, OSD_TXN_OI_DELETE_CREDITS + + OSD_TXN_INODE_DELETE_CREDITS); + th = osd_trans_start(env, &osd->od_dt_dev, prm); + if (!IS_ERR(th)) { + result = osd_oi_delete(oti, &osd->od_oi, fid, th); + osd_trans_stop(env, th); + } else + result = PTR_ERR(th); + return result; +} + +/* + * Called just before object is freed. Releases all resources except for + * object itself (that is released by osd_object_free()). + * + * Concurrency: no concurrent access is possible that late in object + * life-cycle. + */ +static void osd_object_delete(const struct lu_env *env, struct lu_object *l) +{ + struct osd_object *obj = osd_obj(l); + struct inode *inode = obj->oo_inode; + + LASSERT(osd_invariant(obj)); + + /* + * If object is unlinked remove fid->ino mapping from object index. + * + * File body will be deleted by iput(). + */ + + osd_index_fini(obj); + if (inode != NULL) { + int result; + + if (osd_inode_unlinked(inode)) { + result = osd_inode_remove(env, obj); + if (result != 0) + LU_OBJECT_DEBUG(D_ERROR, env, l, + "Failed to cleanup: %d\n", + result); + } + iput(inode); + obj->oo_inode = NULL; + } +} + +/* + * Concurrency: ->loo_object_release() is called under site spin-lock. + */ +static void osd_object_release(const struct lu_env *env, + struct lu_object *l) +{ + struct osd_object *o = osd_obj(l); + + LASSERT(!lu_object_is_dying(l->lo_header)); + if (o->oo_inode != NULL && osd_inode_unlinked(o->oo_inode)) + set_bit(LU_OBJECT_HEARD_BANSHEE, &l->lo_header->loh_flags); +} + +/* + * Concurrency: shouldn't matter. + */ +static int osd_object_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *l) +{ + struct osd_object *o = osd_obj(l); + struct iam_descr *d; + + if (o->oo_dir != NULL) + d = o->oo_dir->od_container.ic_descr; + else + d = NULL; + return (*p)(env, cookie, LUSTRE_OSD_NAME"-object@%p(i:%p:%lu/%u)[%s]", + o, o->oo_inode, + o->oo_inode ? o->oo_inode->i_ino : 0UL, + o->oo_inode ? o->oo_inode->i_generation : 0, + d ? d->id_ops->id_name : "plain"); +} + +/* + * Concurrency: shouldn't matter. + */ +static int osd_statfs(const struct lu_env *env, + struct dt_device *d, struct kstatfs *sfs) +{ + struct osd_device *osd = osd_dt_dev(d); + struct super_block *sb = osd_sb(osd); + int result = 0; + + spin_lock(&osd->od_osfs_lock); + /* cache 1 second */ + if (cfs_time_before_64(osd->od_osfs_age, cfs_time_shift_64(-1))) { + result = sb->s_op->statfs(sb, &osd->od_kstatfs); + if (likely(result == 0)) /* N.B. statfs can't really fail */ + osd->od_osfs_age = cfs_time_current_64(); + } + + if (likely(result == 0)) + *sfs = osd->od_kstatfs; + spin_unlock(&osd->od_osfs_lock); + + return result; +} + +/* + * Concurrency: doesn't access mutable data. + */ +static void osd_conf_get(const struct lu_env *env, + const struct dt_device *dev, + struct dt_device_param *param) +{ + /* + * XXX should be taken from not-yet-existing fs abstraction layer. + */ + param->ddp_max_name_len = LDISKFS_NAME_LEN; + param->ddp_max_nlink = LDISKFS_LINK_MAX; + param->ddp_block_shift = osd_sb(osd_dt_dev(dev))->s_blocksize_bits; +} + +/* + * Journal + */ + +/* + * Concurrency: doesn't access mutable data. + */ +static int osd_param_is_sane(const struct osd_device *dev, + const struct txn_param *param) +{ + return param->tp_credits <= osd_journal(dev)->j_max_transaction_buffers; +} + +/* + * Concurrency: shouldn't matter. + */ +static void osd_trans_commit_cb(struct journal_callback *jcb, int error) +{ + struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb); + struct thandle *th = &oh->ot_super; + struct dt_device *dev = th->th_dev; + + LASSERT(dev != NULL); + LASSERT(oh->ot_handle == NULL); + + if (error) { + CERROR("transaction @0x%p commit error: %d\n", th, error); + } else { + /* + * This od_env_for_commit is only for commit usage. see + * "struct dt_device" + */ + dt_txn_hook_commit(&osd_dt_dev(dev)->od_env_for_commit, th); + } + + lu_device_put(&dev->dd_lu_dev); + th->th_dev = NULL; + + lu_context_exit(&th->th_ctx); + lu_context_fini(&th->th_ctx); + OBD_FREE_PTR(oh); +} + +/* + * Concurrency: shouldn't matter. + */ +static struct thandle *osd_trans_start(const struct lu_env *env, + struct dt_device *d, + struct txn_param *p) +{ + struct osd_device *dev = osd_dt_dev(d); + handle_t *jh; + struct osd_thandle *oh; + struct thandle *th; + int hook_res; + + ENTRY; + + hook_res = dt_txn_hook_start(env, d, p); + if (hook_res != 0) + RETURN(ERR_PTR(hook_res)); + + if (osd_param_is_sane(dev, p)) { + OBD_ALLOC_GFP(oh, sizeof *oh, GFP_NOFS); + if (oh != NULL) { + /* + * XXX temporary stuff. Some abstraction layer should + * be used. + */ + + jh = journal_start(osd_journal(dev), p->tp_credits); + if (!IS_ERR(jh)) { + oh->ot_handle = jh; + th = &oh->ot_super; + th->th_dev = d; + th->th_result = 0; + jh->h_sync = p->tp_sync; + lu_device_get(&d->dd_lu_dev); + /* add commit callback */ + lu_context_init(&th->th_ctx, LCT_TX_HANDLE); + lu_context_enter(&th->th_ctx); + journal_callback_set(jh, osd_trans_commit_cb, + (struct journal_callback *)&oh->ot_jcb); +#if OSD_COUNTERS + { + struct osd_thread_info *oti = + osd_oti_get(env); + + LASSERT(oti->oti_txns == 0); + LASSERT(oti->oti_r_locks == 0); + LASSERT(oti->oti_w_locks == 0); + oti->oti_txns++; + } +#endif + } else { + OBD_FREE_PTR(oh); + th = (void *)jh; + } + } else + th = ERR_PTR(-ENOMEM); + } else { + CERROR("Invalid transaction parameters\n"); + th = ERR_PTR(-EINVAL); + } + + RETURN(th); +} + +/* + * Concurrency: shouldn't matter. + */ +static void osd_trans_stop(const struct lu_env *env, struct thandle *th) +{ + int result; + struct osd_thandle *oh; + + ENTRY; + + oh = container_of0(th, struct osd_thandle, ot_super); + if (oh->ot_handle != NULL) { + handle_t *hdl = oh->ot_handle; + /* + * XXX temporary stuff. Some abstraction layer should be used. + */ + result = dt_txn_hook_stop(env, th); + if (result != 0) + CERROR("Failure in transaction hook: %d\n", result); + + /**/ + oh->ot_handle = NULL; + result = journal_stop(hdl); + if (result != 0) + CERROR("Failure to stop transaction: %d\n", result); + +#if OSD_COUNTERS + { + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(oti->oti_txns == 1); + LASSERT(oti->oti_r_locks == 0); + LASSERT(oti->oti_w_locks == 0); + oti->oti_txns--; + } +#endif + } + EXIT; +} + +/* + * Concurrency: shouldn't matter. + */ +static int osd_sync(const struct lu_env *env, struct dt_device *d) +{ + CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_NAME); + return ldiskfs_force_commit(osd_sb(osd_dt_dev(d))); +} + +/* + * Concurrency: shouldn't matter. + */ +lvfs_sbdev_type fsfilt_ldiskfs_journal_sbdev(struct super_block *); + +static void osd_ro(const struct lu_env *env, struct dt_device *d) +{ + ENTRY; + + CERROR("*** setting device %s read-only ***\n", LUSTRE_OSD_NAME); + + __lvfs_set_rdonly(lvfs_sbdev(osd_sb(osd_dt_dev(d))), + fsfilt_ldiskfs_journal_sbdev(osd_sb(osd_dt_dev(d)))); + EXIT; +} + +/* + * Concurrency: serialization provided by callers. + */ +static int osd_init_capa_ctxt(const struct lu_env *env, struct dt_device *d, + int mode, unsigned long timeout, __u32 alg, + struct lustre_capa_key *keys) +{ + struct osd_device *dev = osd_dt_dev(d); + ENTRY; + + dev->od_fl_capa = mode; + dev->od_capa_timeout = timeout; + dev->od_capa_alg = alg; + dev->od_capa_keys = keys; + RETURN(0); +} + +/* Note: we did not count into QUOTA here, If we mount with --data_journal + * we may need more*/ +static const int osd_dto_credits[DTO_NR] = { + /* + * Insert/Delete. IAM EXT3_INDEX_EXTRA_TRANS_BLOCKS(8) + + * EXT3_SINGLEDATA_TRANS_BLOCKS 8 XXX Note: maybe iam need more,since + * iam have more level than Ext3 htree + */ + [DTO_INDEX_INSERT] = 16, + [DTO_INDEX_DELETE] = 16, + [DTO_IDNEX_UPDATE] = 16, + /* + * Create a object. Same as create object in Ext3 filesystem, but did + * not count QUOTA i EXT3_DATA_TRANS_BLOCKS(12) + + * INDEX_EXTRA_BLOCKS(8) + 3(inode bits,groups, GDT) + */ + [DTO_OBJECT_CREATE] = 23, + [DTO_OBJECT_DELETE] = 23, + /* + * Attr set credits 3 inode, group, GDT + */ + [DTO_ATTR_SET] = 3, + /* + * XATTR_SET. SAME AS XATTR of EXT3 EXT3_DATA_TRANS_BLOCKS XXX Note: + * in original MDS implmentation EXT3_INDEX_EXTRA_TRANS_BLOCKS are + * also counted in. Do not know why? + */ + [DTO_XATTR_SET] = 16, + [DTO_LOG_REC] = 16 +}; + +static int osd_credit_get(const struct lu_env *env, struct dt_device *d, + enum dt_txn_op op) +{ + LASSERT(0 <= op && op < ARRAY_SIZE(osd_dto_credits)); + return osd_dto_credits[op]; +} + +static struct dt_device_operations osd_dt_ops = { + .dt_root_get = osd_root_get, + .dt_statfs = osd_statfs, + .dt_trans_start = osd_trans_start, + .dt_trans_stop = osd_trans_stop, + .dt_conf_get = osd_conf_get, + .dt_sync = osd_sync, + .dt_ro = osd_ro, + .dt_credit_get = osd_credit_get, + .dt_init_capa_ctxt = osd_init_capa_ctxt, +}; + +static void osd_object_read_lock(const struct lu_env *env, + struct dt_object *dt) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(osd_invariant(obj)); + + OSD_COUNTERS_DO(LASSERT(obj->oo_owner != env)); + down_read(&obj->oo_sem); +#if OSD_COUNTERS + { + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(obj->oo_owner == NULL); + oti->oti_r_locks++; + } +#endif +} + +static void osd_object_write_lock(const struct lu_env *env, + struct dt_object *dt) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(osd_invariant(obj)); + + OSD_COUNTERS_DO(LASSERT(obj->oo_owner != env)); + down_write(&obj->oo_sem); +#if OSD_COUNTERS + { + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(obj->oo_owner == NULL); + obj->oo_owner = env; + oti->oti_w_locks++; + } +#endif +} + +static void osd_object_read_unlock(const struct lu_env *env, + struct dt_object *dt) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(osd_invariant(obj)); +#if OSD_COUNTERS + { + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(oti->oti_r_locks > 0); + oti->oti_r_locks--; + } +#endif + up_read(&obj->oo_sem); +} + +static void osd_object_write_unlock(const struct lu_env *env, + struct dt_object *dt) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(osd_invariant(obj)); +#if OSD_COUNTERS + { + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(obj->oo_owner == env); + LASSERT(oti->oti_w_locks > 0); + oti->oti_w_locks--; + obj->oo_owner = NULL; + } +#endif + up_write(&obj->oo_sem); +} + +static int capa_is_sane(const struct lu_env *env, + struct osd_device *dev, + struct lustre_capa *capa, + struct lustre_capa_key *keys) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct obd_capa *oc; + int i, rc = 0; + ENTRY; + + oc = capa_lookup(dev->od_capa_hash, capa, 0); + if (oc) { + if (capa_is_expired(oc)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + rc = -ESTALE; + } + capa_put(oc); + RETURN(rc); + } + + spin_lock(&capa_lock); + for (i = 0; i < 2; i++) { + if (keys[i].lk_keyid == capa->lc_keyid) { + oti->oti_capa_key = keys[i]; + break; + } + } + spin_unlock(&capa_lock); + + if (i == 2) { + DEBUG_CAPA(D_ERROR, capa, "no matched capa key"); + RETURN(-ESTALE); + } + + rc = capa_hmac(oti->oti_capa.lc_hmac, capa, oti->oti_capa_key.lk_key); + if (rc) + RETURN(rc); + if (memcmp(oti->oti_capa.lc_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) + { + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + RETURN(-EACCES); + } + + oc = capa_add(dev->od_capa_hash, capa); + capa_put(oc); + + RETURN(0); +} + +static int osd_object_auth(const struct lu_env *env, struct dt_object *dt, + struct lustre_capa *capa, __u64 opc) +{ + const struct lu_fid *fid = lu_object_fid(&dt->do_lu); + struct osd_device *dev = osd_dev(dt->do_lu.lo_dev); + int rc; + + if (!dev->od_fl_capa) + return 0; + + if (capa == BYPASS_CAPA) + return 0; + + if (!capa) { + CERROR("no capability is provided for fid "DFID"\n", PFID(fid)); + return -EACCES; + } + + if (!lu_fid_eq(fid, &capa->lc_fid)) { + DEBUG_CAPA(D_ERROR, capa, "fid "DFID" mismatch with", + PFID(fid)); + return -EACCES; + } + + if (!capa_opc_supported(capa, opc)) { + DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc); + return -EACCES; + } + + if ((rc = capa_is_sane(env, dev, capa, dev->od_capa_keys))) { + DEBUG_CAPA(D_ERROR, capa, "insane (rc %d)", rc); + return -EACCES; + } + + return 0; +} + +static int osd_attr_get(const struct lu_env *env, + struct dt_object *dt, + struct lu_attr *attr, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) + return -EACCES; + + spin_lock(&obj->oo_guard); + osd_inode_getattr(env, obj->oo_inode, attr); + spin_unlock(&obj->oo_guard); + return 0; +} + +static int osd_attr_set(const struct lu_env *env, + struct dt_object *dt, + const struct lu_attr *attr, + struct thandle *handle, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) + return -EACCES; + + spin_lock(&obj->oo_guard); + osd_inode_setattr(env, obj->oo_inode, attr); + spin_unlock(&obj->oo_guard); + + mark_inode_dirty(obj->oo_inode); + return 0; +} + +static struct timespec *osd_inode_time(const struct lu_env *env, + struct inode *inode, __u64 seconds) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct timespec *t = &oti->oti_time; + + t->tv_sec = seconds; + t->tv_nsec = 0; + *t = timespec_trunc(*t, get_sb_time_gran(inode->i_sb)); + return t; +} + +static void osd_inode_setattr(const struct lu_env *env, + struct inode *inode, const struct lu_attr *attr) +{ + __u64 bits; + + bits = attr->la_valid; + + LASSERT(!(bits & LA_TYPE)); /* Huh? You want too much. */ + + if (bits & LA_ATIME) + inode->i_atime = *osd_inode_time(env, inode, attr->la_atime); + if (bits & LA_CTIME) + inode->i_ctime = *osd_inode_time(env, inode, attr->la_ctime); + if (bits & LA_MTIME) + inode->i_mtime = *osd_inode_time(env, inode, attr->la_mtime); + if (bits & LA_SIZE) + LDISKFS_I(inode)->i_disksize = inode->i_size = attr->la_size; + if (bits & LA_BLOCKS) + inode->i_blocks = attr->la_blocks; + if (bits & LA_MODE) + inode->i_mode = (inode->i_mode & S_IFMT) | + (attr->la_mode & ~S_IFMT); + if (bits & LA_UID) + inode->i_uid = attr->la_uid; + if (bits & LA_GID) + inode->i_gid = attr->la_gid; + if (bits & LA_NLINK) + inode->i_nlink = attr->la_nlink; + if (bits & LA_RDEV) + inode->i_rdev = attr->la_rdev; + if (bits & LA_BLKSIZE) + inode->i_blksize = attr->la_blksize; + + if (bits & LA_FLAGS) { + struct ldiskfs_inode_info *li = LDISKFS_I(inode); + + li->i_flags = (li->i_flags & ~LDISKFS_FL_USER_MODIFIABLE) | + (attr->la_flags & LDISKFS_FL_USER_MODIFIABLE); + } +} + +/* + * Object creation. + * + * XXX temporary solution. + */ + +static int osd_create_pre(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, struct thandle *th) +{ + return 0; +} + +static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, struct thandle *th) +{ + LASSERT(obj->oo_inode != NULL); + + osd_object_init0(obj); + return 0; +} + +extern struct inode *ldiskfs_create_inode(handle_t *handle, + struct inode * dir, int mode); + +static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, + umode_t mode, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + int result; + struct osd_device *osd = osd_obj2dev(obj); + struct osd_thandle *oth; + struct inode *parent; + struct inode *inode; + + LASSERT(osd_invariant(obj)); + LASSERT(obj->oo_inode == NULL); + LASSERT(osd->od_obj_area != NULL); + + oth = container_of(th, struct osd_thandle, ot_super); + LASSERT(oth->ot_handle->h_transaction != NULL); + + if (hint && hint->dah_parent) + parent = osd_dt_obj(hint->dah_parent)->oo_inode; + else + parent = osd->od_obj_area->d_inode; + LASSERT(parent->i_op != NULL); + + inode = ldiskfs_create_inode(oth->ot_handle, parent, mode); + if (!IS_ERR(inode)) { + obj->oo_inode = inode; + result = 0; + } else + result = PTR_ERR(inode); + LASSERT(osd_invariant(obj)); + return result; +} + + +extern int iam_lvar_create(struct inode *obj, int keysize, int ptrsize, + int recsize, handle_t *handle); + +enum { + OSD_NAME_LEN = 255 +}; + +static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + int result; + struct osd_thandle *oth; + + LASSERT(S_ISDIR(attr->la_mode)); + + oth = container_of(th, struct osd_thandle, ot_super); + LASSERT(oth->ot_handle->h_transaction != NULL); + result = osd_mkfile(info, obj, (attr->la_mode & + (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th); + if (result == 0) { + LASSERT(obj->oo_inode != NULL); + /* + * XXX uh-oh... call low-level iam function directly. + */ + result = iam_lvar_create(obj->oo_inode, OSD_NAME_LEN, 4, + sizeof (struct lu_fid_pack), + oth->ot_handle); + } + return result; +} + +static int osd_mkreg(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + LASSERT(S_ISREG(attr->la_mode)); + return osd_mkfile(info, obj, (attr->la_mode & + (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th); +} + +static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + LASSERT(S_ISLNK(attr->la_mode)); + return osd_mkfile(info, obj, (attr->la_mode & + (S_IFMT | S_IRWXUGO | S_ISVTX)), hint, th); +} + +static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + int result; + struct osd_device *osd = osd_obj2dev(obj); + struct inode *dir; + umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX); + + LASSERT(osd_invariant(obj)); + LASSERT(obj->oo_inode == NULL); + LASSERT(osd->od_obj_area != NULL); + LASSERT(S_ISCHR(mode) || S_ISBLK(mode) || + S_ISFIFO(mode) || S_ISSOCK(mode)); + + dir = osd->od_obj_area->d_inode; + LASSERT(dir->i_op != NULL); + + result = osd_mkfile(info, obj, mode, hint, th); + if (result == 0) { + LASSERT(obj->oo_inode != NULL); + init_special_inode(obj->oo_inode, mode, attr->la_rdev); + } + LASSERT(osd_invariant(obj)); + return result; +} + +typedef int (*osd_obj_type_f)(struct osd_thread_info *, struct osd_object *, + struct lu_attr *, + struct dt_allocation_hint *hint, + struct thandle *); + +static osd_obj_type_f osd_create_type_f(__u32 mode) +{ + osd_obj_type_f result; + + switch (mode) { + case S_IFDIR: + result = osd_mkdir; + break; + case S_IFREG: + result = osd_mkreg; + break; + case S_IFLNK: + result = osd_mksym; + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + result = osd_mknod; + break; + default: + LBUG(); + break; + } + return result; +} + + +static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, + struct dt_object *parent, umode_t child_mode) +{ + LASSERT(ah); + + memset(ah, 0, sizeof(*ah)); + ah->dah_parent = parent; + ah->dah_mode = child_mode; +} + + +/* + * Concurrency: @dt is write locked. + */ +static int osd_object_create(const struct lu_env *env, struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct thandle *th) +{ + const struct lu_fid *fid = lu_object_fid(&dt->do_lu); + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *osd = osd_obj2dev(obj); + struct osd_thread_info *info = osd_oti_get(env); + int result; + + ENTRY; + + LASSERT(osd_invariant(obj)); + LASSERT(!dt_object_exists(dt)); + LASSERT(osd_write_locked(env, obj)); + LASSERT(th != NULL); + + /* + * XXX missing: Quote handling. + */ + + result = osd_create_pre(info, obj, attr, th); + if (result == 0) { + result = osd_create_type_f(attr->la_mode & S_IFMT)(info, obj, + attr, hint, th); + if (result == 0) + result = osd_create_post(info, obj, attr, th); + } + if (result == 0) { + struct osd_inode_id *id = &info->oti_id; + + LASSERT(obj->oo_inode != NULL); + + id->oii_ino = obj->oo_inode->i_ino; + id->oii_gen = obj->oo_inode->i_generation; + + result = osd_oi_insert(info, &osd->od_oi, fid, id, th); + } + + LASSERT(ergo(result == 0, dt_object_exists(dt))); + LASSERT(osd_invariant(obj)); + RETURN(result); +} + +/* + * Concurrency: @dt is write locked. + */ +static void osd_object_ref_add(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + LASSERT(osd_write_locked(env, obj)); + LASSERT(th != NULL); + + spin_lock(&obj->oo_guard); + if (inode->i_nlink < LDISKFS_LINK_MAX) { + inode->i_nlink ++; + spin_unlock(&obj->oo_guard); + mark_inode_dirty(inode); + } else { + spin_unlock(&obj->oo_guard); + LU_OBJECT_DEBUG(D_ERROR, env, &dt->do_lu, + "Overflowed nlink\n"); + } + LASSERT(osd_invariant(obj)); +} + +/* + * Concurrency: @dt is write locked. + */ +static void osd_object_ref_del(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + LASSERT(osd_write_locked(env, obj)); + LASSERT(th != NULL); + + spin_lock(&obj->oo_guard); + if (inode->i_nlink > 0) { + inode->i_nlink --; + spin_unlock(&obj->oo_guard); + mark_inode_dirty(inode); + } else { + spin_unlock(&obj->oo_guard); + LU_OBJECT_DEBUG(D_ERROR, env, &dt->do_lu, + "Underflowed nlink\n"); + } + LASSERT(osd_invariant(obj)); +} + +/* + * Concurrency: @dt is read locked. + */ +static int osd_xattr_get(const struct lu_env *env, + struct dt_object *dt, + struct lu_buf *buf, + const char *name, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry = &info->oti_dentry; + + LASSERT(dt_object_exists(dt)); + LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL); + LASSERT(osd_read_locked(env, obj) || osd_write_locked(env, obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) + return -EACCES; + + dentry->d_inode = inode; + return inode->i_op->getxattr(dentry, name, buf->lb_buf, buf->lb_len); +} + +/* + * Concurrency: @dt is write locked. + */ +static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, const char *name, int fl, + struct thandle *handle, struct lustre_capa *capa) +{ + int fs_flags; + + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry = &info->oti_dentry; + + LASSERT(dt_object_exists(dt)); + LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL); + LASSERT(osd_write_locked(env, obj)); + LASSERT(handle != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) + return -EACCES; + + dentry->d_inode = inode; + + fs_flags = 0; + if (fl & LU_XATTR_REPLACE) + fs_flags |= XATTR_REPLACE; + + if (fl & LU_XATTR_CREATE) + fs_flags |= XATTR_CREATE; + + return inode->i_op->setxattr(dentry, name, + buf->lb_buf, buf->lb_len, fs_flags); +} + +/* + * Concurrency: @dt is read locked. + */ +static int osd_xattr_list(const struct lu_env *env, + struct dt_object *dt, + struct lu_buf *buf, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry = &info->oti_dentry; + + LASSERT(dt_object_exists(dt)); + LASSERT(inode->i_op != NULL && inode->i_op->listxattr != NULL); + LASSERT(osd_read_locked(env, obj) || osd_write_locked(env, obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) + return -EACCES; + + dentry->d_inode = inode; + return inode->i_op->listxattr(dentry, buf->lb_buf, buf->lb_len); +} + +/* + * Concurrency: @dt is write locked. + */ +static int osd_xattr_del(const struct lu_env *env, + struct dt_object *dt, + const char *name, + struct thandle *handle, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry = &info->oti_dentry; + + LASSERT(dt_object_exists(dt)); + LASSERT(inode->i_op != NULL && inode->i_op->removexattr != NULL); + LASSERT(osd_write_locked(env, obj)); + LASSERT(handle != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) + return -EACCES; + + dentry->d_inode = inode; + return inode->i_op->removexattr(dentry, name); +} + +static struct obd_capa *osd_capa_get(const struct lu_env *env, + struct dt_object *dt, + struct lustre_capa *old, + __u32 uid, __u64 opc) +{ + struct osd_thread_info *info = osd_oti_get(env); + const struct lu_fid *fid = lu_object_fid(&dt->do_lu); + struct osd_object *obj = osd_dt_obj(dt); + struct osd_device *dev = osd_obj2dev(obj); + struct lustre_capa_key *key = &info->oti_capa_key; + struct lustre_capa *capa = &info->oti_capa; + struct obd_capa *oc; + int rc; + ENTRY; + + if (!dev->od_fl_capa) + RETURN(ERR_PTR(-ENOENT)); + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + + /* renewal sanity check */ + if (old && osd_object_auth(env, dt, old, opc)) + RETURN(ERR_PTR(-EACCES)); + + capa->lc_fid = *fid; + capa->lc_opc = opc; + capa->lc_uid = uid; + capa->lc_flags = dev->od_capa_alg << 24; + capa->lc_timeout = dev->od_capa_timeout; + capa->lc_expiry = 0; + + oc = capa_lookup(dev->od_capa_hash, capa, 1); + if (oc) { + LASSERT(!capa_is_expired(oc)); + RETURN(oc); + } + + spin_lock(&capa_lock); + *key = dev->od_capa_keys[1]; + spin_unlock(&capa_lock); + + capa->lc_keyid = key->lk_keyid; + capa->lc_expiry = CURRENT_SECONDS + dev->od_capa_timeout; + + rc = capa_hmac(capa->lc_hmac, capa, key->lk_key); + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "HMAC failed: %d for", rc); + RETURN(ERR_PTR(rc)); + } + + oc = capa_add(dev->od_capa_hash, capa); + RETURN(oc); +} + +static struct dt_object_operations osd_obj_ops = { + .do_read_lock = osd_object_read_lock, + .do_write_lock = osd_object_write_lock, + .do_read_unlock = osd_object_read_unlock, + .do_write_unlock = osd_object_write_unlock, + .do_attr_get = osd_attr_get, + .do_attr_set = osd_attr_set, + .do_ah_init = osd_ah_init, + .do_create = osd_object_create, + .do_index_try = osd_index_try, + .do_ref_add = osd_object_ref_add, + .do_ref_del = osd_object_ref_del, + .do_xattr_get = osd_xattr_get, + .do_xattr_set = osd_xattr_set, + .do_xattr_del = osd_xattr_del, + .do_xattr_list = osd_xattr_list, + .do_capa_get = osd_capa_get, +}; + +/* + * Body operations. + */ + +/* + * XXX: Another layering violation for now. + * + * We don't want to use ->f_op->read methods, because generic file write + * + * - serializes on ->i_sem, and + * + * - does a lot of extra work like balance_dirty_pages(), + * + * which doesn't work for globally shared files like /last-received. + */ +int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); +int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize, + loff_t *offs, handle_t *handle); + +static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt, + struct lu_buf *buf, loff_t *pos, + struct lustre_capa *capa) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; + + if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ)) + RETURN(-EACCES); + + return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos); +} + +static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, loff_t *pos, + struct thandle *handle, struct lustre_capa *capa) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; + struct osd_thandle *oh; + ssize_t result; + + LASSERT(handle != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE)) + RETURN(-EACCES); + + oh = container_of(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle->h_transaction != NULL); + result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len, + pos, oh->ot_handle); + if (result == 0) + result = buf->lb_len; + return result; +} + +static struct dt_body_operations osd_body_ops = { + .dbo_read = osd_read, + .dbo_write = osd_write +}; + +/* + * Index operations. + */ + +static int osd_object_is_root(const struct osd_object *obj) +{ + return osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode; +} + +static int osd_index_probe(const struct lu_env *env, struct osd_object *o, + const struct dt_index_features *feat) +{ + struct iam_descr *descr; + + if (osd_object_is_root(o)) + return feat == &dt_directory_features; + + LASSERT(o->oo_dir != NULL); + + descr = o->oo_dir->od_container.ic_descr; + if (feat == &dt_directory_features) + return descr == &iam_htree_compat_param || + (descr->id_rec_size == sizeof(struct lu_fid_pack) && + 1 /* + * XXX check that index looks like directory. + */ + ); + else + return + feat->dif_keysize_min <= descr->id_key_size && + descr->id_key_size <= feat->dif_keysize_max && + feat->dif_recsize_min <= descr->id_rec_size && + descr->id_rec_size <= feat->dif_recsize_max && + !(feat->dif_flags & (DT_IND_VARKEY | + DT_IND_VARREC | DT_IND_NONUNQ)) && + ergo(feat->dif_flags & DT_IND_UPDATE, + 1 /* XXX check that object (and file system) is + * writable */); +} + +static int osd_container_init(const struct lu_env *env, + struct osd_object *obj, + struct osd_directory *dir) +{ + int result; + struct iam_container *bag; + + bag = &dir->od_container; + result = iam_container_init(bag, &dir->od_descr, obj->oo_inode); + if (result == 0) { + result = iam_container_setup(bag); + if (result == 0) + obj->oo_dt.do_index_ops = &osd_index_ops; + else + iam_container_fini(bag); + } + return result; +} + +/* + * Concurrency: no external locking is necessary. + */ +static int osd_index_try(const struct lu_env *env, struct dt_object *dt, + const struct dt_index_features *feat) +{ + int result; + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + + if (osd_object_is_root(obj)) { + dt->do_index_ops = &osd_index_compat_ops; + result = 0; + } else if (!osd_has_index(obj)) { + struct osd_directory *dir; + + OBD_ALLOC_PTR(dir); + if (dir != NULL) { + sema_init(&dir->od_sem, 1); + + spin_lock(&obj->oo_guard); + if (obj->oo_dir == NULL) + obj->oo_dir = dir; + else + /* + * Concurrent thread allocated container data. + */ + OBD_FREE_PTR(dir); + spin_unlock(&obj->oo_guard); + /* + * Now, that we have container data, serialize its + * initialization. + */ + down(&obj->oo_dir->od_sem); + /* + * recheck under lock. + */ + if (!osd_has_index(obj)) + result = osd_container_init(env, obj, dir); + else + result = 0; + up(&obj->oo_dir->od_sem); + } else + result = -ENOMEM; + } else + result = 0; + + if (result == 0) { + if (!osd_index_probe(env, obj, feat)) + result = -ENOTDIR; + } + LASSERT(osd_invariant(obj)); + + return result; +} + +static int osd_index_delete(const struct lu_env *env, struct dt_object *dt, + const struct dt_key *key, struct thandle *handle, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct osd_thandle *oh; + struct iam_path_descr *ipd; + struct iam_container *bag = &obj->oo_dir->od_container; + int rc; + + ENTRY; + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + LASSERT(bag->ic_object == obj->oo_inode); + LASSERT(handle != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_DELETE)) + RETURN(-EACCES); + + ipd = osd_ipd_get(env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + + rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd); + osd_ipd_put(env, bag, ipd); + LASSERT(osd_invariant(obj)); + RETURN(rc); +} + +static int osd_index_lookup(const struct lu_env *env, struct dt_object *dt, + struct dt_rec *rec, const struct dt_key *key, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct iam_path_descr *ipd; + struct iam_container *bag = &obj->oo_dir->od_container; + int rc; + + ENTRY; + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + LASSERT(bag->ic_object == obj->oo_inode); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_LOOKUP)) + return -EACCES; + + ipd = osd_ipd_get(env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + rc = iam_lookup(bag, (const struct iam_key *)key, + (struct iam_rec *)rec, ipd); + osd_ipd_put(env, bag, ipd); + LASSERT(osd_invariant(obj)); + + RETURN(rc); +} + +static int osd_index_insert(const struct lu_env *env, struct dt_object *dt, + const struct dt_rec *rec, const struct dt_key *key, + struct thandle *th, struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct iam_path_descr *ipd; + struct osd_thandle *oh; + struct iam_container *bag = &obj->oo_dir->od_container; + int rc; + + ENTRY; + + LASSERT(osd_invariant(obj)); + LASSERT(dt_object_exists(dt)); + LASSERT(bag->ic_object == obj->oo_inode); + LASSERT(th != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) + return -EACCES; + + ipd = osd_ipd_get(env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + oh = container_of0(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key, + (struct iam_rec *)rec, ipd); + osd_ipd_put(env, bag, ipd); + LASSERT(osd_invariant(obj)); + RETURN(rc); +} + +/* + * Iterator operations. + */ +struct osd_it { + struct osd_object *oi_obj; + struct iam_path_descr *oi_ipd; + struct iam_iterator oi_it; +}; + +static struct dt_it *osd_it_init(const struct lu_env *env, + struct dt_object *dt, int writable, + struct lustre_capa *capa) +{ + struct osd_it *it; + struct osd_object *obj = osd_dt_obj(dt); + struct lu_object *lo = &dt->do_lu; + struct iam_path_descr *ipd; + struct iam_container *bag = &obj->oo_dir->od_container; + __u32 flags; + + LASSERT(lu_object_exists(lo)); + + if (osd_object_auth(env, dt, capa, writable ? CAPA_OPC_BODY_WRITE : + CAPA_OPC_BODY_READ)) + return ERR_PTR(-EACCES); + + flags = writable ? IAM_IT_MOVE|IAM_IT_WRITE : IAM_IT_MOVE; + OBD_ALLOC_PTR(it); + if (it != NULL) { + /* + * XXX: as ipd is allocated within osd_thread_info, assignment + * below implies that iterator usage is confined within single + * environment. + */ + ipd = osd_ipd_get(env, bag); + if (likely(ipd != NULL)) { + it->oi_obj = obj; + it->oi_ipd = ipd; + lu_object_get(lo); + iam_it_init(&it->oi_it, bag, flags, ipd); + return (struct dt_it *)it; + } else + OBD_FREE_PTR(it); + } + return ERR_PTR(-ENOMEM); +} + +static void osd_it_fini(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + struct osd_object *obj = it->oi_obj; + + iam_it_fini(&it->oi_it); + osd_ipd_put(env, &obj->oo_dir->od_container, it->oi_ipd); + lu_object_put(env, &obj->oo_dt.do_lu); + OBD_FREE_PTR(it); +} + +static int osd_it_get(const struct lu_env *env, + struct dt_it *di, const struct dt_key *key) +{ + struct osd_it *it = (struct osd_it *)di; + + return iam_it_get(&it->oi_it, (const struct iam_key *)key); +} + +static void osd_it_put(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + iam_it_put(&it->oi_it); +} + +static int osd_it_next(const struct lu_env *env, struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + return iam_it_next(&it->oi_it); +} + +static int osd_it_del(const struct lu_env *env, struct dt_it *di, + struct thandle *th) +{ + struct osd_it *it = (struct osd_it *)di; + struct osd_thandle *oh; + + LASSERT(th != NULL); + + oh = container_of0(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + + return iam_it_rec_delete(oh->ot_handle, &it->oi_it); +} + +static struct dt_key *osd_it_key(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + return (struct dt_key *)iam_it_key_get(&it->oi_it); +} + +static int osd_it_key_size(const struct lu_env *env, const struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + return iam_it_key_size(&it->oi_it); +} + +static struct dt_rec *osd_it_rec(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + return (struct dt_rec *)iam_it_rec_get(&it->oi_it); +} + +static __u32 osd_it_store(const struct lu_env *env, const struct dt_it *di) +{ + struct osd_it *it = (struct osd_it *)di; + + return iam_it_store(&it->oi_it); +} + +static int osd_it_load(const struct lu_env *env, + const struct dt_it *di, __u32 hash) +{ + struct osd_it *it = (struct osd_it *)di; + + return iam_it_load(&it->oi_it, hash); +} + +static struct dt_index_operations osd_index_ops = { + .dio_lookup = osd_index_lookup, + .dio_insert = osd_index_insert, + .dio_delete = osd_index_delete, + .dio_it = { + .init = osd_it_init, + .fini = osd_it_fini, + .get = osd_it_get, + .put = osd_it_put, + .del = osd_it_del, + .next = osd_it_next, + .key = osd_it_key, + .key_size = osd_it_key_size, + .rec = osd_it_rec, + .store = osd_it_store, + .load = osd_it_load + } +}; + +static int osd_index_compat_delete(const struct lu_env *env, + struct dt_object *dt, + const struct dt_key *key, + struct thandle *handle, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LASSERT(handle != NULL); + LASSERT(S_ISDIR(obj->oo_inode->i_mode)); + ENTRY; + +#if 0 + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_DELETE)) + RETURN(-EACCES); +#endif + + RETURN(-EOPNOTSUPP); +} + +/* + * Compatibility index operations. + */ + + +static void osd_build_pack(const struct lu_env *env, struct osd_device *osd, + struct dentry *dentry, struct lu_fid_pack *pack) +{ + struct inode *inode = dentry->d_inode; + struct lu_fid *fid = &osd_oti_get(env)->oti_fid; + + lu_igif_build(fid, inode->i_ino, inode->i_generation); + fid_cpu_to_be(fid, fid); + pack->fp_len = sizeof *fid + 1; + memcpy(pack->fp_area, fid, sizeof *fid); +} + +static int osd_index_compat_lookup(const struct lu_env *env, + struct dt_object *dt, + struct dt_rec *rec, const struct dt_key *key, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + + struct osd_device *osd = osd_obj2dev(obj); + struct osd_thread_info *info = osd_oti_get(env); + struct inode *dir; + + int result; + + /* + * XXX temporary solution. + */ + struct dentry *dentry; + struct dentry *parent; + + LASSERT(osd_invariant(obj)); + LASSERT(S_ISDIR(obj->oo_inode->i_mode)); + LASSERT(osd_has_index(obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_LOOKUP)) + return -EACCES; + + info->oti_str.name = (const char *)key; + info->oti_str.len = strlen((const char *)key); + + dir = obj->oo_inode; + LASSERT(dir->i_op != NULL && dir->i_op->lookup != NULL); + + parent = d_alloc_root(dir); + if (parent == NULL) + return -ENOMEM; + igrab(dir); + dentry = d_alloc(parent, &info->oti_str); + if (dentry != NULL) { + struct dentry *d; + + /* + * XXX passing NULL for nameidata should work for + * ext3/ldiskfs. + */ + d = dir->i_op->lookup(dir, dentry, NULL); + if (d == NULL) { + /* + * normal case, result is in @dentry. + */ + if (dentry->d_inode != NULL) { + osd_build_pack(env, osd, dentry, + (struct lu_fid_pack *)rec); + result = 0; + } else + result = -ENOENT; + } else { + /* What? Disconnected alias? Ppheeeww... */ + CERROR("Aliasing where not expected\n"); + result = -EIO; + dput(d); + } + dput(dentry); + } else + result = -ENOMEM; + dput(parent); + LASSERT(osd_invariant(obj)); + return result; +} + +static int osd_add_rec(struct osd_thread_info *info, struct osd_device *dev, + struct inode *dir, struct inode *inode, const char *name) +{ + struct dentry *old; + struct dentry *new; + struct dentry *parent; + + int result; + + info->oti_str.name = name; + info->oti_str.len = strlen(name); + + LASSERT(atomic_read(&dir->i_count) > 0); + result = -ENOMEM; + old = d_alloc(dev->od_obj_area, &info->oti_str); + if (old != NULL) { + d_instantiate(old, inode); + igrab(inode); + LASSERT(atomic_read(&dir->i_count) > 0); + parent = d_alloc_root(dir); + if (parent != NULL) { + igrab(dir); + LASSERT(atomic_read(&dir->i_count) > 1); + new = d_alloc(parent, &info->oti_str); + LASSERT(atomic_read(&dir->i_count) > 1); + if (new != NULL) { + LASSERT(atomic_read(&dir->i_count) > 1); + result = dir->i_op->link(old, dir, new); + LASSERT(atomic_read(&dir->i_count) > 1); + dput(new); + LASSERT(atomic_read(&dir->i_count) > 1); + } + LASSERT(atomic_read(&dir->i_count) > 1); + dput(parent); + LASSERT(atomic_read(&dir->i_count) > 0); + } + dput(old); + } + LASSERT(atomic_read(&dir->i_count) > 0); + return result; +} + + +/* + * XXX Temporary stuff. + */ +static int osd_index_compat_insert(const struct lu_env *env, + struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, struct thandle *th, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + + const char *name = (const char *)key; + + struct lu_device *ludev = dt->do_lu.lo_dev; + struct lu_object *luch; + + struct osd_thread_info *info = osd_oti_get(env); + const struct lu_fid_pack *pack = (const struct lu_fid_pack *)rec; + struct lu_fid *fid = &osd_oti_get(env)->oti_fid; + + int result; + + LASSERT(S_ISDIR(obj->oo_inode->i_mode)); + LASSERT(osd_invariant(obj)); + LASSERT(th != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) + return -EACCES; + + fid_unpack(pack, fid); + luch = lu_object_find(env, ludev->ld_site, fid); + if (!IS_ERR(luch)) { + if (lu_object_exists(luch)) { + struct osd_object *child; + + child = osd_obj(lu_object_locate(luch->lo_header, + ludev->ld_type)); + if (child != NULL) + result = osd_add_rec(info, osd_obj2dev(obj), + obj->oo_inode, + child->oo_inode, name); + else { + CERROR("No osd slice.\n"); + result = -ENOENT; + } + LASSERT(osd_invariant(obj)); + LASSERT(osd_invariant(child)); + } else { + CERROR("Sorry.\n"); + result = -ENOENT; + } + lu_object_put(env, luch); + } else + result = PTR_ERR(luch); + LASSERT(osd_invariant(obj)); + return result; +} + +static struct dt_index_operations osd_index_compat_ops = { + .dio_lookup = osd_index_compat_lookup, + .dio_insert = osd_index_compat_insert, + .dio_delete = osd_index_compat_delete +}; + +/* + * OSD device type methods + */ +static int osd_type_init(struct lu_device_type *t) +{ + LU_CONTEXT_KEY_INIT(&osd_key); + return lu_context_key_register(&osd_key); +} + +static void osd_type_fini(struct lu_device_type *t) +{ + lu_context_key_degister(&osd_key); +} + +static struct lu_context_key osd_key = { + .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD, + .lct_init = osd_key_init, + .lct_fini = osd_key_fini, + .lct_exit = osd_key_exit +}; + +static void *osd_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct osd_thread_info *info; + + OBD_ALLOC_PTR(info); + if (info != NULL) + info->oti_env = container_of(ctx, struct lu_env, le_ctx); + else + info = ERR_PTR(-ENOMEM); + return info; +} + +static void osd_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct osd_thread_info *info = data; + OBD_FREE_PTR(info); +} + +static void osd_key_exit(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ +#if OSD_COUNTERS + struct osd_thread_info *info = data; + + LASSERT(info->oti_r_locks == 0); + LASSERT(info->oti_w_locks == 0); + LASSERT(info->oti_txns == 0); +#endif +} + +static int osd_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + return lu_env_init(&osd_dev(d)->od_env_for_commit, NULL, LCT_MD_THREAD); +} + +static int osd_shutdown(const struct lu_env *env, struct osd_device *o) +{ + struct osd_thread_info *info = osd_oti_get(env); + ENTRY; + if (o->od_obj_area != NULL) { + dput(o->od_obj_area); + o->od_obj_area = NULL; + } + osd_oi_fini(info, &o->od_oi); + + RETURN(0); +} + +static int osd_mount(const struct lu_env *env, + struct osd_device *o, struct lustre_cfg *cfg) +{ + struct lustre_mount_info *lmi; + const char *dev = lustre_cfg_string(cfg, 0); + struct osd_thread_info *info = osd_oti_get(env); + int result; + + ENTRY; + + if (o->od_mount != NULL) { + CERROR("Already mounted (%s)\n", dev); + RETURN(-EEXIST); + } + + /* get mount */ + lmi = server_get_mount(dev); + if (lmi == NULL) { + CERROR("Cannot get mount info for %s!\n", dev); + RETURN(-EFAULT); + } + + LASSERT(lmi != NULL); + /* save lustre_mount_info in dt_device */ + o->od_mount = lmi; + + result = osd_oi_init(info, &o->od_oi, &o->od_dt_dev); + if (result == 0) { + struct dentry *d; + + d = simple_mkdir(osd_sb(o)->s_root, "*OBJ-TEMP*", 0777, 1); + if (!IS_ERR(d)) { + o->od_obj_area = d; + } else + result = PTR_ERR(d); + } + if (result != 0) + osd_shutdown(env, o); + RETURN(result); +} + +static struct lu_device *osd_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + ENTRY; + + shrink_dcache_sb(osd_sb(osd_dev(d))); + osd_sync(env, lu2dt_dev(d)); + + if (osd_dev(d)->od_mount) + server_put_mount(osd_dev(d)->od_mount->lmi_name, + osd_dev(d)->od_mount->lmi_mnt); + osd_dev(d)->od_mount = NULL; + + lu_env_fini(&osd_dev(d)->od_env_for_commit); + RETURN(NULL); +} + +static struct lu_device *osd_device_alloc(const struct lu_env *env, + struct lu_device_type *t, + struct lustre_cfg *cfg) +{ + struct lu_device *l; + struct osd_device *o; + + OBD_ALLOC_PTR(o); + if (o != NULL) { + int result; + + result = dt_device_init(&o->od_dt_dev, t); + if (result == 0) { + l = osd2lu_dev(o); + l->ld_ops = &osd_lu_ops; + o->od_dt_dev.dd_ops = &osd_dt_ops; + spin_lock_init(&o->od_osfs_lock); + o->od_osfs_age = cfs_time_shift_64(-1000); + o->od_capa_hash = init_capa_hash(); + if (o->od_capa_hash == NULL) + l = ERR_PTR(-ENOMEM); + } else + l = ERR_PTR(result); + } else + l = ERR_PTR(-ENOMEM); + return l; +} + +static void osd_device_free(const struct lu_env *env, struct lu_device *d) +{ + struct osd_device *o = osd_dev(d); + + cleanup_capa_hash(o->od_capa_hash); + dt_device_fini(&o->od_dt_dev); + OBD_FREE_PTR(o); +} + +static int osd_process_config(const struct lu_env *env, + struct lu_device *d, struct lustre_cfg *cfg) +{ + struct osd_device *o = osd_dev(d); + int err; + ENTRY; + + switch(cfg->lcfg_command) { + case LCFG_SETUP: + err = osd_mount(env, o, cfg); + break; + case LCFG_CLEANUP: + err = osd_shutdown(env, o); + break; + default: + err = -ENOTTY; + } + + RETURN(err); +} +extern void ldiskfs_orphan_cleanup (struct super_block * sb, + struct ldiskfs_super_block * es); + +static int osd_recovery_complete(const struct lu_env *env, + struct lu_device *d) +{ + struct osd_device *o = osd_dev(d); + ENTRY; + /* TODO: orphans handling */ + ldiskfs_orphan_cleanup(osd_sb(o), LDISKFS_SB(osd_sb(o))->s_es); + RETURN(0); +} + +static struct inode *osd_iget(struct osd_thread_info *info, + struct osd_device *dev, + const struct osd_inode_id *id) +{ + struct inode *inode; + + inode = iget(osd_sb(dev), id->oii_ino); + if (inode == NULL) { + CERROR("no inode\n"); + inode = ERR_PTR(-EACCES); + } else if (is_bad_inode(inode)) { + CERROR("bad inode\n"); + iput(inode); + inode = ERR_PTR(-ENOENT); + } else if (inode->i_generation != id->oii_gen) { + CERROR("stale inode\n"); + iput(inode); + inode = ERR_PTR(-ESTALE); + } + + return inode; + +} + +static int osd_fid_lookup(const struct lu_env *env, + struct osd_object *obj, const struct lu_fid *fid) +{ + struct osd_thread_info *info; + struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; + struct osd_device *dev; + struct osd_inode_id *id; + struct osd_oi *oi; + struct inode *inode; + int result; + + LASSERT(osd_invariant(obj)); + LASSERT(obj->oo_inode == NULL); + LASSERT(fid_is_sane(fid)); + /* + * This assertion checks that osd layer sees only local + * fids. Unfortunately it is somewhat expensive (does a + * cache-lookup). Disabling it for production/acceptance-testing. + */ + LASSERT(1 || fid_is_local(ldev->ld_site, fid)); + + ENTRY; + + info = osd_oti_get(env); + dev = osd_dev(ldev); + id = &info->oti_id; + oi = &dev->od_oi; + + if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + RETURN(-ENOENT); + + result = osd_oi_lookup(info, oi, fid, id); + if (result == 0) { + inode = osd_iget(info, dev, id); + if (!IS_ERR(inode)) { + obj->oo_inode = inode; + LASSERT(obj->oo_inode->i_sb == osd_sb(dev)); + result = 0; + } else + /* + * If fid wasn't found in oi, inode-less object is + * created, for which lu_object_exists() returns + * false. This is used in a (frequent) case when + * objects are created as locking anchors or + * place holders for objects yet to be created. + */ + result = PTR_ERR(inode); + } else if (result == -ENOENT) + result = 0; + LASSERT(osd_invariant(obj)); + RETURN(result); +} + +static void osd_inode_getattr(const struct lu_env *env, + struct inode *inode, struct lu_attr *attr) +{ + attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | + LA_SIZE | LA_BLOCKS | LA_UID | LA_GID | + LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE; + + attr->la_atime = LTIME_S(inode->i_atime); + attr->la_mtime = LTIME_S(inode->i_mtime); + attr->la_ctime = LTIME_S(inode->i_ctime); + attr->la_mode = inode->i_mode; + attr->la_size = inode->i_size; + attr->la_blocks = inode->i_blocks; + attr->la_uid = inode->i_uid; + attr->la_gid = inode->i_gid; + attr->la_flags = LDISKFS_I(inode)->i_flags; + attr->la_nlink = inode->i_nlink; + attr->la_rdev = inode->i_rdev; + attr->la_blksize = inode->i_blksize; +} + +/* + * Helpers. + */ + +static int lu_device_is_osd(const struct lu_device *d) +{ + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &osd_lu_ops); +} + +static struct osd_object *osd_obj(const struct lu_object *o) +{ + LASSERT(lu_device_is_osd(o->lo_dev)); + return container_of0(o, struct osd_object, oo_dt.do_lu); +} + +static struct osd_device *osd_dt_dev(const struct dt_device *d) +{ + LASSERT(lu_device_is_osd(&d->dd_lu_dev)); + return container_of0(d, struct osd_device, od_dt_dev); +} + +static struct osd_device *osd_dev(const struct lu_device *d) +{ + LASSERT(lu_device_is_osd(d)); + return osd_dt_dev(container_of0(d, struct dt_device, dd_lu_dev)); +} + +static struct osd_object *osd_dt_obj(const struct dt_object *d) +{ + return osd_obj(&d->do_lu); +} + +static struct osd_device *osd_obj2dev(const struct osd_object *o) +{ + return osd_dev(o->oo_dt.do_lu.lo_dev); +} + +static struct lu_device *osd2lu_dev(struct osd_device *osd) +{ + return &osd->od_dt_dev.dd_lu_dev; +} + +static struct super_block *osd_sb(const struct osd_device *dev) +{ + return dev->od_mount->lmi_mnt->mnt_sb; +} + +static journal_t *osd_journal(const struct osd_device *dev) +{ + return LDISKFS_SB(osd_sb(dev))->s_journal; +} + +static int osd_has_index(const struct osd_object *obj) +{ + return obj->oo_dt.do_index_ops != NULL; +} + +static int osd_object_invariant(const struct lu_object *l) +{ + return osd_invariant(osd_obj(l)); +} + +static struct lu_object_operations osd_lu_obj_ops = { + .loo_object_init = osd_object_init, + .loo_object_delete = osd_object_delete, + .loo_object_release = osd_object_release, + .loo_object_free = osd_object_free, + .loo_object_print = osd_object_print, + .loo_object_invariant = osd_object_invariant +}; + +static struct lu_device_operations osd_lu_ops = { + .ldo_object_alloc = osd_object_alloc, + .ldo_process_config = osd_process_config, + .ldo_recovery_complete = osd_recovery_complete +}; + +static struct lu_device_type_operations osd_device_type_ops = { + .ldto_init = osd_type_init, + .ldto_fini = osd_type_fini, + + .ldto_device_alloc = osd_device_alloc, + .ldto_device_free = osd_device_free, + + .ldto_device_init = osd_device_init, + .ldto_device_fini = osd_device_fini +}; + +static struct lu_device_type osd_device_type = { + .ldt_tags = LU_DEVICE_DT, + .ldt_name = LUSTRE_OSD_NAME, + .ldt_ops = &osd_device_type_ops, + .ldt_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD +}; + +/* + * lprocfs legacy support. + */ +static struct lprocfs_vars lprocfs_osd_obd_vars[] = { + { 0 } +}; + +static struct lprocfs_vars lprocfs_osd_module_vars[] = { + { 0 } +}; + +static struct obd_ops osd_obd_device_ops = { + .o_owner = THIS_MODULE +}; + +LPROCFS_INIT_VARS(osd, lprocfs_osd_module_vars, lprocfs_osd_obd_vars); + +static int __init osd_mod_init(void) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(osd, &lvars); + return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars, + LUSTRE_OSD_NAME, &osd_device_type); +} + +static void __exit osd_mod_exit(void) +{ + class_unregister_type(LUSTRE_OSD_NAME); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); +MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_NAME")"); +MODULE_LICENSE("GPL"); + +cfs_module(osd, "0.0.2", osd_mod_init, osd_mod_exit); diff --git a/lustre/osd/osd_igif.c b/lustre/osd/osd_igif.c new file mode 100644 index 0000000..9bf9870 --- /dev/null +++ b/lustre/osd/osd_igif.c @@ -0,0 +1,70 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_igif.c + * igif (compatibility fids) support + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> + +/* LUSTRE_VERSION_CODE */ +#include <lustre_ver.h> +/* fid stuff */ +#include <lustre/lustre_idl.h> + +/* struct osd_inode_id */ +#include "osd_oi.h" +#include "osd_igif.h" + +void lu_igif_to_id(const struct lu_fid *fid, struct osd_inode_id *id) +{ + LASSERT(fid_is_igif(fid)); + id->oii_ino = lu_igif_ino(fid); + id->oii_gen = lu_igif_gen(fid); +} + +__u32 lu_igif_ino(const struct lu_fid *fid) +{ + LASSERT(fid_is_igif(fid)); + return fid_oid(fid); +} + +__u32 lu_igif_gen(const struct lu_fid *fid) +{ + LASSERT(fid_is_igif(fid)); + return fid_ver(fid); +} + +void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen) +{ + fid->f_seq = LUSTRE_ROOT_FID_SEQ; + fid->f_oid = ino; + fid->f_ver = gen; + LASSERT(fid_is_igif(fid)); +} diff --git a/lustre/osd/osd_igif.h b/lustre/osd/osd_igif.h new file mode 100644 index 0000000..04439d1 --- /dev/null +++ b/lustre/osd/osd_igif.h @@ -0,0 +1,43 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_igif.h + * igif (compatibility fids) support + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _OSD_IGIF_H +#define _OSD_IGIF_H + +#if defined(__KERNEL__) + +struct lu_fid; +struct osd_inode_id; + +void lu_igif_to_id(const struct lu_fid *fid, struct osd_inode_id *id); +__u32 lu_igif_ino(const struct lu_fid *fid); +__u32 lu_igif_gen(const struct lu_fid *fid); +void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen); + +#endif /* __KERNEL__ */ +#endif /* _OSD_IGIF_H */ diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h new file mode 100644 index 0000000..9c34a64 --- /dev/null +++ b/lustre/osd/osd_internal.h @@ -0,0 +1,83 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_internal.h + * Shared definitions and declarations for osd module + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#ifndef _OSD_INTERNAL_H +#define _OSD_INTERNAL_H + +#if defined(__KERNEL__) + +/* struct rw_semaphore */ +#include <linux/rwsem.h> +/* handle_t, journal_start(), journal_stop() */ +#include <linux/jbd.h> +/* struct dx_hash_info */ +#include <linux/ldiskfs_fs.h> +/* struct dentry */ +#include <linux/dcache.h> +#include <linux/lustre_iam.h> + +#include <dt_object.h> +#include "osd_oi.h" + +struct inode; + +#define OSD_COUNTERS (0) + +struct osd_thread_info { + const struct lu_env *oti_env; + + struct lu_fid oti_fid; + struct osd_inode_id oti_id; + /* + * XXX temporary: for ->i_op calls. + */ + struct qstr oti_str; + struct txn_param oti_txn; + /* + * XXX temporary: fake dentry used by xattr calls. + */ + struct dentry oti_dentry; + struct timespec oti_time; + /* + * XXX temporary: for capa operations. + */ + struct lustre_capa_key oti_capa_key; + struct lustre_capa oti_capa; + + struct lu_fid_pack oti_pack; + + char oti_ipd[DX_IPD_MAX_SIZE]; +#if OSD_COUNTERS + int oti_r_locks; + int oti_w_locks; + int oti_txns; +#endif +}; + +#endif /* __KERNEL__ */ +#endif /* _OSD_INTERNAL_H */ diff --git a/lustre/osd/osd_oi.c b/lustre/osd/osd_oi.c new file mode 100644 index 0000000..96e5e74 --- /dev/null +++ b/lustre/osd/osd_oi.c @@ -0,0 +1,215 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_oi.c + * Object Index. + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +/* + * oi uses two mechanisms to implement fid->cookie mapping: + * + * - persistent index, where cookie is a record and fid is a key, and + * + * - algorithmic mapping for "igif" fids. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include <linux/module.h> + +/* LUSTRE_VERSION_CODE */ +#include <lustre_ver.h> +/* + * struct OBD_{ALLOC,FREE}*() + * OBD_FAIL_CHECK + */ +#include <obd.h> +#include <obd_support.h> + +/* fid_cpu_to_be() */ +#include <lustre_fid.h> + +#include "osd_oi.h" +/* osd_lookup(), struct osd_thread_info */ +#include "osd_internal.h" +#include "osd_igif.h" +#include "dt_object.h" + +struct oi_descr { + int fid_size; + char *name; +}; + +static const struct oi_descr oi_descr[OSD_OI_FID_NR] = { + [OSD_OI_FID_SMALL] = { + .fid_size = 5, + .name = "oi.5" + }, + [OSD_OI_FID_OTHER] = { + .fid_size = sizeof(struct lu_fid), + .name = "oi.16" + } +}; + +int osd_oi_init(struct osd_thread_info *info, + struct osd_oi *oi, struct dt_device *dev) +{ + int rc; + int i; + const struct lu_env *env; + + CLASSERT(ARRAY_SIZE(oi->oi_dir) == ARRAY_SIZE(oi_descr)); + + env = info->oti_env; + + memset(oi, 0, sizeof *oi); + + for (i = rc = 0; i < ARRAY_SIZE(oi->oi_dir) && rc == 0; ++i) { + const char *name; + /* + * Allocate on stack---this is initialization. + */ + const struct dt_index_features feat = { + .dif_flags = DT_IND_UPDATE, + .dif_keysize_min = oi_descr[i].fid_size, + .dif_keysize_max = oi_descr[i].fid_size, + .dif_recsize_min = sizeof(struct osd_inode_id), + .dif_recsize_max = sizeof(struct osd_inode_id) + }; + struct dt_object *obj; + + name = oi_descr[i].name; + obj = dt_store_open(env, dev, name, &info->oti_fid); + if (!IS_ERR(obj)) { + rc = obj->do_ops->do_index_try(env, obj, &feat); + if (rc == 0) { + LASSERT(obj->do_index_ops != NULL); + oi->oi_dir[i] = obj; + } else { + CERROR("Wrong index \"%s\": %d\n", name, rc); + lu_object_put(env, &obj->do_lu); + } + } else { + rc = PTR_ERR(obj); + CERROR("Cannot open \"%s\": %d\n", name, rc); + } + } + if (rc != 0) + osd_oi_fini(info, oi); + return rc; +} + +void osd_oi_fini(struct osd_thread_info *info, struct osd_oi *oi) +{ + int i; + for (i = 0; i < ARRAY_SIZE(oi->oi_dir); ++i) { + if (oi->oi_dir[i] != NULL) { + lu_object_put(info->oti_env, &oi->oi_dir[i]->do_lu); + oi->oi_dir[i] = NULL; + } + } +} + +static const struct dt_key *oi_fid_key(struct osd_thread_info *info, + struct osd_oi *oi, + const struct lu_fid *fid, + struct dt_object **idx) +{ + int i; + struct lu_fid_pack *pack; + + pack = &info->oti_pack; + fid_pack(pack, fid, &info->oti_fid); + for (i = 0; i < ARRAY_SIZE(oi->oi_dir); ++i) { + if (pack->fp_len == oi_descr[i].fid_size + sizeof pack->fp_len){ + *idx = oi->oi_dir[i]; + return (const struct dt_key *)&pack->fp_area; + } + } + CERROR("Unsupported packed fid size: %d ("DFID")\n", + pack->fp_len, PFID(fid)); + LBUG(); + return NULL; +} + +int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, + const struct lu_fid *fid, struct osd_inode_id *id) +{ + int rc; + + if (fid_is_igif(fid)) { + lu_igif_to_id(fid, id); + rc = 0; + } else { + struct dt_object *idx; + const struct dt_key *key; + + key = oi_fid_key(info, oi, fid, &idx); + rc = idx->do_index_ops->dio_lookup(info->oti_env, idx, + (struct dt_rec *)id, key, + BYPASS_CAPA); + id->oii_ino = be32_to_cpu(id->oii_ino); + id->oii_gen = be32_to_cpu(id->oii_gen); + } + return rc; +} + +int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, + const struct lu_fid *fid, const struct osd_inode_id *id0, + struct thandle *th) +{ + struct dt_object *idx; + struct osd_inode_id *id; + const struct dt_key *key; + + if (fid_is_igif(fid)) + return 0; + + key = oi_fid_key(info, oi, fid, &idx); + id = &info->oti_id; + id->oii_ino = cpu_to_be32(id0->oii_ino); + id->oii_gen = cpu_to_be32(id0->oii_gen); + return idx->do_index_ops->dio_insert(info->oti_env, idx, + (const struct dt_rec *)id, + key, th, BYPASS_CAPA); +} + +int osd_oi_delete(struct osd_thread_info *info, + struct osd_oi *oi, const struct lu_fid *fid, + struct thandle *th) +{ + struct dt_object *idx; + const struct dt_key *key; + + if (fid_is_igif(fid)) + return 0; + + key = oi_fid_key(info, oi, fid, &idx); + return idx->do_index_ops->dio_delete(info->oti_env, idx, + key, th, BYPASS_CAPA); +} + diff --git a/lustre/osd/osd_oi.h b/lustre/osd/osd_oi.h new file mode 100644 index 0000000..66799b5 --- /dev/null +++ b/lustre/osd/osd_oi.h @@ -0,0 +1,92 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/osd/osd_oi.h + * OSD Object Index + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ +/* + * Object Index (oi) service runs in the bottom layer of server stack. In + * translates fid local to this service to the storage cookie that uniquely + * and efficiently identifies object (inode) of the underlying file system. + */ + +#ifndef _OSD_OI_H +#define _OSD_OI_H + +#if defined(__KERNEL__) + +/* struct rw_semaphore */ +#include <linux/rwsem.h> +#include <lu_object.h> + +struct lu_fid; +struct osd_thread_info; +struct lu_site; +struct thandle; + +struct dt_device; + +enum { + OSD_OI_FID_SMALL, + OSD_OI_FID_OTHER, + OSD_OI_FID_NR +}; + +/* + * Object Index (oi) instance. + */ +struct osd_oi { + /* + * underlying index object, where fid->id mapping in stored. + */ + struct dt_object *oi_dir[OSD_OI_FID_NR]; +}; + +/* + * Storage cookie. Datum uniquely identifying inode on the underlying file + * system. + * + * XXX Currently this is ext2/ext3/ldiskfs specific thing. In the future this + * should be generalized to work with other local file systems. + */ +struct osd_inode_id { + __u32 oii_ino; /* inode number */ + __u32 oii_gen; /* inode generation */ +}; + +int osd_oi_init(struct osd_thread_info *info, + struct osd_oi *oi, struct dt_device *dev); +void osd_oi_fini(struct osd_thread_info *info, struct osd_oi *oi); + +int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, + const struct lu_fid *fid, const struct osd_inode_id *id, + struct thandle *th); +int osd_oi_delete(struct osd_thread_info *info, + struct osd_oi *oi, const struct lu_fid *fid, + struct thandle *th); + +#endif /* __KERNEL__ */ +#endif /* _OSD_OI_H */ diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 1992393..2b14646 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -55,6 +55,10 @@ static int oss_num_threads; CFS_MODULE_PARM(oss_num_threads, "i", int, 0444, "number of OSS service threads to start"); +static int ost_num_threads; +CFS_MODULE_PARM(ost_num_threads, "i", int, 0444, + "number of OST service threads to start (deprecated)"); + void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req) { struct oti_req_ack_lock *ack_lock; @@ -119,9 +123,12 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); + repbody->oa = body->oa; oinfo.oi_oa = &repbody->oa; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg, + REQ_REC_OFF + 1); req->rq_status = obd_getattr(exp, &oinfo); RETURN(0); } @@ -138,7 +145,7 @@ static int ost_statfs(struct ptlrpc_request *req) osfs = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*osfs)); - req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, + req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, cfs_time_current_64() - HZ); if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC)) osfs->os_bfree = osfs->os_bavail = 64; @@ -181,7 +188,7 @@ static int ost_punch_lock_get(struct obd_export *exp, struct obdo *oa, struct lustre_handle *lh) { int flags; - struct ldlm_res_id res_id = { .name = { oa->o_id } }; + struct ldlm_res_id res_id = { .name = { oa->o_id, 0, oa->o_gr, 0} }; ldlm_policy_data_t policy; __u64 start; __u64 finis; @@ -216,7 +223,7 @@ static int ost_punch_lock_get(struct obd_export *exp, struct obdo *oa, else policy.l_extent.end = finis | ~CFS_PAGE_MASK; - RETURN(ldlm_cli_enqueue_local(exp->exp_obd->obd_namespace, res_id, + RETURN(ldlm_cli_enqueue_local(exp->exp_obd->obd_namespace, &res_id, LDLM_EXTENT, &policy, LCK_PW, &flags, ldlm_blocking_ast, ldlm_completion_ast, ldlm_glimpse_ast, NULL, 0, NULL, lh)); @@ -277,6 +284,9 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req, */ oinfo.oi_oa->o_valid &= ~OBD_MD_FLFLAGS; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg, + REQ_REC_OFF + 1); req->rq_status = obd_punch(exp, &oinfo, oti, NULL); ost_punch_lock_put(exp, oinfo.oi_oa, &lh); } @@ -287,6 +297,7 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req, static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) { struct ost_body *body, *repbody; + struct lustre_capa *capa = NULL; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) }; ENTRY; @@ -295,6 +306,9 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) if (body == NULL) RETURN(-EFAULT); + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) RETURN(rc); @@ -303,7 +317,7 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size, - repbody->oa.o_blocks); + repbody->oa.o_blocks, capa); RETURN(0); } @@ -326,9 +340,12 @@ static int ost_setattr(struct obd_export *exp, struct ptlrpc_request *req, repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); + repbody->oa = body->oa; oinfo.oi_oa = &repbody->oa; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg, + REQ_REC_OFF + 1); req->rq_status = obd_setattr(exp, &oinfo, oti); RETURN(0); } @@ -366,7 +383,8 @@ static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo, for (j = 0; j < ioo->ioo_bufcnt; j++, rnbidx++) { obd_off offset = rnb[rnbidx].offset; obd_off p0 = offset >> CFS_PAGE_SHIFT; - obd_off pn = (offset + rnb[rnbidx].len - 1)>>CFS_PAGE_SHIFT; + obd_off pn = (offset + rnb[rnbidx].len - 1) >> + CFS_PAGE_SHIFT; LASSERT(rnbidx < nrnb); @@ -409,7 +427,7 @@ static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo, do { obd_off poff = off & ~CFS_PAGE_MASK; int pnob = (poff + nob > CFS_PAGE_SIZE) ? - CFS_PAGE_SIZE - poff : nob; + PAGE_SIZE - poff : nob; LASSERT(page < npages); pp_rnb[page].len = pnob; @@ -449,7 +467,8 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc) /* corrupt the data before we compute the checksum, to * simulate a client->OST data error */ - if (i == 0 &&OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_CHECKSUM_RECEIVE)) + if (i == 0 && + OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_CHECKSUM_RECEIVE)) memcpy(ptr, "bad3", min(4, len)); cksum = crc32_le(cksum, ptr, len); /* corrupt the data after we compute the checksum, to @@ -516,7 +535,8 @@ static int ost_brw_lock_get(int mode, struct obd_export *exp, { int flags = 0; int nrbufs = obj->ioo_bufcnt; - struct ldlm_res_id res_id = { .name = { obj->ioo_id } }; + struct ldlm_res_id res_id = { .name = { obj->ioo_id, 0, + obj->ioo_gr, 0} }; ldlm_policy_data_t policy; int i; @@ -537,7 +557,7 @@ static int ost_brw_lock_get(int mode, struct obd_export *exp, policy.l_extent.end = (nb[nrbufs - 1].offset + nb[nrbufs - 1].len - 1) | ~CFS_PAGE_MASK; - RETURN(ldlm_cli_enqueue_local(exp->exp_obd->obd_namespace, res_id, + RETURN(ldlm_cli_enqueue_local(exp->exp_obd->obd_namespace, &res_id, LDLM_EXTENT, &policy, mode, &flags, ldlm_blocking_ast, ldlm_completion_ast, ldlm_glimpse_ast, NULL, 0, NULL, lh)); @@ -604,7 +624,8 @@ static int ost_prolong_locks_iter(struct ldlm_lock *lock, void *data) static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj, struct niobuf_remote *nb, ldlm_mode_t mode) { - struct ldlm_res_id res_id = { .name = { obj->ioo_id } }; + struct ldlm_res_id res_id = { .name = { obj->ioo_id, 0, + obj->ioo_gr, 0} }; int nrbufs = obj->ioo_bufcnt; struct ost_prolong_data opd; @@ -631,6 +652,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) struct niobuf_local *local_nb; struct obd_ioobj *ioo; struct ost_body *body, *repbody; + struct lustre_capa *capa = NULL; struct l_wait_info lwi; struct lustre_handle lockh = { 0 }; int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; @@ -638,6 +660,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) int no_reply = 0; ENTRY; + req->rq_bulk_read = 1; + if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK)) GOTO(out, rc = -EIO); @@ -660,7 +684,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) niocount = ioo->ioo_bufcnt; if (niocount > PTLRPC_MAX_BRW_PAGES) { - DEBUG_REQ(D_ERROR, req, "bulk has too many pages (%d)\n", + DEBUG_REQ(D_ERROR, req, "bulk has too many pages (%d)", niocount); GOTO(out, rc = -EFAULT); } @@ -677,6 +701,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_swab_niobuf_remote (&remote_nb[i]); } + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) GOTO(out, rc); @@ -722,7 +749,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) } rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1, - ioo, npages, pp_rnb, local_nb, oti); + ioo, npages, pp_rnb, local_nb, oti, capa); if (rc != 0) GOTO(out_lock, rc); @@ -769,8 +796,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc == 0) { if (desc->bd_export->exp_failed) rc = -ENOTCONN; - else + else { + sptlrpc_svc_wrap_bulk(req, desc); + rc = ptlrpc_start_bulk_transfer(desc); + } + if (rc == 0) { lwi = LWI_TIMEOUT_INTERVAL(obd_timeout * HZ / 4, HZ, ost_bulk_timeout, desc); @@ -796,7 +827,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) rc = -ETIMEDOUT; } } else { - DEBUG_REQ(D_ERROR, req, "bulk PUT failed: rc %d\n", rc); + DEBUG_REQ(D_ERROR, req, "bulk PUT failed: rc %d", rc); } comms_error = rc != 0; } @@ -813,13 +844,13 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); } - out_lock: +out_lock: ost_brw_lock_put(LCK_PR, ioo, pp_rnb, &lockh); - out_bulk: +out_bulk: ptlrpc_free_bulk(desc); if (no_reply) RETURN(rc); - out: +out: LASSERT(rc <= 0); if (rc == 0) { req->rq_status = nob; @@ -857,6 +888,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) struct ost_body *body, *repbody; struct l_wait_info lwi; struct lustre_handle lockh = {0}; + struct lustre_capa *capa = NULL; __u32 *rcs; int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; int objcount, niocount, npages, comms_error = 0; @@ -865,6 +897,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) int no_reply = 0; ENTRY; + req->rq_bulk_write = 1; + if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK)) GOTO(out, rc = -EIO); @@ -906,7 +940,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) } if (niocount > PTLRPC_MAX_BRW_PAGES) { - DEBUG_REQ(D_ERROR, req, "bulk has too many pages (%d)\n", + DEBUG_REQ(D_ERROR, req, "bulk has too many pages (%d)", niocount); GOTO(out, rc = -EFAULT); } @@ -923,6 +957,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_swab_niobuf_remote (&remote_nb[i]); } + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3); + size[REPLY_REC_OFF + 1] = niocount * sizeof(*rcs); rc = lustre_pack_reply(req, 3, size, NULL); if (rc != 0) @@ -974,9 +1011,17 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) /* obd_preprw clobbers oa->valid, so save what we need */ client_cksum = body->oa.o_valid & OBD_MD_FLCKSUM ? body->oa.o_cksum : 0; + + /* Because we already sync grant info with client when reconnect, + * grant info will be cleared for resent req, then fed_grant and + * total_grant will not be modified in following preprw_write */ + if (lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY)) { + DEBUG_REQ(D_CACHE, req, "clear resent/replay req grant info\n"); + body->oa.o_valid &= ~OBD_MD_FLGRANT; + } rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount, - ioo, npages, pp_rnb, local_nb, oti); + ioo, npages, pp_rnb, local_nb, oti, capa); if (rc != 0) GOTO(out_lock, rc); @@ -1016,7 +1061,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) rc = -ETIMEDOUT; } } else { - DEBUG_REQ(D_ERROR, req, "ptlrpc_bulk_get failed: rc %d\n", rc); + DEBUG_REQ(D_ERROR, req, "ptlrpc_bulk_get failed: rc %d", rc); } comms_error = rc != 0; @@ -1026,7 +1071,6 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (unlikely(client_cksum != 0 && rc == 0)) { static int cksum_counter; - server_cksum = ost_checksum_bulk(desc); repbody->oa.o_valid |= OBD_MD_FLCKSUM; repbody->oa.o_cksum = server_cksum; @@ -1042,6 +1086,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) } } + sptlrpc_svc_unwrap_bulk(req, desc); + /* Must commit after prep above in all cases */ rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa, objcount, ioo, npages, local_nb, oti, rc); @@ -1066,21 +1112,22 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) router = libcfs_nid2str(desc->bd_sender); } - LCONSOLE_ERROR("%s: BAD WRITE CHECKSUM: %s from %s%s%s inum " - LPU64"/"LPU64" object "LPU64"/"LPU64 - " extent ["LPU64"-"LPU64"]\n", - req->rq_export->exp_obd->obd_name, msg, - libcfs_id2str(req->rq_peer), - via, router, - body->oa.o_valid & OBD_MD_FLFID ? + LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: %s from " + "%s%s%s inum "LPU64"/"LPU64" object " + LPU64"/"LPU64" extent ["LPU64"-"LPU64"]\n", + req->rq_export->exp_obd->obd_name, msg, + libcfs_id2str(req->rq_peer), + via, router, + body->oa.o_valid & OBD_MD_FLFID ? body->oa.o_fid : (__u64)0, - body->oa.o_valid & OBD_MD_FLFID ? + body->oa.o_valid & OBD_MD_FLFID ? body->oa.o_generation :(__u64)0, - body->oa.o_id, - body->oa.o_valid & OBD_MD_FLGROUP ? + body->oa.o_id, + body->oa.o_valid & OBD_MD_FLGROUP ? body->oa.o_gr : (__u64)0, - pp_rnb[0].offset, - pp_rnb[npages-1].offset+pp_rnb[npages-1].len-1); + pp_rnb[0].offset, + pp_rnb[npages-1].offset+pp_rnb[npages-1].len + - 1 ); CERROR("client csum %x, original server csum %x, " "server csum now %x\n", client_cksum, server_cksum, new_cksum); @@ -1106,13 +1153,13 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) LASSERT(j == npages); } - out_lock: +out_lock: ost_brw_lock_put(LCK_PW, ioo, pp_rnb, &lockh); - out_bulk: +out_bulk: ptlrpc_free_bulk(desc); if (no_reply) RETURN(rc); - out: +out: if (rc == 0) { oti_to_request(oti, req); target_committed_to_req(req); @@ -1231,7 +1278,7 @@ static int ost_handle_quotacheck(struct ptlrpc_request *req) oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl), lustre_swab_obd_quotactl); - if (oqctl == NULL) + if (oqctl == NULL) RETURN(-EPROTO); rc = lustre_pack_reply(req, 1, NULL, NULL); @@ -1244,6 +1291,19 @@ static int ost_handle_quotacheck(struct ptlrpc_request *req) RETURN(0); } +static int ost_llog_handle_connect(struct obd_export *exp, + struct ptlrpc_request *req) +{ + struct llogd_conn_body *body; + int rc; + ENTRY; + + body = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*body)); + rc = obd_llog_connect(exp, body); + RETURN(rc); +} + + static int ost_filter_recovery_request(struct ptlrpc_request *req, struct obd_device *obd, int *process) { @@ -1267,10 +1327,8 @@ static int ost_filter_recovery_request(struct ptlrpc_request *req, default: DEBUG_REQ(D_ERROR, req, "not permitted during recovery"); - *process = 0; - /* XXX what should we set rq_status to here? */ - req->rq_status = -EAGAIN; - RETURN(ptlrpc_error(req)); + *process = -EAGAIN; + RETURN(0); } } @@ -1282,6 +1340,9 @@ int ost_msg_check_version(struct lustre_msg *msg) case OST_CONNECT: case OST_DISCONNECT: case OBD_PING: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1337,7 +1398,7 @@ int ost_msg_check_version(struct lustre_msg *msg) return rc; } -static int ost_handle(struct ptlrpc_request *req) +int ost_handle(struct ptlrpc_request *req) { struct obd_trans_info trans_info = { 0, }; struct obd_trans_info *oti = &trans_info; @@ -1346,9 +1407,18 @@ static int ost_handle(struct ptlrpc_request *req) ENTRY; LASSERT(current->journal_info == NULL); + + /* primordial rpcs don't affect server recovery */ + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: + GOTO(out, rc = 0); + } + /* XXX identical to MDS */ if (lustre_msg_get_opc(req->rq_reqmsg) != OST_CONNECT) { - int abort_recovery, recovering; + int recovering; if (req->rq_export == NULL) { CDEBUG(D_HA,"operation %d on unconnected OST from %s\n", @@ -1362,24 +1432,23 @@ static int ost_handle(struct ptlrpc_request *req) /* Check for aborted recovery. */ spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; recovering = obd->obd_recovering; spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - } else if (recovering) { + if (recovering) { rc = ost_filter_recovery_request(req, obd, &should_process); if (rc || !should_process) RETURN(rc); + else if (should_process < 0) { + req->rq_status = should_process; + rc = ptlrpc_error(req); + RETURN(rc); + } } } oti_init(oti, req); - rc = ost_msg_check_version(req->rq_reqmsg); - if (rc) - RETURN(rc); - + rc = ost_msg_check_version(req->rq_reqmsg); if (rc) RETURN(rc); @@ -1388,7 +1457,7 @@ static int ost_handle(struct ptlrpc_request *req) case OST_CONNECT: { CDEBUG(D_INODE, "connect\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0); - rc = target_handle_connect(req, ost_handle); + rc = target_handle_connect(req); if (!rc) obd = req->rq_export->exp_obd; break; @@ -1426,6 +1495,14 @@ static int ost_handle(struct ptlrpc_request *req) break; case OST_WRITE: CDEBUG(D_INODE, "write\n"); + /* req->rq_request_portal would be nice, if it was set */ + if (req->rq_rqbd->rqbd_service->srv_req_portal !=OST_IO_PORTAL){ + CERROR("%s: deny write request from %s to portal %u\n", + req->rq_export->exp_obd->obd_name, + obd_export_nid2str(req->rq_export), + req->rq_rqbd->rqbd_service->srv_req_portal); + GOTO(out, rc = -EPROTO); + } OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC)) GOTO(out, rc = -ENOSPC); @@ -1437,6 +1514,14 @@ static int ost_handle(struct ptlrpc_request *req) RETURN(rc); case OST_READ: CDEBUG(D_INODE, "read\n"); + /* req->rq_request_portal would be nice, if it was set */ + if (req->rq_rqbd->rqbd_service->srv_req_portal !=OST_IO_PORTAL){ + CERROR("%s: deny read request from %s to portal %u\n", + req->rq_export->exp_obd->obd_name, + obd_export_nid2str(req->rq_export), + req->rq_rqbd->rqbd_service->srv_req_portal); + GOTO(out, rc = -EPROTO); + } OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); rc = ost_brw_read(req, oti); LASSERT(current->journal_info == NULL); @@ -1483,8 +1568,8 @@ static int ost_handle(struct ptlrpc_request *req) break; /* FIXME - just reply status */ case LLOG_ORIGIN_CONNECT: - DEBUG_REQ(D_INODE, req, "log connect\n"); - rc = llog_handle_connect(req); + DEBUG_REQ(D_INODE, req, "log connect"); + rc = ost_llog_handle_connect(req->rq_export, req); req->rq_status = rc; rc = lustre_pack_reply(req, 1, NULL, NULL); if (rc) @@ -1538,22 +1623,13 @@ static int ost_handle(struct ptlrpc_request *req) target_committed_to_req(req); out: - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { - if (obd && obd->obd_recovering) { - DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); - return target_queue_final_reply(req, rc); - } - /* Lost a race with recovery; let the error path DTRT. */ - rc = req->rq_status = -ENOTCONN; - } - if (!rc) oti_to_request(oti, req); target_send_reply(req, rc, fail); return 0; } - +EXPORT_SYMBOL(ost_handle); /* * free per-thread pool created by ost_thread_init(). */ @@ -1596,7 +1672,7 @@ static int ost_thread_init(struct ptlrpc_thread *thread) LASSERT(thread != NULL); LASSERT(thread->t_data == NULL); - LASSERT(thread->t_id <= OSS_THREADS_MAX); + LASSERTF(thread->t_id <= OSS_THREADS_MAX, "%u\n", thread->t_id); OBD_ALLOC_PTR(tls); if (tls != NULL) { @@ -1618,8 +1694,10 @@ static int ost_thread_init(struct ptlrpc_thread *thread) RETURN(result); } +#define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000) + /* Sigh - really, this is an OSS, the _server_, not the _target_ */ -static int ost_setup(struct obd_device *obd, obd_count len, void *buf) +static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) { struct ost_obd *ost = &obd->u.ost; struct lprocfs_static_vars lvars; @@ -1664,11 +1742,13 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, OST_MAXREPSIZE, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, - obd_timeout * 1000, ost_handle, LUSTRE_OSS_NAME, - obd->obd_proc_entry, ost_print_req, - oss_min_threads, oss_max_threads, "ll_ost"); + OST_WATCHDOG_TIMEOUT, ost_handle, + LUSTRE_OSS_NAME, obd->obd_proc_entry, + ost_print_req, oss_min_threads, + oss_max_threads, "ll_ost", + LCT_DT_THREAD); if (ost->ost_service == NULL) { - CERROR("failed to start OST service\n"); + CERROR("failed to start service\n"); GOTO(out_lprocfs, rc = -ENOMEM); } @@ -1680,9 +1760,9 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, OST_MAXREPSIZE, OST_CREATE_PORTAL, OSC_REPLY_PORTAL, - obd_timeout * 1000, ost_handle, "ost_create", - obd->obd_proc_entry, ost_print_req, - 1, 1, "ll_ost_creat"); + OST_WATCHDOG_TIMEOUT, ost_handle, "ost_create", + obd->obd_proc_entry, ost_print_req, 1, 1, + "ll_ost_creat", LCT_DT_THREAD); if (ost->ost_create_service == NULL) { CERROR("failed to start OST create service\n"); GOTO(out_service, rc = -ENOMEM); @@ -1696,9 +1776,10 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, OST_MAXREPSIZE, OST_IO_PORTAL, OSC_REPLY_PORTAL, - obd_timeout * 1000, ost_handle, "ost_io", + OST_WATCHDOG_TIMEOUT, ost_handle, "ost_io", obd->obd_proc_entry, ost_print_req, - oss_min_threads, oss_max_threads, "ll_ost_io"); + oss_min_threads, oss_max_threads, + "ll_ost_io", LCT_DT_THREAD); if (ost->ost_io_service == NULL) { CERROR("failed to start OST I/O service\n"); GOTO(out_create, rc = -ENOMEM); @@ -1799,8 +1880,16 @@ static int __init ost_init(void) ENTRY; lprocfs_init_vars(ost, &lvars); - rc = class_register_type(&ost_obd_ops, lvars.module_vars, - LUSTRE_OSS_NAME); + rc = class_register_type(&ost_obd_ops, NULL, lvars.module_vars, + LUSTRE_OSS_NAME, NULL); + + if (ost_num_threads != 0 && oss_num_threads == 0) { + LCONSOLE_INFO("ost_num_threads module parameter is deprecated, " + "use oss_num_threads instead or unset both for " + "dynamic thread startup\n"); + oss_num_threads = ost_num_threads; + } + RETURN(rc); } diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index cc625c6..bc45564 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -12,10 +12,13 @@ ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o -ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o +ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o +ptlrpc_objs += sec.o sec_bulk.o sec_null.o sec_plain.o sec_lproc.o ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs) +@GSS_TRUE@subdir-m += gss + default: all ldlm_%.c: @LUSTRE@/ldlm/ldlm_%.c diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index ca1231a..c50cbcf 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -18,7 +18,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \ - ptlrpc_internal.h $(LDLM_COMM_SOURCES) + ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_null.c sec_plain.c \ + sec_lproc.c lproc_ptlrpc.c $(LDLM_COMM_SOURCES) if LIBLUSTRE @@ -57,6 +58,9 @@ ptlrpc_SOURCES := \ recov_thread.c \ service.c \ wiretest.c \ + sec.c \ + sec_null.c \ + sec_plain.c \ $(LDLM_COMM_SOURCES) ptlrpc_CFLAGS := $(EXTRA_KCFLAGS) @@ -71,6 +75,10 @@ endif # DARWIN endif # MODULES +if GSS +SUBDIRS = gss +endif + install-data-hook: $(install_data_hook) DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ ldlm_*.c l_lock.c diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 39a77af..ebf03fc 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -179,6 +179,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ LASSERT(!desc->bd_network_rw); /* network hands off or */ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); + + sptlrpc_enc_pool_put_pages(desc); + if (desc->bd_export) class_export_put(desc->bd_export); else @@ -200,8 +203,9 @@ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool) list_for_each_safe(l, tmp, &pool->prp_req_list) { req = list_entry(l, struct ptlrpc_request, rq_list); list_del(&req->rq_list); - LASSERT (req->rq_reqmsg); - OBD_FREE(req->rq_reqmsg, pool->prp_rq_size); + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len == pool->prp_rq_size); + OBD_FREE(req->rq_reqbuf, pool->prp_rq_size); OBD_FREE(req, sizeof(*req)); } OBD_FREE(pool, sizeof(*pool)); @@ -212,7 +216,7 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) int i; int size = 1; - while (size < pool->prp_rq_size) + while (size < pool->prp_rq_size + SPTLRPC_MAX_PAYLOAD) size <<= 1; LASSERTF(list_empty(&pool->prp_req_list) || size == pool->prp_rq_size, @@ -234,7 +238,8 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) OBD_FREE(req, sizeof(struct ptlrpc_request)); return; } - req->rq_reqmsg = msg; + req->rq_reqbuf = msg; + req->rq_reqbuf_len = size; req->rq_pool = pool; spin_lock(&pool->prp_lock); list_add_tail(&req->rq_list, &pool->prp_req_list); @@ -273,7 +278,7 @@ struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int num_rq, int msgsize, static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool) { struct ptlrpc_request *request; - struct lustre_msg *reqmsg; + struct lustre_msg *reqbuf; if (!pool) return NULL; @@ -294,21 +299,32 @@ static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_po list_del(&request->rq_list); spin_unlock(&pool->prp_lock); - LASSERT(request->rq_reqmsg); + LASSERT(request->rq_reqbuf); LASSERT(request->rq_pool); - reqmsg = request->rq_reqmsg; + reqbuf = request->rq_reqbuf; memset(request, 0, sizeof(*request)); - request->rq_reqmsg = reqmsg; + request->rq_reqbuf = reqbuf; + request->rq_reqbuf_len = pool->prp_rq_size; request->rq_pool = pool; - request->rq_reqlen = pool->prp_rq_size; return request; } +static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) +{ + struct ptlrpc_request_pool *pool = request->rq_pool; + + spin_lock(&pool->prp_lock); + LASSERT(list_empty(&request->rq_list)); + list_add_tail(&request->rq_list, &pool->prp_req_list); + spin_unlock(&pool->prp_lock); +} + struct ptlrpc_request * ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs, - struct ptlrpc_request_pool *pool) + struct ptlrpc_request_pool *pool, + struct ptlrpc_cli_ctx *ctx) { struct ptlrpc_request *request = NULL; int rc; @@ -333,12 +349,23 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, RETURN(NULL); } + request->rq_import = class_import_get(imp); + + if (unlikely(ctx)) + request->rq_cli_ctx = sptlrpc_ctx_get(ctx); + else { + rc = sptlrpc_req_get_ctx(request); + if (rc) + GOTO(out_free, rc); + } + + sptlrpc_req_set_flavor(request, opcode); + rc = lustre_pack_request(request, imp->imp_msg_magic, count, lengths, bufs); if (rc) { LASSERT(!request->rq_pool); - OBD_FREE(request, sizeof(*request)); - RETURN(NULL); + GOTO(out_ctx, rc); } lustre_msg_add_version(request->rq_reqmsg, version); @@ -349,7 +376,6 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, request->rq_timeout = obd_timeout; request->rq_send_state = LUSTRE_IMP_FULL; request->rq_type = PTL_RPC_MSG_REQUEST; - request->rq_import = class_import_get(imp); request->rq_export = NULL; request->rq_req_cbid.cbid_fn = request_out_callback; @@ -367,6 +393,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, spin_lock_init(&request->rq_lock); CFS_INIT_LIST_HEAD(&request->rq_list); CFS_INIT_LIST_HEAD(&request->rq_replay_list); + CFS_INIT_LIST_HEAD(&request->rq_ctx_chain); CFS_INIT_LIST_HEAD(&request->rq_set_chain); cfs_waitq_init(&request->rq_reply_waitq); request->rq_xid = ptlrpc_next_xid(); @@ -376,6 +403,15 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, lustre_msg_set_flags(request->rq_reqmsg, 0); RETURN(request); +out_ctx: + sptlrpc_req_put_ctx(request); +out_free: + class_import_put(imp); + if (request->rq_pool) + __ptlrpc_free_req_to_pool(request); + else + OBD_FREE(request, sizeof(*request)); + return NULL; } struct ptlrpc_request * @@ -383,7 +419,7 @@ ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs) { return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs, - NULL); + NULL, NULL); } struct ptlrpc_request_set *ptlrpc_prep_set(void) @@ -500,7 +536,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, LASSERT (status != NULL); *status = 0; - if (imp->imp_state == LUSTRE_IMP_NEW) { + if (req->rq_ctx_init || req->rq_ctx_fini) { + /* always allow ctx init/fini rpc go through */ + } else if (imp->imp_state == LUSTRE_IMP_NEW) { DEBUG_REQ(D_ERROR, req, "Uninitialized import."); *status = -EIO; LBUG(); @@ -570,8 +608,13 @@ static int ptlrpc_check_status(struct ptlrpc_request *req) err = lustre_msg_get_status(req->rq_repmsg); if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err == %d", - err); + LCONSOLE_ERROR_MSG(0x011, "an error ocurred while communicating" + " with %s The %s operation failed with %d", + req->rq_export ? + obd_export_nid2str(req->rq_export) + : "(no nid)", + ll_opcode2str(lustre_msg_get_opc(req->rq_reqmsg)), + err); RETURN(err < 0 ? err : -EINVAL); } @@ -592,6 +635,7 @@ static int after_reply(struct ptlrpc_request *req) ENTRY; LASSERT(!req->rq_receiving_reply); + LASSERT(req->rq_nob_received <= req->rq_repbuf_len); /* NB Until this point, the whole of the incoming message, * including buflens, status etc is in the sender's byte order. */ @@ -600,27 +644,37 @@ static int after_reply(struct ptlrpc_request *req) /* Clear reply swab mask; this is a new reply in sender's byte order */ req->rq_rep_swab_mask = 0; #endif - LASSERT (req->rq_nob_received <= req->rq_replen); - rc = lustre_unpack_msg(req->rq_repmsg, req->rq_nob_received); + rc = sptlrpc_cli_unwrap_reply(req); + if (rc) { + DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc); + RETURN(rc); + } + + /* security layer unwrap might ask resend this request */ + if (req->rq_resend) + RETURN(0); + + rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen); if (rc) { - DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d\n", rc); + DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc); RETURN(-EPROTO); } rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF); if (rc) { - DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d\n", rc); + DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc); RETURN(-EPROTO); } if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY && lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)\n", + DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)", lustre_msg_get_type(req->rq_repmsg)); RETURN(-EPROTO); } rc = ptlrpc_check_status(req); + imp->imp_connect_error = rc; /* Either we've been evicted, or the server has failed for * some reason. Try to reconnect, and if that fails, punt to the @@ -656,9 +710,10 @@ static int after_reply(struct ptlrpc_request *req) } /* Replay-enabled imports return commit-status information. */ - if (lustre_msg_get_last_committed(req->rq_repmsg)) + if (lustre_msg_get_last_committed(req->rq_repmsg)) { imp->imp_peer_committed_transno = lustre_msg_get_last_committed(req->rq_repmsg); + } ptlrpc_free_committed(imp); spin_unlock(&imp->imp_lock); } @@ -710,6 +765,20 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) spin_unlock(&imp->imp_lock); lustre_msg_set_status(req->rq_reqmsg, cfs_curproc_pid()); + + rc = sptlrpc_req_refresh_ctx(req, -1); + if (rc) { + if (req->rq_err) { + req->rq_status = rc; + RETURN(1); + } else { + /* here begins timeout counting */ + req->rq_sent = CURRENT_SECONDS; + req->rq_wait_ctx = 1; + RETURN(0); + } + } + CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc" " %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, @@ -766,6 +835,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) if (req->rq_err) { ptlrpc_unregister_reply(req); + req->rq_replied = 0; if (req->rq_status == 0) req->rq_status = -EIO; req->rq_phase = RQ_PHASE_INTERPRET; @@ -782,7 +852,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) * path sets rq_intr irrespective of whether ptlrpcd has * seen a timeout. our policy is to only interpret * interrupted rpcs after they have timed out */ - if (req->rq_intr && (req->rq_timedout || req->rq_waiting)) { + if (req->rq_intr && (req->rq_timedout || req->rq_waiting || + req->rq_wait_ctx)) { /* NB could be on delayed list */ ptlrpc_unregister_reply(req); req->rq_status = -EINTR; @@ -796,9 +867,16 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } if (req->rq_phase == RQ_PHASE_RPC) { - if (req->rq_timedout||req->rq_waiting||req->rq_resend) { + if (req->rq_timedout || req->rq_resend || + req->rq_waiting || req->rq_wait_ctx) { int status; + /* rq_wait_ctx is only touched in ptlrpcd, + * no lock needed here. + */ + if (req->rq_wait_ctx) + goto check_ctx; + ptlrpc_unregister_reply(req); spin_lock(&imp->imp_lock); @@ -815,7 +893,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); } - if (req->rq_no_resend) { + if (req->rq_no_resend && !req->rq_wait_ctx) { req->rq_status = -ENOTCONN; req->rq_phase = RQ_PHASE_INTERPRET; spin_unlock(&imp->imp_lock); @@ -843,6 +921,23 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) old_xid, req->rq_xid); } } +check_ctx: + status = sptlrpc_req_refresh_ctx(req, -1); + if (status) { + if (req->rq_err) { + req->rq_status = status; + force_timer_recalc = 1; + } + if (!req->rq_wait_ctx) { + /* begins timeout counting */ + req->rq_sent = CURRENT_SECONDS; + req->rq_wait_ctx = 1; + } + continue; + } else { + req->rq_sent = 0; + req->rq_wait_ctx = 0; + } rc = ptl_send_rpc(req, 0); if (rc) { @@ -872,10 +967,10 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) /* Add this req to the delayed list so it can be errored if the import is evicted after recovery. */ - spin_lock(&req->rq_lock); + spin_lock(&imp->imp_lock); list_add_tail(&req->rq_list, &imp->imp_delayed_list); - spin_unlock(&req->rq_lock); + spin_unlock(&imp->imp_lock); continue; } @@ -947,7 +1042,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) int rc = 0; ENTRY; - DEBUG_REQ(D_ERROR|D_NETERROR, req, "timeout (sent at %lu, %lus ago)", + DEBUG_REQ(D_ERROR|D_NETERROR, req, "%s (sent at %lu, %lus ago)", + req->rq_net_err ? "network error" : "timeout", (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent); if (imp != NULL && obd_debug_peer_on_timeout) @@ -955,6 +1051,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) spin_lock(&req->rq_lock); req->rq_timedout = 1; + req->rq_wait_ctx = 0; spin_unlock(&req->rq_lock); ptlrpc_unregister_reply (req); @@ -976,7 +1073,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) /* If this request is for recovery or other primordial tasks, * then error it out here. */ - if (req->rq_send_state != LUSTRE_IMP_FULL || + if (req->rq_ctx_init || req->rq_ctx_fini || + req->rq_send_state != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { spin_lock(&req->rq_lock); req->rq_status = -ETIMEDOUT; @@ -984,7 +1082,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) spin_unlock(&req->rq_lock); RETURN(1); } - + /* if request can't be resend we can't wait answer after timeout */ if (req->rq_no_resend) { DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:"); @@ -1148,15 +1246,6 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) RETURN(rc); } -static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) -{ - struct ptlrpc_request_pool *pool = request->rq_pool; - - spin_lock(&pool->prp_lock); - list_add_tail(&request->rq_list, &pool->prp_req_list); - spin_unlock(&pool->prp_lock); -} - static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) { ENTRY; @@ -1169,6 +1258,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */ LASSERTF(list_empty(&request->rq_list), "req %p\n", request); LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); + LASSERT(request->rq_cli_ctx); /* We must take it off the imp_replay_list first. Otherwise, we'll set * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ @@ -1187,10 +1277,8 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) LBUG(); } - if (request->rq_repmsg != NULL) { - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - } + if (request->rq_repbuf != NULL) + sptlrpc_cli_free_repbuf(request); if (request->rq_export != NULL) { class_export_put(request->rq_export); request->rq_export = NULL; @@ -1202,15 +1290,15 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (request->rq_bulk != NULL) ptlrpc_free_bulk(request->rq_bulk); - if (request->rq_pool) { + if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL) + sptlrpc_cli_free_reqbuf(request); + + sptlrpc_req_put_ctx(request); + + if (request->rq_pool) __ptlrpc_free_req_to_pool(request); - } else { - if (request->rq_reqmsg != NULL) { - OBD_FREE(request->rq_reqmsg, request->rq_reqlen); - request->rq_reqmsg = NULL; - } + else OBD_FREE(request, sizeof(*request)); - } EXIT; } @@ -1271,7 +1359,6 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) int rc; cfs_waitq_t *wq; struct l_wait_info lwi; - ENTRY; LASSERT(!in_interrupt ()); /* might sleep */ @@ -1299,7 +1386,6 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) LASSERT (rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout"); } - EXIT; } /* caller must hold imp->imp_lock */ @@ -1414,6 +1500,10 @@ static int expired_request(void *data) struct ptlrpc_request *req = data; ENTRY; + /* some failure can suspend regular timeouts */ + if (ptlrpc_check_suspend()) + RETURN(1); + RETURN(ptlrpc_expire_one_request(req)); } @@ -1578,6 +1668,23 @@ restart: list_add_tail(&req->rq_list, &imp->imp_sending_list); spin_unlock(&imp->imp_lock); + rc = sptlrpc_req_refresh_ctx(req, 0); + if (rc) { + if (req->rq_err) { + /* we got fatal ctx refresh error, directly jump out + * thus we can pass back the actual error code. + */ + spin_lock(&imp->imp_lock); + list_del_init(&req->rq_list); + spin_unlock(&imp->imp_lock); + + CERROR("Failed to refresh ctx of req %p: %d\n", req, rc); + GOTO(out, rc); + } + /* simulating we got error during send rpc */ + goto after_send; + } + rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc); @@ -1587,10 +1694,14 @@ restart: DEBUG_REQ(D_NET, req, "-- sleeping for "CFS_DURATION_T" jiffies", timeout); } +repeat: lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request, req); - l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); + rc = l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); + if (rc == -ETIMEDOUT && ptlrpc_check_and_wait_suspend(req)) + goto repeat; +after_send: CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc " "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, @@ -1722,10 +1833,17 @@ static int ptlrpc_replay_interpret(struct ptlrpc_request *req, lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status); } - spin_lock(&imp->imp_lock); - imp->imp_last_replay_transno = req->rq_transno; - spin_unlock(&imp->imp_lock); - + /* + * Errors while replay can set transno to 0, but + * imp_last_replay_transno shouldn't be set to 0 anyway + */ + if (req->rq_transno > 0) { + spin_lock(&imp->imp_lock); + LASSERT(req->rq_transno <= imp->imp_last_replay_transno); + imp->imp_last_replay_transno = req->rq_transno; + spin_unlock(&imp->imp_lock); + } else + CERROR("Transno is 0 during replay!\n"); /* continue with recovery */ rc = ptlrpc_import_recovery_state_machine(imp); out: @@ -1738,7 +1856,6 @@ static int ptlrpc_replay_interpret(struct ptlrpc_request *req, RETURN(rc); } - int ptlrpc_replay_req(struct ptlrpc_request *req) { struct ptlrpc_replay_async_args *aa; diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 52994ba..1d2e228 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -130,7 +130,6 @@ struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, int ptlrpc_put_connection(struct ptlrpc_connection *c) { int rc = 0; - lnet_process_id_t peer; ENTRY; if (c == NULL) { @@ -138,8 +137,6 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c) RETURN(0); } - peer = c->c_peer; - CDEBUG (D_INFO, "connection=%p refcount %d to %s\n", c, atomic_read(&c->c_refcount) - 1, libcfs_nid2str(c->c_peer.nid)); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 9a7a13e..d2693fe 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -34,11 +34,12 @@ #endif #include <obd_class.h> #include <lustre_net.h> +#include <lustre_sec.h> #include "ptlrpc_internal.h" lnet_handle_eq_t ptlrpc_eq_h; -/* +/* * Client's outgoing request callback */ void request_out_callback(lnet_event_t *ev) @@ -54,6 +55,8 @@ void request_out_callback(lnet_event_t *ev) DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req, "type %d, status %d", ev->type, ev->status); + sptlrpc_request_out_callback(req); + if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { /* Failed send: make it seem like the reply timed out, just @@ -85,10 +88,10 @@ void reply_in_callback(lnet_event_t *ev) LASSERT (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); LASSERT (ev->unlinked); - LASSERT (ev->md.start == req->rq_repmsg); + LASSERT (ev->md.start == req->rq_repbuf); LASSERT (ev->offset == 0); - LASSERT (ev->mlength <= req->rq_replen); - + LASSERT (ev->mlength <= req->rq_repbuf_len); + DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req, "type %d, status %d", ev->type, ev->status); @@ -110,7 +113,7 @@ void reply_in_callback(lnet_event_t *ev) EXIT; } -/* +/* * Client's bulk has been written/read */ void client_bulk_callback (lnet_event_t *ev) @@ -119,7 +122,7 @@ void client_bulk_callback (lnet_event_t *ev) struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; ENTRY; - LASSERT ((desc->bd_type == BULK_PUT_SINK && + LASSERT ((desc->bd_type == BULK_PUT_SINK && ev->type == LNET_EVENT_PUT) || (desc->bd_type == BULK_GET_SOURCE && ev->type == LNET_EVENT_GET) || @@ -127,7 +130,7 @@ void client_bulk_callback (lnet_event_t *ev) LASSERT (ev->unlinked); CDEBUG((ev->status == 0) ? D_NET : D_ERROR, - "event type %d, status %d, desc %p\n", + "event type %d, status %d, desc %p\n", ev->type, ev->status, desc); spin_lock(&desc->bd_lock); @@ -141,6 +144,8 @@ void client_bulk_callback (lnet_event_t *ev) desc->bd_sender = ev->sender; } + sptlrpc_enc_pool_put_pages(desc); + /* NB don't unlock till after wakeup; desc can disappear under us * otherwise */ ptlrpc_wake_client_req(desc->bd_req); @@ -149,7 +154,7 @@ void client_bulk_callback (lnet_event_t *ev) EXIT; } -/* +/* * Server's incoming request callback */ void request_in_callback(lnet_event_t *ev) @@ -167,7 +172,7 @@ void request_in_callback(lnet_event_t *ev) rqbd->rqbd_buffer + service->srv_buf_size); CDEBUG((ev->status == 0) ? D_NET : D_ERROR, - "event type %d, status %d, service %s\n", + "event type %d, status %d, service %s\n", ev->type, ev->status, service->srv_name); if (ev->unlinked) { @@ -188,7 +193,7 @@ void request_in_callback(lnet_event_t *ev) if (req == NULL) { CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", - service->srv_name, + service->srv_name, libcfs_id2str(ev->initiator)); return; } @@ -198,9 +203,9 @@ void request_in_callback(lnet_event_t *ev) * flags are reset and scalars are zero. We only set the message * size to non-zero if this was a successful receive. */ req->rq_xid = ev->match_bits; - req->rq_reqmsg = ev->md.start + ev->offset; + req->rq_reqbuf = ev->md.start + ev->offset; if (ev->type == LNET_EVENT_PUT && ev->status == 0) - req->rq_reqlen = ev->mlength; + req->rq_reqdata_len = ev->mlength; do_gettimeofday(&req->rq_arrival_time); req->rq_peer = ev->initiator; req->rq_self = ev->target.nid; @@ -210,6 +215,8 @@ void request_in_callback(lnet_event_t *ev) req->rq_uid = ev->uid; #endif + CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer)); + spin_lock(&service->srv_lock); req->rq_history_seq = service->srv_request_seq++; @@ -300,11 +307,11 @@ void server_bulk_callback (lnet_event_t *ev) ev->type == LNET_EVENT_REPLY)); CDEBUG((ev->status == 0) ? D_NET : D_ERROR, - "event type %d, status %d, desc %p\n", + "event type %d, status %d, desc %p\n", ev->type, ev->status, desc); spin_lock(&desc->bd_lock); - + if ((ev->type == LNET_EVENT_ACK || ev->type == LNET_EVENT_REPLY) && ev->status == 0) { @@ -339,11 +346,11 @@ static void ptlrpc_master_callback(lnet_event_t *ev) callback == request_in_callback || callback == reply_out_callback || callback == server_bulk_callback); - + callback (ev); } -int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, +int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, lnet_process_id_t *peer, lnet_nid_t *self) { int best_dist = 0; @@ -371,7 +378,7 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, rc = 0; break; } - + LASSERT (order >= 0); if (rc < 0 || dist < best_dist || @@ -394,7 +401,7 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, } CDEBUG(D_NET,"%s->%s\n", uuid->uuid, libcfs_id2str(*peer)); - if (rc != 0) + if (rc != 0) CERROR("No NID found for %s\n", uuid->uuid); return rc; } @@ -405,7 +412,7 @@ void ptlrpc_ni_fini(void) struct l_wait_info lwi; int rc; int retries; - + /* Wait for the event queue to become idle since there may still be * messages in flight with pending events (i.e. the fire-and-forget * messages == client requests and "non-difficult" server @@ -420,11 +427,11 @@ void ptlrpc_ni_fini(void) case 0: LNetNIFini(); return; - + case -EBUSY: if (retries != 0) CWARN("Event queue still busy\n"); - + /* Wait for a bit */ cfs_waitq_init(&waitq); lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL); @@ -446,7 +453,7 @@ lnet_pid_t ptl_get_pid(void) #endif return pid; } - + int ptlrpc_ni_init(void) { int rc; @@ -495,15 +502,15 @@ liblustre_register_waitidle_callback (struct list_head *callback_list, int (*fn)(void *arg), void *arg) { struct liblustre_wait_callback *llwc; - + OBD_ALLOC(llwc, sizeof(*llwc)); LASSERT (llwc != NULL); - + llwc->llwc_name = name; llwc->llwc_fn = fn; llwc->llwc_arg = arg; list_add_tail(&llwc->llwc_list, callback_list); - + return (llwc); } @@ -511,7 +518,7 @@ void liblustre_deregister_waitidle_callback (void *opaque) { struct liblustre_wait_callback *llwc = opaque; - + list_del(&llwc->llwc_list); OBD_FREE(llwc, sizeof(*llwc)); } @@ -555,16 +562,16 @@ liblustre_check_events (int timeout) rc = LNetEQPoll(&ptlrpc_eq_h, 1, timeout * 1000, &ev, &i); if (rc == 0) RETURN(0); - + LASSERT (rc == -EOVERFLOW || rc == 1); - + /* liblustre: no asynch callback so we can't affort to miss any * events... */ if (rc == -EOVERFLOW) { CERROR ("Dropped an event!!!\n"); abort(); } - + ptlrpc_master_callback (&ev); RETURN(1); } @@ -588,9 +595,9 @@ liblustre_wait_event (int timeout) /* Give all registered callbacks a bite at the cherry */ list_for_each(tmp, &liblustre_wait_callbacks) { - llwc = list_entry(tmp, struct liblustre_wait_callback, + llwc = list_entry(tmp, struct liblustre_wait_callback, llwc_list); - + if (llwc->llwc_fn(llwc->llwc_arg)) found_something = 1; } @@ -652,9 +659,10 @@ int ptlrpc_init_portals(void) return -EIO; } #ifndef __KERNEL__ - liblustre_services_callback = + liblustre_services_callback = liblustre_register_wait_callback("liblustre_check_services", - &liblustre_check_services, NULL); + &liblustre_check_services, + NULL); #endif rc = ptlrpcd_addref(); if (rc == 0) diff --git a/lustre/ptlrpc/gss/.cvsignore b/lustre/ptlrpc/gss/.cvsignore new file mode 100644 index 0000000..9acae98 --- /dev/null +++ b/lustre/ptlrpc/gss/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +tags +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.depend diff --git a/lustre/ptlrpc/gss/Makefile.in b/lustre/ptlrpc/gss/Makefile.in new file mode 100644 index 0000000..3871c65 --- /dev/null +++ b/lustre/ptlrpc/gss/Makefile.in @@ -0,0 +1,9 @@ +MODULES := ptlrpc_gss + +ptlrpc_gss-objs := sec_gss.o gss_bulk.o gss_cli_upcall.o gss_svc_upcall.o \ + gss_rawobj.o lproc_gss.o gss_generic_token.o \ + gss_mech_switch.o gss_krb5_mech.o + +default: all + +@INCLUDE_RULES@ diff --git a/lustre/ptlrpc/gss/autoMakefile.am b/lustre/ptlrpc/gss/autoMakefile.am new file mode 100644 index 0000000..3f9be08 --- /dev/null +++ b/lustre/ptlrpc/gss/autoMakefile.am @@ -0,0 +1,15 @@ +# Copyright (C) 2006 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if LIBLUSTRE +endif + +if MODULES +modulefs_DATA = ptlrpc_gss$(KMODEXT) +endif # MODULES + +DIST_SOURCES = $(ptlrpc_gss-objs:.o=.c) gss_api.h gss_asn1.h gss_err.h \ + gss_internal.h gss_krb5.h +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ diff --git a/lustre/ptlrpc/gss/gss_api.h b/lustre/ptlrpc/gss/gss_api.h new file mode 100644 index 0000000..cf31747 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_api.h @@ -0,0 +1,152 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * Somewhat simplified version of the gss api. + * + * Dug Song <dugsong@monkey.org> + * Andy Adamson <andros@umich.edu> + * Bruce Fields <bfields@umich.edu> + * Copyright (c) 2000 The Regents of the University of Michigan + * + */ + +#ifndef __PTLRPC_GSS_GSS_API_H_ +#define __PTLRPC_GSS_GSS_API_H_ + +struct gss_api_mech; + +/* The mechanism-independent gss-api context: */ +struct gss_ctx { + struct gss_api_mech *mech_type; + void *internal_ctx_id; +}; + +#define GSS_C_NO_BUFFER ((rawobj_t) 0) +#define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) +#define GSS_C_NULL_OID ((rawobj_t) 0) + +/* + * gss-api prototypes; note that these are somewhat simplified versions of + * the prototypes specified in RFC 2744. + */ +__u32 lgss_import_sec_context( + rawobj_t *input_token, + struct gss_api_mech *mech, + struct gss_ctx **ctx); +__u32 lgss_copy_reverse_context( + struct gss_ctx *ctx, + struct gss_ctx **ctx_new); +__u32 lgss_inquire_context( + struct gss_ctx *ctx, + unsigned long *endtime); +__u32 lgss_get_mic( + struct gss_ctx *ctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *mic_token); +__u32 lgss_verify_mic( + struct gss_ctx *ctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *mic_token); +__u32 lgss_wrap( + struct gss_ctx *ctx, + rawobj_t *msg, + int msg_buflen, + rawobj_t *out_token); +__u32 lgss_unwrap( + struct gss_ctx *ctx, + rawobj_t *token, + rawobj_t *out_msg); +__u32 lgss_plain_encrypt( + struct gss_ctx *ctx, + int length, + void *in_buf, + void *out_buf); +__u32 lgss_delete_sec_context( + struct gss_ctx **ctx); +int lgss_display( + struct gss_ctx *ctx, + char *buf, + int bufsize); + +struct subflavor_desc { + __u32 sf_subflavor; + __u32 sf_qop; + __u32 sf_service; + char *sf_name; +}; + +/* Each mechanism is described by the following struct: */ +struct gss_api_mech { + struct list_head gm_list; + struct module *gm_owner; + char *gm_name; + rawobj_t gm_oid; + atomic_t gm_count; + struct gss_api_ops *gm_ops; + int gm_sf_num; + struct subflavor_desc *gm_sfs; +}; + +/* and must provide the following operations: */ +struct gss_api_ops { + __u32 (*gss_import_sec_context)( + rawobj_t *input_token, + struct gss_ctx *ctx); + __u32 (*gss_copy_reverse_context)( + struct gss_ctx *ctx, + struct gss_ctx *ctx_new); + __u32 (*gss_inquire_context)( + struct gss_ctx *ctx, + unsigned long *endtime); + __u32 (*gss_get_mic)( + struct gss_ctx *ctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *mic_token); + __u32 (*gss_verify_mic)( + struct gss_ctx *ctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *mic_token); + __u32 (*gss_wrap)( + struct gss_ctx *ctx, + rawobj_t *msg, + int msg_buflen, + rawobj_t *out_token); + __u32 (*gss_unwrap)( + struct gss_ctx *ctx, + rawobj_t *token, + rawobj_t *out_msg); + __u32 (*gss_plain_encrypt)( + struct gss_ctx *ctx, + int length, + void *in_buf, + void *out_buf); + void (*gss_delete_sec_context)( + void *ctx); + int (*gss_display)( + struct gss_ctx *ctx, + char *buf, + int bufsize); +}; + +int lgss_mech_register(struct gss_api_mech *mech); +void lgss_mech_unregister(struct gss_api_mech *mech); + +struct gss_api_mech * lgss_OID_to_mech(rawobj_t *oid); +struct gss_api_mech * lgss_name_to_mech(char *name); +struct gss_api_mech * lgss_subflavor_to_mech(__u32 subflavor); + +struct gss_api_mech * lgss_mech_get(struct gss_api_mech *mech); +void lgss_mech_put(struct gss_api_mech *mech); + +#endif /* __PTLRPC_GSS_GSS_API_H_ */ diff --git a/lustre/ptlrpc/gss/gss_asn1.h b/lustre/ptlrpc/gss/gss_asn1.h new file mode 100644 index 0000000..1148478 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_asn1.h @@ -0,0 +1,85 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * minimal asn1 for generic encoding/decoding of gss tokens + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + */ + +#define SIZEOF_INT 4 + +/* from gssapi_err_generic.h */ +#define G_BAD_SERVICE_NAME (-2045022976L) +#define G_BAD_STRING_UID (-2045022975L) +#define G_NOUSER (-2045022974L) +#define G_VALIDATE_FAILED (-2045022973L) +#define G_BUFFER_ALLOC (-2045022972L) +#define G_BAD_MSG_CTX (-2045022971L) +#define G_WRONG_SIZE (-2045022970L) +#define G_BAD_USAGE (-2045022969L) +#define G_UNKNOWN_QOP (-2045022968L) +#define G_NO_HOSTNAME (-2045022967L) +#define G_BAD_HOSTNAME (-2045022966L) +#define G_WRONG_MECH (-2045022965L) +#define G_BAD_TOK_HEADER (-2045022964L) +#define G_BAD_DIRECTION (-2045022963L) +#define G_TOK_TRUNC (-2045022962L) +#define G_REFLECT (-2045022961L) +#define G_WRONG_TOKID (-2045022960L) + +#define g_OID_equal(o1,o2) \ + (((o1)->len == (o2)->len) && \ + (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) + +__u32 g_verify_token_header(rawobj_t *mech, + int *body_size, + unsigned char **buf_in, + int toksize); + +__u32 g_get_mech_oid(rawobj_t *mech, + rawobj_t *in_buf); + +int g_token_size(rawobj_t *mech, + unsigned int body_size); + +void g_make_token_header(rawobj_t *mech, + int body_size, + unsigned char **buf); diff --git a/lustre/ptlrpc/gss/gss_cli_upcall.c b/lustre/ptlrpc/gss/gss_cli_upcall.c new file mode 100644 index 0000000..3bd7b2f --- /dev/null +++ b/lustre/ptlrpc/gss/gss_cli_upcall.c @@ -0,0 +1,981 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <linux/random.h> +/* for rpc_pipefs */ +struct rpc_clnt; +#include <linux/sunrpc/rpc_pipe_fs.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" + +#define LUSTRE_PIPE_ROOT "/lustre" +#define LUSTRE_PIPE_KRB5 LUSTRE_PIPE_ROOT"/krb5" + +struct gss_upcall_msg_data { + __u32 gum_seq; + __u32 gum_uid; + __u32 gum_gid; + __u32 gum_svc; /* MDS/OSS... */ + __u64 gum_nid; /* peer NID */ + __u8 gum_obd[64]; /* client obd name */ +}; + +struct gss_upcall_msg { + struct rpc_pipe_msg gum_base; + atomic_t gum_refcount; + struct list_head gum_list; + __u32 gum_mechidx; + struct gss_sec *gum_gsec; + struct gss_cli_ctx *gum_gctx; + struct gss_upcall_msg_data gum_data; +}; + +static atomic_t upcall_seq = ATOMIC_INIT(0); + +static inline +__u32 upcall_get_sequence(void) +{ + return (__u32) atomic_inc_return(&upcall_seq); +} + +enum mech_idx_t { + MECH_KRB5 = 0, + MECH_MAX +}; + +static inline +__u32 mech_name2idx(const char *name) +{ + LASSERT(!strcmp(name, "krb5")); + return MECH_KRB5; +} + +/* pipefs dentries for each mechanisms */ +static struct dentry *de_pipes[MECH_MAX] = { NULL, }; +/* all upcall messgaes linked here */ +static struct list_head upcall_lists[MECH_MAX]; +/* and protected by this */ +static spinlock_t upcall_locks[MECH_MAX]; + +static inline +void upcall_list_lock(int idx) +{ + spin_lock(&upcall_locks[idx]); +} + +static inline +void upcall_list_unlock(int idx) +{ + spin_unlock(&upcall_locks[idx]); +} + +static +void upcall_msg_enlist(struct gss_upcall_msg *msg) +{ + __u32 idx = msg->gum_mechidx; + + upcall_list_lock(idx); + list_add(&msg->gum_list, &upcall_lists[idx]); + upcall_list_unlock(idx); +} + +static +void upcall_msg_delist(struct gss_upcall_msg *msg) +{ + __u32 idx = msg->gum_mechidx; + + upcall_list_lock(idx); + list_del_init(&msg->gum_list); + upcall_list_unlock(idx); +} + +/********************************************** + * rpc_pipe upcall helpers * + **********************************************/ +static +void gss_release_msg(struct gss_upcall_msg *gmsg) +{ + ENTRY; + LASSERT(atomic_read(&gmsg->gum_refcount) > 0); + + if (!atomic_dec_and_test(&gmsg->gum_refcount)) { + EXIT; + return; + } + + if (gmsg->gum_gctx) { + sptlrpc_ctx_wakeup(&gmsg->gum_gctx->gc_base); + sptlrpc_ctx_put(&gmsg->gum_gctx->gc_base, 1); + gmsg->gum_gctx = NULL; + } + + LASSERT(list_empty(&gmsg->gum_list)); + LASSERT(list_empty(&gmsg->gum_base.list)); + OBD_FREE_PTR(gmsg); + EXIT; +} + +static +void gss_unhash_msg_nolock(struct gss_upcall_msg *gmsg) +{ + __u32 idx = gmsg->gum_mechidx; + + LASSERT(idx < MECH_MAX); + LASSERT_SPIN_LOCKED(&upcall_locks[idx]); + + if (list_empty(&gmsg->gum_list)) + return; + + list_del_init(&gmsg->gum_list); + LASSERT(atomic_read(&gmsg->gum_refcount) > 1); + atomic_dec(&gmsg->gum_refcount); +} + +static +void gss_unhash_msg(struct gss_upcall_msg *gmsg) +{ + __u32 idx = gmsg->gum_mechidx; + + LASSERT(idx < MECH_MAX); + upcall_list_lock(idx); + gss_unhash_msg_nolock(gmsg); + upcall_list_unlock(idx); +} + +static +void gss_msg_fail_ctx(struct gss_upcall_msg *gmsg) +{ + if (gmsg->gum_gctx) { + struct ptlrpc_cli_ctx *ctx = &gmsg->gum_gctx->gc_base; + + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + sptlrpc_ctx_expire(ctx); + set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags); + } +} + +static +struct gss_upcall_msg * gss_find_upcall(__u32 mechidx, __u32 seq) +{ + struct gss_upcall_msg *gmsg; + + upcall_list_lock(mechidx); + list_for_each_entry(gmsg, &upcall_lists[mechidx], gum_list) { + if (gmsg->gum_data.gum_seq != seq) + continue; + + LASSERT(atomic_read(&gmsg->gum_refcount) > 0); + LASSERT(gmsg->gum_mechidx == mechidx); + + atomic_inc(&gmsg->gum_refcount); + upcall_list_unlock(mechidx); + return gmsg; + } + upcall_list_unlock(mechidx); + return NULL; +} + +static +int simple_get_bytes(char **buf, __u32 *buflen, void *res, __u32 reslen) +{ + if (*buflen < reslen) { + CERROR("buflen %u < %u\n", *buflen, reslen); + return -EINVAL; + } + + memcpy(res, *buf, reslen); + *buf += reslen; + *buflen -= reslen; + return 0; +} + +/******************************************* + * rpc_pipe APIs * + *******************************************/ +static +ssize_t gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + ssize_t mlen = msg->len; + ssize_t left; + ENTRY; + + if (mlen > buflen) + mlen = buflen; + left = copy_to_user(dst, data, mlen); + if (left < 0) { + msg->errno = left; + RETURN(left); + } + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + RETURN(mlen); +} + +static +ssize_t gss_pipe_downcall(struct file *filp, const char *src, size_t mlen) +{ + struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct gss_upcall_msg *gss_msg; + struct ptlrpc_cli_ctx *ctx; + struct gss_cli_ctx *gctx = NULL; + char *buf, *data; + int datalen; + int timeout, rc; + __u32 mechidx, seq, gss_err; + ENTRY; + + mechidx = (__u32) (long) rpci->private; + LASSERT(mechidx < MECH_MAX); + + OBD_ALLOC(buf, mlen); + if (!buf) + RETURN(-ENOMEM); + + if (copy_from_user(buf, src, mlen)) { + CERROR("failed copy user space data\n"); + GOTO(out_free, rc = -EFAULT); + } + data = buf; + datalen = mlen; + + /* data passed down format: + * - seq + * - timeout + * - gc_win / error + * - wire_ctx (rawobj) + * - mech_ctx (rawobj) + */ + if (simple_get_bytes(&data, &datalen, &seq, sizeof(seq))) { + CERROR("fail to get seq\n"); + GOTO(out_free, rc = -EFAULT); + } + + gss_msg = gss_find_upcall(mechidx, seq); + if (!gss_msg) { + CERROR("upcall %u has aborted earlier\n", seq); + GOTO(out_free, rc = -EINVAL); + } + + gss_unhash_msg(gss_msg); + gctx = gss_msg->gum_gctx; + LASSERT(gctx); + LASSERT(atomic_read(&gctx->gc_base.cc_refcount) > 0); + + /* timeout is not in use for now */ + if (simple_get_bytes(&data, &datalen, &timeout, sizeof(timeout))) + GOTO(out_msg, rc = -EFAULT); + + /* lgssd signal an error by gc_win == 0 */ + if (simple_get_bytes(&data, &datalen, &gctx->gc_win, + sizeof(gctx->gc_win))) + GOTO(out_msg, rc = -EFAULT); + + if (gctx->gc_win == 0) { + /* followed by: + * - rpc error + * - gss error + */ + if (simple_get_bytes(&data, &datalen, &rc, sizeof(rc))) + GOTO(out_msg, rc = -EFAULT); + if (simple_get_bytes(&data, &datalen, &gss_err,sizeof(gss_err))) + GOTO(out_msg, rc = -EFAULT); + + if (rc == 0 && gss_err == GSS_S_COMPLETE) { + CWARN("both rpc & gss error code not set\n"); + rc = -EPERM; + } + } else { + rawobj_t tmpobj; + + /* handle */ + if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen)) + GOTO(out_msg, rc = -EFAULT); + if (rawobj_dup(&gctx->gc_handle, &tmpobj)) + GOTO(out_msg, rc = -ENOMEM); + + /* mechctx */ + if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen)) + GOTO(out_msg, rc = -EFAULT); + gss_err = lgss_import_sec_context(&tmpobj, + gss_msg->gum_gsec->gs_mech, + &gctx->gc_mechctx); + rc = 0; + } + + if (likely(rc == 0 && gss_err == GSS_S_COMPLETE)) { + gss_cli_ctx_uptodate(gctx); + } else { + ctx = &gctx->gc_base; + sptlrpc_ctx_expire(ctx); + if (rc != -ERESTART || gss_err != GSS_S_COMPLETE) + set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags); + + CERROR("refresh ctx %p(uid %d) failed: %d/0x%08x: %s\n", + ctx, ctx->cc_vcred.vc_uid, rc, gss_err, + test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) ? + "fatal error" : "non-fatal"); + } + + rc = mlen; + +out_msg: + gss_release_msg(gss_msg); + +out_free: + OBD_FREE(buf, mlen); + /* FIXME + * hack pipefs: always return asked length unless all following + * downcalls might be messed up. + */ + rc = mlen; + RETURN(rc); +} + +static +void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct gss_upcall_msg *gmsg; + struct gss_upcall_msg_data *gumd; + static cfs_time_t ratelimit = 0; + ENTRY; + + LASSERT(list_empty(&msg->list)); + + /* normally errno is >= 0 */ + if (msg->errno >= 0) { + EXIT; + return; + } + + gmsg = container_of(msg, struct gss_upcall_msg, gum_base); + gumd = &gmsg->gum_data; + LASSERT(atomic_read(&gmsg->gum_refcount) > 0); + + CERROR("failed msg %p (seq %u, uid %u, svc %u, nid "LPX64", obd %.*s): " + "errno %d\n", msg, gumd->gum_seq, gumd->gum_uid, gumd->gum_svc, + gumd->gum_nid, (int) sizeof(gumd->gum_obd), + gumd->gum_obd, msg->errno); + + atomic_inc(&gmsg->gum_refcount); + gss_unhash_msg(gmsg); + if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { + cfs_time_t now = cfs_time_current_sec(); + + if (cfs_time_after(now, ratelimit)) { + CWARN("upcall timed out, is lgssd running?\n"); + ratelimit = now + 15; + } + } + gss_msg_fail_ctx(gmsg); + gss_release_msg(gmsg); + EXIT; +} + +static +void gss_pipe_release(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + __u32 idx; + ENTRY; + + idx = (__u32) (long) rpci->private; + LASSERT(idx < MECH_MAX); + + upcall_list_lock(idx); + while (!list_empty(&upcall_lists[idx])) { + struct gss_upcall_msg *gmsg; + struct gss_upcall_msg_data *gumd; + + gmsg = list_entry(upcall_lists[idx].next, + struct gss_upcall_msg, gum_list); + gumd = &gmsg->gum_data; + LASSERT(list_empty(&gmsg->gum_base.list)); + + CERROR("failing remaining msg %p:seq %u, uid %u, svc %u, " + "nid "LPX64", obd %.*s\n", gmsg, + gumd->gum_seq, gumd->gum_uid, gumd->gum_svc, + gumd->gum_nid, (int) sizeof(gumd->gum_obd), + gumd->gum_obd); + + gmsg->gum_base.errno = -EPIPE; + atomic_inc(&gmsg->gum_refcount); + gss_unhash_msg_nolock(gmsg); + + gss_msg_fail_ctx(gmsg); + + upcall_list_unlock(idx); + gss_release_msg(gmsg); + upcall_list_lock(idx); + } + upcall_list_unlock(idx); + EXIT; +} + +static struct rpc_pipe_ops gss_upcall_ops = { + .upcall = gss_pipe_upcall, + .downcall = gss_pipe_downcall, + .destroy_msg = gss_pipe_destroy_msg, + .release_pipe = gss_pipe_release, +}; + + +/******************************************* + * upcall helper functions * + *******************************************/ + +static inline +__u32 import_to_gss_svc(struct obd_import *imp) +{ + const char *name = imp->imp_obd->obd_type->typ_name; + if (!strcmp(name, LUSTRE_MDC_NAME)) + return LUSTRE_GSS_TGT_MDS; + if (!strcmp(name, LUSTRE_OSC_NAME)) + return LUSTRE_GSS_TGT_OSS; + LBUG(); + return 0; +} + +int gss_ctx_refresh_pipefs(struct ptlrpc_cli_ctx *ctx) +{ + struct obd_import *imp; + struct gss_sec *gsec; + struct gss_upcall_msg *gmsg; + int rc = 0; + ENTRY; + + might_sleep(); + + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_import); + LASSERT(ctx->cc_sec->ps_import->imp_obd); + + imp = ctx->cc_sec->ps_import; + if (!imp->imp_connection) { + CERROR("import has no connection set\n"); + RETURN(-EINVAL); + } + + gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base); + + OBD_ALLOC_PTR(gmsg); + if (!gmsg) + RETURN(-ENOMEM); + + /* initialize pipefs base msg */ + INIT_LIST_HEAD(&gmsg->gum_base.list); + gmsg->gum_base.data = &gmsg->gum_data; + gmsg->gum_base.len = sizeof(gmsg->gum_data); + gmsg->gum_base.copied = 0; + gmsg->gum_base.errno = 0; + + /* init upcall msg */ + atomic_set(&gmsg->gum_refcount, 1); + gmsg->gum_mechidx = mech_name2idx(gsec->gs_mech->gm_name); + gmsg->gum_gsec = gsec; + gmsg->gum_gctx = container_of(sptlrpc_ctx_get(ctx), + struct gss_cli_ctx, gc_base); + gmsg->gum_data.gum_seq = upcall_get_sequence(); + gmsg->gum_data.gum_uid = ctx->cc_vcred.vc_uid; + gmsg->gum_data.gum_gid = 0; /* not used for now */ + gmsg->gum_data.gum_svc = import_to_gss_svc(imp); + gmsg->gum_data.gum_nid = imp->imp_connection->c_peer.nid; + strncpy(gmsg->gum_data.gum_obd, imp->imp_obd->obd_name, + sizeof(gmsg->gum_data.gum_obd)); + + /* This only could happen when sysadmin set it dead/expired + * using lctl by force. + */ + smp_mb(); + if (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK) { + CWARN("ctx %p(%u->%s) was set flags %lx unexpectedly\n", + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec), + ctx->cc_flags); + + LASSERT(!(ctx->cc_flags & PTLRPC_CTX_UPTODATE)); + ctx->cc_flags |= PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR; + + rc = -EIO; + goto err_free; + } + + upcall_msg_enlist(gmsg); + + rc = rpc_queue_upcall(de_pipes[gmsg->gum_mechidx]->d_inode, + &gmsg->gum_base); + if (rc) { + CERROR("rpc_queue_upcall failed: %d\n", rc); + + upcall_msg_delist(gmsg); + goto err_free; + } + + RETURN(0); +err_free: + OBD_FREE_PTR(gmsg); + RETURN(rc); +} + +int gss_sec_upcall_init(struct gss_sec *gsec) +{ + return 0; +} + +void gss_sec_upcall_cleanup(struct gss_sec *gsec) +{ +} + +int gss_init_pipefs(void) +{ + struct dentry *de; + + /* pipe dir */ + de = rpc_mkdir(LUSTRE_PIPE_ROOT, NULL); + if (IS_ERR(de) && PTR_ERR(de) != -EEXIST) { + CERROR("Failed to create gss pipe dir: %ld\n", PTR_ERR(de)); + return PTR_ERR(de); + } + /* FIXME + * hack pipefs: dput will sometimes cause oops during module unload + * and lgssd close the pipe fds. + */ + //dput(de); + + /* krb5 mechanism */ + de = rpc_mkpipe(LUSTRE_PIPE_KRB5, (void *) MECH_KRB5, &gss_upcall_ops, + RPC_PIPE_WAIT_FOR_OPEN); + if (!de || IS_ERR(de)) { + CERROR("failed to make rpc_pipe %s: %ld\n", + LUSTRE_PIPE_KRB5, PTR_ERR(de)); + rpc_rmdir(LUSTRE_PIPE_ROOT); + return PTR_ERR(de); + } + + de_pipes[MECH_KRB5] = de; + INIT_LIST_HEAD(&upcall_lists[MECH_KRB5]); + upcall_locks[MECH_KRB5] = SPIN_LOCK_UNLOCKED; + + return 0; +} + +void gss_cleanup_pipefs(void) +{ + __u32 i; + + for (i = 0; i < MECH_MAX; i++) { + LASSERT(list_empty(&upcall_lists[i])); + /* FIXME + * hack pipefs, dput pipe dentry here might cause lgssd oops. + */ + //dput(de_pipes[i]); + de_pipes[i] = NULL; + } + + rpc_unlink(LUSTRE_PIPE_KRB5); + rpc_rmdir(LUSTRE_PIPE_ROOT); +} + +/********************************************** + * gss context init/fini helper * + **********************************************/ + +static +int ctx_init_pack_request(struct obd_import *imp, + struct ptlrpc_request *req, + int lustre_srv, + uid_t uid, gid_t gid, + long token_size, + char __user *token) +{ + struct lustre_msg *msg = req->rq_reqbuf; + struct gss_sec *gsec; + struct gss_header *ghdr; + struct ptlrpc_user_desc *pud; + __u32 *p, size, offset = 2; + rawobj_t obj; + + LASSERT(msg->lm_bufcount <= 4); + + /* gss hdr */ + ghdr = lustre_msg_buf(msg, 0, sizeof(*ghdr)); + ghdr->gh_version = PTLRPC_GSS_VERSION; + ghdr->gh_flags = 0; + ghdr->gh_proc = PTLRPC_GSS_PROC_INIT; + ghdr->gh_seq = 0; + ghdr->gh_svc = PTLRPC_GSS_SVC_NONE; + ghdr->gh_handle.len = 0; + + /* fix the user desc */ + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + pud = lustre_msg_buf(msg, offset, sizeof(*pud)); + LASSERT(pud); + pud->pud_uid = pud->pud_fsuid = uid; + pud->pud_gid = pud->pud_fsgid = gid; + pud->pud_cap = 0; + pud->pud_ngroups = 0; + offset++; + } + + /* security payload */ + p = lustre_msg_buf(msg, offset, 0); + size = msg->lm_buflens[offset]; + + /* 1. lustre svc type */ + LASSERT(size > 4); + *p++ = cpu_to_le32(lustre_srv); + size -= 4; + + /* 2. target uuid */ + obj.len = strlen(imp->imp_obd->u.cli.cl_target_uuid.uuid) + 1; + obj.data = imp->imp_obd->u.cli.cl_target_uuid.uuid; + if (rawobj_serialize(&obj, &p, &size)) + LBUG(); + + /* 3. reverse context handle. actually only needed by root user, + * but we send it anyway. + */ + gsec = container_of(imp->imp_sec, struct gss_sec, gs_base); + obj.len = sizeof(gsec->gs_rvs_hdl); + obj.data = (__u8 *) &gsec->gs_rvs_hdl; + if (rawobj_serialize(&obj, &p, &size)) + LBUG(); + + /* 4. now the token */ + LASSERT(size >= (sizeof(__u32) + token_size)); + *p++ = cpu_to_le32(((__u32) token_size)); + if (copy_from_user(p, token, token_size)) { + CERROR("can't copy token\n"); + return -EFAULT; + } + size -= sizeof(__u32) + size_round4(token_size); + + req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, offset, + msg->lm_buflens[offset] - size, 0); + return 0; +} + +static +int ctx_init_parse_reply(struct lustre_msg *msg, + char __user *outbuf, long outlen) +{ + struct gss_rep_header *ghdr; + __u32 obj_len, round_len; + __u32 status, effective = 0; + + if (msg->lm_bufcount != 3) { + CERROR("unexpected bufcount %u\n", msg->lm_bufcount); + return -EPROTO; + } + + ghdr = (struct gss_rep_header *) gss_swab_header(msg, 0); + if (ghdr == NULL) { + CERROR("unable to extract gss reply header\n"); + return -EPROTO; + } + + if (ghdr->gh_version != PTLRPC_GSS_VERSION) { + CERROR("invalid gss version %u\n", ghdr->gh_version); + return -EPROTO; + } + + if (outlen < (4 + 2) * 4 + size_round4(ghdr->gh_handle.len) + + size_round4(msg->lm_buflens[2])) { + CERROR("output buffer size %ld too small\n", outlen); + return -EFAULT; + } + + status = 0; + effective = 0; + + if (copy_to_user(outbuf, &status, 4)) + return -EFAULT; + outbuf += 4; + if (copy_to_user(outbuf, &ghdr->gh_major, 4)) + return -EFAULT; + outbuf += 4; + if (copy_to_user(outbuf, &ghdr->gh_minor, 4)) + return -EFAULT; + outbuf += 4; + if (copy_to_user(outbuf, &ghdr->gh_seqwin, 4)) + return -EFAULT; + outbuf += 4; + effective += 4 * 4; + + /* handle */ + obj_len = ghdr->gh_handle.len; + round_len = (obj_len + 3) & ~ 3; + if (copy_to_user(outbuf, &obj_len, 4)) + return -EFAULT; + outbuf += 4; + if (copy_to_user(outbuf, (char *) ghdr->gh_handle.data, round_len)) + return -EFAULT; + outbuf += round_len; + effective += 4 + round_len; + + /* out token */ + obj_len = msg->lm_buflens[2]; + round_len = (obj_len + 3) & ~ 3; + if (copy_to_user(outbuf, &obj_len, 4)) + return -EFAULT; + outbuf += 4; + if (copy_to_user(outbuf, lustre_msg_buf(msg, 2, 0), round_len)) + return -EFAULT; + outbuf += round_len; + effective += 4 + round_len; + + return effective; +} + +/* XXX move to where lgssd could see */ +struct lgssd_ioctl_param { + int version; /* in */ + char *uuid; /* in */ + int lustre_svc; /* in */ + uid_t uid; /* in */ + gid_t gid; /* in */ + long send_token_size;/* in */ + char *send_token; /* in */ + long reply_buf_size; /* in */ + char *reply_buf; /* in */ + long status; /* out */ + long reply_length; /* out */ +}; + +int gss_do_ctx_init_rpc(__user char *buffer, unsigned long count) +{ + struct obd_import *imp; + struct ptlrpc_request *req; + struct lgssd_ioctl_param param; + struct obd_device *obd; + char obdname[64]; + long lsize; + int lmsg_size = sizeof(struct ptlrpc_body); + int rc; + + if (count != sizeof(param)) { + CERROR("ioctl size %lu, expect %lu, please check lgssd version\n", + count, (unsigned long) sizeof(param)); + RETURN(-EINVAL); + } + if (copy_from_user(¶m, buffer, sizeof(param))) { + CERROR("failed copy data from lgssd\n"); + RETURN(-EFAULT); + } + + if (param.version != GSSD_INTERFACE_VERSION) { + CERROR("gssd interface version %d (expect %d)\n", + param.version, GSSD_INTERFACE_VERSION); + RETURN(-EINVAL); + } + + /* take name */ + if (strncpy_from_user(obdname, param.uuid, sizeof(obdname)) <= 0) { + CERROR("Invalid obdname pointer\n"); + RETURN(-EFAULT); + } + + obd = class_name2obd(obdname); + if (!obd) { + CERROR("no such obd %s\n", obdname); + RETURN(-EINVAL); + } + + imp = class_import_get(obd->u.cli.cl_import); + LASSERT(imp->imp_sec); + + /* force this import to use v2 msg */ + imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2; + + req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, SEC_CTX_INIT, + 1, &lmsg_size, NULL); + if (!req) { + param.status = -ENOMEM; + goto out_copy; + } + + /* get token */ + rc = ctx_init_pack_request(imp, req, + param.lustre_svc, + param.uid, param.gid, + param.send_token_size, + param.send_token); + if (rc) { + param.status = rc; + goto out_copy; + } + + req->rq_replen = lustre_msg_size_v2(1, &lmsg_size); + + rc = ptlrpc_queue_wait(req); + if (rc) { + /* If any _real_ denial be made, we expect server return + * -EACCES reply or return success but indicate gss error + * inside reply messsage. All other errors are treated as + * timeout, caller might try the negotiation repeatedly, + * leave recovery decisions to general ptlrpc layer. + * + * FIXME maybe some other error code shouldn't be treated + * as timeout. + */ + param.status = rc; + if (rc != -EACCES) + param.status = -ETIMEDOUT; + goto out_copy; + } + + lsize = ctx_init_parse_reply(req->rq_repbuf, + param.reply_buf, param.reply_buf_size); + if (lsize < 0) { + param.status = (int) lsize; + goto out_copy; + } + + param.status = 0; + param.reply_length = lsize; + +out_copy: + if (copy_to_user(buffer, ¶m, sizeof(param))) + rc = -EFAULT; + else + rc = 0; + + class_import_put(imp); + ptlrpc_req_finished(req); + RETURN(rc); +} + +int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx) +{ + struct ptlrpc_cli_ctx *ctx = &gctx->gc_base; + struct obd_import *imp = ctx->cc_sec->ps_import; + struct ptlrpc_request *req; + struct ptlrpc_user_desc *pud; + int buflens = sizeof(struct ptlrpc_body); + int rc; + ENTRY; + + if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + CWARN("ctx %p(%u) is reverse, don't send destroy rpc\n", + ctx, ctx->cc_vcred.vc_uid); + RETURN(0); + } + + if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) || + !test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags)) { + CWARN("ctx %p(%u->%s) already dead, don't send destroy rpc\n", + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); + RETURN(0); + } + + might_sleep(); + + CWARN("client destroy ctx %p(%u->%s)\n", + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); + + /* context's refcount could be 0, steal one */ + atomic_inc(&ctx->cc_refcount); + + gctx->gc_proc = PTLRPC_GSS_PROC_DESTROY; + + req = ptlrpc_prep_req_pool(imp, LUSTRE_OBD_VERSION, SEC_CTX_FINI, + 1, &buflens, NULL, NULL, ctx); + if (!req) { + CWARN("ctx %p(%u): fail to prepare rpc, destroy locally\n", + ctx, ctx->cc_vcred.vc_uid); + GOTO(out_ref, rc = -ENOMEM); + } + + /* fix the user desc */ + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + /* we rely the fact that this request is in AUTH mode, + * and user_desc at offset 2. + */ + pud = lustre_msg_buf(req->rq_reqbuf, 2, sizeof(*pud)); + LASSERT(pud); + pud->pud_uid = pud->pud_fsuid = ctx->cc_vcred.vc_uid; + pud->pud_gid = pud->pud_fsgid = ctx->cc_vcred.vc_gid; + pud->pud_cap = 0; + pud->pud_ngroups = 0; + } + + req->rq_replen = lustre_msg_size_v2(1, &buflens); + + rc = ptlrpc_queue_wait(req); + if (rc) { + CWARN("ctx %p(%u): rpc error %d, destroy locally\n", + ctx, ctx->cc_vcred.vc_uid, rc); + } + + ptlrpc_req_finished(req); +out_ref: + atomic_dec(&ctx->cc_refcount); + RETURN(rc); +} + +int __init gss_init_upcall(void) +{ + int rc; + + rc = gss_svc_init_upcall(); + if (rc) + return rc; + + rc = gss_init_pipefs(); + if (rc) + gss_svc_exit_upcall(); + + return rc; +} + +void __exit gss_exit_upcall(void) +{ + gss_svc_exit_upcall(); + gss_cleanup_pipefs(); +} diff --git a/lustre/ptlrpc/gss/gss_err.h b/lustre/ptlrpc/gss/gss_err.h new file mode 100644 index 0000000..a184501 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_err.h @@ -0,0 +1,194 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * Adapted from MIT Kerberos 5-1.2.1 include/gssapi/gssapi.h + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __PTLRPC_GSS_GSS_ERR_H_ +#define __PTLRPC_GSS_GSS_ERR_H_ + +typedef unsigned int OM_uint32; + +/* + * Flag bits for context-level services. + */ +#define GSS_C_DELEG_FLAG (1) +#define GSS_C_MUTUAL_FLAG (2) +#define GSS_C_REPLAY_FLAG (4) +#define GSS_C_SEQUENCE_FLAG (8) +#define GSS_C_CONF_FLAG (16) +#define GSS_C_INTEG_FLAG (32) +#define GSS_C_ANON_FLAG (64) +#define GSS_C_PROT_READY_FLAG (128) +#define GSS_C_TRANS_FLAG (256) + +/* + * Credential usage options + */ +#define GSS_C_BOTH (0) +#define GSS_C_INITIATE (1) +#define GSS_C_ACCEPT (2) + +/* + * Status code types for gss_display_status + */ +#define GSS_C_GSS_CODE (1) +#define GSS_C_MECH_CODE (2) + + +/* + * Define the default Quality of Protection for per-message services. Note + * that an implementation that offers multiple levels of QOP may either reserve + * a value (for example zero, as assumed here) to mean "default protection", or + * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit + * QOP value. However a value of 0 should always be interpreted by a GSSAPI + * implementation as a request for the default protection level. + */ +#define GSS_C_QOP_DEFAULT (0) + +/* + * Expiration time of 2^32-1 seconds means infinite lifetime for a + * credential or security context + */ +#define GSS_C_INDEFINITE ((OM_uint32) 0xfffffffful) + + +/* Major status codes */ + +#define GSS_S_COMPLETE (0) + +/* + * Some "helper" definitions to make the status code macros obvious. + */ +#define GSS_C_CALLING_ERROR_OFFSET (24) +#define GSS_C_ROUTINE_ERROR_OFFSET (16) +#define GSS_C_SUPPLEMENTARY_OFFSET (0) +#define GSS_C_CALLING_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_ROUTINE_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_SUPPLEMENTARY_MASK ((OM_uint32) 0177777ul) + +/* + * The macros that test status codes for error conditions. Note that the + * GSS_ERROR() macro has changed slightly from the V1 GSSAPI so that it now + * evaluates its argument only once. + */ +#define GSS_CALLING_ERROR(x) \ + ((x) & (GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET)) +#define GSS_ROUTINE_ERROR(x) \ + ((x) & (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET)) +#define GSS_SUPPLEMENTARY_INFO(x) \ + ((x) & (GSS_C_SUPPLEMENTARY_MASK << GSS_C_SUPPLEMENTARY_OFFSET)) +#define GSS_ERROR(x) \ + ((x) & ((GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET) | \ + (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET))) + +/* + * Now the actual status code definitions + */ + +/* + * Calling errors: + */ +#define GSS_S_CALL_INACCESSIBLE_READ \ + (((OM_uint32) 1ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_INACCESSIBLE_WRITE \ + (((OM_uint32) 2ul) << GSS_C_CALLING_ERROR_OFFSET) +#define GSS_S_CALL_BAD_STRUCTURE \ + (((OM_uint32) 3ul) << GSS_C_CALLING_ERROR_OFFSET) + +/* + * Routine errors: + */ +#define GSS_S_BAD_MECH \ + (((OM_uint32) 1ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAME \ + (((OM_uint32) 2ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_NAMETYPE \ + (((OM_uint32) 3ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_BINDINGS \ + (((OM_uint32) 4ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_STATUS \ + (((OM_uint32) 5ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_SIG \ + (((OM_uint32) 6ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CRED \ + (((OM_uint32) 7ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NO_CONTEXT \ + (((OM_uint32) 8ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_TOKEN \ + (((OM_uint32) 9ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DEFECTIVE_CREDENTIAL \ + (((OM_uint32) 10ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CREDENTIALS_EXPIRED \ + (((OM_uint32) 11ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_CONTEXT_EXPIRED \ + (((OM_uint32) 12ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_FAILURE \ + (((OM_uint32) 13ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_BAD_QOP \ + (((OM_uint32) 14ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAUTHORIZED \ + (((OM_uint32) 15ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_UNAVAILABLE \ + (((OM_uint32) 16ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_DUPLICATE_ELEMENT \ + (((OM_uint32) 17ul) << GSS_C_ROUTINE_ERROR_OFFSET) +#define GSS_S_NAME_NOT_MN \ + (((OM_uint32) 18ul) << GSS_C_ROUTINE_ERROR_OFFSET) + +/* + * Supplementary info bits: + */ +#define GSS_S_CONTINUE_NEEDED (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 0)) +#define GSS_S_DUPLICATE_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 1)) +#define GSS_S_OLD_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 2)) +#define GSS_S_UNSEQ_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 3)) +#define GSS_S_GAP_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 4)) + +/* XXXX these are not part of the GSSAPI C bindings! (but should be) */ + +#define GSS_CALLING_ERROR_FIELD(x) \ + (((x) >> GSS_C_CALLING_ERROR_OFFSET) & GSS_C_CALLING_ERROR_MASK) +#define GSS_ROUTINE_ERROR_FIELD(x) \ + (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) +#define GSS_SUPPLEMENTARY_INFO_FIELD(x) \ + (((x) >> GSS_C_SUPPLEMENTARY_OFFSET) & GSS_C_SUPPLEMENTARY_MASK) + +/* XXXX This is a necessary evil until the spec is fixed */ +#define GSS_S_CRED_UNAVAIL GSS_S_FAILURE + +#endif /* __PTLRPC_GSS_GSS_ERR_H_ */ diff --git a/lustre/ptlrpc/gss/gss_generic_token.c b/lustre/ptlrpc/gss/gss_generic_token.c new file mode 100644 index 0000000..6cb4028 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_generic_token.c @@ -0,0 +1,290 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * linux/net/sunrpc/gss_generic_token.c + * + * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + */ + +/* + * Copyright 1993 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" +#include "gss_krb5.h" +#include "gss_asn1.h" + + +/* TWRITE_STR from gssapiP_generic.h */ +#define TWRITE_STR(ptr, str, len) \ + memcpy((ptr), (char *) (str), (len)); \ + (ptr) += (len); + +/* XXXX this code currently makes the assumption that a mech oid will + never be longer than 127 bytes. This assumption is not inherent in + the interfaces, so the code can be fixed if the OSI namespace + balloons unexpectedly. */ + +/* Each token looks like this: + +0x60 tag for APPLICATION 0, SEQUENCE + (constructed, definite-length) + <length> possible multiple bytes, need to parse/generate + 0x06 tag for OBJECT IDENTIFIER + <moid_length> compile-time constant string (assume 1 byte) + <moid_bytes> compile-time constant string + <inner_bytes> the ANY containing the application token + bytes 0,1 are the token type + bytes 2,n are the token data + +For the purposes of this abstraction, the token "header" consists of +the sequence tag and length octets, the mech OID DER encoding, and the +first two inner bytes, which indicate the token type. The token +"body" consists of everything else. + +*/ + +static +int der_length_size(int length) +{ + if (length < (1 << 7)) + return 1; + else if (length < (1 << 8)) + return 2; +#if (SIZEOF_INT == 2) + else + return 3; +#else + else if (length < (1 << 16)) + return 3; + else if (length < (1 << 24)) + return 4; + else + return 5; +#endif +} + +static +void der_write_length(unsigned char **buf, int length) +{ + if (length < (1 << 7)) { + *(*buf)++ = (unsigned char) length; + } else { + *(*buf)++ = (unsigned char) (der_length_size(length) + 127); +#if (SIZEOF_INT > 2) + if (length >= (1 << 24)) + *(*buf)++ = (unsigned char) (length >> 24); + if (length >= (1 << 16)) + *(*buf)++ = (unsigned char) ((length >> 16) & 0xff); +#endif + if (length >= (1 << 8)) + *(*buf)++ = (unsigned char) ((length >> 8) & 0xff); + *(*buf)++ = (unsigned char) (length & 0xff); + } +} + +/* + * returns decoded length, or < 0 on failure. Advances buf and + * decrements bufsize + */ +static +int der_read_length(unsigned char **buf, int *bufsize) +{ + unsigned char sf; + int ret; + + if (*bufsize < 1) + return -1; + sf = *(*buf)++; + (*bufsize)--; + if (sf & 0x80) { + if ((sf &= 0x7f) > ((*bufsize) - 1)) + return -1; + if (sf > SIZEOF_INT) + return -1; + ret = 0; + for (; sf; sf--) { + ret = (ret << 8) + (*(*buf)++); + (*bufsize)--; + } + } else { + ret = sf; + } + + return ret; +} + +/* + * returns the length of a token, given the mech oid and the body size + */ +int g_token_size(rawobj_t *mech, unsigned int body_size) +{ + /* set body_size to sequence contents size */ + body_size += 4 + (int) mech->len; /* NEED overflow check */ + return (1 + der_length_size(body_size) + body_size); +} + +/* + * fills in a buffer with the token header. The buffer is assumed to + * be the right size. buf is advanced past the token header + */ +void g_make_token_header(rawobj_t *mech, int body_size, unsigned char **buf) +{ + *(*buf)++ = 0x60; + der_write_length(buf, 4 + mech->len + body_size); + *(*buf)++ = 0x06; + *(*buf)++ = (unsigned char) mech->len; + TWRITE_STR(*buf, mech->data, ((int) mech->len)); +} + +/* + * Given a buffer containing a token, reads and verifies the token, + * leaving buf advanced past the token header, and setting body_size + * to the number of remaining bytes. Returns 0 on success, + * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the + * mechanism in the token does not match the mech argument. buf and + * *body_size are left unmodified on error. + */ +__u32 g_verify_token_header(rawobj_t *mech, int *body_size, + unsigned char **buf_in, int toksize) +{ + unsigned char *buf = *buf_in; + int seqsize; + rawobj_t toid; + int ret = 0; + + if ((toksize -= 1) < 0) + return (G_BAD_TOK_HEADER); + if (*buf++ != 0x60) + return (G_BAD_TOK_HEADER); + + if ((seqsize = der_read_length(&buf, &toksize)) < 0) + return(G_BAD_TOK_HEADER); + + if (seqsize != toksize) + return (G_BAD_TOK_HEADER); + + if ((toksize -= 1) < 0) + return (G_BAD_TOK_HEADER); + if (*buf++ != 0x06) + return (G_BAD_TOK_HEADER); + + if ((toksize -= 1) < 0) + return (G_BAD_TOK_HEADER); + toid.len = *buf++; + + if ((toksize -= toid.len) < 0) + return (G_BAD_TOK_HEADER); + toid.data = buf; + buf += toid.len; + + if (!g_OID_equal(&toid, mech)) + ret = G_WRONG_MECH; + + /* G_WRONG_MECH is not returned immediately because it's more + * important to return G_BAD_TOK_HEADER if the token header is + * in fact bad + */ + if ((toksize -= 2) < 0) + return (G_BAD_TOK_HEADER); + + if (ret) + return (ret); + + if (!ret) { + *buf_in = buf; + *body_size = toksize; + } + + return (ret); +} + +/* + * Given a buffer containing a token, returns a copy of the mech oid in + * the parameter mech. + */ +__u32 g_get_mech_oid(rawobj_t *mech, rawobj_t *in_buf) +{ + unsigned char *buf = in_buf->data; + int len = in_buf->len; + int ret = 0; + int seqsize; + + if ((len -= 1) < 0) + return (G_BAD_TOK_HEADER); + if (*buf++ != 0x60) + return (G_BAD_TOK_HEADER); + + if ((seqsize = der_read_length(&buf, &len)) < 0) + return (G_BAD_TOK_HEADER); + + if ((len -= 1) < 0) + return (G_BAD_TOK_HEADER); + if (*buf++ != 0x06) + return (G_BAD_TOK_HEADER); + + if ((len -= 1) < 0) + return (G_BAD_TOK_HEADER); + mech->len = *buf++; + + if ((len -= mech->len) < 0) + return (G_BAD_TOK_HEADER); + OBD_ALLOC(mech->data, mech->len); + if (!mech->data) + return (G_BUFFER_ALLOC); + memcpy(mech->data, buf, mech->len); + + return ret; +} diff --git a/lustre/ptlrpc/gss/gss_internal.h b/lustre/ptlrpc/gss/gss_internal.h new file mode 100644 index 0000000..8fd8014 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_internal.h @@ -0,0 +1,352 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modified from NFSv4 project for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#ifndef __PTLRPC_GSS_GSS_INTERNAL_H_ +#define __PTLRPC_GSS_GSS_INTERNAL_H_ + +#include <lustre_sec.h> + +/* + * rawobj stuff + */ +typedef struct netobj_s { + __u32 len; + __u8 data[0]; +} netobj_t; + +#define NETOBJ_EMPTY ((netobj_t) { 0 }) + +typedef struct rawobj_s { + __u32 len; + __u8 *data; +} rawobj_t; + +#define RAWOBJ_EMPTY ((rawobj_t) { 0, NULL }) + +typedef struct rawobj_buf_s { + __u32 dataoff; + __u32 datalen; + __u32 buflen; + __u8 *buf; +} rawobj_buf_t; + +int rawobj_alloc(rawobj_t *obj, char *buf, int len); +void rawobj_free(rawobj_t *obj); +int rawobj_equal(rawobj_t *a, rawobj_t *b); +int rawobj_dup(rawobj_t *dest, rawobj_t *src); +int rawobj_serialize(rawobj_t *obj, __u32 **buf, __u32 *buflen); +int rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen); +int rawobj_extract_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen); +int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen); +int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj); +int rawobj_from_netobj_alloc(rawobj_t *obj, netobj_t *netobj); + + +/* + * several timeout values. client refresh upcall timeout we using + * default in pipefs implemnetation. + */ +#define __TIMEOUT_DELTA (10) + +#define GSS_SECINIT_RPC_TIMEOUT \ + (obd_timeout < __TIMEOUT_DELTA ? \ + __TIMEOUT_DELTA : obd_timeout - __TIMEOUT_DELTA) + +#define GSS_SECFINI_RPC_TIMEOUT (__TIMEOUT_DELTA) +#define GSS_SECSVC_UPCALL_TIMEOUT (GSS_SECINIT_RPC_TIMEOUT) + +static inline +unsigned long gss_round_ctx_expiry(unsigned long expiry, + unsigned long sec_flags) +{ + if (sec_flags & PTLRPC_SEC_FL_REVERSE) + return expiry; + + if (get_seconds() + __TIMEOUT_DELTA <= expiry) + return expiry - __TIMEOUT_DELTA; + + return expiry; +} + +/* we try to force reconnect import 20m eariler than real expiry. + * kerberos 5 usually allow 5m time skew, but which is adjustable, + * so if we set krb5 to allow > 20m time skew, we have chance that + * server's reverse ctx expired but client still hasn't start to + * refresh it -- it's BAD. So here we actually put a limit on the + * enviroment of krb5 (or other authentication mechanism) + */ +#define GSS_MAX_TIME_SKEW (20 * 60) + +static inline +unsigned long gss_round_imp_reconnect(unsigned long expiry) +{ + unsigned long now = get_seconds(); + unsigned long nice = GSS_MAX_TIME_SKEW + __TIMEOUT_DELTA; + + while (nice && (now + nice >= expiry)) + nice = nice / 2; + + return (expiry - nice); +} + +/* + * Max encryption element in block cipher algorithms. + */ +#define GSS_MAX_CIPHER_BLOCK (16) + +/* + * XXX make it visible of kernel and lgssd/lsvcgssd + */ +#define GSSD_INTERFACE_VERSION (1) + +#define PTLRPC_GSS_VERSION (1) + + +enum ptlrpc_gss_proc { + PTLRPC_GSS_PROC_DATA = 0, + PTLRPC_GSS_PROC_INIT = 1, + PTLRPC_GSS_PROC_CONTINUE_INIT = 2, + PTLRPC_GSS_PROC_DESTROY = 3, + PTLRPC_GSS_PROC_ERR = 4, +}; + +enum ptlrpc_gss_svc { + PTLRPC_GSS_SVC_NONE = 1, + PTLRPC_GSS_SVC_INTEGRITY = 2, + PTLRPC_GSS_SVC_PRIVACY = 3, +}; + +enum ptlrpc_gss_tgt { + LUSTRE_GSS_TGT_MDS = 0, + LUSTRE_GSS_TGT_OSS = 1, +}; + +/* + * following 3 header must have the same size and offset + */ +struct gss_header { + __u32 gh_version; /* gss version */ + __u32 gh_flags; /* wrap flags */ + __u32 gh_proc; /* proc */ + __u32 gh_seq; /* sequence */ + __u32 gh_svc; /* service */ + __u32 gh_pad1; + __u32 gh_pad2; + __u32 gh_pad3; + netobj_t gh_handle; /* context handle */ +}; + +struct gss_rep_header { + __u32 gh_version; + __u32 gh_flags; + __u32 gh_proc; + __u32 gh_major; + __u32 gh_minor; + __u32 gh_seqwin; + __u32 gh_pad2; + __u32 gh_pad3; + netobj_t gh_handle; +}; + +struct gss_err_header { + __u32 gh_version; + __u32 gh_flags; + __u32 gh_proc; + __u32 gh_major; + __u32 gh_minor; + __u32 gh_pad1; + __u32 gh_pad2; + __u32 gh_pad3; + netobj_t gh_handle; +}; + +/* + * part of wire context information send from client which be saved and + * used later by server. + */ +struct gss_wire_ctx { + __u32 gw_proc; + __u32 gw_seq; + __u32 gw_svc; + rawobj_t gw_handle; +}; + +#define PTLRPC_GSS_MAX_HANDLE_SIZE (8) +#define PTLRPC_GSS_HEADER_SIZE (sizeof(struct gss_header) + \ + PTLRPC_GSS_MAX_HANDLE_SIZE) + + +#define GSS_SEQ_WIN (256) +#define GSS_SEQ_WIN_MAIN GSS_SEQ_WIN +#define GSS_SEQ_WIN_BACK (64) +#define GSS_SEQ_REPACK_THRESHOLD (GSS_SEQ_WIN_MAIN / 2) + +struct gss_svc_seq_data { + spinlock_t ssd_lock; + /* + * highest sequence number seen so far, for main and back window + */ + __u32 ssd_max_main; + __u32 ssd_max_back; + /* + * main and back window + * for i such that ssd_max - GSS_SEQ_WIN < i <= ssd_max, the i-th bit + * of ssd_win is nonzero iff sequence number i has been seen already. + */ + unsigned long ssd_win_main[GSS_SEQ_WIN_MAIN/BITS_PER_LONG]; + unsigned long ssd_win_back[GSS_SEQ_WIN_BACK/BITS_PER_LONG]; +}; + +struct gss_svc_ctx { + unsigned int gsc_usr_root:1, + gsc_usr_mds:1, + gsc_remote:1; + uid_t gsc_uid; + gid_t gsc_gid; + uid_t gsc_mapped_uid; + rawobj_t gsc_rvs_hdl; + struct gss_svc_seq_data gsc_seqdata; + struct gss_ctx *gsc_mechctx; +}; + +struct gss_svc_reqctx { + struct ptlrpc_svc_ctx src_base; + struct gss_wire_ctx src_wirectx; + struct gss_svc_ctx *src_ctx; + unsigned int src_init:1, + src_init_continue:1, + src_err_notify:1; + int src_reserve_len; +}; + +struct gss_cli_ctx { + struct ptlrpc_cli_ctx gc_base; + __u32 gc_flavor; + __u32 gc_proc; + __u32 gc_win; + atomic_t gc_seq; + rawobj_t gc_handle; + struct gss_ctx *gc_mechctx; +}; + +struct gss_sec { + struct ptlrpc_sec gs_base; + struct gss_api_mech *gs_mech; + spinlock_t gs_lock; + __u64 gs_rvs_hdl; +}; + +#define GSS_CTX_INIT_MAX_LEN (1024) + +/* + * This only guaranteed be enough for current krb5 des-cbc-crc . We might + * adjust this when new enc type or mech added in. + */ +#define GSS_PRIVBUF_PREFIX_LEN (32) +#define GSS_PRIVBUF_SUFFIX_LEN (32) + +static inline +struct gss_svc_reqctx *gss_svc_ctx2reqctx(struct ptlrpc_svc_ctx *ctx) +{ + LASSERT(ctx); + return container_of(ctx, struct gss_svc_reqctx, src_base); +} + +/* sec_gss.c */ +struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment); +netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment); + +void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx); +int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor); +int gss_check_seq_num(struct gss_svc_seq_data *sd, __u32 seq_num, int set); + +/* gss_bulk.c */ +int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int gss_cli_ctx_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int gss_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int gss_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + +/* gss_mech_switch.c */ +int init_kerberos_module(void); +void cleanup_kerberos_module(void); + +/* gss_generic_token.c */ +int g_token_size(rawobj_t *mech, unsigned int body_size); +void g_make_token_header(rawobj_t *mech, int body_size, unsigned char **buf); +__u32 g_verify_token_header(rawobj_t *mech, int *body_size, + unsigned char **buf_in, int toksize); + + +/* gss_upcall.c */ +int gss_do_ctx_init_rpc(char *buffer, unsigned long count); +int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx); +int gss_ctx_refresh_pipefs(struct ptlrpc_cli_ctx *ctx); +int gss_sec_upcall_init(struct gss_sec *gsec); +void gss_sec_upcall_cleanup(struct gss_sec *gsec); +int __init gss_init_upcall(void); +void __exit gss_exit_upcall(void); + +/* gss_svc_upcall.c */ +__u64 gss_get_next_ctx_index(void); +int gss_svc_upcall_install_rvs_ctx(struct obd_import *imp, + struct gss_sec *gsec, + struct gss_cli_ctx *gctx); +int gss_svc_upcall_handle_init(struct ptlrpc_request *req, + struct gss_svc_reqctx *grctx, + struct gss_wire_ctx *gw, + struct obd_device *target, + __u32 lustre_svc, + rawobj_t *rvs_hdl, + rawobj_t *in_token); +struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req, + struct gss_wire_ctx *gw); +void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx); +void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx); + +int __init gss_svc_init_upcall(void); +void __exit gss_svc_exit_upcall(void); + +/* lproc_gss.c */ +void gss_stat_oos_record_cli(int behind); +void gss_stat_oos_record_svc(int phase, int replay); +int gss_init_lproc(void); +void gss_exit_lproc(void); + +/* gss_krb5_mech.c */ +int __init init_kerberos_module(void); +void __exit cleanup_kerberos_module(void); + + +/* debug */ +static inline +void __dbg_memdump(char *name, void *ptr, int size) +{ + char *buf, *p = (char *) ptr; + int bufsize = size * 2 + 1, i; + + OBD_ALLOC(buf, bufsize); + if (!buf) { + printk("DUMP ERROR: can't alloc %d bytes\n", bufsize); + return; + } + + for (i = 0; i < size; i++) + sprintf(&buf[i+i], "%02x", (__u8) p[i]); + buf[size + size] = '\0'; + printk("DUMP %s@%p(%d): %s\n", name, ptr, size, buf); + OBD_FREE(buf, bufsize); +} + +#endif /* __PTLRPC_GSS_GSS_INTERNAL_H_ */ diff --git a/lustre/ptlrpc/gss/gss_krb5.h b/lustre/ptlrpc/gss/gss_krb5.h new file mode 100644 index 0000000..8cc4d44 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_krb5.h @@ -0,0 +1,166 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * linux/include/linux/sunrpc/gss_krb5_types.h + * + * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, + * lib/gssapi/krb5/gssapiP_krb5.h, and others + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + * Bruce Fields <bfields@umich.edu> + */ + +/* + * Copyright 1995 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + */ + +#ifndef PTLRPC_GSS_KRB5_H +#define PTLRPC_GSS_KRB5_H + +extern spinlock_t krb5_seq_lock; + +/* + * RFC 4142 + */ + +#define KG_USAGE_ACCEPTOR_SEAL 22 +#define KG_USAGE_ACCEPTOR_SIGN 23 +#define KG_USAGE_INITIATOR_SEAL 24 +#define KG_USAGE_INITIATOR_SIGN 25 + +#define KG_TOK_MIC_MSG 0x0404 +#define KG_TOK_WRAP_MSG 0x0504 + +#define FLAG_SENDER_IS_ACCEPTOR 0x01 +#define FLAG_WRAP_CONFIDENTIAL 0x02 +#define FLAG_ACCEPTOR_SUBKEY 0x04 + +struct krb5_header { + __u16 kh_tok_id; /* token id */ + __u8 kh_flags; /* acceptor flags */ + __u8 kh_filler; /* 0xff */ + __u16 kh_ec; /* extra count */ + __u16 kh_rrc; /* right rotation count */ + __u64 kh_seq; /* sequence number */ + __u8 kh_cksum[0]; /* checksum */ +}; + +struct krb5_keyblock { + rawobj_t kb_key; + struct crypto_tfm *kb_tfm; +}; + +struct krb5_ctx { + unsigned int kc_initiate:1, + kc_cfx:1, + kc_seed_init:1, + kc_have_acceptor_subkey:1; + __s32 kc_endtime; + __u8 kc_seed[16]; + __u64 kc_seq_send; + __u64 kc_seq_recv; + __u32 kc_enctype; + struct krb5_keyblock kc_keye; /* encryption */ + struct krb5_keyblock kc_keyi; /* integrity */ + struct krb5_keyblock kc_keyc; /* checksum */ + rawobj_t kc_mech_used; +}; + +enum sgn_alg { + SGN_ALG_DES_MAC_MD5 = 0x0000, + SGN_ALG_MD2_5 = 0x0001, + SGN_ALG_DES_MAC = 0x0002, + SGN_ALG_3 = 0x0003, /* not published */ + SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */ + SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 +}; + +enum seal_alg { + SEAL_ALG_NONE = 0xffff, + SEAL_ALG_DES = 0x0000, + SEAL_ALG_1 = 0x0001, /* not published */ + SEAL_ALG_MICROSOFT_RC4 = 0x0010, /* microsoft w2k; no support */ + SEAL_ALG_DES3KD = 0x0002 +}; + +#define CKSUMTYPE_CRC32 0x0001 +#define CKSUMTYPE_RSA_MD4 0x0002 +#define CKSUMTYPE_RSA_MD4_DES 0x0003 +#define CKSUMTYPE_DESCBC 0x0004 +/* des-mac-k */ +/* rsa-md4-des-k */ +#define CKSUMTYPE_RSA_MD5 0x0007 +#define CKSUMTYPE_RSA_MD5_DES 0x0008 +#define CKSUMTYPE_NIST_SHA 0x0009 +#define CKSUMTYPE_HMAC_SHA1_DES3 0x000c +#define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f +#define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 + +/* from gssapi_err_krb5.h */ +#define KG_CCACHE_NOMATCH (39756032L) +#define KG_KEYTAB_NOMATCH (39756033L) +#define KG_TGT_MISSING (39756034L) +#define KG_NO_SUBKEY (39756035L) +#define KG_CONTEXT_ESTABLISHED (39756036L) +#define KG_BAD_SIGN_TYPE (39756037L) +#define KG_BAD_LENGTH (39756038L) +#define KG_CTX_INCOMPLETE (39756039L) +#define KG_CONTEXT (39756040L) +#define KG_CRED (39756041L) +#define KG_ENC_DESC (39756042L) +#define KG_BAD_SEQ (39756043L) +#define KG_EMPTY_CCACHE (39756044L) +#define KG_NO_CTYPES (39756045L) + +/* per Kerberos v5 protocol spec crypto types from the wire. + * these get mapped to linux kernel crypto routines. + */ +#define ENCTYPE_NULL 0x0000 +#define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ +#define ENCTYPE_DES_CBC_MD4 0x0002 /* DES cbc mode with RSA-MD4 */ +#define ENCTYPE_DES_CBC_MD5 0x0003 /* DES cbc mode with RSA-MD5 */ +#define ENCTYPE_DES_CBC_RAW 0x0004 /* DES cbc mode raw */ +/* XXX deprecated? */ +#define ENCTYPE_DES3_CBC_SHA 0x0005 /* DES-3 cbc mode with NIST-SHA */ +#define ENCTYPE_DES3_CBC_RAW 0x0006 /* DES-3 cbc mode raw */ +#define ENCTYPE_DES_HMAC_SHA1 0x0008 +#define ENCTYPE_DES3_CBC_SHA1 0x0010 +#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 +#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_ARCFOUR_HMAC 0x0017 +#define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_UNKNOWN 0x01ff + +#endif /* PTLRPC_GSS_KRB5_H */ diff --git a/lustre/ptlrpc/gss/gss_krb5_mech.c b/lustre/ptlrpc/gss/gss_krb5_mech.c new file mode 100644 index 0000000..b0f9292 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_krb5_mech.c @@ -0,0 +1,1263 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * linux/net/sunrpc/gss_krb5_mech.c + * linux/net/sunrpc/gss_krb5_crypto.c + * linux/net/sunrpc/gss_krb5_seal.c + * linux/net/sunrpc/gss_krb5_seqnum.c + * linux/net/sunrpc/gss_krb5_unseal.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + * J. Bruce Fields <bfields@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/crypto.h> +#include <linux/random.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" +#include "gss_asn1.h" +#include "gss_krb5.h" + +spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; + +struct krb5_enctype { + char *ke_dispname; + char *ke_enc_name; /* linux tfm name */ + char *ke_hash_name; /* linux tfm name */ + int ke_enc_mode; /* linux tfm mode */ + int ke_hash_size; /* checksum size */ + int ke_conf_size; /* confounder size */ + unsigned int ke_hash_hmac:1; /* is hmac? */ +}; + +/* + * NOTE: for aes128-cts and aes256-cts, MIT implementation use CTS encryption. + * but currently we simply CBC with padding, because linux doesn't support CTS + * yet. this need to be fixed in the future. + */ +static struct krb5_enctype enctypes[] = { + [ENCTYPE_DES_CBC_RAW] = { /* des-cbc-md5 */ + "des-cbc-md5", + "des", + "md5", + CRYPTO_TFM_MODE_CBC, + 16, + 8, + 0, + }, + [ENCTYPE_DES3_CBC_RAW] = { /* des3-hmac-sha1 */ + "des-hmac-sha1", + "des3_ede", + "sha1", + CRYPTO_TFM_MODE_CBC, + 20, + 8, + 1, + }, + [ENCTYPE_AES128_CTS_HMAC_SHA1_96] = { /* aes128-cts */ + "aes128-cts-hmac-sha1-96", + "aes", + "sha1", + CRYPTO_TFM_MODE_CBC, + 12, + 16, + 1, + }, + [ENCTYPE_AES256_CTS_HMAC_SHA1_96] = { /* aes256-cts */ + "aes256-cts-hmac-sha1-96", + "aes", + "sha1", + CRYPTO_TFM_MODE_CBC, + 12, + 16, + 1, + }, + [ENCTYPE_ARCFOUR_HMAC] = { /* arcfour-hmac-md5 */ + "arcfour-hmac-md5", + "arc4", + "md5", + CRYPTO_TFM_MODE_ECB, + 16, + 8, + 1, + }, +}; + +#define MAX_ENCTYPES sizeof(enctypes)/sizeof(struct krb5_enctype) + +static const char * enctype2str(__u32 enctype) +{ + if (enctype < MAX_ENCTYPES && enctypes[enctype].ke_dispname) + return enctypes[enctype].ke_dispname; + + return "unknown"; +} + +static +int keyblock_init(struct krb5_keyblock *kb, char *alg_name, int alg_mode) +{ + kb->kb_tfm = crypto_alloc_tfm(alg_name, alg_mode); + if (kb->kb_tfm == NULL) { + CERROR("failed to alloc tfm: %s, mode %d\n", + alg_name, alg_mode); + return -1; + } + + if (crypto_cipher_setkey(kb->kb_tfm, kb->kb_key.data, kb->kb_key.len)) { + CERROR("failed to set %s key, len %d\n", + alg_name, kb->kb_key.len); + return -1; + } + + return 0; +} + +static +int krb5_init_keys(struct krb5_ctx *kctx) +{ + struct krb5_enctype *ke; + + if (kctx->kc_enctype >= MAX_ENCTYPES || + enctypes[kctx->kc_enctype].ke_hash_size == 0) { + CERROR("unsupported enctype %x\n", kctx->kc_enctype); + return -1; + } + + ke = &enctypes[kctx->kc_enctype]; + + /* tfm arc4 is stateful, user should alloc-use-free by his own */ + if (kctx->kc_enctype != ENCTYPE_ARCFOUR_HMAC && + keyblock_init(&kctx->kc_keye, ke->ke_enc_name, ke->ke_enc_mode)) + return -1; + + /* tfm hmac is stateful, user should alloc-use-free by his own */ + if (ke->ke_hash_hmac == 0 && + keyblock_init(&kctx->kc_keyi, ke->ke_enc_name, ke->ke_enc_mode)) + return -1; + if (ke->ke_hash_hmac == 0 && + keyblock_init(&kctx->kc_keyc, ke->ke_enc_name, ke->ke_enc_mode)) + return -1; + + return 0; +} + +static +void keyblock_free(struct krb5_keyblock *kb) +{ + rawobj_free(&kb->kb_key); + if (kb->kb_tfm) + crypto_free_tfm(kb->kb_tfm); +} + +static +int keyblock_dup(struct krb5_keyblock *new, struct krb5_keyblock *kb) +{ + return rawobj_dup(&new->kb_key, &kb->kb_key); +} + +static +int get_bytes(char **ptr, const char *end, void *res, int len) +{ + char *p, *q; + p = *ptr; + q = p + len; + if (q > end || q < p) + return -1; + memcpy(res, p, len); + *ptr = q; + return 0; +} + +static +int get_rawobj(char **ptr, const char *end, rawobj_t *res) +{ + char *p, *q; + __u32 len; + + p = *ptr; + if (get_bytes(&p, end, &len, sizeof(len))) + return -1; + + q = p + len; + if (q > end || q < p) + return -1; + + OBD_ALLOC(res->data, len); + if (!res->data) + return -1; + + res->len = len; + memcpy(res->data, p, len); + *ptr = q; + return 0; +} + +static +int get_keyblock(char **ptr, const char *end, + struct krb5_keyblock *kb, __u32 keysize) +{ + char *buf; + + OBD_ALLOC(buf, keysize); + if (buf == NULL) + return -1; + + if (get_bytes(ptr, end, buf, keysize)) { + OBD_FREE(buf, keysize); + return -1; + } + + kb->kb_key.len = keysize; + kb->kb_key.data = buf; + return 0; +} + +static +void delete_context_kerberos(struct krb5_ctx *kctx) +{ + rawobj_free(&kctx->kc_mech_used); + + keyblock_free(&kctx->kc_keye); + keyblock_free(&kctx->kc_keyi); + keyblock_free(&kctx->kc_keyc); +} + +static +__u32 import_context_rfc1964(struct krb5_ctx *kctx, char *p, char *end) +{ + unsigned int tmp_uint, keysize; + + /* seed_init flag */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) + goto out_err; + kctx->kc_seed_init = (tmp_uint != 0); + + /* seed */ + if (get_bytes(&p, end, kctx->kc_seed, sizeof(kctx->kc_seed))) + goto out_err; + + /* sign/seal algorithm, not really used now */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) || + get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) + goto out_err; + + /* end time */ + if (get_bytes(&p, end, &kctx->kc_endtime, sizeof(kctx->kc_endtime))) + goto out_err; + + /* seq send */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) + goto out_err; + kctx->kc_seq_send = tmp_uint; + + /* mech oid */ + if (get_rawobj(&p, end, &kctx->kc_mech_used)) + goto out_err; + + /* old style enc/seq keys in format: + * - enctype (u32) + * - keysize (u32) + * - keydata + * we decompose them to fit into the new context + */ + + /* enc key */ + if (get_bytes(&p, end, &kctx->kc_enctype, sizeof(kctx->kc_enctype))) + goto out_err; + + if (get_bytes(&p, end, &keysize, sizeof(keysize))) + goto out_err; + + if (get_keyblock(&p, end, &kctx->kc_keye, keysize)) + goto out_err; + + /* seq key */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) || + tmp_uint != kctx->kc_enctype) + goto out_err; + + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) || + tmp_uint != keysize) + goto out_err; + + if (get_keyblock(&p, end, &kctx->kc_keyc, keysize)) + goto out_err; + + /* old style fallback */ + if (keyblock_dup(&kctx->kc_keyi, &kctx->kc_keyc)) + goto out_err; + + if (p != end) + goto out_err; + + CDEBUG(D_SEC, "succesfully imported rfc1964 context\n"); + return 0; +out_err: + return GSS_S_FAILURE; +} + +/* Flags for version 2 context flags */ +#define KRB5_CTX_FLAG_INITIATOR 0x00000001 +#define KRB5_CTX_FLAG_CFX 0x00000002 +#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + +static +__u32 import_context_rfc4121(struct krb5_ctx *kctx, char *p, char *end) +{ + unsigned int tmp_uint, keysize; + + /* end time */ + if (get_bytes(&p, end, &kctx->kc_endtime, sizeof(kctx->kc_endtime))) + goto out_err; + + /* flags */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) + goto out_err; + + if (tmp_uint & KRB5_CTX_FLAG_INITIATOR) + kctx->kc_initiate = 1; + if (tmp_uint & KRB5_CTX_FLAG_CFX) + kctx->kc_cfx = 1; + if (tmp_uint & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) + kctx->kc_have_acceptor_subkey = 1; + + /* seq send */ + if (get_bytes(&p, end, &kctx->kc_seq_send, sizeof(kctx->kc_seq_send))) + goto out_err; + + /* enctype */ + if (get_bytes(&p, end, &kctx->kc_enctype, sizeof(kctx->kc_enctype))) + goto out_err; + + /* size of each key */ + if (get_bytes(&p, end, &keysize, sizeof(keysize))) + goto out_err; + + /* number of keys - should always be 3 */ + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) + goto out_err; + + if (tmp_uint != 3) { + CERROR("Invalid number of keys: %u\n", tmp_uint); + goto out_err; + } + + /* ke */ + if (get_keyblock(&p, end, &kctx->kc_keye, keysize)) + goto out_err; + /* ki */ + if (get_keyblock(&p, end, &kctx->kc_keyi, keysize)) + goto out_err; + /* ki */ + if (get_keyblock(&p, end, &kctx->kc_keyc, keysize)) + goto out_err; + + CDEBUG(D_SEC, "succesfully imported v2 context\n"); + return 0; +out_err: + return GSS_S_FAILURE; +} + +/* + * The whole purpose here is trying to keep user level gss context parsing + * from nfs-utils unchanged as possible as we can, they are not quite mature + * yet, and many stuff still not clear, like heimdal etc. + */ +static +__u32 gss_import_sec_context_kerberos(rawobj_t *inbuf, + struct gss_ctx *gctx) +{ + struct krb5_ctx *kctx; + char *p = (char *) inbuf->data; + char *end = (char *) (inbuf->data + inbuf->len); + unsigned int tmp_uint, rc; + + if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) { + CERROR("Fail to read version\n"); + return GSS_S_FAILURE; + } + + /* only support 0, 1 for the moment */ + if (tmp_uint > 2) { + CERROR("Invalid version %u\n", tmp_uint); + return GSS_S_FAILURE; + } + + OBD_ALLOC_PTR(kctx); + if (!kctx) + return GSS_S_FAILURE; + + if (tmp_uint == 0 || tmp_uint == 1) { + kctx->kc_initiate = tmp_uint; + rc = import_context_rfc1964(kctx, p, end); + } else { + rc = import_context_rfc4121(kctx, p, end); + } + + if (rc == 0) + rc = krb5_init_keys(kctx); + + if (rc) { + delete_context_kerberos(kctx); + OBD_FREE_PTR(kctx); + + return GSS_S_FAILURE; + } + + gctx->internal_ctx_id = kctx; + return GSS_S_COMPLETE; +} + +static +__u32 gss_copy_reverse_context_kerberos(struct gss_ctx *gctx, + struct gss_ctx *gctx_new) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + struct krb5_ctx *knew; + + OBD_ALLOC_PTR(knew); + if (!knew) + return GSS_S_FAILURE; + + knew->kc_initiate = kctx->kc_initiate ? 0 : 1; + knew->kc_cfx = kctx->kc_cfx; + knew->kc_seed_init = kctx->kc_seed_init; + knew->kc_have_acceptor_subkey = kctx->kc_have_acceptor_subkey; +#if 0 + knew->kc_endtime = kctx->kc_endtime; +#else + /* FIXME reverse context don't expire for now */ + knew->kc_endtime = INT_MAX; +#endif + memcpy(knew->kc_seed, kctx->kc_seed, sizeof(kctx->kc_seed)); + knew->kc_seq_send = kctx->kc_seq_recv; + knew->kc_seq_recv = kctx->kc_seq_send; + knew->kc_enctype = kctx->kc_enctype; + + if (rawobj_dup(&knew->kc_mech_used, &kctx->kc_mech_used)) + goto out_err; + + if (keyblock_dup(&knew->kc_keye, &kctx->kc_keye)) + goto out_err; + if (keyblock_dup(&knew->kc_keyi, &kctx->kc_keyi)) + goto out_err; + if (keyblock_dup(&knew->kc_keyc, &kctx->kc_keyc)) + goto out_err; + if (krb5_init_keys(knew)) + goto out_err; + + gctx_new->internal_ctx_id = knew; + CDEBUG(D_SEC, "succesfully copied reverse context\n"); + return GSS_S_COMPLETE; + +out_err: + delete_context_kerberos(knew); + OBD_FREE_PTR(knew); + return GSS_S_FAILURE; +} + +static +__u32 gss_inquire_context_kerberos(struct gss_ctx *gctx, + unsigned long *endtime) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + + *endtime = (unsigned long) ((__u32) kctx->kc_endtime); + return GSS_S_COMPLETE; +} + +static +void gss_delete_sec_context_kerberos(void *internal_ctx) +{ + struct krb5_ctx *kctx = internal_ctx; + + delete_context_kerberos(kctx); + OBD_FREE_PTR(kctx); +} + +static +void buf_to_sg(struct scatterlist *sg, char *ptr, int len) +{ + sg->page = virt_to_page(ptr); + sg->offset = offset_in_page(ptr); + sg->length = len; +} + +static +__u32 krb5_encrypt(struct crypto_tfm *tfm, + int decrypt, + void * iv, + void * in, + void * out, + int length) +{ + struct scatterlist sg; + __u8 local_iv[16] = {0}; + __u32 ret = -EINVAL; + + LASSERT(tfm); + + if (length % crypto_tfm_alg_blocksize(tfm) != 0) { + CERROR("output length %d mismatch blocksize %d\n", + length, crypto_tfm_alg_blocksize(tfm)); + goto out; + } + + if (crypto_tfm_alg_ivsize(tfm) > 16) { + CERROR("iv size too large %d\n", crypto_tfm_alg_ivsize(tfm)); + goto out; + } + + if (iv) + memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + + memcpy(out, in, length); + buf_to_sg(&sg, out, length); + + if (decrypt) + ret = crypto_cipher_decrypt_iv(tfm, &sg, &sg, length, local_iv); + else + ret = crypto_cipher_encrypt_iv(tfm, &sg, &sg, length, local_iv); + +out: + return(ret); +} + +static inline +int krb5_digest_hmac(struct crypto_tfm *tfm, + rawobj_t *key, + struct krb5_header *khdr, + int msgcnt, rawobj_t *msgs, + rawobj_t *cksum) +{ + struct scatterlist sg[1]; + __u32 keylen = key->len, i; + + crypto_hmac_init(tfm, key->data, &keylen); + + for (i = 0; i < msgcnt; i++) { + if (msgs[i].len == 0) + continue; + buf_to_sg(sg, (char *) msgs[i].data, msgs[i].len); + crypto_hmac_update(tfm, sg, 1); + } + + if (khdr) { + buf_to_sg(sg, (char *) khdr, sizeof(*khdr)); + crypto_hmac_update(tfm, sg, 1); + } + + crypto_hmac_final(tfm, key->data, &keylen, cksum->data); + return 0; +} + +static inline +int krb5_digest_norm(struct crypto_tfm *tfm, + struct krb5_keyblock *kb, + struct krb5_header *khdr, + int msgcnt, rawobj_t *msgs, + rawobj_t *cksum) +{ + struct scatterlist sg[1]; + int i; + + LASSERT(kb->kb_tfm); + + crypto_digest_init(tfm); + + for (i = 0; i < msgcnt; i++) { + if (msgs[i].len == 0) + continue; + buf_to_sg(sg, (char *) msgs[i].data, msgs[i].len); + crypto_digest_update(tfm, sg, 1); + } + + if (khdr) { + buf_to_sg(sg, (char *) khdr, sizeof(*khdr)); + crypto_digest_update(tfm, sg, 1); + } + + crypto_digest_final(tfm, cksum->data); + + return krb5_encrypt(kb->kb_tfm, 0, NULL, cksum->data, + cksum->data, cksum->len); +} + +/* + * compute (keyed/keyless) checksum against the plain text which appended + * with krb5 wire token header. + */ +static +__s32 krb5_make_checksum(__u32 enctype, + struct krb5_keyblock *kb, + struct krb5_header *khdr, + int msgcnt, rawobj_t *msgs, + rawobj_t *cksum) +{ + struct krb5_enctype *ke = &enctypes[enctype]; + struct crypto_tfm *tfm; + __u32 code = GSS_S_FAILURE; + int rc; + + if (!(tfm = crypto_alloc_tfm(ke->ke_hash_name, 0))) { + CERROR("failed to alloc TFM: %s\n", ke->ke_hash_name); + return GSS_S_FAILURE; + } + + cksum->len = crypto_tfm_alg_digestsize(tfm); + OBD_ALLOC(cksum->data, cksum->len); + if (!cksum->data) { + cksum->len = 0; + goto out_tfm; + } + + if (ke->ke_hash_hmac) + rc = krb5_digest_hmac(tfm, &kb->kb_key, + khdr, msgcnt, msgs, cksum); + else + rc = krb5_digest_norm(tfm, kb, + khdr, msgcnt, msgs, cksum); + + if (rc == 0) + code = GSS_S_COMPLETE; +out_tfm: + crypto_free_tfm(tfm); + return code; +} + +static +__u32 gss_get_mic_kerberos(struct gss_ctx *gctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *token) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + struct krb5_enctype *ke = &enctypes[kctx->kc_enctype]; + struct krb5_header *khdr; + unsigned char acceptor_flag; + rawobj_t cksum = RAWOBJ_EMPTY; + __u32 rc = GSS_S_FAILURE; + + acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR; + + /* fill krb5 header */ + LASSERT(token->len >= sizeof(*khdr)); + khdr = (struct krb5_header *) token->data; + + khdr->kh_tok_id = cpu_to_be16(KG_TOK_MIC_MSG); + khdr->kh_flags = acceptor_flag; + khdr->kh_filler = 0xff; + khdr->kh_ec = cpu_to_be16(0xffff); + khdr->kh_rrc = cpu_to_be16(0xffff); + spin_lock(&krb5_seq_lock); + khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++); + spin_unlock(&krb5_seq_lock); + + /* checksum */ + if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc, + khdr, msgcnt, msgs, &cksum)) + goto out_err; + + LASSERT(cksum.len >= ke->ke_hash_size); + LASSERT(token->len >= sizeof(*khdr) + ke->ke_hash_size); + memcpy(khdr + 1, cksum.data + cksum.len - ke->ke_hash_size, + ke->ke_hash_size); + + token->len = sizeof(*khdr) + ke->ke_hash_size; + rc = GSS_S_COMPLETE; +out_err: + rawobj_free(&cksum); + return rc; +} + +static +__u32 gss_verify_mic_kerberos(struct gss_ctx *gctx, + int msgcnt, + rawobj_t *msgs, + rawobj_t *token) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + struct krb5_enctype *ke = &enctypes[kctx->kc_enctype]; + struct krb5_header *khdr; + unsigned char acceptor_flag; + rawobj_t cksum = RAWOBJ_EMPTY; + __u32 rc = GSS_S_FAILURE; + + acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0; + + if (token->len < sizeof(*khdr)) { + CERROR("short signature: %u\n", token->len); + return GSS_S_DEFECTIVE_TOKEN; + } + + khdr = (struct krb5_header *) token->data; + + /* sanity checks */ + if (be16_to_cpu(khdr->kh_tok_id) != KG_TOK_MIC_MSG) { + CERROR("bad token id\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) { + CERROR("bad direction flag\n"); + return GSS_S_BAD_SIG; + } + if (khdr->kh_filler != 0xff) { + CERROR("bad filler\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + if (be16_to_cpu(khdr->kh_ec) != 0xffff || + be16_to_cpu(khdr->kh_rrc) != 0xffff) { + CERROR("bad EC or RRC\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + + if (token->len < sizeof(*khdr) + ke->ke_hash_size) { + CERROR("short signature: %u, require %d\n", + token->len, (int) sizeof(*khdr) + ke->ke_hash_size); + goto out; + } + + if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc, + khdr, msgcnt, msgs, &cksum)) + return GSS_S_FAILURE; + + LASSERT(cksum.len >= ke->ke_hash_size); + if (memcmp(khdr + 1, cksum.data + cksum.len - ke->ke_hash_size, + ke->ke_hash_size)) { + CERROR("checksum mismatch\n"); + rc = GSS_S_BAD_SIG; + goto out; + } + + rc = GSS_S_COMPLETE; +out: + rawobj_free(&cksum); + return rc; +} + +static +int add_padding(rawobj_t *msg, int msg_buflen, int blocksize) +{ + int padding; + + padding = (blocksize - (msg->len & (blocksize - 1))) & + (blocksize - 1); + if (!padding) + return 0; + + if (msg->len + padding > msg_buflen) { + CERROR("bufsize %u too small: datalen %u, padding %u\n", + msg_buflen, msg->len, padding); + return -EINVAL; + } + + memset(msg->data + msg->len, padding, padding); + msg->len += padding; + return 0; +} + +static +int krb5_encrypt_rawobjs(struct crypto_tfm *tfm, + int mode_ecb, + int inobj_cnt, + rawobj_t *inobjs, + rawobj_t *outobj, + int enc) +{ + struct scatterlist src, dst; + __u8 local_iv[16] = {0}, *buf; + __u32 datalen = 0; + int i, rc; + ENTRY; + + buf = outobj->data; + + for (i = 0; i < inobj_cnt; i++) { + LASSERT(buf + inobjs[i].len <= outobj->data + outobj->len); + + buf_to_sg(&src, inobjs[i].data, inobjs[i].len); + buf_to_sg(&dst, buf, outobj->len - datalen); + + if (mode_ecb) { + if (enc) + rc = crypto_cipher_encrypt( + tfm, &dst, &src, src.length); + else + rc = crypto_cipher_decrypt( + tfm, &dst, &src, src.length); + } else { + if (enc) + rc = crypto_cipher_encrypt_iv( + tfm, &dst, &src, src.length, local_iv); + else + rc = crypto_cipher_decrypt_iv( + tfm, &dst, &src, src.length, local_iv); + } + + if (rc) { + CERROR("encrypt error %d\n", rc); + RETURN(rc); + } + + datalen += inobjs[i].len; + buf += inobjs[i].len; + } + + outobj->len = datalen; + RETURN(0); +} + +static +__u32 gss_wrap_kerberos(struct gss_ctx *gctx, + rawobj_t *msg, + int msg_buflen, + rawobj_t *token) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + struct krb5_enctype *ke = &enctypes[kctx->kc_enctype]; + struct krb5_header *khdr; + unsigned char acceptor_flag; + int blocksize; + rawobj_t cksum = RAWOBJ_EMPTY; + rawobj_t data_desc[3], cipher; + __u8 conf[GSS_MAX_CIPHER_BLOCK]; + int enc_rc = 0; + + LASSERT(ke); + LASSERT(ke->ke_conf_size <= GSS_MAX_CIPHER_BLOCK); + LASSERT(kctx->kc_keye.kb_tfm == NULL || + ke->ke_conf_size >= + crypto_tfm_alg_blocksize(kctx->kc_keye.kb_tfm)); + + acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR; + + /* fill krb5 header */ + LASSERT(token->len >= sizeof(*khdr)); + khdr = (struct krb5_header *) token->data; + + khdr->kh_tok_id = cpu_to_be16(KG_TOK_WRAP_MSG); + khdr->kh_flags = acceptor_flag | FLAG_WRAP_CONFIDENTIAL; + khdr->kh_filler = 0xff; + khdr->kh_ec = cpu_to_be16(0); + khdr->kh_rrc = cpu_to_be16(0); + spin_lock(&krb5_seq_lock); + khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++); + spin_unlock(&krb5_seq_lock); + + /* generate confounder */ + get_random_bytes(conf, ke->ke_conf_size); + + /* get encryption blocksize. note kc_keye might not associated with + * a tfm, currently only for arcfour-hmac + */ + if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) { + LASSERT(kctx->kc_keye.kb_tfm == NULL); + blocksize = 1; + } else { + LASSERT(kctx->kc_keye.kb_tfm); + blocksize = crypto_tfm_alg_blocksize(kctx->kc_keye.kb_tfm); + } + LASSERT(blocksize <= ke->ke_conf_size); + + /* padding the message */ + if (add_padding(msg, msg_buflen, blocksize)) + return GSS_S_FAILURE; + + /* + * clear text layout, same for both checksum & encryption: + * ----------------------------------------- + * | confounder | clear msgs | krb5 header | + * ----------------------------------------- + */ + data_desc[0].data = conf; + data_desc[0].len = ke->ke_conf_size; + data_desc[1].data = msg->data; + data_desc[1].len = msg->len; + data_desc[2].data = (__u8 *) khdr; + data_desc[2].len = sizeof(*khdr); + + /* compute checksum */ + if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi, + khdr, 3, data_desc, &cksum)) + return GSS_S_FAILURE; + LASSERT(cksum.len >= ke->ke_hash_size); + + /* encrypting, cipher text will be directly inplace */ + cipher.data = (__u8 *) (khdr + 1); + cipher.len = token->len - sizeof(*khdr); + LASSERT(cipher.len >= ke->ke_conf_size + msg->len + sizeof(*khdr)); + + if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) { + rawobj_t arc4_keye; + struct crypto_tfm *arc4_tfm; + + if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi, + NULL, 1, &cksum, &arc4_keye)) { + CERROR("failed to obtain arc4 enc key\n"); + GOTO(arc4_out, enc_rc = -EACCES); + } + + arc4_tfm = crypto_alloc_tfm("arc4", CRYPTO_TFM_MODE_ECB); + if (arc4_tfm == NULL) { + CERROR("failed to alloc tfm arc4 in ECB mode\n"); + GOTO(arc4_out_key, enc_rc = -EACCES); + } + + if (crypto_cipher_setkey(arc4_tfm, + arc4_keye.data, arc4_keye.len)) { + CERROR("failed to set arc4 key, len %d\n", + arc4_keye.len); + GOTO(arc4_out_tfm, enc_rc = -EACCES); + } + + enc_rc = krb5_encrypt_rawobjs(arc4_tfm, 1, + 3, data_desc, &cipher, 1); +arc4_out_tfm: + crypto_free_tfm(arc4_tfm); +arc4_out_key: + rawobj_free(&arc4_keye); +arc4_out: + do {} while(0); /* just to avoid compile warning */ + } else { + enc_rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0, + 3, data_desc, &cipher, 1); + } + + if (enc_rc != 0) { + rawobj_free(&cksum); + return GSS_S_FAILURE; + } + + /* fill in checksum */ + LASSERT(token->len >= sizeof(*khdr) + cipher.len + ke->ke_hash_size); + memcpy((char *)(khdr + 1) + cipher.len, + cksum.data + cksum.len - ke->ke_hash_size, + ke->ke_hash_size); + rawobj_free(&cksum); + + /* final token length */ + token->len = sizeof(*khdr) + cipher.len + ke->ke_hash_size; + return GSS_S_COMPLETE; +} + +static +__u32 gss_unwrap_kerberos(struct gss_ctx *gctx, + rawobj_t *token, + rawobj_t *msg) +{ + struct krb5_ctx *kctx = gctx->internal_ctx_id; + struct krb5_enctype *ke = &enctypes[kctx->kc_enctype]; + struct krb5_header *khdr; + unsigned char acceptor_flag; + unsigned char *tmpbuf; + int blocksize, bodysize; + rawobj_t cksum = RAWOBJ_EMPTY; + rawobj_t cipher_in, plain_out; + __u32 rc = GSS_S_FAILURE, enc_rc = 0; + + LASSERT(ke); + + acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0; + + if (token->len < sizeof(*khdr)) { + CERROR("short signature: %u\n", token->len); + return GSS_S_DEFECTIVE_TOKEN; + } + + khdr = (struct krb5_header *) token->data; + + /* sanity check header */ + if (be16_to_cpu(khdr->kh_tok_id) != KG_TOK_WRAP_MSG) { + CERROR("bad token id\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) { + CERROR("bad direction flag\n"); + return GSS_S_BAD_SIG; + } + if ((khdr->kh_flags & FLAG_WRAP_CONFIDENTIAL) == 0) { + CERROR("missing confidential flag\n"); + return GSS_S_BAD_SIG; + } + if (khdr->kh_filler != 0xff) { + CERROR("bad filler\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + if (be16_to_cpu(khdr->kh_ec) != 0x0 || + be16_to_cpu(khdr->kh_rrc) != 0x0) { + CERROR("bad EC or RRC\n"); + return GSS_S_DEFECTIVE_TOKEN; + } + + /* block size */ + if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) { + LASSERT(kctx->kc_keye.kb_tfm == NULL); + blocksize = 1; + } else { + LASSERT(kctx->kc_keye.kb_tfm); + blocksize = crypto_tfm_alg_blocksize(kctx->kc_keye.kb_tfm); + } + + /* expected token layout: + * ---------------------------------------- + * | krb5 header | cipher text | checksum | + * ---------------------------------------- + */ + bodysize = token->len - sizeof(*khdr) - ke->ke_hash_size; + + if (bodysize % blocksize) { + CERROR("odd bodysize %d\n", bodysize); + return GSS_S_DEFECTIVE_TOKEN; + } + + if (bodysize <= ke->ke_conf_size + sizeof(*khdr)) { + CERROR("incomplete token: bodysize %d\n", bodysize); + return GSS_S_DEFECTIVE_TOKEN; + } + + if (msg->len < bodysize - ke->ke_conf_size - sizeof(*khdr)) { + CERROR("buffer too small: %u, require %d\n", + msg->len, bodysize - ke->ke_conf_size); + return GSS_S_FAILURE; + } + + /* decrypting */ + OBD_ALLOC(tmpbuf, bodysize); + if (!tmpbuf) + return GSS_S_FAILURE; + + cipher_in.data = (__u8 *) (khdr + 1); + cipher_in.len = bodysize; + plain_out.data = tmpbuf; + plain_out.len = bodysize; + + if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) { + rawobj_t arc4_keye; + struct crypto_tfm *arc4_tfm; + + cksum.data = token->data + token->len - ke->ke_hash_size; + cksum.len = ke->ke_hash_size; + + if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi, + NULL, 1, &cksum, &arc4_keye)) { + CERROR("failed to obtain arc4 enc key\n"); + GOTO(arc4_out, enc_rc = -EACCES); + } + + arc4_tfm = crypto_alloc_tfm("arc4", CRYPTO_TFM_MODE_ECB); + if (arc4_tfm == NULL) { + CERROR("failed to alloc tfm arc4 in ECB mode\n"); + GOTO(arc4_out_key, enc_rc = -EACCES); + } + + if (crypto_cipher_setkey(arc4_tfm, + arc4_keye.data, arc4_keye.len)) { + CERROR("failed to set arc4 key, len %d\n", + arc4_keye.len); + GOTO(arc4_out_tfm, enc_rc = -EACCES); + } + + enc_rc = krb5_encrypt_rawobjs(arc4_tfm, 1, + 1, &cipher_in, &plain_out, 0); +arc4_out_tfm: + crypto_free_tfm(arc4_tfm); +arc4_out_key: + rawobj_free(&arc4_keye); +arc4_out: + cksum = RAWOBJ_EMPTY; + } else { + enc_rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0, + 1, &cipher_in, &plain_out, 0); + } + + if (enc_rc != 0) { + CERROR("error decrypt\n"); + goto out_free; + } + LASSERT(plain_out.len == bodysize); + + /* expected clear text layout: + * ----------------------------------------- + * | confounder | clear msgs | krb5 header | + * ----------------------------------------- + */ + + /* last part must be identical to the krb5 header */ + if (memcmp(khdr, plain_out.data + plain_out.len - sizeof(*khdr), + sizeof(*khdr))) { + CERROR("decrypted header mismatch\n"); + goto out_free; + } + + /* verify checksum */ + if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi, + khdr, 1, &plain_out, &cksum)) + goto out_free; + + LASSERT(cksum.len >= ke->ke_hash_size); + if (memcmp((char *)(khdr + 1) + bodysize, + cksum.data + cksum.len - ke->ke_hash_size, + ke->ke_hash_size)) { + CERROR("cksum mismatch\n"); + goto out_free; + } + + msg->len = bodysize - ke->ke_conf_size - sizeof(*khdr); + memcpy(msg->data, tmpbuf + ke->ke_conf_size, msg->len); + + rc = GSS_S_COMPLETE; +out_free: + OBD_FREE(tmpbuf, bodysize); + rawobj_free(&cksum); + return rc; +} + +static +__u32 gss_plain_encrypt_kerberos(struct gss_ctx *ctx, + int length, + void *in_buf, + void *out_buf) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + __u32 rc; + + rc = krb5_encrypt(kctx->kc_keye.kb_tfm, 0, + NULL, in_buf, out_buf, length); + if (rc) + CERROR("plain encrypt error: %d\n", rc); + + return rc; +} + +int gss_display_kerberos(struct gss_ctx *ctx, + char *buf, + int bufsize) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int written; + + written = snprintf(buf, bufsize, + " mech: krb5\n" + " enctype: %s\n", + enctype2str(kctx->kc_enctype)); + return written; +} + +static struct gss_api_ops gss_kerberos_ops = { + .gss_import_sec_context = gss_import_sec_context_kerberos, + .gss_copy_reverse_context = gss_copy_reverse_context_kerberos, + .gss_inquire_context = gss_inquire_context_kerberos, + .gss_get_mic = gss_get_mic_kerberos, + .gss_verify_mic = gss_verify_mic_kerberos, + .gss_wrap = gss_wrap_kerberos, + .gss_unwrap = gss_unwrap_kerberos, + .gss_plain_encrypt = gss_plain_encrypt_kerberos, + .gss_delete_sec_context = gss_delete_sec_context_kerberos, + .gss_display = gss_display_kerberos, +}; + +static struct subflavor_desc gss_kerberos_sfs[] = { + { + .sf_subflavor = SPTLRPC_SUBFLVR_KRB5, + .sf_qop = 0, + .sf_service = SPTLRPC_SVC_NONE, + .sf_name = "krb5" + }, + { + .sf_subflavor = SPTLRPC_SUBFLVR_KRB5I, + .sf_qop = 0, + .sf_service = SPTLRPC_SVC_AUTH, + .sf_name = "krb5i" + }, + { + .sf_subflavor = SPTLRPC_SUBFLVR_KRB5P, + .sf_qop = 0, + .sf_service = SPTLRPC_SVC_PRIV, + .sf_name = "krb5p" + }, +}; + +/* + * currently we leave module owner NULL + */ +static struct gss_api_mech gss_kerberos_mech = { + .gm_owner = NULL, /*THIS_MODULE, */ + .gm_name = "krb5", + .gm_oid = (rawobj_t) + {9, "\052\206\110\206\367\022\001\002\002"}, + .gm_ops = &gss_kerberos_ops, + .gm_sf_num = 3, + .gm_sfs = gss_kerberos_sfs, +}; + +int __init init_kerberos_module(void) +{ + int status; + + status = lgss_mech_register(&gss_kerberos_mech); + if (status) + CERROR("Failed to register kerberos gss mechanism!\n"); + return status; +} + +void __exit cleanup_kerberos_module(void) +{ + lgss_mech_unregister(&gss_kerberos_mech); +} diff --git a/lustre/ptlrpc/gss/gss_mech_switch.c b/lustre/ptlrpc/gss/gss_mech_switch.c new file mode 100644 index 0000000..4e2b17e --- /dev/null +++ b/lustre/ptlrpc/gss/gss_mech_switch.c @@ -0,0 +1,344 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * linux/net/sunrpc/gss_mech_switch.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * J. Bruce Fields <bfields@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" + +static LIST_HEAD(registered_mechs); +static spinlock_t registered_mechs_lock = SPIN_LOCK_UNLOCKED; + +int lgss_mech_register(struct gss_api_mech *gm) +{ + spin_lock(®istered_mechs_lock); + list_add(&gm->gm_list, ®istered_mechs); + spin_unlock(®istered_mechs_lock); + CWARN("Register %s mechanism\n", gm->gm_name); + return 0; +} + +void lgss_mech_unregister(struct gss_api_mech *gm) +{ + spin_lock(®istered_mechs_lock); + list_del(&gm->gm_list); + spin_unlock(®istered_mechs_lock); + CWARN("Unregister %s mechanism\n", gm->gm_name); +} + + +struct gss_api_mech *lgss_mech_get(struct gss_api_mech *gm) +{ + __module_get(gm->gm_owner); + return gm; +} + +struct gss_api_mech *lgss_name_to_mech(char *name) +{ + struct gss_api_mech *pos, *gm = NULL; + + spin_lock(®istered_mechs_lock); + list_for_each_entry(pos, ®istered_mechs, gm_list) { + if (0 == strcmp(name, pos->gm_name)) { + if (!try_module_get(pos->gm_owner)) + continue; + gm = pos; + break; + } + } + spin_unlock(®istered_mechs_lock); + return gm; + +} + +static inline +int mech_supports_subflavor(struct gss_api_mech *gm, __u32 subflavor) +{ + int i; + + for (i = 0; i < gm->gm_sf_num; i++) { + if (gm->gm_sfs[i].sf_subflavor == subflavor) + return 1; + } + return 0; +} + +struct gss_api_mech *lgss_subflavor_to_mech(__u32 subflavor) +{ + struct gss_api_mech *pos, *gm = NULL; + + spin_lock(®istered_mechs_lock); + list_for_each_entry(pos, ®istered_mechs, gm_list) { + if (!try_module_get(pos->gm_owner)) + continue; + if (!mech_supports_subflavor(pos, subflavor)) { + module_put(pos->gm_owner); + continue; + } + gm = pos; + break; + } + spin_unlock(®istered_mechs_lock); + return gm; +} + +void lgss_mech_put(struct gss_api_mech *gm) +{ + module_put(gm->gm_owner); +} + +/* The mech could probably be determined from the token instead, but it's just + * as easy for now to pass it in. */ +__u32 lgss_import_sec_context(rawobj_t *input_token, + struct gss_api_mech *mech, + struct gss_ctx **ctx_id) +{ + OBD_ALLOC_PTR(*ctx_id); + if (*ctx_id == NULL) + return GSS_S_FAILURE; + + (*ctx_id)->mech_type = lgss_mech_get(mech); + + LASSERT(mech); + LASSERT(mech->gm_ops); + LASSERT(mech->gm_ops->gss_import_sec_context); + return mech->gm_ops->gss_import_sec_context(input_token, *ctx_id); +} + +__u32 lgss_copy_reverse_context(struct gss_ctx *ctx_id, + struct gss_ctx **ctx_id_new) +{ + struct gss_api_mech *mech = ctx_id->mech_type; + __u32 major; + + LASSERT(mech); + + OBD_ALLOC_PTR(*ctx_id_new); + if (*ctx_id_new == NULL) + return GSS_S_FAILURE; + + (*ctx_id_new)->mech_type = lgss_mech_get(mech); + + LASSERT(mech); + LASSERT(mech->gm_ops); + LASSERT(mech->gm_ops->gss_copy_reverse_context); + + major = mech->gm_ops->gss_copy_reverse_context(ctx_id, *ctx_id_new); + if (major != GSS_S_COMPLETE) { + lgss_mech_put(mech); + OBD_FREE_PTR(*ctx_id_new); + *ctx_id_new = NULL; + } + return major; +} + +/* + * this interface is much simplified, currently we only need endtime. + */ +__u32 lgss_inquire_context(struct gss_ctx *context_handle, + unsigned long *endtime) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_inquire_context); + + return context_handle->mech_type->gm_ops + ->gss_inquire_context(context_handle, + endtime); +} + +/* gss_get_mic: compute a mic over message and return mic_token. */ +__u32 lgss_get_mic(struct gss_ctx *context_handle, + int msgcnt, + rawobj_t *msg, + rawobj_t *mic_token) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_get_mic); + + return context_handle->mech_type->gm_ops + ->gss_get_mic(context_handle, + msgcnt, + msg, + mic_token); +} + +/* gss_verify_mic: check whether the provided mic_token verifies message. */ +__u32 lgss_verify_mic(struct gss_ctx *context_handle, + int msgcnt, + rawobj_t *msg, + rawobj_t *mic_token) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_verify_mic); + + return context_handle->mech_type->gm_ops + ->gss_verify_mic(context_handle, + msgcnt, + msg, + mic_token); +} + +#if 0 +__u32 lgss_wrap(struct gss_ctx *context_handle, + __u32 qop, + rawobj_buf_t *inbuf, + rawobj_t *outbuf) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_wrap); + + return context_handle->mech_type->gm_ops + ->gss_wrap(context_handle, qop, inbuf, outbuf); +} +#endif + +__u32 lgss_wrap(struct gss_ctx *context_handle, + rawobj_t *msg, + int msg_buflen, + rawobj_t *out_token) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_wrap); + + return context_handle->mech_type->gm_ops + ->gss_wrap(context_handle, msg, msg_buflen, out_token); +} + +__u32 lgss_unwrap(struct gss_ctx *context_handle, + rawobj_t *token, + rawobj_t *out_msg) +{ + LASSERT(context_handle); + LASSERT(context_handle->mech_type); + LASSERT(context_handle->mech_type->gm_ops); + LASSERT(context_handle->mech_type->gm_ops->gss_unwrap); + + return context_handle->mech_type->gm_ops + ->gss_unwrap(context_handle, token, out_msg); +} + + +__u32 lgss_plain_encrypt(struct gss_ctx *ctx, + int length, + void *in_buf, + void *out_buf) +{ + LASSERT(ctx); + LASSERT(ctx->mech_type); + LASSERT(ctx->mech_type->gm_ops); + LASSERT(ctx->mech_type->gm_ops->gss_plain_encrypt); + + return ctx->mech_type->gm_ops + ->gss_plain_encrypt(ctx, length, in_buf, out_buf); +} + +/* gss_delete_sec_context: free all resources associated with context_handle. + * Note this differs from the RFC 2744-specified prototype in that we don't + * bother returning an output token, since it would never be used anyway. */ + +__u32 lgss_delete_sec_context(struct gss_ctx **context_handle) +{ + struct gss_api_mech *mech; + + CDEBUG(D_SEC, "deleting %p\n", *context_handle); + + if (!*context_handle) + return(GSS_S_NO_CONTEXT); + + mech = (*context_handle)->mech_type; + if ((*context_handle)->internal_ctx_id != 0) { + LASSERT(mech); + LASSERT(mech->gm_ops); + LASSERT(mech->gm_ops->gss_delete_sec_context); + mech->gm_ops->gss_delete_sec_context( + (*context_handle)->internal_ctx_id); + } + if (mech) + lgss_mech_put(mech); + + OBD_FREE_PTR(*context_handle); + *context_handle=NULL; + return GSS_S_COMPLETE; +} + +int lgss_display(struct gss_ctx *ctx, + char *buf, + int bufsize) +{ + LASSERT(ctx); + LASSERT(ctx->mech_type); + LASSERT(ctx->mech_type->gm_ops); + LASSERT(ctx->mech_type->gm_ops->gss_display); + + return ctx->mech_type->gm_ops->gss_display(ctx, buf, bufsize); +} diff --git a/lustre/ptlrpc/gss/gss_rawobj.c b/lustre/ptlrpc/gss/gss_rawobj.c new file mode 100644 index 0000000..847cb4d --- /dev/null +++ b/lustre/ptlrpc/gss/gss_rawobj.c @@ -0,0 +1,195 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_sec.h> + +#include "gss_internal.h" + +int rawobj_alloc(rawobj_t *obj, char *buf, int len) +{ + LASSERT(obj); + LASSERT(len >= 0); + + obj->len = len; + if (len) { + OBD_ALLOC(obj->data, len); + if (!obj->data) { + obj->len = 0; + RETURN(-ENOMEM); + } + memcpy(obj->data, buf, len); + } else + obj->data = NULL; + return 0; +} + +void rawobj_free(rawobj_t *obj) +{ + LASSERT(obj); + + if (obj->len) { + LASSERT(obj->data); + OBD_FREE(obj->data, obj->len); + obj->len = 0; + obj->data = NULL; + } else + LASSERT(!obj->data); +} + +int rawobj_equal(rawobj_t *a, rawobj_t *b) +{ + LASSERT(a && b); + + return (a->len == b->len && + (!a->len || !memcmp(a->data, b->data, a->len))); +} + +int rawobj_dup(rawobj_t *dest, rawobj_t *src) +{ + LASSERT(src && dest); + + dest->len = src->len; + if (dest->len) { + OBD_ALLOC(dest->data, dest->len); + if (!dest->data) { + dest->len = 0; + return -ENOMEM; + } + memcpy(dest->data, src->data, dest->len); + } else + dest->data = NULL; + return 0; +} + +int rawobj_serialize(rawobj_t *obj, __u32 **buf, __u32 *buflen) +{ + __u32 len; + + LASSERT(obj); + LASSERT(buf); + LASSERT(buflen); + + len = size_round4(obj->len); + + if (*buflen < 4 + len) { + CERROR("buflen %u < %u\n", *buflen, 4 + len); + return -EINVAL; + } + + *(*buf)++ = cpu_to_le32(obj->len); + memcpy(*buf, obj->data, obj->len); + *buf += (len >> 2); + *buflen -= (4 + len); + + return 0; +} + +static int __rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen, + int alloc, int local) +{ + __u32 len; + + if (*buflen < sizeof(__u32)) { + CERROR("buflen %u\n", *buflen); + return -EINVAL; + } + + obj->len = *(*buf)++; + if (!local) + obj->len = le32_to_cpu(obj->len); + *buflen -= sizeof(__u32); + + if (!obj->len) { + obj->data = NULL; + return 0; + } + + len = local ? obj->len : size_round4(obj->len); + if (*buflen < len) { + CERROR("buflen %u < %u\n", *buflen, len); + obj->len = 0; + return -EINVAL; + } + + if (!alloc) + obj->data = (__u8 *) *buf; + else { + OBD_ALLOC(obj->data, obj->len); + if (!obj->data) { + CERROR("fail to alloc %u bytes\n", obj->len); + obj->len = 0; + return -ENOMEM; + } + memcpy(obj->data, *buf, obj->len); + } + + *((char **)buf) += len; + *buflen -= len; + + return 0; +} + +int rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen) +{ + return __rawobj_extract(obj, buf, buflen, 0, 0); +} + +int rawobj_extract_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen) +{ + return __rawobj_extract(obj, buf, buflen, 1, 0); +} + +int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen) +{ + return __rawobj_extract(obj, buf, buflen, 0, 1); +} + +int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj) +{ + rawobj->len = netobj->len; + rawobj->data = netobj->data; + return 0; +} + +int rawobj_from_netobj_alloc(rawobj_t *rawobj, netobj_t *netobj) +{ + rawobj->len = 0; + rawobj->data = NULL; + + if (netobj->len == 0) + return 0; + + OBD_ALLOC(rawobj->data, netobj->len); + if (rawobj->data == NULL) + return -ENOMEM; + + rawobj->len = netobj->len; + memcpy(rawobj->data, netobj->data, netobj->len); + return 0; +} diff --git a/lustre/ptlrpc/gss/gss_svc_upcall.c b/lustre/ptlrpc/gss/gss_svc_upcall.c new file mode 100644 index 0000000..bc6c4f0 --- /dev/null +++ b/lustre/ptlrpc/gss/gss_svc_upcall.c @@ -0,0 +1,998 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * Neil Brown <neilb@cse.unsw.edu.au> + * J. Bruce Fields <bfields@umich.edu> + * Andy Adamson <andros@umich.edu> + * Dug Song <dugsong@monkey.org> + * + * RPCSEC_GSS server authentication. + * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078 + * (gssapi) + * + * The RPCSEC_GSS involves three stages: + * 1/ context creation + * 2/ data exchange + * 3/ context destruction + * + * Context creation is handled largely by upcalls to user-space. + * In particular, GSS_Accept_sec_context is handled by an upcall + * Data exchange is handled entirely within the kernel + * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel. + * Context destruction is handled in-kernel + * GSS_Delete_sec_context is in-kernel + * + * Context creation is initiated by a RPCSEC_GSS_INIT request arriving. + * The context handle and gss_token are used as a key into the rpcsec_init cache. + * The content of this cache includes some of the outputs of GSS_Accept_sec_context, + * being major_status, minor_status, context_handle, reply_token. + * These are sent back to the client. + * Sequence window management is handled by the kernel. The window size if currently + * a compile time constant. + * + * When user-space is happy that a context is established, it places an entry + * in the rpcsec_context cache. The key for this cache is the context_handle. + * The content includes: + * uid/gidlist - for determining access rights + * mechanism type + * mechanism specific information, such as a key + * + */ + +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/hash.h> +#else +#include <liblustre.h> +#endif + +#include <linux/sunrpc/cache.h> + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" + +#define GSS_SVC_UPCALL_TIMEOUT (20) + +static spinlock_t __ctx_index_lock = SPIN_LOCK_UNLOCKED; +static __u64 __ctx_index; + +__u64 gss_get_next_ctx_index(void) +{ + __u64 idx; + + spin_lock(&__ctx_index_lock); + idx = __ctx_index++; + spin_unlock(&__ctx_index_lock); + + return idx; +} + +static inline +unsigned long hash_mem(char *buf, int length, int bits) +{ + unsigned long hash = 0; + unsigned long l = 0; + int len = 0; + unsigned char c; + + do { + if (len == length) { + c = (char) len; + len = -1; + } else + c = *buf++; + + l = (l << 8) | c; + len++; + + if ((len & (BITS_PER_LONG/8-1)) == 0) + hash = hash_long(hash^l, BITS_PER_LONG); + } while (len); + + return hash >> (BITS_PER_LONG - bits); +} + +/**************************************** + * rsi cache * + ****************************************/ + +#define RSI_HASHBITS (6) +#define RSI_HASHMAX (1 << RSI_HASHBITS) +#define RSI_HASHMASK (RSI_HASHMAX - 1) + +struct rsi { + struct cache_head h; + __u32 lustre_svc; + __u64 nid; + wait_queue_head_t waitq; + rawobj_t in_handle, in_token; + rawobj_t out_handle, out_token; + int major_status, minor_status; +}; + +static struct cache_head *rsi_table[RSI_HASHMAX]; +static struct cache_detail rsi_cache; +static struct rsi *rsi_lookup(struct rsi *item, int set); + +static +void rsi_free(struct rsi *rsi) +{ + rawobj_free(&rsi->in_handle); + rawobj_free(&rsi->in_token); + rawobj_free(&rsi->out_handle); + rawobj_free(&rsi->out_token); +} + +static +void rsi_put(struct cache_head *item, struct cache_detail *cd) +{ + struct rsi *rsi = container_of(item, struct rsi, h); + + LASSERT(atomic_read(&item->refcnt) > 0); + + if (cache_put(item, cd)) { + LASSERT(item->next == NULL); + rsi_free(rsi); + kfree(rsi); /* created by cache mgmt using kmalloc */ + } +} + +static inline +int rsi_hash(struct rsi *item) +{ + return hash_mem((char *)item->in_handle.data, item->in_handle.len, + RSI_HASHBITS) ^ + hash_mem((char *)item->in_token.data, item->in_token.len, + RSI_HASHBITS); +} + +static inline +int rsi_match(struct rsi *item, struct rsi *tmp) +{ + return (rawobj_equal(&item->in_handle, &tmp->in_handle) && + rawobj_equal(&item->in_token, &tmp->in_token)); +} + +static +void rsi_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + struct rsi *rsi = container_of(h, struct rsi, h); + __u64 index = 0; + + /* if in_handle is null, provide kernel suggestion */ + if (rsi->in_handle.len == 0) + index = gss_get_next_ctx_index(); + + qword_addhex(bpp, blen, (char *) &rsi->lustre_svc, + sizeof(rsi->lustre_svc)); + qword_addhex(bpp, blen, (char *) &rsi->nid, sizeof(rsi->nid)); + qword_addhex(bpp, blen, (char *) &index, sizeof(index)); + qword_addhex(bpp, blen, rsi->in_handle.data, rsi->in_handle.len); + qword_addhex(bpp, blen, rsi->in_token.data, rsi->in_token.len); + (*bpp)[-1] = '\n'; +} + +static inline +void rsi_init(struct rsi *new, struct rsi *item) +{ + new->out_handle = RAWOBJ_EMPTY; + new->out_token = RAWOBJ_EMPTY; + + new->in_handle = item->in_handle; + item->in_handle = RAWOBJ_EMPTY; + new->in_token = item->in_token; + item->in_token = RAWOBJ_EMPTY; + + new->lustre_svc = item->lustre_svc; + new->nid = item->nid; + init_waitqueue_head(&new->waitq); +} + +static inline +void rsi_update(struct rsi *new, struct rsi *item) +{ + LASSERT(new->out_handle.len == 0); + LASSERT(new->out_token.len == 0); + + new->out_handle = item->out_handle; + item->out_handle = RAWOBJ_EMPTY; + new->out_token = item->out_token; + item->out_token = RAWOBJ_EMPTY; + + new->major_status = item->major_status; + new->minor_status = item->minor_status; +} + +static +int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + char *buf = mesg; + char *ep; + int len; + struct rsi rsii, *rsip = NULL; + time_t expiry; + int status = -EINVAL; + ENTRY; + + + memset(&rsii, 0, sizeof(rsii)); + + /* handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + if (rawobj_alloc(&rsii.in_handle, buf, len)) { + status = -ENOMEM; + goto out; + } + + /* token */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + if (rawobj_alloc(&rsii.in_token, buf, len)) { + status = -ENOMEM; + goto out; + } + + /* expiry */ + expiry = get_expiry(&mesg); + if (expiry == 0) + goto out; + + len = qword_get(&mesg, buf, mlen); + if (len <= 0) + goto out; + + /* major */ + rsii.major_status = simple_strtol(buf, &ep, 10); + if (*ep) + goto out; + + /* minor */ + len = qword_get(&mesg, buf, mlen); + if (len <= 0) + goto out; + rsii.minor_status = simple_strtol(buf, &ep, 10); + if (*ep) + goto out; + + /* out_handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + if (rawobj_alloc(&rsii.out_handle, buf, len)) { + status = -ENOMEM; + goto out; + } + + /* out_token */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + if (rawobj_alloc(&rsii.out_token, buf, len)) { + status = -ENOMEM; + goto out; + } + + rsii.h.expiry_time = expiry; + rsip = rsi_lookup(&rsii, 1); + status = 0; +out: + rsi_free(&rsii); + if (rsip) { + wake_up_all(&rsip->waitq); + rsi_put(&rsip->h, &rsi_cache); + } + + if (status) + CERROR("rsi parse error %d\n", status); + RETURN(status); +} + +static struct cache_detail rsi_cache = { + .hash_size = RSI_HASHMAX, + .hash_table = rsi_table, + .name = "auth.ptlrpcs.init", + .cache_put = rsi_put, + .cache_request = rsi_request, + .cache_parse = rsi_parse, +}; + +static DefineSimpleCacheLookup(rsi, 0) + +/**************************************** + * rsc cache * + ****************************************/ + +#define RSC_HASHBITS (10) +#define RSC_HASHMAX (1 << RSC_HASHBITS) +#define RSC_HASHMASK (RSC_HASHMAX - 1) + +struct rsc { + struct cache_head h; + struct obd_device *target; + rawobj_t handle; + struct gss_svc_ctx ctx; +}; + +static struct cache_head *rsc_table[RSC_HASHMAX]; +static struct cache_detail rsc_cache; +static struct rsc *rsc_lookup(struct rsc *item, int set); + +static +void rsc_free(struct rsc *rsci) +{ + rawobj_free(&rsci->handle); + rawobj_free(&rsci->ctx.gsc_rvs_hdl); + lgss_delete_sec_context(&rsci->ctx.gsc_mechctx); +} + +static +void rsc_put(struct cache_head *item, struct cache_detail *cd) +{ + struct rsc *rsci = container_of(item, struct rsc, h); + + LASSERT(atomic_read(&item->refcnt) > 0); + + if (cache_put(item, cd)) { + LASSERT(item->next == NULL); + rsc_free(rsci); + kfree(rsci); /* created by cache mgmt using kmalloc */ + } +} + +static inline +int rsc_hash(struct rsc *rsci) +{ + return hash_mem((char *)rsci->handle.data, + rsci->handle.len, RSC_HASHBITS); +} + +static inline +int rsc_match(struct rsc *new, struct rsc *tmp) +{ + return rawobj_equal(&new->handle, &tmp->handle); +} + +static inline +void rsc_init(struct rsc *new, struct rsc *tmp) +{ + new->handle = tmp->handle; + tmp->handle = RAWOBJ_EMPTY; + + new->target = NULL; + memset(&new->ctx, 0, sizeof(new->ctx)); + new->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY; +} + +static inline +void rsc_update(struct rsc *new, struct rsc *tmp) +{ + new->ctx = tmp->ctx; + tmp->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY; + tmp->ctx.gsc_mechctx = NULL; + + memset(&new->ctx.gsc_seqdata, 0, sizeof(new->ctx.gsc_seqdata)); + spin_lock_init(&new->ctx.gsc_seqdata.ssd_lock); +} + +static +int rsc_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + char *buf = mesg; + int len, rv, tmp_int; + struct rsc rsci, *rscp = NULL; + time_t expiry; + int status = -EINVAL; + + memset(&rsci, 0, sizeof(rsci)); + + /* context handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) goto out; + status = -ENOMEM; + if (rawobj_alloc(&rsci.handle, buf, len)) + goto out; + + rsci.h.flags = 0; + /* expiry */ + expiry = get_expiry(&mesg); + status = -EINVAL; + if (expiry == 0) + goto out; + + /* remote flag */ + rv = get_int(&mesg, &tmp_int); + if (rv) { + CERROR("fail to get remote flag\n"); + goto out; + } + rsci.ctx.gsc_remote = (tmp_int != 0); + + /* root user flag */ + rv = get_int(&mesg, &tmp_int); + if (rv) { + CERROR("fail to get oss user flag\n"); + goto out; + } + rsci.ctx.gsc_usr_root = (tmp_int != 0); + + /* mds user flag */ + rv = get_int(&mesg, &tmp_int); + if (rv) { + CERROR("fail to get mds user flag\n"); + goto out; + } + rsci.ctx.gsc_usr_mds = (tmp_int != 0); + + /* mapped uid */ + rv = get_int(&mesg, (int *) &rsci.ctx.gsc_mapped_uid); + if (rv) { + CERROR("fail to get mapped uid\n"); + goto out; + } + + /* uid, or NEGATIVE */ + rv = get_int(&mesg, (int *) &rsci.ctx.gsc_uid); + if (rv == -EINVAL) + goto out; + if (rv == -ENOENT) { + CERROR("NOENT? set rsc entry negative\n"); + set_bit(CACHE_NEGATIVE, &rsci.h.flags); + } else { + struct gss_api_mech *gm; + rawobj_t tmp_buf; + unsigned long ctx_expiry; + + /* gid */ + if (get_int(&mesg, (int *) &rsci.ctx.gsc_gid)) + goto out; + + /* mech name */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + gm = lgss_name_to_mech(buf); + status = -EOPNOTSUPP; + if (!gm) + goto out; + + status = -EINVAL; + /* mech-specific data: */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) { + lgss_mech_put(gm); + goto out; + } + tmp_buf.len = len; + tmp_buf.data = (unsigned char *)buf; + if (lgss_import_sec_context(&tmp_buf, gm, + &rsci.ctx.gsc_mechctx)) { + lgss_mech_put(gm); + goto out; + } + + /* currently the expiry time passed down from user-space + * is invalid, here we retrive it from mech. + */ + if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) { + CERROR("unable to get expire time, drop it\n"); + lgss_mech_put(gm); + goto out; + } + expiry = (time_t) ctx_expiry; + + lgss_mech_put(gm); + } + + rsci.h.expiry_time = expiry; + rscp = rsc_lookup(&rsci, 1); + status = 0; +out: + rsc_free(&rsci); + if (rscp) + rsc_put(&rscp->h, &rsc_cache); + + if (status) + CERROR("parse rsc error %d\n", status); + return status; +} + +/**************************************** + * rsc cache flush * + ****************************************/ + +typedef int rsc_entry_match(struct rsc *rscp, long data); + +static +void rsc_flush(rsc_entry_match *match, long data) +{ + struct cache_head **ch; + struct rsc *rscp; + int n; + ENTRY; + + write_lock(&rsc_cache.hash_lock); + for (n = 0; n < RSC_HASHMAX; n++) { + for (ch = &rsc_cache.hash_table[n]; *ch;) { + rscp = container_of(*ch, struct rsc, h); + + if (!match(rscp, data)) { + ch = &((*ch)->next); + continue; + } + + /* it seems simply set NEGATIVE doesn't work */ + *ch = (*ch)->next; + rscp->h.next = NULL; + cache_get(&rscp->h); + set_bit(CACHE_NEGATIVE, &rscp->h.flags); + rsc_put(&rscp->h, &rsc_cache); + rsc_cache.entries--; + } + } + write_unlock(&rsc_cache.hash_lock); + EXIT; +} + +static +int match_uid(struct rsc *rscp, long uid) +{ + if ((int) uid == -1) + return 1; + return ((int) rscp->ctx.gsc_uid == (int) uid); +} + +static +int match_target(struct rsc *rscp, long target) +{ + return (rscp->target == (struct obd_device *) target); +} + +static inline +void rsc_flush_uid(int uid) +{ + if (uid == -1) + CWARN("flush all gss contexts...\n"); + + rsc_flush(match_uid, (long) uid); +} + +static inline +void rsc_flush_target(struct obd_device *target) +{ + rsc_flush(match_target, (long) target); +} + +void gss_secsvc_flush(struct obd_device *target) +{ + rsc_flush_target(target); +} +EXPORT_SYMBOL(gss_secsvc_flush); + +static struct cache_detail rsc_cache = { + .hash_size = RSC_HASHMAX, + .hash_table = rsc_table, + .name = "auth.ptlrpcs.context", + .cache_put = rsc_put, + .cache_parse = rsc_parse, +}; + +static DefineSimpleCacheLookup(rsc, 0); + +static +struct rsc *gss_svc_searchbyctx(rawobj_t *handle) +{ + struct rsc rsci; + struct rsc *found; + + memset(&rsci, 0, sizeof(rsci)); + if (rawobj_dup(&rsci.handle, handle)) + return NULL; + + found = rsc_lookup(&rsci, 0); + rsc_free(&rsci); + if (!found) + return NULL; + if (cache_check(&rsc_cache, &found->h, NULL)) + return NULL; + return found; +} + +int gss_svc_upcall_install_rvs_ctx(struct obd_import *imp, + struct gss_sec *gsec, + struct gss_cli_ctx *gctx) +{ + struct rsc rsci, *rscp; + unsigned long ctx_expiry; + __u32 major; + ENTRY; + + memset(&rsci, 0, sizeof(rsci)); + + if (rawobj_alloc(&rsci.handle, (char *) &gsec->gs_rvs_hdl, + sizeof(gsec->gs_rvs_hdl))) { + CERROR("unable alloc handle\n"); + RETURN(-ENOMEM); + } + + major = lgss_copy_reverse_context(gctx->gc_mechctx, + &rsci.ctx.gsc_mechctx); + if (major != GSS_S_COMPLETE) { + CERROR("unable to copy reverse context\n"); + rsc_free(&rsci); + RETURN(-ENOMEM); + } + + if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) { + CERROR("unable to get expire time, drop it\n"); + rsc_free(&rsci); + RETURN(-EINVAL); + } + + rsci.h.expiry_time = (time_t) ctx_expiry; + rsci.target = imp->imp_obd; + + rscp = rsc_lookup(&rsci, 1); + rsc_free(&rsci); + if (rscp) + rsc_put(&rscp->h, &rsc_cache); + + CWARN("client installed reverse svc ctx to %s: idx "LPX64"\n", + imp->imp_obd->u.cli.cl_target_uuid.uuid, + gsec->gs_rvs_hdl); + + imp->imp_next_reconnect = gss_round_imp_reconnect(ctx_expiry); + CWARN("import(%s) to %s: set force reconnect at %lu(%lds valid time)\n", + ptlrpc_import_state_name(imp->imp_state), + imp->imp_obd->u.cli.cl_target_uuid.uuid, + imp->imp_next_reconnect, + (long) (imp->imp_next_reconnect - get_seconds())); + + RETURN(0); +} + +#if 0 +static int +gss_svc_unseal_request(struct ptlrpc_request *req, + struct rsc *rsci, + struct gss_wire_cred *gc, + __u32 *vp, __u32 vlen) +{ + struct ptlrpcs_wire_hdr *sec_hdr; + struct gss_ctx *ctx = rsci->mechctx; + rawobj_t cipher_text, plain_text; + __u32 major; + ENTRY; + + sec_hdr = (struct ptlrpcs_wire_hdr *) req->rq_reqbuf; + + if (vlen < 4) { + CERROR("vlen only %u\n", vlen); + RETURN(GSS_S_CALL_BAD_STRUCTURE); + } + + cipher_text.len = le32_to_cpu(*vp++); + cipher_text.data = (__u8 *) vp; + vlen -= 4; + + if (cipher_text.len > vlen) { + CERROR("cipher claimed %u while buf only %u\n", + cipher_text.len, vlen); + RETURN(GSS_S_CALL_BAD_STRUCTURE); + } + + plain_text = cipher_text; + + major = lgss_unwrap(ctx, GSS_C_QOP_DEFAULT, &cipher_text, &plain_text); + if (major) { + CERROR("unwrap error 0x%x\n", major); + RETURN(major); + } + + if (gss_check_seq_num(&rsci->seqdata, gc->gc_seq)) { + CERROR("discard replayed request %p(o%u,x"LPU64",t"LPU64")\n", + req, req->rq_reqmsg->opc, req->rq_xid, + req->rq_reqmsg->transno); + RETURN(GSS_S_DUPLICATE_TOKEN); + } + + req->rq_reqmsg = (struct lustre_msg *) (vp); + req->rq_reqlen = plain_text.len; + + CDEBUG(D_SEC, "msg len %d\n", req->rq_reqlen); + + RETURN(GSS_S_COMPLETE); +} +#endif + +static +struct cache_deferred_req* cache_upcall_defer(struct cache_req *req) +{ + return NULL; +} +static struct cache_req cache_upcall_chandle = { cache_upcall_defer }; + +int gss_svc_upcall_handle_init(struct ptlrpc_request *req, + struct gss_svc_reqctx *grctx, + struct gss_wire_ctx *gw, + struct obd_device *target, + __u32 lustre_svc, + rawobj_t *rvs_hdl, + rawobj_t *in_token) +{ + struct ptlrpc_reply_state *rs; + struct rsc *rsci = NULL; + struct rsi *rsip = NULL, rsikey; + wait_queue_t wait; + int replen = sizeof(struct ptlrpc_body); + struct gss_rep_header *rephdr; + int first_check = 1; + int rc = SECSVC_DROP; + ENTRY; + + memset(&rsikey, 0, sizeof(rsikey)); + rsikey.lustre_svc = lustre_svc; + rsikey.nid = (__u64) req->rq_peer.nid; + + /* duplicate context handle. for INIT it always 0 */ + if (rawobj_dup(&rsikey.in_handle, &gw->gw_handle)) { + CERROR("fail to dup context handle\n"); + GOTO(out, rc); + } + + if (rawobj_dup(&rsikey.in_token, in_token)) { + CERROR("can't duplicate token\n"); + rawobj_free(&rsikey.in_handle); + GOTO(out, rc); + } + + rsip = rsi_lookup(&rsikey, 0); + rsi_free(&rsikey); + if (!rsip) { + CERROR("error in rsi_lookup.\n"); + + if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0)) + rc = SECSVC_COMPLETE; + + GOTO(out, rc); + } + + cache_get(&rsip->h); /* take an extra ref */ + init_waitqueue_head(&rsip->waitq); + init_waitqueue_entry(&wait, current); + add_wait_queue(&rsip->waitq, &wait); + +cache_check: + /* Note each time cache_check() will drop a reference if return + * non-zero. We hold an extra reference on initial rsip, but must + * take care of following calls. + */ + rc = cache_check(&rsi_cache, &rsip->h, &cache_upcall_chandle); + switch (rc) { + case -EAGAIN: { + int valid; + + if (first_check) { + first_check = 0; + + read_lock(&rsi_cache.hash_lock); + valid = test_bit(CACHE_VALID, &rsip->h.flags); + if (valid == 0) + set_current_state(TASK_INTERRUPTIBLE); + read_unlock(&rsi_cache.hash_lock); + + if (valid == 0) + schedule_timeout(GSS_SVC_UPCALL_TIMEOUT * HZ); + + cache_get(&rsip->h); + goto cache_check; + } + CWARN("waited %ds timeout, drop\n", GSS_SVC_UPCALL_TIMEOUT); + break; + } + case -ENOENT: + CWARN("cache_check return ENOENT, drop\n"); + break; + case 0: + /* if not the first check, we have to release the extra + * reference we just added on it. + */ + if (!first_check) + cache_put(&rsip->h, &rsi_cache); + CDEBUG(D_SEC, "cache_check is good\n"); + break; + } + + remove_wait_queue(&rsip->waitq, &wait); + cache_put(&rsip->h, &rsi_cache); + + if (rc) + GOTO(out, rc = SECSVC_DROP); + + rc = SECSVC_DROP; + rsci = gss_svc_searchbyctx(&rsip->out_handle); + if (!rsci) { + CERROR("authentication failed\n"); + + if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0)) + rc = SECSVC_COMPLETE; + + GOTO(out, rc); + } else { + cache_get(&rsci->h); + grctx->src_ctx = &rsci->ctx; + } + + if (rawobj_dup(&rsci->ctx.gsc_rvs_hdl, rvs_hdl)) { + CERROR("failed duplicate reverse handle\n"); + GOTO(out, rc); + } + + rsci->target = target; + + CWARN("server create rsc %p(%u->%s)\n", + rsci, rsci->ctx.gsc_uid, libcfs_nid2str(req->rq_peer.nid)); + + if (rsip->out_handle.len > PTLRPC_GSS_MAX_HANDLE_SIZE) { + CERROR("handle size %u too large\n", rsip->out_handle.len); + GOTO(out, rc = SECSVC_DROP); + } + + grctx->src_init = 1; + grctx->src_reserve_len = size_round4(rsip->out_token.len); + + rc = lustre_pack_reply_v2(req, 1, &replen, NULL); + if (rc) { + CERROR("failed to pack reply: %d\n", rc); + GOTO(out, rc = SECSVC_DROP); + } + + rs = req->rq_reply_state; + LASSERT(rs->rs_repbuf->lm_bufcount == 3); + LASSERT(rs->rs_repbuf->lm_buflens[0] >= + sizeof(*rephdr) + rsip->out_handle.len); + LASSERT(rs->rs_repbuf->lm_buflens[2] >= rsip->out_token.len); + + rephdr = lustre_msg_buf(rs->rs_repbuf, 0, 0); + rephdr->gh_version = PTLRPC_GSS_VERSION; + rephdr->gh_flags = 0; + rephdr->gh_proc = PTLRPC_GSS_PROC_ERR; + rephdr->gh_major = rsip->major_status; + rephdr->gh_minor = rsip->minor_status; + rephdr->gh_seqwin = GSS_SEQ_WIN; + rephdr->gh_handle.len = rsip->out_handle.len; + memcpy(rephdr->gh_handle.data, rsip->out_handle.data, + rsip->out_handle.len); + + memcpy(lustre_msg_buf(rs->rs_repbuf, 2, 0), rsip->out_token.data, + rsip->out_token.len); + + rs->rs_repdata_len = lustre_shrink_msg(rs->rs_repbuf, 2, + rsip->out_token.len, 0); + + if (rsci->ctx.gsc_usr_mds) + CWARN("user from %s authenticated as mds\n", + libcfs_nid2str(req->rq_peer.nid)); + + rc = SECSVC_OK; + +out: + /* it looks like here we should put rsip also, but this mess up + * with NFS cache mgmt code... FIXME + */ +#if 0 + if (rsip) + rsi_put(&rsip->h, &rsi_cache); +#endif + + if (rsci) { + /* if anything went wrong, we don't keep the context too */ + if (rc != SECSVC_OK) + set_bit(CACHE_NEGATIVE, &rsci->h.flags); + + rsc_put(&rsci->h, &rsc_cache); + } + RETURN(rc); +} + +struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req, + struct gss_wire_ctx *gw) +{ + struct rsc *rsc; + + rsc = gss_svc_searchbyctx(&gw->gw_handle); + if (!rsc) { + CWARN("Invalid gss context handle from %s\n", + libcfs_nid2str(req->rq_peer.nid)); + return NULL; + } + + return &rsc->ctx; +} + +void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx) +{ + struct rsc *rsc = container_of(ctx, struct rsc, ctx); + + rsc_put(&rsc->h, &rsc_cache); +} + +void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx) +{ + struct rsc *rsc = container_of(ctx, struct rsc, ctx); + + set_bit(CACHE_NEGATIVE, &rsc->h.flags); +} + +int __init gss_svc_init_upcall(void) +{ + int i; + + cache_register(&rsi_cache); + cache_register(&rsc_cache); + + /* FIXME this looks stupid. we intend to give lsvcgssd a chance to open + * the init upcall channel, otherwise there's big chance that the first + * upcall issued before the channel be opened thus nfsv4 cache code will + * drop the request direclty, thus lead to unnecessary recovery time. + * here we wait at miximum 1.5 seconds. + */ + for (i = 0; i < 6; i++) { + if (atomic_read(&rsi_cache.readers) > 0) + break; + set_current_state(TASK_UNINTERRUPTIBLE); + LASSERT(HZ >= 4); + schedule_timeout(HZ / 4); + } + + if (atomic_read(&rsi_cache.readers) == 0) + CWARN("Init channel is not opened by lsvcgssd, following " + "request might be dropped until lsvcgssd is active\n"); + + /* + * this helps reducing context index confliction. after server reboot, + * conflicting request from clients might be filtered out by initial + * sequence number checking, thus no chance to sent error notification + * back to clients. + */ + get_random_bytes(&__ctx_index, sizeof(__ctx_index)); + + return 0; +} + +void __exit gss_svc_exit_upcall(void) +{ + int rc; + + cache_purge(&rsi_cache); + if ((rc = cache_unregister(&rsi_cache))) + CERROR("unregister rsi cache: %d\n", rc); + + cache_purge(&rsc_cache); + if ((rc = cache_unregister(&rsc_cache))) + CERROR("unregister rsc cache: %d\n", rc); +} diff --git a/lustre/ptlrpc/gss/lproc_gss.c b/lustre/ptlrpc/gss/lproc_gss.c new file mode 100644 index 0000000..df50946 --- /dev/null +++ b/lustre/ptlrpc/gss/lproc_gss.c @@ -0,0 +1,165 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * Author Peter Braam <braam@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <linux/random.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lprocfs_status.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" + +static struct proc_dir_entry *gss_proc_root = NULL; + +/* + * statistic of "out-of-sequence-window" + */ +static struct { + spinlock_t oos_lock; + atomic_t oos_cli_count; /* client occurrence */ + int oos_cli_behind; /* client max seqs behind */ + atomic_t oos_svc_replay[3]; /* server replay detected */ + atomic_t oos_svc_pass[3]; /* server verified ok */ +} gss_stat_oos = { + .oos_lock = SPIN_LOCK_UNLOCKED, + .oos_cli_count = ATOMIC_INIT(0), + .oos_cli_behind = 0, + .oos_svc_replay = { ATOMIC_INIT(0), }, + .oos_svc_pass = { ATOMIC_INIT(0), }, +}; + +void gss_stat_oos_record_cli(int behind) +{ + atomic_inc(&gss_stat_oos.oos_cli_count); + + spin_lock(&gss_stat_oos.oos_lock); + if (behind > gss_stat_oos.oos_cli_behind) + gss_stat_oos.oos_cli_behind = behind; + spin_unlock(&gss_stat_oos.oos_lock); +} + +void gss_stat_oos_record_svc(int phase, int replay) +{ + LASSERT(phase >= 0 && phase <= 2); + + if (replay) + atomic_inc(&gss_stat_oos.oos_svc_replay[phase]); + else + atomic_inc(&gss_stat_oos.oos_svc_pass[phase]); +} + +static int gss_proc_read_oos(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int written; + + written = snprintf(page, count, + "seqwin: %u\n" + "backwin: %u\n" + "client fall behind seqwin\n" + " occurrence: %d\n" + " max seq behind: %d\n" + "server replay detected:\n" + " phase 0: %d\n" + " phase 1: %d\n" + " phase 2: %d\n" + "server verify ok:\n" + " phase 2: %d\n", + GSS_SEQ_WIN_MAIN, + GSS_SEQ_WIN_BACK, + atomic_read(&gss_stat_oos.oos_cli_count), + gss_stat_oos.oos_cli_behind, + atomic_read(&gss_stat_oos.oos_svc_replay[0]), + atomic_read(&gss_stat_oos.oos_svc_replay[1]), + atomic_read(&gss_stat_oos.oos_svc_replay[2]), + atomic_read(&gss_stat_oos.oos_svc_pass[2])); + + return written; +} + +static int gss_proc_write_secinit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int rc; + + rc = gss_do_ctx_init_rpc((char *) buffer, count); + if (rc) { + LASSERT(rc < 0); + return rc; + } + + return ((int) count); +} + +static struct lprocfs_vars gss_lprocfs_vars[] = { + { "replays", gss_proc_read_oos, NULL }, + { "init_channel", NULL, gss_proc_write_secinit, NULL }, + { NULL } +}; + +int gss_init_lproc(void) +{ + int rc; + gss_proc_root = lprocfs_register("gss", sptlrpc_proc_root, + gss_lprocfs_vars, NULL); + + if (IS_ERR(gss_proc_root)) { + rc = PTR_ERR(gss_proc_root); + gss_proc_root = NULL; + CERROR("failed to initialize lproc entries: %d\n", rc); + return rc; + } + + return 0; +} + +void gss_exit_lproc(void) +{ + if (gss_proc_root) { + lprocfs_remove(&gss_proc_root); + gss_proc_root = NULL; + } +} diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c new file mode 100644 index 0000000..121a5de --- /dev/null +++ b/lustre/ptlrpc/gss/sec_gss.c @@ -0,0 +1,2608 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Modifications for Lustre + * Copyright 2004 - 2006, Cluster File Systems, Inc. + * All rights reserved + * Author: Eric Mei <ericm@clusterfs.com> + */ + +/* + * linux/net/sunrpc/auth_gss.c + * + * RPCSEC_GSS client authentication. + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Dug Song <dugsong@monkey.org> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC +#ifdef __KERNEL__ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <linux/random.h> +#include <asm/atomic.h> +#else +#include <liblustre.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre/lustre_idl.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_sec.h> + +#include "gss_err.h" +#include "gss_internal.h" +#include "gss_api.h" + +#include <linux/crypto.h> + +/* pre-definition */ +static struct ptlrpc_sec_policy gss_policy; +static struct ptlrpc_cli_ctx * gss_sec_create_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred); +static void gss_sec_destroy_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); +/******************************************** + * wire data swabber * + ********************************************/ + +static +void gss_header_swabber(struct gss_header *ghdr) +{ + __swab32s(&ghdr->gh_version); + __swab32s(&ghdr->gh_flags); + __swab32s(&ghdr->gh_proc); + __swab32s(&ghdr->gh_seq); + __swab32s(&ghdr->gh_svc); + __swab32s(&ghdr->gh_pad1); + __swab32s(&ghdr->gh_pad2); + __swab32s(&ghdr->gh_pad3); + __swab32s(&ghdr->gh_handle.len); +} + +struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment) +{ + struct gss_header *ghdr; + + ghdr = lustre_swab_buf(msg, segment, sizeof(*ghdr), + gss_header_swabber); + + if (ghdr && + sizeof(*ghdr) + ghdr->gh_handle.len > msg->lm_buflens[segment]) { + CERROR("gss header require length %u, now %u received\n", + (unsigned int) sizeof(*ghdr) + ghdr->gh_handle.len, + msg->lm_buflens[segment]); + return NULL; + } + + return ghdr; +} + +static +void gss_netobj_swabber(netobj_t *obj) +{ + __swab32s(&obj->len); +} + +netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment) +{ + netobj_t *obj; + + obj = lustre_swab_buf(msg, segment, sizeof(*obj), gss_netobj_swabber); + if (obj && sizeof(*obj) + obj->len > msg->lm_buflens[segment]) { + CERROR("netobj require length %u but only %u received\n", + (unsigned int) sizeof(*obj) + obj->len, + msg->lm_buflens[segment]); + return NULL; + } + + return obj; +} + +/* + * payload should be obtained from mechanism. but currently since we + * only support kerberos, we could simply use fixed value. + * krb5 header: 16 + * krb5 checksum: 20 + */ +#define GSS_KRB5_INTEG_MAX_PAYLOAD (40) + +static inline +int gss_estimate_payload(struct gss_ctx *mechctx, int msgsize, int privacy) +{ + if (privacy) { + /* we suppose max cipher block size is 16 bytes. here we + * add 16 for confounder and 16 for padding. + */ + return GSS_KRB5_INTEG_MAX_PAYLOAD + msgsize + 16 + 16 + 16; + } else { + return GSS_KRB5_INTEG_MAX_PAYLOAD; + } +} + +/* + * return signature size, otherwise < 0 to indicate error + */ +static +int gss_sign_msg(struct lustre_msg *msg, + struct gss_ctx *mechctx, + __u32 proc, __u32 seq, + rawobj_t *handle) +{ + struct gss_header *ghdr; + rawobj_t text[3], mic; + int textcnt, mic_idx = msg->lm_bufcount - 1; + __u32 major; + + LASSERT(msg->lm_bufcount >= 3); + + /* gss hdr */ + LASSERT(msg->lm_buflens[0] >= + sizeof(*ghdr) + (handle ? handle->len : 0)); + ghdr = lustre_msg_buf(msg, 0, 0); + + ghdr->gh_version = PTLRPC_GSS_VERSION; + ghdr->gh_flags = 0; + ghdr->gh_proc = proc; + ghdr->gh_seq = seq; + ghdr->gh_svc = PTLRPC_GSS_SVC_INTEGRITY; + if (!handle) { + /* fill in a fake one */ + ghdr->gh_handle.len = 0; + } else { + ghdr->gh_handle.len = handle->len; + memcpy(ghdr->gh_handle.data, handle->data, handle->len); + } + + /* MIC */ + for (textcnt = 0; textcnt < mic_idx; textcnt++) { + text[textcnt].len = msg->lm_buflens[textcnt]; + text[textcnt].data = lustre_msg_buf(msg, textcnt, 0); + } + + mic.len = msg->lm_buflens[mic_idx]; + mic.data = lustre_msg_buf(msg, mic_idx, 0); + + major = lgss_get_mic(mechctx, textcnt, text, &mic); + if (major != GSS_S_COMPLETE) { + CERROR("fail to generate MIC: %08x\n", major); + return -EPERM; + } + LASSERT(mic.len <= msg->lm_buflens[mic_idx]); + + return lustre_shrink_msg(msg, mic_idx, mic.len, 0); +} + +/* + * return gss error + */ +static +__u32 gss_verify_msg(struct lustre_msg *msg, + struct gss_ctx *mechctx) +{ + rawobj_t text[3]; + rawobj_t mic; + int textcnt, mic_idx = msg->lm_bufcount - 1; + __u32 major; + + for (textcnt = 0; textcnt < mic_idx; textcnt++) { + text[textcnt].len = msg->lm_buflens[textcnt]; + text[textcnt].data = lustre_msg_buf(msg, textcnt, 0); + } + + mic.len = msg->lm_buflens[mic_idx]; + mic.data = lustre_msg_buf(msg, mic_idx, 0); + + major = lgss_verify_mic(mechctx, textcnt, text, &mic); + if (major != GSS_S_COMPLETE) + CERROR("mic verify error: %08x\n", major); + + return major; +} + +/* + * return gss error code + */ +static +__u32 gss_unseal_msg(struct gss_ctx *mechctx, + struct lustre_msg *msgbuf, + int *msg_len, int msgbuf_len) +{ + rawobj_t clear_obj, micobj, msgobj, token; + __u8 *clear_buf; + int clear_buflen; + __u32 major; + ENTRY; + + if (msgbuf->lm_bufcount != 3) { + CERROR("invalid bufcount %d\n", msgbuf->lm_bufcount); + RETURN(GSS_S_FAILURE); + } + + /* verify gss header */ + msgobj.len = msgbuf->lm_buflens[0]; + msgobj.data = lustre_msg_buf(msgbuf, 0, 0); + micobj.len = msgbuf->lm_buflens[1]; + micobj.data = lustre_msg_buf(msgbuf, 1, 0); + + major = lgss_verify_mic(mechctx, 1, &msgobj, &micobj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: mic verify error: %08x\n", major); + RETURN(major); + } + + /* temporary clear text buffer */ + clear_buflen = msgbuf->lm_buflens[2]; + OBD_ALLOC(clear_buf, clear_buflen); + if (!clear_buf) + RETURN(GSS_S_FAILURE); + + token.len = msgbuf->lm_buflens[2]; + token.data = lustre_msg_buf(msgbuf, 2, 0); + + clear_obj.len = clear_buflen; + clear_obj.data = clear_buf; + + major = lgss_unwrap(mechctx, &token, &clear_obj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: unwrap message error: %08x\n", major); + GOTO(out_free, major = GSS_S_FAILURE); + } + LASSERT(clear_obj.len <= clear_buflen); + + /* now the decrypted message */ + memcpy(msgbuf, clear_obj.data, clear_obj.len); + *msg_len = clear_obj.len; + + major = GSS_S_COMPLETE; +out_free: + OBD_FREE(clear_buf, clear_buflen); + RETURN(major); +} + +/******************************************** + * gss client context manipulation helpers * + ********************************************/ + +void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx) +{ + struct ptlrpc_cli_ctx *ctx = &gctx->gc_base; + unsigned long ctx_expiry; + + if (lgss_inquire_context(gctx->gc_mechctx, &ctx_expiry)) { + CERROR("ctx %p(%u): unable to inquire, expire it now\n", + gctx, ctx->cc_vcred.vc_uid); + ctx_expiry = 1; /* make it expired now */ + } + + ctx->cc_expire = gss_round_ctx_expiry(ctx_expiry, + ctx->cc_sec->ps_flags); + + /* At this point this ctx might have been marked as dead by + * someone else, in which case nobody will make further use + * of it. we don't care, and mark it UPTODATE will help + * destroying server side context when it be destroied. + */ + set_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); + + CWARN("%s ctx %p(%u->%s), will expire at %lu(%lds lifetime)\n", + (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE ? + "server installed reverse" : "client refreshed"), + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec), + ctx->cc_expire, (long) (ctx->cc_expire - get_seconds())); +} + +static +void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx) +{ + if (gctx->gc_mechctx) + lgss_delete_sec_context(&gctx->gc_mechctx); + + rawobj_free(&gctx->gc_handle); +} + +/* + * Based on sequence number algorithm as specified in RFC 2203. + * + * modified for our own problem: arriving request has valid sequence number, + * but unwrapping request might cost a long time, after that its sequence + * are not valid anymore (fall behind the window). It rarely happen, mostly + * under extreme load. + * + * note we should not check sequence before verify the integrity of incoming + * request, because just one attacking request with high sequence number might + * cause all following request be dropped. + * + * so here we use a multi-phase approach: prepare 2 sequence windows, + * "main window" for normal sequence and "back window" for fall behind sequence. + * and 3-phase checking mechanism: + * 0 - before integrity verification, perform a initial sequence checking in + * main window, which only try and don't actually set any bits. if the + * sequence is high above the window or fit in the window and the bit + * is 0, then accept and proceed to integrity verification. otherwise + * reject this sequence. + * 1 - after integrity verification, check in main window again. if this + * sequence is high above the window or fit in the window and the bit + * is 0, then set the bit and accept; if it fit in the window but bit + * already set, then reject; if it fall behind the window, then proceed + * to phase 2. + * 2 - check in back window. if it is high above the window or fit in the + * window and the bit is 0, then set the bit and accept. otherwise reject. + * + * return value: + * 1: looks like a replay + * 0: is ok + * -1: is a replay + * + * note phase 0 is necessary, because otherwise replay attacking request of + * sequence which between the 2 windows can't be detected. + * + * this mechanism can't totally solve the problem, but could help much less + * number of valid requests be dropped. + */ +static +int gss_do_check_seq(unsigned long *window, __u32 win_size, __u32 *max_seq, + __u32 seq_num, int phase) +{ + LASSERT(phase >= 0 && phase <= 2); + + if (seq_num > *max_seq) { + /* + * 1. high above the window + */ + if (phase == 0) + return 0; + + if (seq_num >= *max_seq + win_size) { + memset(window, 0, win_size / 8); + *max_seq = seq_num; + } else { + while(*max_seq < seq_num) { + (*max_seq)++; + __clear_bit((*max_seq) % win_size, window); + } + } + __set_bit(seq_num % win_size, window); + } else if (seq_num + win_size <= *max_seq) { + /* + * 2. low behind the window + */ + if (phase == 0 || phase == 2) + goto replay; + + CWARN("seq %u is %u behind (size %d), check backup window\n", + seq_num, *max_seq - win_size - seq_num, win_size); + return 1; + } else { + /* + * 3. fit into the window + */ + switch (phase) { + case 0: + if (test_bit(seq_num % win_size, window)) + goto replay; + break; + case 1: + case 2: + if (__test_and_set_bit(seq_num % win_size, window)) + goto replay; + break; + } + } + + return 0; + +replay: + CERROR("seq %u (%s %s window) is a replay: max %u, winsize %d\n", + seq_num, + seq_num + win_size > *max_seq ? "in" : "behind", + phase == 2 ? "backup " : "main", + *max_seq, win_size); + return -1; +} + +/* + * Based on sequence number algorithm as specified in RFC 2203. + * + * if @set == 0: initial check, don't set any bit in window + * if @sec == 1: final check, set bit in window + */ +int gss_check_seq_num(struct gss_svc_seq_data *ssd, __u32 seq_num, int set) +{ + int rc = 0; + + spin_lock(&ssd->ssd_lock); + + if (set == 0) { + /* + * phase 0 testing + */ + rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN, + &ssd->ssd_max_main, seq_num, 0); + if (unlikely(rc)) + gss_stat_oos_record_svc(0, 1); + } else { + /* + * phase 1 checking main window + */ + rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN, + &ssd->ssd_max_main, seq_num, 1); + switch (rc) { + case -1: + gss_stat_oos_record_svc(1, 1); + /* fall through */ + case 0: + goto exit; + } + /* + * phase 2 checking back window + */ + rc = gss_do_check_seq(ssd->ssd_win_back, GSS_SEQ_WIN_BACK, + &ssd->ssd_max_back, seq_num, 2); + if (rc) + gss_stat_oos_record_svc(2, 1); + else + gss_stat_oos_record_svc(2, 0); + } +exit: + spin_unlock(&ssd->ssd_lock); + return rc; +} + +/*************************************** + * cred APIs * + ***************************************/ + +static inline +int gss_cli_payload(struct ptlrpc_cli_ctx *ctx, + int msgsize, int privacy) +{ + return gss_estimate_payload(NULL, msgsize, privacy); +} + +static +int gss_cli_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* if we are refreshing for root, also update the reverse + * handle index, do not confuse reverse contexts. + */ + if (ctx->cc_vcred.vc_uid == 0) { + struct gss_sec *gsec; + + gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base); + gsec->gs_rvs_hdl = gss_get_next_ctx_index(); + } + + return gss_ctx_refresh_pipefs(ctx); +} + +static +int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred) +{ + return (ctx->cc_vcred.vc_uid == vcred->vc_uid); +} + +static +void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_CTX_UPTODATE) + strncat(buf, "uptodate,", bufsize); + if (flags & PTLRPC_CTX_DEAD) + strncat(buf, "dead,", bufsize); + if (flags & PTLRPC_CTX_ERROR) + strncat(buf, "error,", bufsize); + if (flags & PTLRPC_CTX_HASHED) + strncat(buf, "hashed,", bufsize); + if (flags & PTLRPC_CTX_ETERNAL) + strncat(buf, "eternal,", bufsize); + if (buf[0] == '\0') + strncat(buf, "-,", bufsize); + + buf[strlen(buf) - 1] = '\0'; +} + +static +int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) +{ + struct gss_cli_ctx *gctx; + char flags_str[40]; + int written; + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + gss_cli_ctx_flags2str(ctx->cc_flags, flags_str, sizeof(flags_str)); + + written = snprintf(buf, bufsize, + "UID %d:\n" + " flags: %s\n" + " seqwin: %d\n" + " sequence: %d\n", + ctx->cc_vcred.vc_uid, + flags_str, + gctx->gc_win, + atomic_read(&gctx->gc_seq)); + + if (gctx->gc_mechctx) { + written += lgss_display(gctx->gc_mechctx, + buf + written, bufsize - written); + } + + return written; +} + +static +int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req) +{ + struct gss_cli_ctx *gctx; + __u32 seq; + int rc; + ENTRY; + + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf->lm_bufcount >= 3); + LASSERT(req->rq_cli_ctx == ctx); + + /* nothing to do for context negotiation RPCs */ + if (req->rq_ctx_init) + RETURN(0); + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); +redo: + seq = atomic_inc_return(&gctx->gc_seq); + + rc = gss_sign_msg(req->rq_reqbuf, gctx->gc_mechctx, + gctx->gc_proc, seq, &gctx->gc_handle); + if (rc < 0) + RETURN(rc); + + /* gss_sign_msg() msg might take long time to finish, in which period + * more rpcs could be wrapped up and sent out. if we found too many + * of them we should repack this rpc, because sent it too late might + * lead to the sequence number fall behind the window on server and + * be dropped. also applies to gss_cli_ctx_seal(). + */ + if (atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) { + int behind = atomic_read(&gctx->gc_seq) - seq; + + gss_stat_oos_record_cli(behind); + CWARN("req %p: %u behind, retry signing\n", req, behind); + goto redo; + } + + req->rq_reqdata_len = rc; + RETURN(0); +} + +static +int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct gss_header *ghdr) +{ + struct gss_err_header *errhdr; + int rc; + + LASSERT(ghdr->gh_proc == PTLRPC_GSS_PROC_ERR); + + errhdr = (struct gss_err_header *) ghdr; + + /* server return NO_CONTEXT might be caused by context expire + * or server reboot/failover. we refresh the cred transparently + * to upper layer. + * In some cases, our gss handle is possible to be incidentally + * identical to another handle since the handle itself is not + * fully random. In krb5 case, the GSS_S_BAD_SIG will be + * returned, maybe other gss error for other mechanism. + * + * if we add new mechanism, make sure the correct error are + * returned in this case. + * + * but in any cases, don't resend ctx destroying rpc, don't resend + * reverse rpc. + */ + if (req->rq_ctx_fini) { + CWARN("server respond error (%08x/%08x) for ctx fini\n", + errhdr->gh_major, errhdr->gh_minor); + rc = -EINVAL; + } else if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + CWARN("reverse server respond error (%08x/%08x)\n", + errhdr->gh_major, errhdr->gh_minor); + rc = -EINVAL; + } else if (errhdr->gh_major == GSS_S_NO_CONTEXT || + errhdr->gh_major == GSS_S_BAD_SIG) { + CWARN("req x"LPU64"/t"LPU64": server respond ctx %p(%u->%s) " + "%s, server might lost the context.\n", + req->rq_xid, req->rq_transno, ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), + errhdr->gh_major == GSS_S_NO_CONTEXT ? + "NO_CONTEXT" : "BAD_SIG"); + + sptlrpc_ctx_expire(ctx); + /* + * we need replace the ctx right here, otherwise during + * resent we'll hit the logic in sptlrpc_req_refresh_ctx() + * which keep the ctx with RESEND flag, thus we'll never + * get rid of this ctx. + */ + rc = sptlrpc_req_replace_dead_ctx(req); + if (rc == 0) + req->rq_resend = 1; + } else { + CERROR("req %p: server report gss error (%x/%x)\n", + req, errhdr->gh_major, errhdr->gh_minor); + rc = -EACCES; + } + + return rc; +} + +static +int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req) +{ + struct gss_cli_ctx *gctx; + struct gss_header *ghdr, *reqhdr; + struct lustre_msg *msg = req->rq_repbuf; + __u32 major; + int rc = 0; + ENTRY; + + LASSERT(req->rq_cli_ctx == ctx); + LASSERT(msg); + + req->rq_repdata_len = req->rq_nob_received; + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + /* special case for context negotiation, rq_repmsg/rq_replen actually + * are not used currently. + */ + if (req->rq_ctx_init) { + req->rq_repmsg = lustre_msg_buf(msg, 1, 0); + req->rq_replen = msg->lm_buflens[1]; + RETURN(0); + } + + if (msg->lm_bufcount < 3 || msg->lm_bufcount > 4) { + CERROR("unexpected bufcount %u\n", msg->lm_bufcount); + RETURN(-EPROTO); + } + + ghdr = gss_swab_header(msg, 0); + if (ghdr == NULL) { + CERROR("can't decode gss header\n"); + RETURN(-EPROTO); + } + + /* sanity checks */ + reqhdr = lustre_msg_buf(msg, 0, sizeof(*reqhdr)); + LASSERT(reqhdr); + + if (ghdr->gh_version != reqhdr->gh_version) { + CERROR("gss version %u mismatch, expect %u\n", + ghdr->gh_version, reqhdr->gh_version); + RETURN(-EPROTO); + } + + switch (ghdr->gh_proc) { + case PTLRPC_GSS_PROC_DATA: + if (ghdr->gh_seq != reqhdr->gh_seq) { + CERROR("seqnum %u mismatch, expect %u\n", + ghdr->gh_seq, reqhdr->gh_seq); + RETURN(-EPROTO); + } + + if (ghdr->gh_svc != PTLRPC_GSS_SVC_INTEGRITY) { + CERROR("unexpected svc %d\n", ghdr->gh_svc); + RETURN(-EPROTO); + } + + if (lustre_msg_swabbed(msg)) + gss_header_swabber(ghdr); + + major = gss_verify_msg(msg, gctx->gc_mechctx); + if (major != GSS_S_COMPLETE) + RETURN(-EPERM); + + req->rq_repmsg = lustre_msg_buf(msg, 1, 0); + req->rq_replen = msg->lm_buflens[1]; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount < 4) { + CERROR("Invalid reply bufcount %u\n", + msg->lm_bufcount); + RETURN(-EPROTO); + } + + /* bulk checksum is the second last segment */ + rc = bulk_sec_desc_unpack(msg, msg->lm_bufcount - 2); + } + break; + case PTLRPC_GSS_PROC_ERR: + rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr); + break; + default: + CERROR("unknown gss proc %d\n", ghdr->gh_proc); + rc = -EPROTO; + } + + RETURN(rc); +} + +static +int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req) +{ + struct gss_cli_ctx *gctx; + rawobj_t msgobj, cipher_obj, micobj; + struct gss_header *ghdr; + int buflens[3], wiresize, rc; + __u32 major; + ENTRY; + + LASSERT(req->rq_clrbuf); + LASSERT(req->rq_cli_ctx == ctx); + LASSERT(req->rq_reqlen); + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + /* close clear data length */ + req->rq_clrdata_len = lustre_msg_size_v2(req->rq_clrbuf->lm_bufcount, + req->rq_clrbuf->lm_buflens); + + /* calculate wire data length */ + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_cli_payload(&gctx->gc_base, buflens[0], 0); + buflens[2] = gss_cli_payload(&gctx->gc_base, req->rq_clrdata_len, 1); + wiresize = lustre_msg_size_v2(3, buflens); + + /* allocate wire buffer */ + if (req->rq_pool) { + /* pre-allocated */ + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf != req->rq_clrbuf); + LASSERT(req->rq_reqbuf_len >= wiresize); + } else { + OBD_ALLOC(req->rq_reqbuf, wiresize); + if (!req->rq_reqbuf) + RETURN(-ENOMEM); + req->rq_reqbuf_len = wiresize; + } + + lustre_init_msg_v2(req->rq_reqbuf, 3, buflens, NULL); + req->rq_reqbuf->lm_secflvr = req->rq_sec_flavor; + + /* gss header */ + ghdr = lustre_msg_buf(req->rq_reqbuf, 0, 0); + ghdr->gh_version = PTLRPC_GSS_VERSION; + ghdr->gh_flags = 0; + ghdr->gh_proc = gctx->gc_proc; + ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq); + ghdr->gh_svc = PTLRPC_GSS_SVC_PRIVACY; + ghdr->gh_handle.len = gctx->gc_handle.len; + memcpy(ghdr->gh_handle.data, gctx->gc_handle.data, gctx->gc_handle.len); + +redo: + /* header signature */ + msgobj.len = req->rq_reqbuf->lm_buflens[0]; + msgobj.data = lustre_msg_buf(req->rq_reqbuf, 0, 0); + micobj.len = req->rq_reqbuf->lm_buflens[1]; + micobj.data = lustre_msg_buf(req->rq_reqbuf, 1, 0); + + major = lgss_get_mic(gctx->gc_mechctx, 1, &msgobj, &micobj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: sign message error: %08x\n", major); + GOTO(err_free, rc = -EPERM); + } + /* perhaps shrink msg has potential problem in re-packing??? + * ship a little bit more data is fine. + lustre_shrink_msg(req->rq_reqbuf, 1, micobj.len, 0); + */ + + /* clear text */ + msgobj.len = req->rq_clrdata_len; + msgobj.data = (__u8 *) req->rq_clrbuf; + + /* cipher text */ + cipher_obj.len = req->rq_reqbuf->lm_buflens[2]; + cipher_obj.data = lustre_msg_buf(req->rq_reqbuf, 2, 0); + + major = lgss_wrap(gctx->gc_mechctx, &msgobj, req->rq_clrbuf_len, + &cipher_obj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: wrap message error: %08x\n", major); + GOTO(err_free, rc = -EPERM); + } + LASSERT(cipher_obj.len <= buflens[2]); + + /* see explain in gss_cli_ctx_sign() */ + if (atomic_read(&gctx->gc_seq) - ghdr->gh_seq > + GSS_SEQ_REPACK_THRESHOLD) { + int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq; + + gss_stat_oos_record_cli(behind); + CWARN("req %p: %u behind, retry sealing\n", req, behind); + + ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq); + goto redo; + } + + /* now set the final wire data length */ + req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, 2, + cipher_obj.len, 0); + + RETURN(0); + +err_free: + if (!req->rq_pool) { + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } + RETURN(rc); +} + +static +int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req) +{ + struct gss_cli_ctx *gctx; + struct gss_header *ghdr; + int msglen, rc; + __u32 major; + ENTRY; + + LASSERT(req->rq_repbuf); + LASSERT(req->rq_cli_ctx == ctx); + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + ghdr = gss_swab_header(req->rq_repbuf, 0); + if (ghdr == NULL) { + CERROR("can't decode gss header\n"); + RETURN(-EPROTO); + } + + /* sanity checks */ + if (ghdr->gh_version != PTLRPC_GSS_VERSION) { + CERROR("gss version %u mismatch, expect %u\n", + ghdr->gh_version, PTLRPC_GSS_VERSION); + RETURN(-EPROTO); + } + + switch (ghdr->gh_proc) { + case PTLRPC_GSS_PROC_DATA: + if (lustre_msg_swabbed(req->rq_repbuf)) + gss_header_swabber(ghdr); + + major = gss_unseal_msg(gctx->gc_mechctx, req->rq_repbuf, + &msglen, req->rq_repbuf_len); + if (major != GSS_S_COMPLETE) { + rc = -EPERM; + break; + } + + if (lustre_unpack_msg(req->rq_repbuf, msglen)) { + CERROR("Failed to unpack after decryption\n"); + RETURN(-EPROTO); + } + req->rq_repdata_len = msglen; + + if (req->rq_repbuf->lm_bufcount < 1) { + CERROR("Invalid reply buffer: empty\n"); + RETURN(-EPROTO); + } + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (req->rq_repbuf->lm_bufcount < 2) { + CERROR("Too few request buffer segments %d\n", + req->rq_repbuf->lm_bufcount); + RETURN(-EPROTO); + } + + /* bulk checksum is the last segment */ + if (bulk_sec_desc_unpack(req->rq_repbuf, + req->rq_repbuf->lm_bufcount-1)) + RETURN(-EPROTO); + } + + req->rq_repmsg = lustre_msg_buf(req->rq_repbuf, 0, 0); + req->rq_replen = req->rq_repbuf->lm_buflens[0]; + + rc = 0; + break; + case PTLRPC_GSS_PROC_ERR: + rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr); + break; + default: + CERROR("unexpected proc %d\n", ghdr->gh_proc); + rc = -EPERM; + } + + RETURN(rc); +} + +static struct ptlrpc_ctx_ops gss_ctxops = { + .refresh = gss_cli_ctx_refresh, + .match = gss_cli_ctx_match, + .display = gss_cli_ctx_display, + .sign = gss_cli_ctx_sign, + .verify = gss_cli_ctx_verify, + .seal = gss_cli_ctx_seal, + .unseal = gss_cli_ctx_unseal, + .wrap_bulk = gss_cli_ctx_wrap_bulk, + .unwrap_bulk = gss_cli_ctx_unwrap_bulk, +}; + +/********************************************* + * reverse context installation * + *********************************************/ +static +int gss_install_rvs_cli_ctx(struct gss_sec *gsec, + struct ptlrpc_svc_ctx *svc_ctx) +{ + struct vfs_cred vcred; + struct gss_svc_reqctx *grctx; + struct ptlrpc_cli_ctx *cli_ctx; + struct gss_cli_ctx *cli_gctx; + struct gss_ctx *mechctx = NULL; + __u32 major; + int rc; + ENTRY; + + vcred.vc_uid = 0; + vcred.vc_gid = 0; + + cli_ctx = gss_sec_create_ctx(&gsec->gs_base, &vcred); + if (!cli_ctx) + RETURN(-ENOMEM); + + grctx = container_of(svc_ctx, struct gss_svc_reqctx, src_base); + LASSERT(grctx); + LASSERT(grctx->src_ctx); + LASSERT(grctx->src_ctx->gsc_mechctx); + + major = lgss_copy_reverse_context(grctx->src_ctx->gsc_mechctx, &mechctx); + if (major != GSS_S_COMPLETE) + GOTO(err_ctx, rc = -ENOMEM); + + cli_gctx = container_of(cli_ctx, struct gss_cli_ctx, gc_base); + + cli_gctx->gc_proc = PTLRPC_GSS_PROC_DATA; + cli_gctx->gc_win = GSS_SEQ_WIN; + atomic_set(&cli_gctx->gc_seq, 0); + + if (rawobj_dup(&cli_gctx->gc_handle, &grctx->src_ctx->gsc_rvs_hdl)) + GOTO(err_mechctx, rc = -ENOMEM); + + cli_gctx->gc_mechctx = mechctx; + gss_cli_ctx_uptodate(cli_gctx); + + sptlrpc_ctx_replace(&gsec->gs_base, cli_ctx); + RETURN(0); + +err_mechctx: + lgss_delete_sec_context(&mechctx); +err_ctx: + gss_sec_destroy_ctx(cli_ctx->cc_sec, cli_ctx); + return rc; +} + + +static inline +int gss_install_rvs_svc_ctx(struct obd_import *imp, + struct gss_sec *gsec, + struct gss_cli_ctx *gctx) +{ + return gss_svc_upcall_install_rvs_ctx(imp, gsec, gctx); +} + +/********************************************* + * GSS security APIs * + *********************************************/ + +static +struct ptlrpc_cli_ctx * gss_sec_create_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred) +{ + struct gss_cli_ctx *gctx; + struct ptlrpc_cli_ctx *ctx; + ENTRY; + + OBD_ALLOC_PTR(gctx); + if (!gctx) + RETURN(NULL); + + gctx->gc_win = 0; + atomic_set(&gctx->gc_seq, 0); + + ctx = &gctx->gc_base; + INIT_HLIST_NODE(&ctx->cc_hash); + atomic_set(&ctx->cc_refcount, 0); + ctx->cc_sec = sec; + ctx->cc_ops = &gss_ctxops; + ctx->cc_expire = 0; + ctx->cc_flags = 0; + ctx->cc_vcred = *vcred; + spin_lock_init(&ctx->cc_lock); + INIT_LIST_HEAD(&ctx->cc_req_list); + + CDEBUG(D_SEC, "create a gss cred at %p(uid %u)\n", ctx, vcred->vc_uid); + RETURN(ctx); +} + +static +void gss_sec_destroy_ctx(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx) +{ + struct gss_cli_ctx *gctx; + ENTRY; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + if (gctx->gc_mechctx) { + gss_do_ctx_fini_rpc(gctx); + gss_cli_ctx_finalize(gctx); + } + + CWARN("%s@%p: destroy ctx %p(%u->%s)\n", + ctx->cc_sec->ps_policy->sp_name, ctx->cc_sec, + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); + + OBD_FREE_PTR(gctx); + EXIT; +} + +#define GSS_CCACHE_SIZE (32) + +static +struct ptlrpc_sec* gss_sec_create(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + struct gss_sec *gsec; + struct ptlrpc_sec *sec; + int alloc_size, cache_size, i; + ENTRY; + + LASSERT(imp); + LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_GSS); + + if (ctx || flags & (PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_REVERSE)) + cache_size = 1; + else + cache_size = GSS_CCACHE_SIZE; + + alloc_size = sizeof(*gsec) + sizeof(struct list_head) * cache_size; + + OBD_ALLOC(gsec, alloc_size); + if (!gsec) + RETURN(NULL); + + gsec->gs_mech = lgss_subflavor_to_mech(SEC_FLAVOR_SUB(flavor)); + if (!gsec->gs_mech) { + CERROR("gss backend 0x%x not found\n", SEC_FLAVOR_SUB(flavor)); + goto err_free; + } + + spin_lock_init(&gsec->gs_lock); + gsec->gs_rvs_hdl = 0ULL; /* will be updated later */ + + sec = &gsec->gs_base; + sec->ps_policy = &gss_policy; + sec->ps_flavor = flavor; + sec->ps_flags = flags; + sec->ps_import = class_import_get(imp); + sec->ps_lock = SPIN_LOCK_UNLOCKED; + sec->ps_ccache_size = cache_size; + sec->ps_ccache = (struct hlist_head *) (gsec + 1); + atomic_set(&sec->ps_busy, 0); + + for (i = 0; i < cache_size; i++) + INIT_HLIST_HEAD(&sec->ps_ccache[i]); + + if (!ctx) { + if (gss_sec_upcall_init(gsec)) + goto err_mech; + + sec->ps_gc_interval = 30 * 60; /* 30 minutes */ + sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval; + } else { + LASSERT(sec->ps_flags & PTLRPC_SEC_FL_REVERSE); + + if (gss_install_rvs_cli_ctx(gsec, ctx)) + goto err_mech; + + /* never do gc on reverse sec */ + sec->ps_gc_interval = 0; + sec->ps_gc_next = 0; + } + + if (SEC_FLAVOR_SVC(flavor) == SPTLRPC_SVC_PRIV && + flags & PTLRPC_SEC_FL_BULK) + sptlrpc_enc_pool_add_user(); + + CWARN("create %s%s@%p\n", (ctx ? "reverse " : ""), + gss_policy.sp_name, gsec); + RETURN(sec); + +err_mech: + lgss_mech_put(gsec->gs_mech); +err_free: + OBD_FREE(gsec, alloc_size); + RETURN(NULL); +} + +static +void gss_sec_destroy(struct ptlrpc_sec *sec) +{ + struct gss_sec *gsec; + ENTRY; + + gsec = container_of(sec, struct gss_sec, gs_base); + CWARN("destroy %s@%p\n", gss_policy.sp_name, gsec); + + LASSERT(gsec->gs_mech); + LASSERT(sec->ps_import); + LASSERT(sec->ps_ccache); + LASSERT(sec->ps_ccache_size); + LASSERT(atomic_read(&sec->ps_refcount) == 0); + LASSERT(atomic_read(&sec->ps_busy) == 0); + + gss_sec_upcall_cleanup(gsec); + lgss_mech_put(gsec->gs_mech); + + class_import_put(sec->ps_import); + + if (SEC_FLAVOR_SVC(sec->ps_flavor) == SPTLRPC_SVC_PRIV && + sec->ps_flags & PTLRPC_SEC_FL_BULK) + sptlrpc_enc_pool_del_user(); + + OBD_FREE(gsec, sizeof(*gsec) + + sizeof(struct list_head) * sec->ps_ccache_size); + EXIT; +} + +static +int gss_alloc_reqbuf_auth(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int bufsize, txtsize; + int buflens[5], bufcnt = 2; + ENTRY; + + /* + * - gss header + * - lustre message + * - user descriptor + * - bulk sec descriptor + * - signature + */ + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = msgsize; + txtsize = buflens[0] + buflens[1]; + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + buflens[bufcnt] = sptlrpc_current_user_desc_size(); + txtsize += buflens[bufcnt]; + bufcnt++; + } + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1, + req->rq_bulk_read); + txtsize += buflens[bufcnt]; + bufcnt++; + } + + buflens[bufcnt++] = req->rq_ctx_init ? GSS_CTX_INIT_MAX_LEN : + gss_cli_payload(req->rq_cli_ctx, txtsize, 0); + + bufsize = lustre_msg_size_v2(bufcnt, buflens); + + if (!req->rq_reqbuf) { + bufsize = size_roundup_power2(bufsize); + + OBD_ALLOC(req->rq_reqbuf, bufsize); + if (!req->rq_reqbuf) + RETURN(-ENOMEM); + + req->rq_reqbuf_len = bufsize; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= bufsize); + memset(req->rq_reqbuf, 0, bufsize); + } + + lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL); + req->rq_reqbuf->lm_secflvr = req->rq_sec_flavor; + + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, msgsize); + LASSERT(req->rq_reqmsg); + + /* pack user desc here, later we might leave current user's process */ + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + sptlrpc_pack_user_desc(req->rq_reqbuf, 2); + + RETURN(0); +} + +static +int gss_alloc_reqbuf_priv(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int ibuflens[3], ibufcnt; + int buflens[3]; + int clearsize, wiresize; + ENTRY; + + LASSERT(req->rq_clrbuf == NULL); + LASSERT(req->rq_clrbuf_len == 0); + + /* Inner (clear) buffers + * - lustre message + * - user descriptor + * - bulk checksum + */ + ibufcnt = 1; + ibuflens[0] = msgsize; + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + ibuflens[ibufcnt++] = sptlrpc_current_user_desc_size(); + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + ibuflens[ibufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1, + req->rq_bulk_read); + } + clearsize = lustre_msg_size_v2(ibufcnt, ibuflens); + /* to allow append padding during encryption */ + clearsize += GSS_MAX_CIPHER_BLOCK; + + /* Wrapper (wire) buffers + * - gss header + * - signature of gss header + * - cipher text + */ + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_cli_payload(req->rq_cli_ctx, buflens[0], 0); + buflens[2] = gss_cli_payload(req->rq_cli_ctx, clearsize, 1); + wiresize = lustre_msg_size_v2(3, buflens); + + if (req->rq_pool) { + /* rq_reqbuf is preallocated */ + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len >= wiresize); + + memset(req->rq_reqbuf, 0, req->rq_reqbuf_len); + + /* if the pre-allocated buffer is big enough, we just pack + * both clear buf & request buf in it, to avoid more alloc. + */ + if (clearsize + wiresize <= req->rq_reqbuf_len) { + req->rq_clrbuf = + (void *) (((char *) req->rq_reqbuf) + wiresize); + } else { + CWARN("pre-allocated buf size %d is not enough for " + "both clear (%d) and cipher (%d) text, proceed " + "with extra allocation\n", req->rq_reqbuf_len, + clearsize, wiresize); + } + } + + if (!req->rq_clrbuf) { + clearsize = size_roundup_power2(clearsize); + + OBD_ALLOC(req->rq_clrbuf, clearsize); + if (!req->rq_clrbuf) + RETURN(-ENOMEM); + } + req->rq_clrbuf_len = clearsize; + + lustre_init_msg_v2(req->rq_clrbuf, ibufcnt, ibuflens, NULL); + req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, msgsize); + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + sptlrpc_pack_user_desc(req->rq_clrbuf, 1); + + RETURN(0); +} + +/* + * NOTE: any change of request buffer allocation should also consider + * changing enlarge_reqbuf() series functions. + */ +static +int gss_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + LASSERT(!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) || + (req->rq_bulk_read || req->rq_bulk_write)); + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_NONE: + case SPTLRPC_SVC_AUTH: + return gss_alloc_reqbuf_auth(sec, req, msgsize); + case SPTLRPC_SVC_PRIV: + return gss_alloc_reqbuf_priv(sec, req, msgsize); + default: + LBUG(); + } + return 0; +} + +static +void gss_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + int privacy; + ENTRY; + + LASSERT(!req->rq_pool || req->rq_reqbuf); + privacy = SEC_FLAVOR_SVC(req->rq_sec_flavor) == SPTLRPC_SVC_PRIV; + + if (!req->rq_clrbuf) + goto release_reqbuf; + + /* release clear buffer */ + LASSERT(privacy); + LASSERT(req->rq_clrbuf_len); + + if (req->rq_pool && + req->rq_clrbuf >= req->rq_reqbuf && + (char *) req->rq_clrbuf < + (char *) req->rq_reqbuf + req->rq_reqbuf_len) + goto release_reqbuf; + + OBD_FREE(req->rq_clrbuf, req->rq_clrbuf_len); + req->rq_clrbuf = NULL; + req->rq_clrbuf_len = 0; + +release_reqbuf: + if (!req->rq_pool && req->rq_reqbuf) { + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } + + EXIT; +} + +static +int gss_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int privacy = (SEC_FLAVOR_SVC(req->rq_sec_flavor) == SPTLRPC_SVC_PRIV); + int bufsize, txtsize; + int buflens[4], bufcnt; + ENTRY; + + LASSERT(!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) || + (req->rq_bulk_read || req->rq_bulk_write)); + + if (privacy) { + bufcnt = 1; + buflens[0] = msgsize; + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt++] = bulk_sec_desc_size( + conf->sfc_bulk_csum, 0, + req->rq_bulk_read); + } + txtsize = lustre_msg_size_v2(bufcnt, buflens); + txtsize += GSS_MAX_CIPHER_BLOCK; + + bufcnt = 3; + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_cli_payload(req->rq_cli_ctx, buflens[0], 0); + buflens[2] = gss_cli_payload(req->rq_cli_ctx, txtsize, 1); + } else { + bufcnt = 2; + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = msgsize; + txtsize = buflens[0] + buflens[1]; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt] = bulk_sec_desc_size( + conf->sfc_bulk_csum, 0, + req->rq_bulk_read); + txtsize += buflens[bufcnt]; + bufcnt++; + } + buflens[bufcnt++] = req->rq_ctx_init ? GSS_CTX_INIT_MAX_LEN : + gss_cli_payload(req->rq_cli_ctx, txtsize, 0); + } + + bufsize = lustre_msg_size_v2(bufcnt, buflens); + bufsize = size_roundup_power2(bufsize); + + OBD_ALLOC(req->rq_repbuf, bufsize); + if (!req->rq_repbuf) + return -ENOMEM; + + req->rq_repbuf_len = bufsize; + return 0; +} + +static +void gss_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + OBD_FREE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; +} + +static int get_enlarged_msgsize(struct lustre_msg *msg, + int segment, int newsize) +{ + int save, newmsg_size; + + LASSERT(newsize >= msg->lm_buflens[segment]); + + save = msg->lm_buflens[segment]; + msg->lm_buflens[segment] = newsize; + newmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + msg->lm_buflens[segment] = save; + + return newmsg_size; +} + +static int get_enlarged_msgsize2(struct lustre_msg *msg, + int segment1, int newsize1, + int segment2, int newsize2) +{ + int save1, save2, newmsg_size; + + LASSERT(newsize1 >= msg->lm_buflens[segment1]); + LASSERT(newsize2 >= msg->lm_buflens[segment2]); + + save1 = msg->lm_buflens[segment1]; + save2 = msg->lm_buflens[segment2]; + msg->lm_buflens[segment1] = newsize1; + msg->lm_buflens[segment2] = newsize2; + newmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + msg->lm_buflens[segment1] = save1; + msg->lm_buflens[segment2] = save2; + + return newmsg_size; +} + +static inline int msg_last_seglen(struct lustre_msg *msg) +{ + return msg->lm_buflens[msg->lm_bufcount - 1]; +} + +static +int gss_enlarge_reqbuf_auth(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newbuf; + int txtsize, sigsize, i; + int newmsg_size, newbuf_size; + + /* + * embedded msg is at seg 1; signature is at the last seg + */ + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len > req->rq_reqlen); + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); + LASSERT(lustre_msg_buf(req->rq_reqbuf, 1, 0) == req->rq_reqmsg); + + /* compute new embedded msg size */ + newmsg_size = get_enlarged_msgsize(req->rq_reqmsg, segment, newsize); + LASSERT(newmsg_size >= req->rq_reqbuf->lm_buflens[1]); + + /* compute new wrapper msg size */ + for (txtsize = 0, i = 0; i < req->rq_reqbuf->lm_bufcount; i++) + txtsize += req->rq_reqbuf->lm_buflens[i]; + txtsize += newmsg_size - req->rq_reqbuf->lm_buflens[1]; + + sigsize = gss_cli_payload(req->rq_cli_ctx, txtsize, 0); + LASSERT(sigsize >= msg_last_seglen(req->rq_reqbuf)); + newbuf_size = get_enlarged_msgsize2(req->rq_reqbuf, 1, newmsg_size, + req->rq_reqbuf->lm_bufcount - 1, + sigsize); + + /* request from pool should always have enough buffer */ + LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size); + + if (req->rq_reqbuf_len < newbuf_size) { + newbuf_size = size_roundup_power2(newbuf_size); + + OBD_ALLOC(newbuf, newbuf_size); + if (newbuf == NULL) + RETURN(-ENOMEM); + + memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = newbuf; + req->rq_reqbuf_len = newbuf_size; + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, 0); + } + + _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, sigsize); + _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, 1, newmsg_size); + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + + req->rq_reqlen = newmsg_size; + RETURN(0); +} + +static +int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newclrbuf; + int newmsg_size, newclrbuf_size, newcipbuf_size; + int buflens[3]; + + /* + * embedded msg is at seg 0 of clear buffer; + * cipher text is at seg 2 of cipher buffer; + */ + LASSERT(req->rq_pool || + (req->rq_reqbuf == NULL && req->rq_reqbuf_len == 0)); + LASSERT(req->rq_reqbuf == NULL || + (req->rq_pool && req->rq_reqbuf->lm_bufcount == 3)); + LASSERT(req->rq_clrbuf); + LASSERT(req->rq_clrbuf_len > req->rq_reqlen); + LASSERT(lustre_msg_buf(req->rq_clrbuf, 0, 0) == req->rq_reqmsg); + + /* compute new embedded msg size */ + newmsg_size = get_enlarged_msgsize(req->rq_reqmsg, segment, newsize); + + /* compute new clear buffer size */ + newclrbuf_size = get_enlarged_msgsize(req->rq_clrbuf, 0, newmsg_size); + newclrbuf_size += GSS_MAX_CIPHER_BLOCK; + + /* compute new cipher buffer size */ + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_cli_payload(req->rq_cli_ctx, buflens[0], 0); + buflens[2] = gss_cli_payload(req->rq_cli_ctx, newclrbuf_size, 1); + newcipbuf_size = lustre_msg_size_v2(3, buflens); + + /* + * handle the case that we put both clear buf and cipher buf into + * pre-allocated single buffer. + */ + if (unlikely(req->rq_pool) && + req->rq_clrbuf >= req->rq_reqbuf && + (char *) req->rq_clrbuf < + (char *) req->rq_reqbuf + req->rq_reqbuf_len) { + /* + * it couldn't be better we still fit into the + * pre-allocated buffer. + */ + if (newclrbuf_size + newcipbuf_size <= req->rq_reqbuf_len) { + void *src, *dst; + + /* move clear text backward. */ + src = req->rq_clrbuf; + dst = (char *) req->rq_reqbuf + newcipbuf_size; + + memmove(dst, src, req->rq_clrbuf_len); + + req->rq_clrbuf = (struct lustre_msg *) dst; + req->rq_clrbuf_len = newclrbuf_size; + req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0); + } else { + /* + * sadly we have to split out the clear buffer + */ + LASSERT(req->rq_reqbuf_len >= newcipbuf_size); + LASSERT(req->rq_clrbuf_len < newclrbuf_size); + } + } + + if (req->rq_clrbuf_len < newclrbuf_size) { + newclrbuf_size = size_roundup_power2(newclrbuf_size); + + OBD_ALLOC(newclrbuf, newclrbuf_size); + if (newclrbuf == NULL) + RETURN(-ENOMEM); + + memcpy(newclrbuf, req->rq_clrbuf, req->rq_clrbuf_len); + + if (req->rq_reqbuf == NULL || + req->rq_clrbuf < req->rq_reqbuf || + (char *) req->rq_clrbuf >= + (char *) req->rq_reqbuf + req->rq_reqbuf_len) { + OBD_FREE(req->rq_clrbuf, req->rq_clrbuf_len); + } + + req->rq_clrbuf = newclrbuf; + req->rq_clrbuf_len = newclrbuf_size; + req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0); + } + + _sptlrpc_enlarge_msg_inplace(req->rq_clrbuf, 0, newmsg_size); + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + req->rq_reqlen = newmsg_size; + + RETURN(0); +} + +static +int gss_enlarge_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + LASSERT(!req->rq_ctx_init && !req->rq_ctx_fini); + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_AUTH: + return gss_enlarge_reqbuf_auth(sec, req, segment, newsize); + case SPTLRPC_SVC_PRIV: + return gss_enlarge_reqbuf_priv(sec, req, segment, newsize); + default: + LASSERTF(0, "bad flavor %x\n", req->rq_sec_flavor); + return 0; + } +} + +static +int gss_sec_install_rctx(struct obd_import *imp, + struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx) +{ + struct gss_sec *gsec; + struct gss_cli_ctx *gctx; + int rc; + + gsec = container_of(sec, struct gss_sec, gs_base); + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + rc = gss_install_rvs_svc_ctx(imp, gsec, gctx); + return rc; +} + +static struct ptlrpc_sec_cops gss_sec_cops = { + .create_sec = gss_sec_create, + .destroy_sec = gss_sec_destroy, + .create_ctx = gss_sec_create_ctx, + .destroy_ctx = gss_sec_destroy_ctx, + .install_rctx = gss_sec_install_rctx, + .alloc_reqbuf = gss_alloc_reqbuf, + .free_reqbuf = gss_free_reqbuf, + .alloc_repbuf = gss_alloc_repbuf, + .free_repbuf = gss_free_repbuf, + .enlarge_reqbuf = gss_enlarge_reqbuf, +}; + +/******************************************** + * server side API * + ********************************************/ + +static inline +int gss_svc_reqctx_is_special(struct gss_svc_reqctx *grctx) +{ + LASSERT(grctx); + return (grctx->src_init || grctx->src_init_continue || + grctx->src_err_notify); +} + +static +void gss_svc_reqctx_free(struct gss_svc_reqctx *grctx) +{ + if (grctx->src_ctx) + gss_svc_upcall_put_ctx(grctx->src_ctx); + + sptlrpc_policy_put(grctx->src_base.sc_policy); + OBD_FREE_PTR(grctx); +} + +static inline +void gss_svc_reqctx_addref(struct gss_svc_reqctx *grctx) +{ + LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0); + atomic_inc(&grctx->src_base.sc_refcount); +} + +static inline +void gss_svc_reqctx_decref(struct gss_svc_reqctx *grctx) +{ + LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0); + + if (atomic_dec_and_test(&grctx->src_base.sc_refcount)) + gss_svc_reqctx_free(grctx); +} + +static +int gss_svc_sign(struct ptlrpc_request *req, + struct ptlrpc_reply_state *rs, + struct gss_svc_reqctx *grctx) +{ + int rc; + ENTRY; + + LASSERT(rs->rs_msg == lustre_msg_buf(rs->rs_repbuf, 1, 0)); + + /* embedded lustre_msg might have been shrinked */ + if (req->rq_replen != rs->rs_repbuf->lm_buflens[1]) + lustre_shrink_msg(rs->rs_repbuf, 1, req->rq_replen, 1); + + rc = gss_sign_msg(rs->rs_repbuf, grctx->src_ctx->gsc_mechctx, + PTLRPC_GSS_PROC_DATA, grctx->src_wirectx.gw_seq, + NULL); + if (rc < 0) + RETURN(rc); + + rs->rs_repdata_len = rc; + RETURN(0); +} + +int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor) +{ + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + struct ptlrpc_reply_state *rs; + struct gss_err_header *ghdr; + int replen = sizeof(struct ptlrpc_body); + int rc; + ENTRY; + + //OBD_FAIL_RETURN(OBD_FAIL_SVCGSS_ERR_NOTIFY|OBD_FAIL_ONCE, -EINVAL); + + grctx->src_err_notify = 1; + grctx->src_reserve_len = 0; + + rc = lustre_pack_reply_v2(req, 1, &replen, NULL); + if (rc) { + CERROR("could not pack reply, err %d\n", rc); + RETURN(rc); + } + + /* gss hdr */ + rs = req->rq_reply_state; + LASSERT(rs->rs_repbuf->lm_buflens[1] >= sizeof(*ghdr)); + ghdr = lustre_msg_buf(rs->rs_repbuf, 0, 0); + ghdr->gh_version = PTLRPC_GSS_VERSION; + ghdr->gh_flags = 0; + ghdr->gh_proc = PTLRPC_GSS_PROC_ERR; + ghdr->gh_major = major; + ghdr->gh_minor = minor; + ghdr->gh_handle.len = 0; /* fake context handle */ + + rs->rs_repdata_len = lustre_msg_size_v2(rs->rs_repbuf->lm_bufcount, + rs->rs_repbuf->lm_buflens); + + CDEBUG(D_SEC, "prepare gss error notify(0x%x/0x%x) to %s\n", + major, minor, libcfs_nid2str(req->rq_peer.nid)); + RETURN(0); +} + +static +int gss_svc_handle_init(struct ptlrpc_request *req, + struct gss_wire_ctx *gw) +{ + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + struct lustre_msg *reqbuf = req->rq_reqbuf; + struct obd_uuid *uuid; + struct obd_device *target; + rawobj_t uuid_obj, rvs_hdl, in_token; + __u32 lustre_svc; + __u32 *secdata, seclen; + int rc; + ENTRY; + + CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc, + libcfs_nid2str(req->rq_peer.nid)); + + if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) { + CERROR("proc %u: invalid handle length %u\n", + gw->gw_proc, gw->gw_handle.len); + RETURN(SECSVC_DROP); + } + + if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){ + CERROR("Invalid bufcount %d\n", reqbuf->lm_bufcount); + RETURN(SECSVC_DROP); + } + + /* ctx initiate payload is in last segment */ + secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0); + seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1]; + + if (seclen < 4 + 4) { + CERROR("sec size %d too small\n", seclen); + RETURN(SECSVC_DROP); + } + + /* lustre svc type */ + lustre_svc = le32_to_cpu(*secdata++); + seclen -= 4; + + /* extract target uuid, note this code is somewhat fragile + * because touched internal structure of obd_uuid + */ + if (rawobj_extract(&uuid_obj, &secdata, &seclen)) { + CERROR("failed to extract target uuid\n"); + RETURN(SECSVC_DROP); + } + uuid_obj.data[uuid_obj.len - 1] = '\0'; + + uuid = (struct obd_uuid *) uuid_obj.data; + target = class_uuid2obd(uuid); + if (!target || target->obd_stopping || !target->obd_set_up) { + CERROR("target '%s' is not available for context init (%s)", + uuid->uuid, target == NULL ? "no target" : + (target->obd_stopping ? "stopping" : "not set up")); + RETURN(SECSVC_DROP); + } + + /* extract reverse handle */ + if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) { + CERROR("failed extract reverse handle\n"); + RETURN(SECSVC_DROP); + } + + /* extract token */ + if (rawobj_extract(&in_token, &secdata, &seclen)) { + CERROR("can't extract token\n"); + RETURN(SECSVC_DROP); + } + + rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc, + &rvs_hdl, &in_token); + if (rc != SECSVC_OK) + RETURN(rc); + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (reqbuf->lm_bufcount < 4) { + CERROR("missing user descriptor\n"); + RETURN(SECSVC_DROP); + } + if (sptlrpc_unpack_user_desc(reqbuf, 2)) { + CERROR("Mal-formed user descriptor\n"); + RETURN(SECSVC_DROP); + } + req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0); + } + + req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0); + req->rq_reqlen = lustre_msg_buflen(reqbuf, 1); + + RETURN(rc); +} + +/* + * last segment must be the gss signature. + */ +static +int gss_svc_verify_request(struct ptlrpc_request *req, + struct gss_svc_ctx *gctx, + struct gss_wire_ctx *gw, + __u32 *major) +{ + struct lustre_msg *msg = req->rq_reqbuf; + int offset = 2; + ENTRY; + + *major = GSS_S_COMPLETE; + + if (msg->lm_bufcount < 3) { + CERROR("Too few segments (%u) in request\n", msg->lm_bufcount); + RETURN(-EINVAL); + } + + if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) { + CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq); + *major = GSS_S_DUPLICATE_TOKEN; + RETURN(-EACCES); + } + + *major = gss_verify_msg(msg, gctx->gsc_mechctx); + if (*major != GSS_S_COMPLETE) + RETURN(-EACCES); + + if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) { + CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq); + *major = GSS_S_DUPLICATE_TOKEN; + RETURN(-EACCES); + } + + /* user descriptor */ + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (msg->lm_bufcount < (offset + 1 + 1)) { + CERROR("no user desc included\n"); + RETURN(-EINVAL); + } + + if (sptlrpc_unpack_user_desc(msg, offset)) { + CERROR("Mal-formed user descriptor\n"); + RETURN(-EINVAL); + } + + req->rq_user_desc = lustre_msg_buf(msg, offset, 0); + offset++; + } + + /* check bulk cksum data */ + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount < (offset + 1 + 1)) { + CERROR("no bulk checksum included\n"); + RETURN(-EINVAL); + } + + if (bulk_sec_desc_unpack(msg, offset)) + RETURN(-EINVAL); + } + + req->rq_reqmsg = lustre_msg_buf(msg, 1, 0); + req->rq_reqlen = msg->lm_buflens[1]; + RETURN(0); +} + +static +int gss_svc_unseal_request(struct ptlrpc_request *req, + struct gss_svc_ctx *gctx, + struct gss_wire_ctx *gw, + __u32 *major) +{ + struct lustre_msg *msg = req->rq_reqbuf; + int msglen, offset = 1; + ENTRY; + + if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) { + CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq); + *major = GSS_S_DUPLICATE_TOKEN; + RETURN(-EACCES); + } + + *major = gss_unseal_msg(gctx->gsc_mechctx, msg, + &msglen, req->rq_reqdata_len); + if (*major != GSS_S_COMPLETE) + RETURN(-EACCES); + + if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) { + CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq); + *major = GSS_S_DUPLICATE_TOKEN; + RETURN(-EACCES); + } + + if (lustre_unpack_msg(msg, msglen)) { + CERROR("Failed to unpack after decryption\n"); + RETURN(-EINVAL); + } + req->rq_reqdata_len = msglen; + + if (msg->lm_bufcount < 1) { + CERROR("Invalid buffer: is empty\n"); + RETURN(-EINVAL); + } + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (msg->lm_bufcount < offset + 1) { + CERROR("no user descriptor included\n"); + RETURN(-EINVAL); + } + + if (sptlrpc_unpack_user_desc(msg, offset)) { + CERROR("Mal-formed user descriptor\n"); + RETURN(-EINVAL); + } + + req->rq_user_desc = lustre_msg_buf(msg, offset, 0); + offset++; + } + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount < offset + 1) { + CERROR("no bulk checksum included\n"); + RETURN(-EINVAL); + } + + if (bulk_sec_desc_unpack(msg, offset)) + RETURN(-EINVAL); + } + + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 0, 0); + req->rq_reqlen = req->rq_reqbuf->lm_buflens[0]; + RETURN(0); +} + +static +int gss_svc_handle_data(struct ptlrpc_request *req, + struct gss_wire_ctx *gw) +{ + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + __u32 major = 0; + int rc = 0; + ENTRY; + + grctx->src_ctx = gss_svc_upcall_get_ctx(req, gw); + if (!grctx->src_ctx) { + major = GSS_S_NO_CONTEXT; + goto error; + } + + switch (gw->gw_svc) { + case PTLRPC_GSS_SVC_INTEGRITY: + rc = gss_svc_verify_request(req, grctx->src_ctx, gw, &major); + break; + case PTLRPC_GSS_SVC_PRIVACY: + rc = gss_svc_unseal_request(req, grctx->src_ctx, gw, &major); + break; + default: + CERROR("unsupported gss service %d\n", gw->gw_svc); + rc = -EINVAL; + } + + if (rc == 0) + RETURN(SECSVC_OK); + + CERROR("svc %u failed: major 0x%08x: ctx %p(%u->%s)\n", + gw->gw_svc, major, grctx->src_ctx, grctx->src_ctx->gsc_uid, + libcfs_nid2str(req->rq_peer.nid)); +error: + /* + * we only notify client in case of NO_CONTEXT/BAD_SIG, which + * might happen after server reboot, to allow recovery. + */ + if ((major == GSS_S_NO_CONTEXT || major == GSS_S_BAD_SIG) && + gss_pack_err_notify(req, major, 0) == 0) + RETURN(SECSVC_COMPLETE); + + RETURN(SECSVC_DROP); +} + +static +int gss_svc_handle_destroy(struct ptlrpc_request *req, + struct gss_wire_ctx *gw) +{ + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + int replen = sizeof(struct ptlrpc_body); + __u32 major; + ENTRY; + + grctx->src_ctx = gss_svc_upcall_get_ctx(req, gw); + if (!grctx->src_ctx) { + CWARN("invalid gss context handle for destroy.\n"); + RETURN(SECSVC_DROP); + } + + if (gw->gw_svc != PTLRPC_GSS_SVC_INTEGRITY) { + CERROR("svc %u is not supported in destroy.\n", gw->gw_svc); + RETURN(SECSVC_DROP); + } + + if (gss_svc_verify_request(req, grctx->src_ctx, gw, &major)) + RETURN(SECSVC_DROP); + + if (lustre_pack_reply_v2(req, 1, &replen, NULL)) + RETURN(SECSVC_DROP); + + CWARN("gss svc destroy ctx %p(%u->%s)\n", grctx->src_ctx, + grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid)); + + gss_svc_upcall_destroy_ctx(grctx->src_ctx); + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (req->rq_reqbuf->lm_bufcount < 4) { + CERROR("missing user descriptor, ignore it\n"); + RETURN(SECSVC_OK); + } + if (sptlrpc_unpack_user_desc(req->rq_reqbuf, 2)) { + CERROR("Mal-formed user descriptor, ignore it\n"); + RETURN(SECSVC_OK); + } + req->rq_user_desc = lustre_msg_buf(req->rq_reqbuf, 2, 0); + } + + RETURN(SECSVC_OK); +} + +static +int gss_svc_accept(struct ptlrpc_request *req) +{ + struct gss_header *ghdr; + struct gss_svc_reqctx *grctx; + struct gss_wire_ctx *gw; + int rc; + ENTRY; + + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_svc_ctx == NULL); + + if (req->rq_reqbuf->lm_bufcount < 2) { + CERROR("buf count only %d\n", req->rq_reqbuf->lm_bufcount); + RETURN(SECSVC_DROP); + } + + ghdr = gss_swab_header(req->rq_reqbuf, 0); + if (ghdr == NULL) { + CERROR("can't decode gss header\n"); + RETURN(SECSVC_DROP); + } + + /* sanity checks */ + if (ghdr->gh_version != PTLRPC_GSS_VERSION) { + CERROR("gss version %u, expect %u\n", ghdr->gh_version, + PTLRPC_GSS_VERSION); + RETURN(SECSVC_DROP); + } + + /* alloc grctx data */ + OBD_ALLOC_PTR(grctx); + if (!grctx) { + CERROR("fail to alloc svc reqctx\n"); + RETURN(SECSVC_DROP); + } + grctx->src_base.sc_policy = sptlrpc_policy_get(&gss_policy); + atomic_set(&grctx->src_base.sc_refcount, 1); + req->rq_svc_ctx = &grctx->src_base; + gw = &grctx->src_wirectx; + + /* save wire context */ + gw->gw_proc = ghdr->gh_proc; + gw->gw_seq = ghdr->gh_seq; + gw->gw_svc = ghdr->gh_svc; + rawobj_from_netobj(&gw->gw_handle, &ghdr->gh_handle); + + /* keep original wire header which subject to checksum verification */ + if (lustre_msg_swabbed(req->rq_reqbuf)) + gss_header_swabber(ghdr); + + switch(ghdr->gh_proc) { + case PTLRPC_GSS_PROC_INIT: + case PTLRPC_GSS_PROC_CONTINUE_INIT: + rc = gss_svc_handle_init(req, gw); + break; + case PTLRPC_GSS_PROC_DATA: + rc = gss_svc_handle_data(req, gw); + break; + case PTLRPC_GSS_PROC_DESTROY: + rc = gss_svc_handle_destroy(req, gw); + break; + default: + CERROR("unknown proc %u\n", gw->gw_proc); + rc = SECSVC_DROP; + break; + } + + switch (rc) { + case SECSVC_OK: + LASSERT (grctx->src_ctx); + + req->rq_auth_gss = 1; + req->rq_auth_remote = grctx->src_ctx->gsc_remote; + req->rq_auth_usr_mdt = grctx->src_ctx->gsc_usr_mds; + req->rq_auth_usr_root = grctx->src_ctx->gsc_usr_root; + req->rq_auth_uid = grctx->src_ctx->gsc_uid; + req->rq_auth_mapped_uid = grctx->src_ctx->gsc_mapped_uid; + break; + case SECSVC_COMPLETE: + break; + case SECSVC_DROP: + gss_svc_reqctx_free(grctx); + req->rq_svc_ctx = NULL; + break; + } + + RETURN(rc); +} + +static +void gss_svc_invalidate_ctx(struct ptlrpc_svc_ctx *svc_ctx) +{ + struct gss_svc_reqctx *grctx; + ENTRY; + + if (svc_ctx == NULL) { + EXIT; + return; + } + + grctx = gss_svc_ctx2reqctx(svc_ctx); + + CWARN("gss svc invalidate ctx %p(%u)\n", + grctx->src_ctx, grctx->src_ctx->gsc_uid); + gss_svc_upcall_destroy_ctx(grctx->src_ctx); + + EXIT; +} + +static inline +int gss_svc_payload(struct gss_svc_reqctx *grctx, int msgsize, int privacy) +{ + if (gss_svc_reqctx_is_special(grctx)) + return grctx->src_reserve_len; + + return gss_estimate_payload(NULL, msgsize, privacy); +} + +static +int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen) +{ + struct gss_svc_reqctx *grctx; + struct ptlrpc_reply_state *rs; + struct ptlrpc_bulk_sec_desc *bsd; + int privacy; + int ibuflens[2], ibufcnt = 0; + int buflens[4], bufcnt; + int txtsize, wmsg_size, rs_size; + ENTRY; + + LASSERT(msglen % 8 == 0); + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) && + !req->rq_bulk_read && !req->rq_bulk_write) { + CERROR("client request bulk sec on non-bulk rpc\n"); + RETURN(-EPROTO); + } + + grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + if (gss_svc_reqctx_is_special(grctx)) + privacy = 0; + else + privacy = (SEC_FLAVOR_SVC(req->rq_sec_flavor) == + SPTLRPC_SVC_PRIV); + + if (privacy) { + /* Inner buffer */ + ibufcnt = 1; + ibuflens[0] = msglen; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); + bsd = lustre_msg_buf(req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, + sizeof(*bsd)); + + ibuflens[ibufcnt++] = bulk_sec_desc_size( + bsd->bsd_csum_alg, 0, + req->rq_bulk_read); + } + + txtsize = lustre_msg_size_v2(ibufcnt, ibuflens); + txtsize += GSS_MAX_CIPHER_BLOCK; + + /* wrapper buffer */ + bufcnt = 3; + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_svc_payload(grctx, buflens[0], 0); + buflens[2] = gss_svc_payload(grctx, txtsize, 1); + } else { + bufcnt = 2; + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = msglen; + txtsize = buflens[0] + buflens[1]; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_reqbuf->lm_bufcount >= 4); + bsd = lustre_msg_buf(req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 2, + sizeof(*bsd)); + + buflens[bufcnt] = bulk_sec_desc_size( + bsd->bsd_csum_alg, 0, + req->rq_bulk_read); + txtsize += buflens[bufcnt]; + bufcnt++; + } + buflens[bufcnt++] = gss_svc_payload(grctx, txtsize, 0); + } + + wmsg_size = lustre_msg_size_v2(bufcnt, buflens); + + rs_size = sizeof(*rs) + wmsg_size; + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC(rs, rs_size); + if (rs == NULL) + RETURN(-ENOMEM); + + rs->rs_size = rs_size; + } + + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = wmsg_size; + + if (privacy) { + lustre_init_msg_v2(rs->rs_repbuf, ibufcnt, ibuflens, NULL); + rs->rs_msg = lustre_msg_buf(rs->rs_repbuf, 0, msglen); + } else { + lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL); + rs->rs_repbuf->lm_secflvr = req->rq_sec_flavor; + + rs->rs_msg = (struct lustre_msg *) + lustre_msg_buf(rs->rs_repbuf, 1, 0); + } + + gss_svc_reqctx_addref(grctx); + rs->rs_svc_ctx = req->rq_svc_ctx; + + LASSERT(rs->rs_msg); + req->rq_reply_state = rs; + RETURN(0); +} + +static +int gss_svc_seal(struct ptlrpc_request *req, + struct ptlrpc_reply_state *rs, + struct gss_svc_reqctx *grctx) +{ + struct gss_svc_ctx *gctx = grctx->src_ctx; + rawobj_t msgobj, cipher_obj, micobj; + struct gss_header *ghdr; + __u8 *cipher_buf; + int cipher_buflen, buflens[3]; + int msglen, rc; + __u32 major; + ENTRY; + + /* embedded lustre_msg might have been shrinked */ + if (req->rq_replen != rs->rs_repbuf->lm_buflens[0]) + lustre_shrink_msg(rs->rs_repbuf, 0, req->rq_replen, 1); + + /* clear data length */ + msglen = lustre_msg_size_v2(rs->rs_repbuf->lm_bufcount, + rs->rs_repbuf->lm_buflens); + + /* clear text */ + msgobj.len = msglen; + msgobj.data = (__u8 *) rs->rs_repbuf; + + /* allocate temporary cipher buffer */ + cipher_buflen = gss_estimate_payload(gctx->gsc_mechctx, msglen, 1); + OBD_ALLOC(cipher_buf, cipher_buflen); + if (!cipher_buf) + RETURN(-ENOMEM); + + cipher_obj.len = cipher_buflen; + cipher_obj.data = cipher_buf; + + major = lgss_wrap(gctx->gsc_mechctx, &msgobj, rs->rs_repbuf_len, + &cipher_obj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: wrap message error: %08x\n", major); + GOTO(out_free, rc = -EPERM); + } + LASSERT(cipher_obj.len <= cipher_buflen); + + /* now the real wire data */ + buflens[0] = PTLRPC_GSS_HEADER_SIZE; + buflens[1] = gss_estimate_payload(gctx->gsc_mechctx, buflens[0], 0); + buflens[2] = cipher_obj.len; + + LASSERT(lustre_msg_size_v2(3, buflens) <= rs->rs_repbuf_len); + lustre_init_msg_v2(rs->rs_repbuf, 3, buflens, NULL); + rs->rs_repbuf->lm_secflvr = req->rq_sec_flavor; + + /* gss header */ + ghdr = lustre_msg_buf(rs->rs_repbuf, 0, 0); + ghdr->gh_version = PTLRPC_GSS_VERSION; + ghdr->gh_flags = 0; + ghdr->gh_proc = PTLRPC_GSS_PROC_DATA; + ghdr->gh_seq = grctx->src_wirectx.gw_seq; + ghdr->gh_svc = PTLRPC_GSS_SVC_PRIVACY; + ghdr->gh_handle.len = 0; + + /* header signature */ + msgobj.len = rs->rs_repbuf->lm_buflens[0]; + msgobj.data = lustre_msg_buf(rs->rs_repbuf, 0, 0); + micobj.len = rs->rs_repbuf->lm_buflens[1]; + micobj.data = lustre_msg_buf(rs->rs_repbuf, 1, 0); + + major = lgss_get_mic(gctx->gsc_mechctx, 1, &msgobj, &micobj); + if (major != GSS_S_COMPLETE) { + CERROR("priv: sign message error: %08x\n", major); + GOTO(out_free, rc = -EPERM); + } + lustre_shrink_msg(rs->rs_repbuf, 1, micobj.len, 0); + + /* cipher token */ + memcpy(lustre_msg_buf(rs->rs_repbuf, 2, 0), + cipher_obj.data, cipher_obj.len); + + rs->rs_repdata_len = lustre_shrink_msg(rs->rs_repbuf, 2, + cipher_obj.len, 0); + + /* to catch upper layer's further access */ + rs->rs_msg = NULL; + req->rq_repmsg = NULL; + req->rq_replen = 0; + + rc = 0; +out_free: + OBD_FREE(cipher_buf, cipher_buflen); + RETURN(rc); +} + +int gss_svc_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + struct gss_wire_ctx *gw; + int rc; + ENTRY; + + if (gss_svc_reqctx_is_special(grctx)) + RETURN(0); + + gw = &grctx->src_wirectx; + if (gw->gw_proc != PTLRPC_GSS_PROC_DATA && + gw->gw_proc != PTLRPC_GSS_PROC_DESTROY) { + CERROR("proc %d not support\n", gw->gw_proc); + RETURN(-EINVAL); + } + + LASSERT(grctx->src_ctx); + + switch (gw->gw_svc) { + case PTLRPC_GSS_SVC_INTEGRITY: + rc = gss_svc_sign(req, rs, grctx); + break; + case PTLRPC_GSS_SVC_PRIVACY: + rc = gss_svc_seal(req, rs, grctx); + break; + default: + CERROR("Unknown service %d\n", gw->gw_svc); + GOTO(out, rc = -EINVAL); + } + rc = 0; + +out: + RETURN(rc); +} + +static +void gss_svc_free_rs(struct ptlrpc_reply_state *rs) +{ + struct gss_svc_reqctx *grctx; + + LASSERT(rs->rs_svc_ctx); + grctx = container_of(rs->rs_svc_ctx, struct gss_svc_reqctx, src_base); + + gss_svc_reqctx_decref(grctx); + rs->rs_svc_ctx = NULL; + + if (!rs->rs_prealloc) + OBD_FREE(rs, rs->rs_size); +} + +static +void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->sc_refcount) == 0); + gss_svc_reqctx_free(gss_svc_ctx2reqctx(ctx)); +} + +static +int gss_svc_install_rctx(struct obd_import *imp, struct ptlrpc_svc_ctx *ctx) +{ + struct gss_sec *gsec; + + LASSERT(imp->imp_sec); + LASSERT(ctx); + + gsec = container_of(imp->imp_sec, struct gss_sec, gs_base); + return gss_install_rvs_cli_ctx(gsec, ctx); +} + +static struct ptlrpc_sec_sops gss_sec_sops = { + .accept = gss_svc_accept, + .invalidate_ctx = gss_svc_invalidate_ctx, + .alloc_rs = gss_svc_alloc_rs, + .authorize = gss_svc_authorize, + .free_rs = gss_svc_free_rs, + .free_ctx = gss_svc_free_ctx, + .unwrap_bulk = gss_svc_unwrap_bulk, + .wrap_bulk = gss_svc_wrap_bulk, + .install_rctx = gss_svc_install_rctx, +}; + +static struct ptlrpc_sec_policy gss_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.gss", + .sp_policy = SPTLRPC_POLICY_GSS, + .sp_cops = &gss_sec_cops, + .sp_sops = &gss_sec_sops, +}; + +int __init sptlrpc_gss_init(void) +{ + int rc; + + rc = gss_init_lproc(); + if (rc) + return rc; + + rc = gss_init_upcall(); + if (rc) + goto out_lproc; + + rc = init_kerberos_module(); + if (rc) + goto out_upcall; + + /* + * register policy after all other stuff be intialized, because it + * might be in used immediately after the registration. + */ + rc = sptlrpc_register_policy(&gss_policy); + if (rc) + goto out_kerberos; + + return 0; +out_kerberos: + cleanup_kerberos_module(); +out_upcall: + gss_exit_upcall(); +out_lproc: + gss_exit_lproc(); + return rc; +} + +static void __exit sptlrpc_gss_exit(void) +{ + sptlrpc_unregister_policy(&gss_policy); + cleanup_kerberos_module(); + gss_exit_upcall(); + gss_exit_lproc(); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); +MODULE_DESCRIPTION("GSS security policy for Lustre"); +MODULE_LICENSE("GPL"); + +module_init(sptlrpc_gss_init); +module_exit(sptlrpc_gss_exit); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 9104a6d..cb0209d 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -125,6 +125,7 @@ int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt) deuuidify(obd2cli_tgt(imp->imp_obd), NULL, &target_start, &target_len); + if (imp->imp_replayable) { LCONSOLE_WARN("%s: Connection to service %.*s via nid " "%s was lost; in progress operations using this " @@ -132,12 +133,15 @@ int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt) imp->imp_obd->obd_name, target_len, target_start, libcfs_nid2str(imp->imp_connection->c_peer.nid)); } else { - LCONSOLE_ERROR("%s: Connection to service %.*s via nid " - "%s was lost; in progress operations using this " - "service will fail.\n", - imp->imp_obd->obd_name, target_len, target_start, - libcfs_nid2str(imp->imp_connection->c_peer.nid)); + LCONSOLE_ERROR_MSG(0x166, "%s: Connection to service " + "%.*s via nid %s was lost; in progress" + "operations using this service will" + "fail.\n", + imp->imp_obd->obd_name, + target_len, target_start, + libcfs_nid2str(imp->imp_connection->c_peer.nid)); } + ptlrpc_deactivate_timeouts(imp); IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); spin_unlock(&imp->imp_lock); @@ -168,6 +172,12 @@ void ptlrpc_deactivate_import(struct obd_import *imp) ENTRY; spin_lock(&imp->imp_lock); + if (imp->imp_invalid) { + spin_unlock(&imp->imp_lock); + EXIT; + return; + } + CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd)); imp->imp_invalid = 1; imp->imp_generation++; @@ -175,6 +185,8 @@ void ptlrpc_deactivate_import(struct obd_import *imp) ptlrpc_abort_inflight(imp); obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE); + + EXIT; } /* @@ -188,8 +200,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) struct l_wait_info lwi; int rc; - if (!imp->imp_invalid) - ptlrpc_deactivate_import(imp); + ptlrpc_deactivate_import(imp); LASSERT(imp->imp_invalid); @@ -197,8 +208,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) lwi = LWI_TIMEOUT_INTERVAL(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), HZ, NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, - (atomic_read(&imp->imp_inflight) == 0), - &lwi); + (atomic_read(&imp->imp_inflight) == 0), &lwi); if (rc) CDEBUG(D_HA, "%s: rc = %d waiting for callback (%d != 0)\n", @@ -206,6 +216,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) atomic_read(&imp->imp_inflight)); obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); + sptlrpc_import_flush_all_ctx(imp); } /* unset imp_invalid */ @@ -215,8 +226,8 @@ void ptlrpc_activate_import(struct obd_import *imp) spin_lock(&imp->imp_lock); imp->imp_invalid = 0; + ptlrpc_activate_timeouts(imp); spin_unlock(&imp->imp_lock); - obd_import_event(obd, imp, IMP_EVENT_ACTIVE); } @@ -330,10 +341,27 @@ static int import_select_connection(struct obd_import *imp) RETURN(0); } +/* + * must be called under imp_lock + */ +int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) +{ + struct ptlrpc_request *req; + struct list_head *tmp; + + if (list_empty(&imp->imp_replay_list)) + return 0; + tmp = imp->imp_replay_list.next; + req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + *transno = req->rq_transno; + return 1; +} + int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) { struct obd_device *obd = imp->imp_obd; int initial_connect = 0; + int set_transno = 0; int rc; __u64 committed_before_reconnect = 0; struct ptlrpc_request *request; @@ -348,8 +376,8 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) (char *)&imp->imp_dlm_handle, (char *)&imp->imp_connect_data }; struct ptlrpc_connect_async_args *aa; - ENTRY; + spin_lock(&imp->imp_lock); if (imp->imp_state == LUSTRE_IMP_CLOSED) { spin_unlock(&imp->imp_lock); @@ -375,6 +403,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) else committed_before_reconnect = imp->imp_peer_committed_transno; + set_transno = ptlrpc_first_transno(imp, &imp->imp_connect_data.ocd_transno); spin_unlock(&imp->imp_lock); if (new_uuid) { @@ -454,8 +483,14 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) #else request->rq_timeout = max((int)(obd_timeout / 20), 5); #endif + lustre_msg_add_op_flags(request->rq_reqmsg, + MSG_CONNECT_INITIAL); } + if (set_transno) + lustre_msg_add_op_flags(request->rq_reqmsg, + MSG_CONNECT_TRANSNO); + DEBUG_REQ(D_RPCTRACE, request, "(re)connect request"); ptlrpcd_add_req(request); rc = 0; @@ -490,11 +525,11 @@ static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp) ptlrpc_ping_import_soon(imp); wake_pinger = 1; } - #else /* liblustre has no pinger thead, so we wakup pinger anyway */ wake_pinger = 1; #endif + unlock: spin_unlock(&imp->imp_lock); @@ -524,6 +559,10 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, if (rc) GOTO(out, rc); + rc = sptlrpc_cli_install_rvs_ctx(imp, request->rq_cli_ctx); + if (rc) + GOTO(out, rc); + LASSERT(imp->imp_conn_current); msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); @@ -686,7 +725,6 @@ finish: if (!exp) { /* This could happen if export is cleaned during the connect attempt */ - spin_unlock(&imp->imp_lock); CERROR("Missing export for %s\n", imp->imp_obd->obd_name); GOTO(out, rc = -ENODEV); @@ -710,10 +748,10 @@ finish: of macro arguments */ #ifdef __KERNEL__ const char *older = - "older. Consider upgrading this client"; + "older. Consider upgrading this client"; #else const char *older = - "older. Consider recompiling this application"; + "older. Consider recompiling this application"; #endif const char *newer = "newer than client version"; @@ -737,20 +775,29 @@ finish: (cli->cl_max_pages_per_rpc > 0)); } - out: +out: if (rc != 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); if (aa->pcaa_initial_connect && !imp->imp_initial_recov) ptlrpc_deactivate_import(imp); - if (imp->imp_recon_bk && imp->imp_last_recon) { - /* Give up trying to reconnect */ + if ((imp->imp_recon_bk && imp->imp_last_recon) || + (rc == -EACCES)) { + /* + * Give up trying to reconnect + * EACCES means client has no permission for connection + */ imp->imp_obd->obd_no_recov = 1; ptlrpc_deactivate_import(imp); } if (rc == -EPROTO) { struct obd_connect_data *ocd; + + /* reply message might not be ready */ + if (request->rq_repmsg != NULL) + RETURN(-EPROTO); + ocd = lustre_swab_repbuf(request, REPLY_REC_OFF, sizeof *ocd, lustre_swab_connect); @@ -760,7 +807,8 @@ finish: /* Actually servers are only supposed to refuse connection from liblustre clients, so we should never see this from VFS context */ - LCONSOLE_ERROR("Server %s version (%d.%d.%d.%d)" + LCONSOLE_ERROR_MSG(0x16a, "Server %s version " + "(%d.%d.%d.%d)" " refused connection from this client " "with an incompatible version (%s). " "Client must be recompiled\n", @@ -824,7 +872,8 @@ static int signal_completed_replay(struct obd_import *imp) ptlrpc_req_set_repsize(req, 1, NULL); req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT; - lustre_msg_add_flags(req->rq_reqmsg, MSG_LAST_REPLAY); + lustre_msg_add_flags(req->rq_reqmsg, + MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE); req->rq_timeout *= 3; req->rq_interpret_reply = completed_replay_interpret; @@ -873,10 +922,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) /* Don't care about MGC eviction */ if (strcmp(imp->imp_obd->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) { - LCONSOLE_ERROR("This client was evicted by %.*s; " - "in progress operations using this " - "service will fail.\n", - target_len, target_start); + LCONSOLE_ERROR_MSG(0x167, "This client was evicted by " + "%.*s; in progress operations using " + "this service will fail.\n", + target_len, target_start); } CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", obd2cli_tgt(imp->imp_obd), @@ -884,7 +933,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) #ifdef __KERNEL__ rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp, - CLONE_VM | CLONE_FILES); + CLONE_VM | CLONE_FILES); if (rc < 0) CERROR("error starting invalidate thread: %d\n", rc); else @@ -951,7 +1000,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) ptlrpc_wake_delayed(imp); } - out: +out: RETURN(rc); } @@ -978,8 +1027,14 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) if (ptlrpc_import_in_recovery(imp)) { struct l_wait_info lwi; - cfs_duration_t timeout = cfs_time_seconds(obd_timeout); - + cfs_duration_t timeout; + if (imp->imp_server_timeout) + timeout = cfs_time_seconds(obd_timeout / 2); + else + timeout = cfs_time_seconds(obd_timeout); + + timeout = MAX(timeout * HZ, 1); + lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout), back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(imp->imp_recovery_waitq, @@ -1018,6 +1073,8 @@ out: else IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); + imp->imp_conn_cnt = 0; + imp->imp_last_recon = 0; spin_unlock(&imp->imp_lock); RETURN(rc); diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c new file mode 100644 index 0000000..e7af9cf --- /dev/null +++ b/lustre/ptlrpc/layout.c @@ -0,0 +1,1058 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/ptlrpc/layout.c + * Lustre Metadata Target (mdt) request handler + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#if !defined(__REQ_LAYOUT_USER__) + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_RPC + +#ifdef __KERNEL__ +#include <linux/module.h> +#else +# include <liblustre.h> +#endif + +/* LUSTRE_VERSION_CODE */ +#include <lustre_ver.h> + +#include <obd_support.h> +/* lustre_swab_mdt_body */ +#include <lustre/lustre_idl.h> +/* obd2cli_tgt() (required by DEBUG_REQ()) */ +#include <obd.h> + +/* __REQ_LAYOUT_USER__ */ +#endif +/* struct ptlrpc_request, lustre_msg* */ +#include <lustre_req_layout.h> +#include <linux/lustre_acl.h> + +#if __KERNEL__ +#define __POSIX_ACL_MAX_SIZE \ + (sizeof(xattr_acl_header) + 32 * sizeof(xattr_acl_entry)) +#else +#define __POSIX_ACL_MAX_SIZE 0 +#endif + +/* + * empty set of fields... for suitable definition of emptiness. + */ +static const struct req_msg_field *empty[] = { + &RMF_PTLRPC_BODY +}; + +static const struct req_msg_field *mdt_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY +}; + +static const struct req_msg_field *mdt_body_capa[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1 +}; + +static const struct req_msg_field *mdt_close_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_EPOCH, + &RMF_REC_SETATTR, + &RMF_CAPA1 +}; + +static const struct req_msg_field *mds_statfs_server[] = { + &RMF_PTLRPC_BODY, + &RMF_OBD_STATFS +}; + +static const struct req_msg_field *seq_query_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SEQ_OPC, + &RMF_SEQ_RANGE +}; + +static const struct req_msg_field *seq_query_server[] = { + &RMF_PTLRPC_BODY, + &RMF_SEQ_RANGE +}; + +static const struct req_msg_field *fld_query_client[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_OPC, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *fld_query_server[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *mds_getattr_name_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *mds_reint_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REINT_OPC +}; + +static const struct req_msg_field *mds_reint_create_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_NAME, +}; + +static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_reint_create_sym_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_NAME, + &RMF_SYMTGT +}; + +static const struct req_msg_field *mds_reint_create_slave_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_reint_open_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_reint_open_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *mds_reint_unlink_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_UNLINK, + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *mds_reint_link_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_LINK, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME +}; + +static const struct req_msg_field *mds_reint_rename_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_RENAME, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_SYMTGT +}; + +static const struct req_msg_field *mds_last_unlink_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_LOGCOOKIES +}; + +static const struct req_msg_field *mds_reint_setattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_SETATTR, + &RMF_CAPA1, + &RMF_MDT_EPOCH, + &RMF_EADATA, + &RMF_LOGCOOKIES +}; + +static const struct req_msg_field *mds_connect_client[] = { + &RMF_PTLRPC_BODY, + &RMF_TGTUUID, + &RMF_CLUUID, + &RMF_CONN, + &RMF_CONNECT_DATA +}; + +static const struct req_msg_field *mds_connect_server[] = { + &RMF_PTLRPC_BODY, + &RMF_CONNECT_DATA +}; + +static const struct req_msg_field *mds_set_info_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SETINFO_KEY, + &RMF_SETINFO_VAL +}; + +static const struct req_msg_field *ldlm_enqueue_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *ldlm_enqueue_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP +}; + +static const struct req_msg_field *ldlm_intent_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REINT_OPC +}; + +static const struct req_msg_field *ldlm_intent_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1 +}; + +static const struct req_msg_field *ldlm_intent_open_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *ldlm_intent_getattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */ + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *ldlm_intent_create_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_CREATE, /* coincides with mds_reint_create_client[] */ + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *ldlm_intent_open_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_CREATE, /* coincides with mds_reint_open_client[] */ + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *ldlm_intent_unlink_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_UNLINK, /* coincides with mds_reint_unlink_client[] */ + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *mds_getxattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_getxattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_setxattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_setxattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_getattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1 +}; + +static const struct req_format *req_formats[] = { + &RQF_MDS_CONNECT, + &RQF_MDS_DISCONNECT, + &RQF_MDS_SET_INFO, + &RQF_MDS_GETSTATUS, + &RQF_MDS_STATFS, + &RQF_MDS_GETATTR, + &RQF_MDS_GETATTR_NAME, + &RQF_MDS_REINT, + &RQF_MDS_REINT_CREATE, + &RQF_MDS_REINT_CREATE_RMT_ACL, + &RQF_MDS_REINT_CREATE_SYM, + &RQF_MDS_REINT_CREATE_SLAVE, + &RQF_MDS_REINT_OPEN, + &RQF_MDS_REINT_UNLINK, + &RQF_MDS_REINT_LINK, + &RQF_MDS_REINT_RENAME, + &RQF_MDS_REINT_SETATTR, + &RQF_LDLM_ENQUEUE, + &RQF_LDLM_INTENT, + &RQF_LDLM_INTENT_GETATTR, + &RQF_LDLM_INTENT_OPEN, + &RQF_LDLM_INTENT_CREATE, + &RQF_LDLM_INTENT_UNLINK, + &RQF_SEQ_QUERY, + &RQF_FLD_QUERY, + &RQF_MDS_GETXATTR, + &RQF_MDS_SETXATTR, + &RQF_MDS_SYNC, + &RQF_MDS_CLOSE, + &RQF_MDS_PIN, + &RQF_MDS_READPAGE, + &RQF_MDS_WRITEPAGE, + &RQF_MDS_IS_SUBDIR, + &RQF_MDS_DONE_WRITING +}; + +struct req_msg_field { + __u32 rmf_flags; + const char *rmf_name; + /* + * Field length. (-1) means "variable length". + */ + int rmf_size; + void (*rmf_swabber)(void *); + int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR]; +}; + +enum rmf_flags { + RMF_F_STRING = 1 << 0 +}; + +struct req_capsule; + +/* + * Request fields. + */ +#define DEFINE_MSGF(name, flags, size, swabber) { \ + .rmf_name = (name), \ + .rmf_flags = (flags), \ + .rmf_size = (size), \ + .rmf_swabber = (void (*)(void*))(swabber) \ +} + +const struct req_msg_field RMF_SETINFO_VAL = + DEFINE_MSGF("setinfo_val", 0, + sizeof(__u32), lustre_swab_generic_32s); +EXPORT_SYMBOL(RMF_SETINFO_VAL); + +const struct req_msg_field RMF_SEQ_OPC = + DEFINE_MSGF("seq_query_opc", 0, + sizeof(__u32), lustre_swab_generic_32s); +EXPORT_SYMBOL(RMF_SEQ_OPC); + +const struct req_msg_field RMF_SEQ_RANGE = + DEFINE_MSGF("seq_query_range", 0, + sizeof(struct lu_range), lustre_swab_lu_range); +EXPORT_SYMBOL(RMF_SEQ_RANGE); + +const struct req_msg_field RMF_FLD_OPC = + DEFINE_MSGF("fld_query_opc", 0, + sizeof(__u32), lustre_swab_generic_32s); +EXPORT_SYMBOL(RMF_FLD_OPC); + +const struct req_msg_field RMF_FLD_MDFLD = + DEFINE_MSGF("fld_query_mdfld", 0, + sizeof(struct md_fld), lustre_swab_md_fld); +EXPORT_SYMBOL(RMF_FLD_MDFLD); + +const struct req_msg_field RMF_MDT_BODY = + DEFINE_MSGF("mdt_body", 0, + sizeof(struct mdt_body), lustre_swab_mdt_body); +EXPORT_SYMBOL(RMF_MDT_BODY); + +const struct req_msg_field RMF_MDT_EPOCH = + DEFINE_MSGF("mdt_epoch", 0, + sizeof(struct mdt_epoch), lustre_swab_mdt_epoch); +EXPORT_SYMBOL(RMF_MDT_EPOCH); + +const struct req_msg_field RMF_PTLRPC_BODY = + DEFINE_MSGF("ptlrpc_body", 0, + sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body); +EXPORT_SYMBOL(RMF_PTLRPC_BODY); + +const struct req_msg_field RMF_OBD_STATFS = + DEFINE_MSGF("obd_statfs", 0, + sizeof(struct obd_statfs), lustre_swab_obd_statfs); +EXPORT_SYMBOL(RMF_OBD_STATFS); + +const struct req_msg_field RMF_SETINFO_KEY = + DEFINE_MSGF("setinfo_key", 0, -1, NULL); +EXPORT_SYMBOL(RMF_SETINFO_KEY); + +const struct req_msg_field RMF_NAME = + DEFINE_MSGF("name", RMF_F_STRING, -1, NULL); +EXPORT_SYMBOL(RMF_NAME); + +const struct req_msg_field RMF_SYMTGT = + DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL); +EXPORT_SYMBOL(RMF_SYMTGT); + +const struct req_msg_field RMF_TGTUUID = + DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL); +EXPORT_SYMBOL(RMF_TGTUUID); + +const struct req_msg_field RMF_CLUUID = + DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL); +EXPORT_SYMBOL(RMF_CLUUID); + +/* + * connection handle received in MDS_CONNECT request. + * + * XXX no swabbing? + */ +const struct req_msg_field RMF_CONN = + DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL); +EXPORT_SYMBOL(RMF_CONN); + +const struct req_msg_field RMF_CONNECT_DATA = + DEFINE_MSGF("cdata", 0, + sizeof(struct obd_connect_data), lustre_swab_connect); +EXPORT_SYMBOL(RMF_CONNECT_DATA); + +const struct req_msg_field RMF_DLM_REQ = + DEFINE_MSGF("dlm_req", 0, + sizeof(struct ldlm_request), lustre_swab_ldlm_request); +EXPORT_SYMBOL(RMF_DLM_REQ); + +const struct req_msg_field RMF_DLM_REP = + DEFINE_MSGF("dlm_rep", 0, + sizeof(struct ldlm_reply), lustre_swab_ldlm_reply); +EXPORT_SYMBOL(RMF_DLM_REP); + +const struct req_msg_field RMF_LDLM_INTENT = + DEFINE_MSGF("ldlm_intent", 0, + sizeof(struct ldlm_intent), lustre_swab_ldlm_intent); +EXPORT_SYMBOL(RMF_LDLM_INTENT); + +const struct req_msg_field RMF_MDT_MD = + DEFINE_MSGF("mdt_md", 0, MIN_MD_SIZE, lustre_swab_lov_mds_md); +EXPORT_SYMBOL(RMF_MDT_MD); + +const struct req_msg_field RMF_REC_UNLINK = + DEFINE_MSGF("rec_unlink", 0, sizeof(struct mdt_rec_unlink), + lustre_swab_mdt_rec_unlink); +EXPORT_SYMBOL(RMF_REC_UNLINK); + +const struct req_msg_field RMF_REC_LINK = + DEFINE_MSGF("rec_link", 0, sizeof(struct mdt_rec_link), + lustre_swab_mdt_rec_link); +EXPORT_SYMBOL(RMF_REC_LINK); + +const struct req_msg_field RMF_REC_RENAME = + DEFINE_MSGF("rec_rename", 0, sizeof(struct mdt_rec_rename), + lustre_swab_mdt_rec_rename); +EXPORT_SYMBOL(RMF_REC_RENAME); + +const struct req_msg_field RMF_REC_CREATE = + DEFINE_MSGF("rec_create", 0, + sizeof(struct mdt_rec_create), lustre_swab_mdt_rec_create); +EXPORT_SYMBOL(RMF_REC_CREATE); + +const struct req_msg_field RMF_REC_SETATTR = + DEFINE_MSGF("rec_setattr", 0, sizeof(struct mdt_rec_setattr), + lustre_swab_mdt_rec_setattr); +EXPORT_SYMBOL(RMF_REC_SETATTR); + +/* FIXME: this length should be defined as a macro */ +const struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1, NULL); +EXPORT_SYMBOL(RMF_EADATA); + +const struct req_msg_field RMF_ACL = DEFINE_MSGF("acl", 0, + __POSIX_ACL_MAX_SIZE, NULL); +EXPORT_SYMBOL(RMF_ACL); + +const struct req_msg_field RMF_LOGCOOKIES = + DEFINE_MSGF("logcookies", 0, sizeof(struct llog_cookie), NULL); +EXPORT_SYMBOL(RMF_LOGCOOKIES); + +const struct req_msg_field RMF_REINT_OPC = + DEFINE_MSGF("reint_opc", 0, sizeof(__u32), lustre_swab_generic_32s); +EXPORT_SYMBOL(RMF_REINT_OPC); + +const struct req_msg_field RMF_CAPA1 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa); +EXPORT_SYMBOL(RMF_CAPA1); + +const struct req_msg_field RMF_CAPA2 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa); +EXPORT_SYMBOL(RMF_CAPA2); + +/* + * Request formats. + */ + +struct req_format { + const char *rf_name; + int rf_idx; + struct { + int nr; + const struct req_msg_field **d; + } rf_fields[RCL_NR]; +}; + +#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \ + .rf_name = name, \ + .rf_fields = { \ + [RCL_CLIENT] = { \ + .nr = client_nr, \ + .d = client \ + }, \ + [RCL_SERVER] = { \ + .nr = server_nr, \ + .d = server \ + } \ + } \ +} + +#define DEFINE_REQ_FMT0(name, client, server) \ +DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server)) + +const struct req_format RQF_SEQ_QUERY = + DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server); +EXPORT_SYMBOL(RQF_SEQ_QUERY); + +const struct req_format RQF_FLD_QUERY = + DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); +EXPORT_SYMBOL(RQF_FLD_QUERY); + +const struct req_format RQF_MDS_GETSTATUS = + DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_GETSTATUS); + +const struct req_format RQF_MDS_STATFS = + DEFINE_REQ_FMT0("MDS_STATFS", empty, mds_statfs_server); +EXPORT_SYMBOL(RQF_MDS_STATFS); + +const struct req_format RQF_MDS_SYNC = + DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_SYNC); + +const struct req_format RQF_MDS_GETATTR = + DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server); +EXPORT_SYMBOL(RQF_MDS_GETATTR); + +const struct req_format RQF_MDS_GETXATTR = + DEFINE_REQ_FMT0("MDS_GETXATTR", + mds_getxattr_client, mds_getxattr_server); +EXPORT_SYMBOL(RQF_MDS_GETXATTR); + +const struct req_format RQF_MDS_SETXATTR = + DEFINE_REQ_FMT0("MDS_SETXATTR", + mds_setxattr_client, mds_setxattr_server); +EXPORT_SYMBOL(RQF_MDS_SETXATTR); + +const struct req_format RQF_MDS_GETATTR_NAME = + DEFINE_REQ_FMT0("MDS_GETATTR_NAME", + mds_getattr_name_client, mds_getattr_server); +EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME); + +const struct req_format RQF_MDS_REINT = + DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_REINT); + +const struct req_format RQF_MDS_REINT_CREATE = + DEFINE_REQ_FMT0("MDS_REINT_CREATE", + mds_reint_create_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE); + +const struct req_format RQF_MDS_REINT_CREATE_RMT_ACL = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL", + mds_reint_create_rmt_acl_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL); + +const struct req_format RQF_MDS_REINT_CREATE_SLAVE = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_SLAVE", + mds_reint_create_slave_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE); + +const struct req_format RQF_MDS_REINT_CREATE_SYM = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM", + mds_reint_create_sym_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM); + +const struct req_format RQF_MDS_REINT_OPEN = + DEFINE_REQ_FMT0("MDS_REINT_OPEN", + mds_reint_open_client, mds_reint_open_server); +EXPORT_SYMBOL(RQF_MDS_REINT_OPEN); + +const struct req_format RQF_MDS_REINT_UNLINK = + DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client, + mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK); + +const struct req_format RQF_MDS_REINT_LINK = + DEFINE_REQ_FMT0("MDS_REINT_LINK", + mds_reint_link_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_REINT_LINK); + +const struct req_format RQF_MDS_REINT_RENAME = + DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client, + mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_REINT_RENAME); + +const struct req_format RQF_MDS_REINT_SETATTR = + DEFINE_REQ_FMT0("MDS_REINT_SETATTR", + mds_reint_setattr_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR); + +const struct req_format RQF_MDS_CONNECT = + DEFINE_REQ_FMT0("MDS_CONNECT", + mds_connect_client, mds_connect_server); +EXPORT_SYMBOL(RQF_MDS_CONNECT); + +const struct req_format RQF_MDS_DISCONNECT = + DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty); +EXPORT_SYMBOL(RQF_MDS_DISCONNECT); + +const struct req_format RQF_MDS_SET_INFO = + DEFINE_REQ_FMT0("MDS_SET_INFO", mds_set_info_client, empty); +EXPORT_SYMBOL(RQF_MDS_SET_INFO); + +const struct req_format RQF_LDLM_ENQUEUE = + DEFINE_REQ_FMT0("LDLM_ENQUEUE", + ldlm_enqueue_client, ldlm_enqueue_server); +EXPORT_SYMBOL(RQF_LDLM_ENQUEUE); + +const struct req_format RQF_LDLM_INTENT = + DEFINE_REQ_FMT0("LDLM_INTENT", + ldlm_intent_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT); + +const struct req_format RQF_LDLM_INTENT_GETATTR = + DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR", + ldlm_intent_getattr_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR); + +const struct req_format RQF_LDLM_INTENT_OPEN = + DEFINE_REQ_FMT0("LDLM_INTENT_OPEN", + ldlm_intent_open_client, ldlm_intent_open_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN); + +const struct req_format RQF_LDLM_INTENT_CREATE = + DEFINE_REQ_FMT0("LDLM_INTENT_CREATE", + ldlm_intent_create_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE); + +const struct req_format RQF_LDLM_INTENT_UNLINK = + DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK", + ldlm_intent_unlink_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK); + +const struct req_format RQF_MDS_CLOSE = + DEFINE_REQ_FMT0("MDS_CLOSE", + mdt_close_client, mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_CLOSE); + +const struct req_format RQF_MDS_PIN = + DEFINE_REQ_FMT0("MDS_PIN", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_PIN); + +const struct req_format RQF_MDS_DONE_WRITING = + DEFINE_REQ_FMT0("MDS_DONE_WRITING", + mdt_close_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_DONE_WRITING); + +const struct req_format RQF_MDS_READPAGE = + DEFINE_REQ_FMT0("MDS_READPAGE", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_READPAGE); + +/* This is for split */ +const struct req_format RQF_MDS_WRITEPAGE = + DEFINE_REQ_FMT0("MDS_WRITEPAGE", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_WRITEPAGE); + +const struct req_format RQF_MDS_IS_SUBDIR = + DEFINE_REQ_FMT0("MDS_IS_SUBDIR", + mdt_body_only, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR); + +#if !defined(__REQ_LAYOUT_USER__) + +int req_layout_init(void) +{ + int i; + int j; + int k; + + for (i = 0; i < ARRAY_SIZE(req_formats); ++i) { + struct req_format *rf; + + rf = (struct req_format *)req_formats[i]; + rf->rf_idx = i; + for (j = 0; j < RCL_NR; ++j) { + LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR); + for (k = 0; k < rf->rf_fields[j].nr; ++k) { + struct req_msg_field *field; + + field = (typeof(field))rf->rf_fields[j].d[k]; + LASSERT(field->rmf_offset[i][j] == 0); + /* + * k + 1 to detect unused format/field + * combinations. + */ + field->rmf_offset[i][j] = k + 1; + } + } + } + return 0; +} +EXPORT_SYMBOL(req_layout_init); + +void req_layout_fini(void) +{ +} +EXPORT_SYMBOL(req_layout_fini); + +/* + * Initialize capsule. + * + * @area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of + * variable-sized fields. + */ +void req_capsule_init(struct req_capsule *pill, + struct ptlrpc_request *req, enum req_location location, + int *area) +{ + LASSERT(location == RCL_SERVER || location == RCL_CLIENT); + + memset(pill, 0, sizeof *pill); + pill->rc_req = req; + pill->rc_loc = location; + pill->rc_area = area; +} +EXPORT_SYMBOL(req_capsule_init); + +void req_capsule_fini(struct req_capsule *pill) +{ +} +EXPORT_SYMBOL(req_capsule_fini); + +static int __req_format_is_sane(const struct req_format *fmt) +{ + return + 0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) && + req_formats[fmt->rf_idx] == fmt; +} + +static struct lustre_msg *__req_msg(const struct req_capsule *pill, + enum req_location loc) +{ + struct ptlrpc_request *req; + + req = pill->rc_req; + return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg; +} + +void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt) +{ + LASSERT(pill->rc_fmt == NULL); + LASSERT(__req_format_is_sane(fmt)); + + pill->rc_fmt = fmt; +} +EXPORT_SYMBOL(req_capsule_set); + +int req_capsule_pack(struct req_capsule *pill) +{ + int i; + int nr; + int result; + int total; + + const struct req_format *fmt; + + LASSERT(pill->rc_loc == RCL_SERVER); + fmt = pill->rc_fmt; + LASSERT(fmt != NULL); + + nr = fmt->rf_fields[RCL_SERVER].nr; + for (total = 0, i = 0; i < nr; ++i) { + int *size; + + size = &pill->rc_area[i]; + if (*size == -1) { + *size = fmt->rf_fields[RCL_SERVER].d[i]->rmf_size; + LASSERT(*size != -1); + } + total += *size; + } + result = lustre_pack_reply(pill->rc_req, nr, pill->rc_area, NULL); + if (result != 0) { + DEBUG_REQ(D_ERROR, pill->rc_req, + "Cannot pack %d fields (%d bytes) in format `%s': ", + nr, total, fmt->rf_name); + } + return result; +} +EXPORT_SYMBOL(req_capsule_pack); + +static int __req_capsule_offset(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + int offset; + + offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc]; + LASSERT(offset > 0); + offset --; + LASSERT(0 <= offset && offset < (sizeof(pill->rc_swabbed) << 3)); + return offset; +} + +static void *__req_capsule_get(struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + const struct req_format *fmt; + struct lustre_msg *msg; + void *value; + int len; + int offset; + + void *(*getter)(struct lustre_msg *m, int n, int minlen); + + static const char *rcl_names[RCL_NR] = { + [RCL_CLIENT] = "client", + [RCL_SERVER] = "server" + }; + + fmt = pill->rc_fmt; + LASSERT(fmt != NULL); + LASSERT(__req_format_is_sane(fmt)); + + offset = __req_capsule_offset(pill, field, loc); + + msg = __req_msg(pill, loc); + + getter = (field->rmf_flags & RMF_F_STRING) ? + (typeof(getter))lustre_msg_string : lustre_msg_buf; + + len = max(field->rmf_size, 0); + value = getter(msg, offset, len); + + if (!(pill->rc_swabbed & (1 << offset)) && loc != pill->rc_loc && + field->rmf_swabber != NULL && value != NULL && + lustre_msg_swabbed(msg)) { + field->rmf_swabber(value); + pill->rc_swabbed |= (1 << offset); + } + if (value == NULL) + DEBUG_REQ(D_ERROR, pill->rc_req, + "Wrong buffer for field `%s' (%d of %d) " + "in format `%s': %d vs. %d (%s)\n", + field->rmf_name, offset, lustre_msg_bufcount(msg), fmt->rf_name, + lustre_msg_buflen(msg, offset), field->rmf_size, + rcl_names[loc]); + return value; +} + +void *req_capsule_client_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, RCL_CLIENT); +} +EXPORT_SYMBOL(req_capsule_client_get); + +void *req_capsule_server_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, RCL_SERVER); +} +EXPORT_SYMBOL(req_capsule_server_get); + +const void *req_capsule_other_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, pill->rc_loc ^ 1); +} +EXPORT_SYMBOL(req_capsule_other_get); + +void req_capsule_set_size(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc, int size) +{ + pill->rc_area[__req_capsule_offset(pill, field, loc)] = size; +} +EXPORT_SYMBOL(req_capsule_set_size); + +int req_capsule_get_size(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + + return lustre_msg_buflen(__req_msg(pill, loc), + __req_capsule_offset(pill, field, loc)); +} +EXPORT_SYMBOL(req_capsule_get_size); + +#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)] + +void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt) +{ + int i; + int j; + + const struct req_format *old; + + LASSERT(pill->rc_fmt != NULL); + LASSERT(__req_format_is_sane(fmt)); + + old = pill->rc_fmt; + /* + * Sanity checking... + */ + for (i = 0; i < RCL_NR; ++i) { + LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr); + for (j = 0; j < old->rf_fields[i].nr - 1; ++j) { + LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j)); + } + /* + * Last field in old format can be shorter than in new. + */ + LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >= + FMT_FIELD(old, i, j)->rmf_size); + } + /* last field should be returned to the unswabbed state */ + pill->rc_swabbed &= ~(__u32)(1 << j); + pill->rc_fmt = fmt; +} +EXPORT_SYMBOL(req_capsule_extend); + +int req_capsule_has_field(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + + return field->rmf_offset[pill->rc_fmt->rf_idx][loc]; +} +EXPORT_SYMBOL(req_capsule_has_field); + +int req_capsule_field_present(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + int offset; + + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + LASSERT(req_capsule_has_field(pill, field, loc)); + + offset = __req_capsule_offset(pill, field, loc); + return lustre_msg_bufcount(__req_msg(pill, loc)) > offset; +} +EXPORT_SYMBOL(req_capsule_field_present); + + +/* + * Shrink the specified reply message buffer @field to a specified @newlen. + * If @move_data is non-zero, then move following buffer backward + * if @newlen is zero; + * The internal offset should be adjusted by @adjust because buffer maybe has + * been moved by previous call. (@adjust >= 0) is a must. + * Return value: 1 if buffer has been moved, otherwise 0 is returned. + */ +int req_capsule_shrink(const struct req_capsule *pill, + const struct req_msg_field *field, + const unsigned int newlen, + const int adjust, + const int move_data) +{ + int offset; + + LASSERT(adjust >= 0); + LASSERT(req_capsule_has_field(pill, field, RCL_SERVER)); + + offset = __req_capsule_offset(pill, field, RCL_SERVER); + offset -= adjust; + LASSERT(offset >= 1); + + lustre_shrink_reply(pill->rc_req, offset, newlen, move_data); + return (newlen == 0) ? 1 : 0; +} +EXPORT_SYMBOL(req_capsule_shrink); + +/* __REQ_LAYOUT_USER__ */ +#endif diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index d4bd482..a45208b3 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -223,7 +223,7 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) ptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (body)); memcpy(ptr, body, sizeof(*body)); - ptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF+1, LLOG_CHUNK_SIZE); + ptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, LLOG_CHUNK_SIZE); memcpy(ptr, buf, LLOG_CHUNK_SIZE); out_close: diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 023bd07..b423fa3 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -24,6 +24,10 @@ */ #define DEBUG_SUBSYSTEM S_CLASS +#ifndef __KERNEL__ +# include <liblustre.h> +#endif + #include <obd_support.h> #include <obd.h> #include <lprocfs_status.h> @@ -51,8 +55,8 @@ struct ll_rpc_opcode { { OST_OPEN, "ost_open" }, { OST_CLOSE, "ost_close" }, { OST_STATFS, "ost_statfs" }, - { 14, NULL }, - { 15, NULL }, + { 14, "ost_san_read" }, + { 15, "ost_san_write" }, { OST_SYNC, "ost_sync" }, { OST_SET_INFO, "ost_set_info" }, { OST_QUOTACHECK, "ost_quotacheck" }, @@ -75,14 +79,36 @@ struct ll_rpc_opcode { { MDS_QUOTACTL, "mds_quotactl" }, { MDS_GETXATTR, "mds_getxattr" }, { MDS_SETXATTR, "mds_setxattr" }, + { MDS_WRITEPAGE, "mds_writepage" }, + { MDS_IS_SUBDIR, "mds_is_subdir" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, { LDLM_CP_CALLBACK, "ldlm_cp_callback" }, { LDLM_GL_CALLBACK, "ldlm_gl_callback" }, + { MGS_CONNECT, "mgs_connect" }, + { MGS_DISCONNECT, "mgs_disconnect" }, + { MGS_EXCEPTION, "mgs_exception" }, + { MGS_TARGET_REG, "mgs_target_reg" }, + { MGS_TARGET_DEL, "mgs_target_del" }, { OBD_PING, "obd_ping" }, - { OBD_LOG_CANCEL, "llog_origin_handle_cancel"}, + { OBD_LOG_CANCEL, "llog_origin_handle_cancel" }, + { OBD_QC_CALLBACK, "obd_qc_callback" }, + { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" }, + { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, + { LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" }, + { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" }, + { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" }, + { LLOG_ORIGIN_CONNECT, "llog_origin_connect" }, + { LLOG_CATINFO, "llog_catinfo" }, + { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, + { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, + { FLD_QUERY, "fld_query" }, + { SEQ_QUERY, "seq_query" }, + { SEC_CTX_INIT, "sec_ctx_init" }, + { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" }, + { SEC_CTX_FINI, "sec_ctx_fini" } }; const char* ll_opcode2str(__u32 opcode) @@ -195,7 +221,7 @@ ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer, * hose a kernel by allowing the request history to grow too * far. */ bufpages = (svc->srv_buf_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - if (val > num_physpages/(2*bufpages)) + if (val > num_physpages/(2 * bufpages)) return -ERANGE; spin_lock(&svc->srv_lock); @@ -327,8 +353,8 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) * parser. Currently I only print stuff here I know is OK * to look at coz it was set up in request_in_callback()!!! */ seq_printf(s, LPD64":%s:%s:"LPD64":%d:%s ", - req->rq_history_seq, libcfs_nid2str(req->rq_self), - libcfs_id2str(req->rq_peer), req->rq_xid, + req->rq_history_seq, libcfs_nid2str(req->rq_self), + libcfs_id2str(req->rq_peer), req->rq_xid, req->rq_reqlen,ptlrpc_rqphase2str(req)); if (svc->srv_request_history_print_fn == NULL) @@ -441,9 +467,10 @@ EXPORT_SYMBOL(ptlrpc_lprocfs_brw); void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) { - if (svc->srv_procroot != NULL) + if (svc->srv_procroot != NULL) lprocfs_remove(&svc->srv_procroot); - if (svc->srv_stats) + + if (svc->srv_stats) lprocfs_free_stats(&svc->srv_stats); } @@ -451,6 +478,7 @@ void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) { if (obd->obd_svc_procroot) lprocfs_remove(&obd->obd_svc_procroot); + if (obd->obd_svc_stats) lprocfs_free_stats(&obd->obd_svc_stats); } @@ -472,7 +500,7 @@ int lprocfs_wr_evict_client(struct file *file, const char *buffer, * - jay, jxiong@clusterfs.com */ class_incref(obd); LPROCFS_EXIT(); - + sscanf(buffer, "%40s", tmpbuf); obd_export_evict_by_uuid(obd, tmpbuf); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 060fb8e..fb1bb1d 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -51,15 +51,15 @@ static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len, md.user_ptr = cbid; md.eq_handle = ptlrpc_eq_h; - if (ack == LNET_ACK_REQ && - OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_ACK | OBD_FAIL_ONCE)) { + if (unlikely(ack == LNET_ACK_REQ && + OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_ACK | OBD_FAIL_ONCE))) { /* don't ask for the ack to simulate failing client */ ack = LNET_NOACK_REQ; obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED; } rc = LNetMDBind (md, LNET_UNLINK, mdh); - if (rc != 0) { + if (unlikely(rc != 0)) { CERROR ("LNetMDBind failed: %d\n", rc); LASSERT (rc == -ENOMEM); RETURN (-ENOMEM); @@ -68,9 +68,9 @@ static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len, CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n", len, portal, xid); - rc = LNetPut (conn->c_self, *mdh, ack, + rc = LNetPut (conn->c_self, *mdh, ack, conn->c_peer, portal, xid, 0, 0); - if (rc != 0) { + if (unlikely(rc != 0)) { int rc2; /* We're going to get an UNLINK event when I unlink below, * which will complete just like any other failed send, so @@ -93,7 +93,7 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) __u64 xid; ENTRY; - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET)) + if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET)) RETURN(0); /* NB no locking required until desc is on the network */ @@ -101,7 +101,6 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) LASSERT (desc->bd_type == BULK_PUT_SOURCE || desc->bd_type == BULK_GET_SINK); desc->bd_success = 0; - desc->bd_sender = LNET_NID_ANY; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; @@ -126,17 +125,17 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) xid = desc->bd_req->rq_xid; CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d " "id %s xid "LPX64"\n", desc->bd_iov_count, - desc->bd_nob, desc->bd_portal, + desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer), xid); /* Network is about to get at the memory */ desc->bd_network_rw = 1; if (desc->bd_type == BULK_PUT_SOURCE) - rc = LNetPut (conn->c_self, desc->bd_md_h, LNET_ACK_REQ, + rc = LNetPut (conn->c_self, desc->bd_md_h, LNET_ACK_REQ, conn->c_peer, desc->bd_portal, xid, 0, 0); else - rc = LNetGet (conn->c_self, desc->bd_md_h, + rc = LNetGet (conn->c_self, desc->bd_md_h, conn->c_peer, desc->bd_portal, xid, 0); if (rc != 0) { @@ -163,7 +162,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) if (!ptlrpc_bulk_active(desc)) /* completed or */ return; /* never started */ - + /* Do not send any meaningful data over the wire for evicted clients */ if (desc->bd_export && desc->bd_export->exp_failed) ptl_rpc_wipe_bulk_pages(desc); @@ -179,7 +178,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ lwi = LWI_TIMEOUT (cfs_time_seconds(300), NULL, NULL); - rc = l_wait_event(desc->bd_waitq, + rc = l_wait_event(desc->bd_waitq, !ptlrpc_bulk_active(desc), &lwi); if (rc == 0) return; @@ -199,7 +198,7 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) lnet_md_t md; ENTRY; - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET)) + if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET)) RETURN(0); /* NB no locking required until desc is on the network */ @@ -211,15 +210,14 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) desc->bd_type == BULK_GET_SOURCE); desc->bd_success = 0; - desc->bd_sender = LNET_NID_ANY; peer = desc->bd_import->imp_connection->c_peer; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; md.threshold = 1; /* PUT or GET */ - md.options = PTLRPC_MD_OPTIONS | - ((desc->bd_type == BULK_GET_SOURCE) ? + md.options = PTLRPC_MD_OPTIONS | + ((desc->bd_type == BULK_GET_SOURCE) ? LNET_MD_OP_GET : LNET_MD_OP_PUT); ptlrpc_fill_bulk_md(&md, desc); @@ -285,7 +283,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) * a chance to run client_bulk_callback() */ LNetMDUnlink (desc->bd_md_h); - + if (req->rq_set != NULL) wq = &req->rq_set->set_waitq; else @@ -300,7 +298,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) return; LASSERT (rc == -ETIMEDOUT); - DEBUG_REQ(D_WARNING,req,"Unexpectedly long timeout: desc %p\n", + DEBUG_REQ(D_WARNING,req,"Unexpectedly long timeout: desc %p", desc); } } @@ -313,19 +311,22 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) int rc; /* We must already have a reply buffer (only ptlrpc_error() may be - * called without one). We must also have a request buffer which - * is either the actual (swabbed) incoming request, or a saved copy - * if this is a req saved in target_queue_final_reply(). */ - LASSERT (req->rq_reqmsg != NULL); - LASSERT (req->rq_repmsg != NULL); + * called without one). The reply generated by security layer (e.g. + * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must + * have a request buffer which is either the actual (swabbed) incoming + * request, or a saved copy if this is a req saved in + * target_queue_final_reply(). + */ + LASSERT (req->rq_reqbuf != NULL); LASSERT (rs != NULL); - LASSERT (req->rq_repmsg == rs->rs_msg); LASSERT (may_be_difficult || !rs->rs_difficult); + LASSERT (req->rq_repmsg != NULL); + LASSERT (req->rq_repmsg == rs->rs_msg); LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback); LASSERT (rs->rs_cb_id.cbid_arg == rs); - if (req->rq_export && req->rq_export->exp_obd && - req->rq_export->exp_obd->obd_fail) { + if (unlikely(req->rq_export && req->rq_export->exp_obd && + req->rq_export->exp_obd->obd_fail)) { /* Failed obd's only send ENODEV */ req->rq_type = PTL_RPC_MSG_ERR; req->rq_status = -ENODEV; @@ -338,25 +339,31 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) lustre_msg_set_type(req->rq_repmsg, req->rq_type); lustre_msg_set_status(req->rq_repmsg, req->rq_status); - lustre_msg_set_opc(req->rq_repmsg, lustre_msg_get_opc(req->rq_reqmsg)); + lustre_msg_set_opc(req->rq_repmsg, + req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0); if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); else conn = ptlrpc_connection_addref(req->rq_export->exp_connection); - if (conn == NULL) { + if (unlikely(conn == NULL)) { CERROR("not replying on NULL connection\n"); /* bug 9635 */ return -ENOTCONN; } atomic_inc (&svc->srv_outstanding_replies); ptlrpc_rs_addref(rs); /* +1 ref for the network */ - rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen, + rc = sptlrpc_svc_wrap_reply(req); + if (unlikely(rc)) + goto out; + + rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len, rs->rs_difficult ? LNET_ACK_REQ : LNET_NOACK_REQ, &rs->rs_cb_id, conn, svc->srv_rep_portal, req->rq_xid); - if (rc != 0) { +out: + if (unlikely(rc != 0)) { atomic_dec (&svc->srv_outstanding_replies); ptlrpc_rs_decref(rs); } @@ -395,7 +402,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) lnet_md_t reply_md; ENTRY; - OBD_FAIL_RETURN(OBD_FAIL_PTLRPC_DROP_RPC, 0); + OBD_FAIL_RETURN(OBD_FAIL_PTLRPC_DROP_RPC, 0); LASSERT (request->rq_type == PTL_RPC_MSG_REQUEST); @@ -411,14 +418,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) request->rq_err = 1; RETURN(-ENODEV); } - - connection = request->rq_import->imp_connection; - if (request->rq_bulk != NULL) { - rc = ptlrpc_register_bulk (request); - if (rc != 0) - RETURN(rc); - } + connection = request->rq_import->imp_connection; lustre_msg_set_handle(request->rq_reqmsg, &request->rq_import->imp_remote_handle); @@ -426,12 +427,25 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) lustre_msg_set_conn_cnt(request->rq_reqmsg, request->rq_import->imp_conn_cnt); + rc = sptlrpc_cli_wrap_request(request); + if (rc) + RETURN(rc); + + /* bulk register should be done after wrap_request() */ + if (request->rq_bulk != NULL) { + rc = ptlrpc_register_bulk (request); + if (rc != 0) + RETURN(rc); + } + if (!noreply) { LASSERT (request->rq_replen != 0); - if (request->rq_repmsg == NULL) - OBD_ALLOC(request->rq_repmsg, request->rq_replen); - if (request->rq_repmsg == NULL) - GOTO(cleanup_bulk, rc = -ENOMEM); + if (request->rq_repbuf == NULL) { + rc = sptlrpc_cli_alloc_repbuf(request, + request->rq_replen); + if (rc) + GOTO(cleanup_bulk, rc); + } rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ connection->c_peer, request->rq_xid, 0, @@ -439,7 +453,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) if (rc != 0) { CERROR("LNetMEAttach failed: %d\n", rc); LASSERT (rc == -ENOMEM); - GOTO(cleanup_repmsg, rc = -ENOMEM); + GOTO(cleanup_bulk, rc = -ENOMEM); } } @@ -456,14 +470,14 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) spin_unlock(&request->rq_lock); if (!noreply) { - reply_md.start = request->rq_repmsg; - reply_md.length = request->rq_replen; + reply_md.start = request->rq_repbuf; + reply_md.length = request->rq_repbuf_len; reply_md.threshold = 1; reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT; reply_md.user_ptr = &request->rq_reply_cbid; reply_md.eq_handle = ptlrpc_eq_h; - rc = LNetMDAttach(reply_me_h, reply_md, LNET_UNLINK, + rc = LNetMDAttach(reply_me_h, reply_md, LNET_UNLINK, &request->rq_reply_md_h); if (rc != 0) { CERROR("LNetMDAttach failed: %d\n", rc); @@ -477,7 +491,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 ", portal %u\n", - request->rq_replen, request->rq_xid, + request->rq_repbuf_len, request->rq_xid, request->rq_reply_portal); } @@ -490,7 +504,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) request->rq_sent = CURRENT_SECONDS; ptlrpc_pinger_sending_on_import(request->rq_import); rc = ptl_send_buf(&request->rq_req_md_h, - request->rq_reqmsg, request->rq_reqlen, + request->rq_reqbuf, request->rq_reqdata_len, LNET_NOACK_REQ, &request->rq_req_cbid, connection, request->rq_request_portal, @@ -517,10 +531,6 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) /* UNLINKED callback called synchronously */ LASSERT (!request->rq_receiving_reply); - cleanup_repmsg: - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - cleanup_bulk: if (request->rq_bulk != NULL) ptlrpc_unregister_bulk(request); @@ -559,7 +569,7 @@ int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd) md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE; md.user_ptr = &rqbd->rqbd_cbid; md.eq_handle = ptlrpc_eq_h; - + rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h); if (rc == 0) return (0); @@ -569,6 +579,6 @@ int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd) rc = LNetMEUnlink (me_h); LASSERT (rc == 0); rqbd->rqbd_refcount = 0; - + return (-ENOMEM); } diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 0279dcf..ab389d0 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -78,8 +78,8 @@ int lustre_msg_check_version(struct lustre_msg *msg, __u32 version) case LUSTRE_MSG_MAGIC_V2_SWABBED: return lustre_msg_check_version_v2(msg, version); default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - return -EINVAL; + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; } } @@ -96,7 +96,7 @@ static inline int lustre_msg_size_v1(int count, int *lengths) return size; } -static inline int lustre_msg_size_v2(int count, int *lengths) +int lustre_msg_size_v2(int count, int *lengths) { int size; int i; @@ -107,6 +107,7 @@ static inline int lustre_msg_size_v2(int count, int *lengths) return size; } +EXPORT_SYMBOL(lustre_msg_size_v2); /* This returns the size of the buffer that is required to hold a lustre_msg * with the given sub-buffer lengths. */ @@ -128,13 +129,13 @@ int lustre_msg_size(__u32 magic, int count, int *lens) case LUSTRE_MSG_MAGIC_V2: return lustre_msg_size_v2(count, lens); default: - CERROR("incorrect message magic: %08x\n", magic); - return 0; + LASSERTF(0, "incorrect message magic: %08x\n", magic); + return -EINVAL; } } -static void -lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) +static +void lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) { struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)m; char *ptr; @@ -158,8 +159,8 @@ lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) } } -static void -lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs) +void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, + char **bufs) { char *ptr; int i; @@ -180,32 +181,18 @@ lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs) LOGL(tmp, lens[i], ptr); } } +EXPORT_SYMBOL(lustre_init_msg_v2); static int lustre_pack_request_v1(struct ptlrpc_request *req, int count, int *lens, char **bufs) { - int reqlen; + int reqlen, rc; reqlen = lustre_msg_size_v1(count, lens); - /* See if we got it from prealloc pool */ - if (req->rq_reqmsg) { - /* Cannot return error here, that would create - infinite loop in ptlrpc_prep_req_pool */ - /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen - to maximum size that would fit into this preallocated - request */ - LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, " - "reqlen %d\n",req->rq_reqlen, - reqlen); - memset(req->rq_reqmsg, 0, reqlen); - } else { - OBD_ALLOC(req->rq_reqmsg, reqlen); - if (req->rq_reqmsg == NULL) { - CERROR("alloc reqmsg (len %d) failed\n", reqlen); - return -ENOMEM; - } - } + rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); + if (rc) + return rc; req->rq_reqlen = reqlen; @@ -216,28 +203,13 @@ static int lustre_pack_request_v1(struct ptlrpc_request *req, static int lustre_pack_request_v2(struct ptlrpc_request *req, int count, int *lens, char **bufs) { - int reqlen; + int reqlen, rc; reqlen = lustre_msg_size_v2(count, lens); - /* See if we got it from prealloc pool */ - if (req->rq_reqmsg) { - /* Cannot return error here, that would create - infinite loop in ptlrpc_prep_req_pool */ - /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen - to maximum size that would fit into this preallocated - request */ - LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, " - "reqlen %d\n",req->rq_reqlen, - reqlen); - memset(req->rq_reqmsg, 0, reqlen); - } else { - OBD_ALLOC(req->rq_reqmsg, reqlen); - if (req->rq_reqmsg == NULL) { - CERROR("alloc reqmsg (len %d) failed\n", reqlen); - return -ENOMEM; - } - } + rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); + if (rc) + return rc; req->rq_reqlen = reqlen; @@ -260,6 +232,13 @@ int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count, LASSERT(count > 0); LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); + /* if we choose policy other than null, we have also choosed + * to use new message format. + */ + if (magic == LUSTRE_MSG_MAGIC_V1 && + req->rq_sec_flavor != SPTLRPC_FLVR_NULL) + magic = LUSTRE_MSG_MAGIC_V2; + switch (magic) { case LUSTRE_MSG_MAGIC_V1: return lustre_pack_request_v1(req, count - 1, lens + 1, @@ -294,8 +273,7 @@ do { \ # define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0) #endif -static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc, - int size) +struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc) { struct ptlrpc_reply_state *rs = NULL; @@ -314,46 +292,52 @@ static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc goto out; spin_lock(&svc->srv_lock); } - + rs = list_entry(svc->srv_free_rs_list.next, struct ptlrpc_reply_state, rs_list); list_del(&rs->rs_list); spin_unlock(&svc->srv_lock); LASSERT(rs); - LASSERTF(svc->srv_max_reply_size > size, "Want %d, prealloc %d\n", size, - svc->srv_max_reply_size); - memset(rs, 0, size); + memset(rs, 0, svc->srv_max_reply_size); + rs->rs_service = svc; rs->rs_prealloc = 1; out: return rs; } +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_service *svc = rs->rs_service; + + LASSERT(svc); + + spin_lock(&svc->srv_lock); + list_add(&rs->rs_list, &svc->srv_free_rs_list); + spin_unlock(&svc->srv_lock); + cfs_waitq_signal(&svc->srv_free_rs_waitq); +} + static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count, int *lens, char **bufs) { struct ptlrpc_reply_state *rs; - int msg_len; - int size; + int msg_len, rc; ENTRY; LASSERT (req->rq_reply_state == NULL); msg_len = lustre_msg_size_v1(count, lens); - size = sizeof(struct ptlrpc_reply_state) + msg_len; - OBD_ALLOC(rs, size); - if (unlikely(rs == NULL)) { - rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size); - if (!rs) - RETURN (-ENOMEM); - } + rc = sptlrpc_svc_alloc_rs(req, msg_len); + if (rc) + RETURN(rc); + + rs = req->rq_reply_state; atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ rs->rs_cb_id.cbid_fn = reply_out_callback; rs->rs_cb_id.cbid_arg = rs; rs->rs_service = req->rq_rqbd->rqbd_service; - rs->rs_size = size; CFS_INIT_LIST_HEAD(&rs->rs_exp_list); CFS_INIT_LIST_HEAD(&rs->rs_obd_list); - rs->rs_msg = (struct lustre_msg *)(rs + 1); req->rq_replen = msg_len; req->rq_reply_state = rs; @@ -365,32 +349,27 @@ static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count, RETURN (0); } -static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - int *lens, char **bufs) +int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, + int *lens, char **bufs) { struct ptlrpc_reply_state *rs; - int msg_len; - int size; + int msg_len, rc; ENTRY; LASSERT(req->rq_reply_state == NULL); msg_len = lustre_msg_size_v2(count, lens); - size = sizeof(struct ptlrpc_reply_state) + msg_len; - OBD_ALLOC(rs, size); - if (unlikely(rs == NULL)) { - rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size); - if (!rs) - RETURN (-ENOMEM); - } + rc = sptlrpc_svc_alloc_rs(req, msg_len); + if (rc) + RETURN(rc); + + rs = req->rq_reply_state; atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ rs->rs_cb_id.cbid_fn = reply_out_callback; rs->rs_cb_id.cbid_arg = rs; rs->rs_service = req->rq_rqbd->rqbd_service; - rs->rs_size = size; CFS_INIT_LIST_HEAD(&rs->rs_exp_list); CFS_INIT_LIST_HEAD(&rs->rs_obd_list); - rs->rs_msg = (struct lustre_msg *)(rs + 1); req->rq_replen = msg_len; req->rq_reply_state = rs; @@ -403,6 +382,7 @@ static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, RETURN(0); } +EXPORT_SYMBOL(lustre_pack_reply_v2); int lustre_pack_reply(struct ptlrpc_request *req, int count, int *lens, char **bufs) @@ -441,14 +421,14 @@ void *lustre_msg_buf_v1(void *msg, int n, int min_size) LASSERT(n >= 0); bufcount = m->lm_bufcount; - if (n >= bufcount) { + if (unlikely(n >= bufcount)) { CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n", m, n, bufcount); return NULL; } buflen = m->lm_buflens[n]; - if (buflen < min_size) { + if (unlikely(buflen < min_size)) { CERROR("msg %p buffer[%d] size %d too small (required %d)\n", m, n, buflen, min_size); LBUG(); @@ -470,14 +450,14 @@ void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size) LASSERT(n >= 0); bufcount = m->lm_bufcount; - if (n >= bufcount) { + if (unlikely(n >= bufcount)) { CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n", m, n, bufcount); return NULL; } buflen = m->lm_buflens[n]; - if (buflen < min_size) { + if (unlikely(buflen < min_size)) { CERROR("msg %p buffer[%d] size %d too small (required %d)\n", m, n, buflen, min_size); return NULL; @@ -500,26 +480,24 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size) case LUSTRE_MSG_MAGIC_V2_SWABBED: return lustre_msg_buf_v2(m, n, min_size); default: - LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic); + LASSERTF(0, "incorrect message magic: %08x(msg:%p)\n", m->lm_magic, m); return NULL; } } -void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg_v1(struct lustre_msg_v1 *msg, int segment, + unsigned int newlen, int move_data) { - struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)req->rq_repmsg; - char *tail = NULL, *newpos; - int tail_len = 0, n; + char *tail = NULL, *newpos; + int tail_len = 0, n; - LASSERT(req->rq_reply_state); LASSERT(msg); LASSERT(segment >= 0); LASSERT(msg->lm_bufcount > segment); LASSERT(msg->lm_buflens[segment] >= newlen); if (msg->lm_buflens[segment] == newlen) - return; + goto out; if (move_data && msg->lm_bufcount > segment + 1) { tail = lustre_msg_buf_v1(msg, segment + 1, 0); @@ -542,23 +520,22 @@ void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment, msg->lm_buflens[msg->lm_bufcount - 1] = 0; } - req->rq_replen = lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens); +out: + return lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens); } -void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment, + unsigned int newlen, int move_data) { - struct lustre_msg_v2 *msg = req->rq_repmsg; - char *tail = NULL, *newpos; - int tail_len = 0, n; + char *tail = NULL, *newpos; + int tail_len = 0, n; - LASSERT(req->rq_reply_state); LASSERT(msg); LASSERT(msg->lm_bufcount > segment); LASSERT(msg->lm_buflens[segment] >= newlen); if (msg->lm_buflens[segment] == newlen) - return; + goto out; if (move_data && msg->lm_bufcount > segment + 1) { tail = lustre_msg_buf_v2(msg, segment + 1, 0); @@ -581,36 +558,37 @@ void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment, msg->lm_buflens[msg->lm_bufcount - 1] = 0; } - req->rq_replen = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); +out: + return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); } /* - * shrink @segment to size @newlen. if @move_data is non-zero, we also move - * data forward from @segment + 1. - * + * for @msg, shrink @segment to size @newlen. if @move_data is non-zero, + * we also move data forward from @segment + 1. + * * if @newlen == 0, we remove the segment completely, but we still keep the * totally bufcount the same to save possible data moving. this will leave a * unused segment with size 0 at the tail, but that's ok. * + * return new msg size after shrinking. + * * CAUTION: * + if any buffers higher than @segment has been filled in, must call shrink * with non-zero @move_data. * + caller should NOT keep pointers to msg buffers which higher than @segment * after call shrink. */ -void lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg(struct lustre_msg *msg, int segment, + unsigned int newlen, int move_data) { - switch (req->rq_repmsg->lm_magic) { + switch (msg->lm_magic) { case LUSTRE_MSG_MAGIC_V1: - lustre_shrink_reply_v1(req, segment - 1, newlen, move_data); - return; + return lustre_shrink_msg_v1((struct lustre_msg_v1 *) msg, + segment - 1, newlen, move_data); case LUSTRE_MSG_MAGIC_V2: - lustre_shrink_reply_v2(req, segment, newlen, move_data); - return; + return lustre_shrink_msg_v2(msg, segment, newlen, move_data); default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_repmsg->lm_magic); + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); } } @@ -627,17 +605,7 @@ void lustre_free_reply_state(struct ptlrpc_reply_state *rs) LASSERT (list_empty(&rs->rs_exp_list)); LASSERT (list_empty(&rs->rs_obd_list)); - if (unlikely(rs->rs_prealloc)) { - struct ptlrpc_service *svc = rs->rs_service; - - spin_lock(&svc->srv_lock); - list_add(&rs->rs_list, - &svc->srv_free_rs_list); - spin_unlock(&svc->srv_lock); - cfs_waitq_signal(&svc->srv_free_rs_waitq); - } else { - OBD_FREE(rs, rs->rs_size); - } + sptlrpc_svc_free_rs(rs); } int lustre_unpack_msg_v1(void *msg, int len) @@ -708,7 +676,7 @@ static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len) if (len < required_len) { /* can't even look inside the message */ CERROR("message length %d too small for lustre_msg\n", len); - RETURN(-EINVAL); + return -EINVAL; } flipped = lustre_msg_swabbed(m); @@ -730,7 +698,7 @@ static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len) len, m->lm_bufcount); return -EINVAL; } - + for (i = 0; i < m->lm_bufcount; i++) { if (flipped) __swab32s(&m->lm_buflens[i]); @@ -1285,7 +1253,7 @@ __u32 lustre_msg_get_opc(struct lustre_msg *msg) return pb->pb_opc; } default: - CERROR( "incorrect message magic: %08x\n", msg->lm_magic); + CERROR("incorrect message magic: %08x(msg:%p)\n", msg->lm_magic, msg); return 0; } } @@ -1600,15 +1568,16 @@ void lustre_swab_connect(struct obd_connect_data *ocd) __swab64s(&ocd->ocd_connect_flags); __swab32s(&ocd->ocd_version); __swab32s(&ocd->ocd_grant); + __swab64s(&ocd->ocd_ibits_known); __swab32s(&ocd->ocd_index); __swab32s(&ocd->ocd_brw_size); - __swab64s(&ocd->ocd_ibits_known); __swab32s(&ocd->ocd_nllu); __swab32s(&ocd->ocd_nllg); + __swab64s(&ocd->ocd_transno); + __swab32s(&ocd->ocd_group); CLASSERT(offsetof(typeof(*ocd), padding1) != 0); CLASSERT(offsetof(typeof(*ocd), padding2) != 0); CLASSERT(offsetof(typeof(*ocd), padding3) != 0); - CLASSERT(offsetof(typeof(*ocd), padding4) != 0); } void lustre_swab_obdo (struct obdo *o) @@ -1679,6 +1648,11 @@ void lustre_swab_ost_last_id(obd_id *id) __swab64s(id); } +void lustre_swab_generic_32s(__u32 *val) +{ + __swab32s(val); +} + void lustre_swab_ost_lvb(struct ost_lvb *lvb) { __swab64s(&lvb->lvb_size); @@ -1725,6 +1699,42 @@ void lustre_swab_mds_body (struct mds_body *b) __swab32s (&b->padding_4); } +void lustre_swab_mdt_body (struct mdt_body *b) +{ + lustre_swab_lu_fid (&b->fid1); + lustre_swab_lu_fid (&b->fid2); + /* handle is opaque */ + __swab64s (&b->valid); + __swab64s (&b->size); + __swab64s (&b->mtime); + __swab64s (&b->atime); + __swab64s (&b->ctime); + __swab64s (&b->blocks); + __swab64s (&b->ioepoch); + __swab32s (&b->fsuid); + __swab32s (&b->fsgid); + __swab32s (&b->capability); + __swab32s (&b->mode); + __swab32s (&b->uid); + __swab32s (&b->gid); + __swab32s (&b->flags); + __swab32s (&b->rdev); + __swab32s (&b->nlink); + __swab32s (&b->suppgid); + __swab32s (&b->eadatasize); + __swab32s (&b->aclsize); + __swab32s (&b->max_mdsize); + __swab32s (&b->max_cookiesize); +} + +void lustre_swab_mdt_epoch (struct mdt_epoch *b) +{ + /* handle is opaque */ + __swab64s (&b->ioepoch); + __swab32s (&b->flags); + CLASSERT(offsetof(typeof(*b), padding) != 0); +} + void lustre_swab_mgs_target_info(struct mgs_target_info *mti) { int i; @@ -1734,7 +1744,7 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti) __swab32s(&mti->mti_flags); __swab32s(&mti->mti_nid_count); CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64)); - for (i = 0; i < MTI_NIDS_MAX; i++) + for (i = 0; i < MTI_NIDS_MAX; i++) __swab64s(&mti->mti_nids[i]); } @@ -1770,6 +1780,24 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q) lustre_swab_obd_dqblk (&q->qc_dqblk); } +void lustre_swab_mds_remote_perm (struct mds_remote_perm *p) +{ + __swab32s (&p->rp_uid); + __swab32s (&p->rp_gid); + __swab32s (&p->rp_fsuid); + __swab32s (&p->rp_fsgid); + __swab32s (&p->rp_access_perm); +}; + +void lustre_swab_mdt_remote_perm (struct mdt_remote_perm *p) +{ + __swab32s (&p->rp_uid); + __swab32s (&p->rp_gid); + __swab32s (&p->rp_fsuid); + __swab32s (&p->rp_fsgid); + __swab32s (&p->rp_access_perm); +}; + void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) { __swab32s (&sa->sa_opcode); @@ -1790,12 +1818,39 @@ void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) CLASSERT(offsetof(typeof(*sa), sa_padding) != 0); } +void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa) +{ + __swab32s (&sa->sa_opcode); + __swab32s (&sa->sa_fsuid); + __swab32s (&sa->sa_fsgid); + __swab32s (&sa->sa_cap); + __swab32s (&sa->sa_suppgid); + __swab32s (&sa->sa_mode); + lustre_swab_lu_fid (&sa->sa_fid); + __swab64s (&sa->sa_valid); + __swab64s (&sa->sa_size); + __swab64s (&sa->sa_blocks); + __swab64s (&sa->sa_mtime); + __swab64s (&sa->sa_atime); + __swab64s (&sa->sa_ctime); + __swab32s (&sa->sa_uid); + __swab32s (&sa->sa_gid); + __swab32s (&sa->sa_attr_flags); + CLASSERT(offsetof(typeof(*sa), sa_padding) != 0); +} + void lustre_swab_mds_rec_join (struct mds_rec_join *jr) { __swab64s(&jr->jr_headsize); lustre_swab_ll_fid(&jr->jr_fid); } +void lustre_swab_mdt_rec_join (struct mdt_rec_join *jr) +{ + __swab64s(&jr->jr_headsize); + lustre_swab_lu_fid(&jr->jr_fid); +} + void lustre_swab_mds_rec_create (struct mds_rec_create *cr) { __swab32s (&cr->cr_opcode); @@ -1816,6 +1871,26 @@ void lustre_swab_mds_rec_create (struct mds_rec_create *cr) CLASSERT(offsetof(typeof(*cr), cr_padding_5) != 0); } +void lustre_swab_mdt_rec_create (struct mdt_rec_create *cr) +{ + __swab32s (&cr->cr_opcode); + __swab32s (&cr->cr_fsuid); + __swab32s (&cr->cr_fsgid); + __swab32s (&cr->cr_cap); + __swab32s (&cr->cr_flags); /* for use with open */ + __swab32s (&cr->cr_mode); + /* handle is opaque */ + lustre_swab_lu_fid (&cr->cr_fid1); + lustre_swab_lu_fid (&cr->cr_fid2); + __swab64s (&cr->cr_time); + __swab64s (&cr->cr_rdev); + __swab64s (&cr->cr_ioepoch); + __swab32s (&cr->cr_suppgid1); + __swab32s (&cr->cr_suppgid2); + __swab32s (&cr->cr_bias); + CLASSERT(offsetof(typeof(*cr), cr_padding_1) != 0); +} + void lustre_swab_mds_rec_link (struct mds_rec_link *lk) { __swab32s (&lk->lk_opcode); @@ -1833,6 +1908,23 @@ void lustre_swab_mds_rec_link (struct mds_rec_link *lk) CLASSERT(offsetof(typeof(*lk), lk_padding_4) != 0); } +void lustre_swab_mdt_rec_link (struct mdt_rec_link *lk) +{ + __swab32s (&lk->lk_opcode); + __swab32s (&lk->lk_fsuid); + __swab32s (&lk->lk_fsgid); + __swab32s (&lk->lk_cap); + __swab32s (&lk->lk_suppgid1); + __swab32s (&lk->lk_suppgid2); + lustre_swab_lu_fid (&lk->lk_fid1); + lustre_swab_lu_fid (&lk->lk_fid2); + __swab64s (&lk->lk_time); + __swab32s (&lk->lk_bias); + CLASSERT(offsetof(typeof(*lk), lk_padding_2) != 0); + CLASSERT(offsetof(typeof(*lk), lk_padding_3) != 0); + CLASSERT(offsetof(typeof(*lk), lk_padding_4) != 0); +} + void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul) { __swab32s (&ul->ul_opcode); @@ -1850,6 +1942,23 @@ void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul) CLASSERT(offsetof(typeof(*ul), ul_padding_4) != 0); } +void lustre_swab_mdt_rec_unlink (struct mdt_rec_unlink *ul) +{ + __swab32s (&ul->ul_opcode); + __swab32s (&ul->ul_fsuid); + __swab32s (&ul->ul_fsgid); + __swab32s (&ul->ul_cap); + __swab32s (&ul->ul_suppgid); + __swab32s (&ul->ul_mode); + lustre_swab_lu_fid (&ul->ul_fid1); + lustre_swab_lu_fid (&ul->ul_fid2); + __swab64s (&ul->ul_time); + __swab32s (&ul->ul_bias); + CLASSERT(offsetof(typeof(*ul), ul_padding_2) != 0); + CLASSERT(offsetof(typeof(*ul), ul_padding_3) != 0); + CLASSERT(offsetof(typeof(*ul), ul_padding_4) != 0); +} + void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn) { __swab32s (&rn->rn_opcode); @@ -1867,6 +1976,23 @@ void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn) CLASSERT(offsetof(typeof(*rn), rn_padding_4) != 0); } +void lustre_swab_mdt_rec_rename (struct mdt_rec_rename *rn) +{ + __swab32s (&rn->rn_opcode); + __swab32s (&rn->rn_fsuid); + __swab32s (&rn->rn_fsgid); + __swab32s (&rn->rn_cap); + __swab32s (&rn->rn_suppgid1); + __swab32s (&rn->rn_suppgid2); + lustre_swab_lu_fid (&rn->rn_fid1); + lustre_swab_lu_fid (&rn->rn_fid2); + __swab64s (&rn->rn_time); + __swab32s (&rn->rn_mode); + __swab32s (&rn->rn_bias); + CLASSERT(offsetof(typeof(*rn), rn_padding_3) != 0); + CLASSERT(offsetof(typeof(*rn), rn_padding_4) != 0); +} + void lustre_swab_lov_desc (struct lov_desc *ld) { __swab32s (&ld->ld_tgt_count); @@ -1879,6 +2005,20 @@ void lustre_swab_lov_desc (struct lov_desc *ld) /* uuid endian insensitive */ } +/*begin adding MDT by huanghua@clusterfs.com*/ +void lustre_swab_lmv_desc (struct lmv_desc *ld) +{ + __swab32s (&ld->ld_tgt_count); + __swab32s (&ld->ld_active_tgt_count); + /* uuid endian insensitive */ +} +/*end adding MDT by huanghua@clusterfs.com*/ +void lustre_swab_md_fld (struct md_fld *mf) +{ + __swab64s(&mf->mf_seq); + __swab64s(&mf->mf_mds); +} + static void print_lum (struct lov_user_md *lum) { CDEBUG(D_OTHER, "lov_user_md %p:\n", lum); @@ -1967,6 +2107,30 @@ void lustre_swab_lov_user_md_objects(struct lov_user_md *lum) EXIT; } + +void lustre_swab_lov_mds_md(struct lov_mds_md *lmm) +{ + struct lov_ost_data *lod; + int i; + ENTRY; + for (i = 0; i < lmm->lmm_stripe_count; i++) { + lod = &lmm->lmm_objects[i]; + __swab64s(&lod->l_object_id); + __swab64s(&lod->l_object_gr); + __swab32s(&lod->l_ost_gen); + __swab32s(&lod->l_ost_idx); + } + __swab32s(&lmm->lmm_magic); + __swab32s(&lmm->lmm_pattern); + __swab64s(&lmm->lmm_object_id); + __swab64s(&lmm->lmm_object_gr); + __swab32s(&lmm->lmm_stripe_size); + __swab32s(&lmm->lmm_stripe_count); + + EXIT; +} + + void lustre_swab_ldlm_res_id (struct ldlm_res_id *id) { int i; @@ -2128,33 +2292,52 @@ static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req) void _debug_req(struct ptlrpc_request *req, __u32 mask, struct libcfs_debug_msg_data *data, const char *fmt, ... ) - { va_list args; va_start(args, fmt); libcfs_debug_vmsg2(data->msg_cdls, data->msg_subsys, mask, data->msg_file, data->msg_fn, data->msg_line, fmt, args, - " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " - REQ_FLAGS_FMT"/%x/%x rc %d/%d\n", + " req@%p x"LPD64"/t"LPD64"("LPD64") o%d->%s@%s:%d lens" + " %d/%d ref %d fl "REQ_FLAGS_FMT"/%x/%x rc %d/%d\n", req, req->rq_xid, req->rq_transno, + req->rq_reqmsg ? lustre_msg_get_transno(req->rq_reqmsg) : 0, req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : req->rq_export ? - (char*)req->rq_export->exp_client_uuid.uuid : "<?>", + (char*)req->rq_export->exp_client_uuid.uuid : "<?>", req->rq_import ? - (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : + (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : req->rq_export ? - (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>", + (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>", (req->rq_import && req->rq_import->imp_client) ? - req->rq_import->imp_client->cli_request_portal : -1, + req->rq_import->imp_client->cli_request_portal : -1, req->rq_reqlen, req->rq_replen, atomic_read(&req->rq_refcount), DEBUG_REQ_FLAGS(req), - req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0, - req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0, + req->rq_reqmsg && req_ptlrpc_body_swabbed(req) ? + lustre_msg_get_flags(req->rq_reqmsg) : -1, + req->rq_repmsg && rep_ptlrpc_body_swabbed(req) ? + lustre_msg_get_flags(req->rq_repmsg) : -1, req->rq_status, - req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0); - va_end(args); + req->rq_repmsg && rep_ptlrpc_body_swabbed(req) ? + lustre_msg_get_status(req->rq_repmsg) : -1); } - EXPORT_SYMBOL(_debug_req); + +void lustre_swab_lustre_capa(struct lustre_capa *c) +{ + lustre_swab_lu_fid(&c->lc_fid); + __swab64s (&c->lc_opc); + __swab32s (&c->lc_uid); + __swab32s (&c->lc_flags); + __swab32s (&c->lc_keyid); + __swab32s (&c->lc_timeout); + __swab64s (&c->lc_expiry); +} + +void lustre_swab_lustre_capa_key (struct lustre_capa_key *k) +{ + __swab64s (&k->lk_mdsid); + __swab32s (&k->lk_keyid); + __swab32s (&k->lk_padding); +} diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 77507ca..fd5a000 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -52,6 +52,7 @@ int ptlrpc_ping(struct obd_import *imp) obd2cli_tgt(imp->imp_obd)); req->rq_no_resend = req->rq_no_delay = 1; ptlrpc_req_set_repsize(req, 1, NULL); + req->rq_timeout = PING_INTERVAL; ptlrpcd_add_req(req); } else { CERROR("OOM trying to ping %s->%s\n", @@ -75,7 +76,113 @@ void ptlrpc_ping_import_soon(struct obd_import *imp) imp->imp_next_ping = cfs_time_current(); } +static inline int ptlrpc_next_reconnect(struct obd_import *imp) +{ + if (imp->imp_server_timeout) + return cfs_time_shift(obd_timeout / 2); + else + return cfs_time_shift(obd_timeout); +} + +static atomic_t suspend_timeouts = ATOMIC_INIT(0); +static cfs_time_t suspend_wakeup_time = 0; +static wait_queue_head_t suspend_timeouts_waitq; + +cfs_time_t ptlrpc_suspend_wakeup_time(void) +{ + return suspend_wakeup_time; +} + +void ptlrpc_deactivate_timeouts(struct obd_import *imp) +{ + /*XXX: disabled for now, will be replaced by adaptive timeouts */ +#if 0 + if (imp->imp_no_timeout) + return; + imp->imp_no_timeout = 1; + atomic_inc(&suspend_timeouts); + CDEBUG(D_HA|D_WARNING, "deactivate timeouts %u\n", atomic_read(&suspend_timeouts)); +#endif +} + +void ptlrpc_activate_timeouts(struct obd_import *imp) +{ + /*XXX: disabled for now, will be replaced by adaptive timeouts */ +#if 0 + if (!imp->imp_no_timeout) + return; + imp->imp_no_timeout = 0; + LASSERT(atomic_read(&suspend_timeouts) > 0); + if (atomic_dec_and_test(&suspend_timeouts)) { + suspend_wakeup_time = cfs_time_current(); + wake_up(&suspend_timeouts_waitq); + } + CDEBUG(D_HA|D_WARNING, "activate timeouts %u\n", atomic_read(&suspend_timeouts)); +#endif +} + +int ptlrpc_check_suspend(void) +{ + if (atomic_read(&suspend_timeouts)) + return 1; + return 0; +} + +int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req) +{ + struct l_wait_info lwi; + + if (atomic_read(&suspend_timeouts)) { + DEBUG_REQ(D_NET, req, "-- suspend %d regular timeout", + atomic_read(&suspend_timeouts)); + lwi = LWI_INTR(NULL, NULL); + l_wait_event(suspend_timeouts_waitq, + atomic_read(&suspend_timeouts) == 0, &lwi); + DEBUG_REQ(D_NET, req, "-- recharge regular timeout"); + return 1; + } + return 0; +} + #ifdef __KERNEL__ + +static void ptlrpc_pinger_process_import(struct obd_import *imp, + unsigned long this_ping) +{ + int force, level; + + spin_lock(&imp->imp_lock); + level = imp->imp_state; + force = imp->imp_force_verify; + if (force) + imp->imp_force_verify = 0; + spin_unlock(&imp->imp_lock); + + CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, + "level %s/%u force %u deactive %u pingable %u\n", + ptlrpc_import_state_name(level), level, + force, imp->imp_deactive, imp->imp_pingable); + + if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, + this_ping) && force == 0) + return; + + if (level == LUSTRE_IMP_DISCON && !imp->imp_deactive) { + /* wait at least a timeout before trying recovery again */ + imp->imp_next_ping = ptlrpc_next_reconnect(imp); + ptlrpc_initiate_recovery(imp); + } else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov || + imp->imp_deactive) { + CDEBUG(D_HA, "not pinging %s (in recovery " + " or recovery disabled: %s)\n", + obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(level)); + } else if (imp->imp_pingable || force) { + ptlrpc_ping(imp); + } +} + static int ptlrpc_pinger_main(void *arg) { struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; @@ -100,55 +207,11 @@ static int ptlrpc_pinger_main(void *arg) struct obd_import *imp = list_entry(iter, struct obd_import, imp_pinger_chain); - int force, level; - - spin_lock(&imp->imp_lock); - level = imp->imp_state; - force = imp->imp_force_verify; - imp->imp_force_verify = 0; - spin_unlock(&imp->imp_lock); - - CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, - "level %s/%u force %u deactive %u pingable %u\n", - ptlrpc_import_state_name(level), level, - force, imp->imp_deactive, imp->imp_pingable); - - if (force || - /* if the next ping is within, say, 5 jiffies from - now, go ahead and ping. See note below. */ - cfs_time_aftereq(this_ping, - imp->imp_next_ping - 5 * CFS_TICK)) { - if (level == LUSTRE_IMP_DISCON && - !imp->imp_deactive) { - /* wait at least a timeout before - trying recovery again. */ - imp->imp_next_ping = cfs_time_shift(obd_timeout); - ptlrpc_initiate_recovery(imp); - } else if (level != LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov || - imp->imp_deactive) { - CDEBUG(D_HA, "not pinging %s " - "(in recovery: %s or recovery " - "disabled: %u/%u)\n", - obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(level), - imp->imp_deactive, - imp->imp_obd->obd_no_recov); - } else if (imp->imp_pingable || force) { - ptlrpc_ping(imp); - } - } else { - if (!imp->imp_pingable) - continue; - CDEBUG(D_INFO, - "don't need to ping %s ("CFS_TIME_T - " > "CFS_TIME_T")\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_next_ping, this_ping); - } + ptlrpc_pinger_process_import(imp, this_ping); /* obd_timeout might have changed */ - if (cfs_time_after(imp->imp_next_ping, + if (imp->imp_pingable && imp->imp_next_ping && + cfs_time_after(imp->imp_next_ping, cfs_time_add(this_ping, cfs_time_seconds(PING_INTERVAL)))) ptlrpc_update_next_ping(imp); @@ -157,7 +220,7 @@ static int ptlrpc_pinger_main(void *arg) /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, - cfs_time_seconds(PING_INTERVAL)), + cfs_time_seconds(PING_INTERVAL)), cfs_time_current()); /* The ping sent by ptlrpc_send_rpc may get sent out @@ -208,11 +271,12 @@ int ptlrpc_start_pinger(void) if (pinger_thread != NULL) RETURN(-EALREADY); - OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); + OBD_ALLOC_PTR(pinger_thread); if (pinger_thread == NULL) RETURN(-ENOMEM); cfs_waitq_init(&pinger_thread->t_ctl_waitq); - + cfs_waitq_init(&suspend_timeouts_waitq); + d.name = "ll_ping"; d.thread = pinger_thread; @@ -250,7 +314,7 @@ int ptlrpc_stop_pinger(void) l_wait_event(pinger_thread->t_ctl_waitq, (pinger_thread->t_flags & SVC_STOPPED), &lwi); - OBD_FREE(pinger_thread, sizeof(*pinger_thread)); + OBD_FREE_PTR(pinger_thread); pinger_thread = NULL; RETURN(rc); } @@ -269,6 +333,8 @@ int ptlrpc_pinger_add_import(struct obd_import *imp) mutex_down(&pinger_sem); CDEBUG(D_HA, "adding pingable import %s->%s\n", imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); + /* if we add to pinger we want recovery on this import */ + imp->imp_obd->obd_no_recov = 0; ptlrpc_update_next_ping(imp); /* XXX sort, blah blah */ list_add_tail(&imp->imp_pinger_chain, &pinger_imports); @@ -290,6 +356,8 @@ int ptlrpc_pinger_del_import(struct obd_import *imp) list_del_init(&imp->imp_pinger_chain); CDEBUG(D_HA, "removing pingable import %s->%s\n", imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); + /* if we remove from pinger we don't want recovery on this import */ + imp->imp_obd->obd_no_recov = 1; class_import_put(imp); mutex_up(&pinger_sem); RETURN(0); @@ -375,14 +443,19 @@ static int ping_evictor_main(void *arg) if (expire_time > exp->exp_last_request_time) { class_export_get(exp); spin_unlock(&obd->obd_dev_lock); - LCONSOLE_WARN("%s: haven't heard from client %s" + LCONSOLE_WARN("%s: haven't heard from client %s" " (at %s) in %ld seconds. I think" " it's dead, and I am evicting" - " it.\n", obd->obd_name, + " it. exp %p, cur %ld expire %ld" + " last %ld\n", + obd->obd_name, obd_uuid2str(&exp->exp_client_uuid), obd_export_nid2str(exp), (long)(CURRENT_SECONDS - - exp->exp_last_request_time)); + exp->exp_last_request_time), + exp, (long)CURRENT_SECONDS, + (long)expire_time, + (long)exp->exp_last_request_time); CDEBUG(D_HA, "Last request was at %ld\n", exp->exp_last_request_time); class_fail_export(exp); @@ -557,7 +630,7 @@ do_check_set: req = list_entry(iter, struct ptlrpc_request, rq_set_chain); - if (req->rq_replied) + if (req->rq_phase == RQ_PHASE_COMPLETE) continue; req->rq_phase = RQ_PHASE_COMPLETE; @@ -661,9 +734,9 @@ int ptlrpc_pinger_del_import(struct obd_import *imp) void ptlrpc_pinger_wake_up() { #ifdef ENABLE_PINGER - ENTRY; /* XXX force pinger to run, if needed */ struct obd_import *imp; + ENTRY; list_for_each_entry(imp, &pinger_imports, imp_pinger_chain) { CDEBUG(D_HA, "Checking that we need to do anything about import" " %s->%s\n", imp->imp_obd->obd_uuid.uuid, @@ -683,7 +756,7 @@ void ptlrpc_pinger_wake_up() obd2cli_tgt(imp->imp_obd), imp->imp_state, imp->imp_deactive); } -#endif EXIT; +#endif } #endif /* !__KERNEL__ */ diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 2f886ef..5d09f96 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -79,9 +79,54 @@ static inline int opcode_offset(__u32 opc) { return (opc - LDLM_FIRST_OPC + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < MGS_LAST_OPC) { + /* MGS Opcode */ + return (opc - MGS_FIRST_OPC + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); } else if (opc < OBD_LAST_OPC) { /* OBD Ping */ return (opc - OBD_FIRST_OPC + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < LLOG_LAST_OPC) { + /* LLOG opcode */ + return (opc - LLOG_FIRST_OPC + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < FLD_LAST_OPC) { + /* FLD opcode */ + return (opc - FLD_FIRST_OPC + + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < SEQ_LAST_OPC) { + /* SEQ opcode */ + return (opc - SEQ_FIRST_OPC + + (FLD_LAST_OPC - FLD_FIRST_OPC) + + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < SEC_LAST_OPC) { + /* SEC opcode */ + return (opc - SEC_FIRST_OPC + + (SEQ_LAST_OPC - SEQ_FIRST_OPC) + + (FLD_LAST_OPC - FLD_FIRST_OPC) + + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); @@ -94,8 +139,12 @@ static inline int opcode_offset(__u32 opc) { #define LUSTRE_MAX_OPCODES ((LDLM_LAST_OPC - LDLM_FIRST_OPC) + \ (MDS_LAST_OPC - MDS_FIRST_OPC) + \ (OST_LAST_OPC - OST_FIRST_OPC) + \ - (OBD_LAST_OPC - OBD_FIRST_OPC)) - + (OBD_LAST_OPC - OBD_FIRST_OPC) + \ + (FLD_LAST_OPC - FLD_FIRST_OPC) + \ + (SEQ_LAST_OPC - SEQ_FIRST_OPC) + \ + (MGS_LAST_OPC - MGS_FIRST_OPC) + \ + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \ + (SEC_LAST_OPC - SEC_FIRST_OPC)) enum { PTLRPC_REQWAIT_CNTR = 0, PTLRPC_REQQDEPTH_CNTR, @@ -112,6 +161,10 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, int pageoffset, int len); void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc); +/* pack_generic.c */ +struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc); +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs); + /* pinger.c */ int ptlrpc_start_pinger(void); int ptlrpc_stop_pinger(void); @@ -124,4 +177,27 @@ int ping_evictor_wake(struct obd_export *exp); #define ping_evictor_wake(exp) 1 #endif +/* sec_null.c */ +int sptlrpc_null_init(void); +void sptlrpc_null_fini(void); + +/* sec_plain.c */ +int sptlrpc_plain_init(void); +void sptlrpc_plain_fini(void); + +/* sec_bulk.c */ +int sptlrpc_enc_pool_init(void); +void sptlrpc_enc_pool_fini(void); +int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count, + int *eof, void *data); +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg); + +/* sec_lproc.c */ +int sptlrpc_lproc_init(void); +void sptlrpc_lproc_fini(void); + +/* sec.c */ +int sptlrpc_init(void); +void sptlrpc_fini(void); + #endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 7eb5a5d..c7d7093 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -35,6 +35,7 @@ #include <obd_support.h> #include <obd_class.h> #include <lustre_net.h> +#include <lustre_req_layout.h> #include "ptlrpc_internal.h" @@ -58,38 +59,53 @@ __init int ptlrpc_init(void) init_mutex(&pinger_sem); init_mutex(&ptlrpcd_sem); - rc = ptlrpc_init_portals(); + rc = req_layout_init(); if (rc) RETURN(rc); cleanup_phase = 1; + rc = ptlrpc_init_portals(); + if (rc) + RETURN(rc); + cleanup_phase = 2; + ptlrpc_init_connection(); rc = llog_init_commit_master(); if (rc) GOTO(cleanup, rc); - cleanup_phase = 2; + cleanup_phase = 3; ptlrpc_put_connection_superhack = ptlrpc_put_connection; rc = ptlrpc_start_pinger(); if (rc) GOTO(cleanup, rc); - cleanup_phase = 3; + cleanup_phase = 4; rc = ldlm_init(); if (rc) GOTO(cleanup, rc); + cleanup_phase = 5; + + rc = sptlrpc_init(); + if (rc) + GOTO(cleanup, rc); + RETURN(0); cleanup: switch(cleanup_phase) { - case 3: + case 5: + ldlm_exit(); + case 4: ptlrpc_stop_pinger(); - case 2: + case 3: llog_cleanup_commit_master(1); ptlrpc_cleanup_connection(); - case 1: + case 2: ptlrpc_exit_portals(); + case 1: + req_layout_fini(); default: ; } @@ -99,6 +115,7 @@ cleanup: #ifdef __KERNEL__ static void __exit ptlrpc_exit(void) { + sptlrpc_fini(); ldlm_exit(); ptlrpc_stop_pinger(); ptlrpc_exit_portals(); @@ -179,12 +196,13 @@ EXPORT_SYMBOL(lustre_msg_swabbed); EXPORT_SYMBOL(lustre_msg_check_version); EXPORT_SYMBOL(lustre_pack_request); EXPORT_SYMBOL(lustre_pack_reply); -EXPORT_SYMBOL(lustre_shrink_reply); +EXPORT_SYMBOL(lustre_shrink_msg); EXPORT_SYMBOL(lustre_free_reply_state); EXPORT_SYMBOL(lustre_msg_size); EXPORT_SYMBOL(lustre_unpack_msg); EXPORT_SYMBOL(lustre_msg_buf); EXPORT_SYMBOL(lustre_msg_string); +EXPORT_SYMBOL(lustre_swab_ptlrpc_body); EXPORT_SYMBOL(lustre_swab_buf); EXPORT_SYMBOL(lustre_swab_reqbuf); EXPORT_SYMBOL(lustre_swab_repbuf); @@ -197,15 +215,26 @@ EXPORT_SYMBOL(lustre_swab_ost_last_id); EXPORT_SYMBOL(lustre_swab_ost_lvb); EXPORT_SYMBOL(lustre_swab_mds_status_req); EXPORT_SYMBOL(lustre_swab_mds_body); +EXPORT_SYMBOL(lustre_swab_mdt_body); +EXPORT_SYMBOL(lustre_swab_mdt_epoch); EXPORT_SYMBOL(lustre_swab_obd_quotactl); +EXPORT_SYMBOL(lustre_swab_mds_remote_perm); +EXPORT_SYMBOL(lustre_swab_mdt_remote_perm); EXPORT_SYMBOL(lustre_swab_mds_rec_setattr); +EXPORT_SYMBOL(lustre_swab_mdt_rec_setattr); EXPORT_SYMBOL(lustre_swab_mds_rec_create); +EXPORT_SYMBOL(lustre_swab_mdt_rec_create); EXPORT_SYMBOL(lustre_swab_mds_rec_join); +EXPORT_SYMBOL(lustre_swab_mdt_rec_join); EXPORT_SYMBOL(lustre_swab_mds_rec_link); +EXPORT_SYMBOL(lustre_swab_mdt_rec_link); EXPORT_SYMBOL(lustre_swab_mds_rec_unlink); +EXPORT_SYMBOL(lustre_swab_mdt_rec_unlink); EXPORT_SYMBOL(lustre_swab_mds_rec_rename); +EXPORT_SYMBOL(lustre_swab_mdt_rec_rename); EXPORT_SYMBOL(lustre_swab_lov_desc); EXPORT_SYMBOL(lustre_swab_lov_user_md); +EXPORT_SYMBOL(lustre_swab_lov_mds_md); EXPORT_SYMBOL(lustre_swab_lov_user_md_objects); EXPORT_SYMBOL(lustre_swab_lov_user_md_join); EXPORT_SYMBOL(lustre_swab_ldlm_res_id); @@ -244,6 +273,10 @@ EXPORT_SYMBOL(lustre_msg_set_transno); EXPORT_SYMBOL(lustre_msg_set_status); EXPORT_SYMBOL(lustre_msg_set_conn_cnt); EXPORT_SYMBOL(lustre_swab_mgs_target_info); +EXPORT_SYMBOL(lustre_swab_md_fld); +EXPORT_SYMBOL(lustre_swab_generic_32s); +EXPORT_SYMBOL(lustre_swab_lustre_capa); +EXPORT_SYMBOL(lustre_swab_lustre_capa_key); /* recover.c */ EXPORT_SYMBOL(ptlrpc_disconnect_import); diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 16c448c..7765faa 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -54,6 +54,7 @@ struct ptlrpcd_ctl { char pc_name[16]; #ifndef __KERNEL__ int pc_recurred; + void *pc_callback; void *pc_wait_callback; void *pc_idle_callback; #endif diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 2355264..6fe00c8 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -166,7 +166,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, CERROR("couldn't get an llcd - dropped "LPX64 ":%x+%u\n", cookies->lgc_lgl.lgl_oid, - cookies->lgc_lgl.lgl_ogen, + cookies->lgc_lgl.lgl_ogen, cookies->lgc_index); GOTO(out, rc = -ENOMEM); } @@ -174,7 +174,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, ctxt->loc_llcd = llcd; } - memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, + memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, sizeof(*cookies)); llcd->llcd_cookiebytes += sizeof(*cookies); } else { @@ -222,6 +222,7 @@ static int log_commit_thread(void *arg) struct llog_commit_master *lcm = arg; struct llog_commit_daemon *lcd; struct llog_canceld_ctxt *llcd, *n; + struct obd_import *import = NULL; ENTRY; OBD_ALLOC(lcd, sizeof(*lcd)); @@ -243,10 +244,13 @@ static int log_commit_thread(void *arg) CDEBUG(D_HA, "%s started\n", cfs_curproc_comm()); do { struct ptlrpc_request *request; - struct obd_import *import = NULL; struct list_head *sending_list; int rc = 0; + if (import) + class_import_put(import); + import = NULL; + /* If we do not have enough pages available, allocate some */ while (atomic_read(&lcm->lcm_llcd_numfree) < lcm->lcm_llcd_minfree) { @@ -272,6 +276,8 @@ static int log_commit_thread(void *arg) sending_list = &lcm->lcm_llcd_pending; resend: + if (import) + class_import_put(import); import = NULL; if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) { lcm->lcm_llcd_maxfree = 0; @@ -301,6 +307,8 @@ static int log_commit_thread(void *arg) typeof(*llcd), llcd_list); LASSERT(llcd->llcd_lcm == lcm); import = llcd->llcd_ctxt->loc_imp; + if (import) + class_import_get(import); } list_for_each_entry_safe(llcd, n, sending_list, llcd_list) { LASSERT(llcd->llcd_lcm == lcm); @@ -351,6 +359,8 @@ static int log_commit_thread(void *arg) continue; } + OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10); + request = ptlrpc_prep_req(import, LUSTRE_LOG_VERSION, OBD_LOG_CANCEL, 2, size,bufs); if (request == NULL) { @@ -404,6 +414,9 @@ static int log_commit_thread(void *arg) } } while(1); + if (import) + class_import_put(import); + /* If we are force exiting, just drop all of the cookies. */ if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) { spin_lock(&lcm->lcm_llcd_lock); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 94c0d78..4a232fa 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -97,7 +97,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) */ list_for_each_safe(tmp, pos, &imp->imp_replay_list) { req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); - + /* If need to resend the last sent transno (because a reconnect has occurred), then stop on the matching req and send it again. If, however, the last sent @@ -150,7 +150,6 @@ int ptlrpc_resend(struct obd_import *imp) spin_unlock(&imp->imp_lock); RETURN(-1); } - spin_unlock(&imp->imp_lock); list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) { LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON, @@ -159,6 +158,7 @@ int ptlrpc_resend(struct obd_import *imp) if (!req->rq_no_resend) ptlrpc_resend_req(req); } + spin_unlock(&imp->imp_lock); RETURN(0); } @@ -244,7 +244,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) spin_lock(&imp->imp_lock); imp->imp_deactive = 0; spin_unlock(&imp->imp_lock); - + CDEBUG(D_HA, "setting import %s VALID\n", obd2cli_tgt(imp->imp_obd)); rc = ptlrpc_recover_import(imp, NULL); diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c new file mode 100644 index 0000000..5c24c6c --- /dev/null +++ b/lustre/ptlrpc/sec.c @@ -0,0 +1,2204 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include <libcfs/libcfs.h> +#ifndef __KERNEL__ +#include <liblustre.h> +#include <libcfs/list.h> +#else +#include <linux/crypto.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_sec.h> + +#include "ptlrpc_internal.h" + +static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec); +static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); +static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx); + +/*********************************************** + * policy registers * + ***********************************************/ + +static rwlock_t policy_lock = RW_LOCK_UNLOCKED; +static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = { + NULL, +}; + +int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy) +{ + __u32 number = policy->sp_policy; + + LASSERT(policy->sp_name); + LASSERT(policy->sp_cops); + LASSERT(policy->sp_sops); + + if (number >= SPTLRPC_POLICY_MAX) + return -EINVAL; + + write_lock(&policy_lock); + if (unlikely(policies[number])) { + write_unlock(&policy_lock); + return -EALREADY; + } + policies[number] = policy; + write_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: registered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_register_policy); + +int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy) +{ + __u32 number = policy->sp_policy; + + LASSERT(number < SPTLRPC_POLICY_MAX); + + write_lock(&policy_lock); + if (unlikely(policies[number] == NULL)) { + write_unlock(&policy_lock); + CERROR("%s: already unregistered\n", policy->sp_name); + return -EINVAL; + } + + LASSERT(policies[number] == policy); + policies[number] = NULL; + write_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_unregister_policy); + +static +struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_sec_flavor_t flavor) +{ + static DECLARE_MUTEX(load_mutex); + static atomic_t loaded = ATOMIC_INIT(0); + struct ptlrpc_sec_policy *policy; + __u32 number = SEC_FLAVOR_POLICY(flavor), flag = 0; + + if (number >= SPTLRPC_POLICY_MAX) + return NULL; + +again: + read_lock(&policy_lock); + policy = policies[number]; + if (policy && !try_module_get(policy->sp_owner)) + policy = NULL; + if (policy == NULL) + flag = atomic_read(&loaded); + read_unlock(&policy_lock); + +#ifdef CONFIG_KMOD + /* if failure, try to load gss module, once */ + if (unlikely(policy == NULL) && + number == SPTLRPC_POLICY_GSS && flag == 0) { + mutex_down(&load_mutex); + if (atomic_read(&loaded) == 0) { + if (request_module("ptlrpc_gss") != 0) + CERROR("Unable to load module ptlrpc_gss\n"); + else + CWARN("module ptlrpc_gss loaded\n"); + + atomic_set(&loaded, 1); + } + mutex_up(&load_mutex); + + goto again; + } +#endif + + return policy; +} + +ptlrpc_sec_flavor_t sptlrpc_name2flavor(const char *name) +{ + if (!strcmp(name, "null")) + return SPTLRPC_FLVR_NULL; + if (!strcmp(name, "plain")) + return SPTLRPC_FLVR_PLAIN; + if (!strcmp(name, "krb5")) + return SPTLRPC_FLVR_KRB5; + if (!strcmp(name, "krb5i")) + return SPTLRPC_FLVR_KRB5I; + if (!strcmp(name, "krb5p")) + return SPTLRPC_FLVR_KRB5P; + + return SPTLRPC_FLVR_INVALID; +} +EXPORT_SYMBOL(sptlrpc_name2flavor); + +char *sptlrpc_flavor2name(ptlrpc_sec_flavor_t flavor) +{ + switch (flavor) { + case SPTLRPC_FLVR_NULL: + return "null"; + case SPTLRPC_FLVR_PLAIN: + return "plain"; + case SPTLRPC_FLVR_KRB5: + return "krb5"; + case SPTLRPC_FLVR_KRB5I: + return "krb5i"; + case SPTLRPC_FLVR_KRB5P: + return "krb5p"; + default: + CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor, + SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor), + SEC_FLAVOR_SVC(flavor)); + } + return "UNKNOWN"; +} +EXPORT_SYMBOL(sptlrpc_flavor2name); + +/*********************************************** + * context helpers * + * internal APIs * + * cache management * + ***********************************************/ + +static inline +unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK); +} + +static inline +int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx) +{ + return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE); +} + +static inline +int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx) +{ + return (ctx_status(ctx) != 0); +} + +static inline +int ctx_is_dead(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0); +} + +static inline +int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0); +} + +static +int ctx_expire(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount)); + + if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) { + cfs_time_t now = cfs_time_current_sec(); + + smp_mb(); + clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); + + if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire)) + CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n", + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), + cfs_time_sub(now, ctx->cc_expire)); + else + CWARN("ctx %p(%u->%s): force to die (%lds remains)\n", + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), + ctx->cc_expire == 0 ? 0 : + cfs_time_sub(ctx->cc_expire, now)); + + return 1; + } + return 0; +} + +static +void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash) +{ + set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); + atomic_inc(&ctx->cc_refcount); + hlist_add_head(&ctx->cc_hash, hash); +} + +static +void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) +{ + LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); + LASSERT(!hlist_unhashed(&ctx->cc_hash)); + + clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); + + if (atomic_dec_and_test(&ctx->cc_refcount)) { + __hlist_del(&ctx->cc_hash); + hlist_add_head(&ctx->cc_hash, freelist); + } else + hlist_del_init(&ctx->cc_hash); +} + +/* + * return 1 if the context is dead. + */ +static +int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) +{ + if (unlikely(ctx_is_dead(ctx))) + goto unhash; + + /* expire is 0 means never expire. a newly created gss context + * which during upcall also has 0 expiration + */ + smp_mb(); + if (ctx->cc_expire == 0) + return 0; + + /* check real expiration */ + smp_mb(); + if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec())) + return 0; + + ctx_expire(ctx); + +unhash: + if (freelist) + ctx_unhash(ctx, freelist); + + return 1; +} + +static inline +int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx, + struct hlist_head *freelist) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); + + return ctx_check_death(ctx, freelist); +} + +static +int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx)) + return 1; + return 0; +} + +static inline +int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred) +{ + /* a little bit optimization for null policy */ + if (!ctx->cc_ops->match) + return 1; + + return ctx->cc_ops->match(ctx, vcred); +} + +static +void ctx_list_destroy(struct hlist_head *head) +{ + struct ptlrpc_cli_ctx *ctx; + + while (!hlist_empty(head)) { + ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash); + + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); + + hlist_del_init(&ctx->cc_hash); + sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx); + } +} + +static +void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + int i; + ENTRY; + + CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec); + + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) + ctx_check_death_locked(ctx, freelist); + } + + sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval; + EXIT; +} + +/* + * @uid: which user. "-1" means flush all. + * @grace: mark context DEAD, allow graceful destroy like notify + * server side, etc. + * @force: also flush busy entries. + * + * return the number of busy context encountered. + * + * In any cases, never touch "eternal" contexts. + */ +static +int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + int i, busy = 0; + ENTRY; + + might_sleep_if(grace); + + spin_lock(&sec->ps_lock); + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) { + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (ctx_is_eternal(ctx)) + continue; + if (uid != -1 && uid != ctx->cc_vcred.vc_uid) + continue; + + if (atomic_read(&ctx->cc_refcount) > 1) { + busy++; + if (!force) + continue; + + CWARN("flush busy(%d) ctx %p(%u->%s) by force, " + "grace %d\n", + atomic_read(&ctx->cc_refcount), + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), grace); + } + ctx_unhash(ctx, &freelist); + + set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags); + if (!grace) + clear_bit(PTLRPC_CTX_UPTODATE_BIT, + &ctx->cc_flags); + } + } + spin_unlock(&sec->ps_lock); + + ctx_list_destroy(&freelist); + RETURN(busy); +} + +static inline +unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key) +{ + return (unsigned int) (key & (sec->ps_ccache_size - 1)); +} + +/* + * return matched context. If it's a newly created one, we also give the + * first push to refresh. return NULL if error happens. + */ +static +struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec, + struct vfs_cred *vcred, + int create, int remove_dead) +{ + struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL; + struct hlist_head *hash_head; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + unsigned int hash, gc = 0, found = 0; + ENTRY; + + might_sleep(); + + hash = ctx_hash_index(sec, (__u64) vcred->vc_uid); + LASSERT(hash < sec->ps_ccache_size); + hash_head = &sec->ps_ccache[hash]; + +retry: + spin_lock(&sec->ps_lock); + + /* gc_next == 0 means never do gc */ + if (remove_dead && sec->ps_gc_next && + cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) { + ctx_cache_gc(sec, &freelist); + gc = 1; + } + + hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) { + if (gc == 0 && + ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL)) + continue; + + if (ctx_match(ctx, vcred)) { + found = 1; + break; + } + } + + if (found) { + if (new && new != ctx) { + /* lost the race, just free it */ + hlist_add_head(&new->cc_hash, &freelist); + new = NULL; + } + + /* hot node, move to head */ + if (hash_head->first != &ctx->cc_hash) { + __hlist_del(&ctx->cc_hash); + hlist_add_head(&ctx->cc_hash, hash_head); + } + } else { + /* don't allocate for reverse sec */ + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + spin_unlock(&sec->ps_lock); + RETURN(NULL); + } + + if (new) { + ctx_enhash(new, hash_head); + ctx = new; + } else if (create) { + spin_unlock(&sec->ps_lock); + new = sec->ps_policy->sp_cops->create_ctx(sec, vcred); + if (new) { + atomic_inc(&sec->ps_busy); + goto retry; + } + } else + ctx = NULL; + } + + /* hold a ref */ + if (ctx) + atomic_inc(&ctx->cc_refcount); + + spin_unlock(&sec->ps_lock); + + /* the allocator of the context must give the first push to refresh */ + if (new) { + LASSERT(new == ctx); + sptlrpc_ctx_refresh(new); + } + + ctx_list_destroy(&freelist); + RETURN(ctx); +} + +static inline +struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec) +{ + struct vfs_cred vcred; + int create = 1, remove_dead = 1; + + LASSERT(sec); + + if (sec->ps_flags & (PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY)) { + vcred.vc_uid = 0; + vcred.vc_gid = 0; + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + create = 0; + remove_dead = 0; + } + } else { + vcred.vc_uid = cfs_current()->uid; + vcred.vc_gid = cfs_current()->gid; + } + + if (sec->ps_policy->sp_cops->lookup_ctx) + return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred); + else + return ctx_cache_lookup(sec, &vcred, create, remove_dead); +} + +/************************************************** + * client context APIs * + **************************************************/ + +static +void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh) + ctx->cc_ops->refresh(ctx); +} + +struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + atomic_inc(&ctx->cc_refcount); + return ctx; +} +EXPORT_SYMBOL(sptlrpc_ctx_get); + +void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) +{ + struct ptlrpc_sec *sec = ctx->cc_sec; + + LASSERT(sec); + LASSERT(atomic_read(&ctx->cc_refcount)); + + if (!atomic_dec_and_test(&ctx->cc_refcount)) + return; + + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); + LASSERT(hlist_unhashed(&ctx->cc_hash)); + + /* if required async, we must clear the UPTODATE bit to prevent extra + * rpcs during destroy procedure. + */ + if (!sync) + clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); + + /* destroy this context */ + if (!sptlrpc_sec_destroy_ctx(sec, ctx)) + return; + + CWARN("%s@%p: put last ctx, also destroy the sec\n", + sec->ps_policy->sp_name, sec); + + sptlrpc_sec_destroy(sec); +} +EXPORT_SYMBOL(sptlrpc_ctx_put); + +/* + * mark a ctx as DEAD, and pull it out from hash table. + * + * NOTE: the caller must hold at least 1 ref on the ctx. + */ +void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + ctx_expire(ctx); + + spin_lock(&ctx->cc_sec->ps_lock); + + if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) { + LASSERT(!hlist_unhashed(&ctx->cc_hash)); + LASSERT(atomic_read(&ctx->cc_refcount) > 1); + + hlist_del_init(&ctx->cc_hash); + if (atomic_dec_and_test(&ctx->cc_refcount)) + LBUG(); + } + + spin_unlock(&ctx->cc_sec->ps_lock); +} +EXPORT_SYMBOL(sptlrpc_ctx_expire); + +void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + unsigned int hash; + ENTRY; + + hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid); + LASSERT(hash < sec->ps_ccache_size); + + spin_lock(&sec->ps_lock); + + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[hash], cc_hash) { + if (!ctx_match(ctx, &new->cc_vcred)) + continue; + + ctx_expire(ctx); + ctx_unhash(ctx, &freelist); + break; + } + + ctx_enhash(new, &sec->ps_ccache[hash]); + atomic_inc(&sec->ps_busy); + + spin_unlock(&sec->ps_lock); + + ctx_list_destroy(&freelist); + EXIT; +} +EXPORT_SYMBOL(sptlrpc_ctx_replace); + +int sptlrpc_req_get_ctx(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + ENTRY; + + LASSERT(!req->rq_cli_ctx); + LASSERT(imp); + + if (imp->imp_sec == NULL) { + CERROR("import %p (%s) with no sec pointer\n", + imp, ptlrpc_import_state_name(imp->imp_state)); + RETURN(-EACCES); + } + + req->rq_cli_ctx = get_my_ctx(imp->imp_sec); + + if (!req->rq_cli_ctx) { + CERROR("req %p: fail to get context from cache\n", req); + RETURN(-ENOMEM); + } + + RETURN(0); +} + +void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_request *req, *next; + + spin_lock(&ctx->cc_lock); + list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) { + list_del_init(&req->rq_ctx_chain); + ptlrpc_wake_client_req(req); + } + spin_unlock(&ctx->cc_lock); +} +EXPORT_SYMBOL(sptlrpc_ctx_wakeup); + +int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) +{ + LASSERT(ctx->cc_ops); + + if (ctx->cc_ops->display == NULL) + return 0; + + return ctx->cc_ops->display(ctx, buf, bufsize); +} + +void sptlrpc_req_put_ctx(struct ptlrpc_request *req) +{ + ENTRY; + + LASSERT(req); + LASSERT(req->rq_cli_ctx); + + /* request might be asked to release earlier while still + * in the context waiting list. + */ + if (!list_empty(&req->rq_ctx_chain)) { + spin_lock(&req->rq_cli_ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&req->rq_cli_ctx->cc_lock); + } + + /* this could be called with spinlock hold, use async mode */ + sptlrpc_ctx_put(req->rq_cli_ctx, 0); + req->rq_cli_ctx = NULL; + EXIT; +} + +/* + * request must have a context. if failed to get new context, + * just restore the old one + */ +int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc; + ENTRY; + + LASSERT(ctx); + LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)); + + /* make sure not on context waiting list */ + spin_lock(&ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + + sptlrpc_ctx_get(ctx); + sptlrpc_req_put_ctx(req); + rc = sptlrpc_req_get_ctx(req); + if (!rc) { + LASSERT(req->rq_cli_ctx); + LASSERT(req->rq_cli_ctx != ctx); + sptlrpc_ctx_put(ctx, 1); + } else { + LASSERT(!req->rq_cli_ctx); + req->rq_cli_ctx = ctx; + } + RETURN(rc); +} +EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx); + +static +int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + if (ctx_is_refreshed(ctx)) + return 1; + return 0; +} + +static +int ctx_refresh_timeout(void *data) +{ + struct ptlrpc_request *req = data; + int rc; + + /* conn_cnt is needed in expire_one_request */ + lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); + + rc = ptlrpc_expire_one_request(req); + /* if we started recovery, we should mark this ctx dead; otherwise + * in case of lgssd died nobody would retire this ctx, following + * connecting will still find the same ctx thus cause deadlock. + * there's an assumption that expire time of the request should be + * later than the context refresh expire time. + */ + if (rc == 0) + ctx_expire(req->rq_cli_ctx); + return rc; +} + +static +void ctx_refresh_interrupt(void *data) +{ + /* do nothing */ +} + +/* + * the status of context could be subject to be changed by other threads at any + * time. we allow this race. but once we return with 0, the caller will + * suppose it's uptodated and keep using it until the affected rpc is done. + * + * @timeout: + * < 0 - don't wait + * = 0 - wait until success or fatal error occur + * > 0 - timeout value + * + * return 0 only if the context is uptodated. + */ +int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct l_wait_info lwi; + int rc; + ENTRY; + + LASSERT(ctx); + + /* special ctxs */ + if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini) + RETURN(0); + + /* reverse ctxs, don't refresh */ + if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE) + RETURN(0); + + spin_lock(&ctx->cc_lock); +again: + if (ctx_check_uptodate(ctx)) { + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(0); + } + + if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) { + req->rq_err = 1; + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(-EPERM); + } + + /* This is subtle. For resent message we have to keep original + * context to survive following situation: + * 1. the request sent to server + * 2. recovery was kick start + * 3. recovery finished, the request marked as resent + * 4. resend the request + * 5. old reply from server received (because xid is the same) + * 6. verify reply (has to be success) + * 7. new reply from server received, lnet drop it + * + * Note we can't simply change xid for resent request because + * server reply on it for reply reconstruction. + * + * Commonly the original context should be uptodate because we + * have a expiry nice time; And server will keep their half part + * context because we at least hold a ref of old context which + * prevent the context detroy RPC be sent. So server still can + * accept the request and finish RPC. Two cases: + * 1. If server side context has been trimed, a NO_CONTEXT will + * be returned, gss_cli_ctx_verify/unseal will switch to new + * context by force. + * 2. Current context never be refreshed, then we are fine: we + * never really send request with old context before. + */ + if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) && + req->rq_reqmsg && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(0); + } + + if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { + spin_unlock(&ctx->cc_lock); + + /* don't have to, but we don't want to release it too soon */ + sptlrpc_ctx_get(ctx); + + rc = sptlrpc_req_replace_dead_ctx(req); + if (rc) { + LASSERT(ctx == req->rq_cli_ctx); + CERROR("req %p: failed to replace dead ctx %p\n", + req, ctx); + req->rq_err = 1; + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_ctx_put(ctx, 1); + RETURN(-ENOMEM); + } + + LASSERT(ctx != req->rq_cli_ctx); + CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n", + req, ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), req->rq_cli_ctx); + + sptlrpc_ctx_put(ctx, 1); + ctx = req->rq_cli_ctx; + LASSERT(list_empty(&req->rq_ctx_chain)); + + spin_lock(&ctx->cc_lock); + goto again; + } + + /* Now we're sure this context is during upcall, add myself into + * waiting list + */ + if (list_empty(&req->rq_ctx_chain)) + list_add(&req->rq_ctx_chain, &ctx->cc_req_list); + + spin_unlock(&ctx->cc_lock); + + if (timeout < 0) { + RETURN(-EWOULDBLOCK); + } + + /* Clear any flags that may be present from previous sends */ + LASSERT(req->rq_receiving_reply == 0); + spin_lock(&req->rq_lock); + req->rq_err = 0; + req->rq_timedout = 0; + req->rq_resend = 0; + req->rq_restart = 0; + spin_unlock(&req->rq_lock); + + lwi = LWI_TIMEOUT_INTR(timeout == 0 ? LONG_MAX : timeout * HZ, + ctx_refresh_timeout, ctx_refresh_interrupt, req); + rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); + + spin_lock(&ctx->cc_lock); + /* five cases we are here: + * 1. successfully refreshed; + * 2. someone else mark this ctx dead by force; + * 3. interruptted; + * 4. timedout, and we don't want recover from the failure; + * 5. timedout, and waked up upon recovery finished; + */ + if (!ctx_is_refreshed(ctx)) { + /* timed out or interruptted */ + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + + LASSERT(rc != 0); + RETURN(rc); + } + + goto again; +} + +void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) +{ + struct sec_flavor_config *conf; + + LASSERT(req->rq_import); + LASSERT(req->rq_import->imp_sec); + LASSERT(req->rq_cli_ctx); + LASSERT(req->rq_cli_ctx->cc_sec); + LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0); + + /* special security flags accoding to opcode */ + switch (opcode) { + case OST_READ: + req->rq_bulk_read = 1; + break; + case OST_WRITE: + req->rq_bulk_write = 1; + break; + case SEC_CTX_INIT: + req->rq_ctx_init = 1; + break; + case SEC_CTX_FINI: + req->rq_ctx_fini = 1; + break; + } + + req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor; + + /* force SVC_NONE for context initiation rpc, SVC_AUTH for context + * destruction rpc + */ + if (unlikely(req->rq_ctx_init)) { + req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), + SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE)); + } else if (unlikely(req->rq_ctx_fini)) { + req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), + SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH)); + } + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + + /* user descriptor flag, except ROOTONLY which don't need, and + * null security which can't + */ + if ((conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) == 0 && + req->rq_sec_flavor != SPTLRPC_FLVR_NULL) + req->rq_sec_flavor |= SEC_FLAVOR_FL_USER; + + /* bulk security flag */ + if ((req->rq_bulk_read || req->rq_bulk_write) && + (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL || + conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL)) + req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK; +} + +void sptlrpc_request_out_callback(struct ptlrpc_request *req) +{ + if (SEC_FLAVOR_SVC(req->rq_sec_flavor) != SPTLRPC_SVC_PRIV) + return; + + LASSERT(req->rq_clrbuf); + if (req->rq_pool || !req->rq_reqbuf) + return; + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; +} + +/* + * check whether current user have valid context for an import or not. + * might repeatedly try in case of non-fatal errors. + * return 0 on success, < 0 on failure + */ +int sptlrpc_import_check_ctx(struct obd_import *imp) +{ + struct ptlrpc_cli_ctx *ctx; + struct ptlrpc_request *req = NULL; + int rc; + ENTRY; + + might_sleep(); + + ctx = get_my_ctx(imp->imp_sec); + if (!ctx) + RETURN(1); + + if (ctx_is_eternal(ctx)) { + sptlrpc_ctx_put(ctx, 1); + RETURN(0); + } + + OBD_ALLOC_PTR(req); + if (!req) + RETURN(-ENOMEM); + + spin_lock_init(&req->rq_lock); + atomic_set(&req->rq_refcount, 10000); + INIT_LIST_HEAD(&req->rq_ctx_chain); + init_waitqueue_head(&req->rq_reply_waitq); + req->rq_import = imp; + req->rq_cli_ctx = ctx; + + rc = sptlrpc_req_refresh_ctx(req, 0); + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_ctx_put(req->rq_cli_ctx, 1); + OBD_FREE_PTR(req); + + RETURN(rc); +} + +int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc = 0; + ENTRY; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* we wrap bulk request here because now we can be sure + * the context is uptodate. + */ + if (req->rq_bulk) { + rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk); + if (rc) + RETURN(rc); + } + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_NONE: + case SPTLRPC_SVC_AUTH: + LASSERT(ctx->cc_ops->sign); + rc = ctx->cc_ops->sign(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->seal); + rc = ctx->cc_ops->seal(ctx, req); + break; + default: + LBUG(); + } + + if (rc == 0) { + LASSERT(req->rq_reqdata_len); + LASSERT(req->rq_reqdata_len % 8 == 0); + LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len); + } + + RETURN(rc); +} + +/* + * rq_nob_received is the actual received data length + */ +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc; + ENTRY; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_ops); + LASSERT(req->rq_repbuf); + + req->rq_repdata_len = req->rq_nob_received; + + if (req->rq_nob_received < sizeof(struct lustre_msg)) { + CERROR("replied data length %d too small\n", + req->rq_nob_received); + RETURN(-EPROTO); + } + + if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 || + req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) { + /* it's must be null flavor, so our requets also should be + * in null flavor */ + if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) { + CERROR("request flavor is %x but reply with null\n", + req->rq_sec_flavor); + RETURN(-EPROTO); + } + } else { + /* v2 message... */ + ptlrpc_sec_flavor_t tmpf = req->rq_repbuf->lm_secflvr; + + if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED) + __swab32s(&tmpf); + + if (SEC_FLAVOR_POLICY(tmpf) != + SEC_FLAVOR_POLICY(req->rq_sec_flavor)) { + CERROR("request policy %u while reply with %d\n", + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_POLICY(tmpf)); + RETURN(-EPROTO); + } + + if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) && + lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received)) + RETURN(-EPROTO); + } + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_NONE: + case SPTLRPC_SVC_AUTH: + LASSERT(ctx->cc_ops->verify); + rc = ctx->cc_ops->verify(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->unseal); + rc = ctx->cc_ops->unseal(ctx, req); + break; + default: + LBUG(); + } + + LASSERT(rc || req->rq_repmsg || req->rq_resend); + RETURN(rc); +} + +/************************************************** + * security APIs * + **************************************************/ + +/* + * let policy module to determine whether take refrence of + * import or not. + */ +static +struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_sec *sec; + ENTRY; + + flavor = SEC_FLAVOR_RPC(flavor); + + if (ctx) { + LASSERT(imp->imp_dlm_fake == 1); + + CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(flavor)); + + policy = sptlrpc_policy_get(ctx->sc_policy); + flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY; + } else { + LASSERT(imp->imp_dlm_fake == 0); + + CDEBUG(D_SEC, "%s %s: select security flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(flavor)); + + policy = sptlrpc_flavor2policy(flavor); + if (!policy) { + CERROR("invalid flavor 0x%x\n", flavor); + RETURN(NULL); + } + } + + sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags); + if (sec) { + atomic_inc(&sec->ps_refcount); + + /* take 1 busy count on behalf of sec itself, + * balanced in sptlrpc_set_put() + */ + atomic_inc(&sec->ps_busy); + } else + sptlrpc_policy_put(policy); + + RETURN(sec); +} + +static +void sptlrpc_sec_destroy(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT(policy); + LASSERT(atomic_read(&sec->ps_refcount) == 0); + LASSERT(atomic_read(&sec->ps_busy) == 0); + LASSERT(policy->sp_cops->destroy_sec); + + policy->sp_cops->destroy_sec(sec); + sptlrpc_policy_put(policy); +} + +static +void sptlrpc_sec_put(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + if (!atomic_dec_and_test(&sec->ps_refcount)) { + sptlrpc_policy_put(policy); + return; + } + + ctx_cache_flush(sec, -1, 1, 1); + + if (atomic_dec_and_test(&sec->ps_busy)) + sptlrpc_sec_destroy(sec); + else + CWARN("delay to destroy %s@%p: busy contexts\n", + policy->sp_name, sec); +} + +/* + * return 1 means we should also destroy the sec structure. + * normally return 0 + */ +static +int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(sec == ctx->cc_sec); + LASSERT(atomic_read(&sec->ps_busy)); + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + LASSERT(hlist_unhashed(&ctx->cc_hash)); + LASSERT(list_empty(&ctx->cc_req_list)); + LASSERT(sec->ps_policy->sp_cops->destroy_ctx); + + sec->ps_policy->sp_cops->destroy_ctx(sec, ctx); + + if (atomic_dec_and_test(&sec->ps_busy)) { + LASSERT(atomic_read(&sec->ps_refcount) == 0); + return 1; + } + + return 0; +} + +/* + * when complete successfully, req->rq_reqmsg should point to the + * right place. + */ +int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + int rc; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_reqmsg == NULL); + + policy = ctx->cc_sec->ps_policy; + rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize); + if (!rc) { + LASSERT(req->rq_reqmsg); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* zeroing preallocated buffer */ + if (req->rq_pool) + memset(req->rq_reqmsg, 0, msgsize); + } + + return rc; +} + +void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_reqbuf(ctx->cc_sec, req); +} + +/* + * NOTE caller must guarantee the buffer size is enough for the enlargement + */ +void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, + int segment, int newsize) +{ + void *src, *dst; + int oldsize, oldmsg_size, movesize; + + LASSERT(segment < msg->lm_bufcount); + LASSERT(msg->lm_buflens[segment] <= newsize); + + if (msg->lm_buflens[segment] == newsize) + return; + + /* nothing to do if we are enlarging the last segment */ + if (segment == msg->lm_bufcount - 1) { + msg->lm_buflens[segment] = newsize; + return; + } + + oldsize = msg->lm_buflens[segment]; + + src = lustre_msg_buf(msg, segment + 1, 0); + msg->lm_buflens[segment] = newsize; + dst = lustre_msg_buf(msg, segment + 1, 0); + msg->lm_buflens[segment] = oldsize; + + /* move from segment + 1 to end segment */ + LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2); + oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg); + LASSERT(movesize >= 0); + + if (movesize) + memmove(dst, src, movesize); + + /* note we don't clear the ares where old data live, not secret */ + + /* finally set new segment size */ + msg->lm_buflens[segment] = newsize; +} +EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace); + +/* + * enlarge @segment of upper message req->rq_reqmsg to @newsize, all data + * will be preserved after enlargement. this must be called after rq_reqmsg has + * been intialized at least. + * + * caller's attention: upon return, rq_reqmsg and rq_reqlen might have + * been changed. + */ +int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, + int segment, int newsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_cops *cops; + struct lustre_msg *msg = req->rq_reqmsg; + + LASSERT(ctx); + LASSERT(msg); + LASSERT(msg->lm_bufcount > segment); + LASSERT(msg->lm_buflens[segment] <= newsize); + + if (msg->lm_buflens[segment] == newsize) + return 0; + + cops = ctx->cc_sec->ps_policy->sp_cops; + LASSERT(cops->enlarge_reqbuf); + return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize); +} +EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf); + +int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + ENTRY; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + + if (req->rq_repbuf) + RETURN(0); + + policy = ctx->cc_sec->ps_policy; + RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize)); +} + +void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + ENTRY; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_repbuf); + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_repbuf(ctx->cc_sec, req); + EXIT; +} + +int sptlrpc_import_get_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + struct obd_device *obd = imp->imp_obd; + ENTRY; + + LASSERT(obd); + LASSERT(obd->obd_type); + + /* old sec might be still there in reconnecting */ + if (imp->imp_sec) + RETURN(0); + + imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags); + if (!imp->imp_sec) + RETURN(-EINVAL); + + RETURN(0); +} + +void sptlrpc_import_put_sec(struct obd_import *imp) +{ + if (imp->imp_sec == NULL) + return; + + sptlrpc_sec_put(imp->imp_sec); + imp->imp_sec = NULL; +} + +void sptlrpc_import_flush_root_ctx(struct obd_import *imp) +{ + if (imp == NULL || imp->imp_sec == NULL) + return; + + /* use 'grace' mode, it's crutial see explain in + * sptlrpc_req_refresh_ctx() + */ + ctx_cache_flush(imp->imp_sec, 0, 1, 1); +} + +void sptlrpc_import_flush_my_ctx(struct obd_import *imp) +{ + if (imp == NULL || imp->imp_sec == NULL) + return; + + ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1); +} +EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx); + +void sptlrpc_import_flush_all_ctx(struct obd_import *imp) +{ + if (imp == NULL || imp->imp_sec == NULL) + return; + + ctx_cache_flush(imp->imp_sec, -1, 0, 1); +} +EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx); + +int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy; + + if (!policy->sp_cops->install_rctx) + return 0; + return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx); +} + +int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->sc_policy; + + if (!policy->sp_sops->install_rctx) + return 0; + return policy->sp_sops->install_rctx(imp, ctx); +} + +/**************************************** + * server side security * + ****************************************/ + +int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + struct lustre_msg *msg = req->rq_reqbuf; + int rc; + ENTRY; + + LASSERT(msg); + LASSERT(req->rq_reqmsg == NULL); + LASSERT(req->rq_repmsg == NULL); + + /* + * in any case we avoid to call unpack_msg() for request of null flavor + * which will later be done by ptlrpc_server_handle_request(). + */ + if (req->rq_reqdata_len < sizeof(struct lustre_msg)) { + CERROR("request size %d too small\n", req->rq_reqdata_len); + RETURN(SECSVC_DROP); + } + + if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 || + msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) { + req->rq_sec_flavor = SPTLRPC_FLVR_NULL; + } else { + req->rq_sec_flavor = msg->lm_secflvr; + + if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED) + __swab32s(&req->rq_sec_flavor); + + if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) && + lustre_unpack_msg(msg, req->rq_reqdata_len)) + RETURN(SECSVC_DROP); + } + + policy = sptlrpc_flavor2policy(req->rq_sec_flavor); + if (!policy) { + CERROR("unsupported security flavor %x\n", req->rq_sec_flavor); + RETURN(SECSVC_DROP); + } + + LASSERT(policy->sp_sops->accept); + rc = policy->sp_sops->accept(req); + + LASSERT(req->rq_reqmsg || rc != SECSVC_OK); + sptlrpc_policy_put(policy); + + /* FIXME move to proper place */ + if (rc == SECSVC_OK) { + __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); + + if (opc == OST_WRITE) + req->rq_bulk_write = 1; + else if (opc == OST_READ) + req->rq_bulk_read = 1; + } + + LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP); + RETURN(rc); +} + +int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, + int msglen) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_reply_state *rs; + int rc; + ENTRY; + + LASSERT(req->rq_svc_ctx); + LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->alloc_rs); + + rc = policy->sp_sops->alloc_rs(req, msglen); + if (unlikely(rc == -ENOMEM)) { + /* failed alloc, try emergency pool */ + rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service); + if (rs == NULL) + RETURN(-ENOMEM); + + req->rq_reply_state = rs; + rc = policy->sp_sops->alloc_rs(req, msglen); + if (rc) { + lustre_put_emerg_rs(rs); + req->rq_reply_state = NULL; + } + } + + LASSERT(rc != 0 || + (req->rq_reply_state && req->rq_reply_state->rs_msg)); + + RETURN(rc); +} + +int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + int rc; + ENTRY; + + LASSERT(req->rq_svc_ctx); + LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->authorize); + + rc = policy->sp_sops->authorize(req); + LASSERT(rc || req->rq_reply_state->rs_repdata_len); + + RETURN(rc); +} + +void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_sec_policy *policy; + unsigned int prealloc; + ENTRY; + + LASSERT(rs->rs_svc_ctx); + LASSERT(rs->rs_svc_ctx->sc_policy); + + policy = rs->rs_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->free_rs); + + prealloc = rs->rs_prealloc; + policy->sp_sops->free_rs(rs); + + if (prealloc) + lustre_put_emerg_rs(rs); + EXIT; +} + +void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT(atomic_read(&ctx->sc_refcount) > 0); + atomic_inc(&ctx->sc_refcount); +} + +void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT(atomic_read(&ctx->sc_refcount) > 0); + if (atomic_dec_and_test(&ctx->sc_refcount)) { + if (ctx->sc_policy->sp_sops->free_ctx) + ctx->sc_policy->sp_sops->free_ctx(ctx); + } + req->rq_svc_ctx = NULL; +} + +void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT(atomic_read(&ctx->sc_refcount) > 0); + if (ctx->sc_policy->sp_sops->invalidate_ctx) + ctx->sc_policy->sp_sops->invalidate_ctx(ctx); +} +EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate); + +/**************************************** + * bulk security * + ****************************************/ + +int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx *ctx; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->wrap_bulk) + return ctx->cc_ops->wrap_bulk(ctx, req, desc); + return 0; +} +EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk); + +static +void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga, + struct ptlrpc_bulk_desc *desc) +{ + int i; + + LASSERT(pga); + LASSERT(*pga); + + for (i = 0; i < pg_count && nob > 0; i++) { +#ifdef __KERNEL__ + desc->bd_iov[i].kiov_page = pga[i]->pg; + desc->bd_iov[i].kiov_len = pga[i]->count > nob ? + nob : pga[i]->count; + desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK; +#else +#warning FIXME for liblustre! + desc->bd_iov[i].iov_base = pga[i]->pg->addr; + desc->bd_iov[i].iov_len = pga[i]->count > nob ? + nob : pga[i]->count; +#endif + + desc->bd_iov_count++; + nob -= pga[i]->count; + } +} + +int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, + int nob, obd_count pg_count, + struct brw_page **pga) +{ + struct ptlrpc_bulk_desc *desc; + struct ptlrpc_cli_ctx *ctx; + int rc = 0; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read && !req->rq_bulk_write); + + OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count])); + if (desc == NULL) { + CERROR("out of memory, can't verify bulk read data\n"); + return -ENOMEM; + } + + pga_to_bulk_desc(nob, pg_count, pga, desc); + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->unwrap_bulk) + rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); + + OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count])); + + return rc; +} +EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read); + +int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx *ctx; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(!req->rq_bulk_read && req->rq_bulk_write); + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->unwrap_bulk) + return ctx->cc_ops->unwrap_bulk(ctx, req, desc); + + return 0; +} +EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write); + +int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_svc_ctx *ctx; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + ctx = req->rq_svc_ctx; + if (ctx->sc_policy->sp_sops->wrap_bulk) + return ctx->sc_policy->sp_sops->wrap_bulk(req, desc); + + return 0; +} +EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk); + +int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_svc_ctx *ctx; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + ctx = req->rq_svc_ctx; + if (ctx->sc_policy->sp_sops->unwrap_bulk); + return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc); + + return 0; +} +EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk); + + +/**************************************** + * user descriptor helpers * + ****************************************/ + +int sptlrpc_current_user_desc_size(void) +{ + int ngroups; + +#ifdef __KERNEL__ + ngroups = current_ngroups; + + if (ngroups > LUSTRE_MAX_GROUPS) + ngroups = LUSTRE_MAX_GROUPS; +#else + ngroups = 0; +#endif + return sptlrpc_user_desc_size(ngroups); +} +EXPORT_SYMBOL(sptlrpc_current_user_desc_size); + +int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_user_desc *pud; + + pud = lustre_msg_buf(msg, offset, 0); + + pud->pud_uid = cfs_current()->uid; + pud->pud_gid = cfs_current()->gid; + pud->pud_fsuid = cfs_current()->fsuid; + pud->pud_fsgid = cfs_current()->fsgid; + pud->pud_cap = cfs_current()->cap_effective; + pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4; + +#ifdef __KERNEL__ + task_lock(current); + if (pud->pud_ngroups > current_ngroups) + pud->pud_ngroups = current_ngroups; + memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0], + pud->pud_ngroups * sizeof(__u32)); + task_unlock(current); +#endif + + return 0; +} +EXPORT_SYMBOL(sptlrpc_pack_user_desc); + +int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_user_desc *pud; + int i; + + pud = lustre_msg_buf(msg, offset, sizeof(*pud)); + if (!pud) + return -EINVAL; + + if (lustre_msg_swabbed(msg)) { + __swab32s(&pud->pud_uid); + __swab32s(&pud->pud_gid); + __swab32s(&pud->pud_fsuid); + __swab32s(&pud->pud_fsgid); + __swab32s(&pud->pud_cap); + __swab32s(&pud->pud_ngroups); + } + + if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) { + CERROR("%u groups is too large\n", pud->pud_ngroups); + return -EINVAL; + } + + if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) > + msg->lm_buflens[offset]) { + CERROR("%u groups are claimed but bufsize only %u\n", + pud->pud_ngroups, msg->lm_buflens[offset]); + return -EINVAL; + } + + if (lustre_msg_swabbed(msg)) { + for (i = 0; i < pud->pud_ngroups; i++) + __swab32s(&pud->pud_groups[i]); + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_unpack_user_desc); + +/**************************************** + * user supplied flavor string parsing * + ****************************************/ + +static +int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf) +{ + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_flags = 0; + + switch (to_part) { + case LUSTRE_MDT: + conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN; + return 0; + case LUSTRE_OST: + conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + return 0; + default: + CERROR("Unknown to lustre part %d, apply defaults\n", to_part); + conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + return -EINVAL; + } +} + +static +void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf) +{ + conf->sfc_rpc_flavor = rpc_flavor; + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_flags = 0; + + switch (rpc_flavor) { + case SPTLRPC_FLVR_NULL: + case SPTLRPC_FLVR_PLAIN: + break; + case SPTLRPC_FLVR_KRB5P: + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + /* fall through */ + case SPTLRPC_FLVR_KRB5I: + conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1; + break; + default: + LBUG(); + } +} + +static +void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv, + struct sec_flavor_config *conf) +{ + if (bulk_priv) + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + else + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + + switch (rpc_flavor) { + case SPTLRPC_FLVR_PLAIN: + conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5; + break; + case SPTLRPC_FLVR_KRB5I: + case SPTLRPC_FLVR_KRB5P: + conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1; + break; + default: + LBUG(); + } +} + +static __u32 __flavors[] = { + SPTLRPC_FLVR_NULL, + SPTLRPC_FLVR_PLAIN, + SPTLRPC_FLVR_KRB5I, + SPTLRPC_FLVR_KRB5P, +}; + +#define __nflavors (sizeof(__flavors)/sizeof(__u32)) + +/* + * flavor string format: rpc[-bulk{n|i|p}[:cksum/enc]] + * for examples: + * null + * plain-bulki + * krb5p-bulkn + * krb5i-bulkp + * krb5i-bulkp:sha512/arc4 + */ +int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part, + char *str, struct sec_flavor_config *conf) +{ + char *f, *bulk, *alg, *enc; + char buf[64]; + int i, bulk_priv; + ENTRY; + + if (str == NULL) { + if (get_default_flavor(to_part, conf)) + return -EINVAL; + goto set_flags; + } + + for (i = 0; i < __nflavors; i++) { + f = sptlrpc_flavor2name(__flavors[i]); + if (strncmp(str, f, strlen(f)) == 0) + break; + } + + if (i >= __nflavors) + GOTO(invalid, -EINVAL); + + /* prepare local buffer thus we can modify it as we want */ + strncpy(buf, str, 64); + buf[64 - 1] = '\0'; + + /* find bulk string */ + bulk = strchr(buf, '-'); + if (bulk) + *bulk++ = '\0'; + + /* now the first part must equal to rpc flavor name */ + if (strcmp(buf, f) != 0) + GOTO(invalid, -EINVAL); + + get_flavor_by_rpc(__flavors[i], conf); + + if (bulk == NULL) + goto set_flags; + + /* null flavor should not have any suffix */ + if (__flavors[i] == SPTLRPC_FLVR_NULL) + GOTO(invalid, -EINVAL); + + /* find bulk algorithm string */ + alg = strchr(bulk, ':'); + if (alg) + *alg++ = '\0'; + + /* verify bulk section */ + if (strcmp(bulk, "bulkn") == 0) { + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + goto set_flags; + } + + if (strcmp(bulk, "bulki") == 0) + bulk_priv = 0; + else if (strcmp(bulk, "bulkp") == 0) + bulk_priv = 1; + else + GOTO(invalid, -EINVAL); + + /* plain policy dosen't support bulk encryption */ + if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN) + GOTO(invalid, -EINVAL); + + get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf); + + if (alg == NULL) + goto set_flags; + + /* find encryption algorithm string */ + enc = strchr(alg, '/'); + if (enc) + *enc++ = '\0'; + + /* bulk combination sanity check */ + if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc)) + GOTO(invalid, -EINVAL); + + /* checksum algorithm */ + for (i = 0; i < BULK_CSUM_ALG_MAX; i++) { + if (strcmp(alg, sptlrpc_bulk_csum_alg2name(i)) == 0) { + conf->sfc_bulk_csum = i; + break; + } + } + if (i >= BULK_CSUM_ALG_MAX) + GOTO(invalid, -EINVAL); + + /* privacy algorithm */ + if (enc) { + if (strcmp(enc, "arc4") != 0) + GOTO(invalid, -EINVAL); + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + } + +set_flags: + /* * set ROOTONLY flag: + * - to OST + * - from MDT to MDT + * * set BULK flag for: + * - from CLI to OST + */ + if (to_part == LUSTRE_OST || + (from_part == LUSTRE_MDT && to_part == LUSTRE_MDT)) + conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY; + if (from_part == LUSTRE_CLI && to_part == LUSTRE_OST) + conf->sfc_flags |= PTLRPC_SEC_FL_BULK; + +#ifdef __BIG_ENDIAN + __swab32s(&conf->sfc_rpc_flavor); + __swab32s(&conf->sfc_bulk_csum); + __swab32s(&conf->sfc_bulk_priv); + __swab32s(&conf->sfc_flags); +#endif + return 0; +invalid: + CERROR("invalid flavor string: %s\n", str); + return -EINVAL; +} +EXPORT_SYMBOL(sptlrpc_parse_flavor); + +/**************************************** + * misc helpers * + ****************************************/ + +const char * sec2target_str(struct ptlrpc_sec *sec) +{ + if (!sec || !sec->ps_import || !sec->ps_import->imp_obd) + return "*"; + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) + return "c"; + return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid); +} +EXPORT_SYMBOL(sec2target_str); + +/**************************************** + * initialize/finalize * + ****************************************/ + +int sptlrpc_init(void) +{ + int rc; + + rc = sptlrpc_enc_pool_init(); + if (rc) + goto out; + + rc = sptlrpc_null_init(); + if (rc) + goto out_pool; + + rc = sptlrpc_plain_init(); + if (rc) + goto out_null; + + rc = sptlrpc_lproc_init(); + if (rc) + goto out_plain; + + return 0; + +out_plain: + sptlrpc_plain_fini(); +out_null: + sptlrpc_null_fini(); +out_pool: + sptlrpc_enc_pool_fini(); +out: + return rc; +} + +void sptlrpc_fini(void) +{ + sptlrpc_lproc_fini(); + sptlrpc_plain_fini(); + sptlrpc_null_fini(); + sptlrpc_enc_pool_fini(); +} diff --git a/lustre/ptlrpc/sec_bulk.c b/lustre/ptlrpc/sec_bulk.c new file mode 100644 index 0000000..b8de051 --- /dev/null +++ b/lustre/ptlrpc/sec_bulk.c @@ -0,0 +1,1012 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include <libcfs/libcfs.h> +#ifndef __KERNEL__ +#include <liblustre.h> +#include <libcfs/list.h> +#else +#include <linux/crypto.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_sec.h> + +#include "ptlrpc_internal.h" + +/**************************************** + * bulk encryption page pools * + ****************************************/ + +#ifdef __KERNEL__ + +#define PTRS_PER_PAGE (CFS_PAGE_SIZE / sizeof(void *)) +#define PAGES_PER_POOL (PTRS_PER_PAGE) + +static struct ptlrpc_enc_page_pool { + /* + * constants + */ + unsigned long epp_max_pages; /* maximum pages can hold, const */ + unsigned int epp_max_pools; /* number of pools, const */ + /* + * users of the pools. the capacity grow as more user added, + * but doesn't shrink when users gone -- just current policy. + * during failover there might be user add/remove activities. + */ + atomic_t epp_users; /* shared by how many users (osc) */ + atomic_t epp_users_gone; /* users removed */ + /* + * wait queue in case of not enough free pages. + */ + cfs_waitq_t epp_waitq; /* waiting threads */ + unsigned int epp_waitqlen; /* wait queue length */ + unsigned long epp_pages_short; /* # of pages wanted of in-q users */ + unsigned long epp_adding:1, /* during adding pages */ + epp_full:1; /* pools are all full */ + /* + * in-pool pages bookkeeping + */ + spinlock_t epp_lock; /* protect following fields */ + unsigned long epp_total_pages; /* total pages in pools */ + unsigned long epp_free_pages; /* current pages available */ + /* + * statistics + */ + unsigned int epp_st_adds; + unsigned int epp_st_failadds; /* # of add pages failures */ + unsigned long epp_st_reqs; /* # of get_pages requests */ + unsigned long epp_st_missings; /* # of cache missing */ + unsigned long epp_st_lowfree; /* lowest free pages ever reached */ + unsigned long epp_st_max_wqlen;/* highest waitqueue length ever */ + cfs_time_t epp_st_max_wait; /* in jeffies */ + /* + * pointers to pools + */ + cfs_page_t ***epp_pools; +} page_pools; + +int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + + spin_lock(&page_pools.epp_lock); + + rc = snprintf(page, count, + "physical pages: %lu\n" + "pages per pool: %lu\n" + "max pages: %lu\n" + "max pools: %u\n" + "users: %d - %d\n" + "current waitqueue len: %u\n" + "current pages in short: %lu\n" + "total pages: %lu\n" + "total free: %lu\n" + "add page times: %u\n" + "add page failed times: %u\n" + "total requests: %lu\n" + "cache missing: %lu\n" + "lowest free pages: %lu\n" + "max waitqueue depth: %lu\n" + "max wait time: "CFS_TIME_T"\n" + , + num_physpages, + PAGES_PER_POOL, + page_pools.epp_max_pages, + page_pools.epp_max_pools, + atomic_read(&page_pools.epp_users), + atomic_read(&page_pools.epp_users_gone), + page_pools.epp_waitqlen, + page_pools.epp_pages_short, + page_pools.epp_total_pages, + page_pools.epp_free_pages, + page_pools.epp_st_adds, + page_pools.epp_st_failadds, + page_pools.epp_st_reqs, + page_pools.epp_st_missings, + page_pools.epp_st_lowfree, + page_pools.epp_st_max_wqlen, + page_pools.epp_st_max_wait + ); + + spin_unlock(&page_pools.epp_lock); + return rc; +} + +static inline +int npages_to_npools(unsigned long npages) +{ + return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); +} + +/* + * return how many pages cleaned up. + */ +static unsigned long enc_cleanup_pools(cfs_page_t ***pools, int npools) +{ + unsigned long cleaned = 0; + int i, j; + + for (i = 0; i < npools; i++) { + if (pools[i]) { + for (j = 0; j < PAGES_PER_POOL; j++) { + if (pools[i][j]) { + cfs_free_page(pools[i][j]); + cleaned++; + } + } + OBD_FREE(pools[i], CFS_PAGE_SIZE); + pools[i] = NULL; + } + } + + return cleaned; +} + +/* + * merge @npools pointed by @pools which contains @npages new pages + * into current pools. + * + * we have options to avoid most memory copy with some tricks. but we choose + * the simplest way to avoid complexity. It's not frequently called. + */ +static void enc_insert_pool(cfs_page_t ***pools, int npools, int npages) +{ + int freeslot; + int op_idx, np_idx, og_idx, ng_idx; + int cur_npools, end_npools; + + LASSERT(npages > 0); + LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages); + LASSERT(npages_to_npools(npages) == npools); + + spin_lock(&page_pools.epp_lock); + + /* + * (1) fill all the free slots of current pools. + */ + /* + * free slots are those left by rent pages, and the extra ones with + * index >= eep_total_pages, locate at the tail of last pool. + */ + freeslot = page_pools.epp_total_pages % PAGES_PER_POOL; + if (freeslot != 0) + freeslot = PAGES_PER_POOL - freeslot; + freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages; + + op_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + og_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + np_idx = npools - 1; + ng_idx = (npages - 1) % PAGES_PER_POOL; + + while (freeslot) { + LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL); + LASSERT(pools[np_idx][ng_idx] != NULL); + + page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; + pools[np_idx][ng_idx] = NULL; + + freeslot--; + + if (++og_idx == PAGES_PER_POOL) { + op_idx++; + og_idx = 0; + } + if (--ng_idx < 0) { + if (np_idx == 0) + break; + np_idx--; + ng_idx = PAGES_PER_POOL - 1; + } + } + + /* + * (2) add pools if needed. + */ + cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) / + PAGES_PER_POOL; + end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) / + PAGES_PER_POOL; + LASSERT(end_npools <= page_pools.epp_max_pools); + + np_idx = 0; + while (cur_npools < end_npools) { + LASSERT(page_pools.epp_pools[cur_npools] == NULL); + LASSERT(np_idx < npools); + LASSERT(pools[np_idx] != NULL); + + page_pools.epp_pools[cur_npools++] = pools[np_idx]; + pools[np_idx++] = NULL; + } + + page_pools.epp_total_pages += npages; + page_pools.epp_free_pages += npages; + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + if (page_pools.epp_total_pages == page_pools.epp_max_pages) + page_pools.epp_full = 1; + + CDEBUG(D_SEC, "add %d pages to total %lu\n", npages, + page_pools.epp_total_pages); + + spin_unlock(&page_pools.epp_lock); +} + +static int enc_pools_add_pages(int npages) +{ + static DECLARE_MUTEX(sem_add_pages); + cfs_page_t ***pools; + int npools, alloced = 0; + int i, j, rc = -ENOMEM; + + down(&sem_add_pages); + + if (npages > page_pools.epp_max_pages - page_pools.epp_total_pages) + npages = page_pools.epp_max_pages - page_pools.epp_total_pages; + if (npages == 0) { + rc = 0; + goto out; + } + + page_pools.epp_st_adds++; + + npools = npages_to_npools(npages); + OBD_ALLOC(pools, npools * sizeof(*pools)); + if (pools == NULL) + goto out; + + for (i = 0; i < npools; i++) { + OBD_ALLOC(pools[i], CFS_PAGE_SIZE); + if (pools[i] == NULL) + goto out_pools; + + for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { + pools[i][j] = cfs_alloc_page(CFS_ALLOC_IO | + CFS_ALLOC_HIGH); + if (pools[i][j] == NULL) + goto out_pools; + + alloced++; + } + } + + enc_insert_pool(pools, npools, npages); + CDEBUG(D_SEC, "add %d pages into enc page pools\n", npages); + rc = 0; + +out_pools: + enc_cleanup_pools(pools, npools); + OBD_FREE(pools, npools * sizeof(*pools)); +out: + if (rc) { + page_pools.epp_st_failadds++; + CERROR("Failed to pre-allocate %d enc pages\n", npages); + } + + up(&sem_add_pages); + return rc; +} + +/* + * both "max bulk rpcs inflight" and "lnet MTU" are tunable, we use the + * default fixed value initially. + */ +int sptlrpc_enc_pool_add_user(void) +{ + int page_plus = PTLRPC_MAX_BRW_PAGES * OSC_MAX_RIF_DEFAULT; + int users, users_gone, shift, rc; + + LASSERT(!in_interrupt()); + LASSERT(atomic_read(&page_pools.epp_users) >= 0); + + users_gone = atomic_dec_return(&page_pools.epp_users_gone); + if (users_gone >= 0) { + CWARN("%d users gone, skip\n", users_gone + 1); + return 0; + } + atomic_inc(&page_pools.epp_users_gone); + + /* + * prepare full pages for first 2 users; 1/2 for next 2 users; + * 1/4 for next 4 users; 1/8 for next 8 users; 1/16 for next 16 users; + * ... + */ + users = atomic_add_return(1, &page_pools.epp_users); + shift = fls(users - 1); + shift = shift > 1 ? shift - 1 : 0; + page_plus = page_plus >> shift; + page_plus = page_plus > 2 ? page_plus : 2; + + rc = enc_pools_add_pages(page_plus); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_add_user); + +int sptlrpc_enc_pool_del_user(void) +{ + atomic_inc(&page_pools.epp_users_gone); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_del_user); + +/* + * we allocate the requested pages atomically. + */ +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) +{ + cfs_waitlink_t waitlink; + cfs_time_t tick1 = 0, tick2; + int p_idx, g_idx; + int i; + + LASSERT(desc->bd_max_iov > 0); + LASSERT(desc->bd_max_iov <= page_pools.epp_total_pages); + + /* resent bulk, enc pages might have been allocated previously */ + if (desc->bd_enc_pages != NULL) + return 0; + + OBD_ALLOC(desc->bd_enc_pages, + desc->bd_max_iov * sizeof(*desc->bd_enc_pages)); + if (desc->bd_enc_pages == NULL) + return -ENOMEM; + + spin_lock(&page_pools.epp_lock); +again: + page_pools.epp_st_reqs++; + + if (unlikely(page_pools.epp_free_pages < desc->bd_max_iov)) { + if (tick1 == 0) + tick1 = cfs_time_current(); + + page_pools.epp_st_missings++; + page_pools.epp_pages_short += desc->bd_max_iov; + + if (++page_pools.epp_waitqlen > page_pools.epp_st_max_wqlen) + page_pools.epp_st_max_wqlen = page_pools.epp_waitqlen; + /* + * we just wait if someone else is adding more pages, or + * wait queue length is not deep enough. otherwise try to + * add more pages in the pools. + * + * FIXME the policy of detecting resource tight & growing pool + * need to be reconsidered. + */ + if (page_pools.epp_adding || page_pools.epp_waitqlen < 2 || + page_pools.epp_full) { + set_current_state(TASK_UNINTERRUPTIBLE); + cfs_waitlink_init(&waitlink); + cfs_waitq_add(&page_pools.epp_waitq, &waitlink); + + spin_unlock(&page_pools.epp_lock); + cfs_schedule(); + spin_lock(&page_pools.epp_lock); + } else { + page_pools.epp_adding = 1; + + spin_unlock(&page_pools.epp_lock); + enc_pools_add_pages(page_pools.epp_pages_short / 2); + spin_lock(&page_pools.epp_lock); + + page_pools.epp_adding = 0; + } + + LASSERT(page_pools.epp_pages_short >= desc->bd_max_iov); + LASSERT(page_pools.epp_waitqlen > 0); + page_pools.epp_pages_short -= desc->bd_max_iov; + page_pools.epp_waitqlen--; + + goto again; + } + /* + * record max wait time + */ + if (unlikely(tick1 != 0)) { + tick2 = cfs_time_current(); + if (tick2 - tick1 > page_pools.epp_st_max_wait) + page_pools.epp_st_max_wait = tick2 - tick1; + } + /* + * proceed with rest of allocation + */ + page_pools.epp_free_pages -= desc->bd_max_iov; + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + for (i = 0; i < desc->bd_max_iov; i++) { + LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); + desc->bd_enc_pages[i] = page_pools.epp_pools[p_idx][g_idx]; + page_pools.epp_pools[p_idx][g_idx] = NULL; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + if (page_pools.epp_free_pages < page_pools.epp_st_lowfree) + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + spin_unlock(&page_pools.epp_lock); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages); + +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) +{ + int p_idx, g_idx; + int i; + + if (desc->bd_enc_pages == NULL) + return; + if (desc->bd_max_iov == 0) + return; + + spin_lock(&page_pools.epp_lock); + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + LASSERT(page_pools.epp_free_pages + desc->bd_max_iov <= + page_pools.epp_total_pages); + LASSERT(page_pools.epp_pools[p_idx]); + + for (i = 0; i < desc->bd_max_iov; i++) { + LASSERT(desc->bd_enc_pages[i] != NULL); + LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); + LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL); + + page_pools.epp_pools[p_idx][g_idx] = desc->bd_enc_pages[i]; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + page_pools.epp_free_pages += desc->bd_max_iov; + + if (unlikely(page_pools.epp_waitqlen)) { + LASSERT(page_pools.epp_waitqlen > 0); + LASSERT(cfs_waitq_active(&page_pools.epp_waitq)); + cfs_waitq_broadcast(&page_pools.epp_waitq); + } + + spin_unlock(&page_pools.epp_lock); + + OBD_FREE(desc->bd_enc_pages, + desc->bd_max_iov * sizeof(*desc->bd_enc_pages)); + desc->bd_enc_pages = NULL; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages); + +int sptlrpc_enc_pool_init(void) +{ + /* constants */ + page_pools.epp_max_pages = num_physpages / 4; + page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); + + atomic_set(&page_pools.epp_users, 0); + atomic_set(&page_pools.epp_users_gone, 0); + + cfs_waitq_init(&page_pools.epp_waitq); + page_pools.epp_waitqlen = 0; + page_pools.epp_pages_short = 0; + + page_pools.epp_adding = 0; + page_pools.epp_full = 0; + + spin_lock_init(&page_pools.epp_lock); + page_pools.epp_total_pages = 0; + page_pools.epp_free_pages = 0; + + page_pools.epp_st_adds = 0; + page_pools.epp_st_failadds = 0; + page_pools.epp_st_reqs = 0; + page_pools.epp_st_missings = 0; + page_pools.epp_st_lowfree = 0; + page_pools.epp_st_max_wqlen = 0; + page_pools.epp_st_max_wait = 0; + + OBD_ALLOC(page_pools.epp_pools, + page_pools.epp_max_pools * sizeof(*page_pools.epp_pools)); + if (page_pools.epp_pools == NULL) + return -ENOMEM; + + return 0; +} + +void sptlrpc_enc_pool_fini(void) +{ + unsigned long cleaned, npools; + + LASSERT(page_pools.epp_pools); + LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); + + npools = npages_to_npools(page_pools.epp_total_pages); + cleaned = enc_cleanup_pools(page_pools.epp_pools, npools); + LASSERT(cleaned == page_pools.epp_total_pages); + + OBD_FREE(page_pools.epp_pools, + page_pools.epp_max_pools * sizeof(*page_pools.epp_pools)); +} + +#else /* !__KERNEL__ */ + +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) +{ + return 0; +} + +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) +{ +} + +int sptlrpc_enc_pool_init(void) +{ + return 0; +} + +void sptlrpc_enc_pool_fini(void) +{ +} +#endif + +/**************************************** + * Helpers to assist policy modules to * + * implement checksum funcationality * + ****************************************/ + +static struct { + char *name; + int size; +} csum_types[] = { + [BULK_CSUM_ALG_NULL] = { "null", 0 }, + [BULK_CSUM_ALG_CRC32] = { "crc32", 4 }, + [BULK_CSUM_ALG_MD5] = { "md5", 16 }, + [BULK_CSUM_ALG_SHA1] = { "sha1", 20 }, + [BULK_CSUM_ALG_SHA256] = { "sha256", 32 }, + [BULK_CSUM_ALG_SHA384] = { "sha384", 48 }, + [BULK_CSUM_ALG_SHA512] = { "sha512", 64 }, +}; + +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg) +{ + if (csum_alg < BULK_CSUM_ALG_MAX) + return csum_types[csum_alg].name; + return "unknown_cksum"; +} +EXPORT_SYMBOL(sptlrpc_bulk_csum_alg2name); + +int bulk_sec_desc_size(__u32 csum_alg, int request, int read) +{ + int size = sizeof(struct ptlrpc_bulk_sec_desc); + + LASSERT(csum_alg < BULK_CSUM_ALG_MAX); + + /* read request don't need extra data */ + if (!(read && request)) + size += csum_types[csum_alg].size; + + return size; +} +EXPORT_SYMBOL(bulk_sec_desc_size); + +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_bulk_sec_desc *bsd; + int size = msg->lm_buflens[offset]; + + bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); + if (bsd == NULL) { + CERROR("Invalid bulk sec desc: size %d\n", size); + return -EINVAL; + } + + if (lustre_msg_swabbed(msg)) { + __swab32s(&bsd->bsd_version); + __swab32s(&bsd->bsd_pad); + __swab32s(&bsd->bsd_csum_alg); + __swab32s(&bsd->bsd_priv_alg); + } + + if (bsd->bsd_version != 0) { + CERROR("Unexpected version %u\n", bsd->bsd_version); + return -EPROTO; + } + + if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) { + CERROR("Unsupported checksum algorithm %u\n", + bsd->bsd_csum_alg); + return -EINVAL; + } + if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) { + CERROR("Unsupported cipher algorithm %u\n", + bsd->bsd_priv_alg); + return -EINVAL; + } + + if (size > sizeof(*bsd) && + size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) { + CERROR("Mal-formed checksum data: csum alg %u, size %d\n", + bsd->bsd_csum_alg, size); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(bulk_sec_desc_unpack); + +#ifdef __KERNEL__ +static +int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf) +{ + struct page *page; + int off; + char *ptr; + __u32 crc32 = ~0; + int len, i; + + for (i = 0; i < desc->bd_iov_count; i++) { + page = desc->bd_iov[i].kiov_page; + off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + ptr = cfs_kmap(page) + off; + len = desc->bd_iov[i].kiov_len; + + crc32 = crc32_le(crc32, ptr, len); + + cfs_kunmap(page); + } + + *((__u32 *) buf) = crc32; + return 0; +} + +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + struct crypto_tfm *tfm; + struct scatterlist *sl; + int i, rc = 0; + + LASSERT(alg > BULK_CSUM_ALG_NULL && + alg < BULK_CSUM_ALG_MAX); + + if (alg == BULK_CSUM_ALG_CRC32) + return do_bulk_checksum_crc32(desc, buf); + + tfm = crypto_alloc_tfm(csum_types[alg].name, 0); + if (tfm == NULL) { + CERROR("Unable to allocate tfm %s\n", csum_types[alg].name); + return -ENOMEM; + } + + OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count); + if (sl == NULL) { + rc = -ENOMEM; + goto out_tfm; + } + + for (i = 0; i < desc->bd_iov_count; i++) { + sl[i].page = desc->bd_iov[i].kiov_page; + sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + sl[i].length = desc->bd_iov[i].kiov_len; + } + + crypto_digest_init(tfm); + crypto_digest_update(tfm, sl, desc->bd_iov_count); + crypto_digest_final(tfm, buf); + + OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count); + +out_tfm: + crypto_free_tfm(tfm); + return rc; +} + +#else /* !__KERNEL__ */ +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + __u32 crc32 = ~0; + int i; + + LASSERT(alg == BULK_CSUM_ALG_CRC32); + + for (i = 0; i < desc->bd_iov_count; i++) { + char *ptr = desc->bd_iov[i].iov_base; + int len = desc->bd_iov[i].iov_len; + + crc32 = crc32_le(crc32, ptr, len); + } + + *((__u32 *) buf) = crc32; + return 0; +} +#endif + +/* + * perform algorithm @alg checksum on @desc, store result in @buf. + * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL. + */ +static +int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg, + struct ptlrpc_bulk_sec_desc *bsd, int bsdsize) +{ + int rc; + + LASSERT(bsd); + LASSERT(alg < BULK_CSUM_ALG_MAX); + + bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL; + + if (alg == BULK_CSUM_ALG_NULL) + return 0; + + LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size); + + rc = do_bulk_checksum(desc, alg, bsd->bsd_csum); + if (rc == 0) + bsd->bsd_csum_alg = alg; + + return rc; +} + +static +int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read, + struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize, + struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize) +{ + char *csum_p; + char *buf = NULL; + int csum_size, rc = 0; + + LASSERT(bsdv); + LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX); + + if (bsdr) + bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL; + + if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL) + return 0; + + /* for all supported algorithms */ + csum_size = csum_types[bsdv->bsd_csum_alg].size; + + if (bsdvsize < sizeof(*bsdv) + csum_size) { + CERROR("verifier size %d too small, require %d\n", + bsdvsize, (int) sizeof(*bsdv) + csum_size); + return -EINVAL; + } + + if (bsdr) { + LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size); + csum_p = (char *) bsdr->bsd_csum; + } else { + OBD_ALLOC(buf, csum_size); + if (buf == NULL) + return -EINVAL; + csum_p = buf; + } + + rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p); + + if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) { + CERROR("BAD %s CHECKSUM (%s), data mutated during " + "transfer!\n", read ? "READ" : "WRITE", + csum_types[bsdv->bsd_csum_alg].name); + rc = -EINVAL; + } else { + CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n", + read ? "read" : "write", + csum_types[bsdv->bsd_csum_alg].name); + } + + if (bsdr) { + bsdr->bsd_csum_alg = bsdv->bsd_csum_alg; + memcpy(bsdr->bsd_csum, csum_p, csum_size); + } else { + LASSERT(buf); + OBD_FREE(buf, csum_size); + } + + return rc; +} + +int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, + __u32 alg, struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdr; + int rsize, rc = 0; + + rsize = rmsg->lm_buflens[roff]; + bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr)); + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(alg < BULK_CSUM_ALG_MAX); + + if (read) + bsdr->bsd_csum_alg = alg; + else { + rc = generate_bulk_csum(desc, alg, bsdr, rsize); + if (rc) { + CERROR("client bulk write: failed to perform " + "checksum: %d\n", rc); + } + } + + return rc; +} +EXPORT_SYMBOL(bulk_csum_cli_request); + +int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *rmsg, int roff, + struct lustre_msg *vmsg, int voff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int rsize, vsize; + + rsize = rmsg->lm_buflens[roff]; + vsize = vmsg->lm_buflens[voff]; + bsdr = lustre_msg_buf(rmsg, roff, 0); + bsdv = lustre_msg_buf(vmsg, voff, 0); + + if (bsdv == NULL || vsize < sizeof(*bsdv)) { + CERROR("Invalid checksum verifier from server: size %d\n", + vsize); + return -EINVAL; + } + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(vsize >= sizeof(*bsdv)); + + if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) { + CERROR("bulk %s: checksum algorithm mismatch: client request " + "%s but server reply with %s. try to use the new one " + "for checksum verification\n", + read ? "read" : "write", + csum_types[bsdr->bsd_csum_alg].name, + csum_types[bsdv->bsd_csum_alg].name); + } + + if (read) + return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0); + else { + char *cli, *srv, *new = NULL; + int csum_size = csum_types[bsdr->bsd_csum_alg].size; + + LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX); + if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL) + return 0; + + if (vsize < sizeof(*bsdv) + csum_size) { + CERROR("verifier size %d too small, require %d\n", + vsize, (int) sizeof(*bsdv) + csum_size); + return -EINVAL; + } + + cli = (char *) (bsdr + 1); + srv = (char *) (bsdv + 1); + + if (!memcmp(cli, srv, csum_size)) { + /* checksum confirmed */ + CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n", + csum_types[bsdr->bsd_csum_alg].name); + return 0; + } + + /* checksum mismatch, re-compute a new one and compare with + * others, give out proper warnings. + */ + OBD_ALLOC(new, csum_size); + if (new == NULL) + return -ENOMEM; + + do_bulk_checksum(desc, bsdr->bsd_csum_alg, new); + + if (!memcmp(new, srv, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "on the client after we checksummed them\n", + csum_types[bsdr->bsd_csum_alg].name); + } else if (!memcmp(new, cli, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit\n", + csum_types[bsdr->bsd_csum_alg].name); + } else { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit, and the current page contents " + "don't match the originals and what the server " + "received\n", + csum_types[bsdr->bsd_csum_alg].name); + } + OBD_FREE(new, csum_size); + + return -EINVAL; + } +} +EXPORT_SYMBOL(bulk_csum_cli_reply); + +int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *vmsg, int voff, + struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int vsize, rsize, rc; + + vsize = vmsg->lm_buflens[voff]; + rsize = rmsg->lm_buflens[roff]; + bsdv = lustre_msg_buf(vmsg, voff, 0); + bsdr = lustre_msg_buf(rmsg, roff, 0); + + LASSERT(vsize >= sizeof(*bsdv)); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(bsdv && bsdr); + + if (read) { + rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize); + if (rc) + CERROR("bulk read: server failed to generate %s " + "checksum: %d\n", + csum_types[bsdv->bsd_csum_alg].name, rc); + } else + rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize); + + return rc; +} +EXPORT_SYMBOL(bulk_csum_svc); + +/**************************************** + * Helpers to assist policy modules to * + * implement encryption funcationality * + ****************************************/ + +/* + * NOTE: These algorithms must be stream cipher! + */ +static struct { + char *name; + __u32 flags; +} priv_types[] = { + [BULK_PRIV_ALG_NULL] = { "null", 0 }, + [BULK_PRIV_ALG_ARC4] = { "arc4", 0 }, +}; + +const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg) +{ + if (priv_alg < BULK_PRIV_ALG_MAX) + return priv_types[priv_alg].name; + return "unknown_priv"; +} +EXPORT_SYMBOL(sptlrpc_bulk_priv_alg2name); diff --git a/lustre/ptlrpc/sec_lproc.c b/lustre/ptlrpc/sec_lproc.c new file mode 100644 index 0000000..77c7cf4 --- /dev/null +++ b/lustre/ptlrpc/sec_lproc.c @@ -0,0 +1,182 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include <libcfs/libcfs.h> +#ifndef __KERNEL__ +#include <liblustre.h> +#include <libcfs/list.h> +#else +#include <linux/crypto.h> +#endif + +#include <obd.h> +#include <obd_class.h> +#include <obd_support.h> +#include <lustre_net.h> +#include <lustre_import.h> +#include <lustre_dlm.h> +#include <lustre_sec.h> + +#include "ptlrpc_internal.h" + +#ifdef __KERNEL__ + +struct proc_dir_entry *sptlrpc_proc_root = NULL; +EXPORT_SYMBOL(sptlrpc_proc_root); + +void sec_flags2str(unsigned long flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_SEC_FL_REVERSE) + strncat(buf, "reverse,", bufsize); + if (flags & PTLRPC_SEC_FL_ROOTONLY) + strncat(buf, "rootonly,", bufsize); + if (flags & PTLRPC_SEC_FL_BULK) + strncat(buf, "bulk,", bufsize); + if (buf[0] == '\0') + strncat(buf, "-,", bufsize); + + buf[strlen(buf) - 1] = '\0'; + +} + +int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = data; + struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf; + struct ptlrpc_sec *sec = NULL; + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + char flags_str[32]; + int written, i; + + if (obd == NULL) + return 0; + + LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); + LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX); + LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX); + + if (obd->u.cli.cl_import) + sec = obd->u.cli.cl_import->imp_sec; + + if (sec == NULL) { + written = snprintf(page, count, "\n"); + goto out; + } + + sec_flags2str(sec->ps_flags, flags_str, sizeof(flags_str)); + + written = snprintf(page, count, + "rpc msg flavor: %s\n" + "bulk checksum: %s\n" + "bulk encrypt: %s\n" + "flags: %s\n" + "ctx cache size %u\n" + "ctx cache busy %d\n" + "gc interval %lu\n" + "gc next %ld\n", + sptlrpc_flavor2name(sec->ps_flavor), + sptlrpc_bulk_csum_alg2name(conf->sfc_bulk_csum), + sptlrpc_bulk_priv_alg2name(conf->sfc_bulk_priv), + flags_str, + sec->ps_ccache_size, + atomic_read(&sec->ps_busy), + sec->ps_gc_interval, + sec->ps_gc_interval ? + sec->ps_gc_next - cfs_time_current_sec() : 0 + ); + /* + * list contexts + */ + if (sec->ps_policy->sp_policy != SPTLRPC_POLICY_GSS) + goto out; + + written += snprintf(page + written, count - written, + "GSS contexts ==>\n"); + + spin_lock(&sec->ps_lock); + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) { + if (written >= count) + break; + written += sptlrpc_ctx_display(ctx, page + written, + count - written); + } + } + spin_unlock(&sec->ps_lock); + +out: + return written; +} +EXPORT_SYMBOL(sptlrpc_lprocfs_rd); + +static struct lprocfs_vars sptlrpc_lprocfs_vars[] = { + { "enc_pool", sptlrpc_proc_read_enc_pool, NULL, NULL }, + { NULL } +}; + +int sptlrpc_lproc_init(void) +{ + int rc; + + LASSERT(sptlrpc_proc_root == NULL); + + sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root, + sptlrpc_lprocfs_vars, NULL); + if (IS_ERR(sptlrpc_proc_root)) { + rc = PTR_ERR(sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + return rc; + } + return 0; +} + +void sptlrpc_lproc_fini(void) +{ + if (sptlrpc_proc_root) { + lprocfs_remove(&sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + } +} + +#else /* !__KERNEL__ */ + +int sptlrpc_lproc_init(void) +{ + return 0; +} + +void sptlrpc_lproc_fini(void) +{ +} + +#endif diff --git a/lustre/ptlrpc/sec_null.c b/lustre/ptlrpc/sec_null.c new file mode 100644 index 0000000..2b06e00 --- /dev/null +++ b/lustre/ptlrpc/sec_null.c @@ -0,0 +1,361 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#ifndef __KERNEL__ +#include <liblustre.h> +#endif + +#include <obd_support.h> +#include <obd_class.h> +#include <lustre_net.h> +#include <lustre_sec.h> + +static struct ptlrpc_sec_policy null_policy; +static struct ptlrpc_sec null_sec; +static struct ptlrpc_cli_ctx null_cli_ctx; +static struct ptlrpc_svc_ctx null_svc_ctx; + +static +int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + if (req->rq_reqbuf->lm_magic != LUSTRE_MSG_MAGIC_V1) + req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + req->rq_reqdata_len = req->rq_reqlen; + return 0; +} + +static +int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + req->rq_repmsg = req->rq_repbuf; + req->rq_replen = req->rq_repdata_len; + return 0; +} + +static struct ptlrpc_ctx_ops null_ctx_ops = { + .refresh = null_ctx_refresh, + .sign = null_ctx_sign, + .verify = null_ctx_verify, +}; + +static struct ptlrpc_svc_ctx null_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &null_policy, +}; + +static +struct ptlrpc_sec* null_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_NULL); + return &null_sec; +} + +static +void null_destroy_sec(struct ptlrpc_sec *sec) +{ + LASSERT(sec == &null_sec); +} + +static +struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred) +{ + atomic_inc(&null_cli_ctx.cc_refcount); + return &null_cli_ctx; +} + +static +int null_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + if (!req->rq_reqbuf) { + int alloc_size = size_roundup_power2(msgsize); + + LASSERT(!req->rq_pool); + OBD_ALLOC(req->rq_reqbuf, alloc_size); + if (!req->rq_reqbuf) + return -ENOMEM; + + req->rq_reqbuf_len = alloc_size; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= msgsize); + memset(req->rq_reqbuf, 0, msgsize); + } + + req->rq_reqmsg = req->rq_reqbuf; + return 0; +} + +static +void null_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + if (!req->rq_pool) { + LASSERTF(req->rq_reqmsg == req->rq_reqbuf, + "reqmsg %p is not reqbuf %p in null sec\n", + req->rq_reqmsg, req->rq_reqbuf); + LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen, + "reqlen %d should smaller than buflen %d\n", + req->rq_reqlen, req->rq_reqbuf_len); + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqmsg = req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } +} + +static +int null_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + msgsize = size_roundup_power2(msgsize); + + OBD_ALLOC(req->rq_repbuf, msgsize); + if (!req->rq_repbuf) + return -ENOMEM; + + req->rq_repbuf_len = msgsize; + return 0; +} + +static +void null_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + OBD_FREE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; +} + +static +int null_enlarge_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newbuf; + int oldsize, newmsg_size, alloc_size; + + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf == req->rq_reqmsg); + LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); + LASSERT(req->rq_reqlen == lustre_msg_size(req->rq_reqmsg->lm_magic, + req->rq_reqmsg->lm_bufcount, + req->rq_reqmsg->lm_buflens)); + + /* compute new message size */ + oldsize = req->rq_reqbuf->lm_buflens[segment]; + req->rq_reqbuf->lm_buflens[segment] = newsize; + newmsg_size = lustre_msg_size(req->rq_reqbuf->lm_magic, + req->rq_reqbuf->lm_bufcount, + req->rq_reqbuf->lm_buflens); + req->rq_reqbuf->lm_buflens[segment] = oldsize; + + /* request from pool should always have enough buffer */ + LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size); + + if (req->rq_reqbuf_len < newmsg_size) { + alloc_size = size_roundup_power2(newmsg_size); + + OBD_ALLOC(newbuf, alloc_size); + if (newbuf == NULL) + return -ENOMEM; + + memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen); + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = req->rq_reqmsg = newbuf; + req->rq_reqbuf_len = alloc_size; + } + + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + req->rq_reqlen = newmsg_size; + + return 0; +} + +static +int null_accept(struct ptlrpc_request *req) +{ + LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_NULL); + + if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_NULL) { + CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor); + return SECSVC_DROP; + } + + req->rq_reqmsg = req->rq_reqbuf; + req->rq_reqlen = req->rq_reqdata_len; + + req->rq_svc_ctx = &null_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + return SECSVC_OK; +} + +static +int null_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + int rs_size = sizeof(*rs) + msgsize; + + LASSERT(msgsize % 8 == 0); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC(rs, rs_size); + if (rs == NULL) + return -ENOMEM; + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + rs->rs_msg = rs->rs_repbuf; + + req->rq_reply_state = rs; + return 0; +} + +static +void null_free_rs(struct ptlrpc_reply_state *rs) +{ + LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE(rs, rs->rs_size); +} + +static +int null_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + if (rs->rs_repbuf->lm_magic != LUSTRE_MSG_MAGIC_V1) + rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + rs->rs_repdata_len = req->rq_replen; + return 0; +} + +static struct ptlrpc_sec_cops null_sec_cops = { + .create_sec = null_create_sec, + .destroy_sec = null_destroy_sec, + .lookup_ctx = null_lookup_ctx, + .alloc_reqbuf = null_alloc_reqbuf, + .alloc_repbuf = null_alloc_repbuf, + .free_reqbuf = null_free_reqbuf, + .free_repbuf = null_free_repbuf, + .enlarge_reqbuf = null_enlarge_reqbuf, +}; + +static struct ptlrpc_sec_sops null_sec_sops = { + .accept = null_accept, + .alloc_rs = null_alloc_rs, + .authorize = null_authorize, + .free_rs = null_free_rs, +}; + +static struct ptlrpc_sec_policy null_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.null", + .sp_policy = SPTLRPC_POLICY_NULL, + .sp_cops = &null_sec_cops, + .sp_sops = &null_sec_sops, +}; + +static +void null_init_internal(void) +{ + static HLIST_HEAD(__list); + + null_sec.ps_policy = &null_policy; + atomic_set(&null_sec.ps_refcount, 1); /* always busy */ + null_sec.ps_import = NULL; + null_sec.ps_flavor = SPTLRPC_FLVR_NULL; + null_sec.ps_flags = 0; + null_sec.ps_gc_interval = 0; + null_sec.ps_gc_next = 0; + spin_lock_init(&null_sec.ps_lock); + null_sec.ps_ccache_size = 1; + null_sec.ps_ccache = &__list; + atomic_set(&null_sec.ps_busy, 1); /* for "null_cli_ctx" */ + + hlist_add_head(&null_cli_ctx.cc_hash, &__list); + atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */ + null_cli_ctx.cc_sec = &null_sec; + null_cli_ctx.cc_ops = &null_ctx_ops; + null_cli_ctx.cc_expire = 0; + null_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL | + PTLRPC_CTX_UPTODATE; + null_cli_ctx.cc_vcred.vc_uid = 0; + spin_lock_init(&null_cli_ctx.cc_lock); + INIT_LIST_HEAD(&null_cli_ctx.cc_req_list); +} + +int sptlrpc_null_init(void) +{ + int rc; + + null_init_internal(); + + rc = sptlrpc_register_policy(&null_policy); + if (rc) + CERROR("failed to register sec.null: %d\n", rc); + + return rc; +} + +void sptlrpc_null_fini(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&null_policy); + if (rc) + CERROR("cannot unregister sec.null: %d\n", rc); +} diff --git a/lustre/ptlrpc/sec_plain.c b/lustre/ptlrpc/sec_plain.c new file mode 100644 index 0000000..6993852 --- /dev/null +++ b/lustre/ptlrpc/sec_plain.c @@ -0,0 +1,553 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#ifndef __KERNEL__ +#include <liblustre.h> +#endif + +#include <obd_support.h> +#include <obd_class.h> +#include <lustre_net.h> +#include <lustre_sec.h> + +static struct ptlrpc_sec_policy plain_policy; +static struct ptlrpc_sec plain_sec; +static struct ptlrpc_cli_ctx plain_cli_ctx; +static struct ptlrpc_svc_ctx plain_svc_ctx; + +static +int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg_v2 *msg = req->rq_reqbuf; + ENTRY; + + msg->lm_secflvr = req->rq_sec_flavor; + req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount, + msg->lm_buflens); + RETURN(0); +} + +static +int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_repbuf; + ENTRY; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount != 2) { + CERROR("Protocol error: invalid buf count %d\n", + msg->lm_bufcount); + RETURN(-EPROTO); + } + + if (bulk_sec_desc_unpack(msg, 1)) { + CERROR("Mal-formed bulk checksum reply\n"); + RETURN(-EINVAL); + } + } + + req->rq_repmsg = lustre_msg_buf(msg, 0, 0); + req->rq_replen = msg->lm_buflens[0]; + RETURN(0); +} + +static +int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct sec_flavor_config *conf; + + LASSERT(req->rq_import); + LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)); + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + return bulk_csum_cli_request(desc, req->rq_bulk_read, + conf->sfc_bulk_csum, + req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1); +} + +static +int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)); + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); + LASSERT(req->rq_repbuf->lm_bufcount >= 2); + + return bulk_csum_cli_reply(desc, req->rq_bulk_read, + req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, + req->rq_repbuf, + req->rq_repbuf->lm_bufcount - 1); +} + +static struct ptlrpc_ctx_ops plain_ctx_ops = { + .refresh = plain_ctx_refresh, + .sign = plain_ctx_sign, + .verify = plain_ctx_verify, + .wrap_bulk = plain_cli_wrap_bulk, + .unwrap_bulk = plain_cli_unwrap_bulk, +}; + +static struct ptlrpc_svc_ctx plain_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &plain_policy, +}; + +static +struct ptlrpc_sec* plain_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + ENTRY; + LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_PLAIN); + RETURN(&plain_sec); +} + +static +void plain_destroy_sec(struct ptlrpc_sec *sec) +{ + ENTRY; + LASSERT(sec == &plain_sec); + EXIT; +} + +static +struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred) +{ + ENTRY; + atomic_inc(&plain_cli_ctx.cc_refcount); + RETURN(&plain_cli_ctx); +} + +static +int plain_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int bufcnt = 1, buflens[2], alloc_len; + ENTRY; + + buflens[0] = msgsize; + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + buflens[bufcnt++] = sptlrpc_current_user_desc_size(); + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1, + req->rq_bulk_read); + } + + alloc_len = lustre_msg_size_v2(bufcnt, buflens); + + if (!req->rq_reqbuf) { + LASSERT(!req->rq_pool); + + alloc_len = size_roundup_power2(alloc_len); + OBD_ALLOC(req->rq_reqbuf, alloc_len); + if (!req->rq_reqbuf) + RETURN(-ENOMEM); + + req->rq_reqbuf_len = alloc_len; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= alloc_len); + memset(req->rq_reqbuf, 0, alloc_len); + } + + lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL); + req->rq_reqmsg = lustre_msg_buf_v2(req->rq_reqbuf, 0, 0); + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + sptlrpc_pack_user_desc(req->rq_reqbuf, 1); + + RETURN(0); +} + +static +void plain_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + ENTRY; + if (!req->rq_pool) { + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } + EXIT; +} + +static +int plain_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int bufcnt = 1, buflens[2], alloc_len; + ENTRY; + + buflens[0] = msgsize; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 0, + req->rq_bulk_read); + } + + alloc_len = lustre_msg_size_v2(bufcnt, buflens); + alloc_len = size_roundup_power2(alloc_len); + + OBD_ALLOC(req->rq_repbuf, alloc_len); + if (!req->rq_repbuf) + RETURN(-ENOMEM); + + req->rq_repbuf_len = alloc_len; + RETURN(0); +} + +static +void plain_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + ENTRY; + OBD_FREE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; + EXIT; +} + +static +int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newbuf; + int oldsize; + int newmsg_size, newbuf_size; + ENTRY; + + /* embedded msg always at seg 0 */ + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); + LASSERT(lustre_msg_buf(req->rq_reqbuf, 0, 0) == req->rq_reqmsg); + + /* compute new embedded msg size. */ + oldsize = req->rq_reqmsg->lm_buflens[segment]; + req->rq_reqmsg->lm_buflens[segment] = newsize; + newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount, + req->rq_reqmsg->lm_buflens); + req->rq_reqmsg->lm_buflens[segment] = oldsize; + + /* compute new wrapper msg size. */ + oldsize = req->rq_reqbuf->lm_buflens[0]; + req->rq_reqbuf->lm_buflens[0] = newmsg_size; + newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount, + req->rq_reqbuf->lm_buflens); + req->rq_reqbuf->lm_buflens[0] = oldsize; + + /* request from pool should always have enough buffer */ + LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size); + + if (req->rq_reqbuf_len < newbuf_size) { + newbuf_size = size_roundup_power2(newbuf_size); + + OBD_ALLOC(newbuf, newbuf_size); + if (newbuf == NULL) + RETURN(-ENOMEM); + + memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = newbuf; + req->rq_reqbuf_len = newbuf_size; + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 0, 0); + } + + _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, 0, newmsg_size); + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + + req->rq_reqlen = newmsg_size; + RETURN(0); +} + +static +int plain_accept(struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_reqbuf; + int bufcnt = 1; + ENTRY; + + LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_PLAIN); + + if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_PLAIN) { + CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor); + return SECSVC_DROP; + } + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (msg->lm_bufcount < ++bufcnt) { + CERROR("Protocal error: too small buf count %d\n", + msg->lm_bufcount); + RETURN(SECSVC_DROP); + } + + if (sptlrpc_unpack_user_desc(msg, bufcnt - 1)) { + CERROR("Mal-formed user descriptor\n"); + RETURN(SECSVC_DROP); + } + + req->rq_user_desc = lustre_msg_buf(msg, bufcnt - 1, 0); + } + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount != ++bufcnt) { + CERROR("Protocal error: invalid buf count %d\n", + msg->lm_bufcount); + RETURN(SECSVC_DROP); + } + + if (bulk_sec_desc_unpack(msg, bufcnt - 1)) { + CERROR("Mal-formed bulk checksum request\n"); + RETURN(SECSVC_DROP); + } + } + + req->rq_reqmsg = lustre_msg_buf(msg, 0, 0); + req->rq_reqlen = msg->lm_buflens[0]; + + req->rq_svc_ctx = &plain_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + RETURN(SECSVC_OK); +} + +static +int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + struct ptlrpc_bulk_sec_desc *bsd; + int bufcnt = 1, buflens[2]; + int rs_size = sizeof(*rs); + ENTRY; + + LASSERT(msgsize % 8 == 0); + + buflens[0] = msgsize; + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) && + (req->rq_bulk_read || req->rq_bulk_write)) { + bsd = lustre_msg_buf(req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, + sizeof(*bsd)); + LASSERT(bsd); + + buflens[bufcnt++] = bulk_sec_desc_size(bsd->bsd_csum_alg, 0, + req->rq_bulk_read); + } + rs_size += lustre_msg_size_v2(bufcnt, buflens); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC(rs, rs_size); + if (rs == NULL) + RETURN(-ENOMEM); + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + + lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL); + rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, 0, 0); + + req->rq_reply_state = rs; + RETURN(0); +} + +static +void plain_free_rs(struct ptlrpc_reply_state *rs) +{ + ENTRY; + + LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE(rs, rs->rs_size); + EXIT; +} + +static +int plain_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct lustre_msg_v2 *msg = rs->rs_repbuf; + int len; + ENTRY; + + LASSERT(rs); + LASSERT(msg); + + if (req->rq_replen != msg->lm_buflens[0]) + len = lustre_shrink_msg(msg, 0, req->rq_replen, 1); + else + len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + + msg->lm_secflvr = req->rq_sec_flavor; + rs->rs_repdata_len = len; + RETURN(0); +} + +static +int plain_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + + return bulk_csum_svc(desc, req->rq_bulk_read, + req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1, + rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1); +} + +static +int plain_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + + return bulk_csum_svc(desc, req->rq_bulk_read, + req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1, + rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1); +} + +static struct ptlrpc_sec_cops plain_sec_cops = { + .create_sec = plain_create_sec, + .destroy_sec = plain_destroy_sec, + .lookup_ctx = plain_lookup_ctx, + .alloc_reqbuf = plain_alloc_reqbuf, + .alloc_repbuf = plain_alloc_repbuf, + .free_reqbuf = plain_free_reqbuf, + .free_repbuf = plain_free_repbuf, + .enlarge_reqbuf = plain_enlarge_reqbuf, +}; + +static struct ptlrpc_sec_sops plain_sec_sops = { + .accept = plain_accept, + .alloc_rs = plain_alloc_rs, + .authorize = plain_authorize, + .free_rs = plain_free_rs, + .unwrap_bulk = plain_svc_unwrap_bulk, + .wrap_bulk = plain_svc_wrap_bulk, +}; + +static struct ptlrpc_sec_policy plain_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.plain", + .sp_policy = SPTLRPC_POLICY_PLAIN, + .sp_cops = &plain_sec_cops, + .sp_sops = &plain_sec_sops, +}; + +static +void plain_init_internal(void) +{ + static HLIST_HEAD(__list); + + plain_sec.ps_policy = &plain_policy; + atomic_set(&plain_sec.ps_refcount, 1); /* always busy */ + plain_sec.ps_import = NULL; + plain_sec.ps_flavor = SPTLRPC_FLVR_PLAIN; + plain_sec.ps_flags = 0; + plain_sec.ps_gc_interval = 0; + plain_sec.ps_gc_next = 0; + spin_lock_init(&plain_sec.ps_lock); + plain_sec.ps_ccache_size = 1; + plain_sec.ps_ccache = &__list; + atomic_set(&plain_sec.ps_busy, 1); /* for "plain_cli_ctx" */ + + hlist_add_head(&plain_cli_ctx.cc_hash, &__list); + atomic_set(&plain_cli_ctx.cc_refcount, 1); /* for hash */ + plain_cli_ctx.cc_sec = &plain_sec; + plain_cli_ctx.cc_ops = &plain_ctx_ops; + plain_cli_ctx.cc_expire = 0; + plain_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL | + PTLRPC_CTX_UPTODATE; + plain_cli_ctx.cc_vcred.vc_uid = 0; + spin_lock_init(&plain_cli_ctx.cc_lock); + INIT_LIST_HEAD(&plain_cli_ctx.cc_req_list); +} + +int sptlrpc_plain_init(void) +{ + int rc; + + plain_init_internal(); + + rc = sptlrpc_register_policy(&plain_policy); + if (rc) + CERROR("failed to register sec.plain: %d\n", rc); + + return rc; +} + +void sptlrpc_plain_fini(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&plain_policy); + if (rc) + CERROR("cannot unregister sec.plain: %d\n", rc); +} diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index c1bca21..f785ede 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -31,6 +31,7 @@ #include <obd_support.h> #include <obd_class.h> #include <lustre_net.h> +#include <lu_object.h> #include <lnet/types.h> #include "ptlrpc_internal.h" @@ -71,7 +72,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_service *svc) { struct ptlrpc_request_buffer_desc *rqbd; - OBD_ALLOC(rqbd, sizeof (*rqbd)); + OBD_ALLOC_PTR(rqbd); if (rqbd == NULL) return (NULL); @@ -83,7 +84,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_service *svc) rqbd->rqbd_buffer = ptlrpc_alloc_request_buffer(svc->srv_buf_size); if (rqbd->rqbd_buffer == NULL) { - OBD_FREE(rqbd, sizeof (*rqbd)); + OBD_FREE_PTR(rqbd); return (NULL); } @@ -109,7 +110,7 @@ ptlrpc_free_rqbd (struct ptlrpc_request_buffer_desc *rqbd) spin_unlock(&svc->srv_lock); ptlrpc_free_request_buffer (rqbd->rqbd_buffer, svc->srv_buf_size); - OBD_FREE (rqbd, sizeof (*rqbd)); + OBD_FREE_PTR(rqbd); } int @@ -251,23 +252,41 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) return (-1); } +struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, + svc_handler_t h, char *name, + struct proc_dir_entry *proc_entry, + svcreq_printfn_t prntfn, + char *threadname) +{ + return ptlrpc_init_svc(c->psc_nbufs, c->psc_bufsize, + c->psc_max_req_size, c->psc_max_reply_size, + c->psc_req_portal, c->psc_rep_portal, + c->psc_watchdog_timeout, + h, name, proc_entry, + prntfn, c->psc_min_threads, c->psc_max_threads, + threadname, c->psc_ctx_tags); +} +EXPORT_SYMBOL(ptlrpc_init_svc_conf); + /* @threadname should be 11 characters or less - 3 will be added on */ struct ptlrpc_service * ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, int req_portal, int rep_portal, int watchdog_timeout, svc_handler_t handler, char *name, cfs_proc_dir_entry_t *proc_entry, - svcreq_printfn_t svcreq_printfn, - int min_threads, int max_threads, char *threadname) + svcreq_printfn_t svcreq_printfn, + int min_threads, int max_threads, + char *threadname, __u32 ctx_tags) { int rc; struct ptlrpc_service *service; ENTRY; LASSERT (nbufs > 0); - LASSERT (bufsize >= max_req_size); - - OBD_ALLOC(service, sizeof(*service)); + LASSERT (bufsize >= max_req_size + SPTLRPC_MAX_PAYLOAD); + LASSERT (ctx_tags != 0); + + OBD_ALLOC_PTR(service); if (service == NULL) RETURN(NULL); @@ -279,7 +298,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, cfs_waitq_init(&service->srv_waitq); service->srv_nbuf_per_group = test_req_buffer_pressure ? 1 : nbufs; - service->srv_max_req_size = max_req_size; + service->srv_max_req_size = max_req_size + SPTLRPC_MAX_PAYLOAD; service->srv_buf_size = bufsize; service->srv_rep_portal = rep_portal; service->srv_req_portal = req_portal; @@ -291,6 +310,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, service->srv_threads_min = min_threads; service->srv_threads_max = max_threads; service->srv_thread_name = threadname; + service->srv_ctx_tags = ctx_tags; rc = LNetSetLazyPortal(service->srv_req_portal); LASSERT (rc == 0); @@ -308,7 +328,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, spin_lock (&ptlrpc_all_services_lock); list_add (&service->srv_list, &ptlrpc_all_services); spin_unlock (&ptlrpc_all_services_lock); - + /* Now allocate the request buffers */ rc = ptlrpc_grow_req_bufs(service); /* We shouldn't be under memory pressure at startup, so @@ -319,7 +339,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, /* Now allocate pool of reply buffers */ /* Increase max reply size to next power of two */ service->srv_max_reply_size = 1; - while (service->srv_max_reply_size < max_reply_size) + while (service->srv_max_reply_size < + max_reply_size + SPTLRPC_MAX_PAYLOAD) service->srv_max_reply_size <<= 1; if (proc_entry != NULL) @@ -345,6 +366,8 @@ static void __ptlrpc_server_free_request(struct ptlrpc_request *req) req->rq_reply_state = NULL; } + sptlrpc_svc_ctx_decref(req); + if (req != &rqbd->rqbd_req) { /* NB request buffers use an embedded * req if the incoming req unlinked the @@ -444,9 +467,9 @@ static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay) exp->exp_last_request_time = max(exp->exp_last_request_time, (time_t)CURRENT_SECONDS + extra_delay); - CDEBUG(D_INFO, "updating export %s at %ld\n", + CDEBUG(D_HA, "updating export %s at %ld exp %p\n", exp->exp_client_uuid.uuid, - exp->exp_last_request_time); + exp->exp_last_request_time, exp); /* exports may get disconnected from the chain even though the export has references, so we must keep the spin lock while @@ -503,6 +526,26 @@ static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay) EXIT; } +#ifndef __KERNEL__ +int lu_context_init(struct lu_context *ctx, __u32 tags) +{ + return 0; +} + +void lu_context_fini(struct lu_context *ctx) +{ +} + +void lu_context_enter(struct lu_context *ctx) +{ +} + +void lu_context_exit(struct lu_context *ctx) +{ +} + +#endif + static int ptlrpc_server_handle_request(struct ptlrpc_service *svc, struct ptlrpc_thread *thread) @@ -518,9 +561,9 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, LASSERT(svc); spin_lock(&svc->srv_lock); - if (list_empty (&svc->srv_request_queue) || - (svc->srv_n_difficult_replies != 0 && - svc->srv_n_active_reqs >= (svc->srv_threads_running - 1))) { + if (unlikely(list_empty (&svc->srv_request_queue) || + (svc->srv_n_difficult_replies != 0 && + svc->srv_n_active_reqs >= (svc->srv_threads_running - 1)))) { /* If all the other threads are handling requests, I must * remain free to handle any 'difficult' reply that might * block them */ @@ -538,7 +581,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, do_gettimeofday(&work_start); timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL); - if (svc->srv_stats != NULL) { + if (likely(svc->srv_stats != NULL)) { lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, timediff); lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR, @@ -547,6 +590,23 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, svc->srv_n_active_reqs); } + /* go through security check/transform */ + request->rq_auth_uid = INVALID_UID; + request->rq_auth_mapped_uid = INVALID_UID; + + rc = sptlrpc_svc_unwrap_request(request); + switch (rc) { + case SECSVC_OK: + break; + case SECSVC_COMPLETE: + target_send_reply(request, 0, OBD_FAIL_MDS_ALL_REPLY_NET); + goto put_conn; + case SECSVC_DROP: + goto out_req; + default: + LBUG(); + } + #if SWAB_PARANOIA /* Clear request swab mask; this is a new request */ request->rq_req_swab_mask = 0; @@ -575,23 +635,34 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, goto out_req; } + rc = lu_context_init(&request->rq_session, LCT_SESSION); + if (rc) { + CERROR("Failure to initialize session: %d\n", rc); + goto out_req; + } + request->rq_session.lc_thread = thread; + lu_context_enter(&request->rq_session); + CDEBUG(D_NET, "got req "LPD64"\n", request->rq_xid); request->rq_svc_thread = thread; + if (thread) + request->rq_svc_thread->t_env->le_ses = &request->rq_session; + request->rq_export = class_conn2export( lustre_msg_get_handle(request->rq_reqmsg)); - if (request->rq_export) { - if (lustre_msg_get_conn_cnt(request->rq_reqmsg) < - request->rq_export->exp_conn_cnt) { + if (likely(request->rq_export)) { + if (unlikely(lustre_msg_get_conn_cnt(request->rq_reqmsg) < + request->rq_export->exp_conn_cnt)) { DEBUG_REQ(D_ERROR, request, "DROPPING req from old connection %d < %d", lustre_msg_get_conn_cnt(request->rq_reqmsg), request->rq_export->exp_conn_cnt); goto put_conn; } - if (request->rq_export->exp_obd && - request->rq_export->exp_obd->obd_fail) { + if (unlikely(request->rq_export->exp_obd && + request->rq_export->exp_obd->obd_fail)) { /* Failing over, don't handle any more reqs, send error response instead. */ CDEBUG(D_HA, "Dropping req %p for failed obd %s\n", @@ -600,7 +671,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, ptlrpc_error(request); goto put_conn; } - ptlrpc_update_export_timer(request->rq_export, timediff/500000); export = class_export_rpc_get(request->rq_export); } @@ -608,7 +678,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, /* Discard requests queued for longer than my timeout. If the * client's timeout is similar to mine, she'll be timing out this * REQ anyway (bug 1502) */ - if (timediff / 1000000 > (long)obd_timeout) { + if (unlikely(timediff / 1000000 > (long)obd_timeout)) { CERROR("Dropping timed-out opc %d request from %s" ": %ld seconds old\n", lustre_msg_get_opc(request->rq_reqmsg), @@ -646,37 +716,44 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, put_rpc_export: if (export != NULL) class_export_rpc_put(export); - put_conn: - if (request->rq_export != NULL) + if (likely(request->rq_export != NULL)) class_export_put(request->rq_export); + lu_context_exit(&request->rq_session); + lu_context_fini(&request->rq_session); + reply = request->rq_reply_state && request->rq_repmsg; /* bug 11169 */ do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); - if (timediff / 1000000 > (long)obd_timeout) + + if (unlikely(timediff / 1000000 > (long)obd_timeout)) CERROR("request "LPU64" opc %u from %s processed in %lds " "trans "LPU64" rc %d/%d\n", - request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg), + request->rq_xid, + request->rq_reqmsg ? + lustre_msg_get_opc(request->rq_reqmsg) : 0, libcfs_id2str(request->rq_peer), cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL) / 1000000, reply ? lustre_msg_get_transno(request->rq_repmsg) : - request->rq_transno, - request->rq_status, - reply ? lustre_msg_get_status(request->rq_repmsg): -999); + request->rq_transno, request->rq_status, + reply ? lustre_msg_get_status(request->rq_repmsg) : -999); else CDEBUG(D_HA, "request "LPU64" opc %u from %s processed in " "%ldus (%ldus total) trans "LPU64" rc %d/%d\n", - request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg), + request->rq_xid, + request->rq_reqmsg ? + lustre_msg_get_opc(request->rq_reqmsg) : 0, libcfs_id2str(request->rq_peer), timediff, cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL), request->rq_transno, request->rq_status, - reply ? lustre_msg_get_status(request->rq_repmsg): -999); + reply ? lustre_msg_get_status(request->rq_repmsg) : -999); - if (svc->srv_stats != NULL) { + if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) { int opc = opcode_offset(lustre_msg_get_opc(request->rq_reqmsg)); if (opc > 0) { LASSERT(opc < LUSTRE_MAX_OPCODES); @@ -882,12 +959,13 @@ static int ptlrpc_main(void *arg) #ifdef WITH_GROUP_INFO struct group_info *ginfo = NULL; #endif + struct lu_env env; int rc = 0; ENTRY; ptlrpc_daemonize(data->name); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) && defined(CONFIG_NUMA) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) && defined CONFIG_NUMA /* we need to do this before any per-thread allocation is done so that * we get the per-thread allocations on local node. bug 7342 */ if (svc->srv_cpu_affinity) { @@ -921,11 +999,18 @@ static int ptlrpc_main(void *arg) goto out; } + rc = lu_context_init(&env.le_ctx, svc->srv_ctx_tags); + if (rc) + goto out_srv_fini; + + thread->t_env = &env; + env.le_ctx.lc_thread = thread; + /* Alloc reply state structure for this one */ OBD_ALLOC_GFP(rs, svc->srv_max_reply_size, CFS_ALLOC_STD); if (!rs) { rc = -ENOMEM; - goto out_srv_init; + goto out_srv_fini; } /* Record that the thread is running */ @@ -944,8 +1029,8 @@ static int ptlrpc_main(void *arg) spin_unlock(&svc->srv_lock); cfs_waitq_signal(&svc->srv_free_rs_waitq); - CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id, - svc->srv_threads_running); + CDEBUG(D_NET, "service thread %d (#%d)started\n", thread->t_id, + svc->srv_threads_running); /* XXX maintain a list of all managed devices: insert here */ @@ -957,6 +1042,8 @@ static int ptlrpc_main(void *arg) lc_watchdog_disable(watchdog); + cond_resched(); + l_wait_event_exclusive (svc->srv_waitq, ((thread->t_flags & SVC_STOPPING) != 0 && svc->srv_n_difficult_replies == 0) || @@ -987,8 +1074,11 @@ static int ptlrpc_main(void *arg) * requests */ if (!list_empty (&svc->srv_request_queue) && (svc->srv_n_difficult_replies == 0 || - svc->srv_n_active_reqs < (svc->srv_threads_running - 1))) + svc->srv_n_active_reqs < (svc->srv_threads_running - 1))) { + lu_context_enter(&env.le_ctx); ptlrpc_server_handle_request(svc, thread); + lu_context_exit(&env.le_ctx); + } if (!list_empty(&svc->srv_idle_rqbds) && ptlrpc_server_post_idle_rqbds(svc) < 0) { @@ -1003,18 +1093,19 @@ static int ptlrpc_main(void *arg) lc_watchdog_delete(watchdog); -out_srv_init: +out_srv_fini: /* * deconstruct service specific state created by ptlrpc_start_thread() */ if (svc->srv_done != NULL) svc->srv_done(thread); + lu_env_fini(&env); out: CDEBUG(D_NET, "service thread %d exiting: rc %d\n", thread->t_id, rc); spin_lock(&svc->srv_lock); - svc->srv_threads_running--; /* must know immediately */ + svc->srv_threads_running--; /* must know immediately */ thread->t_id = rc; thread->t_flags = SVC_STOPPED; @@ -1041,7 +1132,7 @@ static void ptlrpc_stop_thread(struct ptlrpc_service *svc, list_del(&thread->t_link); spin_unlock(&svc->srv_lock); - OBD_FREE(thread, sizeof(*thread)); + OBD_FREE_PTR(thread); } void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) @@ -1070,7 +1161,7 @@ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc) for (i = 0; i < svc->srv_threads_min; i++) { rc = ptlrpc_start_thread(dev, svc); if (rc) { - CERROR("cannot start %s thread #%d: rc %d\n", + CERROR("cannot start %s thread #%d: rc %d\n", svc->srv_thread_name, i, rc); ptlrpc_stop_all_threads(svc); } @@ -1090,10 +1181,10 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc) CDEBUG(D_RPCTRACE, "%s started %d min %d max %d running %d\n", svc->srv_name, svc->srv_threads_started, svc->srv_threads_min, svc->srv_threads_max, svc->srv_threads_running); - if (svc->srv_threads_started >= svc->srv_threads_max) + if (svc->srv_threads_started >= svc->srv_threads_max) RETURN(-EMFILE); - OBD_ALLOC(thread, sizeof(*thread)); + OBD_ALLOC_PTR(thread); if (thread == NULL) RETURN(-ENOMEM); cfs_waitq_init(&thread->t_ctl_waitq); @@ -1101,11 +1192,11 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc) spin_lock(&svc->srv_lock); if (svc->srv_threads_started >= svc->srv_threads_max) { spin_unlock(&svc->srv_lock); - OBD_FREE(thread, sizeof(*thread)); + OBD_FREE_PTR(thread); RETURN(-EMFILE); } list_add(&thread->t_link, &svc->srv_threads); - id = ++svc->srv_threads_started; + id = svc->srv_threads_started++; spin_unlock(&svc->srv_lock); thread->t_id = id; @@ -1117,7 +1208,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc) CDEBUG(D_RPCTRACE, "starting thread '%s'\n", name); - /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we + /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we * just drop the VM and FILES in ptlrpc_daemonize() right away. */ rc = cfs_kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES); @@ -1169,7 +1260,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) * its 'unlink' flag set for each posted rqbd */ list_for_each(tmp, &service->srv_active_rqbds) { struct ptlrpc_request_buffer_desc *rqbd = - list_entry(tmp, struct ptlrpc_request_buffer_desc, + list_entry(tmp, struct ptlrpc_request_buffer_desc, rqbd_list); rc = LNetMDUnlink(rqbd->rqbd_md_h); @@ -1259,7 +1350,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) OBD_FREE(rs, service->srv_max_reply_size); } - OBD_FREE(service, sizeof(*service)); + OBD_FREE_PTR(service); return 0; } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 0f40241..e20e618 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -12,8 +12,8 @@ void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwirecheck) - * running on Linux pancake 2.6.18-skas3-v9-pre9 #2 Tue Oct 17 13:08:24 PDT 2006 i686 i686 i3 - * with gcc version 3.4.4 */ + * running on Linux duplo 2.6.17.3 #4 Wed Oct 18 02:46:08 MSD 2006 i686 unknown unknown GNU/L + * with gcc version 3.4.6 */ /* Constants... */ @@ -111,7 +111,7 @@ void lustre_assert_wire_constants(void) (long long)MDS_QUOTACHECK); LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", (long long)MDS_QUOTACTL); - LASSERTF(MDS_LAST_OPC == 51, " found %lld\n", + LASSERTF(MDS_LAST_OPC == 53, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", (long long)REINT_SETATTR); @@ -427,41 +427,48 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_connect_data, ocd_nllg)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg)); - LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 40, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, " found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_transno)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, " found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_group)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group)); + LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 52, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding1)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, " found %lld\n", + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1)); - LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 48, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 56, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding2)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2)); - LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 56, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 64, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding3)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3)); - LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 64, " found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding4)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, " found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4)); - CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL); - CLASSERT(OBD_CONNECT_INDEX == 0x2ULL); - CLASSERT(OBD_CONNECT_GRANT == 0x8ULL); - CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL); - CLASSERT(OBD_CONNECT_VERSION == 0x20ULL); - CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL); - CLASSERT(OBD_CONNECT_ACL == 0x80ULL); - CLASSERT(OBD_CONNECT_XATTR == 0x100ULL); - CLASSERT(OBD_CONNECT_CROW == 0x200ULL); - CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL); - CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL); - CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL); - CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL); - CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL); - CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL); - CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x10000ULL); - CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x20000ULL); - CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL); - CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL); + CLASSERT(OBD_CONNECT_RDONLY == 0x00000001ULL); + CLASSERT(OBD_CONNECT_INDEX == 0x00000002ULL); + CLASSERT(OBD_CONNECT_GRANT == 0x00000008ULL); + CLASSERT(OBD_CONNECT_SRVLOCK == 0x00000010ULL); + CLASSERT(OBD_CONNECT_VERSION == 0x00000020ULL); + CLASSERT(OBD_CONNECT_REQPORTAL == 0x00000040ULL); + CLASSERT(OBD_CONNECT_ACL == 0x00000080ULL); + CLASSERT(OBD_CONNECT_XATTR == 0x00000100ULL); + CLASSERT(OBD_CONNECT_REAL == 0x00000200ULL); + CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x00000400ULL); + CLASSERT(OBD_CONNECT_IBITS == 0x00001000ULL); + CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL); + CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL); + CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL); + CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL); + CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL); + CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL); + CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL); + CLASSERT(OBD_CONNECT_OSS_CAPA == 0x00200000ULL); + CLASSERT(OBD_CONNECT_MDS_MDS == 0x00400000ULL); + CLASSERT(OBD_CONNECT_SOM == 0x00800000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n", @@ -605,7 +612,6 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040)); CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100)); CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200)); - CLASSERT(OBD_FL_CREATE_CROW == (0x00000400)); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", @@ -1766,10 +1772,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_io_epoch) == 32, " found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_io_epoch)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch) == 4, " found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch)); LASSERTF((int)offsetof(struct llog_size_change_rec, padding) == 36, " found %lld\n", (long long)(int)offsetof(struct llog_size_change_rec, padding)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->padding) == 4, " found %lld\n", diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index f6340b3..3f03425 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -36,7 +36,7 @@ unsigned long default_btune_ratio = 50; /* 50 percentage */ unsigned long default_iunit_sz = 5000; /* 5000 inodes */ unsigned long default_itune_ratio = 50; /* 50 percentage */ -kmem_cache_t *qunit_cachep = NULL; +cfs_mem_cache_t *qunit_cachep = NULL; struct list_head qunit_hash[NR_DQHASH]; spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED; @@ -71,13 +71,9 @@ void qunit_cache_cleanup(void) spin_unlock(&qunit_hash_lock); if (qunit_cachep) { -#ifdef HAVE_KMEM_CACHE_DESTROY_INT int rc; - rc = kmem_cache_destroy(qunit_cachep); + rc = cfs_mem_cache_destroy(qunit_cachep); LASSERTF(rc == 0, "couldn't destory qunit_cache slab\n"); -#else - kmem_cache_destroy(qunit_cachep); -#endif qunit_cachep = NULL; } EXIT; @@ -89,9 +85,9 @@ int qunit_cache_init(void) ENTRY; LASSERT(qunit_cachep == NULL); - qunit_cachep = kmem_cache_create("ll_qunit_cache", + qunit_cachep = cfs_mem_cache_create("ll_qunit_cache", sizeof(struct lustre_qunit), - 0, 0, NULL, NULL); + 0, 0); if (!qunit_cachep) RETURN(-ENOMEM); @@ -117,6 +113,59 @@ qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) return tmp; } +/* compute the remaining quota for certain gid or uid b=11693 */ +int compute_remquota(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) +{ + struct super_block *sb = qctxt->lqc_sb; + __u64 usage, limit; + struct obd_quotactl *qctl; + int ret = QUOTA_RET_OK; + __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; + ENTRY; + + if (!sb_any_quota_enabled(sb)) + RETURN(QUOTA_RET_NOQUOTA); + + /* ignore root user */ + if (qdata->qd_id == 0 && qdata_type == USRQUOTA) + RETURN(QUOTA_RET_NOLIMIT); + + OBD_ALLOC_PTR(qctl); + if (qctl == NULL) + RETURN(-ENOMEM); + + /* get fs quota usage & limit */ + qctl->qc_cmd = Q_GETQUOTA; + qctl->qc_id = qdata->qd_id; + qctl->qc_type = qdata_type; + ret = fsfilt_quotactl(obd, sb, qctl); + if (ret) { + if (ret == -ESRCH) /* no limit */ + ret = QUOTA_RET_NOLIMIT; + else + CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", + ret); + GOTO(out, ret); + } + + usage = qctl->qc_dqblk.dqb_curspace; + limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; + if (!limit){ /* no limit */ + ret = QUOTA_RET_NOLIMIT; + GOTO(out, ret); + } + + if (limit >= usage) + qdata->qd_count = limit - usage; + else + qdata->qd_count = 0; + EXIT; +out: + OBD_FREE_PTR(qctl); + return ret; +} + /* caller must hold qunit_hash_lock */ static inline struct lustre_qunit *find_qunit(unsigned int hashent, struct lustre_quota_ctxt *qctxt, @@ -221,59 +270,6 @@ out: return ret; } -/* compute the remaining quota for certain gid or uid b=11693 */ -int compute_remquota(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) -{ - struct super_block *sb = qctxt->lqc_sb; - __u64 usage, limit; - struct obd_quotactl *qctl; - int ret = QUOTA_RET_OK; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - ENTRY; - - if (!sb_any_quota_enabled(sb)) - RETURN(QUOTA_RET_NOQUOTA); - - /* ignore root user */ - if (qdata->qd_id == 0 && qdata_type == USRQUOTA) - RETURN(QUOTA_RET_NOLIMIT); - - OBD_ALLOC_PTR(qctl); - if (qctl == NULL) - RETURN(-ENOMEM); - - /* get fs quota usage & limit */ - qctl->qc_cmd = Q_GETQUOTA; - qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; - ret = fsfilt_quotactl(obd, sb, qctl); - if (ret) { - if (ret == -ESRCH) /* no limit */ - ret = QUOTA_RET_NOLIMIT; - else - CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", - ret); - GOTO(out, ret); - } - - usage = qctl->qc_dqblk.dqb_curspace; - limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; - if (!limit){ /* no limit */ - ret = QUOTA_RET_NOLIMIT; - GOTO(out, ret); - } - - if (limit >= usage) - qdata->qd_count = limit - usage; - else - qdata->qd_count = 0; - EXIT; -out: - OBD_FREE_PTR(qctl); - return ret; -} - /* caller must hold qunit_hash_lock */ static struct lustre_qunit *dqacq_in_flight(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) @@ -293,7 +289,7 @@ static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit = NULL; ENTRY; - OBD_SLAB_ALLOC(qunit, qunit_cachep, SLAB_NOFS, sizeof(*qunit)); + OBD_SLAB_ALLOC(qunit, qunit_cachep, GFP_NOFS, sizeof(*qunit)); if (qunit == NULL) RETURN(NULL); @@ -351,14 +347,14 @@ struct qunit_waiter { /* FIXME check if this mds is the master of specified id */ -static int -is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, +static int +is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, unsigned int id, int type) { return qctxt->lqc_handler ? 1 : 0; } -static int +static int schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int opc, int wait); @@ -548,7 +544,7 @@ static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc) qdata = lustre_quota_old_to_new(qdata_old); } if (qdata == NULL) { - DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data\n"); + DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data"); RETURN(-EPROTO); } @@ -598,18 +594,18 @@ schedule_dqacq(struct obd_device *obd, if ((empty = alloc_qunit(qctxt, qdata, opc)) == NULL) RETURN(-ENOMEM); - + spin_lock(&qunit_hash_lock); qunit = dqacq_in_flight(qctxt, qdata); if (qunit) { - if (wait) + if (wait) list_add_tail(&qw.qw_entry, &qunit->lq_waiters); spin_unlock(&qunit_hash_lock); - + free_qunit(empty); goto wait_completion; - } + } qunit = empty; insert_qunit_nolock(qctxt, qunit); if (wait) @@ -667,7 +663,7 @@ schedule_dqacq(struct obd_device *obd, req->rq_interpret_reply = dqacq_interpret; ptlrpcd_add_req(req); - QDATA_DEBUG(qdata, "%s scheduled.\n", + QDATA_DEBUG(qdata, "%s scheduled.\n", opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); wait_completion: if (wait && qunit) { @@ -718,7 +714,7 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, RETURN(rc); } -int +int qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, unsigned short type, int isblk) { @@ -796,7 +792,7 @@ void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force) remove_qunit_nolock(qunit); /* wake up all waiters */ - list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters, + list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters, qw_entry) { list_del_init(&qw->qw_entry); qw->qw_rc = 0; @@ -825,7 +821,7 @@ static int qslave_recovery_main(void *arg) struct qslave_recov_thread_data *data = arg; struct obd_device *obd = data->obd; struct lustre_quota_ctxt *qctxt = data->qctxt; - unsigned int type; + unsigned int type; int rc = 0; ENTRY; @@ -884,7 +880,7 @@ static int qslave_recovery_main(void *arg) rc = 0; if (rc) - CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, + CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, "qslave recovery failed! (id:%d type:%d " " rc:%d)\n", dqid->di_id, type, rc); free: @@ -896,7 +892,7 @@ free: RETURN(rc); } -void +void qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt) { struct qslave_recov_thread_data data; diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index 5c915f4..d7084a9 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -46,7 +46,6 @@ #ifdef __KERNEL__ - /* quota proc file handling functions */ #ifdef LPROCFS int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, @@ -79,6 +78,7 @@ int lprocfs_wr_bunit(struct file *file, const char *buffer, LASSERT(obd != NULL); rc = lprocfs_write_helper(buffer, count, &val); + if (rc) return rc; @@ -158,11 +158,11 @@ int lprocfs_wr_itune(struct file *file, const char *buffer, struct obd_device *obd = (struct obd_device *)data; int val, rc; LASSERT(obd != NULL); - + rc = lprocfs_write_helper(buffer, count, &val); if (rc) return rc; - + if (val <= MIN_QLIMIT || val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) return -EINVAL; @@ -197,7 +197,7 @@ int lprocfs_rd_type(char *page, char **start, off_t off, int count, } EXPORT_SYMBOL(lprocfs_rd_type); -static int auto_quota_on(struct obd_device *obd, int type, +static int auto_quota_on(struct obd_device *obd, int type, struct super_block *sb, int is_master) { struct obd_quotactl *oqctl; @@ -238,7 +238,7 @@ local_quota: CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, "auto-enable local quota failed. rc=%d\n", rc); if (is_master) - mds_quota_off(obd, oqctl); + mds_quota_off(obd, oqctl); } else { obd->u.obt.obt_qctxt.lqc_status = 1; } @@ -249,6 +249,7 @@ out_pop: RETURN(rc); } + int lprocfs_wr_type(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -295,6 +296,7 @@ static int filter_quota_setup(struct obd_device *obd) CERROR("initialize quota context failed! (rc:%d)\n", rc); RETURN(rc); } + RETURN(rc); } @@ -373,16 +375,16 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) oa->o_valid |= (cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA; if (oqctl->qc_dqblk.dqb_bhardlimit && - (toqb(oqctl->qc_dqblk.dqb_curspace) > + (toqb(oqctl->qc_dqblk.dqb_curspace) > oqctl->qc_dqblk.dqb_bhardlimit)) - oa->o_flags |= (cnt == USRQUOTA) ? + oa->o_flags |= (cnt == USRQUOTA) ? OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA; } OBD_FREE_PTR(oqctl); RETURN(rc); } -static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, +static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, unsigned int gid) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; @@ -393,6 +395,17 @@ static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, RETURN(rc == -EAGAIN); } +static int mds_quota_init(void) +{ + return lustre_dquot_init(); +} + +static int mds_quota_exit(void) +{ + lustre_dquot_exit(); + return 0; +} + /* check whether the left quota of certain uid and uid can satisfy a write rpc * when need to acquire quota, return QUOTA_RET_ACQUOTA */ static int filter_quota_check(struct obd_device *obd, unsigned int uid, @@ -425,17 +438,6 @@ static int filter_quota_check(struct obd_device *obd, unsigned int uid, RETURN(rc); } -static int mds_quota_init(void) -{ - return lustre_dquot_init(); -} - -static int mds_quota_exit(void) -{ - lustre_dquot_exit(); - return 0; -} - static int mds_quota_setup(struct obd_device *obd) { struct obd_device_target *obt = &obd->u.obt; @@ -451,6 +453,7 @@ static int mds_quota_setup(struct obd_device *obd) CERROR("initialize quota context failed! (rc:%d)\n", rc); RETURN(rc); } + RETURN(rc); } @@ -505,7 +508,7 @@ static inline int hashfn(struct client_obd *cli, unsigned long id, int type) /* caller must hold qinfo_list_lock */ static inline void insert_qinfo_hash(struct osc_quota_info *oqi) { - struct list_head *head = qinfo_hash + + struct list_head *head = qinfo_hash + hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type); LASSERT_SPIN_LOCKED(&qinfo_list_lock); @@ -558,7 +561,7 @@ static void free_qinfo(struct osc_quota_info *oqi) OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi)); } -int osc_quota_chkdq(struct client_obd *cli, +int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid) { unsigned int id; @@ -581,7 +584,7 @@ int osc_quota_chkdq(struct client_obd *cli, RETURN(rc); } -int osc_quota_setdq(struct client_obd *cli, +int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid, obd_flag valid, obd_flag flags) { @@ -594,12 +597,12 @@ int osc_quota_setdq(struct client_obd *cli, for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct osc_quota_info *oqi, *old; - if (!(valid & ((cnt == USRQUOTA) ? + if (!(valid & ((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA))) continue; id = (cnt == USRQUOTA) ? uid : gid; - noquota = (cnt == USRQUOTA) ? + noquota = (cnt == USRQUOTA) ? (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA); oqi = alloc_qinfo(cli, id, cnt); diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 86f4b72..8b4c358 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -39,7 +39,7 @@ static struct list_head lustre_dquot_hash[NR_DQHASH]; static spinlock_t dquot_hash_lock = SPIN_LOCK_UNLOCKED; -kmem_cache_t *lustre_dquot_cachep; +cfs_mem_cache_t *lustre_dquot_cachep; int lustre_dquot_init(void) { @@ -47,9 +47,9 @@ int lustre_dquot_init(void) ENTRY; LASSERT(lustre_dquot_cachep == NULL); - lustre_dquot_cachep = kmem_cache_create("lustre_dquot_cache", + lustre_dquot_cachep = cfs_mem_cache_create("lustre_dquot_cache", sizeof(struct lustre_dquot), - 0, 0, NULL, NULL); + 0, 0); if (!lustre_dquot_cachep) return (-ENOMEM); @@ -69,13 +69,9 @@ void lustre_dquot_exit(void) LASSERT(list_empty(lustre_dquot_hash + i)); } if (lustre_dquot_cachep) { -#ifdef HAVE_KMEM_CACHE_DESTROY_INT int rc; - rc = kmem_cache_destroy(lustre_dquot_cachep); + rc = cfs_mem_cache_destroy(lustre_dquot_cachep); LASSERTF(rc == 0,"couldn't destroy lustre_dquot_cachep slab\n"); -#else - kmem_cache_destroy(lustre_dquot_cachep); -#endif lustre_dquot_cachep = NULL; } EXIT; @@ -116,7 +112,7 @@ static struct lustre_dquot *alloc_dquot(struct lustre_quota_info *lqi, struct lustre_dquot *dquot = NULL; ENTRY; - OBD_SLAB_ALLOC(dquot, lustre_dquot_cachep, SLAB_NOFS, sizeof(*dquot)); + OBD_SLAB_ALLOC(dquot, lustre_dquot_cachep, GFP_NOFS, sizeof(*dquot)); if (dquot == NULL) RETURN(NULL); @@ -305,7 +301,7 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[], int rc2 = 0; ENTRY; - if (rc && rc != -EDQUOT && rc != ENOLCK) + if (rc && rc != -EDQUOT) RETURN(0); switch (opc) { @@ -342,7 +338,7 @@ int filter_quota_adjust(struct obd_device *obd, unsigned int qcids[], int rc2 = 0; ENTRY; - if (rc && rc != -EDQUOT) + if (rc && rc != -EDQUOT && rc != ENOLCK) RETURN(0); switch (opc) { diff --git a/lustre/quota/quotacheck_test.c b/lustre/quota/quotacheck_test.c index c5ec4aa..c2c2040 100644 --- a/lustre/quota/quotacheck_test.c +++ b/lustre/quota/quotacheck_test.c @@ -85,9 +85,8 @@ static void print_inode(struct inode *inode) S_ISLNK(inode->i_mode)) size = inode_get_bytes(inode); - CERROR("%lu: uid: %u, size: %llu, blocks: %llu, real size: %llu\n", - inode->i_ino, inode->i_uid, inode->i_size, - (long long)inode->i_blocks, size); + CERROR("%lu: uid: %u, size: %llu, blocks: %lu, real size: %llu\n", + inode->i_ino, inode->i_uid, inode->i_size, inode->i_blocks, size); } /* Test quotaon */ @@ -154,10 +153,9 @@ static int quotacheck_test_cleanup(struct obd_device *obd) return 0; } -static int quotacheck_test_setup(struct obd_device *obd, obd_count len, void *buf) +static int quotacheck_test_setup(struct obd_device *obd, struct lustre_cfg* lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg *lcfg = buf; struct obd_device *tgt; int rc; ENTRY; @@ -201,8 +199,8 @@ static int __init quotacheck_test_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(quotacheck_test, &lvars); - return class_register_type("acheck_obd_ops, lvars.module_vars, - "quotacheck_test"); + return class_register_type("acheck_obd_ops, NULL, lvars.module_vars, + "quotacheck_test", NULL); } static void __exit quotacheck_test_exit(void) diff --git a/lustre/quota/quotactl_test.c b/lustre/quota/quotactl_test.c index 92ada91..cffb646 100644 --- a/lustre/quota/quotactl_test.c +++ b/lustre/quota/quotactl_test.c @@ -80,7 +80,7 @@ static int quotactl_test_2(struct obd_device *obd, struct super_block *sb) RETURN(0); } #endif - + /* Test set/getquota */ static int quotactl_test_3(struct obd_device *obd, struct super_block *sb) { @@ -294,10 +294,9 @@ static int quotactl_test_cleanup(struct obd_device *obd) return 0; } -static int quotactl_test_setup(struct obd_device *obd, obd_count len, void *buf) +static int quotactl_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; - struct lustre_cfg *lcfg = buf; struct obd_device *tgt; int rc; ENTRY; @@ -341,8 +340,8 @@ static int __init quotactl_test_init(void) struct lprocfs_static_vars lvars; lprocfs_init_vars(quotactl_test, &lvars); - return class_register_type("actl_obd_ops, lvars.module_vars, - "quotactl_test"); + return class_register_type("actl_obd_ops, NULL, lvars.module_vars, + "quotactl_test", NULL); } static void __exit quotactl_test_exit(void) diff --git a/lustre/scripts/.cvsignore b/lustre/scripts/.cvsignore index e5a85d6..9dae161 100644 --- a/lustre/scripts/.cvsignore +++ b/lustre/scripts/.cvsignore @@ -8,11 +8,11 @@ Makefile.in .deps TAGS version_tag.pl -lustre_createcsv -lustre_config -lc_net -lc_modprobe -lc_hb -lc_cluman -lc_md -lc_lvm +lustre_createcsv.sh +lustre_config.sh +lc_net.sh +lc_modprobe.sh +lc_hb.sh +lc_cluman.sh +lc_md.sh +lc_lvm.sh diff --git a/lustre/scripts/lc_cluman.in b/lustre/scripts/lc_cluman.in index 11194a6..37231eb 100644 --- a/lustre/scripts/lc_cluman.in +++ b/lustre/scripts/lc_cluman.in @@ -181,7 +181,7 @@ cluman_running() { local host_name=$1 local ret_str - ret_str=`${REMOTE} ${host_name} "service clumanager status" 2>&1` + ret_str=`${REMOTE} ${host_name} "/sbin/service clumanager status" 2>&1` if [ $? -ne 0 ]; then if [ "${ret_str}" != "${ret_str#*unrecognized*}" ]; then echo >&2 "`basename $0`: cluman_running() error:"\ diff --git a/lustre/scripts/lc_common b/lustre/scripts/lc_common index 06a547f..8b1bcbe 100644 --- a/lustre/scripts/lc_common +++ b/lustre/scripts/lc_common @@ -292,7 +292,7 @@ nid2hostname() { fi # Execute remote command to get the host name - ret_str=$(${REMOTE} ${ip_addr} "hostname" 2>&1) + ret_str=$(${REMOTE} ${ip_addr} "hostname" 2>&1 </dev/null) if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then echo "`basename $0`: nid2hostname() error:" \ "remote command to ${ip_addr} error: ${ret_str}" @@ -522,3 +522,70 @@ check_nodelist() { return 0 } + +# nid_in_nidlist nid nidlist +# Given a nid, and a list of nids in one node (delimited by comma ','), +# return true if the nid appears in the list of nids, or false otherwise. +nid_in_nidlist() { + local nid="$1" + local nidlist="$2" + local my_nid + + [ -z "${nid}" -o -z "${nidlist}" ] && false && return + + if [[ "${nid}" != *@* || "${nid#*@}" == tcp* ]]; then + # network type is tcp + for my_nid in ${nidlist//,/ }; do + [ "${nid%@*}" = "${my_nid%@*}" ] && true && return + done + else + # network type is not tcp + [[ ,${nidlist}, == *,${nid},* ]] && true && return + fi + + false && return +} + +# get_mgs_nids mgs_hostname mgs_nids +# Get the corresponding NID(s) of the MGS node ${mgs_hostname} from the +# "mgs nids" field of one lustre target in the csv file +get_mgs_nids() { + local mgs_node="$1" + local all_mgs_nids="$2" + local mgs_nids + local ret_str + + # Check whether the hostname of the mgs node is in + # the mgs nids string + for mgs_nids in ${all_mgs_nids//:/ }; do + if nid_in_nidlist ${mgs_node} ${mgs_nids}; then + echo ${mgs_nids} + return 0 + fi + done + + # Let's use lctl to get the real nids from the mgs node + ret_str=$(${REMOTE} ${mgs_node} "${LCTL} list_nids" 2>&1 </dev/null) + if [ ${PIPESTATUS[0]} -ne 0 -a -n "${ret_str}" ]; then + echo "$(basename $0): get_mgs_nids() error:" \ + "remote command to ${mgs_node} error: ${ret_str}" + return 1 + fi + remote_error "get_mgs_nids" ${mgs_node} "${ret_str}" && return 1 + + local real_mgs_nids=${ret_str//${mgs_node}:/} + for real_mgs_nid in ${real_mgs_nids}; do + for mgs_nids in ${all_mgs_nids//:/ }; do + if nid_in_nidlist ${real_mgs_nid} ${mgs_nids}; then + echo ${mgs_nids} + return 0 + fi + done + done + + echo "$(basename $0): get_mgs_nids() error:" \ + "Can not figure out which nids corresponding to the MGS"\ + "node ${mgs_node} from \"${all_mgs_nids}\"!" + + return 1 +} diff --git a/lustre/scripts/lc_md.in b/lustre/scripts/lc_md.in index 127974d..ab741af 100644 --- a/lustre/scripts/lc_md.in +++ b/lustre/scripts/lc_md.in @@ -317,7 +317,7 @@ construct_mdadm_rm_cmdline() { # Remove the "missing" devices from the component devices real_devs=`echo ${MD_DEVS[i]} | sed 's/missing//g'` # Over-written the superblock with zeros - mdadm_cmd=${mdadm_cmd}" && ${MDADM} --zero-superblock ${real_devs}" + mdadm_cmd=${mdadm_cmd}" && ${MDADM} --zero-superblock ${real_devs} || true" fi echo ${mdadm_cmd} diff --git a/lustre/scripts/lc_modprobe.in b/lustre/scripts/lc_modprobe.in index 2fe7f78..dc090d7 100644 --- a/lustre/scripts/lc_modprobe.in +++ b/lustre/scripts/lc_modprobe.in @@ -1,7 +1,7 @@ #!/bin/bash # # lc_modprobe - add lustre module options into modprobe.conf or -# modules.conf +# modules.conf # ################################################################################# diff --git a/lustre/scripts/lc_servip b/lustre/scripts/lc_servip index 92c6ab2..98b99a1 100644 --- a/lustre/scripts/lc_servip +++ b/lustre/scripts/lc_servip @@ -1,7 +1,7 @@ #!/bin/bash # # lc_servip - script for verifying the service IP and the real -# interface IP in a remote host are in the same subnet +# interface IP in a remote host are in the same subnet # ############################################################################### diff --git a/lustre/scripts/lmc2csv.pl b/lustre/scripts/lmc2csv.pl index 73da5fe..60cd5d9 100644 --- a/lustre/scripts/lmc2csv.pl +++ b/lustre/scripts/lmc2csv.pl @@ -18,6 +18,7 @@ sub get_arg_val { return $foo[1]; } } + return undef; } sub get_arg { @@ -39,6 +40,9 @@ sub add_net { $net->{"nid"} = get_arg_val("nid", \@_); $net->{"nettype"} = get_arg_val("nettype", \@_); $net->{"port"} = get_arg_val("port", \@_); + # note that this is not standard lmc syntax. it's an extension to it + # to handle something that lmc never had to deal with. + $net->{"iface"} = get_arg_val("iface", \@_); if ($#_ > 0) { print STDERR "Unknown arguments to \"--add net\": @_\n"; exit(1); @@ -49,10 +53,14 @@ sub add_net { sub add_mds { my $mds = {}; $mds->{"node"} = get_arg_val("node", \@_); - $mds->{"mds"} = get_arg_val("mds", \@_); + $mds->{"name"} = get_arg_val("mds", \@_); $mds->{"fstype"} = get_arg_val("fstype", \@_); $mds->{"dev"} = get_arg_val("dev", \@_); $mds->{"size"} = get_arg_val("size", \@_); + $mds->{"lmv"} = get_arg_val("lmv", \@_); + $mds->{"failover"} = get_arg("failover", \@_); + $mds->{"failout"} = get_arg("failout", \@_); + $mds->{"inode_size"} = get_arg_val("inode_size", \@_); if ($#_ > 0) { print STDERR "Unknown arguments to \"--add mds\": @_\n"; exit(1); @@ -62,8 +70,9 @@ sub add_mds { sub add_lov { my $lov = {}; - $lov->{"lov"} = get_arg_val("lov", \@_); + $lov->{"name"} = get_arg_val("lov", \@_); $lov->{"mds"} = get_arg_val("mds", \@_); + $lov->{"lmv"} = get_arg_val("lmv", \@_); $lov->{"stripe_sz"} = get_arg_val("stripe_sz", \@_); $lov->{"stripe_cnt"} = get_arg_val("stripe_cnt", \@_); $lov->{"stripe_pattern"} = get_arg_val("stripe_pattern", \@_); @@ -77,13 +86,15 @@ sub add_lov { sub add_ost { my $ost = {}; $ost->{"node"} = get_arg_val("node", \@_); - $ost->{"ost"} = get_arg_val("ost", \@_); + $ost->{"name"} = get_arg_val("ost", \@_); $ost->{"fstype"} = get_arg_val("fstype", \@_); $ost->{"dev"} = get_arg_val("dev", \@_); $ost->{"size"} = get_arg_val("size", \@_); $ost->{"lov"} = get_arg_val("lov", \@_); $ost->{"mountfsoptions"} = get_arg_val("mountfsoptions", \@_); $ost->{"failover"} = get_arg("failover", \@_); + $ost->{"failout"} = get_arg("failout", \@_); + $ost->{"inode_size"} = get_arg_val("inode_size", \@_); if ($#_ > 0) { print STDERR "Unknown arguments to \"--add ost\": @_\n"; exit(1); @@ -97,6 +108,7 @@ sub add_mtpt { $mtpt->{"path"} = get_arg_val("path", \@_); $mtpt->{"mds"} = get_arg_val("mds", \@_); $mtpt->{"lov"} = get_arg_val("lov", \@_); + $mtpt->{"lmv"} = get_arg_val("lmv", \@_); if ($#_ > 0) { print STDERR "Unknown arguments to \"--add mtpt\": @_\n"; exit(1); @@ -106,23 +118,32 @@ sub add_mtpt { no strict 'refs'; -sub find_obj { +sub find_objs { my $type = shift; my $key = shift; my $value = shift; my @objs = @_; + my @found_objs; foreach my $obj (@objs) { - if ($obj->{$key} eq $value) { - return $obj; + if (defined($obj->{$key}) && defined($value) + && $obj->{$key} eq $value) { + push(@found_objs, $obj); } } + + return @found_objs; } sub lnet_options { my $net = shift; - my $options_str = "options lnet networks=" . $net->{"nettype"} . + my $networks = $net->{"nettype"}; + if (defined($net->{"iface"})) { + my $iface = $net->{"iface"}; + $networks .= "($iface)"; + } + my $options_str = "options lnet networks=" . $networks . " accept=all"; if (defined($net->{"port"})) { $options_str .= " accept_port=" . $net->{"port"}; @@ -161,68 +182,171 @@ while(<>) { # link lovs to mdses foreach my $lov (@{$objs{"lov"}}) { - my $mds = find_obj("mds", "mds", $lov->{"mds"}, @{$objs{"mds"}}); - $mds->{"lov"} = $lov; + foreach my $mds (find_objs("mds", "name", $lov->{"mds"}, @{$objs{"mds"}})) { + if ($mds) { + $mds->{"lov"} = $lov; + } + } + # try via lmvs as well + foreach my $mds (find_objs("mds", "lmv", $lov->{"lmv"}, @{$objs{"mds"}})) { + if ($mds) { + $mds->{"lov"} = $lov; + } + } +} + +# create lmvs and link them to mdses +foreach my $mds (@{$objs{"mds"}}) { + my $lmv; + my @lmvs = find_objs("lmv", "name", $mds->{"lmv"}, @{$objs{"lmv"}}); + if ($#lmvs < 0) { + $lmv = {}; + $lmv->{"name"} = $mds->{"lmv"}; + push(@{$objs{"lmv"}}, $lmv); + } else { + $lmv = pop(@lmvs); + } + $mds->{"lmv"} = $lmv; } + +# link mtpts to lovs and lmvs or mdses +foreach my $mtpt (@{$objs{"mtpt"}}) { + foreach my $mds (find_objs("mds", "name", $mtpt->{"mds"}, @{$objs{"mds"}})) { + if ($mds) { + $mds->{"mtpt"} = $mtpt; + } + } + foreach my $lmv (find_objs("lmv", "name", $mtpt->{"lmv"}, @{$objs{"lmv"}})) { + if ($lmv) { + $lmv->{"mtpt"} = $mtpt; + } + } + foreach my $lov (find_objs("lov", "name", $mtpt->{"lov"}, @{$objs{"lov"}})) { + if ($lov) { + $lov->{"mtpt"} = $mtpt; + } + } +} + # XXX could find failover pairs of osts and mdts here and link them to # one another and then fill in their details in the csv generators below my $COUNT = 1; foreach my $mds (@{$objs{"mds"}}) { # find the net for this node - my $net = find_obj("net", "node", $mds->{"node"}, @{$objs{"net"}}); + my @nets = find_objs("net", "node", $mds->{"node"}, @{$objs{"net"}}); + my $lmv = $mds->{"lmv"}; my $lov = $mds->{"lov"}; - my $mkfs_options=""; + my $mtpt; + if ($lmv) { + $mtpt = $mds->{"lmv"}->{"mtpt"}; + } else { + $mtpt = $mds->{"mtpt"}; + } + my $fmt_options=""; if (defined($lov->{"stripe_sz"})) { - $mkfs_options .= "lov.stripesize=" . $lov->{"stripe_sz"} . " "; + $fmt_options .= "lov.stripesize=" . $lov->{"stripe_sz"} . " "; } if (defined($lov->{"stripe_cnt"})) { - $mkfs_options .= "lov.stripecount=" . $lov->{"stripe_cnt"} . " "; + $fmt_options .= "lov.stripecount=" . $lov->{"stripe_cnt"} . " "; } if (defined($lov->{"stripe_pattern"})) { - $mkfs_options .= "lov.stripetype=" . $lov->{"stripe_pattern"} . " "; + $fmt_options .= "lov.stripetype=" . $lov->{"stripe_pattern"} . " "; + } + if (defined($mds->{"failover"}) & $mds->{"failover"}) { + $fmt_options .= "failover.mode=failover" . " "; + } + if (defined($mds->{"failout"}) & $mds->{"failout"}) { + $fmt_options .= "failover.mode=failout" . " "; + } + chop($fmt_options); + if ($fmt_options ne "") { + $fmt_options = " --param=\"$fmt_options\""; + } + + my $mkfs_options=""; + if (defined($mds->{"inode_size"})) { + $mkfs_options .= "-I " . $mds->{"inode_size"} . " "; } chop($mkfs_options); - if ($mkfs_options ne "") { - $mkfs_options = " --param=\"$mkfs_options\""; + + my $fs_name=""; + my $mount_point = "$MOUNTPT/" . $mds->{"name"}; + if (defined($mtpt->{"node"})) { + $fs_name = $mtpt->{"node"}; + $mount_point .= "_" . $mtpt->{"node"}; } if ($COUNT == 1) { # mgs/mdt - printf "%s,%s,%s,$MOUNTPT/%s,mgs|mdt,,,,--device-size=%s --noformat%s,,noauto\n", + printf "%s,%s,%s,%s,mgs|mdt,%s,,,--device-size=%s --noformat%s,%s,\n", $mds->{"node"}, - lnet_options($net), + lnet_options($nets[0]), $mds->{"dev"}, - $mds->{"mds"}, + $mount_point, + $fs_name, $mds->{"size"}, + $fmt_options, $mkfs_options; - push(@mgses, $net->{"nid"}); + push(@mgses, $nets[0]->{"nid"}); } else { # mdt - printf "%s,%s,%s,$MOUNTPT/%s,mdt,,\"%s\",,--device-size=%s --noformat,,noauto\n", + printf "%s,%s,%s,%s,mdt,%s,\"%s\",,--device-size=%s --noformat%s,%s,\n", $mds->{"node"}, - lnet_options($net), + lnet_options($nets[0]), $mds->{"dev"}, - $mds->{"mds"}, + $mount_point, + $fs_name, join(",", @mgses), - $mds->{"size"}; + $mds->{"size"}, + $fmt_options, + $mkfs_options; } $COUNT++; } foreach my $ost (@{$objs{"ost"}}) { - # find the net for this node - my $mount_opts="noauto"; + my $mount_opts=""; if (defined($ost->{"mountfsoptions"})) { - $mount_opts .= "," . $ost->{"mountfsoptions"}; + $mount_opts .= "\"" . $ost->{"mountfsoptions"} . "\""; + } + my $fmt_options=""; + if (defined($ost->{"failover"}) & $ost->{"failover"}) { + $fmt_options .= "failover.mode=failover" . " "; + } + if (defined($ost->{"failout"}) & $ost->{"failout"}) { + $fmt_options .= "failover.mode=failout" . " "; + } + chop($fmt_options); + if ($fmt_options ne "") { + $fmt_options = " --param=\"$fmt_options\""; + } + + my $mkfs_options=""; + if (defined($ost->{"inode_size"})) { + $mkfs_options .= "-I " . $ost->{"inode_size"} . " "; } - my $net = find_obj("net", "node", $ost->{"node"}, @{$objs{"net"}}); - printf "%s,%s,%s,$MOUNTPT/%s,ost,,\"%s\",,--device-size=%s --noformat,,\"%s\"\n", + chop($mkfs_options); + + $ost->{"lov"} = (find_objs("lov", "name", $ost->{"lov"}, @{$objs{"lov"}}))[0]; + my $fs_name=""; + my $mount_point = "$MOUNTPT/" . $ost->{"name"}, + my $mtpt = $ost->{"lov"}->{"mtpt"}; + if (defined($mtpt->{"node"})) { + $fs_name = $mtpt->{"node"}; + $mount_point .= "_" . $mtpt->{"node"}; + } + # find the net for this node + my @nets = find_objs("net", "node", $ost->{"node"}, @{$objs{"net"}}); + printf "%s,%s,%s,%s,ost,%s,\"%s\",,--device-size=%s --noformat%s,%s,%s\n", $ost->{"node"}, - lnet_options($net), + lnet_options($nets[0]), $ost->{"dev"}, - $ost->{"ost"}, + $mount_point, + $fs_name, join(",", @mgses), $ost->{"size"}, + $fmt_options, + $mkfs_options, $mount_opts; } diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index e6cca00..0e3acba 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -136,7 +136,7 @@ stop() { return fi # Cat the modprobe file and place all lines that follow a trailing backslash on the same line - ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"` ++ ROUTER=`cat ${MODPROBE_CONF} | sed ':a;N;$!ba;s#\\\[:space:]*\\n##g' | grep lnet | grep forwarding=\"enabled\"` if [[ ! -z ${ROUTER} ]]; then MODULE_LOADED=`lsmod | awk ' { print $1 } ' | grep lnet` if [[ ! -z ${MODULE_LOADED} ]]; then diff --git a/lustre/scripts/lustre_config.in b/lustre/scripts/lustre_config.in index 47662b4..c89dca4 100644 --- a/lustre/scripts/lustre_config.in +++ b/lustre/scripts/lustre_config.in @@ -243,9 +243,11 @@ declare -a TARGET_OPTS # target services in one failover group declare -a HOST_NAME MODULE_OPTS DEVICE_NAME MOUNT_POINT DEVICE_TYPE FS_NAME declare -a MGS_NIDS INDEX FORMAT_OPTIONS MKFS_OPTIONS MOUNT_OPTIONS FAILOVERS -# Corresponding to MGS_NIDS and FAILOVERS arrays, -# IP addresses in which were converted to hostnames -declare -a MGS_NIDS_NAMES FAILOVERS_NAMES +# Heartbeat software requires that node names in the configuration directive +# must (normally) match the "uname -n" of that machine. Since the value of the +# "failover nids" field in the csv file is the NID(s) of failover partner node, +# we have to figure out the corresponding hostname of that node. +declare -a FAILOVERS_NAMES VERIFY_CONNECT=true CONFIG_MD_LVM=false @@ -804,12 +806,6 @@ get_items() { MODULE_OPTS[idx]=`echo "${MODULE_OPTS[idx]}" | sed 's/"/\\\"/g'` # Convert IP addresses in NIDs to hostnames - MGS_NIDS_NAMES[idx]=$(ip2hostname_multi_node ${MGS_NIDS[idx]}) - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${MGS_NIDS_NAMES[idx]}" - return 1 - fi - FAILOVERS_NAMES[idx]=$(ip2hostname_multi_node ${FAILOVERS[idx]}) if [ ${PIPESTATUS[0]} -ne 0 ]; then echo >&2 "${FAILOVERS_NAMES[idx]}" @@ -838,7 +834,6 @@ check_lnet_connect() { local COMMAND RET_STR local mgs_prim_nids - local nids nids_names local nids_str= local mgs_nid local ping_mgs @@ -849,7 +844,7 @@ check_lnet_connect() { "${HOST_NAME[i]} and the MGS node ${mgs_node}" mgs_prim_nids=`echo ${MGS_NIDS[i]} | awk -F: '{print $1}'` - if [ -z "${mgs_node}" ]; then + if [ -z "${mgs_node}" -o $MGS_NUM -eq 1 ]; then nids_str=${mgs_prim_nids} # nids of primary MGS node if [ -z "${nids_str}" ]; then echo >&2 $"`basename $0`: check_lnet_connect() error:"\ @@ -858,21 +853,11 @@ check_lnet_connect() { return 1 fi else - for nids in ${MGS_NIDS[i]//:/ }; do - nids_names=$(ip2hostname_single_node ${nids}) - if [ ${PIPESTATUS[0]} -ne 0 ]; then - echo >&2 "${nids_names}" - return 1 - fi - - [ "${nids_names}" != "${nids_names#*$mgs_node*}" ]\ - && nids_str=${nids} # nids of backup MGS node - done - if [ -z "${nids_str}" ]; then - echo >&2 $"`basename $0`: check_lnet_connect() error:"\ - "Check the mgs nids item of host ${HOST_NAME[i]}!"\ - "Can not figure out which nids corresponding to the MGS"\ - "node ${mgs_node} from \"${MGS_NIDS[i]}\"!" + # Get the corresponding NID(s) of the MGS node ${mgs_node} + # from the "mgs nids" field + nids_str=$(get_mgs_nids ${mgs_node} ${MGS_NIDS[i]}) + if [ ${PIPESTATUS[0]} -ne 0 ]; then + echo >&2 "${nids_str}" return 1 fi fi diff --git a/lustre/scripts/lustre_rmmod b/lustre/scripts/lustre_rmmod index c306594..2f6b6c2 100755 --- a/lustre/scripts/lustre_rmmod +++ b/lustre/scripts/lustre_rmmod @@ -7,14 +7,6 @@ SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH -case `uname -r` in -2.4.*) RMMOD="modprobe -r";; -*) RMMOD="rmmod";; -esac - -lctl modules | awk '{ print $2 }' | xargs $RMMOD >/dev/null 2>&1 +lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 # do it again, in case we tried to unload the lnd's too early -lctl modules | awk '{ print $2 }' | xargs $RMMOD >/dev/null 2>&1 -# third times the charm -lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs $RMMOD - +lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod diff --git a/lustre/scripts/version_tag.pl.in b/lustre/scripts/version_tag.pl.in index c252212..c341642 100644 --- a/lustre/scripts/version_tag.pl.in +++ b/lustre/scripts/version_tag.pl.in @@ -156,7 +156,12 @@ sub generate_ver($$$) $hour, $min, $sec); print "#define BUILD_VERSION \""; - if ($pristine) { + + my $lustre_vers = $ENV{LUSTRE_VERS}; + + if ($lustre_vers) { + print "$tag-$lustre_vers\"\n"; + } elsif ($pristine) { print "$tag-$show_last-PRISTINE-$linuxdir-$kernver\"\n"; } else { print "$tag-$show_last-CHANGED-$linuxdir-$kernver\"\n"; diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 5a6f422..d3d60f0 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -4,30 +4,28 @@ AM_CFLAGS = $(LLCFLAGS) # LDADD = -lldap # LDADD := -lreadline -ltermcap # -lefence -noinst_DATA = -noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh test-framework.sh -noinst_SCRIPTS += runvmstat runiozone runtests +noinst_DATA = +noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh noinst_SCRIPTS += sanity.sh rundbench test-framework.sh -noinst_SCRIPTS += acceptance-small.sh compile.sh compile.sh conf-sanity.sh -noinst_SCRIPTS += echo.sh insanity.sh lfscktest.sh llechocleanup.sh -noinst_SCRIPTS += llog-test.sh lockorder.sh lstiming.sh mount2fs.sh oos2.sh -noinst_SCRIPTS += oos.sh recovery-cleanup.sh recovery-small.sh replay-dual.sh -noinst_SCRIPTS += replay-ost-single.sh replay-single.sh routed.sh run-llog.sh -noinst_SCRIPTS += run-quotacheck.sh run-quotactl.sh run-quotafmt.sh +noinst_SCRIPTS += acceptance-small.sh compile.sh conf-sanity.sh +noinst_SCRIPTS += echo.sh insanity.sh lfscktest.sh llechocleanup.sh +noinst_SCRIPTS += llog-test.sh lockorder.sh lstiming.sh mount2fs.sh oos2.sh +noinst_SCRIPTS += oos.sh recovery-cleanup.sh recovery-small.sh replay-dual.sh +noinst_SCRIPTS += replay-ost-single.sh replay-single.sh routed.sh run-llog.sh +noinst_SCRIPTS += run-quotacheck.sh run-quotactl.sh run-quotafmt.sh noinst_SCRIPTS += sanity-buffalo.sh sanityN.sh sanity-quota.sh tmpfs-sanity.sh -noinst_SCRIPTS += kbuild lkcdmap rundbench runiozone runslabinfo -noinst_SCRIPTS += socketclient socketserver -nobase_noinst_SCRIPTS = cfg/insanity-local.sh cfg/insanity-ltest.sh +noinst_SCRIPTS += kbuild lkcdmap rundbench runiozone runvmstat runslabinfo +noinst_SCRIPTS += runtests socketclient socketserver sanity-sec.sh +nobase_noinst_SCRIPTS = cfg/insanity-local.sh cfg/insanity-ltest.sh nobase_noinst_SCRIPTS += cfg/local.sh acl/make-tree acl/run -nobase_noinst_DATA = acl/cp.test acl/getfacl-noacl.test acl/inheritance.test +nobase_noinst_DATA = acl/cp.test acl/getfacl-noacl.test acl/inheritance.test nobase_noinst_DATA += acl/misc.test acl/permissions.test acl/setfacl.test EXTRA_DIST = $(noinst_SCRIPTS) $(noinst_DATA) \ - $(nobase_noinst_SCRIPTS) $(nobase_noinst_DATA) \ - sanity.sh rundbench + $(nobase_noinst_SCRIPTS) $(nobase_noinst_DATA) if TESTS -noinst_PROGRAMS = openunlink testreq truncate directio openme writeme +noinst_PROGRAMS = openunlink truncate directio openme writeme noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy noinst_PROGRAMS += stat createmany chownmany statmany multifstat createtest mlink utime noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat @@ -39,6 +37,7 @@ noinst_PROGRAMS += mmap_sanity flock_test writemany random-reads flocks_test noinst_PROGRAMS += ll_getstripe_info if MPITESTS noinst_PROGRAMS += parallel_grouplock write_append_truncate createmany_mpi +noinst_PROGRAMS += iam_ut endif # noinst_PROGRAMS += ldaptest copy_attr mkdirdeep bin_PROGRAMS = mcreate munlink diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index e43528c..35048b2 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -6,6 +6,7 @@ set -e PATH=`dirname $0`/../utils:$PATH +[ -z "$CONFIG" -a "$NAME" ] && CONFIGS=$NAME [ "$CONFIGS" ] || CONFIGS="local" #"local lov" [ "$MAX_THREADS" ] || MAX_THREADS=20 RAMKB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo` diff --git a/lustre/tests/acl/getfacl-noacl.test b/lustre/tests/acl/getfacl-noacl.test index 6d730c4..4082a3b 100644 --- a/lustre/tests/acl/getfacl-noacl.test +++ b/lustre/tests/acl/getfacl-noacl.test @@ -38,7 +38,11 @@ filesystem with or without ACL support. $ mkdir d $ touch d/y $ ln -s d l - $ getfacl -dR . | grep file | sort +The result of "getfacl -dR . | grep file | sort" is related with +the dentry item order in parent directory. Such order depends on +FS implementation. Fix with -P (--physical) option. +# $ getfacl -dR . | grep file | sort + $ getfacl -dRP . | grep file | sort > # file: . > # file: d > # file: d/y diff --git a/lustre/tests/acl/permissions.test b/lustre/tests/acl/permissions.test index bcf947c..04accac 100644 --- a/lustre/tests/acl/permissions.test +++ b/lustre/tests/acl/permissions.test @@ -202,8 +202,11 @@ only need to verify that ACL permissions make a difference. > e/h following 2 lines seems not valid, which also failed on ext3 in FC3 enviroment, although it pass in FC2. commented out by CFS (agreed with HP) +Replaced "echo" with "touch" can resolve such problem. # $ echo i > e/i # > e/i: Permission denied + $ touch e/i + > touch: cannot touch `e/i': Permission denied $ su $ setfacl -m u:bin:rwx e diff --git a/lustre/tests/cfg/insanity-lmv.sh b/lustre/tests/cfg/insanity-lmv.sh new file mode 100644 index 0000000..cea4fbb --- /dev/null +++ b/lustre/tests/cfg/insanity-lmv.sh @@ -0,0 +1,84 @@ +FSNAME=lustre + +# facet hosts +mds_HOST=${mds_HOST:-`hostname`} +mdsfailover_HOST=${mdsfailover_HOST} +mgs_HOST=${mgs_HOST:-$mds_HOST} +ost_HOST=${ost_HOST:-`hostname`} +ostfailover_HOST=${ostfailover_HOST} + +mds1_HOST=${mds1_HOST:-$mds_HOST} +mds2_HOST=$mds1_HOST +mds3_HOST=$mds1_HOST +ost2_HOST=${ost2_HOST:-$ost_HOST} +gks_HOST=${gks_HOST:-$mds_HOST} +LIVE_CLIENT=${LIVE_CLIENT:-`hostname`} +# This should always be a list, not a regexp +FAIL_CLIENTS=${FAIL_CLIENTS:-""} + +NETTYPE=${NETTYPE:-tcp} +MGSNID=${MGSNID:-`h2$NETTYPE $mgs_HOST`} +FSTYPE=${FSTYPE:-ldiskfs} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} +STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-$((OSTCOUNT -1))} +TIMEOUT=${TIMEOUT:-30} +PTLDEBUG=${PTLDEBUG:-0x33f0404} +SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} + +TMP=${TMP:-/tmp} + +MDSCOUNT=${MDSCOUNT:-3} +MDSDEVBASE=${MDSDEVBASE:-$TMP/${FSNAME}-mdt} +MDSSIZE=${MDSSIZE:-100000} + +OSTCOUNT=${OSTCOUNT:-2} +OSTDEVBASE=${OSTDEVBASE:-$TMP/${FSNAME}-ost} +OSTSIZE=${OSTSIZE:-200000} + +#client +MOUNT=${MOUNT:-/mnt/${FSNAME}} +MOUNT1=${MOUNT1:-$MOUNT} +MOUNT2=${MOUNT2:-${MOUNT}2} +MOUNTOPT=${MOUNTOPT:-"user_xattr,"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" +DIR=${DIR:-$MOUNT} +DIR1=${DIR:-$MOUNT1} +DIR2=${DIR2:-$MOUNT2} + +PDSH=${PDSH:-no_dsh} +FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD +POWER_DOWN=${POWER_DOWN:-"powerman --off"} +POWER_UP=${POWER_UP:-"powerman --on"} + +MKFSOPT="" +MOUNTOPT="" +[ "x$MDSJOURNALSIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE" +[ "x$MDSISIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -i $MDSISIZE" +[ "x$MKFSOPT" != "x" ] && + MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$mdsfailover_HOST" != "x" ] && + MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" +[ "x$STRIPE_BYTES" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param lov.stripesize=$STRIPE_BYTES" +[ "x$STRIPES_PER_OBJ" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param lov.stripecount=$STRIPES_PER_OBJ" +MDS_MKFS_OPTS="--mgs --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT" +MDSn_MKFS_OPTS="--mgsnode=$MGSNID --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT" + +MKFSOPT="" +MOUNTOPT="" +[ "x$OSTJOURNALSIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" +[ "x$MKFSOPT" != "x" ] && + MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$ostfailover_HOST" != "x" ] && + MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" +OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" + +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop"} +OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} + +SINGLEMDS=${SINGLEMDS:-"mds1"} diff --git a/lustre/tests/cfg/insanity-local.sh b/lustre/tests/cfg/insanity-local.sh index e3f4087..d31c089 100644 --- a/lustre/tests/cfg/insanity-local.sh +++ b/lustre/tests/cfg/insanity-local.sh @@ -3,6 +3,8 @@ FSNAME=lustre # facet hosts mds_HOST=${mds_HOST:-`hostname`} mdsfailover_HOST=${mdsfailover_HOST:-""} +mds1_HOST=${mds1_HOST:-$mds_HOST} +mds1failover_HOST=${mds1failover_HOST:-$mdsfailover_HOST} mgs_HOST=${mgs_HOST:-$mds_HOST} ost_HOST=${ost_HOST:-`hostname`} LIVE_CLIENT=${LIVE_CLIENT:-`hostname`} @@ -10,7 +12,9 @@ LIVE_CLIENT=${LIVE_CLIENT:-`hostname`} FAIL_CLIENTS=${FAIL_CLIENTS:-""} TMP=${TMP:-/tmp} -MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt} +MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt1} +MDSCOUNT=${MDSCOUNT:-1} +MDSDEVBASE=${MDSDEVBASE:-$TMP/${FSNAME}-mdt} MDSSIZE=${MDSSIZE:-100000} MDSOPT=${MDSOPT:-"--mountfsoptions=acl"} @@ -26,6 +30,7 @@ STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0} TIMEOUT=${TIMEOUT:-30} PTLDEBUG=${PTLDEBUG:-0x33f0404} SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} +SINGLEMDS=${SINGLEMDS:-"mds1"} MKFSOPT="" MOUNTOPT="" @@ -35,6 +40,8 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$MDSCAPA" != "x" ] && + MKFSOPT="--param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -49,12 +56,14 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$OSSCAPA" != "x" ] && + MKFSOPT="--param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" -MDS_MOUNT_OPTS="-o loop" -OST_MOUNT_OPTS="-o loop" +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop"} +OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} MOUNT=${MOUNT:-"/mnt/lustre"} PDSH=${PDSH:-no_dsh} diff --git a/lustre/tests/cfg/insanity-ltest.sh b/lustre/tests/cfg/insanity-ltest.sh index 38ad798..0a7e45b 100644 --- a/lustre/tests/cfg/insanity-ltest.sh +++ b/lustre/tests/cfg/insanity-ltest.sh @@ -30,6 +30,7 @@ client_HOST=${CLIENT1} LIVE_CLIENT=${LIVE_CLIENT:-${CLIENT1}} # This should always be a list, not a regexp FAIL_CLIENTS=${FAIL_CLIENTS:-"`all_but_one_clients`"} +SINGLEMDS=${SINGLEMDS:-"mds"} NETTYPE=${NETTYPE:-${NETTYPE}} diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh index 05f038d..59ceeb3 100644 --- a/lustre/tests/cfg/insanity-mdev.sh +++ b/lustre/tests/cfg/insanity-mdev.sh @@ -8,6 +8,7 @@ LIVE_CLIENT=${LIVE_CLIENT:-mdev6} # This should always be a list, not a regexp FAIL_CLIENTS=${FAIL_CLIENTS:-mdev8} #FAIL_CLIENTS=${FAIL_CLIENTS:-""} +SINGLEMDS=${SINGLEMDS:-"mds"} NETTYPE=${NETTYPE:-tcp} diff --git a/lustre/tests/cfg/lmv.sh b/lustre/tests/cfg/lmv.sh new file mode 100644 index 0000000..8f96af3 --- /dev/null +++ b/lustre/tests/cfg/lmv.sh @@ -0,0 +1,88 @@ +FSNAME=lustre + +# facet hosts +mds_HOST=${mds_HOST:-`hostname`} +mdsfailover_HOST=${mdsfailover_HOST} +mgs_HOST=${mgs_HOST:-$mds_HOST} +ost_HOST=${ost_HOST:-`hostname`} +ostfailover_HOST=${ostfailover_HOST} + +mds1_HOST=${mds1_HOST:-$mds_HOST} +mds2_HOST=$mds1_HOST +mds3_HOST=$mds1_HOST +mds4_HOST=$mds1_HOST +ost2_HOST=${ost2_HOST:-$ost_HOST} +gks_HOST=${gks_HOST:-$mds_HOST} + +NETTYPE=${NETTYPE:-tcp} +MGSNID=${MGSNID:-`h2$NETTYPE $mgs_HOST`} +FSTYPE=${FSTYPE:-ldiskfs} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} +STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-$((OSTCOUNT -1))} +TIMEOUT=${TIMEOUT:-20} +PTLDEBUG=${PTLDEBUG:-0x33f0404} +SUBSYSTEM=${SUBSYSTEM:-0xffb7e3ff} + +TMP=${TMP:-/tmp} + +MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt1} +MDSCOUNT=${MDSCOUNT:-3} +test $MDSCOUNT -gt 4 && MDSCOUNT=4 +MDSDEVBASE=${MDSDEVBASE:-$TMP/${FSNAME}-mdt} +MDSSIZE=${MDSSIZE:-100000} + +OSTCOUNT=${OSTCOUNT:-2} +OSTDEVBASE=${OSTDEVBASE:-$TMP/${FSNAME}-ost} +OSTSIZE=${OSTSIZE:-200000} + +#client +MOUNT=${MOUNT:-/mnt/${FSNAME}} +MOUNT1=${MOUNT1:-$MOUNT} +MOUNT2=${MOUNT2:-${MOUNT}2} +MOUNTOPT=${MOUNTOPT:-"user_xattr,"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" +DIR=${DIR:-$MOUNT} +DIR1=${DIR:-$MOUNT1} +DIR2=${DIR2:-$MOUNT2} + +PDSH=${PDSH:-no_dsh} +FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD +POWER_DOWN=${POWER_DOWN:-"powerman --off"} +POWER_UP=${POWER_UP:-"powerman --on"} + +MKFSOPT="" +MOUNTOPT="" +[ "x$MDSJOURNALSIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE" +[ "x$MDSISIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -i $MDSISIZE" +[ "x$MKFSOPT" != "x" ] && + MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$MDSCAPA" != "x" ] && + MKFSOPT="--param mdt.capa=$MDSCAPA" +[ "x$mdsfailover_HOST" != "x" ] && + MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" +[ "x$STRIPE_BYTES" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param lov.stripesize=$STRIPE_BYTES" +[ "x$STRIPES_PER_OBJ" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param lov.stripecount=$STRIPES_PER_OBJ" +MDS_MKFS_OPTS="--mgs --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT" +MDSn_MKFS_OPTS="--mgsnode=$MGSNID --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT" + +MKFSOPT="" +MOUNTOPT="" +[ "x$OSTJOURNALSIZE" != "x" ] && + MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" +[ "x$MKFSOPT" != "x" ] && + MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$OSSCAPA" != "x" ] && + MKFSOPT="--param ost.capa=$OSSCAPA" +[ "x$ostfailover_HOST" != "x" ] && + MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" +OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" + +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop"} +OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} + +SINGLEMDS=${SINGLEMDS:-"mds1"} diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index deeb91f..3dfea6b 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -1,22 +1,27 @@ -FSNAME=${FSNAME:-lustre} +FSNAME=lustre # facet hosts mds_HOST=${mds_HOST:-`hostname`} mdsfailover_HOST=${mdsfailover_HOST} +mds1_HOST=${mds1_HOST:-$mds_HOST} +mds1failover_HOST=${mds1failover_HOST:-$mdsfailover_HOST} mgs_HOST=${mgs_HOST:-$mds_HOST} ost_HOST=${ost_HOST:-`hostname`} ostfailover_HOST=${ostfailover_HOST} TMP=${TMP:-/tmp} -MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt} -MDSSIZE=${MDSSIZE:-400000} +DAEMONSIZE=${DAEMONSIZE:-500} +MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt1} +MDSCOUNT=${MDSCOUNT:-1} +MDSDEVBASE=${MDSDEVBASE:-$TMP/${FSNAME}-mdt} +MDSSIZE=${MDSSIZE:-100000} MDSOPT=${MDSOPT:-"--mountfsoptions=acl"} OSTCOUNT=${OSTCOUNT:-2} OSTDEVBASE=${OSTDEVBASE:-$TMP/${FSNAME}-ost} -OSTSIZE=${OSTSIZE:-300000} -OSTOPT=${OSTOPT:-""} +OSTSIZE=${OSTSIZE:-200000} +OSTOPT="" # Can specify individual ost devs with # OSTDEV1="/dev/sda" # on specific hosts with @@ -27,8 +32,9 @@ MGSNID=${MGSNID:-`h2$NETTYPE $mgs_HOST`} FSTYPE=${FSTYPE:-ldiskfs} STRIPE_BYTES=${STRIPE_BYTES:-1048576} STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0} +SINGLEMDS=${SINGLEMDS:-"mds1"} TIMEOUT=${TIMEOUT:-20} -PTLDEBUG=${PTLDEBUG:-0x33f1504} +PTLDEBUG=${PTLDEBUG:-0x33f0404} SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} MKFSOPT="" @@ -39,6 +45,8 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$MDSCAPA" != "x" ] && + MKFSOPT="--param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -53,11 +61,13 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$OSSCAPA" != "x" ] && + MKFSOPT="--param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" -MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop"} +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop,user_xattr,acl"} OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} #client @@ -65,6 +75,8 @@ MOUNT=${MOUNT:-/mnt/${FSNAME}} MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-${MOUNT}2} MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" DIR=${DIR:-$MOUNT} DIR1=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} diff --git a/lustre/tests/cfg/lov.sh b/lustre/tests/cfg/lov.sh index 56ca580..520a7a9 100644 --- a/lustre/tests/cfg/lov.sh +++ b/lustre/tests/cfg/lov.sh @@ -1,21 +1,27 @@ FSNAME=lustre +TMP=${TMP:-/tmp} + # facet hosts mds_HOST=${mds_HOST:-`hostname`} mdsfailover_HOST=${mdsfailover_HOST} +mds1_HOST=${mds1_HOST:-$mds_HOST} +mds1failover_HOST=${mds1failover_HOST:-$mdsfailover_HOST} mgs_HOST=${mgs_HOST:-$mds_HOST} ost_HOST=${ost_HOST:-`hostname`} ostfailover_HOST=${ostfailover_HOST} -TMP=${TMP:-/tmp} -MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt} +MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt1} +MDSCOUNT=${MDSCOUNT:-1} +MDSDEVBASE=${MDSDEVBASE:-$TMP/${FSNAME}-mdt} MDSSIZE=${MDSSIZE:-400000} -MDSOPT=${MDSOPT:-"--mountfsoptions=user_xattr,acl,"} +MDSOPT=${MDSOPT:-"--mountfsoptions=user_xattr,acl"} OSTCOUNT=${OSTCOUNT:-6} OSTDEVBASE=${OSTDEVBASE:-$TMP/${FSNAME}-ost} OSTSIZE=${OSTSIZE:-150000} +SINGLEMDS=${SINGLEMDS:-"mds1"} NETTYPE=${NETTYPE:-tcp} MGSNID=${MGSNID:-`h2$NETTYPE $mgs_HOST`} FSTYPE=${FSTYPE:-ldiskfs} @@ -33,6 +39,8 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$MDSCAPA" != "x" ] && + MKFSOPT="--param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -47,18 +55,22 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$OSSCAPA" != "x" ] && + MKFSOPT="--param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" -MDS_MOUNT_OPTS="-o loop" -OST_MOUNT_OPTS="-o loop" +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop"} +OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} #client MOUNT=${MOUNT:-/mnt/${FSNAME}} MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-${MOUNT}2} MOUNTOPT=${MOUNTOPT:-"user_xattr,"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" DIR=${DIR:-$MOUNT} DIR1=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} diff --git a/lustre/tests/cfg/mdev.sh b/lustre/tests/cfg/mdev.sh new file mode 100644 index 0000000..251e4ff --- /dev/null +++ b/lustre/tests/cfg/mdev.sh @@ -0,0 +1,32 @@ + +mds_HOST=${mds_HOST:-mdev4} +mdsfailover_HOST=${mdsfailover_HOST:-mdev5} +ost_HOST=${ost_HOST:-mdev2} +ost2_HOST=${ost2_HOST:-mdev3} +client_HOST=${client_HOST:-client} +NETTYPE=${NETTYPE:-tcp} +SINGLEMDS=${SINGLEMDS:-"mds"} + +MOUNT=${MOUNT:-"/mnt/lustre"} +MOUNT1=${MOUNT1:-$MOUNT} +MOUNT2=${MOUNT2:-"/mnt/lustre2"} +DIR=${DIR:-$MOUNT} +DIR2=${DIR2:-$MOUNT1} +PTLDEBUG=${PTLDEBUG:-0x3f0400} +SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} +PDSH=${PDSH:-pdsh -S -w} + +MDSDEV=${MDSDEV:-/dev/sda1} +MDSSIZE=${MDSSIZE:-100000} +OSTDEV=${OSTDEV:-/tmp/ost1-`hostname`} +OSTSIZE=${OSTSIZE:-200000} +FSTYPE=${FSTYPE:-ext3} +TIMEOUT=${TIMEOUT:-10} +#UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh} + +STRIPE_BYTES=${STRIPE_BYTES:-1048576} +STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0} + +FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD +POWER_DOWN=${POWER_DOWN:-"powerman --off"} +POWER_UP=${POWER_UP:-"powerman --on"} diff --git a/lustre/tests/checkstack.pl b/lustre/tests/checkstack.pl new file mode 100644 index 0000000..c59c970 --- /dev/null +++ b/lustre/tests/checkstack.pl @@ -0,0 +1,83 @@ +#!/usr/bin/perl +# Check the stack usage of functions +# +# Copyright Joern Engel <joern@wh.fh-wedel.de> +# Inspired by Linus Torvalds +# Original idea maybe from Keith Owens +# s390 port and big speedup by Arnd Bergmann <arnd@bergmann-dalldorf.de> +# Modified to have simpler output format by Dan Kegel +# +# Usage: +# objdump -d vmlinux | stackcheck.pl [arch] +# +# find <moduledir> -name "*.o" | while read M; do +# objdump -d $M | perl ~/checkstack.pl <arch> | \ +# sed "s/^/`basename $M`: /" ; done | \ +# awk '/esp/ { print $5, $2, $4 }' | sort -nr + +# TODO : Port to all architectures (one regex per arch) + +# check for arch +# +# $re is used for three matches: +# $& (whole re) matches the complete objdump line with the stack growth +# $1 (first bracket) matches the code that will be displayed in the output +# $2 (second bracket) matches the size of the stack growth +# +# use anything else and feel the pain ;) +{ + my $arch = shift; + $x = "[0-9a-f]{1,5}"; # hex number + $d = "[0-9]{1,5}"; # decimal number + if ($arch eq "") { + $arch = `uname -m`; + } + if ($arch =~ /^i[3456]86$/) { + #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp + $re = qr/^.*(sub \$(0x$x),\%esp)$/o; + $todec = sub { return hex($_[0]); }; + } elsif ($arch =~ /^ia64$/) { + #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12 + $re = qr/.*(adds.*r12=-($d),r12)/o; + $todec = sub { return $_[0]; }; + } elsif ($arch =~ /^mips64$/) { + #8800402c: 67bdfff0 daddiu sp,sp,-16 + $re = qr/.*(daddiu.*sp,sp,-($d))/o; + $todec = sub { return $_[0]; }; + } elsif ($arch =~ /^mips$/) { + #88003254: 27bdffe0 addiu sp,sp,-32 + $re = qr/.*(addiu.*sp,sp,-($d))/o; + $todec = sub { return $_[0]; }; + } elsif ($arch =~ /^ppc$/) { + #c00029f4: 94 21 ff 30 stwu r1,-208(r1) + $re = qr/.*(stwu.*r1,-($x)\(r1\))/o; + $todec = sub { return hex($_[0]); }; + } elsif ($arch =~ /^s390x?$/) { + # 11160: a7 fb ff 60 aghi %r15,-160 + $re = qr/.*(ag?hi.*\%r15,-($d))/o; + $todec = sub { return $_[0]; }; + } else { + print "Usage: objdump -d vmlinux | checkstack.pl [arch]\n"; + print "where arch is i386, ia64, mips, mips64, ppc, or s390\n"; + print "Each output line gives a function's stack usage, name\n"; + print "Lines are output in order of decreasing stack usage\n"; + die("wrong or unknown architecture\n"); + } +} + +$funcre = qr/^[0-9a-f]* \<(.*)\>:$/; +while ($line = <STDIN>) { + if ($line =~ m/$funcre/) { + ($func = $line) =~ s/$funcre/\1/; + chomp($func); + } + if ($line =~ m/$re/) { + push(@stack, &$todec($2)." ".$func); + # don't expect more than one stack allocation per function + $func .= " ** bug **"; + } +} + +foreach (sort { $b - $a } (@stack)) { + print $_."\n"; +} diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 757d9cc..2c628cf 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -12,23 +12,25 @@ set -e ONLY=${ONLY:-"$*"} # These tests don't apply to mountconf -# xml xml xml xml xml xml dumb FIXME -MOUNTCONFSKIP="10 11 12 13 13b 14 15 18" +MOUNTCONFSKIP="9 10 11 12 13 13b 14 15 18" # bug number for skipped test: -ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP" +ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP 16 23" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH +PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} +MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} +MKFSLUSTRE=${MKFSLUSTRE:-/usr/sbin/mkfs.lustre} HOSTNAME=`hostname` . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} reformat() { formatall @@ -163,6 +165,7 @@ fi gen_config +init_krb5_env test_0() { setup @@ -840,6 +843,7 @@ test_24a() { sleep 10 [ -e $MOUNT2/$tfile ] && error "File bleed" && return 7 # 2 should work + sleep 5 cp /etc/passwd $MOUNT2/b || return 3 rm $MOUNT2/b || return 4 # 2 is actually mounted @@ -851,8 +855,8 @@ test_24a() { umount_client $MOUNT # the MDS must remain up until last MDT stop_mds - MDS=$(awk '($3 ~ "mdt" && $4 ~ "MDS") { print $4 }' $LPROC/devices) - [ -z "$MDS" ] && error "No MDS" && return 8 + MDS=$(awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }' $LPROC/devices) + [ -z "$MDS" ] && error "No MDT" && return 8 umount $MOUNT2 stop fs2mds -f stop fs2ost -f @@ -883,7 +887,7 @@ test_26() { # we need modules before mount for sysctl, so make sure... [ -z "$(lsmod | grep lustre)" ] && modprobe lustre #define OBD_FAIL_MDS_FS_SETUP 0x135 - sysctl -w lustre.fail_loc=0x80000135 + do_facet mds "sysctl -w lustre.fail_loc=0x80000135" start_mds && echo MDS started && return 1 cat $LPROC/devices DEVS=$(cat $LPROC/devices | wc -l) @@ -936,8 +940,8 @@ run_test 27a "Reacquire MGS lock if OST started first" test_27b() { setup facet_failover mds - set_and_check "cat $LPROC/mds/$FSNAME-MDT0000/group_acquire_expire" "$FSNAME-MDT0000.mdt.group_acquire_expire" || return 3 - set_and_check "cat $LPROC/mdc/$FSNAME-MDT0000-mdc-*/max_rpcs_in_flight" "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4 + set_and_check "cat $LPROC/mdt/$FSNAME-MDT0000/identity_acquire_expire" "$FSNAME-MDT0000.mdt.identity_acquire_expire" || return 3 + set_and_check "cat $LPROC/mdc/$FSNAME-MDT0000-mdc-*/max_rpcs_in_flight" "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4 cleanup } run_test 27b "Reacquire MGS lock after failover" @@ -970,8 +974,8 @@ test_29() { sleep 10 local PARAM="$FSNAME-OST0001.osc.active" - local PROC_ACT="$LPROC/osc/$FSNAME-OST0001-osc-*/active" - local PROC_UUID="$LPROC/osc/$FSNAME-OST0001-osc-*/ost_server_uuid" + local PROC_ACT="$LPROC/osc/$FSNAME-OST0001-osc-[^M]*/active" + local PROC_UUID="$LPROC/osc/$FSNAME-OST0001-osc-[^M]*/ost_server_uuid" if [ ! -r $PROC_ACT ]; then echo "Can't read $PROC_ACT" ls $LPROC/osc/$FSNAME-* @@ -990,7 +994,7 @@ test_29() { fi # check MDT too - local MPROC="$LPROC/osc/$FSNAME-OST0001-osc/active" + local MPROC="$LPROC/osc/$FSNAME-OST0001-osc-[M]*/active" if [ -r $MPROC ]; then RESULT=$(cat $MPROC) if [ $RESULT -ne $DEAC ]; then @@ -1142,6 +1146,7 @@ run_test 32b "Upgrade from 1.4 with writeconf" umount_client $MOUNT cleanup_nocli +cleanup_krb5_env equals_msg "Done" echo "$0: completed" diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c index 5cd7cd9..ebcedb2 100644 --- a/lustre/tests/directio.c +++ b/lustre/tests/directio.c @@ -1,7 +1,9 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <string.h> #include <unistd.h> @@ -104,7 +106,7 @@ int main(int argc, char **argv) rc = read(fd, rbuf, len); if (rc != len) { - printf("Read error: %s (rc = %d)\n",strerror(errno),rc); + printf("Read error: %s rc = %d\n",strerror(errno),rc); return 1; } diff --git a/lustre/tests/disk1_4.zip b/lustre/tests/disk1_4.zip deleted file mode 100644 index c5773e703135e94827f06922b91c478531328206..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 170785 zcmeF430PA}zUXnBmhN%k_PC3fwrxd4KxLCP-P+1d+69m;(WV6>LIi?=K!}d5jtd|n zgf+ARiL6ms!j{CP1tmZ<0fKB%F~kFe2nmF&=bhNy^XARHbLV~cz4yI4_nz~8aOzb3 zt4`Ig_B#JReCNIJ_bZHyj6OBmf$p|lF|hIHvu~ltua_<~Hd<+fk3M4^{X@L}k<W}4 z+63SDc16&QZ~YU0Z=_hDSTH&KSz%$q@%A@_2*Q&;*8KU+y`QYUTJY6Zr$0UadI9c> zFTeZ4!M#h@?EmB5Z|;2d)w#sI`>#2S{P`q*-O81g+gI+`us~LOG}`;+_MCx3zfU`7 zd}mm%cdp%e{@Zllq0KHliM*?`RW>=<DkBqSx+k6G0V_Y8Odyf{Nyws+stV;~fWJ_| zq&W=wOB%Kd3sQzI)+!zKk6(42QjX_hY%2~#r19#;?+z~F&|b2;EC(F!;WYNm{34<2 zl)EGqyjw7$8`y_SZ0{e&aAI?46RC%Z&fOgKpB+fk?$)`a*-vV%kh95wRt2w44>>b- zv~LKn827~TY#pW9w9;~{tf!b(M&c{#WVxDDCUMh_<wi!Q_qdjyZi@-XmV!}g8Pjty zgz>Mh)A)s_jcRMKE*~lut?{=Hkrmo)(*5)YqlX8kJCJYswism?ZPFp2giIg3XhS>d zd%q)hq;;Cg)?oriE5={M=g)RAZ6_XGNu!QDnObvndVVUjTQ$R+U5rB<Js8o&tBcKJ zJ|H!{=Zbf8d=P+m?QR@M9l38tu~w(>Uf+IZ#fm!iw9q7AwlO09%&U&N1IcO=_Rz4i zZM1gjurfurGN`69!ft#Yv4VIeY;wS#_q?vsx@KI?c^SqSVogv7*SS<zp2Ch{&PGO& zLh(P8P;FR|9~uJ9dhT+kUv-FdXV)F}XLfbu#a7SSHHA&I#5DBX7n~jTFut^PG_<eX zbyS2-->sVy)>~1+pM_DUEtM^WGOo%Td7JJA6b#SK><i{%#*~yTgx(Wjs5JtoWy9m? z;t-AGc2nPsZ@&kZFi7N(g~dPUmg!o<@ZZ$d#~j8atKB(0!x|&ZGNaRf>UOmTt7?Sz zN&LI&OG$$*qR4`XFwL{MhyrO80T)1s0VxyNQjSS0$FZ^NP=h~rZg|`@WbhJ_1X_g? zh+)dt4W}K!OZ!Y`Uu2TasiNWJr_ySPaV5X3JS95(y(Tiq7}Z{1gNcbhiFRBSOz3sH zdWO(W+93=Ps7J!26VZKWb|#v<Z;-&U$WM^Wb|{=T{4BrnNF#EKnI_4=#`I6t=<jC= zqs1oiU;|sJ5)~FR+bXVu&Le&6*Mkp?P7s^)X}FVp3EdQ_cr|WNknF!bd=JBw3|tGQ zFu3AJv0PPL%VnK*L~^NXs!tJ@T1o-C5gC}eEz0QLY=4s2+;sNsa7%?YG*F(p>)u;) zZl(HZhOC^aK2kDO!1s4(!1ZCcyyK$$z6Z9>!f;x`(C|2$m(lTfq&YH>bexKqMNH@2 z4-6Z*%a!piv``+>PJWopBn~q&6bYe>R=3@|e7#ztUkkT)jFKc1NO3~J%siMMq(pBZ zvT$AJyP6rJ32cgsov=KZu^?ySx?*Oiw&-(IC)l(PL}EgjQ;~t~3VJ<Aw&FTe2{mej zc7kMq&aM}T?p|&nUqTlTxJ_UsqajPH8QrTd&rPQ6l5Y%->ls;HjYBKd5g*KVz1RTG zG|T3s!SQ_?z~@iafMspuo^|iFI#kqHb+T5uu5G?p@7@NgF(QqcOkH$jIrnGLlB%r& zMq_ykp_@+~*-i4JMZcnA{BYf3Rq+Hnazxt_!ZWqf{(N@!#V9V%LDOTi$})6>@Zs&| z#i)sbozcO0ZVo~YK2_*C$f0zNS8$~)T~!5DGg<QpTN&R}(J~(Rj<}C0T=U69)EF*g zq7ruqB@<Kdz09dhiF3=WA_vHDWmQ`7KAX$6r4in{QRF8`)kI7n>w!<F(2SCh@Th+m z>jYXdQ>9ZD*Ha=pcIHTyaKMtjiWx0|(l?JwNRf<^!%r?h{-S4Yo|m!)Y0SM2-t}+e zXsISTJt3Vek&(fjsUXD^JQ(#s$<fVobcnT}?$R!O<T~(R<#n+2GynR1ZKS(>g$L@x z+h><`&LeJ(U!IOX(=d&w@0e$FADHcyN?#QTZS-QhO9xKPAr-RNbSNw!Qmd)xlY{k4 zP?)L}C9lZ<+ru=K<r4*wTK7IWFRG{}O6zcIG!494U*W3X$Iy*v)Ur#apv2|%yaNsx zk70H0u5~3%->kLoSCVZTW3I_!4hFnlrYx|Dspfe8^&knmVFEA#m;g)wCIAzF3BUwk z0x$uX089WT026=-zyx3dFaekVOaLYT6Zr2*;JSTR-HG0lek@5!t)t!QhG(kj-u|(- zo4GkCx1{HJ)=uS}76}f;35u8T%nns-Z$y_P3VCYojk{~fK(njtSxIqhr@b`E9g*_0 zz$7o(r<*c3(0YIV=M!Aip71w8P37ae_6c$d6CU6E#ClJ?&*0fJ1377jR~0oTs<xT$ za5?evv7mulC&38@_x#W|6eMnN|K4nYk<r6VVM$bxN(**J2lGJ4<D742V<vkcR7npH zNK{DZep>AU8XE%8Ng{IK_y)lFcLv`TmBn!n_xHM@bnQ!vqK}A6=7n#%CpIWc;yL6V zUs<a3wnJ<9<P-ykSq+J=c{;!Yx$*NO73-Yq6aHXGzzdzoe-LE8K6&y@E#lp5fzr`3 zd9kfi<eYzRlj-CLloun@y&4%P&THXlpbS0SQV|Y<U6rRMQ?rGJP{Z~5bGpIPUClN; znYy>@!EizDY-n{Sf|&MGVN9QsS#4B;EYkQLe)86wm^vqB4KfWMxv&3p{``dlYAd6i z%}b1o?g87#cdf|^Yw`=SEJirTz(FO3QqPR)O{x}1Gg{8feOJ9LZRwnA4x(PUf1*Ih zdc<CA(*AlrBSbk**zoE1oa3$6Mtw$+r?RJu9d1{geb#8wn=iO~cCvBg{95qBA>Gc@ zW!?7rl7O{f@AAr1lIxo9svV;;$i2ne%3pymIPH$m2f^T?X&D!I_e&U3FLS-#4vyC7 zdV6NOZ1kHZ1X3v{Aq~6~*E(&yroK%QOTLQ~D_PSu(+NrXr1y?p>amdg+1EcGK*^31 zmQ*FC5UeCAs^a&L2aN-61<7rqL&vI5W31G1O`X;PcVg0|&Or{H8J`8*;VGBI_=ORL z53f2Rs>Wlx#$G?j#-`0F5GhS9gp&jFrp+>y+TuZIVArXh%ZekjubcbpE2&x7Kul<_ zgj~{sZb}xqGVKzB1AF<MoXPG!b>sVI2V5hGyw}kTr@IGn2kYmQ{a7FWVf)H;DSD(n z6$Eh&eRHB3Q*8T<-Q6chLpFM;c&h+M4x5NmeSL*%!xpDf<RnM%ow=gNoK)L_PW6aV zakYB>yPD>ziO-riVs)i+4o+Hpc`TUgaD0=cD0)Jt4<p~4bympFzWk)Cui2L*1rWMJ z%y8209+tdl63-U*&N?-q)?HvMUe-RPd_@ZqT6nQuMQ8?w3wnzNK{4@!Mz&CfTNYD$ zE4udqiK%YuYj}eaak%|OXrXJPWk@&Y&5HxWy=>MM2|<aDO`cFas=#BnV+Pl#^+A1) z1hLf<l)?4+fpZtF8173)0@@cJ78lf@|G|fLOg&q{R+2;8e>a(_nJKM{%rBhtI?V5> zBW!n0M|o>X0j$Wl$+czXauYd3q|xHKc_HEcZJ-%0YFRot=-h5rDA?v;JK-eZ&dpq7 zN&(jP>|KRIDo3%unky8~ylxbyGz~vVO_jwzi6BG~9g%!4S(4Fi9h{|U81JZH&t#<$ zr>DaFQ)?=dIFOb`me^nSwq4m1LaI2Z^aUzR2OTCDV#z(6Z&Xlz(Znu+(^O1?NaB9t zIBvMMlD(6cq0X4^?60)K^@x>`2c{&cSkTtS>;|=vdXt(@%~|GST;|l}WlZ14pHwLD zCy57~c9>DqJ;>B_A97~8mpk1N$OD3aTw4dT0{w1R=PvK|vQa@iU#l2WwBq*?Q=N8M zdMp-rVh=hw>inHH7v-aG_<3IwEEZhj->kDOqIP?5`7`(kCyS!6R4U&Q*bI~bCjc?9 z637-4T#G1rC)W0&VssHYOTQ)T2D$)EK^KqQ7|9*EHBxBhgY(4s;=FMvnTO0r=C!(v zK))tP=5GdA013N4I8Se{3nP+)v)yjE<+|On@<sR|JQ2Q>vW&6}ItN9eqe@XURG9@I zS&E`t2>byX03qlFYJqq_4lKqdG;awjVCS-Lu?yKZ+4<}oHkF;p&SPh@i`ZHEZDAC4 zf!>j1O+xCt)j|ahP%C7>CB+5B69ocr2ik!)MV6u%peZl_9pDJS(gFcU3*qzl)j)$n zuDDjWMYnsHI^{v2VahOcQK^X5!k6)f0VQApfc&qwo2b)ze8S3|XG?csYmewl23*p7 zLrN_L0oaK;-*uD?Sq;7!zN3P(SWVr&fnBavx}bq=(!veJ)Em@XHQx#=wB6|Ilf{q0 zD(l>dhX!nX{gTVJVNVS>&(g8jVp~^fK}4yJug69>ec|fpy4lsv)zU0?nO8!7_A<ZA zrLO=0c%itCKkDRSmZ^6!%hP{pmYwbw&$kiO@^@iloa}Wch-dRD>E7*hNvR5G0+N6q z6<YlF!8caXviNuKaq3FEmy;dqi=v!W^wqT0Wt*{BC)c9PoH8-ci4SmcX6-4;Lf=H^ zqjS(ybS65l+gDP?@nd*N=z(-$DaUhlsgNct6Ve?@9cT_^4)n!>5PpbYkUuDJ<~s{o z`1^rj;H`p+_ax$onBdF+8poGR87U4ZGbuHpnUr}8E(_fG?t&9QI4}yx1Y%%0kOdqD zt}09b0nmZ>SNo{j@d@hF>QHrxdT(%1ji-=iO|veurZY+zG)5VNjw{8{aAi2Utd#5} zER)g5ej;xX3cRVe(}gs<>5`i_>b}tJ)VZq<s$-jX>AuoMHgD4fs<CRB0tt*MI6#h~ z79Z7YsdLeNS?$U3VbY!XML-}R1RMY+P@`zU6VxR2J~hYoM#rUNo9qq;Zf8)ltN(1M zZ~G(msBpn5EVs^oz$rSbt*pH?S`aRnuL~P+lioP%li^LyIqT(2Z>O7=n$y^I-UD{N z9?J!#bs+<;(oE{DIiD;68tavxKj){z1`n9~qBOL%SWX=|KW~`_$`9p<@<n+Y(=Hr( z_2=Aw#tPqVT5BN?(Ky~RR3+b-Z!EaXF9V(cn}8DFf#SYmTA>7H6jv0_73KK->dOiM zf1-IiX^Sqr*@<LNvLd;WHdgxyOQQs<1r$C-5XcV{2>C*R1K&Zw<TC{XKC*0rPY@vK zvN9Q+><Q-UEyIfSuERG{Z=~kxH|e(O_6+By-qhO+-_l#?Y;^>6F+N@`!h5TOn(c;j zQmLt#eWe)Mpui01R#Yk`6a|Vwe4sk4*;(hN+o;2=^AHOdrsa9fhX~`I;rxhlhiGaW z&61ykt*lEJ*plX*;W<car%hv9>PQ0{rMVj@`W@0+XZ<ozC=Zkm2}`Mq%D=IN=80w2 zB@b+J-6qZ3LTB-B+1g2q)PlM?R6cdi*M|T9aWpSN7j}DEds_Qidt0Lz9t<A|P0q*A z1Iy%sN<kD~RJxcSC79q(2u%5=f&zYl;38lNR0Gj~6fgsdfH2^WVhSHiL^XfKa$s3k zd;F7EcGl!yS9a^e2TAJ+8azU1)%-qefvuJ6mgs^F1#NUGe-3-OE}R&uuB|&f;OOe+ zy3uvlY*`q;2D>*ucT3qYe+rwSD81ZRj80m5oLlj-kjPU-#E<v%iTv6F!warac7(X8 z-ffO#zc1a*=owDwd+E*_uGnsUJG!q>t|*B>r1sc!c<;F&Pj^wTZ9grb5)_ZzU352^ z+Ly3<?AiZkI}^kJ->|%YSTgupvuQkmgS^8SC5m;Q8-}I9+@;Bg4n4X^25x1z=IA(R zn~4$bKAAEo{#fnQL}D?|o0>myq*`f|{fio3@o|^wOH(7ab#kkk5M^y;xLPovkt}8R zITb233go;}Q<do7=SMEtFaXBB*KJdR+KhS;qE@nV1Z4fJ0o|+58$0T=F7MKNL#krE zj(Tw|vab_b^vmq7Pa}01*TGp1Td5<9aUTDBDtnrxnE5Nx@!Rd9-MG2($Q{9rt+Llo zy;o8D9F7mv?Iy+4kO6JYBOQh-=0+s+iswdB1N?cUv%=SPy-lILVY3!-tlNW8*Z(wY zCudgA_Bxc)l{4PW;~~oTKo~0oh0D|T3tGl!99mtsqyc8lJaI}8rgB8+&)BhCp3r_G zENtglA6%ZW$t8*d#!JO{yRVBKpUA+wcJnGsm$6>UD2LP&Pe8ko?c_?CRs;S#Cjyx~ zm04x<IsvUK_a!oXfUTKCCJeX<B9{1ZVhSwR3FeY#+^+di@1H4NYo7jIXV^S#==VU7 z&j`L_jGQUXS^R;1{}oYEwH7e;H!`ySlXNg7c^?l0_TLYNM9;a?dQpx)wK|kcs*%?T ziIA63NsvZGR>{e#+`C=E`1-hK+z1WP(QHt}4t4wT@x&rXWM@Y<kGuo*_OcroPbvoc zk=G+tT#|m&P?F*atyWUsPUalH4(1ukGK$3Q^_Sj=MO4_)4@&!ESqp9S#%1c`+VzFe zTKDSR`bzVF-fk(cb|lGl6qCN|g-4hEvfj0Om%gNL{5l!qXD*V-Ie$?l)MYAHtOGM= z0eLWeVjZ|toWY{^%rCrtJyWke@B3s&F=*`ac3z8w%0_t*4(86CeZ9FQY>;!_M7q>a zJi~SR+sa6BadSl*g79YYc5r<D1=->ayN~<$e_U^wCYs-#8Ohtn!}d9d_*In$P&m~W z{Etf12F+D;lIG#nxM4eE`w6ylu;iLSd-clh>d?qi3jN@)ru&uo$cy1*AZ($<SjxQ$ z>)+bGL3g??Wz;76y=7nE4?#8gy?0H3^6|Kk)l+|~JRNn0RmVQ=r}<5Nn)Ns8(^87A z<!nb>%`e@@s#tY!D4<X1gak{P`ZOHn`Zf)Rk_bZW3etUjZnE<)eaP;`v<Bm*3GZr6 z$N<q$TN8lK<}u(z-)MM}R^&u6&$+@`d-Na;6ML8-D=c$molqD03_ft>6pj>f<wzlk z*Qz3zRIN#=`p2rTfngg={}21o?V)6!bpmI6F<13~9l&B#qJy@KO{8<1n~zQAj_hW{ zC-v#Im;YM(^|gd-hnSahXv@O2l(mo=>*zc=<hp`n7}V_3(?`K<%4f+?o7QXqaS`H1 zvY{>P98Xi|XD;eMDad<v4+KjFJ!HHX5WL*Lo&<T9j?95bCc{o2V#b-%gQHG04s{^d z3F*Ne1HqS3r5VV3*xZD)QE)+qZ8~&V^Wi-|Ny}00(IX7a?50|yp-TyTg^sr4=+5v* zxAa&|cj4}MaJjNLj{heTs*wHqoIY!{Rjwmn`8gD04K<wyisDLThJ%uOvG^1{7$D`% zVkk2Qzi^Wo(o8(M%D4EwXo0|wy6eOvy~Zk+FXG4+9FCx-%qX_m_ysreo@+XX2#*w> zEIy!iGaMK_+|=uVux6amBHO|6ANJ@)P!SQl?rCttfI3c#v@ea*a!##o2b1f@p;i;H ziG+b7ol7ej)qx>_z5V6>^?lq_LpobtUo^KYNO*j|eS@3k8#E`WSnvQmP!b=3@h@t5 zG-U?K+}nts1Ua)FBE!+svC;SaAi1E9je5LY&W-wg0TY!RLJwD-F77^EJQtY0{8LD% zr2FAR3-|wd90ZgHL#ILQmx;U>XixTfU$UX{O1^f~hrh}rzn?EIY9~*+{L9zi&be{S z)O<!s-|K0lFi9)>4-7e@L9$lrG(&B(*-nE{EBCa|X9QRBOtsHOPR;1jLDt+TIPxS> zi<}z3_=bsGMv)KPNazWC*eK`rdK@%#`Aeros(F_=W|Z%%GoxUk@p^E~GHiJEwL{@6 zDm(Fckm;lbxh|CXl)&6-aptn7Ypuhco2D_g3;I`7QM1Y873+$n?2p3_Z}BCcHHZB_ z&a;a~Jnw2n_+Jl|5_;bQ!(UQNTEdS#jfxmfP7+HOT681`*Y{UxrcM@Fqi1?T%{%Ji ziZapEZZN7iaUVfeMp#;vD}PkMZj2e`)uaO`O&me?Vg@pCxlKR%KgXU$4p*&){?%d9 zYa*8Pq;Skp^~-S*vFU#~PNEE2YY5U?;?U`esv`Y?Ftpqv#$uvUrxpzilDq1upw#iY zgzW7CBI*|nj3QO346w%auLdBMruc8?@EBLOoOW<#LC^ozEN&dI#4RB8`xfH+X=Y~u zX?*RqRaP?R#Oa$C>9fAM_SHVc1rr!2PNh{wnt0|!ZS#0sG7ec;A#rPoIx{dHS~Fgw zUfDGALTY#5kvd;B*ry&Z=n*8VHiyktCo2>+Bh_@K4|x&@OHiGu`Nz|^zIJZLXKmX| z;Tt7nRFI;J{HS}c9Uz0I!dO<hbh=mtoj&G3Ls1m>(Ymgg--X40fKH(kPE90~7BJTe zjuPKKI1yQF``}~mPy8XomeiJN?Bkr&cIy+3tmT*&8Dty#<R#5K7tlNRVBffz%WUFB zp9og(*eWz7y2jJf8N-}YtcZg?>cb;gsG}tQIL-bS=-#iWPVHDAW~~+T-Vgp|1e}wD z4@@66R;&wgORZHdVE-PHG+b*=IL(0w<=4Vbpoa~4&;!}WKh5&;f;tch`JDd#uMdo0 z6T4;$&jy;c|I60pnu_tErAfw67d>};q8|iz{blfg&W59<pa~SaV2cJc93s;g2ESj= ziy?pb`cFtei`2~V1V>hUVx(oyp#EjYXN#`K9y_z8^-@LS=Rvu)yL@lt9ywjUalx)_ zcQ=)kATGSjI96z;HEMhH@|&>P*;&_ZOY6S%@OUPp6c%bWFlB|w0`b_&|Agg)vjh`> z3BUwk0x$uX089WT026=-zy$vJ3B1O)3I6<tcu%X+(Y?p5nsPncMAx3YtelOI`|7?= zHthJjBfebk3W7Ttcp&ujP#kNy9@LY!o0%LKoE_uO{-CYjv%a^%-T^&@KG|ZBAaA`? zA^`_qsmM-iMV~(1n$8ug;#$9Jc3w?+8%G4ar%$aAHDLx$-ij)lQD2-hZ2A>KA_r4_ zBp^rAX;FB?IP1?&=3STWk*P}!2a=KYMPls|&Ky7i^#=Vm?=9m<!^y*c<Wd(z)1P{g z9%M&STQM<W3(u;E!;5M|2a{g)ryLv}9IR53CX3@^pudyy@!HdN`qNFG1E?>(a4skj z=>A4o{l0zvK4PuM_qKIplQT9g%zAlqa~RI*;*$wm;i`3Is#AL5xO(p>Iuu>?wEi?2 zt^DH3-ay^?<>RX#s4LrhqLb88`Q;~-FL8|R3j%*owOld^*+8Jt?)?R2-2$B)|k zI_t?3pB>+Eg(a<Sto73u5;lTiPk}ko_VvTc%Nd9xf=HPyVKlv{AKO4^6zL_i%B(fx z<D;cIo@NdA4gt~P-Z#uLNg8H)E+O<5@41~?DSBCT_SSx-@~4)ml$_BWl3D7-IVGod z$|ED;a+T=i=|bUT4S7EFKf)A_0~3G=zyx3dFaekVOaLYT6MzZ81YiO%0hj<x04DIC zj=*c{<TGD<%+)(^;L{5Df(gI`U;;1!m;g)wCIAzF3BUwk0x$uX089WT026=-zyx3d zFaekVOaLYT6MzZ81YiO%0hj<x;Gdnq_Q^-*Kk5S90HZeD{bw%?oPU@AOaLYT6MzZ8 z1YiO%0hj<x044wvfC<0^U;;1!m;g)wCIAzF3BUwk0x$uX089WT026=-zyvDp85<k^ z=BCltdPdq$Fb|jjOaLYT6MzZ81YiO%0hj<x044wvfC<0^U;;1!m;g)wCIAzF3BUwk z0x$uX089WT026=-zy$v32|V7g{?m_t7(m6b|I<HxY2X6D1YiO%0hj<x044wvfC<0^ zU;;1!m;g)wCIAzF3BUwk0x$uX089WT026=-zyx3dFaekVOaLa})pE9W;M*0S7$x%U z4^-|A{L_v7h>g4M{tz_Tw%`B7*DH_Teg5qqqBoVUL_9iyxb01G`;*CQoB1U*L#L+R z`PrBiY|-!Fm{?sN%4?Dg#Ya*WeAFv|f58M`0{`U+c-hl$51O9-u|FlL+3aoC^Zqnu z)zf~2*;1v5W^S&zaFMG&t(7?aIwqVGMQ?eo{;IX$da&E{77*m*XixUBMa_!L-sgAC z1+oQGKQ~=?si|Hg7X@*KJNP>b&MN2BPpSmtXT6fTw}hA7#3lH#vvYCGTjq&6>4fA} zm}y{)4Wm?;+)5IM)izU~l5!?0DdUyZDe6B?r6jO3un5#LlL4{IQzt~$Tg?@*I@zJA zUqt62%2Wd-)3MIONi$zc5tEVb?N4n=Vv!kgHTs>&eRO#^T6E2+Z}ezQ`siW}W3J>G zV&p<pwo?qRJlxW)mg*xa+Idy^Q$Ay5xO3Z=s6CetY!->Akw+!?-8F%IA!9BDu`!X( zbJJ&bjx?8MT)qG#xiu=!oZ;YIqwAfEVxn?S1f{a>Zc@x1D5D}Bg->U%GglQwJp<pQ zbOo8ES#;Zds;g?xzgqQFWmOTCWAb0V9l-^J3H-k!aE?5^_&e`~zh42p;!`6_bhquB zC4bmoz=VdSbL$p;YP8Y_AAQCe7mwfNf8;Zxh4#TWzFiV@<6Hm4-y10wC>Bf(FD?xI zgrt09Q`_=<%g<kZvHj}9b6=nPy8o*mTXtry@Gkq~;+&uAZv62JkKbSXWC8BVm#3~@ z{qm~t+vOi>?%qwr*qvHoU6(y_FZ$HtvbtmI>MYBmM?|mke9!Jn^t|*mETUK<SM81* z>zv9XKPYs}%6ql#567ySla3!u*`a$-yVcK;wKYAczp-_o>gd4T){%EMHARH^Yc)lI z+fF?#=Txl7uFAi~y($q2LA&fR<xqLj*u633dk+k?_VqPyWucNaL1#$=k>n>AnXYbv z>&g9_hcD!i=h;ypp(9_CGVz4hqTxQh-_GI$zj{zp7*1qsU*kRUGne_gVk7diBj``~ zh4@6Lx5o=Ak;`lr7(M*3EhEFc^veDuExGft&V7FF->JEWC+n`X$1pPWAwTxvugl(V zCI9GrKz-@qr+2R_mKr5~xfU3WejWSz{0<|dd-a*Ag}+;J`8-%+UKA^BJ)c^Td~?=h zD%hH~>gkohfC=SLNVO7k6c|T;cu31m#*d}sa3v=LqKE1pgeF8-Rh}5`>XMgH3$29| zitN3Mva5?sv#&um1%>-YzI1(0)TEi_#3E>g@p|9=B)K+^@OmbVYbvR96j4-XJFrbd zS>h7qWQBEeb?r#<-bhk!T{^LeO(oI@J;G|~_z058Y)uvYq`IU}I&!(w_`0h%MT{iP zMo%KPKh4aTPDa%Ay2Xd^>SZ~kzUIqqLSq@ooygX3OalWoO7iHnHa+gia2jbetWrsB z)Xa2G*|i6bi)XGbx0pTeD@{&_R1T!2X)cHGtIM?m9S`ShqClYM8~JKofn@`gd@7nN zEu=18U%1TEhJU-<yvwJSKZ!q@pS29-ep;e?DkVhFrbDK{ZZ;=2gsWn3YIczw@kvQZ zx><{gSyhmgETg8stWwi=MBlU{rUuhHxQF8=;W8{JTv^L&y`?g(x4TU|NLY-G)U_MW zMk5eCi^3Q#)ZCn99>&Y$_rFug-R9@%E-~B4dPZK0So?5#<kD!xEcKiAq(HNRi4|$# z4`-a5olqDR2#)MOxIyWQh|>nH?cZ-}%W2#_UvQc(m%ShU;4$Gu&<-xGv=eI&o61om zr}U4U|2!2uF?iJFssypeEp=WSwr{NXyC{<BYL|@?QRFl2@~CRu=;o3ny;e(!cN^e= zufxxqCA>LF1wRM%m)+ckn)=lCIoHQjOu3BAgpx`GPEyIvpuQMwNG0uK04KoU&KOe} z>vTx@{<L*bm-ISW802XkbwQ;puV<%Gn2)r)3drF-a7<4!yp(UBOXffczK@1P7*Gfo z3c=R*F=LZVJIP;H&qlZ2SvrQi2fe@M<OP+9A+p8bF+E@q@rOjt7(^aGBBhWBx_6PB zcNY=~gkE&HFIdzT1tmu{WH{Qi6Ip90MZ+U)?Wt;9co5{#Vu&MLBu5ys&cIzzNqV3- zz0o_tH&FW1m+W)FKSPllX;AtPAlZ?ANY)>c#htpK%BX=}<N&>>-4JO8lI=IV$lJ6N zoP-=lUqZ5h23e?R8HS=U>mO-n4I&x{UeIV+TE?9Ea+9GT(ydg`JC96Hzq4k(MKq&C z;`Qw_-_H|V#<ji4&Q7Zp*1h?ZV#1?4eGYq`3G(UI`fgmrEqVxN5$KJJE{uuNznSTN zsa1j@xnT1yH?XB`UMtL)R*78OBxK&2V(=2=ncP0Fm6?vpIppg+@I2&U40#~djmjD8 zp&#f@PJ{w7p@4DdCm;Gr3s2D(KuICYpup2m;56iE4tW$qG03MN4*>FrG<ZM~B9}G^ zI0U^+4n?!RKCQy7g`{F3sWM240=<lxKCR;HhNQ+JsVGQ_0p*di3-TC)JVGE3HYCY} zBt?d(gzG%cStvRS@>D{e$Td(tp#p$sAkPxWGa8D4fRZ2^(gdI+A|a1K_i63FGhV_$ zFaekVOaLYT6MzZ81YiO%0hj<x044wvfC<0^U;;1!m;g)wCIAzF3H+BNu=A*$@AmDz zYYQlqy=xt759bgpw**i^Q?N3H+=@MUXTP|4=D}y#n-^P4T_<kFTk-Zk%~D$z90^L9 zdc2u+jpr0O-6RR!)1-1mJqHTwPI(c;KQ__3TvfLuRHsAn(Ek8>cr?JR8=;*Cg@@N= zfQGN&_BIKnI0MZOjq-T}bRQ^g2cg@Gxeld{P}<#--BvceiFgh2I`^iB(0R~dMcQTk z{?Us@x@*~rchO-WH-1%0EI^gB3A#F_Zd`(E{LnC#U`YF5m1>@ovIN6ka_sS@(CA~j zpWA>&UB0BnlwN<QX&IE{nv3?nYk?5Rbv>rwoc(uh=P_;o#x0553q-97a8KkJ{%?^> z>Xu)>3J->XuWM#!gv$yOPt>RKSiMB>Vs_2cCnl$Zo7^M2Bb!p>k1Em%0&*(5J5SVC zwKOHMd$QzW^~{V}k%Zl}A7r?0WjGi<3%i2_ri&&WVve?va^t#H4l8H7<dGxixeivi zLTk{MW-XT<u^qv`{J-*Hn6ov4Ia%B1YRh&@1TkpF5IEe}Jiq}h$g889$b)AH;i5$0 zILmyB-CCR6HJwdtYJK~&7wOnx4SM($>umU_{!I=(YCET4_dQ+b0NE+wg?2v}ajjZX z96T83Opjb>Oy9@9jz2l>X(OnzbvDZxD}5sPd@zX|t0~IEYI8K4S{ZJrFHS$fi0Z^F z3qzyPWZl(Mq|_X)ro`ebFwVBiCR<%Am<XJ;?`H5+GZP)+8I4sVnM(?<uZ>mB4euQY z0;j(r4YnKCe<QTWFsGF*<ez_%vg3%p)x=G|EhD4@`ffHB<A^-d%6!v!iWT`HcErDZ z%xVAVntQF?>D>Zm71@86>UcXj`*yqg1x})I5!qdK$vs&3O3nNv(Wj$5E9<IPLfe@5 zR6^VJw($0_Ygc+Vuf_~G>sqf_x12oKRI@eJ?Zn;_&mVgy^P1lka#yFlw={+tM}yFV z1O8+=X~!pDd$A5NvcTvxCDUzW(WR$PDRbQ}6Bn&ru<jTIr>lLbc=4yF_TN|BZ$0`4 zL(@Ck#RDtt7edZ~@8r{D6+YGd<^0xj(c6R+&~4}aSf?1ym};sGTix<&Q*9-f85zxk zrU1cm;gj1bZ`mn)PRfkFLl9`K*a9VEbQ0;FZgI7Q{}RZ>pC+DlvN9{~FbSX*P_z6z z<a}A_azVU6M$6*M$|Pl30vTOGKg`!E{-iJldho%-<LW1PV(^V_4{I-LzepO=i(J_4 z6-mDcp!km1gH8@bly1~D!6(3G;BnnoEZeYKR({sr)+kD;_hDS~jxZ|vVCto~=G|f0 z=sa{bx(J<xF36#s01|*%U|ewt?@J6P9wx>IQ%7zFlrm||GA2Dr;LLXxBmiN6S}~=d z;`b48L=2Iv{#t!Z{atWgK$!_WqqG<6+q`S|mRr7CjvLi2(=E>}+pWkg%dH^Buh_d7 zh4EmPTGOn{tmz?wKz^V=$QKG6_znW*A&mO0+D{!5T)?KVi&Jm(l_6+|vO#`^0L4cM zczh{<5)_vqQAiY_l;c6p({I-$s0;9s>iz0L1p$x=)Cv=T0w4s)vKIa@P%Ll-Fnkt( z5pc>hd>s&o-KSR8?Q*@5T9lfl-=vFFGZe?vGR1f5Vg&~P`LrXc_Jqy;huz10!my$1 zpo%arzOjW?#4lBp+B*7r9u{<CDT;S>QA97|DPr(|rK_v!CMlJgH|LQ|V;!=T7R`C` zyfic&UyluT`ob)CnU_HD2)JRp$t<79@3gh(^1EE-2z<bQ>$KY}JKe9nG+a<Chy~`4 zpyCDb{Lipxb<vEx<Ns}qXV!QG(kl5Df=Yg+Ac`L);MeV7S+U&0ibihKc;b9;UN}Fl zAcTKa(Whuo%;BSn?!?GuE0#rA@kr4~R*Z+tSLQ84Av_R12rmmkh2o7Ov(Bm7(}7mN zcNett+Xd!)a{--C7ew+S1zkWqAO{u$If|!>GKCl)rS?*vQU|Mv>I3Qsb-=i{$WP=c z@{JNG`AUHazgh7_QGyRq$ErQmSandds}8HKRY(8{kgiw_SPL*^96xf_ln+$-1$rwT zDY#g_QMX$cuBPJyo1Jy>&E`7qW(QrAnu*6WBX!%OJwU2G&6v*+;P~WH&<D)k-~$$I z@B*{+4m!-htqww?{~^1CU#F~cXJ3!y0zd2_;-UOPT4^~}Qs+&K8?bZT>}uy~Db1l$ zshMX{JfGxJR{?rBjnMahqr&mu6JylF_(N)zq7@&f)+w}jul!>44gD8I`F`HkKIB|K z)MEZxU_bB_=v24^8G;~SvTmo&)5+Y@OG1<LRoK|(FIZoN<>r)9XxI2pffIljxQWM( z`%=od0t^01pabtuJVEp)CJ=pz(ZTsSWx_Hcy;5Mxw*%4uU!V({MDz~M9LXQa8KKtr z$~<H~!ZJeHgrG%`%wGzy01}`B76KQ5eb^Jh*=~hyHv{N)KpXzB`lR}(I#K<t`Umw9 zHD2wbKHcm{vLd;WHV)rp=dy3H3uAmSo)}+@HwGp05c!C_NWdWA476aQ)o699`kP=% zYHsSS7;mH>(i7>6^k&kLzT_Oei_T>@Q~#yzgt}581aSN!U^Rdfh{`yq2?4ScLnHfu zx2C)Zbh4kal<cXbk$u6ODQ_hmQ??Wk3EC82s}t0|>I_9Oz~Ns2ID&lunGe!H?}e!J z|7dNO=gj$ON+Ziif>bPMYiV|4nU8T90Q_63Z(o|_rF*&4!}<4tS3n~^rGnlBI08S~ zTI>9rcDXv}Vw|?=&_uM8qb`-0>a@8FW$i(};pa;)TTMgy(CME5*A;gZZ}Ac85=A>u zg+CjdtN*GfYgJjiAPLac?FqYqzG>y-&G#1U2i__kC}!}U&9<{NE?*3+1g_#wG;e3^ zW^HFVv9_@6Eq%HC3I2qj7g!49E7}w^#aNvSYjd>+&KKv6LkVewQUYy)-@=~-h5=XL zl43-`R_GPy6^|6R>vmUrA^Z@Ym4bYHc=I-rEoleIh4dw9H)%V`iL`}eKV0ZWaVuuu zi1EbuV7xGXOd6w<L1UCL=(tiG4OhliNEJnj;W~SgH3_NvqS_njgY;sSA?U@W#kAtG zK|wJM=}o4lQl~t~nfl#2fAxgI0q9m-Q!oJ;Uqqh;5PWL^l1BC-l(h(u^khD#G+Dr* zk-e2=8UcfUNg>0A6Ti?o>9*+H)jS0Tc%%>kw-rc$EC9=V%TvB@y;)JdDS=jFYn6r? z#_r9}J?oXl&*Hzc-R$e@Ob_E{32K0CKskWLG8GEN^}1wY#K0zL-keu`9bv%S)k0b@ z=dGb*fphp{PFsCZ#>{yut!HGYGw!E18sSBErv6db#M7QhR83W%tce*c<J{x-vR<F} zwNSqARPCT}QjYbu`C4%OBgY;D_YDuZV8)-T;z&uV^zL}kkT&wvs5+~_t`DyO<1Q@t z8+Aiyh5GBl5|i~3QaIecj)T8#&UKf)^2gvqB3a5GyVVs~v)MS3>#UFLE+b^Hmqm`j zCF$7T$d~HmcbDavtVzzX;*!<lgZ;=OS2rO>m2M|1Nn;IeHh*VfiMT}3FqaY>=F$k& zBgt>4xXvxY^_X%B*TS!=$JoAJ)>||j<zPRBi1B37W}++>4)UsW^uxwo7zM7OpQtad zw`9f>?a!QX`l`$0GL33$)8$nwIF9vmaxuGKq+HDu<>j^Enm}ibzAf5P!qGRkX~i$Z zV)3kyakozr+0=?`{+?1Z6>?Z5dC=ID>W>@Mstc_-HKA4<*`GAbA%_5GQwmOGSz0s; zsT(p$Dc_mmSu=^>8>ZSlo$Y(YJk9IT;+e)Wt%?&dl_Ou+o@|<4*_XV?OcIuGy5_KL z`U396<a-wqR~@)A3vm`wHzcOm&R-+EP!SX{iKPie)#SnB_i2UX@e-cMpzC)HQoq`n zlbvC1zA}6KdKb5M{QBCo_Vkuj%PYOD*mVST(rV1vmv`dAl3xATaq}W&j+*e?ks`VC z*0UVfQQGk~ezg@4meS{Vjpr(P{P}vr+Oz*sre3!&`jPCrcb`=}s5U>bz^K8V-#+;8 z-Wnk6Gf3dW{Dkw^UDK;=-XNs=+F)2CW@L<wf;0XP#f-PPA3hDVfEJqvcHU1K7~W$^ zcE%=sTy&192YP<pK|OSa;+z9HnW@6vJJ;tUVh6w<bYeOq_`Q4&6!H_vRVzP4cJ4zq zMzOTomye3U(A2Rgm5Py!56~dNdc%fjOv0h&aZh#eh-UK}#F3nwiKq*1tIsA&ni$&F z{_?a56P;Gjy|I|wEZRvPlb-u!KlG@W?0iPmCQg%!O|3e3_pFdv_d}<i&%8`N+q-(W zFG%KkyOw<_*qs%MLAeT?$0z#ECQyoTA!rWnMM?;HLygI~OZTI(Di!ZvDas=3?uX0; zO>9_)L9~L_jKC={CNxYc0@JM5X*lP3Rh+2Vk>v!;$Sc;^yca7J9s7Nm+r4rUV`>uJ zaz7S4<K0}Z_4#mQZGUQn6j|hSQh$eB`?ODvs*Y|d64ai*AlQgK;AA^nnuiU@FIwgu z&wm5_S=!2J)OB~-rgC+nS4xMNC`WKsXg6kk*~;3Q!Aad;?}p@xbuU-8;1fjo9Z1U4 zuGvbWsYEDAQ6-4uRHW5I95MU;z^ld-P0U7V6*&JBTRM7S;HQb~_T>Wp0^s3sw^iH8 zBgP2uR=cH`2inCm3lV!p2G4}nkJ!B5BdZ9HFS--E<S07QT>^ykaVm1P_eMD#FGfoq zVOlSAb4mzLHdUJflEJ=Paui9SYJrqc!>6}W$*<?<-^d9|^2mE3h0@RuKNgbTk6yT9 z&;q^r(gl3bR`3cTm5@K&f0*8UW(5z)XCO~5b^|la?{*|2Af5Nqmw(v}z0=BE%I_YD zT#+WpF<%Ka{=&SQ;$+&93fE07bc2=7rEf-eflm-1h0!!npL;YcZn}&m;I+&@xV9>L zqJz8TNY0MdI%So2cazKNm8fyf=|d-`n-)_PR&Eo8PbJj&<0{-Ait`VZQ+LOO8`8UQ zizhd6(c@ZI{WJ}o0wlk4J=_-PM@=5bMiwbTblH%|$Lry;@!$H0yE^c5!-hX2%g{$C z=Llt=Wt^#xBdKt|Ux@fdIJ~D?I^=@9pOm^+j%SXYfFvJoy@x&A?jicUk<qaurU1%v z;iY}5Z&OvJB-IZj8Gs1xw|ktt3KBdvKig%gCcgp=O7srjh5Ocn`}GUY8Z>1$|1w}~ z(s>)JbL>VsKYUPG>>CqR8&q(eu1cVee(dxbK!~Wj$U30LeI9&xt?1xdU=TW&Sa4l2 zZNGCz2j;bd0qr|{92i&cv?=oJ^O?UYtS#;brGp`__j2)V_3}mP=oo_TLQ5Z4iTJz6 z7=McUyR!{Ot~1HnFsBB)H!?sGZs^{1%}#Nl9JhVEK*DQXNWq1U?7^fy+>04Ao&Ru= zx&y(A3?#jbE`qw0(?lgLp@%y=+hfBjVaG%VD)po55PdHkPO@*4H|!IM7W^H}c5B;L zmYS*rlQf3O2uKlHD9H{rHFGksZFQPXbg9Ku)7BCobB$Gsp;;xJ(Zdz5G!GbGSTLX* z@1CkRHC380OQD&dKKr~hvQIB4zc6O4b_hA`3ib@-3_7}+?%88@Gop0gVN`xzMA<9g zo~^kVHNC%Eb8S%8=`+e40*grG(S{s3h8z~Ik9$8eGozjeJTf7#4IM#*-S-YuLTdBz zK|`UO3TA97shffcw4#Uzk8~kZrj<o$T&8RlzgwMQGCt_$c6VtrI>}|1VbQmKEd2J2 zg!Ew=l{9onuf^C9xT^D`tflS#6RsE1<uy+B!for?&xsSx;;A#bV(~Y<B`h%h*=pHO za`$NsQg};_lj}J9Br-!IdRmgGcTWZ7FGj)742si_mdELDXP9g;sCs)JkZT83dfB7) z-#L#zKIlRobp+ewVDI-_Ie0jqqI_4ElJ?LV>WVh*GnYR8qi#bVxGT#oiP^dirPI7e zLeD1;ceR7!kb5131mDCTE3+rK<V|Z57ZVAib#sa_)s-&!@_c&zUpj^0Rwf&ofgawq z=NAVrT5@^jigw0RGxJ>S?FT-5+ztqb3<n>ZP9`J4;}U~X`0lGn@R;H0?`qM5W|o%C zi7R-JP1<#{O*-|3hI>K}&~k6o_EBf2hS{b`Bcpx4bO>r8SoCq0E%ckh)C5Gn>#PZ4 zM|RA2Jda1YN~^NQ(kAtbpx3WimDSyNcI0bLm$Zg_Mo3ht<;xlW_WFy5su^g+o@g5H z3=QBm7ZgamT^%ts*4CnzNmEg5J&FGW4GufLn-ytX{WxoG|E;A5*W~DXZpuOg`^7&v z&?ASAK3S(;)a9Vyg}<NBoWCClJkZh^>@?lJvOP1qQe_t!1|nl761KFQD6XHnJw5wK z1I`e|dRbd<uU;pbzjGh_?{J=Xm<=*q?h@;i;8N9FGrWFlDk#d2(?_$x`1jgjr~}-G z8VdMKMwYJI4gLtVug2G(If5_RjsIfMAx!~gzZi3DrE;bV$)hUp)P1oE+}6cSj%$vO zPm1v43KtK#l^rUwi*r~DwHCwG?<LyhKKz|6RjqtUhMr&0@-sj#^H?^nw(u{jsSm3Z z(6L>#>|?K3NY~h+ihEs#nMutUIUXDX>c)TtU4n%LbYnXd-2r7{wP=L;Vh`CvUZ-lf zIB>g5Kpg`*$AGFaU~&vN-X(DC5<DrOyW65NPE$AZy0|tc5q;!}{xNCdA-T4;fZlG0 z%D_?kOzESG#)<~DHc$^f_xa2D6-J0-(A?p2GgH^O`&i;F)H@1FO+oo_UMm8vhR8oE zA;TxgqH1u2NACZ{isda2*>8CIW#-A3h`Ko-T?h>z`ym4usaC`2h1^enaf?1$i1aOz z%ML?pTWiQdq}X8oL8*JB)fmQ{GRRaU6{?4qgOCWu5CYkF2q5#4QG-3mIfJpse$!6! zF~gd|U=MNx5($JXPy`0smkLPawLwG-tv^aJ<QJ=2Nq>SC6!YonGwzMyrMORxb~dMd zocpMUZgGw<4q1{jM{X%5wiy=Hn_9_+`G3RezwO-D{$A%sxZOK7O3M~k*u)1}PQ=7e zt!Lk*G_hM#+@U3^N}b_plB{!=ybvPY{EE|$b~}Q!JM1KV=WokX<7J8y?DHm=_y7Ll zREDgXKE9N6Va!x5*`=7ZeAr)$57G+)MQ!@qht{jsaFpMKNNp?z8@pp(MOG)DOd);H zb6U|&U`uR5JsAv4-Rb+^<FNi;cW#~RW9QZ<{dMPNfKz|nx!oYst)qsvxVB4<lW54s zh{cn47ekvaalC#Z4@6C2Vq8GXh7G=!e_00FG7`Pwhwo7DB#u9v>AaIT@NlMTJ$LC2 zp4C;l$qq!o3f`Kd)Wa)UAUApiZ~alXJ6q@09PR!_LS4vpU-=ZR<C?t+K!H|`_g!lH zljSp2-xqgrYp+E{qur|d3G$i3?>SxE%d0gwwA-EBh9uRqx97T`h{nry@W!_-8)$W@ z&1EMg1;l<B6Tf=8v4CP?5pfz#J-og-N%i4Qq<rS{r=5`es#<$%gn#FTqtxNUw-5o1 zC*zu2<gq%#ko1$zZUZ-2XPBSsYTM+>4InhDe(fC98X+1r3{NK`)09B6)?mdgt(tC8 zx#*!0;oRB2d6M=w(Ue8MesAlZ+xlxTGxE*!$0S==fB5*3?x*42-yE|B)5cX#|FVjG zzX8wMXXw`cf4%Kx{eL$$<3b~c?6Dk#O8#PMZriUVQ&v6xg$qHm>K6blgNzTqe0Chx zU;O36;}mV>uVKzV{T6oqx3Fc~e@)7P^;_7Zzl8nvnn1T6<u8d2-u^9+`0o*nG>o!W zorw-E{O0aHj`Uvth$^GfU>OGuiS35KLC9Q>Xt3xb+44A5TTeZ07-K+tvKYho>1@~} zoV))+#2eMeN+;C*_O33gUvo=1`dgJkliV~pylHx#Oy403_NlUJn&*!IlCYFl`>%B* zh)*ov?!5{<kleRf5=y;x)qECXU;bcz=%P1qb#X%X+jw5nnK*ee^`^iP+k0!O<Fj~F zLGsKto@+ws7_<j6Tu}4+<tr^+-=3j}{x}pPptWOm&+z<LA2uR1OCh1U^Vh7UMM-^& zF^T+EGi|e%yv}L<@r+OP)*O4inIX=a&5Kxikz5ZsU(g^Yd2jwabX);O|1{8w_KV5} zdB1$(J;h%u>^6PRY#RPY2D{|bN9$t+9dW&RO;tGw4h}pDF^(OIL9<KbLk<pL-aw=N zT|}B)(LVIQ+!r+-U#mRc;V2Ou>2cr{CmCCb8e3!8+RGGnXJG1<@PBBr3$5d9)@wel z<4VRqLF{a^49fu(z0CXMyDQw?+GL~`0h-PZ`<mK+^B^LdliM)FiEeDUk8Az+&mum6 z{Vlay9cyZry~gmiwtKG+8e9M8g^MT-NtuIF(Zq-yAsfyIJziD2#(KdYGs><Mb$M2O z?QwL{XyXEl*tdIP|CnQRT(t4i+H3b@Nz=<+-sN3h&-rk=;t1h8F5q~R5qYG#MkFiD z9}F9|5$P)3U1t*#0!a;_)NPdUWaoleZk2hP>vGlTx33b9>s_BdPnxn=Jap7*IIgKY zG{%a8Zc=bhd}s9}Zf!RAP}6np3Dpu1oucP-_(+Ph8l~F0FTQ#PIr~Eq=MQ1kj4AM9 z`|7@_9oB*5jFH(B*3MvFT)94MgAbFe_8{pNi)Y6*72IAPOLEN|w6~h95Ej2$k{4;- zVA4m!AyzC`RyLT|SW=*G$Li>jRTZfsb0)b$nlr8hNyaF&XlNvP>fz&H-1pA1VP=l< z3eoglGw)hge5)1&PbTfFA7;vKNjcqPhrhKX#YfIuF1+pFlKifzI%aV8EqSOx?K7)o z(z`jDPgLf`Uvyi~M7ivFqH<tnaBM-Y-*fUenV8Yr@w>rn`a90OXN>8#pl<LeD_(== zfLVjTv+gU1*aOO2@e~K`7kg<><*Qa#V%|KZng7nHvw_BBo{v7rYP+v$1D$25JYBMB z>M27XXb_EeG$PR>Yw*dYtW*L%acH6@!y%{HbabD0<rBU7_M;DH1o4J`ht9t;3SwR; zLLv9LqFu3CaIqj(M{nA>UWS0Yx`TW<Ao$@7wspXIagB9GUkhvGWPQtn`@1Hs%!tSC z?K--)+C(TMJaB`A>#oQ_M-cJCsv8U>xgP9OfhWx*pamoJjh5UEN({e*Y&qzgq8Dj2 zP&SYGK~4@aFQGErD1aHr_`LPYG;AO~_QX16>2!rHx~MIR-{_Dgq)j*5qKn&1`Hj|T z4m770LE^z+k?*K4w$(9>WjiO><1xB3t;E)<EVi&sY+F+rYnR3>jU}(c`N|vjgmEm> zjQNSl>xBHo{mo=G;fnX;2@5E<w(nJkLq`;NsN8vSmz~BHygf&pb(KW|!Uzl0vz8)j zAt9qd+|0ZQWjN4!){?#fG1c%Tr){)0MSH=fll;>&Ml62Wj0pWsIRAS6JB-ClRLaR5 zIWhn-@E+febUgY_nE3rAIl?(OqaZ`~yq#Qm_%=Hc5tF3PsB;0cEP3F|90|EXtU~t9 zXc4$1Jw1d9E{D9I=SaYwB@&T(K7&4oq^wZcjks`CybtpklvR*>x4}J4_J`U{b!{QI z1OeWGn$3qD666yhvS#Z{RBQCI8B7rLcFp>CoC+u`-SHVY0TR!I?!M6d+4^^)5GY&% zg*QOq3*&Eq%}~iqy1&s+T+^<T<zLV`K(9?bxok!T6{|HFDyE@0yd@Iw>fL2E!l1j$ z?h>Z2a2GFj$PR3qRr@7;&5~`Sfb7KEHtX#j4@2a>5C#jD8xWoyvItk}K{1g;cgh?Y z&YS}?@(Z^?0XHW2xg#K>XTrHRL@&G216tet8ng0yBFG75$d0sVWobQNM!~Q07U4vY z?9E~ymd<dj4}qMacRo-%Iv0vrDbRpAC>!-$us%}_W^ai!XiB_m-a%7>%s%J^R-hpv z#&1~*Y**;P1}H}ZVvuxD36`L<Y(9Zv#F73-d-onzW46ZuywSLvGiW-)$vrd8FcG1Y z60aGOsbr=k4cdq(Qgjz~a~yJ+%5_4w$0$uTiqUOnXPXmJsgQ&&Tj?UHrd>+4_g?4s zc9~=T@SH#9pW*p#&)V;PSL<DC{XT2G@4h_SUP~>TnHt|EIwIsg>*#M<MqQOU2%6gb z)sqh2XKG_2rtl%e9jI5ZZ=;nG&6chu3U*QC$A1pe89e?%c3zWzRd9ZcL(!wuF<keH zqjjknt|h6qIUWWwwwgXc*+uEgyjEdGO{b9FKnmk1iD)(y$>frIlpa%}nIWm5beGa2 zO0%;%Lo{_HFAhH3-TXnT?)A=(IvQwB?D3@mT2AwjA7Kv~VY9l4!Xz45M~@>C0Sz3@ zQ?*0#F(LV1DA&=SGd+$7`Qj}`w<P@bpwTUEx@?QQnx-*r#cso8N&NV`(3YQa+~7_( zZJOR|X+F>eOx{KlCxxol(+g*ZPAQda@6igz9Z2Wybu+UdtC2)-%X1@1N(`(S_x(zO z)SYc?OW6~PY-eUNmO)mZSA9qK>38FJvD1mc1Ua6@F6O{Oy02Lr%a7oOj7c<t0?jz4 zX*MpH22j7RmI@jU;lgO8&}h9DO@znKmh%(Avg9U>>sYOqY^u83fea>i-ruSh-|Bg4 z&*l1Fs(#No8-BQFt<j{3V^<P7cdx^BzS%GO^2<M@d^c*aL#tvGHEz(?>{Y&53u^Ym z7QV8d`Z~Yud+L1J;^!<-oj<5QRvSa=y!4-Rys7vzg4pI6PmL}(AKx7HfC>Gn8ttkJ zuLo;H<&)?{2+^AGTCg^{l<h;chiX+&Wdhf}Y8=fPT7Kz{On3B%EAdRdaMO+JecDNJ zwUvbj$tC<mDy39HNh*^I?@)S3>Ap-J<?5&8=t|^Bmz&9yENLz}Q?jSjIkc`hyG_BQ z>duUrUoSmJL)+BvIFXNNG<71#*NkSpw9h?@cd^?o(tBZQ@(+r)f0#b!i;s_E&1T~$ zEVZ${Cry1Ge;(WQFc!SVSa7pNjji7wBJ`zGWFvLOmXcRqD}Qm6IQuCJQ%smNl@h;A zGhrNEM>*Q(XrH5f3GGX0Uqbs5+85HkkoM~~&7rp?$vG{Ggk_;wUy8)ydJzqT%3PW# z#RXRn=hlo|Rn{o~Nuc%YzT-DR8j%sB`8lau+_JYObe{T^=G~RuLJcc@Mc6jF8k)?{ zYG(6_<b&yiqVtS3n(hhWube4#1r2mEVvg#j_d5+kyM8Fp?o^|*C-PLKJUdseD+pJ1 z2Ij~w9TPmwip`h&#vQ)WWVAhQNrT}8;}Zh+|4c1<^|(;s=5?S%w{v;+qgV25GpEiL z?t{*#t@M;ZVc5_>U83g(-@I0?oW^jopK=*x$G+hO!mx)|B{G$1ciP$wE>4Tzy;@_D z!bw?D*8Aj^!pC3g=C!d*Q`c>1MrM$MWUjGElUGUhSc~$Yma{R%>x7CsYkhquH)u|n z9%R?O@z<9%cXy_hjJ@D(XJD|uJZ7QVP=i5J&%Lbd+Udo0J5zNNrPo`SuFKBp^!QrA zsb?(BxSiZp+=IjBFlm#ggGBq#^TVwQI#aJ1#n%_|?>SZGHdmSjw+Vbo)+Akhs^8JD zb>?)-int8xMJ8Qr(<T1HR;ORD*D)W%c^dS1?Iz1Mrt$mZam+2`DgSLy{&r22)52)G zR!J`_gv*LN_<Zl1bg=HhX&t2+#amQAsPT0klQ7qK0$)B{OU(G!IJMHgCn`RZzgcam zdc{KZdm8M`!94^Z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf{5gSF zT>Fy+JyqOdDeB8@af1K^AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2teR%7kIc=wdcQjPh=&Fzkb_;LH{8D0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00I!OUDsUTUHy6CoZ&3N=*e~2#eWwMx_wnGQ}@Wgy{e<a2X4LmX~0)i zD)!$kDDXHa3RtZYdeo()WZ>YDXGi=pe?g&E@%>V1vVPKH4_TP;=0Eov1vnZ45P$## zAn+!EfQHKBg_2WiM7uiWA~#<_E7x>9N#e|^EM-XweqEFQcY8J5=#j(ZdzQha-PMmx zPLE5|4bJUM&XlgMwfC2#WGZ*xx2%1+f?q8~xh$|i+9B+a1SB%ITxG^3k$Zbp%+$KV znp9b4LQ!>lmWf5voZGF{1zF-HO~$wLs++RJ=2qity>@iXa69HdywyO|F{AldcVfO_ zGPmW7p28$gp)e^_RfuoxRfumdRETr46ygFcg*fM&LY(iY5a&G&^qZgm5fwa61#PHc z5f$9dCCMzgB)!JErA|~%9OYv_uJ(q0W72fHWleRDycA>K5cHE$|Eul%`a9RWd_4AZ ztoa9TrR-Tk?{YD3IEK#-(EMdpX~Uv3Iotg?Ki5dI(%@X`bVIgnogSX)Tc6wM;T77q z)a09L321nc+HQQ*X-8Q_NMio+obBCJI}8mE`^gHum4$bs?R66p`BkaD_w?W2Ei8{P zyxgvI%1{b)T(<ROdzn#oQG{V@yK;p&^H=iAr%Ek+oYGlZx!+}BHS;!SN&RK*sBB4u zVYivGIl7Hy)Sjop*H-#$N@r1x1z`#;CZ+82ec7?I%4prnx_sqP=4R6+T;;*9u(tN+ zqR~xE)2Ao<inc<MAmW!*oAF>CWk-4TWc$~535_oF<!AEw-aYx<+6u0}_-;Pi%R?`0 zlAU#z(7}ser)zDr9_7|iTR-eNa*rL}>CcbI)AFcFB^P#&wO0F3dp~DP;oG~XN<}s3 z(4+GCG0e`^j$c}9@y;1p3bx*y=^p3!_@47iUtL?GwlSR*#<a01qBHkcSbPFstJb{m zN+t6no8#R1HkR5~G-N|M6PVYtrLuMAtU10{zHRgVZdP+bDV?Ml)4+<8`^!?bg0Je? z8rgPpR@h(Gm&)!kG;DR2#hy?`*{$K%B^y<LaA*fhpztk};S^YZ7fx3yh0|hKU5<bP zvF|hB*?R+<{aRqg6xiVtO2H(dMLqK>wxutn1{`fK=k7C!Wj(W%bx`+~_ceB!8cU)Y zBX%9T$4dJeTdG&aBvyQ6&i{){KhK>>BB@aIWlx19az$?i&XLqFtr+H$!%;0O`ub%` zfvu;B=`16)!oEqFeA~Up%U)(m>b(XOFm4cl00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafytM+q39wWKz83`n2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uXq&1h7=byEUGO1_BU(00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ zur6wwchR69X9P6)9a@wdZ|ykb(@!rqhGfKVKAyZq)oQ`4QR{}pER&8_9eJ_w$1snr z_8pI-vNcw{wZDXbjza(f5P-m6P=Kwqw@q*_7Im%Ek#NKI$i-b=ipC5>Kc)Vw-D#pL z(yLqNZ#Y+~ll=6-hRfyK=R|DE5=SPlSFhM-Rc3v1cA)W^2)m6hjU&=-`przRuAQ{4 z`}|koHL@0+{TXL9tWRdv<o&E|^<~W`PCL(PuD`qJxvA|*d(o(<fenqjMx0CA{^EQY zH`CNw8b7(AIQ?<7sL8ta&V*&&a~_?ku9-5Zd*;Iv*=Id}oShfDacEg!!P3po&pq;1 z<hZx4x5<9B@R{dAn@u-dPRzF5k~twp`p`MhN>@Ms?zdNz8!otfqtj}eV%gzY6(H<Z z1RDQMQhWK6c}BsHoYS%r^B=sNRT>&w?=n;S%)TpU#}-{l9bNZzRJCA5es!jM>F}ti zK^lcGKaNZc&-|Cm%AmCR&rG5f4T<>$)_FH{Ry*w9#x-7v$*rGZ5nDLlXWr=84U*9I zUw3pFOgmgswR??@?3MAMmK#r>hL#VwvGvkpr~F~DTjb4>PyT|Vk1jy~0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_0D-@tz%@;*vw-i^KmY;|fB*y_009U<00Izz00bZa z0SG_<0uX=z1Rwwb2tWV=5P$##-Yo&Fv+!<>C!&D>1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00Izz00bZa0SK&98~#53g9QO{!Pqar0Rad=00Izz00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fWTWXuy5X+4|^6bkO^Jif9s<`01$ux1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SKIIe-gm|Fu;O4tVA(nWY5<Ea0dYh zKmY;|fB*y_009WR`vL)HRlc@(Z-9pC2#R%xijG5p{)opPU-zz}-37x`2dfPG-5~lq z{-LL<yRPd758LGfRaEu9J+jEwSLF{iOrshqhyL9(EbTq|<*_k?hWw$1i&QUPgPVuv a%$_O?ey?KSAinwm^lu++_n#R+_x}ZOjtn>e diff --git a/lustre/tests/fsx.c b/lustre/tests/fsx.c index 6cf43ee..dc97fd9 100644 --- a/lustre/tests/fsx.c +++ b/lustre/tests/fsx.c @@ -438,21 +438,21 @@ assign_fd_policy(char *policy) } } -int +int get_fd(void) { struct test_file *tf = get_tf(); return tf->fd; } -static const char *basename(const char *path) +static const char *my_basename(const char *path) { char *c = strrchr(path, '/'); return c ? c++ : path; } -void +void open_test_files(char **argv, int argc) { struct test_file *tf; @@ -1334,14 +1334,14 @@ main(int argc, char **argv) open_test_files(argv, argc); - strncat(goodfile, dirpath ? basename(fname) : fname, 256); + strncat(goodfile, dirpath ? my_basename(fname) : fname, 256); strcat (goodfile, ".fsxgood"); fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666); if (fsxgoodfd < 0) { prterr(goodfile); exit(92); } - strncat(logfile, dirpath ? basename(fname) : fname, 256); + strncat(logfile, dirpath ? my_basename(fname) : fname, 256); strcat (logfile, ".fsxlog"); fsxlogf = fopen(logfile, "w"); if (fsxlogf == NULL) { diff --git a/lustre/tests/gensymmap.c b/lustre/tests/gensymmap.c new file mode 100644 index 0000000..c5cf1f2 --- /dev/null +++ b/lustre/tests/gensymmap.c @@ -0,0 +1,96 @@ +#include <stdio.h> +#include <errno.h> +#include <sys/utsname.h> +#include <string.h> + +struct file_addr { + char path[256]; + char modname[32]; + unsigned long base; +}; + +int print_symbol_address(struct file_addr * fa) +{ + char buffer[4096]; + char cmd[256]; + char func_name[256]; + unsigned long addr; + char mode[256]; + FILE *file; + + sprintf(cmd, "modprobe -l %s", fa->modname); + file = popen(cmd, "r"); + if (!file) { + printf("failed to execute %s:%s\n." + "Have you installed modules?\n", + cmd, strerror(errno)); + pclose(file); + return -1; + } + if (fgets(buffer, 4095, file) == NULL) { + printf("failed to get modprobe ouput for %s:%s\n", + fa->modname, strerror(errno)); + pclose(file); + return -1; + } + pclose(file); + + sprintf(cmd, "nm -n %s", buffer); + file = popen(cmd, "r"); + if (!file) { + printf("failed to execute %s:%s\n." + "Have you installed modules?\n", + cmd, strerror(errno)); + return -1; + } + + while (fgets(buffer, 4095, file)) { + if (fscanf(file, "%x %s %s\n", &addr, mode, func_name) != 3) + continue; + + /* only list symbol in text section. */ + if (strcasecmp(mode, "t") == 0) { + /* skip __init functoin. How to filter others? */ + if (strcmp(func_name, "init_module") != 0) + printf("%x %s %s\n", fa->base + addr, + mode, func_name); + } + } + pclose(file); + return 0; +} + + +int generate_symbol_file() +{ + static char* cmd = "lctl modules"; + char other[4096]; + FILE *file; + struct file_addr gfa; + + memset(&gfa, 0, sizeof(gfa)); + file = popen(cmd, "r"); + if (!file) { + printf("failed to execute %s: %s\n", cmd, strerror(errno)); + return -1; + } + + while ( fscanf(file, "%s %s %lx\n", other, gfa.path, &gfa.base) == 3) { + strncpy(gfa.modname, strrchr(gfa.path, '/') + 1, + strrchr(gfa.path, '.') - strrchr(gfa.path, '/') - 1); + + //fprintf(stderr, "%s %s %#x\n", gfa.path, gfa.modname, gfa.base); + + /* continue going without checking result */ + print_symbol_address(&gfa); + memset(&gfa, 0, sizeof(gfa)); + } + pclose(file); + return 0; +} + + +int main() +{ + return generate_symbol_file(); +} diff --git a/lustre/tests/iam_ut b/lustre/tests/iam_ut new file mode 100755 index 0000000000000000000000000000000000000000..3cf58045d22fbf37f89868a73edfad1942aa1d25 GIT binary patch literal 63499 zcmeFad3+Sb_BUMB)6>(Psbo$jGh`vakOcySJ?zLH_DyydM1)|X5D)_5f`9=fhA0Rs zZdcrwD~gJW3W~CcE8>QV+r5fmQBe`ajpuu+I)L|gKkxgz&-?!QhEI3bsXBG)Y_&|! znZv^3q5Y+lqVCTU0m0Du*V=9oo~yq$Ekz<BTu~tMMUHS##cf54fP+hSK&K$;Y+M0u z16;Hi?YPna^TlmUn=k#xeQ|Bb6oN1AFUYj(x`)w*YXUl)i;KtQb_<VJf&VJn^u;hP zZs#%h8VLM>7@MyIV7}_wMt)r}bt?K#z14R4ilL3Ky7n{C4)A~c)zNP~z3hV4)2Fta zURFMLeyce%TeUNN!J3wB!0@rUFZ0P4%aw|&KCURPIIa*bupq*?>fz$?6|NLqU|nS3 z((>Y&j?2T9h3h~6nErqM@px<l7gy4MHl7Bvyv+0Ie{82@T+DCe^aj6N;y<4NlPt@P zKFs_w+{%Za^x+qMxS<bE_u*N7|L#7_yzrRo{{w!{hYRpz`uF&7GavrLhadOhp+4N& zhY$MjVt>3<KHSo8KhK9d`EXw!ZsEgk`|t-oT<ya)6wLf|^x>|4`@=rW9>DFl`EVV7 z^L@CEze{|$PX12-FS*Hf4V)sL_1lO0{Nr)*FM$&KQr%yI3zG)Vn>4EA!m>FPCA0fZ zpFC$y$s93h(uFf-mQR{fF?n{yq)7tqJB8i0qnI_fVoK@c*`jRbl#1zMPQ}!jb1TH0 z@>#RX$}6U!+nkEo6*H$#0#q`)eDZWLqh!XES(k}fC9`MGoXx%Gh?%oM^};&bxu+pd znLK^^%qe1;PYtxnx$}i373jmH=9FNtim4zbrd>L_tfB-|0F=+1gnpAN%4U{}lKEx8 zDXF+%F4!3~0t`(pE1xuXPRUf@vK+opGbWdngVyZnCFNqk&_R9rPHNYx4XeZYfACN0 z*~;@ziahkKn;TSChADxS(O3S8M{L)@<Xp{9Oyo8E#KgUWpP0yN`H4w=A3rf^H{cmU z-;Mmlgx$nX_|F!8!qD6JiAnn^KQU?F;3p>hZhm3{zsFB36npuJMPeU6!O<7|j0o`+ zKf!D*KQW08^ApZ@grAtuNBIdy`IVoTsDJPie2uO8y7I7Fy8x`!e#N&4i0#@cPIPz_ z7$@@A<IXxfk<Sv8a`C_yC-9fgl9Y1Wf!!KriAq^r2exRKB`f76;lO$gvxKDyhSzGC zB`qyrc!h>p;!?KPfkhf-$xAykJWIo@fzn<KpRZxoMCm|=M{1ZgQaY01UK(c2lulr{ zjfPo6rROu8&@gMNlm$9K8fJ}^&SF?-m^D{ApW!2ap(39(Sh|SeFEq@WEM3O%ZVj_W zOII+w1u&i`|Jr_gUe%YCUmY1aW>l%dye{3e+IG(zS2}XJ?bfyj?V~GJzz(C&tmQ7} zEZ&-xD@66GKhUytPerhHEeaFP*|l|r|7UEr=nVBw*k*3ayInl138?jJo?!}=TkLze z?!=yxO>+<J2r|0dwYBP)>3{b&Y@JKDR5aemeNf2QY3FfW?cqy*|HV+Md5PPq4o^He zHnU**S55q*wkJGt@z#rSgs7?f;^2uBRd!x&6w+5u{DV8yY-qy$HGV<W#9yn6Rjtxp zI-yI|+&`*{k5m^^Rq@f9;v-f4^4#j;7#?<YvCEW7L22ohsvUDvs*Y5nH}FBU>R9bv ze_{wxRgtHFP1VI2+_$FkPv(HTS00AOfB+g}fO=?)&q&RtrYHI7?DKOaCfDMv4>tDs zsRcjNb`~E2SbVsqxRxpZovS^dsJWuZvvZG}&RK2ff1j^Su<oH(jFz>2GFYGAW5&b$ zV5X)rc1TM@8v|wTBCw%YQ<@u=x=u%HPiedASe<;&+hy(+p4gh`ZJOxz1VkE>r)ny5 zeZv2h-pKz(@3|neP)BdDrZ<=A4P|=m{;%kX|3<GZm|MKS<w-IzvU<zbL&<fnt)I}x z_;GEIjjlYbsu!r*k6l(u5aV_E>&gpjDtlwRw#V4GAt<rSUkiHHtQyEXj6GuXSv}T` zJY&~0S~|jrpZ$*J?s>IW^OV|B;Q?b&_j;lt*dHW!!|&k)qP8=zDz~X~&YLuGYaI=A zZtQn1Hl35<or}6VBC_NXR4{>e4_t^+%{)=3f32#>Gw@>Aw7zD)e<LB5y|U<Fn?=8Q z^OCqUdC_mdOOr5DBG0_SejRyc3!As5a){CA-`u$7w-YD8CKhN>+ZO!z)AOW1k019U zw`r9XyR{EuM_Rmf4-Q-`{d#o9WU=3=t=wX9#R;w`J_1cB$XT^B@{F})%iM5v@sT?5 znHeCqgIrn@M_%4<N1o{;xv~K&y(7y)11gJMOSf|GHxF%DacaJ6`z=0v2YAB5SDUW+ z>BIbhkLt==*4GP@A?VIxEI(50EZzWAQH{cS7G!7fqx`^dJBuIEQonv2tmui|b{G84 zHq-lQ)b1?iVbEobdECd3zpY<a@*F;W)?ANnH6vx!j>_LIja)kr`*=;|J}es5_6oW8 z#1|)uH=N$T|J=>h!`9a<cy;Nn$kJ!9SyzGJMyo7?r=<<E@PM_?gXYpLk)^YN5P7!v z)vDqxc)V6sys5f)!_qBRwXZ4O4g44?-@+5CNAVkYVp?y)0}QhH?V8H@;DgoN9P~D7 zCFt3{rB_w)YOe2t`l{l!r!{Rt(+X~C%1t|~i`RmWB`@Q2vGhb_$#ZyC&wX_1vB;8p zxwz^m8x^$lpm#9pBGmKX=m|0>_EZmh2%;`I?=<uk27L{qck|IPqfYFpTyUR^EFn~( z6o@SO8HXE`tjLllP05ZdS#3&AWXU<E6pSqS1){MYF{ZCU=AiZnyXKCi$Fz7A$t!9q znuwZ`qk2yIcr}%Cd4?1(XO?OzuhPIW-TTSYd(YkJcc?a~)x`#h=Tg;<+OvPbZ10>} z(PineijW@e(9OodUSh4ozpl))K~-(c2582*mdApk|3cqGuQU5KmG^NSSg-Sgt4`-3 zva~0(4MiC%+qDTy-krtU*kh`TH+CuB7O8m?<(kTMT8t$%l^b+n5iEp74zq<vHioIa z5W{L(E6g0=rrJ@ctS;V$GF)ki!Di$7)~x;UnCUcj+s0a0sTn&VFc%q2wyRzH7UTR+ zV@zRxsxwApX$98u$^{$b!u%7(+kEbqkAZyuv3|GRa$pcI`<f|u^b5Y$CSa69+ptYl zzG-wu8$)HP7xVL92Hyd>c%Q8ZRUJ8SoOf0;LwsF)$h@7>>#G0{eNa>RX&q&^OtFan zjq-F*KJXwY)>M95hwcFVz-+daHfr4-xl!+=T48^4(`G(%k)@N+=fE7?PHlu6dAmGx zwb{a-_qT82&G$gLM*It-9(W#`SM{nR-2F}zjUyeHd#ZK<7xjwwpAN|{1K4~ul_4w^ z2QGpr2HePN2b=!16VNEk_XF9C0?D42Wf{G|$w_auf7Ywkl4iWgX_AdV<IwZfmA`^B zv&i1TsxH0*Ycjj-fp6j9rn&8&=Xm1;7G!I?2g|a{w0VXM#i&)q8xL$bg}kfnp0>w; z%g46UFz;bZjJ(k+v)N)NSu5j)9sMIqAJwKh!(SI?{{(q%Mb+sn9tXEo#cQxxXch-! z)Sbom@yiozD7vbXslp5p2U?(9+Yyo<I1_Y$$P@lR{3NSjviObzLDK^(mN;-6DgYk8 zRXMco<iN#i4t!x6u;z;c@1g7)ECaCKkwFS-_vbMVOCx+KHn9}*S;g6Te>+^8^Y!>~ zm4_46V`KUx3JuS7aT3~Dd=zA{Zs4q6HMh3vud3pGRbvm&oz&&mc`d4bt1hnnW$wPJ zohx=0?`6EI1xKpK?!~5FHTG!r*rS!(5`3`1F{QeA-_k8}Q}mkG7FPM?2PSj`1#~Y) z4@2kBi~fGzWx?Tj;kG9ZJ?O)c&BceSc2@q<Xl`b=xrcFv8N09Ti7sPnXJ4Zo^}v-Z z*4V=v6U@Z%X1`VyfA#LaFlgkNVPCylTXp>1l6~GQ7=>Z*U-jd=2T{4-G<!R$j#nM| zW$xj3OKPh=;N5KBFW+?e@zO|SX&&fyL$_<{qcGw?9C!XKGJYH%d+I(reTEOYwM~BD z38jEgvx@J{cE9|qg%i(Qw^E<)c5Q8YqUwVa^#)Y#u)8fdGW+McYNmJUgO-0PMIA=4 zU*PwUck>N%qoUra^Iqj4`^4_*0BYwRop8>?t&Bgij-TPo&)^@;Cpb3C|NLX0<o6D9 z=S;q^q-&yV@{CDyD-!3loHr4-(us*_Wz$Q{>zRW5InBBz^2=K1mn91FXEaL^gL)6c zJCVMV1`QuwJZj8IaOj8;L&lCg3HB=<T0Ev$T$#Y%l_Gz>7(aVv`GtwnlKF|LWfzuJ zB%0<=X__SFl}*36>yWO)yUy-9s_WD_qsvEj75xVdA6`75_i)j#cX7YoL&uI5{pQY^ zUWRuwC5ekmE=!cptVm2PsVJFJQ8Kl0k{C5;T=A$;V~2~&TISDhIbY1;$0dR>^K<7X z%F5@I%m(IZjE|W#ym-PX&IgT|G<wX4Q><&aH$)rRduVb0L8FSrl*#2hsD87=T;MIz z%-NSE`5HQ<kU?X#uKq(&BS#E9YuJcUBL|AH<&!U%UXrMonV2<s_MDOg-bmqf)111u zm#6Fm=I4}M4$I`16((kvOgRP2s+fHWC@m?wu(ZNpl~10*_;Yx9{13(k6p!gWX3VJ5 zc>~V;f1NmSACATU|MkD8gX!O#!mIYn8~tYldTjOofi8Rh2aREX$NP0t@SNk97y8dS z+bsjU=Cryp*jCmLbR*cAcbY<MEQNUDHKG2KmyZ(iN>|obw!I7peBR{iSL`P(xI^z0 zcd6AUIaqv+eJw8*_`{;6-!auaxsFl!vj|=fia(axE?y6ck^Xz+QhW!(yVveYypO~) zakcHTb0(t`-pJ_poOSP!d5DMbHj8&Y{!GPu@_Ttcc1#6bE*Ig+@3!0E;_nOaA(ubu z&y1lSJk4ZYaJHHG*%I^q5a9poKmMMlj!5%sY_}_}!MM)GbrG)1ab1V&He8S3dLGwn zxIV^p5Z7<GlJK2Y7Ov*Fy5bs)tB&Rtraz!>->!)Qe0|d_QPiqUt2T*tZQ68b)1h65 zM8T+%sfmG;D@=XMF3rUMF>?D>?OPT7t^5C3&HpWj>_%)x^W_xGwaj*hvAz92)84dQ zUHghvwyRyvZ*PmI>CZTw@r?!cjCNB?rijTX&zd~tV!eNe2LS2!@7(Vc(dGDRWHkhx zG->YCvUyX>+H118AmuZ~ZXy<7P-g5X82b)ikbR0eX?=l`^*;Mo;Ku~`3t4#{$~k?w zhTm69*BXX53W3})T+<gdR>*1!=MLm?FY9V<%)f*izfEWYFT!I31(jU0kO?@}EckYy z8I!c`E<myQT5kLu16e_<abuKQFm-D?oFq`V8ATgkga|um{mj%`>Vf*<tzw|v$K3M< z90c8<)w2Q0?GJNJZ2~nR>oP{~#yFOT`5EZWjaDwEV4&(RE`H6$>omRnm{5UgUHpIv zEoT|51x-+_(YUwdqj<eW*bZd}ZqUV-@LghH#bgK;SPeG7<5ZS+<xkxEcf5fK+{gk1 zZvuuJSmnHrLuz0ZLsl-njJzKOZg;sLPb$3wOTOwV#=4I=Sj_`i5?^%$Zqj_+h(3Xv zHD9Zl+gmg<OYz-DV2#FI#N*tmYyZGNf!lQLGmLNt4`_AZZg=Y1c6dz|_=m2Yf%z4< zTNfL#9QSpD&v>@=3H&1PpvJxvVg?@7#i|I3>$Iq&*pTZrZv$CTPiWrOVpR)l(6xuT z__VHV%*AK*Afg3|FO;%$6S%nPpBOL6)+C?eip`sJ#Vfc6u3;TH*4r%47TvuuJHb{h zWiE)?o38_&^$rm1!@Ibc3k3V8_fQP;B5nuktG6JEP*q@3s@;sEhGP8=sH3o?t^R^^ zK{W~ncGPDLU?DXSE4Zs-@E=dz21id)Cm?oMorPXXt%9YJ)i(4>QTI2-ODMH32MfD; z4rWVLgJFfJdK$V(Q?o%WCc^vB!43{qO`0<swP01M+=k4?A`BEz9;CL_hqwn-bC7b> zCm<eDd*Ms2nvY(dsszC#H4=TpYO!WxHB6YSdTKWK<6%Tiz;q(D4uhns%M;8-8Mcx% z^$PAW)ehpsRWneir+$Mf($)K5K11;`m8lMaPL?Xh)>L0T29dMXWGJnHx*ZPKP{qM* zBXuT}kWi!0+E`r+b>*lvxaX?X5Ij${0ay9zZeTW1pJTc=RfEyDK)sAnnyDe+u(`Sq znrNZAVDv&Y8}hbPQ-I$}6~IYbt98I>qgH^=wyF%Av{QqC-(LNI4Yo*S!k8V@80e>? zx)+YxNxcPmJFDj)Zx^)~s_v>rLRa0?5`5p*UA=~y?&>`BJwt6peGdhNiJqzp475e4 z?Igs@LcIVTzarFU7-*|d7O=MowI7w+h1vw(b_n$m`t20zElB^WQ0IZwT|(UeoxUd2 zEU4{up;`iZL#Te}{-#jNQiXU+s7E2l+d}PxlkFDjAL#dvP@5t39-%G<6YmQ3IojS6 z>VBBueWAX?2p<Ub6o`E&)N$~?SEw67^CO`?K;_3mm1919B2+4Te4kMB;LD#1H5C^5 zOsMfN`R77C4x4--R2U<CDb!;S^It;6;6DErDi@EhgvtQs*Fvp<W%dhoDb(_fP!E7u zEoj1M2f!J0dr+u*u|yscsuT2mSg3_C+_yp%f!*(f>JR3>7itN*9}((hF#iLH!E1gL z>JXU!NvN%`=ux5G0F|GGx*1#_gObtx7om1R$-fHqJIs1qs4OV*HwcD-eiy0(F#ixL z9nAkJ)a|hOUl<+zPM`y{yGW`k2)$UU92o9usXl?Ul~N_3@+DHuM%z-U-bULsQr(2M zYo*!;n#-i>1a_;W>I$P?C)Hr^SuNGO7-+dvMVO8?Qf<IM*GqK}%DMp%l(j;t2cXuK zQk8?sjZ$3)mR5l@e15f5Gl6-NR7p_C%~B14rEigHF?d-c)ijX4RjPrI_BN^NK|QyF z7qs0W)qAk*ol=E?@(-zc!5nu<^$^%yD^&o?^4(J9gXTR_Z3VmcO0^Rm?vv_j*yw(# zngR0xsaApA2c=pH4L&5*w@~@RQn^s;BT}^h^Z%6UJaGM}R4X9gI;q+<5#lkaCPMbd zr7FRSuwJS{jQ)gFPlC#mQjLYXJtb9V475S2AK(m6OLZGCpOMN&+p|zII&74xF(%t{ zQZ)p-&r5X`Ed7F1&%u{ANp%aR-DXe$%8M`+DqjLi;B1RjS>WYmseXZ4Ux8jgb1O86 zS-%baAj5X4y1|S)qzXZwJEeLU8ayP`aENkPisX90v4g)$wTP#+YJ-76rUkF6OEE$~ z-4F3<L#hb|I1p01fpjpW#$jPP6jFDBqr)L}4j$izR2Zy$7gFQF-}fQa5gp!e)nc&n zrmG5Jo3~t51`WLJs>U$iZdcXM$6R*RYq0(vS6vU3cU|=<sJ!Q@^<d<ES3Lsg16NG| zb050uF$}cVRj=dmk*iA3_OYuzfn1-sY83Ri&s81Z)}Oj+J6!2ASKS9gf9|UNVB!l9 zlbFk{>I??|<*ESG@^4qQ!-V_FRT4D6cGU?~?nggp=NngTK)+g7mF7TpSG^A_9(2`U zINc#vq!fT{t61`O9vlEIy3RDm1)t8;%}^4~D3RGEGlWxOAX``uLTYDP4p8w$8snE6 z{OS5cP=`OmK)`>1@yka6)oKF0g>re$StGEThVlk+u^E^R<)6dFC0yHN8W*qNV$+|v z7zV4Mg1@-f36nU~jK8I_ELb<x>LzHx+6&``iaK!Z!X_wo(ZzfW8S3G2Z7s|Y>f4Bm zx58SXfxWo66SfTv8^XnJ7<W{Ri!Wo2h0fL|l4)3fLX&jyPxxDC>KsP6nMW(z%f*c_ zTByQi9-d})=03s2BB(EPMHUyY1%f;D-;6sGbHY70pKBX~E}|~DxR?38MIVsrb8(G6 z_zuC`3Yo*(7UtolN*2Bx9YR~yauE|mgkIJ*=?^1^Uip}7gDl1NK3sgfIf^?pv46se zLOa`V?HnHB)x%tTg!S;6rVs>zm(ImtH>i4%*GQjIy@m|wQ|b^Dm`8m|Z3MC4kY1xa zZmvQxsR<*{b+}D}ce(DRj)(Ex`C9N?;T(Wlx|h|t1U&^rR*xI*uMxuo>JSNr3J*Na z`pSjx`~^KEwbkH5Sc;N5X*c}>-4`VdcXlLDHH0C}<Pj8J#8t=S2G}WS_%3dKggcMr zM(ZvVLq|um?#HlVel6qTJy;|{e=I~Xr5ZCSWJ$(I8O%xwNJdHdj!|sMI97icMdh(Z zQs+YcJk=t~-@B=13`IW&1=XBc6``f5;Ipe{Dpyz?F&9;fai|S`i(7{3?!1UqNEK;I zKvkE(?^OqFtz6r=5H6}Znl{(D3hGc@45XZ7Of1#aV3PAASXJFjTTG;;!wwm$XS6ks z*L?-Er8zu<8w`g%42MloagxJyVciL~RZnIv^-MI`s@U}G%aG~W&-9#pde4Bc#$nM? z#qCfV{2I4HHO|O3+;AnF(QpGb;-qXapBi}>!&$kmvyr7a(?I0h%WRJ_P=+&^q0#(F z#L7)L<uIcfW2odglVC(O)<6X!l>@2TsI!dR<EAsqTJG`OV7Q;ab47$2q2i?6t%Y^a z1);N;@zg<RvehKhbK)tv&(m^Sw_$})lbQp`>c^u`(W6ImP`r>uP5Tl}O|W2OJ9}j8 zA7I{jS8^TPH1=g<fbaRB9jnBu;JojHZmb7-IUh))Fcn(~gF7GkAjL9xUEb@1vDjcH z^^xB<1D~4Sz}}zMi{R^6XfvF2D?Vw`eBoqRT*rgI3F(|nA9T<<ML1cOUL|r>tUp!< zr@jxSpmnlvvMt{vA~X_27*)HnB~m5;c7C_c<?-wbaIbeMZkoGvF6kSZ7NDGA@#ov* zR#hldHfGP=09PlsihU1bMwKs;h~33ts^rgnSO8*#RU{hqK{xgeI!4ocP{rC{Do0~J zNU@I-0OLLwi;cok60PTh8L`cI0MmUiJC@I$k>P`hSW})LSw5H-Th65F`(QyV9qNo` z`(R=0b!M`mJb^{n%54)X;?{%=;{0Xh7R4@uMWZ>sM4e;rF>(_h>>lgStp#!%#J6&L z#oEE%(L!k~&@VQH<!vb|p=B$#e=LcaY$e-h$^&9AVWEh&lf5)JFgBP;b(4B~${iZx z0HJ7aX=cfo*qIFW@xckPnqq+cd^XOFWi#devcD#EzOe3xc9MIrby60=tE>xT8}N`i z3JSMVMn~tv4^z%;%IfDZsgzMXC$Rsc!c7^Y*Q}J2vpfpprHpQmHob<=h8<GIFs0yw zxT#1(XCKz8NVd^|a27+3NCT#&@oxowq~X<!{~>M^$u;<mji5e$j=^_@)!G4m?vp?b zH$_*Qny6X;LOEDTr6SA;OTE>U4}n)g+qOC%6FsOtz{={V9q1fV>6n|YY5+`6gu81Z zomBu#;!qCe5XpKF4;nZCO-c4J-0LmEO=~lqOX(#rO?oZgzT?|*z6CMy^{%RK=y(k< zi7_9<Jnl|Coeg->+&wrWU~)P?VgaFfX&SPfH{c;O-z)^KbBxEo+)NAQWJ3<R!a(Hg zgb`^0vu)+ZoXc5fSA{XQm7C#AV$6kYHI(giz}U2Cl!g+{<qRz`Bjh<}V^O1}1}bn~ z^Z;FBphBkt8l!6s)W&&&q3aA(<nRVV%Z=A{hL9ICRo28k*bVZim-9{n6{}95hJ`$a z=|HOuWINw6w8oHfop$hkGB2lcmD7dQvDUPaGduw3p1oQ*G3_#RuW^A4r<8|$aEES7 zfCV-|l6@5S%sz00B%6mzvP~IQ71lZyc1S(2o@8g^UXL{-*}siB8C6)Xfg%m>uCe+s z7HjU5(S>ytuVy1AYJ@4cXU${;O+r|=F;^2V*NuyCulE!;vOK4`4C~Ff%ztqnVLioW zpZKDtv5jfeo}_WAr}j7&F`aM1I*etUCjX>69p_H$Qf6Q-r8ndoJ12zCL^+dp!6Z8m z#Uxw1-xE2YFtvlO=!Sc}k+>NOTuNso;A*2Q_S2Hds7bOf#64>cZZe(Q1hGvnPQv>p z>f?NdC6+E?4Y42m!9!fkD#4_8W@0kZ^l`xEqkwj)8Jar$;7!&+xv?%`5T;*0tpNtX zhKF*J&3-^}ti}g0rc3J_$>x1bTN*<h$EkxJ=V{7AY2xb{8$;nE2ny`7*}OjqYi$s? zPqap5lKmNo2hT>aFjC~afB_@zxQe-FB9ZpokBOvSrja^CHBxIvie^LFNKpqg2x~fS zkq%7Bx)^GSbXv=`Y^z9T?cZaWT$eFin*lZ>=1|uGBvM<vfNn`Po2UVsH@cp9-&x0} zrRX!xTTo8)S+f%fXCaTg(QHJv^C=V%-J}NqlS%fgni8+^4S7d7fr1@<T@_%S;$0AE zOZA#wEWblbK!kXoIe9>0CSC@C(74lU+lv(5$fHA~-a2~n9(#c-2THwVxJiB+t6d?T z7pru(h)Ahoo-dZG8SAmXW3glTSmR;T=p6YxW4*^%EQq1SVBMO^n?lMm54iK>e#Sla zH(X)e0fi=?{UNAX?XcJ<pT|X-1P^%$PM48Z?W{}S)U<pp$g^n|LLF%}2C|$uBucx{ z>^!z}5mwW*RR&VdXBax|CjF5R+M_2y@*}KL8ELl}lDGej<Q;z_dFS6q{^M^X@2Vqd zb!QJKTMxBcp#sdJW|>$ot!J?J#)iC$;gal+aL@W0H)#}N@y-+5#lO*Nlm@>)3v0C< z&xt;Xy)d53DvV}gKF0I({IoK#ugCL`06W+h*lxV9^9}aGcyGgkaN1(5cpv?_7@x<2 z^MIt{L!4is$@oCyTEej#0~%x?+Zl%;<AZr{o*Ti3b(hhG?#RE<J=3Rax{Ny6<yUlx z#V49B6aLoathz2`Smfeon=xHyB&I|BoM&MW3oFL&@S^y+1|kuRpwW&`ao&Vy#4j+* z3Ir}fwHv?484shzOZjBa=AXq-nJ<e-y$(nf@r$ER!*%1`8HzT>rXBCw5IjcDfKlQ@ zSaXR<vnxb=H0vii75*Kc$Z|*LG42$WCwe~oFn$s9B0{&}&O_*vbhe<+0duJGqIi+B zIRU&5h5_N+z^e5>|JlxFOw71hGO$eEfhiTgn8kq5!s>xSd^(d3H-~52@vBt;rs$`x zf&W~to`hgms0kSLN_7;xEKu1n`&DWNm|Lh`%|U~z!3MlYitrTO|2FkAV7#Jhfe}<Y z$Qn?*C)jE`oGhqTz^5Fw30e&){t=5S!guJ7kE>(|Aypr}nk>RF0Tr>i<3SJ%s;Q`V z)H~=NQc0L#==g;0xJ`8e9jWrbpC!U?=*l-$wXS>~Ca~0Y@EH){FLl#i^=pvvm&2uj zpVENE{!)Dm5U+%xP)j`qKM5$pq_)*kczaMKqs9^8ZW{SVbr{-_>SN978DKl0ZiS|7 z)f-bSsOEz`M>L)SWH+8GW0{aV-dN5A3~!0u%#$U@KV;k3|4}c~*^x0fxl_))xE00o zWUlt0Ov!NWZ#;b(NcQa9Ql4SClGzcVr*Pklf_4lg(*6Ye8TjoLkIL8$Y~VC$riqC0 z7sm0JUKX*h#ja#l;y&oc&f%GzC+C6ydq>KbSefGak_Sj`0nOX-mUwsD6l19pG~tal zyw*||z}B`%9n2W@@}haznCc~XHHi)ZYQ3CkXb@TF<8Bt5bS|3BKFMY`PO`Z#J%!tF zpf0bM<)pw1>*evFJi%(<FZBxfeG%`*o4BntiyX*ZA{_yl>a}|ssDa#N_$e5+c&3zg z47XRnriyitv0m`OSVzfRu?-Hx&aqA>^)MeS$GX(%K~N(!_bLKo=~3ru$eW(VHOvYB zKr1~q24m}!Kx<SppmiS*G!5`}7>Cl+O~;JCb<FfTIvt_1^epoUJ4w)Dbn5|=QF;R> zhVJPNxd!8j&`K08IW<E5)d_rw&<|heTj`A>Q!W+hx!elW$Q%Ir^%8UxshE5rHbv9q zI<ugL^d<&UPG`7!dQ$_D6NmvSFi^~?hHlfF87L!ypX-VA<~*d8o9#>ohw0`^_T1^t zpR9>OBT%`sg*V~WH)_mTPClOLZI}w1#Oa6WlHS&U%cU~_6FR-!J-`X%u8_`$SVq#@ zKWRYAsHW%@1L8!%G})9|#UT;meoRWkK@sAPY=*ja?#X7TgZG?{-wB6Y40jkS!G!cK zkqhUD^sYt(;e3bbTnAZ^X=v(Znr!DcteSO@6Twe3)4Q7{*BQo2@u8&1R5YDonv`=p zxUPdzA|+GHM0yXSSpp3}Jq;9#lw2@Nr1vsV2HF7iHc&R&=8N<`21<YmpuPsmi<C?O z)XzW#k&?-PiValgG-iwRH&B}h|7tURfPso4C1n_4pn*C^rhw@|hUxCk`<M!KP!H$X z7@)xhv6pj{U2}+m`bFlT+fdVOpfd&&s}36M+z1CvAI6%ta)&xUFp=S=+sMe2+3iI7 z2m_75)Mp2$6+kV=TJ2`kcjjR!&B*q*Tqn{PPy?RqaBC61ABc8Fu3Ca+L#hT?X)Qtg z<@p=1gUV=cdKcC89uDJY_#30`{FDl)BZdJjyaZ^R3$PPYBto%?jqx)ZI&Wi9%WT9o zJmGIS4YI-RH$b-*!l5&BxJZM6_c>07ZdMm(5>}$Du7+3P48z8o<-c6TYw1ZKpVeb5 z(35N)x#8uw9mC>cXPv9E!I@CMf)2i7v$Rn6qIoTNrL5^rp8)!n^Q&@Z=?S!C%`y<g zcoa&=ny*KF07`JX^C70mB-B>S5QXj%XCmgi>wiiY&N>g!HNHZfCEVtJN_U-Wp=@`V zX;aQMELN3)$mz?N{wMgDlg7rXHf<S>;Q6-PK-o?M%x|~GKnZ6CEamE07b`c<8OYE| zKC)Z*CYhFjOYkGeU8{w3@1A^`knTMOf{^2(f%;=BFh#9@<5qv1F5Zp(tNwUChS45S zItsI>?2gV+u#??MTMFvx3w338HXZ>=H)Fi)E)N5pe#PxMW;VN<vkn7fclY}U>*F+l zXMD<7vmxpU_)#MJTxT~nqU?!v-gOmJl6{_WW!veAgLn2ML&0?}MX&7hO&dA<+AVvE zQAy01jfElG|Edhi!(a9^UwOi6%FJKHFOq_*akH~mI~(8;*{ci&6mTy#kL=Y8&}ikr z;MsTa*E03q#ZB*E=3^6s=}mb_<9_KMG01$w`}kv3lC8<zkwE`@4|0DGq)dM<>5uE_ z%zj<32x{wp1XWAxb$Dy`L*w|cGa0q9><vy5v$(!a<yn~V*-y+s8z!0)WsN^+RH2;r zFj)3e=IBf!i;<)r2s|-0YhVr#HY8AN(1RuYDdlF2njO$@7nAQsy`3GAYBUNY(q6>l zUf8`?cAAWNHV|Wy>-6UN778r1-U}kadLFLOVAn^etzV4=OHv-@-$dBdZP>tBf~~J% zL$ja>=OC=vu#tf*=eh=f5(dH>C<jnu137pB1|su-9&OmfIS204uxTBUo-sfL2C|%m z*mW8<Gm!0^4>21yH<07}hMCu}MIY#!HTDdgv0)+8wQ@ZXx(Nkt^0&0`Pk`-4xoQCh z#W(txrj}|9a<+=IEUFt6PU=w%o>2Qic#BZOprS=mZO8OmWGTrHTdl%0T!e3Rq2)!S zc*R?k5Mir;sb8Rm!x5#RDF7`<m67P=^ni*Ioy|K$;owXy5?u^rJ2@~?qN{;iXGI85 zHv=hW4(4g1yY{$Ta*{zfafX3n!r~9#ja##OSr+gbx48@>)~^Cn(h9;qb8M}oZ-9^! zu4E1Mz`V;*T3LI+QBKNQu6+TL=0r5^cbL67sd|KCkSZtIh7n3IP)__~F5ZqIa_SxC zVi|CAbGS&WF-teVx7+0Qb4H<@Tl_Ncc*&iD=G^`UvYmaHzqtbp<U035+qnY`q@0ED zjod*7BB#m%G}u5f=S6NC@}Z`g;k*|FG}N>uoNr)~++haFb8do7bB7zKz!}fujWAH5 zu(oo?r3}(n;POlCGn97)z5&9!(<{x=iX$^9<Sj6e?feMc<XvS{ju*p_CU2pEl%V>s z?h&kX8F^Pbw?XZBm8MBJyP@d3C3j&EK6j@uwA9!X?=JeGTNlitSl%+{dU#}BRo&Rf zT|n3Q-2_zuaR8i=@?VO~oHkA5Z!v3uaIS?-^Ita0sqL)5*!i!RHhfaWsQFv{RReoG zh=f5TmcPT<i)a4MA5g=#>7M}fs)1|=zgH0XyV605ClY^wm;ai9lw-jf`LB1<Z6xY( z7qjB=D0S-`4^%G|IHrk9W<_NrtWhj>lKl%~F{RV6PN&iUO-<+GHZzRbl;29K^<YJ+ zLtw#D`(aR9wMgR0`Wb3SHG{b_RBO~{iKaK8-$kk1ua{IkP?u2O!?;~V$~;WD{4@1R zn6e!UY5piv{uh@gN%o2GE9h;@mTD~Agz%2D5J5dSxMj#9XEfGF*@3^?XT_CcOxe*u zmcw6$$W8{bov+~!va^94XF4yIU6>LdoI?31oP5a_!Rv0oe?m@Ic6a{dRpX4h9dZZ! zLiX?tEa*}o?uB4V_I6%?D6$WeVqUg10Myrv=Zc1(q55OYTwC^6$H4C5JzrwI>Ij#( z8m0NF4D40zxf`XgKyC?2SE|o3yO!!YnS-dUJx`$ZW0>h$lx(#N!!6Tw<DsjnJu4T@ z7X`D?EXPQWXg&*<9NVm<C8Ew)6K89~o{!pp>6YfxrV2TMCx#r^96vJwpuf2s*&IJJ zIR#9a0kpFWT8lryuRM!qg&bMvfl+ymZW-5d<}4v6YS~xL63srwpocv)m+3)|V8F|( zm>oT+{(87>&<jr=v<}dNmYzOn>FI-B<PVD6EOI*A%34_}n$>TGg^tBps0Uqy0e7An zRIe3vgI;y|pgtg)oeKh&v12UJ1a_ly`gJF{)@w)|fy&bfRI)qBk)qk1Ag~-$SypKR z33R^j6ajq<ts}7PbOOuj2<!lXDLk#N*92yv^I}b)#cVdAIfAZqMJuZ{K&<|9m0SUY z{?(dIZMYlxn-FWI+q`_XkUHvGj&E5e<Z7cd(V8RaVJ=(Vr!3eAv4jmkNNqsbQmgRR zCZL+3##Xmu1rMrE@cGFRZ8+v$UZ!w+T=qe)kr?tRGc3aLg?x-Fp@cRZjxQTAZ!buG zSHA^F1Ceb+j&1Qa&I|H+t^|LDkAtw>WT07v*RxY?W=M_~Z92BalfPn=uWC(jx)Ki6 z2a#$lA95!P10pTT;QMkXpAD^*ipAVzTKct|4JUq$0m<G8PkG${h2we6c*CIeD%6U7 zlcB)MO$%EN7xFCwY+P7)78D@gHsFSZh5fNF$=wEAzp!vOCZ&AGfR8RLT*a*KG2lZB z3m?SfmG6yYIY#N(@d4W`u=2izE%jvE%iB(1<=TZUwbediP>wI$%~U=yZFejz`~l{W z`wX~dVavIbh5U?{Lu=*0wgR-r;c9=Q?VgXTT^SEHvi&?fx&ZEltEdV<(M^228#hD? zGv>3nI$mE4?6|xHmF*ux8#i2ytKG{0<jA5o@hJKTH~iJpa<G@`O~u@xbKW@ERGoNC z`2*HQXQk8zvpWA9tDLh!^1KzDcmR1%2sI<5j<WOHhw1jt{3W<EOtKj}&q?SqD^hgY z&;435-LX=$>h~LcTEEe!^&71R7hPJynVaa-nw0g=Y6c?p`K`3<lH35V{;uaQhotN> z5|b+V1|Qg#3Sfl+#IlaivDgoRZP^9>@&dBv@b6R3>ny~m4jf?&0@mz``S?zP*PMWj z4#K``2&#h}dZV*9Kvxgr>L*c_!OdMz9X?N2PsQCS2O$~ijX`ehj=0f4Q+M*zsrZoy z*I)~Ar<Y8|4@3-PdpDnfVVWDr^}ZSisD*))_sDQSg$5$;Aut(eX`q-lc?6(V2Fmcd z3<cD>zaAmmTZ{E3(8fRs@A;m9+8Rv!-USi`+8L<8BXAffGEkwnu^pfeQ#F+~-t~O} zbuQCTcW)@x<3Lw~IncYFx$ed>dw6l;ox?QGn4_Ty-Yn*&r@@@+eZqY9TBh4dz1N__ zK%W{7&GOVpK>ZBnd@sOI@r}CeO0P4^FyIzI{A$i?1Gf$gV$?wHYU!<FJq<D78tJv~ z18}GT?~vZx+;_ME*Gg|CvoOMd_erl4tQi<-z=x!F4GVUr0UwnfFL;4b23#+_qwN8X zHsA&+l31x>^LrgN5oK;k<^p1deoVkALP^5_{l`tQ{3lUCW<KNS+ez$5f9(7(L6df1 zhfQgaS5O${k<=F3u0<`G6Z*P85NJ0<2vRw49C{D+A$@vsDHXDKG#O@0qI{?*OmB6= zPC(D2KABdcR|-7@3K80eJJABvr_%E<N|g4XCXN1r%f%?K4_>HKC04e2v;g;Xx)Hvf zL6yMF6seoRX%)z42Of5!!`P&cIL6yVbi|ne|LKEv^b*YUz@t9sMh7wSIv-TgA&mT( z4^ngnBR}qgv1kUk2(0(P43XAsDBvd?)>FhwL0I`<R#vJ@$q-D?A+RoKA}THV6%#Z- zAK-4&pYW?7)pliNJ&5r`^eb>&`W-bM1$(iwp2KVp(;1k#igsbxWa@*~6uJR(CqjMD zO0>C%mGw55h*C$GB#jDyAEOU2Zk)OSvmQ;soJpq<sL7xWJ)kTKqb7@bfrt833Hh?A z2=llB?Zmwy<-s)@Q6;D)=(c|NP)Tn=uQ_xD)SXN12jTMxJqRiCsT!tgLLEV^DYb*i z3+NuyG^4)2Y)+4ZLJK+!-U{hY;IyQNz<(?H7F@Na2f%q7a)8s8t^v(<G_e)72f81Q zQbgUMy$+NG89UMllsnNN@Yb2ugP$%m1X6S*8#?JmMVR&7DFF&+P<=RG531>e-<i<< z7W~<rJ_pjvg8q&6R}k3&LADC&0$pqq^fSC@yP)yVzz&4)V1S*1Lg@XfpnK7_OA!C^ z?KMHyL#eOh6C)mP2>JkGyot~XpuC0ofa&qJpm)%3x1jA1=p8|`FiG|xlnUd$i#Q@k z_MV{M@pxZQ4IUo|dJm5e5#t2Ry$BhC`aTl$10?%c&`7B06G26A|9ygP!sAmx1Mv7v z(A9W+E~pbm|3c8~czh}71?cEskPl-1TTm{n@Rgv0F!0yl8l3GH^c%$dM$k@JqE=8x zDDnVE4@Ydepsphjn=WVrpu>U+Fh9RV2oV(WouD%SeJ`j2VjdAR2z>s4SSVEfC@2A5 zeiF0^_CAVFOQ89)pnt&%#{{)UhhGG}4}1P9XcwU4cpnG`e-ktVYW*D_p2684g6>D# zpMpNc<1axQA<+qZD92-wq>rJV#geYU<7!C{<54N;6O6t@(jZKkrIKplN!Li4kAbe0 z^dK~`OwuY$swzo$L9pv2y@LI)TG9t-TQ2E!Fjylg1qQfY(zCG94U&4oPgh7v0A;15 zTQCD}lyndht&+3?<E@qyg57VDlmr{yEa@<4-XiG>_}>~yFTgc!l{5yLzfICjz`R{j z131GSl0HX=J0-md%zsEa4+GsLsT>Thm9z+$cS{-qY44Gg4d`A;XJhpHB<+Wm?w2$H zl@CZ74Ang-=_^R|5GtX%hb4t!fJY>K2uuG{(mP<{QAs<%^*TuxftSZ5eF`%^F6k#c z)=T;p@;xCb4VX_#Qqb8`lD>f^Hc0vi!#*wP1S+4Av<a2ZN@@hFY?M?4T|Xx&2V6go zO340#BoB{Gl3s*QY?j17dwo&T*T8&9(kSR`i=<>gFH5>=80Md(sd#Lav<>WTlav94 zY?m||G<Qha4a}XAE{3$PO1d92U>C%M3SN`+E=>G7yZ|U~NScMmo03X^@)ooU)x8ZH z0cE$O+o9ceBz+0l_ekn^CgvY7;n(j;dJT4XUs7YtfDfQLRDLL_11k4QdJ5~rN0OR@ zvyb5nm@A)1`WZ^zC+TrOpGv9%=4X<|!Kj~0Iv+~@Led;~&zF)4F$4Z3=^k|Wx1=nH z`4vcmiLWIc1?l~grla3Ck}iV*Y9-}Ey9Xo<hprF8;4tGM$N*6eOKJ@jd@E@?x_>9> zQVjb&6bbVmk#qr6{sUYes{0W>4>SHG>Gu|xe;5|_`x!ou4#(i8F!(Q$=7G;&C0Xcj zT+(~c^>6S3%%k5W9Rc%yz$#$rPf6E<&%Y$qKm{i-S1`~bi#p)3*rEsVxZ0vep!rIR zeuJf#Sd<TyFSY1=bhyT%Awap-qRAM2nMDIYy2_$8(Dii|J&hSxZ4v+2cezChOw?FJ zFwXTBy#U#7uxL3nvBIJWz+7q3tx(pD7Oey6RTi~^T31`t2ol|7(Uq71H(OMS$1N6Z zhZn4|Xcbtx)glQy+-6Z6{cZ=JfbOv9UJQGuMJr*Ie^~T2Sh~xi@4&=bbcb#4w&)m! zy$4)FqI)g+8Kd84Q4BQihhXUUfJO7s??H=(;qeeg2QLp>bQEfR#G>68=${tdj>n@G zJqsUPXVGPNJO(X+!N)Cf@K|rrB^coei=IIDCoOsg-Jh~(A|4wodJbYfZP8pjp0Q{z zIy`I9pRnIXi>?6loJAMFM$cRHPgwc|i?+dxo1jQcwapg22)B9BqPHOJOBU?|^II%> z0FRd~Y7A$8#iApa%v-@VI&4D+2)5m#WGHe6#02S`7R?0dS1p<cYwoh>T(J8Z_(X@- zEqWPkZ&;L#Vc)c9Jv8wabPb{3wx~TScf&?7<2%5_V~<6Bpw@RSdJ4Y#9?XdD?_1Os zuJM6IYa#T9K!LP-(GPC(kwpj4?_*dK{XVhi4~)LgqO&l<rxq2!RG(RNDfs-{qJwao zFD$Yk!<QBvhtU7B=swJce_J#NI{V6^8p!aqMY}Niev3Y7iTMX!AZ@Kh6&U>hctQ7r z78POGL*NCI@~}nsfWdDeJ0$uJnBe7mi&9!+{#o=HnD_zXf##3k1%CFEMI}(rQRo`o ze})$T<(Nfx0p%Bq`a$Sl;Y)Ca<8Vb_{$|k(F#o$n17VIoELx4npO%OWNWp;*g4ltA zWV!|x6LbLlNg4s3EjkQJ0cr->Z2AcIAbkVHIMfZE91;=!amaMYVF$V<(<E>!iNFcy zUV#?=lxYY0Ceu_D1Z}}YmLjqaEtg^lJ1``fYC%U(3n)g?O0WxVAzpy8;d(ah8jcY~ zWG8w|giqOlbCc;u&_~oV_yVnOcogW$4qTT^Z-HYR4|o(B2P&2b@{gYEbUu>Dya?77 zyOGB~&9@segwLQtWum~B{_N67q<ot|b12BaeX<+<2=YM=$_qB*k%Ig)s9?JU(1V;p z!FhiuivGu)Ag53W<($VgCWS(fQz(RTXK_tmiSa_grVeWIxL1%<D1`EP=pg4#2o<bh zJd-;i$hi|j&6rm3ZVVr4{xmn5+zG+PjZxEr$p$%hLa31C3v$wgP)kj^UsDv@)p8e; zG$F`I6GH8O<C<Csh;+!DG$GWDae|yQA=I54gPb%WRHX>Tueo@grnf&0#cEyTqzR$r znrlv)5USC*x8$REy++`q385Qwk&`BbR?Gykl+`xW84PmLgb+?TAkRq?LN{v5+zZ$Z zt<shWtzt-9<|!1oU0X(qAZJSmty<2VIa@+#HB$~s*d=t6X6QzkKXkKZh_fYxZqZzD zwuI0cjmy~*LbvK#&Xy3mP1ka^gwP#YhaI@vow}B@C4~N=YdKp&=x$x)Yzd+J`lRB6 zjveG|384oyHfKu+J*tbGEg^)H5P0Kk38D3xH_nz2dP4KIHh|&=UCY@LLQm`3#$0?> z55n0JLNCl?3Y;w=wCPEV7qB%+&Xy3`yj551!ae00IF@vRoGl@=MR#w^*59h7<RH@E zkRLSmFSyqWfPbmyg9vgEX>iwmREIjD+W$?TXMWRn5TnBi-hnLG<}wzHqd79p2BMBO z^>Q*f+SJSCAu~DJ)XQVFh#*IsdihK-b{%f6*Vp4{Q?K`8pCm_{dVO+ylCjM|Qr-~n zS4<6WAd};9Ioi}4WFXt)Xj5-656)j_#E$4LqYd4Wf1`V*PuFxAb+SuOxNyvyXu3@J zTbHxyx^T3qceWYR^*GwpJLg5srt&!2)H~NeBx2)0&i1Bw9Bt}dz=E^O#;!oM>s{n= zw5eBWv?jb+43+t^h^%*jRN-Bm$+3c7cZM=K+SKb?0<khV+SD7unyb&zrrv1QPbNp3 zdJ|dhOpZ47rm#Gj9Bt}d#JmXgCGI?go-Nt<eJ!l3qfNacZ!@O6*TFC#JdQT?I{NH* z9Bt}#HjwMx0Sey5EC$CD207Z)o6e-OF9AK<yP5(pML*Kfrlg}yX#$47l615w>1b2Z z(Wdlj4jRahHqCxS_rHyFv?=LmQ_|6<q@ztqN1Kw4HYFWxN+#Mg``^0b<CF{`B<W~V z($S`*qfJRio05(;B^_-_CfYRnSKV<N>1b0j(WcpHaTeoE($S`*qfKc$_zZ~b0^PKi zbhIg%Xwz&C+PA$gNk^NKjy5G7ZAv=YlytNynP}7Ot2OeEq@ztqN1M_aU^_rM+LUy( zDVb=~29E&Q^>SsDqfNcW@)^Kzq9{k3dO6bglr8FUy-a6E#u~t#8Xm;0$jg(Bv<GEM zh8uCTsn<ZVXE)+#Q&47hg!&Ella8T8YCm}LXYll1UQ|Xo+SE&vW}1j7N1J*v>H9{M zqfNcI54ur~HpQ@;K#;v7?aKh*e8~eOW`nBjwUlII^}$a?&?HGmo05q(&05JANqLzZ zZJLzeWhWCK0Yp;HIy8uS58`fCrgSc*>%}Bsvl|C&?i>0Ix8Xoto|NTrv}sZv56X)L zN1G-U^0tgc%HwF$B>f7QqfI>=6GT$GKY*&EO&iOA&6HBlNXO|EY^q=f8RclxU`NSZ zu?;xdG}!5+9yr=G*riSnA~X$Mb+l<X>T$GbIE`zV6OJ|w$9QYzg-XpsH3NE5A~bRx zU>$85PB$Gh{?;+m@91%~X*i3qIgOFpd^#CRg!TgN0TWWVfydFO;f7p;@sVi;1>Rcx z5pzC*YE1NLxUq}q({L`g0@X$IX*kaSw#(6{VSN|@-yTPwhMSl+<#F_BxT%52<LJ|H zfq`NkN1uk987RZ$=+kg>9um=~9!H;s^)bcDo$hh;X}Hh`SnhH3X}I+PjXBH9PXKDe zRP-xujy?^yHQ;jTar9}p-ErV@^r^?ur{VS<&K3YIqpG4z0|ta*nrvFFfDlru0A^Ae z4q~SPCYz<MoqMu=>)>X9PvUnHp^ex8b@XYti;L*fa91OO@HqOk4zgTCpN6}cCfnoa z(>lm;5q%o&UT0a3KCOe2TtuIS&oE8O<LJ{mD8=RI({K;w#L6Ym0Mye!F}LJ`St8ua zKpAKQ)Z0MWXqzv>eGHTU6+nFrl;?8vX}F((3S5pp4Hp}z(BtUSaDM}}aXI=lJitIj zE=Qk+2O6lei|EtvAj5QbkE2iPpdKDapN0n;#9kgppN5ARsGp1I)9_H!ZJ@`|r*+U^ zkE2h+!&uY&B??EMhKHMOBVCR@4UaI;7%T<s;Gu7Ts-sU;eUGD0RkptcdmMeL8u$yC z$o>O}w#p?PeM%<!G}Io8o{m0M?M?5Zy57U#h|1q6ZI7c*Rma}i1o(|hW1e4~m|~DK z10?IolT`|0dC4s@`S<6^%^1q$%mB&FdG{A;J}UT}rrj$SYYg<2++q-FWBdssBe}cB znE{fE49mjf%mB$9`1HnW8D|Da_V;Sn<IDibT@0r3I5R+USA$6&X9h^_X4+yRD+wJl zl6z+IZ`hK%R|3^=cm_8Z4tp35vruu8!*mh66Kp5<WahH^p~+4zHa+_?WP0{9J;$8h zGa!PT86dehAGNW)xD_UkGqMe@lWjC~mptO6Y*2RcNdDNtYYt}yNIug*<Z)(z<WUC7 z@HjI-@@QkEgvXfylE)Y-c^+p5NFHmT0_2;(3BFD8Sw`-0lbB^~nep6UxSzludhokC zRGgH%wFn->QkHxcGoIBQO?L7m({th}xzE#b2RSo9@}vep3UX$E<S9IQkTU}$U&x{c zzaEN%c_Up63_>w~4scZ^ZWD27SBT={0?OTrusNlS)0;uc_|rFolnMTlAwnJC63a0~ zW~H3VOL)peGaK|0o^qZ6%o3h5$v|caPdVQ}W(iN3Y#_6Qr(9qlvxKKiF_2lpQ>OBW zbxU|kiD{dTtPiMWDg3oED0@S=vXQ6_*ep%#78EC>RCrIY0?JEK#hc>Bj({%V&UMo} zWwxPdrgzF51C0^Ed;0*jVjNI&g4lzboXP()S;~Ab%mgnpXu|ub5YXj}z;onU=rH99 zp499|m$n6TB{PEGNqZmn1+>7N45oX>x!+aHX(0D7((u{$04+Fc0BRSSVk>;8zl9B# z#u;;*V#yW{@^3UFEjnYofXyxw<e%(C%3AU%fqzj-t;V1j`UVbtH^9}@D#{rTV#+@< zh$v?~h^0#Y+y<7TobezQ^+7kv84qG<KB%Ic@gNrSL5hA1vaz@i#-g0@AXd)@GoqaF zAeQcf*-_4T5X<nvM3gfg#Ik%aFS?vb)%U@IC}%u~W&2<uekBU9p?nTol$F~i${7!0 z3Hg!+i=v$IAeQ4x)H%u-4`NMxuzR#Ww-(5$P!}>DMB8CejulGd!u_J0@gUYx@*56h zJcx3}gIFthP*WZdeF+O-teyN#g9D=+dL8Q~`SUOQCCV8OV!i$I@R%rPJc#x2!3j~$ zco6I7vvF>eGakhHOMyd~m3uy{1MN@`wock2><rEYlILnx1C;I5(V3j_Aoa{2p_NR| zc#t}Z=Q_xv?52*<YjNty)r&J8q>laz$S|P@a>j$yF-$2o8#fhg=yAq_XtvRT@Hpc^ zv;ou7_?+<|+He@---8=Pa}9oDBdCv`WAL%vw03}>y8x)!91LowH6fkxfON(K(ism( zXFMRC@qo_90v05l@ql#31JW4}$Yeao4&^YB&ZILQkjZ!u<ctSt19|BR*crIz4#7=p zGo4H6ZJ?m^r}*|9-?s5>H{W=UnD+<)`%GYl7*l6FNbBisV13WM6WZYE&lwNW=6wcB z@N9iU0-Dd=dA4%KgS5-dv``*rJV?93K;&`8gR}+AHqYeCF-Ov_`T(OK<AKK+57HL$ zOB`ONIO9RuBE7rkCOpn~kha8(kmqs6gS4dvD)2btLE1G2D)czxLE5zjYU6RngS6`m zROIo-nYP^QIh~=Mju<P%nz#pZI4`Z2_YNE(ZPja<1>teVgS6EKvOUgtkhaE<ay`y? zkfvWS;bW7>84uFdnl|z{<3ZXz_i9zew9BO3Yg{11<BSJs53ba039!H>2-q*-p2Tkk z12zvAuuYkdDM8M7kT&E?UHuL2xvU|{{%y?3s3OQ257LH*V32^FjC(F??v&9*kTV{n zjo=k9V7J1(UROrYBt(!i9;8hu){R4O&%KHpS)S8ehV^D#=D#?P2y(`Qw24)k#tls4 zsgpEL_0%56BBt|A1P^0<NSpkk?zD|Nu}hhOxs-l@Z|t1l`YV)^c&QH9-=i3?86(IU z57MUM|JX!@gL^I~bu$#Wl+H+KVitmm{j{V|<F>=SUJu4KA4@}QlZ%rulhgWmobe#- zBGwQ)`yV{S#aC&__Bi7~+VsZ2W@pzfHG`?MA8^Klv~puz!l+E&PQ%`W52*Y;6?|aY zhrU4Z0N8TV)_I)qAnh@8AQc{GJV<+-rzy`XO?*9LV<@4%M}b{7k3(`qaBUE{PjCv* zfc-G;v3w}A5KH_E7%<w7tC)Kx5^c}@m`GMXjnpAiBeiCv%xp*-EfQ!D!Rfd~J20gn zXFP~@;+NDxxQB@97fL~Xc@piyn?R5=9z^wFHJ@W3W9vh_&FBONv1dF^a1eXeY&^o_ z1P8H=X5X<rPH+(0<j*Cre>M6AY+llv@CKq29K>EHo#23SK^xz>;-eY9Y=al$10ioK zC(n7@^$Jud!VYn=w!KK>eLFTp>YaiU9K<e=1whH|gPUB*-mD#y*QRtffJoy62eFGK z=O)Z8{X3R+M}sw<xAr-*g0VQ}-guj)#bDhM%6mW>Cpd`BlWQ4w{oim!kP{rx1m0+Z zoL?`VJDRo1`Ss#?C0ykEdhz^AxybqT;!UpRBInnOH<h3s3}a1-7wC7*oL?{AjNh*Z zExc)nx4MHpp7ZO)+jZvJeE4>}M>-cdsb0KqOD=L!z4*Z4T-*uAjt@JFi=0$1K8oL= z205u-d;;%(LC&NXpUQ8ef}BY&UiLW`a|0-rvpR#ER4-l;=Hk=L!`zKrEP`3%S2X70 zwLox(@_Xo@Nvc=KwI-=vCoXbQz4$GBCJb^?z4#jKXq;3pZr<^8QoZ;~eCP;rfL?ry z_D_6%6!DkYpMsoJFaC=D4kXA@Y}a47a8kYa4o!^1+v7X+@t%|F#b4E5lJKv$;;%7< z&@4EC&T&_-zkYFEZ$RCPbIx&BZ=e99diUoXcj;{z3_fI|*ohC8u{+UyQTlLiM*>wt z7{V+RY8?t!;L_*pMk2^L?$U=}gPI`cxJw_)jlsK6jDItNNpg<6_@NRm-oq;X{t6Uh z`b~buG>;QKW=u6RNVuHnF{8vlIL2_I$Bb$Gvy@mD2<~Q-8~o|aAh<F3GYkZLPV|^j z9s_FX3%Kb7Xc>ECloOz3yeqj5y`o>jQ#0Q4K|9I`&@$flK{v_?&@w*Ia{)_XloOz3 zeCUG|<pgLMdwnn#9n7RY^803B0{;eSQ?uYSIssZny492upk-uO=7pJvassrBOdoX6 zIz?n;S>`aRqWxjXjQT#9g4W6S^{VB6^2F8#xifyZnt*=5F2p^zH*T7nbS~)|ng*a; z5PS!$XCB_on&%|cnLmAkV)oZ)v$N`xPC`vp@E3%dz+WuVNvKIDp(dS#nsgFs(n+Xk zU=S9d7-p(Rw>c1ybP{UPNvLTRrfo89gV|F^C!yBAqb1^XGnI4_YSKxlX*L`tCbGM> zWi|#=6Bq^Qn!u_gorIbeVW0r%B-HdFm<>|%92}2HC!r>tgqm~`YN~{?lV~LRhG}sg zubiu4o@DB&+0aR-NhhJEbr>X-E{7{eNhhHuorIcn5^B;(s7WWGruW&Rq?1t7A<)U9 zVt7w|(n+XEC!r>tgqq@5zZ#KFLQSKfkj8W^R-PQvNvKIDp(dS#nsgFs`W)+jQ_@MO z>1B-4jD{p}Sm1q7T?^_0LllxuLQOgeHR&YOq?1sSPC`vO2{q{?)TEP8Q)Ym}0(BB< z(n+Z4Esw(jpM$(zXfagXm2?tn(n+Z4HPmz`orIdULi0Tc3PW&U7o2CeAXpFnh2TJw zgqlneYBEWvX+P!#f&)zwYBEWv$t0ns^RONwIM5`ardd!Mf&*J(HA8TqNkUD_;Cl!T zG)bt*B%!8%pdW$*O%iG{NvO#rp(c}rnoJUEGD)cEIQU0!ph-eaCJ8l_O9V#<N`;Rj zIM5`aCX<Ak#>3<Y4m3%q$t0ns#~>zx17mO>4i3Zv!GRgTL~!65SO&p?mqINF4txL; z1HpkN2{oA{)MS!S6Mv0};6Rgvnu@|YIM5`arX}c(;J}x$z9Tr$B%vmggqpU(>Ie=r zNvP>&aE;(V3*8YMXp&IV?=UNZ15FZYO2t434m3%qiFZB(2i}f3f#5)sgqnVbb{8Qc z3vRd=L22+P1P7WV)MS!S(`>XYm1L4olSx8NCJ8ll0=ozfG)bswFqDkoK$C=;iZC5( zBy9kL*CPNB%0h791}F=`fhGwxl|u#u2VMu35FB_@o(>K)NvO#rp+>4R9UQnA%Ljr3 zr$Gh;2bv_*WRg&mNkUB~2{oA{)YJ>+KyaW*LQMfI)psLS40}0(15FZY+KFQyf&)zw zYH9{gMsT1>LQN(KHJK#TWRg%*3owu1K$C=;OcH7`NvMfmf+0B2B%vmggqlneYBEWv z$t0nsA287n9C#a69|Q-QB-F%*AOr`RB-CV*P?JeQO(qF7nIzQI87E`}2j;-R5FFS7 z2H%2!HR$?fNhS$3nIzPdg!Ko(fhGwxnIzN{f<6%(Xp&IVa2NnDjS>~@cu_?ln@&Pa zZLrTv(n+XkDMkp8NkUB~2{rA8*$;-uB%vmggqlneY6^pu??PmfP?JeQO(qF76~cUP zxnz=1lSx8N{3+!fmtKSQ_qcRDP~LUPB%!ADVB~$5OcH7`NvO#rp(c}rnoJUEGD)au z6!f^yC6k1jw!`T@bIBy3CX<AkOcHA93<dtnC6k1jOcH7`NvP=rTz)_LK|9~LWRg%* zDZKQ6OC||54TeJ=!mkjYgXXdeu@luF?eQ<|>W}4vGENKLQ~d#*<DW^qtFgq_KbNub zDa4(J{};XfMAN3Q?{X>lKoe|~-TCYIX0zJY<xTKR-UNdxJd-z}j;P6-U?A7K7P7m` z45U1hH^D&U^~KuZ`tJc^UK&*HR-3jA&*V)oP_}3CCKxE;nY;<+pp@sCya`6Y0z{pF zOE@#<P4GB5gnM@iRvea{lS8=o7znR@j*oznTIn<C_@O9{)aTc6Ts%`BPe-&wag;vF z4(H-%eNx@r9mR3XN^lbw$Lm9@&E3w{!~$GAN1tC?pNZnMP!Y}%zjc5!+d=$NRb*e7 z$u$dygB?4#q8Dl|YQZ&^qej`m#cfcdUzVhnq55X<>1MZ(O{dSiRN%QA0`kG{`))w1 z18vc88F1a~j<We2kzFLu0MvZ?bdlXb?neiaHmx_%J4&Y5;vz63|5=P%A|QW2Gyfuq zf0-o57M9~TcLA=G;|lSEO}{*{<plKMeoq4RQ(UdC1kmalzOBHm_3gM>D`&NN0M9m0 z@NE-U@6^@jw%((k(_4S4pQWv9@s#6Q|HO|`t^eZ3_|{%WrZJ&)Oh2c#ZiuJk0Dcha z&gD_92k--GZPy0ucEr`X71xZ!j|;}2P#?(nFHN*iRuN<gS}aQuB=JkL@%z%C#kd_V zy;R7M#d_;d4pi3^@yn^YBEFlGo?qKW$Rz6%(CRR??a5;KsoL_>YTKPexC97c(+$5% zsViO}q%xHk%&04N>;{%P&A{d**XTCUxdVQkBA+pcGeF1``LxV3+4|RL_NiZ^(e?aE z-v2ck-C%&O7ho`(AfpsARgx3MJ&;mAB|jw%o1j%s1X8+&yM+g(fo>jvaJUERZ4_No z45mq$77jBK4z9^3@%h{8kVG4IbsLL8X{~{PaKA7fN&Mi3uzgx@Re;|7Vk7xvEf(e! zpNAmhM=*eE7~%nl2WO_t@TuviQH!491^#^+B520^Hqc};ai6LW){BEoQ^n(@2f;WF z$<Un0Y@*9qrWMYV26nqE%o~;tKcT5p7D7W<fKz-mV!lp+iBm%0Bz;nU;49~(0044N zF^Ts_ar~z)Lgf2$1dYH=Gzd!=u<2>Tf<Qql-x^I->i_EO{9`1`uKNCFYSt9q^?G-` zU@Wj1Y!lnNGu=D0v*X!y6t=pmy1P8pRW<dad&WU}uCA`0E>Cw=P5qc3PGHw~fhduM zL5f0zS0wyFU=axc5?cf*pa5~ANHGG8L=q%^L<)+M00~MY2%FFM+<U9ud)14QJWWr% z``&%$o_p^3eeb*P%u{Dxrl%iSGfY%n9SoPBd1OJ>v($Q)q91k@FI-6#FH-blpu1~H zkOAPjVX{E%Io0{L#~jdP!<XL6jZ20HLi<Q`>0JW({Cm%YYj|XrpzRBf!k15-A(!TH zF>d&10_-CKdmi4p=8?A=FD_C4w#PJ9Z^h-lLvVRu&2wRR9|_~CL4Mo2{h)tMf}?1a z577mph@-yNj~z^|;@chTJh~K{$AW0QBZ$W1OYwSVdK{k!a`9xMO~yBHxMy45fHeYJ z$C<`jisjcisq=ij<mxnAzGsOuq2PiO>D)IiI|sK5xxjJnT{Hte?^`@W;hQ})!{+?r z8SZ?GW99ez&UjY-z*6Ww=;Y~J1Kr7PBnSdN%%&N!eaAm6>HLU+TX5Vn%PR15mQMnk zUhsSmAo1PBBsZ=(&Rf6YnesWE@WR@oXF6|#R|7*nzhM6b2g0)fJvi4(i_zigqtFgL zo@C-{kHXd~eEJgHmp6ue;T(FHcMznz|CF6XU>qKZ&aBU;ll9|KuY0_HJlJ2q-RoV{ z@8IgSs~5*QAI=|Kygi(++h_OI2gBa+{GcD%W4^GyKbTEZS5k%b>BIEh_3rcpd0*$> z@vuJ&Fx7sKg*R_`hYBFWM?&qZuWLQfkEXMOgZ|;#k(Pzc9*+A{PU+)(IN%QF_-4J) z@K7Hab$7T-JMK<+0Mg6`R)Ol%!R={(<}3G)`C2RI%NhV5&yFVj?m>8(D*zpg9L)Q@ zqwd6=##^!49^7`t+Z67(Q|fmox(wD}6P`Wn4QIz(6~w{qVOQr4qrvb{@ZF7WPhVeM z6ahQyKM<Bkg|MITtwDG97%FfEqxt^Pk2#-}z37YFu;O?!IC1sdc>D6#K<iO&c6>3> z)sxYD=qYkSyLJwa?zuyepQl275ORa+F1z!%chKLTtM@d&xXt<P{*0XuxDGrV-?Q2l zB=>9hNiY5SZ+`yjSHD01>N6MCe)a6jUq|5&K9=95RQZ{+*S<dg(%B17{PFwR`3q-1 zl=yw-!sMG?`s14)&Y!OyJo9S)LVh=2{=}DZ^H+X1{~fva<X_BBUjM@D`9I9RcI(FV z7k)DT#`Zt^(X$_XJpVnpZ|LNXp3Qwje(Ue&j(U^alXr9DXD7LL-px&3%sqa0H@`QT z=GXGO*DmDua@T(6W51Pu^|i@la`ySB^Iyn+U;ep2&;QDw=U;#Q^}qKcujjt@hw`uH zzU9|)`>&k+@DrcUzww32`0U!d@^5_k_s)Ln6ZuJgJO8_9KlnucGry91(91oR|K(xs z6Zu!pKJ&yg7uJXQ_x-Qj!?WvW@>lG_kN#=y-r3Tb-1zKA&g6e3cfXT=_0`<g7xNb; z=iZk4;WM4wKYyi{`^D%kzt5gKll!q(@~?2?KSZ5_{R@;g=<pXNxliVw<%!&roqLnq ztTV~Ir?Z=j`JHs$$i0)_$2z%_&%Kd*l+&j>XYmZr)L^=^ykM499FDcu2hU%-nils{ zv;my5-u>wM{(NwJU@FEb6O>I5{xk?Fh?Itr3PCRf?IX6~;mb}3%ob7x+Xx?62y|0| zoD`>^G9=VXm66f_|DW=<su|`>>4d`<a!=F$uR4P+2R#w62BODWiXrGK*ACU4B)zpN zT*w;|8wh$N(Ub5gSj+wJ+s@?v=Xd1)_lI8k@JoN?;`Q85p80Vk;HBKZieAip@s$_t z_qFf*<;ib+;kDdXqtD&CkbC@d+qt*j{i}Z`_w_&Y(Y=rCz0qE~R{q%bmoBzGw!N3X z_`b7`z)`>_(zoyO5-$Apy#JOjStvUs_M{UQpYq-OEUUTwJ?#hdU8#HLZhQ~wBHY*3 zJ%96*`vJCg+1X0n&qd*$Hqcqz+j_?%YB$`|-Z<HNPkWrS?ah+`X6YS|1YES!%|8v- z`6Tg%K1sa!S#8;2hqk2YCGLMb(NFC{^EVRr?Agj^7qGR#%teMReC{~hKaH-!=O#oC zAFK_}gij>LC*2`mQ$L6XVy4M--0wx0E;fUB{&GBv;#M(kR4Q@3Rf&uBb}MopCb#Rw zYQ1<pZt=<VV!Oib0Ml-a)H(YUQNPf-RV!BN(fBAnJnpjIR&bGsdme6ayU>hxDm4Sv zT9)F;@!;g@6>52jicjX)IRDg6c}MLgF^e`jt+>ANaXzRX#l==!-Q=&b(Teyw@%iF; z_sQTmHWN(G4oA`PIG)b;rB^_D(x1*olm3LS@470pNq0Dn`@@6yus4fPU$JCPDy4r< zT^x?0R(WeT-YGQH<n(AXnVp{xb%!q>YafkRPxI09X!hy&1ixk0pWdexL;7aDQLe?E zTID8=iho9KgY}|NY8I-s2u~=U9MBq%RO<0&rCPS0H5iAR@$S{QQf)9k_07$V%2r%0 zZB`3gEn4l#KGPHWcQ`oiN5}Daa1d3h)p(;&YPGLNMMzVqLcn;tvbEi5m7CExHZvd1 zj$?TYKp{&v@0)T_zD_JqE$^1Am*ZlgxLuB0l~4SI*og9e7#PKBrPlW78CzYt>Ic%% zp<LUJE0?a-#3fUBuhOpUl$&v*S>KGH_2Hm@%))Bn030ZO6a4fk`6XJH;!+Z?Mp3n> z-RQIv-GvXAecvmv<8B-uPLFJK4+e}qEnJQp8*om&SZLEh$N6eb?RlhAgebIH^&)~J zY3WalR;-%u&iZ2;K*mrq#gnbdR(UtB)obNwFxDn~8sC%dr$&=_I1yx+7G4$8Hrm_G za-kIOmYXdJm9S(e@eQdrJ?{6%2x-)dd%b8dAH<`v*z%qTSJ;77AX7VTwd;-O5V!sg zkuJCU_wdxz%mS84mwH&M#yhPok@awbmmF<2%JF8kUEhWJ?siQ##TfAsZm+T9{mEd~ zkN3M%HwqH-?Pk3OTa-Y-@M7C2H%dl=PmhNE7;z8|K=oa0wF~Hvpa6{c(eY?_`@H?< z#AyHcj_W$OA3(C{9fN=bR2y^x0icU9W8N%_LyFD#<|gdpSd^wdUf)1()!NN^)gCOB ztA$&R#f+DDvQeSW8y!0AREP1bSeVCC!wn~w7l2FU-AWNzXq1bU%}UX5*ez9Bg$+ct zbTi)EX~(s~P8kpkB9L!xv|a#d$!ic>6BJQ-t6A?fT8_IZw{D^0>J88BUMD4igq}Ms zCuX|SpA7rQaj!du#u6ok0jEAvbi&wf78>zJ^LnFgwcbLpEM+&Ii_4|L567eFf`5!a zSG;0y@*ujk$~*CHq1q`g`sAd&wY_6>-6%Ah6*yd|_QrsQv3Sf?s+PA3#al*EGa=%s z-e^3KhNjZa4YUJFQRBzsgfLjFc#Bcq6$jLJ8r5>U>=i(e3dWs_(fw)sa6EX>KSnIz zOj>1R7JXl`lp|~$&qlF=#|VRPFgl43y0dN+_vRB6?eY9%XxE)_uwFMRHK~v5ErH+X z84w!`Q4q2tsMr2bHx3V?=``j=kAQV~r_sLU5tg+f4w^=dI$GPz%WN_mIW=5q>|T{+ zMV~+uHQ9zZGDN6FN=S3HP&08hd<wgg$dcpbt#W&-89UkVtc7uaU4p5uAtFl)9v^}K zv8TBk7!*@gvO~4QsXZ!e52r>VHF!C0?ZrrjdMULe>)}UcvqN|4owl@cj37UVhyDIR zpY}%@l?f9MqAq@{4ZJL#WNo8@&mqtfK99Aj*uyzv*bR<_cyr;eY!q5TaeC>hBatao zSj`3QiptgN<AZp7-0k%v1g%|ewW9+K@|A0TkYTWC_g-Ii99Z+%EHnc{<PHZMbqB*} zr-h_C)mQI;WjyYZx3{0m)-wJ<g8{I<EM;+Fx<NQ$9I?W2?|3xr$A?3+5BIx+8G-d3 zC(x$)W+(C{gfaTzdZ$5SloON`rR1(5x8-K745>xBdTlF)kkP@2O(=B*;V+iG-y*3t z*}f*C^dRYIa?-`t3>ZEsZ5nZ&$kKjpwxZtDhAN(4W25u<VITiRkjpn4%@{T5Of`2T zonu@qnU0iuWwJCF9<U!H)s9B<Nn|?7;bwTt=?J=Ip?L8^jKx%AQ}nlXJHb0p58SvG zt0mB>w96%0o*TRB8E51R#bUivYgcMp_Beu5Xgay^{93D*Hh3<m6_K-G@>p-$6D=WR z3@MXI>%n9e;2hIWsGN3TQ3IpeEJHSQb@v$495@$@yk!VV-Sr30EcRx>HQ3m`g{mrS z0Yu%%*q@k6aP~pJcW)N+!q;fx6bKS$ZF)z9A;9NZq|of7J2p2$On!IXm1xMGin3C? zYO7G%t+eV*si;kN1^r%VY}eHz(^GUft4dRbZs7vPL$S477&hX(47%0;oVr(UUJs*j zIo_-{+isMhrkKYuBDi?pK@@IQ(3->z<x&SZVjxet6YStLa<si{pqu5bG9L5Nn{tsz zJetq;=Z7ZUOQ@~FChkwCX7P+9x;Hx6AK=uR+hh8`c|9>X(H?%X_n4Ou3hg!;R*p@f zRPqCtkVkHzM0X-6a55N4gEUL^S`}j_eM#m~)CZc=w+pP3M(QophfqK@Gv1CrTHEES z^tp>=WPi}ISS<sO;=#$SmP=*49t*eBuEjABB_fw_g2x!e^C8yt075bf+%RQ`YDbt1 z%yu0)Hzy|;prVY{UYXFM?c5phor@{W4~?A;wx>;Eee~D9B}GYSCoWSYSM=R5hF83? zU{#CXj9lJ83H8JAgGnF79FI`z5B@?{#IpmVxH}WY$+DVw9MIj#Lm-V~b8dm;CBG_f zqzW&#ip@%+E$L5=M#5$Aj+KT_?oW;X91YkrH6Bdk;ruw(xLe3^IO*R-#VVj2j%I_y zhqUN?UFTYLXS2y*pSaPXH=lHs^##GD5-zmsJ90eL-ZtVA#-NQ<EC&T`LpNoR6$z?x zVK=Et<N(W?@}iDsfu3_LWO^yVZf|A6peU9FH%HO#!E~lEr#r~bxI4j`X@F=~VGT-c z53%LPxk<q!gjjpwC@-3|_!d_2AtZ2|Y}{;YT~@E8^9+2*QaA_Gt4Df8Zc9qn*@UHz z9FCNUFf^UgBnvP<hQ3%0*&+Bb#G-P05*0_zuU>eK3shy#eIy=y4NF{TETO?t^^Zxo zc>RAI-#_XP@j^r)uiKV7#2GzrHmAgu&N&&+jxC995g8sD$wa;5s|FTYWGOGRMQG@q z205Q0f?kPA3=zy>tK2q*viMEtFBz}Z#-f7m&W3^;<Gavb+=kf!sugnK&P@CLJMv}_ z%fmzCYlW<cgAWb&XQTTVLUfZHo6*qH7V{wv!%@tf1|iLYX*Ie8+t+bv1OvCk!@zx! z<ASKw>a9)%%!0)*O)Xwj^a&15pxF!ZU_{jH^9;L{X1l}i_8*L?h0dGZ^AW`sYR)^A zmh8b}2>B(V&%H{iy$wsF&nrb-jJRCeZAB+?3x$A-kj=U<BI6OOWO@&K$9*yaU~;l= zF09TWqlHkH;)+XREQ3rJY57W+$CCMQ3kgYRp1^g2)-i8SLnn5SvAt_;-gXX9JbnJM z9R8hhYwHI1-X8SK?+!_c@l?LvXe=YjNJ!~`aBwV;=h?uV`XI19h8Bxzjk#wn?<6c; z7H%QA&JqDkWYv(ReAmOt=p;J8@SyQv5OgK?li4_NiOeM?0>3Tlt5uMUmF2JmSEIRG zZxkaE4~m~q<fAd+mPoRJf@GL*f<+cAg!$5pi;!Tj1j3Wq#^QM&&Mt<6IAakE1qrlC zK&;3jsX<t3i9M4V$2>08<+7KCiFa(89g?N@f(HnxvYK|GM-H9;eSdVPKSZ4FMqQkt ztOZJVTEPazJ={FPgpdqM*{p@a(u3iNz=*-$UUVJ@VobtQgp}*OQK_LAvGV-2n+^42 zK^v;HAak{>^j)#sa%RM9H7sbm*(qW{!K&OWC}m8AsT>a66qhKQ_PQ2V)#}_=pb5x? z$0xXLd;=ahVEbvyCmlYRPH;@5%0q(J2iyr!Nsx$+d1Im=)!8bwqL2s&ce{*zvUXFv zuuN@ublguc@XV^-!%J*A>t>HzifY-eUGol(?}gHPmRPe8ynP*tCOdq+jKd!U+44CO zA#W<kP!ifKY(C=LA~YaAATIGSjklA<&cgdOR~?TlXa+@>5wpDn@!Hqj4RNPYcG|Rj zvs{eEq>&B=w|(5Ge6f#8tah-u#j4yjTnU$2cA*gs-DkGIEKN+t$eyTq&ZswA%FD_9 zR04;PT&fX4Tcr3<jHEK+R;SS*7_f(1oE1tH`BzPRV`IZ&T@V2y1;L1lEhvS=bvkk3 zdC7%gQdFjfhO(^a61ZTeLxN+$z353x6}rZ6wc5CKr9!)435Me?5%6T@(*<IEH-n*6 z3@PIy{!e5%Ouxx;9iGfwO6hnsx-%b#glnm?%SaFwl8c6$aYj@5ilApai!TkQnO3j7 zl7f<C<Rr}rHflCfjRN+|bk$q9vq__pHc^XE++X(T_l3q_EHqzZY6>HO;SaDN=#5Uf zgIuF_l@`G*n>)&PxB@z4v!u6ty3U&sAOtlPlw~TB`WM~|ZDZJnN=C>Vmw{lAY}i|I zWoM_uppv(Zanc!7hml2&q*y|er>RYhh~g0j01w9e3|Me(O66XEvQIY0cqasMRJwgK zldToAIj<ctX)$|Mv!Kz31aL1CFiwBh$~W62s|H~%$OXBedcS+8KW82btH7KN4l9aT zGRQcPRW~*yqLBQ79v+qa|MjEMNq>DfxHFh_*WtHMvA|`zjy_mFIhbDL!fdq8h|DJa z@Wad%$w)sCi-BAv3eImB25=~D*Y|1(_Xu=H_lHU*`haxlKlIFhFnUzBj5`)u&4h1J zQl8%ECNmT$YO@>~H=ijR&QJEOnRNK(W`h_lG>w+a7@U-XH>TFCy!;(!NRCHcB`8Px zqSC^e7Nm^g>3wKx+C%P)i`isF+~iG>EN3uZF5g6!qIkVg$M)D5ktij{&p@>d6DelJ z=MP+N8s}MQ7>z$sq}qcA<xY7DGZR9p04#b;+oDs<n>0X+WH1R)ai#d7R>!!J)36Gi zrIG?&nG4S<j$XC8(&$#R*(MfT5WiTx9=DlwT}A;mcepHSIuVTKE{h<3V?_GM(}_X5 zycjWglT1=lH)NZ5NHae8Hn*9^!nMGaL-?q1AT<h}^5B%OFmzmE5~|xRvf7QkWLDeH zU{&Y_NcdUaC7z!sH%VbFeS1Xm;X(Idbay^-Q@vyKI{uE5!BCkb{hfilcZHA&+l07$ zOxJoCXs-vN9p)&>U6EW}NJ!F8U$`EM(+<0eH&su%*aA<v07_SCXk3CT%&7&Ql+FT! z=@DVAcf726D{Y;W4g^TGzQv5hA{l_M=3HM*BMzYWfRuzj@G?Ikil$3Gl__JS$BkE% zI6isMn<Beha@_3Q4VU|4{t$25QrDW-@Q%6FH2^q!#N_1pXXKNmt;lqQsY5@RA2%`e z&<q=}kHdjhvl`~7*j%HTMoppEM7$8$tdNS72WQjqnjM_Rw)O~3W2wDF9404vIkC8y zbQcS0Oa*3yx}MC*lT70g{(w!M#jNJr2U|(FbLD9lw=QW29Pt~4y;>xdX70L1bY-4C zp&x<byiJBjE|QXd3V_9ipaLS^-(JcpVyp>Hw0d%WGoJe?j`EF8Wj8KX7vfA4%0tTv zOoWqVqG}~2T@++XG?_!TpD;sap@NiKh~-D5CBV!gYt8A1>g`V~m`O8R3eg!<Y^Di_ z>|U7Tm1}sjT~RAe!6&JbvfjZ!yHQ2Rh2_GD)vRfn;Hj{1F}sFqVHyc`w&3x>35q9> z4IxNO3zKR%a8p*^7<r>$E+pNx5C^`}{D%@6Fay6ruf>X$)jXw`vp=0i;uA+DbU$;O z;!T2SxJu6^8HX4g``wd|c!wzvlYLFvE9qp4=RnC!(c7XD=0T)aDpf>D%MF}+U@9r) zG73wL3M0;`GC;PnBi0wn1g*GTA-m;VgSp`_O*r3Pake@e$Qiw!5SFy!Fr{rPTRj<2 z$EC`S2SlF767<UR$l7Sm+nS)DjExGKU0?8-Gier!Y-<V&#B7>ZO9+@pLkZY5a}S+b zqgN`abB@LC9}Rj(LPiRMytLCNa9BE!R`g}-fMg_(`h(j?s1TP9bgNKfOb*UX$n|1m z3rqwei}xjq3l*N^zoiQV-pz7c>V$dv!)YRc8+uDOZ;{k)Y|FaclY8fs3sKiS7K&I; zYtAM?FGxl1fpsF_<W;ikvBDV=+^oWI0>(^ZST3Zblvsg@z_6TzWY&OmCP0{~chMf@ z#eyQHo{7sG;UQqP(jpeOF;ntWt0gM64xBi*2?2ZFO~HuSydbnSjRA`zW-jq=`Cpbj zVX<Xw-P#VvzNIpYT1;27HU@X!W*eQ$Bsay*JUy1t6g;Ld%dwKIiwU6+EOmAyCqcMP zunSf4)}n6rc;*N^H<!XB(55ot={4w)%qZQR_euFHY;?<WT<Y7%;-bc4^Ojae#8cX4 z>YlFS7|%vZO)>W5NQuFHk`$p^_<+sAo=;UmA1XSNDN?vaYsT2h@m4lEOgszgA?uvT zd>m*}n!$6XwzJIYr*$VS*}9j+?6C~Fh47x0YjrHEwVn>6@e8`M(SS}RB}siG8^)cJ zK^Wmhmi_qzMM%0|3CSN;szok2ys#MFFJeW2lLRqKyg$K>?sZu|WVRoI>>%fDmJ6Yf z%pi$Kr_Sfab7c*r(aiiB-(W|9D&687XvZK)0xhIf`FLAm6A#TU)66F7Z)>NWP)0u4 z;~n>GhC!&MgI|{j3gV6T>E=W=??hZDw_L8TGLOjAr{*VGk0a{#j*d~nj*(p|R`~}R z`ry7RWA5_CsM7JIe-DXwc_n3RwSdea$rDL7P8u)bAPa7SLffh_MXN<5q+1q`iR1B? zNVvL01Zy~a;@eizfVN4ylGkX$!IT*tV*w_}y)+Jn_jqN*CZxiG!`RYrzNT9eis>IX z(|>Q)-6z-L=`)|o=MrT#v7$Ih>sf5p2_t@8>lR!dES+9(gf|{AZZPLi>1_igcLVR* z7CpqJttNA<VT^EWEQa>>t+UKuDqpM&^5LNd351pjTo}qP$Xmvm3QhQ{H(0Dlnn@0{ z*<eeV2S=%*%%&}ox$85O$+2TGjx{9D&bey~d+DYi7h`_O)?%2uZrZ(n!s4#pkqbb2 zA^8c;i>!}jT)m58jkvjH8?(yIYgbw0h6iCQlnSNoMk`8P?9>{?>(xdn!urHF7#h}- zk|t69KUJ?$5dD(iIxM-;YMoMaIL2z->t}8|z?tS6o6TY)+S$43S~~3RPcx5ssF~-i zv0C{Cx#dmP?GPd>QcU3kf;IZV{Je4k>3hYUMhBTFaUT&t$Zlge*+VA<pt##^w_2B5 zmGotU+lopn4K9GTqRw{3B9t_|K%dlA>j|qlGSoc1+o05FT?XqE2!Az$X9GjiYVB>L z>(Xxqc)N7t(&cNJJHGzvjqpgiK28dVko-h(v1~eh(=}7QaY{RSPOGpc2LlthR96}- zm&jtwrBVb#efmMV4X)#)W{J_*yj<Cf4v~y>Z9%?~xwOgiq>qzatZIAdOTL{92R2&d zpwkr_fNN#2Z*7)qqyaV?J4&=Zf9<2`N2+z8N1Obxf)D4ug{xySmqLoQv047G2X7S8 z(1cS4dBZ7l4MwePwX=lVsqgNzu4`yq9D^6R%EQ7WLkNdSCyj}3B2$&CtL-*g+nvq5 z=>BLzvP<jhQ(cF*TP0)=4<L1&FsWH?lrE*Ox&|_>Hn>{8QCj6|IAvu7o7KIFYWe|V zJkStx(ysX72kdB&LZZ>CR&QAK)MeT;?UEg{h9mKY2@N_2tB-a%3AxDAVPrb94ZmbZ zPPzhBnC**iZKdA^q7@ZdJ2CEOrbjnf*^)w}AtA|J(uh?vOywp1bb%LcwGs=kjJFTR zEG%Tju9k|V+Y<j~)R<}E(>MS<0xRMaVz~)}GJwEvg&v*ib{6SYvwDgqWcp{jnXPK- zv9u?07FTqC(BhfVVj*w~K)lJugicqlk*4vK;j^eOjlDEZrukyEnH_fV^;X*uN<A(e zVnBI)U{#m6r#94$!|7du`_!{CC8f)a3JW26eKY$k`keMAgE2b()T%g?ilv<uP1k?{ zOE)fOl`h2XtY~6*%mfs9aB*82D=93bJq=!*A#0dr7*{zn5~9@#1Sh{~sJiA<KnevM zmDUZiVyTM&t(CJLlxwLKt>aL?#ruvC6D#DzwPYy;&l%cmKTXw@PSN$%(yH$9ATqsg zZ0hxjR9hZuM=yx5%vTOuY^%#otCqgj*sQS%CUZ?P)5$RHj=f-mJLo)wc2#*K^7qBI zyy^@9f@nl#c07FxQ6Yz!>VO7HzNVZvB}Is-NZDuVfYpkklG4spv$j_hCHz}!XRC@m z(ob8p3=?9BtE-aP*fi$AqV)D>CoDcnc^=3>r?vVJ;H#@{`djFHgJHs*f{ped2b;RY z9D4eKF>}_4$hdd13~3$CUUZLV_|84vQnt;%shh?DMYn2~9rz3+uk4}|CQFpyPvb5x zXH}1T8f76V<fybWo`VG<mG<Q<+`U4h)FNw{9x1z;At>qyZMQ-eDJj#aDG<^n*{Tg> zw6i*P-eccb<qY3q%hX90MGRX8L6dlecB_~W&TOvwUZ(M~>TR>UL&7fwf`}*!RoqZT zJEto)Hdpip6{(lSsl9Wv*vjyyTFJnJ0H*|K&fi-l7krfwX1Hbrq~Vik$3Z-0Xuy7} zo%N3FVx~v3=$T8P)2bp|84~SKbuUvDhF$TxiDt4I)LB-kv8{5ArB&6_%)gnp=!hhE zreSkc^e}T9TNZEa^QzLSYb8a$N?h~eSKKRgJ42ow7Z9Zw$c3uexJx>;l!!)pBP~Hl zKPdk#>a<RYwi|`rt<%s%FQ;LcSY!vsF0Lp(@k=Qx?!>JfRydsoCc`)q^DJl=k*ydT z?>{>+$7CAir$>WB>ejFN-}_A%I?Ml}YjFvrKMiWl**Fcm<}ZfGqFXz^qhtBA@N8 zCuF8E1h-LH(Ul;YVSLMitWZ!^1+|=}zi&R<ki<EQ(<(vK`A(KoHBiPu40x5SXF1)V zZm&`sG-xn`g#(}AJ41NYC9OA4r|fBdW>t?LGo#ceIX=x~=vcX(rIC_$tI)j?%|_Vj z$x5S&{8pgKp;`fJX@OPshE%Q+T3H37v|>`FZ>!PGQ|ys7zCzmFt<_OzIB#?XDVUD1 zaIy9<X0cSe!aNddL7XFLZ{DO-jBE_yXSP`Y?M0KB-nd?ZW1*kL_R3{U)7&E2EJdX~ zg5GQ@wOVr4uF700s@KimGa-~h!yL-2K?mj{|GCH<ot!*4EhDG#00VfL8Q&CW43QE) z8F{k$TFU-#zmEoj$r5QQ3oEAsIb+>ne>7)N%%p$VpOKi(^qaib&C{G3w9@M6Soptk zS(wOe35gPt^oPBpOgm1RvXZW=<r`V`=Ky91>>~ITUE$$oDw}L%8w(IJbuFW~(kTbw zl&Z$e%4(dFRTVFi3A>zaS?qpVxPh>3D8<CQlg%KchrQ6c&d{&85sOB$$eENTxc&Oo zG#{D8&0LawUJX*E*0)wjZKgtcJtP#!1TZz8LfSu>LCrszxk}L6sV49x0n1AfX?S>6 zS?-XMtZBShFt?gMfnwWL9zn{K`sT%O#;SjtwT3A?EsL-^=$5`G`BAFK$4Wg>b$$_+ zzjxmz{nNKC+I6IK`N)I8^XVF1QDt<5j~=ofrHeJv0O;C{%PW}~3t2OH4+(WPL1pGJ z4N#XdS_K};DjKGND|(+UMGZZgan^Nd&+8x?m!mH4(4v-74?+dg*A>kRwaTnwC@quf z8!J>dPimDxv7IZvNTXFKofd?_%bJMFbQC<~;bd77Q|Xd6VM{2KSsH&2j{GdsjH~=7 zZQ^XD86UE9hiL##;byLxA1-!W+A44QuB2;9HLmuY)F+vVjmYiDa2i(9+^4a7tFof3 zOjspCEB&@vu)zbSC>hqN=Dy9O@Is`$+>;E!i`C643nS7n7A|!9vgr)Hh2p$v((TMo zry7Uz8Nb`gB&|9C@}Dz<tNI!IP5Ik=WC}77FY`O;OYkw&%w808rLP$)YyL6onQa3h zi<0Swl@VA-|H>U3g_X3DLC*AyRiHG6G!^%>TM@!a3Qy}%)7UmP8>eIijo;H11uxTt z8QB$YRaY_IrN~%9+s4hbUMWRPiCIZXE16qt@m^KBmL`C*6k}krmuPX-TTs0WzMj!& z_K=yObX(GJnd%a%!f6+`vj`X$Ic>63Us76=*@II=9jPm2^}TV|1}jBYb)Cx%XW7g$ z+scP^N-m7XVQ`CgQ#6F<iIEynv4d4*ZNqL^TP0u!NB5?giCIq`Y)<FKDUrD)kmRMV zE)wv8lDHK|)vp0bp}`Qjd3%%AH0j;btI>derezYoqHyI?t@`)MJ)GjX*Z{3Yl#3p( z$eB_b(w!?epdzE+)AGLKbT<%?tTi!dT~R{;nGCyO0hB^IwMF)6aIfU)^}8!<+L$7) zTFwA-LYT$JWn)ii!ugaL%k5#TLN>E$E3$c6n598U<g}w?3OS#Ia~WV;W_Zf&Wp;-B zPcl;!ps}i;A(dh%MD(pD-<`9wLQS8V$aKo*9upn$b~Zv$46^e*7NTbo(a2eiL~?XU za7DbxQp0(6*3j}VS-E$@?H61)ZEajb+=Qr|lE;%{SH2OkhKyH=Y+pP4S(kO<WK}k$ z#ZH@WM;x>40c2gZ*c7W#sNG5rr7W9M+o>UxS>0Yq8F))&BfC>B5w4#CthM5}B41gw z2nbdtk>u>B;pxH5sDiKZ%vwz3-y3S3lAbU&N2V1Q3~g5GWj0;ZhLQi0zh|}MWjNNR z4^LPO7p-X}sr?>>Gldg;IAD$KAo^>CjmkxR#(fQ6_b6IBcsS%i_seSjHFh6a<AsH( zQVPjJ%)LqfSoPh>m^Vz;OlH>XwWqa13fvmm?mKIJc4pMJZ}AZ?O)g5&T8~Lzwp=}M zKcDJx6Odh2hteGH$>_f|gYDq#Gyk^6eh;>5$eP=mpvyw`+hhfNaXjDGn)^lBw($s6 z_xH8j(U<J41b^-}rSk<S73z4{fvo=*zkKOT+rnxS<><<1&gHLAU>EJh5k~0)Tw!ZX ztG7+PZEl=j)Z(iJ{+y3MXvbx%cSOA->V1iFxUXOR=}STEHg5IKf7iMEc@X$1>grj3 ztd92W{5Jly^Re*z%4g5zze3?1s^E`sDz<v_%{ND?_uH?X%l|fw@KuLJySlEwHh+Ka z>plJ5=kia};CZek+oc$34^n;0M9*BIp1xl69Oub?<S8HX7wR2Q?|^zg=<BI}s;_!; z{)8+0&_;hoy+5PgyEzX>fIH?wsQ2{ufFBRM96HH8)zjZ!OVs=1_nymtlEL{yKJO3? z(N4!__zUnFfBRg%@pbG!Z+F~(q22HG_0->2srM@NeqV(5$9ezzef$cZ>K*)D`pYMj z!k(|H65zegd8qe*dJm}gmsP<Zq252Bta@tdXMg-${%5HtdW9p@`-fZz^?sguKmQi> z{z;<VFH!H8s25-eW;p*j&NUWl_si7#GWEX9x%fbb>Isj3!Cz?i^KYEXe;(q6y=u8- z^;B2ShI&6ny&t3AO;zxR>gexR`3v=;pE#F~5I}u8Asnis<2U(J`@-)r>ODriaF0ui z^MB8|;Dtl={%t7m8}9wb#JO(&JL<ix6Muwy|ATTk*R5}4quh?Kqn~i^94Fyi*T>ZR znIoOKBiwr;Q||-c&G)+kk*ON$y)RSm=h(3LZ)t%15$ZjYski<8d^bMaq;8<mM}0lX ztd3v*0s3*no$G%+qQ4DaPuFyO<oDC#t2#QK<WFM}jso=>;U*WuseUCJ;rRCdtnVT5 zjS(*JjUmHNvMHUBgm?Vk`94`do_hfvrz?l_v@Yg+E$L%db}dWqy0U9Q`qh<P1Ja|e z>;^*m(v=;-r59ZprNSZo=gRgINzb{mdg1h$EAyQP4(TmdmIjO>=_glqoGv}&%1&WP z-?;MQ`Z2xY%I~zY^oJ`uttLI;${Lj@l0I-{J~zc7xp!q}$t2&d>@=0+*p*Ro9FkX8 z{(38$TymE48d~z_?k6Q*_fZ;&BbU;wx{r`0j^&Qs?qV+DBXNl%7x5*?#F5jNxDrP$ zI-e*B{<mbx@6VJ!kSTxAIOa7!9%s=!j)Jb*x#*AHM2t-j>&hAKKErRI@0sYS=o;lf z-!pg$Wy*O^pEJ;_LRs_@|DHipN_YHJqP;tm|1eR`wtt`dU-ILn_Ftp?3ybno(f13U z=Z7=wnEu0*llc4u<@27O)W2V(91ivGUsDKssq6k%D2M&iRsIdiVZU^he~WV1A6@0& zp&a%^SNT6t4*Q_1{NE^tz0XyClXBSeT;;EYFy}w#_(_`oamrP8m8TlVW6`@PKm7yC z@;6X^K2d%z<xeKc@2C95MEOIMznUmtp!^4k@+HcDk|<xL{AQy3G0Kl7@Ohc?Hzvwu z${$LUKTi3%M7d7+<wUtn`N>3oZ&411#-m3$$mf%I633LK52UG{j2@4szWzVvSJM66 zzAQ;p`3EV7ec_*iem_Dv=!YlH{?VI1L;0NN&nM5mn~x(xW#CKmss2|e>-cH@1n>8J zS(2#oo0P*I@lTy){Je4>b6xk}Ls|YDr<C6weY3ys@aK~@nR3yWKWz1}NtA<r6h7bX z?|;PK*GHXo{3HI<-mAX6?#r)JPWJbwDE~a)W~<x%$D&`L9Q4-{(EHy|4*TS*y<erQ z<1@ZL|CMqQ-kZAb>31Iae!DkXe3j4e`6kLipFb77BYMBTFaKWnUZI?Xx9#sg>+gTN zFMrjSkA3+Zzk2>V`Ay=l&rg1b=P!-N7bpk){Uo1M`)SHgczo60FH+X=5`TjCWnY$F zQu()hInejNP!8W95Pkl{-w*VD7XzE@&tK9&bA<7}#>s05{@wETYo5NhR9@is?p z7mV}#eVTF-{^v9IBfd#=%x)=cddPGj%awZ*-t3$o9<H%bQEb~t>BZGJrc~0VQj5~Z z*#z7d#%s^9bM(nr+ln2mX>Z<xQGEM&w9lK}7Tm^NJ}Klv;Dh<eiFRQ$(CnqMgd7T+ zyj8~AbpDo_3Fk|w7C9SK+kkHdw(fm5+jP!0;3b#MzCmnAVEg|p_dHAO5xLxzX{~zY zqtROR>NWm*;gbG)Ud1aqyQ;Ix`tLdY_oA*@JzaZ2#jC3Pf&tL8S6)mk$&5?4c>7FS zNHY0&bU$KedG?Kc`Q<I%n&h=rXvf+J(h`TsONkAk_{6}F4VNa9?nA##^1*bp*p$Le z=Juv@s}_ytEE4FZp!2q{wmKm9<X$FBHnW`T<iTm`j-@l)HiOCQZWF}RmA<xkO<wV= z<UW0pyg_P#mobx9^jfC&-L`37?Q)R5ndrJ>Pu?TcYIyb65ah*I?XzgCdt%!Hx(arM zZ8!#**7Q(<mddO6r&q_oQIz-AdgF&_w3s@}0GbY0pYJ~y%#zPZ)JDDJmAG$-9p1g= zOJHlzD<1P=@X>sDXKkO2owlm=4OSWQ7BZR4*!E6B4)w;UZ8(sG1eVOLC9go&VFvNV zUcip%yv%IbuobE&`sOI>Hsnr1*K4=7jgNMvIEW7IjoMTNd&k_<Hvag%xar@kFWQb? RQ&d2jieY@0x(L<qe*p4?1F!%9 literal 0 HcmV?d00001 diff --git a/lustre/tests/iam_ut.c b/lustre/tests/iam_ut.c new file mode 100644 index 0000000..db68d3e --- /dev/null +++ b/lustre/tests/iam_ut.c @@ -0,0 +1,423 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * iam_ut.c + * iam unit-tests + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include <sys/types.h> + +#ifdef HAVE_ENDIAN_H +#include <endian.h> +#endif + +#include <libcfs/libcfs.h> + +enum { + /* + * Maximal format name length. + */ + DX_FMT_NAME_LEN = 16 +}; + +struct iam_uapi_info { + __u16 iui_keysize; + __u16 iui_recsize; + __u16 iui_ptrsize; + __u16 iui_height; + char iui_fmt_name[DX_FMT_NAME_LEN]; +}; + +struct iam_uapi_op { + void *iul_key; + void *iul_rec; +}; + +struct iam_uapi_it { + struct iam_uapi_op iui_op; + __u16 iui_state; +}; + +enum iam_ioctl_cmd { + IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info), + IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info), + IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op), + IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op), + IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op), + IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it), + IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it), + IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it), + + IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long) +}; + +static void usage(void) +{ + printf("usage: iam_ut [-v] [-h] file\n"); +} + +static int doop(int fd, const void *key, const void *rec, + int cmd, const char *name) +{ + int result; + + struct iam_uapi_op op = { + .iul_key = key, + .iul_rec = rec + }; + result = ioctl(fd, cmd, &op); + if (result != 0) + fprintf(stderr, "ioctl(%s): %i/%i (%m)\n", name, result, errno); + return result; +} + +static int doit(int fd, const void *key, const void *rec, + int cmd, const char *name) +{ + int result; + + struct iam_uapi_it it = { + .iui_op = { + .iul_key = key, + .iul_rec = rec + }, + .iui_state = 0 + }; + + assert((void *)&it == (void *)&it.iui_op); + + result = ioctl(fd, cmd, &it); + if (result != 0) + fprintf(stderr, "ioctl(%s): %i/%i (%m)\n", name, result, errno); + else + result = it.iui_state; + return result; +} + +static int insert(int fd, const void *key, const void *rec) +{ + return doop(fd, key, rec, IAM_IOC_INSERT, "IAM_IOC_INSERT"); +} + +static int lookup(int fd, const void *key, void *rec) +{ + return doop(fd, key, rec, IAM_IOC_LOOKUP, "IAM_IOC_LOOKUP"); +} + +static int delete(int fd, const void *key, void *rec) +{ + return doop(fd, key, rec, IAM_IOC_DELETE, "IAM_IOC_DELETE"); +} + +static int rec_is_nul_term(int recsize) +{ + return recsize == 255; +} + +static void print_rec(const unsigned char *rec, int nr) +{ + int i; + + for (i = 0; i < nr; ++i) { + printf("%c", rec[i]); + if (rec_is_nul_term(nr) && rec[i] == 0) + break; + } + printf("| |"); + for (i = 0; i < nr; ++i) { + printf("%x", rec[i]); + if (rec_is_nul_term(nr) && rec[i] == 0) + break; + } + printf("\n"); +} + +enum op { + OP_TEST, + OP_INSERT, + OP_LOOKUP, + OP_DELETE, + OP_IT_START, + OP_IT_NEXT, + OP_IT_STOP +}; + +unsigned char hex2dec(unsigned char hex) +{ + if ('0' <= hex && hex <= '9') { + return hex - '0'; + } else if ('a' <= hex && hex <= 'f') { + return hex - 'a' + 10; + } else if ('A' <= hex && hex <= 'F') { + return hex - 'A' + 10; + } else { + fprintf(stderr, "Wrong hex digit '%c'\n", hex); + exit(1); + } +} + +unsigned char *packdigit(unsigned char *number) +{ + unsigned char *area; + unsigned char *scan; + + area = calloc(strlen(number) / 2 + 2, sizeof area[0]); + if (area != NULL) { + for (scan = area; *number; number += 2, scan++) + *scan = (hex2dec(number[0]) << 4) | hex2dec(number[1]); + } + return area; +} + +int main(int argc, char **argv) +{ + int i; + int rc; + int opt; + int keysize; + int recsize; + int N = 0x10000; + int verbose = 0; + int doinit = 1; + int keynul = 1; + int recnul = 1; + + void *(*copier)(void *, void *, size_t); + + enum op op; + + char *key; + char *rec; + + char *key_opt; + char *rec_opt; + + struct iam_uapi_info ua; + + setbuf(stdout, NULL); + setbuf(stderr, NULL); + + key_opt = NULL; + rec_opt = NULL; + + op = OP_TEST; + + do { + opt = getopt(argc, argv, "vilk:K:N:r:R:dsSnP:"); + switch (opt) { + case 'v': + verbose++; + case -1: + break; + case 'K': + key_opt = packdigit(optarg); + keynul = 0; + break; + case 'k': + key_opt = optarg; + break; + case 'N': + N = atoi(optarg); + break; + case 'R': + rec_opt = packdigit(optarg); + recnul = 0; + break; + case 'r': + rec_opt = optarg; + break; + case 'i': + op = OP_INSERT; + break; + case 'l': + op = OP_LOOKUP; + break; + case 'd': + op = OP_DELETE; + break; + case 's': + op = OP_IT_START; + break; + case 'S': + op = OP_IT_STOP; + doinit = 0; + break; + case 'n': + op = OP_IT_NEXT; + doinit = 0; + break; + case 'P': { + unsigned long mode; + + mode = strtoul(optarg, NULL, 0); + rc = ioctl(0, IAM_IOC_POLYMORPH, mode); + if (rc == -1) + perror("IAM_IOC_POLYMORPH"); + return 0; + } + case '?': + default: + fprintf(stderr, "Unable to parse options."); + case 'h': + usage(); + return 0; + } + } while (opt != -1); + + if (doinit) { + rc = ioctl(0, IAM_IOC_INIT, &ua); + if (rc != 0) { + fprintf(stderr, "ioctl(IAM_IOC_INIT): %i (%m)\n", rc); + return 1; + } + } + rc = ioctl(0, IAM_IOC_GETINFO, &ua); + if (rc != 0) { + fprintf(stderr, "ioctl(IAM_IOC_GETATTR): %i (%m)\n", rc); + return 1; + } + + keysize = ua.iui_keysize; + recsize = ua.iui_recsize; + if (verbose > 0) + printf("keysize: %i, recsize: %i, ptrsize: %i, " + "height: %i, name: %s\n", + keysize, recsize, ua.iui_ptrsize, + ua.iui_height, ua.iui_fmt_name); + + key = calloc(keysize + 1, sizeof key[0]); + rec = calloc(recsize + 1, sizeof rec[0]); + + if (key == NULL || rec == NULL) { + fprintf(stderr, "cannot allocate memory\n"); + rc = 1; + goto out; + } + + copier = keynul ? &strncpy : &memcpy; + copier(key, key_opt ? : "RIVERRUN", keysize + 1); + if (keynul == 0) { + free(key_opt); + key_opt = NULL; + } + copier = recnul ? &strncpy : &memcpy; + copier(rec, rec_opt ? : "PALEFIRE", recsize + 1); + if (recnul == 0) { + free(rec_opt); + rec_opt = NULL; + } + + if (op == OP_INSERT) { + rc = doop(0, key, rec, IAM_IOC_INSERT, "IAM_IOC_INSERT"); + goto out; + } else if (op == OP_DELETE) { + rc = doop(0, key, rec, IAM_IOC_DELETE, "IAM_IOC_DELETE"); + goto out; + } else if (op == OP_LOOKUP) { + rc = doop(0, key, rec, IAM_IOC_LOOKUP, "IAM_IOC_LOOKUP"); + if (rc == 0) + print_rec(rec, recsize); + goto out; + } else if (op == OP_IT_START) { + rc = doop(0, key, rec, IAM_IOC_IT_START, "IAM_IOC_IT_START"); + if (rc == 0) { + print_rec(key, keysize); + print_rec(rec, recsize); + } + goto out; + } else if (op == OP_IT_STOP) { + rc = doop(0, key, rec, IAM_IOC_IT_STOP, "IAM_IOC_IT_STOP"); + goto out; + } else if (op == OP_IT_NEXT) { + rc = doop(0, key, rec, IAM_IOC_IT_NEXT, "IAM_IOC_IT_NEXT"); + if (rc == 0) { + print_rec(key, keysize); + print_rec(rec, recsize); + } + goto out; + } + + rc = insert(0, key, rec); + if (rc != 0) { + rc = 1; + goto out; + } + + rc = insert(0, "DAEDALUS", "FINNEGAN"); + if (rc != 0) { + rc = 1; + goto out; + } + + rc = insert(0, "DAEDALUS", "FINNEGAN"); + if (errno != EEXIST) { + if (rc == 0) + fprintf(stderr, "Duplicate key not detected!\n"); + if (rc != 0) { + rc = 1; + goto out; + } + } + + rc = lookup(0, "RIVERRUN", rec); + if (rc != 0) { + rc = 1; + goto out; + } + + print_rec(rec, recsize); + + for (i = 0; i < N; ++i) { + memset(key, 0, keysize + 1); + memset(rec, 0, recsize + 1); + snprintf(key, keysize + 1, "y-%x-x", i); + snprintf(rec, recsize + 1, "p-%x-q", 1000 - i); + rc = insert(0, key, rec); + if (rc != 0) { + rc = 1; + goto out; + } + if (verbose > 1) + printf("key %#x inserted\n", i); + } + + rc = 0; + +out: + if (key) { + free(key); + } + if (rec) { + free(rec); + } + return rc; +} diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index 86c46ac..47b2933 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -17,7 +17,7 @@ CLEANUP=${CLEANUP:-"cleanup"} build_test_filter -assert_env mds_HOST MDS_MKFS_OPTS MDSDEV +assert_env mds_HOST MDS_MKFS_OPTS assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT assert_env LIVE_CLIENT FSNAME @@ -179,7 +179,7 @@ fi echo "Starting Test 17 at `date`" test_0() { - facet_failover mds + facet_failover $SINGLEMDS echo "Waiting for df pid: $DFPID" wait $DFPID || { echo "df returned $?" && return 1; } @@ -207,12 +207,12 @@ test_2() { echo "Verify Lustre filesystem is up and running" client_df - shutdown_facet mds - reboot_facet mds + shutdown_facet $SINGLEMDS + reboot_facet $SINGLEMDS # prepare for MDS failover - change_active mds - reboot_facet mds + change_active $SINGLEMDS + reboot_facet $SINGLEMDS client_df & DFPID=$! @@ -225,8 +225,8 @@ test_2() { wait_for ost1 start_ost 1 || return 2 - wait_for mds - start mds $MDSDEV $MDS_MOUNT_OPTS || return $? + wait_for $SINGLEMDS + start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS || return $? #Check FS wait $DFPID @@ -245,7 +245,7 @@ test_3() { echo "Verify Lustre filesystem is up and running" #MDS Portion - facet_failover mds + facet_failover $SINGLEMDS wait $DFPID || echo df failed: $? #Check FS @@ -283,12 +283,12 @@ test_4() { sleep 5 #MDS Portion - shutdown_facet mds - reboot_facet mds + shutdown_facet $SINGLEMDS + reboot_facet $SINGLEMDS # prepare for MDS failover - change_active mds - reboot_facet mds + change_active $SINGLEMDS + reboot_facet $SINGLEMDS client_df & DFPIDB=$! @@ -300,8 +300,8 @@ test_4() { wait_for ost1 start_ost 1 - wait_for mds - start mds $MDSDEV $MDS_MOUNT_OPTS + wait_for $SINGLEMDS + start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS #Check FS wait $DFPIDA @@ -433,7 +433,7 @@ test_7() { client_rm testfile #MDS Portion - facet_failover mds + facet_failover $SINGLEMDS #Check FS echo "Test Lustre stability after MDS failover" @@ -573,5 +573,3 @@ run_test 10 "Running Availability for 6 hours..." equals_msg "Done, cleaning up" $CLEANUP -echo "$0: completed" - diff --git a/lustre/tests/krb5_login.sh b/lustre/tests/krb5_login.sh new file mode 100755 index 0000000..e80ae7c2 --- /dev/null +++ b/lustre/tests/krb5_login.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +# +# nothing need for root +# +if [ $UID -eq 0 ]; then + exit 0 +fi + +if [ -z "$KRB5DIR" ]; then + KRB5DIR=/usr/kerberos +fi + +$KRB5DIR/bin/klist -5 -s +invalid=$? + +if [ $invalid -eq 0 ]; then + exit 0 +fi + +echo "***** refresh Kerberos V5 TGT for uid $UID *****" +if [ -z "$GSS_PASS" ]; then + $KRB5DIR/bin/kinit +else + expect <<EOF +set timeout 30 + +log_user 1 + +set spawnid [spawn /bin/bash] +send "export PS1=\"user@host $ \" \r" +expect { + timeout {puts "timeout" ;exit 1} + "user@host $ " +} + +send "$KRB5DIR/bin/kinit\r" +expect { + timeout {puts "timeout" ;exit 1} + "Password for " +} + +send "$GSS_PASS\r" +expect { + timeout {puts "timeout" ;exit 1} + "user@host $ " +} + +exit 0 +EOF +fi +ret=$? +exit $ret diff --git a/lustre/tests/lfscktest.sh b/lustre/tests/lfscktest.sh index 0b18e3e..7aaf964 100755 --- a/lustre/tests/lfscktest.sh +++ b/lustre/tests/lfscktest.sh @@ -15,6 +15,8 @@ GETFATTR=${GETFATTR:-getfattr} SETFATTR=${SETFATTR:-setfattr} MAX_ERR=1 +FSTYPE=${FSTYPE:-ldiskfs} + export PATH=$LFSCK_PATH:`dirname $0`:`dirname $0`/../utils:$PATH [ -z "`which $GETFATTR`" ] && echo "$0: $GETFATTR not found" && exit 5 diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index 8edaaae..84d0d08 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -74,14 +74,14 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, int i; FILE *fp; - fp = popen("\\ls -d /proc/fs/lustre/lov/*lov* | head -1", "r"); + fp = popen("\\ls -d /proc/fs/lustre/lov/*clilov* | head -1", "r"); if (!fp) { - fprintf(stderr, "open(lustre/lov/*lov*) failed: %s\n", + fprintf(stderr, "open(lustre/lov/*clilov*) failed: %s\n", strerror(errno)); return 2; } if (fscanf(fp, "%s", lov_path) < 1) { - fprintf(stderr, "read(lustre/lov/*lov*) failed: %s\n", + fprintf(stderr, "read(lustre/lov/*clilov*) failed: %s\n", strerror(errno)); pclose(fp); return 3; @@ -94,9 +94,9 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0) return 5; - stripe_count = atoi(buf); + stripe_count = (int)strtoul(buf, NULL, 10);; } else { - stripe_count = (int)lum_dir->lmm_stripe_count; + stripe_count = (signed short)lum_dir->lmm_stripe_count; } if (stripe_count == 0) stripe_count = 1; diff --git a/lustre/tests/ll_sparseness_write.c b/lustre/tests/ll_sparseness_write.c index c53ca7e..7c11096 100644 --- a/lustre/tests/ll_sparseness_write.c +++ b/lustre/tests/ll_sparseness_write.c @@ -1,7 +1,9 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <stdlib.h> #include <unistd.h> diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c index f4d0595..cfa4cafb 100644 --- a/lustre/tests/mmap_sanity.c +++ b/lustre/tests/mmap_sanity.c @@ -403,7 +403,7 @@ static int cancel_lru_locks(char *prefix) } if (prefix) - sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/*/lru_size | grep -i %s", prefix); + sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/*-%s-*/lru_size", prefix); else sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/*/lru_size"); diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 63a6f08..389e26f 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -1,7 +1,9 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE /* pull in O_DIRECTORY in bits/fcntl.h */ +#endif #include <stdio.h> #include <fcntl.h> #include <string.h> @@ -15,7 +17,10 @@ #define T1 "write data before unlink\n" #define T2 "write data after unlink\n" -char buf[] = "yabba dabba doo, I'm coming for you, I live in a shoe, I don't know what to do.\n'Bigger, bigger,and bigger yet!' cried the Creator. 'You are not yet substantial enough for my boundless intents!' And ever greater and greater the object became, until all was lost 'neath its momentus bulk.\n"; +char msg[] = "yabba dabba doo, I'm coming for you, I live in a shoe, I don't know what to do.\n'Bigger, bigger,and bigger yet!' cried the Creator. 'You are not yet substantial enough for my boundless intents!' And ever greater and greater the object became, until all was lost 'neath its momentus bulk.\n"; +char *buf, *buf_align; +int bufsize = 0; +#define ALIGN 65535 char usage[] = "Usage: %s filename command-sequence\n" @@ -248,21 +253,28 @@ int main(int argc, char **argv) len = atoi(commands+1); if (len <= 0) len = 1; - while(len > 0) { - if (read(fd, &buf, - min(len,sizeof(buf))) == -1) { + if (bufsize < len) { + buf = realloc(buf, len + ALIGN); + if (buf == NULL) { save_errno = errno; - perror("read"); + perror("allocating buf for read\n"); exit(save_errno); } - len -= sizeof(buf); + bufsize = len; + buf_align = (char *)((long)(buf + ALIGN) & + ~ALIGN); } - break; - case 'S': - if (fstat(fd, &st) == -1) { - save_errno = errno; - perror("fstat"); - exit(save_errno); + while (len > 0) { + rc = read(fd, buf_align, len); + if (rc == -1) { + save_errno = errno; + perror("read"); + exit(save_errno); + } + if (rc < len) + fprintf(stderr, "short read: %u/%u\n", + rc, len); + len -= rc; } break; case 'R': @@ -276,6 +288,13 @@ int main(int argc, char **argv) exit(save_errno); } break; + case 'S': + if (fstat(fd, &st) == -1) { + save_errno = errno; + perror("fstat"); + exit(save_errno); + } + break; case 't': if (fchmod(fd, 0) == -1) { save_errno = errno; @@ -310,15 +329,29 @@ int main(int argc, char **argv) len = atoi(commands+1); if (len <= 0) len = 1; - while(len > 0) { - if ((rc = write(fd, buf, - min(len, sizeof(buf)))) - == -1) { + if (bufsize < len) { + buf = realloc(buf, len + ALIGN); + if (buf == NULL) { + save_errno = errno; + perror("allocating buf for write\n"); + exit(save_errno); + } + bufsize = len; + buf_align = (char *)((long)(buf + ALIGN) & + ~ALIGN); + strncpy(buf_align, msg, bufsize); + } + while (len > 0) { + rc = write(fd, buf_align, len); + if (rc == -1) { save_errno = errno; perror("write"); exit(save_errno); } - len -= sizeof(buf); + if (rc < len) + fprintf(stderr, "short write: %u/%u\n", + rc, len); + len -= rc; } break; case 'W': @@ -364,5 +397,8 @@ int main(int argc, char **argv) } } + if (buf) + free(buf); + return 0; } diff --git a/lustre/tests/o_directory.c b/lustre/tests/o_directory.c index d4b2c1b..b587cd0 100644 --- a/lustre/tests/o_directory.c +++ b/lustre/tests/o_directory.c @@ -3,7 +3,9 @@ */ /* for O_DIRECTORY */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <fcntl.h> #include <unistd.h> diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 3da2ceb..49e5a24 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -27,7 +27,7 @@ ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1` MAXFREE=${MAXFREE:-$((400000 * $STRIPECOUNT))} if [ $ORIGFREE -gt $MAXFREE ]; then echo "skipping out-of-space test on $OSC" - echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB" + echo "reports ${ORIGFREE}kB free, more than MAXFREE ${MAXFREE}kB" echo "increase $MAXFREE (or reduce test fs size) to proceed" exit 0 fi @@ -54,8 +54,10 @@ sync; sleep 1 ; sync for OSC in /proc/fs/lustre/osc/*-osc-*; do AVAIL=`cat $OSC/kbytesavail` - GRANT=`cat $OSC/cur_grant_bytes` - [ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full + GRANT=$((`cat $OSC/cur_grant_bytes` / 1024)) + echo -n "$(basename $OSC) avl=$AVAIL grnt=$GRANT diff=$(($AVAIL - $GRANT))" + [ $(($AVAIL - $GRANT)) -lt 400 ] && OSCFULL=full && echo -n " FULL" + echo " " done if [ -z "$OSCFULL" ]; then @@ -78,6 +80,9 @@ fi #lctl debug_daemon stop rm -f $OOS +sync; sleep 1; sync + +sync; sleep 3; sync if [ $SUCCESS -eq 1 ]; then echo "Success!" diff --git a/lustre/tests/oos2.sh b/lustre/tests/oos2.sh index 60772c4..580fad2 100644 --- a/lustre/tests/oos2.sh +++ b/lustre/tests/oos2.sh @@ -83,6 +83,7 @@ if [ "$RECORDSOUT" -ne $(($FILESIZE / 1024)) ]; then fi rm -f $OOS $OOS2 +sync; sleep 1; sync if [ $SUCCESS -eq 1 ]; then echo "Success!" diff --git a/lustre/tests/openclose.c b/lustre/tests/openclose.c index 22c859c..6ca7af7 100644 --- a/lustre/tests/openclose.c +++ b/lustre/tests/openclose.c @@ -3,7 +3,9 @@ */ /* for O_DIRECT */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdlib.h> #include <stdio.h> diff --git a/lustre/tests/opendevunlink.c b/lustre/tests/opendevunlink.c index 9335eda..15ac708 100644 --- a/lustre/tests/opendevunlink.c +++ b/lustre/tests/opendevunlink.c @@ -2,7 +2,9 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <errno.h> diff --git a/lustre/tests/opendirunlink.c b/lustre/tests/opendirunlink.c index 5e70f7d..5f410b3 100644 --- a/lustre/tests/opendirunlink.c +++ b/lustre/tests/opendirunlink.c @@ -3,7 +3,9 @@ */ /* for O_DIRECTORY */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <stdlib.h> @@ -48,7 +50,7 @@ int main(int argc, char **argv) if (fddir1 == -1) { fprintf(stderr, "open %s fails: %s\n", dname1, strerror(errno)); - exit(1); + exit(2); } // doesn't matter if the two dirs are the same?? @@ -56,7 +58,7 @@ int main(int argc, char **argv) if (fddir2 == -1) { fprintf(stderr, "open %s fails: %s\n", dname2, strerror(errno)); - exit(1); + exit(3); } // another method @@ -74,17 +76,17 @@ int main(int argc, char **argv) if (rc) { fprintf(stderr, "unlink %s error: %s\n", dname1, strerror(errno)); - exit(1); + exit(4); } if (access(dname2, F_OK) == 0){ fprintf(stderr, "%s still exists\n", dname2); - exit(1); + exit(5); } if (access(dname1, F_OK) == 0){ fprintf(stderr, "%s still exists\n", dname1); - exit(1); + exit(6); } // fchmod the dir @@ -93,7 +95,7 @@ int main(int argc, char **argv) { fprintf(stderr, "fchmod unlinked dir fails %s\n", strerror(errno)); - exit(1); + exit(7); } // fstat two dirs to check if they are the same @@ -102,19 +104,19 @@ int main(int argc, char **argv) { fprintf(stderr, "fstat unlinked dir %s fails %s\n", dname1, strerror(errno)); - exit(1); + exit(8); } rc = fstat(fddir2, &st2); if (rc == -1) { fprintf(stderr, "fstat dir %s fails %s\n", dname2, strerror(errno)); - exit(1); + exit(9); } if (st1.st_mode != st2.st_mode) { // can we do this? fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2); - exit(1); + exit(10); } fprintf(stderr, "Ok, everything goes well.\n"); diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c index 40d197f..6cba76b 100644 --- a/lustre/tests/openfile.c +++ b/lustre/tests/openfile.c @@ -7,7 +7,9 @@ #endif /* for O_DIRECTORY and O_DIRECT */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <sys/types.h> diff --git a/lustre/tests/openfilleddirunlink.c b/lustre/tests/openfilleddirunlink.c index 6c7707f..9b07c64 100644 --- a/lustre/tests/openfilleddirunlink.c +++ b/lustre/tests/openfilleddirunlink.c @@ -3,7 +3,9 @@ */ /* for O_DIRECTORY */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <stdlib.h> diff --git a/lustre/tests/qos.sh b/lustre/tests/qos.sh new file mode 100644 index 0000000..db33465 --- /dev/null +++ b/lustre/tests/qos.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +set -e + +export PATH=`dirname $0`/../utils:$PATH + +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +MOUNT=${MOUNT:-/mnt/lustre} +MAXAGE=${MAXAGE:-1} + +QOSFILE=$MOUNT/qos_file +TAB='--' + +echo "remove all files on $MOUNT..." +rm -fr $MOUNT/* +sleep 1 # to ensure we get up-to-date statfs info + +set_qos() { + for i in `ls /proc/fs/lustre/lov/*/qos_threshold`; do + echo $(($1/1024)) > $i + done + for i in `ls /proc/fs/lustre/lov/*/qos_maxage`; do + echo $2 > $i + done +} + +# assume all osts has same free space +OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1` +TOTALAVAIL=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1` +SINGLEAVAIL=$(($TOTALAVAIL/$OSTCOUNT)) +MINFREE=$((1024 * 4)) # 4M +TOTALFFREE=`cat /proc/fs/lustre/llite/*/filesfree | head -n 1` + +if [ $SINGLEAVAIL -lt $MINFREE ]; then + echo "ERROR: single ost free size($SINGLEAVAIL kb) is too low!" + exit 1; +fi +if [ $OSTCOUNT -lt 3 ]; then + echo "WARN: ost count($OSTCOUNT) must be greater than 2!" + exit 0; +fi + +qos_test_1() { + echo "[qos test 1]: creation skip almost full OST (avail space < threshold)" + + # set qos_threshold as half ost size + THRESHOLD=$(($SINGLEAVAIL/2)) + set_qos $THRESHOLD $MAXAGE + + # set stripe number to 1 + $LFS setstripe $QOSFILE 65536 -1 1 + FULLOST=`$LFS getstripe -q $QOSFILE | awk '/\s*\d*/ {print $1}'` + + # floodfill the FULLOST + echo "$TAB fill the OST $FULLOST to almost fullness..." + dd if=/dev/zero of=$QOSFILE count=$(($SINGLEAVAIL - $THRESHOLD + 1500)) bs=1k > /dev/null 2>&1 || return 1 + echo "$TAB done" + + sleep $(($MAXAGE * 2)) + echo "$TAB create 10 files with 1 stripe" + for i in `seq 10`; do + rm -f $MOUNT/file-$i + $LFS setstripe $MOUNT/file-$i 65536 -1 1 + idx=`$LFS getstripe -q $MOUNT/file-$i | awk '/\s*\d*/ {print $1}'` + if [ $idx -eq $FULLOST ]; then + echo "$TAB ERROR: create object on full OST $FULLOST" + return 1 + fi + done + echo "$TAB no object created on OST $FULLOST" + + # cleanup + for i in `seq 10`; do + rm -f $MOUNT/file-$i + done + rm -f $QOSFILE + # set threshold and maxage to normal value + set_qos 10240 1 + + sleep 1 + return 0 +} + +qos_test_2 () { + echo "[qos test 2]: creation balancing over all OSTs by free space" + + if [ $OSTCOUNT -lt 3 ]; then + echo "$TAB WARN: OST count < 3, test skipped" + return 0 + fi + + WADSZ=$(($SINGLEAVAIL * 3 / 4)) + TOTALSZ=$(($WADSZ * $OSTCOUNT - 1)) + + # fill all OST 0 to 3/4 fulness + $LFS setstripe $QOSFILE 65536 0 1 + echo "$TAB fill the OST 0 to 3/4 fulness..." + dd if=/dev/zero of=$QOSFILE count=$WADSZ bs=1k > /dev/null 2>&1 || return 1 + echo "$TAB done" + + # write 2 stripe files to fill up other OSTs + LOOPCNT=500 + echo "$TAB create $LOOPCNT files with 2 stripe..." + for i in `seq $LOOPCNT`; do + rm -f $MOUNT/file-$i + $LFS setstripe $MOUNT/file-$i 65536 -1 2 + done + echo "$TAB done" + + # the objects created on OST 0 should be 1/4 of on other OSTs' + CNT0=`$LFS getstripe -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c 0` + CNT0=$(($CNT0 - 1)) + echo "$TAB object created on OST 0: $CNT0" + + # the object count of other osts must be greater than 2 times + CNT0=$(($CNT0 * 2)) + for i in `seq $(($OSTCOUNT - 1))`; do + CNT=`$LFS getstripe -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c $i` + echo "$TAB object created on OST $i: $CNT" + if [ $CNT0 -gt $CNT ] ; then + echo "$TAB ERROR: too much objects created on OST 0" + return 1 + fi + done + echo "$TAB objects created on OST 0 is about 1/4 of others'" + + # cleanup + for i in `seq $LOOPCNT`; do + rm -f $MOUNT/file-$i + done + rm -f $QOSFILE + return 0 +} + + +# run tests +for j in `seq 2`; do + qos_test_$j + [ $? -ne 0 ] && exit 1 +done +exit 0 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index bf472dd..0cdca40 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1,10 +1,9 @@ - #!/bin/bash set -e -# bug 11190 5494 7288 5493 -ALWAYS_EXCEPT="19b 24 27 52 $RECOVERY_SMALL_EXCEPT" +# bug 5494 7288 5493 +ALWAYS_EXCEPT="24 27 52 $RECOVERY_SMALL_EXCEPT" PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} @@ -20,7 +19,7 @@ SETUP=${SETUP:-"setup"} CLEANUP=${CLEANUP:-"cleanup"} setup() { - formatall + [ "$REFORMAT" ] && formatall setupall } @@ -136,8 +135,8 @@ run_test 11 "wake up a thread waiting for completion after eviction (b=2460)" #b=2494 test_12(){ $LCTL mark multiop $MOUNT/$tfile OS_c - do_facet mds "sysctl -w lustre.fail_loc=0x115" - clear_failloc mds $((TIMEOUT * 2)) & + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x115" + clear_failloc $SINGLEMDS $((TIMEOUT * 2)) & multiop $MOUNT/$tfile OS_c & PID=$! #define OBD_FAIL_MDS_CLOSE_NET 0x115 @@ -154,9 +153,9 @@ test_13() { mkdir $MOUNT/readdir || return 1 touch $MOUNT/readdir/newentry || return # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE - do_facet mds "sysctl -w lustre.fail_loc=0x80000104" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000104" ls $MOUNT/readdir || return 3 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" rm -rf $MOUNT/readdir || return 4 } run_test 13 "mdc_readpage restart test (bug 1138)" @@ -166,14 +165,14 @@ test_14() { mkdir $MOUNT/readdir touch $MOUNT/readdir/newentry # OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE - do_facet mds "sysctl -w lustre.fail_loc=0x80000106" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000106" ls $MOUNT/readdir || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" } run_test 14 "mdc_readpage resend test (bug 1138)" test_15() { - do_facet mds "sysctl -w lustre.fail_loc=0x80000128" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000128" touch $DIR/$tfile && return 1 return 0 } @@ -198,11 +197,11 @@ test_16() { stop_read_ahead #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE - do_facet ost1 sysctl -w lustre.fail_loc=0x80000504 + do_facet ost1 "sysctl -w lustre.fail_loc=0x80000504" cancel_lru_locks osc # OST bulk will time out here, client resends do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 1 - sysctl -w lustre.fail_loc=0 + do_facet ost1 sysctl -w lustre.fail_loc=0 # give recovery a chance to finish (shouldn't take long) sleep $TIMEOUT do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2 @@ -213,13 +212,13 @@ run_test 16 "timeout bulk put, don't evict client (2732)" test_17() { # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE # OST bulk will time out here, client retries - sysctl -w lustre.fail_loc=0x80000503 + do_facet ost1 sysctl -w lustre.fail_loc=0x80000503 # need to ensure we send an RPC do_facet client cp /etc/termcap $DIR/$tfile sync sleep $TIMEOUT - sysctl -w lustre.fail_loc=0 + do_facet ost1 sysctl -w lustre.fail_loc=0 do_facet client "df $DIR" # expect cmp to succeed, client resent bulk do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3 @@ -242,7 +241,7 @@ test_18a() { do_facet client cp /etc/termcap $f sync - local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'` + local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | egrep -v 'MDT' | awk '{print $1}'` $LCTL --device $osc2dev deactivate || return 3 # my understanding is that there should be nothing in the page # cache after the client reconnects? @@ -338,18 +337,18 @@ test_21a() { multiop $DIR/$tdir-1/f O_c & close_pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129" multiop $DIR/$tdir-2/f Oc & open_pid=$! sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115" kill -USR1 $close_pid cancel_lru_locks mdc wait $close_pid || return 1 wait $open_pid || return 2 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" $CHECKSTAT -t file $DIR/$tdir-1/f || return 3 $CHECKSTAT -t file $DIR/$tdir-2/f || return 4 @@ -364,11 +363,11 @@ test_21b() { multiop $DIR/$tdir-1/f O_c & close_pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" mcreate $DIR/$tdir-2/f & open_pid=$! sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" kill -USR1 $close_pid cancel_lru_locks mdc @@ -387,19 +386,19 @@ test_21c() { multiop $DIR/$tdir-1/f O_c & close_pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" mcreate $DIR/$tdir-2/f & open_pid=$! sleep 3 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115" kill -USR1 $close_pid cancel_lru_locks mdc wait $close_pid || return 1 wait $open_pid || return 2 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 @@ -413,16 +412,16 @@ test_21d() { multiop $DIR/$tdir-1/f O_c & pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129" multiop $DIR/$tdir-2/f Oc & sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122" kill -USR1 $pid cancel_lru_locks mdc wait $pid || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 @@ -437,10 +436,10 @@ test_21e() { multiop $DIR/$tdir-1/f O_c & pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" touch $DIR/$tdir-2/f & sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" kill -USR1 $pid cancel_lru_locks mdc @@ -459,16 +458,16 @@ test_21f() { multiop $DIR/$tdir-1/f O_c & pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" touch $DIR/$tdir-2/f & sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122" kill -USR1 $pid cancel_lru_locks mdc wait $pid || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 @@ -482,16 +481,16 @@ test_21g() { multiop $DIR/$tdir-1/f O_c & pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" touch $DIR/$tdir-2/f & sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115" kill -USR1 $pid cancel_lru_locks mdc wait $pid || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 @@ -505,17 +504,17 @@ test_21h() { multiop $DIR/$tdir-1/f O_c & pid=$! - do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" touch $DIR/$tdir-2/f & touch_pid=$! sleep 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" - do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000122" cancel_lru_locks mdc kill -USR1 $pid wait $pid || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" wait $touch_pid || return 2 @@ -530,7 +529,7 @@ test_22() { f1=$DIR/${tfile}-1 f2=$DIR/${tfile}-2 - do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000115" multiop $f2 Oc & close_pid=$! @@ -538,7 +537,7 @@ test_22() { multiop $f1 msu || return 1 cancel_lru_locks mdc - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" wait $close_pid || return 2 rm -rf $f2 || return 4 @@ -554,7 +553,7 @@ test_23() { #b=4561 # try the close drop_request "kill -USR1 $pid" - fail mds + fail $SINGLEMDS wait $pid || return 1 return 0 } @@ -604,9 +603,10 @@ test_26() { # bug 5921 - evict dead exports by pinger run_test 26 "evict dead exports" test_26b() { # bug 10140 - evict dead exports by pinger - zconf_mount `hostname` $MOUNT2 - MDS_FILE=$LPROC/mds/${mds_svc}/num_exports - MDS_NEXP1="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`" + client_df + zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2" + MDS_FILE=$LPROC/mdt/${mds1_svc}/num_exports + MDS_NEXP1="`do_facet $SINGLEMDS cat $MDS_FILE | cut -d' ' -f2`" OST_FILE=$LPROC/obdfilter/${ost1_svc}/num_exports OST_NEXP1="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`" echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports @@ -617,7 +617,7 @@ test_26b() { # bug 10140 - evict dead exports by pinger echo Waiting for $(($TIMEOUT * 4)) secs sleep $(($TIMEOUT * 4)) OST_NEXP2="`do_facet ost1 cat $OST_FILE | cut -d' ' -f2`" - MDS_NEXP2="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`" + MDS_NEXP2="`do_facet $SINGLEMDS cat $MDS_FILE | cut -d' ' -f2`" echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST" [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS" @@ -633,7 +633,7 @@ test_27() { CLIENT_PID=$! sleep 1 FAILURE_MODE="SOFT" - facet_failover mds + facet_failover $SINGLEMDS #define OBD_FAIL_OSC_SHUTDOWN 0x407 sysctl -w lustre.fail_loc=0x80000407 # need to wait for reconnect @@ -642,7 +642,7 @@ test_27() { sleep 1 echo -n . done - facet_failover mds + facet_failover $SINGLEMDS #no crashes allowed! kill -USR1 $CLIENT_PID wait $CLIENT_PID @@ -653,12 +653,12 @@ run_test 27 "fail LOV while using OSC's" test_28() { # bug 6086 - error adding new clients do_facet client mcreate $MOUNT/$tfile || return 1 drop_bl_callback "chmod 0777 $MOUNT/$tfile" ||echo "evicted as expected" - #define OBD_FAIL_MDS_ADD_CLIENT 0x12f - do_facet mds sysctl -w lustre.fail_loc=0x8000012f + #define OBD_FAIL_MDS_CLIENT_ADD 0x12f + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012f" # fail once (evicted), reconnect fail (fail_loc), ok - df || (sleep 1; df) || (sleep 1; df) || error "reconnect failed" + df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed" rm -f $MOUNT/$tfile - fail mds # verify MDS last_rcvd can be loaded + fail $SINGLEMDS # verify MDS last_rcvd can be loaded } run_test 28 "handle error adding new clients (bug 6086)" @@ -670,12 +670,12 @@ test_50() { echo writemany pid $CLIENT_PID sleep 10 FAILURE_MODE="SOFT" - fail mds + fail $SINGLEMDS # wait for client to reconnect to MDS sleep 60 - fail mds + fail $SINGLEMDS sleep 60 - fail mds + fail $SINGLEMDS # client process should see no problems even though MDS went down sleep $TIMEOUT kill -USR1 $CLIENT_PID @@ -694,7 +694,7 @@ test_51() { CLIENT_PID=$! sleep 1 FAILURE_MODE="SOFT" - facet_failover mds + facet_failover $SINGLEMDS # failover at various points during recovery SEQ="1 5 10 $(seq $TIMEOUT 5 $(($TIMEOUT+10)))" echo will failover at $SEQ @@ -702,7 +702,7 @@ test_51() { do echo failover in $i sec sleep $i - facet_failover mds + facet_failover $SINGLEMDS done # client process should see no problems even though MDS went down # and recovery was interrupted @@ -764,7 +764,7 @@ test_54() { touch $DIR2/$tfile.1 sleep 10 cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd - fail mds + fail $SINGLEMDS umount $MOUNT2 ERROR=`dmesg | egrep "(test 54|went back in time)" | tail -n1 | grep "went back in time"` [ x"$ERROR" == x ] || error "back in time occured" @@ -833,9 +833,9 @@ run_test 55 "ost_brw_read/write drops timed-out read/write request" test_56() { # b=11277 #define OBD_FAIL_MDS_RESEND 0x136 touch $DIR/$tfile - do_facet mds sysctl -w lustre.fail_loc=0x80000136 + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000136" stat $DIR/$tfile - do_facet mds sysctl -w lustre.fail_loc=0 + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" rm -f $DIR/$tfile } run_test 56 "do not allow reconnect to busy exports" @@ -857,7 +857,7 @@ test_57() { # bug 10866 sysctl -w lustre.fail_loc=0x80000B00 zconf_umount `hostname` $DIR sysctl -w lustre.fail_loc=0x80000B00 - fail_abort mds + fail_abort $SINGLEMDS kill -9 $pid sysctl -w lustre.fail_loc=0 mount_client $DIR diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 820a97b..6db74a2 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -30,7 +30,7 @@ fi setup() { cleanup - formatall + [ "$REFORMAT" ] && formatall setupall mount_client $MOUNT2 } @@ -40,10 +40,10 @@ $SETUP test_1() { touch $MOUNT1/a - replay_barrier mds + replay_barrier $SINGLEMDS touch $MOUNT2/b - fail mds + fail $SINGLEMDS checkstat $MOUNT2/a || return 1 checkstat $MOUNT1/b || return 2 rm $MOUNT2/a $MOUNT1/b @@ -56,10 +56,10 @@ run_test 1 "|X| simple create" test_2() { - replay_barrier mds + replay_barrier $SINGLEMDS mkdir $MOUNT1/adir - fail mds + fail $SINGLEMDS checkstat $MOUNT2/adir || return 1 rmdir $MOUNT2/adir checkstat $MOUNT2/adir && return 2 @@ -68,11 +68,11 @@ test_2() { run_test 2 "|X| mkdir adir" test_3() { - replay_barrier mds + replay_barrier $SINGLEMDS mkdir $MOUNT1/adir mkdir $MOUNT2/adir/bdir - fail mds + fail $SINGLEMDS checkstat $MOUNT2/adir || return 1 checkstat $MOUNT1/adir/bdir || return 2 rmdir $MOUNT2/adir/bdir $MOUNT1/adir @@ -84,11 +84,11 @@ run_test 3 "|X| mkdir adir, mkdir adir/bdir " test_4() { mkdir $MOUNT1/adir - replay_barrier mds + replay_barrier $SINGLEMDS mkdir $MOUNT1/adir && return 1 mkdir $MOUNT2/adir/bdir - fail mds + fail $SINGLEMDS checkstat $MOUNT2/adir || return 2 checkstat $MOUNT1/adir/bdir || return 3 @@ -108,11 +108,11 @@ test_5() { # give multiop a chance to open sleep 1 rm -f $MOUNT1/a - replay_barrier mds + replay_barrier $SINGLEMDS kill -USR1 $pid wait $pid || return 1 - fail mds + fail $SINGLEMDS [ -e $MOUNT2/a ] && return 2 return 0 } @@ -128,22 +128,22 @@ test_6() { # give multiop a chance to open sleep 1 rm -f $MOUNT1/a - replay_barrier mds + replay_barrier $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid2 wait $pid2 || return 1 [ -e $MOUNT2/a ] && return 2 return 0 } -run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2" +run_test 6 "open1, open2, unlink |X| close1 [fail $SINGLEMDS] close2" test_8() { - replay_barrier mds + replay_barrier $SINGLEMDS drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1 - fail mds + fail $SINGLEMDS checkstat $MOUNT2/$tfile || return 2 rm $MOUNT1/$tfile || return 3 @@ -152,12 +152,12 @@ test_8() { run_test 8 "replay of resent request" test_9() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $MOUNT1/$tfile-1 mcreate $MOUNT2/$tfile-2 # drop first reint reply sysctl -w lustre.fail_loc=0x80000119 - fail mds + fail $SINGLEMDS sysctl -w lustre.fail_loc=0 rm $MOUNT1/$tfile-[1,2] || return 1 @@ -168,12 +168,12 @@ run_test 9 "resending a replayed create" test_10() { mcreate $MOUNT1/$tfile-1 - replay_barrier mds + replay_barrier $SINGLEMDS munlink $MOUNT1/$tfile-1 mcreate $MOUNT2/$tfile-2 # drop first reint reply sysctl -w lustre.fail_loc=0x80000119 - fail mds + fail $SINGLEMDS sysctl -w lustre.fail_loc=0 checkstat $MOUNT1/$tfile-1 && return 1 @@ -185,7 +185,7 @@ test_10() { run_test 10 "resending a replayed unlink" test_11() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $MOUNT1/$tfile-1 mcreate $MOUNT2/$tfile-2 mcreate $MOUNT1/$tfile-3 @@ -193,15 +193,11 @@ test_11() { mcreate $MOUNT1/$tfile-5 # drop all reint replies for a while sysctl -w lustre.fail_loc=0x0119 - facet_failover mds + facet_failover $SINGLEMDS #sleep for while, let both clients reconnect and timeout sleep $((TIMEOUT * 2)) sysctl -w lustre.fail_loc=0 - while [ -z "$(ls $MOUNT1/$tfile-[1-5] 2>/dev/null)" ]; do - sleep 5 - echo -n "." - done - ls $MOUNT1/$tfile-[1-5] + rm $MOUNT1/$tfile-[1-5] || return 1 return 0 @@ -209,7 +205,7 @@ test_11() { run_test 11 "both clients timeout during replay" test_12() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile mo_c & MULTIPID=$! @@ -217,7 +213,7 @@ test_12() { #define OBD_FAIL_LDLM_ENQUEUE 0x302 sysctl -w lustre.fail_loc=0x80000302 - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 sysctl -w lustre.fail_loc=0 @@ -236,14 +232,14 @@ test_13() { MULTIPID=$! sleep 5 - replay_barrier mds + replay_barrier $SINGLEMDS kill -USR1 $MULTIPID || return 3 wait $MULTIPID || return 4 # drop close sysctl -w lustre.fail_loc=0x80000115 - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 sysctl -w lustre.fail_loc=0 @@ -256,13 +252,13 @@ test_13() { run_test 13 "close resend timeout" test_14() { - replay_barrier mds + replay_barrier $SINGLEMDS createmany -o $MOUNT1/$tfile- 25 createmany -o $MOUNT2/$tfile-2- 1 createmany -o $MOUNT1/$tfile-3- 25 umount $MOUNT2 - facet_failover mds + facet_failover $SINGLEMDS # expect failover to fail df $MOUNT && return 1 sleep 1 @@ -276,12 +272,12 @@ test_14() { run_test 14 "timeouts waiting for lost client during replay" test_15() { - replay_barrier mds + replay_barrier $SINGLEMDS createmany -o $MOUNT1/$tfile- 25 createmany -o $MOUNT2/$tfile-2- 1 umount $MOUNT2 - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -296,11 +292,11 @@ test_15a() { local ost_last_id="" local osc_last_id="" - replay_barrier mds + replay_barrier $SINGLEMDS echo "data" > "$MOUNT2/${tfile}-m2" umount $MOUNT2 - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 ost_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` @@ -342,12 +338,12 @@ test_15a() { #CROW run_test 15a "OST clear orphans - synchronize ids on MDS and OST" test_15b() { - replay_barrier mds + replay_barrier $SINGLEMDS echo "data" > "$MOUNT2/${tfile}-m2" umount $MOUNT2 do_facet ost1 "sysctl -w lustre.fail_loc=0x80000802" - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 do_facet ost1 "sysctl -w lustre.fail_loc=0" @@ -358,13 +354,13 @@ test_15b() { #CROW run_test 15b "multiple delayed OST clear orphans" test_15c() { - replay_barrier mds + replay_barrier $SINGLEMDS for ((i = 0; i < 2000; i++)); do echo "data" > "$MOUNT2/${tfile}-$i" || error "create ${tfile}-$i failed" done umount $MOUNT2 - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 @@ -374,14 +370,14 @@ test_15c() { run_test 15c "remove multiple OST orphans" test_16() { - replay_barrier mds + replay_barrier $SINGLEMDS createmany -o $MOUNT1/$tfile- 25 createmany -o $MOUNT2/$tfile-2- 1 umount $MOUNT2 - facet_failover mds + facet_failover $SINGLEMDS sleep $TIMEOUT - facet_failover mds + facet_failover $SINGLEMDS df $MOUNT || return 1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -424,7 +420,7 @@ test_18() { # bug 3822 - evicting client with enqueued lock statmany -s $MOUNT1/$tdir/f 1 500 & OPENPID=$! NOW=`date +%s` - do_facet mds sysctl -w lustre.fail_loc=0x8000030b # hold enqueue + do_facet $SINGLEMDS sysctl -w lustre.fail_loc=0x8000030b # hold enqueue sleep 1 #define OBD_FAIL_LDLM_BL_CALLBACK 0x305 do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict @@ -438,9 +434,9 @@ test_18() { # bug 3822 - evicting client with enqueued lock run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)" test_19() { # Bug 10991 - resend of open request does not fail assertion. - replay_barrier mds + replay_barrier $SINGLEMDS drop_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1 - fail mds + fail $SINGLEMDS checkstat $DIR2/${tfile}0 || return 2 rm $DIR/${tfile}0 || return 3 diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index bec28ad..e1c0535 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -49,7 +49,7 @@ test_0a() { setup() { gen_config - start mds $MDSDEV $MDS_MOUNT_OPTS + start mds1 `mdsdevname 1` $MDS_MOUNT_OPTS start ost1 `ostdevname 1` $OST_MOUNT_OPTS [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE @@ -60,6 +60,7 @@ setup() { if [ -z "`grep " $MOUNT " /proc/mounts`" ]; then zconf_mount `hostname` $MOUNT || error "mount fail" fi + sleep 5 do_facet ost1 "sysctl -w lustre.fail_loc=0" } @@ -178,7 +179,7 @@ run_test 6 "Fail OST before obd_destroy" test_7() { f=$DIR/$tfile rm -f $f - sync && sleep 2 && sync # wait for delete thread + sync && sleep 5 && sync # wait for delete thread before=`kbytesfree` dd if=/dev/urandom bs=4096 count=1280 of=$f || return 4 sync @@ -192,7 +193,7 @@ test_7() { $CHECKSTAT -t file $f && return 2 || true sync # let the delete happen - sleep 2 + sleep 5 after=`kbytesfree` log "before: $before after: $after" (( $before <= $after + 40 )) || return 3 # take OST logs into account @@ -201,5 +202,3 @@ run_test 7 "Fail OST before obd_destroy" equals_msg test complete, cleaning up $CLEANUP -echo "$0: completed" - diff --git a/lustre/tests/replay-single-lmv.sh b/lustre/tests/replay-single-lmv.sh new file mode 100755 index 0000000..fbc53c9 --- /dev/null +++ b/lustre/tests/replay-single-lmv.sh @@ -0,0 +1,113 @@ +#!/bin/sh + +set -e +#set -v + +# +# This test needs to be run on the client +# + +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh} + + +# Skip these tests +ALWAYS_EXCEPT="" +build_test_filter + +SETUP=${SETUP:-"setup"} +CLEANUP=${CLEANUP:-"stopall"} + +if [ "$ONLY" == "cleanup" ]; then + sysctl -w lnet.debug=0 || true + $CLEANUP + exit 0 +fi + +setup() { + formatall + setupall +} + +$SETUP + +if [ "$ONLY" == "setup" ]; then + exit 0 +fi + +mkdir -p $DIR + + +test_0() { + replay_barrier mds1 + fail mds1 +} +run_test 0 "empty replay" + +test_0b() { + # this test attempts to trigger a race in the precreation code, + # and must run before any other objects are created on the filesystem + fail ost1 + createmany -o $DIR/$tfile 20 || return 1 + unlinkmany $DIR/$tfile 20 || return 2 +} +run_test 0b "ensure object created after recover exists. (3284)" + +test_1a() { + mkdir $DIR/dir01 + replay_barrier mds2 + $CHECKSTAT -t dir $DIR/dir01 || return 1 + rmdir $DIR/dir01 + fail mds2 + stat $DIR/dir01 +} +run_test 1a "unlink cross-node dir (fail mds with inode)" + +test_1b() { + mkdir $DIR/dir11 + replay_barrier mds1 + $CHECKSTAT -t dir $DIR/dir11 || return 1 + rmdir $DIR/dir11 + fail mds1 + stat $DIR/dir11 +} +run_test 1b "unlink cross-node dir (fail mds with name)" + +test_2a() { + mkdir $DIR/dir21 + createmany -o $DIR/dir21/f 3000 + sleep 10 + $CHECKSTAT -t dir $DIR/dir21 || return 1 + $CHECKSTAT -t file $DIR/dir21/f1002 || return 1 + replay_barrier mds1 + rm $DIR/dir21/f1002 + fail mds1 + stat $DIR/dir21/f1002 +} +run_test 2a "unlink cross-node file (fail mds with name)" + +test_3a() { + replay_barrier mds2 + mkdir $DIR/dir3a1 + $LCTL mark "FAILOVER mds2" + fail mds2 + stat $DIR + $CHECKSTAT -t dir $DIR/dir3a1 || return 1 +} +run_test 3a "mkdir cross-node dir (fail mds with inode)" + +test_3b() { + replay_barrier mds1 + mkdir $DIR/dir3b1 + $LCTL mark "FAILOVER mds1" + fail mds1 + stat $DIR + $CHECKSTAT -t dir $DIR/dir3b1 || return 1 +} +run_test 3b "mkdir cross-node dir (fail mds with inode)" + +equals_msg test complete, cleaning up +$CLEANUP + diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 1d385f8..5eeac63 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -14,21 +14,21 @@ init_test_env $@ # Skip these tests -# bug number: 4176 -ALWAYS_EXCEPT="39 $REPLAY_SINGLE_EXCEPT" +# bug number: 2766 4176 +ALWAYS_EXCEPT="0b 39 $REPLAY_SINGLE_EXCEPT" gen_config() { rm -f $XMLCONFIG - add_mds mds --dev $MDSDEV --size $MDSSIZE + add_mds $SINGLEMDS --dev $MDSDEV --size $MDSSIZE if [ ! -z "$mdsfailover_HOST" ]; then - add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE + add_mdsfailover $SINGLEMDS --dev $MDSDEV --size $MDSSIZE fi - add_lov lov1 mds --stripe_sz $STRIPE_BYTES \ + add_lov lov1 $SINGLEMDS --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE - add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE - add_client client mds --lov lov1 --path $MOUNT + add_ost ost --lov lov1 --dev `ostdevname 1` --size $OSTSIZE + add_ost ost2 --lov lov1 --dev `ostdevname 2` --size $OSTSIZE + add_client client $SINGLEMDS --lov lov1 --path $MOUNT } build_test_filter @@ -43,7 +43,7 @@ if [ "$ONLY" == "cleanup" ]; then fi setup() { - formatall + [ "$REFORMAT" ] && formatall setupall } @@ -56,8 +56,11 @@ fi mkdir -p $DIR test_0() { - replay_barrier mds - fail mds + sleep 10 + mkdir $DIR/$tfile + replay_barrier $SINGLEMDS + fail $SINGLEMDS + rmdir $DIR/$tfile } run_test 0 "empty replay" @@ -70,63 +73,88 @@ test_0b() { } run_test 0b "ensure object created after recover exists. (3284)" -test_1() { - replay_barrier mds - mcreate $DIR/$tfile - fail mds - $CHECKSTAT -t file $DIR/$tfile || return 1 - rm $DIR/$tfile +seq_set_width() +{ + local mds=$1 + local width=$2 + local file=`ls /proc/fs/lustre/seq/cli-srv-$mds-mdc-*/width` + echo $width > $file } -run_test 1 "simple create" -test_1a() { - do_facet ost1 "sysctl -w lustre.fail_loc=0" +seq_get_width() +{ + local mds=$1 + local file=`ls /proc/fs/lustre/seq/cli-srv-$mds-mdc-*/width` + cat $file +} - rm -fr $DIR/$tfile - local old_last_id=`cat $LPROC/obdfilter/*/last_id` - touch -o $DIR/$tfile 1 - sync - local new_last_id=`cat $LPROC/obdfilter/*/last_id` +# This test should pass for single-mds and multi-mds configs. +# But for different configurations it tests different things. +# +# single-mds +# ---------- +# (1) fld_create replay should happen; +# +# (2) fld_create replay should not return -EEXISTS, if it does +# this means sequence manager recovery code is buggy and allocated +# same sequence two times after recovery. +# +# multi-mds +# --------- +# (1) fld_create replay may not happen, because its home MDS is +# MDS2 which is not involved to revovery; +# +# (2) as fld_create does not happen on MDS1, it does not make any +# problem. +test_0c() { + local label=`mdsdevlabel 1` + [ -z "$label" ] && echo "No label for mds1" && return 1 + + replay_barrier $SINGLEMDS + local sw=`seq_get_width $label` - test "$old_last_id" = "$new_last_id" || { - echo "OST object create is caused by MDS" - return 1 - } + # make seq manager switch to next sequence each + # time as new fid is needed. + seq_set_width $label 1 - old_last_id=`cat $LPROC/obdfilter/*/last_id` - echo "data" > $DIR/$tfile - sync - new_last_id=`cat $LPROC/obdfilter/*/last_id` - test "$old_last_id" = "$new_last_id "&& { - echo "CROW does not work on write" - return 1 - } + # make sure that fld has created at least one new + # entry on server + touch $DIR/$tfile || return 2 + seq_set_width $label $sw - rm -fr $DIR/$tfile - -#define OBD_FAIL_OST_CROW_EIO | OBD_FAIL_ONCE - do_facet ost1 "sysctl -w lustre.fail_loc=0x80000801" - - rm -fr $DIR/1a1 - old_last_id=`cat $LPROC/obdfilter/*/last_id` - echo "data" > $DIR/1a1 - sync - new_last_id=`cat $LPROC/obdfilter/*/last_id` - test "$old_last_id" = "$new_last_id" || { - echo "CROW does work with fail_loc=0x80000801" - return 1 - } + # fail $SINGLEMDS and start recovery, replay RPCs, etc. + fail $SINGLEMDS + + # wait for recovery finish + sleep 10 + df $MOUNT - rm -fr $DIR/1a1 + # flush fld cache and dentry cache to make it lookup + # created entry instead of revalidating existent one + umount $MOUNT + zconf_mount `hostname` $MOUNT - do_facet ost1 "sysctl -w lustre.fail_loc=0" + # issue lookup which should call fld lookup which + # should fail if client did not replay fld create + # correctly and server has no fld entry + touch $DIR/$tfile || return 3 + rm $DIR/$tfile || return 4 } -#CROW run_test 1a "CROW object create (check OST last_id)" +run_test 0c "fld create" + +test_1() { + replay_barrier $SINGLEMDS + mcreate $DIR/$tfile + fail $SINGLEMDS + $CHECKSTAT -t file $DIR/$tfile || return 1 + rm $DIR/$tfile +} +run_test 1 "simple create" test_2a() { - replay_barrier mds + replay_barrier $SINGLEMDS touch $DIR/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile || return 1 rm $DIR/$tfile } @@ -134,43 +162,43 @@ run_test 2a "touch" test_2b() { ./mcreate $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS touch $DIR/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile || return 1 rm $DIR/$tfile } run_test 2b "touch" test_3a() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $DIR/$tfile o_directory $DIR/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile || return 2 rm $DIR/$tfile } run_test 3a "replay failed open(O_DIRECTORY)" test_3b() { - replay_barrier mds + replay_barrier $SINGLEMDS #define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE - do_facet mds "sysctl -w lustre.fail_loc=0x80000114" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000114" touch $DIR/$tfile - do_facet mds "sysctl -w lustre.fail_loc=0" - fail mds + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile && return 2 return 0 } run_test 3b "replay failed open -ENOMEM" test_3c() { - replay_barrier mds + replay_barrier $SINGLEMDS #define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE - do_facet mds "sysctl -w lustre.fail_loc=0x80000128" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000128" touch $DIR/$tfile - do_facet mds "sysctl -w lustre.fail_loc=0" - fail mds + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile && return 2 return 0 @@ -178,11 +206,11 @@ test_3c() { run_test 3c "replay failed open -ENOMEM" test_4() { - replay_barrier mds + replay_barrier $SINGLEMDS for i in `seq 10`; do echo "tag-$i" > $DIR/$tfile-$i done - fail mds + fail $SINGLEMDS for i in `seq 10`; do grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i" done @@ -190,9 +218,9 @@ test_4() { run_test 4 "|x| 10 open(O_CREAT)s" test_4b() { - replay_barrier mds + replay_barrier $SINGLEMDS rm -rf $DIR/$tfile-* - fail mds + fail $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true } run_test 4b "|x| rm 10 files" @@ -200,11 +228,11 @@ run_test 4b "|x| rm 10 files" # The idea is to get past the first block of precreated files on both # osts, and then replay. test_5() { - replay_barrier mds + replay_barrier $SINGLEMDS for i in `seq 220`; do echo "tag-$i" > $DIR/$tfile-$i done - fail mds + fail $SINGLEMDS for i in `seq 220`; do grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i" done @@ -216,10 +244,10 @@ run_test 5 "|x| 220 open(O_CREAT)" test_6() { - replay_barrier mds + replay_barrier $SINGLEMDS mkdir $DIR/$tdir mcreate $DIR/$tdir/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT -t dir $DIR/$tdir || return 1 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2 sleep 2 @@ -228,18 +256,18 @@ test_6() { run_test 6 "mkdir + contained create" test_6b() { - replay_barrier mds + replay_barrier $SINGLEMDS rm -rf $DIR/$tdir - fail mds + fail $SINGLEMDS $CHECKSTAT -t dir $DIR/$tdir && return 1 || true } run_test 6b "|X| rmdir" test_7() { mkdir $DIR/$tdir - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $DIR/$tdir/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT -t dir $DIR/$tdir || return 1 $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2 rm -fr $DIR/$tdir @@ -247,11 +275,13 @@ test_7() { run_test 7 "mkdir |X| contained create" test_8() { - replay_barrier mds + # make sure no side-effect from previous test. + rm -f $DIR/$tfile + replay_barrier $SINGLEMDS multiop $DIR/$tfile mo_c & MULTIPID=$! sleep 1 - fail mds + fail $SINGLEMDS ls $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || return 1 kill -USR1 $MULTIPID || return 2 @@ -261,10 +291,10 @@ test_8() { run_test 8 "creat open |X| close" test_9() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $DIR/$tfile local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'` - fail mds + fail $SINGLEMDS local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'` echo " old_inum == $old_inum, new_inum == $new_inum" @@ -281,10 +311,10 @@ run_test 9 "|X| create (same inum/gen)" test_10() { mcreate $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS mv $DIR/$tfile $DIR/$tfile-2 rm -f $DIR/$tfile - fail mds + fail $SINGLEMDS $CHECKSTAT $DIR/$tfile && return 1 $CHECKSTAT $DIR/$tfile-2 ||return 2 rm $DIR/$tfile-2 @@ -296,11 +326,11 @@ test_11() { mcreate $DIR/$tfile echo "old" > $DIR/$tfile mv $DIR/$tfile $DIR/$tfile-2 - replay_barrier mds + replay_barrier $SINGLEMDS echo "new" > $DIR/$tfile grep new $DIR/$tfile grep old $DIR/$tfile-2 - fail mds + fail $SINGLEMDS grep new $DIR/$tfile || return 1 grep old $DIR/$tfile-2 || return 2 } @@ -313,11 +343,11 @@ test_12() { # give multiop a chance to open sleep 1 rm -f $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS kill -USR1 $pid wait $pid || return 1 - fail mds + fail $SINGLEMDS [ -e $DIR/$tfile ] && return 2 return 0 } @@ -334,8 +364,8 @@ test_13() { sleep 1 chmod 0 $DIR/$tfile $CHECKSTAT -p 0 $DIR/$tfile - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 1 @@ -350,11 +380,11 @@ test_14() { # give multiop a chance to open sleep 1 rm -f $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS kill -USR1 $pid || return 1 wait $pid || return 2 - fail mds + fail $SINGLEMDS [ -e $DIR/$tfile ] && return 3 return 0 } @@ -366,12 +396,12 @@ test_15() { # give multiop a chance to open sleep 1 rm -f $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS touch $DIR/g11 || return 1 kill -USR1 $pid wait $pid || return 2 - fail mds + fail $SINGLEMDS [ -e $DIR/$tfile ] && return 3 touch $DIR/h11 || return 4 return 0 @@ -380,11 +410,11 @@ run_test 15 "open(O_CREAT), unlink |X| touch new, close" test_16() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $DIR/$tfile munlink $DIR/$tfile mcreate $DIR/$tfile-2 - fail mds + fail $SINGLEMDS [ -e $DIR/$tfile ] && return 1 [ -e $DIR/$tfile-2 ] || return 2 munlink $DIR/$tfile-2 || return 3 @@ -392,12 +422,12 @@ test_16() { run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new" test_17() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile O_c & pid=$! # give multiop a chance to open sleep 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid || return 1 wait $pid || return 2 $CHECKSTAT -t file $DIR/$tfile || return 3 @@ -406,7 +436,7 @@ test_17() { run_test 17 "|X| open(O_CREAT), |replay| close" test_18() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile O_tSc & pid=$! # give multiop a chance to open @@ -417,7 +447,7 @@ test_18() { kill -USR1 $pid wait $pid || return 2 - fail mds + fail $SINGLEMDS [ -e $DIR/$tfile ] && return 3 [ -e $DIR/$tfile-2 ] || return 4 # this touch frequently fails @@ -430,25 +460,25 @@ run_test 18 "|X| open(O_CREAT), unlink, touch new, close, touch, unlink" # bug 1855 (a simpler form of test_11 above) test_19() { - replay_barrier mds + replay_barrier $SINGLEMDS mcreate $DIR/$tfile echo "old" > $DIR/$tfile mv $DIR/$tfile $DIR/$tfile-2 grep old $DIR/$tfile-2 - fail mds + fail $SINGLEMDS grep old $DIR/$tfile-2 || return 2 } run_test 19 "|X| mcreate, open, write, rename " test_20() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile O_tSc & pid=$! # give multiop a chance to open sleep 1 rm -f $DIR/$tfile - fail mds + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -470,7 +500,7 @@ test_20b() { # bug 10480 mds_evict_client df -P $DIR || df -P $DIR || true # reconnect - fail mds # start orphan recovery + fail $SINGLEMDS # start orphan recovery df -P $DIR || df -P $DIR || true # reconnect sleep 2 @@ -502,7 +532,7 @@ test_20c() { # bug 10480 run_test 20c "check that client eviction does not affect file content" test_21() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile O_tSc & pid=$! # give multiop a chance to open @@ -510,7 +540,7 @@ test_21() { rm -f $DIR/$tfile touch $DIR/g11 || return 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 2 [ -e $DIR/$tfile ] && return 3 @@ -525,10 +555,10 @@ test_22() { # give multiop a chance to open sleep 1 - replay_barrier mds + replay_barrier $SINGLEMDS rm -f $DIR/$tfile - fail mds + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -542,11 +572,11 @@ test_23() { # give multiop a chance to open sleep 1 - replay_barrier mds + replay_barrier $SINGLEMDS rm -f $DIR/$tfile touch $DIR/g11 || return 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 2 [ -e $DIR/$tfile ] && return 3 @@ -561,8 +591,8 @@ test_24() { # give multiop a chance to open sleep 1 - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS rm -f $DIR/$tfile kill -USR1 $pid wait $pid || return 1 @@ -578,8 +608,8 @@ test_25() { sleep 1 rm -f $DIR/$tfile - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS kill -USR1 $pid wait $pid || return 1 [ -e $DIR/$tfile ] && return 2 @@ -588,7 +618,7 @@ test_25() { run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)" test_26() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile-1 O_tSc & pid1=$! multiop $DIR/$tfile-2 O_tSc & @@ -600,7 +630,7 @@ test_26() { kill -USR1 $pid2 wait $pid2 || return 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 2 [ -e $DIR/$tfile-1 ] && return 3 @@ -610,7 +640,7 @@ test_26() { run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test mds_cleanup_orphans)" test_27() { - replay_barrier mds + replay_barrier $SINGLEMDS multiop $DIR/$tfile-1 O_tSc & pid1=$! multiop $DIR/$tfile-2 O_tSc & @@ -620,7 +650,7 @@ test_27() { rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 - fail mds + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -638,13 +668,13 @@ test_28() { pid2=$! # give multiop a chance to open sleep 1 - replay_barrier mds + replay_barrier $SINGLEMDS rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 kill -USR1 $pid2 wait $pid2 || return 1 - fail mds + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 2 [ -e $DIR/$tfile-1 ] && return 3 @@ -660,11 +690,11 @@ test_29() { pid2=$! # give multiop a chance to open sleep 1 - replay_barrier mds + replay_barrier $SINGLEMDS rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 - fail mds + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -685,8 +715,8 @@ test_30() { rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -706,9 +736,9 @@ test_31() { sleep 1 rm -f $DIR/$tfile-1 - replay_barrier mds + replay_barrier $SINGLEMDS rm -f $DIR/$tfile-2 - fail mds + fail $SINGLEMDS kill -USR1 $pid1 wait $pid1 || return 1 kill -USR1 $pid2 @@ -739,15 +769,29 @@ run_test 32 "close() notices client eviction; close() after client eviction" # Abort recovery before client complete test_33() { - replay_barrier mds - touch $DIR/$tfile - fail_abort mds + replay_barrier $SINGLEMDS + createmany -o $DIR/$tfile-%d 100 + fail_abort $SINGLEMDS # this file should be gone, because the replay was aborted - $CHECKSTAT -t file $DIR/$tfile && return 3 + $CHECKSTAT -t file $DIR/$tfile-* && return 3 + unlinkmany $DIR/$tfile-%d 0 100 return 0 } run_test 33 "abort recovery before client does replay" +# Stale FID sequence +test_33a() { + replay_barrier $SINGLEMDS + createmany -o $DIR/$tfile-%d 10 + fail_abort $SINGLEMDS + unlinkmany $DIR/$tfile-%d 0 10 + # recreate shouldn't fail + createmany -o $DIR/$tfile-%d 10 || return 3 + unlinkmany $DIR/$tfile-%d 0 10 + return 0 +} +run_test 33a "fid shouldn't be reused after abort recovery" + test_34() { multiop $DIR/$tfile O_c & pid=$! @@ -755,8 +799,8 @@ test_34() { sleep 1 rm -f $DIR/$tfile - replay_barrier mds - fail_abort mds + replay_barrier $SINGLEMDS + fail_abort $SINGLEMDS kill -USR1 $pid [ -e $DIR/$tfile ] && return 1 sync @@ -769,13 +813,13 @@ test_35() { touch $DIR/$tfile #define OBD_FAIL_MDS_REINT_NET_REP 0x119 - do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" rm -f $DIR/$tfile & sleep 1 sync sleep 1 # give a chance to remove from MDS - fail_abort mds + fail_abort $SINGLEMDS $CHECKSTAT -t file $DIR/$tfile && return 1 || true } run_test 35 "test recovery from llog for unlink op" @@ -783,10 +827,10 @@ run_test 35 "test recovery from llog for unlink op" # b=2432 resent cancel after replay uses wrong cookie, # so don't resend cancels test_36() { - replay_barrier mds + replay_barrier $SINGLEMDS touch $DIR/$tfile checkstat $DIR/$tfile - facet_failover mds + facet_failover $SINGLEMDS cancel_lru_locks mdc if dmesg | grep "unknown lock cookie"; then echo "cancel after replay failed" @@ -805,10 +849,10 @@ test_37() { sleep 1 rmdir $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS # clear the dmesg buffer so we only see errors from this recovery dmesg -c >/dev/null - fail_abort mds + fail_abort $SINGLEMDS kill -USR1 $pid dmesg | grep "mds_unlink_orphan.*error .* unlinking orphan" && return 1 sync @@ -819,8 +863,8 @@ run_test 37 "abort recovery before client does replay (test mds_cleanup_orphans test_38() { createmany -o $DIR/$tfile-%d 800 unlinkmany $DIR/$tfile-%d 0 400 - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS unlinkmany $DIR/$tfile-%d 400 400 sleep 2 $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true @@ -829,12 +873,11 @@ run_test 38 "test recovery from unlink llog (test llog_gen_rec) " test_39() { # bug 4176 createmany -o $DIR/$tfile-%d 800 - replay_barrier mds + replay_barrier $SINGLEMDS unlinkmany $DIR/$tfile-%d 0 400 - fail mds + fail $SINGLEMDS unlinkmany $DIR/$tfile-%d 400 400 sleep 2 - ls -1f $DIR/$tfile-* $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true } run_test 39 "test recovery from unlink llog (test llog_gen_rec) " @@ -851,9 +894,9 @@ test_40(){ writeme -s $MOUNT/${tfile}-2 & WRITE_PID=$! sleep 1 - facet_failover mds + facet_failover $SINGLEMDS #define OBD_FAIL_MDS_CONNECT_NET 0x117 - do_facet mds "sysctl -w lustre.fail_loc=0x80000117" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000117" kill -USR1 $PID stat1=`count_ost_writes` sleep $TIMEOUT @@ -893,7 +936,7 @@ test_41() { do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3 cancel_lru_locks osc # fail ost2 and read from ost1 - local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'` + local osc2dev=`grep ${ost2_svc}-osc-MDT0000 $LPROC/devices | awk '{print $1}'` [ "$osc2dev" ] || return 4 $LCTL --device $osc2dev deactivate || return 1 do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3 @@ -908,7 +951,7 @@ test_42() { createmany -o $DIR/$tfile-%d 800 replay_barrier ost1 unlinkmany $DIR/$tfile-%d 0 400 - DEBUG42="`sysctl -n lnet.debug`" + DEBUG42=`sysctl -n lnet.debug` sysctl -w lnet.debug=-1 facet_failover ost1 @@ -917,7 +960,7 @@ test_42() { #[ $blocks_after -lt $blocks ] || return 1 echo wait for MDS to timeout and recover sleep $((TIMEOUT * 2)) - sysctl -w lnet.debug=$DEBUG42 + sysctl -w lnet.debug="$DEBUG42" unlinkmany $DIR/$tfile-%d 400 400 $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true } @@ -925,11 +968,11 @@ run_test 42 "recovery after ost failure" # timeout in MDS/OST recovery RPC will LBUG MDS test_43() { # bug 2530 - replay_barrier mds + replay_barrier $SINGLEMDS # OBD_FAIL_OST_CREATE_NET 0x204 do_facet ost1 "sysctl -w lustre.fail_loc=0x80000204" - fail mds + fail $SINGLEMDS sleep 10 do_facet ost1 "sysctl -w lustre.fail_loc=0" @@ -938,36 +981,36 @@ test_43() { # bug 2530 run_test 43 "mds osc import failure during recovery; don't LBUG" test_44() { - mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + mdcdev=`awk '/MDT0000-mdc-/ {print $1}' $LPROC/devices` [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do #define OBD_FAIL_TGT_CONN_RACE 0x701 - do_facet mds "sysctl -w lustre.fail_loc=0x80000701" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000701" $LCTL --device $mdcdev recover df $MOUNT done - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" return 0 } run_test 44 "race in target handle connect" test_44b() { - mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + mdcdev=`awk '/MDT0000-mdc-/ {print $1}' $LPROC/devices` [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 - do_facet mds "sysctl -w lustre.fail_loc=0x80000704" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000704" $LCTL --device $mdcdev recover df $MOUNT done - do_facet mds "sysctl -w lustre.fail_loc=0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0" return 0 } run_test 44b "race in target handle connect" # Handle failed close test_45() { - mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + mdcdev=`awk '/MDT0000-mdc-/ {print $1}' $LPROC/devices` [ "$mdcdev" ] || exit 2 $LCTL --device $mdcdev recover @@ -994,7 +1037,7 @@ run_test 45 "Handle failed close" test_46() { dmesg -c >/dev/null drop_reply "touch $DIR/$tfile" - fail mds + fail $SINGLEMDS # ironically, the previous test, 45, will cause a real forced close, # so just look for one for this test dmesg | grep -i "force closing client file handle for $tfile" && return 1 @@ -1026,10 +1069,10 @@ test_47() { # bug 2824 run_test 47 "MDS->OSC failure during precreate cleanup (2824)" test_48() { - replay_barrier mds + replay_barrier $SINGLEMDS createmany -o $DIR/$tfile 20 || return 1 # OBD_FAIL_OST_EROFS 0x216 - fail mds + fail $SINGLEMDS do_facet ost1 "sysctl -w lustre.fail_loc=0x80000216" df $MOUNT || return 2 @@ -1042,7 +1085,7 @@ test_48() { run_test 48 "MDS->OSC failure during precreate cleanup (2824)" test_50() { - local oscdev=`grep ${ost1_svc}-osc- $LPROC/devices | awk '{print $1}'` + local oscdev=`grep ${ost1_svc}-osc-MDT0000 $LPROC/devices | awk '{print $1}'` [ "$oscdev" ] || return 1 $LCTL --device $oscdev recover && $LCTL --device $oscdev recover # give the mds_lov_sync threads a chance to run @@ -1056,11 +1099,11 @@ test_52() { cancel_lru_locks mdc multiop $DIR/$tfile s || return 1 - replay_barrier mds + replay_barrier $SINGLEMDS #define OBD_FAIL_LDLM_REPLY 0x30c - do_facet mds "sysctl -w lustre.fail_loc=0x8000030c" - fail mds || return 2 - do_facet mds "sysctl -w lustre.fail_loc=0x0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000030c" + fail $SINGLEMDS || return 2 + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0" $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true } @@ -1072,11 +1115,11 @@ run_test 52 "time out lock replay (3764)" #b3761 ASSERTION(hash != 0) failed test_55() { # OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE - do_facet mds "sysctl -w lustre.fail_loc=0x8000012b" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012b" touch $DIR/$tfile & # give touch a chance to run sleep 5 - do_facet mds "sysctl -w lustre.fail_loc=0x0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0" rm $DIR/$tfile return 0 } @@ -1085,9 +1128,9 @@ run_test 55 "let MDS_CHECK_RESENT return the original return code instead of 0" #b3440 ASSERTION(rec->ur_fid2->id) failed test_56() { ln -s foo $DIR/$tfile - replay_barrier mds + replay_barrier $SINGLEMDS #drop_reply "cat $DIR/$tfile" - fail mds + fail $SINGLEMDS sleep 10 } run_test 56 "don't replay a symlink open request (3440)" @@ -1095,13 +1138,13 @@ run_test 56 "don't replay a symlink open request (3440)" #recovery one mds-ost setattr from llog test_57() { #define OBD_FAIL_MDS_OST_SETATTR 0x12c - do_facet mds "sysctl -w lustre.fail_loc=0x8000012c" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012c" touch $DIR/$tfile - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS sleep 1 $CHECKSTAT -t file $DIR/$tfile || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0x0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0" rm $DIR/$tfile } run_test 57 "test recovery from llog for setattr op" @@ -1109,18 +1152,49 @@ run_test 57 "test recovery from llog for setattr op" #recovery many mds-ost setattr from llog test_58() { #define OBD_FAIL_MDS_OST_SETATTR 0x12c - do_facet mds "sysctl -w lustre.fail_loc=0x8000012c" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x8000012c" mkdir $DIR/$tdir createmany -o $DIR/$tdir/$tfile-%d 2500 - replay_barrier mds - fail mds + replay_barrier $SINGLEMDS + fail $SINGLEMDS sleep 2 $CHECKSTAT -t file $DIR/$tdir/$tfile-* || return 1 - do_facet mds "sysctl -w lustre.fail_loc=0x0" + do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x0" unlinkmany $DIR/$tdir/$tfile-%d 2500 rmdir $DIR/$tdir } run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)" +# log_commit_thread vs filter_destroy race used to lead to import use after free +# bug 11658 +test_59() { + mkdir $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 200 + sync + unlinkmany $DIR/$tdir/$tfile-%d 200 +#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507 + do_facet ost1 "sysctl -w lustre.fail_loc=0x507" + fail ost1 + fail $SINGLEMDS + do_facet ost1 "sysctl -w lustre.fail_loc=0x0" + sleep 20 + rmdir $DIR/$tdir +} +run_test 59 "test log_commit_thread vs filter_destroy race" + +# race between add unlink llog vs cat log init in post_recovery (only for b1_6) +# bug 12086: should no oops and No ctxt error for this test +test_60() { + mkdir $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 200 + replay_barrier $SINGLEMDS + unlinkmany $DIR/$tdir/$tfile-%d 0 100 + fail $SINGLEMDS + unlinkmany $DIR/$tdir/$tfile-%d 100 100 + local no_ctxt=`dmesg | grep "No ctxt"` + [ -z "$no_ctxt" ] || error "ctxt is not initialized in recovery" +} +run_test 60 "test llog post recovery init vs llog unlink" + equals_msg `basename $0`: test complete, cleaning up $CLEANUP diff --git a/lustre/tests/run-llog.sh b/lustre/tests/run-llog.sh index 1b808e9..4d0041e 100644 --- a/lustre/tests/run-llog.sh +++ b/lustre/tests/run-llog.sh @@ -2,12 +2,12 @@ PATH=`dirname $0`:`dirname $0`/../utils:$PATH TMP=${TMP:-/tmp} -MDS=`ls $LPROC/mds | grep -v num_refs | head -n 1` +MDS=`ls /proc/fs/lustre/mdt | grep -v num_refs | head -n 1` [ -z "$MDS" ] && echo "no MDS available, skipping llog test" && exit 0 case `uname -r` in -2.4.*) insmod ../obdclass/llog_test.o || exit 1 ;; -2.6.*) insmod ../obdclass/llog_test.ko || exit 1 ;; +2.4.*) modprobe llog_test || exit 1 ;; +2.6.*) modprobe llog_test || exit 1 ;; *) echo "unknown kernel version `uname -r`" && exit 99 ;; esac lctl modules > $TMP/ogdb-`hostname` diff --git a/lustre/tests/run-quotafmt.sh b/lustre/tests/run-quotafmt.sh index 989cfa5..307f1dd 100644 --- a/lustre/tests/run-quotafmt.sh +++ b/lustre/tests/run-quotafmt.sh @@ -2,7 +2,7 @@ PATH=`dirname $0`:`dirname $0`/../utils:$PATH TMP=${TMP:-/tmp} -MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -n 1` +MDS=`ls /proc/fs/lustre/mdt | grep -v num_refs | head -n 1` [ -z "$MDS" ] && echo "no MDS available, skipping quotafile test" && exit 0 insmod ../ldiskfs/quotafmt_test.ko || exit 1 diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 4db7617..416d251 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -19,9 +19,11 @@ #endif static const char usage[] = -"Usage: %s -u user_id [-g grp_id] [-G[gid0,gid1,...]] command\n" +"Usage: %s -u user_id [-g grp_id] [-v euid] [-j egid] [-G[gid0,gid1,...]] command\n" " -u user_id switch to UID user_id\n" " -g grp_id switch to GID grp_id\n" +" -v euid switch euid to UID\n" +" -j egid switch egid to GID\n" " -G[gid0,gid1,...] set supplementary groups\n"; void Usage_and_abort(const char *name) @@ -37,6 +39,9 @@ int main(int argc, char **argv) int gid_is_set = 0, uid_is_set = 0, num_supp = -1; uid_t user_id = 0; gid_t grp_id = 0, supp_groups[NGROUPS_MAX] = { 0 }; + int euid_is_set = 0, egid_is_set = 0; + uid_t euid = 0; + gid_t egid = 0; if (argc == 1) { fprintf(stderr, "No parameter count\n"); @@ -44,7 +49,7 @@ int main(int argc, char **argv) } // get UID and GID - while ((c = getopt(argc, argv, "+u:g:hG::")) != -1) { + while ((c = getopt(argc, argv, "+u:g:v:j:hG::")) != -1) { switch (c) { case 'u': if (!isdigit(optarg[0])) { @@ -78,6 +83,36 @@ int main(int argc, char **argv) gid_is_set = 1; break; + case 'v': + if (!isdigit(optarg[0])) { + struct passwd *pw = getpwnam(optarg); + if (pw == NULL) { + fprintf(stderr, "parameter '%s' bad\n", + optarg); + Usage_and_abort(name); + } + euid = pw->pw_uid; + } else { + euid = (uid_t)atoi(optarg); + } + euid_is_set = 1; + break; + + case 'j': + if (!isdigit(optarg[0])) { + struct group *gr = getgrnam(optarg); + if (gr == NULL) { + fprintf(stderr, "getgrname %s failed\n", + optarg); + Usage_and_abort(name); + } + egid = gr->gr_gid; + } else { + egid = (gid_t)atoi(optarg); + } + egid_is_set = 1; + break; + case 'G': num_supp = 0; if (optarg == NULL || !isdigit(optarg[0])) @@ -126,10 +161,12 @@ int main(int argc, char **argv) #endif // set GID - status = setregid(grp_id, grp_id); + if (!egid_is_set) + egid = grp_id; + status = setregid(grp_id, egid); if (status == -1) { - fprintf(stderr, "Cannot change grp_ID to %d, errno=%d (%s)\n", - grp_id, errno, strerror(errno) ); + fprintf(stderr, "Cannot change gid to %d/%d, errno=%d (%s)\n", + grp_id, egid, errno, strerror(errno) ); exit(-1); } @@ -142,16 +179,19 @@ int main(int argc, char **argv) } // set UID - status = setreuid(user_id, user_id ); + if (!euid_is_set) + euid = user_id; + status = setreuid(user_id, euid); if(status == -1) { - fprintf(stderr,"Cannot change user_ID to %d, errno=%d (%s)\n", - user_id, errno, strerror(errno) ); + fprintf(stderr,"Cannot change uid to %d/%d, errno=%d (%s)\n", + user_id, euid, errno, strerror(errno) ); exit(-1); } - fprintf(stderr, "running as UID %d, GID %d", user_id, grp_id); + fprintf(stderr, "running as uid/gid/euid/egid %d/%d/%d/%d, groups:", + user_id, grp_id, euid, egid); for (i = 0; i < num_supp; i++) - fprintf(stderr, ":%d", supp_groups[i]); + fprintf(stderr, " %d", supp_groups[i]); fprintf(stderr, "\n"); for (i = 0; i < argc - optind; i++) diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench index 3c96233..9c5371b 100755 --- a/lustre/tests/rundbench +++ b/lustre/tests/rundbench @@ -8,8 +8,6 @@ SRC=${SRC:-/usr/lib/dbench/client.txt} [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT SRC=/usr/lib/dbench/client_plain.txt [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT -SRC=/usr/share/dbench/client.txt -[ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT [ ! -s $TGT ] && echo "$0: $TGT doesn't exist (SRC=$SRC)" && exit 1 cd $DIR echo "running 'dbench $@' on $PWD at `date`" diff --git a/lustre/tests/runfailure-mds b/lustre/tests/runfailure-mds deleted file mode 100755 index f2942c3..0000000 --- a/lustre/tests/runfailure-mds +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -. $SRCDIR/llmount.sh - -MNT="setup_mount" - -test_fail() { - echo $1 > /proc/sys/lustre/fail_loc - shift - echo "Running '$*'" - $* - - echo "Cleaning up and restarting MDS" - umount /mnt/lustre || fail "unable to unmount" - $OBDCTL <<- EOF - name2dev MDSDEV - cleanup - detach - quit - EOF - - echo 0 > /proc/sys/lustre/fail_loc - - $OBDCTL <<- EOF - newdev - attach mds MDSDEV - setup ${MDS} ${MDSFS} - quit - EOF - $MNT -} - -#set -vx - -touch /mnt/lustre/foo -chmod a+x /mnt/lustre/foo -sync - -# OBD_FAIL_MDS_REINT_SETATTR_WRITE - MDS will discard data from setattr -test_fail 0x10a chmod 000 /mnt/lustre/foo -ls -l /mnt/lustre/foo -[ ! -x /mnt/lustre/foo ] && fail "/mnt/lustre/foo is not executable!" - -# OBD_FAIL_MDS_REINT_CREATE_WRITE - MDS will not create the file -test_fail 0x10c touch /mnt/lustre/bar -ls /mnt/lustre/bar -[ $? -eq 0 ] && fail "/mnt/lustre/bar was created!" - -# OBD_FAIL_MDS_REINT_UNLINK_WRITE - MDS will discard data from unlink -test_fail 0x10e rm /mnt/lustre/foo -ls /mnt/lustre/foo -[ $? -eq 1 ] && fail "/mnt/lustre/foo has been removed!" - -# OBD_FAIL_MDS_REINT_RENAME_WRITE - MDS will discard data from rename -test_fail 0x112 mv /mnt/lustre/foo /mnt/lustre/bar -ls /mnt/lustre/foo /mnt/lustre/bar -[ ! -f /mnt/lustre/foo -o -f /mnt/lustre/bar ] && \ - fail "/mnt/lustre/foo has been renamed to bar!" - -echo "Done." diff --git a/lustre/tests/runfailure-net b/lustre/tests/runfailure-net deleted file mode 100755 index 4e9bdd7..0000000 --- a/lustre/tests/runfailure-net +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -fail() { - echo "ERROR: $1" 1>&2 - [ $2 ] && RC=$2 || RC=1 - exit $RC -} - -test_fail() { - oldtimeout=`cat /proc/sys/lustre/timeout` - echo $TIMEOUT > /proc/sys/lustre/timeout - echo $1 > /proc/sys/lustre/fail_loc - shift - $* & - sleep $TIMEOUT - sleep 2 # fudge - kill -9 $! - - echo $oldtimeout > /proc/sys/lustre/timeout - echo 0 > /proc/sys/lustre/fail_loc - umount -f /mnt/lustre || fail "cannot unmount /mnt/lustre" - mount -t lustre_lite -o "osc=$OSC,mdc=$MDC" none /mnt/lustre || \ - fail "cannot remount $OSC/$MDC on /mnt/lustre" -} - -set -vx - -LCTL=../utils/lctl -OSC=OSC_`hostname`_UUID -MDC=MDC_client1_UUID -TIMEOUT=5 # complete in finite time - -[ "`mount | grep /mnt/lustre`" ] || echo | sh llmount.sh || exit -1 - -# GETATTR_NET - ls will hang on the getattr -# test_fail 0x102 ls -l /mnt/lustre - -# READPAGE_NET - ls will hang reading in new pages (lost+found is not in cache) -test_fail 0x104 ls /mnt/lustre - -sleep 1 - -# REINT_NET - touch will hang on setattr -test_fail 0x107 touch /mnt/lustre - -# REINT_NET - touch will hang on create -test_fail 0x107 touch /mnt/lustre/tt - -# REINT_NET - mv will hang on rename -touch /mnt/lustre/foo -test_fail 0x107 mv /mnt/lustre/foo /mnt/lustre/bar - -# REINT_NET - rm will hang on unlink -touch /mnt/lustre/salmon -test_fail 0x107 rm /mnt/lustre/salmon - -# OPEN_NET - touch will hang on open -touch /mnt/lustre/foo -test_fail 0x113 cat /mnt/lustre/foo - -# CLOSE_NET - ls will hang on close -test_fail 0x115 ./testreq --close junk_file_handle - -echo 0 > /proc/sys/lustre/fail_loc - -echo "Done." diff --git a/lustre/tests/runfailure-ost b/lustre/tests/runfailure-ost deleted file mode 100755 index 0c68d5a..0000000 --- a/lustre/tests/runfailure-ost +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -setup_opts "$@" - -set -vx - -test_fail() { - echo $1 > /proc/sys/lustre/fail_loc - shift - echo "Running '$*'" - $* & - sleep 1 - kill -9 $! - - echo 0 > /proc/sys/lustre/fail_loc - umount /mnt/lustre || fail "cannot unmount /mnt/lustre" - setup_mount || fail "cannot remount /mnt/lustre" -} - -[ "`mount | grep /mnt/lustre`" ] || . llsetup.sh "$@" || exit -1 - -# OBD_FAIL_OST_OPEN_NET: OST will discard open request packet -touch /mnt/lustre/foo -test_fail 0x208 cat /mnt/lustre/foo - -# OBD_FAIL_OST_CLOSE_NET: OST will discard close request packet -test_fail 0x209 cat /mnt/lustre/foo - -# OBD_FAIL_OST_CREATE_NET: OST will discard create request packet -test_fail 0x204 touch /mnt/lustre/bar - -# OBD_FAIL_OST_DESTROY_NET: OST will discard destroy request packet -test_fail 0x205 rm /mnt/lustre/foo - -# OBD_FAIL_OST_BRW_NET: OST will discard read request packet -echo foo >> /mnt/lustre/foo -test_fail 0x20a cat /mnt/lustre/foo - -# OBD_FAIL_OST_BRW_NET: OST will discard write request packet -test_fail 0x20a "echo bar >> /mnt/lustre/foo" - -# OBD_FAIL_OST_PUNCH_NET: OST will discard truncate request packet -test_fail 0x208 "echo bar > /mnt/lustre/foo" - -# OBD_FAIL_OST_STATFS_NET: OST will discard statfs request packet -test_fail 0x208 df /mnt/lustre - -echo "Done." diff --git a/lustre/tests/runregression-brw.sh b/lustre/tests/runregression-brw.sh deleted file mode 100644 index 666b253..0000000 --- a/lustre/tests/runregression-brw.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH - -LOOPS=${LOOPS:-1} -COUNT=${COUNT:-100000} -COUNT_10=`expr $COUNT / 10` -COUNT_100=`expr $COUNT / 100` - -ENDRUN=endrun-`hostname` - -ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -n 1`" - -if [ -z "$ECHONAME" ]; then - echo "$0: needs an ECHO_CLIENT set up first" 1>&2 - exit 1 -fi - -cleanup () { - lctl --device \$$ECHONAME destroy $OID -} - -runthreads() { - THR=$1 - DO=$2 - CNT=$3 - V=$4 - PGS=$5 - - case $DO in - test_getattr) - RW= - ;; - test_brw_write) - DO=test_brw - RW=w - ;; - test_brw_read) - DO=test_brw - RW=r - ;; - esac - - lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1 - - if [ -e $ENDRUN ]; then - rm $ENDRUN - echo "exiting because $ENDRUN file was found" - cleanup - fi -} - -[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID" -[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1 - -# TODO: obdctl needs to check on the progress of each forked thread -# (IPC SHM, sockets?) to see if it hangs. -for i in `seq $LOOPS`; do - PG=1 - PGVW=${PGVW:-16} - PGVR=${PGVR:-16} - - # We use '--threads 1 X' instead of '--device X' so that - # obdctl can monitor the forked thread for progress (TODO). - debug_server_off - debug_client_off - runthreads 1 test_brw_write 1000 -30 $PG - runthreads 1 test_brw_read 1000 -30 $PG - - [ "$PGVW" ] && runthreads 1 test_brw_write 100 -30 $PGVW - [ "$PGVW" ] && runthreads 1 test_brw_read 1600 -30 $PG - [ "$PGVR" ] && runthreads 1 test_brw_read 100 -30 $PGVR - - runthreads 1 test_brw_write $COUNT -30 $PG - runthreads 1 test_brw_read $COUNT -30 $PG - - [ "$PGVW" ] && runthreads 1 test_brw_write $COUNT_10 -30 $PGVW - [ "$PGVR" ] && runthreads 1 test_brw_read $COUNT_10 -30 $PGVR - - runthreads 2 test_brw_write $COUNT -30 $PG - runthreads 2 test_brw_read $COUNT -30 $PG - - [ "$PGVW" ] && runthreads 2 test_brw_write $COUNT_10 -30 $PGVW - [ "$PGVR" ] && runthreads 2 test_brw_read $COUNT_10 -30 $PGVR - - runthreads 10 test_brw_write $COUNT_10 -30 $PG - runthreads 10 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 10 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 10 test_brw_read $COUNT_100 -60 $PGVR - - runthreads 32 test_brw_write $COUNT_10 -30 $PG - runthreads 32 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 32 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 32 test_brw_read $COUNT_100 -60 $PGVR - - runthreads 64 test_brw_write $COUNT_10 -30 $PG - runthreads 64 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 64 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 64 test_brw_read $COUNT_100 -60 $PGVR - - runthreads 100 test_brw_write $COUNT_100 -60 $PG - runthreads 100 test_brw_read $COUNT_100 -60 $PG - - [ "$PGVW" ] && runthreads 100 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 100 test_brw_read $COUNT_100 -60 $PGVR -done - -cleanup diff --git a/lustre/tests/runregression-net.sh b/lustre/tests/runregression-net.sh deleted file mode 100644 index 58f33ef..0000000 --- a/lustre/tests/runregression-net.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH - -COUNT=${COUNT:-100000} -COUNT_10=`expr $COUNT / 10` -COUNT_100=`expr $COUNT / 100` -COUNT_1000=`expr $COUNT / 1000` - -ENDRUN=endrun-`hostname` - -ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -n 1`" - -if [ -z "$ECHONAME" ]; then - echo "$0: needs an ECHO_CLIENT set up first" 1>&2 - exit 1 -fi - -cleanup () { - lctl --device \$$ECHONAME destroy $OID -} - -runthreads() { - THR=$1 - DO=$2 - CNT=$3 - V=$4 - PGS=$5 - - case $DO in - test_getattr) - RW= - ;; - test_brw_write) - DO=test_brw - RW=w - ;; - test_brw_read) - DO=test_brw - RW=r - ;; - esac - - lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1 - - if [ -e $ENDRUN ]; then - rm $ENDRUN - echo "exiting because $ENDRUN file was found" - cleanup - fi -} - -[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID" -[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1 - -# TODO: obdctl needs to check on the progress of each forked thread -# (IPC SHM, sockets?) to see if it hangs. -for CMD in test_getattr test_brw_write test_brw_read; do - case $CMD in - test_getattr) - PG= - PGV= - ;; - test_brw_write) - PG=1 - PGV=${PGV:-16} - ;; - test_brw_read) - PG=1 - PGV=${PGV:-16} - ;; - esac - - # We use '--threads 1 X' instead of '--device X' so that - # obdctl can monitor the forked thread for progress (TODO). - runthreads 1 $CMD 1 1 $PG - runthreads 1 $CMD 100 1 $PG - - echo 0 > /proc/sys/lnet/debug - runthreads 1 $CMD $COUNT_100 -10 $PG - [ "$PGV" ] && runthreads 1 $CMD $COUNT_1000 -10 $PGV - - runthreads 1 $CMD $COUNT -30 $PG - [ "$PGV" ] && runthreads 1 $CMD $COUNT_10 -30 $PGV - - runthreads 2 $CMD $COUNT_100 -30 $PG - [ "$PGV" ] && runthreads 2 $CMD $COUNT_1000 -30 $PGV - - runthreads 2 $CMD $COUNT -30 $PG - [ "$PGV" ] && runthreads 2 $CMD $COUNT_10 -30 $PGV - - runthreads 10 $CMD $COUNT_10 -30 $PG - [ "$PGV" ] && runthreads 10 $CMD $COUNT_100 -30 $PGV - - runthreads 100 $CMD $COUNT_100 -30 $PG - [ "$PGV" ] && runthreads 100 $CMD $COUNT_1000 -30 $PGV -done - -lctl --device \$$ECHONAME destroy $OID diff --git a/lustre/tests/runtests b/lustre/tests/runtests index cafbaa2..42bc087 100755 --- a/lustre/tests/runtests +++ b/lustre/tests/runtests @@ -8,9 +8,11 @@ LUSTRE=${LUSTRE:-`dirname $0`/..} SRCDIR="`dirname $0`" export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH +export NAME=${NAME:-local} + . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} SETUP=${SETUP:-setupall} FORMAT=${FORMAT:-formatall} @@ -44,15 +46,20 @@ while [ "$1" ]; do shift done -EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts` -if [ -z "$EXISTING_MOUNT" ]; then - $FORMAT - $SETUP - EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts` - [ -z "$EXISTING_MOUNT" ] && fail "no lustre filesystem mounted" 1 - I_MOUNTED="yes" +mounted_lustre_filesystems() { + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts +} + +MOUNTED="`mounted_lustre_filesystems`" +if [ -z "$MOUNTED" ]; then + formatall + setupall + MOUNTED="`mounted_lustre_filesystems`" + [ -z "$MOUNTED" ] && error "NAME=$NAME not mounted" + I_MOUNTED=yes fi -MOUNT=$EXISTING_MOUNT + +MOUNT=$MOUNTED OSCTMP=`echo $MOUNT | tr "/" "."` USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -n 1` @@ -150,6 +157,5 @@ if [ `expr $NOWUSED - $USED` -gt 1024 ]; then fi if [ "$I_MOUNTED" = "yes" ]; then - sync && sleep 2 && sync # wait for delete thread $CLEANUP fi diff --git a/lustre/tests/sanity-gss.sh b/lustre/tests/sanity-gss.sh new file mode 100644 index 0000000..c6a0f3c --- /dev/null +++ b/lustre/tests/sanity-gss.sh @@ -0,0 +1,390 @@ +#!/bin/bash +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31" +set -e + +ONLY=${ONLY:-"$*"} +# bug number for skipped test: +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT" + +# Tests that fail on uml, maybe elsewhere, FIXME +CPU=`awk '/model/ {print $4}' /proc/cpuinfo` +[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT" + +case `uname -r` in +2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;; +*) error "unsupported kernel (gss only works with 2.6.x)" ;; +esac + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$SRCDIR/../utils/gss:$PATH:/sbin + +TMP=${TMP:-/tmp} + +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} +CREATETEST=${CREATETEST:-createtest} +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +MEMHOG=${MEMHOG:-memhog} +DIRECTIO=${DIRECTIO:-directio} +ACCEPTOR_PORT=${ACCEPTOR_PORT:-988} +UMOUNT=${UMOUNT:-"umount -d"} + +if [ $UID -ne 0 ]; then + echo "Warning: running as non-root uid $UID" + RUNAS_ID="$UID" + RUNAS="" +else + RUNAS_ID=${RUNAS_ID:-500} + RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} + + # $RUNAS_ID may get set incorrectly somewhere else + if [ $RUNAS_ID -eq 0 ]; then + echo "Error: \$RUNAS_ID set to 0, but \$UID is also 0!" + exit 1 + fi +fi + +SANITYLOG=${SANITYLOG:-/tmp/sanity-gss.log} + +export NAME=${NAME:-local} + +SAVE_PWD=$PWD + +# +# check pre-set $SEC +# +if [ ! -z $SEC ]; then + if [ "$SEC" != "krb5i" -a "$SEC" != "krb5p" ]; then + echo "SEC=$SEC is invalid, this script only run in gss mode (krb5i/krb5p)" + exit 1 + fi +fi + +export SEC=${SEC:-krb5p} +export KRB5_CCACHE_DIR=/tmp +export KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID +export KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save + +echo "Using security flavor $SEC" + +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} + +prepare_krb5_creds() { + rm -f $CRED_SAVE + $RUNAS krb5_login.sh || exit 1 + [ -f $KRB5_CRED ] || exit 2 + cp $KRB5_CRED $KRB5_CRED_SAVE +} + +cleanup() { + echo -n "cln.." + cleanupall ${FORCE} $* || { echo "FAILed to clean up"; exit 20; } +} +CLEANUP=${CLEANUP:-:} + +setup() { + echo -n "mnt.." + load_modules + setupall || exit 10 + echo "done" +} +SETUP=${SETUP:-:} + +trace() { + log "STARTING: $*" + strace -o $TMP/$1.strace -ttt $* + RC=$? + log "FINISHED: $*: rc $RC" + return 1 +} +TRACE=${TRACE:-""} + +check_kernel_version() { + VERSION_FILE=$LPROC/kernel_version + WANT_VER=$1 + [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1 + GOT_VER=`cat $VERSION_FILE` + [ $GOT_VER -ge $WANT_VER ] && return 0 + log "test needs at least kernel version $WANT_VER, running $GOT_VER" + return 1 +} + +_basetest() { + echo $* +} + +[ "$SANITYLOG" ] && rm -f $SANITYLOG || true + + +prepare_krb5_creds +build_test_filter +umask 077 + +# setup filesystem +formatall +setupall +chmod a+rwx $MOUNT + +restore_krb5_cred() { + cp $KRB5_CRED_SAVE $KRB5_CRED + chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED + chmod 0600 $KRB5_CRED +} + +test_1() { + # access w/o cred + $RUNAS kdestroy + $RUNAS touch $MOUNT/f1 && error "unexpected success" + + # access w/ cred + restore_krb5_cred + $RUNAS touch $MOUNT/f1 || error "should not fail" + [ -f $MOUNT/f1 ] || error "$MOUNT/f1 not found" +} +run_test 1 "access with or without krb5 credential" + +test_2() { + # current access should be ok + $RUNAS touch $MOUNT/f2_1 || error "can't touch $MOUNT/f2_1" + [ -f $MOUNT/f2_1 ] || error "$MOUNT/f2_1 not found" + + # cleanup all cred/ctx and touch + $RUNAS kdestroy + $RUNAS $LFS flushctx + $RUNAS touch $MOUNT/f2_2 && error "unexpected success" + + # restore and touch + restore_krb5_cred + $RUNAS touch $MOUNT/f2_2 || error "should not fail" + [ -f $MOUNT/f2_2 ] || error "$MOUNT/f2_2 not found" +} +run_test 2 "lfs flushctx" + +test_3() { + local file=$MOUNT/f3 + + # create file + echo "aaaaaaaaaaaaaaaaa" > $file + chmod 0666 $file + $CHECKSTAT -p 0666 $file || error "$UID checkstat error" + $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error" + $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error" + + # start multiop + $RUNAS multiop $file o_r & + OPPID=$! + # wait multiop finish its open() + sleep 1 + + # cleanup all cred/ctx and check + # metadata check should fail, but file data check should success + # because we always use root credential to OSTs + $RUNAS kdestroy + $RUNAS $LFS flushctx + $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed" + kill -s 10 $OPPID + wait $OPPID || error "read file data failed" + echo "read file data OK" + + # restore and check again + restore_krb5_cred + $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error" + $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error" + $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error" +} +run_test 3 "local cache under DLM lock" + +test_4() { + local file1=$MOUNT/f4_1 + local file2=$MOUNT/f4_2 + + # current access should be ok + $RUNAS touch $file1 || error "can't touch $file1" + [ -f $file1 ] || error "$file1 not found" + + # stop lgssd + send_sigint client lgssd + sleep 5 + check_gss_daemon_facet client lgssd && error "lgssd still running" + + # flush context, and touch + $RUNAS $LFS flushctx + $RUNAS touch $file2 & + TOUCHPID=$! + echo "waiting touch pid $TOUCHPID" + wait $TOUCHPID && error "touch should fail" + + # restart lgssd + do_facet client "$LGSSD -v" + sleep 5 + check_gss_daemon_facet client lgssd + + # touch new should succeed + $RUNAS touch $file2 || error "can't touch $file2" + [ -f $file2 ] || error "$file2 not found" +} +run_test 4 "lgssd dead, operations should wait timeout and fail" + +test_5() { + local file1=$MOUNT/f5_1 + local file2=$MOUNT/f5_2 + local wait_time=120 + + # current access should be ok + $RUNAS touch $file1 || error "can't touch $file1" + [ -f $file1 ] || error "$file1 not found" + + # stop lsvcgssd + send_sigint mds lsvcgssd + sleep 5 + check_gss_daemon_facet mds lsvcgssd && error "lsvcgssd still running" + + # flush context, and touch + $RUNAS $LFS flushctx + $RUNAS touch $file2 & + TOUCHPID=$! + + # wait certain time + echo "waiting $wait_time seconds for touch pid $TOUCHPID" + sleep $wait_time + num=`ps --no-headers -p $TOUCHPID | wc -l` + [ $num -eq 1 ] || error "touch already ended ($num)" + echo "process $TOUCHPID still hanging there... OK" + + # restart lsvcgssd, expect touch suceed + echo "restart lsvcgssd and recovering" + do_facet mds "$LSVCGSSD -v" + sleep 5 + check_gss_daemon_facet mds lsvcgssd + wait $TOUCHPID || error "touch fail" + [ -f $file2 ] || error "$file2 not found" +} +run_test 5 "lsvcgssd dead, operations lead to recovery" + +test_6() { + NPROC=`cat /proc/cpuinfo 2>/dev/null | grep ^processor | wc -l` + [ $NPROC -ne 0 ] || NPROC=2 + + echo "starting dbench $NPROC" + sh rundbench $NPROC & + RUNPID=$! + + for ((n=0;;n++)); do + sleep 2 + num=`ps --no-headers -p $RUNPID | wc -l` + [ $num -ne 0 ] || break + echo "flush ctx ..." + $LFS flushctx + done + wait $RUNPID || error "dbench detect error" +} +run_test 6 "recoverable from losing context" + +test_7() { + local tdir=$MOUNT/dir7 + local num_osts + + # + # for open(), client only reserve space for default stripe count lovea, + # and server may return larger lovea in reply (because of larger stripe + # count), client need call enlarge_reqbuf() and save the replied lovea + # in request for future possible replay. + # + # Note: current script does NOT guarantee enlarge_reqbuf() will be in + # the path, however it does work in local test which has 2 OSTs and + # default stripe count is 1. + # + num_osts=`$LFS getstripe $MOUNT | egrep "^[0-9]*:.*ACTIVE" | wc -l` + echo "found $num_osts active OSTs" + [ $num_osts -lt 2 ] && echo "skipping $TESTNAME (must have >= 2 OSTs)" && return + + mkdir $tdir || error + $LFS setstripe $tdir 0 -1 -1 || error + + echo "creating..." + for ((i=0;i<20;i++)); do + dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null + done + echo "reading..." + for ((i=0;i<20;i++)); do + dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null + done + rm -rf $tdir +} +run_test 7 "exercise enlarge_reqbuf()" + +check_multiple_gss_daemons() { + local facet=$1 + + for ((i=0;i<10;i++)); do + do_facet $facet "$LSVCGSSD -v &" + done + for ((i=0;i<10;i++)); do + do_facet $facet "$LGSSD -v &" + done + + # wait daemons entering "stable" status + sleep 5 + + numc=`do_facet $facet ps -o cmd -C lgssd | grep lgssd | wc -l` + nums=`do_facet $facet ps -o cmd -C lgssd | grep lgssd | wc -l` + echo "$numc lgssd and $nums lsvcgssd are running" + + if [ $numc -ne 1 -o $nums -ne 1 ]; then + error "lgssd/lsvcgssd not unique" + fi +} + +test_100() { + local facet=mds + + # cleanup everything at first + cleanupall + + echo "bring up gss daemons..." + start_gss_daemons + + echo "check with someone already running..." + check_multiple_gss_daemons $facet + + echo "check with someone run & finished..." + do_facet $facet killall -q -2 lgssd lsvcgssd || true + sleep 5 # wait fully exit + check_multiple_gss_daemons $facet + + echo "check refresh..." + do_facet $facet killall -q -2 lgssd lsvcgssd || true + sleep 5 # wait fully exit + do_facet $facet ipcrm -S 0x3b92d473 + do_facet $facet ipcrm -S 0x3a92d473 + check_multiple_gss_daemons $facet + + stop_gss_daemons +} +run_test 100 "start more multiple gss daemons" + +TMPDIR=$OLDTMPDIR +TMP=$OLDTMP +HOME=$OLDHOME + +log "cleanup: ======================================================" +if [ "`mount | grep ^$NAME`" ]; then + rm -rf $DIR/[Rdfs][1-9]* +fi + +cleanupall -f || error "cleanup failed" + + +echo '=========================== finished ===============================' +[ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true diff --git a/lustre/tests/sanity-lmv.sh b/lustre/tests/sanity-lmv.sh new file mode 100644 index 0000000..0c76144 --- /dev/null +++ b/lustre/tests/sanity-lmv.sh @@ -0,0 +1,397 @@ +#!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31" +set -e + +ONLY=${ONLY:-"$*"} +# bug number for skipped test: 2108 +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! +#case `uname -r` in +#2.6.*) ALWAYS_EXCEPT="$ALWAYS_EXCEPT 54c 55" # bug 3117 +#esac + +[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH +export SECURITY=${SECURITY:-"null"} + +TMP=${TMP:-/tmp} +FSTYPE=${FSTYPE:-ldiskfs} + +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} +CREATETEST=${CREATETEST:-createtest} +LFS=${LFS:-lfs} +LSTRIPE=${LSTRIPE:-"$LFS setstripe"} +LFIND=${LFIND:-"$LFS find"} +LVERIFY=${LVERIFY:-ll_dirstripe_verify} +LCTL=${LCTL:-lctl} +MCREATE=${MCREATE:-mcreate} +OPENFILE=${OPENFILE:-openfile} +OPENUNLINK=${OPENUNLINK:-openunlink} +TOEXCL=${TOEXCL:-toexcl} +TRUNCATE=${TRUNCATE:-truncate} +MUNLINK=${MUNLINK:-munlink} +SOCKETSERVER=${SOCKETSERVER:-socketserver} +SOCKETCLIENT=${SOCKETCLIENT:-socketclient} +IOPENTEST1=${IOPENTEST1:-iopentest1} +IOPENTEST2=${IOPENTEST2:-iopentest2} +PTLDEBUG=${PTLDEBUG:-0} + +export NAME=${NAME:-lmv} + +SAVE_PWD=$PWD + +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh} + +cleanup() { + echo -n "cln.." + cleanupall ${FORCE} $* || { echo "FAILed to clean up"; exit 20; } +} +CLEANUP=${CLEANUP:-:} + +setup() { + echo -n "mnt.." + load_modules + setupall || exit 10 + echo "done" +} + +SETUP=${SETUP:-:} + +log() { + echo "$*" + $LCTL mark "$*" 2> /dev/null || true +} + +trace() { + log "STARTING: $*" + strace -o $TMP/$1.strace -ttt $* + RC=$? + log "FINISHED: $*: rc $RC" + return 1 +} +TRACE=${TRACE:-""} + +check_kernel_version() { + VERSION_FILE=$LPROC/version + WANT_VER=$1 + [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1 + GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE) + [ $GOT_VER -ge $WANT_VER ] && return 0 + log "test needs at least kernel version $WANT_VER, running $GOT_VER" + return 1 +} + +_basetest() { + echo $* +} + +basetest() { + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 +} + +run_one() { + if ! grep -q $DIR /proc/mounts; then + $SETUP + fi + testnum=$1 + message=$2 + BEFORE=`date +%s` + log "== test $testnum: $message= `date +%H:%M:%S` ($BEFORE)" + export TESTNAME=test_$testnum + export tfile=f${testnum} + export tdir=d${base} + test_${testnum} || error "exit with rc=$?" + unset TESTNAME + pass "($((`date +%s` - $BEFORE))s)" + cd $SAVE_PWD + $CLEANUP +} + +build_test_filter() { + [ "$ALWAYS_EXCEPT$EXCEPT$SANITY_EXCEPT" ] && \ + echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITY_EXCEPT`" + + for O in $ONLY; do + eval ONLY_${O}=true + done + for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do + eval EXCEPT_${E}=true + done +} + +_basetest() { + echo $* +} + +basetest() { + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 +} + +run_test() { + export base=`basetest $1` + if [ "$ONLY" ]; then + testname=ONLY_$1 + if [ ${!testname}x != x ]; then + run_one $1 "$2" + return $? + fi + testname=ONLY_$base + if [ ${!testname}x != x ]; then + run_one $1 "$2" + return $? + fi + echo -n "." + return 0 + fi + testname=EXCEPT_$1 + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1" + return 0 + fi + testname=EXCEPT_$base + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1 (base $base)" + return 0 + fi + run_one $1 "$2" + return $? +} + +[ "$SANITYLOG" ] && rm -f $SANITYLOG || true + +error() { + sysctl -w lustre.fail_loc=0 + log "FAIL: $TESTNAME $@" + $LCTL dk $TMP/lustre-log-$TESTNAME.log + if [ "$SANITYLOG" ]; then + echo "FAIL: $TESTNAME $@" >> $SANITYLOG + else + exit 1 + fi +} + +pass() { + echo PASS $@ +} + +mounted_lustre_filesystems() { + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts +} + +MOUNTED="`mounted_lustre_filesystems`" +if [ -z "$MOUNTED" ]; then + formatall + setupall + MOUNTED="`mounted_lustre_filesystems`" + [ -z "$MOUNTED" ] && error "NAME=$NAME not mounted" + I_MOUNTED=yes +fi + +[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once" + +DIR=${DIR:-$MOUNT} +[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 + +LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` +OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` +STRIPECOUNT=`cat $LPROC/lov/$LOVNAME/stripecount` +STRIPESIZE=`cat $LPROC/lov/$LOVNAME/stripesize` +ORIGFREE=`cat $LPROC/lov/$LOVNAME/kbytesavail` +MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} +MDS=$(\ls $LPROC/mdt 2> /dev/null | grep -v num_refs | tail -n 1) + +[ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo +[ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo +rm -rf $DIR/[Rdfs][1-9]* + +build_test_filter + +if [ "${ONLY}" = "MOUNT" ] ; then + echo "Lustre is up, please go on" + exit +fi + +echo "preparing for tests involving mounts" +EXT2_DEV=${EXT2_DEV:-$TMP/SANITY.LOOP} +touch $EXT2_DEV +mke2fs -j -F $EXT2_DEV 8000 > /dev/null +echo # add a newline after mke2fs. + +umask 077 + +test_0a() { + mkdir $DIR/0a0 || error + for ((i=0;i<5000;i++)); do + mkdir $DIR/0a0/`uuidgen -t` || error + done + rm -rf $DIR/0a0 || error +} +#run_test 0a " create random names =============================" + +test_1a() { + mkdir $DIR/1a0 || error + createmany -o $DIR/1a0/f 5000 || error + rmdir $DIR/1a0 && error + rm -rf $DIR/1a0 || error +} +run_test 1a " remove splitted dir =============================" + +test_1b() { + mkdir $DIR/1b0 || error + createmany -o $DIR/1b0/f 5000 || error + unlinkmany $DIR/1b0/f 5000 || error + NUM=`ls $DIR/1b0 | wc -l` + if [ $NUM -ne 0 ] ; then + echo "dir must be empty" + error + fi + + touch $DIR/1b0/file0 || error + touch $DIR/1b0/file1 || error + touch $DIR/1b0/file2 || error + + echo "3 files left" + rmdir $DIR/1b0 && error + rm -f $DIR/1b0/file0 + + echo "2 files left" + rmdir $DIR/1b0 && error + rm -f $DIR/1b0/file1 + + echo "1 files left" + rmdir $DIR/1b0 && error + rm -f $DIR/1b0/file2 + + echo "0 files left" + rmdir $DIR/1b0 || error +} +run_test 1b " remove splitted dir =============================" + +test_1c() { + mkdir $DIR/1b1 || error + createmany -o $DIR/1b1/f 5000 || error + unlinkmany $DIR/1b1/f 5000 || error + NUM=`ls $DIR/1b1 | wc -l` + if [ $NUM -ne 0 ] ; then + echo "dir must be empty" + error + fi + touch $DIR/1b1/file0 || error + touch $DIR/1b1/file1 || error + touch $DIR/1b1/file2 || error + + ls $DIR/1b1/ + log "3 files left" + rmdir $DIR/1b1 && error + rm -f $DIR/1b1/file0 + + ls $DIR/1b1/ + log "2 files left" + rmdir $DIR/1b1 && error + rm -f $DIR/1b1/file1 + + ls $DIR/1b1/ + log "1 files left" + rmdir $DIR/1b1 && error + rm -f $DIR/1b1/file2 + + ls $DIR/1b1/ + log "0 files left" + rmdir $DIR/1b1 || error +} +run_test 1c " remove splitted cross-node dir =============================" + +test_2a() { + mkdir $DIR/2a0 || error + createmany -o $DIR/2a0/f 5000 || error + NUM=`ls $DIR/2a0 | wc -l` + echo "found $NUM files" + if [ $NUM -ne 5000 ]; then + echo "wrong number of files: $NUM" + error + fi + rm -rf $DIR/2a0 || error +} +run_test 2a " list splitted dir =============================" + +test_2b() { + mkdir $DIR/2b1 || error + createmany -o $DIR/2b1/f 5000 || error + $CLEAN + $START + statmany -l $DIR/2b1/f 5000 5000 || error + statmany -s $DIR/2b1/f 5000 5000 || error + rm -rf $DIR/2b1 || error +} +run_test 2b " list splitted dir after remount =============================" + +test_3a() { + mkdir $DIR/3a0 || error + for i in `seq 100`; do + mkdir $DIR/3a0/d${i} || error + done + createmany -o $DIR/3a0/f 5000 || error + rm -rf $DIR/3a0 || error +} +run_test 3a " dir splitting with cross-ref =============================" + +test_3b() { + mkdir $DIR/3b1 || error + createmany -m $DIR/3b1/f 5000 || error + rm -rf $DIR/3b1 || error +} +run_test 3b " dir splitting via createmany -m =============================" + +test_3c() { + mkdir $DIR/3c1 || error + echo "MDS nodes: $MDSCOUNT" + for j in `seq 3`; do + for i in `seq 10`; do + $LFS dirstripe $DIR/3c1/d-${j}-${i} $j || error + createmany -m $DIR/3c1/d-${j}-${i}/m 200 || error + createmany -o $DIR/3c1/d-${j}-${i}/o 200 || error + done + done + rm -rf $DIR/3c1 || error +} + +#run_test 3c " dir splitting via lfs stripe =============================" + +test_4a() { + let rr=0 + while let "rr < 33000"; do + if let "rr % 2000 == 0"; then + echo "$rr" + fi + mkdir $DIR/4a1 || error + rm -rf $DIR/4a1 + let "rr = rr + 1" + done +} + +## this test is very time-consuming, don't run it by default +#run_test 4a " FIDS/ nlink overflow test =============================" + +TMPDIR=$OLDTMPDIR +TMP=$OLDTMP +HOME=$OLDHOME + +log "cleanup: ======================================================" +if [ "`mount | grep ^$NAME`" ]; then + rm -rf $DIR/[Rdfs][1-9]* +fi +if [ "$I_MOUNTED" = "yes" ]; then + cleanupall -f || error "cleanup failed" +fi + +echo '=========================== finished ===============================' +[ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index dd1dc88..0379429 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -194,7 +194,7 @@ STRIPECOUNT=`cat $LPROC/lov/$LOVNAME/stripecount` STRIPESIZE=`cat $LPROC/lov/$LOVNAME/stripesize` ORIGFREE=`cat $LPROC/lov/$LOVNAME/kbytesavail` MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} -MDS=$(\ls $LPROC/mds 2> /dev/null | grep -v num_refs | tail -n 1) +MDS=$(\ls $LPROC/mdt 2> /dev/null | grep -v num_refs | tail -n 1) TSTDIR=$DIR/quota_test_dir TSTDIR2=$DIR2/quota_test_dir SHOW_QUOTA_USER="$LFS quota -u $TSTUSR $MOUNT" @@ -617,7 +617,7 @@ test_6() { return 0; fi - LIMIT=$((BUNIT_SZ * (OSTCOUNT + 1) * 5)) # 5 bunits per server + LIMIT=$(($BUNIT_SZ * $(($OSTCOUNT + 1)) * 5)) # 5 bunits per server FILEA="$TSTDIR/quota_tst60_a" FILEB="$TSTDIR/quota_tst60_b" diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh new file mode 100644 index 0000000..b20c986 --- /dev/null +++ b/lustre/tests/sanity-sec.sh @@ -0,0 +1,370 @@ +#!/bin/bash +# +# Run select tests by setting SEC_ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# + +set -e + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin + +SEC_ONLY=${SEC_ONLY:-"$*"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +[ "$ALWAYS_EXCEPT$EXCEPT" ] && \ + echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`" + +TMP=${TMP:-/tmp} +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +RUNAS=${RUNAS:-runas} + +log() { + echo "$*" + $LCTL mark "$*" 2> /dev/null || true +} + +SANITYSECLOG=${SANITYSECLOG:-/tmp/sanity-sec.log} + +[ "$SANITYSECLOG" ] && rm -f $SANITYSECLOG || true + +sec_error() { + log "FAIL: $TESTNAME $@" + if [ "$SANITYSECLOG" ]; then + echo "FAIL: $TESTNAME $@" >> $SANITYSECLOG + else + exit 1 + fi +} + +pass() { + echo PASS $@ +} + +ID1=500 +ID2=501 + +USER1=`cat /etc/passwd|grep :$ID1:$ID1:|cut -d: -f1` +USER2=`cat /etc/passwd|grep :$ID2:$ID2:|cut -d: -f1` + +if [ ! "$USER1" ]; then + echo "===== Please add user1 (uid=$ID1 gid=$ID1)! Skip sanity-sec =====" + sec_error "===== Please add user1 (uid=$ID1 gid=$ID1)! =====" + exit 0 +fi + +if [ ! "$USER2" ]; then + echo "===== Please add user2 (uid=$ID2 gid=$ID2)! Skip sanity-sec =====" + sec_error "===== Please add user2 (uid=$ID2 gid=$ID2)! =====" + exit 0 +fi + +export NAME=${NAME:-local} + +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + +if [ ! -z "$USING_KRB5" ]; then + $RUNAS -u $ID1 krb5_login.sh || exit 1 + $RUNAS -u $ID2 krb5_login.sh || exit 1 +fi + +sec_run_one() { + BEFORE=`date +%s` + log "== test $1 $2= `date +%H:%M:%S` ($BEFORE)" + export TESTNAME=test_$1 + test_$1 || sec_error "exit with rc=$?" + unset TESTNAME + pass "($((`date +%s` - $BEFORE))s)" +} + +build_test_filter() { + for O in $SEC_ONLY; do + eval SEC_ONLY_${O}=true + done + for E in $EXCEPT $ALWAYS_EXCEPT; do + eval EXCEPT_${E}=true + done +} + +_basetest() { + echo $* +} + +basetest() { + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 +} + +sec_run_test() { + base=`basetest $1` + if [ "$SEC_ONLY" ]; then + testname=SEC_ONLY_$1 + if [ ${!testname}x != x ]; then + sec_run_one $1 "$2" + return $? + fi + testname=SEC_ONLY_$base + if [ ${!testname}x != x ]; then + sec_run_one $1 "$2" + return $? + fi + echo -n "." + return 0 + fi + testname=EXCEPT_$1 + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1" + return 0 + fi + testname=EXCEPT_$base + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1 (base $base)" + return 0 + fi + sec_run_one $1 "$2" + return $? +} + +mounted_lustre_filesystems() { + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts +} + +MOUNTED="`mounted_lustre_filesystems`" +if [ -z "$MOUNTED" ]; then + formatall + setupall + MOUNTED="`mounted_lustre_filesystems`" + [ -z "$MOUNTED" ] && sec_error "NAME=$NAME not mounted" + S_MOUNTED=yes +fi + +[ `echo $MOUNT | wc -w` -gt 1 ] && sec_error "NAME=$NAME mounted more than once" + +DIR=${DIR:-$MOUNT} +[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && \ + sec_cleanup && exit 99 + +[ `ls -l $LPROC/ldlm 2> /dev/null | grep lustre-MDT | wc -l` -gt 1 ] \ + && echo "skip multi-MDS test" && sec_cleanup && exit 0 + +if [ -z "`lsmod | grep mdt`" ]; then + LOCAL_MDT=0 + echo "remote mdt" + EXCEPT="$EXCEPT 1 3" +else + LOCAL_MDT=1 + echo "local mdt" + EXCEPT="$EXCEPT 1 2 3" +fi + +LPROC=/proc/fs/lustre +ENABLE_IDENTITY=/usr/sbin/l_getidentity +DISABLE_IDENTITY=NONE +LUSTRE_CONF_DIR=/etc/lustre +SETXID_CONF=$LUSTRE_CONF_DIR/setxid.conf +SETXID_CONF_BAK=$LUSTRE_CONF_DIR/setxid.conf.bak + +if [ $LOCAL_MDT -eq 1 ]; then + MDT=$(\ls $LPROC/mdt 2> /dev/null | grep -v num_refs | tail -n 1) + IDENTITY_UPCALL=$LPROC/mdt/$MDT/identity_upcall + IDENTITY_UPCALL_BAK=`more $IDENTITY_UPCALL` + IDENTITY_FLUSH=$LPROC/mdt/$MDT/identity_flush + ROOTSQUASH_UID=$LPROC/mdt/$MDT/rootsquash_uid + ROOTSQUASH_GID=$LPROC/mdt/$MDT/rootsquash_gid + NOSQUASH_NIDS=$LPROC/mdt/$MDT/nosquash_nids +fi + +CLIENT_TYPE=$LPROC/llite/*/client_type +grep "local client" $CLIENT_TYPE > /dev/null 2>&1 && EXCEPT="$EXCEPT 2" +grep "remote client" $CLIENT_TYPE > /dev/null 2>&1 && EXCEPT="$EXCEPT 1 3" + +build_test_filter + +setup() { + if [ -f "$SETXID_CONF" ]; then + mv -f $SETXID_CONF $SETXID_CONF_BAK + else + rm -f $SETXID_CONF_BAK + fi + + if [ $LOCAL_MDT -eq 1 ]; then + echo $ENABLE_IDENTITY > $IDENTITY_UPCALL + echo -1 > $IDENTITY_FLUSH + fi + + $RUNAS -u $ID1 ls $DIR + $RUNAS -u $ID2 ls $DIR +} +setup + +# run as different user +test_0() { + rm -rf $DIR/d0 + mkdir $DIR/d0 + + chown $USER1 $DIR/d0 || sec_error + $RUNAS -u $ID1 ls $DIR || sec_error + $RUNAS -u $ID1 touch $DIR/f0 && sec_error + $RUNAS -u $ID1 touch $DIR/d0/f1 || sec_error + $RUNAS -u $ID2 touch $DIR/d0/f2 && sec_error + touch $DIR/d0/f3 || sec_error + chown root $DIR/d0 + chgrp $USER1 $DIR/d0 + chmod 775 $DIR/d0 + $RUNAS -u $ID1 touch $DIR/d0/f4 || sec_error + $RUNAS -u $ID2 touch $DIR/d0/f5 && sec_error + touch $DIR/d0/f6 || sec_error + + rm -rf $DIR/d0 +} +sec_run_test 0 "uid permission =============================" + +# setuid/gid +test_1() { + rm -rf $DIR/d1 + mkdir $DIR/d1 + + chown $USER1 $DIR/d1 || sec_error + $RUNAS -u $ID2 -v $ID1 touch $DIR/d1/f0 && sec_error + echo "* $ID2 setuid" > $SETXID_CONF + echo "enable uid $ID2 setuid" + echo -1 > $IDENTITY_FLUSH + $RUNAS -u $ID2 -v $ID1 touch $DIR/d1/f1 || sec_error + + chown root $DIR/d1 + chgrp $USER1 $DIR/d1 + chmod 770 $DIR/d1 + $RUNAS -u $ID2 -g $ID2 touch $DIR/d1/f2 && sec_error + echo "* $ID2 setuid,setgid" > $SETXID_CONF + echo "enable uid $ID2 setuid,setgid" + echo -1 > $IDENTITY_FLUSH + $RUNAS -u $ID2 -g $ID2 -j $ID1 touch $DIR/d1/f3 || sec_error + $RUNAS -u $ID2 -v $ID1 -g $ID2 -j $ID1 touch $DIR/d1/f4 || sec_error + + rm -f $SETXID_CONF + rm -rf $DIR/d1 + echo -1 > $IDENTITY_FLUSH +} +sec_run_test 1 "setuid/gid =============================" + +# lfs getfacl/setfacl +test_2() { + rm -rf $DIR/d2 + mkdir $DIR/d2 + chmod 755 $DIR/d2 + echo xxx > $DIR/d2/f0 + chmod 644 $DIR/d2/f0 + + $LFS getfacl $DIR/d2/f0 || sec_error + $RUNAS -u $ID1 cat $DIR/d2/f0 || sec_error + $RUNAS -u $ID1 touch $DIR/d2/f0 && sec_error + + $LFS setfacl -m u:$USER1:w $DIR/d2/f0 || sec_error + $LFS getfacl $DIR/d2/f0 || sec_error + echo "set user $USER1 write permission on file $DIR/d2/fo" + $RUNAS -u $ID1 touch $DIR/d2/f0 || sec_error + $RUNAS -u $ID1 cat $DIR/d2/f0 && sec_error + + rm -rf $DIR/d2 +} +sec_run_test 2 "lfs getfacl/setfacl =============================" + +# rootsquash +test_3() { + $LCTL conf_param $MDT.mdt.nosquash_nids=none + while grep LNET_NID_ANY $NOSQUASH_NIDS > /dev/null; do sleep 1; done + $LCTL conf_param $MDT.mdt.rootsquash_uid=0 + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDT.mdt.rootsquash_gid=0 + while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done + + rm -rf $DIR/d3 + mkdir $DIR/d3 + chown $USER1 $DIR/d3 + chmod 700 $DIR/d3 + $LCTL conf_param $MDT.mdt.rootsquash_uid=$ID1 + echo "set rootsquash uid = $ID1" + while [ "`cat $ROOTSQUASH_UID`" -ne $ID1 ]; do sleep 1; done + touch $DIR/f3_0 && sec_error + touch $DIR/d3/f3_1 || sec_error + + $LCTL conf_param $MDT.mdt.rootsquash_uid=0 + echo "disable rootsquash" + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + chown root $DIR/d3 + chgrp $USER2 $DIR/d3 + chmod 770 $DIR/d3 + + $LCTL conf_param $MDT.mdt.rootsquash_uid=$ID1 + echo "set rootsquash uid = $ID1" + while [ "`cat $ROOTSQUASH_UID`" -ne $ID1 ]; do sleep 1; done + touch $DIR/d3/f3_2 && sec_error + $LCTL conf_param $MDT.mdt.rootsquash_gid=$ID2 + echo "set rootsquash gid = $ID2" + while [ "`cat $ROOTSQUASH_GID`" -ne $ID2 ]; do sleep 1; done + touch $DIR/d3/f3_3 || sec_error + + $LCTL conf_param $MDT.mdt.nosquash_nids=* + echo "add host in rootsquash skip list" + while ! grep LNET_NID_ANY $NOSQUASH_NIDS > /dev/null; + do sleep 1; + done + touch $DIR/f3_4 || sec_error + + $LCTL conf_param $MDT.mdt.rootsquash_uid=0 + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDT.mdt.rootsquash_gid=0 + while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDT.mdt.nosquash_nids=none + rm -rf $DIR/d3 + rm -f $DIR/f3_? +} +sec_run_test 3 "rootsquash =============================" + +# bug 3285 - supplementary group should always succeed (see do_init_ucred), +# NB: the supplementary groups are set for local client only, as for remote +# client, the groups of the specified uid on MDT will be obtained by +# upcall /sbin/l_getidentity and used. +test_4() { + mkdir $DIR/d4 + chmod 771 $DIR/d4 + chgrp $ID1 $DIR/d4 + $RUNAS -u $ID1 ls $DIR/d4 || sec_error "setgroups(1) failed" + grep "local client" $CLIENT_TYPE > /dev/null 2>&1 && \ + ($RUNAS -u $ID2 -G1,2,$ID1 ls $DIR/d4 || \ + sec_error "setgroups(2) failed") + $RUNAS -u $ID2 -G1,2 ls $DIR/d4 && sec_error "setgroups(3) failed" + rm -rf $DIR/d4 +} +sec_run_test 4 "set supplementary group ===============" + +log "cleanup: ======================================================" + +unsetup() { + if [ -f "$SETXID_CONF_BAK" ]; then + mv -f $SETXID_CONF_BAK $SETXID_CONF + fi + + if [ $LOCAL_MDT -eq 1 ]; then + echo $IDENTITY_UPCALL_BAK > $IDENTITY_UPCALL + echo -1 > $IDENTITY_FLUSH + fi + + $RUNAS -u $ID1 ls $DIR + $RUNAS -u $ID2 ls $DIR +} +unsetup + +sec_cleanup() { + if [ "$S_MOUNTED" = "yes" ]; then + cleanupall -f || sec_error "cleanup failed" + fi +} +sec_cleanup + +echo '=========================== finished ===============================' +[ -f "$SANITYSECLOG" ] && cat $SANITYSECLOG && exit 1 || true diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 39db040..53efa0b 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -9,19 +9,23 @@ set -e ONLY=${ONLY:-"$*"} # bug number for skipped test: 4900 4900 2108 9789 3637 9789 3561 5188/5749 10764 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27o 27q 42a 42b 42c 42d 45 68 75"} +# bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443 +#ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 36f 36g 51b 51c 63 64b 71 73 77 101 115" +[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 36f 36g 51b 51c 63 64b 71 73 101 115" -# Tests that fail on uml, maybe elsewhere, FIXME +# Tests that fail on uml CPU=`awk '/model/ {print $4}' /proc/cpuinfo` # buffer i/o errs sock spc runas [ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 27m 27n 27o 27p 27q 27r 31d 54a 64b 99a 99b 99c 99d 99e 99f 101" +# test76 is not valid with FIDs because inode numbers are not reused +ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" + case `uname -r` in -2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" - [ "$CPU" = "UML" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 105a";; -2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;; +2.4*) FSTYPE=${FSTYPE:-ext3} ;; +2.6*) FSTYPE=${FSTYPE:-ldiskfs} ;; *) error "unsupported kernel" ;; esac @@ -55,6 +59,7 @@ MEMHOG=${MEMHOG:-memhog} DIRECTIO=${DIRECTIO:-directio} ACCEPTOR_PORT=${ACCEPTOR_PORT:-988} UMOUNT=${UMOUNT:-"umount -d"} +STRIPES_PER_OBJ=-1 if [ $UID -ne 0 ]; then echo "Warning: running as non-root uid $UID" @@ -80,7 +85,12 @@ SAVE_PWD=$PWD LUSTRE=${LUSTRE:-`dirname $0`/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} + +if [ ! -z "$USING_KRB5" ]; then + $RUNAS krb5_login.sh || exit 1 + $RUNAS -u $(($RUNAS_ID + 1)) krb5_login.sh || exit 1 +fi cleanup() { echo -n "cln.." @@ -228,8 +238,6 @@ if [ -z "$MOUNTED" ]; then I_MOUNTED=yes fi -[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once" - DIR=${DIR:-$MOUNT} [ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 @@ -239,7 +247,7 @@ STRIPECOUNT=`cat $LPROC/lov/$LOVNAME/stripecount` STRIPESIZE=`cat $LPROC/lov/$LOVNAME/stripesize` ORIGFREE=`cat $LPROC/lov/$LOVNAME/kbytesavail` MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} -MDS=$(\ls $LPROC/mds 2> /dev/null | grep -v num_refs | tail -n 1) +MDS=$(\ls $LPROC/mdt 2> /dev/null | grep -v num_refs | tail -n 1) [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo @@ -247,6 +255,11 @@ rm -rf $DIR/[Rdfs][1-9]* build_test_filter +if [ "${ONLY}" = "MOUNT" ] ; then + echo "Lustre is up, please go on" + exit +fi + echo "preparing for tests involving mounts" EXT2_DEV=${EXT2_DEV:-$TMP/SANITY.LOOP} touch $EXT2_DEV @@ -272,6 +285,7 @@ run_test 0b "chmod 0755 $DIR =============================" test_1a() { mkdir $DIR/d1 mkdir $DIR/d1/d2 + mkdir $DIR/d1/d2 && error "we expect EEXIST, but not returned" $CHECKSTAT -t dir $DIR/d1/d2 || error } run_test 1a "mkdir .../d1; mkdir .../d1/d2 =====================" @@ -546,6 +560,18 @@ test_17d() { } run_test 17d "symlinks: create dangling ========================" +test_17f() { + mkdir -p $DIR/d17f + ln -s 1234567890/2234567890/3234567890/4234567890 $DIR/d17f/111 + ln -s 1234567890/2234567890/3234567890/4234567890/5234567890/6234567890 $DIR/d17f/222 + ln -s 1234567890/2234567890/3234567890/4234567890/5234567890/6234567890/7234567890/8234567890 $DIR/d17f/333 + ln -s 1234567890/2234567890/3234567890/4234567890/5234567890/6234567890/7234567890/8234567890/9234567890/a234567890/b234567890 $DIR/d17f/444 + ln -s 1234567890/2234567890/3234567890/4234567890/5234567890/6234567890/7234567890/8234567890/9234567890/a234567890/b234567890/c234567890/d234567890/f234567890 $DIR/d17f/555 + ln -s 1234567890/2234567890/3234567890/4234567890/5234567890/6234567890/7234567890/8234567890/9234567890/a234567890/b234567890/c234567890/d234567890/f234567890/aaaaaaaaaa/bbbbbbbbbb/cccccccccc/dddddddddd/eeeeeeeeee/ffffffffff/ $DIR/d17f/666 + ls -l $DIR/d17f +} +run_test 17f "symlinks: long and very long symlink name ========================" + test_18() { touch $DIR/f ls $DIR || error @@ -602,15 +628,15 @@ test_21() { run_test 21 "write to dangling link ============================" test_22() { - mkdir $DIR/d22 - chown $RUNAS_ID $DIR/d22 - # Tar gets pissy if it can't access $PWD *sigh* - (cd $TMP || error "cd $TMP failed"; + WDIR=$DIR/$tdir + mkdir $WDIR + chown $RUNAS_ID $WDIR + (cd $WDIR || error "cd $WDIR failed"; $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \ - $RUNAS tar xfC - $DIR/d22) - ls -lR $DIR/d22/etc || error "ls -lR $DIR/d22/etc failed" - $CHECKSTAT -t dir $DIR/d22/etc || error "checkstat -t dir failed" - $CHECKSTAT -u \#$RUNAS_ID $DIR/d22/etc || error "checkstat -u failed" + $RUNAS tar xf -) + ls -lR $WDIR/etc || error "ls -lR $WDIR/etc failed" + $CHECKSTAT -t dir $WDIR/etc || error "checkstat -t dir failed" + $CHECKSTAT -u \#$RUNAS_ID $WDIR/etc || error "checkstat -u failed" } run_test 22 "unpack tar archive as non-root user ===============" @@ -684,7 +710,7 @@ test_24g() { $CHECKSTAT -a $DIR/R7a/d || error $CHECKSTAT -t dir $DIR/R7b/e || error } -run_test 24g "mkdir .../R7{a,b}/d; mv .../R7a/d .../R5b/e ======" +run_test 24g "mkdir .../R7{a,b}/d; mv .../R7a/d .../R7b/e ======" test_24h() { mkdir $DIR/R8{a,b} @@ -720,8 +746,8 @@ test_24k() { mkdir $DIR/R11a $DIR/R11a/d touch $DIR/R11a/f mv $DIR/R11a/f $DIR/R11a/d - $CHECKSTAT -a $DIR/R11a/f || error - $CHECKSTAT -t file $DIR/R11a/d/f || error + $CHECKSTAT -a $DIR/R11a/f || error + $CHECKSTAT -t file $DIR/R11a/d/f || error } run_test 24k "touch .../R11a/f; mv .../R11a/f .../R11a/d =======" @@ -886,7 +912,7 @@ test_27a() { $SETSTRIPE $DIR/d27/f0 65536 0 1 || error "lstripe failed" $CHECKSTAT -t file $DIR/d27/f0 || error "checkstat failed" pass - log "== test_27b: write to one stripe file =========================" + log "== test_27a: write to one stripe file =========================" cp /etc/hosts $DIR/d27/f0 || error } run_test 27a "one stripe file ==================================" @@ -992,6 +1018,7 @@ test_27m() { [ `$GETSTRIPE $DIR/d27/f27m_$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "0"` ] && \ error "OST0 was full but new created file still use it" rm -r $DIR/d27 + sleep 15 } run_test 27m "create file while OST0 was full ==================" @@ -1008,16 +1035,17 @@ exhaust_precreations() { OST=$(grep ${OSTIDX}": " $LPROC/lov/${LOVNAME}/target_obd | \ awk '{print $2}' | sed -e 's/_UUID$//') # on the mdt's osc - last_id=$(cat $LPROC/osc/${OST}-osc/prealloc_last_id) - next_id=$(cat $LPROC/osc/${OST}-osc/prealloc_next_id) + OSC=$(ls $LPROC/osc | grep "${OST}-osc-MDT0000") + last_id=$(cat $LPROC/osc/${OSC}/prealloc_last_id) + next_id=$(cat $LPROC/osc/${OSC}/prealloc_next_id) - mkdir -p $DIR/d27/${OST} + mkdir -p $DIR/d27/${OST} $SETSTRIPE $DIR/d27/${OST} 0 $OSTIDX 1 #define OBD_FAIL_OST_ENOSPC 0x215 sysctl -w lustre.fail_loc=0x215 echo "Creating to objid $last_id on ost $OST..." createmany -o $DIR/d27/${OST}/f $next_id $((last_id - next_id + 2)) - grep '[0-9]' $LPROC/osc/${OST}-osc/prealloc* + grep '[0-9]' $LPROC/osc/${OSC}/prealloc* reset_enospc $2 } @@ -1054,6 +1082,7 @@ test_27o() { touch $DIR/d27/f27o && error "able to create $DIR/d27/f27o" reset_enospc + rm -rf $DIR/d27/* } run_test 27o "create file with all full OSTs (should error) ====" @@ -1108,9 +1137,9 @@ test_27r() { run_test 27r "stripe file with some full OSTs (shouldn't LBUG) =" test_27s() { # bug 10725 - mkdir -p $DIR/$tdir - $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \ - error "stripe width >= 2^32 succeeded" || true + mkdir -p $DIR/$tdir + $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \ + error "stripe width >= 2^32 succeeded" || true } run_test 27s "lsm_xfersize overflow (should error) (bug 10725)" @@ -1154,7 +1183,7 @@ test_29() { touch $DIR/d29/foo log 'first d29' ls -l $DIR/d29 - MDCDIR=${MDCDIR:-`find $LPROC/ldlm/namespaces | grep mdc | head -1`} + MDCDIR=${MDCDIR:-$LPROC/ldlm/namespaces/*-mdc-*} LOCKCOUNTORIG=`cat $MDCDIR/lock_count` LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` [ -z $"LOCKCOUNTORIG" ] && echo "No mdc lock count" && return 1 @@ -1180,7 +1209,7 @@ test_29() { run_test 29 "IT_GETATTR regression ============================" test_30() { - cp `which ls` $DIR + cp `which ls` $DIR || cp /bin/ls $DIR $DIR/ls / rm $DIR/ls } @@ -1258,6 +1287,87 @@ test_31f() { # bug 4554 } run_test 31f "remove of open directory with open-unlink file ===" +test_31g() { + echo "-- cross directory link --" + mkdir $DIR/d31g{a,b} + touch $DIR/d31ga/f + ln $DIR/d31ga/f $DIR/d31gb/g + $CHECKSTAT -t file $DIR/d31ga/f || error "source" + [ `stat -c%h $DIR/d31ga/f` == '2' ] || error "source nlink" + $CHECKSTAT -t file $DIR/d31gb/g || error "target" + [ `stat -c%h $DIR/d31gb/g` == '2' ] || error "target nlink" +} +run_test 31g "cross directory link===============" + +test_31h() { + echo "-- cross directory link --" + mkdir $DIR/d31h + mkdir $DIR/d31h/dir + touch $DIR/d31h/f + ln $DIR/d31h/f $DIR/d31h/dir/g + $CHECKSTAT -t file $DIR/d31h/f || error "source" + [ `stat -c%h $DIR/d31h/f` == '2' ] || error "source nlink" + $CHECKSTAT -t file $DIR/d31h/dir/g || error "target" + [ `stat -c%h $DIR/d31h/dir/g` == '2' ] || error "target nlink" +} +run_test 31h "cross directory link under child===============" + +test_31i() { + echo "-- cross directory link --" + mkdir $DIR/d31i + mkdir $DIR/d31i/dir + touch $DIR/d31i/dir/f + ln $DIR/d31i/dir/f $DIR/d31i/g + $CHECKSTAT -t file $DIR/d31i/dir/f || error "source" + [ `stat -c%h $DIR/d31i/dir/f` == '2' ] || error "source nlink" + $CHECKSTAT -t file $DIR/d31i/g || error "target" + [ `stat -c%h $DIR/d31i/g` == '2' ] || error "target nlink" +} +run_test 31i "cross directory link under parent===============" + + +test_31j() { + mkdir $DIR/d31j + mkdir $DIR/d31j/dir1 + ln $DIR/d31j/dir1 $DIR/d31j/dir2 && error "ln for dir" + link $DIR/d31j/dir1 $DIR/d31j/dir3 && error "link for dir" + mlink $DIR/d31j/dir1 $DIR/d31j/dir4 && error "mlink for dir" + mlink $DIR/d31j/dir1 $DIR/d31j/dir1 && error "mlink to the same dir" + return 0 +} +run_test 31j "link for directory===============" + + +test_31k() { + mkdir $DIR/d31k + touch $DIR/d31k/s + touch $DIR/d31k/exist + mlink $DIR/d31k/s $DIR/d31k/t || error "mlink" + mlink $DIR/d31k/s $DIR/d31k/exist && error "mlink to exist file" + mlink $DIR/d31k/s $DIR/d31k/s && error "mlink to the same file" + mlink $DIR/d31k/s $DIR/d31k && error "mlink to parent dir" + mlink $DIR/d31k $DIR/d31k/s && error "mlink parent dir to target" + mlink $DIR/d31k/not-exist $DIR/d31k/foo && error "mlink non-existing to new" + mlink $DIR/d31k/not-exist $DIR/d31k/s && error "mlink non-existing to exist" + return 0 +} +run_test 31k "link to file: the same, non-existing, dir===============" + +test_31m() { + mkdir $DIR/d31m + touch $DIR/d31m/s + mkdir $DIR/d31m2 + touch $DIR/d31m2/exist + mlink $DIR/d31m/s $DIR/d31m2/t || error "mlink" + mlink $DIR/d31m/s $DIR/d31m2/exist && error "mlink to exist file" + mlink $DIR/d31m/s $DIR/d31m2 && error "mlink to parent dir" + mlink $DIR/d31m2 $DIR/d31m/s && error "mlink parent dir to target" + mlink $DIR/d31m/not-exist $DIR/d31m2/foo && error "mlink non-existing to new" + mlink $DIR/d31m/not-exist $DIR/d31m2/s && error "mlink non-existing to exist" + return 0 +} +run_test 31m "link to file: the same, non-existing, dir===============" + test_32a() { echo "== more mountpoints and symlinks =================" [ -e $DIR/d32a ] && rm -fr $DIR/d32a @@ -1614,7 +1724,9 @@ test_36f() { } run_test 36f "utime on file racing with OST BRW write ==========" -export FMD_MAX_AGE=`cat $LPROC/obdfilter/*/client_cache_seconds 2> /dev/null | head -n 1` +if [ -d $LPROC/obdfilter ]; then +export FMD_MAX_AGE=`cat $LPROC/obdfilter/*/client_cache_seconds | head -n 1` +fi test_36g() { [ -z "$FMD_MAX_AGE" ] && echo "skip $TESTNAME for remote OST" && return FMD_BEFORE="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`" @@ -1709,6 +1821,7 @@ stop_writeback() { if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then WRITEBACK_SAVE=`cat /proc/sys/vm/dirty_writeback_centisecs` echo 0 > /proc/sys/vm/dirty_writeback_centisecs + echo 0 > /proc/sys/vm/dirty_writeback_centisecs # save and increase /proc/sys/vm/dirty_ratio DIRTY_RATIO_SAVE=`cat /proc/sys/vm/dirty_ratio` echo $MAX_DIRTY_RATIO > /proc/sys/vm/dirty_ratio @@ -1739,8 +1852,8 @@ test_42a() { stop_writeback sync; sleep 1; sync # just to be safe BEFOREWRITES=`count_ost_writes` - grep "[0-9]" $LPROC/osc/*[oO][sS][cC]*/cur_grant_bytes - dd if=/dev/zero of=$DIR/f42a bs=1024 count=100 + grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur_grant_bytes + dd if=/dev/zero of=$DIR/f42a bs=1024 count=100 AFTERWRITES=`count_ost_writes` [ $BEFOREWRITES -eq $AFTERWRITES ] || \ error "$BEFOREWRITES < $AFTERWRITES" @@ -1823,15 +1936,15 @@ run_test 42d "test complete truncate of file with cached dirty data" test_43() { mkdir $DIR/$tdir cp -p /bin/ls $DIR/$tdir/$tfile - exec 100>> $DIR/$tdir/$tfile + exec 9>> $DIR/$tdir/$tfile $DIR/$tdir/$tfile && error || true - exec 100<&- + exec 9<&- } run_test 43 "execution of file opened for write should return -ETXTBSY" test_43a() { mkdir -p $DIR/d43 - cp -p `which multiop` $DIR/d43/multiop + cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop $DIR/d43/multiop $TMP/test43.junk O_c & MULTIPID=$! sleep 1 @@ -1844,7 +1957,7 @@ run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" test_43b() { mkdir -p $DIR/d43 - cp -p `which multiop` $DIR/d43/multiop + cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop $DIR/d43/multiop $TMP/test43.junk O_c & MULTIPID=$! sleep 1 @@ -1876,7 +1989,7 @@ test_44a() { awk '{print $2}'` local stride=`$LCTL lov_getconfig $DIR | grep default_stripe_size: | \ awk '{print $2}'` - if [ $nstripe -eq 0 ] ; then + if [ $nstripe -eq 0 -o $nstripe -gt 1024 ] ; then nstripe=`$LCTL lov_getconfig $DIR | grep obd_count: | awk '{print $2}'` fi [ -z "$nstripe" ] && error "can't get stripe info" @@ -1884,21 +1997,23 @@ test_44a() { OFFSETS="0 $((stride/2)) $((stride-1))" for offset in $OFFSETS ; do for i in `seq 0 $((nstripe-1))`; do - rm -f $DIR/d44a local GLOBALOFFSETS="" local size=$((((i + 2 * $nstripe )*$stride + $offset))) # Bytes - ll_sparseness_write $DIR/d44a $size || error "ll_sparseness_write" + local myfn=$DIR/d44a-$size + echo "--------writing $myfn at $size" + ll_sparseness_write $myfn $size || error "ll_sparseness_write" GLOBALOFFSETS="$GLOBALOFFSETS $size" - ll_sparseness_verify $DIR/d44a $GLOBALOFFSETS \ + ll_sparseness_verify $myfn $GLOBALOFFSETS \ || error "ll_sparseness_verify $GLOBALOFFSETS" for j in `seq 0 $((nstripe-1))`; do size=$((((j + $nstripe )*$stride + $offset))) # Bytes - ll_sparseness_write $DIR/d44a $size || error "ll_sparseness_write" + ll_sparseness_write $myfn $size || error "ll_sparseness_write" GLOBALOFFSETS="$GLOBALOFFSETS $size" done - ll_sparseness_verify $DIR/d44a $GLOBALOFFSETS \ + ll_sparseness_verify $myfn $GLOBALOFFSETS \ || error "ll_sparseness_verify $GLOBALOFFSETS" + rm -f $myfn done done } @@ -2100,7 +2215,10 @@ test_51() { } run_test 51 "special situations: split htree with empty entry ==" -export NUMTEST=70000 +#export NUMTEST=70000 +# FIXME: I select a relatively small number to do basic test. +# large number may give panic(). debugging on this is going on. +export NUMTEST=70 test_51b() { NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` [ $NUMFREE -lt 21000 ] && \ @@ -2190,9 +2308,8 @@ test_52b() { run_test 52b "immutable flag test (should return errors) =======" test_53() { - [ -z "$MDS" ] && echo "skipping $TESTNAME with remote MDS" && return - - for i in `ls -d $LPROC/osc/*-osc 2> /dev/null` ; do + # only test MDT0000 + for i in `ls -d $LPROC/osc/*-osc-MDT0000 2> /dev/null` ; do ostname=`basename $i | cut -d - -f 1-2` ost_last=`cat $LPROC/obdfilter/$ostname/last_id` mds_last=`cat $i/prealloc_last_id` @@ -2403,6 +2520,7 @@ test_56h() { run_test 56h "check lfs find ! -name =============================" test_57a() { + # note test will not do anything if MDS is not local [ -z "$MDS" ] && echo "skipping $TESTNAME for remote MDS" && return for DEV in `cat $LPROC/mds/*/mntdev`; do dumpe2fs -h $DEV > $TMP/t57a.dump || error "can't access $DEV" @@ -2439,16 +2557,16 @@ test_57b() { $GETSTRIPE $FILEN | grep -q "obdidx" || error "$FILEN missing EA" sleep 1 # make sure we get new statfs data - MDSFREE2="`cat $LPROC/mds/*/kbytesfree 2> /dev/null`" - MDCFREE2="`cat $LPROC/mdc/*/kbytesfree | head -n 1`" - if [ "$MDCFREE2" -lt "$((MDCFREE - 8))" ]; then - if [ "$MDSFREE" != "$MDSFREE2" ]; then - error "MDC before $MDCFREE != after $MDCFREE2" - else - echo "MDC before $MDCFREE != after $MDCFREE2" - echo "unable to confirm if MDS has large inodes" - fi - fi +# MDSFREE2="`cat $LPROC/mds/*/kbytesfree`" +# MDCFREE2="`cat $LPROC/mdc/*/kbytesfree`" +# if [ "$MDCFREE2" -lt "$((MDCFREE - 8))" ]; then +# if [ "$MDSFREE" != "$MDSFREE2" ]; then +# error "MDC before $MDCFREE != after $MDCFREE2" +# else +# echo "MDC before $MDCFREE != after $MDCFREE2" +# echo "unable to confirm if MDS has large inodes" +# fi +# fi rm -rf $DIR/d57b } run_test 57b "default LOV EAs are stored inside large inodes ===" @@ -2471,12 +2589,12 @@ test_59() { run_test 59 "verify cancellation of llog records async =========" TEST60_HEAD="test_60 run $RANDOM" -test_60() { +test_60a() { [ ! -f run-llog.sh ] && echo "missing subtest, skipping" && return log "$TEST60_HEAD - from kernel mode" - sh run-llog.sh +# sh run-llog.sh } -run_test 60 "llog sanity tests run from kernel module ==========" +run_test 60a "llog sanity tests run from kernel module ==========" test_60b() { # bug 6411 dmesg > $DIR/$tfile @@ -2484,6 +2602,16 @@ test_60b() { # bug 6411 [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages"|| true } run_test 60b "limit repeated messages from CERROR/CWARN ========" + +test_60c() { + echo "create 5000 files" + createmany -o $DIR/f60c- 5000 +#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x13c + sysctl -w lustre.fail_loc=0x8000013c + unlinkmany $DIR/f60c- 5000 + sysctl -w lustre.fail_loc=0 +} +run_test 60c "unlink file when mds full" test_61() { f="$DIR/f61" @@ -2552,7 +2680,7 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { df $DIR - grep "[0-9]" $LPROC/osc/*[oO][sS][cC]*/cur* + grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur* } run_test 64a "verify filter grant calculations (in kernel) =====" @@ -2589,7 +2717,14 @@ test_65c() { } run_test 65c "directory setstripe $(($STRIPESIZE * 4)) 1 $(($OSTCOUNT - 1))" -[ $STRIPECOUNT -eq 0 ] && sc=1 || sc=$(($STRIPECOUNT - 1)) +if [ $STRIPECOUNT -eq 0 ]; then + sc=1 +elif [ $STRIPECOUNT -gt 160 ]; then +#LOV_MAX_STRIPE_COUNT is 160, 4294967295(-1) is included. + [ $OSTCOUNT -gt 160 ] && sc=160 || sc=$(($OSTCOUNT - 1)) +else + sc=$(($STRIPECOUNT - 1)) +fi test_65d() { mkdir -p $DIR/d65 @@ -2658,52 +2793,10 @@ test_66() { } run_test 66 "update inode blocks count on client ===============" -test_67() { # bug 3285 - supplementary group fails on MDS, passes on client - [ "$RUNAS_ID" = "$UID" ] && echo "skipping $TESTNAME" && return - check_kernel_version 35 || return 0 - mkdir $DIR/$tdir - chmod 771 $DIR/$tdir - chgrp $RUNAS_ID $DIR/$tdir - $RUNAS -u $RUNAS_ID -g $(($RUNAS_ID + 1)) -G1,2,$RUNAS_ID ls $DIR/$tdir - RC=$? - if [ "$MDS" ]; then - # can't tell which is correct otherwise - GROUP_UPCALL=`cat $LPROC/mds/$MDS/group_upcall` - [ "$GROUP_UPCALL" = "NONE" -a $RC -eq 0 ] && \ - error "no-upcall passed" || true - [ "$GROUP_UPCALL" != "NONE" -a $RC -ne 0 ] && \ - error "upcall failed" || true - fi +test_67() { + sh sanity-sec.sh } -run_test 67 "supplementary group failure (should return error) =" - -cleanup_67b() { - trap 0 - echo NONE > $LPROC/mds/$MDS/group_upcall - set +vx -} - -test_67b() { # bug 3285 - supplementary group fails on MDS, passes on client - T67_UID=${T67_UID:-1} # needs to be in /etc/groups on MDS, gid == uid - [ "$UID" = "$T67_UID" ] && echo "skipping $TESTNAME" && return - check_kernel_version 35 || return 0 - [ -z "$MDS" ] && echo "skipping $TESTNAME - no MDS" && return - GROUP_UPCALL=`cat $LPROC/mds/$MDS/group_upcall` - [ "$GROUP_UPCALL" != "NONE" ] && echo "skip $TESTNAME - upcall" &&return - set -vx - trap cleanup_67b EXIT - mkdir -p $DIR/$tdir - chmod 771 $DIR/$tdir - chgrp $T67_UID $DIR/$tdir - echo `which l_getgroups` > $LPROC/mds/$MDS/group_upcall - l_getgroups -d $T67_UID - $RUNAS -u $T67_UID -g $((T67_UID + 1)) -G8,9 id - $RUNAS -u $T67_UID -g 999 -G8,9,$T67_UID touch $DIR/$tdir/$tfile || \ - error "'touch $DIR/$tdir/$tfile' failed" - [ -f $DIR/$tdir/$tfile ] || error "$DIR/$tdir/$tfile create error" - cleanup_67b -} -run_test 67b "supplementary group test =========================" +run_test 67 "security test =====================================" cleanup_68() { trap 0 @@ -2727,8 +2820,8 @@ swap_used() { # and then consuming memory until it is used. test_68() { [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ "`lsmod|grep obdfilter`" ] && echo "skipping $TESTNAME (local OST)" && \ - return + grep -q obdfilter $LPROC/devices && \ + echo "skip $TESTNAME (local OST)" && return find_loop_dev dd if=/dev/zero of=$DIR/f68 bs=64k count=1024 @@ -2756,6 +2849,8 @@ run_test 68 "support swapping to Lustre ========================" test_69() { [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && echo "skipping $TESTNAME for remote OST" && return + [ ! -z "$USING_KRB5" ] && + echo "skipping $TESTNAME (gss with bulk security will triger oops. re-enable this after b10091 get fixed)" && return f="$DIR/$tfile" touch $f @@ -2814,6 +2909,8 @@ run_test 71 "Running dbench on lustre (don't segment fault) ====" test_72() { # bug 5695 - Test that on 2.6 remove_suid works properly check_kernel_version 43 || return 0 [ "$RUNAS_ID" = "$UID" ] && echo "skipping $TESTNAME" && return + # We had better clear the $DIR to get enough space for dd + rm -rf $DIR/* touch $DIR/f72 chmod 777 $DIR/f72 chmod ug+s $DIR/f72 @@ -2887,6 +2984,9 @@ test75_prep() { } test_75a() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep cp -p ${F128k} ${FHEAD} @@ -2904,6 +3004,9 @@ test_75a() { run_test 75a "TEST join file ====================================" test_75b() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep cp -p ${F128k} ${FTAIL} @@ -2917,6 +3020,9 @@ test_75b() { run_test 75b "TEST join file 2 ==================================" test_75c() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep cp -p ${F128k} ${FTAIL} @@ -2930,6 +3036,9 @@ test_75c() { run_test 75c "TEST join file 3 ==================================" test_75d() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep cp -p ${F128k} ${FHEAD} @@ -2944,6 +3053,9 @@ test_75d() { run_test 75d "TEST join file 4 ==================================" test_75e() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep rm -rf ${FHEAD} || "delete join file error" @@ -2951,6 +3063,9 @@ test_75e() { run_test 75e "TEST join file 5 (remove joined file) =============" test_75f() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return test75_prep cp -p ${F128k} ${F75}_join_10_compare @@ -2968,6 +3083,9 @@ test_75f() { run_test 75f "TEST join file 6 (join 10 files) ==================" test_75g() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return $LFS getstripe ${F75}_join_10 @@ -2982,7 +3100,9 @@ num_inodes() { } test_76() { # bug 1443 - BEFORE_INODES=`num_inodes` + DETH=$(grep deathrow /proc/kallsyms /proc/ksyms 2> /dev/null | wc -l) + [ $DETH -eq 0 ] && echo "No _iget, skipping" && return 0 + BEFORE_INODES=`num_inodes` echo "before inodes: $BEFORE_INODES" for i in `seq 1000`; do touch $DIR/$tfile @@ -2996,109 +3116,139 @@ test_76() { # bug 1443 } run_test 76 "destroy duplicate inodes in client inode cache ====" +export ORIG_CSUM="" +set_checksums() +{ + [ "$ORIG_CSUM" ]||ORIG_CSUM=`cat $LPROC/llite/*/checksum_pages|head -n1` + for f in $LPROC/llite/*/checksum_pages; do + echo $1 >> $f + done + + return 0 +} + F77_TMP=$TMP/f77-temp +F77SZ=8 +setup_f77() { + dd if=/dev/urandom of=$F77_TMP bs=1M count=$F77SZ || \ + error "error writing to $F77_TMP" +} + test_77a() { # bug 10889 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done - if [ ! -f $F77_TMP ]; then - dd if=/dev/urandom of=$F77_TMP bs=1M count=8 || \ - error "error writing to $F77_TMP" - fi - dd if=$F77_TMP of=$DIR/$tfile bs=1M count=8 || error "dd error" - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + [ ! -f $F77_TMP ] && setup_f77 + set_checksums 1 + dd if=$F77_TMP of=$DIR/$tfile bs=1M count=$F77SZ || error "dd error" + set_checksums 0 } run_test 77a "normal checksum read/write operation =============" test_77b() { # bug 10889 - [ ! -f $F77_TMP ] && echo "requires 77a" && return + [ ! -f $F77_TMP ] && setup_f77 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 sysctl -w lustre.fail_loc=0x80000409 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done - dd if=$F77_TMP of=$DIR/f77b bs=8M count=1 conv=sync || \ - error "write error: rc=$?" + set_checksums 1 + dd if=$F77_TMP of=$DIR/f77b bs=1M count=$F77SZ conv=sync || \ + error "dd error: $?" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77b "checksum error on client write ====================" test_77c() { # bug 10889 - [ ! -f $F77_TMP ] && echo "requires 77a" && return + [ ! -f $DIR/f77b ] && log "requires 77b - skipping" && return cancel_lru_locks osc #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 sysctl -w lustre.fail_loc=0x80000408 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done + set_checksums 1 cmp $F77_TMP $DIR/f77b || error "file compare failed" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77c "checksum error on client read ===================" test_77d() { # bug 10889 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 sysctl -w lustre.fail_loc=0x80000409 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done - directio write $DIR/f77 0 1 || error "direct write: rc=$?" + set_checksums 1 + directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ + error "direct write: rc=$?" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77d "checksum error on OST direct write ===============" test_77e() { # bug 10889 + [ ! -f $DIR/f77 ] && log "requires 77d - skipping" && return #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 sysctl -w lustre.fail_loc=0x80000408 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done + set_checksums 1 cancel_lru_locks osc - directio read $DIR/f77 0 1 || error "direct read: rc=$?" + directio read $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ + error "direct read: rc=$?" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77e "checksum error on OST direct read ================" test_77f() { # bug 10889 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 sysctl -w lustre.fail_loc=0x409 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done - directio write $DIR/f77 0 1 && error "direct write succeeded" + set_checksums 1 + directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \ + error "direct write succeeded" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77f "repeat checksum error on write (expect error) ====" test_77g() { # bug 10889 - [ ! -f $F77_TMP ] && echo "requires 77a" && return - [ -z "`lsmod|grep obdfilter`" ] && + [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ echo "skipping $TESTNAME (remote OST)" && return + [ ! -f $F77_TMP ] && setup_f77 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a sysctl -w lustre.fail_loc=0x8000021a - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done - dd if=$F77_TMP of=$DIR/f77 bs=8M count=1 || error "write error: rc=$?" + set_checksums 1 + dd if=$F77_TMP of=$DIR/f77 bs=1M count=$F77SZ || \ + error "write error: rc=$?" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77g "checksum error on OST write ======================" test_77h() { # bug 10889 - [ ! -f $DIR/f77 ] && echo "requires 77a,g" && return - [ -z "`lsmod|grep obdfilter`" ] && + [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ echo "skipping $TESTNAME (remote OST)" && return + [ ! -f $DIR/f77 ] && log "requires 77g - skipping" && return cancel_lru_locks osc #define OBD_FAIL_OST_CHECKSUM_SEND 0x21b sysctl -w lustre.fail_loc=0x8000021b - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 1 >> $f ; done + set_checksums 1 cmp $F77_TMP $DIR/f77 || error "file compare failed" sysctl -w lustre.fail_loc=0 - for f in $LPROC/llite/${FSNAME}-*/checksum_pages ; do echo 0 >> $f ; done + set_checksums 0 } run_test 77h "checksum error on OST read =======================" +[ "$ORIG_CSUM" ] && set_checksums $ORIG_CSUM || true rm -f $F77_TMP unset F77_TMP test_78() { # bug 10901 + NSEQ=5 F78SIZE=$(($(awk '/MemFree:/ { print $2 }' /proc/meminfo) / 1024)) [ $F78SIZE -gt 512 ] && F78SIZE=512 [ $F78SIZE -gt $((MAXFREE / 1024)) ] && F78SIZE=$((MAXFREE / 1024)) - $SETSTRIPE $DIR/$tfile 0 -1 -1 - $DIRECTIO rdwr $DIR/$tfile 0 $F78SIZE 1048576 + SMALLESTOST=`lfs df $DIR |grep OST | awk '{print $4}' |sort -n |head -1` + [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024)) ] && \ + F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024)) + $SETSTRIPE $DIR/$tfile 0 -1 -1 || error "setstripe failed" + for i in `seq 1 $NSEQ` + do + echo directIO rdwr round $i of $NSEQ + $DIRECTIO rdwr $DIR/$tfile 0 $F78SIZE 1048576 || error "rdwr failed" + done + + rm -f $DIR/$tfile } run_test 78 "handle large O_DIRECT writes correctly ============" @@ -3195,7 +3345,7 @@ function get_named_value() done } -export CACHE_MAX=`cat /proc/fs/lustre/llite/*/max_cached_mb | head -n 1` +export CACHE_MAX=`cat $LPROC/llite/*/max_cached_mb | head -n 1` cleanup_101() { for s in $LPROC/llite/*/max_cached_mb; do echo $CACHE_MAX > $s @@ -3210,7 +3360,7 @@ test_101() { [ "$CPU" = "UML" ] && nreads=1000 local cache_limit=32 - for s in $LPROC/osc/*/rpc_stats; do + for s in $LPROC/osc/*-osc*/rpc_stats; do echo 0 > $s done trap cleanup_101 EXIT @@ -3232,7 +3382,7 @@ test_101() { cleanup_101 if [ $(($discard * 10)) -gt $nreads ] ;then - for s in $LPROC/osc/*/rpc_stats; do + for s in $LPROC/osc/*-osc*/rpc_stats; do echo $s; cat $s done for s in $LPROC/llite/*/read_ahead_stats; do @@ -3289,7 +3439,7 @@ test_102a() { touch $testfile [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ -z "`grep xattr $LPROC/mdc/*[mM][dD][cC]*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return + [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return [ -z "$(which setfattr 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfattr)" && return echo "set/get xattr..." @@ -3337,7 +3487,7 @@ test_102b() { local testfile=$DIR/$tfile $SETSTRIPE $testfile 65536 1 2 getfattr -d -m "^trusted" $testfile 2> /dev/null | \ - grep "trusted.lov" || error + grep "trusted.lov" || error "can't get trusted.lov from $testfile" local testfile2=${testfile}2 local value=`getfattr -n trusted.lov $testfile 2> /dev/null | \ @@ -3349,8 +3499,8 @@ test_102b() { $GETSTRIPE -v $testfile2 > $tmp_file local stripe_size=`grep "size" $tmp_file| awk '{print $2}'` local stripe_count=`grep "count" $tmp_file| awk '{print $2}'` - [ $stripe_size -eq 65536 ] || error "different stripe size" - [ $stripe_count -eq 2 ] || error "different stripe count" + [ $stripe_size -eq 65536 ] || error "stripe size $stripe_size != 65536" + [ $stripe_count -eq 2 ] || error "stripe count $stripe_count != 2" } run_test 102b "getfattr/setfattr for trusted.lov EAs ============" @@ -3363,7 +3513,7 @@ test_102c() { local testfile=$DIR/$tdir/$tfile $RUNAS $SETSTRIPE $testfile 65536 1 2 $RUNAS getfattr -d -m "^trusted" $testfile 2> /dev/null | \ - grep "trusted.lov" || error + grep "trusted.lov" || error "can't get trusted.lov from $testfile" local testfile2=${testfile}2 local value=`getfattr -n trusted.lov $testfile 2> /dev/null | \ @@ -3375,8 +3525,8 @@ test_102c() { $RUNAS $GETSTRIPE -v $testfile2 > $tmp_file local stripe_size=`grep "size" $tmp_file| awk '{print $2}'` local stripe_count=`grep "count" $tmp_file| awk '{print $2}'` - [ $stripe_size -eq 65536 ] || error "different stripe size" - [ $stripe_count -eq 2 ] || error "different stripe count" + [ $stripe_size -eq 65536 ] || error "stripe size $stripe_size != 65536" + [ $stripe_count -eq 2 ] || error "stripe count $stripe_count != 2" } run_test 102c "non-root getfattr/setfattr for trusted.lov EAs ===========" @@ -3527,8 +3677,9 @@ run_acl_subtest() test_103 () { [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ -z "$(grep acl $LPROC/mdc/*[mM][dD][cC]*/connect_flags)" ] && echo "skipping $TESTNAME (must have acl enabled)" && return + [ -z "$(grep acl $LPROC/mdc/*-mdc-*/connect_flags)" ] && echo "skipping $TESTNAME (must have acl enabled)" && return [ -z "$(which setfacl 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfacl)" && return + [ ! -z "$USING_KRB5" ] && echo "skipping $TESTNAME (could not run under gss)" && return SAVE_UMASK=`umask` umask 0022 @@ -3567,7 +3718,7 @@ test_104() { lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" - OSC=`awk '/-osc-|OSC.*MNT/ {print $4}' $LPROC/devices | head -n 1` + OSC=`awk '/-osc-/ {print $4}' $LPROC/devices | head -n 1` lctl --device %$OSC deactivate lfs df || error "lfs df with deactivated OSC failed" lctl --device %$OSC recover @@ -3609,10 +3760,10 @@ test_105c() { } run_test 105c "lockf when mounted without -o flock test ========" -test_106() { #10921 - mkdir $DIR/d106 - $DIR/d106 && error - chmod 777 $DIR/d106 || error +test_106() { #bug 10921 + mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" + $DIR/$tdir && error "exec $DIR/$tdir succeeded" + chmod 777 $DIR/$tdir || error "chmod $DIR/$tdir failed" } run_test 106 "attempt exec of dir followed by chown of that dir" @@ -3642,6 +3793,17 @@ test_107() { } run_test 107 "Coredump on SIG" +test_110() { + mkdir -p $DIR/d110 + mkdir $DIR/d110/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa || error "mkdir with 255 char fail" + mkdir $DIR/d110/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb && error "mkdir with 256 char should fail, but not" + touch $DIR/d110/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx || error "create with 255 char fail" + touch $DIR/d110/yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy && error ""create with 256 char should fail, but not + + ls -l $DIR/d110 +} +run_test 110 "filename length checking" + test_115() { OSTIO_pre=$(ps -e|grep ll_ost_io|awk '{print $4}'|sort -n|tail -1|\ cut -c11-20) @@ -3671,8 +3833,8 @@ test_115() { run_test 115 "verify dynamic thread creation====================" free_min_max () { - AVAIL=($(cat $LPROC/osc/*[oO][sS][cC]-*/kbytesavail)) - echo OST kbytes available: ${AVAIL[@]} + AVAIL=($(cat $LPROC/osc/*[oO][sS][cC]-[^M]*/kbytesavail)) + echo OST kbytes available: ${AVAIL[@]} MAXI=0; MAXV=${AVAIL[0]} MINI=0; MINV=${AVAIL[0]} for ((i = 0; i < ${#AVAIL[@]}; i++)); do @@ -3690,10 +3852,12 @@ free_min_max () { test_116() { [ "$OSTCOUNT" -lt "2" ] && echo "not enough OSTs" && return + [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && + echo "remote MDS, skipping test" && return echo -n "Free space priority " - cat $LPROC/lov/*/qos_prio_free - DELAY=$(cat $LPROC/lov/*/qos_maxage | head -1 | awk '{print $1}') + cat $LPROC/lov/*-clilov-*/qos_prio_free + DELAY=$(cat $LPROC/lov/*-clilov-*/qos_maxage | head -1 | awk '{print $1}') declare -a AVAIL free_min_max [ $MINV -gt 960000 ] && echo "too much free space in OST$MINI, skip" &&\ @@ -3705,12 +3869,12 @@ test_116() { FILL=$(($MINV / 4)) echo "Filling 25% remaining space in OST${MINI} with ${FILL}Kb" $SETSTRIPE $DIR/$tdir/OST${MINI} 0 $MINI 1 - i=1 + i=0 while [ $FILL -gt 0 ]; do + i=$(($i + 1)) dd if=/dev/zero of=$DIR/$tdir/OST${MINI}/$tfile-$i bs=2M count=1 2>/dev/null FILL=$(($FILL - 2048)) echo -n . - i=$(($i + 1)) done FILL=$(($MINV / 4)) sync @@ -3733,12 +3897,13 @@ test_116() { # now fill using QOS echo writing a bunch of files to QOS-assigned OSTs - i=1 + $SETSTRIPE $DIR/$tdir 0 -1 1 + i=0 while [ $FILL -gt 0 ]; do + i=$(($i + 1)) dd if=/dev/zero of=$DIR/$tdir/$tfile-$i bs=1024 count=200 2>/dev/null FILL=$(($FILL - 200)) echo -n . - i=$(($i + 1)) done echo "wrote $i 200k files" sync @@ -3761,6 +3926,7 @@ test_116() { MINC=$($LFS getstripe --obd $UUID $DIR/$tdir | wc -l) echo "$MINC files created on smaller OST $MINI1" UUID=$(awk '/'$MAXI1': / {print $2; exit}' $LPROC/lov/${FSNAME}-clilov-*/target_obd) + echo $UUID MAXC=$($LFS getstripe --obd $UUID $DIR/$tdir | wc -l) echo "$MAXC files created on larger OST $MAXI1" [ $MINC -gt 0 ] && echo "Wrote $(($MAXC * 100 / $MINC - 100))% more files to larger OST $MAXI1" @@ -3781,14 +3947,42 @@ run_test 117 "verify fsfilt_extend ==========" test_118() #bug 11710 { - + sync; sleep 1; sync multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c; - dirty=$(grep -c dirty /proc/fs/lustre/llite/lustre-*/dump_page_cache) + dirty=$(grep -c dirty $LPROC/llite/*/dump_page_cache) return $dirty } run_test 118 "verify O_SYNC work" +test_119a() # bug 11737 +{ + BSIZE=$((512 * 1024)) + directio write $DIR/$tfile 0 1 $BSIZE + # We ask to read two blocks, which is more than a file size. + # directio will indicate an error when requested and actual + # sizes aren't equeal (a normal situation in this case) and + # print actual read amount. + NOB=`directio read $DIR/$tfile 0 2 $BSIZE | awk '/error/ {print $6}'` + if [ "$NOB" != "$BSIZE" ]; then + error "read $NOB bytes instead of $BSIZE" + fi + rm -f $DIR/$tfile +} +run_test 119a "Short directIO read must return actual read amount" + +test_119b() # bug 11737 +{ + [ "$OSTCOUNT" -lt "2" ] && echo "skipping 2-stripe test" && return + + lfs setstripe $DIR/$tfile 0 -1 2 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1 || error "dd failed" + sync + multiop $DIR/$tfile oO_RDONLY:O_DIRECT:r$((2048 * 1024)) || \ + error "direct read failed" +} +run_test 119b "Sparse directIO read must return actual read amount" + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME @@ -3804,4 +3998,3 @@ fi echo '=========================== finished ===============================' [ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true -echo "$0: completed" diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 6045487..6d45392 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -12,6 +12,10 @@ ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b 28"} # Tests that fail on uml [ "$UML" = "true" ] && EXCEPT="$EXCEPT 7" +# Join file feature is not supported currently. +# It will be ported soon. +EXCEPT="$EXCEPT 22" + SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH @@ -20,6 +24,7 @@ CHECKSTAT=${CHECKSTAT:-"checkstat -v"} CREATETEST=${CREATETEST:-createtest} GETSTRIPE=${GETSTRIPE:-lfs getstripe} SETSTRIPE=${SETSTRIPE:-lstripe} +LFS=${LFS:-lfs} LCTL=${LCTL:-lctl} MCREATE=${MCREATE:-mcreate} OPENFILE=${OPENFILE:-openfile} @@ -27,6 +32,8 @@ OPENUNLINK=${OPENUNLINK:-openunlink} TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} export TMP=${TMP:-/tmp} +CHECK_GRANT=${CHECK_GRANT:-"no"} + if [ $UID -ne 0 ]; then RUNAS_ID="$UID" @@ -38,11 +45,17 @@ fi SAVE_PWD=$PWD +export NAME=${NAME:-local} + LUSTRE=${LUSTRE:-`dirname $0`/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +if [ ! -z "$USING_KRB5" ]; then + $RUNAS krb5_login.sh || exit 1 +fi + cleanup() { echo -n "cln.." grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2 ${FORCE} @@ -85,6 +98,7 @@ run_one() { export tfile=f${testnum} export tdir=d${base} test_$1 || error "exit with rc=$?" + check_grant || error "check grant fail" unset TESTNAME pass "($((`date +%s` - $BEFORE))s)" cd $SAVE_PWD @@ -131,6 +145,54 @@ basetest() { IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 } +sync_clients() { + cd $DIR1 + sync; sleep 1; sync + cd $DIR2 + sync; sleep 1; sync + + cd $SAVE_PWD +} + +check_grant() { + [ "$CHECK_GRANT" == "no" ] && return 0 + + echo -n "checking grant......" + cd $SAVE_PWD + # write some data to sync client lost_grant + rm -f $DIR1/${tfile}_check_grant_* 2>&1 + for i in `seq $OSTCOUNT`; do + $LFS setstripe $DIR1/${tfile}_check_grant_$i 0 $(($i -1)) 1 + dd if=/dev/zero of=$DIR1/${tfile}_check_grant_$i bs=4k \ + count=1 > /dev/null 2>&1 + done + # sync all the data and make sure no pending data on server + sync_clients + + #get client grant and server grant + client_grant=0 + for d in /proc/fs/lustre/osc/*/cur_grant_bytes; do + client_grant=$(($client_grant + `cat $d`)) + done + server_grant=0 + for d in /proc/fs/lustre/obdfilter/*/tot_granted; do + server_grant=$(($server_grant + `cat $d`)) + done + + # cleanup the check_grant file + for i in `seq $OSTCOUNT`; do + rm $DIR1/${tfile}_check_grant_$i + done + + #check whether client grant == server grant + if [ $client_grant != $server_grant ]; then + echo "failed: client:${client_grant} server: ${server_grant}" + return 1 + else + echo "pass" + fi +} + run_test() { export base=`basetest $1` if [ "$ONLY" ]; then @@ -166,7 +228,8 @@ run_test() { error () { sysctl -w lustre.fail_loc=0 2> /dev/null || true log "$0: FAIL: $TESTNAME $@" - if [ "$SANITYLOG" ]; then + $LCTL dk $TMP/lustre-log-$TESTNAME.log + if [ "$SANITYLOG" ]; then echo "$0: FAIL: $TESTNAME $@" >> $SANITYLOG else exit 1 @@ -201,6 +264,10 @@ export DIR2=${DIR2:-$MOUNT2} [ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96 [ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95 +LPROC=/proc/fs/lustre +LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` +OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` + rm -rf $DIR1/[df][0-9]* $DIR1/lnk build_test_filter @@ -402,7 +469,7 @@ test_14a() { multiop $DIR2/d14/multiop Oc && error "expected error, got success" kill -USR1 $MULTIPID || return 2 wait $MULTIPID || return 3 - rm $TMP/test14.junk + rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" @@ -417,7 +484,7 @@ test_14b() { # bug 3192, 7040 kill -USR1 $MULTIPID || return 2 wait $MULTIPID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" - rm $TMP/test14.junk + rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14b "truncate of executing file returns -ETXTBSY ======" @@ -431,7 +498,7 @@ test_14c() { # bug 3430, 7040 kill -USR1 $MULTIPID || return 2 wait $MULTIPID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" - rm $TMP/test14.junk + rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" @@ -446,18 +513,22 @@ test_14d() { # bug 10921 kill -USR1 $MULTIPID || return 2 wait $MULTIPID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" - rm $TMP/test14.junk + rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14d "chmod of executing file is still possible ========" test_15() { # bug 974 - ENOSPC echo "PATH=$PATH" sh oos2.sh $MOUNT1 $MOUNT2 + grant_error=`dmesg | grep "> available"` + [ -z "$grant_error" ] || error "$grant_error" } run_test 15 "test out-of-space with multiple writers ===========" test_16() { - fsx -c 50 -p 100 -N 2500 -S 0 $MOUNT1/fsxfile $MOUNT2/fsxfile + rm -f $MOUNT1/fsxfile + lfs setstripe $MOUNT1/fsxfile 0 -1 -1 # b=10919 + fsx -c 50 -p 100 -N 2500 -l $((SIZE * 256)) -S 0 $MOUNT1/fsxfile $MOUNT2/fsxfile } run_test 16 "2500 iterations of dual-mount fsx =================" @@ -597,6 +668,7 @@ test_24() { lfs df -ih $DIR2/$tfile || error "lfs df -ih $DIR2/$tfile failed" OSC=`lctl dl | awk '/-osc-|OSC.*MNT/ {print $4}' | head -n 1` +# OSC=`lctl dl | awk '/-osc-/ {print $4}' | head -n 1` lctl --device %$OSC deactivate lfs df -i || error "lfs df -i with deactivated OSC failed" lctl --device %$OSC recover @@ -646,7 +718,7 @@ test_26b() { run_test 26b "sync mtime between ost and mds" test_27() { - cancel_lru_locks OSC + cancel_lru_locks osc lctl clear dd if=/dev/zero of=$DIR2/$tfile bs=$((4096+4))k conv=notrunc count=4 seek=3 & DD2_PID=$! diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 2d9a6ee..a0f0320 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6,10 +6,11 @@ set -e #set -x -export REFORMAT="" +export REFORMAT=${REFORMAT:-""} export VERBOSE=false export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid} export CATASTROPHE=${CATASTROPHE:-/proc/sys/lnet/catastrophe} +#export PDSH="pdsh -S -Rssh -w" # eg, assert_env LUSTRE MDSNODES OSTNODES CLIENTS assert_env() { @@ -38,7 +39,7 @@ init_test_env() { [ -d /r ] && export ROOT=${ROOT:-/r} export TMP=${TMP:-$ROOT/tmp} - export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests + export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/utils/gss:$LUSTRE/tests export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"} @@ -46,14 +47,24 @@ init_test_env() { export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"} [ ! -f "$TUNEFS" ] && export TUNEFS=$(which tunefs.lustre) export CHECKSTAT="${CHECKSTAT:-checkstat} " - export FSYTPE=${FSTYPE:-"ext3"} + export FSYTPE=${FSTYPE:-"ldiskfs"} export NAME=${NAME:-local} export LPROC=/proc/fs/lustre + export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"} + export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"} + export KRB5DIR=${KRB5DIR:-"/usr/kerberos"} if [ "$ACCEPTOR_PORT" ]; then export PORT_OPT="--port $ACCEPTOR_PORT" fi + case "x$SEC" in + xkrb5*) + echo "Using GSS/krb5 ptlrpc security flavor" + export USING_KRB5="y" + ;; + esac + # Paths on remote nodes, if different export RLUSTRE=${RLUSTRE:-$LUSTRE} export RPWD=${RPWD:-$PWD} @@ -73,12 +84,8 @@ init_test_env() { ONLY=${ONLY:-$*} } -case `uname -r` in -2.4.*) EXT=".o"; USE_QUOTA=no; FSTYPE=ext3 ;; - *) EXT=".ko"; USE_QUOTA=yes; [ "$FSTYPE" ] || FSTYPE=ldiskfs ;; -esac - load_module() { + EXT=".ko" module=$1 shift BASE=`basename $module $EXT` @@ -87,7 +94,12 @@ load_module() { insmod ${LUSTRE}/${module}${EXT} $@ else # must be testing a "make install" or "rpm" installation - modprobe $BASE $@ + # note failed to load ptlrpc_gss is considered not fatal + if [ "$BASE" == "ptlrpc_gss" ]; then + modprobe $BASE $@ || echo "gss/krb5 is not supported" + else + modprobe $BASE $@ + fi fi } @@ -104,21 +116,34 @@ load_modules() { echo Loading modules from $LUSTRE load_module ../lnet/libcfs/libcfs + [ -z "$LNETOPTS" ] && \ + LNETOPTS=$(awk '/^options lnet/ { print $0}' /etc/modprobe.conf | sed 's/^options lnet //g') + echo "lnet options: '$LNETOPTS'" # note that insmod will ignore anything in modprobe.conf load_module ../lnet/lnet/lnet $LNETOPTS LNETLND=${LNETLND:-"socklnd/ksocklnd"} load_module ../lnet/klnds/$LNETLND + [ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs load_module lvfs/lvfs load_module obdclass/obdclass + load_module lvfs/fsfilt_$FSTYPE load_module ptlrpc/ptlrpc - [ "$USE_QUOTA" = "yes" ] && load_module quota/lquota + load_module ptlrpc/gss/ptlrpc_gss + # Now, some modules depend on lquota without USE_QUOTA check, + # will fix later. Disable check "$USE_QUOTA" = "yes" temporary. + #[ "$USE_QUOTA" = "yes" ] && load_module quota/lquota + load_module quota/lquota + load_module fid/fid + load_module fld/fld + load_module lmv/lmv load_module mdc/mdc load_module osc/osc load_module lov/lov load_module mds/mds - [ "$FSTYPE" = "ldiskfs" ] && load_module ldiskfs/ldiskfs - [ "$FSTYPE" = "ldiskfs2" ] && load_module ldiskfs/ldiskfs2 - load_module lvfs/fsfilt_$FSTYPE + load_module mdd/mdd + load_module mdt/mdt + load_module cmm/cmm + load_module osd/osd load_module ost/ost load_module obdfilter/obdfilter load_module llite/lustre @@ -142,7 +167,7 @@ wait_for_lnet() { MODULES=$($LCTL modules | awk '{ print $2 }') while [ -n "$MODULES" ]; do sleep 5 - $RMMOD $MODULES >/dev/null 2>&1 || true + $RMMOD $MODULES > /dev/null 2>&1 || true MODULES=$($LCTL modules | awk '{ print $2 }') if [ -z "$MODULES" ]; then return 0 @@ -161,10 +186,10 @@ wait_for_lnet() { unload_modules() { lsmod | grep lnet > /dev/null && $LCTL dl && $LCTL dk $TMP/debug local MODULES=$($LCTL modules | awk '{ print $2 }') - $RMMOD $MODULES >/dev/null 2>&1 || true + $RMMOD $MODULES > /dev/null 2>&1 || true # do it again, in case we tried to unload ksocklnd too early MODULES=$($LCTL modules | awk '{ print $2 }') - [ -n "$MODULES" ] && $RMMOD $MODULES >/dev/null || true + [ -n "$MODULES" ] && $RMMOD $MODULES > /dev/null 2>&1 || true MODULES=$($LCTL modules | awk '{ print $2 }') if [ -n "$MODULES" ]; then echo "Modules still loaded: " @@ -175,7 +200,7 @@ unload_modules() { lsmod return 2 else - echo "Lustre stopped, but LNET is still loaded" + echo "Lustre stopped but LNET is still loaded, waiting..." wait_for_lnet || return 3 fi fi @@ -188,13 +213,102 @@ unload_modules() { echo "$LEAK_PORTALS" 1>&2 mv $TMP/debug $TMP/debug-leak.`date +%s` || true echo "Memory leaks detected" - [ "$LEAK_LUSTRE" -a $(echo $LEAK_LUSTRE | awk 'leaked=$8 {print leaked % 56}') == 0 ] && echo "ignoring known bug 10818" && return 0 return 254 fi echo "modules unloaded." return 0 } +check_gss_daemon_facet() { + facet=$1 + dname=$2 + + num=`do_facet $facet ps -o cmd -C $dname | grep $dname | wc -l` + if [ $num -ne 1 ]; then + echo "$num instance of $dname on $facet" + return 1 + fi + return 0 +} + +send_sigint() { + local facet=$1 + shift + do_facet $facet "killall -2 $@ 2>/dev/null || true" +} + +start_gss_daemons() { + # starting on MDT + for num in `seq $MDSCOUNT`; do + do_facet mds$num "$LSVCGSSD -v" + do_facet mds$num "$LGSSD -v" + done + # starting on OSTs + for num in `seq $OSTCOUNT`; do + do_facet ost$num "$LSVCGSSD -v" + done + # starting on client + # FIXME: is "client" the right facet name? + do_facet client "$LGSSD -v" + + # wait daemons entering "stable" status + sleep 5 + + # + # check daemons are running + # + for num in `seq $MDSCOUNT`; do + check_gss_daemon_facet mds$num lsvcgssd + check_gss_daemon_facet mds$num lgssd + done + for num in `seq $OSTCOUNT`; do + check_gss_daemon_facet ost$num lsvcgssd + done + check_gss_daemon_facet client lgssd +} + +stop_gss_daemons() { + for num in `seq $MDSCOUNT`; do + send_sigint mds$num lsvcgssd lgssd + done + for num in `seq $OSTCOUNT`; do + send_sigint ost$num lsvcgssd + done + send_sigint client lgssd +} + +init_krb5_env() { + if [ ! -z $SEC ]; then + MDS_MOUNT_OPTS=$MDS_MOUNT_OPTS,sec=$SEC + OST_MOUNT_OPTS=$OST_MOUNT_OPTS,sec=$SEC + fi + + if [ ! -z $USING_KRB5 ]; then + start_gss_daemons + fi +} + +cleanup_krb5_env() { + if [ ! -z $USING_KRB5 ]; then + stop_gss_daemons + # maybe cleanup credential cache? + fi +} + +mdsdevlabel() { + local num=$1 + local device=`mdsdevname $num` + local label=`do_facet mds$num "e2label ${device}" | grep -v "CMD: "` + echo -n $label +} + +ostdevlabel() { + local num=$1 + local device=`ostdevname $num` + local label=`do_facet ost$num "e2label ${device}" | grep -v "CMD: "` + echo -n $label +} + # Facet functions # start facet device options start() { @@ -211,7 +325,7 @@ start() { echo Start of ${device} on ${facet} failed ${RC} else do_facet ${facet} sync - label=`do_facet ${facet} "e2label ${device}" | grep -v "CMD: "` + label=$(do_facet ${facet} "e2label ${device}") [ -z "$label" ] && echo no label for ${device} && exit 1 eval export ${facet}_svc=${label} eval export ${facet}_dev=${device} @@ -228,7 +342,7 @@ stop() { HOST=`facet_active_host $facet` [ -z $HOST ] && echo stop: no host for $facet && return 0 - running=`do_facet ${facet} "grep -c ${MOUNT%/*}/${facet}' ' /proc/mounts" | grep -v "CMD: "` + running=$(do_facet ${facet} "grep -c ${MOUNT%/*}/${facet}' ' /proc/mounts") || true if [ ${running} -ne 0 ]; then echo "Stopping ${MOUNT%/*}/${facet} (opts:$@)" do_facet ${facet} umount -d $@ ${MOUNT%/*}/${facet} @@ -240,16 +354,16 @@ stop() { local INTERVAL=1 # conf-sanity 31 takes a long time cleanup while [ $WAIT -lt 300 ]; do - running=$(do_facet ${facet} "[ -e $LPROC ] && grep ST' ' $LPROC/devices" | grep -v "CMD: ") || true - if [ -z "${running}" ]; then - return 0 - fi - echo "waited $WAIT for${running}" - if [ $INTERVAL -lt 64 ]; then - INTERVAL=$((INTERVAL + INTERVAL)) - fi - sleep $INTERVAL - WAIT=$((WAIT + INTERVAL)) + running=$(do_facet ${facet} "[ -e $LPROC ] && grep ST' ' $LPROC/devices") || true + if [ -z "${running}" ]; then + return 0 + fi + echo "waited $WAIT for${running}" + if [ $INTERVAL -lt 64 ]; then + INTERVAL=$((INTERVAL + INTERVAL)) + fi + sleep $INTERVAL + WAIT=$((WAIT + INTERVAL)) done echo "service didn't stop after $WAIT seconds. Still running:" echo ${running} @@ -283,7 +397,7 @@ zconf_umount() { client=$1 mnt=$2 [ "$3" ] && force=-f - local running=`do_node $client "grep -c $mnt' ' /proc/mounts" | grep -v "CMD: "` + local running=$(do_node $client "grep -c $mnt' ' /proc/mounts") || true if [ $running -ne 0 ]; then echo "Stopping client $mnt (opts:$force)" do_node $client umount $force $mnt @@ -414,12 +528,12 @@ replay_barrier_nodf() { } mds_evict_client() { - UUID=`cat /proc/fs/lustre/mdc/${mds_svc}-mdc-*/uuid` - do_facet mds "echo $UUID > /proc/fs/lustre/mds/${mds_svc}/evict_client" + UUID=`cat /proc/fs/lustre/mdc/${mds1_svc}-mdc-*/uuid` + do_facet mds1 "echo $UUID > /proc/fs/lustre/mdt/${mds1_svc}/evict_client" } ost_evict_client() { - UUID=`cat /proc/fs/lustre/osc/${ost1_svc}-osc-*/uuid` + UUID=`grep ${ost1_svc}-osc- $LPROC/devices | egrep -v 'MDT' | awk '{print $5}'` do_facet ost1 "echo $UUID > /proc/fs/lustre/obdfilter/${ost1_svc}/evict_client" } @@ -454,11 +568,27 @@ h2gm () { fi } -h2tcp() { +h2name_or_ip() { if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - echo $1"@tcp" + echo $1"@$2" fi } + +h2ptl() { + if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else + ID=`xtprocadmin -n $1 2>/dev/null | egrep -v 'NID' | awk '{print $1}'` + if [ -z "$ID" ]; then + echo "Could not get a ptl id for $1..." + exit 1 + fi + echo $ID"@ptl" + fi +} +declare -fx h2ptl + +h2tcp() { + h2name_or_ip "$1" "tcp" +} declare -fx h2tcp h2elan() { @@ -474,13 +604,15 @@ h2elan() { declare -fx h2elan h2openib() { - if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - ID=`echo $1 | sed 's/[^0-9]*//g'` - echo $ID"@openib" - fi + h2name_or_ip "$1" "openib" } declare -fx h2openib +h2o2ib() { + h2name_or_ip "$1" "o2ib" +} +declare -fx h2o2ib + facet_host() { local facet=$1 varname=${facet}_HOST @@ -542,10 +674,11 @@ do_node() { myPDSH="no_dsh" fi if $VERBOSE; then - echo "CMD: $HOST $@" + echo "CMD: $HOST $@" >&2 $myPDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || : fi - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")" + $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")" | sed "s/^${HOST}: //" + return ${PIPESTATUS[0]} } do_facet() { @@ -573,21 +706,31 @@ ostdevname() { echo -n $DEVPTR } +mdsdevname() { + num=$1 + DEVNAME=MDSDEV$num + #if $MDSDEVn isn't defined, default is $MDSDEVBASE + num + eval DEVPTR=${!DEVNAME:=${MDSDEVBASE}${num}} + echo -n $DEVPTR +} + ######## ## MountConf setup stopall() { # make sure we are using the primary server, so test-framework will # be able to clean up properly. - activemds=`facet_active mds` - if [ $activemds != "mds" ]; then - fail mds + activemds=`facet_active mds1` + if [ $activemds != "mds1" ]; then + fail mds1 fi # assume client mount is local grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $* grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2 $* - stop mds -f + for num in `seq $MDSCOUNT`; do + stop mds$num -f + done for num in `seq $OSTCOUNT`; do stop ost$num -f done @@ -597,6 +740,13 @@ stopall() { cleanupall() { stopall $* unload_modules + cleanup_krb5_env +} + +mdsmkfsopts() +{ + local nr=$1 + test $nr = 1 && echo -n $MDS_MKFS_OPTS || echo -n $MDSn_MKFS_OPTS } formatall() { @@ -605,18 +755,23 @@ formatall() { stopall # We need ldiskfs here, may as well load them all load_modules - echo Formatting mds, osts - if $VERBOSE; then - add mds $MDS_MKFS_OPTS $FSTYPE_OPT --reformat $MDSDEV || exit 10 - else - add mds $MDS_MKFS_OPTS $FSTYPE_OPT --reformat $MDSDEV > /dev/null || exit 10 - fi + [ "$CLIENTONLY" ] && return + echo "Formatting mdts, osts" + for num in `seq $MDSCOUNT`; do + echo "Format mds$num: $(mdsdevname $num)" + if $VERBOSE; then + add mds$num `mdsmkfsopts $num` $FSTYPE_OPT --reformat `mdsdevname $num` || exit 9 + else + add mds$num `mdsmkfsopts $num` $FSTYPE_OPT --reformat `mdsdevname $num` > /dev/null || exit 9 + fi + done for num in `seq $OSTCOUNT`; do + echo "Format ost$num: $(ostdevname $num)" if $VERBOSE; then - add ost$num $OST_MKFS_OPTS $FSTYPE_OPT --reformat `ostdevname $num` || exit 10 + add ost$num $OST_MKFS_OPTS --reformat `ostdevname $num` || exit 10 else - add ost$num $OST_MKFS_OPTS $FSTYPE_OPT --reformat `ostdevname $num` > /dev/null || exit 10 + add ost$num $OST_MKFS_OPTS --reformat `ostdevname $num` > /dev/null || exit 10 fi done } @@ -627,12 +782,20 @@ mount_client() { setupall() { load_modules - echo Setup mdt, osts - start mds $MDSDEV $MDS_MOUNT_OPTS - for num in `seq $OSTCOUNT`; do - DEVNAME=`ostdevname $num` - start ost$num $DEVNAME $OST_MOUNT_OPTS - done + init_krb5_env + if [ -z "$CLIENTONLY" ]; then + echo "Setup mdts, osts" + for num in `seq $MDSCOUNT`; do + DEVNAME=$(mdsdevname $num) + echo $REFORMAT | grep -q "reformat" \ + || do_facet mds$num "$TUNEFS --writeconf $DEVNAME" + start mds$num $DEVNAME $MDS_MOUNT_OPTS + done + for num in `seq $OSTCOUNT`; do + DEVNAME=$(ostdevname $num) + start ost$num $DEVNAME $OST_MOUNT_OPTS + done + fi [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE mount_client $MOUNT if [ "$MOUNT_2" ]; then @@ -785,8 +948,10 @@ pgcache_empty() { # Test interface error() { sysctl -w lustre.fail_loc=0 2> /dev/null || true - echo "${TESTSUITE}: **** FAIL:" $@ + log "${TESTSUITE}: **** FAIL:" $@ + $LCTL dk $TMP/lustre-log-$TESTNAME.log log "FAIL: $TESTNAME $@" + $LCTL dk $TMP/lustrefail_${TESTSUITE}_${TESTNAME}.$(date +%s) exit 1 } diff --git a/lustre/tests/test_brw.c b/lustre/tests/test_brw.c index 0e5605e..fe18021 100644 --- a/lustre/tests/test_brw.c +++ b/lustre/tests/test_brw.c @@ -3,7 +3,9 @@ */ /* for O_DIRECT */ -#define _GNU_SOURCE +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <stdio.h> #include <string.h> diff --git a/lustre/tests/testreq.c b/lustre/tests/testreq.c deleted file mode 100644 index 0b19a44..0000000 --- a/lustre/tests/testreq.c +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdlib.h> -#include <stdio.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <signal.h> -#include <unistd.h> -#include <sys/ioctl.h> - -#define _GNU_SOURCE -#include <getopt.h> -#undef _GNU_SOURCE - -#include <liblustre.h> -#include <lustre_mds.h> - -static void usage(char *argv0, int status) -{ - printf( -"Usage: %s [OPTION...]\n\ -\n\ ---getattr <directory>\n\ ---setattr <directory>\n\ ---readpage <directory>\n\ ---open <directory>\n\ ---close <directory handle (returned by open)>\n\ ---create <new name>\n", argv0); - - exit(status); -} - -int main(int argc, char **argv) -{ - int fd = 0; - int rc = 0; - int c = 0; - long cmd = 0; - unsigned long arg; - char *short_opts = "h", *name = argv[0]; - static struct option long_opts[] = { -#define OPT_GETATTR -2 - {"getattr", no_argument, NULL, OPT_GETATTR}, -#define OPT_READPAGE -3 - {"readpage", no_argument, NULL, OPT_READPAGE}, -#define OPT_SETATTR -4 - {"setattr", no_argument, NULL, OPT_SETATTR}, -#define OPT_CREATE -5 - {"create", no_argument, NULL, OPT_CREATE}, -#define OPT_OPEN -6 - {"open", no_argument, NULL, OPT_OPEN}, -#define OPT_CLOSE -7 - {"close", required_argument, NULL, OPT_CLOSE}, -#define OPT_HELP 'h' - {"help", no_argument, NULL, OPT_HELP}, - {0} - }; - - do { - c = getopt_long(argc, argv, short_opts, long_opts, NULL); - - switch (c) { - case OPT_HELP: - usage(argv[0], 0); - break; - case OPT_GETATTR: - cmd = IOC_REQUEST_GETATTR; - name = "getattr"; - arg = 2; - break; - case OPT_SETATTR: - cmd = IOC_REQUEST_SETATTR; - name = "setattr"; - arg = 2; - break; - case OPT_READPAGE: - cmd = IOC_REQUEST_READPAGE; - name = "readpage"; - arg = 2; - break; - case OPT_CREATE: - cmd = IOC_REQUEST_CREATE; - name ="create"; - arg = 2; - break; - case OPT_OPEN: - cmd = IOC_REQUEST_OPEN; - name = "open"; - arg = 2; - break; - case OPT_CLOSE: - cmd = IOC_REQUEST_CLOSE; - name = "close"; - arg = strtoul(optarg, NULL, 0); - break; - case '?': - usage(argv[0], 1); - } - } while (c != -1); - - if (cmd == 0) - usage(argv[0], 1); - - fd = open("/dev/request", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "error opening /dev/request: %s\n", - strerror(errno)); - exit(1); - } - - fprintf(stderr, "Executing %s test (arg=%lu)...\n", name, arg); - if (cmd == IOC_REQUEST_OPEN) { - rc = ioctl(fd, cmd, &arg); - printf("%lu\n", arg); - } else - rc = ioctl(fd, cmd, arg); - fprintf(stderr, "result code: %d\n", rc); - - return 0; -} diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore index 71e48e8..b8c3e72 100644 --- a/lustre/utils/.cvsignore +++ b/lustre/utils/.cvsignore @@ -21,6 +21,9 @@ obdbarrier lload llverfs llverdev +l_getidentity +l_facl +req_layout l_getgroups .*.cmd .*.d diff --git a/lustre/utils/Lustre/.cvsignore b/lustre/utils/Lustre/.cvsignore new file mode 100644 index 0000000..97e22b9 --- /dev/null +++ b/lustre/utils/Lustre/.cvsignore @@ -0,0 +1,4 @@ +Makefile +Makefile.in +.deps +*.pyc diff --git a/lustre/utils/Lustre/Makefile.am b/lustre/utils/Lustre/Makefile.am new file mode 100644 index 0000000..c3d9a59 --- /dev/null +++ b/lustre/utils/Lustre/Makefile.am @@ -0,0 +1,4 @@ +if UTILS +pymod_SCRIPTS = __init__.py lustredb.py error.py cmdline.py +endif +EXTRA_DIST = __init__.py lustredb.py error.py cmdline.py diff --git a/lustre/utils/Lustre/__init__.py b/lustre/utils/Lustre/__init__.py new file mode 100644 index 0000000..7a21df3 --- /dev/null +++ b/lustre/utils/Lustre/__init__.py @@ -0,0 +1,7 @@ +__all__ = ["lustredb"] + +from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP +from error import LconfError, OptionError +from cmdline import Options + +CONFIG_VERSION="2003070801" diff --git a/lustre/utils/Lustre/cmdline.py b/lustre/utils/Lustre/cmdline.py new file mode 100644 index 0000000..d2a39f6 --- /dev/null +++ b/lustre/utils/Lustre/cmdline.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# +# Copyright (C) 2002 Cluster File Systems, Inc. +# Author: Robert Read <rread@clusterfs.com> +# This file is part of Lustre, http://www.lustre.org. +# +# Lustre is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. +# +# Lustre is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Lustre; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +# Standard the comand line handling for all the python tools. + +import sys, getopt, types +import string +import error + +class Options: + FLAG = 1 + PARAM = 2 + INTPARAM = 3 + PARAMLIST = 4 + def __init__(self, cmd, remain_help, options): + self.options = options + shorts = "" + longs = [] + options.append(('help,h', "Print this help")) + for opt in options: + long = self.long(opt) + short = self.short(opt) + if self.type(opt) in (Options.PARAM, Options.INTPARAM, + Options.PARAMLIST): + if short: short = short + ':' + if long: + long = long + '=' + if string.find(long, '_') >= 0: + longs.append(string.replace(long, '_', '-')) + shorts = shorts + short + longs.append(long) + self.short_opts = shorts + self.long_opts = longs + self.cmd = cmd + self.remain_help = remain_help + + def init_values(self): + values = {} + for opt in self.options: + values[self.key(opt)] = self.default(opt) + return values + + def long(self, option): + n = string.find(option[0], ',') + if n < 0: return option[0] + else: return option[0][0:n] + + def key(self, option): + key = self.long(option) + return string.replace(key, '-', '_') + + def short(self, option): + n = string.find(option[0], ',') + if n < 0: return '' + else: return option[0][n+1:] + + def help(self, option): + return option[1] + + def type(self, option): + if len(option) >= 3: + return option[2] + return Options.FLAG + + def default(self, option): + if len(option) >= 4: + return option[3] + if self.type(option) == Options.PARAMLIST: + return [] + return None + + def lookup_option(self, key, key_func): + for opt in self.options: + if key_func(opt) == key: + return opt + + def lookup_short(self, key): + return self.lookup_option(key, self.short) + + def lookup_long(self, key): + key = string.replace(key, '-', '_') + return self.lookup_option(key, self.long) + + def handle_opts(self, opts): + values = self.init_values() + for o, a in opts: + if o[0:2] != '--': + option = self.lookup_short(o[1:]) + else: + option = self.lookup_long(o[2:]) + if self.type(option) == Options.PARAM: + val = a + elif self.type(option) == Options.INTPARAM: + try: + val = int(a) + except ValueError, e: + raise error.OptionError("option: '%s' expects integer value, got '%s' " % (o,a)) + elif self.type(option) == Options.PARAMLIST: + val = values[self.key(option)]; + val.append(a) + else: + val = 1 + values[self.key(option)] = val + return values + + + class option_wrapper: + def __init__(self, values): + self.__dict__['values'] = values + def __getattr__(self, name): + if self.values.has_key(name): + return self.values[name] + else: + raise error.OptionError("bad option name: " + name) + def __getitem__(self, name): + if self.values.has_key(name): + return self.values[name] + else: + raise error.OptionError("bad option name: " + name) + def __setattr__(self, name, value): + self.values[name] = value + + def parse(self, argv): + try: + opts, args = getopt.getopt(argv, self.short_opts, self.long_opts) + values = self.handle_opts(opts) + if values["help"]: + self.usage() + sys.exit(0) + return self.option_wrapper(values), args + except getopt.error, e: + raise error.OptionError(str(e)) + + def usage(self): + ret = 'usage: %s [options] %s\n' % (self.cmd, self.remain_help) + for opt in self.options: + s = self.short(opt) + if s: str = "-%s|--%s" % (s,self.long(opt)) + else: str = "--%s" % (self.long(opt),) + if self.type(opt) in (Options.PARAM, Options.INTPARAM): + str = "%s <arg>" % (str,) + help = self.help(opt) + n = string.find(help, '\n') + if self.default(opt) != None: + if n < 0: + str = "%-15s %s (default=%s)" %(str, help, + self.default(opt)) + else: + str = "%-15s %s (default=%s)%s" %(str, help[0:n], + self.default(opt), + help[n:]) + else: + str = "%-15s %s" %(str, help) + ret = ret + str + "\n" + print ret + +# Test driver +if __name__ == "__main__": + cl = Options("test", "xml_file", [ + ('verbose,v', "verbose ", Options.FLAG, 0), + ('cleanup,d', "shutdown"), + ('gdb', "Display gdb module file ", Options.FLAG, 0), + ('device', "device path ", Options.PARAM), + ('ldapurl', "LDAP server URL ", Options.PARAM), + ('lustre', "Lustre source dir ", Options.PARAM), + ('portals', "Portals source dir ", Options.PARAM), + ('maxlevel', """Specify the maximum level + Levels are aproximatly like: + 70 - mountpoint, echo_client, osc, mdc, lov""", + Options.INTPARAM, 100), + + ]) + + conf, args = cl.parse(sys.argv[1:]) + + for key in conf.values.keys(): + print "%-10s = %s" % (key, conf.values[key]) diff --git a/lustre/utils/Lustre/error.py b/lustre/utils/Lustre/error.py new file mode 100644 index 0000000..6c30416 --- /dev/null +++ b/lustre/utils/Lustre/error.py @@ -0,0 +1,10 @@ +import exceptions + +class LconfError (exceptions.Exception): + def __init__(self, args): + self.args = args + +class OptionError (exceptions.Exception): + def __init__(self, args): + self.args = args + diff --git a/lustre/utils/Lustre/lustredb.py b/lustre/utils/Lustre/lustredb.py new file mode 100644 index 0000000..82409e1 --- /dev/null +++ b/lustre/utils/Lustre/lustredb.py @@ -0,0 +1,551 @@ +import sys, types, string, os +import re, exceptions +import xml.dom.minidom +import Lustre + +# ============================================================ +# XML processing and query + +class LustreDB: + caching_enabled = 1 + + def __init__(self): + self.lookup_uuid_cache = {} + self.lookup_name_cache = {} + self.lookup_class_cache = {} + self.lookup_val_cache = {} + self.lookup_refs_cache = {} + self.lookup_lovtgts_cache = {} + self.lookup_nid2srv_cache = {} + self.lookup_activedev_cache = {} + self.lookup_tgtdev_cache = {} + self.lookup_group_cache = {} + + self.lookup_allrefs_cache = None + self.lookup_networks_cache = None + + def lookup(self, uuid): + """ lookup returns a new LustreDB instance""" + if self.caching_enabled and self.lookup_uuid_cache.has_key(uuid): + res = self.lookup_uuid_cache[uuid] + else: + res = self._lookup_by_uuid(uuid) + if self.caching_enabled: + self.lookup_uuid_cache[uuid] = res + return res + + def lookup_name(self, name, class_name = ""): + """ lookup returns a new LustreDB instance""" + if self.caching_enabled and self.lookup_name_cache.has_key((name, class_name)): + res = self.lookup_name_cache[(name, class_name)] + else: + res = self._lookup_by_name(name, class_name) + if self.caching_enabled: + self.lookup_name_cache[(name, class_name)] = res + return res + + def lookup_class(self, class_name): + """ lookup returns a new LustreDB instance""" + if self.caching_enabled and self.lookup_class_cache.has_key(class_name): + res = self.lookup_class_cache[class_name] + else: + res = self._lookup_by_class(class_name) + if self.caching_enabled: + self.lookup_class_cache[class_name] = res + return res + + def get_val(self, tag, default=None): + if self.caching_enabled and self.lookup_class_cache.has_key(tag): + v = self.lookup_val_cache[tag] + else: + v = self._get_val(tag) + if self.caching_enabled: + self.lookup_val_cache[tag] = v + if v: + return v + if default != None: + return default + return None + + def get_class(self): + return self._get_class() + + def get_val_int(self, tag, default=0): + str = self.get_val(tag) + try: + if str: + return int(str) + return default + except ValueError: + raise Lustre.LconfError("text value is not integer: " + str) + + def get_first_ref(self, tag): + """ Get the first uuidref of the type TAG. Only + one is expected. Returns the uuid.""" + uuids = self.get_refs(tag) + if len(uuids) > 0: + return uuids[0] + return None + + def get_refs(self, tag): + """ Get all the refs of type TAG. Returns list of uuids. """ + if self.caching_enabled and self.lookup_refs_cache.has_key(tag): + uuids = self.lookup_refs_cache[tag] + else: + uuids = self._get_refs(tag) + if self.caching_enabled: + self.lookup_refs_cache[tag] = uuids + return uuids + + def get_all_refs(self): + """ Get all the refs. Returns list of uuids. """ + if self.caching_enabled and self.lookup_allrefs_cache: + uuids = self.lookup_allrefs_cache + else: + uuids = self._get_all_refs() + if self.caching_enabled: + self.lookup_allrefs_cache = uuids + return uuids + + def get_lov_tgts(self, tag): + """ Returns list of lov tgts. """ + if self.caching_enabled and self.lookup_lovtgts_cache.has_key(tag): + tgts = self.lookup_lovtgts_cache[tag] + else: + tgts = self._get_lov_tgts(tag) + if self.caching_enabled: + self.lookup_lovtgts_cache[tag] = tgts + return tgts + + def nid2server(self, nid, net_type, cluster_id): + if self.caching_enabled and self.lookup_nid2srv_cache.has_key((nid, net_type, cluster_id)): + res = self.lookup_nid2srv_cache[(nid, net_type, cluster_id)] + else: + netlist = self.lookup_class('network') + for net_db in netlist: + if (net_db.get_val('nid') == nid and + net_db.get_val('nettype') == net_type and + net_db.get_val('clusterid') == cluster_id): + res = net_db + break + if self.caching_enabled: + self.lookup_nid2srv_cache[(nid, net_type, cluster_id)] = res + return res + + # Find the target_device for target on a node + # node->profiles->device_refs->target + def get_node_tgt_dev(self, node_name, target_uuid): + node_db = self.lookup_name(node_name) + if not node_db: + return None + return node_db.get_tgt_dev(target_uuid) + + # get all network uuids for this node + def get_networks(self): + if self.caching_enabled and self.lookup_networks_cache: + ret = self.lookup_networks_cache + else: + ret = [] + prof_list = self.get_refs('profile') + for prof_uuid in prof_list: + prof_db = self.lookup(prof_uuid) + net_list = prof_db.get_refs('network') + for net_uuid in net_list: + ret.append(net_uuid) + if self.caching_enabled: + self.lookup_networks_cache = ret + return ret + + def get_active_dev(self, tgtuuid): + if self.caching_enabled and self.lookup_activedev_cache.has_key(tgtuuid): + tgt_dev_uuid = self.lookup_activedev_cache[tgtuuid] + else: + tgt = self.lookup(tgtuuid) + tgt_dev_uuid = tgt.get_first_ref('active') + if self.caching_enabled: + self.lookup_activedev_cache[tgtuuid] = tgt_dev_uuid + return tgt_dev_uuid + + def get_tgt_dev(self, tgtuuid): + if self.caching_enabled and self.lookup_tgtdev_cache.has_key(tgtuuid): + res = self.lookup_tgtdev_cache[tgtuuid] + else: + prof_list = self.get_refs('profile') + res = None + for prof_uuid in prof_list: + prof_db = self.lookup(prof_uuid) + if not prof_db: + panic("profile:", profile, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = self.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if tgtuuid == uuid: + res = ref_uuid + break + if not res is None: + break + if self.caching_enabled: + self.lookup_tgtdev_cache[tgtuuid] = res + return res + + def get_group(self, group): + if self.caching_enabled and self.lookup_group_cache.has_key(group): + ret = self.lookup_group_cache[group] + else: + ret = [] + devs = self.lookup_class('mds') + for tgt in devs: + if tgt.get_val('group', tgt.get_val('name')) == group: + ret.append(tgt.getUUID()) + devs = self.lookup_class('ost') + for tgt in devs: + if tgt.get_val('group', tgt.get_val('name')) == group: + ret.append(tgt.getUUID()) + if self.caching_enabled: + self.lookup_group_cache[group] = ret + return ret + + # Change the current active device for a target + def update_active(self, tgtuuid, new_uuid): + self._update_active(tgtuuid, new_uuid) + + def get_version(self): + return self.get_val('version') + + def get_mtime(self): + return self.get_val('mtime') + +class LustreDB_XML(LustreDB): + def __init__(self, dom, root_node): + LustreDB.__init__(self) + + # init xmlfile + self.dom_node = dom + self.root_node = root_node + + def close(self): + # do nothing + return None + + def xmltext(self, dom_node, tag): + list = dom_node.getElementsByTagName(tag) + if len(list) > 0: + dom_node = list[0] + dom_node.normalize() + if dom_node.firstChild: + txt = string.strip(dom_node.firstChild.data) + if txt: + return txt + + def xmlattr(self, dom_node, attr): + return dom_node.getAttribute(attr) + + def _get_val(self, tag): + """a value could be an attribute of the current node + or the text value in a child node""" + ret = self.xmlattr(self.dom_node, tag) + if not ret: + ret = self.xmltext(self.dom_node, tag) + return ret + + def _get_class(self): + return self.dom_node.nodeName + + def get_ref_type(self, ref_tag): + res = string.split(ref_tag, '_') + return res[0] + + # + # [(ref_class, ref_uuid),] + def _get_all_refs(self): + list = [] + for n in self.dom_node.childNodes: + if n.nodeType == n.ELEMENT_NODE: + ref_uuid = self.xml_get_ref(n) + ref_class = self.get_ref_type(n.nodeName) + list.append((ref_class, ref_uuid)) + + list.sort() + return list + + def _get_refs(self, tag): + """ Get all the refs of type TAG. Returns list of uuids. """ + uuids = [] + refname = '%s_ref' % tag + reflist = self.dom_node.getElementsByTagName(refname) + for r in reflist: + uuids.append(self.xml_get_ref(r)) + return uuids + + def _get_lov_tgts(self, tag): + """ Get all the refs of type TAG. Returns list of lov_tgts. """ + tgts = [] + tgtlist = self.dom_node.getElementsByTagName(tag) + for tgt in tgtlist: + uuidref = tgt.getAttribute('uuidref') + index = tgt.getAttribute('index') + generation = tgt.getAttribute('generation') + active = int(tgt.getAttribute('active')) + tgts.append((uuidref, index, generation, active)) + return tgts + + def xmllookup_by_uuid(self, dom_node, uuid): + for n in dom_node.childNodes: + if n.nodeType == n.ELEMENT_NODE: + if self.xml_get_uuid(n) == uuid: + return n + else: + n = self.xmllookup_by_uuid(n, uuid) + if n: return n + return None + + def _lookup_by_uuid(self, uuid): + dom = self. xmllookup_by_uuid(self.root_node, uuid) + if dom: + return LustreDB_XML(dom, self.root_node) + + def xmllookup_by_name(self, dom_node, name): + for n in dom_node.childNodes: + if n.nodeType == n.ELEMENT_NODE: + if self.xml_get_name(n) == name: + return n + else: + n = self.xmllookup_by_name(n, name) + if n: return n + return None + + def _lookup_by_name(self, name, class_name): + dom = self.xmllookup_by_name(self.root_node, name) + if dom: + return LustreDB_XML(dom, self.root_node) + + def xmllookup_by_class(self, dom_node, class_name): + return dom_node.getElementsByTagName(class_name) + + def _lookup_by_class(self, class_name): + ret = [] + domlist = self.xmllookup_by_class(self.root_node, class_name) + for node in domlist: + ret.append(LustreDB_XML(node, self.root_node)) + return ret + + def xml_get_name(self, n): + return n.getAttribute('name') + + def getName(self): + return self.xml_get_name(self.dom_node) + + def xml_get_ref(self, n): + return n.getAttribute('uuidref') + + def xml_get_uuid(self, dom_node): + return dom_node.getAttribute('uuid') + + def getUUID(self): + return self.xml_get_uuid(self.dom_node) + + # Convert routes from the router to a route that will be used + # on the local system. The network type and gw are changed to the + # interface on the router the local system will connect to. + def get_local_routes(self, type, gw): + """ Return the routes as a list of tuples of the form: + [(type, gw, lo, hi),]""" + res = [] + tbl = self.dom_node.getElementsByTagName('routetbl') + for t in tbl: + routes = t.getElementsByTagName('route') + for r in routes: + net_type = self.xmlattr(r, 'type') + if type != net_type: + lo = self.xmlattr(r, 'lo') + hi = self.xmlattr(r, 'hi') + tgt_cluster_id = self.xmlattr(r, 'tgtclusterid') + res.append((type, gw, tgt_cluster_id, lo, hi)) + return res + + def get_route_tbl(self): + ret = [] + for r in self.dom_node.getElementsByTagName('route'): + net_type = self.xmlattr(r, 'type') + gw = self.xmlattr(r, 'gw') + gw_cluster_id = self.xmlattr(r, 'gwclusterid') + tgt_cluster_id = self.xmlattr(r, 'tgtclusterid') + lo = self.xmlattr(r, 'lo') + hi = self.xmlattr(r, 'hi') + ret.append((net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)) + return ret + + def get_hostaddr(self): + ret = [] + list = self.dom_node.getElementsByTagName('hostaddr') + for node in list: + ret.append(node.firstChild.data) + return ret + + def _update_active(self, tgt, new): + raise Lustre.LconfError("updates not implemented for XML") + +# ================================================================ +# LDAP Support +class LustreDB_LDAP(LustreDB): + def __init__(self, name, attrs, + base = "fs=lustre", + parent = None, + url = "ldap://localhost", + user = "cn=Manager, fs=lustre", + pw = "" + ): + LustreDB.__init__(self) + + self._name = name + self._attrs = attrs + self._base = base + self._parent = parent + self._url = url + self._user = user + self._pw = pw + if parent: + self.l = parent.l + self._base = parent._base + else: + self.open() + + def open(self): + import ldap + try: + self.l = ldap.initialize(self._url) + # Set LDAP protocol version used + self.l.protocol_version=ldap.VERSION3 + # user and pw only needed if modifying db + self.l.bind_s(self._user, self._pw, ldap.AUTH_SIMPLE); + except ldap.LDAPError, e: + raise Lustre.LconfError('Unable to connect to ldap server:' + self._url) + + try: + self._name, self._attrs = self.l.search_s(self._base, + ldap.SCOPE_BASE)[0] + except ldap.LDAPError, e: + raise Lustre.LconfError("no config found in ldap: %s" + % (self._base,)) + def close(self): + self.l.unbind_s() + + def ldap_search(self, filter): + """Return list of uuids matching the filter.""" + import ldap + dn = self._base + ret = [] + uuids = [] + try: + for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL, + filter, ["uuid"]): + for v in attrs['uuid']: + uuids.append(v) + except ldap.NO_SUCH_OBJECT, e: + pass + except ldap.LDAPError, e: + print e # FIXME: die here? + if len(uuids) > 0: + for uuid in uuids: + ret.append(self._lookup_by_uuid(uuid)) + return ret + + def _lookup_by_name(self, name, class_name): + list = self.ldap_search("lustreName=%s" %(name)) + if len(list) == 1: + return list[0] + return None + + def _lookup_by_class(self, class_name): + return self.ldap_search("objectclass=%s" %(string.upper(class_name))) + + def _lookup_by_uuid(self, uuid): + import ldap + dn = "uuid=%s,%s" % (uuid, self._base) + ret = None + try: + for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE, + "objectclass=*"): + ret = LustreDB_LDAP(name, attrs, parent = self) + + except ldap.NO_SUCH_OBJECT, e: + pass # just return empty list + except ldap.LDAPError, e: + print e # FIXME: die here? + return ret + + + def _get_val(self, k): + ret = None + if k == 'name': + k = 'lustreName' + if self._attrs.has_key(k): + v = self._attrs[k] + if type(v) == types.ListType: + ret = str(v[0]) + else: + ret = str(v) + return ret + + def _get_class(self): + return string.lower(self._attrs['objectClass'][0]) + + def get_ref_type(self, ref_tag): + return ref_tag[:-3] + + def _get_lov_tgts(self, tag): + """ Get all the refs of type TAG. Returns list of lov_tgts. """ + tgts = [] + return tgts + + # + # [(ref_class, ref_uuid),] + def _get_all_refs(self): + reflist = [] + for k in self._attrs.keys(): + if re.search('.*Ref', k): + for uuid in self._attrs[k]: + ref_class = self.get_ref_type(k) + reflist.append((ref_class, uuid)) + return reflist + + def _get_refs(self, tag): + """ Get all the refs of type TAG. Returns list of uuids. """ + refname = '%sRef' % tag + + if self._attrs.has_key(refname): + return self._attrs[refname] + + reflist = [] + for obj in self._lookup_by_class("*"): + if obj._attrs.has_key(refname): + reflist.extend(obj._attrs[refname]) + + return reflist + + def getName(self): + return self._get_val('lustreName') + + def getUUID(self): + return self._get_val('uuid') + + def get_route_tbl(self): + return [] + + def get_hostaddr(self): + return self._get_refs('hostaddr') + + def _update_active(self, tgtuuid, newuuid): + """Return list of uuids matching the filter.""" + import ldap + dn = "uuid=%s,%s" %(tgtuuid, self._base) + ret = [] + uuids = [] + try: + self.l.modify_s(dn, [(ldap.MOD_REPLACE, "activeRef", newuuid)]) + except ldap.NO_SUCH_OBJECT, e: + print e + except ldap.LDAPError, e: + print e # FIXME: die here? + return diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 5b09c54..a7ee8f4 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -1,35 +1,41 @@ # Administration utilities Makefile +if GSS +SUBDIRS = gss +endif + AM_CFLAGS=$(LLCFLAGS) AM_CPPFLAGS=$(LLCPPFLAGS) -DLUSTRE_UTILS=1 AM_LDFLAGS := -L$(top_builddir)/lnet/utils LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a -sbin_scripts = lrun -bin_scripts = llstat llobdstat plot-llstat +sbin_scripts = lrun +bin_scripts = llstat llobdstat plot-llstat if UTILS noinst_PROGRAMS = wirecheck wiretest obdio obdbarrier + # mount only finds helpers in /sbin rootsbin_PROGRAMS = mount.lustre sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl \ - l_getgroups llverfs llverdev llog_reader lr_reader + l_getidentity l_facl llverfs llverdev \ + llog_reader lr_reader if LIBPTHREAD sbin_PROGRAMS += loadgen endif -bin_PROGRAMS = lfs +bin_PROGRAMS = lfs req_layout bin_SCRIPTS = $(bin_scripts) sbin_SCRIPTS = $(sbin_scripts) endif # UTILS -lib_LIBRARIES = liblustreapi.a +lib_LIBRARIES = liblustreapi.a libiam.a lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h platform.h lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL) lctl_DEPENDENCIES := $(LIBPTLCTL) -lfs_SOURCES = lfs.c parser.c lustre_cfg.c obd.c +lfs_SOURCES = lfs.c parser.c obd.c lustre_cfg.c lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a @@ -56,6 +62,7 @@ llverfs_LDADD := $(EXT2FSLIB) $(E2PLIB) llverdev_LDADD := $(EXT2FSLIB) $(BLKIDLIB) liblustreapi_a_SOURCES = liblustreapi.c +libiam_a_SOURCES = libiam.c wirecheck_SOURCES = wirecheck.c wirecheck_CPPFLAGS = -DCC="\"$(CC)\"" @@ -64,6 +71,7 @@ wiretest_SOURCES = wiretest.c obdio_SOURCES = obdio.c obdiolib.c obdiolib.h obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h +req_layout_SOURCES = req-layout.c llog_reader_SOURCES = llog_reader.c llog_reader_LDADD := $(LIBPTLCTL) @@ -77,14 +85,22 @@ mount_lustre_DEPENDENCIES := $(LIBPTLCTL) mkfs_lustre_SOURCES = mkfs_lustre.c mkfs_lustre_CPPFLAGS = -UTUNEFS $(AM_CPPFLAGS) -mkfs_lustre_LDADD := $(LIBPTLCTL) -mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL) +mkfs_lustre_LDADD := libiam.a $(LIBPTLCTL) +mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL) libiam.a tunefs_lustre_SOURCES = $(mkfs_lustre_SOURCES) tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS) tunefs_lustre_LDADD := $(mkfs_lustre_LDADD) tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES) +l_getidentity_SOURCES = l_getidentity.c +l_getidentity_LDADD := $(LIBPTLCTL) +l_getidentity_DEPENDENCIES := $(LIBPTLCTL) + +l_facl_SOURCES = l_facl.c +l_facl_LDADD := liblustreapi.a +l_facl_DEPENDENCIES := liblustreapi.a + EXTRA_DIST = $(sbin_scripts) $(bin_scripts) # NOTE: this should only be run on i386. diff --git a/lustre/utils/automatic-reconnect-sample b/lustre/utils/automatic-reconnect-sample new file mode 100755 index 0000000..bf9ecc4 --- /dev/null +++ b/lustre/utils/automatic-reconnect-sample @@ -0,0 +1,34 @@ +#!/bin/sh + +if [ -z "$1" ]; then + echo "No UUID given to Lustre upcall!" | wall + exit 1 +fi + +# FIXME: OSTHOST can't be hard-coded! +OST=$1 +OSTHOST=dev7 +LUSTRE=/home/pschwan/lustre/lustre + +while ( ! ping -c 1 -w 3 $OSTHOST ) ; do + sleep 2 +done; + +echo -n "OST $OSTHOST UUID $OST responding to pings : " +date + +$LUSTRE/utils/lctl <<EOF +network tcp +close_uuid $OST +del_uuid $OST +connect $OSTHOST 988 +add_uuid $OST $OSTHOST +quit +EOF + +$LUSTRE/utils/lctl <<EOF +device \$RPCDEV +probe +newconn $OST +quit +EOF diff --git a/lustre/utils/create_iam.c b/lustre/utils/create_iam.c new file mode 100644 index 0000000..5268802 --- /dev/null +++ b/lustre/utils/create_iam.c @@ -0,0 +1,352 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * create_iam.c + * User-level tool for creation of iam files. + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> + +#include <sys/types.h> + +#ifdef HAVE_ENDIAN_H +#include <endian.h> +#endif + +#include <libcfs/libcfs.h> + +void usage(void) +{ + printf("usage: create_iam " + "[-h] [-k <keysize>] [-r recsize] [-b <blocksize] [-p <ptrsize>] [-v]\n"); +} + +enum { + IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL, + IAM_LVAR_ROOT_MAGIC = 0xb01dface +}; + +struct iam_lfix_root { + u_int64_t ilr_magic; + u_int16_t ilr_keysize; + u_int16_t ilr_recsize; + u_int16_t ilr_ptrsize; + u_int16_t ilr_indirect_levels; +}; + +enum { + IAM_LEAF_HEADER_MAGIC = 0x1976, + IAM_LVAR_LEAF_MAGIC = 0x1973 +}; + +struct iam_leaf_head { + u_int16_t ill_magic; + u_int16_t ill_count; +}; + +struct dx_countlimit { + u_int16_t limit; + u_int16_t count; +}; + +typedef __u32 lvar_hash_t; + +struct lvar_leaf_header { + u_int16_t vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */ + u_int16_t vlh_used; /* used bytes, including header */ +}; + +struct lvar_root { + u_int32_t vr_magic; + u_int16_t vr_recsize; + u_int16_t vr_ptrsize; + u_int8_t vr_indirect_levels; + u_int8_t vr_padding0; + u_int16_t vr_padding1; +}; + +struct lvar_leaf_entry { + u_int32_t vle_hash; + u_int16_t vle_keysize; + u_int8_t vle_key[0]; +}; + +enum { + LVAR_PAD = 4, + LVAR_ROUND = LVAR_PAD - 1 +}; + +static void lfix_root(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct iam_lfix_root *root; + struct dx_countlimit *limit; + void *entry; + + root = buf; + *root = (typeof(*root)) { + .ilr_magic = cpu_to_le64(IAM_LFIX_ROOT_MAGIC), + .ilr_keysize = cpu_to_le16(keysize), + .ilr_recsize = cpu_to_le16(recsize), + .ilr_ptrsize = cpu_to_le16(ptrsize), + .ilr_indirect_levels = 0 + }; + + limit = (void *)(root + 1); + *limit = (typeof(*limit)){ + /* + * limit itself + one pointer to the leaf. + */ + .count = cpu_to_le16(2), + .limit = (blocksize - sizeof *root) / (keysize + ptrsize) + }; + + entry = root + 1; + /* + * Skip over @limit. + */ + entry += keysize + ptrsize; + + /* + * Entry format is <key> followed by <ptr>. In the minimal tree + * consisting of a root and single node, <key> is a minimal possible + * key. + * + * XXX: this key is hard-coded to be a sequence of 0's. + */ + entry += keysize; + /* now @entry points to <ptr> */ + if (ptrsize == 4) + *(u_int32_t *)entry = cpu_to_le32(1); + else + *(u_int64_t *)entry = cpu_to_le64(1); +} + +static void lfix_leaf(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct iam_leaf_head *head; + + /* form leaf */ + head = buf; + *head = (struct iam_leaf_head) { + .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC), + /* + * Leaf contains an entry with the smallest possible key + * (created by zeroing). + */ + .ill_count = cpu_to_le16(1), + }; +} + +static void lvar_root(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct lvar_root *root; + struct dx_countlimit *limit; + void *entry; + int isize; + + isize = sizeof(lvar_hash_t) + ptrsize; + root = buf; + *root = (typeof(*root)) { + .vr_magic = cpu_to_le32(IAM_LVAR_ROOT_MAGIC), + .vr_recsize = cpu_to_le16(recsize), + .vr_ptrsize = cpu_to_le16(ptrsize), + .vr_indirect_levels = 0 + }; + + limit = (void *)(root + 1); + *limit = (typeof(*limit)){ + /* + * limit itself + one pointer to the leaf. + */ + .count = cpu_to_le16(2), + .limit = (blocksize - sizeof *root) / isize + }; + + entry = root + 1; + /* + * Skip over @limit. + */ + entry += isize; + + /* + * Entry format is <key> followed by <ptr>. In the minimal tree + * consisting of a root and single node, <key> is a minimal possible + * key. + * + * XXX: this key is hard-coded to be a sequence of 0's. + */ + entry += sizeof(lvar_hash_t); + /* now @entry points to <ptr> */ + if (ptrsize == 4) + *(u_int32_t *)entry = cpu_to_le32(1); + else + *(u_int64_t *)entry = cpu_to_le64(1); +} + +static int lvar_esize(int namelen, int recsize) +{ + return (offsetof(struct lvar_leaf_entry, vle_key) + + namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND; +} + +static void lvar_leaf(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct lvar_leaf_header *head; + + /* form leaf */ + head = buf; + *head = (typeof(*head)) { + .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC), + .vlh_used = cpu_to_le16(sizeof *head + lvar_esize(0, recsize)) + }; +} + +enum iam_fmt_t { + FMT_LFIX, + FMT_LVAR +}; + +int main(int argc, char **argv) +{ + int rc; + int opt; + int blocksize = 4096; + int keysize = 8; + int recsize = 8; + int ptrsize = 4; + int verbose = 0; + void *buf; + char *fmtstr = "lfix"; + enum iam_fmt_t fmt; + + do { + opt = getopt(argc, argv, "hb:k:r:p:vf:"); + switch (opt) { + case 'v': + verbose++; + case -1: + break; + case 'b': + blocksize = atoi(optarg); + break; + case 'k': + keysize = atoi(optarg); + break; + case 'r': + recsize = atoi(optarg); + break; + case 'p': + ptrsize = atoi(optarg); + break; + case 'f': + fmtstr = optarg; + break; + case '?': + default: + fprintf(stderr, "Unable to parse options."); + case 'h': + usage(); + return 0; + } + } while (opt != -1); + + if (ptrsize != 4 && ptrsize != 8) { + fprintf(stderr, "Invalid ptrsize (%i). " + "Only 4 and 8 are supported\n", ptrsize); + return 1; + } + + if (blocksize <= 100 || keysize < 1 || recsize < 0) { + fprintf(stderr, "Too small record, key or block block\n"); + return 1; + } + + if (keysize + recsize + sizeof(struct iam_leaf_head) > blocksize / 3) { + fprintf(stderr, "Too large (record, key) or too small block\n"); + return 1; + } + + if (!strcmp(fmtstr, "lfix")) + fmt = FMT_LFIX; + else if (!strcmp(fmtstr, "lvar")) + fmt = FMT_LVAR; + else { + fprintf(stderr, "Wrong format `%s'\n", fmtstr); + return 1; + } + + if (verbose > 0) { + fprintf(stderr, + "fmt: %s, key: %i, rec: %i, ptr: %i, block: %i\n", + fmtstr, keysize, recsize, ptrsize, blocksize); + } + buf = malloc(blocksize); + if (buf == NULL) { + fprintf(stderr, "Unable to allocate %i bytes\n", blocksize); + return 1; + } + + memset(buf, 0, blocksize); + + if (fmt == FMT_LFIX) + lfix_root(buf, blocksize, keysize, ptrsize, recsize); + else + lvar_root(buf, blocksize, keysize, ptrsize, recsize); + + rc = write(1, buf, blocksize); + if (rc != blocksize) { + fprintf(stderr, "Unable to write root node: %m (%i)\n", rc); + free(buf); + return 1; + } + + /* form leaf */ + memset(buf, 0, blocksize); + + if (fmt == FMT_LFIX) + lfix_leaf(buf, blocksize, keysize, ptrsize, recsize); + else + lvar_leaf(buf, blocksize, keysize, ptrsize, recsize); + + rc = write(1, buf, blocksize); + free(buf); + if (rc != blocksize) { + fprintf(stderr, "Unable to write leaf node: %m (%i)\n", rc); + return 1; + } + if (verbose > 0) + fprintf(stderr, "Don't forget to umount/mount " + "before accessing iam from the kernel!\n"); + return 0; +} diff --git a/lustre/utils/gss/.cvsignore b/lustre/utils/gss/.cvsignore new file mode 100644 index 0000000..15f680f --- /dev/null +++ b/lustre/utils/gss/.cvsignore @@ -0,0 +1,11 @@ +.Xrefs +Makefile +Makefile.in +.deps +tags +TAGS +lgssd +lsvcgssd +l_idmap +.*.cmd +.*.d diff --git a/lustre/utils/gss/Makefile.am b/lustre/utils/gss/Makefile.am new file mode 100644 index 0000000..01ee650 --- /dev/null +++ b/lustre/utils/gss/Makefile.am @@ -0,0 +1,67 @@ +# gss daemons Makefile + +SUBDIRS = + +AM_CFLAGS=$(LLCFLAGS) +AM_CPPFLAGS=$(LLCPPFLAGS) -DLUSTRE_UTILS=1 +AM_LDFLAGS := -L$(top_builddir)/lnet/utils + +LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a + +sbin_PROGRAMS = lgssd lsvcgssd l_idmap + +COMMON_SRCS = \ + context.c \ + context_lucid.c \ + context_mit.c \ + context_heimdal.c \ + context_spkm3.c \ + gss_util.c \ + gss_oids.c \ + err_util.c \ + lsupport.c \ + \ + context.h \ + err_util.h \ + gss_oids.h \ + gss_util.h \ + lsupport.h + +lgssd_SOURCES = \ + $(COMMON_SRCS) \ + gssd.c \ + gssd_main_loop.c \ + gssd_proc.c \ + krb5_util.c \ + \ + gssd.h \ + krb5_util.h \ + write_bytes.h + +lgssd_LDADD = $(GSSAPI_LIBS) $(KRBLIBS) +lgssd_LDFLAGS = $(KRBLDFLAGS) +lgssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(KRBCFLAGS) + +lsvcgssd_SOURCES = \ + $(COMMON_SRCS) \ + cacheio.c \ + svcgssd.c \ + svcgssd_main_loop.c \ + svcgssd_mech2file.c \ + svcgssd_proc.c \ + \ + cacheio.h \ + svcgssd.h + +lsvcgssd_LDADD = $(GSSAPI_LIBS) $(KRBLIBS) +lsvcgssd_LDFLAGS = $(KRBLDFLAGS) +lsvcgssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(KRBCFLAGS) + +l_idmap_SOURCES = \ + l_idmap.c \ + lsupport.c \ + err_util.c \ + \ + lsupport.h + +EXTRA_DIST = diff --git a/lustre/utils/gss/README b/lustre/utils/gss/README new file mode 100644 index 0000000..94d1dda --- /dev/null +++ b/lustre/utils/gss/README @@ -0,0 +1,12 @@ +lustre/utils/gss: client & server side gss daemons for Lustre. + +All files came from standard nfs-utils package, applied with patches +created by Cluster File Systems Inc. + +1. Stock nfs-utils-1.0.10.tgz +2. Apply nfs-utils-1.0.10-CITI_NFS4_ALL-3.dif from Center for Information + Technology Integration, University of Michigan + (http://www.citi.umich.edu/projects/nfsv4/linux/) +3. Apply lustre patch: nfs-utils-1.0.10-lustre.diff +4. Copy nfs-utils-1.0.10/aclocal/kerberos5.m4 to lustre/autoconf +5. Copy nfs-utils-1.0.10/utils/gssd/*.[ch] to here diff --git a/lustre/utils/gss/cacheio.c b/lustre/utils/gss/cacheio.c new file mode 100644 index 0000000..3b39316 --- /dev/null +++ b/lustre/utils/gss/cacheio.c @@ -0,0 +1,296 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * support/nfs/cacheio.c + * support IO on the cache channel files in 2.5 and beyond. + * These use 'qwords' which are like words, but with a little quoting. + * + */ + + +/* + * Support routines for text-based upcalls. + * Fields are separated by spaces. + * Fields are either mangled to quote space tab newline slosh with slosh + * or a hexified with a leading \x + * Record is terminated with newline. + * + */ + +#include "cacheio.h" +#include <stdio.h> +#include <ctype.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "err_util.h" + +void qword_add(char **bpp, int *lp, char *str) +{ + char *bp = *bpp; + int len = *lp; + char c; + + if (len < 0) return; + + while ((c=*str++) && len) + switch(c) { + case ' ': + case '\t': + case '\n': + case '\\': + if (len >= 4) { + *bp++ = '\\'; + *bp++ = '0' + ((c & 0300)>>6); + *bp++ = '0' + ((c & 0070)>>3); + *bp++ = '0' + ((c & 0007)>>0); + } + len -= 4; + break; + default: + *bp++ = c; + len--; + } + if (c || len <1) len = -1; + else { + *bp++ = ' '; + len--; + } + *bpp = bp; + *lp = len; +} + +void qword_addhex(char **bpp, int *lp, char *buf, int blen) +{ + char *bp = *bpp; + int len = *lp; + + if (len < 0) return; + + if (len > 2) { + *bp++ = '\\'; + *bp++ = 'x'; + len -= 2; + while (blen && len >= 2) { + unsigned char c = *buf++; + *bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1); + *bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1); + len -= 2; + blen--; + } + } + if (blen || len<1) len = -1; + else { + *bp++ = ' '; + len--; + } + *bpp = bp; + *lp = len; +} + +void qword_addint(char **bpp, int *lp, int n) +{ + int len; + + len = snprintf(*bpp, *lp, "%d ", n); + if (len > *lp) + len = *lp; + *bpp += len; + *lp -= len; +} + +void qword_addeol(char **bpp, int *lp) +{ + if (*lp <= 0) + return; + **bpp = '\n'; + (*bpp)++; + (*lp)--; +} + +static char qword_buf[8192]; +static char tmp_buf[8192]; +void qword_print(FILE *f, char *str) +{ + char *bp = qword_buf; + int len = sizeof(qword_buf); + qword_add(&bp, &len, str); + fwrite(qword_buf, bp-qword_buf, 1, f); + /* XXX: */ + memcpy(tmp_buf, qword_buf, bp-qword_buf); + tmp_buf[bp-qword_buf] = '\0'; + printerr(2, "%s", tmp_buf); +} + +void qword_printhex(FILE *f, char *str, int slen) +{ + char *bp = qword_buf; + int len = sizeof(qword_buf); + qword_addhex(&bp, &len, str, slen); + fwrite(qword_buf, bp-qword_buf, 1, f); + /* XXX: */ + memcpy(tmp_buf, qword_buf, bp-qword_buf); + tmp_buf[bp-qword_buf] = '\0'; + printerr(2, "%s", tmp_buf); +} + +void qword_printint(FILE *f, int num) +{ + fprintf(f, "%d ", num); + printerr(2, "%d ", num); +} + +void qword_eol(FILE *f) +{ + fprintf(f,"\n"); + fflush(f); + printerr(2, "\n"); +} + + + +#define isodigit(c) (isdigit(c) && c <= '7') +int qword_get(char **bpp, char *dest, int bufsize) +{ + /* return bytes copied, or -1 on error */ + char *bp = *bpp; + int len = 0; + + while (*bp == ' ') bp++; + + if (bp[0] == '\\' && bp[1] == 'x') { + /* HEX STRING */ + bp += 2; + while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { + int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; + bp++; + byte <<= 4; + byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; + *dest++ = byte; + bp++; + len++; + } + } else { + /* text with \nnn octal quoting */ + while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) { + if (*bp == '\\' && + isodigit(bp[1]) && (bp[1] <= '3') && + isodigit(bp[2]) && + isodigit(bp[3])) { + int byte = (*++bp -'0'); + bp++; + byte = (byte << 3) | (*bp++ - '0'); + byte = (byte << 3) | (*bp++ - '0'); + *dest++ = byte; + len++; + } else { + *dest++ = *bp++; + len++; + } + } + } + + if (*bp != ' ' && *bp != '\n' && *bp != '\0') + return -1; + while (*bp == ' ') bp++; + *bpp = bp; +// why should we clear *dest??? +// *dest = '\0'; + return len; +} + +int qword_get_int(char **bpp, int *anint) +{ + char buf[50]; + char *ep; + int rv; + int len = qword_get(bpp, buf, 50); + if (len < 0) return -1; + if (len ==0) return -1; + rv = strtol(buf, &ep, 0); + if (*ep) return -1; + *anint = rv; + return 0; +} + +#define READLINE_BUFFER_INCREMENT 2048 + +int readline(int fd, char **buf, int *lenp) +{ + /* read a line into *buf, which is malloced *len long + * realloc if needed until we find a \n + * nul out the \n and return + * 0 of eof, 1 of success + */ + int len; + + if (*lenp == 0) { + char *b = malloc(READLINE_BUFFER_INCREMENT); + if (b == NULL) + return 0; + *buf = b; + *lenp = READLINE_BUFFER_INCREMENT; + } + len = read(fd, *buf, *lenp); + if (len <= 0) { + printerr(0, "readline: read error: len %d errno %d (%s)\n", + len, errno, strerror(errno)); + return 0; + } + while ((*buf)[len-1] != '\n') { + /* now the less common case. There was no newline, + * so we have to keep reading after re-alloc + */ + char *new; + int nl; + *lenp += READLINE_BUFFER_INCREMENT; + new = realloc(*buf, *lenp); + if (new == NULL) + return 0; + *buf = new; + nl = read(fd, *buf +len, *lenp - len); + if (nl <= 0 ) { + printerr(0, "readline: read error: len %d " + "errno %d (%s)\n", nl, errno, strerror(errno)); + return 0; + } + len += nl; + } + (*buf)[len-1] = 0; + printerr(3, "readline: read %d chars into buffer of size %d:\n%s\n", + len, *lenp, *buf); + return 1; +} diff --git a/lustre/utils/gss/cacheio.h b/lustre/utils/gss/cacheio.h new file mode 100644 index 0000000..cc97b36 --- /dev/null +++ b/lustre/utils/gss/cacheio.h @@ -0,0 +1,48 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _CACHEIO_H_ +#define _CACHEIO_H_ + +#include <stdio.h> + +void qword_add(char **bpp, int *lp, char *str); +void qword_addhex(char **bpp, int *lp, char *buf, int blen); +void qword_addint(char **bpp, int *lp, int n); +void qword_addeol(char **bpp, int *lp); +void qword_print(FILE *f, char *str); +void qword_printhex(FILE *f, char *str, int slen); +void qword_printint(FILE *f, int num); +void qword_eol(FILE *f); +int readline(int fd, char **buf, int *lenp); +int qword_get(char **bpp, char *dest, int bufsize); +int qword_get_int(char **bpp, int *anint); + +#endif /* _CACHEIO_H_ */ diff --git a/lustre/utils/gss/context.c b/lustre/utils/gss/context.c new file mode 100644 index 0000000..5f347bb --- /dev/null +++ b/lustre/utils/gss/context.c @@ -0,0 +1,57 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "config.h" +#include <stdio.h> +#include <syslog.h> +#include <string.h> +#include <gssapi/gssapi.h> +#include "gss_util.h" +#include "gss_oids.h" +#include "err_util.h" +#include "context.h" + +int +serialize_context_for_kernel(gss_ctx_id_t ctx, + gss_buffer_desc *buf, + gss_OID mech) +{ + if (g_OID_equal(&krb5oid, mech)) + return serialize_krb5_ctx(ctx, buf); +#ifdef HAVE_SPKM3_H + else if (g_OID_equal(&spkm3oid, mech)) + return serialize_spkm3_ctx(ctx, buf); +#endif + else { + printerr(0, "ERROR: attempting to serialize context with " + "unknown/unsupported mechanism oid\n"); + return -1; + } +} diff --git a/lustre/utils/gss/context.h b/lustre/utils/gss/context.h new file mode 100644 index 0000000..8243f5d --- /dev/null +++ b/lustre/utils/gss/context.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2004-2006 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _CONTEXT_H_ +#define _CONTEXT_H_ + +/* Hopefully big enough to hold any serialized context */ +#define MAX_CTX_LEN 4096 + +/* New context format flag values */ +#define KRB5_CTX_FLAG_INITIATOR 0x00000001 +#define KRB5_CTX_FLAG_CFX 0x00000002 +#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + +int serialize_context_for_kernel(gss_ctx_id_t ctx, gss_buffer_desc *buf, + gss_OID mech); +int serialize_spkm3_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf); +int serialize_krb5_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf); + +#endif /* _CONTEXT_H_ */ diff --git a/lustre/utils/gss/context_heimdal.c b/lustre/utils/gss/context_heimdal.c new file mode 100644 index 0000000..5520cbc --- /dev/null +++ b/lustre/utils/gss/context_heimdal.c @@ -0,0 +1,267 @@ +/* + Copyright (c) 2004-2006 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "config.h" + +#ifndef HAVE_LUCID_CONTEXT_SUPPORT +#ifdef HAVE_HEIMDAL + +#include <stdio.h> +#include <stdlib.h> +#include <syslog.h> +#include <string.h> +#include <errno.h> +#include <krb5.h> +#include <gssapi.h> /* Must use the heimdal copy! */ +#ifdef HAVE_COM_ERR_H +#include <com_err.h> +#endif +#include "err_util.h" +#include "gss_oids.h" +#include "write_bytes.h" + +int write_heimdal_keyblock(char **p, char *end, krb5_keyblock *key) +{ + gss_buffer_desc tmp; + int code = -1; + + if (WRITE_BYTES(p, end, key->keytype)) goto out_err; + tmp.length = key->keyvalue.length; + tmp.value = key->keyvalue.data; + if (write_buffer(p, end, &tmp)) goto out_err; + code = 0; + out_err: + return(code); +} + +int write_heimdal_enc_key(char **p, char *end, gss_ctx_id_t ctx) +{ + krb5_keyblock enc_key, *key; + krb5_context context; + krb5_error_code ret; + int i; + char *skd, *dkd; + int code = -1; + + if ((ret = krb5_init_context(&context))) { + printerr(0, "ERROR: initializing krb5_context: %s\n", + error_message(ret)); + goto out_err; + } + + if ((ret = krb5_auth_con_getlocalsubkey(context, + ctx->auth_context, &key))){ + printerr(0, "ERROR: getting auth_context key: %s\n", + error_message(ret)); + goto out_err_free_context; + } + + memset(&enc_key, 0, sizeof(enc_key)); + enc_key.keytype = key->keytype; + /* XXX current kernel code only handles des-cbc-raw (4) */ + if (enc_key.keytype != 4) { + printerr(1, "WARN: write_heimdal_enc_key: " + "overriding heimdal keytype (%d => %d)\n", + enc_key.keytype, 4); + enc_key.keytype = 4; + } + enc_key.keyvalue.length = key->keyvalue.length; + if ((enc_key.keyvalue.data = + calloc(1, enc_key.keyvalue.length)) == NULL) { + + printerr(0, "ERROR: allocating memory for enc key: %s\n", + error_message(ENOMEM)); + goto out_err_free_key; + } + skd = (char *) key->keyvalue.data; + dkd = (char *) enc_key.keyvalue.data; + for (i = 0; i < enc_key.keyvalue.length; i++) + dkd[i] = skd[i] ^ 0xf0; + if (write_heimdal_keyblock(p, end, &enc_key)) { + goto out_err_free_enckey; + } + + code = 0; + + out_err_free_enckey: + krb5_free_keyblock_contents(context, &enc_key); + out_err_free_key: + krb5_free_keyblock(context, key); + out_err_free_context: + krb5_free_context(context); + out_err: + printerr(2, "write_heimdal_enc_key: %s\n", code ? "FAILED" : "SUCCESS"); + return(code); +} + +int write_heimdal_seq_key(char **p, char *end, gss_ctx_id_t ctx) +{ + krb5_keyblock *key; + krb5_context context; + krb5_error_code ret; + int code = -1; + + if ((ret = krb5_init_context(&context))) { + printerr(0, "ERROR: initializing krb5_context: %s\n", + error_message(ret)); + goto out_err; + } + + if ((ret = krb5_auth_con_getlocalsubkey(context, + ctx->auth_context, &key))){ + printerr(0, "ERROR: getting auth_context key: %s\n", + error_message(ret)); + goto out_err_free_context; + } + + /* XXX current kernel code only handles des-cbc-raw (4) */ + if (key->keytype != 4) { + printerr(1, "WARN: write_heimdal_seq_key: " + "overriding heimdal keytype (%d => %d)\n", + key->keytype, 4); + key->keytype = 4; + } + + if (write_heimdal_keyblock(p, end, key)) { + goto out_err_free_key; + } + + code = 0; + + out_err_free_key: + krb5_free_keyblock(context, key); + out_err_free_context: + krb5_free_context(context); + out_err: + printerr(2, "write_heimdal_seq_key: %s\n", code ? "FAILED" : "SUCCESS"); + return(code); +} + +/* + * The following is the kernel structure that we are filling in: + * + * struct krb5_ctx { + * int initiate; + * int seed_init; + * unsigned char seed[16]; + * int signalg; + * int sealalg; + * struct crypto_tfm *enc; + * struct crypto_tfm *seq; + * s32 endtime; + * u32 seq_send; + * struct xdr_netobj mech_used; + * }; + * + * However, note that we do not send the data fields in the + * order they appear in the structure. The order they are + * sent down in is: + * + * initiate + * seed_init + * seed + * signalg + * sealalg + * endtime + * seq_send + * mech_used + * enc key + * seq key + * + */ + +int +serialize_krb5_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf) +{ + + char *p, *end; + static int constant_one = 1; + static int constant_zero = 0; + unsigned char fakeseed[16]; + uint32_t algorithm; + + if (!(buf->value = calloc(1, MAX_CTX_LEN))) + goto out_err; + p = buf->value; + end = buf->value + MAX_CTX_LEN; + + + /* initiate: 1 => initiating 0 => accepting */ + if (ctx->more_flags & LOCAL) { + if (WRITE_BYTES(&p, end, constant_one)) goto out_err; + } + else { + if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; + } + + /* seed_init: not used by kernel code */ + if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; + + /* seed: not used by kernel code */ + memset(&fakeseed, 0, sizeof(fakeseed)); + if (write_bytes(&p, end, &fakeseed, 16)) goto out_err; + + /* signalg */ + algorithm = 0; /* SGN_ALG_DES_MAC_MD5 XXX */ + if (WRITE_BYTES(&p, end, algorithm)) goto out_err; + + /* sealalg */ + algorithm = 0; /* SEAL_ALG_DES XXX */ + if (WRITE_BYTES(&p, end, algorithm)) goto out_err; + + /* endtime */ + if (WRITE_BYTES(&p, end, ctx->lifetime)) goto out_err; + + /* seq_send */ + if (WRITE_BYTES(&p, end, ctx->auth_context->local_seqnumber)) + goto out_err; + /* mech_used */ + if (write_buffer(&p, end, (gss_buffer_desc*)&krb5oid)) goto out_err; + + /* enc: derive the encryption key and copy it into buffer */ + if (write_heimdal_enc_key(&p, end, ctx)) goto out_err; + + /* seq: get the sequence number key and copy it into buffer */ + if (write_heimdal_seq_key(&p, end, ctx)) goto out_err; + + buf->length = p - (char *)buf->value; + printerr(2, "serialize_krb5_ctx: returning buffer " + "with %d bytes\n", buf->length); + + return 0; +out_err: + printerr(0, "ERROR: failed exporting Heimdal krb5 ctx to kernel\n"); + if (buf->value) free(buf->value); + buf->length = 0; + return -1; +} + +#endif /* HAVE_HEIMDAL */ +#endif /* HAVE_LUCID_CONTEXT_SUPPORT */ diff --git a/lustre/utils/gss/context_lucid.c b/lustre/utils/gss/context_lucid.c new file mode 100644 index 0000000..2f802de --- /dev/null +++ b/lustre/utils/gss/context_lucid.c @@ -0,0 +1,604 @@ +/* + * COPYRIGHT (c) 2006 + * The Regents of the University of Michigan + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of The University of + * Michigan is not used in any advertising or publicity + * pertaining to the use of distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * University of Michigan is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION + * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY + * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF + * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE + * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR + * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING + * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN + * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. + */ + +#include "config.h" + +#ifdef HAVE_LUCID_CONTEXT_SUPPORT + +/* + * Newer versions of MIT and Heimdal have lucid context support. + * We can use common code if it is supported. + */ + +#include <stdio.h> +#include <syslog.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <krb5.h> +#include <gssapi/gssapi.h> +#ifndef OM_uint64 +typedef uint64_t OM_uint64; +#endif +#include <gssapi/gssapi_krb5.h> + +#include "gss_util.h" +#include "gss_oids.h" +#include "err_util.h" +#include "context.h" + +static int +write_lucid_keyblock(char **p, char *end, gss_krb5_lucid_key_t *key) +{ + gss_buffer_desc tmp; + + if (WRITE_BYTES(p, end, key->type)) return -1; + tmp.length = key->length; + tmp.value = key->data; + if (write_buffer(p, end, &tmp)) return -1; + return 0; +} + +static int +prepare_krb5_rfc1964_buffer(gss_krb5_lucid_context_v1_t *lctx, + gss_buffer_desc *buf) +{ + char *p, *end; + static int constant_zero = 0; + unsigned char fakeseed[16]; + uint32_t word_send_seq; + gss_krb5_lucid_key_t enc_key; + int i; + char *skd, *dkd; + gss_buffer_desc fakeoid; + + /* + * The new Kerberos interface to get the gss context + * does not include the seed or seed_init fields + * because we never really use them. But for now, + * send down a fake buffer so we can use the same + * interface to the kernel. + */ + memset(&enc_key, 0, sizeof(enc_key)); + memset(&fakeoid, 0, sizeof(fakeoid)); + + if (!(buf->value = calloc(1, MAX_CTX_LEN))) + goto out_err; + p = buf->value; + end = buf->value + MAX_CTX_LEN; + + if (WRITE_BYTES(&p, end, lctx->initiate)) goto out_err; + + /* seed_init and seed not used by kernel anyway */ + if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; + if (write_bytes(&p, end, &fakeseed, 16)) goto out_err; + + if (WRITE_BYTES(&p, end, lctx->rfc1964_kd.sign_alg)) goto out_err; + if (WRITE_BYTES(&p, end, lctx->rfc1964_kd.seal_alg)) goto out_err; + if (WRITE_BYTES(&p, end, lctx->endtime)) goto out_err; + word_send_seq = lctx->send_seq; /* XXX send_seq is 64-bit */ + if (WRITE_BYTES(&p, end, word_send_seq)) goto out_err; + if (write_oid(&p, end, &krb5oid)) goto out_err; + +#ifdef HAVE_HEIMDAL + /* + * The kernel gss code expects des-cbc-raw for all flavors of des. + * The keytype from MIT has this type, but Heimdal does not. + * Force the Heimdal keytype to 4 (des-cbc-raw). + * Note that the rfc1964 version only supports DES enctypes. + */ + if (lctx->rfc1964_kd.ctx_key.type != 4) { + printerr(2, "%s: overriding heimdal keytype (%d => %d)\n", + __FUNCTION__, lctx->rfc1964_kd.ctx_key.type, 4); + lctx->rfc1964_kd.ctx_key.type = 4; + } +#endif + printerr(2, "%s: serializing keys with enctype %d and length %d\n", + __FUNCTION__, lctx->rfc1964_kd.ctx_key.type, + lctx->rfc1964_kd.ctx_key.length); + + /* derive the encryption key and copy it into buffer */ + enc_key.type = lctx->rfc1964_kd.ctx_key.type; + enc_key.length = lctx->rfc1964_kd.ctx_key.length; + if ((enc_key.data = calloc(1, enc_key.length)) == NULL) + goto out_err; + skd = (char *) lctx->rfc1964_kd.ctx_key.data; + dkd = (char *) enc_key.data; + for (i = 0; i < enc_key.length; i++) + dkd[i] = skd[i] ^ 0xf0; + if (write_lucid_keyblock(&p, end, &enc_key)) { + free(enc_key.data); + goto out_err; + } + free(enc_key.data); + + if (write_lucid_keyblock(&p, end, &lctx->rfc1964_kd.ctx_key)) + goto out_err; + + buf->length = p - (char *)buf->value; + return 0; +out_err: + printerr(0, "ERROR: failed serializing krb5 context for kernel\n"); + if (buf->value) free(buf->value); + buf->length = 0; + if (enc_key.data) free(enc_key.data); + return -1; +} + +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ + +/* for 3DES */ +#define KG_USAGE_SEAL 22 +#define KG_USAGE_SIGN 23 +#define KG_USAGE_SEQ 24 + +/* for rfc???? */ +#define KG_USAGE_ACCEPTOR_SEAL 22 +#define KG_USAGE_ACCEPTOR_SIGN 23 +#define KG_USAGE_INITIATOR_SEAL 24 +#define KG_USAGE_INITIATOR_SIGN 25 + +/* Lifted from mit src/lib/gssapi/krb5/gssapiP_krb5.h */ +enum seal_alg { + SEAL_ALG_NONE = 0xffff, + SEAL_ALG_DES = 0x0000, + SEAL_ALG_1 = 0x0001, /* not published */ + SEAL_ALG_MICROSOFT_RC4 = 0x0010, /* microsoft w2k; */ + SEAL_ALG_DES3KD = 0x0002 +}; + +#define KEY_USAGE_SEED_ENCRYPTION 0xAA +#define KEY_USAGE_SEED_INTEGRITY 0x55 +#define KEY_USAGE_SEED_CHECKSUM 0x99 +#define K5CLENGTH 5 + +/* Flags for version 2 context flags */ +#define KRB5_CTX_FLAG_INITIATOR 0x00000001 +#define KRB5_CTX_FLAG_CFX 0x00000002 +#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* + * We don't have "legal" access to these MIT-only + * structures located in libk5crypto + */ +extern void krb5int_enc_arcfour; +extern void krb5int_enc_des3; +extern void krb5int_enc_aes128; +extern void krb5int_enc_aes256; +extern int krb5_derive_key(); + +static void +key_lucid_to_krb5(const gss_krb5_lucid_key_t *lin, krb5_keyblock *kout) +{ + memset(kout, '\0', sizeof(kout)); +#ifdef HAVE_KRB5 + kout->enctype = lin->type; + kout->length = lin->length; + kout->contents = lin->data; +#else + kout->keytype = lin->type; + kout->keyvalue.length = lin->length; + kout->keyvalue.data = lin->data; +#endif +} + +static void +key_krb5_to_lucid(const krb5_keyblock *kin, gss_krb5_lucid_key_t *lout) +{ + memset(lout, '\0', sizeof(lout)); +#ifdef HAVE_KRB5 + lout->type = kin->enctype; + lout->length = kin->length; + lout->data = kin->contents; +#else + lout->type = kin->keytype; + lout->length = kin->keyvalue.length; + memcpy(lout->data, kin->keyvalue.data, kin->keyvalue.length); +#endif +} + +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* XXX Hack alert! XXX Do NOT submit upstream! XXX */ +/* + * Function to derive a new key from a given key and given constant data. + */ +static krb5_error_code +derive_key_lucid(const gss_krb5_lucid_key_t *in, gss_krb5_lucid_key_t *out, + int usage, char extra) +{ + krb5_error_code code; + unsigned char constant_data[K5CLENGTH]; + krb5_data datain; + int keylength; + void *enc; + krb5_keyblock kin, kout; /* must send krb5_keyblock, not lucid! */ +#ifdef HAVE_HEIMDAL + krb5_context kcontext; + krb5_keyblock *outkey; +#endif + + /* + * XXX Hack alert. We don't have "legal" access to these + * values and structures located in libk5crypto + */ + switch (in->type) { + case ENCTYPE_DES3_CBC_SHA1: +#ifdef HAVE_KRB5 + case ENCTYPE_DES3_CBC_RAW: +#endif + keylength = 24; +#ifdef HAVE_KRB5 + enc = &krb5int_enc_des3; +#endif + break; + case ENCTYPE_AES128_CTS_HMAC_SHA1_96: + keylength = 16; +#ifdef HAVE_KRB5 + enc = &krb5int_enc_aes128; +#endif + break; + case ENCTYPE_AES256_CTS_HMAC_SHA1_96: + keylength = 32; +#ifdef HAVE_KRB5 + enc = &krb5int_enc_aes256; +#endif + break; + default: + code = KRB5_BAD_ENCTYPE; + goto out; + } + + /* allocate memory for output key */ + if ((out->data = malloc(keylength)) == NULL) { + code = ENOMEM; + goto out; + } + out->length = keylength; + out->type = in->type; + + /* Convert to correct format for call to krb5_derive_key */ + key_lucid_to_krb5(in, &kin); + key_lucid_to_krb5(out, &kout); + + datain.data = (char *) constant_data; + datain.length = K5CLENGTH; + + ((char *)(datain.data))[0] = (usage>>24)&0xff; + ((char *)(datain.data))[1] = (usage>>16)&0xff; + ((char *)(datain.data))[2] = (usage>>8)&0xff; + ((char *)(datain.data))[3] = usage&0xff; + + ((char *)(datain.data))[4] = (char) extra; + +#ifdef HAVE_KRB5 + code = krb5_derive_key(enc, &kin, &kout, &datain); +#else + if ((code = krb5_init_context(&kcontext))) { + } + code = krb5_derive_key(kcontext, &kin, in->type, constant_data, K5CLENGTH, &outkey); +#endif + if (code) { + free(out->data); + out->data = NULL; + goto out; + } +#ifdef HAVE_KRB5 + key_krb5_to_lucid(&kout, out); +#else + key_krb5_to_lucid(outkey, out); + krb5_free_keyblock(kcontext, outkey); + krb5_free_context(kcontext); +#endif + + out: + if (code) + printerr(0, "ERROR: %s: returning error %d (%s)\n", + __FUNCTION__, code, error_message(code)); + return (code); +} + + +/* + * Prepare a new-style buffer, as defined in rfc4121 (a.k.a. cfx), + * to send to the kernel for newer encryption types -- or for DES3. + * + * The new format is: + * + * u32 initiate; ( whether we are the initiator or not ) + * s32 endtime; + * u32 flags; + * #define KRB5_CTX_FLAG_INITIATOR 0x00000001 + * #define KRB5_CTX_FLAG_CFX 0x00000002 + * #define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + * u64 seq_send; + * u32 enctype; ( encrption type of keys ) + * u32 size_of_each_key; ( size of each key in bytes ) + * u32 number_of_keys; ( N -- should always be 3 for now ) + * keydata-1; ( Ke ) + * keydata-2; ( Ki ) + * keydata-3; ( Kc ) + * + */ +static int +prepare_krb5_rfc4121_buffer(gss_krb5_lucid_context_v1_t *lctx, + gss_buffer_desc *buf) +{ + static int constant_two = 2; + char *p, *end; + uint32_t v2_flags = 0; + gss_krb5_lucid_key_t enc_key; + gss_krb5_lucid_key_t derived_key; + gss_buffer_desc fakeoid; + uint32_t enctype; + uint32_t keysize; + uint32_t numkeys; + + memset(&enc_key, 0, sizeof(enc_key)); + memset(&fakeoid, 0, sizeof(fakeoid)); + + if (!(buf->value = calloc(1, MAX_CTX_LEN))) + goto out_err; + p = buf->value; + end = buf->value + MAX_CTX_LEN; + + /* Version 2 */ + if (WRITE_BYTES(&p, end, constant_two)) goto out_err; + if (WRITE_BYTES(&p, end, lctx->endtime)) goto out_err; + + if (lctx->initiate) + v2_flags |= KRB5_CTX_FLAG_INITIATOR; + if (lctx->protocol != 0) + v2_flags |= KRB5_CTX_FLAG_CFX; + if (lctx->protocol != 0 && lctx->cfx_kd.have_acceptor_subkey == 1) + v2_flags |= KRB5_CTX_FLAG_ACCEPTOR_SUBKEY; + + if (WRITE_BYTES(&p, end, v2_flags)) goto out_err; + + if (WRITE_BYTES(&p, end, lctx->send_seq)) goto out_err; + + /* Protocol 0 here implies DES3 or RC4 */ + printerr(2, "%s: protocol %d\n", __FUNCTION__, lctx->protocol); + if (lctx->protocol == 0) { + enctype = lctx->rfc1964_kd.ctx_key.type; +#ifdef HAVE_HEIMDAL + /* + * The kernel gss code expects ENCTYPE_DES3_CBC_RAW (6) for + * 3des keys, but Heimdal key has ENCTYPE_DES3_CBC_SHA1 (16). + * Force the Heimdal enctype to 6. + */ + if (enctype == ENCTYPE_DES3_CBC_SHA1) { + printerr(2, "%s: overriding heimdal keytype (%d => %d)\n", + __FUNCTION__, enctype, 6); + + enctype = 6; + } +#endif + keysize = lctx->rfc1964_kd.ctx_key.length; + numkeys = 3; /* XXX is always gonna be three? */ + } else { + if (lctx->cfx_kd.have_acceptor_subkey) { + enctype = lctx->cfx_kd.acceptor_subkey.type; + keysize = lctx->cfx_kd.acceptor_subkey.length; + } else { + enctype = lctx->cfx_kd.ctx_key.type; + keysize = lctx->cfx_kd.ctx_key.length; + } + numkeys = 3; + } + printerr(2, "%s: serializing %d keys with enctype %d and size %d\n", + __FUNCTION__, numkeys, enctype, keysize); + if (WRITE_BYTES(&p, end, enctype)) goto out_err; + if (WRITE_BYTES(&p, end, keysize)) goto out_err; + if (WRITE_BYTES(&p, end, numkeys)) goto out_err; + + if (lctx->protocol == 0) { + /* derive and send down: Ke, Ki, and Kc */ + /* Ke */ + if (write_bytes(&p, end, lctx->rfc1964_kd.ctx_key.data, + lctx->rfc1964_kd.ctx_key.length)) + goto out_err; + + /* Ki */ + if (write_bytes(&p, end, lctx->rfc1964_kd.ctx_key.data, + lctx->rfc1964_kd.ctx_key.length)) + goto out_err; + + /* Kc */ + /* + * RC4 is special, it dosen't need key derivation. Actually + * the Ke is based on plain text. Here we just let all three + * key identical, kernel will handle everything. --ericm + */ + if (lctx->rfc1964_kd.ctx_key.type == ENCTYPE_ARCFOUR_HMAC) { + if (write_bytes(&p, end, lctx->rfc1964_kd.ctx_key.data, + lctx->rfc1964_kd.ctx_key.length)) + goto out_err; + } else { + if (derive_key_lucid(&lctx->rfc1964_kd.ctx_key, + &derived_key, + KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM)) + goto out_err; + if (write_bytes(&p, end, derived_key.data, + derived_key.length)) + goto out_err; + free(derived_key.data); + } + } else { + gss_krb5_lucid_key_t *keyptr; + uint32_t sign_usage, seal_usage; + + if (lctx->cfx_kd.have_acceptor_subkey) + keyptr = &lctx->cfx_kd.acceptor_subkey; + else + keyptr = &lctx->cfx_kd.ctx_key; + +#if 0 + if (lctx->initiate == 1) { + sign_usage = KG_USAGE_INITIATOR_SIGN; + seal_usage = KG_USAGE_INITIATOR_SEAL; + } else { + sign_usage = KG_USAGE_ACCEPTOR_SIGN; + seal_usage = KG_USAGE_ACCEPTOR_SEAL; + } +#else + /* FIXME + * These are from rfc4142, but I don't understand: if we supply + * different 'usage' value for client & server, then the peers + * will have different derived keys. How could this work? + * + * Here we simply use old SIGN/SEAL values until we find the + * answer. --ericm + * FIXME + */ + sign_usage = KG_USAGE_SIGN; + seal_usage = KG_USAGE_SEAL; +#endif + + /* derive and send down: Ke, Ki, and Kc */ + + /* Ke */ + if (derive_key_lucid(keyptr, &derived_key, + seal_usage, KEY_USAGE_SEED_ENCRYPTION)) + goto out_err; + if (write_bytes(&p, end, derived_key.data, + derived_key.length)) + goto out_err; + free(derived_key.data); + + /* Ki */ + if (derive_key_lucid(keyptr, &derived_key, + seal_usage, KEY_USAGE_SEED_INTEGRITY)) + goto out_err; + if (write_bytes(&p, end, derived_key.data, + derived_key.length)) + goto out_err; + free(derived_key.data); + + /* Kc */ + if (derive_key_lucid(keyptr, &derived_key, + sign_usage, KEY_USAGE_SEED_CHECKSUM)) + goto out_err; + if (write_bytes(&p, end, derived_key.data, + derived_key.length)) + goto out_err; + free(derived_key.data); + } + + buf->length = p - (char *)buf->value; + return 0; + +out_err: + printerr(0, "ERROR: %s: failed serializing krb5 context for kernel\n", + __FUNCTION__); + if (buf->value) { + free(buf->value); + buf->value = NULL; + } + buf->length = 0; + if (enc_key.data) { + free(enc_key.data); + enc_key.data = NULL; + } + return -1; +} +int +serialize_krb5_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf) +{ + OM_uint32 maj_stat, min_stat; + void *return_ctx = 0; + OM_uint32 vers; + gss_krb5_lucid_context_v1_t *lctx = 0; + int retcode = 0; + + printerr(2, "DEBUG: %s: lucid version!\n", __FUNCTION__); + maj_stat = gss_export_lucid_sec_context(&min_stat, &ctx, + 1, &return_ctx); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_export_lucid_sec_context", + maj_stat, min_stat, &krb5oid); + goto out_err; + } + + /* Check the version returned, we only support v1 right now */ + vers = ((gss_krb5_lucid_context_version_t *)return_ctx)->version; + switch (vers) { + case 1: + lctx = (gss_krb5_lucid_context_v1_t *) return_ctx; + break; + default: + printerr(0, "ERROR: unsupported lucid sec context version %d\n", + vers); + goto out_err; + break; + } + + /* + * Now lctx points to a lucid context that we can send down to kernel + * + * Note: we send down different information to the kernel depending + * on the protocol version and the enctyption type. + * For protocol version 0 with all enctypes besides DES3, we use + * the original format. For protocol version != 0 or DES3, we + * send down the new style information. + */ + + if (lctx->protocol == 0 && lctx->rfc1964_kd.ctx_key.type <= 4) + retcode = prepare_krb5_rfc1964_buffer(lctx, buf); + else + retcode = prepare_krb5_rfc4121_buffer(lctx, buf); + + maj_stat = gss_free_lucid_sec_context(&min_stat, ctx, return_ctx); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_export_lucid_sec_context", + maj_stat, min_stat, &krb5oid); + printerr(0, "WARN: failed to free lucid sec context\n"); + } + + if (retcode) { + printerr(1, "%s: prepare_krb5_*_buffer failed (retcode = %d)\n", + __FUNCTION__, retcode); + goto out_err; + } + + return 0; + +out_err: + printerr(0, "ERROR: failed serializing krb5 context for kernel\n"); + return -1; +} + + + +#endif /* HAVE_LUCID_CONTEXT_SUPPORT */ diff --git a/lustre/utils/gss/context_mit.c b/lustre/utils/gss/context_mit.c new file mode 100644 index 0000000..43fc81d --- /dev/null +++ b/lustre/utils/gss/context_mit.c @@ -0,0 +1,392 @@ +/* + Copyright (c) 2004-2006 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "config.h" + +#ifndef HAVE_LUCID_CONTEXT_SUPPORT +#ifdef HAVE_KRB5 + +#include <stdio.h> +#include <syslog.h> +#include <string.h> +#include <errno.h> +#include <gssapi/gssapi.h> +#include <rpc/rpc.h> +#include "gss_util.h" +#include "gss_oids.h" +#include "err_util.h" +#include "context.h" + +#include <krb5.h> + +#if (KRB5_VERSION > 131) +/* XXX argggg, there's gotta be a better way than just duplicating this + * whole struct. Unfortunately, this is in a "private" header file, + * so this is our best choice at this point :-/ + */ + +typedef struct _krb5_gss_ctx_id_rec { + unsigned int initiate : 1; /* nonzero if initiating, zero if accepting */ + unsigned int established : 1; + unsigned int big_endian : 1; + unsigned int have_acceptor_subkey : 1; + unsigned int seed_init : 1; /* XXX tested but never actually set */ +#ifdef CFX_EXERCISE + unsigned int testing_unknown_tokid : 1; /* for testing only */ +#endif + OM_uint32 gss_flags; + unsigned char seed[16]; + krb5_principal here; + krb5_principal there; + krb5_keyblock *subkey; + int signalg; + size_t cksum_size; + int sealalg; + krb5_keyblock *enc; + krb5_keyblock *seq; + krb5_timestamp endtime; + krb5_flags krb_flags; + /* XXX these used to be signed. the old spec is inspecific, and + the new spec specifies unsigned. I don't believe that the change + affects the wire encoding. */ + uint64_t seq_send; /* gssint_uint64 */ + uint64_t seq_recv; /* gssint_uint64 */ + void *seqstate; + krb5_auth_context auth_context; + gss_OID_desc *mech_used; /* gss_OID_desc */ + /* Protocol spec revision + 0 => RFC 1964 with 3DES and RC4 enhancements + 1 => draft-ietf-krb-wg-gssapi-cfx-01 + No others defined so far. */ + int proto; + krb5_cksumtype cksumtype; /* for "main" subkey */ + krb5_keyblock *acceptor_subkey; /* CFX only */ + krb5_cksumtype acceptor_subkey_cksumtype; +#ifdef CFX_EXERCISE + gss_buffer_desc init_token; +#endif +} krb5_gss_ctx_id_rec, *krb5_gss_ctx_id_t; + +#else /* KRB5_VERSION > 131 */ + +typedef struct _krb5_gss_ctx_id_rec { + int initiate; + u_int32_t gss_flags; + int seed_init; + unsigned char seed[16]; + krb5_principal here; + krb5_principal there; + krb5_keyblock *subkey; + int signalg; + int cksum_size; + int sealalg; + krb5_keyblock *enc; + krb5_keyblock *seq; + krb5_timestamp endtime; + krb5_flags krb_flags; + krb5_ui_4 seq_send; + krb5_ui_4 seq_recv; + void *seqstate; + int established; + int big_endian; + krb5_auth_context auth_context; + gss_OID_desc *mech_used; + int nctypes; + krb5_cksumtype *ctypes; +} krb5_gss_ctx_id_rec, *krb5_gss_ctx_id_t; + +#endif /* KRB5_VERSION */ + + +static int +write_keyblock(char **p, char *end, struct _krb5_keyblock *arg) +{ + gss_buffer_desc tmp; + + if (WRITE_BYTES(p, end, arg->enctype)) return -1; + tmp.length = arg->length; + tmp.value = arg->contents; + if (write_buffer(p, end, &tmp)) return -1; + return 0; +} + +/* + * XXX Hack alert! XXX Do NOT submit upstream! + * XXX Hack alert! XXX Do NOT submit upstream! + * + * We shouldn't be using these definitions + * + * XXX Hack alert! XXX Do NOT submit upstream! + * XXX Hack alert! XXX Do NOT submit upstream! + */ +/* for 3DES */ +#define KG_USAGE_SEAL 22 +#define KG_USAGE_SIGN 23 +#define KG_USAGE_SEQ 24 + +/* for rfc???? */ +#define KG_USAGE_ACCEPTOR_SEAL 22 +#define KG_USAGE_ACCEPTOR_SIGN 23 +#define KG_USAGE_INITIATOR_SEAL 24 +#define KG_USAGE_INITIATOR_SIGN 25 + +/* Lifted from mit src/lib/gssapi/krb5/gssapiP_krb5.h */ +enum seal_alg { + SEAL_ALG_NONE = 0xffff, + SEAL_ALG_DES = 0x0000, + SEAL_ALG_1 = 0x0001, /* not published */ + SEAL_ALG_MICROSOFT_RC4 = 0x0010, /* microsoft w2k; */ + SEAL_ALG_DES3KD = 0x0002 +}; + +#define KEY_USAGE_SEED_ENCRYPTION 0xAA +#define KEY_USAGE_SEED_INTEGRITY 0x55 +#define KEY_USAGE_SEED_CHECKSUM 0x99 +#define K5CLENGTH 5 + +extern void krb5_enc_des3; +extern void krb5int_enc_des3; +extern void krb5int_enc_arcfour; +extern void krb5int_enc_aes128; +extern void krb5int_enc_aes256; +extern int krb5_derive_key(); + +/* + * XXX Hack alert! XXX Do NOT submit upstream! + * XXX Hack alert! XXX Do NOT submit upstream! + * + * We should be passing down a single key to the kernel + * and it should be deriving the other keys. We cannot + * depend on any of this stuff being accessible in the + * future. + * + * XXX Hack alert! XXX Do NOT submit upstream! + * XXX Hack alert! XXX Do NOT submit upstream! + */ +/* + * Function to derive a new key from a given key and given constant data. + */ +static krb5_error_code +derive_key(const krb5_keyblock *in, krb5_keyblock *out, int usage, char extra) +{ + krb5_error_code code; + unsigned char constant_data[K5CLENGTH]; + krb5_data datain; + int keylength; + void *enc; + + switch (in->enctype) { +#ifdef ENCTYPE_DES3_CBC_RAW + case ENCTYPE_DES3_CBC_RAW: + keylength = 24; +/* Extra hack, the structure was renamed as rc4 was added... */ +#if defined(ENCTYPE_ARCFOUR_HMAC) + enc = &krb5int_enc_des3; +#else + enc = &krb5_enc_des3; +#endif + break; +#endif +#ifdef ENCTYPE_ARCFOUR_HMAC + case ENCTYPE_ARCFOUR_HMAC: + keylength = 16; + enc = &krb5int_enc_arcfour; + break; +#endif + default: + code = KRB5_BAD_ENCTYPE; + goto out; + } + + /* allocate memory for output key */ + if ((out->contents = malloc(keylength)) == NULL) { + code = ENOMEM; + goto out; + } + out->length = keylength; + out->enctype = in->enctype; + + datain.data = (char *) constant_data; + datain.length = K5CLENGTH; + + datain.data[0] = (usage>>24)&0xff; + datain.data[1] = (usage>>16)&0xff; + datain.data[2] = (usage>>8)&0xff; + datain.data[3] = usage&0xff; + + datain.data[4] = (char) extra; + + if ((code = krb5_derive_key(enc, in, out, &datain))) { + free(out->contents); + out->contents = NULL; + } + + out: + if (code) + printerr(0, "ERROR: derive_key returning error %d (%s)\n", + code, error_message(code)); + return (code); +} + +/* + * We really shouldn't know about glue-layer context structure, but + * we need to get at the real krb5 context pointer. This should be + * removed as soon as we say there is no support for MIT Kerberos + * prior to 1.4 -- which gives us "legal" access to the context info. + */ +typedef struct gss_union_ctx_id_t { + gss_OID mech_type; + gss_ctx_id_t internal_ctx_id; +} gss_union_ctx_id_desc, *gss_union_ctx_id_t; + +int +serialize_krb5_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf) +{ + krb5_gss_ctx_id_t kctx = ((gss_union_ctx_id_t)ctx)->internal_ctx_id; + char *p, *end; + static int constant_zero = 0; + static int constant_one = 1; + static int constant_two = 2; + uint32_t word_seq_send; + u_int64_t seq_send_64bit; + uint32_t v2_flags = 0; + krb5_keyblock derived_key; + uint32_t numkeys; + + if (!(buf->value = calloc(1, MAX_CTX_LEN))) + goto out_err; + p = buf->value; + end = buf->value + MAX_CTX_LEN; + + switch (kctx->sealalg) { + case SEAL_ALG_DES: + /* Old format of context to the kernel */ + if (kctx->initiate) { + if (WRITE_BYTES(&p, end, constant_one)) goto out_err; + } + else { + if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; + } + if (kctx->seed_init) { + if (WRITE_BYTES(&p, end, constant_one)) goto out_err; + } + else { + if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; + } + if (write_bytes(&p, end, &kctx->seed, sizeof(kctx->seed))) + goto out_err; + if (WRITE_BYTES(&p, end, kctx->signalg)) goto out_err; + if (WRITE_BYTES(&p, end, kctx->sealalg)) goto out_err; + if (WRITE_BYTES(&p, end, kctx->endtime)) goto out_err; + word_seq_send = kctx->seq_send; + if (WRITE_BYTES(&p, end, word_seq_send)) goto out_err; + if (write_oid(&p, end, kctx->mech_used)) goto out_err; + + printerr(2, "serialize_krb5_ctx: serializing keys with " + "enctype %d and length %d\n", + kctx->enc->enctype, kctx->enc->length); + + if (write_keyblock(&p, end, kctx->enc)) goto out_err; + if (write_keyblock(&p, end, kctx->seq)) goto out_err; + break; + case SEAL_ALG_MICROSOFT_RC4: + case SEAL_ALG_DES3KD: + /* New format of context to the kernel */ + /* s32 endtime; + * u32 flags; + * #define KRB5_CTX_FLAG_INITIATOR 0x00000001 + * #define KRB5_CTX_FLAG_CFX 0x00000002 + * #define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 + * u64 seq_send; + * u32 enctype; + * u32 size_of_each_key; ( size in bytes ) + * u32 number_of_keys; ( N (assumed to be 3 for now) ) + * keydata-1; ( Ke (Kenc for DES3) ) + * keydata-2; ( Ki (Kseq for DES3) ) + * keydata-3; ( Kc (derived checksum key) ) + */ + if (WRITE_BYTES(&p, end, constant_two)) goto out_err; + if (WRITE_BYTES(&p, end, kctx->endtime)) goto out_err; + + /* Only applicable flag for this is initiator */ + if (kctx->initiate) v2_flags |= KRB5_CTX_FLAG_INITIATOR; + if (WRITE_BYTES(&p, end, v2_flags)) goto out_err; + + seq_send_64bit = kctx->seq_send; + if (WRITE_BYTES(&p, end, seq_send_64bit)) goto out_err; + + if (WRITE_BYTES(&p, end, kctx->enc->enctype)) goto out_err; + if (WRITE_BYTES(&p, end, kctx->enc->length)) goto out_err; + numkeys = 3; + if (WRITE_BYTES(&p, end, numkeys)) goto out_err; + printerr(2, "serialize_krb5_ctx: serializing %d keys with " + "enctype %d and size %d\n", + numkeys, kctx->enc->enctype, kctx->enc->length); + + /* Ke */ + if (write_bytes(&p, end, kctx->enc->contents, + kctx->enc->length)) + goto out_err; + + /* Ki */ + if (write_bytes(&p, end, kctx->enc->contents, + kctx->enc->length)) + goto out_err; + + /* Kc */ + if (derive_key(kctx->seq, &derived_key, + KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM)) + goto out_err; + if (write_bytes(&p, end, derived_key.contents, + derived_key.length)) + goto out_err; + free(derived_key.contents); + break; + default: + printerr(0, "ERROR: serialize_krb5_ctx: unsupported seal " + "algorithm %d\n", kctx->sealalg); + goto out_err; + } + + buf->length = p - (char *)buf->value; + return 0; + +out_err: + printerr(0, "ERROR: failed serializing krb5 context for kernel\n"); + if (buf->value) { + free(buf->value); + } + buf->value = NULL; + buf->length = 0; + return -1; +} + +#endif /* HAVE_KRB5 */ +#endif /* HAVE_LUCID_CONTEXT_SUPPORT */ diff --git a/lustre/utils/gss/context_spkm3.c b/lustre/utils/gss/context_spkm3.c new file mode 100644 index 0000000..7a77bef --- /dev/null +++ b/lustre/utils/gss/context_spkm3.c @@ -0,0 +1,176 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "config.h" +#include <stdio.h> +#include <syslog.h> +#include <string.h> +#include <gssapi/gssapi.h> +#include "gss_util.h" +#include "gss_oids.h" +#include "err_util.h" +#include "context.h" + +#ifdef HAVE_SPKM3_H + +#include <spkm3.h> + +/* + * Function: prepare_spkm3_ctx_buffer() + * + * Prepare spkm3 lucid context for the kernel + * + * buf->length should be: + * + * version 4 + * ctx_id 4 + 12 + * qop 4 + * mech_used 4 + 7 + * ret_fl 4 + * req_fl 4 + * share 4 + key_len + * conf_alg 4 + oid_len + * d_conf_key 4 + key_len + * intg_alg 4 + oid_len + * d_intg_key 4 + key_len + * kyestb 4 + oid_len + * owl alg 4 + oid_len +*/ +static int +prepare_spkm3_ctx_buffer(gss_spkm3_lucid_ctx_t *lctx, gss_buffer_desc *buf) +{ + char *p, *end; + unsigned int buf_size = 0; + + buf_size = sizeof(lctx->version) + + lctx->ctx_id.length + sizeof(lctx->ctx_id.length) + + sizeof(lctx->endtime) + + sizeof(lctx->mech_used.length) + lctx->mech_used.length + + sizeof(lctx->ret_flags) + + sizeof(lctx->conf_alg.length) + lctx->conf_alg.length + + sizeof(lctx->derived_conf_key.length) + + lctx->derived_conf_key.length + + sizeof(lctx->intg_alg.length) + lctx->intg_alg.length + + sizeof(lctx->derived_integ_key.length) + + lctx->derived_integ_key.length; + + if (!(buf->value = calloc(1, buf_size))) + goto out_err; + p = buf->value; + end = buf->value + buf_size; + + if (WRITE_BYTES(&p, end, lctx->version)) + goto out_err; + printerr(2, "DEBUG: exporting version = %d\n", lctx->version); + + if (write_buffer(&p, end, &lctx->ctx_id)) + goto out_err; + printerr(2, "DEBUG: exporting ctx_id(%d)\n", lctx->ctx_id.length); + + if (WRITE_BYTES(&p, end, lctx->endtime)) + goto out_err; + printerr(2, "DEBUG: exporting endtime = %d\n", lctx->endtime); + + if (write_buffer(&p, end, &lctx->mech_used)) + goto out_err; + printerr(2, "DEBUG: exporting mech oid (%d)\n", lctx->mech_used.length); + + if (WRITE_BYTES(&p, end, lctx->ret_flags)) + goto out_err; + printerr(2, "DEBUG: exporting ret_flags = %d\n", lctx->ret_flags); + + if (write_buffer(&p, end, &lctx->conf_alg)) + goto out_err; + printerr(2, "DEBUG: exporting conf_alg oid (%d)\n", lctx->conf_alg.length); + + if (write_buffer(&p, end, &lctx->derived_conf_key)) + goto out_err; + printerr(2, "DEBUG: exporting conf key (%d)\n", lctx->derived_conf_key.length); + + if (write_buffer(&p, end, &lctx->intg_alg)) + goto out_err; + printerr(2, "DEBUG: exporting intg_alg oid (%d)\n", lctx->intg_alg.length); + + if (write_buffer(&p, end, &lctx->derived_integ_key)) + goto out_err; + printerr(2, "DEBUG: exporting intg key (%d)\n", lctx->derived_integ_key.length); + + buf->length = p - (char *)buf->value; + return 0; +out_err: + printerr(0, "ERROR: failed serializing spkm3 context for kernel\n"); + if (buf->value) free(buf->value); + buf->length = 0; + + return -1; +} + +/* ANDROS: need to determine which fields of the spkm3_gss_ctx_id_desc_t + * are needed in the kernel for get_mic, validate, wrap, unwrap, and destroy + * and only export those fields to the kernel. + */ +int +serialize_spkm3_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf) +{ + OM_uint32 vers, ret, maj_stat, min_stat; + void *ret_ctx = 0; + gss_spkm3_lucid_ctx_t *lctx; + + printerr(1, "serialize_spkm3_ctx called\n"); + + printerr(2, "DEBUG: serialize_spkm3_ctx: lucid version!\n"); + maj_stat = gss_export_lucid_sec_context(&min_stat, &ctx, 1, &ret_ctx); + if (maj_stat != GSS_S_COMPLETE) + goto out_err; + + lctx = (gss_spkm3_lucid_ctx_t *)ret_ctx; + + vers = lctx->version; + if (vers != 1) { + printerr(0, "ERROR: unsupported spkm3 context version %d\n", + vers); + goto out_err; + } + ret = prepare_spkm3_ctx_buffer(lctx, buf); + + maj_stat = gss_free_lucid_sec_context(&min_stat, ctx, ret_ctx); + + if (maj_stat != GSS_S_COMPLETE) + printerr(0, "WARN: failed to free lucid sec context\n"); + if (ret) + goto out_err; + printerr(2, "DEBUG: serialize_spkm3_ctx: success\n"); + return 0; + +out_err: + printerr(2, "DEBUG: serialize_spkm3_ctx: failed\n"); + return -1; +} +#endif /* HAVE_SPKM3_H */ diff --git a/lustre/utils/gss/err_util.c b/lustre/utils/gss/err_util.c new file mode 100644 index 0000000..376fb59 --- /dev/null +++ b/lustre/utils/gss/err_util.c @@ -0,0 +1,132 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdio.h> +#include <stdarg.h> +#include <syslog.h> +#include <string.h> +#include <fcntl.h> +#include <ctype.h> +#include "err_util.h" + +static int verbosity = 0; +static int fg = 0; + +static char message_buf[500]; + +void initerr(char *progname, int set_verbosity, int set_fg) +{ + verbosity = set_verbosity; + fg = set_fg; + if (!fg) + openlog(progname, LOG_PID, LOG_DAEMON); +} + + +void printerr(int priority, char *format, ...) +{ + va_list args; + int ret; + int buf_used, buf_available; + char *buf; + + /* Don't bother formatting a message we're never going to print! */ + if (priority > verbosity) + return; + + buf_used = strlen(message_buf); + /* subtract 4 to leave room for "...\n" if necessary */ + buf_available = sizeof(message_buf) - buf_used - 4; + buf = message_buf + buf_used; + + /* + * Aggregate lines: only print buffer when we get to the + * end of a line or run out of space + */ + va_start(args, format); + ret = vsnprintf(buf, buf_available, format, args); + va_end(args); + + if (ret < 0) + goto printit; + if (ret >= buf_available) { + /* Indicate we're truncating */ + strcat(message_buf, "...\n"); + goto printit; + } + if (message_buf[strlen(message_buf) - 1] == '\n') + goto printit; + return; +printit: + if (fg) { + fprintf(stderr, "%s", message_buf); + } else { + syslog(LOG_ERR, "%s", message_buf); + } + /* reset the buffer */ + memset(message_buf, 0, sizeof(message_buf)); +} + +void print_hexl(int pri, unsigned char *cp, int length) +{ + int i, j, jm; + unsigned char c; + + printerr(pri, "length %d\n",length); + printerr(pri, "\n"); + + for (i = 0; i < length; i += 0x10) { + printerr(pri, " %04x: ", (u_int)i); + jm = length - i; + jm = jm > 16 ? 16 : jm; + + for (j = 0; j < jm; j++) { + if ((j % 2) == 1) + printerr(pri,"%02x ", (u_int)cp[i+j]); + else + printerr(pri,"%02x", (u_int)cp[i+j]); + } + for (; j < 16; j++) { + if ((j % 2) == 1) + printerr(pri," "); + else + printerr(pri," "); + } + printerr(pri," "); + + for (j = 0; j < jm; j++) { + c = cp[i+j]; + c = isprint(c) ? c : '.'; + printerr(pri,"%c", c); + } + printerr(pri,"\n"); + } +} + diff --git a/lustre/utils/gss/err_util.h b/lustre/utils/gss/err_util.h new file mode 100644 index 0000000..1d6b20c --- /dev/null +++ b/lustre/utils/gss/err_util.h @@ -0,0 +1,38 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _ERR_UTIL_H_ +#define _ERR_UTIL_H_ + +void initerr(char *progname, int verbosity, int fg); +void printerr(int priority, char *format, ...); +void print_hexl(int pri, unsigned char *cp, int length); + +#endif /* _ERR_UTIL_H_ */ diff --git a/lustre/utils/gss/gss_oids.c b/lustre/utils/gss/gss_oids.c new file mode 100644 index 0000000..c569b0c --- /dev/null +++ b/lustre/utils/gss/gss_oids.c @@ -0,0 +1,39 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <sys/types.h> +#include <gssapi/gssapi.h> + +/* from kerberos source, gssapi_krb5.c */ +gss_OID_desc krb5oid = + {9, "\052\206\110\206\367\022\001\002\002"}; + +gss_OID_desc spkm3oid = + {7, "\053\006\001\005\005\001\003"}; diff --git a/lustre/utils/gss/gss_oids.h b/lustre/utils/gss/gss_oids.h new file mode 100644 index 0000000..8b0a352 --- /dev/null +++ b/lustre/utils/gss/gss_oids.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _GSS_OIDS_H_ +#define _GSS_OIDS_H_ + +#include <sys/types.h> + +extern gss_OID_desc krb5oid; +extern gss_OID_desc spkm3oid; + +#ifndef g_OID_equal +#define g_OID_equal(o1,o2) \ + (((o1)->length == (o2)->length) && \ + (memcmp((o1)->elements,(o2)->elements,(unsigned int) (o1)->length) == 0)) +#endif + +#endif /* _GSS_OIDS_H_ */ diff --git a/lustre/utils/gss/gss_util.c b/lustre/utils/gss/gss_util.c new file mode 100644 index 0000000..b08f2f5 --- /dev/null +++ b/lustre/utils/gss/gss_util.c @@ -0,0 +1,402 @@ +/* + * Adapted in part from MIT Kerberos 5-1.2.1 slave/kprop.c and from + * http://docs.sun.com/?p=/doc/816-1331/6m7oo9sms&a=view + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + * J. Bruce Fields <bfields@umich.edu> + * Marius Aamodt Eriksen <marius@umich.edu> + */ + +/* + * slave/kprop.c + * + * Copyright 1990,1991 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + */ + +/* + * Copyright 1994 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <sys/file.h> +#include <signal.h> +#include <string.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <sys/param.h> +#include <netdb.h> +#include <fcntl.h> +#include <gssapi/gssapi.h> +#if defined(HAVE_KRB5) && !defined(GSS_C_NT_HOSTBASED_SERVICE) +#include <gssapi/gssapi_generic.h> +#define GSS_C_NT_HOSTBASED_SERVICE gss_nt_service_name +#endif +#include "gss_util.h" +#include "err_util.h" +#include "gssd.h" +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <stdlib.h> +#ifdef HAVE_COM_ERR_H +#include <com_err.h> +#endif +#include "lsupport.h" + +/* Global gssd_credentials handle */ +gss_cred_id_t gssd_cred_mds; +gss_cred_id_t gssd_cred_oss; +int gssd_cred_mds_valid = 0; +int gssd_cred_oss_valid = 0; + +char *mds_local_realm = NULL; +char *oss_local_realm = NULL; + +gss_OID g_mechOid = GSS_C_NULL_OID;; + +#if 0 +static void +display_status_1(char *m, u_int32_t code, int type, const gss_OID mech) +{ + u_int32_t maj_stat, min_stat; + gss_buffer_desc msg = GSS_C_EMPTY_BUFFER; + u_int32_t msg_ctx = 0; + char *typestr; + + switch (type) { + case GSS_C_GSS_CODE: + typestr = "GSS"; + break; + case GSS_C_MECH_CODE: + typestr = "mechanism"; + break; + default: + return; + /* NOTREACHED */ + } + + for (;;) { + maj_stat = gss_display_status(&min_stat, code, + type, mech, &msg_ctx, &msg); + if (maj_stat != GSS_S_COMPLETE) { + printerr(0, "ERROR: in call to " + "gss_display_status called from %s\n", m); + break; + } else { + printerr(0, "ERROR: GSS-API: (%s) error in %s(): %s\n", + typestr, m, (char *)msg.value); + } + + if (msg.length != 0) + (void) gss_release_buffer(&min_stat, &msg); + + if (msg_ctx == 0) + break; + } +} +#endif + +static void +display_status_2(char *m, u_int32_t major, u_int32_t minor, const gss_OID mech) +{ + u_int32_t maj_stat1, min_stat1; + u_int32_t maj_stat2, min_stat2; + gss_buffer_desc maj_gss_buf = GSS_C_EMPTY_BUFFER; + gss_buffer_desc min_gss_buf = GSS_C_EMPTY_BUFFER; + char maj_buf[30], min_buf[30]; + char *maj, *min; + u_int32_t msg_ctx = 0; + + /* Get major status message */ + maj_stat1 = gss_display_status(&min_stat1, major, + GSS_C_GSS_CODE, mech, &msg_ctx, &maj_gss_buf); + + if (maj_stat1 != GSS_S_COMPLETE) { + snprintf(maj_buf, sizeof(maj_buf), "(0x%08x)", major); + maj = &maj_buf[0]; + } else { + maj = maj_gss_buf.value; + } + + /* Get minor status message */ + maj_stat2 = gss_display_status(&min_stat2, minor, + GSS_C_MECH_CODE, mech, &msg_ctx, &min_gss_buf); + + if (maj_stat2 != GSS_S_COMPLETE) { + snprintf(min_buf, sizeof(min_buf), "(0x%08x)", minor); + min = &min_buf[0]; + } else { + min = min_gss_buf.value; + } + + printerr(0, "ERROR: GSS-API: error in %s(): %s - %s\n", + m, maj, min); + + if (maj_gss_buf.length != 0) + (void) gss_release_buffer(&min_stat1, &maj_gss_buf); + if (min_gss_buf.length != 0) + (void) gss_release_buffer(&min_stat2, &min_gss_buf); +} + +void +pgsserr(char *msg, u_int32_t maj_stat, u_int32_t min_stat, const gss_OID mech) +{ + display_status_2(msg, maj_stat, min_stat, mech); +} + +static +int extract_realm_name(gss_buffer_desc *name, char **realm) +{ + char *sname, *c; + int rc = 0; + + sname = malloc(name->length + 1); + if (!sname) { + printerr(0, "out of memory\n"); + return -ENOMEM; + } + + memcpy(sname, name->value, name->length); + sname[name->length] = '\0'; + printerr(1, "service principal: %s\n", sname); + + c = strchr(sname, '@'); + if (!c) { + printerr(2, "no realm found in principal, use default\n"); + *realm = strdup(this_realm); + if (!*realm) { + printerr(0, "failed to duplicate default realm\n"); + rc = -ENOMEM; + } + } else { + c++; + *realm = strdup(c); + if (!*realm) { + printerr(0, "failed to duplicated realm\n"); + rc = -ENOMEM; + } + } + free(sname); + + return rc; +} + +static +int gssd_acquire_cred(char *server_name, gss_cred_id_t *cred, + char **local_realm, int *valid) +{ + gss_buffer_desc name; + gss_name_t target_name; + u_int32_t maj_stat, min_stat; + u_int32_t ignore_maj_stat, ignore_min_stat; + gss_OID name_type; + gss_buffer_desc pbuf; + + *valid = 0; + + name.value = (void *)server_name; + name.length = strlen(server_name); + + maj_stat = gss_import_name(&min_stat, &name, + (const gss_OID) GSS_C_NT_HOSTBASED_SERVICE, + &target_name); + + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_import_name", maj_stat, min_stat, g_mechOid); + return -1; + } + + maj_stat = gss_display_name(&min_stat, target_name, &name, &name_type); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr(0, maj_stat, min_stat, g_mechOid); + return -1; + } + if (extract_realm_name(&name, local_realm)) + return -1; + + maj_stat = gss_acquire_cred(&min_stat, target_name, 0, + GSS_C_NULL_OID_SET, GSS_C_ACCEPT, + cred, NULL, NULL); + + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_acquire_cred", maj_stat, min_stat, g_mechOid); + ignore_maj_stat = gss_display_name(&ignore_min_stat, + target_name, &pbuf, NULL); + if (ignore_maj_stat == GSS_S_COMPLETE) { + printerr(0, "Unable to obtain credentials for '%.*s'\n", + pbuf.length, pbuf.value); + ignore_maj_stat = gss_release_buffer(&ignore_min_stat, + &pbuf); + } + } else + *valid = 1; + + ignore_maj_stat = gss_release_name(&ignore_min_stat, &target_name); + + if (maj_stat != GSS_S_COMPLETE) + return -1; + return 0; +} + +int gssd_prepare_creds(int must_srv_mds, int must_srv_oss) +{ + if (gssd_acquire_cred(GSSD_SERVICE_MDS, &gssd_cred_mds, + &mds_local_realm, &gssd_cred_mds_valid)) { + if (must_srv_mds) + return -1; + } + + if (gssd_acquire_cred(GSSD_SERVICE_OSS, &gssd_cred_oss, + &oss_local_realm, &gssd_cred_oss_valid)) { + if (must_srv_oss) + return -1; + } + + if (!gssd_cred_mds_valid && !gssd_cred_oss_valid) { + printerr(0, "can't obtain both mds & oss creds, exit\n"); + return -1; + } + + if (gssd_cred_mds_valid) + printerr(0, "Ready to serve Lustre MDS in realm %s\n", + mds_local_realm ? mds_local_realm : "N/A"); + if (gssd_cred_oss_valid) + printerr(0, "Ready to serve Lustre OSS in realm %s\n", + oss_local_realm ? oss_local_realm : "N/A"); + + return 0; +} + +gss_cred_id_t gssd_select_svc_cred(int lustre_svc) +{ + switch (lustre_svc) { + case LUSTRE_GSS_SVC_MDS: + if (!gssd_cred_mds_valid) { + printerr(0, "ERROR: service cred for mds not ready\n"); + return NULL; + } + printerr(2, "select mds service cred\n"); + return gssd_cred_mds; + case LUSTRE_GSS_SVC_OSS: + if (!gssd_cred_oss_valid) { + printerr(0, "ERROR: service cred for oss not ready\n"); + return NULL; + } + printerr(2, "select oss service cred\n"); + return gssd_cred_oss; + default: + printerr(0, "ERROR: invalid lustre svc id %d\n", lustre_svc); + } + + return NULL; +} + +int gssd_check_mechs(void) +{ + u_int32_t maj_stat, min_stat; + gss_OID_set supported_mechs = GSS_C_NO_OID_SET; + int retval = -1; + + maj_stat = gss_indicate_mechs(&min_stat, &supported_mechs); + if (maj_stat != GSS_S_COMPLETE) { + printerr(0, "Unable to obtain list of supported mechanisms. " + "Check that gss library is properly configured.\n"); + goto out; + } + if (supported_mechs == GSS_C_NO_OID_SET || + supported_mechs->count == 0) { + printerr(0, "Unable to obtain list of supported mechanisms. " + "Check that gss library is properly configured.\n"); + goto out; + } + maj_stat = gss_release_oid_set(&min_stat, &supported_mechs); + retval = 0; +out: + return retval; +} + +/********************************* + * FIXME should be in krb5_util.c + *********************************/ + +#include "krb5_util.h" + +/* realm of this node */ +char *this_realm = NULL; + +int gssd_get_local_realm(void) +{ + krb5_context context = NULL; + krb5_error_code code; + int retval = -1; + + if (this_realm != NULL) + return 0; + + code = krb5_init_context(&context); + if (code) { + printerr(0, "ERROR: get default realm: init ctx: %s\n", + error_message(code)); + goto out; + } + + code = krb5_get_default_realm(context, &this_realm); + if (code) { + printerr(0, "ERROR: get default realm: %s\n", + error_message(code)); + goto out; + } + retval = 0; + + printerr(1, "Local realm: %s\n", this_realm); +out: + krb5_free_context(context); + return retval; +} + diff --git a/lustre/utils/gss/gss_util.h b/lustre/utils/gss/gss_util.h new file mode 100644 index 0000000..7a2a6b5 --- /dev/null +++ b/lustre/utils/gss/gss_util.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _GSS_UTIL_H_ +#define _GSS_UTIL_H_ + +#include <stdlib.h> +#include "write_bytes.h" + +char *this_realm; +extern gss_cred_id_t gssd_creds; + +void pgsserr(char *msg, u_int32_t maj_stat, u_int32_t min_stat, + const gss_OID mech); +int gssd_check_mechs(void); +int gssd_get_local_realm(void); + +#endif /* _GSS_UTIL_H_ */ diff --git a/lustre/utils/gss/gssd.c b/lustre/utils/gss/gssd.c new file mode 100644 index 0000000..c23e644 --- /dev/null +++ b/lustre/utils/gss/gssd.c @@ -0,0 +1,259 @@ +/* + gssd.c + + Copyright (c) 2000 The Regents of the University of Michigan. + All rights reserved. + + Copyright (c) 2000 Dug Song <dugsong@UMICH.EDU>. + Copyright (c) 2002 Andy Adamson <andros@UMICH.EDU>. + Copyright (c) 2002 Marius Aamodt Eriksen <marius@UMICH.EDU>. + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "config.h" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/sem.h> + +#include <unistd.h> +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <errno.h> +#include "gssd.h" +#include "err_util.h" +#include "gss_util.h" +#include "krb5_util.h" +#include "lsupport.h" + +char pipefs_dir[PATH_MAX] = GSSD_PIPEFS_DIR; +char pipefs_nfsdir[PATH_MAX] = GSSD_PIPEFS_DIR; +char keytabfile[PATH_MAX] = GSSD_DEFAULT_KEYTAB_FILE; +char ccachedir[PATH_MAX] = GSSD_DEFAULT_CRED_DIR; +int use_memcache = 0; +int lgssd_mutex_downcall = -1; + +static int lgssd_create_mutex(int *semid) +{ + int id; + int arg; + + id = semget(IPC_PRIVATE, 1, IPC_CREAT); + if (id == -1) { + printerr(0, "semget: %s\n", strerror(errno)); + return -1; + } + + arg = 1; + if (semctl(id, 0, SETVAL, arg) != 0) { + printerr(0, "semctl: %s\n", strerror(errno)); + semctl(id, 1, IPC_RMID, arg); + return -1; + } + + *semid = id; + return 0; +} + +void lgssd_init_mutexs(void) +{ + if (lgssd_create_mutex(&lgssd_mutex_downcall)) { + printerr(0, "can't create downcall mutex\n"); + exit(1); + } +} + +void lgssd_fini_mutexs(void) +{ + int arg = 0; + + if (lgssd_mutex_downcall != -1) + semctl(lgssd_mutex_downcall, 1, IPC_RMID, arg); +} + +void lgssd_mutex_get(int semid) +{ + struct sembuf op[1] = { {0, -1, SEM_UNDO} }; + int rc; + + rc = semop(semid, op, 1); + if (rc != 0) { + printerr(0, "exit on mutex_get err %d: %s\n", + rc, strerror(errno)); + exit(1); + } +} + +void lgssd_mutex_put(int semid) +{ + struct sembuf op[1] = { {0, 1, 0} }; + int rc; + + rc = semop(semid, op, 1); + if (rc != 0) { + printerr(0, "ignore mutex_put err %d: %s\n", + rc, strerror(errno)); + } +} + +static void lgssd_cleanup(void) +{ + pid_t child_pid; + + /* make sure all children finished */ + while (1) { + child_pid = waitpid(-1, NULL, 0); + if (child_pid < 0) + break; + + printerr(3, "cleanup: child %d terminated\n", child_pid); + } + + lgssd_fini_mutexs(); + + /* destroy krb5 machine creds */ + gssd_destroy_krb5_machine_creds(); +} + +void +sig_die(int signal) +{ + printerr(1, "exiting on signal %d\n", signal); + lgssd_cleanup(); + exit(1); +} + +void +sig_hup(int signal) +{ + /* don't exit on SIGHUP */ + printerr(1, "Received SIGHUP... Ignoring.\n"); + return; +} + +static void +usage(char *progname) +{ + fprintf(stderr, "usage: %s [-f] [-v] [-p pipefsdir] [-k keytab] [-d ccachedir]\n", + progname); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + int fg = 0; + int verbosity = 0; + int opt; + extern char *optarg; + char *progname; + + while ((opt = getopt(argc, argv, "fvrmMp:k:d:")) != -1) { + switch (opt) { + case 'f': + fg = 1; + break; + case 'M': + use_memcache = 1; + break; + case 'v': + verbosity++; + break; + case 'p': + strncpy(pipefs_dir, optarg, sizeof(pipefs_dir)); + if (pipefs_dir[sizeof(pipefs_dir)-1] != '\0') + errx(1, "pipefs path name too long"); + break; + case 'k': + strncpy(keytabfile, optarg, sizeof(keytabfile)); + if (keytabfile[sizeof(keytabfile)-1] != '\0') + errx(1, "keytab path name too long"); + break; + case 'd': + strncpy(ccachedir, optarg, sizeof(ccachedir)); + if (ccachedir[sizeof(ccachedir-1)] != '\0') + errx(1, "ccachedir path name too long"); + break; + default: + usage(argv[0]); + break; + } + } + + if ((progname = strrchr(argv[0], '/'))) + progname++; + else + progname = argv[0]; + + initerr(progname, verbosity, fg); + + if (gssd_check_mechs() != 0) + errx(1, "Problem with gssapi library"); + + if (gssd_get_local_realm()) + errx(1, "get local realm"); + + if (!fg && daemon(0, 0) < 0) + errx(1, "fork"); + + /* This should be checked _after_ daemon(), because we need to own + * the undo-able semaphore by this process + */ + gssd_init_unique(GSSD_CLI); + + /* Process keytab file and get machine credentials. This will modify + * disk status so do it after we are sure we are the only instance + */ + if (gssd_refresh_krb5_machine_creds()) + return -1; + + signal(SIGINT, sig_die); + signal(SIGTERM, sig_die); + signal(SIGHUP, sig_hup); + +#if 0 + /* Determine Kerberos information from the kernel */ + gssd_obtain_kernel_krb5_info(); +#endif + + lgssd_init_mutexs(); + + printerr(0, "lgssd initialized and ready to serve\n"); + lgssd_run(); + + lgssd_cleanup(); + printerr(0, "lgssd exiting\n"); + return 0; +} diff --git a/lustre/utils/gss/gssd.h b/lustre/utils/gss/gssd.h new file mode 100644 index 0000000..5f0006e --- /dev/null +++ b/lustre/utils/gss/gssd.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RPC_GSSD_H_ +#define _RPC_GSSD_H_ + +#include <sys/types.h> +#include <sys/queue.h> +#include <gssapi/gssapi.h> + +#define MAX_FILE_NAMELEN 32 +#define FD_ALLOC_BLOCK 32 +#ifndef GSSD_PIPEFS_DIR +#define GSSD_PIPEFS_DIR "/var/lib/nfs/rpc_pipefs" +#endif +#define INFO "info" +#define KRB5 "krb5" +#define DNOTIFY_SIGNAL (SIGRTMIN + 3) + +#define GSSD_DEFAULT_CRED_DIR "/tmp" +#define GSSD_DEFAULT_CRED_PREFIX "krb5cc_" +#define GSSD_DEFAULT_MACHINE_CRED_SUFFIX "machine" +#define GSSD_DEFAULT_KEYTAB_FILE "/etc/krb5.keytab" +#define GSSD_SERVICE_MDS "lustre_mds" +#define GSSD_SERVICE_OSS "lustre_oss" +#define GSSD_SERVICE_MDS_NAMELEN 10 +#define GSSD_SERVICE_OSS_NAMELEN 10 + +#define LUSTRE_ROOT_NAME "lustre_root" +#define LUSTRE_ROOT_NAMELEN 11 + +/* + * The gss mechanisms that we can handle + */ +enum {AUTHTYPE_KRB5, AUTHTYPE_SPKM3, AUTHTYPE_LIPKEY}; + + + +extern char pipefs_dir[PATH_MAX]; +extern char keytabfile[PATH_MAX]; +extern char ccachedir[PATH_MAX]; +extern char gethostname_ex[PATH_MAX]; +extern int use_memcache; + +TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list; + +struct clnt_info { + TAILQ_ENTRY(clnt_info) list; + char *dirname; + int dir_fd; + char *servicename; + int krb5_fd; + int krb5_poll_index; + int spkm3_fd; + int spkm3_poll_index; +}; + +void init_client_list(void); +int update_client_list(void); +void handle_krb5_upcall(struct clnt_info *clp); +void handle_spkm3_upcall(struct clnt_info *clp); +void lgssd_run(void); + + +extern int lgssd_mutex_downcall; + +void lgssd_init_mutexs(void); +void lgssd_fini_mutexs(void); +void lgssd_mutex_get(int semid); +void lgssd_mutex_put(int semid); + +#endif /* _RPC_GSSD_H_ */ diff --git a/lustre/utils/gss/gssd_main_loop.c b/lustre/utils/gss/gssd_main_loop.c new file mode 100644 index 0000000..435e861 --- /dev/null +++ b/lustre/utils/gss/gssd_main_loop.c @@ -0,0 +1,165 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <netinet/in.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <memory.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <unistd.h> + +#include "gssd.h" +#include "err_util.h" + +extern struct pollfd *pollarray; +extern int pollsize; + +#define POLL_MILLISECS 500 + +static volatile int dir_changed = 1; + +static void dir_notify_handler(int sig, siginfo_t *si, void *data) +{ + dir_changed = 1; +} + +static void +scan_poll_results(int ret) +{ + int i; + struct clnt_info *clp; + + for (clp = clnt_list.tqh_first; clp != NULL; clp = clp->list.tqe_next) + { + i = clp->krb5_poll_index; + if (i >= 0 && pollarray[i].revents) { + if (pollarray[i].revents & POLLHUP) + dir_changed = 1; + if (pollarray[i].revents & POLLIN) + handle_krb5_upcall(clp); + pollarray[clp->krb5_poll_index].revents = 0; + ret--; + if (!ret) + break; + } + i = clp->spkm3_poll_index; + if (i >= 0 && pollarray[i].revents) { + if (pollarray[i].revents & POLLHUP) + dir_changed = 1; + if (pollarray[i].revents & POLLIN) + handle_spkm3_upcall(clp); + pollarray[clp->spkm3_poll_index].revents = 0; + ret--; + if (!ret) + break; + } + } +}; + +void +lgssd_run() +{ + int ret; + struct sigaction dn_act; + int fd; + time_t child_check = 0; + pid_t child_pid; + + /* Taken from linux/Documentation/dnotify.txt: */ + dn_act.sa_sigaction = dir_notify_handler; + sigemptyset(&dn_act.sa_mask); + dn_act.sa_flags = SA_SIGINFO; + sigaction(DNOTIFY_SIGNAL, &dn_act, NULL); + + if ((fd = open(pipefs_dir, O_RDONLY)) == -1) { + printerr(0, "ERROR: failed to open %s: %s\n", + pipefs_dir, strerror(errno)); + return; + } + fcntl(fd, F_SETSIG, DNOTIFY_SIGNAL); + fcntl(fd, F_NOTIFY, DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT); + + init_client_list(); + + while (1) { + while (dir_changed) { + dir_changed = 0; + printerr(2, "pipefs root dir changed\n"); + if (update_client_list()) { + printerr(0, "ERROR: couldn't update " + "client list\n"); + goto out; + } + } + + /* every 5s cleanup possible zombies of child processes */ + if (time(NULL) - child_check >= 5) { + printerr(3, "check zombie children...\n"); + + while (1) { + child_pid = waitpid(-1, NULL, WNOHANG); + if (child_pid <= 0) + break; + + printerr(2, "terminate zombie child: %d\n", + child_pid); + } + + child_check = time(NULL); + } + + /* race condition here: dir_changed could be set before we + * enter the poll, and we'd never notice if it weren't for the + * timeout. */ + ret = poll(pollarray, pollsize, POLL_MILLISECS); + if (ret < 0) { + if (errno != EINTR) + printerr(0, + "WARNING: error return from poll\n"); + } else if (ret == 0) { + /* timeout */ + } else { /* ret > 0 */ + scan_poll_results(ret); + } + } +out: + close(fd); + return; +} diff --git a/lustre/utils/gss/gssd_proc.c b/lustre/utils/gss/gssd_proc.c new file mode 100644 index 0000000..a44724d --- /dev/null +++ b/lustre/utils/gss/gssd_proc.c @@ -0,0 +1,1101 @@ +/* + gssd_proc.c + + Copyright (c) 2000-2004 The Regents of the University of Michigan. + All rights reserved. + + Copyright (c) 2000 Dug Song <dugsong@UMICH.EDU>. + Copyright (c) 2001 Andy Adamson <andros@UMICH.EDU>. + Copyright (c) 2002 Marius Aamodt Eriksen <marius@UMICH.EDU>. + Copyright (c) 2002 Bruce Fields <bfields@UMICH.EDU> + Copyright (c) 2004 Kevin Coffman <kwc@umich.edu> + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "config.h" +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <sys/fsuid.h> + +#include <stdio.h> +#include <stdlib.h> +#include <pwd.h> +#include <grp.h> +#include <string.h> +#include <dirent.h> +#include <poll.h> +#include <fcntl.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <gssapi/gssapi.h> +#include <netdb.h> + +#include "gssd.h" +#include "err_util.h" +#include "gss_util.h" +#include "gss_oids.h" +#include "krb5_util.h" +#include "context.h" +#include "lsupport.h" + +/* + * pollarray: + * array of struct pollfd suitable to pass to poll. initialized to + * zero - a zero struct is ignored by poll() because the events mask is 0. + * + * clnt_list: + * linked list of struct clnt_info which associates a clntXXX directory + * with an index into pollarray[], and other basic data about that client. + * + * Directory structure: created by the kernel nfs client + * {pipefs_nfsdir}/clntXX : one per rpc_clnt struct in the kernel + * {pipefs_nfsdir}/clntXX/krb5 : read uid for which kernel wants + * a context, write the resulting context + * {pipefs_nfsdir}/clntXX/info : stores info such as server name + * + * Algorithm: + * Poll all {pipefs_nfsdir}/clntXX/krb5 files. When ready, data read + * is a uid; performs rpcsec_gss context initialization protocol to + * get a cred for that user. Writes result to corresponding krb5 file + * in a form the kernel code will understand. + * In addition, we make sure we are notified whenever anything is + * created or destroyed in {pipefs_nfsdir} or in an of the clntXX directories, + * and rescan the whole {pipefs_nfsdir} when this happens. + */ + +struct pollfd * pollarray; + +int pollsize; /* the size of pollaray (in pollfd's) */ + +static void +destroy_client(struct clnt_info *clp) +{ + printerr(3, "clp %p: dirname %s, krb5fd %d\n", clp, clp->dirname, clp->krb5_fd); + if (clp->krb5_poll_index != -1) + memset(&pollarray[clp->krb5_poll_index], 0, + sizeof(struct pollfd)); + if (clp->spkm3_poll_index != -1) + memset(&pollarray[clp->spkm3_poll_index], 0, + sizeof(struct pollfd)); + if (clp->dir_fd != -1) close(clp->dir_fd); + if (clp->krb5_fd != -1) close(clp->krb5_fd); + if (clp->spkm3_fd != -1) close(clp->spkm3_fd); + if (clp->dirname) free(clp->dirname); + if (clp->servicename) free(clp->servicename); + free(clp); +} + +static struct clnt_info * +insert_new_clnt(void) +{ + struct clnt_info *clp = NULL; + + if (!(clp = (struct clnt_info *)calloc(1,sizeof(struct clnt_info)))) { + printerr(0, "ERROR: can't malloc clnt_info: %s\n", + strerror(errno)); + goto out; + } + clp->krb5_poll_index = -1; + clp->spkm3_poll_index = -1; + clp->krb5_fd = -1; + clp->spkm3_fd = -1; + clp->dir_fd = -1; + + TAILQ_INSERT_HEAD(&clnt_list, clp, list); +out: + return clp; +} + +static int +process_clnt_dir_files(struct clnt_info * clp) +{ + char kname[32]; + char sname[32]; + + if (clp->krb5_fd == -1) { + snprintf(kname, sizeof(kname), "%s/krb5", clp->dirname); + clp->krb5_fd = open(kname, O_RDWR); + } + if (clp->spkm3_fd == -1) { + snprintf(sname, sizeof(sname), "%s/spkm3", clp->dirname); + clp->spkm3_fd = open(sname, O_RDWR); + } + if((clp->krb5_fd == -1) && (clp->spkm3_fd == -1)) + return -1; + return 0; +} + +static int +get_poll_index(int *ind) +{ + int i; + + *ind = -1; + for (i=0; i<FD_ALLOC_BLOCK; i++) { + if (pollarray[i].events == 0) { + *ind = i; + break; + } + } + if (*ind == -1) { + printerr(0, "ERROR: No pollarray slots open\n"); + return -1; + } + return 0; +} + + +static int +insert_clnt_poll(struct clnt_info *clp) +{ + if ((clp->krb5_fd != -1) && (clp->krb5_poll_index == -1)) { + if (get_poll_index(&clp->krb5_poll_index)) { + printerr(0, "ERROR: Too many krb5 clients\n"); + return -1; + } + pollarray[clp->krb5_poll_index].fd = clp->krb5_fd; + pollarray[clp->krb5_poll_index].events |= POLLIN; + printerr(2, "monitoring krb5 channel under %s\n", + clp->dirname); + } + + if ((clp->spkm3_fd != -1) && (clp->spkm3_poll_index == -1)) { + if (get_poll_index(&clp->spkm3_poll_index)) { + printerr(0, "ERROR: Too many spkm3 clients\n"); + return -1; + } + pollarray[clp->spkm3_poll_index].fd = clp->spkm3_fd; + pollarray[clp->spkm3_poll_index].events |= POLLIN; + } + + return 0; +} + +static void +process_clnt_dir(char *dir) +{ + struct clnt_info * clp; + + if (!(clp = insert_new_clnt())) + goto fail_destroy_client; + + if (!(clp->dirname = calloc(strlen(dir) + 1, 1))) { + goto fail_destroy_client; + } + memcpy(clp->dirname, dir, strlen(dir)); + if ((clp->dir_fd = open(clp->dirname, O_RDONLY)) == -1) { + printerr(0, "ERROR: can't open %s: %s\n", + clp->dirname, strerror(errno)); + goto fail_destroy_client; + } + fcntl(clp->dir_fd, F_SETSIG, DNOTIFY_SIGNAL); + fcntl(clp->dir_fd, F_NOTIFY, DN_CREATE | DN_DELETE | DN_MULTISHOT); + + if (process_clnt_dir_files(clp)) + goto fail_keep_client; + + if (insert_clnt_poll(clp)) + goto fail_destroy_client; + + return; + +fail_destroy_client: + if (clp) { + TAILQ_REMOVE(&clnt_list, clp, list); + destroy_client(clp); + } +fail_keep_client: + /* We couldn't find some subdirectories, but we keep the client + * around in case we get a notification on the directory when the + * subdirectories are created. */ + return; +} + +void +init_client_list(void) +{ + TAILQ_INIT(&clnt_list); + /* Eventually plan to grow/shrink poll array: */ + pollsize = FD_ALLOC_BLOCK; + pollarray = calloc(pollsize, sizeof(struct pollfd)); +} + +/* + * This is run after a DNOTIFY signal, and should clear up any + * directories that are no longer around, and re-scan any existing + * directories, since the DNOTIFY could have been in there. + */ +static void +update_old_clients(struct dirent **namelist, int size) +{ + struct clnt_info *clp; + void *saveprev; + int i, stillhere; + + for (clp = clnt_list.tqh_first; clp != NULL; clp = clp->list.tqe_next) { + stillhere = 0; + for (i=0; i < size; i++) { + if (!strcmp(clp->dirname, namelist[i]->d_name)) { + stillhere = 1; + break; + } + } + if (!stillhere) { + printerr(2, "destroying client %s\n", clp->dirname); + saveprev = clp->list.tqe_prev; + TAILQ_REMOVE(&clnt_list, clp, list); + destroy_client(clp); + clp = saveprev; + } + } + for (clp = clnt_list.tqh_first; clp != NULL; clp = clp->list.tqe_next) { + if (!process_clnt_dir_files(clp)) + insert_clnt_poll(clp); + } +} + +/* Search for a client by directory name, return 1 if found, 0 otherwise */ +static int +find_client(char *dirname) +{ + struct clnt_info *clp; + + for (clp = clnt_list.tqh_first; clp != NULL; clp = clp->list.tqe_next) + if (!strcmp(clp->dirname, dirname)) + return 1; + return 0; +} + +/* Used to read (and re-read) list of clients, set up poll array. */ +int +update_client_list(void) +{ + char lustre_dir[PATH_MAX]; + struct dirent lustre_dirent = { .d_name = "lustre" }; + struct dirent *namelist[1]; + struct stat statbuf; + int i, j; + + if (chdir(pipefs_dir) < 0) { + printerr(0, "ERROR: can't chdir to %s: %s\n", + pipefs_dir, strerror(errno)); + return -1; + } + + snprintf(lustre_dir, sizeof(lustre_dir), "%s/%s", pipefs_dir, "lustre"); + if (stat(lustre_dir, &statbuf) == 0) { + namelist[0] = &lustre_dirent; + j = 1; + printerr(2, "re-processing lustre directory\n"); + } else { + namelist[0] = NULL; + j = 0; + printerr(2, "lustre directory not exist\n"); + } + + update_old_clients(namelist, j); + for (i=0; i < j; i++) { + if (i < FD_ALLOC_BLOCK && !find_client(namelist[i]->d_name)) + process_clnt_dir(namelist[i]->d_name); + } + + chdir("/"); + return 0; +} + +/* Context creation response. */ +struct lustre_gss_init_res { + gss_buffer_desc gr_ctx; /* context handle */ + u_int gr_major; /* major status */ + u_int gr_minor; /* minor status */ + u_int gr_win; /* sequence window */ + gss_buffer_desc gr_token; /* token */ +}; + +struct lustre_gss_data { + int lgd_established; + int lgd_lustre_svc; /* mds/oss */ + int lgd_uid; /* uid */ + char *lgd_uuid; /* client device uuid */ + gss_name_t lgd_name; /* service name */ + + gss_OID lgd_mech; /* mech OID */ + u_int lgd_req_flags; /* request flags */ + gss_cred_id_t lgd_cred; /* credential */ + gss_ctx_id_t lgd_ctx; /* session context */ + gss_buffer_desc lgd_rmt_ctx; /* remote handle of context */ + uint32_t lgd_seq_win; /* sequence window */ + + int lgd_rpc_err; + int lgd_gss_err; +}; + +static int +do_downcall(int k5_fd, struct lgssd_upcall_data *updata, + struct lustre_gss_data *lgd, gss_buffer_desc *context_token) +{ + char *buf = NULL, *p = NULL, *end = NULL; + unsigned int timeout = 0; /* XXX decide on a reasonable value */ + unsigned int buf_size = 0; + + printerr(2, "doing downcall\n"); + buf_size = sizeof(updata->seq) + sizeof(timeout) + + sizeof(lgd->lgd_seq_win) + + sizeof(lgd->lgd_rmt_ctx.length) + lgd->lgd_rmt_ctx.length + + sizeof(context_token->length) + context_token->length; + p = buf = malloc(buf_size); + end = buf + buf_size; + + if (WRITE_BYTES(&p, end, updata->seq)) goto out_err; + /* Not setting any timeout for now: */ + if (WRITE_BYTES(&p, end, timeout)) goto out_err; + if (WRITE_BYTES(&p, end, lgd->lgd_seq_win)) goto out_err; + if (write_buffer(&p, end, &lgd->lgd_rmt_ctx)) goto out_err; + if (write_buffer(&p, end, context_token)) goto out_err; + + lgssd_mutex_get(lgssd_mutex_downcall); + if (write(k5_fd, buf, p - buf) < p - buf) { + lgssd_mutex_put(lgssd_mutex_downcall); + goto out_err; + } + lgssd_mutex_put(lgssd_mutex_downcall); + + if (buf) free(buf); + return 0; +out_err: + if (buf) free(buf); + printerr(0, "ERROR: Failed to write downcall!\n"); + return -1; +} + +static int +do_error_downcall(int k5_fd, uint32_t seq, int rpc_err, int gss_err) +{ + char buf[1024]; + char *p = buf, *end = buf + 1024; + unsigned int timeout = 0; + int zero = 0; + + printerr(1, "doing error downcall\n"); + + if (WRITE_BYTES(&p, end, seq)) goto out_err; + if (WRITE_BYTES(&p, end, timeout)) goto out_err; + /* use seq_win = 0 to indicate an error: */ + if (WRITE_BYTES(&p, end, zero)) goto out_err; + if (WRITE_BYTES(&p, end, rpc_err)) goto out_err; + if (WRITE_BYTES(&p, end, gss_err)) goto out_err; + + lgssd_mutex_get(lgssd_mutex_downcall); + if (write(k5_fd, buf, p - buf) < p - buf) { + lgssd_mutex_put(lgssd_mutex_downcall); + goto out_err; + } + lgssd_mutex_put(lgssd_mutex_downcall); + return 0; +out_err: + printerr(0, "Failed to write error downcall!\n"); + return -1; +} + +#if 0 +/* + * Create an RPC connection and establish an authenticated + * gss context with a server. + */ +int create_auth_rpc_client(struct clnt_info *clp, + CLIENT **clnt_return, + AUTH **auth_return, + uid_t uid, + int authtype) +{ + CLIENT *rpc_clnt = NULL; + struct rpc_gss_sec sec; + AUTH *auth = NULL; + uid_t save_uid = -1; + int retval = -1; + int errcode; + OM_uint32 min_stat; + char rpc_errmsg[1024]; + int sockp = RPC_ANYSOCK; + int sendsz = 32768, recvsz = 32768; + struct addrinfo ai_hints, *a = NULL; + char service[64]; + char *at_sign; + + /* Create the context as the user (not as root) */ + save_uid = geteuid(); + if (setfsuid(uid) != 0) { + printerr(0, "WARNING: Failed to setfsuid for " + "user with uid %d\n", uid); + goto out_fail; + } + printerr(2, "creating context using fsuid %d (save_uid %d)\n", + uid, save_uid); + + sec.qop = GSS_C_QOP_DEFAULT; + sec.svc = RPCSEC_GSS_SVC_NONE; + sec.cred = GSS_C_NO_CREDENTIAL; + sec.req_flags = 0; + if (authtype == AUTHTYPE_KRB5) { + sec.mech = (gss_OID)&krb5oid; + sec.req_flags = GSS_C_MUTUAL_FLAG; + } + else if (authtype == AUTHTYPE_SPKM3) { + sec.mech = (gss_OID)&spkm3oid; + /* XXX sec.req_flags = GSS_C_ANON_FLAG; + * Need a way to switch.... + */ + sec.req_flags = GSS_C_MUTUAL_FLAG; + } + else { + printerr(0, "ERROR: Invalid authentication type (%d) " + "in create_auth_rpc_client\n", authtype); + goto out_fail; + } + + + if (authtype == AUTHTYPE_KRB5) { +#ifdef HAVE_SET_ALLOWABLE_ENCTYPES + /* + * Do this before creating rpc connection since we won't need + * rpc connection if it fails! + */ + if (limit_krb5_enctypes(&sec, uid)) { + printerr(1, "WARNING: Failed while limiting krb5 " + "encryption types for user with uid %d\n", + uid); + goto out_fail; + } +#endif + } + + /* create an rpc connection to the nfs server */ + + printerr(2, "creating %s client for server %s\n", clp->protocol, + clp->servername); + + memset(&ai_hints, '\0', sizeof(ai_hints)); + ai_hints.ai_family = PF_INET; + ai_hints.ai_flags |= AI_CANONNAME; + if ((strcmp(clp->protocol, "tcp")) == 0) { + ai_hints.ai_socktype = SOCK_STREAM; + ai_hints.ai_protocol = IPPROTO_TCP; + } else if ((strcmp(clp->protocol, "udp")) == 0) { + ai_hints.ai_socktype = SOCK_DGRAM; + ai_hints.ai_protocol = IPPROTO_UDP; + } else { + printerr(0, "WARNING: unrecognized protocol, '%s', requested " + "for connection to server %s for user with uid %d", + clp->protocol, clp->servername, uid); + goto out_fail; + } + + /* extract the service name from clp->servicename */ + if ((at_sign = strchr(clp->servicename, '@')) == NULL) { + printerr(0, "WARNING: servicename (%s) not formatted as " + "expected with service@host", clp->servicename); + goto out_fail; + } + if ((at_sign - clp->servicename) >= sizeof(service)) { + printerr(0, "WARNING: service portion of servicename (%s) " + "is too long!", clp->servicename); + goto out_fail; + } + strncpy(service, clp->servicename, at_sign - clp->servicename); + service[at_sign - clp->servicename] = '\0'; + + errcode = getaddrinfo(clp->servername, service, &ai_hints, &a); + if (errcode) { + printerr(0, "WARNING: Error from getaddrinfo for server " + "'%s': %s", clp->servername, gai_strerror(errcode)); + goto out_fail; + } + + if (a == NULL) { + printerr(0, "WARNING: No address information found for " + "connection to server %s for user with uid %d", + clp->servername, uid); + goto out_fail; + } + if (a->ai_protocol == IPPROTO_TCP) { + if ((rpc_clnt = clnttcp_create( + (struct sockaddr_in *) a->ai_addr, + clp->prog, clp->vers, &sockp, + sendsz, recvsz)) == NULL) { + snprintf(rpc_errmsg, sizeof(rpc_errmsg), + "WARNING: can't create tcp rpc_clnt " + "for server %s for user with uid %d", + clp->servername, uid); + printerr(0, "%s\n", + clnt_spcreateerror(rpc_errmsg)); + goto out_fail; + } + } else if (a->ai_protocol == IPPROTO_UDP) { + const struct timeval timeout = {5, 0}; + if ((rpc_clnt = clntudp_bufcreate( + (struct sockaddr_in *) a->ai_addr, + clp->prog, clp->vers, timeout, + &sockp, sendsz, recvsz)) == NULL) { + snprintf(rpc_errmsg, sizeof(rpc_errmsg), + "WARNING: can't create udp rpc_clnt " + "for server %s for user with uid %d", + clp->servername, uid); + printerr(0, "%s\n", + clnt_spcreateerror(rpc_errmsg)); + goto out_fail; + } + } else { + /* Shouldn't happen! */ + printerr(0, "ERROR: requested protocol '%s', but " + "got addrinfo with protocol %d", + clp->protocol, a->ai_protocol); + goto out_fail; + } + /* We're done with this */ + freeaddrinfo(a); + a = NULL; + + printerr(2, "creating context with server %s\n", clp->servicename); + auth = authgss_create_default(rpc_clnt, clp->servicename, &sec); + if (!auth) { + /* Our caller should print appropriate message */ + printerr(2, "WARNING: Failed to create %s context for " + "user with uid %d for server %s\n", + (authtype == AUTHTYPE_KRB5 ? "krb5":"spkm3"), + uid, clp->servername); + goto out_fail; + } + + /* Success !!! */ + rpc_clnt->cl_auth = auth; + *clnt_return = rpc_clnt; + *auth_return = auth; + retval = 0; + + out: + if (sec.cred != GSS_C_NO_CREDENTIAL) + gss_release_cred(&min_stat, &sec.cred); + if (a != NULL) freeaddrinfo(a); + /* Restore euid to original value */ + if ((save_uid != -1) && (setfsuid(save_uid) != uid)) { + printerr(0, "WARNING: Failed to restore fsuid" + " to uid %d from %d\n", save_uid, uid); + } + return retval; + + out_fail: + /* Only destroy here if failure. Otherwise, caller is responsible */ + if (rpc_clnt) clnt_destroy(rpc_clnt); + + goto out; +} +#endif + +static +int do_negotiation(struct lustre_gss_data *lgd, + gss_buffer_desc *gss_token, + struct lustre_gss_init_res *gr, + int timeout) +{ + char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel"; + struct lgssd_ioctl_param param; + struct passwd *pw; + int fd, ret; + char outbuf[8192]; + unsigned int *p; + int res; + + pw = getpwuid(lgd->lgd_uid); + if (!pw) { + printerr(0, "no uid %u in local user database\n", + lgd->lgd_uid); + return -1; + } + + param.version = GSSD_INTERFACE_VERSION; + param.uuid = lgd->lgd_uuid; + param.lustre_svc = lgd->lgd_lustre_svc; + param.uid = lgd->lgd_uid; + param.gid = pw->pw_gid; + param.send_token_size = gss_token->length; + param.send_token = (char *) gss_token->value; + param.reply_buf_size = sizeof(outbuf); + param.reply_buf = outbuf; + + fd = open(file, O_RDWR); + if (fd < 0) { + printerr(0, "can't open file %s\n", file); + return -1; + } + + ret = write(fd, ¶m, sizeof(param)); + + if (ret != sizeof(param)) { + printerr(0, "lustre ioctl err: %d\n", strerror(errno)); + close(fd); + return -1; + } + if (param.status) { + close(fd); + printerr(0, "status: %d (%s)\n", + param.status, strerror((int)param.status)); + if (param.status == -ETIMEDOUT) { + /* kernel return -ETIMEDOUT means the rpc timedout, + * we should notify the caller to reinitiate the + * gss negotiation, by return -ERESTART + */ + lgd->lgd_rpc_err = -ERESTART; + lgd->lgd_gss_err = 0; + } else { + lgd->lgd_rpc_err = param.status; + lgd->lgd_gss_err = 0; + } + return -1; + } + p = (unsigned int *)outbuf; + res = *p++; + gr->gr_major = *p++; + gr->gr_minor = *p++; + gr->gr_win = *p++; + + gr->gr_ctx.length = *p++; + gr->gr_ctx.value = malloc(gr->gr_ctx.length); + memcpy(gr->gr_ctx.value, p, gr->gr_ctx.length); + p += (((gr->gr_ctx.length + 3) & ~3) / 4); + + gr->gr_token.length = *p++; + gr->gr_token.value = malloc(gr->gr_token.length); + memcpy(gr->gr_token.value, p, gr->gr_token.length); + p += (((gr->gr_token.length + 3) & ~3) / 4); + + printerr(2, "do_negotiation: receive handle len %d, token len %d\n", + gr->gr_ctx.length, gr->gr_token.length); + close(fd); + return 0; +} + +static +int gssd_refresh_lgd(struct lustre_gss_data *lgd) +{ + struct lustre_gss_init_res gr; + gss_buffer_desc *recv_tokenp, send_token; + OM_uint32 maj_stat, min_stat, call_stat, ret_flags; + + /* GSS context establishment loop. */ + memset(&gr, 0, sizeof(gr)); + recv_tokenp = GSS_C_NO_BUFFER; + + for (;;) { + /* print the token we just received */ + if (recv_tokenp != GSS_C_NO_BUFFER) { + printerr(3, "The received token length %d\n", + recv_tokenp->length); + print_hexl(3, recv_tokenp->value, recv_tokenp->length); + } + + maj_stat = gss_init_sec_context(&min_stat, + lgd->lgd_cred, + &lgd->lgd_ctx, + lgd->lgd_name, + lgd->lgd_mech, + lgd->lgd_req_flags, + 0, /* time req */ + NULL, /* channel */ + recv_tokenp, + NULL, /* used mech */ + &send_token, + &ret_flags, + NULL); /* time rec */ + + if (recv_tokenp != GSS_C_NO_BUFFER) { + gss_release_buffer(&min_stat, &gr.gr_token); + recv_tokenp = GSS_C_NO_BUFFER; + } + if (maj_stat != GSS_S_COMPLETE && + maj_stat != GSS_S_CONTINUE_NEEDED) { + pgsserr("gss_init_sec_context", maj_stat, min_stat, + lgd->lgd_mech); + break; + } + if (send_token.length != 0) { + memset(&gr, 0, sizeof(gr)); + + /* print the token we are about to send */ + printerr(3, "token being sent length %d\n", + send_token.length); + print_hexl(3, send_token.value, send_token.length); + + call_stat = do_negotiation(lgd, &send_token, &gr, 0); + gss_release_buffer(&min_stat, &send_token); + + if (call_stat != 0 || + (gr.gr_major != GSS_S_COMPLETE && + gr.gr_major != GSS_S_CONTINUE_NEEDED)) { + printerr(0, "call stat %d, major stat 0x%x\n", + (int)call_stat, gr.gr_major); + return -1; + } + + if (gr.gr_ctx.length != 0) { + if (lgd->lgd_rmt_ctx.value) + gss_release_buffer(&min_stat, + &lgd->lgd_rmt_ctx); + lgd->lgd_rmt_ctx = gr.gr_ctx; + } + if (gr.gr_token.length != 0) { + if (maj_stat != GSS_S_CONTINUE_NEEDED) + break; + recv_tokenp = &gr.gr_token; + } + } + + /* GSS_S_COMPLETE => check gss header verifier, + * usually checked in gss_validate + */ + if (maj_stat == GSS_S_COMPLETE) { + lgd->lgd_established = 1; + lgd->lgd_seq_win = gr.gr_win; + break; + } + } + /* End context negotiation loop. */ + if (!lgd->lgd_established) { + if (gr.gr_token.length != 0) + gss_release_buffer(&min_stat, &gr.gr_token); + + printerr(0, "context negotiation failed\n"); + return -1; + } + + printerr(2, "successfully refreshed lgd\n"); + return 0; +} + +static +int gssd_create_lgd(struct clnt_info *clp, + struct lustre_gss_data *lgd, + struct lgssd_upcall_data *updata, + int authtype) +{ + gss_buffer_desc sname; + OM_uint32 maj_stat, min_stat; + int retval = -1; + + lgd->lgd_established = 0; + lgd->lgd_lustre_svc = updata->svc; + lgd->lgd_uid = updata->uid; + lgd->lgd_uuid = updata->obd; + + switch (authtype) { + case AUTHTYPE_KRB5: + lgd->lgd_mech = (gss_OID) &krb5oid; + lgd->lgd_req_flags = GSS_C_MUTUAL_FLAG; + break; + case AUTHTYPE_SPKM3: + lgd->lgd_mech = (gss_OID) &spkm3oid; + /* XXX sec.req_flags = GSS_C_ANON_FLAG; + * Need a way to switch.... + */ + lgd->lgd_req_flags = GSS_C_MUTUAL_FLAG; + break; + default: + printerr(0, "Invalid authentication type (%d)\n", authtype); + return -1; + } + + lgd->lgd_cred = GSS_C_NO_CREDENTIAL; + lgd->lgd_ctx = GSS_C_NO_CONTEXT; + lgd->lgd_rmt_ctx = (gss_buffer_desc) GSS_C_EMPTY_BUFFER; + lgd->lgd_seq_win = 0; + + sname.value = clp->servicename; + sname.length = strlen(clp->servicename); + + maj_stat = gss_import_name(&min_stat, &sname, + (gss_OID) GSS_C_NT_HOSTBASED_SERVICE, + &lgd->lgd_name); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr(0, maj_stat, min_stat, lgd->lgd_mech); + goto out_fail; + } + + retval = gssd_refresh_lgd(lgd); + + if (lgd->lgd_name != GSS_C_NO_NAME) + gss_release_name(&min_stat, &lgd->lgd_name); + + if (lgd->lgd_cred != GSS_C_NO_CREDENTIAL) + gss_release_cred(&min_stat, &lgd->lgd_cred); + + out_fail: + return retval; +} + +static +void gssd_free_lgd(struct lustre_gss_data *lgd) +{ + gss_buffer_t token = GSS_C_NO_BUFFER; + OM_uint32 maj_stat, min_stat; + + if (lgd->lgd_ctx == GSS_C_NO_CONTEXT) + return; + + maj_stat = gss_delete_sec_context(&min_stat, &lgd->lgd_ctx, token); +} + +static +int construct_service_name(struct clnt_info *clp, + struct lgssd_upcall_data *ud) +{ + const int buflen = 256; + char name[buflen]; + + if (clp->servicename) { + free(clp->servicename); + clp->servicename = NULL; + } + + if (lnet_nid2hostname(ud->nid, name, buflen)) + return -1; + + clp->servicename = malloc(32 + strlen(name)); + if (!clp->servicename) { + printerr(0, "can't alloc memory\n"); + return -1; + } + sprintf(clp->servicename, "%s@%s", + ud->svc == LUSTRE_GSS_SVC_MDS ? + GSSD_SERVICE_MDS : GSSD_SERVICE_OSS, + name); + printerr(2, "constructed servicename: %s\n", clp->servicename); + return 0; +} + +/* + * this code uses the userland rpcsec gss library to create a krb5 + * context on behalf of the kernel + */ +void +handle_krb5_upcall(struct clnt_info *clp) +{ + pid_t pid; + gss_buffer_desc token = { 0, NULL }; + struct lgssd_upcall_data updata; + struct lustre_gss_data lgd; + char **credlist = NULL; + char **ccname; + int read_rc; + + printerr(2, "handling krb5 upcall\n"); + + memset(&lgd, 0, sizeof(lgd)); + lgd.lgd_rpc_err = -EPERM; /* default error code */ + + read_rc = read(clp->krb5_fd, &updata, sizeof(updata)); + if (read_rc < 0) { + printerr(0, "WARNING: failed reading from krb5 " + "upcall pipe: %s\n", strerror(errno)); + return; + } else if (read_rc != sizeof(updata)) { + printerr(0, "upcall data mismatch: length %d, expect %d\n", + read_rc, sizeof(updata)); + + /* the sequence number must be the first field. if read >= 4 + * bytes then we know at least sequence is fine, try to send + * error notification nicely. + */ + if (read_rc >= 4) + do_error_downcall(clp->krb5_fd, updata.seq, -EPERM, 0); + return; + } + + /* fork child process */ + pid = fork(); + if (pid < 0) { + printerr(0, "can't fork: %s\n", strerror(errno)); + do_error_downcall(clp->krb5_fd, updata.seq, -EPERM, 0); + return; + } else if (pid > 0) { + printerr(2, "forked child process: %d\n", pid); + return; + } + + printerr(1, "krb5 upcall: seq %u, uid %u, svc %u, nid 0x%llx, obd %s\n", + updata.seq, updata.uid, updata.svc, updata.nid, updata.obd); + + if (updata.svc != LUSTRE_GSS_SVC_MDS && + updata.svc != LUSTRE_GSS_SVC_OSS) { + printerr(0, "invalid svc %d\n", updata.svc); + lgd.lgd_rpc_err = -EPROTO; + goto out_return_error; + } + updata.obd[sizeof(updata.obd)-1] = '\0'; + + if (construct_service_name(clp, &updata)) { + printerr(0, "failed to construct service name\n"); + goto out_return_error; + } + + if (updata.uid == 0) { + int success = 0; + + /* + * Get a list of credential cache names and try each + * of them until one works or we've tried them all + */ + if (gssd_get_krb5_machine_cred_list(&credlist)) { + printerr(0, "ERROR: Failed to obtain machine " + "credentials for %s\n", clp->servicename); + goto out_return_error; + } + for (ccname = credlist; ccname && *ccname; ccname++) { + gssd_setup_krb5_machine_gss_ccache(*ccname); + if ((gssd_create_lgd(clp, &lgd, &updata, + AUTHTYPE_KRB5)) == 0) { + /* Success! */ + success++; + break; + } + printerr(2, "WARNING: Failed to create krb5 context " + "for user with uid %d with credentials " + "cache %s for service %s\n", + updata.uid, *ccname, clp->servicename); + } + gssd_free_krb5_machine_cred_list(credlist); + if (!success) { + printerr(0, "ERROR: Failed to create krb5 context " + "for user with uid %d with any " + "credentials cache for service %s\n", + updata.uid, clp->servicename); + goto out_return_error; + } + } + else { + /* Tell krb5 gss which credentials cache to use */ + gssd_setup_krb5_user_gss_ccache(updata.uid, clp->servicename); + + if ((gssd_create_lgd(clp, &lgd, &updata, AUTHTYPE_KRB5)) != 0) { + printerr(0, "WARNING: Failed to create krb5 context " + "for user with uid %d for service %s\n", + updata.uid, clp->servicename); + goto out_return_error; + } + } + + if (serialize_context_for_kernel(lgd.lgd_ctx, &token, &krb5oid)) { + printerr(0, "WARNING: Failed to serialize krb5 context for " + "user with uid %d for service %s\n", + updata.uid, clp->servicename); + goto out_return_error; + } + + printerr(1, "refreshed: %u@%s for %s\n", + updata.uid, updata.obd, clp->servicename); + do_downcall(clp->krb5_fd, &updata, &lgd, &token); + +out: + if (token.value) + free(token.value); + + gssd_free_lgd(&lgd); + exit(0); /* i'm child process */ + +out_return_error: + do_error_downcall(clp->krb5_fd, updata.seq, + lgd.lgd_rpc_err, lgd.lgd_gss_err); + goto out; +} + +/* + * this code uses the userland rpcsec gss library to create an spkm3 + * context on behalf of the kernel + */ +void +handle_spkm3_upcall(struct clnt_info *clp) +{ +#if 0 + uid_t uid; + CLIENT *rpc_clnt = NULL; + AUTH *auth = NULL; + struct authgss_private_data pd; + gss_buffer_desc token; + + printerr(2, "handling spkm3 upcall\n"); + + token.length = 0; + token.value = NULL; + + if (read(clp->spkm3_fd, &uid, sizeof(uid)) < sizeof(uid)) { + printerr(0, "WARNING: failed reading uid from spkm3 " + "upcall pipe: %s\n", strerror(errno)); + goto out; + } + + if (create_auth_rpc_client(clp, &rpc_clnt, &auth, uid, AUTHTYPE_SPKM3)) { + printerr(0, "WARNING: Failed to create spkm3 context for " + "user with uid %d\n", uid); + goto out_return_error; + } + + if (!authgss_get_private_data(auth, &pd)) { + printerr(0, "WARNING: Failed to obtain authentication " + "data for user with uid %d for server %s\n", + uid, clp->servername); + goto out_return_error; + } + + if (serialize_context_for_kernel(pd.pd_ctx, &token, &spkm3oid)) { + printerr(0, "WARNING: Failed to serialize spkm3 context for " + "user with uid %d for server\n", + uid, clp->servername); + goto out_return_error; + } + + do_downcall(clp->spkm3_fd, uid, &pd, &token); + +out: + if (token.value) + free(token.value); + if (auth) + AUTH_DESTROY(auth); + if (rpc_clnt) + clnt_destroy(rpc_clnt); + return; + +out_return_error: + do_error_downcall(clp->spkm3_fd, uid, -1); + goto out; +#endif +} diff --git a/lustre/utils/gss/krb5_util.c b/lustre/utils/gss/krb5_util.c new file mode 100644 index 0000000..629c279 --- /dev/null +++ b/lustre/utils/gss/krb5_util.c @@ -0,0 +1,1124 @@ +/* + * Adapted in part from MIT Kerberos 5-1.2.1 slave/kprop.c and from + * http://docs.sun.com/?p=/doc/816-1331/6m7oo9sms&a=view + * + * Copyright (c) 2002-2004 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson <andros@umich.edu> + * J. Bruce Fields <bfields@umich.edu> + * Marius Aamodt Eriksen <marius@umich.edu> + * Kevin Coffman <kwc@umich.edu> + */ + +/* + * slave/kprop.c + * + * Copyright 1990,1991 by the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * Export of this software from the United States of America may + * require a specific license from the United States Government. + * It is the responsibility of any person or organization contemplating + * export to obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of M.I.T. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. Furthermore if you modify this software you must label + * your software as modified software and not distribute it in such a + * fashion that it might be confused with the original M.I.T. software. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + */ + +/* + * Copyright 1994 by OpenVision Technologies, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appears in all copies and + * that both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of OpenVision not be used + * in advertising or publicity pertaining to distribution of the software + * without specific, written prior permission. OpenVision makes no + * representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied warranty. + * + * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +/* + krb5_util.c + + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "config.h" +#include <sys/param.h> +#include <rpc/rpc.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/utsname.h> +#include <sys/socket.h> +#include <arpa/inet.h> + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <netdb.h> +#include <dirent.h> +#include <fcntl.h> +#include <errno.h> +#include <time.h> +#include <gssapi/gssapi.h> +#ifdef USE_PRIVATE_KRB5_FUNCTIONS +#include <gssapi/gssapi_krb5.h> +#endif +#include <krb5.h> + +#include "gssd.h" +#include "err_util.h" +#include "gss_util.h" +#include "gss_oids.h" +#include "krb5_util.h" + +/* Global list of principals/cache file names for machine credentials */ +struct gssd_k5_kt_princ *gssd_k5_kt_princ_list = NULL; + +/* Encryption types supported by the kernel rpcsec_gss code */ +int num_krb5_enctypes = 0; +krb5_enctype *krb5_enctypes = NULL; + +/* credential expire time in advance */ +unsigned long machine_cred_expire_advance = 300; /* 5 mins */ + +/*==========================*/ +/*=== Internal routines ===*/ +/*==========================*/ + +static int select_krb5_ccache(const struct dirent *d); +static int gssd_find_existing_krb5_ccache(uid_t uid, struct dirent **d); +static int gssd_get_single_krb5_cred(krb5_context context, + krb5_keytab kt, struct gssd_k5_kt_princ *ple); +static int gssd_process_krb5_keytab(krb5_context context, krb5_keytab kt, + char *kt_name); + +/* + * convenient macros, these perhaps need further cleanup + */ +#ifdef HAVE_KRB5 + +#define KEYTAB_ENTRY_MATCH(kte, name) \ + ( \ + (kte).principal->data[0].length == (sizeof(name)-1) && \ + strncmp((kte).principal->data[0].data, (name), sizeof(name)-1) == 0 \ + ) +#define KRB5_FREE_UNPARSED_NAME(ctx, name) \ + krb5_free_unparsed_name((ctx), (name)); +#define KRB5_STRDUP(str) \ + strndup((str).data, (str).length) +#define KRB5_STRCMP(str, name) \ + ( \ + (str)->length != strlen(name) || \ + strncmp((str)->data, (name), (str)->length) != 0 \ + ) +#define KRB5_STRCASECMP(str, name) \ + ( \ + (str)->length != strlen(name) || \ + strncasecmp((str)->data, (name), (str)->length) != 0 \ + ) + +#else /* !HAVE_KRB5 */ + +#define KEYTAB_ENTRY_MATCH(kte, name) \ + ( \ + strlen((kte).principal->name.name_string.val[0]) == \ + (sizeof(name)-1) && \ + strncmp(kte.principal->name.name_string.val[0], (name), \ + sizeof(name)-1) == 0 \ + ) +#define KRB5_FREE_UNPARSED_NAME(ctx, name) \ + free(pname); +#define KRB5_STRDUP(str) \ + strdup(str) +#define KRB5_STRCMP(str, name) \ + strcmp((str), (name)) +#define KRB5_STRCASECMP(str, name) \ + strcmp((str), (name)) + +#endif /* HAVE_KRB5 */ + +/* + * Called from the scandir function to weed out potential krb5 + * credentials cache files + * + * Returns: + * 0 => don't select this one + * 1 => select this one + */ +static int +select_krb5_ccache(const struct dirent *d) +{ + /* + * Note: We used to check d->d_type for DT_REG here, + * but apparenlty reiser4 always has DT_UNKNOWN. + * Check for IS_REG after stat() call instead. + */ + if (strstr(d->d_name, GSSD_DEFAULT_CRED_PREFIX)) + return 1; + else + return 0; +} + +/* + * Look in the ccachedir for files that look like they + * are Kerberos Credential Cache files for a given UID. Return + * non-zero and the dirent pointer for the entry most likely to be + * what we want. Otherwise, return zero and no dirent pointer. + * The caller is responsible for freeing the dirent if one is returned. + * + * Returns: + * 0 => could not find an existing entry + * 1 => found an existing entry + */ +static int +gssd_find_existing_krb5_ccache(uid_t uid, struct dirent **d) +{ + struct dirent **namelist; + int n; + int i; + int found = 0; + struct dirent *best_match_dir = NULL; + struct stat best_match_stat, tmp_stat; + + memset(&best_match_stat, 0, sizeof(best_match_stat)); + *d = NULL; + n = scandir(ccachedir, &namelist, select_krb5_ccache, 0); + if (n < 0) { + perror("scandir looking for krb5 credentials caches"); + } + else if (n > 0) { + char substring[128]; + char fullstring[128]; + char statname[1024]; + snprintf(substring, sizeof(substring), "_%d_", uid); + snprintf(fullstring, sizeof(fullstring), "_%d", uid); + for (i = 0; i < n; i++) { + printerr(3, "CC file '%s' being considered\n", + namelist[i]->d_name); + if (strstr(namelist[i]->d_name, substring) || + !strcmp(namelist[i]->d_name, fullstring)) { + snprintf(statname, sizeof(statname), + "%s/%s", ccachedir, + namelist[i]->d_name); + if (stat(statname, &tmp_stat)) { + printerr(0, "Error doing stat " + "on file '%s'\n", + statname); + continue; + } + if (!S_ISREG(tmp_stat.st_mode)) { + printerr(3, "File '%s' is not " + "a regular file\n", + statname); + continue; + } + printerr(3, "CC file '%s' matches " + "name check and has " + "mtime of %u\n", + namelist[i]->d_name, + tmp_stat.st_mtime); + /* if more than one match is found, + * return the most recent (the one + * with the latest mtime), + * and don't free the dirent */ + if (!found) { + best_match_dir = namelist[i]; + best_match_stat = tmp_stat; + found++; + } + else { + /* + * If the current match has + * an mtime later than the + * one we are looking at, + * then use the current match. + * Otherwise, we still have + * the best match. + */ + if (tmp_stat.st_mtime > + best_match_stat.st_mtime) { + free(best_match_dir); + best_match_dir = namelist[i]; + best_match_stat = tmp_stat; + } + else { + free(namelist[i]); + } + printerr(3, "CC file '%s' is our " + "current best match " + "with mtime of %u\n", + best_match_dir->d_name, + best_match_stat.st_mtime); + } + } + else + free(namelist[i]); + } + free(namelist); + } + if (found) + { + *d = best_match_dir; + } + return found; +} + + +/* + * Obtain credentials via a key in the keytab given + * a keytab handle and a gssd_k5_kt_princ structure. + * Checks to see if current credentials are expired, + * if not, uses the keytab to obtain new credentials. + * + * Returns: + * 0 => success (or credentials have not expired) + * nonzero => error + */ +static int +gssd_get_single_krb5_cred(krb5_context context, + krb5_keytab kt, + struct gssd_k5_kt_princ *ple) +{ + krb5_get_init_creds_opt options; + krb5_creds my_creds; + krb5_ccache ccache = NULL; + char kt_name[BUFSIZ]; + char cc_name[BUFSIZ]; + int code; + time_t now = time(0); + char *cache_type; + + memset(&my_creds, 0, sizeof(my_creds)); + + if (ple->ccname && ple->endtime > now + machine_cred_expire_advance) { + printerr(2, "INFO: Credentials in CC '%s' are good until %d\n", + ple->ccname, ple->endtime); + code = 0; + goto out; + } + + if ((code = krb5_kt_get_name(context, kt, kt_name, BUFSIZ))) { + printerr(0, "ERROR: Unable to get keytab name in " + "gssd_get_single_krb5_cred\n"); + goto out; + } + + krb5_get_init_creds_opt_init(&options); + krb5_get_init_creds_opt_set_address_list(&options, NULL); + +#ifdef TEST_SHORT_LIFETIME + /* set a short lifetime (for debugging only!) */ + printerr(0, "WARNING: Using (debug) short machine cred lifetime!\n"); + krb5_get_init_creds_opt_set_tkt_life(&options, 5*60); +#else + /* FIXME try to get the ticket with lifetime as long as possible, + * to work around ticket-expiry + recovery problem in cmd3-11 + * remove this!!! + */ + krb5_get_init_creds_opt_set_tkt_life(&options, 30*24*60*60); +#endif + if ((code = krb5_get_init_creds_keytab(context, &my_creds, ple->princ, + kt, 0, NULL, &options))) { + char *pname; + if ((krb5_unparse_name(context, ple->princ, &pname))) { + pname = NULL; + } + printerr(0, "WARNING: %s while getting initial ticket for " + "principal '%s' from keytab '%s'\n", + error_message(code), + pname ? pname : "<unparsable>", kt_name); + if (pname) KRB5_FREE_UNPARSED_NAME(context, pname); + goto out; + } + + /* + * Initialize cache file which we're going to be using + */ + + if (use_memcache) + cache_type = "MEMORY"; + else + cache_type = "FILE"; + snprintf(cc_name, sizeof(cc_name), "%s:%s/%s%s_%s", + cache_type, + GSSD_DEFAULT_CRED_DIR, GSSD_DEFAULT_CRED_PREFIX, + GSSD_DEFAULT_MACHINE_CRED_SUFFIX, ple->realm); + ple->endtime = my_creds.times.endtime; + ple->ccname = strdup(cc_name); + if (ple->ccname == NULL) { + printerr(0, "ERROR: no storage to duplicate credentials " + "cache name\n"); + code = ENOMEM; + goto out; + } + if ((code = krb5_cc_resolve(context, cc_name, &ccache))) { + printerr(0, "ERROR: %s while opening credential cache '%s'\n", + error_message(code), cc_name); + goto out; + } + if ((code = krb5_cc_initialize(context, ccache, ple->princ))) { + printerr(0, "ERROR: %s while initializing credential " + "cache '%s'\n", error_message(code), cc_name); + goto out; + } + if ((code = krb5_cc_store_cred(context, ccache, &my_creds))) { + printerr(0, "ERROR: %s while storing credentials in '%s'\n", + error_message(code), cc_name); + goto out; + } + + code = 0; + printerr(1, "Using (machine) credentials cache: '%s'\n", cc_name); + out: + if (ccache) + krb5_cc_close(context, ccache); + krb5_free_cred_contents(context, &my_creds); + return (code); +} + +static struct gssd_k5_kt_princ * gssd_get_realm_ple(void *r) +{ + struct gssd_k5_kt_princ *ple; +#ifdef HAVE_KRB5 + krb5_data *realm = (krb5_data *)r; +#else + char *realm = (char *)r; +#endif + + for (ple = gssd_k5_kt_princ_list; ple; ple = ple->next) { + if (KRB5_STRCMP(realm, ple->realm) == 0) + return ple; + } + return NULL; +} + +static void gssd_free_ple(krb5_context kctx, struct gssd_k5_kt_princ *ple) +{ + if (ple->princ) + krb5_free_principal(kctx, ple->princ); + if (ple->realm) + free(ple->realm); + if (ple->ccname) + free(ple->ccname); + free(ple); +} + +static int gssd_remove_ple(krb5_context kctx, struct gssd_k5_kt_princ *ple) +{ + struct gssd_k5_kt_princ **prev = &gssd_k5_kt_princ_list; + struct gssd_k5_kt_princ *ent = gssd_k5_kt_princ_list; + + for (; ent; prev = &ent->next, ent = ent->next) { + if (ent != ple) + continue; + + *prev = ent->next; + gssd_free_ple(kctx, ent); + return 1; + } + return 0; +} + +static +struct gssd_k5_kt_princ *gssd_create_ple(krb5_context kctx, + krb5_principal principal) +{ + struct gssd_k5_kt_princ *ple; + krb5_error_code code; + + ple = malloc(sizeof(*ple)); + if (ple == NULL) { + printerr(0, "ERROR: could not allocate storage " + "for principal list entry\n"); + return NULL; + } + + memset(ple, 0, sizeof(*ple)); + + ple->realm = KRB5_STRDUP(principal->realm); + if (ple->realm == NULL) { + printerr(0, "ERROR: not enough memory while copying realm to " + "principal list entry\n"); + goto err_free; + } + + code = krb5_copy_principal(kctx, principal, &ple->princ); + if (code) { + printerr(0, "ERROR: %s while copying principal " + "to principal list entry\n", + error_message(code)); + goto err_free; + } + + return ple; +err_free: + gssd_free_ple(kctx, ple); + return NULL; +} + +/* + * Process the given keytab file and create a list of principals we + * might use to perform mount operations. + * + * Returns: + * 0 => Sucess + * nonzero => Error + */ +static int +gssd_process_krb5_keytab(krb5_context context, krb5_keytab kt, char *kt_name) +{ + krb5_kt_cursor cursor; + krb5_keytab_entry kte; + krb5_error_code code; + struct gssd_k5_kt_princ *ple; + int retval = -1; + + /* + * Look through each entry in the keytab file and determine + * if we might want to use it later to do a mount. If so, + * save info in the global principal list + * (gssd_k5_kt_princ_list). + * Note: (ple == principal list entry) + */ + if ((code = krb5_kt_start_seq_get(context, kt, &cursor))) { + printerr(0, "ERROR: %s while beginning keytab scan " + "for keytab '%s'\n", + error_message(code), kt_name); + retval = code; + goto out; + } + + while ((code = krb5_kt_next_entry(context, kt, &kte, &cursor)) == 0) { + char *pname; + if ((code = krb5_unparse_name(context, kte.principal, + &pname))) { + printerr(0, "WARNING: Skipping keytab entry because " + "we failed to unparse principal name: %s\n", + error_message(code)); + continue; + } + printerr(2, "Processing keytab entry for principal '%s'\n", + pname); + + /* mds service entry: + * - hostname and realm should match this node + * - replace existing non-mds entry of this realm + */ + if (KEYTAB_ENTRY_MATCH(kte, GSSD_SERVICE_MDS)) { + krb5_principal princ = kte.principal; + krb5_data *princ_host; + struct utsname utsbuf; + struct hostent *host; + + if (KRB5_STRCASECMP(krb5_princ_realm(context, princ), + this_realm) != 0) { + printerr(2, "alien mds service entry, skip\n"); + goto next; + } + + princ_host = krb5_princ_component(context, princ, 1); + if (princ_host == NULL) { + printerr(2, "mds service entry: no hostname in " + "principal, skip\n"); + goto next; + } + + if (uname(&utsbuf)) { + printerr(2, "mds service entry: unable to get " + "UTS name, skip\n"); + goto next; + } + host = gethostbyname(utsbuf.nodename); + if (host == NULL) { + printerr(2, "mds service entry: unable to get " + "local hostname, skip\n"); + goto next; + } + + if (KRB5_STRCASECMP(princ_host, host->h_name) != 0) { + printerr(2, "mds service entry: hostname " + "doesn't match: %s - %.*s, skip\n", + host->h_name, + princ_host->length, princ_host->data); + goto next; + } + + ple = gssd_get_realm_ple((void *)&kte.principal->realm); + if (ple) { + if (ple->fl_mds) { + printerr(2,"mds service entry: found a" + "duplicated one, it's like a " + "mis-configuration, skip\n"); + goto next; + } + + gssd_remove_ple(context, ple); + printerr(2, "mds service entry: replace an " + "existed non-mds one\n"); + } + } else if (KEYTAB_ENTRY_MATCH(kte, LUSTRE_ROOT_NAME)) { + ple = gssd_get_realm_ple((void *)&kte.principal->realm); + if (ple) { + if (ple->fl_mds || ple->fl_root) { + printerr(2, "root entry: found a " + "existed %s entry, skip\n", + ple->fl_mds ? "mds" : "root"); + goto next; + } + + gssd_remove_ple(context, ple); + printerr(2, "root entry: replace an existed " + "non-mds non-root one\n"); + } + } else { + printerr(2, "We will NOT use this entry (%s)\n", + pname); + goto next; + } + + /* construct ple */ + printerr(2, "We will use this entry (%s)\n", pname); + ple = gssd_create_ple(context, kte.principal); + if (ple == NULL) { + KRB5_FREE_UNPARSED_NAME(context, pname); + goto out; + } + + /* add proper flags */ + if (KEYTAB_ENTRY_MATCH(kte, GSSD_SERVICE_MDS)) + ple->fl_mds = 1; + else if (KEYTAB_ENTRY_MATCH(kte, LUSTRE_ROOT_NAME)) + ple->fl_root = 1; + + /* enqueue */ + if (gssd_k5_kt_princ_list == NULL) + gssd_k5_kt_princ_list = ple; + else { + ple->next = gssd_k5_kt_princ_list; + gssd_k5_kt_princ_list = ple; + } + next: + KRB5_FREE_UNPARSED_NAME(context, pname); + } + + if ((code = krb5_kt_end_seq_get(context, kt, &cursor))) { + printerr(0, "WARNING: %s while ending keytab scan for " + "keytab '%s'\n", + error_message(code), kt_name); + } + + retval = 0; + out: + return retval; +} + +/* + * Depending on the version of Kerberos, we either need to use + * a private function, or simply set the environment variable. + */ +static void +gssd_set_krb5_ccache_name(char *ccname) +{ +#ifdef USE_GSS_KRB5_CCACHE_NAME + u_int maj_stat, min_stat; + + printerr(2, "using gss_krb5_ccache_name to select krb5 ccache %s\n", + ccname); + maj_stat = gss_krb5_ccache_name(&min_stat, ccname, NULL); + if (maj_stat != GSS_S_COMPLETE) { + printerr(0, "WARNING: gss_krb5_ccache_name with " + "name '%s' failed (%s)\n", + ccname, error_message(min_stat)); + } +#else + /* + * Set the KRB5CCNAME environment variable to tell the krb5 code + * which credentials cache to use. (Instead of using the private + * function above for which there is no generic gssapi + * equivalent.) + */ + printerr(2, "using environment variable to select krb5 ccache %s\n", + ccname); + setenv("KRB5CCNAME", ccname, 1); +#endif +} + +/* + * Parse the supported encryption type information + */ +static int +parse_enctypes(char *enctypes) +{ + int n = 0; + char *curr, *comma; + int i; + + /* Just in case this ever gets called more than once */ + if (krb5_enctypes != NULL) { + free(krb5_enctypes); + krb5_enctypes = NULL; + num_krb5_enctypes = 0; + } + + /* count the number of commas */ + for (curr = enctypes; curr && *curr != '\0'; curr = ++comma) { + comma = strchr(curr, ','); + if (comma != NULL) + n++; + else + break; + } + /* If no more commas and we're not at the end, there's one more value */ + if (*curr != '\0') + n++; + + /* Empty string, return an error */ + if (n == 0) + return ENOENT; + + /* Allocate space for enctypes array */ + if ((krb5_enctypes = (int *) calloc(n, sizeof(int))) == NULL) { + return ENOMEM; + } + + /* Now parse each value into the array */ + for (curr = enctypes, i = 0; curr && *curr != '\0'; curr = ++comma) { + krb5_enctypes[i++] = atoi(curr); + comma = strchr(curr, ','); + if (comma == NULL) + break; + } + + num_krb5_enctypes = n; + return 0; +} + +/*==========================*/ +/*=== External routines ===*/ +/*==========================*/ + +/* + * Attempt to find the best match for a credentials cache file + * given only a UID. We really need more information, but we + * do the best we can. + * + * Returns: + * void + */ +void +gssd_setup_krb5_user_gss_ccache(uid_t uid, char *servername) +{ + char buf[MAX_NETOBJ_SZ]; + struct dirent *d; + + printerr(2, "getting credentials for client with uid %u for " + "server %s\n", uid, servername); + memset(buf, 0, sizeof(buf)); + + if (gssd_find_existing_krb5_ccache(uid, &d)) { + snprintf(buf, sizeof(buf), "FILE:%s/%s", + ccachedir, d->d_name); + free(d); + } + else + snprintf(buf, sizeof(buf), "FILE:%s/%s%u", + ccachedir, GSSD_DEFAULT_CRED_PREFIX, uid); + printerr(2, "using %s as credentials cache for client with " + "uid %u for server %s\n", buf, uid, servername); + gssd_set_krb5_ccache_name(buf); +} + +/* + * Let the gss code know where to find the machine credentials ccache. + * + * Returns: + * void + */ +void +gssd_setup_krb5_machine_gss_ccache(char *ccname) +{ + printerr(2, "using %s as credentials cache for machine creds\n", + ccname); + gssd_set_krb5_ccache_name(ccname); +} + +/* + * The first time through this routine, go through the keytab and + * determine which keys we will try to use as machine credentials. + * Every time through this routine, try to obtain credentials using + * the keytab entries selected the first time through. + * + * Returns: + * 0 => obtained one or more credentials + * nonzero => error + * + */ + +int +gssd_refresh_krb5_machine_creds(void) +{ + krb5_context context = NULL; + krb5_keytab kt = NULL;; + krb5_error_code code; + int retval = -1; + struct gssd_k5_kt_princ *ple; + int gotone = 0; + static int processed_keytab = 0; + + + code = krb5_init_context(&context); + if (code) { + printerr(0, "ERROR: %s while initializing krb5 in " + "gssd_refresh_krb5_machine_creds\n", + error_message(code)); + retval = code; + goto out; + } + + printerr(2, "Using keytab file '%s'\n", keytabfile); + + if ((code = krb5_kt_resolve(context, keytabfile, &kt))) { + printerr(0, "ERROR: %s while resolving keytab '%s'\n", + error_message(code), keytabfile); + goto out; + } + + /* Only go through the keytab file once. Only print messages once. */ + if (gssd_k5_kt_princ_list == NULL && !processed_keytab) { + processed_keytab = 1; + gssd_process_krb5_keytab(context, kt, keytabfile); + if (gssd_k5_kt_princ_list == NULL) { + printerr(0, "ERROR: No usable keytab entries found in " + "keytab '%s'\n", keytabfile); + printerr(0, "You must have a valid keytab entry for " + "%s/<your.host>@<YOUR.REALM> on MDT nodes, " + "and %s@<YOUR.REALM> on client nodes, in " + "keytab file %s ?\n", + GSSD_SERVICE_MDS, LUSTRE_ROOT_NAME, + keytabfile); + } + } + + /* + * If we don't have any keytab entries we liked, then we have a problem + */ + if (gssd_k5_kt_princ_list == NULL) { + retval = ENOENT; + goto out; + } + + /* + * Now go through the list of saved entries and get initial + * credentials for them (We can't do this while making the + * list because it messes up the keytab iteration cursor + * when we use the keytab to get credentials.) + */ + for (ple = gssd_k5_kt_princ_list; ple; ple = ple->next) { + if ((gssd_get_single_krb5_cred(context, kt, ple)) == 0) { + gotone++; + } + } + if (!gotone) { + printerr(0, "ERROR: No usable machine credentials obtained\n"); + goto out; + } + + retval = 0; + out: + if (kt) krb5_kt_close(context, kt); + krb5_free_context(context); + + return retval; +} + + +/* + * Return an array of pointers to names of credential cache files + * which can be used to try to create gss contexts with a server. + * + * Returns: + * 0 => list is attached + * nonzero => error + */ +int +gssd_get_krb5_machine_cred_list(char ***list) +{ + char **l; + int listinc = 10; + int listsize = listinc; + int i = 0; + int retval; + struct gssd_k5_kt_princ *ple; + + /* Assume failure */ + retval = -1; + *list = (char **) NULL; + + /* Refresh machine credentials */ + if ((retval = gssd_refresh_krb5_machine_creds())) { + goto out; + } + + if ((l = (char **) malloc(listsize * sizeof(char *))) == NULL) { + retval = ENOMEM; + goto out; + } + + for (ple = gssd_k5_kt_princ_list; ple; ple = ple->next) { + if (ple->ccname) { + if (i + 1 > listsize) { + listsize += listinc; + l = (char **) + realloc(l, listsize * sizeof(char *)); + if (l == NULL) { + retval = ENOMEM; + goto out; + } + } + if ((l[i++] = strdup(ple->ccname)) == NULL) { + retval = ENOMEM; + goto out; + } + } + } + if (i > 0) { + l[i] = NULL; + *list = l; + retval = 0; + goto out; + } + out: + return retval; +} + +/* + * Frees the list of names returned in get_krb5_machine_cred_list() + */ +void +gssd_free_krb5_machine_cred_list(char **list) +{ + char **n; + + if (list == NULL) + return; + for (n = list; n && *n; n++) { + free(*n); + } + free(list); +} + +/* + * Called upon exit. Destroys machine credentials. + */ +void +gssd_destroy_krb5_machine_creds(void) +{ + krb5_context context; + krb5_error_code code = 0; + krb5_ccache ccache; + struct gssd_k5_kt_princ *ple; + + code = krb5_init_context(&context); + if (code) { + printerr(0, "ERROR: %s while initializing krb5\n", + error_message(code)); + goto out; + } + + for (ple = gssd_k5_kt_princ_list; ple; ple = ple->next) { + if (!ple->ccname) + continue; + if ((code = krb5_cc_resolve(context, ple->ccname, &ccache))) { + printerr(0, "WARNING: %s while resolving credential " + "cache '%s' for destruction\n", + error_message(code), ple->ccname); + continue; + } + + if ((code = krb5_cc_destroy(context, ccache))) { + printerr(0, "WARNING: %s while destroying credential " + "cache '%s'\n", + error_message(code), ple->ccname); + } + } + out: + krb5_free_context(context); +} + +#if 0 +#ifdef HAVE_SET_ALLOWABLE_ENCTYPES +/* + * this routine obtains a credentials handle via gss_acquire_cred() + * then calls gss_krb5_set_allowable_enctypes() to limit the encryption + * types negotiated. + * + * Returns: + * 0 => all went well + * -1 => there was an error + */ + +int +limit_krb5_enctypes(struct rpc_gss_sec *sec, uid_t uid) +{ + u_int maj_stat, min_stat; + gss_cred_id_t credh; + gss_OID_set_desc desired_mechs; + krb5_enctype enctypes[] = {ENCTYPE_DES_CBC_CRC}; + int num_enctypes = sizeof(enctypes) / sizeof(enctypes[0]); + + /* We only care about getting a krb5 cred */ + desired_mechs.count = 1; + desired_mechs.elements = &krb5oid; + + maj_stat = gss_acquire_cred(&min_stat, NULL, 0, + &desired_mechs, GSS_C_INITIATE, + &credh, NULL, NULL); + + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_acquire_cred", + maj_stat, min_stat, &krb5oid); + return -1; + } + + /* + * If we failed for any reason to produce global + * list of supported enctypes, use local default here. + */ + if (krb5_enctypes == NULL) + maj_stat = gss_set_allowable_enctypes(&min_stat, credh, + &krb5oid, num_enctypes, &enctypes); + else + maj_stat = gss_set_allowable_enctypes(&min_stat, credh, + &krb5oid, num_krb5_enctypes, + krb5_enctypes); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_set_allowable_enctypes", + maj_stat, min_stat, &krb5oid); + return -1; + } + sec->cred = credh; + + return 0; +} +#endif /* HAVE_SET_ALLOWABLE_ENCTYPES */ +#endif + +/* + * Obtain supported enctypes from kernel. + * Set defaults if info is not available. + */ +void +gssd_obtain_kernel_krb5_info(void) +{ + char enctype_file_name[128]; + char buf[1024]; + char enctypes[128]; + int nscanned; + int fd; + int use_default_enctypes = 0; + int nbytes, numfields; + char default_enctypes[] = "1,3,2"; + int code; + + snprintf(enctype_file_name, sizeof(enctype_file_name), + "%s/%s", pipefs_dir, "krb5_info"); + + if ((fd = open(enctype_file_name, O_RDONLY)) == -1) { + printerr(1, "WARNING: gssd_obtain_kernel_krb5_info: " + "Unable to open '%s'. Unable to determine " + "Kerberos encryption types supported by the " + "kernel; using defaults (%s).\n", + enctype_file_name, default_enctypes); + use_default_enctypes = 1; + goto do_the_parse; + } + memset(buf, 0, sizeof(buf)); + if ((nbytes = read(fd, buf, sizeof(buf)-1)) == -1) { + printerr(0, "WARNING: gssd_obtain_kernel_krb5_info: " + "Error reading Kerberos encryption type " + "information file '%s'; using defaults (%s).\n", + enctype_file_name, default_enctypes); + use_default_enctypes = 1; + close(fd); + goto do_the_parse; + } + close(fd); + numfields = sscanf(buf, "enctypes: %s\n%n", enctypes, &nscanned); + if (numfields < 1) { + printerr(0, "WARNING: gssd_obtain_kernel_krb5_info: " + "error parsing Kerberos encryption type " + "information from file '%s'; using defaults (%s).\n", + enctype_file_name, default_enctypes); + use_default_enctypes = 1; + goto do_the_parse; + } + if (nbytes > nscanned) { + printerr(2, "gssd_obtain_kernel_krb5_info: " + "Ignoring extra information, '%s', from '%s'\n", + buf+nscanned, enctype_file_name); + goto do_the_parse; + } + do_the_parse: + if (use_default_enctypes) + strcpy(enctypes, default_enctypes); + + if ((code = parse_enctypes(enctypes)) != 0) { + printerr(0, "ERROR: gssd_obtain_kernel_krb5_info: " + "parse_enctypes%s failed with code %d\n", + use_default_enctypes ? " (with default enctypes)" : "", + code); + } +} diff --git a/lustre/utils/gss/krb5_util.h b/lustre/utils/gss/krb5_util.h new file mode 100644 index 0000000..fa5292d --- /dev/null +++ b/lustre/utils/gss/krb5_util.h @@ -0,0 +1,30 @@ +#ifndef KRB5_UTIL_H +#define KRB5_UTIL_H + +#include <krb5.h> + +/* + * List of principals from our keytab that we + * may try to get credentials for + */ +struct gssd_k5_kt_princ { + struct gssd_k5_kt_princ *next; + krb5_principal princ; + unsigned int fl_root:1, + fl_mds:1; + char *ccname; + char *realm; + krb5_timestamp endtime; +}; + + +void gssd_setup_krb5_user_gss_ccache(uid_t uid, char *servername); +int gssd_get_krb5_machine_cred_list(char ***list); +int gssd_refresh_krb5_machine_creds(void); +void gssd_free_krb5_machine_cred_list(char **list); +void gssd_setup_krb5_machine_gss_ccache(char *servername); +void gssd_destroy_krb5_machine_creds(void); +void gssd_obtain_kernel_krb5_info(void); + + +#endif /* KRB5_UTIL_H */ diff --git a/lustre/utils/gss/l_idmap.c b/lustre/utils/gss/l_idmap.c new file mode 100644 index 0000000..eada85c --- /dev/null +++ b/lustre/utils/gss/l_idmap.c @@ -0,0 +1,37 @@ +#include <sys/types.h> +#include <stdlib.h> +#include <stdio.h> + +#include "lsupport.h" + +int main(int argc, char **argv) +{ + lnet_nid_t nid; + uid_t uid; + int rc; + + if (argc < 3) { + printf("Usage:\n" + "%s <princ> <nid>\n", + basename(argv[0])); + return 1; + } + + nid = libcfs_str2nid(argv[2]); + if (nid == LNET_NID_ANY) { + printf("parse nid %s failed\n", argv[2]); + return 1; + } + rc = lookup_mapping(argv[1], nid, &uid); + if (rc == -1) { + printf("lookup mapping failed\n"); + return 1; + } + + printf("principal: %s\n" + "nid: %#llx\n" + "uid: %u\n", + argv[1], nid, uid); + + return 0; +} diff --git a/lustre/utils/gss/lsupport.c b/lustre/utils/gss/lsupport.c new file mode 100644 index 0000000..4ab3854 --- /dev/null +++ b/lustre/utils/gss/lsupport.c @@ -0,0 +1,783 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2005 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "config.h" +#include <sys/param.h> +#include <sys/utsname.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/sem.h> + +#include <stdio.h> +#include <stdlib.h> +#include <pwd.h> +#include <grp.h> +#include <string.h> +#include <dirent.h> +#include <poll.h> +#include <fcntl.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <assert.h> +#ifdef HAVE_GETHOSTBYNAME +# include <netdb.h> +#endif + +#include "err_util.h" +#include "gssd.h" +#include "lsupport.h" + +/**************************************** + * exclusive startup * + ****************************************/ + +static struct __sem_s { + char *name; + key_t sem_key; + int sem_id; +} sems[2] = { + [GSSD_CLI] = { "client", 0x3a92d473, 0 }, + [GSSD_SVC] = { "server", 0x3b92d473, 0 }, +}; + +void gssd_init_unique(int type) +{ + struct __sem_s *sem = &sems[type]; + struct sembuf sembuf; + + assert(type == GSSD_CLI || type == GSSD_SVC); + +again: + sem->sem_id = semget(sem->sem_key, 1, IPC_CREAT | IPC_EXCL | 0700); + if (sem->sem_id == -1) { + if (errno != EEXIST) { + printerr(0, "Create sem: %s\n", strerror(errno)); + exit(-1); + } + + /* already exist. Note there's still a small window racing + * with other processes, due to the stupid semaphore semantics. + */ + sem->sem_id = semget(sem->sem_key, 0, 0700); + if (sem->sem_id == -1) { + if (errno == ENOENT) { + printerr(0, "another instance just exit, " + "try again\n"); + goto again; + } + + printerr(0, "Obtain sem: %s\n", strerror(errno)); + exit(-1); + } + } else { + int val = 1; + + if (semctl(sem->sem_id, 0, SETVAL, val) == -1) { + printerr(0, "Initialize sem: %s\n", + strerror(errno)); + exit(-1); + } + } + + sembuf.sem_num = 0; + sembuf.sem_op = -1; + sembuf.sem_flg = IPC_NOWAIT | SEM_UNDO; + + if (semop(sem->sem_id, &sembuf, 1) != 0) { + if (errno == EAGAIN) { + printerr(0, "Another instance is running, exit\n"); + exit(0); + } + printerr(0, "Grab sem: %s\n", strerror(errno)); + exit(0); + } + + printerr(2, "Successfully created %s global identity\n", sem->name); +} + +void gssd_exit_unique(int type) +{ + assert(type == GSSD_CLI || type == GSSD_SVC); + + /* + * do nothing. we can't remove the sem here, otherwise the race + * window would be much bigger. So it's sad we have to leave the + * sem in the system forever. + */ +} + +/**************************************** + * client side resolvation: * + * lnd/netid/nid => hostname * + ****************************************/ + +char gethostname_ex[PATH_MAX] = GSSD_DEFAULT_GETHOSTNAME_EX; + +typedef int lnd_nid2hostname_t(char *lnd, uint32_t net, uint32_t addr, + char *buf, int buflen); + +/* FIXME what about IPv6? */ +static +int socklnd_nid2hostname(char *lnd, uint32_t net, uint32_t addr, + char *buf, int buflen) +{ + struct hostent *ent; + + addr = htonl(addr); + ent = gethostbyaddr(&addr, sizeof(addr), AF_INET); + if (!ent) { + printerr(0, "%s: can't resolve 0x%x\n", lnd, addr); + return -1; + } + if (strlen(ent->h_name) >= buflen) { + printerr(0, "%s: name too long: %s\n", lnd, ent->h_name); + return -1; + } + strcpy(buf, ent->h_name); + + printerr(2, "%s: net 0x%x, addr 0x%x => %s\n", + lnd, net, addr, buf); + return 0; +} + +static +int lolnd_nid2hostname(char *lnd, uint32_t net, uint32_t addr, + char *buf, int buflen) +{ + struct utsname uts; + struct hostent *ent; + + if (addr) { + printerr(0, "%s: addr is 0x%x, we expect 0\n", lnd, addr); + return -1; + } + + if (uname(&uts)) { + printerr(0, "%s: failed obtain local machine name\n", lnd); + return -1; + } + + ent = gethostbyname(uts.nodename); + if (!ent) { + printerr(0, "%s: failed obtain canonical name of %s\n", + lnd, uts.nodename); + return -1; + } + + if (strlen(ent->h_name) >= buflen) { + printerr(0, "%s: name too long: %s\n", lnd, ent->h_name); + return -1; + } + strcpy(buf, ent->h_name); + + printerr(2, "%s: addr 0x%x => %s\n", lnd, addr, buf); + return 0; +} + +static int is_space(char c) +{ + return (c == ' ' || c == '\t' || c == '\n'); +} + +static +int external_nid2hostname(char *lnd, uint32_t net, uint32_t addr, + char *namebuf, int namebuflen) +{ + const int bufsize = PATH_MAX + 256; + char buf[bufsize], *head, *tail; + FILE *fghn; + + sprintf(buf, "%s %s 0x%x 0x%x", gethostname_ex, lnd, net, addr); + printerr(2, "cmd: %s\n", buf); + + fghn = popen(buf, "r"); + if (fghn == NULL) { + printerr(0, "failed to call %s\n", gethostname_ex); + return -1; + } + + head = fgets(buf, bufsize, fghn); + if (head == NULL) { + printerr(0, "can't read from %s\n", gethostname_ex); + return -1; + } + if (pclose(fghn) == -1) + printerr(1, "pclose failed, continue\n"); + + /* trim head/tail space */ + while (is_space(*head)) + head++; + + tail = head + strlen(head); + if (tail <= head) { + printerr(0, "no output from %s\n", gethostname_ex); + return -1; + } + while (is_space(*(tail - 1))) + tail--; + if (tail <= head) { + printerr(0, "output are all space from %s\n", gethostname_ex); + return -1; + } + *tail = '\0'; + + /* start with '@' means error msg */ + if (head[0] == '@') { + printerr(0, "error from %s: %s\n", gethostname_ex, &head[1]); + return -1; + } + + if (tail - head > namebuflen) { + printerr(0, "external hostname too long: %s\n", head); + return -1; + } + + printerr(2, "%s: net 0x%x, addr 0x%x => %s\n", + lnd, net, addr, head); + strcpy(namebuf, head); + return 0; +} + +static struct { + char *name; + lnd_nid2hostname_t *nid2name; +} converter[LND_ENUM_END_MARKER] = { + {"UNUSED0", NULL}, + [QSWLND] = { "QSWLND", external_nid2hostname}, + [SOCKLND] = { "SOCKLND", socklnd_nid2hostname }, + [GMLND] = { "GMLND", external_nid2hostname}, + [PTLLND] = { "PTLLND", external_nid2hostname }, + [O2IBLND] = { "O2IBLND", socklnd_nid2hostname }, /* XXX */ + [CIBLND] = { "CIBLND", external_nid2hostname }, + [OPENIBLND] = { "OPENIBLND",external_nid2hostname }, + [IIBLND] = { "IIBLND", external_nid2hostname }, + [LOLND] = { "LOLND", lolnd_nid2hostname }, + [RALND] = { "RALND", external_nid2hostname }, + [VIBLND] = { "VIBLND", external_nid2hostname }, +}; + +int lnet_nid2hostname(lnet_nid_t nid, char *buf, int buflen) +{ + uint32_t lnd, net, addr; + + addr = LNET_NIDADDR(nid); + net = LNET_NIDNET(nid); + lnd = LNET_NETTYP(net); + + if (lnd >= LND_ENUM_END_MARKER) { + printerr(0, "ERROR: Unrecognized LND %u\n", lnd); + return -1; + } + + if (converter[lnd].nid2name == NULL) { + printerr(0, "ERROR: %s converter not ready\n", + converter[lnd].name); + return -1; + } + + return converter[lnd].nid2name(converter[lnd].name, net, addr, + buf, buflen); +} + + +/**************************************** + * lnet support routine * + * (from lnet/libcfs/nidstrings.c * + ****************************************/ + +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +static int libcfs_lo_str2addr(char *str, int nob, uint32_t *addr); +static void libcfs_ip_addr2str(uint32_t addr, char *str); +static int libcfs_ip_str2addr(char *str, int nob, uint32_t *addr); +static void libcfs_decnum_addr2str(uint32_t addr, char *str); +static void libcfs_hexnum_addr2str(uint32_t addr, char *str); +static int libcfs_num_str2addr(char *str, int nob, uint32_t *addr); + +struct netstrfns { + int nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(uint32_t addr, char *str); + int (*nf_str2addr)(char *str, int nob, uint32_t *addr); +}; + +static struct netstrfns libcfs_netstrfns[] = { + {/* .nf_type */ LOLND, + /* .nf_name */ "lo", + /* .nf_modname */ "klolnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_lo_str2addr}, + {/* .nf_type */ SOCKLND, + /* .nf_name */ "tcp", + /* .nf_modname */ "ksocklnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ O2IBLND, + /* .nf_name */ "o2ib", + /* .nf_modname */ "ko2iblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ CIBLND, + /* .nf_name */ "cib", + /* .nf_modname */ "kciblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ OPENIBLND, + /* .nf_name */ "openib", + /* .nf_modname */ "kopeniblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ IIBLND, + /* .nf_name */ "iib", + /* .nf_modname */ "kiiblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ VIBLND, + /* .nf_name */ "vib", + /* .nf_modname */ "kviblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ RALND, + /* .nf_name */ "ra", + /* .nf_modname */ "kralnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ QSWLND, + /* .nf_name */ "elan", + /* .nf_modname */ "kqswlnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + {/* .nf_type */ GMLND, + /* .nf_name */ "gm", + /* .nf_modname */ "kgmlnd", + /* .nf_addr2str */ libcfs_hexnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + {/* .nf_type */ PTLLND, + /* .nf_name */ "ptl", + /* .nf_modname */ "kptllnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */ + {/* .nf_type */ -1}, +}; + +const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]); + +static int +libcfs_lo_str2addr(char *str, int nob, uint32_t *addr) +{ + *addr = 0; + return 1; +} + +static void +libcfs_ip_addr2str(uint32_t addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u", + (addr >> 24) & 0xff, (addr >> 16) & 0xff, + (addr >> 8) & 0xff, addr & 0xff); +} + +/* CAVEAT EMPTOR XscanfX + * I use "%n" at the end of a sscanf format to detect trailing junk. However + * sscanf may return immediately if it sees the terminating '0' in a string, so + * I initialise the %n variable to the expected length. If sscanf sets it; + * fine, if it doesn't, then the scan ended at the end of the string, which is + * fine too :) */ + +static int +libcfs_ip_str2addr(char *str, int nob, uint32_t *addr) +{ + int a; + int b; + int c; + int d; + int n = nob; /* XscanfX */ + + /* numeric IP? */ + if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && + n == nob && + (a & ~0xff) == 0 && (b & ~0xff) == 0 && + (c & ~0xff) == 0 && (d & ~0xff) == 0) { + *addr = ((a<<24)|(b<<16)|(c<<8)|d); + return 1; + } + +#ifdef HAVE_GETHOSTBYNAME + /* known hostname? */ + if (('a' <= str[0] && str[0] <= 'z') || + ('A' <= str[0] && str[0] <= 'Z')) { + char *tmp; + + tmp = malloc(nob + 1); + if (tmp != NULL) { + struct hostent *he; + + memcpy(tmp, str, nob); + tmp[nob] = 0; + + he = gethostbyname(tmp); + + free(tmp); + tmp = NULL; + + if (he != NULL) { + uint32_t ip = *(uint32_t *)he->h_addr; + + *addr = ntohl(ip); + return 1; + } + } + } +#endif + return 0; +} + +static void +libcfs_decnum_addr2str(uint32_t addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); +} + +static void +libcfs_hexnum_addr2str(uint32_t addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); +} + +static int +libcfs_num_str2addr(char *str, int nob, uint32_t *addr) +{ + int n; + + n = nob; + if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) + return 1; + + n = nob; + if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) + return 1; + + n = nob; + if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) + return 1; + + return 0; +} + +static struct netstrfns * +libcfs_lnd2netstrfns(int lnd) +{ + int i; + + if (lnd >= 0) + for (i = 0; i < libcfs_nnetstrfns; i++) + if (lnd == libcfs_netstrfns[i].nf_type) + return &libcfs_netstrfns[i]; + + return NULL; +} + +static struct netstrfns * +libcfs_str2net_internal(char *str, uint32_t *net) +{ + struct netstrfns *nf; + int nob; + int netnum; + int i; + + for (i = 0; i < libcfs_nnetstrfns; i++) { + nf = &libcfs_netstrfns[i]; + if (nf->nf_type >= 0 && + !strncmp(str, nf->nf_name, strlen(nf->nf_name))) + break; + } + + if (i == libcfs_nnetstrfns) + return NULL; + + nob = strlen(nf->nf_name); + + if (strlen(str) == (unsigned int)nob) { + netnum = 0; + } else { + if (nf->nf_type == LOLND) /* net number not allowed */ + return NULL; + + str += nob; + i = strlen(str); + if (sscanf(str, "%u%n", &netnum, &i) < 1 || + i != (int)strlen(str)) + return NULL; + } + + *net = LNET_MKNET(nf->nf_type, netnum); + return nf; +} + +lnet_nid_t +libcfs_str2nid(char *str) +{ + char *sep = strchr(str, '@'); + struct netstrfns *nf; + uint32_t net; + uint32_t addr; + + if (sep != NULL) { + nf = libcfs_str2net_internal(sep + 1, &net); + if (nf == NULL) + return LNET_NID_ANY; + } else { + sep = str + strlen(str); + net = LNET_MKNET(SOCKLND, 0); + nf = libcfs_lnd2netstrfns(SOCKLND); + if (!nf) + return LNET_NID_ANY; + } + + if (!nf->nf_str2addr(str, sep - str, &addr)) + return LNET_NID_ANY; + + return LNET_MKNID(net, addr); +} + +/**************************************** + * user mapping database handling * + * (very rudiment) * + ****************************************/ + +#define MAPPING_GROW_SIZE 512 +#define MAX_LINE_LEN 256 + +struct user_map_item { + char *principal; /* NULL means match all, will cause multi->single mapped, FORBID */ + lnet_nid_t nid; + uid_t uid; +}; + +struct user_mapping { + int nitems; + struct user_map_item *items; +}; + +static struct user_mapping mapping = {0, NULL}; +/* FIXME to be finished: monitor change of mapping database */ +static int mapping_mtime = 0; + +void cleanup_mapping(void) +{ + if (mapping.items) { + for (; mapping.nitems > 0; mapping.nitems--) + free(mapping.items[mapping.nitems - 1].principal); + free(mapping.items); + mapping.items = NULL; + } +} + +static int grow_mapping(int nitems) +{ + struct user_map_item *new; + int oldsize, newsize; + + oldsize = (mapping.nitems * sizeof(struct user_map_item) + + MAPPING_GROW_SIZE - 1) / MAPPING_GROW_SIZE; + newsize = (nitems * sizeof(struct user_map_item) + + MAPPING_GROW_SIZE - 1) / MAPPING_GROW_SIZE; + while (newsize <= oldsize) + return 0; + + newsize *= MAPPING_GROW_SIZE; + new = malloc(newsize); + if (!new) { + printerr(0, "can't alloc mapping size %d\n", newsize); + return -1; + } + + if (mapping.items) { + memcpy(new, mapping.items, mapping.nitems * sizeof(struct user_map_item)); + free(mapping.items); + } + mapping.items = new; + return 0; +} + +uid_t parse_uid(char *uidstr) +{ + struct passwd *pw; + char *p = NULL; + long uid; + + pw = getpwnam(uidstr); + if (pw) + return pw->pw_uid; + + uid = strtol(uidstr, &p, 0); + if (*p == '\0') + return (uid_t) uid; + + return -1; +} + +static int read_mapping_db(void) +{ + char princ[MAX_LINE_LEN]; + char nid_str[MAX_LINE_LEN]; + char dest[MAX_LINE_LEN]; + char linebuf[MAX_LINE_LEN]; + char *line; + lnet_nid_t nid; + uid_t dest_uid; + FILE *f; + + /* cleanup old mappings */ + cleanup_mapping(); + + f = fopen(MAPPING_DATABASE_FILE, "r"); + if (!f) { + printerr(0, "can't open mapping database: %s\n", + MAPPING_DATABASE_FILE); + return -1; + } + + while ((line = fgets(linebuf, MAX_LINE_LEN, f)) != NULL) { + char *name; + + if (strlen(line) >= MAX_LINE_LEN) { + printerr(0, "invalid mapping db: line too long (%d)\n", + strlen(line)); + continue; + } + + if (sscanf(line, "%s %s %s", princ, nid_str, dest) != 3) { + printerr(0, "mapping db: syntax error\n"); + continue; + } + + if (!strcmp(princ, "*")) { + printerr(0, "NOT permit \"*\" princ, it will cause multi->single mapped\n"); + continue; + } else { + name = strdup(princ); + if (!name) { + printerr(0, "fail to dup str %s\n", princ); + continue; + } + } + + if (!strcmp(nid_str, "*")) { + nid = LNET_NID_ANY; + } else { + nid = libcfs_str2nid(nid_str); + if (nid == LNET_NID_ANY) { + printerr(0, "fail to parse nid %s\n", nid_str); + free(name); + continue; + } + } + + dest_uid = parse_uid(dest); + if (dest_uid == -1) { + printerr(0, "no valid user: %s\n", dest); + free(name); + continue; + } + + if (grow_mapping(mapping.nitems + 1)) { + printerr(0, "fail to grow mapping to %d\n", + mapping.nitems + 1); + free(name); + fclose(f); + return -1; + } + + mapping.items[mapping.nitems].principal = name; + mapping.items[mapping.nitems].nid = nid; + mapping.items[mapping.nitems].uid = dest_uid; + mapping.nitems++; + printerr(1, "add mapping: %s(%s/0x%llx) ==> %d\n", + name, nid_str, nid, dest_uid); + } + + fclose(f); + return 0; +} + +static inline int mapping_changed(void) +{ + struct stat st; + + if (stat(MAPPING_DATABASE_FILE, &st) == -1) { + /* stat failed, treat it like doesn't exist or be removed */ + if (mapping_mtime == 0) { + return 0; + } else { + printerr(0, "Warning: stat %s failed: %s\n", + MAPPING_DATABASE_FILE, strerror(errno)); + + mapping_mtime = 0; + return 1; + } + } + + if (st.st_mtime != mapping_mtime) { + mapping_mtime = st.st_mtime; + return 1; + } + + return 0; +} + +int lookup_mapping(char *princ, lnet_nid_t nid, uid_t *uid) +{ + int n; + + *uid = -1; + + /* FIXME race condition here */ + if (mapping_changed()) { + if (read_mapping_db()) + printerr(0, "all remote users will be denied\n"); + } + + for (n = 0; n < mapping.nitems; n++) { + struct user_map_item *entry = &mapping.items[n]; + + if (entry->nid != LNET_NID_ANY && entry->nid != nid) + continue; + if (!strcasecmp(entry->principal, princ)) { + printerr(1, "found mapping: %s ==> %d\n", + princ, entry->uid); + *uid = entry->uid; + return 0; + } + } + + printerr(2, "no mapping for %s/%#Lx\n", princ, nid); + return -1; +} diff --git a/lustre/utils/gss/lsupport.h b/lustre/utils/gss/lsupport.h new file mode 100644 index 0000000..2640210 --- /dev/null +++ b/lustre/utils/gss/lsupport.h @@ -0,0 +1,89 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef __LIBCFS_H__ +#define __LIBCFS_H__ + +#include <unistd.h> +#include <stdint.h> + +#define GSSD_CLI (0) +#define GSSD_SVC (1) + +void gssd_init_unique(int type); +void gssd_exit_unique(int type); + +/* + * copied from lustre source + */ + +#define LUSTRE_GSS_SVC_MDS 0 +#define LUSTRE_GSS_SVC_OSS 1 + +struct lgssd_upcall_data { + uint32_t seq; + uint32_t uid; + uint32_t gid; + uint32_t svc; + uint64_t nid; + char obd[64]; +}; + +#define GSSD_INTERFACE_VERSION (1) + +struct lgssd_ioctl_param { + int version; /* in */ + char *uuid; /* in */ + int lustre_svc; /* in */ + uid_t uid; /* in */ + gid_t gid; /* in */ + long send_token_size;/* in */ + char *send_token; /* in */ + long reply_buf_size; /* in */ + char *reply_buf; /* in */ + long status; /* out */ + long reply_length; /* out */ +}; + +#define GSSD_DEFAULT_GETHOSTNAME_EX "/etc/lustre/nid2hostname" +#define MAPPING_DATABASE_FILE "/etc/lustre/idmap.conf" + +typedef uint64_t lnet_nid_t; +typedef uint32_t lnet_netid_t; + +#define LNET_NID_ANY ((lnet_nid_t) -1) +#define LNET_PID_ANY ((lnet_pid_t) -1) + +enum { + /* Only add to these values (i.e. don't ever change or redefine them): + * network addresses depend on them... */ + QSWLND = 1, + SOCKLND = 2, + GMLND = 3, + PTLLND = 4, + O2IBLND = 5, + CIBLND = 6, + OPENIBLND = 7, + IIBLND = 8, + LOLND = 9, + RALND = 10, + VIBLND = 11, + LND_ENUM_END_MARKER +}; + +int lnet_nid2hostname(lnet_nid_t nid, char *buf, int buflen); +void cleanup_mapping(void); +int lookup_mapping(char *princ, uint64_t nid, uid_t *uid); +lnet_nid_t libcfs_str2nid(char *str); + +/* how an LNET NID encodes net:address */ +#define LNET_NIDADDR(nid) ((uint32_t)((nid) & 0xffffffff)) +#define LNET_NIDNET(nid) ((uint32_t)(((nid) >> 32)) & 0xffffffff) +#define LNET_MKNID(net,addr) ((((uint64_t)(net))<<32)|((uint64_t)(addr))) +/* how net encodes type:number */ +#define LNET_NETNUM(net) ((net) & 0xffff) +#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) +#define LNET_MKNET(typ,num) ((((uint32_t)(typ))<<16)|((uint32_t)(num))) + +#endif /* __LIBCFS_H__ */ diff --git a/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff b/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff new file mode 100644 index 0000000..a9d31e0 --- /dev/null +++ b/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff @@ -0,0 +1,3962 @@ +diff -rup nfs-utils-1.0.10.orig/configure.in nfs-utils-1.0.10/configure.in +--- nfs-utils-1.0.10.orig/configure.in 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/configure.in 2006-12-15 15:11:52.000000000 -0700 +@@ -17,61 +17,14 @@ AC_ARG_WITH(release, + RELEASE=$withval, + RELEASE=1) + AC_SUBST(RELEASE) +-AC_ARG_WITH(statedir, +- [ --with-statedir=/foo use state dir /foo [/var/lib/nfs]], +- statedir=$withval, +- statedir=/var/lib/nfs) +- AC_SUBST(statedir) +-AC_ARG_WITH(statduser, +- [AC_HELP_STRING([--with-statduser=rpcuser], +- [statd to run under @<:@rpcuser or nobody@:>@] +- )], +- statduser=$withval, +- if test "x$cross_compiling" = "xno"; then +- if grep -s '^rpcuser:' /etc/passwd > /dev/null; then +- statduser=rpcuser +- else +- statduser=nobody +- fi +- else +- statduser=nobody +- fi) +- AC_SUBST(statduser) +-AC_ARG_ENABLE(nfsv3, +- [AC_HELP_STRING([--enable-nfsv3], +- [enable support for NFSv3 @<:@default=yes@:>@])], +- enable_nfsv3=$enableval, +- enable_nfsv3=yes) +- if test "$enable_nfsv3" = yes; then +- AC_DEFINE(NFS3_SUPPORTED, 1, [Define this if you want NFSv3 support compiled in]) +- else +- enable_nfsv3= +- fi +- AC_SUBST(enable_nfsv3) +-AC_ARG_ENABLE(nfsv4, +- [AC_HELP_STRING([--enable-nfsv4], +- [enable support for NFSv4 @<:@default=yes@:>@])], +- enable_nfsv4=$enableval, +- enable_nfsv4=yes) +- if test "$enable_nfsv4" = yes; then +- AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in]) +- IDMAPD=idmapd +- else +- enable_nfsv4= +- IDMAPD= +- fi +- AC_SUBST(IDMAPD) +- AC_SUBST(enable_nfsv4) +- AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"]) + AC_ARG_ENABLE(gss, + [AC_HELP_STRING([--enable-gss], + [enable support for rpcsec_gss @<:@default=yes@:>@])], + enable_gss=$enableval, + enable_gss=yes) + if test "$enable_gss" = yes; then +- AC_DEFINE(GSS_SUPPORTED, 1, [Define this if you want rpcsec_gss support compiled in]) +- GSSD=gssd +- SVCGSSD=svcgssd ++ GSSD=lgssd ++ SVCGSSD=lsvcgssd + else + enable_gss= + GSSD= +@@ -81,38 +34,6 @@ AC_ARG_ENABLE(gss, + AC_SUBST(SVCGSSD) + AC_SUBST(enable_gss) + AM_CONDITIONAL(CONFIG_GSS, [test "$enable_gss" = "yes"]) +-AC_ARG_ENABLE(kprefix, +- [AC_HELP_STRING([--enable-kprefix], [install progs as rpc.knfsd etc])], +- test "$enableval" = "yes" && kprefix=k, +- kprefix=) +- AC_SUBST(kprefix) +-AC_ARG_ENABLE(secure-statd, +- [AC_HELP_STRING([--enable-secure-statd], +- [Only lockd can use statd (security)])], +- test "$enableval" = "yes" && secure_statd=yes, +- secure_statd=no) +- if test "$secure_statd" = yes; then +- AC_DEFINE(RESTRICTED_STATD, 1, [Define this if you want to enable various security checks in statd. These checks basically keep anyone but lockd from using this service.]) +- fi +- AC_SUBST(secure_statd) +-AC_ARG_ENABLE(rquotad, +- [AC_HELP_STRING([--enable-rquotad], +- [enable rquotad @<:@default=yes@:>@])], +- enable_rquotad=$enableval, +- enable_rquotad=yes) +- if test "$enable_rquotad" = yes; then +- RQUOTAD=rquotad +- else +- RQUOTAD= +- fi +- AM_CONDITIONAL(CONFIG_RQUOTAD, [test "$enable_rquotad" = "yes"]) +- +-AC_ARG_ENABLE(mount, +- [AC_HELP_STRING([--enable-mount], +- [Create mount.nfs and don't use the util-linux mount(8) functionality. @<:@default=no@:>@])], +- enable_mount=$enableval, +- enable_mount=no) +- AM_CONDITIONAL(CONFIG_MOUNT, [test "$enable_mount" = "yes"]) + + # Check whether user wants TCP wrappers support + AC_TCP_WRAPPERS +@@ -155,47 +76,17 @@ AC_CHECK_FUNC(connect, , + AC_MSG_ERROR(Function 'socket' not found.), $LIBNSL)) + + AC_CHECK_LIB(crypt, crypt, [LIBCRYPT="-lcrypt"]) +-if test "$enable_nfsv4" = yes; then +- AC_CHECK_LIB(event, event_dispatch, [libevent=1], AC_MSG_ERROR([libevent needed for nfsv4 support])) +- AC_CHECK_LIB(nfsidmap, nfs4_init_name_mapping, [libnfsidmap=1], AC_MSG_ERROR([libnfsidmap needed for nfsv4 support])) +- AC_CHECK_HEADERS(event.h, ,AC_MSG_ERROR([libevent needed for nfsv4 support])) +- AC_CHECK_HEADERS(nfsidmap.h, ,AC_MSG_ERROR([libnfsidmap needed for nfsv4 support])) +- dnl librpcsecgss already has a dependency on libgssapi, +- dnl but we need to make sure we get the right version + if test "$enable_gss" = yes; then +- PKG_CHECK_MODULES(RPCSECGSS, librpcsecgss >= 0.10, , +- [AC_MSG_ERROR([Unable to locate information required to use librpcsecgss. If you have pkgconfig installed, you might try setting environment variable PKG_CONFIG_PATH to /usr/local/lib/pkgconfig]) +- ] +- ) + PKG_CHECK_MODULES(GSSAPI, libgssapi >= 0.9) + fi + +-fi +-if test "$knfsd_cv_glibc2" = no; then +- AC_CHECK_LIB(bsd, daemon, [LIBBSD="-lbsd"]) +-fi + AC_SUBST(LIBSOCKET) + AC_SUBST(LIBCRYPT) + AC_SUBST(LIBBSD) + + if test "$enable_gss" = yes; then +- dnl 'gss' also depends on nfsidmap.h - at least for svcgssd_proc.c +- AC_CHECK_HEADERS(nfsidmap.h, ,AC_MSG_ERROR([libnfsidmap needed for gss support])) +- AC_CHECK_HEADERS(spkm3.h, ,AC_MSG_WARN([could not locate SPKM3 header; will not have SPKM3 support])) +- dnl the nfs4_set_debug function doesn't appear in all version of the library +- AC_CHECK_LIB(nfsidmap, nfs4_set_debug, +- AC_DEFINE(HAVE_NFS4_SET_DEBUG,1, +- [Whether nfs4_set_debug() is present in libnfsidmap]),) +- + dnl Check for Kerberos V5 + AC_KERBEROS_V5 +- +- dnl This is not done until here because we need to have KRBLIBS set +- dnl ("librpcsecgss=1" is so that it doesn't get added to LIBS) +- AC_CHECK_LIB(rpcsecgss, authgss_create_default, [librpcsecgss=1], AC_MSG_ERROR([librpcsecgss needed for nfsv4 support]), -lgssapi -ldl) +- AC_CHECK_LIB(rpcsecgss, authgss_set_debug_level, +- AC_DEFINE(HAVE_AUTHGSS_SET_DEBUG_LEVEL, 1, [Define this if the rpcsec_gss library has the function authgss_set_debug_level]),, -lgssapi -ldl) +- + fi + + dnl ************************************************************* +@@ -307,35 +198,7 @@ AC_SUBST([ACLOCAL_AMFLAGS], ["-I $ac_mac + + AC_CONFIG_FILES([ + Makefile +- linux-nfs/Makefile +- support/Makefile +- support/export/Makefile +- support/include/nfs/Makefile +- support/include/rpcsvc/Makefile +- support/include/sys/fs/Makefile +- support/include/sys/Makefile +- support/include/Makefile +- support/misc/Makefile +- support/nfs/Makefile +- tools/Makefile +- tools/getiversion/Makefile +- tools/getkversion/Makefile +- tools/locktest/Makefile +- tools/nlmtest/Makefile +- tools/rpcdebug/Makefile +- tools/rpcgen/Makefile + utils/Makefile +- utils/exportfs/Makefile +- utils/gssd/Makefile +- utils/idmapd/Makefile +- utils/lockd/Makefile +- utils/mount/Makefile +- utils/mountd/Makefile +- utils/nfsd/Makefile +- utils/nfsstat/Makefile +- utils/nhfsstone/Makefile +- utils/rquotad/Makefile +- utils/showmount/Makefile +- utils/statd/Makefile]) ++ utils/gssd/Makefile]) + AC_OUTPUT + +diff -rup nfs-utils-1.0.10.orig/Makefile.am nfs-utils-1.0.10/Makefile.am +--- nfs-utils-1.0.10.orig/Makefile.am 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/Makefile.am 2006-12-15 15:11:52.000000000 -0700 +@@ -1,6 +1,6 @@ + ## Process this file with automake to produce Makefile.in + +-SUBDIRS = tools support utils linux-nfs ++SUBDIRS = utils + + MAINTAINERCLEANFILES = Makefile.in + +diff -rup nfs-utils-1.0.10.orig/utils/gssd/cacheio.c nfs-utils-1.0.10/utils/gssd/cacheio.c +--- nfs-utils-1.0.10.orig/utils/gssd/cacheio.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/cacheio.c 2006-12-15 15:12:23.000000000 -0700 +@@ -227,7 +227,8 @@ int qword_get(char **bpp, char *dest, in + return -1; + while (*bp == ' ') bp++; + *bpp = bp; +- *dest = '\0'; ++// why should we clear *dest??? ++// *dest = '\0'; + return len; + } + +diff -rup nfs-utils-1.0.10.orig/utils/gssd/context.c nfs-utils-1.0.10/utils/gssd/context.c +--- nfs-utils-1.0.10.orig/utils/gssd/context.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/context.c 2006-12-15 15:12:23.000000000 -0700 +@@ -33,8 +33,6 @@ + #include <syslog.h> + #include <string.h> + #include <gssapi/gssapi.h> +-#include <rpc/rpc.h> +-#include <rpc/auth_gss.h> + #include "gss_util.h" + #include "gss_oids.h" + #include "err_util.h" +diff -rup nfs-utils-1.0.10.orig/utils/gssd/context.h nfs-utils-1.0.10/utils/gssd/context.h +--- nfs-utils-1.0.10.orig/utils/gssd/context.h 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/context.h 2006-12-15 15:12:23.000000000 -0700 +@@ -31,8 +31,6 @@ + #ifndef _CONTEXT_H_ + #define _CONTEXT_H_ + +-#include <rpc/rpc.h> +- + /* Hopefully big enough to hold any serialized context */ + #define MAX_CTX_LEN 4096 + +diff -rup nfs-utils-1.0.10.orig/utils/gssd/context_lucid.c nfs-utils-1.0.10/utils/gssd/context_lucid.c +--- nfs-utils-1.0.10.orig/utils/gssd/context_lucid.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/context_lucid.c 2006-12-15 15:12:23.000000000 -0700 +@@ -41,11 +41,7 @@ + #include <syslog.h> + #include <string.h> + #include <errno.h> +-#include "gss_util.h" +-#include "gss_oids.h" +-#include "err_util.h" +-#include "context.h" +- ++#include <stdint.h> + #include <krb5.h> + #include <gssapi/gssapi.h> + #ifndef OM_uint64 +@@ -53,6 +49,11 @@ typedef uint64_t OM_uint64; + #endif + #include <gssapi/gssapi_krb5.h> + ++#include "gss_util.h" ++#include "gss_oids.h" ++#include "err_util.h" ++#include "context.h" ++ + static int + write_lucid_keyblock(char **p, char *end, gss_krb5_lucid_key_t *key) + { +@@ -354,6 +355,7 @@ static int + prepare_krb5_rfc4121_buffer(gss_krb5_lucid_context_v1_t *lctx, + gss_buffer_desc *buf) + { ++ static int constant_two = 2; + char *p, *end; + uint32_t v2_flags = 0; + gss_krb5_lucid_key_t enc_key; +@@ -372,7 +374,7 @@ prepare_krb5_rfc4121_buffer(gss_krb5_luc + end = buf->value + MAX_CTX_LEN; + + /* Version 2 */ +- if (WRITE_BYTES(&p, end, lctx->initiate)) goto out_err; ++ if (WRITE_BYTES(&p, end, constant_two)) goto out_err; + if (WRITE_BYTES(&p, end, lctx->endtime)) goto out_err; + + if (lctx->initiate) +@@ -434,14 +436,25 @@ prepare_krb5_rfc4121_buffer(gss_krb5_luc + goto out_err; + + /* Kc */ +- if (derive_key_lucid(&lctx->rfc1964_kd.ctx_key, +- &derived_key, +- KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM)) +- goto out_err; +- if (write_bytes(&p, end, derived_key.data, +- derived_key.length)) +- goto out_err; +- free(derived_key.data); ++ /* ++ * RC4 is special, it dosen't need key derivation. Actually ++ * the Ke is based on plain text. Here we just let all three ++ * key identical, kernel will handle everything. --ericm ++ */ ++ if (lctx->rfc1964_kd.ctx_key.type == ENCTYPE_ARCFOUR_HMAC) { ++ if (write_bytes(&p, end, lctx->rfc1964_kd.ctx_key.data, ++ lctx->rfc1964_kd.ctx_key.length)) ++ goto out_err; ++ } else { ++ if (derive_key_lucid(&lctx->rfc1964_kd.ctx_key, ++ &derived_key, ++ KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM)) ++ goto out_err; ++ if (write_bytes(&p, end, derived_key.data, ++ derived_key.length)) ++ goto out_err; ++ free(derived_key.data); ++ } + } else { + gss_krb5_lucid_key_t *keyptr; + uint32_t sign_usage, seal_usage; +@@ -451,6 +464,7 @@ prepare_krb5_rfc4121_buffer(gss_krb5_luc + else + keyptr = &lctx->cfx_kd.ctx_key; + ++#if 0 + if (lctx->initiate == 1) { + sign_usage = KG_USAGE_INITIATOR_SIGN; + seal_usage = KG_USAGE_INITIATOR_SEAL; +@@ -458,6 +472,19 @@ prepare_krb5_rfc4121_buffer(gss_krb5_luc + sign_usage = KG_USAGE_ACCEPTOR_SIGN; + seal_usage = KG_USAGE_ACCEPTOR_SEAL; + } ++#else ++ /* FIXME ++ * These are from rfc4142, but I don't understand: if we supply ++ * different 'usage' value for client & server, then the peers ++ * will have different derived keys. How could this work? ++ * ++ * Here we simply use old SIGN/SEAL values until we find the ++ * answer. --ericm ++ * FIXME ++ */ ++ sign_usage = KG_USAGE_SIGN; ++ seal_usage = KG_USAGE_SEAL; ++#endif + + /* derive and send down: Ke, Ki, and Kc */ + +diff -rup nfs-utils-1.0.10.orig/utils/gssd/context_mit.c nfs-utils-1.0.10/utils/gssd/context_mit.c +--- nfs-utils-1.0.10.orig/utils/gssd/context_mit.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/context_mit.c 2006-12-15 15:12:23.000000000 -0700 +@@ -39,7 +39,6 @@ + #include <errno.h> + #include <gssapi/gssapi.h> + #include <rpc/rpc.h> +-#include <rpc/auth_gss.h> + #include "gss_util.h" + #include "gss_oids.h" + #include "err_util.h" +@@ -333,12 +332,7 @@ serialize_krb5_ctx(gss_ctx_id_t ctx, gss + * keydata-2; ( Ki (Kseq for DES3) ) + * keydata-3; ( Kc (derived checksum key) ) + */ +- if (kctx->initiate) { +- if (WRITE_BYTES(&p, end, constant_one)) goto out_err; +- } +- else { +- if (WRITE_BYTES(&p, end, constant_zero)) goto out_err; +- } ++ if (WRITE_BYTES(&p, end, constant_two)) goto out_err; + if (WRITE_BYTES(&p, end, kctx->endtime)) goto out_err; + + /* Only applicable flag for this is initiator */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/context_spkm3.c nfs-utils-1.0.10/utils/gssd/context_spkm3.c +--- nfs-utils-1.0.10.orig/utils/gssd/context_spkm3.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/context_spkm3.c 2006-12-15 15:12:23.000000000 -0700 +@@ -33,8 +33,6 @@ + #include <syslog.h> + #include <string.h> + #include <gssapi/gssapi.h> +-#include <rpc/rpc.h> +-#include <rpc/auth_gss.h> + #include "gss_util.h" + #include "gss_oids.h" + #include "err_util.h" +diff -rup nfs-utils-1.0.10.orig/utils/gssd/err_util.c nfs-utils-1.0.10/utils/gssd/err_util.c +--- nfs-utils-1.0.10.orig/utils/gssd/err_util.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/err_util.c 2006-12-15 15:12:23.000000000 -0700 +@@ -32,6 +32,8 @@ + #include <stdarg.h> + #include <syslog.h> + #include <string.h> ++#include <fcntl.h> ++#include <ctype.h> + #include "err_util.h" + + static int verbosity = 0; +@@ -91,3 +93,40 @@ printit: + /* reset the buffer */ + memset(message_buf, 0, sizeof(message_buf)); + } ++ ++void print_hexl(int pri, unsigned char *cp, int length) ++{ ++ int i, j, jm; ++ unsigned char c; ++ ++ printerr(pri, "length %d\n",length); ++ printerr(pri, "\n"); ++ ++ for (i = 0; i < length; i += 0x10) { ++ printerr(pri, " %04x: ", (u_int)i); ++ jm = length - i; ++ jm = jm > 16 ? 16 : jm; ++ ++ for (j = 0; j < jm; j++) { ++ if ((j % 2) == 1) ++ printerr(pri,"%02x ", (u_int)cp[i+j]); ++ else ++ printerr(pri,"%02x", (u_int)cp[i+j]); ++ } ++ for (; j < 16; j++) { ++ if ((j % 2) == 1) ++ printerr(pri," "); ++ else ++ printerr(pri," "); ++ } ++ printerr(pri," "); ++ ++ for (j = 0; j < jm; j++) { ++ c = cp[i+j]; ++ c = isprint(c) ? c : '.'; ++ printerr(pri,"%c", c); ++ } ++ printerr(pri,"\n"); ++ } ++} ++ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/err_util.h nfs-utils-1.0.10/utils/gssd/err_util.h +--- nfs-utils-1.0.10.orig/utils/gssd/err_util.h 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/err_util.h 2006-12-15 15:12:23.000000000 -0700 +@@ -33,5 +33,6 @@ + + void initerr(char *progname, int verbosity, int fg); + void printerr(int priority, char *format, ...); ++void print_hexl(int pri, unsigned char *cp, int length); + + #endif /* _ERR_UTIL_H_ */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gss_clnt_send_err.c nfs-utils-1.0.10/utils/gssd/gss_clnt_send_err.c +--- nfs-utils-1.0.10.orig/utils/gssd/gss_clnt_send_err.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/gss_clnt_send_err.c 2006-12-15 15:12:23.000000000 -0700 +@@ -47,6 +47,7 @@ + #include "gssd.h" + #include "write_bytes.h" + ++#if 0 + char pipefsdir[PATH_MAX] = GSSD_PIPEFS_DIR; + + static void +@@ -102,3 +103,4 @@ main(int argc, char *argv[]) + } + exit(0); + } ++#endif +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gssd.c nfs-utils-1.0.10/utils/gssd/gssd.c +--- nfs-utils-1.0.10.orig/utils/gssd/gssd.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/gssd.c 2006-12-15 15:12:23.000000000 -0700 +@@ -38,9 +38,12 @@ + + #include "config.h" + ++#include <sys/types.h> + #include <sys/param.h> + #include <sys/socket.h> +-#include <rpc/rpc.h> ++#include <sys/wait.h> ++#include <sys/ipc.h> ++#include <sys/sem.h> + + #include <unistd.h> + #include <err.h> +@@ -48,23 +51,107 @@ + #include <stdlib.h> + #include <string.h> + #include <signal.h> ++#include <errno.h> + #include "gssd.h" + #include "err_util.h" + #include "gss_util.h" + #include "krb5_util.h" ++#include "lsupport.h" + + char pipefs_dir[PATH_MAX] = GSSD_PIPEFS_DIR; + char pipefs_nfsdir[PATH_MAX] = GSSD_PIPEFS_DIR; + char keytabfile[PATH_MAX] = GSSD_DEFAULT_KEYTAB_FILE; + char ccachedir[PATH_MAX] = GSSD_DEFAULT_CRED_DIR; + int use_memcache = 0; ++int lgssd_mutex_downcall = -1; + +-void +-sig_die(int signal) ++static int lgssd_create_mutex(int *semid) ++{ ++ int id; ++ int arg; ++ ++ id = semget(IPC_PRIVATE, 1, IPC_CREAT); ++ if (id == -1) { ++ printerr(0, "semget: %s\n", strerror(errno)); ++ return -1; ++ } ++ ++ arg = 1; ++ if (semctl(id, 0, SETVAL, arg) != 0) { ++ printerr(0, "semctl: %s\n", strerror(errno)); ++ semctl(id, 1, IPC_RMID, arg); ++ return -1; ++ } ++ ++ *semid = id; ++ return 0; ++} ++ ++void lgssd_init_mutexs(void) ++{ ++ if (lgssd_create_mutex(&lgssd_mutex_downcall)) { ++ printerr(0, "can't create downcall mutex\n"); ++ exit(1); ++ } ++} ++ ++void lgssd_fini_mutexs(void) ++{ ++ int arg = 0; ++ ++ if (lgssd_mutex_downcall != -1) ++ semctl(lgssd_mutex_downcall, 1, IPC_RMID, arg); ++} ++ ++void lgssd_mutex_get(int semid) ++{ ++ struct sembuf op[1] = { {0, -1, SEM_UNDO} }; ++ int rc; ++ ++ rc = semop(semid, op, 1); ++ if (rc != 0) { ++ printerr(0, "exit on mutex_get err %d: %s\n", ++ rc, strerror(errno)); ++ exit(1); ++ } ++} ++ ++void lgssd_mutex_put(int semid) + { ++ struct sembuf op[1] = { {0, 1, 0} }; ++ int rc; ++ ++ rc = semop(semid, op, 1); ++ if (rc != 0) { ++ printerr(0, "ignore mutex_put err %d: %s\n", ++ rc, strerror(errno)); ++ } ++} ++ ++static void lgssd_cleanup(void) ++{ ++ pid_t child_pid; ++ ++ /* make sure all children finished */ ++ while (1) { ++ child_pid = waitpid(-1, NULL, 0); ++ if (child_pid < 0) ++ break; ++ ++ printerr(3, "cleanup: child %d terminated\n", child_pid); ++ } ++ ++ lgssd_fini_mutexs(); ++ + /* destroy krb5 machine creds */ + gssd_destroy_krb5_machine_creds(); ++} ++ ++void ++sig_die(int signal) ++{ + printerr(1, "exiting on signal %d\n", signal); ++ lgssd_cleanup(); + exit(1); + } + +@@ -79,7 +166,7 @@ sig_hup(int signal) + static void + usage(char *progname) + { +- fprintf(stderr, "usage: %s [-f] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir]\n", ++ fprintf(stderr, "usage: %s [-f] [-v] [-p pipefsdir] [-k keytab] [-d ccachedir]\n", + progname); + exit(1); + } +@@ -89,7 +176,6 @@ main(int argc, char *argv[]) + { + int fg = 0; + int verbosity = 0; +- int rpc_verbosity = 0; + int opt; + extern char *optarg; + char *progname; +@@ -99,18 +185,12 @@ main(int argc, char *argv[]) + case 'f': + fg = 1; + break; +- case 'm': +- /* Accept but ignore this. Now the default. */ +- break; + case 'M': + use_memcache = 1; + break; + case 'v': + verbosity++; + break; +- case 'r': +- rpc_verbosity++; +- break; + case 'p': + strncpy(pipefs_dir, optarg, sizeof(pipefs_dir)); + if (pipefs_dir[sizeof(pipefs_dir)-1] != '\0') +@@ -131,10 +211,6 @@ main(int argc, char *argv[]) + break; + } + } +- snprintf(pipefs_nfsdir, sizeof(pipefs_nfsdir), "%s/%s", +- pipefs_dir, GSSD_SERVICE_NAME); +- if (pipefs_nfsdir[sizeof(pipefs_nfsdir)-1] != '\0') +- errx(1, "pipefs_nfsdir path name too long"); + + if ((progname = strrchr(argv[0], '/'))) + progname++; +@@ -142,30 +218,42 @@ main(int argc, char *argv[]) + progname = argv[0]; + + initerr(progname, verbosity, fg); +-#ifdef HAVE_AUTHGSS_SET_DEBUG_LEVEL +- authgss_set_debug_level(rpc_verbosity); +-#else +- if (rpc_verbosity > 0) +- printerr(0, "Warning: rpcsec_gss library does not " +- "support setting debug level\n"); +-#endif + + if (gssd_check_mechs() != 0) + errx(1, "Problem with gssapi library"); + ++ if (gssd_get_local_realm()) ++ errx(1, "get local realm"); ++ + if (!fg && daemon(0, 0) < 0) + errx(1, "fork"); + ++ /* This should be checked _after_ daemon(), because we need to own ++ * the undo-able semaphore by this process ++ */ ++ gssd_init_unique(GSSD_CLI); ++ ++ /* Process keytab file and get machine credentials. This will modify ++ * disk status so do it after we are sure we are the only instance ++ */ ++ if (gssd_refresh_krb5_machine_creds()) ++ return -1; ++ + signal(SIGINT, sig_die); + signal(SIGTERM, sig_die); + signal(SIGHUP, sig_hup); + +- /* Process keytab file and get machine credentials */ +- gssd_refresh_krb5_machine_creds(); ++#if 0 + /* Determine Kerberos information from the kernel */ + gssd_obtain_kernel_krb5_info(); ++#endif ++ ++ lgssd_init_mutexs(); ++ ++ printerr(0, "lgssd initialized and ready to serve\n"); ++ lgssd_run(); + +- gssd_run(); +- printerr(0, "gssd_run returned!\n"); +- abort(); ++ lgssd_cleanup(); ++ printerr(0, "lgssd exiting\n"); ++ return 0; + } +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gssd.h nfs-utils-1.0.10/utils/gssd/gssd.h +--- nfs-utils-1.0.10.orig/utils/gssd/gssd.h 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/gssd.h 2006-12-15 15:12:23.000000000 -0700 +@@ -48,8 +48,13 @@ + #define GSSD_DEFAULT_CRED_PREFIX "krb5cc_" + #define GSSD_DEFAULT_MACHINE_CRED_SUFFIX "machine" + #define GSSD_DEFAULT_KEYTAB_FILE "/etc/krb5.keytab" +-#define GSSD_SERVICE_NAME "nfs" +-#define GSSD_SERVICE_NAME_LEN 3 ++#define GSSD_SERVICE_MDS "lustre_mds" ++#define GSSD_SERVICE_OSS "lustre_oss" ++#define GSSD_SERVICE_MDS_NAMELEN 10 ++#define GSSD_SERVICE_OSS_NAMELEN 10 ++ ++#define LUSTRE_ROOT_NAME "lustre_root" ++#define LUSTRE_ROOT_NAMELEN 11 + + /* + * The gss mechanisms that we can handle +@@ -59,9 +64,9 @@ enum {AUTHTYPE_KRB5, AUTHTYPE_SPKM3, AUT + + + extern char pipefs_dir[PATH_MAX]; +-extern char pipefs_nfsdir[PATH_MAX]; + extern char keytabfile[PATH_MAX]; + extern char ccachedir[PATH_MAX]; ++extern char gethostname_ex[PATH_MAX]; + extern int use_memcache; + + TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list; +@@ -71,10 +76,6 @@ struct clnt_info { + char *dirname; + int dir_fd; + char *servicename; +- char *servername; +- int prog; +- int vers; +- char *protocol; + int krb5_fd; + int krb5_poll_index; + int spkm3_fd; +@@ -85,8 +86,14 @@ void init_client_list(void); + int update_client_list(void); + void handle_krb5_upcall(struct clnt_info *clp); + void handle_spkm3_upcall(struct clnt_info *clp); +-int gssd_acquire_cred(char *server_name); +-void gssd_run(void); ++void lgssd_run(void); ++ ++ ++extern int lgssd_mutex_downcall; + ++void lgssd_init_mutexs(void); ++void lgssd_fini_mutexs(void); ++void lgssd_mutex_get(int semid); ++void lgssd_mutex_put(int semid); + + #endif /* _RPC_GSSD_H_ */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gssd_main_loop.c nfs-utils-1.0.10/utils/gssd/gssd_main_loop.c +--- nfs-utils-1.0.10.orig/utils/gssd/gssd_main_loop.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/gssd_main_loop.c 2006-12-15 15:12:23.000000000 -0700 +@@ -94,11 +94,13 @@ scan_poll_results(int ret) + }; + + void +-gssd_run() ++lgssd_run() + { + int ret; + struct sigaction dn_act; + int fd; ++ time_t child_check = 0; ++ pid_t child_pid; + + /* Taken from linux/Documentation/dnotify.txt: */ + dn_act.sa_sigaction = dir_notify_handler; +@@ -106,10 +108,10 @@ gssd_run() + dn_act.sa_flags = SA_SIGINFO; + sigaction(DNOTIFY_SIGNAL, &dn_act, NULL); + +- if ((fd = open(pipefs_nfsdir, O_RDONLY)) == -1) { ++ if ((fd = open(pipefs_dir, O_RDONLY)) == -1) { + printerr(0, "ERROR: failed to open %s: %s\n", +- pipefs_nfsdir, strerror(errno)); +- exit(1); ++ pipefs_dir, strerror(errno)); ++ return; + } + fcntl(fd, F_SETSIG, DNOTIFY_SIGNAL); + fcntl(fd, F_NOTIFY, DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT); +@@ -119,12 +121,30 @@ gssd_run() + while (1) { + while (dir_changed) { + dir_changed = 0; ++ printerr(2, "pipefs root dir changed\n"); + if (update_client_list()) { + printerr(0, "ERROR: couldn't update " + "client list\n"); +- exit(1); ++ goto out; + } + } ++ ++ /* every 5s cleanup possible zombies of child processes */ ++ if (time(NULL) - child_check >= 5) { ++ printerr(3, "check zombie children...\n"); ++ ++ while (1) { ++ child_pid = waitpid(-1, NULL, WNOHANG); ++ if (child_pid <= 0) ++ break; ++ ++ printerr(2, "terminate zombie child: %d\n", ++ child_pid); ++ } ++ ++ child_check = time(NULL); ++ } ++ + /* race condition here: dir_changed could be set before we + * enter the poll, and we'd never notice if it weren't for the + * timeout. */ +@@ -139,6 +159,7 @@ gssd_run() + scan_poll_results(ret); + } + } ++out: + close(fd); + return; + } +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gssd_proc.c nfs-utils-1.0.10/utils/gssd/gssd_proc.c +--- nfs-utils-1.0.10.orig/utils/gssd/gssd_proc.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/gssd_proc.c 2006-12-15 15:12:23.000000000 -0700 +@@ -43,7 +43,6 @@ + #endif + #include "config.h" + #include <sys/param.h> +-#include <rpc/rpc.h> + #include <sys/stat.h> + #include <sys/socket.h> + #include <arpa/inet.h> +@@ -69,6 +68,7 @@ + #include "gss_oids.h" + #include "krb5_util.h" + #include "context.h" ++#include "lsupport.h" + + /* + * pollarray: +@@ -99,86 +99,10 @@ struct pollfd * pollarray; + + int pollsize; /* the size of pollaray (in pollfd's) */ + +-/* XXX buffer problems: */ +-static int +-read_service_info(char *info_file_name, char **servicename, char **servername, +- int *prog, int *vers, char **protocol) { +-#define INFOBUFLEN 256 +- char buf[INFOBUFLEN]; +- static char dummy[128]; +- int nbytes; +- static char service[128]; +- static char address[128]; +- char program[16]; +- char version[16]; +- char protoname[16]; +- in_addr_t inaddr; +- int fd = -1; +- struct hostent *ent = NULL; +- int numfields; +- +- *servicename = *servername = *protocol = NULL; +- +- if ((fd = open(info_file_name, O_RDONLY)) == -1) { +- printerr(0, "ERROR: can't open %s: %s\n", info_file_name, +- strerror(errno)); +- goto fail; +- } +- if ((nbytes = read(fd, buf, INFOBUFLEN)) == -1) +- goto fail; +- close(fd); +- +- numfields = sscanf(buf,"RPC server: %127s\n" +- "service: %127s %15s version %15s\n" +- "address: %127s\n" +- "protocol: %15s\n", +- dummy, +- service, program, version, +- address, +- protoname); +- +- if (numfields == 5) { +- strcpy(protoname, "tcp"); +- } else if (numfields != 6) { +- goto fail; +- } +- +- /* check service, program, and version */ +- if(memcmp(service, "nfs", 3)) return -1; +- *prog = atoi(program + 1); /* skip open paren */ +- *vers = atoi(version); +- if((*prog != 100003) || ((*vers != 2) && (*vers != 3) && (*vers != 4))) +- goto fail; +- +- /* create service name */ +- inaddr = inet_addr(address); +- if (!(ent = gethostbyaddr(&inaddr, sizeof(inaddr), AF_INET))) { +- printerr(0, "ERROR: can't resolve server %s name\n", address); +- goto fail; +- } +- if (!(*servername = calloc(strlen(ent->h_name) + 1, 1))) +- goto fail; +- memcpy(*servername, ent->h_name, strlen(ent->h_name)); +- snprintf(buf, INFOBUFLEN, "%s@%s", service, ent->h_name); +- if (!(*servicename = calloc(strlen(buf) + 1, 1))) +- goto fail; +- memcpy(*servicename, buf, strlen(buf)); +- +- if (!(*protocol = strdup(protoname))) +- goto fail; +- return 0; +-fail: +- printerr(0, "ERROR: failed to read service info\n"); +- if (fd != -1) close(fd); +- if (*servername) free(*servername); +- if (*servicename) free(*servicename); +- if (*protocol) free(*protocol); +- return -1; +-} +- + static void + destroy_client(struct clnt_info *clp) + { ++ printerr(3, "clp %p: dirname %s, krb5fd %d\n", clp, clp->dirname, clp->krb5_fd); + if (clp->krb5_poll_index != -1) + memset(&pollarray[clp->krb5_poll_index], 0, + sizeof(struct pollfd)); +@@ -190,8 +114,6 @@ destroy_client(struct clnt_info *clp) + if (clp->spkm3_fd != -1) close(clp->spkm3_fd); + if (clp->dirname) free(clp->dirname); + if (clp->servicename) free(clp->servicename); +- if (clp->servername) free(clp->servername); +- if (clp->protocol) free(clp->protocol); + free(clp); + } + +@@ -221,7 +143,6 @@ process_clnt_dir_files(struct clnt_info + { + char kname[32]; + char sname[32]; +- char info_file_name[32]; + + if (clp->krb5_fd == -1) { + snprintf(kname, sizeof(kname), "%s/krb5", clp->dirname); +@@ -233,13 +154,6 @@ process_clnt_dir_files(struct clnt_info + } + if((clp->krb5_fd == -1) && (clp->spkm3_fd == -1)) + return -1; +- snprintf(info_file_name, sizeof(info_file_name), "%s/info", +- clp->dirname); +- if ((clp->servicename == NULL) && +- read_service_info(info_file_name, &clp->servicename, +- &clp->servername, &clp->prog, &clp->vers, +- &clp->protocol)) +- return -1; + return 0; + } + +@@ -273,6 +187,8 @@ insert_clnt_poll(struct clnt_info *clp) + } + pollarray[clp->krb5_poll_index].fd = clp->krb5_fd; + pollarray[clp->krb5_poll_index].events |= POLLIN; ++ printerr(2, "monitoring krb5 channel under %s\n", ++ clp->dirname); + } + + if ((clp->spkm3_fd != -1) && (clp->spkm3_poll_index == -1)) { +@@ -386,67 +302,106 @@ find_client(char *dirname) + int + update_client_list(void) + { +- struct dirent **namelist; ++ char lustre_dir[PATH_MAX]; ++ struct dirent lustre_dirent = { .d_name = "lustre" }; ++ struct dirent *namelist[1]; ++ struct stat statbuf; + int i, j; + +- if (chdir(pipefs_nfsdir) < 0) { ++ if (chdir(pipefs_dir) < 0) { + printerr(0, "ERROR: can't chdir to %s: %s\n", +- pipefs_nfsdir, strerror(errno)); ++ pipefs_dir, strerror(errno)); + return -1; + } + +- j = scandir(pipefs_nfsdir, &namelist, NULL, alphasort); +- if (j < 0) { +- printerr(0, "ERROR: can't scandir %s: %s\n", +- pipefs_nfsdir, strerror(errno)); +- return -1; ++ snprintf(lustre_dir, sizeof(lustre_dir), "%s/%s", pipefs_dir, "lustre"); ++ if (stat(lustre_dir, &statbuf) == 0) { ++ namelist[0] = &lustre_dirent; ++ j = 1; ++ printerr(2, "re-processing lustre directory\n"); ++ } else { ++ namelist[0] = NULL; ++ j = 0; ++ printerr(2, "lustre directory not exist\n"); + } ++ + update_old_clients(namelist, j); + for (i=0; i < j; i++) { +- if (i < FD_ALLOC_BLOCK +- && !strncmp(namelist[i]->d_name, "clnt", 4) +- && !find_client(namelist[i]->d_name)) ++ if (i < FD_ALLOC_BLOCK && !find_client(namelist[i]->d_name)) + process_clnt_dir(namelist[i]->d_name); +- free(namelist[i]); + } + +- free(namelist); ++ chdir("/"); + return 0; + } + ++/* Context creation response. */ ++struct lustre_gss_init_res { ++ gss_buffer_desc gr_ctx; /* context handle */ ++ u_int gr_major; /* major status */ ++ u_int gr_minor; /* minor status */ ++ u_int gr_win; /* sequence window */ ++ gss_buffer_desc gr_token; /* token */ ++}; ++ ++struct lustre_gss_data { ++ int lgd_established; ++ int lgd_lustre_svc; /* mds/oss */ ++ int lgd_uid; /* uid */ ++ char *lgd_uuid; /* client device uuid */ ++ gss_name_t lgd_name; /* service name */ ++ ++ gss_OID lgd_mech; /* mech OID */ ++ u_int lgd_req_flags; /* request flags */ ++ gss_cred_id_t lgd_cred; /* credential */ ++ gss_ctx_id_t lgd_ctx; /* session context */ ++ gss_buffer_desc lgd_rmt_ctx; /* remote handle of context */ ++ uint32_t lgd_seq_win; /* sequence window */ ++ ++ int lgd_rpc_err; ++ int lgd_gss_err; ++}; ++ + static int +-do_downcall(int k5_fd, uid_t uid, struct authgss_private_data *pd, +- gss_buffer_desc *context_token) ++do_downcall(int k5_fd, struct lgssd_upcall_data *updata, ++ struct lustre_gss_data *lgd, gss_buffer_desc *context_token) + { + char *buf = NULL, *p = NULL, *end = NULL; + unsigned int timeout = 0; /* XXX decide on a reasonable value */ + unsigned int buf_size = 0; + +- printerr(1, "doing downcall\n"); +- buf_size = sizeof(uid) + sizeof(timeout) + sizeof(pd->pd_seq_win) + +- sizeof(pd->pd_ctx_hndl.length) + pd->pd_ctx_hndl.length + ++ printerr(2, "doing downcall\n"); ++ buf_size = sizeof(updata->seq) + sizeof(timeout) + ++ sizeof(lgd->lgd_seq_win) + ++ sizeof(lgd->lgd_rmt_ctx.length) + lgd->lgd_rmt_ctx.length + + sizeof(context_token->length) + context_token->length; + p = buf = malloc(buf_size); + end = buf + buf_size; + +- if (WRITE_BYTES(&p, end, uid)) goto out_err; ++ if (WRITE_BYTES(&p, end, updata->seq)) goto out_err; + /* Not setting any timeout for now: */ + if (WRITE_BYTES(&p, end, timeout)) goto out_err; +- if (WRITE_BYTES(&p, end, pd->pd_seq_win)) goto out_err; +- if (write_buffer(&p, end, &pd->pd_ctx_hndl)) goto out_err; ++ if (WRITE_BYTES(&p, end, lgd->lgd_seq_win)) goto out_err; ++ if (write_buffer(&p, end, &lgd->lgd_rmt_ctx)) goto out_err; + if (write_buffer(&p, end, context_token)) goto out_err; + +- if (write(k5_fd, buf, p - buf) < p - buf) goto out_err; ++ lgssd_mutex_get(lgssd_mutex_downcall); ++ if (write(k5_fd, buf, p - buf) < p - buf) { ++ lgssd_mutex_put(lgssd_mutex_downcall); ++ goto out_err; ++ } ++ lgssd_mutex_put(lgssd_mutex_downcall); ++ + if (buf) free(buf); + return 0; + out_err: + if (buf) free(buf); +- printerr(0, "Failed to write downcall!\n"); ++ printerr(0, "ERROR: Failed to write downcall!\n"); + return -1; + } + + static int +-do_error_downcall(int k5_fd, uid_t uid, int err) ++do_error_downcall(int k5_fd, uint32_t seq, int rpc_err, int gss_err) + { + char buf[1024]; + char *p = buf, *end = buf + 1024; +@@ -455,19 +410,26 @@ do_error_downcall(int k5_fd, uid_t uid, + + printerr(1, "doing error downcall\n"); + +- if (WRITE_BYTES(&p, end, uid)) goto out_err; ++ if (WRITE_BYTES(&p, end, seq)) goto out_err; + if (WRITE_BYTES(&p, end, timeout)) goto out_err; + /* use seq_win = 0 to indicate an error: */ + if (WRITE_BYTES(&p, end, zero)) goto out_err; +- if (WRITE_BYTES(&p, end, err)) goto out_err; ++ if (WRITE_BYTES(&p, end, rpc_err)) goto out_err; ++ if (WRITE_BYTES(&p, end, gss_err)) goto out_err; + +- if (write(k5_fd, buf, p - buf) < p - buf) goto out_err; ++ lgssd_mutex_get(lgssd_mutex_downcall); ++ if (write(k5_fd, buf, p - buf) < p - buf) { ++ lgssd_mutex_put(lgssd_mutex_downcall); ++ goto out_err; ++ } ++ lgssd_mutex_put(lgssd_mutex_downcall); + return 0; + out_err: + printerr(0, "Failed to write error downcall!\n"); + return -1; + } + ++#if 0 + /* + * Create an RPC connection and establish an authenticated + * gss context with a server. +@@ -659,7 +621,287 @@ int create_auth_rpc_client(struct clnt_i + + goto out; + } ++#endif ++ ++static ++int do_negotiation(struct lustre_gss_data *lgd, ++ gss_buffer_desc *gss_token, ++ struct lustre_gss_init_res *gr, ++ int timeout) ++{ ++ char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel"; ++ struct lgssd_ioctl_param param; ++ struct passwd *pw; ++ int fd, ret; ++ char outbuf[8192]; ++ unsigned int *p; ++ int res; ++ ++ pw = getpwuid(lgd->lgd_uid); ++ if (!pw) { ++ printerr(0, "no uid %u in local user database\n", ++ lgd->lgd_uid); ++ return -1; ++ } ++ ++ param.version = GSSD_INTERFACE_VERSION; ++ param.uuid = lgd->lgd_uuid; ++ param.lustre_svc = lgd->lgd_lustre_svc; ++ param.uid = lgd->lgd_uid; ++ param.gid = pw->pw_gid; ++ param.send_token_size = gss_token->length; ++ param.send_token = (char *) gss_token->value; ++ param.reply_buf_size = sizeof(outbuf); ++ param.reply_buf = outbuf; ++ ++ fd = open(file, O_RDWR); ++ if (fd < 0) { ++ printerr(0, "can't open file %s\n", file); ++ return -1; ++ } ++ ++ ret = write(fd, ¶m, sizeof(param)); ++ ++ if (ret != sizeof(param)) { ++ printerr(0, "lustre ioctl err: %d\n", strerror(errno)); ++ close(fd); ++ return -1; ++ } ++ if (param.status) { ++ close(fd); ++ printerr(0, "status: %d (%s)\n", ++ param.status, strerror((int)param.status)); ++ if (param.status == -ETIMEDOUT) { ++ /* kernel return -ETIMEDOUT means the rpc timedout, ++ * we should notify the caller to reinitiate the ++ * gss negotiation, by return -ERESTART ++ */ ++ lgd->lgd_rpc_err = -ERESTART; ++ lgd->lgd_gss_err = 0; ++ } else { ++ lgd->lgd_rpc_err = param.status; ++ lgd->lgd_gss_err = 0; ++ } ++ return -1; ++ } ++ p = (unsigned int *)outbuf; ++ res = *p++; ++ gr->gr_major = *p++; ++ gr->gr_minor = *p++; ++ gr->gr_win = *p++; ++ ++ gr->gr_ctx.length = *p++; ++ gr->gr_ctx.value = malloc(gr->gr_ctx.length); ++ memcpy(gr->gr_ctx.value, p, gr->gr_ctx.length); ++ p += (((gr->gr_ctx.length + 3) & ~3) / 4); ++ ++ gr->gr_token.length = *p++; ++ gr->gr_token.value = malloc(gr->gr_token.length); ++ memcpy(gr->gr_token.value, p, gr->gr_token.length); ++ p += (((gr->gr_token.length + 3) & ~3) / 4); ++ ++ printerr(2, "do_negotiation: receive handle len %d, token len %d\n", ++ gr->gr_ctx.length, gr->gr_token.length); ++ close(fd); ++ return 0; ++} ++ ++static ++int gssd_refresh_lgd(struct lustre_gss_data *lgd) ++{ ++ struct lustre_gss_init_res gr; ++ gss_buffer_desc *recv_tokenp, send_token; ++ OM_uint32 maj_stat, min_stat, call_stat, ret_flags; ++ ++ /* GSS context establishment loop. */ ++ memset(&gr, 0, sizeof(gr)); ++ recv_tokenp = GSS_C_NO_BUFFER; ++ ++ for (;;) { ++ /* print the token we just received */ ++ if (recv_tokenp != GSS_C_NO_BUFFER) { ++ printerr(3, "The received token length %d\n", ++ recv_tokenp->length); ++ print_hexl(3, recv_tokenp->value, recv_tokenp->length); ++ } ++ ++ maj_stat = gss_init_sec_context(&min_stat, ++ lgd->lgd_cred, ++ &lgd->lgd_ctx, ++ lgd->lgd_name, ++ lgd->lgd_mech, ++ lgd->lgd_req_flags, ++ 0, /* time req */ ++ NULL, /* channel */ ++ recv_tokenp, ++ NULL, /* used mech */ ++ &send_token, ++ &ret_flags, ++ NULL); /* time rec */ ++ ++ if (recv_tokenp != GSS_C_NO_BUFFER) { ++ gss_release_buffer(&min_stat, &gr.gr_token); ++ recv_tokenp = GSS_C_NO_BUFFER; ++ } ++ if (maj_stat != GSS_S_COMPLETE && ++ maj_stat != GSS_S_CONTINUE_NEEDED) { ++ pgsserr("gss_init_sec_context", maj_stat, min_stat, ++ lgd->lgd_mech); ++ break; ++ } ++ if (send_token.length != 0) { ++ memset(&gr, 0, sizeof(gr)); ++ ++ /* print the token we are about to send */ ++ printerr(3, "token being sent length %d\n", ++ send_token.length); ++ print_hexl(3, send_token.value, send_token.length); ++ ++ call_stat = do_negotiation(lgd, &send_token, &gr, 0); ++ gss_release_buffer(&min_stat, &send_token); ++ ++ if (call_stat != 0 || ++ (gr.gr_major != GSS_S_COMPLETE && ++ gr.gr_major != GSS_S_CONTINUE_NEEDED)) { ++ printerr(0, "call stat %d, major stat 0x%x\n", ++ (int)call_stat, gr.gr_major); ++ return -1; ++ } ++ ++ if (gr.gr_ctx.length != 0) { ++ if (lgd->lgd_rmt_ctx.value) ++ gss_release_buffer(&min_stat, ++ &lgd->lgd_rmt_ctx); ++ lgd->lgd_rmt_ctx = gr.gr_ctx; ++ } ++ if (gr.gr_token.length != 0) { ++ if (maj_stat != GSS_S_CONTINUE_NEEDED) ++ break; ++ recv_tokenp = &gr.gr_token; ++ } ++ } ++ ++ /* GSS_S_COMPLETE => check gss header verifier, ++ * usually checked in gss_validate ++ */ ++ if (maj_stat == GSS_S_COMPLETE) { ++ lgd->lgd_established = 1; ++ lgd->lgd_seq_win = gr.gr_win; ++ break; ++ } ++ } ++ /* End context negotiation loop. */ ++ if (!lgd->lgd_established) { ++ if (gr.gr_token.length != 0) ++ gss_release_buffer(&min_stat, &gr.gr_token); ++ ++ printerr(0, "context negotiation failed\n"); ++ return -1; ++ } ++ ++ printerr(2, "successfully refreshed lgd\n"); ++ return 0; ++} ++ ++static ++int gssd_create_lgd(struct clnt_info *clp, ++ struct lustre_gss_data *lgd, ++ struct lgssd_upcall_data *updata, ++ int authtype) ++{ ++ gss_buffer_desc sname; ++ OM_uint32 maj_stat, min_stat; ++ int retval = -1; ++ ++ lgd->lgd_established = 0; ++ lgd->lgd_lustre_svc = updata->svc; ++ lgd->lgd_uid = updata->uid; ++ lgd->lgd_uuid = updata->obd; ++ ++ switch (authtype) { ++ case AUTHTYPE_KRB5: ++ lgd->lgd_mech = (gss_OID) &krb5oid; ++ lgd->lgd_req_flags = GSS_C_MUTUAL_FLAG; ++ break; ++ case AUTHTYPE_SPKM3: ++ lgd->lgd_mech = (gss_OID) &spkm3oid; ++ /* XXX sec.req_flags = GSS_C_ANON_FLAG; ++ * Need a way to switch.... ++ */ ++ lgd->lgd_req_flags = GSS_C_MUTUAL_FLAG; ++ break; ++ default: ++ printerr(0, "Invalid authentication type (%d)\n", authtype); ++ return -1; ++ } ++ ++ lgd->lgd_cred = GSS_C_NO_CREDENTIAL; ++ lgd->lgd_ctx = GSS_C_NO_CONTEXT; ++ lgd->lgd_rmt_ctx = (gss_buffer_desc) GSS_C_EMPTY_BUFFER; ++ lgd->lgd_seq_win = 0; ++ ++ sname.value = clp->servicename; ++ sname.length = strlen(clp->servicename); ++ ++ maj_stat = gss_import_name(&min_stat, &sname, ++ (gss_OID) GSS_C_NT_HOSTBASED_SERVICE, ++ &lgd->lgd_name); ++ if (maj_stat != GSS_S_COMPLETE) { ++ pgsserr(0, maj_stat, min_stat, lgd->lgd_mech); ++ goto out_fail; ++ } + ++ retval = gssd_refresh_lgd(lgd); ++ ++ if (lgd->lgd_name != GSS_C_NO_NAME) ++ gss_release_name(&min_stat, &lgd->lgd_name); ++ ++ if (lgd->lgd_cred != GSS_C_NO_CREDENTIAL) ++ gss_release_cred(&min_stat, &lgd->lgd_cred); ++ ++ out_fail: ++ return retval; ++} ++ ++static ++void gssd_free_lgd(struct lustre_gss_data *lgd) ++{ ++ gss_buffer_t token = GSS_C_NO_BUFFER; ++ OM_uint32 maj_stat, min_stat; ++ ++ if (lgd->lgd_ctx == GSS_C_NO_CONTEXT) ++ return; ++ ++ maj_stat = gss_delete_sec_context(&min_stat, &lgd->lgd_ctx, token); ++} ++ ++static ++int construct_service_name(struct clnt_info *clp, ++ struct lgssd_upcall_data *ud) ++{ ++ const int buflen = 256; ++ char name[buflen]; ++ ++ if (clp->servicename) { ++ free(clp->servicename); ++ clp->servicename = NULL; ++ } ++ ++ if (lnet_nid2hostname(ud->nid, name, buflen)) ++ return -1; ++ ++ clp->servicename = malloc(32 + strlen(name)); ++ if (!clp->servicename) { ++ printerr(0, "can't alloc memory\n"); ++ return -1; ++ } ++ sprintf(clp->servicename, "%s@%s", ++ ud->svc == LUSTRE_GSS_SVC_MDS ? ++ GSSD_SERVICE_MDS : GSSD_SERVICE_OSS, ++ name); ++ printerr(2, "constructed servicename: %s\n", clp->servicename); ++ return 0; ++} + + /* + * this code uses the userland rpcsec gss library to create a krb5 +@@ -668,27 +910,78 @@ int create_auth_rpc_client(struct clnt_i + void + handle_krb5_upcall(struct clnt_info *clp) + { +- uid_t uid; +- CLIENT *rpc_clnt = NULL; +- AUTH *auth = NULL; +- struct authgss_private_data pd; +- gss_buffer_desc token; ++ pid_t pid; ++ gss_buffer_desc token = { 0, NULL }; ++ struct lgssd_upcall_data updata; ++ struct lustre_gss_data lgd; + char **credlist = NULL; + char **ccname; ++ int read_rc; + +- printerr(1, "handling krb5 upcall\n"); ++ printerr(2, "handling krb5 upcall\n"); + +- token.length = 0; +- token.value = NULL; +- memset(&pd, 0, sizeof(struct authgss_private_data)); ++ memset(&lgd, 0, sizeof(lgd)); ++ lgd.lgd_rpc_err = -EPERM; /* default error code */ + +- if (read(clp->krb5_fd, &uid, sizeof(uid)) < sizeof(uid)) { +- printerr(0, "WARNING: failed reading uid from krb5 " ++ read_rc = read(clp->krb5_fd, &updata, sizeof(updata)); ++ if (read_rc < 0) { ++ printerr(0, "WARNING: failed reading from krb5 " + "upcall pipe: %s\n", strerror(errno)); +- goto out; ++ return; ++ } else if (read_rc != sizeof(updata)) { ++ printerr(0, "upcall data mismatch: length %d, expect %d\n", ++ read_rc, sizeof(updata)); ++ ++ /* the sequence number must be the first field. if read >= 4 ++ * bytes then we know at least sequence is fine, try to send ++ * error notification nicely. ++ */ ++ if (read_rc >= 4) ++ do_error_downcall(clp->krb5_fd, updata.seq, -EPERM, 0); ++ return; ++ } ++ ++ /* fork child process */ ++ pid = fork(); ++ if (pid < 0) { ++ printerr(0, "can't fork: %s\n", strerror(errno)); ++ do_error_downcall(clp->krb5_fd, updata.seq, -EPERM, 0); ++ return; ++ } else if (pid > 0) { ++ printerr(2, "forked child process: %d\n", pid); ++ return; ++ } ++ ++ printerr(1, "krb5 upcall: seq %u, uid %u, svc %u, nid 0x%llx, " ++ "pag %llx, obd %s\n", updata.seq, updata.uid, updata.svc, ++ updata.nid, updata.pag, updata.obd); ++ ++ /* XXX in kernel pag is defined as "unsigned long", which might ++ * not keep original signed value after converted to u64. ++ */ ++ if (updata.pag != updata.uid && ++ ((updata.pag == 0xffffffffffffffffULL) || ++ (updata.pag == 0xffffffff))) { ++ printerr(0, "uid %u: pag %llx not allowed\n", ++ updata.uid, updata.pag); ++ lgd.lgd_rpc_err = -EPROTO; ++ goto out_return_error; + } + +- if (uid == 0) { ++ if (updata.svc != LUSTRE_GSS_SVC_MDS && ++ updata.svc != LUSTRE_GSS_SVC_OSS) { ++ printerr(0, "invalid svc %d\n", updata.svc); ++ lgd.lgd_rpc_err = -EPROTO; ++ goto out_return_error; ++ } ++ updata.obd[sizeof(updata.obd)-1] = '\0'; ++ ++ if (construct_service_name(clp, &updata)) { ++ printerr(0, "failed to construct service name\n"); ++ goto out_return_error; ++ } ++ ++ if (updata.uid == 0) { + int success = 0; + + /* +@@ -696,75 +989,66 @@ handle_krb5_upcall(struct clnt_info *clp + * of them until one works or we've tried them all + */ + if (gssd_get_krb5_machine_cred_list(&credlist)) { +- printerr(0, "WARNING: Failed to obtain machine " +- "credentials for connection to " +- "server %s\n", clp->servername); +- goto out_return_error; ++ printerr(0, "ERROR: Failed to obtain machine " ++ "credentials for %s\n", clp->servicename); ++ goto out_return_error; + } + for (ccname = credlist; ccname && *ccname; ccname++) { + gssd_setup_krb5_machine_gss_ccache(*ccname); +- if ((create_auth_rpc_client(clp, &rpc_clnt, &auth, uid, +- AUTHTYPE_KRB5)) == 0) { ++ if ((gssd_create_lgd(clp, &lgd, &updata, ++ AUTHTYPE_KRB5)) == 0) { + /* Success! */ + success++; + break; + } + printerr(2, "WARNING: Failed to create krb5 context " + "for user with uid %d with credentials " +- "cache %s for server %s\n", +- uid, *ccname, clp->servername); ++ "cache %s for service %s\n", ++ updata.uid, *ccname, clp->servicename); + } + gssd_free_krb5_machine_cred_list(credlist); + if (!success) { +- printerr(0, "WARNING: Failed to create krb5 context " ++ printerr(0, "ERROR: Failed to create krb5 context " + "for user with uid %d with any " +- "credentials cache for server %s\n", +- uid, clp->servername); ++ "credentials cache for service %s\n", ++ updata.uid, clp->servicename); + goto out_return_error; + } + } + else { + /* Tell krb5 gss which credentials cache to use */ +- gssd_setup_krb5_user_gss_ccache(uid, clp->servername); ++ gssd_setup_krb5_user_gss_ccache(updata.pag, updata.uid, ++ clp->servicename); + +- if ((create_auth_rpc_client(clp, &rpc_clnt, &auth, uid, +- AUTHTYPE_KRB5)) != 0) { ++ if ((gssd_create_lgd(clp, &lgd, &updata, AUTHTYPE_KRB5)) != 0) { + printerr(0, "WARNING: Failed to create krb5 context " +- "for user with uid %d for server %s\n", +- uid, clp->servername); ++ "for user with uid %d for service %s\n", ++ updata.uid, clp->servicename); + goto out_return_error; + } + } + +- if (!authgss_get_private_data(auth, &pd)) { +- printerr(0, "WARNING: Failed to obtain authentication " +- "data for user with uid %d for server %s\n", +- uid, clp->servername); +- goto out_return_error; +- } +- +- if (serialize_context_for_kernel(pd.pd_ctx, &token, &krb5oid)) { ++ if (serialize_context_for_kernel(lgd.lgd_ctx, &token, &krb5oid)) { + printerr(0, "WARNING: Failed to serialize krb5 context for " +- "user with uid %d for server %s\n", +- uid, clp->servername); ++ "user with uid %d for service %s\n", ++ updata.uid, clp->servicename); + goto out_return_error; + } + +- do_downcall(clp->krb5_fd, uid, &pd, &token); ++ printerr(1, "refreshed: %u@%s for %s\n", ++ updata.uid, updata.obd, clp->servicename); ++ do_downcall(clp->krb5_fd, &updata, &lgd, &token); + + out: + if (token.value) + free(token.value); +- if (pd.pd_ctx_hndl.length != 0) +- authgss_free_private_data(&pd); +- if (auth) +- AUTH_DESTROY(auth); +- if (rpc_clnt) +- clnt_destroy(rpc_clnt); +- return; ++ ++ gssd_free_lgd(&lgd); ++ exit(0); /* i'm child process */ + + out_return_error: +- do_error_downcall(clp->krb5_fd, uid, -1); ++ do_error_downcall(clp->krb5_fd, updata.seq, ++ lgd.lgd_rpc_err, lgd.lgd_gss_err); + goto out; + } + +@@ -775,6 +1059,7 @@ out_return_error: + void + handle_spkm3_upcall(struct clnt_info *clp) + { ++#if 0 + uid_t uid; + CLIENT *rpc_clnt = NULL; + AUTH *auth = NULL; +@@ -826,4 +1111,5 @@ out: + out_return_error: + do_error_downcall(clp->spkm3_fd, uid, -1); + goto out; ++#endif + } +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gss_util.c nfs-utils-1.0.10/utils/gssd/gss_util.c +--- nfs-utils-1.0.10.orig/utils/gssd/gss_util.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/gss_util.c 2006-12-15 15:12:23.000000000 -0700 +@@ -87,9 +87,16 @@ + #ifdef HAVE_COM_ERR_H + #include <com_err.h> + #endif ++#include "lsupport.h" + + /* Global gssd_credentials handle */ +-gss_cred_id_t gssd_creds; ++gss_cred_id_t gssd_cred_mds; ++gss_cred_id_t gssd_cred_oss; ++int gssd_cred_mds_valid = 0; ++int gssd_cred_oss_valid = 0; ++ ++char *mds_local_realm = NULL; ++char *oss_local_realm = NULL; + + gss_OID g_mechOid = GSS_C_NULL_OID;; + +@@ -183,15 +190,56 @@ pgsserr(char *msg, u_int32_t maj_stat, u + display_status_2(msg, maj_stat, min_stat, mech); + } + +-int +-gssd_acquire_cred(char *server_name) ++static ++int extract_realm_name(gss_buffer_desc *name, char **realm) ++{ ++ char *sname, *c; ++ int rc = 0; ++ ++ sname = malloc(name->length + 1); ++ if (!sname) { ++ printerr(0, "out of memory\n"); ++ return -ENOMEM; ++ } ++ ++ memcpy(sname, name->value, name->length); ++ sname[name->length] = '\0'; ++ printerr(1, "service principal: %s\n", sname); ++ ++ c = strchr(sname, '@'); ++ if (!c) { ++ printerr(2, "no realm found in principal, use default\n"); ++ *realm = strdup(this_realm); ++ if (!*realm) { ++ printerr(0, "failed to duplicate default realm\n"); ++ rc = -ENOMEM; ++ } ++ } else { ++ c++; ++ *realm = strdup(c); ++ if (!*realm) { ++ printerr(0, "failed to duplicated realm\n"); ++ rc = -ENOMEM; ++ } ++ } ++ free(sname); ++ ++ return rc; ++} ++ ++static ++int gssd_acquire_cred(char *server_name, gss_cred_id_t *cred, ++ char **local_realm, int *valid) + { + gss_buffer_desc name; + gss_name_t target_name; + u_int32_t maj_stat, min_stat; + u_int32_t ignore_maj_stat, ignore_min_stat; ++ gss_OID name_type; + gss_buffer_desc pbuf; + ++ *valid = 0; ++ + name.value = (void *)server_name; + name.length = strlen(server_name); + +@@ -201,12 +249,20 @@ gssd_acquire_cred(char *server_name) + + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_import_name", maj_stat, min_stat, g_mechOid); +- return (FALSE); ++ return -1; ++ } ++ ++ maj_stat = gss_display_name(&min_stat, target_name, &name, &name_type); ++ if (maj_stat != GSS_S_COMPLETE) { ++ pgsserr(0, maj_stat, min_stat, g_mechOid); ++ return -1; + } ++ if (extract_realm_name(&name, local_realm)) ++ return -1; + + maj_stat = gss_acquire_cred(&min_stat, target_name, 0, + GSS_C_NULL_OID_SET, GSS_C_ACCEPT, +- &gssd_creds, NULL, NULL); ++ cred, NULL, NULL); + + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("gss_acquire_cred", maj_stat, min_stat, g_mechOid); +@@ -218,11 +274,67 @@ gssd_acquire_cred(char *server_name) + ignore_maj_stat = gss_release_buffer(&ignore_min_stat, + &pbuf); + } +- } ++ } else ++ *valid = 1; + + ignore_maj_stat = gss_release_name(&ignore_min_stat, &target_name); + +- return (maj_stat == GSS_S_COMPLETE); ++ if (maj_stat != GSS_S_COMPLETE) ++ return -1; ++ return 0; ++} ++ ++int gssd_prepare_creds(int must_srv_mds, int must_srv_oss) ++{ ++ if (gssd_acquire_cred(GSSD_SERVICE_MDS, &gssd_cred_mds, ++ &mds_local_realm, &gssd_cred_mds_valid)) { ++ if (must_srv_mds) ++ return -1; ++ } ++ ++ if (gssd_acquire_cred(GSSD_SERVICE_OSS, &gssd_cred_oss, ++ &oss_local_realm, &gssd_cred_oss_valid)) { ++ if (must_srv_oss) ++ return -1; ++ } ++ ++ if (!gssd_cred_mds_valid && !gssd_cred_oss_valid) { ++ printerr(0, "can't obtain both mds & oss creds, exit\n"); ++ return -1; ++ } ++ ++ if (gssd_cred_mds_valid) ++ printerr(0, "Ready to serve Lustre MDS in realm %s\n", ++ mds_local_realm ? mds_local_realm : "N/A"); ++ if (gssd_cred_oss_valid) ++ printerr(0, "Ready to serve Lustre OSS in realm %s\n", ++ oss_local_realm ? oss_local_realm : "N/A"); ++ ++ return 0; ++} ++ ++gss_cred_id_t gssd_select_svc_cred(int lustre_svc) ++{ ++ switch (lustre_svc) { ++ case LUSTRE_GSS_SVC_MDS: ++ if (!gssd_cred_mds_valid) { ++ printerr(0, "ERROR: service cred for mds not ready\n"); ++ return NULL; ++ } ++ printerr(2, "select mds service cred\n"); ++ return gssd_cred_mds; ++ case LUSTRE_GSS_SVC_OSS: ++ if (!gssd_cred_oss_valid) { ++ printerr(0, "ERROR: service cred for oss not ready\n"); ++ return NULL; ++ } ++ printerr(2, "select oss service cred\n"); ++ return gssd_cred_oss; ++ default: ++ printerr(0, "ERROR: invalid lustre svc id %d\n", lustre_svc); ++ } ++ ++ return NULL; + } + + int gssd_check_mechs(void) +@@ -249,3 +361,42 @@ out: + return retval; + } + ++/********************************* ++ * FIXME should be in krb5_util.c ++ *********************************/ ++ ++#include "krb5_util.h" ++ ++/* realm of this node */ ++char *this_realm = NULL; ++ ++int gssd_get_local_realm(void) ++{ ++ krb5_context context = NULL; ++ krb5_error_code code; ++ int retval = -1; ++ ++ if (this_realm != NULL) ++ return 0; ++ ++ code = krb5_init_context(&context); ++ if (code) { ++ printerr(0, "ERROR: get default realm: init ctx: %s\n", ++ error_message(code)); ++ goto out; ++ } ++ ++ code = krb5_get_default_realm(context, &this_realm); ++ if (code) { ++ printerr(0, "ERROR: get default realm: %s\n", ++ error_message(code)); ++ goto out; ++ } ++ retval = 0; ++ ++ printerr(1, "Local realm: %s\n", this_realm); ++out: ++ krb5_free_context(context); ++ return retval; ++} ++ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/gss_util.h nfs-utils-1.0.10/utils/gssd/gss_util.h +--- nfs-utils-1.0.10.orig/utils/gssd/gss_util.h 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/gss_util.h 2006-12-15 15:12:23.000000000 -0700 +@@ -32,14 +32,14 @@ + #define _GSS_UTIL_H_ + + #include <stdlib.h> +-#include <rpc/rpc.h> + #include "write_bytes.h" + ++char *this_realm; + extern gss_cred_id_t gssd_creds; + +-int gssd_acquire_cred(char *server_name); + void pgsserr(char *msg, u_int32_t maj_stat, u_int32_t min_stat, + const gss_OID mech); + int gssd_check_mechs(void); ++int gssd_get_local_realm(void); + + #endif /* _GSS_UTIL_H_ */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/krb5_util.c nfs-utils-1.0.10/utils/gssd/krb5_util.c +--- nfs-utils-1.0.10.orig/utils/gssd/krb5_util.c 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/krb5_util.c 2006-12-15 15:12:23.000000000 -0700 +@@ -99,12 +99,15 @@ + #include <rpc/rpc.h> + #include <sys/types.h> + #include <sys/stat.h> ++#include <sys/utsname.h> + #include <sys/socket.h> + #include <arpa/inet.h> + ++#include <unistd.h> + #include <stdio.h> + #include <stdlib.h> + #include <string.h> ++#include <netdb.h> + #include <dirent.h> + #include <fcntl.h> + #include <errno.h> +@@ -114,7 +117,6 @@ + #include <gssapi/gssapi_krb5.h> + #endif + #include <krb5.h> +-#include <rpc/auth_gss.h> + + #include "gssd.h" + #include "err_util.h" +@@ -129,6 +131,9 @@ struct gssd_k5_kt_princ *gssd_k5_kt_prin + int num_krb5_enctypes = 0; + krb5_enctype *krb5_enctypes = NULL; + ++/* credential expire time in advance */ ++unsigned long machine_cred_expire_advance = 300; /* 5 mins */ ++ + /*==========================*/ + /*=== Internal routines ===*/ + /*==========================*/ +@@ -137,11 +142,55 @@ static int select_krb5_ccache(const stru + static int gssd_find_existing_krb5_ccache(uid_t uid, struct dirent **d); + static int gssd_get_single_krb5_cred(krb5_context context, + krb5_keytab kt, struct gssd_k5_kt_princ *ple); +-static int gssd_have_realm_ple(void *realm); + static int gssd_process_krb5_keytab(krb5_context context, krb5_keytab kt, + char *kt_name); + + /* ++ * convenient macros, these perhaps need further cleanup ++ */ ++#ifdef HAVE_KRB5 ++ ++#define KEYTAB_ENTRY_MATCH(kte, name) \ ++ ( \ ++ (kte).principal->data[0].length == (sizeof(name)-1) && \ ++ strncmp((kte).principal->data[0].data, (name), sizeof(name)-1) == 0 \ ++ ) ++#define KRB5_FREE_UNPARSED_NAME(ctx, name) \ ++ krb5_free_unparsed_name((ctx), (name)); ++#define KRB5_STRDUP(str) \ ++ strndup((str).data, (str).length) ++#define KRB5_STRCMP(str, name) \ ++ ( \ ++ (str)->length != strlen(name) || \ ++ strncmp((str)->data, (name), (str)->length) != 0 \ ++ ) ++#define KRB5_STRCASECMP(str, name) \ ++ ( \ ++ (str)->length != strlen(name) || \ ++ strncasecmp((str)->data, (name), (str)->length) != 0 \ ++ ) ++ ++#else /* !HAVE_KRB5 */ ++ ++#define KEYTAB_ENTRY_MATCH(kte, name) \ ++ ( \ ++ strlen((kte).principal->name.name_string.val[0]) == \ ++ (sizeof(name)-1) && \ ++ strncmp(kte.principal->name.name_string.val[0], (name), \ ++ sizeof(name)-1) == 0 \ ++ ) ++#define KRB5_FREE_UNPARSED_NAME(ctx, name) \ ++ free(pname); ++#define KRB5_STRDUP(str) \ ++ strdup(str) ++#define KRB5_STRCMP(str, name) \ ++ strcmp((str), (name)) ++#define KRB5_STRCASECMP(str, name) \ ++ strcmp((str), (name)) ++ ++#endif /* HAVE_KRB5 */ ++ ++/* + * Called from the scandir function to weed out potential krb5 + * credentials cache files + * +@@ -294,7 +343,7 @@ gssd_get_single_krb5_cred(krb5_context c + + memset(&my_creds, 0, sizeof(my_creds)); + +- if (ple->ccname && ple->endtime > now) { ++ if (ple->ccname && ple->endtime > now + machine_cred_expire_advance) { + printerr(2, "INFO: Credentials in CC '%s' are good until %d\n", + ple->ccname, ple->endtime); + code = 0; +@@ -325,11 +374,7 @@ gssd_get_single_krb5_cred(krb5_context c + "principal '%s' from keytab '%s'\n", + error_message(code), + pname ? pname : "<unparsable>", kt_name); +-#ifdef HAVE_KRB5 +- if (pname) krb5_free_unparsed_name(context, pname); +-#else +- if (pname) free(pname); +-#endif ++ if (pname) KRB5_FREE_UNPARSED_NAME(context, pname); + goto out; + } + +@@ -378,15 +423,7 @@ gssd_get_single_krb5_cred(krb5_context c + return (code); + } + +-/* +- * Determine if we already have a ple for the given realm +- * +- * Returns: +- * 0 => no ple found for given realm +- * 1 => found ple for given realm +- */ +-static int +-gssd_have_realm_ple(void *r) ++static struct gssd_k5_kt_princ * gssd_get_realm_ple(void *r) + { + struct gssd_k5_kt_princ *ple; + #ifdef HAVE_KRB5 +@@ -396,18 +433,76 @@ gssd_have_realm_ple(void *r) + #endif + + for (ple = gssd_k5_kt_princ_list; ple; ple = ple->next) { +-#ifdef HAVE_KRB5 +- if ((realm->length == strlen(ple->realm)) && +- (strncmp(realm->data, ple->realm, realm->length) == 0)) { +-#else +- if (strcmp(realm, ple->realm) == 0) { +-#endif +- return 1; +- } ++ if (KRB5_STRCMP(realm, ple->realm) == 0) ++ return ple; ++ } ++ return NULL; ++} ++ ++static void gssd_free_ple(krb5_context kctx, struct gssd_k5_kt_princ *ple) ++{ ++ if (ple->princ) ++ krb5_free_principal(kctx, ple->princ); ++ if (ple->realm) ++ free(ple->realm); ++ if (ple->ccname) ++ free(ple->ccname); ++ free(ple); ++} ++ ++static int gssd_remove_ple(krb5_context kctx, struct gssd_k5_kt_princ *ple) ++{ ++ struct gssd_k5_kt_princ **prev = &gssd_k5_kt_princ_list; ++ struct gssd_k5_kt_princ *ent = gssd_k5_kt_princ_list; ++ ++ for (; ent; prev = &ent->next, ent = ent->next) { ++ if (ent != ple) ++ continue; ++ ++ *prev = ent->next; ++ gssd_free_ple(kctx, ent); ++ return 1; + } + return 0; + } + ++static ++struct gssd_k5_kt_princ *gssd_create_ple(krb5_context kctx, ++ krb5_principal principal) ++{ ++ struct gssd_k5_kt_princ *ple; ++ krb5_error_code code; ++ ++ ple = malloc(sizeof(*ple)); ++ if (ple == NULL) { ++ printerr(0, "ERROR: could not allocate storage " ++ "for principal list entry\n"); ++ return NULL; ++ } ++ ++ memset(ple, 0, sizeof(*ple)); ++ ++ ple->realm = KRB5_STRDUP(principal->realm); ++ if (ple->realm == NULL) { ++ printerr(0, "ERROR: not enough memory while copying realm to " ++ "principal list entry\n"); ++ goto err_free; ++ } ++ ++ code = krb5_copy_principal(kctx, principal, &ple->princ); ++ if (code) { ++ printerr(0, "ERROR: %s while copying principal " ++ "to principal list entry\n", ++ error_message(code)); ++ goto err_free; ++ } ++ ++ return ple; ++err_free: ++ gssd_free_ple(kctx, ple); ++ return NULL; ++} ++ + /* + * Process the given keytab file and create a list of principals we + * might use to perform mount operations. +@@ -451,82 +546,106 @@ gssd_process_krb5_keytab(krb5_context co + } + printerr(2, "Processing keytab entry for principal '%s'\n", + pname); +-#ifdef HAVE_KRB5 +- if ( (kte.principal->data[0].length == GSSD_SERVICE_NAME_LEN) && +- (strncmp(kte.principal->data[0].data, GSSD_SERVICE_NAME, +- GSSD_SERVICE_NAME_LEN) == 0) && +-#else +- if ( (strlen(kte.principal->name.name_string.val[0]) == GSSD_SERVICE_NAME_LEN) && +- (strncmp(kte.principal->name.name_string.val[0], GSSD_SERVICE_NAME, +- GSSD_SERVICE_NAME_LEN) == 0) && +- +-#endif +- (!gssd_have_realm_ple((void *)&kte.principal->realm)) ) { +- printerr(2, "We will use this entry (%s)\n", pname); +- ple = malloc(sizeof(struct gssd_k5_kt_princ)); +- if (ple == NULL) { +- printerr(0, "ERROR: could not allocate storage " +- "for principal list entry\n"); +-#ifdef HAVE_KRB5 +- krb5_free_unparsed_name(context, pname); +-#else +- free(pname); +-#endif +- retval = ENOMEM; +- goto out; ++ ++ /* mds service entry: ++ * - hostname and realm should match this node ++ * - replace existing non-mds entry of this realm ++ */ ++ if (KEYTAB_ENTRY_MATCH(kte, GSSD_SERVICE_MDS)) { ++ krb5_principal princ = kte.principal; ++ krb5_data *princ_host; ++ struct utsname utsbuf; ++ struct hostent *host; ++ ++ if (KRB5_STRCASECMP(krb5_princ_realm(context, princ), ++ this_realm) != 0) { ++ printerr(2, "alien mds service entry, skip\n"); ++ goto next; + } +- /* These will be filled in later */ +- ple->next = NULL; +- ple->ccname = NULL; +- ple->endtime = 0; +- if ((ple->realm = +-#ifdef HAVE_KRB5 +- strndup(kte.principal->realm.data, +- kte.principal->realm.length)) +-#else +- strdup(kte.principal->realm)) +-#endif +- == NULL) { +- printerr(0, "ERROR: %s while copying realm to " +- "principal list entry\n", +- "not enough memory"); +-#ifdef HAVE_KRB5 +- krb5_free_unparsed_name(context, pname); +-#else +- free(pname); +-#endif +- retval = ENOMEM; +- goto out; ++ ++ princ_host = krb5_princ_component(context, princ, 1); ++ if (princ_host == NULL) { ++ printerr(2, "mds service entry: no hostname in " ++ "principal, skip\n"); ++ goto next; + } +- if ((code = krb5_copy_principal(context, +- kte.principal, &ple->princ))) { +- printerr(0, "ERROR: %s while copying principal " +- "to principal list entry\n", +- error_message(code)); +-#ifdef HAVE_KRB5 +- krb5_free_unparsed_name(context, pname); +-#else +- free(pname); +-#endif +- retval = code; +- goto out; ++ ++ if (uname(&utsbuf)) { ++ printerr(2, "mds service entry: unable to get " ++ "UTS name, skip\n"); ++ goto next; + } +- if (gssd_k5_kt_princ_list == NULL) +- gssd_k5_kt_princ_list = ple; +- else { +- ple->next = gssd_k5_kt_princ_list; +- gssd_k5_kt_princ_list = ple; ++ host = gethostbyname(utsbuf.nodename); ++ if (host == NULL) { ++ printerr(2, "mds service entry: unable to get " ++ "local hostname, skip\n"); ++ goto next; + } +- } +- else { ++ ++ if (KRB5_STRCASECMP(princ_host, host->h_name) != 0) { ++ printerr(2, "mds service entry: hostname " ++ "doesn't match: %s - %.*s, skip\n", ++ host->h_name, ++ princ_host->length, princ_host->data); ++ goto next; ++ } ++ ++ ple = gssd_get_realm_ple((void *)&kte.principal->realm); ++ if (ple) { ++ if (ple->fl_mds) { ++ printerr(2,"mds service entry: found a" ++ "duplicated one, it's like a " ++ "mis-configuration, skip\n"); ++ goto next; ++ } ++ ++ gssd_remove_ple(context, ple); ++ printerr(2, "mds service entry: replace an " ++ "existed non-mds one\n"); ++ } ++ } else if (KEYTAB_ENTRY_MATCH(kte, LUSTRE_ROOT_NAME)) { ++ ple = gssd_get_realm_ple((void *)&kte.principal->realm); ++ if (ple) { ++ if (ple->fl_mds || ple->fl_root) { ++ printerr(2, "root entry: found a " ++ "existed %s entry, skip\n", ++ ple->fl_mds ? "mds" : "root"); ++ goto next; ++ } ++ ++ gssd_remove_ple(context, ple); ++ printerr(2, "root entry: replace an existed " ++ "non-mds non-root one\n"); ++ } ++ } else { + printerr(2, "We will NOT use this entry (%s)\n", + pname); ++ goto next; + } +-#ifdef HAVE_KRB5 +- krb5_free_unparsed_name(context, pname); +-#else +- free(pname); +-#endif ++ ++ /* construct ple */ ++ printerr(2, "We will use this entry (%s)\n", pname); ++ ple = gssd_create_ple(context, kte.principal); ++ if (ple == NULL) { ++ KRB5_FREE_UNPARSED_NAME(context, pname); ++ goto out; ++ } ++ ++ /* add proper flags */ ++ if (KEYTAB_ENTRY_MATCH(kte, GSSD_SERVICE_MDS)) ++ ple->fl_mds = 1; ++ else if (KEYTAB_ENTRY_MATCH(kte, LUSTRE_ROOT_NAME)) ++ ple->fl_root = 1; ++ ++ /* enqueue */ ++ if (gssd_k5_kt_princ_list == NULL) ++ gssd_k5_kt_princ_list = ple; ++ else { ++ ple->next = gssd_k5_kt_princ_list; ++ gssd_k5_kt_princ_list = ple; ++ } ++ next: ++ KRB5_FREE_UNPARSED_NAME(context, pname); + } + + if ((code = krb5_kt_end_seq_get(context, kt, &cursor))) { +@@ -634,14 +753,21 @@ parse_enctypes(char *enctypes) + * void + */ + void +-gssd_setup_krb5_user_gss_ccache(uid_t uid, char *servername) ++gssd_setup_krb5_user_gss_ccache(uint64_t pag, uid_t uid, char *servername) + { + char buf[MAX_NETOBJ_SZ]; + struct dirent *d; + +- printerr(2, "getting credentials for client with uid %u for " +- "server %s\n", uid, servername); ++ printerr(2, "getting credentials for client with pag %llx/uid %u for " ++ "server %s\n", pag, uid, servername); + memset(buf, 0, sizeof(buf)); ++ ++ if (pag != uid) { ++ snprintf(buf, sizeof(buf), "FILE:%s/%spag_%llx", ++ ccachedir, GSSD_DEFAULT_CRED_PREFIX, pag); ++ goto set_ccname; ++ } ++ + if (gssd_find_existing_krb5_ccache(uid, &d)) { + snprintf(buf, sizeof(buf), "FILE:%s/%s", + ccachedir, d->d_name); +@@ -652,6 +778,7 @@ gssd_setup_krb5_user_gss_ccache(uid_t ui + ccachedir, GSSD_DEFAULT_CRED_PREFIX, uid); + printerr(2, "using %s as credentials cache for client with " + "uid %u for server %s\n", buf, uid, servername); ++set_ccname: + gssd_set_krb5_ccache_name(buf); + } + +@@ -702,7 +829,7 @@ gssd_refresh_krb5_machine_creds(void) + goto out; + } + +- printerr(1, "Using keytab file '%s'\n", keytabfile); ++ printerr(2, "Using keytab file '%s'\n", keytabfile); + + if ((code = krb5_kt_resolve(context, keytabfile, &kt))) { + printerr(0, "ERROR: %s while resolving keytab '%s'\n", +@@ -717,12 +844,12 @@ gssd_refresh_krb5_machine_creds(void) + if (gssd_k5_kt_princ_list == NULL) { + printerr(0, "ERROR: No usable keytab entries found in " + "keytab '%s'\n", keytabfile); +- printerr(0, "Do you have a valid keytab entry for " +- "%s/<your.host>@<YOUR.REALM> in " ++ printerr(0, "You must have a valid keytab entry for " ++ "%s/<your.host>@<YOUR.REALM> on MDT nodes, " ++ "and %s@<YOUR.REALM> on client nodes, in " + "keytab file %s ?\n", +- GSSD_SERVICE_NAME, keytabfile); +- printerr(0, "Continuing without (machine) credentials " +- "- nfs4 mounts with Kerberos will fail\n"); ++ GSSD_SERVICE_MDS, LUSTRE_ROOT_NAME, ++ keytabfile); + } + } + +@@ -872,6 +999,7 @@ gssd_destroy_krb5_machine_creds(void) + krb5_free_context(context); + } + ++#if 0 + #ifdef HAVE_SET_ALLOWABLE_ENCTYPES + /* + * this routine obtains a credentials handle via gss_acquire_cred() +@@ -927,6 +1055,7 @@ limit_krb5_enctypes(struct rpc_gss_sec * + return 0; + } + #endif /* HAVE_SET_ALLOWABLE_ENCTYPES */ ++#endif + + /* + * Obtain supported enctypes from kernel. +diff -rup nfs-utils-1.0.10.orig/utils/gssd/krb5_util.h nfs-utils-1.0.10/utils/gssd/krb5_util.h +--- nfs-utils-1.0.10.orig/utils/gssd/krb5_util.h 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/krb5_util.h 2006-12-15 15:12:23.000000000 -0700 +@@ -10,13 +10,15 @@ + struct gssd_k5_kt_princ { + struct gssd_k5_kt_princ *next; + krb5_principal princ; ++ unsigned int fl_root:1, ++ fl_mds:1; + char *ccname; + char *realm; + krb5_timestamp endtime; + }; + + +-void gssd_setup_krb5_user_gss_ccache(uid_t uid, char *servername); ++void gssd_setup_krb5_user_gss_ccache(uint64_t pag, uid_t uid, char *servername); + int gssd_get_krb5_machine_cred_list(char ***list); + int gssd_refresh_krb5_machine_creds(void); + void gssd_free_krb5_machine_cred_list(char **list); +@@ -25,8 +27,4 @@ void gssd_destroy_krb5_machine_creds(voi + void gssd_obtain_kernel_krb5_info(void); + + +-#ifdef HAVE_SET_ALLOWABLE_ENCTYPES +-int limit_krb5_enctypes(struct rpc_gss_sec *sec, uid_t uid); +-#endif +- + #endif /* KRB5_UTIL_H */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/lsupport.c nfs-utils-1.0.10/utils/gssd/lsupport.c +--- nfs-utils-1.0.10.orig/utils/gssd/lsupport.c 2006-11-15 21:41:25.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/lsupport.c 2006-12-15 15:12:23.000000000 -0700 +@@ -0,0 +1,782 @@ ++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- ++ * vim:expandtab:shiftwidth=8:tabstop=8: ++ * ++ * Copyright (c) 2005 Cluster File Systems, Inc. ++ * ++ * This file is part of Lustre, http://www.lustre.org. ++ * ++ * Lustre is free software; you can redistribute it and/or ++ * modify it under the terms of version 2 of the GNU General Public ++ * License as published by the Free Software Foundation. ++ * ++ * Lustre is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Lustre; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif ++#include "config.h" ++#include <sys/param.h> ++#include <sys/utsname.h> ++#include <sys/stat.h> ++#include <sys/socket.h> ++#include <arpa/inet.h> ++#include <sys/types.h> ++#include <sys/ipc.h> ++#include <sys/sem.h> ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <pwd.h> ++#include <grp.h> ++#include <string.h> ++#include <dirent.h> ++#include <poll.h> ++#include <fcntl.h> ++#include <signal.h> ++#include <unistd.h> ++#include <errno.h> ++#include <assert.h> ++#ifdef HAVE_GETHOSTBYNAME ++# include <netdb.h> ++#endif ++ ++#include "err_util.h" ++#include "gssd.h" ++#include "lsupport.h" ++ ++/**************************************** ++ * exclusive startup * ++ ****************************************/ ++ ++static struct __sem_s { ++ char *name; ++ key_t sem_key; ++ int sem_id; ++} sems[2] = { ++ [GSSD_CLI] = { "client", 0x3a92d473, 0 }, ++ [GSSD_SVC] = { "server", 0x3b92d473, 0 }, ++}; ++ ++void gssd_init_unique(int type) ++{ ++ struct __sem_s *sem = &sems[type]; ++ struct sembuf sembuf; ++ ++ assert(type == GSSD_CLI || type == GSSD_SVC); ++ ++again: ++ sem->sem_id = semget(sem->sem_key, 1, IPC_CREAT | IPC_EXCL | 0700); ++ if (sem->sem_id == -1) { ++ if (errno != EEXIST) { ++ printerr(0, "Create sem: %s\n", strerror(errno)); ++ exit(-1); ++ } ++ ++ /* already exist. Note there's still a small window racing ++ * with other processes, due to the stupid semaphore semantics. ++ */ ++ sem->sem_id = semget(sem->sem_key, 0, 0700); ++ if (sem->sem_id == -1) { ++ if (errno == ENOENT) { ++ printerr(0, "another instance just exit, " ++ "try again\n"); ++ goto again; ++ } ++ ++ printerr(0, "Obtain sem: %s\n", strerror(errno)); ++ exit(-1); ++ } ++ } else { ++ int val = 1; ++ ++ if (semctl(sem->sem_id, 0, SETVAL, val) == -1) { ++ printerr(0, "Initialize sem: %s\n", ++ strerror(errno)); ++ exit(-1); ++ } ++ } ++ ++ sembuf.sem_num = 0; ++ sembuf.sem_op = -1; ++ sembuf.sem_flg = IPC_NOWAIT | SEM_UNDO; ++ ++ if (semop(sem->sem_id, &sembuf, 1) != 0) { ++ if (errno == EAGAIN) { ++ printerr(0, "Another instance is running, exit\n"); ++ exit(0); ++ } ++ printerr(0, "Grab sem: %s\n", strerror(errno)); ++ exit(0); ++ } ++ ++ printerr(2, "Successfully created %s global identity\n", sem->name); ++} ++ ++void gssd_exit_unique(int type) ++{ ++ assert(type == GSSD_CLI || type == GSSD_SVC); ++ ++ /* ++ * do nothing. we can't remove the sem here, otherwise the race ++ * window would be much bigger. So it's sad we have to leave the ++ * sem in the system forever. ++ */ ++} ++ ++/**************************************** ++ * client side resolvation: * ++ * lnd/netid/nid => hostname * ++ ****************************************/ ++ ++char gethostname_ex[PATH_MAX] = GSSD_DEFAULT_GETHOSTNAME_EX; ++ ++typedef int lnd_nid2hostname_t(char *lnd, uint32_t net, uint32_t addr, ++ char *buf, int buflen); ++ ++/* FIXME what about IPv6? */ ++static ++int socklnd_nid2hostname(char *lnd, uint32_t net, uint32_t addr, ++ char *buf, int buflen) ++{ ++ struct hostent *ent; ++ ++ addr = htonl(addr); ++ ent = gethostbyaddr(&addr, sizeof(addr), AF_INET); ++ if (!ent) { ++ printerr(0, "%s: can't resolve 0x%x\n", lnd, addr); ++ return -1; ++ } ++ if (strlen(ent->h_name) >= buflen) { ++ printerr(0, "%s: name too long: %s\n", lnd, ent->h_name); ++ return -1; ++ } ++ strcpy(buf, ent->h_name); ++ ++ printerr(2, "%s: net 0x%x, addr 0x%x => %s\n", ++ lnd, net, addr, buf); ++ return 0; ++} ++ ++static ++int lolnd_nid2hostname(char *lnd, uint32_t net, uint32_t addr, ++ char *buf, int buflen) ++{ ++ struct utsname uts; ++ struct hostent *ent; ++ ++ if (addr) { ++ printerr(0, "%s: addr is 0x%x, we expect 0\n", lnd, addr); ++ return -1; ++ } ++ ++ if (uname(&uts)) { ++ printerr(0, "%s: failed obtain local machine name\n", lnd); ++ return -1; ++ } ++ ++ ent = gethostbyname(uts.nodename); ++ if (!ent) { ++ printerr(0, "%s: failed obtain canonical name of %s\n", ++ lnd, uts.nodename); ++ return -1; ++ } ++ ++ if (strlen(ent->h_name) >= buflen) { ++ printerr(0, "%s: name too long: %s\n", lnd, ent->h_name); ++ return -1; ++ } ++ strcpy(buf, ent->h_name); ++ ++ printerr(2, "%s: addr 0x%x => %s\n", lnd, addr, buf); ++ return 0; ++} ++ ++static int is_space(char c) ++{ ++ return (c == ' ' || c == '\t' || c == '\n'); ++} ++ ++static ++int external_nid2hostname(char *lnd, uint32_t net, uint32_t addr, ++ char *namebuf, int namebuflen) ++{ ++ const int bufsize = PATH_MAX + 256; ++ char buf[bufsize], *head, *tail; ++ FILE *fghn; ++ ++ sprintf(buf, "%s %s 0x%x 0x%x", gethostname_ex, lnd, net, addr); ++ printerr(2, "cmd: %s\n", buf); ++ ++ fghn = popen(buf, "r"); ++ if (fghn == NULL) { ++ printerr(0, "failed to call %s\n", gethostname_ex); ++ return -1; ++ } ++ ++ head = fgets(buf, bufsize, fghn); ++ if (head == NULL) { ++ printerr(0, "can't read from %s\n", gethostname_ex); ++ return -1; ++ } ++ if (pclose(fghn) == -1) ++ printerr(1, "pclose failed, continue\n"); ++ ++ /* trim head/tail space */ ++ while (is_space(*head)) ++ head++; ++ ++ tail = head + strlen(head); ++ if (tail <= head) { ++ printerr(0, "no output from %s\n", gethostname_ex); ++ return -1; ++ } ++ while (is_space(*(tail - 1))) ++ tail--; ++ if (tail <= head) { ++ printerr(0, "output are all space from %s\n", gethostname_ex); ++ return -1; ++ } ++ *tail = '\0'; ++ ++ /* start with '@' means error msg */ ++ if (head[0] == '@') { ++ printerr(0, "error from %s: %s\n", gethostname_ex, &head[1]); ++ return -1; ++ } ++ ++ if (tail - head > namebuflen) { ++ printerr(0, "external hostname too long: %s\n", head); ++ return -1; ++ } ++ ++ printerr(2, "%s: net 0x%x, addr 0x%x => %s\n", ++ lnd, net, addr, head); ++ strcpy(namebuf, head); ++ return 0; ++} ++ ++static struct { ++ char *name; ++ lnd_nid2hostname_t *nid2name; ++} converter[LND_ENUM_END_MARKER] = { ++ {"UNUSED0", NULL}, ++ [QSWLND] = { "QSWLND", external_nid2hostname}, ++ [SOCKLND] = { "SOCKLND", socklnd_nid2hostname}, ++ [GMLND] = { "GMLND", external_nid2hostname}, ++ [PTLLND] = { "PTLLND", external_nid2hostname }, ++ [O2IBLND] = { "O2IBLND", external_nid2hostname }, ++ [CIBLND] = { "CIBLND", external_nid2hostname }, ++ [OPENIBLND] = { "OPENIBLND",external_nid2hostname }, ++ [IIBLND] = { "IIBLND", external_nid2hostname }, ++ [LOLND] = { "LOLND", lolnd_nid2hostname }, ++ [RALND] = { "RALND", external_nid2hostname }, ++ [VIBLND] = { "VIBLND", external_nid2hostname }, ++}; ++ ++int lnet_nid2hostname(lnet_nid_t nid, char *buf, int buflen) ++{ ++ uint32_t lnd, net, addr; ++ ++ addr = LNET_NIDADDR(nid); ++ net = LNET_NIDNET(nid); ++ lnd = LNET_NETTYP(net); ++ ++ if (lnd >= LND_ENUM_END_MARKER) { ++ printerr(0, "ERROR: Unrecognized LND %u\n", lnd); ++ return -1; ++ } ++ ++ if (converter[lnd].nid2name == NULL) { ++ printerr(0, "ERROR: %s converter not ready\n", ++ converter[lnd].name); ++ return -1; ++ } ++ ++ return converter[lnd].nid2name(converter[lnd].name, net, addr, ++ buf, buflen); ++} ++ ++ ++/**************************************** ++ * lnet support routine * ++ * (from lnet/libcfs/nidstrings.c * ++ ****************************************/ ++ ++#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ ++ ++static int libcfs_lo_str2addr(char *str, int nob, uint32_t *addr); ++static void libcfs_ip_addr2str(uint32_t addr, char *str); ++static int libcfs_ip_str2addr(char *str, int nob, uint32_t *addr); ++static void libcfs_decnum_addr2str(uint32_t addr, char *str); ++static void libcfs_hexnum_addr2str(uint32_t addr, char *str); ++static int libcfs_num_str2addr(char *str, int nob, uint32_t *addr); ++ ++struct netstrfns { ++ int nf_type; ++ char *nf_name; ++ char *nf_modname; ++ void (*nf_addr2str)(uint32_t addr, char *str); ++ int (*nf_str2addr)(char *str, int nob, uint32_t *addr); ++}; ++ ++static struct netstrfns libcfs_netstrfns[] = { ++ {/* .nf_type */ LOLND, ++ /* .nf_name */ "lo", ++ /* .nf_modname */ "klolnd", ++ /* .nf_addr2str */ libcfs_decnum_addr2str, ++ /* .nf_str2addr */ libcfs_lo_str2addr}, ++ {/* .nf_type */ SOCKLND, ++ /* .nf_name */ "tcp", ++ /* .nf_modname */ "ksocklnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ O2IBLND, ++ /* .nf_name */ "o2ib", ++ /* .nf_modname */ "ko2iblnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ CIBLND, ++ /* .nf_name */ "cib", ++ /* .nf_modname */ "kciblnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ OPENIBLND, ++ /* .nf_name */ "openib", ++ /* .nf_modname */ "kopeniblnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ IIBLND, ++ /* .nf_name */ "iib", ++ /* .nf_modname */ "kiiblnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ VIBLND, ++ /* .nf_name */ "vib", ++ /* .nf_modname */ "kviblnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ RALND, ++ /* .nf_name */ "ra", ++ /* .nf_modname */ "kralnd", ++ /* .nf_addr2str */ libcfs_ip_addr2str, ++ /* .nf_str2addr */ libcfs_ip_str2addr}, ++ {/* .nf_type */ QSWLND, ++ /* .nf_name */ "elan", ++ /* .nf_modname */ "kqswlnd", ++ /* .nf_addr2str */ libcfs_decnum_addr2str, ++ /* .nf_str2addr */ libcfs_num_str2addr}, ++ {/* .nf_type */ GMLND, ++ /* .nf_name */ "gm", ++ /* .nf_modname */ "kgmlnd", ++ /* .nf_addr2str */ libcfs_hexnum_addr2str, ++ /* .nf_str2addr */ libcfs_num_str2addr}, ++ {/* .nf_type */ PTLLND, ++ /* .nf_name */ "ptl", ++ /* .nf_modname */ "kptllnd", ++ /* .nf_addr2str */ libcfs_decnum_addr2str, ++ /* .nf_str2addr */ libcfs_num_str2addr}, ++ /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */ ++ {/* .nf_type */ -1}, ++}; ++ ++const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]); ++ ++static int ++libcfs_lo_str2addr(char *str, int nob, uint32_t *addr) ++{ ++ *addr = 0; ++ return 1; ++} ++ ++static void ++libcfs_ip_addr2str(uint32_t addr, char *str) ++{ ++ snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u", ++ (addr >> 24) & 0xff, (addr >> 16) & 0xff, ++ (addr >> 8) & 0xff, addr & 0xff); ++} ++ ++/* CAVEAT EMPTOR XscanfX ++ * I use "%n" at the end of a sscanf format to detect trailing junk. However ++ * sscanf may return immediately if it sees the terminating '0' in a string, so ++ * I initialise the %n variable to the expected length. If sscanf sets it; ++ * fine, if it doesn't, then the scan ended at the end of the string, which is ++ * fine too :) */ ++ ++static int ++libcfs_ip_str2addr(char *str, int nob, uint32_t *addr) ++{ ++ int a; ++ int b; ++ int c; ++ int d; ++ int n = nob; /* XscanfX */ ++ ++ /* numeric IP? */ ++ if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && ++ n == nob && ++ (a & ~0xff) == 0 && (b & ~0xff) == 0 && ++ (c & ~0xff) == 0 && (d & ~0xff) == 0) { ++ *addr = ((a<<24)|(b<<16)|(c<<8)|d); ++ return 1; ++ } ++ ++#ifdef HAVE_GETHOSTBYNAME ++ /* known hostname? */ ++ if (('a' <= str[0] && str[0] <= 'z') || ++ ('A' <= str[0] && str[0] <= 'Z')) { ++ char *tmp; ++ ++ tmp = malloc(nob + 1); ++ if (tmp != NULL) { ++ struct hostent *he; ++ ++ memcpy(tmp, str, nob); ++ tmp[nob] = 0; ++ ++ he = gethostbyname(tmp); ++ ++ free(tmp); ++ tmp = NULL; ++ ++ if (he != NULL) { ++ uint32_t ip = *(uint32_t *)he->h_addr; ++ ++ *addr = ntohl(ip); ++ return 1; ++ } ++ } ++ } ++#endif ++ return 0; ++} ++ ++static void ++libcfs_decnum_addr2str(uint32_t addr, char *str) ++{ ++ snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); ++} ++ ++static void ++libcfs_hexnum_addr2str(uint32_t addr, char *str) ++{ ++ snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); ++} ++ ++static int ++libcfs_num_str2addr(char *str, int nob, uint32_t *addr) ++{ ++ int n; ++ ++ n = nob; ++ if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) ++ return 1; ++ ++ n = nob; ++ if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) ++ return 1; ++ ++ n = nob; ++ if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) ++ return 1; ++ ++ return 0; ++} ++ ++static struct netstrfns * ++libcfs_lnd2netstrfns(int lnd) ++{ ++ int i; ++ ++ if (lnd >= 0) ++ for (i = 0; i < libcfs_nnetstrfns; i++) ++ if (lnd == libcfs_netstrfns[i].nf_type) ++ return &libcfs_netstrfns[i]; ++ ++ return NULL; ++} ++ ++static struct netstrfns * ++libcfs_str2net_internal(char *str, uint32_t *net) ++{ ++ struct netstrfns *nf; ++ int nob; ++ int netnum; ++ int i; ++ ++ for (i = 0; i < libcfs_nnetstrfns; i++) { ++ nf = &libcfs_netstrfns[i]; ++ if (nf->nf_type >= 0 && ++ !strncmp(str, nf->nf_name, strlen(nf->nf_name))) ++ break; ++ } ++ ++ if (i == libcfs_nnetstrfns) ++ return NULL; ++ ++ nob = strlen(nf->nf_name); ++ ++ if (strlen(str) == (unsigned int)nob) { ++ netnum = 0; ++ } else { ++ if (nf->nf_type == LOLND) /* net number not allowed */ ++ return NULL; ++ ++ str += nob; ++ i = strlen(str); ++ if (sscanf(str, "%u%n", &netnum, &i) < 1 || ++ i != (int)strlen(str)) ++ return NULL; ++ } ++ ++ *net = LNET_MKNET(nf->nf_type, netnum); ++ return nf; ++} ++ ++lnet_nid_t ++libcfs_str2nid(char *str) ++{ ++ char *sep = strchr(str, '@'); ++ struct netstrfns *nf; ++ uint32_t net; ++ uint32_t addr; ++ ++ if (sep != NULL) { ++ nf = libcfs_str2net_internal(sep + 1, &net); ++ if (nf == NULL) ++ return LNET_NID_ANY; ++ } else { ++ sep = str + strlen(str); ++ net = LNET_MKNET(SOCKLND, 0); ++ nf = libcfs_lnd2netstrfns(SOCKLND); ++ if (!nf) ++ return LNET_NID_ANY; ++ } ++ ++ if (!nf->nf_str2addr(str, sep - str, &addr)) ++ return LNET_NID_ANY; ++ ++ return LNET_MKNID(net, addr); ++} ++ ++/**************************************** ++ * user mapping database handling * ++ * (very rudiment) * ++ ****************************************/ ++ ++#define MAPPING_GROW_SIZE 512 ++#define MAX_LINE_LEN 1024 ++ ++struct user_map_item { ++ char *principal; /* NULL means match all */ ++ lnet_nid_t nid; ++ uid_t uid; ++}; ++ ++struct user_mapping { ++ int size; ++ int nitems; ++ struct user_map_item *items; ++}; ++ ++static struct user_mapping mapping = {0, 0, NULL}; ++/* FIXME to be finished: monitor change of mapping database */ ++static int mapping_mtime = 0; ++ ++static ++void cleanup_mapping(void) ++{ ++ int n; ++ ++ for (n = 0; n < mapping.nitems; n++) { ++ if (mapping.items[n].principal) ++ free(mapping.items[n].principal); ++ } ++ mapping.nitems = 0; ++} ++ ++static ++int grow_mapping(int size) ++{ ++ struct user_map_item *new; ++ int newsize; ++ ++ if (size <= mapping.size) ++ return 0; ++ ++ newsize = mapping.size + MAPPING_GROW_SIZE; ++ while (newsize < size) ++ newsize += MAPPING_GROW_SIZE; ++ ++ new = malloc(newsize * sizeof(struct user_map_item)); ++ if (!new) { ++ printerr(0, "can't alloc mapping size %d\n", newsize); ++ return -1; ++ } ++ memcpy(new, mapping.items, mapping.nitems * sizeof(void*)); ++ free(mapping.items); ++ mapping.items = new; ++ mapping.size = newsize; ++ return 0; ++} ++ ++uid_t parse_uid(char *uidstr) ++{ ++ struct passwd *pw; ++ char *p = NULL; ++ long uid; ++ ++ pw = getpwnam(uidstr); ++ if (pw) ++ return pw->pw_uid; ++ ++ uid = strtol(uidstr, &p, 0); ++ if (*p == '\0') ++ return (uid_t) uid; ++ ++ return -1; ++} ++ ++static ++int read_mapping_db(void) ++{ ++ char princ[MAX_LINE_LEN]; ++ char nid_str[MAX_LINE_LEN]; ++ char dest[MAX_LINE_LEN]; ++ lnet_nid_t nid; ++ uid_t dest_uid; ++ FILE *f; ++ char *line, linebuf[MAX_LINE_LEN]; ++ ++ /* cleanup old mappings */ ++ cleanup_mapping(); ++ ++ f = fopen(MAPPING_DATABASE_FILE, "r"); ++ if (!f) { ++ printerr(0, "can't open mapping database: %s\n", ++ MAPPING_DATABASE_FILE); ++ return -1; ++ } ++ ++ while ((line = fgets(linebuf, MAX_LINE_LEN, f))) { ++ char *name; ++ ++ if (strlen(line) >= MAX_LINE_LEN) { ++ printerr(0, "invalid mapping db: line too long (%d)\n", ++ strlen(line)); ++ cleanup_mapping(); ++ fclose(f); ++ return -1; ++ } ++ if (sscanf(line, "%s %s %s", princ, nid_str, dest) != 3) { ++ printerr(0, "mapping db: syntax error\n"); ++ cleanup_mapping(); ++ fclose(f); ++ return -1; ++ } ++ if (grow_mapping(mapping.nitems + 1)) { ++ printerr(0, "fail to grow mapping to %d\n", ++ mapping.nitems + 1); ++ fclose(f); ++ return -1; ++ } ++ if (!strcmp(princ, "*")) { ++ name = NULL; ++ } else { ++ name = strdup(princ); ++ if (!name) { ++ printerr(0, "fail to dup str %s\n", princ); ++ fclose(f); ++ return -1; ++ } ++ } ++ if (!strcmp(nid_str, "*")) { ++ nid = LNET_NID_ANY; ++ } else { ++ nid = libcfs_str2nid(nid_str); ++ if (nid == LNET_NID_ANY) { ++ printerr(0, "fail to parse nid %s\n", nid_str); ++ fclose(f); ++ return -1; ++ } ++ } ++ dest_uid = parse_uid(dest); ++ if (dest_uid == -1) { ++ printerr(0, "no valid user: %s\n", dest); ++ free(name); ++ fclose(f); ++ return -1; ++ } ++ ++ mapping.items[mapping.nitems].principal = name; ++ mapping.items[mapping.nitems].nid = nid; ++ mapping.items[mapping.nitems].uid = dest_uid; ++ mapping.nitems++; ++ printerr(1, "add mapping: %s(%s/0x%llx) ==> %d\n", ++ name ? name : "*", nid_str, nid, dest_uid); ++ } ++ ++ return 0; ++} ++ ++static inline int mapping_changed(void) ++{ ++ struct stat st; ++ ++ if (stat(MAPPING_DATABASE_FILE, &st) == -1) { ++ /* stat failed, treat it like doesn't exist or be removed */ ++ if (mapping_mtime == 0) { ++ return 0; ++ } else { ++ printerr(0, "Warning: stat %s failed: %s\n", ++ MAPPING_DATABASE_FILE, strerror(errno)); ++ ++ mapping_mtime = 0; ++ return 1; ++ } ++ } ++ ++ if (st.st_mtime != mapping_mtime) { ++ mapping_mtime = st.st_mtime; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++int lookup_mapping(char *princ, lnet_nid_t nid, uid_t *uid) ++{ ++ int n; ++ ++ /* FIXME race condition here */ ++ if (mapping_changed()) { ++ if (read_mapping_db()) ++ printerr(0, "all remote users will be denied\n"); ++ } ++ ++ for (n = 0; n < mapping.nitems; n++) { ++ struct user_map_item *entry = &mapping.items[n]; ++ ++ if (entry->nid != LNET_NID_ANY && entry->nid != nid) ++ continue; ++ if (!entry->principal || ++ !strcasecmp(entry->principal, princ)) { ++ printerr(1, "found mapping: %s ==> %d\n", ++ princ, entry->uid); ++ *uid = entry->uid; ++ return 0; ++ } ++ } ++ printerr(2, "no mapping for %s/%#Lx\n", princ, nid); ++ *uid = -1; ++ return -1; ++} ++ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/lsupport.h nfs-utils-1.0.10/utils/gssd/lsupport.h +--- nfs-utils-1.0.10.orig/utils/gssd/lsupport.h 2006-11-15 21:41:23.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/lsupport.h 2006-12-15 15:12:23.000000000 -0700 +@@ -0,0 +1,89 @@ ++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- ++ * vim:expandtab:shiftwidth=8:tabstop=8: ++ */ ++ ++#ifndef __LIBCFS_H__ ++#define __LIBCFS_H__ ++ ++#include <unistd.h> ++#include <stdint.h> ++ ++#define GSSD_CLI (0) ++#define GSSD_SVC (1) ++ ++void gssd_init_unique(int type); ++void gssd_exit_unique(int type); ++ ++/* ++ * copied from lustre source ++ */ ++ ++#define LUSTRE_GSS_SVC_MDS 0 ++#define LUSTRE_GSS_SVC_OSS 1 ++ ++struct lgssd_upcall_data { ++ uint32_t seq; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t svc; ++ uint64_t nid; ++ uint64_t pag; ++ char obd[64]; ++}; ++ ++#define GSSD_INTERFACE_VERSION (1) ++ ++struct lgssd_ioctl_param { ++ int version; /* in */ ++ char *uuid; /* in */ ++ int lustre_svc; /* in */ ++ uid_t uid; /* in */ ++ gid_t gid; /* in */ ++ long send_token_size;/* in */ ++ char *send_token; /* in */ ++ long reply_buf_size; /* in */ ++ char *reply_buf; /* in */ ++ long status; /* out */ ++ long reply_length; /* out */ ++}; ++ ++#define GSSD_DEFAULT_GETHOSTNAME_EX "/etc/lustre/nid2hostname" ++#define MAPPING_DATABASE_FILE "/etc/lustre/idmap.conf" ++ ++typedef uint64_t lnet_nid_t; ++typedef uint32_t lnet_netid_t; ++ ++#define LNET_NID_ANY ((lnet_nid_t) -1) ++#define LNET_PID_ANY ((lnet_pid_t) -1) ++ ++enum { ++ /* Only add to these values (i.e. don't ever change or redefine them): ++ * network addresses depend on them... */ ++ QSWLND = 1, ++ SOCKLND = 2, ++ GMLND = 3, ++ PTLLND = 4, ++ O2IBLND = 5, ++ CIBLND = 6, ++ OPENIBLND = 7, ++ IIBLND = 8, ++ LOLND = 9, ++ RALND = 10, ++ VIBLND = 11, ++ LND_ENUM_END_MARKER ++}; ++ ++int lnet_nid2hostname(lnet_nid_t nid, char *buf, int buflen); ++int lookup_mapping(char *princ, uint64_t nid, uid_t *uid); ++lnet_nid_t libcfs_str2nid(char *str); ++ ++/* how an LNET NID encodes net:address */ ++#define LNET_NIDADDR(nid) ((uint32_t)((nid) & 0xffffffff)) ++#define LNET_NIDNET(nid) ((uint32_t)(((nid) >> 32)) & 0xffffffff) ++#define LNET_MKNID(net,addr) ((((uint64_t)(net))<<32)|((uint64_t)(addr))) ++/* how net encodes type:number */ ++#define LNET_NETNUM(net) ((net) & 0xffff) ++#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) ++#define LNET_MKNET(typ,num) ((((uint32_t)(typ))<<16)|((uint32_t)(num))) ++ ++#endif /* __LIBCFS_H__ */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/Makefile.am nfs-utils-1.0.10/utils/gssd/Makefile.am +--- nfs-utils-1.0.10.orig/utils/gssd/Makefile.am 2006-11-15 21:26:08.000000000 -0700 ++++ nfs-utils-1.0.10/utils/gssd/Makefile.am 2006-12-15 15:11:52.000000000 -0700 +@@ -1,17 +1,11 @@ + ## Process this file with automake to produce Makefile.in + +-man8_MANS = gssd.man svcgssd.man +- +-RPCPREFIX = rpc. ++RPCPREFIX = + KPREFIX = @kprefix@ +-sbin_PREFIXED = gssd svcgssd +-sbin_PROGRAMS = $(sbin_PREFIXED) gss_clnt_send_err ++sbin_PREFIXED = lgssd lsvcgssd ++sbin_PROGRAMS = $(sbin_PREFIXED) + sbin_SCRIPTS = gss_destroy_creds + +-EXTRA_DIST = \ +- gss_destroy_creds \ +- $(man8_MANS) +- + COMMON_SRCS = \ + context.c \ + context_mit.c \ +@@ -21,13 +15,15 @@ COMMON_SRCS = \ + gss_util.c \ + gss_oids.c \ + err_util.c \ ++ lsupport.c \ + \ + context.h \ + err_util.h \ + gss_oids.h \ +- gss_util.h ++ gss_util.h \ ++ lsupport.h + +-gssd_SOURCES = \ ++lgssd_SOURCES = \ + $(COMMON_SRCS) \ + gssd.c \ + gssd_main_loop.c \ +@@ -38,13 +34,12 @@ gssd_SOURCES = \ + krb5_util.h \ + write_bytes.h + +-gssd_LDADD = $(RPCSECGSS_LIBS) $(KRBLIBS) +-gssd_LDFLAGS = $(KRBLDFLAGS) ++lgssd_LDADD = $(GSSAPI_LIBS) $(KRBLIBS) ++lgssd_LDFLAGS = $(KRBLDFLAGS) + +-gssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) \ +- $(RPCSECGSS_CFLAGS) $(KRBCFLAGS) ++lgssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(GSSAPI_CFLAGS) $(KRBCFLAGS) + +-svcgssd_SOURCES = \ ++lsvcgssd_SOURCES = \ + $(COMMON_SRCS) \ + cacheio.c \ + svcgssd.c \ +@@ -55,20 +50,11 @@ svcgssd_SOURCES = \ + cacheio.h \ + svcgssd.h + +-svcgssd_LDADD = \ +- ../../support/nfs/libnfs.a \ +- $(RPCSECGSS_LIBS) -lnfsidmap \ +- $(KRBLIBS) +- +-svcgssd_LDFLAGS = $(KRBLDFLAGS) +- +-svcgssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) \ +- $(RPCSECGSS_CFLAGS) $(KRBCFLAGS) ++lsvcgssd_LDADD = $(GSSAPI_LIBS) $(KRBLIBS) + +-gss_clnt_send_err_SOURCES = gss_clnt_send_err.c ++lsvcgssd_LDFLAGS = $(KRBLDFLAGS) + +-gss_clnt_send_err_CFLAGS = $(AM_CFLAGS) $(CFLAGS) \ +- $(RPCSECGSS_CFLAGS) $(KRBCFLAGS) ++lsvcgssd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(GSSAPI_CFLAGS) $(KRBCFLAGS) + + MAINTAINERCLEANFILES = Makefile.in + +@@ -92,23 +78,3 @@ uninstall-hook: + done) + + +-# XXX This makes some assumptions about what automake does. +-# XXX But there is no install-man-hook or install-man-local. +-install-man: install-man8 install-man-links +-uninstall-man: uninstall-man8 uninstall-man-links +- +-install-man-links: +- (cd $(DESTDIR)$(man8dir) && \ +- for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ +- inst=`echo $$m | sed -e 's/man$$/8/'`; \ +- rm -f $(RPCPREFIX)$$inst ; \ +- $(LN_S) $$inst $(RPCPREFIX)$$inst ; \ +- done) +- +-uninstall-man-links: +- (cd $(DESTDIR)$(man8dir) && \ +- for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ +- inst=`echo $$m | sed -e 's/man$$/8/'`; \ +- rm -f $(RPCPREFIX)$$inst ; \ +- done) +- +diff -rup nfs-utils-1.0.10.orig/utils/gssd/svcgssd.c nfs-utils-1.0.10/utils/gssd/svcgssd.c +--- nfs-utils-1.0.10.orig/utils/gssd/svcgssd.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/svcgssd.c 2006-12-15 15:12:23.000000000 -0700 +@@ -43,7 +43,6 @@ + #include <sys/types.h> + #include <sys/stat.h> + #include <sys/socket.h> +-#include <rpc/rpc.h> + #include <fcntl.h> + #include <errno.h> + +@@ -54,11 +53,33 @@ + #include <stdlib.h> + #include <string.h> + #include <signal.h> +-#include "nfslib.h" ++#include <dirent.h> + #include "svcgssd.h" + #include "gss_util.h" + #include "err_util.h" ++#include "lsupport.h" + ++void ++closeall(int min) ++{ ++ DIR *dir = opendir("/proc/self/fd"); ++ if (dir != NULL) { ++ int dfd = dirfd(dir); ++ struct dirent *d; ++ ++ while ((d = readdir(dir)) != NULL) { ++ char *endp; ++ long n = strtol(d->d_name, &endp, 10); ++ if (*endp != '\0' && n >= min && n != dfd) ++ (void) close(n); ++ } ++ closedir(dir); ++ } else { ++ int fd = sysconf(_SC_OPEN_MAX); ++ while (--fd >= min) ++ (void) close(fd); ++ } ++} + /* + * mydaemon creates a pipe between the partent and child + * process. The parent process will wait until the +@@ -165,8 +186,8 @@ main(int argc, char *argv[]) + int get_creds = 1; + int fg = 0; + int verbosity = 0; +- int rpc_verbosity = 0; + int opt; ++ int must_srv_mds = 0, must_srv_oss = 0; + extern char *optarg; + char *progname; + +@@ -181,8 +202,13 @@ main(int argc, char *argv[]) + case 'v': + verbosity++; + break; +- case 'r': +- rpc_verbosity++; ++ case 'm': ++ get_creds = 1; ++ must_srv_mds = 1; ++ break; ++ case 'o': ++ get_creds = 1; ++ must_srv_oss = 1; + break; + default: + usage(argv[0]); +@@ -196,27 +222,18 @@ main(int argc, char *argv[]) + progname = argv[0]; + + initerr(progname, verbosity, fg); +-#ifdef HAVE_AUTHGSS_SET_DEBUG_LEVEL +- authgss_set_debug_level(rpc_verbosity); +-#else +- if (rpc_verbosity > 0) +- printerr(0, "Warning: rpcsec_gss library does not " +- "support setting debug level\n"); +-#endif + + if (gssd_check_mechs() != 0) { + printerr(0, "ERROR: Problem with gssapi library\n"); + exit(1); + } + +- if (!fg) +- mydaemon(0, 0); +- +- signal(SIGINT, sig_die); +- signal(SIGTERM, sig_die); +- signal(SIGHUP, sig_hup); ++ if (gssd_get_local_realm()) { ++ printerr(0, "ERROR: Can't get Local Kerberos realm\n"); ++ exit(1); ++ } + +- if (get_creds && !gssd_acquire_cred(GSSD_SERVICE_NAME)) { ++ if (get_creds && gssd_prepare_creds(must_srv_mds, must_srv_oss)) { + printerr(0, "unable to obtain root (machine) credentials\n"); + printerr(0, "do you have a keytab entry for " + "nfs/<your.host>@<YOUR.REALM> in " +@@ -225,9 +242,18 @@ main(int argc, char *argv[]) + } + + if (!fg) ++ mydaemon(0, 0); ++ ++ signal(SIGINT, sig_die); ++ signal(SIGTERM, sig_die); ++ signal(SIGHUP, sig_hup); ++ ++ if (!fg) + release_parent(); + +- gssd_run(); ++ gssd_init_unique(GSSD_SVC); ++ ++ svcgssd_run(); + printerr(0, "gssd_run returned!\n"); + abort(); + } +diff -rup nfs-utils-1.0.10.orig/utils/gssd/svcgssd.h nfs-utils-1.0.10/utils/gssd/svcgssd.h +--- nfs-utils-1.0.10.orig/utils/gssd/svcgssd.h 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/svcgssd.h 2006-12-15 15:12:23.000000000 -0700 +@@ -35,9 +35,20 @@ + #include <sys/queue.h> + #include <gssapi/gssapi.h> + +-void handle_nullreq(FILE *f); +-void gssd_run(void); ++int handle_nullreq(FILE *f); ++void svcgssd_run(void); ++int gssd_prepare_creds(int must_srv_mds, int must_srv_oss); ++gss_cred_id_t gssd_select_svc_cred(int lustre_svc); + +-#define GSSD_SERVICE_NAME "nfs" ++extern char *mds_local_realm; ++extern char *oss_local_realm; ++ ++#define GSSD_SERVICE_NAME "lustre" ++ ++/* XXX */ ++#define GSSD_SERVICE_MDS "lustre_mds" ++#define GSSD_SERVICE_OSS "lustre_oss" ++#define LUSTRE_ROOT_NAME "lustre_root" ++#define LUSTRE_ROOT_NAMELEN 11 + + #endif /* _RPC_SVCGSSD_H_ */ +diff -rup nfs-utils-1.0.10.orig/utils/gssd/svcgssd_main_loop.c nfs-utils-1.0.10/utils/gssd/svcgssd_main_loop.c +--- nfs-utils-1.0.10.orig/utils/gssd/svcgssd_main_loop.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/svcgssd_main_loop.c 2006-12-15 15:12:23.000000000 -0700 +@@ -46,46 +46,66 @@ + #include "svcgssd.h" + #include "err_util.h" + ++/* ++ * nfs4 in-kernel cache implementation make upcall failed directly ++ * if there's no listener detected. so here we should keep the init ++ * channel file open as possible as we can. ++ * ++ * unfortunately the proc doesn't support dir change notification. ++ * and when an entry get unlinked, we only got POLLIN event once, ++ * it's the only oppotunity we can close the file and startover. ++ */ + void +-gssd_run() ++svcgssd_run() + { + int ret; +- FILE *f; ++ FILE *f = NULL; + struct pollfd pollfd; ++ struct timespec halfsec = { .tv_sec = 0, .tv_nsec = 500000000 }; + +-#define NULLRPC_FILE "/proc/net/rpc/auth.rpcsec.init/channel" ++#define NULLRPC_FILE "/proc/net/rpc/auth.ptlrpcs.init/channel" + +- f = fopen(NULLRPC_FILE, "rw"); +- +- if (!f) { +- printerr(0, "failed to open %s: %s\n", +- NULLRPC_FILE, strerror(errno)); +- exit(1); +- } +- pollfd.fd = fileno(f); +- pollfd.events = POLLIN; + while (1) { + int save_err; + ++ while (f == NULL) { ++ f = fopen(NULLRPC_FILE, "rw"); ++ if (f == NULL) { ++ printerr(4, "failed to open %s: %s\n", ++ NULLRPC_FILE, strerror(errno)); ++ nanosleep(&halfsec, NULL); ++ } else { ++ printerr(1, "successfully open %s\n", ++ NULLRPC_FILE); ++ break; ++ } ++ } ++ pollfd.fd = fileno(f); ++ pollfd.events = POLLIN; ++ + pollfd.revents = 0; +- printerr(1, "entering poll\n"); +- ret = poll(&pollfd, 1, -1); ++ ret = poll(&pollfd, 1, 1000); + save_err = errno; +- printerr(1, "leaving poll\n"); ++ + if (ret < 0) { +- if (save_err != EINTR) +- printerr(0, "error return from poll: %s\n", +- strerror(save_err)); ++ printerr(0, "error return from poll: %s\n", ++ strerror(save_err)); ++ fclose(f); ++ f = NULL; + } else if (ret == 0) { +- /* timeout; shouldn't happen. */ ++ printerr(3, "poll timeout\n"); + } else { + if (ret != 1) { + printerr(0, "bug: unexpected poll return %d\n", + ret); + exit(1); + } +- if (pollfd.revents & POLLIN) +- handle_nullreq(f); ++ if (pollfd.revents & POLLIN) { ++ if (handle_nullreq(f) < 0) { ++ fclose(f); ++ f = NULL; ++ } ++ } + } + } + } +diff -rup nfs-utils-1.0.10.orig/utils/gssd/svcgssd_proc.c nfs-utils-1.0.10/utils/gssd/svcgssd_proc.c +--- nfs-utils-1.0.10.orig/utils/gssd/svcgssd_proc.c 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/gssd/svcgssd_proc.c 2006-12-15 15:12:23.000000000 -0700 +@@ -35,7 +35,6 @@ + + #include <sys/param.h> + #include <sys/stat.h> +-#include <rpc/rpc.h> + + #include <pwd.h> + #include <stdio.h> +@@ -44,25 +43,28 @@ + #include <string.h> + #include <fcntl.h> + #include <errno.h> +-#include <nfsidmap.h> ++#include <netdb.h> + + #include "svcgssd.h" + #include "gss_util.h" + #include "err_util.h" + #include "context.h" + #include "cacheio.h" ++#include "lsupport.h" + + extern char * mech2file(gss_OID mech); +-#define SVCGSSD_CONTEXT_CHANNEL "/proc/net/rpc/auth.rpcsec.context/channel" +-#define SVCGSSD_INIT_CHANNEL "/proc/net/rpc/auth.rpcsec.init/channel" ++#define SVCGSSD_CONTEXT_CHANNEL "/proc/net/rpc/auth.ptlrpcs.context/channel" ++#define SVCGSSD_INIT_CHANNEL "/proc/net/rpc/auth.ptlrpcs.init/channel" + + #define TOKEN_BUF_SIZE 8192 + + struct svc_cred { +- uid_t cr_uid; +- gid_t cr_gid; +- int cr_ngroups; +- gid_t cr_groups[NGROUPS]; ++ uint32_t cr_remote; ++ uint32_t cr_usr_root; ++ uint32_t cr_usr_mds; ++ uid_t cr_uid; ++ uid_t cr_mapped_uid; ++ uid_t cr_gid; + }; + + static int +@@ -70,10 +72,9 @@ do_svc_downcall(gss_buffer_desc *out_han + gss_OID mech, gss_buffer_desc *context_token) + { + FILE *f; +- int i; + char *fname = NULL; + +- printerr(1, "doing downcall\n"); ++ printerr(2, "doing downcall\n"); + if ((fname = mech2file(mech)) == NULL) + goto out_err; + f = fopen(SVCGSSD_CONTEXT_CHANNEL, "w"); +@@ -86,11 +87,12 @@ do_svc_downcall(gss_buffer_desc *out_han + qword_printhex(f, out_handle->value, out_handle->length); + /* XXX are types OK for the rest of this? */ + qword_printint(f, 0x7fffffff); /*XXX need a better timeout */ ++ qword_printint(f, cred->cr_remote); ++ qword_printint(f, cred->cr_usr_root); ++ qword_printint(f, cred->cr_usr_mds); ++ qword_printint(f, cred->cr_mapped_uid); + qword_printint(f, cred->cr_uid); + qword_printint(f, cred->cr_gid); +- qword_printint(f, cred->cr_ngroups); +- for (i=0; i < cred->cr_ngroups; i++) +- qword_printint(f, cred->cr_groups[i]); + qword_print(f, fname); + qword_printhex(f, context_token->value, context_token->length); + qword_eol(f); +@@ -119,7 +121,7 @@ send_response(FILE *f, gss_buffer_desc * + /* XXXARG: */ + int g; + +- printerr(1, "sending null reply\n"); ++ printerr(2, "sending null reply\n"); + + qword_addhex(&bp, &blen, in_handle->value, in_handle->length); + qword_addhex(&bp, &blen, in_token->value, in_token->length); +@@ -159,6 +161,7 @@ send_response(FILE *f, gss_buffer_desc * + #define rpcsec_gsserr_credproblem 13 + #define rpcsec_gsserr_ctxproblem 14 + ++#if 0 + static void + add_supplementary_groups(char *secname, char *name, struct svc_cred *cred) + { +@@ -182,7 +185,9 @@ add_supplementary_groups(char *secname, + } + } + } ++#endif + ++#if 0 + static int + get_ids(gss_name_t client_name, gss_OID mech, struct svc_cred *cred) + { +@@ -248,7 +253,9 @@ out_free: + out: + return res; + } ++#endif + ++#if 0 + void + print_hexl(int pri, unsigned char *cp, int length) + { +@@ -285,12 +292,121 @@ print_hexl(int pri, unsigned char *cp, i + printerr(pri,"\n"); + } + } ++#endif + +-void ++static int ++get_ids(gss_name_t client_name, gss_OID mech, struct svc_cred *cred, ++ lnet_nid_t nid, uint32_t lustre_svc) ++{ ++ u_int32_t maj_stat, min_stat; ++ gss_buffer_desc name; ++ char *sname, *realm, *slash; ++ int res = -1; ++ gss_OID name_type = GSS_C_NO_OID; ++ struct passwd *pw; ++ ++ cred->cr_remote = cred->cr_usr_root = cred->cr_usr_mds = 0; ++ cred->cr_uid = cred->cr_mapped_uid = cred->cr_gid = -1; ++ ++ maj_stat = gss_display_name(&min_stat, client_name, &name, &name_type); ++ if (maj_stat != GSS_S_COMPLETE) { ++ pgsserr("get_ids: gss_display_name", ++ maj_stat, min_stat, mech); ++ return -1; ++ } ++ if (name.length >= 0xffff || /* be certain name.length+1 doesn't overflow */ ++ !(sname = calloc(name.length + 1, 1))) { ++ printerr(0, "WARNING: get_ids: error allocating %d bytes " ++ "for sname\n", name.length + 1); ++ gss_release_buffer(&min_stat, &name); ++ return -1; ++ } ++ memcpy(sname, name.value, name.length); ++ printerr(1, "authenticated %s from %016llx\n", sname, nid); ++ gss_release_buffer(&min_stat, &name); ++ ++ if (lustre_svc == LUSTRE_GSS_SVC_MDS) ++ lookup_mapping(sname, nid, &cred->cr_mapped_uid); ++ else ++ cred->cr_mapped_uid = -1; ++ ++ realm = strchr(sname, '@'); ++ if (!realm) { ++ printerr(0, "WARNNING: principal %s contains no realm name\n", ++ sname); ++ cred->cr_remote = (mds_local_realm != NULL); ++ } else { ++ *realm++ = '\0'; ++ if (!mds_local_realm) ++ cred->cr_remote = 1; ++ else ++ cred->cr_remote = ++ (strcasecmp(mds_local_realm, realm) != 0); ++ } ++ ++ if (cred->cr_remote) { ++ if (cred->cr_mapped_uid != -1) ++ res = 0; ++ else if (lustre_svc == LUSTRE_GSS_SVC_OSS && ++ strcmp(sname, "lustre_root") == 0) ++ res = 0; ++ else ++ printerr(0, "principal %s is remote without mapping\n", ++ sname); ++ goto out_free; ++ } ++ ++ slash = strchr(sname, '/'); ++ if (slash) ++ *slash = '\0'; ++ ++ if (!(pw = getpwnam(sname))) { ++ /* If client use machine credential, we map it to root, which ++ * will subject to further mapping by root-squash in kernel. ++ * ++ * MDS service keytab is treated as special user, also mapped ++ * to root. OSS service keytab can't be used as a user. ++ */ ++ if (!strcmp(sname, LUSTRE_ROOT_NAME)) { ++ printerr(2, "lustre_root principal, resolve to uid 0\n"); ++ cred->cr_uid = 0; ++ cred->cr_usr_root = 1; ++ } else if (!strcmp(sname, GSSD_SERVICE_MDS)) { ++ printerr(2, "mds service principal, resolve to uid 0\n"); ++ cred->cr_uid = 0; ++ cred->cr_usr_mds = 1; ++ } else { ++ cred->cr_uid = -1; ++ if (cred->cr_mapped_uid == -1) { ++ printerr(0, "invalid user %s\n", sname); ++ goto out_free; ++ } ++ printerr(2, "user %s mapped to %u\n", ++ sname, cred->cr_mapped_uid); ++ } ++ } else { ++ cred->cr_uid = pw->pw_uid; ++ printerr(2, "%s resolve to uid %u\n", sname, cred->cr_uid); ++ } ++ ++ res = 0; ++out_free: ++ free(sname); ++ return res; ++} ++ ++typedef struct gss_union_ctx_id_t { ++ gss_OID mech_type; ++ gss_ctx_id_t internal_ctx_id; ++} gss_union_ctx_id_desc, *gss_union_ctx_id_t; ++ ++/* ++ * return -1 only if we detect error during reading from upcall channel, ++ * all other cases return 0. ++ */ ++int + handle_nullreq(FILE *f) { +- /* XXX initialize to a random integer to reduce chances of unnecessary +- * invalidation of existing ctx's on restarting svcgssd. */ +- static u_int32_t handle_seq = 0; ++ uint64_t handle_seq; + char in_tok_buf[TOKEN_BUF_SIZE]; + char in_handle_buf[15]; + char out_handle_buf[15]; +@@ -302,10 +418,13 @@ handle_nullreq(FILE *f) { + ignore_out_tok = {.value = NULL}, + /* XXX isn't there a define for this?: */ + null_token = {.value = NULL}; ++ uint32_t lustre_svc; ++ lnet_nid_t nid; + u_int32_t ret_flags; + gss_ctx_id_t ctx = GSS_C_NO_CONTEXT; + gss_name_t client_name; + gss_OID mech = GSS_C_NO_OID; ++ gss_cred_id_t svc_cred; + u_int32_t maj_stat = GSS_S_FAILURE, min_stat = 0; + u_int32_t ignore_min_stat; + struct svc_cred cred; +@@ -313,25 +432,31 @@ handle_nullreq(FILE *f) { + static int lbuflen = 0; + static char *cp; + +- printerr(1, "handling null request\n"); ++ printerr(2, "handling null request\n"); + + if (readline(fileno(f), &lbuf, &lbuflen) != 1) { + printerr(0, "WARNING: handle_nullreq: " + "failed reading request\n"); +- return; ++ return -1; + } + + cp = lbuf; + ++ qword_get(&cp, (char *) &lustre_svc, sizeof(lustre_svc)); ++ qword_get(&cp, (char *) &nid, sizeof(nid)); ++ qword_get(&cp, (char *) &handle_seq, sizeof(handle_seq)); ++ printerr(1, "handling req: svc %u, nid %016llx, idx %llx\n", ++ lustre_svc, nid, handle_seq); ++ + in_handle.length = (size_t) qword_get(&cp, in_handle.value, + sizeof(in_handle_buf)); +- printerr(2, "in_handle: \n"); +- print_hexl(2, in_handle.value, in_handle.length); ++ printerr(3, "in_handle: \n"); ++ print_hexl(3, in_handle.value, in_handle.length); + + in_tok.length = (size_t) qword_get(&cp, in_tok.value, + sizeof(in_tok_buf)); +- printerr(2, "in_tok: \n"); +- print_hexl(2, in_tok.value, in_tok.length); ++ printerr(3, "in_tok: \n"); ++ print_hexl(3, in_tok.value, in_tok.length); + + if (in_tok.length < 0) { + printerr(0, "WARNING: handle_nullreq: " +@@ -351,7 +476,13 @@ handle_nullreq(FILE *f) { + memcpy(&ctx, in_handle.value, in_handle.length); + } + +- maj_stat = gss_accept_sec_context(&min_stat, &ctx, gssd_creds, ++ svc_cred = gssd_select_svc_cred(lustre_svc); ++ if (!svc_cred) { ++ printerr(0, "no service credential for svc %u\n", lustre_svc); ++ goto out_err; ++ } ++ ++ maj_stat = gss_accept_sec_context(&min_stat, &ctx, svc_cred, + &in_tok, GSS_C_NO_CHANNEL_BINDINGS, &client_name, + &mech, &out_tok, &ret_flags, NULL, NULL); + +@@ -369,7 +500,8 @@ handle_nullreq(FILE *f) { + maj_stat, min_stat, mech); + goto out_err; + } +- if (get_ids(client_name, mech, &cred)) { ++ ++ if (get_ids(client_name, mech, &cred, nid, lustre_svc)) { + /* get_ids() prints error msg */ + maj_stat = GSS_S_BAD_NAME; /* XXX ? */ + gss_release_name(&ignore_min_stat, &client_name); +@@ -377,10 +509,8 @@ handle_nullreq(FILE *f) { + } + gss_release_name(&ignore_min_stat, &client_name); + +- + /* Context complete. Pass handle_seq in out_handle to use + * for context lookup in the kernel. */ +- handle_seq++; + out_handle.length = sizeof(handle_seq); + memcpy(out_handle.value, &handle_seq, sizeof(handle_seq)); + +@@ -404,8 +534,7 @@ out: + free(ctx_token.value); + if (out_tok.value != NULL) + gss_release_buffer(&ignore_min_stat, &out_tok); +- printerr(1, "finished handling null request\n"); +- return; ++ return 0; + + out_err: + if (ctx != GSS_C_NO_CONTEXT) +diff -rup nfs-utils-1.0.10.orig/utils/Makefile.am nfs-utils-1.0.10/utils/Makefile.am +--- nfs-utils-1.0.10.orig/utils/Makefile.am 2006-08-07 00:40:50.000000000 -0600 ++++ nfs-utils-1.0.10/utils/Makefile.am 2006-12-15 15:11:52.000000000 -0700 +@@ -2,31 +2,6 @@ + + OPTDIRS = + +-if CONFIG_RQUOTAD +-OPTDIRS += rquotad +-endif +- +-if CONFIG_NFSV4 +-OPTDIRS += idmapd +-endif +- +-if CONFIG_GSS +-OPTDIRS += gssd +-endif +- +-if CONFIG_MOUNT +-OPTDIRS += mount +-endif +- +-SUBDIRS = \ +- exportfs \ +- lockd \ +- mountd \ +- nfsd \ +- nfsstat \ +- nhfsstone \ +- showmount \ +- statd \ +- $(OPTDIRS) ++SUBDIRS = gssd + + MAINTAINERCLEANFILES = Makefile.in diff --git a/lustre/utils/gss/svcgssd.c b/lustre/utils/gss/svcgssd.c new file mode 100644 index 0000000..f3b9681 --- /dev/null +++ b/lustre/utils/gss/svcgssd.c @@ -0,0 +1,265 @@ +/* + gssd.c + + Copyright (c) 2000 The Regents of the University of Michigan. + All rights reserved. + + Copyright (c) 2000 Dug Song <dugsong@UMICH.EDU>. + Copyright (c) 2002 Andy Adamson <andros@UMICH.EDU>. + Copyright (c) 2002 Marius Aamodt Eriksen <marius@UMICH.EDU>. + Copyright (c) 2002 J. Bruce Fields <bfields@UMICH.EDU>. + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "config.h" + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <fcntl.h> +#include <errno.h> + + +#include <unistd.h> +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <dirent.h> +#include "svcgssd.h" +#include "gss_util.h" +#include "err_util.h" +#include "lsupport.h" + +void +closeall(int min) +{ + DIR *dir = opendir("/proc/self/fd"); + if (dir != NULL) { + int dfd = dirfd(dir); + struct dirent *d; + + while ((d = readdir(dir)) != NULL) { + char *endp; + long n = strtol(d->d_name, &endp, 10); + if (*endp != '\0' && n >= min && n != dfd) + (void) close(n); + } + closedir(dir); + } else { + int fd = sysconf(_SC_OPEN_MAX); + while (--fd >= min) + (void) close(fd); + } +} +/* + * mydaemon creates a pipe between the partent and child + * process. The parent process will wait until the + * child dies or writes a '1' on the pipe signaling + * that it started successfully. + */ +int pipefds[2] = { -1, -1}; + +static void +mydaemon(int nochdir, int noclose) +{ + int pid, status, tempfd; + + if (pipe(pipefds) < 0) { + printerr(1, "mydaemon: pipe() failed: errno %d (%s)\n", + errno, strerror(errno)); + exit(1); + } + if ((pid = fork ()) < 0) { + printerr(1, "mydaemon: fork() failed: errno %d (%s)\n", + errno, strerror(errno)); + exit(1); + } + + if (pid != 0) { + /* + * Parent. Wait for status from child. + */ + close(pipefds[1]); + if (read(pipefds[0], &status, 1) != 1) + exit(1); + exit (0); + } + /* Child. */ + close(pipefds[0]); + setsid (); + if (nochdir == 0) { + if (chdir ("/") == -1) { + printerr(1, "mydaemon: chdir() failed: errno %d (%s)\n", + errno, strerror(errno)); + exit(1); + } + } + + while (pipefds[1] <= 2) { + pipefds[1] = dup(pipefds[1]); + if (pipefds[1] < 0) { + printerr(1, "mydaemon: dup() failed: errno %d (%s)\n", + errno, strerror(errno)); + exit(1); + } + } + + if (noclose == 0) { + tempfd = open("/dev/null", O_RDWR); + dup2(tempfd, 0); + dup2(tempfd, 1); + dup2(tempfd, 2); + closeall(3); + } + + return; +} + +static void +release_parent() +{ + int status; + + if (pipefds[1] > 0) { + write(pipefds[1], &status, 1); + close(pipefds[1]); + pipefds[1] = -1; + } +} + +void +sig_die(int signal) +{ + /* destroy krb5 machine creds */ + cleanup_mapping(); + printerr(1, "exiting on signal %d\n", signal); + exit(1); +} + +void +sig_hup(int signal) +{ + /* don't exit on SIGHUP */ + printerr(1, "Received SIGHUP... Ignoring.\n"); + return; +} + +static void +usage(char *progname) +{ + fprintf(stderr, "usage: %s [-n] [-f] [-v] [-r]\n", + progname); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + int get_creds = 1; + int fg = 0; + int verbosity = 0; + int opt; + int must_srv_mds = 0, must_srv_oss = 0; + extern char *optarg; + char *progname; + + while ((opt = getopt(argc, argv, "fvrnp:")) != -1) { + switch (opt) { + case 'f': + fg = 1; + break; + case 'n': + get_creds = 0; + break; + case 'v': + verbosity++; + break; + case 'm': + get_creds = 1; + must_srv_mds = 1; + break; + case 'o': + get_creds = 1; + must_srv_oss = 1; + break; + default: + usage(argv[0]); + break; + } + } + + if ((progname = strrchr(argv[0], '/'))) + progname++; + else + progname = argv[0]; + + initerr(progname, verbosity, fg); + + if (gssd_check_mechs() != 0) { + printerr(0, "ERROR: Problem with gssapi library\n"); + exit(1); + } + + if (gssd_get_local_realm()) { + printerr(0, "ERROR: Can't get Local Kerberos realm\n"); + exit(1); + } + + if (get_creds && gssd_prepare_creds(must_srv_mds, must_srv_oss)) { + printerr(0, "unable to obtain root (machine) credentials\n"); + printerr(0, "do you have a keytab entry for " + "nfs/<your.host>@<YOUR.REALM> in " + "/etc/krb5.keytab?\n"); + exit(1); + } + + if (!fg) + mydaemon(0, 0); + + /* + * XXX: There is risk of memory leak for missing call + * cleanup_mapping() for SIGKILL and SIGSTOP. + */ + signal(SIGINT, sig_die); + signal(SIGTERM, sig_die); + signal(SIGHUP, sig_hup); + + if (!fg) + release_parent(); + + gssd_init_unique(GSSD_SVC); + + svcgssd_run(); + cleanup_mapping(); + printerr(0, "gssd_run returned!\n"); + abort(); +} diff --git a/lustre/utils/gss/svcgssd.h b/lustre/utils/gss/svcgssd.h new file mode 100644 index 0000000..5283c95 --- /dev/null +++ b/lustre/utils/gss/svcgssd.h @@ -0,0 +1,54 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RPC_SVCGSSD_H_ +#define _RPC_SVCGSSD_H_ + +#include <sys/types.h> +#include <sys/queue.h> +#include <gssapi/gssapi.h> + +int handle_nullreq(FILE *f); +void svcgssd_run(void); +int gssd_prepare_creds(int must_srv_mds, int must_srv_oss); +gss_cred_id_t gssd_select_svc_cred(int lustre_svc); + +extern char *mds_local_realm; +extern char *oss_local_realm; + +#define GSSD_SERVICE_NAME "lustre" + +/* XXX */ +#define GSSD_SERVICE_MDS "lustre_mds" +#define GSSD_SERVICE_OSS "lustre_oss" +#define LUSTRE_ROOT_NAME "lustre_root" +#define LUSTRE_ROOT_NAMELEN 11 + +#endif /* _RPC_SVCGSSD_H_ */ diff --git a/lustre/utils/gss/svcgssd_main_loop.c b/lustre/utils/gss/svcgssd_main_loop.c new file mode 100644 index 0000000..3132829 --- /dev/null +++ b/lustre/utils/gss/svcgssd_main_loop.c @@ -0,0 +1,111 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <netinet/in.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <memory.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +#include "svcgssd.h" +#include "err_util.h" + +/* + * nfs4 in-kernel cache implementation make upcall failed directly + * if there's no listener detected. so here we should keep the init + * channel file open as possible as we can. + * + * unfortunately the proc doesn't support dir change notification. + * and when an entry get unlinked, we only got POLLIN event once, + * it's the only oppotunity we can close the file and startover. + */ +void +svcgssd_run() +{ + int ret; + FILE *f = NULL; + struct pollfd pollfd; + struct timespec halfsec = { .tv_sec = 0, .tv_nsec = 500000000 }; + +#define NULLRPC_FILE "/proc/net/rpc/auth.ptlrpcs.init/channel" + + while (1) { + int save_err; + + while (f == NULL) { + f = fopen(NULLRPC_FILE, "rw"); + if (f == NULL) { + printerr(4, "failed to open %s: %s\n", + NULLRPC_FILE, strerror(errno)); + nanosleep(&halfsec, NULL); + } else { + printerr(1, "successfully open %s\n", + NULLRPC_FILE); + break; + } + } + pollfd.fd = fileno(f); + pollfd.events = POLLIN; + + pollfd.revents = 0; + ret = poll(&pollfd, 1, 1000); + save_err = errno; + + if (ret < 0) { + printerr(0, "error return from poll: %s\n", + strerror(save_err)); + fclose(f); + f = NULL; + } else if (ret == 0) { + printerr(3, "poll timeout\n"); + } else { + if (ret != 1) { + printerr(0, "bug: unexpected poll return %d\n", + ret); + exit(1); + } + if (pollfd.revents & POLLIN) { + if (handle_nullreq(f) < 0) { + fclose(f); + f = NULL; + } + } + } + } +} diff --git a/lustre/utils/gss/svcgssd_mech2file.c b/lustre/utils/gss/svcgssd_mech2file.c new file mode 100644 index 0000000..22c3ed8 --- /dev/null +++ b/lustre/utils/gss/svcgssd_mech2file.c @@ -0,0 +1,73 @@ +/* + linux_downcall.c + + Copyright (c) 2000 The Regents of the University of Michigan. + All rights reserved. + + Copyright (c) 2004 Andy Adamson <andros@UMICH.EDU>. + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "config.h" + +#include <gssapi/gssapi.h> +#include <string.h> + + +#define g_OID_equal(o1,o2) \ + (((o1)->length == (o2)->length) && \ + (memcmp((o1)->elements,(o2)->elements,(int) (o1)->length) == 0)) + +struct mech2file { + gss_OID_desc mech; + char filename[8]; +}; + +struct mech2file m2f[] = { + {{9, "\052\206\110\206\367\022\001\002\002"}, "krb5"}, + {{7, "\053\006\001\005\005\001\003"}, "spkm3"}, + {{7, "\053\006\001\005\005\001\009"}, "lipkey"}, + {{0,0},""}, +}; + +/* + * Find the Linux svcgssd downcall file name given the mechanism + */ +char * +mech2file(gss_OID mech) +{ + struct mech2file *m2fp = m2f; + + while(m2fp->mech.length != 0) { + if (g_OID_equal(mech,&m2fp->mech)) + return(m2fp->filename); + m2fp++; + } + return NULL; +} diff --git a/lustre/utils/gss/svcgssd_proc.c b/lustre/utils/gss/svcgssd_proc.c new file mode 100644 index 0000000..7507627 --- /dev/null +++ b/lustre/utils/gss/svcgssd_proc.c @@ -0,0 +1,545 @@ +/* + svc_in_gssd_proc.c + + Copyright (c) 2000 The Regents of the University of Michigan. + All rights reserved. + + Copyright (c) 2002 Bruce Fields <bfields@UMICH.EDU> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <pwd.h> +#include <stdio.h> +#include <unistd.h> +#include <ctype.h> +#include <string.h> +#include <fcntl.h> +#include <errno.h> +#include <netdb.h> + +#include "svcgssd.h" +#include "gss_util.h" +#include "err_util.h" +#include "context.h" +#include "cacheio.h" +#include "lsupport.h" + +extern char * mech2file(gss_OID mech); +#define SVCGSSD_CONTEXT_CHANNEL "/proc/net/rpc/auth.ptlrpcs.context/channel" +#define SVCGSSD_INIT_CHANNEL "/proc/net/rpc/auth.ptlrpcs.init/channel" + +#define TOKEN_BUF_SIZE 8192 + +struct svc_cred { + uint32_t cr_remote; + uint32_t cr_usr_root; + uint32_t cr_usr_mds; + uid_t cr_uid; + uid_t cr_mapped_uid; + uid_t cr_gid; +}; + +static int +do_svc_downcall(gss_buffer_desc *out_handle, struct svc_cred *cred, + gss_OID mech, gss_buffer_desc *context_token) +{ + FILE *f; + char *fname = NULL; + + printerr(2, "doing downcall\n"); + if ((fname = mech2file(mech)) == NULL) + goto out_err; + f = fopen(SVCGSSD_CONTEXT_CHANNEL, "w"); + if (f == NULL) { + printerr(0, "WARNING: unable to open downcall channel " + "%s: %s\n", + SVCGSSD_CONTEXT_CHANNEL, strerror(errno)); + goto out_err; + } + qword_printhex(f, out_handle->value, out_handle->length); + /* XXX are types OK for the rest of this? */ + qword_printint(f, 0x7fffffff); /*XXX need a better timeout */ + qword_printint(f, cred->cr_remote); + qword_printint(f, cred->cr_usr_root); + qword_printint(f, cred->cr_usr_mds); + qword_printint(f, cred->cr_mapped_uid); + qword_printint(f, cred->cr_uid); + qword_printint(f, cred->cr_gid); + qword_print(f, fname); + qword_printhex(f, context_token->value, context_token->length); + qword_eol(f); + fclose(f); + return 0; +out_err: + printerr(0, "WARNING: downcall failed\n"); + return -1; +} + +struct gss_verifier { + u_int32_t flav; + gss_buffer_desc body; +}; + +#define RPCSEC_GSS_SEQ_WIN 5 + +static int +send_response(FILE *f, gss_buffer_desc *in_handle, gss_buffer_desc *in_token, + u_int32_t maj_stat, u_int32_t min_stat, + gss_buffer_desc *out_handle, gss_buffer_desc *out_token) +{ + char buf[2 * TOKEN_BUF_SIZE]; + char *bp = buf; + int blen = sizeof(buf); + /* XXXARG: */ + int g; + + printerr(2, "sending null reply\n"); + + qword_addhex(&bp, &blen, in_handle->value, in_handle->length); + qword_addhex(&bp, &blen, in_token->value, in_token->length); + qword_addint(&bp, &blen, 0x7fffffff); /*XXX need a better timeout */ + qword_addint(&bp, &blen, maj_stat); + qword_addint(&bp, &blen, min_stat); + qword_addhex(&bp, &blen, out_handle->value, out_handle->length); + qword_addhex(&bp, &blen, out_token->value, out_token->length); + qword_addeol(&bp, &blen); + if (blen <= 0) { + printerr(0, "WARNING: send_respsonse: message too long\n"); + return -1; + } + g = open(SVCGSSD_INIT_CHANNEL, O_WRONLY); + if (g == -1) { + printerr(0, "WARNING: open %s failed: %s\n", + SVCGSSD_INIT_CHANNEL, strerror(errno)); + return -1; + } + *bp = '\0'; + printerr(3, "writing message: %s", buf); + if (write(g, buf, bp - buf) == -1) { + printerr(0, "WARNING: failed to write message\n"); + close(g); + return -1; + } + close(g); + return 0; +} + +#define rpc_auth_ok 0 +#define rpc_autherr_badcred 1 +#define rpc_autherr_rejectedcred 2 +#define rpc_autherr_badverf 3 +#define rpc_autherr_rejectedverf 4 +#define rpc_autherr_tooweak 5 +#define rpcsec_gsserr_credproblem 13 +#define rpcsec_gsserr_ctxproblem 14 + +#if 0 +static void +add_supplementary_groups(char *secname, char *name, struct svc_cred *cred) +{ + int ret; + static gid_t *groups = NULL; + + cred->cr_ngroups = NGROUPS; + ret = nfs4_gss_princ_to_grouplist(secname, name, + cred->cr_groups, &cred->cr_ngroups); + if (ret < 0) { + groups = realloc(groups, cred->cr_ngroups*sizeof(gid_t)); + ret = nfs4_gss_princ_to_grouplist(secname, name, + groups, &cred->cr_ngroups); + if (ret < 0) + cred->cr_ngroups = 0; + else { + if (cred->cr_ngroups > NGROUPS) + cred->cr_ngroups = NGROUPS; + memcpy(cred->cr_groups, groups, + cred->cr_ngroups*sizeof(gid_t)); + } + } +} +#endif + +#if 0 +static int +get_ids(gss_name_t client_name, gss_OID mech, struct svc_cred *cred) +{ + u_int32_t maj_stat, min_stat; + gss_buffer_desc name; + char *sname; + int res = -1; + uid_t uid, gid; + gss_OID name_type = GSS_C_NO_OID; + char *secname; + + maj_stat = gss_display_name(&min_stat, client_name, &name, &name_type); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("get_ids: gss_display_name", + maj_stat, min_stat, mech); + goto out; + } + if (name.length >= 0xffff || /* be certain name.length+1 doesn't overflow */ + !(sname = calloc(name.length + 1, 1))) { + printerr(0, "WARNING: get_ids: error allocating %d bytes " + "for sname\n", name.length + 1); + gss_release_buffer(&min_stat, &name); + goto out; + } + memcpy(sname, name.value, name.length); + printerr(1, "sname = %s\n", sname); + gss_release_buffer(&min_stat, &name); + + res = -EINVAL; + if ((secname = mech2file(mech)) == NULL) { + printerr(0, "WARNING: get_ids: error mapping mech to " + "file for name '%s'\n", sname); + goto out_free; + } + nfs4_init_name_mapping(NULL); /* XXX: should only do this once */ + res = nfs4_gss_princ_to_ids(secname, sname, &uid, &gid); + if (res < 0) { + /* + * -ENOENT means there was no mapping, any other error + * value means there was an error trying to do the + * mapping. + * If there was no mapping, we send down the value -1 + * to indicate that the anonuid/anongid for the export + * should be used. + */ + if (res == -ENOENT) { + cred->cr_uid = -1; + cred->cr_gid = -1; + cred->cr_ngroups = 0; + res = 0; + goto out_free; + } + printerr(0, "WARNING: get_ids: failed to map name '%s' " + "to uid/gid: %s\n", sname, strerror(-res)); + goto out_free; + } + cred->cr_uid = uid; + cred->cr_gid = gid; + add_supplementary_groups(secname, sname, cred); + res = 0; +out_free: + free(sname); +out: + return res; +} +#endif + +#if 0 +void +print_hexl(int pri, unsigned char *cp, int length) +{ + int i, j, jm; + unsigned char c; + + printerr(pri, "length %d\n",length); + printerr(pri, "\n"); + + for (i = 0; i < length; i += 0x10) { + printerr(pri, " %04x: ", (u_int)i); + jm = length - i; + jm = jm > 16 ? 16 : jm; + + for (j = 0; j < jm; j++) { + if ((j % 2) == 1) + printerr(pri,"%02x ", (u_int)cp[i+j]); + else + printerr(pri,"%02x", (u_int)cp[i+j]); + } + for (; j < 16; j++) { + if ((j % 2) == 1) + printerr(pri," "); + else + printerr(pri," "); + } + printerr(pri," "); + + for (j = 0; j < jm; j++) { + c = cp[i+j]; + c = isprint(c) ? c : '.'; + printerr(pri,"%c", c); + } + printerr(pri,"\n"); + } +} +#endif + +static int +get_ids(gss_name_t client_name, gss_OID mech, struct svc_cred *cred, + lnet_nid_t nid, uint32_t lustre_svc) +{ + u_int32_t maj_stat, min_stat; + gss_buffer_desc name; + char *sname, *realm, *slash; + int res = -1; + gss_OID name_type = GSS_C_NO_OID; + struct passwd *pw; + + cred->cr_remote = cred->cr_usr_root = cred->cr_usr_mds = 0; + cred->cr_uid = cred->cr_mapped_uid = cred->cr_gid = -1; + + maj_stat = gss_display_name(&min_stat, client_name, &name, &name_type); + if (maj_stat != GSS_S_COMPLETE) { + pgsserr("get_ids: gss_display_name", + maj_stat, min_stat, mech); + return -1; + } + if (name.length >= 0xffff || /* be certain name.length+1 doesn't overflow */ + !(sname = calloc(name.length + 1, 1))) { + printerr(0, "WARNING: get_ids: error allocating %d bytes " + "for sname\n", name.length + 1); + gss_release_buffer(&min_stat, &name); + return -1; + } + memcpy(sname, name.value, name.length); + printerr(1, "authenticated %s from %016llx\n", sname, nid); + gss_release_buffer(&min_stat, &name); + + if (lustre_svc == LUSTRE_GSS_SVC_MDS) + lookup_mapping(sname, nid, &cred->cr_mapped_uid); + else + cred->cr_mapped_uid = -1; + + realm = strchr(sname, '@'); + if (!realm) { + printerr(0, "WARNNING: principal %s contains no realm name\n", + sname); + cred->cr_remote = (mds_local_realm != NULL); + } else { + *realm++ = '\0'; + if (!mds_local_realm) + cred->cr_remote = 1; + else + cred->cr_remote = + (strcasecmp(mds_local_realm, realm) != 0); + } + + if (cred->cr_remote) { + if (cred->cr_mapped_uid != -1) + res = 0; + else if (lustre_svc == LUSTRE_GSS_SVC_OSS && + strcmp(sname, "lustre_root") == 0) + res = 0; + else + printerr(0, "principal %s is remote without mapping\n", + sname); + goto out_free; + } + + slash = strchr(sname, '/'); + if (slash) + *slash = '\0'; + + if (!(pw = getpwnam(sname))) { + /* If client use machine credential, we map it to root, which + * will subject to further mapping by root-squash in kernel. + * + * MDS service keytab is treated as special user, also mapped + * to root. OSS service keytab can't be used as a user. + */ + if (!strcmp(sname, LUSTRE_ROOT_NAME)) { + printerr(2, "lustre_root principal, resolve to uid 0\n"); + cred->cr_uid = 0; + cred->cr_usr_root = 1; + } else if (!strcmp(sname, GSSD_SERVICE_MDS)) { + printerr(2, "mds service principal, resolve to uid 0\n"); + cred->cr_uid = 0; + cred->cr_usr_mds = 1; + } else { + cred->cr_uid = -1; + if (cred->cr_mapped_uid == -1) { + printerr(0, "invalid user %s\n", sname); + goto out_free; + } + printerr(2, "user %s mapped to %u\n", + sname, cred->cr_mapped_uid); + } + } else { + cred->cr_uid = pw->pw_uid; + printerr(2, "%s resolve to uid %u\n", sname, cred->cr_uid); + } + + res = 0; +out_free: + free(sname); + return res; +} + +typedef struct gss_union_ctx_id_t { + gss_OID mech_type; + gss_ctx_id_t internal_ctx_id; +} gss_union_ctx_id_desc, *gss_union_ctx_id_t; + +/* + * return -1 only if we detect error during reading from upcall channel, + * all other cases return 0. + */ +int +handle_nullreq(FILE *f) { + uint64_t handle_seq; + char in_tok_buf[TOKEN_BUF_SIZE]; + char in_handle_buf[15]; + char out_handle_buf[15]; + gss_buffer_desc in_tok = {.value = in_tok_buf}, + out_tok = {.value = NULL}, + in_handle = {.value = in_handle_buf}, + out_handle = {.value = out_handle_buf}, + ctx_token = {.value = NULL}, + ignore_out_tok = {.value = NULL}, + /* XXX isn't there a define for this?: */ + null_token = {.value = NULL}; + uint32_t lustre_svc; + lnet_nid_t nid; + u_int32_t ret_flags; + gss_ctx_id_t ctx = GSS_C_NO_CONTEXT; + gss_name_t client_name; + gss_OID mech = GSS_C_NO_OID; + gss_cred_id_t svc_cred; + u_int32_t maj_stat = GSS_S_FAILURE, min_stat = 0; + u_int32_t ignore_min_stat; + struct svc_cred cred; + static char *lbuf = NULL; + static int lbuflen = 0; + static char *cp; + + printerr(2, "handling null request\n"); + + if (readline(fileno(f), &lbuf, &lbuflen) != 1) { + printerr(0, "WARNING: handle_nullreq: " + "failed reading request\n"); + return -1; + } + + cp = lbuf; + + qword_get(&cp, (char *) &lustre_svc, sizeof(lustre_svc)); + qword_get(&cp, (char *) &nid, sizeof(nid)); + qword_get(&cp, (char *) &handle_seq, sizeof(handle_seq)); + printerr(1, "handling req: svc %u, nid %016llx, idx %llx\n", + lustre_svc, nid, handle_seq); + + in_handle.length = (size_t) qword_get(&cp, in_handle.value, + sizeof(in_handle_buf)); + printerr(3, "in_handle: \n"); + print_hexl(3, in_handle.value, in_handle.length); + + in_tok.length = (size_t) qword_get(&cp, in_tok.value, + sizeof(in_tok_buf)); + printerr(3, "in_tok: \n"); + print_hexl(3, in_tok.value, in_tok.length); + + if (in_tok.length < 0) { + printerr(0, "WARNING: handle_nullreq: " + "failed parsing request\n"); + goto out_err; + } + + if (in_handle.length != 0) { /* CONTINUE_INIT case */ + if (in_handle.length != sizeof(ctx)) { + printerr(0, "WARNING: handle_nullreq: " + "input handle has unexpected length %d\n", + in_handle.length); + goto out_err; + } + /* in_handle is the context id stored in the out_handle + * for the GSS_S_CONTINUE_NEEDED case below. */ + memcpy(&ctx, in_handle.value, in_handle.length); + } + + svc_cred = gssd_select_svc_cred(lustre_svc); + if (!svc_cred) { + printerr(0, "no service credential for svc %u\n", lustre_svc); + goto out_err; + } + + maj_stat = gss_accept_sec_context(&min_stat, &ctx, svc_cred, + &in_tok, GSS_C_NO_CHANNEL_BINDINGS, &client_name, + &mech, &out_tok, &ret_flags, NULL, NULL); + + if (maj_stat == GSS_S_CONTINUE_NEEDED) { + printerr(1, "gss_accept_sec_context GSS_S_CONTINUE_NEEDED\n"); + + /* Save the context handle for future calls */ + out_handle.length = sizeof(ctx); + memcpy(out_handle.value, &ctx, sizeof(ctx)); + goto continue_needed; + } + else if (maj_stat != GSS_S_COMPLETE) { + printerr(0, "WARNING: gss_accept_sec_context failed\n"); + pgsserr("handle_nullreq: gss_accept_sec_context", + maj_stat, min_stat, mech); + goto out_err; + } + + if (get_ids(client_name, mech, &cred, nid, lustre_svc)) { + /* get_ids() prints error msg */ + maj_stat = GSS_S_BAD_NAME; /* XXX ? */ + gss_release_name(&ignore_min_stat, &client_name); + goto out_err; + } + gss_release_name(&ignore_min_stat, &client_name); + + /* Context complete. Pass handle_seq in out_handle to use + * for context lookup in the kernel. */ + out_handle.length = sizeof(handle_seq); + memcpy(out_handle.value, &handle_seq, sizeof(handle_seq)); + + /* kernel needs ctx to calculate verifier on null response, so + * must give it context before doing null call: */ + if (serialize_context_for_kernel(ctx, &ctx_token, mech)) { + printerr(0, "WARNING: handle_nullreq: " + "serialize_context_for_kernel failed\n"); + maj_stat = GSS_S_FAILURE; + goto out_err; + } + /* We no longer need the gss context */ + gss_delete_sec_context(&ignore_min_stat, &ctx, &ignore_out_tok); + + do_svc_downcall(&out_handle, &cred, mech, &ctx_token); +continue_needed: + send_response(f, &in_handle, &in_tok, maj_stat, min_stat, + &out_handle, &out_tok); +out: + if (ctx_token.value != NULL) + free(ctx_token.value); + if (out_tok.value != NULL) + gss_release_buffer(&ignore_min_stat, &out_tok); + return 0; + +out_err: + if (ctx != GSS_C_NO_CONTEXT) + gss_delete_sec_context(&ignore_min_stat, &ctx, &ignore_out_tok); + send_response(f, &in_handle, &in_tok, maj_stat, min_stat, + &null_token, &null_token); + goto out; +} diff --git a/lustre/utils/gss/write_bytes.h b/lustre/utils/gss/write_bytes.h new file mode 100644 index 0000000..4fc72cc --- /dev/null +++ b/lustre/utils/gss/write_bytes.h @@ -0,0 +1,158 @@ +/* + Copyright (c) 2004 The Regents of the University of Michigan. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _WRITE_BYTES_H_ +#define _WRITE_BYTES_H_ + +#include <stdlib.h> +#include <sys/types.h> +#include <netinet/in.h> /* for ntohl */ + +inline static int +write_bytes(char **ptr, const char *end, const void *arg, int arg_len) +{ + char *p = *ptr, *arg_end; + + arg_end = p + arg_len; + if (arg_end > end || arg_end < p) + return -1; + memcpy(p, arg, arg_len); + *ptr = arg_end; + return 0; +} + +#define WRITE_BYTES(p, end, arg) write_bytes(p, end, &arg, sizeof(arg)) + +inline static int +write_buffer(char **p, char *end, gss_buffer_desc *arg) +{ + int len = (int)arg->length; /* make an int out of size_t */ + if (WRITE_BYTES(p, end, len)) + return -1; + if (*p + len > end) + return -1; + memcpy(*p, arg->value, len); + *p += len; + return 0; +} + +inline static int +write_oid(char **p, char *end, gss_OID_desc *arg) +{ + int len = (int)arg->length; /* make an int out of size_t */ + if (WRITE_BYTES(p, end, len)) + return -1; + if (*p + arg->length > end) + return -1; + memcpy(*p, arg->elements, len); + *p += len; + return 0; +} + +static inline int +get_bytes(char **ptr, const char *end, void *res, int len) +{ + char *p, *q; + p = *ptr; + q = p + len; + if (q > end || q < p) + return -1; + memcpy(res, p, len); + *ptr = q; + return 0; +} + +static inline int +get_buffer(char **ptr, const char *end, gss_buffer_desc *res) +{ + char *p, *q; + p = *ptr; + int len; + if (get_bytes(&p, end, &len, sizeof(len))) + return -1; + res->length = len; /* promote to size_t if necessary */ + q = p + res->length; + if (q > end || q < p) + return -1; + if (!(res->value = malloc(res->length))) + return -1; + memcpy(res->value, p, res->length); + *ptr = q; + return 0; +} + +static inline int +xdr_get_u32(u_int32_t **ptr, const u_int32_t *end, u_int32_t *res) +{ + if (get_bytes((char **)ptr, (char *)end, res, sizeof(res))) + return -1; + *res = ntohl(*res); + return 0; +} + +static inline int +xdr_get_buffer(u_int32_t **ptr, const u_int32_t *end, gss_buffer_desc *res) +{ + u_int32_t *p, *q; + u_int32_t len; + p = *ptr; + if (xdr_get_u32(&p, end, &len)) + return -1; + res->length = len; + q = p + ((res->length + 3) >> 2); + if (q > end || q < p) + return -1; + if (!(res->value = malloc(res->length))) + return -1; + memcpy(res->value, p, res->length); + *ptr = q; + return 0; +} + +static inline int +xdr_write_u32(u_int32_t **ptr, const u_int32_t *end, u_int32_t arg) +{ + u_int32_t tmp; + + tmp = htonl(arg); + return WRITE_BYTES((char **)ptr, (char *)end, tmp); +} + +static inline int +xdr_write_buffer(u_int32_t **ptr, const u_int32_t *end, gss_buffer_desc *arg) +{ + int len = arg->length; + if (xdr_write_u32(ptr, end, len)) + return -1; + return write_bytes((char **)ptr, (char *)end, arg->value, + (arg->length + 3) & ~3); +} + +#endif /* _WRITE_BYTES_H_ */ diff --git a/lustre/utils/l_facl.c b/lustre/utils/l_facl.c new file mode 100644 index 0000000..3d12ec0 --- /dev/null +++ b/lustre/utils/l_facl.c @@ -0,0 +1,268 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <pwd.h> +#include <grp.h> +#include <stdarg.h> +#include <stddef.h> +#include <libgen.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <mntent.h> + +#include <lustre/liblustreapi.h> +#include <lustre/lustre_user.h> + +#include "obdctl.h" + +static char *progname; + +static void usage(void) +{ + fprintf(stderr, + "\nusage: %s {uid} {gid} {mdtname} {key} {handle} {cmd}\n" + "Normally invoked as an upcall from Lustre, set via:\n" + " /proc/fs/lustre/mdt/{mdtname}/rmtacl_upcall\n", + progname); +} + +static inline void show_result(struct rmtacl_downcall_data *data) +{ + fprintf(stdout, "buflen %d\n\n%s\n", data->add_buflen, data->add_buf); +} + +#define MDS_ERR "server processing error" + +static void errlog(char *buf, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); +} + +static char *get_lustre_mount(void) +{ + FILE *fp; + struct mntent *mnt; + static char mntpath[PATH_MAX] = ""; + + fp = setmntent(MOUNTED, "r"); + if (fp == NULL) { + fprintf(stderr, "setmntent %s failed: %s\n", + MOUNTED, strerror(errno)); + return NULL; + } + + while (1) { + mnt = getmntent(fp); + if (!mnt) + break; + + if (!llapi_is_lustre_mnttype(mnt)) + continue; + + /* + * XXX: The fsname can be configed by user, it should pass into as a parameter. + * Since we only need one client on mdt node for remote set{get}facl, it is + * unnecessary to do the accurate match for fsname, but match ":/" temporary. + */ + //if (strstr(mnt->mnt_fsname, ":/lustre")) { + /* save the mountpoint dir part */ + strncpy(mntpath, mnt->mnt_dir, sizeof(mntpath)); + endmntent(fp); + return mntpath; + //} + } + endmntent(fp); + + return NULL; +} + +int main(int argc, char **argv) +{ + struct rmtacl_downcall_data *data; + char procname[1024], *buf, *mntpath; + int out_pipe[2], err_pipe[2], pid, size, buflen, fd, rc; + + progname = basename(argv[0]); + + if (argc != 7) { + usage(); + return 1; + } + + size = offsetof(struct rmtacl_downcall_data, add_buf[RMTACL_SIZE_MAX]); + data = malloc(size); + if (!data) { + fprintf(stderr, "malloc %d failed\n", size); + return 1; + } + memset(data, 0, size); + data->add_magic = RMTACL_DOWNCALL_MAGIC; + data->add_key = strtoll(argv[4], NULL, 10); + data->add_handle = strtoul(argv[5], NULL, 10); + buf = data->add_buf; + + mntpath = get_lustre_mount(); + if (!mntpath) { + errlog(buf, MDS_ERR"(no lustre mounted on MDS)\n"); + goto downcall; + } + + /* create pipe */ + if (pipe(out_pipe) < 0 || pipe(err_pipe) < 0) { + errlog(buf, MDS_ERR"(pipe failed): %s\n", strerror(errno)); + goto downcall; + } + + if ((pid = fork()) < 0) { + errlog(buf, MDS_ERR"(fork failed): %s\n", strerror(errno)); + goto downcall; + } else if (pid == 0) { + uid_t uid; + gid_t gid; + + close(out_pipe[0]); + if (out_pipe[1] != STDOUT_FILENO) { + dup2(out_pipe[1], STDOUT_FILENO); + close(out_pipe[1]); + } + close(err_pipe[0]); + if (err_pipe[1] != STDERR_FILENO) { + dup2(err_pipe[1], STDERR_FILENO); + close(err_pipe[1]); + } + close(STDIN_FILENO); + + if (chdir(mntpath) < 0) { + fprintf(stderr, "chdir %s failed: %s\n", + mntpath, strerror(errno)); + return 1; + } + + gid = (gid_t)atoi(argv[2]); + if (gid) { + if (setgid(gid) == -1) { + fprintf(stderr, "setgid %u failed: %s\n", + gid, strerror(errno)); + return 1; + } + } + + uid = (uid_t)atoi(argv[1]); + if (uid) { + if (setuid(uid) == -1) { + fprintf(stderr, "setuid %u failed: %s\n", + uid, strerror(errno)); + return 1; + } + } + + execl("/bin/sh", "sh", "-c", argv[6], NULL); + fprintf(stderr, "execl %s failed: %s\n", + argv[6], strerror(errno)); + + return 1; + } + + /* parent process handling */ + close(out_pipe[1]); + close(err_pipe[1]); + + buflen = 0; + while (1) { + rc = read(out_pipe[0], buf + buflen, RMTACL_SIZE_MAX - buflen); + if (rc < 0) { + errlog(buf, MDS_ERR"(read failed): %s\n", + strerror(errno)); + break; + } + if (rc == 0) + break; + buflen += rc; + if (buflen >= RMTACL_SIZE_MAX) + break; + } + + if (buflen != 0) { + wait(&rc); + goto downcall; + } + + while (1) { + rc = read(err_pipe[0], buf + buflen, RMTACL_SIZE_MAX - buflen); + if (rc < 0) { + errlog(buf, MDS_ERR"(read failed): %s\n", + strerror(errno)); + break; + } + if (rc == 0) + break; + buflen += rc; + if (buflen >= RMTACL_SIZE_MAX) + break; + } + + wait(&rc); + +downcall: + buf[RMTACL_SIZE_MAX - 1] = 0; + data->add_buflen = strlen(buf) + 1; + if (getenv("L_FACL_TEST")) { + show_result(data); + free(data); + return 0; + } + + snprintf(procname, sizeof(procname), + "/proc/fs/lustre/mdt/%s/rmtacl_info", argv[3]); + fd = open(procname, O_WRONLY); + if (fd < 0) { + fprintf(stderr, "open %s failed: %s\n", + procname, strerror(errno)); + free(data); + return 1; + } + + buflen = offsetof(struct rmtacl_downcall_data, + add_buf[data->add_buflen]); + rc = write(fd, data, buflen); + close(fd); + if (rc != buflen) { + fprintf(stderr, "write %s len %d return %d: %s\n", + procname, buflen, rc, strerror(errno)); + free(data); + return 1; + } + + free(data); + return 0; +} diff --git a/lustre/utils/l_getgroups.c b/lustre/utils/l_getgroups.c deleted file mode 100644 index 1aa53e7..0000000 --- a/lustre/utils/l_getgroups.c +++ /dev/null @@ -1,249 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdlib.h> -#include <stdint.h> -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <pwd.h> -#include <grp.h> -#include <stdarg.h> -#include <stddef.h> -#include <syslog.h> -#include <sys/mman.h> -#include <time.h> - -#include <lustre/lustre_user.h> - -#define CHECK_DURATION_START \ -do { \ - time_t __check_start = time(NULL) - -#define CHECK_DURATION_END(str, secs) \ - if (time(NULL) > __check_start + (secs)) \ - errlog("LONG OP %s: %d elapsed, %d expected\n", str, \ - time(NULL) - __check_start, secs); \ -} while (0) - -void usage(FILE *out, const char *progname) -{ - fprintf(out, "\nusage: %s [-v] {-d | <mdsname>} <uid>\n" - "usage: %s [-v] -s\n" - "Normally invoked as an upcall from Lustre, set via:\n" - " /proc/fs/lustre/mds/{mdsname}/group_upcall\n" - "\t-d: debug, print values to stdout instead of Lustre\n" - "\t-s: sleep, mlock memory in core and sleep forever\n" - "\t-v: verbose, log start/stop to syslog\n", - progname, progname); -} - -static int compare_u32(const void *v1, const void *v2) -{ - return (*(__u32 *)v1 - *(__u32 *)v2); -} - -static void errlog(const char *fmt, ...) -{ - va_list arg, carg; - - va_start(arg, fmt); - va_copy(carg, arg); - vsyslog(LOG_NOTICE, fmt, arg); - va_end(arg); - - vfprintf(stderr, fmt, carg); - va_end(carg); -} - -int get_groups_local(struct mds_grp_downcall_data **grp) -{ - struct mds_grp_downcall_data *param; - int i, maxgroups, size; - struct passwd *pw; - struct group *gr; - - CHECK_DURATION_START; - pw = getpwuid((*grp)->mgd_uid); - CHECK_DURATION_END("getpwuid", 2); - if (!pw) { - errlog("no such user %u\n", (*grp)->mgd_uid); - (*grp)->mgd_err = errno ? errno : EIDRM; - return sizeof(*param); - } - (*grp)->mgd_gid = pw->pw_gid; - - maxgroups = sysconf(_SC_NGROUPS_MAX); - size = offsetof(struct mds_grp_downcall_data, mgd_groups[maxgroups]); - param = malloc(size); - if (param == NULL) { - errlog("fail to alloc %d bytes for uid %u with %d groups\n", - size, (*grp)->mgd_uid, maxgroups); - return sizeof(*param); - } - - memcpy(param, *grp, sizeof(*param)); - param->mgd_groups[param->mgd_ngroups++] = pw->pw_gid; - *grp = param; - CHECK_DURATION_START; - while ((gr = getgrent())) { - if (gr->gr_gid == pw->pw_gid) - continue; - if (!gr->gr_mem) - continue; - for (i = 0; gr->gr_mem[i]; i++) { - if (strcmp(gr->gr_mem[i], pw->pw_name) == 0) { - param->mgd_groups[param->mgd_ngroups++] = - gr->gr_gid; - break; - } - } - if (param->mgd_ngroups == maxgroups) - break; - } - CHECK_DURATION_END("getgrent loop", 3); - endgrent(); - qsort(param->mgd_groups, param->mgd_ngroups, - sizeof(param->mgd_groups[0]), compare_u32); - - return size; -} - -/* Note that we need to make the downcall regardless of error, so that the - * MDS doesn't continue to wait on the upcall. */ -int main(int argc, char **argv) -{ - int fd, rc, c, size; - int debug = 0, sleepy = 0, verbose = 0, print_usage = 0; - pid_t mypid; - struct mds_grp_downcall_data sparam = { MDS_GRP_DOWNCALL_MAGIC }; - struct mds_grp_downcall_data *param = &sparam; - char pathname[1024], *end, *progname, *mdsname = NULL; - - progname = strrchr(argv[0], '/'); - if (progname == NULL) - progname = argv[0]; - else - progname++; - - if (strstr(progname, "verbose")) - verbose++; - - openlog(progname, LOG_PERROR, LOG_AUTHPRIV); - - opterr = 0; - while ((c = getopt(argc, argv, "dhsv")) != -1) { - switch (c) { - case 'd': - debug++; - break; - case 's': - sleepy++; - break; - case 'v': - verbose++; - break; - default: - errlog("bad parameter '%c'\n", optopt); - print_usage++; - case 'h': - print_usage++; - break; - } - } - - /* sleep has 0 param, debug has 1 param, upcall has 2 param */ - if (!sleepy && optind + !sleepy + !debug != argc) - print_usage++; - - if (print_usage) { - usage(stderr, progname); - return print_usage > 1 ? EINVAL : 0; - } - - if (!sleepy) { - param->mgd_uid = strtoul(argv[optind + !debug], &end, 0); - if (*end) { - errlog("invalid uid '%s'", argv[optind + !debug]); - usage(stderr, progname); - return EINVAL; - } - if (!debug) - mdsname = argv[optind]; - } - - mypid = getpid(); - - if (verbose) - syslog(LOG_DEBUG, "starting l_getgroups(pid %u) for uid %u\n", - mypid, param->mgd_uid); - - CHECK_DURATION_START; - size = get_groups_local(¶m); - CHECK_DURATION_END("get_groups_local", 10); - if (debug) { - int i; - if (param->mgd_err) { - if (param->mgd_err != ENXIO) - errlog("error getting uid %d groups: %s\n", - param->mgd_uid,strerror(param->mgd_err)); - rc = param->mgd_err; - } else { - printf("uid=%d gid=", param->mgd_uid); - for (i = 0; i < param->mgd_ngroups; i++) - printf("%s%d", i > 0 ? "," : "", - param->mgd_groups[i]); - printf("\n"); - rc = 0; - } - } else if (sleepy) { - rc = mlockall(MCL_CURRENT); - errlog("%s all pages in RAM (pid %u): rc %d\n", - rc ? "failed to lock" : "locked", mypid, rc); - sleep(1000000000); - } else { - snprintf(pathname, 1024, "/proc/fs/lustre/mds/%s/group_info", - mdsname); - CHECK_DURATION_START; - fd = open(pathname, O_WRONLY); - if (fd < 0) { - errlog("can't open device %s: %s\n", - pathname, strerror(errno)); - rc = errno; - } else { - rc = write(fd, param, size); - if (rc > 0) - rc = 0; - - close(fd); - } - CHECK_DURATION_END("group_info write", 1); - } - if (verbose) - syslog(LOG_DEBUG, "ending l_getgroups(pid %u) for uid %u\n", - mypid, param->mgd_uid); - - closelog(); - return rc; -} diff --git a/lustre/utils/l_getidentity.c b/lustre/utils/l_getidentity.c new file mode 100644 index 0000000..c37057b --- /dev/null +++ b/lustre/utils/l_getidentity.c @@ -0,0 +1,440 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <pwd.h> +#include <grp.h> +#include <stdarg.h> +#include <stddef.h> +#include <libgen.h> +#include <syslog.h> + +#include <liblustre.h> +#include <lustre/lustre_user.h> +#include <lustre/lustre_idl.h> +#include <libcfs/kp30.h> + +#define SETXID_PATHNAME "/etc/lustre/setxid.conf" + +/* + * setxid permission file format is like this: + * {nid} {uid} {perms} + * + * '*' nid means any nid + * '*' uid means any uid + * the valid values for perms are: + * setuid/setgid/setgrp -- enable corresponding perm + * nosetuid/nosetgid/nosetgrp -- disable corresponding perm + * they can be listed together, seperated by ',', + * when perm and noperm are in the same line (item), noperm is preferential, + * when they are in different lines (items), the latter is preferential, + * '*' nid is as default perm, and is not preferential. + */ + +static char *progname; + +static void usage(void) +{ + fprintf(stderr, + "\nusage: %s {mdtname} {uid}\n" + "Normally invoked as an upcall from Lustre, set via:\n" + " /proc/fs/lustre/mdt/{mdtname}/identity_upcall\n", + progname); +} + +static int compare_u32(const void *v1, const void *v2) +{ + return (*(__u32 *)v1 - *(__u32 *)v2); +} + +static void errlog(const char *fmt, ...) +{ + va_list args; + + openlog(progname, LOG_PERROR, LOG_AUTHPRIV); + + va_start(args, fmt); + vsyslog(LOG_NOTICE, fmt, args); + fprintf(stderr, fmt, args); + va_end(args); + + closelog(); +} + +int get_groups_local(struct identity_downcall_data *data) +{ + int maxgroups; + gid_t *groups; + unsigned int ngroups = 0; + struct passwd *pw; + struct group *gr; + char *pw_name; + int namelen; + int i; + + pw = getpwuid(data->idd_uid); + if (!pw) { + errlog("no such user %u\n", data->idd_uid); + data->idd_err = errno ? errno : EIDRM; + return -1; + } + data->idd_gid = pw->pw_gid; + + namelen = sysconf(_SC_LOGIN_NAME_MAX); + if (namelen < _POSIX_LOGIN_NAME_MAX) + namelen = _POSIX_LOGIN_NAME_MAX; + pw_name = (char *)malloc(namelen); + if (!pw_name) { + errlog("malloc error\n"); + data->idd_err = errno; + return -1; + } + memset(pw_name, 0, namelen); + strncpy(pw_name, pw->pw_name, namelen - 1); + + maxgroups = sysconf(_SC_NGROUPS_MAX); + if (maxgroups > NGROUPS_MAX) + maxgroups = NGROUPS_MAX; + groups = data->idd_groups; + + groups[ngroups++] = pw->pw_gid; + while ((gr = getgrent())) { + if (gr->gr_gid == groups[0]) + continue; + if (!gr->gr_mem) + continue; + for (i = 0; gr->gr_mem[i]; i++) { + if (!strcmp(gr->gr_mem[i], pw_name)) { + groups[ngroups++] = gr->gr_gid; + break; + } + } + if (ngroups == maxgroups) + break; + } + endgrent(); + qsort(groups, ngroups, sizeof(*groups), compare_u32); + data->idd_ngroups = ngroups; + + free(pw_name); + return 0; +} + +static inline int comment_line(char *line) +{ + char *p = line; + + while (*p && (*p == ' ' || *p == '\t')) p++; + + if (!*p || *p == '\n' || *p == '#') + return 1; + return 0; +} + +static inline int match_uid(uid_t uid, const char *str) +{ + char *end; + uid_t uid2; + + if(!strcmp(str, "*")) + return -1; + + uid2 = strtoul(str, &end, 0); + if (*end) + return 0; + + return (uid == uid2); +} + +typedef struct { + char *name; + __u32 bit; +} setxid_perm_type_t; + +static setxid_perm_type_t setxid_perm_types[] = { + { "setuid", LUSTRE_SETUID_PERM }, + { "setgid", LUSTRE_SETGID_PERM }, + { "setgrp", LUSTRE_SETGRP_PERM }, + { 0 } +}; + +static setxid_perm_type_t setxid_noperm_types[] = { + { "nosetuid", LUSTRE_SETUID_PERM }, + { "nosetgid", LUSTRE_SETGID_PERM }, + { "nosetgrp", LUSTRE_SETGRP_PERM }, + { 0 } +}; + +int parse_setxid_perm(__u32 *perm, __u32 *noperm, char *str) +{ + char *start, *end; + char name[64]; + setxid_perm_type_t *pt; + + *perm = 0; + *noperm = 0; + start = str; + while (1) { + memset(name, 0, sizeof(name)); + end = strchr(start, ','); + if (!end) + end = str + strlen(str); + if (start >= end) + break; + strncpy(name, start, end - start); + for (pt = setxid_perm_types; pt->name; pt++) { + if (!strcasecmp(name, pt->name)) { + *perm |= pt->bit; + break; + } + } + + if (!pt->name) { + for (pt = setxid_noperm_types; pt->name; pt++) { + if (!strcasecmp(name, pt->name)) { + *noperm |= pt->bit; + break; + } + } + + if (!pt->name) { + printf("unkown type: %s\n", name); + return -1; + } + } + + start = end + 1; + } + return 0; +} + +int parse_setxid_perm_line(struct identity_downcall_data *data, char *line) +{ + char uid_str[256], nid_str[256], perm_str[256]; + lnet_nid_t nid; + __u32 perm, noperm; + int rc, i; + + if (data->idd_nperms >= N_SETXID_PERMS_MAX) { + errlog("setxid permission count %d > max %d\n", + data->idd_nperms, N_SETXID_PERMS_MAX); + return -1; + } + + rc = sscanf(line, "%s %s %s", nid_str, uid_str, perm_str); + if (rc != 3) { + errlog("can't parse line %s\n", line); + return -1; + } + + if (!match_uid(data->idd_uid, uid_str)) + return 0; + + if (!strcmp(nid_str, "*")) { + nid = LNET_NID_ANY; + } else { + nid = libcfs_str2nid(nid_str); + if (nid == LNET_NID_ANY) { + errlog("can't parse nid %s\n", nid_str); + return -1; + } + } + + if (parse_setxid_perm(&perm, &noperm, perm_str)) { + errlog("invalid perm %s\n", perm_str); + return -1; + } + + /* merge the perms with the same nid. + * + * If there is LNET_NID_ANY in data->idd_perms[i].pdd_nid, + * it must be data->idd_perms[0].pdd_nid, and act as default perm. + */ + if (nid != LNET_NID_ANY) { + int found = 0; + + /* search for the same nid */ + for (i = data->idd_nperms - 1; i >= 0; i--) { + if (data->idd_perms[i].pdd_nid == nid) { + data->idd_perms[i].pdd_perm = + (data->idd_perms[i].pdd_perm | perm) & + ~noperm; + found = 1; + break; + } + } + + /* NOT found, add to tail */ + if (!found) { + data->idd_perms[data->idd_nperms].pdd_nid = nid; + data->idd_perms[data->idd_nperms].pdd_perm = + perm & ~noperm; + data->idd_nperms++; + } + } else { + if (data->idd_nperms > 0) { + /* the first one isn't LNET_NID_ANY, need exchange */ + if (data->idd_perms[0].pdd_nid != LNET_NID_ANY) { + data->idd_perms[data->idd_nperms].pdd_nid = + data->idd_perms[0].pdd_nid; + data->idd_perms[data->idd_nperms].pdd_perm = + data->idd_perms[0].pdd_perm; + data->idd_perms[0].pdd_nid = LNET_NID_ANY; + data->idd_perms[0].pdd_perm = perm & ~noperm; + data->idd_nperms++; + } else { + /* only fix LNET_NID_ANY item */ + data->idd_perms[0].pdd_perm = + (data->idd_perms[0].pdd_perm | perm) & + ~noperm; + } + } else { + /* it is the first one, only add to head */ + data->idd_perms[0].pdd_nid = LNET_NID_ANY; + data->idd_perms[0].pdd_perm = perm & ~noperm; + data->idd_nperms = 1; + } + } + + return 0; +} + +int get_setxid_perms(FILE *fp, struct identity_downcall_data *data) +{ + char line[1024]; + + while (fgets(line, 1024, fp)) { + if (comment_line(line)) + continue; + + if (parse_setxid_perm_line(data, line)) { + errlog("parse line %s failed!\n", line); + return -1; + } + } + + return 0; +} + +static void show_result(struct identity_downcall_data *data) +{ + int i; + + if (data->idd_err) { + errlog("failed to get identity for uid %d: %s\n", + data->idd_uid, strerror(data->idd_err)); + return; + } + + printf("uid=%d gid=", data->idd_uid); + for (i = 0; i < data->idd_ngroups; i++) + printf("%s%u", i > 0 ? "," : "", data->idd_groups[i]); + printf("\n"); + printf("setxid permissions:\n" + " nid\t\t\tperm\n"); + for (i = 0; i < data->idd_nperms; i++) { + struct setxid_perm_downcall_data *pdd; + + pdd = &data->idd_perms[i]; + + printf(" %#llx\t0x%x\n", pdd->pdd_nid, pdd->pdd_perm); + } + printf("\n"); +} + +int main(int argc, char **argv) +{ + FILE *perms_fp; + char *end; + struct identity_downcall_data *data; + char procname[1024]; + unsigned long uid; + int fd, rc; + + progname = basename(argv[0]); + + if (argc != 3) { + usage(); + return 1; + } + + uid = strtoul(argv[2], &end, 0); + if (*end) { + errlog("%s: invalid uid '%s'\n", progname, argv[2]); + usage(); + return 1; + } + + data = malloc(sizeof(*data)); + if (!data) { + errlog("malloc identity downcall data(%d) failed!\n", + sizeof(*data)); + return 1; + } + memset(data, 0, sizeof(*data)); + data->idd_magic = IDENTITY_DOWNCALL_MAGIC; + data->idd_uid = uid; + + /* get groups for uid */ + rc = get_groups_local(data); + if (rc) + goto downcall; + + /* read permission database */ + perms_fp = fopen(SETXID_PATHNAME, "r"); + if (perms_fp) { + get_setxid_perms(perms_fp, data); + fclose(perms_fp); + } else if (errno != ENOENT) { + errlog("open %s failed: %s\n", + SETXID_PATHNAME, strerror(errno)); + } + +downcall: + if (getenv("L_GETIDENTITY_TEST")) { + show_result(data); + return 0; + } + + snprintf(procname, sizeof(procname), + "/proc/fs/lustre/mdt/%s/identity_info", argv[1]); + fd = open(procname, O_WRONLY); + if (fd < 0) { + errlog("can't open file %s: %s\n", procname, strerror(errno)); + return 1; + } + + rc = write(fd, data, sizeof(*data)); + close(fd); + if (rc != sizeof(*data)) { + errlog("partial write ret %d: %s\n", rc, strerror(errno)); + return 1; + } + + return 0; +} diff --git a/lustre/utils/lconf b/lustre/utils/lconf new file mode 100755 index 0000000..acba01a --- /dev/null +++ b/lustre/utils/lconf @@ -0,0 +1,2910 @@ +#!/usr/bin/env python +# -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- +# vim:expandtab:shiftwidth=8:tabstop=8: +# +# Copyright (C) 2002-2003 Cluster File Systems, Inc. +# Authors: Robert Read <rread@clusterfs.com> +# Mike Shaver <shaver@clusterfs.com> +# This file is part of Lustre, http://www.lustre.org. +# +# Lustre is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. +# +# Lustre is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Lustre; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# lconf - lustre configuration tool +# +# lconf is the main driver script for starting and stopping +# lustre filesystem services. +# +# Based in part on the XML obdctl modifications done by Brian Behlendorf + +import sys, getopt, types, errno +import string, os, stat, popen2, socket, time, random, fcntl, select +import re, exceptions, signal, traceback +import xml.dom.minidom + +if sys.version[0] == '1': + from FCNTL import F_GETFL, F_SETFL +else: + from fcntl import F_GETFL, F_SETFL + +PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"] +PLATFORM = '' +KEXTPATH = '' +if string.find(sys.platform, 'linux') != -1: + PLATFORM='LINUX' +elif string.find(sys.platform, 'darwin') != -1: + PLATFORM='DARWIN' + KEXTPATH='/System/Library/Extensions/' +else: + PLATFORM='Unsupported' + +def development_mode(): + base = os.path.dirname(sys.argv[0]) + if os.access(base+"/Makefile", os.R_OK): + return 1 + return 0 + +if development_mode(): + sys.path.append('../utils') +else: + sys.path.extend(PYMOD_DIR) + +import Lustre + +# Global parameters +MAXTCPBUF = 16777216 +# +# Maximum number of devices to search for. +# (the /dev/loop* nodes need to be created beforehand) +MAX_LOOP_DEVICES = 256 +PORTALS_DIR = '../lnet' + +# Needed to call lconf --record +CONFIG_FILE = "" + +# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h +ptldebug_names = { + "trace" : (1 << 0), + "inode" : (1 << 1), + "super" : (1 << 2), + "ext2" : (1 << 3), + "malloc" : (1 << 4), + "cache" : (1 << 5), + "info" : (1 << 6), + "ioctl" : (1 << 7), + "blocks" : (1 << 8), + "net" : (1 << 9), + "warning" : (1 << 10), + "buffs" : (1 << 11), + "other" : (1 << 12), + "dentry" : (1 << 13), + "portals" : (1 << 14), # deprecated + "lnet" : (1 << 14), + "page" : (1 << 15), + "dlmtrace" : (1 << 16), + "error" : (1 << 17), + "emerg" : (1 << 18), + "ha" : (1 << 19), + "rpctrace" : (1 << 20), + "vfstrace" : (1 << 21), + "reada" : (1 << 22), + "mmap" : (1 << 23), + "config" : (1 << 24), + "console" : (1 << 25), + "quota" : (1 << 26), + "sec" : (1 << 27), + } + +subsystem_names = { + "undefined" : (1 << 0), + "mdc" : (1 << 1), + "mds" : (1 << 2), + "osc" : (1 << 3), + "ost" : (1 << 4), + "class" : (1 << 5), + "log" : (1 << 6), + "llite" : (1 << 7), + "rpc" : (1 << 8), + "lnet" : (1 << 10), + "portals" : (1 << 10), # deprecated + "lnd" : (1 << 11), + "nal" : (1 << 11), # deprecated + "pinger" : (1 << 12), + "filter" : (1 << 13), + "ptlbd" : (1 << 14), # deprecated + "echo" : (1 << 15), + "ldlm" : (1 << 16), + "lov" : (1 << 17), + "ptlrouter" : (1 << 18), # deprecated + "cobd" : (1 << 19), + "sm" : (1 << 20), + "asobd" : (1 << 21), + "confobd" : (1 << 22), # deprecated + "lmv" : (1 << 23), + "cmobd" : (1 << 24), + "sec" : (1 << 25), + "sec" : (1 << 26), + "gss" : (1 << 27), + "gks" : (1 << 28), + "mgc" : (1 << 29), + "mgs" : (1 << 30), + } + + +first_cleanup_error = 0 +def cleanup_error(rc): + global first_cleanup_error + if not first_cleanup_error: + first_cleanup_error = rc + +# ============================================================ +# debugging and error funcs + +def fixme(msg = "this feature"): + raise Lustre.LconfError, msg + ' not implemented yet.' + +def panic(*args): + msg = string.join(map(str,args)) + if not config.noexec: + raise Lustre.LconfError(msg) + else: + print "! " + msg + +def log(*args): + msg = string.join(map(str,args)) + print msg + +def logall(msgs): + for s in msgs: + print string.strip(s) + +def debug(*args): + # apparently, (non)execution of the following line affects mds device + # startup order (e.g. two mds's using loopback devices), so always do it. + msg = string.join(map(str,args)) + if config.verbose: + print msg + +# ack, python's builtin int() does not support '0x123' syntax. +# eval can do it, although what a hack! +def my_int(s): + import types + if type(s) is types.IntType: + return s + try: + if (s[0:2] == '0x') or (s[0:1] == '0'): + return eval(s, {}, {}) + else: + return int(s) + except SyntaxError, e: + raise ValueError("not a number") + except TypeError, e: + raise ValueError("not a number") + except NameError, e: + raise ValueError("not a number") + +# ============================================================ +# locally defined exceptions +class CommandError (exceptions.Exception): + def __init__(self, cmd_name, cmd_err, rc=None): + self.cmd_name = cmd_name + self.cmd_err = cmd_err + self.rc = rc + + def dump(self): + import types + if type(self.cmd_err) == types.StringType: + if self.rc: + print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err) + else: + print "! %s: %s" % (self.cmd_name, self.cmd_err) + elif type(self.cmd_err) == types.ListType: + if self.rc: + print "! %s (error %d):" % (self.cmd_name, self.rc) + else: + print "! %s:" % (self.cmd_name) + for s in self.cmd_err: + print "> %s" %(string.strip(s)) + else: + print self.cmd_err + +# ============================================================ +# handle lctl interface +class LCTLInterface: + """ + Manage communication with lctl + """ + + def __init__(self, cmd): + """ + Initialize close by finding the lctl binary. + """ + self.lctl = find_prog(cmd) + self.save_file = '' + self.record_device = '' + if not self.lctl: + if config.noexec: + debug('! lctl not found') + self.lctl = 'lctl' + else: + raise CommandError('lctl', "unable to find lctl binary.") + + def use_save_file(self, file): + self.save_file = file + + def record(self, dev_name, logname): + log("Recording log", logname, "on", dev_name) + self.record_device = dev_name + self.record_log = logname + + def end_record(self): + log("End recording log", self.record_log, "on", self.record_device) + self.record_device = None + self.record_log = None + + def set_nonblock(self, fd): + fl = fcntl.fcntl(fd, F_GETFL) + fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY) + + def run(self, cmds): + """ + run lctl + the cmds are written to stdin of lctl + lctl doesn't return errors when run in script mode, so + stderr is checked + should modify command line to accept multiple commands, or + create complex command line options + """ + cmd_line = self.lctl + if self.save_file: + cmds = '\n dump ' + self.save_file + '\n' + cmds + elif self.record_device: + cmds = """ + device $%s + record %s + %s""" % (self.record_device, self.record_log, cmds) + + debug("+", cmd_line, cmds) + if config.noexec: return (0, []) + + child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command + child.tochild.write(cmds + "\nq\n") + child.tochild.close() + + # From "Python Cookbook" from O'Reilly + outfile = child.fromchild + outfd = outfile.fileno() + self.set_nonblock(outfd) + errfile = child.childerr + errfd = errfile.fileno() + self.set_nonblock(errfd) + + outdata = errdata = '' + outeof = erreof = 0 + while 1: + ready = select.select([outfd,errfd],[],[]) # Wait for input + if outfd in ready[0]: + outchunk = outfile.read() + if outchunk == '': outeof = 1 + outdata = outdata + outchunk + if errfd in ready[0]: + errchunk = errfile.read() + if errchunk == '': erreof = 1 + errdata = errdata + errchunk + if outeof and erreof: break + # end of "borrowed" code + + ret = child.wait() + if os.WIFEXITED(ret): + rc = os.WEXITSTATUS(ret) + else: + rc = 0 + if rc or len(errdata): + raise CommandError(self.lctl, errdata, rc) + return rc, outdata + + def runcmd(self, *args): + """ + run lctl using the command line + """ + cmd = string.join(map(str,args)) + debug("+", self.lctl, cmd) + rc, out = run(self.lctl, cmd) + if rc: + raise CommandError(self.lctl, out, rc) + return rc, out + + def unconfigure_network(self): + """get lnet to unreference itself""" + cmds = """ + network unconfigure""" + self.run(cmds) + + def clear_log(self, dev, log): + """ clear an existing log """ + cmds = """ + device $%s + probe + clear_log %s + quit """ % (dev, log) + self.run(cmds) + + # create a new connection + def add_uuid(self, net_type, uuid, nid): + if net_type != 'lnet' and string.find(nid,'@') < 0: + nidstr = nid + "@" + net_type + else: + nidstr = nid + cmds = "\n add_uuid %s %s" %(uuid, nidstr) + self.run(cmds) + + def connect(self, srv): + if not srv.nid_uuid: + panic('nid_uuid not set for ', srv.net_type, srv.nid) + hostaddr = srv.db.get_hostaddr() + if len(hostaddr) > 1: + panic('multiple --hostaddr for ', srv.nid_uuid, ' not supported') + elif len(hostaddr) == 1 and hostaddr[0] != srv.nid: + panic('different --hostaddr and --nid for ', srv.nid_uuid, ' not supported') + else: + self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) + + # Recover a device + def recover(self, dev_name, new_conn): + cmds = """ + device $%s + recover %s""" %(dev_name, new_conn) + self.run(cmds) + + # disconnect one connection + def disconnect(self, srv): + if not srv.nid_uuid: + panic('nid_uuid not set for ', srv.net_type, srv.nid) + self.del_uuid(srv.nid_uuid) + + def del_uuid(self, uuid): + cmds = """ + ignore_errors + del_uuid %s + quit""" % (uuid,) + self.run(cmds) + + def attach(self, type, name, uuid): + cmds = """ + attach %s %s %s + quit""" % (type, name, uuid) + self.run(cmds) + + def setup(self, name, setup = ""): + cmds = """ + cfg_device %s + setup %s + quit""" % (name, setup) + self.run(cmds) + + def abort_recovery(self, name): + cmds = """ + ignore_errors + device $%s + abort_recovery + quit""" % (name) + self.run(cmds) + + def add_conn(self, name, conn_uuid): + cmds = """ + cfg_device %s + add_conn %s + quit""" % (name, conn_uuid) + self.run(cmds) + + # create a new device with lctl + def newdev(self, type, name, uuid, setup = ""): + self.attach(type, name, uuid); + try: + self.setup(name, setup) + except CommandError, e: + self.cleanup(name, uuid, 0) + raise e + if (config.abort_recovery): + if (type == 'obdfilter' or type == 'mds'): + self.abort_recovery(name) + + # cleanup a device + def cleanup(self, name, uuid, force, failover = 0): + if failover: force = 1 + cmds = """ + ignore_errors + cfg_device $%s + cleanup %s %s + detach + quit""" % (name, ('', 'force')[force], + ('', 'failover')[failover]) + self.run(cmds) + + # create an lov + def lov_setup(self, name, uuid, desc_uuid, mdsuuid, stripe_cnt, + stripe_sz, stripe_off, pattern): + cmds = """ + attach lov %s %s + lov_setup %s %d %d %d %s + quit""" % (name, uuid, desc_uuid, stripe_cnt, stripe_sz, stripe_off, pattern) + self.run(cmds) + + # add an OBD to a LOV + def lov_add_obd(self, name, uuid, obd_uuid, index, gen): + cmds = """ + cfg_device %s + lov_modify_tgts add %s %s %s %s + quit""" % (name, name, obd_uuid, index, gen) + self.run(cmds) + + # delete an OBD from a LOV + def lov_del_obd(self, name, uuid, obd_uuid, index, gen): + cmds = """ + cfg_device %s + lov_modify_tgts del %s %s %s %s + quit""" % (name, name, obd_uuid, index, gen) + self.run(cmds) + + # deactivate an OBD + def deactivate(self, name): + cmds = """ + cfg_device %s + deactivate + quit""" % (name) + self.run(cmds) + + # dump the log file + def dump(self, dump_file): + cmds = """ + debug_kernel %s 1 + quit""" % (dump_file) + self.run(cmds) + + # get list of devices + def device_list(self): + ret = [] + if PLATFORM == 'LINUX': + devices = '/proc/fs/lustre/devices' + if os.access(devices, os.R_OK): + try: + fp = open(devices, 'r') + ret = fp.readlines() + fp.close() + except IOError, e: + log(e) + elif PLATFORM == 'DARWIN': + rc, out = self.run("device_list") + ret = out.split("\n") + if len(ret) == 0: + return ret + tail = ret[-1] + if not tail: + # remove the last empty line + ret = ret[:-1] + return ret + + # get lustre version + def lustre_version(self): + rc, out = self.runcmd('version') + return out + + # dump mount options + def mount_option(self, profile, osc, mdc): + cmds = """ + mount_option %s %s %s + quit""" % (profile, osc, mdc) + self.run(cmds) + + # delete mount options + def del_mount_option(self, profile): + cmds = """ + del_mount_option %s + quit""" % (profile,) + self.run(cmds) + + def set_timeout(self, timeout): + cmds = """ + set_timeout %s + quit""" % (timeout,) + self.run(cmds) + + # set lustre upcall + def set_lustre_upcall(self, upcall): + cmds = """ + set_lustre_upcall %s + quit""" % (upcall,) + self.run(cmds) +# ============================================================ +# Various system-level functions +# (ideally moved to their own module) + +# Run a command and return the output and status. +# stderr is sent to /dev/null, could use popen3 to +# save it if necessary +def runcmd(cmd): + debug ("+", cmd) + if config.noexec: return (0, []) + f = os.popen(cmd + ' 2>&1') + out = f.readlines() + ret = f.close() + if ret: + ret = ret >> 8 + else: + ret = 0 + return (ret, out) + +def run(*args): + cmd = string.join(map(str,args)) + return runcmd(cmd) + +# Run a command in the background. +def run_daemon(*args): + cmd = string.join(map(str,args)) + debug ("+", cmd) + if config.noexec: return 0 + f = os.popen(cmd + ' 2>&1') + ret = f.close() + if ret: + ret = ret >> 8 + else: + ret = 0 + return ret + +# Determine full path to use for an external command +# searches dirname(argv[0]) first, then PATH +def find_prog(cmd): + syspath = string.split(os.environ['PATH'], ':') + cmdpath = os.path.dirname(sys.argv[0]) + syspath.insert(0, cmdpath); + if config.portals: + syspath.insert(0, os.path.join(config.portals, 'utils/')) + for d in syspath: + prog = os.path.join(d,cmd) + if os.access(prog, os.X_OK): + return prog + return '' + +# Recursively look for file starting at base dir +def do_find_file(base, mod): + fullname = os.path.join(base, mod) + if os.access(fullname, os.R_OK): + return fullname + for d in os.listdir(base): + dir = os.path.join(base,d) + if os.path.isdir(dir): + module = do_find_file(dir, mod) + if module: + return module + +def find_module(src_dir, dev_dir, modname): + modbase = src_dir +'/'+ dev_dir +'/'+ modname + for modext in '.ko', '.o': + module = modbase + modext + try: + if os.access(module, os.R_OK): + return module + except OSError: + pass + return None + +# is the path a block device? +def is_block(path): + s = () + try: + s = os.stat(path) + except OSError: + return 0 + return stat.S_ISBLK(s[stat.ST_MODE]) + +def my_realpath(path): + try: + if os.path.islink(path): + # get the realpath of the mount point path + if 'realpath' in dir(os.path): + real_path = os.path.realpath(path) + else: + real_path = path + link_count = 0 + while os.path.islink(real_path) and (link_count < 20): + link_count = link_count + 1 + path_link = os.readlink(real_path) + if os.path.isabs(path_link): + real_path = path_link + else: + real_path = os.path.join(os.path.dirname(real_path), path_link) + if link_count > 19: + panic("Encountered too many symbolic links resolving path:", path) + else: + real_path = path + + return real_path + except: + panic("Fatal error realpath()ing path:", path) + + +# build fs according to type +# fixme: dangerous +def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): + block_cnt = '' + jopt = '' + iopt = '' + if devsize: + if devsize < 8000: + panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"% + (dev, devsize)) + # devsize is in 1k, and fs block count is in 4k + block_cnt = devsize/4 + + if fstype in ('ext3', 'ldiskfs'): + # ext3 journal size is in megabytes + if jsize == 0: + if devsize == 0: + if not is_block(dev): + ret, out = runcmd("ls -l %s" %dev) + devsize = int(string.split(out[0])[4]) / 1024 + else: + # sfdisk works for symlink, hardlink, and realdev + ret, out = runcmd("sfdisk -s %s" %dev) + if not ret: + devsize = int(out[0]) + else: + # sfdisk -s will fail for too large block device, + # then, read the size of partition from /proc/partitions + + # get the realpath of the device + # it may be the real device, such as /dev/hda7 + # or the hardlink created via mknod for a device + real_dev = my_realpath(dev) + + # get the major and minor number of the realpath via ls + # it seems python(os.stat) does not return + # the st_rdev member of the stat structure + ret, out = runcmd("ls -l %s" %real_dev) + major = string.split(string.split(out[0])[4], ",")[0] + minor = string.split(out[0])[5] + + # get the devsize from /proc/partitions with the major and minor number + ret, out = runcmd("cat /proc/partitions") + for line in out: + if len(line) > 1: + if string.split(line)[0] == major and string.split(line)[1] == minor: + devsize = int(string.split(line)[2]) + break + + if devsize > 1024 * 1024: + jsize = ((devsize / 102400) * 4) + if jsize > 400: + jsize = 400 + if jsize: jopt = "-J size=%d" %(jsize,) + if isize: iopt = "-I %d" %(isize,) + mkfs = 'mkfs.ext2 -j -b 4096 ' + if not isblock or config.force: + mkfs = mkfs + ' -F ' + elif fstype == 'reiserfs': + # reiserfs journal size is in blocks + if jsize: jopt = "--journal_size %d" %(jsize,) + mkfs = 'mkreiserfs -ff' + else: + panic('unsupported fs type: ', fstype) + + if config.mkfsoptions != None: + mkfs = mkfs + ' ' + config.mkfsoptions + if mkfsoptions != None: + mkfs = mkfs + ' ' + mkfsoptions + (ret, out) = run (mkfs, jopt, iopt, dev, block_cnt) + if ret: + panic("Unable to build fs:", dev, string.join(out)) + # enable hash tree indexing on fsswe + if fstype in ('ext3', 'ldiskfs'): + htree = 'tune2fs -O dir_index' + (ret, out) = run (htree, dev) + if ret: + panic("Unable to enable htree:", dev) + +# some systems use /dev/loopN, some /dev/loop/N +def loop_base(): + import re + loop = '/dev/loop' + if not os.access(loop + str(0), os.R_OK): + loop = loop + '/' + if not os.access(loop + str(0), os.R_OK): + loop='/dev/loop' + return loop + +# find loop device assigned to the file +def find_loop(file): + loop = loop_base() + for n in xrange(0, MAX_LOOP_DEVICES): + dev = loop + str(n) + if os.access(dev, os.R_OK): + (stat, out) = run('losetup', dev) + if out and stat == 0: + m = re.search(r'\((.*)\)', out[0]) + if m and file == m.group(1): + return dev + else: + break + return '' + +# create file if necessary and assign the first free loop device +def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reformat): + dev = find_loop(file) + if dev: + print 'WARNING file:', file, 'already mapped to', dev + return dev + if reformat or not os.access(file, os.R_OK | os.W_OK): + if size < 8000: + panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) + (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, + file)) + if ret: + panic("Unable to create backing store:", file) + mkfs(file, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) + + loop = loop_base() + # find next free loop + for n in xrange(0, MAX_LOOP_DEVICES): + dev = loop + str(n) + if os.access(dev, os.R_OK): + (stat, out) = run('losetup', dev) + if stat: + (stat, out) = run('losetup', dev, file) + if stat: + panic("losetup failed: (%s) %s" % (stat, out[0].strip())) + return dev + else: + print "out of loop devices" + return '' + print "out of loop devices" + return '' + +# undo loop assignment +def clean_loop(file): + dev = find_loop(file) + if dev: + ret, out = run('losetup -d', dev) + if ret: + log('unable to clean loop device:', dev, 'for file:', file) + logall(out) + +# determine if dev is formatted as a <fstype> filesystem +def need_format(fstype, dev): + # FIXME don't know how to implement this + return 0 + +# initialize a block device if needed +def block_dev(dev, size, fstype, reformat, autoformat, journal_size, + inode_size, mkfsoptions): + if config.noexec: return dev + if not is_block(dev): + dev = init_loop(dev, size, fstype, journal_size, inode_size, + mkfsoptions, reformat) + elif reformat or (need_format(fstype, dev) and autoformat == 'yes'): + mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions, + isblock=0) +# else: +# panic("device:", dev, +# "not prepared, and autoformat is not set.\n", +# "Rerun with --reformat option to format ALL filesystems") + return dev + +def if2addr(iface): + """lookup IP address for an interface""" + rc, out = run("/sbin/ifconfig", iface) + if rc or not out: + return None + addr = string.split(out[1])[1] + ip = string.split(addr, ':')[1] + return ip + +def def_mount_options(fstype, target, blkdev): + """returns deafult mount options for passed fstype and target (mds, ost)""" + if fstype == 'ext3' or fstype == 'ldiskfs': + mountfsoptions = "errors=remount-ro" + if target == 'ost': + if sys_get_branch() == '2.4': + mountfsoptions = "%s,asyncdel" % (mountfsoptions) + #else: + # mountfsoptions = "%s,extents,mballoc" % (mountfsoptions) + elif target == 'mds': + if config.user_xattr: + mountfsoptions = "%s,user_xattr" % (mountfsoptions) + if config.acl: + mountfsoptions = "%s,acl" % (mountfsoptions) + + if blkdev: + # grab superblock info + dumpe2fs="dumpe2fs -f -h" + (ret, sb) = run(dumpe2fs, blkdev) + if ret: + panic("unable to get superblock for ", blkdev) + + # extract journal UUID + journal_UUID='' + journal_DEV='' + for line in sb: + lst = string.split(line, ":") + if lst[0] == 'Journal UUID': + if len(lst[1]) < 3: + panic("cannot retrieve journal UUID for ", blkdev) + if string.split(lst[1])[0] != '<none>': + journal_UUID = string.split(lst[1])[0] + debug(blkdev, 'has journal UUID', journal_UUID) + if lst[0] == 'Journal device': + if len(lst[1]) < 3: + panic("cannot retrieve journal device for ", blkdev) + if string.split(lst[1])[0] != '0x0000': + journal_DEV = string.split(lst[1])[0] + debug(blkdev, 'has journal device', journal_DEV) + break + + if len(journal_UUID) == 0 or len(journal_DEV) == 0: + debug('no external journal found for', blkdev) + # use internal journal + return mountfsoptions + + # run blkid, lookup highest-priority device with matching UUID + blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID) + (ret, devname) = run(blkid) + if ret or len(devname) == 0: + panic("cannot find external journal for ", blkdev) + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + string.replace(devname[0], '\n', '')) + + try: # sigh, python 1.5 does not support os.stat().st_rdev + jdevpath = my_realpath(string.replace(devname[0], '\n', '')) + ret, out = runcmd("ls -l %s" %jdevpath) + debug('ls -l:', out) + major = int(string.split(string.split(out[0])[4], ',')[0]) + minor = int(string.split(out[0])[5]) + debug('major', major, 'minor', minor) + rdev = major << 8 | minor + except OSError: + panic("cannot stat ", devname[0]) + + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + jdevpath, 'rdev', rdev) + + # add mount option + if string.atoi(journal_DEV, 0) != rdev: + mountfsoptions = "%s,journal_dev=%#x" % (mountfsoptions,rdev) + + return mountfsoptions + return "" + +def sys_get_branch(): + """Returns kernel release""" + return os.uname()[2][:3] + +def mod_loaded(modname): + """Check if a module is already loaded. Look in /proc/modules for it.""" + if PLATFORM == 'LINUX': + try: + fp = open('/proc/modules') + lines = fp.readlines() + fp.close() + # please forgive my tired fingers for this one + ret = filter(lambda word, mod=modname: word == mod, + map(lambda line: string.split(line)[0], lines)) + return ret + except Exception, e: + return 0 + elif PLATFORM == 'DARWIN': + ret, out = run('/usr/sbin/kextstat | /usr/bin/grep', modname) + if ret == 0: + return 1 + else: + return 0 + else: + return 0 + +# XXX: instead of device_list, ask for $name and see what we get +def is_prepared(name): + """Return true if a device exists for the name""" + if config.lctl_dump: + return 0 + if (config.noexec or config.record) and config.cleanup: + return 1 + try: + # expect this format: + # 1 UP ldlm ldlm ldlm_UUID 2 + out = lctl.device_list() + for s in out: + if name == string.split(s)[3]: + return 1 + except CommandError, e: + e.dump() + return 0 + +def is_network_prepared(): + """If the any device exists, then assume that all networking + has been configured""" + out = lctl.device_list() + return len(out) > 0 + +def fs_is_mounted(path): + """Return true if path is a mounted lustre filesystem""" + try: + real_path = my_realpath(path) + + fp = open('/proc/mounts') + lines = fp.readlines() + fp.close() + for l in lines: + a = string.split(l) + if a[1] == real_path and a[2] == 'lustre_lite': + return 1 + except IOError, e: + log(e) + return 0 + +class kmod: + """Manage kernel modules""" + def __init__(self, lustre_dir, portals_dir): + self.lustre_dir = lustre_dir + self.portals_dir = portals_dir + self.kmodule_list = [] + + def add_portals_module(self, dev_dir, modname): + """Append a module to list of modules to load.""" + self.kmodule_list.append((self.portals_dir, dev_dir, modname)) + + def add_lustre_module(self, dev_dir, modname): + """Append a module to list of modules to load.""" + self.kmodule_list.append((self.lustre_dir, dev_dir, modname)) + + def load_module(self): + """Load all the modules in the list in the order they appear.""" + for src_dir, dev_dir, mod in self.kmodule_list: + if mod_loaded(mod) and not config.noexec: + continue + log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) + if PLATFORM == 'LINUX': + options = '' + if mod == 'lnet': + #For LNET we really need modprobe to load defined LNDs + run('/sbin/modprobe lnet') + #But if that fails, try insmod anyhow with dev option + #accept=all for dev liblustre testing + options = 'accept=all' + if src_dir: + module = find_module(src_dir, dev_dir, mod) + if not module: + panic('module not found:', mod) + (rc, out) = run('/sbin/insmod', module, options) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") + raise CommandError('insmod', out, rc) + else: + (rc, out) = run('/sbin/modprobe', mod) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") + raise CommandError('modprobe', out, rc) + elif PLATFORM == 'DARWIN': + run('/sbin/kextload', KEXTPATH + mod + '.kext'); + + def cleanup_module(self): + """Unload the modules in the list in reverse order.""" + + rev = self.kmodule_list[:] # make *copy* of list + rev.reverse() + for src_dir, dev_dir, mod in rev: + if not mod_loaded(mod) and not config.noexec: + continue + if mod == 'ksocklnd' and not config.noexec: + # Ignore ksocklnd in module list (lnet will remove) + continue + log('unloading module:', mod) + if mod == 'lnet' and not config.noexec: + # remove any self-ref portals created + lctl.unconfigure_network() + if config.dump: + debug('dumping debug log to', config.dump) + # debug hack + lctl.dump(config.dump) + log('unloading the network') + lctl.unconfigure_network() + if mod_loaded("ksocklnd"): + if PLATFORM == 'LINUX': + run('/sbin/rmmod ksocklnd') + elif PLATFORM == 'DARWIN': + run('/sbin/kextunload', KEXTPATH+'ksocklnd.kext') + if mod_loaded("kqswlnd"): + run('/sbin/rmmod kqswlnd') + if mod_loaded("kgmlnd"): + run('/sbin/rmmod kgmlnd') + if mod_loaded("kopeniblnd"): + run('/sbin/rmmod kopeniblnd') + if mod_loaded("kiiblnd"): + run('/sbin/rmmod kiiblnd') + if mod_loaded("kviblnd"): + run('/sbin/rmmod kviblnd') + if mod_loaded("kciblnd"): + run('/sbin/rmmod kciblnd') + if mod_loaded("ko2iblnd"): + run('/sbin/rmmod ko2iblnd') + if mod_loaded("kralnd"): + run('/sbin/rmmod kralnd') + if mod_loaded("kptllnd"): + run('/sbin/rmmod kptllnd') + if PLATFORM == 'LINUX': + (rc, out) = run('/sbin/rmmod', mod) + elif PLATFORM == 'DARWIN': + (rc, out) = run('/sbin/kextunload', KEXTPATH+mod+'.kext'); + if rc: + log('! unable to unload module:', mod) + logall(out) + + +# ============================================================ +# Classes to prepare and cleanup the various objects +# +class Module: + """ Base class for the rest of the modules. The default cleanup method is + defined here, as well as some utilitiy funcs. + """ + def __init__(self, module_name, db): + self.db = db + self.module_name = module_name + self.name = self.db.getName() + self.uuid = self.db.getUUID() + self._server = None + self._connected = 0 + self.kmod = kmod(config.lustre, config.portals) + + def info(self, *args): + msg = string.join(map(str,args)) + log (self.module_name + ":", self.name, self.uuid, msg) + + def cleanup(self): + """ default cleanup, used for most modules """ + self.info() + try: + lctl.cleanup(self.name, self.uuid, config.force) + except CommandError, e: + log(self.module_name, "cleanup failed: ", self.name) + e.dump() + cleanup_error(e.rc) + + def add_portals_module(self, dev_dir, modname): + """Append a module to list of modules to load.""" + self.kmod.add_portals_module(dev_dir, modname) + + def add_lustre_module(self, dev_dir, modname): + """Append a module to list of modules to load.""" + self.kmod.add_lustre_module(dev_dir, modname) + + def load_module(self): + """Load all the modules in the list in the order they appear.""" + self.kmod.load_module() + + def cleanup_module(self): + """Unload the modules in the list in reverse order.""" + if self.safe_to_clean(): + self.kmod.cleanup_module() + + def safe_to_clean(self): + return 1 + + def safe_to_clean_modules(self): + return self.safe_to_clean() + +class Network(Module): + def __init__(self,db,nid_uuid=0): + Module.__init__(self, 'NETWORK', db) + self.net_type = self.db.get_val('nettype') + self.nid = self.db.get_val('nid', '*') + self.cluster_id = self.db.get_val('clusterid', "0") + self.port = self.db.get_val_int('port', 0) + self.nid_uuid = nid_uuid + self.add_portals_module('libcfs', 'libcfs') + self.add_portals_module('lnet', 'lnet') + # Add the socklnd for developers without modprobe.conf (umls) + self.add_portals_module('klnds/socklnd', 'ksocklnd') + + def prepare(self): + if is_network_prepared(): + return + self.info(self.net_type, self.nid) + if self.net_type == 'tcp': + sys_tweak_socknal() + if self.net_type == 'elan': + sys_optimize_elan() + + def safe_to_clean(self): + if PLATFORM == 'LINUX': + return not is_network_prepared() + elif PLATFORM == 'DARWIN': + # XXX always assume it's safe to clean + return 1 + return 1 + + def cleanup(self): + self.info(self.net_type, self.nid) + +# This is only needed to load the modules; the LDLM device +# is now created automatically. +class LDLM(Module): + def __init__(self,db): + Module.__init__(self, 'LDLM', db) + self.add_lustre_module('lvfs', 'lvfs') + self.add_lustre_module('obdclass', 'obdclass') + self.add_lustre_module('ptlrpc', 'ptlrpc') + self.add_lustre_module('ptlrpc/gss', 'ptlrpc_gss') + + def prepare(self): + return + + def cleanup(self): + return + +class LOV(Module): + def __init__(self, db, uuid, fs_name, name_override = None, config_only = None): + Module.__init__(self, 'LOV', db) + if name_override != None: + self.name = "lov_%s" % name_override + self.add_lustre_module('lov', 'lov') + self.mds_uuid = self.db.get_first_ref('mds') + self.stripe_sz = self.db.get_val_int('stripesize', 1048576) + self.stripe_off = self.db.get_val_int('stripeoffset', 0) + self.pattern = self.db.get_val_int('stripepattern', 0) + self.devlist = [] + self.stripe_cnt = self.db.get_val_int('stripecount', 1) + self.osclist = [] + self.desc_uuid = self.uuid + self.uuid = generate_client_uuid(self.name) + self.fs_name = fs_name + # settings below here won't be seen by the MDSDEV code! + if config_only: + self.config_only = 1 + return + self.config_only = None + mds = self.db.lookup(self.mds_uuid) + self.mds_name = mds.getName() + self.devlist = self.db.get_lov_tgts('lov_tgt') + for (obd_uuid, index, gen, active) in self.devlist: + if obd_uuid == '': + continue + obd = self.db.lookup(obd_uuid) + osc = get_osc(obd, self.uuid, fs_name) + if osc: + self.osclist.append((osc, index, gen, active)) + else: + panic('osc not found:', obd_uuid) + if self.osclist == []: + debug("get_lov_tgts failed, using get_refs"); + index = 0 + self.devlist = self.db.get_refs('obd') + for obd_uuid in self.devlist: + obd = self.db.lookup(obd_uuid) + osc = get_osc(obd, self.uuid, fs_name) + if osc: + self.osclist.append((osc, index, 1, 1)) + else: + panic('osc not found:', obd_uuid) + index = index + 1 + if self.osclist == []: + panic('No OSCs configured for LOV') + debug('dbg LOV __init__:', self.osclist, self.devlist, self.stripe_cnt) + + def prepare(self): + debug('dbg LOV prepare') + if is_prepared(self.name): + return + debug('dbg LOV prepare:', self.osclist, self.devlist) + self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, + self.stripe_off, self.pattern, self.devlist, + self.mds_name) + lctl.lov_setup(self.name, self.uuid, + self.desc_uuid, self.mds_name, self.stripe_cnt, + self.stripe_sz, self.stripe_off, self.pattern) + if self.osclist == []: + panic('No OSCs configured for LOV?') + for (osc, index, gen, active) in self.osclist: + target_uuid = osc.target_uuid + try: + # Only ignore connect failures with --force, which + # isn't implemented here yet. + osc.active = active + osc.prepare(ignore_connect_failure=0) + except CommandError, e: + print "Error preparing OSC %s\n" % osc.uuid + raise e + lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen) + + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + for (osc, index, gen, active) in self.osclist: + osc.cleanup() + if self.config_only: + panic("Can't clean up config_only LOV ", self.name) + + def load_module(self): + if self.config_only: + panic("Can't load modules for config_only LOV ", self.name) + for (osc, index, gen, active) in self.osclist: + osc.load_module() + break + Module.load_module(self) + + def cleanup_module(self): + if self.config_only: + panic("Can't cleanup modules for config_only LOV ", self.name) + Module.cleanup_module(self) + for (osc, index, gen, active) in self.osclist: + if active: + osc.cleanup_module() + break + +class MDSDEV(Module): + def __init__(self,db): + Module.__init__(self, 'MDSDEV', db) + self.devpath = self.db.get_val('devpath','') + self.size = self.db.get_val_int('devsize', 0) + self.journal_size = self.db.get_val_int('journalsize', 0) + + self.fstype = self.db.get_val('fstype', '') + if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': + self.fstype = 'ext3' + elif sys_get_branch() == '2.6' and self.fstype == 'ext3': + self.fstype = 'ldiskfs' + + self.nspath = self.db.get_val('nspath', '') + self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + if config.quota: + self.quota = config.quota + else: + self.quota = self.db.get_val('quota', '') + # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid + target_uuid = self.db.get_first_ref('target') + mds = self.db.lookup(target_uuid) + self.name = mds.getName() + self.filesystem_uuids = mds.get_refs('filesystem') + # FIXME: if fstype not set, then determine based on kernel version + self.format = self.db.get_val('autoformat', "no") + if mds.get_val('failover', '1') != '0': + self.failover_mds = 'f' + else: + self.failover_mds = 'n' + active_uuid = get_active_target(mds) + if not active_uuid: + panic("No target device found:", target_uuid) + if active_uuid == self.uuid: + self.active = 1 + else: + self.active = 0 + if self.active and config.group and config.group != mds.get_val('group', mds.get_val('name')): + self.active = 0 + + self.inode_size = self.db.get_val_int('inodesize', 0) + debug('original inode_size ', self.inode_size) + if self.inode_size == 0: + # find the LOV for this MDS + lovconfig_uuid = mds.get_first_ref('lovconfig') + if not lovconfig_uuid: + panic("No LOV config found for MDS ", mds.name) + lovconfig = mds.lookup(lovconfig_uuid) + lov_uuid = lovconfig.get_first_ref('lov') + if not lov_uuid: + panic("No LOV found for lovconfig ", lovconfig.name) + lov = LOV(self.db.lookup(lov_uuid), lov_uuid, 'FS_name', config_only = 1) + + # default stripe count controls default inode_size + if (lov.stripe_cnt > 0): + stripe_count = lov.stripe_cnt + else: + stripe_count = 1 + if stripe_count > 77: + self.inode_size = 512 + elif stripe_count > 34: + self.inode_size = 2048 + elif stripe_count > 13: + self.inode_size = 1024 + #elif stripe_count < 3: + # self.inode_size = 256 + else: + self.inode_size = 512 + debug('stripe_count ', stripe_count,' inode_size ',self.inode_size) + + self.target_dev_uuid = self.uuid + self.uuid = target_uuid + + # loading modules + if self.quota: + self.add_lustre_module('quota', 'lquota') + self.add_lustre_module('mdc', 'mdc') + self.add_lustre_module('osc', 'osc') + self.add_lustre_module('lov', 'lov') + self.add_lustre_module('mds', 'mds') + if self.fstype == 'ldiskfs': + self.add_lustre_module('ldiskfs', 'ldiskfs') + if self.fstype: + self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) + + def load_module(self): + if self.active: + Module.load_module(self) + + def prepare(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + if config.reformat: + # run write_conf automatically, if --reformat used + self.write_conf() + self.info(self.devpath, self.fstype, self.size, self.format) + # never reformat here + blkdev = block_dev(self.devpath, self.size, self.fstype, 0, + self.format, self.journal_size, self.inode_size, + self.mkfsoptions) + if not is_prepared('MDT'): + lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") + try: + mountfsoptions = def_mount_options(self.fstype, 'mds', blkdev) + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'MDS mount options: ' + mountfsoptions + + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s %s %s %s" %(blkdev, self.fstype, self.name, + mountfsoptions, self.quota)) + self.group_upcall = self.db.get_val('group_upcall','') + sys_set_group_upcall(self.name, self.group_upcall) + + except CommandError, e: + if e.rc == 2: + panic("MDS failed to start. Check the syslog for details." + + " (May need to run lconf --write-conf)") + else: + raise e + + def write_conf(self): + if is_prepared(self.name): + return + self.info(self.devpath, self.fstype, self.format) + blkdev = block_dev(self.devpath, self.size, self.fstype, + config.reformat, self.format, self.journal_size, + self.inode_size, self.mkfsoptions) + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s" %(blkdev, self.fstype)) + + # record logs for the MDS lov + for uuid in self.filesystem_uuids: + log("recording clients for filesystem:", uuid) + fs = self.db.lookup(uuid) + obd_uuid = fs.get_first_ref('obd') + client_uuid = generate_client_uuid(self.name) + client = VOSC(self.db.lookup(obd_uuid), client_uuid, self.name, + self.name) + config.record = 1 + lctl.clear_log(self.name, self.name) + lctl.record(self.name, self.name) + client.prepare() + lctl.mount_option(self.name, client.get_name(), "") + lctl.end_record() + config.record = 0 + + # record logs for each client + if config.ldapurl: + config_options = "--ldapurl " + config.ldapurl + " --config " + config.config + else: + config_options = CONFIG_FILE + + for node_db in self.db.lookup_class('node'): + client_name = node_db.getName() + for prof_uuid in node_db.get_refs('profile'): + prof_db = node_db.lookup(prof_uuid) + # refactor this into a funtion to test "clientness" of a node. + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('mountpoint','echoclient'): + thing = self.db.lookup(ref_uuid); + fs_uuid = thing.get_first_ref('filesystem') + if not fs_uuid in self.filesystem_uuids: + continue; + + log("Recording log", client_name, "on", self.name) + old_noexec = config.noexec + config.noexec = 0 + noexec_opt = ('', '-n') + ret, out = run (sys.argv[0], + noexec_opt[old_noexec == 1], + " -v --record --nomod --old_conf", + "--record_log", client_name, + "--record_device", self.name, + "--node", client_name, + config_options) + if ret: + lctl.clear_log(self.name, client_name) + print out + self.cleanup() + panic("Record client log %s on %s failed" %( + client_name, self.name)) + if config.verbose: + for s in out: log("record> ", string.strip(s)) + config.noexec = old_noexec + try: + lctl.cleanup(self.name, self.uuid, config.force, config.failover) + except CommandError, e: + log(self.module_name, "cleanup failed: ", self.name) + e.dump() + cleanup_error(e.rc) + Module.cleanup(self) + clean_loop(self.devpath) + + #change the mtime of LLOG to match the XML creation time + if toplustreDB.get_mtime(): + mtime = toplustreDB.get_mtime() + debug("changing mtime of LOGS to %s" %mtime) + ret, mktemp = runcmd("mktemp /tmp/lustre-cmd.XXXXXXXX") + if ret: + log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name) + else: + mtimecmdfile = string.split(mktemp[0])[0] + fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT) + os.write(fd, "\n\n\n\n\n%s\n\n" %mtime) + os.close(fd) + cmd = "debugfs -w -R \"mi /LOGS\" <%s %s" %(mtimecmdfile, self.devpath) + ret, outs = runcmd(cmd) + os.remove(mtimecmdfile) + if ret: + print "Can not change mtime of LOGS by debugfs." + + def mds_remaining(self): + out = lctl.device_list() + for s in out: + if string.split(s)[2] in ('mds',): + if string.split(s)[1] in ('ST',): + return 0 + return 1 + + def safe_to_clean(self): + return self.active + + def safe_to_clean_modules(self): + return not self.mds_remaining() + + def cleanup(self): + if not self.active: + debug(self.uuid, "not active") + return + self.info() + if is_prepared(self.name): + try: + lctl.cleanup(self.name, self.uuid, config.force, + config.failover) + except CommandError, e: + log(self.module_name, "cleanup failed: ", self.name) + e.dump() + cleanup_error(e.rc) + Module.cleanup(self) + if not self.mds_remaining() and is_prepared('MDT'): + try: + lctl.cleanup("MDT", "MDT_UUID", config.force, + config.failover) + except CommandError, e: + print "cleanup failed: ", self.name + e.dump() + cleanup_error(e.rc) + clean_loop(self.devpath) + +class OSD(Module): + def __init__(self, db): + Module.__init__(self, 'OSD', db) + self.osdtype = self.db.get_val('osdtype') + self.devpath = self.db.get_val('devpath', '') + self.size = self.db.get_val_int('devsize', 0) + self.journal_size = self.db.get_val_int('journalsize', 0) + + # now as we store fids in EA on OST we need to make inode bigger + self.inode_size = self.db.get_val_int('inodesize', 0) + if self.inode_size == 0: + self.inode_size = 256 + self.mkfsoptions = self.db.get_val('mkfsoptions', '') + # Allocate fewer inodes on large OST devices. Most filesystems + # can be much more aggressive than this, but by default we can't. + if self.size > 1000000: + self.mkfsoptions = '-i 16384 ' + self.mkfsoptions + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + if config.quota: + self.quota = config.quota + else: + self.quota = self.db.get_val('quota', '') + + self.fstype = self.db.get_val('fstype', '') + if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs': + self.fstype = 'ext3' + elif sys_get_branch() == '2.6' and self.fstype == 'ext3': + self.fstype = 'ldiskfs' + + self.nspath = self.db.get_val('nspath', '') + target_uuid = self.db.get_first_ref('target') + ost = self.db.lookup(target_uuid) + self.name = ost.getName() + self.format = self.db.get_val('autoformat', 'yes') + if ost.get_val('failover', '1') != '0': + self.failover_ost = 'f' + else: + self.failover_ost = 'n' + + active_uuid = get_active_target(ost) + if not active_uuid: + panic("No target device found:", target_uuid) + if active_uuid == self.uuid: + self.active = 1 + else: + self.active = 0 + if self.active and config.group and config.group != ost.get_val('group', ost.get_val('name')): + self.active = 0 + + self.target_dev_uuid = self.uuid + self.uuid = target_uuid + # modules + if self.quota: + self.add_lustre_module('quota', 'lquota') + self.add_lustre_module('ost', 'ost') + # FIXME: should we default to ext3 here? + if self.fstype == 'ldiskfs': + self.add_lustre_module('ldiskfs', 'ldiskfs') + if self.fstype: + self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) + self.add_lustre_module(self.osdtype, self.osdtype) + + def load_module(self): + if self.active: + Module.load_module(self) + + # need to check /proc/mounts and /etc/mtab before + # formatting anything. + # FIXME: check if device is already formatted. + def prepare(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + self.info(self.osdtype, self.devpath, self.size, self.fstype, + self.format, self.journal_size, self.inode_size) + if self.osdtype == 'obdecho': + blkdev = '' + else: + blkdev = block_dev(self.devpath, self.size, self.fstype, + config.reformat, self.format, self.journal_size, + self.inode_size, self.mkfsoptions) + + mountfsoptions = def_mount_options(self.fstype, 'ost', blkdev) + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'OST mount options: ' + mountfsoptions + + lctl.newdev(self.osdtype, self.name, self.uuid, + setup ="%s %s %s %s %s" %(blkdev, self.fstype, + self.failover_ost, mountfsoptions, + self.quota)) + if not is_prepared('OSS'): + lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") + + def osd_remaining(self): + out = lctl.device_list() + for s in out: + if string.split(s)[2] in ('obdfilter', 'obdecho'): + return 1 + + def safe_to_clean(self): + return self.active + + def safe_to_clean_modules(self): + return not self.osd_remaining() + + def cleanup(self): + if not self.active: + debug(self.uuid, "not active") + return + if is_prepared(self.name): + self.info() + try: + lctl.cleanup(self.name, self.uuid, config.force, + config.failover) + except CommandError, e: + log(self.module_name, "cleanup failed: ", self.name) + e.dump() + cleanup_error(e.rc) + if not self.osd_remaining() and is_prepared('OSS'): + try: + lctl.cleanup("OSS", "OSS_UUID", config.force, + config.failover) + except CommandError, e: + print "cleanup failed: ", self.name + e.dump() + cleanup_error(e.rc) + if not self.osdtype == 'obdecho': + clean_loop(self.devpath) + +# Generic client module, used by OSC and MDC +class Client(Module): + def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, + module_dir=None): + self.target_name = tgtdb.getName() + self.target_uuid = tgtdb.getUUID() + self.db = tgtdb + self.backup_targets = [] + + self.tgt_dev_uuid = get_active_target(tgtdb) + if not self.tgt_dev_uuid: + panic("No target device found for target:", self.target_name) + + self.kmod = kmod(config.lustre, config.portals) + self._server = None + self._connected = 0 + + self.module = module + self.module_name = string.upper(module) + if not self_name: + self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), + self.target_name, fs_name) + else: + self.name = self_name + self.uuid = uuid + self.lookup_server(self.tgt_dev_uuid) + self.lookup_backup_targets() + self.fs_name = fs_name + if not module_dir: + module_dir = module + self.add_lustre_module(module_dir, module) + + def lookup_server(self, srv_uuid): + """ Lookup a server's network information """ + self._server_nets = get_ost_net(self.db, srv_uuid) + if len(self._server_nets) == 0: + panic("Unable to find a server for:", srv_uuid) + + def get_servers(self): + return self._server_nets + + def lookup_backup_targets(self): + """ Lookup alternative network information """ + prof_list = toplustreDB.get_refs('profile') + for prof_uuid in prof_list: + prof_db = toplustreDB.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = toplustreDB.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: + debug("add backup target", ref_uuid) + self.backup_targets.append(ref_uuid) + + def prepare(self, ignore_connect_failure = 0): + self.info(self.target_uuid) + if is_prepared(self.name): + self.cleanup() + try: + srv_list = self.get_servers() + debug('dbg CLIENT __prepare__:', self.target_uuid, srv_list) + for srv in srv_list: + lctl.connect(srv) + if len(srv_list) == 0: + panic("no servers for ", self.target_uuid) + except CommandError, e: + if not ignore_connect_failure: + raise e + + if srv_list[0]: + srv = srv_list[0] + if self.target_uuid in config.inactive and self.permits_inactive(): + debug("%s inactive" % self.target_uuid) + inactive_p = "inactive" + else: + debug("%s active" % self.target_uuid) + inactive_p = "" + lctl.newdev(self.module, self.name, self.uuid, + setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid, + inactive_p)) + else: + panic("Unable to create OSC for ", self.target_uuid) + + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) + else: + for srv in this_nets: + lctl.connect(srv) + if srv: + lctl.add_conn(self.name, srv.nid_uuid); + + + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + srv_list = self.get_servers() + for srv in srv_list: + lctl.disconnect(srv) + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) + else: + for srv in this_nets: + lctl.disconnect(srv) + +class MDC(Client): + def __init__(self, db, uuid, fs_name): + Client.__init__(self, db, uuid, 'mdc', fs_name) + + def permits_inactive(self): + return 0 + +class OSC(Client): + def __init__(self, db, uuid, fs_name): + Client.__init__(self, db, uuid, 'osc', fs_name) + + def permits_inactive(self): + return 1 + +class COBD(Module): + def __init__(self, db): + Module.__init__(self, 'COBD', db) + self.real_uuid = self.db.get_first_ref('realobd') + self.cache_uuid = self.db.get_first_ref('cacheobd') + self.add_lustre_module('cobd' , 'cobd') + + # need to check /proc/mounts and /etc/mtab before + # formatting anything. + # FIXME: check if device is already formatted. + def prepare(self): + if is_prepared(self.name): + return + self.info(self.real_uuid, self.cache_uuid) + lctl.newdev("cobd", self.name, self.uuid, + setup ="%s %s" %(self.real_uuid, self.cache_uuid)) + + +# virtual interface for OSC and LOV +class VOSC(Module): + def __init__(self, db, uuid, fs_name, name_override = None, quota = None): + Module.__init__(self, 'VOSC', db) + if quota: + self.add_lustre_module('quota', 'lquota') + if db.get_class() == 'lov': + self.osc = LOV(db, uuid, fs_name, name_override) + else: + self.osc = get_osc(db, uuid, fs_name) + def get_uuid(self): + return self.osc.uuid + def get_name(self): + return self.osc.name + def prepare(self): + self.osc.prepare() + def cleanup(self): + self.osc.cleanup() + def load_module(self): + Module.load_module(self) + self.osc.load_module() + def cleanup_module(self): + self.osc.cleanup_module() + Module.cleanup_module(self) + + +class ECHO_CLIENT(Module): + def __init__(self,db): + Module.__init__(self, 'ECHO_CLIENT', db) + self.add_lustre_module('obdecho', 'obdecho') + self.obd_uuid = self.db.get_first_ref('obd') + obd = self.db.lookup(self.obd_uuid) + self.uuid = generate_client_uuid(self.name) + self.osc = VOSC(obd, self.uuid, self.name) + + def prepare(self): + if is_prepared(self.name): + return + self.osc.prepare() # XXX This is so cheating. -p + self.info(self.obd_uuid) + + lctl.newdev("echo_client", self.name, self.uuid, + setup = self.osc.get_name()) + + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + self.osc.cleanup() + + def load_module(self): + self.osc.load_module() + Module.load_module(self) + + def cleanup_module(self): + Module.cleanup_module(self) + self.osc.cleanup_module() + + +def generate_client_uuid(name): + client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), + name, + int(random.random() * 1048576), + int(random.random() * 1048576)) + return client_uuid[:36] + + +def my_rstrip(s, chars): + """my_rstrip(s, chars) -> strips any instances of the characters + found in chars from the right side of string s""" + # XXX required because python versions pre 2.2.3 don't allow + #string.rstrip() to take alternate char lists + import string + ns=s + try: + ns = string.rstrip(s, '/') + except TypeError, e: + for i in range(len(s) - 1, 0, -1): + if s[i] in chars: + continue + else: + ns = s[0:i+1] + break + return ns + + +class Mountpoint(Module): + def __init__(self,db): + Module.__init__(self, 'MTPT', db) + self.path = my_rstrip(self.db.get_val('path'), '/') + self.clientoptions = self.db.get_val('clientoptions', '') + self.fs_uuid = self.db.get_first_ref('filesystem') + fs = self.db.lookup(self.fs_uuid) + self.mds_uuid = fs.get_first_ref('mds') + mds_db = self.db.lookup(self.mds_uuid) + if config.quota: + quota = config.quota + else: + quota = mds_db.get_val('quota', config.quota) + self.obd_uuid = fs.get_first_ref('obd') + obd = self.db.lookup(self.obd_uuid) + client_uuid = generate_client_uuid(self.name) + self.vosc = VOSC(obd, client_uuid, self.name, quota=quota) + self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) + + self.add_lustre_module('mdc', 'mdc') + self.add_lustre_module('llite', 'llite') + + def prepare(self): + if fs_is_mounted(self.path): + log(self.path, "already mounted.") + return + self.vosc.prepare() + self.mdc.prepare() + mdc_name = self.mdc.name + + self.info(self.path, self.mds_uuid, self.obd_uuid) + if config.record or config.lctl_dump: + lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name) + return + + if config.clientoptions: + if self.clientoptions: + self.clientoptions = self.clientoptions + ',' + config.clientoptions + else: + self.clientoptions = config.clientoptions + if self.clientoptions: + self.clientoptions = ',' + self.clientoptions + # Linux kernel will deal with async and not pass it to ll_fill_super, + # so replace it with Lustre async + self.clientoptions = string.replace(self.clientoptions, "async", "lasync") + + cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \ + (self.vosc.get_name(), mdc_name, self.clientoptions, config.config, self.path) + run("mkdir", self.path) + ret, val = run(cmd) + if ret: + self.mdc.cleanup() + self.vosc.cleanup() + panic("mount failed:", self.path, ":", string.join(val)) + + def cleanup(self): + self.info(self.path, self.mds_uuid,self.obd_uuid) + + if config.record or config.lctl_dump: + lctl.del_mount_option(local_node_name) + else: + if fs_is_mounted(self.path): + if config.force: + (rc, out) = run("umount", "-f", self.path) + else: + (rc, out) = run("umount", self.path) + if rc: + raise CommandError('umount', out, rc) + + if fs_is_mounted(self.path): + panic("fs is still mounted:", self.path) + + self.mdc.cleanup() + self.vosc.cleanup() + + def load_module(self): + self.vosc.load_module() + Module.load_module(self) + + def cleanup_module(self): + Module.cleanup_module(self) + self.vosc.cleanup_module() + + +# ============================================================ +# misc query functions + +def get_ost_net(self, osd_uuid): + srv_list = [] + if not osd_uuid: + return srv_list + osd = self.lookup(osd_uuid) + node_uuid = osd.get_first_ref('node') + node = self.lookup(node_uuid) + if not node: + panic("unable to find node for osd_uuid:", osd_uuid, + " node_ref:", node_uuid) + for net_uuid in node.get_networks(): + db = node.lookup(net_uuid) + net = Network(db, node_uuid) + srv_list.append(net) + return srv_list + + +# the order of iniitailization is based on level. +def getServiceLevel(self): + type = self.get_class() + ret=0; + if type in ('network',): + ret = 5 + elif type in ('ldlm',): + ret = 20 + elif type in ('osd', 'cobd'): + ret = 30 + elif type in ('mdsdev',): + ret = 40 + elif type in ('mountpoint', 'echoclient'): + ret = 70 + else: + panic("Unknown type: ", type) + + if ret < config.minlevel or ret > config.maxlevel: + ret = 0 + return ret + +# +# return list of services in a profile. list is a list of tuples +# [(level, db_object),] +def getServices(self): + list = [] + for ref_class, ref_uuid in self.get_all_refs(): + servdb = self.lookup(ref_uuid) + if servdb: + level = getServiceLevel(servdb) + if level > 0: + list.append((level, servdb)) + else: + panic('service not found: ' + ref_uuid) + + list.sort() + return list + + +############################################################ +# MDC UUID hack - +# FIXME: clean this mess up! +# +# OSC is no longer in the xml, so we have to fake it. +# this is getting ugly and begging for another refactoring +def get_osc(ost_db, uuid, fs_name): + osc = OSC(ost_db, uuid, fs_name) + return osc + +def get_mdc(db, uuid, fs_name, mds_uuid): + mds_db = db.lookup(mds_uuid); + if not mds_db: + panic("no mds:", mds_uuid) + mdc = MDC(mds_db, uuid, fs_name) + return mdc + +def get_active_target(db): + target_uuid = db.getUUID() + target_name = db.getName() + node_name = get_select(target_name) + if node_name: + tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid) + else: + tgt_dev_uuid = db.get_first_ref('active') + return tgt_dev_uuid + +def get_server_by_nid_uuid(db, nid_uuid): + for n in db.lookup_class("network"): + net = Network(n) + if net.nid_uuid == nid_uuid: + return net + + +############################################################ +# lconf level logic +# Start a service. +def newService(db): + type = db.get_class() + debug('Service:', type, db.getName(), db.getUUID()) + n = None + if type == 'ldlm': + n = LDLM(db) + elif type == 'lov': + n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") + elif type == 'network': + n = Network(db) + elif type == 'osd': + n = OSD(db) + elif type == 'cobd': + n = COBD(db) + elif type == 'mdsdev': + n = MDSDEV(db) + elif type == 'mountpoint': + n = Mountpoint(db) + elif type == 'echoclient': + n = ECHO_CLIENT(db) + else: + panic("unknown service type:", type) + return n + +# +# Prepare the system to run lustre using a particular profile +# in a the configuration. +# * load & the modules +# * setup networking for the current node +# * make sure partitions are in place and prepared +# * initialize devices with lctl +# Levels is important, and needs to be enforced. +def for_each_profile(db, prof_list, operation): + for prof_uuid in prof_list: + prof_db = db.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + services = getServices(prof_db) + operation(services) + +def doWriteconf(services): + if config.nosetup: + return + have_mds = 0 + for s in services: + if s[1].get_class() == 'mdsdev': + n = newService(s[1]) + n.write_conf() + have_mds = 1 + if have_mds == 0: + panic("Cannot find mds device, please run --write_conf on the mds node.") + + +def doSetup(services): + if config.nosetup: + return + for s in services: + n = newService(s[1]) + n.prepare() + +def doModules(services): + if config.nomod: + return + for s in services: + n = newService(s[1]) + n.load_module() + +def doCleanup(services): + if config.nosetup: + return + services.reverse() + for s in services: + n = newService(s[1]) + if n.safe_to_clean(): + n.cleanup() + +def doUnloadModules(services): + if config.nomod: + return + services.reverse() + for s in services: + n = newService(s[1]) + if n.safe_to_clean_modules(): + n.cleanup_module() + +def doMakeServiceScript(services): + if config.nosetup: + return + try: + os.makedirs(config.service_scripts) + except OSError, e: + if e[0] != errno.EEXIST: + panic("Couldn't create scripts dir " + config.service_scripts + ": " + e[1]) + + for s in services: + if s[1].get_class() != 'osd' and s[1].get_class() != 'mdsdev': + continue + + target_uuid = s[1].get_first_ref('target') + target = toplustreDB.lookup(target_uuid) + target_symlink = config.service_scripts + "/" + target.getName() + if config.force: + try: + try: + os.unlink(target_symlink) + if config.verbose: + print "Removed " + target_symlink + except OSError, e: + if e[0] != errno.EISDIR: + raise e + os.rmdir(target_symlink) + if config.verbose: + print "Removed " + target_symlink + except OSError, e: + if e[0] != errno.ENOENT: + panic("Error removing " + target_symlink + ": " + e[1]) + + try: + os.symlink("/etc/init.d/lustre", target_symlink) + if config.verbose: + print "Created service link " + target_symlink + " to /etc/init.d/lustre" + + except OSError, e: + if e[0] == errno.EEXIST: + extra_error = " (use --force option to remove existing files)" + else: + extra_error = "" + panic("Error creating " + target_symlink + ": " + e[1] + extra_error) + +# Check mtime of config logs +def doCheckMtime(lustreDB, hosts): + for h in hosts: + node_db = lustreDB.lookup_name(h, 'node') + if node_db: + break + if not node_db: + return + + mdsdb = 0 + prof_list = node_db.get_refs('profile') + for prof_uuid in prof_list: + prof_db = node_db.lookup(prof_uuid) + if prof_db: + services = getServices(prof_db) + for s in services: + if s[1].get_class() == 'mdsdev': + mdsdb = s[1] + break + + if mdsdb and lustreDB.get_mtime(): + debug("Checking XML modification time") + devpath = mdsdb.get_val('devpath','') + xmtime = string.atol(lustreDB.get_mtime()) + cmd = "debugfs -c -R 'stat /LOGS' %s 2>&1 | grep mtime" %devpath + ret, kmtimes = runcmd(cmd) + if ret: + log("Can not get mtime info of MDS LOGS directory") + else: + kmtime = string.atoi(string.split(kmtimes[0])[1], 0) + if xmtime > kmtime: + debug('xmtime ', xmtime, '> kmtime', kmtime) + if config.old_conf: + log("Warning: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + %CONFIG_FILE) + else: + panic("Error: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + " Use '--old_conf' to start anyways." %CONFIG_FILE) + return + +# +# Load profile for +def doHost(lustreDB, hosts): + global local_node_name, tgt_select + node_db = None + for h in hosts: + node_db = lustreDB.lookup_name(h, 'node') + if node_db: + if config.service: + tgt_select[config.service] = h + config.group = config.service + break + if not node_db: + panic('No host entry found.') + + local_node_name = node_db.get_val('name', 0) + lustre_upcall = node_db.get_val('lustreUpcall', '') + portals_upcall = node_db.get_val('portalsUpcall', '') + timeout = node_db.get_val_int('timeout', 0) + ptldebug = node_db.get_val('ptldebug', '') + subsystem = node_db.get_val('subsystem', '') + + # Two step process: (1) load modules, (2) setup lustre + # if not cleaning, load modules first. + prof_list = node_db.get_refs('profile') + + if config.make_service_scripts: + for_each_profile(node_db, prof_list, doMakeServiceScript) + return + + elif config.write_conf: + for_each_profile(node_db, prof_list, doModules) + for_each_profile(node_db, prof_list, doWriteconf) + for_each_profile(node_db, prof_list, doUnloadModules) + lustreDB.close() + + elif config.recover: + if not (config.tgt_uuid and config.client_uuid and config.conn_uuid): + raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " + + "--client_uuid <UUID> --conn_uuid <UUID>") + doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid, + config.conn_uuid) + elif config.cleanup: + if not mod_loaded('lnet'): + return + + # ugly hack, only need to run lctl commands for --dump + if config.lctl_dump or config.record: + for_each_profile(node_db, prof_list, doCleanup) + return + + sys_set_ptldebug(ptldebug) + sys_set_subsystem(subsystem) + sys_set_lustre_upcall(lustre_upcall) + sys_set_portals_upcall(portals_upcall) + + for_each_profile(node_db, prof_list, doCleanup) + for_each_profile(node_db, prof_list, doUnloadModules) + lustreDB.close() + + else: + # ugly hack, only need to run lctl commands for --dump + if config.lctl_dump or config.record: + sys_set_timeout(timeout) + sys_set_lustre_upcall(lustre_upcall) + for_each_profile(node_db, prof_list, doSetup) + return + + if PLATFORM == 'LINUX': + sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) + sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) + + for_each_profile(node_db, prof_list, doModules) + + if PLATFORM == 'LINUX': + # XXX need to be fixed for Darwin + sys_set_debug_path() + sys_set_ptldebug(ptldebug) + sys_set_subsystem(subsystem) + script = config.gdb_script + run(lctl.lctl, ' modules >', script) + if config.gdb: + log ("The GDB module script is in", script) + # pause, so user has time to break and + # load the script + time.sleep(5) + sys_set_timeout(timeout) + sys_set_lustre_upcall(lustre_upcall) + sys_set_portals_upcall(portals_upcall) + + for_each_profile(node_db, prof_list, doSetup) + lustreDB.close() + +def add_clumanager_node(node_db, nodes, services): + new_services = [] + node_name = node_db.getUUID() + nodes[node_name] = [] + + for prof_uuid in node_db.get_refs('profile'): + prof_db = toplustreDB.lookup(prof_uuid) + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class not in ('osd', 'mdsdev'): + continue + devdb = toplustreDB.lookup(ref_uuid) + tgt_uuid = devdb.get_first_ref('target') + + nodes[node_name].append(ref_uuid) + + if not services.has_key(tgt_uuid): + if config.verbose: + print "New service: " + tgt_uuid + " (originally found on " + node_name + ")" + new_services.append(tgt_uuid) + services[tgt_uuid] = [] + services[tgt_uuid].append(ref_uuid) + + return new_services + +def add_clumanager_services(new_services, nodes, dev_list): + new_nodes = [] + for devdb in dev_list: + tgt_uuid = devdb.get_first_ref('target') + if tgt_uuid in new_services: + node_uuid = devdb.get_first_ref('node') + + if not (nodes.has_key(node_uuid) or node_uuid in new_nodes): + if config.verbose: + print "New node: " + node_uuid + " for service " + tgt_uuid + new_nodes.append(node_uuid) + + return new_nodes + +def doClumanager(lustreDB, hosts): + nodes = {} + services = {} + + dev_list = [] + + for dev_uuid in toplustreDB.get_refs('osd') + toplustreDB.get_refs('mdsdev'): + dev_list.append(lustreDB.lookup(dev_uuid)) + + node_db = None + for h in hosts: + node_db = lustreDB.lookup_name(h, 'node') + if node_db: + our_host = h + new_services = add_clumanager_node(node_db, nodes, services) + break + + if not node_db: + panic('No host entry found.') + + while 1: + if len(new_services) == 0: + break + + new_nodes = add_clumanager_services(new_services, nodes, dev_list) + if len(new_nodes) == 0: + break + + if len(new_nodes) + len(nodes.keys()) > 8: + panic("CluManager only supports 8 nodes per failover \"cluster.\"") + + new_services = [] + for node_uuid in new_nodes: + node_db = lustreDB.lookup(node_uuid) + if not node_db: + panic("No node entry for " + node_uuid + " was found.") + + new_services.append(add_clumanager_node(node_db, nodes, services)) + + nodenames = [] + for node in nodes.keys(): + nodedb = lustreDB.lookup(node) + nodenames.append(nodedb.getName()) + nodenames.sort() + + print """<?xml version="1.0"?> +<cluconfig version="3.0"> + <clumembd broadcast="no" interval="750000" loglevel="5" multicast="yes" multicast_ipaddress="225.0.0.11" thread="yes" tko_count="20"/> + <cluquorumd loglevel="5" pinginterval="2"/> + <clurmtabd loglevel="5" pollinterval="4"/> + <clusvcmgrd loglevel="5"/> + <clulockd loglevel="5"/> + <cluster config_viewnumber="1" name="%s"/> + <sharedstate driver="libsharedraw.so" rawprimary="%s" rawshadow="%s" type="raw"/> + <members> """ % (string.join(nodenames), config.rawprimary, config.rawsecondary) + + + i = 0 + for node in nodenames: + print " <member id=\"%d\" name=\"%s\" watchdog=\"yes\"/>" % (i, node) + i = i + 1 + + print " </members>\n <failoverdomains>" + + servicekeys = services.keys() + servicekeys.sort() + + i = 0 + for service in servicekeys: + svcdb = lustreDB.lookup(service) + print " <failoverdomain id=\"%d\" name=\"%s\" ordered=\"yes\" restricted=\"yes\">" % (i, svcdb.getName()) + i = i + 1 + + j = 0 + active_uuid = get_active_target(svcdb) + for svc_uuid in [active_uuid] + services[service]: + if svc_uuid == active_uuid and j > 0: + continue + svcdb = lustreDB.lookup(svc_uuid) + + svc_node_uuid = svcdb.get_first_ref('node') + svc_nodedb = lustreDB.lookup(svc_node_uuid) + + print " <failoverdomainnode id=\"%d\" name=\"%s\"/>" % (j, svc_nodedb.getName()) + j = j + 1 + + print " </failoverdomain>" + + print " </failoverdomains>\n <services>" + + i = 0 + for service in servicekeys: + svcdb = lustreDB.lookup(service) + active_uuid = get_active_target(svcdb) + activedb = lustreDB.lookup(active_uuid) + + svc_node_uuid = activedb.get_first_ref('node') + svc_nodedb = lustreDB.lookup(svc_node_uuid) + + print " <service checkinterval=\"30\" failoverdomain=\"%s\" id=\"%d\" name=\"%s\" userscript=\"%s/%s\">" \ + % ( svcdb.getName(), i, svcdb.getName(), config.service_scripts, svcdb.getName()) + print " <service_ipaddresses/>\n </service>" + i = i + 1 + + print " </services>\n</cluconfig>" + +def doRecovery(lustreDB, lctl, tgt_uuid, client_uuid, nid_uuid): + tgt = lustreDB.lookup(tgt_uuid) + if not tgt: + raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.") + new_uuid = get_active_target(tgt) + if not new_uuid: + raise Lustre.LconfError("doRecovery: no active target found for: " + + tgt_uuid) + srv_list = find_local_servers(get_ost_net(lustreDB, new_uuid)) + if not srv_list[0]: + raise Lustre.LconfError("Unable to find a connection to:" + new_uuid) + + oldsrv = get_server_by_nid_uuid(lustreDB, nid_uuid) + lustreDB.close() + + for srv in srv_list: + if oldsrv.net_type != srv.net_type: + continue + + log("Reconnecting", tgt_uuid, "to", srv.nid_uuid) + + lctl.recover(client_uuid, srv.nid_uuid) + + +def setupModulePath(cmd, portals_dir = PORTALS_DIR): + base = os.path.dirname(cmd) + if development_mode(): + if not config.lustre: + debug('using objdir module paths') + config.lustre = (os.path.join(base, "..")) + # normalize the portals dir, using command line arg if set + if config.portals: + portals_dir = config.portals + dir = os.path.join(config.lustre, portals_dir) + config.portals = dir + debug('config.portals', config.portals) + elif config.lustre and config.portals: + # production mode + # if --lustre and --portals, normalize portals + # can ignore POTRALS_DIR here, since it is probly useless here + config.portals = os.path.join(config.lustre, config.portals) + debug('config.portals B', config.portals) + +def sysctl(path, val): + debug("+ sysctl", path, val) + if config.noexec: + return + try: + fp = open(os.path.join('/proc/sys', path), 'w') + fp.write(str(val)) + fp.close() + except IOError, e: + panic(str(e)) + + +def sys_set_debug_path(): + sysctl('lnet/debug_path', config.debug_path) + +def validate_upcall(upcall): + import os + if upcall in ('DEFAULT','NONE'): + pass + elif os.path.exists(upcall): + if not os.access(upcall, os.X_OK): + print "WARNING upcall script not executable: %s" % upcall + else: + print "WARNING invalid upcall script specified: %s" % upcall + +def sys_set_lustre_upcall(upcall): + # the command line overrides the value in the node config + if config.lustre_upcall: + upcall = config.lustre_upcall + elif config.upcall: + upcall = config.upcall + if upcall: + validate_upcall(upcall) + lctl.set_lustre_upcall(upcall) + +def sys_set_portals_upcall(upcall): + # the command line overrides the value in the node config + if config.portals_upcall: + upcall = config.portals_upcall + elif config.upcall: + upcall = config.upcall + if upcall: + validate_upcall(upcall) + sysctl('lnet/upcall', upcall) + +def sys_set_group_upcall(mds, upcall): + if config.noexec: + return + # the command line overrides the value in the MDS config + if config.group_upcall: + upcall = config.group_upcall + if upcall: + validate_upcall(upcall) + debug("setting MDS", mds, "upcall to:", upcall) + path = "/proc/fs/lustre/mds/" + mds + "/group_upcall" + fp = open(path, 'w') + fp.write(upcall) + fp.close() + +def sys_set_timeout(timeout): + # the command overrides the value in the node config + if config.timeout and config.timeout > 0: + timeout = config.timeout + if timeout != None and timeout > 0: + lctl.set_timeout(timeout) + +def sys_tweak_socknal (): + if config.single_socket: + sysctl("socknal/typed", 0) + +def sys_optimize_elan (): + procfiles = ["/proc/elan/config/eventint_punt_loops", + "/proc/qsnet/elan3/config/eventint_punt_loops", + "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"] + for p in procfiles: + if os.access(p, os.W_OK): + run ("echo 1 > " + p) + +def sys_set_ptldebug(ptldebug): + if config.ptldebug: + ptldebug = config.ptldebug + if ptldebug: + try: + val = eval(ptldebug, ptldebug_names) + val = "0x%x" % (val) + sysctl('lnet/debug', val) + except NameError, e: + panic(str(e)) + +def sys_set_subsystem(subsystem): + if config.subsystem: + subsystem = config.subsystem + if subsystem: + try: + val = eval(subsystem, subsystem_names) + val = "0x%x" % (val) + sysctl('lnet/subsystem_debug', val) + except NameError, e: + panic(str(e)) + +def sys_set_netmem_max(path, max): + debug("setting", path, "to at least", max) + if config.noexec: + return + fp = open(path) + str = fp.readline() + fp.close() + cur = int(str) + if max > cur: + fp = open(path, 'w') + fp.write('%d\n' %(max)) + fp.close() + + +# Add dir to the global PATH, if not already there. +def add_to_path(new_dir): + syspath = string.split(os.environ['PATH'], ':') + if new_dir in syspath: + return + os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir + +def default_debug_path(): + path = '/tmp/lustre-log' + if os.path.isdir('/r'): + return '/r' + path + else: + return path + +def default_gdb_script(): + script = '/tmp/ogdb' + if os.path.isdir('/r'): + return '/r' + script + else: + return script + +DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') +# ensure basic elements are in the system path +def sanitise_path(): + for dir in DEFAULT_PATH: + add_to_path(dir) + +# global hack for the --select handling +tgt_select = {} +def init_select(args): + # args = [service=nodeA,service2=nodeB service3=nodeC] + # --service <service> is analagous to: + # --group <service> --select <service>=<node> + # this is handled in doHost() + global tgt_select + for arg in args: + list = string.split(arg, ',') + for entry in list: + srv, node = string.split(entry, '=') + tgt_select[srv] = node + +def get_select(srv): + if tgt_select.has_key(srv): + return tgt_select[srv] + return None + + +FLAG = Lustre.Options.FLAG +PARAM = Lustre.Options.PARAM +INTPARAM = Lustre.Options.INTPARAM +PARAMLIST = Lustre.Options.PARAMLIST +lconf_options = [ + ('verbose,v', "Print system commands as they are run"), + ('ldapurl',"LDAP server URL, eg. ldap://localhost", PARAM), + ('config', "Cluster config name used for LDAP query", PARAM), + ('select', "service=nodeA,service2=nodeB ", PARAMLIST), + ('service', "shorthand for --group <service> --select <service>=<node>", PARAM), + ('node', "Load config for <nodename>", PARAM), + ('cleanup,d', "Cleans up config. (Shutdown)"), + ('force,f', "Forced unmounting and/or obd detach during cleanup", + FLAG, 0), + ('single_socket', "socknal option: only use one socket instead of bundle", + FLAG, 0), + ('failover',"""Used to shut down without saving state. + This will allow this node to "give up" a service to a + another node for failover purposes. This will not + be a clean shutdown.""", + FLAG, 0), + ('abort_recovery',"""Used to start a service when you know recovery + will not succeed. This will skip the recovery + timeout period."""), + ('gdb', """Prints message after creating gdb module script + and sleeps for 5 seconds."""), + ('noexec,n', """Prints the commands and steps that will be run for a + config without executing them. This can used to check if a + config file is doing what it should be doing"""), + ('nomod', "Skip load/unload module step."), + ('nosetup', "Skip device setup/cleanup step."), + ('reformat', "Reformat all devices (without question)"), + ('mkfsoptions', "Additional options for the mk*fs command line", PARAM), + ('mountfsoptions', "Additional options for mount fs command line", PARAM), + ('clientoptions', "Additional options for Lustre", PARAM), + ('dump', "Dump the kernel debug log to file before portals is unloaded", + PARAM), + ('write_conf', "Save all the client config information on mds."), + ('old_conf', "Start up service even though config logs appear outdated."), + ('record', "Write config information on mds."), + ('record_log', "Name of config record log.", PARAM), + ('record_device', "MDS device name that will record the config commands", + PARAM), + ('minlevel', "Minimum level of services to configure/cleanup", + INTPARAM, 0), + ('maxlevel', """Maximum level of services to configure/cleanup + Levels are aproximatly like: + 10 - network + 20 - device, ldlm + 30 - osd, mdd + 40 - mds, ost + 70 - mountpoint, echo_client, osc, mdc, lov""", + INTPARAM, 100), + ('lustre', """Base directory of lustre sources. This parameter will + cause lconf to load modules from a source tree.""", PARAM), + ('portals', """Portals source directory. If this is a relative path, + then it is assumed to be relative to lustre. """, PARAM), + ('timeout', "Set recovery timeout", INTPARAM), + ('upcall', "Set both portals and lustre upcall script", PARAM), + ('lustre_upcall', "Set lustre upcall script", PARAM), + ('portals_upcall', "Set portals upcall script", PARAM), + ('group_upcall', "Set supplementary group upcall program", PARAM), + ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM), + ('ptldebug', "Set the portals debug level", PARAM), + ('subsystem', "Set the portals debug subsystem", PARAM), + ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()), + ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()), + ('allow_unprivileged_port', "Allow connections from unprivileged ports"), + ('clumanager', "Generate CluManager config file for this node's cluster"), + ('rawprimary', "For clumanager, device of the primary quorum", PARAM, "/dev/raw/raw1"), + ('rawsecondary', "For clumanager, device of the secondary quorum", PARAM, "/dev/raw/raw2"), + ('service_scripts', "For clumanager, directory containing per-service scripts", PARAM, "/etc/lustre/services"), + ('make_service_scripts', "Create per-service symlinks for use with clumanager"), +# Client recovery options + ('recover', "Recover a device"), + ('group,g', "The group of devices to configure or cleanup", PARAM), + ('tgt_uuid', "The failed target (required for recovery)", PARAM), + ('client_uuid', "The failed client (required for recovery)", PARAM), + ('conn_uuid', "The failed connection (required for recovery)", PARAM), + + ('inactive', """The name of an inactive service, to be ignored during + mounting (currently OST-only). Can be repeated.""", + PARAMLIST), + ('user_xattr', """Enable user_xattr support on MDS""", FLAG, 0), + ('acl', """Enable ACL support on MDS""", FLAG, 0), + ('quota', "Enable quota support for client file system", PARAM), + ] + +def main(): + global lctl, config, toplustreDB, CONFIG_FILE + + # in the upcall this is set to SIG_IGN + signal.signal(signal.SIGCHLD, signal.SIG_DFL) + + cl = Lustre.Options("lconf", "config.xml", lconf_options) + try: + config, args = cl.parse(sys.argv[1:]) + except Lustre.OptionError, e: + print e + sys.exit(1) + + setupModulePath(sys.argv[0]) + + host = socket.gethostname() + + # the PRNG is normally seeded with time(), which is not so good for starting + # time-synchronized clusters + input = open('/dev/urandom', 'r') + if not input: + print 'Unable to open /dev/urandom!' + sys.exit(1) + seed = input.read(32) + input.close() + random.seed(seed) + + sanitise_path() + + init_select(config.select) + + if len(args) > 0: + # allow config to be fetched via HTTP, but only with python2 + if sys.version[0] != '1' and args[0].startswith('http://'): + import urllib2 + try: + config_file = urllib2.urlopen(args[0]) + except (urllib2.URLError, socket.error), err: + if hasattr(err, 'args'): + err = err.args[1] + print "Could not access '%s': %s" %(args[0], err) + sys.exit(1) + elif not os.access(args[0], os.R_OK): + print 'File not found or readable:', args[0] + sys.exit(1) + else: + # regular file + config_file = open(args[0], 'r') + try: + dom = xml.dom.minidom.parse(config_file) + except Exception: + panic("%s does not appear to be a config file." % (args[0])) + sys.exit(1) # make sure to die here, even in debug mode. + config_file.close() + CONFIG_FILE = args[0] + lustreDB = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) + if not config.config: + config.config = os.path.basename(args[0])# use full path? + if config.config[-4:] == '.xml': + config.config = config.config[:-4] + elif config.ldapurl: + if not config.config: + panic("--ldapurl requires --config name") + dn = "config=%s,fs=lustre" % (config.config) + lustreDB = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) + elif config.ptldebug or config.subsystem: + sys_set_ptldebug(None) + sys_set_subsystem(None) + sys.exit(0) + else: + print 'Missing config file or ldap URL.' + print 'see lconf --help for command summary' + sys.exit(1) + + if config.reformat and config.cleanup: + panic("Options \"reformat\" and \"cleanup\" are incompatible. "+ + "Please specify only one.") + + toplustreDB = lustreDB + + ver = lustreDB.get_version() + if not ver: + panic("No version found in config data, please recreate.") + if ver != Lustre.CONFIG_VERSION: + panic("Config version", ver, "does not match lconf version", + Lustre.CONFIG_VERSION) + + node_list = [] + if config.node: + node_list.append(config.node) + else: + if len(host) > 0: + node_list.append(host) +# node_list.append('localhost') + + debug("configuring for host: ", node_list) + + if len(host) > 0: + config.debug_path = config.debug_path + '-' + host + config.gdb_script = config.gdb_script + '-' + host + + lctl = LCTLInterface('lctl') + + if config.lctl_dump: + lctl.use_save_file(config.lctl_dump) + + if not (config.reformat or config.write_conf or config.cleanup): + doCheckMtime(lustreDB, node_list) + + if config.record: + if not (config.record_device and config.record_log): + panic("When recording, both --record_log and --record_device must be specified.") + lctl.clear_log(config.record_device, config.record_log) + lctl.record(config.record_device, config.record_log) + + if config.clumanager: + doClumanager(lustreDB, node_list) + else: + doHost(lustreDB, node_list) + + if config.record: + lctl.end_record() + +if __name__ == "__main__": + try: + main() + except Lustre.LconfError, e: + print e +# traceback.print_exc(file=sys.stdout) + sys.exit(1) + except CommandError, e: + e.dump() + rc = e.rc + if rc == 0: + rc = 1 + sys.exit(rc) + + if first_cleanup_error: + sys.exit(first_cleanup_error) diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 6466cae5..f249ab2 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -68,7 +68,10 @@ static int lfs_quotaoff(int argc, char **argv); static int lfs_setquota(int argc, char **argv); static int lfs_quota(int argc, char **argv); #endif +static int lfs_flushctx(int argc, char **argv); static int lfs_join(int argc, char **argv); +static int lfs_getfacl(int argc, char **argv); +static int lfs_setfacl(int argc, char **argv); /* all avaialable commands */ command_t cmdlist[] = { @@ -139,6 +142,14 @@ command_t cmdlist[] = { {"quota", lfs_quota, 0, "Display disk usage and limits.\n" "usage: quota [ -o obd_uuid ] [ -u | -g ] [name] <filesystem>"}, #endif + {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n" + "usage: flushctx [-k] [mountpoint...]"}, + {"getfacl", lfs_getfacl, 0, + "Get file access control list in remote client.\n" + "usage: getfacl [-dRLPvh] file"}, + {"setfacl", lfs_setfacl, 0, + "Set file access control list in remote client.\n" + "usage: setfacl [-bkndRLPvh] [{-m|-x} acl_spec] [{-M|-X} acl_file] file"}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, @@ -216,12 +227,12 @@ static int lfs_setstripe(int argc, char **argv) return CMD_HELP; } } - if (optind < argc) fname = argv[optind]; else return CMD_HELP; + if (delete && (stripe_size_arg != NULL || stripe_off_arg != NULL || stripe_count_arg != NULL)) { @@ -231,11 +242,10 @@ static int lfs_setstripe(int argc, char **argv) return CMD_HELP; } } - if (optind != argc - 1) { fprintf(stderr, "error: %s: only 1 filename|dirname can be " "specified: '%s'\n", - argv[0], argv[argc-1]); + argv[0], argv[argc - 1]); return CMD_HELP; } @@ -244,16 +254,17 @@ static int lfs_setstripe(int argc, char **argv) st_size = strtoul(stripe_size_arg, &end, 0); if (*end != '\0') { fprintf(stderr, "error: %s: bad stripe size '%s'\n", - argv[0], stripe_size_arg); + argv[0], stripe_size_arg); return CMD_HELP; } + } /* get the stripe offset */ if (stripe_off_arg != NULL) { st_offset = strtoul(stripe_off_arg, &end, 0); if (*end != '\0') { fprintf(stderr, "error: %s: bad stripe offset '%s'\n", - argv[0], stripe_off_arg); + argv[0], stripe_off_arg); return CMD_HELP; } } @@ -262,7 +273,7 @@ static int lfs_setstripe(int argc, char **argv) st_count = strtoul(stripe_count_arg, &end, 0); if (*end != '\0') { fprintf(stderr, "error: %s: bad stripe count '%s'\n", - argv[0], stripe_count_arg); + argv[0], stripe_count_arg); return CMD_HELP; } } @@ -374,7 +385,7 @@ static int lfs_find(int argc, char **argv) case 1: if (strcmp(optarg, "!") == 0) neg_opt = 2; - break; + break; case 'A': xtime = ¶m.atime; xsign = ¶m.asign; @@ -454,10 +465,10 @@ static int lfs_find(int argc, char **argv) return CMD_HELP; }; } - + if (pathstart == -1) { fprintf(stderr, "error: %s: no filename|pathname\n", - argv[0]); + argv[0]); return CMD_HELP; } else if (pathend == -1) { /* no options */ @@ -475,7 +486,7 @@ static int lfs_find(int argc, char **argv) param.quiet = quiet; param.maxdepth = recursive ? -1 : 1; } - + do { if (new_fashion) ret = llapi_find(argv[pathstart], ¶m); @@ -486,7 +497,6 @@ static int lfs_find(int argc, char **argv) if (ret) fprintf(stderr, "error: %s failed for %s.\n", argv[0], argv[optind - 1]); - return ret; } @@ -755,12 +765,12 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf), - ishow, cooked, "MDT", index, rc); + showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, + "MDT", index, rc); } else { fprintf(stderr, "error: llapi_obd_statfs(%s): %s (%d)\n", - obd_uuid2str(&uuid_buf), strerror(-rc), rc); + uuid_buf.uuid, strerror(-rc), rc); return rc; } if (rc == 0) { @@ -782,8 +792,8 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf), - ishow, cooked, "OST", index, rc); + showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, + "OST", index, rc); } else { fprintf(stderr, "error: llapi_obd_statfs failed: %s (%d)\n", @@ -1026,7 +1036,8 @@ static int lfs_quotacheck(int argc, char **argv) char *mnt; struct if_quotacheck qchk; struct if_quotactl qctl; - char *obd_type = (char *)qchk.obd_type; + char *obd_type = qchk.obd_type; + char *obd_uuid = qchk.obd_uuid.uuid; int rc; memset(&qchk, 0, sizeof(qchk)); @@ -1076,8 +1087,7 @@ static int lfs_quotacheck(int argc, char **argv) rc = llapi_poll_quotacheck(mnt, &qchk); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, - obd_uuid2str(&qchk.obd_uuid)); + fprintf(stderr, "%s %s ", obd_type, obd_uuid); fprintf(stderr, "quota check failed: %s\n", strerror(errno)); return rc; } @@ -1089,8 +1099,8 @@ static int lfs_quotacheck(int argc, char **argv) rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", (char *)qctl.obd_type, - obd_uuid2str(&qctl.obd_uuid)); + fprintf(stderr, "%s %s ", + qctl.obd_type, qctl.obd_uuid.uuid); fprintf(stderr, "%s turn on quota failed: %s\n", argv[0], strerror(errno)); return rc; @@ -1104,7 +1114,8 @@ static int lfs_quotaon(int argc, char **argv) int c; char *mnt; struct if_quotactl qctl; - char *obd_type = (char *)qctl.obd_type; + char *obd_type = qctl.obd_type; + char *obd_uuid = qctl.obd_uuid.uuid; int rc; memset(&qctl, 0, sizeof(qctl)); @@ -1141,8 +1152,7 @@ static int lfs_quotaon(int argc, char **argv) rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, - obd_uuid2str(&qctl.obd_uuid)); + fprintf(stderr, "%s %s ", obd_type, obd_uuid); fprintf(stderr, "%s failed: %s\n", argv[0], strerror(errno)); return rc; } @@ -1155,7 +1165,8 @@ static int lfs_quotaoff(int argc, char **argv) int c; char *mnt; struct if_quotactl qctl; - char *obd_type = (char *)qctl.obd_type; + char *obd_type = qctl.obd_type; + char *obd_uuid = qctl.obd_uuid.uuid; int rc; memset(&qctl, 0, sizeof(qctl)); @@ -1188,8 +1199,7 @@ static int lfs_quotaoff(int argc, char **argv) rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, - obd_uuid2str(&qctl.obd_uuid)); + fprintf(stderr, "%s %s ", obd_type, obd_uuid); fprintf(stderr, "quotaoff failed: %s\n", strerror(errno)); return rc; } @@ -1266,7 +1276,8 @@ int lfs_setquota(int argc, char **argv) int c; char *mnt; struct if_quotactl qctl; - char *obd_type = (char *)qctl.obd_type; + char *obd_type = qctl.obd_type; + char *obd_uuid = qctl.obd_uuid.uuid; int rc; memset(&qctl, 0, sizeof(qctl)); @@ -1334,8 +1345,7 @@ int lfs_setquota(int argc, char **argv) rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, - obd_uuid2str(&qctl.obd_uuid)); + fprintf(stderr, "%s %s ", obd_type, obd_uuid); fprintf(stderr, "setquota failed: %s\n", strerror(errno)); return rc; } @@ -1487,7 +1497,7 @@ static void print_mds_quota(char *mnt, struct if_quotactl *qctl) } qctl->qc_dqblk.dqb_valid = 0; - print_quota(obd_uuid2str(&qctl->obd_uuid), qctl, 0); + print_quota(qctl->obd_uuid.uuid, qctl, 0); } static void print_lov_quota(char *mnt, struct if_quotactl *qctl) @@ -1521,7 +1531,7 @@ static void print_lov_quota(char *mnt, struct if_quotactl *qctl) continue; } - print_quota((char *)uuidp->uuid, qctl, 1); + print_quota(uuidp->uuid, qctl, 1); } out: @@ -1534,8 +1544,8 @@ static int lfs_quota(int argc, char **argv) int c; char *name = NULL, *mnt; struct if_quotactl qctl; - char *obd_type = (char *)qctl.obd_type; - char *obd_uuid = (char *)qctl.obd_uuid.uuid; + char *obd_type = qctl.obd_type; + char *obd_uuid = qctl.obd_uuid.uuid; int rc; memset(&qctl, 0, sizeof(qctl)); @@ -1617,6 +1627,205 @@ static int lfs_quota(int argc, char **argv) } #endif /* HAVE_QUOTA_SUPPORT */ +static int flushctx_ioctl(char *mp) +{ + int fd, rc; + + fd = open(mp, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "flushctx: error open %s: %s\n", + mp, strerror(errno)); + return -1; + } + + rc = ioctl(fd, LL_IOC_FLUSHCTX); + if (rc == -1) + fprintf(stderr, "flushctx: error ioctl %s: %s\n", + mp, strerror(errno)); + + close(fd); + return rc; +} + +static int lfs_flushctx(int argc, char **argv) +{ + int kdestroy = 0, c; + FILE *proc; + char procline[PATH_MAX], *line; + int rc = 0; + + optind = 0; + while ((c = getopt(argc, argv, "k")) != -1) { + switch (c) { + case 'k': + kdestroy = 1; + break; + default: + fprintf(stderr, "error: %s: option '-%c' " + "unrecognized\n", argv[0], c); + return CMD_HELP; + } + } + + if (kdestroy) + system("kdestroy > /dev/null"); + + if (optind >= argc) { + /* flush for all mounted lustre fs. */ + proc = fopen("/proc/mounts", "r"); + if (!proc) { + fprintf(stderr, "error: %s: can't open /proc/mounts\n", + argv[0]); + return -1; + } + + while ((line = fgets(procline, PATH_MAX, proc)) != NULL) { + char dev[PATH_MAX]; + char mp[PATH_MAX]; + char fs[PATH_MAX]; + + if (sscanf(line, "%s %s %s", dev, mp, fs) != 3) { + fprintf(stderr, "%s: unexpected format in " + "/proc/mounts\n", + argv[0]); + return -1; + } + + if (strcmp(fs, "lustre") != 0) + continue; + /* we use '@' to determine it's a client. are there + * any other better way? + */ + if (strchr(dev, '@') == NULL) + continue; + + if (flushctx_ioctl(mp)) + rc = -1; + } + } else { + /* flush fs as specified */ + while (optind < argc) { + if (flushctx_ioctl(argv[optind++])) + rc = -1; + } + } + + return rc; +} + +/* + * We assume one and only one filename is supplied as the + * last parameter. + */ +static int acl_cmd_parse(int argc, char **argv, char *fname, char *cmd) +{ + char *dname, *rpath = NULL; + char path[PATH_MAX], cwd[PATH_MAX]; + FILE *fp; + struct mntent *mnt; + int i; + + if (argc < 2) + return -1; + + /* FIXME the premise is there is no sub-mounted filesystems under this + * mounted lustre tree. */ + strncpy(fname, argv[argc - 1], PATH_MAX); + + /* get path prefix */ + dname = dirname(fname); + + /* try to resolve the pathname into relative to the root of the mounted + * lustre filesystem. + */ + if (getcwd(cwd, sizeof(cwd)) == NULL) { + fprintf(stderr, "getcwd %s failed: %s\n", cwd, strerror(errno)); + return -1; + } + + if (chdir(dname) == -1) { + fprintf(stderr, "chdir to %s failed: %s\n", + dname, strerror(errno)); + return -1; + } + + if (getcwd(path, sizeof(path)) == NULL) { + fprintf(stderr, "getcwd %s: %s\n", path, strerror(errno)); + return -1; + } + + if (chdir(cwd) == -1) { + fprintf(stderr, "chdir back to %s: %s\n", + cwd, strerror(errno)); + return -1; + } + + strncat(path, "/", PATH_MAX); + strncpy(fname, argv[argc - 1], PATH_MAX); + strncat(path, basename(fname), PATH_MAX); + + fp = setmntent(MOUNTED, "r"); + if (fp == NULL) { + fprintf(stderr, "setmntent %s failed: %s\n", + MOUNTED, strerror(errno)); + return -1; + } + + while (1) { + mnt = getmntent(fp); + if (!mnt) + break; + + if (!llapi_is_lustre_mnttype(mnt)) + continue; + + if (!strncmp(mnt->mnt_dir, path, strlen(mnt->mnt_dir))) { + rpath = path + strlen(mnt->mnt_dir); + break; + } + } + endmntent(fp); + + /* remove char '/' from rpath to be a relative path */ + while (rpath && *rpath == '/') rpath++; + + if (!rpath) { + fprintf(stderr, + "%s: file %s doesn't belong to a lustre file system!\n", + argv[0], argv[argc - 1]); + return -1; + } + + for (i = 0; i < argc - 1; i++) { + strncat(cmd, argv[i], PATH_MAX); + strncat(cmd, " ", PATH_MAX); + } + strncat(cmd, *rpath ? rpath : ".", PATH_MAX); + strncpy(fname, argv[argc - 1], sizeof(fname)); + + return 0; +} + +static int lfs_getfacl(int argc, char **argv) +{ + char fname[PATH_MAX] = "", cmd[PATH_MAX] = ""; + + if (acl_cmd_parse(argc, argv, fname, cmd)) + return CMD_HELP; + + return llapi_getfacl(fname, cmd); +} + +static int lfs_setfacl(int argc, char **argv) +{ + char fname[PATH_MAX] = "", cmd[PATH_MAX] = ""; + + if (acl_cmd_parse(argc, argv, fname, cmd)) + return CMD_HELP; + + return llapi_setfacl(fname, cmd); +} + int main(int argc, char **argv) { int rc; diff --git a/lustre/utils/libiam.c b/lustre/utils/libiam.c new file mode 100644 index 0000000..072d10e --- /dev/null +++ b/lustre/utils/libiam.c @@ -0,0 +1,605 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * libiam.c + * iam user level library + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Wang Di <wangdi@clusterfs.com> + * Author: Nikita Danilov <nikita@clusterfs.com> + * Author: Fan Yong <fanyong@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include <sys/types.h> + +#ifdef HAVE_ENDIAN_H +#include <endian.h> +#endif + +#include <libcfs/libcfs.h> + +#include <lustre/libiam.h> + +typedef __u32 lvar_hash_t; + +enum { + IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL, + IAM_LVAR_ROOT_MAGIC = 0xb01dface +}; + +struct iam_lfix_root { + u_int64_t ilr_magic; + u_int16_t ilr_keysize; + u_int16_t ilr_recsize; + u_int16_t ilr_ptrsize; + u_int16_t ilr_indirect_levels; +}; + +enum { + IAM_LEAF_HEADER_MAGIC = 0x1976, + IAM_LVAR_LEAF_MAGIC = 0x1973 +}; + +struct iam_leaf_head { + u_int16_t ill_magic; + u_int16_t ill_count; +}; + +struct dx_countlimit { + u_int16_t limit; + u_int16_t count; +}; + +struct lvar_leaf_header { + u_int16_t vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */ + u_int16_t vlh_used; /* used bytes, including header */ +}; + +struct lvar_root { + u_int32_t vr_magic; + u_int16_t vr_recsize; + u_int16_t vr_ptrsize; + u_int8_t vr_indirect_levels; + u_int8_t vr_padding0; + u_int16_t vr_padding1; +}; + +struct lvar_leaf_entry { + u_int32_t vle_hash; + u_int16_t vle_keysize; + u_int8_t vle_key[0]; +}; + +enum { + LVAR_PAD = 4, + LVAR_ROUND = LVAR_PAD - 1 +}; + +static int root_limit(int rootgap, int blocksize, int size) +{ + int limit; + int nlimit; + + limit = (blocksize - rootgap) / size; + nlimit = blocksize / size; + if (limit == nlimit) + limit--; + return limit; +} + +static int lfix_root_limit(int blocksize, int size) +{ + return root_limit(sizeof(struct iam_lfix_root), blocksize, size); +} + +static void lfix_root(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct iam_lfix_root *root; + struct dx_countlimit *limit; + void *entry; + + root = buf; + *root = (typeof(*root)) { + .ilr_magic = cpu_to_le64(IAM_LFIX_ROOT_MAGIC), + .ilr_keysize = cpu_to_le16(keysize), + .ilr_recsize = cpu_to_le16(recsize), + .ilr_ptrsize = cpu_to_le16(ptrsize), + .ilr_indirect_levels = 0 + }; + + limit = (void *)(root + 1); + *limit = (typeof(*limit)){ + /* + * limit itself + one pointer to the leaf. + */ + .count = cpu_to_le16(2), + .limit = lfix_root_limit(blocksize, keysize + ptrsize) + }; + + entry = root + 1; + /* + * Skip over @limit. + */ + entry += keysize + ptrsize; + + /* + * Entry format is <key> followed by <ptr>. In the minimal tree + * consisting of a root and single node, <key> is a minimal possible + * key. + * + * XXX: this key is hard-coded to be a sequence of 0's. + */ + entry += keysize; + /* now @entry points to <ptr> */ + if (ptrsize == 4) + *(u_int32_t *)entry = cpu_to_le32(1); + else + *(u_int64_t *)entry = cpu_to_le64(1); +} + +static void lfix_leaf(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct iam_leaf_head *head; + + /* form leaf */ + head = buf; + *head = (typeof(*head)) { + .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC), + /* + * Leaf contains an entry with the smallest possible key + * (created by zeroing). + */ + .ill_count = cpu_to_le16(1), + }; +} + +static int lvar_root_limit(int blocksize, int size) +{ + return root_limit(sizeof(struct lvar_root), blocksize, size); +} + +static void lvar_root(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct lvar_root *root; + struct dx_countlimit *limit; + void *entry; + int isize; + + isize = sizeof(lvar_hash_t) + ptrsize; + root = buf; + *root = (typeof(*root)) { + .vr_magic = cpu_to_le32(IAM_LVAR_ROOT_MAGIC), + .vr_recsize = cpu_to_le16(recsize), + .vr_ptrsize = cpu_to_le16(ptrsize), + .vr_indirect_levels = 0 + }; + + limit = (void *)(root + 1); + *limit = (typeof(*limit)) { + /* + * limit itself + one pointer to the leaf. + */ + .count = cpu_to_le16(2), + .limit = lvar_root_limit(blocksize, keysize + ptrsize) + }; + + entry = root + 1; + /* + * Skip over @limit. + */ + entry += isize; + + /* + * Entry format is <key> followed by <ptr>. In the minimal tree + * consisting of a root and single node, <key> is a minimal possible + * key. + * + * XXX: this key is hard-coded to be a sequence of 0's. + */ + entry += sizeof(lvar_hash_t); + /* now @entry points to <ptr> */ + if (ptrsize == 4) + *(u_int32_t *)entry = cpu_to_le32(1); + else + *(u_int64_t *)entry = cpu_to_le64(1); +} + +static int lvar_esize(int namelen, int recsize) +{ + return (offsetof(struct lvar_leaf_entry, vle_key) + + namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND; +} + +static void lvar_leaf(void *buf, + int blocksize, int keysize, int ptrsize, int recsize) +{ + struct lvar_leaf_header *head; + char *rec; + + /* form leaf */ + head = buf; + *head = (typeof(*head)) { + .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC), + .vlh_used = cpu_to_le16(sizeof *head + lvar_esize(0, recsize)) + }; + rec = (void *)(head + 1); + rec[offsetof(struct lvar_leaf_entry, vle_key)] = recsize; +} + + +struct iam_uapi_op { + void *iul_key; + void *iul_rec; +}; + +struct iam_uapi_it { + struct iam_uapi_op iui_op; + __u16 iui_state; +}; + +enum iam_ioctl_cmd { + IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info), + IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info), + IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op), + IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op), + IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op), + IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it), + IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it), + IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it), + IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long) +}; + +static unsigned char hex2dec(unsigned char hex) +{ + if (('0' <= hex) && (hex <= '9')) + return hex - '0'; + else if (('a' <= hex) && (hex <= 'f')) + return hex - 'a' + 10; + else if (('A' <= hex) && (hex <= 'F')) + return hex - 'A' + 10; + else + exit(1); +} + +static unsigned char *packdigit(unsigned char *number) +{ + unsigned char *area; + unsigned char *scan; + + area = calloc(strlen(number) / 2 + 2, sizeof(char)); + if (area != NULL) { + for (scan = area; *number; number += 2, scan++) + *scan = (hex2dec(number[0]) << 4) | hex2dec(number[1]); + } + return area; +} + +static char *iam_convert(int size, int need_convert, char *source) +{ + char *ptr; + char *opt; + + if (source == NULL) + return NULL; + + ptr = calloc(size + 1, sizeof(char)); + if (ptr == NULL) + return NULL; + + if (need_convert) { + opt = packdigit(source); + if (opt == NULL) { + free(ptr); + return NULL; + } else { + memcpy(ptr, opt, size + 1); + free(opt); + } + } else { + strncpy(ptr, source, size + 1); + } + + return ptr; +} + +static int iam_doop(int fd, struct iam_uapi_info *ua, int cmd, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec) +{ + int ret; + char *key; + char *rec; + struct iam_uapi_op op; + + key = iam_convert(ua->iui_keysize, key_need_convert, key_buf); + if (key == NULL) + return -1; + + rec = iam_convert(ua->iui_recsize, rec_need_convert, rec_buf); + if (rec == NULL) { + free(key); + return -1; + } + + op.iul_key = key; + op.iul_rec = rec; + ret = ioctl(fd, cmd, &op); + if (ret == 0) { + if ((keysize != NULL) && (*keysize > 0) && (save_key != NULL)) { + if (*keysize > ua->iui_keysize) + *keysize = ua->iui_keysize; + memcpy(save_key, key, *keysize); + } + if ((recsize != NULL) && (*recsize > 0) && (save_rec != NULL)) { + if (*recsize > ua->iui_recsize) + *recsize = ua->iui_recsize; + memcpy(save_rec, rec, *recsize); + } + } + free(key); + free(rec); + return ret; +} + + +/* + * Creat an iam file, but do NOT open it. + * Return 0 if success, else -1. + */ +int iam_creat(char *filename, enum iam_fmt_t fmt, + int blocksize, int keysize, int recsize, int ptrsize) +{ + int fd; + char *buf; + + if (filename == NULL) { + errno = EINVAL; + return -1; + } + + if ((fmt != FMT_LFIX) && (fmt != FMT_LVAR)) { + errno = EOPNOTSUPP; + return -1; + } + + if (blocksize <= 100) { + errno = EINVAL; + return -1; + } + + if (keysize < 1) { + errno = EINVAL; + return -1; + } + + if (recsize < 0) { + errno = EINVAL; + return -1; + } + + if (ptrsize != 4 && ptrsize != 8) { + errno = EINVAL; + return -1; + } + + if (keysize + recsize + sizeof(struct iam_leaf_head) > blocksize / 3) { + errno = EINVAL; + return -1; + } + + fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0600); + if (fd < 0) { + return -1; + } + + buf = malloc(blocksize); + if (buf == NULL) { + close(fd); + return -1; + } + + memset(buf, 0, blocksize); + if (fmt == FMT_LFIX) + lfix_root(buf, blocksize, keysize, ptrsize, recsize); + else + lvar_root(buf, blocksize, keysize, ptrsize, recsize); + + if (write(fd, buf, blocksize) != blocksize) { + close(fd); + free(buf); + return -1; + } + + memset(buf, 0, blocksize); + if (fmt == FMT_LFIX) + lfix_leaf(buf, blocksize, keysize, ptrsize, recsize); + else + lvar_leaf(buf, blocksize, keysize, ptrsize, recsize); + + if (write(fd, buf, blocksize) != blocksize) { + close(fd); + free(buf); + return -1; + } + + close(fd); + free(buf); + return 0; +} + +/* + * Open an iam file, but do NOT creat it if the file doesn't exist. + * Please use iam_creat for creating the file before use iam_open. + * Return file id (fd) if success, else -1. + */ +int iam_open(char *filename, struct iam_uapi_info *ua) +{ + int fd; + + if (filename == NULL) { + errno = EINVAL; + return -1; + } + + if (ua == NULL) { + errno = EINVAL; + return -1; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) { + return -1; + } + + if (ioctl(fd, IAM_IOC_INIT, ua) != 0) { + close(fd); + return -1; + } + + if (ioctl(fd, IAM_IOC_GETINFO, ua) != 0) { + close(fd); + return -1; + } + + return fd; +} + +/* + * Close file opened by iam_open. + */ +int iam_close(int fd) +{ + return close(fd); +} + +/* + * Please use iam_open before use this function. + */ +int iam_insert(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int rec_need_convert, char *rec_buf) +{ + return iam_doop(fd, ua, IAM_IOC_INSERT, + key_need_convert, key_buf, NULL, NULL, + rec_need_convert, rec_buf, NULL, NULL); +} + +/* + * Please use iam_open before use this function. + */ +int iam_lookup(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec) +{ + return iam_doop(fd, ua, IAM_IOC_LOOKUP, + key_need_convert, key_buf, keysize, save_key, + rec_need_convert, rec_buf, recsize, save_rec); +} + +/* + * Please use iam_open before use this function. + */ +int iam_delete(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int rec_need_convert, char *rec_buf) +{ + return iam_doop(fd, ua, IAM_IOC_DELETE, + key_need_convert, key_buf, NULL, NULL, + rec_need_convert, rec_buf, NULL, NULL); +} + +/* + * Please use iam_open before use this function. + */ +int iam_it_start(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec) +{ + return iam_doop(fd, ua, IAM_IOC_IT_START, + key_need_convert, key_buf, keysize, save_key, + rec_need_convert, rec_buf, recsize, save_rec); +} + +/* + * Please use iam_open before use this function. + */ +int iam_it_next(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int *keysize, char *save_key, + int rec_need_convert, char *rec_buf, + int *recsize, char *save_rec) +{ + return iam_doop(fd, ua, IAM_IOC_IT_NEXT, + key_need_convert, key_buf, keysize, save_key, + rec_need_convert, rec_buf, recsize, save_rec); +} + +/* + * Please use iam_open before use this function. + */ +int iam_it_stop(int fd, struct iam_uapi_info *ua, + int key_need_convert, char *key_buf, + int rec_need_convert, char *rec_buf) +{ + return iam_doop(fd, ua, IAM_IOC_IT_STOP, + key_need_convert, key_buf, NULL, NULL, + rec_need_convert, rec_buf, NULL, NULL); +} + +/* + * Change iam file mode. + */ +int iam_polymorph(char *filename, unsigned long mode) +{ + int fd; + int ret; + + if (filename == NULL) { + errno = EINVAL; + return -1; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) { + return -1; + } + + ret = ioctl(fd, IAM_IOC_POLYMORPH, mode); + close(fd); + return ret; +} diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index efb90e1..89c5555 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -55,7 +55,6 @@ #include <liblustre.h> #include <obd.h> #include <lustre_lib.h> -#include <lustre/liblustreapi.h> #include <obd_lov.h> #include <lustre/liblustreapi.h> @@ -70,8 +69,8 @@ static void err_msg(char *fmt, ...) fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno); } -int llapi_file_create(const char *name, unsigned long stripe_size, - int stripe_offset, int stripe_count, int stripe_pattern) +int llapi_file_create(const char *name, unsigned long stripe_size, int stripe_offset, + int stripe_count, int stripe_pattern) { struct lov_user_md lum = { 0 }; int fd, rc = 0; @@ -86,7 +85,7 @@ int llapi_file_create(const char *name, unsigned long stripe_size, if (fd < 0) { rc = -errno; - err_msg("unable to open '%s'",name); + err_msg("unable to open '%s'", name); return rc; } @@ -182,12 +181,8 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count) /* Get the lov name */ rc = ioctl(fd, OBD_IOC_GETNAME, (void *) lov_name); if (rc) { - if (errno != ENOTTY) { - rc = errno; - err_msg("error: can't get lov name."); - } else { - rc = 0; - } + rc = errno; + err_msg("error: can't get lov name."); return rc; } @@ -202,7 +197,7 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count) } while ((fgets(buf, sizeof(buf), fp) != NULL) && index < *ost_count) { - if (sscanf(buf, "%d: %s", &index, (char *)&uuidp[index].uuid)<2) + if (sscanf(buf, "%d: %s", &index, uuidp[index].uuid) < 2) break; index++; } @@ -252,7 +247,7 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param) break; if (param->obduuid) { - if (strncmp((char *)param->obduuid->uuid, uuid, + if (strncmp(param->obduuid->uuid, uuid, sizeof(uuid)) == 0) { param->obdindex = index; break; @@ -806,6 +801,28 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir, void *data) The regulat stat is almost of the same speed as some new 'glimpse-size-ioctl'. */ if (!decision && param->lmd->lmd_lmm.lmm_stripe_count) { + if (param->obdindex != OBD_NOT_FOUND) { + /* Check whether the obd is active or not, if it is + * not active, just print the object affected by this + * failed ost + * */ + struct obd_statfs stat_buf; + struct obd_uuid uuid_buf; + + memset(&stat_buf, 0, sizeof(struct obd_statfs)); + memset(&uuid_buf, 0, sizeof(struct obd_uuid)); + ret = llapi_obd_statfs(path, LL_STATFS_LOV, + param->obdindex, &stat_buf, + &uuid_buf); + if (ret) { + if (ret == -ENODATA || ret == -ENODEV + || ret == -EIO) + errno = EIO; + printf("obd_uuid: %s failed %s ", + param->obduuid->uuid, strerror(errno)); + goto print_path; + } + } if (dir) { ret = ioctl(dirfd(dir), IOC_LOV_GETINFO, (void *)param->lmd); @@ -825,6 +842,7 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir, void *data) decision = find_time_check(st, param, 0); } +print_path: if (decision != -1) { printf("%s", path); if (param->zeroend) @@ -1260,3 +1278,68 @@ out: find_param_fini(¶m); return ret; } + +int llapi_getfacl(char *fname, char *cmd) +{ + struct rmtacl_ioctl_data data; + char out[RMTACL_SIZE_MAX] = ""; + int fd, rc; + + data.cmd = cmd; + data.cmd_len = strlen(cmd) + 1; + data.res = out; + data.res_len = sizeof(out); + + fd = open(fname, 0); + if (fd == -1) { + err_msg("open %s failed", fname); + return -1; + } + + rc = ioctl(fd, LL_IOC_GETFACL, &data); + close(fd); + if (errno == EBADE) { + fprintf(stderr, "Please use getfacl directly!\n"); + rc = 1; + } else if (rc) { + err_msg("getfacl %s failed", fname); + } else { + printf("%s", out); + } + + return rc; +} + +int llapi_setfacl(char *fname, char *cmd) +{ + struct rmtacl_ioctl_data data; + char out[RMTACL_SIZE_MAX] = ""; + int fd, rc; + + data.cmd = cmd; + data.cmd_len = strlen(cmd) + 1; + data.res = out; + data.res_len = sizeof(out); + + fd = open(fname, 0); + if (fd == -1) { + err_msg("open %s failed", fname); + return -1; + } + + rc = ioctl(fd, LL_IOC_SETFACL, &data); + close(fd); + if (errno == EBADE) { + fprintf(stderr, "Please use setfacl directly!\n"); + rc = 1; + } else if (errno == EOPNOTSUPP) { + fprintf(stderr, "setfacl: %s: %s\n", fname, strerror(errno)); + rc = 1; + } else if (rc) { + err_msg("setfacl %s failed", fname); + } else { + printf("%s", out); + } + + return rc; +} diff --git a/lustre/utils/llanalyze b/lustre/utils/llanalyze index c794c70..5aa8ffd 100644 --- a/lustre/utils/llanalyze +++ b/lustre/utils/llanalyze @@ -67,7 +67,7 @@ sub extractpid { $line = shift; # print "$_\n"; - if ($line =~ m/\d+:\d+:\d+:\d+\.\d+:\d+:\d+:*$/) { + if ($line =~ /(\d+)\:(\d+)\:(\d+)\:(\d+\.\d+)\:(\d+)\:(\d+)\:.*/) { return $6; } } @@ -76,7 +76,7 @@ sub extracthostpid { $line = shift; # print "$_\n"; - if ($line =~ m/\d+:\d+:\d+:\d+\.\d+:\d+:\d+:\d+:*$/) { + if ($line =~ /(\d+)\:(\d+)\:(\d+)\:(\d+\.\d+)\:(\d+)\:(\d+)\:(\d+)\:.*/) { return $7; } } @@ -271,15 +271,15 @@ print "pid: $pid, dlm_ns: $dlm_ns\n"; print "extract_start: $extract_start\n"; print "extract_end: $extract_end\n"; while (<LOG>) { - if ($extract_start && $extract_start != LLANAYZE_FOUND) { + if ($extract_start && $extract_start ne "LLANAYZE_FOUND") { next if (index($_, $extract_start, 0) == -1); - $extract_start = LLANAYZE_FOUND; + $extract_start = "LLANAYZE_FOUND"; } if ($extract_end) { - next if ($extract_end == LLANAYZE_FOUND); + next if ($extract_end eq "LLANAYZE_FOUND"); if (index($_, $extract_end, 0) != -1) { - $extract_end = LLANAYZE_FOUND; + $extract_end = "LLANAYZE_FOUND"; } } diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c index 117bf64..dec25e4 100644 --- a/lustre/utils/llog_reader.c +++ b/lustre/utils/llog_reader.c @@ -201,7 +201,7 @@ void print_llog_header(struct llog_log_hdr *llog_buf) time_t t; printf("Header size : %u\n", - le32_to_cpu(llog_buf->llh_hdr.lrh_len)); + le32_to_cpu(llog_buf->llh_hdr.lrh_len)); t = le64_to_cpu(llog_buf->llh_timestamp); printf("Time : %s", ctime(&t)); @@ -253,6 +253,7 @@ static void print_setup_cfg(struct lustre_cfg *lcfg) printf("setup "); print_1_cfg(lcfg); } + return; } @@ -313,6 +314,16 @@ void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip) print_1_cfg(lcfg); break; } + case(LCFG_ADD_MDC):{ + printf("modify_mdc_tgts add "); + print_1_cfg(lcfg); + break; + } + case(LCFG_DEL_MDC):{ + printf("modify_mdc_tgts del "); + print_1_cfg(lcfg); + break; + } case(LCFG_MOUNTOPT):{ printf("mount_option "); print_1_cfg(lcfg); @@ -341,6 +352,7 @@ void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip) case(LCFG_MARKER):{ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); char createtime[26], canceltime[26] = ""; + if (marker->cm_flags & CM_SKIP) { if (marker->cm_flags & CM_START) { printf("SKIP START "); @@ -402,6 +414,5 @@ void print_records(struct llog_rec_hdr **recs, int rec_number) printf("padding\n"); } else printf("unknown type %x\n", lopt); - } } diff --git a/lustre/utils/lmc b/lustre/utils/lmc new file mode 100755 index 0000000..c05883c --- /dev/null +++ b/lustre/utils/lmc @@ -0,0 +1,1238 @@ +#!/usr/bin/env python +# Copyright (C) 2002 Cluster File Systems, Inc. +# Author: Robert Read <rread@clusterfs.com> + +# This file is part of Lustre, http://www.lustre.org. +# +# Lustre is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. +# +# Lustre is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Lustre; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +""" +lmc - lustre configuration data manager + + See the man page, or the Lustre Operations Manual, for documentation on lmc. + +""" + +import sys, os, getopt, string, exceptions, re +import xml.dom.minidom + +def printDoc(doc, stream=sys.stdout): + try: + from xml.dom.ext import PrettyPrint + PrettyPrint(doc, stream) + except ImportError: + stream.write(doc.toxml()) + stream.write("\n") + + +PYMOD_DIR = ["/usr/lib/lustre/python", "/usr/lib64/lustre/python"] + +def development_mode(): + base = os.path.dirname(sys.argv[0]) + if os.access(base+"/Makefile.am", os.R_OK): + return 1 + return 0 + +if not development_mode(): + sys.path.extend(PYMOD_DIR) + +import Lustre + +DEFAULT_PORT = 988 +DEFAULT_STRIPE_SZ = 1048576 +DEFAULT_STRIPE_CNT = 1 +DEFAULT_STRIPE_PATTERN = 0 +UUID_MAX_LENGTH = 31 + +def reference(): + print """usage: lmc --add object [object parameters] + +Object creation command summary: + +--add node + --node node_name + --timeout num + --upcall path + --lustre_upcall path + --group_upcall path + --portals_upcall path + --ptldebug debug_level + --subsystem subsystem_name + +--add net + --node node_name + --nid nid + --cluster_id + --nettype tcp|elan|gm|openib|iib|vib|ra|ptl|lnet + --hostaddr ip[/netmask] + --port port + --tcpbuf size + --irq_affinity 0|1 + --router + +--add mds + --node node_name + --mds mds_name + --dev path + --fstype ldiskfs|ext3 + --size size + --nspath + --group_upcall upcall + --journal_size size + --inode_size size + --mdsuuid uuid + --mkfsoptions options + --mountfsoptions options + --quota quotaon=u|g|ug,iunit=,bunit=,itune=,btune= + +--add lov + --lov lov_name + --mds mds_name + --stripe_sz num + --stripe_cnt num + --stripe_pattern num + +--add ost + --node node_name + --ost ost_name + --failout + --failover + --lov lov_name + --dev path + --size size + --fstype ldiskfs|ext3 + --journal_size size + --inode_size size + --osdtype obdecho|obdfilter + --ostuuid uuid + --mkfsoptions options + --mountfsoptions options + --quota quotaon=u|g|ug,iunit=,bunit=,itune=,btune= + +--add mtpt - Mountpoint + --node node_name + --path /mnt/point + --mds mds_name + --ost ost_name OR --lov lov_name + --clientoptions options + +--add route + --node nodename + --router + --gw nid + --gateway_cluster_id nid + --target_cluster_id nid + --lo nid + --hi nid + +--add echo_client + --node nodename +""" + +PARAM = Lustre.Options.PARAM +PARAMLIST = Lustre.Options.PARAMLIST +lmc_options = [ + # lmc input/output options + ('reference', "Print short reference for commands."), + ('verbose,v', "Print system commands as they are run."), + ('merge,m', "Append to the specified config file.", PARAM), + ('output,o', "Write XML configuration into given output file. Overwrite existing content.", PARAM), + ('input,i', "", PARAM), + ('batch', "Used to execute lmc commands in batch mode.", PARAM), + + # commands + ('add', "", PARAM), + + # node options + ('node', "Add a new node in the cluster configuration.", PARAM), + ('timeout', "Set timeout to initiate recovery.", PARAM), + ('upcall', "Set both lustre and portals upcall scripts.", PARAM), + ('lustre_upcall', "Set location of lustre upcall script.", PARAM), + ('group_upcall', "Set location of extended group upcall script.", PARAM), + ('portals_upcall', "Set location of portals upcall script.", PARAM), + ('ptldebug', "Set the portals debug level", PARAM), + ('subsystem', "Specify which Lustre subsystems have debug output recorded in the log", PARAM), + + # network + ('nettype', "Specify the network type. This can be tcp/elan/gm/openib/iib/vib/ra/ptl/lnet.", PARAM), + ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM), + ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT), + ('hostaddr', "Optional argument to specify the host address.", PARAMLIST), + ('cluster_id', "Specify the cluster ID", PARAM, "0"), + ('nonet', "Skip the remote host networking check"), + + # routes + ('route', "Add a new route for the cluster.", PARAM), + ('router', "Optional flag to mark a node as router."), + ('gw', "Specify the nid of the gateway for a route.", PARAM), + ('gateway_cluster_id', "", PARAM, "0"), + ('target_cluster_id', "", PARAM, "0"), + ('lo', "For a range route, this is the low value nid.", PARAM), + ('hi', "For a range route, this is a hi value nid.", PARAM,""), + + # servers: mds and ost + ('mds', "Specify MDS name.", PARAM), + ('ost', "Specify the OST name.", PARAM,""), + ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"), + ('failout', "Disable failover support on OST"), + ('failover', "Enable failover support on OST"), + ('group', "", PARAM), + ('dev', "Path of the device on local system.", PARAM,""), + ('size', "Specify the size of the device if needed.", PARAM,"0"), + ('group_upcall', "Set location of supplementary group upcall.", PARAM,""), + ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"), + ('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"), + ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"), + ('mkfsoptions', "Optional argument to mkfs.", PARAM, ""), + ('mountfsoptions', "Optional argument to mount fs.", PARAM, ""), + ('ostuuid', "Optional argument to specify OST UUID", PARAM,""), + ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""), + ('nspath', "Local mount point of server namespace.", PARAM,""), + ('format', ""), + ('quota', """ + quotaon: enable quota, only u|g|ug is supported now. + iunit: the unit for slave to acquire/release inode quota from/to master. + Int type (>0), default value in Lustre is 5000 inodes. + bunit: the unit for slave to acquire/release block quota from/to master. + Mbytes (>0), default value in Lustre is 100(Mbytes). + itune: used to tune the threthold. When inode quota usage reach the threthold, + slave should acquire/release inode quota from/to master. + Int type (100 > btune > 0), default value in Lustre is 50 (percentge). + inode threthold = iunit * itune / 100. + btune: used to tune the threthold. When block quota usage reach the threthold, + slave should acquire/release block quota from/to master. + Int type (100 > btune > 0), default value in Lustre is 50 (percentage). + block threthold = bunit * btune / 100.""", PARAM,""), + # clients: mountpoint and echo + ('echo_client', "", PARAM), + ('path', "Specify the mountpoint for Lustre.", PARAM), + ('filesystem', "Lustre filesystem name", PARAM,""), + ('clientoptions', "Specify the options for Lustre, such as async.", PARAM, ""), + + # lov + ('lov', "Specify LOV name.", PARAM,""), + ('stripe_sz', "Specify the stripe size in bytes.", PARAM, DEFAULT_STRIPE_SZ), + ('stripe_cnt', "Specify the number of OSTs each file should be striped on.", PARAM, DEFAULT_STRIPE_CNT), + ('stripe_pattern', "Specify the stripe pattern. RAID 0 is the only one currently supported.", PARAM, 0), + + # cobd + ('real_obd', "Specify the real device for the cache obd system.", PARAM), + ('cache_obd', "Specify the cache device for the cache obd system.", PARAM), + ] + +def error(*args): + msg = string.join(map(str,args)) + raise OptionError("Error: " + msg) + +def panic(cmd, msg): + print "! " + cmd + print msg + sys.exit(1) + +def warning(*args): + msg = string.join(map(str,args)) + sys.stderr.write("WARNING: %s\n" % (msg)) + +def info(*args): + msg = string.join(map(str,args)) + sys.stderr.write("INFO: %s\n" % (msg)) + +# +# manage names and uuids +# need to initialize this by walking tree to ensure +# no duplicate names or uuids are created. +# this are just place holders for now. +# consider changing this to be like OBD-dev-host +def new_name(base): + ctr = 2 + ret = base + while names.has_key(ret): + ret = "%s_%d" % (base, ctr) + ctr = 1 + ctr + names[ret] = 1 + return ret + +def new_uuid(name): + ctr = 2 + ret = "%s_UUID" % (name) + if len(ret) > UUID_MAX_LENGTH: + ret = ret[-UUID_MAX_LENGTH:] + while uuids.has_key(ret): + ret = "%s_UUID_%d" % (name, ctr) + ctr = 1 + ctr + if len(ret) > UUID_MAX_LENGTH: + ret = ret[-UUID_MAX_LENGTH:] + uuids[ret] = 1 + return ret + + +ldlm_name = 'ldlm' +ldlm_uuid = 'ldlm_UUID' + +def new_lustre(dom): + """Create a new empty lustre document""" + # adding ldlm here is a bit of a hack, but one is enough. + str = """<lustre version="%s"> + <ldlm name="%s" uuid="%s"/> + </lustre>""" % (Lustre.CONFIG_VERSION, ldlm_name, ldlm_uuid) + return dom.parseString(str) + + +names = {} +uuids = {} + +def init_names(doc): + """initialize auto-name generation tables""" + global names, uuids + # get all elements that contain a name attribute + for n in doc.childNodes: + if n.nodeType == n.ELEMENT_NODE: + if getName(n): + names[getName(n)] = 1 + uuids[getUUID(n)] = 1 + init_names(n) + +def get_format_flag(options): + if options.format: + return 'yes' + return 'no' + +############################################################ +# Build config objects using DOM +# +class GenConfig: + doc = None + dom = None + def __init__(self, doc): + self.doc = doc + + def ref(self, type, uuid): + """ generate <[type]_ref uuidref="[uuid]"/> """ + tag = "%s_ref" % (type) + ref = self.doc.createElement(tag) + ref.setAttribute("uuidref", uuid) + return ref + + def newService(self, tag, name, uuid): + """ create a new service elmement, which requires name and uuid attributes """ + new = self.doc.createElement(tag) + new.setAttribute("uuid", uuid); + new.setAttribute("name", name); + return new + + def addText(self, node, str): + txt = self.doc.createTextNode(str) + node.appendChild(txt) + + def addElement(self, node, tag, str=None): + """ create a new element and add it as a child to node. If str is passed, + a text node is created for the new element""" + new = self.doc.createElement(tag) + if str: + self.addText(new, str) + node.appendChild(new) + return new + + def recordtime(self, timestr): + lustre = self.doc.getElementsByTagName("lustre") + lustre[0].setAttribute("mtime", timestr) + + def network(self, name, uuid, nid, cluster_id, net, hostaddr="", + port=0): + """create <network> node""" + network = self.newService("network", name, uuid) + network.setAttribute("nettype", net); + self.addElement(network, "nid", nid) + self.addElement(network, "clusterid", cluster_id) + for host in hostaddr: + self.addElement(network, "hostaddr", host) + if port: + self.addElement(network, "port", "%d" %(port)) + + return network + + def routetbl(self, name, uuid): + """create <routetbl> node""" + rtbl = self.newService("routetbl", name, uuid) + return rtbl + + def route(self, gw_net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi): + """ create one entry for the route table """ + ref = self.doc.createElement('route') + ref.setAttribute("type", gw_net_type) + ref.setAttribute("gw", gw) + ref.setAttribute("gwclusterid", gw_cluster_id) + ref.setAttribute("tgtclusterid", tgt_cluster_id) + ref.setAttribute("lo", lo) + if hi: + ref.setAttribute("hi", hi) + return ref + + def profile(self, name, uuid): + """ create a host """ + profile = self.newService("profile", name, uuid) + return profile + + def node(self, name, uuid, prof_uuid): + """ create a host """ + node = self.newService("node", name, uuid) + node.appendChild(self.ref("profile", prof_uuid)) + return node + + def ldlm(self, name, uuid): + """ create a ldlm """ + ldlm = self.newService("ldlm", name, uuid) + return ldlm + + def osd(self, name, uuid, fstype, osdtype, devname, format, ost_uuid, + node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", + mkfsoptions="", mountfsoptions="", quota=""): + osd = self.newService("osd", name, uuid) + osd.setAttribute('osdtype', osdtype) + osd.appendChild(self.ref("target", ost_uuid)) + osd.appendChild(self.ref("node", node_uuid)) + if fstype: + self.addElement(osd, "fstype", fstype) + if devname: + dev = self.addElement(osd, "devpath", devname) + self.addElement(osd, "autoformat", format) + if dev_size: + self.addElement(osd, "devsize", "%s" % (dev_size)) + if journal_size: + self.addElement(osd, "journalsize", "%s" % (journal_size)) + if inode_size: + self.addElement(osd, "inodesize", "%s" % (inode_size)) + if mkfsoptions: + self.addElement(osd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(osd, "mountfsoptions", mountfsoptions) + if quota: + self.addElement(osd, "quota", quota) + if nspath: + self.addElement(osd, "nspath", nspath) + return osd + + def cobd(self, name, uuid, real_uuid, cache_uuid): + cobd = self.newService("cobd", name, uuid) + cobd.appendChild(self.ref("realobd",real_uuid)) + cobd.appendChild(self.ref("cacheobd",cache_uuid)) + return cobd + + def ost(self, name, uuid, osd_uuid, group=""): + ost = self.newService("ost", name, uuid) + ost.appendChild(self.ref("active", osd_uuid)) + if group: + self.addElement(ost, "group", group) + return ost + + def oss(self, name, uuid): + oss = self.newService("oss", name, uuid) + return oss + + def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern): + lov = self.newService("lov", name, uuid) + lov.appendChild(self.ref("mds", mds_uuid)) + lov.setAttribute("stripesize", str(stripe_sz)) + lov.setAttribute("stripecount", str(stripe_cnt)) + lov.setAttribute("stripepattern", str(pattern)) + return lov + + def lovconfig(self, name, uuid, lov_uuid): + lovconfig = self.newService("lovconfig", name, uuid) + lovconfig.appendChild(self.ref("lov", lov_uuid)) + return lovconfig + + def mds(self, name, uuid, mdd_uuid, group=""): + mds = self.newService("mds", name, uuid) + mds.appendChild(self.ref("active",mdd_uuid)) + if group: + self.addElement(mds, "group", group) + return mds + + def mdsdev(self, name, uuid, fstype, devname, format, node_uuid, + mds_uuid, dev_size=0, journal_size=0, inode_size=256, + nspath="", mkfsoptions="", mountfsoptions="", quota="", group_upcall=""): + mdd = self.newService("mdsdev", name, uuid) + self.addElement(mdd, "fstype", fstype) + dev = self.addElement(mdd, "devpath", devname) + self.addElement(mdd, "autoformat", format) + if dev_size: + self.addElement(mdd, "devsize", "%s" % (dev_size)) + if journal_size: + self.addElement(mdd, "journalsize", "%s" % (journal_size)) + if inode_size: + self.addElement(mdd, "inodesize", "%s" % (inode_size)) + if nspath: + self.addElement(mdd, "nspath", nspath) + if mkfsoptions: + self.addElement(mdd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(mdd, "mountfsoptions", mountfsoptions) + if quota: + self.addElement(mdd, "quota", quota) + if group_upcall: + self.addElement(mdd, "group_upcall", group_upcall) + + mdd.appendChild(self.ref("node", node_uuid)) + mdd.appendChild(self.ref("target", mds_uuid)) + return mdd + + def mountpoint(self, name, uuid, fs_uuid, path, clientoptions): + mtpt = self.newService("mountpoint", name, uuid) + mtpt.appendChild(self.ref("filesystem", fs_uuid)) + self.addElement(mtpt, "path", path) + if clientoptions: + self.addElement(mtpt, "clientoptions", clientoptions) + return mtpt + + def filesystem(self, name, uuid, mds_uuid, obd_uuid): + fs = self.newService("filesystem", name, uuid) + fs.appendChild(self.ref("mds", mds_uuid)) + fs.appendChild(self.ref("obd", obd_uuid)) + return fs + + def echo_client(self, name, uuid, osc_uuid): + ec = self.newService("echoclient", name, uuid) + ec.appendChild(self.ref("obd", osc_uuid)) + return ec + +############################################################ +# Utilities to query a DOM tree +# Using this functions we can treat use config information +# directly as a database. +def getName(n): + return n.getAttribute('name') + +def getUUID(node): + return node.getAttribute('uuid') + + +def findByName(lustre, name, tag = ""): + for n in lustre.childNodes: + if n.nodeType == n.ELEMENT_NODE: + if tag and n.nodeName != tag: + continue + if getName(n) == name: + return n + else: + n = findByName(n, name) + if n: return n + return None + + +def lookup(node, uuid): + for n in node.childNodes: + if n.nodeType == n.ELEMENT_NODE: + if getUUID(n) == uuid: + return n + else: + n = lookup(n, uuid) + if n: return n + return None + + +def name2uuid(lustre, name, tag="", fatal=1): + ret = findByName(lustre, name, tag) + if not ret: + if fatal: + error('name2uuid:', '"'+name+'"', tag, 'element not found.') + else: + return "" + return getUUID(ret) + +def lookup_filesystem(lustre, mds_uuid, ost_uuid): + for n in lustre.childNodes: + if n.nodeType == n.ELEMENT_NODE and n.nodeName == 'filesystem': + if ref_exists(n, mds_uuid) and ref_exists(n, ost_uuid): + return getUUID(n) + return None + +# XXX: assumes only one network element per node. will fix this +# as soon as support for routers is added +def get_net_uuid(lustre, node_name): + """ get a network uuid for a node_name """ + node = findByName(lustre, node_name, "node") + if not node: + error ('get_net_uuid:', '"'+node_name+'"', "node element not found.") + net = node.getElementsByTagName('network') + if net: + return getUUID(net[0]) + return None + + +def lov_add_obd(gen, lov, osc_uuid): + lov.appendChild(gen.ref("obd", osc_uuid)) + +def ref_exists(profile, uuid): + elist = profile.childNodes + for e in elist: + if e.nodeType == e.ELEMENT_NODE: + ref = e.getAttribute('uuidref') + if ref == uuid: + return 1 + return 0 + +# ensure that uuid is not already in the profile +# return true if uuid is added +def node_add_profile(gen, node, ref, uuid): + refname = "%s_ref" % "profile" + ret = node.getElementsByTagName(refname) + if not ret: + error('node has no profile ref:', node) + prof_uuid = ret[0].getAttribute('uuidref') + profile = lookup(node.parentNode, prof_uuid) + if not profile: + error("no profile found:", prof_uuid) + if ref_exists(profile, uuid): + return 0 + profile.appendChild(gen.ref(ref, uuid)) + return 1 + +def get_attr(dom_node, attr, default=""): + v = dom_node.getAttribute(attr) + if v: + return v + return default + +############################################################ +# Top level commands +# +def runcmd(cmd): + f = os.popen(cmd) + ret = f.close() + if ret: + ret = ret >> 8 + else: + ret = 0 + return ret + +def set_node_options(gen, node, options): + if options.router: + node.setAttribute('router', '1') + if options.timeout: + gen.addElement(node, "timeout", get_option(options, 'timeout')) + if options.upcall: + default_upcall = get_option(options, 'upcall') + else: + default_upcall = '' + if default_upcall or options.lustre_upcall: + if options.lustre_upcall: + gen.addElement(node, 'lustreUpcall', options.lustre_upcall) + else: + gen.addElement(node, 'lustreUpcall', default_upcall) + if options.group_upcall: + gen.addElement(node, 'groupUpcall', options.group_upcall) + if default_upcall or options.portals_upcall: + if options.portals_upcall: + gen.addElement(node, 'portalsUpcall', options.portals_upcall) + else: + gen.addElement(node, 'portalsUpcall', default_upcall) + if options.ptldebug: + gen.addElement(node, "ptldebug", get_option(options, 'ptldebug')) + if options.subsystem: + gen.addElement(node, "subsystem", get_option(options, 'subsystem')) + return node + +def do_add_node(gen, lustre, options, node_name): + uuid = new_uuid(node_name) + prof_name = new_name("PROFILE_" + node_name) + prof_uuid = new_uuid(prof_name) + profile = gen.profile(prof_name, prof_uuid) + node = gen.node(node_name, uuid, prof_uuid) + lustre.appendChild(node) + lustre.appendChild(profile) + + node_add_profile(gen, node, 'ldlm', ldlm_uuid) + set_node_options(gen, node, options) + return node + + +def add_node(gen, lustre, options): + """ create a node with a network config """ + + node_name = get_option(options, 'node') + ret = findByName(lustre, node_name, "node") + if ret: + print "Node:", node_name, "exists." + return + do_add_node(gen, lustre, options, node_name) + + +def add_net(gen, lustre, options): + """ create a node with a network config """ + + node_name = get_option(options, 'node') + nid = get_option(options, 'nid') + cluster_id = get_option(options, 'cluster_id') + hostaddr = get_option(options, 'hostaddr') + net_type = get_option(options, 'nettype') + + if net_type in ('lnet','tcp','openib','ra'): + port = get_option_int(options, 'port') + elif net_type in ('elan','gm','iib','vib','lo','ptl'): + port = 0 + else: + print "Unknown net_type: ", net_type + sys.exit(2) + + real_net_type = net_type + if net_type == 'lnet' and string.find(nid,'@') > 0: + real_net_type = string.split(nid,'@')[1] + + # testing network + if options.nonet: + if options.verbose: + print "Skipping the remote host networking test." + elif (real_net_type == 'tcp') and (nid != '*'): + if options.verbose: + print "Testing network on", node_name + target = string.split(nid,'@')[0] + if target != '*' and target != '\\*': + out = runcmd("ping -c 1 -w 5 %s" %target) + if out != 0: + print "Could not connect to %s, please check network." % node_name + + ret = findByName(lustre, node_name, "node") + if not ret: + node = do_add_node(gen, lustre, options, node_name) + else: + node = ret + set_node_options(gen, node, options) + + net_name = new_name('NET_'+ node_name +'_'+ net_type) + net_uuid = new_uuid(net_name) + node.appendChild(gen.network(net_name, net_uuid, nid, cluster_id, net_type, + hostaddr, port)) + node_add_profile(gen, node, "network", net_uuid) + + +def add_route(gen, lustre, options): + """ create a node with a network config """ + + node_name = get_option(options, 'node') + gw_net_type = get_option(options, 'nettype') + gw = get_option(options, 'gw') + gw_cluster_id = get_option(options, 'gateway_cluster_id') + tgt_cluster_id = get_option(options, 'target_cluster_id') + lo = get_option(options, 'lo') + hi = get_option(options, 'hi') + if not hi: + hi = lo + + node = findByName(lustre, node_name, "node") + if not node: + error (node_name, " not found.") + + rlist = node.getElementsByTagName('routetbl') + if len(rlist) > 0: + rtbl = rlist[0] + else: + rtbl_name = new_name("RTBL_" + node_name) + rtbl_uuid = new_uuid(rtbl_name) + rtbl = gen.routetbl(rtbl_name, rtbl_uuid) + node.appendChild(rtbl) + node_add_profile(gen, node, "routetbl", rtbl_uuid) + rtbl.appendChild(gen.route(gw_net_type, gw, gw_cluster_id, tgt_cluster_id, + lo, hi)) + + +def add_mds(gen, lustre, options): + node_name = get_option(options, 'node') + mds_name = get_option(options, 'mds') + mdd_name = new_name("MDD_" + mds_name +"_" + node_name) + mdd_uuid = new_uuid(mdd_name) + + mds_uuid = name2uuid(lustre, mds_name, 'mds', fatal=0) + if not mds_uuid: + mds_uuid = get_option(options, 'mdsuuid') + if mds_uuid: + if lookup(lustre, mds_uuid): + error("Duplicate MDS UUID:", mds_uuid) + else: + mds_uuid = new_uuid(mds_name) + + mds = gen.mds(mds_name, mds_uuid, mdd_uuid, options.group) + lustre.appendChild(mds) + else: + mds = lookup(lustre, mds_uuid) + if options.failover: + mds.setAttribute('failover', "1") + if options.failout: + mds.setAttribute('failover,',"0") + + devname = get_option(options, 'dev') + size = get_option(options, 'size') + fstype = get_option(options, 'fstype') + journal_size = get_option(options, 'journal_size') + inode_size = get_option(options, 'inode_size') + nspath = get_option(options, 'nspath') + mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') + quota = get_option(options, 'quota') + group_upcall = get_option(options, 'group_upcall') + + node_uuid = name2uuid(lustre, node_name, 'node') + + node = findByName(lustre, node_name, "node") + node_add_profile(gen, node, "mdsdev", mdd_uuid) + net_uuid = get_net_uuid(lustre, node_name) + if not net_uuid: + error("NODE: ", node_name, "not found") + + mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, + get_format_flag(options), node_uuid, mds_uuid, + size, journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions, quota, group_upcall) + lustre.appendChild(mdd) + + +def add_ost(gen, lustre, options): + node_name = get_option(options, 'node') + lovname = get_option(options, 'lov') + osdtype = get_option(options, 'osdtype') + + node_uuid = name2uuid(lustre, node_name, 'node') + + if osdtype == 'obdecho': + fstype = '' + devname = '' + size = 0 + fstype = '' + journal_size = '' + inode_size = '' + mkfsoptions = '' + mountfsoptions = '' + quota = '' + else: + devname = get_option(options, 'dev') # can be unset for bluearcs + size = get_option(options, 'size') + fstype = get_option(options, 'fstype') + journal_size = get_option(options, 'journal_size') + inode_size = get_option(options, 'inode_size') + mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') + quota = get_option(options, 'quota') + + nspath = get_option(options, 'nspath') + + ostname = get_option(options, 'ost') + if not ostname: + ostname = new_name('OST_'+ node_name) + + osdname = new_name("OSD_" + ostname + "_" + node_name) + osd_uuid = new_uuid(osdname) + + ost_uuid = name2uuid(lustre, ostname, 'ost', fatal=0) + if not ost_uuid: + ost_uuid = get_option(options, 'ostuuid') + if ost_uuid: + if lookup(lustre, ost_uuid): + error("Duplicate OST UUID:", ost_uuid) + else: + ost_uuid = new_uuid(ostname) + + ost = gen.ost(ostname, ost_uuid, osd_uuid, options.group) + lustre.appendChild(ost) + if lovname: + lov = findByName(lustre, lovname, "lov") + if not lov: + error('add_ost:', '"'+lovname+'"', "lov element not found.") + lov_add_obd(gen, lov, ost_uuid) + else: + ost = lookup(lustre, ost_uuid) + + if options.failover: + ost.setAttribute('failover', "1") + if options.failout: + ost.setAttribute('failover', "0") + + + osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, + get_format_flag(options), ost_uuid, node_uuid, size, + journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions, quota) + + node = findByName(lustre, node_name, "node") + +## if node_add_profile(gen, node, 'oss', oss_uuid): +## ossname = 'OSS' +## oss_uuid = new_uuid(ossname) +## oss = gen.oss(ossname, oss_uuid) +## lustre.appendChild(oss) + + node_add_profile(gen, node, 'osd', osd_uuid) + lustre.appendChild(osd) + + +def add_cobd(gen, lustre, options): + node_name = get_option(options, 'node') + name = new_name('COBD_' + node_name) + uuid = new_uuid(name) + + real_name = get_option(options, 'real_obd') + cache_name = get_option(options, 'cache_obd') + + real_uuid = name2uuid(lustre, real_name, tag='obd') + cache_uuid = name2uuid(lustre, cache_name, tag='obd') + + node = findByName(lustre, node_name, "node") + node_add_profile(gen, node, "cobd", uuid) + cobd = gen.cobd(name, uuid, real_uuid, cache_uuid) + lustre.appendChild(cobd) + + +def add_echo_client(gen, lustre, options): + """ add an echo client to the profile for this node. """ + node_name = get_option(options, 'node') + lov_name = get_option(options, 'ost') + + node = findByName(lustre, node_name, 'node') + + echoname = new_name('ECHO_'+ node_name) + echo_uuid = new_uuid(echoname) + node_add_profile(gen, node, 'echoclient', echo_uuid) + + lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0) + if not lov_uuid: + lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1) + + echo = gen.echo_client(echoname, echo_uuid, lov_uuid) + lustre.appendChild(echo) + + +def add_lov(gen, lustre, options): + """ create a lov """ + + lov_orig = get_option(options, 'lov') + name = new_name(lov_orig) + if name != lov_orig: + warning("name:", lov_orig, "already used. using:", name) + + mds_name = get_option(options, 'mds') + stripe_sz = get_option_int(options, 'stripe_sz') + stripe_cnt = get_option_int(options, 'stripe_cnt') + if stripe_cnt == 0: + info("default stripe count (0) - will use %d stripe(s) per file" \ + % DEFAULT_STRIPE_CNT) + pattern = get_option_int(options, 'stripe_pattern') + uuid = new_uuid(name) + + ret = findByName(lustre, name, "lov") + if ret: + error("LOV: ", name, " already exists.") + + mds_uuid = name2uuid(lustre, mds_name, 'mds') + lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) + lustre.appendChild(lov) + + # add an lovconfig entry to the active mdsdev profile + lovconfig_name = new_name('LVCFG_' + name) + lovconfig_uuid = new_uuid(lovconfig_name) + mds = findByName(lustre, mds_name, "mds") + mds.appendChild(gen.ref("lovconfig", lovconfig_uuid)) + lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) + lustre.appendChild(lovconfig) + +def add_default_lov(gen, lustre, mds_name, lov_name): + """ create a default lov """ + + stripe_sz = DEFAULT_STRIPE_SZ + stripe_cnt = DEFAULT_STRIPE_CNT + pattern = DEFAULT_STRIPE_PATTERN + uuid = new_uuid(lov_name) + + ret = findByName(lustre, lov_name, "lov") + if ret: + error("LOV: ", lov_name, " already exists.") + + mds_uuid = name2uuid(lustre, mds_name, 'mds') + lov = gen.lov(lov_name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) + lustre.appendChild(lov) + + # add an lovconfig entry to the active mdsdev profile + lovconfig_name = new_name('LVCFG_' + lov_name) + lovconfig_uuid = new_uuid(lovconfig_name) + mds = findByName(lustre, mds_name) + mds.appendChild(gen.ref("lovconfig", lovconfig_uuid)) + lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) + lustre.appendChild(lovconfig) + +def new_filesystem(gen, lustre, mds_uuid, obd_uuid): + fs_name = new_name("FS_fsname") + fs_uuid = new_uuid(fs_name) + mds = lookup(lustre, mds_uuid) + mds.appendChild(gen.ref("filesystem", fs_uuid)) + fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid) + lustre.appendChild(fs) + return fs_uuid + +def get_fs_uuid(gen, lustre, mds_name, obd_name): + mds_uuid = name2uuid(lustre, mds_name, tag='mds') + obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0) + fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid) + if not fs_uuid: + fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid) + return fs_uuid + +def add_mtpt(gen, lustre, options): + """ create mtpt on a node """ + node_name = get_option(options, 'node') + + path = get_option(options, 'path') + clientoptions = get_option(options, "clientoptions") + fs_name = get_option(options, 'filesystem') + + lov_name = get_option(options, 'lov') + ost_name = get_option(options, 'ost') + mds_name = get_option(options, 'mds') + if lov_name == '': + if ost_name == '': + error("--add mtpt requires --lov lov_name or --ost ost_name") + else: + warning("use default value for lov, due no --lov lov_name provided") + lov_name = new_name("lov_default") + add_default_lov(gen, lustre, mds_name, lov_name) + ost_uuid = name2uuid(lustre, ost_name, 'ost', fatal=0) + if not ost_uuid: + error('add_mtpt:', '"'+ost_name+'"', "ost element not found.") + lov = findByName(lustre, lov_name, "lov") + lov_add_obd(gen, lov, ost_uuid) + + if fs_name == '': + fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name) + else: + fs_uuid = name2uuid(lustre, fs_name, tag='filesystem') + + name = new_name('MNT_'+ node_name) + + ret = findByName(lustre, name, "mountpoint") + if ret: + # this can't happen, because new_name creates unique names + error("MOUNTPOINT: ", name, " already exists.") + + uuid = new_uuid(name) + mtpt = gen.mountpoint(name, uuid, fs_uuid, path, clientoptions) + node = findByName(lustre, node_name, "node") + if not node: + error('node:', node_name, "not found.") + node_add_profile(gen, node, "mountpoint", uuid) + lustre.appendChild(mtpt) + +############################################################ +# Command line processing +# +class OptionError (exceptions.Exception): + def __init__(self, args): + self.args = args + +def get_option(options, tag): + """Look for tag in options hash and return the value if set. If not + set, then if return default it is set, otherwise exception.""" + if options.__getattr__(tag) != None: + return options.__getattr__(tag) + else: + raise OptionError("--add %s requires --%s <value>" % (options.add, tag)) + +def get_option_int(options, tag): + """Return an integer option. Raise exception if the value is not an int""" + val = get_option(options, tag) + try: + n = int(val) + except ValueError: + raise OptionError("--%s <num> (value must be integer)" % (tag)) + return n + +# simple class for profiling +import time +class chrono: + def __init__(self): + self._start = 0 + def start(self): + self._stop = 0 + self._start = time.time() + def stop(self, msg=''): + self._stop = time.time() + if msg: + self.display(msg) + def dur(self): + return self._stop - self._start + def display(self, msg): + d = self.dur() + str = '%s: %g secs' % (msg, d) + print str + +################################################################# +# function cmdlinesplit used to split cmd line from batch file +# +def cmdlinesplit(cmdline): + + double_quote = re.compile(r'"(([^"\\]|\\.)*)"') + single_quote = re.compile(r"'(.*?)'") + escaped = re.compile(r'\\(.)') + esc_quote = re.compile(r'\\([\\"])') + outside = re.compile(r"""([^\s\\'"]+)""") + + arg_list = [] + i = 0; arg = None + while i < len(cmdline): + c = cmdline[i] + if c == '"': + match = double_quote.match(cmdline, i) + if not match: + print "Unmatched double quote:", cmdline + sys.exit(1) + i = match.end() + if arg is None: arg = esc_quote.sub(r'\1', match.group(1)) + else: arg = arg + esc_quote.sub(r'\1', match.group(1)) + + elif c == "'": + match = single_quote.match(cmdline, i) + if not match: + print "Unmatched single quote:", cmdline + sys.exit(1) + i = match.end() + if arg is None: arg = match.group(1) + else: arg = arg + match.group(1) + + elif c == "\\": + match = escaped.match(cmdline, i) + if not match: + print "Unmatched backslash", cmdline + sys.exit(1) + i = match.end() + if arg is None: arg = match.group(1) + else: arg = arg + match.group(1) + + elif c in string.whitespace: + if arg != None: + arg_list.append(str(arg)) + arg = None + while i < len(cmdline) and cmdline[i] in string.whitespace: + i = i + 1 + else: + match = outside.match(cmdline, i) + assert match + i = match.end() + if arg is None: arg = match.group() + else: arg = arg + match.group() + + if arg != None: arg_list.append(str(arg)) + + return arg_list + +############################################################ +# Main +# + +def add(devtype, gen, lustre, options): + if devtype == 'net': + add_net(gen, lustre, options) + elif devtype == 'mtpt': + add_mtpt(gen, lustre, options) + elif devtype == 'mds': + add_mds(gen, lustre, options) + elif devtype == 'ost': + add_ost(gen, lustre, options) + elif devtype == 'lov': + add_lov(gen, lustre, options) + elif devtype == 'route': + add_route(gen, lustre, options) + elif devtype == 'node': + add_node(gen, lustre, options) + elif devtype == 'echo_client': + add_echo_client(gen, lustre, options) + elif devtype == 'cobd': + add_cobd(gen, lustre, options) + else: + error("unknown device type:", devtype) + +def do_command(gen, lustre, options, args): + if options.add: + add(options.add, gen, lustre, options) + else: + error("Missing command") + +def main(): + cl = Lustre.Options("lmc", "", lmc_options) + try: + options, args = cl.parse(sys.argv[1:]) + except Lustre.OptionError, e: + panic("lmc", e) + + if len(args) > 0: + panic(string.join(sys.argv), "Unexpected extra arguments on command line: " + string.join(args)) + + if options.reference: + reference() + sys.exit(0) + + outFile = '-' + + if options.merge: + outFile = options.merge + if os.access(outFile, os.R_OK): + doc = xml.dom.minidom.parse(outFile) + else: + doc = new_lustre(xml.dom.minidom) + elif options.input: + doc = xml.dom.minidom.parse(options.input) + else: + doc = new_lustre(xml.dom.minidom) + + if options.output: + outFile = options.output + + lustre = doc.documentElement + init_names(lustre) + if lustre.tagName != "lustre": + print "Existing config not valid." + sys.exit(1) + + gen = GenConfig(doc) + + if options.batch: + fp = open(options.batch) + batchCommands = fp.readlines() + fp.close() + for cmd in batchCommands: + try: + options, args = cl.parse(cmdlinesplit(cmd)) + if options.merge or options.input or options.output: + print "The batchfile should not contain --merge, --input or --output." + sys.exit(1) + do_command(gen, lustre, options, args) + except OptionError, e: + panic(cmd, e) + except Lustre.OptionError, e: + panic(cmd, e) + else: + try: + do_command(gen, lustre, options, args) + except OptionError, e: + panic(string.join(sys.argv),e) + except Lustre.OptionError, e: + panic("lmc", e) + + #record timestamp + timestr = string.split(str(time.time()), '.') + gen.recordtime(timestr[0]) + + if outFile == '-': + printDoc(doc) + else: + printDoc(doc, open(outFile,"w")) + +if __name__ == "__main__": + main() diff --git a/lustre/utils/loadgen.c b/lustre/utils/loadgen.c index 4ba3dcd..88c8b02 100644 --- a/lustre/utils/loadgen.c +++ b/lustre/utils/loadgen.c @@ -248,7 +248,7 @@ static int write_proc(char *proc_path, char *value) return rc; } -static int read_proc(char *proc_path, unsigned long long *value) +static int read_proc(char *proc_path, unsigned long long *value) { int fd, rc; char buf[50]; @@ -461,6 +461,7 @@ static int obj_create(struct kid_t *kid) data.ioc_dev = kid->k_dev; data.ioc_obdo1.o_mode = 0100644; data.ioc_obdo1.o_id = 0; + data.ioc_obdo1.o_gr = 2; data.ioc_obdo1.o_uid = 0; data.ioc_obdo1.o_gid = 0; data.ioc_obdo1.o_valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | diff --git a/lustre/utils/loadmod_all.sh b/lustre/utils/loadmod_all.sh new file mode 100755 index 0000000..9238c79 --- /dev/null +++ b/lustre/utils/loadmod_all.sh @@ -0,0 +1,47 @@ +#!/bin/sh + +dmesg -c >/dev/null +dmesg -n 8 + + +modprobe mds +modprobe osd +modprobe obdfilter +modprobe ost +modprobe mgs +modprobe lov +modprobe ptlrpc +modprobe obdecho +modprobe lustre +modprobe mgc +modprobe ldiskfs +modprobe osc +modprobe mdt +modprobe lquota +modprobe cmm +modprobe mdc +modprobe fsfilt_ldiskfs +modprobe lvfs +modprobe obdclass +modprobe mdd +modprobe fld +modprobe fid +modprobe lmv +modprobe libcfs +modprobe pingcli +modprobe spingsrv +modprobe pingsrv +modprobe spingcli +modprobe lnet +modprobe ksocklnd + + +# To generate gdb debug file: +rm -f /r/tmp/ogdb-`hostname` +./lctl modules > /r/tmp/ogdb-`hostname` + + +HOST=`hostname` +echo -1 >/proc/sys/lnet/debug +echo "/r/tmp/$HOST.debug" >/proc/sys/lnet/daemon_file + diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index cc29553..b938ed8 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -40,7 +40,7 @@ #include <lustre_dlm.h> #include <obd.h> /* for struct lov_stripe_md */ #include <obd_lov.h> -#include <linux/lustre_build_version.h> +#include <lustre/lustre_build_version.h> #include <unistd.h> #include <sys/un.h> @@ -489,7 +489,6 @@ int jt_lcfg_mgsparam(int argc, char **argv) return CMD_HELP; lustre_cfg_bufs_reset(&bufs, NULL); - for (i = 1; i < argc; i++) { lustre_cfg_bufs_set_string(&bufs, i, argv[i]); } diff --git a/lustre/utils/mds-failover-sample b/lustre/utils/mds-failover-sample new file mode 100755 index 0000000..f6269f4 --- /dev/null +++ b/lustre/utils/mds-failover-sample @@ -0,0 +1,20 @@ +#!/bin/sh + +MDS=NET_mds_tcp_UUID +MDSHOST=mds + +/r/src/lustre/utils/lctl <<EOF +network tcp +close_uuid $MDS +del_uuid $MDS +connect $MDSHOST 988 +add_uuid $MDS $MDSHOST +quit +EOF + +/r/src/lustre/utils/lctl <<EOF +device \$RPCDEV +probe +newconn $MDS +quit +EOF diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 222a7be..f770da5 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -64,7 +64,7 @@ struct mkfs_opts { char mo_loopdev[128]; /* in case a loop dev is needed */ __u64 mo_device_sz; /* in KB */ int mo_stripe_count; - int mo_flags; + int mo_flags; int mo_mgs_failnodes; }; @@ -77,7 +77,7 @@ void usage(FILE *out) { fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname); fprintf(out, "usage: %s <target types> [options] <device>\n", progname); - fprintf(out, + fprintf(out, "\t<device>:block device or file (e.g /dev/sda or /tmp/ost1)\n" "\ttarget types:\n" "\t\t--ost: object storage, mutually exclusive with mdt,mgs\n" @@ -144,7 +144,7 @@ char *strscpy(char *dst, char *src, int buflen) { return strscat(dst, src, buflen); } -inline unsigned int +inline unsigned int dev_major (unsigned long long int __dev) { return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); @@ -165,16 +165,16 @@ int get_os_version() char release[4] = ""; fd = open("/proc/sys/kernel/osrelease", O_RDONLY); - if (fd < 0) + if (fd < 0) fprintf(stderr, "%s: Warning: Can't resolve kernel " "version, assuming 2.6\n", progname); else { read(fd, release, 4); close(fd); } - if (strncmp(release, "2.4.", 4) == 0) + if (strncmp(release, "2.4.", 4) == 0) version = 24; - else + else version = 26; } return version; @@ -184,7 +184,7 @@ int run_command(char *cmd, int cmdsz) { char log[] = "/tmp/mkfs_logXXXXXX"; int fd = -1, rc; - + if ((cmdsz - strlen(cmd)) < 6) { fatal(); fprintf(stderr, "Command buffer overflow: %.*s...\n", @@ -216,10 +216,10 @@ int run_command(char *cmd, int cmdsz) fclose(fp); } } - if (fd >= 0) + if (fd >= 0) remove(log); return rc; -} +} static int check_mtab_entry(char *spec) { @@ -251,14 +251,14 @@ int loop_setup(struct mkfs_opts *mop) { char loop_base[20]; char l_device[64]; - int i,ret = 0; + int i, ret = 0; /* Figure out the loop device names */ - if (!access("/dev/loop0", F_OK | R_OK)) + if (!access("/dev/loop0", F_OK | R_OK)) { strcpy(loop_base, "/dev/loop\0"); - else if (!access("/dev/loop/0", F_OK | R_OK)) + } else if (!access("/dev/loop/0", F_OK | R_OK)) { strcpy(loop_base, "/dev/loop/\0"); - else { + } else { fprintf(stderr, "%s: can't access loop devices\n", progname); return EACCES; } @@ -268,11 +268,11 @@ int loop_setup(struct mkfs_opts *mop) char cmd[PATH_MAX]; int cmdsz = sizeof(cmd); sprintf(l_device, "%s%d", loop_base, i); - if (access(l_device, F_OK | R_OK)) + if (access(l_device, F_OK | R_OK)) break; snprintf(cmd, cmdsz, "losetup %s > /dev/null 2>&1", l_device); ret = system(cmd); - + /* losetup gets 1 (ret=256) for non-set-up device */ if (ret) { /* Set up a loopback device to our file */ @@ -284,15 +284,15 @@ int loop_setup(struct mkfs_opts *mop) progname, ret, strerror(ret)); return ret; } - strscpy(mop->mo_loopdev, l_device, + strscpy(mop->mo_loopdev, l_device, sizeof(mop->mo_loopdev)); return ret; } } - + fprintf(stderr, "%s: out of loop devices!\n", progname); return EMFILE; -} +} int loop_cleanup(struct mkfs_opts *mop) { @@ -312,7 +312,7 @@ int is_block(char* devname) int ret = 0; ret = access(devname, F_OK); - if (ret != 0) + if (ret != 0) return 0; ret = stat(devname, &st); if (ret != 0) { @@ -322,14 +322,14 @@ int is_block(char* devname) return S_ISBLK(st.st_mode); } -__u64 get_device_size(char* device) +__u64 get_device_size(char* device) { int ret, fd; __u64 size = 0; fd = open(device, O_RDONLY); if (fd < 0) { - fprintf(stderr, "%s: cannot open %s: %s\n", + fprintf(stderr, "%s: cannot open %s: %s\n", progname, device, strerror(errno)); return 0; } @@ -342,16 +342,16 @@ __u64 get_device_size(char* device) __u32 lsize = 0; /* size in blocks */ ret = ioctl(fd, BLKGETSIZE, (void*)&lsize); - size = (__u64)lsize * 512; + size = (__u64)lsize * 512; } #endif close(fd); if (ret < 0) { - fprintf(stderr, "%s: size ioctl failed: %s\n", + fprintf(stderr, "%s: size ioctl failed: %s\n", progname, strerror(errno)); return 0; } - + vprint("device size = "LPU64"MB\n", size >> 20); /* return value in KB */ return size >> 10; @@ -360,7 +360,7 @@ __u64 get_device_size(char* device) int loop_format(struct mkfs_opts *mop) { int ret = 0; - + if (mop->mo_device_sz == 0) { fatal(); fprintf(stderr, "loop device requires a --device-size= " @@ -369,10 +369,18 @@ int loop_format(struct mkfs_opts *mop) } ret = creat(mop->mo_device, S_IRUSR|S_IWUSR); + if (ret < 0) { + ret = errno; + fprintf(stderr, "%s: Unable to create backing store: %d\n", + progname, ret); + } else { + close(ret); + } + ret = truncate(mop->mo_device, mop->mo_device_sz * 1024); if (ret != 0) { ret = errno; - fprintf(stderr, "%s: Unable to create backing store: %d\n", + fprintf(stderr, "%s: Unable to truncate backing store: %d\n", progname, ret); } @@ -388,7 +396,7 @@ static int file_in_dev(char *file_name, char *dev_name) int i; /* Construct debugfs command line. */ - snprintf(debugfs_cmd, sizeof(debugfs_cmd), + snprintf(debugfs_cmd, sizeof(debugfs_cmd), "debugfs -c -R 'stat %s' %s 2>&1 | egrep '(Inode|unsupported)'", file_name, dev_name); @@ -409,7 +417,7 @@ static int file_in_dev(char *file_name, char *dev_name) if (strstr(debugfs_cmd, "unsupported feature")) { fprintf(stderr, "In all likelihood, the " "'unsupported feature' is 'extents', which " - "older debugfs does not understand.\n" + "older debugfs does not understand.\n" "Use e2fsprogs-1.38-cfs1 or later, available " "from ftp://ftp.lustre.org/pub/lustre/other/" "e2fsprogs/\n"); @@ -424,19 +432,20 @@ static int file_in_dev(char *file_name, char *dev_name) static int is_lustre_target(struct mkfs_opts *mop) { int rc; + vprint("checking for existing Lustre data: "); - + if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))) { - vprint("found %s\n", + vprint("found %s\n", (rc == 1) ? MOUNT_DATA_FILE : "extents"); /* in the -1 case, 'extents' means this really IS a lustre target */ return rc; } - - if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { + + if ((rc = file_in_dev(LAST_RCVD, mop->mo_device))) { vprint("found %s\n", LAST_RCVD); - return rc; + return rc; } vprint("not found\n"); @@ -455,7 +464,7 @@ int make_lustre_backfs(struct mkfs_opts *mop) if (mop->mo_device_sz != 0) { if (mop->mo_device_sz < 8096){ fprintf(stderr, "%s: size of filesystem must be larger " - "than 8MB, but is set to %lluKB\n", + "than 8MB, but is set to %lldKB\n", progname, (long long)mop->mo_device_sz); return EINVAL; } @@ -480,38 +489,44 @@ int make_lustre_backfs(struct mkfs_opts *mop) long journal_sz = 0, max_sz; if (device_sz > 1024 * 1024) /* 1GB */ journal_sz = (device_sz / 102400) * 4; + /* cap journal size at 1GB */ + if (journal_sz > 1024L) + journal_sz = 1024L; /* man mkfs.ext3 */ - max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */ + max_sz = (256000 * L_BLOCK_SIZE) >> 20; /* 1GB */ if (journal_sz > max_sz) journal_sz = max_sz; if (journal_sz) { sprintf(buf, " -J size=%ld", journal_sz); - strscat(mop->mo_mkfsopts, buf, + strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } } - /* bytes_per_inode: disk size / num inodes */ + /* Bytes_per_inode: disk size / num inodes */ if (strstr(mop->mo_mkfsopts, "-i") == NULL) { long bytes_per_inode = 0; - - if (IS_MDT(&mop->mo_ldd)) + + if (IS_MDT(&mop->mo_ldd)) bytes_per_inode = 4096; /* Allocate fewer inodes on large OST devices. Most - filesystems can be much more aggressive than even + filesystems can be much more aggressive than even this. */ - if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000))) - bytes_per_inode = 16384; - + if ((IS_OST(&mop->mo_ldd) && (device_sz > 100000000))) + bytes_per_inode = 16384; /* > 100 Gb device */ + + if (bytes_per_inode > 0) { sprintf(buf, " -i %ld", bytes_per_inode); strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } } - - /* Inode size (for extended attributes) */ + + /* Inode size (for extended attributes). The LOV EA size is + * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), + * and we want some margin above that for ACLs, other EAs... */ if (strstr(mop->mo_mkfsopts, "-I") == NULL) { long inode_size = 0; if (IS_MDT(&mop->mo_ldd)) { @@ -535,11 +550,10 @@ int make_lustre_backfs(struct mkfs_opts *mop) strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } - } if (verbose < 2) { - strscat(mop->mo_mkfsopts, " -q", + strscat(mop->mo_mkfsopts, " -q", sizeof(mop->mo_mkfsopts)); } @@ -554,35 +568,33 @@ int make_lustre_backfs(struct mkfs_opts *mop) sizeof(mop->mo_mkfsopts)); } - /* Allow reformat of full devices (as opposed to + /* Allow reformat of full devices (as opposed to partitions.) We already checked for mounted dev. */ strscat(mop->mo_mkfsopts, " -F", sizeof(mop->mo_mkfsopts)); - snprintf(mkfs_cmd, sizeof(mkfs_cmd), + snprintf(mkfs_cmd, sizeof(mkfs_cmd), "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE, mop->mo_ldd.ldd_svname); - } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) { long journal_sz = 0; /* FIXME default journal size */ - if (journal_sz > 0) { + if (journal_sz > 0) { sprintf(buf, " --journal_size %ld", journal_sz); strscat(mop->mo_mkfsopts, buf, sizeof(mop->mo_mkfsopts)); } snprintf(mkfs_cmd, sizeof(mkfs_cmd), "mkreiserfs -ff "); - } else { fprintf(stderr,"%s: unsupported fs type: %d (%s)\n", - progname, mop->mo_ldd.ldd_mount_type, + progname, mop->mo_ldd.ldd_mount_type, MT_STR(&mop->mo_ldd)); return EINVAL; } /* For loop device format the dev, not the filename */ dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) + if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; - + vprint("formatting backing filesystem %s on %s\n", MT_STR(&mop->mo_ldd), dev); vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname); @@ -613,7 +625,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) { printf("\n %s:\n", str); printf("Target: %s\n", ldd->ldd_svname); - if (ldd->ldd_svindex == INDEX_UNASSIGNED) + if (ldd->ldd_svindex == INDEX_UNASSIGNED) printf("Index: unassigned\n"); else printf("Index: %d\n", ldd->ldd_svindex); @@ -623,7 +635,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); printf(" (%s%s%s%s%s%s%s%s)\n", - IS_MDT(ldd) ? "MDT ":"", + IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", ldd->ldd_flags & LDD_F_NEED_INDEX ? "needs_index ":"", @@ -655,16 +667,16 @@ int write_local_files(struct mkfs_opts *mop) } dev = mop->mo_device; - if (mop->mo_flags & MO_IS_LOOP) + if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; - + ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL); if (ret) { - fprintf(stderr, "%s: Unable to mount %s: %s\n", + fprintf(stderr, "%s: Unable to mount %s: %s\n", progname, dev, strerror(errno)); ret = errno; if (errno == ENODEV) { - fprintf(stderr, "Is the %s module available?\n", + fprintf(stderr, "Is the %s module available?\n", MT_STR(&mop->mo_ldd)); } goto out_rmdir; @@ -674,7 +686,17 @@ int write_local_files(struct mkfs_opts *mop) sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR); ret = mkdir(filepnm, 0777); if ((ret != 0) && (errno != EEXIST)) { - fprintf(stderr, "%s: Can't make configs dir %s: %s\n", + fprintf(stderr, "%s: Can't make configs dir %s (%d)\n", + progname, filepnm, strerror(errno)); + goto out_umnt; + } else if (errno == EEXIST) { + ret = 0; + } + + sprintf(filepnm, "%s/%s", mntpt, "ROOT"); + ret = mkdir(filepnm, 0777); + if ((ret != 0) && (errno != EEXIST)) { + fprintf(stderr, "%s: Can't make ROOT dir %s (%d)\n", progname, filepnm, strerror(errno)); goto out_umnt; } else if (errno == EEXIST) { @@ -693,19 +715,19 @@ int write_local_files(struct mkfs_opts *mop) } fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); fclose(filep); - + /* COMPAT_146 */ #ifdef TUNEFS /* Check for upgrade */ - if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) + if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) == (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) { char cmd[128]; char *term; int cmdsz = sizeof(cmd); vprint("Copying old logs\n"); - + /* Copy the old client log to fsname-client */ - sprintf(filepnm, "%s/%s/%s-client", + sprintf(filepnm, "%s/%s/%s-client", mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); snprintf(cmd, cmdsz, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR, filepnm); @@ -717,24 +739,24 @@ int write_local_files(struct mkfs_opts *mop) "find the client log for fs %s and " "copy it manually into %s/%s-client, " "then umount.\n", - mop->mo_device, + mop->mo_device, mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname); goto out_umnt; } /* We need to use the old mdt log because otherwise mdt won't - have complete lov if old clients connect before all + have complete lov if old clients connect before all servers upgrade. */ /* Copy the old mdt log to fsname-MDT0000 (get old name from mdt_UUID) */ ret = 1; - strscpy(filepnm, mop->mo_ldd.ldd_uuid, sizeof(filepnm)); + strscpy(filepnm, (char *)mop->mo_ldd.ldd_uuid, sizeof(filepnm)); term = strstr(filepnm, "_UUID"); if (term) { *term = '\0'; snprintf(cmd, cmdsz, "cp %s/%s/%s %s/%s/%s", - mntpt, MDT_LOGS_DIR, filepnm, + mntpt, MDT_LOGS_DIR, filepnm, mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_svname); ret = run_command(cmd, cmdsz); @@ -746,7 +768,7 @@ int write_local_files(struct mkfs_opts *mop) "find the MDT log for fs %s and " "copy it manually into %s/%s, " "then umount.\n", - mop->mo_device, + mop->mo_device, mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_svname); goto out_umnt; @@ -757,7 +779,7 @@ int write_local_files(struct mkfs_opts *mop) out_umnt: - umount(mntpt); + umount(mntpt); out_rmdir: rmdir(mntpt); return ret; @@ -816,7 +838,7 @@ int read_local_files(struct mkfs_opts *mop) progname, LAST_RCVD, ret); goto out_rmdir; } - + filep = fopen(filepnm, "r"); if (!filep) { fprintf(stderr, "%s: Unable to open %s: %s\n", @@ -853,7 +875,7 @@ int read_local_files(struct mkfs_opts *mop) /* We must co-locate so mgs can see old logs. If user doesn't want this, they can copy the old logs manually and re-tunefs. */ - mop->mo_ldd.ldd_flags = + mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS; mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index; } else { @@ -871,9 +893,9 @@ int read_local_files(struct mkfs_opts *mop) /* If there's a LOGS dir, it's an MDT */ if ((ret = access(filepnm, F_OK)) == 0) { mop->mo_ldd.ldd_flags = - LDD_F_SV_TYPE_MDT | + LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS; - /* Old MDT's are always index 0 + /* Old MDT's are always index 0 (pre CMD) */ mop->mo_ldd.ldd_svindex = 0; } else { @@ -884,20 +906,20 @@ int read_local_files(struct mkfs_opts *mop) } } } - + ret = 0; - memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, + memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid, sizeof(mop->mo_ldd.ldd_uuid)); mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14; } /* end COMPAT_146 */ -out_close: +out_close: fclose(filep); out_rmdir: snprintf(cmd, cmdsz, "rm -rf %s", tmpdir); run_command(cmd, cmdsz); - if (ret) + if (ret) verrprint("Failed to read old data (%d)\n", ret); return ret; } @@ -910,11 +932,11 @@ void set_defaults(struct mkfs_opts *mop) mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN; mop->mo_mgs_failnodes = 0; strcpy(mop->mo_ldd.ldd_fsname, "lustre"); - if (get_os_version() == 24) + if (get_os_version() == 24) mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3; - else + else mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - + mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED; mop->mo_stripe_count = 1; } @@ -932,7 +954,7 @@ static int add_param(char *buf, char *key, char *val) int start = strlen(buf); int keylen = 0; - if (key) + if (key) keylen = strlen(key); if (start + 1 + keylen + strlen(val) >= end) { fprintf(stderr, "%s: params are too long-\n%s %s%s\n", @@ -952,26 +974,30 @@ static char *convert_hostnames(char *s1) char *converted, *s2 = 0, *c; int left = MAXNIDSTR; lnet_nid_t nid; - + converted = malloc(left); + if (converted == NULL) { + return NULL; + } + c = converted; while ((left > 0) && ((s2 = strsep(&s1, ",: \0")))) { nid = libcfs_str2nid(s2); if (nid == LNET_NID_ANY) { - if (*s2 == '/') + if (*s2 == '/') /* end of nids */ break; - fprintf(stderr, "%s: Can't parse NID '%s'\n", + fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s2); free(converted); return NULL; } - if (strncmp(libcfs_nid2str(nid), "127.0.0.1", + if (strncmp(libcfs_nid2str(nid), "127.0.0.1", strlen("127.0.0.1")) == 0) { fprintf(stderr, "%s: The NID '%s' resolves to the " "loopback address '%s'. Lustre requires a " - "non-loopback address.\n", + "non-loopback address.\n", progname, s2, libcfs_nid2str(nid)); free(converted); return NULL; @@ -1021,7 +1047,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, int opt; int rc, longidx; - while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != + while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { switch (opt) { case 'b': { @@ -1054,7 +1080,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, printf("Configdev not implemented\n"); return 1; case 'd': - mop->mo_device_sz = atol(optarg); + mop->mo_device_sz = atol(optarg); break; case 'e': mop->mo_ldd.ldd_params[0] = '\0'; @@ -1063,18 +1089,18 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, break; case 'f': { char *nids = convert_hostnames(optarg); - if (!nids) + if (!nids) return 1; - rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, - nids); + rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, + nids); /* Combo needs to add MDT failnodes as MGS failnodes as well */ if (!rc && IS_MGS(&mop->mo_ldd)) { - rc = add_param(mop->mo_ldd.ldd_params, - PARAM_MGSNODE, nids); + rc = add_param(mop->mo_ldd.ldd_params, + PARAM_MGSNODE, nids); } free(nids); - if (rc) + if (rc) return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; @@ -1087,7 +1113,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, usage(stdout); return 1; case 'i': - if (!(mop->mo_ldd.ldd_flags & + if (!(mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_VIRGIN | LDD_F_WRITECONF))) { fprintf(stderr, "%s: cannot change the index of" @@ -1103,13 +1129,13 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, } break; case 'k': - strscpy(mop->mo_mkfsopts, optarg, + strscpy(mop->mo_mkfsopts, optarg, sizeof(mop->mo_mkfsopts)); break; case 'L': { char *tmp; if (!(mop->mo_flags & MO_FORCEFORMAT) && - (!(mop->mo_ldd.ldd_flags & + (!(mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_VIRGIN | LDD_F_WRITECONF)))) { fprintf(stderr, "%s: cannot change the name of" @@ -1126,18 +1152,18 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, "filesystem name\n", progname, *tmp); return 1; } - strscpy(mop->mo_ldd.ldd_fsname, optarg, + strscpy(mop->mo_ldd.ldd_fsname, optarg, sizeof(mop->mo_ldd.ldd_fsname)); break; } case 'm': { char *nids = convert_hostnames(optarg); - if (!nids) + if (!nids) return 1; - rc = add_param(mop->mo_ldd.ldd_params, PARAM_MGSNODE, - nids); + rc = add_param(mop->mo_ldd.ldd_params, PARAM_MGSNODE, + nids); free(nids); - if (rc) + if (rc) return rc; mop->mo_mgs_failnodes++; break; @@ -1159,7 +1185,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, break; case 'p': rc = add_param(mop->mo_ldd.ldd_params, NULL, optarg); - if (rc) + if (rc) return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; @@ -1199,6 +1225,225 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, return 0; } +#include <lustre/libiam.h> + +#define LDISKFS_IOC_GETVERSION _IOR('f', 3, long) + +static int mkfs_iam_insert(int key_need_convert, char *keybuf, + int rec_need_convert, char *recbuf, char *filename) +{ + int fd; + int ret; + struct iam_uapi_info ua; + + fd = iam_open(filename, &ua); + if (fd < 0) { + fprintf(stderr, "failed to iam_open %s\n", filename); + return 1; + } + + ret = iam_insert(fd, &ua, + key_need_convert, keybuf, + rec_need_convert, recbuf); + iam_close(fd); + if (ret) { + fprintf(stderr, "failed to iam_insert %s\n", filename); + return 1; + } else { + return 0; + } +} + +static int touch_file(char *filename) +{ + int fd; + + if (filename == NULL) { + return 1; + } + + fd = open(filename, O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + return 1; + } else { + close(fd); + return 0; + } +} + +static int get_generation(char *filename, unsigned long *result) +{ + int fd; + int ret; + + if (filename == NULL) { + return 1; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: failed to open %s\n", + __FUNCTION__, filename); + return 1; + } + + ret = ioctl(fd, LDISKFS_IOC_GETVERSION, result); + close(fd); + + return ((ret < 0) ? ret : 0); +} + +static int mkfs_mdt(struct mkfs_opts *mop) +{ + char mntpt[] = "/tmp/mntXXXXXX"; + char fstype[] = "ldiskfs"; + char filepnm[128]; + char recbuf[64]; + char *source; + int ret; + unsigned long generation; + struct stat st; + + source = mop->mo_device; + if (mop->mo_flags & MO_IS_LOOP) { + source = mop->mo_loopdev; + } + + if ((source == NULL) || (*source == 0)) { + return 1; + } + + if (!mkdtemp(mntpt)) { + fprintf(stderr, "%s: failed to mkdtemp %s\n", + __FUNCTION__, mntpt); + return errno; + } + + ret = mount(source, mntpt, fstype, 0, NULL); + if (ret) { + goto out_rmdir; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "seq_ctl"); + ret = touch_file(filepnm); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "seq_srv"); + ret = touch_file(filepnm); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "last_received"); + ret = touch_file(filepnm); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "lov_objid"); + ret = touch_file(filepnm); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); + ret = iam_creat(filepnm, FMT_LVAR, L_BLOCK_SIZE, 4, 17, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "fld"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 8, 8, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "orphans"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 20, 8, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.16"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 16, 8, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.5"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 5, 8, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, CAPA_KEYS); + ret = touch_file(filepnm); + if (ret) { + goto out_umount; + } + + umount(mntpt); + ret = mount(source, mntpt, fstype, 0, NULL); + if (ret) { + goto out_rmdir; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); + ret = iam_polymorph(filepnm, 040755); + if (ret) { + perror("IAM_IOC_POLYMORPH"); + goto out_umount; + } + + umount(mntpt); + ret = mount(source, mntpt, fstype, 0, NULL); + if (ret) { + goto out_rmdir; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "fld"); + ret = mkfs_iam_insert(1, "0000000000000002", 1, "0000000000000000", filepnm); + if (ret) { + goto out_umount; + } + + ret = mkfs_iam_insert(1, "0000000000000001", 1, "0000000000000000", filepnm); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); + ret = stat(filepnm, &st); + if (ret) { + goto out_umount; + } + + ret = get_generation(filepnm, &generation); + if (ret) { + goto out_umount; + } + + snprintf(recbuf, sizeof(recbuf) - 1, "110000000000000001%8.8x%8.8x", + (unsigned int)st.st_ino, (unsigned int)generation); + ret = mkfs_iam_insert(0, ".", 1, recbuf, filepnm); + if (ret) { + goto out_umount; + } + + ret = mkfs_iam_insert(0, "..", 1, recbuf, filepnm); + if (ret) { + goto out_umount; + } + +out_umount: + umount(mntpt); +out_rmdir: + rmdir(mntpt); + return ret; +} + int main(int argc, char *const argv[]) { struct mkfs_opts mop; @@ -1206,7 +1451,7 @@ int main(int argc, char *const argv[]) char *mountopts = NULL; char always_mountopts[512] = ""; char default_mountopts[512] = ""; - int ret = 0; + int ret = 0; if ((progname = strrchr(argv[0], '/')) != NULL) progname++; @@ -1226,15 +1471,15 @@ int main(int argc, char *const argv[]) /* Are we using a loop device? */ ret = is_block(mop.mo_device); - if (ret < 0) + if (ret < 0) goto out; - if (ret == 0) + if (ret == 0) mop.mo_flags |= MO_IS_LOOP; #ifdef TUNEFS /* For tunefs, we must read in the old values before parsing any new ones. */ - + /* Check whether the disk has already been formatted by mkfs.lustre */ ret = is_lustre_target(&mop); if (ret == 0) { @@ -1255,16 +1500,16 @@ int main(int argc, char *const argv[]) if (strstr(mop.mo_ldd.ldd_params, PARAM_MGSNODE)) mop.mo_mgs_failnodes++; - if (verbose > 0) + if (verbose > 0) print_ldd("Read previous values", &(mop.mo_ldd)); #endif ret = parse_opts(argc, argv, &mop, &mountopts); - if (ret) + if (ret) goto out; ldd = &mop.mo_ldd; - + if (!(IS_MDT(ldd) || IS_OST(ldd) || IS_MGS(ldd))) { fatal(); fprintf(stderr, "must set target type: MDT,OST,MGS\n"); @@ -1287,18 +1532,28 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } - +#if 0 + /* + * Comment out these 2 checks temporarily, since for multi-MDSes + * in single node only 1 mds node could have mgs service + */ + if (IS_MDT(ldd) && !IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { + verrprint("No management node specified, adding MGS to this " + "MDT\n"); + ldd->ldd_flags |= LDD_F_SV_TYPE_MGS; + } if (!IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { fatal(); - if (IS_MDT(ldd)) + if (IS_MDT(ldd)) fprintf(stderr, "Must specify --mgs or --mgsnode=\n"); - else + else fprintf(stderr, "Must specify --mgsnode=\n"); ret = EINVAL; goto out; } +#endif - /* These are the permanent mount options (always included) */ + /* These are the permanent mount options (always included) */ switch (ldd->ldd_mount_type) { case LDD_MT_EXT3: case LDD_MT_LDISKFS: @@ -1311,9 +1566,9 @@ int main(int argc, char *const argv[]) strscat(always_mountopts, ",asyncdel", sizeof(always_mountopts)); /* NB: Files created while extents are enabled cannot be read - if mounted with a kernel that doesn't include the CFS + if mounted with a kernel that doesn't include the CFS patches! */ - if (IS_OST(ldd) && + if (IS_OST(ldd) && (ldd->ldd_mount_type == LDD_MT_LDISKFS || ldd->ldd_mount_type == LDD_MT_LDISKFS2)) { strscat(default_mountopts, ",extents,mballoc", @@ -1335,20 +1590,20 @@ int main(int argc, char *const argv[]) ret = EINVAL; goto out; } - } + } if (mountopts) { /* If user specifies mount opts, don't use defaults, but always use always_mountopts */ - sprintf(ldd->ldd_mount_opts, "%s,%s", + sprintf(ldd->ldd_mount_opts, "%s,%s", always_mountopts, mountopts); } else { #ifdef TUNEFS - if (ldd->ldd_mount_opts[0] == 0) + if (ldd->ldd_mount_opts[0] == 0) /* use the defaults unless old opts exist */ #endif { - sprintf(ldd->ldd_mount_opts, "%s%s", + sprintf(ldd->ldd_mount_opts, "%s%s", always_mountopts, default_mountopts); } } @@ -1370,14 +1625,14 @@ int main(int argc, char *const argv[]) /* Create the loopback file */ if (mop.mo_flags & MO_IS_LOOP) { ret = access(mop.mo_device, F_OK); - if (ret) + if (ret) ret = errno; #ifndef TUNEFS /* mkfs.lustre */ /* Reformat the loopback file */ if (ret || (mop.mo_flags & MO_FORCEFORMAT)) ret = loop_format(&mop); #endif - if (ret == 0) + if (ret == 0) ret = loop_setup(&mop); if (ret) { fatal(); @@ -1393,7 +1648,7 @@ int main(int argc, char *const argv[]) ret = is_lustre_target(&mop); if (ret) { fatal(); - fprintf(stderr, "Device %s was previously formatted " + fprintf(stderr, "Device %s was previously formatted " "for lustre. Use --reformat to reformat it, " "or tunefs.lustre to modify.\n", mop.mo_device); @@ -1418,10 +1673,20 @@ int main(int argc, char *const argv[]) goto out; } +#ifndef TUNEFS /* mkfs.lustre */ + if (IS_MDT(ldd)) { + ret = mkfs_mdt(&mop); + if (ret != 0) { + fprintf(stderr, "failed to mkfs_mdt\n"); + goto out; + } + } +#endif + out: - loop_cleanup(&mop); + loop_cleanup(&mop); - /* Fix any crazy return values from system() */ + /* Fix any crazy return values from system() */ if (ret && ((ret & 255) == 0)) return (1); if (ret) diff --git a/lustre/utils/module_cleanup.sh b/lustre/utils/module_cleanup.sh index 92f0cce..a30faeb 100755 --- a/lustre/utils/module_cleanup.sh +++ b/lustre/utils/module_cleanup.sh @@ -19,4 +19,5 @@ echo "Removing Lustre modules from "$MDIR rm -f $MDIR/* depmod -a rm -f /sbin/mount.lustre - +rm -f /usr/sbin/l_getidentity +rm -f /usr/sbin/l_facl diff --git a/lustre/utils/module_setup.sh b/lustre/utils/module_setup.sh index 2aa2f59..8597bcd 100755 --- a/lustre/utils/module_setup.sh +++ b/lustre/utils/module_setup.sh @@ -1,6 +1,7 @@ #!/bin/sh MDIR=/lib/modules/`uname -r`/lustre +/bin/rm -rf $MDIR mkdir -p $MDIR KVER=26 @@ -20,26 +21,39 @@ fi echo "Copying modules from local build dir to "$MDIR -cp -u ../../lnet/libcfs/libcfs.$EXT $MDIR cp -u ../../lnet/lnet/lnet.$EXT $MDIR +cp -u ../../lnet/libcfs/libcfs.$EXT $MDIR cp -u ../../lnet/klnds/socklnd/ksocklnd.$EXT $MDIR -cp -u ../lvfs/lvfs.$EXT $MDIR -cp -u ../obdclass/obdclass.$EXT $MDIR +cp -u ../../lnet/tests/pingcli.$EXT $MDIR +cp -u ../../lnet/tests/pingsrv.$EXT $MDIR +cp -u ../mgs/mgs.$EXT $MDIR +cp -u ../quota/lquota.$EXT $MDIR +cp -u ../quota/quotacheck_test.$EXT $MDIR +cp -u ../quota/quotactl_test.$EXT $MDIR cp -u ../ptlrpc/ptlrpc.$EXT $MDIR -cp -u ../mdc/mdc.$EXT $MDIR -cp -u ../osc/osc.$EXT $MDIR +cp -u ../ptlrpc/gss/ptlrpc_gss.$EXT $MDIR +cp -u ../fld/fld.$EXT $MDIR cp -u ../lov/lov.$EXT $MDIR +cp -u ../mdc/mdc.$EXT $MDIR +cp -u ../llite/lustre.$EXT $MDIR +cp -u ../obdclass/llog_test.$EXT $MDIR +cp -u ../obdclass/obdclass.$EXT $MDIR +cp -u ../mdt/mdt.$EXT $MDIR +cp -u ../fid/fid.$EXT $MDIR cp -u ../mds/mds.$EXT $MDIR -cp -u ../lvfs/$FSFLT.$EXT $MDIR -[ $KVER == "26" ] && cp -u ../ldiskfs/ldiskfs.$EXT $MDIR -[ $KVER == "26" ] && cp -u ../ldiskfs2/ldiskfs2.$EXT $MDIR -cp -u ../ost/ost.$EXT $MDIR +cp -u ../osd/osd.$EXT $MDIR +cp -u ../obdecho/obdecho.$EXT $MDIR cp -u ../obdfilter/obdfilter.$EXT $MDIR -cp -u ../llite/lustre.$EXT $MDIR +cp -u ../cmm/cmm.$EXT $MDIR +[ $KVER == "26" ] && cp -u ../../ldiskfs/ldiskfs/ldiskfs.$EXT $MDIR +cp -u ../lvfs/quotafmt_test.$EXT $MDIR +cp -u ../mdd/mdd.$EXT $MDIR +cp -u ../osc/osc.$EXT $MDIR +cp -u ../ost/ost.$EXT $MDIR cp -u ../mgc/mgc.$EXT $MDIR -cp -u ../mgs/mgs.$EXT $MDIR -cp -u ../quota/lquota.$EXT $MDIR -cp -u ../obdecho/obdecho.$EXT $MDIR +cp -u ../lvfs/$FSFLT.$EXT $MDIR +cp -u ../lvfs/lvfs.$EXT $MDIR +cp -u ../lmv/lmv.$EXT $MDIR # prevent warnings on my uml rm -f /lib/modules/`uname -r`/modules.* @@ -48,6 +62,8 @@ depmod -A -e echo "Copying mount from local build dir to "$MDIR cp -u ../utils/mount.lustre /sbin/. +cp -u ../utils/l_getidentity /usr/sbin/. +cp -u ../utils/l_facl /usr/sbin/. MP="/sbin/modprobe" MPI="$MP --ignore-install" diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 17c5471..3db5dc7 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -141,6 +141,7 @@ static char *convert_hostnames(char *s1) sep = *s2; *s2 = '\0'; nid = libcfs_str2nid(s1); + *s2 = sep; /* back to original string */ if (nid == LNET_NID_ANY) goto out_free; c += snprintf(c, left, "%s%c", libcfs_nid2str(nid), sep); @@ -243,7 +244,7 @@ int parse_options(char *orig_options, int *flagp) int main(int argc, char *const argv[]) { char default_options[] = ""; - char *source, *target, *ptr; + char *usource, *source, *target, *ptr; char *options, *optcopy, *orig_options = default_options; int i, nargs = 3, opt, rc, flags, optlen; static struct option long_opt[] = { @@ -301,7 +302,8 @@ int main(int argc, char *const argv[]) usage(stderr); } - source = convert_hostnames(argv[optind]); + usource = argv[optind]; + source = convert_hostnames(usource); target = argv[optind + 1]; ptr = target + strlen(target) - 1; while ((ptr > target) && (*ptr == '/')) { @@ -309,14 +311,14 @@ int main(int argc, char *const argv[]) ptr--; } - if (!source) { + if (!usource || !source) { usage(stderr); } if (verbose) { for (i = 0; i < argc; i++) printf("arg[%d] = %s\n", i, argv[i]); - printf("source = %s, target = %s\n", source, target); + printf("source = %s (%s), target = %s\n", usource, source, target); printf("options = %s\n", orig_options); } @@ -330,17 +332,17 @@ int main(int argc, char *const argv[]) } if (!force) { - rc = check_mtab_entry(source, target, "lustre"); + rc = check_mtab_entry(usource, target, "lustre"); if (rc && !(flags & MS_REMOUNT)) { fprintf(stderr, "%s: according to %s %s is " "already mounted on %s\n", - progname, MOUNTED, source, target); + progname, MOUNTED, usource, target); return(EEXIST); } if (!rc && (flags & MS_REMOUNT)) { fprintf(stderr, "%s: according to %s %s is " "not already mounted on %s\n", - progname, MOUNTED, source, target); + progname, MOUNTED, usource, target); return(ENOENT); } } @@ -380,14 +382,14 @@ int main(int argc, char *const argv[]) rc = errno; - cli = strrchr(source, ':'); + cli = strrchr(usource, ':'); if (cli && (strlen(cli) > 2)) cli += 2; else cli = NULL; fprintf(stderr, "%s: mount %s at %s failed: %s\n", progname, - source, target, strerror(errno)); + usource, target, strerror(errno)); if (errno == ENODEV) fprintf(stderr, "Are the lustre modules loaded?\n" "Check /etc/modprobe.conf and /proc/filesystems" @@ -406,16 +408,16 @@ int main(int argc, char *const argv[]) } if (errno == EALREADY) fprintf(stderr, "The target service is already running." - " (%s)\n", source); + " (%s)\n", usource); if (errno == ENXIO) fprintf(stderr, "The target service failed to start " "(bad config log?) (%s). " - "See /var/log/messages.\n", source); + "See /var/log/messages.\n", usource); if (errno == EIO) fprintf(stderr, "Is the MGS running?\n"); if (errno == EADDRINUSE) fprintf(stderr, "The target service's index is already " - "in use. (%s)\n", source); + "in use. (%s)\n", usource); if (errno == EINVAL) { fprintf(stderr, "This may have multiple causes.\n"); if (cli) @@ -426,14 +428,14 @@ int main(int argc, char *const argv[]) } /* May as well try to clean up loop devs */ - if (strncmp(source, "/dev/loop", 9) == 0) { + if (strncmp(usource, "/dev/loop", 9) == 0) { char cmd[256]; - sprintf(cmd, "/sbin/losetup -d %s", source); + sprintf(cmd, "/sbin/losetup -d %s", usource); system(cmd); } } else if (!nomtab) { - rc = update_mtab_entry(source, target, "lustre", orig_options, + rc = update_mtab_entry(usource, target, "lustre", orig_options, 0,0,0); } diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index fdd18cf..8aa8cd7 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -35,11 +35,12 @@ #include <stdio.h> #include <stdarg.h> #include <signal.h> +#include <ctype.h> #include "obdctl.h" #include <obd.h> /* for struct lov_stripe_md */ -#include <linux/lustre_build_version.h> +#include <lustre/lustre_build_version.h> #include <unistd.h> #include <sys/un.h> @@ -47,7 +48,6 @@ #include <sys/time.h> #include <errno.h> #include <string.h> -#include <ctype.h> #ifdef HAVE_ASM_PAGE_H #include <asm/page.h> /* needed for PAGE_SIZE - rread */ @@ -96,7 +96,6 @@ static int max = sizeof(rawbuf); static int cur_device = -1; - #define MAX_STRIPES 170 struct lov_oinfo lov_oinfos[MAX_STRIPES]; diff --git a/lustre/utils/req-layout.c b/lustre/utils/req-layout.c new file mode 100644 index 0000000..389d158 --- /dev/null +++ b/lustre/utils/req-layout.c @@ -0,0 +1,149 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * req_layout.c + * User-level tool for printing request layouts + * + * Copyright (c) 2006 Cluster File Systems, Inc. + * Author: Nikita Danilov <nikita@clusterfs.com> + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <liblustre.h> +#include <lustre/lustre_idl.h> + +#define __REQ_LAYOUT_USER__ (1) + +#define lustre_swab_generic_32s NULL +#define lustre_swab_lu_range NULL +#define lustre_swab_md_fld NULL +#define lustre_swab_mdt_body NULL +#define lustre_swab_mdt_epoch NULL +#define lustre_swab_ptlrpc_body NULL +#define lustre_swab_obd_statfs NULL +#define lustre_swab_connect NULL +#define lustre_swab_ldlm_request NULL +#define lustre_swab_ldlm_reply NULL +#define lustre_swab_ldlm_intent NULL +#define lustre_swab_lov_mds_md NULL +#define lustre_swab_mdt_rec_unlink NULL +#define lustre_swab_mdt_rec_link NULL +#define lustre_swab_mdt_rec_rename NULL +#define lustre_swab_mdt_rec_create NULL +#define lustre_swab_mdt_rec_setattr NULL +#define lustre_swab_lustre_capa NULL +#define lustre_swab_lustre_capa_key NULL + +/* + * Yes, include .c file. + */ +#include "../ptlrpc/layout.c" + +void usage(void) +{ + fprintf(stderr, "req-layout -- prints lustre request layouts\n"); +} + +void printt_field(const char *prefix, const struct req_msg_field *fld) +{ +} + +void print_layout(const struct req_format *rf) +{ + int j; + int k; + + int offset; + int variable; + + static const char *prefix[RCL_NR] = { + [RCL_CLIENT] = "C", + [RCL_SERVER] = "S" + }; + + printf("L %s (%i/%i)\n", rf->rf_name, + rf->rf_fields[RCL_CLIENT].nr, rf->rf_fields[RCL_SERVER].nr); + + for (j = 0; j < RCL_NR; ++j) { + offset = 0; + variable = 0; + for (k = 0; k < rf->rf_fields[j].nr; ++k) { + const struct req_msg_field *fld; + + fld = rf->rf_fields[j].d[k]; + + printf(" F%s %i [%03.3i%s %-20.20s (", + prefix[j], k, offset, + variable ? " + ...]" : "] ", + fld->rmf_name); + if (fld->rmf_size > 0) { + printf("%3.3i) ", fld->rmf_size); + offset += fld->rmf_size; + } else { + printf("var) "); + variable = 1; + } + if (fld->rmf_flags & RMF_F_STRING) + printf("string"); + printf("\n"); + } + if (k > 0 && j != RCL_NR - 1) + printf(" -----------------------------------\n"); + } +} + +void print_layouts(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(req_formats); ++i) { + print_layout(req_formats[i]); + printf("\n"); + } +} + +int main(int argc, char **argv) +{ + int opt; + int verbose; + + verbose = 0; + do { + opt = getopt(argc, argv, "hb:k:r:p:v"); + switch (opt) { + case 'v': + verbose++; + case -1: + break; + case '?': + default: + fprintf(stderr, "Unable to parse options."); + case 'h': + usage(); + return 0; + } + } while (opt != -1); + print_layouts(); + return 0; +} diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh new file mode 100755 index 0000000..5b23988 --- /dev/null +++ b/lustre/utils/rmmod_all.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +SRCDIR=`dirname $0` +PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH + +rmmod pingsrv +rmmod pingcli +rmmod quotacheck_test +rmmod quotactl_test +rmmod quotafmt_test + +lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 +# do it again, in case we tried to unload ksocklnd too early +lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index b45ee6f..5fe8169 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -152,10 +152,11 @@ static void check_obd_connect_data(void) CHECK_MEMBER(obd_connect_data, ocd_ibits_known); CHECK_MEMBER(obd_connect_data, ocd_nllu); CHECK_MEMBER(obd_connect_data, ocd_nllg); + CHECK_MEMBER(obd_connect_data, ocd_transno); + CHECK_MEMBER(obd_connect_data, ocd_group); CHECK_MEMBER(obd_connect_data, padding1); CHECK_MEMBER(obd_connect_data, padding2); CHECK_MEMBER(obd_connect_data, padding3); - CHECK_MEMBER(obd_connect_data, padding4); CHECK_CDEFINE(OBD_CONNECT_RDONLY); CHECK_CDEFINE(OBD_CONNECT_INDEX); @@ -165,9 +166,8 @@ static void check_obd_connect_data(void) CHECK_CDEFINE(OBD_CONNECT_REQPORTAL); CHECK_CDEFINE(OBD_CONNECT_ACL); CHECK_CDEFINE(OBD_CONNECT_XATTR); - CHECK_CDEFINE(OBD_CONNECT_CROW); + CHECK_CDEFINE(OBD_CONNECT_REAL); CHECK_CDEFINE(OBD_CONNECT_TRUNCLOCK); - CHECK_CDEFINE(OBD_CONNECT_TRANSNO); CHECK_CDEFINE(OBD_CONNECT_IBITS); CHECK_CDEFINE(OBD_CONNECT_JOIN); CHECK_CDEFINE(OBD_CONNECT_ATTRFID); @@ -176,6 +176,10 @@ static void check_obd_connect_data(void) CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT); CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE); CHECK_CDEFINE(OBD_CONNECT_QUOTA64); + CHECK_CDEFINE(OBD_CONNECT_MDS_CAPA); + CHECK_CDEFINE(OBD_CONNECT_OSS_CAPA); + CHECK_CDEFINE(OBD_CONNECT_MDS_MDS); + CHECK_CDEFINE(OBD_CONNECT_SOM); } static void @@ -255,7 +259,6 @@ check_obdo(void) CHECK_CDEFINE(OBD_FL_DEBUG_CHECK); CHECK_CDEFINE(OBD_FL_NO_USRQUOTA); CHECK_CDEFINE(OBD_FL_NO_GRPQUOTA); - CHECK_CDEFINE(OBD_FL_CREATE_CROW); } static void @@ -817,7 +820,7 @@ check_llog_size_change_rec(void) CHECK_STRUCT(llog_size_change_rec); CHECK_MEMBER(llog_size_change_rec, lsc_hdr); CHECK_MEMBER(llog_size_change_rec, lsc_fid); - CHECK_MEMBER(llog_size_change_rec, lsc_io_epoch); + CHECK_MEMBER(llog_size_change_rec, lsc_ioepoch); CHECK_MEMBER(llog_size_change_rec, padding); CHECK_MEMBER(llog_size_change_rec, lsc_tail); } diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 5d6d250..45b5c95 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -28,8 +28,8 @@ void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwirecheck) - * running on Linux pancake 2.6.18-skas3-v9-pre9 #2 Tue Oct 17 13:08:24 PDT 2006 i686 i686 i3 - * with gcc version 3.4.4 */ + * running on Linux duplo 2.6.17.3 #4 Wed Oct 18 02:46:08 MSD 2006 i686 unknown unknown GNU/L + * with gcc version 3.4.6 */ /* Constants... */ @@ -127,7 +127,7 @@ void lustre_assert_wire_constants(void) (long long)MDS_QUOTACHECK); LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", (long long)MDS_QUOTACTL); - LASSERTF(MDS_LAST_OPC == 51, " found %lld\n", + LASSERTF(MDS_LAST_OPC == 53, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", (long long)REINT_SETATTR); @@ -443,41 +443,48 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_connect_data, ocd_nllg)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg)); - LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 40, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, " found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_transno)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, " found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, " found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_group)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group)); + LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 52, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding1)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, " found %lld\n", + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1)); - LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 48, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 56, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding2)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2)); - LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 56, " found %lld\n", + LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 64, " found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding3)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, " found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3)); - LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 64, " found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding4)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, " found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4)); - CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL); - CLASSERT(OBD_CONNECT_INDEX == 0x2ULL); - CLASSERT(OBD_CONNECT_GRANT == 0x8ULL); - CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL); - CLASSERT(OBD_CONNECT_VERSION == 0x20ULL); - CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL); - CLASSERT(OBD_CONNECT_ACL == 0x80ULL); - CLASSERT(OBD_CONNECT_XATTR == 0x100ULL); - CLASSERT(OBD_CONNECT_CROW == 0x200ULL); - CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL); - CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL); - CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL); - CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL); - CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL); - CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL); - CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x10000ULL); - CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x20000ULL); - CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL); - CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL); + CLASSERT(OBD_CONNECT_RDONLY == 0x00000001ULL); + CLASSERT(OBD_CONNECT_INDEX == 0x00000002ULL); + CLASSERT(OBD_CONNECT_GRANT == 0x00000008ULL); + CLASSERT(OBD_CONNECT_SRVLOCK == 0x00000010ULL); + CLASSERT(OBD_CONNECT_VERSION == 0x00000020ULL); + CLASSERT(OBD_CONNECT_REQPORTAL == 0x00000040ULL); + CLASSERT(OBD_CONNECT_ACL == 0x00000080ULL); + CLASSERT(OBD_CONNECT_XATTR == 0x00000100ULL); + CLASSERT(OBD_CONNECT_REAL == 0x00000200ULL); + CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x00000400ULL); + CLASSERT(OBD_CONNECT_IBITS == 0x00001000ULL); + CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL); + CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL); + CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL); + CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL); + CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL); + CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL); + CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL); + CLASSERT(OBD_CONNECT_OSS_CAPA == 0x00200000ULL); + CLASSERT(OBD_CONNECT_MDS_MDS == 0x00400000ULL); + CLASSERT(OBD_CONNECT_SOM == 0x00800000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n", @@ -621,7 +628,6 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040)); CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100)); CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200)); - CLASSERT(OBD_FL_CREATE_CROW == (0x00000400)); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", @@ -1782,10 +1788,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_io_epoch) == 32, " found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_io_epoch)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch) == 4, " found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch)); LASSERTF((int)offsetof(struct llog_size_change_rec, padding) == 36, " found %lld\n", (long long)(int)offsetof(struct llog_size_change_rec, padding)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->padding) == 4, " found %lld\n", -- 1.8.3.1